diff --git a/MLRISC.tgz b/MLRISC.tgz new file mode 100644 index 0000000..1dab3cb Binary files /dev/null and b/MLRISC.tgz differ diff --git a/MLRISC/Doc/Makefile b/MLRISC/Doc/Makefile new file mode 100644 index 0000000..7000c59 --- /dev/null +++ b/MLRISC/Doc/Makefile @@ -0,0 +1,13 @@ +# +# Build all documentation +# + +all: + (cd pictures; make) + (cd html; make) + (cd latex; make) + +clean: + (cd pictures; make clean) + (cd html; make clean) + (cd latex; make clean) diff --git a/MLRISC/Doc/README b/MLRISC/Doc/README new file mode 100644 index 0000000..0dfedc8 --- /dev/null +++ b/MLRISC/Doc/README @@ -0,0 +1,16 @@ +You will need the following programs to build the documentation. + +1. Perl 5.004 or more recent +2. LaTeX (I use the TeTeX distribution) +3. fig2dev 3.2 (from xfig) + +To build the documentation do the following: + +1. In this directory, type make +2. cd latex +3. make again +4. make again +5. make again (Three times to make LaTeX stablize on the cross references) + +The HTML pages will be in the directory html, starting from html/index.html +The LaTeX documents will be in the directory latex. diff --git a/MLRISC/Doc/graphics/fun-ssa-value-graph.gif b/MLRISC/Doc/graphics/fun-ssa-value-graph.gif new file mode 100644 index 0000000..0590677 Binary files /dev/null and b/MLRISC/Doc/graphics/fun-ssa-value-graph.gif differ diff --git a/MLRISC/Doc/graphics/mandelbrot-cdg-survey.gif b/MLRISC/Doc/graphics/mandelbrot-cdg-survey.gif new file mode 100644 index 0000000..cd61109 Binary files /dev/null and b/MLRISC/Doc/graphics/mandelbrot-cdg-survey.gif differ diff --git a/MLRISC/Doc/graphics/mandelbrot-cfg-survey.gif b/MLRISC/Doc/graphics/mandelbrot-cfg-survey.gif new file mode 100644 index 0000000..08de1e1 Binary files /dev/null and b/MLRISC/Doc/graphics/mandelbrot-cfg-survey.gif differ diff --git a/MLRISC/Doc/graphics/mandelbrot-ddg.gif b/MLRISC/Doc/graphics/mandelbrot-ddg.gif new file mode 100644 index 0000000..5bde633 Binary files /dev/null and b/MLRISC/Doc/graphics/mandelbrot-ddg.gif differ diff --git a/MLRISC/Doc/graphics/mandelbrot-dom-survey.gif b/MLRISC/Doc/graphics/mandelbrot-dom-survey.gif new file mode 100644 index 0000000..2309039 Binary files /dev/null and b/MLRISC/Doc/graphics/mandelbrot-dom-survey.gif differ diff --git a/MLRISC/Doc/graphics/mandelbrot-loop-nesting.gif b/MLRISC/Doc/graphics/mandelbrot-loop-nesting.gif new file mode 100644 index 0000000..cf637bc Binary files /dev/null and b/MLRISC/Doc/graphics/mandelbrot-loop-nesting.gif differ diff --git a/MLRISC/Doc/graphics/mandelbrot-opt.gif b/MLRISC/Doc/graphics/mandelbrot-opt.gif new file mode 100644 index 0000000..ac990cc Binary files /dev/null and b/MLRISC/Doc/graphics/mandelbrot-opt.gif differ diff --git a/MLRISC/Doc/graphics/mandelbrot-region-survey.gif b/MLRISC/Doc/graphics/mandelbrot-region-survey.gif new file mode 100644 index 0000000..e2fb3e5 Binary files /dev/null and b/MLRISC/Doc/graphics/mandelbrot-region-survey.gif differ diff --git a/MLRISC/Doc/graphics/mandelbrot-ssa.gif b/MLRISC/Doc/graphics/mandelbrot-ssa.gif new file mode 100644 index 0000000..dd73c93 Binary files /dev/null and b/MLRISC/Doc/graphics/mandelbrot-ssa.gif differ diff --git a/MLRISC/Doc/graphics/smlnj.jpg b/MLRISC/Doc/graphics/smlnj.jpg new file mode 100644 index 0000000..c80826a Binary files /dev/null and b/MLRISC/Doc/graphics/smlnj.jpg differ diff --git a/MLRISC/Doc/graphics/vh32.png b/MLRISC/Doc/graphics/vh32.png new file mode 100644 index 0000000..061ceec Binary files /dev/null and b/MLRISC/Doc/graphics/vh32.png differ diff --git a/MLRISC/Doc/graphics/vh401.gif b/MLRISC/Doc/graphics/vh401.gif new file mode 100644 index 0000000..519340f Binary files /dev/null and b/MLRISC/Doc/graphics/vh401.gif differ diff --git a/MLRISC/Doc/html/Makefile b/MLRISC/Doc/html/Makefile new file mode 100644 index 0000000..b8110cf --- /dev/null +++ b/MLRISC/Doc/html/Makefile @@ -0,0 +1,86 @@ +# +# This file compiles the documentation +# + +HTML= mlrisc.html mltexdoc.html + + +TEX= ../latex/annotations.tex \ + ../latex/graphics.tex \ + ../latex/instructions.tex \ + ../latex/mltree.tex \ + ../latex/delayslots.tex \ + ../latex/cells.tex \ + ../latex/cluster.tex \ + ../latex/constants.tex \ + ../latex/pseudo-ops.tex \ + ../latex/streams.tex \ + ../latex/labelexp.tex \ + ../latex/labels.tex \ + ../latex/regions.tex \ + ../latex/regmap.tex \ + ../latex/graphs.tex \ + ../latex/mlrisc-ir.tex \ + ../latex/compiler-graphs.tex \ + ../latex/SSA.tex \ + ../latex/VLIW.tex \ + ../latex/ra.tex \ + ../latex/ILP.tex \ + ../latex/mlrisc-arch.tex \ + ../latex/future-work.tex \ + ../latex/asm.tex \ + ../latex/mc.tex \ + ../latex/span-dep.tex \ + ../latex/instrsel.tex \ + ../latex/availability.tex \ + ../latex/gc.tex \ + ../latex/mlrisc-gen.tex \ + ../latex/contributions.tex \ + ../latex/systems.tex \ + ../latex/mlrisc-graphics.tex \ + ../latex/contributors.tex \ + ../latex/requirements.tex \ + ../latex/INTRO.tex \ + ../latex/problem.tex \ + ../latex/mlrisc-compiler.tex \ + ../latex/mlrisc-ir-rep.tex \ + ../latex/backend-opt.tex \ + ../latex/sys-integration.tex \ + ../latex/optimizations.tex \ + ../latex/span-dep.tex \ + ../latex/mlrisc-md.tex \ + ../latex/mlrisc-ra.tex \ + ../latex/line-counts.tex \ + ../latex/sparc.tex \ + ../latex/alpha.tex \ + ../latex/x86.tex \ + ../latex/ppc.tex \ + ../latex/mips.tex \ + ../latex/hppa.tex \ + ../latex/C6.tex \ + ../latex/mltree-ext.tex \ + ../latex/mltree-util.tex + +all: pictures $(HTML) makelinks + +pictures: + (cd ../pictures; make) + +cvsrm: + cvsrm $(HTML) + +clean: + /bin/rm -f *.html + +makelinks: + @if [ ! -d graphics ]; then ln -f -s ../graphics .; fi + @if [ ! -d pictures ]; then ln -f -s ../pictures .; fi + @if [ ! -f index.html ]; then ln -f -s INTRO.html index.html ; fi + + + +%.html: ../latex/%.tex mltex2html mltex.thm + perl mltex2html ../latex/$(@:.html=.tex) + +mlrisc.html: $(TEX) +mltexdoc.html: ../latex/mltex.tex diff --git a/MLRISC/Doc/html/mltex.thm b/MLRISC/Doc/html/mltex.thm new file mode 100644 index 0000000..9a2ae9e --- /dev/null +++ b/MLRISC/Doc/html/mltex.thm @@ -0,0 +1,116 @@ +############################################################################### +# +# Theme file for MLTeX +# +############################################################################### + +# +# Background color for document +# +$BACKGROUND_COLOR="#ffffff"; #white + +# +# Default text color for document +# +$TEXT_COLOR="#000020"; #very dark blue + +# +# Color of page title +# +$TITLE_COLOR="#aa0000"; #dark red + +# +# Color of the abstract title +# +$ABSTRACT_TITLE_COLOR="#486591"; #black + +# +# Color of each section title +# +$SECTION_COLOR="#486591"; + +# +# Color of each subsection title +# +$SUBSECTION_COLOR="#486591"; + +# +# Color of each subsubsection title +# +$SUBSUBSECTION_COLOR="#486591"; + +# +# Color of each paragraph title +# +$PARAGRAPH_COLOR="#486591"; + +# +# Colors for macros \newdef and \newtype +# +$NEWDEF_COLOR="#ff0000"; #red +$NEWTYPE_COLOR="#ff0000"; #red + +# +# Colors for html links +# +$LINK_COLOR="navy"; +$VLINK_COLOR="gray"; +$ALINK_COLOR="maroon"; + +# +# Colors for SML type variables, keywords and identifiers +# +$SML_TYVAR_COLOR="#00aaaa"; +$SML_KEYWORD_COLOR="#6060a0"; +$SML_IDENT_COLOR="#9c4040"; + +# +# Color for emphasis +# +$EMPH_COLOR="#ff0000"; #red + +# +# Color for description items +# +$DESC_COLOR="#000070"; #blue + +# +# Color for captions +# +$CAPTION_COLOR="#007777"; #green/blue + +# +# Color of ML code +# +$CODE_COLOR="#000000"; # black + +# +# Background color of table of contents +# +$TOC_BACKGROUND_COLOR="#e6e6e6"; + +$SECTION_TOC_TEXT_COLOR="#486591"; +$MAJORSECTION_TEXT_COLOR="ffffff"; +$MAJORSECTION_BACKGROUND_COLOR="#486591"; +$SECTION_TOC_BACKGROUND_COLOR="#e6e6e6"; +$TOC_FACE="hevetica"; +$MAX_LOCAL_TOC_ENTRY_LENGTH=40; +$MAX_GLOBAL_TOC_ENTRY_LENGTH=40; +$GLOBAL_TOC_WIDTH=170; +$SCREEN_WIDTH=700; +$TEXT_WIDTH=600; +$TOC_SIZE=-1; + +$X_PIXELS = 1024; +$Y_PIXELS = 768; +$IMAGE_SCALING=0.8; +$PAPER_HEIGHT = "11in"; +$PAPER_WIDTH = "8.5in"; + +@AUTHORS=("Lal George", "Allen Leung"); +@EMAILS=('george@research.bell-labs.com','leunga@cs.nyu.edu'); + +$WWWHOST="www.cs.nyu.edu"; +$URLPREFIX="leunga/MLRISC/Doc/html/"; + +1; diff --git a/MLRISC/Doc/html/mltex2html b/MLRISC/Doc/html/mltex2html new file mode 100755 index 0000000..24d393f --- /dev/null +++ b/MLRISC/Doc/html/mltex2html @@ -0,0 +1,1441 @@ +#!/usr/local/bin/perl +# +# This tool generates HTML pages in my own format given a stylized Latex file. +# +# The output is in HTML 4.01 Transitional form. +# +# Allen Leung (leunga@cs.nyu.edu) + +############################################################################ +# +# Formatting parameters +# +############################################################################ +$BACKGROUND_COLOR="#ffffff"; #white +$TEXT_COLOR="#000020"; +$TITLE_COLOR="#aa0000"; #dark red +$ABSTRACT_TITLE_COLOR="#486591"; #black +$SECTION_COLOR="#486591"; #dark red +$SUBSECTION_COLOR="#486591"; #dark red +$SUBSUBSECTION_COLOR="#486591"; #dark red +$PARAGRAPH_COLOR="#486591"; #dark red +$NEWDEF_COLOR="#ff0000"; #red +$NEWTYPE_COLOR="#ff0000"; #red + +$LINK_COLOR="navy"; +$VLINK_COLOR="gray"; +$ALINK_COLOR="maroon"; + +$SML_TYVAR_COLOR="#00aaaa"; #green +$SML_KEYWORD_COLOR="#6060a0"; #blue +$SML_IDENT_COLOR="#9c4040"; + +$EMPH_COLOR="#ff0000"; #red +$DESC_COLOR="#000070"; #blue +$CAPTION_COLOR="#007777"; #green/blue +$CODE_COLOR="#000000"; # black +$TOC_BACKGROUND_COLOR="#e6e6e6"; +$SECTION_TOC_TEXT_COLOR="#486591"; +$MAJORSECTION_TEXT_COLOR="ffffff"; +$MAJORSECTION_BACKGROUND_COLOR="#486591"; +$SECTION_TOC_BACKGROUND_COLOR="#e6e6e6"; +$TOC_FACE="hevetica"; +$MAX_LOCAL_TOC_ENTRY_LENGTH=40; +$MAX_GLOBAL_TOC_ENTRY_LENGTH=40; +$GLOBAL_TOC_WIDTH=170; +$SCREEN_WIDTH=700; +$TEXT_WIDTH=400; +$TOC_SIZE=-1; + +$X_PIXELS = 1024; +$Y_PIXELS = 768; +$IMAGE_SCALING=0.8; +$PAPER_HEIGHT = "11in"; +$PAPER_WIDTH = "8.5in"; + +@AUTHORS=("Lal George", "Allen Leung"); +@EMAILS=('george@research.bell-labs.com','leunga@cs.nyu.edu'); +$PWD=`pwd`; +$DATE=`date`; +$HOSTNAME=`hostname`; +chop $HOSTNAME; +chop $DATE; +chop $PWD; +$USER=$ENV{"USER"}; +$WWWHOST="www.cs.nyu.edu"; +$URLPREFIX="leunga/MLRISC/Doc/html/"; + +$BEGINPRE="
";
+$ENDPRE="
"; +$BEGINALLTT="
";
+$ENDALLTT="
"; + +############################################################################ +# +# Global variables and tables +# +############################################################################ +$PARAM='[^\{\}]*'; +sub globalInit() +{ +@GLOBALTOC=(); # table of contents for entire document +@LOCALTOC=(); # table of contents for each section +%LINKLABEL={}; # name to link for referenceing +%LINKNAME={}; +$PATHNAME=""; # current pathname +$FILENAME=""; # current file +$HTML_FILENAME=""; +$LINE_NO=0; # current line +$GENERATE_TOC=0; # generate table of contents? +$PAGE_TITLE=""; +$DOCUMENT_TITLE=""; +$DOCUMENT_ABSTRACT=""; +$DOCUMENT_AUTHOR=""; +%SECTION_TEXT = (); +@TEXT=(); +@TEXT_STACK=(); +} + +sub IGNORE { return ""; } + +############################################################################ +# +# Error +# +############################################################################ +sub msg +{ my($text) = @_; + print "$FILENAME:$LINE_NO: ", $text, "\n"; +} +sub warning { msg("WARNING: " . $_[0]); } +sub error { msg($_[0]); die($_[0]); } +############################################################################ +# +# Filename processing +# +############################################################################ +sub basename +{ my($f) = @_; + $f =~ s|^.*/||; + return $f; +} + +sub dirname +{ my($f) = @_; + $f =~ s|^(.*/).*$|\1|; + return $f; +} + +sub suffix +{ my($f) = @_; + $f = basename($f); + $f =~ /^.*\.(.*)/; + return $1; +} + +sub replaceSuffix +{ my($f, $old, $new) = @_; + if ($f =~ /^(.*\.)$old/) + { return $1 . $new; + } else + { error("file $f does not have suffix $old"); + } +} + +############################################################################ +# +# Indexing and Cross Referencing. +# +############################################################################ +sub localtocentry # add local index entry +{ my($name,$level) = @_; + my(@entry) = ($name, $level); + push @LOCALTOC, \@entry; + my($label) = $LABELCOUNTER++; + $LINKLABEL{$name} = '#'. $label; + $LINKNAME{$name} = $name; + $CURRENT_SECTION_NAME=$name; + return ""; +} + +sub globaltocentry # add global index entry +{ my($name, $level) = @_; + my(@entry) = ($name, $level); + push @GLOBALTOC, \@entry; + my($key) = "section:$name"; + $LINKLABEL{$key} = $HTML_FILENAME; + $LINKNAME{$key} = $name; + $CURRENT_SECTION_NAME=$name; +} + +sub tableofcontents +{ $GENERATE_TOC = 1; + print "[Generating table of contents]\n"; + return ""; +} + +sub label +{ my($label) = @_; + $LINKLABEL{$label} = $HTML_FILENAME . '#' . $label; + $LINKNAME{$label} = $CURRENT_SECTION_NAME; + return ""; +} + +sub ref +{ my($label) = @_; + return "{$label}"; +} + +sub lookupRef +{ my($label) = @_; + my($ref) = $LINKLABEL{$label}; + my($name) = $LINKNAME{$label}; + return " $name"; +} + +sub resolveReferences +{ my($text) = @_; + $text =~ s/\{($PARAM)\}/&lookupRef($1)/oge; + return $text; +} + +############################################################################ +# +# Execute a subprogram +# +############################################################################ +sub runprog +{ my($command) = @_; + print STDERR "running: ", $command, "\n"; + system("$command"); + if ($?) { die("$? $command"); } +} + +############################################################################ +# +# Environment/command processing +# +############################################################################ +sub enterMode +{ my($m) = @_; + push @MODE_STACK, $m; + $MODE = $m; +} + +sub leaveMode +{ my($m) = @_; + if ($MODE ne $m) + { die("Trying to leave $m but actually in " . $MODE . " mode"); } + pop @MODE_STACK; + $MODE = $MODE_STACK[$#MODE_STACK]; + $parbreak=1; +} + +sub pushTextStack +{ push @TEXT_STACK, \@TEXT; + @TEXT=(); +} + +sub popTextStack +{ my(@text) = @TEXT; + my($old) = pop @TEXT_STACK; + @TEXT = @{$old}; + return "@text"; +} + +%DEFINEDNAMES = {}; +%BEGINENV = {}; # how to process \begin{env} +%ENDENV = {}; # how to process \end{env} +%HTMLENV = {}; # html tag translation +%COMMAND = {}; # how to process \command +%ARITY = {}; # how many parameters does the environment has +@ENV0 = (); +@ENV1 = (); +@ENV2 = (); +@COMMAND0 = (); +@COMMAND1 = (); +@COMMAND2 = (); +@COMMAND3 = (); + +sub newenvironment +{ my($name,$arity,$begin,$end) = @_; + if (defined $DEFINEDNAMES{$name}) + { die("$name has already been defined"); + } + $DEFINEDNAMES{$name} = "env"; + $BEGINENV{$name} = $begin; + $ENDENV{$name} = $end; + $ARITY{$name} = $arity; + push @ENV0, $name if $arity == 0; + push @ENV1, $name if $arity == 1; + push @ENV2, $name if $arity == 2; +} + +sub newhtmlenvironment +{ my($name,$tag) = @_; + if (defined $DEFINEDNAMES{$name}) + { die("$name has already been defined"); + } + $DEFINEDNAMES{$name} = "htmltag"; + $HTMLENV{$name} = $tag; + push @ENV0, $name; +} + +# This environment does not cause trailing paragraph breaks +sub newhtmlparenvironment +{ newhtmlenvironment(@_); + my($name) = @_; + $PARENV{$name} = 1; +} + +sub newcommand +{ my($name,$arity,$command) = @_; + if (defined $DEFINEDNAMES{$name}) + { die("$name has already been defined"); + } + $DEFINEDNAMES{$name} = "command"; + $COMMAND{$name} = $command; + $ARITY{$name} = $arity; + push @COMMAND0, $name if $arity == 0; + push @COMMAND1, $name if $arity == 1; + push @COMMAND2, $name if $arity == 2; + push @COMMAND3, $name if $arity == 3; +} + +sub beginenv +{ my($env,@args) = @_; + my($type) = $DEFINEDNAMES{$env}; + if (! defined $type) + { warning("environment $env is not defined"); + return "\\begin{$env}"; + } + if ($type eq "env") + { my($func) = $BEGINENV{$env}; + return $func->(@args); # call function + } + return "<$HTMLENV{$env}>" if $type eq "htmltag"; + die("don't know how to handle \\begin{$env} at line $LINE_NO"); +} + +sub endenv +{ my($env) = @_; + my($type) = $DEFINEDNAMES{$env}; + if (! defined $type) + { warning("environment $env is not defined"); + return "\\end{$env}"; + } + return "" if $type eq "htmltag"; + if ($type eq "env") + { my($func) = $ENDENV{$env}; + my($text) = $func->(); # call function + $parbreak = 1; + return $text; + } + die("don't know how to handle \\end{$env} at line $LINE_NO"); +} + +sub command +{ my($command,@args) = @_; + my($type) = $DEFINEDNAMES{$command}; + if (! defined $type) + { warning("command $command is not defined"); + return "\\$env"; + } + #print "[$command]"; + if ($type eq "command") + { my($func) = $COMMAND{$command}; + if (! $func) + { die("don't know how to handle \\$command at line $LINE_NO"); } + return $func->(@args); + } + die("don't know how to handle \\$command at line $LINE_NO"); +} +############################################################################ +# +# Color processing +# +############################################################################ +$COLORS{"red"} = "#ff0000"; +$COLORS{"green"} = "#00ff00"; +$COLORS{"blue"} = "#0000ff"; +$COLORS{"darkred"} = "#aa0000"; +$COLORS{"darkgreen"} = "#00aa00"; +$COLORS{"darkblue"} = "#0000aa"; +$COLORS{"black"} = "#000000"; +$COLORS{"grey"} = "#777777"; +$COLORS{"white"} = "#ffffff"; + +sub colorOf +{ my($c) = @_; + return $COLORS{$c} if (defined $COLORS{$c}); + return "\"$c\""; +} +sub begincolor { my($c) = @_; return ""; } +sub endcolor { return ""; } + +############################################################################ +# +# Generic LaTeX fonts and sizes handling. +# +############################################################################ +sub bf { return ""; } +sub tt { return ""; } +sub it { return ""; } +sub em { return ""; } +sub sf { return ""; } +sub rm { return ""; } +sub huge { return ""; } +sub LARGE { return ""; } +sub Large { return ""; } +sub large { return ""; } +sub normalsize { return ""; } +sub small { return ""; } +sub footnotesize { return ""; } +sub scriptsize { return ""; } +sub tiny { return ""; } + +############################################################################ +# +# Headers and footers +# +############################################################################ +sub header +{ $HEADER= <<"END"; + + + +END + return $HEADER; +} + +sub footer +{ my($filename) = @_; + my($URL)="http://$WWWHOST/$URLPREFIX$outfile"; + my($contact) = "\n"; + for ($i=0; $i <= $#AUTHORS; $i++) + { $author = $AUTHORS[$i]; + $email = $EMAILS[$i]; + $contact .= "\n"; + } + $contact .= "
\n"; + $contact .= "$author\n"; + $contact .= "
\n"; + +$FOOTER= <<"END"; +
+ + + + + + + + + +
+ $contact + + + SML/NJ + + + Validate this page + +
+ + Generated by + + mltex2html + + + +
+ + Last modified: $DATE by $USER\@$HOSTNAME + +
+END + return $FOOTER; +} + +############################################################################ +# +# SML keyword highlighting +# +############################################################################ +@KEYWORDS=("structure", "datatype", "type", "of", "eqtype", "struct", + "end", "case", "if", "then", "else", "signature", + "functor", "withtype", "sharing", "include", "where", + "val", "fun", "handle", "raise", "exception", "let", "in", + "local", "abstype", "rec", "and", "andalso", "orelse", "open", + "infix", "infixr", "nonfix" + ); + +@DEFKEYWORDS=("functor", "structure", "signature", "exception", "withtype", + "datatype", "type", "eqtype", "fun", "val"); + +$KEYWORDS = "@KEYWORDS"; +$KEYWORDS =~ s/ /|/g; +$DEFKEYWORDS = "@DEFKEYWORDS"; +$DEFKEYWORDS =~ s/ /|/g; +$SMLIDENT = "[a-zA-Z_][a-zA-Z0-9_']*"; +$SMLTYPEVAR = "('+$SMLIDENT)"; + +sub highlightKeyword { return "$_[0]"; } +sub highlightTyvar { return "$_[0]"; } +sub highlightIdent +{ my($keyword, $typevar, $ident) = @_; + if ($typevar ne "") { $typevar .= " "; } + return "$keyword $typevar$ident"; +} + +sub escape +{ my($line) = @_; + $line =~ s|<|<|g; + $line =~ s|>|>|g; + $line =~ s|<|<|g; + $line =~ s|>|>|g; + return $line; +} + + +sub smldisplay +{ my($line) = @_; + #$line =~ s/\b(($DEFKEYWORDS)\s+($SMLTYPEVAR|\(\s*$SMLTYPEVAR(\s*,\s*$SMLTYPEVAR)*\))?)\s*($SMLIDENT)(\s*=)/&highlightIdent($2,$3,$8) . $9/oge; + $line =~ s/\b($KEYWORDS)\b/&highlightKeyword($1)/oge; + $line =~ s/^(sig)\b/&highlightKeyword($1)/oge; + $line =~ s/([^\.\w])(sig)\b/$1 . &highlightKeyword($2)/oge; + $line =~ s/('\w+)/&highlightTyvar($1)/oge; + return $line; +} + +sub sml +{ return "" . smldisplay(@_) . ""; +} +############################################################################ +# +# Sectioning commands +# +############################################################################ +sub include +{ my($filename) = @_; + $FILENAME= $PATHNAME . $filename . ".tex"; + $HTML_FILENAME=replaceSuffix(basename($FILENAME),"tex","html"); + print STDERR "[Processing $FILENAME]\n"; + my($title,@text) = processSection($FILENAME); + my($localtoc) = ""; + if ($#LOCALTOC >= 0) + { my(@toctitle) = ($title, 0); + my(@LOCALTOC) = (\@toctitle, @LOCALTOC); + $localtoc = + makeTOC("local", + "border=0 align=right bgcolor=\"$SECTION_TOC_BACKGROUND_COLOR\"", + $MAX_LOCAL_TOC_ENTRY_LENGTH,"",@LOCALTOC); + } + my(@entry) = ($title, $localtoc, @text); + $SECTION_TEXT{$FILENAME} = \@entry; + return ""; +} + + +sub section +{ my($name) = @_; + #print "section $name\n"; + $parbreak=1; + $PAGE_TITLE=$name; + globaltocentry($name,1); + return ""; +} + +sub majorsection +{ my($name) = @_; + my(@entry) = ($name, 0); + push @GLOBALTOC, \@entry; + $parbreak=1; + return ""; +} + +sub title +{ $DOCUMENT_TITLE = $_[0]; + $parbreak=1; + return ""; +} + +sub subsection +{ my($name) = @_; + #print "[subsection $name]\n"; + $parbreak=1; + my($label) = localtocentry($name, 2); + return <<"END" +$label +

$name

+END +} + +sub subsubsection +{ my($name) = @_; + #print "subsubsection $name\n"; + $parbreak=1; + my($label) = localtocentry($name, 3); + return <<"END"; +$label +

$name

+END +} + +sub paragraph +{ my($name) = @_; + #print "paragraph $name\n"; + $parbreak=1; + my($label) = localtocentry($name, 4); + return <<"END"; +$label +

$name

+END +} + +sub beginabstract +{ print "[begin abstract]\n"; + pushTextStack(); + return ""; +} + +sub endabstract +{ print "[end abstract]\n"; + $DOCUMENT_ABSTRACT = popTextStack(); + return ""; +} + +############################################################################ +# +# Hypertext links +# +############################################################################ +sub href { return "$_[1]"; } +sub mlrischref { return "$_[1]"; } +sub externhref { return "$_[1]"; } + +############################################################################ +# +# Images and figures +# +########################################################################### + +sub isGraphics +{ my($filename) = @_; + return 1 if $filename =~ /\.png$/; + return 1 if $filename =~ /\.jpg$/; + return 1 if $filename =~ /\.gif$/; + return 1 if $filename =~ /\.tif$/; + return 1 if $filename =~ /\.tiff$/; + return 0; +} + +sub image { return "\"$_[0]\""; } + +# +# Given a image file, try to find out if the original source exists +# Paths are hardwired for now. +sub figOf +{ my($filename) = @_; + my($basename) = ""; + $basename = $1 if $filename =~ m|^\.\./pictures/eps/(.*)\.eps|; + my($fig) = "../pictures/fig/$basename.fig"; + return $fig if (-f $fig); + return ""; +} + +# +# Try to convert the eps file to png +# +sub imageOf +{ my($filename) = @_; + $basename = $1 if ($filename =~ m|^\.\./pictures/eps/(.*)\.eps$|); + my($png) = "../pictures/png/$basename.png"; + return $png if -f $png; + my($jpg) = "../pictures/jpeg/$basename.jpg"; + return $jpg if -f $jpg; + + my($fig) = figOf($filename); + if ($fig) + { runprog("fig2dev -L png $fig $png"); + return $png; + } + return ""; +} + +# +# Try to translate LaTeX lengths into pixel sizes +# +sub lengthOf +{ my($l) = @_; + return $1 * 0.4 if $l =~ /^(\d*(\.\d*)?)em$/; # making a guess + return $1 * 0.4 if $l =~ /^(\d*(\.\d*)?)cm$/; + return $1 * 1.0 if $l =~ /^(\d*(\.\d*)?)in$/; + if (! ($l =~ /^\s*$/)) { die("lengthOf($l)"); } + return 0; +} +sub heightOf { return lengthOf(@_) / lengthOf($PAPER_HEIGHT) * $Y_PIXELS * $IMAGE_SCALING; } +sub widthOf { return lengthOf(@_) / lengthOf($PAPER_WIDTH) * $X_PIXELS * $IMAGE_SCALING; } + +# +# Try to translate sizes into pixel sizes +# +sub makeSize +{ my($height, $width) = @_; + $height = heightOf($height); + $width = widthOf($width); + my($size) = ""; + $size .= "height=$height" if $height; + $size .= " width=$width" if $width; + return $size; +} + + +sub psfig +{ my($params) = @_; + my($filename) = ($params =~ /figure=([^ ,]*)/); + my($height) = ($params =~ /height=([^ ,]*)/); + my($width) = ($params =~ /width=([^ ,]*)/); + my($size) = makeSize($height, $width); + + if (! defined $filename) { die("figure not found in $params"); } + if (! -f $filename) { warning("filename $filename not found"); } + + my($img) = imageOf($filename); + + if ($img) + { return <<"END"; + + Click to enlarge + +END + } else + { return " $filename "; + } +} + +sub cpsfig { return "
" . psfig(@_) . "
"; } + + +sub beginfig +{ my($border,$align) = @_; + $CAPTION = ""; + $align = "align=left" if $align eq "l"; + $align = "align=right" if $align eq "r"; + $align = "align=center" if $align eq "c"; + return " + + +END + } + return "$caption
"; +} + +sub endfig +{ my($caption) = @_; + msg("new figure, caption=$caption"); + if ($caption ne "") + { $caption = <<"END"; +
+ + $caption + +
"; +} + +sub beginFigure { return beginfig(0,""); } +sub endFigure { return endfig($CAPTION); } + +sub caption +{ my($text) = @_; + $CAPTION = $text; + return ""; +} + +sub beginwrapfigure +{ my($align,$size) = @_; + return beginfig(0,$align); +} + +sub endwrapfigure { return endfig($CAPTION); } + +############################################################################ +# +# Generic tabular processing +# +############################################################################ +sub begintab +{ my($taboptions, $html) = @_; + enterMode("tabular"); + + # guess the border + my($border) = 0; + $border = 1 if ($taboptions =~ /^\|/); + $border = 2 if ($taboptions =~ /^\|\|/); + + if ($html eq "") { $html = "align=center"; } + + push @tabular_stack, $taboptions; + push @current_taboptions, $taboptions; + + return "" . newtabitem(); +} + +sub endtab +{ leaveMode("tabular"); + pop @tabular_stack; + pop @current_tab; + return "
"; +} + +sub newtabitem +{ my($t) = $current_taboptions[$#current_taboptions]; + my($align) = "left"; + + $t = $1 if $t =~ /^\|+(.*)/; + ($align = "center", $t = $1) if $t =~ /^c(.*)/; + ($align = "left", $t = $1) if $t =~ /^l(.*)/; + ($align = "right", $t = $1) if $t =~ /^r(.*)/; + + $current_taboptions[$#current_taboptions] = $t; + + return ""; +} + +sub addtab +{ return "" . newtabitem(); +} + +sub tabnewline +{ $current_taboptions[$#current_taboptions] = + $tabular_stack[$#tabular_stack]; + return "" . newtabitem(); +} + +sub newline +{ return "
"; +} + +############################################################################ +# +# Math mode processing +# +############################################################################ +$MATHPARAM='[^$]*'; +$MATHPARAM2='([^$\\]|\\[^)]+)*'; +sub math +{ my($math) = @_; + + $math =~ s|&||g; + + $math =~ s|\\le\b|<=|g; + $math =~ s|\\ge\b|>=|g; + + #quote all < and > first + $math =~ s//>/g; + + # fonts changing + $math =~ s|{\\tt\s+($PARAM)}|\1|g; + + # subscript/superscripts + $math =~ s|_([^{])|\1|g; + $math =~ s|\^([^{])|\1|g; + $math =~ s|_{($PARAM)}|\1|g; + $math =~ s|^{($PARAM)}|\1|g; + $math =~ s|\\sb{($PARAM)}|\1|g; + $math =~ s|\\sp{($PARAM)}|\1|g; + + # other stuff + $math =~ s|\\edge\{($PARAM)\}|&rarr\1|g; + $math =~ s|\\defas\b|=|g; + $math =~ s|\\lim\b|lim|g; + $math =~ s|\\sin\b|sin|g; + $math =~ s|\\cos\b|cos|g; + $math =~ s|\\tan\b|tan|g; + $math =~ s|\\log\b|log|g; + $math =~ s|\\equiv\b|==|g; + $math =~ s|\\ldots\b|...|g; + + $math =~ s|\\in\b|∈|g; + $math =~ s|\\alpha\b|α|g; + $math =~ s|\\beta\b|β|g; + $math =~ s|\\gamma\b|γ|g; + $math =~ s|\\lambda\b|λ|g; + $math =~ s|\\phi\b|φ|g; + $math =~ s|\\sum\b|∑|g; + $math =~ s|\\cup\b|∪|g; + $math =~ s|\\cap\b|∩|g; + $math =~ s|\\rightarrow\b|→|g; + $math =~ s|\\leftarrow\b|←|g; + + $math =~ s/\{/{/g; + $math =~ s/\}/}/g; + + #unquote all < and > + $math =~ s|<|<|g; + $math =~ s|>|>|g; + + $math =~ s|\\ne\b|<>|g; + + $math =~ s|\\\\|
|g; + + return "$math"; +} + +sub displaymath +{ my($math) = @_; + return "
" . math($math) . "
"; +} + +sub stupidBrowserMath +{ + s#&rarr(;|\b)#->#g; + s#&larr(;|\b)#<-#g; + s#&isin(;|\b)#in #g; +} + + +############################################################################ +# +# Frames and boxes +# +############################################################################ +sub beginboxit { return "
"; } +sub endboxit { return "
"; } + +############################################################################ +# +# Setup the commands and environments +# +############################################################################ +newenvironment("color",1, \&begincolor, \&endcolor); + +newhtmlenvironment("small","small"); +newhtmlenvironment("Bold","b"); +newhtmlenvironment("Italics","i"); +newhtmlenvironment("Emph","em"); +newhtmlenvironment("address","address"); + +newhtmlparenvironment("quotation","blockquote"); +newhtmlparenvironment("center","center"); +newhtmlparenvironment("enumerate","ol"); +newhtmlparenvironment("itemize","ul"); +newhtmlparenvironment("description","dl"); + +newenvironment("boxit", 0, \&beginboxit, \&endboxit); +newenvironment("Boxit", 0, \&beginboxit, \&endboxit); + +newcommand("title", 1, \&title); +newcommand("section", 1, \§ion); +newcommand("subsection", 1, \&subsection); +newcommand("subsubsection", 1, \&subsubsection); +newcommand("paragraph", 1, \¶graph); +newcommand("include", 1, \&include); +newcommand("majorsection", 1, \&majorsection); +newenvironment("abstract",0,\&beginabstract,\&endabstract); + +newcommand("bf",0,\&bf); +newcommand("tt",0,\&tt); +newcommand("it",0,\&it); +newcommand("em",0,\&em); +newcommand("sf",0,\&sf); +newcommand("rm",0,\&rm); +newcommand("huge",0,\&huge); +newcommand("LARGE",0,\&LARGE); +newcommand("Large",0,\&Large); +newcommand("large",0,\&large); +newcommand("normalsize",0,\&normalsize); +newcommand("footnotesize",0,\&footnotesize); +newcommand("scriptsize",0,\&scriptsize); +newcommand("tiny",0,\&tiny); + +newcommand("Term", 1, sub { return "$_[0]"; } ); + +newcommand("italics", 1, sub { return "$_[0]"; }); +newcommand("bold", 1, sub { return "$_[0]"; }); +newcommand("emph", 1, sub { return "$_[0]"; }); + +newcommand("href", 2, \&href); +newcommand("mlrischref", 2, \&mlrischref); +newcommand("externhref", 2, \&externhref); + +newcommand("image", 3, \&image); +newcommand("psfig", 1, \&psfig); +newcommand("cpsfig", 1, \&cpsfig); + +newcommand("linebreak", 0, \&IGNORE); +newcommand("hr", 0, sub { return "
"; }); +newcommand("hline", 0, \&IGNORE); +newcommand("pagebreak", 0, \&IGNORE); + +newcommand("br", 1, sub { return "
"; }); + +newcommand("label",1, \&label); +newcommand("ref",1, \&ref); + +newcommand("MLRISC", 0, sub { return "MLRISC"; }); +newcommand("LaTeX", 0, sub { return "LATEX"; }); +newcommand("MLTeX", 0, sub { return "MLTEX"; }); + + +newcommand("caption", 1, \&caption); +newenvironment("Figure", 0, \&beginFigure, \&endFigure); +newenvironment("wrapfigure", 2, \&beginwrapfigure, \&endwrapfigure); + +sub newdef { return "$_[0]"; } +sub newtype { return "$_[0]"; } + +newcommand("newdef",1,\&newdef); +newcommand("newtype",1,\&newtype); + +newcommand("cite", 1, sub { return "[$_[0]]"; }); +newcommand("tableofcontents", 0, \&tableofcontents); + +############################################################################ +# +# Import style file +# +############################################################################ +require "mltex.thm"; + +############################################################################ +# +# Setup the patterns for command and environment parsing +# +############################################################################ +$COMMAND0 = "@COMMAND0"; $COMMAND0 =~ s/ /|/g; +$COMMAND1 = "@COMMAND1"; $COMMAND1 =~ s/ /|/g; +$COMMAND2 = "@COMMAND2"; $COMMAND2 =~ s/ /|/g; +$COMMAND3 = "@COMMAND3"; $COMMAND3 =~ s/ /|/g; +$COMMAND0 = 'XXXXXXXXXXXXXXXXX' if ($COMMAND0 eq ""); +$COMMAND1 = 'XXXXXXXXXXXXXXXXX' if ($COMMAND1 eq ""); +$COMMAND2 = 'XXXXXXXXXXXXXXXXX' if ($COMMAND2 eq ""); +$COMMAND3 = 'XXXXXXXXXXXXXXXXX' if ($COMMAND3 eq ""); +$ENV0 = "@ENV0"; $ENV0 =~ s/ /|/g; +$ENV1 = "@ENV1"; $ENV1 =~ s/ /|/g; +$ENV2 = "@ENV2"; $ENV2 =~ s/ /|/g; +$ENV0 = 'XXXXXXXXXXXXXXXXX' if ($ENV0 eq ""); +$ENV1 = 'XXXXXXXXXXXXXXXXX' if ($ENV1 eq ""); +$ENV2 = 'XXXXXXXXXXXXXXXXX' if ($ENV2 eq ""); + +#print "COMMAND0 = $COMMAND0\n"; +#print "COMMAND1 = $COMMAND1\n"; +#print "COMMAND2 = $COMMAND2\n"; +#print "COMMAND3 = $COMMAND3\n"; +#print "ENV0 = $ENV0\n"; +#print "ENV1 = $ENV1\n"; +#print "ENV2 = $ENV2\n"; + +############################################################################ +# +# Indexing and Table of Contents generation. +# +############################################################################ +sub truncentry +{ my($entry,$max_len) = @_; + if ($max_len > 0 && length($entry) >= $max_len) + { $entry = substr($entry, 0, $max_len-1) . "..."; + } + return $entry; +} + +############################################################################ +# +# Make a table of contents +# +############################################################################ +sub makeTOC +{ my($type, # local or global + $html, # extra html parameters to the table + $max_len, # maximal length of each entry + $highlight, # which entry to highlight + @TOC # the entries + ) = @_; + + return "" if $#TOC < 0; + + my($toc) = ""; + my($labelprefix) = ""; + if ($type eq "global") { $labelprefix = "section:"; } + + $toc = <<"END"; + + + +
+END + + for $entry (@TOC) + { my($name) = $entry->[0]; + my($level) = $entry->[1]; + my($label) = $LINKLABEL{$labelprefix . $name}; + my($text) = truncentry($name,$max_len); + + # Major section heading + if ($level == 0) + { + $toc .= <<"END"; +
+ +
+ $text + +
+END + } else + # Normal extries + { + my($indent) = "-" x ($level - 2); + + if ($name eq $highlight) + { $text = "$text"; } + + if ($type eq "local") + { $text = "$text"; + } else + { $text = "$text"; + } + + $toc .= <<"END"; + $indent$text
+END + } + } + + $toc .= <<"END"; +
+ +END + return $toc; +} +############################################################################ +# +# Generate the output +# +############################################################################ + +sub writeSection +{ my($filename,$TITLE,$localtoc,@TEXT) = @_; + my($outfile) = replaceSuffix(basename($filename), "tex", "html"); + my($toc) = ""; + + print STDERR "[Generating $outfile]\n"; + + # Global table of contents + if ($GENERATE_TOC) + { $toc = + makeTOC("global", + "border=0 width=$GLOBAL_TOC_WIDTH bgcolor=\"$TOC_BACKGROUND_COLOR\"", + $MAX_GLOBAL_TOC_ENTRY_LENGTH, $TITLE, @GLOBALTOC); + $toc = <<"END"; + + $toc + +END + } + + open(OUTFILE,">$outfile") || die("$!: $outfile"); + + $header = header($TITLE); + $footer = footer($outfile); + $maintext = resolveReferences("@TEXT"); + $TOTAL_WIDTH = $TEXT_WIDTH + $GLOBAL_TOC_WIDTH; + + print OUTFILE <<"END"; +$header + + + $TITLE + + + + + $toc + + + +
+

$TITLE

+
+ $localtoc + $maintext + $footer +
+ + +END + close OUTFILE; +} + +############################################################################ +# +# Write out the main page +# +############################################################################ +sub writeDocument +{ my($filename) = @_; + my($outfile) = replaceSuffix(basename($filename), "tex", "html"); + + print STDERR "[Generating $outfile]\n"; + + my($toc) = makeTOC("global","border=0 width=$GLOBAL_TOC_WIDTH", + $MAX_GLOBAL_TOC_ENTRY_LENGTH, "", @GLOBALTOC); + + open(OUTFILE,">$outfile") || die("$!: $outfile"); + my($header) = header($DOCUMENT_TITLE); + my($footer) = footer($outfile); + + print OUTFILE <<"END"; +$header + + + $DOCUMENT_TITLE + + + + + + + +
$toc +

$DOCUMENT_TITLE

+
+
+ $DOCUMENT_AUTHOR +
+
+ Abstract +
+
+ $DOCUMENT_ABSTRACT +
+ $footer +
+ + + + + +END + close OUTFILE; +} + +############################################################################ +# +# Initialization +# +############################################################################ +sub init() +{ + $MODE = "latex"; + @MODE_STACK=($MODE); + @tabular_stack=(); + + $LINE_NO=0; + @TEXT=(); + @LOCALTOC=(); + $LABELCOUNTER="link0000"; +} + +############################################################################ +# +# Main loop for processing a section +# +############################################################################ +sub processSection +{ my ($filename) = @_; + + if (! ($filename =~/\.tex$/)) + { die("$filename must end in .tex"); + } + open(INFILE,$filename) || die("$!: $filename"); + + init(); + + local($LINE_NO) = 0; + + while () + { + $LINE_NO++; + + # Verbatim mode handling + if ($MODE eq "verbatim") + { if (s|\\end{verbatim}|$ENDPRE|) { leaveMode("verbatim"); } + else + { s//>/g; + } + push @TEXT, $_; + next; + } + + if ($MODE eq "alltt") + { if (s|\\end{alltt}|$ENDALLTT|) { leaveMode("alltt"); }; + push @TEXT, $_; + next; + } + + $_ = escape($_) if $MODE eq "sml"; + + # \verb + # Must match the mimimum number of times + s|\\verb(.)(.*?)\1|\2|g if ($MODE ne "verbatim"); + + # paragraph + if ($MODE eq "latex") + { if (/^\s+$/) + { if (! $parbreak) { $_ = "

\n"; $parbreak=1; } + } else + { $parbreak = 0; } + } + + # special formatting environments + if (s|\\begin{SML}|$BEGINPRE|) { enterMode("sml"); }; + if (s|\\end{SML}|$ENDPRE|) { leaveMode("sml"); }; + if (s|\\begin{code}|$BEGINPRE|) { enterMode("code"); }; + if (s|\\end{code}|$ENDPRE|) { leaveMode("code"); }; + if (s|\\begin{verbatim}|$BEGINPRE|) { enterMode("verbatim"); }; + if (s|\\begin{alltt}|$BEGINALLTT|) { enterMode("alltt"); }; + + # method + if (s|\\begin{methods}|

|) { enterMode("methods"); }; + if (s|\\end{methods}|
|) { leaveMode("methods"); }; + s|\&|
|g if $MODE eq "methods"; + s|\\\\|
|g if $MODE eq "methods"; + + # Itemize + s|\\item\[([^\]]*)\]|
\1
|; + s|\\item|
  • |; + + # Tables + s|\\begin{tabular}{($PARAM)}|&begintab($1,"")|oge; + s|\\end{tabular}|&endtab()|oge; + + s|\\begin{Table}{($PARAM)}{($PARAM)}|&begintab($1,$2)|oge; + s|\\end{Table}|&endtab()|oge; + + s|\&|&addtab()|oge if $MODE eq "tabular"; + s|\\\\|&tabnewline()|oge if $MODE eq "tabular"; + s|\\\\|&newline()|oge if $MODE eq "latex"; + + # SML mode handling + s|\\sml{($PARAM)}|&sml($1)|oge; + s|\\code{($PARAM)}|\1|g; + if ($MODE eq "sml") { $_ = smldisplay($_); } + + # Math mode handling + if ($MODE ne "sml") { s|\$($MATHPARAM)\$|&math($1)|eg; } + s|\\\(($MATHPARAM2)\\\)|&math($1)|oeg; + if (/\\\[\s*\S|\S\s*\\\[/) + { error("please put \\[ on a separate line by itself"); } + if (/^\s*\S+\s*\\\]\s*$/) + { error("please put \\] on a separate line by itself"); } + if ($MODE eq "displaymath") { $_ = math($_); } + if ($MODE eq "eqnarray*") { $_ = math($_); } + if ($MODE eq "eqnarray") { $_ = math($_); } + + if (s|\\\[\s*$|
    |) { enterMode("displaymath"); } + if (s|\\\]|
    |) { leaveMode("displaymath"); } + if (s|\\begin{eqnarray\*}|
    |) { enterMode("eqnarray*"); } + if (s|\\end{eqnarray\*}|
    |) { leaveMode("eqnarray*"); } + if (s|\\begin{eqnarray}|
    |) { enterMode("eqnarray"); } + if (s|\\end{eqnarray}|
    |) { leaveMode("eqnarray"); } + + # How to handle environment + s|~?\\begin{($ENV0)}|&beginenv($1)|oge; + s|~?\\begin{($ENV1)}{($PARAM)}|&beginenv($1,$2)|oge; + s|~?\\begin{($ENV2)}{($PARAM)}{($PARAM)}|&beginenv($1,$2,$3)|oge; + s|~?\\end{($PARAM)}|&endenv($1)|oge; + + # How to handle commands + s/~?\\($COMMAND0)(\{\}|\b)/&command($1)/oge; + s|~?\\($COMMAND1){($PARAM)}|&command($1,$2)|oge; + s|~?\\($COMMAND2){($PARAM)}{($PARAM)}|&command($1,$2,$3)|oge; + s|~?\\($COMMAND3){($PARAM)}{($PARAM)}{($PARAM)}|&command($1,$2,$3,$4)|oge; + + # Math mode stuff + s|\\_|_|g; + s|\\{|{|g; + s|\\}|}|g; + + # Indentation + s|\\noindent ||g; + + # + # Not all browers can handle the math stuff yet. Do this in the mean time. + # + stupidBrowserMath(); + + push @TEXT, $_; + } + close INFILE; + return ($PAGE_TITLE, @TEXT); + +} + +############################################################################ +# +# Main loop for processing a document +# +############################################################################ +sub processDocument +{ my($filename) = @_; + my($line, $output); + + globalInit(); + + $MODE = "latex"; + @MODE_STACK=($MODE); + + $PATHNAME = ""; + if ($filename =~ /^(.*\/)[^\/]*$/) { $PATHNAME = $1; } + + $LINE_NO=0; + $FILENAME=$filename; + + open (DOCUMENT, $filename) || die("$! $filename"); + while ($_ = ) + { $LINE_NO++; + s|%.*$||; #skip comments + + s|\\\\|&newline()|oge if $MODE eq "latex"; + + # How to handle environment + s|~?\\begin{($ENV0)}|&beginenv($1)|oge; + s|~?\\begin{($ENV1)}{($PARAM)}|&beginenv($1,$2)|oge; + s|~?\\begin{($ENV2)}{($PARAM)}{($PARAM)}|&beginenv($1,$2,$3)|oge; + s|~?\\end{($PARAM)}|&endenv($1)|oge; + + # How to handle commands + s/~?\\($COMMAND0)(\{\}|\b)/&command($1)/oge; + s|~?\\($COMMAND1){($PARAM)}|&command($1,$2)|oge; + s|~?\\($COMMAND2){($PARAM)}{($PARAM)}|&command($1,$2,$3)|oge; + s|~?\\($COMMAND3){($PARAM)}{($PARAM)}{($PARAM)}|&command($1,$2,$3,$4)|oge; + + push @TEXT, $_; + } + close DOCUMENT; + + # Write out the sections + foreach $file (keys %SECTION_TEXT) + { my($output) = $SECTION_TEXT{$file}; + my($title, $localtoc, @text) = @{$output}; + writeSection($file, $title, $localtoc, @text); + } + + # Write out the main page + writeDocument($filename); +} + +#main +foreach $file (@ARGV) +{ processDocument($file) +} + + diff --git a/MLRISC/Doc/latex/C6.tex b/MLRISC/Doc/latex/C6.tex new file mode 100644 index 0000000..66d7790 --- /dev/null +++ b/MLRISC/Doc/latex/C6.tex @@ -0,0 +1,3 @@ +\section{The TI C6x Back End} + +No documentation yet. diff --git a/MLRISC/Doc/latex/ILP.tex b/MLRISC/Doc/latex/ILP.tex new file mode 100644 index 0000000..c860e4a --- /dev/null +++ b/MLRISC/Doc/latex/ILP.tex @@ -0,0 +1,23 @@ +\section{ILP Optimizations} +\subsection{Introduction} + This section is under construction. A new scheduler framework +for superscalars that ties into the machine description language +is currently being developed. +\subsection{The ILP ToolBox} +\subsubsection{List Scheduler} +\subsubsection{Ranking Algorithms} + Some more complex ranking algorithms (than say critical path) have been +implemented. These are: +\begin{itemize} + \item The algorithm of + \mlrischref{scheduling/PalemSimons.sig}{Palem and Simons} + which appeared in TOPLAS '93. This algorithm + computes the modified deadlines of a set instructions, with + precedence, latency, and deadlines constraints. + + \item The algorithm of + \mlrischref{scheduling/LeungPalemPnueli.sig}{Leung, Palem, and Pnueli} + which appeared in PACT '98. + This algorithm computes the modified deadlines of a set of instructions, + with precedence, latency, release-times and deadline constraints. +\end{itemize} diff --git a/MLRISC/Doc/latex/INTRO.tex b/MLRISC/Doc/latex/INTRO.tex new file mode 100644 index 0000000..406ff4d --- /dev/null +++ b/MLRISC/Doc/latex/INTRO.tex @@ -0,0 +1,48 @@ +\section{MLRISC} + \begin{center} + \begin{Bold} + A framework for retargetable and optimizing compiler back ends + \end{Bold} + \end{center} +\begin{center} + \begin{tabular}{cc} + \begin{address} + \href{mailto:george@research.bell-labs.com}{Lal George} + \end{address} & + \begin{address} + \href{mailto:leunga@cs.nyu.edu}{ Allen Leung} + \end{address} \\ + Bell Labs & New York University \\ + \end{tabular} +\end{center} + +\begin{center} +\image{MLRISC logo}{pictures/png/uncol.png}{align="middle"} + +\begin{Italics} + \href{contributors.html}{Contributors} +\end{Italics} +\end{center} + +Writing native code generators for modern processors is a significant +investment. Unfortunately it is difficult +to reuse this investment for other architectures, and even more +difficult to reuse for other source language compilers. MLRISC is +a customizable optimizing back-end written in +\externhref{http://cm.bell-labs.com/cm/cs/what/smlnj/sml.html}{Standard ML} +and has been successfully retargeted to multiple architectures. +MLRISC deals elegantly with the special requirements imposed by the +execution model of different high-level, typed languages, by allowing +many components of the system to be customized to fit the source language +semantics and runtime system requirements. + +The \begin{color}{#aa0000}Overview\end{color} pages on the left provide +an introduction the MLRISC system, mostly from the client's perspective, +while the \begin{color}{#aa0000}System\end{color} +pages give a more detailed look at the +innards, and are of interest to MLRISC hackers. As usual, development of +the system has outpaced the documentation process substantally; thus +the latter part of the document is incomplete but it may still be useful. + +These pages are also available in +\href{../latex/mlrisc.ps}{tech report} form. diff --git a/MLRISC/Doc/latex/Makefile b/MLRISC/Doc/latex/Makefile new file mode 100644 index 0000000..9e3246e --- /dev/null +++ b/MLRISC/Doc/latex/Makefile @@ -0,0 +1,35 @@ +# +# This file compiles the documentation +# + +TEX= sml.tex mlrisc.tex mltexdoc.tex +PDF= $(TEX:.tex=.pdf) + +all: pictures $(PDF) + +pictures: + (cd ../pictures; make) + +pdf: $(PDF) + +%.pdf: + latexmk -bibtex -ps- -pdf $(@:.pdf=.tex) + +mlrisc.pdf: annotations.tex graphics.tex instructions.tex mltree.tex \ + delayslots.tex cells.tex cluster.tex constants.tex \ + pseudo-ops.tex streams.tex labelexp.tex labels.tex regions.tex \ + regmap.tex graphs.tex mlrisc-ir.tex compiler-graphs.tex \ + SSA.tex VLIW.tex ra.tex ILP.tex mlrisc-arch.tex \ + future-work.tex asm.tex mc.tex instrsel.tex \ + availability.tex gc.tex mlrisc-gen.tex contributions.tex \ + systems.tex mlrisc-graphics.tex contributors.tex \ + requirements.tex INTRO.tex problem.tex mlrisc-compiler.tex \ + mlrisc-ir-rep.tex backend-opt.tex sys-integration.tex \ + optimizations.tex span-dep.tex mlrisc-md.tex line-counts.tex \ + sparc.tex alpha.tex x86.tex ppc.tex mips.tex hppa.tex C6.tex \ + mltree-ext.tex mltree-util.tex + +mltexdoc.pdf: mltex.tex + +clean: + rm -f $(PDF) *.aux *.log *.bbl *.blg diff --git a/MLRISC/Doc/latex/SSA.tex b/MLRISC/Doc/latex/SSA.tex new file mode 100644 index 0000000..f181219 --- /dev/null +++ b/MLRISC/Doc/latex/SSA.tex @@ -0,0 +1,21 @@ +\section{SSA Optimizations}\label{sec:ssa} + +All SSA optimization modules satisfy the signature +\mlrischref{SSA/ssa-optimization.sig}{SSA\_OPTIMIZATION}, +which is defined as: +\begin{SML} +signature SSA_OPTIMIZATION = sig + structure SSA : SSA + + val optimize : SSA.ssa -> SSA.ssa +end +\end{SML} + +The following SSA based scalar optimizations have been implemented in MLRISC. +\begin{itemize} +\item \mlrischref{SSA/ssa-dead-code-elim.sml}{Dead code elimination} +\item \mlrischref{SSA/ssa-gvn.sml}{Global value numbering, constant folding, algebraic simplication} +\item \mlrischref{SSA/ssa-gcm.sml}{Global code motion} +\item \mlrischref{SSA/ssa-cond-const-prop.sml}{Conditional constant propagation} +\item \mlrischref{SSA/ssa-op-str-red.sml}{Strength reduction} +\end{itemize} diff --git a/MLRISC/Doc/latex/VLIW.tex b/MLRISC/Doc/latex/VLIW.tex new file mode 100644 index 0000000..36761c8 --- /dev/null +++ b/MLRISC/Doc/latex/VLIW.tex @@ -0,0 +1,21 @@ +\section{Optimizations for VLIW/EPIC Architectures} + +\subsection{Overview} +Many newer architectures such as the upcoming IA-64 and the +DSPs such as the C6 are VLIW or so called EPIC machines. +These architectures depends on the compiler to +extract instruction level parallelism (\newdef{ILP}) +and data level parallelism (\newdef{DLP}). + +Optimizations for these architectures include: +\begin{itemize} + \item Hyperblock construction + \item Predication and predicate analysis + \item Hyperblock scheduling + \item Modulo scheduling +\end{itemize} + +\subsection{Hyperblocks} +\subsection{Predicate Analysis} +\subsection{Hyperblock Scheduling} +\subsection{Modulo Scheduling} diff --git a/MLRISC/Doc/latex/alpha.tex b/MLRISC/Doc/latex/alpha.tex new file mode 100644 index 0000000..48a54ea --- /dev/null +++ b/MLRISC/Doc/latex/alpha.tex @@ -0,0 +1,97 @@ +\section{The Alpha Back End} + +\subsection{Trap Shadows, Floating Exceptions, and Denormalized Numbers on the DEC Alpha} + + \emph{By Andrew W. Appel and Lal George, Nov 28, 1995} + + See section 4.7.5.1 of the \emph{Alpha Architecture Reference Manual}. + + The Alpha has imprecise exceptions, meaning that if a floating + point instruction raises an IEEE exception, the exception may + not interrupt the processor until several successive instructions have + completed. ML, on the other hand, may want a "precise" model + of floating point exceptions. + + Furthermore, the Alpha hardware does not support denormalized numbers + (for ``gradual underflow''). Instead, underflow always rounds to zero. + However, each floating operation (add, mult, etc.) has a trapping + variant that will raise an exception (imprecisely, of course) on + underflow; in that case, the instruction will produce a zero result + AND an exception will occur. In fact, there are several variants + of each instruction; three variants of MULT are: +\begin{description} + \item[MULT s1,s2,d] truncate denormalized result to zero; no exception + \item[MULT/U s1,s2,d] truncate denormalized result to zero; raise UNDERFLOW + \item[MULT/SU s1,s2,d] software completion, producing denormalized result +\end{description} + + The hardware treats the \verb|MULT/U| and \verb|MULT/SU| + instructions identically, + truncating a denormalized result to zero and raising the UNDERFLOW + exception. But the operating system, on an UNDERFLOW exception, + examines the faulting instruction to see if it's an \verb|/SU| + form, and if so, + recalculates \verb|s1*s2|, puts the right answer in \verb|d|, and continues, + all without invoking the user's signal handler. + + Because most machines compute with denormalized numbers in hardware, + to maximize portability of SML programs, we use the \verb|MULT/SU| form. + (and \verb|ADD/SU|, \verb|SUB/SU|, etc.) But to use this form successfully, + certain rules have to be followed. Basically, d cannot be the same + register as s1 or s2, because the opsys needs to be able to + recalculate the operation using the original contents of s1 and s2, + and the MULT/SU instruction will overwrite d even if it traps. + + More generally, we may want to have a sequence of floating-point + instructions. The rules for such a sequence are: + + 1. The sequence should end with a \verb|TRAPB| (trap barrier) instruction. + (This could be relaxed somewhat, but certainly a \verb|TRAPB| would + be a good idea sometime before the next branch instruction or + update of an ML reference variable, or any other ML side effect.) + 2. No instruction in the sequence should destroy any operand of itself + or of any previous instruction in the sequence. + 3. No two instructions in the sequence should write the same destination + register. + + We can achieve these conditions by the following trick in the + Alpha code generator. Each instruction in the sequence will write + to a different temporary; this is guaranteed by the translation from + ML-RISC. At the beginning of the sequence, we will put a special + pseudo-instruction (we call it \verb|DEFFREG|) that ``defines'' + the destination + register of the arithmetic instruction. If there are $K$ arithmetic + instructions in the sequence, then we'll insert $K$ + \verb|DEFFREG| instructions + all at the beginning of the sequence. + Then, each arithop will not only ``define'' its destination temporary + but will ``use'' it as well. When all these instructions are fed to + the liveness analyzer, the resulting interference graph will then + have inteference edges satisfying conditions 2 and 3 above. + + Of course, \verb|DEFFREG| doesn't actually generate any code. In our model + of the Alpha, every instruction generates exactly 4 bytes of code + except the ``span-dependent'' ones. Therefore, we'll specify \verb|DEFFREG| + as a span-dependent instruction whose minimum and maximum sizes are zero. + + At the moment, we do not group arithmetic operations into sequences; + that is, each arithop will be preceded by a single \verb|DEFFREG| and + followed by a \verb|TRAPB|. To avoid the cost of all those \verb|TRAPB|'s, + we should improve this when we have time. Warning: Don't put more + than 31 instructions in the sequence, because they're all required + to write to different destination registers! + + What about multiple traps? For example, suppose a sequence of + instructions produces an Overflow and a Divide-by-Zero exception? + ML would like to know only about the earliest trap, but the hardware + will report \emph{BOTH} traps to the operating system. However, as long + as the rules above are followed (and the software-completion versions + of the arithmetic instructions are used), the operating system will + have enough information to know which instruction produced the + trap. It is very probable that the operating system will report \emph{ONLY} + the earlier trap to the user process, but I'm not sure. + + For a hint about what the operating system is doing in its own + trap-handler (with software completion), see section 6.3.2 of + ``\emph{OpenVMS Alpha Software}'' (Part II of the Alpha Architecture + Manual). This stuff should apply to Unix (OSF1) as well as VMS. diff --git a/MLRISC/Doc/latex/annotations.tex b/MLRISC/Doc/latex/annotations.tex new file mode 100644 index 0000000..4998fa0 --- /dev/null +++ b/MLRISC/Doc/latex/annotations.tex @@ -0,0 +1,33 @@ +\section{Annotations} + +\subsection{Overview} +A compiler front-end has to be propagate information to +the back-end. An optimization phase may have to leave behind information +at various places of the IR so that other phases can reuse such information. +MLRISC uses the \newdef{annotations} +mechanism for these functions. +Individual instructions, basic blocks, and flow graph edges, +can be attached one or more annotations. + +The basic MLRISC system understands many annotations. Some examples are: +\begin{description} + \item[COMMENT] + these can be used to attach comments. If attached to + an instruction, the assemblers will output + them as part of their assembly output. + \item[BRANCH\_PROB] + these can be attached to a branch instruction to indicate + the probability in which is it taken. + \item[EXECUTION\_FREQ] + these can be attached to a basic block to indicate + its expected execution frequency +\end{description} + +\subsection{Details} +The primitive annotations datatype is defined +to have this \mlrischref{library/annotations.sig}{signature}. +In addition, MLRISC predefined a few primitive annotations that are +recognized by the core system. This signature is +\mlrischref{instructions/mlriscAnnotations.sig}{MLRISC\_ANNOTATIONS}. +More detailed documentation can be found in this +\href{http://cm.bell-labs.com/cm/cs/what/smlnj/compiler-notes/annotations.ps}{paper}. diff --git a/MLRISC/Doc/latex/asm.tex b/MLRISC/Doc/latex/asm.tex new file mode 100644 index 0000000..58ce07a --- /dev/null +++ b/MLRISC/Doc/latex/asm.tex @@ -0,0 +1,60 @@ +\section{Assemblers} + +\subsubsection{Overview} +Assemblers in MLRISC satisfy the signature +\mlrischref{emit/instruction-emitter.sig}{INSTRUCTION\_EMITTER}, +which is defined as: +\begin{SML} +signature INSTRUCTION_EMITTER = +sig + structure I : \href{instructions.html}{INSTRUCTIONS} + structure C : \href{cells.html}{CELLS} + structure S : \href{streams.html}{INSTRUCTION_STREAM} + structure P : \href{pseudo-ops.html}{PSEUDO_OPS} + sharing I.C = C + sharing S.P = P + + val makeStream : Annotations.annotations -> + ((int -> int) -> I.instruction -> unit, + unit,'b,'c,'d,'e) S.stream +end +\end{SML} + +The function \sml{makeStream} returns an instruction stream. +By default the output is bound to the stream \sml{AsmStream.asmOutStream} +defined in the structure +\mlrischref{emit/asmStream.sml}{AsmStream} at creation time. + +The structure \sml{AsmStream} satisfy the following signature. +\begin{SML} +signature ASM_STREAM = sig + val asmOutStream : TextIO.outstream ref + val withStream : TextIO.outstream -> ('a -> 'b) -> 'a -> 'b +end +\end{SML} +\subsubsection{Redirecting the Output} +It is possible to redirect the output of an instruction stream. +For example, the following statement +\begin{SML} + val asm = makeStream [] +\end{SML} +binds the output of \sml{asm} to \sml{AsmStream.asmOutStream}, which +by default is just \sml{TextIO.stdOut}. On the other hand, the +statement +\begin{SML} + val asm = AsmStream.withStream mystream makeStream [] +\end{SML} +binds the output of asm to \sml{mystream}. + +\subsubsection{More Details} + +Assemblers are automatically generated by the +\href{mlrisc-md.html}{MDGen} tool. Some specific generated +assemblers are listed below: +\begin{enumerate} + \item \mlrischref{sparc/emit/sparcAsm.sml}{Sparc} + \item \mlrischref{hppa/emit/hppaAsm.sml}{Hppa} + \item \mlrischref{alpha/emit/alphaAsm.sml}{Alpha} + \item \mlrischref{ppc/emit/ppcAsm.sml}{Power PC} + \item \mlrischref{x86/emit/x86Asm.sml}{X86} +\end{enumerate} diff --git a/MLRISC/Doc/latex/availability.tex b/MLRISC/Doc/latex/availability.tex new file mode 100644 index 0000000..9249c02 --- /dev/null +++ b/MLRISC/Doc/latex/availability.tex @@ -0,0 +1,33 @@ +\section{How to Obtain MLRISC} + +There are a few ways to obtain the MLRISC system. +\begin{enumerate} +\item +An old version of MLRISC is available from +\externhref{http://cm.bell-labs.com/cm/cs/what/smlnj/doc/MLRISC/quick-tour/index.html}{this link}. +This version is stable but very out-dated, and does +not contain the most up-to-date features. +\item +New experimental versions are available from the +\externhref{http://cm.bell-labs.com/cm/cs/what/smlnj/software.html}{SML/NJ software page} as part of the SML/NJ compiler releases. +These versions are relative stable, but +do not include the entire MLRISC source tree. +\item \href{mailto:leunga@cs.nyu.edu}{Allen} +keeps an up-to-date version of MLRISC at NYU for private use. +This version includes everything but is under constant changes, so beware! +To access the CVS repository, set your \sml{CVSROOT} environment variable +to +\begin{verbatim} + :pserver:mlrisc@react-ilp.cs.nyu.edu:/home/leunga/mlrisc +\end{verbatim} +and checkout the repository using +\begin{verbatim} + cvs co MLRISC++ +\end{verbatim} +The password to use is \sml{mlrisc}. +\item +Generally speaking, you can get the latest version of MLRISC by asking +\href{mailto:george@research.bell-labs.com}{Lal}. +\end{enumerate} +MLRISC is \newdef{free, open source} software, and is released under the +\href{http://cm.bell-labs.com/cm/cs/what/smlnj/license.html}{SML/NJ license}. diff --git a/MLRISC/Doc/latex/backend-opt.tex b/MLRISC/Doc/latex/backend-opt.tex new file mode 100644 index 0000000..d947242 --- /dev/null +++ b/MLRISC/Doc/latex/backend-opt.tex @@ -0,0 +1,15 @@ +\section{Back End Optimizations} + + Once MLRisc trees have been generated, they are passed into a module + that generates a flowgraph of target machine instructions. Again, + this module and all subsequent optimization phases have been + specialized to the front end. + \image{Back end optimizations}{pictures/png/optimization.png}{align=right} + Nearly all + instruction selection modules provided by MLRISC use a simple tree + pattern matching algorithm rather than the more heavy weight BURG + tools --- including the x86 \begin{color}{#580000} It is important to + emphasis that all optimizations are performed on the flowgraph of + target machine instructions and \emph{not} MLRisc + immediate IR. \end{color} There is complete flexibility in the order, + and nature of the optimizations performed. diff --git a/MLRISC/Doc/latex/cells.tex b/MLRISC/Doc/latex/cells.tex new file mode 100644 index 0000000..7ea0ce9 --- /dev/null +++ b/MLRISC/Doc/latex/cells.tex @@ -0,0 +1,146 @@ +\section{Cells} + +MLRISC uses +the \mlrischref{instructions/cells.sig}{CELLS} +interface to define all readable/writable resources +in a machine architecture, or \emph{cells} +The types defined herein are: +\begin{itemize} + \item \sml{cellkind} -- different classes of cells are assigned + difference cellkinds. The following cellkinds should be present + \begin{itemize} + \item \sml{GP} -- general purpose registers. + \item \sml{FP} -- floating point registers. + \item \sml{CC} -- condition code registers. + \end{itemize} + In addition, the cellkinds \sml{MEM} and \sml{CTRL} + should also be defined. These are used for representing + memory based data dependence and control dependence. + \begin{itemize} + \item \sml{MEM} -- memory + \item \sml{CTRL} -- control dependence + \end{itemize} + \item \sml{regmap} -- \href{regmap.html}{register map} + \item \sml{cellset} -- a cellset represent a set of cells. This + type can be used to denote live-in/live-out information. Cellsets are + implemented as immutable abstract types. +\end{itemize} + +These core definitions are defined in the following signature +\begin{SML} +signature \mlrischref{instructions/cells.sig}{CELLS\_BASIS} = +sig + eqtype cellkind + type cell = int + type regmap = cell Intmap.intmap + exception Cells + + val cellkinds : cellkind list + val cellkindToString : cellkind -> string + val firstPseudo : cell + val Reg : cellkind -> int -> cell + val GPReg : int -> cell + val FPReg : int -> cell + val cellRange : cellkind -> {low:int, high:int} + val newCell : cellkind -> 'a -> cell + val cellKind : cell -> cellkind + val updateCellKind : cell * cellkind -> unit + val numCell : cellkind -> unit -> int + val maxCell : unit -> cell + val newReg : 'a -> cell + val newFreg : 'a -> cell + val newVar : cell -> cell + val regmap : unit -> regmap + val lookup : regmap -> cell -> cell + val reset : unit -> unit +end +\end{SML} + +\begin{itemize} + \item\sml{cellkinds} -- this is a list of all the cellkinds defined in the +architecture + \item\sml{cellkindToString} -- this function maps a cellkind into its name + \item\sml{firstPseudo} -- MLRISC numbered physical resources + in the architecture from 0 to firstPseudo-1. + This is the first usable virtual register number. + \item\sml{Reg} -- This function maps the $i$th physical + resource of a particular cellkind to its internal encoding used by MLRISC. + Note that all resources in MLRISC are named uniquely. + \item\sml{GPReg} -- abbreviation for \sml{Reg GP} + \item\sml{FPReg} -- abbreviation for \sml{Reg FP} + \item \sml{cellRange} -- this returns a range \sml{{low, high}} + when given a cellkind, with denotes the range of physical resources + \item \sml{newCell} -- This function returns a new virtual register + of a particular cellkind. + \item \sml{newReg} -- abbreviation as \sml{newCell GP} + \item \sml{newFreg} -- abbreviation as \sml{newCell FP} + \item \sml{cellKind} -- When given a cell number, this returns its + cellkind. Note that this feature is not enabled by default. + \item \sml{updateCellKind} -- updates the cellkind of a cell. + \item \sml{numCell} -- returns the number of virtual cells allocated for one cellkind. + \item \sml{maxCell} -- returns the next virtual cell id. + \item \sml{newVar} -- given a cell id, return a new cell id of + the same cellkind. + \item \sml{regmap} -- This function returns a new empty regmap + \item \sml{lookup} -- This converts a regmap into a lookup function. + \item \sml{reset} -- This function resets all counters associated +with all virtual cells. +\end{itemize} + +\begin{SML} +signature CELLS = sig + include CELLS_BASIS + val GP : cellkind + val FP : cellkind + val CC : cellkind + val MEM : cellkind + val CTRL : cellkind + val toString : cellkind -> cell -> string + val stackptrR : cell + val asmTmpR : cell + val fasmTmp : cell + val zeroReg : cellkind -> cell option + + type cellset + + val empty : cellset + val addCell : cellkind -> cell * cellset -> cellset + val rmvCell : cellkind -> cell * cellset -> cellset + val addReg : cell * cellset -> cellset + val rmvReg : cell * cellset -> cellset + val addFreg : cell * cellset -> cellset + val rmvFreg : cell * cellset -> cellset + val getCell : cellkind -> cellset -> cell list + val updateCell : cellkind -> cellset * cell list -> cellset + + val cellsetToString : cellset -> string + val cellsetToString' : (cell -> cell) -> cellset -> string + + val cellsetToCells : cellset -> cell list +end +\end{SML} + +\begin{itemize} + \item \sml{toString} -- convert a cell id of a certain cellkind into +its assembly name. + \item \sml{stackptrR} -- the cell id of the stack pointer register. + \item \sml{asmTmpR} -- the cell id of the assembly temporary + \item \sml{fasmTmp} -- the cell id of the floating point temporary + \item \sml{zeroReg} -- given the cellkind, returns the cell id of the + source that always hold the value of zero, if there is any. + \item \sml{empty} -- an empty cellset + \item \sml{addCell} -- inserts a cell into a cellset + \item \sml{rmvCell} -- remove a cell from a cellset + \item \sml{addReg} -- abbreviation for \sml{addCell GP} + \item \sml{rmvReg} -- abbreviation for \sml{rmvCell GP} + \item \sml{addFreg} -- abbreviation for \sml{addCell FP} + \item \sml{rmvFreg} -- abbreviation for \sml{rmvCell FP} + \item \sml{getCell} -- lookup all cells of a particular cellkind from +the cellset + \item \sml{updateCell} -- replace all cells of a particular cellkind +from the cellset. + \item \sml{cellsetToString} -- pretty print a cellset + \item \sml{cellsetToString'} -- pretty print a cellset, but first +apply a regmap function. + \item \sml{cellsetToCells} -- convert a cellset into list form. +\end{itemize} diff --git a/MLRISC/Doc/latex/cluster.tex b/MLRISC/Doc/latex/cluster.tex new file mode 100644 index 0000000..bb820f9 --- /dev/null +++ b/MLRISC/Doc/latex/cluster.tex @@ -0,0 +1,53 @@ +\section{Cluster} + +A \newdef{cluster} +represents a compilation unit in linearized form, +and contains information about the control flow, global annotations, +block and edge execution frequencies, and live-in/live-out information. + +Its signature is: +\begin{SML} +signature FLOWGRAPH = sig + structure C : \href{cells.html}{CELLS} + structure I : \href{instructions.html}{INSTRUCTIONS} + structure P : \href{pseudo-ops.html}{PSEUDO_OPS} + structure W : \href{freq.html}{FREQ} + sharing I.C = C + + datatype block = + PSEUDO of P.pseudo_op + | LABEL of Label.label + | BBLOCK of + \{ blknum : int, + freq : W.freq ref, + annotations : Annotations.annotations ref, + liveIn : C.cellset ref, + liveOut : C.cellset ref, + succ : edge list ref, + pred : edge list ref, + insns : I.instruction list ref + \} + | ENTRY of + \{blknum : int, freq : W.freq ref, succ : edge list ref\} + | EXIT of + \{blknum : int, freq : W.freq ref, pred : edge list ref\} + withtype edge = block * W.freq ref + + datatype cluster = + CLUSTER of \{ + blocks: block list, + entry : block, + exit : block, + regmap: C.regmap, + blkCounter : int ref, + annotations : Annotations.annotations ref + \} +end +\end{SML} + +Clusters are used in +\href{span-dep.html}{span dependency resolution}, +\href{delayslots.html}{delay slot filling}, +\href{asm.html}{assembly}, +and \href{mc.html}{machine code} +output, since these phases require the code laid out in linearized form. diff --git a/MLRISC/Doc/latex/compiler-graphs.tex b/MLRISC/Doc/latex/compiler-graphs.tex new file mode 100644 index 0000000..52322da --- /dev/null +++ b/MLRISC/Doc/latex/compiler-graphs.tex @@ -0,0 +1,434 @@ +\section{Basic Compiler Graphs} + +\subsection{Introduction} +In this section we describe the set of core compiler specific graphs and +algorithms implemented in MLRISC. +Mostly of these algorithms are parameterized with respect +to the actual intermediate representation, and as such they +do not provide many facilities that are provided by higher abstraction +layers, such as in \href{mlrisc-ir.html}{MLRISC IR}, +or in \href{SSA.html}{SSA}. + +\subsubsection{Dominator/Post-dominator Trees} +\newdef{Dominance} +is a fundamental concept in compiler optimizations. +Node $A$ $dominates$ $B$ +iff all paths from the start node +to $B$ intersects A. A dual notion is the concept of +$post-dominance$: +$A$ \newdef{post-dominates} $B$ iff all paths from $B$ to the stop node +intersects $A$. A (post-)dominator tree can be used +to summarize the dominance/post-dominance relationship. + +\begin{SML} + functor \mlrischref{ir/dominator.sml}{DominatorTree} + (GraphImpl : GRAPH_IMPLEMENTATION) : DOMINATOR_TREE +\end{SML} + The functor implements dominator analysis and +creates a dominator/post-dominator tree from a graph $G$. A dominator tree is implemented as a graph +with the following definition: +\begin{SML} + signature \mlrischref{ir/dominator.sig}{DOMINATOR_TREE} = sig + exception Dominator + datatype 'n dom_node = + DOM of \{ node : 'n, level : int, preorder : int, postorder : int \} + type ('n,'e,'g) dom_info + type ('n,'e,'g) dominator_tree = ('n dom_node,unit,('n,'e,'g) dom_info) graph + type ('n,'e,'g) postdominator_tree = ('n dom_node,unit,('n,'e,'g) dom_info) graph +\end{SML} + +We annotated each node in +a dominator tree with three extra fields of information, which +is useful for other algorithms: +\begin{itemize} + \item\sml{level} is the nesting level of the tree. The root + node has level 0, children of the root has level 1 and so on. + \item\sml{preorder} is the preorder numbering of a node + \item\sml{preorder} is the postorder numbering of a node. +\end{itemize} + +To create a dominator tree and a postdominator tree +from a graph, the following function should be called. +\begin{SML} + val dominator_trees : ('n,'e,'g) graph -> + ('n,'e,'g) dominator_tree * ('n,'e,'g) postdominator_tree +\end{SML} +We use the algorithm of Tarjan and Lengauer, which +runs in time $O(|V+E|\alpha(|V+E|))$ where $\alpha$ is the functional +inverse of the Ackermann function. + +To perform many common queries on a dominator tree, we first +call the function \sml{methods} to obtain a method object. +\begin{SML} + val methods : ('n,'e,'g) dominator_tree -> dominator_methods +\end{SML} + +The methods are packed into the following type: +\begin{SML} + type dominator_methods = + \{ dominates : node_id * node_id -> bool, + immediately_dominates : node_id * node_id -> bool, + strictly_dominates : node_id * node_id -> bool, + postdominates : node_id * node_id -> bool, + immediately_postdominates : node_id * node_id -> bool, + strictly_postdominates : node_id * node_id -> bool, + control_equivalent : node_id * node_id -> bool, + idom : node_id -> node_id, $(* ~1 if none *)$ + idoms : node_id -> node_id list, + doms : node_id -> node_id list, + ipdom : node_id -> node_id, $(* ~1 if none *)$ + ipdoms : node_id -> node_id list, + pdoms : node_id -> node_id list, + dom_lca : node_id * node_id -> node_id, + pdom_lca : node_id * node_id -> node_id, + dom_level : node_id -> int, + pdom_level : node_id -> int, + control_equivalent_partitions : unit -> node_id list list + \} +\end{SML} + +The query methods are as follows: +\begin{methods} + dominates($a,b$) & returns true iff $a$ dominates $b$ \\ + immediately\_dominates($a,b$) & returns true iff $a$ immediately dominates $b$ \\ + strictly\_dominates($a,b$) & returns true iff $a$ strictly dominates $b$ \\ + postdominates($a,b$) & returns true iff $a$ post-dominates $b$ \\ + immediately\_postdominates($a,b$) & returns true iff $a$ immediately post-dominates $b$ \\ + strictly\_postdominates($a,b$) & returns true iff $a$ strictly post-dominates $b$ \\ + control\_equivalent($a,b$) & + returns true iff $a$ dominates $b$ and vice versa \\ + idom($a$) & returns the immediate dominator of $a$, or $-1$ if none exists \\ + idoms($a$) & returns all nodes that $a$ immediately dominates \\ + doms($a$) & returns all nodes that $a$ dominates (including $a$ itself) \\ + ipdom($a$) & returns the immediate post-dominator of $a$, or $-1$ if none exists \\ + ipdoms($a$) & returns all nodes that $a$ immediately post-dominates \\ + pdoms($a$) & returns all nodes that $a$ post-dominates (including $a$ itself) \\ + dom\_lca($a,b$) & returns the least common ancestor of $a$ and $b$ in + the dominator tree \\ + pdom\_lca($a,b$) & returns the least common ancestor of $a$ and $b$ + in the post-dominator tree \\ + dom\_level($a$) & returns the nesting level of $a$ in the dominator tree \\ + pdom\_level($b$) & returns the nesting level of $a$ in the post-dominator + tree \\ + control\_equivalent\_partitions & partitions the graph into + a set of control equivalent nodes. +\end{methods} + +The methods \sml{dom_lca}, \sml{pdom_lca} and +\sml{control_equivalent_partitions} executes in $O(n)$ time, where +$n$ is the size of the dominator tree. The other methods run in $O(1)$ time. + +\subsubsection{Control Dependence Graph} +Given two nodes $A$ and $B$ in a control flow graph $G$, +we say that $B$ is \newdef{control dependent} on $A$ iff +\begin{itemize} + \item $B$ post-dominates a successor of $A$ + \item $B$ does not strictly post-dominates $A$ +\end{itemize} +Intuitively, $B$ is control dependent on $A$ means that +some path in the program that goes through $A$ can by-passed $B$, +and furthermore, $A$ is the point in which this divergence can occur. +Control dependence is used to various kinds of analysis and optimizations in +a compiler, such as code motion and global scheduling~\cite{bernstein-rodeh}. + +To build a control dependence graph, the functor +\sml{ControlDependenceGraph} can be used: +\begin{SML} + signature \mlrischref{ir/cdg.sig}{CONTROL_DEPENDENCE_GRAPH} = sig + type ('n,'e,'g) cdg = ('n,'e,'g) graph + + val control_dependence_graph : + ('e -> bool) -> + ('n,'e,'g) dominator_tree * + ('n,'e,'g) postdominator_tree -> + ('n,'e,'g) cdg + end + functor \mlrischref{ir/cdg.sml}{ControlDependenceGraph} + (structure Dom : DOMINATOR_TREE + structure GraphImpl : GRAPH_IMPLEMENTATION + ) : CONTROL_DEPENDENCE_GRAPH +\end{SML} +The control depedence graph is a subcomponent of the +program dependence graph commonly used in +modern compiler optimizations. + +\subsubsection{Dominance Frontiers} + +Many algorithms involving the notion of control dependence or dominance +can be rephrased in terms of \newdef{dominance frontiers}. +A node $A$ is in the dominance frontiers of $B$ iff +$B$ dominates a predecessor of $A$ but $B$ does not strictly-dominate $A$. +We denote this as $A \in DF(B)$. +The dual notion of \newdef{post-dominance frontiers} can be defined +analogously using the post-dominator tree\footnote{Control dependence +can be defined in terms of post-dominance frontiers.}. + +\begin{SML} + functor \mlrischref{ir/dominance-frontier.sml}{DominanceFrontiers}(Dom : DOMINATOR_TREE) : DOMINANCE_FRONTIERS +\end{SML} +The functor \sml{DominanceFrontiers} can be used to +compute all the dominance frontiers of all the nodes in a graph. +It has the following signature. + +\begin{SML} + signature \mlrischref{ir/dominance-frontier.sig}{DOMINANCE_FRONTIERS} = sig + structure Dom : DOMINATOR_TREE + type dominance_frontiers = node_id list array + val DFs : ('n,'e,'g) Dom.dominator_tree -> dominance_frontiers + end +\end{SML} + +\subsubsection{Iterated Dominance Frontiers} + +\newdef{Iterated dominance frontiers} (denoted as $DF^+$) are defined +as the least fixed point of iterating the operation $DF$. Formally, +define the dominance frontiers on a set $S$ as follows: +\[ + DF(S) \defas \Union_{A \in S} DF(A) +\] +Define iteration of $DF$, denoted as $DF^n$, as follows: +\begin{eqnarray*} + DF^1(S) & \defas & DF(S) \\ + DF^{n+1}(S) & \defas & DF(S \union DF^n(S)) \\ +\end{eqnarray*} +The iterated dominance frontiers $DF^+(S)$ on a set $S$ are defined as +the limit: +\[ + DF^+(S) \defas \lim_{n \to \infty} DF^n(S) +\] + +Iterated dominance frontiers of a set $S$ can be computed in +time $O(|S|+|V|+|E|)$ using the +algorithm by Sreedhar and Gao~\cite{linear-time-IDF}\footnote{ +In practice it is often sub-linear in $|V|+|E|$.}. + +\begin{SML} + functor \mlrischref{ir/djgraph.sml}{DJGraph}(Dom : DOMINATOR_TREE) : DJ_GRAPH +\end{SML} +The functor \sml{DJGraph} implements this algorithm. +It satisfies the signature below: +\begin{SML} + signature \mlrischref{ir/djgraph.sig}{DJ_GRAPH} = sig + structure Dom : DOMINATOR_TREE + type ('n,'e,'g) dj_graph = ('n,'e,'g) Dom.dominator_tree + val dj_graph : ('n,'e,'g) dj_graph -> + \{ DF : node_id -> node_id list, + IDF : node_id -> node_id list, + IDFs : node_id list -> node_id list + \} + end +\end{SML} +The function \sml{dj_graph} takes a dominator tree and returns +three query methods for computing dominance and iterated dominance frontiers. +Method \sml{DF} computes $DF(v)$ for a single node $v$. +Method \sml{IDF} computes the $DF^+(v)$, and method +\sml{IDFs} computes $DF^+(S)$ when given a set of node ids. +The dominator tree must not be updated while these operations +are being performed. + +Sreedhar's original algorithm is phrased in terms of the +DJ-graph, which is a fusion of the dominator tree +with its underlying flowgraph. Our variant operates on the +dominator tree and the flowgraph at the same time, without +building an intermediate data structure. + +Iterated dominance frontiers are used +in many algorithms that deal with the notion of dominance. +For example, our SSA construction algorithm uses iterated +dominance frontiers to identify confluent points in the program +where $phi$-functions are to be placed. + +\subsubsection{Loop Nesting Tree} + +A \newdef{natural loop} $L$ in a graph is a maximal +strongly connected component +such that all nodes in $L$ are dominated by a single node $h$, called +the \newdef{loop header}. Loops tend to form good optimization candidates +and consequently \newdef{loop detection} is an essential task in a compiler. +The functor +\begin{SML} + functor \mlrischref{ir/loop-structure.sml}{LoopStructure} + (structure GraphImpl : GRAPH_IMPLEMENTATION + structure Dom : DOMINATOR_TREE + ) : LOOP_STRUCTURE +\end{SML} +recognizes all natural loops in a graph and built a +\newdef{loop nesting tree} +that describes the loop nesting relationship between graphs. + +\begin{SML} + signature \mlrischref{ir/loop-structure.sig}{LOOP_STRUCTURE} = sig + structure Dom : DOMINATOR_TREE + datatype ('n,'e,'g) loop = + LOOP of \{ nesting : int, + header : node_id, + loop_nodes : node_id list, + backedges : 'e edge list, + exits : 'e edge list + \} + + type ('n,'e,'g) loop_info + type ('n,'e,'g) loop_structure = (('n,'e,'g) loop,unit, ('n,'e,'g) loop_info) graph + + val loop_structure : ('n,'e,'g) Dom.dominator_tree -> ('n,'e,'g) loop_structure + val nesting_level : ('n,'e,'g) loop_structure -> node_id array + val header : ('n,'e,'g) loop_structure -> node_id array + end +\end{SML} + +Our algorithm computes the loop nesting tree in time +$O((|V|+|E|)\alpha(|V|+|E|))$. +Each node in this tree represents a loop in the flowgraph, except the +root of the tree, which represents the entire graph. +Given a flowgraph $G$, the root +of the loop nesting tree is defined to be the sole vertex in +\sml{#entry} $G$. Other nodes in the tree +are indexed by the loop header node ids. + +Loop detection classifies each loop and for +each loop $L$, the following information is obtained: +\begin{itemize} + \item An integer \sml{nesting}. The root of the tree has nesting + depth 0. The top level loops have nesting depth 1, etc. + \item The node id of the loop \sml{header} $h$. + \item A set of \sml{loop_nodes}. Loop nodes are + nodes that are in the strongly connected + component $L$, but excluding the header $h$ + and all nodes that are part of any nested loops. + Thus all nodes are uniquely partitioned in header nodes and + loop nodes, and loop nodes are further partitioned into different + sets according to which headers they are immediately nested under. + \item A set of \sml{backedges}. A back-edge is an + edge that targets the header $h$ and originates from a loop node + in $L$. + \item A set of loop \sml{exits}. An exit-edge is an edge + that originates from a loop node within $L$ + targets a node outside of $L$. Note that this set does not include + any exit-edges contained in loops nested in $L$ but + target a node out of $L$. +\end{itemize} + +\subsubsection{Static Single Assignment} + +An SSA construction algorithm based on~\cite{SSA,Briggs-SSA,linear-time-IDF} +is implemented in the following functor: +\begin{SML} + functor \mlrischref{ir/ssa.sml}{StaticSingleAssignmentForm} + (Dom : DOMINATOR_TREE) : STATIC_SINGLE_ASSIGNMENT_FORM +\end{SML} + +SSA-based optimizations in MLRISC +are actually implemented on top of a +high-level SSA layer described in Section~\ref{sec:ssa}. +So it is not necessary to use this module directly. Nevertheless, +there can be situations in which this module can be specialized in other +ways; for example, in the construction of sparse evaluation graphs. + +\begin{SML} + signature \mlrischref{ir/ssa.sig}{STATIC_SINGLE_ASSIGNMENT_FORM} = sig + structure Dom : DOMINATOR_TREE + type var = int + type phi = var * var * var list $(* orig def/def/uses *)$ + type renamer = \{defs : var list, uses: var list\} -> + \{defs : var list, uses: var list\} + type copy = \{dst : var list, src: var list\} -> unit + + val compute_ssa : + ('n,'e,'g) Dom.dominator_tree -> + \{ max_var : var, + defs : 'n node -> var list, + is_live : var * int -> bool, + rename_var : var -> var, + rename_stmt : \{rename:renamer,copy:copy\} -> 'n node -> unit, + insert_phi : \{block : 'n node, + in_edges : 'e edge list, + phis : phi list + \} -> unit + \} -> unit + end +\end{SML} + +This module defines the function \sml{compute_ssa}, which +constructs an SSA graph. It requires +the following information from the client: +\begin{itemize} +\item A dominator tree of the flowgraph. +\item \sml{max_var} -- the maximum variable id (integer) that exists +in the flowgraph. All variables are assumed to be indexed by non-negative + integers. +\item \sml{defs}($X$) -- a function that returns $defs(X)$, +i.e.~the set of variable names defined in block $X$. +If a minimal SSA form is desired, this set should include all the definitions +in $X$. If a pruned SSA form is required, this set should +include only the set of names that are live-out in $X$. +\item \sml{is_live}($v,X$) -- a function that determines if +variable $v$ is live-in into block $X$. If not, a $\phi$-function will +not be placed in $X$. For example, to compute +the minimal-SSA form, this function should always return true. +\item \sml{rename_var}($v$) -- a function that returns a new +unique name for variable $v$. +\item \sml{rename_stmt} -- a function of type + \sml{{rename:renamer,copy:copy} -> 'n node -> unit} where +\begin{SML} + type renamer = \{defs : var list, uses: var list\} -> + \{defs : var list, uses: var list\} + type copy = \{dst : var list, src: var list\} -> unit +\end{SML} +Function \sml{rename_stmt} is called for each block +in the flowgraph in the order of the dominator tree, and +is responsible for renaming all the variables in $X$ by +calling the functions \sml{renamer} or \sml{copy}. +Function \sml{renamer} renames all definitions and uses of +a statement, while function \sml{copy} renames +of a set of parallel assignments +\item \sml{insert_phi}($X$,$es$,$phis$) -- + a function that inserts a set of + $\phi$-definitions $phis$ in block $X$, where $es$ + is the list of control flow edges that merge into block $X$. +\end{itemize} + +\subsubsection{IDEFS/IUSE sets} +Reif and Tarjan define the following useful notions for +computing approximate birth-points for expressions, which in turn +can be used to drive other optimizations. +Given a node $X$, let $idom(X)$ denote the immediate dominator of $X$. +Let $def(X)$ ($use(X)$) denote all the definitions (uses) in $X$. +Given a path $p \equiv v_1\ldots v_n$, define $def(p)$ ($use(p)$) as +\begin{eqnarray*} + def(v_1\ldots v_n) & \equiv &\union_{i \in 1 \ldots n} def(v_i) \\ + use(v_1\ldots v_n) & \equiv &\union_{i \in 1 \ldots n} use(v_i) +\end{eqnarray*} + +Let $P(X)$ denotes all the paths from $idom(X)$ to $X$ +that does not cross $idom(X)$ internally. Then define +$idef(X)$ ($iuse(X)$) as: +\begin{eqnarray*} + idef(X) & \equiv & \Union_{idom(X) v_1 \ldots v_n X \in P(X)} + def(v_1\ldots v_n) \\ + iuse(X) & \equiv & \Union_{idom(X) v_1 \ldots v_n X \in P(X)} + use(v_1\ldots v_n) +\end{eqnarray*} +The sets $ipostdef(X)$ and $ipostuse(X)$ are defined analogously +using the postdominator tree. + +\begin{SML} + signature \mlrischref{ir/idefs2.sig}{IDEFS} = sig + type var = int + val compute_idefs : + \{def_use : 'n Graph.node -> var list * var list, + cfg : ('n,'e,'g) Graph.graph + \} -> + \{ idefuse : unit -> (RegSet.regset * RegSet.regset) Array.array, + ipostdefuse : unit -> (RegSet.regset * RegSet.regset) Array.array + \} + end + structure \mlrischref{ir/idefs2.sml}{IDefs} : IDEFS +\end{SML} +Structure \sml{IDefs} implements the function +\sml{comput_idefs} for computing +the $idef$, $iuse$, $ipostdef$ and $ipostuse$ sets of a control flow +graph. It takes as arguments a flowgraph and a function \sml{def_use}, which +takes a graph node and returns the def/use sets of the node. +It returns two functions \sml{idefuse} and \sml{ipostdefuse} which +compute the $idef/iuse$ and $ipostdef/ipostuse$ sets. These sets +are returned as arrays indexed by node ids. diff --git a/MLRISC/Doc/latex/constants.tex b/MLRISC/Doc/latex/constants.tex new file mode 100644 index 0000000..2d55833 --- /dev/null +++ b/MLRISC/Doc/latex/constants.tex @@ -0,0 +1,36 @@ +\section{Client Defined Constants} +\subsubsection{Introduction} +MLRISC allows the client to inject abstract +\newdef{constants} that are resolved +only at the end of the compilation phase into the instruction stream. +These constants can be used whereever an integer literal is expected. +Typical usage are stack frame offsets for spill locations which are only +known after register allocation, +and garbage collection and exception map which are resolved only +when all address calculation are performed. + +\subsubsection{The Details} +Client defined constants should satsify the following signature: +\begin{SML} +signature \mlrischref{instructions/constant.sig}{CONSTANT} = sig + type const + + val toString : const -> string + val valueOf : const -> int + val hash : const -> word + val == : const * const -> bool +end +\end{SML} + +The methods are: +\begin{methods} + toString & a pretty printing function \\ + valueOf & returns the value of the constant \\ + hash & returns the hash value of the constant \\ + == & compare two constants for identity \\ +\end{methods} + +The method \sml{toString} should be implemented in all cases. +The method \sml{valueOf} is necessary only if machine code generation +is used. The last two methods, \sml{hash} and \sml{==} are necessary +only if SSA optimizations are used. diff --git a/MLRISC/Doc/latex/contributions.tex b/MLRISC/Doc/latex/contributions.tex new file mode 100644 index 0000000..bc5e56f --- /dev/null +++ b/MLRISC/Doc/latex/contributions.tex @@ -0,0 +1,60 @@ +\section{Contributions} + The optimizations provided by MLRISC are at a similar level to + those performed by the Impact compiler; several target back ends + exist (Dec Alpha, HPPA, Sparc, x86, and PPC); but more importantly, the + framework has been demonstrated in \href{systems.html}{real use} + for languages with radically different execution models. These include: + + \begin{center} + \begin{tabular}{|c|c|} \hline + Compiler & Association \\ \hline + \begin{color}{#005500}SML/NJ\end{color} & Bell Labs and Princeton\\\hline + \begin{color}{#005500}TIL\end{color} & CMU \\ \hline + \begin{color}{#005500}Tiger\end{color} & Princeton \\ \hline + \begin{color}{#005500}C--\end{color} & OGI \\ \hline + \begin{color}{#005500}SML/Regions\end{color} & DIKU \\ \hline + \begin{color}{#005500}Moby\end{color} & Bell Labs \\ \hline + \end{tabular} + \end{center} + + The strength of MLRISC lies in the ability to easily create high + quality code generator for each of these systems. For example: + + \begin{description} + \item[Tiger:] Has an execution + model very similar to C with stack allocated activation frames, + and also maintains static and dynamic chains to support lexical + scoping. + + \item[TIL:] Is similar to C in its + use of activation frames, however it uses a + \emph{typed intermediate language} that + supports \emph{almost tag-free} + garbage collection. This has severe implications on the + interaction of spilling and garbage collection. The set of live + variables and their locations, be it registers or frame slots, + is recorded in a trace table for a specific program point. When + spilling occurs, it is necessary to adjust some of these trace + tables to reflect the new locations of live variables. + + \item[SML/NJ:] Has no runtime + stack, but stores all execution context in a garbage collected + heap. This arrangement imposes special requirements for spilling + registers. SML/NJ also does \emph{dynamic linking} --- that is + to say, no use is made of a conventional linker, but machine + code is generated directly and linked into the interactive + environment, dynamically. + + \item[C--:] Is a C-like portable assembly + language used as an intermediate language for high level typed language, + and provides direct compilation support for exceptions and + precise garbage collection. In addition, it allows + interoperability with C function calls. +\end{description} + + It is not uncommon for any of these systems to store special global + values in dedicated registers, and use their own parameter passing + and callee-save conventions. In any language that supports garbage + collection, there are also the issues of generating gc type maps, + and gc-safety in aggressive optimizations. MLRISC deals with all these + important issues by allowing customization of many aspects of the system. diff --git a/MLRISC/Doc/latex/contributors.tex b/MLRISC/Doc/latex/contributors.tex new file mode 100644 index 0000000..2c1da87 --- /dev/null +++ b/MLRISC/Doc/latex/contributors.tex @@ -0,0 +1,15 @@ +\section{Contributors} + \subsubsection{Past} + \begin{itemize} + \item Florent Guillame (INRIA) + \item George C. Necula (CMU) + \item Ken Cline (CMU) + \item Andrew Bernard (CMU) + \item Dino Oliva (NEC) + \end{itemize} + +\subsubsection{Present} + \begin{itemize} + \item Allen Leung (NYU) + \item Fermin Reig (University of Glasgow) + \end{itemize} diff --git a/MLRISC/Doc/latex/delayslots.tex b/MLRISC/Doc/latex/delayslots.tex new file mode 100644 index 0000000..2164534 --- /dev/null +++ b/MLRISC/Doc/latex/delayslots.tex @@ -0,0 +1,174 @@ +\section{Delay Slot Filling} +\subsection{ Overview } + + Superscalar architectures such as the Sparc, MIPS, and PA-RISC +contain delayed branch and/or load instructions. +Delay slot filling is necessary +task of the back end to keep the instruction pipelines busy. To accomodate +the intricate semantics of branch delay slot in various architectures, +MLRISC uses the following very general framework for dealing with +delayed instructions. + +\begin{description} + \item[Instruction representation] + To make it easy to deal with instruction with delay slot, MLRISC allow + the following extensions to instruction representations. + \begin{itemize} + \item Instructions with delay slot may have a + \begin{color}{#aa0000}nop\end{color} flag. When this flag is true + the delay slot is assumed to be filled with a NOP instruction. + \item Instructions with delay slots that can be nullified may have a + \begin{color}{#aa0000}nullified\end{color} flag. + When this flag is true the branch delay slot is assumed to be + nullified. + \end{itemize} + \item[Nullification semantics] + Unfortunately, nullification semantics + in architectures vary. In general, MLRISC allows the following + additional nullification characteristics to be specified. + \begin{itemize} + \item Nullification can be specified as illegal; this is needed + because some instructions can not be nullified + \item When nullification is enabled, the semantics of the delay slot + instruction may depend on the direction of the branch, and whether + a conditional test succeeds. + \item Certain class of instructions may be declared to be illegal + to fit into certain class of delay slots. + \end{itemize} +\end{description} + +For example, conditional branch instructions on the Sparc are defined +as follows: +\begin{verbatim} + Bicc of {b:branch, a:bool, label:Label.label, nop:bool} + asm: ``b\t DELAY_SLOTS_PROPERTIES + -- machine properties for delay slot filling, if a machine + architecture contains branch delay slots or load delay slots. + \item \codehref{../SSA/ssaProps.sig}{ SSA_PROPERTIES } -- + semantics properties for performing optimizations in Static Single + Assignment form. +\end{itemize} + +In general, writing a backend is tedious even with +SML's abstraction capabilities. +Furthermore, the machine description is procedural in natural +and must be checked by hand. + +\subsection{ What is in MDGen? } +The MDGen tool simplifies the process of developing a new MLRISC backend. +MDGen provides the following: +\begin{itemize} + \item A representation description language for specifying the + machine encoding of the instruction set, + using an extension of ML's algebraic datatype facility. + \item A semantics description language for specifying the abstract semantics + of the instructions. +\end{itemize} + +Both sub-languages are based on ML's syntax and semantics, so +they should be readily familiar to all MLRISC users. + +A backend developer can specify a new machine architecture using the MDGen +language, and in turn, the MDGen tool generates ML modules that are +required by the MLRISC system. + +The basic concepts of MDGen are inspired largely from +Norman Ramsey's +New Jersey Machine Code Tool Kit and +Ramsey and Davidson's + +Lambda RTL + +\subsection{A Sample Description} + +Here we present a sample MDGen description, using the Alpha as an example. +We highlight all keywords in the MDGen language +in. A typical machine description +is structured as follows: + +\begin{SML} +architecture Alpha = + struct + + name "Alpha" + + superscalar + + little endian + + lowercase assembly + + \href{#cells}{Storage cells and locations} + \href{#encoding}{Instruction encoding formats specification} + \href{#instruction}{Instruction definition} +end +\end{SML} + +Here, we declare that the Alpha is a superscalar machine using +little endian encoding. Furthermore, assembly output should be displayed +in lowercase-- this is for personal esthetic reasons only; most assemblers +are case insensitive. + + + +\subsubsection{ Specifying Storage Cells and Locations } + +A cell is an abstract resource location +for holding data values. On typical machines, the types of +cells include general purpose registers, floating point registers, +and condition code registers. + +The \sml{storage} declaration defines different +cellkinds. MLRISC requires the +cellkinds \sml{GP}, \sml{FP}, \sml{CC} to be defined. +These are the cellkinds for general purpose registers, floating point +registers and condition code registers. + +In the following sequence of declarations, a few things are defined: +\begin{itemize} + \item The cellkinds \sml{GP, FP, CC} are defined. + Furthermore, the cellkinds \sml{MEM, CTRL}, which stand + for memory and control (dependence), are also implicitly defined. + \item The \sml{assembly as} clauses specify how a specific cell type is + to be displayed. Here, we specify that register 30, the + stack pointer, should be displayed specially as \sml{$sp}. + \item The \sml{in cellset} clause, when attached, tells MDGen that + the associated cellkind should be part of the + \href{cellset.html}{ cellset }. The clause \sml{in cellset GP} + tells MDGen that the a cell of type \sml{CC} should be treated + the same as a \sml{GP} + \item The \sml{locations} declarations define a few abbreviations: + \sml{stackptrR} is the stack pointer, \sml{asmTmpR} is + the assembly temporary, \sml{fasmTmp} is the floating point + assembly temporary etc. +\end{itemize} + + +\begin{SML} + storage + GP = 32 cells of 64 bits in cellset called "register" + assembly as (fn 30 => "$sp" + | r => "$"^Int.toString r) + | FP = 32 cells of 64 bits in cellset called "floating point register" + assembly as (fn f => "f"^Int.toString f) + | CC = cells of 64 bits in cellset GP called "condition code register" + assembly as "cc" + locations + stackptrR = $GP[30] + and asmTmpR = $GP[28] + and fasmTmp = $FP[30] + and GPReg r = $GP[r] + and FPReg f = $GP[f] +\end{SML} + +

    + Specifying the Representation of Instructions

    +\begin{SML} + structure Instruction = + struct + datatype ea = + Direct of $GP + | FDirect of $FP + | Displace of {base: $GP, disp:int} + + datatype operand = + REGop of $GP ``'' (GP) + | IMMop of int ``'' + | HILABop of LabelExp.labexp ``hi()'' + | LOLABop of LabelExp.labexp ``lo()'' + | LABop of LabelExp.labexp ``'' + | CONSTop of Constant.const ``'' + + (* + * When I say ! after the datatype name XXX, it means generate a + * function emit_XXX that converts the constructors into the corresponding + * assembly text. By default, it uses the same name as the constructor, + * but may be modified by the lowercase/uppercase assembly directive. + * + *) + datatype branch! = + BR 0x30 + | BSR 0x34 + | BLBC 0x3 + | BEQ 0x39 | BLT 0x3a | BLE 0x3b + | BLBS 0x3c | BNE 0x3d | BGE 0x3e + | BGT 0x3f + + datatype fbranch! = + FBEQ 0x31 | FBLT 0x32 + | FBLE 0x33 | FBNE 0x35 + | FBGE 0x36 | FBGT 0x37 + + datatype load! = LDL 0x28 | LDL_L 0x2A | LDQ 0x29 | LDQ_L 0x2B | LDQ_U 0x0B + datatype store! = STL 0x2C | STQ 0x2D | STQ_U 0x0F + datatype fload[0x20..0x23]! = LDF | LDG | LDS | LDT + datatype fstore[0x24..0x27]! = STF | STG | STS | STT + + (* non-trapping opcodes *) + datatype operate! = (* table C-5 *) + ADDL (0wx10,0wx00) | ADDQ (0wx10,0wx20) + | CMPBGE(0wx10,0wx0f) | CMPEQ (0wx10,0wx2d) + | CMPLE (0wx10,0wx6d) | CMPLT (0wx10,0wx4d) | CMPULE (0wx10,0wx3d) + | CMPULT(0wx10,0wx1d) | SUBL (0wx10,0wx09) + | SUBQ (0wx10,0wx29) + | S4ADDL(0wx10,0wx02) | S4ADDQ (0wx10,0wx22) | S4SUBL (0wx10,0wx0b) + | S4SUBQ(0wx10,0wx2b) | S8ADDL (0wx10,0wx12) | S8ADDQ (0wx10,0wx32) + | S8SUBL(0wx10,0wx1b) | S8SUBQ (0wx10,0wx3b) + + | AND (0wx11,0wx00) | BIC (0wx11,0wx08) | BIS (0wx11,0wx20) + | CMOVEQ(0wx11,0wx24) | CMOVLBC(0wx11,0wx16) | CMOVLBS(0wx11,0wx14) + | CMOVGE(0wx11,0wx46) | CMOVGT (0wx11,0wx66) | CMOVLE (0wx11,0wx64) + | CMOVLT(0wx11,0wx44) | CMOVNE (0wx11,0wx26) | EQV (0wx11,0wx48) + | ORNOT (0wx11,0wx28) | XOR (0wx11,0wx40) + + | EXTBL (0wx12,0wx06) | EXTLH (0wx12,0wx6a) | EXTLL(0wx12,0wx26) + | EXTQH (0wx12,0wx7a) | EXTQL (0wx12,0wx36) | EXTWH(0wx12,0wx5a) + | EXTWL (0wx12,0wx16) | INSBL (0wx12,0wx0b) | INSLH(0wx12,0wx67) + | INSLL (0wx12,0wx2b) | INSQH (0wx12,0wx77) | INSQL(0wx12,0wx3b) + | INSWH (0wx12,0wx57) | INSWL (0wx12,0wx1b) | MSKBL(0wx12,0wx02) + | MSKLH (0wx12,0wx62) | MSKLL (0wx12,0wx22) | MSKQH(0wx12,0wx72) + | MSKQL (0wx12,0wx32) | MSKWH (0wx12,0wx52) | MSKWL(0wx12,0wx12) + | SLL (0wx12,0wx39) | SRA (0wx12,0wx3c) | SRL (0wx12,0wx34) + | ZAP (0wx12,0wx30) | ZAPNOT (0wx12,0wx31) + | MULL (0wx13,0wx00) | MULQ (0wx13,0wx20) + | UMULH (0wx13,0wx30) + | SGNXL "addl" (0wx10,0wx00) (* same as ADDL *) + + (* conditional moves *) + + datatype pseudo_op! = DIVL | DIVLU + + datatype operateV! = (* table C-5 opc/func *) + ADDLV (0wx10,0wx40) | ADDQV (0wx10,0wx60) + | SUBLV (0wx10,0wx49) | SUBQV (0wx10,0wx69) + | MULLV (0wx13,0wx00) | MULQV (0wx13,0wx60) + + datatype foperate! = (* table C-6 *) + CPYS (0wx17,0wx20) | CPYSE (0wx17,0wx022) | CPYSN (0wx17,0wx021) + | CVTLQ (0wx17,0wx010) | CVTQL (0wx17,0wx030) | CVTQLSV (0wx17,0wx530) + | CVTQLV (0wx17,0wx130) + | FCMOVEQ (0wx17,0wx02a) | FCMOVEGE (0wx17,0wx02d) | FCMOVEGT (0wx17,0wx02f) + | FCMOVLE (0wx17,0wx02e) | FCMOVELT (0wx17,0wx02c) | FCMOVENE (0wx17,0wx02b) + | MF_FPCR (0wx17,0wx025) | MT_FPCR (0wx17,0wx024) + + (* table C-7 *) + | CMPTEQ (0wx16,0wx0a5) | CMPTLT (0wx16,0wx0a6) | CMPTLE (0wx16,0wx0a7) + | CMPTUN (0wx16,0wx0a4) + + datatype foperateV! = + ADDSSUD 0wx5c0 + | ADDTSUD 0wx5e0 + | CVTQSC 0wx3c + | CVTQTC 0wx3e + | CVTTSC 0wx2c + | CVTTQC 0wx2f + | DIVSSUD 0wx5ec + | DIVTSUD 0wx5c3 + | MULSSUD 0wx5c2 + | MULTSUD 0wx5e2 + | SUBSSUD 0wx5c1 + | SUBTSUD 0wx5e1 + + datatype osf_user_palcode! = + BPT 0x80 | BUGCHK 0x81 | CALLSYS 0x83 + | GENTRAP 0xaa | IMB 0x86 | RDUNIQUE 0x9e | WRUNIQUE 0x9f + + end (* Instruction *) +\end{SML} + +

    + Specifying the Instruction Encoding Formats

    + + The Alpha has very simple instruction encoding formats. + + +\begin{SML} + instruction formats 32 bits + Memory{opc:6, ra:5, rb:GP 5, disp: signed 16} (* p3-9 *) + (* derived from Memory *) + | LoadStore{opc,ra,rb,disp} = + let val disp = + case disp of + I.REGop rb => emit_GP rb + | I.IMMop i => itow i + | I.HILABop le => itow(LabelExp.valueOf le) + | I.LOLABop le => itow(LabelExp.valueOf le) + | I.LABop le => itow(LabelExp.valueOf le) + | I.CONSTop c => itow(Constant.valueOf c) + in Memory{opc,ra,rb,disp} + end + | ILoadStore{opc,r:GP,b,d} = LoadStore{opc,ra=r,rb=b,disp=d} + | FLoadStore{opc,r:FP,b,d} = LoadStore{opc,ra=r,rb=b,disp=d} + + | Jump{opc:6,ra:GP 5,rb:GP 5,h:2,disp:int signed 14} (* table C-3 *) + | Memory_fun{opc:6, ra:GP 5, rb:GP 5, func:16} (* p3-9 *) + | Branch{opc:branch 6, ra:GP 5, disp:signed 21} (* p3-10 *) + | Fbranch{opc:fbranch 6, ra:FP 5, disp:signed 21} (* p3-10 *) + (* p3-11 *) + | Operate0{opc:6,ra:GP 5,rb:GP 5,sbz:13..15,_:1=0,func:5..11,rc:GP 5} + (* p3-11 *) + | Operate1{opc:6,ra:GP 5,lit:signed 13..20,_:1=1,func:5..11,rc:GP 5} + | Operate{opc,ra,rb,func,rc} = + (case rb of + I.REGop rb => Operate0{opc,ra,rb,func,rc,sbz=0w0} + | I.IMMop i => Operate1{opc,ra,lit=itow i,func,rc} + | I.HILABop le => Operate1{opc,ra,lit=itow(LabelExp.valueOf le),func,rc} + | I.LOLABop le => Operate1{opc,ra,lit=itow(LabelExp.valueOf le),func,rc} + | I.LABop le => Operate1{opc,ra,lit=itow(LabelExp.valueOf le),func,rc} + | I.CONSTop c => Operate1{opc,ra,lit=itow(Constant.valueOf c),func,rc} + ) + | Foperate{opc:6,fa:FP 5,fb:FP 5,func:5..15,fc:FP 5} + | Pal{opc:6=0,func:26} +\end{SML} + + +\subsubsection{ Specifying the instruction set } + +\begin{SML} + structure MC = + struct + (* compute displacement address *) + fun disp lab = itow(Label.addrOf lab - !loc - 4) ~>> 0w2 + end + + (* + * The main instruction set definition consists of the following: + * 1) constructor-like declaration defines the view of the instruction, + * 2) assembly directive in funny quotes `` '', + * 3) machine encoding expression, + * 4) semantics expression in [[ ]], + * 5) delay slot directives etc (not necessary in this architecture!) + *) + instruction + DEFFREG of $FP (* define a floating point register *) + ``deffreg '' + (* Pseudo instruction for the register allocator *) + + (* Load/Store *) + | LDA of {r: $GP, b: $GP, d:operand} (* use of REGop is illegal *) + ``lda\t, ()'' + ILoadStore{opc=0w08,r,b,d} + + | LDAH of {r: $GP, b: $GP, d:operand} (* use of REGop is illegal *) + ``ldah\t, ()'' + ILoadStore{opc=0w09,r,b,d} + + | LOAD of {ldOp:load, r: $GP, b: $GP, d:operand, mem:Region.region} + ``\t, ()'' + ILoadStore{opc=emit_load ldOp,r,b,d} + + | STORE of {stOp:store, r: $GP, b: $GP, d:operand, mem:Region.region} + ``\t, ()'' + ILoadStore{opc=emit_store stOp,r,b,d} + + | FLOAD of {ldOp:fload, r: $FP, b: $GP, d:operand, mem:Region.region} + ``\t, ()'' + FLoadStore{opc=emit_fload ldOp,r,b,d} + + | FSTORE of {stOp:fstore, r: $FP, b: $GP, d:operand, mem:Region.region} + ``\t, ()'' + FLoadStore{opc=emit_fstore stOp,r,b,d} + + (* Control Instructions *) + | JMPL of {r: $GP, b: $GP, d:int} * Label.label list + ``jmpl\t, ()'' + Jump{opc=0wx1a,h=0w0,ra=r,rb=b,disp=d} (* table C-3 *) + + | JSR of {r: $GP, b: $GP, d:int} * C.cellset * C.cellset + ``jsr\t, ()'' + Jump{opc=0wx1a,h=0w1,ra=r,rb=b,disp=d} + + | RET of {r: $GP, b: $GP, d:int} + ``ret\t, ()'' + Jump{opc=0wx1a,h=0w2,ra=r,rb=b,disp=d} + + | BRANCH of branch * $GP * Label.label + `` , + + +\subsection{ 4 Machine Descriptions } +Here are some machine descriptions in varing degree of completion. + +\begin{itemize} + \item \codehref{../sparc/sparc.mdl}{ Sparc } + \item \codehref{../hppa/hppa.mdl}{ Hppa } + \item \codehref{../alpha/alpha.mdl}{ Alpha } + \item \codehref{../ppc/ppc.mdl}{ PowerPC } + \item \codehref{../x86/x86.mdl}{ X86 } +\end{itemize} + +\subsection{ Syntax Highlighting Macros } + +\begin{itemize} + \item \href{md.vim}{ For vim 5.3 } +\end{itemize} + + + diff --git a/MLRISC/Doc/latex/mips.tex b/MLRISC/Doc/latex/mips.tex new file mode 100644 index 0000000..f738990 --- /dev/null +++ b/MLRISC/Doc/latex/mips.tex @@ -0,0 +1,3 @@ +\section{The MIPS Back End} + +No documentation yet. diff --git a/MLRISC/Doc/latex/mlrisc-arch.tex b/MLRISC/Doc/latex/mlrisc-arch.tex new file mode 100644 index 0000000..07cd496 --- /dev/null +++ b/MLRISC/Doc/latex/mlrisc-arch.tex @@ -0,0 +1,115 @@ +\section{Architecture of MLRISC} + +\subsection{Core Components} + + The core components of MLRISC allow the client to quickly construct +an backend for various architectures. These components include: +\begin{itemize} + \item The \href{mltree.html}{MLTREE} language, + which is a RTL-like intermediate language + that is used by the client + to communicate to the MLRISC system. A client is + responsible for writing the module that generates MLTREE from + the source program representation. + \item \href{instrsel.html}{Instruction selection modules}, + which generates target machine + instructions from MLTREE. + \item The \href{ra.html}{Register Allocator}, + which performs register allocation. + \item \href{asm.html}{Assemblers}, which emits assembly code. +\end{itemize} + +For systems that require direct machine code generation, the following +modules are included: +\begin{itemize} + \item \href{span-dep.html}{Span dependency resolution} + modules, which compute addresses + from symbolic addresses, + fill delay slots, and expand instructions that are + \newdef{span dependent} + \item \href{mc.html}{Machine code emitters}, + which emit executable machine code into a binary stream. +\end{itemize} + +\subsection{Optimization Modules} + +In addition, MLRISC has been enhanced to support various types of +machine level optimizations. These include: + +\begin{itemize} + \item Core optimizations, which includes + various types of control flow transformation, + and architectural specific peephole optimizations. + \item SSA based scalar optimizations + \item ILP optimizations for superscalars + \item ILP optimizations for VLIW/EPIC architectures + \item GC safety analysis +\end{itemize} + +\subsection{Basic Concepts} + + Basic concepts in MLRISC are: +\begin{itemize} + \item \href{instructions.html}{Instructions} -- + the instruction set of the target architecture. + \item \href{cells.html}{Cells} -- which describes registers, +memory and other mutable resources in the machine. + \item \href{regions.html}{Regions} -- a client defined + abstract type used to represent aliasing information available from +the front-end. + \item \href{constants.html}{Constants} -- a client defined + place holder used to represent constants whose values are unknown + in the front-end. + \item \href{pseudo-ops.html}{Pseudo Ops} -- a client defined + + \item \href{annotations.html}{Annotations} -- this is + a generic mechinism for propagating information in the MLRISC sstem. + The client may attach arbitrary annotation of various granularity + to MLRISC's program representation, + which can then be propagated to later phases. + These can be information related to profiling frequency, dependence, + comments, and/or types. + The same mechanism is also used to propagate + analysis information one optimization phase to + another. + \item \href{streams.html}{Instruction Streams} -- an abstraction + for describing a stream of instructions. Instruction streams are + used to connect modules such as instruction selection, assembler, + machine code emitter, and + control flow graph builder. + \item \href{regmap.html}{Regmap} -- a mapping between registers + names. MLRISC register allocators represent the result of register + allocation as a regmap. + \item \href{labels.html}{Labels} -- a type representing +symbolic labels. + \item \href{labelexp.html}{Label Expressions} -- a type representing + constant expressions + involving symbolic labels. +\end{itemize} + +\subsection{How Things Are Fit Together} + + MLRISC uses two different program representations, clusters and MLRISC IR. +\begin{itemize} + \item \href{cluster.html}{Cluster} is light-weight representation +that is used when only the most basic optimizations are required. + \item \href{mlrisc-ir.html}{MLRISC IR} is more heavy-weight + representation that is built from the + \href{graphs.html}{MLRISC graph library} and the + \href{compiler-graphs.html}{MLRISC compiler graph library}. + MLRISC IR allows more complex transformations and analysis of the + program graph. +\end{itemize} +Conversion modules between the two representations are provided. + +In general MLRISC optimization phases are transformations applied on one +of these representations. Optimizations may be chained together to form +a compiler backend. For example, a minimal backend consists of +\begin{itemize} + \item the instruction selection module, which translates +\href{mltree.html}{MLTree} into target instructions, + \item the flowgraph builder, which conversts a stream of target instructions + into a cluster, + \item the register allocator, which performs register allocation, and + \item the assembly code emitter, which generates assembly output +\end{itemize} diff --git a/MLRISC/Doc/latex/mlrisc-compiler.tex b/MLRISC/Doc/latex/mlrisc-compiler.tex new file mode 100644 index 0000000..8991a62 --- /dev/null +++ b/MLRISC/Doc/latex/mlrisc-compiler.tex @@ -0,0 +1,54 @@ +\section{MLRISC Based Compiler} + A traditional compiler will typically consist of a + \begin{color}{#dd0000}lex/yacc\end{color} based front end, an + \begin{color}{#DD0000}optimization\end{color} + phase that is repeatedly invoked + over some intermediate representation, and finally a + \begin{color}{#DD0000}back end\end{color} + code generation phase. The intermediate + representation is usually at a level of detail appropriate to the + optimization being performed, and may be far removed from the + native instructions of the target architecture. The back end + proceeds by translating the intermediate representation into + instructions and registers for an abstract machine that is much + closer to the target architecture. Retargetting is then achieved + by mapping the registers and instructions of the abstract machine + to registers and instructions of the target architecture. + + \br{clear=left} + + \image{MLRISC based compiler}{pictures/png/compiler-2.png}{align=right} + + An MLRISC based compiler, on the other hand, translates the + intermediate representation into MLRISC instructions and it is the + MLRISC instructions that get mapped onto instructions of the target + architecture. Another possibility is to translate the front end + abstract machine instructions instead of the intermediate + representation. Once MLRISC instructions have been generated, + nearly all aspects of high quality code generation come for free. A + long story would be cut short if MLRISC were just another abstract + machine. + + \begin{color}{#580000} The key idea behind MLRISC is that there is no + single MLRISC instruction set or intermediate program + representation, \end{color} but the MLRISC intermediate representation + is specialized to the needs of the front end source language being + compiled. The specialization does not stop there, but the: + \begin{itemize} + \item \begin{color}{#005500}target instruction set\end{color}, + \item \begin{color}{#005500}flowgraph\end{color}, and + \item \begin{color}{#005500}entire optimization suite\end{color} + \end{itemize} + + are specialized to the needs of the front end. The ability to + consistently specialize each of these to create a back end for a + specific language, summarizes the characteristics of MLRISC that + distinguishes it from other retargetable backends. + + \begin{color}{#580000} It is important to emphasize that little + optimizations performed on the MLRISC intermediate + representation. \end{color} Most optimizations are done on a flowgraph of + target machine instructions, to enable optimizations that take advantage + of the characteristics of each architectural. + The MLRISC intermediate representation is just used as a stepping + stone to get to the flowgraph. diff --git a/MLRISC/Doc/latex/mlrisc-gen.tex b/MLRISC/Doc/latex/mlrisc-gen.tex new file mode 100644 index 0000000..946aa52 --- /dev/null +++ b/MLRISC/Doc/latex/mlrisc-gen.tex @@ -0,0 +1,60 @@ +\section{MLRisc Generation} + Every compiler will eventually compile down to an abstract machine + that it believes will execute source programs efficiently. The + abstract machine will typically consists of abstract machine + registers and instructions, one or more stacks, and parameter + passing conventions. The hope is that all this will map down + efficiently onto the target machine. Indeed, the abstract machine + should be reasonably close to architectures that are envisioned as + possible targets. Several step need to be followed in the generation + of MLRisc. + + \begin{enumerate} + \item The first step in generating target machine code is to define + the MLRisc intermediate representation after it has been + appropriately specialized. The interfaces that describe the + dimensions of specialization are quite simple. Depending on the + compiler, these may be target dependent; for example, in the SML/NJ + compiler, the encoding of registers used to indicate the roots of + garbage collection depend on how the runtime system decodes the + information. + + \item The only real connection between the MLRisc intermediate + representation and the target machine is that the first + $0..K-1$ MLRisc registers map onto the first $K$ + physical registers on the target machine. Thus some mapping of + dedicated abstract machine registers to physical target registers is + required. It is not always necessary to map abstract machine + registers to physical machine registers. For example, on + architectures like the x86 with few registers, some abstract machine + registers may be mapped to fixed memory locations. Thus an abstract + machine register like the \sml{maskReg} may have something like: +\begin{SML} + LOAD(32, LABEL maskRegLab) +\end{SML} +spliced instead. + + \item The unit of compilation is called a + \href{cluster.html}{cluster} which + is the smallest unit for inter-procedural optimizations. A cluster + will typically consist of several entry points that may call each + other, as well as call local functions in the module. For maximum + flexibility, the parameter passing convention for local functions + should be specialized by the \href{mlrisc-ra.html}{register allocator}. + + Once the MLRisc trees for a cluster have been built, they must + be converted into target assembly or machine code. This is done by + building up a function (\newdef{codegen}) that + glues together optimizations modules that have been specialized. For + example, the target instruction set must be specialized to hold the + MLRisc constants; the flowgraph must be specialized to carry these + instructions as well as the MLRisc pseudo-ops; the optimization + modules must know about several front end constraints such as how to + spill registers. + \end{enumerate} + + If the module that translates the abstract machine instructions + into MLRisc instructions has been appropriately parameterized, then + it can be reused for multiple target architectures. For high level + languages it is better to generate MLRisc instructions from the high + level intermediate form used by the front end of the compiler. diff --git a/MLRISC/Doc/latex/mlrisc-graphics.tex b/MLRISC/Doc/latex/mlrisc-graphics.tex new file mode 100644 index 0000000..2bbae09 --- /dev/null +++ b/MLRISC/Doc/latex/mlrisc-graphics.tex @@ -0,0 +1,31 @@ +\section{Graphical Interface} + All the major data structures and intermediate program states can be + viewed graphically using + \externhref{http://www.Informatik.Uni-Bremen.DE/~davinci/}{\begin{color}{red}daVinci\end{color}} and + \externhref{http://www.cs.uni-sb.de/RW/users/sander/html/gsvcg1.html}{\begin{color}{red}vcg\end{color}} + The following screen dumps are intended to represent the range of + possibilities. Graphical tools like these are an indispensible + debugging aid. Each of the dumps below were taken when generating + code for the \begin{color}{red}mandelbrot\end{color} on the HPPA + architecture. It will be necessary to make netscape fill the size of + the screen to view these easily. Even though some of these graphs + look quite complex, daVinci has several \emph{navigational} modes + that allow walking to successors, or predecessors, or navigating + through a scaled down map of the graph. The navigational view is +shown as another window, and the view into the graph that is being +displayed is usually outlined in \begin{color}{blue}blue\end{color}. + + \begin{description} + \item[\href{graphics/mandelbrot-opt.gif}{Control Flowgraph after Optimization:}] Each basic block is shown with its dynamic profile and + code before and after a specific optimization. This view + saves having to pour through pages of assembly code listings -- + a tedious and frustrating activity. + \item[\href{graphics/mandelbrot-ssa.gif}{SSA form:}] + The generated flow graph is converted to SSA form which +makes many code improvement optimizations easy and efficient. + \item[\href{graphics/mandelbrot-ddg.gif}{Data Dependency Graph}] + A graphical view of the data dependency graph and the various +kinds of dependencies decorating the edges, provides a useful clue to +why instructions got rearranged the way they did. The navigational +view helps to control the complexity in the display. + \end{description} diff --git a/MLRISC/Doc/latex/mlrisc-ir-rep.tex b/MLRISC/Doc/latex/mlrisc-ir-rep.tex new file mode 100644 index 0000000..56c23f3 --- /dev/null +++ b/MLRISC/Doc/latex/mlrisc-ir-rep.tex @@ -0,0 +1,158 @@ +\section{MLRISC Intermediate Representation} + The MLRISC intermediate language is called + \newdef{MLTREE} At the lowest level, the core of MLTREE is a + \italics{Register Transfer Language (RTL)} + but represented in tree form. The tree + form makes it convenient to use tree pattern matching tools like + BURG (where appropriate) to do target instruction selection. Thus a + tree such as: + + \begin{SML} + MV(32, t, + ADDT(32, MULT(32, REG(32, b), REG(32, b)), + MULT(32, MULT(REG(32, a), LI(4)), REG(32, c)))) + \end{SML} + + computes \sml{t := b*b + 4*a*c} to 32-bit precision. + The nodes \sml{ADDT} and + \sml{MULT} are the trapping form of addition and multiplication, + and \sml{LI} is used for integer constants. An infinite number + of registers are assumed by the model, however depending on the + target machine the first \sml{0..K} registers map onto the first + \sml{K} registers on the target machine. Everything else is + assumed to be a pseudo-register. The \sml{REG} node is used to + indicate a general purpose register. + + + The core MLTREE language makes no assumptions about instructions or + calling convections of the target architecture. Trees can be + created and combined in almost any form, with certain meaningless + trees such as \sml{LOAD(32, FLOAD(64, LI 0))} being forbidden by the + MLTREE type structure. + + Such pure trees are nice but inadequate in real compilers. One + needs to be able to propagate front end specific information, such + as frame sizes and frame offsets where the actual values are only + available after register allocation and spilling. One could add + support for frames in MLRISC, however this becomes a slippery slope + because some compilers (e.g. SML/NJ) do not have a conventional + notion of frames --- indeed there is no runtime stack in the + execution of SML/NJ. A frame organization for one person may not + meet the needs for another, and so on. In MLRISC, the special + requirements of different compilers is communicated into the MLTREE + language, and subsequently into the optimizations phases, by + specializing the MLTREE data structure with client specific + information. There are currently \emph{five} dimensions over + which one could specialize the MLTREE language. + + \begin{description} + \item[Constants] Constants are an + abstraction for integer literals whose value is known after + certain phases of code generation. Frame sizes and offsets are an + example. + \image{MLRISC intermediate representation}{pictures/png/mlrisc-ir.png}{align=right} + \item[Regions] While the data + dependencies between arithmetic operations is implicit in the + instruction, the data dependencies between memory operations is + not. Regions are an abstract view of memory that make this + dependence explicit and is specially useful for instruction + reordering. + + \item[Pseudo-ops] Pseudo-ops are + intended to correspond to pseudo-op directives provided by native + assemblers to lay out data, jump tables, and perform alignment. + + \item[Annotations] + \href{annotations.html}{Annotations} are used + for injecting semantics and other program information from the front-end + into the backend. For example, a probability annotation can be + attached to a branch instruction. Similarly, line number annotations + can be attached to basic blocks to aid debugging. + In many language implementations function local variables are + spilled to activation frames on the stack. Spill slots contribute + to the size of a function's frame. When an instruction produces a + spill, we may need to update the frame associated to that + instruction (increase the size of its spilling area). The frame + for the current function can be injected in an annotation, which + can be later examined by the spill callback during register allocation. + + Annotations are + implemented as an universal type and can be arbitrarily extended. + Individual annotations can be associated + with compiler objects of varying granularity, + from compilation units, to regions, basic blocks, flow edges, + and down to the instructions. + + + \item[User Defined Extensions] + In the most extreme case, the basic constructors defined in the MLTREE + language may be inadequate for the task at hand. + MLTREE allows the client to arbitrarily extend + the set of statements and expressions to more closely match the + source language and the target architecture(s). + + For example, when using MLRISC for the backend of a DSP compiler + it may be useful to extend the set of MLRISC operators to include + fix point and saturated arithmetic. + Similarly, when developing a language for loop parallelization, it may + be useful to extend the MLTREE language with higher-level loop + constructs. + \end{description} + +\subsection{Examples} + + In the SML/NJ compiler, an encoding of a list of registers + is passed to the garbage collector as the roots of live + variables. This encoding cannot be computed until register + allocation has been performed, therefore the integer literal + encoding is represented as an abstract + \href{constants.html}{constant}. + + Again, in the SML/NJ compiler, most stores are for initializing + records in the allocation space, therefore representing every slot in + the allocation space as a unique region allows one to commute + most store instructions. Similarly, most loads are from + \emph{immutable} records, and a simple analysis marks these are + being accesses to \emph{read-only} memory. Read-only memory is + characterized as having multiple \emph{uses} but no + \emph{definitions}. + + In the TIL compiler, a \emph{trace table} is generated for + every call site that records the set of live variables, their + location (register or stack offset), and the type associated with + the variable. This table is integrated into the program using the + abstract pseudo-op mechanism. An interesting aspect of these tables + is that they may need adjustment based on the results of register + spilling. + + The more convention use of the psuedo-op abstraction is to + propagate function prologue and epilogue information. + + The constants abstraction are created by a tree node called + \sml{CONST}. In the SML/NJ compiler, the tree that communicates + garbage collection information looks like: + +\begin{verbatim} + MV(32, maskReg, CONST{r110,r200,r300,r400 ...}) +\end{verbatim} + + where \sml{maskReg} is a dedicated register. On the DEC Alpha, + this would get translated to: + +\begin{verbatim} + LDA maskReg, {encode(r110,r200,r300,r400, ...)} +\end{verbatim} + + which indicates that the alpha instruction set (and optimization + suite) know about these types of values. Further, after + register allocation, the \sml{LDA} instruction may not be + sufficient as the encoding may result in a value that is too large + as an operand to \sml{LDA}. Two instructions may ultimately be + required to load the encoding into the \sml{maskReg} + register. This expansion is done during + \href{span-dep.html}{span-dependency resolution}. + + All these examples are intended to indicate that one + intermediate representation and optimization suite does not fit + all, but that the intermediate representation and optimization + suite needs to be specialized to the needs of the client. diff --git a/MLRISC/Doc/latex/mlrisc-ir.tex b/MLRISC/Doc/latex/mlrisc-ir.tex new file mode 100644 index 0000000..2af2ef9 --- /dev/null +++ b/MLRISC/Doc/latex/mlrisc-ir.tex @@ -0,0 +1,609 @@ +\section{The MLRISC IR} +\subsection{Introduction} + +In this section we will describe the MLRISC intermediate representation. + +\subsubsection{Control Flow Graph} +The control flow graph is the main view of the IR. +A control flow graph satisfies the following signature: +\begin{SML} + signature \mlrischref{IR/mlrisc-cfg.sig}{CONTROL_FLOW_GRAPH} = sig + structure I : INSTRUCTIONS + structure P : PSEUDO_OPS + structure C : CELLS + structure W : FIXED_POINT + sharing I.C = C + + \italics{definitions} + end +\end{SML} + +The following structures nested within a CFG: +\begin{itemize} + \item \sml{I : INSTRUCTIONS} is the instruction structure. + \item \sml{P : PSEUDO_OPS} is the structure with the definition +of pseudo ops. + \item \sml{C : CELLS} is the cells structure describing the +register conventions of the architecture. + \item \sml{W : FIXED_POINT} is a structure that contains +a fixed point type used in execution frequency annotations. +\end{itemize} + +The type \sml{weight} below is used in execution frequency annotations: +\begin{SML} + type weight = W.fixed_point +\end{SML} + +There are a few different kinds of basic blocks, described +by the type \sml{block_kind} below: +\begin{SML} + datatype block_kind = + START + | STOP + | FUNCTION_ENTRY + | NORMAL + | HYPERBLOCK +\end{SML} + +A basic block is defined as the datatype \sml{block}, defined below: +\begin{SML} + and data = LABEL of Label.label + | PSEUDO of P.pseudo_op + + and block = + BLOCK of + \{ id : int, + kind : block_kind, + name : B.name, + freq : weight ref, + data : data list ref, + labels : Label.label list ref, + insns : I.instruction list ref, + annotations : Annotations.annotations ref + \} +\end{SML} + +Edges in a CFG are annotated with the type \sml{edge_info}, +defined below: +\begin{SML} + and edge_kind = ENTRY + | EXIT + | JUMP + | FALLSTHRU + | BRANCH of bool + | SWITCH of int + | SIDEEXIT of int + + and edge_info = + EDGE of \{ k : edge_kind, + w : weight ref, + a : Annotations.annotations ref + \} +\end{SML} + +Type \sml{cfg} below defines a control flow graph: +\begin{SML} + type edge = edge_info edge + type node = block node + + datatype info = + INFO of \{ regmap : C.regmap, + annotations : Annotations.annotations ref, + firstBlock : int ref, + reorder : bool ref + \} + type cfg = (block,edge_info,info) graph +\end{SML} + +\subsubsection{Low-level Interface} + The following subsection describes the low-level interface to a CFG. +These functions should be used with care since they do not +always maintain high-level structural invariants imposed on +the representation. In general, higher level interfaces exist +so knowledge of this interface is usually not necessary for customizing +MLRISC. + + Various kinds of annotations on basic blocks are defined below: +\begin{SML} + exception LIVEOUT of C.cellset + exception CHANGED of unit -> unit + exception CHANGEDONCE of unit -> unit +\end{SML} +The annotation \sml{LIVEOUT} is used record live-out information +on an escaping block. +The annotations \sml{CHANGED} and \sml{CHANGEDONCE} are used +internally for maintaining views on a CFG. These should not be used +directly. + + The following are low-level functions for building new basic blocks. +The functions \sml{new}\emph{XXX} build empty basic blocks of a specific +type. The function \sml{defineLabel} returns a label to a basic block; +and if one does not exist then a new label will be generated automatically. +The functions \sml{emit} and \sml{show_block} are low-level +routines for displaying a basic block. +\begin{SML} + val newBlock : int * B.name -> block + val newStart : int -> block + val newStop : int -> block + val newFunctionEntry : int -> block + val copyBlock : int * block -> block + val defineLabel : block -> Label.label + val emit : C.regmap -> block -> unit + val show_block : C.regmap -> block -> string +\end{SML} + + Methods for building a CFG are listed as follows: +\begin{SML} + val cfg : info -> cfg + val new : C.regmap -> cfg + val subgraph : cfg -> cfg + val init : cfg -> unit + val changed : cfg -> unit + val removeEdge : cfg -> edge -> unit +\end{SML} + Again, these methods should be used only with care. + + The following functions allow the user to extract low-level information +from a flowgraph. Function \sml{regmap} returns the current register map. +Function \sml{regmap} returns a function that lookups the current register +map. Function \sml{liveOut} returns liveOut information from a block; +it returns the empty cellset if the block is not an escaping block. +Function \sml{fallsThruFrom} takes a node id $v$ and locates the +block $u$ (if any) that flows into $v$ without going through a branch +instruction. Similarly, the function \sml{fallsThruTo} takes +a node id $u$ and locates the block (if any) that $u$ flows into +with going through a branch instruction. If $u$ falls through to +$v$ in any feasible code layout $u$ must preceed $v$. +\begin{SML} + val regmap : cfg -> C.regmap + val reglookup : cfg -> C.register -> C.register + val liveOut : block -> C.cellset + val fallsThruFrom : cfg * node_id -> node_id option + val fallsThruTo : cfg * node_id -> node_id option +\end{SML} + + To support graph viewing of a CFG, the following low-level +primitives are provided: +\begin{SML} + val viewStyle : cfg -> (block,edge_info,info) GraphLayout.style + val viewLayout : cfg -> GraphLayout.layout + val headerText : block -> string + val footerText : block -> string + val subgraphLayout : { cfg : cfg, subgraph : cfg } -> GraphLayout.layout +\end{SML} + + Finally, a miscellany function for control dependence graph building. +\begin{SML} + val cdgEdge : edge_info -> bool +\end{SML} + +\subsubsection{IR} +The MLRISC intermediate representation is a composite +view of various compiler data structures, including the control +flow graph, (post-)dominator trees, control dependence graph, and +loop nesting tree. Basic compiler optimizations in MLRISC +operate on this data structure; advance optimizations +operate on more complex representations which use this +representation as the base layer. +\begin{wrapfigure}{r}{4.5in} + \begin{Boxit} +% \psfig{figure=../pictures/eps/mlrisc-IR.eps,width=4.5in} + \includegraphics[width=4.5in]{../pictures/pdf/mlrisc-IR} + \end{Boxit} + \caption{The MLRISC IR} +\end{wrapfigure} + +This IR provides a few additional functionalities: +\begin{itemize} + \item Edge frequencies -- execution frequencies +are maintained on all control flow edges. + \item Extensible annotations -- semantics information can be + represented as annotations on the graph. + \item Multiple facets -- + Facets are high-level views that automatically keep themselves +up-to-date. Computed facets are cached and out-of-date facets +are recomputed by demand. +The IR defines a mechanism to attach multiple facets to the IR. +\end{itemize} + +The signature of the IR is listed below +\begin{SML} + signature \mlrischref{IR/mlrisc-ir.sig}{MLRISC_IR} = sig + structure I : INSTRUCTIONS + structure CFG : CONTROL_FLOW_GRAPH + structure Dom : DOMINATOR_TREE + structure CDG : CONTROL_DEPENDENCE_GRAPH + structure Loop : LOOP_STRUCTURE + structure Util : CFG_UTIL + sharing Util.CFG = CFG + sharing CFG.I = I + sharing Loop.Dom = CDG.Dom = Dom + + type cfg = CFG.cfg + type IR = CFG.cfg + type dom = (CFG.block,CFG.edge_info,CFG.info) Dom.dominator_tree + type pdom = (CFG.block,CFG.edge_info,CFG.info) Dom.postdominator_tree + type cdg = (CFG.block,CFG.edge_info,CFG.info) CDG.cdg + type loop = (CFG.block,CFG.edge_info,CFG.info) Loop.loop_structure + + val dom : IR -> dom + val pdom : IR -> pdom + val cdg : IR -> cdg + val loop : IR -> loop + + val changed : IR -> unit + val memo : (IR -> 'facet) -> IR -> 'facet + val addLayout : string -> (IR -> GraphLayout.layout) -> unit + val view : string -> IR -> unit + val views : string list -> IR -> unit + val viewSubgraph : IR -> cfg -> unit + end +\end{SML} + +The following facets are predefined: dominator, post-dominator tree, +control dependence graph and loop nesting structure. +The functions \sml{dom}, \sml{pdom}, \sml{cdg}, \sml{loop} +are \newdef{facet extraction} methods that +compute up-to-date views of these facets. + +The following protocol is used for facets: +\begin{itemize} +\item When the IR is changed, +the function \sml{changed} should be called to +signal that all facets attached to the IR should be updated. +\item To add a new facet of type \sml{F} that is computed by demand, +the programmer has to provide a facet construction +function \sml{f : IR -> F}. Call the function \sml{mem} +to register the new facet. For example, let \sml{val g = memo f}. +Then the function \sml{g} can be used to as a new facet extraction +function for facet \sml{F}. +\item To register a graph viewing function, call +the function \sml{addLayout} and provide an appropriate +graph layout function. For example, we can say +\sml{addLayout "F" layoutF} to register a graph layout function +for a facet called ``F''. +\end{itemize} + +To view an IR, the functions \sml{view}, \sml{views} or +\sml{viewSubgraph} can be used. They have the following interpretation: +\begin{itemize} +\item \sml{view} computes a layout for one facet of the IR and displays +it. The predefined facets are called +``dom'', ``pdom'', ``cdg'', ``loop.'' The IR can be +viewed as the facet ``cfg.'' In addition, there is a layout +named ``doms'' which displays the dominator tree and the post-dominator +tree together, with the post-dominator inverted. +\item \sml{views} computes a set of facets and displays it together +in one single picture. +\item \sml{viewSubgraph} layouts a subgraph of the IR. +This creates a picture with the subgraph highlighted and embedded +in the whole IR. +\end{itemize} + +\subsubsection{Building a CFG} + +There are two basic methods of building a CFG: +\begin{itemize} +\item convert a sequence of machine instructions +into a CFG through the emitter interface, described below, and +\item convert it from a \newdef{cluster}, which is +the basic linearized representation used in the MLRISC system. +\end{itemize} +The first method requires you to perform instruction selection +from a compiler front-end, but allows you to bypass all other +MLRISC phases if desired. The second method allows you +to take advantage of various MLRISC's instruction selection modules +currently available. We describe these methods in this section. + +\paragraph{Directly from Instructions} + Signature \sml{CODE_EMITTER} below describes an abstract emitter interface +for accepting a linear stream of instructions from a source +and perform a sequence of actions based on this +stream\footnote{Unlike the signature {\tt EMITTER\_NEW} or +{\tt FLOWGRAPH\_GEN}, it has the advantage that it is not +tied into any form of specific flowgraph representation.}. + +\begin{SML} + signature \mlrischref{extensions/code-emitter.sig}{CODE_EMITTER} = sig + structure I : INSTRUCTIONS + structure C : CELLS + structure P : PSEUDO_OPS + sharing I.C = C + + type emitter = + \{ defineLabel : Label.label -> unit, + entryLabel : Label.label -> unit, + exitBlock : C.cellset -> unit, + pseudoOp : P.pseudo_op -> unit, + emitInstr : I.instruction -> unit, + comment : string -> unit, + init : int -> unit, + finish : unit -> unit + \} + end +\end{SML} + +The code emitter interface has the following informal protocol. +\begin{methods} + init($n$) & Initializes the emitter and signals that + the back-end should + allocate space for $n$ bytes of machine code. + The number is ignored for non-machine code back-ends. \\ + defineLabel($l$) & Defines a new label $l$ at the current position.\\ + entryLabel($l$) & Defines a new entry label $l$ at the current position. + An entry label defines an entry point into the current flow graph. + Note that multiple entry points are allowed\\ + exitBlock($c$) & Defines an exit at the current position. + The cellset $c$ represents the live-out information \\ + pseudOp($p$) & Emits an pseudo op $p$ at the current position \\ + emitInstr($i$) & Emits an instruction $i$ at the current position \\ + blockName($b$) & Changes the block name to $b$ \\ + comment($msg$) & Emits a comment $msg$ at the current position \\ + finish & Signals that the use of the emitter is finished. + The emitter is free to perform its post-processing functions. + When this is finished the CFG is built. +\end{methods} + +The functor \sml{ControlFlowGraphGen} below can be +used to create a CFG builder that uses the \sml{CODE_EMITTER} interface. +\begin{SML} + signature \mlrischref{IR/mlrisc-cfg-gen.sig}{CONTROL_FLOW_GRAPH_GEN} = sig + structure CFG : CONTROL_FLOW_GRAPH + structure Emitter : CODE_EMITTER + sharing Emitter.I = CFG.I + sharing Emitter.P = CFG.P + val emitter : CFG.cfg -> Emitter.emitter + end + functor \mlrischref{IR/mlrisc-cfg-gen.sml}{ControlFlowGraphGen} + (structure CFG : CONTROL_FLOW_GRAPH + structure Emitter : CODE_EMITTER + structure P : INSN_PROPERTIES + sharing CFG.I = Emitter.I = P.I + sharing CFG.P = Emitter.P + sharing CFG.B = Emitter.B + ) : CONTROL_FLOW_GRAPH_GEN +\end{SML} + +\paragraph{Cluster to CFG} + +The core \MLRISC{} system implements many instruction selection +front-ends. The result of an instruction selection module is a linear +code layout block called a cluster. The functor \sml{Cluster2CFG} below +generates a translator that translates a cluster into a CFG: +\begin{SML} + signature \mlrischref{IR/mlrisc-cluster2cfg.sig}{CLUSTER2CFG} = sig + structure CFG : CONTROL_FLOW_GRAPH + structure F : FLOWGRAPH + sharing CFG.I = F.I + sharing CFG.P = F.P + sharing CFG.B = F.B + val cluster2cfg : F.cluster -> CFG.cfg + end + functor \mlrischref{IR/mlrisc-cluster2cfg.sml}{Cluster2CFG} + (structure CFG : CONTROL_FLOW_GRAPH + structure F : FLOWGRAPH + structure P : INSN_PROPERTIES + sharing CFG.I = F.I = P.I + sharing CFG.P = F.P + sharing CFG.B = F.B + ) : CLUSTER2CFG +\end{SML} + +\paragraph{CFG to Cluster} + +The basic \MLRISC{} system also implements many back-end functions +such as register allocation, assembly output and machine code output. +These modules all utilize the cluster representation. The +functor \mlrischref{IR/mlrisc-cfg2cluster.sml}{CFG2Cluster} +below generates a translator +that converts a CFG into a cluster. With the previous functor, +the CFG and the cluster presentation can be freely inter-converted. +\begin{SML} + signature \mlrischref{IR/mlrisc-cfg2cluster.sig}{CFG2CLUSTER} = sig + structure CFG : CONTROL_FLOW_GRAPH + structure F : FLOWGRAPH + sharing CFG.I = F.I + sharing CFG.P = F.P + sharing CFG.B = F.B + val cfg2cluster : { cfg : CFG.cfg, relayout : bool } -> F.cluster + end + functor \mlrischref{IR/mlrisc-cfg2cluster.sml}{CFG2Cluster} + (structure CFG : CONTROL_FLOW_GRAPH + structure F : FLOWGRAPH + sharing CFG.I = F.I + sharing CFG.P = F.P + sharing CFG.B = F.B + val patchBranch : {instr:CFG.I.instruction, backwards:bool} -> + CFG.I.instruction list + ) : CFG2CLUSTER +\end{SML} + +When a CFG originates from a cluster, we try to preserve +the same code layout through out all optimizations when possible. +The function \sml{cfg2cluster} takes an optional flag +that specifies we should force the recomputation of +the code layout of a control flow graph when translating a CFG +back into a cluster. + +\subsubsection{Basic CFG Transformations} + +Basic CFG transformations are implemented in the functor +\sml{CFGUtil}. These transformations include splitting edges, merging +edges, removing unreachable code and tail duplication. +\begin{SML} + functor \mlrischref{IR/mlrisc-cfg-util.sml}{CFGUtil} + (structure CFG : CONTROL_FLOW_GRAPH + structure P : INSN_PROPERTIES + sharing P.I = CFG.I + ) : CFG_UTIL +\end{SML} + +The signature of \sml{CFGUtil} is defined below: +\begin{SML} + signature \mlrischref{IR/mlrisc-cfg-util.sig}{CFG_UTIL} = sig + structure CFG : CONTROL_FLOW_GRAPH + val updateJumpLabel : CFG.cfg -> node_id -> unit + val mergeEdge : CFG.cfg -> CFG.edge -> bool + val eliminateJump : CFG.cfg -> node_id -> bool + val insertJump : CFG.cfg -> node_id -> bool + val splitEdge : CFG.cfg -> { edge : CFG.edge, jump : bool } + -> { edge : CFG.edge, node : CFG.node } + val isMerge : CFG.cfg -> node_id -> bool + val isSplit : CFG.cfg -> node_id -> bool + val hasSideExits : CFG.cfg -> node_id -> bool + val isCriticalEdge : CFG.cfg -> CFG.edge -> bool + val splitAllCriticalEdges : CFG.cfg -> unit + val ceed : CFG.cfg -> node_id * node_id -> bool + val tailDuplicate : CFG.cfg -> \{ subgraph : CFG.cfg, root : node_id \} + -> \{ nodes : CFG.node list, + edges : CFG.edge list \} + val removeUnreachableCode : CFG.cfg -> unit + val mergeAllEdges : CFG.cfg -> unit + end +\end{SML} + +These functions have the following meanings: +\begin{itemize} + \item \sml{updateJumpLabel} $G u$. This function + updates the label of the branch instruction in a block $u$ + to be consistent with the control flow edges with source $u$. + This is an nop if the CFG is already consistent. + \item \sml{mergeEdge} $G e$. This function merges edge + $e \equiv u \edge{} v$ + in the graph $G$ if possible. This is successful only if + there are no other edges flowing into $v$ and no other edges + flowing out from $u$. It returns true if the merge + operation is successful. If successful, the nodes $u$ and $v$ + will be coalesced into the block $u$. The jump instruction (if any) + in the node $u$ will also be elided. + \item \sml{eliminateJump} $G u$. This function eliminate the + jump instruction at the end of block $u$ if it is feasible. + \item \sml{insertJump} $G u$. This function inserts a jump + instruction in block $u$ if it is feasible. + \item \sml{splitEdge} $G e$. This function + split the control flow edge $e$, and return a new edge $e'$ and the + new block $u$ as return values. It addition, it takes as + argument a flag \sml{jump}. If this flag is true, + then a jump instruction is always placed in the + split; otherwise, we try to eliminate the jump when feasible. + \item \sml{isMerge} $G u$. This function tests whether block $u$ + is a \newdef{merge} node. A merge node is a node that + has two or more incoming flow edges. + \item \sml{isSplit} $G u$. This function tests whether block $u$ + is a \newdef{split} node. A split node is a node that + has two or more outgoing flow edges. + \item \sml{hasSideExits} $G u$. This function tests whether + a block has side exits $G$. This assumes that $u$ + is a hyperblock. + \item \sml{isCriticalEdge} $G e$. This function tests whether + the edge $e$ is a \newdef{critical} edge. The + edge $e \equiv u \edge{} v$ is critical iff + there are $u$ is merge node and $v$ is a split node. + \item \sml{splitAllCriticalEdges} $G$. This function goes + through the CFG $G$ and splits + all critical edges in the CFG. + This can introduce extra jumps and basic blocks in the program. + \item \sml{mustPreceed} $G (u,v)$. This function + checks whether two blocks $u$ and $v$ are necessarily adjacent. + Blocks $u$ and $v$ must be adjacent iff $u$ must preceed $v$ + in any feasible code layout. + \item \sml{tailDuplicate}. + \begin{SML} + val tailDuplicate : CFG.cfg -> \{ subgraph : CFG.cfg, root : node_id \} + -> \{ nodes : CFG.node list, + edges : CFG.edge list \} + \end{SML} +\begin{Figure} +\begin{boxit} +%\cpsfig{figure=../pictures/eps/tail-duplication.eps,width=3in} +\begin{center} + \includegraphics[width=3in]{../pictures/pdf/tail-duplication} +\end{center}% +\end{boxit} +\label{fig:tail-duplication} +\caption{Tail-duplication} +\end{Figure} + + This function tail-duplicates the region \sml{subgraph} + until it only has a single entry \sml{root}. + Return the set of new nodes and new edges. + The region is represented as a subgraph view of the CFG. + Figure~\ref{fig:tail-duplication} illustrates + this transformation. + + \item \sml{removeUnreachableCode} $G$. This function + removes all unreachable code from the graph. + \item \sml{mergeAllEdges} $G$. This function tries to merge all + the edges in the flowgraph $G$. Merging is performed in the + non-increasing order of edge frequencies. +\end{itemize} + +\subsubsection{Dataflow Analysis} +MLRISC provides a simple customizable module for performing +iterative dataflow analysis. A dataflow analyzer +has the following signature: + +\begin{SML} + signature \mlrischref{IR/dataflow.sig}{DATAFLOW_ANALYZER} = sig + structure CFG : CONTROL_FLOW_GRAPH + type dataflow_info + val analyze : CFG.cfg * dataflow_info -> dataflow_info + end +\end{SML} + +A dataflow problem is described by the signature \sml{DATAFLOW_PROBLEM}, +described below: +\begin{SML} + signature \mlrischref{IR/dataflow.sig}{DATAFLOW_PROBLEM} = sig + structure CFG : CONTROL_FLOW_GRAPH + type domain + type dataflow_info + val forward : bool + val bot : domain + val == : domain * domain -> bool + val join : domain list -> domain + val prologue : CFG.cfg * dataflow_info -> + CFG.block node -> + \{ input : domain, + output : domain, + transfer : domain -> domain + \} + val epilogue : CFG.cfg * dataflow_info -> + \{ node : CFG.block node, + input : domain, + output : domain + \} -> unit + end +\end{SML} +This description contains the following items +\begin{itemize} +\item \sml{type domain} is the abstract lattice domain $D$. +\item \sml{type dataflow_info} is where the dataflow information +is stored. +\item \sml{forward} is true iff the dataflow problem is in the +forward direction +\item \sml{bot} is the bottom element of $D$. +\item \sml{==} is the equality function on $D$. +\item \sml{join} is the least-upper-bound function on $D$. +\item \sml{prologue} is a user-supplied function that performs +pre-processing and setup. For each CFG node $X$, this function +computes +\begin{itemize} + \item \sml{input} -- which is the initial input value of $X$ + \item \sml{output} -- which is the initial output value of $X$ + \item \sml{transfer} -- which is the transfer function on $X$. +\end{itemize} +\item \sml{epilogue} is a function that performs post-processing. +It visits each node $X$ in the flowgraph and return the resulting +\sml{input} and \sml{output} value for $X$. +\end{itemize} + +To generate a new dataflow analyzer from a dataflow problem, +the functor \sml{Dataflow} can be used: +\begin{SML} + functor \mlrischref{IR/dataflow.sml}{Dataflow}(P : DATAFLOW_PROBLEM) : DATAFLOW_ANALYZER = +\end{SML} + +\subsubsection{Static Branch Prediction} + +\subsubsection{Branch Optimizations} diff --git a/MLRISC/Doc/latex/mlrisc-md.tex b/MLRISC/Doc/latex/mlrisc-md.tex new file mode 100644 index 0000000..ab9ab23 --- /dev/null +++ b/MLRISC/Doc/latex/mlrisc-md.tex @@ -0,0 +1,662 @@ +\section{Machine Description} +\subsection{Overview} + + \newdef{MDGen} is a simple tool for generating +various modules in the MLRISC customizable code generator +directly from machine descriptions. These descriptions +contain architectural information such as: +\begin{enumerate} + \item How the the register file(s) are organized. + \item How instructions are encoded in machine code: MLRISC uses +this information to generate machine instructions directly into a byte stream. +Directly machine code generation is used in the SML/NJ compiler. + \item How instructions are pretty printed in assembly: this is used +for debugging and also for assembly output for other non-SML/NJ backends. + \item How instructions are internally represented in MLRISC. + \item Other information needed for performing optimizations, which + include: + \begin{enumerate} + \item The register transfer list (RTL) that defines the + operational semantics of the instruction. + \item Delay slot mechanisms. + \item Information for performing span dependency resolution. + \item Pipeline and reservation table characteristics. + \end{enumerate} +\end{enumerate} + +Currently, item 5 is not ready for prime time. + +\subsubsection{Why MDGen?} +MLRISC manipulates all instruction sets via a set of abstract +interfaces, which allows the programmer to arbitrarily choose an +instruction representation that is most convenient for a particular +architecture. However, various functions that manipulate +this representation must be provided by the instruction set's programmer. +As the number and complexities of each optimizations grow, and as +the number of architectures increases, the functions +for manipulating the instructions become more numerous and complex. +In order to keep the effort of developing and maintaining +an instruction set manageable, +the MDGen tool is developed to (partially) automate this task. + +\subsubsection{Syntax} + + MDGen's machine descriptions are written in a syntax that is very +much like that of +\externhref{http://cm.bell-labs.com/cm/cs/what/smlnj/sml.html}{Standard ML}. +Most core SML constructs are recognized. +In addition, new declaration forms specific to MDGen are +used to specify architectural information. + +\paragraph{Reserved Words} + All SML keywords are reserved words in MDGen. + In addition, the following keywords are also reserved: + +\begin{verbatim} + always architecture assembly at backwards big bits branching called + candidate cell cells cellset debug delayslot dependent endian field + fields formats forwards instruction internal little locations lowercase + name never nodelayslot nullified opcode ordering padded pipeline predicated + register rtl signed span storage superscalar unsigned uppercase + verbatim version vliw when +\end{verbatim} + + Two kinds are quotations marks are also reserved: +\begin{SML} + [[ ]] + `` '' +\end{SML} + + The first \sml{[[ ]]} is for describing semantics. The +second \sml{`` ''} is for describing assembly syntax. + +\paragraph{Syntactic Sugar} + + MDGen recognizes the following syntactic sugar. +\begin{description} +\item[Record abbreviations] +Record expressions such as \sml{{x=x,y=y,z=z}} +can be simplified to just \sml{{x,y,z}}. +\item[Binary literals] +Literals in binary can be written with the prefix \sml{0b} (for integer types) +or \sml{0wb} (for word types). For example, \sml{0wb101111} is the same +as \sml{0wx2f} and \sml{0w79}. +\item[Bit slices] + A bit slice, which extracts a range of bits from a word, can be written +using an \sml{at} expression. For example, \sml{w at [16..18]} +means the same thing as \verb|Word32.andb(Word32.>>(w, 0w16),0w7)|, i.e. +it extracts bit 16 to 18 from \sml{w}. +The least significant bit the zeroth bit. + +In general, we can write: +\begin{SML} + w at [range1, range2, ..., rangen] +\end{SML} +to extract a sequence of slices from $w$ and concatenate them together. +For example, the expression +\begin{SML} + 0wxabcd at [0..3, 4..7, 8..11, 12..15] +\end{SML} +swap the 4 nybbles from the 16-bit word, and evaluates to \sml{0wxdcba}. + +\item[Signature] +Signature declarations of the form +\begin{SML} + val x y z : int -> int +\end{SML} +can be used as a shorthand for the more verbose: +\begin{SML} + val x : int -> int + val y : int -> int + val z : int -> int +\end{SML} +\end{description} + +\subsubsection{Elaboration Semantics} + + Unfortunately, there is no complete formal semantics of how +an MDGen specification elaborates. + But generally speaking, a machine description is a just a structure +(in the SML sense). Different components of this structure describe +different aspects of the architecture. + +\paragraph{Syntactic Overloading} +In general, the syntactic overloading are used heavily in MDGen. +There are three types of definitions: +\begin{itemize} + \item Definitions that defines properties of the instruction set. + \item Definitions of functions and terms that are in the RTL meta-language. +The syntax of MDGen's RTL language is borrowed heavily from Lambda-RTL, +which in turns is borrowed heavily from SML. + \item Definitions of functions and types that are to be included in the +output generated by the MDGen tool. These are usually auxiliary +helper functions and definitions. +\end{itemize} +In general, entities of type 2, when appearing in other context, are +properly meta-quoted in the semantics quotations \sml{[[ ]]}. + +\subsubsection{Basic Structure of A Machine Description} + + The machine description for an architecture are defined via +an \sml{architecture} declaration, which has the following general +form. + +\begin{SML} +architecture name = +struct + \Term{architecture type declaration} + \Term{endianess declaration} + \Term{storage class declarations} + \Term{locations declarations} + \Term{assembly case declarations} + \Term{delayslot declaration} + \Term{instruction machine encoding format declarations} + \Term{nested structure declarations} + \Term{instruction definition} +end +\end{SML} + +\subsection{Describing the Architecture} + +\subsubsection{Architecture type} + Architecture type declaration specifies whether the architecture is +a superscalar or a VLIW/EPIC machine. Currently, this information is +ignored. + +\begin{SML} + \Term{architecture type declaration} ::= superscalar | vliw +\end{SML} + +\subsubsection{Storage class} + +Storage class declarations specify various information about the +registers in the architecture. For example, the Alpha has 32 general +purpose registers and 32 floating point registers. In addition, MLRISC +requires that each architecture specifies a (pseudo) register +type\footnote{Called cellkind in MLRISC.} for +holding condition codes (\sml{CC}). +To specify these information in MDGen, we can say: + +\begin{SML} + storage + GP "r" = 32 cells of 64 bits in cellset called "register" + assembly as (fn (30,_) => "$sp" + | (r,_) => "$"^Int.toString r + ) + | FP "f" = 32 cells of 64 bits in cellset called "floating point register" + assembly as (fn (f,_) => "$f"^Int.toString f) + | CC "cc" = cells of 64 bits in cellset GP called "condition code register" + assembly as "cc" +\end{SML} + +\begin{itemize} + \item There are 32 64-bit general purpose registers, +32 64-bit floating point registers, while \sml{CC} is not a +real register type. + \item Cellsets +are used by MLRISC for annotating liveness information in the program. + The clause \sml{in cellset} states that register type \sml{GP} +and \sml{FP} are allotted their own components in the cellset, +while the register type \sml{CC} are put +in the same cellset component as \sml{GP}. + \item The clause \sml{assembly as} specifies + how each register is to be pretty printed. On the Alpha, general + purpose register are pretty printed with prefix \sml{$}, while + floating point registers are pretty printed with the prefix \sml{$f}. + A special case is made for register 30, which is the stack pointer, and + is pretty printing as \sml{$sp}. Pseudo condition code registers + are pretty printed with the prefix \sml{cc}. +\end{itemize} + +\subsubsection{Locations} + + Special locations in the register files can be declared using the +\sml{locations} declarations. On the Alpha, GPR +30 is the stack pointer, GPR 28 and floating point register 30 +are used as the assembly temporaries. This special constants +can be defined as follows: + +\begin{SML} + locations + stackptrR = $GP[30] + and asmTmpR = $GP[28] + and fasmTmp = $FP[30] +\end{SML} + +\subsection{Specifying the Machine Encoding} +\subsubsection{Endianess} + +The endianess declaration specifies whether the machine is little +endian or big endian so that the correct machine instruction encoding +functions can be generated. The general syntax of this is: + +\begin{SML} + \Term{endianess declaration} ::= little endian | big endian +\end{SML} + +The Alpha is little endian, so we just say: +\begin{SML} + little endian +\end{SML} + +\subsubsection{Defining New Instruction Formats} + + How instructions are encoded are specified using +\sml{instruction format} declarations. An instruction format declaration +has the following syntax: +\begin{SML} + \Term{instruction machine encoding format declarations} ::= + instruction formats n bits + \Term{format}1 + | \Term{format}2 + | \Term{format}3 + | ... + | \Term{format}n-1 + | \Term{format}n +\end{SML} + +Each encoding format can be a primitive format, or a derived format. + +\paragraph{Primitive formats} + +A primitive format is simply specified by giving it a name and specifying +the position, names and types of its fields. This is usually the same +way it is described in a architectural reference manual. + + +Here is how we specify some of the (32 bit) primitive instruction formats +used in the Alpha. +\begin{SML} + instruction formats 32 bits + Memory\{opc:6, ra:5, rb:GP 5, disp: signed 16\} + | Jump\{opc:6=0wx1a,ra:GP 5,rb:GP 5,h:2,disp:int signed 14\} + | Memory_fun\{opc:6, ra:GP 5, rb:GP 5, func:16\} + | Branch\{opc:branch 6, ra:GP 5, disp:signed 21\} + | Fbranch\{opc:fbranch 6, ra:FP 5, disp:signed 21\} + | Operate0\{opc:6,ra:GP 5,rb:GP 5,sbz:13..15=0,_:1=0,func:5..11,rc:GP 5\} + | Operate1\{opc:6,ra:GP 5,lit:signed 13..20,_:1=1,func:5..11,rc:GP 5\} +\end{SML} + +For example, the format \sml{Memory} +\begin{SML} + Memory\{opc:6, ra:5, rb:GP 5, disp: signed 16\} +\end{SML} +has a 6-bit opcode field, a 5-bit \sml{ra} field, a 5-bit \sml{rb} +field which always hold a general purpose register, and a 16-bit +sign-extended displacement field. The field to the left is positioned +at the most significant bits, while the field to the right is positioned +at the least. The widths of these fields must add up to 32 bits. + + +Similarly, the format \sml{Jump} +\begin{SML} + Jump{opc:6=0wx1a,ra:GP 5,rb:GP 5,h:2,disp:int signed 14} +\end{SML} +contains a 6-bit opcode field which always hold the constant \sml{0x1a}, +two 5-bit fields \sml{ra} and \sml{rb} which are of type \sml{GP}, +and a 14-bit sign-extended field of type integer. + + Each field in a primitive format has one of 5 forms: +\begin{SML} + \Term{name} : \Term{position} + \Term{name} : \Term{position} = \Term{value} + \Term{name} : \Term{type} \Term{position} + \Term{name} : \Term{type} \Term{position} = \Term{value} + _ : \Term{position} = \Term{value} +\end{SML} +where \Term{position} is either a width, or a bits range +$n$\sml{..}$m$, +with an optional \sml{signed} prefix. The last form, with a wild card +for the field name, can be used to specify an anonymous field that +always has a fixed value. + + + By default, a field has type \sml{Word32.word}. If a type $T$ +is specified, then the function \sml{emit_}$T$ is implicitly called +to convert the type into the appropriate encoding. The function +\sml{emit_}$T$ are generated automatically by MDGen if it is a cellkind +defined by the \sml{storage} class declaration, or if it is a primitive +type such as integer or boolean. +There are also other ways to automatically generate this function +(more on this later.) + + For example, the format \sml{Operate1} +\begin{SML} + Operate1\{opc:6,ra:GP 5,lit:signed 13..20,_:1=1,func:5..11,rc:GP 5\} +\end{SML} +states that bits 26 to 31 are allocated to field \sml{opc}, +bits 21 to 25 are allocated to field \sml{ra}, which is of type +\sml{GP}, bits 13 to 20 are allocated to field \sml{lit}, bit 12 +is a single bit of value 1, etc. + + +MDGen generates a function for each primitive format declaration of +the same name that can be used for emitting the instruction. +In the case of the Alpha, the following functions are generated: +\begin{SML} + val Memory : \{opc:Word32.word, ra:Word32.word, + rb:int, disp:Word32.word\} -> unit + val Jump : \{ra:int, rb:int, disp:Word32.word\} -> unit + val Operate1 : \{opc:Word32.word, ra:int, lit:Word32.word, + func:Word32.word, rc:int\} -> unit +\end{SML} + +\paragraph{Derived formats} + + Derived formats are simply instruction formats that are defined +in terms of other formats. On the alpha, we have a \sml{Operate} +format that simplifies to either \sml{Operate0} or \sml{Operate1}, +depending on whether the second argument is a literal or a register. +\begin{SML} + Operate\{opc,ra,rb,func,rc\} = + (case rb of + I.REGop rb => Operate0\{opc,ra,rb,func,rc\} + | I.IMMop i => Operate1\{opc,ra,lit=itow i,func,rc\} + | I.HILABop le => Operate1\{opc,ra,lit=High{le=le},func,rc\} + | I.LOLABop le => Operate1\{opc,ra,lit=Low{le=le},func,rc\} + | I.LABop le => Operate1\{opc,ra,lit=itow(LabelExp.valueOf le),func,rc\} + ) +\end{SML} + +\subsubsection{Generating Encoding Functions} + + In MLRISC, we represent an instruction as a set of ML datatypes. +Some of these datatypes represent specific fields or +opcodes of the instructions. +MDGen lets us to associate a machine encoding to each datatype constructor +directly in the specification, and automatically generates an +encoding function for these datatypes. + +There are two different ways of specifying an encoding. The first way +is just to write the machine encoding directly next the constructor. +Here's an example directly from the Alpha description: +\begin{SML} + structure Instruction = + struct + datatype branch! = (* table C-2 *) + BR 0x30 + | BSR 0x34 + | BLBC 0x38 + | BEQ 0x39 | BLT 0x3a | BLE 0x3b + | BLBS 0x3c | BNE 0x3d | BGE 0x3e + | BGT 0x3f + + datatype fbranch! = (* table C-2 *) + FBEQ 0x31 | FBLT 0x32 + | FBLE 0x33 | FBNE 0x35 + | FBGE 0x36 | FBGT 0x37 + + ... + end +\end{SML} + +The datatypes \sml{branch} and \sml{fbranch} represent specific +branch opcodes for integer branches \sml{BRANCH}, or floating point +branches \sml{FBRANCH}. On the Alpha, instruction \sml{BR} is encoded +with an opcode of \sml{0x30}, instruction \sml{BSR} is encoded +as \sml{0x34} etc. MDGen automatically generates two functions +\begin{SML} + val emit_branch : branch -> Word32.word + val emit_fbranch : branch -> Word32.word +\end{SML} +that perform this encoding. + +In the specification for the instruction set, we state that the +\sml{BRANCH} instruction should be encoded using format \sml{Branch}, +while the \sml{FBRANCH} instruction should be encoded using +format \sml{Fbranch}. +\begin{SML} + structure MC = + struct + (* Auxiliary function for computing the displacement of a label *) + fun disp ... = ... + ... + end + + ... + + instruction + ... + + | BRANCH of branch * $GP * Label.label + Branch\{opc=branch,ra=GP,disp=disp label\} + + | FBRANCH of fbranch * $FP * Label.label + Fbranch\{opc=fbranch,ra=FP,disp=disp label\} + + | ... +\end{SML} + +Since the primitive instructions formats \sml{Branch} and \sml{FBranch} +are defined with branch and fbranch as the type in the opcode field +\begin{SML} + | Branch\{opc:branch 6, ra:GP 5, disp:signed 21\} + | Fbranch\{opc:fbranch 6, ra:FP 5, disp:signed 21\} +\end{SML} +the functions \sml{emit_branch} and \sml{emit_fbranch} are implicitly +called. + + +Another way to specify an encoding is to specify a range, as +in the following example: +\begin{SML} + datatype fload[0x20..0x23]! = LDF | LDG | LDS | LDT + + datatype fstore[0x24..0x27]! = STF | STG | STS | STT +\end{SML} + +This states that \sml{LDF} should be assigned the encoding \sml{0x20}, +\sml{LDG} the encoding \sml{0x21} etc. This form is useful for +specifying a consecutive range. + +\subsubsection{Encoding Variable Length Instructions} + + Most architectures nowadays have fixed length encodings for instructions. +There are some notatable exceptions, however. +The Intel x86 architecture uses a legacy +variable length encoding. Modern RISC machines developed for +embedded systems may utilize space-reduction compression schemes in their +instruction sets. Finally, VLIW machines usually have some form +of NOP compression scheme for compacting issue packets. + +\subsection{Specifying the Assembly Formats} + +\subsubsection{Assembly Case Declaration} + + The assembly case declaration specifies whether the assembly should be +emitted in lower case, upper case, or verbatim. If either lower case +or upper case is specified, all literal strings are converted to the +appropriate case. The general syntax of this declaration is: + +\begin{SML} + \Term{assembly case declaration} ::= + lowercase assembly + | uppercase assembly + | verbatim assembly +\end{SML} + +\subsubsection{Assembly Annotations} + + Assembly output are specified in the assembly meta quotations +\sml{`` ''}, or string quotations \sml{" "}. +For example, here is a fragment from the Alpha description: + +\begin{SML} + instruction + ... + | LOAD of \{ldOp:load, r: $GP, b: $GP, d:operand, mem:Region.region\} + ``\t, ()'' + + | STORE of \{stOp:store, r: $GP, b: $GP, d:operand, mem:Region.region\} + ``\t, ()'' + + | BRANCH of branch * $GP * Label.label + ``\t,
  • \t(), '' + mc: Load{Op=emit_loadi li,b=r,im14=low_sign_ext_im14(opn i),t=t} + rtl: ``
  • '' + latency: LOAD + pipeline: LOAD + + | LOAD of {l:load, r1: $GP, r2: $GP, t: $GP, mem:Region.region} + asm: ``\t(), '' + mc: let val (ext4,u,m) = emit_load l + in IndexedLoad{Op=0w3,b=r1,x=r2,ext4,u,t,m} + end + rtl: ``'' + latency: LOAD + pipeline: LOAD + + | STORE of {st:store,b: $GP,d:operand,r: $GP, mem:Region.region} + asm: ``\t, ()'' + mc: Store{st,b=b,im14=low_sign_ext_im14(opn d),r=r} + rtl: ``'' + pipeline: STORE + + | ARITH of {a:arith,r1: $GP, r2: $GP, t: $GP} + asm: ``\t, , '' + mc: Arith{a,r1,r2,t} + rtl: ``'' + latency: ARITH + pipeline: ARITH + + | ARITHI of {ai:arithi, i:operand, r: $GP, t: $GP} + asm: ``\t, , '' + mc: (case ai of + I.ADDIL => LongImmed{Op=0wxa,r=r,im21=assemble_21(opn i)} + | _ => let val (Op,e) = emit_arithi ai + in Arithi{Op,r,t,im11=low_sign_ext_im11(opn i),e} + end + ) + rtl: ``'' + latency: ARITH + pipeline: ARITH + + (* This is a composite instruction. + * The effect is the same as t <- if r1 cc r2 then i+b else 0 + * if t1 = t2 + * COMCLR,cc r1, r2, t1 + * LDO i(b), t2 + *) + | COMCLR_LDO of {cc:bcond, r1: $GP, r2: $GP, t1 : $GP, + i:int, b: $GP, t2: $GP} + asm: (``comclr,\t, , \n\t''; + ``ldo\t(), '' + ) + mc: let val (c,f) = cmpCond cc + in CompareClear{r1,r2,t=t1,c,f,ext=0wx22}; + Load{Op=0wx0d,b,im14=low_sign_ext_im14(itow i),t=t2} + end + rtl: if t1 = t2 then ``COMCLR_LDO2_'' + else if t1 = 0 then ``COMCLR_LDO3_'' + else ``'' + latency: ARITH + pipeline: ARITH + + | COMICLR_LDO of {cc:bcond, i1:operand, r2: $GP, t1 : $GP, + i2:int, b: $GP, t2: $GP} + asm: (``comiclr,\t, , \n\t''; + ``ldo\t(), '' + ) + mc: let val (c,f) = cmpCond cc + in CompareImmClear{r=r2,t=t1,c,f,im11=low_sign_ext_im11(opn i1)}; + Load{Op=0wx0d,b,im14=low_sign_ext_im14(itow i2),t=t2} + end + rtl: if t1 = t2 then ``COMICLR_LDO2_'' + else if t1 = 0 then ``COMICLR_LDO3_'' + else ``COMICLR_LDO_'' + latency: ARITH + pipeline: ARITH + + | SHIFTV of {sv:shiftv, r: $GP, len:int, t: $GP} + asm: ``\t, , '' + mc: (case sv of + I.VEXTRU => Extract{Op=0wx34,r,t,ext3=0w4,p=0,clen=32-len} + | I.VEXTRS => Extract{Op=0wx34,r,t,ext3=0w5,p=0,clen=32-len} + | I.ZVDEP => Deposit{Op=0wx35,t,r,ext3=0w0,cp=0,clen=32-len} + ) + rtl: ``'' + latency: ARITH + pipeline: ARITH + + | SHIFT of {s:shift, r: $GP, p:int, len:int, t: $GP} + asm: ``\t,

    , , '' + mc: (case s of + I.EXTRU => Extract{Op=0wx34,r,t,ext3=0w6,p=p,clen=32-len} + | I.EXTRS => Extract{Op=0wx34,r,t,ext3=0w7,p=p,clen=32-len} + | I.ZDEP => Deposit{Op=0wx35,t,r,ext3=0w2,cp=31-p,clen=32-len} + ) + rtl: ``'' + latency: ARITH + pipeline: ARITH + + | BCOND of {cmp: cmp, bc:bcond,r1: $GP,r2: $GP,n:bool,nop:bool, + t:Label.label, f:Label.label} + asm: ``,\t, , '' + mc: bcond(cmp,bc,r1,r2,n,t,nop) + rtl: ``_'' + padding: nop = true + nullified: n = true + delayslot: not nullified orelse + (branching forwards andalso taken orelse + branching backwards andalso not taken + ) + delayslot candidate: false + pipeline: BRANCH + + | BCONDI of {cmpi: cmpi, bc:bcond, i:int, r2: $GP, n:bool, nop:bool, + t:Label.label, f:Label.label} + asm: ``,\t, , '' + mc: bcondi(cmpi,bc,i,r2,n,t,nop) + rtl: ``_'' + padding: nop = true + nullified: n = true + delayslot: not nullified orelse + (branching forwards andalso taken orelse + branching backwards andalso not taken + ) + delayslot candidate: false + pipeline: BRANCH + + (* bc must be either < or >= *) + | BB of {bc:bitcond,r: $GP, p:int, n:bool, nop:bool, + t:Label.label, f:Label.label} + asm: ``bb,\t,

    , '' + mc: branchOnBit(bc,r,p,n,t,nop) + rtl: ``BB_'' + padding: nop = true + nullified: n = true + delayslot: not nullified orelse + (branching forwards andalso taken orelse + branching backwards andalso not taken + ) + delayslot candidate: false + pipeline: BRANCH + + | B of {lab:Label.label, n:bool} + asm: ``b\t'' + mc: branchLink(0wx3a,zeroR,lab,0w0,n) + rtl: ``B'' + nullified: n = true + delayslot candidate: false + pipeline: BRANCH + + (* + * This composite instruction is generated only during span dependence + * resolution when trying to resolve conditional branches. + * The expanded sequence is 12 bytes long. + * Basically, the branch and link instruction jumps directly to + * the next instruction at tmpLab, and put the address of tmpLab + 4 + * into register tmp. The offset computation in addil computes the + * actual address of lab. + *) + | LONGJUMP of {lab:Label.label, n:bool, tmp: $GP, tmpLab:Label.label} + asm: (``bl,n\t, \n''; + ``:\n\t''; + ``addil -(+4), \n\t''; + ``bv\t%r0()'' + ) + mc: let val offset = + T.SUB(32,T.LABEL lab, + T.ADD(32,T.LABEL tmpLab, T.LI(IntInf.fromInt 4))) + in (* set the location of tmpLab *) + Label.setAddr(tmpLab, !loc+4); + branchLink(0wx3a,tmp,tmpLab,0w0,n); + LongImmed{Op=0wxa,r=tmp, + im21=assemble_21(itow(MLTreeEval.valueOf offset))}; + BranchVectored{Op=0wx3a,t=tmp,x=zeroR,ext3=0w6,n=n} + end + rtl: ``B'' + nullified: n = true + delayslot candidate: false + pipeline: BRANCH + + | BE of {b: $GP, d:operand, sr:int, n:bool, labs: Label.label list} + asm: ``be\t(,)'' + mc: let val (w,w1,w2) = assemble_17(opn d) + in BranchExternal{Op=0wx38,b=b,w1=w1,s=assemble_3(itow sr), + w2=w2,n=n,w=w} + end + nullified: n = true + delayslot candidate: false + pipeline: BRANCH + + | BV of {x: $GP, b: $GP, labs: Label.label list, n:bool} + asm: ``bv\t()'' + mc: BranchVectored{Op=0wx3a,t=b,x=x,ext3=0w6,n=n} + rtl: ``BV'' + nullified: n = true + delayslot candidate: false + pipeline: BRANCH + + | BLR of {x: $GP, t: $GP, labs: Label.label list, n:bool} + asm: ``blr\t()'' + mc: BranchVectored{Op=0wx3a,t=t,x=x,ext3=0w2,n=n} + nullified: n = true + delayslot candidate: false + pipeline: BRANCH + + | BL of {lab:Label.label ,t: $GP, defs: $cellset, uses: $cellset, + cutsTo: Label.label list, mem:Region.region, n:bool} + asm: ``bl\t, '' + mc: branchLink(0wx3a,t,lab,0w0,n) + nullified: n = true + delayslot candidate: false + pipeline: BRANCH + + | BLE of {d:operand,b: $GP, sr:int, t: $GP, + defs: $cellset, uses: $cellset, cutsTo: Label.label list, + mem:Region.region} + asm: ``ble\t(,)< + emit_defs(defs)>'' + mc: (case (d,CellsBasis.registerId t) of + (I.IMMED 0,31) => + BranchExternal{Op=0wx39,b=b,w1=0w0,s=assemble_3(itow sr), + w2=0w0,n=true,w=0w0} + | _ => error "BLE: not implemented" + ) + rtl: ``BLE'' + nullified: false + delayslot candidate: false + pipeline: BRANCH + + (* BLE implicitly defines %r31. The destination register t + * is assigned in the delay slot. + *) + | LDIL of {i:operand, t: $GP} + asm: ``ldil\t, '' + mc: LongImmed{Op=0wx8,r=t,im21=assemble_21(opn i)} + rtl: ``LDIL'' + latency: ARITH + pipeline: ARITH + + | LDO of {i:operand, b: $GP, t: $GP} + asm: ``ldo\t(), '' + mc: Load{Op=0wx0d,b,im14=low_sign_ext_im14(opn i),t=t} + rtl: if b = 0 then ``LDO2'' else ``LDO'' + latency: ARITH + pipeline: ARITH + + | MTCTL of {r: $GP, t: $CR} + asm: ``mtctl\t, '' + mc: MoveToControlReg{Op=0w0,t,r,rv=0w0,ext8=0wxc2} + rtl: ``MTCTL'' + latency: ARITH + pipeline: ARITH + + | FSTORE of {fst:fstore,b: $GP, d:int, r: $FP,mem:Region.region} + asm: ``\t, ()'' + mc: (case fst of + I.FSTDS => CoProcShort{Op=0wxb,b,im5=low_sign_ext_im5(itow d), + s=0w0,a=0w0,ls=0w1,uid=0w0,rt=r} + | I.FSTWS => CoProcShort{Op=0wx9,b,im5=low_sign_ext_im5(itow d), + s=0w0,a=0w0,ls=0w1,uid=0w1,rt=r} + ) + rtl: ``'' + pipeline: STORE + + | FSTOREX of {fstx:fstorex, b: $GP, x: $GP,r: $FP,mem:Region.region} + asm: ``\t, ()'' + mc: let val (Op,uid,u,m) = emit_fstorex fstx + in CoProcIndexed{Op=Op,b,x,s=0w0,u,m,ls=0w1,uid=uid,rt=r} + end + rtl: ``'' + pipeline: STORE + + | FLOAD of {fl:fload, b: $GP, d:int, t: $FP, mem:Region.region} + asm: ``\t(), '' + mc: (case fl of + I.FLDDS => CoProcShort{Op=0wxb,b,im5=low_sign_ext_im5(itow d), + s=0w0,a=0w0,ls=0w0,uid=0w0,rt=t} + | I.FLDWS => CoProcShort{Op=0wx9,b,im5=low_sign_ext_im5(itow d), + s=0w0,a=0w0,ls=0w0,uid=0w1,rt=t} + ) + rtl: ``'' + latency: LOAD + pipeline: LOAD + + | FLOADX of {flx:floadx, b: $GP, x: $GP, t: $FP, mem:Region.region} + asm: ``\t(), '' + mc: let val (Op,uid,u,m) = emit_floadx flx + in CoProcIndexed{Op=Op,b,x,s=0w0,u,m,ls=0w0,uid=uid,rt=t} + end + rtl: ``'' + latency: LOAD + pipeline: LOAD + + | FARITH of {fa:farith,r1: $FP, r2: $FP,t: $FP} + asm: ``\t, , '' + mc: (case fa of + I.XMPYU => FloatOp3Maj0E{sop=0w2,f=0w1,r1,r2,t,r11=0w0,r22=0w0} + | _ => let val (sop,fmt) = emit_farith fa + in FloatOp3Maj0C{sop,r1,r2,t,n=0w0,fmt} end + ) + rtl: ``'' + latency: (case fa of + (I.FMPY_S | I.FMPY_D | I.FMPY_Q) => FMPY + | (I.FDIV_S | I.FDIV_D | I.FDIV_Q) => FDIV + | _ => FARITH + ) + pipeline: (case fa of + (I.FMPY_S | I.FMPY_D | I.FMPY_Q) => FMPY + | (I.FDIV_S | I.FDIV_D | I.FDIV_Q) => FDIV + | _ => FARITH + ) + + | FUNARY of {fu:funary,f: $FP, t: $FP} + asm: ``\t, '' + mc: let val (sop,fmt) = emit_funary fu + in FloatOp0Maj0C{r=f,t=t,sop=sop,fmt=fmt} + end + rtl: ``'' + latency: FARITH + pipeline: FARITH + + | FCNV of {fcnv:fcnv, f: $FP, t: $FP} + asm: ``\t, '' + mc: let val (sop,sf,df) = emit_fcnv fcnv + in FloatOp1Maj0E{r=f,t=t,sop=sop,sf=sf,df=df,r2=0w1,t2=0w0} + end + rtl: ``'' + latency: FARITH + pipeline: FARITH + + (* The following three instructions have been replaced by FBRANCH. + This make life much easier for instruction schedulers. + | FCMP of fcond * int * int + | FTEST + | FBCC of {t:Label.label, f:Label.label, n:bool} + *) + | FBRANCH of {cc:fcond, fmt:fmt, f1: $FP, f2: $FP, + t:Label.label, f:Label.label, n:bool, long:bool} + asm: (``fcmp,,\t, \n\t''; + ``ftest\n\t''; + ``b\t'' + ) + (* fmt = 1 means double precision; will have to extend later *) + mc: (FloatOp2Maj0C{r1=f1,r2=f2,sop=0w0,fmt=emit_fmt fmt, + n=0w0,c=emit_fcond cc}; + FTest{}; + branchLink(0wx3a,zeroR,t,0w0,n) (* B,n t *) + ) + rtl: ``FBRANCH_'' + nullified: n + delayslot candidate: false + pipeline: BRANCH + + | BREAK of {code1:int, code2:int} + asm: ``break\t, '' + delayslot candidate: false + + | NOP + asm: ``nop'' + mc: NOP{} + rtl: ``NOP'' + pipeline: NOP + + | SOURCE of {} + asm: ``source'' + mc: () + + | SINK of {} + asm: ``sink'' + mc: () + + | PHI of {} + asm: ``phi'' + mc: () + + structure SSA = + struct + + fun operand(ty,I.REG r) = T.REG(ty, r) + | operand(ty,I.IMMED i) = T.LI(IntInf.fromInt i) + (*| operand(ty,I.LabExp(le,_)) = T.LABEL le*) + | operand _ = error "operand" + + end + +end diff --git a/MLRISC/hppa/instructions/hppaCells.sml b/MLRISC/hppa/instructions/hppaCells.sml new file mode 100644 index 0000000..b1e16f5 --- /dev/null +++ b/MLRISC/hppa/instructions/hppaCells.sml @@ -0,0 +1,118 @@ +(* + * WARNING: This file was automatically generated by MDLGen (v3.1) + * from the machine description file "hppa/hppa.mdl". + * DO NOT EDIT this file directly + *) + + +signature HPPACELLS = +sig + include CELLS + val CR : CellsBasis.cellkind + val CELLSET : CellsBasis.cellkind + val showGP : CellsBasis.register_id -> string + val showFP : CellsBasis.register_id -> string + val showCR : CellsBasis.register_id -> string + val showCC : CellsBasis.register_id -> string + val showMEM : CellsBasis.register_id -> string + val showCTRL : CellsBasis.register_id -> string + val showCELLSET : CellsBasis.register_id -> string + val showGPWithSize : CellsBasis.register_id * CellsBasis.sz -> string + val showFPWithSize : CellsBasis.register_id * CellsBasis.sz -> string + val showCRWithSize : CellsBasis.register_id * CellsBasis.sz -> string + val showCCWithSize : CellsBasis.register_id * CellsBasis.sz -> string + val showMEMWithSize : CellsBasis.register_id * CellsBasis.sz -> string + val showCTRLWithSize : CellsBasis.register_id * CellsBasis.sz -> string + val showCELLSETWithSize : CellsBasis.register_id * CellsBasis.sz -> string + val returnPtr : CellsBasis.cell + val sar : CellsBasis.cell + val r0 : CellsBasis.cell + val f0 : CellsBasis.cell + val addGP : CellsBasis.cell * cellset -> cellset + val addFP : CellsBasis.cell * cellset -> cellset + val addCR : CellsBasis.cell * cellset -> cellset + val addCC : CellsBasis.cell * cellset -> cellset + val addMEM : CellsBasis.cell * cellset -> cellset + val addCTRL : CellsBasis.cell * cellset -> cellset + val addCELLSET : CellsBasis.cell * cellset -> cellset +end + +structure HppaCells : HPPACELLS = +struct + exception HppaCells + fun error msg = MLRiscErrorMsg.error("HppaCells",msg) + open CellsBasis + fun showGPWithSize (r, ty) = (fn (r, _) => "%r" ^ (Int.toString r) + ) (r, ty) + and showFPWithSize (r, ty) = (fn (f, _) => "%f" ^ (Int.toString f) + ) (r, ty) + and showCRWithSize (r, ty) = (fn (cr, _) => "%cr" ^ (Int.toString cr) + ) (r, ty) + and showCCWithSize (r, ty) = (fn _ => "cc" + ) (r, ty) + and showMEMWithSize (r, ty) = (fn (r, _) => "m" ^ (Int.toString r) + ) (r, ty) + and showCTRLWithSize (r, ty) = (fn (r, _) => "ctrl" ^ (Int.toString r) + ) (r, ty) + and showCELLSETWithSize (r, ty) = (fn _ => "CELLSET" + ) (r, ty) + fun showGP r = showGPWithSize (r, 32) + fun showFP r = showFPWithSize (r, 64) + fun showCR r = showCRWithSize (r, 32) + fun showCC r = showCCWithSize (r, 32) + fun showMEM r = showMEMWithSize (r, 8) + fun showCTRL r = showCTRLWithSize (r, 0) + fun showCELLSET r = showCELLSETWithSize (r, 0) + val CR = CellsBasis.newCellKind {name="CR", nickname="cr"} + and CELLSET = CellsBasis.newCellKind {name="CELLSET", nickname="cellset"} + structure MyCells = Cells + (exception Cells = HppaCells + val firstPseudo = 256 + val desc_GP = CellsBasis.DESC {low=0, high=31, kind=CellsBasis.GP, defaultValues=[(0, + 0)], zeroReg=SOME 0, toString=showGP, toStringWithSize=showGPWithSize, + counter=ref 0, dedicated=ref 0, physicalRegs=ref CellsBasis.array0} + and desc_FP = CellsBasis.DESC {low=32, high=63, kind=CellsBasis.FP, + defaultValues=[(32, 0)], zeroReg=SOME 0, toString=showFP, toStringWithSize=showFPWithSize, + counter=ref 0, dedicated=ref 0, physicalRegs=ref CellsBasis.array0} + and desc_CR = CellsBasis.DESC {low=64, high=95, kind=CR, defaultValues=[], + zeroReg=NONE, toString=showCR, toStringWithSize=showCRWithSize, + counter=ref 0, dedicated=ref 0, physicalRegs=ref CellsBasis.array0} + and desc_MEM = CellsBasis.DESC {low=96, high=95, kind=CellsBasis.MEM, + defaultValues=[], zeroReg=NONE, toString=showMEM, toStringWithSize=showMEMWithSize, + counter=ref 0, dedicated=ref 0, physicalRegs=ref CellsBasis.array0} + and desc_CTRL = CellsBasis.DESC {low=96, high=95, kind=CellsBasis.CTRL, + defaultValues=[], zeroReg=NONE, toString=showCTRL, toStringWithSize=showCTRLWithSize, + counter=ref 0, dedicated=ref 0, physicalRegs=ref CellsBasis.array0} + and desc_CELLSET = CellsBasis.DESC {low=96, high=95, kind=CELLSET, defaultValues=[], + zeroReg=NONE, toString=showCELLSET, toStringWithSize=showCELLSETWithSize, + counter=ref 0, dedicated=ref 0, physicalRegs=ref CellsBasis.array0} + val cellKindDescs = [(CellsBasis.GP, desc_GP), (CellsBasis.FP, desc_FP), + (CR, desc_CR), (CellsBasis.CC, desc_GP), (CellsBasis.MEM, desc_MEM), + (CellsBasis.CTRL, desc_CTRL), (CELLSET, desc_CELLSET)] + val cellSize = 4 + ) + + open MyCells + val addGP = CellSet.add + and addFP = CellSet.add + and addCR = CellSet.add + and addCC = CellSet.add + and addMEM = CellSet.add + and addCTRL = CellSet.add + and addCELLSET = CellSet.add + val RegGP = Reg GP + and RegFP = Reg FP + and RegCR = Reg CR + and RegCC = Reg CC + and RegMEM = Reg MEM + and RegCTRL = Reg CTRL + and RegCELLSET = Reg CELLSET + val returnPtr = RegGP 2 + val stackptrR = RegGP 30 + val asmTmpR = RegGP 29 + val fasmTmp = RegFP 31 + val sar = RegCR 11 + val r0 = RegGP 0 + val f0 = RegFP 0 +end + diff --git a/MLRISC/hppa/instructions/hppaFreqProps.sml b/MLRISC/hppa/instructions/hppaFreqProps.sml new file mode 100644 index 0000000..a1dc88c --- /dev/null +++ b/MLRISC/hppa/instructions/hppaFreqProps.sml @@ -0,0 +1,52 @@ +(* hppaFreqProps.sml + * + * COPYRIGHT (c) 2002 Bell Labs, Lucent Technologies + * + * Extract frequency properties from the HP architecture + * + * -- Allen + *) + +functor HppaFreqProps(HppaInstr : HPPAINSTR): FREQUENCY_PROPERTIES = +struct + + structure I = HppaInstr + + val p10 = Probability.percent 10 + val p50 = Probability.percent 50 + val p90 = Probability.percent 90 + val p100 = Probability.always + + fun hppaBranchProb(I.BCOND{cmp=I.COMBT,bc=I.EQ,...}) = p10 + | hppaBranchProb(I.BCOND{cmp=I.COMBF,bc=I.EQ,...}) = p90 + | hppaBranchProb(I.BCOND{cmp=I.COMBT,bc=I.NE,...}) = p90 + | hppaBranchProb(I.BCOND{cmp=I.COMBF,bc=I.NE,...}) = p10 + | hppaBranchProb(I.BCONDI{cmpi=I.COMIBT,bc=I.EQ,...}) = p10 + | hppaBranchProb(I.BCONDI{cmpi=I.COMIBF,bc=I.EQ,...}) = p90 + | hppaBranchProb(I.BCONDI{cmpi=I.COMIBT,bc=I.NE,...}) = p90 + | hppaBranchProb(I.BCONDI{cmpi=I.COMIBF,bc=I.NE,...}) = p10 + | hppaBranchProb(I.BCOND _) = p50 (* default *) + | hppaBranchProb(I.BCONDI _) = p50 (* default *) + | hppaBranchProb(I.FBRANCH _) = p50 (* default *) + (*| hppaBranchProb(I.BB{bc=I.BCLR, p=31, ...}) = 10 + | hppaBranchProb(I.BB{bc=I.BSET, p=31, ...}) = 90 *) + | hppaBranchProb(I.BB _) = p50 (* branch on bit *) + | hppaBranchProb(I.B _) = p100 (* unconditional *) + | hppaBranchProb(I.BE{labs=[], ...}) = p100 (* escapes *) + | hppaBranchProb(I.BE{labs,...}) = + Probability.prob(1, length labs) (* assume equal prob *) + | hppaBranchProb(I.BV{labs=[],...}) = p100 (* escapes *) + | hppaBranchProb(I.BV{labs,...}) = + Probability.prob(1, length labs) (* assume equal prob *) + | hppaBranchProb(I.BLR{labs,...}) = + Probability.prob(1, length labs) (* assume equal prob *) + | hppaBranchProb _ = Probability.never (* non-branch *) + fun branchProb(I.ANNOTATION{a, i, ...}) = + (case #peek MLRiscAnnotations.BRANCH_PROB a of + SOME b => b + | NONE => branchProb i + ) + | branchProb(I.INSTR(i)) = hppaBranchProb(i) + | branchProb _ = Probability.never + +end diff --git a/MLRISC/hppa/instructions/hppaInstr.sml b/MLRISC/hppa/instructions/hppaInstr.sml new file mode 100644 index 0000000..069c26c --- /dev/null +++ b/MLRISC/hppa/instructions/hppaInstr.sml @@ -0,0 +1,632 @@ +(* + * WARNING: This file was automatically generated by MDLGen (v3.1) + * from the machine description file "hppa/hppa.mdl". + * DO NOT EDIT this file directly + *) + + +signature HPPAINSTR = +sig + structure C : HPPACELLS + structure CB : CELLS_BASIS = CellsBasis + structure T : MLTREE + structure Constant: CONSTANT + structure Region : REGION + sharing Constant = T.Constant + sharing Region = T.Region + datatype fmt = + SGL + | DBL + | QUAD + datatype loadi = + LDW + | LDH + | LDB + datatype store = + STW + | STH + | STB + datatype load = + LDWX + | LDWX_S + | LDWX_M + | LDWX_SM + | LDHX + | LDHX_S + | LDHX_M + | LDHX_SM + | LDBX + | LDBX_M + datatype cmp = + COMBT + | COMBF + datatype cmpi = + COMIBT + | COMIBF + datatype arith = + ADD + | ADDL + | ADDO + | SH1ADD + | SH1ADDL + | SH1ADDO + | SH2ADD + | SH2ADDL + | SH2ADDO + | SH3ADD + | SH3ADDL + | SH3ADDO + | SUB + | SUBO + | OR + | XOR + | AND + | ANDCM + datatype arithi = + ADDI + | ADDIO + | ADDIL + | SUBI + | SUBIO + datatype shiftv = + VEXTRU + | VEXTRS + | ZVDEP + datatype shift = + EXTRU + | EXTRS + | ZDEP + datatype farith = + FADD_S + | FADD_D + | FADD_Q + | FSUB_S + | FSUB_D + | FSUB_Q + | FMPY_S + | FMPY_D + | FMPY_Q + | FDIV_S + | FDIV_D + | FDIV_Q + | XMPYU + datatype funary = + FCPY_S + | FCPY_D + | FCPY_Q + | FABS_S + | FABS_D + | FABS_Q + | FSQRT_S + | FSQRT_D + | FSQRT_Q + | FRND_S + | FRND_D + | FRND_Q + datatype fcnv = + FCNVFF_SD + | FCNVFF_SQ + | FCNVFF_DS + | FCNVFF_DQ + | FCNVFF_QS + | FCNVFF_QD + | FCNVXF_S + | FCNVXF_D + | FCNVXF_Q + | FCNVFX_S + | FCNVFX_D + | FCNVFX_Q + | FCNVFXT_S + | FCNVFXT_D + | FCNVFXT_Q + datatype fstore = + FSTDS + | FSTWS + datatype fstorex = + FSTDX + | FSTDX_S + | FSTDX_M + | FSTDX_SM + | FSTWX + | FSTWX_S + | FSTWX_M + | FSTWX_SM + datatype floadx = + FLDDX + | FLDDX_S + | FLDDX_M + | FLDDX_SM + | FLDWX + | FLDWX_S + | FLDWX_M + | FLDWX_SM + datatype fload = + FLDDS + | FLDWS + datatype bcond = + EQ + | LT + | LE + | LTU + | LEU + | NE + | GE + | GT + | GTU + | GEU + datatype bitcond = + BSET + | BCLR + datatype fcond = + False_ + | False + | ? + | !<=> + | == + | EQT + | ?= + | !<> + | !?>= + | < + | ?< + | !>= + | !?> + | <= + | ?<= + | !> + | !?<= + | > + | ?> + | !<= + | !?< + | >= + | ?>= + | !< + | !?= + | <> + | != + | NET + | !? + | <=> + | True_ + | True + datatype scond = + ALL_ZERO + | LEFTMOST_ONE + | LEFTMOST_ZERO + | RIGHTMOST_ONE + | RIGHTMOST_ZERO + datatype field_selector = + F + | S + | D + | R + | T + | P + datatype ea = + Direct of CellsBasis.cell + | FDirect of CellsBasis.cell + | Displace of {base:CellsBasis.cell, disp:T.labexp, mem:Region.region} + datatype operand = + REG of CellsBasis.cell + | IMMED of int + | LabExp of T.labexp * field_selector + | HILabExp of T.labexp * field_selector + | LOLabExp of T.labexp * field_selector + datatype addressing_mode = + DISPea of CellsBasis.cell * operand + | INDXea of CellsBasis.cell * CellsBasis.cell + | INDXSCALEDea of CellsBasis.cell * CellsBasis.cell + datatype instr = + LOADI of {li:loadi, r:CellsBasis.cell, i:operand, t:CellsBasis.cell, mem:Region.region} + | LOAD of {l:load, r1:CellsBasis.cell, r2:CellsBasis.cell, t:CellsBasis.cell, + mem:Region.region} + | STORE of {st:store, b:CellsBasis.cell, d:operand, r:CellsBasis.cell, mem:Region.region} + | ARITH of {a:arith, r1:CellsBasis.cell, r2:CellsBasis.cell, t:CellsBasis.cell} + | ARITHI of {ai:arithi, i:operand, r:CellsBasis.cell, t:CellsBasis.cell} + | COMCLR_LDO of {cc:bcond, r1:CellsBasis.cell, r2:CellsBasis.cell, t1:CellsBasis.cell, + i:int, b:CellsBasis.cell, t2:CellsBasis.cell} + | COMICLR_LDO of {cc:bcond, i1:operand, r2:CellsBasis.cell, t1:CellsBasis.cell, + i2:int, b:CellsBasis.cell, t2:CellsBasis.cell} + | SHIFTV of {sv:shiftv, r:CellsBasis.cell, len:int, t:CellsBasis.cell} + | SHIFT of {s:shift, r:CellsBasis.cell, p:int, len:int, t:CellsBasis.cell} + | BCOND of {cmp:cmp, bc:bcond, r1:CellsBasis.cell, r2:CellsBasis.cell, n:bool, + nop:bool, t:Label.label, f:Label.label} + | BCONDI of {cmpi:cmpi, bc:bcond, i:int, r2:CellsBasis.cell, n:bool, nop:bool, + t:Label.label, f:Label.label} + | BB of {bc:bitcond, r:CellsBasis.cell, p:int, n:bool, nop:bool, t:Label.label, + f:Label.label} + | B of {lab:Label.label, n:bool} + | LONGJUMP of {lab:Label.label, n:bool, tmp:CellsBasis.cell, tmpLab:Label.label} + | BE of {b:CellsBasis.cell, d:operand, sr:int, n:bool, labs:Label.label list} + | BV of {x:CellsBasis.cell, b:CellsBasis.cell, labs:Label.label list, n:bool} + | BLR of {x:CellsBasis.cell, t:CellsBasis.cell, labs:Label.label list, n:bool} + | BL of {lab:Label.label, t:CellsBasis.cell, defs:C.cellset, uses:C.cellset, + cutsTo:Label.label list, mem:Region.region, n:bool} + | BLE of {d:operand, b:CellsBasis.cell, sr:int, t:CellsBasis.cell, defs:C.cellset, + uses:C.cellset, cutsTo:Label.label list, mem:Region.region} + | LDIL of {i:operand, t:CellsBasis.cell} + | LDO of {i:operand, b:CellsBasis.cell, t:CellsBasis.cell} + | MTCTL of {r:CellsBasis.cell, t:CellsBasis.cell} + | FSTORE of {fst:fstore, b:CellsBasis.cell, d:int, r:CellsBasis.cell, mem:Region.region} + | FSTOREX of {fstx:fstorex, b:CellsBasis.cell, x:CellsBasis.cell, r:CellsBasis.cell, + mem:Region.region} + | FLOAD of {fl:fload, b:CellsBasis.cell, d:int, t:CellsBasis.cell, mem:Region.region} + | FLOADX of {flx:floadx, b:CellsBasis.cell, x:CellsBasis.cell, t:CellsBasis.cell, + mem:Region.region} + | FARITH of {fa:farith, r1:CellsBasis.cell, r2:CellsBasis.cell, t:CellsBasis.cell} + | FUNARY of {fu:funary, f:CellsBasis.cell, t:CellsBasis.cell} + | FCNV of {fcnv:fcnv, f:CellsBasis.cell, t:CellsBasis.cell} + | FBRANCH of {cc:fcond, fmt:fmt, f1:CellsBasis.cell, f2:CellsBasis.cell, + t:Label.label, f:Label.label, n:bool, long:bool} + | BREAK of {code1:int, code2:int} + | NOP + | SOURCE of {} + | SINK of {} + | PHI of {} + and instruction = + LIVE of {regs: C.cellset, spilled: C.cellset} + | KILL of {regs: C.cellset, spilled: C.cellset} + | COPY of {k: CellsBasis.cellkind, + sz: int, (* in bits *) + dst: CellsBasis.cell list, + src: CellsBasis.cell list, + tmp: ea option (* NONE if |dst| = {src| = 1 *)} + | ANNOTATION of {i:instruction, a:Annotations.annotation} + | INSTR of instr + val loadi : {li:loadi, r:CellsBasis.cell, i:operand, t:CellsBasis.cell, + mem:Region.region} -> instruction + val load : {l:load, r1:CellsBasis.cell, r2:CellsBasis.cell, t:CellsBasis.cell, + mem:Region.region} -> instruction + val store : {st:store, b:CellsBasis.cell, d:operand, r:CellsBasis.cell, + mem:Region.region} -> instruction + val arith : {a:arith, r1:CellsBasis.cell, r2:CellsBasis.cell, t:CellsBasis.cell} -> instruction + val arithi : {ai:arithi, i:operand, r:CellsBasis.cell, t:CellsBasis.cell} -> instruction + val comclr_ldo : {cc:bcond, r1:CellsBasis.cell, r2:CellsBasis.cell, t1:CellsBasis.cell, + i:int, b:CellsBasis.cell, t2:CellsBasis.cell} -> instruction + val comiclr_ldo : {cc:bcond, i1:operand, r2:CellsBasis.cell, t1:CellsBasis.cell, + i2:int, b:CellsBasis.cell, t2:CellsBasis.cell} -> instruction + val shiftv : {sv:shiftv, r:CellsBasis.cell, len:int, t:CellsBasis.cell} -> instruction + val shift : {s:shift, r:CellsBasis.cell, p:int, len:int, t:CellsBasis.cell} -> instruction + val bcond : {cmp:cmp, bc:bcond, r1:CellsBasis.cell, r2:CellsBasis.cell, + n:bool, nop:bool, t:Label.label, f:Label.label} -> instruction + val bcondi : {cmpi:cmpi, bc:bcond, i:int, r2:CellsBasis.cell, n:bool, nop:bool, + t:Label.label, f:Label.label} -> instruction + val bb : {bc:bitcond, r:CellsBasis.cell, p:int, n:bool, nop:bool, t:Label.label, + f:Label.label} -> instruction + val b : {lab:Label.label, n:bool} -> instruction + val longjump : {lab:Label.label, n:bool, tmp:CellsBasis.cell, tmpLab:Label.label} -> instruction + val be : {b:CellsBasis.cell, d:operand, sr:int, n:bool, labs:Label.label list} -> instruction + val bv : {x:CellsBasis.cell, b:CellsBasis.cell, labs:Label.label list, n:bool} -> instruction + val blr : {x:CellsBasis.cell, t:CellsBasis.cell, labs:Label.label list, + n:bool} -> instruction + val bl : {lab:Label.label, t:CellsBasis.cell, defs:C.cellset, uses:C.cellset, + cutsTo:Label.label list, mem:Region.region, n:bool} -> instruction + val ble : {d:operand, b:CellsBasis.cell, sr:int, t:CellsBasis.cell, defs:C.cellset, + uses:C.cellset, cutsTo:Label.label list, mem:Region.region} -> instruction + val ldil : {i:operand, t:CellsBasis.cell} -> instruction + val ldo : {i:operand, b:CellsBasis.cell, t:CellsBasis.cell} -> instruction + val mtctl : {r:CellsBasis.cell, t:CellsBasis.cell} -> instruction + val fstore : {fst:fstore, b:CellsBasis.cell, d:int, r:CellsBasis.cell, mem:Region.region} -> instruction + val fstorex : {fstx:fstorex, b:CellsBasis.cell, x:CellsBasis.cell, r:CellsBasis.cell, + mem:Region.region} -> instruction + val fload : {fl:fload, b:CellsBasis.cell, d:int, t:CellsBasis.cell, mem:Region.region} -> instruction + val floadx : {flx:floadx, b:CellsBasis.cell, x:CellsBasis.cell, t:CellsBasis.cell, + mem:Region.region} -> instruction + val farith : {fa:farith, r1:CellsBasis.cell, r2:CellsBasis.cell, t:CellsBasis.cell} -> instruction + val funary : {fu:funary, f:CellsBasis.cell, t:CellsBasis.cell} -> instruction + val fcnv : {fcnv:fcnv, f:CellsBasis.cell, t:CellsBasis.cell} -> instruction + val fbranch : {cc:fcond, fmt:fmt, f1:CellsBasis.cell, f2:CellsBasis.cell, + t:Label.label, f:Label.label, n:bool, long:bool} -> instruction + val break : {code1:int, code2:int} -> instruction + val nop : instruction + val source : {} -> instruction + val sink : {} -> instruction + val phi : {} -> instruction +end + +functor HppaInstr(T: MLTREE + ) : HPPAINSTR = +struct + structure C = HppaCells + structure CB = CellsBasis + structure T = T + structure Region = T.Region + structure Constant = T.Constant + datatype fmt = + SGL + | DBL + | QUAD + datatype loadi = + LDW + | LDH + | LDB + datatype store = + STW + | STH + | STB + datatype load = + LDWX + | LDWX_S + | LDWX_M + | LDWX_SM + | LDHX + | LDHX_S + | LDHX_M + | LDHX_SM + | LDBX + | LDBX_M + datatype cmp = + COMBT + | COMBF + datatype cmpi = + COMIBT + | COMIBF + datatype arith = + ADD + | ADDL + | ADDO + | SH1ADD + | SH1ADDL + | SH1ADDO + | SH2ADD + | SH2ADDL + | SH2ADDO + | SH3ADD + | SH3ADDL + | SH3ADDO + | SUB + | SUBO + | OR + | XOR + | AND + | ANDCM + datatype arithi = + ADDI + | ADDIO + | ADDIL + | SUBI + | SUBIO + datatype shiftv = + VEXTRU + | VEXTRS + | ZVDEP + datatype shift = + EXTRU + | EXTRS + | ZDEP + datatype farith = + FADD_S + | FADD_D + | FADD_Q + | FSUB_S + | FSUB_D + | FSUB_Q + | FMPY_S + | FMPY_D + | FMPY_Q + | FDIV_S + | FDIV_D + | FDIV_Q + | XMPYU + datatype funary = + FCPY_S + | FCPY_D + | FCPY_Q + | FABS_S + | FABS_D + | FABS_Q + | FSQRT_S + | FSQRT_D + | FSQRT_Q + | FRND_S + | FRND_D + | FRND_Q + datatype fcnv = + FCNVFF_SD + | FCNVFF_SQ + | FCNVFF_DS + | FCNVFF_DQ + | FCNVFF_QS + | FCNVFF_QD + | FCNVXF_S + | FCNVXF_D + | FCNVXF_Q + | FCNVFX_S + | FCNVFX_D + | FCNVFX_Q + | FCNVFXT_S + | FCNVFXT_D + | FCNVFXT_Q + datatype fstore = + FSTDS + | FSTWS + datatype fstorex = + FSTDX + | FSTDX_S + | FSTDX_M + | FSTDX_SM + | FSTWX + | FSTWX_S + | FSTWX_M + | FSTWX_SM + datatype floadx = + FLDDX + | FLDDX_S + | FLDDX_M + | FLDDX_SM + | FLDWX + | FLDWX_S + | FLDWX_M + | FLDWX_SM + datatype fload = + FLDDS + | FLDWS + datatype bcond = + EQ + | LT + | LE + | LTU + | LEU + | NE + | GE + | GT + | GTU + | GEU + datatype bitcond = + BSET + | BCLR + datatype fcond = + False_ + | False + | ? + | !<=> + | == + | EQT + | ?= + | !<> + | !?>= + | < + | ?< + | !>= + | !?> + | <= + | ?<= + | !> + | !?<= + | > + | ?> + | !<= + | !?< + | >= + | ?>= + | !< + | !?= + | <> + | != + | NET + | !? + | <=> + | True_ + | True + datatype scond = + ALL_ZERO + | LEFTMOST_ONE + | LEFTMOST_ZERO + | RIGHTMOST_ONE + | RIGHTMOST_ZERO + datatype field_selector = + F + | S + | D + | R + | T + | P + datatype ea = + Direct of CellsBasis.cell + | FDirect of CellsBasis.cell + | Displace of {base:CellsBasis.cell, disp:T.labexp, mem:Region.region} + datatype operand = + REG of CellsBasis.cell + | IMMED of int + | LabExp of T.labexp * field_selector + | HILabExp of T.labexp * field_selector + | LOLabExp of T.labexp * field_selector + datatype addressing_mode = + DISPea of CellsBasis.cell * operand + | INDXea of CellsBasis.cell * CellsBasis.cell + | INDXSCALEDea of CellsBasis.cell * CellsBasis.cell + datatype instr = + LOADI of {li:loadi, r:CellsBasis.cell, i:operand, t:CellsBasis.cell, mem:Region.region} + | LOAD of {l:load, r1:CellsBasis.cell, r2:CellsBasis.cell, t:CellsBasis.cell, + mem:Region.region} + | STORE of {st:store, b:CellsBasis.cell, d:operand, r:CellsBasis.cell, mem:Region.region} + | ARITH of {a:arith, r1:CellsBasis.cell, r2:CellsBasis.cell, t:CellsBasis.cell} + | ARITHI of {ai:arithi, i:operand, r:CellsBasis.cell, t:CellsBasis.cell} + | COMCLR_LDO of {cc:bcond, r1:CellsBasis.cell, r2:CellsBasis.cell, t1:CellsBasis.cell, + i:int, b:CellsBasis.cell, t2:CellsBasis.cell} + | COMICLR_LDO of {cc:bcond, i1:operand, r2:CellsBasis.cell, t1:CellsBasis.cell, + i2:int, b:CellsBasis.cell, t2:CellsBasis.cell} + | SHIFTV of {sv:shiftv, r:CellsBasis.cell, len:int, t:CellsBasis.cell} + | SHIFT of {s:shift, r:CellsBasis.cell, p:int, len:int, t:CellsBasis.cell} + | BCOND of {cmp:cmp, bc:bcond, r1:CellsBasis.cell, r2:CellsBasis.cell, n:bool, + nop:bool, t:Label.label, f:Label.label} + | BCONDI of {cmpi:cmpi, bc:bcond, i:int, r2:CellsBasis.cell, n:bool, nop:bool, + t:Label.label, f:Label.label} + | BB of {bc:bitcond, r:CellsBasis.cell, p:int, n:bool, nop:bool, t:Label.label, + f:Label.label} + | B of {lab:Label.label, n:bool} + | LONGJUMP of {lab:Label.label, n:bool, tmp:CellsBasis.cell, tmpLab:Label.label} + | BE of {b:CellsBasis.cell, d:operand, sr:int, n:bool, labs:Label.label list} + | BV of {x:CellsBasis.cell, b:CellsBasis.cell, labs:Label.label list, n:bool} + | BLR of {x:CellsBasis.cell, t:CellsBasis.cell, labs:Label.label list, n:bool} + | BL of {lab:Label.label, t:CellsBasis.cell, defs:C.cellset, uses:C.cellset, + cutsTo:Label.label list, mem:Region.region, n:bool} + | BLE of {d:operand, b:CellsBasis.cell, sr:int, t:CellsBasis.cell, defs:C.cellset, + uses:C.cellset, cutsTo:Label.label list, mem:Region.region} + | LDIL of {i:operand, t:CellsBasis.cell} + | LDO of {i:operand, b:CellsBasis.cell, t:CellsBasis.cell} + | MTCTL of {r:CellsBasis.cell, t:CellsBasis.cell} + | FSTORE of {fst:fstore, b:CellsBasis.cell, d:int, r:CellsBasis.cell, mem:Region.region} + | FSTOREX of {fstx:fstorex, b:CellsBasis.cell, x:CellsBasis.cell, r:CellsBasis.cell, + mem:Region.region} + | FLOAD of {fl:fload, b:CellsBasis.cell, d:int, t:CellsBasis.cell, mem:Region.region} + | FLOADX of {flx:floadx, b:CellsBasis.cell, x:CellsBasis.cell, t:CellsBasis.cell, + mem:Region.region} + | FARITH of {fa:farith, r1:CellsBasis.cell, r2:CellsBasis.cell, t:CellsBasis.cell} + | FUNARY of {fu:funary, f:CellsBasis.cell, t:CellsBasis.cell} + | FCNV of {fcnv:fcnv, f:CellsBasis.cell, t:CellsBasis.cell} + | FBRANCH of {cc:fcond, fmt:fmt, f1:CellsBasis.cell, f2:CellsBasis.cell, + t:Label.label, f:Label.label, n:bool, long:bool} + | BREAK of {code1:int, code2:int} + | NOP + | SOURCE of {} + | SINK of {} + | PHI of {} + and instruction = + LIVE of {regs: C.cellset, spilled: C.cellset} + | KILL of {regs: C.cellset, spilled: C.cellset} + | COPY of {k: CellsBasis.cellkind, + sz: int, (* in bits *) + dst: CellsBasis.cell list, + src: CellsBasis.cell list, + tmp: ea option (* NONE if |dst| = {src| = 1 *)} + | ANNOTATION of {i:instruction, a:Annotations.annotation} + | INSTR of instr + val loadi = INSTR o LOADI + and load = INSTR o LOAD + and store = INSTR o STORE + and arith = INSTR o ARITH + and arithi = INSTR o ARITHI + and comclr_ldo = INSTR o COMCLR_LDO + and comiclr_ldo = INSTR o COMICLR_LDO + and shiftv = INSTR o SHIFTV + and shift = INSTR o SHIFT + and bcond = INSTR o BCOND + and bcondi = INSTR o BCONDI + and bb = INSTR o BB + and b = INSTR o B + and longjump = INSTR o LONGJUMP + and be = INSTR o BE + and bv = INSTR o BV + and blr = INSTR o BLR + and bl = INSTR o BL + and ble = INSTR o BLE + and ldil = INSTR o LDIL + and ldo = INSTR o LDO + and mtctl = INSTR o MTCTL + and fstore = INSTR o FSTORE + and fstorex = INSTR o FSTOREX + and fload = INSTR o FLOAD + and floadx = INSTR o FLOADX + and farith = INSTR o FARITH + and funary = INSTR o FUNARY + and fcnv = INSTR o FCNV + and fbranch = INSTR o FBRANCH + and break = INSTR o BREAK + and nop = INSTR NOP + and source = INSTR o SOURCE + and sink = INSTR o SINK + and phi = INSTR o PHI +end + diff --git a/MLRISC/hppa/instructions/hppaProps.sml b/MLRISC/hppa/instructions/hppaProps.sml new file mode 100644 index 0000000..df9b170 --- /dev/null +++ b/MLRISC/hppa/instructions/hppaProps.sml @@ -0,0 +1,318 @@ +(* hppaProps.sml + * + * COPYRIGHT (c) 1996 Bell Laboratories. + * + *) + +functor HppaProps + ( structure HppaInstr : HPPAINSTR + structure MLTreeEval : MLTREE_EVAL where T = HppaInstr.T + structure MLTreeHash : MLTREE_HASH where T = HppaInstr.T + ) : INSN_PROPERTIES = +struct + structure I = HppaInstr + structure C = HppaInstr.C + structure CB = CellsBasis + + exception NegateConditional + + fun error msg = MLRiscErrorMsg.error("HppaProps",msg) + + datatype kind = IK_JUMP | IK_NOP | IK_INSTR | IK_COPY | IK_CALL + | IK_CALL_WITH_CUTS | IK_PHI | IK_SOURCE | IK_SINK + datatype target = LABELLED of Label.label | FALLTHROUGH | ESCAPES + + val zeroR = Option.valOf(C.zeroReg CB.GP) + val r31 = C.Reg CB.GP 31 + + (*======================================================================== + * Instruction Kinds + *========================================================================*) + (* Note: BLE and BL used to implement calls are not view as branches *) + fun instrKind(I.ANNOTATION{i, ...}) = instrKind i + | instrKind(I.COPY _) = IK_COPY + | instrKind(I.INSTR instr) = + (case instr + of (I.BCOND _) => IK_JUMP + | (I.BCONDI _) => IK_JUMP + | (I.BB _) => IK_JUMP + | (I.B _) => IK_JUMP + | (I.BE _) => IK_JUMP + | (I.FBRANCH _)=> IK_JUMP + | (I.BV _) => IK_JUMP + | (I.BLR _) => IK_JUMP + | (I.BREAK _) => IK_JUMP + | (I.NOP) => IK_NOP + | (I.BL{cutsTo=_::_,...}) => IK_CALL_WITH_CUTS + | (I.BL _) => IK_CALL + | (I.BLE{cutsTo=_::_,...}) => IK_CALL_WITH_CUTS + | (I.BLE _) => IK_CALL + | (I.PHI _) => IK_PHI + | (I.SOURCE _) => IK_SOURCE + | (I.SINK _) => IK_SINK + | _ => IK_INSTR) + | instrKind _ = error "instrKind" + + fun moveInstr(I.COPY _) = true + | moveInstr(I.ANNOTATION{i,...}) = moveInstr i + | moveInstr _ = false + + fun nop() = I.nop + + (*======================================================================== + * Parallel Move + *========================================================================*) + fun moveTmpR(I.COPY{tmp, ...}) = + (case tmp + of SOME(I.Direct r) => SOME r + | SOME(I.FDirect f) => SOME f + | _ => NONE + (*esac*)) + | moveTmpR(I.ANNOTATION{i,...}) = moveTmpR i + | moveTmpR _ = NONE + + fun moveDstSrc(I.COPY{dst, src, ...}) = (dst, src) + | moveDstSrc(I.ANNOTATION{i,...}) = moveDstSrc i + | moveDstSrc _ = error "moveDstSrc" + + (*======================================================================== + * Branches and Calls/Returns + *========================================================================*) + fun branchTargets(I.ANNOTATION{i,...}) = branchTargets i + | branchTargets(I.INSTR instr) = + (case instr + of (I.BCOND{t, ...}) => [LABELLED t, FALLTHROUGH] + | (I.BCONDI{t, ...}) => [LABELLED t, FALLTHROUGH] + | (I.BB{t, ...}) => [LABELLED t, FALLTHROUGH] + | (I.B{lab, ...}) => [LABELLED lab] + | (I.FBRANCH{t,...}) => [LABELLED t, FALLTHROUGH] + | (I.BE{labs=[],...}) => [ESCAPES] + | (I.BE{labs,...}) => map LABELLED labs + | (I.BV{labs=[],...}) => [ESCAPES] + | (I.BV{labs,...}) => map LABELLED labs + | (I.BLR{labs,...}) => map LABELLED labs + | (I.BL{cutsTo,...}) => FALLTHROUGH::map LABELLED cutsTo + | (I.BLE{cutsTo,...}) => FALLTHROUGH::map LABELLED cutsTo + | (I.BREAK _) => [ESCAPES] + | _ => error "branchTargets" + (*easc*)) + | branchTargets _ = error "branchTargets" + + fun jump label = I.b{lab=label,n=true} + + val immedRange = {lo= ~8192, hi=8191} + fun loadImmed{immed,t} = + I.ldo{i=if #lo immedRange <= immed andalso immed <= #hi immedRange + then I.IMMED immed + else I.LabExp(I.T.LI(I.T.I.fromInt(32,immed)),I.F),b=zeroR,t=t} + fun loadOperand{opn,t} = I.ldo{i=opn,b=zeroR,t=t} + + fun setJumpTarget(I.ANNOTATION{a,i}, l) = I.ANNOTATION{a=a, i=setJumpTarget(i,l)} + | setJumpTarget(I.INSTR(I.B{n,...}), L) = I.b{lab=L,n=n} + | setJumpTarget _ = error "setJumpTarget" + + fun setBranchTargets{i=I.ANNOTATION{a,i}, t, f} = + I.ANNOTATION{a=a, i=setBranchTargets{i=i, t=t, f=f}} + | setBranchTargets{i=I.INSTR(I.BCOND{cmp,bc,r1,r2,t,f,n,nop}), t=T, f=F} = + I.bcond{cmp=cmp,bc=bc,r1=r1,r2=r2,t=T,f=F,n=n,nop=nop} + | setBranchTargets{i=I.INSTR(I.BCONDI{cmpi,bc,i,r2,t,f,n,nop=nop}),t=T, f=F} = + I.bcondi{cmpi=cmpi,bc=bc,i=i,r2=r2,t=T,f=F,n=n,nop=nop} + | setBranchTargets{i=I.INSTR(I.BB{bc,r,p,t,f,n,nop}), t=T, f=F} = + I.bb{bc=bc,r=r,p=p,t=T,f=F,n=n,nop=nop} + | setBranchTargets{i=I.INSTR(I.FBRANCH{cc,fmt,n,long,f1,f2,...}), t=T, f=F} = + I.fbranch{cc=cc,fmt=fmt,t=T,f=F,n=n,long=long,f1=f1,f2=f2} + | setBranchTargets _ = error "setBranchTargets" + + + (* negate the branch. Since the HPPA instruction representation tracks both + * the true and false target labels, we set the false label to be the + * old true label and set the true label to be the argument label. + *) + fun negateConditional (br, lab) = let + fun revFcond I.? = I.!? + | revFcond I.!<=> = I.<=> + | revFcond I.== = I.!= + | revFcond I.?= = I.!?= + | revFcond I.!<> = I.<> + | revFcond I.!?>= = I.?>= + | revFcond I.< = I.!< + | revFcond I.?< = I.!?< + | revFcond I.!>= = I.>= + | revFcond I.!?> = I.?> + | revFcond I.<= = I.!<= + | revFcond I.?<= = I.!?<= + | revFcond I.!> = I.> + | revFcond I.!?<= = I.?<= + | revFcond I.> = I.!> + | revFcond I.?> = I.!?> + | revFcond I.!<= = I.<= + | revFcond I.!?< = I.?< + | revFcond I.>= = I.!>= + | revFcond I.?>= = I.!?>= + | revFcond I.!< = I.< + | revFcond I.!?= = I.?= + | revFcond I.<> = I.!<> + | revFcond I.!= = I.== + | revFcond I.!? = I.? + | revFcond I.<=> = I.!<=> + | revFcond _ = error "revFcond" + fun negate (I.INSTR(I.BCOND{cmp,bc,r1,r2,t,f,n,nop})) = I.bcond{ + bc=bc, r1=r1, r2=r2, t=lab, f=t, n=n, nop=nop, + cmp=case cmp of I.COMBT => I.COMBF | I.COMBF => I.COMBT + } + | negate (I.INSTR(I.BCONDI{cmpi,bc,i,r2,t,f,n,nop})) = I.bcondi{ + bc=bc, i=i, r2=r2, t=lab, f=t, n=n, nop=nop, + cmpi=case cmpi of I.COMIBT => I.COMIBF | I.COMIBF => I.COMIBT + } + | negate (I.INSTR(I.BB{bc,r,p,t,f,n,nop})) = I.bb{ + bc=case bc of I.BSET => I.BCLR | I.BCLR => I.BSET, + r=r,p=p,t=lab,f=t,n=n,nop=nop + } + | negate (I.INSTR(I.FBRANCH{cc,fmt,f1,f2,t,f,n,long})) = + I.fbranch{cc=revFcond cc,fmt=fmt,f1=f1,f2=f2,t=lab,f=t,n=n,long=long} + | negate (I.ANNOTATION{i,a}) = I.ANNOTATION{i=negate i,a=a} + | negate _ = raise NegateConditional + in + negate br + end + + (*======================================================================== + * Equality and hashing for operands + *========================================================================*) + fun hashFieldSel I.F = 0w0 + | hashFieldSel I.S = 0w1 + | hashFieldSel I.D = 0w2 + | hashFieldSel I.R = 0w3 + | hashFieldSel I.T = 0w4 + | hashFieldSel I.P = 0w5 + fun hashOpn(I.IMMED i) = Word.fromInt i + | hashOpn(I.LabExp(l,f)) = MLTreeHash.hash l + hashFieldSel f + | hashOpn(I.HILabExp(l,f)) = MLTreeHash.hash l + hashFieldSel f + 0w10000 + | hashOpn(I.LOLabExp(l,f)) = MLTreeHash.hash l + hashFieldSel f + 0w20000 + | hashOpn(I.REG r) = CB.hashCell r + fun eqOpn(I.IMMED i,I.IMMED j) = i = j + | eqOpn(I.REG x,I.REG y) = CB.sameColor(x,y) + | eqOpn(I.LabExp(a,b),I.LabExp(c,d)) = + b = d andalso MLTreeEval.==(a,c) + | eqOpn(I.HILabExp(a,b),I.HILabExp(c,d)) = + b = d andalso MLTreeEval.==(a,c) + | eqOpn(I.LOLabExp(a,b),I.LOLabExp(c,d)) = + b = d andalso MLTreeEval.==(a,c) + | eqOpn _ = false + + + (*======================================================================== + * Definition and use (for register allocation mainly) + *========================================================================*) + fun defUseR instr = let + fun hppaDU instr = let + fun trap((I.ADDO | I.SUBO | I.SH1ADDO), d, u) = (d, u) + | trap(_, d, u) = (d, u) + fun trapi((I.ADDIO | I.SUBIO), d, u) = (d, u) + | trapi(_, d, u) = (d, u) + in + case instr + of I.STORE {b, r,...} => ([], [b,r]) + | I.LOAD {l, r1, r2, t, ...} => ([t], [r1,r2]) + | I.LOADI {li, r, t, ...} => ([t], [r]) + | I.ARITH {a, r1, r2, t, ...} => trap(a, [t], [r1,r2]) + | I.ARITHI {ai, r, t, ...} => trapi(ai, [t], [r]) + | I.COMCLR_LDO{r1, r2, b, t1, t2, ...}=> + if CB.sameColor(t1,t2) then ([t1], [b, r1, r2]) + else ([t1, t2], [b, r1, r2, t2]) + | I.COMICLR_LDO{i1, r2, b, t1, t2, ...}=> + if CB.sameColor(t1,t2) then ([t1], [b, r2]) + else ([t1, t2], [b, r2, t2]) + | I.SHIFTV {r, t, ...} => ([t], [r]) + | I.SHIFT {r, t, ...} => ([t], [r]) + | I.BCOND {r1, r2, ...} => ([], [r1,r2]) + | I.BCONDI {r2, ...} => ([], [r2]) + | I.BB {r, ...} => ([], [r]) + | I.BV {x, b, ...} => ([], [x,b]) + | I.BE {b, ...} => ([], [b]) + | I.BLR{x, t, ...} => ([t], [x]) + | I.BL{defs, uses, ...} => (C.getReg defs, C.getReg uses) + | I.BLE{t, b, defs, uses, ...}=> + (r31 :: t :: C.getReg defs, b :: C.getReg uses) + | I.LDIL{i, t} => ([t], []) + | I.LDO{b, t, ...} => ([t], [b]) + | I.MTCTL{r, t} => ([], [r]) + | I.FSTORE {b, ...} => ([], [b]) + | I.FSTOREX {b, x, ...} => ([], [b,x]) + | I.FLOAD {b, ...} => ([], [b]) + | I.FLOADX{b, x, ...} => ([], [b,x]) + | _ => ([],[]) + end + in + case instr + of I.ANNOTATION{i, ...} => defUseR i + | I.LIVE{regs, ...} => ([], C.getReg regs) + | I.KILL{regs, ...} => (C.getReg regs, []) + | I.INSTR(i) => hppaDU(i) + | I.COPY{k, dst, src, tmp, ...} => let + val (d,u) = case k of CB.GP => (dst, src) | _ => ([], []) + in + case tmp + of SOME(I.Direct r) => (r::d, u) + | SOME(I.Displace{base, ...}) => (* (d, base::u) *) (d, u) + | _ => (d,u) + end + end + + fun defUseF instr = let + fun hppaDU instr = + case instr + of I.FSTORE {r, ...} => ([], [r]) + | I.FSTOREX{r, ...} => ([], [r]) + | I.FLOAD{t, ...} => ([t], []) + | I.FLOADX{t, ...} => ([t], []) + | I.FARITH {r1, r2, t, ...} => ([t], [r1,r2]) + | I.FUNARY {f, t, ...} => ([t], [f]) + | I.FCNV {f, t, ...} => ([t], [f]) + | I.FBRANCH{f1, f2,...} => ([], [f1, f2]) + | I.BL{defs, uses, ...} => (C.getFreg defs, C.getFreg uses) + | I.BLE{defs, uses, ...} => (C.getFreg defs, C.getFreg uses) + | _ => ([],[]) + in + case instr + of I.ANNOTATION{i, ...} => defUseF i + | I.INSTR(i) => hppaDU(i) + | I.LIVE{regs, ...} => ([], C.getFreg regs) + | I.KILL{regs, ...} => (C.getFreg regs, []) + | I.COPY{k, dst, src, tmp, ...} => let + val (d, u) = case k of CB.FP => (dst, src) | _ => ([],[]) + in + case tmp + of SOME(I.FDirect f) => (f::d, u) + | _ => (d, u) + end + end + + fun defUse CB.GP = defUseR + | defUse CB.FP = defUseF + | defUse _ = error "defUse" + + (*======================================================================== + * Annotations + *========================================================================*) + fun getAnnotations(I.ANNOTATION{i,a}) = + let val (i,an) = getAnnotations i in (i,a::an) end + | getAnnotations i = (i,[]) + fun annotate(i,a) = I.ANNOTATION{i=i,a=a} + + (*======================================================================== + * Replicate an instruction + *========================================================================*) + fun replicate(I.ANNOTATION{i,a}) = I.ANNOTATION{i=replicate i,a=a} + | replicate(I.COPY{k, sz, tmp=SOME _, dst, src}) = let + val tmp = case k of CB.GP => C.newReg() + | CB.FP => C.newFreg() + | _ => error "replicate: neither GP nor FP" + in + I.COPY{k=k, sz=sz, tmp=SOME(I.Direct(tmp)), dst=dst, src=src} + end + | replicate i = i +end + + + diff --git a/MLRISC/hppa/instructions/hppaShuffle.sig b/MLRISC/hppa/instructions/hppaShuffle.sig new file mode 100644 index 0000000..d8899f4 --- /dev/null +++ b/MLRISC/hppa/instructions/hppaShuffle.sig @@ -0,0 +1,11 @@ +(* hppaShuffle.sig -- shuffle src registers into destination registers *) + +signature HPPASHUFFLE = sig + structure I : HPPAINSTR + + type t = {tmp:I.ea option, dst:CellsBasis.cell list, src:CellsBasis.cell list} + + val shuffle : t -> I.instruction list + val shufflefp : t -> I.instruction list +end + diff --git a/MLRISC/hppa/instructions/hppaShuffle.sml b/MLRISC/hppa/instructions/hppaShuffle.sml new file mode 100644 index 0000000..d438f37 --- /dev/null +++ b/MLRISC/hppa/instructions/hppaShuffle.sml @@ -0,0 +1,41 @@ +functor HppaShuffle(I:HPPAINSTR) : HPPASHUFFLE = struct + structure I = I + structure C = I.C + structure Shuffle = Shuffle(I) + structure CB = CellsBasis + type t = {tmp:I.ea option, dst:CB.cell list, src:CB.cell list} + + fun error msg = MLRiscErrorMsg.error("HppaShuffle",msg) + + val zeroR = Option.valOf(C.zeroReg CB.GP) + + fun move{src=I.Direct rs, dst=I.Direct rt} = + [I.arith{a=I.OR, r1=rs, r2=zeroR, t=rt}] + | move{src=I.Displace{base, disp, mem}, dst=I.Direct rt} = + [I.loadi{li=I.LDW, r=base, i=I.LabExp(disp,I.F), t=rt, mem=mem}] + | move{src=I.Direct rs, dst=I.Displace{base, disp, mem}} = + [I.store{st=I.STW, b=base, d=I.LabExp(disp,I.F), r=rs, mem=mem}] + | move _ = error "move" + + fun fmove{src=I.FDirect fs, dst=I.FDirect fd} = + [I.funary{fu=I.FCPY_D, f=fs, t=fd}] + | fmove{src=I.Displace{base, disp, mem}, dst=I.FDirect ft} = let + val tmp = I.C.newCell CB.GP () + in + [I.ldo{i=I.LabExp(disp,I.F), b=base, t=tmp}, + I.floadx{flx=I.FLDDX, b=tmp, x=zeroR, t=ft, mem=mem}] + end + | fmove{src=I.FDirect fs, dst=I.Displace{base, disp, mem}} = let + val tmp = I.C.newCell CB.GP () + in + [I.ldo{i=I.LabExp(disp,I.F), b=base, t=tmp}, + I.fstorex{fstx=I.FSTDX, b=tmp, x=zeroR, r=fs, mem=mem}] + end + | fmove _ = error "move" + + val shuffle = Shuffle.shuffle{mvInstr=move, ea=I.Direct} + + val shufflefp = Shuffle.shuffle {mvInstr=fmove, ea=I.FDirect} +end + + diff --git a/MLRISC/hppa/mltree/hppa.sml b/MLRISC/hppa/mltree/hppa.sml new file mode 100644 index 0000000..50a58b4 --- /dev/null +++ b/MLRISC/hppa/mltree/hppa.sml @@ -0,0 +1,893 @@ +(* hppa.sml + * + * COPYRIGHT (c) 1996 AT&T Bell Laboratories. + * + * generates machine code from the mltree. + * + * This new version has been completely rewritten to take (more) advantage + * of the new improved instruction set. + * + * Please see the README.hppa file for details. + * + * -- Allen + *) + +functor Hppa + (structure HppaInstr : HPPAINSTR + structure ExtensionComp : MLTREE_EXTENSION_COMP + where I = HppaInstr + and T = HppaInstr.T + structure MilliCode : HPPA_MILLICODE + where I = HppaInstr + structure LabelComp : LABEL_COMP + where I = HppaInstr + and T = HppaInstr.T + val costOfMultiply : int ref + val costOfDivision : int ref + ) : MLTREECOMP = +struct + structure I = HppaInstr + structure T = I.T + structure TS = ExtensionComp.TS + structure C = I.C + structure CB = CellsBasis + structure MC = MilliCode + structure LC = LabelComp + structure Region = I.Region + structure A = MLRiscAnnotations + structure CFG = ExtensionComp.CFG + + type instrStream = (I.instruction, C.cellset, CFG.cfg) TS.stream + type mltreeStream = (T.stm, T.mlrisc list, CFG.cfg) TS.stream + + structure Gen = MLTreeGen(structure T = T + structure Cells = C + val intTy = 32 + val naturalWidths = [32] + datatype rep = SE | ZE | NEITHER + val rep = NEITHER + ) + fun mkcopy{dst, src, tmp} = + I.COPY{k=CB.GP, sz=32, dst=dst, src=src, tmp=tmp} + fun mkfcopy{dst, src, tmp} = + I.COPY{k=CB.FP, sz=64, dst=dst, src=src, tmp=tmp} + structure W = Word32 + functor Multiply32 = MLTreeMult + (structure I = I + structure T = T + structure CB = CB + val intTy = 32 + type arg = {r1:CB.cell,r2:CB.cell,d:CB.cell} + type argi = {r:CB.cell,i:int,d:CB.cell} + + fun mov{r,d} = mkcopy{dst=[d],src=[r],tmp=NONE} + fun add{r1,r2,d} = I.arith{a=I.ADD,r1=r1,r2=r2,t=d} + fun slli{r,i,d} = [I.shift{s=I.ZDEP,r=r,p=31-i,len=32-i,t=d}] + fun srli{r,i,d} = [I.shift{s=I.EXTRU,r=r,p=31-i,len=32-i,t=d}] + fun srai{r,i,d} = [I.shift{s=I.EXTRS,r=r,p=31-i,len=32-i,t=d}] + ) + + (* signed, trapping version of multiply and divide *) + structure Mult32 = Multiply32 + (val trapping = true + val multCost = costOfMultiply + val divCost = costOfDivision + fun addv{r1,r2,d} = [I.arith{a=I.ADDO,r1=r1,r2=r2,t=d}] + fun subv{r1,r2,d} = [I.arith{a=I.SUBO,r1=r1,r2=r2,t=d}] + val sh1addv = SOME(fn{r1,r2,d} => [I.arith{a=I.SH1ADDO,r1=r1,r2=r2,t=d}]) + val sh2addv = SOME(fn{r1,r2,d} => [I.arith{a=I.SH2ADDO,r1=r1,r2=r2,t=d}]) + val sh3addv = SOME(fn{r1,r2,d} => [I.arith{a=I.SH3ADDO,r1=r1,r2=r2,t=d}]) + ) + (val signed = true) + + (* unsigned, non-trapping version of multiply and divide *) + structure Mulu32 = Multiply32 + (val trapping = false + val multCost = costOfMultiply + val divCost = costOfDivision + fun addv{r1,r2,d} = [I.arith{a=I.ADD,r1=r1,r2=r2,t=d}] + fun subv{r1,r2,d} = [I.arith{a=I.SUB,r1=r1,r2=r2,t=d}] + val sh1addv = SOME(fn{r1,r2,d} => [I.arith{a=I.SH1ADDL,r1=r1,r2=r2,t=d}]) + val sh2addv = SOME(fn{r1,r2,d} => [I.arith{a=I.SH2ADDL,r1=r1,r2=r2,t=d}]) + val sh3addv = SOME(fn{r1,r2,d} => [I.arith{a=I.SH3ADDL,r1=r1,r2=r2,t=d}]) + ) + (val signed = false) + + (* signed, non-trapping version of multiply and divide *) + structure Muls32 = Multiply32 + (val trapping = false + val multCost = costOfMultiply + val divCost = costOfDivision + fun addv{r1,r2,d} = [I.arith{a=I.ADD,r1=r1,r2=r2,t=d}] + fun subv{r1,r2,d} = [I.arith{a=I.SUB,r1=r1,r2=r2,t=d}] + val sh1addv = SOME(fn{r1,r2,d} => [I.arith{a=I.SH1ADDL,r1=r1,r2=r2,t=d}]) + val sh2addv = SOME(fn{r1,r2,d} => [I.arith{a=I.SH2ADDL,r1=r1,r2=r2,t=d}]) + val sh3addv = SOME(fn{r1,r2,d} => [I.arith{a=I.SH3ADDL,r1=r1,r2=r2,t=d}]) + ) + (val signed = true) + + fun error msg = MLRiscErrorMsg.error("Hppa",msg) + + datatype ea = datatype I.addressing_mode + + datatype times248 = TIMES1 | TIMES2 | TIMES4 | TIMES8 + + datatype amode = + AMode of I.addressing_mode + | DISP of CB.cell * T.I.machine_int + + + fun LI i = T.LI(T.I.fromInt(32, i)) + fun toInt mi = T.I.toInt(32, mi) + fun toInt32 mi = T.I.toInt32(32, mi) + fun toWord mi = T.I.toWord(32, mi) + fun toWord32 mi = T.I.toWord32(32, mi) + fun EQ(x,y) = T.I.EQ(32, x, y) + fun LT(x,y) = T.I.LT(32, x, y) + fun GE(x,y) = T.I.GE(32, x, y) + + + fun selectInstructions + (instrStream as + TS.S.STREAM{emit=emitInstruction, defineLabel, entryLabel, getAnnotations, + beginCluster, endCluster, annotation, + exitBlock, pseudoOp, comment, ...}) = + let + (* operand type and effective addresss *) + + val newReg = C.newReg + val newFreg = C.newFreg + val CRReg = C.Reg C.CR + val zeroR = C.r0 + val zeroF = C.f0 + val zeroEA = I.Direct zeroR + val zeroT = T.REG(32,zeroR) + val zeroImmed = I.IMMED 0 + val zeroOpn = zeroImmed + + val emit = emitInstruction o I.INSTR + + local + fun f(i,[]) = i + | f(i, a::an) = f (I.ANNOTATION{i=i, a=a}, an) + in + fun mark(i, an) = emitInstruction(f(I.INSTR i, an)) + fun mark'(i, an) = emitInstruction(f(i, an)) + end + + val ldLabelEA = LC.ldLabelEA emitInstruction + val ldLabelOpnd = LC.ldLabelOpnd emitInstruction + + (* Check whether an expression is being multiplied by 2, 4, or 8 *) + local + fun mul(mi,e, exp) = + if EQ(mi, 2) then (TIMES2, e) + else if EQ(mi, 4) then (TIMES4, e) + else if EQ(mi, 8) then (TIMES8, e) + else (TIMES1, exp) + in + fun times(exp) = + (case exp + of T.MULU(_, e, T.LI mi) => mul(mi, e, exp) + | T.MULU(_, T.LI mi, e) => mul(mi, e, exp) + | T.SLL(_, e, T.LI mi) => + if EQ(mi, 1) then (TIMES2, e) + else if EQ(mi, 2) then (TIMES4, e) + else if EQ(mi, 3) then (TIMES8, e) + else (TIMES1, exp) + | _ => (TIMES1, exp) + (*esac*)) + + (* trapping version of the above *) + fun timest(exp as T.MULT(_, e, T.LI mi)) = mul(mi, e, exp) + | timest(exp as T.MULT(_, T.LI mi, e)) = mul(mi, e, exp) + | timest e = (TIMES1, e) + end (*local*) + + fun im5 n = LT(n, 16) andalso GE(n, ~16) + fun im11 n = LT(n, 1024) andalso GE(n, ~1024) + fun im14 n = LT(n, 8192) andalso GE(n, ~8192) + + (* Split values into 11 low bits and 21 high bits *) + fun split11w w = + {hi = Word32.toIntX(Word32.~>>(w,0w11)), + lo = Word32.toIntX(Word32.andb(w,0wx7ff))} + fun split11 n = split11w(toWord32 n) + + (* load immediate *) + fun loadImmed(n,t,an) = + if im14 n + then mark(I.LDO{i=I.IMMED(toInt n),b=zeroR,t=t},an) + else let val {hi,lo} = split11 n + val tmp = newReg() + in emit(I.LDIL{i=I.IMMED hi,t=tmp}); + mark(I.LDO{i=I.IMMED lo,b=tmp,t=t},an) + end + + (* generate code to load a immediate constant *) + fun immed (n: T.I.machine_int) = + let val t = newReg() in loadImmed(n,t,[]); t end + + (* load constant *) + fun loadConst(c,t,an) = + mark(I.LDO{b=zeroR,i=I.LabExp(c,I.F),t=t},an) (* XXX *) + + (* convert an operand into a register *) + fun reduceOpn i = + let val t = newReg() + in emit(I.LDO{i=i,b=zeroR,t=t}); t end + + (* emit parallel copies *) + fun copy(dst,src,an) = + mark'(mkcopy{dst=dst,src=src, + tmp=case dst of [_] => NONE | _ => SOME(I.Direct(newReg()))},an) + fun fcopy(dst,src,an) = + mark'(mkfcopy{dst=dst,src=src, + tmp=case dst of [_] => NONE | _ => SOME(I.FDirect(newFreg()))},an) + + (* move register s to register t *) + fun move(s,t,an) = + if CB.sameColor(s,t) orelse CB.registerId t = 0 then () + else if CB.registerId s = 0 then + mark(I.LDO{i=zeroImmed,b=zeroR,t=t},an) + else mark'(mkcopy{src=[s],dst=[t],tmp=NONE},an) + + (* move floating point register s to register t *) + fun fmove(s,t,an) = + if CB.sameColor(s,t) then () + else mark'(mkfcopy{src=[s],dst=[t],tmp=NONE},an) + + (* generate millicode function call *) + fun milliCall(milliFn, e1, e2, rd) = + let val rs = expr e1 + val rt = expr e2 + in app emitInstruction (milliFn{rs=rs,rt=rt,rd=rd}) end + + (* emit an arithmetic op with possible immediate mode + * The immed operand is the first operand on the HPPA! Arrrrggggghhhh! + *) + and immedArith(a,ai,e1,e2,t,an) = + case (opn e1,expr e2) of + (I.REG r1,r2) => mark(I.ARITH{a=a,r1=r1,r2=r2,t=t},an) + | (i,r) => mark(I.ARITHI{ai=ai,r=r,i=i,t=t},an) + + (* emit a commutative arithmetic op with immediate mode *) + and commImmedArith(a,ai,e1,e2,t,an) = + case (opn e1,opn e2) of + (I.REG r1,I.REG r2) => mark(I.ARITH{a=a,r1=r1,r2=r2,t=t},an) + | (I.REG r,i) => mark(I.ARITHI{ai=ai,r=r,i=i,t=t},an) + | (i,I.REG r) => mark(I.ARITHI{ai=ai,r=r,i=i,t=t},an) + | (i,j) => mark(I.ARITHI{ai=ai,r=reduceOpn i,i=j,t=t},an) + + (* emit an arithmetic op *) + and arith(a,e1,e2,t,an) = + mark(I.ARITH{a=a,r1=expr e1,r2=expr e2,t=t},an) + + (* emit an unary floating point op *) + and funary(a,e,t,an) = mark(I.FUNARY{fu=a,f=fexpr e,t=t},an) + + (* emit an conversion floating point op *) + and fcnv(a,e,t,an) = mark(I.FCNV{fcnv=a,f=fexpr e,t=t},an) + + (* emit a binary floating point op *) + and farith(a,e1,e2,t,an) = + mark(I.FARITH{fa=a,r1=fexpr e1,r2=fexpr e2,t=t},an) + + (* convert an expression into an addressing mode + * scale is the size of the data being addressed. + * + * Return the addressing mode and an infinite precision immediate + * in the case of DISPea. + *) + and addr(scale,T.ADD(_,e,T.LI n)) = DISP(expr e, n) + | addr(scale,T.ADD(_,e,c as T.CONST _)) = + AMode(DISPea(expr e,I.LabExp(c,I.F))) + | addr(scale,T.ADD(ty,i as T.LI _,e)) = addr(scale,T.ADD(ty,e,i)) + | addr(scale,T.ADD(_,c as T.CONST _,e)) = + AMode(DISPea(expr e,I.LabExp(c,I.F))) + | addr(scale,T.ADD(_,e,T.LABEXP le)) = + let val rs = expr e + val (rt, opnd) = ldLabelEA le + in case (CB.registerId rt, opnd) of + (0, opnd) => AMode(DISPea(rs,opnd)) + | (_,I.IMMED 0) => AMode(INDXea(rs,rt)) + | (_,opnd) => + let val tmp = newReg() + in emit(I.ARITH{a=I.ADD,r1=rs,r2=rt,t=tmp}); + AMode(DISPea(tmp,opnd)) + end + end + | addr(scale,T.ADD(t,e1 as T.LABEXP l,e2)) = addr(scale,T.ADD(t,e2,e1)) + | addr(scale,T.ADD(_,e1,e2)) = + let (* check for special multiply add sequence + * here, e1 is is scaled + *) + fun scaleIndexed(actualScale,opcode,e1,e2) = + if actualScale = scale then (* can we use scaled indexing mode?*) + let val x = expr e1 + val b = expr e2 + in AMode(INDXSCALEDea(b,x)) + end + else (* no, use the SHnADD operator, then *) + let val tmp = newReg() + in emit(I.ARITH{a=opcode,r1=expr e1,r2=expr e2,t=tmp}); + AMode(DISPea(tmp,zeroImmed)) + end + in case times e1 of + (TIMES2,e1) => scaleIndexed(16,I.SH1ADD,e1,e2) + | (TIMES4,e1) => scaleIndexed(32,I.SH2ADD,e1,e2) + | (TIMES8,e1) => scaleIndexed(64,I.SH3ADD,e1,e2) + | _ => + case times e2 of + (TIMES2,e2) => scaleIndexed(16,I.SH1ADD,e2,e1) + | (TIMES4,e2) => scaleIndexed(32,I.SH2ADD,e2,e1) + | (TIMES8,e2) => scaleIndexed(64,I.SH3ADD,e2,e1) + | _ => AMode(INDXea(expr e1,expr e2)) + end + | addr(scale,T.SUB(ty,e,T.LI n)) = addr(scale,T.ADD(ty,e,T.LI(T.I.NEGT(32,n)))) + | addr(scale,T.LABEXP lexp) = AMode(DISPea(ldLabelEA(lexp))) + | addr(scale,ea) = AMode(DISPea(expr ea,zeroImmed)) + + (* emit an integer load + * li - load immediate, + * l - load indexed + * ls - load indexed with scaling + * r1 is base r2 is x + *) + and load(scale,li,l,ls,ea,t,mem,an) = + case addr(scale,ea) + of DISP(r, off) => + if im14 off then + mark(I.LOADI{li=li,r=r,i=I.IMMED(toInt off),t=t,mem=mem},an) + else + mark(I.LOAD{l=l,r1=r,r2=immed off,t=t,mem=mem},an) + | AMode(DISPea(r,i)) => mark(I.LOADI{li=li,r=r,i=i,t=t,mem=mem},an) + | AMode(INDXea(r1,r2)) => mark(I.LOAD{l=l,r1=r1,r2=r2,t=t,mem=mem},an) + | AMode(INDXSCALEDea(b,x)) => mark(I.LOAD{l=ls,r1=b,r2=x,t=t,mem=mem},an) + + (* emit an integer store *) + and store(st,ea,r,mem,an) = + let val (b,d) = + case addr(0,ea) + of DISP(b, disp) => + if im14 disp then (b,I.IMMED(toInt disp) ) + else let val {hi,lo} = split11 disp + val tmp1 = newReg() + val tmp2 = newReg() + in emit(I.LDIL{i=I.IMMED hi,t=tmp1}); + emit(I.ARITH{a=I.ADD,r1=b,r2=tmp1,t=tmp2}); + (tmp2,I.IMMED lo) + end + | AMode(DISPea bd) => bd + | AMode(INDXea(r1,r2)) => + let val tmp = newReg() + in emit(I.ARITH{a=I.ADD,r1=r1,r2=r2,t=tmp}); + (tmp,I.IMMED 0) + end + | AMode(INDXSCALEDea _) => error "store" + in mark(I.STORE{st=st,b=b,d=d,r=r,mem=mem},an) end + + (* emit a floating point load *) + and fload(scale,fl,flx,flxs,ea,t,mem,an) = + case addr(scale,ea) of + AMode(INDXea(b,x)) => mark(I.FLOADX{flx=flx,b=b,x=x,t=t,mem=mem},an) + | AMode(INDXSCALEDea(b,x)) => + mark(I.FLOADX{flx=flxs,b=b,x=x,t=t,mem=mem},an) + | AMode(DISPea(b,d)) => + let val tmp = newReg() + in emit(I.ARITHI{ai=I.ADDI,r=b,i=d,t=tmp}); + mark(I.FLOADX{flx=flx,b=tmp,x=zeroR,t=t,mem=mem},an) + end + | DISP(b, d) => + if im5 d then + mark(I.FLOAD{fl=fl,b=b,d=toInt d,t=t,mem=mem},an) + else + mark(I.FLOADX{flx=flx,b=b,x=immed d,t=t,mem=mem},an) + + (* emit a floating point store *) + and fstore(scale,fst,fstx,fstxs,ea,data,mem,an) = + let val r = fexpr data + in case addr(scale,ea) of + DISP(b, d) => + if im5 d then + mark(I.FSTORE{fst=fst,b=b,d=(toInt d),r=r,mem=mem},an) + else mark(I.FSTOREX{fstx=fstx,b=b,x=immed d,r=r,mem=mem},an) + | AMode(DISPea(b,d)) => + let val tmp = newReg() + in emit(I.ARITHI{ai=I.ADDI,r=b,i=d,t=tmp}); + mark(I.FSTORE{fst=I.FSTDS,b=tmp,d=0,r=r,mem=mem},an) + end + | AMode(INDXea(b,x)) => + mark(I.FSTOREX{fstx=fstx,b=b,x=x,r=r,mem=mem},an) + | AMode(INDXSCALEDea(b,x)) => + mark(I.FSTOREX{fstx=fstxs,b=b,x=x,r=r,mem=mem},an) + end + + (* emit an integer branch instruction *) + + (* generate a branch *) + and branch(T.CMP(ty,cc,T.LI n,e),lab,an) = (* optimize cmp immed *) + emitBranchCmpWithImmed(ty,cc,n,e,lab,an) + | branch(T.CMP(ty,cc,e1,e2 as T.LI _),lab,an) = (* commute *) + branch(T.CMP(ty,T.Basis.swapCond cc,e2,e1),lab,an) + | branch(T.CMP(ty,cc,a,b),lab,an) = (* do the usual *) + emitBranch(ty,cc,expr a,expr b,lab,an) + | branch(T.FCMP(fty,cc,a,b),lab,an) = + let val f1 = fexpr a + val f2 = fexpr b + val fallThrough = Label.anon() + fun fcond T.== = I.!= + | fcond T.?<> = I.== + | fcond T.? = I.<=> + | fcond T.<=> = I.? + | fcond T.> = I.?<= + | fcond T.>= = I.?< + | fcond T.?> = I.<= + | fcond T.?>= = I.< + | fcond T.< = I.?>= + | fcond T.<= = I.?> + | fcond T.?< = I.>= + | fcond T.?<= = I.> + | fcond T.<> = I.?= + | fcond T.?= = I.<> + | fcond _ = error "fcond" + in mark(I.FBRANCH{cc=fcond cc,f1=f1,f2=f2,t=lab,f=fallThrough, + fmt=getFmt a,n=true,long=false},an); + defineLabel fallThrough + end + | branch(e,lab,an) = error "branch: what is the semantics?" + + (* generate a branch cmp with immed *) + and emitBranchCmpWithImmed(ty,cc,n,e2 as T.ANDB(_,e,T.LI mask),t,an) = + emitBranchOnBit(ty,cc,n,e2,e,toWord32 mask,t,an) + | emitBranchCmpWithImmed(ty,cc,n,e2 as T.ANDB(_,T.LI mask,e),t,an) = + emitBranchOnBit(ty,cc,n,e2,e,toWord32 mask,t,an) + | emitBranchCmpWithImmed(ty,cc,n,e2,t,an) = + emitBranchI(ty,cc,n,e2,t,an) + + (* generate a branch on bit *) + and emitBranchOnBit(ty,cc,n,e2,e,mask,t,an) = + let fun isPowerOf2 w = W.andb(w,w-0w1) = 0w0 + fun log w = + let fun f(0w1,n) = n + | f(w,n) = f(W.>>(w,0w1),n+1) + in f(w,0) end + val n' = toWord32 n + in if (n' = 0w0 orelse n' = mask) andalso + (cc = T.EQ orelse cc = T.NE) andalso + (mask > 0w0 andalso isPowerOf2 mask) then (* bit test! *) + let val bc = + case (cc,n') of + (T.EQ,0w0) => I.BCLR (* bit is 0 *) + | (T.EQ,_) => I.BSET (* bit is 1 *) + | (T.NE,0w0) => I.BSET (* bit is 1 *) + | (T.NE,_) => I.BCLR (* bit is 0 *) + | _ => error "emitBranchOnBit" + val f = Label.anon() + val bit = 31 - log mask + in mark(I.BB{bc=bc,r=expr e,p=bit,t=t,f=f, + n=false, nop=true},an); + defineLabel f + end + else + emitBranchI(ty,cc,n,e2,t,an) + end + + (* generate a branch cmp with immediate *) + and emitBranchI(ty,cc,n,e2,t,an) = + let val r2 = expr e2 + in if im5 n then + let val f = Label.anon() + val (cmpi,bc) = + case cc of + T.LT => (I.COMIBT, I.LT) + | T.LE => (I.COMIBT, I.LE) + | T.GT => (I.COMIBF, I.LE) + | T.GE => (I.COMIBF, I.LT) + | T.EQ => (I.COMIBT, I.EQ) + | T.LTU => (I.COMIBT, I.LTU) + | T.LEU => (I.COMIBT, I.LEU) + | T.GEU => (I.COMIBF, I.LTU) + | T.GTU => (I.COMIBF, I.LEU) + | T.NE => (I.COMIBF, I.EQ) + | _ => error "emitBranchI" + in mark(I.BCONDI{cmpi=cmpi,bc=bc,i=toInt(n),r2=r2,t=t,f=f, + n=false, nop=true},an); + defineLabel f + end + else emitBranch(ty,cc,immed n,r2,t,an) + end + + (* generate a branch *) + and emitBranch(ty,cond,r1,r2,t,an) = + let val f = Label.anon() + val (cmp,bc,r1,r2) = + case cond of + T.LT => (I.COMBT, I.LT, r1, r2) + | T.LE => (I.COMBT, I.LE, r1, r2) + | T.GT => (I.COMBT, I.LT, r2, r1) + | T.GE => (I.COMBT, I.LE, r2, r1) + | T.EQ => (I.COMBT, I.EQ, r1, r2) + | T.LTU => (I.COMBT, I.LTU, r1, r2) + | T.LEU => (I.COMBT, I.LEU, r1, r2) + | T.GEU => (I.COMBT, I.LEU, r2, r1) + | T.GTU => (I.COMBT, I.LTU, r2, r1) + | T.NE => (I.COMBF, I.EQ, r1, r2) + | _ => error "emitBranch" + in mark(I.BCOND{cmp=cmp,bc=bc,r1=r1,r2=r2,t=t,f=f, + n=false,nop=true},an); + defineLabel f + end + + and getFmt e = + case Gen.Size.fsize e of + 32 => I.SGL + | 64 => I.DBL + | 128 => I.QUAD + | _ => error "getFmt" + + and goto(l,an) = mark(I.B{lab=l,n=true},an) + + (* generate code for a statement *) + and stmt(T.MV(32,t,e),an) = doExpr(e,t,an) + | stmt(T.FMV(64,t,e),an) = doFexpr(e,t,an) + | stmt(T.CCMV(t,e),an) = doCCexpr(e,t,an) + | stmt(T.COPY(32,dst,src),an) = copy(dst,src,an) + | stmt(T.FCOPY(64,dst,src),an) = fcopy(dst,src,an) + | stmt(T.JMP(T.LABEL l,_),an) = goto(l,an) + | stmt(T.JMP(ea,labs),an) = jmp(ea,labs,an) + | stmt(s as T.CALL { pops=0, ...},an) = call(s,an) + | stmt(T.CALL _, _) = error "pops<>0 not implemented" + | stmt(T.RET _,an) = + mark(I.BV{labs=[],x=zeroR,b=C.returnPtr,n=true},an) + | stmt(T.STORE(8,ea,t,mem),an) = store(I.STB,ea,expr t,mem,an) + | stmt(T.STORE(16,ea,t,mem),an) = store(I.STH,ea,expr t,mem,an) + | stmt(T.STORE(32,ea,t,mem),an) = store(I.STW,ea,expr t,mem,an) + | stmt(T.FSTORE(32,ea,t,mem),an) = + fstore(32,I.FSTWS,I.FSTWX,I.FSTWX_S,ea,t,mem,an) + | stmt(T.FSTORE(64,ea,t,mem),an) = + fstore(64,I.FSTDS,I.FSTDX,I.FSTDX_S,ea,t,mem,an) + | stmt(T.BCC(cc,lab),an) = branch(cc,lab,an) + | stmt(T.DEFINE l,_) = defineLabel l + | stmt(T.LIVE S,an) = mark'(I.LIVE{regs=cellset S,spilled=C.empty},an) + | stmt(T.KILL S,an) = mark'(I.KILL{regs=cellset S,spilled=C.empty},an) + | stmt(T.ANNOTATION(i,a),an) = stmt(i,a::an) + | stmt(T.EXT s,an) = + ExtensionComp.compileSext (reducer()) {stm=s, an=an} + | stmt(s,_) = doStmts(Gen.compileStm s) + + and doStmt s = stmt(s,[]) + and doStmts ss = app doStmt ss + + and jmp(e,labs,an) = let + fun disp(r, i) = let + val b = newReg() + in emit(I.ARITHI{ai=I.ADDI, i=i, r=r, t=b}); + (b, zeroR) + end + + val (b,x) = + case addr(32,e) of + DISP(b, i) => + if i = 0 then (b, zeroR) else disp(b, I.IMMED(toInt i)) + | AMode(DISPea(r,i)) => disp(r, i) + | AMode(INDXea(r1,r2)) => let val b=newReg() + in emit(I.ARITH{a=I.ADD,r1=r1,r2=r2,t=b}); + (b,zeroR) + end + | AMode(INDXSCALEDea(b,x)) => (b,x) + in mark(I.BV{b=b,x=x,n=true,labs=labs},an) end + + and call(s,an) = let val reduce = {stm=doStmt, rexp=expr, emit=emitInstruction} + in LC.doCall(reduce,s) end + + (* Optimize addition *) + and plus(times,sh1add,sh2add,sh3add,add,addi,a,b,t,an) = + case times a of + (TIMES2,a) => arith(sh1add,a,b,t,an) + | (TIMES4,a) => arith(sh2add,a,b,t,an) + | (TIMES8,a) => arith(sh3add,a,b,t,an) + | _ => + case times b of + (TIMES2,b) => arith(sh1add,b,a,t,an) + | (TIMES4,b) => arith(sh2add,b,a,t,an) + | (TIMES8,b) => arith(sh3add,b,a,t,an) + | _ => commImmedArith(add,addi,a,b,t,an) + + (* Round to zero for division: + * d <- r + i + * d <- if r >= 0 then r else d + *) + and divu32 x = Mulu32.divide{mode=T.TO_ZERO,stm=doStmt} x + and divs32 x = Muls32.divide{mode=T.TO_ZERO,stm=doStmt} x + and divt32 x = Mult32.divide{mode=T.TO_ZERO,stm=doStmt} x + + and muldiv(ty,genConst,milliFn,a,b,t,commute,an) = + let fun const(a,i) = + let val r = expr a + in app emitInstruction (genConst{r=r,i=toInt i,d=t}) + handle _ => milliCall(milliFn,T.REG(ty,r),T.LI i,t) + end + in case (commute,a,b) of + (_,a,T.LI i) => const(a,i) + | (true,T.LI i,a) => const(a,i) + | (_,a,b) => milliCall(milliFn,a,b,t) + end + + (* compile shift *) + and shift(immedShift,varShift,e,T.LI n,t,an) = let + val n = toInt n + in + if n < 0 orelse n > 31 then error "shift" + else mark(I.SHIFT{s=immedShift,r=expr e,p=31-n,len=32-n,t=t},an) + end + | shift(immedShift,varShift,e1,e2,t,an) = + let val r1 = expr e1 + val r2 = expr e2 + val tmp = newReg() + in emit(I.ARITHI{ai=I.SUBI, i=I.IMMED 31, r=r2, t=tmp}); + emit(I.MTCTL{r=tmp, t=CRReg 11}); + mark(I.SHIFTV{sv=varShift,r=r1,len=32, t=t},an) + end + + (* Generate a COMCLR_LDO/COMICLR_LDO instruction sequence: + * COMCLR,cond r1, r2, t1 + * LDO i(b), t2 + * + * Note: + * t <- if cond(r1,r2) then i else 0 can be mapped into: + * + * COMCLR,cond r1, r2, t + * LDO i(0), t + * + * if cond(r1,r2) then t <- e can be mapped into: + * + * t' <- e + * COMCLR,cond r1, r2, 0 + * LDO 0(t'), t + * + * t <- if cond(r1,r2) then e1 else e2 can be mapped into: + * + * t <- e2 + * t' <- e1 + * COMCLR,cond r1, r2, 0 + * LDO 0(t'), t + *) + and comclr(cond,x,y,yes,no,t,an) = + let val (cond, i1, r2) = + case (opn x, opn y) of + (x, I.REG r2) => (cond, x, r2) + | (I.REG r1, y) => (T.Basis.swapCond cond, y, r1) + | (x, y) => (cond, x, reduceOpn y) + val cc = case cond of + T.LT => I.GE + | T.LE => I.GT + | T.GT => I.LE + | T.GE => I.LT + | T.EQ => I.NE + | T.LTU => I.GEU + | T.LEU => I.GTU + | T.GEU => I.LTU + | T.GTU => I.LEU + | T.NE => I.EQ + | _ => error "comclr" + val tmp = newReg() + val (b,i) = + case yes + of T.LI n => if im14 n then (zeroR, toInt(n)) else + let val {hi,lo} = split11 n + val b = newReg() + in emit(I.LDIL{i=I.IMMED hi,t=b}); (b,lo) end + | e => (expr e, 0) + (*esac*) + + val t1 = + case no + of T.LI z => (* false case is zero *) + if z = 0 then tmp else (doExpr(no,tmp,[]); zeroR) + | _ => (doExpr(no,tmp,[]); zeroR) (* move false case to tmp *) + (*esac*) + + val instr = + case i1 of + I.REG r1 => + I.COMCLR_LDO{cc=cc,r1=r1,r2=r2,b=b,i=i,t1=t1,t2=tmp} + | _ => I.COMICLR_LDO{cc=cc,i1=i1,r2=r2,b=b,i2=i,t1=t1,t2=tmp} + in mark(instr, an); + move(tmp, t, []) + end + + (* convert an expression into a register *) + and expr(exp) = let + fun comp() = let + val t = newReg() + in doExpr(exp, t, []); t + end + in + case exp + of T.REG(_, r) => r + | T.LI z => if z = 0 then zeroR else comp() + | _ => comp() + end + + (* compute an integer expression and put the result in register t *) + and doExpr(e,t,an) = + case e of + T.REG(_,r) => move(r,t,an) + | T.LI n => loadImmed(n,t,an) + | T.LABEXP le => + (case ldLabelOpnd{label=le,pref=SOME t} of + I.REG r => move(r,t,an) + | opnd => mark(I.LDO{i=opnd,b=zeroR,t=t},an) + ) + | T.CONST _ => loadConst(e,t,an) + | T.LABEL _ => loadConst(e,t,an) + | T.ADD(_,a,b) => plus(times, + I.SH1ADDL,I.SH2ADDL,I.SH3ADDL,I.ADD,I.ADDI, + a,b,t,an) + | T.SUB(_,a,T.LI mi) => + if mi = 0 then doExpr(a,t,an) + else commImmedArith(I.ADD,I.ADDI,a,T.LI(T.I.NEGT(32,mi)),t,an) + | T.SUB(_,a,b) => immedArith(I.SUB,I.SUBI,a,b,t,an) + | T.ADDT(_,a,b) => plus(timest, + I.SH1ADDO,I.SH2ADDO,I.SH3ADDO,I.ADDO,I.ADDIO, + a,b,t,an) + | T.SUBT(_,a,T.LI n) => + commImmedArith(I.ADDO,I.ADDIO,a,T.LI(T.I.NEGT(32,n)),t,an) + | T.SUBT(_,a,b) => immedArith(I.SUBO,I.SUBIO,a,b,t,an) + + | T.ANDB(_,a,T.NOTB(_,b)) => arith(I.ANDCM,a,b,t,an) + | T.ANDB(_,T.NOTB(_,a),b) => arith(I.ANDCM,b,a,t,an) + | T.ANDB(_,a,b) => arith(I.AND,a,b,t,an) + | T.ORB(_,a,b) => arith(I.OR,a,b,t,an) + | T.XORB(_,a,b) => arith(I.XOR,a,b,t,an) + + | T.SLL(_,a,b) => shift(I.ZDEP,I.ZVDEP,a,b,t,an) + | T.SRL(_,a,b) => shift(I.EXTRU,I.VEXTRU,a,b,t,an) + | T.SRA(_,a,b) => shift(I.EXTRS,I.VEXTRS,a,b,t,an) + | T.MULU(32,a,b) => muldiv(32,Mulu32.multiply,MC.mulu,a,b,t,true,an) + | T.MULS(32,a,b) => muldiv(32,Muls32.multiply,MC.mulu,a,b,t,true,an) + | T.MULT(32,a,b) => muldiv(32,Mult32.multiply,MC.mulo,a,b,t,true,an) + | T.DIVU(32,a,b) => muldiv(32,divu32,MC.divu,a,b,t,false,an) +(* FIXME: The following is a hack: We use the trapping div in place of + * the non-trapping since we currently expect that the non-trapping + * div will only be used where there is some high-level reasoning + * that the trapping div would in fact not trap. *) + | T.DIVS(T.DIV_TO_ZERO,32,a,b) => + muldiv(32,divs32,MC.divo,a,b,t,false,an) + | T.DIVT(T.DIV_TO_ZERO,32,a,b) => + muldiv(32,divt32,MC.divo,a,b,t,false,an) + + | T.LOAD(8,ea,mem) => load(8,I.LDB,I.LDBX,I.LDBX,ea,t,mem,an) + | T.LOAD(16,ea,mem) => load(16,I.LDH,I.LDHX,I.LDHX_S,ea,t,mem,an) + | T.LOAD(32,ea,mem) => load(32,I.LDW,I.LDWX,I.LDWX_S,ea,t,mem,an) + + | T.COND(_,T.CMP(_,cond,x,y),yes,no) => comclr(cond,x,y,yes,no,t,an) + | T.LET(s,e) => (doStmt s; doExpr(e, t, an)) + | T.MARK(e,A.MARKREG f) => (f t; doExpr(e,t,an)) + | T.MARK(e,a) => doExpr(e,t,a::an) + | T.PRED(e,c) => doExpr(e,t,A.CTRLUSE c::an) + | T.REXT e => + ExtensionComp.compileRext (reducer()) {e=e, rd=t, an=an} + | e => doExpr(Gen.compileRexp e,t,an) + + (* convert an expression into a floating point register *) + and fexpr(T.FREG(_,r)) = r + | fexpr e = let val t = newFreg() + in doFexpr(e,t,[]); t end + + (* compute a floating point expression and put the result in t *) + and doFexpr(e,t,an) = + case e of + (* single precision *) + T.FREG(32,r) => fmove(r,t,an) + | T.FLOAD(32,ea,mem) => + fload(32,I.FLDWS,I.FLDWX,I.FLDWX_S,ea,t,mem,an) + | T.FADD(32,a,b) => farith(I.FADD_S,a,b,t,an) + | T.FSUB(32,a,b) => farith(I.FSUB_S,a,b,t,an) + | T.FMUL(32,a,b) => farith(I.FMPY_S,a,b,t,an) + | T.FDIV(32,a,b) => farith(I.FDIV_S,a,b,t,an) + | T.FABS(32,a) => funary(I.FABS_S,a,t,an) + | T.FSQRT(32,a) => funary(I.FSQRT_S,a,t,an) + + (* double precision *) + | T.FREG(64,r) => fmove(r,t,an) + | T.FLOAD(64,ea,mem) => + fload(64,I.FLDDS,I.FLDDX,I.FLDDX_S,ea,t,mem,an) + | T.FADD(64,a,b) => farith(I.FADD_D,a,b,t,an) + | T.FSUB(64,a,b) => farith(I.FSUB_D,a,b,t,an) + | T.FMUL(64,a,b) => farith(I.FMPY_D,a,b,t,an) + | T.FDIV(64,a,b) => farith(I.FDIV_D,a,b,t,an) + | T.FABS(64,a) => funary(I.FABS_D,a,t,an) + | T.FSQRT(64,a) => funary(I.FSQRT_D,a,t,an) + + (* conversions *) + | T.CVTF2F(fty,fty',e) => + (case (fty,fty') of + (64,32) => fcnv(I.FCNVFF_SD,e,t,an) + | (32,64) => fcnv(I.FCNVFF_DS,e,t,an) + | (32,32) => doFexpr(e,t,an) + | (64,64) => doFexpr(e,t,an) + | _ => error "CVTF2F" + ) + | T.CVTI2F(32,_,e) => app emitInstruction (MilliCode.cvti2s{rs=expr e,fd=t}) + | T.CVTI2F(64,_,e) => app emitInstruction (MilliCode.cvti2d{rs=expr e,fd=t}) + + (* negation is implemented as subtraction *) + | T.FNEG(ty,a) => doFexpr(T.FSUB(ty,T.FREG(ty,zeroF),a),t,an) + + | T.FMARK(e,A.MARKREG f) => (f t; doFexpr(e,t,an)) + | T.FMARK(e,a) => doFexpr(e,t,a::an) + | T.FPRED(e,c) => doFexpr(e,t,A.CTRLUSE c::an) + | T.FEXT e => + ExtensionComp.compileFext (reducer()) {e=e, fd=t, an=an} + | e => error "doFexpr" + + and doCCexpr(T.CC(_,r),t,an) = move(r,t,an) + | doCCexpr(T.FCC(_,r),t,an) = move(r,t,an) + | doCCexpr(T.CMP(ty,cond,e1,e2),t,an) = error "doCCexpr" + | doCCexpr(T.CCMARK(e,A.MARKREG f),t,an) = (f t; doCCexpr(e,t,an)) + | doCCexpr(T.CCMARK(e,a),t,an) = doCCexpr(e,t,a::an) + | doCCexpr(T.CCEXT e,t,an) = + ExtensionComp.compileCCext (reducer()) {e=e,ccd=t,an=an} + | doCCexpr e = error "doCCexpr" + + and ccExpr(T.CC(_,r)) = r + | ccExpr(T.FCC(_,r)) = r + | ccExpr e = let val t = newReg() in doCCexpr(e,t,[]); t end + + (* convert an expression into an operand *) + and opn(c as T.CONST _) = I.LabExp(c,I.F) + | opn(l as T.LABEL _) = I.LabExp(l,I.F) + | opn(T.LABEXP le) = ldLabelOpnd{label=le,pref=NONE} + | opn(e as T.LI n) = if im11 n then I.IMMED(toInt n) + else I.REG(expr e) + | opn e = I.REG(expr e) + + and addrOf e = + case addr(0, e) + of AMode mode => mode + | DISP(r, mi) => DISPea(r, I.IMMED(toInt mi)) + + and reducer() = + TS.REDUCER{reduceRexp = expr, + reduceFexp = fexpr, + reduceCCexp = ccExpr, + reduceStm = stmt, + operand = opn, + reduceOperand = reduceOpn, + addressOf = addrOf, + emit = mark', + instrStream = instrStream, + mltreeStream = self() + } + + (* convert mlrisc to cellset: + * condition code registers are mapped onto general registers + *) + and cellset mlrisc = + let fun g([],acc) = acc + | g(T.GPR(T.REG(_,r))::regs,acc) = g(regs,C.addReg(r,acc)) + | g(T.FPR(T.FREG(_,f))::regs,acc) = g(regs,C.addFreg(f,acc)) + | g(T.CCR(T.CC(_,cc))::regs,acc) = g(regs,C.addReg(cc,acc)) + | g(_::regs, acc) = g(regs, acc) + in g(mlrisc, C.empty) end + + and self() = + TS.S.STREAM + { beginCluster = beginCluster, + endCluster = endCluster, + emit = doStmt, + pseudoOp = pseudoOp, + defineLabel = defineLabel, + entryLabel = entryLabel, + comment = comment, + annotation = annotation, + getAnnotations = getAnnotations, + exitBlock = fn regs => exitBlock(cellset regs) + } + in self() + end + + + end diff --git a/MLRISC/hppa/mltree/hppaLabelComp.sig b/MLRISC/hppa/mltree/hppaLabelComp.sig new file mode 100644 index 0000000..53cac63 --- /dev/null +++ b/MLRISC/hppa/mltree/hppaLabelComp.sig @@ -0,0 +1,28 @@ +signature LABEL_COMP = sig + structure T : MLTREE + structure I : INSTRUCTIONS + + type reduce = + {stm: T.stm -> unit, + rexp: T.rexp -> CellsBasis.cell, + emit:I.instruction -> unit + } + (* functions to emit MLRISC statements or register expressions *) + + val ldLabelEA : + (I.instruction -> unit) -> T.labexp -> (CellsBasis.cell * I.operand) + (* generate a label operand to use as an effective address *) + + val ldLabelOpnd : + (I.instruction -> unit) -> + {label:T.labexp, pref:CellsBasis.cell option} -> I.operand + (* generate a label operand to be used by immediate instructions *) + + val doJmp : reduce * T.stm -> unit + (* compile a jump involving a label *) + + val doCall : reduce * T.stm -> unit + (* compile a call involving a label *) + +end + diff --git a/MLRISC/hppa/mltree/hppaMillicode.sig b/MLRISC/hppa/mltree/hppaMillicode.sig new file mode 100644 index 0000000..936dd90 --- /dev/null +++ b/MLRISC/hppa/mltree/hppaMillicode.sig @@ -0,0 +1,13 @@ +signature HPPA_MILLICODE = sig + structure I : HPPAINSTR + structure CB : CELLS_BASIS = CellsBasis + + val divu : {rs:CB.cell, rt:CB.cell, rd:CB.cell} -> I.instruction list + val mulo : {rs:CB.cell, rt:CB.cell, rd:CB.cell} -> I.instruction list + val divo : {rs:CB.cell, rt:CB.cell, rd:CB.cell} -> I.instruction list + val mulu : {rs:CB.cell, rt:CB.cell, rd:CB.cell} -> I.instruction list + val cvti2s : {rs:CB.cell, fd:CB.cell} -> I.instruction list + val cvti2d : {rs:CB.cell, fd:CB.cell} -> I.instruction list + val cvti2q : {rs:CB.cell, fd:CB.cell} -> I.instruction list +end + diff --git a/MLRISC/hppa/ra/hppaRegAlloc.sml b/MLRISC/hppa/ra/hppaRegAlloc.sml new file mode 100644 index 0000000..a544867 --- /dev/null +++ b/MLRISC/hppa/ra/hppaRegAlloc.sml @@ -0,0 +1,74 @@ +(* hppaRegAlloc.sml --- hppa integer and floating register allocator + * + * COPYRIGHT (c) 1996 AT&T Bell Laboratories. + * + *) + +(* Integer and floating register allocators are a partial application + * of a curried functor. + *) + +functor HppaRegAlloc(structure I : INSTRUCTIONS where C = HppaCells + structure P : INSN_PROPERTIES where I = I + structure F : FLOWGRAPH where I = I and P = P + structure Asm : INSTRUCTION_EMITTER where I = I and P = P) : + sig + functor IntRa (structure RaUser : RA_USER_PARAMS + where I = I + where type B.name = F.B.name) : RA + functor FloatRa (structure RaUser : RA_USER_PARAMS + where I = I + where type B.name = F.B.name) : RA + end= +struct + + structure C=I.C + + (* liveness analysis for general purpose registers *) + structure RegLiveness = + Liveness(structure Flowgraph=F + structure Instruction=I + val defUse = P.defUse C.GP + val regSet = C.getCell C.GP + val cellset = C.updateCell C.GP) + + + functor IntRa = + RegAllocator + (structure RaArch = struct + structure InsnProps = P + structure AsmEmitter = Asm + structure I = I + structure Liveness=RegLiveness + + val defUse = P.defUse C.GP + val firstPseudoR = 32 + val maxPseudoR = C.maxCell + val numRegs = C.numCell C.GP + val regSet = C.getCell C.GP + end) + + (* liveness analysis for floating point registers *) + structure FregLiveness = + Liveness(structure Flowgraph=F + structure Instruction=I + val defUse = P.defUse C.FP + val regSet = C.getCell C.FP + val cellset = C.updateCell C.FP) + + functor FloatRa = + RegAllocator + (structure RaArch = struct + structure InsnProps = P + structure AsmEmitter = Asm + structure I = I + structure Liveness=FregLiveness + + val defUse = P.defUse C.FP + val firstPseudoR = 64 + val maxPseudoR = C.maxCell + val numRegs = HppaCells.numCell C.FP + val regSet = C.getCell C.FP + end) +end + diff --git a/MLRISC/hppa/ra/hppaRewrite.sml b/MLRISC/hppa/ra/hppaRewrite.sml new file mode 100644 index 0000000..d3462f1 --- /dev/null +++ b/MLRISC/hppa/ra/hppaRewrite.sml @@ -0,0 +1,198 @@ +(* hppaRewrite.sml -- rewrite an hppa instruction + * + * COPYRIGHT (c) 1997 Bell Labs + *) + +functor HppaRewrite(Instr:HPPAINSTR) = struct + structure I = Instr + structure C = I.C + structure CB = CellsBasis + structure CS = CB.CellSet + + fun error msg = MLRiscErrorMsg.error("HppaRewrite",msg) + + fun rewriteUse(instr, rs, rt) = let + fun replc r = if CB.sameColor(r,rs) then rt else r + fun replcEA(SOME(I.Displace{base, disp, mem})) = + SOME(I.Displace{base=replc base, disp=disp, mem=mem}) + | replcEA ea = ea + fun hppaUse(instr) = + (case instr + of I.STORE{st, b, d, r, mem} => + I.STORE{st=st, b=replc b, d=d, r=replc r, mem=mem} + | I.LOAD{l, r1, r2, t, mem} => + I.LOAD{l=l, r1=replc r1, r2=replc r2, t=t, mem=mem} + | I.LOADI{li, r, i, t, mem=mem} => + I.LOADI{li=li, r=replc r, i=i, t=t, mem=mem} + | I.ARITH{a, r1, r2, t} => I.ARITH{a=a, r1=replc r1, r2=replc r2, t=t} + | I.ARITHI{ai, r, i, t} => I.ARITHI{ai=ai, r=replc r, i=i, t=t} + | I.COMCLR_LDO{cc, r1, r2, b, i, t1, t2} => + if CB.registerId t1 <> 0 andalso not(CB.sameColor(t1,t2)) + andalso CB.sameColor(t2,rs) then + error "rewriteUse: COMCLR_LDO" + else + I.COMCLR_LDO{cc=cc, r1=replc r1, r2=replc r2, b=replc b, i=i, + t1=t1, t2=t2} + | I.COMICLR_LDO{cc, i1, r2, b, i2, t1, t2} => + if CB.registerId t1 <> 0 andalso not(CB.sameColor(t1,t2)) + andalso CB.sameColor(t2,rs) then + error "rewriteUse: COMICLR_LDO" + else + I.COMICLR_LDO{cc=cc, i1=i1, r2=replc r2, b=replc b, i2=i2, + t1=t1, t2=t2} + | I.SHIFTV{sv, r, len, t} => I.SHIFTV{sv=sv, r=replc r, len=len, t=t} + | I.SHIFT{s, r, p, len, t} => I.SHIFT{s=s, r=replc r, p=p, len=len, t=t} + | I.BCOND{cmp, bc, r1, r2, t, f, n, nop} => + I.BCOND{cmp=cmp, bc=bc, r1=replc r1, r2=replc r2, t=t, f=f,n=n, nop=nop} + | I.BCONDI{cmpi, bc, i, r2, t, f, n, nop} => + I.BCONDI{cmpi=cmpi, bc=bc, i=i, r2=replc r2, t=t, f=f,n=n, nop=nop} + | I.BB{bc, r, p, t, f, n, nop} => + I.BB{bc=bc, r=replc r, p=p, t=t, f=f,n=n, nop=nop} + | I.BV{x, b, labs, n} => I.BV{x=replc x, b=replc b, labs=labs,n=n} + | I.BE{b, d, sr, labs, n} => I.BE{b=replc b, d=d, sr=sr, labs=labs, n=n} + | I.BLR{x, t, labs, n} => I.BLR{x=replc x, t=t, labs=labs,n=n} + | I.BLE{b, d, sr, t, defs, uses, cutsTo, mem} => + I.BLE{b=replc b, d=d, sr=sr, t=t, defs=defs, + uses=CS.map {from=rs,to=rt} uses, cutsTo=cutsTo, mem=mem} + | I.BL{lab, t, defs, uses, mem, cutsTo, n} => + I.BL{lab=lab, t=t, defs=defs, cutsTo=cutsTo, + uses=CS.map {from=rs,to=rt} uses, mem=mem, n=n} + | I.LDO{b, t, i} => I.LDO{b=replc b, t=t, i=i} + | I.MTCTL{r, t} => I.MTCTL{r=replc r, t=t} + | I.FSTORE{fst, b, d, r, mem} => + I.FSTORE{fst=fst, b=replc b, d=d, r=r, mem=mem} + | I.FSTOREX{fstx, b, x, r, mem} => + I.FSTOREX{fstx=fstx, b=replc b, x=replc x, r=r, mem=mem} + | I.FLOAD{fl, b, d, t, mem} => + I.FLOAD{fl=fl, b=replc b, d=d, t=t, mem=mem} + | I.FLOADX{flx, b, x, t, mem} => + I.FLOADX{flx=flx, b=replc b, x=replc x, t=t, mem=mem} + | _ => instr + (*esac*)) + in + case instr + of (I.ANNOTATION{i, ...}) => rewriteUse(i, rs, rt) + | I.LIVE{regs, spilled} => + I.LIVE{regs=C.addReg(rt, C.rmvReg(rs, regs)), spilled=spilled} + | I.INSTR(i) => I.INSTR(hppaUse(i)) + | I.COPY{k as CB.GP, sz, dst, src, tmp} => + I.COPY{k=k, sz=sz, dst=dst, src=map replc src, tmp=replcEA tmp} + | _ => error "rewriteUse" + end + + fun rewriteDef(instr, rs, rt) = let + fun replc r = if CB.sameColor(r,rs) then rt else r + fun ea (SOME(I.Direct r)) = SOME(I.Direct (replc r)) + | ea x = x + fun hppaDef(instr) = + (case instr + of I.ARITH{a, r1, r2, t} => I.ARITH{a=a, r1=r1, r2=r2, t=replc t} + | I.ARITHI{ai, i, r, t} => I.ARITHI{ai=ai, i=i, r=r, t=replc t} + | I.LOAD{l, r1, r2, t, mem} => I.LOAD{l=l,r1=r1,r2=r2,t=replc t,mem=mem} + | I.LOADI{li, i, r, t, mem} => I.LOADI{li=li,i=i,r=r,t=replc t,mem=mem} + | I.COMCLR_LDO{cc, r1, r2, b, i, t1, t2} => + if CB.registerId t1 <> 0 andalso not(CB.sameColor(t1,t2)) + andalso CB.sameColor(t2,rs) then + error "rewriteDef: COMCLR_LDO" + else + I.COMCLR_LDO{cc=cc, r1=r1, r2=r2, b=b, i=i, t1=replc t1, t2=replc t2} + | I.COMICLR_LDO{cc, i1, r2, b, i2, t1, t2} => + if CB.registerId t1 <> 0 andalso not(CB.sameColor(t1,t2)) + andalso CB.sameColor(t2,rs) then + error "rewriteDef: COMICLR_LDO" + else + I.COMICLR_LDO{cc=cc, i1=i1, r2=r2, b=b, i2=i2, + t1=replc t1, t2=replc t2} + | I.SHIFTV{sv, r, len, t} => I.SHIFTV{sv=sv, r=r, len=len, t=replc t} + | I.SHIFT{s, r, p, len, t} => I.SHIFT{s=s, r=r, p=p, len=len, t=replc t} + | I.BLR{x, t, labs, n} => I.BLR{x=x, t=replc t, labs=labs,n=n} + | I.BLE{d, b, sr, t, defs, uses, cutsTo, mem} => + I.BLE{d=d, b=b, sr=sr, t=replc t, + defs=CS.map {from=rs,to=rt} defs, uses=uses, + cutsTo=cutsTo, mem=mem} + | I.BL{lab, t, defs, uses, mem, cutsTo, n} => + I.BL{lab=lab, t=replc t, cutsTo=cutsTo, + defs=CS.map {from=rs,to=rt} defs, uses=uses, mem=mem, n=n} + | I.LDIL{i, t} => I.LDIL{i=i, t=replc t} + | I.LDO{i, b, t} => I.LDO{i=i, b=b, t=replc t} + | _ => instr + (*esac*)) + in + case instr + of (I.ANNOTATION{i, ...}) => rewriteDef(i, rs, rt) + | I.KILL{regs, spilled} => + I.KILL{regs=C.addReg(rt, C.rmvReg(rs, regs)), spilled=spilled} + | I.INSTR(i) => I.INSTR(hppaDef(i)) + | I.COPY{k as CB.GP, sz, dst, src, tmp} => + I.COPY{k=k, sz=sz, dst=map replc dst, src=src, tmp=ea tmp} + | _ => error "rewriteDef" + end + + + fun frewriteUse(instr, fs, ft) = let + fun replc r = if CB.sameColor(r,fs) then ft else r + fun hppaUse(instr) = + (case instr + of I.FSTORE{fst, b, d, r, mem} => + I.FSTORE{fst=fst, b=b, d=d, r=replc r, mem=mem} + | I.FSTOREX{fstx, b, x, r, mem} => + I.FSTOREX{fstx=fstx, b=b, x=x, r=replc r, mem=mem} + | I.FARITH{fa, r1, r2, t} => + I.FARITH{fa=fa, r1=replc r1, r2=replc r2, t=t} + | I.FUNARY{fu, f, t} => I.FUNARY{fu=fu, f=replc f, t=t} + | I.FCNV{fcnv, f, t} => I.FCNV{fcnv=fcnv, f=replc f, t=t} + | I.FBRANCH{cc,fmt,f1,f2,t,f,n,long} => + I.FBRANCH{cc=cc,fmt=fmt,f1=replc f1,f2=replc f2,t=t,f=f,n=n,long=long} + | I.BLE{d, b, sr, t, defs, uses, cutsTo, mem} => + I.BLE{d=d, b=b, sr=sr, t=replc t, defs=defs, + uses=CS.map {from=fs,to=ft} uses, cutsTo=cutsTo, mem=mem} + | I.BL{lab, t, defs, uses, mem, cutsTo, n} => + I.BL{lab=lab, t=t, defs=defs, cutsTo=cutsTo, + uses=CS.map {from=fs,to=ft} uses, mem=mem, n=n} + | _ => instr + (*esac*)) + in + case instr + of (I.ANNOTATION{i, ...}) => frewriteUse(i, fs, ft) + | I.INSTR(i) => I.INSTR(hppaUse(i)) + | I.LIVE{regs, spilled} => + I.LIVE{regs=C.addFreg(ft, C.rmvFreg(fs, regs)), spilled=spilled} + | I.COPY{k as CB.FP, sz, dst, src, tmp} => + I.COPY{k=k, sz=sz, dst=dst, src=map replc src, tmp=tmp} + + | _ => error "frewriteUse" + end + + fun frewriteDef(instr, fs, ft) = let + fun replc r = if CB.sameColor(r,fs) then ft else r + fun ea (SOME(I.FDirect f)) = SOME(I.FDirect(replc f)) + | ea x = x + fun hppaDef(instr) = + (case instr + of I.FLOAD{fl, b, d, t, mem} => + I.FLOAD{fl=fl, b=b, d=d, t=replc t,mem=mem} + | I.FLOADX{flx, b, x, t, mem} => + I.FLOADX{flx=flx, b=b, x=x, t=replc t, mem=mem} + | I.FARITH {fa, r1, r2, t} => I.FARITH{fa=fa, r1=r1, r2=r2, t=replc t} + | I.FUNARY{fu, f, t} => I.FUNARY{fu=fu, f=f, t=replc t} + | I.FCNV{fcnv, f, t} => I.FCNV{fcnv=fcnv, f=f, t=replc t} + | I.BLE{d, b, sr, t, defs, uses, cutsTo, mem} => + I.BLE{d=d, b=b, sr=sr, t=replc t, cutsTo=cutsTo, + defs=CS.map {from=fs,to=ft} defs, uses=uses, mem=mem} + | I.BL{lab, t, defs, uses, mem, cutsTo, n} => + I.BL{lab=lab, t=t, cutsTo=cutsTo, + defs=CS.map {from=fs,to=ft} defs, uses=uses, mem=mem, n=n} + | _ => instr + (*esac*)) + in + case instr + of (I.ANNOTATION{i, ...}) => frewriteDef(i, fs, ft) + | I.INSTR(i) => I.INSTR(hppaDef(i)) + | I.KILL{regs, spilled} => + I.KILL{regs=C.addFreg(ft, C.rmvFreg(fs, regs)), spilled=spilled} + | I.COPY{k as CB.FP, sz, dst, src, tmp} => + I.COPY{k=k, sz=sz, dst=map replc dst, src=src, tmp=ea tmp} + | _ => error "frewriteDef" + end +end + diff --git a/MLRISC/hppa/ra/hppaSpillInstr.sml b/MLRISC/hppa/ra/hppaSpillInstr.sml new file mode 100644 index 0000000..cda5adb --- /dev/null +++ b/MLRISC/hppa/ra/hppaSpillInstr.sml @@ -0,0 +1,84 @@ +(* hppaSpillInstr.sml + * + * COPYRIGHT (c) 2002 Bell Labs, Lucent Technologies + * + * Hppa instructions to emit when spilling an instruction. + *) + +functor HppaSpillInstr(Instr : HPPAINSTR) : ARCH_SPILL_INSTR = struct + structure I = Instr + structure C = I.C + structure Rewrite = HppaRewrite(I) + structure CB=CellsBasis + + fun error msg = MLRiscErrorMsg.error ("HppaSpillInstr", msg) + + val tmpR = I.C.asmTmpR + fun storeToEA CB.GP (reg, I.Displace{base, disp, mem}) = + [I.store{st=I.STW, b=base, d=I.LabExp(disp, I.F), r=reg, mem=mem}] + | storeToEA CB.FP (reg, I.Displace{base, disp, mem}) = + [I.ldil{i=I.HILabExp(disp, I.F), t=tmpR}, + I.ldo{i=I.LOLabExp(disp, I.F), b=tmpR, t=tmpR}, + I.fstorex{fstx=I.FSTDX, b=base, x=tmpR, r=reg, mem=mem}] + | storeToEA _ _ = error "storeToEA" + + fun loadFromEA CB.GP (reg, I.Displace{base, disp, mem}) = + [I.loadi{li=I.LDW, r=base, i=I.LabExp(disp,I.F), t=reg, mem=mem}] + | loadFromEA CB.FP (reg, I.Displace{base, disp, mem}) = + [I.ldil{i=I.HILabExp(disp, I.F), t=tmpR}, + I.ldo{i=I.LOLabExp(disp, I.F), b=tmpR, t=tmpR}, + I.floadx{flx=I.FLDDX, b=base, x=tmpR, t=reg, mem=mem}] + | loadFromEA _ _ = error "loadFromEA" + + fun spillToEA ck reg_ea = + {code=storeToEA ck reg_ea, proh=[], newReg=NONE} + fun reloadFromEA ck reg_ea = + {code=loadFromEA ck reg_ea, proh=[], newReg=NONE} + + (* spill a register to spillLoc *) + fun spillR (instr, reg, ea) = let + val newR = C.newReg() + val instr' = Rewrite.rewriteDef(instr, reg, newR) + in + {code=instr' :: storeToEA CB.GP (newR, ea), + proh=[newR], + newReg=SOME newR} + end + + fun spillF (instr, reg, ea) = let + val newR = C.newFreg() + val instr' = Rewrite.frewriteDef(instr, reg, newR) + in + {code=instr' :: storeToEA CB.FP (newR, ea), + proh=[newR], + newReg=SOME newR} + end + + (* reload a register from spillLoc *) + fun reloadR(instr, reg, ea) = let + val newR = C.newReg() + val instr' = Rewrite.rewriteUse(instr, reg, newR) + in + {code= loadFromEA CB.GP (newR, ea) @ [instr'], + proh=[newR], + newReg=SOME newR} + end + + fun reloadF(instr, reg, ea) = let + val newR = C.newFreg() + val instr' = Rewrite.frewriteUse(instr, reg, newR) + in + {code=loadFromEA CB.FP (newR, ea) @ [instr'], + proh=[newR], + newReg=SOME newR} + end + + fun spill CellsBasis.GP = spillR + | spill CellsBasis.FP = spillF + | spill _ = error "spill" + + fun reload CellsBasis.GP = reloadR + | reload CellsBasis.FP = reloadF + | reload _ = error "reload" +end + diff --git a/MLRISC/instructions/.cm/GUID/peephole.sig b/MLRISC/instructions/.cm/GUID/peephole.sig new file mode 100644 index 0000000..6e8cd1b --- /dev/null +++ b/MLRISC/instructions/.cm/GUID/peephole.sig @@ -0,0 +1 @@ +guid-$OTHER-MLRISC/(Peephole.cm):../instructions/peephole.sig-1714016098.040 diff --git a/MLRISC/instructions/.cm/SKEL/peephole.sig b/MLRISC/instructions/.cm/SKEL/peephole.sig new file mode 100644 index 0000000..c3395a3 --- /dev/null +++ b/MLRISC/instructions/.cm/SKEL/peephole.sig @@ -0,0 +1,2 @@ +Skeleton 5 +ac"PEEPHOLE"h1ad"I"gp1c"INSTRUCTIONS" \ No newline at end of file diff --git a/MLRISC/instructions/.cm/amd64-unix/peephole.sig b/MLRISC/instructions/.cm/amd64-unix/peephole.sig new file mode 100644 index 0000000..a3349fb Binary files /dev/null and b/MLRISC/instructions/.cm/amd64-unix/peephole.sig differ diff --git a/MLRISC/instructions/block-names.sig b/MLRISC/instructions/block-names.sig new file mode 100644 index 0000000..9447e75 --- /dev/null +++ b/MLRISC/instructions/block-names.sig @@ -0,0 +1,10 @@ +(* BLOCK_NAME + * + * The type name, is used to label basic blocks. + *) +signature BLOCK_NAMES = sig + type name + val default : name + val toString : name -> string + val == : name * name -> bool +end diff --git a/MLRISC/instructions/cells-basis.sig b/MLRISC/instructions/cells-basis.sig new file mode 100644 index 0000000..ae8c6be --- /dev/null +++ b/MLRISC/instructions/cells-basis.sig @@ -0,0 +1,218 @@ +(* + * This updated signature describes the abstractions on ``cells'', which + * denote storage cells in the machine architecture. + * + * Allen Leung (12/2/00) + *) +signature CELLS_BASIS = +sig + type sz = int (* width in bits *) + type cell_id = int (* unique cell identifier *) + type register_id = int (* register id *) + type register_num = int (* register number *) + (* Note: register_id and register_num should probably be made into + * different datatypes with different tags, but FLINT currently boxes + * such objects. + *) + + datatype cellkindInfo = INFO of {name:string, nickname:string} + datatype cellkindDesc = + DESC of + {kind : cellkind, + counter : int ref, + dedicated : int ref, + (* It is sometimes desirable to allocate dedicated + * pseudo registers that will get rewritten to something else, + * e.g., the virtual frame pointer. + * Since these registers are never assigned a register by + * the register allocator, a limited number of these kinds + * of registers may be generated. + *) + low : int, + high : int, + toString : register_id -> string, + toStringWithSize : register_id * sz -> string, + defaultValues : (register_id * int) list, + physicalRegs : cell Array.array ref, + zeroReg : register_id option + } + + + (* Cellkind denotes the types of storage cells. + * This definition is further augumented by architecture specific + * cells descriptions. Type cellkind is an equality type. + *) + and cellkind = + GP (* general purpose register *) + | FP (* floating point register *) + | CC (* condition code register *) + + | MEM (* memory *) + | CTRL (* control dependence *) + + | MISC_KIND of cellkindInfo ref (* client defined *) + + + (* + * A cell is a stateful object reprensenting a storage cell in a + * processor. Cells are partitioned into their kinds, such as + * GP (general purpose, i.e., integer, registers), * FP + * (floating point registers) etc. Each cell has an unique cell_id + * that determines its identity. Its attributes include + * + * 1. its color, and + * 2. other client defined properties, + * which is represented as a property list of annotations. + * + * Note that cell_id and color are two distinct concepts; for example, + * two different cells may have the same color. + * + * Type cell is not an equality type. We provide the function + * sameCell for testing for object identity, and the function + * sameColor for testing for color identity. For most things, + * sameColor is the right function to use. + *) + and cell = + CELL of {id : cell_id, + col : cellColor ref, + desc : cellkindDesc, + an : Annotations.annotations ref + } + and cellColor = + MACHINE of register_id + | PSEUDO + | ALIASED of cell + | SPILLED + + (* + * Basic functions on cellkinds + *) + val cellkindToString : cellkind -> string (* name *) + val cellkindToNickname : cellkind -> string (* abbreviation *) + val newCellKind : {name:string,nickname:string} -> cellkind + + (* + * Basic functions on cells. + * All functions marked with +++ implicitly chases aliases. + * + * Function register_id returns the current color of a node. + * The color of a pseudo register is the same as its cell_id. + * A spilled node is given a color of ~1, so all spilled nodes have + * the same color. + * + * NOTE: distinction between registerId and registerNum: + * Function register_id returns register_id. + * Physical registers in distinct + * cell classes are given disjoint register_ids. So for example, + * the register id for r0 and f0 in the Alpha are different. + * + * The function, registerNum, on the other hand, returns a + * register number of a cell that starts from 0 for physical registers. + * So registerNum r0 = registerNum f0 = 0. It behaves the same + * as registerId in other cases. + * + * The function physicalRegisterNum is the same as registerNum, + * except that it is an error to call it on a pseudo or spilled cell. + * As a rule, use registerId whenever possible. Function registerNum + * is used only if you have to deal with machine encoding. + *) + val cellId : cell -> cell_id (* return cell id *) + val cellkind : cell -> cellkind (* return cellkind *) + val isConst : cell -> bool + val annotations : cell -> Annotations.annotations ref + val sameCell : cell * cell -> bool (* object identity *) + val sameKind : cell * cell -> bool (* same cellkind? *) + val chase : cell -> cell (* chase aliases +++ *) + val sameAliasedCell : cell * cell -> bool (* chase aliases +++ *) + val hashCell : cell -> word + val registerId : cell -> register_id (* +++ *) + val registerNum : cell -> register_num (* +++ *) + val physicalRegisterNum : cell -> int (* +++ *) + val sameColor : cell * cell -> bool (* color identity +++ *) + val compareColor : cell * cell -> order (* +++ *) + val toString : cell -> string (* pretty print a cell +++ *) + val toStringWithSize : cell * sz -> string (* +++ *) + + (* Set the color of the 'from' cell to be the same as + * the 'to' cell. The 'from' cell MUST be a pseudo register, + * and cannot be of kind CONST. + *) + val setAlias : {from: cell, to: cell} -> unit (* +++ *) + + (* + * The following abstraction represents a set of cells + * indexed by colors. When two or more cells with the same color + * exists, we arbitrarily choose a representative. + * WARNING: while using sorted_cells it is important not to + * update the colors in the elements, or you'll get wrong results. + *) + structure SortedCells : + sig + type sorted_cells + val empty : sorted_cells + val enter : cell * sorted_cells -> sorted_cells + val rmv : cell * sorted_cells -> sorted_cells + val member : cell * sorted_cells -> bool + val eq : sorted_cells * sorted_cells -> bool + val notEq : sorted_cells * sorted_cells -> bool + val uniq : cell list -> sorted_cells + val difference : sorted_cells * sorted_cells -> sorted_cells + val intersect : sorted_cells * sorted_cells -> sorted_cells + val union : sorted_cells * sorted_cells -> sorted_cells + val return : sorted_cells -> cell list + val isEmpty : sorted_cells -> bool + val emptyIntersection : sorted_cells * sorted_cells -> bool + val nonEmptyIntersection : sorted_cells * sorted_cells -> bool + end + + (* + * Hash table indexed by cell id. + * IMPORTANT: this table is not indexed by color! + *) + structure HashTable : MONO_HASH_TABLE where type Key.hash_key = cell + + (* + * Hash table indexed by cell color. + * IMPORTANT: this table is indexed by color! + * ALSO: DO NOT change the colors of the cells while using this table! + *) + structure ColorTable : MONO_HASH_TABLE where type Key.hash_key = cell + + (* + * Cell set represents a map from cellkind to sorted_cells. + *) + structure CellSet : + sig + type cellset + (* cellset functions *) + val empty : cellset + val add : cell * cellset -> cellset + val rmv : cell * cellset -> cellset + val get : cellkindDesc -> cellset -> cell list + val update : cellkindDesc -> cellset * cell list -> cellset + val map : {from:cell, to:cell} -> cellset -> cellset + + (* convert cellset into a list of cells *) + val toCellList : cellset -> cell list + + (* pretty printing *) + val toString : cellset -> string + end + + (* + * These annotations adds extra definitions and uses to an instruction + *) + exception DEF_USE of {cellkind:cellkind, defs:cell list, uses:cell list} + val DEFUSE : {cellkind:cellkind, defs:cell list, uses:cell list} + Annotations.property + + (* Internal use for alias analysis; don't use! *) + val mem : register_id -> cell + + (* Internal use only! *) + val show : cellkindDesc -> register_id -> string + val showWithSize : cellkindDesc -> register_id * sz -> string + + val array0 : cell Array.array +end + diff --git a/MLRISC/instructions/cells-basis.sml b/MLRISC/instructions/cells-basis.sml new file mode 100644 index 0000000..c77ceff --- /dev/null +++ b/MLRISC/instructions/cells-basis.sml @@ -0,0 +1,380 @@ +(* cells-basis.sml + * + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * Description of cell and other updatable cells. + * + * -- Allen. + *) + +structure CellsBasis : CELLS_BASIS = +struct + + datatype cellkindInfo = INFO of {name:string, nickname:string} + + type sz = int (* width in bits *) + type cell_id = int (* unique cell identifier *) + type register_id = int (* encoding of phsyical registers *) + type register_num = int + + (* Cellkind denote the types of storage cells. + * This definition is further augumented by architecture specific + * cells descriptions. Type cellkind is an equality type. + *) + datatype cellkind = + GP (* general purpose register *) + | FP (* floating point register *) + | CC (* condition code register *) + + | MEM (* memory *) + | CTRL (* control dependence *) + + | MISC_KIND of cellkindInfo ref (* client defined *) + + (* This data structure is automatically generated by MDGen to + * describe a cellkind. + *) + datatype cellkindDesc = + DESC of + {kind : cellkind, + counter : int ref, + dedicated : int ref, + (* It is sometimes desirable to allocate dedicated + * pseudo registers that will get rewritten to something else, + * e.g., the virtual frame pointer. + * Since these registers are never assigned a register by + * the register allocator, a limited number of these kinds + * of registers may be generated. + *) + low : int, + high : int, + toString : register_id -> string, + toStringWithSize : register_id * sz -> string, + defaultValues : (register_id * int) list, + physicalRegs : cell Array.array ref, + zeroReg : register_id option + } + + and cell = + CELL of {id : cell_id, + col : cellColor ref, + desc : cellkindDesc, + an : Annotations.annotations ref + } + + and cellColor = + MACHINE of register_id + | PSEUDO + | ALIASED of cell + | SPILLED + + val array0 = Array.tabulate(0, fn _ => raise Match) : cell Array.array + + fun error msg = MLRiscErrorMsg.error ("CellBasis", msg) + + val i2s = Int.toString + + fun cellkindToString GP = "GP" + | cellkindToString FP = "FP" + | cellkindToString CC = "CC" + | cellkindToString MEM = "MEM" + | cellkindToString CTRL = "CTRL" + | cellkindToString (MISC_KIND(ref(INFO{name, ...}))) = name + + fun cellkindToNickname GP = "r" + | cellkindToNickname FP = "f" + | cellkindToNickname CC = "cc" + | cellkindToNickname MEM = "m" + | cellkindToNickname CTRL = "ctrl" + | cellkindToNickname (MISC_KIND(ref(INFO{nickname, ...}))) = nickname + + fun newCellKind{name="GP", ...} = GP + | newCellKind{name="FP", ...} = FP + | newCellKind{name="CC", ...} = CC + | newCellKind{name="MEM", ...} = MEM + | newCellKind{name="CTRL", ...} = CTRL + | newCellKind{name, nickname} = + MISC_KIND(ref(INFO{name=name, nickname=nickname})) + + fun chase(CELL{col=ref(ALIASED c), ...}) = chase(c) + | chase c = c + + fun registerId(CELL{col=ref(ALIASED c), ...}) = registerId(c) + | registerId(CELL{col=ref(MACHINE r), ...}) = r + | registerId(CELL{col=ref(SPILLED), ...}) = ~1 + | registerId(CELL{col=ref(PSEUDO), id, ...}) = id + + fun registerNum(CELL{col=ref(ALIASED c), ...}) = registerNum(c) + | registerNum(CELL{col=ref(MACHINE r), desc=DESC{low,...}, ...}) = r-low + | registerNum(CELL{col=ref SPILLED, id, ...}) = ~1 + | registerNum(CELL{col=ref PSEUDO, id, ...}) = id + + fun physicalRegisterNum(CELL{col=ref(ALIASED c), ...}) = + physicalRegisterNum(c) + | physicalRegisterNum(CELL{col=ref(MACHINE r), + desc=DESC{low,...}, ...}) = r-low + | physicalRegisterNum(CELL{col=ref SPILLED, id, ...}) = + error("physicalRegisterNum: SPILLED: "^i2s id) + | physicalRegisterNum(CELL{col=ref PSEUDO, id, ...}) = + error("physicalRegisterNum: PSEUDO: "^i2s id) + + + fun cellId(CELL{id, ...}) = id + + fun hashCell(CELL{id, ...}) = Word.fromInt id + fun hashColor c = Word.fromInt(registerId c) + fun desc(CELL{desc, ...}) = desc + fun sameCell(c1, c2) = cellId(c1) = cellId(c2) + fun sameDesc(DESC{counter=x, ...}, DESC{counter=y, ...}) = x=y + fun sameKind(c1, c2) = sameDesc(desc c1,desc c2) + fun sameAliasedCell(c1, c2) = sameCell(chase c1, chase c2) + fun sameColor(c1, c2) = registerId c1 = registerId c2 + fun compareColor(c1, c2) = Int.compare(registerId c1, registerId c2) + fun cellkind(CELL{desc=DESC{kind, ...}, ...}) = kind + fun annotations(CELL{an, ...}) = an + + fun setAlias{from, to} = + let val CELL{id, col, desc=DESC{kind, ...}, ...} = chase from + val to as CELL{col=colTo, ...} = chase to + in if col = colTo then () (* prevent self-loops *) + else if id < 0 then error "setAlias: constant" + else case (!col, kind) + of (PSEUDO, _) => col := ALIASED to + | _ => error "setAlias: non-pseudo" + end + + fun isConst(CELL{id, ...}) = id < 0 + + (* Pretty printing of cells *) + fun toString(CELL{col=ref(ALIASED c), ...}) = toString(c) + | toString(c as CELL{desc=DESC{toString, ...}, ...}) = + toString(registerNum c) + + fun toStringWithSize(c as CELL{desc=DESC{toStringWithSize,...},...},sz) = + toStringWithSize(registerNum c,sz) + + fun cnv(r, low, high) = if low <= r andalso r <= high then r - low else r + fun show(DESC{toString, low, high, ...}) r = toString(cnv(r,low,high)) + fun showWithSize(DESC{toStringWithSize, low, high, ...}) (r, sz) = + toStringWithSize(cnv(r,low,high),sz) + + structure SortedCells = struct + type sorted_cells = cell list + + val empty = [] + + val size = List.length + + fun enter(cell, l) = let + val c = registerId cell + fun f [] = [cell] + | f (l as (h::t)) = + let val ch = registerId h + in if c < ch then cell::l else if c > ch then h::f t else l + end + in f l + end + + fun member(x, l) = + let val x = registerId x + in List.exists (fn y => registerId y = x) l + end + + fun rmv(cell, l) = let + val c = registerId cell + fun f [] = [] + | f (l as (h::t)) = + let val ch = registerId h + in if c = ch then t + else if c < ch then l + else h::f l + end + in f l + end + + fun uniq (cells) = List.foldl enter [] (map chase cells) + + fun difference([], _) = [] + | difference(l, []) = l + | difference(l1 as x::xs, l2 as y::ys) = + let val cx = registerId x and cy = registerId y + in if cx = cy then difference(xs,ys) + else if cx < cy then x::difference(xs,l2) + else difference(l1,ys) + end + + fun union(a, []) = a + | union([], a) = a + | union(l1 as x::xs, l2 as y::ys) = + let val cx = registerId x and cy = registerId y + in if cx = cy then x::union(xs,ys) + else if cx < cy then x::union(xs,l2) + else y::union(l1,ys) + end + + fun intersect(a, []) = [] + | intersect([], a) = [] + | intersect(l1 as x::xs, l2 as y::ys) = + let val cx = registerId x and cy = registerId y + in if cx = cy then x::intersect(xs,ys) + else if cx < cy then intersect(xs,l2) + else intersect(l1,ys) + end + + fun notEq([], []) = false + | notEq([], l) = true + | notEq(_, []) = true + | notEq(x::l1, y::l2) = registerId x <> registerId y orelse notEq(l1,l2) + + fun eq([], []) = true + | eq(x::l1, y::l2) = registerId x = registerId y orelse eq(l1,l2) + | eq(_, _) = false + + fun return cs = cs + + fun isEmpty [] = true + | isEmpty _ = false + + fun emptyIntersection(_, []) = true + | emptyIntersection([], _) = true + | emptyIntersection(l1 as x::xs, l2 as y::ys) = + let val cx = registerId x and cy = registerId y + in if cx = cy then false + else if cx < cy then emptyIntersection(xs,l2) + else emptyIntersection(l1,ys) + end + + fun nonEmptyIntersection(_, []) = false + | nonEmptyIntersection([], _) = false + | nonEmptyIntersection(l1 as x::xs, l2 as y::ys) = + let val cx = registerId x and cy = registerId y + in if cx = cy then true + else if cx < cy then nonEmptyIntersection(xs,l2) + else nonEmptyIntersection(l1,ys) + end + end + + structure HashTable = + HashTableFn(type hash_key = cell + val hashVal = hashCell + val sameKey = sameCell) + + structure ColorTable = + HashTableFn(type hash_key = cell + val hashVal = hashColor + val sameKey = sameColor) + + structure CellSet = + struct + type cellset = (cellkindDesc * cell list) list + val empty = [] + + fun same(DESC{counter=c1,...}, DESC{counter=c2,...}) = c1=c2 + + fun descOf (CELL{desc, ...}) = desc + + fun add (r, cellset:cellset) = + let val k = descOf r + fun loop [] = [(k,[r])] + | loop((x as (k',s))::cellset) = + if same(k,k') then (k',r::s)::cellset + else x::loop cellset + in loop cellset end + + fun rmv (r, cellset:cellset) = + let val k = descOf r + val c = registerId r + fun filter [] = [] + | filter(r::rs) = if registerId r = c then filter rs + else r::filter rs + fun loop [] = [] + | loop((x as (k',s))::cellset) = + if same(k,k') then (k',filter s)::cellset else x::loop cellset + in loop cellset end + + fun get (k : cellkindDesc) = let + fun loop ([] : cellset) = [] + | loop ((x as (k',s))::cellset) = + if same(k, k') then s else loop cellset + in + loop + end + + fun update (k : cellkindDesc) (cellset:cellset, s) = let + fun loop [] = [(k,s)] + | loop((x as (k',_))::cellset) = + if same(k,k') then (k',s)::cellset else x::loop cellset + in + loop cellset + end + + fun map {from,to} (cellset:cellset) = + let val CELL{desc=k,...} = from + val cf = registerId from + fun trans r = if registerId r = cf then to else r + fun loop [] = [] + | loop((x as (k',s))::cellset) = + if same(k, k') then (k',List.map trans s)::cellset + else x::loop cellset + in loop cellset end + + val toCellList : cellset -> cell list = + List.foldr (fn ((_,S),S') => S @ S') [] + + (* Pretty print cellset *) + fun printSet(f,set,S) = + let fun loop([], S) = "}"::S + | loop([x], S) = f(chase x)::"}"::S + | loop(x::xs, S) = f(chase x)::" "::loop(xs, S) + in "{"::loop(set, S) end + + fun toString' cellset = + let fun pr cellset = + let fun loop((DESC{kind, ...},s)::rest, S)= + (case s of + [] => loop(rest, S) + | _ => cellkindToString kind::"=":: + printSet(toString,s," "::loop(rest,S)) + ) + | loop([],S) = S + in String.concat(loop(cellset, [])) + end + in pr cellset end + + val toString = toString' + end (* CellSet *) + + (* + * These annotations specifies definitions and uses + * for a pseudo instruction. + *) + exception DEF_USE of {cellkind:cellkind, defs:cell list, uses:cell list} + val DEFUSE = Annotations.new' + {create=DEF_USE, + get=fn DEF_USE x => x | e => raise e, + toString=fn{cellkind,defs,uses} => + "DEFUSE"^cellkindToString cellkind + } + (* + * Hack for generating memory aliasing cells + *) + val memDesc = + DESC + {kind = MEM, + counter = ref 0, + dedicated = ref 0, + low = 0, + high = ~1, + toString = fn m => "m"^i2s m, + toStringWithSize = fn (m, _) => "m"^i2s m, + defaultValues = [], + physicalRegs = ref array0, + zeroReg = NONE + } + + fun mem id = CELL{id=id, an=ref [], desc=memDesc, col=ref(MACHINE id)} + + val array0 = Array.tabulate(0, fn _ => raise Match) : cell Array.array +end + diff --git a/MLRISC/instructions/cells.sig b/MLRISC/instructions/cells.sig new file mode 100644 index 0000000..befd1a6 --- /dev/null +++ b/MLRISC/instructions/cells.sig @@ -0,0 +1,124 @@ +(* + * This updated signature describes the abstractions on ``cells'', which + * denote storage cells in the machine architecture. + * + * Allen Leung (12/2/00) + *) + + +(* + * Things that are architecture specific. + *) +signature CELLS = sig + + val cellkinds : CellsBasis.cellkind list + (* list of all the cellkinds *) + + val firstPseudo : CellsBasis.cell_id + (* first pseudo register *) + + val cellkindDesc : CellsBasis.cellkind -> CellsBasis.cellkindDesc + (* find descriptor *) + + val cellRange : CellsBasis.cellkind -> {low:int, high:int} + (* given a cellkind returns its encoding range *) + + val Reg : CellsBasis.cellkind -> (CellsBasis.register_num -> CellsBasis.cell) + (* Returns the nth physical register of the given kind, + * raises Cells if there are no physical register of the given number. + * Also raises Cells if the given number if outside of the range. + * NOTE: this function returns the same cell for the + * same argument every time. See also the function cloneCell below + *) + + val Regs : + CellsBasis.cellkind -> + {from : CellsBasis.register_num, + to : CellsBasis.register_num, + step : int + } -> + CellsBasis.cell list + (* return a list of cells *) + + val Cell : CellsBasis.cellkind -> (CellsBasis.register_id -> CellsBasis.cell) + (* Same as Reg but we take the id instead. + * So, registerNum(Reg k r) = r, and + * registerId(Cell k id) = id + *) + + val GPReg : int -> CellsBasis.cell (* abbreviation for Reg GP *) + val FPReg : int -> CellsBasis.cell (* abbreviation for Reg FP *) + + (* + * Generate a new cell for a virtual register. The new cell + * is a pseudo register that is distinct from any other registers. + * IMPORTANT: if you are using newCell, it is important to + * partially apply it first to get a function. Then uses this + * function generate new cells. The first application takes + * time. + *) + val newCell : CellsBasis.cellkind -> ('a -> CellsBasis.cell) + val newReg : 'a -> CellsBasis.cell (* abbreviation for newCell GP *) + val newFreg : 'a -> CellsBasis.cell (* abbreviation for newCell FP *) + + val newDedicatedCell : CellsBasis.cellkind -> ('a -> CellsBasis.cell) + + (* lookup the number of virtual registers in a CellsBasis.cellkind *) + val numCell : CellsBasis.cellkind -> (unit -> int) + + (* the next virtual register name *) + val maxCell : unit -> CellsBasis.cell_id + + (* Given a cell c, create a new pseudo register that has the same + * cellkind as c, and a new property list initialized + * with the contents of c's properity list. + * Note: the numCell kind is NOT updated! + *) + val newVar : CellsBasis.cell -> CellsBasis.cell + + (* This is the same as above, except that if the original + * cell is colored, then the new cell has the same color. + * Note that it is possible to have two cells (or more) with + * the same physical color. In these cases they can be used + * to denote the same register, but they have different identities, + * and different property lists. This may be useful for + * representing the same register used in different situations. + * See the function Reg above. + *) + val cloneCell : CellsBasis.cell -> CellsBasis.cell + + (* Reset all counters. *) + val reset : unit -> unit + + (* Abbreviations for cellsets *) + type cellset = CellsBasis.CellSet.cellset + + val empty : cellset + val getReg : cellset -> CellsBasis.cell list + val addReg : CellsBasis.cell * cellset -> cellset + val rmvReg : CellsBasis.cell * cellset -> cellset + val getFreg : cellset -> CellsBasis.cell list + val addFreg : CellsBasis.cell * cellset -> cellset + val rmvFreg : CellsBasis.cell * cellset -> cellset + + val getCellsByKind : CellsBasis.cellkind -> cellset -> CellsBasis.cell list + val updateCellsByKind : CellsBasis.cellkind + -> cellset * CellsBasis.cell list + -> cellset + + (* Return a register that is always zero on the architecture, + * if one exists. IMPORTANT: each call returns the same cell. + * See also cloneCell above. + *) + val zeroReg : CellsBasis.cellkind -> CellsBasis.cell option + + val defaultValues : CellsBasis.cellkind -> (CellsBasis.register_id * int) list + + val stackptrR : CellsBasis.cell (* stack pointer register *) + val asmTmpR : CellsBasis.cell (* assembly temporary *) + val fasmTmp : CellsBasis.cell (* floating point temporary *) + val cellSize : int +end + + + diff --git a/MLRISC/instructions/cells.sml b/MLRISC/instructions/cells.sml new file mode 100644 index 0000000..a810466 --- /dev/null +++ b/MLRISC/instructions/cells.sml @@ -0,0 +1,173 @@ +(* + * Description of cell and other updatable cells. + * + * -- Allen. + *) + +(* + * This functor is applied to create the cells structure for an architecture + *) +functor Cells + (exception Cells + val firstPseudo : int + val cellKindDescs : (CellsBasis.cellkind * CellsBasis.cellkindDesc) list + val cellSize : int + ) : CELLS = +struct + + open CellsBasis + + exception Cells = Cells + + val i2s = Int.toString + + fun error msg = MLRiscErrorMsg.error(exnName Cells, msg) + + val cellkinds = map (fn (kind,_) => kind) cellKindDescs + val firstPseudo = firstPseudo + val maxDedicatedCells = 256 + val firstName = firstPseudo + maxDedicatedCells + val name = ref firstName +(* val cellCounter = name *) + + val _ = app (fn (_, desc as DESC{physicalRegs, high, low, ...}) => + let val n = high - low + 1 + in if n <= 0 then () + else let val a = Array.tabulate(n, fn nth => + let val reg = nth + low + in CELL{id=reg, col=ref(MACHINE reg), + an=ref [], desc=desc} + end) + in physicalRegs := a + end + end) cellKindDescs + + fun nextName() = let val id = !name in name := !name + 1; id end + + fun desc(k:cellkind) = + let fun loop [] = error("missing info for "^cellkindToString k) + | loop((kind,info)::defs) = + if kind = k then info else loop defs + in loop cellKindDescs end + + val cellkindDesc = desc + + fun cellRange k = + let val DESC{low,high,...} = desc k + in {low=low,high=high} end + + fun Reg k = let + val desc as DESC{low,kind,physicalRegs,...} = desc k + in + fn nth => (Array.sub(!physicalRegs,nth) handle _ => raise Cells) + end + + fun Regs k = + let val Reg = Reg k + fun loop{from, to, step} = + if from > to then [] + else Reg from :: loop{from=from+step, to=to, step=step} + in loop end + + fun Cell k = + let val desc as DESC{low,kind,physicalRegs,...} = desc k + in fn reg => + Array.sub(!physicalRegs,reg - low) handle _ => raise Cells + end + + val GPReg = Reg GP + val FPReg = Reg FP + + (* Counters *) + fun newCell k = + let val desc as DESC{counter,...} = desc k + in fn _ => + let val r = !name + in name := r + 1; + counter := !counter + 1; + CELL{id=r, col=ref PSEUDO, an=ref [], desc=desc} + end + end + + local val desc as DESC{counter, ...} = desc GP + in fun newReg _ = + let val r = !name + in name := r + 1; + counter := !counter + 1; + CELL{id=r, col=ref PSEUDO, an=ref [], desc=desc} + end + end + + local val desc as DESC{counter, ...} = desc FP + in fun newFreg _ = + let val r = !name + in name := r + 1; + counter := !counter + 1; + CELL{id=r, col=ref PSEUDO, an=ref [], desc=desc} + end + end + + fun newDedicatedCell k = + let val desc as DESC{dedicated,...} = desc k + in fn _ => + let val d = !dedicated + in dedicated := d + 1; + if d >= maxDedicatedCells then + error "too many dedicated cells" + else + CELL{id=firstPseudo+d, col=ref PSEUDO, an=ref [], desc=desc} + end + end + + fun newVar (CELL{desc, an, ...}) = + let val r = !name + in name := r + 1; + CELL{id=r, col=ref PSEUDO, an=ref(!an), desc=desc} + end + + fun cloneCell c = + let val CELL{desc, an, col, ...} = chase c + val r = !name + in name := r + 1; + CELL{id=r, col=ref(!col), an=ref(!an), desc=desc} + end + + fun numCell k = let val DESC{counter, ...} = desc k + in fn () => !counter end + + fun maxCell() = !name + + fun reset() = + (app (fn (_,DESC{counter, ...}) => counter := 0) cellKindDescs; + name := firstName + ) + + type cellset = CellSet.cellset + val empty = CellSet.empty + fun getCellsByKind (k : cellkind) = CellSet.get (desc k) + fun updateCellsByKind (k : cellkind) = CellSet.update (desc k) + val getReg = getCellsByKind GP + val getFreg = getCellsByKind FP + val addReg = CellSet.add + val addFreg = CellSet.add + val rmvReg = CellSet.rmv + val rmvFreg = CellSet.rmv + + (* Misc *) + fun zeroReg k = + let val desc as DESC{zeroReg, physicalRegs, low, ...} = desc k + in case zeroReg of + NONE => NONE + | SOME r => SOME(Array.sub(!physicalRegs, r)) + end + + fun defaultValues k = + let val DESC{defaultValues, ...} = desc k + in defaultValues end + + (* dummy values for now; these get redefined for each architecture *) + val stackptrR = GPReg 0 + val asmTmpR = GPReg 0 + val fasmTmp = FPReg 0 + val cellSize = cellSize +end diff --git a/MLRISC/instructions/constant.sig b/MLRISC/instructions/constant.sig new file mode 100644 index 0000000..7069d57 --- /dev/null +++ b/MLRISC/instructions/constant.sig @@ -0,0 +1,14 @@ +(* constant.sml --- constants used to specialize MLRISC and the code generators. + * + * COPYRIGHT (c) 1996 AT&T Bell Laboratories. + * + *) + +signature CONSTANT = sig + type const + + val toString : const -> string + val valueOf : const -> int + val hash : const -> word + val == : const * const -> bool +end diff --git a/MLRISC/instructions/expandCopies.sig b/MLRISC/instructions/expandCopies.sig new file mode 100644 index 0000000..88f57a8 --- /dev/null +++ b/MLRISC/instructions/expandCopies.sig @@ -0,0 +1,5 @@ +signature EXPAND_COPIES = +sig + structure I : INSTRUCTIONS + val expandCopies : I.instruction -> I.instruction list +end diff --git a/MLRISC/instructions/freqProps.sig b/MLRISC/instructions/freqProps.sig new file mode 100644 index 0000000..810860a --- /dev/null +++ b/MLRISC/instructions/freqProps.sig @@ -0,0 +1,14 @@ +(* + * This is the abstract interface for extracting various kinds of + * frequency information from the program. + *) + +signature FREQUENCY_PROPERTIES = + sig + + structure I : INSTRUCTIONS + + (* Branch probability *) + val branchProb : I.instruction -> Probability.prob + + end diff --git a/MLRISC/instructions/freqProps.sml b/MLRISC/instructions/freqProps.sml new file mode 100644 index 0000000..6bb88ee --- /dev/null +++ b/MLRISC/instructions/freqProps.sml @@ -0,0 +1,22 @@ +(* freqProps.sml + * + * COPYRIGHT (c) 2002 Bell Labs, Lucent Technologies + * + * Generic module for extracting the frequency information. + *) + +functor FreqProps (Props : INSN_PROPERTIES) : FREQUENCY_PROPERTIES = + struct + + structure I = Props.I + + val fifty_fifty = Probability.prob(1, 2) + val get = #get MLRiscAnnotations.BRANCH_PROB + + (* Branch probability *) + fun branchProb instr = (case get(#2(Props.getAnnotations instr)) + of SOME b => b + | NONE => fifty_fifty + (* end case *)) + + end diff --git a/MLRISC/instructions/insnProps.sig b/MLRISC/instructions/insnProps.sig new file mode 100644 index 0000000..63fde27 --- /dev/null +++ b/MLRISC/instructions/insnProps.sig @@ -0,0 +1,77 @@ +(* + * Basic Instruction properties that must be supported on all architectures. + * + * -- Allen + *) +signature INSN_PROPERTIES = +sig + structure I : INSTRUCTIONS + structure C : CELLS + sharing I.C = C + + (* classify instructions *) + datatype kind = IK_JUMP (* branches, including returns *) + | IK_NOP (* no ops *) + | IK_INSTR (* normal instructions *) + | IK_COPY (* parallel copy *) + | IK_CALL (* call instructions *) + | IK_CALL_WITH_CUTS (* call with cut edges *) + | IK_PHI (* A phi node (SSA) *) + | IK_SINK (* A sink node (SSA) *) + | IK_SOURCE (* A source node (SSA) *) + + val instrKind : I.instruction -> kind + + (* parallel moves *) + val moveInstr : I.instruction -> bool + val moveTmpR : I.instruction -> CellsBasis.cell option + val moveDstSrc : I.instruction -> CellsBasis.cell list * CellsBasis.cell list + + (* no op *) + val nop : unit -> I.instruction + + (* jump instruction *) + val jump : Label.label -> I.instruction + + (* load immediate; must be within immedRange *) + val immedRange : {lo:int, hi:int} + val loadImmed : {immed:int, t:CellsBasis.cell} -> I.instruction + val loadOperand : {opn:I.operand, t:CellsBasis.cell} -> I.instruction + + (* + * Targets of a branch instruction + * precondition: instruction must be of type IK_JUMP. + *) + datatype target = LABELLED of Label.label | FALLTHROUGH | ESCAPES + val branchTargets : I.instruction -> target list + + (* Set the jump target; error if not a jump instruction. *) + val setJumpTarget : I.instruction * Label.label -> I.instruction + + (* Set the branch target; error if not a branch instruction, t=true, f=false case *) + val setBranchTargets : {i:I.instruction, t:Label.label, f:Label.label} -> I.instruction + + (* equality and hashing on operands *) + val eqOpn : I.operand * I.operand -> bool + val hashOpn : I.operand -> word + + (* Given a conditional jump instruction and label, return a conditional + * jump that has the complimentary condition and that targets the given + * label. If the given instruction is not a conditional jump, then + * the NegateConditional exception is raised. + *) + exception NegateConditional + val negateConditional : (I.instruction * Label.label) -> I.instruction + + (* definition/use for the RA *) + val defUse : CellsBasis.cellkind -> + I.instruction -> (CellsBasis.cell list * CellsBasis.cell list) + + (* annotations *) + val getAnnotations : I.instruction -> + I.instruction * Annotations.annotation list + val annotate : I.instruction * Annotations.annotation -> I.instruction + + val replicate : I.instruction -> I.instruction +end + diff --git a/MLRISC/instructions/instructions.sig b/MLRISC/instructions/instructions.sig new file mode 100644 index 0000000..cbd007c --- /dev/null +++ b/MLRISC/instructions/instructions.sig @@ -0,0 +1,29 @@ +(* instructions.sig + * + * COPYRIGHT (c) 2002 Bell Labs, Lucent Technologies. + * + * This signature specifies the abstract view of an instruction. + *) + +signature INSTRUCTIONS = +sig + structure C : CELLS + structure CB :CELLS_BASIS = CellsBasis + type operand (* operands supported by architecture *) + type addressing_mode (* addressing mode *) + type ea (* effective address for accessing memory *) + type instr (* architecture instructions *) + + datatype instruction = (* partially abstract *) + LIVE of {regs: C.cellset, spilled: C.cellset} + | KILL of {regs: C.cellset, spilled: C.cellset} + | COPY of + {k: CB.cellkind, + sz: int, (* in bits *) + dst: CB.cell list, + src: CB.cell list, + tmp: ea option (* = NONE if |dst| = |src| = 1 *) + } + | ANNOTATION of {i: instruction, a: Annotations.annotation} + | INSTR of instr +end diff --git a/MLRISC/instructions/label-sig.sml b/MLRISC/instructions/label-sig.sml new file mode 100644 index 0000000..3997796 --- /dev/null +++ b/MLRISC/instructions/label-sig.sml @@ -0,0 +1,55 @@ +(* label-sig.sml + * + * COPYRIGHT (c) 2001 Bell Labs, Lucent Technologies + * + * An abstract interface to MLRISC labels. Labels com in three flavors: + * global labels have fixed names and are imported/exported from the + * current compilation unit; local labels have names generated from some + * given prefix; and anonymous labels have internally generated names that + * are not in the compilation unit's symbol table. + *) + +signature LABEL = + sig + + type label + + (* make a global label *) + val global : string -> label + + (* make a label generator; note that if the prefix string is "", then + * the standard prefix "L" will be used. + *) + val label : string -> unit -> label + + (* make an anonymous label *) + val anon : unit -> label + + (* label equality, comparisons, and hashing *) + val same : (label * label) -> bool + val compare : (label * label) -> order + val hash : label -> word + + (* label addresses *) + exception GlobalLabel + val setAddr : (label * int) -> unit + val addrOf : label -> int + + (* return a string representation of the label; this function is meant for + * debugging; use the fmt function for assembly output. + *) + val toString : label -> string + + (* format a label for assembly output. The gPrefix argument is the target + * ABI's prefix for global symbols (e.g., "_" or "") and the aPrefix is + * the target assembler's prefix for anonymous labels. Local labels are + * emitted using their specified prefxix. + *) + val fmt : {gPrefix : string, aPrefix : string}-> label -> string + + (* reset the internal counter used to generate unique IDs for labels; this + * function should never be called when there are label values still in use. + *) + val reset : unit -> unit + + end diff --git a/MLRISC/instructions/label.sml b/MLRISC/instructions/label.sml new file mode 100644 index 0000000..51e4e79 --- /dev/null +++ b/MLRISC/instructions/label.sml @@ -0,0 +1,73 @@ +(* label.sml + * + * COPYRIGHT (c) 2001 Bell Labs, Lucent Technologies + *) + +structure Label :> LABEL = + struct + + datatype label_kind = GLOB of string | LOCAL of string | ANON + + type label = { + id : word, + addr : int ref, + kind : label_kind + } + + local + val cnt = ref 0w0 + in + fun reset () = cnt := 0w0 + fun mkLab k = let val id = !cnt in cnt := id+0w1; {id=id, addr=ref ~1, kind=k} end + end (* local *) + + (* make a global label *) + fun global name = mkLab(GLOB name) + + (* make a label generator; note that if the prefix string is "", then + * the standard prefix "L" will be used. + *) + fun label "" = label "L" + | label prefix = let + val kind = LOCAL prefix + in + fn () => mkLab kind + end + + (* make an anonymous label *) + fun anon () = mkLab ANON + + (* label equality, comparisons, and hashing *) + fun same (l1 : label, l2 : label) = (#id l1 = #id l2) + fun compare (l1 : label, l2 : label) = Word.compare(#id l1, #id l2) + fun hash (l : label) = #id l + + (* label addresses *) + exception GlobalLabel + fun setAddr ({id, addr, kind=GLOB _}, _) = raise GlobalLabel + | setAddr ({id, addr, kind}, a) = addr := a + + fun addrOf {id, addr, kind=GLOB _} = raise GlobalLabel + | addrOf {id, addr, kind} = !addr + + (* return a string representation of the label; this function is meant for + * debugging; use the fmt function for assembly output. + *) + fun toString {id, addr, kind=GLOB name} = name + | toString {id, addr, kind=LOCAL prefix} = prefix ^ Word.toString id + | toString {id, addr, kind=ANON} = ".L" ^ Word.toString id + + (* format a label for assembly output. The gPrefix argument is the target + * ABI's prefix for global symbols (e.g., "_" or "") and the aPrefix is + * the target assembler's prefix for anonymous labels. Local labels are + * emitted using their specified prefxix. + *) + fun fmt {gPrefix, aPrefix} = let + fun toStr {id, addr, kind=GLOB name} = gPrefix ^ name + | toStr {id, addr, kind=LOCAL prefix} = prefix ^ Word.toString id + | toStr {id, addr, kind=ANON} = aPrefix ^ Word.toString id + in + toStr + end + + end diff --git a/MLRISC/instructions/mlriscAnnotations.sig b/MLRISC/instructions/mlriscAnnotations.sig new file mode 100644 index 0000000..36c14af --- /dev/null +++ b/MLRISC/instructions/mlriscAnnotations.sig @@ -0,0 +1,112 @@ +(* mlriscAnnotations.sig + * + * COPYRIGHT (c) 2002 Bell Labs, Lucent Technologies + * + * These are some basic annotations understood by the MLRISC system. + * The MLRISC client can create its own annotations and propagate them + * to MLRISC. Client-defined annotations are ignored by MLRISC. + * + * -- Allen + * + * TODO: There should be comments to say that the annotations are block + * or instruction annotations. -- Lal. + *) + +signature MLRISC_ANNOTATIONS = +sig + + structure C : CELLS_BASIS = CellsBasis + + (* + * The branch probability of conditional branches. + * The client can attach this with conditional branches. + * nnThis has no effect otherwise. + * + * Currently, the annotation is recognized by the static branch prediction + * mondule. + *) + exception BRANCHPROB of Probability.prob + val BRANCH_PROB : Probability.prob Annotations.property + + (* The execution frequency of a basic block + * You can attach this at a basic block. + *) + exception EXECUTIONFREQ of int + val EXECUTION_FREQ : int Annotations.property + + (* No effect at all; this just allows you to insert comments *) + val COMMENT : string Annotations.property + + (* Instructions in the block should not be reordered *) + val NOREORDER : unit Annotations.property + + (* + * Control dependence definition and use. + * + * To use these, the client should generate + * control dependence virtual registers via Cells.newCell Cells.CTRL + * and attach these annotations to instructions and basic blocks. + * + * These annotations are currently recognized by the SSA optimization + * modules. + *) + exception CTRLDEF of C.cell + exception CTRLUSE of C.cell + val CTRL_DEF : C.cell Annotations.property + val CTRL_USE : C.cell Annotations.property + + (* + * Attach this annotation to assemblers for pretty printing + * client defined cell informations. + *) + val PRINT_CELLINFO : (C.cell -> string) Annotations.property + + (* + * Does a compilation unit has GC information? + *) + val GC_INFO : unit Annotations.property + + (* + * Disable all optimizations in the cluster + *) + val NO_OPTIMIZATION : unit Annotations.property + + (* + * Mark basic block that is used for calling the GC + *) + val CALLGC : unit Annotations.property + val GCSAFEPOINT : string Annotations.property + + (* + * Insert block names + *) + exception BLOCKNAMES of Annotations.annotations + val BLOCK_NAMES : Annotations.annotations Annotations.property + + (* + * This annotation inserts an empty basic block + *) + exception EMPTYBLOCK + val EMPTY_BLOCK : unit Annotations.property + + (* + * Enter information for a register. + *) + exception MARKREG of C.cell -> unit + val MARK_REG : (C.cell -> unit) Annotations.property + + (* + * Disable branch chaining optimization on a jump + *) + val NO_BRANCH_CHAINING : unit Annotations.property + + (* + * Code has reference to a virtual (dedicated) frame pointer. + *) + val USES_VIRTUAL_FRAME_POINTER : unit Annotations.property + + (* + * Define return arguments of a call (hack for x86) + *) + val RETURN_ARG : C.cell Annotations.property +end diff --git a/MLRISC/instructions/mlriscAnnotations.sml b/MLRISC/instructions/mlriscAnnotations.sml new file mode 100644 index 0000000..f16803f --- /dev/null +++ b/MLRISC/instructions/mlriscAnnotations.sml @@ -0,0 +1,78 @@ +(* mlriscAnnotations.sml + * + * COPYRIGHT (c) 2002 Bell Labs, Lucent Technologies + * + * These are some basic annotations understood by the MLRISC system + * + * -- Allen + *) + +structure MLRiscAnnotations : MLRISC_ANNOTATIONS = struct + + structure A = Annotations + structure C = CellsBasis + + (* the branch probability of conditional branches *) + (* in percentage *) + exception BRANCHPROB of Probability.prob + val BRANCH_PROB = A.new'{create=BRANCHPROB, + get=fn BRANCHPROB b => b | e => raise e, + toString=fn p => "branch("^Probability.toString p^")"} + + (* the execution frequency of a basic block *) + exception EXECUTIONFREQ of int + val EXECUTION_FREQ = A.new'{create=EXECUTIONFREQ, + get=fn EXECUTIONFREQ x => x | e => raise e, + toString=fn r => "freq("^Int.toString r^")"} + + (* no effect at all; just allows you to insert comments *) + val COMMENT = A.new(SOME(fn s => s)) + + (* Instructions in the block should not be reordered *) + val NOREORDER = A.new(NONE : (unit->string) option) + + fun listify f = + let fun g [] = "" + | g [x] = f x + | g (x::xs) = f x^" "^g xs + in g end + + (* control dependence use *) + exception CTRLDEF of C.cell + exception CTRLUSE of C.cell + val CTRL_USE = A.new'{create=CTRLUSE, + get=fn CTRLUSE x => x | e => raise e, + toString=C.toString} + val CTRL_DEF = A.new'{create=CTRLDEF, + get=fn CTRLDEF x => x | e => raise e, + toString=C.toString} + + val NO_OPTIMIZATION = A.new(SOME(fn () => "NO_OPTIMIZATION")) + val CALLGC = A.new(SOME(fn () => "CALLGC")) + val GCSAFEPOINT = A.new(SOME(fn s => "GCSAFEPOINT: "^s)) + val GC_INFO = A.new(SOME(fn () => "GC_INFO")) + + exception BLOCKNAMES of A.annotations + val BLOCK_NAMES = A.new'{create=BLOCKNAMES, + get=fn BLOCKNAMES n => n | e => raise e, + toString=fn _ => "BLOCK_NAMES"} + + exception EMPTYBLOCK + val EMPTY_BLOCK = A.new'{create=fn () => EMPTYBLOCK, + get=fn EMPTYBLOCK => () | e => raise e, + toString=fn () => "EMPTY_BLOCK"} + + exception MARKREG of C.cell -> unit + val MARK_REG = A.new'{toString=fn _ => "MARK_REG", + create=MARKREG, + get=fn MARKREG f => f | e => raise e + } + val PRINT_CELLINFO = A.new(SOME(fn _ => "PRINT_CELLINFO")) + : (C.cell -> string) A.property + + val NO_BRANCH_CHAINING = A.new(SOME(fn () => "NO_BRANCH_CHAINING")) + + val USES_VIRTUAL_FRAME_POINTER = A.new(SOME(fn () => "HAS_VIRTUAL_FRAME_POINTER")) + + val RETURN_ARG = A.new(SOME(C.toString)) +end diff --git a/MLRISC/instructions/mlriscOptimization.sig b/MLRISC/instructions/mlriscOptimization.sig new file mode 100644 index 0000000..75004b6 --- /dev/null +++ b/MLRISC/instructions/mlriscOptimization.sig @@ -0,0 +1,9 @@ +(* + * Abstract signature of an optimization phase + *) +signature MLRISC_OPTIMIZATION = +sig + type flowgraph (* representation is abstract *) + val name : string (* name of optimization *) + val run : flowgraph -> flowgraph (* run optimization *) +end diff --git a/MLRISC/instructions/peephole.sig b/MLRISC/instructions/peephole.sig new file mode 100644 index 0000000..29f5854 --- /dev/null +++ b/MLRISC/instructions/peephole.sig @@ -0,0 +1,8 @@ +signature PEEPHOLE = +sig + structure I : INSTRUCTIONS + + (* Instructions are in reversed order *) + val peephole : I.instruction list -> I.instruction list + +end diff --git a/MLRISC/instructions/pseudoOps.sig b/MLRISC/instructions/pseudoOps.sig new file mode 100644 index 0000000..d10cbfd --- /dev/null +++ b/MLRISC/instructions/pseudoOps.sig @@ -0,0 +1,31 @@ +(* pseudo-ops.sml --- description of assembly pseudo-ops + * + * COPYRIGHT (c) 1996 AT&T Bell Laboratories. + * + *) + +signature PSEUDO_OPS = sig + type pseudo_op + + val toString : pseudo_op -> string + + val emitValue : {pOp:pseudo_op, loc:int, emit:Word8.word -> unit} -> unit + (* emit value of pseudo op give current location counter and output + * stream. The value emitted should respect the endianness of the + * target machine. + *) + + val sizeOf : pseudo_op * int -> int + (* Size of the pseudo_op in bytes given the current location counter + * The location counter is provided in case some pseudo ops are + * dependent on alignment considerations. + *) + + val adjustLabels : pseudo_op * int -> bool + (* adjust the value of labels in the pseudo_op given the current + * location counter. + *) + +end + + diff --git a/MLRISC/instructions/region.sig b/MLRISC/instructions/region.sig new file mode 100644 index 0000000..1e4a4b5 --- /dev/null +++ b/MLRISC/instructions/region.sig @@ -0,0 +1,8 @@ +signature REGION = +sig + type region + val stack : region + val readonly : region + val memory : region + val toString : region -> string +end diff --git a/MLRISC/instructions/regionInfo.sml b/MLRISC/instructions/regionInfo.sml new file mode 100644 index 0000000..5cccf6d --- /dev/null +++ b/MLRISC/instructions/regionInfo.sml @@ -0,0 +1,23 @@ +signature REGION_INFO = +sig + + (* + * An action associated with a region: + * This can be initialization, read, or update + * + * Mutability: is the region writable? + * This can be mutatble or immutable + * + * A strong update to the same exact location always override the + * previous action. + *) + datatype action = INIT | READ | UPDATE + type kind = {action:action, strong:bool, mutable:bool} + +end + +structure RegionInfo = +struct + datatype action = INIT | READ | UPDATE + type kind = {action:action, strong:bool, mutable:bool} +end diff --git a/MLRISC/instructions/regionProps.sig b/MLRISC/instructions/regionProps.sig new file mode 100644 index 0000000..1e15340 --- /dev/null +++ b/MLRISC/instructions/regionProps.sig @@ -0,0 +1,16 @@ +(* + * Extract information from user defined regions + *) +signature REGION_PROPERTIES = +sig + + structure Region : REGION + structure RegionInfo : REGION_INFO + + val readKind : Region.region -> RegionInfo.kind + val writeKind : Region.region -> RegionInfo.kind + val readFrom : Region.region -> CellsBasis.cell list (* uses *) + val writeTo : Region.region -> + CellsBasis.cell list * CellsBasis.cell list (* defs/uses *) + +end diff --git a/MLRISC/instructions/rewrite.sig b/MLRISC/instructions/rewrite.sig new file mode 100644 index 0000000..f90ad27 --- /dev/null +++ b/MLRISC/instructions/rewrite.sig @@ -0,0 +1,15 @@ +(* + * Signature for rewriting (renaming) cells inside instructions. + *) + +signature REWRITE_INSTRUCTIONS = +sig + + structure I : INSTRUCTIONS + + (* from to *) + val rewriteDef : I.instruction * CellsBasis.cell * CellsBasis.cell -> I.instruction + val rewriteUse : I.instruction * CellsBasis.cell * CellsBasis.cell -> I.instruction + val frewriteDef : I.instruction * CellsBasis.cell * CellsBasis.cell -> I.instruction + val frewriteUse : I.instruction * CellsBasis.cell * CellsBasis.cell -> I.instruction +end diff --git a/MLRISC/instructions/shuffle.sig b/MLRISC/instructions/shuffle.sig new file mode 100644 index 0000000..76c3610 --- /dev/null +++ b/MLRISC/instructions/shuffle.sig @@ -0,0 +1,7 @@ +signature SHUFFLE = +sig + structure I : INSTRUCTIONS + type t = {tmp:I.ea option,dst:CellsBasis.cell list,src:CellsBasis.cell list} + val shuffle : t -> I.instruction list + val shufflefp : t -> I.instruction list +end diff --git a/MLRISC/instructions/shuffle.sml b/MLRISC/instructions/shuffle.sml new file mode 100644 index 0000000..0563d82 --- /dev/null +++ b/MLRISC/instructions/shuffle.sml @@ -0,0 +1,70 @@ +(* shuffle.sml -- implements the parallel copy instruction as a sequence + * of moves. + * + * COPYRIGHT (c) 1996 Bell Laboratories. + * + *) + + +functor Shuffle(I : INSTRUCTIONS) : + sig + val shuffle : + {mvInstr : {dst:I.ea, src:I.ea} -> I.instruction list, + ea : CellsBasis.cell -> I.ea} + -> + {tmp : I.ea option, + dst : CellsBasis.cell list, + src : CellsBasis.cell list} + -> I.instruction list + end = +struct + structure C = I.C + + datatype obj = TEMP | CELL of CellsBasis.cell + + fun equal (r1, r2) = CellsBasis.sameColor(r1,r2) + + fun equalObj (TEMP, TEMP) = true + | equalObj (CELL u, CELL v) = equal(u, v) + | equalObj _ = false + + fun shuffle{mvInstr, ea} {tmp, dst, src} = let + fun mv{dst, src, instrs} = List.revAppend(mvInstr{dst=dst,src=src}, instrs) + + fun opnd dst = case dst of + TEMP => Option.valOf tmp + | CELL dst => ea dst + + (* perform unconstrained moves *) + fun loop((p as (rd,rs))::rest, changed, used, done, instrs) = + if List.exists (fn r => equalObj(r, rd)) used then + loop(rest, changed, used, p::done, instrs) + else loop(rest, true, used, done, + mv{dst=opnd rd, src=opnd rs, instrs=instrs}) + | loop([], changed, _, done, instrs) = (changed, done, instrs) + + fun cycle([], instrs) = instrs + | cycle(moves, instrs) = + (case loop(moves, false, map #2 moves, [], instrs) + of (_, [], instrs) => instrs + | (true, acc, instrs) => cycle(acc, instrs) + | (false, (rd,rs)::acc, instrs) => let + fun rename(p as (a,b)) = + if equalObj(rd, b) then (a, TEMP) else p + val acc' = (rd, rs) :: map rename acc + val instrs' = mv{dst=Option.valOf tmp, src=opnd rd, instrs=instrs} + val (_, acc'', instrs'') = + loop(acc', false, map #2 acc', [], instrs') + in cycle(acc'', instrs'') + end + (*esac*)) + + (* remove moves that have been coalesced. *) + val rmvCoalesced = + ListPair.foldl (fn (rd, rs, mvs) => + if equal (rd, rs) then mvs + else (CELL rd, CELL rs) :: mvs) [] + in rev (cycle (rmvCoalesced(dst, src), [])) + end +end + diff --git a/MLRISC/instructions/stream.sig b/MLRISC/instructions/stream.sig new file mode 100644 index 0000000..579b97b --- /dev/null +++ b/MLRISC/instructions/stream.sig @@ -0,0 +1,38 @@ +(* + * This is a generic instruction stream datatype. + * Components such as assemblers, machine code emitters, instruction + * selection modules communicate with each via this interface. + * + * -- Allen + *) + +signature INSTRUCTION_STREAM = +sig + + structure P : PSEUDO_OPS + datatype ('a,'b,'c,'d) stream = + STREAM of + { beginCluster: int -> unit, (* start new compilation unit *) + endCluster : 'b -> 'd, (* end compilation unit *) + emit : 'a -> unit, (* emit instruction *) + pseudoOp : P.pseudo_op -> unit, (* emit a pseudo op *) + defineLabel : Label.label -> unit, (* define a local label *) + entryLabel : Label.label -> unit, (* define an external label *) + comment : string -> unit, (* emit comment *) + annotation : Annotations.annotation -> unit, (* add annotation *) + getAnnotations: unit -> Annotations.propList ref, (* get annotations*) + exitBlock : 'c -> unit (* mark the end of a procedure *) + } + + (* Note: + o Each compilation unit should be wrapped between beginCluster/endCluster. + + o The method annotation adds an annotation to the current basic block, + not to the current instruction. + + o The method comment add a comment to the current basic block. + Usually comment(msg) is the same as + annotation(BasicAnnotations.COMMENT msg). + *) + +end diff --git a/MLRISC/instructions/stream.sml b/MLRISC/instructions/stream.sml new file mode 100644 index 0000000..ce76b41 --- /dev/null +++ b/MLRISC/instructions/stream.sml @@ -0,0 +1,28 @@ +(* + * This is a generic instruction stream datatype. + * Components such as assemblers, machine code emitters, instruction + * selection modules communicate with each via this interface. + * + * -- Allen + *) + +functor InstructionStream(P : PSEUDO_OPS) : INSTRUCTION_STREAM = +struct + + structure P = P + + datatype ('a,'b,'c,'d) stream = + STREAM of + { beginCluster: int -> unit, (* start new compilation unit *) + endCluster : 'b -> 'd, (* end compilation unit *) + emit : 'a -> unit, (* emit instruction *) + pseudoOp : P.pseudo_op -> unit, (* emit a pseudo op *) + defineLabel : Label.label -> unit, (* define a local label *) + entryLabel : Label.label -> unit, (* define an external label *) + comment : string -> unit, (* emit comment *) + annotation : Annotations.annotation -> unit, (* add annotation *) + getAnnotations : unit -> Annotations.propList ref, (* get annotations*) + exitBlock : 'c -> unit (* mark the end of a procedure *) + } + +end diff --git a/MLRISC/ir-archive/cdg.sig b/MLRISC/ir-archive/cdg.sig new file mode 100644 index 0000000..568632d --- /dev/null +++ b/MLRISC/ir-archive/cdg.sig @@ -0,0 +1,32 @@ +(* + * This is a generic module for computing the control dependence graph + * from any graph with an entry and an exit. + * The graph is treated as a control flow graph. + * The edge predicate is used to determine whether an edge should be + * treated as a branch edge. + * + * -- Allen + *) + +signature CONTROL_DEPENDENCE_GRAPH = +sig + + structure Dom : DOMINATOR_TREE + + type ('n,'e,'g) cdg = ('n,'e,'g) Graph.graph + + val control_dependence_graph : + ('e -> bool) -> + ('n,'e,'g) Dom.postdominator_tree -> + ('n,'e,'g) cdg + + val control_dependence_graph' : + ('n Graph.node -> 'n2 Graph.node) -> + ('e Graph.edge -> 'e2 Graph.edge) -> + ('g -> 'g2) -> + ('e -> bool) -> + ('n,'e,'g) Dom.postdominator_tree -> + ('n2,'e2,'g2) cdg + +end + diff --git a/MLRISC/ir-archive/cdg.sml b/MLRISC/ir-archive/cdg.sml new file mode 100644 index 0000000..297787b --- /dev/null +++ b/MLRISC/ir-archive/cdg.sml @@ -0,0 +1,66 @@ +(* + * This is a generic module for computing the control dependence graph + * from a graph with an entry and an exit. + * The graph is treated as a control flow graph. + * The edge predicate is used to determine whether an edge should be + * treated as a branch edge. + * + * -- Allen + *) + +functor ControlDependenceGraph + (structure Dom : DOMINATOR_TREE + structure GraphImpl : GRAPH_IMPLEMENTATION + ) : CONTROL_DEPENDENCE_GRAPH = + +struct + + structure Dom = Dom + structure G = Graph + structure GI = GraphImpl + + type ('n,'e,'g) cdg = ('n,'e,'g) Graph.graph + + fun control_dependence_graph' f_node f_edge f_graph is_conditional + (PDom as G.GRAPH pdom) = + let val G.GRAPH cfg = Dom.cfg PDom + val N = #capacity cfg () + val cdg_info = f_graph (#graph_info cfg) + val CDG as G.GRAPH cdg = GI.graph("CDG", cdg_info, N) + val ipdom = Dom.idom PDom + val add_edge = fn e => #add_edge cdg (f_edge e) + val out_edges = #out_edges cfg + + (* create the control dependence nodes *) + val _ = #forall_nodes cfg (fn n => #add_node cdg (f_node n)) + + (* create the control dependence edges *) + val _ = #forall_nodes cfg + (fn node as (X,bb) => + let val ipdom_X = ipdom X + fun loop (X,Z,L) = + if ipdom_X = ~1 orelse ipdom_X <> Z then + (* Z is immediately control dependent on X *) + (add_edge (X,Z,L); + case ipdom Z of + ~1 => () + | Z => loop (X,Z,L)) + else () + in + app (fn (X,Z,L) => + (* Z is a successor of X on label L *) + if is_conditional L then loop(X,Z,L) + else () + ) (out_edges X) + end) + in + CDG + end + + fun control_dependence_graph is_conditional = + control_dependence_graph' + (fn n => n) + (fn e => e) + (fn g => g) is_conditional + +end diff --git a/MLRISC/ir-archive/cfg-restructure.sig b/MLRISC/ir-archive/cfg-restructure.sig new file mode 100644 index 0000000..65dfe98 --- /dev/null +++ b/MLRISC/ir-archive/cfg-restructure.sig @@ -0,0 +1,22 @@ +(* + * Insert various types of dummy blocks into the CFG. + * This is probably no longer used. + * + * -- Allen + *) + +signature CONTROL_FLOW_GRAPH_RESTRUCTURE = +sig + + structure Loop : LOOP_STRUCTURE + + val restructure : + ('n,'e,'g) Graph.graph * ('n,'e,'g) Loop.loop_structure -> + { add_preheader : ({header : 'n Graph.node, + entries : 'e Graph.edge list + } -> unit) option, + add_landing_pad : ({exit:'e Graph.edge} -> unit) option + } -> unit + +end + diff --git a/MLRISC/ir-archive/cfg-restructure.sml b/MLRISC/ir-archive/cfg-restructure.sml new file mode 100644 index 0000000..9e699fc --- /dev/null +++ b/MLRISC/ir-archive/cfg-restructure.sml @@ -0,0 +1,51 @@ +(* + * This module inserts preheaders and other stuff. + * This is probably no longer used. + * + * -- Allen + *) + +functor ControlFlowGraphRestructure + (structure Loop : LOOP_STRUCTURE) : CONTROL_FLOW_GRAPH_RESTRUCTURE = +struct + structure Loop = Loop + structure G = Graph + + fun restructure (G.GRAPH cfg,G.GRAPH loop) + { add_preheader, + add_landing_pad + } = + let val add_node = #add_node cfg + fun preheader f = + fn {header,backedges} => + let val in_edges = #in_edges cfg header + fun g([],entries) = entries + | g((e as (i,j,_))::es,entries) = + if List.exists (fn (i',j',_) => i=i' andalso j=j') + backedges then g(es,entries) + else g(es,e::entries) + in f{header =(header,#node_info cfg header), + entries=g(in_edges,[]) + } + end + + fun landing_pads f = fn {exits} => app (fn e => f {exit=e}) exits + + fun nop _ = () + val insert_preheader = case add_preheader of + SOME f => preheader f + | NONE => nop + val insert_landing_pads = case add_landing_pad of + SOME f => landing_pads f + | NONE => nop + fun process_loop(i,Loop.LOOP{header,backedges=[],exits,...}) = () + | process_loop(i,Loop.LOOP{header,backedges,exits,...}) = + (insert_preheader{header=header,backedges=backedges}; + insert_landing_pads{exits=exits} + ) + in + #forall_nodes loop process_loop + end + +end + diff --git a/MLRISC/ir-archive/compute-freq.sig b/MLRISC/ir-archive/compute-freq.sig new file mode 100644 index 0000000..87c42ba --- /dev/null +++ b/MLRISC/ir-archive/compute-freq.sig @@ -0,0 +1,27 @@ +(* + * This module computes frequencies when given branch probabilities + * It has been generalized from the old static branch predication + * so that it can be applied to other graph based reprensentations. + * + * -- Allen + *) + +signature COMPUTE_FREQUENCIES = +sig + + structure Loop : LOOP_STRUCTURE + structure W : FREQ + + val compute_frequencies : + { cfg : ('n,'e,'g) Graph.graph, + loop : ('n,'e,'g) Loop.loop_structure, + (* multiplier for each loop nesting *) + loopMultiplier : int, + nodeFreq : 'n -> W.freq ref, (* frequency of a node *) + edgeFreq : 'e -> W.freq ref, (* frequency of an edge *) + branchProb : 'n -> int, (* branch probability of a node *) + (* is the edge a taken branch edge? *) + isTakenBranch : 'e Graph.edge -> bool + } -> unit + +end diff --git a/MLRISC/ir-archive/compute-freq.sml b/MLRISC/ir-archive/compute-freq.sml new file mode 100644 index 0000000..1253d96 --- /dev/null +++ b/MLRISC/ir-archive/compute-freq.sml @@ -0,0 +1,136 @@ +(* + * This module computes frequencies when given branch probabilities. + * Bug fix: + * This module didn't work on irreducible flowgraphs! + * The problem was caused + * + * -- Allen + *) + +functor ComputeFrequencies + (structure Loop : LOOP_STRUCTURE + structure Freq : FREQ + ) : COMPUTE_FREQUENCIES = +struct + + structure Loop = Loop + structure Dom = Loop.Dom + structure G = Graph + structure S = BitSet + structure W = Freq + structure A = Array + structure H = HashArray + + val op div = W.div + + fun compute_frequencies + {cfg,loop,loopMultiplier,nodeFreq,edgeFreq,branchProb,isTakenBranch} = + let val G.GRAPH cfg = cfg + val Loop as G.GRAPH loop = loop + val ENTRY = case #entries cfg () of + [ENTRY] => ENTRY + | _ => raise Graph.NotSingleEntry + val N = #capacity cfg () + val marked = S.create N + val number_of_entries = length(#out_edges cfg ENTRY) + val entry_weight = W.*(W.fromInt 100,number_of_entries) + + (* indexed by headers *) + val likely_exits = H.array(N,[]) + val exit_counts = H.array(N,0) + + (* indexed by nodes *) + val entry_edges = A.tabulate(N,#in_edges cfg) + val header_of = Loop.header Loop + val nodeFreqs = A.array(N,0) + val branchProbs = A.array(N,0) + val TIMES = 20 + + val _ = #forall_nodes cfg (fn (b,b') => + (A.update(nodeFreqs,b,!(nodeFreq b')); + A.update(branchProbs,b,branchProb b') + )) + + fun is_exit_edge (e as (i,j,_)) = + List.exists (fn (i',j',_) => i = i' andalso j = j') + (H.sub(likely_exits,A.sub(header_of,i))) + + val sum = List.foldr (fn ((_,_,e),m) => !(edgeFreq e) + m) 0 + + fun exit_weight_of i = + let val h = A.sub(header_of,i) + val w = A.sub(nodeFreqs,h) + in w div (loopMultiplier * H.sub(exit_counts,h)) + end + + val entryEdges = Loop.entryEdges Loop + + fun preprocess(header,Loop.LOOP{exits,...}) = + let val real_exits = + List.filter (fn (i,_,_) => A.sub(branchProbs,i) > 0) exits + in H.update(likely_exits,header,real_exits); + H.update(exit_counts,header,length real_exits); + A.update(entry_edges,header,entryEdges header) + end + + fun propagate(0,_) = (print "Out of time\n") + | propagate(n,[]) = () + | propagate(n,i::worklist) = + let val _ = S.reset(marked,i) + val old_weight = A.sub(nodeFreqs,i) + val new_weight = sum(A.sub(entry_edges,i)) + val new_weight = if i = ENTRY then entry_weight + else (case H.sub(likely_exits,i) of + [] => new_weight (* not a real loop! *) + | _ => W.*(new_weight,loopMultiplier) + ) + in if old_weight = new_weight then + propagate(n,worklist) + else (A.update(nodeFreqs,i,new_weight); + propagate_edge_weight(#out_edges cfg i,new_weight,[]); + propagate'(n,#out_edges cfg i,worklist) + ) + end + + and propagate'(n,[],worklist) = propagate(n,worklist) + | propagate'(n,(i,j,_)::es,worklist) = + if S.markAndTest(marked,j) then + propagate'(n,es,worklist) + else propagate'(Int.-(n,1),es,j::worklist) + + and propagate_edge_weight([],W,es') = process_non_exits(W,es') + | propagate_edge_weight((edge as (i,_,e))::es,W,es') = + if is_exit_edge edge then + let val exit_weight = exit_weight_of(A.sub(header_of,i)) + val w = edgeFreq e + in w := exit_weight; + propagate_edge_weight(es,W-exit_weight,es') + end + else + propagate_edge_weight(es,W,edge::es') + + and process_non_exits(W,[]) = () + | process_non_exits(W,[(_,_,e)]) = edgeFreq e := W + | process_non_exits(W,es as [edge1 as (i,_,e1),(_,_,e2)]) = + if i = ENTRY then divide_evenly(W,es) else + let val w = edgeFreq e1 + val w' = edgeFreq e2 + val (w_F,w_T) = if isTakenBranch edge1 then (w',w) else (w,w') + val p = A.sub(branchProbs,i) + in w_T := W.*(W,p) div 100; + w_F := W - !w_T + end + | process_non_exits(W,es) = divide_evenly(W,es) + + and divide_evenly(W,es) = + let val W' = W div (length es) + in app (fn (_,_,e) => edgeFreq e := W') es + end + + in + #forall_nodes loop preprocess; + propagate(TIMES * N, [ENTRY]); + #forall_nodes cfg (fn (b,b') => nodeFreq b' := A.sub(nodeFreqs,b)) + end handle Overflow => print "[Overflow]\n" + +end diff --git a/MLRISC/ir-archive/compute-freq2.sig b/MLRISC/ir-archive/compute-freq2.sig new file mode 100644 index 0000000..698e0b7 --- /dev/null +++ b/MLRISC/ir-archive/compute-freq2.sig @@ -0,0 +1,27 @@ +(* + * This module computes frequencies when given branch probabilities + * It has been generalized from the old static branch predication + * so that it can be applied to other graph based reprensentations. + * + * -- Allen + *) + +signature COMPUTE_FREQUENCIES2 = +sig + + structure Derived : DERIVED_GRAPH + structure W : FREQ + + val compute_frequencies : + { cfg : ('n,'e,'g) Graph.graph, + derived : ('n,'e) Derived.derived_graph, + (* multiplier for each loop nesting *) + loopMultiplier : int, + nodeFreq : 'n -> W.freq ref, (* frequency of a node *) + edgeFreq : 'e -> W.freq ref, (* frequency of an edge *) + branchProb : 'n -> int, (* branch probability of a node *) + (* is the edge a taken branch edge? *) + isTakenBranch : 'e Graph.edge -> bool + } -> unit + +end diff --git a/MLRISC/ir-archive/compute-freq2.sml b/MLRISC/ir-archive/compute-freq2.sml new file mode 100644 index 0000000..d1373eb --- /dev/null +++ b/MLRISC/ir-archive/compute-freq2.sml @@ -0,0 +1,92 @@ +(* + * This module computes frequencies when given branch probabilities. + * The last module still didn't work on irreducible flowgraphs! + * I'm rewriting it using a completely different algorithm. + * + * -- Allen + *) + +functor ComputeFrequencies2 + (structure DerivedGraph : DERIVED_GRAPH + structure Freq : FREQ + ) : COMPUTE_FREQUENCIES2 = +struct + + structure Derived = DerivedGraph + structure W = Freq + structure G = Graph + structure A = Array + structure HT = HashTable + + val op div = W.div + val SOME inf = W.maxInt + + fun compute_frequencies + {cfg=G.GRAPH cfg,derived as G.GRAPH dg, + loopMultiplier,nodeFreq,edgeFreq,branchProb,isTakenBranch} = + let val ENTRY = case #entries cfg () of + [ENTRY] => ENTRY + | _ => raise Graph.NotSingleEntry + val N = #capacity cfg () + + fun hash(i,j,_) = Word.<<(Word.fromInt i,0w16) + Word.fromInt j + fun equal((a:int,b:int,_),(c,d,_)) = a = c andalso b = d + exception NotThere + val edgeProbs = HT.mkTable (hash,equal) (10,NotThere) + val addProb = HT.insert edgeProbs + val getProb = HT.lookup edgeProbs + + fun computeEdgeProb(n,n') = + let fun divide_evenly(edges) = + let val W' = 100 div (length edges) + fun loop([],w) = () + | loop([e],w) = addProb(e,w) + | loop(e::es,w) = (addProb(e,W'); loop(es,w-W')) + in loop(edges,100) end + val edges = #out_edges cfg n + in if n = ENTRY then divide_evenly edges else + case edges of + [] => () + | [e] => addProb(e,100) + | [e1,e2] => + let val prob = branchProb n' + val prob = if isTakenBranch e1 then prob else 100 - prob + in addProb(e1,prob); + addProb(e2,100-prob) + end + | es => divide_evenly es + end + + (* Initialize the set of edge probabilities *) + val _ = #forall_nodes cfg computeEdgeProb + + val visited = A.array(N,~1) + + fun process(scc as stamp::_,_) = + let val _ = app (fn b => A.update(visited,b,stamp)) scc + fun collect([],inFreq,isLoop) = (inFreq,isLoop) + | collect(n::ns,inFreq,isLoop) = + let fun loop([],inFreq,isLoop) = (inFreq,isLoop) + | loop((i,j,e)::es,inFreq,isLoop) = + if A.sub(visited,i) = stamp + then loop(es,inFreq,true) + else loop(es,inFreq + + !(nodeFreq(#node_info cfg i)) * getProb e, + isLoop) + val (inFreq,isLoop) = loop(#in_edges dg n,inFreq,isLoop) + in collect(ns,inFreq,isLoop) end + val (freq,isLoop) = collect(scc,0,false) + val freq = if stamp = ENTRY then + W.*(W.fromInt 100,length(#out_edges cfg ENTRY)) + else if isLoop then freq * loopMultiplier div 100 + else freq div 100 + in app (fn b => nodeFreq(#node_info cfg b) := freq) scc + end + + in GraphSCC.scc (ReversedGraphView.rev_view derived) process (); + HT.appi (fn ((i,_,e),w) => + edgeFreq e := (w * !(nodeFreq(#node_info cfg i))) div 100) + edgeProbs + end handle Overflow => () + +end diff --git a/MLRISC/ir-archive/derived-graph.sig b/MLRISC/ir-archive/derived-graph.sig new file mode 100644 index 0000000..9e275e2 --- /dev/null +++ b/MLRISC/ir-archive/derived-graph.sig @@ -0,0 +1,18 @@ +(* + * Compute Tarjan's dominator derived graph from a dominator tree. + * This is used partly to computing path expressions. Alternatively, + * it can also be used for testing for reducibility. In particular, + * cycles involving more than one node represent irreducible loops + * in the flow graph. + * + * -- Allen + *) + +signature DERIVED_GRAPH = +sig + structure Dom : DOMINATOR_TREE + type ('n,'e) derived_graph = ('n,'e Graph.edge,unit) Graph.graph + + val derived_graph : (* O(n+e) *) + ('n,'e,'g) Dom.dominator_tree -> ('n,'e) derived_graph +end diff --git a/MLRISC/ir-archive/derived-graph.sml b/MLRISC/ir-archive/derived-graph.sml new file mode 100644 index 0000000..f924954 --- /dev/null +++ b/MLRISC/ir-archive/derived-graph.sml @@ -0,0 +1,46 @@ +(* + * Compute Tarjan's dominator derived graph from a dominator tree. + * This is used partly to computing path expressions. Alternatively, + * it can also be used for testing for reducibility. In particular, + * cycles involving more than one node represent irreducible loops + * in the flow graph. + * + * -- Allen + *) + +functor DerivedGraph(Dom : DOMINATOR_TREE): DERIVED_GRAPH = +struct + structure Dom = Dom + structure G = Graph + structure GI = Dom.GI + structure A = Array + + type ('n,'e) derived_graph = ('n,'e Graph.edge,unit) Graph.graph + + fun derived_graph (Dom as G.GRAPH dom) = + let val N = #capacity dom () + val D as G.GRAPH d = GI.graph("derived graph",(),N) + val G.GRAPH cfg = Dom.cfg Dom + val ancestors = A.array(Dom.max_levels Dom,0) + val levelsMap = Dom.levelsMap Dom + fun dfs lvl i = + let val _ = A.update(ancestors,lvl,i) + val _ = #add_node d (i,#node_info cfg i) + fun add_edge (e as (i,j,_)) = + let val level = A.sub(levelsMap,j) + in if lvl < level then + #add_edge d (i,j,e) (* i idom j ! *) + else + #add_edge d (A.sub(ancestors,level),j,e) + end + in app add_edge (#out_edges cfg i); + app (dfs (lvl+1)) (#succ dom i) + end + + in app (dfs 0) (#entries dom ()); + #set_entries d (#entries dom ()); + D + end + +end + diff --git a/MLRISC/ir-archive/dj-dataflow.sig b/MLRISC/ir-archive/dj-dataflow.sig new file mode 100644 index 0000000..42f5953 --- /dev/null +++ b/MLRISC/ir-archive/dj-dataflow.sig @@ -0,0 +1,16 @@ +(* + * Perform elimination based dataflow analysis (from Sreedhar's work) + *) +signature DJ_DATAFLOW = +sig + + structure Dom : DOMINATOR_TREE + + val analyze : + { closure : {y:Graph.node_id} -> unit, + var_elim : {y:Graph.node_id, z:Graph.node_id} -> unit, + fixpoint : {scc:Graph.node_id list} -> unit, + compute : {y:Graph.node_id, z:Graph.node_id} -> unit + } -> ('n,'e,'g) Dom.dominator_tree -> unit + +end diff --git a/MLRISC/ir-archive/dj-dataflow.sml b/MLRISC/ir-archive/dj-dataflow.sml new file mode 100644 index 0000000..2a490f3 --- /dev/null +++ b/MLRISC/ir-archive/dj-dataflow.sml @@ -0,0 +1,233 @@ +(* + * Perform elimination based dataflow analysis, using Sreedhar's DJ-graph + * based algorithm. I'm using the eager elimination method because in + * practice it is linear and is much easier to implement. + *) +functor DJDataflow(Dom : DOMINATOR_TREE) : DJ_DATAFLOW = +struct + + structure Dom = Dom + structure G = Graph + structure A = Array + + val debug = true + + fun error msg = MLRiscErrorMsg.error("DJDataflow",msg) + + fun analyze {closure, var_elim, fixpoint, compute} (Dom as G.GRAPH dom) = + let val L = Dom.max_levels Dom + val N = #capacity dom () + val CFG as G.GRAPH cfg = Dom.cfg Dom + val levelsMap = Dom.levelsMap Dom + val idomsMap = Dom.idomsMap Dom + + (* + * These store the current join edges during the graph reduction process + *) + val joinOutEdges = A.array(N,[]) + val joinInEdges = A.array(N,[]) + + (* Priority lists indexed by levels so that all non-join nodes + * always appear in front of the list + *) + val PriorityList = A.array(L,[]) + + (* A node i is a non-join node iff all its predecessors are either + * i or idom(i) + *) + fun isNonJoinNode i = + case A.sub(joinInEdges,i) of + [] => true + | [(j,_,_)] => i = j + | _ => false + + (* Remove the join edge y -> z with edge id *) + fun removeJoinEdge(y,z,id) = + let fun rmvEdges((e as (_,_,id'))::es, es') = + if id' = id then List.revAppend(es', es) + else rmvEdges(es, e::es') + | rmvEdges _ = error "rmvEdges" + in A.update(joinOutEdges, y, rmvEdges(A.sub(joinOutEdges,y), [])); + A.update(joinInEdges, z, rmvEdges(A.sub(joinInEdges, z), [])) + end + + (* Remove all same level i join edges in nodes *) + fun removeAllSameLevelEdges(i,nodes) = + let fun rmv([], es') = es' + | rmv((e as (x,y,_))::es, es') = + rmv(es, if A.sub(levelsMap,y) = i then es' else e::es') + fun loop [] = () + | loop(x::xs) = + (A.update(joinOutEdges, x, rmv(A.sub(joinOutEdges, x), [])); + A.update(joinInEdges, x, rmv(A.sub(joinInEdges, x), [])); + loop xs + ) + in loop nodes + end + + + (* Insert a new join edge *) + fun insertJoinEdge(e as (x,y,_)) = + (A.update(joinOutEdges, x, e::A.sub(joinOutEdges, x)); + A.update(joinInEdges, y, e::A.sub(joinInEdges, y)) + ) + + (* Does the edge y -> z exists? *) + fun doesn'tHasEdge(y,z) = + let fun loop [] = true + | loop((_,z',_)::es) = z<>z' andalso loop es + in loop(A.sub(joinOutEdges, y)) end + + (* Put all nodes into its level in the priority list. + * Initialize the join edges arrays. + *) + fun initialize() = + let val edgeId = ref 0 + in #forall_nodes cfg + (fn (i,_) => + let val lvl = A.sub(levelsMap,i) + fun addJoinEdges([],id,outEdges) = + (A.update(joinOutEdges,i,outEdges); id) + | addJoinEdges((i,j,_)::es,id,outEdges) = + if A.sub(idomsMap,j) = i then + addJoinEdges(es, id, outEdges) + else let val e = (i,j,id) + in A.update(joinInEdges,j,e::A.sub(joinInEdges,j)); + addJoinEdges(es, id+1, e::outEdges) + end + in A.update(PriorityList,lvl,i::A.sub(PriorityList,lvl)); + edgeId := addJoinEdges(#out_edges cfg i, !edgeId, []) + end + ) + end + + (* All these nodes are on the same level and they cannot be reduced. + * Determine the SCC for the nodes at level i + *) + fun CollapseIrreducible(i, joinNodes) = + let fun out_edges y = + let fun filter([], es') = es' + | filter((e as (y,z,_))::es, es') = + filter(es, if A.sub(levelsMap,z) = i then e::es' else es') + in filter(A.sub(joinOutEdges, y), []) end + fun dumpSCC scc = + (print "scc:\n"; + app (fn x => + (print(Int.toString x^" "); + app (fn (x,y,_) => + print(Int.toString x^"->"^Int.toString y^" ")) + (out_edges x); + print "\n" + )) + scc + ) + fun processSCC(scc) = + (if debug then dumpSCC scc else (); + fixpoint{scc=scc} + ) + val sccs = + GraphSCC.scc' + {N = #capacity cfg (), + nodes = joinNodes, + out_edges = out_edges + } op:: [] + in app processSCC sccs; + removeAllSameLevelEdges(i,joinNodes) + end + + (* Self loops y->y + * Compute the closure H_y : O_y = f^*_y(O_y) + * Then delete the edge y->y. + * y must be a non-join node before and after this transformation. + *) + fun Eager1(y,id) = + (closure{y=y}; + removeJoinEdge(y,y,id) + ) + + (* Same level y->z + * Eliminate O_y in H_z by replacing it with the RHS of H_y + * Delete the edge y->z; + * if (z becomes a non-join node) then put z at the head of the + * priority list at level i + *) + fun Eager2a(y,z,i,id,queue,prune) = + (var_elim{y=y,z=z}; + removeJoinEdge(y,z,id); + if isNonJoinNode z then (z::queue,true) else (queue,prune) + ) + + (* Different levels y->z + * Eliminate O_y in H_z by replacing it with RHS of H_y + * x = idom(y) + * Delete the edge y->z + * if (x->z does not exist) then + * Insert a new J edge x->z + *) + fun Eager2b(y,z,id) = + (var_elim{y=y,z=z}; + let val x = A.sub(idomsMap,y) + in removeJoinEdge(y,z,id); + if doesn'tHasEdge(x,z) then insertJoinEdge(x,z,id) + else () + end + ) + + fun ReduceLevel i = + let fun loop([], [], _) = () + | loop([], joinNodes, prune) = + let fun filter([], ns) = ns + | filter(z::zs, ns) = + if isNonJoinNode z then filter(zs, ns) + else filter(zs, z::ns) + (* If prune is true, then the joinNodes list may + * contain processed non-join nodes, + * so we have to prune them out first. + *) + val joinNodes = + if prune then filter(joinNodes,[]) else joinNodes + in case joinNodes of + [] => () + | _ => (CollapseIrreducible(i,joinNodes); ReduceLevel i) + end + | loop(y::Q, joinNodes, prune) = + if isNonJoinNode y then + let fun process([], Q, prune) = (Q, prune) + | process((y,z,id)::es,Q, prune) = + if z = y then (Eager1(y,id); process(es, Q, prune)) + else if A.sub(levelsMap,z) = i then + let val (Q, prune) = Eager2a(y,z,i,id,Q,prune) + in process(es, Q, prune) end + else (Eager2b(y,z,id); process(es, Q, prune)) + val (Q, prune) = process(A.sub(joinOutEdges, y),Q,prune) + in loop(Q, joinNodes, prune) + end + else loop(Q, y::joinNodes, prune) + in loop(A.sub(PriorityList,i), [], false) + end + + (* Propagate the results from the top of the dominator tree + * down to its children. + *) + fun DomTDPropagate() = + let val ENTRY = hd(#entries dom ()) + fun walk y = + let fun walkChildren [] = () + | walkChildren((_,z,_)::es) = + (compute{y=y, z=z}; walk z; walkChildren es) + in walkChildren(#out_edges dom y) end + in walk ENTRY + end + + fun MainDFA() = + let fun loop ~1 = () + | loop i = (ReduceLevel i; loop(i-1)) + in initialize(); + loop(L-1); + DomTDPropagate() + end + + in MainDFA() + end + +end diff --git a/MLRISC/ir-archive/djgraph.sig b/MLRISC/ir-archive/djgraph.sig new file mode 100644 index 0000000..19aab56 --- /dev/null +++ b/MLRISC/ir-archive/djgraph.sig @@ -0,0 +1,31 @@ +(* + * This signature describes DJ-graph and related algorithms. + * + * -- Allen + *) + +signature DJ_GRAPH = +sig + + structure Dom : DOMINATOR_TREE + + type ('n,'e,'g) dj_graph (* abstract type now! *) + + val DJ : ('n,'e,'g) Dom.dominator_tree -> ('n,'e,'g) dj_graph + + val DF : ('n,'e,'g) dj_graph -> Graph.node_id -> Graph.node_id list + + val IDFs : ('n,'e,'g) dj_graph -> + Graph.node_id list -> Graph.node_id list (* DF^+(S) *) + + (* For constructing pruned SSA, we actually need to compute + * DF^+(defs(v)) \intersect LiveIn(v) + * for each variable v, i.e. only places where v is live. + * The following function computes this with liveness incrementally. + *) + val LiveIDFs : ('n,'e,'g) dj_graph -> + {defs : Graph.node_id list, (* blocks with definitions *) + localLiveIn : Graph.node_id list (* blocks that are local live in *) + } -> Graph.node_id list + +end diff --git a/MLRISC/ir-archive/djgraph.sml b/MLRISC/ir-archive/djgraph.sml new file mode 100644 index 0000000..1a7281c --- /dev/null +++ b/MLRISC/ir-archive/djgraph.sml @@ -0,0 +1,243 @@ +(* + * The algorithm for computing iterated dominance frontier. + * This is the algorithm by Sreedhar, Gao and Lee. + * + * --Allen + *) + +functor DJGraph (Dom : DOMINATOR_TREE) : DJ_GRAPH = +struct + + structure G = Graph + structure Dom = Dom + structure A = Array + + type ('n,'e,'g) dj_graph = ('n,'e,'g) Dom.dominator_tree + + fun error msg = MLRiscErrorMsg.error("DJGraph",msg) + + val stats = false (* collect statistics? *) + val visitCount = MLRiscControl.getCounter "dj-visit-count" + val idfCount = MLRiscControl.getCounter "dj-IDF-count" + val idfSize = MLRiscControl.getCounter "dj-IDF-size" + val liveVisitCount = MLRiscControl.getCounter "dj-live-visit-count" + val maxBlockSize = MLRiscControl.getCounter "dj-max-block-size" + val totalBlockSize = MLRiscControl.getCounter "dj-total-block-size" + val debug = false + + fun DJ x = x + + (* Compute dominance frontier *) + fun DF (D as G.GRAPH dom) = + let val G.GRAPH cfg = Dom.cfg D + val L = Dom.max_levels D + val N = #capacity dom () + val levels = Dom.levelsMap D + val in_phi = A.array(N,0) (* has appeared in the DF set? *) + val stamp = ref 0 + fun new_stamp() = let val s = !stamp + 1 in stamp := s; s end + + fun unmarked(marked,i,stamp : int) = + let val s = A.sub(marked,i) + in if s = stamp then false else (A.update(marked,i,stamp); true) + end + + (* + * Compute the dominance frontiers of a node + * Dominance frontier of x: + * The set of all nodes y such that x dominates a predecessor + * of y but x doesn't strictly dominates y. + *) + fun DF x = + let val stamp = new_stamp() + val level_x = A.sub(levels,x) + fun walk(z, S) = + let fun scan((_,y,_)::es,S) = + if A.sub(levels,y) <= level_x andalso + unmarked(in_phi,y,stamp) then scan(es,y::S) + else scan(es,S) + | scan([],S) = S + val S = scan(#out_edges cfg z,S) + fun walkList([],S) = S + | walkList((_,z,_)::es,S) = walkList(es,walk(z,S)) + in walkList(#out_edges dom z,S) + end + in walk(x,[]) + end + + in DF end + + (* Compute iterated dominance frontier *) + fun IDFs (D as G.GRAPH dom) = + let val G.GRAPH cfg = Dom.cfg D + val L = Dom.max_levels D + val N = #capacity dom () + val levels = Dom.levelsMap D + val in_phi = A.array(N,0) (* has appeared in the DF set? *) + val stamp = ref 0 + fun new_stamp() = let val s = !stamp + 1 in stamp := s; s end + + fun unmarked(marked,i,stamp : int) = + let val s = A.sub(marked,i) + in if s = stamp then false else (A.update(marked,i,stamp); true) + end + + val in_alpha = A.array(N,0) (* has appeared in N_alpha? *) + val visited = A.array(N,0) (* has it been visited *) + val piggybank = A.array(L,[]) (* nodes in the piggy bank *) + + val n = ref 0 + (* + * This algorithm is described in POPL 95 + *) + fun IDFs xs = + let val stamp = new_stamp() + val _ = if stats then (idfCount := !idfCount + 1; n := !visitCount) + else () + fun init([],l) = l + | init(x::xs,l) = + let val l_x = A.sub(levels,x) + in A.update(in_alpha,x,stamp); + A.update(piggybank,l_x,x::A.sub(piggybank,l_x)); + init(xs,if l < l_x then l_x else l) + end + fun visit(y,level_x,S) = + let fun scan([],S) = S + | scan((_,z,_)::es,S) = + let val level_z = A.sub(levels,z) + in if level_z <= level_x andalso unmarked(in_phi,z,stamp) + then (if A.sub(in_alpha,z) <> stamp + then A.update(piggybank,level_z, + z::A.sub(piggybank,level_z)) + else (); + scan(es,z::S)) + else scan(es,S) + end + fun visitSucc([],S) = S + | visitSucc((_,z,_)::es,S) = + visitSucc(es,if unmarked(visited,z,stamp) + then visit(z,level_x,S) else S) + val S = scan(#out_edges cfg y,S) + in if stats then visitCount := !visitCount + 1 else (); + visitSucc(#out_edges dom y,S) + end + + fun visitAll(~1,S) = S + | visitAll(l,S) = + case A.sub(piggybank,l) of + [] => visitAll(l-1,S) + | x::xs => (A.update(visited,x,stamp); + A.update(piggybank,l,xs); + visitAll(l,visit(x,A.sub(levels,x),S))) + + val L = init(xs,~1) + val IDF = visitAll(L,[]) + in if stats then + (idfSize := !idfSize + length IDF; + maxBlockSize := Int.max(!maxBlockSize, N); + totalBlockSize := !totalBlockSize + N + ) + else (); + if debug then print("N="^Int.toString N^" visits="^ + Int.toString(!visitCount - !n)^"\n") else (); + IDF + end + + in IDFs + end + + fun LiveIDFs(D as G.GRAPH dom) = + let val G.GRAPH cfg = Dom.cfg D + val L = Dom.max_levels D + val N = #capacity dom () + val levels = Dom.levelsMap D + + val in_phi = A.array(N,0) (* has appeared in the DF set? *) + val stamp = ref 0 + fun new_stamp() = let val s = !stamp + 2 in stamp := s; s end + + val in_alpha = A.array(N,0) (* has appeared in N_alpha? *) + val piggybank = A.array(L,[]) (* nodes in the piggy bank *) + val liveIn = A.array(N,0) (* is a variable live in *) + val visited = A.array(N,0) + + fun unmarked(marked,i,stamp : int) = + let val s = A.sub(marked,i) + in if s = stamp then false else (A.update(marked,i,stamp); true) + end + + fun LiveIDFs {defs, localLiveIn=[]} = [] (* special case *) + | LiveIDFs {defs=xs, localLiveIn} = + let val stamp = new_stamp() + val _ = if stats then idfCount := !idfCount + 1 else () + (* val n = ref 0 + val m = ref 0 *) + + fun initDefs([],maxLvl) = maxLvl + | initDefs(x::xs,maxLvl) = + let val lvl_x = A.sub(levels,x) + in A.update(in_alpha,x,stamp); + A.update(piggybank,lvl_x,x::A.sub(piggybank,lvl_x)); + initDefs(xs,if maxLvl < lvl_x then lvl_x else maxLvl) + end + + fun markLiveIn(b) = + let fun markPred [] = () + | markPred((j,_,_)::es) = + (if A.sub(liveIn,j) <> stamp andalso + A.sub(in_alpha,j) <> stamp then + markLiveIn j + else (); + markPred es + ) + in (* m := !m + 1; *) + A.update(liveIn,b,stamp); + if stats then liveVisitCount := !liveVisitCount + 1 else (); + markPred(#in_edges cfg b) + end + + fun initLiveIn [] = () + | initLiveIn(x::xs) = (markLiveIn x; initLiveIn xs) + + fun isLive b = A.sub(liveIn,b) = stamp + + fun visit(y,level_x,S) = + let fun scan([],S) = S + | scan((_,z,_)::es,S) = + let val level_z = A.sub(levels,z) + in if level_z <= level_x andalso + isLive z andalso + unmarked(in_phi,z,stamp) + then (if A.sub(in_alpha,z) <> stamp + then A.update(piggybank,level_z, + z::A.sub(piggybank,level_z)) + else (); + scan(es,z::S)) + else scan(es,S) + end + fun visitSucc([],S) = S + | visitSucc((_,z,_)::es,S) = + visitSucc(es,if isLive z andalso unmarked(visited,z,stamp) + then visit(z,level_x,S) else S) + val S = scan(#out_edges cfg y,S) + in visitSucc(#out_edges dom y,S) + end + + fun visitAll(~1,S) = S + | visitAll(l,S) = + case A.sub(piggybank,l) of + [] => visitAll(l-1,S) + | x::xs => (A.update(visited,x,stamp); + A.update(piggybank,l,xs); + visitAll(l,visit(x,A.sub(levels,x),S))) + + val L = initDefs(xs, ~1) + in initLiveIn(localLiveIn); + visitAll(L, []) + end + + in LiveIDFs + end + +end + diff --git a/MLRISC/ir-archive/dominance-frontier.sig b/MLRISC/ir-archive/dominance-frontier.sig new file mode 100644 index 0000000..67ff794 --- /dev/null +++ b/MLRISC/ir-archive/dominance-frontier.sig @@ -0,0 +1,18 @@ +(* + * This module computes the dominance frontiers from a dominator + * tree. This computation is done in batch mode. + * + * -- Allen + *) + +signature DOMINANCE_FRONTIERS = +sig + + structure Dom : DOMINATOR_TREE + + type dominance_frontiers = Graph.node_id list Array.array + + val DFs : ('n,'e,'g) Dom.dominator_tree -> dominance_frontiers + +end + diff --git a/MLRISC/ir-archive/dominance-frontier.sml b/MLRISC/ir-archive/dominance-frontier.sml new file mode 100644 index 0000000..e7b2f15 --- /dev/null +++ b/MLRISC/ir-archive/dominance-frontier.sml @@ -0,0 +1,50 @@ +(* Computation of the dominance frontier using the algorithm + * of Cytron, Ferrante, Rosen, Wegman and Zadeck in TOPLAS 91 + * + * -- Allen + *) + +functor DominanceFrontiers (Dom : DOMINATOR_TREE) + : DOMINANCE_FRONTIERS = +struct + + structure Dom = Dom + structure G = Graph + structure A = Array + + type dominance_frontiers = G.node_id list A.array + + fun DFs (Dom as G.GRAPH dom) = + let val N = #capacity dom () + val DF = A.array(N,[]) : dominance_frontiers + val G.GRAPH cfg = Dom.cfg Dom + val immediately_dominates = Dom.immediately_dominates Dom + fun computeDF X = + let (* the successors in X that are not strictly dominated by X *) + val S = foldr (fn ((_,Y,_),S) => + if immediately_dominates(X,Y) + then S else Y::S) [] (#out_edges cfg X) + (* Nodes in the dominance frontier of n that are not + * dominated by n's immediate dominator + *) + fun computeChild((_,Z,_),S) = + let val DF_Z = computeDF Z + val S = foldr (fn (Y,S) => + if immediately_dominates(X,Y) + then S else Y::S) S DF_Z + in S + end + val S = foldl computeChild S (#out_edges dom X) + in + A.update(DF,X,S); + S + end + + val [root] = #entries dom () + val _ = computeDF root + in + DF + end + +end + diff --git a/MLRISC/ir-archive/dominator.sig b/MLRISC/ir-archive/dominator.sig new file mode 100644 index 0000000..5b653ab --- /dev/null +++ b/MLRISC/ir-archive/dominator.sig @@ -0,0 +1,91 @@ +(* + * This is the signature of a dominator tree. + * The dominator tree includes lots of query methods. + * + * -- Allen + *) + +signature DOMINATOR_TREE = +sig + + structure GI : GRAPH_IMPLEMENTATION + + exception Dominator + + type ('n,'e,'g) dom_info + + (* Dominator/postdominator trees *) + type ('n,'e,'g) dominator_tree = + ('n,unit,('n,'e,'g) dom_info) Graph.graph + type ('n,'e,'g) postdominator_tree = + ('n,unit,('n,'e,'g) dom_info) Graph.graph + + type node = Graph.node_id + + (* Compute the (post)dominator tree from a flowgraph *) + val makeDominator : ('n,'e,'g) Graph.graph -> ('n,'e,'g) dominator_tree + val makePostdominator : ('n,'e,'g) Graph.graph -> + ('n,'e,'g) postdominator_tree + + (* The following methods work on both dominator/postdominator trees. + * When operating on a postdominator tree, the interpretation of these + * methods are reversed in the obvious manner. + *) + + (* Extract the original CFG *) + val cfg : ('n,'e,'g) dominator_tree -> ('n,'e,'g) Graph.graph + + (* The height of the dominator tree *) + val max_levels : ('n,'e,'g) dominator_tree -> int + + (* Return a map from node id -> level (level(root) = 0) *) + val levelsMap : ('n,'e,'g) dominator_tree -> int Array.array + + (* Return a map from node id i -> the node_id j, + * where j is the level 1 node that dominates i. + * Special case: if i = ENTRY, then j = ENTRY. + * This table is cached. + *) + val entryPos : ('n,'e,'g) dominator_tree -> int Array.array + + (* Return a map from node id -> immediate (post)dominator *) + val idomsMap : ('n,'e,'g) dominator_tree -> int Array.array + + (* Immediately (post)dominates? *) + val immediately_dominates : ('n,'e,'g) dominator_tree -> node * node -> bool + + (* (Post)dominates? *) + val dominates : ('n,'e,'g) dominator_tree -> node * node -> bool + + (* Strictly (post)dominates? *) + val strictly_dominates : ('n,'e,'g) dominator_tree -> node * node -> bool + + (* Immediate (post)dominator of a node (~1 if none) *) + val idom : ('n,'e,'g) dominator_tree -> node -> node + + (* Nodes that the node immediately (post)dominates *) + val idoms : ('n,'e,'g) dominator_tree -> node -> node list + + (* Nodes that the node (post)dominates (includes self) *) + val doms : ('n,'e,'g) dominator_tree -> node -> node list + + (* Return the level of a node in the tree *) + val level : ('n,'e,'g) dominator_tree -> node -> int + + (* Return the least common ancestor of a pair of nodes *) + val lca : ('n,'e,'g) dominator_tree -> node * node -> node + + (* The following methods require both the dominator and postdominator trees. + *) + (* Are two nodes control equivalent? *) + val control_equivalent : + ('n,'e,'g) dominator_tree * ('n,'e,'g) postdominator_tree -> + node * node -> bool + + (* Compute the control equivalent partitions of a graph *) + val control_equivalent_partitions : + ('n,'e,'g) dominator_tree * ('n,'e,'g) postdominator_tree -> + node list list + +end + diff --git a/MLRISC/ir-archive/dominator.sml b/MLRISC/ir-archive/dominator.sml new file mode 100644 index 0000000..c5d366a --- /dev/null +++ b/MLRISC/ir-archive/dominator.sml @@ -0,0 +1,391 @@ +(* + * Computation of the dominator tree representation from the + * control flow graph. I'm using the old algorithm by Lengauer and Tarjan. + * + * Note: to deal with CFG with endless loops, + * by default we assume instructions are postdominated by STOP. + * + * -- Allen + *) + +functor DominatorTree (GraphImpl : GRAPH_IMPLEMENTATION + ) : DOMINATOR_TREE = +struct + + structure GI = GraphImpl + structure G = Graph + structure Rev = ReversedGraphView + structure A = Array + structure NodeSet = BitSet + + exception Dominator + + fun singleEntryOf (G.GRAPH g) = + case #entries g () of + [e] => e + | _ => raise Dominator + + type node = G.node_id + + datatype ('n,'e,'g) dom_info = + INFO of + { cfg : ('n,'e,'g) G.graph, + edge_label : string, + levelsMap : int Array.array, + preorder : int Array.array option ref, + postorder : int Array.array option ref, + entryPos : int Array.array option ref, + max_levels : int ref + } + type ('n,'e,'g) dominator_tree = ('n,unit,('n,'e,'g) dom_info) G.graph + type ('n,'e,'g) postdominator_tree = ('n,unit,('n,'e,'g) dom_info) G.graph + + fun graph_info (G.GRAPH dom) : ('n,'e,'g) dom_info = #graph_info dom + + fun cfg(G.GRAPH dom) = let val INFO{cfg,...} = #graph_info dom in cfg end + fun max_levels(G.GRAPH dom) = + let val INFO{max_levels,...} = #graph_info dom in !max_levels end + + (* + * This is the main Lengauer/Tarjan algorithm + *) + fun tarjan_lengauer (name,edge_label) (origCFG,CFG as (G.GRAPH cfg)) = + let val N = #order cfg () + val M = #capacity cfg () + val r = singleEntryOf CFG + val in_edges = #in_edges cfg + val succ = #succ cfg + val dfnum = A.array (M, ~1) + val vertex = A.array (N, ~1) + val parent = A.array (M, ~1) + val bucket = A.array (M, []) : node list array + val semi = A.array (M, r) + val ancestor = A.array (M, ~1) + val idom = A.array (M, r) + val samedom = A.array (M, ~1) + val best = A.array (M, ~1) + val max_levels = ref 0 + val levelsMap = A.array(M,~1000000) + val dom_info = INFO{ cfg = origCFG, + edge_label = edge_label, + levelsMap = levelsMap, + preorder = ref NONE, + postorder = ref NONE, + entryPos = ref NONE, + max_levels = max_levels + } + val Dom as G.GRAPH domtree = GI.graph(name, dom_info, N) + + (* step 1 + * Initialize semi dominators and parent map + *) + fun dfs(p,n,N) = + if A.sub(dfnum,n) = ~1 then + (A.update(dfnum,n,N); + A.update(vertex,N,n); + A.update(parent,n,p); + dfsSucc(n,succ n,N+1) + ) + else N + and dfsSucc(p,[],N) = N + | dfsSucc(p,n::ns,N) = dfsSucc(p,ns,dfs(p,n,N)) + + and dfsAll(n::ns,N) = dfsAll(ns,dfs(~1,n,N)) + | dfsAll([],N) = () + val nonRoots = List.foldr + (fn ((r',_),l) => if r <> r' then r'::l else l) [] + (#nodes cfg ()) + val _ = dfsAll(nonRoots,dfs(~1,r,0)) + + (* + fun pr s = print (s ^ "\n") + fun dumpArray title a = + pr(title ^ ": " ^ + String.concat(A.foldr + (fn (i,s) => Int.toString i::" "::s) [] a)) + + val _ = pr("root = " ^ Int.toString r) + val _ = dumpArray "vertex" vertex + val _ = dumpArray "dfnum" dfnum + val _ = dumpArray "parent" parent + val _ = Msg.printMessages(fn _ => CFG.G.printGraph (!Msg.outStream) cfg) + *) + + fun link(p,n) = (A.update(ancestor,n,p); A.update(best,n,n)) + + fun ancestorWithLowestSemi v = + let val a = A.sub(ancestor,v) + in if a <> ~1 andalso A.sub(ancestor,a) <> ~1 then + let val b = ancestorWithLowestSemi a + in A.update(ancestor,v,A.sub(ancestor,a)); + if A.sub(dfnum,A.sub(semi,b)) < + A.sub(dfnum,A.sub(semi,A.sub(best,v))) then + A.update(best,v,b) + else () + end + else (); + let val u = A.sub(best,v) + in if u = ~1 then v else u + end + end + + (* steps 2 and 3 + * Compute vertex, bucket and semi maps + *) + fun compute 0 = () + | compute i = + let val n = A.sub(vertex,i) + val p = A.sub(parent,n) + fun computeSemi ((v,n,_)::rest,s) = + if v = n then computeSemi(rest,s) + else + let val s' = if A.sub(dfnum,v) < A.sub(dfnum,n) then v + else A.sub(semi,ancestorWithLowestSemi(v)) + val s = if A.sub(dfnum,s') < + A.sub(dfnum,s) then s' + else s + in computeSemi(rest,s) + end + | computeSemi ([], s) = s + in if p <> ~1 then + let val s = computeSemi(in_edges n, p) + in A.update(semi,n,s); + A.update(bucket,s,n::A.sub(bucket,s)); + link(p,n); + app (fn v => + let val y = ancestorWithLowestSemi(v) + in if A.sub(semi,y) = A.sub(semi,v) then + A.update(idom,v,p) else A.update(samedom,v,y) + end) (A.sub(bucket,p)); + A.update(bucket,p,[]) + end else (); + compute(i-1) + end + val _ = compute (N-1) + + (* + val _ = dumpArray "semi" idom + val _ = dumpArray "idom" idom + *) + + (* step 4 update dominators *) + fun updateIdoms i = + if i < N then + let val n = A.sub(vertex, i) + in if A.sub(samedom, n) <> ~1 + then A.update(idom, n, A.sub(idom, A.sub(samedom, n))) + else (); + updateIdoms (i+1) + end + else () + val _ = updateIdoms 1 + + (* + val _ = dumpArray "idom" idom + *) + + (* Create the nodes/edges of the dominator tree *) + fun buildGraph(i,maxLevel) = + if i < N then + let val v = A.sub(vertex,i) + in #add_node domtree (v,#node_info cfg v); + if v <> r then + let val w = A.sub(idom,v) + val l = A.sub(levelsMap,w)+1 + in A.update(levelsMap,v,l); + #add_edge domtree (w,v,()); + buildGraph(i+1,if l >= maxLevel then l else maxLevel) + end + else + (A.update(levelsMap,v,0); + buildGraph(i+1,maxLevel) + ) + end + else maxLevel + + val max = buildGraph(0,1) + in + max_levels := max+1; + #set_entries domtree [r]; + (* Msg.printMessages(fn _ => G.printGraph (!Msg.outStream) domtree); *) + Dom + end + + + (* The algorithm specialized to making dominators and postdominators *) + fun makeDominator cfg = tarjan_lengauer("Dom","dom") (cfg,cfg) + fun makePostdominator cfg = + tarjan_lengauer("PDom","pdom") (cfg,Rev.rev_view cfg) + + (* Methods *) + + (* Does i immediately dominate j? *) + fun immediately_dominates (G.GRAPH D) (i,j) = + case #in_edges D j of + (k,_,_)::_ => i = k + | _ => false + + (* immediate dominator of n *) + fun idom (G.GRAPH D) n = + case #in_edges D n of + (n,_,_)::_ => n + | _ => ~1 + + (* nodes that n immediately dominates *) + fun idoms (G.GRAPH D) = #succ D + + (* nodes that n dominates *) + fun doms (G.GRAPH D) = + let fun subtree ([],S) = S + | subtree (n::ns,S) = subtree(#succ D n,subtree(ns,n::S)) + in fn n => subtree([n], []) + end + + + fun prePostOrders(g as G.GRAPH dom) = + let val INFO{ preorder,postorder,...} = #graph_info dom + (* Compute the preorder/postorder numbers *) + fun computeThem() = + let val N = #capacity dom () + val r = singleEntryOf g + val pre = A.array(N,~1000000) + val post = A.array(N,~1000000) + fun computeNumbering(preorder,postorder,n) = + let val _ = A.update(pre,n,preorder) + val (preorder',postorder') = + computeNumbering'(preorder+1,postorder,#out_edges dom n) + in A.update(post,n,postorder'); + (preorder',postorder'+1) + end + + and computeNumbering'(preorder,postorder,[]) = + (preorder,postorder) + | computeNumbering'(preorder,postorder,(_,n,_)::es) = + let val (preorder',postorder') = + computeNumbering(preorder,postorder,n) + val (preorder',postorder') = + computeNumbering'(preorder',postorder',es) + in (preorder',postorder') + end + in computeNumbering(0,0,r) ; + preorder := SOME pre; + postorder := SOME post; + (pre,post) + end + in case (!preorder,!postorder) of + (SOME pre,SOME post) => (pre,post) + | _ => computeThem() + end + + (* Level *) + fun level (G.GRAPH D) = + let val INFO{levelsMap,...} = #graph_info D + in fn i => A.sub(levelsMap,i) end + + (* Entry position *) + fun entryPos(g as G.GRAPH D) = + let val INFO{entryPos,...} = #graph_info D + in case !entryPos of + SOME t => t + | NONE => + let val entry = singleEntryOf g + val N = #capacity D () + val t = A.array(N, entry) + fun init(X,Y) = + (A.update(t,X,Y); + app (fn Z => init(Z,Y)) (#succ D X) + ) + in entryPos := SOME t; + app (fn Z => init(Z,Z)) (#succ D entry); + t + end + end + + (* Least common ancestor *) + fun lca (Dom as G.GRAPH D) (a,b) = + let val l_a = level Dom a + val l_b = level Dom b + fun idom i = case #in_edges D i of + (j,_,_)::_ => j + | [] => raise Fail "DominatorTree:lca:idom: []" + fun up_a(a,l_a) = if l_a > l_b then up_a(idom a,l_a-1) else a + fun up_b(b,l_b) = if l_b > l_a then up_b(idom b,l_b-1) else b + val a = up_a(a,l_a) + val b = up_b(b,l_b) + fun up_both(a,b) = if a = b then a else up_both(idom a,idom b) + in up_both(a,b) end + + (* is x and ancestor of y in D? + * This is true iff PREORDER(x) <= PREORDER(y) and + * POSTORDER(x) >= POSTORDER(y) + *) + fun dominates Dom = + let val (pre,post) = prePostOrders Dom + in fn (x,y) => + let val a = A.sub(pre,x) + val b = A.sub(post,x) + val c = A.sub(pre,y) + val d = A.sub(post,y) + in a <= c andalso b >= d + end + end + + fun strictly_dominates Dom = + let val (pre,post) = prePostOrders Dom + in fn (x,y) => + let val a = A.sub(pre,x) + val b = A.sub(post,x) + val c = A.sub(pre,y) + val d = A.sub(post,y) + in a < c andalso b > d + end + end + + fun control_equivalent (Dom,PDom) = + let val dom = dominates Dom + val pdom = dominates PDom + in fn (x,y) => dom(x,y) andalso pdom(y,x) orelse dom(y,x) andalso pdom(x,y) + end + + (* control equivalent partitions + * two nodes a and b are control equivalent iff + * a dominates b and b postdominates a (or vice versa) + * We use the following property of dominators to avoid wasteful work: + * If i dom j dom k and j not pdom i then + * k not pdom i + * This algorithm runs in O(n) + *) + fun control_equivalent_partitions (G.GRAPH D,PDom) = + let val postdominates = dominates PDom + fun walkDom([],S) = S + | walkDom(n::waiting,S) = + let val (waiting,S,S') = + findEquiv(n,#out_edges D n,waiting,S,[n]) + in walkDom(waiting,S'::S) + end + and findEquiv(i,[],waiting,S,S') = (waiting,S,S') + | findEquiv(i,(_,j,_)::es,waiting,S,S') = + if postdominates(j,i) then + let val (waiting,S,S') = findEquiv(i,es,waiting,S,j::S') + in findEquiv(i,#out_edges D j,waiting,S,S') + end + else + findEquiv(i,es,j::waiting,S,S') + + val equivSets = walkDom(#entries D (),[]) + in + equivSets + end + + fun levelsMap(G.GRAPH dom) = + let val INFO{levelsMap,...} = #graph_info dom + in levelsMap end + + fun idomsMap(G.GRAPH dom) = + let val idoms = A.array(#capacity dom (),~1) + in #forall_edges dom (fn (i,j,_) => A.update(idoms,j,i)); + idoms + end + +end + diff --git a/MLRISC/ir-archive/e-djgraph.sml b/MLRISC/ir-archive/e-djgraph.sml new file mode 100644 index 0000000..f3d49b0 --- /dev/null +++ b/MLRISC/ir-archive/e-djgraph.sml @@ -0,0 +1,198 @@ +(* + * This is my E-compressed DJ-graph data structure + * --Allen + *) + +functor E_DJGraph (Dom : DOMINATOR_TREE) : DJ_GRAPH = +struct + + structure G = Graph + structure Dom = Dom + structure A = Array + + fun error msg = MLRiscErrorMsg.error("E-DJGraph",msg) + + val stats = false (* collect statistics? *) + val visitCount = MLRiscControl.getCounter "dj-visit-count" + val idfCount = MLRiscControl.getCounter "dj-IDF-count" + val idfSize = MLRiscControl.getCounter "dj-IDF-size" + val liveVisitCount = MLRiscControl.getCounter "dj-live-visit-count" + + datatype tree = NODE of int * tree list + + datatype ('n,'e,'g) dj_graph = + DJGRAPH of + { dom : ('n,'e,'g) Dom.dominator_tree, + trees : tree option A.array, + jedges : int list A.array + } + + fun DJ(Dom as G.GRAPH dom) = + let val G.GRAPH cfg = Dom.cfg Dom + val L = Dom.max_levels Dom + val N = #capacity dom () + val levelsMap = Dom.levelsMap Dom + val rank_J = A.array(N, 0) + val trees = A.array(N, NONE) + val jedges = A.array(N, []) + val buckets = A.array(L, []) + + fun ExitTrees a = + let fun foreachDedge [] = () + | foreachDedge((_,b,_)::es) = (ExitTrees b; foreachDedge es) + val _ = foreachDedge (#out_edges dom a) + val lvl_a = A.sub(levelsMap, a) + fun foreachJedge([], rank) = A.update(rank_J,a,rank) + | foreachJedge((a,b,_)::es, rank) = + let val lvl_b = A.sub(levelsMap, b) + in if lvl_b <= lvl_a then + foreachJedge(es, if lvl_b < rank then lvl_b else rank) + else + foreachJedge(es, rank) + end + val _ = foreachJedge (#out_edges cfg a, L+1) + fun buildTree([], succ) = NODE(a,succ) + | buildTree((_,b,_)::es, succ) = + (case A.sub(trees, b) of + NONE => buildTree(es, succ) + | SOME t => buildTree(es, t::succ) + ) + val t_a = buildTree(#out_edges dom a, []) + in A.update(trees, a, pruneTree(A.sub(levelsMap, a), t_a)) + end + + and pruneTree(lvl_a, NODE(x,succ)) = + let fun foreachSucc([], subtrees) = subtrees + | foreachSucc(t::ts, subtrees) = + foreachSucc(ts, + case pruneTree(lvl_a, t) of + NONE => subtrees + | SOME t => t::subtrees + ) + val subtrees = foreachSucc(succ, []) + in case (A.sub(rank_J,x) <= lvl_a, subtrees) of + (false,[]) => NONE + | (false,[t]) => SOME t + | (_,ts) => SOME(NODE(x,ts)) + end + + fun fillJedges l = + if l < 0 then () else + let fun fill [] = () + | fill ((a,b)::es) = + (A.update(jedges, a, b::A.sub(jedges, a)); fill es) + in fill(A.sub(buckets, l)); + fillJedges(l-1) + end + + val [ENTRY] = #entries dom () + in ExitTrees ENTRY; + fillJedges(L-1); + DJGRAPH{dom=Dom, trees=trees, jedges=jedges} + end + + (* Compute dominance frontier *) + fun DF _ = error "DF" + + (* Compute iterated dominance frontier *) + fun IDFs _ = error "IDFs" + + (* Compute iterated dominance frontier with liveness *) + fun LiveIDFs(DJGRAPH{dom=Dom as G.GRAPH dom, jedges, trees}) = + let val G.GRAPH cfg = Dom.cfg Dom + val L = Dom.max_levels Dom + val N = #capacity dom () + val levels = Dom.levelsMap Dom + val in_phi = A.array(N,0) (* has appeared in the DF set? *) + val liveIn = A.array(N,0) + val stamp = ref 0 + fun new_stamp() = let val s = !stamp + 1 in stamp := s; s end + + val in_alpha = A.array(N,0) (* has appeared in N_alpha? *) + val visited = A.array(N,0) (* has it been visited *) + val piggybank = A.array(L,[]) (* nodes in the piggy bank *) + + fun LiveIDFs{defs=xs, localLiveIn=[]} = [] + | LiveIDFs{defs=xs, localLiveIn} = + let val stamp = new_stamp() + val _ = if stats then idfCount := !idfCount + 1 else () + fun init([],l) = l + | init(x::xs,l) = + let val l_x = A.sub(levels,x) + in A.update(in_alpha,x,stamp); + A.update(piggybank,l_x,x::A.sub(piggybank,l_x)); + init(xs,if l < l_x then l_x else l) + end + + fun markLiveIn(b) = + let fun markPred [] = () + | markPred((j,_,_)::es) = + (if A.sub(liveIn,j) <> stamp andalso + A.sub(in_alpha,j) <> stamp then + markLiveIn j + else (); + markPred es + ) + in A.update(liveIn,b,stamp); + if stats then liveVisitCount := !liveVisitCount + 1 else (); + markPred(#in_edges cfg b) + end + + fun initLiveIn [] = () + | initLiveIn(x::xs) = (markLiveIn x; initLiveIn xs) + + fun isLive b = A.sub(liveIn,b) = stamp + + fun visit(x,S) = + case A.sub(trees,x) of + NONE => S + | SOME t => walk(t,A.sub(levels,x),S) + + and walk(NODE(y,succ_y),level_x,S) = + if A.sub(visited,y) <> stamp then + let val _ = A.update(visited,y,stamp) + fun foreachJedge([],S) = S + | foreachJedge(z::zs,S) = + let val level_z = A.sub(levels,z) + in if level_z <= level_x then + if isLive z andalso A.sub(in_phi,z) <> stamp + (* z is a new IDF^+ candidate; + * make sure it is live. + *) + then (A.update(in_phi,z,stamp); + if A.sub(in_alpha,z) <> stamp + then A.update(piggybank,level_z, + z::A.sub(piggybank,level_z)) + else (); + foreachJedge(zs,z::S) + ) + else foreachJedge(zs,S) + else S + end + fun foreachEedge([], S) = S + | foreachEedge((t as NODE(z,_))::ts,S) = + foreachEedge(ts,if isLive z then walk(t,level_x,S) else S) + + val _ = if stats then visitCount := !visitCount + 1 else (); + in foreachEedge(succ_y, foreachJedge(A.sub(jedges, y),S)) + end + else S + + fun visitAll(~1,S) = S + | visitAll(l,S) = + case A.sub(piggybank,l) of + [] => visitAll(l-1,S) + | x::xs => (A.update(piggybank,l,xs); + visitAll(l,visit(x,S))) + + val L = init(xs,~1) + val _ = initLiveIn localLiveIn + val IDF = visitAll(L,[]) + in if stats then idfSize := !idfSize + length IDF else (); + IDF + end + + in LiveIDFs + end + +end diff --git a/MLRISC/ir-archive/fast-path.sig b/MLRISC/ir-archive/fast-path.sig new file mode 100644 index 0000000..bff50f4 --- /dev/null +++ b/MLRISC/ir-archive/fast-path.sig @@ -0,0 +1,30 @@ +(* + * This module implements Tarjan's fast path computation algorithm. + * + * -- Allen + *) + +signature TARJAN_FAST_PATH = +sig + + structure Dom : DOMINATOR_TREE + + (* path expression *) + datatype 'e pexp = + NUL + | EMP + | EDGE of 'e Graph.edge + | || of 'e pexp * 'e pexp + | ++ of 'e pexp * 'e pexp + + (* path sequence *) + type 'e pseq = ('e pexp * Graph.node_id * Graph.node_id) list + + (* Given a path sequence and a source node $s$, solve for the path of $s$ *) + val solve : 'e pseq -> Graph.node_id -> 'e pexp + + (* decompose a graph into a path sequence *) + val eliminate : ('n,'e,'g) Graph.graph -> 'e pseq + +end + diff --git a/MLRISC/ir-archive/fast-path.sml b/MLRISC/ir-archive/fast-path.sml new file mode 100644 index 0000000..cd2594f --- /dev/null +++ b/MLRISC/ir-archive/fast-path.sml @@ -0,0 +1,46 @@ +(* + * This module implements Tarjan's fast path computation algorithm. + * + * -- Allen + *) + +functor TarjanFastPath(Dom : DOMINATOR_TREE) : TARJAN_FAST_PATH = +struct + + structure Dom = Dom + structure G = Graph + structure A = Array + + datatype 'e pexp = + NULLSET + | LAMBDA + | EDGE of 'e Graph.edge + | || of 'e pexp * 'e pexp + | ++ of 'e pexp * 'e pexp + + type 'e pseq = ('e pexp * int * int) list + + infix || ++ + + fun simp (NUL || x) = x + | simp (x || NUL) = x + | simp (NUL ++ x) = NUL + | simp (x ++ NUL) = NUL + | simp (EMP ++ x) = x + | simp x = x + + fun solve P s = + + fun decompose_and_sequence (G as G.GRAPH G,Dom as G.GRAPH dom) = + let val N = #capacity dom () + val ancestor = A.array(N,~1) + val derived = A.array(N,~1) + val S = A.array(N,LAMBDA) + val R = A.array(N,NULLSET) + fun eliminate (G.GRAPH G) = + + fun walk(u,lvl,sequence) = + in + end + +end diff --git a/MLRISC/ir-archive/idefs2.sig b/MLRISC/ir-archive/idefs2.sig new file mode 100644 index 0000000..05777c5 --- /dev/null +++ b/MLRISC/ir-archive/idefs2.sig @@ -0,0 +1,25 @@ +(* + * This is Reif and Tarjan's algorithm (SIAM J Computing 1981) + * for computing approximate birthpoints for expressions. + * For each basic block B, + * idef(x) = { defs(v_i) | i = 1 ... n in all paths + * idom(x) v_1 v_2 ... v_n x where n >= 1 and + * v_i <> idom(x) for all 1 <= i <= n + * } + * -- Allen + *) +signature IDEFS = +sig + + type var = int + + val compute_idefs : + {def_use : 'n Graph.node -> var list * var list, + cfg : ('n,'e,'g) Graph.graph + } -> + { idefuse : unit -> (int list * int list) Array.array, + ipostdefuse : unit -> (int list * int list) Array.array + } + +end + diff --git a/MLRISC/ir-archive/idefs2.sml b/MLRISC/ir-archive/idefs2.sml new file mode 100644 index 0000000..1e5c64d --- /dev/null +++ b/MLRISC/ir-archive/idefs2.sml @@ -0,0 +1,166 @@ +(* + * This is Reif and Tarjan's algorithm (SIAM J Computing 1981) + * for computing approximate birthpoints for expressions. + * For each basic block B, + * idef(x) = { defs(v_i) | i = 1 ... n in all paths + * idom(x) v_1 v_2 ... v_n x where n >= 1 and + * v_i <> idom(x) for all 1 <= i <= n + * } + * -- Allen + *) + +structure IDefs : IDEFS = +struct + + structure G = Graph + structure SL = SortedList + structure A = Array + structure Rev = ReversedGraphView + + type var = int + + fun compute_idefs {def_use, cfg} = + let val CFG as G.GRAPH cfg = cfg + val N = #capacity cfg () + val DU = A.array(N,([],[])) + val _ = #forall_nodes cfg + (fn (b,b') => let val (d,u) = def_use(b,b') + in A.update(DU,b,(SL.uniq d,SL.uniq u)) + end) + fun dump(name,a) = + (print(name^"="); + A.appi (fn (i,v) => + print(Int.toString i ^ "=" ^Int.toString v^" ")) + (a,0,NONE); + print "\n") + + fun tarjan_lengauer(G.GRAPH cfg) = + let val [ENTRY] = #entries cfg () + val vertex = A.array(N,~1) + val parent = A.array(N,~1) + val semi = A.array(N,~1) + val bucket = A.array(N,[]) + val dom = A.array(N,~1) + val sdefuse = A.array(N,([],[])) + val idefuse = A.array(N,([],[])) + val ancestor = A.array(N,~1) + val treeparent = A.array(N,~1) + val label = A.array(N,~1) + fun dfs(p,n,i) = + if A.sub(semi,i) <> ~1 then n + else + (A.update(parent,i,p); + A.update(semi,i,n); + A.update(vertex,n,i); + A.update(label,i,i); + dfs'(i,n+1,#succ cfg i) + ) + and dfs'(p,n,[]) = n + | dfs'(p,n,i::is) = dfs'(p,dfs(p,n,i),is) + val n = dfs(~1,0,ENTRY) + + fun COMPRESS v = + if A.sub(ancestor,A.sub(ancestor,v)) <> ~1 then + (COMPRESS(A.sub(ancestor,v)); + let val label_ancestor_v = A.sub(label,A.sub(ancestor,v)) + val label_v = A.sub(label,v) + in if A.sub(semi,label_ancestor_v) < + A.sub(semi,label_v) then + A.update(label,v,label_ancestor_v) + else () + end; + A.update(ancestor,v,A.sub(ancestor,A.sub(ancestor,v))) + ) + else () + + fun LINK(v,w) = (A.update(ancestor,w,v); + A.update(treeparent,w,v)) + fun EVAL v = + if A.sub(ancestor,v) = ~1 then v + else (COMPRESS v; A.sub(label,v)) + fun EVALDEFUSE v = + let fun up(v,D,U) = + let val p = A.sub(treeparent,v) + in if p = ~1 then (D,U) + else let val (d,u) = A.sub(DU,v) + val (d',u') = A.sub(sdefuse,v) + in up(p,SL.merge(d,SL.merge(d',D)), + SL.merge(u,SL.merge(u',U))) + end + end + in + up(v,[],[]) + end + fun step2_3 0 = () + | step2_3 i = + let val w = A.sub(vertex,i) + val parent_w = A.sub(parent,w) + fun step2 [] = () + | step2 ((v,_,_)::vs) = + let val u = EVAL v + val semi_u = A.sub(semi,u) + in if semi_u < A.sub(semi,w) then + A.update(semi,w,semi_u) + else (); + let val (d,u) = EVALDEFUSE v + val (d',u') = A.sub(sdefuse,w) + in A.update(sdefuse,w,(SL.merge(d,d'), + SL.merge(u,u'))) + end; + step2 vs + end + val _ = step2(#in_edges cfg w) + val vertex_semi_w = A.sub(vertex,A.sub(semi,w)) + val _ = A.update(bucket,vertex_semi_w, + w::A.sub(bucket,vertex_semi_w)) + val _ = LINK(parent_w,w) + fun step3 [] = () + | step3 (v::vs) = + let val u = EVAL v + in A.update(dom,v,if A.sub(semi,u) < A.sub(semi,v) + then u else parent_w); + let val (d,u) = A.sub(sdefuse,v) + val (d',u') = EVALDEFUSE(A.sub(parent,v)) + in A.update(idefuse,v,(SL.merge(d,d'), + SL.merge(u,u'))) + end; + step3 vs + end + val _ = step3 (A.sub(bucket,parent_w)) + val _ = A.update(bucket,parent_w,[]) + in step2_3(i-1) + end + val _ = step2_3(n-1) + (* + val _ = print("n = "^Int.toString n^"\n") + val _ = dump("vertex",vertex) + val _ = dump("parent",parent) + val _ = dump("semi",semi) + val _ = dump("dom",dom) + val _ = dump("ancestor",ancestor) + val _ = dump("label",label) + *) + fun step4 i = + if i = n then () + else let val w = A.sub(vertex,i) + in if A.sub(dom,w) <> A.sub(vertex,A.sub(semi,w)) then + let val (d,u) = A.sub(idefuse,A.sub(dom,w)) + val (d',u') = A.sub(idefuse,w) + in A.update(idefuse,w,(SL.merge(d,d'), + SL.merge(u,u'))); + A.update(dom,w,A.sub(dom,A.sub(dom,w))) + end + else (); + step4(i+1) + end + val _ = step4 1 + in idefuse + end + in + {idefuse = fn _ => tarjan_lengauer(CFG), + ipostdefuse = fn _ => tarjan_lengauer(Rev.rev_view CFG) + } + end + +end + diff --git a/MLRISC/ir-archive/k-djgraph.sml b/MLRISC/ir-archive/k-djgraph.sml new file mode 100644 index 0000000..0e3a7fb --- /dev/null +++ b/MLRISC/ir-archive/k-djgraph.sml @@ -0,0 +1,344 @@ +(* + * The algorithm for computing iterated dominance + * frontier is my own algorithm which uses the $k$-compressed DJ-graph, + * which is a variant of DJ-graph due to Sreedhar, Gao and Lee. Here, + * I've set k=2. The algorithm using $k$-compressed DJ-graph is significantly + * faster than the DJ-graph version when |DF(x)| <= k. + * + * The write up will be in my thesis. + * + * --Allen + *) + +functor K_DJGraph (Dom : DOMINATOR_TREE) : DJ_GRAPH = +struct + + structure G = Graph + structure Dom = Dom + structure A = Array + + type ('n,'e,'g) dj_graph = ('n,'e,'g) Dom.dominator_tree + + fun error msg = MLRiscErrorMsg.error("K_DJGraph",msg) + + val stats = true (* collect statistics? *) + val levelPrune = true + val domPrune = true + val pathPrune = true + val visitCount = MLRiscControl.getCounter "dj-visit-count" + val liveVisitCount = MLRiscControl.getCounter "dj-live-visit-count" + val debug = true + val K_max = 2 + + fun DJ x = x + + (* Compute dominance frontier *) + fun DF (D as G.GRAPH dom) = + let val G.GRAPH cfg = Dom.cfg D + val L = Dom.max_levels D + val N = #capacity dom () + val levels = Dom.levelsMap D + val in_DF = A.array(N,0) (* has appeared in the DF set? *) + val stamp = ref 0 + fun new_stamp() = let val s = !stamp + 1 in stamp := s; s end + + fun unmarked(marked,i,stamp : int) = + let val s = A.sub(marked,i) + in if s = stamp then false else (A.update(marked,i,stamp); true) + end + + (* + * Compute the dominance frontiers of a node + * Dominance frontier of x: + * The set of all nodes y such that x dominates a predecessor + * of y but x doesn't strictly dominates y. + *) + fun DF x = + let val stamp = new_stamp() + val level_x = A.sub(levels,x) + fun walk(z, S) = + let fun scan((_,y,_)::es,S) = + if A.sub(levels,y) <= level_x andalso + unmarked(in_DF,y,stamp) then scan(es,y::S) + else scan(es,S) + | scan([],S) = S + val S = scan(#out_edges cfg z,S) + fun walkList([],S) = S + | walkList((_,z,_)::es,S) = walkList(es,walk(z,S)) + in walkList(#out_edges dom z,S) + end + in walk(x,[]) + end + + in DF end + + (* Compute iterated dominance frontier *) + fun IDFs (D as G.GRAPH dom) = + let val G.GRAPH cfg = Dom.cfg D + val L = Dom.max_levels D + val N = #capacity dom () + val levels = Dom.levelsMap D + val in_DF = A.array(N,0) (* has appeared in the DF set? *) + val stamp = ref 0 + fun new_stamp() = let val s = !stamp + 1 in stamp := s; s end + + fun unmarked(marked,i,stamp : int) = + let val s = A.sub(marked,i) + in if s = stamp then false else (A.update(marked,i,stamp); true) + end + + val in_alpha = A.array(N,0) (* has appeared in N_alpha? *) + val visited = A.array(N,0) (* has it been visited *) + val piggybank = A.array(L,[]) (* nodes in the piggy bank *) + + (* + * This algorithm is described in POPL 95 + *) + fun IDFs xs = + let val stamp = new_stamp() + fun init([],l) = l + | init(x::xs,l) = + let val l_x = A.sub(levels,x) + in A.update(in_alpha,x,stamp); + A.update(piggybank,l_x,x::A.sub(piggybank,l_x)); + init(xs,if l < l_x then l_x else l) + end + fun visit(y,level_x,S) = + let fun scan([],S) = S + | scan((_,z,_)::es,S) = + let val level_z = A.sub(levels,z) + in if level_z <= level_x andalso unmarked(in_DF,z,stamp) + then (if A.sub(in_alpha,z) <> stamp + then A.update(piggybank,level_z, + z::A.sub(piggybank,level_z)) + else (); + scan(es,z::S)) + else scan(es,S) + end + fun visitSucc([],S) = S + | visitSucc((_,z,_)::es,S) = + visitSucc(es,if unmarked(visited,z,stamp) + then visit(z,level_x,S) else S) + val S = scan(#out_edges cfg y,S) + in visitSucc(#out_edges dom y,S) + end + + fun visitAll(~1,S) = S + | visitAll(l,S) = + case A.sub(piggybank,l) of + [] => visitAll(l-1,S) + | x::xs => (A.update(visited,x,stamp); + A.update(piggybank,l,xs); + visitAll(l,visit(x,A.sub(levels,x),S))) + + val L = init(xs,~1) + in visitAll(L,[]) + end + + in IDFs + end + + + (* Compute iterated dominance frontier intersected with liveness. + * This is my special algorithm! The idea is that when we find a + * new node b in IDF^+(S) we first check whether b is liveIn. If not, + * we can prune the search right there. If so, we continue as normal. + * Checking whether something is liveIn triggers the incremental liveness + * routine. + * + * -- Allen + *) + datatype kind = JOIN | DOM + + fun LiveIDFs(D as G.GRAPH dom) = + let val G.GRAPH cfg = Dom.cfg D + val L = Dom.max_levels D + val N = #capacity dom () + val levels = Dom.levelsMap D + + val in_phi = A.array(N,0) (* has appeared in the DF set? *) + val stamp = ref 0 + fun new_stamp() = let val s = !stamp + 2 in stamp := s; s end + + val in_alpha = A.array(N,0) (* has appeared in N_alpha? *) + val piggybank = A.array(L,[]) (* nodes in the piggy bank *) + val minJLevels = A.array(N,10000000) + val djGraph = A.array(N,[]) (* path compressed dj graph *) + val liveIn = A.array(N,0) (* is a variable live in *) + val visited = A.array(N,0) + val strictly_dominates = Dom.dominates D + + val K_inf = 255 + + fun compressDJGraph(X, lvl) = + let val nextLvl = lvl + 1 + val stamp = ~X + + (* merge join list, make sure there are no duplicates *) + fun mergeJoin(Z, E, n) = + if A.sub(visited, Z) = stamp orelse + A.sub(levels, Z) >= lvl then (E, n) + else (A.update(visited, Z, stamp); + (Z::E, n+1)) + + fun mergeJoins([], E, n) = (E, n) + | mergeJoins(Z::Zs, E, n) = + let val (E, n) = mergeJoin(Z, E, n) + in mergeJoins(Zs, E, n) + end + + fun appendJoins([], E) = E + | appendJoins(Z::Zs, E) = appendJoins(Zs, (JOIN,Z)::E) + + fun collapse([], DJ_X) = DJ_X + | collapse((e as (DOM,_))::Zs, DJ_X) = collapse(Zs, e::DJ_X) + | collapse((e as (JOIN,Z))::Zs, DJ_X) = + if A.sub(levels, Z) <= lvl then collapse(Zs, e::DJ_X) + else collapse(Zs, DJ_X) + + (* L_X -- min level of all join edges in SubTree(X) + * DJ_X -- all dj-graph edges of X + * E_X -- all J-edges in SubTree(X) to level < lvl. + * K_X -- |E_X| + *) + fun walkDomSucc([], L_X, DJ_X, E_X, K_X) = (L_X, DJ_X, E_X, K_X) + | walkDomSucc((_,Y,_)::es, L_X, DJ_X, E_X, K_X) = + let val (L_Y, E_Y, K_Y) = compressDJGraph(Y, nextLvl) + val L_X = Int.min(L_X, L_Y) + in if pathPrune then + if L_Y >= nextLvl then + (* disconnect dom edge! *) + walkDomSucc(es, L_X, DJ_X, E_X, K_X) + else if K_Y <= K_max then + (* path compress! *) + let val (E_X, K_X) = mergeJoins(E_Y, E_X, K_X) + in walkDomSucc(es, L_X, appendJoins(E_Y, DJ_X), E_X, K_X) + end + else + let val Zs = A.sub(djGraph, Y) + in if length Zs <= K_max then + walkDomSucc(es, L_X, collapse(Zs,DJ_X), [], K_inf) + else + walkDomSucc(es, L_X, (DOM,Y)::DJ_X, [], K_inf) + end + else + walkDomSucc(es, L_X, (DOM,Y)::DJ_X, [], K_inf) + end + fun walkCFGSucc([], L_X, DJ_X, E_X, K_X) = (L_X, DJ_X, E_X, K_X) + | walkCFGSucc((_,Y,_)::es, L_X, DJ_X, E_X, K_X) = + let val L_X = Int.min(L_X, A.sub(levels, Y)) + val (E_X, K_X) = mergeJoin(Y, E_X, K_X) + in walkCFGSucc(es, L_X, (JOIN,Y)::DJ_X, E_X, K_X) + end + + val (L_X, DJ_X, E_X, K_X) = + walkDomSucc(#out_edges dom X, 10000000, [], [], 0) + val (L_X, DJ_X, E_X, K_X) = + walkCFGSucc(#out_edges cfg X, L_X, DJ_X, E_X, K_X) + + in A.update(minJLevels, X, L_X); + A.update(djGraph, X, DJ_X); + (L_X, E_X, K_X) + end + + val [ENTRY] = #entries dom () + val _ = compressDJGraph(ENTRY, 0) + + + fun LiveIDFs {defs, localLiveIn=[]} = [] (* special case *) + | LiveIDFs {defs=xs, localLiveIn} = + let val stamp = new_stamp() + (* val n = ref 0 + val m = ref 0 *) + + fun initDefs([],maxLvl) = maxLvl + | initDefs(x::xs,maxLvl) = + let val lvl_x = A.sub(levels,x) + in A.update(in_alpha,x,stamp); + A.update(piggybank,lvl_x,x::A.sub(piggybank,lvl_x)); + initDefs(xs,if maxLvl < lvl_x then lvl_x else maxLvl) + end + + fun markLiveIn(b) = + let fun markPred [] = () + | markPred((j,_,_)::es) = + (if A.sub(liveIn,j) <> stamp andalso + A.sub(in_alpha,j) <> stamp then + markLiveIn j + else (); + markPred es + ) + in (* m := !m + 1; *) + A.update(liveIn,b,stamp); + if stats then liveVisitCount := !liveVisitCount + 1 else (); + markPred(#in_edges cfg b) + end + + fun initLiveIn [] = () + | initLiveIn(x::xs) = (markLiveIn x; initLiveIn xs) + + fun isLive b = A.sub(liveIn,b) = stamp + + fun visit(y,level_x,S) = + let fun foreach([],S) = S + | foreach((JOIN,z)::zs,S) = + let val level_z = A.sub(levels,z) + in if level_z <= level_x andalso + A.sub(in_phi,z) <> stamp andalso + isLive z + (* z is a new IDF^+ candidate; + * make sure it is live. + *) + then (A.update(in_phi,z,stamp); + if A.sub(in_alpha,z) <> stamp + then A.update(piggybank,level_z, + z::A.sub(piggybank,level_z)) + else (); + foreach(zs,z::S) + ) + else foreach(zs,S) + end + | foreach((DOM,z)::zs,S) = + foreach(zs,if isLive z andalso + A.sub(visited,z) <> stamp andalso + (not levelPrune orelse + A.sub(minJLevels,z) <= level_x) + then (A.update(visited,z,stamp); + visit(z,level_x,S) + ) + else S) + in if stats then visitCount := !visitCount + 1 else (); + foreach(A.sub(djGraph, y),S) + end + + fun visitAll(~1,S) = S + | visitAll(l,S) = + case A.sub(piggybank,l) of + [] => visitAll(l-1,S) + | x::xs => + let val _ = A.update(piggybank,l,xs) + val _ = A.update(visited,x,stamp); + val S = visit(x, A.sub(levels, x), S) + in + visitAll(l,S) + end + + fun domTest([x],uses) = + let fun loop [] = true + | loop(y::ys) = strictly_dominates(x,y) andalso loop ys + in loop uses end + | domTest _ = false + + in if domPrune andalso domTest(xs,localLiveIn) then [] + else + let val L = initDefs(xs, ~1) + in initLiveIn(localLiveIn); + visitAll(L, []) + end + end + + in LiveIDFs + end + +end + diff --git a/MLRISC/ir-archive/l-djgraph.sml b/MLRISC/ir-archive/l-djgraph.sml new file mode 100644 index 0000000..1624119 --- /dev/null +++ b/MLRISC/ir-archive/l-djgraph.sml @@ -0,0 +1,259 @@ +(* + * This is my L-compressed DJ-graph data structure for optimal SSA + * construction. For the description of this algorithm, see: + * http://www.cs.nyu.edu/leunga/my-papers/linear-ssa.ps + * --Allen + *) + +functor L_DJGraph (Dom : DOMINATOR_TREE) : DJ_GRAPH = +struct + + structure G = Graph + structure Dom = Dom + structure A = Array + + datatype exit_tree = + NODE of { name : int, + jedges : int list, + eedges : exit_tree list, + minVisitedLevel : int ref, + visited : int ref + } + + datatype ('n,'e,'g) dj_graph = + DJGRAPH of + { dom : ('n,'e,'g) Dom.dominator_tree, + trees : exit_tree list A.array, + stamp : int ref + } + + fun error msg = MLRiscErrorMsg.error("L-DJGraph",msg) + + val stats = false (* collect statistics? *) + val visitCount = MLRiscControl.getCounter "dj-visit-count" + val idfCount = MLRiscControl.getCounter "dj-IDF-count" + val idfSize = MLRiscControl.getCounter "dj-IDF-size" + val liveVisitCount = MLRiscControl.getCounter "dj-live-visit-count" + val debug = true + + fun DJ(Dom as G.GRAPH dom) = + let val G.GRAPH cfg = Dom.cfg Dom + val N = #capacity dom () + val [ENTRY] = #entries dom () + val levelsMap = Dom.levelsMap Dom + val L = Dom.max_levels Dom + val trees = A.array(N, []) + val levels' = A.array(L, ~1) + val exitLevels' = A.array(L, ~1) + val T = A.array(L, []) + val J = A.array(L, []) + + fun LTrees a = + let (* recurse *) + fun foreachDedge([]) = () + | foreachDedge((_,b,_)::es) = (LTrees b; foreachDedge es) + + val _ = foreachDedge(#out_edges dom a) + + val lvl_a = A.sub(levelsMap, a) + + (* partition J-edges *) + fun foreachJedge([], levels) = levels + | foreachJedge((_,b,_)::es, levels) = + let val lvl_b = A.sub(levelsMap, b) + in if lvl_b > lvl_a then (* non-J-edge *) + foreachJedge(es, levels) + else + let val _ = + if A.sub(exitLevels',lvl_b) = a then + A.update(J, lvl_b, b::A.sub(J, lvl_b)) + else + (A.update(exitLevels',lvl_b,a); + A.update(J, lvl_b, [b]) + ) + val levels = + if A.sub(levels',lvl_b) = a then + levels + else + (A.update(T, lvl_b, []); + A.update(levels',lvl_b,a); + lvl_b::levels + ) + in foreachJedge(es, levels) + end + end + + val levels = foreachJedge(#out_edges cfg a, []) + + (* partition subtrees *) + fun foreachDedge([], levels) = levels + | foreachDedge((_,b,_)::es, levels) = + let fun foreachTree([], levels) = levels + | foreachTree((l,t)::ts, levels) = + let val levels = + if l <= lvl_a then + if A.sub(levels',l) = a then + (A.update(T, l, t::A.sub(T, l)); levels) + else (A.update(levels',l,a); + A.update(T, l, [t]); + l::levels) + else levels + in foreachTree(ts, levels) end + val levels = foreachTree(A.sub(trees, b), levels) + in foreachDedge(es, levels) + end + val levels = foreachDedge(#out_edges dom a, levels) + + (* Build Trees(a) *) + fun buildTrees([], trees_a) = trees_a + | buildTrees(l::levels, trees_a) = + let fun makeNode(succs) = + let val jedges = if A.sub(exitLevels',l) = a then + A.sub(J,l) else [] + in (l,NODE{name=a, jedges=jedges, eedges=succs, + minVisitedLevel=ref 0, visited=ref 0}) + end + val T_l = + case A.sub(T,l) of + [] => makeNode [] + | ts as [t] => if A.sub(exitLevels',l) <> a then (l,t) + else makeNode ts + | ts => makeNode ts + in buildTrees(levels, T_l::trees_a) + end + val trees_a = buildTrees(levels, []) + in A.update(trees, a, trees_a) + end + + fun bucketSort(trees) = + let val buckets = A.array(L, []) + val _ = + #forall_nodes dom + (fn (a,_) => + let fun insert [] = () + | insert((l,t)::ts) = + (A.update(buckets,l,(a,t)::A.sub(buckets,l)); insert ts) + in insert(A.sub(trees,a)) + end) + val trees = A.array(N, []) + fun collect(l) = + if l >= L then () + else let fun dist [] = () + | dist((a,t)::ts) = + (A.update(trees,a,t::A.sub(trees,a)); dist ts) + in dist(A.sub(buckets, l)); + collect(l+1) + end + in collect 0; + trees + end + + val _ = LTrees ENTRY (* build Trees *) + val trees = bucketSort(trees) (* sort trees *) + in DJGRAPH{dom=Dom, trees=trees, stamp=ref 0} + end + + (* Compute dominance frontier *) + fun DF _ = error "DF" + fun IDFs _ = error "IDFs" + + fun LiveIDFs (DJGRAPH{trees, dom, stamp, ...}) = + let val G.GRAPH cfg = Dom.cfg dom + val levelsMap = Dom.levelsMap dom + val N = #capacity cfg () + val inphi = A.array(N, 0) + val inalpha = A.array(N, 0) + val live = A.array(N, 0) + fun newStamp() = + let val s = Word.toIntX(Word.fromInt(!stamp) + 0w1) + in stamp := s; s + end + fun LiveIDFs{defs, localLiveIn=[]} = [] + | LiveIDFs{defs, localLiveIn} = + let val stamp = newStamp() + + fun initDefs([]) = () + | initDefs(x::xs) = (A.update(inalpha, x, stamp); initDefs xs) + + fun markLiveIn(b) = + let fun markPred [] = () + | markPred((j,_,_)::es) = + (if A.sub(live,j) <> stamp andalso + A.sub(inalpha,j) <> stamp then + markLiveIn j + else (); + markPred es + ) + in (* m := !m + 1; *) + A.update(live,b,stamp); + if stats then liveVisitCount := !liveVisitCount + 1 else (); + markPred(#in_edges cfg b) + end + + fun initLiveIn [] = () + | initLiveIn(x::xs) = (markLiveIn x; initLiveIn xs) + + fun isLive b = A.sub(live, b) = stamp + + fun unmarked(X,b) = A.sub(X,b) <> stamp + fun mark(X,b) = A.update(X,b,stamp) + + fun visit(x, queue, IDF) = + let val level_x = A.sub(levelsMap, x) + + fun walk(NODE{name=y, jedges, eedges, visited, minVisitedLevel}, + queue, IDF) = + let fun foreachJedge([], queue, IDF) = (queue, IDF) + | foreachJedge(z::es, queue, IDF) = + if isLive z andalso unmarked(inphi,z) + then (mark(inphi, z); + foreachJedge + (es, + if unmarked(inalpha,z) then z::queue else queue, + z::IDF) + ) + else foreachJedge(es, queue, IDF) + fun foreachEEdge([], queue, IDF) = (queue, IDF) + | foreachEEdge((z as NODE{name=z',...})::es, queue, IDF) = + if isLive z' then + let val (queue, IDF) = walk(z, queue, IDF) + in foreachEEdge(es, queue, IDF) end + else + foreachEEdge(es, queue, IDF) + in if !visited = stamp then (* visited before *) + (minVisitedLevel := Int.min(!minVisitedLevel, level_x); + (queue, IDF) + ) + else + (minVisitedLevel := level_x; (* non-visited *) + visited := stamp; + let val (queue, IDF) = foreachJedge(jedges, queue, IDF) + in foreachEEdge(eedges, queue, IDF) end + ) + end + + fun ancestorHasBeenProcessed + (NODE{visited, minVisitedLevel, ...}) = + !visited = stamp andalso !minVisitedLevel < level_x + + fun foreachTree([], queue, IDF) = (queue, IDF) + | foreachTree(t::ts, queue, IDF) = + if ancestorHasBeenProcessed t then (queue, IDF) + else let val (queue, IDF) = walk(t, queue, IDF) + in foreachTree(ts, queue, IDF) end + in foreachTree(A.sub(trees, x), queue, IDF) + end + + fun visitAll([], IDF) = IDF + | visitAll(x::queue, IDF) = + let val (queue, IDF) = visit(x, queue, IDF) + in visitAll(queue, IDF) end + + in initDefs defs; + initLiveIn localLiveIn; + visitAll(defs, []) + end + in LiveIDFs + end + +end diff --git a/MLRISC/ir-archive/loop-structure.sig b/MLRISC/ir-archive/loop-structure.sig new file mode 100644 index 0000000..dabc328 --- /dev/null +++ b/MLRISC/ir-archive/loop-structure.sig @@ -0,0 +1,57 @@ +(* loop-structure.sig + * + * COPYRIGHT (c) 2002 Bell Labs, Lucent Technologies. + * + * This module is responsible for locating loop structures (intervals). + * All loops have only one single entry (via the header) but + * potentially multiple exits, i.e. the header dominates all nodes + * within the loop. Other definitions are used for ``loops'' and ``headers'' + * in the literature. We choose a structural definition that has nicer + * properties. + * + * -- Allen + *) + +signature LOOP_STRUCTURE = +sig + + structure Dom : DOMINATOR_TREE + structure GI : GRAPH_IMPLEMENTATION + + (* + * DEF: An edge i -> j is a backedge iff j dom i. + * Here, j is the header, and i -> j \in backedges(j) + * A loop is identified by its header h. + *) + datatype ('n,'e,'g) loop = + LOOP of { nesting : int, + header : Graph.node_id, + loop_nodes : Graph.node_id list, + backedges : 'e Graph.edge list, + exits : 'e Graph.edge list + } + + type ('n,'e,'g) loop_info + + type ('n,'e,'g) loop_structure = + (('n,'e,'g) loop,unit, ('n,'e,'g) loop_info) Graph.graph + + val dom : ('n,'e,'g) loop_structure -> + ('n,'e,'g) Dom.dominator_tree + + (* O(n+e) *) + val loop_structure : ('n,'e,'g) Dom.dominator_tree -> + ('n,'e,'g) loop_structure + + (* return an array mapping node id -> nesting level *) + val nesting_level : ('n,'e,'g) loop_structure -> Graph.node_id Array.array + + (* return an array mapping node id -> header that it belongs to *) + val header : ('n,'e,'g) loop_structure -> Graph.node_id Array.array + + (* given a header, return the set of entry edges into the loop *) + val entryEdges : ('n,'e,'g) loop_structure -> Graph.node_id -> + 'e Graph.edge list + +end + diff --git a/MLRISC/ir-archive/loop-structure.sml b/MLRISC/ir-archive/loop-structure.sml new file mode 100644 index 0000000..6f56c31 --- /dev/null +++ b/MLRISC/ir-archive/loop-structure.sml @@ -0,0 +1,217 @@ +(* + * This module is responsible for locating loop structures (intervals). + * All loops have only one single entry (via the header) but + * potentially multiple exits, i.e. the header dominates all nodes. + * Basically this is Tarjan's algorithm. + * + * The old version is broken as reported by William Chen. + * This is a rewrite. + *) + +functor LoopStructure (structure GraphImpl : GRAPH_IMPLEMENTATION + structure Dom : DOMINATOR_TREE) + : LOOP_STRUCTURE = +struct + + structure G = Graph + structure GI = GraphImpl + structure Dom = Dom + structure A = Array + structure U = URef + + datatype ('n,'e,'g) loop = + LOOP of { nesting : int, + header : G.node_id, + loop_nodes : G.node_id list, + backedges : 'e G.edge list, + exits : 'e G.edge list + } + + datatype ('n,'e,'g) loop_info = + INFO of { dom : ('n,'e,'g) Dom.dominator_tree } + + type ('n,'e,'g) loop_structure = + (('n,'e,'g) loop, unit, ('n,'e,'g) loop_info) Graph.graph + + fun dom(G.GRAPH{graph_info=INFO{dom,...},...}) = dom + + fun loop_structure DOM = + let + val info = INFO{ dom = DOM } + val G.GRAPH cfg = Dom.cfg DOM + val G.GRAPH dom = DOM + val N = #capacity dom () + val dominates = Dom.dominates DOM + val LS as G.GRAPH ls = GI.graph ("Loop structure",info,N) + val ENTRY = case #entries cfg () of + [ENTRY] => ENTRY + | _ => raise Graph.NotSingleEntry + + (* mapping from node id -> header *) + val headers = A.array(N, ~1) + + (* mapping from header -> previous header in the loop *) + val lastHeaders = A.array(N, ~1) + + (* mark all visited nodes during construction *) + val visited = A.array(N, ~1) + + (* mapping from nodes id -> collapsed header during construction *) + val P = A.tabulate(N, U.uRef) + + (* walk the dominator tree and return a list of loops *) + fun walk (X, loops) = + let + (* Look for backedges *) + val backedges = List.filter + (fn (Y, X, _) => dominates(X, Y)) (#in_edges cfg X) + (* X is a header iff it has backedges or X is the ENTRY *) + val is_header = case backedges of [] => X = ENTRY | _ => true + + (* Walk the dominator tree first *) + val loops = List.foldr walk loops (#succ dom X) + in + (* If X is a header node then collaspe all the nodes within + * the loop into the header. The entry node has to be + * treated specially, unfortunately. + *) + if is_header then + let val L = mark(X, X, []) + val L = if X = ENTRY then find_entry_loop_nodes [] else L + val () = collapse(X, L) + val exits = find_exits(L, []) + in (* Create a new loop node *) + (X, backedges, L, exits)::loops + end + else + loops + end + + + (* mark all the nodes that are within the loop identified + * by the header. Return a list of loop nodes. + *) + and mark(X, header, L) = + if A.sub(visited, X) <> header then + let + (* mark X as visited *) + val _ = A.update(visited, X, header) + + (* header of X *) + val H_X = A.sub(headers, X) + + val L = if H_X = ~1 then (* X has no header yet *) + X::L + else if H_X = X andalso A.sub(lastHeaders, X) = ~1 then + (* Add loop edge *) + (A.update(lastHeaders, X, header); + #add_edge ls (header, X, ()); + L + ) + else L + in List.foldr (fn ((Y, _, _), L) => + let val Y = U.!! (A.sub(P, Y)) + in if dominates(header, Y) then mark(Y, header, L) else L + end) L (#in_edges cfg X) + end + else L + + (* collapse all nodes in L to the header H *) + and collapse(H, L) = + let val h = A.sub(P, H) + in List.app (fn X => + (U.link (A.sub(P, X), h); + if A.sub(headers, X) = ~1 then + A.update(headers, X, H) + else ())) L + end + + (* find all nodes that are not part of any loops *) + and find_entry_loop_nodes L = + List.foldr (fn ((X, _), L) => + if A.sub(headers, X) = ~1 then + X::L + else if X <> ENTRY andalso + A.sub(headers, X) = X andalso + A.sub(lastHeaders, X) = ~1 then + (#add_edge ls (ENTRY, X, ()); + A.update(lastHeaders, X, ENTRY); + L + ) + else + L + ) L (#nodes cfg ()) + + + (* find all edges that can exit from the loop H *) + and find_exits([],exits) = exits + | find_exits(X::Xs,exits) = + let fun f((e as (X,Y,_))::es,exits) = + if A.sub(headers,Y) = ~1 + then f(es,e::exits) + else f(es,exits) + | f([], exits) = exits + in find_exits(Xs, f(#out_edges cfg X, exits)) + end + + (* walk tree and create edges *) + val loops = walk (ENTRY, []) + + (* create nodes *) + val () = List.app (fn (H, backedges, loop_nodes, exits) => + let val last = A.sub(lastHeaders, H) + val nesting = if last = ~1 then 0 + else + let val LOOP{nesting, ...} = + #node_info ls last + in nesting+1 end + in #add_node ls (H, LOOP{nesting = nesting, + header = H, + backedges = backedges, + loop_nodes = loop_nodes, + exits = exits}) + end) loops + in + LS + end + + fun nesting_level(G.GRAPH L) = let + val INFO{dom=G.GRAPH dom,...} = #graph_info L + val N = #capacity dom () + val levels = A.array(N,0) + fun tabulate(_,LOOP{nesting,header,loop_nodes,...}) = + (A.update(levels,header,nesting); + app (fn i => A.update(levels,i,nesting)) loop_nodes) + in + #forall_nodes L tabulate; levels + end + + fun header(G.GRAPH L) = let + val INFO{dom=G.GRAPH dom,...} = #graph_info L + val N = #capacity dom () + val headers = A.array(N,0) + fun tabulate(_,LOOP{header,loop_nodes,...}) = + (A.update(headers,header,header); + app (fn i => A.update(headers,i,header)) loop_nodes) + in + #forall_nodes L tabulate; headers + end + + fun entryEdges(Loop as G.GRAPH L) = let + val dom = dom Loop + val G.GRAPH cfg = Dom.cfg dom + val dominates = Dom.dominates dom + fun entryEdges(header) = + if #has_node L header then + List.filter (fn (i,j,_) => not(dominates(j,i))) + (#in_edges cfg header) + else [] + in entryEdges + end + + fun isBackEdge(Loop as G.GRAPH L) = + let val dom = Dom.dominates(dom Loop) + in fn (v,w) => #has_node L w andalso dom(w,v) + end +end + diff --git a/MLRISC/ir-archive/reducibility.sig b/MLRISC/ir-archive/reducibility.sig new file mode 100644 index 0000000..1bde408 --- /dev/null +++ b/MLRISC/ir-archive/reducibility.sig @@ -0,0 +1,11 @@ +(* + * This module tests for reducibility of a loop + * + * -- Allen + *) +signature REDUCIBILITY = +sig + structure Loop : LOOP_STRUCTURE + + val is_reducible : ('n,'e,'g) Loop.loop_structure -> Graph.node_id -> bool +end diff --git a/MLRISC/ir-archive/reducibility.sml b/MLRISC/ir-archive/reducibility.sml new file mode 100644 index 0000000..331cadf --- /dev/null +++ b/MLRISC/ir-archive/reducibility.sml @@ -0,0 +1,29 @@ +(* + * This module tests for reducibility of a loop + * + * -- Allen + *) +functor Reducibility(Loop : LOOP_STRUCTURE) : REDUCIBILITY = +struct + structure Loop = Loop + structure Dom = Loop.Dom + structure G = Graph + + structure Derived = DerivedGraph(Dom) + + fun is_reducible(Loop) = + let val Dom = Loop.dom Loop + val headers = Loop.header Loop + val Derived as G.GRAPH derived = Derived.derived_graph Dom + val N = #capacity derived () + val irreducible = BitSet.create N + fun markIrreducible([_],_) = () (* simple cycles are reducible *) + | markIrreducible(cycle,_) = + app (fn n => BitSet.set(irreducible,n)) cycle + val _ = GraphSCC.scc Derived markIrreducible () + fun isReducible n = + let val h = Array.sub(headers,n) + in not(BitSet.contains(irreducible,n)) end + in isReducible + end +end diff --git a/MLRISC/ir-archive/ssa.sig b/MLRISC/ir-archive/ssa.sig new file mode 100644 index 0000000..1824a30 --- /dev/null +++ b/MLRISC/ir-archive/ssa.sig @@ -0,0 +1,36 @@ +(* + * This generic module is used for computing static single assignment form. + * Actually only the renaming and iterated dominance frontiers computation + * is implemented here. + * + * -- Allen + *) +signature STATIC_SINGLE_ASSIGNMENT_FORM = +sig + + structure Dom : DOMINATOR_TREE + + type var = int + type phi = var * var * var list (* orig def/def/uses *) + type renamer = {defs : var list, uses: var list} -> + {defs : var list, uses: var list} + type copy = {dst : var list, src: var list} -> unit + + (* + * Given a set of definitions for each block, + * Compute the set of phi nodes. + *) + val compute_ssa : + ('n,'e,'g) Dom.dominator_tree -> + { max_var : var, + defs : 'n Graph.node -> var list, + is_live : var * int -> bool, + rename_var : var -> var, + rename_stmt : {rename:renamer,copy:copy} -> 'n Graph.node -> unit, + insert_phi : {block : 'n Graph.node, + in_edges : 'e Graph.edge list, + phis : phi list + } -> unit + } -> unit +end + diff --git a/MLRISC/ir-archive/ssa.sml b/MLRISC/ir-archive/ssa.sml new file mode 100644 index 0000000..89f82b7 --- /dev/null +++ b/MLRISC/ir-archive/ssa.sml @@ -0,0 +1,178 @@ +(* + * SSA placement module. This is the algorithm from Cytron et al.'s + * TOPLAS paper. This module is kept generic so that we can also use it + * to compute sparse evaluation graphs, factored redef/use chains (of Wolfe) + * etc. + * + * This implementation uses Sreedhar et al.'s DJ-graph to compute + * the iterated dominance frontier, which should be slightly faster + * than the default implementation. + * + * For the stack of renamed variables, we use the scheme proposed + * by Briggs, Cooper, Harvey and Simpson in Software Practice & Experience + * 1988. (Actually we don't) + * + * -- Allen + *) + +functor StaticSingleAssignmentForm + (Dom : DOMINATOR_TREE) : STATIC_SINGLE_ASSIGNMENT_FORM = +struct + structure Dom = Dom + structure G = Graph + structure A = Array + + type var = int + type phi = var * var * var list + type renamer = {defs : var list, uses: var list} -> + {defs : var list, uses: var list} + type copy = {dst : var list, src : var list} -> unit + + structure DJ = DJGraph(Dom) + + fun app f = + let fun g [] = () + | g (x::xs) = (f x; g xs) + in g end + + (* + * Place join nodes at the iterated dominance frontier of def_sites(v) + * that is live. + *) + fun place_joins (Dom as G.GRAPH dom) + { max_var=V, defs, is_live } = + let val N = #capacity dom () + val G.GRAPH cfg = Dom.cfg Dom + val def_sites = A.array(V,[]) (* indexed by var *) + val phis = A.array(N,[]) (* indexed by block id *) + + (* compute the def sites of all variables *) + val _ = #forall_nodes cfg + (fn (n,block) => + app (fn v => A.update(def_sites,v,n::A.sub(def_sites,v))) + (defs(n,block)) + ) + (* compute phi placements for a variable *) + val IDFs = DJ.IDFs Dom + fun place_phi(v,[]) = () + | place_phi(v,def_sites) = + let fun place_all [] = () + | place_all(Y::Ys) = + (if is_live(v,Y) then + A.update(phis,Y,(v,v,[])::A.sub(phis,Y)) + else (); + place_all Ys) + in place_all (IDFs def_sites) + end + + val _ = A.appi place_phi (def_sites,0,NONE) + in phis + end + + (* + * Rename variables and compute the ssa form + *) + fun compute_ssa (Dom as G.GRAPH dom) + { max_var=V, defs, is_live, rename_stmt, insert_phi, rename_var } = + let val N = #capacity dom () + val G.GRAPH cfg = Dom.cfg Dom + val [ENTRY] = #entries dom () + val phis = place_joins Dom {max_var=V,defs=defs,is_live=is_live} + val stacks = A.array(V,[]) (* indexed by var *) + val in_edges = A.array(N,[]) + + (* Lookup the current renaming of v *) + fun lookup v = + case A.sub(stacks,v) of + v'::_ => v' + | _ => v + + (* Retract one entry of v *) + fun pop v = case A.sub(stacks,v) of _::l => A.update(stacks,v,l) + + fun search X = + let val X' = #node_info cfg X + val old_defs = ref [] + + fun rename_use v = + if v < 0 then v + else + let val vs = A.sub(stacks,v) + val v' = case vs of v'::_ => v' | _ => v + in v' + end + + fun rename_uses [] = [] + | rename_uses (v::vs) = rename_use v::rename_uses vs + + (* rename a definition of v *) + fun rename_def v = + let val v' = rename_var v + val vs = A.sub(stacks,v) + in A.update(stacks,v,v'::vs); + old_defs := v :: !old_defs; + v' + end + + fun rename_defs [] = [] + | rename_defs (v::vs) = rename_def v::rename_defs vs + + fun copy_def(v,v') = + (A.update(stacks,v,v'::A.sub(stacks,v)); + old_defs := v :: !old_defs) + + (* parallel copy *) + fun copy {dst,src} = + ListPair.app copy_def (dst,rename_uses src) + + (* rename statement of the form defs := uses in block X + * We must rename the uses first!!! + *) + fun rename {defs,uses} = + let val uses' = rename_uses uses + val defs' = rename_defs defs + in {defs=defs',uses=uses'} + end + + (* rename the definition of phi functions *) + fun rename_phi_def X = + let val X_phis = A.sub(phis,X) + fun rn [] = [] + | rn((v',v,uses)::rest) = (v',rename_def v,uses)::rn rest + val X_phis = rn X_phis + in A.update(phis,X,X_phis) + end + + (* rename the uses of phi functions *) + fun rename_phi_use X = + let val out_edges = #out_edges cfg X + fun rename_phi_of_Y (e as (X,Y,_)) = + let val Y_phis = A.sub(phis,Y) + fun insert_uses [] = [] + | insert_uses((v',v,uses)::rest) = + (v',v,rename_use v'::uses)::insert_uses rest + in A.update(in_edges,Y,e::A.sub(in_edges,Y)); + A.update(phis,Y,insert_uses Y_phis) + end + in app rename_phi_of_Y out_edges + end + + in + rename_phi_def X; + rename_stmt {rename=rename,copy=copy} (X,X'); + rename_phi_use X; + app search (#succ dom X); + app pop (!old_defs) + end + + (* place phis *) + fun place_phi (B as (b,_)) = + insert_phi{block=B,in_edges=A.sub(in_edges,b),phis=A.sub(phis,b)} + + in + search ENTRY; + #forall_nodes cfg place_phi + end + +end + diff --git a/MLRISC/ir-archive/test-all.sml b/MLRISC/ir-archive/test-all.sml new file mode 100644 index 0000000..be8c11e --- /dev/null +++ b/MLRISC/ir-archive/test-all.sml @@ -0,0 +1,3 @@ + +val () = #set (CM.symval "UNSHARED_MLRISC") (SOME 1); +CM.make "test.cm"; diff --git a/MLRISC/ir-archive/test.cm b/MLRISC/ir-archive/test.cm new file mode 100644 index 0000000..4887978 --- /dev/null +++ b/MLRISC/ir-archive/test.cm @@ -0,0 +1,28 @@ +Group is +#if defined(NEW_CM) +#if SMLNJ_VERSION * 100 + SMLNJ_MINOR_VERSION >= 11030 + $/basis.cm +#else + basis.cm +#endif +#endif + + $/smlnj-lib.cm +#if defined(UNSHARED_MLRISC) + ../cm/Control.cm + ../cm/Graphs.cm + ../cm/Visual.cm + ../cm/MLRISC.cm +#else + $/Control.cm + $/Graphs.cm + $/Visual.cm + $/MLRISC.cm +#endif + + (* ../cm/ir-archive.cm *) + (* test1.sml + test2.sml + test3.sml *) + + test4.sml diff --git a/MLRISC/ir-archive/test1.sml b/MLRISC/ir-archive/test1.sml new file mode 100644 index 0000000..6ec0c3d --- /dev/null +++ b/MLRISC/ir-archive/test1.sml @@ -0,0 +1,35 @@ +structure TestDJGraph = +struct +structure Graph = Graph; +val G as Graph.GRAPH g = DirectedGraph.graph("foo",(),10) : + (string,int,unit) Graph.graph +structure Dom = DominatorTree(DirectedGraph) +structure DJ = DJGraph(Dom) +val _ = app (#add_node g) + [(0,"s"), + (1,"v1"), + (2,"v2"), + (3,"v3"), + (4,"v4"), + (5,"t") + ] +val _ = #set_entries g [0] +val _ = #set_exits g [5] +val _ = app (#add_edge g) + [(0,1,16), + (0,2,13), + (1,2,10), + (2,1,4), + (1,3,12), + (2,4,14), + (3,2,9), + (4,3,7), + (3,5,20), + (4,5,4) + ] + +val dom = Dom.makeDominator G +val pdom = Dom.makePostdominator G +val IDFs = DJ.IDFs dom + +end diff --git a/MLRISC/ir-archive/test2.sml b/MLRISC/ir-archive/test2.sml new file mode 100644 index 0000000..35dd0d4 --- /dev/null +++ b/MLRISC/ir-archive/test2.sml @@ -0,0 +1,91 @@ +(* + * This example is from the paper + * ``A New Frameowrk for Elimination Based Data Flow Analysis using DJ Graphs'' + * By Sreedhar et. al. + *) +structure TestDJDataflow = +struct +structure Graph = Graph; +val CFG as Graph.GRAPH cfg = DirectedGraph.graph("cfg",(),10) : + (string,unit,unit) Graph.graph +structure Viewer = GraphViewer(AllDisplays) +structure L = GraphLayout +structure Dom = DominatorTree(DirectedGraph) +structure DJ = DJGraph(Dom) +structure Dataflow = DJDataflow(Dom) + +val _ = app (#add_node cfg) + [(0,"0"), + (1,"1"), + (2,"2"), + (3,"3"), + (4,"4"), + (5,"5"), + (6,"6"), + (7,"7"), + (8,"8"), + (9,"9") + ] +val _ = #set_entries cfg [0] +val _ = #set_exits cfg [9] +val _ = app (#add_edge cfg) + [(0,1,()), + (1,2,()), + (2,3,()), + (2,8,()), + (3,4,()), + (3,5,()), + (4,6,()), + (4,9,()), + (5,6,()), + (6,2,()), + (6,7,()), + (7,9,()), + (8,1,()), + (8,9,()), + (9,0,()) + ] + +val Dom = Dom.makeDominator CFG + +fun viewCFG _ = + Viewer.view(L.makeLayout + {graph = fn _ => [], + node = fn (i,_) => [L.LABEL(Int.toString i)], + edge = fn (i,j,_) => [L.COLOR "red"] + } CFG) +fun viewDom _ = + Viewer.view(L.makeLayout + {graph = fn _ => [], + node = fn (i,_) => [L.LABEL(Int.toString i)], + edge = fn (i,j,_) => [L.COLOR "red"] + } Dom) +fun viewDJ _ = + let fun iso kind G = + IsomorphicGraphView.map (fn x => x) (fn x => kind) (fn g => ()) G + val idom = Dom.immediately_dominates Dom + val Dom = iso [L.COLOR "red"] Dom + val CFG = iso [L.COLOR "green"] CFG + val CFG' = SubgraphView.subgraph_view + (map #1 (#nodes cfg ())) + (fn (i,j,_) => not(idom(i,j))) CFG + val DJ = UnionGraphView.union_view (fn _ => ()) (Dom,CFG') + in Viewer.view(L.makeLayout + {graph = fn _ => [], + node = fn (i,_) => [L.LABEL(Int.toString i)], + edge = fn (i,j,e) => e + } DJ) + end + +fun testDataflow() = +let fun closure{y} = print("Closure "^Int.toString y^"\n") + fun var_elim{y,z} = print("Variable elim "^Int.toString y^ + "->"^Int.toString z^"\n") + fun fixpoint{scc} = () + fun compute{y,z} = () +in Dataflow.analyze{closure=closure, var_elim=var_elim, + fixpoint=fixpoint, compute=compute} Dom +end + +end + diff --git a/MLRISC/ir-archive/test3.sml b/MLRISC/ir-archive/test3.sml new file mode 100644 index 0000000..ea6db80 --- /dev/null +++ b/MLRISC/ir-archive/test3.sml @@ -0,0 +1,88 @@ +(* + * This example is from the paper + * ``A New Frameowrk for Elimination Based Data Flow Analysis using DJ Graphs'' + * By Sreedhar et. al. + * This is the irreducible example. + *) +structure TestDJDataflow2 = +struct +structure Graph = Graph; +val CFG as Graph.GRAPH cfg = DirectedGraph.graph("cfg",(),10) : + (string,unit,unit) Graph.graph +structure Viewer = GraphViewer(AllDisplays) +structure L = GraphLayout +structure Dom = DominatorTree(DirectedGraph) +structure DJ = DJGraph(Dom) +structure Dataflow = DJDataflow(Dom) + +val _ = app (#add_node cfg) + [(0,"0"), + (1,"1"), + (2,"2"), + (3,"3"), + (4,"4"), + (5,"5"), + (6,"6"), + (7,"7"), + (8,"8") + ] +val _ = #set_entries cfg [0] +val _ = #set_exits cfg [9] +val _ = app (#add_edge cfg) + [(0,1,()), + (1,2,()), + (1,3,()), + (2,4,()), + (3,4,()), + (4,5,()), + (4,6,()), + (5,7,()), + (6,7,()), + (7,4,()), + (7,8,()), + (8,3,()) + ] + +val Dom = Dom.makeDominator CFG + +fun viewCFG _ = + Viewer.view(L.makeLayout + {graph = fn _ => [], + node = fn (i,_) => [L.LABEL(Int.toString i)], + edge = fn (i,j,_) => [L.COLOR "red"] + } CFG) +fun viewDom _ = + Viewer.view(L.makeLayout + {graph = fn _ => [], + node = fn (i,_) => [L.LABEL(Int.toString i)], + edge = fn (i,j,_) => [L.COLOR "red"] + } Dom) +fun viewDJ _ = + let fun iso kind G = + IsomorphicGraphView.map (fn x => x) (fn x => kind) (fn g => ()) G + val idom = Dom.immediately_dominates Dom + val Dom = iso [L.COLOR "red"] Dom + val CFG = iso [L.COLOR "green"] CFG + val CFG' = SubgraphView.subgraph_view + (map #1 (#nodes cfg ())) + (fn (i,j,_) => not(idom(i,j))) CFG + val DJ = UnionGraphView.union_view (fn _ => ()) (Dom,CFG') + in Viewer.view(L.makeLayout + {graph = fn _ => [], + node = fn (i,_) => [L.LABEL(Int.toString i)], + edge = fn (i,j,e) => e + } DJ) + end + +fun testDataflow() = +let fun closure{y} = print("Closure "^Int.toString y^"\n") + fun var_elim{y,z} = print("Variable elim "^Int.toString y^ + "->"^Int.toString z^"\n") + fun fixpoint{scc} = () + fun compute{y,z} = () +in Dataflow.analyze{closure=closure, var_elim=var_elim, + fixpoint=fixpoint, compute=compute} Dom +end + +end + diff --git a/MLRISC/library/annotations.sig b/MLRISC/library/annotations.sig new file mode 100644 index 0000000..d52a043 --- /dev/null +++ b/MLRISC/library/annotations.sig @@ -0,0 +1,52 @@ +(* + * User definable annotations. + * + * Note: annotations will now be used extensively in all part of + * the optimizer. + * + * Idea is stolen from Stephen Weeks + * + * -- Allen + *) + +signature ANNOTATIONS = +sig + + type annotation + type annotations = annotation list + type propList = annotations + + exception NoProperty + + type 'a property = + { get : annotations -> 'a option, + peek : annotation -> 'a option, + lookup : annotations -> 'a, + contains : annotations -> bool, + set : 'a * annotations -> annotations, + rmv : annotations -> annotations, + create : 'a -> annotation + } + type flag = unit property + + (* + * Generate a new annotation. + * Client should provide a pretty printing function. + *) + val new : ('a -> string) option -> 'a property + val new' : {toString: 'a -> string, + get : exn -> 'a, + create : 'a -> exn + } -> 'a property + + (* + * Pretty print an annotation + *) + val toString : annotation -> string + + (* + * Attach a pretty printer + *) + val attachPrettyPrinter : (annotation -> string) -> unit + +end diff --git a/MLRISC/library/annotations.sml b/MLRISC/library/annotations.sml new file mode 100644 index 0000000..d0f0594 --- /dev/null +++ b/MLRISC/library/annotations.sml @@ -0,0 +1,89 @@ +(* + * User definable annotations. + * + * Note: annotations will now be used extensively in all part of + * the optimizer. + * + * Idea is stolen from Stephen Weeks + * + * -- Allen + *) + +structure Annotations : ANNOTATIONS = +struct + + type annotation = exn + type annotations = annotation list + type propList = annotations + exception NoProperty + type 'a property = + { get : annotations -> 'a option, + peek : annotation -> 'a option, + lookup : annotations -> 'a, + contains : annotations -> bool, + set : 'a * annotations -> annotations, + rmv : annotations -> annotations, + create : 'a -> annotation + } + type flag = unit property + + val prettyPrinters = ref [] : (annotation -> string) list ref + + fun attachPrettyPrinter p = prettyPrinters := p :: !prettyPrinters + + fun toString a = + let fun pr([]) = "" + | pr(p::ps) = (p a handle _ => pr ps) + in pr(!prettyPrinters) end + + (* + * Look ma, a real use of generative exceptions! + *) + fun 'a new(toString) = + let exception Annotation of 'a + fun get [] = NONE + | get (Annotation x::_) = SOME x + | get (_::l) = get l + fun peek(Annotation x) = SOME x + | peek _ = NONE + fun lookup [] = raise NoProperty + | lookup (Annotation x::_) = x + | lookup (_::l) = lookup l + fun contains [] = false + | contains (Annotation _::_) = true + | contains (_::l) = contains l + fun set(x,[]) = [Annotation x] + | set(x,Annotation _::l) = Annotation x::l + | set(x,y::l) = y::set(x,l) + fun rmv [] = [] + | rmv (Annotation _::l) = rmv l + | rmv (x::l) = x::rmv l + in case toString of + NONE => () + | SOME f => attachPrettyPrinter(fn Annotation x => f x | e => raise e); + { get=get, peek=peek, lookup=lookup, contains=contains, + set=set, rmv=rmv, create=Annotation + } + end + + fun 'a new'{create, toString, get=get'} = + let fun get [] = NONE + | get (x::l) = SOME(get' x) handle _ => get l + fun peek x = SOME(get' x) handle _ => NONE + fun lookup [] = raise NoProperty + | lookup (x::l) = get' x handle _ => lookup l + fun contains [] = false + | contains (x::l) = (get' x; true) handle _ => contains l + fun set(x,[]) = [create x] + | set(x,a::l) = (get' a; create x::l) handle _ => a::set(x,l) + fun rmv [] = [] + | rmv (x::l) = (get' x; rmv l) handle _ => x::rmv l + in attachPrettyPrinter(toString o get'); + { get=get, peek=peek, lookup=lookup, contains=contains, + set=set, rmv=rmv, create=create + } + end + + +end + diff --git a/MLRISC/library/bitset.sig b/MLRISC/library/bitset.sig new file mode 100644 index 0000000..0880c2b --- /dev/null +++ b/MLRISC/library/bitset.sig @@ -0,0 +1,23 @@ +(* + * Dense set in bitvector format. + * + * -- Allen + *) + +signature BITSET = +sig + + type bitset + + val create : int -> bitset + val size : bitset -> int + val contains : bitset * int -> bool + val set : bitset * int -> unit + val reset : bitset * int -> unit + val clear : bitset -> unit + val markAndTest : bitset * int -> bool + val unmarkAndTest : bitset * int -> bool + val toString : bitset -> string + +end + diff --git a/MLRISC/library/bitset.sml b/MLRISC/library/bitset.sml new file mode 100644 index 0000000..7b75a33 --- /dev/null +++ b/MLRISC/library/bitset.sml @@ -0,0 +1,74 @@ +(* + * Non growable dense set in bitvector format. + * + * -- Allen + *) + +structure BitSet :> BITSET = +struct + + structure A = Word8Array + structure W = Word8 + open A + + infix << >> & || + infix sub + + type bitset = array + + val word = Word.fromInt + val int = Word.toInt + val op & = Word.andb + val op >> = Word.>> + val op << = W.<< + + fun create n = array((n+7) div 8, 0wx0) + + fun size a = length(a) * 8 + + fun set (a, i) = + let val byte = int((word i) >> 0w3) + val mask = W.<< (0w1, (word i) & 0w7) + in update(a, byte, W.orb(a sub byte, mask)) end + + fun reset (a, i) = + let val byte = int((word i) >> 0w3) + val mask = W.notb(W.<< (0w1, (word i) & 0w7)) + in update(a, byte, W.andb(a sub byte, mask)) end + + fun clear a = modify (fn _ => 0wx0) a + + fun copy (a) = tabulate (length a, fn i => a sub i) + + fun toString (a) = + let fun f i = if i < length a then W.toString(a sub i)::f(i+1) else [] + val s = String.concat(f 0) + in "[" ^ s ^ "]" end + + fun contains (a, i) = + let val byte = int((word i) >> 0w3) + val mask = W.<<(0w1, (word i) & 0w7) + in W.andb(A.sub(a, byte), mask) <> 0wx0 end + + fun markAndTest (a, i) = + let val byte = int((word i) >> 0w3) + val mask = W.<<(0w1, (word i) & 0w7) + val word = A.sub(a,byte) + in if W.andb(word, mask) <> 0wx0 then + true + else + (A.update(a, byte, W.orb(word, mask)); false) + end + + fun unmarkAndTest (a, i) = + let val byte = int(word i >> 0w3) + val mask = W.<<(0w1, (word i) & 0w7) + val word = A.sub(a,byte) + in if W.andb(word, mask) <> 0wx0 then + (A.update(a, byte, W.andb(word,W.notb mask)); true) + else + false + end + +end + diff --git a/MLRISC/library/cache.sml b/MLRISC/library/cache.sml new file mode 100644 index 0000000..e0e85d1 --- /dev/null +++ b/MLRISC/library/cache.sml @@ -0,0 +1,34 @@ +(* + * This is a simple cache datatype. + * + * -- Allen + *) + +signature CACHE_REF = +sig + + type 'a cache + + val cache : ('a -> 'b) -> 'a -> 'b cache + val flush : 'a cache -> unit + val ! : 'a cache -> 'a + val := : 'a cache * 'a -> unit + +end + +structure CacheRef :> CACHE_REF = +struct + + type 'a cache = 'a option ref * (unit -> 'a) + + fun cache f x = (ref NONE, fn _ => f x) + + fun flush (x,_) = x := NONE + + fun ! (r as ref NONE,f) = let val x = f() in r := SOME x; x end + | ! (r as ref(SOME x),f) = x + + val op := = fn((r, _),x) => r := SOME x + +end + diff --git a/MLRISC/library/catlist.sml b/MLRISC/library/catlist.sml new file mode 100644 index 0000000..47a42a5 --- /dev/null +++ b/MLRISC/library/catlist.sml @@ -0,0 +1,76 @@ +(* + * Constant time concatenable list. + * + * -- Allen + *) + +signature CATNETABLE_LIST = +sig + type 'a catlist + val empty : 'a catlist + val null : 'a catlist -> bool + val length : 'a catlist -> int + val cons : 'a * 'a catlist -> 'a catlist + val unit : 'a -> 'a catlist + val append : 'a catlist * 'a catlist -> 'a catlist + val hd : 'a catlist -> 'a + val tl : 'a catlist -> 'a catlist + + val fromList : 'a list -> 'a catlist + val toList : 'a catlist -> 'a list + + val map : ('a -> 'b) -> 'a catlist -> 'b catlist + val app : ('a -> unit) -> 'a catlist -> unit +end + +structure CatnetableList :> CATNETABLE_LIST = +struct + datatype 'a catlist = empty | unit of 'a | @ of 'a catlist * 'a catlist + + fun null empty = true + | null _ = false + + fun length empty = 0 + | length (unit _) = 1 + | length (a @ b) = length a + length b + + fun hd empty = raise Empty + | hd (unit a) = a + | hd (a @ b) = hd a + + fun tl empty = raise Empty + | tl (unit a) = empty + | tl ((unit _) @ a) = a + | tl ((a @ b) @ c) = tl(a @ (b @ c)) + | tl (empty @ c) = tl c + + fun cons(a,empty) = unit a + | cons(a,b) = unit a @ b + + fun append(empty,a) = a + | append(a,empty) = a + | append(a,b) = a @ b + + fun map f l = + let fun g empty = empty + | g (unit a) = unit(f a) + | g (a @ b) = (g a) @ (g b) + in g l end + + fun app f l = + let fun g empty = () + | g (unit a) = f a + | g (a @ b) = (g a; g b) + in g l end + + fun fromList [] = empty + | fromList (a::b) = cons(a,fromList b) + + fun toList l = + let fun g(empty,l) = l + | g(unit a,l) = a::l + | g(a @ b, l) = g(a,g(b,l)) + in g(l,[]) end + +end + diff --git a/MLRISC/library/dynamic-array.sml b/MLRISC/library/dynamic-array.sml new file mode 100644 index 0000000..dc16832 --- /dev/null +++ b/MLRISC/library/dynamic-array.sml @@ -0,0 +1,110 @@ +(* dynamic-array.sml + * + * COPYRIGHT (c) 2015 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * Dynamic (dense) array. + * + * -- Allen + *) + +structure DynArray : sig + + include ARRAY + + val fromArray : 'a Array.array * 'a * int -> 'a array + val baseArray : 'a array -> 'a Array.array + val checkArray: 'a array * 'a Array.array -> unit + val clear : 'a array * int -> unit + val expandTo : 'a array * int -> unit + + end = struct + structure A = Array + structure AS = ArraySlice + type 'a vector = 'a A.vector + datatype 'a array = ARRAY of 'a A.array ref * 'a * int ref + + exception Subscript = General.Subscript + exception Size = General.Size + exception Unimplemented + + infix 9 sub + + val maxLen = A.maxLen + + fun array (n,d) = ARRAY(ref(A.array (n,d)), d, ref 0) + fun clear (ARRAY(a,def,cnt),n) = (a := A.array(n,def); cnt := n) + fun fromArray(a,d,n) = ARRAY(ref a, d, ref n) + + fun baseArray(ARRAY(ref a,_,_)) = a + fun checkArray(ARRAY(ref a,_,_),a') = if a = a' then () else raise Match + + fun length (ARRAY (ref a,_,ref n)) = n + + fun (ARRAY(ref a, d, _)) sub i = A.sub(a,i) handle _ => d + + fun update (ARRAY(r as ref a, d, n), i, e) = + (A.update(a,i,e); n := Int.max(!n,i+1)) handle _ => + let val new_size = Int.max(i+1,!n*2) + val new_size = if new_size < 10 then 10 else new_size + val new_array = A.array(new_size,d) + in A.copy {src = a, dst = new_array, di = 0}; + r := new_array; + n := i+1; + A.update(new_array, i, e) + end + + fun expandTo(arr as ARRAY(_, d, _), N) = update(arr, N-1, d) + + fun tabulate (n, f) = + let val array = A.tabulate(n, f) + val default = A.sub(array,0) + in + ARRAY(ref array, default, ref n) + end handle _ => raise Size + + fun fromList l = + let val array = A.fromList l + val default = A.sub(array,0) + in + ARRAY(ref array, default, ref (A.length array)) + end handle _ => raise Size + + fun slice (ARRAY (ref a, _, ref n)) = AS.slice (a, 0, SOME n) + + fun appi f arr = AS.appi f (slice arr) + fun app f arr = AS.app f (slice arr) + + fun copy { src, dst, di } = + appi (fn (i, x) => update (dst, i + di, x)) src + + fun copyVec { src, dst, di } = + Vector.appi (fn (i, x) => update (dst, i + di, x)) src + + fun foldli f init arr = AS.foldli f init (slice arr) + fun foldri f init arr = AS.foldri f init (slice arr) + fun foldl f init arr = AS.foldl f init (slice arr) + fun foldr f init arr = AS.foldr f init (slice arr) + fun modifyi f arr = AS.modifyi f (slice arr) + fun modify f arr = AS.modify f (slice arr) + fun findi p arr = AS.findi p (slice arr) + fun find p arr = AS.find p (slice arr) + fun exists p arr = AS.exists p (slice arr) + fun all p arr = AS.all p (slice arr) + fun collate c (a1, a2) = AS.collate c (slice a1, slice a2) + fun vector arr = AS.vector (slice arr) + + (* additional operations from Basis Library proposal 2015-003 *) + fun toList arr = foldr (op ::) [] arr + + fun fromVector v = let + val arr = A.fromVector v + val default = A.sub(arr, 0) + in + ARRAY(ref arr, default, ref (A.length arr)) + end + handle _ => raise Size + + val toVector = vector + +end diff --git a/MLRISC/library/dynamic-bitset.sml b/MLRISC/library/dynamic-bitset.sml new file mode 100644 index 0000000..2bc0e3f --- /dev/null +++ b/MLRISC/library/dynamic-bitset.sml @@ -0,0 +1,128 @@ +(* + * Growable bitset. + * + * -- Allen + *) + +structure DynamicBitSet :> BITSET = +struct + + structure A = Word8Array + structure W = Word8 + open A + + infix << >> & || + infix sub + + type bitset = array ref + + val word = Word.fromInt + val int = Word.toInt + val op & = Word.andb + val op >> = Word.>> + val op << = W.<< + + fun create n = ref(array((n+7)div 8, 0wx0)) + + fun size a = length(! a) * 8 + + fun grow (r as ref a, i) = + let val new_size = Int.max(length a * 2, i) + val new_array = array(new_size, 0wx0) + val _ = copy { src = a, si = 0, dst = new_array, di = 0, + len = NONE } + in r := new_array + end + + fun set (r as ref a, i) = + let val byte = int((word i) >> 0w3) + val mask = W.<< (0w1, (word i) & 0w7) + in update(a, byte, W.orb(a sub byte, mask)) end + handle Subscript => (grow (r, i+1); set(r,i)) + + fun reset (r as ref a, i) = + let val byte = int((word i) >> 0w3) + val mask = W.notb(W.<< (0w1, (word i) & 0w7)) + in update(a, byte, W.andb(a sub byte, mask)) end + handle Subscript => () + + fun clear (ref a) = modify (fn _ => 0wx0) a + + fun negate (ref a) = ref(tabulate (length a, fn i => W.notb(a sub i))) + + fun union (ref a, ref b) = + let val m = Int.max(length a, length b) + val n = Int.min(length a, length b) + val c = array(m, 0wx0) + fun f ~1 = () + | f i = update(c, i, W.orb(a sub i, b sub i)) + in f n; + copy { src = if length a > length b then a else b, + si = n, dst = c, di = n, len = NONE }; + ref c + end + + fun intersect (ref a, ref b) = + let val n = Int.min(length a, length b) + val c = array(n, 0wx0) + fun f ~1 = () + | f i = update(c, i, W.andb(a sub i, b sub i)) + in f n; + ref c + end + + fun diff (ref a, ref b) = + let val m = length a + val c = array(m, 0wx0) + fun f ~1 = () + | f i = update(c, i, W.andb(a sub i, W.notb(b sub i))) + in f m; ref c + end + + fun unionWith (r as ref a, ref b) = + (if length b > length a then grow(r, length b) else (); + modifyi (fn (i,x) => W.orb(x,b sub i)) (a, 0, NONE)) + + fun intersectWith (ref a, ref b) = + modifyi (fn (i,x) => W.andb(x,b sub i)) (a, 0, NONE) + + fun diffWith (ref a, ref b) = + modifyi (fn (i,x) => W.andb(x,W.notb(b sub i))) (a, 0, NONE) + + fun complement (ref a) = modify W.notb a + + fun copy (ref a) = ref(tabulate (length a, fn i => a sub i)) + + fun toString (ref a) = + let fun f i = if i < length a then W.toString(a sub i)::f(i+1) else [] + val s = String.concat(f 0) + in "[" ^ s ^ "]" end + + fun contains (r as ref a, i) = + let val byte = int((word i) >> 0w3) + val mask = W.<<(0w1, (word i) & 0w7) + in W.andb(A.sub(a, byte), mask) <> 0wx0 end + handle Subscript => false + + fun markAndTest (r as ref a, i) = + let val byte = int((word i) >> 0w3) + val mask = W.<<(0w1, (word i) & 0w7) + val word = A.sub(a,byte) + in if W.andb(word, mask) <> 0wx0 then + true + else + (A.update(a, byte, W.orb(word, mask)); false) + end handle Subscript => (grow (r, i+1); markAndTest(r,i)) + + fun unmarkAndTest (r as ref a, i) = + let val byte = int(word i >> 0w3) + val mask = W.<<(0w1, (word i) & 0w7) + val word = A.sub(a,byte) + in if W.andb(word, mask) <> 0wx0 then + (A.update(a, byte, W.andb(word,W.notb mask)); true) + else + false + end handle Subscript => false + +end + diff --git a/MLRISC/library/fixed-point.sig b/MLRISC/library/fixed-point.sig new file mode 100644 index 0000000..d83a617 --- /dev/null +++ b/MLRISC/library/fixed-point.sig @@ -0,0 +1,31 @@ +(* + * A simple fixed point datatype + * + * -- Allen + *) + +signature FIXED_POINT = +sig + type fixed_point = Word31.word + + val fixed_point : int * int -> fixed_point + + val zero : fixed_point + val one : fixed_point + + val compare : fixed_point * fixed_point -> order + + val * : fixed_point * fixed_point -> fixed_point + val / : fixed_point * fixed_point -> fixed_point + val scale : fixed_point * int -> fixed_point + val div : fixed_point * int -> fixed_point + val min : fixed_point * fixed_point -> fixed_point + val max : fixed_point * fixed_point -> fixed_point + + val toString : fixed_point -> string + val toReal : fixed_point -> real + val toWord : fixed_point -> word + val fromReal : real -> fixed_point + val fromInt : int -> fixed_point +end + diff --git a/MLRISC/library/fixed-point.sml b/MLRISC/library/fixed-point.sml new file mode 100644 index 0000000..46e89d3 --- /dev/null +++ b/MLRISC/library/fixed-point.sml @@ -0,0 +1,59 @@ +(* + * A simple fixed point datatype + * + * -- Allen + *) + +functor FixedPoint (val decimal_bits : int) : FIXED_POINT = +struct + + structure W = Word31 + + infix << >> + infix & || + + val decimal_places = (decimal_bits + 2) div 3 + val realToString = Real.fmt (StringCvt.FIX(SOME decimal_places)) + + val op<< = W.<< + val op>> = W.>> + val op& = W.andb + val op|| = W.orb + val op div = W.div + val word = W.fromInt + val int = W.toInt + + type fixed_point = W.word + + val bits = word decimal_bits + val bits2 = bits >> 0w1 + val bits4 = bits >> 0w2 + + val zero = 0w0 + val one = 0w1 << bits + val realOne = Real.fromInt(int one) + + val compare = W.compare + + val op* = fn(i:fixed_point,j:fixed_point) => W.*(i >> bits2,j >> bits2) + val op/ = fn(i:fixed_point,j:fixed_point) => (i << bits) div j + + val mask = one - 0w1 + + fun fromInt i = (word i) << bits + fun fixed_point(a,b) = let val b' = word b + in (((word a) << bits) + (b' >> 0w1)) div b' end + fun fromReal r = word(Real.round(Real.*(r, realOne))) + fun toReal p = Real./(Real.fromInt(int p), realOne) + fun toWord f = f + fun toString p = realToString(toReal p) + handle Overflow => "inf" + + fun scale(i,j) = W.*(i, word j) + fun i div j = W.div(i,word j) + + val min = W.min + val max = W.max + +end + diff --git a/MLRISC/library/freq.sig b/MLRISC/library/freq.sig new file mode 100644 index 0000000..97dceff --- /dev/null +++ b/MLRISC/library/freq.sig @@ -0,0 +1,11 @@ +(* + * This represents execution frequency. + * + * -- Allen + *) + +signature FREQ = +sig + type freq = int + include INTEGER where type int = freq +end diff --git a/MLRISC/library/freq.sml b/MLRISC/library/freq.sml new file mode 100644 index 0000000..9e33a1b --- /dev/null +++ b/MLRISC/library/freq.sml @@ -0,0 +1,13 @@ +(* + * Execution frequency + * + * -- Allen + *) + +structure Freq : FREQ = +struct + + type freq = int + open Int + +end diff --git a/MLRISC/library/hash-array.sml b/MLRISC/library/hash-array.sml new file mode 100644 index 0000000..074f3e8 --- /dev/null +++ b/MLRISC/library/hash-array.sml @@ -0,0 +1,217 @@ +(* hash-array.sml + * + * COPYRIGHT (c) 2015 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * Dynamic (sparse) array that uses hashing + * + * -- Allen + *) + +structure HashArray : sig + + include ARRAY + + val array' : int * (int -> 'a) -> 'a array + val array'': int * (int -> 'a) -> 'a array + val clear : 'a array -> unit + val remove : 'a array * int -> unit + val dom : 'a array -> int list + val copy_array : 'a array -> 'a array + + end = struct + + structure A = Array + + datatype 'a default = V of 'a | F of int -> 'a | U of int -> 'a + datatype 'a array = + ARRAY of (int * 'a) list A.array ref * 'a default * int ref * int ref + + type 'a vector = 'a Vector.vector + + val maxLen = A.maxLen + + fun array(n,d) = ARRAY(ref(A.array(16,[])),V d,ref n,ref 0) + fun array'(n,f) = ARRAY(ref(A.array(16,[])),F f,ref n,ref 0) + fun array''(n,f) = ARRAY(ref(A.array(16,[])),U f,ref n,ref 0) + fun clear(ARRAY(r,d,n,c)) = (r := A.array(16,[]); n := 0; c := 0) + + fun roundsize n = + let fun loop i = if i >= n then i else loop(i+i) + in loop 1 end + + fun copy_array(ARRAY(ref a,d,ref n,ref c)) = + let val a' = A.array(n,[]) + val _ = A.copy{src=a,dst=a',di=0} + in ARRAY(ref a',d,ref n,ref c) + end + + val itow = Word.fromInt + val wtoi = Word.toIntX + fun index(a, i) = wtoi(Word.andb(itow i, itow(Array.length a - 1))) + + fun tabulate(n,f) = + let val N = n*n+1 + val N = if N < 16 then 16 else roundsize N + val a = A.array(N,[]) + fun ins i = + let val pos = index(a, i) + val x = f i + in A.update(a,pos,(i,x)::A.sub(a,pos)); x + end + fun insert 0 = ins 0 + | insert i = (ins i; insert(i-1)) + in if n < 0 then + ARRAY(ref a,F(fn _ => raise Subscript),ref 0,ref 0) + else + ARRAY(ref a,V(insert(n-1)),ref n,ref n) + end + + fun fromList l = + let val n = length l + val N = n*n+1 + val N = if N < 16 then 16 else roundsize N + val a = A.array(N,[]) + fun ins(i,x) = + let val pos = index(a,i) + in A.update(a,pos,(i,x)::A.sub(a,pos)); x + end + fun insert(i,[]) = F(fn _ => raise Subscript) + | insert(i,[x]) = V(ins(i,x)) + | insert(i,x::l) = (ins(i,x); insert(i+1,l)) + in ARRAY(ref a,insert(0,l),ref n,ref n) + end + + fun length(ARRAY(_,_,ref n,_)) = n + + fun sub(a' as ARRAY(ref a,d,_,_),i) = + let val pos = index(a,i) + fun search [] = (case d of + V d => d + | F f => f i + | U f => let val x = f i + in update(a',i,x); x end + ) + | search ((j,x)::l) = if i = j then x else search l + in search(A.sub(a,pos)) end + + and update(a' as ARRAY(ref a,_,n,s as ref size),i,x) = + let val N = A.length a + val pos = index(a,i) + fun change([],l) = + if size+size >= N then grow(a',i,x) + else (s := size + 1; A.update(a,pos,(i,x)::l)) + | change((y as (j,_))::l',l) = + if j = i then A.update(a,pos,(i,x)::l'@l) + else change(l',y::l) + in + change(A.sub(a,pos),[]); + if i >= !n then n := i+1 else () + end + + and grow(ARRAY(a' as ref a,_,_,_),i,x) = + let val N = A.length a + val N' = N+N + val a'' = A.array(N',[]) + fun insert(i,x) = + let val pos = index(a'',i) + in A.update(a'',pos,(i,x)::A.sub(a'',pos)) end + in + A.app (List.app insert) a; + insert(i,x); + a' := a'' + end + + fun remove(a' as ARRAY(ref a,_,n,s as ref size),i) = + let val N = A.length a + val pos = index(a,i) + fun change([],_) = () + | change((y as (j,_))::l',l) = + if j = i then (s := size - 1; A.update(a,pos,l'@l)) + else change(l',y::l) + in change(A.sub(a,pos),[]) + end + + (* These seem bogus since they do not run in order *) + fun appi f (ARRAY(ref a,_,ref n,_)) = A.app (List.app f) a + fun app f (ARRAY(ref a,_,_,_)) = A.app (List.app (fn (_,x) => f x)) a + + fun copy { src, dst, di } = + appi (fn (i, x) => update (dst, i, x)) src + + fun copyVec { src, dst, di } = + Vector.appi (fn (i, x) => update (dst, di + i, x)) src + + (* These seem bogus since they do not run in order *) + fun foldli f e (ARRAY(ref a,_,_,_)) = + A.foldl (fn (l, e) => List.foldl (fn ((i,x),e) => f (i,x,e)) e l) e a + fun foldri f e (ARRAY(ref a,_,_,_)) = + A.foldr (fn (l, e) => List.foldr (fn ((i,x),e) => f (i,x,e)) e l) e a + + fun foldl f e (ARRAY(ref a,_,_,_)) = + A.foldl (fn (l,e) => List.foldl (fn ((_,x),e) => f(x,e)) e l) e a + fun foldr f e (ARRAY(ref a,_,_,_)) = + A.foldr (fn (l,e) => List.foldr (fn ((_,x),e) => f(x,e)) e l) e a + + fun modifyi f (ARRAY(ref a,_,_,_)) = + A.modify (List.map (fn (i,x) => (i, f (i, x)))) a + + fun modify f (ARRAY(ref a,_,_,_)) = + A.modify (List.map (fn (i,x) => (i,f x))) a + + fun dom(ARRAY(ref a,_,_,_)) = + A.foldl (fn (e,l) => List.foldr (fn ((i,_),l) => i::l) l e) [] a + + fun findi p (ARRAY(ref a,_,_,_)) = let + val len = A.length a + fun fnd i = + if i >= len then NONE + else case List.find p (A.sub (a, i)) of + NONE => fnd (i + 1) + | some => some + in + fnd 0 + end + + fun find p (ARRAY(ref a,_,_,_)) = let + val len = A.length a + fun fnd i = + if i >= len then NONE + else case List.find (p o #2) (A.sub (a, i)) of + NONE => fnd (i + 1) + | SOME (_, x) => SOME x + in + fnd 0 + end + + fun exists p arr = isSome (find p arr) + fun all p arr = not (isSome (find (not o p) arr)) + fun collate _ _ = raise Fail "HashArray.collate unimplemented" + + fun vector arr = Vector.fromList (rev (foldl op :: [] arr)) + + (* additional operations from Basis Library proposal 2015-003 *) + fun toList arr = foldr (op ::) [] arr + + fun fromVector v = let + val n = Vector.length v + val N = n*n+1 + val N = if N < 16 then 16 else roundsize N + val a = A.array(N, []) + fun ins (i, x) = let + val pos = index(a, i) + in + A.update(a, pos, (i,x)::A.sub(a, pos)); x + end + fun lp i = if (i < n) + then (ins (i, Vector.sub(v, i)); lp(i+1)) + else if (i = 0) + then F(fn _ => raise Subscript) + else V(Vector.sub(v, i-1)) + in + ARRAY(ref a, lp 0, ref n, ref n) + end + + val toVector = vector + +end diff --git a/MLRISC/library/hash-table.sig b/MLRISC/library/hash-table.sig new file mode 100644 index 0000000..f02d7a7 --- /dev/null +++ b/MLRISC/library/hash-table.sig @@ -0,0 +1,29 @@ +(* + * Signature of the hash table datatype + * + * -- Allen + *) + +signature HASHTABLE = +sig + + type ('a,'b) table + + val create : { hash : 'a -> word, + == : 'a * 'a -> bool, + exn : exn, + size : int + } -> ('a,'b) table + + val size : ('a,'b) table -> int + val clear : ('a,'b) table -> unit + val insert : ('a,'b) table -> 'a * 'b -> unit + val remove : ('a,'b) table -> 'a -> unit + val lookup : ('a,'b) table -> 'a -> 'b + val copy : ('a,'b) table -> ('a,'b) table + val app : ('a * 'b -> unit) -> ('a,'b) table -> unit + val map : ('a * 'b -> 'c) -> ('a,'b) table -> 'c list + val fold : ('a * 'b * 'c -> 'c) -> 'c -> ('a,'b) table -> 'c + +end + diff --git a/MLRISC/library/hash-table.sml b/MLRISC/library/hash-table.sml new file mode 100644 index 0000000..7babbc5 --- /dev/null +++ b/MLRISC/library/hash-table.sml @@ -0,0 +1,83 @@ +(* + * Hash table. + * + * -- Allen + *) +structure Hashtable :> HASHTABLE = +struct + + structure A = Array + + type ('a,'b) table = ('a -> word) * + ('a * 'a -> bool) * + exn * + ('a * 'b) list A.array ref * + int ref + + infix == + + fun create{hash,==,exn,size} = (hash,op==,exn,ref(A.array(size,[])),ref 0) + fun copy(hash,op==,exn,ref a,ref c) = + (hash,op==,exn,ref(A.tabulate(A.length a,fn i => A.sub(a,i))), ref c) + fun size (_,_,_,_,ref n) = n + fun clear (_,_,_,ref a,c) = + let fun f ~1 = () + | f i = (A.update(a,i,[]); f(i-1)) + in f(A.length a - 1); c := 0 end + fun insert (hash,op==,exn,A as ref a,c) (k,v) = + let val N = A.length a + val h = Word.toIntX(hash k) mod N + val es = A.sub(a,h) + fun ins ([],es') = (A.update(a,h,(k,v)::es'); + c := !c + 1; + if !c >= N then grow(hash,A,N) else () + ) + | ins ((e as (k',_))::es,es') = + if k == k' then A.update(a,h,(k,v)::es'@es) + else ins(es,e::es') + in ins (es,[]) + end + + and grow(hash,A as ref a,N) = + let val M = N + N + val M = if M < 13 then 13 else M + val a' = A.array(M,[]) + fun ins (k,v) = let val h = Word.toIntX(hash k) mod M + in A.update(a',h,(k,v)::A.sub(a',h)) end + in A.app (fn es => app ins es) a; + A := a' + end + + fun remove (hash,op==,exn,ref a,c) k = + let val N = A.length a + val h = Word.toIntX(hash k) mod N + val es = A.sub(a,h) + fun del ([],es') = () + | del ((e as (k',_))::es,es') = + if k == k' then (A.update(a,h,es'@es); c := !c - 1) + else del(es,e::es') + in del (es,[]) + end + + fun lookup(hash,op==,exn,ref a,_) k = + let val N = A.length a + val h = Word.toIntX(hash k) mod N + fun find [] = raise exn + | find ((k',v)::es) = if k == k' then v else find es + in find(A.sub(a,h)) + end + + fun app f (_,_,_,ref A,_) = A.app (List.app f) A + + fun map f (_,_,_,ref A,_) = + let fun fl([],x) = x + | fl((k,v)::es,x) = f(k,v)::fl(es,x) + in A.foldr fl [] A end + + fun fold f x (_,_,_,ref A,_) = + let fun fl([],x) = x + | fl((k,v)::es,x) = f(k,v,fl(es,x)) + in A.foldr fl x A end + +end + diff --git a/MLRISC/library/hashBag.sig b/MLRISC/library/hashBag.sig new file mode 100644 index 0000000..078cbc8 --- /dev/null +++ b/MLRISC/library/hashBag.sig @@ -0,0 +1,37 @@ +(* + * Bag datatype that uses hashing + * + * -- Allen + *) + +signature HASH_BAG = +sig + + type 'a bag + + val create : { order : 'a * 'a -> order, + hash : 'a -> int, + exn : exn + } -> int -> 'a bag + + val size : 'a bag -> int + val bucketSize : 'a bag -> int + val isEmpty : 'a bag -> bool + val insert : 'a bag -> 'a -> unit + val insertN : 'a bag -> 'a * int -> unit + val remove : 'a bag -> 'a -> unit + val removeN : 'a bag -> 'a * int -> unit + val removeAll : 'a bag -> 'a -> unit + val toList : 'a bag -> ('a * int) list + val toDupList : 'a bag -> 'a list + val clear : 'a bag -> unit + val contains : 'a bag -> 'a -> bool + val count : 'a bag -> 'a -> int + val app : ('a * int -> unit) -> 'a bag -> unit + val dupApp : ('a -> unit) -> 'a bag -> unit + val fold : (('a * int) * 'b -> 'b) -> 'b -> 'a bag -> 'b + val dupFold : ('a * 'b -> 'b) -> 'b -> 'a bag -> 'b + val toString : ('a -> string) -> 'a bag -> string + +end + diff --git a/MLRISC/library/hashBag.sml b/MLRISC/library/hashBag.sml new file mode 100644 index 0000000..941d752 --- /dev/null +++ b/MLRISC/library/hashBag.sml @@ -0,0 +1,72 @@ +(* + * Bag datatype that uses hashing + * + * -- Allen + *) + +structure HashBag :> HASH_BAG = +struct + + structure S = HashMap + + type 'a bag = ('a,int) S.map * int ref + + fun create x n = (S.create x n, ref 0) + + fun insert (bag,c) i = + (S.update bag ((i,1),fn x => x + 1); c := !c + 1) + + fun insertN (bag,c) (i,n:int) = + (S.update bag ((i,n),fn x => x + n); c := !c + n) + + fun size (_,c) = !c + + fun bucketSize (bag,_) = S.bucketSize bag + + fun isEmpty (_,c) = !c = 0 + + fun remove (bag,c) i = + let val x = S.lookupOrElse bag 0 i + in if x > 0 then (S.insert bag (i,x-1); c := !c - 1) else () + end + + fun removeN (bag,c) (i,n) = + let val x = S.lookupOrElse bag 0 i + in if x > n then (S.insert bag (i,x-n); c := !c - n) + else (c := !c - Int.min(x,n); S.remove bag i) + end + + fun removeAll (bag,c) i = S.remove bag i + + fun toList (bag,_) = S.toList bag + + fun clear (bag,c) = (S.clear bag; c := 0) + + fun contains (bag,_) i = S.contains bag i + + fun count (bag,_) i = S.lookupOrElse bag 0 i + + fun app f (bag,_) = S.app f bag + + fun dupApp f (bag,_) = + let fun f' (x,0) = () + | f' (x,n) = (f x; f'(x,n-1)) + in + S.app f' bag + end + + fun fold f x (bag,_) = S.fold f x bag + + fun dupFold f x (bag,_) = + let fun f' ((x,0),l) = l + | f' ((x,n),l) = f'((x,n-1),f(x,l)) + in S.fold f' x bag + end + + fun toDupList bag = dupFold (op::) [] bag + + fun toString str bag = + "{" ^ dupFold (fn (x,"") => str x + | (x,l) => str x ^ ", " ^ l) "" bag ^ "}" +end + diff --git a/MLRISC/library/hashMap.sig b/MLRISC/library/hashMap.sig new file mode 100644 index 0000000..fc4be26 --- /dev/null +++ b/MLRISC/library/hashMap.sig @@ -0,0 +1,34 @@ +(* + * Map datatype that uses hashing. + * + * -- Allen + *) + +signature HASH_MAP = +sig + + type ('a,'b) map + + val create : { order : 'a * 'a -> order, + hash : 'a -> int, + exn : exn + } -> int -> ('a,'b) map + + val size : ('a,'b) map -> int + val bucketSize : ('a,'b) map -> int + val isEmpty : ('a,'b) map -> bool + val insert : ('a,'b) map -> ('a * 'b) -> unit + val update : ('a,'b) map -> (('a * 'b) * ('b -> 'b)) -> unit + val remove : ('a,'b) map -> 'a -> unit + val lookup : ('a,'b) map -> 'a -> 'b + val lookupOrElse : ('a,'b) map -> 'b -> 'a -> 'b + val toList : ('a,'b) map -> ('a * 'b) list + val clear : ('a,'b) map -> unit + val contains : ('a,'b) map -> 'a -> bool + val app : ('a * 'b -> unit) -> ('a,'b) map -> unit + val fold : (('a * 'b) * 'c -> 'c) -> 'c -> ('a,'b) map -> 'c + val toString : (('a -> string) * ('b -> string)) -> ('a,'b) map + -> string + +end + diff --git a/MLRISC/library/hashMap.sml b/MLRISC/library/hashMap.sml new file mode 100644 index 0000000..c77c981 --- /dev/null +++ b/MLRISC/library/hashMap.sml @@ -0,0 +1,154 @@ +(* + * map datatype that uses hashing. + * + * -- allen + *) + +structure HashMap :> HASH_MAP = +struct + + structure A = Array + + datatype 'a tree = NODE of 'a * 'a tree * 'a tree | EMPTY + + datatype ('a,'b) map = + MAP of + { table : ('a * 'b) tree Array.array ref, + size : int ref, + order : 'a * 'a -> order, + hash : 'a -> int, + exn : exn + } + + fun create { order, hash, exn } N = + let val N = if N <= 10 then 10 else N + in + MAP { table = ref(Array.array(N,EMPTY)), + size = ref 0, + order = order, + hash = hash, + exn = exn + } + end + + fun size (MAP { size, ... }) = !size + + fun bucketSize (MAP { table, ... }) = Array.length (!table) + + fun isEmpty (MAP { size, ... }) = !size = 0 + + fun clear (MAP { size, table, ... }) = + (table := A.array(A.length(!table),EMPTY); size := 0) + + and insert (m as MAP { size, table = ref T, order, hash, exn,...}) + (e as (x,y)) = + let val pos = hash x mod A.length T + fun ins EMPTY = (size := !size + 1; NODE(e,EMPTY,EMPTY)) + | ins (NODE(e' as (x',y'),l,r)) = + case order(x,x') of + LESS => NODE(e',ins l,r) + | EQUAL => NODE(e,l,r) + | GREATER => NODE(e',l,ins r) + in A.update(T,pos,ins(A.sub(T,pos))); + if !size > 6 * A.length T then + grow m + else () + end + + and grow (MAP { size, table = table as ref T, order, hash, exn, ... }) = + let val m2 as + MAP{table = ref T',...} = create{ order=order, hash=hash, exn=exn } + (!size * 2 + 10) (* : ('a,'b) map *) + val ins = insert m2 + fun loop EMPTY = () + | loop (NODE(e,l,r)) = (ins e; loop l; loop r) + in A.app loop T; table := T' + end + + and update (m as MAP { size, table = ref T, order, hash, exn,...}) + (e as (x,y), f) = + let val pos = hash x mod A.length T + fun ins EMPTY = (size := !size + 1; NODE(e,EMPTY,EMPTY)) + | ins (NODE(e' as (x',y'),l,r)) = + case order(x,x') of + LESS => NODE(e',ins l,r) + | EQUAL => NODE((x',f y'),l,r) + | GREATER => NODE(e',l,ins r) + in A.update(T,pos,ins(A.sub(T,pos))); + if !size > 6 * A.length T then + grow m + else () + end + + fun remove (MAP { size, table = ref T, order, hash, exn,...}) x = + let val pos = hash x mod A.length T + fun del EMPTY = EMPTY + | del (NODE(e' as (x',_),l,r)) = + case order(x,x') of + LESS => NODE(e',del l,r) + | EQUAL => (size := !size - 1; + case (l,r) of + (EMPTY,r) => r + | (l,EMPTY) => l + | _ => let val (leftmost,r') = delLeftMost r + in NODE(leftmost,l,r') + end + ) + | GREATER => NODE(e',l,del r) + and delLeftMost EMPTY = raise exn + | delLeftMost (NODE(e,EMPTY,r)) = (e,r) + | delLeftMost (NODE(e,l,r)) = + let val (e',r') = delLeftMost r + in (e',NODE(e,l,r')) + end + + in A.update(T,pos,del(A.sub(T,pos))) + end + + fun lookup (MAP { table = ref T, order, hash, exn, ... }) x = + let val pos = hash x mod A.length T + fun look EMPTY = raise exn + | look (NODE(e' as (x',y'),l,r)) = + case order(x,x') of + LESS => look l + | EQUAL => y' + | GREATER => look r + in look (A.sub(T,pos)) + end + + fun lookupOrElse m default x = lookup m x handle _ => default + + fun contains (MAP { table = ref T, order, hash, ... }) x = + let val pos = hash x mod A.length T + fun find EMPTY = false + | find (NODE(e' as (x',y'),l,r)) = + case order(x,x') of + LESS => find l + | EQUAL => true + | GREATER => find r + in find(A.sub(T,pos)) + end + + fun fold f x = + fn (MAP { table = ref T, ... }) => + let fun collect (EMPTY,L) = L + | collect (NODE(e,l,r),L) = collect(l,collect(r,f(e,L))) + in A.foldl (fn (t,l) => collect(t,l)) x T + end + + fun app f = + fn (MAP { table = ref T, ... }) => + let fun appTree EMPTY = () + | appTree (NODE(e,l,r)) = (f e; appTree l; appTree r) + in A.app appTree T + end + + fun toList map = fold (op::) [] map + + fun toString (f,g) map = + "{" ^ fold (fn ((x,y),"") => "(" ^ f x ^ ", " ^ g y ^ ")" + | ((x,y),l) => "(" ^ f x ^ ", " ^ g y ^ "), " ^ l + ) "" map ^ "}" + +end + diff --git a/MLRISC/library/hashMultimap.sig b/MLRISC/library/hashMultimap.sig new file mode 100644 index 0000000..0f60619 --- /dev/null +++ b/MLRISC/library/hashMultimap.sig @@ -0,0 +1,38 @@ +(* + * Multimap datatype that uses hashing. + * + * -- allen + *) + +signature HASH_MULTIMAP = +sig + + type ('a,'b) multimap + + val create : { order : 'a * 'a -> order, + hash : 'a -> int, + exn : exn + } -> int -> ('a,'b) multimap + + val size : ('a,'b) multimap -> int + val bucketSize : ('a,'b) multimap -> int + val isEmpty : ('a,'b) multimap -> bool + val insert : ('a,'b) multimap -> ('a * 'b) -> unit + val update : ('a,'b) multimap -> ('a * 'b list) -> unit + val removeAll : ('a,'b) multimap -> 'a -> unit + val lookup : ('a,'b) multimap -> 'a -> 'b list + val toList : ('a,'b) multimap -> ('a * 'b list) list + val toDupList : ('a,'b) multimap -> ('a * 'b) list + val clear : ('a,'b) multimap -> unit + val contains : ('a,'b) multimap -> 'a -> bool + val count : ('a,'b) multimap -> 'a -> int + val app : ('a * 'b list -> unit) -> ('a,'b) multimap -> unit + val dupApp : ('a * 'b -> unit) -> ('a,'b) multimap -> unit + val fold : (('a * 'b list) * 'c -> 'c) -> 'c + -> ('a,'b) multimap -> 'c + val dupFold : (('a * 'b) * 'c -> 'c) -> 'c -> ('a,'b) multimap -> 'c + val toString : (('a -> string) * ('b -> string)) -> ('a,'b) multimap + -> string + +end + diff --git a/MLRISC/library/hashMultimap.sml b/MLRISC/library/hashMultimap.sml new file mode 100644 index 0000000..3ecad4b --- /dev/null +++ b/MLRISC/library/hashMultimap.sml @@ -0,0 +1,76 @@ +(* + * Multimap datatype that uses hashing. + * + * -- allen + *) + +structure HashMultimap :> HASH_MULTIMAP = +struct + + structure S = HashMap + + type ('a,'b) multimap = ('a,'b list) S.map * int ref + + + fun create x n = (S.create x n, ref 0) + + fun size (_,c) = !c + fun bucketSize (m,_) = S.bucketSize m + fun isEmpty (_,c) = !c = 0 + + fun insert (m,c) (e as (x,y)) = + (S.update m ((x,[y]),fn ys => y::ys); c := !c + 1) + + fun removeAll (m,c) i = + let val stuff = S.lookup m i + in S.remove m i; c := !c - length stuff + end handle _ => () + + fun update (m,c) (e as (x,ys)) = + let val stuff = S.lookupOrElse m [] x + in S.insert m e; c := !c - length stuff + length ys + end + + fun lookup (m,_) i = S.lookup m i + + fun contains (m,_) i = S.contains m i + + fun count (m,_) i = length(S.lookupOrElse m [] i) + + fun toList (m,_) = S.toList m + + fun toDupList (m,_) = + let fun collect (x,[],l) = l + | collect (x,h::t,l) = (x,h)::collect(x,t,l) + in + S.fold (fn ((x,ys),l) => collect (x,ys,l)) [] m + end + + fun clear (m,c) = (S.clear m; c := 0) + + fun dupApp f (m,_) = + let fun call (x,[]) = () + | call (x,h::t) = (f(x,h); call(x,t)) + in + S.app call m + end + + fun app f (m,_) = S.app f m + + fun dupFold f x (m,_) = + let fun collect((x,[]),l) = l + | collect((x,h::t),l) = collect((x,t),f((x,h),l)) + in + S.fold collect x m + end + + fun fold f x (m,_) = S.fold f x m + + fun toString (f,g) m = + "{" ^ + dupFold (fn ((x,y),"") => "(" ^ f x ^ ", " ^ g y ^ ")" + | ((x,y),l) => "(" ^ f x ^ ", " ^ g y ^ "), " ^ l) + "" m ^ "}" + +end + diff --git a/MLRISC/library/hashSet.sig b/MLRISC/library/hashSet.sig new file mode 100644 index 0000000..b34f361 --- /dev/null +++ b/MLRISC/library/hashSet.sig @@ -0,0 +1,29 @@ +(* + * A set datatype that uses hashing + * + * -- Allen + *) + +signature HASH_SET = +sig + + type 'a set + + val create : { order : 'a * 'a -> order, + hash : 'a -> int + } -> int -> 'a set + + val size : 'a set -> int + val bucketSize : 'a set -> int + val isEmpty : 'a set -> bool + val insert : 'a set -> 'a -> unit + val remove : 'a set -> 'a -> unit + val toList : 'a set -> 'a list + val clear : 'a set -> unit + val contains : 'a set -> 'a -> bool + val app : ('a -> unit) -> 'a set -> unit + val fold : ('a * 'b -> 'b) -> 'b -> 'a set -> 'b + val toString : ('a -> string) -> 'a set -> string + +end + diff --git a/MLRISC/library/hashSet.sml b/MLRISC/library/hashSet.sml new file mode 100644 index 0000000..409c176 --- /dev/null +++ b/MLRISC/library/hashSet.sml @@ -0,0 +1,100 @@ +(* + * A set datatype that uses hashing + * + * -- Allen + *) + +structure HashSet :> HASH_SET = +struct + + structure A = Array + + datatype 'a set = + SET of + { table : 'a list Array.array ref, + size : int ref, + order : 'a * 'a -> order, + hash : 'a -> int + } + + fun create { order, hash } N = + let val N = if N <= 10 then 10 else N + in + SET { table = ref(Array.array(N,[])), + size = ref 0, + order = order, + hash = hash + } + end + + fun size (SET { size, ... }) = !size + + fun bucketSize (SET { table, ... }) = Array.length (!table) + + fun isEmpty (SET { size, ... }) = !size = 0 + + fun clear (SET { size, table, ... }) = + (table := A.array(A.length(!table),[]); size := 0) + + and insert (m as SET { size, table = ref T, order, hash,...}) x = + let val pos = hash x mod A.length T + val list = A.sub(T,pos) + fun ins [] = (size := !size + 1; + A.update(T,pos,x::list); + if !size > 6 * A.length T then grow m else ()) + | ins (x'::rest) = + case order(x,x') of + EQUAL => () + | _ => ins rest + in + ins list + end + + and grow (SET { size, table = table as ref T, order, hash, ... }) = + let val m2 as + SET{table = ref T',...} = create{ order=order, hash=hash } + (!size * 2 + 10) + in A.app (app (insert m2)) T; table := T' + end + + fun remove (SET { size, table = ref T, order, hash,...}) x = + let val pos = hash x mod A.length T + val list = A.sub(T,pos) + fun del ([],list) = () + | del (x'::rest,list) = + case order(x,x') of + EQUAL => (size := !size - 1; + A.update(T,pos,rest@list) + ) + | _ => del (rest,x'::list) + + in del(list,[]) + end + + fun contains (SET { table = ref T, order, hash, ... }) x = + let val pos = hash x mod A.length T + fun find [] = false + | find (x'::rest) = + case order(x,x') of + EQUAL => true + | _ => find rest + in find(A.sub(T,pos)) + end + + fun fold f x = + fn (SET { table = ref T, ... }) => + A.foldl (fn (t,l) => List.foldl f l t) x T + + fun app f = + fn (SET { table = ref T, ... }) => + A.app (List.app f) T + + fun toList set = fold (op::) [] set + + fun toString f set = + "{" ^ fold (fn (x,"") => f x + | (x,l) => f x ^ ", " ^ l + ) "" set ^ "}" + +end + diff --git a/MLRISC/library/heap.sml b/MLRISC/library/heap.sml new file mode 100644 index 0000000..634e896 --- /dev/null +++ b/MLRISC/library/heap.sml @@ -0,0 +1,94 @@ +(* + * This implements a priority queue using a heap + * + * -- Allen + *) + +structure PriorityHeap :> PRIORITY_QUEUE = +struct + structure A = Array + exception EmptyPriorityQueue + exception Unimplemented + + datatype 'a priority_queue = + HEAP of { less : 'a * 'a -> bool, + heap : 'a A.array, + size : int ref + } + + fun createN(less,N,dummy) = + HEAP{less=less, heap = A.array(N,dummy), size = ref 0} + + fun unimplemented() = raise Unimplemented + + fun create _ = unimplemented() + fun merge _ = unimplemented() + fun mergeInto _ = unimplemented() + fun toList _ = unimplemented() + + fun isEmpty (HEAP{ size = ref 0, ... }) = true + | isEmpty _ = false + + fun clear (HEAP{ size, ... }) = size := 0 + + fun min(HEAP{ size = ref 0, ... }) = raise EmptyPriorityQueue + | min(HEAP{ heap, ... }) = A.sub(heap, 0) + + + fun insert(HEAP{ size, heap, less, ...}) x = + let val N = !size + fun siftup 0 = 0 + | siftup i = + let val j = (i-1) div 2 + val y = A.sub(heap,j) + in if less(x,y) then (A.update(heap,i,y); siftup j) + else i + end + in size := N + 1; + A.update(heap,siftup N,x) + end + + fun siftDown(heap, less, N, i, x) = + let fun siftdown (i, x) = + let val j = i + i + 1 + val k = j + 1 + in if j >= N then i + else let val y = A.sub(heap,j) + in if k >= N then + if less(y,x) then go(i,x,j,y) else i + else + let val z = A.sub(heap,k) + in if less(y,x) then + if less(z,y) then go(i,x,k,z) + else go(i,x,j,y) + else if less(z,x) then go(i,x,k,z) + else i + end + end + end + and go(i,x,j,y) = (A.update(heap,i,y); siftdown(j,x)) + val pos_x = siftdown(i, x) + in A.update(heap, pos_x, x); + pos_x + end + + fun deleteMin(HEAP{ size = ref 0, ...}) = raise EmptyPriorityQueue + | deleteMin(HEAP{ size, heap, less, ...}) = + let val N = !size - 1 + val min = A.sub(heap,0) + val x = A.sub(heap,N) + val x_pos = siftDown(heap, less, N, 0, x) + in size := N; + min + end + + fun fromList less data = + let val heap = A.fromList data + val N = A.length heap + fun make_heap ~1 = () + | make_heap i = + (siftDown(heap,less,N,i,A.sub(heap,i)); make_heap(i-1)) + in if N >= 2 then make_heap((N+1) div 2) else (); + HEAP{ less = less, heap = heap, size = ref N } + end +end diff --git a/MLRISC/library/int-set.sml b/MLRISC/library/int-set.sml new file mode 100644 index 0000000..dc86be9 --- /dev/null +++ b/MLRISC/library/int-set.sml @@ -0,0 +1,111 @@ +(* + * A fixed capacity integer set datatype + * + * -- Allen + *) + +signature INTSET = +sig + + type intset + + val intset : int -> intset + val contains : intset * int -> bool + val add : intset * int -> unit + val remove : intset * int -> unit + val clear : intset -> unit + val size : intset -> int + val capacity : intset -> int + val is_empty : intset -> bool + val app : (int -> unit) -> intset -> unit + val fold : (int * 'a -> 'a) -> 'a -> intset -> 'a + val toList : intset -> int list + val toString : intset -> string + val copy : intset -> intset + val + : intset * intset -> intset + val - : intset * intset -> intset + val * : intset * intset -> intset + val union : intset * intset -> unit + val diff : intset * intset -> unit + +end + +structure IntSet :> INTSET = +struct + + structure A = Array + datatype intset = SET of {stack : int A.array, + pos : int A.array, + count : int ref + } + fun intset n = SET{stack=A.array(n,0),pos=A.array(n,0),count=ref 0} + fun contains(SET{stack,pos,count},i) = + let val j = A.sub(pos,i) + in j < !count andalso A.sub(stack,j) = i end + + fun add(SET{stack,pos,count},i) = + let val j = A.sub(pos,i) + val n = !count + in if j < n andalso A.sub(stack,j) = i then () + else (A.update(stack,n,i); A.update(pos,i,n); count := n + 1) + end + fun remove(SET{stack,pos,count},i) = + let val j = A.sub(pos,i) + val n = !count + val k = A.sub(stack,j) + in if j < n andalso i = k then + let val k' = A.sub(stack,n-1) + in A.update(stack,j,k'); + A.update(pos,k',j); + count := n - 1 + end + else () + end + + fun clear(SET{count,...}) = count := 0 + fun size(SET{count,...}) = !count + fun capacity(SET{stack,...}) = A.length stack + fun is_empty(SET{count,...}) = !count = 0 + fun app f (SET{count,stack,...}) = + let fun g ~1 = () + | g i = (f(A.sub(stack,i)); g(i-1)) + in g(!count - 1) end + fun fold f x (SET{count,stack,...}) = + let fun g(~1,x) = x + | g(i,x) = g(i-1,f(A.sub(stack,i),x)) + in g(!count - 1,x) end + fun toList set = fold op:: [] set + fun toString set = + String.concat( + "{"::fold (fn (i,[x]) => [Int.toString i,x] + | (i,s) => Int.toString i::","::s) ["}"] set) + + fun copy (SET{stack,pos,count}) = + let val N = A.length stack + val stack' = A.array(N,0) + val pos' = A.array(N,0) + val n = !count + fun f(i,x) = (A.update(stack',i,x);A.update(pos',x,i)) + in A.appi f (stack,0,SOME n); + SET{stack=stack', + pos =pos', + count=ref n + } + end + + fun union(s1,s2) = app (fn x => add(s2,x)) s1 + fun diff(s1,s2) = app (fn x => remove(s2,x)) s1 + fun s1 + s2 = let val s3 = copy s1 + in union(s2,s3); s3 end + fun s1 - s2 = let val s3 = copy s1 + in diff(s2,s3); s3 end + fun s1 * s2 = let val s3 = intset(capacity s1) + in app (fn x => if contains(s2,x) then add(s3,x) else ()) + s1; + s3 + end + + + +end + diff --git a/MLRISC/library/intmap.sig b/MLRISC/library/intmap.sig new file mode 100644 index 0000000..88162d8 --- /dev/null +++ b/MLRISC/library/intmap.sig @@ -0,0 +1,48 @@ +(* Copyright 1989 by AT&T Bell Laboratories *) +signature INTMAP = + sig + type 'a intmap + + (* return a named intmap *) + val namednew : string * int * exn -> 'a intmap + + (* return an unnamed intmap *) + val new : int * exn -> 'a intmap + + (* return the number of elements *) + val elems : 'a intmap -> int + + (* insert a new binding *) + val add : 'a intmap -> int * 'a -> unit + + (* remove a key; no effect if the key does not exists *) + val rmv : 'a intmap -> int -> unit + + (* lookup a key; raises exception if the key does not exists *) + val map : 'a intmap -> int -> 'a + + (* lookup a key; return the key if no associated binding exists *) + val mapInt : int intmap -> int -> int + + (* lookup a key; return the default value if the key is missing *) + val mapWithDefault : 'a intmap * 'a -> int -> 'a + + (* iterate over an intmap *) + val app : (int * 'a -> unit) -> 'a intmap -> unit + + (* convert an intmap to a list *) + val intMapToList: 'a intmap -> (int * 'a) list + + (* return the keys in an intmap *) + val keys: 'a intmap -> int list + + (* return the values in an intmap *) + val values: 'a intmap -> 'a list + + (* clear an intmap *) + val clear : 'a intmap -> unit + + (* copy an intmap *) + val copy : 'a intmap -> 'a intmap + end + diff --git a/MLRISC/library/intmap.sml b/MLRISC/library/intmap.sml new file mode 100644 index 0000000..3febc24 --- /dev/null +++ b/MLRISC/library/intmap.sml @@ -0,0 +1,117 @@ +(* Copyright 1989 by AT&T Bell Laboratories *) +structure Intmap : INTMAP = +struct + open Array List + infix 9 sub + val wtoi = Word.toIntX + val itow = Word.fromInt + datatype 'a bucket = NIL | B of (int * 'a * 'a bucket) + datatype 'a intmap = + H of {table: 'a bucket array ref,elems: int ref,exn: exn,name: string option} + fun clear(H{table,elems,...}) = + if !elems > 0 then (table := array(32,NIL); elems := 0) else () + fun bucketapp f = + let fun loop NIL = () + | loop(B(i,j,r)) = (f(i,j); loop r) + in loop + end + fun roundsize size = + let fun f x = if x >= size then x else f (x*2) + in f 1 + end + fun namednew(name, size, exn) = + H {table=ref(array(roundsize size,NIL)),elems=ref 0,exn=exn,name=SOME name} + fun new(size, exn) = + H {table=ref(array(roundsize size,NIL)),elems=ref 0,exn=exn,name=NONE} + val elems = fn (H{elems,...}) => !elems + fun index(a, i) = wtoi (Word.andb(itow i, itow(Array.length a - 1))) + fun map (H{table,exn,...}) = + (fn i => let fun find NIL = raise exn + | find(B(i',j,r)) = if i=i' then j else find r + val ref a = table + in find(a sub (index(a, i))) + end) + fun mapWithDefault (H{table,exn,...},default) = + (fn i => let fun find NIL = default + | find(B(i',j,r)) = if i=i' then j else find r + val ref a = table + in find(a sub (index(a, i))) + end) + fun mapInt (H{table,exn,...}) = + (fn i => let fun find NIL = i + | find(B(i',j,r)) = if i=i' then j else find r + val ref a = table + in find(a sub (index(a, i))) + end) + fun rmv (H{table=ref a,elems,...}) i = + let fun f(B(i',j,r)) = if i=i' then (elems := !elems-1; r) else B(i',j,f r) + | f x = x + val indx = index(a, i) + in update(a, indx, f(a sub indx)) + end + fun app f (H{table=ref a,...}) = + let fun zap 0 = () + | zap n = let val m = n-1 in bucketapp f (a sub m); zap m end + in zap(Array.length a) + end + fun add (m as H{table as ref a, elems, name, ...}) (v as (i,j)) = + let val size = Array.length a + in if !elems <> size + then let val index = wtoi (Word.andb(itow i, itow(size-1))) + fun f(B(i',j',r)) = if i=i' then B(i,j,r) else B(i',j',f r) + | f x = (elems := !elems+1; B(i,j,x)) + in update(a,index,f(a sub index)) + end + else let val newsize = size+size + val newsize1 = newsize-1 + val new = array(newsize,NIL) + fun bucket n = + let fun add'(a,b,B(i,j,r)) = + if wtoi (Word.andb(itow i, itow newsize1)) = n + then add'(B(i,j,a),b,r) + else add'(a,B(i,j,b),r) + | add'(a,b,NIL) = + (update(new,n,a); + update(new,n+size,b); + bucket(n+1)) + in add'(NIL,NIL,a sub n) + end + in + bucket 0 handle Subscript => (); + table := new; + add m v + end + end + fun intMapToList(H{table,...})= + let val a = !table; + val last = Array.length a - 1 + fun loop (0, NIL, acc) = acc + | loop (n, B(i,j,r), acc) = loop(n, r, (i,j)::acc) + | loop (n, NIL, acc) = loop(n-1, a sub (n-1), acc) + in loop(last,a sub last,[]) + end + fun values(H{table,...})= + let val a = !table; + val last = Array.length a - 1 + fun loop (0, NIL, acc) = acc + | loop (n, B(i,j,r), acc) = loop(n, r, j::acc) + | loop (n, NIL, acc) = loop(n-1, a sub (n-1), acc) + in loop(last,a sub last,[]) + end + fun keys(H{table,...})= + let val a = !table; + val last = Array.length a - 1 + fun loop (0, NIL, acc) = acc + | loop (n, B(i,j,r), acc) = loop(n, r, i::acc) + | loop (n, NIL, acc) = loop(n-1, a sub (n-1), acc) + in loop(last,a sub last,[]) + end + + fun copy(H{table=ref a,elems,exn,name}) = + let val a' = Array.array(Array.length a,NIL) + in Array.copy{di=0, dst=a', len=NONE, si=0, src=a}; + H{table=ref a', elems=ref(!elems), exn=exn, name=name} + end + +end + diff --git a/MLRISC/library/line-break.sml b/MLRISC/library/line-break.sml new file mode 100644 index 0000000..3f215ca --- /dev/null +++ b/MLRISC/library/line-break.sml @@ -0,0 +1,19 @@ +signature LINE_BREAK = +sig + val lineBreak : int -> string -> string +end + +structure LineBreak : LINE_BREAK = +struct + fun lineBreak maxChars text = + let fun loop([],_,text) = String.concat(rev text) + | loop(s::ss,n,text) = + let val m = String.size s + 1 + val n' = m+n + in if n' > maxChars + then loop(ss, m, s::" "::"\n"::text) + else loop(ss, n', s::" "::text) + end + val toks = String.fields (fn c => c = #" ") text + in loop(toks, 0, []) end +end diff --git a/MLRISC/library/orig-dynamic-array.sml b/MLRISC/library/orig-dynamic-array.sml new file mode 100644 index 0000000..3c8b3e1 --- /dev/null +++ b/MLRISC/library/orig-dynamic-array.sml @@ -0,0 +1,100 @@ +(* + * Dynamic (dense) array. + * + * -- Allen + *) + +structure DynArray : + sig include ARRAY + val fromArray : 'a Array.array * 'a * int -> 'a array + val baseArray : 'a array -> 'a Array.array + val checkArray: 'a array * 'a Array.array -> unit + val clear : 'a array * int -> unit + val expandTo : 'a array * int -> unit + end = + struct + structure A = Array + type 'a vector = 'a A.vector + datatype 'a array = ARRAY of 'a A.array ref * 'a * int ref + + exception Subscript = General.Subscript + exception Size = General.Size + exception Unimplemented + + infix 9 sub + + val maxLen = A.maxLen + + fun array (n,d) = ARRAY(ref(A.array (n,d)), d, ref 0) + fun clear (ARRAY(a,def,cnt),n) = (a := A.array(n,def); cnt := n) + fun fromArray(a,d,n) = ARRAY(ref a, d, ref n) + + fun baseArray(ARRAY(ref a,_,_)) = a + fun checkArray(ARRAY(ref a,_,_),a') = if a = a' then () else raise Match + + fun length (ARRAY (ref a,_,ref n)) = n + + fun (ARRAY(ref a, d, _)) sub i = A.sub(a,i) handle _ => d + + fun update (ARRAY(r as ref a, d, n), i, e) = + (A.update(a,i,e); n := Int.max(!n,i+1)) handle _ => + let val new_size = Int.max(i+1,!n*2) + val new_size = if new_size < 10 then 10 else new_size + val new_array = A.array(new_size,d) + in A.copy {src = a, si = 0, len = NONE, dst = new_array, di = 0}; + r := new_array; + n := i+1; + A.update(new_array, i, e) + end + + fun expandTo(arr as ARRAY(_, d, _), N) = update(arr, N-1, d) + + fun extract (ARRAY(r as ref a, _, ref n), i, j) = A.extract (a, i, j) + + fun copy { src = ARRAY(ref a,_,sz), si, len, dst, di } = + let val n = case len of SOME l => si + l + | NONE => !sz + fun cp(i,j) = + if i < n then (update(dst,j,A.sub(a,i)); cp(i+1,j+1)) else () + in cp (si, di) + end + + fun copyVec { src, si, len, dst = ARRAY(ref a,_,sz), di } = + A.copyVec { src = src, si = si, len = len, dst = a, di = di } + + fun tabulate (n, f) = + let val array = A.tabulate(n, f) + val default = A.sub(array,0) + in + ARRAY(ref array, default, ref n) + end handle _ => raise Size + + fun fromList l = + let val array = A.fromList l + val default = A.sub(array,0) + in + ARRAY(ref array, default, ref (List.length l)) + end handle _ => raise Size + + fun app f (ARRAY (ref a,_,ref n)) = + A.appi (fn (_,x) => f x) (a,0,SOME n) + + fun foldl f u (ARRAY (ref a,_,ref n)) = + A.foldli (fn (_,x,y) => f (x,y)) u (a, 0, SOME n) + + fun foldr f u (ARRAY (ref a,_,ref n)) = + A.foldri (fn (_,x,y) => f (x,y)) u (a, 0, SOME n) + + fun modify f (ARRAY (ref a,_,ref n)) = + A.modifyi (fn (_,x) => f x) (a, 0, SOME n) + + fun appi f (ARRAY(ref a,_,ref n), i, j) = A.appi f (a, i, j) + + fun foldli f u (ARRAY(ref a,_,ref n), i, j) = A.foldli f u (a, i, j) + + fun foldri f u (ARRAY(ref a,_,ref n), i, j) = A.foldri f u (a, i, j) + + fun modifyi f (ARRAY(ref a,_,ref n), i, j) = A.modifyi f (a, i, j) + +end + diff --git a/MLRISC/library/orig-hash-array.sml b/MLRISC/library/orig-hash-array.sml new file mode 100644 index 0000000..5f0a483 --- /dev/null +++ b/MLRISC/library/orig-hash-array.sml @@ -0,0 +1,183 @@ +(* + * Dynamic (sparse) array that uses hashing + * + * -- Allen + *) + +structure HashArray : + sig include ARRAY + val array' : int * (int -> 'a) -> 'a array + val array'': int * (int -> 'a) -> 'a array + val clear : 'a array -> unit + val remove : 'a array * int -> unit + val dom : 'a array -> int list + val copy_array : 'a array -> 'a array + end = +struct + structure A = Array + + datatype 'a default = V of 'a | F of int -> 'a | U of int -> 'a + datatype 'a array = + ARRAY of (int * 'a) list A.array ref * 'a default * int ref * int ref + + type 'a vector = 'a Vector.vector + + val maxLen = A.maxLen + + exception HashArrayUnimplemented + + fun unimplemented _ = raise HashArrayUnimplemented + + fun array(n,d) = ARRAY(ref(A.array(16,[])),V d,ref n,ref 0) + fun array'(n,f) = ARRAY(ref(A.array(16,[])),F f,ref n,ref 0) + fun array''(n,f) = ARRAY(ref(A.array(16,[])),U f,ref n,ref 0) + fun clear(ARRAY(r,d,n,c)) = (r := A.array(16,[]); n := 0; c := 0) + + fun roundsize n = + let fun loop i = if i >= n then i else loop(i+i) + in loop 1 end + + fun copy_array(ARRAY(ref a,d,ref n,ref c)) = + let val a' = A.array(n,[]) + val _ = A.copy{src=a,dst=a',si=0,di=0,len=NONE} + in ARRAY(ref a',d,ref n,ref c) + end + + val itow = Word.fromInt + val wtoi = Word.toIntX + fun index(a, i) = wtoi(Word.andb(itow i, itow(Array.length a - 1))) + + fun tabulate(n,f) = + let val N = n*n+1 + val N = if N < 16 then 16 else roundsize N + val a = A.array(N,[]) + fun ins i = + let val pos = index(a, i) + val x = f i + in A.update(a,pos,(i,x)::A.sub(a,pos)); x + end + fun insert 0 = ins 0 + | insert i = (ins i; insert(i-1)) + in if n < 0 then + ARRAY(ref a,F(fn _ => raise Subscript),ref 0,ref 0) + else + ARRAY(ref a,V(insert(n-1)),ref n,ref n) + end + + fun fromList l = + let val n = length l + val N = n*n+1 + val N = if N < 16 then 16 else roundsize N + val a = A.array(N,[]) + fun ins(i,x) = + let val pos = index(a,i) + in A.update(a,pos,(i,x)::A.sub(a,pos)); x + end + fun insert(i,[]) = F(fn _ => raise Subscript) + | insert(i,[x]) = V(ins(i,x)) + | insert(i,x::l) = (ins(i,x); insert(i+1,l)) + in ARRAY(ref a,insert(0,l),ref n,ref n) + end + + fun length(ARRAY(_,_,ref n,_)) = n + + fun sub(a' as ARRAY(ref a,d,_,_),i) = + let val pos = index(a,i) + fun search [] = (case d of + V d => d + | F f => f i + | U f => let val x = f i + in update(a',i,x); x end + ) + | search ((j,x)::l) = if i = j then x else search l + in search(A.sub(a,pos)) end + + and update(a' as ARRAY(ref a,_,n,s as ref size),i,x) = + let val N = A.length a + val pos = index(a,i) + fun change([],l) = + if size+size >= N then grow(a',i,x) + else (s := size + 1; A.update(a,pos,(i,x)::l)) + | change((y as (j,_))::l',l) = + if j = i then A.update(a,pos,(i,x)::l'@l) + else change(l',y::l) + in + change(A.sub(a,pos),[]); + if i >= !n then n := i+1 else () + end + + and grow(ARRAY(a' as ref a,_,_,_),i,x) = + let val N = A.length a + val N' = N+N + val a'' = A.array(N',[]) + fun insert(i,x) = + let val pos = index(a'',i) + in A.update(a'',pos,(i,x)::A.sub(a'',pos)) end + in + A.app (List.app insert) a; + insert(i,x); + a' := a'' + end + + fun remove(a' as ARRAY(ref a,_,n,s as ref size),i) = + let val N = A.length a + val pos = index(a,i) + fun change([],_) = () + | change((y as (j,_))::l',l) = + if j = i then (s := size - 1; A.update(a,pos,l'@l)) + else change(l',y::l) + in change(A.sub(a,pos),[]) + end + + fun extract (a as ARRAY(_,_,ref n,_),i,j) = + let val j = case j of SOME j => i+j | NONE => n + fun f(k,l) = if k < i then l else f(k-1,sub(a,k)::l) + in + Vector.fromList(f(j-1,[])) + end + + fun copy { src = src as ARRAY(_,_,ref n,_), si, len, dst, di } = + let val j = case len of SOME len => si+len | NONE => n + fun f(k,k') = if k >= j then () + else (update(dst,k',sub(src,k)); f(k+1,k'+1)) + in f(si,di) + end + + val copyVec = unimplemented + + fun app f (ARRAY(ref a,_,_,_)) = A.app (List.app (fn (_,x) => f x)) a + fun foldl f e (ARRAY(ref a,_,_,_)) = + A.foldl (fn (l,e) => List.foldl (fn ((_,x),e) => f(x,e)) e l) e a + fun foldr f e (ARRAY(ref a,_,_,_)) = + A.foldr (fn (l,e) => List.foldr (fn ((_,x),e) => f(x,e)) e l) e a + + fun modify f (ARRAY(ref a,_,_,_)) = + A.modify (List.map (fn (i,x) => (i,f x))) a + + fun appi f (ARRAY(ref a,_,ref n,_),i,j) = + let val j = case j of SOME j => i+j | NONE => n + in A.app (List.app + (fn (k,x) => if k >= i andalso k < j then f(k,x) else ())) a + end + fun foldli f e (ARRAY(ref a,_,ref n,_),i,j) = + let val j = case j of SOME j => i+j | NONE => n + in A.foldl (fn (l,e) => List.foldl + (fn ((k,x),e) => if k >= i andalso k < j then f(k,x,e) else e) e l) + e a + end + fun foldri f e (ARRAY(ref a,_,ref n,_),i,j) = + let val j = case j of SOME j => i+j | NONE => n + in A.foldr (fn (l,e) => List.foldr + (fn ((k,x),e) => if k >= i andalso k < j then f(k,x,e) else e) e l) + e a + end + fun dom(ARRAY(ref a,_,_,_)) = + A.foldl (fn (e,l) => List.foldr (fn ((i,_),l) => i::l) l e) [] a + + fun modifyi f (ARRAY(ref a,_,ref n,_),i,j) = + let val j = case j of SOME j => i+j | NONE => n + in A.modify (List.map(fn (k,x) => if k >= i andalso k < j then (k,f(k,x)) + else (k,x))) a + end +end + diff --git a/MLRISC/library/priQueue.sig b/MLRISC/library/priQueue.sig new file mode 100644 index 0000000..4092549 --- /dev/null +++ b/MLRISC/library/priQueue.sig @@ -0,0 +1,27 @@ +(* + * Signature of an imperative priority queue. + * + * -- Allen + *) + +signature PRIORITY_QUEUE = +sig + + type 'a priority_queue + + exception EmptyPriorityQueue + + val create : ('a * 'a -> bool) -> 'a priority_queue + val createN : ('a * 'a -> bool) * int * 'a -> 'a priority_queue + val isEmpty : 'a priority_queue -> bool + val clear : 'a priority_queue -> unit + val min : 'a priority_queue -> 'a + val deleteMin : 'a priority_queue -> 'a + val merge : 'a priority_queue * 'a priority_queue -> 'a priority_queue + val mergeInto : { src : 'a priority_queue, dst : 'a priority_queue } -> unit + val insert : 'a priority_queue -> 'a -> unit + val fromList : ('a * 'a -> bool) -> 'a list -> 'a priority_queue + val toList : 'a priority_queue -> 'a list + +end + diff --git a/MLRISC/library/priQueue.sml b/MLRISC/library/priQueue.sml new file mode 100644 index 0000000..d581b22 --- /dev/null +++ b/MLRISC/library/priQueue.sml @@ -0,0 +1,82 @@ +(* + * Priority queues implemented as leftist trees + * + * -- Allen + *) + +structure PriorityQueue :> PRIORITY_QUEUE = +struct + + + (* A leftist tree is a binary tree with priority ordering + * with the invariant that the left branch is always the taller one + *) + datatype 'a leftist = NODE of 'a * int * 'a leftist * 'a leftist + | EMPTY + + datatype 'a priority_queue = PQ of { less : 'a * 'a -> bool, + root : 'a leftist ref + } + + exception EmptyPriorityQueue + + (* assume a is smaller than b *) + fun mergeTrees less (a,b) = + let fun dist EMPTY = 0 + | dist (NODE(_,d,_,_)) = d + + fun m (EMPTY,a) = a + | m (a, EMPTY) = a + | m (a as NODE(x,d,l,r), b as NODE(y,d',l',r')) = + let val (root,l,r) = + if less(x,y) then (x,l,m(r,b)) else (y,l',m(r',a)) + val d_l = dist l + val d_r = dist r + val (l,r) = if d_l >= d_r then (l,r) else (r,l) + in + NODE(root,1+Int.max(d_l,d_r),l,r) + end + in m (a, b) + end + + fun create less = PQ { less = less, root = ref EMPTY } + fun createN (less,_,_) = create less + + fun min (PQ { root = ref(NODE(x,_,_,_)), ... }) = x + | min _ = raise EmptyPriorityQueue + + fun isEmpty (PQ { root = ref EMPTY, ... }) = true + | isEmpty _ = false + + fun clear (PQ { root, ... }) = root := EMPTY + + fun deleteMin (PQ { root = root as ref(NODE(x,_,l,r)), less }) = + (root := mergeTrees less (l,r); x) + | deleteMin _ = raise EmptyPriorityQueue + + fun merge (PQ { root = r1, less }, PQ { root = r2, ...}) = + PQ { root = ref(mergeTrees less (!r1,!r2)), less = less } + + fun mergeInto { src = PQ { root = ref t1, less }, + dst = PQ { root = r as ref t2, ...} } = + r := mergeTrees less (t1,t2) + + fun mergeElems (less, q, elements) = + let fun m (q,[]) = q + | m (q,e::es) = m(mergeTrees less (q, NODE(e,1,EMPTY,EMPTY)), es) + in m(q, elements) + end + + fun insert (PQ { root = r as ref t1, less}) x = + r := mergeTrees less (t1,NODE(x,1,EMPTY,EMPTY)) + + fun fromList less list = + PQ { root = ref(mergeElems(less, EMPTY, list)), less = less } + + fun collect (EMPTY, e) = e + | collect (NODE(x,_,l,r),e) = collect(l,collect(r,x::e)) + + fun toList (PQ { root = ref t, ... }) = collect (t, []) + +end + diff --git a/MLRISC/library/probability.sml b/MLRISC/library/probability.sml new file mode 100644 index 0000000..c58bf57 --- /dev/null +++ b/MLRISC/library/probability.sml @@ -0,0 +1,177 @@ +(* probability.sml + * + * COPYRIGHT (c) 2002 Bell Labs, Lucent Technologies. + * + * A representation of probabilities for branch prediction. + *) + +signature PROBABILITY = + sig + + type prob + + exception BadProb + + val never : prob (* 0% probability *) + val unlikely : prob (* very close to 0% *) + val likely : prob (* very close to 100% *) + val always : prob (* 100% probability *) + + val prob : (int * int) -> prob + val fromFreq : int list -> prob list + + val + : (prob * prob) -> prob + val - : (prob * prob) -> prob + val * : (prob * prob) -> prob + val / : (prob * int) -> prob + val not : prob -> prob (* not p == always - p *) + + val percent : int -> prob + + (* combine a conditional branch probability (trueProb) with a + * prediction heuristic (takenProb) using Dempster-Shafer theory. + *) + val combineProb2 : {trueProb : prob, takenProb : prob} -> {t : prob, f : prob} + + val toReal : prob -> real + val toString : prob -> string + + end + +structure Probability :> PROBABILITY = + struct + + open IntInf + + val zero = fromInt 0 + val one = fromInt 1 + val two = fromInt 2 + val hundred = fromInt 100 + fun eq (a, b) = (compare(a, b) = EQUAL) + + (* Probabilities are represented as positive rationals. Zero is + * represented as PROB(0w0, 0w0) and one is represented as + * PROB(0w1, 0w1). There are several invariants about PROB(n, d): + * 1) n <= d + * 2) if n = 0w0, then d = 0w0 (uniqueness of zero) + * 3) if d = 0w1, then n = 0w1 (uniqueness of one) + *) + datatype prob = PROB of (IntInf.int * IntInf.int) + + exception BadProb + + val never = PROB(zero, one) + val unlikely = PROB(one, fromInt 1000) + val likely = PROB(fromInt 999, fromInt 1000) + val always = PROB(one, one) + + fun gcd (m, n) = if eq(n, zero) then m else gcd(n, m mod n) + + fun normalize (n, d) = + if eq(n, zero) then never + else (case compare(n, d) + of LESS => let + val g = gcd(n, d) + in + if eq(g, one) + then PROB(n, d) + else PROB(n div g, d div g) + end + | EQUAL => always + | GREATER => raise BadProb + (* end case *)) + + fun prob (n, d) = + if Int.>(n, d) orelse Int.<(n, 0) orelse Int.<=(d, 0) + then raise Domain + else normalize(fromInt n, fromInt d) + + fun add (PROB(n1, d1), PROB(n2, d2)) = normalize(d2*n1 + d1*n2, d1*d2) + + fun sub (PROB(n1, d1), PROB(n2, d2)) = let + val n1' = d2*n1 + val n2' = d1*n2 + in + if (n1' < n2') then raise BadProb else normalize(n1'-n2', d1*d2) + end + + fun mul (PROB(n1, d1), PROB(n2, d2)) = normalize (n1*n2, d1*d2) + + fun divide (PROB(n, d), m) = if Int.<=(m, 0) + then raise BadProb + else if eq(n, zero) then never + else normalize(n, d * fromInt m) + + fun percent n = + if Int.<(n, 0) then raise BadProb + else normalize(fromInt n, hundred) + + fun fromFreq l = let + fun sum ([], tot) = tot + | sum (w::r, tot) = if Int.<(w, 0) + then raise BadProb + else sum(r, fromInt w + tot) + val tot = sum (l, zero) + in + List.map (fn w => normalize(fromInt w, tot)) l + end + + fun toReal (PROB(n, d)) = + if eq(n, zero) then 0.0 + else if eq(d, one) then 1.0 + else let + val sz = log2 d + val (n, d) = if Int.>=(sz, 30) + then let + val scale = pow(two, Int.-(sz, 30)) + val n = n div scale + in + (if n > zero then n else one, d div scale) + end + else (n, d) + fun toReal n = Real.fromLargeInt(toLarge n) + in + toReal n / toReal d + end + + fun toString (PROB(n, d)) = + if eq(n, zero) then "0" + else if eq(d, one) then "1" + else concat [IntInf.toString n, "/", IntInf.toString d] + + (* combine a conditional branch probability (trueProb) with a + * prediction heuristic (takenProb) using Dempster-Shafer theory. + * The basic equations (from Wu-Larus 1994) are: + * t = trueProb*takenProb / d + * f = ((1-trueProb)*(1-takenProb)) / d + * where + * d = trueProb*takenProb + ((1-trueProb)*(1-takenProb)) + *) + fun combineProb2 {trueProb=PROB(n1, d1), takenProb=PROB(n2, d2)} = let + (* compute sn/sd, where + * sd/sn = (trueProb*takenProb) + (1-trueProb)*(1-takenProb) + *) + val d12 = d1*d2 + val n12 = n1*n2 + val (sn, sd) = let + val n = d12 + two*n12 - (d2*n1) - (d1*n2) + in + (d12, n) + end + (* compute the true probability *) + val t as PROB(tn, td) = normalize(n12*sn, d12*sd) + (* compute the false probability *) + val f = PROB(td-tn, td) + in + {t = t, f = f} + end + + fun not (PROB(n, d)) = PROB(d-n, d) + + val op + = add + val op - = sub + val op * = mul + val op / = divide + + end + diff --git a/MLRISC/library/randlist.sml b/MLRISC/library/randlist.sml new file mode 100644 index 0000000..0f65eed --- /dev/null +++ b/MLRISC/library/randlist.sml @@ -0,0 +1,122 @@ +(* + * Random Access Lists (due to Chris Okasaki) + * + * -- Allen + *) + +signature RANDOM_ACCESS_LIST = +sig + + type 'a rand_list + + (* O(1) operations *) + val empty : 'a rand_list + val length : 'a rand_list -> int + val null : 'a rand_list -> bool + val cons : 'a * 'a rand_list -> 'a rand_list + val hd : 'a rand_list -> 'a + val tl : 'a rand_list -> 'a rand_list + + (* O(log n) operations *) + val sub : 'a rand_list * int -> 'a + val update : 'a rand_list * int * 'a -> 'a rand_list + + (* O(n) operations *) + val fromList : 'a list -> 'a rand_list + val toList : 'a rand_list -> 'a list + + (* O(n) operations *) + val map : ('a -> 'b) -> 'a rand_list -> 'b rand_list + val app : ('a -> unit) -> 'a rand_list -> unit + val foldl : ('a * 'b -> 'b) -> 'b -> 'a rand_list -> 'b + val foldr : ('a * 'b -> 'b) -> 'b -> 'a rand_list -> 'b +end + +structure RandomAccessList :> RANDOM_ACCESS_LIST = +struct + + datatype 'a tree = LEAF of 'a | NODE of 'a tree * 'a * 'a tree + + type 'a rand_list = (int * 'a tree) list + + fun tree_sub (LEAF x,0,_) = x + | tree_sub (LEAF _,_,_) = raise Subscript + | tree_sub (NODE(_,x,_),0,_) = x + | tree_sub (NODE(l,x,r),i,N) = + let val N' = N div 2 + in if i <= N' then tree_sub(l,i-1,N') + else tree_sub(r,i-1-N',N') + end + + fun tree_update (LEAF _,0,x,_) = LEAF x + | tree_update (LEAF _,_,_,_) = raise Subscript + | tree_update (NODE(l,_,r),0,x,_) = NODE(l,x,r) + | tree_update (NODE(l,y,r),i,x,N) = + let val N' = N div 2 + in if i <= N' then NODE(tree_update(l,i-1,x,N'),y,r) + else NODE(l,y,tree_update(r,i-1-N',x,N')) + end + + val empty = [] + + fun null [] = true | null _ = false + + fun length rl = + let fun f([],n) = n + | f((m,_)::l,n) = f(l,m+n) + in f(rl,0) + end + + fun cons (x, rl as ((m,t)::(n,u)::l)) = + if m = n then (m+n+1,NODE(t,x,u))::l + else (1,LEAF x)::rl + | cons (x, rl) = (1,LEAF x)::rl + + fun hd ((_,LEAF x)::_) = x + | hd ((_,NODE(_,x,_))::_) = x + | hd [] = raise Empty + + fun tl ((_,LEAF x)::rl) = rl + | tl ((n,NODE(l,x,r))::rl) = + let val n' = n div 2 + in (n',l)::(n',r)::rl + end + | tl [] = raise Empty + + fun sub([],_) = raise Subscript + | sub((n,t)::rl,i) = if i < n then tree_sub(t,i,n) + else sub(rl,i-n) + + fun update([],_,_) = raise Subscript + | update((p as (n,t))::rl,i,x) = + if i < n then (n,tree_update(t,i,x,n))::rl + else p::update(rl,i-n,x) + + fun map f rl = + let fun g (LEAF x) = LEAF(f x) + | g (NODE(l,x,r)) = NODE(g l,f x,g r) + in List.map (fn (n,t) => (n,g t)) rl + end + + fun app f rl = + let fun g (LEAF x) = f x + | g (NODE(l,x,r)) = (f x; g l; g r) + in List.app (fn (_,t) => g t) rl + end + + fun foldl f u rl = + let fun g (LEAF x,u) = f(x,u) + | g (NODE(l,x,r),u) = g(r,g(l,f(x,u))) + in List.foldl (fn ((_,t),x) => g(t,x)) u rl + end + + fun foldr f u rl = + let fun g (LEAF x,u) = f(x,u) + | g (NODE(l,x,r),u) = f(x,g(l,g(r,u))) + in List.foldr (fn ((_,t),x) => g(t,x)) u rl + end + + fun fromList l = List.foldr cons empty l + fun toList rl = foldr op:: [] rl +end + diff --git a/MLRISC/library/regset.sig b/MLRISC/library/regset.sig new file mode 100644 index 0000000..d7b98a3 --- /dev/null +++ b/MLRISC/library/regset.sig @@ -0,0 +1,35 @@ +(* + * Register set datatype + * + * -- Allen + *) + +signature REGISTER_SET = +sig + + type regset + type reg = int + + val empty : regset + val fromList : reg list -> regset + val sort : reg list -> reg list + val fromSortedList : reg list -> regset + val insert : regset * reg -> regset + val remove : regset * reg -> regset + val insertChanged : regset * reg -> regset * bool + val removeChanged : regset * reg -> regset * bool + val == : regset * regset -> bool + val app : (reg -> unit) -> regset -> unit + val contains : regset * reg -> bool + val exists : regset * reg list -> bool + val isEmpty : regset -> bool + val toList : regset -> reg list + val toString : regset -> string + val union : regset list -> regset + val intersects : regset list -> regset + val + : regset * regset -> regset + val - : regset * regset -> regset + val * : regset * regset -> regset + +end + diff --git a/MLRISC/library/regset.sml b/MLRISC/library/regset.sml new file mode 100644 index 0000000..7c95129 --- /dev/null +++ b/MLRISC/library/regset.sml @@ -0,0 +1,130 @@ +(* + * Register set datatype. Implemented as sorted lists. + * + * -- Allen + *) + +structure RegSet :> REGISTER_SET = +struct + + type reg = int + + type regset = reg list + + val empty = [] + + fun sort [] = [] + | sort (l as [_]) = l + | sort (l as [x,y]) = if Int.<(x,y) then l else + if x = y then [x] else [y,x] + | sort l = + let val (a,b) = split (l,[],[]) + in mergeUniq(sort a, sort b) + end + + and split ([],a,b) = (a,b) + | split (r::rs,a,b) = split(rs,r::b,a) + + and mergeUniq(l as u::us, l' as v::vs) = + if u = v then mergeUniq(l,vs) + else if Int.<(u,v) then u::mergeUniq(us,l') + else v::mergeUniq(l,vs) + | mergeUniq(l,[]) = l + | mergeUniq([],l) = l + + fun union [] = [] + | union (r::rs) = mergeUniq(r,union rs) + + fun difference ([],_) = [] + | difference (set,[]) = set + | difference (set as r::rs,set' as r'::rs') = + if r = r' then difference(rs,set') + else if r < r' then r::difference(rs,set') + else (* r > r' *) difference(set,rs') + + fun intersect (set,[]) = [] + | intersect ([],set) = [] + | intersect (set as r::rs,set' as r'::rs') = + if r = r' then r::intersect(rs,rs') + else if r < r' then intersect(rs,set') + else intersect(set,rs') + + fun intersects [] = [] + | intersects [a] = a + | intersects (a::b) = intersect(a,intersects b) + + fun ==([],[]) = true + | ==(r::rs,r'::rs') = (r : int) = r' andalso ==(rs,rs') + | ==(_,_) = false + + fun isEmpty [] = true + | isEmpty _ = false + + val app = List.app + + fun contains ([], r) = false + | contains (r'::rs,r) = r' = r orelse (r > r' andalso contains(rs,r)) + + fun exists (set, []) = false + | exists (set, r::rs) = contains(set,r) orelse exists(set,rs) + + fun insert([],r) = [r] + | insert(set as r'::rs,r) = + if r = r' then set + else if r' < r then r'::insert(rs,r) + else r::set + + fun insertChanged (set,r) = + let fun ins [] = ([r],true) + | ins (set as r'::rs) = + if r = r' then (set,false) + else if r > r' then + let val (rs,changed) = ins rs + in if changed then (r'::rs,true) + else (set,false) + end + else (r::set,true) + in ins set + end + + fun remove ([],r) = [] + | remove (set as r'::rs,r) = + if r' = r then rs + else if r' < r then r'::remove(rs,r) + else set + + fun removeChanged (set,r) = + let fun rmv [] = ([],false) + | rmv (set as r'::rs) = + if r = r' then (rs,true) + else if r > r' then + let val (rs,changed) = rmv rs + in if changed then (r'::rs,true) + else (set,false) + end + else (set,false) + in + rmv set + end + + fun fromList l = sort l + fun fromSortedList l = l + fun toList set = set + + fun toString set = + let fun collect([],l) = l + | collect(r::rs,l) = Int.toString r::collect'(rs,l) + and collect'(rs,l) = + let val l = collect(rs,l) + in case l of [_] => l + | l => ","::l + end + in String.concat("{"::collect(set,["}"])) + end + + val op + = mergeUniq + val op - = difference + val op * = intersect + +end + diff --git a/MLRISC/library/sortedlist.sml b/MLRISC/library/sortedlist.sml new file mode 100644 index 0000000..598c7dd --- /dev/null +++ b/MLRISC/library/sortedlist.sml @@ -0,0 +1,63 @@ +(* Copyright 1989 by AT&T Bell Laboratories *) +structure SortedList = +struct + +fun enter(new:int,l) = + let fun f [] = [new] + | f (l as h::t) = if newh then h::f t else l + in f l + end + +fun merge(a,[]) = a + | merge([],a) = a + | merge(l as (i:int)::a, m as j::b) = + if j [] +end + +fun uniq l = + let fun split([],l,r) = (l,r) + | split(h::t,l,r) = split(t,r,h::l) + fun sort [] = [] + | sort (l as [_]) = l + | sort (l as [x : int,y : int]) = + if x = y then [x] else if x < y then l else [y,x] + | sort l = let val (l,r) = split(l,[],[]) + in merge(sort l, sort r) end + in sort l + end + +fun remove(x as (xl:int)::xr, y as yl::yr) = + if xl>yl then yl::remove(x,yr) else remove(xr,if xl bool) -> 'a list -> 'a list + val sort_uniq : ('a * 'a -> bool) -> + ('a * 'a -> bool) -> 'a list -> 'a list + val merge : ('a * 'a -> bool) -> 'a list * 'a list -> 'a list + val merge_uniq : ('a * 'a -> bool) -> + ('a * 'a -> bool) -> 'a list * 'a list -> 'a list + val merge_uniqs : ('a * 'a -> bool) -> + ('a * 'a -> bool) -> 'a list list -> 'a list + val uniq : ('a * 'a -> bool) -> 'a list -> 'a list + +end + +structure Sorting : SORTING = +struct + + infix == + + fun gensort merge op< l = + let fun sort [] = [] + | sort (l as [x]) = l + | sort (l as [x,y]) = if x < y then l else [y,x] + | sort l = + let fun split([],a,b) = (a,b) + | split(x::xs,a,b) = split(xs,b,x::a) + val (a,b) = split(l,[],[]) + in merge (sort a, sort b) + end + in sort l + end + + fun merge op< (a,b) = + let fun m ([],a) = a + | m (a,[]) = a + | m (a as (u::v), b as (w::x)) = + if u < w then u::m(v,b) else w::m(a,x) + in m(a,b) + end + + fun merge_uniq op< op== (a,b) = + let fun m ([],a) = uniq op== a + | m (a,[]) = uniq op== a + | m (a as (u::v), b as (w::x)) = + if u == w then m(a,x) + else if u < w then u::m(v,b) + else w::m(a,x) + in m(a,b) + end + + and uniq op== l = + let fun f [] = [] + | f (l as [x]) = l + | f (x::(l as (y::z))) = if x == y then f l else x::f l + in f l + end + + + fun sort op< l = gensort (merge op<) op< l + + fun sort_uniq op< op== l = gensort (merge_uniq op< op==) op< l + + fun merge_uniqs op< op== l = sort_uniq op< op== (List.concat l) + +end diff --git a/MLRISC/library/sorting2.sml b/MLRISC/library/sorting2.sml new file mode 100644 index 0000000..8f86db1 --- /dev/null +++ b/MLRISC/library/sorting2.sml @@ -0,0 +1,70 @@ +(* + * Newer merge sort. Stolen from a NJPLS meeting. + * + * -- Allen + *) + +signature SORTING = +sig + + val sort : ('a * 'a -> bool) -> 'a list -> 'a list + val uniq : ('a * 'a -> bool) -> 'a list -> 'a list + val sort_uniq : ('a * 'a -> bool) -> + ('a * 'a -> bool) -> 'a list -> 'a list + +end + +structure Sorting : SORTING = +struct + + infix == + + val SOME maxInt = Int.maxInt + + fun sort op< = + let fun getRun [] = (0,[]) + | getRun (h::t) = + let fun loop(last,n,[]) = (n,[]) + | loop(last,n,l as h::t) = + if h < last then (n,l) else loop(h,n+1,t) + in loop(h,1,t) end + fun head([],_) = [] + | head(_,0) = [] + | head(h::t,n) = h::head(t,n-1) + fun merge(a,alen,b,blen) = + let fun loop([],_,b,blen) = head(b,blen) + | loop(_,0,b,blen) = head(b,blen) + | loop(a,alen,[],_) = head(a,alen) + | loop(a,alen,_,0) = head(a,alen) + | loop(a as ah::at,alen,b as bh::bt,blen) = + if ah < bh then ah::loop(at,alen-1,b,blen) + else bh::loop(a,alen,bt,blen-1) + in loop(a,alen,b,blen) end + fun iter(sorted,slen,[],want) = (sorted,slen,[]) + | iter(sorted,slen,unsorted,want) = + if slen >= want then (sorted,slen,unsorted) + else + let val (runlen,runtail) = getRun unsorted + val (sorted',slen',unsorted) = + if runlen >= slen then + (unsorted,runlen,runtail) + else + iter(unsorted,runlen,runtail,runlen) + in iter(merge(sorted,slen,sorted',slen'), + slen+slen',unsorted,want) + end + fun main list = + let val (sorted,_,_) = iter([],0,list,maxInt) + in sorted end + in main end + + fun uniq op== = + let fun f [] = [] + | f (l as [x]) = l + | f (x::(l as (y::z))) = if x == y then f l else x::f l + in f + end + + fun sort_uniq op< op== l = uniq op== (sort op< l) + +end diff --git a/MLRISC/library/sources.cm b/MLRISC/library/sources.cm new file mode 100644 index 0000000..016ee18 --- /dev/null +++ b/MLRISC/library/sources.cm @@ -0,0 +1,79 @@ +Library + signature ANNOTATIONS + structure Annotations + signature BITSET + structure BitSet + signature CATNETABLE_LIST + structure CatnetableList + structure DynamicArray + signature FREQ + structure Freq + structure HashArray + signature PRIORITY_QUEUE + structure PriorityQueue + signature STRING_STREAM + structure StringStream + structure Word64 +is +#if defined(NEW_CM) + $basis.cm + $smlnj-lib.cm +#else + smlnj-lib.cm +#endif + + annotations.sig + annotations.sml + bitset.sig + bitset.sml +(* + cache.sml + *) + catlist.sml + dynamic-array.sml +(* + dynamic-bitset.sml + fixed-point.sig + fixed-point.sml + *) + freq.sig + freq.sml + hash-array.sml +(* + hash-table.sig + hash-table.sml + hashMap.sig + hashMap.sml + hashBag.sig + hashBag.sml + hashMultimap.sig + hashMultimap.sml + hashSet.sig + hashSet.sml + int-set.sml + *) + priQueue.sig + priQueue.sml +(* + probability.sml + randlist.sml + regset.sig + regset.sml + sorting2.sml + *) + stringStream.sml +(* + susp.sml + transaction-glue.sml + transaction-log.sig + transaction-log.sml + transaction.sig + transaction.sml + tree-map.sml + undoable-array.sml + undoable-ref.sml + union-find.sml + uref.sml + *) + word64.sml + diff --git a/MLRISC/library/stringOutStream.sig b/MLRISC/library/stringOutStream.sig new file mode 100644 index 0000000..58a069e --- /dev/null +++ b/MLRISC/library/stringOutStream.sig @@ -0,0 +1,14 @@ +(* + * This module allows use to bind a streambuf to an outstream. + * We can use this to capture all the output to a stream as a single string. + *) +signature STRING_OUTSTREAM = +sig + type streambuf + + val mkStreamBuf : unit -> streambuf + val getString : streambuf -> string + val setString : streambuf * string -> unit + val openStringOut : streambuf -> TextIO.outstream + +end diff --git a/MLRISC/library/stringOutStream.sml b/MLRISC/library/stringOutStream.sml new file mode 100644 index 0000000..2c25277 --- /dev/null +++ b/MLRISC/library/stringOutStream.sml @@ -0,0 +1,49 @@ +(* + * The basis seems to be missing a string (out)stream type. + * This is it. + * + * -- Allen. + *) +structure StringOutStream :> STRING_OUTSTREAM = +struct + + structure TextIO = TextIO + structure TextPrimIO = TextPrimIO + + type streambuf = string list ref + + fun mkStreamBuf () = ref [] : streambuf + fun getString (ref s) = String.concat(List.rev s) + fun setString (r,s) = r := [s] + + fun openStringOut buffer = + let fun writeVec sl = + (buffer := CharVectorSlice.vector sl :: !buffer; + CharVectorSlice.length sl) + fun writeArr sl = + (buffer := CharArraySlice.vector sl :: !buffer; + CharArraySlice.length sl) + val writer = + TextPrimIO.WR + { name = "string stream", + chunkSize = 512, + writeVec = SOME writeVec, + writeArr = SOME writeArr, + writeVecNB = SOME (SOME o writeVec), + writeArrNB = SOME (SOME o writeArr), + block = NONE, + canOutput = NONE, + getPos = NONE, + setPos = NONE, + endPos = NONE, + verifyPos = NONE, + close = fn () => (), + ioDesc = NONE + } + val outstream = TextIO.mkOutstream + (TextIO.StreamIO.mkOutstream (writer,IO.NO_BUF)) + in outstream + end + +end + diff --git a/MLRISC/library/susp.sml b/MLRISC/library/susp.sml new file mode 100644 index 0000000..889dd35 --- /dev/null +++ b/MLRISC/library/susp.sml @@ -0,0 +1,25 @@ +(* + * Force/delay + * + * -- Allen + *) + +signature SUSPENSION = +sig + type 'a susp + val $$ : (unit -> 'a) -> 'a susp + val !! : 'a susp -> 'a +end + +structure Suspension :> SUSPENSION = +struct + datatype 'a thunk = VALUE of 'a | CLOSURE of unit -> 'a + type 'a susp = 'a thunk ref + + fun $$ e = ref(CLOSURE e) + fun !! (ref (VALUE v)) = v + | !! (r as ref(CLOSURE e)) = + let val v = e() + in r := VALUE v; v end +end + diff --git a/MLRISC/library/transaction-glue.sml b/MLRISC/library/transaction-glue.sml new file mode 100644 index 0000000..2772176 --- /dev/null +++ b/MLRISC/library/transaction-glue.sml @@ -0,0 +1,12 @@ +(* + * Basic kinds of undoable imperative data structures + * -- Allen + *) + +structure Transaction = TransactionFn(TransactionLog) + +structure UndoableRef = UndoableRefFn(TransactionLog) + +structure UndoableArray = UndoableArrayFn(structure Array = Array + structure Log = TransactionLog) + diff --git a/MLRISC/library/transaction-log.sig b/MLRISC/library/transaction-log.sig new file mode 100644 index 0000000..18bcb75 --- /dev/null +++ b/MLRISC/library/transaction-log.sig @@ -0,0 +1,21 @@ +(* + * This implements a transaction log. + * + * -- Allen + *) + +signature TRANSACTION_LOG = +sig + + exception TransactionLog + + type version = int + val version : version ref + val add_object : { rollback : version -> unit, + commit : version -> unit } -> unit + val begin : unit -> unit + val commit : unit -> unit + val abort : unit -> unit + val init : unit -> unit +end + diff --git a/MLRISC/library/transaction-log.sml b/MLRISC/library/transaction-log.sml new file mode 100644 index 0000000..5fa6c8a --- /dev/null +++ b/MLRISC/library/transaction-log.sml @@ -0,0 +1,50 @@ +(* + * This implements a transaction log. This is used + * for undoable data structures. + * + * -- Allen + *) + +structure TransactionLog : TRANSACTION_LOG = +struct + exception TransactionLog + + type version = int + val version = ref 0 + val log = ref [] : (version * { rollback : version -> unit, + commit : version -> unit + } list ref) list ref + fun add_object f = + case !log of + (ver,trail)::_ => trail := f :: !trail + | [] => raise TransactionLog + + fun init() = (version := 0; log := []) + + fun begin () = + let val new_ver = !version+1 + in version := new_ver; + log := (new_ver,ref []) :: !log + end + + fun abort () = + let val old_ver = !version - 1 + in case !log of + (_,ref trail)::rest => + (app (fn {rollback,...} => rollback old_ver) trail; + version := old_ver; + log := rest) + | [] => raise TransactionLog + end + + fun commit () = + let val old_ver = !version - 1 + in case !log of + (_,ref trail)::rest => + (app (fn {commit,...} => commit old_ver) trail; + version := old_ver; + log := rest) + | [] => raise TransactionLog + end +end + diff --git a/MLRISC/library/transaction.sig b/MLRISC/library/transaction.sig new file mode 100644 index 0000000..470eb2e --- /dev/null +++ b/MLRISC/library/transaction.sig @@ -0,0 +1,15 @@ +(* + * Start a transaction + * + * -- Allen + *) + +signature TRANSACTION = +sig + + exception Abort + + val transaction : 'a -> (unit -> 'a) -> 'a + +end + diff --git a/MLRISC/library/transaction.sml b/MLRISC/library/transaction.sml new file mode 100644 index 0000000..b7f1ccf --- /dev/null +++ b/MLRISC/library/transaction.sml @@ -0,0 +1,24 @@ +(* + * This starts a transaction + * + * -- Allen + *) + +functor Transaction(Log : TRANSACTION_LOG) : TRANSACTION = +struct + + exception Abort + + fun transaction default func = + let + val _ = Log.begin() + val x = func() + val _ = Log.commit() + in + x + end + handle Abort => (Log.abort(); default) + | e => (Log.abort(); raise e) + +end + diff --git a/MLRISC/library/tree-map.sml b/MLRISC/library/tree-map.sml new file mode 100644 index 0000000..a6c8169 --- /dev/null +++ b/MLRISC/library/tree-map.sml @@ -0,0 +1,86 @@ +(* + * This implements a functional map + * + * -- Allen + *) + +signature TREE_MAP = +sig + type key + type 'a map + exception NotFound + val empty : 'a map + val insert : 'a map * key * 'a -> 'a map + val remove : 'a map * key -> 'a map + val lookup : 'a map * key -> 'a + val lookup' : 'a map * key -> key * 'a + val toList : 'a map -> (key * 'a) list + val fromList : (key * 'a) list -> 'a map + val foldl : (key * 'a * 'b -> 'b) -> 'b -> 'a map -> 'b + val foldr : (key * 'a * 'b -> 'b) -> 'b -> 'a map -> 'b +end + +functor TreeMap + (type key + exception NotFound + val compare : key * key -> order + ) : TREE_MAP = +struct + type key = key + datatype 'a map = NODE of key * 'a * 'a map * 'a map + | EMPTY + + exception NotFound = NotFound + val empty = EMPTY + fun insert(EMPTY,k',v') = NODE(k',v',EMPTY,EMPTY) + | insert(NODE(k,v,l,r),k',v') = + case compare(k',k) of + EQUAL => NODE(k,v',l,r) + | LESS => NODE(k,v,insert(l,k',v'),r) + | GREATER => NODE(k,v,l,insert(r,k',v')) + fun lookup'(EMPTY,k) = raise NotFound + | lookup'(NODE(k,v,l,r),k') = + case compare(k',k) of + EQUAL => (k,v) + | LESS => lookup'(l,k') + | GREATER => lookup'(r,k') + fun lookup(t,k) = #2(lookup'(t,k)) + fun remove(EMPTY,k) = EMPTY + | remove(NODE(k,v,l,r),k') = + case compare(k',k) of + EQUAL => + (case (l,r) of + (EMPTY,r) => r + | (l,EMPTY) => l + | (_,_) => let fun remove_succ EMPTY = EMPTY + | remove_succ(NODE(_,_,EMPTY,r)) = r + | remove_succ(NODE(k,v,l,r)) = + NODE(k,v,remove_succ l,r) + in NODE(k,v,l,remove_succ r) + end + ) + | LESS => NODE(k,v,remove(l,k'),r) + | GREATER => NODE(k,v,l,remove(r,k')) + + fun foldl f x = + let fun g(EMPTY,x) = x + | g(NODE(k,v,l,r),x) = g(l,f(k,v,g(r,x))) + in fn t => g(t,x) end + + fun foldr f x = + let fun g(EMPTY,x) = x + | g(NODE(k,v,l,r),x) = g(r,f(k,v,g(l,x))) + in fn t => g(t,x) end + + fun toList m = + let fun collect(EMPTY,L) = L + | collect(NODE(k,v,l,r),L) = collect(l,collect(r,(k,v)::L)) + in collect(m,[]) end + + fun fromList l = + let fun f([],m) = m + | f((k,v)::l,m) = f(l,insert(m,k,v)) + in f(l,EMPTY) end + +end + diff --git a/MLRISC/library/undoable-array.sml b/MLRISC/library/undoable-array.sml new file mode 100644 index 0000000..428b678 --- /dev/null +++ b/MLRISC/library/undoable-array.sml @@ -0,0 +1,64 @@ +(* + * Create a version of arrays that keeps track of its versions. + * + * -- Allen + *) + +functor UndoableArray + (structure Array : ARRAY + structure Log : TRANSACTION_LOG) : ARRAY = +struct + + structure A = Array + + type 'a vector = 'a A.vector + type 'a array = 'a A.array * Log.version ref + + infix 9 sub + + val maxLen = A.maxLen + + fun array (n,d) = (A.array (n,d),ref(!Log.version)) + + fun get (a,_) = a + + fun commit (a,v) = fn ver => v := ver + fun rollback (a,v) = + let val N = A.length a + val a' = A.array(N,A.sub(a,0)) + in A.copy{src=a, si=0, len=NONE, dst = a', di = 0}; + fn ver => (A.copy{src=a',si=0,len=NONE,dst=a,di=0}; v := ver) + end + + fun get' (A as (a,v)) = + let val ver = !Log.version + in if !v <> ver then + (Log.add_object {commit = commit A, + rollback = rollback A}; + v := ver + ) + else (); + a + end + + fun length a = A.length(get a) + fun a sub i = A.sub(get a,i) + fun update (a, i, e) = A.update(get' a, i, e) + fun extract (a, i, j) = A.extract(get a, i, j) + fun copy {src, si, len, dst, di } = + A.copy{src=get src, si=si, len=len, dst=get' dst, di=di} + fun copyVec { src, si, len, dst, di } = + A.copyVec { src = src, si = si, len = len, dst = get' dst, di = di } + fun tabulate (n, f) = (A.tabulate(n,f),ref(!Log.version)) + fun fromList l = (A.fromList l,ref(!Log.version)) + fun app f a = A.app f (get a) + fun foldl f u a = A.foldl f u (get a) + fun foldr f u a = A.foldr f u (get a) + fun modify f a = A.modify f (get' a) + fun appi f (a,i,j) = A.appi f (get a, i, j) + fun foldli f u (a, i, j) = A.foldli f u (get a, i, j) + fun foldri f u (a, i, j) = A.foldri f u (get a, i, j) + fun modifyi f (a, i, j) = A.modifyi f (get' a, i, j) + +end + diff --git a/MLRISC/library/undoable-ref.sml b/MLRISC/library/undoable-ref.sml new file mode 100644 index 0000000..2b7d849 --- /dev/null +++ b/MLRISC/library/undoable-ref.sml @@ -0,0 +1,44 @@ +(* + * A reference that allows undo. + * + * -- Allen + *) + +signature UNDOABLE_REF = +sig + eqtype 'a uref + val uref : 'a -> 'a uref + val ! : 'a uref -> 'a + val := : 'a uref * 'a -> unit +end + +functor UndoableRef (Log : TRANSACTION_LOG) : UNDOABLE_REF = +struct + + type 'a uref = 'a ref * Log.version ref + + fun uref a = (ref a, ref(!Log.version)) + + fun !! (r,_) = !r + + fun commit (x,v) = fn ver => v := ver + + fun rollback (x,v) = + let val x' = !x + in fn ver => (x := x'; v := ver) + end + + fun ::= (r as (x,v),y) = + let val ver = !Log.version + in if !v <> ver then (Log.add_object{rollback = rollback r, + commit = commit r + }; + v := ver) + else (); + x := y + end + + val ! = !! + val op := = ::= +end + diff --git a/MLRISC/library/union-find.sml b/MLRISC/library/union-find.sml new file mode 100644 index 0000000..b4fc3b6 --- /dev/null +++ b/MLRISC/library/union-find.sml @@ -0,0 +1,37 @@ +(* + * Union-find + * + * -- Allen + *) + +signature UNION_FIND = +sig + + type 'a union_find + + val union_find : int * (int -> 'a) -> 'a union_find + val find : 'a union_find -> int -> 'a + val union' : 'a union_find -> int * int -> bool + val union : 'a union_find -> ('a * 'a -> 'a) -> int * int -> bool + val == : 'a union_find -> int * int -> bool +end + +structure Unionfind :> UNION_FIND = +struct + + structure A = Array + structure U = UnionFindRef + + type 'a union_find = 'a U.uref A.array + + fun union_find (n,f) = A.tabulate(n,(fn i => U.uref(f i))) + + fun find U x = U.!!(A.sub(U,x)) + + fun union' U (x,y) = U.union' (A.sub(U,x),A.sub(U,y)) + + fun union U f (x,y) = U.union f (A.sub(U,x),A.sub(U,y)) + + fun == U (x,y) = U.==(A.sub(U,x),A.sub(U,y)) + +end diff --git a/MLRISC/library/uref.sml b/MLRISC/library/uref.sml new file mode 100644 index 0000000..a42e1c6 --- /dev/null +++ b/MLRISC/library/uref.sml @@ -0,0 +1,64 @@ +(* + * References that can be merged + * + * -- Allen + *) + +signature UNION_FIND_REF = +sig + + type 'a uref + + val uref : 'a -> 'a uref + val !! : 'a uref -> 'a + val ::= : 'a uref * 'a -> unit + val == : 'a uref * 'a uref -> bool + val eq : 'a uref * 'a uref -> bool + val find : 'a uref -> 'a uref + val union : ('a * 'a -> 'a) -> 'a uref * 'a uref -> bool + val union' : 'a uref * 'a uref -> bool +end + +structure UnionFindRef :> UNION_FIND_REF = +struct + + datatype 'a uptree = ROOT of 'a * int + | LINK of 'a uref + withtype 'a uref = 'a uptree ref + + fun uref x = ref(ROOT(x,1)) + fun eq (x : 'a uref,y : 'a uref) = x = y + fun find r = + let fun look (r as ref(ROOT _)) = r + | look (r' as ref(LINK r)) = + let val r'' = look r + in if r <> r'' then r' := LINK r'' else (); + r'' + end + in look r end + + fun == (x,y) = find x = find y + + fun !! r = let val ROOT(x,_) = !(find r) in x end + fun ::=(r,x) = let val r as ref(ROOT(_,w)) = find r + in r := ROOT(x,w) end + fun union f (x,y) = + let val r as ref(x as ROOT(i,w)) = find x + val r' as ref(y as ROOT(j,w')) = find y + in if r = r' then false + else if w > w' then + (r := ROOT(f(i,j),w+w'); r' := LINK r; true) + else + (r' := ROOT(f(i,j),w+w'); r := LINK r'; true) + end + fun union' (x,y) = + let val r as ref(x as ROOT(i,w)) = find x + val r' as ref(y as ROOT(j,w')) = find y + in if r = r' then false + else if w > w' then + (r := ROOT(i,w+w'); r' := LINK r; true) + else + (r' := ROOT(j,w+w'); r := LINK r'; true) + end +end + diff --git a/MLRISC/library/word64.sml b/MLRISC/library/word64.sml new file mode 100644 index 0000000..55aefff --- /dev/null +++ b/MLRISC/library/word64.sml @@ -0,0 +1,189 @@ +(* + * 64-bit word datatype. + * Word64.word is implemented as Word32.word * Word32.word + * A constant of this type can be specified as a pair of 32-bit words. + * Also pattern matching can also be applied in the same manner. + * + * -- Allen + *) + +structure Word64 : WORD = +struct + structure W = Word32 + + type word = W.word * W.word (* high, low *) + + val wordSize = 64 + + fun isNeg w = W.>>(w,0w31) = 0w1 (* test the sign bit *) + + fun toLargeWord(x,y) = y (* strip high order bits *) + fun toLargeWordX(x,y) = y (* strip high order bits *) + fun fromLargeWord w = (0w0 : W.word,w) + + fun toLargeInt(x:W.word,y) = + if x <> 0w0 orelse isNeg y then raise Overflow + else W.toLargeInt y + + fun toLargeIntX(x,y) = + if x = 0w0 then + if isNeg y then raise Overflow else W.toLargeInt y + else if (W.notb x) = 0w0 then + if isNeg y then W.toLargeIntX y else raise Overflow + else raise Overflow + + fun fromLargeInt i = (if i >= 0 then 0w0 else W.notb 0w0,W.fromLargeInt i) + + fun toInt(x:W.word,y) = + if x <> 0w0 orelse isNeg y then raise Overflow else W.toInt y + + fun toIntX(x,y) = + if x = 0w0 then + if isNeg y then raise Overflow else W.toInt y + else if (W.notb x) = 0w0 then + if isNeg y then W.toIntX y else raise Overflow + else raise Overflow + + fun fromInt i = if i >= 0 then (0w0:W.word,W.fromInt i) + else (W.notb 0w0,W.fromInt i) + + fun orb((a,b),(c,d)) = (W.orb(a,c),W.orb(b,d)) + + fun xorb((a,b),(c,d)) = (W.xorb(a,c),W.xorb(b,d)) + + fun andb((a,b),(c,d)) = (W.andb(a,c),W.andb(b,d)) + + fun notb(a,b) = (W.notb a,W.notb b) + + fun plus((a,b),(c,d)) = + let val y = W.+(b,d) + val x = W.+(a,c) + val x = if y < b then W.+(x,0w1) else x (* carry *) + in (x,y) end + + fun minus((a,b),(c,d)) = + let val x = W.-(a,c) + val y = W.-(b,d) + val x = if b < d then W.-(x,0w1) else x (* borrow *) + in (x,y) end + + fun mult((a,b),(c,d)) = + let (* multiply 32x32 -> 64. + * Split them into two pairs of 16 bit words in order to deal + * with carries in a portable manner. This is really annoying. + *) + fun multiply(u,v) = + let val a = W.>>(u,0w16) + val b = W.andb(u,0wxffff) + val c = W.>>(v,0w16) + val d = W.andb(v,0wxffff) + val ac = a*c + val bc = b*c + val ad = a*d + val bd = b*d + val bc_hi = W.>>(bc,0w16) + val bc_lo = W.<<(bc,0w16) + val ad_hi = W.>>(ad,0w16) + val ad_lo = W.<<(ad,0w16) + val AC = (ac,0w0:W.word) + val BC = (bc_hi,bc_lo) + val AD = (ad_hi,ad_lo) + val BD = (0w0:W.word,bd) + in plus(AC,plus(BC,plus(AD,BD))) end + fun shift32(a,b) = (b,0w0) + val ad = multiply(a,d) + val bc = multiply(b,c) + val bd = multiply(b,d) + in plus(plus(shift32(ad),shift32(bc)),bd) end + + fun gt((a,b):word,(c,d):word) = a > c orelse a=c andalso b > d + fun ge((a,b):word,(c,d):word) = a > c orelse a=c andalso b >= d + fun lt((a,b):word,(c,d):word) = a < b orelse a=c andalso b < d + fun le((a,b):word,(c,d):word) = a < b orelse a=c andalso b <= d + + fun compare ((a,b):word, (c,d):word) = + if a < c then LESS + else if a > c then GREATER + else if b < d then LESS + else if b > d then GREATER + else EQUAL + + fun sll((a,b),c) = + if c >= 0w32 then + let val x = W.<<(b,c-0w32) + in (x,0w0) end + else let val x = W.<<(a,c) + val y = W.<<(b,c) + val z = W.>>(b,0w32-c) + in (W.orb(x,z),y) end + + fun srl((a,b),c) = + if c >= 0w32 then + let val y = W.>>(a,c-0w32) + in (0w0,y) end + else let val x = W.>>(a,c) + val y = W.>>(b,c) + val z = W.<<(W.andb(a,W.<<(0w1,c)-0w1),0w32-c) + in (x,W.orb(y,z)) end + + fun sra((a,b),c) = + if c >= 0w32 then + let val y = W.~>>(a,c-0w32) + val x = if isNeg a then W.notb 0w0 else 0w0 + in (x,y) end + else let val x = W.~>>(a,c) + val y = W.>>(b,c) + val z = W.<<(W.andb(a,W.<<(0w1,c)-0w1),0w32-c) + in (x,W.orb(y,z)) end + + fun min (w1, w2) = if lt(w1,w2) then w1 else w2 + + fun max (w1, w2) = if gt(w1,w2) then w1 else w2 + + fun divide((a,b):word,(0w0,0w0):word) = raise Div + | divide((0w0,b),(0w0,d)) = (0w0:W.word,b div d) + | divide((a,b),(c,d)) = raise Match + (* okay, not yet supported, I'm lazy *) + + fun padZero(b,0) = b + | padZero(b,n) = padZero("0"^b,n-1) + + fun hex(0w0,y) = W.toString y + | hex(x,y) = + let val a = W.toString x + val b = W.toString y + in a^padZero(b,8-size b) end + + fun bin(0w0,y) = W.fmt StringCvt.BIN y + | bin(x,y) = + let val a = W.fmt StringCvt.BIN x + val b = W.fmt StringCvt.BIN y + in a^padZero(b,32-size b) end + + fun fmt StringCvt.BIN = bin + | fmt StringCvt.DEC = raise Match + | fmt StringCvt.HEX = hex + | fmt StringCvt.OCT = raise Match + + val toString = hex + + val scan = fn _ => raise Match + fun fromString s = + case W.fromString s of + SOME w => SOME(0w0:W.word,w) + | NONE => NONE + + val op < = lt + val op <= = le + val op > = gt + val op >= = ge + val op * = mult + val op + = plus + val op - = minus + val op << = sll + val op >> = srl + val op ~>> = sra + val op div = divide + fun op mod(a:word,b:word):word = a-(a div b)*b + +end diff --git a/MLRISC/make.sml b/MLRISC/make.sml new file mode 100644 index 0000000..d159de8 --- /dev/null +++ b/MLRISC/make.sml @@ -0,0 +1,24 @@ +(* + * Regenerates all the machine description generated files. + * This works for only 110.39+ + *) +(* val () = #set(CM.symval "UNSHARED_MLRISC") (SOME 1); *) + +(* From 110.57 on, we need the following new magic *) + +fun set f = #set(CM.Anchor.anchor f) (SOME "cm"); +val _ = app set ["Control.cm", "Lib.cm", "Graphs.cm", "MLRISC.cm", + "MLTREE.cm"]; + +fun b() = CM.make "Tools/MDL/sources.cm"; +val _ = b(); +fun c f = MDLGen.gen(f^"/"^f^".mdl"); +val _ = app c +[ "x86" +, "amd64" +, "sparc" +, "alpha" +, "hppa" +, "ppc" +(* , "mips" *) +]; diff --git a/MLRISC/makeall-110.0.6.sml b/MLRISC/makeall-110.0.6.sml new file mode 100644 index 0000000..14f0587 --- /dev/null +++ b/MLRISC/makeall-110.0.6.sml @@ -0,0 +1,7 @@ +(* + * Recompile everything in this directory + *) +val current = ref ""; +fun make f = (print("[Compiling "^f^"]\n"); current := f; CM.make'("cm/"^f)); +fun again _ = make(!current); +val _ = app make files; diff --git a/MLRISC/makeall-110.25.sml b/MLRISC/makeall-110.25.sml new file mode 100644 index 0000000..e0d819e --- /dev/null +++ b/MLRISC/makeall-110.25.sml @@ -0,0 +1,14 @@ +(* + * Recompile everything in this directory + *) +CM.autoload "full-cm.cm"; +val current = ref ""; +fun make f = (print("[Compiling "^f^"]\n"); current := f; CM.recomp("cm/"^f)); +fun again _ = make(!current); +val _ = app CM.Anchor.cancel files; + +fun makeall [] = true + | makeall(f::fs) = make f andalso makeall fs +; + +val _ = makeall files; diff --git a/MLRISC/makeall-new.sml b/MLRISC/makeall-new.sml new file mode 100644 index 0000000..c4ffb8f --- /dev/null +++ b/MLRISC/makeall-new.sml @@ -0,0 +1,27 @@ +(* + * Recompile everything in this directory + *) +(* CM.autoload "$/full-cm.cm"; *) + +(* Register the nowhere tool *) +CM.make "$smlnj/cm/tools.cm"; +val _ = Tools.registerStdShellCmdTool + { tool = "Nowhere", + class = "nowhere", + suffixes = ["peep"], + cmdStdPath = "nowhere", + template = NONE, + extensionStyle = + Tools.REPLACE (["nowhere"], [("sml", SOME "sml", fn too => too)]), + dflopts = [] }; + +val current = ref ""; +fun make f = (print("[Compiling "^f^"]\n"); current := f; CM.recomp("cm/"^f)); +fun again _ = make(!current); +fun makeall [] = true + | makeall(f::fs) = make f andalso makeall fs +; + +fun set f = #set(CM.Anchor.anchor f) (SOME "cm"); +val _ = app set files; +val _ = makeall files; diff --git a/MLRISC/makeall.sml b/MLRISC/makeall.sml new file mode 100644 index 0000000..93610ac --- /dev/null +++ b/MLRISC/makeall.sml @@ -0,0 +1,71 @@ +(* + * Recompile everything in this directory + *) +use "autoload.sml" handle _ => (); + +val files = +[ + "Control.cm", + "Lib.cm", + + "MLRISC.cm", + "SPARC.cm", + "ALPHA.cm", + "HPPA.cm", + "IA32.cm", + "PPC.cm", +(* "MIPS.cm", *) + + "Peephole.cm", + "ALPHA-Peephole.cm", + "SPARC-Peephole.cm", + "IA32-Peephole.cm", + + "Graphs.cm", + "Visual.cm", + "ir.cm", + "MLTREE.cm", + "RA.cm", + "GC.cm", + "IR.cm", + "RTL.cm", + "Region.cm", + + "ALPHA-RTL.cm", + "SPARC-RTL.cm", + "HPPA-RTL.cm", + "IA32-RTL.cm", + + "SSA.cm", + + "Opt.cm", + + "ALPHA-SSA.cm", + "SPARC-SSA.cm", + "HPPA-SSA.cm", + "IA32-SSA.cm" + +(* "VLIW.cm", *) +(* "Sched.cm", *) + +(* + "ALPHA-Sched.cm", + "SPARC-Sched.cm", + "HPPA-Sched.cm", + "PPC-Sched.cm", + "IA32-Sched.cm" +*) + (*"ALPHA-GC.cm", + "SPARC-GC.cm", + "HPPA-GC.cm", + "IA32-GC.cm", + "PPC-GC.cm",*) +]; + +val _ = (* Try to guess the version *) +use (case #version_id(Compiler.version) of + [110,0,_] => "makeall-110.0.6.sml" + | 110::ver::_ => if Int.>=(ver,30) then "makeall-new.sml" + else "makeall-110.25.sml" + ) +; diff --git a/MLRISC/mdl.sh b/MLRISC/mdl.sh new file mode 100755 index 0000000..048a087 --- /dev/null +++ b/MLRISC/mdl.sh @@ -0,0 +1,30 @@ +#!/bin/sh +# +# COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) +# All rights reserved. +# +# A script for running the MDL tool on a specification file. +# +# usage: mdl.sh +# + +if [ $# -ne 1 ] ; then + echo "usage: mdl.sh " + exit 1 +fi +TARGET=$1 + +src="$TARGET/$TARGET.mdl" + +if [ ! -r "$src" ] ; then + echo "mdl.sh: unknown target" + exit 1 +fi + +sml < #set(CM.Anchor.anchor f) (SOME "cm")) [ + "Control.cm", "Lib.cm", "Graphs.cm", "MLRISC.cm", "MLTREE.cm" + ]; +CM.make "Tools/MDL/sources.cm"; +MDLGen.gen "$src"; +XXXX diff --git a/MLRISC/mips/backpatch/mipsDelaySlotProps.sml b/MLRISC/mips/backpatch/mipsDelaySlotProps.sml new file mode 100644 index 0000000..7b24f52 --- /dev/null +++ b/MLRISC/mips/backpatch/mipsDelaySlotProps.sml @@ -0,0 +1,64 @@ +functor MIPSDelaySlots + (structure I : MIPSINSTR + structure P : INSN_PROPERTIES where I = I + ) : DELAY_SLOT_PROPERTIES = +struct + structure I = I + structure C = I.C + structure SL = C.SortedCells + + fun error msg = MLRiscErrorMsg.error("MIPSDelaySlotProps",msg) + + datatype delay_slot = D_NONE | D_ERROR | D_ALWAYS | D_TAKEN | D_FALLTHRU + + val delaySlotSize = 4 + + fun delaySlot{instr, backward} = + case instr of + I.J{nop, ...} => {n=false,nOn=D_ALWAYS,nOff=D_ERROR,nop=nop} + | I.JR{nop, ...} => {n=false,nOn=D_ALWAYS,nOff=D_ERROR,nop=nop} + | I.JAL{nop, ...} => {n=false,nOn=D_ALWAYS,nOff=D_ERROR,nop=nop} + | I.JALR{nop, ...} => {n=false,nOn=D_ALWAYS,nOff=D_ERROR,nop=nop} + | I.BRANCH{nop, ...} => {n=false,nOn=D_ALWAYS,nOff=D_ERROR,nop=nop} + | I.FBRANCH{nop, ...} => {n=false,nOn=D_ALWAYS,nOff=D_ERROR,nop=nop} + | I.ANNOTATION{i,...} => delaySlot{instr=i,backward=backward} + | _ => {n=false,nOn=D_ERROR,nOff=D_NONE,nop=false} + + fun enableDelaySlot{instr, n=true, nop} = + error "enableDelaySlot: can't nullify" + | enableDelaySlot{instr, n, nop} = + case instr of + I.J{lab, ...} => I.J{lab=lab,nop=nop} + | I.JR{rs, labels, ...} => I.JR{rs=rs,labels=labels,nop=nop} + | I.JAL{lab, defs, uses, cutsTo, mem, ...} => + I.JAL{lab=lab, defs=defs, uses=uses, cutsTo=cutsTo, mem=mem, nop=nop} + | I.JALR{rs, rt, defs, uses, cutsTo, mem, ...} => + I.JALR{rs=rs, rt=rt, + defs=defs, uses=uses, cutsTo=cutsTo, mem=mem, nop=nop} + | I.RET _ => I.RET{nop=nop} + | I.BRANCH{likely, cond, rs, rt, lab, ...} => + I.BRANCH{likely=likely, cond=cond, rs=rs, rt=rt, lab=lab, nop=nop} + | I.FBRANCH{likely, fbranch, cc, lab, ...} => + I.FBRANCH{likely=likely, fbranch=fbranch, cc=cc, lab=lab, nop=nop} + | I.ANNOTATION{i,a} => + I.ANNOTATION{i=enableDelaySlot{instr=i,n=n,nop=nop},a=a} + | _ => error "enableDelaySlot" + + val defUseI = P.defUse C.GP + val defUseF = P.defUse C.FP + val zeroR = Option.valOf(C.zeroReg C.GP) + fun conflict{src=i,dst=j} = error "conflict" + + fun delaySlotCandidate + {jmp, delaySlot=(I.J _ | I.JR _ | I.JAL _ | I.JALR _ + | I.RET _ | I.BRANCH _ | I.FBRANCH _)} = false + | delaySlotCandidate{jmp=I.ANNOTATION{i,...},delaySlot} = + delaySlotCandidate{jmp=i,delaySlot=delaySlot} + | delaySlotCandidate{jmp,delaySlot=I.ANNOTATION{i,...}} = + delaySlotCandidate{jmp=jmp,delaySlot=i} + | delaySlotCandidate _ = true + + fun setTarget(I.ANNOTATION{i,a},lab) = I.ANNOTATION{i=setTarget(i,lab),a=a} + | setTarget _ = error "setTarget" + +end diff --git a/MLRISC/mips/backpatch/mipsDelaySlots.sml b/MLRISC/mips/backpatch/mipsDelaySlots.sml new file mode 100644 index 0000000..a375439 --- /dev/null +++ b/MLRISC/mips/backpatch/mipsDelaySlots.sml @@ -0,0 +1,38 @@ +(* + * This file was automatically generated by MDGen (v3.0) + * from the machine description file "mips/mips.md". + *) + + +functor MIPSDelaySlots(structure I : MIPSINSTR + structure P : INSN_PROPERTIES + where I = I + ) : DELAY_SLOT_PROPERTIES = +struct + structure I = I + datatype delay_slot = D_NONE | D_ERROR | D_ALWAYS | D_TAKEN | D_FALLTHRU + + fun error msg = MLRiscErrorMsg.error("MIPSDelaySlots",msg) + fun delaySlot {instr, backward} = let + fun delaySlot instr = + ( + case instr of + _ => {nop=true, n=false, nOn=D_ERROR, nOff=D_NONE} + ) + in delaySlot instr + end + + fun enableDelaySlot _ = error "enableDelaySlot" + fun conflict _ = error "conflict" + fun delaySlotCandidate {jmp, delaySlot} = let + fun delaySlotCandidate delaySlot = + ( + case delaySlot of + _ => true + ) + in delaySlotCandidate delaySlot + end + + fun setTarget _ = error "setTarget" +end + diff --git a/MLRISC/mips/backpatch/mipsJumps.sml b/MLRISC/mips/backpatch/mipsJumps.sml new file mode 100644 index 0000000..fe33a8b --- /dev/null +++ b/MLRISC/mips/backpatch/mipsJumps.sml @@ -0,0 +1,64 @@ +(* mipsJumps.sml --- information to resolve jumps. + * + *) +functor MIPSJumps + (structure Instr : MIPSINSTR + structure Shuffle : MIPSSHUFFLE + sharing Shuffle.I = Instr) : SDI_JUMPS = +struct + structure I = Instr + structure C = I.C + structure Const = I.Constant + structure LE = I.LabelExp + + fun error msg = MLRiscErrorMsg.error("MIPSJumps",msg) + + val branchDelayedArch = true + + fun isSdi instr = + let fun opnd (I.Lab _) = true + | opnd _ = false + in case instr of + I.LUI{imm, ...} => opnd imm + | I.ARITH{i, ...} => opnd i + | I.LOAD{d, ...} => opnd d + | I.STORE{d, ...} => opnd d + | I.FLOAD{d, ...} => opnd d + | I.FSTORE{d, ...} => opnd d + | I.J _ => true + | I.JR _ => true + | I.JAL _ => true + | I.JALR _ => true + | I.RET _ => true + | I.BRANCH _ => true + | I.FBRANCH _ => true + | I.ANNOTATION{i,...} => isSdi i + | _ => false + end + + fun minSize(I.COPY _) = 0 + | minSize(I.FCOPY _) = 0 + | minSize(I.ANNOTATION{i,...}) = minSize i + | minSize _ = 4 + + (* max Size is not used for the mips span dependency analysis. *) + fun maxSize _ = error "maxSize" + + fun sdiSize(instr, labMap, loc) = + let fun branchOffset lab = (labMap lab - loc - 8) div 4 + fun delay nop = if nop then 8 else 4 + fun branch(nop, label) = delay nop (* XXX *) + in case instr of + I.J{nop, lab, ...} => branch(nop,lab) + | I.JAL{nop, lab, ...} => branch(nop,lab) + | I.BRANCH{nop, lab, ...} => branch(nop,lab) + | I.FBRANCH{nop, lab, ...} => branch(nop,lab) + | I.JR{nop, ...} => delay nop + | I.JALR{nop, ...} => delay nop + | I.ANNOTATION{i, ...} => sdiSize(i, labMap, loc) + | _ => error "sdiSize" + end + + fun expand(instr, size, pos) = error "expand" + +end diff --git a/MLRISC/mips/emit/mipsAsm.sml b/MLRISC/mips/emit/mipsAsm.sml new file mode 100644 index 0000000..d0bf83e --- /dev/null +++ b/MLRISC/mips/emit/mipsAsm.sml @@ -0,0 +1,589 @@ +(* + * WARNING: This file was automatically generated by MDLGen (v3.0) + * from the machine description file "mips/mips.mdl". + * DO NOT EDIT this file directly + *) + + +functor MIPSAsmEmitter(structure S : INSTRUCTION_STREAM + structure Instr : MIPSINSTR + where T = S.P.T + structure Shuffle : MIPSSHUFFLE + where I = Instr + structure MLTreeEval : MLTREE_EVAL + where T = Instr.T + ) : INSTRUCTION_EMITTER = +struct + structure I = Instr + structure C = I.C + structure T = I.T + structure S = S + structure P = S.P + structure Constant = I.Constant + + open AsmFlags + + fun error msg = MLRiscErrorMsg.error("MIPSAsmEmitter",msg) + + fun makeStream formatAnnotations = + let val stream = !AsmStream.asmOutStream + fun emit' s = TextIO.output(stream,s) + val newline = ref true + val tabs = ref 0 + fun tabbing 0 = () + | tabbing n = (emit' "\t"; tabbing(n-1)) + fun emit s = (tabbing(!tabs); tabs := 0; newline := false; emit' s) + fun nl() = (tabs := 0; if !newline then () else (newline := true; emit' "\n")) + fun comma() = emit "," + fun tab() = tabs := 1 + fun indent() = tabs := 2 + fun ms n = let val s = Int.toString n + in if n<0 then "-"^String.substring(s,1,size s-1) + else s + end + fun emit_label lab = emit(P.Client.AsmPseudoOps.lexpToString(T.LABEL lab)) + fun emit_labexp le = emit(P.Client.AsmPseudoOps.lexpToString (T.LABEXP le)) + fun emit_const c = emit(Constant.toString c) + fun emit_int i = emit(ms i) + fun paren f = (emit "("; f(); emit ")") + fun defineLabel lab = emit(P.Client.AsmPseudoOps.defineLabel lab^"\n") + fun entryLabel lab = defineLabel lab + fun comment msg = (tab(); emit("/* " ^ msg ^ " */"); nl()) + fun annotation a = comment(Annotations.toString a) + fun getAnnotations() = error "getAnnotations" + fun doNothing _ = () + fun fail _ = raise Fail "AsmEmitter" + fun emit_region mem = comment(I.Region.toString mem) + val emit_region = + if !show_region then emit_region else doNothing + fun pseudoOp pOp = (emit(P.toString pOp); emit "\n") + fun init size = (comment("Code Size = " ^ ms size); nl()) + val emitCellInfo = AsmFormatUtil.reginfo + (emit,formatAnnotations) + fun emitCell r = (emit(CellsBasis.toString r); emitCellInfo r) + fun emit_cellset(title,cellset) = + (nl(); comment(title^CellsBasis.CellSet.toString cellset)) + val emit_cellset = + if !show_cellset then emit_cellset else doNothing + fun emit_defs cellset = emit_cellset("defs: ",cellset) + fun emit_uses cellset = emit_cellset("uses: ",cellset) + val emit_cutsTo = + if !show_cutsTo then AsmFormatUtil.emit_cutsTo emit + else doNothing + fun emitter instr = + let + fun asm_load (I.LD) = "ld" + | asm_load (I.LW) = "lw" + | asm_load (I.LH) = "lh" + | asm_load (I.LHU) = "lhu" + | asm_load (I.LB) = "lb" + | asm_load (I.LBU) = "lbu" + | asm_load (I.LWL) = "lwl" + | asm_load (I.LWR) = "lwr" + | asm_load (I.LWU) = "lwu" + | asm_load (I.LDL) = "ldl" + | asm_load (I.LDR) = "ldr" + | asm_load (I.ULH) = "ulh" + | asm_load (I.ULHU) = "ulhu" + | asm_load (I.ULW) = "ulw" + | asm_load (I.ULD) = "uld" + and emit_load x = emit (asm_load x) + and asm_store (I.SD) = "sd" + | asm_store (I.SW) = "sw" + | asm_store (I.SH) = "sh" + | asm_store (I.SB) = "sb" + | asm_store (I.SWL) = "swl" + | asm_store (I.SWR) = "swr" + | asm_store (I.SDL) = "sdl" + | asm_store (I.SDR) = "sdr" + | asm_store (I.USH) = "ush" + | asm_store (I.USW) = "usw" + | asm_store (I.USD) = "usd" + and emit_store x = emit (asm_store x) + and asm_fload (I.LDC1) = "ldc1" + | asm_fload (I.LWC1) = "lwc1" + and emit_fload x = emit (asm_fload x) + and asm_fstore (I.SDC1) = "sdc1" + | asm_fstore (I.SWC1) = "swc1" + and emit_fstore x = emit (asm_fstore x) + and asm_fcond (I.FF) = "f" + | asm_fcond (I.FUN) = "un" + | asm_fcond (I.FEQ) = "eq" + | asm_fcond (I.FUEQ) = "fueq" + | asm_fcond (I.FOLT) = "folt" + | asm_fcond (I.FULT) = "ult" + | asm_fcond (I.FOLE) = "ole" + | asm_fcond (I.FULE) = "ule" + | asm_fcond (I.FNGLE) = "ngle" + | asm_fcond (I.FSP) = "sf" + | asm_fcond (I.FNGL) = "ngl" + | asm_fcond (I.FSEQ) = "seq" + | asm_fcond (I.FLT) = "flt" + | asm_fcond (I.FNGE) = "fnge" + | asm_fcond (I.FLE) = "le" + | asm_fcond (I.FNGT) = "ngt" + and emit_fcond x = emit (asm_fcond x) + and asm_cond (I.EQ) = "eq" + | asm_cond (I.NE) = "ne" + | asm_cond (I.LEZ) = "lez" + | asm_cond (I.GTZ) = "gtz" + | asm_cond (I.LTZ) = "ltz" + | asm_cond (I.GEZ) = "gez" + and emit_cond x = emit (asm_cond x) + and asm_fbranch (I.BC1T) = "bc1t" + | asm_fbranch (I.BC1F) = "bc1f" + and emit_fbranch x = emit (asm_fbranch x) + and asm_likely (I.LIKELY) = "l" + | asm_likely (I.UNLIKELY) = "" + and emit_likely x = emit (asm_likely x) + and asm_arith (I.ADD) = "add" + | asm_arith (I.ADDU) = "addu" + | asm_arith (I.AND) = "and" + | asm_arith (I.XOR) = "xor" + | asm_arith (I.MUL) = "mul" + | asm_arith (I.MULO) = "mulo" + | asm_arith (I.MULOU) = "mulou" + | asm_arith (I.NOR) = "nor" + | asm_arith (I.OR) = "or" + | asm_arith (I.SEQ) = "seq" + | asm_arith (I.SGT) = "sgt" + | asm_arith (I.SGE) = "sge" + | asm_arith (I.SGEU) = "sgeu" + | asm_arith (I.SGTU) = "sgtu" + | asm_arith (I.SLT) = "slt" + | asm_arith (I.SLE) = "sle" + | asm_arith (I.SLEU) = "sleu" + | asm_arith (I.SLTU) = "sltu" + | asm_arith (I.SNE) = "sne" + | asm_arith (I.SUB) = "sub" + | asm_arith (I.SUBU) = "subu" + | asm_arith (I.REM) = "rem" + | asm_arith (I.REMU) = "remu" + | asm_arith (I.SRA) = "sra" + | asm_arith (I.SLL) = "sll" + | asm_arith (I.SRL) = "srl" + | asm_arith (I.ROR) = "ror" + | asm_arith (I.ROL) = "rol" + | asm_arith (I.MOVN) = "movn" + | asm_arith (I.MOVZ) = "movz" + | asm_arith (I.DADD) = "dadd" + | asm_arith (I.DADDU) = "daddu" + | asm_arith (I.DMUL) = "dmul" + | asm_arith (I.DMULO) = "dmulo" + | asm_arith (I.DMULOU) = "dmulou" + | asm_arith (I.DSUB) = "dsub" + | asm_arith (I.DSUBU) = "dsubu" + | asm_arith (I.DREM) = "drem" + | asm_arith (I.DREMU) = "dremu" + | asm_arith (I.DROL) = "drol" + | asm_arith (I.DROR) = "dror" + | asm_arith (I.DSLL) = "dsll" + | asm_arith (I.DSLL32) = "dsll32" + | asm_arith (I.DSLLV) = "dsllv" + | asm_arith (I.DSRA) = "dsra" + | asm_arith (I.DSRA32) = "dsra32" + | asm_arith (I.DSRAV) = "dsrav" + | asm_arith (I.DSRL) = "dsrl" + | asm_arith (I.DSRL32) = "dsrl32" + | asm_arith (I.DSRLV) = "dsrlv" + and emit_arith x = emit (asm_arith x) + and asm_unary (I.ABS) = "abs" + | asm_unary (I.NEG) = "neg" + | asm_unary (I.NEGU) = "negu" + | asm_unary (I.NOT) = "not" + | asm_unary (I.DABS) = "dabs" + | asm_unary (I.DNEG) = "dneg" + | asm_unary (I.DNEGU) = "dnegu" + and emit_unary x = emit (asm_unary x) + and asm_multiply (I.MULT) = "mult" + | asm_multiply (I.MULTU) = "multu" + | asm_multiply (I.DMULT) = "dmult" + | asm_multiply (I.DMULTU) = "dmultu" + and emit_multiply x = emit (asm_multiply x) + and asm_divide (I.DIV) = "div" + | asm_divide (I.DIVU) = "divu" + | asm_divide (I.DDIV) = "ddiv" + | asm_divide (I.DDIVU) = "ddivu" + and emit_divide x = emit (asm_divide x) + and asm_trap (I.TEQ) = "teq" + | asm_trap (I.TNE) = "tne" + | asm_trap (I.TLT) = "tlt" + | asm_trap (I.TLTU) = "tltu" + | asm_trap (I.TGE) = "tge" + | asm_trap (I.TGEU) = "tgeu" + and emit_trap x = emit (asm_trap x) + and asm_farith (I.ADD_D) = "add.d" + | asm_farith (I.ADD_S) = "add.s" + | asm_farith (I.SUB_D) = "sub.d" + | asm_farith (I.SUB_S) = "sub.s" + | asm_farith (I.MUL_D) = "mul.d" + | asm_farith (I.MUL_S) = "mul.s" + | asm_farith (I.DIV_D) = "div.d" + | asm_farith (I.DIV_S) = "div.s" + and emit_farith x = emit (asm_farith x) + and asm_funary (I.MOV_D) = "mov.d" + | asm_funary (I.MOV_S) = "mov.s" + | asm_funary (I.ABS_D) = "abs.d" + | asm_funary (I.ABS_S) = "abs.s" + | asm_funary (I.NEG_D) = "neg.d" + | asm_funary (I.NEG_S) = "neg.s" + | asm_funary (I.SQRT_D) = "sqrt.d" + | asm_funary (I.SQRT_S) = "sqrt.s" + | asm_funary (I.CVT_SD) = "cvt.s.d" + | asm_funary (I.CVT_SW) = "cvt.s.w" + | asm_funary (I.CVT_DS) = "cvt.d.s" + | asm_funary (I.CVT_DW) = "cvt.d.w" + | asm_funary (I.CVT_WS) = "cvt.w.s" + | asm_funary (I.CVT_WD) = "cvt.w.d" + | asm_funary (I.CVT_SL) = "cvt.s.l" + | asm_funary (I.CVT_DL) = "cvt.d.l" + | asm_funary (I.CVT_LS) = "cvt.l.s" + | asm_funary (I.CVT_LD) = "cvt.l.d" + and emit_funary x = emit (asm_funary x) + and asm_cvti2f (I.MTC1) = "mtc1" + | asm_cvti2f (I.DMTC1) = "dmtc1" + and emit_cvti2f x = emit (asm_cvti2f x) + and asm_cvtf2i (I.MFC1) = "mfc1" + | asm_cvtf2i (I.DMFC1) = "dmfc1" + and emit_cvtf2i x = emit (asm_cvtf2i x) + and asm_farith3 (I.MADD_D) = "madd.d" + | asm_farith3 (I.MADD_S) = "madd.s" + | asm_farith3 (I.NMADD_D) = "nmadd.d" + | asm_farith3 (I.NMADD_S) = "nmadd.s" + | asm_farith3 (I.MSUB_D) = "msub.d" + | asm_farith3 (I.MSUB_S) = "msub.s" + | asm_farith3 (I.NMSUB_D) = "nmsub.d" + | asm_farith3 (I.NMSUB_S) = "nmsub.s" + and emit_farith3 x = emit (asm_farith3 x) + and asm_fround (I.TRUNC_WS) = "trunc.w.s" + | asm_fround (I.TRUNC_WD) = "trunc.w.d" + | asm_fround (I.ROUND_WS) = "round.w.d" + | asm_fround (I.ROUND_WD) = "round.w.d" + | asm_fround (I.CEIL_WD) = "ceil.w.d" + | asm_fround (I.CEIL_WS) = "ceil.w.s" + | asm_fround (I.CEILU_WD) = "ceilu.w.d" + | asm_fround (I.CEILU_WS) = "ceilu.w.s" + | asm_fround (I.FLOOR_WD) = "floor.w.d" + | asm_fround (I.FLOOR_WS) = "floor.w.s" + | asm_fround (I.FLOORU_WD) = "flooru.w.d" + | asm_fround (I.FLOORU_WS) = "flooru.w.s" + | asm_fround (I.ROUNDU_WD) = "roundu.w.d" + | asm_fround (I.ROUNDU_WS) = "roundu.w.s" + | asm_fround (I.TRUNCU_WD) = "truncu.w.d" + | asm_fround (I.TRUNCU_WS) = "truncu.w.s" + | asm_fround (I.TRUNC_LS) = "trunc.l.s" + | asm_fround (I.TRUNC_LD) = "trunc.l.d" + | asm_fround (I.ROUND_LS) = "round.l.s" + | asm_fround (I.ROUND_LD) = "round.l.d" + | asm_fround (I.CEIL_LS) = "ceil.l.s" + | asm_fround (I.CEIL_LD) = "ceil.l.d" + | asm_fround (I.FLOOR_LS) = "floor.l.s" + | asm_fround (I.FLOOR_LD) = "floor.l.d" + and emit_fround x = emit (asm_fround x) + and asm_fmt (I.SINGLE) = "s" + | asm_fmt (I.DOUBLE) = "d" + and emit_fmt x = emit (asm_fmt x) + and emit_operand (I.Imm int) = emit_int int + | emit_operand (I.Reg GP) = emitCell GP + | emit_operand (I.Lab labexp) = emit_labexp labexp + | emit_operand (I.HiLab labexp) = + ( emit "$hi("; + emit_labexp labexp; + emit ")" ) + | emit_operand (I.LoLab labexp) = + ( emit "$lo("; + emit_labexp labexp; + emit ")" ) + +(*#line 244.7 "mips/mips.mdl"*) + fun immedSuffix (s, I.Reg _) = s + | immedSuffix (s, _) = + let +(*#line 246.15 "mips/mips.mdl"*) + val n = String.size s + in + (case String.sub (s, n - 1) of + #"u" => (String.substring (s, 0, n - 1)) ^ "iu" + | _ => s ^ "i" + ) + end + +(*#line 253.7 "mips/mips.mdl"*) + fun indexed (s, I.Reg _) = + let +(*#line 254.15 "mips/mips.mdl"*) + val prefix = String.substring (s, 0, 2) + +(*#line 255.15 "mips/mips.mdl"*) + val suffix = String.substring (s, 2, 4) + in (prefix ^ "x") ^ suffix + end + | indexed (s, _) = s + +(*#line 260.7 "mips/mips.mdl"*) + fun emit_nop false = () + | emit_nop true = emit "\n\tnop" + fun emitInstr' instr = + (case instr of + I.NOP => emit "nop" + | I.LUI{rt, imm} => + ( emit "lui\t"; + emitCell rt; + emit ", "; + emit_operand imm ) + | I.LA{rt, b, d} => + ( emit "la\t"; + emitCell rt; + emit ", "; + emitCell b; + emit ", "; + emit_operand d ) + | I.DLA{rt, b, d} => + ( emit "dla\t"; + emitCell rt; + emit ", "; + emitCell b; + emit ", "; + emit_operand d ) + | I.LOAD{l, rt, b, d, mem} => + ( emit_load l; + emit "\t"; + emitCell rt; + emit ", "; + emit_operand d; + emit "("; + emitCell b; + emit ")"; + emit_region mem ) + | I.STORE{s, rs, b, d, mem} => + ( emit_store s; + emit "\t"; + emitCell rs; + emit ", "; + emit_operand d; + emit "("; + emitCell b; + emit ")"; + emit_region mem ) + | I.FLOAD{l, ft, b, d, mem} => + ( indexed (asm_fload l, d); + emit "\t"; + emitCell ft; + emit ", "; + emit_operand d; + emit "("; + emitCell b; + emit ")"; + emit_region mem ) + | I.FSTORE{s, fs, b, d, mem} => + ( indexed (asm_fstore s, d); + emit "\t"; + emitCell fs; + emit ", "; + emit_operand d; + emit "("; + emitCell b; + emit ")"; + emit_region mem ) + | I.FCMP{fcond, fmt, cc, fs1, fs2} => + ( emit "c."; + emit_fcond fcond; + emit "."; + emit_fmt fmt; + emit "\t"; + emitCell cc; + emit ", "; + emitCell fs1; + emit ", "; + emitCell fs2 ) + | I.TRAP{t, rs, i} => + ( emit_trap t; + emit "\t"; + emitCell rs; + emit ", "; + emit_operand i ) + | I.J{lab, nop} => + ( emit "j\t"; + emit_label lab; + emit_nop nop ) + | I.JR{rs, labels, nop} => + ( emit "jr\t"; + emitCell rs; + emit_nop nop ) + | I.JAL{lab, defs, uses, cutsTo, mem, nop} => + ( emit "jal\t"; + emit_label lab; + emit_region mem; + emit_defs defs; + emit_uses uses; + emit_cutsTo cutsTo; + emit_nop nop ) + | I.JALR{rt, rs, defs, uses, cutsTo, mem, nop} => + ( emit "jalr\t"; + emitCell rt; + emit ", "; + emitCell rs; + emit_region mem; + emit_defs defs; + emit_uses uses; + emit_cutsTo cutsTo; + emit_nop nop ) + | I.RET{nop} => + ( emit "jr\t$31"; + emit_nop nop ) + | I.BRANCH{likely, cond, rs, rt, lab, nop} => + ( emit "b"; + emit_cond cond; + emit_likely likely; + emit "\t"; + emitCell rs; + emit ", "; + emitCell rt; + emit ", "; + emit_label lab; + emit_nop nop ) + | I.FBRANCH{likely, fbranch, cc, lab, nop} => + ( emit_fbranch fbranch; + emit_likely likely; + emit "\t"; + emitCell cc; + emit ", "; + emit_label lab; + emit_nop nop ) + | I.ARITH{oper, rt, rs, i} => + ( immedSuffix (asm_arith oper, i); + emit "\t"; + emitCell rt; + emit ", "; + emitCell rs; + emit ", "; + emit_operand i ) + | I.UNARY{oper, rt, rs} => + ( emit_unary oper; + emit "\t"; + emitCell rt; + emit ", "; + emitCell rs ) + | I.MULTIPLY{oper, rt, rs} => + ( emit_multiply oper; + emit "\t"; + emitCell rt; + emit ", "; + emitCell rs ) + | I.DIVIDE{oper, rt, rs} => + ( emit_divide oper; + emit "\t"; + emitCell rt; + emit ", "; + emitCell rs ) + | I.MFLO GP => + ( emit "mflo\t"; + emitCell GP ) + | I.MTLO GP => + ( emit "mtlo\t"; + emitCell GP ) + | I.MFHI GP => + ( emit "mfhi\t"; + emitCell GP ) + | I.MTHI GP => + ( emit "mthi\t"; + emitCell GP ) + | I.BREAK int => + ( emit "break\t"; + emit_int int ) + | I.FARITH{oper, ft, fs1, fs2} => + ( emit_farith oper; + emit "\t"; + emitCell ft; + emit ", "; + emitCell fs1; + emit ", "; + emitCell fs2 ) + | I.FUNARY{oper, ft, fs} => + ( emit_funary oper; + emit "\t"; + emitCell ft; + emit ", "; + emitCell fs ) + | I.FARITH3{oper, ft, fs1, fs2, fs3} => + ( emit_farith3 oper; + emit "\t"; + emitCell ft; + emit ", "; + emitCell fs1; + emit ", "; + emitCell fs2; + emit ", "; + emitCell fs3 ) + | I.FROUND{oper, ft, fs1, rs2} => + ( emit_fround oper; + emit "\t"; + emitCell ft; + emit ", "; + emitCell fs1; + emit ", "; + emitCell fs1; + emit ", "; + emitCell rs2 ) + | I.CVTI2F{cvt, rs, ft} => + ( emit_cvti2f cvt; + emit "\t"; + emitCell ft; + emit ", "; + emitCell rs ) + | I.CVTF2I{cvt, fs, rt} => + ( emit_cvtf2i cvt; + emit "\t"; + emitCell rt; + emit ", "; + emitCell fs ) + | I.COPY{dst, src, impl, tmp} => emitInstrs (Shuffle.shuffle {tmp=tmp, + src=src, dst=dst}) + | I.FCOPY{dst, src, impl, tmp} => emitInstrs (Shuffle.shufflefp {tmp=tmp, + src=src, dst=dst}) + | I.ANNOTATION{i, a} => + ( comment (Annotations.toString a); + nl (); + emitInstr i ) + | I.PHI{} => emit "phi" + | I.SOURCE{} => emit "source" + | I.SINK{} => emit "sink" + ) + in tab(); emitInstr' instr; nl() + end (* emitter *) + and emitInstrIndented i = (indent(); emitInstr i; nl()) + and emitInstrs instrs = + app (if !indent_copies then emitInstrIndented + else emitInstr) instrs + + and emitInstr(I.ANNOTATION{i,a}) = + ( comment(Annotations.toString a); + nl(); + emitInstr i ) + | emitInstr(I.LIVE{regs, spilled}) = + comment("live= " ^ CellsBasis.CellSet.toString regs ^ + "spilled= " ^ CellsBasis.CellSet.toString spilled) + | emitInstr(I.KILL{regs, spilled}) = + comment("killed:: " ^ CellsBasis.CellSet.toString regs ^ + "spilled:: " ^ CellsBasis.CellSet.toString spilled) + | emitInstr(I.INSTR i) = emitter i + | emitInstr(I.COPY{k=CellsBasis.GP, sz, src, dst, tmp}) = + emitInstrs(Shuffle.shuffle{tmp=tmp, src=src, dst=dst}) + | emitInstr(I.COPY{k=CellsBasis.FP, sz, src, dst, tmp}) = + emitInstrs(Shuffle.shufflefp{tmp=tmp, src=src, dst=dst}) + | emitInstr _ = error "emitInstr" + + in S.STREAM{beginCluster=init, + pseudoOp=pseudoOp, + emit=emitInstr, + endCluster=fail, + defineLabel=defineLabel, + entryLabel=entryLabel, + comment=comment, + exitBlock=doNothing, + annotation=annotation, + getAnnotations=getAnnotations + } + end +end + diff --git a/MLRISC/mips/emit/mipsDasm.sml b/MLRISC/mips/emit/mipsDasm.sml new file mode 100644 index 0000000..679108b --- /dev/null +++ b/MLRISC/mips/emit/mipsDasm.sml @@ -0,0 +1,17 @@ +(* + * This file was automatically generated by MDGen + * from the machine description file "mips/mips.md". + *) + + +functor MIPSDisassembler(structure Instr : MIPSINSTR + ) : DISASSEMBLER = +struct + structure I = Instr + structure C = I.C + structure LE = LabelExp + structure Constant = I.Constant + + fun error msg = MLRiscErrorMsg.error("MIPSDasm",msg) +end + diff --git a/MLRISC/mips/emit/mipsMC.sml b/MLRISC/mips/emit/mipsMC.sml new file mode 100644 index 0000000..de82ff6 --- /dev/null +++ b/MLRISC/mips/emit/mipsMC.sml @@ -0,0 +1,157 @@ +(* + * WARNING: This file was automatically generated by MDLGen (v3.0) + * from the machine description file "mips/mips.mdl". + * DO NOT EDIT this file directly + *) + + +functor MIPSMCEmitter(structure Instr : MIPSINSTR + structure MLTreeEval : MLTREE_EVAL where T = Instr.T + structure Stream : INSTRUCTION_STREAM + structure CodeString : CODE_STRING + ) : INSTRUCTION_EMITTER = +struct + structure I = Instr + structure C = I.C + structure Constant = I.Constant + structure T = I.T + structure S = Stream + structure P = S.P + structure W = Word32 + + (* MIPS is little endian *) + + fun error msg = MLRiscErrorMsg.error("MIPSMC",msg) + fun makeStream _ = + let infix && || << >> ~>> + val op << = W.<< + val op >> = W.>> + val op ~>> = W.~>> + val op || = W.orb + val op && = W.andb + val itow = W.fromInt + fun emit_bool false = 0w0 : W.word + | emit_bool true = 0w1 : W.word + val emit_int = itow + fun emit_word w = w + fun emit_label l = itow(Label.addrOf l) + fun emit_labexp le = itow(MLTreeEval.valueOf le) + fun emit_const c = itow(Constant.valueOf c) + val loc = ref 0 + + (* emit a byte *) + fun eByte b = + let val i = !loc in loc := i + 1; CodeString.update(i,b) end + + (* emit the low order byte of a word *) + (* note: fromLargeWord strips the high order bits! *) + fun eByteW w = + let val i = !loc + in loc := i + 1; CodeString.update(i,Word8.fromLargeWord w) end + + fun doNothing _ = () + fun fail _ = raise Fail "MCEmitter" + fun getAnnotations () = error "getAnnotations" + + fun pseudoOp pOp = P.emitValue{pOp=pOp, loc= !loc,emit=eByte} + + fun init n = (CodeString.init n; loc := 0) + + + fun eWord32 w = + let val b8 = w + val w = w >> 0wx8 + val b16 = w + val w = w >> 0wx8 + val b24 = w + val w = w >> 0wx8 + val b32 = w + in + ( eByteW b8; + eByteW b16; + eByteW b24; + eByteW b32 ) + end + fun emit_GP r = itow (CellsBasis.physicalRegisterNum r) + and emit_FP r = itow (CellsBasis.physicalRegisterNum r) + and emit_CC r = itow (CellsBasis.physicalRegisterNum r) + and emit_COND r = itow (CellsBasis.physicalRegisterNum r) + and emit_HI r = itow (CellsBasis.physicalRegisterNum r) + and emit_LO r = itow (CellsBasis.physicalRegisterNum r) + and emit_MEM r = itow (CellsBasis.physicalRegisterNum r) + and emit_CTRL r = itow (CellsBasis.physicalRegisterNum r) + and emit_CELLSET r = itow (CellsBasis.physicalRegisterNum r) + fun Load {l, rt, b, offset} = + let val rt = emit_GP rt + val b = emit_GP b + in eWord32 ((l << 0wx1a) + ((rt << 0wx15) + ((b << 0wx10) + (offset && 0wxffff)))) + end + and Special {rs, rt, opc} = + let val rs = emit_GP rs + val rt = emit_GP rt + in eWord32 ((rs << 0wx15) + ((rt << 0wx10) + opc)) + end + fun emitter instr = + let + fun emitInstr (I.NOP) = error "NOP" + | emitInstr (I.LUI{rt, imm}) = error "LUI" + | emitInstr (I.LA{rt, b, d}) = error "LA" + | emitInstr (I.DLA{rt, b, d}) = error "DLA" + | emitInstr (I.LOAD{l, rt, b, d, mem}) = error "LOAD" + | emitInstr (I.STORE{s, rs, b, d, mem}) = error "STORE" + | emitInstr (I.FLOAD{l, ft, b, d, mem}) = error "FLOAD" + | emitInstr (I.FSTORE{s, fs, b, d, mem}) = error "FSTORE" + | emitInstr (I.FCMP{fcond, fmt, cc, fs1, fs2}) = error "FCMP" + | emitInstr (I.TRAP{t, rs, i}) = error "TRAP" + | emitInstr (I.J{lab, nop}) = error "J" + | emitInstr (I.JR{rs, labels, nop}) = error "JR" + | emitInstr (I.JAL{lab, defs, uses, cutsTo, mem, nop}) = error "JAL" + | emitInstr (I.JALR{rt, rs, defs, uses, cutsTo, mem, nop}) = error "JALR" + | emitInstr (I.RET{nop}) = error "RET" + | emitInstr (I.BRANCH{likely, cond, rs, rt, lab, nop}) = error "BRANCH" + | emitInstr (I.FBRANCH{likely, fbranch, cc, lab, nop}) = error "FBRANCH" + | emitInstr (I.ARITH{oper, rt, rs, i}) = error "ARITH" + | emitInstr (I.UNARY{oper, rt, rs}) = error "UNARY" + | emitInstr (I.MULTIPLY{oper, rt, rs}) = error "MULTIPLY" + | emitInstr (I.DIVIDE{oper, rt, rs}) = error "DIVIDE" + | emitInstr (I.MFLO GP) = error "MFLO" + | emitInstr (I.MTLO GP) = error "MTLO" + | emitInstr (I.MFHI GP) = error "MFHI" + | emitInstr (I.MTHI GP) = error "MTHI" + | emitInstr (I.BREAK int) = error "BREAK" + | emitInstr (I.FARITH{oper, ft, fs1, fs2}) = error "FARITH" + | emitInstr (I.FUNARY{oper, ft, fs}) = error "FUNARY" + | emitInstr (I.FARITH3{oper, ft, fs1, fs2, fs3}) = error "FARITH3" + | emitInstr (I.FROUND{oper, ft, fs1, rs2}) = error "FROUND" + | emitInstr (I.CVTI2F{cvt, rs, ft}) = error "CVTI2F" + | emitInstr (I.CVTF2I{cvt, fs, rt}) = error "CVTF2I" + | emitInstr (I.COPY{dst, src, impl, tmp}) = error "COPY" + | emitInstr (I.FCOPY{dst, src, impl, tmp}) = error "FCOPY" + | emitInstr (I.ANNOTATION{i, a}) = emitInstr i + | emitInstr (I.PHI{}) = () + | emitInstr (I.SOURCE{}) = () + | emitInstr (I.SINK{}) = () + in + emitInstr instr + end + + fun emitInstruction(I.ANNOTATION{i, ...}) = emitInstruction(i) + | emitInstruction(I.INSTR(i)) = emitter(i) + | emitInstruction(I.LIVE _) = () + | emitInstruction(I.KILL _) = () + | emitInstruction _ = error "emitInstruction" + + in S.STREAM{beginCluster=init, + pseudoOp=pseudoOp, + emit=emitInstruction, + endCluster=fail, + defineLabel=doNothing, + entryLabel=doNothing, + comment=doNothing, + exitBlock=doNothing, + annotation=doNothing, + getAnnotations=getAnnotations + } + end +end + diff --git a/MLRISC/mips/instructions/mipsCells.sml b/MLRISC/mips/instructions/mipsCells.sml new file mode 100644 index 0000000..0a38e3c --- /dev/null +++ b/MLRISC/mips/instructions/mipsCells.sml @@ -0,0 +1,148 @@ +(* + * WARNING: This file was automatically generated by MDLGen (v3.0) + * from the machine description file "mips/mips.mdl". + * DO NOT EDIT this file directly + *) + + +signature MIPSCELLS = +sig + include CELLS + val COND : CellsBasis.cellkind + val HI : CellsBasis.cellkind + val LO : CellsBasis.cellkind + val CELLSET : CellsBasis.cellkind + val showGP : CellsBasis.register_id -> string + val showFP : CellsBasis.register_id -> string + val showCC : CellsBasis.register_id -> string + val showCOND : CellsBasis.register_id -> string + val showHI : CellsBasis.register_id -> string + val showLO : CellsBasis.register_id -> string + val showMEM : CellsBasis.register_id -> string + val showCTRL : CellsBasis.register_id -> string + val showCELLSET : CellsBasis.register_id -> string + val showGPWithSize : CellsBasis.register_id * CellsBasis.sz -> string + val showFPWithSize : CellsBasis.register_id * CellsBasis.sz -> string + val showCCWithSize : CellsBasis.register_id * CellsBasis.sz -> string + val showCONDWithSize : CellsBasis.register_id * CellsBasis.sz -> string + val showHIWithSize : CellsBasis.register_id * CellsBasis.sz -> string + val showLOWithSize : CellsBasis.register_id * CellsBasis.sz -> string + val showMEMWithSize : CellsBasis.register_id * CellsBasis.sz -> string + val showCTRLWithSize : CellsBasis.register_id * CellsBasis.sz -> string + val showCELLSETWithSize : CellsBasis.register_id * CellsBasis.sz -> string + val linkR : CellsBasis.cell + val frameptrR : CellsBasis.cell + val globalptrR : CellsBasis.cell + val r0 : CellsBasis.cell + val addGP : CellsBasis.cell * cellset -> cellset + val addFP : CellsBasis.cell * cellset -> cellset + val addCC : CellsBasis.cell * cellset -> cellset + val addCOND : CellsBasis.cell * cellset -> cellset + val addHI : CellsBasis.cell * cellset -> cellset + val addLO : CellsBasis.cell * cellset -> cellset + val addMEM : CellsBasis.cell * cellset -> cellset + val addCTRL : CellsBasis.cell * cellset -> cellset + val addCELLSET : CellsBasis.cell * cellset -> cellset +end + +structure MIPSCells : MIPSCELLS = +struct + exception MIPSCells + fun error msg = MLRiscErrorMsg.error("MIPSCells",msg) + open CellsBasis + fun showGPWithSize (r, ty) = (fn (1, _) => "$at" + | (28, _) => "$gp" + | (29, _) => "$sp" + | (30, _) => "$fp" + | (r, _) => "$" ^ (Int.toString r) + ) (r, ty) + and showFPWithSize (r, ty) = (fn (f, _) => "$f" ^ (Int.toString f) + ) (r, ty) + and showCCWithSize (r, ty) = (fn (r, _) => "$" ^ (Int.toString r) + ) (r, ty) + and showCONDWithSize (r, ty) = (fn (r, _) => Int.toString r + ) (r, ty) + and showHIWithSize (r, ty) = (fn _ => "$hi" + ) (r, ty) + and showLOWithSize (r, ty) = (fn _ => "$lo" + ) (r, ty) + and showMEMWithSize (r, ty) = (fn (r, _) => "m" ^ (Int.toString r) + ) (r, ty) + and showCTRLWithSize (r, ty) = (fn (r, _) => "ctrl" ^ (Int.toString r) + ) (r, ty) + and showCELLSETWithSize (r, ty) = (fn _ => "CELLSET" + ) (r, ty) + fun showGP r = showGPWithSize (r, 64) + fun showFP r = showFPWithSize (r, 64) + fun showCC r = showCCWithSize (r, 64) + fun showCOND r = showCONDWithSize (r, 64) + fun showHI r = showHIWithSize (r, 64) + fun showLO r = showLOWithSize (r, 64) + fun showMEM r = showMEMWithSize (r, 8) + fun showCTRL r = showCTRLWithSize (r, 0) + fun showCELLSET r = showCELLSETWithSize (r, 0) + val COND = CellsBasis.newCellKind {name="COND", nickname="cond"} + and HI = CellsBasis.newCellKind {name="HI", nickname="hi"} + and LO = CellsBasis.newCellKind {name="LO", nickname="lo"} + and CELLSET = CellsBasis.newCellKind {name="CELLSET", nickname="cellset"} + structure MyCells = Cells + (exception Cells = MIPSCells + val firstPseudo = 256 + val desc_GP = CellsBasis.DESC {low=0, high=31, kind=CellsBasis.GP, defaultValues=[(0, + 0)], zeroReg=SOME 0, toString=showGP, toStringWithSize=showGPWithSize, + counter=ref 0, dedicated=ref 0, physicalRegs=ref CellsBasis.array0} + and desc_FP = CellsBasis.DESC {low=32, high=63, kind=CellsBasis.FP, + defaultValues=[], zeroReg=NONE, toString=showFP, toStringWithSize=showFPWithSize, + counter=ref 0, dedicated=ref 0, physicalRegs=ref CellsBasis.array0} + and desc_COND = CellsBasis.DESC {low=64, high=71, kind=COND, defaultValues=[], + zeroReg=NONE, toString=showCOND, toStringWithSize=showCONDWithSize, + counter=ref 0, dedicated=ref 0, physicalRegs=ref CellsBasis.array0} + and desc_HI = CellsBasis.DESC {low=72, high=72, kind=HI, defaultValues=[], + zeroReg=NONE, toString=showHI, toStringWithSize=showHIWithSize, + counter=ref 0, dedicated=ref 0, physicalRegs=ref CellsBasis.array0} + and desc_LO = CellsBasis.DESC {low=73, high=73, kind=LO, defaultValues=[], + zeroReg=NONE, toString=showLO, toStringWithSize=showLOWithSize, + counter=ref 0, dedicated=ref 0, physicalRegs=ref CellsBasis.array0} + and desc_MEM = CellsBasis.DESC {low=74, high=73, kind=CellsBasis.MEM, + defaultValues=[], zeroReg=NONE, toString=showMEM, toStringWithSize=showMEMWithSize, + counter=ref 0, dedicated=ref 0, physicalRegs=ref CellsBasis.array0} + and desc_CTRL = CellsBasis.DESC {low=74, high=73, kind=CellsBasis.CTRL, + defaultValues=[], zeroReg=NONE, toString=showCTRL, toStringWithSize=showCTRLWithSize, + counter=ref 0, dedicated=ref 0, physicalRegs=ref CellsBasis.array0} + and desc_CELLSET = CellsBasis.DESC {low=74, high=73, kind=CELLSET, defaultValues=[], + zeroReg=NONE, toString=showCELLSET, toStringWithSize=showCELLSETWithSize, + counter=ref 0, dedicated=ref 0, physicalRegs=ref CellsBasis.array0} + val cellKindDescs = [(CellsBasis.GP, desc_GP), (CellsBasis.FP, desc_FP), + (CellsBasis.CC, desc_GP), (COND, desc_COND), (HI, desc_HI), (LO, + desc_LO), (CellsBasis.MEM, desc_MEM), (CellsBasis.CTRL, desc_CTRL), + (CELLSET, desc_CELLSET)] + ) + + open MyCells + val addGP = CellSet.add + and addFP = CellSet.add + and addCC = CellSet.add + and addCOND = CellSet.add + and addHI = CellSet.add + and addLO = CellSet.add + and addMEM = CellSet.add + and addCTRL = CellSet.add + and addCELLSET = CellSet.add + val RegGP = Reg GP + and RegFP = Reg FP + and RegCC = Reg CC + and RegCOND = Reg COND + and RegHI = Reg HI + and RegLO = Reg LO + and RegMEM = Reg MEM + and RegCTRL = Reg CTRL + and RegCELLSET = Reg CELLSET + val stackptrR = RegGP 29 + val linkR = RegGP 31 + val frameptrR = RegGP 30 + val globalptrR = RegGP 28 + val asmTmpR = RegGP 1 + val fasmTmp = RegFP 30 + val r0 = RegGP 0 +end + diff --git a/MLRISC/mips/instructions/mipsFreqProps.sml b/MLRISC/mips/instructions/mipsFreqProps.sml new file mode 100644 index 0000000..72d1446 --- /dev/null +++ b/MLRISC/mips/instructions/mipsFreqProps.sml @@ -0,0 +1,23 @@ +(* + * Extract frequency information from the MIPS architecture + * + * -- Allen + *) + +functor MIPSFreqProps(MIPSInstr : MIPSINSTR) : FREQUENCY_PROPERTIES = +struct + + structure I = MIPSInstr + + fun branchProb(I.ANNOTATION{a, i, ...}) = + (case #peek MLRiscAnnotations.BRANCH_PROB a of + SOME b => b + | NONE => branchProb i + ) + | branchProb(I.BRANCH{likely,...}) = 50 (* default *) + | branchProb(I.FBRANCH{likely,...}) = 50 (* default *) + | branchProb(I.J _) = 100 (* unconditional *) + | branchProb(I.JR{labels,...}) = 100 div length labels (* assume equal prob *) + | branchProb _ = 0 (* non-branch *) + +end diff --git a/MLRISC/mips/instructions/mipsInstr.sml b/MLRISC/mips/instructions/mipsInstr.sml new file mode 100644 index 0000000..31f23ef --- /dev/null +++ b/MLRISC/mips/instructions/mipsInstr.sml @@ -0,0 +1,675 @@ +(* + * WARNING: This file was automatically generated by MDLGen (v3.0) + * from the machine description file "mips/mips.mdl". + * DO NOT EDIT this file directly + *) + + +signature MIPSINSTR = +sig + structure C : MIPSCELLS + structure CB : CELLS_BASIS = CellsBasis + structure T : MLTREE + structure Constant: CONSTANT + structure Region : REGION + sharing Constant = T.Constant + sharing Region = T.Region + datatype load = + LD + | LW + | LH + | LHU + | LB + | LBU + | LWL + | LWR + | LWU + | LDL + | LDR + | ULH + | ULHU + | ULW + | ULD + datatype store = + SD + | SW + | SH + | SB + | SWL + | SWR + | SDL + | SDR + | USH + | USW + | USD + datatype fload = + LDC1 + | LWC1 + datatype fstore = + SDC1 + | SWC1 + datatype fcond = + FF + | FUN + | FEQ + | FUEQ + | FOLT + | FULT + | FOLE + | FULE + | FNGLE + | FSP + | FNGL + | FSEQ + | FLT + | FNGE + | FLE + | FNGT + datatype cond = + EQ + | NE + | LEZ + | GTZ + | LTZ + | GEZ + datatype fbranch = + BC1T + | BC1F + datatype likely = + LIKELY + | UNLIKELY + datatype arith = + ADD + | ADDU + | AND + | XOR + | MUL + | MULO + | MULOU + | NOR + | OR + | SEQ + | SGT + | SGE + | SGEU + | SGTU + | SLT + | SLE + | SLEU + | SLTU + | SNE + | SUB + | SUBU + | REM + | REMU + | SRA + | SLL + | SRL + | ROR + | ROL + | MOVN + | MOVZ + | DADD + | DADDU + | DMUL + | DMULO + | DMULOU + | DSUB + | DSUBU + | DREM + | DREMU + | DROL + | DROR + | DSLL + | DSLL32 + | DSLLV + | DSRA + | DSRA32 + | DSRAV + | DSRL + | DSRL32 + | DSRLV + datatype unary = + ABS + | NEG + | NEGU + | NOT + | DABS + | DNEG + | DNEGU + datatype multiply = + MULT + | MULTU + | DMULT + | DMULTU + datatype divide = + DIV + | DIVU + | DDIV + | DDIVU + datatype trap = + TEQ + | TNE + | TLT + | TLTU + | TGE + | TGEU + datatype farith = + ADD_D + | ADD_S + | SUB_D + | SUB_S + | MUL_D + | MUL_S + | DIV_D + | DIV_S + datatype funary = + MOV_D + | MOV_S + | ABS_D + | ABS_S + | NEG_D + | NEG_S + | SQRT_D + | SQRT_S + | CVT_SD + | CVT_SW + | CVT_DS + | CVT_DW + | CVT_WS + | CVT_WD + | CVT_SL + | CVT_DL + | CVT_LS + | CVT_LD + datatype cvti2f = + MTC1 + | DMTC1 + datatype cvtf2i = + MFC1 + | DMFC1 + datatype farith3 = + MADD_D + | MADD_S + | NMADD_D + | NMADD_S + | MSUB_D + | MSUB_S + | NMSUB_D + | NMSUB_S + datatype fround = + TRUNC_WS + | TRUNC_WD + | ROUND_WS + | ROUND_WD + | CEIL_WD + | CEIL_WS + | CEILU_WD + | CEILU_WS + | FLOOR_WD + | FLOOR_WS + | FLOORU_WD + | FLOORU_WS + | ROUNDU_WD + | ROUNDU_WS + | TRUNCU_WD + | TRUNCU_WS + | TRUNC_LS + | TRUNC_LD + | ROUND_LS + | ROUND_LD + | CEIL_LS + | CEIL_LD + | FLOOR_LS + | FLOOR_LD + datatype fmt = + SINGLE + | DOUBLE + datatype operand = + Imm of int + | Reg of CellsBasis.cell + | Lab of T.labexp + | HiLab of T.labexp + | LoLab of T.labexp + datatype ea = + Direct of CellsBasis.cell + | FDirect of CellsBasis.cell + | Displace of {base:CellsBasis.cell, disp:int} + type addressing_mode = C.cell * operand + datatype instr = + NOP + | LUI of {rt:CellsBasis.cell, imm:operand} + | LA of {rt:CellsBasis.cell, b:CellsBasis.cell, d:operand} + | DLA of {rt:CellsBasis.cell, b:CellsBasis.cell, d:operand} + | LOAD of {l:load, rt:CellsBasis.cell, b:CellsBasis.cell, d:operand, mem:Region.region} + | STORE of {s:store, rs:CellsBasis.cell, b:CellsBasis.cell, d:operand, mem:Region.region} + | FLOAD of {l:fload, ft:CellsBasis.cell, b:CellsBasis.cell, d:operand, mem:Region.region} + | FSTORE of {s:fstore, fs:CellsBasis.cell, b:CellsBasis.cell, d:operand, + mem:Region.region} + | FCMP of {fcond:fcond, fmt:fmt, cc:CellsBasis.cell, fs1:CellsBasis.cell, + fs2:CellsBasis.cell} + | TRAP of {t:trap, rs:CellsBasis.cell, i:operand} + | J of {lab:Label.label, nop:bool} + | JR of {rs:CellsBasis.cell, labels:Label.label list, nop:bool} + | JAL of {lab:Label.label, defs:C.cellset, uses:C.cellset, cutsTo:Label.label list, + mem:Region.region, nop:bool} + | JALR of {rt:CellsBasis.cell, rs:CellsBasis.cell, defs:C.cellset, uses:C.cellset, + cutsTo:Label.label list, mem:Region.region, nop:bool} + | RET of {nop:bool} + | BRANCH of {likely:likely, cond:cond, rs:CellsBasis.cell, rt:CellsBasis.cell, + lab:Label.label, nop:bool} + | FBRANCH of {likely:likely, fbranch:fbranch, cc:CellsBasis.cell, lab:Label.label, + nop:bool} + | ARITH of {oper:arith, rt:CellsBasis.cell, rs:CellsBasis.cell, i:operand} + | UNARY of {oper:unary, rt:CellsBasis.cell, rs:CellsBasis.cell} + | MULTIPLY of {oper:multiply, rt:CellsBasis.cell, rs:CellsBasis.cell} + | DIVIDE of {oper:divide, rt:CellsBasis.cell, rs:CellsBasis.cell} + | MFLO of CellsBasis.cell + | MTLO of CellsBasis.cell + | MFHI of CellsBasis.cell + | MTHI of CellsBasis.cell + | BREAK of int + | FARITH of {oper:farith, ft:CellsBasis.cell, fs1:CellsBasis.cell, fs2:CellsBasis.cell} + | FUNARY of {oper:funary, ft:CellsBasis.cell, fs:CellsBasis.cell} + | FARITH3 of {oper:farith3, ft:CellsBasis.cell, fs1:CellsBasis.cell, fs2:CellsBasis.cell, + fs3:CellsBasis.cell} + | FROUND of {oper:fround, ft:CellsBasis.cell, fs1:CellsBasis.cell, rs2:CellsBasis.cell} + | CVTI2F of {cvt:cvti2f, rs:CellsBasis.cell, ft:CellsBasis.cell} + | CVTF2I of {cvt:cvtf2i, fs:CellsBasis.cell, rt:CellsBasis.cell} + | COPY of {dst:(CellsBasis.cell) list, src:(CellsBasis.cell) list, impl:instruction list option ref, + tmp:ea option} + | FCOPY of {dst:(CellsBasis.cell) list, src:(CellsBasis.cell) list, impl:instruction list option ref, + tmp:ea option} + | ANNOTATION of {i:instruction, a:Annotations.annotation} + | PHI of {} + | SOURCE of {} + | SINK of {} + and instruction = + LIVE of {regs: C.cellset, spilled: C.cellset} + | KILL of {regs: C.cellset, spilled: C.cellset} + | COPY of {k: CellsBasis.cellkind, + sz: int, (* in bits *) + dst: CellsBasis.cell list, + src: CellsBasis.cell list, + tmp: ea option (* NONE if |dst| = {src| = 1 *)} + | ANNOTATION of {i:instruction, a:Annotations.annotation} + | INSTR of instr + val nop : instruction + val lui : {rt:CellsBasis.cell, imm:operand} -> instruction + val la : {rt:CellsBasis.cell, b:CellsBasis.cell, d:operand} -> instruction + val dla : {rt:CellsBasis.cell, b:CellsBasis.cell, d:operand} -> instruction + val load : {l:load, rt:CellsBasis.cell, b:CellsBasis.cell, d:operand, mem:Region.region} -> instruction + val store : {s:store, rs:CellsBasis.cell, b:CellsBasis.cell, d:operand, + mem:Region.region} -> instruction + val fload : {l:fload, ft:CellsBasis.cell, b:CellsBasis.cell, d:operand, + mem:Region.region} -> instruction + val fstore : {s:fstore, fs:CellsBasis.cell, b:CellsBasis.cell, d:operand, + mem:Region.region} -> instruction + val fcmp : {fcond:fcond, fmt:fmt, cc:CellsBasis.cell, fs1:CellsBasis.cell, + fs2:CellsBasis.cell} -> instruction + val trap : {t:trap, rs:CellsBasis.cell, i:operand} -> instruction + val j : {lab:Label.label, nop:bool} -> instruction + val jr : {rs:CellsBasis.cell, labels:Label.label list, nop:bool} -> instruction + val jal : {lab:Label.label, defs:C.cellset, uses:C.cellset, cutsTo:Label.label list, + mem:Region.region, nop:bool} -> instruction + val jalr : {rt:CellsBasis.cell, rs:CellsBasis.cell, defs:C.cellset, uses:C.cellset, + cutsTo:Label.label list, mem:Region.region, nop:bool} -> instruction + val ret : {nop:bool} -> instruction + val branch : {likely:likely, cond:cond, rs:CellsBasis.cell, rt:CellsBasis.cell, + lab:Label.label, nop:bool} -> instruction + val fbranch : {likely:likely, fbranch:fbranch, cc:CellsBasis.cell, lab:Label.label, + nop:bool} -> instruction + val arith : {oper:arith, rt:CellsBasis.cell, rs:CellsBasis.cell, i:operand} -> instruction + val unary : {oper:unary, rt:CellsBasis.cell, rs:CellsBasis.cell} -> instruction + val multiply : {oper:multiply, rt:CellsBasis.cell, rs:CellsBasis.cell} -> instruction + val divide : {oper:divide, rt:CellsBasis.cell, rs:CellsBasis.cell} -> instruction + val mflo : CellsBasis.cell -> instruction + val mtlo : CellsBasis.cell -> instruction + val mfhi : CellsBasis.cell -> instruction + val mthi : CellsBasis.cell -> instruction + val break : int -> instruction + val farith : {oper:farith, ft:CellsBasis.cell, fs1:CellsBasis.cell, fs2:CellsBasis.cell} -> instruction + val funary : {oper:funary, ft:CellsBasis.cell, fs:CellsBasis.cell} -> instruction + val farith3 : {oper:farith3, ft:CellsBasis.cell, fs1:CellsBasis.cell, fs2:CellsBasis.cell, + fs3:CellsBasis.cell} -> instruction + val fround : {oper:fround, ft:CellsBasis.cell, fs1:CellsBasis.cell, rs2:CellsBasis.cell} -> instruction + val cvti2f : {cvt:cvti2f, rs:CellsBasis.cell, ft:CellsBasis.cell} -> instruction + val cvtf2i : {cvt:cvtf2i, fs:CellsBasis.cell, rt:CellsBasis.cell} -> instruction + val copy : {dst:(CellsBasis.cell) list, src:(CellsBasis.cell) list, impl:instruction list option ref, + tmp:ea option} -> instruction + val fcopy : {dst:(CellsBasis.cell) list, src:(CellsBasis.cell) list, impl:instruction list option ref, + tmp:ea option} -> instruction + val annotation : {i:instruction, a:Annotations.annotation} -> instruction + val phi : {} -> instruction + val source : {} -> instruction + val sink : {} -> instruction +end + +functor MIPSInstr(T: MLTREE + ) : MIPSINSTR = +struct + structure C = MIPSCells + structure CB = CellsBasis + structure T = T + structure Region = T.Region + structure Constant = T.Constant + datatype load = + LD + | LW + | LH + | LHU + | LB + | LBU + | LWL + | LWR + | LWU + | LDL + | LDR + | ULH + | ULHU + | ULW + | ULD + datatype store = + SD + | SW + | SH + | SB + | SWL + | SWR + | SDL + | SDR + | USH + | USW + | USD + datatype fload = + LDC1 + | LWC1 + datatype fstore = + SDC1 + | SWC1 + datatype fcond = + FF + | FUN + | FEQ + | FUEQ + | FOLT + | FULT + | FOLE + | FULE + | FNGLE + | FSP + | FNGL + | FSEQ + | FLT + | FNGE + | FLE + | FNGT + datatype cond = + EQ + | NE + | LEZ + | GTZ + | LTZ + | GEZ + datatype fbranch = + BC1T + | BC1F + datatype likely = + LIKELY + | UNLIKELY + datatype arith = + ADD + | ADDU + | AND + | XOR + | MUL + | MULO + | MULOU + | NOR + | OR + | SEQ + | SGT + | SGE + | SGEU + | SGTU + | SLT + | SLE + | SLEU + | SLTU + | SNE + | SUB + | SUBU + | REM + | REMU + | SRA + | SLL + | SRL + | ROR + | ROL + | MOVN + | MOVZ + | DADD + | DADDU + | DMUL + | DMULO + | DMULOU + | DSUB + | DSUBU + | DREM + | DREMU + | DROL + | DROR + | DSLL + | DSLL32 + | DSLLV + | DSRA + | DSRA32 + | DSRAV + | DSRL + | DSRL32 + | DSRLV + datatype unary = + ABS + | NEG + | NEGU + | NOT + | DABS + | DNEG + | DNEGU + datatype multiply = + MULT + | MULTU + | DMULT + | DMULTU + datatype divide = + DIV + | DIVU + | DDIV + | DDIVU + datatype trap = + TEQ + | TNE + | TLT + | TLTU + | TGE + | TGEU + datatype farith = + ADD_D + | ADD_S + | SUB_D + | SUB_S + | MUL_D + | MUL_S + | DIV_D + | DIV_S + datatype funary = + MOV_D + | MOV_S + | ABS_D + | ABS_S + | NEG_D + | NEG_S + | SQRT_D + | SQRT_S + | CVT_SD + | CVT_SW + | CVT_DS + | CVT_DW + | CVT_WS + | CVT_WD + | CVT_SL + | CVT_DL + | CVT_LS + | CVT_LD + datatype cvti2f = + MTC1 + | DMTC1 + datatype cvtf2i = + MFC1 + | DMFC1 + datatype farith3 = + MADD_D + | MADD_S + | NMADD_D + | NMADD_S + | MSUB_D + | MSUB_S + | NMSUB_D + | NMSUB_S + datatype fround = + TRUNC_WS + | TRUNC_WD + | ROUND_WS + | ROUND_WD + | CEIL_WD + | CEIL_WS + | CEILU_WD + | CEILU_WS + | FLOOR_WD + | FLOOR_WS + | FLOORU_WD + | FLOORU_WS + | ROUNDU_WD + | ROUNDU_WS + | TRUNCU_WD + | TRUNCU_WS + | TRUNC_LS + | TRUNC_LD + | ROUND_LS + | ROUND_LD + | CEIL_LS + | CEIL_LD + | FLOOR_LS + | FLOOR_LD + datatype fmt = + SINGLE + | DOUBLE + datatype operand = + Imm of int + | Reg of CellsBasis.cell + | Lab of T.labexp + | HiLab of T.labexp + | LoLab of T.labexp + datatype ea = + Direct of CellsBasis.cell + | FDirect of CellsBasis.cell + | Displace of {base:CellsBasis.cell, disp:int} + type addressing_mode = C.cell * operand + datatype instr = + NOP + | LUI of {rt:CellsBasis.cell, imm:operand} + | LA of {rt:CellsBasis.cell, b:CellsBasis.cell, d:operand} + | DLA of {rt:CellsBasis.cell, b:CellsBasis.cell, d:operand} + | LOAD of {l:load, rt:CellsBasis.cell, b:CellsBasis.cell, d:operand, mem:Region.region} + | STORE of {s:store, rs:CellsBasis.cell, b:CellsBasis.cell, d:operand, mem:Region.region} + | FLOAD of {l:fload, ft:CellsBasis.cell, b:CellsBasis.cell, d:operand, mem:Region.region} + | FSTORE of {s:fstore, fs:CellsBasis.cell, b:CellsBasis.cell, d:operand, + mem:Region.region} + | FCMP of {fcond:fcond, fmt:fmt, cc:CellsBasis.cell, fs1:CellsBasis.cell, + fs2:CellsBasis.cell} + | TRAP of {t:trap, rs:CellsBasis.cell, i:operand} + | J of {lab:Label.label, nop:bool} + | JR of {rs:CellsBasis.cell, labels:Label.label list, nop:bool} + | JAL of {lab:Label.label, defs:C.cellset, uses:C.cellset, cutsTo:Label.label list, + mem:Region.region, nop:bool} + | JALR of {rt:CellsBasis.cell, rs:CellsBasis.cell, defs:C.cellset, uses:C.cellset, + cutsTo:Label.label list, mem:Region.region, nop:bool} + | RET of {nop:bool} + | BRANCH of {likely:likely, cond:cond, rs:CellsBasis.cell, rt:CellsBasis.cell, + lab:Label.label, nop:bool} + | FBRANCH of {likely:likely, fbranch:fbranch, cc:CellsBasis.cell, lab:Label.label, + nop:bool} + | ARITH of {oper:arith, rt:CellsBasis.cell, rs:CellsBasis.cell, i:operand} + | UNARY of {oper:unary, rt:CellsBasis.cell, rs:CellsBasis.cell} + | MULTIPLY of {oper:multiply, rt:CellsBasis.cell, rs:CellsBasis.cell} + | DIVIDE of {oper:divide, rt:CellsBasis.cell, rs:CellsBasis.cell} + | MFLO of CellsBasis.cell + | MTLO of CellsBasis.cell + | MFHI of CellsBasis.cell + | MTHI of CellsBasis.cell + | BREAK of int + | FARITH of {oper:farith, ft:CellsBasis.cell, fs1:CellsBasis.cell, fs2:CellsBasis.cell} + | FUNARY of {oper:funary, ft:CellsBasis.cell, fs:CellsBasis.cell} + | FARITH3 of {oper:farith3, ft:CellsBasis.cell, fs1:CellsBasis.cell, fs2:CellsBasis.cell, + fs3:CellsBasis.cell} + | FROUND of {oper:fround, ft:CellsBasis.cell, fs1:CellsBasis.cell, rs2:CellsBasis.cell} + | CVTI2F of {cvt:cvti2f, rs:CellsBasis.cell, ft:CellsBasis.cell} + | CVTF2I of {cvt:cvtf2i, fs:CellsBasis.cell, rt:CellsBasis.cell} + | COPY of {dst:(CellsBasis.cell) list, src:(CellsBasis.cell) list, impl:instruction list option ref, + tmp:ea option} + | FCOPY of {dst:(CellsBasis.cell) list, src:(CellsBasis.cell) list, impl:instruction list option ref, + tmp:ea option} + | ANNOTATION of {i:instruction, a:Annotations.annotation} + | PHI of {} + | SOURCE of {} + | SINK of {} + and instruction = + LIVE of {regs: C.cellset, spilled: C.cellset} + | KILL of {regs: C.cellset, spilled: C.cellset} + | COPY of {k: CellsBasis.cellkind, + sz: int, (* in bits *) + dst: CellsBasis.cell list, + src: CellsBasis.cell list, + tmp: ea option (* NONE if |dst| = {src| = 1 *)} + | ANNOTATION of {i:instruction, a:Annotations.annotation} + | INSTR of instr + val nop = INSTR NOP + and lui = INSTR o LUI + and la = INSTR o LA + and dla = INSTR o DLA + and load = INSTR o LOAD + and store = INSTR o STORE + and fload = INSTR o FLOAD + and fstore = INSTR o FSTORE + and fcmp = INSTR o FCMP + and trap = INSTR o TRAP + and j = INSTR o J + and jr = INSTR o JR + and jal = INSTR o JAL + and jalr = INSTR o JALR + and ret = INSTR o RET + and branch = INSTR o BRANCH + and fbranch = INSTR o FBRANCH + and arith = INSTR o ARITH + and unary = INSTR o UNARY + and multiply = INSTR o MULTIPLY + and divide = INSTR o DIVIDE + and mflo = INSTR o MFLO + and mtlo = INSTR o MTLO + and mfhi = INSTR o MFHI + and mthi = INSTR o MTHI + and break = INSTR o BREAK + and farith = INSTR o FARITH + and funary = INSTR o FUNARY + and farith3 = INSTR o FARITH3 + and fround = INSTR o FROUND + and cvti2f = INSTR o CVTI2F + and cvtf2i = INSTR o CVTF2I + and copy = INSTR o COPY + and fcopy = INSTR o FCOPY + and annotation = INSTR o ANNOTATION + and phi = INSTR o PHI + and source = INSTR o SOURCE + and sink = INSTR o SINK +end + diff --git a/MLRISC/mips/instructions/mipsProps.sml b/MLRISC/mips/instructions/mipsProps.sml new file mode 100644 index 0000000..2d846dd --- /dev/null +++ b/MLRISC/mips/instructions/mipsProps.sml @@ -0,0 +1,200 @@ +(* mipsProps.sml + * + * Modified from AlphaProps + * -- Allen + *) + +functor MIPSProps(MIPSInstr:MIPSINSTR) : INSN_PROPERTIES = +struct + structure I = MIPSInstr + structure C = I.C + structure LE = I.LabelExp + + exception NegateConditional + + fun error msg = MLRiscErrorMsg.error ("mipsProps.",msg) + + val zeroR = C.r0 + + datatype kind = IK_JUMP | IK_NOP | IK_INSTR | IK_COPY | IK_CALL + | IK_CALL_WITH_CUTS | IK_PHI | IK_SOURCE | IK_SINK + datatype target = LABELLED of Label.label | FALLTHROUGH | ESCAPES + + (*======================================================================== + * Instruction Kinds + *========================================================================*) + fun instrKind(I.BRANCH _) = IK_JUMP + | instrKind(I.FBRANCH _) = IK_JUMP + | instrKind(I.JR _) = IK_JUMP + | instrKind(I.J _) = IK_JUMP + | instrKind(I.COPY _) = IK_COPY + | instrKind(I.FCOPY _) = IK_COPY + | instrKind(I.JAL{cutsTo=[],...}) = IK_CALL + | instrKind(I.JAL _) = IK_CALL_WITH_CUTS + | instrKind(I.JALR{cutsTo=[],...}) = IK_CALL + | instrKind(I.JALR _) = IK_CALL_WITH_CUTS + | instrKind(I.RET _) = IK_JUMP + | instrKind(I.PHI _) = IK_PHI + | instrKind(I.SOURCE _) = IK_SOURCE + | instrKind(I.SINK _) = IK_SINK + | instrKind(I.ANNOTATION{i,...}) = instrKind i + | instrKind _ = IK_INSTR + + fun moveInstr(I.COPY _) = true + | moveInstr(I.FCOPY _) = true + | moveInstr(I.ANNOTATION{i,...}) = moveInstr i + | moveInstr _ = false + + val nop = fn () => I.ARITH{oper=I.ADD, rs=zeroR, i=I.Reg zeroR, rt=zeroR} + + (*======================================================================== + * Parallel Move + *========================================================================*) + fun moveTmpR(I.COPY{tmp=SOME(I.Direct r), ...}) = SOME r + | moveTmpR(I.FCOPY{tmp=SOME(I.FDirect f), ...}) = SOME f + | moveTmpR(I.ANNOTATION{i,...}) = moveTmpR i + | moveTmpR _ = NONE + + fun moveDstSrc(I.COPY{dst, src, ...}) = (dst, src) + | moveDstSrc(I.FCOPY{dst, src, ...}) = (dst, src) + | moveDstSrc(I.ANNOTATION{i,...}) = moveDstSrc i + | moveDstSrc _ = error "moveDstSrc" + + (*======================================================================== + * Branches and Calls/Returns + *========================================================================*) + fun branchTargets(I.J{lab, ...}) = [LABELLED lab] + | branchTargets(I.BRANCH{lab, ...}) = [LABELLED lab, FALLTHROUGH] + | branchTargets(I.FBRANCH{lab, ...}) = [LABELLED lab, FALLTHROUGH] + | branchTargets(I.JR{labels=[],...}) = [ESCAPES] + | branchTargets(I.JR{labels,...}) = map LABELLED labels + | branchTargets(I.JAL{cutsTo, ...}) = FALLTHROUGH::map LABELLED cutsTo + | branchTargets(I.JALR{cutsTo, ...}) = FALLTHROUGH::map LABELLED cutsTo + | branchTargets(I.RET _) = [ESCAPES] + | branchTargets(I.ANNOTATION{i,...}) = branchTargets i + | branchTargets _ = error "branchTargets" + + fun jump label = I.J{lab=label, nop=true} + + val immedRange = {lo= ~32768, hi = 32768} + fun loadImmed{immed,t} = error "loadImmed" + fun loadOperand{opn,t} = error "loadOperand" + + fun setTargets(I.ANNOTATION{i,a},labs) = + I.ANNOTATION{i=setTargets(i,labs),a=a} + | setTargets(i,_) = i + + fun negateConditional br = let + in + raise NegateConditional + end + + (*======================================================================== + * Equality and hashing for operands + *========================================================================*) + fun hashOpn(I.Reg r) = C.hashCell r + | hashOpn(I.Imm i) = Word.fromInt i + | hashOpn(I.HiLab l) = I.LabelExp.hash l + | hashOpn(I.LoLab l) = I.LabelExp.hash l + | hashOpn(I.Lab l) = I.LabelExp.hash l + + fun eqOpn(I.Reg a,I.Reg b) = C.sameColor(a,b) + | eqOpn(I.Imm a,I.Imm b) = a = b + | eqOpn(I.HiLab a,I.HiLab b) = I.LabelExp.==(a,b) + | eqOpn(I.LoLab a,I.LoLab b) = I.LabelExp.==(a,b) + | eqOpn(I.Lab a,I.Lab b) = I.LabelExp.==(a,b) + | eqOpn _ = false + + (*======================================================================== + * Definition and use (for register allocation mainly) + *========================================================================*) + fun defUseR instr = + let + fun opnd2(rs1, I.Reg rs2) = [rs1,rs2] + | opnd2(rs1, _) = [rs1] + in + case instr of + I.LUI{rt, ...} => ([rt], []) + + (* load/store instructions *) + | I.LOAD{rt, b, ...} => ([rt], [b]) + | I.STORE{rs, b, ...} => ([], [rs, b]) + | I.FLOAD{b, d, ...} => ([], opnd2(b, d)) + | I.FSTORE{b, d, ...} => ([], opnd2(b, d)) + + (* branch+call instructions *) + | I.JR{rs, ...} => ([], [rs]) + | I.JAL{defs, uses, ...} => (C.linkR::C.getReg defs, C.getReg uses) + | I.JALR{rt, rs, defs, uses, ...} => + (rt::C.getReg defs,rs::C.getReg uses) + | I.RET _ => ([], [C.linkR]) + + (* arithmetic *) + | I.TRAP{rs, i, ...} => ([], opnd2(rs, i)) + | I.ARITH{rt, rs, i, ...} => ([rt], opnd2(rs, i)) + | I.UNARY{rt, rs, ...} => ([rt], [rs]) + | I.MULTIPLY{rs, rt, ...} => ([], [rs, rt]) + | I.DIVIDE{rs, rt, ...} => ([], [rs, rt]) + | I.MTLO rs => ([], [rs]) + | I.MTHI rs => ([], [rs]) + | I.MFLO rt => ([rt], []) + | I.MFHI rt => ([rt], []) + + | I.FROUND{rs2, ...} => ([], [rs2]) + | I.CVTI2F{rs, ...} => ([], [rs]) + | I.CVTF2I{rt, ...} => ([rt], []) + + (* copy *) + | I.COPY{dst, src, tmp=SOME(I.Direct r), ...} => (r::dst, src) + | I.COPY{dst, src, ...} => (dst, src) + + | I.ANNOTATION{a=C.DEF_USE{cellkind=C.GP,defs,uses}, i, ...} => + let val (d,u) = defUseR i in (defs@d, u@uses) end + | I.ANNOTATION{a, i, ...} => defUseR i + | _ => ([],[]) + end + + (* Use of FP registers *) + fun defUseF instr = + case instr of + I.FLOAD{ft, ...} => ([ft], []) + | I.FSTORE{fs, ...} => ([], [fs]) + | I.FARITH{fs1, fs2, ft, ...} => ([ft], [fs1, fs2]) + | I.FARITH3{fs1, fs2, fs3, ft, ...} => ([ft], [fs1, fs2, fs3]) + | I.FUNARY{fs, ft, ...} => ([ft], [fs]) + | I.FROUND{ft, fs1, ...} => ([ft], [fs1]) + | I.CVTF2I{fs, ...} => ([], [fs]) + | I.CVTI2F{ft, ...} => ([ft], []) + | I.FCMP{fs1, fs2, ...} => ([], [fs1, fs2]) + | I.FCOPY{dst, src, tmp=SOME(I.FDirect f), ...} => (f::dst, src) + | I.FCOPY{dst, src, ...} => (dst, src) + | I.JALR{defs,uses, ...} => (C.getFreg defs,C.getFreg uses) + | I.JAL{defs,uses, ...} => (C.getFreg defs,C.getFreg uses) + | I.ANNOTATION{a=C.DEF_USE{cellkind=C.FP,defs,uses}, i, ...} => + let val (d,u) = defUseF i in (defs@d, u@uses) end + | I.ANNOTATION{a, i, ...} => defUseF i + | _ => ([],[]) + + fun defUse C.GP = defUseR + | defUse C.FP = defUseF + | defUse _ = error "defUse" + + (*======================================================================= + * Annotations + *=======================================================================*) + fun getAnnotations(I.ANNOTATION{i,a}) = + let val (i,an) = getAnnotations i in (i,a::an) end + | getAnnotations i = (i,[]) + fun annotate(i,a) = I.ANNOTATION{i=i,a=a} + + (*======================================================================== + * Replicate an instruction + *========================================================================*) + fun replicate(I.ANNOTATION{i,a}) = I.ANNOTATION{i=replicate i,a=a} + | replicate(I.COPY{tmp=SOME _, dst, src, impl}) = + I.COPY{tmp=SOME(I.Direct(C.newReg())), dst=dst, src=src, impl=ref NONE} + | replicate(I.FCOPY{tmp=SOME _, dst, src, impl}) = + I.FCOPY{tmp=SOME(I.FDirect(C.newFreg())), + dst=dst, src=src, impl=ref NONE} + | replicate i = i +end diff --git a/MLRISC/mips/instructions/mipsShuffle.sig b/MLRISC/mips/instructions/mipsShuffle.sig new file mode 100644 index 0000000..fa481dd --- /dev/null +++ b/MLRISC/mips/instructions/mipsShuffle.sig @@ -0,0 +1,10 @@ +(* mipsShuffle.sig -- shuffle src registers into destination registers *) + +signature MIPSSHUFFLE = sig + structure I : MIPSINSTR + + type t = {tmp:I.ea option,dst:I.C.cell list, src:I.C.cell list} + + val shuffle : t -> I.instruction list + val shufflefp : t -> I.instruction list +end diff --git a/MLRISC/mips/instructions/mipsShuffle.sml b/MLRISC/mips/instructions/mipsShuffle.sml new file mode 100644 index 0000000..5caf350 --- /dev/null +++ b/MLRISC/mips/instructions/mipsShuffle.sml @@ -0,0 +1,29 @@ +functor MIPSShuffle(I:MIPSINSTR) : MIPSSHUFFLE = struct + structure I = I + structure Shuffle = Shuffle(I) + + type t = {tmp:I.ea option, dst:I.C.cell list, src:I.C.cell list} + + val mem=I.Region.memory + + val zeroR = I.Reg I.C.r0 + + fun move{src=I.Direct rs, dst=I.Direct rt} = + [I.ARITH{oper=I.ADDU, rs=rs, i=zeroR, rt=rt}] + | move{src=I.Direct rs, dst=I.Displace{base, disp}} = + [I.STORE{s=I.SW, rs=rs, b=base, d=I.Imm disp, mem=mem}] + | move{src=I.Displace{base, disp}, dst=I.Direct rt} = + [I.LOAD{l=I.LW, rt=rt, b=base, d=I.Imm disp, mem=mem}] + + fun fmove{src=I.FDirect fs, dst=I.FDirect ft} = + [I.FUNARY{oper=I.MOV_D, fs=fs, ft=ft}] + | fmove{src=I.FDirect fs, dst=I.Displace{base, disp}} = + [I.FSTORE{s=I.SDC1, fs=fs, b=base, d=I.Imm disp, mem=mem}] + | fmove{src=I.Displace{base, disp}, dst=I.FDirect ft} = + [I.FLOAD{l=I.LDC1, ft=ft, b=base, d=I.Imm disp, mem=mem}] + + val shuffle = Shuffle.shuffle {mvInstr=move, ea=I.Direct} + + val shufflefp = Shuffle.shuffle {mvInstr=fmove, ea=I.FDirect} +end + diff --git a/MLRISC/mips/mips.mdl b/MLRISC/mips/mips.mdl new file mode 100644 index 0000000..2bbc546 --- /dev/null +++ b/MLRISC/mips/mips.mdl @@ -0,0 +1,471 @@ +(* + * MIPS IV architecture. + * + * Note: information herein is derived from the documents + * ``MIPSpro Assembly Language Programmer's Guide'' + * Document Number 007-2318-002 and + * + * MIPS R4000 Microprocessor User's Manual, Second Edition + * + * MIPS IV Instruction Set Revision 3.2 Sept, 1995, Charles Price + * + * Basically, the differences between MIPS I, II, III, IV are: + * + * MIPS I: old 32-bit architecture. I think this + one has delay loads with no interlock(?) + * MIPS II: some 64-bit operations (32-bit address space?) + * MIPS III: 64-bit CPU with unsigned word loads + * MIPS IV: register + register addressing mode for FPU + * + * We will support MIPS III and MIPS IV in MLRISC, and also provide + * support for 32-bit mode. But MIPS I and MIPS II will not be supported. + * + * -- Allen (leunga@cs.nyu.edu) + *) +architecture MIPS = +struct + + superscalar + + little endian (* is this right??? *) + + lowercase assembly + + storage + GP = $r[32] of 64 bits where $r[0] = 0 + asm: (fn (1,_) => "$at" (* assembler temporary *) + | (28,_) => "$gp" (* global pointer *) + | (29,_) => "$sp" (* stack pointer *) + | (30,_) => "$fp" (* frame pointer *) + | (r,_) => "$"^Int.toString r) + | FP = $f[32] of 64 bits asm: (fn (f,_) => "$f"^Int.toString f) + | CC = $cc[] of 64 bits aliasing GP asm: (fn (r,_) => "$"^Int.toString r) + + (* condition code register *) + | COND = $cond[8] of 64 bits asm: (fn (r,_) => Int.toString r) + | HI = $hi[1] of 64 bits asm: "$hi" + | LO = $lo[1] of 64 bits asm: "$lo" + | MEM = $m[] of 8 aggregable bits asm: (fn (r,_) => "m"^Int.toString r) + | CTRL = $ctrl[] asm: (fn (r,_) => "ctrl"^Int.toString r) + + locations + stackptrR = $r[29] + and linkR = $r[31] (* link address from JAL *) + and frameptrR = $r[30] + and globalptrR = $r[28] + and asmTmpR = $r[1] + and fasmTmp = $f[30] + and r0 = $r[0] + + (* Note on MIPS terminologies + * + * B byte - 8 bits + * H halfword - 16 bits + * W word - 32 bits + * D double - 64 bits + *) + structure RTL = + struct + include "Tools/basis.mdl" + open Basis + + fun disp(b,d) = $r[b] + d + fun byte x = (x : #8 bits) + fun half x = (x : #16 bits) + fun word x = (x : #32 bits) + fun dword x = (x : #64 bits) + + rtl LB{rt,b,d,mem} = $r[rt] := sx(byte $m[disp(b,d):mem]) + rtl LBU{rt,b,d,mem} = $r[rt] := zx(byte $m[disp(b,d):mem]) + rtl LH{rt,b,d,mem} = $r[rt] := sx(half $m[disp(b,d):mem]) + rtl LHU{rt,b,d,mem} = $r[rt] := zx(half $m[disp(b,d):mem]) + rtl LW{rt,b,d,mem} = $r[rt] := sx(word $m[disp(b,d):mem]) + rtl LD{rt,b,d,mem} = $r[rt] := dword $m[disp(b,d):mem] + + end + + structure Instruction = + struct + + (* L[BWH] are sign extended by default *) + datatype load! = LD | LW | LH | LHU | LB | LBU + | LWL | LWR | LWU | LDL | LDR + | ULH | ULHU | ULW | ULD (* unaligned *) + + datatype store! = SD | SW | SH | SB + | SWL | SWR | SDL | SDR + | USH | USW | USD + + datatype fload! = LDC1 | LWC1 + datatype fstore! = SDC1 | SWC1 + + datatype fcond = + FF "f" | FUN "un" | FEQ "eq" | FUEQ "fueq" + | FOLT "folt" | FULT "ult" | FOLE "ole" | FULE "ule" + | FNGLE "ngle" | FSP "sf" | FNGL "ngl" | FSEQ "seq" + | FLT "flt" | FNGE "fnge" | FLE "le" | FNGT "ngt" + + datatype cond! = + EQ | NE | LEZ | GTZ | LTZ | GEZ + + datatype fbranch! = BC1T (* true *) | BC1F (* false *) + + datatype likely = LIKELY "L" | UNLIKELY "" + + (* + * Note, ADD may raise overflow exception + * ADDU is the non-trapping version. + * Same with other operators such as DADD, SUB, DSUB etc. + * + * Instructions that may take 16-bit immediate operands: + * ADDI, ADDIU, SLTI, SLTIU, ANDI, + * ORI, XORI, LUI, DADDI, DADDIU + * + * The immediate operands are unsigned in ORI, ANDI, XORI + *) + datatype arith! = ADD | ADDU | AND | XOR | MUL + | MULO | MULOU | NOR | OR + | SEQ | SGT | SGE | SGEU | SGTU | SLT | SLE + | SLEU | SLTU | SNE | SUB | SUBU | REM | REMU + | SRA | SLL | SRL | ROR | ROL + | MOVN | MOVZ (* conditional moves *) + + (* 64-bit operations from MIPS III *) + | DADD | DADDU | DMUL | DMULO | DMULOU + | DSUB | DSUBU | DREM | DREMU + | DROL | DROR + | DSLL | DSLL32 | DSLLV + | DSRA | DSRA32 | DSRAV + | DSRL | DSRL32 | DSRLV + + datatype unary! = ABS | NEG | NEGU | NOT + | DABS | DNEG | DNEGU + + datatype multiply! = MULT | MULTU | DMULT | DMULTU + + datatype divide! = DIV | DIVU | DDIV | DDIVU + + datatype trap! = TEQ | TNE | TLT | TLTU | TGE | TGEU + + datatype farith! = ADD_D "add.d" | ADD_S "add.s" + | SUB_D "sub.d" | SUB_S "sub.s" + | MUL_D "mul.d" | MUL_S "mul.s" + | DIV_D "div.d" | DIV_S "div.s" + + datatype funary! = MOV_D "mov.d" | MOV_S "mov.s" + | ABS_D "abs.d" | ABS_S "abs.s" + | NEG_D "neg.d" | NEG_S "neg.s" + | SQRT_D "sqrt.d" | SQRT_S "sqrt.s" + + | CVT_SD "cvt.s.d" (* S <- D *) + | CVT_SW "cvt.s.w" + | CVT_DS "cvt.d.s" + | CVT_DW "cvt.d.w" + | CVT_WS "cvt.w.s" + | CVT_WD "cvt.w.d" + | CVT_SL "cvt.s.l" + | CVT_DL "cvt.d.l" + | CVT_LS "cvt.l.s" + | CVT_LD "cvt.l.d" + + datatype cvti2f! = MTC1 | DMTC1 + datatype cvtf2i! = MFC1 | DMFC1 + + (* multiply and add/subtract *) + datatype farith3! = MADD_D "madd.d" | MADD_S "madd.s" + | NMADD_D "nmadd.d" | NMADD_S "nmadd.s" + | MSUB_D "msub.d" | MSUB_S "msub.s" + | NMSUB_D "nmsub.d" | NMSUB_S "nmsub.s" + + (* truncate and rounding *) + datatype fround! = TRUNC_WS "trunc.w.s" + | TRUNC_WD "trunc.w.d" + | ROUND_WS "round.w.d" + | ROUND_WD "round.w.d" + | CEIL_WD "ceil.w.d" + | CEIL_WS "ceil.w.s" + | CEILU_WD "ceilu.w.d" + | CEILU_WS "ceilu.w.s" + | FLOOR_WD "floor.w.d" + | FLOOR_WS "floor.w.s" + | FLOORU_WD "flooru.w.d" + | FLOORU_WS "flooru.w.s" + | ROUNDU_WD "roundu.w.d" + | ROUNDU_WS "roundu.w.s" + | TRUNCU_WD "truncu.w.d" + | TRUNCU_WS "truncu.w.s" + | TRUNC_LS "trunc.l.s" + | TRUNC_LD "trunc.l.d" + | ROUND_LS "round.l.s" + | ROUND_LD "round.l.d" + | CEIL_LS "ceil.l.s" + | CEIL_LD "ceil.l.d" + | FLOOR_LS "floor.l.s" + | FLOOR_LD "floor.l.d" + + datatype fmt = SINGLE "S" | DOUBLE "D" + + datatype operand! = Imm of int ``'' rtl: immed int (* 16 bits *) + | Reg of $GP ``'' rtl: $r[GP] + | Lab of T.labexp ``'' + | HiLab of T.labexp ``$hi()'' + | LoLab of T.labexp ``$lo()'' + + datatype ea = Direct of $GP + | FDirect of $FP + | Displace of {base: $GP, disp:int} + + type addressing_mode = C.cell * operand + + end (* Instruction *) + + (* + * MIPS instructions are all 32-bits. + * Address offsets in base+disp is 16 bits. + *) + instruction formats 32 bits + Load{l:6, rt: $GP 5, b: $GP 5, offset:signed 16} + + | Special{_:6=0, rs: $GP 5, rt: $GP 5, _:10=0, opc:6} + + + (* + * Assembly output helper functions + *) + structure Assembly = + struct + (* Add the i suffix for immediate operands of arithmetic instructions + * For example: + * ADD rt, rs1, rs2 + * ADDI rt, rs, 10 + * ADDU rt, rs1, rs2 + * ADDIU rt, rs, 10 + *) + fun immedSuffix(s, I.Reg _) = s + | immedSuffix(s, _) = + let val n = String.size s + in case String.sub(s, n-1) of + #"u" => String.substring(s, 0, n-1)^"iu" + | _ => s^"i" + end + + (* LDC1 -> LDXC1 when using the indexed addressing mode *) + fun indexed(s, I.Reg _) = + let val prefix = String.substring(s, 0, 2) + val suffix = String.substring(s, 2, 4) + in prefix^"x"^suffix end + | indexed(s, _) = s + + (* Emit nop at delay slot *) + fun emit_nop false = () | emit_nop true = emit "\n\tnop" + end (* Asm *) + + (* + * Reservation tables and pipeline definitions for scheduling + *) + + (* Function units *) + resource mem and alu and falu and fmul and fdiv and branch + + (* Different implementations of cpus *) + cpu default 4 [1 mem, 2 alu, 2 falu, 2 fmul, 1 fdiv, 1 branch] + + (* Definitions of various reservation tables *) + pipeline NOP _ = [] + and ARITH _ = [alu] + and LOAD _ = [mem] + and STORE _ = [mem,mem,mem] + and FARITH _ = [falu] + and FMUL _ = [fmul,fmul] + and FDIV _ = [fdiv,fdiv*50] + and BRANCH _ = [branch] + + instruction + NOP + ``nop'' + + (* + * Load upper immediate + *) + | LUI of {rt: $GP, imm:operand} + ``lui\t, '' + + (* + * Load address + *) + | LA of {rt: $GP, b: $GP, d:operand} + ``la\t, , '' + + | DLA of {rt: $GP, b: $GP, d:operand} + ``dla\t, , '' + + (* + * load and store instructions: + *) + | LOAD of {l:load, rt: $GP, b: $GP, d:operand, mem:Region.region} + asm: ``\t, ()'' + rtl: ``'' + + | STORE of {s:store, rs: $GP, b: $GP, d:operand, mem:Region.region} + ``\t, ()'' + + | FLOAD of {l:fload, ft: $FP, b: $GP, d:operand, mem:Region.region} + ``\t, ()'' + + | FSTORE of {s:fstore, fs: $GP, b: $GP, d:operand, mem:Region.region} + ``\t, ()'' + + (* + * compare instructions: + *) + | FCMP of {fcond:fcond, fmt:fmt, cc: $COND, fs1: $FP, fs2: $FP} + ``c..\t, , '' + + (* + * Integer trapping + *) + | TRAP of {t:trap, rs: $GP, i:operand} + ``\t, '' + + (* + * Branch instructions. + * All branch instruction have delayslots. + * We represent them as complex instructions with optional nops attached + *) + + (* jump; branch delay *) + | J of {lab:Label.label, nop:bool} + asm: ``j\t'' + padding: nop + delayslot: true + delayslot candidate: false + + (* jump register; branch delay *) + | JR of {rs: $GP, labels:Label.label list, nop:bool} + asm: ``jr\t'' + padding: nop + delayslot: true + delayslot candidate: false + + (* jump and link, set $r31 <- PC + 8; branch delay *) + | JAL of {lab:Label.label, defs:C.cellset, uses: C.cellset, + cutsTo: Label.label list, mem:Region.region, nop:bool} + asm: ``jal\t< + emit_defs(defs)>'' + padding: nop + delayslot: true + delayslot candidate: false + + (* jump and link register, set $rt <- PC + 8; branch delay *) + | JALR of {rt: $GP, rs: $GP, + defs:C.cellset, uses: C.cellset, + cutsTo: Label.label list, mem:Region.region, nop:bool} + asm: ``jalr\t, < + emit_defs(defs)>'' + padding: nop + delayslot: true + delayslot candidate: false + + (* pseudo op for return; branch delay *) + | RET of {nop:bool} + asm: ``jr\t$31'' + padding: nop + delayslot: true + delayslot candidate: false + + (* Branch; comparing rs and rt *) + | BRANCH of {likely:likely, cond:cond, rs: $GP, rt: $GP, + lab:Label.label, nop:bool} + asm: ``b\t, , '' + padding: nop + delayslot: true + delayslot candidate: false + + (* Note: on MIPS II, III there must be at least one instruction + * between the set condition code instruction and the branch *) + | FBRANCH of {likely:likely, fbranch:fbranch, cc: $COND, lab:Label.label, + nop:bool} + asm: ``\t, '' + padding: nop + delayslot: true + delayslot candidate: false + + (* + * Arithmetic instructions: + * arguments are (rt,rs,rt/immed) with the exception of sub (sigh). + *) + | ARITH of {oper:arith, rt: $GP, rs: $GP, i:operand} + ``\t, , '' + + | UNARY of {oper:unary, rt: $GP, rs: $GP} + ``\t, '' + + (* + * integer mult and div related: + *) + | MULTIPLY of {oper:multiply, rt: $GP, rs: $GP} + ``\t, '' + + | DIVIDE of {oper:divide, rt: $GP, rs: $GP} + ``\t, '' + + | MFLO of $GP + ``mflo\t'' + + | MTLO of $GP + ``mtlo\t'' + + | MFHI of $GP + ``mfhi\t'' + + | MTHI of $GP + ``mthi\t'' + + | BREAK of int + ``break\t'' + + (* + * Floating point arithmetic: + *) + | FARITH of {oper:farith, ft: $FP, fs1: $FP, fs2: $FP} + ``\t, , '' + + | FUNARY of {oper:funary, ft: $FP, fs: $FP} + ``\t, '' + + | FARITH3 of {oper:farith3, ft: $FP, fs1: $FP, fs2: $FP, fs3: $FP} + ``\t, , , '' + + | FROUND of {oper:fround, ft: $FP, fs1: $FP, rs2: $GP} + ``\t, , , '' + + | CVTI2F of {cvt:cvti2f, rs: $GP, ft: $FP} + ``\t, '' + + | CVTF2I of {cvt:cvtf2i, fs: $FP, rt: $GP} + ``\t, '' + + | COPY of { dst: $GP list, src: $GP list, + impl:instruction list option ref, tmp:ea option} + asm: emitInstrs (Shuffle.shuffle{tmp,src,dst}) + + | FCOPY of { dst: $FP list, src: $FP list, + impl:instruction list option ref, tmp:ea option} + asm: emitInstrs (Shuffle.shufflefp{tmp,src,dst}) + + | ANNOTATION of {i:instruction, a:Annotations.annotation} + asm: (comment(Annotations.toString a); nl(); emitInstr i) + mc: (emitInstr i) + + | PHI of {} + asm: ``phi'' + mc: () + + | SOURCE of {} + asm: ``source'' + mc: () + + | SINK of {} + asm: ``sink'' + mc: () +end diff --git a/MLRISC/mips/mltree/mips.sml b/MLRISC/mips/mltree/mips.sml new file mode 100644 index 0000000..f99e2f0 --- /dev/null +++ b/MLRISC/mips/mltree/mips.sml @@ -0,0 +1,512 @@ +(* + * This is a revamping of the MIPS32 instruction selection module + * using the new MLTREE and instruction representation. I've dropped + * the suffix 32 since we now support 64 bit datatypes. + * + * o How to simulate 32-bit in 64-mode + * All 32-bit values are sign extended to 64-bits. + * The working is similar to the Alpha and the Sparc architecture. + * o I'm using the native multiplication instructions for even simple + * multiply with a constant. Too lazy to add the multiply module for now. + * + * -- Allen + *) + +functor MIPS + (structure MIPSInstr : MIPSINSTR + structure PseudoInstrs : MIPS_PSEUDO_INSTR + structure ExtensionComp : MLTREE_EXTENSION_COMP + where I = MIPSInstr + sharing PseudoInstrs.I = MIPSInstr + + (* + * MIPS architecture version + *) + datatype mipsVersion = I | II | III | IV + val mipsVersion : mipsVersion ref + ) : MLTREECOMP = +struct + + structure I = MIPSInstr + structure T = I.T + structure S = T.Stream + structure R = T.Region + structure C = MIPSInstr.C + structure LE = I.LabelExp + structure P = PseudoInstrs + structure A = MLRiscAnnotations + + fun error msg = MLRiscErrorMsg.error("MIPS",msg) + + type instrStream = (I.instruction,C.cellset) T.stream + type mltreeStream = (T.stm,T.mlrisc list) T.stream + + (* + * This module is used to simulate operations of non-standard widths. + *) + structure Gen = MLTreeGen(structure T = T + val intTy = 64 + val naturalWidths = [32,64] + datatype rep = SE | ZE | NEITHER + val rep = SE (* sign extended? XXX *) + ) + + val zeroR = C.r0 + val zeroOpnd = I.Reg zeroR + val zeroImm = I.Imm 0 + val zero = IntInf.fromInt 0 + fun toInt i = T.I.toInt(32, i) + + fun selectInstructions + (instrStream as + S.STREAM{emit,beginCluster,endCluster,getAnnotations, + defineLabel,entryLabel,pseudoOp,annotation, + exitBlock,comment,...}) = + let + (* jmp+label were a trap is generated -- one per cluster *) + val trapLabel = ref (NONE : (I.instruction * Label.label) option) + + (* Add an overflow trap *) + fun trap() = () + + val newReg = C.newReg + val newFreg = C.newFreg + + fun mark'(i,[]) = i + | mark'(i,a::an) = mark'(I.ANNOTATION{i=i,a=a},an) + fun mark(i,an) = emit(mark'(i,an)) + + fun move(s,d,an) = + if C.sameCell(s,d) orelse C.sameCell(d,zeroR) then () else + mark(I.COPY{dst=[d],src=[s],impl=ref NONE,tmp=NONE},an) + + fun fmove(s,d,an) = + if C.sameCell(s,d) then () else + mark(I.FCOPY{dst=[d],src=[s],impl=ref NONE,tmp=NONE},an) + + (* emit a copy *) + fun copy(dst,src,an) = + mark(I.COPY{dst=dst,src=src,impl=ref NONE, + tmp=case dst of + [_] => NONE | _ => SOME(I.Direct(newReg()))},an) + + (* emit a floating point copy *) + fun fcopy(dst,src,an) = + mark(I.FCOPY{dst=dst,src=src,impl=ref NONE, + tmp=case dst of + [_] => NONE | _ => SOME(I.FDirect(newFreg()))},an) + + (* emit load label expression *) + fun loadLabexp(le,d,an) = + mark(I.ARITH{oper=I.ADDU,rt=d,rs=zeroR,i=I.Lab le},an) + + (* emit load immediate *) + fun loadImmed(n,d,an) = error "loadImmed" + + (* generate an expression and return the register that holds the result *) + fun expr(T.REG(_,r)) = r + | expr(e as T.LI i) = if i = zero then zeroR else expr' e + | expr e = expr' e + and expr' e = let val r = newReg() in doExpr(e,r,[]); r end + + (* convert an operand into a register *) + and reduceOpn(I.Reg r) = r + | reduceOpn(I.Imm 0) = zeroR + | reduceOpn opn = + let val d = newReg() + in emit(I.ARITH{oper=I.ADDU,rt=d,rs=zeroR,i=opn}); d end (* XXX *) + + (* generate an operand *) + and opn(T.REG(_,r)) = I.Reg r + | opn(T.LI i) = if i = zero then zeroOpnd else error "opn" + | opn(T.LABEXP le) = I.Lab le + | opn e = I.Reg(expr e) + + (* compute base+displacement from an expression *) + and addr exp = + let fun toLexp(I.Imm i) = T.LI(IntInf.fromInt i) + | toLexp(I.Lab le) = le + | toLexp _ = error "addr.toLexp" + + fun add(t,n,I.Imm m) = + I.Imm(toInt(T.I.ADD(t,n,IntInf.fromInt m))) + | add(t,n,I.Lab le) = I.Lab(T.ADD(t,T.LI n,le)) + | add(t,n,_) = error "addr.add" + + fun addLe(ty,le,I.Imm 0) = I.Lab le + | addLe(ty,le,disp) = I.Lab(T.ADD(ty,le,toLexp disp)) + + fun sub(t,n,I.Imm m) = + I.Imm(toInt(T.I.SUB(t,IntInf.fromInt m,n))) + | sub(t,n,I.Lab le) = I.Lab(T.SUB(t,le,T.LI n)) + | sub(t,n,_) = error "addr.sub" + + fun subLe(ty,le,I.Imm 0) = I.Lab le + | subLe(ty,le,disp) = I.Lab(T.SUB(ty,le,toLexp disp)) + + (* Should really take into account of the address width XXX *) + fun fold(T.ADD(t,e,T.LI n),disp) = fold(e,add(t,n,disp)) + | fold(T.ADD(t,e,x as T.CONST _),disp) = fold(e,addLe(t,x,disp)) | fold(T.ADD(t,e,x as T.LABEL _),disp) = fold(e,addLe(t,x,disp)) | fold(T.ADD(t,e,T.LABEXP l),disp) = fold(e,addLe(t,l,disp)) + | fold(T.ADD(t,T.LI n,e),disp) = fold(e, add(t,n,disp)) + | fold(T.ADD(t,x as T.CONST _,e),disp) = fold(e,addLe(t,x,disp)) | fold(T.ADD(t,x as T.LABEL _,e),disp) = fold(e,addLe(t,x,disp)) | fold(T.ADD(t,T.LABEXP l,e),disp) = fold(e,addLe(t,l,disp)) + | fold(T.SUB(t,e,T.LI n),disp) = fold(e,sub(t,n,disp)) + | fold(T.SUB(t,e,x as T.CONST _),disp) = fold(e,subLe(t,x,disp)) | fold(T.SUB(t,e,x as T.LABEL _),disp) = fold(e,subLe(t,x,disp)) | fold(T.SUB(t,e,T.LABEXP l),disp) = fold(e,subLe(t,l,disp)) + | fold(e,disp) = (expr e,disp) + + in fold(exp, zeroImm) + end + + (* compute addressing mode for floating point. + * In MIPS IV mode we also support register+register mode. + *) + and faddr exp = + case !mipsVersion of + IV => + (case exp of + T.ADD(_,T.REG(_,b),T.REG(_,i)) => (b, I.Reg i) + | _ => addr exp + ) + | _ => addr exp + + (* generate an arithmetic operator *) + and arith(oper,a,b,d,an) = + mark(I.ARITH{oper=oper,rt=d,rs=expr a,i=I.Reg(expr b)},an) + + (* generate a commutative arithmetic operator + * that can take an immediate operand + *) + and commarithi(oper,a,b,d,an) = + let val (a, b) = + case b of + (T.LI _ | T.CONST _ | T.LABEXP _ | T.LABEL _) => (b, a) + | _ => (a, b) + in mark(I.ARITH{oper=oper,rt=d,rs=expr a,i=opn b},an) + end + + (* generate an unary arithmetic operator *) + and unary(oper,a,d,an) = + mark(I.UNARY{oper=oper,rt=d,rs=expr a},an) + + (* generate a load *) + and load(ld,ea,rt,mem,an) = + let val (base,offset) = addr ea + in mark(I.LOAD{l=ld,rt=rt,b=base,d=offset,mem=mem},an) end + + (* generate a store *) + and store(st,ea,data,mem,an) = + let val (base,offset) = addr ea + in mark(I.STORE{s=st,rs=expr data,b=base,d=offset,mem=mem},an) end + + (* generate multiply. + * Note: low order result is in the LO register + *) + and multiply(oper,a,b,d,an) = + (mark(I.MULTIPLY{oper=oper,rs=expr a,rt=expr b},an); + emit(I.MFLO d) + ) + + (* generate divide + * Note: quotient in LO; remainder is in HI + *) + and divide(oper,a,b,d,an) = + (mark(I.DIVIDE{oper=oper,rs=expr a,rt=expr b},an); + emit(I.MFLO d) + ) + + and rem(oper,a,b,d,an) = + (mark(I.DIVIDE{oper=oper,rs=expr a,rt=expr b},an); + emit(I.MFHI d) + ) + + (* generate an expression that targets register d *) + and doExpr(exp,d,an) = + case exp of + T.REG(_,r) => move(r,d,an) + | T.LI n => loadImmed(n,d,an) + | T.LABEL _ => loadLabexp(exp,d,an) + | T.CONST _ => loadLabexp(exp,d,an) + | T.LABEXP le => loadLabexp(le,d,an) + + (* 32 bit support *) + | T.NEG(32, a) => unary(I.NEGU,a,d,an) + | T.ADD(32, a, b) => commarithi(I.ADDU,a,b,d,an) + | T.SUB(32, a, b) => arith(I.SUBU,a,b,d,an) + | T.MULS(32, a, b) => multiply(I.MULT,a,b,d,an) + | T.MULU(32, a, b) => multiply(I.MULTU,a,b,d,an) + | T.DIVS(32, a, b) => divide(I.DIV,a,b,d,an) + | T.DIVU(32, a, b) => divide(I.DIVU,a,b,d,an) + | T.QUOTS(32, a, b) => error "quots" + | T.REMS(32, a, b) => rem(I.DIV,a,b,d,an) + | T.REMU(32, a, b) => rem(I.DIVU,a,b,d,an) + + | T.NEGT(32, a) => unary(I.NEG,a,d,an) + | T.ADDT(32, a, b) => commarithi(I.ADD,a,b,d,an) + | T.SUBT(32, a, b) => arith(I.SUB,a,b,d,an) + | T.MULT(32, a, b) => error "mult" + | T.DIVT(32, a, b) => error "divt" + | T.QUOTT(32, a, b) => error "quott" + | T.REMT(32, a, b) => error "remt" + + | T.SLL(32, a, b) => arith(I.SLL,a,b,d,an) + | T.SRL(32, a, b) => arith(I.SRL,a,b,d,an) + | T.SRA(32, a, b) => arith(I.SRA,a,b,d,an) + + (* 64 bit support *) + | T.NEG(64, a) => unary(I.DNEGU,a,d,an) + | T.ADD(64, a, b) => arith(I.DADDU,a,b,d,an) + | T.SUB(64, a, b) => arith(I.DSUBU,a,b,d,an) + | T.MULS(64, a, b) => multiply(I.DMULT,a,b,d,an) + | T.MULU(64, a, b) => multiply(I.DMULTU,a,b,d,an) + | T.DIVS(64, a, b) => divide(I.DDIV,a,b,d,an) + | T.DIVU(64, a, b) => divide(I.DDIVU,a,b,d,an) + | T.QUOTS(64, a, b) => error "quots" + | T.REMS(64, a, b) => rem(I.DDIV,a,b,d,an) + | T.REMU(64, a, b) => rem(I.DDIVU,a,b,d,an) + + | T.NEGT(64, a) => unary(I.DNEG,a,d,an) + | T.ADDT(64, a, b) => commarithi(I.DADD,a,b,d,an) + | T.SUBT(64, a, b) => arith(I.DSUB,a,b,d,an) + | T.MULT(64, a, b) => error "mult" + | T.DIVT(64, a, b) => error "divt" + | T.QUOTT(64, a, b) => error "quott" + | T.REMT(64, a, b) => error "remt" + + | T.SLL(64, a, b) => arith(I.DSLL,a,b,d,an) + | T.SRL(64, a, b) => arith(I.DSRL,a,b,d,an) + | T.SRA(64, a, b) => arith(I.DSRA,a,b,d,an) + + (* Bit ops *) + | T.ANDB(_, a, b) => commarithi(I.AND,a,b,d,an) + | T.ORB(_, a, b) => commarithi(I.OR,a,b,d,an) + | T.XORB(_, a, b) => commarithi(I.XOR,a,b,d,an) + + (* Conditional move *) + | T.COND(_, cc, yes, no) => error "cond" + + (* Loads *) + | T.SX(_,_,T.LOAD(8,ea,mem)) => load(I.LB, ea, d, mem, an) + | T.SX(_,_,T.LOAD(16,ea,mem)) => load(I.LH, ea, d, mem, an) + | T.SX(_,_,T.LOAD(32,ea,mem)) => load(I.LW, ea, d, mem, an) + | T.ZX(_,_,T.LOAD(8,ea,mem)) => load(I.LBU, ea, d, mem, an) + | T.ZX(_,_,T.LOAD(16,ea,mem)) => load(I.LHU, ea, d, mem, an) + | T.LOAD(8, ea, mem) => load(I.LBU, ea, d, mem, an) + | T.LOAD(16, ea, mem) => load(I.LHU, ea, d, mem, an) + | T.LOAD(32, ea, mem) => load(I.LW, ea, d, mem, an) + | T.LOAD(64, ea, mem) => load(I.LD, ea, d, mem, an) + + (* Annotations *) + | T.MARK(e, A.MARKREG f) => (f d; doExpr(e, d, an)) + | T.MARK(e, a) => doExpr(e, d, a::an) + + (* Control dependence *) + | T.PRED(e,c) => doExpr(e, d, A.CTRLUSE c::an) + + (* Extension *) + | T.REXT e => ExtensionComp.compileRext (reducer()) {e=e, rd=d, an=an} + + (* Defaults *) + | e => doExpr(Gen.compileRexp e,d,an) + + (* generate a floating point expression + * return the register that holds the result + *) + and fexpr(T.FREG(_,r)) = r + | fexpr e = let val d = newFreg() in doFexpr(e,d,[]); d end + + and farith(oper,a,b,d,an) = + mark(I.FARITH{oper=oper,fs1=fexpr a,fs2=fexpr b,ft=d},an) + and funary(oper,a,d,an) = + mark(I.FUNARY{oper=oper,fs=fexpr a,ft=d},an) + and farith3(oper,a,b,c,d,an) = + mark(I.FARITH3{oper=oper,fs1=fexpr a,fs2=fexpr b,fs3=fexpr c,ft=d},an) + and fload(ld,ea,fd,mem,an) = + let val (base,offset) = faddr ea + in mark(I.FLOAD{l=ld,ft=fd,b=base,d=offset,mem=mem},an) end + and fstore(st,ea,fs,mem,an) = + let val (base,offset) = faddr ea + in mark(I.FSTORE{s=st,fs=fexpr fs,b=base,d=offset,mem=mem},an) end + + (* generate a floating point expression that targets register d *) + and doFexpr(e,d,an) = + case e of + T.FREG(_,f) => fmove(f,d,an) + + (* single precision support *) + | T.FADD(32,a,b) => farith(I.ADD_S,a,b,d,an) + | T.FSUB(32,a,b) => farith(I.SUB_S,a,b,d,an) + | T.FMUL(32,a,b) => farith(I.MUL_S,a,b,d,an) + | T.FDIV(32,a,b) => farith(I.DIV_S,a,b,d,an) + | T.FABS(32,a) => funary(I.ABS_S,a,d,an) + | T.FNEG(32,a) => funary(I.NEG_S,a,d,an) + | T.FSQRT(32,a) => funary(I.SQRT_S,a,d,an) + + (* double precision support *) + | T.FADD(64,a,b) => farith(I.ADD_D,a,b,d,an) + | T.FSUB(64,a,b) => farith(I.SUB_D,a,b,d,an) + | T.FMUL(64,a,b) => farith(I.MUL_D,a,b,d,an) + | T.FDIV(64,a,b) => farith(I.DIV_D,a,b,d,an) + | T.FABS(64,a) => funary(I.ABS_D,a,d,an) + | T.FNEG(64,a) => funary(I.NEG_D,a,d,an) + | T.FSQRT(64,a) => funary(I.SQRT_D,a,d,an) + + (* copy sign *) + | T.FCOPYSIGN _ => error "fcopysign" + + (* loads *) + | T.FLOAD(32,ea,mem) => fload(I.LWC1,ea,d,mem,an) + | T.FLOAD(64,ea,mem) => fload(I.LDC1,ea,d,mem,an) + + (* floating/floating conversion + * Note: it is not necessary to convert single precision + * to double on the alpha. + *) + | T.CVTF2F(to,from,e) => + if from = to then doFexpr(e, d, an) + else + (case (to,from) of + (32,64) => funary(I.CVT_SD,e,d,an) + | (64,32) => funary(I.CVT_DS,e,d,an) (* use normal rounding *) + | _ => error "CVTF2F" + ) + + (* integer -> floating point conversion *) + | T.CVTI2F(fty,ty,e) => error "cvti2f" + + | T.FMARK(e,A.MARKREG f) => (f d; doFexpr(e,d,an)) + | T.FMARK(e,a) => doFexpr(e,d,a::an) + | T.FPRED(e,c) => doFexpr(e, d, A.CTRLUSE c::an) + | T.FEXT e => ExtensionComp.compileFext (reducer()) {e=e, fd=d, an=an} + | _ => error "doFexpr" + + (* generate an unconditional branch *) + and goto(lab,an) = mark(I.J{lab=lab,nop=true},an) + + (* generate an unconditional jump *) + and jmp(e,labels,an) = mark(I.JR{rs=expr e,labels=labels,nop=true},an) + + (* generate a call instruction *) + and call(ea,flow,defs,uses,cutsTo,mem,an) = + let val defs=cellset defs + val uses=cellset uses + val instr = + case ea of + (T.LABEL lab) => + I.JAL{lab=lab,defs=defs,uses=uses,cutsTo=cutsTo, + mem=mem,nop=true} + | _ => I.JALR{rt=C.linkR, rs=expr ea, + defs=defs,uses=uses,cutsTo=cutsTo,mem=mem,nop=true} + in mark(instr,an) + end + + (* generate a return instruction *) + and ret(an) = mark(I.RET{nop=true},an) + + (* generate an branch instruction *) + and branch(e,label,an) = error "branch" + + (* generate a comparison *) + and cmp(ty,cond,e1,e2,d,an) = error "cmp" + + and doCCexpr(T.CC(_,r),d,an) = move(r,d,an) + | doCCexpr(T.FCC(_,r),d,an) = fmove(r,d,an) + | doCCexpr(T.CMP(ty,cond,e1,e2),d,an) = cmp(ty,cond,e1,e2,d,an) + | doCCexpr(T.FCMP(fty,cond,e1,e2),d,an) = error "doCCexpr.fcmp" + | doCCexpr(T.CCMARK(e,A.MARKREG f),d,an) = (f d; doCCexpr(e,d,an)) + | doCCexpr(T.CCMARK(e,a),d,an) = doCCexpr(e,d,a::an) + | doCCexpr(T.CCEXT e,d,an) = + ExtensionComp.compileCCext (reducer()) {e=e, ccd=d, an=an} + | doCCexpr _ = error "doCCexpr" + + and ccExpr(T.CC(_,r)) = r + | ccExpr(T.FCC(_,r)) = r + | ccExpr e = let val d = newReg() + in doCCexpr(e,d,[]); d end + + (* compile a statement *) + and stmt(s,an) = + case s of + T.MV(ty,r,e) => doExpr(e,r,an) + | T.FMV(ty,r,e) => doFexpr(e,r,an) + | T.CCMV(r,e) => doCCexpr(e,r,an) + | T.COPY(ty,dst,src) => copy(dst,src,an) + | T.FCOPY(ty,dst,src) => fcopy(dst,src,an) + | T.JMP(T.LABEL lab,_) => goto(lab,an) + | T.JMP(e,labs) => jmp(e,labs,an) + | T.BCC(cc,lab) => branch(cc,lab,an) + | T.CALL{funct,targets,defs,uses,region} => + call(funct,targets,defs,uses,[],region,an) + | T.FLOW_TO(T.CALL{funct,targets,defs,uses,region},cuts)=> + call(funct,targets,defs,uses,cuts,region,an) + | T.RET _ => ret(an) + | T.STORE(8,ea,data,mem) => store(I.SB,ea,data,mem,an) + | T.STORE(16,ea,data,mem) => store(I.SH,ea,data,mem,an) + | T.STORE(32,ea,data,mem) => store(I.SW,ea,data,mem,an) + | T.STORE(64,ea,data,mem) => store(I.SD,ea,data,mem,an) + | T.FSTORE(32,ea,data,mem) => fstore(I.SWC1,ea,data,mem,an) + | T.FSTORE(64,ea,data,mem) => fstore(I.SDC1,ea,data,mem,an) + | T.DEFINE l => defineLabel l + | T.LIVE S => mark'(I.LIVE{regs=cellset S,spilled=C.empty},an) + | T.KILL S => mark'(I.KILL{regs=cellset S,spilled=C.empty},an) + | T.ANNOTATION(s,a) => stmt(s,a::an) + | T.EXT s => ExtensionComp.compileSext (reducer()) {stm=s,an=an} + | s => doStmts (Gen.compileStm s) + + and reducer() = + T.REDUCER{reduceRexp = expr, + reduceFexp = fexpr, + reduceCCexp = ccExpr, + reduceStm = stmt, + operand = opn, + reduceOperand = reduceOpn, + addressOf = addr, + emit = mark, + instrStream = instrStream, + mltreeStream = self() + } + + and doStmt s = stmt(s,[]) + and doStmts ss = app doStmt ss + + (* convert mlrisc to cellset: + * condition code registers are mapped onto general registers + *) + and cellset mlrisc = + let fun g([],acc) = acc + | g(T.GPR(T.REG(_,r))::regs,acc) = g(regs,C.addReg(r,acc)) + | g(T.FPR(T.FREG(_,f))::regs,acc) = g(regs,C.addFreg(f,acc)) + | g(T.CCR(T.CC(_,cc))::regs,acc) = g(regs,C.addReg(cc,acc)) + | g(T.CCR(T.FCC(_,cc))::regs,acc) = g(regs,C.addReg(cc,acc)) + | g(_::regs, acc) = g(regs, acc) + in g(mlrisc, C.empty) end + + and beginCluster' n = + (trapLabel := NONE; + beginCluster n + ) + + and endCluster' a = + (case !trapLabel of + NONE => () + | SOME(_, lab) => (defineLabel lab) (* XXX *) + (*esac*); + endCluster a + ) + + and self() = + S.STREAM + { beginCluster = beginCluster', + endCluster = endCluster', + emit = doStmt, + pseudoOp = pseudoOp, + defineLabel = defineLabel, + entryLabel = entryLabel, + comment = comment, + annotation = annotation, + getAnnotations = getAnnotations, + exitBlock = fn regs => exitBlock(cellset regs) + } + in self() + end + +end + diff --git a/MLRISC/mips/mltree/mipsPseudoInstr.sig b/MLRISC/mips/mltree/mipsPseudoInstr.sig new file mode 100644 index 0000000..607ab74 --- /dev/null +++ b/MLRISC/mips/mltree/mipsPseudoInstr.sig @@ -0,0 +1,13 @@ +(* mipsPseudoInstr.sig --- mips pseudo instructions *) + +signature MIPS_PSEUDO_INSTR = +sig + structure I : MIPSINSTR + structure T : MLTREE + structure C : MIPSCELLS + sharing C = I.C + sharing I.T = T + + type reduceOpnd = I.operand -> C.cell +end + diff --git a/MLRISC/mips/mltree/mipsRTL.sml b/MLRISC/mips/mltree/mipsRTL.sml new file mode 100644 index 0000000..7142e18 --- /dev/null +++ b/MLRISC/mips/mltree/mipsRTL.sml @@ -0,0 +1,29 @@ +(* + * WARNING: This file was automatically generated by MDLGen (v3.0) + * from the machine description file "mips/mips.mdl". + * DO NOT EDIT this file directly + *) + + +functor MIPSRTL(structure RTL : MLTREE_RTL + structure C : MIPSCELLS + )= +struct + structure T = RTL.T + structure P = struct + end + + fun LB {rt, b, d, mem} = T.ASSIGN (64, T.$ (64, C.GP, rt), T.SX (64, 8, + T.$ (8, C.MEM, T.ADD (64, T.$ (64, C.GP, b), d)))) + fun LBU {rt, b, d, mem} = T.ASSIGN (64, T.$ (64, C.GP, rt), T.ZX (64, 8, + T.$ (8, C.MEM, T.ADD (64, T.$ (64, C.GP, b), d)))) + fun LH {rt, b, d, mem} = T.ASSIGN (64, T.$ (64, C.GP, rt), T.SX (64, 16, + T.$ (8, C.MEM, T.ADD (64, T.$ (64, C.GP, b), d)))) + fun LHU {rt, b, d, mem} = T.ASSIGN (64, T.$ (64, C.GP, rt), T.ZX (64, 16, + T.$ (8, C.MEM, T.ADD (64, T.$ (64, C.GP, b), d)))) + fun LW {rt, b, d, mem} = T.ASSIGN (64, T.$ (64, C.GP, rt), T.SX (64, 32, + T.$ (8, C.MEM, T.ADD (64, T.$ (64, C.GP, b), d)))) + fun LD {rt, b, d, mem} = T.ASSIGN (64, T.$ (64, C.GP, rt), T.$ (8, C.MEM, + T.ADD (64, T.$ (64, C.GP, b), d))) +end + diff --git a/MLRISC/mips/mltree/mipsRTLProps.sml b/MLRISC/mips/mltree/mipsRTLProps.sml new file mode 100644 index 0000000..ab3fc1b --- /dev/null +++ b/MLRISC/mips/mltree/mipsRTLProps.sml @@ -0,0 +1,97 @@ +(* + * WARNING: This file was automatically generated by MDLGen (v3.0) + * from the machine description file "mips/mips.mdl". + * DO NOT EDIT this file directly + *) + + +functor MIPSRTLProps(structure Instr : MIPSINSTR + structure RegionProps : REGION_PROPERTIES + structure RTL : MLTREE_RTL + structure OperandTable : OPERAND_TABLE where I = Instr + structure Asm : INSTRUCTION_EMITTER where I = Instr + sharing Instr.T = RTL.T + ) : RTL_PROPERTIES = +struct + structure I = Instr + structure C = I.C + structure RTL = RTL + structure T = RTL.T + structure OT = OperandTable + + datatype value = CELL of C.cell + | OPERAND of I.operand + + fun error msg = MLRiscErrorMsg.error("MIPSRTLProps",msg) + fun bug(msg,instr) = + let val Asm.S.STREAM{emit, ...} = Asm.makeStream [] + in emit instr; error msg end + structure MIPSRTL = MIPSRTL + (structure RTL = RTL + structure C = C + ) + + structure Arch = struct + local + val TMP0 = {rt=T.PARAM 0, b=T.PARAM 1, d=T.PARAM 0, mem=T.PARAM 0} + in + val LB = RTL.new (MIPSRTL.LB TMP0) + val LBU = RTL.new (MIPSRTL.LBU TMP0) + val LH = RTL.new (MIPSRTL.LH TMP0) + val LHU = RTL.new (MIPSRTL.LHU TMP0) + val LW = RTL.new (MIPSRTL.LW TMP0) + val LD = RTL.new (MIPSRTL.LD TMP0) + end + end + + fun rtl instr = + let fun undefined() = bug("rtl",instr) + fun query (I.LOAD{l, rt, b, d, mem}) = + (case l of + I.LD => Arch.LD + | I.LW => Arch.LW + | I.LH => Arch.LH + | I.LHU => Arch.LHU + | I.LB => Arch.LB + | I.LBU => Arch.LBU + | _ => undefined () + ) + | query _ = undefined () + in query instr + end + fun defUse valueNumberingMethods instr = + let fun undefined() = bug("defUse",instr) + (* methods for computing value numbers *) + val OT.VALUE_NUMBERING + {int, int32, intinf, word, word32, operand, ...} = + valueNumberingMethods + (* methods for type conversion *) + fun get_int(x,L) = CELL(int x)::L + fun get_int32(x,L) = CELL(int32 x)::L + fun get_intinf(x,L) = CELL(intinf x)::L + fun get_word(x,L) = CELL(word x)::L + fun get_word32(x,L) = CELL(word32 x)::L + fun get_cell(x,L) = CELL x::L + fun get_label(x,L) = L + fun get_cellset(x,L) = map CELL (C.CellSet.toCellList x)@L + fun get_operand(x,L) = OPERAND x::L + fun get_int'(x) = [CELL(int x)] + fun get_int32'(x) = [CELL(int32 x)] + fun get_intinf'(x) = [CELL(intinf x)] + fun get_word'(x) = [CELL(word x)] + fun get_word32'(x) = [CELL(word32 x)] + fun get_cell'(x) = [CELL x] + fun get_label'(x) = [] + fun get_cellset'(x) = map CELL (C.CellSet.toCellList x) + fun get_operand'(x) = [OPERAND x] + fun query (I.LOAD{l, rt, b, d, mem}) = + (case l of + (I.LD | I.LW | I.LH | I.LHU | I.LB | I.LBU) => (get_cell (rt, + []), get_operand (d, get_cell (b, []))) + | _ => undefined () + ) + | query _ = undefined () + in query instr + end +end + diff --git a/MLRISC/mips/ra/mipsRewrite.sml b/MLRISC/mips/ra/mipsRewrite.sml new file mode 100644 index 0000000..94ba830 --- /dev/null +++ b/MLRISC/mips/ra/mipsRewrite.sml @@ -0,0 +1,127 @@ +(* mipsRewrite.sml -- rewrite an mips instruction + * + *) + +functor MIPSRewrite(Instr : MIPSINSTR) = +struct + structure I = Instr + structure C = I.C + + fun error msg = MLRiscErrorMsg.error("MipsRewrite",msg) + + fun rewriteUse(instr, rs, rt) = + let fun match r = C.sameColor(r,rs) + fun R r = if match r then rt else r + fun O(i as I.Reg r) = if match r then I.Reg rt else i + | O i = i + in case instr of + I.LOAD{l,rt,b,d,mem} => I.LOAD{l=l,rt=rt,b=R b,d=O d,mem=mem} + | I.STORE{s,rs,b,d,mem} => I.STORE{s=s,rs=R rs,b=R b,d=O d,mem=mem} + | I.FLOAD{l,ft,b,d,mem} => I.FLOAD{l=l,ft=ft,b=R b,d=O d,mem=mem} + | I.FSTORE{s,fs,b,d,mem} => I.FSTORE{s=s,fs=fs,b=R b,d=O d,mem=mem} + | I.FROUND{oper, ft, fs1, rs2} => + I.FROUND{oper=oper, ft=ft, fs1=fs1, rs2=R rs2} + | I.TRAP{t,rs,i} => I.TRAP{t=t,rs=R rs,i=O i} + | I.JR{rs,labels,nop} => I.JR{rs=R rs,labels=labels,nop=nop} + | I.JAL{lab,defs,uses,cutsTo,mem,nop} => + I.JAL{lab=lab,defs=defs,uses=C.CellSet.map {from=rs,to=rt} uses, + cutsTo=cutsTo,mem=mem,nop=nop} + | I.JALR{rt,rs,defs,uses,cutsTo,mem,nop} => + I.JALR{rt=rt,rs=R rs, + defs=defs,uses=C.CellSet.map {from=rs,to=rt} uses, + cutsTo=cutsTo,mem=mem,nop=nop} + | I.BRANCH{likely,cond,rs,rt,lab,nop} => + I.BRANCH{likely=likely,cond=cond,rs=R rs,rt=R rt,lab=lab,nop=nop} + | I.ARITH{oper,rt,rs,i} => I.ARITH{oper=oper,rt=rt,rs=R rs,i=O i} + | I.UNARY{oper,rt,rs} => I.UNARY{oper=oper,rt=rt,rs=R rs} + | I.MULTIPLY{oper,rt,rs} => I.MULTIPLY{oper=oper,rt=R rt,rs=R rs} + | I.DIVIDE{oper,rt,rs} => I.DIVIDE{oper=oper,rt=R rt,rs=R rs} + | I.MTLO rs => I.MTLO(R rs) + | I.MTHI rs => I.MTHI(R rs) + | I.CVTI2F{cvt, rs, ft} => I.CVTI2F{cvt=cvt, rs=R rs, ft=ft} + | I.COPY{src,dst,tmp,impl} => + I.COPY{src=map R src,dst=dst,tmp=tmp,impl=impl} + | I.ANNOTATION{i,a} => I.ANNOTATION{i=rewriteUse(i,rs,rt),a=a} + | _ => instr + end + + fun rewriteDef(instr, rs, rt) = + let fun match r = C.sameColor(r,rs) + fun R r = if match r then rt else r + in case instr of + I.LUI{rt, imm} => I.LUI{rt=R rt, imm=imm} + | I.LOAD{l,rt,b,d,mem} => I.LOAD{l=l,rt=R rt,b=b,d=d,mem=mem} + | I.JAL{lab,defs,uses,cutsTo,mem,nop} => + I.JAL{lab=lab,defs=C.CellSet.map{from=rs,to=rt} defs,uses=uses, + cutsTo=cutsTo,mem=mem,nop=nop} + | I.JALR{rt,rs,defs,uses,cutsTo,mem,nop} => + I.JALR{rt=R rt,rs=rs, + uses=uses,defs=C.CellSet.map {from=rs,to=rt} defs, + cutsTo=cutsTo,mem=mem,nop=nop} + | I.ARITH{oper,rt,rs,i} => I.ARITH{oper=oper,rt=R rt,rs=rs,i=i} + | I.UNARY{oper,rt,rs} => I.UNARY{oper=oper,rt=R rt,rs=rs} + | I.MFLO rt => I.MFLO(R rt) + | I.MFHI rt => I.MFHI(R rt) + | I.CVTF2I{cvt, rt, fs} => I.CVTF2I{cvt=cvt, rt=R rt, fs=fs} + | I.COPY{src,dst,tmp,impl} => + I.COPY{src=src,dst=map R dst,tmp=tmp,impl=impl} + | I.ANNOTATION{i,a} => I.ANNOTATION{i=rewriteUse(i,rs,rt),a=a} + | _ => instr + end + + fun frewriteUse(instr, fs, ft) = + let fun match f = C.sameColor(f,fs) + fun R f = if match f then ft else f + in case instr of + I.FSTORE{s,fs,b,d,mem} => I.FSTORE{s=s,fs=R fs,b=b,d=d,mem=mem} + | I.JAL{lab,defs,uses,cutsTo,mem,nop} => + I.JAL{lab=lab,defs=defs,uses=C.CellSet.map {from=fs,to=ft} uses, + cutsTo=cutsTo,mem=mem,nop=nop} + | I.JALR{rt,rs,defs,uses,cutsTo,mem,nop} => + I.JALR{rt=rt,rs=rs, + defs=defs,uses=C.CellSet.map {from=fs,to=ft} uses, + cutsTo=cutsTo,mem=mem,nop=nop} + | I.FARITH{oper, ft, fs1, fs2} => + I.FARITH{oper=oper, ft=ft, fs1=R fs1, fs2=R fs2} + | I.FUNARY{oper, ft, fs} => I.FUNARY{oper=oper, ft=ft, fs=R fs} + | I.FROUND{oper, ft, fs1, rs2} => + I.FROUND{oper=oper, ft=ft, fs1=R fs1, rs2=rs2} + | I.CVTF2I{cvt, fs, rt} => I.CVTF2I{cvt=cvt, fs=R fs, rt=rt} + | I.FARITH3{oper, ft, fs1, fs2, fs3} => + I.FARITH3{oper=oper, ft=ft, fs1=R fs1, fs2=R fs2, fs3=R fs3} + | I.FCMP{fcond, fmt, cc, fs1, fs2} => + I.FCMP{fcond=fcond, fmt=fmt, cc=cc, fs1=R fs1, fs2=R fs2} + | I.FCOPY{src,dst,tmp,impl} => + I.FCOPY{src=map R src,dst=dst,tmp=tmp,impl=impl} + | I.ANNOTATION{i,a} => I.ANNOTATION{i=frewriteUse(i,fs,ft),a=a} + | _ => instr + end + + fun frewriteDef(instr, fs, ft) = + let fun match f = C.sameColor(f,fs) + fun R f = if match f then ft else f + in case instr of + I.FLOAD{l,ft,b,d,mem} => I.FLOAD{l=l,ft=R ft,b=b,d=d,mem=mem} + | I.JAL{lab,defs,uses,cutsTo,mem,nop} => + I.JAL{lab=lab,uses=uses,defs=C.CellSet.map {from=fs,to=ft} defs, + cutsTo=cutsTo,mem=mem,nop=nop} + | I.JALR{rt,rs,defs,uses,cutsTo,mem,nop} => + I.JALR{rt=rt,rs=rs, + uses=uses,defs=C.CellSet.map {from=fs,to=ft} defs, + cutsTo=cutsTo,mem=mem,nop=nop} + | I.FARITH{oper, ft, fs1, fs2} => + I.FARITH{oper=oper, ft=R ft, fs1=fs1, fs2=fs2} + | I.FUNARY{oper, ft, fs} => I.FUNARY{oper=oper, ft=R ft, fs=fs} + | I.FROUND{oper, ft, fs1, rs2} => + I.FROUND{oper=oper, ft=R ft, fs1=fs1, rs2=rs2} + | I.CVTI2F{cvt, rs, ft} => I.CVTI2F{cvt=cvt, rs=rs, ft=R ft} + | I.FARITH3{oper, ft, fs1, fs2, fs3} => + I.FARITH3{oper=oper, ft=R ft, fs1=fs1, fs2=fs2, fs3=fs3} + | I.FCOPY{src,dst,tmp,impl} => + I.FCOPY{src=src,dst=map R dst,tmp=tmp,impl=impl} + | I.ANNOTATION{i,a} => I.ANNOTATION{i=frewriteDef(i,fs,ft),a=a} + | _ => instr + end + +end + diff --git a/MLRISC/mltree/build-rtl.sig b/MLRISC/mltree/build-rtl.sig new file mode 100644 index 0000000..77effa8 --- /dev/null +++ b/MLRISC/mltree/build-rtl.sig @@ -0,0 +1,128 @@ +(* + * Functions for building an RTL. + *) +signature BUILD_RTL = +sig + structure RTL : MLTREE_RTL + type ty = int + + val map : int -> ('a -> 'b) -> 'a list -> 'b list + + val fetch : ty -> RTL.loc -> RTL.exp + val := : ty -> RTL.loc * RTL.exp -> RTL.action + val aggb : ty * ty -> RTL.cell -> RTL.loc + val aggl : ty * ty -> RTL.cell -> RTL.loc + val idaggr: ty -> RTL.cell -> RTL.loc + val copy : ty -> 'a * 'a -> RTL.action + val ! : ty * string * string -> RTL.exp + val $ : string * ty -> RTL.exp -> RTL.cell + val $$ : string * ty -> RTL.exp * RTL.exp -> RTL.cell + + val intConst : ty -> int -> RTL.exp + val wordConst : ty -> Word32.word -> RTL.exp + + val newOp : string -> RTL.exp list -> RTL.exp + val newCond : string -> RTL.exp list -> RTL.exp + + + val immed : ty -> RTL.exp -> RTL.exp + val operand : ty -> RTL.exp -> RTL.exp + val label : ty -> RTL.exp -> RTL.exp + val forall : ty -> RTL.exp -> RTL.exp + val ? : ty -> RTL.exp + + val not : RTL.cond -> RTL.cond + val False : RTL.cond + val True : RTL.cond + + val sx : ty * ty -> RTL.exp -> RTL.exp + val zx : ty * ty -> RTL.exp -> RTL.exp + val bitslice : ty -> (int * int) list -> RTL.exp -> RTL.exp + + (* Integer operators *) + val ~ : ty -> RTL.exp -> RTL.exp + val + : ty -> RTL.exp * RTL.exp -> RTL.exp + val - : ty -> RTL.exp * RTL.exp -> RTL.exp + val muls : ty -> RTL.exp * RTL.exp -> RTL.exp + val mulu : ty -> RTL.exp * RTL.exp -> RTL.exp + val divs : ty -> RTL.div_rounding_mode * RTL.exp * RTL.exp -> RTL.exp + val divu : ty -> RTL.exp * RTL.exp -> RTL.exp + val rems : ty -> RTL.div_rounding_mode * RTL.exp * RTL.exp -> RTL.exp + val remu : ty -> RTL.exp * RTL.exp -> RTL.exp + + val andb : ty -> RTL.exp * RTL.exp -> RTL.exp + val orb : ty -> RTL.exp * RTL.exp -> RTL.exp + val xorb : ty -> RTL.exp * RTL.exp -> RTL.exp + val eqvb : ty -> RTL.exp * RTL.exp -> RTL.exp + val notb : ty -> RTL.exp -> RTL.exp + val << : ty -> RTL.exp * RTL.exp -> RTL.exp + val >> : ty -> RTL.exp * RTL.exp -> RTL.exp + val ~>> : ty -> RTL.exp * RTL.exp -> RTL.exp + + (* Trapping operators *) + val addt : ty -> RTL.exp * RTL.exp -> RTL.exp + val subt : ty -> RTL.exp * RTL.exp -> RTL.exp + val mult : ty -> RTL.exp * RTL.exp -> RTL.exp + val divt : ty -> RTL.exp * RTL.exp -> RTL.exp + val remt : ty -> RTL.exp * RTL.exp -> RTL.exp + + val cond : ty -> RTL.cond * RTL.exp * RTL.exp -> RTL.exp + + (* Integer comparisons *) + val == : ty -> RTL.exp * RTL.exp -> RTL.cond + val <> : ty -> RTL.exp * RTL.exp -> RTL.cond + val > : ty -> RTL.exp * RTL.exp -> RTL.cond + val < : ty -> RTL.exp * RTL.exp -> RTL.cond + val <= : ty -> RTL.exp * RTL.exp -> RTL.cond + val >= : ty -> RTL.exp * RTL.exp -> RTL.cond + val ltu : ty -> RTL.exp * RTL.exp -> RTL.cond + val leu : ty -> RTL.exp * RTL.exp -> RTL.cond + val gtu : ty -> RTL.exp * RTL.exp -> RTL.cond + val geu : ty -> RTL.exp * RTL.exp -> RTL.cond + + (* Floating point operators *) + val fadd : ty -> RTL.exp * RTL.exp -> RTL.exp + val fsub : ty -> RTL.exp * RTL.exp -> RTL.exp + val fmul : ty -> RTL.exp * RTL.exp -> RTL.exp + val fdiv : ty -> RTL.exp * RTL.exp -> RTL.exp + val fabs : ty -> RTL.exp -> RTL.exp + val fneg : ty -> RTL.exp -> RTL.exp + val fsqrt : ty -> RTL.exp -> RTL.exp + + (* Floating point comparisons *) + val |?| : ty -> RTL.exp * RTL.exp -> RTL.cond + val |!<=>| : ty -> RTL.exp * RTL.exp -> RTL.cond + val |==| : ty -> RTL.exp * RTL.exp -> RTL.cond + val |?=| : ty -> RTL.exp * RTL.exp -> RTL.cond + val |!<>| : ty -> RTL.exp * RTL.exp -> RTL.cond + val |!?>=| : ty -> RTL.exp * RTL.exp -> RTL.cond + val |<| : ty -> RTL.exp * RTL.exp -> RTL.cond + val |?<| : ty -> RTL.exp * RTL.exp -> RTL.cond + val |!>=| : ty -> RTL.exp * RTL.exp -> RTL.cond + val |!?>| : ty -> RTL.exp * RTL.exp -> RTL.cond + val |<=| : ty -> RTL.exp * RTL.exp -> RTL.cond + val |?<=| : ty -> RTL.exp * RTL.exp -> RTL.cond + val |!>| : ty -> RTL.exp * RTL.exp -> RTL.cond + val |!?<=| : ty -> RTL.exp * RTL.exp -> RTL.cond + val |>| : ty -> RTL.exp * RTL.exp -> RTL.cond + val |?>| : ty -> RTL.exp * RTL.exp -> RTL.cond + val |!<=| : ty -> RTL.exp * RTL.exp -> RTL.cond + val |!?<| : ty -> RTL.exp * RTL.exp -> RTL.cond + val |>=| : ty -> RTL.exp * RTL.exp -> RTL.cond + val |?>=| : ty -> RTL.exp * RTL.exp -> RTL.cond + val |!<| : ty -> RTL.exp * RTL.exp -> RTL.cond + val |!?=| : ty -> RTL.exp * RTL.exp -> RTL.cond + val |<>| : ty -> RTL.exp * RTL.exp -> RTL.cond + val |!=| : ty -> RTL.exp * RTL.exp -> RTL.cond + val |!?| : ty -> RTL.exp * RTL.exp -> RTL.cond + val |<=>| : ty -> RTL.exp * RTL.exp -> RTL.cond + val |?<>| : ty -> RTL.exp * RTL.exp -> RTL.cond + + (* Action combinators *) + val || : RTL.action * RTL.action -> RTL.action (* parallel RTL.actions *) + val Nop : RTL.action (* empty RTL.action *) + val Jmp : int -> RTL.exp -> RTL.action (* jump to address *) + val Call : int -> RTL.exp -> RTL.action (* call address *) + val Ret : RTL.action (* return *) + val If : RTL.cond * RTL.action * RTL.action -> RTL.action (* if/then/else *) +end diff --git a/MLRISC/mltree/build-rtl.sml b/MLRISC/mltree/build-rtl.sml new file mode 100644 index 0000000..31ac9c5 --- /dev/null +++ b/MLRISC/mltree/build-rtl.sml @@ -0,0 +1,173 @@ +(* + * This takes a bunch of RTL and build a database that can be reused. + *) +structure BuildRTL : BUILD_RTL = +struct + structure RTL = MLTreeRTL + structure T = RTL.T + + type ty = int + + fun newOper name = ref{name=name,hash=0w0,attribs=0w0} + + fun wordConst ty w = T.LI32(w) + fun intConst ty i = wordConst ty (Word32.fromInt i) + + fun error msg = MLRiscErrorMsg.error("BuildRTL",msg) + + fun fetch ty loc = T.REXT(ty,RTL.FETCH loc) + + val wildCard = newOper "?" + + fun op:= ty (x,y) = T.EXT(RTL.ASSIGN(x,y)) + val noregion = T.LI 0 + fun $ (k,ty) addr = RTL.CELL(k,ty,addr,noregion) + fun $$ (k,ty) (addr,region) = RTL.CELL(k,ty,addr,region) + + fun aggb (t1,t2) cell = RTL.AGG(t2,RTL.BIG_ENDIAN,cell) + fun aggl (t1,t2) cell = RTL.AGG(t2,RTL.LITTLE_ENDIAN,cell) + fun idaggr t cell = RTL.AGG(t,RTL.LITTLE_ENDIAN,cell) + fun copy ty (dst,src) = T.COPY(ty,[],[]) + val dummyTy = 32 + + fun ! (t,x,y) = T.REXT(t,RTL.ARG(x,y)) + + (* Integer operators *) + fun unary f ty x = f(ty,x) + fun binary f ty (x, y) = f(ty,x,y) + fun ternary f ty (x, y, z) = f(ty, x, y, z) + + fun operand ty opn = opn + fun label ty label = label + fun immed ty imm = imm + + datatype kind = GP | FP | CC + + fun newOp name = + let val oper = newOper name + in fn xs => T.REXT(32,RTL.OP(oper,xs)) : RTL.exp + end + + val newCond = newOp + + fun sx (t1,t2) e = T.CVTI2I(t2,T.SIGN_EXTEND,t1,e) + fun zx (t1,t2) e = T.CVTI2I(t2,T.ZERO_EXTEND,t1,e) + fun ? ty = T.REXT(ty,RTL.OP(wildCard,[])) + fun forall t e = T.REXT(t,RTL.FORALL e) + + fun bitslice t2 ranges e = + let val t1 = foldr (fn ((a,b),l) => b-a+1+l) 0 ranges + val r = map (fn (a,b) => {from=T.LI a,to=T.LI b}) ranges + in T.REXT(t1,RTL.SLICE(r,t2,e)) end + + val not = T.NOT + val False = T.FALSE + val True = T.TRUE + + val op + = binary T.ADD + val op - = binary T.SUB + val muls = binary T.MULS + val mulu = binary T.MULU + val divs = ternary T.DIVS + val divu = binary T.DIVU + val rems = ternary T.REMS + val remu = binary T.REMU + fun ~ ty x = (op - ty) (intConst ty 0,x) + + val andb = binary T.ANDB + val orb = binary T.ORB + val xorb = binary T.XORB + val notb = unary T.NOTB + val << = binary T.SLL + val >> = binary T.SRL + val ~>> = binary T.SRA + fun eqvb ty (x,y) = notb ty (xorb ty (x,y)) + + (* Trapping operators *) + val addt = binary T.ADDT + val subt = binary T.SUBT + val mult = binary T.MULT + val divt = binary T.DIVT + val remt = binary T.REMT + + fun cond ty (x,y,z) = T.COND(ty, x, y, z) + + (* Integer comparisons *) + fun cmp cond ty (x,y) = T.CMP(ty,cond,x,y) + + val == = cmp T.EQ + val op <> = cmp T.NE + val op > = cmp T.GT + val op < = cmp T.LT + val op <= = cmp T.LE + val op >= = cmp T.GE + val ltu = cmp T.LTU + val leu = cmp T.LEU + val gtu = cmp T.GTU + val geu = cmp T.GEU + + (* Floating point operators *) + fun funary f = + let val oper = newOper f + in fn ty => fn x => T.REXT(ty,RTL.OP(oper,[x])) + end + fun fbinary f = + let val oper = newOper f + in fn ty => fn (x,y) => T.REXT(ty,RTL.OP(oper,[x, y])) + end + + val fadd = fbinary "FADD" + val fsub = fbinary "FSUB" + val fmul = fbinary "FMUL" + val fdiv = fbinary "FDIV" + val fabs = funary "FABS" + val fneg = funary "FNEG" + val fsqrt = funary "FSQRT" + + (* Floating point comparisons *) + fun fcmp fcond = + let val name = T.Basis.fcondToString fcond + val oper = newOper name + in fn ty => fn (x,y) => + T.CMP(ty,T.NE,T.REXT(ty,RTL.OP(oper,[x,y])),T.LI 0) + end + + val |?| = fcmp T.? + val |!<=>| = fcmp T.!<=> + val |==| = fcmp T.== + val |?=| = fcmp T.?= + val |!<>| = fcmp T.!<> + val |!?>=| = fcmp T.!?>= + val |<| = fcmp T.< + val |?<| = fcmp T.?< + val |!>=| = fcmp T.!>= + val |!?>| = fcmp T.!?> + val |<=| = fcmp T.<= + val |?<=| = fcmp T.?<= + val |!>| = fcmp T.!> + val |!?<=| = fcmp T.!?<= + val |>| = fcmp T.> + val |?>| = fcmp T.?> + val |!<=| = fcmp T.!<= + val |!?<| = fcmp T.!?< + val |>=| = fcmp T.>= + val |?>=| = fcmp T.?>= + val |!<| = fcmp T.!< + val |!?=| = fcmp T.!?= + val |<>| = fcmp T.<> + val |!=| = fcmp T.!= + val |!?| = fcmp T.!? + val |<=>| = fcmp T.<=> + val |?<>| = fcmp T.?<> + + (* Action combinators *) + fun ||(a,b) = T.EXT(RTL.PAR(a,b)) + val Nop = T.SEQ [] + fun Jmp ty e = T.JMP([],e,[]) + fun Call ty e = T.CALL{funct=e,targets=[],defs=[],uses=[], + cdefs=[],cuses=[],region= ~1} + val Ret = T.RET([],[]) + fun If(x,y,z) = T.IF([],x,y,z) + + fun map ty = List.map +end diff --git a/MLRISC/mltree/instr-gen.sig b/MLRISC/mltree/instr-gen.sig new file mode 100644 index 0000000..a46af29 --- /dev/null +++ b/MLRISC/mltree/instr-gen.sig @@ -0,0 +1,21 @@ +(* + * Generate a linear sequence of instructions + *) +signature INSTR_GEN = +sig + structure C : CELLS + structure I : INSTRUCTIONS + structure S : INSTRUCTION_STREAM + structure CFG : CONTROL_FLOW_GRAPH + + sharing I.C = C + sharing CFG.P = S.P + + (* + * This function creates an instruction stream, which can be + * used to emit instruction into the instruction list. + *) + val newStream : I.instruction list ref -> + (I.instruction, Annotations.annotations, 'a, CFG.cfg) S.stream + +end diff --git a/MLRISC/mltree/instr-gen.sml b/MLRISC/mltree/instr-gen.sml new file mode 100644 index 0000000..4b5d6cd --- /dev/null +++ b/MLRISC/mltree/instr-gen.sml @@ -0,0 +1,35 @@ +(* + * Generate a linear sequence of instructions + *) +functor InstrGen + (structure I : INSTRUCTIONS + structure Stream : INSTRUCTION_STREAM + structure CFG : CONTROL_FLOW_GRAPH + where I = I + and P = Stream.P + ) : INSTR_GEN = +struct + structure C = I.C + structure I = I + structure S = Stream + structure CFG = CFG + + (* Pretty stupid, eh? *) + fun newStream(instrs) = + let fun emit i = instrs := i :: !instrs + fun can'tUse _ = MLRiscErrorMsg.error("InstrGen","unimplemented") + in Stream.STREAM + { beginCluster = can'tUse, + endCluster = can'tUse, + emit = emit, + pseudoOp = can'tUse, + defineLabel = can'tUse, + entryLabel = can'tUse, + comment = can'tUse, + annotation = can'tUse, + getAnnotations = can'tUse, + exitBlock = can'tUse + } + end + +end diff --git a/MLRISC/mltree/machine-int.sig b/MLRISC/mltree/machine-int.sig new file mode 100644 index 0000000..b971e54 --- /dev/null +++ b/MLRISC/mltree/machine-int.sig @@ -0,0 +1,116 @@ +(* + * This module implements 2's complement arithmetic of various widths. + *) +signature MACHINE_INT = +sig + + type machine_int = IntInf.int + type sz = int (* width in bits *) + + datatype div_rounding_mode = DIV_TO_ZERO | DIV_TO_NEGINF + + val hash : machine_int -> word + + (* machine_int <-> other types *) + val fromInt : sz * int -> machine_int + val fromInt32 : sz * Int32.int -> machine_int + val fromInt64 : sz * Int64.int -> machine_int + val fromWord : sz * word -> machine_int + val fromWord32 : sz * Word32.word -> machine_int + val fromWord64 : sz * Word64.word -> machine_int + + val toInt : sz * machine_int -> int + val toInt32 : sz * machine_int -> Int32.int + val toInt64 : sz * machine_int -> Int64.int + val toWord : sz * machine_int -> word + val toWord32 : sz * machine_int -> Word32.word + val toWord64 : sz * machine_int -> Word64.word + + val fromString : sz * string -> machine_int option + val toString : sz * machine_int -> string + val toHexString : sz * machine_int -> string + val toBinString : sz * machine_int -> string + + + (* when in doubt, use this to narrow to a given width! *) + val narrow : sz * IntInf.int -> machine_int + + (* convert to signed/unsigned representation *) + val signed : sz * machine_int -> IntInf.int + val unsigned : sz * machine_int -> IntInf.int + + (* Split a machine_int of length sz into words of word sizes. + * The least significant word is at the front of the list + *) + val split : {sz:sz, wordSize:sz, i:machine_int} -> machine_int list + + (* two's complement operators *) + val NEG : sz * machine_int -> machine_int + val ABS : sz * machine_int -> machine_int + val ADD : sz * machine_int * machine_int -> machine_int + val SUB : sz * machine_int * machine_int -> machine_int + val MULS : sz * machine_int * machine_int -> machine_int + val DIVS : div_rounding_mode * + sz * machine_int * machine_int -> machine_int + val REMS : div_rounding_mode * + sz * machine_int * machine_int -> machine_int + + (* unsigned operators *) + val MULU : sz * machine_int * machine_int -> machine_int + val DIVU : sz * machine_int * machine_int -> machine_int +(* + val QUOTU : sz * machine_int * machine_int -> machine_int +*) + val REMU : sz * machine_int * machine_int -> machine_int + + (* Signed, trapping operators, may raise Overflow *) + val ABST : sz * machine_int -> machine_int + val NEGT : sz * machine_int -> machine_int + val ADDT : sz * machine_int * machine_int -> machine_int + val SUBT : sz * machine_int * machine_int -> machine_int + val MULT : sz * machine_int * machine_int -> machine_int + val DIVT : div_rounding_mode * + sz * machine_int * machine_int -> machine_int + + (* bit operators *) + val NOTB : sz * machine_int -> machine_int + val ANDB : sz * machine_int * machine_int -> machine_int + val ORB : sz * machine_int * machine_int -> machine_int + val XORB : sz * machine_int * machine_int -> machine_int + val EQVB : sz * machine_int * machine_int -> machine_int + val SLL : sz * machine_int * machine_int -> machine_int + val SRL : sz * machine_int * machine_int -> machine_int + val SRA : sz * machine_int * machine_int -> machine_int + val BITSLICE : sz * (int * int) list * machine_int -> machine_int + + (* Other useful operators *) + val Sll : sz * machine_int * word -> machine_int + val Srl : sz * machine_int * word -> machine_int + val Sra : sz * machine_int * word -> machine_int + val pow2 : int -> machine_int + val maxOfSize : sz -> machine_int + val minOfSize : sz -> machine_int + val isInRange : sz * machine_int -> bool + + (* Indexing *) + val bitOf : sz * machine_int * int -> word (* 0w0 or 0w1 *) + val byteOf : sz * machine_int * int -> word (* 8 bits *) + val halfOf : sz * machine_int * int -> word (* 16 bits *) + val wordOf : sz * machine_int * int -> Word32.word (* 32 bits *) + + (* type promotion *) + val SX : sz (* to *) * sz (* from *) * machine_int -> machine_int + val ZX : sz (* to *) * sz (* from *) * machine_int -> machine_int + + (* comparisions *) + val EQ : sz * machine_int * machine_int -> bool + val NE : sz * machine_int * machine_int -> bool + val GT : sz * machine_int * machine_int -> bool + val GE : sz * machine_int * machine_int -> bool + val LT : sz * machine_int * machine_int -> bool + val LE : sz * machine_int * machine_int -> bool + val LTU : sz * machine_int * machine_int -> bool + val GTU : sz * machine_int * machine_int -> bool + val LEU : sz * machine_int * machine_int -> bool + val GEU : sz * machine_int * machine_int -> bool +end diff --git a/MLRISC/mltree/machine-int.sml b/MLRISC/mltree/machine-int.sml new file mode 100644 index 0000000..fb4ad2b --- /dev/null +++ b/MLRISC/mltree/machine-int.sml @@ -0,0 +1,245 @@ +(* + * How to evaluate constants for various widths. + * + * Internally, we represent machine_int as a signed integer. + * So when we do bit or unsigned operations we have to convert to + * the unsigned representation first. + * + * Note: this implementation requires andb, orb, xorb etc in IntInf. + * You have to upgrade to the latest version of smlnj-lib if this + * fails to compile. + *) +local + + val maxSz = 65 + +in + +structure MachineInt : MACHINE_INT = +struct + + structure I = IntInf + structure S = String + type machine_int = I.int + type sz = int + + datatype div_rounding_mode = DIV_TO_ZERO | DIV_TO_NEGINF + + val itow = Word.fromInt + + (* Parse hex or binary, but not octal, that's for wussies *) + val hexToInt = StringCvt.scanString (I.scan StringCvt.HEX) + val binToInt = StringCvt.scanString (I.scan StringCvt.BIN) + + (* Precompute some tables for faster arithmetic *) + local + val pow2table = Array.tabulate(maxSz,fn n => I.<<(1,itow n)) (* 2^n *) + val masktable = Array.tabulate(maxSz, + fn n => I.-(I.<<(1,itow n),1)) (* 2^n-1 *) + val maxtable = Array.tabulate(maxSz+1, + fn 0 => 0 + | n => I.-(I.<<(1,itow(n-1)),1)) (* 2^{n-1}-1 *) + val mintable = Array.tabulate(maxSz+1, + fn 0 => 0 + | n => I.~(I.<<(1,itow(n-1)))) (* -2^{n-1} *) + in + + fun pow2 i = if i < maxSz then Array.sub(pow2table, i) + else I.<<(1,itow i) + fun maskOf sz = if sz < maxSz then Array.sub(masktable, sz) + else I.-(I.<<(1,itow sz),1) + fun maxOfSize sz = if sz < maxSz then Array.sub(maxtable, sz) + else I.-(I.<<(1,itow(sz-1)),1) + fun minOfSize sz = if sz < maxSz then Array.sub(mintable, sz) + else I.~(I.<<(1,itow(sz-1))) + end + + (* queries *) + fun isNeg(i) = I.sign i < 0 + fun isPos(i) = I.sign i > 0 + fun isZero(i) = I.sign i = 0 + fun isNonNeg(i) = I.sign i >= 0 + fun isNonPos(i) = I.sign i <= 0 + fun isEven(i) = isZero(I.rem(i,2)) + fun isOdd(i) = not(isEven(i)) + + (* to unsigned representation *) + fun unsigned(sz, i) = if isNeg i then I.+(i, pow2 sz) else i + + (* to signed representation *) + fun signed(sz, i) = if I.>(i, maxOfSize sz) then I.-(i, pow2 sz) else i + + (* Narrow to the representation of a given type *) + fun narrow(sz, i) = signed(sz, I.andb(i, maskOf sz)) + + (* Recognize 0x and 0b prefix and do the right thing *) + fun fromString(sz, s) = + let val n = S.size s + fun conv(i,negate) = + if n >= 2+i andalso S.sub(s, i) = #"0" then + (case S.sub(s, i+1) of + #"x" => (hexToInt (S.substring(s,2+i,n-2-i)), negate) + | #"b" => (binToInt (S.substring(s,2+i,n-2-i)), negate) + | _ => (I.fromString s, false) + ) + else (I.fromString s, false) + val (result, negate) = + if s = "" then (NONE, false) + else if S.sub(s, 0) = #"~" then conv(1, true) + else conv(0, false) + in case (result, negate) of + (SOME n, true) => SOME(narrow(sz, I.~ n)) + | (SOME n, false) => SOME(narrow(sz, n)) + | (NONE, _) => NONE + end + + (* Convert types into IntInf without losing precision. *) + structure Cvt = + struct + structure W = Word + structure W32 = Word32 + val wtoi = W.toIntX + val w32toi = W32.toIntX + val fromInt = I.fromInt + val fromInt32 = Int32.toLarge + val fromInt64 = Int64.toLarge + fun fromWord w = I.fromLarge(Word.toLargeInt w) +(* + fun fromWord32 w = I.+(I.<<(I.fromInt(w32toi(W32.>>(w,0w16))),0w16), + I.fromInt(w32toi(W32.andb(w,0wxffff)))) +*) + fun fromWord32 w = I.fromLarge(Word32.toLargeInt w) + fun fromWord64 w = I.fromLarge(Word64.toLargeInt w) + end + (* machine_int <-> other types *) + fun fromInt(sz,i) = narrow(sz,Cvt.fromInt i) + fun fromInt32(sz,i) = narrow(sz,Cvt.fromInt32 i) + fun fromInt64(sz,i) = narrow(sz,Cvt.fromInt64 i) + fun fromWord(sz,w) = narrow(sz,Cvt.fromWord w) + fun fromWord32(sz,w) = narrow(sz,Cvt.fromWord32 w) + fun fromWord64(sz,w) = narrow(sz,Cvt.fromWord64 w) + fun toString(sz,i) = I.toString i + val toHex = I.fmt StringCvt.HEX + val toBin = I.fmt StringCvt.BIN + fun toHexString(sz, i) = "0x"^toHex(unsigned(sz, i)) + fun toBinString(sz, i) = "0b"^toBin(unsigned(sz, i)) + fun toInt(sz, i) = I.toInt(narrow(sz, i)) + fun toInt32(sz, i) = Int32.fromLarge(narrow(sz, i)) + fun toInt64(sz, i) = Int64.fromLarge(narrow(sz, i)) + fun toWord(sz, i) = Word.fromLargeInt(I.toLarge(unsigned(sz, i))) + fun toWord32(sz, i) = Word32.fromLargeInt(I.toLarge(unsigned(sz, i))) +(* + let val i = unsigned(sz, i) + val lo = I.andb(i,0xffff) + val hi = I.~>>(i,0w16) + fun tow32 i = Word32.fromLargeInt(I.toLarge i) + in tow32 lo + Word32.<<(tow32 hi, 0w16) end +*) + fun toWord64(sz, i) = Word64.fromLargeInt(I.toLarge(unsigned(sz, i))) + + fun hash i = Word.fromInt(I.toInt(I.andb(i,0x1fffffff))) + + fun isInRange(sz, i) = I.<=(minOfSize sz,i) andalso I.<=(i,maxOfSize sz) + + fun signedBinOp f (sz,i,j) = narrow(sz, f(i, j)) + + fun signedUnaryOp f (sz,i) = narrow(sz, f i) + + fun unsignedBinOp f (sz,i,j) = narrow(sz, f(unsigned(sz,i), unsigned(sz,j))) + + fun trappingUnaryOp f (sz,i) = + let val x = f i + in if isInRange(sz, x) then x else raise Overflow + end + + fun trappingBinOp f (sz,i,j) = + let val x = f(i,j) + in if isInRange(sz, x) then x else raise Overflow + end + + (* two's complement operators *) + val NEG = signedUnaryOp I.~ + val ABS = signedUnaryOp I.abs + val ADD = signedBinOp I.+ + val SUB = signedBinOp I.- + val MULS = signedBinOp I.* + fun DIVS (DIV_TO_ZERO, ty, x, y) = signedBinOp I.quot (ty, x, y) + | DIVS (DIV_TO_NEGINF, ty, x, y) = signedBinOp I.div (ty, x, y) + fun REMS (DIV_TO_ZERO, ty, x, y) = signedBinOp I.rem (ty, x, y) + | REMS (DIV_TO_NEGINF, ty, x, y) = signedBinOp I.mod (ty, x, y) + val MULU = unsignedBinOp I.* + val DIVU = unsignedBinOp I.div +(* + val QUOTU = unsignedBinOp I.quot +*) + val REMU = unsignedBinOp I.rem + + val NEGT = trappingUnaryOp I.~ + val ABST = trappingUnaryOp I.abs + val ADDT = trappingBinOp I.+ + val SUBT = trappingBinOp I.- + val MULT = trappingBinOp I.* + fun DIVT (DIV_TO_ZERO, ty, x, y) = trappingBinOp I.quot (ty, x, y) + | DIVT (DIV_TO_NEGINF, ty, x, y) = trappingBinOp I.div (ty, x, y) + + fun NOTB(sz,x) = narrow(sz,I.notb x) + fun ANDB(sz,x,y) = narrow(sz,I.andb(x,y)) + fun ORB(sz,x,y) = narrow(sz,I.orb(x,y)) + fun XORB(sz,x,y) = narrow(sz,I.xorb(x,y)) + fun EQVB(sz,x,y) = narrow(sz,I.xorb(I.notb x,y)) + fun Sll(sz,x,y) = narrow(sz,I.<<(x, y)) + fun Srl(sz,x,y) = narrow(sz,I.~>>(unsigned(sz, x), y)) + fun Sra(sz,x,y) = narrow(sz,I.~>>(x, y)) + fun SLL(sz,x,y) = Sll(sz,x,toWord(sz, y)) + fun SRL(sz,x,y) = Srl(sz,x,toWord(sz, y)) + fun SRA(sz,x,y) = Sra(sz,x,toWord(sz, y)) + + fun BITSLICE(sz,sl,x) = + let fun slice([],n) = n + | slice((from,to)::sl,n) = + slice(sl, ORB(sz, narrow(to-from+1, + Srl(sz, x, Word.fromInt from)), n)) + in slice(sl, 0) + end + + fun bitOf(sz, i, b) = + toWord(1, narrow(1, Srl(sz, i, Word.fromInt b))) + fun byteOf(sz, i, b) = + toWord(8, narrow(8, Srl(sz, i, Word.fromInt(b*8)))) + fun halfOf(sz, i, h) = + toWord(16, narrow(16, Srl(sz, i, Word.fromInt(h*16)))) + fun wordOf(sz, i, w) = + toWord32(32, narrow(32, Srl(sz, i, Word.fromInt(w*32)))) + + (* type promotion *) + fun SX(toSz,fromSz,i) = narrow(toSz, narrow(fromSz, i)) + fun ZX(toSz,fromSz,i) = narrow(toSz, unsigned(fromSz, narrow(fromSz, i))) + + (* comparisions *) + fun EQ(sz,i:I.int,j) = i = j + fun NE(sz,i:I.int,j) = i <> j + fun GT(sz,i:I.int,j) = i > j + fun GE(sz,i:I.int,j) = i >= j + fun LT(sz,i:I.int,j) = i < j + fun LE(sz,i:I.int,j) = i <= j + fun LTU(sz,i,j) = unsigned(sz, i) < unsigned(sz, j) + fun GTU(sz,i,j) = unsigned(sz, i) > unsigned(sz, j) + fun LEU(sz,i,j) = unsigned(sz, i) <= unsigned(sz, j) + fun GEU(sz,i,j) = unsigned(sz, i) >= unsigned(sz, j) + (* + * Split an integer "i" of size "sz" into words of size "wordSize" + *) + fun split{sz, wordSize, i} = + let fun loop(sz, i, ws) = + if sz <= 0 then rev ws + else + let val w = narrow(wordSize, i) + val i = IntInf.~>>(i, Word.fromInt wordSize) + in loop(sz - wordSize, i, w::ws) + end + in loop(sz, unsigned(sz, i), []) + end + +end + +end diff --git a/MLRISC/mltree/mltree-basis.sig b/MLRISC/mltree/mltree-basis.sig new file mode 100644 index 0000000..922165c --- /dev/null +++ b/MLRISC/mltree/mltree-basis.sig @@ -0,0 +1,77 @@ +(* mltree-basis.sig + * + * COPYRIGHT (c) 2002 Bell Labs, Lucent Technologies + *) + +signature MLTREE_BASIS = +sig + + type attribs = word + + type misc_op = {name:string, hash:word, attribs:attribs ref} + + datatype cond = LT | LTU | LE | LEU | EQ | NE | GE | GEU | GT | GTU + | SETCC + | MISC_COND of {name:string, hash:word, attribs:word ref} + +(* Floating-point conditions; the semantics follow the IEEE specification and + * are determined by four properties: GT -- greater than, EQ -- equal, + * LT -- less than, UO -- unordered. In the table below, we have a column + * for each of these properties and one for the negation of the operator. + *) + datatype fcond (* GT EQ LT UO negation *) + (* --- --- --- --- -------- *) + = == (* F T F F ?<> *) + | ?<> (* T F T T == *) + | > (* T F F F ?<= *) + | >= (* T T F F ?< *) + | < (* F F F F ?>= *) + | <= (* F T F F ?> *) + | ? (* F F F T <=> *) + | <> (* T F T F ?= *) + | <=> (* T T T F ? *) + | ?> (* T F F T <= *) + | ?>= (* T T F T < *) + | ?< (* F F F T >= *) + | ?<= (* F T F T > *) + | ?= (* F T F T <> *) + | SETFCC + | MISC_FCOND of {name:string, hash:word, attribs:word ref} + + datatype rounding_mode = TO_NEAREST | TO_NEGINF | TO_POSINF | TO_ZERO + + datatype div_rounding_mode = DIV_TO_NEGINF | DIV_TO_ZERO + + datatype ext = SIGN_EXTEND | ZERO_EXTEND + + (* Should be datatypes, but FLINT does not optimize them well *) + type ty = int + type fty = int + + (* Invert the conditional when swapping the two arguments + * of the comparision. IMPORTANT: this is not the negation! + *) + val swapCond : cond -> cond + + (* Invert the conditional when swapping the two arguments + * of the comparision. IMPORTANT: this is not the negation! + *) + val swapFcond : fcond -> fcond + + (* This is the negation! *) + val negateCond : cond -> cond + + (* This is the negation! *) + val negateFcond : fcond -> fcond + + (* hashing functions *) + val hashCond : cond -> word + val hashFcond : fcond -> word + val hashRoundingMode : rounding_mode -> word + + (* pretty printing *) + val condToString : cond -> string + val fcondToString : fcond -> string + val roundingModeToString : rounding_mode -> string + +end diff --git a/MLRISC/mltree/mltree-basis.sml b/MLRISC/mltree/mltree-basis.sml new file mode 100644 index 0000000..2584b20 --- /dev/null +++ b/MLRISC/mltree/mltree-basis.sml @@ -0,0 +1,119 @@ +(* mltree-basis.sml + * + * COPYRIGHT (c) 2002 Bell Labs, Lucent Technologies + *) + +structure MLTreeBasis : MLTREE_BASIS = +struct + + type attribs = word + + type misc_op = {name:string, hash:word, attribs:attribs ref} + + datatype cond = LT | LTU | LE | LEU | EQ | NE | GE | GEU | GT | GTU + | SETCC + | MISC_COND of {name:string, hash:word, attribs:attribs ref} + +(* Floating-point conditions: see mltree-basis.sig for documentation *) + datatype fcond + = == | ?<> | > | >= | < | <= | ? | <> | <=> + | ?> | ?>= | ?< | ?<= | ?= + | SETFCC + | MISC_FCOND of {name:string, hash:word, attribs:word ref} + + datatype ext = SIGN_EXTEND | ZERO_EXTEND + + datatype rounding_mode = TO_NEAREST | TO_NEGINF | TO_POSINF | TO_ZERO + + datatype div_rounding_mode = DIV_TO_NEGINF | DIV_TO_ZERO + + fun error(msg, oper) = MLRiscErrorMsg.error("MLTreeBasis",msg^": "^oper) + + nonfix <> < > >= <= + + (* These should be datatypes, but FLINT does not optimize them well *) + type ty = int + type fty = int + + fun condToString cond = + case cond of + LT => "LT" | LTU => "LTU" | LE => "LE" | LEU => "LEU" + | EQ => "EQ" | NE => "NE" | GE => "GE" | GEU => "GEU" + | GT => "GT" | GTU => "GTU" + | SETCC => "SETCC" + | MISC_COND{name,...} => name + + fun fcondToString fcond = case fcond + of == => "==" | ?<> => "?<>" + | > => ">" | >= => ">=" | < => "<" | <= => "<=" + | ? => "?" | <> => "<>" | <=> => "<=>" + | ?> => "?<" | ?>= => "?>=" | ?< => "?<" | ?<= => "?<=" | ?= => "?=" + | SETFCC => "SETFCC" + | MISC_FCOND{name, ...} => name + + fun swapCond cond = + case cond of + LT => GT | LTU => GTU | LE => GE | LEU => GEU | EQ => EQ + | NE => NE | GE => LE | GEU => LEU | GT => LT | GTU => LTU + | cond => error("swapCond",condToString cond) + +(* swap order of arguments *) + fun swapFcond fcond = + case fcond of + ? => ? | == => == + | ?= => ?= + | < => > | ?< => ?> + | <= => >= | ?<= => ?>= + | > => < + | ?> => ?< + | >= => <= | ?>= => ?<= + | <> => <> + | <=> => <=> | ?<> => ?<> + | fcond => error("swapFcond",fcondToString fcond) + + fun negateCond cond = + case cond of + LT => GE | LTU => GEU | LE => GT | LEU => GTU | EQ => NE + | NE => EQ | GE => LT | GEU => LTU | GT => LE | GTU => LEU + | cond => error("negateCond",condToString cond) + + fun negateFcond fcond = + case fcond of + == => ?<> | ?<> => == | ? => <=> + | <=> => ? | > => ?<= | >= => ?< + | ?> => <= | ?>= => < | < => ?>= + | <= => ?> | ?< => >= | ?<= => > + | <> => ?= | ?= => <> + | _ => error("negateFcond", fcondToString fcond) + + fun hashCond cond = + case cond of + LT => 0w123 | LTU => 0w758 | LE => 0w81823 | LEU => 0w1231 + | EQ => 0w987 | NE => 0w8819 | GE => 0w88123 | GEU => 0w975 + | GT => 0w1287 | GTU => 0w2457 + | SETCC => 0w23 + | MISC_COND{hash, ...} => hash + + fun hashFcond fcond = + case fcond of + ? => 0w123 | == => 0w12345 | ?= => 0w123456 + | < => 0w23456 | ?< => 0w345 + | <= => 0w456 | ?<= => 0w4567 + | > => 0w5678 | ?> => 0w56789 + | >= => 0w67890 | ?>= => 0w789 + | <> => 0w890 + | <=> => 0w991 | ?<> => 0w391 + | SETFCC => 0w94 + | MISC_FCOND{hash, ...} => hash + + fun hashRoundingMode m = + case m of + TO_NEAREST => 0w1 | TO_NEGINF => 0w10 + | TO_POSINF => 0w100 | TO_ZERO => 0w1000 + + fun roundingModeToString m = + case m of + TO_NEAREST => "TO_NEAREST" | TO_NEGINF => "TO_NEGINF" + | TO_POSINF => "TO_POSINF" | TO_ZERO => "TO_ZERO" + +end (* MLTreeBasis *) diff --git a/MLRISC/mltree/mltree-check-ty.sml b/MLRISC/mltree/mltree-check-ty.sml new file mode 100644 index 0000000..7583736 --- /dev/null +++ b/MLRISC/mltree/mltree-check-ty.sml @@ -0,0 +1,137 @@ +(* mltree-check-ty.sml + * + * Check that MLRISC programs have consistent types. + *) + +functor MLTreeCheckTy + (structure T : MLTREE + val intTy : T.ty (* size of integer word *)) : sig + val check : T.stm -> bool + end = struct + + exception AmbiguousType + + exception TypeError + + fun chkEq (ty, tys) = List.all (fn SOME ty' => ty' = ty | NONE => true) tys + + fun chkTys (ty, tys) = if chkEq (ty, tys) + then ty + else raise TypeError + + (* check well-formedness of a list of expressions *) + fun checkRexps (ty, es) = let + val tys = List.map (fn e => SOME (checkRexp e) handle AmbiguousType => NONE) es + in + chkTys(ty, tys) + end + + and checkRexp (T.REG(ty,_)) = ty + (* the type of a literal expression depends on its surrounding context *) + | checkRexp (T.LI _) = raise AmbiguousType + | checkRexp (T.LABEL _) = intTy + (* the type of a literal expression depends on its surrounding context *) + | checkRexp (T.CONST _) = raise AmbiguousType + | checkRexp (T.LABEXP e) = checkRexp e + | checkRexp (T.NEG(ty, e)) = checkRexps(ty, [e]) + | checkRexp (T.ADD(ty,e1,e2)) = checkRexps(ty, [e1, e2]) + | checkRexp (T.SUB(ty,e1,e2)) = checkRexps(ty, [e1, e2]) + | checkRexp (T.MULS(ty,e1,e2)) = checkRexps(ty, [e1, e2]) + | checkRexp (T.DIVS(_,ty,e1,e2)) = checkRexps(ty, [e1, e2]) + | checkRexp (T.REMS(_,ty,e1,e2)) = checkRexps(ty, [e1, e2]) + | checkRexp (T.MULU(ty,e1,e2)) = checkRexps(ty, [e1, e2]) + | checkRexp (T.DIVU(ty,e1,e2)) = checkRexps(ty, [e1, e2]) + | checkRexp (T.REMU(ty,e1,e2)) = checkRexps(ty, [e1, e2]) + | checkRexp (T.NEGT(ty,e)) = checkRexps(ty, [e]) + | checkRexp (T.ADDT(ty,e1,e2)) = checkRexps(ty, [e1, e2]) + | checkRexp (T.SUBT(ty,e1,e2)) = checkRexps(ty, [e1, e2]) + | checkRexp (T.MULT(ty,e1,e2)) = checkRexps(ty, [e1, e2]) + | checkRexp (T.DIVT(_,ty,e1,e2)) = checkRexps(ty, [e1, e2]) + | checkRexp (T.ANDB(ty,e1,e2)) = checkRexps(ty, [e1, e2]) + | checkRexp (T.ORB(ty,e1,e2)) = checkRexps(ty, [e1, e2]) + | checkRexp (T.XORB(ty,e1,e2)) = checkRexps(ty, [e1, e2]) + | checkRexp (T.EQVB(ty,e1,e2)) = checkRexps(ty, [e1, e2]) + | checkRexp (T.NOTB(ty,e)) = checkRexps(ty, [e]) + | checkRexp (T.SRA(ty,e1,e2)) = checkRexps(ty, [e1, e2]) + | checkRexp (T.SRL(ty,e1,e2)) = checkRexps(ty, [e1, e2]) + | checkRexp (T.SLL(ty,e1,e2)) = checkRexps(ty, [e1, e2]) + | checkRexp (T.SX(toTy,fromTy,e)) = (checkRexps(fromTy, [e]); toTy) + | checkRexp (T.ZX(toTy,fromTy,e)) = (checkRexps(fromTy, [e]); toTy) + | checkRexp (T.CVTF2I(ty,_,_,_)) = ty + | checkRexp (T.COND(ty,cce,e1,e2)) = (checkCCexp cce; checkRexps(ty, [e1, e2])) + | checkRexp (T.LOAD(ty,ea,_)) = (checkRexps(intTy, [ea]); ty) + | checkRexp (T.PRED(e,_)) = checkRexp e + | checkRexp (T.LET(_,e)) = checkRexp e + | checkRexp (T.REXT(ty,_)) = ty + | checkRexp (T.MARK(e,_)) = checkRexp e + | checkRexp (T.OP(ty,_,_)) = ty + | checkRexp (T.ARG(ty,_,_)) = ty + | checkRexp (T.$(ty,_,_)) = ty + | checkRexp (T.PARAM _) = intTy + | checkRexp (T.BITSLICE(ty,_,_)) = ty + | checkRexp (T.???) = intTy + + and checkFexps (ty, es) = let + val tys = List.map (fn e => SOME (checkFexp e) handle AmbiguousType => NONE) es + in + chkTys(ty, tys) + end + + and checkFexp (T.FREG(ty,_)) = raise AmbiguousType + | checkFexp (T.FLOAD(ty,ea,_)) = (checkRexps(intTy, [ea]); ty) + | checkFexp (T.FADD(ty,e1,e2)) = checkFexps(ty, [e1, e2]) + | checkFexp (T.FSUB(ty,e1,e2)) = checkFexps(ty, [e1, e2]) + | checkFexp (T.FMUL(ty,e1,e2)) = checkFexps(ty, [e1, e2]) + | checkFexp (T.FDIV(ty,e1,e2)) = checkFexps(ty, [e1, e2]) + | checkFexp (T.FABS(ty,e)) = checkFexps(ty, [e]) + | checkFexp (T.FNEG(ty,e)) = checkFexps(ty, [e]) + | checkFexp (T.FSQRT(ty,e)) = checkFexps(ty, [e]) + | checkFexp (T.FCOND(ty,ce,e1,e2)) = (checkCCexp ce; checkFexps(ty, [e1, e2])) + | checkFexp (T.CVTI2F(ty,_,_)) = ty + | checkFexp (T.CVTF2F(ty,_,_)) = ty + | checkFexp (T.FCOPYSIGN(ty,e1,e2)) = checkFexps(ty, [e1, e2]) + | checkFexp (T.FPRED(e,_)) = checkFexp e + | checkFexp (T.FEXT(ty,_)) = ty + | checkFexp (T.FMARK(e,_)) = checkFexp e + + (* don't care about ambiguous types *) + and checkRexpB (ty, e) = checkRexp e = ty handle AmbiguousType => true + + and checkCCexp cce = checkCCexpB cce orelse raise TypeError + + and checkCCexpB cce = (case cce + of T.NOT cce => checkCCexpB cce + | ( T.AND (cce1, cce2) | T.OR (cce1, cce2) | T.XOR (cce1, cce2) | T.EQV (cce1, cce2) ) => + checkCCexpB cce1 andalso checkCCexpB cce2 + | T.CMP (ty, _, e1, e2) => ty = checkRexp e1 andalso ty = checkRexp e2 + | T.FCMP (fty, _, e1, e2) => fty = checkFexp e1 andalso fty = checkFexp e2 + | T.CCMARK (cce, _) => checkCCexpB cce + | T.CCEXT (ty, ccext) => true + (* end case *)) + + fun check stm = (case stm + of T.MV (ty, d, e) => checkRexpB (ty, e) + | T.CCMV (dst, cce) => checkCCexpB cce + | T.FMV (fty, dst, e) => checkFexp e = fty + | T.COPY _ => true + | T.FCOPY _ => true + | T.JMP (e, _) => checkRexpB (intTy, e) + | T.BCC (cce, _) => checkCCexpB cce + | T.CALL {funct, ...} => checkRexpB (intTy, funct) + | T.FLOW_TO (stm, _) => check stm + | T.RET _ => true + | T.IF (cce, stm1, stm2) => checkCCexpB cce andalso check stm1 andalso check stm2 + | T.STORE (ty, e1, e2, _) => checkRexpB (intTy, e1) andalso checkRexpB(intTy, e2) + | T.FSTORE (fty, e1, e2, _) => checkRexpB (intTy, e1) andalso fty = checkFexp e2 + | T.REGION (stm, _) => check stm + | T.SEQ stms => List.all check stms + | T.DEFINE _ => true + | T.ANNOTATION (stm, _) => check stm + | T.EXT _ => true + | T.LIVE _ => true + | T.KILL _ => true + | _ => true + (* end case *)) + handle TypeError => false + + end (* MLTreeCheckTy *) diff --git a/MLRISC/mltree/mltree-eval.sig b/MLRISC/mltree/mltree-eval.sig new file mode 100644 index 0000000..ea02a4c --- /dev/null +++ b/MLRISC/mltree/mltree-eval.sig @@ -0,0 +1,34 @@ +(* mltree-eval.sig + * + * COPYRIGHT (c) 2001 Lucent Technologies, Bell Laboratories. + * + * Utilites to evaluate and compare mltree expressions. + *) + +signature MLTREE_EVAL = sig + structure T : MLTREE + + (* + * Equality + *) + val eqStm : T.stm * T.stm -> bool + val eqRexp : T.rexp * T.rexp -> bool + val eqFexp : T.fexp * T.fexp -> bool + val eqCCexp : T.ccexp * T.ccexp -> bool + val eqMlriscs : T.mlrisc list * T.mlrisc list -> bool + val == : T.labexp * T.labexp -> bool + + + (* + * Value + *) + exception NonConst + val eval : + {const:T.Constant.const -> IntInf.int, + label:Label.label -> int} + -> + {rexp : T.rexp -> IntInf.int, + ccexp : T.ccexp -> bool} + + val valueOf : T.labexp -> int +end diff --git a/MLRISC/mltree/mltree-eval.sml b/MLRISC/mltree/mltree-eval.sml new file mode 100644 index 0000000..b037a1c --- /dev/null +++ b/MLRISC/mltree/mltree-eval.sml @@ -0,0 +1,264 @@ +(* mltree-eval.sml + * + * COPYRIGHT (c) 2001 Lucent Technologies, Bell Laboratories. + * + * Utilites to evaluate and compare mltree expressions. + *) + +functor MLTreeEval + (structure T : MLTREE + (* equality extensions *) + val eqSext : T.equality -> T.sext * T.sext -> bool + val eqRext : T.equality -> T.rext * T.rext -> bool + val eqFext : T.equality -> T.fext * T.fext -> bool + val eqCCext : T.equality -> T.ccext * T.ccext -> bool + ) : MLTREE_EVAL = +struct + structure T = T + structure I = T.I + structure Constant = T.Constant + structure C = CellsBasis + + val eqLabel = Label.same + fun eqLabels([],[]) = true + | eqLabels(a::b,c::d) = eqLabel(a,c) andalso eqLabels(b,d) + | eqLabels _ = false + and eqCell(C.CELL{id=x, ...},C.CELL{id=y, ...}) = x=y + and eqCells([], []) = true + | eqCells(x::xs,y::ys) = eqCell(x,y) andalso eqCells(xs,ys) + | eqCells _ = false + and eqCopy((t1,dst1,src1),(t2,dst2,src2)) = + t1=t2 andalso eqCells(dst1,dst2) andalso eqCells(src1,src2) + and eqCtrl(c1,c2) = eqCell(c1,c2) + and eqCtrls(c1,c2) = eqCells(c1,c2) + + (* statements *) + and equality() = {stm=eqStm, rexp=eqRexp, fexp=eqFexp, ccexp=eqCCexp} + and eqStm(T.MV(a,b,c),T.MV(d,e,f)) = + a=d andalso eqCell(b,e) andalso eqRexp(c,f) + | eqStm(T.CCMV(a,b),T.CCMV(c,d)) = eqCell(a,c) andalso eqCCexp(b,d) + | eqStm(T.FMV(a,b,c),T.FMV(d,e,f)) = + a=d andalso eqCell(b,e) andalso eqFexp(c,f) + | eqStm(T.COPY x,T.COPY y) = eqCopy(x,y) + | eqStm(T.FCOPY x,T.FCOPY y) = eqCopy(x,y) + | eqStm(T.JMP(a,b),T.JMP(a',b')) = eqRexp(a,a') + | eqStm(T.CALL{funct=a,defs=b,uses=c,...}, + T.CALL{funct=d,defs=e,uses=f,...}) = + eqRexp(a,d) andalso eqMlriscs(b,e) andalso eqMlriscs(c,f) + | eqStm(T.RET _,T.RET _) = true + | eqStm(T.STORE(a,b,c,_),T.STORE(d,e,f,_)) = + a=d andalso eqRexp(b,e) andalso eqRexp(c,f) + | eqStm(T.FSTORE(a,b,c,_),T.FSTORE(d,e,f,_)) = + a=d andalso eqRexp(b,e) andalso eqFexp(c,f) + | eqStm(T.ANNOTATION(s1, _),s2) = eqStm(s1,s2) + | eqStm(s1,T.ANNOTATION(s2, _)) = eqStm(s1,s2) + | eqStm(T.PHI x,T.PHI y) = x=y + | eqStm(T.SOURCE,T.SOURCE) = true + | eqStm(T.SINK,T.SINK) = true + | eqStm(T.BCC(b,c),T.BCC(b',c')) = + eqCCexp(b,b') andalso eqLabel(c,c') + | eqStm(T.IF(b,c,d),T.IF(b',c',d')) = + eqCCexp(b,b') andalso eqStm(c,c') andalso eqStm(d,d') + | eqStm(T.RTL{attribs=x,...},T.RTL{attribs=y,...}) = x=y + | eqStm(T.REGION(a,b),T.REGION(a',b')) = eqCtrl(b,b') andalso eqStm(a,a') + | eqStm(T.EXT a,T.EXT a') = eqSext (equality()) (a,a') + | eqStm _ = false + + and eqStms([],[]) = true + | eqStms(a::b,c::d) = eqStm(a,c) andalso eqStms(b,d) + | eqStms _ = false + + and eqMlrisc(T.CCR a,T.CCR b) = eqCCexp(a,b) + | eqMlrisc(T.GPR a,T.GPR b) = eqRexp(a,b) + | eqMlrisc(T.FPR a,T.FPR b) = eqFexp(a,b) + | eqMlrisc _ = false + + and eqMlriscs([],[]) = true + | eqMlriscs(a::b,c::d) = eqMlrisc(a,c) andalso eqMlriscs(b,d) + | eqMlriscs _ = false + + and eq2((a,b,c),(d,e,f)) = a=d andalso eqRexp(b,e) andalso eqRexp(c,f) + and eq3((m,a,b,c),(n,d,e,f)) = + m = n andalso a=d andalso eqRexp(b,e) andalso eqRexp(c,f) + + and eqRexp(T.REG(a,b),T.REG(c,d)) = a=c andalso eqCell(b,d) + | eqRexp(T.LI a,T.LI b) = a = b + | eqRexp(T.LABEL a,T.LABEL b) = eqLabel(a,b) + | eqRexp(T.LABEXP a,T.LABEXP b) = eqRexp(a,b) + | eqRexp(T.CONST a,T.CONST b) = Constant.==(a,b) + | eqRexp(T.NEG(t,x),T.NEG(t',x')) = t = t' andalso eqRexp(x,x') + | eqRexp(T.ADD x,T.ADD y) = eq2(x,y) + | eqRexp(T.SUB x,T.SUB y) = eq2(x,y) + | eqRexp(T.MULS x,T.MULS y) = eq2(x,y) + | eqRexp(T.DIVS x,T.DIVS y) = eq3(x,y) + | eqRexp(T.REMS x,T.REMS y) = eq3(x,y) + | eqRexp(T.MULU x,T.MULU y) = eq2(x,y) + | eqRexp(T.DIVU x,T.DIVU y) = eq2(x,y) + | eqRexp(T.REMU x,T.REMU y) = eq2(x,y) + | eqRexp(T.NEGT(t,x),T.NEGT(t',x')) = t = t' andalso eqRexp(x,x') + | eqRexp(T.ADDT x,T.ADDT y) = eq2(x,y) + | eqRexp(T.SUBT x,T.SUBT y) = eq2(x,y) + | eqRexp(T.MULT x,T.MULT y) = eq2(x,y) + | eqRexp(T.DIVT x,T.DIVT y) = eq3(x,y) + | eqRexp(T.ANDB x,T.ANDB y) = eq2(x,y) + | eqRexp(T.ORB x,T.ORB y) = eq2(x,y) + | eqRexp(T.XORB x,T.XORB y) = eq2(x,y) + | eqRexp(T.EQVB x,T.EQVB y) = eq2(x,y) + | eqRexp(T.NOTB(a,b),T.NOTB(c,d)) = a=c andalso eqRexp(b,d) + | eqRexp(T.SRA x,T.SRA y) = eq2(x,y) + | eqRexp(T.SRL x,T.SRL y) = eq2(x,y) + | eqRexp(T.SLL x,T.SLL y) = eq2(x,y) + | eqRexp(T.COND(a,b,c,d),T.COND(e,f,g,h)) = + a=e andalso eqCCexp(b,f) andalso eqRexp(c,g) andalso eqRexp(d,h) + | eqRexp(T.SX(a,b,c),T.SX(a',b',c')) = + a=a' andalso b=b' andalso eqRexp(c,c') + | eqRexp(T.ZX(a,b,c),T.ZX(a',b',c')) = + a=a' andalso b=b' andalso eqRexp(c,c') + | eqRexp(T.CVTF2I(a,b,c,d),T.CVTF2I(e,f,g,h)) = + a=e andalso b=f andalso c=g andalso eqFexp(d,h) + | eqRexp(T.LOAD(a,b,_),T.LOAD(c,d,_)) = a=c andalso eqRexp(b,d) + | eqRexp(T.LET(a,b),T.LET(c,d)) = eqStm(a,c) andalso eqRexp(b,d) + | eqRexp(T.ARG x,T.ARG y) = x = y + | eqRexp(T.PARAM x,T.PARAM y) = x = y + | eqRexp(T.???,T.???) = true + | eqRexp(T.$(t1,k1,e1),T.$(t2,k2,e2)) = + t1=t2 andalso k1=k2 andalso eqRexp(e1,e2) + | eqRexp(T.BITSLICE(t1,s1,e1),T.BITSLICE(t2,s2,e2)) = + t1=t2 andalso s1=s2 andalso eqRexp(e1,e2) + | eqRexp(T.MARK(a,_),b) = eqRexp(a,b) + | eqRexp(a,T.MARK(b,_)) = eqRexp(a,b) + | eqRexp(T.PRED(a,b),T.PRED(a',b')) = eqCtrl(b,b') andalso eqRexp(a,a') + | eqRexp(T.REXT(a,b),T.REXT(a',b')) = + a=a' andalso eqRext (equality()) (b,b') + | eqRexp _ = false + + and eqRexps([],[]) = true + | eqRexps(a::b,c::d) = eqRexp(a,c) andalso eqRexps(b,d) + | eqRexps _ = false + + and eq2'((a,b,c),(d,e,f)) = a=d andalso eqFexp(b,e) andalso eqFexp(c,f) + and eq1'((a,b),(d,e)) = a=d andalso eqFexp(b,e) + + and eqFexp(T.FREG(t1,x),T.FREG(t2,y)) = t1=t2 andalso eqCell(x,y) + | eqFexp(T.FLOAD(a,b,_),T.FLOAD(c,d,_)) = a=c andalso eqRexp(b,d) + | eqFexp(T.FADD x,T.FADD y) = eq2'(x,y) + | eqFexp(T.FMUL x,T.FMUL y) = eq2'(x,y) + | eqFexp(T.FSUB x,T.FSUB y) = eq2'(x,y) + | eqFexp(T.FDIV x,T.FDIV y) = eq2'(x,y) + | eqFexp(T.FCOPYSIGN x, T.FCOPYSIGN y) = eq2'(x,y) + | eqFexp(T.FCOND(t,x,y,z), T.FCOND(t',x',y',z')) = + t=t' andalso eqCCexp(x,x') andalso eqFexp(y,y') andalso eqFexp(z,z') + | eqFexp(T.FABS x,T.FABS y) = eq1'(x,y) + | eqFexp(T.FNEG x,T.FNEG y) = eq1'(x,y) + | eqFexp(T.FSQRT x,T.FSQRT y) = eq1'(x,y) + | eqFexp(T.CVTI2F(a,b,c),T.CVTI2F(a',b',c')) = + a=a' andalso b=b' andalso eqRexp(c,c') + | eqFexp(T.CVTF2F(a,b,c),T.CVTF2F(a',b',c')) = + a=a' andalso b=b' andalso eqFexp(c,c') + | eqFexp(T.FEXT(a,f),T.FEXT(b,g)) = a=b andalso eqFext (equality()) (f,g) + | eqFexp(T.FMARK(a,_),b) = eqFexp(a,b) + | eqFexp(a,T.FMARK(b,_)) = eqFexp(a,b) + | eqFexp(T.FPRED(a,b),T.FPRED(a',b')) = eqCtrl(b,b') andalso eqFexp(a,a') + | eqFexp _ = false + + and eqFexps([],[]) = true + | eqFexps(a::b,c::d) = eqFexp(a,c) andalso eqFexps(b,d) + | eqFexps _ = false + + and eqCCexp(T.CC(c1,x),T.CC(c2,y)) = c1=c2 andalso eqCell(x,y) + | eqCCexp(T.FCC(c1,x),T.FCC(c2,y)) = c1=c2 andalso eqCell(x,y) + | eqCCexp(T.CMP(x,a,b,c),T.CMP(y,d,e,f)) = + a=d andalso eqRexp(b,e) andalso eqRexp(c,f) andalso x = y + | eqCCexp(T.FCMP(x,a,b,c),T.FCMP(y,d,e,f)) = + a=d andalso eqFexp(b,e) andalso eqFexp(c,f) andalso x = y + | eqCCexp(T.NOT x, T.NOT y) = eqCCexp(x,y) + | eqCCexp(T.AND x, T.AND y) = eqCCexp2(x,y) + | eqCCexp(T.OR x, T.OR y) = eqCCexp2(x,y) + | eqCCexp(T.XOR x, T.XOR y) = eqCCexp2(x,y) + | eqCCexp(T.EQV x, T.EQV y) = eqCCexp2(x,y) + | eqCCexp(T.CCMARK(a,_),b) = eqCCexp(a,b) + | eqCCexp(a,T.CCMARK(b,_)) = eqCCexp(a,b) + | eqCCexp(T.CCEXT(t,a),T.CCEXT(t',b)) = + t=t' andalso eqCCext (equality()) (a,b) + | eqCCexp(T.TRUE, T.TRUE) = true + | eqCCexp(T.FALSE, T.FALSE) = true + | eqCCexp _ = false + + and eqCCexp2((x,y),(x',y')) = eqCCexp(x,x') andalso eqCCexp(y,y') + + and eqCCexps([],[]) = true + | eqCCexps(a::b,c::d) = eqCCexp(a,c) andalso eqCCexps(b,d) + | eqCCexps _ = false + + exception NonConst + + fun eval{label, const} = + let fun drm T.DIV_TO_ZERO = I.DIV_TO_ZERO + | drm T.DIV_TO_NEGINF = I.DIV_TO_NEGINF + fun rexp(T.LI i) = i + | rexp(T.CONST c) = const c + | rexp(T.LABEL l) = IntInf.fromInt(label l) + | rexp(T.LABEXP e) = rexp e + + | rexp(T.NEG(sz,x)) = I.NEG(sz,rexp x) + | rexp(T.ADD(sz,x,y)) = I.ADD(sz,rexp x,rexp y) + | rexp(T.SUB(sz,x,y)) = I.SUB(sz,rexp x,rexp y) + + | rexp(T.MULS(sz,x,y)) = I.MULS(sz,rexp x,rexp y) + | rexp(T.DIVS(m,sz,x,y)) = I.DIVS(drm m,sz,rexp x,rexp y) + | rexp(T.REMS(m,sz,x,y)) = I.REMS(drm m,sz,rexp x,rexp y) + + | rexp(T.MULU(sz,x,y)) = I.MULU(sz,rexp x,rexp y) + | rexp(T.DIVU(sz,x,y)) = I.DIVU(sz,rexp x,rexp y) + | rexp(T.REMU(sz,x,y)) = I.REMU(sz,rexp x,rexp y) + + | rexp(T.NEGT(sz,x)) = I.NEGT(sz,rexp x) + | rexp(T.ADDT(sz,x,y)) = I.ADDT(sz,rexp x,rexp y) + | rexp(T.SUBT(sz,x,y)) = I.SUBT(sz,rexp x,rexp y) + | rexp(T.MULT(sz,x,y)) = I.MULT(sz,rexp x,rexp y) + | rexp(T.DIVT(m,sz,x,y)) = I.DIVT(drm m,sz,rexp x,rexp y) + + | rexp(T.NOTB(sz,x)) = I.NOTB(sz,rexp x) + | rexp(T.ANDB(sz,x,y)) = I.ANDB(sz,rexp x,rexp y) + | rexp(T.ORB(sz,x,y)) = I.ORB(sz,rexp x,rexp y) + | rexp(T.XORB(sz,x,y)) = I.XORB(sz,rexp x,rexp y) + | rexp(T.EQVB(sz,x,y)) = I.EQVB(sz,rexp x,rexp y) + | rexp(T.SLL(sz,x,y)) = I.SLL(sz,rexp x,rexp y) + | rexp(T.SRL(sz,x,y)) = I.SRL(sz,rexp x,rexp y) + | rexp(T.SRA(sz,x,y)) = I.SRA(sz,rexp x,rexp y) + | rexp(T.BITSLICE(sz,x,y)) = I.BITSLICE(sz,x,rexp y) + + | rexp(T.COND(sz,cc,x,y)) = if ccexp cc then rexp x else rexp y + | rexp(T.SX(a,b,x)) = I.SX(a,b,rexp x) + | rexp(T.ZX(a,b,x)) = I.ZX(a,b,rexp x) + | rexp(T.MARK(e,_)) = rexp e + + | rexp _ = raise NonConst + and ccexp(T.TRUE) = true + | ccexp(T.FALSE) = false + | ccexp(T.CMP(sz,T.EQ,x,y)) = I.EQ(sz,rexp x,rexp y) + | ccexp(T.CMP(sz,T.NE,x,y)) = I.NE(sz,rexp x,rexp y) + | ccexp(T.CMP(sz,T.GT,x,y)) = I.GT(sz,rexp x,rexp y) + | ccexp(T.CMP(sz,T.GE,x,y)) = I.GE(sz,rexp x,rexp y) + | ccexp(T.CMP(sz,T.LT,x,y)) = I.LT(sz,rexp x,rexp y) + | ccexp(T.CMP(sz,T.LE,x,y)) = I.LE(sz,rexp x,rexp y) + | ccexp(T.CMP(sz,T.GTU,x,y)) = I.GTU(sz,rexp x,rexp y) + | ccexp(T.CMP(sz,T.LTU,x,y)) = I.LTU(sz,rexp x,rexp y) + | ccexp(T.CMP(sz,T.GEU,x,y)) = I.GEU(sz,rexp x,rexp y) + | ccexp(T.CMP(sz,T.LEU,x,y)) = I.LEU(sz,rexp x,rexp y) + | ccexp(T.NOT x) = not(ccexp x) + | ccexp(T.AND(x,y)) = ccexp x andalso ccexp y + | ccexp(T.OR(x,y)) = ccexp x orelse ccexp y + | ccexp(T.XOR(x,y)) = ccexp x <> ccexp y + | ccexp(T.EQV(x,y)) = ccexp x = ccexp y + | ccexp(T.CCMARK(e,_)) = ccexp e + | ccexp _ = raise NonConst + in {rexp=rexp, ccexp=ccexp} + end + + fun valueOf e = + IntInf.toInt + (#rexp(eval{const=fn c => IntInf.fromInt(Constant.valueOf c), + label=Label.addrOf}) e) + val == = eqRexp +end diff --git a/MLRISC/mltree/mltree-extension.sig b/MLRISC/mltree/mltree-extension.sig new file mode 100644 index 0000000..8c2413b --- /dev/null +++ b/MLRISC/mltree/mltree-extension.sig @@ -0,0 +1,9 @@ +signature MLTREE_EXTENSION = +sig + + type ('s,'r,'f,'c) sx + type ('s,'r,'f,'c) rx + type ('s,'r,'f,'c) fx + type ('s,'r,'f,'c) ccx + +end diff --git a/MLRISC/mltree/mltree-fold.sig b/MLRISC/mltree/mltree-fold.sig new file mode 100644 index 0000000..85b117b --- /dev/null +++ b/MLRISC/mltree/mltree-fold.sig @@ -0,0 +1,9 @@ +(* A fold function for MLTree datatypes + * Useful for performing transformation on MLTree + *) +signature MLTREE_FOLD = +sig + structure T : MLTREE + + val fold : 'b T.folder -> 'b T.folder +end diff --git a/MLRISC/mltree/mltree-fold.sml b/MLRISC/mltree/mltree-fold.sml new file mode 100644 index 0000000..c21f9e6 --- /dev/null +++ b/MLRISC/mltree/mltree-fold.sml @@ -0,0 +1,155 @@ +functor MLTreeFold + (structure T : MLTREE + (* Extension mechnism *) + val sext : 'b T.folder -> T.sext * 'b -> 'b + val rext : 'b T.folder -> T.ty * T.rext * 'b -> 'b + val fext : 'b T.folder -> T.fty * T.fext * 'b -> 'b + val ccext : 'b T.folder -> T.ty * T.ccext * 'b -> 'b + ) : MLTREE_FOLD = +struct + structure T = T + + fun fold{rexp=doRexp, fexp=doFexp, ccexp=doCCexp, stm=doStm} = + let fun stm(s,x) = + let val x = + case s of + T.MV(ty,dst,e) => rexp(e,x) + | T.CCMV(dst,e) => ccexp(e,x) + | T.FMV(fty,dst,e) => fexp(e,x) + | T.COPY _ => x + | T.FCOPY _ => x + | T.JMP(e,cf) => rexp(e,x) + | T.BCC(cc,l) => ccexp(cc,x) + | T.CALL{funct,defs,uses,...} => + mlriscs(uses,mlriscs(defs,rexp(funct,x))) + | T.RET _ => x + | T.FLOW_TO (s, _) => stm(s,x) + | T.IF(cc,yes,no) => stm(no,stm(yes,ccexp(cc,x))) + | T.STORE(ty,ea,d,r) => rexp(d,rexp(ea,x)) + | T.FSTORE(fty,ea,d,r) => fexp(d,rexp(ea,x)) + | T.REGION(s,ctrl) => stm(s,x) + | T.SEQ s => stms(s,x) + | T.DEFINE _ => x + | T.ANNOTATION(s,an) => stm(s,x) + | T.EXT s => + sext {stm=stm, rexp=rexp, fexp=fexp, ccexp=ccexp} (s,x) + | T.PHI _ => x + | T.ASSIGN(_,a,b) => rexp(b,rexp(a,x)) + | T.SOURCE => x + | T.SINK => x + | T.RTL _ => x + | T.LIVE ls => mlriscs (ls, x) + | T.KILL ks => mlriscs (ks, x) + in doStm(s,x) end + + and stms(ss,x) = foldr stm x ss + + and rexp(e,x) = + let val x = case e of + T.REG _ => x + | T.LI _ => x + | T.LABEL _ => x + | T.LABEXP _ => x + | T.CONST _ => x + | T.NEG(ty,a) => rexp(a,x) + | T.ADD(ty,a,b) => rexp2(a,b,x) + | T.SUB(ty,a,b) => rexp2(a,b,x) + | T.MULS(ty,a,b) => rexp2(a,b,x) + | T.DIVS(m,ty,a,b) => rexp2(a,b,x) + | T.REMS(m,ty,a,b) => rexp2(a,b,x) + | T.MULU(ty,a,b) => rexp2(a,b,x) + | T.DIVU(ty,a,b) => rexp2(a,b,x) + | T.REMU(ty,a,b) => rexp2(a,b,x) + | T.NEGT(ty,a) => rexp(a,x) + | T.ADDT(ty,a,b) => rexp2(a,b,x) + | T.SUBT(ty,a,b) => rexp2(a,b,x) + | T.MULT(ty,a,b) => rexp2(a,b,x) + | T.DIVT(m,ty,a,b) => rexp2(a,b,x) + | T.ANDB(ty,a,b) => rexp2(a,b,x) + | T.ORB(ty,a,b) => rexp2(a,b,x) + | T.XORB(ty,a,b) => rexp2(a,b,x) + | T.EQVB(ty,a,b) => rexp2(a,b,x) + | T.NOTB(ty,a) => rexp(a,x) + | T.SRA(ty,a,b) => rexp2(a,b,x) + | T.SRL(ty,a,b) => rexp2(a,b,x) + | T.SLL(ty,a,b) => rexp2(a,b,x) + | T.SX(t,t',e) => rexp(e,x) + | T.ZX(t,t',e) => rexp(e,x) + | T.CVTF2I(ty,mode,fty,e) => fexp(e,x) + | T.COND(ty,cc,yes,no) => rexp(no,rexp(yes,ccexp(cc,x))) + | T.LOAD(ty,ea,r) => rexp(ea,x) + | T.PRED(e,ctrl) => rexp(e,x) + | T.LET(s,e) => rexp(e,stm(s,x)) + | T.REXT(t,e) => + rext{stm=stm, rexp=rexp, fexp=fexp, ccexp=ccexp} (t,e,x) + | T.MARK(e,an) => rexp(e,x) + | T.OP(ty,oper,es) => rexps(es,x) + | T.ARG _ => x + | T.PARAM _ => x + | T.BITSLICE(_,_,e) => rexp(e, x) + | T.$(ty,k,e) => rexp(e, x) + | T.??? => x + in doRexp(e,x) end + + and rexp2(a,b,x) = rexp(b,rexp(a,x)) + + and rexps(es,x) = foldr rexp x es + + and fexp(e,x) = + let val x = case e of + T.FREG _ => x + | T.FLOAD(fty,e,r) => rexp(e,x) + | T.FADD(fty,a,b) => fexp2(a,b,x) + | T.FSUB(fty,a,b) => fexp2(a,b,x) + | T.FMUL(fty,a,b) => fexp2(a,b,x) + | T.FDIV(fty,a,b) => fexp2(a,b,x) + | T.FABS(fty,e) => fexp(e,x) + | T.FNEG(fty,e) => fexp(e,x) + | T.FSQRT(fty,e) => fexp(e,x) + | T.FCOPYSIGN(fty,a,b) => fexp2(a,b,x) + | T.FCOND(fty,c,a,b) => fexp2(a,b,ccexp(c,x)) + | T.CVTI2F(fty,ty,e) => rexp(e,x) + | T.CVTF2F(fty,fty',e) => fexp(e,x) + | T.FPRED(e,ctrl) => fexp(e,x) + | T.FEXT(t,e) => + fext {stm=stm, rexp=rexp, fexp=fexp, ccexp=ccexp} (t,e,x) + | T.FMARK(e,an) => fexp(e,x) + in doFexp(e,x) end + + and fexp2(a,b,x) = fexp(b,fexp(a,x)) + + and fexps(es,x) = foldr fexp x es + + and ccexp(e,x) = + let val x = case e of + T.CC _ => x + | T.FCC _ => x + | T.TRUE => x + | T.FALSE => x + | T.NOT e => ccexp(e,x) + | T.AND(a,b) => ccexp2(a,b,x) + | T.OR(a,b) => ccexp2(a,b,x) + | T.XOR(a,b) => ccexp2(a,b,x) + | T.EQV(a,b) => ccexp2(a,b,x) + | T.CMP(ty,cond,a,b) => rexp2(a,b,x) + | T.FCMP(ty,fcond,a,b) => fexp2(a,b,x) + | T.CCMARK(e,an) => ccexp(e,x) + | T.CCEXT(t,e) => + ccext{stm=stm, rexp=rexp, fexp=fexp, ccexp=ccexp}(t,e,x) + in doCCexp(e,x) end + + and ccexp2(a,b,x) = ccexp(b,ccexp(a,x)) + + and mlriscs(m,x) = foldr mlrisc x m + + and mlrisc(m,x) = + let val x = + case m of + T.CCR e => ccexp(e,x) + | T.GPR e => rexp(e,x) + | T.FPR e => fexp(e,x) + in x end + + in { rexp=rexp, fexp=fexp, ccexp=ccexp, stm=stm } end +end (* MLTreeFold *) + diff --git a/MLRISC/mltree/mltree-gen.sig b/MLRISC/mltree/mltree-gen.sig new file mode 100644 index 0000000..f2d1937 --- /dev/null +++ b/MLRISC/mltree/mltree-gen.sig @@ -0,0 +1,41 @@ +(* + * This module provides various generic MLTREE transformations. + * Basically, we want to support various non built-in datatype widths. + * This module handles the translation. + * + * -- Allen + *) +signature MLTREEGEN = +sig + + structure T : MLTREE + structure Size : MLTREE_SIZE + where T = T + + val condOf : T.ccexp -> T.Basis.cond + val fcondOf : T.ccexp -> T.Basis.fcond + + (* + * Perform simplification + *) + val compileRexp : T.rexp -> T.rexp + val compileFexp : T.fexp -> T.fexp + val compileStm : T.stm -> T.stm list + + (* + * Simulate conditional expression. + *) + val compileCond : + {exp : T.ty * T.ccexp * T.rexp * T.rexp, + an : Annotations.annotations, + rd : CellsBasis.cell + } -> T.stm list + + val compileFcond : + {exp : T.fty * T.ccexp * T.fexp * T.fexp, + an : Annotations.annotations, + fd : CellsBasis.cell + } -> T.stm list + + +end diff --git a/MLRISC/mltree/mltree-gen.sml b/MLRISC/mltree/mltree-gen.sml new file mode 100644 index 0000000..8b529be --- /dev/null +++ b/MLRISC/mltree/mltree-gen.sml @@ -0,0 +1,338 @@ +(* mltree-gen.sml + * + * COPYRIGHT (c) 2002 Bell Labs, Lucent Technologies + * + * This is a generic module for transforming MLTREE expressions: + * (1) expressions involving non-standard type widths are promoted when + * necessary. + * (2) operators that cannot be directly handled are expanded into + * more complex instruction sequences when necessary. + * + * -- Allen + *) + +functor MLTreeGen ( + structure T : MLTREE + structure Cells : CELLS + val intTy : T.ty (* size of integer word *) + + (* This is a list of possible data widths to promote to. + * The list must be in increasing sizes. + * We'll try to promote to the next largest size. + *) + val naturalWidths : T.ty list + + (* + * Are integers of widths less than the size of integer word. + * automatically sign extended, zero extended, or neither. + * When in doubt, choose neither since it is conservative. + *) + datatype rep = SE | ZE | NEITHER + val rep : rep + + ) : MLTREEGEN = struct + + structure T = T + structure Size = MLTreeSize(structure T = T val intTy = intTy) + structure C = CellsBasis + + fun error msg = MLRiscErrorMsg.error("MLTreeGen",msg) + fun unsupported what = error ("unsupported: " ^ what) + + val zeroT = T.LI 0 + fun LI i = T.LI(T.I.fromInt(intTy, i)) + + fun condOf(T.CC(cc,_)) = cc + | condOf(T.CMP(_,cc,_,_)) = cc + | condOf(T.CCMARK(cc,_)) = condOf cc + | condOf _ = error "condOf" + + fun fcondOf(T.FCC(fcc,_)) = fcc + | fcondOf(T.FCMP(_,fcc,_,_)) = fcc + | fcondOf(T.CCMARK(cc,_)) = fcondOf cc + | fcondOf _ = error "fcondOf" + + val W = intTy + + (* To compute f.ty(a,b) + * + * let r1 <- a << (intTy - ty) + * r2 <- b << (intTy - ty) + * r3 <- f(a,b) + * in r3 ~>> (intTy - ty) end + * + * Lal showed me this neat trick! + *) + fun arith(rightShift,f,ty,a,b) = + let val shift = LI(W-ty) + in rightShift(W,f(W,T.SLL(W,a,shift),T.SLL(W,b,shift)),shift) + end + + fun promoteTy(ty) = + let fun loop([]) = + unsupported("can't promote integer width "^Int.toString ty) + | loop(t::ts) = if t > ty then t else loop ts + in loop(naturalWidths) end + + fun promotable rightShift (e, f, ty, a, b) = + case naturalWidths of + [] => arith(rightShift,f,ty,a,b) + | _ => f(promoteTy(ty), a, b) + + fun isNatural w = let + fun loop [] = false + | loop (h :: t) = h = w orelse w > h andalso loop t + in + loop naturalWidths + end + + (* Implement division with round-to-negative-infinity in terms + * of division with round-to-zero. + * The logic is as follows: + * - if q > 0, then we are done since any rounding was + * at the same time TO_ZERO and TO_NEGINF + * (This is the fast path that does not require calculating the remainder.) + * - otherwise we calculate r and see if it is zero; if so, no adjustment + * - finally if r and b have the same sign (i.e., r XOR b >= 0) + * we still don't need adjustment + * - otherwise adjust + * + * Instruction selection for machines (e.g., x86) where the hardware returns both + * q and r anyway should implement this logic directly. + *) + fun divinf (xdiv, ty, aexp, bexp) = let + val a = Cells.newReg () + val b = Cells.newReg () + val q = Cells.newReg () + val r = Cells.newReg () + val zero = T.LI 0 + val one = T.LI 1 + in + T.LET + (T.SEQ + [T.MV (ty, a, aexp), + T.MV (ty, b, bexp), + T.MV (ty, q, xdiv (T.DIV_TO_ZERO, ty, T.REG (ty, a), T.REG (ty, b))), + T.IF (T.CMP (ty, T.Basis.GT, T.REG (ty, q), zero), + T.SEQ [], + T.SEQ + [T.MV (ty, r, T.SUB (ty, T.REG (ty, a), + T.MULS (ty, T.REG (ty, b), + T.REG (ty, q)))), + T.IF (T.CMP (ty, T.Basis.EQ, T.REG (ty, r), zero), + T.SEQ [], + T.IF (T.CMP (ty, T.Basis.GE, + T.XORB (ty, T.REG (ty, b), T.REG (ty, r)), + zero), + T.SEQ [], + T.MV (ty, q, T.SUB (ty, T.REG (ty, q), + one))))])], + T.REG (ty, q)) + end + + (* Same for rem when rounding to negative infinity. + * Since we have to return (and therefore calculate) the remainder anyway, + * we can skip the q > 0 test because it will be caught by the samesign(r,b) + * test. + * + * The odd case is when a = MININT and b = -1 in which case the DIVS op + * will overflow and trap on some machines. On others the result + * will be bogus. Should we fix that? *) + fun reminf (ty, aexp, bexp) = let + val a = Cells.newReg () + val b = Cells.newReg () + val q = Cells.newReg () + val r = Cells.newReg () + val zero = T.LI 0 + in + T.LET + (T.SEQ + [T.MV (ty, a, aexp), + T.MV (ty, b, bexp), + T.MV (ty, q, T.DIVS (T.DIV_TO_ZERO, ty, T.REG (ty, a), + T.REG (ty, b))), + T.MV (ty, r, T.SUB (ty, T.REG (ty, a), + T.MULS (ty, T.REG (ty, q), + T.REG (ty, b)))), + T.IF (T.CMP (ty, T.Basis.EQ, T.REG (ty, r), zero), + T.SEQ [], + T.IF (T.CMP (ty, T.Basis.GE, + T.XORB (ty, T.REG (ty, b), T.REG (ty, r)), + zero), + T.SEQ [], + T.MV (ty, r, T.ADD (ty, T.REG (ty, r), T.REG (ty, b)))))], + T.REG (ty, r)) + end + + (* Same for rem when rounding to zero. *) + fun remzero (xdiv, xmul, ty, aexp, bexp) = let + val a = Cells.newReg () + val b = Cells.newReg () + in + T.LET (T.SEQ [T.MV (ty, a, aexp), + T.MV (ty, b, bexp)], + T.SUB (ty, T.REG (ty, a), + xmul (ty, T.REG (ty, b), + xdiv (T.DIV_TO_ZERO, ty, T.REG (ty, a), + T.REG (ty, b))))) + end + + (* + * Translate integer expressions of unknown types into the appropriate + * term. + *) + + fun DIVREMz d (ty, a, b) = d (T.DIV_TO_ZERO, ty, a, b) + + fun compileRexp(exp) = + case exp of + T.CONST c => T.LABEXP exp + + (* non overflow trapping ops *) + | T.NEG(ty,a) => T.SUB(ty, zeroT, a) + | T.ADD(ty,a,b) => promotable T.SRA (exp,T.ADD,ty,a,b) + | T.SUB(ty,a,b) => promotable T.SRA (exp,T.SUB,ty,a,b) + | T.MULS(ty,a,b) => promotable T.SRA (exp,T.MULS,ty,a,b) + | T.DIVS(T.DIV_TO_ZERO,ty,a,b) => + promotable T.SRA (exp,DIVREMz T.DIVS,ty,a,b) + | T.DIVS(T.DIV_TO_NEGINF,ty,a,b) => divinf (T.DIVS,ty,a,b) + | T.REMS(T.DIV_TO_ZERO,ty,a,b) => + if isNatural ty then remzero (T.DIVS,T.MULS,ty,a,b) + else promotable T.SRA (exp,DIVREMz T.REMS,ty,a,b) + | T.REMS(T.DIV_TO_NEGINF,ty,a,b) => reminf (ty,a,b) + | T.MULU(ty,a,b) => promotable T.SRL (exp,T.MULU,ty,a,b) + | T.DIVU(ty,a,b) => promotable T.SRL (exp,T.DIVU,ty,a,b) + | T.REMU(ty,a,b) => + if isNatural ty then + remzero (fn (_,ty,a,b) => T.DIVU (ty,a,b),T.MULU,ty,a,b) + else promotable T.SRL (exp,T.REMU,ty,a,b) + + (* for overflow trapping ops; we have to do the simulation *) + | T.NEGT(ty,a) => T.SUBT(ty,zeroT,a) + | T.ADDT(ty,a,b) => arith (T.SRA,T.ADDT,ty,a,b) + | T.SUBT(ty,a,b) => arith (T.SRA,T.SUBT,ty,a,b) + | T.MULT(ty,a,b) => arith (T.SRA,T.MULT,ty,a,b) + | T.DIVT(T.DIV_TO_ZERO,ty,a,b) => arith (T.SRA,DIVREMz T.DIVT,ty,a,b) + | T.DIVT(T.DIV_TO_NEGINF,ty,a,b) => divinf (T.DIVT,ty,a,b) + + (* conditional evaluation rules *) +(*** XXX: Seems wrong. + | T.COND(ty,T.CC(cond,r),x,y) => + T.COND(ty,T.CMP(ty,cond,T.REG(ty,r),zeroT),x,y) +***) + | T.COND(ty,T.CCMARK(cc,a),x,y) => T.MARK(T.COND(ty,cc,x,y),a) +(*** XXX: TODO + | T.COND(ty,T.CMP(t,cc,e1,e2),x as (T.LI 0 | T.LI32 0w0),y) => + T.COND(ty,T.CMP(t,T.Basis.negateCond cc,e1,e2),y,T.LI 0) + (* we'll let others strength reduce the multiply *) +***) + | T.COND(ty,cc as T.FCMP _, yes, no) => let + val tmp = Cells.newReg() + in + T.LET( + T.SEQ[T.MV(ty, tmp, no), T.IF(cc, T.MV(ty, tmp, yes), T.SEQ [])], + T.REG(ty,tmp)) + end +(*** XXX: TODO + | T.COND(ty,cc,e1,(T.LI 0 | T.LI32 0w0)) => + T.MULU(ty,T.COND(ty,cc,T.LI 1,T.LI 0),e1) + | T.COND(ty,cc,T.LI m,T.LI n) => + T.ADD(ty,T.MULU(ty,T.COND(ty,cc,T.LI 1,T.LI 0),T.LI(m-n)),T.LI n) +***) + + | T.COND(ty,cc,e1,e2) => + T.ADD(ty,T.MULU(ty,T.COND(ty,cc,T.LI 1,zeroT),T.SUB(ty,e1,e2)),e2) + + (* ones-complement. + * WARNING: we are assuming two's complement architectures here. + * Are there any architectures in use nowadays that doesn't use + * two's complement for integer arithmetic? + *) + | T.NOTB(ty,e) => T.XORB(ty,e,T.LI ~1) + + (* + * Default ways of converting integers to integers + *) + | T.SX(ty,fromTy,e) => + if fromTy = ty then e + else if rep = SE andalso fromTy < ty andalso + fromTy >= hd naturalWidths then e + else + let val shift = T.LI(T.I.fromInt(intTy, W - fromTy)) + in T.SRA(W,T.SLL(W,e,shift),shift) + end + | T.ZX(toTy, fromTy, e) => let + (* NOTE: we cannot assume that the high bits of the source are zero! *) + fun mask 8 = T.ANDB(toTy, e, T.LI 0xff) + | mask 16 = T.ANDB(toTy,e,T.LI 0xffff) + | mask 32 = T.ANDB(toTy,e,T.LI 0xffffffff) + | mask 64 = e + | mask _ = unsupported "unknown expression" + in + if (fromTy = toTy) then e + else if (fromTy < toTy) then mask fromTy + else mask toTy + end + + (* + * Converting floating point to integers. + * The following rule handles the case when ty is not + * one of the naturally supported widths on the machine. + *) + | T.CVTF2I(ty,round,fty,e) => + let val ty' = promoteTy(ty) + in T.SX(ty,ty',T.CVTF2I(ty',round,fty,e)) + end + + (* Promote to higher width and zero high bits *) + | T.SLL(ty, data, shift) => + let val ty' = promoteTy(ty) + in T.ZX(ty, ty', T.SLL(ty', data, shift)) end + + | exp => unsupported("unknown expression") + + fun compileFexp fexp = unsupported("unknown expression") + + fun mark(s,[]) = s + | mark(s,a::an) = mark(T.ANNOTATION(s,a),an) + + fun compileStm (T.SEQ s) = s + | compileStm (T.IF(cond,T.JMP(T.LABEL L,_),T.SEQ [])) = + [T.BCC(cond,L)] + | compileStm (T.IF(cond,yes,no)) = + let val L1 = Label.anon() + val L2 = Label.anon() + in [T.BCC(cond,L1), + no, + T.JMP(T.LABEL L2,[]), + T.DEFINE L1, + yes, + T.DEFINE L2 + ] + end + | compileStm stm = error "compileStm" + + (* + * This function translations conditional expressions into a + * branch sequence. + * Note: we'll actually take advantage of the fact that + * e1 and e2 are allowed to be eagerly evaluated. + *) + fun compileCond{exp=(ty,ccexp,e1,e2),rd,an} = + let val L1 = Label.anon() + in [T.MV(ty,rd,e1), + mark(T.BCC(ccexp,L1),an), + T.MV(ty,rd,e2), + T.DEFINE L1 + ] + end + fun compileFcond{exp=(fty,ccexp,e1,e2),fd,an} = + let val L1 = Label.anon() + in [T.FMV(fty,fd,e1), + mark(T.BCC(ccexp,L1),an), + T.FMV(fty,fd,e2), + T.DEFINE L1 + ] + end + +end diff --git a/MLRISC/mltree/mltree-hash.sig b/MLRISC/mltree/mltree-hash.sig new file mode 100644 index 0000000..c577a5d --- /dev/null +++ b/MLRISC/mltree/mltree-hash.sig @@ -0,0 +1,17 @@ +(* mltree-hash.sig + * + * COPYRIGHT (c) 2001 Lucent Technologies, Bell Laboratories. + * + * Utilities to hash mltree expressions + *) + +signature MLTREE_HASH = sig + structure T : MLTREE + val hash : T.labexp -> word + + val hashStm : T.stm -> word + val hashRexp : T.rexp -> word + val hashFexp : T.fexp -> word + val hashCCexp : T.ccexp -> word + +end \ No newline at end of file diff --git a/MLRISC/mltree/mltree-hash.sml b/MLRISC/mltree/mltree-hash.sml new file mode 100644 index 0000000..fd31896 --- /dev/null +++ b/MLRISC/mltree/mltree-hash.sml @@ -0,0 +1,179 @@ +(* mltree-hash.sml + * + * COPYRIGHT (c) 2002 Bell Labs, Lucent Technologies. + *) + +functor MLTreeHash + (structure T : MLTREE + (* Hashing extensions *) + val hashSext : T.hasher -> T.sext -> word + val hashRext : T.hasher -> T.rext -> word + val hashFext : T.hasher -> T.fext -> word + val hashCCext : T.hasher -> T.ccext -> word + ) : MLTREE_HASH = +struct + structure T = T + structure I = T.I + structure Constant = T.Constant + structure B = T.Basis + structure C = CellsBasis + structure W = Word + + val w = W.fromInt + val i2s = Int.toString + val toLower = String.map Char.toLower + + fun error msg = MLRiscErrorMsg.error("LabelExp",msg) + fun wv(C.CELL{id, ...}) = w id + fun wvs is = + let fun f([],h) = h + | f(i::is,h) = f(is,wv i+h) + in f(is,0w0) end + + (* + * Hashing + *) + val hashLabel = Label.hash + fun hasher() = {stm=hashStm, rexp=hashRexp, fexp=hashFexp, ccexp=hashCCexp} + and hashCtrl ctrl = wv ctrl + and hashStm stm = + case stm of + T.MV(t,dst,rexp) => 0w123 + w t + wv dst + hashRexp rexp + | T.CCMV(dst,ccexp) => 0w1234 + wv dst + hashCCexp ccexp + | T.FMV(fty,dst,fexp) => 0w12345 + w fty + wv dst + hashFexp fexp + | T.COPY(ty,dst,src) => 0w234 + w ty + wvs dst + wvs src + | T.FCOPY(fty,dst,src) => 0w456 + w fty + wvs dst + wvs src + | T.JMP(ea,labels) => 0w45 + hashRexp ea + | T.CALL{funct,targets,defs,uses,region,pops} => + hashRexp funct + hashMlriscs defs + hashMlriscs uses + | T.RET _ => 0w567 + | T.STORE(ty,ea,data,mem) => 0w888 + w ty + hashRexp ea + hashRexp data + | T.FSTORE(fty,ea,data,mem) => 0w7890 + w fty + hashRexp ea + hashFexp data + | T.BCC(a,lab) => 0w233 + hashCCexp a + hashLabel lab + | T.IF(a,b,c) => 0w233 + hashCCexp a + hashStm b + hashStm c + | T.ANNOTATION(stm, a) => hashStm stm + | T.PHI{preds,block} => w block + | T.SOURCE => 0w123 + | T.SINK => 0w423 + | T.REGION(stm,ctrl) => hashStm stm + hashCtrl ctrl + | T.RTL{hash,...} => hash + | T.SEQ ss => hashStms(ss, 0w23) + | T.ASSIGN(ty,lhs,rhs) => w ty + hashRexp lhs + hashRexp rhs + | _ => error "hashStm" + + and hashStms([],h) = h + | hashStms(s::ss,h) = hashStms(ss,hashStm s + h) + + and hashMlrisc(T.CCR ccexp) = hashCCexp ccexp + | hashMlrisc(T.GPR rexp) = hashRexp rexp + | hashMlrisc(T.FPR fexp) = hashFexp fexp + + and hashMlriscs [] = 0w123 + | hashMlriscs(m::ms) = hashMlrisc m + hashMlriscs ms + + and hash2(ty,x,y) = w ty + hashRexp x + hashRexp y + + and hashm T.DIV_TO_ZERO = 0w158 + | hashm T.DIV_TO_NEGINF = 0w159 + + and hash3(m,ty,x,y) = hashm m + w ty + hashRexp x + hashRexp y + + and hashRexp rexp = + case rexp + of T.REG(ty, src) => w ty + wv src + | T.LI i => I.hash i + | T.LABEL l => hashLabel l + | T.LABEXP le => hashRexp rexp + | T.CONST c => Constant.hash c + | T.NEG(ty, x) => w ty + hashRexp x + 0w24 + | T.ADD x => hash2 x + 0w234 + | T.SUB x => hash2 x + 0w456 + | T.MULS x => hash2 x + 0w2131 + | T.DIVS x => hash3 x + 0w156 + | T.REMS x => hash3 x + 0w231 + | T.MULU x => hash2 x + 0w123 + | T.DIVU x => hash2 x + 0w1234 + | T.REMU x => hash2 x + 0w211 + | T.NEGT(ty, x) => w ty + hashRexp x + 0w1224 + | T.ADDT x => hash2 x + 0w1219 + | T.SUBT x => hash2 x + 0w999 + | T.MULT x => hash2 x + 0w7887 + | T.DIVT x => hash3 x + 0w88884 + | T.ANDB x => hash2 x + 0w12312 + | T.ORB x => hash2 x + 0w558 + | T.XORB x => hash2 x + 0w234 + | T.EQVB x => hash2 x + 0w734 + | T.NOTB(ty, x) => w ty + hashRexp x + | T.SRA x => hash2 x + 0w874 + | T.SRL x => hash2 x + 0w223 + | T.SLL x => hash2 x + 0w499 + | T.COND(ty,e,e1,e2) => w ty + hashCCexp e + hashRexp e1 + hashRexp e2 + | T.SX(ty, ty', rexp) => 0w232 + w ty + w ty' + hashRexp rexp + | T.ZX(ty, ty', rexp) => 0w737 + w ty + w ty' + hashRexp rexp + | T.CVTF2I(ty, round, ty', fexp) => + w ty + B.hashRoundingMode round + w ty' + hashFexp fexp + | T.LOAD(ty, ea, mem) => w ty + hashRexp ea + 0w342 + | T.LET(stm, rexp) => hashStm stm + hashRexp rexp + | T.PRED(e, ctrl) => hashRexp e + hashCtrl ctrl + | T.MARK(e, _) => hashRexp e + | T.REXT(ty, rext) => w ty + hashRext (hasher()) rext + | T.??? => 0w485 + | T.OP(ty,oper,es) => hashRexps(es, w ty + hashOper oper) + | T.ARG _ => 0w23 + | T.$(ty, k, e) => w ty + hashRexp e + | T.PARAM n => w n + | T.BITSLICE(ty, sl, e) => w ty + hashRexp e + + and hashOper(T.OPER{hash, ...}) = hash + + and hashRexps([],h) = h + | hashRexps(e::es,h) = hashRexps(es,hashRexp e + h) + + and hash2'(ty,x,y) = w ty + hashFexp x + hashFexp y + + and hashFexp fexp = + case fexp of + T.FREG(fty, src) => w fty + wv src + | T.FLOAD(fty, ea, mem) => w fty + hashRexp ea + | T.FADD x => hash2' x + 0w123 + | T.FMUL x => hash2' x + 0w1234 + | T.FSUB x => hash2' x + 0w12345 + | T.FDIV x => hash2' x + 0w234 + | T.FCOPYSIGN x => hash2' x + 0w883 + | T.FCOND(fty,c,x,y) => w fty + hashCCexp c + hashFexp x + hashFexp y + | T.FABS(fty, fexp) => w fty + hashFexp fexp + 0w2345 + | T.FNEG(fty, fexp) => w fty + hashFexp fexp + 0w23456 + | T.FSQRT(fty, fexp) => w fty + hashFexp fexp + 0w345 + | T.CVTI2F(fty, ty, rexp) => w fty + w ty + hashRexp rexp + | T.CVTF2F(fty, fty', fexp) => w fty + hashFexp fexp + w fty' + | T.FMARK(e, _) => hashFexp e + | T.FPRED(e, ctrl) => hashFexp e + hashCtrl ctrl + | T.FEXT(fty, fext) => w fty + hashFext (hasher()) fext + + and hashFexps([],h) = h + | hashFexps(e::es,h) = hashFexps(es,hashFexp e + h) + + and hashCCexp ccexp = + case ccexp of + T.CC(cc, src) => B.hashCond cc + wv src + | T.FCC(fcc, src) => B.hashFcond fcc + wv src + | T.CMP(ty, cond, x, y) => + w ty + B.hashCond cond + hashRexp x + hashRexp y + | T.FCMP(fty, fcond, x, y) => + w fty + B.hashFcond fcond + hashFexp x + hashFexp y + | T.NOT x => 0w2321 + hashCCexp x + | T.AND(x,y) => 0w2321 + hashCCexp x + hashCCexp y + | T.OR(x,y) => 0w8721 + hashCCexp x + hashCCexp y + | T.XOR(x,y) => 0w6178 + hashCCexp x + hashCCexp y + | T.EQV(x,y) => 0w178 + hashCCexp x + hashCCexp y + | T.TRUE => 0w0 + | T.FALSE => 0w1232 + | T.CCMARK(e, _) => hashCCexp e + | T.CCEXT(ty,ccext) => w ty + hashCCext (hasher()) ccext + + and hashCCexps([],h) = h + | hashCCexps(e::es,h) = hashCCexps(es,hashCCexp e + h) + + + val hash = hashRexp +end diff --git a/MLRISC/mltree/mltree-labexp.sig b/MLRISC/mltree/mltree-labexp.sig new file mode 100644 index 0000000..19ce552 --- /dev/null +++ b/MLRISC/mltree/mltree-labexp.sig @@ -0,0 +1,41 @@ +(* labelExp.sml -- expressions involving labels + * + * COPYRIGHT (c) 1995 AT&T Bell Laboratories. + * + *) +signature LABELEXP = +sig + structure T : MLTREE + val valueOf : T.labexp -> int + val toString : T.labexp -> string + val hash : T.labexp -> word + val == : T.labexp * T.labexp -> bool + + (* + * Hashing + *) + val hashStm : T.stm -> word + val hashRexp : T.rexp -> word + val hashFexp : T.fexp -> word + val hashCCexp : T.ccexp -> word + + (* + * Equality + *) + val eqStm : T.stm * T.stm -> bool + val eqRexp : T.rexp * T.rexp -> bool + val eqFexp : T.fexp * T.fexp -> bool + val eqCCexp : T.ccexp * T.ccexp -> bool + val eqMlriscs : T.mlrisc list * T.mlrisc list -> bool + + (* + * Value + *) + exception NonConst + val eval : {const:T.Constant.const -> IntInf.int, + label:Label.label -> int} -> + {rexp : T.rexp -> IntInf.int, + ccexp : T.ccexp -> bool + } +end + diff --git a/MLRISC/mltree/mltree-labexp.sml b/MLRISC/mltree/mltree-labexp.sml new file mode 100644 index 0000000..e3d124f --- /dev/null +++ b/MLRISC/mltree/mltree-labexp.sml @@ -0,0 +1,481 @@ +(* labelExp.sml -- expressions involving labels + * + * COPYRIGHT (c) 1995 AT&T Bell Laboratories. + * + *) + +functor LabelExp + (structure T : MLTREE + (* Hashing extensions *) + val hashSext : T.hasher -> T.sext -> word + val hashRext : T.hasher -> T.rext -> word + val hashFext : T.hasher -> T.fext -> word + val hashCCext : T.hasher -> T.ccext -> word + (* Equality extensions *) + val eqSext : T.equality -> T.sext * T.sext -> bool + val eqRext : T.equality -> T.rext * T.rext -> bool + val eqFext : T.equality -> T.fext * T.fext -> bool + val eqCCext : T.equality -> T.ccext * T.ccext -> bool + (* assembly output *) + val labelFmt : {gPrefix : string, aPrefix: string} + ) : LABELEXP = +struct + + structure T = T + structure I = T.I + structure Constant = T.Constant + structure B = T.Basis + structure C = CellsBasis + structure W = Word + + val w = W.fromInt + val i2s = Int.toString + val toLower = String.map Char.toLower + + fun error msg = MLRiscErrorMsg.error("LabelExp",msg) + fun wv(C.CELL{id, ...}) = w id + fun wvs is = + let fun f([],h) = h + | f(i::is,h) = f(is,wv i+h) + in f(is,0w0) end + + (* + * Hashing + *) + val hashLabel = Label.hash + fun hasher() = {stm=hashStm, rexp=hashRexp, fexp=hashFexp, ccexp=hashCCexp} + and hashCtrl ctrl = wv ctrl + and hashStm stm = + case stm of + T.MV(t,dst,rexp) => 0w123 + w t + wv dst + hashRexp rexp + | T.CCMV(dst,ccexp) => 0w1234 + wv dst + hashCCexp ccexp + | T.FMV(fty,dst,fexp) => 0w12345 + w fty + wv dst + hashFexp fexp + | T.COPY(ty,dst,src) => 0w234 + w ty + wvs dst + wvs src + | T.FCOPY(fty,dst,src) => 0w456 + w fty + wvs dst + wvs src + | T.JMP(ea,labels) => 0w45 + hashRexp ea + | T.CALL{funct,targets,defs,uses,region,pops} => + hashRexp funct + hashMlriscs defs + hashMlriscs uses + | T.RET _ => 0w567 + | T.STORE(ty,ea,data,mem) => 0w888 + w ty + hashRexp ea + hashRexp data + | T.FSTORE(fty,ea,data,mem) => 0w7890 + w fty + hashRexp ea + hashFexp data + | T.BCC(a,lab) => 0w233 + hashCCexp a + hashLabel lab + | T.IF(a,b,c) => 0w233 + hashCCexp a + hashStm b + hashStm c + | T.ANNOTATION(stm, a) => hashStm stm + | T.PHI{preds,block} => w block + | T.SOURCE => 0w123 + | T.SINK => 0w423 + | T.REGION(stm,ctrl) => hashStm stm + hashCtrl ctrl + | T.RTL{hash,...} => hash + | T.SEQ ss => hashStms(ss, 0w23) + | T.ASSIGN(ty,lhs,rhs) => w ty + hashRexp lhs + hashRexp rhs + | _ => error "hashStm" + + and hashStms([],h) = h + | hashStms(s::ss,h) = hashStms(ss,hashStm s + h) + + and hashMlrisc(T.CCR ccexp) = hashCCexp ccexp + | hashMlrisc(T.GPR rexp) = hashRexp rexp + | hashMlrisc(T.FPR fexp) = hashFexp fexp + + and hashMlriscs [] = 0w123 + | hashMlriscs(m::ms) = hashMlrisc m + hashMlriscs ms + + and hash2(ty,x,y) = w ty + hashRexp x + hashRexp y + + and hashRexp rexp = + case rexp of + T.REG(ty, src) => w ty + wv src + | T.LI i => I.hash i + | T.LABEL l => hashLabel l + | T.LABEXP le => hashRexp rexp + | T.CONST c => Constant.hash c + | T.NEG(ty, x) => w ty + hashRexp x + 0w24 + | T.ADD x => hash2 x + 0w234 + | T.SUB x => hash2 x + 0w456 + | T.MULS x => hash2 x + 0w2131 + | T.DIVS x => hash2 x + 0w156 + | T.QUOTS x => hash2 x + 0w1565 + | T.REMS x => hash2 x + 0w231 + | T.MULU x => hash2 x + 0w123 + | T.DIVU x => hash2 x + 0w1234 + | T.REMU x => hash2 x + 0w211 + | T.NEGT(ty, x) => w ty + hashRexp x + 0w1224 + | T.ADDT x => hash2 x + 0w1219 + | T.SUBT x => hash2 x + 0w999 + | T.MULT x => hash2 x + 0w7887 + | T.DIVT x => hash2 x + 0w88884 + | T.QUOTT x => hash2 x + 0w8884 + | T.REMT x => hash2 x + 0w99 + | T.ANDB x => hash2 x + 0w12312 + | T.ORB x => hash2 x + 0w558 + | T.XORB x => hash2 x + 0w234 + | T.EQVB x => hash2 x + 0w734 + | T.NOTB(ty, x) => w ty + hashRexp x + | T.SRA x => hash2 x + 0w874 + | T.SRL x => hash2 x + 0w223 + | T.SLL x => hash2 x + 0w499 + | T.COND(ty,e,e1,e2) => w ty + hashCCexp e + hashRexp e1 + hashRexp e2 + | T.SX(ty, ty', rexp) => 0w232 + w ty + w ty' + hashRexp rexp + | T.ZX(ty, ty', rexp) => 0w737 + w ty + w ty' + hashRexp rexp + | T.CVTF2I(ty, round, ty', fexp) => + w ty + B.hashRoundingMode round + w ty' + hashFexp fexp + | T.LOAD(ty, ea, mem) => w ty + hashRexp ea + 0w342 + | T.LET(stm, rexp) => hashStm stm + hashRexp rexp + | T.PRED(e, ctrl) => hashRexp e + hashCtrl ctrl + | T.MARK(e, _) => hashRexp e + | T.REXT(ty, rext) => w ty + hashRext (hasher()) rext + | T.??? => 0w485 + | T.OP(ty,oper,es) => hashRexps(es, w ty + hashOper oper) + | T.ARG _ => 0w23 + | T.$(ty, k, e) => w ty + hashRexp e + | T.PARAM n => w n + | T.BITSLICE(ty, sl, e) => w ty + hashRexp e + + and hashOper(T.OPER{hash, ...}) = hash + + and hashRexps([],h) = h + | hashRexps(e::es,h) = hashRexps(es,hashRexp e + h) + + and hash2'(ty,x,y) = w ty + hashFexp x + hashFexp y + + and hashFexp fexp = + case fexp of + T.FREG(fty, src) => w fty + wv src + | T.FLOAD(fty, ea, mem) => w fty + hashRexp ea + | T.FADD x => hash2' x + 0w123 + | T.FMUL x => hash2' x + 0w1234 + | T.FSUB x => hash2' x + 0w12345 + | T.FDIV x => hash2' x + 0w234 + | T.FCOPYSIGN x => hash2' x + 0w883 + | T.FCOND(fty,c,x,y) => w fty + hashCCexp c + hashFexp x + hashFexp y + | T.FABS(fty, fexp) => w fty + hashFexp fexp + 0w2345 + | T.FNEG(fty, fexp) => w fty + hashFexp fexp + 0w23456 + | T.FSQRT(fty, fexp) => w fty + hashFexp fexp + 0w345 + | T.CVTI2F(fty, ty, rexp) => w fty + w ty + hashRexp rexp + | T.CVTF2F(fty, fty', fexp) => w fty + hashFexp fexp + w fty' + | T.FMARK(e, _) => hashFexp e + | T.FPRED(e, ctrl) => hashFexp e + hashCtrl ctrl + | T.FEXT(fty, fext) => w fty + hashFext (hasher()) fext + + and hashFexps([],h) = h + | hashFexps(e::es,h) = hashFexps(es,hashFexp e + h) + + and hashCCexp ccexp = + case ccexp of + T.CC(cc, src) => B.hashCond cc + wv src + | T.FCC(fcc, src) => B.hashFcond fcc + wv src + | T.CMP(ty, cond, x, y) => + w ty + B.hashCond cond + hashRexp x + hashRexp y + | T.FCMP(fty, fcond, x, y) => + w fty + B.hashFcond fcond + hashFexp x + hashFexp y + | T.NOT x => 0w2321 + hashCCexp x + | T.AND(x,y) => 0w2321 + hashCCexp x + hashCCexp y + | T.OR(x,y) => 0w8721 + hashCCexp x + hashCCexp y + | T.XOR(x,y) => 0w6178 + hashCCexp x + hashCCexp y + | T.EQV(x,y) => 0w178 + hashCCexp x + hashCCexp y + | T.TRUE => 0w0 + | T.FALSE => 0w1232 + | T.CCMARK(e, _) => hashCCexp e + | T.CCEXT(ty,ccext) => w ty + hashCCext (hasher()) ccext + + and hashCCexps([],h) = h + | hashCCexps(e::es,h) = hashCCexps(es,hashCCexp e + h) + + val eqLabel = Label.same + fun eqLabels([],[]) = true + | eqLabels(a::b,c::d) = eqLabel(a,c) andalso eqLabels(b,d) + | eqLabels _ = false + and eqCell(C.CELL{id=x, ...},C.CELL{id=y, ...}) = x=y + and eqCells([], []) = true + | eqCells(x::xs,y::ys) = eqCell(x,y) andalso eqCells(xs,ys) + | eqCells _ = false + and eqCopy((t1,dst1,src1),(t2,dst2,src2)) = + t1=t2 andalso eqCells(dst1,dst2) andalso eqCells(src1,src2) + and eqCtrl(c1,c2) = eqCell(c1,c2) + and eqCtrls(c1,c2) = eqCells(c1,c2) + + (* statements *) + and equality() = {stm=eqStm, rexp=eqRexp, fexp=eqFexp, ccexp=eqCCexp} + and eqStm(T.MV(a,b,c),T.MV(d,e,f)) = + a=d andalso eqCell(b,e) andalso eqRexp(c,f) + | eqStm(T.CCMV(a,b),T.CCMV(c,d)) = eqCell(a,c) andalso eqCCexp(b,d) + | eqStm(T.FMV(a,b,c),T.FMV(d,e,f)) = + a=d andalso eqCell(b,e) andalso eqFexp(c,f) + | eqStm(T.COPY x,T.COPY y) = eqCopy(x,y) + | eqStm(T.FCOPY x,T.FCOPY y) = eqCopy(x,y) + | eqStm(T.JMP(a,b),T.JMP(a',b')) = eqRexp(a,a') + | eqStm(T.CALL{funct=a,defs=b,uses=c,...}, + T.CALL{funct=d,defs=e,uses=f,...}) = + eqRexp(a,d) andalso eqMlriscs(b,e) andalso eqMlriscs(c,f) + | eqStm(T.RET _,T.RET _) = true + | eqStm(T.STORE(a,b,c,_),T.STORE(d,e,f,_)) = + a=d andalso eqRexp(b,e) andalso eqRexp(c,f) + | eqStm(T.FSTORE(a,b,c,_),T.FSTORE(d,e,f,_)) = + a=d andalso eqRexp(b,e) andalso eqFexp(c,f) + | eqStm(T.ANNOTATION(s1, _),s2) = eqStm(s1,s2) + | eqStm(s1,T.ANNOTATION(s2, _)) = eqStm(s1,s2) + | eqStm(T.PHI x,T.PHI y) = x=y + | eqStm(T.SOURCE,T.SOURCE) = true + | eqStm(T.SINK,T.SINK) = true + | eqStm(T.BCC(b,c),T.BCC(b',c')) = + eqCCexp(b,b') andalso eqLabel(c,c') + | eqStm(T.IF(b,c,d),T.IF(b',c',d')) = + eqCCexp(b,b') andalso eqStm(c,c') andalso eqStm(d,d') + | eqStm(T.RTL{attribs=x,...},T.RTL{attribs=y,...}) = x=y + | eqStm(T.REGION(a,b),T.REGION(a',b')) = eqCtrl(b,b') andalso eqStm(a,a') + | eqStm(T.EXT a,T.EXT a') = eqSext (equality()) (a,a') + | eqStm _ = false + + and eqStms([],[]) = true + | eqStms(a::b,c::d) = eqStm(a,c) andalso eqStms(b,d) + | eqStms _ = false + + and eqMlrisc(T.CCR a,T.CCR b) = eqCCexp(a,b) + | eqMlrisc(T.GPR a,T.GPR b) = eqRexp(a,b) + | eqMlrisc(T.FPR a,T.FPR b) = eqFexp(a,b) + | eqMlrisc _ = false + + and eqMlriscs([],[]) = true + | eqMlriscs(a::b,c::d) = eqMlrisc(a,c) andalso eqMlriscs(b,d) + | eqMlriscs _ = false + + and eq2((a,b,c),(d,e,f)) = a=d andalso eqRexp(b,e) andalso eqRexp(c,f) + + and eqRexp(T.REG(a,b),T.REG(c,d)) = a=c andalso eqCell(b,d) + | eqRexp(T.LI a,T.LI b) = a=b + | eqRexp(T.LABEL a,T.LABEL b) = eqLabel(a,b) + | eqRexp(T.LABEXP a,T.LABEXP b) = eqRexp(a,b) + | eqRexp(T.CONST a,T.CONST b) = Constant.==(a,b) + | eqRexp(T.NEG(t,x),T.NEG(t',x')) = t = t' andalso eqRexp(x,x') + | eqRexp(T.ADD x,T.ADD y) = eq2(x,y) + | eqRexp(T.SUB x,T.SUB y) = eq2(x,y) + | eqRexp(T.MULS x,T.MULS y) = eq2(x,y) + | eqRexp(T.DIVS x,T.DIVS y) = eq2(x,y) + | eqRexp(T.QUOTS x,T.QUOTS y) = eq2(x,y) + | eqRexp(T.REMS x,T.REMS y) = eq2(x,y) + | eqRexp(T.MULU x,T.MULU y) = eq2(x,y) + | eqRexp(T.DIVU x,T.DIVU y) = eq2(x,y) + | eqRexp(T.REMU x,T.REMU y) = eq2(x,y) + | eqRexp(T.NEGT(t,x),T.NEGT(t',x')) = t = t' andalso eqRexp(x,x') + | eqRexp(T.ADDT x,T.ADDT y) = eq2(x,y) + | eqRexp(T.SUBT x,T.SUBT y) = eq2(x,y) + | eqRexp(T.MULT x,T.MULT y) = eq2(x,y) + | eqRexp(T.DIVT x,T.DIVT y) = eq2(x,y) + | eqRexp(T.QUOTT x,T.QUOTT y) = eq2(x,y) + | eqRexp(T.REMT x,T.REMT y) = eq2(x,y) + | eqRexp(T.ANDB x,T.ANDB y) = eq2(x,y) + | eqRexp(T.ORB x,T.ORB y) = eq2(x,y) + | eqRexp(T.XORB x,T.XORB y) = eq2(x,y) + | eqRexp(T.EQVB x,T.EQVB y) = eq2(x,y) + | eqRexp(T.NOTB(a,b),T.NOTB(c,d)) = a=c andalso eqRexp(b,d) + | eqRexp(T.SRA x,T.SRA y) = eq2(x,y) + | eqRexp(T.SRL x,T.SRL y) = eq2(x,y) + | eqRexp(T.SLL x,T.SLL y) = eq2(x,y) + | eqRexp(T.COND(a,b,c,d),T.COND(e,f,g,h)) = + a=e andalso eqCCexp(b,f) andalso eqRexp(c,g) andalso eqRexp(d,h) + | eqRexp(T.SX(a,b,c),T.SX(a',b',c')) = + a=a' andalso b=b' andalso eqRexp(c,c') + | eqRexp(T.ZX(a,b,c),T.ZX(a',b',c')) = + a=a' andalso b=b' andalso eqRexp(c,c') + | eqRexp(T.CVTF2I(a,b,c,d),T.CVTF2I(e,f,g,h)) = + a=e andalso b=f andalso c=g andalso eqFexp(d,h) + | eqRexp(T.LOAD(a,b,_),T.LOAD(c,d,_)) = a=c andalso eqRexp(b,d) + | eqRexp(T.LET(a,b),T.LET(c,d)) = eqStm(a,c) andalso eqRexp(b,d) + | eqRexp(T.ARG x,T.ARG y) = x = y + | eqRexp(T.PARAM x,T.PARAM y) = x = y + | eqRexp(T.???,T.???) = true + | eqRexp(T.$(t1,k1,e1),T.$(t2,k2,e2)) = + t1=t2 andalso k1=k2 andalso eqRexp(e1,e2) + | eqRexp(T.BITSLICE(t1,s1,e1),T.BITSLICE(t2,s2,e2)) = + t1=t2 andalso s1=s2 andalso eqRexp(e1,e2) + | eqRexp(T.MARK(a,_),b) = eqRexp(a,b) + | eqRexp(a,T.MARK(b,_)) = eqRexp(a,b) + | eqRexp(T.PRED(a,b),T.PRED(a',b')) = eqCtrl(b,b') andalso eqRexp(a,a') + | eqRexp(T.REXT(a,b),T.REXT(a',b')) = + a=a' andalso eqRext (equality()) (b,b') + | eqRexp _ = false + + and eqRexps([],[]) = true + | eqRexps(a::b,c::d) = eqRexp(a,c) andalso eqRexps(b,d) + | eqRexps _ = false + + and eq2'((a,b,c),(d,e,f)) = a=d andalso eqFexp(b,e) andalso eqFexp(c,f) + and eq1'((a,b),(d,e)) = a=d andalso eqFexp(b,e) + + and eqFexp(T.FREG(t1,x),T.FREG(t2,y)) = t1=t2 andalso eqCell(x,y) + | eqFexp(T.FLOAD(a,b,_),T.FLOAD(c,d,_)) = a=c andalso eqRexp(b,d) + | eqFexp(T.FADD x,T.FADD y) = eq2'(x,y) + | eqFexp(T.FMUL x,T.FMUL y) = eq2'(x,y) + | eqFexp(T.FSUB x,T.FSUB y) = eq2'(x,y) + | eqFexp(T.FDIV x,T.FDIV y) = eq2'(x,y) + | eqFexp(T.FCOPYSIGN x, T.FCOPYSIGN y) = eq2'(x,y) + | eqFexp(T.FCOND(t,x,y,z), T.FCOND(t',x',y',z')) = + t=t' andalso eqCCexp(x,x') andalso eqFexp(y,y') andalso eqFexp(z,z') + | eqFexp(T.FABS x,T.FABS y) = eq1'(x,y) + | eqFexp(T.FNEG x,T.FNEG y) = eq1'(x,y) + | eqFexp(T.FSQRT x,T.FSQRT y) = eq1'(x,y) + | eqFexp(T.CVTI2F(a,b,c),T.CVTI2F(a',b',c')) = + a=a' andalso b=b' andalso eqRexp(c,c') + | eqFexp(T.CVTF2F(a,b,c),T.CVTF2F(a',b',c')) = + a=a' andalso b=b' andalso eqFexp(c,c') + | eqFexp(T.FEXT(a,f),T.FEXT(b,g)) = a=b andalso eqFext (equality()) (f,g) + | eqFexp(T.FMARK(a,_),b) = eqFexp(a,b) + | eqFexp(a,T.FMARK(b,_)) = eqFexp(a,b) + | eqFexp(T.FPRED(a,b),T.FPRED(a',b')) = eqCtrl(b,b') andalso eqFexp(a,a') + | eqFexp _ = false + + and eqFexps([],[]) = true + | eqFexps(a::b,c::d) = eqFexp(a,c) andalso eqFexps(b,d) + | eqFexps _ = false + + and eqCCexp(T.CC(c1,x),T.CC(c2,y)) = c1=c2 andalso eqCell(x,y) + | eqCCexp(T.FCC(c1,x),T.FCC(c2,y)) = c1=c2 andalso eqCell(x,y) + | eqCCexp(T.CMP(x,a,b,c),T.CMP(y,d,e,f)) = + a=d andalso eqRexp(b,e) andalso eqRexp(c,f) andalso x = y + | eqCCexp(T.FCMP(x,a,b,c),T.FCMP(y,d,e,f)) = + a=d andalso eqFexp(b,e) andalso eqFexp(c,f) andalso x = y + | eqCCexp(T.NOT x, T.NOT y) = eqCCexp(x,y) + | eqCCexp(T.AND x, T.AND y) = eqCCexp2(x,y) + | eqCCexp(T.OR x, T.OR y) = eqCCexp2(x,y) + | eqCCexp(T.XOR x, T.XOR y) = eqCCexp2(x,y) + | eqCCexp(T.EQV x, T.EQV y) = eqCCexp2(x,y) + | eqCCexp(T.CCMARK(a,_),b) = eqCCexp(a,b) + | eqCCexp(a,T.CCMARK(b,_)) = eqCCexp(a,b) + | eqCCexp(T.CCEXT(t,a),T.CCEXT(t',b)) = + t=t' andalso eqCCext (equality()) (a,b) + | eqCCexp(T.TRUE, T.TRUE) = true + | eqCCexp(T.FALSE, T.FALSE) = true + | eqCCexp _ = false + + and eqCCexp2((x,y),(x',y')) = eqCCexp(x,x') andalso eqCCexp(y,y') + + and eqCCexps([],[]) = true + | eqCCexps(a::b,c::d) = eqCCexp(a,c) andalso eqCCexps(b,d) + | eqCCexps _ = false + + exception NonConst + + fun eval{label, const} = + let fun rexp(T.LI i) = i + | rexp(T.CONST c) = const c + | rexp(T.LABEL l) = IntInf.fromInt(label l) + | rexp(T.LABEXP e) = rexp e + + | rexp(T.NEG(sz,x)) = I.NEG(sz,rexp x) + | rexp(T.ADD(sz,x,y)) = I.ADD(sz,rexp x,rexp y) + | rexp(T.SUB(sz,x,y)) = I.SUB(sz,rexp x,rexp y) + + | rexp(T.MULS(sz,x,y)) = I.MULS(sz,rexp x,rexp y) + | rexp(T.DIVS(sz,x,y)) = I.DIVS(sz,rexp x,rexp y) + | rexp(T.QUOTS(sz,x,y)) = I.QUOTS(sz,rexp x,rexp y) + | rexp(T.REMS(sz,x,y)) = I.REMS(sz,rexp x,rexp y) + + | rexp(T.MULU(sz,x,y)) = I.MULU(sz,rexp x,rexp y) + | rexp(T.DIVU(sz,x,y)) = I.DIVU(sz,rexp x,rexp y) + | rexp(T.REMU(sz,x,y)) = I.REMU(sz,rexp x,rexp y) + + | rexp(T.NEGT(sz,x)) = I.NEGT(sz,rexp x) + | rexp(T.ADDT(sz,x,y)) = I.ADDT(sz,rexp x,rexp y) + | rexp(T.SUBT(sz,x,y)) = I.SUBT(sz,rexp x,rexp y) + | rexp(T.MULT(sz,x,y)) = I.MULT(sz,rexp x,rexp y) + | rexp(T.DIVT(sz,x,y)) = I.DIVT(sz,rexp x,rexp y) + | rexp(T.QUOTT(sz,x,y)) = I.QUOTT(sz,rexp x,rexp y) + | rexp(T.REMT(sz,x,y)) = I.REMT(sz,rexp x,rexp y) + + | rexp(T.NOTB(sz,x)) = I.NOTB(sz,rexp x) + | rexp(T.ANDB(sz,x,y)) = I.ANDB(sz,rexp x,rexp y) + | rexp(T.ORB(sz,x,y)) = I.ORB(sz,rexp x,rexp y) + | rexp(T.XORB(sz,x,y)) = I.XORB(sz,rexp x,rexp y) + | rexp(T.EQVB(sz,x,y)) = I.EQVB(sz,rexp x,rexp y) + | rexp(T.SLL(sz,x,y)) = I.SLL(sz,rexp x,rexp y) + | rexp(T.SRL(sz,x,y)) = I.SRL(sz,rexp x,rexp y) + | rexp(T.SRA(sz,x,y)) = I.SRA(sz,rexp x,rexp y) + | rexp(T.BITSLICE(sz,x,y)) = I.BITSLICE(sz,x,rexp y) + + | rexp(T.COND(sz,cc,x,y)) = if ccexp cc then rexp x else rexp y + | rexp(T.SX(a,b,x)) = I.SX(a,b,rexp x) + | rexp(T.ZX(a,b,x)) = I.ZX(a,b,rexp x) + | rexp(T.MARK(e,_)) = rexp e + + | rexp _ = raise NonConst + and ccexp(T.TRUE) = true + | ccexp(T.FALSE) = false + | ccexp(T.CMP(sz,T.EQ,x,y)) = I.EQ(sz,rexp x,rexp y) + | ccexp(T.CMP(sz,T.NE,x,y)) = I.NE(sz,rexp x,rexp y) + | ccexp(T.CMP(sz,T.GT,x,y)) = I.GT(sz,rexp x,rexp y) + | ccexp(T.CMP(sz,T.GE,x,y)) = I.GE(sz,rexp x,rexp y) + | ccexp(T.CMP(sz,T.LT,x,y)) = I.LT(sz,rexp x,rexp y) + | ccexp(T.CMP(sz,T.LE,x,y)) = I.LE(sz,rexp x,rexp y) + | ccexp(T.CMP(sz,T.GTU,x,y)) = I.GTU(sz,rexp x,rexp y) + | ccexp(T.CMP(sz,T.LTU,x,y)) = I.LTU(sz,rexp x,rexp y) + | ccexp(T.CMP(sz,T.GEU,x,y)) = I.GEU(sz,rexp x,rexp y) + | ccexp(T.CMP(sz,T.LEU,x,y)) = I.LEU(sz,rexp x,rexp y) + | ccexp(T.NOT x) = not(ccexp x) + | ccexp(T.AND(x,y)) = ccexp x andalso ccexp y + | ccexp(T.OR(x,y)) = ccexp x orelse ccexp y + | ccexp(T.XOR(x,y)) = ccexp x <> ccexp y + | ccexp(T.EQV(x,y)) = ccexp x = ccexp y + | ccexp(T.CCMARK(e,_)) = ccexp e + | ccexp _ = raise NonConst + in {rexp=rexp, ccexp=ccexp} + end + + fun valueOf e = + IntInf.toInt + (#rexp(eval{const=fn c => IntInf.fromInt(Constant.valueOf c), + label=Label.addrOf}) e) + val == = eqRexp + val hash = hashRexp + + val resolveConstants = MLRiscControl.getFlag "asm-resolve-constants" + val _ = resolveConstants := true + + (* This module should be parameterised, in order to generate + * target label expressions for assembly code purposes. + *) +(* operator precedences: + (Note: these differ from C's precedences) + 2 MULT, DIV, LSHIFT, RSHIFT + 1 AND, OR + 0 PLUS, MINUS +*) + + fun parens (str, prec, op_prec) = + if prec > op_prec then "(" ^ str ^ ")" else str + + fun prInt i = if i < 0 then "-"^Int.toString(~i) else Int.toString i + fun prIntInf i = if IntInf.sign i < 0 then "-"^IntInf.toString(IntInf.~ i) + else IntInf.toString i + + fun toString le = toStr(le, 0) + + and toStr(T.LABEL lab, _) = Label.fmt labelFmt lab + | toStr(T.LABEXP le, p) = toStr(le, p) + | toStr(T.NEG(_, T.CONST c), _) = + if !resolveConstants then prInt(~(Constant.valueOf c)) + else "(-" ^ Constant.toString c ^ ")" + | toStr(T.NEG(_, T.LI i), _) = prIntInf(~i) + | toStr(T.NEG(_, lexp), prec) = parens(toStr(lexp, 3), prec, 3) + | toStr(T.CONST c, _) = + if !resolveConstants then prInt(Constant.valueOf c) + else Constant.toString c + | toStr(T.LI i, _) = prIntInf i + | toStr(T.MULS(_,lexp1, lexp2), prec) = + parens(toStr(lexp1, 2) ^ "*" ^ toStr(lexp2,2), prec, 2) + | toStr(T.DIVS(_,lexp1, lexp2), prec) = + parens(toStr(lexp1, 2) ^ "/" ^ toStr(lexp2,2), prec, 2) + | toStr(T.SLL(_,lexp, cnt), prec) = + parens(toStr(lexp,2) ^ "<<" ^ toStr(cnt,2), prec, 2) + | toStr(T.SRL(_,lexp, cnt), prec) = + parens(toStr(lexp,2) ^ ">>" ^ toStr(cnt,2), prec, 2) + | toStr(T.ANDB(_,lexp, mask), prec) = + parens(toStr(lexp,1) ^ "&" ^ toStr(mask, 1), prec, 1) + | toStr(T.ORB(_,lexp, mask), prec) = + parens(toStr(lexp, 1) ^ "|" ^ toStr(mask, 1), prec, 1) + | toStr(T.ADD(_,lexp1, lexp2), prec) = + parens(toStr(lexp1, 0) ^ "+" ^ toStr(lexp2, 0), prec, 0) + | toStr(T.SUB(_,lexp1, lexp2), prec) = + parens(toStr(lexp1, 0) ^ "-" ^ toStr(lexp2, 0), prec, 0) + | toStr _ = error "toStr" + +end diff --git a/MLRISC/mltree/mltree-mult.sig b/MLRISC/mltree/mltree-mult.sig new file mode 100644 index 0000000..827c051 --- /dev/null +++ b/MLRISC/mltree/mltree-mult.sig @@ -0,0 +1,21 @@ +(* + * Let's generate good multiplication/division code! + * + * -- Allen + *) +signature MLTREE_MULT_DIV = +sig + + structure T : MLTREE + structure C : CELLS + structure I : INSTRUCTIONS where C=C + + exception TooComplex + + val multiply : {r:CellsBasis.cell,i:int,d:CellsBasis.cell} -> I.instruction list + + val divide : { mode:T.Basis.rounding_mode, + stm :T.stm -> unit + } -> {r:CellsBasis.cell,i:int,d:CellsBasis.cell} -> I.instruction list + +end diff --git a/MLRISC/mltree/mltree-mult.sml b/MLRISC/mltree/mltree-mult.sml new file mode 100644 index 0000000..41c26eb --- /dev/null +++ b/MLRISC/mltree/mltree-mult.sml @@ -0,0 +1,200 @@ +(* + * Generate multiplication/division by a constant. + * This module is mainly used for architectures without fast integer multiply. + * + * -- Allen + *) +functor MLTreeMult + (structure I : INSTRUCTIONS + structure T : MLTREE + + structure CB : CELLS_BASIS = CellsBasis + val intTy : int (* width of integer type *) + + type argi = {r:CB.cell, i:int, d:CB.cell} + type arg = {r1:CB.cell, r2:CB.cell, d:CB.cell} + + (* these are always non-overflow trapping *) + val mov : {r:CB.cell, d:CB.cell} -> I.instruction + val add : arg -> I.instruction + val slli : argi -> I.instruction list + val srli : argi -> I.instruction list + val srai : argi -> I.instruction list + ) + (val trapping : bool (* trap on overflow? *) + val multCost : int ref (* cost of multiplication *) + + (* basic ops; these have to implemented by the architecture *) + + (* if trapping = true, then the following MUST trap on overflow *) + val addv : arg -> I.instruction list + val subv : arg -> I.instruction list + + (* some architectures, like the PA-RISC and the Alpha, + * have these types of special ops + * if trapping = true, then the following MUST also trap on overflow + *) + val sh1addv : (arg -> I.instruction list) option (* a*2 + b *) + val sh2addv : (arg -> I.instruction list) option (* a*4 + b *) + val sh3addv : (arg -> I.instruction list) option (* a*8 + b *) + ) + (val signed : bool (* signed? *) + ) : MLTREE_MULT_DIV = +struct + structure T = T + structure I = I + structure C = I.C + structure W = Word + structure A = Array + + type arg = argi + + infix << >> ~>> || && + val itow = W.fromInt + val wtoi = W.toIntX + val op << = W.<< + val op >> = W.>> + val op ~>> = W.~>> + val op || = W.orb + val op && = W.andb + + exception TooComplex + + fun error msg = MLRiscErrorMsg.error("MLTreeMult",msg) + + val zeroR = C.zeroReg CB.GP + val shiftri = if signed then srai else srli + + fun isPowerOf2 w = ((w - 0w1) && w) = 0w0 + + fun log2 n = (* n must be > 0!!! *) + let fun loop(0w1,pow) = pow + | loop(w,pow) = loop(w >> 0w1,pow+1) + in loop(n,0) end + + fun zeroBits(w,lowZeroBits) = + if (w && 0w1) = 0w1 then (w,lowZeroBits) + else zeroBits(w >> 0w1,lowZeroBits+0w1) + + (* Non overflow trapping version of multiply: + * We can use add, shadd, shift, sub to perform the multiplication + *) + fun multiplyNonTrap{r,i,d} = + let fun mult(r,w,maxCost,d) = + if maxCost <= 0 then raise TooComplex + else if isPowerOf2 w then slli{r=r,i=log2 w,d=d} + else + (case (w,sh1addv,sh2addv,sh3addv) of + (* some base cases *) + (0w3,SOME f,_,_) => f{r1=r,r2=r,d=d} + | (0w5,_,SOME f,_) => f{r1=r,r2=r,d=d} + | (0w9,_,_,SOME f) => f{r1=r,r2=r,d=d} + | _ => (* recurse on the bit patterns of w *) + let val tmp = C.newReg() + in if (w && 0w1) = 0w1 then (* low order bit is 1 *) + if (w && 0w2) = 0w2 then (* second bit is 1 *) + mult(r,w+0w1,maxCost-1,tmp) @ + subv{r1=tmp,r2=r,d=d} + else (* second bit is 0 *) + mult(r,w-0w1,maxCost-1,tmp) @ + addv{r1=tmp,r2=r,d=d} + else (* low order bit is 0 *) + let val (w,lowZeroBits) = zeroBits(w,0w0) + in mult(r,w,maxCost-1,tmp) @ + slli{r=tmp,i=wtoi lowZeroBits,d=d} + end + end + ) + in if i <= 0 then raise TooComplex + else if i = 1 then [mov{r=r,d=d}] + else mult(r,itow i,!multCost,d) + end + + (* The semantics of roundToZero{r,i,d} is: + * if r >= 0 then d <- r + * else d <- r + i + *) + fun roundToZero stm {ty,r,i,d} = + let val reg = T.REG(ty,r) + in stm(T.MV(ty,d, + T.COND(ty,T.CMP(ty,T.GE,reg, T.LI 0),reg, + T.ADD(ty,reg,T.LI(T.I.fromInt(intTy,i)))))) + end + + + (* + * Simulate rounding towards zero for signed division + *) + fun roundDiv{mode=T.TO_NEGINF,r,...} = ([],r) (* no rounding necessary *) + | roundDiv{mode=T.TO_ZERO,stm,r,i} = + if signed then + let val d = C.newReg() + in if i = 2 then (* special case for division by 2 *) + let val tmpR = C.newReg() + in (srli{r=r,i=intTy - 1,d=tmpR}@[add{r1=r,r2=tmpR,d=d}], d) + end + else + (* invoke rounding callback *) + let val () = roundToZero stm {ty=intTy,r=r,i=i-1,d=d} + in ([],d) end + end + else ([],r) (* no rounding for unsigned division *) + | roundDiv{mode,...} = + error("Integer rounding mode "^ + T.Basis.roundingModeToString mode^" is not supported") + + fun divideNonTrap{mode,stm}{r,i,d} = + if i > 0 andalso isPowerOf2(itow i) + then + let val (code,r) = roundDiv{mode=mode,stm=stm,r=r,i=i} + in code@shiftri{r=r,i=log2(itow i),d=d} end (* won't overflow *) + else raise TooComplex + + (* Overflow trapping version of multiply: + * We can use only add and shadd to perform the multiplication, + * because of overflow trapping problem. + *) + fun multiplyTrap{r,i,d} = + let fun mult(r,w,maxCost,d) = + if maxCost <= 0 then raise TooComplex + else + (case (w,sh1addv,sh2addv,sh3addv,zeroR) of + (* some simple base cases *) + (0w2,_,_,_,_) => addv{r1=r,r2=r,d=d} + | (0w3,SOME f,_,_,_) => f{r1=r,r2=r,d=d} + | (0w4,_,SOME f,_,SOME z) => f{r1=r,r2=z,d=d} + | (0w5,_,SOME f,_,_) => f{r1=r,r2=r,d=d} + | (0w8,_,_,SOME f,SOME z) => f{r1=r,r2=z,d=d} + | (0w9,_,_,SOME f,_) => f{r1=r,r2=r,d=d} + | _ => (* recurse on the bit patterns of w *) + let val tmp = C.newReg() + in if (w && 0w1) = 0w1 then + mult(r,w - 0w1,maxCost-1,tmp) @ addv{r1=tmp,r2=r,d=d} + else + case (w && 0w7, sh3addv, zeroR) of + (0w0, SOME f, SOME z) => (* times 8 *) + mult(r,w >> 0w3,maxCost-1,tmp) @ f{r1=tmp,r2=z,d=d} + | _ => + case (w && 0w3, sh2addv, zeroR) of + (0w0, SOME f, SOME z) => (* times 4 *) + mult(r,w >> 0w2,maxCost-1,tmp) @ f{r1=tmp,r2=z,d=d} + | _ => + mult(r,w >> 0w1,maxCost-1,tmp) @ addv{r1=tmp,r2=tmp,d=d} + end + ) + in if i <= 0 then raise TooComplex + else if i = 1 then [mov{r=r,d=d}] + else mult(r,itow i,!multCost,d) + end + + fun divideTrap{mode,stm}{r,i,d} = + if i > 0 andalso isPowerOf2(itow i) + then + let val (code,r) = roundDiv{mode=mode,stm=stm,r=r,i=i} + in code@shiftri{r=r,i=log2(itow i),d=d} end (* won't overflow *) + else raise TooComplex + + fun multiply x = if trapping then multiplyTrap x else multiplyNonTrap x + fun divide x = if trapping then divideTrap x else divideNonTrap x + +end diff --git a/MLRISC/mltree/mltree-rewrite.sig b/MLRISC/mltree/mltree-rewrite.sig new file mode 100644 index 0000000..cbcd70a --- /dev/null +++ b/MLRISC/mltree/mltree-rewrite.sig @@ -0,0 +1,16 @@ +(* A rewrite function for MLTree datatypes + * Useful for performing transformation on MLTree. + * The signature is a bit hairy since we have to deal with extensions. + *) +signature MLTREE_REWRITE = +sig + structure T : MLTREE + + val rewrite : + (* User supplied transformations *) + { rexp : (T.rexp -> T.rexp) -> (T.rexp -> T.rexp), + fexp : (T.fexp -> T.fexp) -> (T.fexp -> T.fexp), + ccexp : (T.ccexp -> T.ccexp) -> (T.ccexp -> T.ccexp), + stm : (T.stm -> T.stm) -> (T.stm -> T.stm) + } -> T.rewriter +end diff --git a/MLRISC/mltree/mltree-rewrite.sml b/MLRISC/mltree/mltree-rewrite.sml new file mode 100644 index 0000000..b4b8001 --- /dev/null +++ b/MLRISC/mltree/mltree-rewrite.sml @@ -0,0 +1,158 @@ +functor MLTreeRewrite + (structure T : MLTREE + (* Traversal extensions *) + val sext : T.rewriter -> T.sext -> T.sext + val rext : T.rewriter -> T.rext -> T.rext + val fext : T.rewriter -> T.fext -> T.fext + val ccext : T.rewriter -> T.ccext -> T.ccext + ) : MLTREE_REWRITE = +struct + structure T = T + + type rewriters = + { stm : T.stm -> T.stm, + rexp : T.rexp -> T.rexp, + fexp : T.fexp -> T.fexp, + ccexp : T.ccexp -> T.ccexp + } + + fun rewrite{rexp=doRexp, fexp=doFexp, ccexp=doCCexp, stm=doStm} = + let fun stm s = + let val s = + case s of + T.MV(ty,dst,e) => T.MV(ty,dst,rexp e) + | T.CCMV(dst,e) => T.CCMV(dst,ccexp e) + | T.FMV(fty,dst,e) => T.FMV(fty,dst,fexp e) + | T.COPY _ => s + | T.FCOPY _ => s + | T.JMP(e,cf) => T.JMP(rexp e,cf) + | T.BCC(cc,l) => T.BCC(ccexp cc,l) + | T.CALL{funct,targets,defs,uses,region,pops} => + T.CALL{funct=rexp funct,targets=targets, + defs=mlriscs defs,uses=mlriscs uses, + region=region,pops=pops} + | T.FLOW_TO(s,controlflow) => T.FLOW_TO(stm s,controlflow) + | T.RET _ => s + | T.IF(cc,yes,no) => T.IF(ccexp cc,stm yes,stm no) + | T.STORE(ty,ea,d,r) => T.STORE(ty,rexp ea,rexp d,r) + | T.FSTORE(fty,ea,d,r) => T.FSTORE(fty,rexp ea,fexp d,r) + | T.REGION(s,ctrl) => T.REGION(stm s,ctrl) + | T.SEQ s => T.SEQ(stms s) + | T.DEFINE _ => s + | T.ANNOTATION(s,an) => T.ANNOTATION(stm s,an) + | T.EXT s => + T.EXT(sext {rexp=rexp, fexp=fexp, ccexp=ccexp, stm=stm} s) + | T.PHI _ => s + | T.SOURCE => s + | T.SINK => s + | T.RTL _ => s + | T.ASSIGN(ty,x,y) => T.ASSIGN(ty,rexp x, rexp y) + | T.LIVE ls => T.LIVE (mlriscs ls) + | T.KILL ks => T.KILL (mlriscs ks) + in doStm stm s end + + and stms ss = map stm ss + + and rexp e = + let val e = case e of + T.REG _ => e + | T.LI _ => e + | T.LABEL _ => e + | T.LABEXP _ => e + | T.CONST _ => e + | T.NEG(ty,x) => T.NEG(ty,rexp x) + | T.ADD(ty,x,y) => T.ADD(ty,rexp x,rexp y) + | T.SUB(ty,x,y) => T.SUB(ty,rexp x,rexp y) + | T.MULS(ty,x,y) => T.MULS(ty,rexp x,rexp y) + | T.DIVS(m,ty,x,y) => T.DIVS(m,ty,rexp x,rexp y) + | T.REMS(m,ty,x,y) => T.REMS(m,ty,rexp x,rexp y) + | T.MULU(ty,x,y) => T.MULU(ty,rexp x,rexp y) + | T.DIVU(ty,x,y) => T.DIVU(ty,rexp x,rexp y) + | T.REMU(ty,x,y) => T.REMU(ty,rexp x,rexp y) + | T.NEGT(ty,x) => T.NEGT(ty,rexp x) + | T.ADDT(ty,x,y) => T.ADDT(ty,rexp x,rexp y) + | T.SUBT(ty,x,y) => T.SUBT(ty,rexp x,rexp y) + | T.MULT(ty,x,y) => T.MULT(ty,rexp x,rexp y) + | T.DIVT(m,ty,x,y) => T.DIVT(m,ty,rexp x,rexp y) + | T.ANDB(ty,x,y) => T.ANDB(ty,rexp x,rexp y) + | T.ORB(ty,x,y) => T.ORB(ty,rexp x,rexp y) + | T.XORB(ty,x,y) => T.XORB(ty,rexp x,rexp y) + | T.EQVB(ty,x,y) => T.EQVB(ty,rexp x,rexp y) + | T.NOTB(ty,x) => T.NOTB(ty,rexp x) + | T.SRA(ty,x,y) => T.SRA(ty,rexp x,rexp y) + | T.SRL(ty,x,y) => T.SRL(ty,rexp x,rexp y) + | T.SLL(ty,x,y) => T.SLL(ty,rexp x,rexp y) + | T.SX(t,t',e) => T.SX(t,t',rexp e) + | T.ZX(t,t',e) => T.ZX(t,t',rexp e) + | T.CVTF2I(ty,mode,fty,e) => T.CVTF2I(ty,mode,fty,fexp e) + | T.COND(ty,cc,yes,no) => T.COND(ty,ccexp cc,rexp yes,rexp no) + | T.LOAD(ty,ea,r) => T.LOAD(ty,rexp ea,r) + | T.PRED(e,ctrl) => T.PRED(rexp e,ctrl) + | T.LET(s,e) => T.LET(stm s,rexp e) + | T.REXT(ty,e) => + T.REXT(ty,rext {rexp=rexp, fexp=fexp, ccexp=ccexp, stm=stm} e) + | T.MARK(e,an) => T.MARK(rexp e,an) + | T.$(ty,k,e) => T.$(ty,k,rexp e) + | T.ARG _ => e + | T.PARAM _ => e + | T.BITSLICE(ty,sl,e) => T.BITSLICE(ty,sl,rexp e) + | T.??? => T.??? + | T.OP(ty,oper,es) => T.OP(ty,oper,rexps es) + in doRexp rexp e end + + and rexps es = map rexp es + + and fexp e = + let val e = case e of + T.FREG _ => e + | T.FLOAD(fty,e,r) => T.FLOAD(fty,rexp e,r) + | T.FADD(fty,x,y) => T.FADD(fty,fexp x,fexp y) + | T.FSUB(fty,x,y) => T.FSUB(fty,fexp x,fexp y) + | T.FMUL(fty,x,y) => T.FMUL(fty,fexp x,fexp y) + | T.FDIV(fty,x,y) => T.FDIV(fty,fexp x,fexp y) + | T.FABS(fty,x) => T.FABS(fty,fexp x) + | T.FNEG(fty,x) => T.FNEG(fty,fexp x) + | T.FSQRT(fty,x) => T.FSQRT(fty,fexp x) + | T.FCOPYSIGN(fty,x,y) => T.FCOPYSIGN(fty,fexp x,fexp y) + | T.FCOND(fty,c,x,y) => T.FCOND(fty,ccexp c,fexp x,fexp y) + | T.CVTI2F(fty,ty,e) => T.CVTI2F(fty,ty,rexp e) + | T.CVTF2F(fty,fty',e) => T.CVTF2F(fty,fty',fexp e) + | T.FPRED(e,ctrl) => T.FPRED(fexp e,ctrl) + | T.FEXT(fty,e) => + T.FEXT(fty,fext {rexp=rexp, fexp=fexp, ccexp=ccexp, stm=stm} e) + | T.FMARK(e,an) => T.FMARK(fexp e,an) + in doFexp fexp e end + + and fexps es = map fexp es + + and ccexp e = + let val e = case e of + T.CC _ => e + | T.FCC _ => e + | T.TRUE => e + | T.FALSE => e + | T.NOT e => T.NOT(ccexp e) + | T.AND(x,y) => T.AND(ccexp x,ccexp y) + | T.OR(x,y) => T.OR(ccexp x,ccexp y) + | T.XOR(x,y) => T.XOR(ccexp x,ccexp y) + | T.EQV(x,y) => T.EQV(ccexp x,ccexp y) + | T.CMP(ty,cond,x,y) => T.CMP(ty,cond,rexp x,rexp y) + | T.FCMP(ty,fcond,x,y) => T.FCMP(ty,fcond,fexp x,fexp y) + | T.CCMARK(e,an) => T.CCMARK(ccexp e,an) + | T.CCEXT(ty,e) => + T.CCEXT(ty,ccext {rexp=rexp, fexp=fexp, ccexp=ccexp, stm=stm} e) + in doCCexp ccexp e end + + and mlriscs m = map mlrisc m + + and mlrisc m = + let val m = + case m of + T.CCR e => T.CCR(ccexp e) + | T.GPR e => T.GPR(rexp e) + | T.FPR e => T.FPR(fexp e) + in m end + + in { rexp=rexp, fexp=fexp, ccexp=ccexp, stm=stm } end +end (* MLTreeFold *) + diff --git a/MLRISC/mltree/mltree-rtl.sig b/MLRISC/mltree/mltree-rtl.sig new file mode 100644 index 0000000..440f059 --- /dev/null +++ b/MLRISC/mltree/mltree-rtl.sig @@ -0,0 +1,93 @@ +(* + * This signature describes the internal RTL representation. + * The internal representation differs from the user representation it that + * it is lambda-lifted, i.e., instead of having references like REG(32,123), + * it has references like PARAM i, which refers to the ith parameter. + * + * This representation is chosen so that multiple instructions can + * share the same rtl template. Also, so that the templates can be + * created once before compilation begins. + *) +signature MLTREE_RTL = +sig + structure T : MLTREE + structure Util : MLTREE_UTILS + structure Rewrite : MLTREE_REWRITE + structure Fold : MLTREE_FOLD + sharing Util.T = Rewrite.T = Fold.T = T + + type ty = T.ty + type var = T.var + type rtl = T.stm + type exp = T.rexp + type cond = T.ccexp + type div_rounding_mode = T.div_rounding_mode + + datatype pos = IN of int | OUT of int | IO of int * int (* def/use *) + + (*----------------------------------------------------------------------- + * Basic Operations + *-----------------------------------------------------------------------*) + val showRTL : {def:int->string, + use:int->string, + regionDef:T.Region.region->string, + regionUse:T.Region.region->string} -> T.printer + val rtlToString : rtl -> string + val expToString : exp -> string + val hashRTL : rtl -> word + val eqRTL : rtl * rtl -> bool + + (*----------------------------------------------------------------------- + * Construction + *-----------------------------------------------------------------------*) + val newOp : {name:string, attribs:T.Basis.attribs} -> T.Basis.misc_op + val new : rtl -> rtl + val pin : rtl -> rtl + val COPY : rtl + val JMP : rtl + + (*----------------------------------------------------------------------- + * Type queries + *-----------------------------------------------------------------------*) + val isConditionalBranch : rtl -> bool + val isJump : rtl -> bool + val isLooker : rtl -> bool + + (*----------------------------------------------------------------------- + * Def/use queries. + *-----------------------------------------------------------------------*) + val defUse : rtl -> exp list * exp list + + (*----------------------------------------------------------------------- + * Assign positions to all arguments + *-----------------------------------------------------------------------*) + val argPos : rtl -> (exp * int) list * (exp * int) list + exception NotAnArgument + val argOf : rtl -> string -> (exp * pos) + + (*----------------------------------------------------------------------- + * Number of arguments that an rtl maps into + *-----------------------------------------------------------------------*) + datatype arity = ZERO | ONE | MANY + val arity : exp -> arity (* number of values *) + val nonConstArity : exp -> arity (* number of non-constant values *) + + (*----------------------------------------------------------------------- + * Extract naming constraints, if any + *-----------------------------------------------------------------------*) + val namingConstraints : exp list * exp list -> + { fixedDefs : (exp * int) list, (* these define fixed locations *) + fixedUses : (exp * int) list, (* these define fixed locations *) + twoAddress : exp list (* these are both src and dst *) + } + + (*----------------------------------------------------------------------- + * Code motion queries + *-----------------------------------------------------------------------*) + val can'tMoveUp : rtl -> bool + val can'tMoveDown : rtl -> bool + val pinned : rtl -> bool + val hasSideEffect : rtl -> bool + val can'tBeRemoved : rtl -> bool + +end diff --git a/MLRISC/mltree/mltree-rtl.sml b/MLRISC/mltree/mltree-rtl.sml new file mode 100644 index 0000000..44e9315 --- /dev/null +++ b/MLRISC/mltree/mltree-rtl.sml @@ -0,0 +1,355 @@ +(* + * Basic RTLs and query functions on these RTLs + * + * -- Allen + *) +functor MLTreeRTL + (structure Util : MLTREE_UTILS + structure Rewrite : MLTREE_REWRITE + structure Fold : MLTREE_FOLD + sharing Util.T = Rewrite.T = Fold.T + ) : MLTREE_RTL = +struct + + structure T = Util.T + structure Util = Util + structure Rewrite = Rewrite + structure Fold = Fold + structure W = Word + structure C = CellsBasis + + fun error msg = MLRiscErrorMsg.error("MLTreeRTL",msg) + + datatype pos = IN of int | OUT of int | IO of int * int + datatype arity = ZERO | ONE | MANY + + val itow = Word.fromInt + infix || + val op || = W.orb + + type ty = T.ty + type rtl = T.stm + type exp = T.rexp + type cond = T.ccexp + type var = T.var + type hasher = T.hasher + type equality = T.equality + type printer = T.printer + type div_rounding_mode = T.div_rounding_mode + + val hashRTL = Util.hashStm + val eqRTL = Util.eqStm + val showRTL = Util.show + val rtlToString = Util.stmToString + val expToString = Util.rexpToString + + (*----------------------------------------------------------------------- + * Attributes + *-----------------------------------------------------------------------*) + val A_TRAPPING = W.<<(0w1,0w1) (* may cause traps *) + val A_PINNED = W.<<(0w1,0w2) (* cannot be moved *) + val A_SIDEEFFECT = W.<<(0w1,0w3) (* has side effect *) + val A_MUTATOR = W.<<(0w1,0w4) + val A_LOOKER = W.<<(0w1,0w5) + val A_BRANCH = W.<<(0w1,0w6) (* conditional branch *) + val A_JUMP = W.<<(0w1,0w7) (* has control flow *) + val A_PURE = 0wx0 + fun isOn(a,flag) = Word.andb(a,flag) <> 0w0 + + (*----------------------------------------------------------------------- + * Create new RTL operators + *-----------------------------------------------------------------------*) + val hashCnt = ref 0w0 + fun newHash() = let val h = !hashCnt in hashCnt := h + 0w124127; h end + fun newOp{name,attribs} = {name=name,attribs=ref attribs,hash=newHash()} + + (*----------------------------------------------------------------------- + * Reduce a RTL to compiled internal form + *-----------------------------------------------------------------------*) + fun reduce rtl = + let + in rtl + end + + (*----------------------------------------------------------------------- + * Collect attributes + *-----------------------------------------------------------------------*) + fun attribsOf rtl = + let fun stm(T.STORE _,a) = a || (A_SIDEEFFECT || A_MUTATOR) + | stm(T.JMP _, a) = a || (A_JUMP || A_SIDEEFFECT) + | stm(T.IF _, a) = a || (A_BRANCH || A_JUMP || A_SIDEEFFECT) + | stm(T.RET _, a) = a || (A_JUMP || A_SIDEEFFECT) + | stm(T.CALL _, a) = a || A_SIDEEFFECT + | stm(T.ASSIGN(_,T.$(_,C.MEM,addr),value),a) = + a || (A_SIDEEFFECT || A_MUTATOR) + | stm(_, a) = a + fun rexp(T.ADDT _,a) = a || A_TRAPPING + | rexp(T.SUBT _,a) = a || A_TRAPPING + | rexp(T.MULT _,a) = a || A_TRAPPING + | rexp(T.DIVT _,a) = a || A_TRAPPING + | rexp(T.LOAD _,a) = a || A_LOOKER + | rexp(T.$(_,C.MEM,_),a) = a || A_LOOKER + | rexp(_, a) = a + fun fexp(_, a) = a + fun ccexp(_, a) = a + in #stm (Fold.fold{stm=stm,rexp=rexp, fexp=fexp, ccexp=ccexp}) rtl + end + + + (*----------------------------------------------------------------------- + * Create a uniq RTL + *-----------------------------------------------------------------------*) + fun new(rtl) = + let val rtl = reduce rtl + val attribs = attribsOf(rtl, A_PURE) + val rtl = + case rtl of + T.COPY _ => rtl + | _ => T.RTL{e=rtl,hash=newHash(),attribs=ref attribs} + in rtl + end + + val COPY = T.COPY(0,[],[]) + val JMP = new(T.JMP(T.PARAM 0,[])) + + + fun pin(x as T.RTL{attribs, ...}) = + (attribs := (!attribs || A_PINNED); x) + | pin _ = error "pin" + + (*----------------------------------------------------------------------- + * Type queries + *-----------------------------------------------------------------------*) + fun hasSideEffect(T.RTL{attribs, ...}) = isOn(!attribs, A_SIDEEFFECT) + | hasSideEffect _ = false + fun isConditionalBranch(T.RTL{attribs, ...}) = isOn(!attribs,A_BRANCH) + | isConditionalBranch _ = false + fun isJump(T.RTL{attribs, ...}) = isOn(!attribs,A_JUMP) + | isJump(T.JMP _) = true + | isJump _ = false + fun isLooker(T.RTL{attribs, ...}) = isOn(!attribs,A_LOOKER) + | isLooker _ = false + + (*----------------------------------------------------------------------- + * Def/use queries + *-----------------------------------------------------------------------*) + fun defUse rtl = + let fun contains x = List.exists(fn y => Util.eqRexp(x,y)) + fun diff(A,B) = List.filter (fn z => not(contains z B)) A + fun uniq([], l) = rev l + | uniq(x::xs, l) = if contains x l then uniq(xs,l) else uniq(xs,x::l) + + fun stm(T.ASSIGN(_,x, y), d, u) = + let val (d, u) = lhs(x, d, u) + in rhs(y, d, u) end + | stm(T.COPY _, d, u) = (d, u) (* XXX *) + | stm(T.RET _, d, u) = (d, u) + | stm(T.RTL{e, ...}, d, u) = stm(e, d, u) + | stm(T.JMP(e,_), d, u) = rhs(e, d, u) + | stm(T.IF(x,y,z), d, u) = + let val (d, u) = cond(x, d, u) + val (d1, u) = stm(y, [], u) + val (d2, u) = stm(z, [], u) + val u1 = diff(d1,d2) + val u2 = diff(d2,d1) + in (d @ d1 @ d2, u @ u1 @ u2) + end + | stm(T.SEQ rtls, d, u) = stms(rtls, d, u) + | stm(T.CALL{funct,...}, d, u) = rhs(funct, d, u) + | stm(rtl, d, u) = error("defUse.stm: "^rtlToString rtl) + + and stms([], d, u) = (d, u) + | stms(s::ss, d, u) = let val (d, u) = stm(s, d, u) + in stms(ss, d, u) end + + and rhs(T.LI _, d, u) = (d, u) + | rhs(x as T.ARG _, d, u) = (d, x::u) + | rhs(x as T.PARAM _, d, u) = (d, x::u) + | rhs(T.ADD(_,x,y), d, u) = binOp(x, y, d, u) + | rhs(T.SUB(_,x,y), d, u) = binOp(x, y, d, u) + | rhs(T.MULS(_,x,y), d, u) = binOp(x, y, d, u) + | rhs(T.MULU(_,x,y), d, u) = binOp(x, y, d, u) + | rhs(T.DIVS(_,_,x,y), d, u) = binOp(x, y, d, u) + | rhs(T.DIVU(_,x,y), d, u) = binOp(x, y, d, u) + | rhs(T.REMS(_,_,x,y), d, u) = binOp(x, y, d, u) + | rhs(T.REMU(_,x,y), d, u) = binOp(x, y, d, u) + | rhs(T.ADDT(_,x,y), d, u) = binOp(x, y, d, u) + | rhs(T.SUBT(_,x,y), d, u) = binOp(x, y, d, u) + | rhs(T.MULT(_,x,y), d, u) = binOp(x, y, d, u) + | rhs(T.DIVT(_,_,x,y), d, u) = binOp(x, y, d, u) + | rhs(T.SLL(_,x,y), d, u) = binOp(x, y, d, u) + | rhs(T.SRL(_,x,y), d, u) = binOp(x, y, d, u) + | rhs(T.SRA(_,x,y), d, u) = binOp(x, y, d, u) + | rhs(T.ANDB(_,x,y), d, u) = binOp(x, y, d, u) + | rhs(T.ORB(_,x,y), d, u) = binOp(x, y, d, u) + | rhs(T.XORB(_,x,y), d, u) = binOp(x, y, d, u) + | rhs(T.EQVB(_,x,y), d, u) = binOp(x, y, d, u) + | rhs(T.NEG(_,x), d, u) = rhs(x, d, u) + | rhs(T.NEGT(_,x), d, u) = rhs(x, d, u) + | rhs(T.NOTB(_,x), d, u) = rhs(x, d, u) + | rhs(T.SX(_,_,x), d, u) = rhs(x, d, u) + | rhs(T.ZX(_,_,x), d, u) = rhs(x, d, u) + | rhs(x as T.$(_,_,T.ARG _), d, u) = (d, x::u) + | rhs(x as T.$(_,_,T.PARAM _), d, u) = (d, x::u) + | rhs(x as T.$(_,_,e), d, u) = rhs(e, d, x::u) + | rhs(T.CVTF2I(_,_,_,x), d, u) = fexp(x, d, u) + | rhs(T.OP(_,_,es), d, u) = rexps(es, d, u) + | rhs(T.COND(_,x,y,z), d, u) = + let val (d, u) = cond(x, d, u) + in binOp(y, z, d, u) end + | rhs(T.BITSLICE(_,_,e), d, u) = rhs(e, d, u) + | rhs(T.???, d, u) = (d, u) + | rhs(e, d, u) = error("defUse.rhs: "^Util.rexpToString e) + + and binOp(x, y, d, u) = + let val (d, u) = rhs(x, d, u) + in rhs(y, d, u) end + + and rexps([], d, u) = (d, u) + | rexps(e::es, d, u) = + let val (d, u) = rhs(e, d, u) + in rexps(es, d, u) end + + and lhs(x as T.$(_,_,T.ARG _), d, u) = (x::d, u) + | lhs(x as T.$(_,_,T.PARAM _), d, u) = (x::d, u) + | lhs(x as T.$(_,_,addr), d, u) = rhs(addr, x::d, u) + | lhs(x as T.ARG _, d, u) = (x::d, u) + | lhs(x as T.PARAM _, d, u) = (x::d, u) + | lhs(T.???, d, u) = (d, u) + | lhs(e, d, u) = error("defUse.lhs: "^Util.rexpToString e) + + and fexp(T.FADD(_, x, y), d, u) = fbinOp(x, y, d, u) + | fexp(T.FSUB(_, x, y), d, u) = fbinOp(x, y, d, u) + | fexp(T.FMUL(_, x, y), d, u) = fbinOp(x, y, d, u) + | fexp(T.FDIV(_, x, y), d, u) = fbinOp(x, y, d, u) + | fexp(T.FCOPYSIGN(_, x, y), d, u) = fbinOp(x, y, d, u) + | fexp(T.FCOND(_, x, y, z), d, u) = + let val (d, u) = cond(x, d, u) + in fbinOp(y, z, d, u) end + | fexp(T.FSQRT(_, x), d, u) = fexp(x, d, u) + | fexp(T.FABS(_, x), d, u) = fexp(x, d, u) + | fexp(T.FNEG(_, x), d, u) = fexp(x, d, u) + | fexp(T.CVTI2F(_, _, x), d, u) = rhs(x, d, u) + | fexp(e, d, u) = error("defUse.fexp: "^Util.fexpToString e) + + and fbinOp(x, y, d, u) = + let val (d, u) = fexp(x, d, u) + in fexp(y, d, u) end + + and cond(T.CMP(_,_,x,y), d, u) = binOp(x, y, d, u) + | cond(T.FCMP(_,_,x,y), d, u) = fbinOp(x, y, d, u) + | cond(T.TRUE, d, u) = (d, u) + | cond(T.FALSE, d, u) = (d, u) + | cond(T.NOT x, d, u) = cond(x, d, u) + | cond(T.AND(x, y), d, u) = cond2(x, y, d, u) + | cond(T.OR(x, y), d, u) = cond2(x, y, d, u) + | cond(T.XOR(x, y), d, u) = cond2(x, y, d, u) + | cond(T.EQV(x, y), d, u) = cond2(x, y, d, u) + | cond(e, d, u) = error("defUse.cond: "^Util.ccexpToString e) + + and cond2(x, y, d, u) = + let val (d, u) = cond(x, d, u) + in cond(y, d, u) end + + val (d, u) = stm(rtl, [], []) + + in (uniq(d, []), uniq(u, [])) + end + + (*----------------------------------------------------------------------- + * Giving definitions and uses. Find out the naming constraints. + *-----------------------------------------------------------------------*) + fun namingConstraints(defs, uses) = + let fun collectFixed((x as T.$(_,_,T.LI r))::xs, fixed, rest) = + collectFixed(xs, (x, IntInf.toInt r)::fixed, rest) + | collectFixed(x::xs, fixed, rest) = + collectFixed(xs, fixed, x::rest) + | collectFixed([], fixed, rest) = (fixed, rest) + val (fixedUses, otherUses) = collectFixed(uses, [], []) + val (fixedDefs, otherDefs) = collectFixed(defs, [], []) + val fixed = + List.filter + (fn x => List.exists (fn y => Util.eqRexp(x,y)) otherUses) + otherDefs + in {fixedUses=fixedUses, + fixedDefs=fixedDefs, + twoAddress=fixed + } + end + + (*----------------------------------------------------------------------- + * Assign positions to each argument + *-----------------------------------------------------------------------*) + fun argPos rtl = + let val (defs, uses) = defUse rtl + fun pos([], i, ds) = ds + | pos(d::defs, i, ds) = pos(defs, i+1, (d,i)::ds) + val ds = pos(defs, 0, []) + val us = pos(uses, 0, []) + in (ds, us) + end + + exception NotAnArgument + + fun argOf rtl = + let val (defs, uses) = argPos rtl + fun find(this,(x as (T.$(_,_,T.ARG(_,_,name)),_))::xs) = + if this = name then SOME x else find(this, xs) + | find(this,(x as (T.ARG(_,_,name),_))::xs) = + if this = name then SOME x else find(this, xs) + | find(this,_::xs) = find(this, xs) + | find(this,[]) = NONE + fun lookup name = + case (find(name,defs), find(name,uses)) of + (SOME(x,i),SOME(_,j)) => (x,IO(i,j)) + | (SOME(x, i), NONE) => (x,OUT i) + | (NONE, SOME(x, i)) => (x,IN i) + | (NONE, NONE) => raise NotAnArgument + in lookup + end + + (*----------------------------------------------------------------------- + * Return the arity of an argument + *-----------------------------------------------------------------------*) + fun arity(T.ARG _) = MANY + | arity(T.$(_,C.MEM,_)) = MANY + | arity(T.$(_,_,_)) = ONE + | arity _ = raise NotAnArgument + + fun nonConstArity(T.ARG _) = MANY + | nonConstArity(T.$(_,C.MEM,_)) = MANY + | nonConstArity(T.$(_,_,_)) = ONE + | nonConstArity _ = raise NotAnArgument + + (*----------------------------------------------------------------------- + * Code motion queries + *-----------------------------------------------------------------------*) + fun can'tMoveUp(T.RTL{attribs, ...}) = + isOn(!attribs, A_SIDEEFFECT || A_TRAPPING || A_PINNED) + | can'tMoveUp(T.PHI _) = true + | can'tMoveUp(T.SOURCE) = true + | can'tMoveUp(T.SINK) = true + | can'tMoveUp _ = false + + fun can'tMoveDown(T.PHI _) = true + | can'tMoveDown(T.SOURCE) = true + | can'tMoveDown(T.SINK) = true + | can'tMoveDown(T.RTL{attribs, ...}) = + isOn(!attribs, A_SIDEEFFECT || A_BRANCH || A_JUMP || A_TRAPPING || + A_PINNED || + A_LOOKER (* can be avoided with pure loads! XXX *)) + | can'tMoveDown rtl = error("can'tMoveDown: "^rtlToString rtl) + + fun pinned(T.RTL{attribs, ...}) = + isOn(!attribs, A_SIDEEFFECT || A_TRAPPING || A_PINNED) + | pinned(T.PHI _) = true + | pinned(T.SOURCE) = true + | pinned(T.SINK) = true + | pinned _ = false + fun can'tBeRemoved(T.RTL{attribs, ...}) = + isOn(!attribs, A_SIDEEFFECT || A_BRANCH || A_JUMP) + | can'tBeRemoved(T.SOURCE) = true + | can'tBeRemoved(T.SINK) = true + | can'tBeRemoved _ = false + +end diff --git a/MLRISC/mltree/mltree-simplify.in b/MLRISC/mltree/mltree-simplify.in new file mode 100644 index 0000000..0e8de12 --- /dev/null +++ b/MLRISC/mltree/mltree-simplify.in @@ -0,0 +1,238 @@ +(* + * Performs simple local optimizations. + * This version uses IntInf + *) +local + + structure T = + struct + include "mltree-basis.sig" + include "mltree.sig" + end + +in + +functor MLTreeSimplifier + (structure T : MLTREE + structure Size : MLTREE_SIZE + where T = T + (* Extension *) + val sext : T.rewriter -> T.sext -> T.sext + val rext : T.rewriter -> T.rext -> T.rext + val fext : T.rewriter -> T.fext -> T.fext + val ccext : T.rewriter -> T.ccext -> T.ccext + ) : MLTREE_SIMPLIFIER = +struct + + structure T = T + structure I = T.I + structure R = MLTreeRewrite + (structure T = T + val sext = sext and rext = rext and fext = fext and ccext = ccext + ) + + type simplifier = T.rewriter + + val _ = "literals" + val zero = 0i + val zeroT = T.LI zero + + fun simplify {addressWidth, signedAddress} = + let + + fun dm T.DIV_TO_ZERO = I.DIV_TO_ZERO + | dm T.DIV_TO_NEGINF = I.DIV_TO_NEGINF + + fun sim ==> exp = + let + in (* perform algebraic simplification and constant folding *) + case exp of + T.ADD(ty,T.ADD(ty', a, T.LI x), T.LI y) where ty = ty' => + T.ADD(ty,a,T.LI(I.ADD(ty,x,y))) + | T.ADD(ty,T.LI 0i,x) => x + | T.ADD(ty,x,T.LI 0i) => x + | T.ADD(ty,T.LI x,T.LI y) => T.LI(I.ADD(ty,x,y)) + | T.ADD(ty,T.LABEXP x,T.LABEXP y) => T.LABEXP(T.ADD(ty,x,y)) + + | T.SUB(ty,T.LI 0i,T.SUB(ty',T.LI 0i, a)) where ty = ty' => a + | T.SUB(ty,T.SUB(ty', a, T.LI x), T.LI y) where ty = ty' => + T.SUB(ty,a,T.LI(I.ADD(ty,x,y))) + | T.SUB(ty,T.LABEXP x,T.LABEXP y) => T.LABEXP(T.SUB(ty,x,y)) + + | T.SUB(ty,a,T.LI 0i) => a + | T.SUB(ty,T.LI x,T.LI y) => T.LI(I.SUB(ty,x,y)) + + | T.MULS(ty,T.LI 0i,_) => zeroT + | T.MULS(ty,_,T.LI 0i) => zeroT + | T.MULS(ty,T.LI 1i,x) => x + | T.MULS(ty,x,T.LI 1i) => x + | T.MULS(ty,T.LI x,T.LI y) => T.LI(I.MULS(ty,x,y)) + | T.MULS(ty,T.LABEXP x,T.LABEXP y) => T.LABEXP(T.MULS(ty,x,y)) + + | T.DIVS(m,ty,a,T.LI 1i) => a + | T.DIVS(m,ty,T.LI x,T.LI y) where y <> zero => T.LI(I.DIVS(dm m,ty,x,y)) + | T.DIVS(m,ty,T.LABEXP x,T.LABEXP y) => T.LABEXP(T.DIVS(m,ty,x,y)) + + | T.REMS(m,ty,a,T.LI 1i) => zeroT + | T.REMS(m,ty,T.LI x,T.LI y) where y <> zero => T.LI(I.REMS(dm m,ty,x,y)) + | T.REMS(m,ty,T.LABEXP x,T.LABEXP y) => T.LABEXP(T.REMS(m,ty,x,y)) + + | T.MULU(ty,T.LI 0i,_) => zeroT + | T.MULU(ty,_,T.LI 0i) => zeroT + | T.MULU(ty,T.LI 1i,x) => x + | T.MULU(ty,x,T.LI 1i) => x + | T.MULU(ty,T.LI x,T.LI y) => T.LI(I.MULU(ty,x,y)) + | T.MULU(ty,T.LABEXP x,T.LABEXP y) => T.LABEXP(T.MULU(ty,x,y)) + + | T.DIVU(ty,a,T.LI 1i) => a + | T.DIVU(ty,T.LI x,T.LI y) where y <> zero => T.LI(I.DIVU(ty,x,y)) + | T.DIVU(ty,T.LABEXP x,T.LABEXP y) => T.LABEXP(T.DIVU(ty,x,y)) + + | T.REMU(ty,a,T.LI 1i) => zeroT + | T.REMU(ty,T.LI x,T.LI y) where y <> zero => T.LI(I.REMU(ty,x,y)) + | T.REMU(ty,T.LABEXP x,T.LABEXP y) => T.LABEXP(T.REMU(ty,x,y)) + + | T.NEGT(ty,T.LI x) => (T.LI(I.NEGT(ty,x)) handle Overflow => exp) + | T.NEGT(ty,T.LABEXP x) => T.LABEXP(T.NEGT(ty,x)) + + | T.ADDT(ty,T.LI 0i,x) => x + | T.ADDT(ty,x,T.LI 0i) => x + | T.ADDT(ty,T.LI x,T.LI y) => + (T.LI(I.ADDT(ty,x,y)) handle Overflow => exp) + | T.ADDT(ty,T.LABEXP x,T.LABEXP y) => T.LABEXP(T.ADDT(ty,x,y)) + + | T.SUBT(ty,a,T.LI 0i) => a + | T.SUBT(ty,T.LI x,T.LI y) => + (T.LI(I.SUBT(ty,x,y)) handle Overflow => exp) + | T.SUBT(ty,T.LABEXP x,T.LABEXP y) => T.LABEXP(T.SUBT(ty,x,y)) + + | T.MULT(ty,T.LI 0i,_) => zeroT + | T.MULT(ty,_,T.LI 0i) => zeroT + | T.MULT(ty,T.LI 1i,x) => x + | T.MULT(ty,x,T.LI 1i) => x + | T.MULT(ty,T.LI x,T.LI y) => + (T.LI(I.MULT(ty,x,y)) handle Overflow => exp) + | T.MULT(ty,T.LABEXP x,T.LABEXP y) => T.LABEXP(T.MULT(ty,x,y)) + + | T.DIVT(m,ty,a,T.LI 1i) => a + | T.DIVT(m,ty,T.LI x,T.LI y) where y <> zero => T.LI(I.DIVT(dm m,ty,x,y)) + | T.DIVT(m,ty,T.LABEXP x,T.LABEXP y) => T.LABEXP(T.DIVT(m,ty,x,y)) + + | T.ANDB(_,_,b as T.LI 0i) => b + | T.ANDB(_,a as T.LI 0i,_) => a + | T.ANDB(ty,T.NOTB(ty',a),T.NOTB(ty'',b)) + where ty = ty' andalso ty' = ty'' => T.NOTB(ty,T.ORB(ty,a,b)) + | T.ANDB(ty,T.LI x,T.LI y) => T.LI(I.ANDB(ty,x,y)) + | T.ANDB(ty,T.LABEXP x,T.LABEXP y) => T.LABEXP(T.ANDB(ty,x,y)) + + | T.ORB(_,a,T.LI 0i) => a + | T.ORB(_,T.LI 0i,b) => b + | T.ORB(ty,T.NOTB(ty',a),T.NOTB(ty'',b)) + where ty = ty' andalso ty' = ty'' => T.NOTB(ty,T.ANDB(ty,a,b)) + | T.ORB(ty,T.LI x,T.LI y) => T.LI(I.ORB(ty,x,y)) + | T.ORB(ty,T.LABEXP x,T.LABEXP y) => T.LABEXP(T.ORB(ty,x,y)) + + | T.XORB(ty,a,T.LI 0i) => a + | T.XORB(ty,T.LI 0i,b) => b + | T.XORB(ty,T.NOTB(ty',a),T.NOTB(ty'',b)) + where ty = ty' andalso ty' = ty'' => T.NOTB(ty,T.XORB(ty,a,b)) + | T.XORB(ty,T.LI x,T.LI y) => T.LI(I.XORB(ty,x,y)) + | T.XORB(ty,T.LABEXP x,T.LABEXP y) => T.LABEXP(T.XORB(ty,x,y)) + + | T.EQVB(ty,a,T.LI 0i) => zeroT + | T.EQVB(ty,T.LI 0i,b) => zeroT + | T.EQVB(ty,T.LI x,T.LI y) => T.LI(I.EQVB(ty,x,y)) + | T.EQVB(ty,T.LABEXP x,T.LABEXP y) => T.LABEXP(T.EQVB(ty,x,y)) + + | T.NOTB(ty,T.NOTB(ty',a)) where ty = ty' => a + | T.NOTB(ty,T.LI n) => T.LI(I.NOTB(ty, n)) + | T.NOTB(ty,T.LABEXP x) => T.LABEXP(T.NOTB(ty,x)) + + | T.SRA(ty,a,T.LI 0i) => a + | T.SRA(ty,T.LI 0i,_) => zeroT + | T.SRA(ty,T.LI x,T.LI y) => T.LI(I.SRA(ty,x,y)) + | T.SRA(ty,T.LABEXP x,T.LABEXP y) => T.LABEXP(T.SRA(ty,x,y)) + + | T.SRL(ty,a,T.LI 0i) => a + | T.SRL(ty,T.LI 0i,_) => zeroT + | T.SRL(ty,_,T.LI n) where IntInf.<=(IntInf.fromInt ty,n) => zeroT + | T.SRL(ty,T.LI x,T.LI y) => T.LI(I.SRL(ty,x,y)) + | T.SRL(ty,T.LABEXP x,T.LABEXP y) => T.LABEXP(T.SRL(ty,x,y)) + + | T.SLL(ty,a,T.LI 0i) => a + | T.SLL(ty,T.LI 0i,_) => zeroT + | T.SLL(ty,_,T.LI n) where IntInf.<=(IntInf.fromInt ty,n) => zeroT + | T.SLL(ty,T.LI x,T.LI y) => T.LI(I.SLL(ty,x,y)) + | T.SLL(ty,T.LABEXP x,T.LABEXP y) => T.LABEXP(T.SLL(ty,x,y)) + + (* reig *) + + (* MLtree does not have an UNSIGNED LOAD operation. In targets + where both uload and sload are provided, T.LOAD translates to + uload. To get the sload, the client must emit: + T.SX(ty,ty',T.LOAD(ty',ea,mem)) + We don't want to simplify this here, so that the instruction + selector sees it. + *) + + | T.SX(ty,ty',T.LOAD _) => exp + + | T.SX(ty,ty',e) where ty = ty' => e + | T.SX(ty,ty',T.LI n) => T.LI(I.SX(ty,ty',n)) + | T.SX(ty,ty',T.LABEXP x) => T.LABEXP(T.SX(ty,ty',x)) + + | T.ZX(ty,ty',e) where ty = ty' => e + | T.ZX(ty,ty',T.LI n) => T.LI(I.ZX(ty,ty',n)) + | T.ZX(ty,ty',T.LABEXP x) => T.LABEXP(T.ZX(ty,ty',x)) + + | T.COND(ty,T.TRUE,a,b) => a + | T.COND(ty,T.FALSE,a,b) => b + + | exp => exp + end + + and simStm ==> (T.IF(T.TRUE,yes,no)) = yes + | simStm ==> (T.IF(T.FALSE,yes,no)) = no + | simStm ==> (T.SEQ[x]) = x + | simStm ==> s = s + + and simF ==> (T.FNEG(ty,T.FNEG(ty',e))) where (ty = ty') = e + | simF ==> (T.CVTF2F(ty,ty',e)) where (ty = ty') = e + | simF ==> (T.FCOND(ty,T.TRUE,yes,no)) = yes + | simF ==> (T.FCOND(ty,T.FALSE,yes,no)) = no + | simF ==> exp = exp + + and cc false = T.FALSE | cc true = T.TRUE + and simCC ==> (T.CMP(ty,T.EQ,T.LI x,T.LI y)) = cc(I.EQ(ty,x,y)) + | simCC ==> (T.CMP(ty,T.NE,T.LI x,T.LI y)) = cc(I.NE(ty,x,y)) + | simCC ==> (T.CMP(ty,T.GT,T.LI x,T.LI y)) = cc(I.GT(ty,x,y)) + | simCC ==> (T.CMP(ty,T.GE,T.LI x,T.LI y)) = cc(I.GE(ty,x,y)) + | simCC ==> (T.CMP(ty,T.LT,T.LI x,T.LI y)) = cc(I.LT(ty,x,y)) + | simCC ==> (T.CMP(ty,T.LE,T.LI x,T.LI y)) = cc(I.LE(ty,x,y)) + | simCC ==> (T.CMP(ty,T.GTU,T.LI x,T.LI y)) = cc(I.GTU(ty,x,y)) + | simCC ==> (T.CMP(ty,T.LTU,T.LI x,T.LI y)) = cc(I.LTU(ty,x,y)) + | simCC ==> (T.CMP(ty,T.GEU,T.LI x,T.LI y)) = cc(I.GEU(ty,x,y)) + | simCC ==> (T.CMP(ty,T.LEU,T.LI x,T.LI y)) = cc(I.LEU(ty,x,y)) + | simCC ==> (T.AND(T.TRUE,x)) = x + | simCC ==> (T.AND(x,T.TRUE)) = x + | simCC ==> (T.AND(T.FALSE,x)) = T.FALSE + | simCC ==> (T.AND(x,T.FALSE)) = T.FALSE + | simCC ==> (T.OR(T.FALSE,x)) = x + | simCC ==> (T.OR(x,T.FALSE)) = x + | simCC ==> (T.OR(T.TRUE,x)) = T.TRUE + | simCC ==> (T.OR(x,T.TRUE)) = T.TRUE + | simCC ==> (T.XOR(T.TRUE,T.TRUE)) = T.FALSE + | simCC ==> (T.XOR(T.FALSE,x)) = x + | simCC ==> (T.XOR(x,T.FALSE)) = x + | simCC ==> (T.XOR(T.TRUE,x)) = T.NOT x + | simCC ==> (T.XOR(x,T.TRUE)) = T.NOT x + | simCC ==> (T.EQV(T.FALSE,T.FALSE)) = T.TRUE + | simCC ==> (T.EQV(T.TRUE,x)) = x + | simCC ==> (T.EQV(x,T.TRUE)) = x + | simCC ==> (T.EQV(T.FALSE,x)) = T.NOT x + | simCC ==> (T.EQV(x,T.FALSE)) = T.NOT x + | simCC ==> exp = exp + + in R.rewrite {rexp=sim,fexp=simF,ccexp=simCC,stm=simStm} end +end +end (* local *) diff --git a/MLRISC/mltree/mltree-simplify.sig b/MLRISC/mltree/mltree-simplify.sig new file mode 100644 index 0000000..554a82f --- /dev/null +++ b/MLRISC/mltree/mltree-simplify.sig @@ -0,0 +1,16 @@ +(* + * Performs simple local optimizations. + * Constant folding, algebraic simplication and some dead code elimination. + *) +signature MLTREE_SIMPLIFIER = +sig + + structure T : MLTREE + + type simplifier = T.rewriter + val simplify : + { addressWidth : int, (* width of address in bits *) + signedAddress : bool (* is the address computation signed? *) + } -> simplifier + +end diff --git a/MLRISC/mltree/mltree-simplify.sml b/MLRISC/mltree/mltree-simplify.sml new file mode 100644 index 0000000..75e2867 --- /dev/null +++ b/MLRISC/mltree/mltree-simplify.sml @@ -0,0 +1,1371 @@ +(* WARNING: this is generated by running 'nowhere mltree-simplify.in'. + * Do not edit this file directly. + * Version 1.2.2 + *) + +(*#line 15.1 "mltree-simplify.in"*) +functor MLTreeSimplifier( +(*#line 16.4 "mltree-simplify.in"*) + structure T : MLTREE + +(*#line 17.4 "mltree-simplify.in"*) + structure Size : MLTREE_SIZE where T=T + +(*#line 20.4 "mltree-simplify.in"*) + val sext : T.rewriter -> T.sext -> T.sext + +(*#line 21.4 "mltree-simplify.in"*) + val rext : T.rewriter -> T.rext -> T.rext + +(*#line 22.4 "mltree-simplify.in"*) + val fext : T.rewriter -> T.fext -> T.fext + +(*#line 23.4 "mltree-simplify.in"*) + val ccext : T.rewriter -> T.ccext -> T.ccext + ): MLTREE_SIMPLIFIER = +struct + +(*#line 27.4 "mltree-simplify.in"*) + structure T = T + +(*#line 28.4 "mltree-simplify.in"*) + structure I = T.I + +(*#line 29.4 "mltree-simplify.in"*) + structure R = MLTreeRewrite + ( +(*#line 30.7 "mltree-simplify.in"*) + structure T = T + +(*#line 31.7 "mltree-simplify.in"*) + val sext = sext + and rext = rext + and fext = fext + and ccext = ccext + ) + + +(*#line 34.4 "mltree-simplify.in"*) + type simplifier = T.rewriter + +(*#line 36.4 "mltree-simplify.in"*) + val lit_11 = (IntInf.fromInt 0) + and lit_16 = (IntInf.fromInt 1) + +(*#line 37.4 "mltree-simplify.in"*) + val zero = (IntInf.fromInt 0) + +(*#line 38.4 "mltree-simplify.in"*) + val zeroT = T.LI zero + +(*#line 40.4 "mltree-simplify.in"*) + fun simplify {addressWidth, signedAddress} = + let +(*#line 43.4 "mltree-simplify.in"*) + fun dm (T.DIV_TO_ZERO) = I.DIV_TO_ZERO + | dm (T.DIV_TO_NEGINF) = I.DIV_TO_NEGINF + +(*#line 46.4 "mltree-simplify.in"*) + fun sim ==> exp = + let val v_3 = exp + fun state_165 e = e + fun state_149 () = zeroT + fun state_158 () = zeroT + fun state_180 v_3 = + let val exp = v_3 + in exp + end + fun state_118 (v_1, v_4) = + let val b = v_4 + and ty = v_1 + in b + end + fun state_15 (v_1, v_0) = + let val a = v_0 + and ty = v_1 + in a + end + fun state_148 v_1 = + let val ty = v_1 + in zeroT + end + fun state_140 v_1 = + let val ty = v_1 + in zeroT + end + fun state_157 v_1 = + let val ty = v_1 + in zeroT + end + fun state_109 v_4 = + let val b = v_4 + in b + end + fun state_45 (v_1, v_4) = + let val ty = v_1 + and x = v_4 + in x + end + fun state_43 v_1 = + let val ty = v_1 + in zeroT + end + fun state_47 (v_1, v_0) = + let val ty = v_1 + and x = v_0 + in x + end + fun state_85 (v_1, v_4) = + let val ty = v_1 + and x = v_4 + in x + end + fun state_83 v_1 = + let val ty = v_1 + in zeroT + end + fun state_87 (v_1, v_0) = + let val ty = v_1 + and x = v_0 + in x + end + fun state_23 (v_1, v_4) = + let val ty = v_1 + and x = v_4 + in x + end + fun state_21 v_1 = + let val ty = v_1 + in zeroT + end + fun state_25 (v_1, v_0) = + let val ty = v_1 + and x = v_0 + in x + end + fun state_127 (v_1, v_4) = + let val b = v_4 + and ty = v_1 + in zeroT + end + fun state_100 v_0 = + let val a = v_0 + in a + end + fun state_69 (v_1, v_0) = + let val ty = v_1 + and x = v_0 + in x + end + fun state_5 (v_1, v_0) = + let val ty = v_1 + and x = v_0 + in x + end + fun state_1717 (v_3, v_1, v_10, v_4) = (if ((IntInf.compare (v_10, lit_11)) = EQUAL) + then (state_118 (v_1, v_4)) + else (state_180 v_3)) + fun state_1450 (v_3, v_1, v_0, v_2) = (if ((IntInf.compare (v_2, lit_11)) = EQUAL) + then (state_15 (v_1, v_0)) + else (state_180 v_3)) + fun state_1279 (v_3, v_1, v_10) = (if ((IntInf.compare (v_10, lit_11)) = EQUAL) + then (state_148 v_1) + else (state_180 v_3)) + fun state_1192 (v_3, v_1, v_10) = (if ((IntInf.compare (v_10, lit_11)) = EQUAL) + then (state_140 v_1) + else (state_180 v_3)) + fun state_1021 (v_3, v_1, v_10) = (if ((IntInf.compare (v_10, lit_11)) = EQUAL) + then (state_157 v_1) + else (state_180 v_3)) + fun state_916 (v_3, v_10, v_4) = (if ((IntInf.compare (v_10, lit_11)) = EQUAL) + then (state_109 v_4) + else (state_180 v_3)) + fun state_820 (v_3, v_1, v_0, v_2) = (if ((IntInf.compare (v_2, lit_11)) = EQUAL) + then (state_43 v_1) + else (if ((IntInf.compare (v_2, lit_16)) = EQUAL) + then (state_47 (v_1, v_0)) + else (state_180 v_3))) + fun state_731 (v_3, v_1, v_0, v_2) = (if ((IntInf.compare (v_2, lit_11)) = EQUAL) + then (state_83 v_1) + else (if ((IntInf.compare (v_2, lit_16)) = EQUAL) + then (state_87 (v_1, v_0)) + else (state_180 v_3))) + fun state_642 (v_3, v_1, v_0, v_2) = (if ((IntInf.compare (v_2, lit_11)) = EQUAL) + then (state_21 v_1) + else (if ((IntInf.compare (v_2, lit_16)) = EQUAL) + then (state_25 (v_1, v_0)) + else (state_180 v_3))) + fun state_555 (v_3, v_1, v_10, v_4) = (if ((IntInf.compare (v_10, lit_11)) = EQUAL) + then (state_127 (v_1, v_4)) + else (state_180 v_3)) + fun state_441 (v_3, v_0, v_10) = (if ((IntInf.compare (v_10, lit_11)) = EQUAL) + then (state_100 v_0) + else (state_180 v_3)) + fun state_354 (v_3, v_1, v_0, v_2) = (if ((IntInf.compare (v_2, lit_11)) = EQUAL) + then (state_69 (v_1, v_0)) + else (state_180 v_3)) + fun state_181 (v_3, v_1, v_0, v_2) = (if ((IntInf.compare (v_2, lit_11)) = EQUAL) + then (state_5 (v_1, v_0)) + else (state_180 v_3)) + fun state_1451 (v_3, v_1, v_0, v_4) = + (case v_4 of + T.LI v_2 => state_1450 (v_3, v_1, v_0, v_2) + | _ => state_180 v_3 + ) + fun state_182 (v_3, v_1, v_0, v_4) = + (case v_4 of + T.LI v_2 => state_181 (v_3, v_1, v_0, v_2) + | _ => state_180 v_3 + ) + in + (case v_3 of + T.ADD v_5 => + let val (v_1, v_0, v_4) = v_5 + in + (case v_0 of + T.ADD v_10 => + let val (v_7, v_9, v_13) = v_10 + in + (case v_13 of + T.LI v_12 => + (case v_4 of + T.LI v_2 => + let val a = v_9 + and ty = v_1 + and ty' = v_7 + and x = v_12 + and y = v_2 + in (if (ty = ty') + then (T.ADD (ty, a, T.LI (I.ADD (ty, x, y)))) + else (state_181 (v_3, v_1, v_0, v_2))) + end + | _ => state_180 v_3 + ) + | _ => state_182 (v_3, v_1, v_0, v_4) + ) + end + | T.LABEXP v_10 => + (case v_4 of + T.LABEXP v_2 => + let val ty = v_1 + and x = v_10 + and y = v_2 + in T.LABEXP (T.ADD (ty, x, y)) + end + | T.LI v_2 => state_181 (v_3, v_1, v_0, v_2) + | _ => state_180 v_3 + ) + | T.LI v_10 => (if ((IntInf.compare (v_10, lit_11)) = EQUAL) + then + let val ty = v_1 + and x = v_4 + in x + end + else + (case v_4 of + T.LI v_2 => (if ((IntInf.compare (v_2, lit_11)) = EQUAL) + then (state_5 (v_1, v_0)) + else + let val ty = v_1 + and x = v_10 + and y = v_2 + in T.LI (I.ADD (ty, x, y)) + end) + | _ => state_180 v_3 + )) + | _ => state_182 (v_3, v_1, v_0, v_4) + ) + end + | T.ADDT v_5 => + let val (v_1, v_0, v_4) = v_5 + in + (case v_0 of + T.LABEXP v_10 => + (case v_4 of + T.LABEXP v_2 => + let val ty = v_1 + and x = v_10 + and y = v_2 + in T.LABEXP (T.ADDT (ty, x, y)) + end + | T.LI v_2 => state_354 (v_3, v_1, v_0, v_2) + | _ => state_180 v_3 + ) + | T.LI v_10 => (if ((IntInf.compare (v_10, lit_11)) = EQUAL) + then + let val ty = v_1 + and x = v_4 + in x + end + else + (case v_4 of + T.LI v_2 => (if ((IntInf.compare (v_2, lit_11)) = EQUAL) + then (state_69 (v_1, v_0)) + else + let val ty = v_1 + and x = v_10 + and y = v_2 + in ((T.LI (I.ADDT (ty, x, y))) handle Overflow => exp +) + end) + | _ => state_180 v_3 + )) + | _ => + (case v_4 of + T.LI v_2 => state_354 (v_3, v_1, v_0, v_2) + | _ => state_180 v_3 + ) + ) + end + | T.ANDB v_5 => + let val (v_1, v_0, v_4) = v_5 + in + (case v_4 of + T.LABEXP v_2 => + (case v_0 of + T.LABEXP v_10 => + let val ty = v_1 + and x = v_10 + and y = v_2 + in T.LABEXP (T.ANDB (ty, x, y)) + end + | T.LI v_10 => state_441 (v_3, v_0, v_10) + | _ => state_180 v_3 + ) + | T.LI v_2 => (if ((IntInf.compare (v_2, lit_11)) = EQUAL) + then + let val b = v_4 + in b + end + else + (case v_0 of + T.LI v_10 => (if ((IntInf.compare (v_10, lit_11)) = EQUAL) + then (state_100 v_0) + else + let val ty = v_1 + and x = v_10 + and y = v_2 + in T.LI (I.ANDB (ty, x, y)) + end) + | _ => state_180 v_3 + )) + | T.NOTB v_2 => + (case v_0 of + T.LI v_10 => state_441 (v_3, v_0, v_10) + | T.NOTB v_10 => + let val (v_7, v_9) = v_10 + in + let val (v_6, v_8) = v_2 + in + let val a = v_9 + and b = v_8 + and ty = v_1 + and ty' = v_7 + and ty'' = v_6 + in (if ((ty = ty') andalso (ty' = ty'')) + then (T.NOTB (ty, T.ORB (ty, a, b))) + else (state_180 v_3)) + end + end + end + | _ => state_180 v_3 + ) + | _ => + (case v_0 of + T.LI v_10 => state_441 (v_3, v_0, v_10) + | _ => state_180 v_3 + ) + ) + end + | T.COND v_5 => + let val (v_1, v_0, v_4, v_18) = v_5 + in + (case v_0 of + T.FALSE => + let val a = v_4 + and b = v_18 + and ty = v_1 + in b + end + | T.TRUE => + let val a = v_4 + and b = v_18 + and ty = v_1 + in a + end + | _ => state_180 v_3 + ) + end + | T.DIVS v_5 => + let val (v_1, v_0, v_4, v_18) = v_5 + in + (case v_18 of + T.LABEXP v_17 => + (case v_4 of + T.LABEXP v_2 => + let val m = v_1 + and ty = v_0 + and x = v_2 + and y = v_17 + in T.LABEXP (T.DIVS (m, ty, x, y)) + end + | _ => state_180 v_3 + ) + | T.LI v_17 => (if ((IntInf.compare (v_17, lit_16)) = EQUAL) + then + let val a = v_4 + and m = v_1 + and ty = v_0 + in a + end + else + (case v_4 of + T.LI v_2 => + let val m = v_1 + and ty = v_0 + and x = v_2 + and y = v_17 + in (if (y <> zero) + then (T.LI (I.DIVS (dm m, ty, x, y))) + else (state_180 v_3)) + end + | _ => state_180 v_3 + )) + | _ => state_180 v_3 + ) + end + | T.DIVT v_5 => + let val (v_1, v_0, v_4, v_18) = v_5 + in + (case v_18 of + T.LABEXP v_17 => + (case v_4 of + T.LABEXP v_2 => + let val m = v_1 + and ty = v_0 + and x = v_2 + and y = v_17 + in T.LABEXP (T.DIVT (m, ty, x, y)) + end + | _ => state_180 v_3 + ) + | T.LI v_17 => (if ((IntInf.compare (v_17, lit_16)) = EQUAL) + then + let val a = v_4 + and m = v_1 + and ty = v_0 + in a + end + else + (case v_4 of + T.LI v_2 => + let val m = v_1 + and ty = v_0 + and x = v_2 + and y = v_17 + in (if (y <> zero) + then (T.LI (I.DIVT (dm m, ty, x, y))) + else (state_180 v_3)) + end + | _ => state_180 v_3 + )) + | _ => state_180 v_3 + ) + end + | T.DIVU v_5 => + let val (v_1, v_0, v_4) = v_5 + in + (case v_4 of + T.LABEXP v_2 => + (case v_0 of + T.LABEXP v_10 => + let val ty = v_1 + and x = v_10 + and y = v_2 + in T.LABEXP (T.DIVU (ty, x, y)) + end + | _ => state_180 v_3 + ) + | T.LI v_2 => (if ((IntInf.compare (v_2, lit_16)) = EQUAL) + then + let val a = v_0 + and ty = v_1 + in a + end + else + (case v_0 of + T.LI v_10 => + let val ty = v_1 + and x = v_10 + and y = v_2 + in (if (y <> zero) + then (T.LI (I.DIVU (ty, x, y))) + else (state_180 v_3)) + end + | _ => state_180 v_3 + )) + | _ => state_180 v_3 + ) + end + | T.EQVB v_5 => + let val (v_1, v_0, v_4) = v_5 + in + (case v_4 of + T.LABEXP v_2 => + (case v_0 of + T.LABEXP v_10 => + let val ty = v_1 + and x = v_10 + and y = v_2 + in T.LABEXP (T.EQVB (ty, x, y)) + end + | T.LI v_10 => state_555 (v_3, v_1, v_10, v_4) + | _ => state_180 v_3 + ) + | T.LI v_2 => (if ((IntInf.compare (v_2, lit_11)) = EQUAL) + then + let val a = v_0 + and ty = v_1 + in zeroT + end + else + (case v_0 of + T.LI v_10 => (if ((IntInf.compare (v_10, lit_11)) = EQUAL) + then (state_127 (v_1, v_4)) + else + let val ty = v_1 + and x = v_10 + and y = v_2 + in T.LI (I.EQVB (ty, x, y)) + end) + | _ => state_180 v_3 + )) + | _ => + (case v_0 of + T.LI v_10 => state_555 (v_3, v_1, v_10, v_4) + | _ => state_180 v_3 + ) + ) + end + | T.MULS v_5 => + let val (v_1, v_0, v_4) = v_5 + in + (case v_0 of + T.LABEXP v_10 => + (case v_4 of + T.LABEXP v_2 => + let val ty = v_1 + and x = v_10 + and y = v_2 + in T.LABEXP (T.MULS (ty, x, y)) + end + | T.LI v_2 => state_642 (v_3, v_1, v_0, v_2) + | _ => state_180 v_3 + ) + | T.LI v_10 => (if ((IntInf.compare (v_10, lit_11)) = EQUAL) + then + let val ty = v_1 + in zeroT + end + else (if ((IntInf.compare (v_10, lit_16)) = EQUAL) + then + (case v_4 of + T.LI v_2 => (if ((IntInf.compare (v_2, lit_11)) = EQUAL) + then (state_21 v_1) + else (if ((IntInf.compare (v_2, lit_16)) = EQUAL) + then (state_23 (v_1, v_4)) + else (state_23 (v_1, v_4)))) + | _ => state_23 (v_1, v_4) + ) + else + (case v_4 of + T.LI v_2 => (if ((IntInf.compare (v_2, lit_11)) = EQUAL) + then (state_21 v_1) + else (if ((IntInf.compare (v_2, lit_16)) = EQUAL) + then (state_25 (v_1, v_0)) + else + let val ty = v_1 + and x = v_10 + and y = v_2 + in T.LI (I.MULS (ty, x, y)) + end)) + | _ => state_180 v_3 + ))) + | _ => + (case v_4 of + T.LI v_2 => state_642 (v_3, v_1, v_0, v_2) + | _ => state_180 v_3 + ) + ) + end + | T.MULT v_5 => + let val (v_1, v_0, v_4) = v_5 + in + (case v_0 of + T.LABEXP v_10 => + (case v_4 of + T.LABEXP v_2 => + let val ty = v_1 + and x = v_10 + and y = v_2 + in T.LABEXP (T.MULT (ty, x, y)) + end + | T.LI v_2 => state_731 (v_3, v_1, v_0, v_2) + | _ => state_180 v_3 + ) + | T.LI v_10 => (if ((IntInf.compare (v_10, lit_11)) = EQUAL) + then + let val ty = v_1 + in zeroT + end + else (if ((IntInf.compare (v_10, lit_16)) = EQUAL) + then + (case v_4 of + T.LI v_2 => (if ((IntInf.compare (v_2, lit_11)) = EQUAL) + then (state_83 v_1) + else (if ((IntInf.compare (v_2, lit_16)) = EQUAL) + then (state_85 (v_1, v_4)) + else (state_85 (v_1, v_4)))) + | _ => state_85 (v_1, v_4) + ) + else + (case v_4 of + T.LI v_2 => (if ((IntInf.compare (v_2, lit_11)) = EQUAL) + then (state_83 v_1) + else (if ((IntInf.compare (v_2, lit_16)) = EQUAL) + then (state_87 (v_1, v_0)) + else + let val ty = v_1 + and x = v_10 + and y = v_2 + in ((T.LI (I.MULT (ty, x, y))) handle Overflow => exp +) + end)) + | _ => state_180 v_3 + ))) + | _ => + (case v_4 of + T.LI v_2 => state_731 (v_3, v_1, v_0, v_2) + | _ => state_180 v_3 + ) + ) + end + | T.MULU v_5 => + let val (v_1, v_0, v_4) = v_5 + in + (case v_0 of + T.LABEXP v_10 => + (case v_4 of + T.LABEXP v_2 => + let val ty = v_1 + and x = v_10 + and y = v_2 + in T.LABEXP (T.MULU (ty, x, y)) + end + | T.LI v_2 => state_820 (v_3, v_1, v_0, v_2) + | _ => state_180 v_3 + ) + | T.LI v_10 => (if ((IntInf.compare (v_10, lit_11)) = EQUAL) + then + let val ty = v_1 + in zeroT + end + else (if ((IntInf.compare (v_10, lit_16)) = EQUAL) + then + (case v_4 of + T.LI v_2 => (if ((IntInf.compare (v_2, lit_11)) = EQUAL) + then (state_43 v_1) + else (if ((IntInf.compare (v_2, lit_16)) = EQUAL) + then (state_45 (v_1, v_4)) + else (state_45 (v_1, v_4)))) + | _ => state_45 (v_1, v_4) + ) + else + (case v_4 of + T.LI v_2 => (if ((IntInf.compare (v_2, lit_11)) = EQUAL) + then (state_43 v_1) + else (if ((IntInf.compare (v_2, lit_16)) = EQUAL) + then (state_47 (v_1, v_0)) + else + let val ty = v_1 + and x = v_10 + and y = v_2 + in T.LI (I.MULU (ty, x, y)) + end)) + | _ => state_180 v_3 + ))) + | _ => + (case v_4 of + T.LI v_2 => state_820 (v_3, v_1, v_0, v_2) + | _ => state_180 v_3 + ) + ) + end + | T.NEGT v_5 => + let val (v_1, v_0) = v_5 + in + (case v_0 of + T.LABEXP v_10 => + let val ty = v_1 + and x = v_10 + in T.LABEXP (T.NEGT (ty, x)) + end + | T.LI v_10 => + let val ty = v_1 + and x = v_10 + in ((T.LI (I.NEGT (ty, x))) handle Overflow => exp +) + end + | _ => state_180 v_3 + ) + end + | T.NOTB v_5 => + let val (v_1, v_0) = v_5 + in + (case v_0 of + T.LABEXP v_10 => + let val ty = v_1 + and x = v_10 + in T.LABEXP (T.NOTB (ty, x)) + end + | T.LI v_10 => + let val n = v_10 + and ty = v_1 + in T.LI (I.NOTB (ty, n)) + end + | T.NOTB v_10 => + let val (v_7, v_9) = v_10 + in + let val a = v_9 + and ty = v_1 + and ty' = v_7 + in (if (ty = ty') + then a + else (state_180 v_3)) + end + end + | _ => state_180 v_3 + ) + end + | T.ORB v_5 => + let val (v_1, v_0, v_4) = v_5 + in + (case v_4 of + T.LABEXP v_2 => + (case v_0 of + T.LABEXP v_10 => + let val ty = v_1 + and x = v_10 + and y = v_2 + in T.LABEXP (T.ORB (ty, x, y)) + end + | T.LI v_10 => state_916 (v_3, v_10, v_4) + | _ => state_180 v_3 + ) + | T.LI v_2 => (if ((IntInf.compare (v_2, lit_11)) = EQUAL) + then + let val a = v_0 + in a + end + else + (case v_0 of + T.LI v_10 => (if ((IntInf.compare (v_10, lit_11)) = EQUAL) + then (state_109 v_4) + else + let val ty = v_1 + and x = v_10 + and y = v_2 + in T.LI (I.ORB (ty, x, y)) + end) + | _ => state_180 v_3 + )) + | T.NOTB v_2 => + (case v_0 of + T.LI v_10 => state_916 (v_3, v_10, v_4) + | T.NOTB v_10 => + let val (v_7, v_9) = v_10 + in + let val (v_6, v_8) = v_2 + in + let val a = v_9 + and b = v_8 + and ty = v_1 + and ty' = v_7 + and ty'' = v_6 + in (if ((ty = ty') andalso (ty' = ty'')) + then (T.NOTB (ty, T.ANDB (ty, a, b))) + else (state_180 v_3)) + end + end + end + | _ => state_180 v_3 + ) + | _ => + (case v_0 of + T.LI v_10 => state_916 (v_3, v_10, v_4) + | _ => state_180 v_3 + ) + ) + end + | T.REMS v_5 => + let val (v_1, v_0, v_4, v_18) = v_5 + in + (case v_18 of + T.LABEXP v_17 => + (case v_4 of + T.LABEXP v_2 => + let val m = v_1 + and ty = v_0 + and x = v_2 + and y = v_17 + in T.LABEXP (T.REMS (m, ty, x, y)) + end + | _ => state_180 v_3 + ) + | T.LI v_17 => (if ((IntInf.compare (v_17, lit_16)) = EQUAL) + then + let val a = v_4 + and m = v_1 + and ty = v_0 + in zeroT + end + else + (case v_4 of + T.LI v_2 => + let val m = v_1 + and ty = v_0 + and x = v_2 + and y = v_17 + in (if (y <> zero) + then (T.LI (I.REMS (dm m, ty, x, y))) + else (state_180 v_3)) + end + | _ => state_180 v_3 + )) + | _ => state_180 v_3 + ) + end + | T.REMU v_5 => + let val (v_1, v_0, v_4) = v_5 + in + (case v_4 of + T.LABEXP v_2 => + (case v_0 of + T.LABEXP v_10 => + let val ty = v_1 + and x = v_10 + and y = v_2 + in T.LABEXP (T.REMU (ty, x, y)) + end + | _ => state_180 v_3 + ) + | T.LI v_2 => (if ((IntInf.compare (v_2, lit_16)) = EQUAL) + then + let val a = v_0 + and ty = v_1 + in zeroT + end + else + (case v_0 of + T.LI v_10 => + let val ty = v_1 + and x = v_10 + and y = v_2 + in (if (y <> zero) + then (T.LI (I.REMU (ty, x, y))) + else (state_180 v_3)) + end + | _ => state_180 v_3 + )) + | _ => state_180 v_3 + ) + end + | T.SLL v_5 => + let val (v_1, v_0, v_4) = v_5 + in + (case v_4 of + T.LABEXP v_2 => + (case v_0 of + T.LABEXP v_10 => + let val ty = v_1 + and x = v_10 + and y = v_2 + in T.LABEXP (T.SLL (ty, x, y)) + end + | T.LI v_10 => state_1021 (v_3, v_1, v_10) + | _ => state_180 v_3 + ) + | T.LI v_2 => (if ((IntInf.compare (v_2, lit_11)) = EQUAL) + then + let val a = v_0 + and ty = v_1 + in a + end + else + (case v_0 of + T.LI v_10 => (if ((IntInf.compare (v_10, lit_11)) = EQUAL) + then (state_157 v_1) + else + let val n = v_2 + and ty = v_1 + in (if (IntInf.<= (IntInf.fromInt ty, n)) + then (state_158 ()) + else + let val ty = v_1 + and x = v_10 + and y = v_2 + in T.LI (I.SLL (ty, x, y)) + end) + end) + | _ => + let val n = v_2 + and ty = v_1 + in (if (IntInf.<= (IntInf.fromInt ty, n)) + then (state_158 ()) + else (state_180 v_3)) + end + )) + | _ => + (case v_0 of + T.LI v_10 => state_1021 (v_3, v_1, v_10) + | _ => state_180 v_3 + ) + ) + end + | T.SRA v_5 => + let val (v_1, v_0, v_4) = v_5 + in + (case v_4 of + T.LABEXP v_2 => + (case v_0 of + T.LABEXP v_10 => + let val ty = v_1 + and x = v_10 + and y = v_2 + in T.LABEXP (T.SRA (ty, x, y)) + end + | T.LI v_10 => state_1192 (v_3, v_1, v_10) + | _ => state_180 v_3 + ) + | T.LI v_2 => (if ((IntInf.compare (v_2, lit_11)) = EQUAL) + then + let val a = v_0 + and ty = v_1 + in a + end + else + (case v_0 of + T.LI v_10 => (if ((IntInf.compare (v_10, lit_11)) = EQUAL) + then (state_140 v_1) + else + let val ty = v_1 + and x = v_10 + and y = v_2 + in T.LI (I.SRA (ty, x, y)) + end) + | _ => state_180 v_3 + )) + | _ => + (case v_0 of + T.LI v_10 => state_1192 (v_3, v_1, v_10) + | _ => state_180 v_3 + ) + ) + end + | T.SRL v_5 => + let val (v_1, v_0, v_4) = v_5 + in + (case v_4 of + T.LABEXP v_2 => + (case v_0 of + T.LABEXP v_10 => + let val ty = v_1 + and x = v_10 + and y = v_2 + in T.LABEXP (T.SRL (ty, x, y)) + end + | T.LI v_10 => state_1279 (v_3, v_1, v_10) + | _ => state_180 v_3 + ) + | T.LI v_2 => (if ((IntInf.compare (v_2, lit_11)) = EQUAL) + then + let val a = v_0 + and ty = v_1 + in a + end + else + (case v_0 of + T.LI v_10 => (if ((IntInf.compare (v_10, lit_11)) = EQUAL) + then (state_148 v_1) + else + let val n = v_2 + and ty = v_1 + in (if (IntInf.<= (IntInf.fromInt ty, n)) + then (state_149 ()) + else + let val ty = v_1 + and x = v_10 + and y = v_2 + in T.LI (I.SRL (ty, x, y)) + end) + end) + | _ => + let val n = v_2 + and ty = v_1 + in (if (IntInf.<= (IntInf.fromInt ty, n)) + then (state_149 ()) + else (state_180 v_3)) + end + )) + | _ => + (case v_0 of + T.LI v_10 => state_1279 (v_3, v_1, v_10) + | _ => state_180 v_3 + ) + ) + end + | T.SUB v_5 => + let val (v_1, v_0, v_4) = v_5 + in + (case v_0 of + T.LABEXP v_10 => + (case v_4 of + T.LABEXP v_2 => + let val ty = v_1 + and x = v_10 + and y = v_2 + in T.LABEXP (T.SUB (ty, x, y)) + end + | T.LI v_2 => state_1450 (v_3, v_1, v_0, v_2) + | _ => state_180 v_3 + ) + | T.LI v_10 => + (case v_4 of + T.LI v_2 => (if ((IntInf.compare (v_2, lit_11)) = EQUAL) + then (state_15 (v_1, v_0)) + else + let val ty = v_1 + and x = v_10 + and y = v_2 + in T.LI (I.SUB (ty, x, y)) + end) + | T.SUB v_2 => + let val (v_6, v_8, v_14) = v_2 + in (if ((IntInf.compare (v_10, lit_11)) = EQUAL) + then + (case v_8 of + T.LI v_15 => (if ((IntInf.compare (v_15, lit_11)) = EQUAL) + then + let val a = v_14 + and ty = v_1 + and ty' = v_6 + in (if (ty = ty') + then a + else (state_180 v_3)) + end + else (state_180 v_3)) + | _ => state_180 v_3 + ) + else (state_180 v_3)) + end + | _ => state_180 v_3 + ) + | T.SUB v_10 => + let val (v_7, v_9, v_13) = v_10 + in + (case v_13 of + T.LI v_12 => + (case v_4 of + T.LI v_2 => + let val a = v_9 + and ty = v_1 + and ty' = v_7 + and x = v_12 + and y = v_2 + in (if (ty = ty') + then (T.SUB (ty, a, T.LI (I.ADD (ty, x, y)))) + else (state_1450 (v_3, v_1, v_0, v_2))) + end + | _ => state_180 v_3 + ) + | _ => state_1451 (v_3, v_1, v_0, v_4) + ) + end + | _ => state_1451 (v_3, v_1, v_0, v_4) + ) + end + | T.SUBT v_5 => + let val (v_1, v_0, v_4) = v_5 + in + (case v_4 of + T.LABEXP v_2 => + (case v_0 of + T.LABEXP v_10 => + let val ty = v_1 + and x = v_10 + and y = v_2 + in T.LABEXP (T.SUBT (ty, x, y)) + end + | _ => state_180 v_3 + ) + | T.LI v_2 => (if ((IntInf.compare (v_2, lit_11)) = EQUAL) + then + let val a = v_0 + and ty = v_1 + in a + end + else + (case v_0 of + T.LI v_10 => + let val ty = v_1 + and x = v_10 + and y = v_2 + in ((T.LI (I.SUBT (ty, x, y))) handle Overflow => exp +) + end + | _ => state_180 v_3 + )) + | _ => state_180 v_3 + ) + end + | T.SX v_5 => + let val (v_1, v_0, v_4) = v_5 + in + (case v_4 of + T.LABEXP v_2 => + let val e = v_4 + and ty = v_1 + and ty' = v_0 + in (if (ty = ty') + then (state_165 e) + else + let val ty = v_1 + and ty' = v_0 + and x = v_2 + in T.LABEXP (T.SX (ty, ty', x)) + end) + end + | T.LI v_2 => + let val e = v_4 + and ty = v_1 + and ty' = v_0 + in (if (ty = ty') + then (state_165 e) + else + let val n = v_2 + and ty = v_1 + and ty' = v_0 + in T.LI (I.SX (ty, ty', n)) + end) + end + | T.LOAD v_2 => + let val ty = v_1 + and ty' = v_0 + in exp + end + | _ => + let val e = v_4 + and ty = v_1 + and ty' = v_0 + in (if (ty = ty') + then (state_165 e) + else (state_180 v_3)) + end + ) + end + | T.XORB v_5 => + let val (v_1, v_0, v_4) = v_5 + in + (case v_4 of + T.LABEXP v_2 => + (case v_0 of + T.LABEXP v_10 => + let val ty = v_1 + and x = v_10 + and y = v_2 + in T.LABEXP (T.XORB (ty, x, y)) + end + | T.LI v_10 => state_1717 (v_3, v_1, v_10, v_4) + | _ => state_180 v_3 + ) + | T.LI v_2 => (if ((IntInf.compare (v_2, lit_11)) = EQUAL) + then + let val a = v_0 + and ty = v_1 + in a + end + else + (case v_0 of + T.LI v_10 => (if ((IntInf.compare (v_10, lit_11)) = EQUAL) + then (state_118 (v_1, v_4)) + else + let val ty = v_1 + and x = v_10 + and y = v_2 + in T.LI (I.XORB (ty, x, y)) + end) + | _ => state_180 v_3 + )) + | T.NOTB v_2 => + (case v_0 of + T.LI v_10 => state_1717 (v_3, v_1, v_10, v_4) + | T.NOTB v_10 => + let val (v_7, v_9) = v_10 + in + let val (v_6, v_8) = v_2 + in + let val a = v_9 + and b = v_8 + and ty = v_1 + and ty' = v_7 + and ty'' = v_6 + in (if ((ty = ty') andalso (ty' = ty'')) + then (T.NOTB (ty, T.XORB (ty, a, b))) + else (state_180 v_3)) + end + end + end + | _ => state_180 v_3 + ) + | _ => + (case v_0 of + T.LI v_10 => state_1717 (v_3, v_1, v_10, v_4) + | _ => state_180 v_3 + ) + ) + end + | T.ZX v_5 => + let val (v_1, v_0, v_4) = v_5 + in + let val e = v_4 + and ty = v_1 + and ty' = v_0 + in (if (ty = ty') + then e + else + (case v_4 of + T.LABEXP v_2 => + let val ty = v_1 + and ty' = v_0 + and x = v_2 + in T.LABEXP (T.ZX (ty, ty', x)) + end + | T.LI v_2 => + let val n = v_2 + and ty = v_1 + and ty' = v_0 + in T.LI (I.ZX (ty, ty', n)) + end + | _ => state_180 v_3 + )) + end + end + | _ => state_180 v_3 + ) + end + and simStm ==> (T.IF(T.TRUE, yes, no)) = yes + | simStm ==> (T.IF(T.FALSE, yes, no)) = no + | simStm ==> (T.SEQ[x]) = x + | simStm ==> s = s + and simF p_0 p_1 = + let val v_29 = (p_0, p_1) + fun state_8 (v_19, v_20) = + let val ==> = v_19 + and exp = v_20 + in exp + end + in + let val (v_19, v_20) = v_29 + in + (case v_20 of + T.CVTF2F v_26 => + let val (v_22, v_25, v_27) = v_26 + in + let val ==> = v_19 + and e = v_27 + and ty = v_22 + and ty' = v_25 + in (if (ty = ty') + then e + else (state_8 (v_19, v_20))) + end + end + | T.FCOND v_26 => + let val (v_22, v_25, v_27, v_28) = v_26 + in + (case v_25 of + T.FALSE => + let val ==> = v_19 + and no = v_28 + and ty = v_22 + and yes = v_27 + in no + end + | T.TRUE => + let val ==> = v_19 + and no = v_28 + and ty = v_22 + and yes = v_27 + in yes + end + | _ => state_8 (v_19, v_20) + ) + end + | T.FNEG v_26 => + let val (v_22, v_25) = v_26 + in + (case v_25 of + T.FNEG v_24 => + let val (v_21, v_23) = v_24 + in + let val ==> = v_19 + and e = v_23 + and ty = v_22 + and ty' = v_21 + in (if (ty = ty') + then e + else (state_8 (v_19, v_20))) + end + end + | _ => state_8 (v_19, v_20) + ) + end + | _ => state_8 (v_19, v_20) + ) + end + end + and cc false = T.FALSE + | cc true = T.TRUE + and simCC ==> (T.CMP(ty, T.EQ, T.LI x, T.LI y)) = cc (I.EQ (ty, x, y)) + | simCC ==> (T.CMP(ty, T.NE, T.LI x, T.LI y)) = cc (I.NE (ty, x, y)) + | simCC ==> (T.CMP(ty, T.GT, T.LI x, T.LI y)) = cc (I.GT (ty, x, y)) + | simCC ==> (T.CMP(ty, T.GE, T.LI x, T.LI y)) = cc (I.GE (ty, x, y)) + | simCC ==> (T.CMP(ty, T.LT, T.LI x, T.LI y)) = cc (I.LT (ty, x, y)) + | simCC ==> (T.CMP(ty, T.LE, T.LI x, T.LI y)) = cc (I.LE (ty, x, y)) + | simCC ==> (T.CMP(ty, T.GTU, T.LI x, T.LI y)) = cc (I.GTU (ty, x, y)) + | simCC ==> (T.CMP(ty, T.LTU, T.LI x, T.LI y)) = cc (I.LTU (ty, x, y)) + | simCC ==> (T.CMP(ty, T.GEU, T.LI x, T.LI y)) = cc (I.GEU (ty, x, y)) + | simCC ==> (T.CMP(ty, T.LEU, T.LI x, T.LI y)) = cc (I.LEU (ty, x, y)) + | simCC ==> (T.AND(T.TRUE, x)) = x + | simCC ==> (T.AND(x, T.TRUE)) = x + | simCC ==> (T.AND(T.FALSE, x)) = T.FALSE + | simCC ==> (T.AND(x, T.FALSE)) = T.FALSE + | simCC ==> (T.OR(T.FALSE, x)) = x + | simCC ==> (T.OR(x, T.FALSE)) = x + | simCC ==> (T.OR(T.TRUE, x)) = T.TRUE + | simCC ==> (T.OR(x, T.TRUE)) = T.TRUE + | simCC ==> (T.XOR(T.TRUE, T.TRUE)) = T.FALSE + | simCC ==> (T.XOR(T.FALSE, x)) = x + | simCC ==> (T.XOR(x, T.FALSE)) = x + | simCC ==> (T.XOR(T.TRUE, x)) = T.NOT x + | simCC ==> (T.XOR(x, T.TRUE)) = T.NOT x + | simCC ==> (T.EQV(T.FALSE, T.FALSE)) = T.TRUE + | simCC ==> (T.EQV(T.TRUE, x)) = x + | simCC ==> (T.EQV(x, T.TRUE)) = x + | simCC ==> (T.EQV(T.FALSE, x)) = T.NOT x + | simCC ==> (T.EQV(x, T.FALSE)) = T.NOT x + | simCC ==> exp = exp + in R.rewrite {rexp=sim, fexp=simF, ccexp=simCC, stm=simStm} + end +end + diff --git a/MLRISC/mltree/mltree-size.sig b/MLRISC/mltree/mltree-size.sig new file mode 100644 index 0000000..bbdbd65 --- /dev/null +++ b/MLRISC/mltree/mltree-size.sig @@ -0,0 +1,21 @@ +(* + * This module provides functions for computing the size MLTREE transformations. + * Basically, we want to support various non built-in datatype widths. + * This module handles the translation. + * + * -- Allen + *) +signature MLTREE_SIZE = +sig + + structure T : MLTREE + + val intTy : int (* natural width of integers *) + + (* + * Return the size of an expression + *) + val size : T.rexp -> T.ty + val fsize : T.fexp -> T.ty + +end diff --git a/MLRISC/mltree/mltree-size.sml b/MLRISC/mltree/mltree-size.sml new file mode 100644 index 0000000..1e942e4 --- /dev/null +++ b/MLRISC/mltree/mltree-size.sml @@ -0,0 +1,70 @@ +functor MLTreeSize + (structure T : MLTREE + val intTy : T.ty (* size of integer word *) + ) : MLTREE_SIZE = +struct + structure T = T + + val intTy = intTy + + fun size(T.REG(ty,_)) = ty + | size(T.LI _) = intTy + | size(T.LABEL _) = intTy + | size(T.CONST _) = intTy + | size(T.LABEXP e) = size e + | size(T.NEG(ty,_)) = ty + | size(T.ADD(ty,_,_)) = ty + | size(T.SUB(ty,_,_)) = ty + | size(T.MULS(ty,_,_)) = ty + | size(T.DIVS(_,ty,_,_)) = ty + | size(T.REMS(_,ty,_,_)) = ty + | size(T.MULU(ty,_,_)) = ty + | size(T.DIVU(ty,_,_)) = ty + | size(T.REMU(ty,_,_)) = ty + | size(T.NEGT(ty,_)) = ty + | size(T.ADDT(ty,_,_)) = ty + | size(T.SUBT(ty,_,_)) = ty + | size(T.MULT(ty,_,_)) = ty + | size(T.DIVT(_,ty,_,_)) = ty + | size(T.ANDB(ty,_,_)) = ty + | size(T.ORB(ty,_,_)) = ty + | size(T.XORB(ty,_,_)) = ty + | size(T.EQVB(ty,_,_)) = ty + | size(T.NOTB(ty,_)) = ty + | size(T.SRA(ty,_,_)) = ty + | size(T.SRL(ty,_,_)) = ty + | size(T.SLL(ty,_,_)) = ty + | size(T.SX(ty,_,_)) = ty + | size(T.ZX(ty,_,_)) = ty + | size(T.CVTF2I(ty,_,_,_)) = ty + | size(T.COND(ty,_,_,_)) = ty + | size(T.LOAD(ty,_,_)) = ty + | size(T.PRED(e,_)) = size e + | size(T.LET(_,e)) = size e + | size(T.REXT(ty,_)) = ty + | size(T.MARK(e,_)) = size e + | size(T.OP(ty,_,_)) = ty + | size(T.ARG(ty,_,_)) = ty + | size(T.$(ty,_,_)) = ty + | size(T.PARAM _) = intTy + | size(T.BITSLICE(ty,_,_)) = ty + | size(T.???) = intTy + + fun fsize(T.FREG(ty,_)) = ty + | fsize(T.FLOAD(ty,_,_)) = ty + | fsize(T.FADD(ty,_,_)) = ty + | fsize(T.FSUB(ty,_,_)) = ty + | fsize(T.FMUL(ty,_,_)) = ty + | fsize(T.FDIV(ty,_,_)) = ty + | fsize(T.FABS(ty,_)) = ty + | fsize(T.FNEG(ty,_)) = ty + | fsize(T.FSQRT(ty,_)) = ty + | fsize(T.FCOND(ty,_,_,_)) = ty + | fsize(T.CVTI2F(ty,_,_)) = ty + | fsize(T.CVTF2F(ty,_,_)) = ty + | fsize(T.FCOPYSIGN(ty,_,_)) = ty + | fsize(T.FPRED(e,_)) = fsize e + | fsize(T.FEXT(ty,_)) = ty + | fsize(T.FMARK(e,_)) = fsize e + +end diff --git a/MLRISC/mltree/mltree-stream.sig b/MLRISC/mltree/mltree-stream.sig new file mode 100644 index 0000000..1e43c95 --- /dev/null +++ b/MLRISC/mltree/mltree-stream.sig @@ -0,0 +1,36 @@ +(* mltree-stream.sig + * + * COPYRIGHT (c) 2001 Lucent Technologies, Bell Laboratories. + * + *) + +signature MLTREE_STREAM = sig + structure T : MLTREE + structure S : INSTRUCTION_STREAM + + (* + * Instruction streams + *) + type ('i,'cellset, 'cfg) stream = ('i, T.an list, 'cellset, 'cfg) S.stream + + (* + * Extension mechanism + *) + datatype ('instr, 'cellset, 'operand, 'addressing_mode, 'cfg) reducer = + REDUCER of + {reduceRexp : T.rexp -> T.reg, + reduceFexp : T.fexp -> T.reg, + reduceCCexp : T.ccexp -> T.reg, + reduceStm : T.stm * T.an list -> unit, + operand : T.rexp -> 'operand, + reduceOperand : 'operand -> T.reg, + addressOf : T.rexp -> 'addressing_mode, + emit : 'instr * T.an list -> unit, + instrStream : ('instr,'cellset, 'cfg) stream, + mltreeStream : (T.stm,T.mlrisc list, 'cfg) stream + } +end + + + + diff --git a/MLRISC/mltree/mltree-stream.sml b/MLRISC/mltree/mltree-stream.sml new file mode 100644 index 0000000..edadd96 --- /dev/null +++ b/MLRISC/mltree/mltree-stream.sml @@ -0,0 +1,36 @@ +(* mltree-stream.sml + * + * COPYRIGHT (c) 2001 Lucent Technologies, Bell Laboratories. + * + *) +functor MLTreeStream + ( structure T : MLTREE + structure S : INSTRUCTION_STREAM + ) : MLTREE_STREAM = +struct + structure T = T + structure S = S + (* + * Instruction streams + *) + type ('i,'cellset, 'cfg) stream = ('i, T.an list, 'cellset, 'cfg) S.stream + + (* + * Extension mechanism + *) + datatype ('instr,'cellset,'operand,'addressing_mode, 'cfg) reducer = + REDUCER of + { reduceRexp : T.rexp -> T.reg, + reduceFexp : T.fexp -> T.reg, + reduceCCexp : T.ccexp -> T.reg, + reduceStm : T.stm * T.an list -> unit, + operand : T.rexp -> 'operand, + reduceOperand : 'operand -> T.reg, + addressOf : T.rexp -> 'addressing_mode, + emit : 'instr * T.an list -> unit, + instrStream : ('instr,'cellset, 'cfg) stream, + mltreeStream : (T.stm,T.mlrisc list, 'cfg) stream + } + +end + diff --git a/MLRISC/mltree/mltree-utils.sig b/MLRISC/mltree/mltree-utils.sig new file mode 100644 index 0000000..e5a6571 --- /dev/null +++ b/MLRISC/mltree/mltree-utils.sig @@ -0,0 +1,42 @@ +(* + * Common operations on MLTREE + * + * -- Allen + *) +signature MLTREE_UTILS = +sig + + structure T : MLTREE + + (* + * Hashing + *) + val hashStm : T.stm -> word + val hashRexp : T.rexp -> word + val hashFexp : T.fexp -> word + val hashCCexp : T.ccexp -> word + + (* + * Equality + *) + val eqStm : T.stm * T.stm -> bool + val eqRexp : T.rexp * T.rexp -> bool + val eqFexp : T.fexp * T.fexp -> bool + val eqCCexp : T.ccexp * T.ccexp -> bool + val eqMlriscs : T.mlrisc list * T.mlrisc list -> bool + + (* + * Pretty printing + *) + val show : {def : int -> string, + use : int -> string, + regionDef : T.Region.region -> string, + regionUse : T.Region.region -> string + } -> T.printer + + val stmToString : T.stm -> string + val rexpToString : T.rexp -> string + val fexpToString : T.fexp -> string + val ccexpToString : T.ccexp -> string + +end diff --git a/MLRISC/mltree/mltree-utils.sml b/MLRISC/mltree/mltree-utils.sml new file mode 100644 index 0000000..e6e01d6 --- /dev/null +++ b/MLRISC/mltree/mltree-utils.sml @@ -0,0 +1,622 @@ +(* + * Common operations on MLTREE + * + * -- Allen + *) +functor MLTreeUtils + (structure T : MLTREE + (* Hashing extensions *) + val hashSext : T.hasher -> T.sext -> word + val hashRext : T.hasher -> T.rext -> word + val hashFext : T.hasher -> T.fext -> word + val hashCCext : T.hasher -> T.ccext -> word + + (* Equality extensions *) + val eqSext : T.equality -> T.sext * T.sext -> bool + val eqRext : T.equality -> T.rext * T.rext -> bool + val eqFext : T.equality -> T.fext * T.fext -> bool + val eqCCext : T.equality -> T.ccext * T.ccext -> bool + + (* Pretty printing extensions *) + val showSext : T.printer -> T.sext -> string + val showRext : T.printer -> T.ty * T.rext -> string + val showFext : T.printer -> T.fty * T.fext -> string + val showCCext : T.printer -> T.ty * T.ccext -> string + ) : MLTREE_UTILS = +struct + + structure T = T + structure I = T.I + structure Constant = T.Constant + structure Region = T.Region + structure B = T.Basis + structure C = CellsBasis + structure W = Word + + + val w = W.fromInt + val i2s = Int.toString + val toLower = String.map Char.toLower + + fun error msg = MLRiscErrorMsg.error("MLTreeUtils",msg) + fun wv(C.CELL{id, ...}) = w id + fun wvs is = + let fun f([],h) = h + | f(i::is,h) = f(is,wv i+h) + in f(is,0w0) end + + + (* + * Hashing + *) + val hashLabel = Label.hash + fun hasher() = {stm=hashStm, rexp=hashRexp, fexp=hashFexp, ccexp=hashCCexp} + and hashCtrl ctrl = wv ctrl + and hashStm stm = + case stm of + T.MV(t,dst,rexp) => 0w123 + w t + wv dst + hashRexp rexp + | T.CCMV(dst,ccexp) => 0w1234 + wv dst + hashCCexp ccexp + | T.FMV(fty,dst,fexp) => 0w12345 + w fty + wv dst + hashFexp fexp + | T.COPY(ty,dst,src) => 0w234 + w ty + wvs dst + wvs src + | T.FCOPY(fty,dst,src) => 0w456 + w fty + wvs dst + wvs src + | T.JMP(ea,labels) => 0w45 + hashRexp ea + | T.CALL{funct,targets,defs,uses,region,pops} => + hashRexp funct + hashMlriscs defs + hashMlriscs uses + | T.FLOW_TO(stm, _) => hashStm stm + | T.RET _ => 0w567 + | T.STORE(ty,ea,data,mem) => 0w888 + w ty + hashRexp ea + hashRexp data + | T.FSTORE(fty,ea,data,mem) => 0w7890 + w fty + hashRexp ea + hashFexp data + | T.BCC(a,lab) => 0w233 + hashCCexp a + hashLabel lab + | T.IF(a,b,c) => 0w233 + hashCCexp a + hashStm b + hashStm c + | T.ANNOTATION(stm, a) => hashStm stm + | T.PHI{preds,block} => w block + | T.SOURCE => 0w123 + | T.SINK => 0w423 + | T.REGION(stm,ctrl) => hashStm stm + hashCtrl ctrl + | T.RTL{hash,...} => hash + | T.SEQ ss => hashStms(ss, 0w23) + | T.ASSIGN(ty,lhs,rhs) => w ty + hashRexp lhs + hashRexp rhs + | _ => error "hashStm" + + and hashStms([],h) = h + | hashStms(s::ss,h) = hashStms(ss,hashStm s + h) + + and hashMlrisc(T.CCR ccexp) = hashCCexp ccexp + | hashMlrisc(T.GPR rexp) = hashRexp rexp + | hashMlrisc(T.FPR fexp) = hashFexp fexp + + and hashMlriscs [] = 0w123 + | hashMlriscs(m::ms) = hashMlrisc m + hashMlriscs ms + + and hash2(ty,x,y) = w ty + hashRexp x + hashRexp y + + and hashm T.DIV_TO_ZERO = 0w158 + | hashm T.DIV_TO_NEGINF = 0w159 + + and hash3(m,ty,x,y) = hashm m + w ty + hashRexp x + hashRexp y + + and hashRexp rexp = + case rexp of + T.REG(ty, src) => w ty + wv src + | T.LI i => I.hash i + | T.LABEL l => hashLabel l + | T.LABEXP le => hashRexp rexp + | T.CONST c => Constant.hash c + | T.NEG(ty, x) => w ty + hashRexp x + 0w24 + | T.ADD x => hash2 x + 0w234 + | T.SUB x => hash2 x + 0w456 + | T.MULS x => hash2 x + 0w2131 + | T.DIVS x => hash3 x + 0w156 + | T.REMS x => hash3 x + 0w231 + | T.MULU x => hash2 x + 0w123 + | T.DIVU x => hash2 x + 0w1234 + | T.REMU x => hash2 x + 0w211 + | T.NEGT(ty, x) => w ty + hashRexp x + 0w1224 + | T.ADDT x => hash2 x + 0w1219 + | T.SUBT x => hash2 x + 0w999 + | T.MULT x => hash2 x + 0w7887 + | T.DIVT x => hash3 x + 0w88884 + | T.ANDB x => hash2 x + 0w12312 + | T.ORB x => hash2 x + 0w558 + | T.XORB x => hash2 x + 0w234 + | T.EQVB x => hash2 x + 0w734 + | T.NOTB(ty, x) => w ty + hashRexp x + | T.SRA x => hash2 x + 0w874 + | T.SRL x => hash2 x + 0w223 + | T.SLL x => hash2 x + 0w499 + | T.COND(ty,e,e1,e2) => w ty + hashCCexp e + hashRexp e1 + hashRexp e2 + | T.SX(ty, ty', rexp) => 0w232 + w ty + w ty' + hashRexp rexp + | T.ZX(ty, ty', rexp) => 0w737 + w ty + w ty' + hashRexp rexp + | T.CVTF2I(ty, round, ty', fexp) => + w ty + B.hashRoundingMode round + w ty' + hashFexp fexp + | T.LOAD(ty, ea, mem) => w ty + hashRexp ea + 0w342 + | T.LET(stm, rexp) => hashStm stm + hashRexp rexp + | T.PRED(e, ctrl) => hashRexp e + hashCtrl ctrl + | T.MARK(e, _) => hashRexp e + | T.REXT(ty, rext) => w ty + hashRext (hasher()) rext + | T.??? => 0w485 + | T.OP(ty,oper,es) => hashRexps(es, w ty + hashOper oper) + | T.ARG _ => 0w23 + | T.$(ty, k, e) => w ty + hashRexp e + | T.PARAM n => w n + | T.BITSLICE(ty, sl, e) => w ty + hashRexp e + + and hashOper(T.OPER{hash, ...}) = hash + + and hashRexps([],h) = h + | hashRexps(e::es,h) = hashRexps(es,hashRexp e + h) + + and hash2'(ty,x,y) = w ty + hashFexp x + hashFexp y + + and hashFexp fexp = + case fexp of + T.FREG(fty, src) => w fty + wv src + | T.FLOAD(fty, ea, mem) => w fty + hashRexp ea + | T.FADD x => hash2' x + 0w123 + | T.FMUL x => hash2' x + 0w1234 + | T.FSUB x => hash2' x + 0w12345 + | T.FDIV x => hash2' x + 0w234 + | T.FCOPYSIGN x => hash2' x + 0w883 + | T.FCOND(fty,c,x,y) => w fty + hashCCexp c + hashFexp x + hashFexp y + | T.FABS(fty, fexp) => w fty + hashFexp fexp + 0w2345 + | T.FNEG(fty, fexp) => w fty + hashFexp fexp + 0w23456 + | T.FSQRT(fty, fexp) => w fty + hashFexp fexp + 0w345 + | T.CVTI2F(fty, ty, rexp) => w fty + w ty + hashRexp rexp + | T.CVTF2F(fty, fty', fexp) => w fty + hashFexp fexp + w fty' + | T.FMARK(e, _) => hashFexp e + | T.FPRED(e, ctrl) => hashFexp e + hashCtrl ctrl + | T.FEXT(fty, fext) => w fty + hashFext (hasher()) fext + + and hashFexps([],h) = h + | hashFexps(e::es,h) = hashFexps(es,hashFexp e + h) + + and hashCCexp ccexp = + case ccexp of + T.CC(cc, src) => B.hashCond cc + wv src + | T.FCC(fcc, src) => B.hashFcond fcc + wv src + | T.CMP(ty, cond, x, y) => + w ty + B.hashCond cond + hashRexp x + hashRexp y + | T.FCMP(fty, fcond, x, y) => + w fty + B.hashFcond fcond + hashFexp x + hashFexp y + | T.NOT x => 0w2321 + hashCCexp x + | T.AND(x,y) => 0w2321 + hashCCexp x + hashCCexp y + | T.OR(x,y) => 0w8721 + hashCCexp x + hashCCexp y + | T.XOR(x,y) => 0w6178 + hashCCexp x + hashCCexp y + | T.EQV(x,y) => 0w178 + hashCCexp x + hashCCexp y + | T.TRUE => 0w0 + | T.FALSE => 0w1232 + | T.CCMARK(e, _) => hashCCexp e + | T.CCEXT(ty,ccext) => w ty + hashCCext (hasher()) ccext + + and hashCCexps([],h) = h + | hashCCexps(e::es,h) = hashCCexps(es,hashCCexp e + h) + + (* + * Equality + *) + + val eqLabel = Label.same + fun eqLabels([],[]) = true + | eqLabels(a::b,c::d) = eqLabel(a,c) andalso eqLabels(b,d) + | eqLabels _ = false + and eqCell(C.CELL{id=x, ...},C.CELL{id=y, ...}) = x=y + and eqCells([], []) = true + | eqCells(x::xs,y::ys) = eqCell(x,y) andalso eqCells(xs,ys) + | eqCells _ = false + and eqCopy((t1,dst1,src1),(t2,dst2,src2)) = + t1=t2 andalso eqCells(dst1,dst2) andalso eqCells(src1,src2) + and eqCtrl(c1,c2) = eqCell(c1,c2) + and eqCtrls(c1,c2) = eqCells(c1,c2) + + (* statements *) + and equality() = {stm=eqStm, rexp=eqRexp, fexp=eqFexp, ccexp=eqCCexp} + and eqStm(T.MV(a,b,c),T.MV(d,e,f)) = + a=d andalso eqCell(b,e) andalso eqRexp(c,f) + | eqStm(T.CCMV(a,b),T.CCMV(c,d)) = eqCell(a,c) andalso eqCCexp(b,d) + | eqStm(T.FMV(a,b,c),T.FMV(d,e,f)) = + a=d andalso eqCell(b,e) andalso eqFexp(c,f) + | eqStm(T.COPY x,T.COPY y) = eqCopy(x,y) + | eqStm(T.FCOPY x,T.FCOPY y) = eqCopy(x,y) + | eqStm(T.JMP(a,b),T.JMP(a',b')) = eqRexp(a,a') + | eqStm(T.CALL{funct=a,defs=b,uses=c,...}, + T.CALL{funct=d,defs=e,uses=f,...}) = + eqRexp(a,d) andalso eqMlriscs(b,e) andalso eqMlriscs(c,f) + | eqStm(T.FLOW_TO(x,a), T.FLOW_TO(y,b)) = + eqStm(x,y) andalso eqLabels(a,b) + | eqStm(T.RET _,T.RET _) = true + | eqStm(T.STORE(a,b,c,_),T.STORE(d,e,f,_)) = + a=d andalso eqRexp(b,e) andalso eqRexp(c,f) + | eqStm(T.FSTORE(a,b,c,_),T.FSTORE(d,e,f,_)) = + a=d andalso eqRexp(b,e) andalso eqFexp(c,f) + | eqStm(T.ANNOTATION(s1, _),s2) = eqStm(s1,s2) + | eqStm(s1,T.ANNOTATION(s2, _)) = eqStm(s1,s2) + | eqStm(T.PHI x,T.PHI y) = x=y + | eqStm(T.SOURCE,T.SOURCE) = true + | eqStm(T.SINK,T.SINK) = true + | eqStm(T.BCC(b,c),T.BCC(b',c')) = + eqCCexp(b,b') andalso eqLabel(c,c') + | eqStm(T.IF(b,c,d),T.IF(b',c',d')) = + eqCCexp(b,b') andalso eqStm(c,c') andalso eqStm(d,d') + | eqStm(T.RTL{attribs=x,...},T.RTL{attribs=y,...}) = x=y + | eqStm(T.REGION(a,b),T.REGION(a',b')) = eqCtrl(b,b') andalso eqStm(a,a') + | eqStm(T.EXT a,T.EXT a') = eqSext (equality()) (a,a') + | eqStm _ = false + + and eqStms([],[]) = true + | eqStms(a::b,c::d) = eqStm(a,c) andalso eqStms(b,d) + | eqStms _ = false + + and eqMlrisc(T.CCR a,T.CCR b) = eqCCexp(a,b) + | eqMlrisc(T.GPR a,T.GPR b) = eqRexp(a,b) + | eqMlrisc(T.FPR a,T.FPR b) = eqFexp(a,b) + | eqMlrisc _ = false + + and eqMlriscs([],[]) = true + | eqMlriscs(a::b,c::d) = eqMlrisc(a,c) andalso eqMlriscs(b,d) + | eqMlriscs _ = false + + and eq2((a,b,c),(d,e,f)) = a=d andalso eqRexp(b,e) andalso eqRexp(c,f) + + and eq3((m,a,b,c),(n,d,e,f)) = + m=n andalso a=d andalso eqRexp(b,e) andalso eqRexp(c,f) + + and eqRexp(T.REG(a,b),T.REG(c,d)) = a=c andalso eqCell(b,d) + | eqRexp(T.LI a,T.LI b) = a = b + | eqRexp(T.LABEL a,T.LABEL b) = eqLabel(a,b) + | eqRexp(T.LABEXP a,T.LABEXP b) = eqRexp(a,b) + | eqRexp(T.CONST a,T.CONST b) = Constant.==(a,b) + | eqRexp(T.NEG(t,x),T.NEG(t',x')) = t = t' andalso eqRexp(x,x') + | eqRexp(T.ADD x,T.ADD y) = eq2(x,y) + | eqRexp(T.SUB x,T.SUB y) = eq2(x,y) + | eqRexp(T.MULS x,T.MULS y) = eq2(x,y) + | eqRexp(T.DIVS x,T.DIVS y) = eq3(x,y) + | eqRexp(T.REMS x,T.REMS y) = eq3(x,y) + | eqRexp(T.MULU x,T.MULU y) = eq2(x,y) + | eqRexp(T.DIVU x,T.DIVU y) = eq2(x,y) + | eqRexp(T.REMU x,T.REMU y) = eq2(x,y) + | eqRexp(T.NEGT(t,x),T.NEGT(t',x')) = t = t' andalso eqRexp(x,x') + | eqRexp(T.ADDT x,T.ADDT y) = eq2(x,y) + | eqRexp(T.SUBT x,T.SUBT y) = eq2(x,y) + | eqRexp(T.MULT x,T.MULT y) = eq2(x,y) + | eqRexp(T.DIVT x,T.DIVT y) = eq3(x,y) + | eqRexp(T.ANDB x,T.ANDB y) = eq2(x,y) + | eqRexp(T.ORB x,T.ORB y) = eq2(x,y) + | eqRexp(T.XORB x,T.XORB y) = eq2(x,y) + | eqRexp(T.EQVB x,T.EQVB y) = eq2(x,y) + | eqRexp(T.NOTB(a,b),T.NOTB(c,d)) = a=c andalso eqRexp(b,d) + | eqRexp(T.SRA x,T.SRA y) = eq2(x,y) + | eqRexp(T.SRL x,T.SRL y) = eq2(x,y) + | eqRexp(T.SLL x,T.SLL y) = eq2(x,y) + | eqRexp(T.COND(a,b,c,d),T.COND(e,f,g,h)) = + a=e andalso eqCCexp(b,f) andalso eqRexp(c,g) andalso eqRexp(d,h) + | eqRexp(T.SX(a,b,c),T.SX(a',b',c')) = + a=a' andalso b=b' andalso eqRexp(c,c') + | eqRexp(T.ZX(a,b,c),T.ZX(a',b',c')) = + a=a' andalso b=b' andalso eqRexp(c,c') + | eqRexp(T.CVTF2I(a,b,c,d),T.CVTF2I(e,f,g,h)) = + a=e andalso b=f andalso c=g andalso eqFexp(d,h) + | eqRexp(T.LOAD(a,b,_),T.LOAD(c,d,_)) = a=c andalso eqRexp(b,d) + | eqRexp(T.LET(a,b),T.LET(c,d)) = eqStm(a,c) andalso eqRexp(b,d) + | eqRexp(T.ARG x,T.ARG y) = x = y + | eqRexp(T.PARAM x,T.PARAM y) = x = y + | eqRexp(T.???,T.???) = true + | eqRexp(T.$(t1,k1,e1),T.$(t2,k2,e2)) = + t1=t2 andalso k1=k2 andalso eqRexp(e1,e2) + | eqRexp(T.BITSLICE(t1,s1,e1),T.BITSLICE(t2,s2,e2)) = + t1=t2 andalso s1=s2 andalso eqRexp(e1,e2) + | eqRexp(T.MARK(a,_),b) = eqRexp(a,b) + | eqRexp(a,T.MARK(b,_)) = eqRexp(a,b) + | eqRexp(T.PRED(a,b),T.PRED(a',b')) = eqCtrl(b,b') andalso eqRexp(a,a') + | eqRexp(T.REXT(a,b),T.REXT(a',b')) = + a=a' andalso eqRext (equality()) (b,b') + | eqRexp _ = false + + and eqRexps([],[]) = true + | eqRexps(a::b,c::d) = eqRexp(a,c) andalso eqRexps(b,d) + | eqRexps _ = false + + and eq2'((a,b,c),(d,e,f)) = a=d andalso eqFexp(b,e) andalso eqFexp(c,f) + and eq1'((a,b),(d,e)) = a=d andalso eqFexp(b,e) + + and eqFexp(T.FREG(t1,x),T.FREG(t2,y)) = t1=t2 andalso eqCell(x,y) + | eqFexp(T.FLOAD(a,b,_),T.FLOAD(c,d,_)) = a=c andalso eqRexp(b,d) + | eqFexp(T.FADD x,T.FADD y) = eq2'(x,y) + | eqFexp(T.FMUL x,T.FMUL y) = eq2'(x,y) + | eqFexp(T.FSUB x,T.FSUB y) = eq2'(x,y) + | eqFexp(T.FDIV x,T.FDIV y) = eq2'(x,y) + | eqFexp(T.FCOPYSIGN x, T.FCOPYSIGN y) = eq2'(x,y) + | eqFexp(T.FCOND(t,x,y,z), T.FCOND(t',x',y',z')) = + t=t' andalso eqCCexp(x,x') andalso eqFexp(y,y') andalso eqFexp(z,z') + | eqFexp(T.FABS x,T.FABS y) = eq1'(x,y) + | eqFexp(T.FNEG x,T.FNEG y) = eq1'(x,y) + | eqFexp(T.FSQRT x,T.FSQRT y) = eq1'(x,y) + | eqFexp(T.CVTI2F(a,b,c),T.CVTI2F(a',b',c')) = + a=a' andalso b=b' andalso eqRexp(c,c') + | eqFexp(T.CVTF2F(a,b,c),T.CVTF2F(a',b',c')) = + a=a' andalso b=b' andalso eqFexp(c,c') + | eqFexp(T.FEXT(a,f),T.FEXT(b,g)) = a=b andalso eqFext (equality()) (f,g) + | eqFexp(T.FMARK(a,_),b) = eqFexp(a,b) + | eqFexp(a,T.FMARK(b,_)) = eqFexp(a,b) + | eqFexp(T.FPRED(a,b),T.FPRED(a',b')) = eqCtrl(b,b') andalso eqFexp(a,a') + | eqFexp _ = false + + and eqFexps([],[]) = true + | eqFexps(a::b,c::d) = eqFexp(a,c) andalso eqFexps(b,d) + | eqFexps _ = false + + and eqCCexp(T.CC(c1,x),T.CC(c2,y)) = c1=c2 andalso eqCell(x,y) + | eqCCexp(T.FCC(c1,x),T.FCC(c2,y)) = c1=c2 andalso eqCell(x,y) + | eqCCexp(T.CMP(x,a,b,c),T.CMP(y,d,e,f)) = + a=d andalso eqRexp(b,e) andalso eqRexp(c,f) andalso x = y + | eqCCexp(T.FCMP(x,a,b,c),T.FCMP(y,d,e,f)) = + a=d andalso eqFexp(b,e) andalso eqFexp(c,f) andalso x = y + | eqCCexp(T.NOT x, T.NOT y) = eqCCexp(x,y) + | eqCCexp(T.AND x, T.AND y) = eqCCexp2(x,y) + | eqCCexp(T.OR x, T.OR y) = eqCCexp2(x,y) + | eqCCexp(T.XOR x, T.XOR y) = eqCCexp2(x,y) + | eqCCexp(T.EQV x, T.EQV y) = eqCCexp2(x,y) + | eqCCexp(T.CCMARK(a,_),b) = eqCCexp(a,b) + | eqCCexp(a,T.CCMARK(b,_)) = eqCCexp(a,b) + | eqCCexp(T.CCEXT(t,a),T.CCEXT(t',b)) = + t=t' andalso eqCCext (equality()) (a,b) + | eqCCexp(T.TRUE, T.TRUE) = true + | eqCCexp(T.FALSE, T.FALSE) = true + | eqCCexp _ = false + + and eqCCexp2((x,y),(x',y')) = eqCCexp(x,x') andalso eqCCexp(y,y') + + and eqCCexps([],[]) = true + | eqCCexps(a::b,c::d) = eqCCexp(a,c) andalso eqCCexps(b,d) + | eqCCexps _ = false + + (* + * Pretty printing + *) + fun show {def,use,regionDef,regionUse} = + let fun ty t = "."^i2s t + fun fty 32 = ".s" + | fty 64 = ".d" + | fty 128 = ".q" + | fty t = ty t + + fun reg(t,v) = C.toString v^ty t + fun freg(t,v) = C.toString v^fty t + fun ccreg v = C.toString v + fun ctrlreg v = C.toString v + + fun srcReg(t,v) = reg(t,v) + fun srcFreg(t,v) = freg(t,v) + fun srcCCreg v = ccreg v + fun srcCtrlreg v = ctrlreg v + + fun dstReg(t,v) = reg(t,v) + fun dstFreg(t,v) = freg(t,v) + fun dstCCreg v = ccreg v + fun dstCtrlreg v = ctrlreg v + + fun srcParam(i) = def i handle _ => "<"^i2s i^">" + fun dstParam(i) = use i handle _ => "<"^i2s i^">" + + fun listify f = + let fun g(t,[]) = "" + | g(t,[r]) = f(t,r) + | g(t,r::rs) = f(t,r)^","^g(t,rs) + in g end + + fun listify' f = (String.concatWith ",") o (List.map f) + + val srcRegs = listify srcReg + val dstRegs = listify dstReg + val srcFregs = listify srcFreg + val dstFregs = listify dstFreg + val srcCCregs = listify' srcCCreg + val dstCCregs = listify' dstCCreg + val srcCtrlregs = listify' srcCtrlreg + val dstCtrlregs = listify' dstCtrlreg + fun usectrl cr = " ["^srcCtrlreg cr^"]" + fun usectrls [] = "" + | usectrls cr = " ["^srcCtrlregs cr^"]" + fun defctrl cr = ""^dstCtrlreg cr^" <- " + fun defctrls [] = "" + | defctrls cr = ""^dstCtrlregs cr^" <- " + + fun copy(t,dst,src) = dstRegs(t, dst)^" := "^srcRegs(t, src) + fun fcopy(t,dst,src) = dstFregs(t, dst)^" := "^srcFregs(t, src) + + fun shower() = {stm=stm, rexp=rexp, fexp=fexp, ccexp=ccexp, + dstReg=dstReg, srcReg=srcReg} + (* pretty print a statement *) + and stm(T.MV(t,dst,e)) = dstReg(t,dst)^" := "^rexp e + | stm(T.CCMV(dst,e)) = dstCCreg dst^" := "^ccexp e + | stm(T.FMV(fty,dst,e)) = dstFreg(fty,dst)^" := "^fexp e + | stm(T.COPY(ty,dst,src)) = copy(ty,dst,src) + | stm(T.FCOPY(fty,dst,src)) = fcopy(fty,dst,src) + | stm(T.JMP(ea,labels)) = "jmp "^rexp ea + | stm(T.BCC(a,lab)) = + "bcc "^ccexp a^" "^Label.toString lab + | stm(T.CALL{funct,targets,defs,uses,region,pops}) = + "call "^rexp funct + | stm(T.FLOW_TO(s, targets)) = + stm s^" ["^listify' Label.toString targets^"]" + | stm(T.RET(flow)) = "ret" + | stm(T.IF(a,b,T.SEQ [])) = "if "^ccexp a^" then "^stm b + | stm(T.IF(a,b,c)) = "if "^ccexp a^" then "^stm b^" else "^stm c + | stm(T.STORE(ty,ea,e,mem)) = store(ty,"",ea,mem,e) + | stm(T.FSTORE(fty,ea,e,mem)) = fstore(fty,"",ea,mem,e) + | stm(T.REGION(s,cr)) = stm s^usectrl cr + | stm(T.SEQ []) = "skip" + | stm(T.SEQ s) = stms(";\n",s) + | stm(T.DEFINE lab) = Label.toString lab ^ ":" + | stm(T.ANNOTATION(s, a)) = stm s + | stm(T.EXT x) = showSext (shower()) x + | stm(T.LIVE exps) = "live: " ^ mlriscs exps + | stm(T.KILL exps) = "kill: " ^ mlriscs exps + | stm(T.PHI{preds, block}) = "phi["^i2s block^"]" + | stm(T.ASSIGN(ty,lhs,T.???)) = "define "^rexp lhs + | stm(T.ASSIGN(ty,T.???,rhs)) = "use "^rexp rhs + | stm(T.ASSIGN(ty,x,rhs)) = lhs x^" := "^rexp rhs + | stm(T.SOURCE) = "source" + | stm(T.SINK) = "sink" + | stm(T.RTL{e,...}) = stm e + + and stms(sep,[]) = "" + | stms(sep,[s]) = stm s + | stms(sep,s::ss) = stm s^sep^stms(sep,ss) + + and lhs(T.PARAM i) = dstParam i + | lhs(T.$(ty,k,T.PARAM i)) = dstParam i + | lhs(e) = rexp e + + (* pretty print an expression *) + and rexp(T.REG(ty, src)) = srcReg(ty,src) + | rexp(T.LI i) = IntInf.toString i + | rexp(T.LABEL l) = Label.toString l + | rexp(T.CONST c) = Constant.toString c + | rexp(T.LABEXP le) = rexp le + | rexp(T.NEG x) = unary("~",x) + | rexp(T.ADD x) = binary("+",x) + | rexp(T.SUB x) = binary("-",x) + | rexp(T.MULS x) = two("muls",x) + | rexp(T.DIVS x) = three("divs",x) + | rexp(T.REMS x) = three("rems",x) + | rexp(T.MULU x) = two("mulu",x) + | rexp(T.DIVU x) = two("divu",x) + | rexp(T.REMU x) = two("remu",x) + | rexp(T.NEGT x) = one("negt",x) + | rexp(T.ADDT x) = two("addt",x) + | rexp(T.SUBT x) = two("subt",x) + | rexp(T.MULT x) = two("mult",x) + | rexp(T.DIVT x) = three("divt",x) + | rexp(T.ANDB x) = binary("&",x) + | rexp(T.ORB x) = binary("|",x) + | rexp(T.XORB x) = binary("^",x) + | rexp(T.EQVB x) = binary("eqvb",x) + | rexp(T.NOTB x) = unary("!",x) + | rexp(T.SRA x) = binary("~>>",x) + | rexp(T.SRL x) = binary(">>",x) + | rexp(T.SLL x) = binary("<<",x) + | rexp(T.COND(t,cc,e1,e2)) = + "cond"^ty t^"("^ccexp cc^","^rexp e1^","^rexp e2^")" + | rexp(T.SX(t, t', e)) = "sx"^ty t^ty t'^" "^rexp e + | rexp(T.ZX(t, t', e)) = "zx"^ty t^ty t'^" "^rexp e + | rexp(T.CVTF2I(t, round, t', e)) = + "cvtf2i"^ty t^toLower(B.roundingModeToString round)^ + fty t'^" "^fexp e + | rexp(T.LOAD(ty, ea, mem)) = load(ty,"",ea,mem) + | rexp(T.LET(s, e)) = stm s^";"^rexp e + | rexp(T.PRED(e, cr)) = rexp e^usectrl cr + | rexp(T.MARK(e, _)) = rexp e + | rexp(T.REXT e) = showRext (shower()) e + | rexp(T.???) = "???" + | rexp(T.OP(t,opc,es)) = oper opc^ty t^" "^rexps es + | rexp(T.ARG(t,ref(T.REP kind),name)) = + name^":"^kind^(if t = 0 then "" else ty t) + | rexp(T.PARAM n) = srcParam n + | rexp(T.$(ty,k,e)) = + "$"^C.cellkindToNickname k^"["^rexp e^"]" + | rexp(T.BITSLICE(ty,sl,e)) = rexp e^" at "^slices sl + + and oper(T.OPER{name,...}) = name + + and parenRexp + (e as (T.REG _ | T.LI _ | T.$ _ | T.ARG _)) = + rexp e + | parenRexp e = "("^rexp e^")" + + and slices sc = listify' (fn (from,to) => i2s from^".."^i2s to) sc + + (* pretty print a real expression *) + and fexp(T.FREG f) = srcFreg f + | fexp(T.FLOAD(fty, ea, mem)) = fload(fty,"",ea,mem) + | fexp(T.FADD x) = two'("fadd",x) + | fexp(T.FMUL x) = two'("fmul",x) + | fexp(T.FSUB x) = two'("fsub",x) + | fexp(T.FDIV x) = two'("fdiv",x) + | fexp(T.FCOPYSIGN x) = two'("fcopysign",x) + | fexp(T.FABS x) = one'("fabs",x) + | fexp(T.FNEG x) = one'("fneg",x) + | fexp(T.FSQRT x) = one'("fsqrt",x) + | fexp(T.FCOND(t,cc,e1,e2)) = + "fcond"^fty t^ccexp cc^"("^fexp e1^","^fexp e2^")" + | fexp(T.CVTI2F(t, t', e)) = "cvti2f"^ty t'^" "^rexp e + | fexp(T.CVTF2F(t, t', e)) = "cvtf2f"^fty t^fty t'^" "^fexp e + | fexp(T.FPRED(e, cr)) = fexp e^usectrl cr + | fexp(T.FMARK(e, _)) = fexp e + | fexp(T.FEXT e) = showFext (shower()) e + + and ccexp(T.CC(cc,r)) = srcCCreg r^toLower(B.condToString cc) + | ccexp(T.FCC(fcc,r)) = srcCCreg r^toLower(B.fcondToString fcc) + | ccexp(T.CMP(t,T.SETCC,x,y)) = "setcc"^ty t^pair(x,y) + | ccexp(T.CMP(t,cc,x,y)) = + "cmp"^toLower(B.condToString cc)^ty t^pair(x,y) + | ccexp(T.FCMP(t,T.SETFCC,x,y)) = "setfcc"^ty t^pair'(x,y) + | ccexp(T.FCMP(t,fcc,x,y)) = + "fcmp"^toLower(B.fcondToString fcc)^fty t^pair'(x,y) + | ccexp(T.NOT x) = "not "^ccexp x + | ccexp(T.AND(x,y)) = two''(" and ",x,y) + | ccexp(T.OR(x,y)) = two''(" or ",x,y) + | ccexp(T.XOR(x,y)) = two''(" xor ",x,y) + | ccexp(T.EQV(x,y)) = two''(" eqv ",x,y) + | ccexp(T.CCMARK(e, _)) = ccexp e + | ccexp(T.TRUE) = "true" + | ccexp(T.FALSE) = "false" + | ccexp(T.CCEXT(e)) = showCCext (shower()) e + + and mlrisc(T.GPR e) = rexp e + | mlrisc(T.FPR e) = fexp e + | mlrisc(T.CCR e) = ccexp e + + and mlriscs l = listify' mlrisc l + + (* Auxiliary functions *) + and one(opcode,(t,x)) = opcode^ty t^"("^rexp x^")" + and two(opcode,(t,x,y)) = opcode^ty t^pair(x,y) + and three(opcode,(m,t,x,y)) = opcode^dmr m^ty t^pair(x,y) + and dmr T.DIV_TO_ZERO = "{0}" + | dmr T.DIV_TO_NEGINF = "{-inf}" + and binary(opcode,(t,x,y)) = parenRexp x^" "^opcode^ty t^" "^parenRexp y + and unary(opcode,(t,x)) = opcode^ty t^" "^parenRexp x + and pair(x,y) = "("^rexp x^","^rexp y^")" + and one'(opcode,(t,x)) = opcode^fty t^"("^fexp x^")" + and two'(opcode,(t,x,y)) = opcode^fty t^pair'(x,y) + and two''(c,x,y) = "("^ccexp x^ c ^ ccexp y^")" + and pair'(x,y) = "("^fexp x^","^fexp y^")" + and rexps es = "("^foldr (fn (e,"") => rexp e + | (e,x) => rexp e^","^x) "" es^")" + and fexps es = "("^foldr (fn (e,"") => fexp e + | (e,x) => fexp e^","^x) "" es^")" + and ccexps es = "("^foldr (fn (e,"") => ccexp e + | (e,x) => ccexp e^","^x) "" es^")" + and store(t,u,ea,m,e) = memdef(t,u,ea,m)^" := "^rexp e + and fstore(t,u,ea,m,e) = fmemdef(t,u,ea,m)^" := "^fexp e + and ccstore(u,ea,m,e) = ccmemdef(u,ea,m)^" := "^ccexp e + and load(t,u,ea,m) = memuse(t,u,ea,m) + and fload(t,u,ea,m) = fmemuse(t,u,ea,m) + and ccload(u,ea,m) = ccmemuse(u,ea,m) + and addr(u,ea,m,show) = + let val r = show m handle _ => Region.toString m + val r = if r = "" then r else ":"^r + in u^"["^rexp ea^r^"]" end + and mem(t,u,ea,m,show) = "mem"^ty t^addr(u,ea,m,show) + and fmem(t,u,ea,m,show) = "mem"^fty t^addr(u,ea,m,show) + and ccmem(u,ea,m,show) = "mem"^addr(u,ea,m,show) + and memdef(t,u,ea,m) = mem(t,u,ea,m,regionDef) + and fmemdef(t,u,ea,m) = fmem(t,u,ea,m,regionDef) + and ccmemdef(u,ea,m) = ccmem(u,ea,m,regionDef) + and memuse(t,u,ea,m) = mem(t,u,ea,m,regionUse) + and fmemuse(t,u,ea,m) = fmem(t,u,ea,m,regionUse) + and ccmemuse(u,ea,m) = ccmem(u,ea,m,regionUse) + in shower() + end + + exception Nothing + + fun dummy _ = raise Nothing + val dummy = {def=dummy, use=dummy, regionDef=dummy, regionUse=dummy} + + fun stmToString s = #stm(show dummy) s + fun rexpToString s = #rexp(show dummy) s + fun fexpToString s = #fexp(show dummy) s + fun ccexpToString s = #ccexp(show dummy) s + +end diff --git a/MLRISC/mltree/mltree.sig b/MLRISC/mltree/mltree.sig new file mode 100644 index 0000000..90f25e9 --- /dev/null +++ b/MLRISC/mltree/mltree.sig @@ -0,0 +1,238 @@ +(* mltree.sig + * + * COPYRIGHT (c) 1994 AT&T Bell Laboratories. + * + *) + +signature MLTREE = sig + structure Constant : CONSTANT + structure Region : REGION +(* structure Stream : INSTRUCTION_STREAM *) + structure Basis : MLTREE_BASIS + structure Extension : MLTREE_EXTENSION + structure I : MACHINE_INT + + type ty = Basis.ty + type fty = Basis.fty + type var = CellsBasis.cell (* variable *) + type src = var (* source variable *) + type dst = var (* destination variable *) + type reg = var (* physical register *) + type an = Annotations.annotation + + datatype cond = datatype Basis.cond + datatype fcond = datatype Basis.fcond + datatype rounding_mode = datatype Basis.rounding_mode + datatype div_rounding_mode = datatype Basis.div_rounding_mode + datatype ext = datatype Basis.ext + + (* Statements/effects. These types are parameterized by the statement + * extension type. Unfortunately, this has to be made polymorphic to make + * it possible for recursive type definitions to work. + *) + datatype stm = + (* assignment *) + MV of ty * dst * rexp + | CCMV of dst * ccexp + | FMV of fty * dst * fexp + + (* parallel copies *) + | COPY of ty * dst list * src list + | FCOPY of fty * dst list * src list + + (* control flow *) + | JMP of rexp * controlflow +(* + | SWITCH of + {tblLab: Label.label, (* label associated with table *) + base : rexp option, (* base pointer -- if any *) + table : fn Label.label -> rexp, (* get table address *) + index : rexp, (* index into table *) + targets : controlflow} (* targets of switch *) +*) + | BCC of ccexp * Label.label + | CALL of {funct:rexp, targets:controlflow, + defs:mlrisc list, uses:mlrisc list, + region: Region.region, + pops: Int32.int} + | FLOW_TO of stm * controlflow + | RET of controlflow + | IF of ccexp * stm * stm + + (* memory update: ea, data *) + | STORE of ty * rexp * rexp * Region.region + | FSTORE of fty * rexp * fexp * Region.region + + (* control dependence *) + | REGION of stm * ctrl + + | SEQ of stm list (* sequencing *) + | DEFINE of Label.label (* define local label *) + + | ANNOTATION of stm * an + | EXT of sext (* extension *) + + (* synthetic instructions to indicated that the regs are live or + * killed at this program point. The spilled list must always + * start out as the empty list. + *) + | LIVE of mlrisc list + | KILL of mlrisc list + + (* RTL operators: + * The following are used internally + * for describing instruction semantics. + * The frontend must not use these. + *) + | PHI of {preds:int list,block:int} + | ASSIGN of ty * rexp * rexp + | SOURCE + | SINK + | RTL of {hash:word, attribs:Basis.attribs ref, e:stm} + + and rexp = + REG of ty * reg + + (* sizes of constants are inferred by context *) + | LI of I.machine_int + | LABEL of Label.label + | CONST of Constant.const + | LABEXP of rexp + + | NEG of ty * rexp + | ADD of ty * rexp * rexp + | SUB of ty * rexp * rexp + + (* signed multiplication etc. *) + | MULS of ty * rexp * rexp + | DIVS of div_rounding_mode * ty * rexp * rexp + | REMS of div_rounding_mode * ty * rexp * rexp + + (* unsigned multiplication etc. *) + | MULU of ty * rexp * rexp + | DIVU of ty * rexp * rexp + | REMU of ty * rexp * rexp + + (* overflow-trapping versions of above. These are all signed *) + | NEGT of ty * rexp + | ADDT of ty * rexp * rexp + | SUBT of ty * rexp * rexp + | MULT of ty * rexp * rexp + | DIVT of div_rounding_mode * ty * rexp * rexp + (* there is no REMT because remainder never overflows *) + + (* bit operations *) + | ANDB of ty * rexp * rexp + | ORB of ty * rexp * rexp + | XORB of ty * rexp * rexp + | EQVB of ty * rexp * rexp + | NOTB of ty * rexp + + | SRA of ty * rexp * rexp (* value, shift *) + | SRL of ty * rexp * rexp + | SLL of ty * rexp * rexp + + (* type promotion/conversion *) + | SX of ty * ty * rexp (* toTy, fromTy *) + | ZX of ty * ty * rexp (* toTy, fromTy *) + | CVTF2I of ty * rounding_mode * fty * fexp + + (* + * COND(ty,cc,e1,e2): + * Evaluate into either e1 or e2, depending on cc. + * Both e1 and e2 are allowed to be evaluated eagerly. + *) + | COND of ty * ccexp * rexp * rexp + + (* integer load *) + | LOAD of ty * rexp * Region.region + + (* predication *) + | PRED of rexp * ctrl + + | LET of stm * rexp + + | REXT of ty * rext + + | MARK of rexp * an + + | OP of ty * oper * rexp list + | ARG of ty * rep ref * string + | $ of ty * CellsBasis.cellkind * rexp + | PARAM of int + | BITSLICE of ty * (int * int) list * rexp + | ??? + + and rep = REP of string + + and oper = OPER of Basis.misc_op + + and fexp = + FREG of fty * src + | FLOAD of fty * rexp * Region.region + + | FADD of fty * fexp * fexp + | FMUL of fty * fexp * fexp + | FSUB of fty * fexp * fexp + | FDIV of fty * fexp * fexp + | FABS of fty * fexp + | FNEG of fty * fexp + | FSQRT of fty * fexp + | FCOND of fty * ccexp * + fexp * fexp + | FCOPYSIGN of fty * fexp (*sign*) * fexp (*magnitude*) + + | CVTI2F of fty * ty * rexp (* from signed integer *) + | CVTF2F of fty * fty * fexp (* float to float conversion *) + + | FPRED of fexp * ctrl + + | FEXT of fty * fext + + | FMARK of fexp * an + + and ccexp = + CC of Basis.cond * src + | FCC of Basis.fcond * src + | TRUE + | FALSE + | NOT of ccexp + | AND of ccexp * ccexp + | OR of ccexp * ccexp + | XOR of ccexp * ccexp + | EQV of ccexp * ccexp + | CMP of ty * Basis.cond * rexp * rexp + | FCMP of fty * Basis.fcond * fexp * fexp + | CCMARK of ccexp * an + | CCEXT of ty * ccext + + and mlrisc = + CCR of ccexp + | GPR of rexp + | FPR of fexp + + withtype controlflow = Label.label list (* control flow info *) + and ctrl = var (* control dependence info *) + and ctrls = ctrl list + and sext = (stm, rexp, fexp, ccexp) Extension.sx + and rext = (stm, rexp, fexp, ccexp) Extension.rx + and fext = (stm, rexp, fexp, ccexp) Extension.fx + and ccext = (stm, rexp, fexp, ccexp) Extension.ccx + and labexp = rexp + (* + * Useful type abbreviations for working for MLTree. + *) + type rewriter = (* rewriting functions *) + {stm:stm->stm, rexp:rexp->rexp, fexp:fexp->fexp, ccexp:ccexp->ccexp} + type 'a folder = (* aggregation functions *) + {stm:stm*'a->'a, rexp:rexp*'a->'a, fexp:fexp*'a->'a, ccexp:ccexp*'a->'a} + type hasher = (* hashing functions *) + {stm:stm->word, rexp:rexp->word, fexp:fexp->word, ccexp:ccexp->word} + type equality = (* comparison functions *) + {stm:stm * stm->bool, rexp:rexp * rexp->bool, + fexp:fexp * fexp->bool, ccexp:ccexp * ccexp->bool} + type printer = (* pretty printing functions *) + {stm:stm->string, rexp:rexp->string, fexp:fexp->string, ccexp:ccexp->string, + dstReg : ty * var -> string, srcReg : ty * var -> string} + +end (* MLTREE *) diff --git a/MLRISC/mltree/mltree.sml b/MLRISC/mltree/mltree.sml new file mode 100644 index 0000000..7f5f1b1 --- /dev/null +++ b/MLRISC/mltree/mltree.sml @@ -0,0 +1,228 @@ +(* mltree.sml + * + * COPYRIGHT (c) 1994 AT&T Bell Laboratories. + * + *) +functor MLTreeF(structure Constant : CONSTANT + structure Region : REGION + structure Extension : MLTREE_EXTENSION + ) : MLTREE = +struct + structure Constant = Constant + structure Region = Region + structure Basis = MLTreeBasis + structure Extension = Extension + structure I = MachineInt + + type ty = Basis.ty + type fty = Basis.fty + type var = CellsBasis.cell (* variable *) + type src = var (* source variable *) + type dst = var (* destination variable *) + type reg = var (* physical register *) + type an = Annotations.annotation + + datatype cond = datatype Basis.cond + datatype fcond = datatype Basis.fcond + datatype rounding_mode = datatype Basis.rounding_mode + datatype div_rounding_mode = datatype Basis.div_rounding_mode + datatype ext = datatype Basis.ext + + (* Statements/effects. These types are parameterized by the statement + * extension type. Unfortunately, this has to be made polymorphic to make + * it possible for recursive type definitions to work. + *) + datatype stm = + (* assignment *) + MV of ty * dst * rexp + | CCMV of dst * ccexp + | FMV of fty * dst * fexp + + (* parallel copies *) + | COPY of ty * dst list * src list + | FCOPY of fty * dst list * src list + + (* Control flow *) + | JMP of rexp * controlflow + | BCC of ccexp * Label.label + | CALL of {funct:rexp, targets:controlflow, + defs:mlrisc list, uses:mlrisc list, + region: Region.region, pops: Int32.int} + | FLOW_TO of stm * controlflow + | RET of controlflow + | IF of ccexp * stm * stm + + (* memory update: ea, data *) + | STORE of ty * rexp * rexp * Region.region + | FSTORE of fty * rexp * fexp * Region.region + + (* control dependence *) + | REGION of stm * ctrl + + | SEQ of stm list (* sequencing *) + | DEFINE of Label.label (* define local label *) + + | ANNOTATION of stm * an + | EXT of sext (* extension *) + + | LIVE of mlrisc list + | KILL of mlrisc list + + (* RTL operators: + * The following are used internally + * for describing instruction semantics. + * The frontend must not use these. + *) + | PHI of {preds:int list,block:int} + | ASSIGN of ty * rexp * rexp + | SOURCE + | SINK + | RTL of {hash:word, attribs:Basis.attribs ref, e:stm} + + and rexp = + REG of ty * reg + + (* sizes of constants are inferred by context *) + | LI of I.machine_int + | LABEL of Label.label + | CONST of Constant.const + | LABEXP of rexp + + | NEG of ty * rexp + | ADD of ty * rexp * rexp + | SUB of ty * rexp * rexp + + (* signed multiplication etc. *) + | MULS of ty * rexp * rexp + | DIVS of div_rounding_mode * ty * rexp * rexp + | REMS of div_rounding_mode * ty * rexp * rexp + + (* unsigned multiplication etc. *) + | MULU of ty * rexp * rexp + | DIVU of ty * rexp * rexp + | REMU of ty * rexp * rexp + + (* overflow-trapping versions of above. These are all signed *) + | NEGT of ty * rexp + | ADDT of ty * rexp * rexp + | SUBT of ty * rexp * rexp + | MULT of ty * rexp * rexp + | DIVT of div_rounding_mode * ty * rexp * rexp + + (* bit operations *) + | ANDB of ty * rexp * rexp + | ORB of ty * rexp * rexp + | XORB of ty * rexp * rexp + | EQVB of ty * rexp * rexp + | NOTB of ty * rexp + + | SRA of ty * rexp * rexp (* value, shift *) + | SRL of ty * rexp * rexp + | SLL of ty * rexp * rexp + + (* type promotion/conversion *) + | SX of ty * ty * rexp (* toTy, fromTy *) + | ZX of ty * ty * rexp (* toTy, fromTy *) + | CVTF2I of ty * rounding_mode * fty * fexp + + (* + * COND(ty,cc,e1,e2): + * Evaluate into either e1 or e2, depending on cc. + * Both e1 and e2 are allowed to be evaluated eagerly. + *) + | COND of ty * ccexp * rexp * rexp + + (* integer load *) + | LOAD of ty * rexp * Region.region + + (* predication *) + | PRED of rexp * ctrl + + | LET of stm * rexp + + | REXT of ty * rext + + | MARK of rexp * an + + | OP of ty * oper * rexp list + | ARG of ty * rep ref * string + | $ of ty * CellsBasis.cellkind * rexp + | PARAM of int + | BITSLICE of ty * (int * int) list * rexp + | ??? + + and rep = REP of string + + and oper = OPER of Basis.misc_op + + and fexp = + FREG of fty * src + | FLOAD of fty * rexp * Region.region + + | FADD of fty * fexp * fexp + | FMUL of fty * fexp * fexp + | FSUB of fty * fexp * fexp + | FDIV of fty * fexp * fexp + | FABS of fty * fexp + | FNEG of fty * fexp + | FSQRT of fty * fexp + | FCOND of fty * ccexp * + fexp * fexp + | FCOPYSIGN of fty * fexp (*sign*) * fexp (*magnitude*) + + | CVTI2F of fty * ty * rexp (* from signed integer *) + | CVTF2F of fty * fty * fexp (* float to float conversion *) + + | FPRED of fexp * ctrl + + | FEXT of fty * fext + + | FMARK of fexp * an + + and ccexp = + CC of Basis.cond * src + | FCC of Basis.fcond * src + | TRUE + | FALSE + | NOT of ccexp + | AND of ccexp * ccexp + | OR of ccexp * ccexp + | XOR of ccexp * ccexp + | EQV of ccexp * ccexp + | CMP of ty * Basis.cond * rexp * rexp + | FCMP of fty * Basis.fcond * fexp * fexp + | CCMARK of ccexp * an + | CCEXT of ty * ccext + + and mlrisc = + CCR of ccexp + | GPR of rexp + | FPR of fexp + + withtype controlflow = Label.label list (* control flow info *) + and ctrl = var (* control dependence info *) + and ctrls = ctrl list + and sext = (stm, rexp, fexp, ccexp) Extension.sx + and rext = (stm, rexp, fexp, ccexp) Extension.rx + and fext = (stm, rexp, fexp, ccexp) Extension.fx + and ccext = (stm, rexp, fexp, ccexp) Extension.ccx + and labexp = rexp + + (* + * Useful type abbreviations for working for MLTree. + *) + type rewriter = (* rewriting functions *) + {stm:stm->stm, rexp:rexp->rexp, fexp:fexp->fexp, ccexp:ccexp->ccexp} + type 'a folder = (* aggregation functions *) + {stm:stm*'a->'a, rexp:rexp*'a->'a, fexp:fexp*'a->'a, ccexp:ccexp*'a->'a} + type hasher = (* hashing functions *) + {stm:stm->word, rexp:rexp->word, fexp:fexp->word, ccexp:ccexp->word} + type equality = (* comparison functions *) + {stm:stm * stm->bool, rexp:rexp * rexp->bool, + fexp:fexp * fexp->bool, ccexp:ccexp * ccexp->bool} + type printer = (* pretty printing functions *) + {stm:stm->string, rexp:rexp->string, fexp:fexp->string, ccexp:ccexp->string, + dstReg : ty * var -> string, srcReg : ty * var -> string} + +end (* MLTREE *) + diff --git a/MLRISC/mltree/mltreecomp.sig b/MLRISC/mltree/mltreecomp.sig new file mode 100644 index 0000000..79ceab5 --- /dev/null +++ b/MLRISC/mltree/mltreecomp.sig @@ -0,0 +1,60 @@ +(* mltreeComp.sig --- translate mltrees to a flowgraph of target machine code. + * + * COPYRIGHT (c) 1995 AT&T Bell Laboratories. + *) + +(* + * This signature describes how MLTree extensions are compiled. + *) +signature MLTREE_EXTENSION_COMP = +sig + structure T : MLTREE + structure TS : MLTREE_STREAM where T = T + structure I : INSTRUCTIONS + structure CFG : CONTROL_FLOW_GRAPH + where I = I + and P = TS.S.P + (* + * The reducer is given to the client during the compilation of + * the user extensions. + *) + type reducer = + (I.instruction,I.C.cellset,I.operand,I.addressing_mode,CFG.cfg) TS.reducer + + val compileSext : reducer -> {stm:T.sext, an:T.an list} -> unit + val compileRext : reducer -> {e:T.ty * T.rext, rd:CellsBasis.cell, an:T.an list} -> unit + val compileFext : reducer -> {e:T.ty * T.fext, fd:CellsBasis.cell, an:T.an list} -> unit + val compileCCext : reducer -> {e:T.ty * T.ccext, ccd:CellsBasis.cell, an:T.an list} -> unit +end + + + + +signature MLTREECOMP = +sig + structure TS : MLTREE_STREAM + structure I : INSTRUCTIONS + structure CFG : CONTROL_FLOW_GRAPH + where I = I + and P = TS.S.P + structure Gen : MLTREEGEN + where T = TS.T + + type instrStream = (I.instruction, I.C.cellset, CFG.cfg) TS.stream + type mltreeStream = (TS.T.stm, TS.T.mlrisc list, CFG.cfg) TS.stream + + (* + * The instruction selection phase converts an instruction stream + * into a mltree stream. Please see the file "instructions/stream.sig" + * for description of the stream interface. + * + * Note: the mltree stream does NOT support direct instruction emission. + * Fo equivalent functionality, you can use the emit method + * of the instruction stream instead. + *) + val selectInstructions : instrStream -> mltreeStream +end + + + + diff --git a/MLRISC/mltree/operand-table.sig b/MLRISC/mltree/operand-table.sig new file mode 100644 index 0000000..952a0f6 --- /dev/null +++ b/MLRISC/mltree/operand-table.sig @@ -0,0 +1,59 @@ +(* + * A table for storing operands for a compilation unit. + * We give each distinct operand a unique (negative) value number. + *) +signature OPERAND_TABLE = +sig + + structure I : INSTRUCTIONS + + type operandTable + type valueNumber = I.C.cell + + datatype const = + INT of int (* small integer operand *) + | INTINF of MachineInt.machine_int (* large integer operand *) + | OPERAND of I.operand (* other operand *) + + datatype valueNumberMethods = + VALUE_NUMBERING of + { int : int -> valueNumber, + word : word -> valueNumber, + word32 : Word32.word -> valueNumber, + int32 : Int32.int -> valueNumber, + intinf : IntInf.int -> valueNumber, + operand : I.operand -> valueNumber + } + + exception NoOperand + exception NoInt + exception NoIntInf + exception NoConst + + (* Special values *) + val bot : valueNumber + val top : valueNumber + val volatile : valueNumber + + (* Create a new table *) + val create : int ref -> operandTable + + (* Lookup methods *) + + (* Value number -> int/operand/label *) + val const : valueNumber -> const + val int : operandTable -> int -> valueNumber + val word : operandTable -> word -> valueNumber + val int32 : operandTable -> Int32.int -> valueNumber + val word32 : operandTable -> Word32.word -> valueNumber + val intinf : operandTable -> IntInf.int -> valueNumber + val operand : operandTable -> I.operand -> valueNumber + + (* Create new value numbers *) + val makeNewValueNumbers : operandTable -> valueNumberMethods + + (* Lookup but don't create *) + val lookupValueNumbers : operandTable -> valueNumberMethods + + +end diff --git a/MLRISC/mltree/operand-table.sml b/MLRISC/mltree/operand-table.sml new file mode 100644 index 0000000..d3cbfd3 --- /dev/null +++ b/MLRISC/mltree/operand-table.sml @@ -0,0 +1,189 @@ +(* + * A table for storing operands for a compilation unit. + * We give each distinct operand a unique (negative) value number. + *) +functor OperandTable(Props : INSN_PROPERTIES) : OPERAND_TABLE = +struct + + structure I = Props.I + structure C = I.C + structure IH = IntHashTable + structure H = HashTable + + type valueNumber = C.cell + + datatype const = + INT of int (* small integer operands *) + | INTINF of IntInf.int (* large integer operands *) + | OPERAND of I.operand (* other operand *) + + structure IntInfMap = + RedBlackMapFn(type ord_key = IntInf.int + val compare = IntInf.compare + ) + + datatype operandTable = + TABLE of + { intTable : valueNumber IH.hash_table, + miTable : valueNumber IntInfMap.map ref, + opnTable : (I.operand,valueNumber) H.hash_table, + nextValueNumber : int ref + } + + datatype valueNumberMethods = + VALUE_NUMBERING of + { int : int -> valueNumber, + word : word -> valueNumber, + int32 : Int32.int -> valueNumber, + word32 : Word32.word -> valueNumber, + intinf : IntInf.int -> valueNumber, + operand : I.operand -> valueNumber + } + + exception NoOperand + exception NoConst + exception NoInt + exception NoIntInf + + val gp = C.cellkindDesc C.GP + + exception CONST of const + + fun mkConst(vn, const) = + C.CELL{id=vn, an=ref [CONST const], col=ref C.PSEUDO, desc=gp} + + val bot = C.CELL{id= ~9999999, an=ref [], col=ref C.PSEUDO, desc=gp} + val top = C.CELL{id= ~9999998, an=ref [], col=ref C.PSEUDO, desc=gp} + val volatile = C.CELL{id= ~9999997, an=ref [], col=ref C.PSEUDO, desc=gp} + + fun create(nextValueNumber) = + let + val opnTable = H.mkTable(Props.hashOpn,Props.eqOpn) (32,NoOperand) + val intTable = IH.mkTable (32, NoInt) + val miTable = ref IntInfMap.empty + + fun newInt i = + let val vn = !nextValueNumber (* value number *) + val _ = nextValueNumber := vn - 1; + val v = mkConst(vn, INT i) + in IH.insert intTable (i, v) + end + + fun init(n,0) = () + | init(n,m) = (newInt n; init(n+1,m-1)) + + in init(0,2); + TABLE{ intTable = intTable, + miTable = miTable, + opnTable = opnTable, + nextValueNumber = nextValueNumber + } + end + + fun wordToIntInf w = IntInf.fromInt(Word.toIntX w) + fun word32ToIntInf w = Word32.toLargeIntX w + fun wordToInt w = Word.toIntX w + fun word32ToInt w = Word32.toIntX w + fun intInfToInt i = IntInf.toInt i + fun intInfToInt32 i = Int32.fromLarge i + fun intToIntInf i = IntInf.fromInt i + fun intToInt32 i = Int32.fromInt i + fun int32ToIntInf i = Int32.toLarge i + fun int32ToInt i = Int32.toInt i + + (* Lookup the value number of a constant *) + fun int(TABLE{intTable, ...}) = IH.lookup intTable + + fun word(TABLE{intTable, ...}) w = IH.lookup intTable (wordToInt w) + + fun word32(TABLE{intTable, miTable, ...}) w = + IH.lookup intTable (word32ToInt w) handle Overflow => + case IntInfMap.find(!miTable, word32ToIntInf w) of + SOME v => v + | NONE => raise NoIntInf + + fun int32(TABLE{intTable, miTable, ...}) w = + IH.lookup intTable (int32ToInt w) handle Overflow => + case IntInfMap.find(!miTable, int32ToIntInf w) of + SOME v => v + | NONE => raise NoIntInf + + fun intinf(TABLE{intTable, miTable, ...}) i = + IH.lookup intTable (intInfToInt i) handle Overflow => + case IntInfMap.find(!miTable,i) of + SOME v => v + | NONE => raise NoIntInf + + fun operand(TABLE{opnTable,...}) = H.lookup opnTable + + fun lookupValueNumbers tbl = + VALUE_NUMBERING + { int = int tbl, + word = word tbl, + word32 = word32 tbl, + int32 = int32 tbl, + intinf = intinf tbl, + operand = operand tbl + } + + (* create new value numebers *) + fun makeNewValueNumbers(TABLE{opnTable, + nextValueNumber,intTable,miTable,...}) = + let val findOpn = H.find opnTable + val findInt = IH.find intTable + val insertOpn = H.insert opnTable + val insertInt = IH.insert intTable + + fun newConst(const) = + let val vn = !nextValueNumber + in nextValueNumber := vn - 1; + mkConst(vn,const) + end + + fun mkOpn opn = + case findOpn opn of + SOME v => v + | NONE => let val v = newConst(OPERAND opn) + in insertOpn(opn, v); v end + fun mkInt i = + case findInt i of + SOME v => v + | NONE => let val v = newConst(INT i) + in insertInt(i, v); v end + + fun insertIntInf(i, v) = + miTable := IntInfMap.insert(!miTable, i, v) + + fun mkIntInf' i = + case IntInfMap.find(!miTable, i) of + SOME v => v + | NONE => let val v = newConst(INTINF i) + in insertIntInf(i, v); v end + + fun mkIntInf i = mkInt(intInfToInt i) handle _ => mkIntInf' i + + fun mkWord w = mkInt(wordToInt w) + + fun mkInt32 i = mkInt(int32ToInt i) + handle _ => mkIntInf'(int32ToIntInf i) + + fun mkWord32 w = mkInt(word32ToInt w) + handle _ => mkIntInf'(word32ToIntInf w) + in VALUE_NUMBERING + {int=mkInt, + word=mkWord, + word32=mkWord32, + int32=mkInt32, + intinf=mkIntInf, + operand=mkOpn + } + end + + (* value number -> const *) + fun const(C.CELL{an, ...}) = + let fun find(CONST c::_) = c + | find(_::an) = find an + | find [] = raise NoConst + in find(!an) end + +end diff --git a/MLRISC/mltree/rtl-build.sig b/MLRISC/mltree/rtl-build.sig new file mode 100644 index 0000000..f57e3b3 --- /dev/null +++ b/MLRISC/mltree/rtl-build.sig @@ -0,0 +1,125 @@ +(* + * How to build primitive RTL operators + *) +signature RTL_BUILD = +sig + structure T : MLTREE + type ty = T.ty + type cond = T.cond + type fcond = T.fcond + + type effect + type region + type exp + type bool + type div_rounding_mode + + val intConst : ty -> int -> exp (* integer constant *) + val wordConst : ty -> Word32.word -> exp (* word constant *) + val ??? : ty -> exp (* an undefined value *) + + val newOp : string -> exp list -> exp (* create new operator *) + + val immed : ty -> exp -> exp (* immediate value *) + val operand : ty -> exp -> exp + val label : ty -> exp -> exp + val $ : CellsBasis.cellkind * ty -> exp -> exp + val Mem : CellsBasis.cellkind * ty -> exp * region -> exp + val Arg : ty * string * string -> exp + + (* Signed/unsigned promotion *) + val sx : ty * ty -> exp -> exp + val zx : ty * ty -> exp -> exp + + (* Integer operators *) + val ~ : ty -> exp -> exp + val + : ty -> exp * exp -> exp + val - : ty -> exp * exp -> exp + val muls : ty -> exp * exp -> exp + val mulu : ty -> exp * exp -> exp + val divs : ty -> div_rounding_mode * exp * exp -> exp + val divu : ty -> exp * exp -> exp + val rems : ty -> div_rounding_mode * exp * exp -> exp + val remu : ty -> exp * exp -> exp + + val addt : ty -> exp * exp -> exp + val subt : ty -> exp * exp -> exp + val mult : ty -> exp * exp -> exp + val divt : ty -> div_rounding_mode * exp * exp -> exp + + val notb : ty -> exp -> exp + val andb : ty -> exp * exp -> exp + val orb : ty -> exp * exp -> exp + val xorb : ty -> exp * exp -> exp + val eqvb : ty -> exp * exp -> exp + val << : ty -> exp * exp -> exp + val >> : ty -> exp * exp -> exp + val ~>> : ty -> exp * exp -> exp + val BitSlice : ty -> (int * int) list -> exp -> exp + + (* Boolean operators *) + (* val Cond : ty -> bool * exp * exp -> exp *) + val False : bool + val True : bool + val Not : bool -> bool + val And : bool * bool -> bool + val Or : bool * bool -> bool + val Cond : ty -> bool * exp * exp -> exp + + (* Integer comparisons *) + val == : ty -> exp * exp -> bool + val <> : ty -> exp * exp -> bool + val < : ty -> exp * exp -> bool + val > : ty -> exp * exp -> bool + val <= : ty -> exp * exp -> bool + val >= : ty -> exp * exp -> bool + val ltu : ty -> exp * exp -> bool + val leu : ty -> exp * exp -> bool + val gtu : ty -> exp * exp -> bool + val geu : ty -> exp * exp -> bool + val setcc : ty -> exp * exp -> bool + val getcc : ty -> exp * T.cond -> bool + + (* Floating point operators *) + val fadd : ty -> exp * exp -> exp + val fsub : ty -> exp * exp -> exp + val fmul : ty -> exp * exp -> exp + val fdiv : ty -> exp * exp -> exp + val fcopysign : ty -> exp * exp -> exp + val fabs : ty -> exp -> exp + val fneg : ty -> exp -> exp + val fsqrt : ty -> exp -> exp + + (* Floating point comparisons *) + val |?| : ty -> exp * exp -> bool + val |==| : ty -> exp * exp -> bool + val |?=| : ty -> exp * exp -> bool + val |<| : ty -> exp * exp -> bool + val |?<| : ty -> exp * exp -> bool + val |<=| : ty -> exp * exp -> bool + val |?<=| : ty -> exp * exp -> bool + val |>| : ty -> exp * exp -> bool + val |?>| : ty -> exp * exp -> bool + val |>=| : ty -> exp * exp -> bool + val |?>=| : ty -> exp * exp -> bool + val |<>| : ty -> exp * exp -> bool + val |<=>| : ty -> exp * exp -> bool + val |?<>| : ty -> exp * exp -> bool + val setfcc : ty -> exp * exp -> bool + val getfcc : ty -> exp * T.fcond -> bool + + (* Effect combinators *) + val := : ty -> exp * exp -> effect + val Par : effect * effect -> effect (* parallel effects *) + val Nop : effect (* empty effect *) + val Jmp : ty -> exp -> effect (* jump to address *) + val Call : ty -> exp -> effect (* call address *) + val Ret : effect (* return *) + val If : bool * effect * effect -> effect (* if/then/else *) + + val map : ty -> ('a -> 'b) -> 'a list -> 'b list + + val getNewOps : unit -> T.Basis.misc_op list + val clearNewOps : unit -> unit + +end diff --git a/MLRISC/mltree/rtl-build.sml b/MLRISC/mltree/rtl-build.sml new file mode 100644 index 0000000..d41cce3 --- /dev/null +++ b/MLRISC/mltree/rtl-build.sml @@ -0,0 +1,165 @@ +(* + * Build MLTree-based RTLs + *) + +functor RTLBuild(RTL : MLTREE_RTL) : RTL_BUILD = +struct + structure RTL = RTL + structure T = RTL.T + structure I = T.I + + type effect = RTL.rtl + type exp = T.rexp + type ty = T.ty + type bool = T.ccexp + type region = T.rexp + type cond = T.cond + type fcond = T.fcond + type div_rounding_mode = T.div_rounding_mode + + fun error msg = MLRiscErrorMsg.error("RTLBuild",msg) + + val hashCounter = ref 0w23 + fun newHash() = !hashCounter before hashCounter := !hashCounter + 0w23499 + fun newOper name = {name=name, hash=newHash(), attribs=ref 0w0} + + val newOpList = ref [] : T.Basis.misc_op list ref + fun getNewOps() = !newOpList + fun clearNewOps() = newOpList := [] + + fun newOp name = + let val oper = newOper name + val _ = newOpList := oper :: !newOpList; + val oper = T.OPER oper + in fn es => T.OP(32, oper, es) (* XXX *) + end + + fun op:= ty (lhs,rhs) = T.ASSIGN(ty,lhs,rhs) + + fun $ (k,ty) e = T.$(ty,k,e) + + fun Mem (k,ty) (addr,mem) = T.$(ty,k,addr) + + fun ??? ty = T.??? + fun Arg (ty,kind,name) = T.ARG(ty,ref(T.REP kind),name) + fun BitSlice ty slice e = T.BITSLICE(ty,slice,e) + + fun operand ty exp = exp + fun immed ty exp = exp + fun label ty exp = exp + + (* integer *) + fun intConst ty i = T.LI(I.fromInt(ty, i)) + fun wordConst ty w = T.LI(I.fromWord32(ty, w)) + + fun ternaryOp oper ty (x, y, z) = oper(x, ty, y, z) + fun binOp oper ty (x, y) = oper(ty,x,y) + fun unaryOp oper ty x = oper(ty,x) + + fun sx (from,to) e = T.SX(to, from, e) + fun zx (from,to) e = T.ZX(to, from, e) + + val op~ = unaryOp T.NEG + val op+ = binOp T.ADD + val op- = binOp T.SUB + val muls = binOp T.MULS + val divs = ternaryOp T.DIVS + val rems = ternaryOp T.REMS + val mulu = binOp T.MULU + val divu = binOp T.DIVU + val remu = binOp T.REMU + + val negt = unaryOp T.NEGT + val addt = binOp T.ADDT + val subt = binOp T.SUBT + val mult = binOp T.MULT + val divt = ternaryOp T.DIVT + + val notb = unaryOp T.NOTB + val andb = binOp T.ANDB + val orb = binOp T.ORB + val xorb = binOp T.XORB + val eqvb = binOp T.EQVB + val ~>> = binOp T.SRA + val >> = binOp T.SRL + val << = binOp T.SLL + + val True = T.TRUE + val False = T.FALSE + val Not = T.NOT + val And = T.AND + val Or = T.OR + val Xor = T.XOR + fun cmp cc ty (x,y) = T.CMP(ty,cc,x,y) + fun Cond ty (cond,x,y) = T.COND(ty,cond,x,y) + + val op== = cmp T.EQ + val op<> = cmp T.NE + val op>= = cmp T.GE + val op> = cmp T.GT + val op<= = cmp T.LE + val op< = cmp T.LT + val geu = cmp T.GEU + val gtu = cmp T.GTU + val leu = cmp T.LEU + val ltu = cmp T.LTU + val setcc = cmp T.SETCC + fun getcc ty (e,cc) = T.CMP(ty,cc,e,T.???) + (* floating point *) + fun i2f(ty,x) = T.CVTI2F(ty,ty,x) + fun f2i(ty,x) = T.CVTF2I(ty,T.TO_ZERO,ty,x) + fun fbinOp oper ty (x,y) = f2i(ty,oper(ty,i2f(ty,x),i2f(ty,y))) + fun funaryOp oper ty (x) = f2i(ty,oper(ty,i2f(ty,x))) + fun fcmp fcc ty (x,y) = T.FCMP(ty,fcc,i2f(ty,x),i2f(ty, y)) + fun getfcc ty (e,cc) = T.FCMP(ty,cc,i2f(ty,e),i2f(ty,T.???)) + + val fadd = fbinOp T.FADD + val fsub = fbinOp T.FSUB + val fmul = fbinOp T.FMUL + val fdiv = fbinOp T.FDIV + val fcopysign = fbinOp T.FCOPYSIGN + val fneg = funaryOp T.FNEG + val fabs = funaryOp T.FABS + val fsqrt = funaryOp T.FSQRT + + val |?| = fcmp T.? + val |==| = fcmp T.== + val |?=| = fcmp T.?= + val |<| = fcmp T.< + val |?<| = fcmp T.?< + val |<=| = fcmp T.<= + val |?<=| = fcmp T.?<= + val |>| = fcmp T.> + val |?>| = fcmp T.?> + val |>=| = fcmp T.>= + val |?>=| = fcmp T.?>= + val |<>| = fcmp T.<> + val |<=>| = fcmp T.<=> + val |?<>| = fcmp T.?<> + val setfcc = fcmp T.SETFCC + + (* effects *) + val Nop = T.SEQ [] + fun Jmp ty addr = T.JMP(addr,[]) + fun Call ty addr = T.CALL{funct=addr, targets=[], + defs=[], uses=[], + region=T.Region.memory, + pops=0} + val Ret = T.RET([]) + + fun If(T.TRUE, yes, no) = yes + | If(T.FALSE, yes, no) = no + | If(T.CMP(ty,cc,x,y),T.SEQ [],no) = + T.IF(T.CMP(ty,T.Basis.negateCond cc,x,y), no, Nop) + | If(a,b,c) = T.IF(a,b,c) + + fun Par(T.SEQ[],y) = y + | Par(x,T.SEQ[]) = x + | Par(T.SEQ xs,T.SEQ ys) = T.SEQ(xs@ys) + | Par(T.SEQ xs,y) = T.SEQ(xs@[y]) + | Par(x,T.SEQ ys) = T.SEQ(x::ys) + | Par(x,y) = T.SEQ[x,y] + + val map = fn _ => List.map + +end diff --git a/MLRISC/mltree/rtl-props.sig b/MLRISC/mltree/rtl-props.sig new file mode 100644 index 0000000..c18c18a --- /dev/null +++ b/MLRISC/mltree/rtl-props.sig @@ -0,0 +1,34 @@ +(* + * Interface of rtl info extraction from instructions. + * The code matching this interface is automatically generated by the MDL tool. + *) +signature RTL_PROPERTIES = +sig + + structure I : INSTRUCTIONS + structure C : CELLS + structure RTL : MLTREE_RTL + structure OT : OPERAND_TABLE + sharing I.C = C + sharing OT.I = I + sharing RTL.T = I.T + + datatype value = + CELL of C.cell (* a single value *) + | OPERAND of I.operand (* a complex operand *) + + (* Return the RTL describing the semantics of an instruction + * The rtl returned is in lambda-lifted form, i.e. it contains + * references to PARAM n, which refers to the nth input or output parameter. + *) + val rtl : I.instruction -> RTL.rtl + + (* + * Return the input/ouput parameters of an instruction. + * The input/output matches positionally with the info returned by + * the function rtl. + *) + val defUse : OT.valueNumberMethods -> I.instruction + -> value list * value list + +end diff --git a/MLRISC/omit-frameptr/omit-frame-pointer.sig b/MLRISC/omit-frameptr/omit-frame-pointer.sig new file mode 100644 index 0000000..d7e54f7 --- /dev/null +++ b/MLRISC/omit-frameptr/omit-frame-pointer.sig @@ -0,0 +1,15 @@ +(* omit-frame-pointer.sig + * + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * omit the frame pointer based by rewriting to use the stack pointer. + *) + +signature OMIT_FRAME_POINTER = sig + structure I : INSTRUCTIONS + structure CFG : CONTROL_FLOW_GRAPH where I = I + + (* idelta is the intial displacement between the fp and sp. *) + val omitframeptr : {vfp:CellsBasis.cell, idelta:Int32.int option, cfg:CFG.cfg} -> unit +end diff --git a/MLRISC/ppc/README.ppc b/MLRISC/ppc/README.ppc new file mode 100644 index 0000000..ad272eb --- /dev/null +++ b/MLRISC/ppc/README.ppc @@ -0,0 +1,13 @@ +Changes to the instruction set +============================== + +1. Added Region.region to CALL instructions. +2. Added a lot of 64-bit and single-precision floating point instructions + +New optimizations in the PowerPC backend +====================================== +1. Propagation of annotations. +2. Strength reduction of unsigned 32-bit multiplication and division. +3. NOTB folding is implemented. For example, this combines + NOTB with AND into NAND. +4. 3-operand floating point instructions are generated. diff --git a/MLRISC/ppc/backpatch/ppcDelaySlots.sml b/MLRISC/ppc/backpatch/ppcDelaySlots.sml new file mode 100644 index 0000000..69823c1 --- /dev/null +++ b/MLRISC/ppc/backpatch/ppcDelaySlots.sml @@ -0,0 +1,39 @@ +(* + * This file was automatically generated by MDGen (v3.0) + * from the machine description file "ppc/ppc.md". + *) + + +functor PPCDelaySlots(structure I : PPCINSTR + structure P : INSN_PROPERTIES + where I = I + ) : DELAY_SLOT_PROPERTIES = +struct + structure I = I + datatype delay_slot = D_NONE | D_ERROR | D_ALWAYS | D_TAKEN | D_FALLTHRU + + fun error msg = MLRiscErrorMsg.error("PPCDelaySlots",msg) + val delaySlotSize = 4 + fun delaySlot {instr, backward} = let + fun delaySlot instr = + ( + case instr of + _ => {nop=true, n=false, nOn=D_ERROR, nOff=D_NONE} + ) + in delaySlot instr + end + + fun enableDelaySlot _ = error "enableDelaySlot" + fun conflict _ = error "conflict" + fun delaySlotCandidate {jmp, delaySlot} = let + fun delaySlotCandidate delaySlot = + ( + case delaySlot of + _ => true + ) + in delaySlotCandidate delaySlot + end + + fun setTarget _ = error "setTarget" +end + diff --git a/MLRISC/ppc/backpatch/ppcJumps.sml b/MLRISC/ppc/backpatch/ppcJumps.sml new file mode 100644 index 0000000..e12d7d2 --- /dev/null +++ b/MLRISC/ppc/backpatch/ppcJumps.sml @@ -0,0 +1,218 @@ +functor PPCJumps + (structure Instr: PPCINSTR + structure Shuffle : PPCSHUFFLE where I = Instr + structure MLTreeEval : MLTREE_EVAL where T = Instr.T + ) : SDI_JUMPS = +struct + structure I = Instr + structure C = I.C + structure Const = I.Constant + structure CB = CellsBasis + + fun error msg = MLRiscErrorMsg.error("PPCJumps",msg) + + val warn_long_branch = + MLRiscControl.mkFlag ("ppc-warn-long-branch", + "whether to warn about long form of branch") + + val branchDelayedArch = false + + fun isSdi(I.ANNOTATION{i,...}) =isSdi i + | isSdi(I.LIVE _) = true + | isSdi(I.KILL _) = true + | isSdi(I.COPY _) = true + | isSdi(I.INSTR instr) = let + fun operand(I.LabelOp _) = true + | operand _ = false + in + case instr + of I.L{d, ...} => operand d + | I.LF{d, ...} => operand d + | I.ST{d, ...} => operand d + | I.STF{d, ...} => operand d + | I.ARITHI{im, ...} => operand im + | I.ROTATEI{sh, ...} => operand sh + | I.COMPARE{rb, ...} => operand rb + | I.TW{si, ...} => operand si + | I.TD{si, ...} => operand si + | I.BC{addr, ...} => operand addr + | _ => false + end + + + (* max Size is not used for the PPC span dependency analysis. *) + fun maxSize _ = error "maxSize" + + fun minSize(I.LIVE _) = 0 + | minSize(I.KILL _) = 0 + | minSize(I.COPY _) = 0 + | minSize(I.ANNOTATION{i,...}) = minSize i + | minSize _ = 4 + + fun sdiSize(I.ANNOTATION{i, ...}, labmap, loc) = sdiSize(i, labmap, loc) + | sdiSize(I.LIVE _, _, _) = 0 + | sdiSize(I.KILL _, _, _) = 0 + | sdiSize(I.COPY{k=CB.GP, src, dst, tmp, ...}, _, _) = + 4 * length(Shuffle.shuffle{tmp=tmp, dst=dst, src=src}) + | sdiSize(I.COPY{k=CB.FP, src, dst, tmp, ...}, _, _) = + 4 * length(Shuffle.shufflefp{src=src, dst=dst, tmp=tmp}) + | sdiSize(I.INSTR instr, labmap, loc) = let + fun signed16 n = ~32768 <= n andalso n < 32768 + fun signed12 n = ~2048 <= n andalso n < 2048 + fun signed14 n = ~8192 <= n andalso n < 8192 + fun unsigned16 n = 0 <= n andalso n < 65536 + fun unsigned5 n = 0 <=n andalso n < 32 + + fun operand(I.LabelOp le, inRange, lo, hi) = + if inRange(MLTreeEval.valueOf le) then lo else hi + | operand _ = error "sdiSize:operand" + in + case instr + of I.L{ld=(I.LBZ | I.LHZ | I.LHA | I.LWZ),d,...} => + operand(d, signed16, 4, 8) + | I.L{d,...} => operand(d, signed12, 4, 8) + | I.LF{ld=(I.LFS | I.LFD), d, ...} => operand(d, signed16, 4, 8) + | I.LF{d, ...} => operand(d, signed12, 4, 8) + | I.ST{st=(I.STB | I.STH | I.STW), d, ...} => operand(d, signed16, 4, 8) + | I.ST{d, ...} => operand(d, signed12, 4, 8) + | I.STF{st=(I.STFS | I.STFD), d, ...} => operand(d, signed16, 4, 8) + | I.STF{d, ...} => operand(d, signed12, 4, 8) + | I.ARITHI{oper, im, ...} => + (case oper + of I.ADDI => operand(im, signed16, 4, 8) + | (I.ADDIS | I.SUBFIC | I.MULLI) => operand(im, signed16, 4, 12) + | (I.ANDI_Rc | I.ANDIS_Rc | I.ORI | I.ORIS | I.XORI | I.XORIS) => + operand(im, unsigned16, 4, 12) + | (I.SRAWI | I.SRADI) => operand(im, unsigned5, 4, 12) + (*esac*)) + | I.ROTATEI{sh, ...} => error "sdiSize:ROTATE" + | I.COMPARE{cmp, rb, ...} => + (case cmp + of I.CMP => operand(rb, signed16, 4, 12) + | I.CMPL => operand(rb, unsigned16, 4, 12) + (*esac*)) + | I.BC{addr=I.LabelOp lexp, ...} => + if signed14((MLTreeEval.valueOf lexp - loc) div 4) then 4 else 8 + | _ => error "sdiSize" + end + | sdiSize _ = error "sdiSize" + + fun valueOf(I.LabelOp lexp) = MLTreeEval.valueOf lexp + | valueOf _ = error "valueOf" + + fun split opnd = let + val i = valueOf opnd + val w = Word.fromInt i + val hi = Word.~>>(w, 0w16) + val lo = Word.andb(w, 0w65535) + val (high,low) = + if lo < 0w32768 then (hi, lo) else (hi+0w1, lo-0w65536) + in (Word.toIntX high, Word.toIntX low) + end + + fun cnv I.ADDI = I.ADD + | cnv I.SUBFIC = I.SUBF + | cnv I.MULLI = I.MULLW + | cnv I.ANDI_Rc = I.AND + | cnv I.ORI = I.OR + | cnv I.XORI = I.XOR + | cnv I.SRAWI = I.SRAW + | cnv I.SRADI = I.SRAD + | cnv _ = error "cnv" + + fun expand(I.ANNOTATION{i, ...}, size, pos) = expand(i, size, pos) + | expand(I.LIVE _, _, _) = [] + | expand(I.KILL _, _, _) = [] + | expand(I.COPY{k=CB.GP, src, tmp, dst, ...}, _, _) = + Shuffle.shuffle{src=src, dst=dst, tmp=tmp} + | expand(I.COPY{k=CB.FP, src, tmp, dst, ...}, _, _) = + Shuffle.shufflefp{src=src, dst=dst, tmp=tmp} + | expand(instr as I.INSTR i, size, pos) = + (case i + of I.L{ld, rt, ra, d, mem} => + (case size + of 4 => [I.l{ld=ld, rt=rt, ra=ra, d=I.ImmedOp(valueOf d), mem=mem}] + | 8 => let + val (hi,lo) = split d + in + [I.arithi{oper=I.ADDIS, rt=C.asmTmpR, ra=ra, im=I.ImmedOp hi}, + I.l{ld=ld, rt=rt, ra=C.asmTmpR, d=I.ImmedOp lo, mem=mem}] + end + | _ => error "expand:L" + (*esac*)) + | I.LF{ld, ft, ra, d, mem} => + (case size + of 4 => [I.lf{ld=ld, ft=ft, ra=ra, d=I.ImmedOp(valueOf d), mem=mem}] + | 8 => let + val (hi,lo) = split d + in + [I.arithi{oper=I.ADDIS, rt=C.asmTmpR, ra=ra, im=I.ImmedOp hi}, + I.lf{ld=ld, ft=ft, ra=C.asmTmpR, d=I.ImmedOp lo, mem=mem}] + end + | _ => error "expand:LF" + (*esac*)) + | I.ST{st, rs, ra, d, mem} => + (case size + of 4 => [I.st{st=st, rs=rs, ra=ra, d=I.ImmedOp(valueOf d), mem=mem}] + | 8 => let + val (hi,lo) = split d + in + [I.arithi{oper=I.ADDIS, rt=C.asmTmpR, ra=ra, im=I.ImmedOp hi}, + I.st{st=st, rs=rs, ra=C.asmTmpR, d=I.ImmedOp lo, mem=mem}] + end + | _ => error "expand:ST" + (*esac*)) + | I.STF{st, fs, ra, d, mem} => + (case size + of 4 => [I.stf{st=st, fs=fs, ra=ra, d=I.ImmedOp(valueOf d), mem=mem}] + | 8 => let + val (hi,lo) = split d + in + [I.arithi{oper=I.ADDIS, rt=C.asmTmpR, ra=ra, im=I.ImmedOp hi}, + I.stf{st=st, fs=fs, ra=C.asmTmpR, d=I.ImmedOp lo, mem=mem}] + end + | _ => error "expand:STF" + (*esac*)) + | I.ARITHI{oper, rt, ra, im} => + (case size + of 4 => [I.arithi{oper=oper, rt=rt, ra=ra, im=I.ImmedOp(valueOf im)}] + | 8 => let val (hi, lo) = split im (* must be ADDI *) + in [I.arithi{oper=I.ADDIS, rt=rt, ra=ra, im=I.ImmedOp hi}, + I.arithi{oper=I.ADDI, rt=rt, ra=rt, im=I.ImmedOp lo}] + end + | 12 => + let val (hi,lo) = split im + in [I.arithi{oper=I.ADDIS, rt=C.asmTmpR, ra=C.Reg CellsBasis.GP 0, + im=I.ImmedOp hi}, + I.arithi{oper=I.ADDI,rt=C.asmTmpR,ra=C.asmTmpR,im=I.ImmedOp lo}, + I.arith{oper=cnv oper, rt=rt, ra=ra, rb=C.asmTmpR, OE=false, + Rc=(oper = I.ANDI_Rc)}] + end + | _ => error "ARITHI" + (*esac*)) + | I.BC{bo, bf, bit, fall, addr, LK} => + (case size + of 4 => [instr] + | 8 => let + val newBO = + (case bo + of I.TRUE => I.FALSE + | I.FALSE => I.TRUE + | I.ALWAYS => error "expand:newBO:BC" + | I.COUNTER{eqZero, cond} => error "expand:newBO:COUNTER" + (*esac*)) + in + if !warn_long_branch then + print("emiting long form of branch" ^ "\n") + else (); + [I.bc{bo=newBO, bf=bf, bit=bit, addr=fall, fall=fall, LK=false}, + I.b{addr=addr, LK=LK}] + end + | _ => error "expand:BC" + (*esac*)) + (* The other span dependent instructions are not generated *) + | I.COMPARE _ => error "expand:COMPARE" + | _ => error "expand" + (*esac*)) + | expand _ = error "expand" +end diff --git a/MLRISC/ppc/c-calls/ppc-macosx.sml b/MLRISC/ppc/c-calls/ppc-macosx.sml new file mode 100644 index 0000000..24fe75d --- /dev/null +++ b/MLRISC/ppc/c-calls/ppc-macosx.sml @@ -0,0 +1,476 @@ +(* ppc-macosx.sml + * + * COPYRIGHT (c) 2003 John Reppy (http://www.cs.uchicago.edu/~jhr) + * All rights reserved. + * + * C function calls for the PowerPC using the MacOS X ABI. + * + * Register conventions: + * + * Register Callee-save Purpose + * -------- ----------- ------- + * GPR0 no Zero + * 1 no Stack pointer + * 2 no scratch (TOC on AIX) + * 3 no arg0 and return result + * 4-10 no arg1-arg7 + * 11 no scratch + * 12 no holds taget of indirect call + * 13-31 yes callee-save registers + * + * FPR0 no scratch + * 1-13 no floating-point arguments + * 14-31 yes floating-point callee-save registers + * + * V0-V1 no scratch vector registers + * 2-13 no vector argument registers + * 14-19 no scratch vector registers + * 20-31 yes callee-save vector registers + * + * LR no link register holds return address + * + * CR0-CR1 no scratch condition registers + * 2-4 yes callee-save condition registers + * 5-7 no scratch condition registers + * + * Calling convention: + * + * Return result: + * + Integer and pointer results are returned in GPR3 + * + 64-bit integers (long long) returned in GPR3/GPR4 + * + float/double results are returned in FPR1 + * + Struct results are returned in space provided by the caller. + * The address of this space is passed to the callee as an + * implicit first argument in GPR3 and the first real argument is + * passed in GPR4. + * + * Function arguments: + * * arguments (except for floating-point values) are passed in + * registers GPR3-GPR10 + * + * Note also that stack frames are supposed to be 16-byte aligned. + *) + +(* we extend the interface to support generating the stubs needed for + * dynamic linking (see "Inside MacOS X: Mach-O Runtime Architecture" + * for details. + *) +signature PPC_MACOSX_C_CALLS = + sig + include C_CALLS + +(* + val genStub : { + name : T.rexp, + proto : CTypes.c_proto, + paramAlloc : {szb : int, align : int} -> bool, + structRet : {szb : int, align : int} -> T.rexp, + saveRestoreDedicated : + T.mlrisc list -> {save: T.stm list, restore: T.stm list}, + callComment : string option, + args : c_arg list + } -> { + callseq : T.stm list, + result: T.mlrisc list + } +*) + + end; + +functor PPCMacOSX_CCalls ( + + structure T : MLTREE + + ): C_CALLS = struct + + structure T = T + structure CTy = CTypes + structure C = PPCCells + + fun error msg = MLRiscErrorMsg.error ("PPCCompCCalls", msg) + + (* the location of arguments/parameters; offsets are given with respect to the + * low end of the parameter area. + *) + datatype arg_location + = Reg of T.ty * T.reg * T.I.machine_int option + (* integer/pointer argument in register *) + | FReg of T.fty * T.reg * T.I.machine_int option + (* floating-point argument in register *) + | Stk of T.ty * T.I.machine_int (* integer/pointer argument in parameter area *) + | FStk of T.fty * T.I.machine_int (* floating-point argument in parameter area *) + | Args of arg_location list + + val wordTy = 32 + val fltTy = 32 (* MLRISC type of float *) + val dblTy = 64 (* MLRISC type of double *) + + (* shorts and chars are promoted to 32-bits *) + val naturalIntSz = wordTy + + (* stack pointer *) + val spReg = T.REG(wordTy, C.GPReg 1) + + (* registers used for parameter passing *) + val argGPRs = List.map C.GPReg [3, 4, 5, 6, 7, 8, 9, 10] + val argFPRs = List.map C.FPReg [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13] + val resGPR = C.GPReg 3 + val resGPR2 = C.GPReg 4 + val resRegLoc = Reg(wordTy, resGPR, NONE) + val resRegLoc2 = Reg(wordTy, resGPR2, NONE) + val resRegLocPair = Args[resRegLoc, resRegLoc2] + val resFPR = C.FPReg 1 + + (* C callee-save registers *) + val calleeSaveRegs = List.map C.GPReg [ + 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, + 23, 24, 25, 26, 27, 28, 29, 30, 31 + ] + val calleeSaveFRegs = List.map C.FPReg [ + 14, 15, 16, 17, 18, 19, 20, 21, 22, + 23, 24, 25, 26, 27, 28, 29, 30, 31 + ] + + (* C caller-save registers (including argument registers) *) + val callerSaveRegs = + T.FPR(T.FREG(dblTy, C.FPReg 0)) :: + (List.map (fn r => T.GPR(T.REG(wordTy, C.GPReg r))) [2, 11, 12]) + + val linkReg = T.GPR(T.REG(wordTy, C.lr)) + + (* the parameter area lies just above the linkage area in the caller's frame. + * The linkage area is 24 bytes, so the first parameter is at 24(sp). + *) + val paramAreaOffset = 24 + + (* size, padding, and natural alignment for integer types. Note that the + * padding is based on the parameter-passing description on p. 35 of the + * documentation and the alignment is from p. 31. + *) + fun sizeOfInt CTy.I_char = {sz = 1, pad = 3, align = 1} + | sizeOfInt CTy.I_short = {sz = 2, pad = 2, align = 2} + | sizeOfInt CTy.I_int = {sz = 4, pad = 0, align = 4} + | sizeOfInt CTy.I_long = {sz = 4, pad = 0, align = 4} + | sizeOfInt CTy.I_long_long = {sz = 8, pad = 0, align = 8} + + (* sizes of other C types *) + val sizeOfPtr = {sz = 4, pad = 0, align = 4} + + (* align the address to the given alignment, which must be a power of 2 *) + fun alignAddr (addr, align) = let + val mask = Word.fromInt(align-1) + in + Word.toIntX(Word.andb(Word.fromInt addr + mask, Word.notb mask)) + end + + (* compute the size and alignment information for a struct; tys is the list + * of member types. The alignment is what Apple calls the "embedding" alignment. + * The total size is padded to agree with the struct's alignment. + *) + fun sizeOfStruct tys = let + fun ssz [] = {sz = 0, align = 1} + | ssz (first::rest) = let + fun f ([], maxAlign, offset) = + {sz = alignAddr(offset, maxAlign), align = maxAlign} + | f (ty::tys, maxAlign, offset) = let + val {sz, align} = sizeOfTy ty + val align = Int.min(align, 4) + val offset = alignAddr(offset, align) + in + f (tys, Int.max(maxAlign, align), offset+sz) + end + val {sz, align} = sizeOfTy first + in + f (rest, align, sz) + end + in + ssz tys + end + + (* the size alignment of a union type is the maximum of the sizes and alignments of the + * members. The final size is padded to agree with the alignment. + *) + and sizeOfUnion tys = let + fun usz [] = {sz = 0, align = 1} + | usz (first::rest) = let + fun f ([], maxAlign, maxSz) = + {sz = alignAddr(maxSz, maxAlign), align = maxAlign} + | f (ty::tys, maxAlign, maxSz) = let + val {sz, align} = sizeOfTy ty + in + f (tys, Int.max(maxAlign, align), Int.max(align, maxAlign)) + end + val {sz, align} = sizeOfTy first + in + f (rest, align, sz) + end + in + usz tys + end + + and sizeOfTy CTy.C_void = error "unexpected void argument type" + | sizeOfTy CTy.C_float = {sz = 4, align = 4} + | sizeOfTy CTy.C_double = {sz = 8, align = 8} + | sizeOfTy CTy.C_long_double = {sz = 8, align = 8} + | sizeOfTy (CTy.C_unsigned isz) = let + val {sz, align, ...} = sizeOfInt isz + in + {sz = sz, align = align} + end + | sizeOfTy (CTy.C_signed isz) = let + val {sz, align, ...} = sizeOfInt isz + in + {sz = sz, align = align} + end + | sizeOfTy CTy.C_PTR = {sz = 4, align = 4} + | sizeOfTy (CTy.C_ARRAY(ty, n)) = let + val {sz, align} = sizeOfTy ty + in + {sz = n*sz, align = align} + end + | sizeOfTy (CTy.C_STRUCT tys) = sizeOfStruct tys + | sizeOfTy (CTy.C_UNION tys) = sizeOfUnion tys + + (* compute the layout of a C call's arguments *) + fun layout {conv, retTy, paramTys} = let + fun gprRes isz = (case #sz(sizeOfInt isz) + of 8 => SOME resRegLocPair + | _ => SOME resRegLoc + (* end case *)) + val (resLoc, argGPRs, structRet) = (case retTy + of CTy.C_void => (NONE, argGPRs, NONE) + | CTy.C_float => (SOME(FReg(fltTy, resFPR, NONE)), argGPRs, NONE) + | CTy.C_double => (SOME(FReg(dblTy, resFPR, NONE)), argGPRs, NONE) + | CTy.C_long_double => (SOME(FReg(dblTy, resFPR, NONE)), argGPRs, NONE) + | CTy.C_unsigned isz => (gprRes isz, argGPRs, NONE) + | CTy.C_signed isz => (gprRes isz, argGPRs, NONE) + | CTy.C_PTR => (SOME resRegLoc, argGPRs, NONE) + | CTy.C_ARRAY _ => error "array return type" + | CTy.C_STRUCT s => let + val sz = #sz(sizeOfStruct s) + in + (* Note that this is a place where the MacOS X and Linux ABIs differ. + * In Linux, GPR3/GPR4 are used to return composite values of 8 bytes. + *) + (SOME resRegLoc, List.tl argGPRs, SOME{szb=sz, align=4}) + end + | CTy.C_UNION u => let + val sz = #sz(sizeOfUnion u) + in + (SOME resRegLoc, List.tl argGPRs, SOME{szb=sz, align=4}) + end + (* end case *)) + fun assign ([], offset, _, _, layout) = (offset, List.rev layout) + | assign (ty::tys, offset, availGPRs, availFPRs, layout) = ( + case ty + of CTy.C_void => error "unexpected void argument type" + | CTy.C_float => (case (availGPRs, availFPRs) + of (_::gprs, fpr::fprs) => + assign (tys, offset+4, gprs, fprs, FReg(fltTy, fpr, SOME offset)::layout) + | ([], fpr::fprs) => + assign (tys, offset+4, [], fprs, FReg(fltTy, fpr, SOME offset)::layout) + | ([], []) => + assign (tys, offset+4, [], [], FStk(fltTy, offset)::layout) + | _ => error "FPRs exhausted before GPRs" + (* end case *)) + | CTy.C_double => + assignFPR (tys, offset, availGPRs, availFPRs, layout) + | CTy.C_long_double => + assignFPR (tys, offset, availGPRs, availFPRs, layout) + | (CTy.C_unsigned isz | CTy.C_signed isz) => + assignGPR([sizeOfInt isz], tys, offset, availGPRs, availFPRs, layout) + | CTy.C_PTR => + assignGPR([sizeOfPtr], tys, offset, availGPRs, availFPRs, layout) + | CTy.C_ARRAY _ => + assignGPR([sizeOfPtr], tys, offset, availGPRs, availFPRs, layout) + | CTy.C_STRUCT tys' => + assignMem(sizeOfStruct tys', tys, offset, availGPRs, availFPRs, layout) + | CTy.C_UNION tys' => + assignMem(sizeOfUnion tys', tys, offset, availGPRs, availFPRs, layout) + (* end case *)) + (* assign a GP register and memory for an integer/pointer argument. *) + and assignGPR ([], args, offset, availGPRs, availFPRs, layout) = + assign (args, offset, availGPRs, availFPRs, layout) + | assignGPR ({ sz = 8, ... } :: szs, + args, offset, availGPRs, availFPRs, layout) = + (* The C compiler seems to treat "long long" arguments + * as two individual 4-byte arguments. There seems to be + * no 8-byte alignment requirement, as far as I can tell. + * - Matthias *) + assignGPR ({ sz = 4, pad = 0, align = 4 } :: + { sz = 4, pad = 0, align = 4 } :: szs, + args, offset, availGPRs, availFPRs, layout) + | assignGPR ({ sz, pad, ... } :: szs, + args, offset, availGPRs, availFPRs, layout) = + let val (loc, availGPRs) = + case availGPRs + of [] => (Stk(wordTy, offset), []) + | r1 :: rs => (Reg(wordTy, r1, SOME offset), rs) + val offset = offset + IntInf.fromInt (sz + pad) + in assignGPR (szs, args, offset, availGPRs, availFPRs, loc :: layout) + end + + (* assign a FP register and memory/GPRs for double-precision argument. *) + and assignFPR (args, offset, availGPRs, availFPRs, layout) = let + fun continue (availGPRs, availFPRs, loc) = + assign (args, offset+8, availGPRs, availFPRs, loc::layout) + fun freg fpr = FReg(dblTy, fpr, SOME offset) + in + case (availGPRs, availFPRs) + of (_::_::gprs, fpr::fprs) => continue (gprs, fprs, freg fpr) + | (_, fpr::fprs) => continue ([], fprs, freg fpr) + | ([], []) => continue ([], [], FStk(dblTy, offset)) + | _ => error "FPRs exhausted before GPRs" + (* end case *) + end + (* assign a argument locations to pass a composite argument (struct or union) *) + and assignMem ({sz, ...}, args, offset, availGPRs, availFPRs, layout) = let + val sz = IntInf.fromInt sz + fun assignMem (relOffset, availGPRs, fields) = + if (relOffset < sz) + then let + val (loc, availGPRs) = (case availGPRs + of [] => (Stk(wordTy, offset+relOffset), []) + | r1::rs => (Reg(wordTy, r1, SOME(offset+relOffset)), rs) + (* end case *)) + in + assignMem (relOffset+4, availGPRs, loc::fields) + end + else assign (args, offset+relOffset, availGPRs, availFPRs, + Args(List.rev fields) :: layout) + in + assignMem (0, availGPRs, []) + end + val (sz, argLocs) = assign (paramTys, 0, argGPRs, argFPRs, []) + in { + argLocs = argLocs, + argMem = {szb = IntInf.toInt sz, align = 4}, + resLoc = resLoc, + structRetLoc = structRet + } end + + datatype c_arg + = ARG of T.rexp + | FARG of T.fexp + | ARGS of c_arg list + + val memRg = T.Region.memory + val stkRg = T.Region.memory + + (* SP-based address of parameter at given offset *) + fun paramAddr off = + T.ADD(wordTy, spReg, T.LI(off + IntInf.fromInt paramAreaOffset)) + + fun genCall { + name, proto, paramAlloc, structRet, saveRestoreDedicated, + callComment, args + } = let + val {conv, retTy, paramTys} = proto + val {argLocs, argMem, resLoc, structRetLoc} = layout proto + (* inform the client of the size of the parameter area *) + val _ = if not(paramAlloc argMem) + then raise Fail "parameter memory allocation not implemented yet" + else () + (* generate code to assign the arguments to their locations *) + fun assignArgs ([], [], stms) = stms + | assignArgs (Reg(ty, r, _) :: locs, ARG exp :: args, stms) = + assignArgs (locs, args, T.MV(ty, r, exp) :: stms) + | assignArgs (Stk(ty, off) :: locs, ARG exp :: args, stms) = + assignArgs (locs, args, T.STORE(ty, paramAddr off, exp, stkRg) :: stms) + | assignArgs (FReg(ty, r, _) :: locs, FARG fexp :: args, stms) = + assignArgs (locs, args, T.FMV(ty, r, fexp) :: stms) + | assignArgs (FStk(ty, off) :: locs, FARG fexp :: args, stms) = + assignArgs (locs, args, T.FSTORE(ty, paramAddr off, fexp, stkRg) :: stms) + | assignArgs ((Args locs') :: locs, (ARGS args') :: args, stms) = + raise Fail "ARGS constructor is obsolete" + | assignArgs ((Args locs') :: locs, ARG exp :: args, stms) = let + (* MLRISC expression for address inside the source struct *) + fun addr 0 = T.LOAD(wordTy, exp, memRg) + | addr offset = T.LOAD(wordTy, T.ADD(wordTy, exp, T.LI offset), memRg) + fun copy ([], _, stms) = assignArgs(locs, args, stms) + | copy (Reg(ty, r, _) :: locs, offset, stms) = + copy (locs, offset+4, T.MV(ty, r, addr offset)::stms) + | copy (Stk(ty, off) :: locs, offset, stms) = let + val r = C.newReg() + in + copy (locs, offset+4, + T.STORE(ty, paramAddr off, T.REG(wordTy, r), stkRg) + :: T.MV(ty, r, addr offset) :: stms) + end + | copy _ = raise Fail "unexpected FReg/FStk/Args in location list" + in + (* copy data from memory specified by exp to locs' *) + copy (locs', 0, stms) + end + | assignArgs _ = error "argument/formal mismatch" + val argSetupCode = List.rev(assignArgs(argLocs, args, [])) + (* convert the result location to an MLRISC expression list *) + val result = (case resLoc + of NONE => [] + | SOME(Reg(ty, r, _)) => [T.GPR(T.REG(ty, r))] + | SOME(FReg(ty, r, _)) => [T.FPR(T.FREG(ty, r))] + | SOME(Args[Reg(ty1,r1,_),Reg(ty2,r2,_)]) => + [T.GPR(T.REG(ty1,r1)), T.GPR(T.REG(ty2,r2))] + | SOME _ => raise Fail "bogus result location" + (* end case *)) + (* make struct return-area setup (if necessary) *) + val setupStructRet = (case structRetLoc + of NONE => [] + | SOME loc => let + val structAddr = structRet loc + in + [T.MV(wordTy, resGPR, structAddr)] + end + (* end case *)) + (* determine the registers used and defined by this call *) + val (uses, defs) = let + val locs = (case resLoc + of NONE => argLocs + | SOME loc => loc::argLocs + (* end case *)) + (* get the list of registers used to pass arguments and results *) + fun addArgReg (Reg(ty, r, _)::locs, argRegs) = + addArgReg (locs, T.GPR(T.REG(ty, r))::argRegs) + | addArgReg (FReg(ty, r, _)::locs, argRegs) = + addArgReg (locs, T.FPR(T.FREG(ty, r))::argRegs) + | addArgReg ((Args locs')::locs, argRegs) = + addArgReg (locs, addArgReg(locs', argRegs)) + | addArgReg (_::locs, argRegs) = addArgReg(locs, argRegs) + | addArgReg ([], argRegs) = argRegs + val argRegs = addArgReg (locs, []) + in + (argRegs, linkReg :: callerSaveRegs) + end + (* the actual call instruction *) + val callStm = T.CALL { + funct = name, targets = [], + defs = defs, uses = uses, + region = memRg, pops = 0 + } + (* annotate, if necessary *) + val callStm = (case callComment + of NONE => callStm + | SOME c => T.ANNOTATION(callStm, #create MLRiscAnnotations.COMMENT c) + (* end case *)) + (* take care of dedicated client registers *) + val {save, restore} = saveRestoreDedicated defs + val callseq = List.concat [ + setupStructRet, + argSetupCode, + save, + [callStm], + restore + ] + in + (* check calling convention *) + case conv + of ("" | "ccall") => () + | _ => error (concat [ + "unknown calling convention \"", + String.toString conv, "\"" + ]) + (* end case *); + {callseq = callseq, result = result} + end + + end diff --git a/MLRISC/ppc/emit/ppcAsm.sml b/MLRISC/ppc/emit/ppcAsm.sml new file mode 100644 index 0000000..646f56e --- /dev/null +++ b/MLRISC/ppc/emit/ppcAsm.sml @@ -0,0 +1,575 @@ +(* + * WARNING: This file was automatically generated by MDLGen (v3.1) + * from the machine description file "ppc/ppc.mdl". + * DO NOT EDIT this file directly + *) + + +functor PPCAsmEmitter(structure S : INSTRUCTION_STREAM + structure Instr : PPCINSTR + where T = S.P.T + structure Shuffle : PPCSHUFFLE + where I = Instr + structure MLTreeEval : MLTREE_EVAL + where T = Instr.T + ) : INSTRUCTION_EMITTER = +struct + structure I = Instr + structure C = I.C + structure T = I.T + structure S = S + structure P = S.P + structure Constant = I.Constant + + open AsmFlags + + fun error msg = MLRiscErrorMsg.error("PPCAsmEmitter",msg) + + fun makeStream formatAnnotations = + let val stream = !AsmStream.asmOutStream + fun emit' s = TextIO.output(stream,s) + val newline = ref true + val tabs = ref 0 + fun tabbing 0 = () + | tabbing n = (emit' "\t"; tabbing(n-1)) + fun emit s = (tabbing(!tabs); tabs := 0; newline := false; emit' s) + fun nl() = (tabs := 0; if !newline then () else (newline := true; emit' "\n")) + fun comma() = emit "," + fun tab() = tabs := 1 + fun indent() = tabs := 2 + fun ms n = let val s = Int.toString n + in if n<0 then "-"^String.substring(s,1,size s-1) + else s + end + fun emit_label lab = emit(P.Client.AsmPseudoOps.lexpToString(T.LABEL lab)) + fun emit_labexp le = emit(P.Client.AsmPseudoOps.lexpToString (T.LABEXP le)) + fun emit_const c = emit(Constant.toString c) + fun emit_int i = emit(ms i) + fun paren f = (emit "("; f(); emit ")") + fun defineLabel lab = emit(P.Client.AsmPseudoOps.defineLabel lab^"\n") + fun entryLabel lab = defineLabel lab + fun comment msg = (tab(); emit("/* " ^ msg ^ " */"); nl()) + fun annotation a = comment(Annotations.toString a) + fun getAnnotations() = error "getAnnotations" + fun doNothing _ = () + fun fail _ = raise Fail "AsmEmitter" + fun emit_region mem = comment(I.Region.toString mem) + val emit_region = + if !show_region then emit_region else doNothing + fun pseudoOp pOp = (emit(P.toString pOp); emit "\n") + fun init size = (comment("Code Size = " ^ ms size); nl()) + val emitCellInfo = AsmFormatUtil.reginfo + (emit,formatAnnotations) + fun emitCell r = (emit(CellsBasis.toString r); emitCellInfo r) + fun emit_cellset(title,cellset) = + (nl(); comment(title^CellsBasis.CellSet.toString cellset)) + val emit_cellset = + if !show_cellset then emit_cellset else doNothing + fun emit_defs cellset = emit_cellset("defs: ",cellset) + fun emit_uses cellset = emit_cellset("uses: ",cellset) + val emit_cutsTo = + if !show_cutsTo then AsmFormatUtil.emit_cutsTo emit + else doNothing + fun emitter instr = + let + fun asm_spr (I.XER) = "xer" + | asm_spr (I.LR) = "lr" + | asm_spr (I.CTR) = "ctr" + and emit_spr x = emit (asm_spr x) + and emit_operand (I.RegOp GP) = emitCell GP + | emit_operand (I.ImmedOp int) = emit_int int + | emit_operand (I.LabelOp labexp) = emit_labexp labexp + and asm_load (I.LBZ) = "lbz" + | asm_load (I.LBZE) = "lbze" + | asm_load (I.LHZ) = "lhz" + | asm_load (I.LHZE) = "lhze" + | asm_load (I.LHA) = "lha" + | asm_load (I.LHAE) = "lhae" + | asm_load (I.LWZ) = "lwz" + | asm_load (I.LWZE) = "lwze" + | asm_load (I.LDE) = "lde" + | asm_load (I.LBZU) = "lbzu" + | asm_load (I.LHZU) = "lhzu" + | asm_load (I.LHAU) = "lhau" + | asm_load (I.LWZU) = "lwzu" + | asm_load (I.LDZU) = "ldzu" + and emit_load x = emit (asm_load x) + and asm_store (I.STB) = "stb" + | asm_store (I.STBE) = "stbe" + | asm_store (I.STH) = "sth" + | asm_store (I.STHE) = "sthe" + | asm_store (I.STW) = "stw" + | asm_store (I.STWE) = "stwe" + | asm_store (I.STDE) = "stde" + | asm_store (I.STBU) = "stbu" + | asm_store (I.STHU) = "sthu" + | asm_store (I.STWU) = "stwu" + | asm_store (I.STDU) = "stdu" + and emit_store x = emit (asm_store x) + and asm_fload (I.LFS) = "lfs" + | asm_fload (I.LFSE) = "lfse" + | asm_fload (I.LFD) = "lfd" + | asm_fload (I.LFDE) = "lfde" + | asm_fload (I.LFSU) = "lfsu" + | asm_fload (I.LFDU) = "lfdu" + and emit_fload x = emit (asm_fload x) + and asm_fstore (I.STFS) = "stfs" + | asm_fstore (I.STFSE) = "stfse" + | asm_fstore (I.STFD) = "stfd" + | asm_fstore (I.STFDE) = "stfde" + | asm_fstore (I.STFSU) = "stfsu" + | asm_fstore (I.STFDU) = "stfdu" + and emit_fstore x = emit (asm_fstore x) + and asm_cmp (I.CMP) = "cmp" + | asm_cmp (I.CMPL) = "cmpl" + and emit_cmp x = emit (asm_cmp x) + and asm_fcmp (I.FCMPO) = "fcmpo" + | asm_fcmp (I.FCMPU) = "fcmpu" + and emit_fcmp x = emit (asm_fcmp x) + and asm_unary (I.NEG) = "neg" + | asm_unary (I.EXTSB) = "extsb" + | asm_unary (I.EXTSH) = "extsh" + | asm_unary (I.EXTSW) = "extsw" + | asm_unary (I.CNTLZW) = "cntlzw" + | asm_unary (I.CNTLZD) = "cntlzd" + and emit_unary x = emit (asm_unary x) + and asm_funary (I.FMR) = "fmr" + | asm_funary (I.FNEG) = "fneg" + | asm_funary (I.FABS) = "fabs" + | asm_funary (I.FNABS) = "fnabs" + | asm_funary (I.FSQRT) = "fsqrt" + | asm_funary (I.FSQRTS) = "fsqrts" + | asm_funary (I.FRSP) = "frsp" + | asm_funary (I.FCTIW) = "fctiw" + | asm_funary (I.FCTIWZ) = "fctiwz" + | asm_funary (I.FCTID) = "fctid" + | asm_funary (I.FCTIDZ) = "fctidz" + | asm_funary (I.FCFID) = "fcfid" + and emit_funary x = emit (asm_funary x) + and asm_farith (I.FADD) = "fadd" + | asm_farith (I.FSUB) = "fsub" + | asm_farith (I.FMUL) = "fmul" + | asm_farith (I.FDIV) = "fdiv" + | asm_farith (I.FADDS) = "fadds" + | asm_farith (I.FSUBS) = "fsubs" + | asm_farith (I.FMULS) = "fmuls" + | asm_farith (I.FDIVS) = "fdivs" + and emit_farith x = emit (asm_farith x) + and asm_farith3 (I.FMADD) = "fmadd" + | asm_farith3 (I.FMADDS) = "fmadds" + | asm_farith3 (I.FMSUB) = "fmsub" + | asm_farith3 (I.FMSUBS) = "fmsubs" + | asm_farith3 (I.FNMADD) = "fnmadd" + | asm_farith3 (I.FNMADDS) = "fnmadds" + | asm_farith3 (I.FNMSUB) = "fnmsub" + | asm_farith3 (I.FNMSUBS) = "fnmsubs" + | asm_farith3 (I.FSEL) = "fsel" + and emit_farith3 x = emit (asm_farith3 x) + and asm_arith (I.ADD) = "add" + | asm_arith (I.SUBF) = "subf" + | asm_arith (I.MULLW) = "mullw" + | asm_arith (I.MULLD) = "mulld" + | asm_arith (I.MULHW) = "mulhw" + | asm_arith (I.MULHWU) = "mulhwu" + | asm_arith (I.DIVW) = "divw" + | asm_arith (I.DIVD) = "divd" + | asm_arith (I.DIVWU) = "divwu" + | asm_arith (I.DIVDU) = "divdu" + | asm_arith (I.AND) = "and" + | asm_arith (I.OR) = "or" + | asm_arith (I.XOR) = "xor" + | asm_arith (I.NAND) = "nand" + | asm_arith (I.NOR) = "nor" + | asm_arith (I.EQV) = "eqv" + | asm_arith (I.ANDC) = "andc" + | asm_arith (I.ORC) = "orc" + | asm_arith (I.SLW) = "slw" + | asm_arith (I.SLD) = "sld" + | asm_arith (I.SRW) = "srw" + | asm_arith (I.SRD) = "srd" + | asm_arith (I.SRAW) = "sraw" + | asm_arith (I.SRAD) = "srad" + and emit_arith x = emit (asm_arith x) + and asm_arithi (I.ADDI) = "addi" + | asm_arithi (I.ADDIS) = "addis" + | asm_arithi (I.SUBFIC) = "subfic" + | asm_arithi (I.MULLI) = "mulli" + | asm_arithi (I.ANDI_Rc) = "andi." + | asm_arithi (I.ANDIS_Rc) = "andis." + | asm_arithi (I.ORI) = "ori" + | asm_arithi (I.ORIS) = "oris" + | asm_arithi (I.XORI) = "xori" + | asm_arithi (I.XORIS) = "xoris" + | asm_arithi (I.SRAWI) = "srawi" + | asm_arithi (I.SRADI) = "sradi" + and emit_arithi x = emit (asm_arithi x) + and asm_rotate (I.RLWNM) = "rlwnm" + | asm_rotate (I.RLDCL) = "rldcl" + | asm_rotate (I.RLDCR) = "rldcr" + and emit_rotate x = emit (asm_rotate x) + and asm_rotatei (I.RLWINM) = "rlwinm" + | asm_rotatei (I.RLWIMI) = "rlwimi" + | asm_rotatei (I.RLDICL) = "rldicl" + | asm_rotatei (I.RLDICR) = "rldicr" + | asm_rotatei (I.RLDIC) = "rldic" + | asm_rotatei (I.RLDIMI) = "rldimi" + and emit_rotatei x = emit (asm_rotatei x) + and asm_ccarith (I.CRAND) = "crand" + | asm_ccarith (I.CROR) = "cror" + | asm_ccarith (I.CRXOR) = "crxor" + | asm_ccarith (I.CRNAND) = "crnand" + | asm_ccarith (I.CRNOR) = "crnor" + | asm_ccarith (I.CREQV) = "creqv" + | asm_ccarith (I.CRANDC) = "crandc" + | asm_ccarith (I.CRORC) = "crorc" + and emit_ccarith x = emit (asm_ccarith x) + and asm_bit (I.LT) = "lt" + | asm_bit (I.GT) = "gt" + | asm_bit (I.EQ) = "eq" + | asm_bit (I.SO) = "so" + | asm_bit (I.FL) = "lt" + | asm_bit (I.FG) = "gt" + | asm_bit (I.FE) = "eq" + | asm_bit (I.FU) = "un" + | asm_bit (I.FX) = "lt" + | asm_bit (I.FEX) = "gt" + | asm_bit (I.VX) = "eq" + | asm_bit (I.OX) = "so" + and emit_bit x = emit (asm_bit x) + +(*#line 608.7 "ppc/ppc.mdl"*) + fun emitx (s, I.RegOp _) = (if ((String.sub (s, (size s) - 1)) = #"e") + then + ( emit (String.substring (s, 0, (size s) - 1)); + emit "xe" ) + else + ( emit s; + emit "x" )) + | emitx (s, _) = emit s + +(*#line 614.7 "ppc/ppc.mdl"*) + fun eOERc {OE=false, Rc=false} = () + | eOERc {OE=false, Rc=true} = emit "." + | eOERc {OE=true, Rc=false} = emit "o" + | eOERc {OE=true, Rc=true} = emit "o." + +(*#line 618.7 "ppc/ppc.mdl"*) + fun eRc false = "" + | eRc true = "." + +(*#line 619.7 "ppc/ppc.mdl"*) + fun cr_bit (cr, bit) = (4 * (CellsBasis.physicalRegisterNum cr)) + + (case bit of + I.LT => 0 + | I.GT => 1 + | I.EQ => 2 + | I.SO => 3 + | I.FL => 0 + | I.FG => 1 + | I.FE => 2 + | I.FU => 3 + | I.FX => 0 + | I.FEX => 1 + | I.VX => 2 + | I.OX => 3 + ) + +(*#line 626.7 "ppc/ppc.mdl"*) + fun eCRbit x = emit (Int.toString (cr_bit x)) + +(*#line 627.7 "ppc/ppc.mdl"*) + fun eLK true = emit "l" + | eLK false = () + +(*#line 628.7 "ppc/ppc.mdl"*) + fun eI (I.RegOp _) = () + | eI _ = emit "i" + +(*#line 629.7 "ppc/ppc.mdl"*) + fun eBI (bo, bf, bit) = + (case (bo, CellsBasis.physicalRegisterNum bf) of + (I.ALWAYS, _) => () + | (I.COUNTER{cond=NONE, ...}, _) => () + | (_, 0) => emit (asm_bit bit) + | (_, n) => emit ((("4*cr" ^ (Int.toString n)) ^ "+") ^ (asm_bit bit)) + ) + +(*#line 635.7 "ppc/ppc.mdl"*) + fun emit_bo bo = emit + (case bo of + I.TRUE => "t" + | I.FALSE => "f" + | I.ALWAYS => "" + | I.COUNTER{eqZero, cond=NONE} => (if eqZero + then "dz" + else "dnz") + | I.COUNTER{eqZero, cond=SOME cc} => (if eqZero + then "dz" + else "dnz") ^ (if cc + then "t" + else "f") + ) + +(*#line 646.7 "ppc/ppc.mdl"*) + fun eME (SOME me) = + ( emit ", "; + emit_int me ) + | eME NONE = () + +(*#line 649.7 "ppc/ppc.mdl"*) + fun addr (ra, I.RegOp rb) = + ( emitCell ra; + emit ", "; + emitCell rb ) + | addr (ra, d) = + ( emit_operand d; + emit "("; + emitCell ra; + emit ")" ) + fun emitInstr' instr = + (case instr of + I.L{ld, rt, ra, d, mem} => + ( emitx (asm_load ld, d); + emit "\t"; + emitCell rt; + emit ", "; + addr (ra, d); + emit_region mem ) + | I.LF{ld, ft, ra, d, mem} => + ( emitx (asm_fload ld, d); + emit "\t"; + emitCell ft; + emit ", "; + addr (ra, d); + emit_region mem ) + | I.ST{st, rs, ra, d, mem} => + ( emitx (asm_store st, d); + emit "\t"; + emitCell rs; + emit ", "; + addr (ra, d); + emit_region mem ) + | I.STF{st, fs, ra, d, mem} => + ( emitx (asm_fstore st, d); + emit "\t"; + emitCell fs; + emit ", "; + addr (ra, d); + emit_region mem ) + | I.UNARY{oper, rt, ra, Rc, OE} => + ( emit_unary oper; + eOERc {Rc=Rc, OE=OE}; + emit "\t"; + emitCell rt; + emit ", "; + emitCell ra ) + | I.ARITH{oper, rt, ra, rb, Rc, OE} => + ( emit_arith oper; + eOERc {Rc=Rc, OE=OE}; + emit "\t"; + emitCell rt; + emit ", "; + emitCell ra; + emit ", "; + emitCell rb ) + | I.ARITHI{oper, rt, ra, im} => + ( emit_arithi oper; + emit "\t"; + emitCell rt; + emit ", "; + emitCell ra; + emit ", "; + emit_operand im ) + | I.ROTATE{oper, ra, rs, sh, mb, me} => + ( emit_rotate oper; + emit "\t"; + emitCell ra; + emit ", "; + emitCell rs; + emit ", "; + emitCell sh; + emit ", "; + emit_int mb; + eME me ) + | I.ROTATEI{oper, ra, rs, sh, mb, me} => + ( emit_rotatei oper; + emit "\t"; + emitCell ra; + emit ", "; + emitCell rs; + emit ", "; + emit_operand sh; + emit ", "; + emit_int mb; + eME me ) + | I.COMPARE{cmp, l, bf, ra, rb} => + ( emit_cmp cmp; + eI rb; + emit "\t"; + emitCell bf; + emit ", "; + emit (if l + then "1" + else "0"); + emit ", "; + emitCell ra; + emit ", "; + emit_operand rb ) + | I.FCOMPARE{cmp, bf, fa, fb} => + ( emit_fcmp cmp; + emit "\t"; + emitCell bf; + emit ", "; + emitCell fa; + emit ", "; + emitCell fb ) + | I.FUNARY{oper, ft, fb, Rc} => + ( emit_funary oper; + eRc Rc; + emit "\t"; + emitCell ft; + emit ", "; + emitCell fb ) + | I.FARITH{oper, ft, fa, fb, Rc} => + ( emit_farith oper; + eRc Rc; + emit "\t"; + emitCell ft; + emit ", "; + emitCell fa; + emit ", "; + emitCell fb ) + | I.FARITH3{oper, ft, fa, fb, fc, Rc} => + ( emit_farith3 oper; + eRc Rc; + emit "\t"; + emitCell ft; + emit ", "; + emitCell fa; + emit ", "; + emitCell fb; + emit ", "; + emitCell fc ) + | I.CCARITH{oper, bt, ba, bb} => + ( emit_ccarith oper; + emit "\t"; + eCRbit bt; + emit ", "; + eCRbit ba; + emit ", "; + eCRbit bb ) + | I.MCRF{bf, bfa} => + ( emit "mcrf\t"; + emitCell bf; + emit ", "; + emitCell bfa ) + | I.MTSPR{rs, spr} => + ( emit "mt"; + emitCell spr; + emit "\t"; + emitCell rs ) + | I.MFSPR{rt, spr} => + ( emit "mf"; + emitCell spr; + emit "\t"; + emitCell rt ) + | I.LWARX{rt, ra, rb} => + ( emit "lwarx\t"; + emitCell rt; + emit ", "; + emitCell ra; + emit ", "; + emitCell rb ) + | I.STWCX{rs, ra, rb} => + ( emit "stwcx.\t"; + emitCell rs; + emit ", "; + emitCell ra; + emit ", "; + emitCell rb ) + | I.TW{to, ra, si} => + ( emit "tw"; + eI si; + emit "\t"; + emit_int to; + emit ", "; + emitCell ra; + emit ", "; + emit_operand si ) + | I.TD{to, ra, si} => + ( emit "td"; + eI si; + emit "\t"; + emit_int to; + emit ", "; + emitCell ra; + emit ", "; + emit_operand si ) + | I.BC{bo, bf, bit, addr, LK, fall} => + ( emit "b"; + emit_bo bo; + eLK LK; + emit "\t"; + eBI (bo, bf, bit); + emit ", "; + emit_operand addr ) + | I.BCLR{bo, bf, bit, LK, labels} => + ( emit "b"; + emit_bo bo; + emit "lr"; + eLK LK; + emit "\t"; + eBI (bo, bf, bit)) + | I.B{addr, LK} => + ( emit "b"; + eLK LK; + emit "\t"; + emit_operand addr ) + | I.CALL{def, use, cutsTo, mem} => + ( emit "blrl"; + emit_region mem; + emit_defs def; + emit_uses use; + emit_cutsTo cutsTo ) + | I.SOURCE{} => emit "source" + | I.SINK{} => emit "sink" + | I.PHI{} => emit "phi" + ) + in tab(); emitInstr' instr; nl() + end (* emitter *) + and emitInstrIndented i = (indent(); emitInstr i; nl()) + and emitInstrs instrs = + app (if !indent_copies then emitInstrIndented + else emitInstr) instrs + + and emitInstr(I.ANNOTATION{i,a}) = + ( comment(Annotations.toString a); + nl(); + emitInstr i ) + | emitInstr(I.LIVE{regs, spilled}) = + comment("live= " ^ CellsBasis.CellSet.toString regs ^ + "spilled= " ^ CellsBasis.CellSet.toString spilled) + | emitInstr(I.KILL{regs, spilled}) = + comment("killed:: " ^ CellsBasis.CellSet.toString regs ^ + "spilled:: " ^ CellsBasis.CellSet.toString spilled) + | emitInstr(I.INSTR i) = emitter i + | emitInstr(I.COPY{k=CellsBasis.GP, sz, src, dst, tmp}) = + emitInstrs(Shuffle.shuffle{tmp=tmp, src=src, dst=dst}) + | emitInstr(I.COPY{k=CellsBasis.FP, sz, src, dst, tmp}) = + emitInstrs(Shuffle.shufflefp{tmp=tmp, src=src, dst=dst}) + | emitInstr _ = error "emitInstr" + + in S.STREAM{beginCluster=init, + pseudoOp=pseudoOp, + emit=emitInstr, + endCluster=fail, + defineLabel=defineLabel, + entryLabel=entryLabel, + comment=comment, + exitBlock=doNothing, + annotation=annotation, + getAnnotations=getAnnotations + } + end +end + diff --git a/MLRISC/ppc/emit/ppcAsmSyntax.sml b/MLRISC/ppc/emit/ppcAsmSyntax.sml new file mode 100644 index 0000000..883e31c --- /dev/null +++ b/MLRISC/ppc/emit/ppcAsmSyntax.sml @@ -0,0 +1,6 @@ +structure PPCAsmSyntax = struct + + val ibm_syntax = MLRiscControl.mkFlag ("ppc-ibm-syntax", + "whether IBM syntax should be used in the PPC assembler") + +end diff --git a/MLRISC/ppc/emit/ppcMC.sml b/MLRISC/ppc/emit/ppcMC.sml new file mode 100644 index 0000000..a2524c9 --- /dev/null +++ b/MLRISC/ppc/emit/ppcMC.sml @@ -0,0 +1,777 @@ +(* + * WARNING: This file was automatically generated by MDLGen (v3.1) + * from the machine description file "ppc/ppc.mdl". + * DO NOT EDIT this file directly + *) + + +functor PPCMCEmitter(structure Instr : PPCINSTR + structure MLTreeEval : MLTREE_EVAL where T = Instr.T + structure Stream : INSTRUCTION_STREAM + structure CodeString : CODE_STRING + ) : INSTRUCTION_EMITTER = +struct + structure I = Instr + structure C = I.C + structure Constant = I.Constant + structure T = I.T + structure S = Stream + structure P = S.P + structure W = Word32 + + (* PPC is big endian *) + + fun error msg = MLRiscErrorMsg.error("PPCMC",msg) + fun makeStream _ = + let infix && || << >> ~>> + val op << = W.<< + val op >> = W.>> + val op ~>> = W.~>> + val op || = W.orb + val op && = W.andb + val itow = W.fromInt + fun emit_bool false = 0w0 : W.word + | emit_bool true = 0w1 : W.word + val emit_int = itow + fun emit_word w = w + fun emit_label l = itow(Label.addrOf l) + fun emit_labexp le = itow(MLTreeEval.valueOf le) + fun emit_const c = itow(Constant.valueOf c) + val w32ToByte = Word8.fromLarge o Word32.toLarge + val loc = ref 0 + + (* emit a byte *) + fun eByte b = + let val i = !loc in loc := i + 1; CodeString.update(i,b) end + + (* emit the low order byte of a word *) + (* note: fromLargeWord strips the high order bits! *) + fun eByteW w = + let val i = !loc + in loc := i + 1; CodeString.update(i, w32ToByte w) end + + fun doNothing _ = () + fun fail _ = raise Fail "MCEmitter" + fun getAnnotations () = error "getAnnotations" + + fun pseudoOp pOp = P.emitValue{pOp=pOp, loc= !loc,emit=eByte} + + fun init n = (CodeString.init n; loc := 0) + + + fun eWord32 w = + let val b8 = w + val w = w >> 0wx8 + val b16 = w + val w = w >> 0wx8 + val b24 = w + val w = w >> 0wx8 + val b32 = w + in + ( eByteW b32; + eByteW b24; + eByteW b16; + eByteW b8 ) + end + fun emit_GP r = itow (CellsBasis.physicalRegisterNum r) + and emit_FP r = itow (CellsBasis.physicalRegisterNum r) + and emit_CC r = itow (CellsBasis.physicalRegisterNum r) + and emit_SPR r = itow (CellsBasis.physicalRegisterNum r) + and emit_MEM r = itow (CellsBasis.physicalRegisterNum r) + and emit_CTRL r = itow (CellsBasis.physicalRegisterNum r) + and emit_CELLSET r = itow (CellsBasis.physicalRegisterNum r) + fun emit_operand (I.RegOp GP) = emit_GP GP + | emit_operand (I.ImmedOp int) = itow int + | emit_operand (I.LabelOp labexp) = itow (MLTreeEval.valueOf labexp) + and emit_fcmp (I.FCMPO) = (0wx20 : Word32.word) + | emit_fcmp (I.FCMPU) = (0wx0 : Word32.word) + and emit_unary (I.NEG) = (0wx68 : Word32.word) + | emit_unary (I.EXTSB) = (0wx3BA : Word32.word) + | emit_unary (I.EXTSH) = (0wx39A : Word32.word) + | emit_unary (I.EXTSW) = (0wx3DA : Word32.word) + | emit_unary (I.CNTLZW) = (0wx1A : Word32.word) + | emit_unary (I.CNTLZD) = (0wx3A : Word32.word) + and emit_funary (I.FMR) = (0wx3F, 0wx48) + | emit_funary (I.FNEG) = (0wx3F, 0wx28) + | emit_funary (I.FABS) = (0wx3F, 0wx108) + | emit_funary (I.FNABS) = (0wx3F, 0wx88) + | emit_funary (I.FSQRT) = (0wx3F, 0wx16) + | emit_funary (I.FSQRTS) = (0wx3B, 0wx16) + | emit_funary (I.FRSP) = (0wx3F, 0wxC) + | emit_funary (I.FCTIW) = (0wx3F, 0wxE) + | emit_funary (I.FCTIWZ) = (0wx3F, 0wxF) + | emit_funary (I.FCTID) = (0wx3F, 0wx32E) + | emit_funary (I.FCTIDZ) = (0wx3F, 0wx32F) + | emit_funary (I.FCFID) = (0wx3F, 0wx34E) + and emit_farith (I.FADD) = (0wx3F, 0wx15) + | emit_farith (I.FSUB) = (0wx3F, 0wx14) + | emit_farith (I.FMUL) = (0wx3F, 0wx19) + | emit_farith (I.FDIV) = (0wx3F, 0wx12) + | emit_farith (I.FADDS) = (0wx3B, 0wx15) + | emit_farith (I.FSUBS) = (0wx3B, 0wx14) + | emit_farith (I.FMULS) = (0wx3B, 0wx19) + | emit_farith (I.FDIVS) = (0wx3B, 0wx12) + and emit_farith3 (I.FMADD) = (0wx3F, 0wx1D) + | emit_farith3 (I.FMADDS) = (0wx3B, 0wx1D) + | emit_farith3 (I.FMSUB) = (0wx3F, 0wx1C) + | emit_farith3 (I.FMSUBS) = (0wx3B, 0wx1C) + | emit_farith3 (I.FNMADD) = (0wx3F, 0wx1F) + | emit_farith3 (I.FNMADDS) = (0wx3B, 0wx1F) + | emit_farith3 (I.FNMSUB) = (0wx3F, 0wx1E) + | emit_farith3 (I.FNMSUBS) = (0wx3B, 0wx1E) + | emit_farith3 (I.FSEL) = (0wx3F, 0wx17) + and emit_bo (I.TRUE) = (0wxC : Word32.word) + | emit_bo (I.FALSE) = (0wx4 : Word32.word) + | emit_bo (I.ALWAYS) = (0wx14 : Word32.word) + | emit_bo (I.COUNTER{eqZero, cond}) = + (case cond of + NONE => (if eqZero + then 0wx12 + else 0wx10) + | SOME cc => + (case (eqZero, cc) of + (false, false) => 0wx0 + | (false, true) => 0wx8 + | (true, false) => 0wx2 + | (true, true) => 0wxA + ) + ) + and emit_arith (I.ADD) = (0wx10A : Word32.word) + | emit_arith (I.SUBF) = (0wx28 : Word32.word) + | emit_arith (I.MULLW) = (0wxEB : Word32.word) + | emit_arith (I.MULLD) = (0wxE9 : Word32.word) + | emit_arith (I.MULHW) = (0wx4B : Word32.word) + | emit_arith (I.MULHWU) = (0wxB : Word32.word) + | emit_arith (I.DIVW) = (0wx1EB : Word32.word) + | emit_arith (I.DIVD) = (0wx1E9 : Word32.word) + | emit_arith (I.DIVWU) = (0wx1CB : Word32.word) + | emit_arith (I.DIVDU) = (0wx1C9 : Word32.word) + | emit_arith (I.AND) = (0wx1C : Word32.word) + | emit_arith (I.OR) = (0wx1BC : Word32.word) + | emit_arith (I.XOR) = (0wx13C : Word32.word) + | emit_arith (I.NAND) = (0wx1DC : Word32.word) + | emit_arith (I.NOR) = (0wx7C : Word32.word) + | emit_arith (I.EQV) = (0wx11C : Word32.word) + | emit_arith (I.ANDC) = (0wx3C : Word32.word) + | emit_arith (I.ORC) = (0wx19C : Word32.word) + | emit_arith (I.SLW) = (0wx18 : Word32.word) + | emit_arith (I.SLD) = (0wx1B : Word32.word) + | emit_arith (I.SRW) = (0wx218 : Word32.word) + | emit_arith (I.SRD) = (0wx21B : Word32.word) + | emit_arith (I.SRAW) = (0wx318 : Word32.word) + | emit_arith (I.SRAD) = (0wx31A : Word32.word) + and emit_arithi (I.ADDI) = (0wxE : Word32.word) + | emit_arithi (I.ADDIS) = (0wxF : Word32.word) + | emit_arithi (I.SUBFIC) = (0wx8 : Word32.word) + | emit_arithi (I.MULLI) = (0wx7 : Word32.word) + | emit_arithi (I.ANDI_Rc) = (0wx1C : Word32.word) + | emit_arithi (I.ANDIS_Rc) = (0wx1D : Word32.word) + | emit_arithi (I.ORI) = (0wx18 : Word32.word) + | emit_arithi (I.ORIS) = (0wx19 : Word32.word) + | emit_arithi (I.XORI) = (0wx1A : Word32.word) + | emit_arithi (I.XORIS) = (0wx1B : Word32.word) + | emit_arithi (I.SRAWI) = error "SRAWI" + | emit_arithi (I.SRADI) = error "SRADI" + and emit_ccarith (I.CRAND) = (0wx101 : Word32.word) + | emit_ccarith (I.CROR) = (0wx1C1 : Word32.word) + | emit_ccarith (I.CRXOR) = (0wxC1 : Word32.word) + | emit_ccarith (I.CRNAND) = (0wxE1 : Word32.word) + | emit_ccarith (I.CRNOR) = (0wx21 : Word32.word) + | emit_ccarith (I.CREQV) = (0wx121 : Word32.word) + | emit_ccarith (I.CRANDC) = (0wx81 : Word32.word) + | emit_ccarith (I.CRORC) = (0wx1A1 : Word32.word) + fun x_form {opcd, rt, ra, rb, xo, rc} = + let val rc = emit_bool rc + in eWord32 ((opcd << 0wx1A) + ((rt << 0wx15) + ((ra << 0wx10) + ((rb << 0wxB) + ((xo << 0wx1) + rc))))) + end + and xl_form {opcd, bt, ba, bb, xo, lk} = + let val lk = emit_bool lk + in eWord32 ((opcd << 0wx1A) + ((bt << 0wx15) + ((ba << 0wx10) + ((bb << 0wxB) + ((xo << 0wx1) + lk))))) + end + and m_form {opcd, rs, ra, rb, mb, me, rc} = + let val rc = emit_bool rc + in eWord32 ((opcd << 0wx1A) + ((rs << 0wx15) + ((ra << 0wx10) + ((rb << 0wxB) + ((mb << 0wx6) + ((me << 0wx1) + rc)))))) + end + and a_form {opcd, frt, fra, frb, frc, xo, rc} = + let val rc = emit_bool rc + in eWord32 ((opcd << 0wx1A) + ((frt << 0wx15) + ((fra << 0wx10) + ((frb << 0wxB) + ((frc << 0wx6) + ((xo << 0wx1) + rc)))))) + end + and loadx {rt, ra, rb, xop} = + let val rt = emit_GP rt + val ra = emit_GP ra + val rb = emit_GP rb + in eWord32 ((rt << 0wx15) + ((ra << 0wx10) + ((rb << 0wxB) + ((xop << 0wx1) + 0wx7C000000)))) + end + and loadd {opcd, rt, ra, d} = + let val rt = emit_GP rt + val ra = emit_GP ra + val d = emit_operand d + in eWord32 ((opcd << 0wx1A) + ((rt << 0wx15) + ((ra << 0wx10) + (d && 0wxFFFF)))) + end + and loadde {opcd, rt, ra, de, xop} = + let val rt = emit_GP rt + val ra = emit_GP ra + val de = emit_operand de + in eWord32 ((opcd << 0wx1A) + ((rt << 0wx15) + ((ra << 0wx10) + (((de && 0wxFFF) << 0wx4) + xop)))) + end + and load {ld, rt, ra, d} = + (case (d, ld) of + (I.RegOp rb, I.LBZ) => loadx {rt=rt, ra=ra, rb=rb, xop=0wx57} + | (I.RegOp rb, I.LBZE) => loadx {rt=rt, ra=ra, rb=rb, xop=0wx5F} + | (I.RegOp rb, I.LHZ) => loadx {rt=rt, ra=ra, rb=rb, xop=0wx117} + | (I.RegOp rb, I.LHZE) => loadx {rt=rt, ra=ra, rb=rb, xop=0wx11F} + | (I.RegOp rb, I.LHA) => loadx {rt=rt, ra=ra, rb=rb, xop=0wx157} + | (I.RegOp rb, I.LHAE) => loadx {rt=rt, ra=ra, rb=rb, xop=0wx15F} + | (I.RegOp rb, I.LWZ) => loadx {rt=rt, ra=ra, rb=rb, xop=0wx17} + | (I.RegOp rb, I.LWZE) => loadx {rt=rt, ra=ra, rb=rb, xop=0wx1F} + | (I.RegOp rb, I.LDE) => loadx {rt=rt, ra=ra, rb=rb, xop=0wx31F} + | (d, I.LBZ) => loadd {opcd=0wx22, rt=rt, ra=ra, d=d} + | (de, I.LBZE) => loadde {opcd=0wx3A, rt=rt, ra=ra, de=de, xop=0wx0} + | (d, I.LHZ) => loadd {opcd=0wx28, rt=rt, ra=ra, d=d} + | (de, I.LHZE) => loadde {opcd=0wx3A, rt=rt, ra=ra, de=de, xop=0wx2} + | (d, I.LHA) => loadd {opcd=0wx2A, rt=rt, ra=ra, d=d} + | (de, I.LHAE) => loadde {opcd=0wx3A, rt=rt, ra=ra, de=de, xop=0wx4} + | (d, I.LWZ) => loadd {opcd=0wx20, rt=rt, ra=ra, d=d} + | (de, I.LWZE) => loadde {opcd=0wx3A, rt=rt, ra=ra, de=de, xop=0wx6} + | (de, I.LDE) => loadde {opcd=0wx3E, rt=rt, ra=ra, de=de, xop=0wx0} + | (I.RegOp rb, I.LHAU) => loadx {rt=rt, ra=ra, rb=rb, xop=0wx177} + | (I.RegOp rb, I.LHZU) => loadx {rt=rt, ra=ra, rb=rb, xop=0wx137} + | (I.RegOp rb, I.LWZU) => loadx {rt=rt, ra=ra, rb=rb, xop=0wx37} + | (d, I.LHZU) => loadd {opcd=0wx29, rt=rt, ra=ra, d=d} + | (d, I.LWZU) => loadd {opcd=0wx21, rt=rt, ra=ra, d=d} + ) + and floadx {ft, ra, rb, xop} = + let val ft = emit_FP ft + val ra = emit_GP ra + val rb = emit_GP rb + in eWord32 ((ft << 0wx15) + ((ra << 0wx10) + ((rb << 0wxB) + ((xop << 0wx1) + 0wx7C000000)))) + end + and floadd {opcd, ft, ra, d} = + let val ft = emit_FP ft + val ra = emit_GP ra + val d = emit_operand d + in eWord32 ((opcd << 0wx1A) + ((ft << 0wx15) + ((ra << 0wx10) + (d && 0wxFFFF)))) + end + and floadde {opcd, ft, ra, de, xop} = + let val ft = emit_FP ft + val ra = emit_GP ra + val de = emit_operand de + in eWord32 ((opcd << 0wx1A) + ((ft << 0wx15) + ((ra << 0wx10) + (((de && 0wxFFF) << 0wx4) + xop)))) + end + and fload {ld, ft, ra, d} = + (case (d, ld) of + (I.RegOp rb, I.LFS) => floadx {ft=ft, ra=ra, rb=rb, xop=0wx217} + | (I.RegOp rb, I.LFSE) => floadx {ft=ft, ra=ra, rb=rb, xop=0wx21F} + | (I.RegOp rb, I.LFD) => floadx {ft=ft, ra=ra, rb=rb, xop=0wx257} + | (I.RegOp rb, I.LFDE) => floadx {ft=ft, ra=ra, rb=rb, xop=0wx25F} + | (I.RegOp rb, I.LFDU) => floadx {ft=ft, ra=ra, rb=rb, xop=0wx277} + | (d, I.LFS) => floadd {ft=ft, ra=ra, d=d, opcd=0wx30} + | (de, I.LFSE) => floadde {ft=ft, ra=ra, de=de, opcd=0wx3E, xop=0wx4} + | (d, I.LFD) => floadd {ft=ft, ra=ra, d=d, opcd=0wx32} + | (de, I.LFDE) => floadde {ft=ft, ra=ra, de=de, opcd=0wx3E, xop=0wx6} + | (d, I.LFDU) => floadd {ft=ft, ra=ra, d=d, opcd=0wx33} + ) + and storex {rs, ra, rb, xop} = + let val rs = emit_GP rs + val ra = emit_GP ra + val rb = emit_GP rb + in eWord32 ((rs << 0wx15) + ((ra << 0wx10) + ((rb << 0wxB) + ((xop << 0wx1) + 0wx7C000000)))) + end + and stored {opcd, rs, ra, d} = + let val rs = emit_GP rs + val ra = emit_GP ra + val d = emit_operand d + in eWord32 ((opcd << 0wx1A) + ((rs << 0wx15) + ((ra << 0wx10) + (d && 0wxFFFF)))) + end + and storede {opcd, rs, ra, de, xop} = + let val rs = emit_GP rs + val ra = emit_GP ra + val de = emit_operand de + in eWord32 ((opcd << 0wx1A) + ((rs << 0wx15) + ((ra << 0wx10) + (((de && 0wxFFF) << 0wx4) + xop)))) + end + and store {st, rs, ra, d} = + (case (d, st) of + (I.RegOp rb, I.STB) => storex {rs=rs, ra=ra, rb=rb, xop=0wxD7} + | (I.RegOp rb, I.STBE) => storex {rs=rs, ra=ra, rb=rb, xop=0wxDF} + | (I.RegOp rb, I.STH) => storex {rs=rs, ra=ra, rb=rb, xop=0wx197} + | (I.RegOp rb, I.STHE) => storex {rs=rs, ra=ra, rb=rb, xop=0wx19F} + | (I.RegOp rb, I.STW) => storex {rs=rs, ra=ra, rb=rb, xop=0wx97} + | (I.RegOp rb, I.STWE) => storex {rs=rs, ra=ra, rb=rb, xop=0wx9F} + | (I.RegOp rb, I.STDE) => storex {rs=rs, ra=ra, rb=rb, xop=0wx39F} + | (d, I.STB) => stored {rs=rs, ra=ra, d=d, opcd=0wx26} + | (de, I.STBE) => storede {rs=rs, ra=ra, de=de, opcd=0wx3A, xop=0wx8} + | (d, I.STH) => stored {rs=rs, ra=ra, d=d, opcd=0wx2C} + | (de, I.STHE) => storede {rs=rs, ra=ra, de=de, opcd=0wx3A, xop=0wxA} + | (d, I.STW) => stored {rs=rs, ra=ra, d=d, opcd=0wx24} + | (de, I.STWE) => storede {rs=rs, ra=ra, de=de, opcd=0wx3A, xop=0wxE} + | (de, I.STDE) => storede {rs=rs, ra=ra, de=de, opcd=0wx3E, xop=0wx8} + ) + and fstorex {fs, ra, rb, xop} = + let val fs = emit_FP fs + val ra = emit_GP ra + val rb = emit_GP rb + in eWord32 ((fs << 0wx15) + ((ra << 0wx10) + ((rb << 0wxB) + ((xop << 0wx1) + 0wx7C000000)))) + end + and fstored {opcd, fs, ra, d} = + let val fs = emit_FP fs + val ra = emit_GP ra + val d = emit_operand d + in eWord32 ((opcd << 0wx1A) + ((fs << 0wx15) + ((ra << 0wx10) + (d && 0wxFFFF)))) + end + and fstorede {opcd, fs, ra, de, xop} = + let val fs = emit_FP fs + val ra = emit_GP ra + val de = emit_operand de + in eWord32 ((opcd << 0wx1A) + ((fs << 0wx15) + ((ra << 0wx10) + (((de && 0wxFFF) << 0wx4) + xop)))) + end + and fstore {st, fs, ra, d} = + (case (d, st) of + (I.RegOp rb, I.STFS) => fstorex {fs=fs, ra=ra, rb=rb, xop=0wx297} + | (I.RegOp rb, I.STFSE) => fstorex {fs=fs, ra=ra, rb=rb, xop=0wx29F} + | (I.RegOp rb, I.STFD) => fstorex {fs=fs, ra=ra, rb=rb, xop=0wx2D7} + | (I.RegOp rb, I.STFDE) => fstorex {fs=fs, ra=ra, rb=rb, xop=0wx2F7} + | (d, I.STFS) => fstored {fs=fs, ra=ra, d=d, opcd=0wx34} + | (de, I.STFSE) => fstorede {fs=fs, ra=ra, de=de, opcd=0wx3E, xop=0wxC} + | (d, I.STFD) => fstored {fs=fs, ra=ra, d=d, opcd=0wx36} + | (de, I.STFDE) => fstorede {fs=fs, ra=ra, de=de, opcd=0wx3E, xop=0wxE} + ) + and unary' {ra, rt, OE, oper, Rc} = + let val ra = emit_GP ra + val rt = emit_GP rt + val OE = emit_bool OE + val oper = emit_unary oper + val Rc = emit_bool Rc + in eWord32 ((ra << 0wx15) + ((rt << 0wx10) + ((OE << 0wxA) + ((oper << 0wx1) + (Rc + 0wx7C000000))))) + end + and unary {ra, rt, oper, OE, Rc} = + (case oper of + I.NEG => unary' {ra=rt, rt=ra, oper=oper, OE=OE, Rc=Rc} + | _ => unary' {ra=ra, rt=rt, oper=oper, OE=OE, Rc=Rc} + ) + and arith' {rt, ra, rb, OE, oper, Rc} = + let val rt = emit_GP rt + val ra = emit_GP ra + val rb = emit_GP rb + val OE = emit_bool OE + val oper = emit_arith oper + val Rc = emit_bool Rc + in eWord32 ((rt << 0wx15) + ((ra << 0wx10) + ((rb << 0wxB) + ((OE << 0wxA) + ((oper << 0wx1) + (Rc + 0wx7C000000)))))) + end + and arithi' {oper, rt, ra, im} = + let val oper = emit_arithi oper + val rt = emit_GP rt + val ra = emit_GP ra + val im = emit_operand im + in eWord32 ((oper << 0wx1A) + ((rt << 0wx15) + ((ra << 0wx10) + (im && 0wxFFFF)))) + end + and srawi {rs, ra, sh} = + let val rs = emit_GP rs + val ra = emit_GP ra + val sh = emit_operand sh + in eWord32 ((rs << 0wx15) + ((ra << 0wx10) + (((sh && 0wx1F) << 0wxB) + 0wx7C000670))) + end + and sradi' {rs, ra, sh, sh2} = + let val rs = emit_GP rs + val ra = emit_GP ra + in eWord32 ((rs << 0wx15) + ((ra << 0wx10) + ((sh << 0wxB) + ((sh2 << 0wx1) + 0wx7C000674)))) + end + and sradi {rs, ra, sh} = + let val sh = emit_operand sh + in sradi' {rs=rs, ra=ra, sh=(sh && 0wx1F), sh2=((sh << 0wx5) && 0wx1)} + end + and arith {oper, rt, ra, rb, OE, Rc} = + (case oper of + (I.ADD | I.SUBF | I.MULLW | I.MULLD | I.MULHW | I.MULHWU | I.DIVW | I.DIVD | I.DIVWU | I.DIVDU) => + arith' {oper=oper, rt=rt, ra=ra, rb=rb, OE=OE, Rc=Rc} + | _ => arith' {oper=oper, rt=ra, ra=rt, rb=rb, OE=OE, Rc=Rc} + ) + and arithi {oper, rt, ra, im} = + (case oper of + (I.ADDI | I.ADDIS | I.SUBFIC | I.MULLI) => arithi' {oper=oper, rt=rt, + ra=ra, im=im} + | I.SRAWI => srawi {rs=ra, ra=rt, sh=im} + | I.SRADI => sradi {rs=ra, ra=rt, sh=im} + | _ => arithi' {oper=oper, rt=ra, ra=rt, im=im} + ) + and Cmpl {bf, l, ra, rb} = + let val bf = emit_CC bf + val l = emit_bool l + val ra = emit_GP ra + val rb = emit_GP rb + in eWord32 ((bf << 0wx17) + ((l << 0wx15) + ((ra << 0wx10) + ((rb << 0wxB) + 0wx7C000040)))) + end + and Cmpli {bf, l, ra, ui} = + let val bf = emit_CC bf + val l = emit_bool l + val ra = emit_GP ra + val ui = emit_operand ui + in eWord32 ((bf << 0wx17) + ((l << 0wx15) + ((ra << 0wx10) + ((ui && 0wxFFFF) + 0wx28000000)))) + end + and Cmp {bf, l, ra, rb} = + let val bf = emit_CC bf + val l = emit_bool l + val ra = emit_GP ra + val rb = emit_GP rb + in eWord32 ((bf << 0wx17) + ((l << 0wx15) + ((ra << 0wx10) + ((rb << 0wxB) + 0wx7C000000)))) + end + and Cmpi {bf, l, ra, si} = + let val bf = emit_CC bf + val l = emit_bool l + val ra = emit_GP ra + val si = emit_operand si + in eWord32 ((bf << 0wx17) + ((l << 0wx15) + ((ra << 0wx10) + ((si && 0wxFFFF) + 0wx2C000000)))) + end + and compare {cmp, bf, l, ra, rb} = + (case (cmp, rb) of + (I.CMP, I.RegOp rb) => Cmp {bf=bf, l=l, ra=ra, rb=rb} + | (I.CMPL, I.RegOp rb) => Cmpl {bf=bf, l=l, ra=ra, rb=rb} + | (I.CMP, si) => Cmpi {bf=bf, l=l, ra=ra, si=si} + | (I.CMPL, ui) => Cmpli {bf=bf, l=l, ra=ra, ui=ui} + ) + and fcmp {bf, fa, fb, cmp} = + let val bf = emit_CC bf + val fa = emit_FP fa + val fb = emit_FP fb + val cmp = emit_fcmp cmp + in eWord32 ((bf << 0wx17) + ((fa << 0wx10) + ((fb << 0wxB) + ((cmp << 0wx1) + 0wxFC000000)))) + end + and funary {oper, ft, fb, Rc} = + let val oper = emit_funary oper + val ft = emit_FP ft + val fb = emit_FP fb + in + let +(*#line 455.12 "ppc/ppc.mdl"*) + val (opcd, xo) = oper + in + (case oper of + (0wx3F, 0wx16) => a_form {opcd=opcd, frt=ft, fra=0wx0, frb=fb, + frc=0wx0, xo=xo, rc=Rc} + | (0wx3B, 0wx16) => a_form {opcd=opcd, frt=ft, fra=0wx0, frb=fb, + frc=0wx0, xo=xo, rc=Rc} + | _ => x_form {opcd=opcd, rt=ft, ra=0wx0, rb=fb, xo=xo, rc=Rc} + ) + end + end + and farith {oper, ft, fa, fb, Rc} = + let val ft = emit_FP ft + val fa = emit_FP fa + val fb = emit_FP fb + in + let +(*#line 468.12 "ppc/ppc.mdl"*) + val (opcd, xo) = emit_farith oper + in + (case oper of + (I.FMUL | I.FMULS) => a_form {opcd=opcd, frt=ft, fra=fa, frb=0wx0, + frc=fb, xo=xo, rc=Rc} + | _ => a_form {opcd=opcd, frt=ft, fra=fa, frb=fb, frc=0wx0, xo=xo, + rc=Rc} + ) + end + end + and farith3 {oper, ft, fa, fc, fb, Rc} = + let val oper = emit_farith3 oper + val ft = emit_FP ft + val fa = emit_FP fa + val fc = emit_FP fc + val fb = emit_FP fb + in + let +(*#line 477.12 "ppc/ppc.mdl"*) + val (opcd, xo) = oper + in a_form {opcd=opcd, frt=ft, fra=fa, frb=fb, frc=fc, xo=xo, rc=Rc} + end + end + and cr_bit {cc} = + let +(*#line 482.12 "ppc/ppc.mdl"*) + val (cr, bit) = cc + in ((emit_CC cr) << 0wx2) + (itow + (case bit of + I.LT => 0 + | I.GT => 1 + | I.EQ => 2 + | I.SO => 3 + | I.FL => 0 + | I.FG => 1 + | I.FE => 2 + | I.FU => 3 + | I.FX => 0 + | I.FEX => 1 + | I.VX => 2 + | I.OX => 3 + )) + end + and ccarith {oper, bt, ba, bb} = + let val oper = emit_ccarith oper + in xl_form {opcd=0wx13, bt=cr_bit {cc=bt}, ba=cr_bit {cc=ba}, bb=cr_bit {cc=bb}, + xo=oper, lk=false} + end + and twr {to, ra, rb} = + let val to = emit_int to + val ra = emit_GP ra + val rb = emit_GP rb + in eWord32 ((to << 0wx15) + ((ra << 0wx10) + ((rb << 0wxB) + 0wx7C000008))) + end + and twi {to, ra, si} = + let val to = emit_int to + val ra = emit_GP ra + val si = emit_operand si + in eWord32 ((to << 0wx15) + ((ra << 0wx10) + ((si && 0wxFFFF) + 0wxC000000))) + end + and tw {to, ra, si} = + (case si of + I.RegOp rb => twr {to=to, ra=ra, rb=rb} + | _ => twi {to=to, ra=ra, si=si} + ) + and tdr {to, ra, rb} = + let val to = emit_int to + val ra = emit_GP ra + val rb = emit_GP rb + in eWord32 ((to << 0wx15) + ((ra << 0wx10) + ((rb << 0wxB) + 0wx7C000088))) + end + and tdi {to, ra, si} = + let val to = emit_int to + val ra = emit_GP ra + val si = emit_operand si + in eWord32 ((to << 0wx15) + ((ra << 0wx10) + ((si && 0wxFFFF) + 0wx8000000))) + end + and td {to, ra, si} = + (case si of + I.RegOp rb => tdr {to=to, ra=ra, rb=rb} + | _ => tdi {to=to, ra=ra, si=si} + ) + and mcrf {bf, bfa} = + let val bf = emit_CC bf + val bfa = emit_CC bfa + in eWord32 ((bf << 0wx17) + ((bfa << 0wx12) + 0wx4C000000)) + end + and mtspr' {rs, spr} = + let val rs = emit_GP rs + in eWord32 ((rs << 0wx15) + ((spr << 0wxB) + 0wx7C0003A6)) + end + and mtspr {rs, spr} = + let val spr = emit_SPR spr + in mtspr' {rs=rs, spr=((spr && 0wx1F) << 0wx5) + ((spr << 0wx5) && 0wx1F)} + end + and mfspr' {rt, spr} = + let val rt = emit_GP rt + in eWord32 ((rt << 0wx15) + ((spr << 0wxB) + 0wx7C0002A6)) + end + and mfspr {rt, spr} = + let val spr = emit_SPR spr + in mfspr' {rt=rt, spr=((spr && 0wx1F) << 0wx5) + ((spr << 0wx5) && 0wx1F)} + end + and b {li, aa, lk} = + let val aa = emit_bool aa + val lk = emit_bool lk + in eWord32 (((li && 0wxFFFFFF) << 0wx2) + ((aa << 0wx1) + (lk + 0wx48000000))) + end + and be {li, aa, lk} = + let val aa = emit_bool aa + val lk = emit_bool lk + in eWord32 (((li && 0wxFFFFFF) << 0wx2) + ((aa << 0wx1) + (lk + 0wx58000000))) + end + and bc {bo, bi, bd, aa, lk} = + let val bo = emit_bo bo + val aa = emit_bool aa + val lk = emit_bool lk + in eWord32 ((bo << 0wx15) + ((bi << 0wx10) + (((bd && 0wx3FFF) << 0wx2) + ((aa << 0wx1) + (lk + 0wx40000000))))) + end + and bce {bo, bi, bd, aa, lk} = + let val bo = emit_bo bo + val aa = emit_bool aa + val lk = emit_bool lk + in eWord32 ((bo << 0wx15) + ((bi << 0wx10) + (((bd && 0wx3FFF) << 0wx2) + ((aa << 0wx1) + (lk + 0wx40000000))))) + end + and bclr {bo, bi, lk} = + let val bo = emit_bo bo + val lk = emit_bool lk + in eWord32 ((bo << 0wx15) + ((bi << 0wx10) + (lk + 0wx4C000020))) + end + and bclre {bo, bi, lk} = + let val bo = emit_bo bo + val lk = emit_bool lk + in eWord32 ((bo << 0wx15) + ((bi << 0wx10) + (lk + 0wx4C000022))) + end + and bcctr {bo, bi, lk} = + let val bo = emit_bo bo + val lk = emit_bool lk + in eWord32 ((bo << 0wx15) + ((bi << 0wx10) + (lk + 0wx4C000420))) + end + and bcctre {bo, bi, lk} = + let val bo = emit_bo bo + val lk = emit_bool lk + in eWord32 ((bo << 0wx15) + ((bi << 0wx10) + (lk + 0wx4C000422))) + end + and rlwnm {rs, ra, sh, mb, me} = + let val rs = emit_GP rs + val ra = emit_GP ra + val sh = emit_GP sh + val mb = emit_int mb + val me = emit_int me + in eWord32 ((rs << 0wx15) + ((ra << 0wx10) + ((sh << 0wxB) + ((mb << 0wx6) + ((me << 0wx1) + 0wx5C000000))))) + end + and rlwinm {rs, ra, sh, mb, me} = + let val rs = emit_GP rs + val ra = emit_GP ra + val mb = emit_int mb + val me = emit_int me + in eWord32 ((rs << 0wx15) + ((ra << 0wx10) + ((sh << 0wxB) + ((mb << 0wx6) + ((me << 0wx1) + 0wx54000000))))) + end + and rldcl {rs, ra, sh, mb} = + let val rs = emit_GP rs + val ra = emit_GP ra + val sh = emit_GP sh + val mb = emit_int mb + in eWord32 ((rs << 0wx15) + ((ra << 0wx10) + ((sh << 0wxB) + ((mb << 0wx6) + 0wx78000010)))) + end + and rldicl {rs, ra, sh, mb, sh2} = + let val rs = emit_GP rs + val ra = emit_GP ra + val mb = emit_int mb + in eWord32 ((rs << 0wx15) + ((ra << 0wx10) + ((sh << 0wxB) + ((mb << 0wx6) + ((sh2 << 0wx1) + 0wx78000000))))) + end + and rldcr {rs, ra, sh, mb} = + let val rs = emit_GP rs + val ra = emit_GP ra + val sh = emit_GP sh + val mb = emit_int mb + in eWord32 ((rs << 0wx15) + ((ra << 0wx10) + ((sh << 0wxB) + ((mb << 0wx6) + 0wx78000012)))) + end + and rldicr {rs, ra, sh, mb, sh2} = + let val rs = emit_GP rs + val ra = emit_GP ra + val mb = emit_int mb + in eWord32 ((rs << 0wx15) + ((ra << 0wx10) + ((sh << 0wxB) + ((mb << 0wx6) + ((sh2 << 0wx1) + 0wx78000004))))) + end + and rldic {rs, ra, sh, mb, sh2} = + let val rs = emit_GP rs + val ra = emit_GP ra + val mb = emit_int mb + in eWord32 ((rs << 0wx15) + ((ra << 0wx10) + ((sh << 0wxB) + ((mb << 0wx6) + ((sh2 << 0wx1) + 0wx78000008))))) + end + and rlwimi {rs, ra, sh, mb, me} = + let val rs = emit_GP rs + val ra = emit_GP ra + val mb = emit_int mb + val me = emit_int me + in eWord32 ((rs << 0wx15) + ((ra << 0wx10) + ((sh << 0wxB) + ((mb << 0wx6) + ((me << 0wx1) + 0wx50000000))))) + end + and rldimi {rs, ra, sh, mb, sh2} = + let val rs = emit_GP rs + val ra = emit_GP ra + val mb = emit_int mb + in eWord32 ((rs << 0wx15) + ((ra << 0wx10) + ((sh << 0wxB) + ((mb << 0wx6) + ((sh2 << 0wx1) + 0wx7800000C))))) + end + and rotate {oper, ra, rs, sh, mb, me} = + (case (oper, me) of + (I.RLWNM, SOME me) => rlwnm {ra=ra, rs=rs, sh=sh, mb=mb, me=me} + | (I.RLDCL, _) => rldcl {ra=ra, rs=rs, sh=sh, mb=mb} + | (I.RLDCR, _) => rldcr {ra=ra, rs=rs, sh=sh, mb=mb} + | _ => error "rotate" + ) + and rotatei {oper, ra, rs, sh, mb, me} = + let val sh = emit_operand sh + in + (case (oper, me) of + (I.RLWINM, SOME me) => rlwinm {ra=ra, rs=rs, sh=sh, mb=mb, me=me} + | (I.RLWIMI, SOME me) => rlwimi {ra=ra, rs=rs, sh=sh, mb=mb, me=me} + | (I.RLDICL, _) => rldicl {ra=ra, rs=rs, sh=(sh && 0wx1F), sh2=((sh << 0wx5) && 0wx1), + mb=mb} + | (I.RLDICR, _) => rldicr {ra=ra, rs=rs, sh=(sh && 0wx1F), sh2=((sh << 0wx5) && 0wx1), + mb=mb} + | (I.RLDIC, _) => rldic {ra=ra, rs=rs, sh=(sh && 0wx1F), sh2=((sh << 0wx5) && 0wx1), + mb=mb} + | (I.RLDIMI, _) => rldimi {ra=ra, rs=rs, sh=(sh && 0wx1F), sh2=((sh << 0wx5) && 0wx1), + mb=mb} + | _ => error "rotatei" + ) + end + and lwarx {rt, ra, rb} = + let val rt = emit_GP rt + val ra = emit_GP ra + val rb = emit_GP rb + in eWord32 ((rt << 0wx15) + ((ra << 0wx10) + ((rb << 0wxB) + 0wx7C000028))) + end + and stwcx {rs, ra, rb} = + let val rs = emit_GP rs + val ra = emit_GP ra + val rb = emit_GP rb + in eWord32 ((rs << 0wx15) + ((ra << 0wx10) + ((rb << 0wxB) + 0wx7C00012D))) + end + +(*#line 578.7 "ppc/ppc.mdl"*) + fun relative (I.LabelOp lexp) = (itow ((MLTreeEval.valueOf lexp) - ( ! loc))) ~>> 0wx2 + | relative _ = error "relative" + fun emitter instr = + let + fun emitInstr (I.L{ld, rt, ra, d, mem}) = load {ld=ld, rt=rt, ra=ra, d=d} + | emitInstr (I.LF{ld, ft, ra, d, mem}) = fload {ld=ld, ft=ft, ra=ra, d=d} + | emitInstr (I.ST{st, rs, ra, d, mem}) = store {st=st, rs=rs, ra=ra, d=d} + | emitInstr (I.STF{st, fs, ra, d, mem}) = fstore {st=st, fs=fs, ra=ra, + d=d} + | emitInstr (I.UNARY{oper, rt, ra, Rc, OE}) = unary {oper=oper, rt=rt, + ra=ra, OE=OE, Rc=Rc} + | emitInstr (I.ARITH{oper, rt, ra, rb, Rc, OE}) = arith {oper=oper, rt=rt, + ra=ra, rb=rb, OE=OE, Rc=Rc} + | emitInstr (I.ARITHI{oper, rt, ra, im}) = arithi {oper=oper, rt=rt, ra=ra, + im=im} + | emitInstr (I.ROTATE{oper, ra, rs, sh, mb, me}) = rotate {oper=oper, + ra=ra, rs=rs, sh=sh, mb=mb, me=me} + | emitInstr (I.ROTATEI{oper, ra, rs, sh, mb, me}) = rotatei {oper=oper, + ra=ra, rs=rs, sh=sh, mb=mb, me=me} + | emitInstr (I.COMPARE{cmp, l, bf, ra, rb}) = compare {cmp=cmp, bf=bf, + l=l, ra=ra, rb=rb} + | emitInstr (I.FCOMPARE{cmp, bf, fa, fb}) = fcmp {cmp=cmp, bf=bf, fa=fa, + fb=fb} + | emitInstr (I.FUNARY{oper, ft, fb, Rc}) = funary {oper=oper, ft=ft, fb=fb, + Rc=Rc} + | emitInstr (I.FARITH{oper, ft, fa, fb, Rc}) = farith {oper=oper, ft=ft, + fa=fa, fb=fb, Rc=Rc} + | emitInstr (I.FARITH3{oper, ft, fa, fb, fc, Rc}) = farith3 {oper=oper, + ft=ft, fa=fa, fb=fb, fc=fc, Rc=Rc} + | emitInstr (I.CCARITH{oper, bt, ba, bb}) = ccarith {oper=oper, bt=bt, + ba=ba, bb=bb} + | emitInstr (I.MCRF{bf, bfa}) = mcrf {bf=bf, bfa=bfa} + | emitInstr (I.MTSPR{rs, spr}) = mtspr {rs=rs, spr=spr} + | emitInstr (I.MFSPR{rt, spr}) = mfspr {rt=rt, spr=spr} + | emitInstr (I.LWARX{rt, ra, rb}) = lwarx {rt=rt, ra=ra, rb=rb} + | emitInstr (I.STWCX{rs, ra, rb}) = stwcx {rs=rs, ra=ra, rb=rb} + | emitInstr (I.TW{to, ra, si}) = tw {to=to, ra=ra, si=si} + | emitInstr (I.TD{to, ra, si}) = td {to=to, ra=ra, si=si} + | emitInstr (I.BC{bo, bf, bit, addr, LK, fall}) = bc {bo=bo, bi=cr_bit {cc=(bf, + bit)}, bd=relative addr, aa=false, lk=LK} + | emitInstr (I.BCLR{bo, bf, bit, LK, labels}) = bclr {bo=bo, bi=cr_bit {cc=(bf, + bit)}, lk=LK} + | emitInstr (I.B{addr, LK}) = b {li=relative addr, aa=false, lk=LK} + | emitInstr (I.CALL{def, use, cutsTo, mem}) = bclr {bo=I.ALWAYS, bi=0wx0, + lk=true} + | emitInstr (I.SOURCE{}) = () + | emitInstr (I.SINK{}) = () + | emitInstr (I.PHI{}) = () + in + emitInstr instr + end + + fun emitInstruction(I.ANNOTATION{i, ...}) = emitInstruction(i) + | emitInstruction(I.INSTR(i)) = emitter(i) + | emitInstruction(I.LIVE _) = () + | emitInstruction(I.KILL _) = () + | emitInstruction _ = error "emitInstruction" + + in S.STREAM{beginCluster=init, + pseudoOp=pseudoOp, + emit=emitInstruction, + endCluster=fail, + defineLabel=doNothing, + entryLabel=doNothing, + comment=doNothing, + exitBlock=doNothing, + annotation=doNothing, + getAnnotations=getAnnotations + } + end +end + diff --git a/MLRISC/ppc/flowgraph/ppcDarwinPseudoOps.sml b/MLRISC/ppc/flowgraph/ppcDarwinPseudoOps.sml new file mode 100644 index 0000000..daf6933 --- /dev/null +++ b/MLRISC/ppc/flowgraph/ppcDarwinPseudoOps.sml @@ -0,0 +1,132 @@ +(* ppcDarwinPseudoOps.sml + * + * COPYRIGHT (c) 2002 Bell labs, Lucent Technologies. + * + * PPC/Darwin (aka MacOS X) pseudo operations. + *) + +functor PPCDarwinPseudoOps ( + structure T : MLTREE + structure MLTreeEval : MLTREE_EVAL where T = T + ) : PSEUDO_OPS_BASIS = struct + + structure T = T + structure PB = PseudoOpsBasisTyp + structure Fmt = Format + + structure Endian = PseudoOpsBig ( + structure T = T + structure MLTreeEval=MLTreeEval + val icache_alignment = 16 + val max_alignment = SOME 7 + val nop = {sz=4, en=0wx60000000: Word32.word}) (* FIX:: ori 0, 0, 0 *) + +(* EXPAND + structure GasPseudoOps = + GasPseudoOps(structure T = T + val labFmt = {gPrefix="", aPrefix="L"}) +*) + + type 'a pseudo_op = (T.labexp, 'a) PB.pseudo_op + + fun error msg = MLRiscErrorMsg.error ("PPCDarwinPseudoOps.", msg) + + val sizeOf = Endian.sizeOf + val emitValue = Endian.emitValue + + val labelToString = Label.fmt {gPrefix="", aPrefix="L"} + + fun prIntInf i = + if IntInf.sign i < 0 then "-"^IntInf.toString(IntInf.~ i) + else IntInf.toString i + + fun prInt i = if i < 0 then "-"^Int.toString(~i) else Int.toString i + + (* operator precedences: + Note: these differ from C's precedences + 2 MULT, DIV, LSHIFT, RSHIFT + 1 AND, OR + 0 PLUS, MINUS + *) + + fun parens (str, prec, op_prec) = + if prec > op_prec then "(" ^ str ^ ")" else str + + fun lexpToString le = toStr(le, 0) + + and toStr(T.LABEL lab, _) = labelToString lab + | toStr(T.LABEXP le, p) = toStr(le, p) + | toStr(T.CONST c, _) = + (prInt(T.Constant.valueOf c) handle _ => T.Constant.toString c) + | toStr(T.LI i, _) = prIntInf i + | toStr(T.MULS(_,lexp1, lexp2), _) = toStr(lexp1, 2) ^ "*" ^ toStr(lexp2,2) + | toStr(T.DIVS(T.DIV_TO_ZERO,_,lexp1, lexp2), _) = + toStr(lexp1, 2) ^ "/" ^ toStr(lexp2,2) + | toStr(T.SLL(_,lexp, cnt), prec) = toStr(lexp,2) ^ "<<" ^ toStr(cnt,2) + | toStr(T.SRL(_,lexp, cnt), prec) = toStr(lexp,2) ^ ">>" ^ toStr(cnt,2) + | toStr(T.ANDB(_,lexp, mask), prec) = + parens(toStr(lexp,1) ^ "&" ^ toStr(mask, 1), prec, 1) + | toStr(T.ORB(_,lexp, mask), prec) = + parens(toStr(lexp, 1) ^ "|" ^ toStr(mask, 1), prec, 1) + | toStr(T.ADD(_,lexp1, lexp2), prec) = + parens(toStr(lexp1, 0) ^ "+" ^ toStr(lexp2, 0), prec, 0) + | toStr(T.SUB(_,lexp1, lexp2), prec) = + parens(toStr(lexp1, 0) ^ "-" ^ toStr(lexp2, 0), prec, 0) + | toStr _ = error "toStr" + + fun decls (fmt, labs) = + String.concat + (map (fn lab => (Fmt.format fmt [Fmt.STR (lexpToString(T.LABEL lab))])) labs) + + fun toString(PB.ALIGN_SZ n) = Fmt.format "\t.align\t%d" [Fmt.INT n] + | toString(PB.ALIGN_ENTRY) = "\t.align\t4" (* 16 byte boundary *) + | toString(PB.ALIGN_LABEL) = "\t.align\t2" + + | toString(PB.DATA_LABEL lab) = labelToString lab ^ ":" + | toString(PB.DATA_READ_ONLY) = "\t.const_data" + | toString(PB.DATA) = "\t.data" + | toString(PB.BSS) = "\t.section\t__DATA,__BSS" + | toString(PB.TEXT) = "\t.text" + | toString(PB.SECTION at) = "\t.section\t" ^ Atom.toString at + | toString(PB.REORDER) = "" + | toString(PB.NOREORDER) = "" + | toString(PB.INT{sz, i}) = let + fun join [] = [] + | join [lexp] = [lexpToString lexp] + | join (lexp::r) = lexpToString lexp :: "," :: join r + val pop = (case sz + of 8 => "\t.byte\t" + | 16 => "\t.short\t" + | 32 => "\t.long\t" + | 64 => error "INT64" + | _ => error ("pop: INT sz = " ^ Int.toString sz) + (* end case *)) + in + String.concat (pop :: join i) + end + | toString(PB.ASCII s) = + Fmt.format "\t.ascii\t\"%s\"" [Fmt.STR(String.toCString s)] + | toString(PB.ASCIIZ s) = + Fmt.format "\t.asciz \"%s\"" [Fmt.STR(String.toCString s)] + | toString(PB.SPACE sz) = Fmt.format "\t.space\t%d" [Fmt.INT sz] + | toString(PB.FLOAT{sz, f}) = let + fun join [] = [] + | join [f] = [f] + | join (f::r) = f :: "," :: join r + val pop = (case sz + of 32 => "\t.single " + | 64 => "\t.double " + | _ => error ("pop: FLOAT sz = " ^ Int.toString sz) + (* end case *)) + in + String.concat (pop :: join f) + end + | toString(PB.IMPORT labs) = decls("\t.extern\t%s", labs) + | toString(PB.EXPORT labs) = decls("\t.globl\t%s", labs) + | toString(PB.COMMENT txt) = Fmt.format "; %s" [Fmt.STR txt] + | toString(PB.EXT _) = error "EXT" + + fun defineLabel lab = labelToString lab ^ ":" + + val wordSize = 32 + end diff --git a/MLRISC/ppc/flowgraph/ppcGasPseudoOps.sml b/MLRISC/ppc/flowgraph/ppcGasPseudoOps.sml new file mode 100644 index 0000000..e627bfa --- /dev/null +++ b/MLRISC/ppc/flowgraph/ppcGasPseudoOps.sml @@ -0,0 +1,33 @@ +functor PPCGasPseudoOps + ( structure T : MLTREE + structure MLTreeEval : MLTREE_EVAL where T = T + ) : PSEUDO_OPS_BASIS = + +struct + structure T = T + structure PB = PseudoOpsBasisTyp + structure Fmt = Format + + structure Endian = + PseudoOpsBig + (structure T = T + structure MLTreeEval=MLTreeEval + val icache_alignment = 16 + val max_alignment = SOME 7 + val nop = {sz=4, en=0wx60000000: Word32.word}) (* FIX:: ori 0, 0, 0 *) + + structure GasPseudoOps = + GasPseudoOps(structure T = T + val labFmt = {gPrefix="", aPrefix="L"}) + + type 'a pseudo_op = (T.labexp, 'a) PB.pseudo_op + + fun error msg = MLRiscErrorMsg.error ("GasPseudoOps.", msg) + + val sizeOf = Endian.sizeOf + val emitValue = Endian.emitValue + val lexpToString = GasPseudoOps.lexpToString + val toString = GasPseudoOps.toString + val defineLabel = GasPseudoOps.defineLabel + val wordSize = 32 +end diff --git a/MLRISC/ppc/instructions/ppcCells.sml b/MLRISC/ppc/instructions/ppcCells.sml new file mode 100644 index 0000000..5938dd2 --- /dev/null +++ b/MLRISC/ppc/instructions/ppcCells.sml @@ -0,0 +1,128 @@ +(* + * WARNING: This file was automatically generated by MDLGen (v3.1) + * from the machine description file "ppc/ppc.mdl". + * DO NOT EDIT this file directly + *) + + +signature PPCCELLS = +sig + include CELLS + val SPR : CellsBasis.cellkind + val CELLSET : CellsBasis.cellkind + val showGP : CellsBasis.register_id -> string + val showFP : CellsBasis.register_id -> string + val showCC : CellsBasis.register_id -> string + val showSPR : CellsBasis.register_id -> string + val showMEM : CellsBasis.register_id -> string + val showCTRL : CellsBasis.register_id -> string + val showCELLSET : CellsBasis.register_id -> string + val showGPWithSize : CellsBasis.register_id * CellsBasis.sz -> string + val showFPWithSize : CellsBasis.register_id * CellsBasis.sz -> string + val showCCWithSize : CellsBasis.register_id * CellsBasis.sz -> string + val showSPRWithSize : CellsBasis.register_id * CellsBasis.sz -> string + val showMEMWithSize : CellsBasis.register_id * CellsBasis.sz -> string + val showCTRLWithSize : CellsBasis.register_id * CellsBasis.sz -> string + val showCELLSETWithSize : CellsBasis.register_id * CellsBasis.sz -> string + val r0 : CellsBasis.cell + val xer : CellsBasis.cell + val lr : CellsBasis.cell + val ctr : CellsBasis.cell + val addGP : CellsBasis.cell * cellset -> cellset + val addFP : CellsBasis.cell * cellset -> cellset + val addCC : CellsBasis.cell * cellset -> cellset + val addSPR : CellsBasis.cell * cellset -> cellset + val addMEM : CellsBasis.cell * cellset -> cellset + val addCTRL : CellsBasis.cell * cellset -> cellset + val addCELLSET : CellsBasis.cell * cellset -> cellset +end + +structure PPCCells : PPCCELLS = +struct + exception PPCCells + fun error msg = MLRiscErrorMsg.error("PPCCells",msg) + open CellsBasis + fun showGPWithSize (r, ty) = (fn (r, _) => (if ( ! PPCAsmSyntax.ibm_syntax) + then (Int.toString r) + else ("r" ^ (Int.toString r))) + ) (r, ty) + and showFPWithSize (r, ty) = (fn (f, _) => (if ( ! PPCAsmSyntax.ibm_syntax) + then (Int.toString f) + else ("f" ^ (Int.toString r))) + ) (r, ty) + and showCCWithSize (r, ty) = (fn (cr, _) => "cr" ^ (Int.toString cr) + ) (r, ty) + and showSPRWithSize (r, ty) = (fn (1, _) => "xer" + | (8, _) => "lr" + | (9, _) => "ctr" + | (r, _) => Int.toString r + ) (r, ty) + and showMEMWithSize (r, ty) = (fn (r, _) => "m" ^ (Int.toString r) + ) (r, ty) + and showCTRLWithSize (r, ty) = (fn (r, _) => "ctrl" ^ (Int.toString r) + ) (r, ty) + and showCELLSETWithSize (r, ty) = (fn _ => "CELLSET" + ) (r, ty) + fun showGP r = showGPWithSize (r, 64) + fun showFP r = showFPWithSize (r, 64) + fun showCC r = showCCWithSize (r, 4) + fun showSPR r = showSPRWithSize (r, 64) + fun showMEM r = showMEMWithSize (r, 8) + fun showCTRL r = showCTRLWithSize (r, 8) + fun showCELLSET r = showCELLSETWithSize (r, 0) + val SPR = CellsBasis.newCellKind {name="SPR", nickname="spr"} + and CELLSET = CellsBasis.newCellKind {name="CELLSET", nickname="cellset"} + structure MyCells = Cells + (exception Cells = PPCCells + val firstPseudo = 256 + val desc_GP = CellsBasis.DESC {low=0, high=31, kind=CellsBasis.GP, defaultValues=[], + zeroReg=NONE, toString=showGP, toStringWithSize=showGPWithSize, + counter=ref 0, dedicated=ref 0, physicalRegs=ref CellsBasis.array0} + and desc_FP = CellsBasis.DESC {low=32, high=63, kind=CellsBasis.FP, + defaultValues=[], zeroReg=NONE, toString=showFP, toStringWithSize=showFPWithSize, + counter=ref 0, dedicated=ref 0, physicalRegs=ref CellsBasis.array0} + and desc_CC = CellsBasis.DESC {low=64, high=71, kind=CellsBasis.CC, + defaultValues=[], zeroReg=NONE, toString=showCC, toStringWithSize=showCCWithSize, + counter=ref 0, dedicated=ref 0, physicalRegs=ref CellsBasis.array0} + and desc_SPR = CellsBasis.DESC {low=72, high=103, kind=SPR, defaultValues=[], + zeroReg=NONE, toString=showSPR, toStringWithSize=showSPRWithSize, + counter=ref 0, dedicated=ref 0, physicalRegs=ref CellsBasis.array0} + and desc_MEM = CellsBasis.DESC {low=104, high=103, kind=CellsBasis.MEM, + defaultValues=[], zeroReg=NONE, toString=showMEM, toStringWithSize=showMEMWithSize, + counter=ref 0, dedicated=ref 0, physicalRegs=ref CellsBasis.array0} + and desc_CTRL = CellsBasis.DESC {low=104, high=103, kind=CellsBasis.CTRL, + defaultValues=[], zeroReg=NONE, toString=showCTRL, toStringWithSize=showCTRLWithSize, + counter=ref 0, dedicated=ref 0, physicalRegs=ref CellsBasis.array0} + and desc_CELLSET = CellsBasis.DESC {low=104, high=103, kind=CELLSET, + defaultValues=[], zeroReg=NONE, toString=showCELLSET, toStringWithSize=showCELLSETWithSize, + counter=ref 0, dedicated=ref 0, physicalRegs=ref CellsBasis.array0} + val cellKindDescs = [(CellsBasis.GP, desc_GP), (CellsBasis.FP, desc_FP), + (CellsBasis.CC, desc_CC), (SPR, desc_SPR), (CellsBasis.MEM, desc_MEM), + (CellsBasis.CTRL, desc_CTRL), (CELLSET, desc_CELLSET)] + val cellSize = 8 + ) + + open MyCells + val addGP = CellSet.add + and addFP = CellSet.add + and addCC = CellSet.add + and addSPR = CellSet.add + and addMEM = CellSet.add + and addCTRL = CellSet.add + and addCELLSET = CellSet.add + val RegGP = Reg GP + and RegFP = Reg FP + and RegCC = Reg CC + and RegSPR = Reg SPR + and RegMEM = Reg MEM + and RegCTRL = Reg CTRL + and RegCELLSET = Reg CELLSET + val stackptrR = RegGP 1 + val asmTmpR = RegGP 28 + val fasmTmp = RegFP 0 + val r0 = RegGP 0 + val xer = RegSPR 1 + val lr = RegSPR 8 + val ctr = RegSPR 9 +end + diff --git a/MLRISC/ppc/instructions/ppcCompInstrExt.sml b/MLRISC/ppc/instructions/ppcCompInstrExt.sml new file mode 100644 index 0000000..b2354cd --- /dev/null +++ b/MLRISC/ppc/instructions/ppcCompInstrExt.sml @@ -0,0 +1,75 @@ +(* ppcCompInstrExt.sml + * + * COPYRIGHT (c) 2004 John Reppy (http://www.cs.uchicago.edu/~jhr) + * All rights reserved. + * + * emit code for extensions to the ppc instruction set. + *) + +signature PPCCOMP_INSTR_EXT = + sig + structure I : PPCINSTR + structure TS : MLTREE_STREAM + where T = I.T + structure CFG : CONTROL_FLOW_GRAPH + where I = I + and P = TS.S.P + + type reducer = + (I.instruction, I.C.cellset, I.operand, I.addressing_mode, CFG.cfg) TS.reducer + + val compileSext : + reducer + -> {stm: (I.T.stm, I.T.rexp, I.T.fexp, I.T.ccexp) PPCInstrExt.sext, + an: I.T.an list} + -> unit + end + +functor PPCCompInstrExt ( + + structure I : PPCINSTR + structure TS : MLTREE_STREAM + where T = I.T + structure CFG : CONTROL_FLOW_GRAPH + where P = TS.S.P + and I = I + + ) : PPCCOMP_INSTR_EXT = struct + + structure CFG = CFG + structure T = TS.T + structure I = I + structure C = I.C + structure X = PPCInstrExt + structure TS = TS + + type stm = (T.stm, T.rexp, T.fexp, T.ccexp) X.sext + + type reducer = + (I.instruction, I.C.cellset, I.operand, I.addressing_mode, CFG.cfg) TS.reducer + + fun error msg = MLRiscErrorMsg.error("PPCCompInstrExt", msg) + + fun compileSext (reducer : reducer) {stm : stm, an : T.an list} = let + val TS.REDUCER{ + reduceRexp, operand, emit, instrStream, addressOf, ... + } = reducer + val TS.S.STREAM{emit=emitI, ...} = instrStream + fun emit' inst = emit(I.INSTR inst, an) + in + case stm + of X.STWU{src, ea} => let + val (base, disp) = addressOf ea + in + emit' (I.ST{ + st = I.STWU, + rs = reduceRexp src, + ra = base, + d = disp, + mem = T.Region.memory + }) + end + (* end case *) + end + + end diff --git a/MLRISC/ppc/instructions/ppcFreqProps.sml b/MLRISC/ppc/instructions/ppcFreqProps.sml new file mode 100644 index 0000000..70977c7 --- /dev/null +++ b/MLRISC/ppc/instructions/ppcFreqProps.sml @@ -0,0 +1,37 @@ +(* ppcFreqProps.sml + * + * COPYRIGHT (c) 2002 Bell Labs, Lucent Technologies + * + * Extract frequency information from the PowerPC architecture + * + * -- Allen + *) + +functor PPCFreqProps(PPCInstr : PPCINSTR) : FREQUENCY_PROPERTIES = +struct + + structure I = PPCInstr + + val p10 = Probability.percent 10 + val p50 = Probability.percent 50 + val p90 = Probability.percent 90 + val p100 = Probability.always + + fun ppcBranchProb(I.BC _) = p50 + | ppcBranchProb(I.BCLR{labels=[],bo=I.ALWAYS,...}) = p100 + | ppcBranchProb(I.BCLR{labels,bo=I.ALWAYS,...}) = + Probability.prob(1, length labels) + | ppcBranchProb(I.BCLR{labels=[],bo,...}) = p50 + | ppcBranchProb(I.BCLR{labels,bo,...}) = + Probability.prob(1, length labels) + | ppcBranchProb _ = Probability.never (* non-branch *) + + fun branchProb(I.ANNOTATION{a, i, ...}) = + (case #peek MLRiscAnnotations.BRANCH_PROB a of + SOME b => b + | NONE => branchProb i + ) + | branchProb(I.INSTR(i)) = ppcBranchProb(i) + | branchProb _ = Probability.never + +end diff --git a/MLRISC/ppc/instructions/ppcInstr.sml b/MLRISC/ppc/instructions/ppcInstr.sml new file mode 100644 index 0000000..c6d3ec4 --- /dev/null +++ b/MLRISC/ppc/instructions/ppcInstr.sml @@ -0,0 +1,562 @@ +(* + * WARNING: This file was automatically generated by MDLGen (v3.1) + * from the machine description file "ppc/ppc.mdl". + * DO NOT EDIT this file directly + *) + + +signature PPCINSTR = +sig + structure C : PPCCELLS + structure CB : CELLS_BASIS = CellsBasis + structure T : MLTREE + structure Constant: CONSTANT + structure Region : REGION + sharing Constant = T.Constant + sharing Region = T.Region + type gpr = int + type fpr = int + type ccr = int + type crf = int + datatype spr = + XER + | LR + | CTR + datatype operand = + RegOp of CellsBasis.cell + | ImmedOp of int + | LabelOp of T.labexp + type addressing_mode = CellsBasis.cell * operand + datatype ea = + Direct of CellsBasis.cell + | FDirect of CellsBasis.cell + | Displace of {base:CellsBasis.cell, disp:T.labexp, mem:Region.region} + datatype load = + LBZ + | LBZE + | LHZ + | LHZE + | LHA + | LHAE + | LWZ + | LWZE + | LDE + | LBZU + | LHZU + | LHAU + | LWZU + | LDZU + datatype store = + STB + | STBE + | STH + | STHE + | STW + | STWE + | STDE + | STBU + | STHU + | STWU + | STDU + datatype fload = + LFS + | LFSE + | LFD + | LFDE + | LFSU + | LFDU + datatype fstore = + STFS + | STFSE + | STFD + | STFDE + | STFSU + | STFDU + datatype cmp = + CMP + | CMPL + datatype fcmp = + FCMPO + | FCMPU + datatype unary = + NEG + | EXTSB + | EXTSH + | EXTSW + | CNTLZW + | CNTLZD + datatype funary = + FMR + | FNEG + | FABS + | FNABS + | FSQRT + | FSQRTS + | FRSP + | FCTIW + | FCTIWZ + | FCTID + | FCTIDZ + | FCFID + datatype farith = + FADD + | FSUB + | FMUL + | FDIV + | FADDS + | FSUBS + | FMULS + | FDIVS + datatype farith3 = + FMADD + | FMADDS + | FMSUB + | FMSUBS + | FNMADD + | FNMADDS + | FNMSUB + | FNMSUBS + | FSEL + datatype bo = + TRUE + | FALSE + | ALWAYS + | COUNTER of {eqZero:bool, cond:bool option} + datatype arith = + ADD + | SUBF + | MULLW + | MULLD + | MULHW + | MULHWU + | DIVW + | DIVD + | DIVWU + | DIVDU + | AND + | OR + | XOR + | NAND + | NOR + | EQV + | ANDC + | ORC + | SLW + | SLD + | SRW + | SRD + | SRAW + | SRAD + datatype arithi = + ADDI + | ADDIS + | SUBFIC + | MULLI + | ANDI_Rc + | ANDIS_Rc + | ORI + | ORIS + | XORI + | XORIS + | SRAWI + | SRADI + datatype rotate = + RLWNM + | RLDCL + | RLDCR + datatype rotatei = + RLWINM + | RLWIMI + | RLDICL + | RLDICR + | RLDIC + | RLDIMI + datatype ccarith = + CRAND + | CROR + | CRXOR + | CRNAND + | CRNOR + | CREQV + | CRANDC + | CRORC + datatype bit = + LT + | GT + | EQ + | SO + | FL + | FG + | FE + | FU + | FX + | FEX + | VX + | OX + datatype xerbit = + SO64 + | OV64 + | CA64 + | SO32 + | OV32 + | CA32 + type cr_bit = (CellsBasis.cell) * bit + datatype instr = + L of {ld:load, rt:CellsBasis.cell, ra:CellsBasis.cell, d:operand, mem:Region.region} + | LF of {ld:fload, ft:CellsBasis.cell, ra:CellsBasis.cell, d:operand, mem:Region.region} + | ST of {st:store, rs:CellsBasis.cell, ra:CellsBasis.cell, d:operand, mem:Region.region} + | STF of {st:fstore, fs:CellsBasis.cell, ra:CellsBasis.cell, d:operand, + mem:Region.region} + | UNARY of {oper:unary, rt:CellsBasis.cell, ra:CellsBasis.cell, Rc:bool, + OE:bool} + | ARITH of {oper:arith, rt:CellsBasis.cell, ra:CellsBasis.cell, rb:CellsBasis.cell, + Rc:bool, OE:bool} + | ARITHI of {oper:arithi, rt:CellsBasis.cell, ra:CellsBasis.cell, im:operand} + | ROTATE of {oper:rotate, ra:CellsBasis.cell, rs:CellsBasis.cell, sh:CellsBasis.cell, + mb:int, me:int option} + | ROTATEI of {oper:rotatei, ra:CellsBasis.cell, rs:CellsBasis.cell, sh:operand, + mb:int, me:int option} + | COMPARE of {cmp:cmp, l:bool, bf:CellsBasis.cell, ra:CellsBasis.cell, rb:operand} + | FCOMPARE of {cmp:fcmp, bf:CellsBasis.cell, fa:CellsBasis.cell, fb:CellsBasis.cell} + | FUNARY of {oper:funary, ft:CellsBasis.cell, fb:CellsBasis.cell, Rc:bool} + | FARITH of {oper:farith, ft:CellsBasis.cell, fa:CellsBasis.cell, fb:CellsBasis.cell, + Rc:bool} + | FARITH3 of {oper:farith3, ft:CellsBasis.cell, fa:CellsBasis.cell, fb:CellsBasis.cell, + fc:CellsBasis.cell, Rc:bool} + | CCARITH of {oper:ccarith, bt:cr_bit, ba:cr_bit, bb:cr_bit} + | MCRF of {bf:CellsBasis.cell, bfa:CellsBasis.cell} + | MTSPR of {rs:CellsBasis.cell, spr:CellsBasis.cell} + | MFSPR of {rt:CellsBasis.cell, spr:CellsBasis.cell} + | LWARX of {rt:CellsBasis.cell, ra:CellsBasis.cell, rb:CellsBasis.cell} + | STWCX of {rs:CellsBasis.cell, ra:CellsBasis.cell, rb:CellsBasis.cell} + | TW of {to:int, ra:CellsBasis.cell, si:operand} + | TD of {to:int, ra:CellsBasis.cell, si:operand} + | BC of {bo:bo, bf:CellsBasis.cell, bit:bit, addr:operand, LK:bool, fall:operand} + | BCLR of {bo:bo, bf:CellsBasis.cell, bit:bit, LK:bool, labels:Label.label list} + | B of {addr:operand, LK:bool} + | CALL of {def:C.cellset, use:C.cellset, cutsTo:Label.label list, mem:Region.region} + | SOURCE of {} + | SINK of {} + | PHI of {} + and instruction = + LIVE of {regs: C.cellset, spilled: C.cellset} + | KILL of {regs: C.cellset, spilled: C.cellset} + | COPY of {k: CellsBasis.cellkind, + sz: int, (* in bits *) + dst: CellsBasis.cell list, + src: CellsBasis.cell list, + tmp: ea option (* NONE if |dst| = {src| = 1 *)} + | ANNOTATION of {i:instruction, a:Annotations.annotation} + | INSTR of instr + val l : {ld:load, rt:CellsBasis.cell, ra:CellsBasis.cell, d:operand, mem:Region.region} -> instruction + val lf : {ld:fload, ft:CellsBasis.cell, ra:CellsBasis.cell, d:operand, mem:Region.region} -> instruction + val st : {st:store, rs:CellsBasis.cell, ra:CellsBasis.cell, d:operand, mem:Region.region} -> instruction + val stf : {st:fstore, fs:CellsBasis.cell, ra:CellsBasis.cell, d:operand, + mem:Region.region} -> instruction + val unary : {oper:unary, rt:CellsBasis.cell, ra:CellsBasis.cell, Rc:bool, + OE:bool} -> instruction + val arith : {oper:arith, rt:CellsBasis.cell, ra:CellsBasis.cell, rb:CellsBasis.cell, + Rc:bool, OE:bool} -> instruction + val arithi : {oper:arithi, rt:CellsBasis.cell, ra:CellsBasis.cell, im:operand} -> instruction + val rotate : {oper:rotate, ra:CellsBasis.cell, rs:CellsBasis.cell, sh:CellsBasis.cell, + mb:int, me:int option} -> instruction + val rotatei : {oper:rotatei, ra:CellsBasis.cell, rs:CellsBasis.cell, sh:operand, + mb:int, me:int option} -> instruction + val compare : {cmp:cmp, l:bool, bf:CellsBasis.cell, ra:CellsBasis.cell, + rb:operand} -> instruction + val fcompare : {cmp:fcmp, bf:CellsBasis.cell, fa:CellsBasis.cell, fb:CellsBasis.cell} -> instruction + val funary : {oper:funary, ft:CellsBasis.cell, fb:CellsBasis.cell, Rc:bool} -> instruction + val farith : {oper:farith, ft:CellsBasis.cell, fa:CellsBasis.cell, fb:CellsBasis.cell, + Rc:bool} -> instruction + val farith3 : {oper:farith3, ft:CellsBasis.cell, fa:CellsBasis.cell, fb:CellsBasis.cell, + fc:CellsBasis.cell, Rc:bool} -> instruction + val ccarith : {oper:ccarith, bt:cr_bit, ba:cr_bit, bb:cr_bit} -> instruction + val mcrf : {bf:CellsBasis.cell, bfa:CellsBasis.cell} -> instruction + val mtspr : {rs:CellsBasis.cell, spr:CellsBasis.cell} -> instruction + val mfspr : {rt:CellsBasis.cell, spr:CellsBasis.cell} -> instruction + val lwarx : {rt:CellsBasis.cell, ra:CellsBasis.cell, rb:CellsBasis.cell} -> instruction + val stwcx : {rs:CellsBasis.cell, ra:CellsBasis.cell, rb:CellsBasis.cell} -> instruction + val tw : {to:int, ra:CellsBasis.cell, si:operand} -> instruction + val td : {to:int, ra:CellsBasis.cell, si:operand} -> instruction + val bc : {bo:bo, bf:CellsBasis.cell, bit:bit, addr:operand, LK:bool, fall:operand} -> instruction + val bclr : {bo:bo, bf:CellsBasis.cell, bit:bit, LK:bool, labels:Label.label list} -> instruction + val b : {addr:operand, LK:bool} -> instruction + val call : {def:C.cellset, use:C.cellset, cutsTo:Label.label list, mem:Region.region} -> instruction + val source : {} -> instruction + val sink : {} -> instruction + val phi : {} -> instruction +end + +functor PPCInstr(T: MLTREE + ) : PPCINSTR = +struct + structure C = PPCCells + structure CB = CellsBasis + structure T = T + structure Region = T.Region + structure Constant = T.Constant + type gpr = int + type fpr = int + type ccr = int + type crf = int + datatype spr = + XER + | LR + | CTR + datatype operand = + RegOp of CellsBasis.cell + | ImmedOp of int + | LabelOp of T.labexp + type addressing_mode = CellsBasis.cell * operand + datatype ea = + Direct of CellsBasis.cell + | FDirect of CellsBasis.cell + | Displace of {base:CellsBasis.cell, disp:T.labexp, mem:Region.region} + datatype load = + LBZ + | LBZE + | LHZ + | LHZE + | LHA + | LHAE + | LWZ + | LWZE + | LDE + | LBZU + | LHZU + | LHAU + | LWZU + | LDZU + datatype store = + STB + | STBE + | STH + | STHE + | STW + | STWE + | STDE + | STBU + | STHU + | STWU + | STDU + datatype fload = + LFS + | LFSE + | LFD + | LFDE + | LFSU + | LFDU + datatype fstore = + STFS + | STFSE + | STFD + | STFDE + | STFSU + | STFDU + datatype cmp = + CMP + | CMPL + datatype fcmp = + FCMPO + | FCMPU + datatype unary = + NEG + | EXTSB + | EXTSH + | EXTSW + | CNTLZW + | CNTLZD + datatype funary = + FMR + | FNEG + | FABS + | FNABS + | FSQRT + | FSQRTS + | FRSP + | FCTIW + | FCTIWZ + | FCTID + | FCTIDZ + | FCFID + datatype farith = + FADD + | FSUB + | FMUL + | FDIV + | FADDS + | FSUBS + | FMULS + | FDIVS + datatype farith3 = + FMADD + | FMADDS + | FMSUB + | FMSUBS + | FNMADD + | FNMADDS + | FNMSUB + | FNMSUBS + | FSEL + datatype bo = + TRUE + | FALSE + | ALWAYS + | COUNTER of {eqZero:bool, cond:bool option} + datatype arith = + ADD + | SUBF + | MULLW + | MULLD + | MULHW + | MULHWU + | DIVW + | DIVD + | DIVWU + | DIVDU + | AND + | OR + | XOR + | NAND + | NOR + | EQV + | ANDC + | ORC + | SLW + | SLD + | SRW + | SRD + | SRAW + | SRAD + datatype arithi = + ADDI + | ADDIS + | SUBFIC + | MULLI + | ANDI_Rc + | ANDIS_Rc + | ORI + | ORIS + | XORI + | XORIS + | SRAWI + | SRADI + datatype rotate = + RLWNM + | RLDCL + | RLDCR + datatype rotatei = + RLWINM + | RLWIMI + | RLDICL + | RLDICR + | RLDIC + | RLDIMI + datatype ccarith = + CRAND + | CROR + | CRXOR + | CRNAND + | CRNOR + | CREQV + | CRANDC + | CRORC + datatype bit = + LT + | GT + | EQ + | SO + | FL + | FG + | FE + | FU + | FX + | FEX + | VX + | OX + datatype xerbit = + SO64 + | OV64 + | CA64 + | SO32 + | OV32 + | CA32 + type cr_bit = (CellsBasis.cell) * bit + datatype instr = + L of {ld:load, rt:CellsBasis.cell, ra:CellsBasis.cell, d:operand, mem:Region.region} + | LF of {ld:fload, ft:CellsBasis.cell, ra:CellsBasis.cell, d:operand, mem:Region.region} + | ST of {st:store, rs:CellsBasis.cell, ra:CellsBasis.cell, d:operand, mem:Region.region} + | STF of {st:fstore, fs:CellsBasis.cell, ra:CellsBasis.cell, d:operand, + mem:Region.region} + | UNARY of {oper:unary, rt:CellsBasis.cell, ra:CellsBasis.cell, Rc:bool, + OE:bool} + | ARITH of {oper:arith, rt:CellsBasis.cell, ra:CellsBasis.cell, rb:CellsBasis.cell, + Rc:bool, OE:bool} + | ARITHI of {oper:arithi, rt:CellsBasis.cell, ra:CellsBasis.cell, im:operand} + | ROTATE of {oper:rotate, ra:CellsBasis.cell, rs:CellsBasis.cell, sh:CellsBasis.cell, + mb:int, me:int option} + | ROTATEI of {oper:rotatei, ra:CellsBasis.cell, rs:CellsBasis.cell, sh:operand, + mb:int, me:int option} + | COMPARE of {cmp:cmp, l:bool, bf:CellsBasis.cell, ra:CellsBasis.cell, rb:operand} + | FCOMPARE of {cmp:fcmp, bf:CellsBasis.cell, fa:CellsBasis.cell, fb:CellsBasis.cell} + | FUNARY of {oper:funary, ft:CellsBasis.cell, fb:CellsBasis.cell, Rc:bool} + | FARITH of {oper:farith, ft:CellsBasis.cell, fa:CellsBasis.cell, fb:CellsBasis.cell, + Rc:bool} + | FARITH3 of {oper:farith3, ft:CellsBasis.cell, fa:CellsBasis.cell, fb:CellsBasis.cell, + fc:CellsBasis.cell, Rc:bool} + | CCARITH of {oper:ccarith, bt:cr_bit, ba:cr_bit, bb:cr_bit} + | MCRF of {bf:CellsBasis.cell, bfa:CellsBasis.cell} + | MTSPR of {rs:CellsBasis.cell, spr:CellsBasis.cell} + | MFSPR of {rt:CellsBasis.cell, spr:CellsBasis.cell} + | LWARX of {rt:CellsBasis.cell, ra:CellsBasis.cell, rb:CellsBasis.cell} + | STWCX of {rs:CellsBasis.cell, ra:CellsBasis.cell, rb:CellsBasis.cell} + | TW of {to:int, ra:CellsBasis.cell, si:operand} + | TD of {to:int, ra:CellsBasis.cell, si:operand} + | BC of {bo:bo, bf:CellsBasis.cell, bit:bit, addr:operand, LK:bool, fall:operand} + | BCLR of {bo:bo, bf:CellsBasis.cell, bit:bit, LK:bool, labels:Label.label list} + | B of {addr:operand, LK:bool} + | CALL of {def:C.cellset, use:C.cellset, cutsTo:Label.label list, mem:Region.region} + | SOURCE of {} + | SINK of {} + | PHI of {} + and instruction = + LIVE of {regs: C.cellset, spilled: C.cellset} + | KILL of {regs: C.cellset, spilled: C.cellset} + | COPY of {k: CellsBasis.cellkind, + sz: int, (* in bits *) + dst: CellsBasis.cell list, + src: CellsBasis.cell list, + tmp: ea option (* NONE if |dst| = {src| = 1 *)} + | ANNOTATION of {i:instruction, a:Annotations.annotation} + | INSTR of instr + val l = INSTR o L + and lf = INSTR o LF + and st = INSTR o ST + and stf = INSTR o STF + and unary = INSTR o UNARY + and arith = INSTR o ARITH + and arithi = INSTR o ARITHI + and rotate = INSTR o ROTATE + and rotatei = INSTR o ROTATEI + and compare = INSTR o COMPARE + and fcompare = INSTR o FCOMPARE + and funary = INSTR o FUNARY + and farith = INSTR o FARITH + and farith3 = INSTR o FARITH3 + and ccarith = INSTR o CCARITH + and mcrf = INSTR o MCRF + and mtspr = INSTR o MTSPR + and mfspr = INSTR o MFSPR + and lwarx = INSTR o LWARX + and stwcx = INSTR o STWCX + and tw = INSTR o TW + and td = INSTR o TD + and bc = INSTR o BC + and bclr = INSTR o BCLR + and b = INSTR o B + and call = INSTR o CALL + and source = INSTR o SOURCE + and sink = INSTR o SINK + and phi = INSTR o PHI +end + diff --git a/MLRISC/ppc/instructions/ppcInstrExt.sml b/MLRISC/ppc/instructions/ppcInstrExt.sml new file mode 100644 index 0000000..9576ff3 --- /dev/null +++ b/MLRISC/ppc/instructions/ppcInstrExt.sml @@ -0,0 +1,13 @@ +(* ppcInstrExt.sml + * + * COPYRIGHT (c) 2004 John Reppy (http://www.cs.uchicago.edu/~jhr) + * All rights reserved. + *) + +structure PPCInstrExt = + struct + + datatype ('s, 'r, 'f, 'c) sext + = STWU of {src : 'r, ea : 'r} (* store word and update *) + + end diff --git a/MLRISC/ppc/instructions/ppcProps.sml b/MLRISC/ppc/instructions/ppcProps.sml new file mode 100644 index 0000000..d1dd455 --- /dev/null +++ b/MLRISC/ppc/instructions/ppcProps.sml @@ -0,0 +1,246 @@ +(* ppcProps.sml + * + * COPYRIGHT (c) 2002 Bell Labs, Lucent Technologies + *) + +functor PPCProps + ( structure PPCInstr : PPCINSTR + structure MLTreeEval : MLTREE_EVAL where T = PPCInstr.T + structure MLTreeHash : MLTREE_HASH where T = PPCInstr.T + ) : INSN_PROPERTIES = +struct + structure I = PPCInstr + structure C = I.C + structure T = I.T + structure CB = CellsBasis + + exception NegateConditional + + fun error msg = MLRiscErrorMsg.error("PPCProps",msg) + + datatype kind = IK_JUMP | IK_NOP | IK_INSTR | IK_COPY | IK_CALL + | IK_CALL_WITH_CUTS | IK_PHI | IK_SOURCE | IK_SINK + datatype target = LABELLED of Label.label | FALLTHROUGH | ESCAPES + + (* This stupid architecture doesn't really have a dedicated zero register *) + fun zeroR() = C.Reg CB.GP 0 + + fun instrKind(I.ANNOTATION{i, ...}) = instrKind i + | instrKind(I.COPY _) = IK_COPY + | instrKind(I.INSTR instr) = let + fun eqTest to = Word.andb(Word.fromInt to, 0w4) <> 0w0 + fun trapAlways{to, ra, si} = + (case si + of I.RegOp rb => + if CellsBasis.sameColor(ra,rb) andalso eqTest(to) then IK_JUMP + else IK_INSTR + | I.ImmedOp 0 => + if CellsBasis.registerId ra = 0 andalso eqTest(to) then IK_JUMP + else IK_INSTR + | _ => error "trapAlways: neither RegOp nor ImmedOp(0)" + (*esac*)) + in + case instr + of (I.BC _) => IK_JUMP + | (I.BCLR _) => IK_JUMP + | (I.B _) => IK_JUMP + | (I.TW t) => trapAlways(t) + | (I.TD t) => trapAlways(t) + | (I.ARITHI{oper=I.ORI, rt, ra, im=I.ImmedOp 0}) => + if CB.registerId rt = 0 andalso CB.registerId ra = 0 then IK_NOP + else IK_INSTR + | (I.CALL{cutsTo=_::_,...}) => IK_CALL_WITH_CUTS + | (I.CALL _) => IK_CALL + | (I.PHI _) => IK_PHI + | (I.SOURCE _) => IK_SOURCE + | (I.SINK _) => IK_SINK + | _ => IK_INSTR + (*esac*) + end + | instrKind _ = error "instrKind" + + fun moveInstr(I.COPY _) = true + | moveInstr(I.ANNOTATION{i,...}) = moveInstr i + | moveInstr _ = false + + fun nop () = I.arithi{oper=I.ORI, rt=zeroR(), ra=zeroR(), im=I.ImmedOp 0} + + fun moveTmpR(I.COPY{tmp, ...}) = + (case tmp + of SOME(I.Direct r) => SOME r + | SOME(I.FDirect f) => SOME f + | _ => NONE + (*esac*)) + | moveTmpR(I.ANNOTATION{i,...}) = moveTmpR i + | moveTmpR _ = NONE + + fun moveDstSrc(I.COPY{dst,src,...}) = (dst,src) + | moveDstSrc(I.ANNOTATION{i,...}) = moveDstSrc i + | moveDstSrc _ = error "moveDstSrc" + + + fun branchTargets(I.INSTR(I.BC{bo=I.ALWAYS, addr, ...})) = + (case addr + of I.LabelOp(T.LABEL lab) => [LABELLED lab] + | _ => error "branchTargets:BC:ALWAYS" + (*esac*)) + | branchTargets(I.INSTR(I.BC{addr, ...})) = + (case addr + of I.LabelOp(T.LABEL lab) => [LABELLED lab, FALLTHROUGH] + | _ => error "branchTargets:BC" + (*esac*)) + | branchTargets(I.INSTR(I.BCLR{labels, bo=I.ALWAYS, ...})) = + (case labels of [] => [ESCAPES] | _ => map LABELLED labels) + | branchTargets(I.INSTR(I.BCLR{labels, ...})) = + (case labels of [] => [ESCAPES, FALLTHROUGH] | _ => map LABELLED labels) + | branchTargets(I.INSTR(I.B{addr=I.LabelOp(T.LABEL lab), LK})) = [LABELLED lab] + | branchTargets(I.INSTR(I.CALL{cutsTo, ...})) = FALLTHROUGH::map LABELLED cutsTo + | branchTargets(I.INSTR(I.TD _)) = [ESCAPES] + | branchTargets(I.INSTR(I.TW _)) = [ESCAPES] + | branchTargets(I.ANNOTATION{i,...}) = branchTargets i + | branchTargets _ = error "branchTargets" + + fun labelOp l = I.LabelOp(T.LABEL l) + + fun setJumpTarget(I.ANNOTATION{a,i}, l) = I.ANNOTATION{a=a, i=setJumpTarget(i,l)} + | setJumpTarget(I.INSTR(I.BC{bo as I.ALWAYS, bf, bit, addr, fall, LK}), lab) = + I.bc{bo=bo, bf=bf, bit=bit, fall=fall, LK=LK, addr=labelOp lab} + | setJumpTarget(I.INSTR(I.B{addr, LK}), lab) = I.b{addr=labelOp(lab), LK=LK} + | setJumpTarget _ = error "setJumpTarget" + + fun setBranchTargets{i=I.ANNOTATION{a,i}, t, f} = + I.ANNOTATION{a=a, i=setBranchTargets{i=i, t=t, f=f}} + | setBranchTargets{i=I.INSTR(I.BC{bo=I.ALWAYS, bf, bit, addr, fall, LK}), ...} = + error "setBranchTargets" + | setBranchTargets{i=I.INSTR(I.BC{bo, bf, bit, addr, fall, LK}), t, f} = + I.bc{bo=bo, bf=bf, bit=bit, LK=LK, addr=labelOp t, fall=labelOp f} + | setBranchTargets _ = error "setBranchTargets" + + fun jump lab = I.b{addr=I.LabelOp(T.LABEL lab), LK=false} + + fun negateConditional(I.ANNOTATION{a,i}, l) = + I.ANNOTATION{a=a, i=negateConditional(i, l)} + | negateConditional(I.INSTR(I.BC{bo, bf, bit, addr, fall, LK}), lab) = let + val bo' = (case bo + of I.TRUE => I.FALSE + | I.FALSE => I.TRUE + | I.ALWAYS => error "negateCondtional: ALWAYS" + | I.COUNTER{eqZero, cond=NONE} => I.COUNTER{eqZero=not eqZero, cond=NONE} + | I.COUNTER{eqZero, cond=SOME b} => error "negateConditional: COUNTER" + (*esac*)) + in + I.bc{bo=bo', bf=bf, bit=bit, addr=labelOp lab, fall=fall, LK=LK} + end + | negateConditional _ = error "negateConditional" + + val immedRange = {lo= ~32768, hi=32767} + + fun loadImmed{immed,t} = + I.arithi + {oper=I.ADDI, rt=t, ra=zeroR(), + im=if #lo immedRange <= immed andalso immed <= #hi immedRange + then I.ImmedOp immed else I.LabelOp(I.T.LI(IntInf.fromInt immed))} + fun loadOperand{opn,t} = + I.arithi{oper=I.ADDI, rt=t, ra=zeroR(), im=opn} + + + fun hashOpn(I.RegOp r) = CB.hashCell r + | hashOpn(I.ImmedOp i) = Word.fromInt i + | hashOpn(I.LabelOp l) = MLTreeHash.hash l + fun eqOpn(I.RegOp a,I.RegOp b) = CB.sameColor(a,b) + | eqOpn(I.ImmedOp a,I.ImmedOp b) = a = b + | eqOpn(I.LabelOp a,I.LabelOp b) = MLTreeEval.==(a,b) + | eqOpn _ = false + + fun defUseR instr = let + fun ppcDU instr = let + fun operand(I.RegOp r,use) = r::use + | operand(_,use) = use + in + case instr + of I.L{rt, ra, d, ...} => ([rt], operand(d,[ra])) + | I.LF{ra, d, ...} => ([], operand(d,[ra])) + | I.ST{rs, ra, d, ...} => ([], operand(d,[rs,ra])) + | I.STF{ra, d, ...} => ([], operand(d,[ra])) + | I.UNARY{rt, ra, ...} => ([rt], [ra]) + | I.ARITH{rt, ra, rb, ...} => ([rt], [ra,rb]) + | I.ARITHI{rt, ra, im, ...} => ([rt], operand(im,[ra])) + | I.ROTATE{ra, rs, sh, ...} => ([ra], [rs,sh]) + | I.ROTATEI{ra, rs, sh, ...} => ([ra], operand(sh,[rs])) + | I.COMPARE{ra, rb, ...} => ([], operand(rb,[ra])) + | I.MTSPR{rs, ...} => ([], [rs]) + | I.MFSPR{rt, ...} => ([rt], []) + | I.TW{to, ra, si} => ([], operand(si,[ra])) + | I.TD{to, ra, si} => ([], operand(si,[ra])) + | I.CALL{def, use, ...} => (C.getReg def, C.getReg use) + | I.LWARX{rt, ra, rb, ...} => ([rt], [ra, rb]) + | I.STWCX{rs, ra, rb, ...} => ([], [rs, ra, rb]) + | _ => ([], []) + end + in + case instr + of I.ANNOTATION{i, ...} => defUseR i + | I.LIVE{regs, ...} => ([], C.getReg regs) + | I.KILL{regs, ...} => (C.getReg regs, []) + | I.INSTR(i) => ppcDU(i) + | I.COPY{k, dst, src, tmp, ...} => let + val (d,u) = case k of CB.GP => (dst, src) | _ => ([], []) + in + case tmp + of SOME(I.Direct r) => (r::d, u) + | SOME(I.Displace{base, ...}) => (d, base::u) + | _ => (d,u) + end + end + + fun defUseF instr = let + fun ppcDU instr = + (case instr + of I.LF{ft, ...} => ([ft],[]) + | I.STF{fs, ...} => ([], [fs]) + | I.FCOMPARE{fa, fb, ...} => ([], [fa, fb]) + | I.FUNARY{ft, fb, ...} => ([ft], [fb]) + | I.FARITH{ft, fa, fb, ...} => ([ft], [fa, fb]) + | I.FARITH3{ft, fa, fb, fc, ...} => ([ft], [fa, fb, fc]) + | I.CALL{def, use, ...} => (C.getFreg def,C.getFreg use) + | _ => ([], []) + (*esac*)) + in + case instr + of I.ANNOTATION{i, ...} => defUseF i + | I.LIVE{regs, ...} => ([], C.getFreg regs) + | I.KILL{regs, ...} => (C.getFreg regs, []) + | I.INSTR(i) => ppcDU(i) + | I.COPY{k, dst, src, tmp, ...} => let + val (d, u) = case k of CB.FP => (dst, src) | _ => ([],[]) + in + case tmp + of SOME(I.FDirect f) => (f::d, u) + | _ => (d, u) + end + end + fun defUseCC instr = error "defUseCC: not implemented" + + fun defUse CB.GP = defUseR + | defUse CB.FP = defUseF + | defUse CB.CC = defUseCC + | defUse _ = error "defUse" + + (*======================================================================== + * Annotations + *========================================================================*) + fun getAnnotations(I.ANNOTATION{i,a}) = + let val (i,an) = getAnnotations i in (i,a::an) end + | getAnnotations i = (i,[]) + fun annotate(i,a) = I.ANNOTATION{i=i,a=a} + + (*======================================================================== + * Replicate an instruction + *========================================================================*) + fun replicate(I.ANNOTATION{i,a}) = I.ANNOTATION{i=replicate i,a=a} + | replicate(I.COPY{k, sz, tmp=SOME _, dst, src}) = + I.COPY{k=k, sz=sz, tmp=SOME(I.Direct(C.newReg())), dst=dst, src=src} + | replicate i = i +end + + diff --git a/MLRISC/ppc/instructions/ppcShuffle.sig b/MLRISC/ppc/instructions/ppcShuffle.sig new file mode 100644 index 0000000..ee295ec --- /dev/null +++ b/MLRISC/ppc/instructions/ppcShuffle.sig @@ -0,0 +1,8 @@ +signature PPCSHUFFLE = sig + structure I : PPCINSTR + + type t = {tmp:I.ea option, dst:CellsBasis.cell list, src:CellsBasis.cell list} + + val shuffle : t -> I.instruction list + val shufflefp : t -> I.instruction list +end diff --git a/MLRISC/ppc/instructions/ppcShuffle.sml b/MLRISC/ppc/instructions/ppcShuffle.sml new file mode 100644 index 0000000..9ccf0fb --- /dev/null +++ b/MLRISC/ppc/instructions/ppcShuffle.sml @@ -0,0 +1,31 @@ +functor PPCShuffle(I:PPCINSTR) = struct + structure I = I + structure Shuffle = Shuffle(I) + + type t = {tmp:I.ea option, dst:CellsBasis.cell list, src:CellsBasis.cell list} + + fun error msg = MLRiscErrorMsg.error("PPCShuffle",msg) + + (* WARNING: these move operators assume 32 bit addressing is used! + * Allen + *) + fun move{src=I.Direct rs, dst=I.Direct rd} = + [I.arith{oper=I.OR, rt=rd, ra=rs, rb=rs, Rc=false, OE=false}] + | move{src=I.Direct rs, dst=I.Displace{base, disp, mem}} = + [I.st{st=I.STW, rs=rs, ra=base, d=I.LabelOp disp, mem=mem}] + | move{src=I.Displace{base, disp, mem}, dst=I.Direct rt} = + [I.l{ld=I.LWZ, rt=rt, ra=base, d=I.LabelOp disp, mem=mem}] + | move _ = error "move" + + fun fmove{src=I.FDirect fs, dst=I.FDirect fd} = + [I.funary{oper=I.FMR, fb=fs, ft=fd, Rc=false}] + | fmove{src=I.FDirect fs, dst=I.Displace{base, disp, mem}} = + [I.stf{st=I.STFD, fs=fs, ra=base, d=I.LabelOp disp, mem=mem}] + | fmove{src=I.Displace{base, disp, mem}, dst=I.FDirect ft} = + [I.lf{ld=I.LFD, ft=ft, ra=base, d=I.LabelOp disp, mem=mem}] + | fmove _ = error "fmove" + + val shuffle = Shuffle.shuffle {mvInstr=move, ea=I.Direct} + + val shufflefp = Shuffle.shuffle {mvInstr=fmove, ea=I.FDirect} +end diff --git a/MLRISC/ppc/mltree/ppc.sml b/MLRISC/ppc/mltree/ppc.sml new file mode 100644 index 0000000..94d81f6 --- /dev/null +++ b/MLRISC/ppc/mltree/ppc.sml @@ -0,0 +1,820 @@ +(* ppc.sml + * + * COPYRIGHT (c) 2002 Bell Labs, Lucent Technologies + * + * I've substantially modified this code generator to support the new MLTREE. + * Please see the file README.hppa for the ugly details. + * + * -- Allen + *) + +functor PPC + (structure PPCInstr : PPCINSTR + structure PseudoInstrs : PPC_PSEUDO_INSTR + where I = PPCInstr + structure ExtensionComp : MLTREE_EXTENSION_COMP + where I = PPCInstr and T = PPCInstr.T + + (* + * Support 64 bit mode? + * This should be set to false for SML/NJ + *) + val bit64mode : bool + + (* + * Cost of multiplication in cycles + *) + val multCost : int ref + ) = +struct + structure I = PPCInstr + structure T = I.T + structure TS = ExtensionComp.TS + structure C = PPCInstr.C + structure CB = CellsBasis + structure W32 = Word32 + structure A = MLRiscAnnotations + structure CFG = ExtensionComp.CFG + + fun error msg = MLRiscErrorMsg.error("PPC",msg) + + type instrStream = (I.instruction, CB.CellSet.cellset, CFG.cfg) TS.stream + type mltreeStream = (T.stm, T.mlrisc list, CFG.cfg) TS.stream + + + val (intTy,naturalWidths) = if bit64mode then (64,[32,64]) else (32,[32]) + structure Gen = MLTreeGen + (structure T = T + structure Cells = C + val intTy = intTy + val naturalWidths = naturalWidths + datatype rep = SE | ZE | NEITHER + val rep = NEITHER + ) + + (* + * Special instructions + *) + fun MTLR r = I.MTSPR{rs=r, spr=C.lr} + fun MFLR r = I.MFSPR{rt=r, spr=C.lr} + val CR0 = C.Reg CB.CC 0 + val RET = I.BCLR{bo=I.ALWAYS, bf=CR0, bit=I.LT, LK=false, labels=[]} + fun SLLI32{r,i,d} = + I.ROTATEI{oper=I.RLWINM,ra=d,rs=r,sh=I.ImmedOp i,mb=0,me=SOME(31-i)} + fun SRLI32{r,i,d} = + I.ROTATEI{oper=I.RLWINM,ra=d,rs=r,sh=I.ImmedOp(Int.mod(32-i,32)),mb=i,me=SOME(31)} + fun COPY{dst, src, tmp} = + I.COPY{k=CB.GP, sz=32, dst=dst, src=src, tmp=tmp} + fun FCOPY{dst, src, tmp} = + I.COPY{k=CB.FP, sz=64, dst=dst, src=src, tmp=tmp} + + (* + * Integer multiplication + *) + functor Multiply32 = MLTreeMult + (structure I = I + structure T = T + structure CB = CellsBasis + val intTy = 32 + type arg = {r1:CB.cell,r2:CB.cell,d:CB.cell} + type argi = {r:CB.cell,i:int,d:CB.cell} + + fun mov{r,d} = COPY{dst=[d],src=[r],tmp=NONE} + fun add{r1,r2,d}= I.arith{oper=I.ADD,ra=r1,rb=r2,rt=d,Rc=false,OE=false} + fun slli{r,i,d} = [I.INSTR(SLLI32{r=r,i=i,d=d})] + fun srli{r,i,d} = [I.INSTR(SRLI32{r=r,i=i,d=d})] + fun srai{r,i,d} = [I.arithi{oper=I.SRAWI,rt=d,ra=r,im=I.ImmedOp i}] + ) + + structure Mulu32 = Multiply32 + (val trapping = false + val multCost = multCost + fun addv{r1,r2,d}=[I.arith{oper=I.ADD,ra=r1,rb=r2,rt=d,Rc=false,OE=false}] + fun subv{r1,r2,d}=[I.arith{oper=I.SUBF,ra=r2,rb=r1,rt=d,Rc=false,OE=false}] + val sh1addv = NONE + val sh2addv = NONE + val sh3addv = NONE + ) + (val signed = false) + + structure Muls32 = Multiply32 + (val trapping = false + val multCost = multCost + fun addv{r1,r2,d}=[I.arith{oper=I.ADD,ra=r1,rb=r2,rt=d,Rc=false,OE=false}] + fun subv{r1,r2,d}=[I.arith{oper=I.SUBF,ra=r2,rb=r1,rt=d,Rc=false,OE=false}] + val sh1addv = NONE + val sh2addv = NONE + val sh3addv = NONE + ) + (val signed = true) + + structure Mult32 = Multiply32 + (val trapping = true + val multCost = multCost + fun addv{r1,r2,d} = error "Mult32.addv" + fun subv{r1,r2,d} = error "Mult32.subv" + val sh1addv = NONE + val sh2addv = NONE + val sh3addv = NONE + ) + (val signed = true) + + fun selectInstructions + (instrStream as + TS.S.STREAM{emit=emitInstruction,comment,getAnnotations, + defineLabel,entryLabel,pseudoOp,annotation, + beginCluster,endCluster,exitBlock,...}) = + let + val emit = emitInstruction o I.INSTR + + (* mark an instruction with annotations *) + fun annotate(instr,[]) = instr + | annotate(instr,a::an) = annotate(I.ANNOTATION{i=instr,a=a},an) + fun mark'(instr, an) = emitInstruction(annotate(instr, an)) + fun mark(instr,an) = emitInstruction(annotate(I.INSTR instr,an)) + + (* Label where trap is generated. + * For overflow trapping instructions, we generate a branch + * to this label. + *) + val trapLabel : Label.label option ref = ref NONE + val zeroR = C.r0 + + val newReg = C.newReg + val newFreg = C.newFreg + val newCCreg = C.newCell CB.CC + + + fun LT (x,y) = T.I.LT(32, x, y) + fun LE (x,y) = T.I.LE(32, x, y) + fun toInt mi = T.I.toInt(32, mi) + fun LI i = T.I.fromInt(32, i) + + fun signed16 mi = LE(~0x8000, mi) andalso LT(mi, 0x8000) + fun signed12 mi = LE(~0x800, mi) andalso LT(mi, 0x800) + fun unsigned16 mi = LE(0, mi) andalso LT(mi, 0x10000) + fun unsigned5 mi = LE(0, mi) andalso LT(mi, 32) + fun unsigned6 mi = LE(0, mi) andalso LT(mi, 64) + + fun move(rs,rd,an) = + if CB.sameColor(rs,rd) then () + else mark'(COPY{dst=[rd],src=[rs],tmp=NONE},an) + + fun fmove(fs,fd,an) = + if CB.sameColor(fs,fd) then () + else mark'(FCOPY{dst=[fd],src=[fs],tmp=NONE},an) + + fun ccmove(ccs,ccd,an) = + if CB.sameColor(ccd,ccs) then () else mark(I.MCRF{bf=ccd, bfa=ccs},an) + + fun copy(dst, src, an) = + mark'(COPY{dst=dst, src=src, + tmp=case dst of [_] => NONE + | _ => SOME(I.Direct(newReg()))},an) + fun fcopy(dst, src, an) = + mark'(FCOPY{dst=dst, src=src, + tmp=case dst of [_] => NONE + | _ => SOME(I.FDirect(newFreg()))},an) + + fun emitBranch{bo, bf, bit, addr, LK} = + let val fallThrLab = Label.anon() + val fallThrOpnd = I.LabelOp(T.LABEL fallThrLab) + in + emit(I.BC{bo=bo, bf=bf, bit=bit, addr=addr, LK=LK, fall=fallThrOpnd}); + defineLabel fallThrLab + end + + fun split n = let + val wtoi = Word32.toIntX + val w = T.I.toWord32(32, n) + val hi = W32.~>>(w, 0w16) + val lo = W32.andb(w, 0w65535) + val (high, low) = + if W32.<(lo,0w32768) then (hi, lo) else (hi+0w1, lo-0w65536) + in + (wtoi high, wtoi low) + end + + fun loadImmedHiLo(0, lo, rt, an) = + mark(I.ARITHI{oper=I.ADDI, rt=rt, ra=zeroR, im=I.ImmedOp lo}, an) + | loadImmedHiLo(hi, lo, rt, an) = + (mark(I.ARITHI{oper=I.ADDIS, rt=rt, ra=zeroR, im=I.ImmedOp hi}, an); + if lo = 0 then () + else emit(I.ARITHI{oper=I.ADDI, rt=rt, ra=rt, im=I.ImmedOp lo})) + + fun loadImmed(n, rt, an) = + if signed16 n then + mark(I.ARITHI{oper=I.ADDI, rt=rt, ra=zeroR, im=I.ImmedOp(toInt(n))}, an) + else let val (hi, lo) = split n + in loadImmedHiLo(hi, lo, rt, an) + end + + fun loadLabexp(lexp, rt, an) = + mark(I.ARITHI{oper=I.ADDI, rt=rt, ra=zeroR, im=I.LabelOp lexp}, an) + + fun immedOpnd range (e1, e2 as T.LI i) = + (expr e1, if range i then I.ImmedOp(toInt i) else I.RegOp(expr e2)) + | immedOpnd _ (e1, x as T.CONST _) = (expr e1, I.LabelOp x) + | immedOpnd _ (e1, x as T.LABEL _) = (expr e1, I.LabelOp x) + | immedOpnd _ (e1, T.LABEXP lexp) = (expr e1, I.LabelOp lexp) + | immedOpnd _ (e1, e2) = (expr e1, I.RegOp(expr e2)) + + and commImmedOpnd range (e1 as T.LI _, e2) = + immedOpnd range (e2, e1) + | commImmedOpnd range (e1 as T.CONST _, e2) = + immedOpnd range (e2, e1) + | commImmedOpnd range (e1 as T.LABEL _, e2) = + immedOpnd range (e2, e1) + | commImmedOpnd range (e1 as T.LABEXP _, e2) = + immedOpnd range (e2, e1) + | commImmedOpnd range arg = immedOpnd range arg + + and eCommImm range (oper, operi, e1, e2, rt, an) = + (case commImmedOpnd range (e1, e2) + of (ra, I.RegOp rb) => + mark(I.ARITH{oper=oper, ra=ra, rb=rb, rt=rt, Rc=false, OE=false},an) + | (ra, opnd) => + mark(I.ARITHI{oper=operi, ra=ra, im=opnd, rt=rt},an) + (*esac*)) + + (* + * Compute a base/displacement effective address + *) + and addr(size,T.ADD(_, e, T.LI i)) = + let val ra = expr e + in if size i then (ra, I.ImmedOp(toInt i)) else + let val (hi, lo) = split i + val tmpR = newReg() + in emit(I.ARITHI{oper=I.ADDIS, rt=tmpR, ra=ra, im=I.ImmedOp hi}); + (tmpR, I.ImmedOp lo) + end + end + | addr(size,T.ADD(ty, T.LI i, e)) = addr(size,T.ADD(ty, e, T.LI i)) + | addr(size,exp as T.SUB(ty, e, T.LI i)) = + (addr(size,T.ADD(ty, e, T.LI (T.I.NEGT(32, i)))) + handle Overflow => (expr exp, I.ImmedOp 0)) + | addr(size,T.ADD(_, e1, e2)) = (expr e1, I.RegOp (expr e2)) + | addr(size,e) = (expr e, I.ImmedOp 0) + + (* convert mlrisc to cellset: *) + and cellset mlrisc = + let val addCCReg = CB.CellSet.add + fun g([],acc) = acc + | g(T.GPR(T.REG(_,r))::regs,acc) = g(regs,C.addReg(r,acc)) + | g(T.FPR(T.FREG(_,f))::regs,acc) = g(regs,C.addFreg(f,acc)) + | g(T.CCR(T.CC(_,cc))::regs,acc) = g(regs,addCCReg(cc,acc)) + | g(T.CCR(T.FCC(_,cc))::regs,acc) = g(regs,addCCReg(cc,acc)) + | g(_::regs, acc) = g(regs, acc) + in g(mlrisc, C.empty) end + + (* + * Translate a statement, and annotate it + *) + and stmt(T.MV(_, rd, e),an) = doExpr(e, rd, an) + | stmt(T.FMV(_, fd, e),an) = doFexpr(e, fd, an) + | stmt(T.CCMV(ccd, ccexp), an) = doCCexpr(ccexp, ccd, an) + | stmt(T.COPY(_, dst, src), an) = copy(dst, src, an) + | stmt(T.FCOPY(_, dst, src), an) = fcopy(dst, src, an) + | stmt(T.JMP(T.LABEXP lexp, labs),an) = + mark(I.B{addr=I.LabelOp lexp, LK=false},an) + | stmt(T.JMP(x as (T.LABEL _ | T.CONST _), labs),an) = + mark(I.B{addr=I.LabelOp x, LK=false},an) + | stmt(T.JMP(rexp, labs),an) = + let val rs = expr(rexp) + in emit(MTLR(rs)); + mark(I.BCLR{bo=I.ALWAYS,bf=CR0,bit=I.LT,LK=false,labels=labs},an) + end + | stmt(T.CALL{funct, targets, defs, uses, region, pops, ...}, an) = + call(funct, targets, defs, uses, region, [], an, pops) + | stmt(T.FLOW_TO(T.CALL{funct, targets, defs, uses, region, pops,...}, + cutTo), an) = + call(funct, targets, defs, uses, region, cutTo, an, pops) + | stmt(T.RET flow,an) = mark(RET,an) + | stmt(T.STORE(ty,ea,data,mem),an) = store(ty,ea,data,mem,an) + | stmt(T.FSTORE(ty,ea,data,mem),an) = fstore(ty,ea,data,mem,an) + | stmt(T.BCC(cc, lab),an) = branch(cc,lab,an) + | stmt(T.DEFINE l, _) = defineLabel l + | stmt(T.LIVE S, an) = mark'(I.LIVE{regs=cellset S,spilled=C.empty},an) + | stmt(T.KILL S, an) = mark'(I.KILL{regs=cellset S,spilled=C.empty},an) + | stmt(T.ANNOTATION(s,a),an) = stmt(s,a::an) + | stmt(T.EXT s,an) = ExtensionComp.compileSext(reducer()) {stm=s, an=an} + | stmt(s, _) = doStmts(Gen.compileStm s) + + and call(funct, targets, defs, uses, region, cutsTo, an, 0) = + let val defs=cellset(defs) + val uses=cellset(uses) + in emit(MTLR(expr funct)); + mark(I.CALL{def=defs, use=uses, cutsTo=cutsTo, mem=region}, an) + end + | call _ = error "pops<>0 not implemented" + + and branch(T.CMP(_, _, T.LI _, T.LI _), _, _) = error "branch(LI,LI)" + | branch(T.CMP(ty, cc, e1 as T.LI _, e2), lab, an) = + let val cc' = T.Basis.swapCond cc + in branch(T.CMP(ty, cc', e2, e1), lab, an) + end + | branch(cmp as T.CMP(ty, cond, e1, e2), lab, an) = let + val (bo, cf) = + (case cond + of T.LT => (I.TRUE, I.LT) + | T.LE => (I.FALSE, I.GT) + | T.EQ => (I.TRUE, I.EQ) + | T.NE => (I.FALSE, I.EQ) + | T.GT => (I.TRUE, I.GT) + | T.GE => (I.FALSE, I.LT) + | T.LTU => (I.TRUE, I.LT) + | T.LEU => (I.FALSE, I.GT) + | T.GTU => (I.TRUE, I.GT) + | T.GEU => (I.FALSE, I.LT) + | (T.SETCC | T.MISC_COND _) => error "branch(CMP)" + (*esac*)) + val ccreg = if true then CR0 else newCCreg() (* XXX *) + val addr = I.LabelOp(T.LABEL lab) + fun default() = + (doCCexpr(cmp, ccreg, []); + emitBranch{bo=bo, bf=ccreg, bit=cf, addr=addr, LK=false}) + in + case (e1, e2) + of (T.ANDB(_, a1, a2), T.LI z) => + if z = 0 then + (case commImmedOpnd unsigned16 (a1, a2) + of (ra, I.RegOp rb) => + emit(I.ARITH{oper=I.AND, ra=ra, rb=rb, rt=newReg(), Rc=true, OE=false}) + | (ra, opnd) => + emit(I.ARITHI{oper=I.ANDI_Rc, ra=ra, im=opnd, rt=newReg()}) + (*esac*); + branch(T.CC(cond, CR0), lab, an)) + else + default() + | _ => + default() + end + | branch(T.CC(cc, cr), lab, an) = + let val addr=I.LabelOp(T.LABEL lab) + fun branch(bo, bit) = + emitBranch{bo=bo, bf=cr, bit=bit, addr=addr, LK=false} + in case cc of + T.EQ => branch(I.TRUE, I.EQ) + | T.NE => branch(I.FALSE, I.EQ) + | (T.LT | T.LTU) => branch(I.TRUE, I.LT) + | (T.LE | T.LEU) => branch(I.FALSE, I.GT) + | (T.GE | T.GEU) => branch(I.FALSE, I.LT) + | (T.GT | T.GTU) => branch(I.TRUE, I.GT) + | (T.SETCC | T.MISC_COND _) => error "branch(CC)" + end + | branch(cmp as T.FCMP(fty, cond, _, _), lab, an) = + let val ccreg = if true then CR0 else newCCreg() (* XXX *) + val labOp = I.LabelOp(T.LABEL lab) + fun branch(bo, bf, bit) = + emitBranch{bo=bo, bf=bf, bit=bit, addr=labOp, LK=false} + fun test2bits(bit1, bit2) = + let val ba=(ccreg, bit1) + val bb=(ccreg, bit2) + val bt=(ccreg, I.FL) + in emit(I.CCARITH{oper=I.CROR, bt=bt, ba=ba, bb=bb}); + branch(I.TRUE, ccreg, I.FL) + end + in doCCexpr(cmp, ccreg, []); + case cond of + T.== => branch(I.TRUE, ccreg, I.FE) + | T.?<> => branch(I.FALSE, ccreg, I.FE) + | T.? => branch(I.TRUE, ccreg, I.FU) + | T.<=> => branch(I.FALSE, ccreg, I.FU) + | T.> => branch(I.TRUE, ccreg, I.FG) + | T.>= => test2bits(I.FG, I.FE) + | T.?> => test2bits(I.FU, I.FG) + | T.?>= => branch(I.FALSE, ccreg, I.FL) + | T.< => branch(I.TRUE, ccreg, I.FL) + | T.<= => test2bits(I.FL, I.FE) + | T.?< => test2bits(I.FU, I.FL) + | T.?<= => branch(I.FALSE, ccreg, I.FG) + | T.<> => test2bits(I.FL, I.FG) + | T.?= => test2bits(I.FU, I.FE) + | (T.SETFCC | T.MISC_FCOND _) => error "branch(FCMP)" + (*esac*) + end + | branch _ = error "branch" + + and doStmt s = stmt(s,[]) + + and doStmts ss = app doStmt ss + + (* Emit an integer store *) + and store(ty, ea, data, mem, an) = + let val (st,size) = case (ty,Gen.Size.size ea) of + (8,32) => (I.STB,signed16) + | (8,64) => (I.STBE,signed12) + | (16,32) => (I.STH,signed16) + | (16,64) => (I.STHE,signed12) + | (32,32) => (I.STW,signed16) + | (32,64) => (I.STWE,signed12) + | (64,64) => (I.STDE,signed12) + | _ => error "store" + val (r, disp) = addr(size,ea) + in mark(I.ST{st=st, rs=expr data, ra=r, d=disp, mem=mem}, an) end + + (* Emit a floating point store *) + and fstore(ty, ea, data, mem, an) = + let val (st,size) = case (ty,Gen.Size.size ea) of + (32,32) => (I.STFS,signed16) + | (32,64) => (I.STFSE,signed12) + | (64,32) => (I.STFD,signed16) + | (64,64) => (I.STFDE,signed12) + | _ => error "fstore" + val (r, disp) = addr(size,ea) + in mark(I.STF{st=st,fs=fexpr data, ra=r, d=disp, mem=mem},an) end + + and subfImmed(i, ra, rt, an) = + if signed16 i then + mark(I.ARITHI{oper=I.SUBFIC, rt=rt, ra=ra, im=I.ImmedOp(toInt i)}, an) + else + mark(I.ARITH{oper=I.SUBF, rt=rt, ra=ra, rb=expr(T.LI i), + Rc=false, OE=false}, an) + + (* Generate an arithmetic instruction *) + and arith(oper, e1, e2, rt, an) = + mark(I.ARITH{oper=oper,ra=expr e1,rb=expr e2,rt=rt,OE=false,Rc=false}, + an) + + (* Generate a trapping instruction *) + and arithTrapping(oper, e1, e2, rt, an) = + let val ra = expr e1 val rb = expr e2 + in mark(I.ARITH{oper=oper,ra=ra,rb=rb,rt=rt,OE=true,Rc=true},an); + overflowTrap() + end + + (* Generate an overflow trap *) + and overflowTrap() = + let val label = case !trapLabel of + NONE => let val l = Label.anon() + in trapLabel := SOME l; l end + | SOME l => l + in emitBranch{bo=I.TRUE, bf=CR0, bit=I.SO, LK=false, + addr=I.LabelOp(T.LABEL label)} + end + + (* Generate a load and annotate the instruction *) + and load(ld32, ld64, ea, mem, rt, an) = + let val (ld,size) = + if bit64mode andalso Gen.Size.size ea = 64 + then (ld64,signed12) + else (ld32,signed16) + val (r, disp) = addr(size,ea) + in mark(I.L{ld=ld, rt=rt, ra=r, d=disp, mem=mem},an) + end + + (* Generate a SRA shift operation and annotate the instruction *) + and sra(oper, operi, e1, e2, rt, an) = + case immedOpnd unsigned5 (e1, e2) of + (ra, I.RegOp rb) => + mark(I.ARITH{oper=oper,rt=rt,ra=ra,rb=rb,Rc=false,OE=false},an) + | (ra, rb) => + mark(I.ARITHI{oper=operi, rt=rt, ra=ra, im=rb},an) + + (* Generate a SRL shift operation and annotate the instruction *) + and srl32(e1, e2, rt, an) = + case immedOpnd unsigned5 (e1, e2) of + (ra, I.ImmedOp n) => + mark(SRLI32{r=ra,i=n,d=rt},an) + | (ra, rb) => + mark(I.ARITH{oper=I.SRW,rt=rt,ra=ra,rb=reduceOpn rb, + Rc=false,OE=false},an) + + and sll32(e1, e2, rt, an) = + case immedOpnd unsigned5 (e1, e2) of + (ra, rb as I.ImmedOp n) => + mark(SLLI32{r=ra,i=n,d=rt},an) + | (ra, rb) => + mark(I.ARITH{oper=I.SLW,rt=rt,ra=ra,rb=reduceOpn rb, + Rc=false,OE=false},an) + + (* Generate a subtract operation *) + and subtract(ty, e1, e2 as T.LI i, rt, an) = + (doExpr(T.ADD(ty, e1, T.LI (T.I.NEGT(32, i))), rt, an) + handle Overflow => + mark(I.ARITH{oper=I.SUBF, rt=rt, ra=expr e2, + rb=expr e1, OE=false, Rc=false}, an) + ) + | subtract(ty, T.LI i, e2, rt, an) = subfImmed(i, expr e2, rt, an) + | subtract(ty, x as (T.CONST _ | T.LABEL _), e2, rt, an) = + mark(I.ARITHI{oper=I.SUBFIC,rt=rt,ra=expr e2, + im=I.LabelOp x},an) + | subtract(ty, e1, e2, rt, an) = + let val rb = expr e1 val ra = expr e2 + in mark(I.ARITH{oper=I.SUBF,rt=rt,ra=ra,rb=rb,Rc=false,OE=false},an) + end + + (* Generate optimized multiplication code *) + and multiply(ty,oper,operi,genMult,e1,e2,rt,an) = + let fun nonconst(e1,e2) = + [annotate( + case commImmedOpnd signed16 (e1,e2) of + (ra,I.RegOp rb) => + I.arith{oper=oper,ra=ra,rb=rb,rt=rt,OE=false,Rc=false} + | (ra,im) => I.arithi{oper=operi,ra=ra,im=im,rt=rt}, + an)] + fun const(e,i) = + let val r = expr e + in genMult{r=r,i=toInt(i),d=rt} + handle _ => nonconst(T.REG(ty,r),T.LI i) + end + val instrs = + case (e1,e2) of + (_,T.LI i) => const(e1,i) + | (T.LI i,_) => const(e2,i) + | _ => nonconst(e1,e2) + in app emitInstruction instrs end + + and divu32 x = Mulu32.divide{mode=T.TO_ZERO,stm=doStmt} x + + and divs32 x = Muls32.divide{mode=T.TO_ZERO,stm=doStmt} x + + and divt32 x = Mult32.divide{mode=T.TO_ZERO,stm=doStmt} x + + (* Generate optimized division code *) + and divide(ty,oper,genDiv,e1,e2,rt,overflow,an) = + let fun nonconst(e1,e2) = + (mark(I.ARITH{oper=oper,ra=expr e1,rb=expr e2,rt=rt, + OE=overflow,Rc=overflow},an); + if overflow then overflowTrap() else () + ) + fun const(e,i) = + let val r = expr e + in app emitInstruction (genDiv{r=r,i=toInt(i),d=rt}) + handle _ => nonconst(T.REG(ty,r),T.LI i) + end + in case (e1,e2) of + (_,T.LI i) => const(e1,i) + | _ => nonconst(e1,e2) + end + + (* Reduce an operand into a register *) + and reduceOpn(I.RegOp r) = r + | reduceOpn opn = + let val rt = newReg() + in emit(I.ARITHI{oper=I.ADDI, rt=rt, ra=zeroR, im=opn}); + rt + end + + (* Reduce an expression, and returns the register that holds + * the value. + *) + and expr(rexp as T.REG(_,r)) = + if CB.sameColor(C.lr, r) then + let val rt = newReg() + in doExpr(rexp, rt, []); rt end + else r + | expr(rexp) = + let val rt = newReg() + in doExpr(rexp, rt, []); rt end + + (* doExpr(e, rt, an) -- + * reduce the expression e, assigns it to rd, + * and annotate the expression with an + *) + and doExpr(e, rt, an) = + if CB.sameColor(rt,C.lr) then + let val rt = newReg() in doExpr(e,rt,[]); mark(MTLR rt,an) end + else + case e of + T.REG(_,rs) => if CB.sameColor(rs,C.lr) then mark(MFLR rt,an) + else move(rs,rt,an) + | T.LI i => loadImmed(i, rt, an) + | T.LABEXP lexp => loadLabexp(lexp, rt, an) + | T.CONST _ => loadLabexp(e, rt, an) + | T.LABEL _ => loadLabexp(e, rt, an) + + (* All data widths *) + | T.ADD(_, e1, e2) => eCommImm signed16 (I.ADD,I.ADDI,e1,e2,rt,an) + | T.SUB(ty, e1, e2) => subtract(ty, e1, e2, rt, an) + + (* Special PPC bit operations *) + | T.ANDB(_,e1,T.NOTB(_,e2)) => arith(I.ANDC,e1,e2,rt,an) + | T.ORB(_,e1,T.NOTB(_,e2)) => arith(I.ORC,e1,e2,rt,an) + | T.XORB(_,e1,T.NOTB(_,e2)) => arith(I.EQV,e1,e2,rt,an) + | T.EQVB(_,e1,e2) => arith(I.EQV,e1,e2,rt,an) + | T.ANDB(_,T.NOTB(_,e1),e2) => arith(I.ANDC,e2,e1,rt,an) + | T.ORB(_,T.NOTB(_,e1),e2) => arith(I.ORC,e2,e1,rt,an) + | T.XORB(_,T.NOTB(_,e1),e2) => arith(I.EQV,e2,e1,rt,an) + | T.NOTB(_,T.ANDB(_,e1,e2)) => arith(I.NAND,e1,e2,rt,an) + | T.NOTB(_,T.ORB(_,e1,e2)) => arith(I.NOR,e1,e2,rt,an) + | T.NOTB(_,T.XORB(_,e1,e2)) => arith(I.EQV,e1,e2,rt,an) + + | T.ANDB(_, e1, e2) => + eCommImm unsigned16(I.AND,I.ANDI_Rc,e1,e2,rt,an) + | T.ORB(_, e1, e2) => eCommImm unsigned16(I.OR,I.ORI,e1,e2,rt,an) + | T.XORB(_, e1, e2) => eCommImm unsigned16(I.XOR,I.XORI,e1,e2,rt,an) + + (* 32 bit support *) + | T.MULU(32, e1, e2) => multiply(32,I.MULLW,I.MULLI, + Mulu32.multiply,e1,e2,rt,an) + | T.DIVU(32, e1, e2) => divide(32,I.DIVWU,divu32,e1,e2,rt,false,an) + + | T.MULS(32, e1, e2) => multiply(32,I.MULLW,I.MULLI, + Muls32.multiply,e1,e2,rt,an) + | T.DIVS(T.DIV_TO_ZERO, 32, e1, e2) => + (* On the PPC we turn overflow checking on despite this + * being DIVS. That's because divide-by-zero is also + * indicated through "overflow" instead of causing a trap. *) + divide(32,I.DIVW,divs32,e1,e2,rt, + true (* !! *), + an) + + | T.ADDT(32, e1, e2) => arithTrapping(I.ADD, e1, e2, rt, an) + | T.SUBT(32, e1, e2) => arithTrapping(I.SUBF, e2, e1, rt, an) + | T.MULT(32, e1, e2) => arithTrapping(I.MULLW, e1, e2, rt, an) + | T.DIVT(T.DIV_TO_ZERO, 32, e1, e2) => + divide(32,I.DIVW,divt32,e1,e2,rt,true,an) + + | T.SRA(32, e1, e2) => sra(I.SRAW, I.SRAWI, e1, e2, rt, an) + | T.SRL(32, e1, e2) => srl32(e1, e2, rt, an) + | T.SLL(32, e1, e2) => sll32(e1, e2, rt, an) + + (* 64 bit support *) + | T.SRA(64, e1, e2) => sra(I.SRAD, I.SRADI, e1, e2, rt, an) + (*| T.SRL(64, e1, e2) => srl(32, I.SRD, I.RLDINM, e1, e2, rt, an) + | T.SLL(64, e1, e2) => sll(32, I.SLD, I.RLDINM, e1, e2, rt, an)*) + + (* loads *) + | T.LOAD(8,ea,mem) => load(I.LBZ,I.LBZE,ea,mem,rt,an) + | T.LOAD(16,ea, mem) => load(I.LHZ,I.LHZE,ea,mem,rt,an) + | T.LOAD(32,ea, mem) => load(I.LWZ,I.LWZE,ea,mem,rt,an) + | T.LOAD(64,ea, mem) => load(I.LDE,I.LDE,ea,mem,rt,an) + + (* Conditional expression *) + | T.COND exp => + doStmts(Gen.compileCond{exp=exp,an=an,rd=rt}) + + (* Misc *) + | T.LET(s,e) => (doStmt s; doExpr(e, rt, an)) + | T.MARK(e, A.MARKREG f) => (f rt; doExpr(e,rt,an)) + | T.MARK(e, a) => doExpr(e,rt,a::an) + | T.REXT e => ExtensionComp.compileRext (reducer()) {e=e,rd=rt,an=an} + | e => doExpr(Gen.compileRexp e,rt,an) + + (* Generate a floating point load *) + and fload(ld32, ld64, ea, mem, ft, an) = + let val (ld,size) = + if bit64mode andalso Gen.Size.size ea = 64 then (ld64,signed12) + else (ld32,signed16) + val (r, disp) = addr(size,ea) + in mark(I.LF{ld=ld, ft=ft, ra=r, d=disp, mem=mem}, an) end + + (* Generate a floating-point binary operation *) + and fbinary(oper, e1, e2, ft, an) = + mark(I.FARITH{oper=oper,fa=fexpr e1,fb=fexpr e2,ft=ft,Rc=false}, an) + + (* Generate a floating-point 3-operand operation + * These are of the form + * +/- e1 * e3 +/- e2 + *) + and f3(oper, e1, e2, e3, ft, an) = + mark(I.FARITH3{oper=oper,fa=fexpr e1,fb=fexpr e2,fc=fexpr e3, + ft=ft,Rc=false}, an) + + (* Generate a floating-point unary operation *) + and funary(oper, e, ft, an) = + mark(I.FUNARY{oper=oper, ft=ft, fb=fexpr e, Rc=false}, an) + + (* Reduce the expression fexp, return the register that holds + * the value. + *) + and fexpr(T.FREG(_,f)) = f + | fexpr(e) = + let val ft = newFreg() + in doFexpr(e, ft, []); ft end + + (* doExpr(fexp, ft, an) -- + * reduce the expression fexp, and assigns + * it to ft. Also annotate fexp. + *) + and doFexpr(e, ft, an) = + case e of + T.FREG(_,fs) => fmove(fs,ft,an) + + (* Single precision support *) + | T.FLOAD(32, ea, mem) => fload(I.LFS,I.LFSE,ea,mem,ft,an) + + (* special 3 operand floating point arithmetic *) + | T.FADD(32,T.FMUL(32,a,c),b) => f3(I.FMADDS,a,b,c,ft,an) + | T.FADD(32,b,T.FMUL(32,a,c)) => f3(I.FMADDS,a,b,c,ft,an) + | T.FSUB(32,T.FMUL(32,a,c),b) => f3(I.FMSUBS,a,b,c,ft,an) + | T.FSUB(32,b,T.FMUL(32,a,c)) => f3(I.FNMSUBS,a,b,c,ft,an) + | T.FNEG(32,T.FADD(32,T.FMUL(32,a,c),b)) => f3(I.FNMADDS,a,b,c,ft,an) + | T.FNEG(32,T.FADD(32,b,T.FMUL(32,a,c))) => f3(I.FNMADDS,a,b,c,ft,an) + | T.FSUB(32,T.FNEG(32,T.FMUL(32,a,c)),b) => f3(I.FNMADDS,a,b,c,ft,an) + + | T.FADD(32, e1, e2) => fbinary(I.FADDS, e1, e2, ft, an) + | T.FSUB(32, e1, e2) => fbinary(I.FSUBS, e1, e2, ft, an) + | T.FMUL(32, e1, e2) => fbinary(I.FMULS, e1, e2, ft, an) + | T.FDIV(32, e1, e2) => fbinary(I.FDIVS, e1, e2, ft, an) + + (* Double precision support *) + | T.FLOAD(64, ea, mem) => fload(I.LFD,I.LFDE,ea,mem,ft,an) + + (* special 3 operand floating point arithmetic *) + | T.FADD(64,T.FMUL(64,a,c),b) => f3(I.FMADD,a,b,c,ft,an) + | T.FADD(64,b,T.FMUL(64,a,c)) => f3(I.FMADD,a,b,c,ft,an) + | T.FSUB(64,T.FMUL(64,a,c),b) => f3(I.FMSUB,a,b,c,ft,an) + | T.FSUB(64,b,T.FMUL(64,a,c)) => f3(I.FNMSUB,a,b,c,ft,an) + | T.FNEG(64,T.FADD(64,T.FMUL(64,a,c),b)) => f3(I.FNMADD,a,b,c,ft,an) + | T.FNEG(64,T.FADD(64,b,T.FMUL(64,a,c))) => f3(I.FNMADD,a,b,c,ft,an) + | T.FSUB(64,T.FNEG(64,T.FMUL(64,a,c)),b) => f3(I.FNMADD,a,b,c,ft,an) + + | T.FADD(64, e1, e2) => fbinary(I.FADD, e1, e2, ft, an) + | T.FSUB(64, e1, e2) => fbinary(I.FSUB, e1, e2, ft, an) + | T.FMUL(64, e1, e2) => fbinary(I.FMUL, e1, e2, ft, an) + | T.FDIV(64, e1, e2) => fbinary(I.FDIV, e1, e2, ft, an) + | T.CVTI2F(64,_,e) => + app emitInstruction (PseudoInstrs.cvti2d{reg=expr e,fd=ft}) + + (* Single/double precision support *) + | T.FABS((32|64), e) => funary(I.FABS, e, ft, an) + | T.FNEG((32|64), e) => funary(I.FNEG, e, ft, an) + | T.FSQRT(32, e) => funary(I.FSQRTS, e, ft, an) + | T.FSQRT(64, e) => funary(I.FSQRT, e, ft, an) + + | T.CVTF2F(64,32,e) => doFexpr(e,ft,an) (* 32->64 is a no-op *) + | T.CVTF2F(32,32,e) => doFexpr(e,ft,an) + | T.CVTF2F(64,64,e) => doFexpr(e,ft,an) + | T.CVTF2F(32,64,e) => funary(I.FRSP,e,ft,an) + + (* Misc *) + | T.FMARK(e, A.MARKREG f) => (f ft; doFexpr(e,ft,an)) + | T.FMARK(e, a) => doFexpr(e,ft,a::an) + | T.FEXT e => ExtensionComp.compileFext (reducer()) {e=e,fd=ft,an=an} + | _ => error "doFexpr" + + and ccExpr(T.CC(_,cc)) = cc + | ccExpr(T.FCC(_,cc)) = cc + | ccExpr(ccexp) = + let val cc = newCCreg() + in doCCexpr(ccexp,cc,[]); cc end + + (* Reduce an condition expression, and assigns the result to ccd *) + and doCCexpr(ccexp, ccd, an) = + case ccexp of + T.CMP(ty, cc, e1, e2) => + let val (opnds, cmp) = + case cc of + (T.LT | T.LE | T.EQ | T.NE | T.GT | T.GE) => + (immedOpnd signed16, I.CMP) + | _ => (immedOpnd unsigned16, I.CMPL) + val (opndA, opndB) = opnds(e1, e2) + val l = case ty of + 32 => false + | 64 => true + | _ => error "doCCexpr" + in mark(I.COMPARE{cmp=cmp, l=l, bf=ccd, ra=opndA, rb=opndB},an) + end + | T.FCMP(fty, fcc, e1, e2) => + mark(I.FCOMPARE{cmp=I.FCMPU, bf=ccd, fa=fexpr e1, fb=fexpr e2},an) + | T.CC(_,cc) => ccmove(cc,ccd,an) + | T.CCMARK(cc,A.MARKREG f) => (f ccd; doCCexpr(cc,ccd,an)) + | T.CCMARK(cc,a) => doCCexpr(cc,ccd,a::an) + | T.CCEXT e => + ExtensionComp.compileCCext (reducer()) {e=e, ccd=ccd, an=an} + | _ => error "doCCexpr: Not implemented" + + and emitTrap() = emit(I.TW{to=31,ra=zeroR,si=I.ImmedOp 0}) + + and beginCluster' _ = + (trapLabel := NONE; beginCluster 0) + + and endCluster' a = + (case !trapLabel of + SOME label => + (defineLabel label; emitTrap(); trapLabel := NONE) + | NONE => (); + endCluster a) + + and reducer() = + TS.REDUCER{reduceRexp = expr, + reduceFexp = fexpr, + reduceCCexp = ccExpr, + reduceStm = stmt, + operand = (fn _ => error "operand"), + reduceOperand = reduceOpn, + addressOf = (fn _ => error "addressOf"), + emit = emitInstruction o annotate, + instrStream = instrStream, + mltreeStream = self() + } + and self() = + TS.S.STREAM + { beginCluster = beginCluster', + endCluster = endCluster', + emit = doStmt, + pseudoOp = pseudoOp, + defineLabel = defineLabel, + entryLabel = entryLabel, + comment = comment, + annotation = annotation, + getAnnotations=getAnnotations, + exitBlock = fn mlrisc => exitBlock(cellset mlrisc) + } + in self() + end + +end + diff --git a/MLRISC/ppc/mltree/ppcPseudoInstr.sig b/MLRISC/ppc/mltree/ppcPseudoInstr.sig new file mode 100644 index 0000000..8974b9c --- /dev/null +++ b/MLRISC/ppc/mltree/ppcPseudoInstr.sig @@ -0,0 +1,6 @@ +signature PPC_PSEUDO_INSTR = sig + structure I : PPCINSTR + + val cvti2d : {reg:CellsBasis.cell, fd:CellsBasis.cell} -> I.instruction list + (* cvti2d(reg) -- convert integer held reg to 64 bit float held in fd *) +end diff --git a/MLRISC/ppc/ppc.mdl b/MLRISC/ppc/ppc.mdl new file mode 100644 index 0000000..8b00ed5 --- /dev/null +++ b/MLRISC/ppc/ppc.mdl @@ -0,0 +1,789 @@ +(* + * This is the machine description file of PowerPC derived from Lal's code. + * I have no idea what the instructions do so it probably won't work. + * + * Note: I've now added lots of instructions for 64-bit and single precision + * floating point support. + * + * I'm using Book E: PowerPC Architecture Enhanced for Embedded Applications + * as the reference + * + * -- Allen + *) + +architecture PPC = +struct + + superscalar + + big endian + + lowercase assembly + + storage + GP = $r[32] of 64 bits asm: ( + fn (r,_) => if !PPCAsmSyntax.ibm_syntax + then Int.toString r + else "r"^Int.toString r) + | FP = $f[32] of 64 bits asm: ( + fn (f,_) => if !PPCAsmSyntax.ibm_syntax + then Int.toString f + else "f"^Int.toString r) + | CC = $cc[8] of 4 bits asm: (fn (cr,_) => "cr"^Int.toString cr) + | SPR = $spr[32] of 64 bits + asm: (fn (1,_) => "xer" + | (8,_) => "lr" + | (9,_) => "ctr" + | (r,_) => Int.toString r + ) + | MEM = $m[] of 8 aggregable bits asm: (fn (r,_) => "m"^Int.toString r) + | CTRL = $ctrl[] of 8 bits asm: (fn (r,_) => "ctrl"^Int.toString r) + + locations + stackptrR = $r[1] + and asmTmpR = $r[28] + and fasmTmp = $f[0] + and r0 = $r[0] + + (* the encoding of these are from page 372 *) + and xer = $spr[1] (* Integer exception register *) + and lr = $spr[8] (* Link register *) + and ctr = $spr[9] (* counter register *) + + structure RTL = + struct + end + + structure Instruction = + struct + type gpr = int (* general purpose register *) + type fpr = int (* floating point register *) + type ccr = int (* condition code register *) + type crf = int (* condition register field *) + + datatype spr! = XER | LR | CTR + + datatype operand = + RegOp of $GP ``'' (emit_GP GP) rtl: $r[GP] + | ImmedOp of int ``'' (itow int) rtl: immed int + | LabelOp of T.labexp ``'' + (itow(MLTreeEval.valueOf labexp)) + + type addressing_mode = CellsBasis.cell * operand + + datatype ea = + Direct of $GP + | FDirect of $FP + | Displace of {base: $GP, disp: T.labexp, mem: Region.region} + + (* Load/store operators that have the E suffix means that 64-bit + * addressing is used. Note: the x suffix is implicitly added if rb is a + * register. + * + * -- Allen + *) + + + datatype load! = LBZ (* load byte and zero *) + | LBZE + | LHZ (* load half word and zero *) + | LHZE + | LHA (* load half word algebraic *) + | LHAE + | LWZ (* load word and zero *) + | LWZE + | LDE (* load double word extended + * Note: there is no LD or LDX!!! + *) + + | LBZU + | LHZU + | LHAU + | LWZU + | LDZU + + datatype store! = STB + | STBE + | STH + | STHE + | STW + | STWE + | STDE + + | STBU + | STHU + | STWU + | STDU + + datatype fload! = LFS + | LFSE + | LFD + | LFDE + + | LFSU + | LFDU + + datatype fstore! = STFS + | STFSE + | STFD + | STFDE + + | STFSU + | STFDU + + datatype cmp! = CMP | CMPL + + datatype fcmp! = FCMPO 0w32 (* ordered *) + | FCMPU 0w0 (* unordered *) + + (* xo *) + datatype unary! = NEG 0w104 + | EXTSB 0w954 (* extend sign byte *) + | EXTSH 0w922 (* extend sign halfword *) + | EXTSW 0w986 (* extend sign word *) + | CNTLZW 0w26 (* count leading zeros word *) + | CNTLZD 0w58 (* count leading zeros double word *) + + + (* opcd/xo *) + datatype funary! = FMR (0w63,0w72) + | FNEG (0w63,0w40) + | FABS (0w63,0w264) + | FNABS (0w63,0w136) + | FSQRT (0w63,0w22) + | FSQRTS (0w59,0w22) + | FRSP (0w63,0w12) (* round to single precision *) + | FCTIW (0w63,0w14) (* convert to integer word *) + | FCTIWZ (0w63,0w15) (* convert to integer word *) + | FCTID (0w63,0w814) (* convert to double word *) + | FCTIDZ (0w63,0w815) (* convert to double word *) + | FCFID (0w63,0w846) (* convert from double word *) + + (* opcd/xo *) + datatype farith! = FADD (0w63,0w21) + | FSUB (0w63,0w20) + | FMUL (0w63,0w25) + | FDIV (0w63,0w18) + | FADDS (0w59,0w21) + | FSUBS (0w59,0w20) + | FMULS (0w59,0w25) + | FDIVS (0w59,0w18) + + (* opcd, xo *) + datatype farith3! = FMADD (0w63,0w29) + | FMADDS (0w59,0w29) + | FMSUB (0w63,0w28) + | FMSUBS (0w59,0w28) + | FNMADD (0w63,0w31) + | FNMADDS (0w59,0w31) + | FNMSUB (0w63,0w30) + | FNMSUBS (0w59,0w30) + | FSEL (0w63,0w23) (* floating point select *) + + datatype bo = + TRUE 0wb01100 (* 011zy *) + | FALSE 0wb00100 (* 001zy *) + | ALWAYS 0wb10100 (* 1z1zz *) + | COUNTER of {eqZero:bool, cond:bool option} + (case cond of + NONE => if eqZero then 0wb10010 (* 1z01y *) + else 0wb10000 (* 1z00y *) + | SOME cc => case (eqZero,cc) of + (false,false) => 0wb00000 (* 0000y *) + | (false,true) => 0wb01000 (* 0100y *) + | (true,false) => 0wb00010 (* 0001y *) + | (true,true) => 0wb01010 (* 0101y *) + ) + + (* operation ARITH ARITHI *) + datatype arith! = (* --------- ----- ------ *) + (* xo *) + ADD 0w266 (* add add addi *) + | SUBF 0w40 (* subtract from subf subfic *) + | MULLW 0w235 (* multiply mullw mulli *) + | MULLD 0w233 (* multiply double word mulld - *) + | MULHW 0w75 (* multiply high word mulhw - *) + | MULHWU 0w11 (* multiply high word unsigned mulhwu - *) + | DIVW 0w491 (* divide word divw - *) + | DIVD 0w489 (* divide doubleword divd - *) + | DIVWU 0w459 (* divide word unsigned divwu - *) + | DIVDU 0w457 (* divide doubleword unsigned divdu - *) + | AND 0w28 (* and and andi *) + | OR 0w444 (* or or ori *) + | XOR 0w316 (* xor xor xori *) + | NAND 0w476 (* nand *) + | NOR 0w124 (* nor *) + | EQV 0w284 (* eqv *) + | ANDC 0w60 (* and with complement andc - *) + | ORC 0w412 (* or with complement orc - *) + | SLW 0w24 (* shift left word slw rlwinm *) + | SLD 0w27 (* shift left double word sld rldinm *) + | SRW 0w536 (* shift right word srw rlwinm *) + | SRD 0w539 (* shift right double word srd rldinm *) + | SRAW 0w792 (* shift right algebraic word sraw srawi *) + | SRAD 0w794 (* shift right algebraic dword srad sradi *) + + datatype arithi! = (* --------- ----- ------ *) + (* opcd *) + ADDI 0w14 (* add add addi *) + | ADDIS 0w15 (* add-shifted - addis *) + | SUBFIC 0w8 (* subtract from subf subfic *) + | MULLI 0w7 (* multiply mullw mulli *) + | ANDI_Rc "andi." 0w28 (* and and andi *) + | ANDIS_Rc "andis." 0w29(* and-shifted - andis *) + | ORI 0w24 (* or or ori *) + | ORIS 0w25 (* or-shifted - ori *) + | XORI 0w26 (* xor xor xori *) + | XORIS 0w27 (* xor-shifted - xoris *) + (* + | SLWI (* !!! *) (* shift left word slw rlwinm *) + | SLDI (* !!! *) (* shift left double word sld rldinm *) + | SRWI (* !!! *) (* shift right word srw rlwinm *) + | SRDI (* !!! *) (* shift right double word srd rldinm *) + *) + | SRAWI (* shift right algebric word sraw srawi *) + | SRADI (* shift right algebraic dword srad sradi *) + + (* !!! means that these are pseudo ops! *) + + datatype rotate! = + (* opcd *) + RLWNM (* rotate left word AND mask rlwnm rlwinm *) + | RLDCL + | RLDCR + + datatype rotatei! = + RLWINM (* rotate left word AND mask rlwnm rlwinm *) + | RLWIMI + | RLDICL + | RLDICR + | RLDIC + | RLDIMI + + datatype ccarith! = (* page 47-49 *) + (* xo *) + CRAND 0w257 (* cond. reg. AND *) + | CROR 0w449 (* cond. reg. OR *) + | CRXOR 0w193 (* cond. reg. XOR *) + | CRNAND 0w225 (* cond. reg. NAND *) + | CRNOR 0w33 (* cond. reg. NOR *) + | CREQV 0w289 (* cond. reg. EQV *) + | CRANDC 0w129 (* cond. reg. AND with complement *) + | CRORC 0w417 (* cond. reg. OR with complement *) + + + (* bits in condition code *) + datatype bit! = + LT "lt" | GT "gt" | EQ "eq" | SO "so" (* cr0 *) + | FL "lt" | FG "gt" | FE "eq" | FU "un" (* cr1 *) + (* Lal: as far as I can tell there don't seem to be mnemonics + * for these, however using lt, gt, eq, so should fool + * the assembler into looking at the right bits in the + * cc field. Of course the bf field had better be cr1. + *) + | FX "lt" | FEX "gt" | VX "eq" | OX "so" + + (* bits in integer exception register *) + datatype xerbit = SO64 (* summary overflow 64 *) + | OV64 (* overflow 64 *) + | CA64 (* carry 64 *) + | SO32 (* summary overflow 32 bits *) + | OV32 (* overflow 32 bits *) + | CA32 (* carry 32 bits *) + + type cr_bit = $CC * bit + end (* Instruction *) + + (* + * The following describes the encoding of the instructions. + *) + instruction formats 32 bits + + (* primitives *) + x_form{opcd:6,rt:5,ra:5,rb:5,xo:10,rc:bool 1} + | xl_form{opcd:6,bt:5,ba:5,bb:5,xo:10,lk:bool 1} + | m_form{opcd:6,rs:5,ra:5,rb:5,mb:5,me:5,rc:bool 1} + | a_form{opcd:6,frt:5,fra:5,frb:5,frc:5,xo:5,rc:bool 1} + + (* integer loads *) + | loadx{opcd:6=31,rt:GP 5,ra:GP 5,rb:GP 5,xop:10,rc:1=0} + | loadd{opcd:6,rt:GP 5,ra:GP 5,d:operand signed 16} + | loadde{opcd:6,rt:GP 5,ra:GP 5,de:operand signed 12,xop:4} + + | load{ld,rt,ra,d} = + (case (d,ld) of + (I.RegOp rb,I.LBZ) => loadx{rt,ra,rb,xop=0w87} (* lbzx *) + | (I.RegOp rb,I.LBZE) => loadx{rt,ra,rb,xop=0w95} (* lbzxe *) + | (I.RegOp rb,I.LHZ) => loadx{rt,ra,rb,xop=0w279} (* lhzx *) + | (I.RegOp rb,I.LHZE) => loadx{rt,ra,rb,xop=0w287} (* lhzxe *) + | (I.RegOp rb,I.LHA) => loadx{rt,ra,rb,xop=0w343} (* lhax *) + | (I.RegOp rb,I.LHAE) => loadx{rt,ra,rb,xop=0w351} (* lhaxe *) + | (I.RegOp rb,I.LWZ) => loadx{rt,ra,rb,xop=0w23} (* lwzx *) + | (I.RegOp rb,I.LWZE) => loadx{rt,ra,rb,xop=0w31} (* lwzxe *) + | (I.RegOp rb,I.LDE) => loadx{rt,ra,rb,xop=0w799} (* ldxe *) + + | (d,I.LBZ) => loadd{opcd=0w34,rt,ra,d} + | (de,I.LBZE) => loadde{opcd=0w58,rt,ra,de,xop=0w0} + | (d,I.LHZ) => loadd{opcd=0w40,rt,ra,d} + | (de,I.LHZE) => loadde{opcd=0w58,rt,ra,de,xop=0w2} + | (d,I.LHA) => loadd{opcd=0w42,rt,ra,d} + | (de,I.LHAE) => loadde{opcd=0w58,rt,ra,de,xop=0w4} + | (d,I.LWZ) => loadd{opcd=0w32,rt,ra,d} + | (de,I.LWZE) => loadde{opcd=0w58,rt,ra,de,xop=0w6} + | (de,I.LDE) => loadde{opcd=0w62,rt,ra,de,xop=0w0} + + | (I.RegOp rb,I.LHAU) => loadx{rt,ra,rb,xop=0w375} (* lhaux *) + | (I.RegOp rb,I.LHZU) => loadx{rt,ra,rb,xop=0w311} (* lhzux *) + | (I.RegOp rb,I.LWZU) => loadx{rt,ra,rb,xop=0w55} (* lwzux *) + | (d,I.LHZU) => loadd{opcd=0w41,rt,ra,d} + | (d,I.LWZU) => loadd{opcd=0w33,rt,ra,d} + ) + + (* floating point loads *) + | floadx{opcd:6=31,ft:FP 5,ra:GP 5,rb:GP 5,xop:10,rc:1=0} + | floadd{opcd:6,ft:FP 5,ra:GP 5,d:operand signed 16} + | floadde{opcd:6,ft:FP 5,ra:GP 5,de:operand signed 12,xop:4} + + | fload{ld,ft,ra,d} = + (case (d,ld) of + (I.RegOp rb,I.LFS) => floadx{ft,ra,rb,xop=0w535} + | (I.RegOp rb,I.LFSE) => floadx{ft,ra,rb,xop=0w543} + | (I.RegOp rb,I.LFD) => floadx{ft,ra,rb,xop=0w599} + | (I.RegOp rb,I.LFDE) => floadx{ft,ra,rb,xop=0w607} + | (I.RegOp rb,I.LFDU) => floadx{ft,ra,rb,xop=0w631} + | (d,I.LFS) => floadd{ft,ra,d,opcd=0w48} + | (de,I.LFSE) => floadde{ft,ra,de,opcd=0w62,xop=0w4} + | (d,I.LFD) => floadd{ft,ra,d,opcd=0w50} + | (de,I.LFDE) => floadde{ft,ra,de,opcd=0w62,xop=0w6} + | (d,I.LFDU) => floadd{ft,ra,d,opcd=0w51} + ) + + (* integer stores *) + | storex{opcd:6=31,rs:GP 5,ra:GP 5,rb:GP 5,xop:10,rc:1=0} + | stored{opcd:6,rs:GP 5,ra:GP 5,d:operand signed 16} + | storede{opcd:6,rs:GP 5,ra:GP 5,de:operand signed 12,xop:4} + + | store{st,rs,ra,d} = + (case (d,st) of + (I.RegOp rb,I.STB) => storex{rs,ra,rb,xop=0w215} + | (I.RegOp rb,I.STBE) => storex{rs,ra,rb,xop=0w223} + | (I.RegOp rb,I.STH) => storex{rs,ra,rb,xop=0w407} + | (I.RegOp rb,I.STHE) => storex{rs,ra,rb,xop=0w415} + | (I.RegOp rb,I.STW) => storex{rs,ra,rb,xop=0w151} + | (I.RegOp rb,I.STWE) => storex{rs,ra,rb,xop=0w159} + | (I.RegOp rb,I.STDE) => storex{rs,ra,rb,xop=0w927} + | (d,I.STB) => stored{rs,ra,d,opcd=0w38} + | (de,I.STBE) => storede{rs,ra,de,opcd=0w58,xop=0w8} + | (d,I.STH) => stored{rs,ra,d,opcd=0w44} + | (de,I.STHE) => storede{rs,ra,de,opcd=0w58,xop=0w10} + | (d,I.STW) => stored{rs,ra,d,opcd=0w36} + | (de,I.STWE) => storede{rs,ra,de,opcd=0w58,xop=0w14} + | (de,I.STDE) => storede{rs,ra,de,opcd=0w62,xop=0w8} + ) + + (* floating point stores *) + | fstorex{opcd:6=31,fs:FP 5,ra:GP 5,rb:GP 5,xop:10,rc:1=0} + | fstored{opcd:6,fs:FP 5,ra:GP 5,d:operand signed 16} + | fstorede{opcd:6,fs:FP 5,ra:GP 5,de:operand signed 12,xop:4} + + | fstore{st,fs,ra,d} = + (case (d,st) of + (I.RegOp rb,I.STFS) => fstorex{fs,ra,rb,xop=0w663} + | (I.RegOp rb,I.STFSE) => fstorex{fs,ra,rb,xop=0w671} + | (I.RegOp rb,I.STFD) => fstorex{fs,ra,rb,xop=0w727} + | (I.RegOp rb,I.STFDE) => fstorex{fs,ra,rb,xop=0w759} + | (d,I.STFS) => fstored{fs,ra,d,opcd=0w52} + | (de,I.STFSE) => fstorede{fs,ra,de,opcd=0w62,xop=0w12} + | (d,I.STFD) => fstored{fs,ra,d,opcd=0w54} + | (de,I.STFDE) => fstorede{fs,ra,de,opcd=0w62,xop=0w14} + ) + + (* integer arithmetic *) + | unary'{opcd:6=31,ra:GP 5,rt:GP 5,_:5=0,OE:bool 1,oper:unary 9,Rc:bool 1} + | unary{ra,rt,oper,OE,Rc} = + (case oper of + I.NEG => unary'{ra=rt,rt=ra,oper,OE,Rc} (* swapped! *) + | _ => unary'{ra,rt,oper,OE,Rc} + ) + | arith'{opcd:6=31,rt:GP 5,ra:GP 5,rb:GP 5,OE:bool 1,oper:arith 9,Rc:bool 1} + | arithi'{oper:arithi 6,rt:GP 5,ra:GP 5,im:operand signed 16} + | srawi{opcd:6=31,rs:GP 5,ra:GP 5,sh:operand signed 5,xop:10=824,Rc:1=0} + | sradi'{opcd:6=31,rs:GP 5,ra:GP 5,sh:5,xop:9=0w413,sh2:1,Rc:1=0} + | sradi{rs,ra,sh:operand signed 6} = + sradi'{rs=rs,ra=ra,sh=(sh at [0..4]),sh2=sh at [5]} + + | arith{oper,rt,ra,rb,OE,Rc} = + (case oper of + (I.ADD | I.SUBF | I.MULLW | I.MULLD | I.MULHW | I.MULHWU | + I.DIVW | I.DIVD | I.DIVWU | I.DIVDU) => + arith'{oper,rt,ra,rb,OE,Rc} + (* For some unknown reasons, the encoding of rt and ra + * are swapped! + *) + | _ => arith'{oper,rt=ra,ra=rt,rb,OE,Rc} + ) + + | arithi{oper,rt,ra,im} = + (case oper of + (I.ADDI | I.ADDIS | I.SUBFIC | I.MULLI) => arithi'{oper,rt,ra,im} + | I.SRAWI => srawi{rs=ra,ra=rt,sh=im} + | I.SRADI => sradi{rs=ra,ra=rt,sh=im} + (* For some unknown reasons, the encoding of rt and ra + * are swapped! + *) + | _ => arithi'{oper,rt=ra,ra=rt,im} + ) + + (* integer compare *) + | Cmpl{opcd:6=31,bf:CC 3,_:1=0,l:bool 1,ra:GP 5,rb:GP 5,xo:10=32,_:1=0} + | Cmpli{opcd:6=10,bf:CC 3,_:1=0,l:bool 1,ra:GP 5,ui:operand signed 16} + | Cmp{opcd:6=31,bf:CC 3,_:1=0,l:bool 1,ra:GP 5,rb:GP 5,xo:10=0,_:1=0} + | Cmpi{opcd:6=11,bf:CC 3,_:1=0,l:bool 1,ra:GP 5,si:operand signed 16} + | compare{cmp,bf,l,ra,rb} = + (case (cmp,rb) of + (I.CMP,I.RegOp rb) => Cmp{bf,l,ra,rb} + | (I.CMPL,I.RegOp rb) => Cmpl{bf,l,ra,rb} + | (I.CMP,si) => Cmpi{bf,l,ra,si} + | (I.CMPL,ui) => Cmpli{bf,l,ra,ui} + ) + + (* floating point compare *) + | fcmp{opcd:6=63,bf:CC 3,_:2=0,fa:FP 5,fb:FP 5,cmp:fcmp 10,_:1=0} + + (* floating point unary *) + | funary{oper:funary,ft:FP,fb:FP,Rc} = + let val (opcd,xo) = oper + in + case oper + of (0wx3f, 0wx16) => (* FSQRT *) + a_form{opcd=opcd,frt=ft,fra=0w0,frb=fb,frc=0w0,xo=xo,rc=Rc} + | (0wx3b, 0wx16) => (* FSQRTS *) + a_form{opcd=opcd,frt=ft,fra=0w0,frb=fb,frc=0w0,xo=xo,rc=Rc} + | _ => + x_form{opcd=opcd,rt=ft,ra=0w0,rb=fb,xo=xo,rc=Rc} + end + + (* floating point binary *) + | farith{oper,ft:FP,fa:FP,fb:FP,Rc} = + let val (opcd,xo) = emit_farith oper + in case oper of + (I.FMUL | I.FMULS) => + a_form{opcd=opcd,frt=ft,fra=fa,frb=0w0,frc=fb,xo=xo,rc=Rc} + | _ => a_form{opcd=opcd,frt=ft,fra=fa,frb=fb,frc=0w0,xo=xo,rc=Rc} + end + + (* floating point ternary *) + | farith3{oper:farith3,ft:FP,fa:FP,fc:FP,fb:FP,Rc} = + let val (opcd,xo) = oper + in a_form{opcd=opcd,frt=ft,fra=fa,frb=fb,frc=fc,xo=xo,rc=Rc} + end + + | cr_bit{cc} = + let val (cr,bit) = cc + in (emit_CC cr << 0w2) + + itow( + case bit of + I.LT => 0 | I.GT => 1 | I.EQ => 2 | I.SO => 3 + | I.FL => 0 | I.FG => 1 | I.FE => 2 | I.FU => 3 + | I.FX => 0 | I.FEX => 1 | I.VX => 2 | I.OX => 3 + ) + end + + | ccarith{oper:ccarith,bt,ba,bb} = + xl_form{opcd=0w19,bt=cr_bit{cc=bt},ba=cr_bit{cc=ba},bb=cr_bit{cc=bb}, + xo=oper,lk=false} + + (* trap on word *) + | twr{opcd:6=31,to:int 5,ra:GP 5,rb:GP 5,xop:10=4,_:1=0} + | twi{opcd:6=3,to:int 5,ra:GP 5,si:operand signed 16} + | tw{to,ra,si} = + (case si of I.RegOp rb => twr{to,ra,rb} | _ => twi{to,ra,si}) + + (* trap on double word *) + | tdr{opcd:6=31,to:int 5,ra:GP 5,rb:GP 5,xop:10=68,_:1=0} + | tdi{opcd:6=2,to:int 5,ra:GP 5,si:operand signed 16} + | td{to,ra,si} = + (case si of I.RegOp rb => tdr{to,ra,rb} | _ => tdi{to,ra,si}) + + (* move condition field p49 *) + | mcrf{opcd:6=19,bf:CC 3,_:2=0,bfa:CC 3,_:18=0} + + (* move from/to special purpose register p131/132 + * the encoding of spr = spr[0..4] || spr[5..9] + *) + | mtspr'{opcd:6=31,rs:GP 5,spr:10,xop:10=467,_:1=0} + | mtspr{rs,spr:SPR} = + mtspr'{rs,spr=((spr at [0..4]) << 0w5) + (spr at [5..9])} + | mfspr'{opcd:6=31,rt:GP 5,spr:10,xop:10=339,_:1=0} + | mfspr{rt,spr:SPR} = + mfspr'{rt,spr=((spr at [0..4]) << 0w5) + (spr at [5..9])} + + (* Branch p41 *) + | b{opcd:6=18,li:signed 24,aa:bool 1,lk:bool 1} + | be{opcd:6=22,li:signed 24,aa:bool 1,lk:bool 1} + + (* Branch conditional p42 *) + | bc{opcd:6=16,bo:bo 5,bi:5,bd:signed 14,aa:bool 1,lk:bool 1} + | bce{opcd:6=16,bo:bo 5,bi:5,bd:signed 14,aa:bool 1,lk:bool 1} + + (* Branch conditional to link register *) + | bclr{opcd:6=19,bo:bo 5,bi:5,_:5=0,xop:10=16,lk:bool 1} + | bclre{opcd:6=19,bo:bo 5,bi:5,_:5=0,xop:10=17,lk:bool 1} + + (* Branch conditional to count register *) + | bcctr{opcd:6=19,bo:bo 5,bi:5,_:5=0,xop:10=528,lk:bool 1} + | bcctre{opcd:6=19,bo:bo 5,bi:5,_:5=0,xop:10=529,lk:bool 1} + + (* Rotate *) + | rlwnm{oper:6=23,rs:GP 5,ra:GP 5,sh:GP 5,mb:int 5,me:int 5,Rc:1=0} + | rlwinm{oper:6=21,rs:GP 5,ra:GP 5,sh:5,mb:int 5,me:int 5,Rc:1=0} + | rldcl{oper:6=30,rs:GP 5,ra:GP 5,sh:GP 5,mb:int 5,_:5=8,Rc:1=0} + | rldicl{oper:6=30,rs:GP 5,ra:GP 5,sh:5,mb:int 5,_:4=0,sh2:1,Rc:1=0} + | rldcr{oper:6=30,rs:GP 5,ra:GP 5,sh:GP 5,mb:int 5,_:5=9,Rc:1=0} + | rldicr{oper:6=30,rs:GP 5,ra:GP 5,sh:5,mb:int 5,_:4=1,sh2:1,Rc:1=0} + | rldic{oper:6=30,rs:GP 5,ra:GP 5,sh:5,mb:int 5,_:4=2,sh2:1,Rc:1=0} + | rlwimi{oper:6=20,rs:GP 5,ra:GP 5,sh:5,mb:int 5,me:int 5,Rc:1=0} + | rldimi{oper:6=30,rs:GP 5,ra:GP 5,sh:5,mb:int 5,_:4=3,sh2:1,Rc:1=0} + + | rotate{oper,ra,rs,sh,mb,me} = + (case (oper,me) of + (I.RLWNM,SOME me) => rlwnm{ra,rs,sh,mb,me} + | (I.RLDCL,_) => rldcl{ra,rs,sh,mb} + | (I.RLDCR,_) => rldcr{ra,rs,sh,mb} + | _ => error "rotate" + ) + | rotatei{oper,ra,rs,sh:operand,mb,me} = + (case (oper,me) of + (I.RLWINM,SOME me) => rlwinm{ra,rs,sh,mb,me} + | (I.RLWIMI,SOME me) => rlwimi{ra,rs,sh=sh,mb,me} + | (I.RLDICL,_) => rldicl{ra,rs,sh=sh at [0..4],sh2=sh at [5],mb} + | (I.RLDICR,_) => rldicr{ra,rs,sh=sh at [0..4],sh2=sh at [5],mb} + | (I.RLDIC,_) => rldic{ra,rs,sh=sh at [0..4],sh2=sh at [5],mb} + | (I.RLDIMI,_) => rldimi{ra,rs,sh=sh at [0..4],sh2=sh at [5],mb} + | _ => error "rotatei" + ) + + (* Taken from a googled link: + http://publibn.boulder.ibm.com/doc_link/en_US/a_doc_lib/aixassem/alangref/lwarx.htm + *) + | lwarx{oper:6=0w31, rt:GP 5, ra:GP 5, rb:GP 5, _:10=0w20, _:1=0} + | stwcx{oper:6=0w31, rs:GP 5, ra:GP 5, rb:GP 5, _:10=0w150, _:1=1} + + (* + * Some helper functions for generating machine code. + * These are copied from Lal's code. + *) + structure MC = + struct + fun relative(I.LabelOp lexp) = itow(MLTreeEval.valueOf lexp - !loc) ~>> 0w2 + | relative _ = error "relative" + end + + (* + * Reservation tables and pipeline definitions for scheduling + *) + + (* Function units *) + resource issue and mem and alu and falu and fmul and fdiv and branch + + (* Different implementations of cpus *) + cpu default 2 [2 issue, 2 mem, 1 alu] (* 2 issue machine *) + + (* Definitions of various reservation tables *) + pipeline NOP _ = [issue] + and ARITH _ = [issue^^alu] + and LOAD _ = [issue^^mem] + and STORE _ = [issue^^mem,mem,mem] + and FARITH _ = [issue^^falu] + and FMUL _ = [issue^^fmul,fmul] + and FDIV _ = [issue^^fdiv,fdiv*50] + and BRANCH _ = [issue^^branch] + + (* + * Some helper functions for generating assembly code. + *) + structure Assembly = + struct + (* Add the x suffix if necessary; this is a stupid hack *) + fun emitx(s,I.RegOp _) = + if String.sub(s,size s-1) = #"e" then + (emit(String.substring(s,0,size s-1)); emit "xe") + else (emit(s); emit "x") + | emitx(s,_) = emit s + + fun eOERc{OE=false,Rc=false} = () + | eOERc{OE=false,Rc=true} = emit "." + | eOERc{OE=true,Rc=false} = emit "o" + | eOERc{OE=true,Rc=true} = emit "o." + fun eRc false = "" | eRc true = "." + fun cr_bit(cr,bit) = + 4 * (CellsBasis.physicalRegisterNum cr) + + (case bit of + I.LT => 0 | I.GT => 1 | I.EQ => 2 | I.SO => 3 + | I.FL => 0 | I.FG => 1 | I.FE => 2 | I.FU => 3 + | I.FX => 0 | I.FEX => 1 | I.VX => 2 | I.OX => 3 + ) + fun eCRbit x = emit(Int.toString(cr_bit x)) + fun eLK true = emit "l" | eLK false = () + fun eI (I.RegOp _) = () | eI _ = emit "i" + fun eBI(bo, bf, bit) = + case (bo, CellsBasis.physicalRegisterNum bf) of + (I.ALWAYS, _) => () + | (I.COUNTER{cond=NONE, ...}, _) => () + | (_,0) => emit(asm_bit bit) + | (_,n) => emit("4*cr" ^ Int.toString n ^ "+" ^ asm_bit bit) + fun emit_bo bo = + emit(case bo + of I.TRUE => "t" + | I.FALSE => "f" + | I.ALWAYS => "" + | I.COUNTER{eqZero, cond=NONE} => if eqZero then "dz" else "dnz" + | I.COUNTER{eqZero, cond=SOME cc} => + (if eqZero then "dz" else "dnz") ^ + (if cc then "t" else "f") + (*esac*)) + + fun eME(SOME me) = (emit ", "; emit_int me) + | eME(NONE) = () + + fun addr(ra,I.RegOp rb) = (emitCell ra; emit ", "; emitCell rb) + | addr(ra,d) = (emit_operand d; emit "("; emitCell ra; emit ")") + + end (* Assembly *) + + instruction + L of {ld:load, rt: $GP, ra: $GP, d:operand, mem:Region.region} + ``\t, '' + load{ld,rt,ra,d} + + | LF of {ld:fload, ft: $FP, ra: $GP, d:operand, mem:Region.region} + ``\t, '' + fload{ld,ft,ra,d} + + | ST of {st:store, rs: $GP, ra: $GP, d:operand, mem:Region.region} + ``\t, '' + store{st,rs,ra,d} + + | STF of {st:fstore, fs: $FP, ra: $GP, d:operand, mem:Region.region} + ``\t, '' + fstore{st,fs,ra,d} + + | UNARY of {oper:unary, rt: $GP, ra: $GP, Rc:bool, OE:bool} + ``\t, '' + unary{oper,rt,ra,OE,Rc} + + | ARITH of {oper:arith, rt: $GP, ra: $GP, rb: $GP, Rc:bool, OE:bool} + ``\t, , '' + arith{oper,rt,ra,rb,OE,Rc} + + | ARITHI of {oper:arithi, rt: $GP, ra: $GP, im:operand} + ``\t, , '' + arithi{oper,rt,ra,im} + + | ROTATE of {oper:rotate, ra: $GP, rs: $GP, sh: $GP, mb:int, me:int option} + ``\t, , , '' + rotate{oper,ra,rs,sh,mb,me} + + | ROTATEI of {oper:rotatei, ra: $GP, rs: $GP, sh:operand, mb:int, me:int option} + ``\t, , , '' + rotatei{oper,ra,rs,sh,mb,me} + + | COMPARE of {cmp:cmp, l:bool, bf: $CC, ra: $GP, rb:operand} + ``\t, , , '' + compare{cmp,bf,l,ra,rb} + + | FCOMPARE of {cmp:fcmp, bf: $CC, fa: $FP, fb: $FP} + ``\t, , '' + fcmp{cmp,bf,fa,fb} + + | FUNARY of {oper:funary, ft: $FP, fb: $FP, Rc:bool} + ``\t, '' + funary{oper,ft,fb,Rc} + + | FARITH of {oper:farith, ft: $FP, fa: $FP, fb: $FP, Rc:bool} + ``\t, , '' + farith{oper,ft,fa,fb,Rc} + + | FARITH3 of {oper:farith3, ft: $FP, fa: $FP, fb: $FP, fc: $FP, Rc:bool} + ``\t, , , '' + farith3{oper,ft,fa,fb,fc,Rc} + + | CCARITH of {oper:ccarith, bt:cr_bit, ba:cr_bit, bb:cr_bit} + ``\t, , '' + ccarith{oper,bt,ba,bb} + + | MCRF of {bf: $CC, bfa: $CC} (* move condition register field p49 *) + ``mcrf\t, '' + mcrf{bf,bfa} + + (* move to special register p131 *) + | MTSPR of {rs: $GP, spr: $SPR} + ``mt\t'' + mtspr{rs,spr} + + (* move from special register p132 *) + | MFSPR of {rt: $GP, spr: $SPR} + ``mf\t'' + mfspr{rt,spr} + + (* Load word and reserve indexed *) + | LWARX of {rt: $GP, ra: $GP, rb: $GP} + ``lwarx\t, , '' + lwarx{rt, ra, rb} + + (* Store word conditional indexed *) + | STWCX of {rs: $GP, ra: $GP, rb: $GP} + ``stwcx.\t, , '' + stwcx{rs, ra, rb} + + (* Trapping word *) + | TW of {to:int, ra: $GP, si:operand} + ``tw\t, , '' + tw{to,ra,si} + + (* Trapping double word *) + | TD of {to:int, ra: $GP, si:operand} + ``td\t, , '' + td{to,ra,si} + + (* Control Instructions - AA is always assumed to be 0 *) + | BC of {bo:bo, bf: $CC, bit:bit, addr:operand, LK:bool, fall:operand} + ``b\t, '' + bc{bo,bi=cr_bit{cc=(bf,bit)},bd=relative addr,aa=false,lk=LK} + + | BCLR of {bo:bo, bf: $CC, bit:bit, LK:bool, labels:Label.label list} + ``blr\t'' + bclr{bo,bi=cr_bit{cc=(bf,bit)},lk=LK} + + | B of {addr:operand, LK:bool} + ``b\t'' + b{li=relative addr,aa=false,lk=LK} + + (* CALL = BCLR {bo=ALWAYS, bf=0, bit=0, LK=true, labels=[] *) + | CALL of {def:C.cellset, use:C.cellset, + cutsTo: Label.label list, mem: Region.region} + ``blrl'' + bclr{bo=I.ALWAYS,bi=0w0,lk=true} + + | SOURCE of {} + asm: ``source'' + mc: () + + | SINK of {} + asm: ``sink'' + mc: () + + | PHI of {} + asm: ``phi'' + mc: () + + structure SSA = + struct + + fun operand(ty, I.RegOp r) = T.REG(32, r) + | operand(ty, I.ImmedOp i) = T.LI i + (*| operand(ty, I.LabelOp le) = T.LABEL le*) + + end + + end diff --git a/MLRISC/ppc/ra/ppcRegAlloc.sml b/MLRISC/ppc/ra/ppcRegAlloc.sml new file mode 100644 index 0000000..781ee3b --- /dev/null +++ b/MLRISC/ppc/ra/ppcRegAlloc.sml @@ -0,0 +1,70 @@ +(* ppcRegAlloc.sml + * + * COPYRIGHT (c) 2018 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + *) + +functor PPCRegAlloc(structure I : INSTRUCTIONS where C = PPCCells + structure P : INSN_PROPERTIES where I = I + structure F : FLOWGRAPH where I = I and P = P + structure Asm : INSTRUCTION_EMITTER where I=I and P=P) : + sig + functor IntRa (structure RaUser : RA_USER_PARAMS + where I = I + where type B.name = F.B.name) : RA + functor FloatRa (structure RaUser : RA_USER_PARAMS + where I = I + where type B.name = F.B.name) : RA + end= +struct + + structure C=I.C + + (* liveness analysis for general purpose registers *) + structure RegLiveness = + Liveness(structure Flowgraph=F + structure Instruction=I + val defUse = P.defUse C.GP + val regSet = C.getCell C.GP + val cellset = C.updateCell C.GP) + + + functor IntRa = + RegAllocator + (structure RaArch = struct + structure InsnProps = P + structure AsmEmitter = Asm + structure I = I + structure Liveness=RegLiveness + + val defUse = P.defUse C.GP + val firstPseudoR = 32 + val maxPseudoR = C.maxCell + val numRegs = C.numCell C.GP + val regSet = C.getCell C.GP + end) + + (* liveness analysis for floating point registers *) + structure FregLiveness = + Liveness(structure Flowgraph=F + structure Instruction=I + val defUse = P.defUse C.FP + val regSet = C.getCell C.FP + val cellset = C.updateCell C.FP) + + functor FloatRa = + RegAllocator + (structure RaArch = struct + structure InsnProps = P + structure AsmEmitter = Asm + structure I = I + structure Liveness=FregLiveness + + val defUse = P.defUse C.FP + val firstPseudoR = 64 + val maxPseudoR = C.maxCell + val numRegs = C.numCell C.FP + val regSet = C.getCell C.FP + end) +end + diff --git a/MLRISC/ppc/ra/ppcRewrite.sml b/MLRISC/ppc/ra/ppcRewrite.sml new file mode 100644 index 0000000..c25e96f --- /dev/null +++ b/MLRISC/ppc/ra/ppcRewrite.sml @@ -0,0 +1,176 @@ +functor PPCRewrite(Instr : PPCINSTR) = struct + structure I = Instr + structure C = I.C + structure CB = CellsBasis + structure CS = CB.CellSet + + fun error msg = MLRiscErrorMsg.error ("PPCRewrite", msg) + fun ea(NONE, _, _) = NONE + | ea(e as SOME(I.Direct r), rs, rt) = + if CB.sameColor(r,rs) then SOME(I.Direct rt) else e + | ea(e as SOME(I.FDirect r), rs, rt) = + if CB.sameColor(r,rs) then SOME(I.FDirect rt) else e + | ea(e as SOME(I.Displace{base, disp, mem}), rs, rt) = + if CB.sameColor(base,rs) then + SOME(I.Displace{base=rt, disp=disp, mem=mem}) + else e + + fun rewriteUse(instr, rs, rt) = let + fun rplac r = if CB.sameColor(r,rs) then rt else r + fun rwOperand(opnd as I.RegOp r) = + if CB.sameColor(r,rs) then I.RegOp rt else opnd + | rwOperand opnd = opnd + fun ea(SOME(I.Displace{base, disp, mem})) = + SOME(I.Displace{base=rplac base, disp=disp, mem=mem}) + | ea x = x + + fun ppcUse(instr) = + (case instr + of I.L {ld, rt, ra, d, mem} => + I.L{ld=ld, rt=rt, ra=rplac ra, d=rwOperand d, mem=mem} + | I.LF {ld, ft, ra, d, mem} => + I.LF{ld=ld, ft=ft, ra=rplac ra, d=rwOperand d, mem=mem} + | I.ST {st, rs, ra, d, mem} => + I.ST{st=st, rs=rplac rs, ra=rplac ra, d=rwOperand d, mem=mem} + | I.STF {st, fs, ra, d, mem} => + I.STF{st=st, fs=fs, ra=rplac ra, d=rwOperand d, mem=mem} + | I.UNARY {oper, rt, ra, Rc, OE} => + I.UNARY{oper=oper, rt=rt, ra=rplac ra, Rc=Rc, OE=OE} + | I.ARITH{oper, rt, ra, rb, Rc, OE} => + I.ARITH{oper=oper, rt=rt, ra=rplac ra, rb=rplac rb, Rc=Rc, OE=OE} + | I.ARITHI{oper, rt, ra, im} => + I.ARITHI{oper=oper, rt=rt, ra=rplac ra, im=rwOperand im} + | I.ROTATE {oper, ra, rs, sh, mb, me} => + I.ROTATE{oper=oper, ra=ra, rs=rplac rs, sh=rplac sh, mb=mb, me=me} + | I.ROTATEI {oper, ra, rs, sh, mb, me} => + I.ROTATEI{oper=oper, ra=ra, rs=rplac rs, sh=rwOperand sh, mb=mb, me=me} + | I.COMPARE {cmp, bf, l, ra, rb} => + I.COMPARE{cmp=cmp, bf=bf, l=l, ra=rplac ra, rb=rwOperand rb} + | I.MTSPR{rs, spr} => I.MTSPR{rs=rplac rs, spr=spr} + | I.TW {to, ra, si} => I.TW{to=to, ra=rplac ra, si=rwOperand si} + | I.TD {to, ra, si} => I.TD{to=to, ra=rplac ra, si=rwOperand si} + | I.CALL {def, use, cutsTo, mem} => + I.CALL{def=def, use=CS.map {from=rs,to=rt} use, + cutsTo=cutsTo, mem=mem} + | I.LWARX {rt, ra, rb} => + I.LWARX{rt=rt, ra=rplac ra, rb=rplac rb} + | I.STWCX {rs, ra, rb} => + I.STWCX{rs=rplac rs, ra=rplac ra, rb=rplac rb} + | _ => instr + (*esac*)) + in + case instr + of (I.ANNOTATION{i, ...}) => rewriteUse(i, rs, rt) + | I.INSTR(i) => I.INSTR(ppcUse(i)) + | I.COPY{k, sz, dst, src, tmp} => + I.COPY{k=k, sz=sz, dst=dst, tmp= ea tmp, + src=case k of CB.GP => map rplac src | _ => src} + | I.LIVE{regs, spilled} => + I.LIVE{regs=C.addReg(rt, C.rmvReg(rs, regs)), spilled=spilled} + | _ => error "rewriteUse" + end + + + fun rewriteDef(instr, rs, rt) = let + fun rplac r = if CB.sameColor(r,rs) then rt else r + fun ea (SOME(I.Direct r)) = SOME(I.Direct(rplac r)) + | ea x = x + fun ppcDef(instr) = + (case instr + of I.L {ld, rt, ra, d, mem} => + I.L{ld=ld, rt=rplac rt, ra=ra, d=d, mem=mem} + | I.UNARY {oper, rt, ra, Rc, OE} => + I.UNARY{oper=oper, rt=rplac rt, ra=ra, Rc=Rc, OE=OE} + | I.ARITH {oper, rt, ra, rb, Rc, OE} => + I.ARITH{oper=oper, rt=rplac rt, ra=ra, rb=rb, Rc=Rc, OE=OE} + | I.ARITHI {oper, rt, ra, im} => + I.ARITHI {oper=oper, rt=rplac rt, ra=ra, im=im} + | I.ROTATE {oper, ra, rs, sh, mb, me} => + I.ROTATE {oper=oper, ra=rplac ra, rs=rs, sh=sh, mb=mb, me=me} + | I.ROTATEI {oper, ra, rs, sh, mb, me} => + I.ROTATEI {oper=oper, ra=rplac ra, rs=rs, sh=sh, mb=mb, me=me} + | I.MFSPR {rt, spr} => I.MFSPR{rt=rplac rt, spr=spr} + | I.CALL {def, use, cutsTo, mem} => + I.CALL{def=CS.map {from=rs,to=rt} def, use=use, + cutsTo=cutsTo, mem=mem} + | I.LWARX {rt, ra, rb} => + I.LWARX{rt=rplac rt, ra=ra, rb=rb} + | _ => instr + (*esac*)) + in + case instr + of (I.ANNOTATION{i, ...}) => rewriteDef(i, rs, rt) + | I.INSTR(i) => I.INSTR(ppcDef(i)) + | I.KILL{regs, spilled} => + I.KILL{regs=C.addReg(rt, C.rmvReg(rs, regs)), spilled=spilled} + | I.COPY {k, sz, dst, src, tmp} => + I.COPY{k=k, sz=sz, src=src, tmp=ea tmp, + dst=case k of CB.GP => map rplac dst | _ => dst} + | _ => error "rewriteDef" + end + + + fun frewriteUse(instr, fs, ft) = let + fun rplac r = if CB.sameColor(r,fs) then ft else r + fun ppcUse(instr) = + (case instr + of I.STF {st, fs, ra, d, mem} => + I.STF{st=st, fs=rplac fs, ra=ra, d=d, mem=mem} + | I.CALL{def, use, cutsTo, mem} => + I.CALL{def=def, use=CS.map {from=fs,to=ft} use, + cutsTo=cutsTo, mem=mem} + | I.FCOMPARE {cmp, bf, fa, fb} => + I.FCOMPARE{cmp=cmp, bf=bf, fa=rplac fa, fb=rplac fb} + | I.FUNARY {oper, ft, fb, Rc} => + I.FUNARY{oper=oper, ft=ft, fb=rplac fb, Rc=Rc} + | I.FARITH {oper, ft, fa, fb, Rc} => + I.FARITH{oper=oper, ft=ft, fa=rplac fa, fb=rplac fb, Rc=Rc} + | I.FARITH3 {oper, ft, fa, fb, fc, Rc} => + I.FARITH3{oper=oper,ft=ft,fa=rplac fa, fb=rplac fb, fc=rplac fc,Rc=Rc} + | _ => instr + (*esac*)) + in + case instr + of (I.ANNOTATION{i, ...}) => frewriteUse(i, fs, ft) + | I.INSTR(i) => I.INSTR(ppcUse(i)) + | I.LIVE{regs, spilled} => + I.LIVE{regs=C.addFreg(ft, C.rmvFreg(fs, regs)), spilled=spilled} + | I.COPY {k as CB.FP, sz, dst, src, tmp} => + I.COPY{k=k, sz=sz, dst=dst, src=map rplac src, tmp=tmp} + + | _ => error "frewriteUse" + + end + + fun frewriteDef(instr, fs, ft) = let + fun rplac r = if CB.sameColor(r,fs) then ft else r + fun rplacEA (SOME(I.FDirect f)) = SOME(I.FDirect(rplac f)) + | rplacEA ea = ea + fun ppcDef(instr) = + (case instr + of I.LF{ld, ft, ra, d, mem} => + I.LF{ld=ld, ft=rplac ft, ra=ra, d=d, mem=mem} + | I.FUNARY {oper, ft, fb, Rc} => + I.FUNARY{oper=oper, ft=rplac ft, fb=fb, Rc=Rc} + | I.FARITH{oper, ft, fa, fb, Rc} => + I.FARITH{oper=oper, ft=rplac ft, fa=fa, fb=fb, Rc=Rc} + | I.FARITH3{oper, ft, fa, fb, fc, Rc} => + I.FARITH3{oper=oper, ft=rplac ft, fa=fa, fb=fb, fc=fc, Rc=Rc} + (* CALL = BCLR {bo=ALWAYS, bf=0, bit=0, LK=true, labels=[] *) + | I.CALL{def, use, cutsTo, mem} => + I.CALL{def=CS.map {from=fs,to=ft} def, use=use, + cutsTo=cutsTo, mem=mem} + | _ => instr + (*esac*)) + in + case instr + of (I.ANNOTATION{i, ...}) => frewriteDef(i, fs, ft) + | I.INSTR(i) => I.INSTR(ppcDef(i)) + | I.KILL{regs, spilled} => + I.KILL{regs=C.addFreg(ft, C.rmvFreg(fs, regs)), spilled=spilled} + | I.COPY {k as CB.FP, sz, dst, src, tmp} => + I.COPY{k=k, sz=sz, dst=map rplac dst, src=src, tmp=rplacEA tmp} + | _ => error "frewriteDef" + end +end + diff --git a/MLRISC/ppc/ra/ppcSpillInstr.sml b/MLRISC/ppc/ra/ppcSpillInstr.sml new file mode 100644 index 0000000..8a2bcab --- /dev/null +++ b/MLRISC/ppc/ra/ppcSpillInstr.sml @@ -0,0 +1,81 @@ +(* ppcSpillInstr.sml + * + * COPYRIGHT (c) 2002 Bell Labs, Lucent Technologies + * + * PPC instructions to emit when spilling an instruction. + *) + +functor PPCSpillInstr(Instr : PPCINSTR) : ARCH_SPILL_INSTR = struct + structure I = Instr + structure C = I.C + structure Rewrite = PPCRewrite(I) + structure CB = CellsBasis + + + fun error msg = MLRiscErrorMsg.error ("PPCSpillInstr", msg) + + fun storeToEA CB.GP (reg, I.Displace{base, disp, mem}) = + I.st{st=I.STW, rs=reg, ra=base, d=I.LabelOp disp, mem=mem} + | storeToEA CB.FP (freg, I.Displace{base, disp, mem}) = + I.stf{st=I.STFD, ra=base, d=I.LabelOp disp, fs=freg, mem=mem} + | storeToEA _ _ = error "storeToEA" + + fun loadFromEA CB.GP (reg, I.Displace{base, disp, mem}) = + I.l{ld=I.LWZ, ra=base, d=I.LabelOp disp, rt=reg, mem=mem} + | loadFromEA CB.FP (freg, I.Displace{base, disp, mem}) = + I.lf{ld=I.LFD, ra=base, d=I.LabelOp disp, ft=freg, mem=mem} + | loadFromEA _ _ = error "loadFromEA" + + fun spillToEA ck reg_ea = + {code=[storeToEA ck reg_ea], proh=[], newReg=NONE} + + fun reloadFromEA ck reg_ea = + {code=[loadFromEA ck reg_ea], proh=[], newReg=NONE} + + (* spill a register to spillLoc *) + fun spillR (instr, reg, ea) = let + val newR = C.newReg() + val instr' = Rewrite.rewriteDef(instr, reg, newR) + in + {code=[instr', storeToEA CB.GP (newR, ea)], + proh=[newR], + newReg=SOME newR} + end + + fun spillF (instr, reg, ea) = let + val newR = C.newFreg() + val instr' = Rewrite.frewriteDef(instr, reg, newR) + in + {code=[instr', storeToEA CB.FP (newR, ea)], + proh=[newR], + newReg=SOME newR} + end + + (* reload a register from spillLoc *) + fun reloadR(instr, reg, ea) = let + val newR = C.newReg() + val instr' = Rewrite.rewriteUse(instr, reg, newR) + in + {code=[loadFromEA CB.GP (newR, ea), instr'], + proh=[newR], + newReg=SOME newR} + end + + fun reloadF(instr, reg, ea) = let + val newR = C.newFreg() + val instr' = Rewrite.frewriteUse(instr, reg, newR) + in + {code=[loadFromEA CB.FP (newR, ea), instr'], + proh=[newR], + newReg=SOME newR} + end + + fun spill CellsBasis.GP = spillR + | spill CellsBasis.FP = spillF + | spill _ = error "spill" + + fun reload CellsBasis.GP = reloadR + | reload CellsBasis.FP = reloadF + | reload _ = error "reload" +end + diff --git a/MLRISC/ra/.cm/GUID/chaitin-spillheur2.sml b/MLRISC/ra/.cm/GUID/chaitin-spillheur2.sml new file mode 100644 index 0000000..8f99653 --- /dev/null +++ b/MLRISC/ra/.cm/GUID/chaitin-spillheur2.sml @@ -0,0 +1 @@ +guid-$OTHER-MLRISC/(RA.cm):../ra/chaitin-spillheur2.sml-1714016097.883 diff --git a/MLRISC/ra/.cm/GUID/chow-hennessy-spillheur2.sml b/MLRISC/ra/.cm/GUID/chow-hennessy-spillheur2.sml new file mode 100644 index 0000000..5ea5d58 --- /dev/null +++ b/MLRISC/ra/.cm/GUID/chow-hennessy-spillheur2.sml @@ -0,0 +1 @@ +guid-$OTHER-MLRISC/(RA.cm):../ra/chow-hennessy-spillheur2.sml-1714016097.846 diff --git a/MLRISC/ra/.cm/GUID/ra-spill-with-renaming.sml b/MLRISC/ra/.cm/GUID/ra-spill-with-renaming.sml new file mode 100644 index 0000000..2398cad --- /dev/null +++ b/MLRISC/ra/.cm/GUID/ra-spill-with-renaming.sml @@ -0,0 +1 @@ +guid-$OTHER-MLRISC/(RA.cm):../ra/ra-spill-with-renaming.sml-1714016097.913 diff --git a/MLRISC/ra/.cm/SKEL/chaitin-spillheur2.sml b/MLRISC/ra/.cm/SKEL/chaitin-spillheur2.sml new file mode 100644 index 0000000..db57331 --- /dev/null +++ b/MLRISC/ra/.cm/SKEL/chaitin-spillheur2.sml @@ -0,0 +1,2 @@ +Skeleton 5 +ae"ImprovedChaitinSpillHeur"jh3aG"gp1d"RAGraph"egp1 f2d"RACore"d"Real"gp1c"RA_SPILL_HEURISTICS" \ No newline at end of file diff --git a/MLRISC/ra/.cm/SKEL/chow-hennessy-spillheur2.sml b/MLRISC/ra/.cm/SKEL/chow-hennessy-spillheur2.sml new file mode 100644 index 0000000..6623a5a --- /dev/null +++ b/MLRISC/ra/.cm/SKEL/chow-hennessy-spillheur2.sml @@ -0,0 +1,2 @@ +Skeleton 5 +ae"ImprovedChowHennessySpillHeur"jh4aG"gp1d"RAGraph"aHeap"gp1d"PriorityHeap"egp1%f6%C7d"Option"d"RACore"d"IntHashTable"d"Real"Ngp1c"RA_SPILL_HEURISTICS" \ No newline at end of file diff --git a/MLRISC/ra/.cm/SKEL/ra-spill-with-renaming.sml b/MLRISC/ra/.cm/SKEL/ra-spill-with-renaming.sml new file mode 100644 index 0000000..722c6c4 --- /dev/null +++ b/MLRISC/ra/.cm/SKEL/ra-spill-with-renaming.sml @@ -0,0 +1,2 @@ +Skeleton 5 +ae"RASpillWithRenaming"i3aInsnProps"gp1c"INSN_PROPERTIES"aAsm"gp1c"INSTRUCTION_EMITTER"f2Word"d"MLRiscErrorMsg"jh9aI"gp29aP"gp1aC"gp29aCBase"gp1d"CellsBasis"CaCore"gp1d"RACore"aG"gp28 aT"jgp19gp1e"RASpillTypes"egp1f9 9C8d"Int"d"IntHashTable"<NNgp1c"RA_SPILL" \ No newline at end of file diff --git a/MLRISC/ra/.cm/amd64-unix/chaitin-spillheur2.sml b/MLRISC/ra/.cm/amd64-unix/chaitin-spillheur2.sml new file mode 100644 index 0000000..983064d Binary files /dev/null and b/MLRISC/ra/.cm/amd64-unix/chaitin-spillheur2.sml differ diff --git a/MLRISC/ra/.cm/amd64-unix/chow-hennessy-spillheur2.sml b/MLRISC/ra/.cm/amd64-unix/chow-hennessy-spillheur2.sml new file mode 100644 index 0000000..b69a3d3 Binary files /dev/null and b/MLRISC/ra/.cm/amd64-unix/chow-hennessy-spillheur2.sml differ diff --git a/MLRISC/ra/.cm/amd64-unix/ra-spill-with-renaming.sml b/MLRISC/ra/.cm/amd64-unix/ra-spill-with-renaming.sml new file mode 100644 index 0000000..fbb046a Binary files /dev/null and b/MLRISC/ra/.cm/amd64-unix/ra-spill-with-renaming.sml differ diff --git a/MLRISC/ra/arch-spill-instr.sig b/MLRISC/ra/arch-spill-instr.sig new file mode 100644 index 0000000..b91475f --- /dev/null +++ b/MLRISC/ra/arch-spill-instr.sig @@ -0,0 +1,33 @@ +(* arch-spill-instr.sig + * + * COPYRIGHT (c) 2002 Bell Labs, Lucent Technologies + * + * Architecture specific instructions to emit when spilling an instruction. + *) + +(* TODO: Some day, all these interface functions will be sensitive to + * the size being spilled or reloaded --- but today is not the day! + *) +signature ARCH_SPILL_INSTR = sig + structure I : INSTRUCTIONS + structure CB : CELLS_BASIS = CellsBasis + + val spillToEA : + CB.cellkind -> + CB.cell * I.ea -> + {code:I.instruction list, proh:CB.cell list, newReg:CB.cell option} + + val reloadFromEA : + CB.cellkind -> + CB.cell * I.ea -> + {code:I.instruction list, proh:CB.cell list, newReg:CB.cell option} + + val spill : + CB.cellkind -> + I.instruction * CB.cell * I.ea -> + {code:I.instruction list, proh:CB.cell list, newReg:CB.cell option} + val reload : + CB.cellkind -> + I.instruction * CB.cell * I.ea -> + {code:I.instruction list, proh:CB.cell list, newReg:CB.cell option} +end \ No newline at end of file diff --git a/MLRISC/ra/chaitin-spillheur.sml b/MLRISC/ra/chaitin-spillheur.sml new file mode 100644 index 0000000..624a187 --- /dev/null +++ b/MLRISC/ra/chaitin-spillheur.sml @@ -0,0 +1,79 @@ +(* + * This module implements the Chaitin heuristic (but weighted by + * priorities) + *) +structure ChaitinSpillHeur : RA_SPILL_HEURISTICS = +struct + + structure G = RAGraph + + open G + + exception NoCandidate + + val mode = RACore.NO_OPTIMIZATION + + fun init() = () + + (* + * Potential spill phase. + * Find a cheap node to spill according to Chaitin's heuristic. + *) + fun chooseSpillNode{graph, hasBeenSpilled, spillWkl} = + let fun chase(NODE{color=ref(ALIASED n),...}) = chase n + | chase n = n + val infiniteCost = 123456789.0 + val don'tUse = 223456789.0 + + (* The spill worklist is maintained only lazily. So we have + * to prune away those nodes that are already removed from the + * interference graph. After pruning the spillWkl, + * it may be the case that there aren't anything to be + * spilled after all. + *) + + (* + * Choose node with the lowest cost and have the maximal degree + *) + fun chaitin([], best, lowestCost, spillWkl) = + (best, lowestCost, spillWkl) + | chaitin(node::rest, best, lowestCost, spillWkl) = + (case chase node of + node as NODE{number, pri, defs, uses, + degree=ref deg, color=ref PSEUDO,...} => + let fun cost() = !pri / real deg + val cost = + case (!defs, !uses) of + (_,[]) => (* defs but no use *) + ~1.0 - real deg + | ([d],[u]) => (* defs after use; don't use *) + let fun plus({block,insn},n) = {block=block,insn=insn+n} + in if d = plus(u,1) orelse d = plus(u,2) + then don'tUse else cost() + end + | _ => cost() + in if cost < lowestCost andalso not(hasBeenSpilled number) + then + (case best of + NONE => chaitin(rest, SOME node, cost, spillWkl) + | SOME best => + chaitin(rest, SOME node, cost, best::spillWkl) + ) + else + chaitin(rest, best, lowestCost, node::spillWkl) + end + | _ => (* discard node *) + chaitin(rest, best, lowestCost, spillWkl) + ) + + (* val _ = print("["^Int.toString(length spillWkl)^"]") *) + + val (potentialSpillNode, cost, newSpillWkl) = + chaitin(spillWkl, NONE, infiniteCost, []) + in case (potentialSpillNode, newSpillWkl) of + (NONE, []) => {node=NONE, cost=cost, spillWkl=[]} + | (NONE, _) => raise NoCandidate + | (SOME node, spillWkl) => + {node=SOME node, cost=cost, spillWkl=spillWkl} + end +end diff --git a/MLRISC/ra/chaitin-spillheur2.sml b/MLRISC/ra/chaitin-spillheur2.sml new file mode 100644 index 0000000..3ca0dbe --- /dev/null +++ b/MLRISC/ra/chaitin-spillheur2.sml @@ -0,0 +1,108 @@ +(* + * This module implements the Chaitin heuristic (but weighted by + * priorities). This version also takes into account of savings in + * coalescing if a virtual is not spilled. You should use this version + * if your program uses direct style and makes use of calleesave registers. + *) +functor ImprovedChaitinSpillHeur + (val moveRatio : real + (* cost of move compared to load/store; should be <= 1.0 *) + ) : RA_SPILL_HEURISTICS = +struct + + structure G = RAGraph + + open G + + exception NoCandidate + + val mode = RACore.NO_OPTIMIZATION + + fun init() = () + + (* + * Potential spill phase. + * Find a cheap node to spill according to Chaitin's heuristic. + *) + fun chooseSpillNode{graph, hasBeenSpilled, spillWkl} = + let fun chase(NODE{color=ref(ALIASED n),...}) = chase n + | chase n = n + val infiniteCost = 123456789.0 + val don'tUse = 223456789.0 + + (* Savings due to coalescing when a node is not spilled *) + fun moveSavings(NODE{movecnt=ref 0, ...}) = 0.0 + | moveSavings(NODE{movelist, ...}) = + let fun loop([], savings) = + foldr (fn ((_,a),b) => Real.max(a,b)) 0.0 savings + | loop(MV{status=ref(WORKLIST | GEORGE_MOVE | BRIGGS_MOVE), + dst, src, cost, ...}::mvs, savings) = + let fun add(c,[]) = [(c,cost)] + | add(c,(x as (c':int,s))::savings) = + if c = c' then (c',s+cost)::savings + else x::add(c,savings) + val savings = + case chase dst of + NODE{color=ref(COLORED c), ...} => add(c,savings) + | _ => + case chase src of + NODE{color=ref(COLORED c), ...} => add(c,savings) + | _ => savings + in loop(mvs, savings) end + | loop(_::mvs, savings) = loop(mvs, savings) + in loop(!movelist, []) end + + (* The spill worklist is maintained only lazily. So we have + * to prune away those nodes that are already removed from the + * interference graph. After pruning the spillWkl, + * it may be the case that there aren't anything to be + * spilled after all. + *) + + (* + * Choose node with the lowest cost and have the maximal degree + *) + fun chaitin([], best, lowestCost, spillWkl) = + (best, lowestCost, spillWkl) + | chaitin(node::rest, best, lowestCost, spillWkl) = + (case chase node of + node as NODE{number, pri, defs, uses, + degree=ref deg, color=ref PSEUDO,...} => + let fun cost() = + let val moveSavings = moveRatio * moveSavings(node) + in (!pri + moveSavings) / real deg end + val cost = + case (!defs, !uses) of + (_,[]) => (* defs but no use *) + ~1.0 - real deg + | ([d],[u]) => (* defs after use; don't use *) + let fun plus({block,insn},n) = {block=block,insn=insn+n} + in if d = plus(u,1) orelse d = plus(u,2) + then don'tUse else cost() + end + | _ => cost() + in if cost < lowestCost andalso not(hasBeenSpilled number) + then + (case best of + NONE => chaitin(rest, SOME node, cost, spillWkl) + | SOME best => + chaitin(rest, SOME node, cost, best::spillWkl) + ) + else + chaitin(rest, best, lowestCost, node::spillWkl) + end + | _ => (* discard node *) + chaitin(rest, best, lowestCost, spillWkl) + ) + + (* val _ = print("["^Int.toString(length spillWkl)^"]") *) + + val (potentialSpillNode, cost, newSpillWkl) = + chaitin(spillWkl, NONE, infiniteCost, []) + in case (potentialSpillNode, newSpillWkl) of + (NONE, []) => {node=NONE, cost=cost, spillWkl=[]} + | (NONE, _) => raise NoCandidate + | (SOME node, spillWkl) => + {node=SOME node, cost=cost, spillWkl=spillWkl} + end +end diff --git a/MLRISC/ra/chow-hennessy-spillheur.sml b/MLRISC/ra/chow-hennessy-spillheur.sml new file mode 100644 index 0000000..e1a97ae --- /dev/null +++ b/MLRISC/ra/chow-hennessy-spillheur.sml @@ -0,0 +1,102 @@ +(* + * This module implements a Chow-Hennessy-style spill heuristic + *) +structure ChowHennessySpillHeur : RA_SPILL_HEURISTICS = +struct + + structure G = RAGraph + structure Heap = PriorityHeap + + open G + + exception NoCandidate + + val mode = RACore.COMPUTE_SPAN + + val cache = ref NONE : (G.node * real) Heap.priority_queue option ref + + fun init() = cache := NONE + + (* + * Potential spill phase. + * Find a cheap node to spill according to Chow Hennessy's heuristic. + *) + fun chooseSpillNode{graph as G.GRAPH{span, ...}, + hasBeenSpilled, spillWkl} = + let fun chase(NODE{color=ref(ALIASED n),...}) = chase n + | chase n = n + (* The spill worklist is maintained only lazily. So we have + * to prune away those nodes that are already removed from the + * interference graph. After pruning the spillWkl, + * it may be the case that there aren't anything to be + * spilled after all. + *) + fun chowHennessy spills = + let (* Compute savings due to moves *) + val spillSavings = RACore.moveSavings graph + val lookupSpan = IntHashTable.find (Option.valOf(!span)) + val lookupSpan = + fn r => case lookupSpan r of SOME s => s | NONE => 0.0 + val _ = span := NONE + fun loop([], L, pruned) = (L, pruned) + | loop(node::rest, L, pruned) = + (case chase node of + node as NODE{number, pri, defs, uses, + degree=ref deg, color=ref PSEUDO,...} => + if hasBeenSpilled number + then loop(rest, L, false) + else + let fun newnode() = + let val span = lookupSpan number + val savings = spillSavings number + val spillCost = !pri + val totalCost = spillCost - savings + (*val rank = ((real totalCost)+0.01) / real(span)*) + val rank = (totalCost + 0.5) / (span + real deg) + in loop(rest, (node, rank)::L, false) end + in case (!defs, !uses) of + (_, []) => (* one def no use *) + loop(rest, (node, ~1.0 - real(deg))::L, false) + | ([d], [u]) => (* defs after use; don't use *) + let fun plus({block,insn},n) = {block=block,insn=insn+n} + in if d = plus(u,1) orelse d = plus(u,2) then + loop(rest, L, false) + else + newnode() + end + | _ => newnode() + end + | _ => loop(rest, L, pruned) (* discard node *) + ) + in loop(spills, [], true) + end + + fun chooseNode heap = + let fun loop() = + let val (node,cost) = Heap.deleteMin heap + in case chase node of + node as NODE{color=ref PSEUDO, ...} => + {node=SOME(node), cost=cost, spillWkl=spillWkl} + | _ => loop() + end + in loop() + end handle _ => {node=NONE, cost=0.0, spillWkl=[]} + + in case !cache of + SOME heap => chooseNode heap + | NONE => + let val (L, pruned) = chowHennessy(spillWkl) + in if pruned then (* done *) + {node=NONE, cost=0.0, spillWkl=[]} + else + (case L of + [] => raise NoCandidate + | _ => let fun rank((_,x), (_,y)) = Real.<(x, y) + val heap = Heap.fromList rank L + in cache := SOME heap; + chooseNode heap + end + ) + end + end +end diff --git a/MLRISC/ra/chow-hennessy-spillheur2.sml b/MLRISC/ra/chow-hennessy-spillheur2.sml new file mode 100644 index 0000000..811a509 --- /dev/null +++ b/MLRISC/ra/chow-hennessy-spillheur2.sml @@ -0,0 +1,127 @@ +(* + * This module implements a Chow-Hennessy-style spill heuristic + *) +functor ImprovedChowHennessySpillHeur + (val moveRatio : real) : RA_SPILL_HEURISTICS = +struct + + structure G = RAGraph + structure Heap = PriorityHeap + + open G + + exception NoCandidate + + val mode = RACore.COMPUTE_SPAN + + val cache = ref NONE : (G.node * real) Heap.priority_queue option ref + + fun init() = cache := NONE + + (* + * Potential spill phase. + * Find a cheap node to spill according to Chow Hennessy's heuristic. + *) + fun chooseSpillNode{graph as G.GRAPH{span, ...}, + hasBeenSpilled, spillWkl} = + let fun chase(NODE{color=ref(ALIASED n),...}) = chase n + | chase n = n + + (* Savings due to coalescing when a node is not spilled *) + fun moveSavings(NODE{movecnt=ref 0, ...}) = 0.0 + | moveSavings(NODE{movelist, ...}) = + let fun loop([], savings) = + foldr (fn ((_,a),b) => Real.max(a,b)) 0.0 savings + | loop(MV{status=ref(WORKLIST | GEORGE_MOVE | BRIGGS_MOVE), + dst, src, cost, ...}::mvs, savings) = + let fun add(c,[]) = [(c,cost)] + | add(c,(x as (c':int,s))::savings) = + if c = c' then (c',s+cost)::savings + else x::add(c,savings) + val savings = + case chase dst of + NODE{color=ref(COLORED c), ...} => add(c,savings) + | _ => + case chase src of + NODE{color=ref(COLORED c), ...} => add(c,savings) + | _ => savings + in loop(mvs, savings) end + | loop(_::mvs, savings) = loop(mvs, savings) + in loop(!movelist, []) end + + (* The spill worklist is maintained only lazily. So we have + * to prune away those nodes that are already removed from the + * interference graph. After pruning the spillWkl, + * it may be the case that there aren't anything to be + * spilled after all. + *) + fun chowHennessy spills = + let (* Compute savings due to moves *) + val spillSavings = RACore.moveSavings graph + val lookupSpan = IntHashTable.find (Option.valOf(!span)) + val lookupSpan = + fn r => case lookupSpan r of SOME s => s | NONE => 0.0 + val _ = span := NONE + fun loop([], L, pruned) = (L, pruned) + | loop(node::rest, L, pruned) = + (case chase node of + node as NODE{number, pri, defs, uses, + degree=ref deg, color=ref PSEUDO,...} => + if hasBeenSpilled number + then loop(rest, L, false) + else + let fun newnode() = + let val span = lookupSpan number + val savings = spillSavings number + val spillCost = !pri + val totalCost = spillCost - savings + (*val rank = ((real totalCost)+0.01) / real(span)*) + val rank = (totalCost + 0.5 + moveSavings(node)) + / (span+ real deg) + in loop(rest, (node, rank)::L, false) end + in case (!defs, !uses) of + (_, []) => (* one def no use *) + loop(rest, (node, ~1.0 - real(deg))::L, false) + | ([d], [u]) => (* defs after use; don't use *) + let fun plus({block,insn},n) = {block=block,insn=insn+n} + in if d = plus(u,1) orelse d = plus(u,2) then + loop(rest, L, false) + else + newnode() + end + | _ => newnode() + end + | _ => loop(rest, L, pruned) (* discard node *) + ) + in loop(spills, [], true) + end + + fun chooseNode heap = + let fun loop() = + let val (node,cost) = Heap.deleteMin heap + in case chase node of + node as NODE{color=ref PSEUDO, ...} => + {node=SOME(node), cost=cost, spillWkl=spillWkl} + | _ => loop() + end + in loop() + end handle _ => {node=NONE, cost=0.0, spillWkl=[]} + + in case !cache of + SOME heap => chooseNode heap + | NONE => + let val (L, pruned) = chowHennessy(spillWkl) + in if pruned then (* done *) + {node=NONE, cost=0.0, spillWkl=[]} + else + (case L of + [] => raise NoCandidate + | _ => let fun rank((_,x), (_,y)) = Real.<(x, y) + val heap = Heap.fromList rank L + in cache := SOME heap; + chooseNode heap + end + ) + end + end +end diff --git a/MLRISC/ra/cluster-partitioner.sml b/MLRISC/ra/cluster-partitioner.sml new file mode 100644 index 0000000..7452c0c --- /dev/null +++ b/MLRISC/ra/cluster-partitioner.sml @@ -0,0 +1,228 @@ +(* + * Partition a cluster into multiple smaller clusters for region-based + * register allocation. + *) +functor ClusterPartitioner + (structure Flowgraph : FLOWGRAPH + structure InsnProps : INSN_PROPERTIES + sharing Flowgraph.I = InsnProps.I + ) : RA_FLOWGRAPH_PARTITIONER = +struct + structure F = Flowgraph + structure I = F.I + structure C = I.C + structure PQ = PriorityQueue + structure Liveness = Liveness(Flowgraph) + structure A = Array + + type flowgraph = F.cluster + + val debug = true + + fun error msg = MLRiscErrorMsg.error("ClusterPartitioner",msg) + + val maxSize = MLRiscControl.getInt "ra-max-region-size" + val _ = maxSize := 300 + + fun numberOfBlocks(F.CLUSTER{blkCounter,...}) = !blkCounter + + (* + * Partition the cluster into a set of clusters so that each can + * be allocated independently. + *) + fun partition(F.CLUSTER{blkCounter, blocks, entry, exit, + annotations, ...}) + cellkind processRegion = + (* Number of basic blocks *) + let val N = !blkCounter + + val _ = if debug then + print("[Region based register allocation: "^ + Int.toString N^"]\n") + else () + val maxSize = !maxSize + + (* Perform global liveness analysis first. + * Unfortunately, I know of no way of avoiding this step because + * we have to know which values are live across regions. + *) + val _ = Liveness.liveness{blocks=blocks, + defUse=InsnProps.defUse cellkind, + getCell=C.getCellsByKind cellkind, + updateCell=C.updateCellsByKind cellkind + } + + val F.ENTRY{succ=entrySucc, ...} = entry + val F.EXIT{pred=exitPred, ...} = exit + val initTrail = [(entrySucc,!entrySucc), (exitPred, !exitPred)] + + (* Priority queue of basic blocks in non-increasing order + * of execution frequency + *) + fun higherFreq(F.BBLOCK{freq=a,...}, F.BBLOCK{freq=b,...}) = !a > !b + | higherFreq _ = error "higherFreq" + val blocks = List.foldr (fn (b as F.BBLOCK _,l) => b::l | (_,l) => l) + [] blocks + val seedQueue = PQ.fromList higherFreq blocks + + (* Current region id *) + val regionCounter = ref 0 + fun newRegionId() = + let val regionId = !regionCounter + in regionCounter := !regionCounter + 1; regionId end + + (* Has the block been included in any region? + * Non-negative means yes. The number is the region id in which + * the block belongs. + *) + val processed = A.array(N, ~1) + + fun hasBeenProcessed n = A.sub(processed,n) >= 0 + fun markAsProcessed(n, regionId) = A.update(processed,n,regionId) + + (* Get an unprocessed seed block from the queue *) + fun getSeedBlock(regionId) = + case PQ.deleteMin seedQueue of + block as F.BBLOCK{blknum, insns, ...} => + if hasBeenProcessed blknum then getSeedBlock(regionId) + else block + | _ => error "getSeedBlock" + + fun resetTrail [] = () + | resetTrail((r,x)::trail) = (r := x; resetTrail trail) + + (* + * Grow a region. Currently, region growth is limited only by size. + * Note that we only select nodes with one out edges as possible + * region cut points. We also try not to make a region too small + * as it will waste initialization time. It's a delicate balance. + *) + fun growRegion() = + let val regionId = newRegionId() + fun add([], Q) = Q + | add((b as F.BBLOCK{blknum, ...},_)::bs, Q) = + if hasBeenProcessed blknum then add(bs, Q) + else add(bs, b::Q) + | add(_::bs, Q) = add(bs, Q) + fun grow((b as F.BBLOCK{blknum, succ, pred, insns, ...})::F, B, + size, blks, m) = + if hasBeenProcessed blknum + then grow(F, B, size, blks, m) + else + let val n = length(!insns) + val newSize = size + n + in if m > 0 andalso newSize > maxSize andalso length(!succ) = 1 + then grow(F, B, size, blks, m) + else (markAsProcessed(blknum, regionId); + grow(F, add(!pred,add(!succ,B)), newSize, + b::blks, m+1) + ) + end + | grow([], [], size, blks, m) = (size, blks, m) + | grow([], B, size, blks, m) = grow(rev B, [], size, blks, m) + | grow _ = error "grow" + + (* Find a seed block *) + val seed = getSeedBlock(regionId) + + (* Grow until we reach some limit *) + val (totalSize, blocks, blockCount) = grow([seed], [], 0, [], 0) + + (* Now create a cluster with only these blocks + * We have to update the edges so that region-entry edges + * are made into entry edges and region-exit edges are + * made into exit edges. + *) + fun makeSubgraph(blocks) = + let fun inSubgraph(y) = A.sub(processed,y) = regionId + fun processSucc(b,x,(e as (F.BBLOCK{blknum=y, ...},freq))::es, + es', exit, exitFreq) = + if inSubgraph(y) then + processSucc(b,x,es,e::es',exit,exitFreq) + else processSucc(b,x,es,es',true, exitFreq + !freq) + | processSucc(b,x,(e as (F.EXIT{blknum=y,...},freq))::es,es', + exit, exitFreq) = + processSucc(b,x,es,es', true, exitFreq + !freq) + | processSucc(b,x,[],es',true, exitFreq) = + let val w = ref exitFreq + in exitPred := (b,w) :: !exitPred; + ((exit,w)::es', true) + end + | processSucc(b,x,[],es', false, exitFreq) = (es', false) + | processSucc _ = error "processSucc" + + fun processPred(b,x,(e as (F.BBLOCK{blknum=y, ...},freq))::es, + es', entry, entryFreq) = + if inSubgraph(y) then + processPred(b,x,es,e::es',entry,entryFreq) + else processPred(b,x,es,es',true,entryFreq + !freq) + | processPred(b,x,(e as (F.ENTRY{blknum=y,...},freq))::es,es', + entry, entryFreq) = + processPred(b,x,es,es',true, entryFreq + !freq) + | processPred(b,x,[], es', true, entryFreq) = + let val w = ref entryFreq + in entrySucc := (b,w) :: !entrySucc; + ((entry,w)::es', true) + end + | processPred(b,x,[], es', false, entryFreq) = (es', false) + | processPred _ = error "processPred" + + fun processNodes([], trail) = trail + | processNodes( + (b as F.BBLOCK{blknum=n,liveIn,liveOut,succ,pred,...}) + ::nodes, trail) = + let val (succ', exit) = processSucc(b,n,!succ,[],false,0) + val trail = if exit then (succ, !succ)::trail else trail + val (pred', entry) = processPred(b,n,!pred,[],false,0) + val trail = if entry then (pred, !pred)::trail else trail + in succ := succ'; + pred := pred'; + (* To save space, clear liveIn and + * liveOut information (if it is not an exit) + *) + liveIn := CellsBasis.CellSet.empty; + if exit then () else liveOut := CellsBasis.CellSet.empty; + processNodes(nodes, trail) + end + | processNodes _ = error "processNodes" + + val _ = entrySucc := [] + val _ = exitPred := [] + val trail = processNodes(blocks, initTrail) + in trail + end + + (* Make a subgraph with the appropriate edges *) + val trail = makeSubgraph(blocks) + + val region = + F.CLUSTER{blkCounter = blkCounter, + blocks = blocks, + entry = entry, + exit = exit, + annotations = annotations + } + in (regionId, region, trail, blockCount) + end + + (* + * Extract a new region to compile. Raises PQ.EmptyPriorityQueue if + * everything is finished. + *) + fun iterate() = + let val (id, region, trail, blockCount) = growRegion() (* get a region *) + in if debug then + print("[Region "^Int.toString id^" has "^Int.toString blockCount^ + " blocks]\n") + else (); + processRegion region; (* allocate this region *) + resetTrail trail; (* reset the flowgraph *) + iterate() (* process next region *) + end + + in (* Repeat until the entire flowgraph has been processed *) + iterate() handle PQ.EmptyPriorityQueue => (); + if debug then print "[Region based register allocation done]\n" else () + end + +end diff --git a/MLRISC/ra/cluster-ra.sml b/MLRISC/ra/cluster-ra.sml new file mode 100644 index 0000000..129fe40 --- /dev/null +++ b/MLRISC/ra/cluster-ra.sml @@ -0,0 +1,568 @@ +(* cluster-ra.sml + * + * COPYRIGHT (c) 2002 Bell Labs, Lucent Technologies + * + * This module provides services for the new RA when using the cluster + * representation. + * The algorithm is adapted from + * Algorithm 19.17 from Appel, Modern Compiler Implementation in ML, + * Calculation of live ranges in SSA form. We don't directly use SSA + * here but the principles are the same. + * + * -- Allen + *) + +functor ClusterRA + (structure Asm : INSTRUCTION_EMITTER + structure Flowgraph : CONTROL_FLOW_GRAPH + where I = Asm.I + and P = Asm.S.P + structure InsnProps : INSN_PROPERTIES + where I = Flowgraph.I + structure Spill : RA_SPILL + where I = Flowgraph.I + ) : RA_FLOWGRAPH = +struct + structure CFG = Flowgraph + structure I = CFG.I + structure G = RAGraph + structure Props = InsnProps + structure Core = RACore + structure A = Array + structure UA = Unsafe.Array (* okay, I'm cheating a bit here *) + structure Spill = Spill + + open G + structure C = I.C + structure CB = CellsBasis + + fun isOn(flag,mask) = Word.andb(flag,mask) <> 0w0 + + val dump_size = MLRiscControl.mkFlag ("ra-dump-size", "whether to show RA size") + + type flowgraph = CFG.cfg (* flowgraph is a cluster *) + + fun error msg = MLRiscErrorMsg.error("ClusterRA", msg) + + val mode = 0w0 + + fun uniqCells s = CB.SortedCells.return(CB.SortedCells.uniq s) + + fun chaseCell(c as CB.CELL{col=ref(CB.MACHINE r),...}) = (c,r) + | chaseCell(CB.CELL{col=ref(CB.ALIASED c), ...}) = chaseCell c + | chaseCell(c as CB.CELL{col=ref CB.SPILLED, ...}) = (c,~1) + | chaseCell(c as CB.CELL{col=ref CB.PSEUDO, id, ...}) = (c,id) + + fun colorOf(CB.CELL{col=ref(CB.MACHINE r),...}) = r + | colorOf(CB.CELL{col=ref(CB.ALIASED c), ...}) = colorOf c + | colorOf(CB.CELL{col=ref CB.SPILLED, ...}) = ~1 + | colorOf(CB.CELL{col=ref CB.PSEUDO, id, ...}) = id + + fun chase(NODE{color=ref(ALIASED n), ...}) = chase n + | chase n = n + + exception NotThere + + val Asm.S.STREAM{emit,...} = Asm.makeStream [] + + fun dumpFlowgraph(txt, cfg as Graph.GRAPH graph, outstrm) = let + fun say txt = TextIO.output(outstrm, txt) + fun sayPseudo p = (say(CFG.P.toString p); say "\n") + val CFG.INFO{data, ...} = #graph_info graph + in + CFG.dump(outstrm, txt, cfg); + app sayPseudo (rev(!data)) + end + + val annotations = CFG.annotations + + val dummyBlock = CFG.newBlock(~1, ref 0.0) + + val uniq = ListMergeSort.uniqueSort + (fn ({block=b1,insn=i1},{block=b2,insn=i2}) => + case Int.compare(b1,b2) of + EQUAL => Int.compare(i1,i2) + | ord => ord) + + fun services(cfg as Graph.GRAPH graph) = let + val CFG.INFO{annotations=clAnns, ...} = #graph_info graph + val blocks = #nodes graph () + fun maxBlockId ((id, CFG.BLOCK _)::rest, curr) = + if id > curr then maxBlockId(rest, id) else maxBlockId(rest, curr) + | maxBlockId([], curr) = curr + + val N = maxBlockId(blocks, #capacity graph ()) + + (* + * Construct program point + *) + fun progPt(blknum, instrId) = {block=blknum, insn=instrId} + fun blockNum{block,insn} = block + fun instrNum{block,insn} = insn + + (* blocks indexed by block id *) + val blockTable = A.array(N, (#new_id graph (), dummyBlock)) + + (* fill block table *) + val _ = List.app (fn b as (nid, _) => Array.update(blockTable, nid, b)) blocks + + val EXIT = (case #exits graph () of [e] => e | _ => error "EXIT") + + (* + * Building the interference graph + *) + fun buildIt (cellkind, + G as GRAPH{nodes, dedicated, mode, span, copyTmps, ...}) = + + let (* definitions indexed by block id+instruction id *) + val defsTable = A.array(N, A.array(0, [] : node list)) + val marked = A.array(N, ~1) + val addEdge = Core.addEdge G + + (* copies indexed by source + * This table maps variable v to the program points where + * v is a source of a copy. + *) + val copyTable = IntHashTable.mkTable(N, NotThere) + : {dst:CB.cell,pt:G.programPoint} list IntHashTable.hash_table + val lookupCopy = IntHashTable.find copyTable + val lookupCopy = fn r => case lookupCopy r of SOME c => c + | NONE => [] + val addCopy = IntHashTable.insert copyTable + + val stamp = ref 0 + + (* Allocate the arrays *) + fun alloc [] = () + | alloc((id, CFG.BLOCK{insns, ...})::blocks) = + (UA.update(defsTable, id, A.array(length(!insns)+1, [])); + alloc blocks) + val _ = alloc blocks + + (* + * Remove pseudo use generated by copy temporaries + *) + fun rmPseudoUses [] = () + | rmPseudoUses(NODE{uses,...}::ns) = (uses := []; rmPseudoUses ns) + + (* + * Initialize the definitions before computing the liveness for v. + *) + fun initialize(v, v', useSites) = let + (* First we remove all definitions for all copies + * with v as source. + * When we have a copy and while we are processing v + * + * x <- v + * + * x does not really interfere with v at this point, + * so we remove the definition of x temporarily. + *) + fun markCopies([], trail) = trail + | markCopies({pt, dst}::copies, trail) = + let val b = blockNum pt + val i = instrNum pt + val defs = UA.sub(defsTable, b) + val nodes = UA.sub(defs, i) + fun revAppend([], nodes) = nodes + | revAppend(n::ns, nodes) = revAppend(ns, n::nodes) + val dstColor = colorOf dst + fun removeDst([], nodes') = nodes' + | removeDst((d as NODE{number=r,...})::nodes, nodes')= + if r = dstColor then revAppend(nodes', nodes) + else removeDst(nodes, d::nodes') + val nodes' = removeDst(nodes, []) + in UA.update(defs, i, nodes'); + markCopies(copies, (defs, i, nodes)::trail) + end + + (* + * Then we mark all use sites of v with a fake definition so that + * the scanning will terminate correctly at these points. + *) + fun markUseSites([], trail) = trail + | markUseSites(pt::pts, trail) = + let val b = blockNum pt + val i = instrNum pt + val defs = UA.sub(defsTable, b) + val nodes = UA.sub(defs, i) + in UA.update(defs, i, v'::nodes); + markUseSites(pts, (defs, i, nodes)::trail) + end + + val copies = lookupCopy v + val trail = markCopies(copies, []) + val trail = markUseSites(useSites, trail) + in trail end + + fun cleanup [] = () + | cleanup ((defs, i, nodes)::trail) = + (UA.update(defs, i, nodes); cleanup trail) + (* + * Perform incremental liveness analysis on register v + * and compute the span + *) + fun liveness(v, v' as NODE{uses, ...}, cellV) = let + val st = !stamp + val _ = stamp := st + 1 + fun foreachUseSite([], span) = span + | foreachUseSite(u::uses, span) = let + val b = blockNum u + val i = instrNum u + val block as (_, CFG.BLOCK{freq, ...}) = UA.sub(blockTable, b) + val span = + if i = 0 then liveOutAtBlock(block, span) (* live out *) + else let + val f = !freq + val defs = UA.sub(defsTable, b) + in liveOutAtStmt(block, A.length defs, defs, i+1, f, span+f) + end + in foreachUseSite(uses, span) + end + + and visitPred((nid, _), span) = + let fun foreachPred([], span) = span + | foreachPred(nid::pred, span) = let + val span = liveOutAtBlock((nid, #node_info graph nid), span) + in foreachPred(pred, span) + end + in + foreachPred(#pred graph nid, span) + end + + and liveOutAtStmt(block, nDefs, defs, pos, freq, span) = + (* v is live out *) + if pos < nDefs then + let fun foreachDef([], true) = span + | foreachDef([], false) = + liveOutAtStmt(block, nDefs, defs, + pos+1, freq, span+freq) + | foreachDef((d as NODE{number=r, ...})::ds, kill) = + if r = v then foreachDef(ds, true) + else (addEdge(d, v'); foreachDef(ds, kill)) + in foreachDef(UA.sub(defs, pos), false) + end + else visitPred(block, span) + + and liveOutAtBlock(block as (nid, CFG.BLOCK{freq, ...}), span) = + (* v is live out at the current block *) + if UA.sub(marked, nid) = st then span + else let + val defs = UA.sub(defsTable, nid) + in + UA.update(marked, nid, st); + liveOutAtStmt(block, A.length defs, defs, 1, !freq, span) + end + + val useSites = uniq(!uses) + val trail = initialize(v, v', useSites) + val span = foreachUseSite (useSites, 0.0) + val _ = cleanup trail + in + span + end + + val newNodes = Core.newNodes G + val getnode = IntHashTable.lookup nodes + val insnDefUse = Props.defUse cellkind + val getCell = C.getCellsByKind cellkind + + fun isDedicated r = dedicated r + + (* Remove all dedicated or spilled registers from the list *) + fun rmvDedicated regs = + let fun loop([], rs') = rs' + | loop(r::rs, rs') = + let fun rmv(r as CB.CELL{col=ref(CB.PSEUDO), id, ...}) = + if isDedicated(id) then loop(rs, rs') else loop(rs, r::rs') + | rmv(CB.CELL{col=ref(CB.ALIASED r), ...}) = rmv r + | rmv(r as CB.CELL{col=ref(CB.MACHINE col), ...}) = + if isDedicated col then loop(rs, rs') + else loop(rs, r::rs') + | rmv(CB.CELL{col=ref(CB.SPILLED), ...}) = loop(rs,rs') + in rmv r + end + in loop(regs, []) end + + (* + * Create parallel move + *) + fun mkMoves(insn, pt, cost, mv, tmps) = + (case insn of + I.ANNOTATION{i, ...} => + (* strip away the annotation. + * Note: we are assuming annotations cannot change + * the semantics of the copies. + *) + mkMoves(i, pt, cost, mv, tmps) + | I.COPY{dst, src, k, ...} => + (* If it is a parallel copy, deal with the copy temporary + * properly. If it is a register, + * create a pseudo use site just below the end of + * the copy instruction. This is to make sure that + * the temporary is colored properly. If the copy temporary + * doesn't exist or if it has been spilled, do nothing. + *) + if k = cellkind then + let val tmps = + case Props.moveTmpR insn of + SOME r => + (* Add a pseudo use for tmpR *) + (case chase(getnode(colorOf r)) of + tmp as NODE{uses,defs=ref [d],...} => + let fun prev{block,insn}={block=block,insn=insn-1} + in uses := [prev d]; + tmp::tmps + end + | _ => error "mkMoves" + ) + | NONE => tmps + fun moves([], [], mv) = mv + | moves(d::ds, s::ss, mv) = + let val (d, cd) = chaseCell d + val (s, cs) = chaseCell s + in if isDedicated cd orelse isDedicated cs + then moves(ds, ss, mv) + else if cd = cs then moves(ds, ss, mv) + else + let val _ = + addCopy(cs, {dst=d, pt=pt}::lookupCopy cs); + val dst = chase(getnode cd) + val src = chase(getnode cs) + in moves(ds, ss, MV{dst=dst, src=src, + status=ref WORKLIST, + hicount=ref 0, + (* kind=REG_TO_REG, *) + cost=cost}::mv + ) + end + end + | moves _ = error "moves" + in (moves(dst, src, mv), tmps) end + else (mv, tmps) + | _ => (mv, tmps) + ) + + (* Add the nodes first *) + fun mkNodes([], mv, tmps) = (mv, tmps) + | mkNodes((nid, blk)::blocks, mv, tmps) = let + val CFG.BLOCK{insns, freq=ref w, annotations, ...} = blk + val succ = #succ graph nid + val liveOut = CFG.liveOut blk + val dtab = A.sub(defsTable, nid) + fun scan([], pt, i, mv, tmps) = (pt, i, mv, tmps) + | scan(insn::rest, pt, i, mv, tmps) = + let val (d, u) = insnDefUse insn + val defs = rmvDedicated d + val uses = rmvDedicated u + val defs = newNodes{cost=w, pt=pt, + defs=defs, uses=uses} + val _ = UA.update(dtab, i, defs) + val (mv, tmps) = mkMoves(insn, pt, w, mv, tmps) + fun next{block,insn} = {block=block,insn=insn+1} + in scan(rest,next pt, i+1, mv, tmps) + end + val (pt, i, mv, tmps) = + scan(!insns, progPt(nid,1), 1, mv, tmps) + in + (* If the block is escaping, then all liveout + * registers are considered used here. + *) + case succ + of [id] => + if id = EXIT then let + val liveSet = rmvDedicated( + uniqCells(getCell(liveOut))) + in newNodes{cost=w, pt=progPt(nid, 0), + defs=[], uses=liveSet}; () + end + else () + | _ => () + (*esac*); + mkNodes(blocks, mv, tmps) + end + + (* Add the edges *) + + val (moves, tmps) = mkNodes(blocks, [], []) + in + IntHashTable.appi + (let val setSpan : (int * real) -> unit = + if isOn(mode,Core.COMPUTE_SPAN) then + let val spanMap = IntHashTable.mkTable + (IntHashTable.numItems nodes, NotThere) + val setSpan = IntHashTable.insert spanMap + val _ = span := SOME spanMap + in setSpan end + else fn _ => () + in fn (v, v' as NODE{cell, color, ...}) => + let fun computeLiveness() = + setSpan(v, liveness(v, v', cell)) + in case !color of + PSEUDO => computeLiveness() + | COLORED _ => computeLiveness() + | MEMREG _ => computeLiveness() + | _ => () + end + end + ) nodes; + if isOn(Core.SAVE_COPY_TEMPS, mode) then copyTmps := tmps else (); + rmPseudoUses tmps; + moves + end (* buildIt *) + + (* + * Build the interference graph initially. + *) + fun build(G, cellkind) = let + val moves = buildIt(cellkind, G) + val i2s = Int.toString + in + if !dump_size then let + val GRAPH{nodes, bitMatrix,...} = G + val insns = + foldr (fn ((_,CFG.BLOCK{insns,...}),n) => length(!insns) + n) 0 blocks + in + TextIO.output + (!MLRiscControl.debug_stream, + "RA #blocks="^i2s N ^ + " #insns="^i2s insns ^ + " #nodes="^i2s(IntHashTable.numItems nodes) ^ + " #edges="^i2s(Core.BM.size(!bitMatrix)) ^ + " #moves="^i2s(length moves)^"\n") + end + else (); + moves + end + + (* + * Rebuild the interference graph; + * We'll just do it from scratch for now. + *) + fun rebuild(cellkind, G) = + (Core.clearNodes G; + buildIt(cellkind, G) + ) + + (* + * Spill a set of nodes and rewrite the flowgraph + *) + fun spill{copyInstr, spill, spillSrc, spillCopyTmp, + reload, reloadDst, renameSrc, graph, + cellkind, nodes=nodesToSpill} = + let (* Remove the interference graph now *) + val _ = Core.clearGraph graph + + (* maps program point to registers to be spilled *) + val spillSet = G.PPtHashTable.mkTable(32, NotThere) + + (* maps program point to registers to be reloaded *) + val reloadSet = G.PPtHashTable.mkTable(32, NotThere) + + (* maps program point to registers to be killed *) + val killSet = G.PPtHashTable.mkTable(32, NotThere) + + val spillRewrite = Spill.spillRewrite + { graph=graph, + spill=spill, + spillSrc=spillSrc, + spillCopyTmp=spillCopyTmp, + reload=reload, + reloadDst=reloadDst, + renameSrc=renameSrc, + copyInstr=copyInstr, + cellkind=cellkind, + spillSet=spillSet, + reloadSet=reloadSet, + killSet=killSet + } + + (* set of basic blocks that are affected *) + val affectedBlocks = IntHashTable.mkTable(32, NotThere) + + val addAffectedBlocks = IntHashTable.insert affectedBlocks + + fun ins set = let + val add = G.PPtHashTable.insert set + val look = G.PPtHashTable.find set + val look = fn r => case look r of SOME s => s | NONE => [] + fun enter(r, []) = () + | enter(r, pt::pts) = + (add (pt, r::look pt); + addAffectedBlocks (blockNum pt, true); + enter(r, pts) + ) + in enter + end + + val insSpillSet = ins spillSet + val insReloadSet = ins reloadSet + val insKillSet = + let + val add = G.PPtHashTable.insert killSet + val look = G.PPtHashTable.find killSet + val look = fn r => case look r of SOME s => s | NONE => [] + fun enter(r, []) = () + | enter(r, pt::pts) = (add(pt, r::look pt); enter(r, pts)) + in enter + end + + (* Mark all spill/reload locations *) + fun markSpills(G.NODE{color, number, cell, defs, uses, ...}) = + let fun spillIt(defs, uses) = + (insSpillSet(cell, defs); + insReloadSet(cell, uses); + (* Definitions but no use! *) + case uses of + [] => insKillSet(cell, defs) + | _ => () + ) + val d = !defs + val u = !uses + in + case !color + of G.SPILLED => spillIt(d,u) + | G.SPILL_LOC _ => spillIt(d,u) + | G.MEMREG _ => spillIt(d,u) + | G.PSEUDO => spillIt(d,u) + | _ => () + end + val _ = app markSpills nodesToSpill + + (* Rewrite all affected blocks *) + fun rewriteAll (blknum, _) = let + val (_, CFG.BLOCK{insns as ref instrs, annotations, ...}) = + A.sub(blockTable, blknum) + val instrs = + spillRewrite{pt=progPt(blknum, length instrs), + instrs=instrs, annotations=annotations} + in + insns := instrs + end + + + fun mark(G.NODE{color, ...}) = + (case !color + of PSEUDO => color := SPILLED + | SPILLED => () + | SPILL_LOC _ => () + | ALIASED _ => () + | MEMREG _ => () + | COLORED _ => error "mark: COLORED" + | REMOVED => error "mark: REMOVED" + (*esac*)) + in + IntHashTable.appi rewriteAll affectedBlocks; + app mark nodesToSpill; + rebuild(cellkind, graph) + end (* spill *) + in + { build = build, + spill = spill, + programPoint= fn{block,instr} => progPt(block,instr), + blockNum = blockNum, + instrNum = instrNum + } + end +end + diff --git a/MLRISC/ra/getreg.sig b/MLRISC/ra/getreg.sig new file mode 100644 index 0000000..27481a0 --- /dev/null +++ b/MLRISC/ra/getreg.sig @@ -0,0 +1,21 @@ +(* + * A simple round robin based register allocator. + * Now with the ability to get register pairs. + * -- Allen + *) +signature GETREG = +sig + exception GetReg + + (* get a register, unconstrained but with optional preference *) + (* if sub(proh,r) = stamp that means the register is prohibited *) + val getreg : {pref:int list, stamp:int, proh:int Array.array} -> int + + (* get a register pair, must be an even/odd pair, returns the + * even register (i.e. the smaller one) + *) + val getpair : {pref:int list, stamp:int, proh:int Array.array} -> int + + (* reset the state *) + val reset : unit -> unit +end diff --git a/MLRISC/ra/getreg.sml b/MLRISC/ra/getreg.sml new file mode 100644 index 0000000..cabdf86 --- /dev/null +++ b/MLRISC/ra/getreg.sml @@ -0,0 +1,82 @@ +(* getreg.sml + * + * COPYRIGHT (c) 1996 Bell Laboratories. + * + *) + +(** A simple round robin register allocator **) +functor GetReg(val first : int (* start from ``first'' *) + val nRegs : int (* n registers *) + val available : int list) : GETREG = +struct + exception GetReg + val size = first+nRegs + val allRegs = Array.array(size,false) + val preferred = Array.array(size,~1) + + val lastReg = ref first + + fun reset () = (lastReg := first; Array.modify(fn _ => ~1) preferred) + + val _ = app (fn r => Array.update(allRegs,r,true)) available + + fun getreg{pref,stamp:int,proh} = + let (* use preferred registers whenever possible *) + fun checkPreferred [] = find(!lastReg) + | checkPreferred(r::rs) = + if Array.sub(proh,r) <> stamp andalso + Array.sub(allRegs,r) then r + else checkPreferred rs + + (* if not, use the round robin scheme to look for a register *) + and find(start) = + let val limit = Array.length allRegs + fun search r = + if Array.sub(proh,r) <> stamp andalso + Array.sub(allRegs,r) then r + else let val r = r+1 + val r = if r >= limit then first else r + in if r = start then raise GetReg + else search r + end + val found = search(start) + val next = found + 1 + val next = if next >= limit then first else next + in lastReg := next; + found + end + in checkPreferred pref + end + + val lastRegPair = ref first + + fun getpair{pref, stamp:int, proh} = let + (* if not, use the round robin scheme to look for a register *) + fun find(start) = let + val limit = Array.length allRegs + fun search r = + if Array.sub(proh,r) <> stamp + andalso Array.sub(proh,r+1) <> stamp + andalso Array.sub(allRegs,r) + andalso Array.sub(allRegs,r+1) then r + else let + val nxt = r+1 + val nxtR = if nxt+1 >= limit then first else nxt + in + if nxtR = start then raise GetReg else search nxtR + end + val found = search(start) + val next = found + 1 + val next = if next+1 >= limit then first else next + in + lastRegPair := next; + found + end + in find(!lastRegPair) + end + +end + + + + diff --git a/MLRISC/ra/getreg2.sml b/MLRISC/ra/getreg2.sml new file mode 100644 index 0000000..9dac587 --- /dev/null +++ b/MLRISC/ra/getreg2.sml @@ -0,0 +1,47 @@ +(* getreg2.sml + * + * COPYRIGHT (c) 1996 Bell Laboratories. + * + *) + +(** A simple first come/first serve register allocator **) +functor GetReg2(val first : int (* start from ``first'' *) + val nRegs : int (* n registers *) + val available : int list) : GETREG = +struct + exception GetReg + val size = first+nRegs + val allRegs = Array.array(size,false) + val preferred = Array.array(size,~1) + + fun reset () = Array.modify(fn _ => ~1) preferred + + val _ = app (fn r => Array.update(allRegs,r,true)) available + + fun getreg{pref,stamp:int,proh} = + let (* use preferred registers whenever possible *) + fun checkPreferred [] = find(first) + | checkPreferred(r::rs) = + if Array.sub(proh,r) <> stamp andalso + Array.sub(allRegs,r) then r + else checkPreferred rs + + and find(start) = + let val limit = Array.length allRegs + fun search r = + if Array.sub(proh,r) <> stamp andalso + Array.sub(allRegs,r) then r + else let val r = r+1 + in if r >= limit then raise GetReg else search r + end + in search start + end + in checkPreferred pref end + + fun getpair{pref,stamp:int,proh} = raise GetReg (* unimplemented *) + +end + + + + diff --git a/MLRISC/ra/liveness.sml b/MLRISC/ra/liveness.sml new file mode 100644 index 0000000..c9e9ec3 --- /dev/null +++ b/MLRISC/ra/liveness.sml @@ -0,0 +1,188 @@ +(* liveness.sml + * + * COPYRIGHT (c) 1996 Bell Laboratories. + * + *) + +(** liveness.sml - computes live variables **) + + +(* I've moved the parameters of the functor to the function arguments + * so that it is more flexible. + * + * -- Allen 4/28/00 + *) + +(* TODO: The liveness module should take a + * C.cellset list IntHashTable.hash_table + *) + +signature LIVENESS = sig + + structure CFG : CONTROL_FLOW_GRAPH + + type liveness_table = + CellsBasis.SortedCells.sorted_cells IntHashTable.hash_table + + type du = CellsBasis.cell list * CellsBasis.cell list + + (* one def/use step (given defUse function, take du after instruction + * to du before instruction *) + val duStep : (CFG.I.instruction -> du) -> + CFG.I.instruction * du -> du + + (* one step for liveness (on a per-instruction basis) *) + val liveStep : (CFG.I.instruction -> du) -> + CFG.I.instruction * CellsBasis.SortedCells.sorted_cells -> + CellsBasis.SortedCells.sorted_cells + + val liveness : { + defUse : CFG.I.instruction -> du, + getCell : CellsBasis.CellSet.cellset -> CellsBasis.cell list + } -> CFG.cfg + -> {liveIn : liveness_table, + liveOut : liveness_table + } + +end + + +functor Liveness(Flowgraph : CONTROL_FLOW_GRAPH) : LIVENESS = struct + structure CFG = Flowgraph + structure I = CFG.I + structure SC = CellsBasis.SortedCells + structure CS = CellsBasis.CellSet + structure HT = IntHashTable + structure G = Graph + + type liveness_table = SC.sorted_cells HT.hash_table + + type du = CellsBasis.cell list * CellsBasis.cell list + + fun error msg = MLRiscErrorMsg.error("Liveness",msg) + + val NotFound = General.Fail("Liveness: Not Found") (* exception *) + + fun prList(l,msg:string) = let + fun pr([]) = print "\n" + | pr(x::xs) = (print(Int.toString x ^ " "); pr xs) + in print msg; pr l + end + + fun duStep defUse (insn, (def, use)) = let + val (d, u) = defUse insn + val d0 = SC.uniq d + val def' = SC.union (d0, def) + val use' = SC.union (SC.uniq u, SC.difference (use, d0)) + in + (def', use') + end + + fun liveStep defUse (insn, liveout) = let + val (d, u) = defUse insn + in + SC.union (SC.uniq u, SC.difference (liveout, SC.uniq d)) + end + + fun liveness {defUse,getCell} = let + val getCell = SC.uniq o getCell + + fun dataflow (cfg as G.GRAPH graph) = let + val blocks = #nodes graph () + val nNodes = #order graph () + + val liveIn : SC.sorted_cells HT.hash_table = HT.mkTable(nNodes, NotFound) + val liveOut : SC.sorted_cells HT.hash_table = HT.mkTable(nNodes, NotFound) + val uses : SC.sorted_cells HT.hash_table = HT.mkTable(nNodes, NotFound) + val defs : SC.sorted_cells HT.hash_table = HT.mkTable(nNodes, NotFound) + + (* compute block aggregate definition use. *) + fun initDefUse(nid, CFG.BLOCK{insns, ...}) = let + val (def, use) = foldl (duStep defUse) (SC.empty, SC.empty) (!insns) + in + HT.insert uses (nid, use); + HT.insert defs (nid, def) + end + + (* gather the liveOut information *) + fun initLiveOut(nid, CFG.BLOCK{annotations, ...}) = + (case #get CFG.LIVEOUT (!annotations) + of NONE => HT.insert liveOut (nid, SC.empty) + | SOME cs => HT.insert liveOut (nid, getCell cs) + (*esac*)) + + + fun initLiveIn () = + #forall_nodes graph (fn (nid, _) => HT.insert liveIn (nid, SC.empty)) + + fun init() = ( + #forall_nodes graph initDefUse; + #forall_nodes graph initLiveOut; + initLiveIn()) + + fun inB(nid) = let + val use = HT.lookup uses nid + val def = HT.lookup defs nid + val liveOut = HT.lookup liveOut nid + val livein = SC.union(use, SC.difference(liveOut, def)) + val changed = SC.notEq(HT.lookup liveIn nid, livein) + in + HT.insert liveIn (nid, livein); changed + end + + + fun outB(nid, CFG.BLOCK{annotations, ...}) = let + fun inSucc([], acc) = acc + | inSucc(nid::ns, acc) = + inSucc(ns, SC.union(HT.lookup liveIn nid, acc)) + val oldLiveOut = HT.lookup liveOut nid + val newLiveOut = inSucc(#succ graph nid, SC.empty) + in + HT.insert liveOut (nid, newLiveOut); + SC.notEq(oldLiveOut, newLiveOut) + end + + fun bottomup() = let + val visitedTbl : bool HT.hash_table = HT.mkTable(nNodes, NotFound) + fun isVisited nid = + (case HT.find visitedTbl nid of NONE => false | _ => true) + fun visit(nid, changed) = let + fun visitSucc([], changed') = changed' + | visitSucc(nid::ns, changed') = let + val CFG.BLOCK{kind, ...} = #node_info graph nid + in case kind + of CFG.STOP => visitSucc(ns, changed') + | CFG.NORMAL => + if isVisited nid then visitSucc(ns, changed') + else visitSucc(ns, visit(nid, changed')) + | _ => error "visit.visitSucc" + end + + val _ = HT.insert visitedTbl (nid, true) + + val changed' = visitSucc(#succ graph nid, changed); + val block = #node_info graph nid + val change1 = outB(nid, block) + val change2 = inB(nid) + in + changed' orelse change1 orelse change2 + end + + fun forall([], changed) = changed + | forall((nid,block)::rest, changed) = + if isVisited(nid) then forall(rest, changed) + else forall(rest, visit(nid, changed)) + in + forall(blocks, false) + end + + fun repeat n = if bottomup() then repeat(n+1) else (n+1) + + in + init(); repeat 0; {liveIn=liveIn, liveOut=liveOut} + end + + in dataflow + end +end + diff --git a/MLRISC/ra/mem-ra.sml b/MLRISC/ra/mem-ra.sml new file mode 100644 index 0000000..de295f5 --- /dev/null +++ b/MLRISC/ra/mem-ra.sml @@ -0,0 +1,451 @@ +(* + * This module implements the memory coalescing capability of the + * register allocator. + *) +functor MemoryRA(Flowgraph : RA_FLOWGRAPH) : RA_FLOWGRAPH = +struct + + structure G = RAGraph + structure A = Array + structure W = Word + + val debug = false + + open G RACore + + val ra_spill_coal = MLRiscControl.mkCounter ("ra-spill-coalescing", + "RA spill coalesce count") + val ra_spill_prop = MLRiscControl.mkCounter ("ra-spill-propagation", + "RA spill propagation count") + + local + + fun error msg = MLRiscErrorMsg.error("RACore", msg) + + fun concat([], b) = b + | concat(x::a, b) = concat(a, x::b) + + fun chase(NODE{color=ref(ALIASED n),...}) = chase n + | chase n = n + + in + + fun isOn(flag,mask) = Word.andb(flag,mask) <> 0w0 + + fun isMemLoc(SPILLED) = true + | isMemLoc(SPILL_LOC _) = true + | isMemLoc(MEMREG _) = true + | isMemLoc _ = false + + (* + * Spill coalescing. + * Coalesce non-interfering moves between spilled nodes, + * in non-increasing order of move cost. + *) + fun spillCoalescing(GRAPH{bitMatrix, ...}) = let + val member = BM.member(!bitMatrix) + val addEdge = BM.add(!bitMatrix) + in + fn nodesToSpill => let + (* Find moves between two spilled nodes *) + fun collectMoves([], mv') = mv' + | collectMoves(NODE{movelist, color, ...}::ns, mv') = let + fun ins([], mv') = collectMoves(ns, mv') + | ins(MV{status=ref(COALESCED | CONSTRAINED), ...}::mvs, mv') = + ins(mvs, mv') + | ins((mv as MV{dst, src, ...})::mvs, mv') = let + val NODE{color=ref cd, number=nd, ...} = chase dst + val NODE{color=ref cs, number=ns, ...} = chase src + in + if nd=ns then ins(mvs, mv') + else (case (cd, cs) + of (MEMREG _, MEMREG _) => ins(mvs, mv') + | _ => + if isMemLoc cd andalso isMemLoc cs then + ins(mvs, MV.add(mv, mv')) + else + ins(mvs, mv') + (*esac*)) + end + in + if isMemLoc (!color) then ins(!movelist, mv') + else collectMoves(ns, mv') + end + + (* Coalesce moves between two spilled nodes *) + fun coalesceMoves(MV.EMPTY) = () + | coalesceMoves(MV.TREE(MV{dst, src, cost, ...}, _, l, r)) = + let val dst as NODE{color=colorDst, ...} = chase dst + val src = chase src + + (* Make sure that dst has not been assigned a spill location *) + val (dst, src) = + case !colorDst of SPILLED => (dst, src) | _ => (src, dst) + + val dst as NODE{number=d, color=colorDst, adj=adjDst, + defs=defsDst, uses=usesDst, ...} = dst + val src as NODE{number=s, color=colorSrc, adj=adjSrc, + defs=defsSrc, uses=usesSrc, ...} = src + + (* combine adjacency lists *) + fun union([], adjSrc) = adjSrc + | union((n as NODE{color, adj=adjT, + number=t, ...})::adjDst, adjSrc) = + (case !color of + (SPILLED | MEMREG _ | SPILL_LOC _ | PSEUDO) => + if addEdge(s, t) then + (adjT := src :: !adjT; union(adjDst, n::adjSrc)) + else union(adjDst, adjSrc) + | COLORED _ => + if addEdge(s, t) then union(adjDst, n::adjSrc) + else union(adjDst, adjSrc) + | _ => union(adjDst, adjSrc) + ) + + val mvs = MV.merge(l,r) + + fun f() = + ((* print(Int.toString d ^"<->"^Int.toString s^"\n");*) + ra_spill_coal := !ra_spill_coal + 1; + (* unify *) + colorDst := ALIASED src; + adjSrc := union(!adjDst, !adjSrc); + defsSrc := concat(!defsDst, !defsSrc); + usesSrc := concat(!usesDst, !usesSrc); + coalesceMoves mvs) + in + if d = s then coalesceMoves mvs + else (case !colorDst + of MEMREG _ => coalesceMoves mvs + | SPILLED => + if member(d,s) then coalesceMoves mvs else f() + | SPILL_LOC loc => + if member(d,s) then coalesceMoves mvs else f() + | _ => error "coalesceMoves" + (*esac*)) + end + in coalesceMoves(collectMoves(nodesToSpill, MV.EMPTY)) + end + end (*spillCoalesce*) + + (* + * Spill propagation. + * This one uses a simple local lookahead algorithm. + *) + fun spillPropagation(G as GRAPH{bitMatrix, memRegs, ...}) nodesToSpill = + let val spillCoalescing = spillCoalescing G + exception SpillProp + val visited = IntHashTable.mkTable(32, SpillProp) + : bool IntHashTable.hash_table + val hasBeenVisited = IntHashTable.find visited + val hasBeenVisited = fn r => case hasBeenVisited r of NONE => false + | SOME _ => true + val markAsVisited = IntHashTable.insert visited + val member = BM.member(!bitMatrix) + + (* compute savings due to spill coalescing. + * The move list must be associated with a colorable node. + * The pinned flag is to prevent the spill node from coalescing + * two different fixed memory registers. + *) + fun coalescingSavings + (node as NODE{number=me, movelist, pri=ref spillcost, ...}) = + let fun interferes(x,[]) = false + | interferes(x,NODE{number=y, ...}::ns) = + x = y orelse member(x,y) orelse interferes(x, ns) + + fun moveSavings([], pinned, total) = (pinned, total+total) + | moveSavings(MV{status=ref(CONSTRAINED | COALESCED), ...}::mvs, + pinned, total) = + moveSavings(mvs, pinned, total) + | moveSavings(MV{dst, src, cost, ...}::mvs, pinned, total) = + let val NODE{number=d, color=dstCol, ...} = chase dst + val NODE{number=s, color=srcCol, ...} = chase src + + (* How much can be saved by coalescing with the memory + * location x. + *) + fun savings(x) = + if member(d, s) then + (if debug then print "interfere\n" else (); + moveSavings(mvs, pinned, total)) + else if x = ~1 then + (if debug then print (Real.toString cost^"\n") else (); + moveSavings(mvs, pinned, total+cost)) + else if pinned >= 0 andalso pinned <> x then + (* already coalesced with another mem reg *) + (if debug then print "pinned\n" else (); + moveSavings(mvs, pinned, total)) + else + (if debug then print (Real.toString cost^"\n") else (); + moveSavings(mvs, x, total+cost)) + + val _ = if debug then + (print("Savings "^Int.toString d^" <-> "^ + Int.toString s^"=") + ) else () + in if d = s then + (if debug then print "0 (trivial)\n" else (); + moveSavings(mvs, pinned, total) + ) + else + case (!dstCol, !srcCol) of + (SPILLED, PSEUDO) => savings(~1) + | (MEMREG(m, _), PSEUDO) => savings(m) + | (SPILL_LOC s, PSEUDO) => savings(~s) + | (PSEUDO, SPILLED) => savings(~1) + | (PSEUDO, MEMREG(m, _)) => savings(m) + | (PSEUDO, SPILL_LOC s) => savings(~s) + | _ => (if debug then print "0 (other)\n" else (); + moveSavings(mvs, pinned, total)) + end + + (* Find initial budget *) + val _ = if debug then + print("Trying to propagate "^Int.toString me^ + " spill cost="^Real.toString spillcost^"\n") + else () + + val (pinned, savings) = moveSavings(!movelist, ~1, 0.0) + val budget = spillcost - savings + val S = [node] + + (* Find lookahead nodes *) + fun lookaheads([], L) = L + | lookaheads(MV{cost, dst, src, ...}::mvs, L) = + let val dst as NODE{number=d, ...} = chase dst + val src as NODE{number=s, ...} = chase src + fun check(n, node as NODE{color=ref PSEUDO, ...}) = + if n = me orelse member(n, me) then + lookaheads(mvs, L) + else + add(n, node, L, []) + | check _ = lookaheads(mvs, L) + and add(x, x', (l as (c,n' as NODE{number=y, ...}))::L, L') = + if x = y then + lookaheads(mvs, (cost+c, n')::List.revAppend(L', L)) + else add(x, x', L, l::L') + | add(x, x', [], L') = + lookaheads(mvs, (cost, x')::L') + in if d = me then check(s, src) else check(d, dst) + end + + (* Now try to improve it by also propagating the lookahead nodes *) + fun improve([], pinned, budget, S) = (budget, S) + | improve((cost, node as NODE{number=n, movelist, pri, ...})::L, + pinned, budget, S) = + if interferes(n, S) then + (if debug then + print ("Excluding "^Int.toString n^" (interferes)\n") + else (); + improve(L, pinned, budget, S)) + else + let val (pinned', savings) = moveSavings(!movelist, pinned, 0.0) + val defUseSavings = cost+cost + val spillcost = !pri + val budget' = budget - savings - defUseSavings + spillcost + in if budget' <= budget then + (if debug then print ("Including "^Int.toString n^"\n") + else (); + improve(L, pinned', budget', node::S) + ) + else + (if debug then print ("Excluding "^Int.toString n^"\n") + else (); + improve(L, pinned, budget, S)) + end + + in if budget <= 0.0 then (budget, S) + else improve(lookaheads(!movelist, []), pinned, budget, S) + end + + (* Insert all spillable neighbors onto the worklist *) + fun insert([], worklist) = worklist + | insert((node as NODE{color=ref PSEUDO, number, ...})::adj, worklist) = + if hasBeenVisited number + then insert(adj, worklist) + else (markAsVisited (number, true); + insert(adj, node::worklist)) + | insert(_::adj, worklist) = insert(adj, worklist) + + fun insertAll([], worklist) = worklist + | insertAll(NODE{adj, ...}::nodes, worklist) = + insertAll(nodes, insert(!adj, worklist)) + + val marker = SPILLED + + (* Process all nodes from the worklist *) + fun propagate([], spilled) = spilled + | propagate((node as NODE{color=ref PSEUDO, ...})::worklist, + spilled) = + let val (budget, S) = coalescingSavings(node) + fun spillNodes([]) = () + | spillNodes(NODE{color, ...}::nodes) = + (ra_spill_prop := !ra_spill_prop + 1; + color := marker; (* spill the node *) + spillNodes nodes + ) + + in if budget <= 0.0 + then (* propagate spill *) + (if debug then + (print("Propagating "); + app (fn NODE{number=x, ...} => print(Int.toString x^" ")) + S; + print "\n") + else (); + spillNodes S; + (* run spill coalescing *) + spillCoalescing S; + propagate(insertAll(S, worklist), List.revAppend(S,spilled)) + ) + else + propagate(worklist, spilled) + end + | propagate(_::worklist, spilled) = + propagate(worklist, spilled) + + (* Initialize worklist *) + fun init([], worklist) = worklist + | init(NODE{adj, color=ref(c), ...}::rest, worklist) = + if isMemLoc (c) then + init(rest, insert(!adj, worklist)) + else + init(rest, worklist) + + (* + * Iterate between spill coalescing and propagation + *) + fun iterate(spillWorkList, spilled) = + let (* run one round of coalescing first *) + val _ = spillCoalescing spillWorkList + val propagationWorkList = init(spillWorkList, []) + (* iterate on our own spill nodes *) + val spilled = propagate(propagationWorkList, spilled) + (* try the memory registers too *) + val spilled = propagate(!memRegs, spilled) + in spilled + end + + in iterate(nodesToSpill, nodesToSpill) + end + + + (* + * Spill coloring. + * Assign logical spill locations to all the spill nodes. + * + * IMPORTANT BUG FIX: + * Spilled copy temporaries are assigned its own set of colors and + * cannot share with another other nodes. They can share colors with + * themselves however. + * + * spillLoc is the first available (logical) spill location. + *) + + fun spillColoring(GRAPH{spillLoc, copyTmps, mode, ...}) nodesToSpill = let + val proh = A.array(length nodesToSpill, ~1) + val firstColor= !spillLoc + + fun colorCopyTmps(tmps) = let + fun spillTmp(NODE{color as ref(SPILLED), ...}, found) = + (color := SPILL_LOC(firstColor); true) + | spillTmp(_, found) = found + in + if List.foldl spillTmp false tmps then + (spillLoc := !spillLoc + 1; firstColor + 1) + else firstColor + end + + (* color the copy temporaries first *) + val firstColor = + if isOn(mode, RACore.HAS_PARALLEL_COPIES) then + colorCopyTmps(!copyTmps) + else firstColor + + fun selectColor([], _, lastLoc) = (spillLoc := lastLoc) + | selectColor(NODE{color as ref(SPILLED), number, adj, ...}::nodes, + currLoc, lastLoc) = + let + fun neighbors(NODE{color=ref(SPILL_LOC s), ...}) = + A.update(proh, s - firstColor, number) + | neighbors(NODE{color=ref(ALIASED n), ...}) = neighbors n + | neighbors _ = () + + val _ = app neighbors (!adj) + + fun findColor(loc, startingPt) = + if loc = lastLoc then findColor(firstColor, startingPt) + else if A.sub(proh, loc-firstColor) <> number then (loc, lastLoc) + else if loc = startingPt then (lastLoc, lastLoc+1) + else findColor(loc+1, startingPt) + + val (loc, lastLoc) = findColor(currLoc + 1, currLoc) + + in + color := SPILL_LOC(loc); (* mark with color *) + selectColor(nodes, loc, lastLoc) + end + | selectColor(_::nodes, currLoc, lastLoc) = + selectColor(nodes, currLoc, lastLoc) + in + (* color the rest of the spilled nodes *) + selectColor(nodesToSpill, firstColor, !spillLoc + 1) + end (* spillColoring *) + + end (* local *) + + structure F = Flowgraph + + open F + + val SPILL_COALESCING = 0wx100 + val SPILL_COLORING = 0wx200 + val SPILL_PROPAGATION = 0wx400 + + (* + * New services that also perform memory allocation + *) + fun services f = + let val {build, spill=spillMethod, + blockNum, instrNum, programPoint} = F.services f + + (* Mark nodes that are immediately aliased to mem regs; + * These are nodes that need also to be spilled + *) + fun markMemRegs [] = () + | markMemRegs(NODE{number=r, + color as ref(ALIASED + (NODE{color=ref(col), ...})), ...}::ns) = + (case col of MEMREG _ => color := col | _ => (); + markMemRegs(ns)) + | markMemRegs(_::ns) = markMemRegs ns + + (* + * Actual spill phase. + * Perform the memory coalescing phases first, before doing an + * actual spill. + *) + fun spillIt{graph = G as GRAPH{mode, ...}, nodes, + copyInstr, spill, spillSrc, spillCopyTmp, + reload, reloadDst, renameSrc, cellkind} = + let + val nodes = if isOn(mode,SPILL_PROPAGATION) then + spillPropagation G nodes else nodes + val _ = if isOn(mode,SPILL_COALESCING) then + spillCoalescing G nodes else () + val _ = if isOn(mode,SPILL_COLORING) then + spillColoring G nodes else () + val _ = if isOn(mode,SPILL_COALESCING+SPILL_PROPAGATION) + then markMemRegs nodes else () + in spillMethod + {graph=G, nodes=nodes, copyInstr=copyInstr, + spill=spill, spillSrc=spillSrc, spillCopyTmp=spillCopyTmp, + reload=reload, reloadDst=reloadDst, + renameSrc=renameSrc, cellkind=cellkind} + end + in {build=build, spill=spillIt, programPoint=programPoint, + blockNum=blockNum, instrNum=instrNum} + end + +end diff --git a/MLRISC/ra/ra-bitmatrix.sig b/MLRISC/ra/ra-bitmatrix.sig new file mode 100644 index 0000000..f0dcb72 --- /dev/null +++ b/MLRISC/ra/ra-bitmatrix.sig @@ -0,0 +1,20 @@ + +signature RA_BITMATRIX = sig + + datatype bucket = NIL | B of int * int * bucket + datatype hashTable = + SMALL of word list Array.array ref * word + | LARGE of bucket Array.array ref * word + (* | BITMATRIX of Word8Array.array *) + + datatype bitMatrix = + BM of {table:hashTable, + elems:int ref, + edges:int} + + val empty : bitMatrix + val edges : bitMatrix -> int + val size : bitMatrix -> int + val add : bitMatrix -> (int * int) -> bool + val member : bitMatrix -> (int * int) -> bool +end diff --git a/MLRISC/ra/ra-core.sig b/MLRISC/ra/ra-core.sig new file mode 100644 index 0000000..3127769 --- /dev/null +++ b/MLRISC/ra/ra-core.sig @@ -0,0 +1,132 @@ +(* ra-core.sig + * + * COPYRIGHT (c) 2002 Bell Labs, Lucent Technologies. + * + * Note: This is the core of the new register allocator, i.e. the portion + * that manipulates only the interference graph and not the flowgraph. + * + * -- Allen + *) + +signature RA_CORE = +sig + + structure G : RA_GRAPH = RAGraph + structure BM : RA_BITMATRIX + structure MV : RA_PRIORITY_QUEUE where type elem = G.move + structure FZ : RA_PRIORITY_QUEUE where type elem = G.node + + type move_queue + type freeze_queue + + val NO_OPTIMIZATION : G.mode + val BIASED_SELECTION : G.mode + val DEAD_COPY_ELIM : G.mode + val COMPUTE_SPAN : G.mode + val SAVE_COPY_TEMPS : G.mode + val HAS_PARALLEL_COPIES : G.mode + + (* + * Basic functions + *) + + (* dump the interference graph to a stream *) + val dumpGraph : G.interferenceGraph -> TextIO.outstream -> unit + val show : G.interferenceGraph -> G.node -> string + + (* add an edge to the interference graph *) + val addEdge : G.interferenceGraph -> G.node * G.node -> unit + + (* + * Function to create new nodes + *) + val newNodes : G.interferenceGraph -> + {cost:real,pt:G.programPoint,defs:G.C.cell list,uses:G.C.cell list} -> + G.node list (* defs *) + + (* + * Update the colors of cell to reflect the current interference graph + *) + val updateCellColors : G.interferenceGraph -> unit + val updateCellAliases : G.interferenceGraph -> unit + + val markDeadCopiesAsSpilled : G.interferenceGraph -> unit + + (* + * Return the spill location id of the interference graph + *) + val spillLoc : G.interferenceGraph -> int -> int + val spillLocToString : G.interferenceGraph -> int -> string + + (* + * Create an initial set of worklists from a new interference graph + * and a list of moves + *) + val initWorkLists : G.interferenceGraph -> + { moves : G.move list + } -> + { simplifyWkl : G.node list, + moveWkl : move_queue, + freezeWkl : freeze_queue, + spillWkl : G.node list (* high degreee nodes *) + } + + (* + * Clear the interference graph but keep the nodes table intact + *) + val clearGraph : G.interferenceGraph -> unit + + (* + * Remove all adjacency lists from the nodes table. + *) + val clearNodes : G.interferenceGraph -> unit + + (* + * Simplify, Coalease and Freeze until the work list is done + *) + val iteratedCoalescing : + G.interferenceGraph -> + { simplifyWkl : G.node list, + moveWkl : move_queue, + freezeWkl : freeze_queue, + stack : G.node list + } -> + { stack : G.node list + } + + (* + * potentially spill a node. + *) + val potentialSpillNode : + G.interferenceGraph -> + { node : G.node, + cost : real, + stack : G.node list + } -> + { moveWkl : move_queue, + freezeWkl : freeze_queue, + stack : G.node list + } + + (* + * Color nodes on the stack, using Briggs' optimistic spilling. + * Return a list of actual spills + *) + val select : + G.interferenceGraph -> + { stack : G.node list + } -> + { spills : G.node list (* actual spills *) + } + + (* + * Incorporate memory <-> register moves + *) + val initMemMoves : G.interferenceGraph -> unit + + (* + * Compute spill savings due to memory <-> register moves + *) + val moveSavings : G.interferenceGraph -> (int -> real) + +end diff --git a/MLRISC/ra/ra-core.sml b/MLRISC/ra/ra-core.sml new file mode 100644 index 0000000..594fd74 --- /dev/null +++ b/MLRISC/ra/ra-core.sml @@ -0,0 +1,1274 @@ +(* ra-core.sml + * + * COPYRIGHT (c) 2002 Bell Labs, Lucent Technologies. + * + * Overview + * ======== + * This implementation of iterated coalescing differ from the old one in + * various substantial ways: + * + * 1. The move list is prioritized. Higher ranking moves are coalesced first. + * This tends to favor coalescing of moves that has higher priority. + * + * 2. The freeze list is prioritized. Lower ranking nodes are unfrozen + * first. Since freeze disable moves, this tends to disable moves + * of low priority. + * + * 3. The simplify worklist is not kept explicitly during the + * simplify/coalesce/freeze phases. Instead, whenever a non-move + * related node with degree < K is discovered, we call simplify + * to remove it from the graph immediately. + * + * I think this has a few advantages. + * (a) There is less bookkeeping. + * (b) Simplify adds coalescable moves to the move list. + * By doing simplify eagerly, moves are added to the move list + * faster, allowing higher ranking moves to ``preempt'' low + * ranking moves. + * + * 4. Support for register pairs + * + * Important Invariants + * ==================== + * 1. Adjacency list + * a. All nodes on the adjacency list are distinct + * b. nodes with color ALIASED or REMOVED are NOT consider to be + * on the adjacency list + * c. If a node x is COLORED, then we DON'T keep track of + * its adjacency list + * d. When a node has been removed, there aren't any moves associated + * with it. + * 2. Moves + * a. Moves marked WORKLIST are on the worklist. + * b. Moves marked MOVE are NOT on the worklist. + * c. Moves marked LOST are frozen and are in fact never considered again. + * d. Moves marked CONSTRAINED cannot be coalesced because the src and dst + * interfere + * e. Moves marked COALESCED have been coalesced. + * f. The movecnt in a node is always the number of nodes + * currently marked as WORKLIST or MOVE, i.e. the moves that + * are associated with the node. When this is zero, the node is + * considered to be non-move related. + * g. Moves on the move worklist are always distinct. + * 3. + * + * Allen. + * + *) + +local + + val debug = false + val tally = false + +in + +structure RACore : RA_CORE = +struct + + structure G = RAGraph + structure A = Array + structure W = Word + structure W8A = Word8Array + structure W8 = Word8 + structure C = RAGraph.C + + (* For debugging, uncomment Unsafe. *) + structure UA = Unsafe.Array + structure UW8A = Unsafe.Word8Array + + open G + + val verbose = MLRiscControl.mkFlag ("ra-verbose", "RA chattiness") + val ra_spill_coal = MLRiscControl.mkCounter ("ra-spill-coalescing", + "RA spill coalescing counter") + val ra_spill_prop = MLRiscControl.mkCounter ("ra-spill-propagation", + "RA spill propagation counter") + +(* + val good_briggs = MLRiscControl.getCounter "good-briggs" + val bad_briggs = MLRiscControl.getCounter "bad-briggs" + val good_george = MLRiscControl.getCounter "good-george" + val bad_george = MLRiscControl.getCounter "bad-george" + val good_freeze = MLRiscControl.getCounter "good-freeze" + val bad_freeze = MLRiscControl.getCounter "bad-freeze" + *) + + val NO_OPTIMIZATION = 0wx0 + val BIASED_SELECTION = 0wx1 + val DEAD_COPY_ELIM = 0wx2 + val COMPUTE_SPAN = 0wx4 + val SAVE_COPY_TEMPS = 0wx8 + val HAS_PARALLEL_COPIES = 0wx10 + val SPILL_COALESCING = 0wx100 + val SPILL_COLORING = 0wx200 + val SPILL_PROPAGATION = 0wx400 + val MEMORY_COALESCING = + SPILL_COALESCING + SPILL_COLORING + SPILL_PROPAGATION + + val i2s = Int.toString + val r2s = Real.toString + + local + + fun isOn(flag,mask) = Word.andb(flag,mask) <> 0w0 + + fun error msg = MLRiscErrorMsg.error("RACore", msg) + + fun concat([], b) = b + | concat(x::a, b) = concat(a, x::b) + + in + + + structure FZ = RaPriQueue + (type elem=node + fun less(NODE{movecost=ref p1,...}, NODE{movecost=ref p2,...}) = p1 <= p2 + ) + structure MV = RaPriQueue + (type elem=G.move + fun less(MV{cost=p1,...}, MV{cost=p2,...}) = p1 >= p2 + ) + + type move_queue = MV.pri_queue + type freeze_queue = FZ.pri_queue + + + (* + * Utility functions + *) + fun chase(NODE{color=ref(ALIASED r), ...}) = chase r + | chase x = x + + fun cellId(C.CELL{id, ...}) = id + + fun col2s col = + case col of + PSEUDO => "" + | REMOVED => "r" + | ALIASED _ => "a" + | COLORED c => "["^i2s c^"]" + | MEMREG (_,m) => "m" ^ "{" ^ C.toString m ^ "}" + | SPILLED => "s" + | SPILL_LOC c => "s" ^ "{" ^ i2s c ^ "}" + + fun node2s (NODE{cell, color, pri,...}) = i2s(cellId cell)^col2s(!color) + + fun show G (node as NODE{pri,...}) = + node2s node^(if !verbose then "("^r2s(!pri)^")" else "") + + (* + * Dump the interference graph + *) + fun dumpGraph(G as G.GRAPH{nodes, showReg, K,...}) stream = + let fun pr s = TextIO.output(stream, s) + val show = show G + fun prMove(MV{src, dst, status=ref(WORKLIST | BRIGGS_MOVE | GEORGE_MOVE), + cost,...}) = + pr(node2s(chase dst)^" <- "^node2s(chase src)^ + "("^r2s(cost)^") ") + | prMove _ = () + + fun prAdj(n,n' as NODE{adj, degree, uses, defs, + color, pri, movecnt, movelist, ...}) = + (pr(show n'); + if !verbose then pr(" deg="^i2s(!degree)) else (); + (case !color of + ALIASED n => (pr " => "; pr(show n); pr "\n") + | _ => + (pr(" <-->"); + app (fn n => (pr " "; pr(show n))) (!adj); pr "\n"; + if !verbose andalso !movecnt > 0 then + (pr("\tmoves "^i2s(!movecnt)^": "); + app prMove (!movelist); + pr "\n" + ) + else () + ) + ) + ) + + in pr("=========== K="^i2s K^" ===========\n"); + app prAdj (ListMergeSort.sort (fn ((x, _),(y, _)) => x > y) + (IntHashTable.listItemsi nodes)) + end + + + (* + * Function to create new nodes. + * Note: it is up to the caller to remove all dedicated registers. + *) + fun newNodes (G.GRAPH{nodes, firstPseudoR, ...}) = let + val getnode = IntHashTable.lookup nodes + val addnode = IntHashTable.insert nodes + + fun colorOf(C.CELL{col=ref(C.MACHINE r), ...}) = r + | colorOf(C.CELL{id, ...}) = id + + fun getNode(cell as C.CELL{col, ...}) = + (getnode(colorOf cell)) + handle _ => let + val reg = colorOf cell + val col = + case !col + of C.MACHINE r => G.COLORED r + | C.PSEUDO => G.PSEUDO + | C.ALIASED _ => error "getNode: ALIASED" + | C.SPILLED => error "getNode: SPILLED" + val node = + NODE{number=reg, + cell=cell, color= ref col, degree=ref 0, + adj=ref[], movecnt=ref 0, movelist=ref[], + movecost=ref 0.0, pri=ref 0.0, defs=ref[], + uses=ref[]} + + in addnode(reg, node); node + end + + + fun defUse{defs, uses, pt, cost} = let + fun def cell = let + val node as NODE{pri, defs, ...} = getNode (cell) + in + pri := !pri + cost; + defs := pt :: !defs; + node + end + fun use cell = let + val node as NODE{pri, uses, ...} = getNode(cell) + in + pri := !pri + cost; + uses := pt :: !uses + end + in + List.app use uses; + List.map def defs + end + in defUse + end + + (* + * Add an edge (x, y) to the interference graph. + * Nop if the edge already exists. + * Note: adjacency lists of colored nodes are not stored + * within the interference graph to save space. + * Now we allow spilled node to be added to the edge; these do not + * count toward the degree. + *) + fun addEdge(GRAPH{bitMatrix,...}) = + let val addBitMatrix = BM.add(!bitMatrix) + in fn (x as NODE{number=xn, color=colx, adj=adjx, degree=degx, ...}, + y as NODE{number=yn, color=coly, adj=adjy, degree=degy, ...}) => + if xn = yn then () + else if addBitMatrix(xn, yn) then + (case (!colx, !coly) of + (PSEUDO, PSEUDO) => (adjx := y:: !adjx; degx := !degx+1; + adjy := x:: !adjy; degy := !degy+1) + | (PSEUDO, COLORED _) => (adjx := y:: !adjx; degx := !degx+1) + | (PSEUDO, MEMREG _) => (adjx := y:: !adjx; adjy := x:: !adjy) + | (PSEUDO, SPILL_LOC _) => (adjx := y:: !adjx; adjy := x:: !adjy) + | (PSEUDO, SPILLED) => () + | (COLORED _, PSEUDO) => (adjy := x:: !adjy; degy := !degy+1) + | (COLORED _, COLORED _) => () (* x<>y, can't alias *) + | (COLORED _, MEMREG _) => () (* x<>y, can't alias *) + | (COLORED _, SPILL_LOC _) => () (* x<>y, can't alias *) + | (COLORED _, SPILLED) => () + | (MEMREG _, PSEUDO) => (adjx := y:: !adjx; adjy := x:: !adjy) + | (MEMREG _, COLORED _) => () (* x<>y, can't alias *) + | (MEMREG _, MEMREG _) => () (* x<>y, can't alias *) + | (MEMREG _, SPILL_LOC _) => () (* x<>y, can't alias *) + | (MEMREG _, SPILLED) => () + | (SPILL_LOC _, PSEUDO) => (adjx := y:: !adjx; adjy := x:: !adjy) + | (SPILL_LOC _, COLORED _) => () (* x<>y, can't alias *) + | (SPILL_LOC _, MEMREG _) => () (* x<>y, can't alias *) + | (SPILL_LOC _, SPILL_LOC _) => () (* x<>y, can't alias *) + | (SPILL_LOC _, SPILLED) => () (* x<>y, can't alias *) + | (SPILLED, _) => () + | (colx, coly) => + error("addEdge x="^i2s xn^col2s colx^" y="^i2s yn^col2s coly) + ) + else () (* edge already there *) + end + + fun isFixedMem(SPILL_LOC _) = true + | isFixedMem(MEMREG _) = true + | isFixedMem(SPILLED) = true + | isFixedMem _ = false + + fun isFixed(COLORED _) = true + | isFixed c = isFixedMem(c) + + (* + * Initialize a list of worklists + *) + fun initWorkLists + (GRAPH{nodes, K, bitMatrix, pseudoCount, + firstPseudoR, deadCopies, memMoves, mode, ...}) {moves} = + let + (* Filter moves that already have an interference + * Also initialize the movelist and movecnt fields at this time. + *) + val member = BM.member(!bitMatrix) + + fun setInfo(NODE{color=ref PSEUDO, movecost, movecnt, movelist,...}, + mv, cost) = + (movelist := mv :: !movelist; + movecnt := !movecnt + 1; + movecost := !movecost + cost + ) + | setInfo _ = () + + + (* filter moves that cannot be coalesced *) + fun filter([], mvs', mem) = (mvs', mem) + | filter((mv as MV{src as NODE{number=x, color=ref colSrc,...}, + dst as NODE{number=y, color=ref colDst,...}, + cost, ...})::mvs, + mvs', mem) = + if isFixed colSrc andalso isFixed colDst then + filter(mvs, mvs', mem) + else if isFixedMem colSrc orelse isFixedMem colDst then + filter(mvs, mvs', mv::mem) + else if member(x, y) then + filter(mvs, mvs', mem) + else + (setInfo(src, mv, cost); + setInfo(dst, mv, cost); + filter(mvs, MV.add(mv, mvs'), mem)) + + (* like filter but does dead copy elimination *) + fun filterDead([], mvs', mem, dead) = (mvs', mem, dead) + | filterDead((mv as + MV{src as NODE{number=x, color as ref colSrc, + pri, adj, uses,...}, + dst as NODE{number=y, cell=celly, color=ref colDst, + defs=dstDefs, uses=dstUses,...}, + cost, ...})::mvs, + mvs', mem, dead) = + if (isFixed colSrc andalso isFixed colDst) then + filterDead(mvs, mvs', mem, dead) + else if isFixedMem colSrc orelse isFixedMem colDst then + filterDead(mvs, mvs', mv::mem, dead) + else (case (colSrc, colDst, dstDefs, dstUses) + of (_, PSEUDO, ref [pt], ref [])=> + (* eliminate dead copy *) + let fun decDegree [] = () + | decDegree(NODE{color=ref PSEUDO, degree, ...}::adj) = + (degree := !degree - 1; decDegree adj) + | decDegree(_::adj) = decDegree adj + fun elimUses([], _, uses, pri, cost) = (uses, pri) + | elimUses(pt::pts, pt':G.programPoint, uses, pri, cost) = + if pt = pt' then elimUses(pts, pt', uses, pri-cost, cost) + else elimUses(pts, pt', pt::uses, pri, cost) + val (uses', pri') = elimUses(!uses, pt, [], !pri, cost); + in pri := pri'; + uses := uses'; + color := ALIASED src; + decDegree(!adj); + filterDead(mvs, mvs', mem, celly::dead) + end + | _ => (* normal moves *) + if member(x, y) (* moves that interfere *) + then filterDead(mvs, mvs', mem, dead) + else (setInfo(src, mv, cost); + setInfo(dst, mv, cost); + filterDead(mvs, MV.add(mv, mvs'), mem, dead) + ) + ) + + (* + * Scan all nodes in the graph and check which worklist they should + * go into. + *) + fun collect([], simp, fz, moves, spill, pseudos) = + (pseudoCount := pseudos; + {simplifyWkl = simp, + moveWkl = moves, + freezeWkl = fz, + spillWkl = spill + } + ) + | collect(node::rest, simp, fz, moves, spill, pseudos) = + (case node of + NODE{color=ref PSEUDO, movecnt, degree, ...} => + if !degree >= K then + collect(rest, simp, fz, moves, node::spill, pseudos+1) + else if !movecnt > 0 then + collect(rest, simp, FZ.add(node, fz), + moves, spill, pseudos+1) + else + collect(rest, node::simp, fz, moves, spill, + pseudos+1) + | _ => collect(rest, simp, fz, moves, spill, pseudos) + ) + + (* First build the move priqueue *) + val (mvs, mem) = + if isOn(mode, DEAD_COPY_ELIM) then + let val (mvs, mem, dead) = filterDead(moves, MV.EMPTY, [], []) + in deadCopies := dead; (mvs, mem) + end + else filter(moves, MV.EMPTY, []) + + in memMoves := mem; (* memory moves *) + collect(IntHashTable.listItems nodes, [], FZ.EMPTY, mvs, [], 0) + end + + (* + * Return a regmap that returns the current spill location + * during spilling. + *) + fun spillLoc(G.GRAPH{nodes,...}) = + let val getnode = IntHashTable.lookup nodes + fun num(NODE{color=ref(ALIASED n), ...}) = num n + | num(NODE{color=ref(SPILLED), number, ...}) = number + | num(NODE{color=ref(SPILL_LOC s), number, ...}) = ~s + | num(NODE{color=ref(MEMREG(m, _)), number, ...}) = m + | num(NODE{number, ...}) = number + fun lookup r = num(getnode r) handle _ => r + in lookup + end + + fun spillLocToString(G.GRAPH{nodes,...}) = + let val getnode = IntHashTable.lookup nodes + fun num(NODE{color=ref(ALIASED n), ...}) = num n + | num(NODE{color=ref(SPILLED), cell, ...}) = "spilled "^C.toString cell + | num(NODE{color=ref(SPILL_LOC s), number, ...}) = "frame "^i2s s + | num(NODE{color=ref(MEMREG(_,m)), ...}) = "memreg "^C.toString m + | num(NODE{number, ...}) = "error "^i2s number + fun lookup r = num(getnode r) + in lookup + end + + (* + * Core phases: + * Simplify, coalesce, freeze. + * + * NOTE: When a node's color is REMOVED or ALIASED, + * it is not considered to be part of the adjacency list + * + * 1. The move list has no duplicate + * 2. The freeze list may have duplicates + *) + fun iteratedCoalescingPhases + (G as GRAPH{K, bitMatrix, spillFlag, trail, stamp, mode, + pseudoCount, ...}) = + let val member = BM.member(!bitMatrix) + val addEdge = addEdge G + val show = show G + val memoryCoalescingOn = isOn(mode, MEMORY_COALESCING) + + (* + * SIMPLIFY node: + * precondition: node must be part of the interference graph (PSEUDO) + *) + fun simplify(node as NODE{color, number, adj, degree, (*pair,*)...}, + mv, fz, stack) = + let val _ = if debug then print("Simplifying "^show node^"\n") else () + fun forallAdj([], mv, fz, stack) = (mv, fz, stack) + | forallAdj((n as NODE{color=ref PSEUDO, degree as ref d,...})::adj, + mv, fz, stack) = + if d = K then + let val (mv, fz, stack) = lowDegree(n, mv, fz, stack) + in forallAdj(adj, mv, fz, stack) end + else (degree := d - 1; forallAdj(adj, mv, fz, stack)) + | forallAdj(_::adj, mv, fz, stack) = forallAdj(adj, mv, fz, stack) + in color := REMOVED; + pseudoCount := !pseudoCount - 1; + forallAdj(!adj, mv, fz, node::stack) (* push onto stack *) + end (* simplify *) + + and simplifyAll([], mv, fz, stack) = (mv, fz, stack) + | simplifyAll(node::simp, mv, fz, stack) = + let val (mv, fz, stack) = simplify(node, mv, fz, stack) + in simplifyAll(simp, mv, fz, stack) end + + (* + * Decrement the degree of a pseudo node. + * precondition: node must be part of the interference graph + * If the degree of the node is now K-1. + * Then if (a) the node is move related, freeze it. + * (b) the node is non-move related, simplify it + * + * node -- the node to decrement degree + * mv -- queue of move candidates to be coalesced + * fz -- queue of freeze candidates + * stack -- stack of removed nodes + *) + and lowDegree(node as NODE{degree as ref d, movecnt, adj, color,...}, + (* false, *) mv, fz, stack) = + (* normal edge *) + (if debug then + print("DecDegree "^show node^" d="^i2s(d-1)^"\n") else (); + degree := K - 1; + (* node is now low degree!!! *) + let val mv = enableMoves(!adj, mv) + in if !movecnt > 0 then (* move related *) + (mv, FZ.add(node, fz), stack) + else (* non-move related, simplify now! *) + simplify(node, mv, fz, stack) + end + ) + (* + | decDegree(node as NODE{degree as ref d, movecnt, adj, color,...}, + true, mv, fz, stack) = (* register pair edge *) + (degree := d - 2; + if d >= K andalso !degree < K then + (* node is now low degree!!! *) + let val mv = enableMoves(node :: !adj, mv) + in if !movecnt > 0 then (* move related *) + (mv, FZ.add(node, fz), stack) + else (* non-move related, simplify now! *) + simplify(node, mv, fz, stack) + end + else + (mv, fz, stack) + ) + *) + + (* + * Enable moves: + * given: a list of nodes (some of which are not in the graph) + * do: all moves associated with these nodes are inserted + * into the move worklist + *) + and enableMoves([], mv) = mv + | enableMoves(n::ns, mv) = + let (* add valid moves onto the worklist. + * there are no duplicates on the move worklist! + *) + fun addMv([], ns, mv) = enableMoves(ns, mv) + | addMv((m as MV{status, hicount as ref hi, ...})::rest, + ns, mv) = + (case !status of + (BRIGGS_MOVE | GEORGE_MOVE) => + (* decrements hi, when hi <= 0 enable move *) + if hi <= 1 then + (status := WORKLIST; addMv(rest, ns, MV.add(m, mv))) + else + (hicount := hi-1; addMv(rest, ns, mv)) + | _ => addMv(rest, ns, mv) + ) + in (* make sure the nodes are actually in the graph *) + case n of + NODE{movelist, color=ref PSEUDO, movecnt,...} => + if !movecnt > 0 then (* is it move related? *) + addMv(!movelist, ns, mv) + else + enableMoves(ns, mv) + | _ => enableMoves(ns, mv) + end (* enableMoves *) + + (* + * Brigg's conservative coalescing test: + * given: an unconstrained move (x, y) + * return: true or false + *) + fun conservative(hicount, + x as NODE{degree=ref dx, adj=xadj, (* pair=px, *) ...}, + y as NODE{degree=ref dy, adj=yadj, (* pair=py, *) ...}) = + dx + dy < K orelse + let (* + * hi -- is the number of nodes with deg > K (without duplicates) + * n -- the number of nodes that have deg = K but not neighbors + * of both x and y + * We use the movecnt as a flag indicating whether + * a node has been visited. A negative count is used to mark + * a visited node. + *) + fun undo([], extraHi) = + extraHi <= 0 orelse (hicount := extraHi; false) + | undo(movecnt::tr, extraHi) = + (movecnt := ~1 - !movecnt; undo(tr, extraHi)) + fun loop([], [], hi, n, tr) = undo(tr, (hi + n) - K + 1) + | loop([], yadj, hi, n, tr) = loop(yadj, [], hi, n, tr) + | loop(NODE{color, movecnt as ref m, degree=ref deg, ...}::vs, + yadj, hi, n, tr) = + (case !color of + COLORED _ => + if m < 0 then + (* node has been visited before *) + loop(vs, yadj, hi, n, tr) + else + (movecnt := ~1 - m; (* mark as visited *) + loop(vs, yadj, hi+1, n, movecnt::tr)) + | PSEUDO => + if deg < K then loop(vs, yadj, hi, n, tr) + else if m >= 0 then + (* node has never been visited before *) + (movecnt := ~1 - m; (* mark as visited *) + if deg = K + then loop(vs, yadj, hi, n+1, movecnt::tr) + else loop(vs, yadj, hi+1, n, movecnt::tr) + ) + else + (* node has been visited before *) + if deg = K then loop(vs, yadj, hi, n-1, tr) + else loop(vs, yadj, hi, n, tr) + | _ => loop(vs, yadj, hi, n, tr) (* REMOVED/ALIASED *) + ) + in loop(!xadj, !yadj, 0, 0, []) end + + (* + * Heuristic used to determine whether a pseudo and machine register + * can be coalesced. + * Precondition: + * The two nodes are assumed not to interfere. + *) + fun safe(hicount, reg, NODE{adj, ...}) = + let fun loop([], hi) = hi = 0 orelse (hicount := hi; false) + | loop(n::adj, hi) = + (case n of + (* Note: Actively we only have to consider pseudo nodes and not + * nodes that are removed, since removed nodes either have + * deg < K or else optimistic spilling must be in effect! + *) + NODE{degree,number,color=ref(PSEUDO | REMOVED), ...} => + if !degree < K orelse member(reg, number) then loop(adj, hi) + else loop(adj, hi+1) + | _ => loop(adj, hi) + ) + in loop(!adj, 0) end + + (* + * Decrement the active move count of a node. + * When the move count reaches 0 and the degree < K + * simplify the node immediately. + * Precondition: node must be a node in the interference graph + * The node can become a non-move related node. + *) + fun decMoveCnt + (node as NODE{movecnt, color=ref PSEUDO, degree, movecost,...}, + cnt, cost, mv, fz, stack) = + let val newCnt = !movecnt - cnt + in movecnt := newCnt; + movecost := !movecost - cost; + if newCnt = 0 andalso !degree < K (* low degree and movecnt = 0 *) + then (simplify(node, mv, fz, stack)) + else (mv, fz, stack) + end + | decMoveCnt(_, _, _, mv, fz, stack) = (mv, fz, stack) + + (* + * Combine two nodes u and v into one. + * v is replaced by u + * u is the new combined node + * Precondition: u <> v and u and v must be unconstrained + * + * u, v -- two nodes to be merged, must be distinct! + * coloingv -- is u a colored node? + * mvcost -- the cost of the move that has been eliminated + * mv -- the queue of moves + * fz -- the queue of freeze candidates + * stack -- stack of removed nodes + *) + fun combine(u, v, coloringv, mvcost, mv, fz, stack) = + let val NODE{color=vcol, pri=pv, movecnt=cntv, movelist=movev, adj=adjv, + defs=defsv, uses=usesv, degree=degv, ...} = v + val NODE{color=ucol, pri=pu, movecnt=cntu, movelist=moveu, adj=adju, + defs=defsu, uses=usesu, degree=degu, ...} = u + + (* merge movelists together, taking the opportunity + * to prune the lists + *) + fun mergeMoveList([], mv) = mv + | mergeMoveList((m as MV{status,hicount,src,dst,...})::rest, mv) = + (case !status of + BRIGGS_MOVE => + (* if we are changing a copy from v <-> w to uv <-> w + * makes sure we reset its trigger count, so that it + * will be tested next. + *) + (if coloringv then + (status := GEORGE_MOVE; + hicount := 0; + if debug then + print ("New george "^show src^"<->"^show dst^"\n") + else () + ) + else (); + mergeMoveList(rest, m::mv) + ) + | GEORGE_MOVE => + (* if u is colored and v is not, then the move v <-> w + * becomes uv <-> w where w is colored. This can always + * be discarded. + *) + (if coloringv then mergeMoveList(rest, mv) + else mergeMoveList(rest, m::mv) + ) + | WORKLIST => mergeMoveList(rest, m::mv) + | _ => mergeMoveList(rest, mv) + ) + + (* Form combined node; add the adjacency list of v to u *) + fun union([], mv, fz, stack) = (mv, fz, stack) + | union((t as NODE{color, degree, ...})::adj, + mv, fz, stack) = + (case !color of + (COLORED _ | SPILL_LOC _ | MEMREG _ | SPILLED) => + (addEdge(t, u); union(adj, mv, fz, stack)) + | PSEUDO => + (addEdge(t, u); + let + val d = !degree + in + if d = K then + let val (mv, fz, stack) = lowDegree(t, mv, fz, stack) + in union(adj, mv, fz, stack) + end + else (degree := d - 1; union(adj, mv, fz, stack)) + end + ) + | _ => union(adj, mv, fz, stack) + ) + in vcol := ALIASED u; + (* combine the priority of both: + * note that since the mvcost has been counted twice + * in the original priority, we substract it twice + * from the new priority. + *) + pu := !pu + !pv - mvcost - mvcost; + (* combine the def/use pts of both nodes. + * Strictly speaking, the def/use points of the move + * should also be removed. But since we never spill + * a coalesced node and only spilling makes use of these + * def/use points, we are safe for now. + * + * New comment: with spill propagation, it is necessary + * to keep track of the spilled program points. + *) + if memoryCoalescingOn then + (defsu := concat(!defsu, !defsv); + usesu := concat(!usesu, !usesv) + ) + else (); + case !ucol of + PSEUDO => + (if !cntv > 0 then moveu := mergeMoveList(!movev, !moveu) + else (); + movev := []; (* XXX kill the list to free space *) + cntu := !cntu + !cntv + ) + | _ => () + ; + cntv := 0; + + let val removingHi = !degv >= K andalso (!degu >= K orelse coloringv) + (* Update the move count of the combined node *) + val (mv, fz, stack) = union(!adjv, mv, fz, stack) + val (mv, fz, stack) = + decMoveCnt(u, 2, mvcost + mvcost, mv, fz, stack) + (* If either v or u are high degree then at least one high degree + * node is removed from the neighbors of uv after coalescing + *) + val mv = if removingHi then enableMoves(!adju, mv) else mv + in coalesce(mv, fz, stack) + end + end + + (* + * COALESCE: + * Repeat coalescing and simplification until mv is empty. + *) + and coalesce(MV.EMPTY, fz, stack) = (fz, stack) + | coalesce(MV.TREE(MV{src, dst, status, hicount, cost, ...}, _, l, r), + fz, stack) = + let (* val _ = coalesce_count := !coalesce_count + 1 *) + val u = chase src + val v as NODE{color=ref vcol, ...} = chase dst + (* make u the colored one *) + val (u as NODE{number=u', color=ref ucol, ...}, + v as NODE{number=v', color=ref vcol, ...}) = + case vcol of + COLORED _ => (v, u) + | _ => (u, v) + val _ = if debug then print ("Coalescing "^show u^"<->"^show v + ^" ("^r2s cost^")") else () + val mv = MV.merge(l, r) + fun coalesceIt(status, v) = + (status := COALESCED; + if !spillFlag then trail := UNDO(v, status, !trail) else () + ) + in if u' = v' then (* trivial move *) + let val _ = if debug then print(" Trivial\n") else () + val _ = coalesceIt(status, v) + in coalesce(decMoveCnt(u, 2, cost+cost, mv, fz, stack)) + end + else + (case vcol of + COLORED _ => + (* two colored nodes cannot be coalesced *) + (status := CONSTRAINED; + if debug then print(" Both Colored\n") else (); + coalesce(mv, fz, stack)) + | _ => + if member(u', v') then + (* u and v interfere *) + let val _ = status := CONSTRAINED + val _ = if debug then print(" Interfere\n") else (); + val (mv, fz, stack) = + decMoveCnt(u, 1, cost, mv, fz, stack) + in coalesce(decMoveCnt(v, 1, cost, mv, fz, stack)) end + else + case ucol of + COLORED _ => (* u is colored, v is not *) + if safe(hicount, u', v) then + (if debug then print(" Safe\n") else (); + (*if tally then good_george := !good_george+1 else ();*) + coalesceIt(status, v); + combine(u, v, true, cost, mv, fz, stack) + ) + else + ((* remove it from the move list *) + status := GEORGE_MOVE; + (*if tally then bad_george := !bad_george + 1 else ();*) + if debug then print(" Unsafe\n") else (); + coalesce(mv, fz, stack) + ) + | _ => (* u, v are not colored *) + if conservative(hicount, u, v) then + (if debug then print(" OK\n") else (); + (*if tally then good_briggs := !good_briggs+1 else ();*) + coalesceIt(status, v); + combine(u, v, false, cost, mv, fz, stack) + ) + else (* conservative test failed *) + ((* remove it from the move list *) + status := BRIGGS_MOVE; + (*if tally then bad_briggs := !bad_briggs + 1 else ();*) + if debug then print(" Non-conservative\n") else (); + coalesce(mv, fz, stack) + ) + ) + end + + (* mark a node n as frozen: + * Go thru all the moves (n, m), decrement the move count of m + * precondition: degree must be < K + * movecnt must be > 0 + * node -- the node to be frozen + * fz -- a queue of freeze candidates + * stack -- stack of removed nodes + *) + fun markAsFrozen( + node as NODE{number=me, degree, + adj, movelist, movecnt as ref mc,...}, + fz, stack) = + let val _ = if debug then print("Mark as frozen "^i2s me^"\n") + else () + (* eliminate all moves, return a list of nodes that + * can be simplified + *) + fun elimMoves([], simp) = simp + | elimMoves(MV{status, src, dst, ...}::mvs, simp) = + case !status of + WORKLIST => error "elimMoves" + | (BRIGGS_MOVE | GEORGE_MOVE) => (* mark move as lost *) + let val _ = status := LOST + val src as NODE{number=s,...} = chase src + val you = if s = me then chase dst else src + in case you of + NODE{color=ref(COLORED _),...} => + elimMoves(mvs, simp) + | NODE{movecnt as ref c, degree, ...} => (* pseudo *) + (movecnt := c - 1; + if c = 1 andalso !degree < K then + elimMoves(mvs, you::simp) + else + elimMoves(mvs, simp) + ) + end + | _ => elimMoves(mvs, simp) + + (* Note: + * We are removing a high degree node, so try to enable all moves + * associated with its neighbors. + *) + + val mv = if !degree >= K then enableMoves(!adj, MV.EMPTY) + else MV.EMPTY + + in if mc = 0 + then simplify(node, mv, fz, stack) + else + (movecnt := 0; + simplifyAll(node::elimMoves(!movelist, []), mv, fz, stack) + ) + end + + (* + * FREEZE: + * Repeat picking + * a node with degree < K from the freeze list and freeze it. + * fz -- queue of freezable nodes + * stack -- stack of removed nodes + * undo -- trail of coalesced moves after potential spill + *) + fun freeze(fz, stack) = + let fun loop(FZ.EMPTY, FZ.EMPTY, stack) = stack + | loop(FZ.EMPTY, newFz, _) = error "no freeze candidate" + | loop(FZ.TREE(node, _, l, r), newFz, stack) = + let val fz = FZ.merge(l, r) + in case node of + (* This node has not been simplified + * This must be a move-related node. + *) + NODE{color=ref PSEUDO, degree, ...} => + if !degree >= K (* can't be frozen yet? *) + then + ((*if tally then bad_freeze := !bad_freeze+1 else ();*) + loop(fz, FZ.add(node,newFz), stack)) + else (* freeze node *) + let val _ = + if debug then print("Freezing "^show node^"\n") + else () + (*val _ = + if tally then good_freeze := !good_freeze + 1 + else ()*) + val (mv, fz, stack) = markAsFrozen(node, fz, stack) + val (fz, stack) = coalesce(mv, fz, stack) + in ((* print("[freezing again "^ + i2s(!blocked)^"]"); *) + loop(FZ.merge(fz, newFz), FZ.EMPTY, stack)) + end + | _ => + ((*if tally then bad_freeze := !bad_freeze + 1 else ();*) + loop(fz, newFz, stack)) + end + in (* print("[freezing "^i2s(!blocked)^"]"); *) + loop(fz, FZ.EMPTY, stack) + end + + (* + * Sort simplify worklist in increasing degree. + * Matula and Beck suggests that we should always remove the + * node with the lowest degree first. This is an approximation of + * the idea. + *) + (* + val buckets = A.array(K, []) : G.node list A.array + fun sortByDegree nodes = + let fun insert [] = () + | insert((n as NODE{degree=ref deg, ...})::rest) = + (UA.update(buckets, deg, n::UA.sub(buckets, deg)); insert rest) + fun collect(~1, L) = L + | collect(deg, L) = collect(deg-1, concat(UA.sub(buckets, deg), L)) + in insert nodes; + collect(K-1, []) + end + *) + + (* + * Iterate over simplify, coalesce, freeze + *) + fun iterate{simplifyWkl, moveWkl, freezeWkl, stack} = + let (* simplify everything *) + val (mv, fz, stack) = + simplifyAll((* sortByDegree *) simplifyWkl, + moveWkl, freezeWkl, stack) + val (fz, stack) = coalesce(mv, fz, stack) + val stack = freeze(fz, stack) + in {stack=stack} + end + in {markAsFrozen=markAsFrozen, iterate=iterate} + end + + (* + * The main entry point for the iterated coalescing algorithm + *) + fun iteratedCoalescing G = + let val {iterate,...} = iteratedCoalescingPhases G + in iterate end + + + (* + * Potential Spill: + * Find some node on the spill list and just optimistically + * remove it from the graph. + *) + fun potentialSpillNode (G as G.GRAPH{spillFlag,...}) = let + val {markAsFrozen,...} = iteratedCoalescingPhases G + in fn {node, cost, stack} => + let val _ = spillFlag := true (* potential spill found *) + val (mv, fz, stack) = markAsFrozen(node, FZ.EMPTY, stack) + in if cost < 0.0 then + let val NODE{color, ...} = node in color := SPILLED end + else (); + {moveWkl=mv, freezeWkl=fz, stack=stack} + end + end + + + + (* + * SELECT: + * Using optimistic spilling + *) + fun select(G as GRAPH{getreg, getpair, trail, firstPseudoR, stamp, + spillFlag, proh, mode, ...}) {stack} = + let + fun undoCoalesced END = () + | undoCoalesced(UNDO(NODE{number, color, ...}, status, trail)) = + (status := BRIGGS_MOVE; + if number < firstPseudoR then () else color := PSEUDO; + undoCoalesced trail + ) + val show = show G + + (* Fast coloring, assume no spilling can occur *) + fun fastcoloring([], stamp) = ([], stamp) + | fastcoloring((node as NODE{color, (* pair, *) adj, ...})::stack, + stamp) = + let (* set up the proh array *) + fun neighbors [] = () + | neighbors(r::rs) = + let fun mark(NODE{color=ref(COLORED c), ...}) = + (UA.update(proh, c, stamp); neighbors rs) + | mark(NODE{color=ref(ALIASED n), ...}) = mark n + | mark _ = neighbors rs + in mark r end + val _ = neighbors(!adj) + in color := COLORED(getreg{pref=[], proh=proh, stamp=stamp}); + fastcoloring(stack, stamp+1) + end + + (* Briggs' optimistic spilling heuristic *) + fun optimistic([], spills, stamp) = (spills, stamp) + | optimistic((node as NODE{color=ref(SPILLED), ...})::stack, + spills, stamp) = + optimistic(stack, node::spills, stamp) + | optimistic((node as NODE{color as ref REMOVED, (* pair, *) adj, ...})::stack, + spills, stamp) = let + (* set up the proh array *) + fun neighbors [] = () + | neighbors(r::rs) = + let fun mark(NODE{color=ref(COLORED c), ...}) = + (UA.update(proh, c, stamp); neighbors rs) + | mark(NODE{color=ref(ALIASED n), ...}) = mark n + | mark _ = neighbors rs + in mark r end + val _ = neighbors(!adj) + val spills = + let val col = getreg{pref=[], proh=proh, stamp=stamp} + in color := COLORED col; spills + end handle _ => node::spills + in optimistic(stack, spills, stamp+1) + end + | optimistic _ = error "optimistic" + + (* Briggs' optimistic spilling heuristic, with biased coloring *) + fun biasedColoring([], spills, stamp) = (spills, stamp) + | biasedColoring((node as NODE{color=ref(SPILLED), ...})::stack, + spills, stamp) = + biasedColoring(stack, node::spills, stamp) + | biasedColoring((node as NODE{color=ref(SPILL_LOC _), ...})::stack, + spills, stamp) = + biasedColoring(stack, node::spills, stamp) + | biasedColoring((node as NODE{color=ref(MEMREG _), ...})::stack, + spills, stamp) = + biasedColoring(stack, node::spills, stamp) + | biasedColoring( + (node as NODE{number, color, adj, + (* pair, *) movecnt, movelist,...})::stack, + spills, stamp) = + let (* set up the proh array *) + fun neighbors [] = () + | neighbors(r::rs) = + (case chase r of + NODE{color=ref(COLORED c), ...} => + (UA.update(proh, c, stamp); neighbors rs) + | _ => neighbors rs + ) + (* + * Look at lost moves and see if it is possible to + * color the move with the same color + *) + fun getPref([], pref) = pref + | getPref(MV{status=ref(LOST | BRIGGS_MOVE | GEORGE_MOVE), + src, dst, ...}::mvs, pref) = + let val src as NODE{number=s,...} = chase src + val other = if s = number then chase dst else src + in case other of + NODE{color=ref(COLORED c),...} => getPref(mvs, c::pref) + | _ => getPref(mvs, pref) + end + | getPref(_::mvs, pref) = getPref(mvs, pref) + + val _ = neighbors(!adj) + val pref = getPref(!movelist,[]) + val spills = + let val col = getreg{pref=[], proh=proh, stamp=stamp} + in color := COLORED col; spills + end handle _ => node::spills + in biasedColoring(stack, spills, stamp+1) end + + val (spills, st) = + if isOn(mode, BIASED_SELECTION) then + biasedColoring(stack, [], !stamp) + else if !spillFlag then + optimistic(stack, [], !stamp) + else + fastcoloring(stack, !stamp) + + in stamp := st; + case spills of + [] => {spills=[]} + | spills => + let fun undo [] = () + | undo(NODE{color,...}::nodes) = (color := PSEUDO; undo nodes) + in undo stack; + undoCoalesced (!trail); + trail := END; + {spills=spills} + end + end (*select*) + + (* + * Incorporate memory<->register moves into the interference graph + *) + fun initMemMoves(GRAPH{memMoves, ...}) = + let fun move(NODE{movelist, movecost, ...}, mv, cost) = + (movelist := mv :: !movelist; + movecost := cost + !movecost + ) + + fun setMove(dst, src, mv, cost) = + (move(dst, mv, cost); move(src, mv, cost)) + + fun init [] = () + | init((mv as MV{dst, src, cost, ...})::mvs) = + let val dst as NODE{color=ref dstCol, ...} = chase dst + val src as NODE{color=ref srcCol, ...} = chase src + in + if isFixedMem(srcCol) andalso isFixedMem(dstCol) then + setMove(dst, src, mv, cost) + else (case (srcCol, dstCol) + of (PSEUDO, _) => + if isFixedMem dstCol then setMove(dst, src, mv, cost) + else error "initMemMoves" + | (_, PSEUDO) => + if isFixedMem srcCol then setMove(dst, src, mv, cost) + else error "initMemMoves" + | (COLORED _, _) => + if isFixedMem dstCol then () else error "initMemMoves" + | (_, COLORED _) => + if isFixedMem srcCol then () else error "initMemMoves" + | _ => error "initMemMoves" + (*esac*)); + init mvs + end + val moves = !memMoves + in memMoves := []; + init moves + end + + + (* + * Compute savings due to memory<->register moves + *) + fun moveSavings(GRAPH{memMoves=ref [], ...}) = (fn node => 0.0) + | moveSavings(GRAPH{memMoves, bitMatrix, ...}) = + let exception Savings + val savingsMap = IntHashTable.mkTable(32, Savings) + : {pinned:int,cost:cost} IntHashTable.hash_table + val savings = IntHashTable.find savingsMap + val savings = fn r => case savings r of NONE => {pinned= ~1, cost=0.0} + | SOME s => s + val addSavings = IntHashTable.insert savingsMap + val member = BM.member(!bitMatrix) + fun incSavings(u, v, c) = + let val {pinned, cost} = savings u + in if pinned <> ~1 andalso v <> pinned orelse member(u, v) + then () + else addSavings(u, {pinned=v, cost=cost + c + c}) + end + fun computeSavings [] = () + | computeSavings(MV{dst, src, cost, ...}::mvs) = + let val src as NODE{number=u, color=cu, ...} = chase src + val dst as NODE{number=v, color=cv, ...} = chase dst + in case (!cu, !cv) + of (cu, PSEUDO) => + if isFixedMem (cu) then incSavings(v, u, cost) else () + | (PSEUDO, cv) => + if isFixedMem (cv) then incSavings(u, v, cost) else () + | _ => (); + computeSavings mvs + end + in computeSavings (!memMoves); + fn node => #cost(savings node) + end + + (* + * Update the color of cells + *) + fun updateCellColors(GRAPH{nodes, deadCopies, ...}) = + let fun enter(C.CELL{col, ...},c) = col := c + fun cellOf(NODE{cell, ...}) = cell + fun set(NODE{cell, color=ref(COLORED c),...}) = + enter(cell, C.MACHINE c) + | set(NODE{cell, color=ref(ALIASED alias),...}) = + enter(cell, C.ALIASED(cellOf alias)) + | set(NODE{cell, color=ref(SPILLED),...}) = + enter(cell, C.SPILLED) + | set(NODE{cell, color=ref(SPILL_LOC s),...}) = + enter(cell, C.SPILLED) + | set(NODE{cell, color=ref(MEMREG(m, _)),...})= + enter(cell, C.MACHINE m) + | set(NODE{cell, color=ref PSEUDO, ...}) = () + | set(_) = error("updateCellColors") + in IntHashTable.app set nodes + end + + (* + * Update aliases before spill rewriting. + *) + fun updateCellAliases(GRAPH{nodes, deadCopies, ...}) = + let fun enter(C.CELL{col, ...},c) = col := c + fun cellOf(NODE{cell, ...}) = cell + fun set(NODE{cell, color=ref(COLORED c),...}) = () + | set(NODE{cell, color=ref(ALIASED alias),...}) = + enter(cell, C.ALIASED(cellOf alias)) + | set(NODE{cell, color=ref(SPILLED),...}) = () + | set(NODE{cell, color=ref(SPILL_LOC s),...}) = () + | set(NODE{cell, color=ref(MEMREG _),...})= () + | set(NODE{cell, color=ref PSEUDO, ...}) = () + | set(_) = error("updateCellAliases") + in IntHashTable.app set nodes + end + + fun markDeadCopiesAsSpilled(GRAPH{deadCopies, ...}) = + let fun enter(C.CELL{col, ...},c) = col := c + in case !deadCopies of + [] => () + | dead => app (fn r => enter(r, C.SPILLED)) dead + end + + (* + * Clear the interference graph, but keep the nodes + *) + fun clearGraph(GRAPH{bitMatrix, maxRegs, trail, spillFlag, + deadCopies, memMoves, copyTmps, ...}) = + let val edges = BM.edges(!bitMatrix) + in trail := END; + spillFlag := false; + deadCopies := []; + memMoves := []; + copyTmps := []; + bitMatrix := BM.empty; + bitMatrix := G.newBitMatrix{edges=edges, maxRegs=maxRegs()} + end + + fun clearNodes(GRAPH{nodes,...}) = + let fun init(_, NODE{pri, degree, adj, movecnt, movelist, + movecost, defs, uses, ...}) = + (pri := 0.0; degree := 0; adj := []; movecnt := 0; movelist := []; + defs := []; uses := []; movecost := 0.0) + in IntHashTable.appi init nodes + end + + end (* local *) + +end + +end (* local *) diff --git a/MLRISC/ra/ra-deadCodeE.sml b/MLRISC/ra/ra-deadCodeE.sml new file mode 100644 index 0000000..8f05f89 --- /dev/null +++ b/MLRISC/ra/ra-deadCodeE.sml @@ -0,0 +1,83 @@ +(* + * This is a hack module for removing dead code that are discovered by + * the register allocator. This module acts as a wrapper + * for the generic RA flowgraph module. + * + * -- Allen + *) + +functor RADeadCodeElim + (Flowgraph : RA_FLOWGRAPH) + ( (* check for dead code on these cellkinds only *) + val cellkind : CellsBasis.cellkind -> bool + (* Dead registers are stored here. *) + val deadRegs : bool IntHashTable.hash_table + (* Affected blocks *) + val affectedBlocks : bool IntHashTable.hash_table + val spillInit : Flowgraph.G.interferenceGraph * CellsBasis.cellkind + -> unit + ) : RA_FLOWGRAPH = +struct + structure F = Flowgraph + + open F + + (* We must save all the copy temporaries for this to work *) + val mode = RACore.SAVE_COPY_TEMPS + + fun isOn(flag,mask) = Word.andb(flag,mask) <> 0w0 + + (* + * New services that also removes dead code + *) + fun services f = + let val {build, spill, blockNum, instrNum, programPoint} = F.services f + (* + * The following build method marks all pseudo registers + * that are dead, and record their definition points. + *) + fun findDeadCode(G.GRAPH{nodes, copyTmps, mode, ...}) = + let val dead = IntHashTable.insert deadRegs + val affected = IntHashTable.insert affectedBlocks + val affectedList = app (fn d => affected(blockNum d, true)) + + (* Mark all copy temporaries *) + val marker = [{block=0,insn=0}] + fun markCopyTmps [] = () + | markCopyTmps(G.NODE{uses, ...}::tmps) = + (uses := marker; markCopyTmps tmps) + fun unmarkCopyTmps [] = () + | unmarkCopyTmps(G.NODE{uses, ...}::tmps) = + (uses := []; unmarkCopyTmps tmps) + + fun enter(_, G.NODE{uses=ref [], defs, number=reg, ...}) = + (* This is dead, but make sure it is not a copy temporary. + * Those cannot be eliminated. + *) + (affectedList (!defs); dead(reg, true)) + | enter _ = () + + in markCopyTmps(!copyTmps); + IntHashTable.appi enter nodes; + unmarkCopyTmps(!copyTmps); + if isOn(mode, RACore.HAS_PARALLEL_COPIES) then () + else copyTmps := [] (* clean up now *) + end + + (* + * Build the graph, then remove dead code. + *) + fun buildIt(graph, kind) = + let val moves = build(graph, kind) + in if cellkind kind then findDeadCode(graph) else (); + moves + end + + fun spillIt(arg as {graph, cellkind, ...}) = + (spillInit(graph, cellkind); spill arg) + + in {build=buildIt, spill=spillIt, programPoint=programPoint, + blockNum=blockNum, instrNum=instrNum} + end + +end diff --git a/MLRISC/ra/ra-flowgraph-part.sig b/MLRISC/ra/ra-flowgraph-part.sig new file mode 100644 index 0000000..6a80d18 --- /dev/null +++ b/MLRISC/ra/ra-flowgraph-part.sig @@ -0,0 +1,16 @@ +signature RA_FLOWGRAPH_PARTITIONER = +sig + + structure C : CELLS + + type flowgraph + + (* Number of basic blocks in the flowgraph *) + val numberOfBlocks : flowgraph -> int + + (* Partition a flowgraph into smaller subgraphs and apply + * allocation to them individually + *) + val partition : flowgraph -> CellsBasis.cellkind -> (flowgraph -> flowgraph) -> unit + +end diff --git a/MLRISC/ra/ra-flowgraph.sig b/MLRISC/ra/ra-flowgraph.sig new file mode 100644 index 0000000..6837166 --- /dev/null +++ b/MLRISC/ra/ra-flowgraph.sig @@ -0,0 +1,56 @@ +(* + * Abstract view a flowgraph required by the new register allocator. + * In order to allow different representation to share the same + * register allocator core, each representation should implement the + * following interface to talk to the new RA. + * + * -- Allen + *) + +signature RA_FLOWGRAPH = +sig + + structure I : INSTRUCTIONS + structure C : CELLS + structure G : RA_GRAPH = RAGraph + structure Spill : RA_SPILL + sharing Spill.I = I + sharing I.C = C + + type flowgraph + + val mode : G.mode + + (* Dump the flograph to a stream *) + val dumpFlowgraph : string * flowgraph * TextIO.outstream -> unit + + (* Dump the flograph to a stream *) + val annotations : flowgraph -> Annotations.annotations ref + + (* + * Interface for communicating with the new register allocator. + * It is expected that the services will cache enough information + * during build so that the rebuild and spill phases can be execute + * quickly. + *) + val services : flowgraph -> + { build : G.interferenceGraph * CellsBasis.cellkind-> + G.move list, (* build the graph *) + spill : {copyInstr : Spill.copyInstr, + spill : Spill.spill, + spillSrc : Spill.spillSrc, + spillCopyTmp : Spill.spillCopyTmp, + reload : Spill.reload, + reloadDst : Spill.reloadDst, + renameSrc : Spill.renameSrc, + graph : G.interferenceGraph, + nodes : G.node list, + cellkind : CellsBasis.cellkind + } -> G.move list, + (* spill/rebuild the graph *) + programPoint : {block:int, instr:int} -> G.programPoint, + blockNum : G.programPoint -> int, + instrNum : G.programPoint -> int + } + +end diff --git a/MLRISC/ra/ra-graph.sig b/MLRISC/ra/ra-graph.sig new file mode 100644 index 0000000..67f7d89 --- /dev/null +++ b/MLRISC/ra/ra-graph.sig @@ -0,0 +1,175 @@ +(* ra-graph.sig + * + * COPYRIGHT (c) 2002 Bell Labs, Lucent Technologies. + * + * This is the new interference graph used by the new register allocator. + * + * -- Allen + *) + +signature RA_GRAPH = +sig + + structure C : CELLS_BASIS + structure BM : RA_BITMATRIX = RaBitmatrix + (* + * The following are the data structures used in the register allocator. + *) + + exception Nodes + + type priority = real + type cost = real + + (* + * The following represent a program point in the program. + * + * The last instruction in the block is numbered 1, i.e. the instruction + * numbering is in reverse. The number 0 is reserved for "live-out". + * + *) + type programPoint = {block:int, insn:int} + + (* Hash table indexed by program point *) + structure PPtHashTable : MONO_HASH_TABLE + where type Key.hash_key = programPoint + + type frame_offset = int + type logical_spill_id = int + + datatype spillLoc = + FRAME of logical_spill_id (* spill to a new frame location *) + | MEM_REG of C.cell (* spill to a memory register *) + + (* Hash table indexed by spill location *) + structure SpillLocHashTable : MONO_HASH_TABLE + where type Key.hash_key = spillLoc + + type mode = word + + datatype interferenceGraph = + GRAPH of + { bitMatrix : BM.bitMatrix ref, + nodes : node IntHashTable.hash_table, + K : int, + firstPseudoR : int, + dedicated : int -> bool, + getreg : {pref:int list, stamp:int, proh:int Array.array} -> int, + getpair : {pref:int list, stamp:int, proh:int Array.array} -> int, + proh : int Array.array, + stamp : int ref, + + (* Info to undo a spill when an optimistic spill has occurred *) + spillFlag : bool ref, + + spilledRegs : bool IntHashTable.hash_table, + (*registers that have been spilled*) + trail : trailInfo ref, + + (* how to pretty print a register *) + showReg : C.cell -> string, + + (* how many registers there are? *) + numRegs : int, + maxRegs : unit -> int, + + (* dead copies *) + deadCopies : C.cell list ref, + copyTmps : node list ref, + memMoves : move list ref, + memRegs : node list ref, + + (* spill locations *) + spillLoc : int ref, + + (* span indexed by node id *) + span : cost IntHashTable.hash_table option ref, + + (* mode *) + mode : mode, + + pseudoCount : int ref + } + + and moveStatus = BRIGGS_MOVE (* not yet coalesceable *) + | GEORGE_MOVE (* not yet coalesceable *) + | COALESCED (* coalesced *) + | CONSTRAINED (* src and target intefere *) + | LOST (* frozen moves *) + | WORKLIST (* on the move worklist *) + + and move = + MV of {src : node, (* source register of move *) + dst : node, (* destination register of move *) +(* kind : moveKind, (* kind of move *) *) + cost : cost, (* cost *) + status : moveStatus ref, (* coalesced? *) + hicount: int ref (* neighbors of high degree *) + } + + and moveKind = REG_TO_REG (* register to register *) + | EVEN_TO_REG (* even register in pair to register *) + | ODD_TO_REG (* odd register in pair to register *) + | PAIR_TO_PAIR (* register pair to register pair *) + | REG_TO_EVEN (* register to even register in pair *) + | REG_TO_ODD (* register to odd register in pair *) +(* and moveKind = REGmvk (* register-register move *) + | MEMREGmvk (* move involving memReg *) + +*) + + and nodeStatus = + PSEUDO (* pseudo register *) + | REMOVED (* removed from the interference graph *) + | ALIASED of node (* coalesced *) + | COLORED of int (* colored *) + | MEMREG of int * C.cell(* register implemented in memory *) + | SPILLED (* spilled *) + | SPILL_LOC of int (* spilled at logical location *) + + (* Note on SPILLED: + * SPILLED ~1 means that the spill location is still undetermined + * SPILLED c, c >= 0 means that c is a fixed "memory register" + * SPILLED c, c < ~1 means that c is a logical spill location + * assigned by the register allocator + *) + + and node = + NODE of { number : int, (* node number *) + cell: C.cell, + movecnt: int ref, (* #moves this node is involved in *) + movelist: move list ref, (* moves associated with this node *) + degree : int ref, (* current degree *) + color : nodeStatus ref, (* status *) + adj : node list ref, (* adjacency list *) + pri : priority ref, (* priority *) + movecost : cost ref, (* move cost *) + (* pair : bool, *) (* register pair? *) + defs : programPoint list ref, + uses : programPoint list ref + } + + and trailInfo = END | UNDO of node * moveStatus ref * trailInfo + + (* Create a new bitMatrix *) + val newBitMatrix : {edges : int, maxRegs : int} -> BM.bitMatrix + + (* Create a new interference graph *) + val newGraph : { nodes : node IntHashTable.hash_table, + numRegs : int, + maxRegs : unit -> int, + K : int, + firstPseudoR : int, + dedicated : int -> bool, + showReg : C.cell -> string, + getreg : + {pref:int list,stamp:int,proh:int Array.array} -> int, + getpair : + {pref:int list,stamp:int,proh:int Array.array} -> int, + proh : int Array.array, + mode : mode, + spillLoc : int ref, + memRegs : C.cell list + } -> interferenceGraph + +end diff --git a/MLRISC/ra/ra-graph.sml b/MLRISC/ra/ra-graph.sml new file mode 100644 index 0000000..781db28 --- /dev/null +++ b/MLRISC/ra/ra-graph.sml @@ -0,0 +1,209 @@ +(* + * This is the new interference graph used by the new register allocator. + * + * -- Allen + *) + +structure RAGraph : RA_GRAPH = +struct + + structure C = CellsBasis + + structure BM = RaBitmatrix + + type priority = real + + type programPoint = {block:int, insn:int} + + structure PPtHashTable = HashTableFn + (type hash_key = programPoint + fun hashVal{block,insn} = + Word.<<(Word.fromInt block,0w7) + Word.fromInt insn + fun sameKey(x:programPoint,y) = x = y + ) + + type frame_offset = int + type logical_spill_id = int + datatype spillLoc = FRAME of logical_spill_id + | MEM_REG of C.cell + + structure SpillLocHashTable = HashTableFn + (type hash_key = spillLoc + fun hashVal(FRAME i) = Word.fromInt i + | hashVal(MEM_REG r) = C.hashCell r + fun sameKey(FRAME i, FRAME j) = i = j + | sameKey(MEM_REG x,MEM_REG y) = C.sameColor(x, y) + | sameKey _ = false + ) + + type cost = real + + type mode = word + + datatype interferenceGraph = + GRAPH of { bitMatrix : BM.bitMatrix ref, + nodes : node IntHashTable.hash_table, + K : int, + firstPseudoR : int, + dedicated : int -> bool, + getreg : + {pref:int list, stamp:int, proh:int Array.array} -> int, + getpair : + {pref:int list, stamp:int, proh:int Array.array} -> int, + proh : int Array.array, + stamp : int ref, + + (* Info to undo a spill when an optimistic spill has occurred *) + spillFlag : bool ref, + spilledRegs : bool IntHashTable.hash_table, + trail : trailInfo ref, + + showReg : C.cell -> string, + numRegs : int, + maxRegs : unit -> int, + + deadCopies : C.cell list ref, + copyTmps : node list ref, + memMoves : move list ref, + memRegs : node list ref, + + spillLoc : int ref, + span : cost IntHashTable.hash_table option ref, + mode : mode, + pseudoCount : int ref + } + + and moveStatus = BRIGGS_MOVE | GEORGE_MOVE + | COALESCED | CONSTRAINED | LOST | WORKLIST + + and move = + MV of {src : node, (* source register of move *) + dst : node, (* destination register of move *) + (* kind: moveKind, *) (* what kind of move *) + cost : cost, (* cost *) + status : moveStatus ref, (* coalesced? *) + hicount : int ref (* neighbors of high degree *) + } + + and moveKind = REG_TO_REG (* register to register *) + | EVEN_TO_REG (* even register in pair to register *) + | ODD_TO_REG (* odd register in pair to register *) + | PAIR_TO_PAIR (* register pair to register pair *) + | REG_TO_EVEN (* register to even register in pair *) + | REG_TO_ODD (* register to odd register in pair *) + + and nodeStatus = + PSEUDO (* pseudo register *) + | REMOVED (* removed from the interference graph *) + | ALIASED of node (* coalesced *) + | COLORED of int (* colored *) + | MEMREG of int * C.cell(* register implemented in memory *) + | SPILLED (* spilled *) + | SPILL_LOC of int (* spilled at logical location *) + + and node = + NODE of { number : int, (* node number *) + cell : C.cell, + movecnt: int ref, (* #moves this node is involved in *) + movelist: move list ref, (* moves associated with this node *) + degree : int ref, (* current degree *) + color : nodeStatus ref, (* status *) + adj : node list ref, (* adjacency list *) + pri : priority ref, (* priority *) + movecost : cost ref, (* move cost *) + (* pair : bool, *) (* register pair? *) + defs : programPoint list ref, + uses : programPoint list ref + } + + and trailInfo = END | UNDO of node * moveStatus ref * trailInfo + + exception Nodes + + fun error msg = MLRiscErrorMsg.error("NewRAGraph", msg) + + val stampCounter = ref 0 + + (* Create a new bitMatrix *) + fun roundSize size = + let fun f(x, shift) = + if x >= size then (x, Word.>>(shift, 0w1)) + else f(x+x, shift+0w1) + in f(64, 0w6) end + + val max = Word.<<(0w1,Word.>>(Word.fromInt Word.wordSize,0w1)) + val _ = if max < Word.<<(0w1,0w15) + then error "word size too small" else () + + fun newBitMatrix{edges, maxRegs} = + let val table = + (* if maxRegs < 1024 then + let val denseBytes = (maxRegs * (maxRegs + 1) + 15) div 16 + in BITMATRIX(Word8Array.array(denseBytes,0w0)) + end + else *) + let val (tableSize, shift) = roundSize edges + in if Word.fromInt maxRegs < max then + BM.SMALL(ref(Array.array(tableSize,[])),shift) + else + BM.LARGE(ref(Array.array(tableSize, BM.NIL)),shift) + end + in BM.BM{table=table, elems=ref 0, edges=edges} + end + + (* Create a new interference graph *) + fun newGraph{nodes,K,firstPseudoR,dedicated,spillLoc, + getreg,getpair,showReg,maxRegs,numRegs,proh, + memRegs,mode} = + let (* lower triangular bitmatrix primitives *) + (* NOTE: The average ratio of E/N is about 16 *) + val bitMatrix = newBitMatrix{edges=numRegs * 16,maxRegs=maxRegs()} + + (* Make memory register nodes *) + fun makeMemRegs [] = [] + | makeMemRegs(cells) = + let val add = IntHashTable.insert nodes + fun loop([], ns) = ns + | loop(cell::cells, ns) = + let val id = C.registerId cell + val node = + NODE{number=id, + pri=ref 0.0,adj=ref [],degree=ref 0,movecnt=ref 0, + color=ref(MEMREG(id,cell)), + defs=ref [], uses=ref [], + movecost=ref 0.0,movelist=ref [], cell=cell} + in add(id, node); loop(cells, node::ns) + end + in loop(cells, []) + end + + val memRegs = makeMemRegs memRegs + + in if !stampCounter > 10000000 then stampCounter := 0 else (); + GRAPH{ bitMatrix = ref bitMatrix, + nodes = nodes, + K = K, + firstPseudoR = firstPseudoR, + dedicated = dedicated, + getreg = getreg, + getpair = getpair, + proh = proh, + stamp = stampCounter, + spillFlag = ref false, + spilledRegs = IntHashTable.mkTable(2,Nodes), + trail = ref END, + showReg = fn _ => raise Match, + numRegs = numRegs, + maxRegs = maxRegs, + deadCopies = ref [], + copyTmps = ref [], + memMoves = ref [], + memRegs = ref memRegs, + spillLoc = spillLoc, + span = ref NONE, + mode = mode, + pseudoCount = ref 0 + } + end + +end diff --git a/MLRISC/ra/ra-iteratedCoalescing.sml b/MLRISC/ra/ra-iteratedCoalescing.sml new file mode 100644 index 0000000..7d5c017 --- /dev/null +++ b/MLRISC/ra/ra-iteratedCoalescing.sml @@ -0,0 +1,796 @@ +(** Graph coloring register allocation. graph + ** Implements the 'iterated register coalescing' scheme described + ** in POPL'96, and TOPLAS v18 #3, pp 325-353. + **) +(* + * This is a reorganization of the old iterated coalescing + * register allocator using a more modular implementation. + * + * --- Allen + *) +functor OldRegAllocator + (structure RaArch : RA_ARCH_PARAMS) + (structure RaUser : RA_USER_PARAMS + where I = RaArch.I + where B = RaArch.Liveness.F.B + ) : RA = +struct + + structure F = RaArch.Liveness.F + structure Core = OldRACore + structure G = Core.G + structure A = Array + structure I = RaArch.I + structure C = I.C + structure P = RaArch.InsnProps + structure SL = SortedList + + datatype mode = REGISTER_ALLOCATION | COPY_PROPAGATION + + open G + + fun error msg = MLRiscErrorMsg.error("IteratedCoalescing",msg) + + (* + * Debugging flags + *) + val cfg_before_ra = MLRiscControl.getFlag "dump-cfg-before-ra" + val cfg_after_ra = MLRiscControl.getFlag "dump-cfg-after-ra" + val cfg_after_spill = MLRiscControl.getFlag "dump-cfg-after-spilling" + val dump_graph = MLRiscControl.getFlag "dump-interference-graph" + val ra_count = MLRiscControl.getCounter "ra-count" + val rewrite = MLRiscControl.getCounter "ra-rewrites" + + (* + * Set of dedicated registers. + * Note: I'm using an array for testing for dedicated registers. + * Hopefully this is now a bit faster than before. -- Allen + *) + val spillRegSentinel = ~1 (* what is this? *) + val dedicated = SL.uniq(RaUser.dedicated) + val firstPseudoR = RaArch.firstPseudoR + val dedicatedRegs = A.array(firstPseudoR,false) + val _ = app (fn r => if r >= 0 andalso r < firstPseudoR then + A.update(dedicatedRegs,r,true) else ()) dedicated + fun isDedicated r = r < 0 orelse r < firstPseudoR andalso + Unsafe.Array.sub(dedicatedRegs,r) + (* Note: The following is no long necessary! + * Note: This function maintains the order of members in rset + * which is important when dealing with parallel copies. + *) + fun rmvDedicated rset = + let fun loop(x::xs, xs') = loop(xs, if isDedicated x then xs' else x::xs') + | loop([], xs') = xs' + in loop(rset,[]) + end + + + (* register mapping functions *) + fun uniqMap(f, l) = SL.uniq(map f l) + + fun prList (l:int list,msg:string) = + let fun pr [] = print "\n" + | pr (x::xs) = (print (Int.toString x ^ " "); pr xs) + in print msg; pr l + end + + (* debugging *) + fun printBlocks(blks, regmap, annotations) = + let val regmap = C.lookup regmap + val RaArch.AsmEmitter.S.STREAM{emit,...} = + AsmStream.withStream TextIO.stdOut + RaArch.AsmEmitter.makeStream annotations + val emit = emit regmap + fun prBlks([]) = print"\n" + | prBlks(F.BBLOCK{blknum,insns,liveOut,liveIn, + succ,pred,...}::blocks)= + let + fun regset cellset = map regmap (RaArch.regSet(cellset)) + fun pr [] = prList(regset(!liveOut), "liveOut: ") + | pr (instr::rest) = (emit instr; pr rest) + fun blkNum(F.BBLOCK{blknum, ...},_) = blknum + | blkNum(F.ENTRY{blknum, ...},_) = blknum + | blkNum(F.EXIT{blknum, ...},_) = blknum + | blkNum _ = error "printBlocks.prBlks.blkNum" + in + print("BLOCK" ^ Int.toString blknum ^ "\n"); + prList(regset (!liveIn), "LiveIn :"); + prList(map blkNum (!pred),"predecessors: "); + case !insns of [] => print "empty instruction sequence\n" + | l => pr(rev l) + (*esac*); + prList(map blkNum (!succ),"successors: "); + prBlks(blocks) + end + | prBlks(F.LABEL lab::blocks) = + (print(Label.nameOf lab^":\n"); prBlks(blocks)) + | prBlks(F.PSEUDO pOp::blocks) = (print (F.P.toString pOp); prBlks(blocks)) + | prBlks(_::blocks) = prBlks(blocks) + in prBlks blks + end + + fun debug(flag, msg, blocks, regmap, annotations) = + if !flag then + (print ("------------------" ^ msg ^ " ----------------\n"); + printBlocks(blocks,regmap,annotations)) + else () + + + (* Utility functions *) + fun newNode(num, col) = + NODE{number=num, + color=ref col, + degree=ref 0, + adj=ref [], + movecnt = ref 0, + movelist = ref []} + + fun nodeNumber(NODE{number, ...}) = number + + fun chase(NODE{color=ref(ALIASED r), ...}) = chase r + | chase x = x + + fun nodeMember(_, []) = false + | nodeMember(node as NODE{number=x, ...}, NODE{number=y,...}::rest) = + x = y orelse nodeMember(node, rest) + + + fun isMoveRelated(NODE{movecnt=ref 0, ...}) = false + | isMoveRelated _ = true + + exception PrevSpills + val prevSpills = Intmap.new(32,PrevSpills) : bool Intmap.intmap + val isSpilled = Intmap.mapWithDefault(prevSpills,false) + val enterSpilled = Intmap.add prevSpills + fun markAsSpilled r = enterSpilled(r,true) + + (* + * This is the new register allocator! + *) + fun ra mode prohibit + (cluster as F.CLUSTER{regmap,blocks,annotations=ref an,...}) = + if RaArch.numRegs() = 0 then cluster + else + let + val _ = Intmap.clear prevSpills + val _ = app (fn (i,j) => + let fun loop(i) = + if i <= j then (markAsSpilled i; loop(i+1)) else () + in loop i end) prohibit + + (* number of blocks *) + val numBlocks = foldr (fn (F.BBLOCK _,n) => n + 1 | (_,n) => n) 0 blocks + + val blockDU = A.array(numBlocks,[] : (node list * node list) list) + val cblocks = A.array(numBlocks,F.LABEL(Label.newLabel "")) + val numOfBlocks = A.length cblocks + + (* remainInfo: blocks where spill nodes are defined and used. *) + type info = int list Intmap.intmap + val remainInfo : (info * info) option ref = ref NONE + + fun cleanupSpillInfo() = remainInfo := NONE + + (** + ** Build blockDU and cblocks. + ** This is done once per RA. + **) + fun initialize() = + let val nodes = Intmap.new(32,Nodes) + fun mkNode i = + newNode(i, if i < firstPseudoR then COLORED(i) else PSEUDO) + val lookupNodes = Intmap.map nodes + val enterNodes = Intmap.add nodes + fun newnode n = + lookupNodes n + handle _ => + let val node = mkNode n + in enterNodes (n, node); node + end + fun blockDefUse((b as F.BBLOCK{insns,liveOut,succ, ...})::blks, n) = + let fun insnDefUse insn = + let val (d,u) = RaArch.defUse insn + fun rmv [] = [] + | rmv (l as [x]) = + if isDedicated x then [] else [newnode x] + | rmv set = map newnode (rmvDedicated set) + in (rmv d, rmv u) end + in Unsafe.Array.update(cblocks, n, b); + Unsafe.Array.update(blockDU, n, map insnDefUse (!insns)); + case !succ of + [(F.EXIT _,_)] => + app (fn i => (newnode i; ())) + (rmvDedicated(RaArch.regSet(!liveOut))) + | _ => (); + blockDefUse(blks, n+1) + end + | blockDefUse(_::blks, n) = blockDefUse(blks, n) + | blockDefUse([], _) = () + + (* if copy propagation was done prior to register allocation + * then some nodes may already be aliased. + *) + fun updateAliases() = + let val alias = Intmap.mapInt regmap + fun fixup(num, NODE{color, ...}) = + if num < firstPseudoR then () + else let val reg = alias num + in if reg=num then () else + color := ALIASED(newnode reg) + end + in Intmap.app fixup nodes end + in blockDefUse(blocks,0); + updateAliases(); + nodes + end + + (** + ** Run liveness analysis + **) + fun liveness(nodes,blocks) = + let val getnode = Intmap.map nodes + fun regmap i = + let val node = getnode i + in case node + of NODE{color= ref (COLORED r), ...} => r + | NODE{color=ref PSEUDO, ...} => nodeNumber node + | NODE{color=ref(ALIASED r), ...} => nodeNumber(chase node) + | _ => error "liveness.regmap" + end handle _ => i (* XXX *) + in RaArch.Liveness.liveness(blocks, regmap) + end + + (* + * Given a set of registers, remove all spilled and dedicated nodes. + * NOTE: we assume that dedicated registers are NEVER entered into + * nodes Intmap. + *) + fun collectNodes(getnode,regs) = + let fun loop([],xs) = xs + | loop(r::rs,xs) = + (case chase(getnode r) of + NODE{color=ref(COLORED ~1),...} => loop(rs,xs) + | x => loop(rs,x::xs) + ) handle _ => loop(rs,xs) (* dedicated *) + in loop(regs,[]) end + + + (** + ** Builds the interference graph and initialMove list + **) + fun build(graph as GRAPH{bitMatrix,nodes,...}) = + let (* The movecnt field is used to (lazily) record members in the + * live set. Deleted members are removed during an + * addEdgeForallLive operation. + *) + val getnode = Intmap.map nodes + val chaseReg = chase o getnode + val chaseRegs = map chaseReg + val addEdge = Core.addEdge graph + val member = BM.member bitMatrix + fun memBitMatrix(NODE{number=x,...}, NODE{number=y,...}) = + member (if x movecnt := 0) live; + forallBlocks(n-1, mvs)) + | doBlock(instr::rest, (def',use')::bdu, live', mvs) = + let val def = map chase def' + val use = map chase use' + (* move instructions are treated specially *) + (* There is a subtle interaction between parallel + moves and interference graph construction. When we + have {d1, ... dn} <- {s1, ... sn} and liveOut we + should make di interfere with: + + liveOut U {d1, ... dn} U ({s1, ... sn} \ {si}) + + This is not currently done. + *) + fun zip(d::defs, u::uses) = + if isDedicated d orelse + isDedicated u then zip(defs, uses) + else + let val d as NODE{number=x,...} = chaseReg d + val u as NODE{number=y,...} = chaseReg u + in if x = y then zip(defs,uses) + else + MV{dst=d, src=u, status=ref WORKLIST}:: + zip(defs, uses) + end + | zip([],[]) = mvs + + (* Assumes that the move temporary + * if present is always the + * first thing on the definition list. + *) + val moves = + if P.moveInstr instr then + let val (defs,uses) = RaArch.defUse instr + val defs = + case defs of + [] => [] + | _::rest => case P.moveTmpR instr of + SOME _ => rest + | NONE => defs + in zip(defs,uses) + end + else mvs + val live = + if length def > 1 then + addEdgeForallLive(def, insert(def, live')) + else addEdgeForallLive(def, live') + in app delete def; + doBlock(rest, bdu, insert(use,live), moves) + end + val lout = collectNodes(getnode,RaArch.regSet(!liveOut)) + in doBlock(!insns, bdu, insert(lout, []), mvs) + end + (* Filter moves that already have an interference. + * Also initialize the movelist and movecnt fields at this time. + *) + fun filter [] = [] + | filter (MV{src=NODE{color=ref(COLORED _), ...}, + dst=NODE{color=ref(COLORED _), ...}, ...}::rest) = + filter rest + | filter ((mv as MV{src, dst, ...})::rest) = + if memBitMatrix(src, dst) then filter rest + else let + fun info(u as NODE{color=ref PSEUDO, movecnt, movelist,...}) = + (movelist := mv :: !movelist; movecnt := 1 + !movecnt) + | info _ = () + in info src; info dst; mv::filter rest + end + in filter(forallBlocks(numOfBlocks-1, [])) + end (* build *) + + (** + ** select a spill node + **) + fun selectSpill (GRAPH{nodes,spillFlag,K,...}, + {simplifyWkl, spillWkl, stack, moveWkl, freezeWkl}) = + let (* duCount: compute the def/use points of spilled nodes. *) + val getnode = Intmap.map nodes + val chaseReg = chase o getnode + fun duCount spillable = + let val size = length spillable + exception Info + val defInfo : info = Intmap.new(size,Info) + val useInfo : info = Intmap.new(size,Info) + val addDef = Intmap.add defInfo + val addUse = Intmap.add useInfo + val getDefs = Intmap.mapWithDefault (defInfo,[]) + val getUses = Intmap.mapWithDefault (useInfo,[]) + + (* doblocks --- + * updates the defInfo and useInfo tables to indicate + * the blocks where spillable live ranges are defined and used. + *) + fun doblocks ~1 = () + | doblocks blknum = + let val bdu = A.sub(blockDU,blknum) + fun iter [] = () + | iter((def',use')::rest) = + let val def = uniqMap(nodeNumber o chase, def') + val use = uniqMap(nodeNumber o chase, use') + fun updateDef n = addDef(n, blknum::getDefs n) + fun updateUse n = addUse(n, blknum::getUses n) + in app updateDef (SL.intersect(def,spillable)); + app updateUse (SL.intersect(use,spillable)); + iter rest + end + in iter(bdu); + doblocks(blknum-1) + end + + (* If a node is live going out of an block terminated by + * an escaping branch, it may be necessary to reload the + * the node just prior to taking the branch. We will therefore + * record this as a definition of the node. + *) + fun doBBlocks n = + let val F.BBLOCK{blknum,liveIn,liveOut,succ,...} = + A.sub(cblocks,n) + val liveout = + uniqMap (nodeNumber, + collectNodes(getnode,RaArch.regSet(!liveOut))) + in case !succ of + [(F.EXIT _,_)] => + (case SL.intersect(spillable,liveout) + of [] => doBBlocks(n+1) + | some => + (app (fn n => addDef(n, blknum::getDefs n)) some; + doBBlocks (n+1)) + (*esac*)) + | _ => doBBlocks(n+1) + (*esac*) + end (* doBBlocks *) + in doblocks (numOfBlocks - 1); + doBBlocks 0 handle _ => (); + (defInfo,useInfo) + end (* duCount *) + + (* Since the spillWkl is not actively maintained, the set of + * spillable nodes for which def/use info is needed is a subset + * of spillWkl. + *) + fun remainingNodes() = + let fun prune [] = [] + | prune((n as NODE{color=ref PSEUDO, ...}) ::ns) = + n::prune ns + | prune((n as NODE{color=ref(ALIASED _), ...})::ns) = + prune(chase n::ns) + | prune(_::ns) = prune ns + in case !remainInfo of + SOME info => prune spillWkl + | NONE => + let (* first time spilling *) + val spillable = prune ( spillWkl) + in remainInfo := + (case spillable + of [] => NONE + | _ => SOME(duCount(uniqMap(nodeNumber, spillable))) + (*esac*)); + spillable + end + end + + (** apply the Chaitin heuristic to find the spill node **) + fun chaitinHeuristic(spillable) = + let val infinity = 1000000.0 + val infinityi= 1000000 + val SOME(dinfo,uinfo) = !remainInfo + val getdInfo = Intmap.map dinfo + val getuInfo = Intmap.map uinfo + fun coreDump [] = () + | coreDump ((node as NODE{number, degree, adj, ...})::rest) = + (print(concat + ["number =", Int.toString number, + " node =", Int.toString(nodeNumber (chase node)), + " degree = ", Int.toString (!degree), + " adj = "]); + prList(map (nodeNumber o chase) (!adj), ""); + print "\n"; + coreDump rest) + fun iter([],node,cmin) = + if node <> ~1 then + (if !cfg_after_spill then + print("Spilling node "^Int.toString node^ + " cost="^Real.toString cmin^"\n") else (); + getnode node + ) + else (coreDump spillable; + prList(Intmap.keys prevSpills,"PrevSpills: "); + error "chaitinHeuristic.iter") + | iter((node as NODE{number, degree, ...})::rest,cnode,cmin) = + let + (* An exeception will be raised if the node is defined + * but not used. This is not a suitable node to spill. + *) + val cost = + (length(getdInfo number) handle _ => 0) + + (length(getuInfo number) handle _ => infinityi) + val heuristic = real cost / real (!degree) + in + if heuristic < cmin andalso not(isSpilled number) + then iter(rest, number, heuristic) + else iter(rest, cnode, cmin) + end + in iter(spillable, ~1, infinity) + end + in case mode of + COPY_PROPAGATION => + {spillWkl=[], simplifyWkl=[], stack=[], moveWkl=[], freezeWkl=[]} + | REGISTER_ALLOCATION => + (case remainingNodes() of + [] => {spillWkl=[], simplifyWkl=simplifyWkl, + stack=stack, moveWkl=moveWkl, freezeWkl=freezeWkl} + | spillWkl => + let val spillNode = chaitinHeuristic(spillWkl) + val simpWkl = + if isMoveRelated spillNode + then spillNode::Core.wklFromFrozen(K,spillNode) + else [spillNode] + in spillFlag:=true; + {simplifyWkl=simpWkl, + spillWkl = spillWkl, + freezeWkl = freezeWkl, + stack = stack, + moveWkl = moveWkl} + end + (*esac*)) + end (* selectSpill *) + + + (** rewriteGraph(spillList) - + ** an unsuccessful round of coloring has taken + ** place with nodes in spillList having been spilled. The + ** flowgraph must be updated and the entire process repeated. + **) + fun rewriteGraph (graph as GRAPH{nodes,...}, spillList) = + let val _ = rewrite := !rewrite + 1 + val SOME(dInfo,uInfo) = !remainInfo + val getnode = Intmap.map nodes + val enternode = Intmap.add nodes + val chaseReg = chase o getnode + val chaseRegs = map chaseReg + + fun newdu (d, u) = + let fun rmv([],nodes) = nodes + | rmv(r::rs,nodes) = + let val node = chase(getnode r) handle _ => + let val n = newNode(r, PSEUDO) + in enternode (r, n); n + end + in rmv(rs,node::nodes) end + fun rmv' rs = rmv(rmvDedicated rs,[]) + in (rmv' d, rmv' u) + end (* newdu *) + + val defUse = newdu o RaArch.defUse + + (* blocks where spill code is required for node n *) + fun affectedBlocks node = + let val n = nodeNumber node + in SL.merge(SL.uniq(Intmap.mapWithDefault (dInfo,[]) n), + SL.uniq(Intmap.mapWithDefault (uInfo,[]) n)) + end + + val mapr = C.lookup regmap + val markProh = app markAsSpilled + + (* Insert spill code into the affected blocks *) + fun doBlocks([], _) = () + | doBlocks(blknum::rest, node) = + let val F.BBLOCK{insns, liveOut, name, ...} = + A.sub(cblocks, blknum) + val bdu = A.sub(blockDU, blknum) + val liveOut = collectNodes(getnode,RaArch.regSet(!liveOut)) + val spillReg = nodeNumber node + + (* note: the instruction list start out in reverse order. *) + fun doInstrs([], [], newI, newBDU) = + (rev newI, rev newBDU) + | doInstrs(instr::rest, (du as (d,u))::bDU, newI, newBDU) = + let val defs=map chase d + val uses=map chase u + + fun outputInstrs(instrs, I, bDU) = + {newI=instrs @ I, + newBDU=(map defUse instrs) @ bDU} + + fun newReloadCopy(rds, rss) = + let fun f(rd::rds, rs::rss, rds', rss') = + if mapr rs = spillReg + then(([rd], [rs]), (rds@rds', rss@rss')) + else f(rds, rss, rd::rds', rs::rss') + | f([], [], _, _) = error "newReloadCopy.f" + in f(rds, rss, [], []) end + + (* insert reloading code and continue *) + fun reloadInstr(instr, du, newI, newBDU)= + let val {code, proh} = + RaUser.reload{regmap=mapr, instr=instr, + reg=spillReg, id=name} + val _ = markProh proh + val {newI, newBDU} = + outputInstrs(code, newI, newBDU) + in doInstrs(rest, bDU, newI, newBDU) end + + (* insert reload code for copies. *) + fun reloadCopy(du, instr, newI, newBDU) = + if nodeMember(node, #2 du) then + (case (P.moveDstSrc(instr)) + of ([d], [u]) => + reloadInstr(instr,du,newI,newBDU) + | (defs, uses) => + let val (mv, cpy) = newReloadCopy(defs, uses) + val cpyInstr = RaUser.copyInstr(cpy, instr) + val duCpy = defUse cpyInstr + val {code, proh} = + RaUser.reload + {regmap=mapr, + instr=RaUser.copyInstr(mv, instr), + reg=spillReg, id=name} + val _ = markProh proh + val {newI, newBDU} = + outputInstrs(code, newI, newBDU) + in (* recurse to deal with multiple uses *) + reloadCopy(duCpy, cpyInstr, newI, newBDU) + end + (*esac*)) + else + doInstrs(rest, bDU, instr::newI, du::newBDU) + + (* insert reload code *) + fun reload(du as (d,u), instr, newI, newBDU) = + if P.moveInstr(instr) then + reloadCopy(du, instr, newI, newBDU) + else if nodeMember(node, u) then + let val {code, proh} = + RaUser.reload{regmap=mapr, instr=instr, + reg=spillReg, id=name} + val {newI, newBDU} = + outputInstrs(code, newI, newBDU) + val _ = markProh proh + in doInstrs(rest, bDU, newI, newBDU) + end + else + doInstrs(rest, bDU, instr::newI, du::newBDU) + + + fun spillInstr(instr, newI, newBDU) = + let val {code, instr, proh} = + RaUser.spill{regmap=mapr, instr=instr, reg=spillReg, id=name} + val _ = markProh proh + val {newI, newBDU} = outputInstrs(code, newI, newBDU) + in case instr + of NONE => doInstrs(rest, bDU, newI, newBDU) + | SOME instr => + reload(defUse instr, instr, newI, newBDU) + end + + fun spillCopy() = + let (* Note:: There is a guarantee that the node + * will never be aliased to another register. + *) + fun newSpillCopy(rds, rss) = + let fun f(rd::rds, rs::rss, rds', rss') = + if mapr rd = spillReg then + (([rd], [rs]), (rds@rds', rss@rss')) + else f(rds, rss, rd::rds', rs::rss') + | f([], [], _, _) = error "newSpillCopy" + in f(rds, rss, [], []) end + + fun spillCpyDst() = + let val (mv, cpy) = newSpillCopy(P.moveDstSrc(instr)) + val (newI, newBDU) = + (case cpy + of ([],[]) => (newI, newBDU) + | _ => let val cpyInstr = RaUser.copyInstr(cpy, instr) + in (cpyInstr::newI, defUse cpyInstr::newBDU) + end + (*esac*)) + val instr = RaUser.copyInstr(mv, instr) + in spillInstr(instr, newI, newBDU) + end + in case P.moveTmpR instr + of NONE => spillCpyDst() + | SOME r => + if mapr r=spillReg + then spillInstr(instr, newI, newBDU) + else spillCpyDst() + (*esac*) + end (* spillCopy *) + in (* insert spill code *) + if nodeMember(node, defs) then + if P.moveInstr instr then spillCopy() + else spillInstr(instr, newI, newBDU) + else + reload((defs,uses), instr, newI, newBDU) + end + + (* special action if the last instruction is an escaping + * branch and the node is live across the branch. + * We discover if the node needs to be spilled or reloaded. + *) + fun blockEnd(instrs as instr::rest, bDU as du::bdu) = + let fun escapes [] = false + | escapes (P.ESCAPES::_) = true + | escapes (_::targets) = escapes targets + in if nodeMember(node, liveOut) then + (case P.instrKind instr + of P.IK_JUMP => + if escapes(P.branchTargets instr) then let + val {code,...} = + RaUser.reload{regmap=mapr, instr=instr, reg=spillReg, id=name} + val reloadDU = map defUse code + in (rev code@rest, rev reloadDU@bdu) + end + else (instrs, bDU) + | _ => (instrs, bDU) + (*esac*)) + else (instrs, bDU) + end + | blockEnd([],[]) = ([], []) + + val (newInstrs, newBdu) = + doInstrs(!insns, bdu, [], []) + val (newInstrs, newBdu) = blockEnd(newInstrs, newBdu) + in insns := newInstrs; + A.update(blockDU, blknum, newBdu); + doBlocks(rest, node) + end (* doBlocks *) + + (* The optimistic coloring selection may come up with a node + * that has already been spilled. Must be careful not to spill + * it twice. + *) + fun glue [] = () + | glue((node as NODE{number, color, ...})::rest) = + (if not(isSpilled number) then + (doBlocks(affectedBlocks node, node) + before color := COLORED(spillRegSentinel) + ) + else (); + glue rest + ) + + (* redoAlgorithm + * -- rerun graph coloring but note that spilling may + * have introduced new registers. + *) + fun redoAlgorithm(spillList) = + let val _ = app (markAsSpilled o nodeNumber) spillList + fun init(_, NODE{color=ref PSEUDO, degree, adj, + movecnt, movelist, ...}) = + (degree:=0; adj := []; movecnt:=0; movelist:=[]) + | init _ = () + in Intmap.app init nodes + end + in glue(spillList); + redoAlgorithm(spillList); + debug(cfg_after_spill,"after spilling",blocks,regmap,an) + end (* rewriteGraph *) + + (** + ** The main driver + **) + fun graphColoring(nodes) = + let (* Create an empty interference graph *) + val graph = newGraph + {nodes=nodes, + K=RaUser.nFreeRegs, + numRegs=RaArch.numRegs(), + regmap=regmap, + getreg=RaUser.getreg, + firstPseudoR=firstPseudoR + } + val moves = build graph (* build interference graph *) + val worklists = Core.makeWorkLists graph moves + val simpCoalFz = Core.simplifyCoalesceFreeze graph + + (* Note: freezeWkl or spillWkl are maintained lazily. *) + fun iterate wl = + case simpCoalFz wl of + wl as {spillWkl= _::_, ...} => iterate(selectSpill(graph,wl)) + | wl => + (case mode of + COPY_PROPAGATION => Core.finishCP graph + | REGISTER_ALLOCATION => + (case Core.optimisticSpilling graph wl of + [] => Core.finishRA graph + | spills => (rewriteGraph(graph,spills); + graphColoring(nodes)) + ) + ) + in if !dump_graph then Core.dumpGraph graph else (); + debug(cfg_before_ra,"before register allocation",blocks,regmap,an); + iterate worklists + end + val nodes = initialize() + in liveness(nodes,blocks); (* run liveness analysis *) + graphColoring(nodes); + debug(cfg_after_ra,"after register allocation",blocks,regmap,an); + ra_count := !ra_count + 1; + cluster + end + +end diff --git a/MLRISC/ra/ra-params.sig b/MLRISC/ra/ra-params.sig new file mode 100644 index 0000000..975d464 --- /dev/null +++ b/MLRISC/ra/ra-params.sig @@ -0,0 +1,68 @@ +(* ra-params.sig --- machine parameter required for register allocation. + * + * Copyright 1996 AT&T Bell Laboratories + * + *) + +signature RA_ARCH_PARAMS = sig + + structure Liveness : LIVENESS + structure InsnProps : INSN_PROPERTIES + structure AsmEmitter : INSTRUCTION_EMITTER + structure I : INSTRUCTIONS + + (* all modules work on the same instruction type *) + sharing Liveness.F.I = InsnProps.I = AsmEmitter.I = I + + val firstPseudoR : int + val maxPseudoR : unit -> int + val numRegs : unit -> int + val regSet : I.C.cellset -> int list + val defUse : I.instruction -> (int list * int list) +end + + + +signature RA_USER_PARAMS = sig + + structure I : INSTRUCTIONS + structure B : BLOCK_NAMES + + val nFreeRegs : int + val dedicated : int list (* dedicated registers *) + val getreg : {pref: int list, stamp:int, proh: int Array.array} -> int + val copyInstr : (int list * int list) * I.instruction -> I.instruction + + val spill : + {instr : I.instruction, (* instruction where spill is to occur *) + reg: int, (* register to spill *) + regmap: int -> int, (* register map *) + id : B.name (* block name *) + } + -> + {code: I.instruction list, (* spill or reload code *) + proh: int list, (* regs prohibited from future spilling *) + instr: I.instruction option} (* possibly changed instruction *) + + val reload : + {instr : I.instruction, (* instruction where spill is to occur *) + reg: int, (* register to spill *) + regmap: int -> int, (* register map *) + id : B.name (* block name *) + } + -> + {code: I.instruction list, (* spill or reload code *) + proh: int list} (* regs prohibited from future spilling *) + +end + + + +signature RA = sig + structure F : FLOWGRAPH + datatype mode = REGISTER_ALLOCATION | COPY_PROPAGATION + + val ra: mode -> (int * int) list -> F.cluster -> F.cluster +end + + diff --git a/MLRISC/ra/ra-priqueue.sig b/MLRISC/ra/ra-priqueue.sig new file mode 100644 index 0000000..cdec440 --- /dev/null +++ b/MLRISC/ra/ra-priqueue.sig @@ -0,0 +1,16 @@ +(* + * Interface of a fast (applicative) + * version of priority queue just for the register allocator + * + * -- Allen + *) +signature RA_PRIORITY_QUEUE = +sig + + type elem + datatype pri_queue = + EMPTY | TREE of elem * int * pri_queue * pri_queue + + val add : elem * pri_queue -> pri_queue + val merge : pri_queue * pri_queue -> pri_queue +end diff --git a/MLRISC/ra/ra-spill-types.sml b/MLRISC/ra/ra-spill-types.sml new file mode 100644 index 0000000..f1aca76 --- /dev/null +++ b/MLRISC/ra/ra-spill-types.sml @@ -0,0 +1,85 @@ +functor RASpillTypes(I : INSTRUCTIONS) = +struct + + structure G = RAGraph + structure C = I.C + structure CB = CellsBasis + + type copyInstr = + (CB.cell list * CB.cell list) * I.instruction -> I.instruction list + + (* + * Spill the value associated with reg into spillLoc. + * All definitions of instr should be renamed to a new temporary newReg. + *) + type spill = + {instr : I.instruction, (* instruction where spill is to occur *) + reg : CB.cell, (* register to spill *) + spillLoc : G.spillLoc, (* logical spill location *) + kill : bool, (* can we kill the current node? *) + annotations : Annotations.annotations ref (* annotations *) + } -> + {code : I.instruction list, (* instruction + spill code *) + proh : CB.cell list, (* prohibited from future spilling *) + newReg : CB.cell option (* the spilled value is available here *) + } + + (* Spill the register src into spillLoc. + * The value is originally from register reg. + *) + type spillSrc = + {src : CB.cell, (* register to spill from *) + reg : CB.cell, (* the register *) + spillLoc : G.spillLoc, (* logical spill location *) + annotations : Annotations.annotations ref (* annotations *) + } -> I.instruction list (* spill code *) + + (* + * Spill the temporary associated with a copy into spillLoc + *) + type spillCopyTmp = + {copy : I.instruction, (* copy to spill *) + reg : CB.cell, (* the register *) + spillLoc : G.spillLoc, (* logical spill location *) + annotations : Annotations.annotations ref (* annotations *) + } -> I.instruction (* spill code *) + + (* + * Reload the value associated with reg from spillLoc. + * All uses of instr should be renamed to a new temporary newReg. + *) + type reload = + {instr : I.instruction, (* instruction where spill is to occur *) + reg : CB.cell, (* register to spill *) + spillLoc : G.spillLoc, (* logical spill location *) + annotations : Annotations.annotations ref (* annotations *) + } -> + {code : I.instruction list, (* instr + reload code *) + proh : CB.cell list, (* prohibited from future spilling *) + newReg : CB.cell option (* the reloaded value is here *) + } + + (* + * Rename all uses fromSrc to toSrc + *) + type renameSrc = + {instr : I.instruction, (* instruction where spill is to occur *) + fromSrc : CB.cell, (* register to rename *) + toSrc : CB.cell (* register to rename to *) + } -> + {code : I.instruction list, (* renamed instr *) + proh : CB.cell list, (* prohibited from future spilling *) + newReg : CB.cell option (* the renamed value is here *) + } + + (* Reload the register dst from spillLoc. + * The value is originally from register reg. + *) + type reloadDst = + {dst : CB.cell, (* register to reload to *) + reg : CB.cell, (* the register *) + spillLoc : G.spillLoc, (* logical spill location *) + annotations : Annotations.annotations ref (* annotations *) + } -> I.instruction list (* reload code *) + +end diff --git a/MLRISC/ra/ra-spill-with-renaming.sml b/MLRISC/ra/ra-spill-with-renaming.sml new file mode 100644 index 0000000..d5d2b8b --- /dev/null +++ b/MLRISC/ra/ra-spill-with-renaming.sml @@ -0,0 +1,588 @@ +(* ra-spill-with-renaming.sml + * + * COPYRIGHT (c) 2001 Bell Labs, Lucent Technologies + * + * This version also performs local renaming on the spill code. + * For example, spilling t below + * + * t <- ... + * .. <- t + * .... + * .... + * .. <- t + * .. <- t + * + * would result in + * + * tmp1 <- ... + * mem[t] <- tmp1 + * .. <- tmp1 <---- rename from t to tmp1 + * .... + * .... + * tmp2 <- mem[t] + * .. <- tmp2 + * .. <- tmp2 <---- rename from t to tmp2 + * + * That is, we try to avoid inserting reload code whenever it is possible. + * This is done by keeping track of which values are live locally. + * + * Allen (5/9/00) + * + *) +(* + * This module manages the spill/reload process. + * The reason this is detached from the main module is that + * I can't understand the old code. + * + * Okay, now I understand the code. + * + * The new code does things slightly differently. + * Here, we are given an instruction and a list of registers to spill + * and reload. We rewrite the instruction until all instances of these + * registers are rewritten. + * + * (12/13/99) Some major caveats when spill coalescing/coloring is used: + * When parallel copies are generated and spill coalescing/coloring is used, + * two special cases have to be identified: + * + * Case 1 (spillLoc dst = spillLoc src) + * Suppose we have a parallel copy + * (u,v) <- (x,y) + * where u has to be spilled and y has to reloaded. When both + * u and y are mapped to location M. The following wrong code may + * be generated: + * M <- x (spill u) + * v <- M (reload y) + * This is incorrect. Instead, we generate a dummy copy and + * delay the spill after the reload, like this: + * + * tmp <- x (save value of u) + * v <- M (reload y) + * M <- tmp (spill u) + * Case 2 (spillLoc copyTmp = spillLoc src) + * Another case that can cause problems is when the spill location of + * the copy temporary is the same as that of one of the sources: + * + * (a, b, v) <- (b, a, u) where spillLoc(u) = spillLoc(tmp) = v + * + * The incorrect code is + * (a, b) <- (b, a) + * v <- M + * But then the shuffle code for the copy can clobber the location M. + * + * tmp <- M + * (a, b) <- (b, a) + * v <- tmp + * + * (Note that spillLoc copyTmp = spillLoc src can never happen) + * + * -- Allen + *) + +local + + val debug = false + +in + +functor RASpillWithRenaming + (structure InsnProps : INSN_PROPERTIES + structure Asm : INSTRUCTION_EMITTER + where I = InsnProps.I + + (* Spilling a variable v creates tiny live-ranges at all its definitions + * and uses. The following parameter is the maximal distance of + * live-ranges created between a definition and its use, + * measured in the number of instructions. If, max_dist = D, then + * the spill routine will never create a new live-range that is more + * than D instructions apart. + *) + val max_dist : int ref + + (* When this parameter is on, the spill routine will keep track of + * multiple values for the renaming process. This is recommended + * if the architecture has a lot of free registers. But it should + * probably be turned off on the x86. + *) + val keep_multiple_values : bool ref + ) : RA_SPILL = +struct + + structure I = InsnProps.I + structure P = InsnProps + structure C = I.C + structure CBase = CellsBasis + structure Core = RACore + structure G = Core.G + + fun error msg = MLRiscErrorMsg.error("RASpillWithRenaming",msg) + + fun dec1 n = Word.toIntX(Word.fromInt n - 0w1) + fun dec{block,insn} = {block=block,insn=dec1 insn} + + structure T = RASpillTypes(I) + open T + + fun uniq s = CBase.SortedCells.return(CBase.SortedCells.uniq s) + val i2s = Int.toString + fun pt2s{block,insn} = "b"^i2s block^":"^i2s insn + + val Asm.S.STREAM{emit, ...} = Asm.makeStream[] + + (* val spilledCopyTmps = MLRiscControl.getCounter "ra-spilled-copy-temps" *) + + (* + * The following function performs spilling. + *) + fun spillRewrite + {graph=G as G.GRAPH{showReg, spilledRegs, nodes, mode, dedicated,...}, + spill : spill, + spillCopyTmp : spillCopyTmp, + spillSrc : spillSrc, + renameSrc : renameSrc, + reload : reload, + reloadDst : reloadDst, + copyInstr : copyInstr, + cellkind, + spillSet, reloadSet, killSet + } = + let + (* Must do this to make sure the interference graph is + * reflected to the cells + *) + val _ = Core.updateCellAliases G + + val getSpillLoc = Core.spillLoc G + fun spillLocOf(CBase.CELL{id, ...}) = getSpillLoc id + val spillLocsOf = map spillLocOf + val getnode = IntHashTable.lookup nodes + val getnode = fn CBase.CELL{id, ...} => getnode id + + val MAX_DIST = !max_dist + + val insnDefUse = P.defUse cellkind + + fun hasNonDedicated rs = + let fun loop [] = false + | loop(r::rs) = + if dedicated(CBase.registerId r) then loop rs else true + in loop rs end + + (* Merge prohibited registers *) + val enterSpill = IntHashTable.insert spilledRegs + val addProh = app (fn c => enterSpill(CBase.registerId c,true)) + + val getSpills = G.PPtHashTable.find spillSet + val getSpills = fn p => case getSpills p of SOME s => s | NONE => [] + val getReloads = G.PPtHashTable.find reloadSet + val getReloads = fn p => case getReloads p of SOME s => s | NONE => [] + val getKills = G.PPtHashTable.find killSet + val getKills = fn p => case getKills p of SOME s => s | NONE => [] + + fun getLoc(G.NODE{color=ref(G.ALIASED n), ...}) = getLoc n + | getLoc(G.NODE{color=ref(G.MEMREG(_, m)), ...}) = G.MEM_REG m + | getLoc(G.NODE{color=ref(G.SPILL_LOC s), ...}) = G.FRAME s + | getLoc(G.NODE{color=ref(G.SPILLED), number, ...}) = G.FRAME number + | getLoc(G.NODE{color=ref(G.PSEUDO), number, ...}) = G.FRAME number + | getLoc _ = error "getLoc" + + fun printRegs regs = + app (fn r => print(concat[ + CBase.toString r, " [", Core.spillLocToString G (CBase.cellId r), + "] " + ])) regs + + val parallelCopies = Word.andb(Core.HAS_PARALLEL_COPIES, mode) <> 0w0 + + fun chase(CBase.CELL{col=ref(CBase.ALIASED c), ...}) = chase c + | chase c = c + + fun cellId(CBase.CELL{id, ...}) = id + + fun sameCell(CBase.CELL{id=x,...}, CBase.CELL{id=y, ...}) = x=y + + fun same(x,regToSpill) = sameCell(chase x,regToSpill) + + (* + * Rewrite the instruction given that a bunch of registers have + * to be spilled and reloaded. + *) + fun spillRewrite{pt, instrs, annotations} = + let + (* Environment manipulation functions. + * The environment is just a list of triples. + *) + fun update(pt,env,r,NONE) = kill(env, r) + | update(pt,env,r,SOME newReg) = + (* if the register is a dedicated register, conservatively kill + * the value r in the current environment. this is necessary + * because dedicated registers have been removed from the + * def/use information. + *) + if dedicated(CBase.registerId newReg) + then kill(env, r) + else (r,newReg,pt)::(if !keep_multiple_values then env else []) + + and kill(env,r) = + let fun loop([], env') = env' + | loop((binding as (r',_,_))::env,env') = + loop(env, + if CBase.sameColor(r, r') then env' else binding::env') + in loop(env, []) end + + (* + * Insert reloading code for an instruction. + * Note: reload code goes after the instruction, if any. + *) + fun reloadInstr(pt,instr,regToSpill,env,spillLoc) = + let val {code, proh, newReg} = + reload{instr=instr,reg=regToSpill, + spillLoc=spillLoc,annotations=annotations} + in addProh(proh); + (code,update(pt,env,regToSpill,newReg)) + end + + (* + * Renaming the source for an instruction. + *) + fun renameInstr(pt,instr,regToSpill,env,toSrc) = + let val {code, proh, newReg} = + renameSrc{instr=instr, fromSrc=regToSpill,toSrc=toSrc} + in addProh(proh); + (code,update(pt,env,regToSpill,newReg)) + end + + (* + * Remove uses of regToSpill from a set of parallel copies. + * If there are multiple uses, then return multiple moves. + *) + fun extractUses(regToSpill, rds, rss) = + let fun loop(rd::rds, rs::rss, newRds, rds', rss') = + if same(rs,regToSpill) then + loop(rds, rss, rd::newRds, rds', rss') + else + loop(rds, rss, newRds, rd::rds', rs::rss') + | loop(_, _, newRds, rds', rss') = (newRds, rds', rss') + in loop(rds, rss, [], [], []) end + + (* + * Insert reload code for the sources of a copy. + * Transformation: + * d1..dn <- s1..sn + * => + * d1..dn/r <- s1...sn/r. + * reload code + * reload copies + * + *) + fun reloadCopySrc(instr,regToSpill,env,spillLoc) = + let val (dst, src) = P.moveDstSrc instr + val (rds, copyDst, copySrc) = extractUses(regToSpill, dst, src) + fun processMoves([], reloadCode) = reloadCode + | processMoves(rd::rds, reloadCode) = + let val code = + reloadDst{spillLoc=spillLoc,reg=regToSpill, + dst=rd,annotations=annotations} + in processMoves(rds, code@reloadCode) + end + val reloadCode = processMoves(rds, []) + in case copyDst of + [] => (reloadCode, env) + | _ => (copyInstr((copyDst, copySrc), instr) @ reloadCode, env) + end + + fun diff({block=b1:int,insn=i1},{block=b2,insn=i2}) = + if b1=b2 then i1-i2 else MAX_DIST+1 + + (* + * Insert reload code + *) + fun reload(pt,instr,regToSpill,env,spillLoc) = + if P.moveInstr instr then + reloadCopySrc(instr,regToSpill,env,spillLoc) + else + let fun lookup [] = + reloadInstr(pt,instr,regToSpill,env,spillLoc) + | lookup((r,currentReg,defPt)::env) = + if CBase.sameColor(r,regToSpill) then + if defPt = pt + then lookup env(* this is NOT the right renaming!*) + else if diff(defPt,pt) <= MAX_DIST then + renameInstr(pt,instr,regToSpill,env,currentReg) + else + reloadInstr(pt,instr,regToSpill,env,spillLoc) + else + lookup(env) + in lookup env + end + + (* + * Check whether the id is in a list + *) + fun containsId(id,[]) = false + | containsId(id:CBase.cell_id,r::rs) = r = id orelse containsId(id,rs) + fun spillConflict(G.FRAME loc, rs) = containsId(~loc, rs) + | spillConflict(G.MEM_REG(CBase.CELL{id, ...}), rs) = + containsId(id, rs) + + fun contains(r',[]) = false + | contains(r',r::rs) = sameCell(r',r) orelse contains(r',rs) + + (* + * Insert spill code for an instruction. + * Spill code occur after the instruction. + * If the value in regToSpill is never used, the client also + * has the opportunity to remove the instruction. + *) + fun spillInstr(pt,instr,regToSpill,spillLoc,kill,env) = + let val {code, proh, newReg} = + spill{instr=instr, + kill=kill, spillLoc=spillLoc, + reg=regToSpill, annotations=annotations} + in addProh(proh); + (code, update(pt,env,regToSpill,newReg)) + end + + (* Remove the definition regToSpill <- from + * parallel copies rds <- rss. + * Note, there is a guarantee that regToSpill is not aliased + * to another register in the rds set. + *) + fun extractDef(regToSpill,rds,rss,kill) = + let fun loop(rd::rds, rs::rss, rds', rss') = + if spillLocOf rd = spillLocOf rs then + (rs, rds@rds', rss@rss', true) + else if same(rd, regToSpill) then + (rs, rds@rds', rss@rss', kill) + else loop(rds, rss, rd::rds', rs::rss') + | loop _ = let + fun pr r = print(concat[ + CBase.toString r, ":", i2s(spillLocOf r), " " + ]) + in + print("rds="); + app pr rds; + print("\nrss="); + app pr rss; + print "\n"; + error("extractDef: "^CBase.toString regToSpill) + end + in loop(rds, rss, [], []) end + + (* + * Insert spill code for a destination of a copy + * suppose d = r and we have a copy d <- s in + * d1...dn <- s1...sn + * + * d1...dn <- s1...sn + * => + * spill s to spillLoc + * d1...dn/d <- s1...sn/s + * + * However, if the spill code may ovewrite the spill location + * shared by other uses, we do the following less + * efficient scheme: + * + * /* save the result of d */ + * d1...dn, tmp <- s1...sn, s + * spill tmp to spillLoc /* spill d */ + * + *) + fun spillCopyDst(pt,instr,regToSpill,spillLoc, + kill,env,don'tOverwrite) = + let val (dst, src) = P.moveDstSrc instr + val (mvSrc,copyDst,copySrc,kill) = + extractDef(regToSpill,dst,src,kill) + val copy = case copyDst of + [] => [] + | _ => copyInstr((copyDst,copySrc),instr) + in if kill + then (* kill the move *) + ((* print ("Copy "^Int.toString(hd mvDst)^" <- "^ + Int.toString(hd mvSrc)^" removed\n"); *) + (copy, env) + ) + else (* normal spill *) + if spillConflict(spillLoc, don'tOverwrite) orelse + (* if the register is a dedicated register, treat the copy as + * a normal spill. this is necessary because dedicated registers + * have been removed from the def/use information. + *) + dedicated(CBase.registerId mvSrc) then + let (* cycle found *) + (*val _ = print("Register r"^Int.toString regToSpill^ + " overwrites ["^Int.toString spillLoc^"]\n")*) + val tmp = I.C.newVar regToSpill (* new temporary *) + val copy = copyInstr((tmp::copyDst, mvSrc::copySrc), + instr) + val spillCode = spillSrc{src=tmp,reg=regToSpill, + spillLoc=spillLoc, + annotations=annotations} + in (copy @ spillCode, [(regToSpill,tmp,pt)]) + end + else + let (* spill the move instruction *) + val spillCode = spillSrc{src=mvSrc,reg=regToSpill, + spillLoc=spillLoc, + annotations=annotations} + in (spillCode @ copy, [(regToSpill,mvSrc,pt)]) + end + end + + (* + * Insert spill code for a copy + *) + fun spillCopy(pt,instr,regToSpill,spillLoc,kill,env,don'tOverwrite)= + case P.moveTmpR instr of + NONE => spillCopyDst(pt,instr,regToSpill,spillLoc,kill,env, + don'tOverwrite) + | SOME tmp => + if same(tmp, regToSpill) + then ((* spilledCopyTmps := !spilledCopyTmps + 1; *) + [spillCopyTmp{copy=instr, spillLoc=spillLoc, + reg=regToSpill, + annotations=annotations}], []) + else spillCopyDst(pt,instr,regToSpill,spillLoc,kill, + env, don'tOverwrite) + + (* + * Insert spill code + *) + fun spill(pt,instr,regToSpill,spillLoc,killSet,env,don'tOverwrite) = + let val kill = contains(regToSpill,killSet) + in if P.moveInstr instr then + spillCopy(pt,instr,regToSpill,spillLoc,kill,env,don'tOverwrite) + else + spillInstr(pt,instr,regToSpill,spillLoc,kill,env) + end + + fun contains([],reg) = false + | contains(r::rs,reg) = same(r,reg) orelse contains(rs,reg) + fun hasDef(i,reg) = contains(#1(insnDefUse i),reg) + fun hasUse(i,reg) = contains(#2(insnDefUse i),reg) + + fun spillOneReg(pt,[],_,_,_,env,_) = ([], env) + | spillOneReg(pt,i::instrs,r,spillLoc,killSet,env,don'tOverwrite)= + if hasDef(i,r) then + let val (instrs',env) = + spill(pt,i,r,spillLoc,killSet,env,don'tOverwrite) + in spillOneReg(pt,instrs'@instrs,r,spillLoc, + killSet,env,don'tOverwrite) + end + else + let val (instrs,env) = + spillOneReg(pt,instrs,r,spillLoc,killSet, + env,don'tOverwrite) + in (i::instrs, env) + end + + fun reloadOneReg(pt,[],_,env,_) = ([], env) + | reloadOneReg(pt,i::instrs,r,env,spillLoc) = + if hasUse(i,r) then + let val (instrs',env) = reload(pt,i,r,env,spillLoc) + in reloadOneReg(pt,instrs'@instrs,r,env,spillLoc) + end + else + let val (instrs, env) = reloadOneReg(pt,instrs,r,env,spillLoc) + in (i::instrs, env) + end + + (* This function spills a set of registers for an instruction *) + fun spillAll(pt,instrs,[],killSet,env,don'tOverwrite) = (instrs,env) + | spillAll(pt,instrs,r::rs,killSet,env,don'tOverwrite) = + let val node = getnode r + val spillLoc = getLoc node + val (instrs, env) = + spillOneReg(pt,instrs,r,spillLoc,killSet, + env,don'tOverwrite) + in spillAll(pt,instrs,rs,killSet,env,don'tOverwrite) + end + + (* This function reloads a set of registers for an instruction *) + fun reloadAll(pt,instrs,env,[]) = (instrs, env) + | reloadAll(pt,instrs,env,r::rs) = + let val node = getnode r + val spillLoc = getLoc node + val (instrs, env) = reloadOneReg(pt,instrs,r,env,spillLoc) + in reloadAll(pt, instrs, env, rs) + end + + fun loop([], pt, env, newInstrs) = newInstrs + | loop(instr::rest, pt, env, newInstrs) = + let val spillRegs = getSpills pt + val reloadRegs = getReloads pt + in case (spillRegs, reloadRegs) of + ([], []) => + let val env' = + case env of + [] => [] (* An approximation here *) + | _ => let val (defs, uses) = insnDefUse instr + in if hasNonDedicated defs orelse + hasNonDedicated uses then [] + else env + end + (* should be handled better *) + in loop(rest, dec pt, env', instr::newInstrs) + end + | _ => + (* Eliminate duplicates from the spill/reload candidates *) + let val killRegs = getKills pt + val spillRegs = uniq spillRegs + val reloadRegs = uniq reloadRegs + + (* spill locations that we can't overwrite if we + * are spilling a parallel copy + *) + val don'tOverwrite = + if parallelCopies then spillLocsOf reloadRegs + else [] + + fun prEnv env = ( + print("Env="); + app (fn (r,v,_) => + print(concat[ + CBase.toString r, "=>", + CBase.toString v, " " + ])) env; + print "\n") + + val (instrs,env) = + spillAll(pt,[instr],spillRegs,killRegs, + env,don'tOverwrite) + + val _ = if debug then + (print("pt="^pt2s pt^"\n"); + case spillRegs of + [] => () + | _ => (print("Spilling "); + printRegs spillRegs; + print "\n"); + case reloadRegs of + [] => () + | _ => (print("Reloading "); + printRegs reloadRegs; + print "\n"); + print "Before:"; emit instr; + prEnv env + ) else () + + val (instrs, env) = + reloadAll(pt,instrs,env,reloadRegs) + + val _ = if debug then + (print "After:"; app emit instrs; + print "------------------\n") + else () + + fun concat([], l) = l + | concat(a::b, l) = concat(b, a::l) + in loop(rest, dec pt, env, concat(instrs, newInstrs)) + end + end + in loop(rev instrs, pt, [], []) + end + in spillRewrite + end +end + +end (* local *) diff --git a/MLRISC/ra/ra-spill.sig b/MLRISC/ra/ra-spill.sig new file mode 100644 index 0000000..2d19e00 --- /dev/null +++ b/MLRISC/ra/ra-spill.sig @@ -0,0 +1,118 @@ +(* + * This module manages the spill/reload process. + * + * -- Allen + *) +signature RA_SPILL = +sig + + structure I : INSTRUCTIONS + structure G : RA_GRAPH = RAGraph + structure C : CELLS + sharing I.C = C + + structure CB : CELLS_BASIS = CellsBasis + type copyInstr = + (CB.cell list * CB.cell list) * I.instruction -> I.instruction list + + (* + * Spill the value associated with reg into spillLoc. + * All definitions of instr should be renamed to a new temporary newReg. + *) + type spill = + {instr : I.instruction, (* instruction where spill is to occur *) + reg : CB.cell, (* register to spill *) + spillLoc : G.spillLoc, (* logical spill location *) + kill : bool, (* can we kill the current node? *) + annotations : Annotations.annotations ref (* annotations *) + } -> + {code : I.instruction list, (* instruction + spill code *) + proh : CB.cell list, (* prohibited from future spilling *) + newReg : CB.cell option (* the spilled value is available here *) + } + + (* Spill the register src into spillLoc. + * The value is originally from register reg. + *) + type spillSrc = + {src : CB.cell, (* register to spill from *) + reg : CB.cell, (* the register *) + spillLoc : G.spillLoc, (* logical spill location *) + annotations : Annotations.annotations ref (* annotations *) + } -> I.instruction list (* spill code *) + + (* + * Spill the temporary associated with a copy into spillLoc + *) + type spillCopyTmp = + {copy : I.instruction, (* copy to spill *) + reg : CB.cell, (* the register *) + spillLoc : G.spillLoc, (* logical spill location *) + annotations : Annotations.annotations ref (* annotations *) + } -> I.instruction (* spill code *) + + (* + * Reload the value associated with reg from spillLoc. + * All uses of instr should be renamed to a new temporary newReg. + *) + type reload = + {instr : I.instruction, (* instruction where spill is to occur *) + reg : CB.cell, (* register to spill *) + spillLoc : G.spillLoc, (* logical spill location *) + annotations : Annotations.annotations ref (* annotations *) + } -> + {code : I.instruction list, (* instr + reload code *) + proh : CB.cell list, (* prohibited from future spilling *) + newReg : CB.cell option (* the reloaded value is here *) + } + + (* + * Rename all uses fromSrc to toSrc + *) + type renameSrc = + {instr : I.instruction, (* instruction where spill is to occur *) + fromSrc : CB.cell, (* register to rename *) + toSrc : CB.cell (* register to rename to *) + } -> + {code : I.instruction list, (* renamed instr *) + proh : CB.cell list, (* prohibited from future spilling *) + newReg : CB.cell option (* the renamed value is here *) + } + + (* Reload the register dst from spillLoc. + * The value is originally from register reg. + *) + type reloadDst = + {dst : CB.cell, (* register to reload to *) + reg : CB.cell, (* the register *) + spillLoc : G.spillLoc, (* logical spill location *) + annotations : Annotations.annotations ref (* annotations *) + } -> I.instruction list (* reload code *) + + (* + * The following function rewrites an instruction and insert + * spill and reload code around it. The list of spill and reload + * registers may have duplicates. + *) + val spillRewrite : + { graph : G.interferenceGraph, + spill : spill, + spillSrc : spillSrc, + spillCopyTmp : spillCopyTmp, + reload : reload, + reloadDst : reloadDst, + renameSrc : renameSrc, + copyInstr : copyInstr, + cellkind : CB.cellkind, + spillSet : CB.cell list G.PPtHashTable.hash_table, + reloadSet : CB.cell list G.PPtHashTable.hash_table, + killSet : CB.cell list G.PPtHashTable.hash_table + } -> + { pt : G.programPoint, (* starting program pt *) + annotations : Annotations.annotations ref, (* annotations *) + instrs : I.instruction list (* instructions to spill *) + } -> + I.instruction list (* instruction sequence after rewriting *) + (* Note, instructions are in reverse order *) + +end diff --git a/MLRISC/ra/ra-spill.sml b/MLRISC/ra/ra-spill.sml new file mode 100644 index 0000000..d89a67a --- /dev/null +++ b/MLRISC/ra/ra-spill.sml @@ -0,0 +1,455 @@ +(* + * This module manages the spill/reload process. + * The reason this is detached from the main module is that + * I can't understand the old code. + * + * Okay, now I understand the code. + * + * The new code does things slightly differently. + * Here, we are given an instruction and a list of registers to spill + * and reload. We rewrite the instruction until all instances of these + * registers are rewritten. + * + * (12/13/99) Some major caveats when spill coalescing/coloring is used: + * When parallel copies are generated and spill coalescing/coloring is used, + * two special cases have to be identified: + * + * Case 1 (spillLoc dst = spillLoc src) + * Suppose we have a parallel copy + * (u,v) <- (x,y) + * where u has to be spilled and y has to reloaded. When both + * u and y are mapped to location M. The following wrong code may + * be generated: + * M <- x (spill u) + * v <- M (reload y) + * This is incorrect. Instead, we generate a dummy copy and + * delay the spill after the reload, like this: + * + * tmp <- x (save value of u) + * v <- M (reload y) + * M <- tmp (spill u) + * Case 2 (spillLoc copyTmp = spillLoc src) + * Another case that can cause problems is when the spill location of + * the copy temporary is the same as that of one of the sources: + * + * (a, b, v) <- (b, a, u) where spillLoc(u) = spillLoc(tmp) = v + * + * The incorrect code is + * (a, b) <- (b, a) + * v <- M + * But then the shuffle code for the copy can clobber the location M. + * + * tmp <- M + * (a, b) <- (b, a) + * v <- tmp + * + * (Note that spillLoc copyTmp = spillLoc src can never happen) + * + * -- Allen + *) + +local + + val debug = false + +in + +functor RASpill + (structure InsnProps : INSN_PROPERTIES + structure Asm : INSTRUCTION_EMITTER + where I = InsnProps.I + ) : RA_SPILL = +struct + + structure I = InsnProps.I + structure P = InsnProps + structure C = I.C + structure Core = RACore + structure G = Core.G + structure CB = CellsBasis + + fun error msg = MLRiscErrorMsg.error("RASpill",msg) + + val keep_dead_copies = + MLRiscControl.mkFlag + ("ra-preserve-dead-copies", + "Dead copies are not removed when spilling") + + fun dec1 n = Word.toIntX(Word.fromInt n - 0w1) + fun dec{block,insn} = {block=block,insn=dec1 insn} + + structure T = RASpillTypes(I) + open T + + fun uniq s = CB.SortedCells.return(CB.SortedCells.uniq s) + val i2s = Int.toString + fun pt2s{block,insn} = "b"^i2s block^":"^i2s insn + + val Asm.S.STREAM{emit, ...} = Asm.makeStream[] + + (* val spilledCopyTmps = MLRiscControl.getCounter "ra-spilled-copy-temps" *) + + (* + * The following function performs spilling. + *) + fun spillRewrite + {graph=G as G.GRAPH{showReg, spilledRegs, nodes, mode, ...}, + spill : spill, + spillCopyTmp : spillCopyTmp, + spillSrc : spillSrc, + renameSrc : renameSrc, + reload : reload, + reloadDst : reloadDst, + copyInstr : copyInstr, + cellkind, + spillSet, reloadSet, killSet + } = + let + (* Must do this to make sure the interference graph is + * reflected to the cells + *) + val _ = Core.updateCellAliases G + + val getSpillLoc = Core.spillLoc G + fun spillLocOf(CB.CELL{id, ...}) = getSpillLoc id + val spillLocsOf = map spillLocOf + val getnode = IntHashTable.lookup nodes + val getnode = fn CB.CELL{id, ...} => getnode id + + val insnDefUse = P.defUse cellkind + + (* Merge prohibited registers *) + val enterSpill = IntHashTable.insert spilledRegs + val addProh = app (fn c => enterSpill(CellsBasis.registerId c,true)) + + val getSpills = G.PPtHashTable.find spillSet + val getSpills = fn p => case getSpills p of SOME s => s | NONE => [] + val getReloads = G.PPtHashTable.find reloadSet + val getReloads = fn p => case getReloads p of SOME s => s | NONE => [] + val getKills = G.PPtHashTable.find killSet + val getKills = fn p => case getKills p of SOME s => s | NONE => [] + + fun getLoc(G.NODE{color=ref(G.ALIASED n), ...}) = getLoc n + | getLoc(G.NODE{color=ref(G.MEMREG(_, m)), ...}) = G.MEM_REG m + | getLoc(G.NODE{color=ref(G.SPILL_LOC s), ...}) = G.FRAME s + | getLoc(G.NODE{color=ref(G.SPILLED), number, ...}) = G.FRAME number + | getLoc(G.NODE{color=ref(G.PSEUDO), number, ...}) = G.FRAME number + | getLoc _ = error "getLoc" + + fun printRegs regs = + app (fn r => print(CellsBasis.toString r^" ["^ + Core.spillLocToString G (CellsBasis.cellId r)^"] ")) regs + + val parallelCopies = Word.andb(Core.HAS_PARALLEL_COPIES, mode) <> 0w0 + + fun chase(CB.CELL{col=ref(CB.ALIASED c), ...}) = chase c + | chase c = c + + fun cellId(CB.CELL{id, ...}) = id + + fun sameCell(CB.CELL{id=x,...}, CB.CELL{id=y, ...}) = x=y + + fun same(x,regToSpill) = sameCell(chase x,regToSpill) + + (* + * Rewrite the instruction given that a bunch of registers have + * to be spilled and reloaded. + *) + fun spillRewrite{pt, instrs, annotations} = + let + (* + * Insert reloading code for an instruction. + * Note: reload code goes after the instruction, if any. + *) + fun reloadInstr(instr,regToSpill,spillLoc) = + let val {code, proh, newReg} = + reload{instr=instr,reg=regToSpill, + spillLoc=spillLoc,annotations=annotations} + in addProh(proh); + code + end + + (* + * Renaming the source for an instruction. + *) + fun renameInstr(instr,regToSpill,toSrc) = + let val {code, proh, newReg} = + renameSrc{instr=instr, fromSrc=regToSpill,toSrc=toSrc} + in addProh(proh); + code + end + + (* + * Remove uses of regToSpill from a set of parallel copies. + * If there are multiple uses, then return multiple moves. + *) + fun extractUses(regToSpill, rds, rss) = + let fun loop(rd::rds, rs::rss, newRds, rds', rss') = + if same(rs,regToSpill) then + loop(rds, rss, rd::newRds, rds', rss') + else + loop(rds, rss, newRds, rd::rds', rs::rss') + | loop(_, _, newRds, rds', rss') = (newRds, rds', rss') + in loop(rds, rss, [], [], []) end + + (* + * Insert reload code for the sources of a copy. + * Transformation: + * d1..dn <- s1..sn + * => + * d1..dn/r <- s1...sn/r. + * reload code + * reload copies + * + *) + fun reloadCopySrc(instr,regToSpill,spillLoc) = + let val (dst, src) = P.moveDstSrc instr + val (rds, copyDst, copySrc) = extractUses(regToSpill, dst, src) + fun processMoves([], reloadCode) = reloadCode + | processMoves(rd::rds, reloadCode) = + let val code = + reloadDst{spillLoc=spillLoc,reg=regToSpill, + dst=rd,annotations=annotations} + in processMoves(rds, code@reloadCode) + end + val reloadCode = processMoves(rds, []) + in case copyDst of + [] => reloadCode + | _ => copyInstr((copyDst, copySrc), instr) @ reloadCode + end + + (* + * Insert reload code + *) + fun reload(instr,regToSpill,spillLoc) = + if P.moveInstr instr then + reloadCopySrc(instr,regToSpill,spillLoc) + else + reloadInstr(instr,regToSpill,spillLoc) + + (* + * Check whether the id is in a list + *) + fun containsId(id,[]) = false + | containsId(id:CB.cell_id,r::rs) = r = id orelse containsId(id,rs) + fun spillConflict(G.FRAME loc, rs) = containsId(~loc, rs) + | spillConflict(G.MEM_REG(CB.CELL{id, ...}), rs) = + containsId(id, rs) + + fun contains(r',[]) = false + | contains(r',r::rs) = sameCell(r',r) orelse contains(r',rs) + + (* + * Insert spill code for an instruction. + * Spill code occur after the instruction. + * If the value in regToSpill is never used, the client also + * has the opportunity to remove the instruction. + *) + fun spillInstr(instr,regToSpill,spillLoc,kill) = + let val {code, proh, newReg} = + spill{instr=instr, + kill=kill, spillLoc=spillLoc, + reg=regToSpill, annotations=annotations} + in addProh(proh); + code + end + + (* Remove the definition regToSpill <- from + * parallel copies rds <- rss. + * Note, there is a guarantee that regToSpill is not aliased + * to another register in the rds set. + *) + fun extractDef(regToSpill,rds,rss,kill) = + let fun loop(rd::rds, rs::rss, rds', rss') = + if spillLocOf rd = spillLocOf rs then + (rs, rds@rds', rss@rss', true) + else if same(rd, regToSpill) then + (rs, rds@rds', rss@rss', kill) + else loop(rds, rss, rd::rds', rs::rss') + | loop _ = + (print("rds="); + app (fn r => print(CellsBasis.toString r^":"^ + i2s(spillLocOf r)^" ")) rds; + print("\nrss="); + app (fn r => print(CellsBasis.toString r^":"^ + i2s(spillLocOf r)^" ")) rss; + print "\n"; + error("extractDef: "^CellsBasis.toString regToSpill)) + in loop(rds, rss, [], []) end + + (* + * Insert spill code for a destination of a copy + * suppose d = r and we have a copy d <- s in + * d1...dn <- s1...sn + * + * d1...dn <- s1...sn + * => + * spill s to spillLoc + * d1...dn/d <- s1...sn/s + * + * However, if the spill code may ovewrite the spill location + * shared by other uses, we do the following less + * efficient scheme: + * + * /* save the result of d */ + * d1...dn, tmp <- s1...sn, s + * spill tmp to spillLoc /* spill d */ + * + *) + fun spillCopyDst(instr,regToSpill,spillLoc,kill,don'tOverwrite) = + let val (dst, src) = P.moveDstSrc instr + val (mvSrc,copyDst,copySrc,kill) = + extractDef(regToSpill,dst,src,kill) + val copy = case copyDst of + [] => [] + | _ => copyInstr((copyDst,copySrc),instr) + in + if kill andalso not(!keep_dead_copies) + then (* kill the move *) + ((* print ("Copy "^Int.toString(hd mvDst)^" <- "^ + Int.toString(hd mvSrc)^" removed\n"); *) + copy + ) + else (* normal spill *) + if spillConflict(spillLoc, don'tOverwrite) then + let (* cycle found *) + (*val _ = print("Register r"^Int.toString regToSpill^ + " overwrites ["^Int.toString spillLoc^"]\n")*) + val tmp = C.newVar regToSpill (* new temporary *) + val copy = copyInstr((tmp::copyDst, mvSrc::copySrc), + instr) + val spillCode = spillSrc{src=tmp,reg=regToSpill, + spillLoc=spillLoc, + annotations=annotations} + in copy @ spillCode + end + else + let (* spill the move instruction *) + val spillCode = spillSrc{src=mvSrc,reg=regToSpill, + spillLoc=spillLoc, + annotations=annotations} + in spillCode @ copy + end + end + + (* + * Insert spill code for a copy + *) + fun spillCopy(instr,regToSpill,spillLoc,kill,don'tOverwrite) = + case P.moveTmpR instr of + NONE => spillCopyDst(instr,regToSpill,spillLoc,kill, + don'tOverwrite) + | SOME tmp => + if same(tmp, regToSpill) + then ((* spilledCopyTmps := !spilledCopyTmps + 1; *) + [spillCopyTmp{copy=instr, reg=regToSpill, + spillLoc=spillLoc, + annotations=annotations}]) + else spillCopyDst(instr,regToSpill,spillLoc,kill, + don'tOverwrite) + + (* + * Insert spill code + *) + fun spill(instr,regToSpill,spillLoc,killSet,don'tOverwrite) = + let val kill = contains(regToSpill,killSet) + in if P.moveInstr instr then + spillCopy(instr,regToSpill,spillLoc,kill,don'tOverwrite) + else + spillInstr(instr,regToSpill,spillLoc,kill) + end + + fun contains([],reg) = false + | contains(r::rs,reg) = same(r,reg) orelse contains(rs,reg) + fun hasDef(i,reg) = contains(#1(insnDefUse i),reg) + fun hasUse(i,reg) = contains(#2(insnDefUse i),reg) + + fun spillOneReg([],_,_,_,_) = [] + | spillOneReg(i::instrs,r,spillLoc,killSet,don'tOverwrite) = + if hasDef(i,r) + then + spillOneReg(spill(i,r,spillLoc,killSet,don'tOverwrite)@instrs, + r,spillLoc,killSet,don'tOverwrite) + else i::spillOneReg(instrs,r,spillLoc,killSet,don'tOverwrite) + + fun reloadOneReg([],_,_) = [] + | reloadOneReg(i::instrs,r,spillLoc) = + if hasUse(i,r) + then reloadOneReg(reload(i,r,spillLoc)@instrs, + r,spillLoc) + else i::reloadOneReg(instrs,r,spillLoc) + + (* This function spills a set of registers for an instruction *) + fun spillAll(instrs,[],killSet,don'tOverwrite) = instrs + | spillAll(instrs,r::rs,killSet,don'tOverwrite) = + let val node = getnode r + val spillLoc = getLoc node + in spillAll( + spillOneReg(instrs,r,spillLoc,killSet,don'tOverwrite), + rs,killSet,don'tOverwrite) + end + + (* This function reloads a set of registers for an instruction *) + fun reloadAll(instrs,[]) = instrs + | reloadAll(instrs,r::rs) = + let val node = getnode r + val spillLoc = getLoc node + in reloadAll(reloadOneReg(instrs,r,spillLoc),rs) + end + + fun loop([], pt, newInstrs) = newInstrs + | loop(instr::rest, pt, newInstrs) = + let val spillRegs = getSpills pt + val reloadRegs = getReloads pt + in case (spillRegs, reloadRegs) of + ([], []) => loop(rest, dec pt, instr::newInstrs) + | _ => + (* Eliminate duplicates from the spill/reload candidates *) + let val killRegs = getKills pt + val spillRegs = uniq spillRegs + val reloadRegs = uniq reloadRegs + + (* spill locations that we can't overwrite if we + * are spilling a parallel copy + *) + val don'tOverwrite = + if parallelCopies then spillLocsOf reloadRegs + else [] + + val instrs = spillAll([instr],spillRegs,killRegs, + don'tOverwrite) + + val _ = if debug then + (print("pt="^pt2s pt^"\n"); + case spillRegs of + [] => () + | _ => (print("Spilling "); + printRegs spillRegs; + print "\n"); + case reloadRegs of + [] => () + | _ => (print("Reloading "); + printRegs reloadRegs; + print "\n"); + print "Before:"; emit instr + ) else () + + val instrs = reloadAll(instrs,reloadRegs) + + val _ = if debug then + (print "After:"; app emit instrs; + print "------------------\n") + else () + + fun concat([], l) = l + | concat(a::b, l) = concat(b, a::l) + in loop(rest, dec pt, concat(instrs, newInstrs)) + end + end + in loop(rev instrs, pt, []) + end + in spillRewrite + end +end + +end (* local *) diff --git a/MLRISC/ra/ra-spillheur.sig b/MLRISC/ra/ra-spillheur.sig new file mode 100644 index 0000000..50f0224 --- /dev/null +++ b/MLRISC/ra/ra-spillheur.sig @@ -0,0 +1,23 @@ +(* + * Spill heuristics should match the following signature. + *) +signature RA_SPILL_HEURISTICS = +sig + structure G : RA_GRAPH = RAGraph + + exception NoCandidate + + val mode : G.mode + + val init : unit -> unit + + val chooseSpillNode : + { graph : G.interferenceGraph, + spillWkl : G.node list, + hasBeenSpilled : int -> bool + } -> + { spillWkl : G.node list, + node : G.node option, + cost : real + } +end diff --git a/MLRISC/ra/ra.sig b/MLRISC/ra/ra.sig new file mode 100644 index 0000000..7fdacac --- /dev/null +++ b/MLRISC/ra/ra.sig @@ -0,0 +1,66 @@ +(* + * The interface to the new register allocator. + * + * -- Allen + *) +signature RA = +sig + + structure I : INSTRUCTIONS + structure C : CELLS + structure F : RA_FLOWGRAPH + sharing F.I = I + sharing I.C = C + structure CB : CELLS_BASIS = CellsBasis + + type getreg = { pref : CB.cell_id list, + stamp : int, + proh : int Array.array + } -> CB.cell_id + + type mode = word + + datatype spillLoc = datatype RAGraph.spillLoc + + (* + * Optimizations/options: + * Or them together + *) + val NO_OPTIMIZATION : mode + val DEAD_COPY_ELIM : mode + val BIASED_SELECTION : mode + val SPILL_COLORING : mode + val SPILL_COALESCING : mode + val SPILL_PROPAGATION : mode + val HAS_PARALLEL_COPIES : mode + (* The above MUST be used when spill coloring is used and + * you have parallel copies in the program. Otherwise, phathom + * problems involving copy temporaries may appear. + *) + + (* + * Perform register allocation. + * + * spillProh is a list of register ranges (inclusive) that cannot be spilled. + * + *) + type raClient = + { cellkind : CB.cellkind, (* kind of register *) + spillProh : CB.cell list, (* don't spill these *) + memRegs : CB.cell list, (* memory registers *) + K : int, (* number of colors *) + dedicated : int -> bool, (* dedicated registers *) + getreg : getreg, (* how to find a color *) + copyInstr : F.Spill.copyInstr, (* how to make a copy *) + spill : F.Spill.spill, (* spill callback *) + spillSrc : F.Spill.spillSrc, (* spill callback *) + spillCopyTmp : F.Spill.spillCopyTmp, (* spill callback *) + reload : F.Spill.reload, (* reload callback *) + reloadDst : F.Spill.reloadDst, (* reload callback *) + renameSrc : F.Spill.renameSrc, (* rename callback *) + mode : mode (* mode *) + } + + val ra : raClient list -> F.flowgraph -> F.flowgraph + +end diff --git a/MLRISC/ra/ra.sml b/MLRISC/ra/ra.sml new file mode 100644 index 0000000..388f0b2 --- /dev/null +++ b/MLRISC/ra/ra.sml @@ -0,0 +1,399 @@ +(* + * This is the new register allocator based on + * the 'iterated register coalescing' scheme described + * in POPL'96, and TOPLAS v18 #3, pp 325-353. + * + * Now with numerous extensions: + * + * 0. Dead copy elimination (optional) + * 1. Priority based coalescing + * 2. Priority based freezing + * 3. Priority based spilling + * 4. Biased selection (optional) + * 5. Spill Coalescing (optional) + * 6. Spill Propagation (optional) + * 7. Spill Coloring (optional) + * + * For details, please see the paper from + * + * http://cm.bell-labs.com/cm/cs/what/smlnj/compiler-notes/index.html + * + * The basic structure of this register allocator is as follows: + * 1. RAGraph. This module enscapsulates the interference graph + * datatype (adjacency list + interference graph + node table) + * and contains nothing architecture specific. + * 2. RACore. This module implements the main part of the iterated + * coalescing algorithm, with frequency enhancements. + * 3. RA_FLOWGRAPH. This register allocator is parameterized + * with respect to this signature. This basically abstracts out + * the representation of the program flowgraph, and provide + * a few services to the main allocator, such as building the + * interference graph, rewriting the flowgraph after spilling, + * and rebuilding the interference graph after spilling. + * This module is responsible for caching any information necessary + * to make spilling fast. + * 4. This functor. This functor drives the entire process. + * + * -- Allen Leung (leunga@cs.nyu.edu) + *) + +functor RegisterAllocator + (SpillHeuristics : RA_SPILL_HEURISTICS) + (Flowgraph : RA_FLOWGRAPH where C = CellsBasis) : RA = +struct + + structure F = Flowgraph + structure I = F.I + structure Core = RACore + structure C = I.C + structure G = Core.G + structure CB = CellsBasis + + type getreg = { pref : CB.cell_id list, + stamp : int, + proh : int Array.array + } -> CB.cell_id + + type mode = word + + datatype spillLoc = datatype G.spillLoc + + type raClient = + { cellkind : CB.cellkind, (* kind of register *) + spillProh : CB.cell list, (* don't spill these *) + memRegs : CB.cell list, (* memory registers *) + K : int, (* number of colors *) + dedicated : int -> bool, (* dedicated registers *) + getreg : getreg, (* how to find a color *) + copyInstr : F.Spill.copyInstr, (* how to make a copy *) + spill : F.Spill.spill, (* spill callback *) + spillSrc : F.Spill.spillSrc, (* spill callback *) + spillCopyTmp : F.Spill.spillCopyTmp, (* spill callback *) + reload : F.Spill.reload, (* reload callback *) + reloadDst : F.Spill.reloadDst, (* reload callback *) + renameSrc : F.Spill.renameSrc, (* rename callback *) + mode : mode (* mode *) + } + + val debug = false + + val NO_OPTIMIZATION = 0wx0 + val DEAD_COPY_ELIM = Core.DEAD_COPY_ELIM + val BIASED_SELECTION = Core.BIASED_SELECTION + val HAS_PARALLEL_COPIES = Core.HAS_PARALLEL_COPIES + val SPILL_COALESCING = 0wx100 + val SPILL_COLORING = 0wx200 + val SPILL_PROPAGATION = 0wx400 + + fun isOn(flag, mask) = Word.andb(flag,mask) <> 0w0 + + open G + structure C = I.C + + fun error msg = MLRiscErrorMsg.error("RegisterAllocator",msg) + + (* + * Debugging flags + counters + *) + val cfg_before_ra = MLRiscControl.mkFlag ("dump-cfg-before-ra", + "whether CFG is shown before RA") + val cfg_after_ra = MLRiscControl.mkFlag ("dump-cfg-after-ra", + "whether CFG is shown after RA") + val cfg_after_spill = MLRiscControl.mkFlag ("dump-cfg-after-spilling", + "whether CFG is shown after spill phase") + val cfg_before_ras = MLRiscControl.mkFlag ("dump-cfg-before-all-ra", + "whether CFG is shown before all RA") + val cfg_after_ras = MLRiscControl.mkFlag ("dump-cfg-after-all-ra", + "whether CFG is shown after all RA") + val dump_graph = MLRiscControl.mkFlag ("dump-interference-graph", + "whether interference graph is shown") + val debug_spill = MLRiscControl.mkFlag ("ra-debug-spilling", + "debug mode for spill phase") + val ra_count = MLRiscControl.mkCounter ("ra-count", "RA counter") + val rebuild_count = MLRiscControl.mkCounter ("ra-rebuild", "RA build counter") + +(* + val count_dead = MLRiscControl.getFlag "ra-count-dead-code" + val dead = MLRiscControl.getCounter "ra-dead-code" + *) + val debug_stream = MLRiscControl.debug_stream + + (* + * Optimization flags + *) +(* + val rematerialization = MLRiscControl.getFlag "ra-rematerialization" + *) + + exception NodeTable + + val i2s = Int.toString + + (* This array is used for getreg. + * We allocate it once. + *) + val proh = Array.array(C.firstPseudo, ~1) + + (* + * Register allocator. + * spillProh is a list of registers that are not candidates for spills. + *) + fun ra params flowgraph = + let + (* Flowgraph methods *) + val {build=buildMethod, spill=spillMethod, ...} = F.services flowgraph + + (* global spill location counter *) + (* Note: spillLoc cannot be zero as negative locations are + * returned to the client to indicate spill locations. + *) + val spillLoc=ref 1 + + (* How to dump the flowgraph *) + fun dumpFlowgraph (flag, title) = + if !flag then F.dumpFlowgraph(title, flowgraph,!debug_stream) else () + + (* Main function *) + fun regalloc{getreg, K, dedicated, copyInstr, + spill, spillSrc, spillCopyTmp, renameSrc, + reload, reloadDst, spillProh, cellkind, mode, + memRegs} = + let val numCell = C.numCell cellkind () + in if numCell = 0 + then () + else + let (* the nodes table *) + val nodes = IntHashTable.mkTable(numCell,NodeTable) + val mode = if isOn(HAS_PARALLEL_COPIES, mode) then + Word.orb(Core.SAVE_COPY_TEMPS, mode) + else mode + (* create an empty interference graph *) + val G = G.newGraph{nodes=nodes, + K=K, + dedicated=dedicated, + numRegs=numCell, + maxRegs=C.maxCell, + showReg=CellsBasis.toString, + getreg=getreg, + getpair=fn _ => error "getpair", + firstPseudoR=C.firstPseudo, + proh=proh, + mode=Word.orb(Flowgraph.mode, + Word.orb(mode,SpillHeuristics.mode)), + spillLoc=spillLoc, + memRegs=memRegs + } + val G.GRAPH{spilledRegs, pseudoCount, spillFlag, ...} = G + + val hasBeenSpilled = IntHashTable.find spilledRegs + val hasBeenSpilled = + fn r => case hasBeenSpilled r of SOME _ => true | NONE => false + + fun logGraph(header,G) = + if !dump_graph then + (TextIO.output(!debug_stream, + "-------------"^header^"-----------\n"); + Core.dumpGraph G (!debug_stream) + ) + else () + + (* + * Build the interference graph + *) + fun buildGraph(G) = + let val _ = if debug then print "build..." else () + val moves = buildMethod(G,cellkind) + val worklists = + (Core.initWorkLists G) {moves=moves} + in logGraph("build",G); + if debug then + let val G.GRAPH{bitMatrix=ref(G.BM.BM{elems, ...}), ...} = G + in print ("done: nodes="^i2s(IntHashTable.numItems nodes)^ + " edges="^i2s(!elems)^ + " moves="^i2s(length moves)^ + "\n") + end else (); + worklists + end + + (* + * Potential spill phase + *) + fun chooseVictim{spillWkl} = + let fun dumpSpillCandidates(spillWkl) = + (print "Spill candidates:\n"; + app (fn n => print(Core.show G n^" ")) spillWkl; + print "\n" + ) + (* Initialize if it is the first time we spill *) + val _ = if !spillFlag then () else SpillHeuristics.init() + (* Choose a node *) + val {node,cost,spillWkl} = + SpillHeuristics.chooseSpillNode + {graph=G, hasBeenSpilled=hasBeenSpilled, + spillWkl=spillWkl} + handle SpillHeuristics.NoCandidate => + (Core.dumpGraph G (!debug_stream); + dumpSpillCandidates(spillWkl); + error ("chooseVictim") + ) + in if !debug_spill then + (case node of + NONE => () + | SOME(best as NODE{defs,uses,...}) => + print("Spilling node "^Core.show G best^ + " cost="^Real.toString cost^ + " defs="^i2s(length(!defs))^ + " uses="^i2s(length(!uses))^"\n" + ) + ) else (); + {node=node,cost=cost,spillWkl=spillWkl} + end + + (* + * Mark spill nodes + *) + fun markSpillNodes nodesToSpill = + let val marker = SPILLED + fun loop [] = () + | loop(NODE{color, ...}::ns) = (color := marker; loop ns) + in loop nodesToSpill end + + (* Mark nodes that are immediately aliased to mem regs; + * These are nodes that need also to be spilled + *) + fun markMemRegs [] = () + | markMemRegs(NODE{number=r, color as ref(ALIASED + (NODE{color=ref(col as MEMREG _), ...})), ...}::ns) = + (color := col; + markMemRegs ns) + | markMemRegs(_::ns) = markMemRegs ns + + (* + * Actual spill phase. + * Insert spill node and incrementally + * update the interference graph. + *) + fun actualSpills{spills} = + let val _ = if debug then print "spill..." else (); + val _ = if isOn(mode, + SPILL_COALESCING+ + SPILL_PROPAGATION+ + SPILL_COLORING) then + markSpillNodes spills + else () + val _ = if isOn(mode,SPILL_PROPAGATION+SPILL_COALESCING) then + Core.initMemMoves G + else () + val _ = logGraph("actual spill",G); + val {simplifyWkl,freezeWkl,moveWkl,spillWkl} = + Core.initWorkLists G + {moves=spillMethod{graph=G, cellkind=cellkind, + spill=spill, spillSrc=spillSrc, + spillCopyTmp=spillCopyTmp, + renameSrc=renameSrc, + reload=reload, reloadDst=reloadDst, + copyInstr=copyInstr, nodes=spills + } + } + val _ = dumpFlowgraph(cfg_after_spill,"after spilling") + in logGraph("rebuild",G); + if debug then print "done\n" else (); + rebuild_count := !rebuild_count + 1; + (simplifyWkl, moveWkl, freezeWkl, spillWkl, []) + end + + (* + * Main loop of the algorithm + *) + fun main(G) = + let + + (* Main loop *) + fun loop(simplifyWkl,moveWkl,freezeWkl,spillWkl,stack) = + let val iteratedCoal = Core.iteratedCoalescing G + val potentialSpill = Core.potentialSpillNode G + (* simplify/coalesce/freeze/potential spill phases + * simplifyWkl -- non-move related nodes with low degree + * moveWkl -- moves to be considered for coalescing + * freezeWkl -- move related nodes (with low degree) + * spillWkl -- potential spill nodes + * stack -- simplified nodes + *) + fun iterate(simplifyWkl,moveWkl,freezeWkl,spillWkl,stack) = + let (* perform iterated coalescing *) + val {stack} = iteratedCoal{simplifyWkl=simplifyWkl, + moveWkl=moveWkl, + freezeWkl=freezeWkl, + stack=stack} + in case spillWkl of + [] => stack (* nothing to spill *) + | _ => + if !pseudoCount = 0 (* all nodes simplified *) + then stack + else + let val {node,cost,spillWkl} = + chooseVictim{spillWkl=spillWkl} + in case node of + SOME node => (* spill node and continue *) + let val _ = if debug then print "-" else () + val {moveWkl,freezeWkl,stack} = + potentialSpill{node=node, + cost=cost, + stack=stack} + in iterate([],moveWkl,freezeWkl,spillWkl,stack) + end + | NONE => stack (* nothing to spill *) + end + end + + val {spills} = + if K = 0 then + {spills=spillWkl} + else + let (* simplify the nodes *) + val stack = iterate + (simplifyWkl,moveWkl,freezeWkl,spillWkl,stack) + (* color the nodes *) + in (Core.select G) {stack=stack} + end + in (* check for actual spills *) + case spills of + [] => () + | spills => loop(actualSpills{spills=spills}) + end + + val {simplifyWkl, moveWkl, freezeWkl, spillWkl} = buildGraph G + + in loop(simplifyWkl, moveWkl, freezeWkl, spillWkl, []) + end + + fun initSpillProh(cells) = + let val markAsSpilled = IntHashTable.insert spilledRegs + fun mark r = markAsSpilled(CellsBasis.registerId r,true) + in app mark cells end + + in dumpFlowgraph(cfg_before_ra,"before register allocation"); + initSpillProh spillProh; + main(G); (* main loop *) + (* update the colors for all cells *) + logGraph("done",G); + Core.updateCellColors G; + Core.markDeadCopiesAsSpilled G; + ra_count := !ra_count + 1; + dumpFlowgraph(cfg_after_ra,"after register allocation"); + (* Clean up spilling *) + SpillHeuristics.init() + end + end + + fun regallocs [] = () + | regallocs(p::ps) = (regalloc p; regallocs ps) + + in dumpFlowgraph(cfg_before_ras,"before register allocation"); + regallocs params; + dumpFlowgraph(cfg_after_ras,"after register allocation"); + flowgraph + end + +end diff --git a/MLRISC/ra/raBitmatrix.sml b/MLRISC/ra/raBitmatrix.sml new file mode 100644 index 0000000..becc315 --- /dev/null +++ b/MLRISC/ra/raBitmatrix.sml @@ -0,0 +1,211 @@ +(* + * Bit Matrix routines + *) +structure RaBitmatrix : RA_BITMATRIX = struct + structure W = Word + structure A = Array + structure UA = Unsafe.Array + + datatype bitMatrix = + BM of {table:hashTable, + elems:int ref, + edges:int} + and hashTable = + SMALL of word list Array.array ref * word + | LARGE of bucket Array.array ref * word + (* | BITMATRIX of Word8Array.array *) + + and bucket = NIL | B of int * int * bucket + + exception Nodes + fun hashFun(i, j, shift, size) = + let val i = W.fromInt i + val j = W.fromInt j + val h = W.+(W.<<(i, shift), W.+(i, j)) + val mask = W.-(W.fromInt size, 0w1) + in W.toIntX(W.andb(h, mask)) end + + val empty = BM{table=SMALL(ref(A.array(2, [])), 0w0), elems=ref 0, edges=0} + + (* + val indices = A.array(1024,0) + + fun init(i,j) = + if i < 1024 then + (A.update(indices, i, j); init(i+1, i+j+1)) + else () + + val _ = init(0, 0) + *) + fun size (BM{elems, ...}) = !elems + + fun edges(BM{table=SMALL(ref table, _), ...}) = A.length table + | edges(BM{table=LARGE(ref table, _), ...}) = A.length table + (*| edges(BM{table=BITMATRIX _, edges, ...}) = edges *) + + fun member(BM{table=SMALL(table, shift), ...}) = + (fn (i, j) => + let val (i,j) = if i < j then (i, j) else (j, i) + val k = W.+(W.<<(W.fromInt i, 0w15), W.fromInt j) + fun find [] = false + | find(k'::b) = k = k' orelse find b + val tab = !table + in find(UA.sub(tab, hashFun(i, j, shift, A.length tab))) end + ) + | member(BM{table=LARGE(table, shift), ...}) = + (fn (i, j) => + let val (i,j) = if i < j then (i, j) else (j, i) + fun find NIL = false + | find(B(i',j',b)) = i = i' andalso j = j' orelse find b + val tab = !table + in find(UA.sub(tab, hashFun(i, j, shift, A.length tab))) end + ) + (* + | member(BM{table=BITMATRIX table, ...}) = + (fn (i, j) => + let val (i,j) = if i > j then (i, j) else (j, i) + val bit = W.fromInt(UA.sub(indices, i) + j) + val index = W.toIntX(W.>>(bit, 0w3)) + val mask = W.<<(0w1, W.andb(bit, 0w7)) + in W.andb(W.fromInt(W8.toInt(UW8A.sub(table, index))), mask) <> 0w0 + end + ) + *) + + fun add (BM{table=SMALL(table, shift), elems, ...}) = + let fun insert(i, j) = + let val (i,j) = if i < j then (i, j) else (j, i) + val tab = !table + val len = A.length tab + in if !elems < len then + let val index = hashFun(i, j, shift, len) + val k = W.+(W.<<(W.fromInt i, 0w15), W.fromInt j) + fun find [] = false + | find(k'::b) = k = k' orelse find b + val b = UA.sub(tab, index) + in if find b then false + else (UA.update(tab, index, k::b); + elems := !elems + 1; true) + end + else (* grow table *) + let val oldTable = tab + val oldSize = A.length oldTable + val newSize = oldSize + oldSize + val newTable = A.array(newSize,[]) + fun enter n = + if n < oldSize then + let fun loop([],a,b) = + (UA.update(newTable, n, a); + UA.update(newTable, n + oldSize, b); + enter(n+1)) + | loop(k::l,a,b) = + let val i = W.toIntX(W.>>(k, 0w15)) + val j = W.toIntX(W.-(k,W.<<(W.fromInt i, 0w15))) + in if hashFun(i, j, shift, newSize) = n + then loop(l, k::a, b) + else loop(l, a, k::b) + end + in loop(UA.sub(oldTable, n), [], []) end + else () + in table := newTable; + enter 0; + insert(i, j) + end + end + in insert + end + | add (BM{table=LARGE(table, shift), elems, ...}) = + let fun insert(i, j) = + let val (i,j) = if i < j then (i, j) else (j, i) + val tab = !table + val len = A.length tab + in if !elems < len then + let val index = hashFun(i, j, shift, len) + fun find NIL = false + | find(B(i',j',b)) = i = i' andalso j = j' orelse find b + val b = UA.sub(tab, index) + in if find b then false + else (UA.update(tab, index, B(i,j,b)); + elems := !elems + 1; true) + end + else (* grow table *) + let val oldTable = tab + val oldSize = A.length oldTable + val newSize = oldSize + oldSize + val newTable = A.array(newSize,NIL) + fun enter n = + if n < oldSize then + let fun loop(NIL,a,b) = + (UA.update(newTable, n, a); + UA.update(newTable, n + oldSize, b); + enter(n+1)) + | loop(B(i,j,l),a,b) = + if hashFun(i, j, shift, newSize) = n + then loop(l, B(i,j,a), b) + else loop(l, a, B(i,j,b)) + in loop(UA.sub(oldTable, n), NIL, NIL) end + else () + in table := newTable; + enter 0; + insert(i, j) + end + end + in insert + end + (* + | add(BM{table=BITMATRIX table, ...}) = + (fn (i, j) => + let val (i,j) = if i > j then (i, j) else (j, i) + val bit = W.fromInt(UA.sub(indices, i) + j) + val index = W.toIntX(W.>>(bit, 0w3)) + val mask = W.<<(0w1, W.andb(bit, 0w7)) + val value = W.fromInt(W8.toInt(UW8A.sub(table, index))) + in if W.andb(value, mask) <> 0w0 then false + else (UW8A.update(table, index, + W8.fromInt(W.toIntX(W.orb(value, mask)))); true) + end + ) + *) + + fun delete (BM{table=SMALL(table, shift), elems, ...}) = + (fn (i,j) => + let val k = W.+(W.<<(W.fromInt i, 0w15), W.fromInt j) + fun find [] = [] + | find(k'::b) = + if k = k' then (elems := !elems - 1; b) else k'::find b + val tab = !table + val index = hashFun(i, j, shift, A.length tab) + val n = !elems + in UA.update(tab, index, find(UA.sub(tab, index))); + !elems <> n + end + ) + | delete (BM{table=LARGE(table, shift), elems, ...}) = + (fn (i,j) => + let fun find NIL = NIL + | find(B(i', j', b)) = + if i = i' andalso j = j' then (elems := !elems - 1; b) + else B(i', j', find b) + val tab = !table + val index = hashFun(i, j, shift, A.length tab) + val n = !elems + in UA.update(tab, index, find(UA.sub(tab, index))); + !elems <> n + end + ) + (* + | delete(BM{table=BITMATRIX table, ...}) = + (fn (i, j) => + let val (i,j) = if i > j then (i, j) else (j, i) + val bit = W.fromInt(UA.sub(indices, i) + j) + val index = W.toIntX(W.>>(bit, 0w3)) + val mask = W.-(W.<<(0w1, W.andb(bit, 0w7)), 0w1) + val value = W.fromInt(W8.toInt(UW8A.sub(table, index))) + in if W.andb(value, mask) = 0w0 then false + else (UW8A.update(table, index, + W8.fromInt(W.toIntX(W.andb(value,W.notb mask)))); + true) + end + ) + *) +end diff --git a/MLRISC/ra/raBitset.sml b/MLRISC/ra/raBitset.sml new file mode 100644 index 0000000..fe4bb23 --- /dev/null +++ b/MLRISC/ra/raBitset.sml @@ -0,0 +1,120 @@ +(* raBitset.sml + * + * COPYRIGHT (c) 1995 AT&T Bell Laboratories. + * + * Imperative bitsets. + * + * This has been written specially for the register allocator. + * The computation of n(n+1)/2 very quickly overflows in practice. + * + *) + +(** This has been written specially for the register allocator. + ** We use a hash table representation, because it performs better + ** than a linear representation except for small numbers of live + ** ranges. + **) + +signature BITMATRIX = sig + type bitMatrix + val new : int -> bitMatrix + val add : bitMatrix -> (int * int) -> bool + val member : bitMatrix -> (int * int) -> bool + val delete : bitMatrix -> (int * int) -> bool +(* val clear : bitMatrix * int -> unit +*) +end + + +structure TriangularBitMatrix :> BITMATRIX = +struct + + datatype bucket = NIL | B of (int * int * bucket) + + datatype bitMatrix = + INTPAIRMAP of {table : bucket Array.array ref, + elems : int ref, + size : word ref, + shift : word} + val itow = Word.fromInt + val wtoi = Word.toInt + fun roundsize size = let + fun f(x, shift) = + if x >= size then (x, Word.>>(shift, 0w1)) + else f(x*2, Word.+(shift,0w1)) + in f(64, 0w6) + end + + fun new size = let + val (tblSize, shift) = roundsize size + val tbl = Array.array(tblSize,NIL) + in (* note: size is offset by 1 *) + INTPAIRMAP{table = ref tbl, + elems = ref 0, + size = ref(itow(tblSize-1)), + shift = shift} + end + + fun moduloSize(i, j, shift, sz) = + Word.toIntX + (Word.andb + (Word.+(Word.<<(itow i, shift), itow j), + sz)) + + fun member(INTPAIRMAP{table,elems,size,shift,...}) (i,j) = let + fun find NIL = false + | find(B(i',j',b)) = (i=i' andalso j=j') orelse find b + in find(Array.sub(!table, moduloSize(i, j, shift, !size))) + end + + fun add (t as INTPAIRMAP{table,elems,size,shift,...}) (v as (i,j)) = let + val ref tbl = table + val ref sz = size + val isz = wtoi sz + in + if !elems <> isz then let + val indx = moduloSize(i, j, shift, sz) + fun find NIL = false + | find(B(i',j',r)) = (i=i' andalso j=j') orelse find r + val b = Array.sub(tbl,indx) + in + if find b then false + else (Unsafe.Array.update(tbl,indx,B(i,j,b)); + elems := !elems + 1; + true) + end + else let + val newsize=isz+isz+2 + val new = Array.array(newsize,NIL) + val newsize1 = itow(newsize-1) + fun redo n = let + fun add'(a,b,B(i,j,r)) = + if moduloSize(i, j, shift, newsize1) = n then + add'(B(i,j,a),b,r) + else add'(a,B(i,j,b),r) + | add'(a,b,NIL) = + (Array.update(new,n,a); + Array.update(new,n+isz+1,b); + redo(n+1)) + in add'(NIL, NIL, Array.sub(tbl,n)) + end + in + table:=new; + size:=itow(newsize-1); + redo 0 handle _ => (); + add t v + end + end + + fun delete(INTPAIRMAP{table=ref table,elems,size,shift,...}) (i,j) = let + fun find NIL = NIL + | find(B(i',j',b)) = + if i=i' andalso j=j' then (elems := !elems-1; b) else B(i',j',find b) + val indx = moduloSize(i, j, shift, !size) + val n = !elems + in Unsafe.Array.update(table, indx, find(Array.sub(table,indx))); + !elems <> n (* changed? *) + end + +end + diff --git a/MLRISC/ra/raPriQueue.sml b/MLRISC/ra/raPriQueue.sml new file mode 100644 index 0000000..d6ede87 --- /dev/null +++ b/MLRISC/ra/raPriQueue.sml @@ -0,0 +1,30 @@ + (* + * Priority Queue. Let's hope the compiler will inline it for performance + *) +functor RaPriQueue(type elem val less : elem * elem -> bool) : RA_PRIORITY_QUEUE = struct + + (* A leftist tree is a binary tree with priority ordering + * with the invariant that the left branch is always the taller one + *) + type elem = elem + datatype pri_queue = TREE of elem * int * pri_queue * pri_queue | EMPTY + + fun merge'(EMPTY, EMPTY) = (EMPTY, 0) + | merge'(EMPTY, a as TREE(_, d, _, _)) = (a, d) + | merge'(a as TREE(_, d, _, _), EMPTY) = (a, d) + | merge'(a as TREE(x, d, l, r), b as TREE(y, d', l', r')) = + let val (root, l, r1, r2) = + if less(x, y) then (x, l, r, b) else (y, l', r', a) + val (r, d_r) = merge'(r1, r2) + val d_l = case l of EMPTY => 0 | TREE(_, d, _, _) => d + val (l, r, d_t) = if d_l >= d_r then (l, r, d_l+1) else (r, l, d_r+1) + in (TREE(root, d_t, l, r), d_t) end + + fun merge(a, b) = #1(merge'(a, b)) + + fun add (x, EMPTY) = TREE(x, 1, EMPTY, EMPTY) + | add (x, b as TREE(y, d', l', r')) = + if less(x,y) then TREE(x, d'+1, b, EMPTY) + else #1(merge'(TREE(x, 1, EMPTY, EMPTY), b)) +end + diff --git a/MLRISC/ra/region-based-ra.sml b/MLRISC/ra/region-based-ra.sml new file mode 100644 index 0000000..7166945 --- /dev/null +++ b/MLRISC/ra/region-based-ra.sml @@ -0,0 +1,37 @@ +(* + * Region-based register allocator. + * This register allocation takes a cluster of large size and partitions + * it into manageable pieces to be allocated. + *) +functor RegionBasedRA + (RA : RA) + (FlowgraphPartitioner : RA_FLOWGRAPH_PARTITIONER + where type flowgraph = RA.F.flowgraph + where C = RA.C + ) : RA = +struct + + structure F = RA.F + structure FP = FlowgraphPartitioner + structure I = F.I + structure C = I.C + structure Core = RACore + structure G = Core.G + + open RA + + val maxBlocks = MLRiscControl.mkInt ("ra-max-blocks", "max block count for region-based RA") + + (* Main entry point. + * All the magic is actually done in the FlowgraphPartitioner module + *) + fun ra (params:raClient list) flowgraph = + if FP.numberOfBlocks flowgraph > !maxBlocks then + let fun ra (param as {cellkind, ...}) = + FP.partition flowgraph cellkind (RA.ra [param]) + in app ra params; + flowgraph + end + else RA.ra params flowgraph + +end diff --git a/MLRISC/ra/risc-ra.sml b/MLRISC/ra/risc-ra.sml new file mode 100644 index 0000000..d5d2ec3 --- /dev/null +++ b/MLRISC/ra/risc-ra.sml @@ -0,0 +1,364 @@ +(* + * This functor factors out the machine independent part of the register + * allocator. It performs integer and floating register allocation. + * This works well for RISC machines; but not applicable to x86. + *) +functor RISC_RA + (structure I : INSTRUCTIONS + structure Asm : INSTRUCTION_EMITTER + where I = I + structure CFG : CONTROL_FLOW_GRAPH + where I = I + and P = Asm.S.P + structure InsnProps : INSN_PROPERTIES + where I = I + structure Rewrite : REWRITE_INSTRUCTIONS + where I = I + structure SpillInstr : ARCH_SPILL_INSTR + where I = I + + (* Spilling heuristics determines which node should be spilled. + * You can use Chaitin, ChowHenessey, or one of your own. + *) + structure SpillHeur : RA_SPILL_HEURISTICS + + (* The Spill module figures out the strategies for inserting + * spill code. You can use RASpill, or RASpillWithRenaming, + * or write your own if you are feeling adventurous. + *) + structure Spill : RA_SPILL where I = I + + val architecture : string + + (* Is this a pure instruction *) + val pure : I.instruction -> bool + + + datatype spillOperandKind = SPILL_LOC | CONST_VAL + type spill_info (* user-defined abstract type *) + + (* Called before RA begins *) + val beforeRA : CFG.cfg -> spill_info + + structure Int : + sig + + val avail : CellsBasis.cell list (* list of available registers *) + val dedicated : CellsBasis.cell list (* list of registers that are dedicated *) + + val spillLoc : + { info:spill_info, + an :Annotations.annotations ref, + cell:CellsBasis.cell, (* spilled cell *) + id :RAGraph.logical_spill_id + } + -> { opnd: I.ea, + kind: spillOperandKind + } + + (* Mode for RA optimizations *) + val mode : RAGraph.mode + end + + structure Float : + sig + + val avail : CellsBasis.cell list (* list of available registers *) + val dedicated : CellsBasis.cell list (* list of registers that are dedicated *) + + val spillLoc : + spill_info * Annotations.annotations ref * RAGraph.logical_spill_id + -> I.ea + + (* Mode for RA optimizations *) + val mode : RAGraph.mode + end + ) : CFG_OPTIMIZATION = +struct + structure CFG = CFG + structure I = CFG.I + structure P = InsnProps + structure C = I.C + structure G = RAGraph + structure CB = CellsBasis + + (* The generic register allocator *) + structure Ra = + RegisterAllocator + (SpillHeur) + (* (ChowHennessySpillHeur) *) + (ClusterRA + (structure Flowgraph = CFG + structure Asm = Asm + structure InsnProps = InsnProps + structure Spill = Spill + ) + ) + + val name = "RISC_RA" + + (* Counters for register allocation *) + val intSpillsCnt = MLRiscControl.mkCounter ("ra-int-spills", "RA int spill count") + val intReloadsCnt = MLRiscControl.mkCounter ("ra-int-reloads", "RA int reload count") + val intRenamesCnt = MLRiscControl.mkCounter ("ra-int-renames", "RA int rename count") + val floatSpillsCnt = MLRiscControl.mkCounter ("ra-float-spills", "RA float spill count") + val floatReloadsCnt = MLRiscControl.mkCounter ("ra-float-reloads", "RA float reload count") + val floatRenamesCnt = MLRiscControl.mkCounter ("ra-float-renames", "RA float rename count") + + fun inc c = c := !c + 1 + + fun error msg = MLRiscErrorMsg.error("RISC RA "^architecture,msg) + + (* + * Make arithmetic non-overflow trapping. + * This makes sure that if we happen to run the compiler for a long + * period of time overflowing counters will not crash the compiler. + *) + fun x + y = Word.toIntX(Word.+(Word.fromInt x, Word.fromInt y)) + fun x - y = Word.toIntX(Word.-(Word.fromInt x, Word.fromInt y)) + + (* GetReg specialized to integer and floating point registers *) + fun isDedicated (len, arr, others) r = + (r < len andalso Array.sub(arr, r)) orelse List.exists (fn d => r = d) others + + fun mark(arr, _, [], others) = others + | mark(arr, len, r::rs, others) = let + val r = CellsBasis.registerId r + in + if r >= len then mark(arr, len, rs, r::others) + else (Array.update(arr, r, true); mark(arr, len, rs, others)) + end + + fun annotate([], i) = i + | annotate(a::an, i) = annotate(an, I.ANNOTATION{a=a, i=i}) + + local + val {low,high} = C.cellRange CellsBasis.GP + val arr = Array.array(high+1,false) + val others = mark(arr, high+1, Int.dedicated, []) + in + structure GR = GetReg(val first=low val nRegs=high-low+1 + val available=map CellsBasis.registerId Int.avail) + val dedicatedR : int -> bool = isDedicated (high+1, arr, others) + end + + fun getRegLoc(S, an, cell, Ra.FRAME loc) = + Int.spillLoc{info=S, an=an, cell=cell, id=loc} + | getRegLoc _ = error "getRegLoc" + + fun copy((rds as [d], rss as [s]), I.COPY{sz, ...}) = + if CB.sameColor(d,s) then [] + else [I.COPY{k=CB.GP, sz=sz, dst=rds, src=rss, tmp=NONE}] + | copy((rds, rss), I.COPY{tmp, sz, ...}) = + [I.COPY{k=CB.GP, sz=sz, dst=rds, src=rss, tmp=tmp}] + | copy _ = error "copy: COPY?" + + fun spillR S {annotations, kill=true, reg, spillLoc, instr} = + if pure instr then {code=[], proh=[], newReg=NONE} + else spillR S {annotations=annotations,kill=false, + spillLoc=spillLoc, + reg=reg,instr=instr} + | spillR S {annotations=an, kill, reg, spillLoc, instr} = let + fun annotate([], i) = i + | annotate(a::an, i) = annotate(an, I.ANNOTATION{a=a, i=i}) + + (* preserve annotation on instruction *) + fun spill(instrAn, I.ANNOTATION{a, i}) = spill(a::instrAn, i) + | spill(instrAn, I.KILL{regs, spilled}) = + {code= + [annotate + (instrAn, + I.KILL {regs=C.rmvReg(reg, regs), + spilled=C.addReg(reg, spilled)})], + proh = [], + newReg=NONE} + | spill(instrAn, I.LIVE _) = error "spillR: LIVE" + | spill(_, I.COPY _) = error "spillR: COPY" + | spill(instrAn, I.INSTR _) = let + val {opnd=spillLoc:I.ea, kind} = getRegLoc (S, an, reg, spillLoc) + in + inc intSpillsCnt; + SpillInstr.spill CB.GP (instr, reg, spillLoc) + end + in spill([], instr) + end + + (* spill src at the spill location for reg i.e. spillLoc *) + fun spillReg S {annotations=an,src,reg,spillLoc} = + (inc intSpillsCnt; + #code(SpillInstr.spillToEA CB.GP (src, #opnd(getRegLoc(S, an, reg, spillLoc))))) + + + (* Spill the temporary associated with a copy *) + fun spillTmp S {annotations=an, reg, copy=I.COPY{k=CB.GP, sz, tmp, dst, src}, spillLoc} = let + val loc = #opnd(getRegLoc(S, an, reg, spillLoc)) + in + inc intSpillsCnt; + I.COPY{k=CB.GP, sz=sz, tmp=SOME loc, dst=dst, src=src} + end + | spillTmp _ _ = error "spillTmp" + + (* Rename integer register *) + fun renameR{fromSrc,toSrc,instr} = + let val _ = inc intRenamesCnt + val instr' = Rewrite.rewriteUse(instr, fromSrc, toSrc) + in {code=[instr'], proh=[], newReg=SOME toSrc} + end + + (* Reload integer register *) + fun reloadR S {annotations=an, reg, spillLoc, instr} = let + fun reload(instrAn, I.ANNOTATION{a, i}) = reload(a::instrAn, i) + | reload(instrAn, I.LIVE{regs, spilled}) = + {code=[I.LIVE{regs=C.rmvReg(reg, regs), spilled=C.addReg(reg, spilled)}], + proh=[], + newReg=NONE} + | reload(_, I.KILL _) = error "reloadR: KILL" + | reload (_, I.COPY _) = error "reloadR: COPY" + | reload(instrAn, instr as I.INSTR _) = let + val spillLoc = #opnd (getRegLoc(S, an, reg, spillLoc)) + in + inc intReloadsCnt; + SpillInstr.reload CB.GP (instr, reg, spillLoc) + end + in reload([], instr) + end + + (* reload the register dst from the spill location for reg, i.e. spillLoc *) + fun reloadReg S {annotations=an,reg,dst,spillLoc} = + (inc intReloadsCnt; + #code(SpillInstr.reloadFromEA CB.GP (dst, #opnd(getRegLoc(S, an, reg, spillLoc))))) + + + (*-------------------------------------------------------------*) + local + val {low,high} = C.cellRange CellsBasis.FP + val arr = Array.array(high+1,false) + val others = mark(arr, high+1, Float.dedicated, []) + in + structure FR = GetReg(val first=low val nRegs=high-low+1 + val available=map CellsBasis.registerId Float.avail) + val dedicatedF : int -> bool = isDedicated(high+1, arr, others) + end + + fun getFregLoc(S, an, Ra.FRAME loc) = Float.spillLoc(S, an, loc) + | getFregLoc _ = error "getFregLoc" + + fun fcopy((rds as [d], rss as [s]), I.COPY{sz, ...}) = + if CB.sameColor(d,s) then [] + else [I.COPY{k=CB.FP, sz=sz, dst=rds, src=rss, tmp=NONE}] + | fcopy((rds, rss), I.COPY{tmp, sz, ...}) = + [I.COPY{k=CB.FP, sz=sz, dst=rds, src=rss, tmp=tmp}] + | fcopy _ = error "fcopy: COPY?" + + (* Spill floating point register *) + fun spillF S {annotations, kill=true, reg, spillLoc, instr} = + if pure instr then {code=[], proh=[], newReg=NONE} + else spillF S {annotations=annotations,kill=false, + spillLoc=spillLoc, reg=reg,instr=instr} + | spillF S {annotations=an, kill, reg, spillLoc, instr} = let + (* preserve annotation on instruction *) + fun spill(instrAn, I.ANNOTATION{a, i}) = spill(a::instrAn, i) + | spill(instrAn, I.KILL{regs, spilled}) = + {code= + [annotate + (instrAn, + I.KILL {regs=C.rmvFreg(reg, regs), + spilled=C.addFreg(reg, spilled)})], + proh = [], + newReg=NONE} + | spill(instrAn, I.LIVE _) = error "spillF: LIVE" + | spill(_, I.COPY _) = error "spillF: COPY" + | spill(instrAn, I.INSTR _) = + (inc floatSpillsCnt; + SpillInstr.spill CB.FP (instr, reg, getFregLoc(S, an, spillLoc))) + in spill([], instr) + end + + (* spill src at the spill location for reg, i.e. spillLoc *) + fun spillFreg S {annotations=an,reg,src,spillLoc} = + (inc floatSpillsCnt; + #code(SpillInstr.spillToEA CB.FP (src, getFregLoc(S, an, spillLoc)))) + + (* Spill the temporary associated with a copy *) + fun spillFtmp S {annotations=an, reg, copy=I.COPY{k=CB.FP, sz, tmp, dst, src}, spillLoc} = let + val loc = getFregLoc(S, an, spillLoc) + in + inc floatSpillsCnt; + I.COPY{k=CB.FP, sz=sz, tmp=SOME loc, dst=dst, src=src} + end + | spillFtmp _ _ = error "spillFtmp" + + + (* Rename floating point register *) + fun renameF{fromSrc,toSrc,instr} = + let val _ = inc floatRenamesCnt + val instr' = Rewrite.frewriteUse(instr, fromSrc, toSrc) + in {code=[instr'], proh=[], newReg=SOME toSrc} + end + + (* Reload floating point register *) + fun reloadF S {annotations=an, reg, spillLoc, instr} = let + fun reload(instrAn, I.ANNOTATION{a,i}) = reload(a::instrAn, i) + | reload(instrAn, I.LIVE{regs, spilled}) = + {code=[I.LIVE{regs=C.rmvFreg(reg, regs), spilled=C.addFreg(reg, spilled)}], + proh=[], + newReg=NONE} + | reload(_, I.KILL _) = error "reloadF: KILL" + | reload (_, I.COPY _) = error "reloadF: COPY" + | reload(instrAn, instr as I.INSTR _) = + (inc floatReloadsCnt; + SpillInstr.reload CB.FP (instr, reg, getFregLoc(S, an, spillLoc))) + in reload([], instr) + end + + (* reload register dst from the spill location for reg, i.e. spillLoc *) + fun reloadFreg S {annotations=an,reg,dst,spillLoc} = + (inc floatReloadsCnt; + #code (SpillInstr.reloadFromEA CB.FP (dst, getFregLoc(S, an, spillLoc)))) + + val KR = length Int.avail + val KF = length Float.avail + + fun params S = + [ { cellkind = CellsBasis.GP, + getreg = GR.getreg, + spill = spillR S, + spillSrc = spillReg S, + spillCopyTmp = spillTmp S, + reload = reloadR S, + reloadDst = reloadReg S, + renameSrc = renameR, + K = KR, + dedicated = dedicatedR, + copyInstr = copy, + spillProh = [], + memRegs = [], + mode = Int.mode + } : Ra.raClient, + { cellkind = CellsBasis.FP, + getreg = FR.getreg, + spill = spillF S, + spillSrc = spillFreg S, + spillCopyTmp = spillFtmp S, + reload = reloadF S, + reloadDst = reloadFreg S, + renameSrc = renameF, + K = KF, + dedicated = dedicatedF, + copyInstr = fcopy, + spillProh = [], + memRegs = [], + mode = Float.mode + } : Ra.raClient + ] : Ra.raClient list + + fun run cluster = let + val S = beforeRA cluster + in + GR.reset(); + FR.reset(); + Ra.ra (params S) cluster + end + +end + diff --git a/MLRISC/scheduling/LeungPalemPnueli.sig b/MLRISC/scheduling/LeungPalemPnueli.sig new file mode 100644 index 0000000..f89e50d --- /dev/null +++ b/MLRISC/scheduling/LeungPalemPnueli.sig @@ -0,0 +1,22 @@ +(* + * This is the algorithm from PACT '98 (me, Krishna Palem and Amir Pnueli) + * Note: the dag is assumed to be in transitively closed form. + * + * -- Allen + *) +signature LEUNG_PALEM_PNUELI = +sig + + exception Infeasible + + val rank : { dag : ('n,'e,'g) Graph.graph, (* dag *) + l : 'e Graph.edge -> int, (* latency *) + r : 'n Graph.node -> int, (* individual release times *) + d : 'n Graph.node -> int, (* individual deadlines *) + m : int (* number of processors *) + } -> + { r' : int Array.array, (* modified release times *) + d' : int Array.array (* modified deadlines *) + } + +end diff --git a/MLRISC/scheduling/LeungPalemPnueli.sml b/MLRISC/scheduling/LeungPalemPnueli.sml new file mode 100644 index 0000000..5dff857 --- /dev/null +++ b/MLRISC/scheduling/LeungPalemPnueli.sml @@ -0,0 +1,109 @@ +(* + * This is my algorithm from PACT '98. + * + * -- Allen + *) +structure LeungPalemPnueli :> LEUNG_PALEM_PNUELI = +struct + + structure G = Graph + structure A = Array + structure PQ = PriorityQueue + + exception Infeasible + + fun rank{dag,l,r,d,m} = + let val G.GRAPH G = dag + val N = #capacity G () + val r' = A.array(N,0) (* modified release times *) + val d' = A.array(N,0) (* modified deadlines *) + val r_hat = A.array(N,0) (* backschedule modified release times *) + val d_hat = A.array(N,0) (* backschedule modified deadlines *) + + val node_ids = map #1 (#nodes G ()) + + fun initReleaseTimes() = + let fun update i = + A.update(r',i, + foldr (fn (e as (j,_,_),r_i) => + Int.max(A.sub(r',j) + l e + 1,r_i)) + (r(i,#node_info G i)) (#in_edges G i)) + in app update (GraphTopsort.topsort dag node_ids) end + + fun initDeadlines() = + let fun update i = + A.update(d',i, + foldr (fn (e as (_,j,_),d_i) => + Int.min(A.sub(d',j) - l e - 1,d_i)) + (d (i,#node_info G i)) (#out_edges G i)) + in app update (GraphTopsort.topsort (ReversedGraphView.rev_view dag) + node_ids) + end + + + (* unit time tasks, no-precedence constraints with + * deadlines d_hat and release times r_hat. + * I'm using an asymtotically slower (n log n) + * algorithm than the one described in the paper. + *) + fun UET(S) = + let fun byReleaseTimes(i,j) = A.sub(r_hat,i) > A.sub(r_hat,j) + fun byDeadlines(i,j) = A.sub(d_hat,i) < A.sub(d_hat,j) + val ready = PQ.create byDeadlines + val ins = PQ.insert ready + fun listSchedule(waiting,t,0) = listSchedule(waiting,t+1,m) + | listSchedule(waiting,t,m) = + let val j = PQ.deleteMin ready + in t < A.sub(d_hat,j) andalso (* check for infeasbility! *) + listSchedule(waiting,t,m-1) + end handle PQ.EmptyPriorityQueue => + (* no more ready nodes *) + let fun release(t,[]) = (t,[]) + | release(t,l as j::waiting) = + if A.sub(r_hat,j) > t then (t,l) + else (ins j; release(t,waiting)) + in case waiting of + [] => true (* feasible *) + | waiting as j::_ => + let val (t,waiting) = release(A.sub(r_hat,j),waiting) + in listSchedule(waiting,t,m) end + end + in listSchedule(ListMergeSort.sort byReleaseTimes S,0,m) end + + fun backSchedule(i,r'_i,S) = + let fun loop d'_i = + if r'_i >= d'_i then raise Infeasible + else + let val _ = A.update(d_hat,i,d'_i) + val _ = A.update(r_hat,i,d'_i-1) + val _ = app (fn e as (_,j,_) => + A.update(r_hat,j,Int.max(d'_i + l e,A.sub(r',j)))) + (#out_edges G i) + in if UET S then d'_i + else loop(d'_i-1) + end + + in app (fn j => (A.update(d_hat,j,A.sub(d',j)); + A.update(r_hat,j,A.sub(r',j)))) S; + loop(A.sub(d',i)) + end + + fun mainLoop([],_) = () + | mainLoop(i::U,S) = + let val r'_i = A.sub(r',i) + val S = i::S + val d'_i = backSchedule(i,r'_i,S) + in A.update(d',i,d'_i); + if d'_i <= r'_i then raise Infeasible + else mainLoop(U,S) + end + fun byNonIncreasingReleaseTimes(i,j) = A.sub(r',i) < A.sub(r',j) + + in (* initialize the modified deadlines/release times *) + initReleaseTimes(); + initDeadlines(); + mainLoop(ListMergeSort.sort byNonIncreasingReleaseTimes node_ids,[]); + {r'=r',d'=d'} + end + +end diff --git a/MLRISC/scheduling/PalemSimons.sig b/MLRISC/scheduling/PalemSimons.sig new file mode 100644 index 0000000..f1b764b --- /dev/null +++ b/MLRISC/scheduling/PalemSimons.sig @@ -0,0 +1,17 @@ +(* + * This algorithm is in Krishna and Barbara Simons' paper from TOPLAS '93. + * Note: the dag is assumed to be in transitively closed form. + * + * -- Allen + *) +signature PALEM_SIMONS = +sig + + val rank : { dag : ('n,'e,'g) Graph.graph, (* dag *) + l : 'e Graph.edge -> int, (* latency *) + d : 'n Graph.node -> int, (* individual deadlines *) + m : int (* number of processors *) + } -> + { d' : int Array.array (* modified deadlines *) + } +end diff --git a/MLRISC/scheduling/PalemSimons.sml b/MLRISC/scheduling/PalemSimons.sml new file mode 100644 index 0000000..42e16b7 --- /dev/null +++ b/MLRISC/scheduling/PalemSimons.sml @@ -0,0 +1,82 @@ +(* + * This is Krishna Palem's and Barbara Simons' algorithm. + * + * -- Allen + *) +structure PalemSimons :> PALEM_SIMONS = +struct + structure G = Graph + structure A = Array + + fun rank{dag,l,d,m} = + let val G.GRAPH G = dag + val N = #capacity G () + val d' = A.array(N,0) (* modified deadlines *) + val order = A.array(N,0) (* node id -> rank order in swr *) + val rank = A.array(N,0) (* rank order -> rank in swr *) + val tree = A.array(N,0) (* rank order -> tree *) + val content = A.array(N,0) (* rank order -> filled slots *) + val capacity = A.array(N,0) (* rank order -> max slots *) + + + fun backSchedule i = + let (* p is the current rank order within succs *) + fun initTrees([],_,_) = () + | initTrees((j,_,d_j)::succs,last_d_j,p) = + if last_d_j = d_j then + (A.update(order,j,p); + initTrees(succs,last_d_j,p)) + else + let val p = p+1 + in A.update(tree,p,p); (* new tree *) + A.update(rank,p,d_j); + A.update(content,p,0); + A.update(capacity,p,(d_j - last_d_j)*m); + A.update(order,j,p); + initTrees(succs,d_j,p) + end + + fun FIND p = + let val q = A.sub(tree,p) + in if q = p then p else + let val r = FIND q + in A.update(tree,p,r); r end + end + + fun UNION(p,q) = A.update(tree,p,q) + + fun insert([],d_i) = d_i + | insert((j,l_j,d_j)::swr,d_i) = + let val ord = A.sub(order,j) + val p = FIND ord + val c = A.sub(content,p) + val _ = A.update(content,p,c + 1) + val D' = A.sub(rank,p) - c div m + val d_i = Int.min(D' - 1 - l_j,d_i) + in if c >= A.sub(capacity,p) then + let val q = FIND(A.sub(order,A.sub(tree,p-1))) + in UNION(p,q) + end + else (); + insert(swr,d_i) + end + + val succs = #out_edges G i + val list = map (fn e as (_,j,_) => (j,l e,A.sub(d',j))) succs + fun byRank((_,_,d_i),(_,_,d_j)) = d_i > d_j + val _ = initTrees(ListMergeSort.sort byRank list,~123456789,~1) + fun byLatencyAndRank((_,l_i,d_i),(_,l_j,d_j)) = + l_i < l_j orelse (l_i = l_j andalso d_i < d_j) + val d_i = insert(ListMergeSort.sort byLatencyAndRank list, + d(i,#node_info G i)) + in A.update(d',i,d_i) + end + + in (* backward scheduling in reverse topological order *) + app backSchedule + (GraphTopsort.topsort (ReversedGraphView.rev_view dag) + (map #1 (#nodes G ()))); + {d'=d'} + end + +end diff --git a/MLRISC/scheduling/Sched.cm b/MLRISC/scheduling/Sched.cm new file mode 100644 index 0000000..25b8ae1 --- /dev/null +++ b/MLRISC/scheduling/Sched.cm @@ -0,0 +1,102 @@ +(* + * This library contains the superscalar scheduling stuff. + *) + +Library + signature BASIC_BLOCK_SCHEDULER + signature BASIC_BLOCK_SCHEDULER_DDG_BUILDER + signature BASIC_BLOCK_SCHEDULER_DDG_VIEWER + signature GLOBAL_SCHEDULER_DDG_VIEWER + signature GLOBAL_SCHEDULING_ALGORITHM + signature LEUNG_PALEM_PNUELI + signature LIST_SCHEDULER + signature PALEM_SIMONS + signature REGION_BUILDER + signature SCHEDULER_DDG + signature SCHEDULER_DDG_BUILDER + signature SCHEDULING_ALIASING + signature SCHEDULING_PROPERTIES + signature SCHEDULING_RANKS + structure LeungPalemPnueli + structure PalemSimons + functor BBScheduler + functor BasicBlockSchedulerDDGBuilder + functor BasicBlockSchedulerDDGViewer + functor ClusterBasicBlockScheduler + functor DAGScheduling + functor GlobalCriticalPath + functor GlobalScheduler + functor GlobalSchedulerDDGViewer + functor ListScheduler + functor LocalCriticalPath + functor RegionBuilder + functor SchedulerDDG + functor SchedulerDDGBuilder +is + +#if defined(NEW_CM) +#if SMLNJ_VERSION * 100 + SMLNJ_MINOR_VERSION >= 11028 + $basis.cm + $smlnj-lib.cm +#else + basis.cm + smlnj-lib.cm +#endif +#else + smlnj-lib.cm +#endif + + ../MLRISC.cm + ../control/Control.cm + ../library/Lib.cm + ../graphs/Graphs.cm + ../visualization/Visual.cm + ../ir/ir.cm + ../IR/IR.cm + ../mltree/RTL.cm + + (* Rank algorithms *) + PalemSimons.sig + PalemSimons.sml + LeungPalemPnueli.sig + LeungPalemPnueli.sml + + (* Data dependence dag representation *) + schedulerDDG.sig + schedulerDDG.sml + + (* Signature of a rank function *) + schedulingRanks.sig + + (* Architectural Properties *) + schedulingProps.sig + + (* Basic block scheduling *) + localCP.sml + buildLocalDDG.sig + buildLocalDDG.sml + bbScheduler.sig + bbScheduler.sml + bbDDGViewer.sig + bbDDGViewer.sml + clusterBBScheduler.sml + + (* Superscalar scheduling. These are required if you want to + * play with instruction scheduling for superscalars. + * It requires the MLRISC IR stuff. !!! + *) + listScheduler.sig + listScheduler.sml + buildDDG.sig + buildDDG.sml + schedulingAliasing.sig + regionBuilder.sig + regionBuilder.sml + + (* Various global scheduling algorithms *) + globalCP.sml + globalDDGViewer.sig + globalDDGViewer.sml + globalSchedulingAlgo.sig + dagScheduling.sml + globalScheduler.sml diff --git a/MLRISC/scheduling/bbAliasing.sig b/MLRISC/scheduling/bbAliasing.sig new file mode 100644 index 0000000..59ea856 --- /dev/null +++ b/MLRISC/scheduling/bbAliasing.sig @@ -0,0 +1,8 @@ +signature BASIC_BLOCK_ALIASING = +sig + + structure I : INSTRUCTIONS + + val aliasing : unit -> (I.instruction -> I.C.cell list * I.C.cell list) + +end diff --git a/MLRISC/scheduling/bbDDGViewer.sig b/MLRISC/scheduling/bbDDGViewer.sig new file mode 100644 index 0000000..4a43ac7 --- /dev/null +++ b/MLRISC/scheduling/bbDDGViewer.sig @@ -0,0 +1,18 @@ +(* + * View a scheduler DDG constructed for basic block scheduling + * + * -- Allen + *) +signature BASIC_BLOCK_SCHEDULER_DDG_VIEWER = +sig + + structure DDG : SCHEDULER_DDG + structure I : INSTRUCTIONS + sharing DDG.I = I + + val toString : I.instruction -> string + + val view : (I.instruction,DDG.latency) DDG.ddg -> unit + +end + diff --git a/MLRISC/scheduling/bbDDGViewer.sml b/MLRISC/scheduling/bbDDGViewer.sml new file mode 100644 index 0000000..e0d1f30 --- /dev/null +++ b/MLRISC/scheduling/bbDDGViewer.sml @@ -0,0 +1,32 @@ +(* + * View a scheduler DDG constructed for basic block scheduling + * + * -- Allen + *) +functor BasicBlockSchedulerDDGViewer + (structure GraphViewer : GRAPH_VIEWER + structure DDG : SCHEDULER_DDG + structure FormatInsn : FORMAT_INSTRUCTION + sharing FormatInsn.I = DDG.I + ) : BASIC_BLOCK_SCHEDULER_DDG_VIEWER = +struct + + structure DDG = DDG + structure I = DDG.I + structure L = GraphLayout + + val edgeColor = L.COLOR "red" + + val toString = FormatInsn.toString [] + + fun view ddg = + GraphViewer.view + (GraphLayout.makeLayout + {graph = fn _ => [], + node = fn (_,i) => [L.LABEL(toString i)], + edge = fn (_,_,lat) => [L.LABEL(Int.toString lat),edgeColor] + } + ddg + ) + +end diff --git a/MLRISC/scheduling/bbScheduler.sig b/MLRISC/scheduling/bbScheduler.sig new file mode 100644 index 0000000..1e33209 --- /dev/null +++ b/MLRISC/scheduling/bbScheduler.sig @@ -0,0 +1,17 @@ +(* + * A light weight basic block scheduler. + *) +signature BASIC_BLOCK_SCHEDULER = +sig + structure I : INSTRUCTIONS + structure C : CELLS + sharing I.C = C + + (* + * Note: the instructions are assumed to be in reverse order, + * the same as in the cluster and the CFG representation. + *) + val schedule : {cpu:string} + -> I.instruction list -> I.instruction list + +end diff --git a/MLRISC/scheduling/bbScheduler.sml b/MLRISC/scheduling/bbScheduler.sml new file mode 100644 index 0000000..7e740c4 --- /dev/null +++ b/MLRISC/scheduling/bbScheduler.sml @@ -0,0 +1,139 @@ +(* + * This is a very light weight, not very extensible, basic block scheduler. + * When you don't want to pay the price of all the global scheduling + * stuff. + *) +functor BBScheduler + (structure InsnProps : INSN_PROPERTIES + structure SchedProps : SCHEDULING_PROPERTIES + structure Rank : SCHEDULING_RANKS + where type edge = int + structure Viewer : BASIC_BLOCK_SCHEDULER_DDG_VIEWER + sharing Rank.DDG = Viewer.DDG + sharing Rank.DDG.I = InsnProps.I = SchedProps.I = Viewer.I + val prepass : bool + ) : BASIC_BLOCK_SCHEDULER = +struct + structure I = InsnProps.I + structure C = I.C + structure DDG = Rank.DDG + structure SchedProps = DDG.SchedProps + structure PQ = PriorityQueue + structure A = Array + structure DA = DynArray + structure G = Graph + structure Build = + BasicBlockSchedulerDDGBuilder + (structure DDG = DDG + structure InsnProps = InsnProps + structure SchedProps = SchedProps + ) + + val view_ddg = MLRiscControl.getFlag "view-ddg" + val view_IR = MLRiscControl.getFlag "view-IR" + + val debug = MLRiscControl.getFlag "debug-scheduling" + val dump = MLRiscControl.getFlag "dump-test-block" + val id = MLRiscControl.getCounter "block-id" + val block = MLRiscControl.getInt "test-block" + + fun error msg = MLRiscErrorMsg.error("BBScheduler.",msg) + + fun schedule {cpu} = + let val cpu_info as + SchedProps.CPU_INFO + {newTable,findSlot,pipeline,insert,defUse,...} = + SchedProps.info{backward=true, cpu=SchedProps.cpu cpu} + val split = SchedProps.splitCopies + + fun sched insns = + let val insns' = if prepass then + List.foldr List.revAppend [] (map split insns) + else insns + val N = length insns' + in if N <= 3 then insns else schedInsns'(N, insns') + end + + and schedInsns'(N, insns) = + (id := !id + 1; + if !debug andalso !id <> !block then insns else + let val _ = if !dump then dumpInsns("Before",insns) else (); + val insns = schedInsns(N, insns) + in if !debug then print("BLOCK "^Int.toString (!id)^"\n") else (); + if !dump then dumpInsns("After",insns) else (); + insns + end + ) + + and dumpInsns(title, insns) = + (print(title^" BLOCK "^Int.toString (!id)^"\n"); + app (fn i => + let val (d,u) = defUse i + val d = map #1 d + fun pr rs = app (fn r => print(C.toString r)) rs + in print(Viewer.toString i^"\n"); + (* print "defs="; pr d; + print " uses="; pr u; + print "\n" *) () + end) (rev insns) + ) + and schedInsns(N, insns) = + let val DDG as G.GRAPH ddg = DDG.newDDG(N) + val {succ, pred, nodes} = DDG.internalInfo DDG + val _ = Build.buildDDG{ddg=DDG,cpu_info=cpu_info} insns + val _ = if !view_IR andalso !view_ddg + then Viewer.view DDG else () + val rank = Rank.rank DDG + val issueTimes = A.array(N,0) + val outDeg = A.array(N,0) + val instructions = DA.array(N, []) + val ready = PQ.create rank + + fun init (i,i') = + let val n = length(A.sub(succ,i)) + in if n = 0 then PQ.insert ready (i,i') + else A.update(outDeg,i,n) + end + + fun updatePred(i) = + let fun process (j,i,latency) = + let val c = A.sub(outDeg,j) + in if c = 1 then PQ.insert ready (j,#node_info ddg j) + else A.update(outDeg,j,c-1) + end + in app process (A.sub(pred,i)) end + + fun findASlot(rt,i,i') = + let fun latest([],t) = t + | latest((i,j,latency)::es,t) = + latest(es,Int.min(t,A.sub(issueTimes,j)-latency-1)) + val t = latest(A.sub(succ,i),0) + val p = pipeline i' + val t' = findSlot(rt,t,p) + in insert(rt,t',p); + DA.update(instructions,~t,i'::DA.sub(instructions,~t)); + t' + end + + fun sched(rt) = + let val (i,i') = PQ.deleteMin ready + val t = findASlot(rt,i,i') + in (*print("["^Int.toString t^"]"^Viewer.toString i'^"\n");*) + A.update(issueTimes,i,t); + updatePred(i); + sched(rt) + end + + val _ = #forall_nodes ddg init + val rt = newTable(length insns) + val _ = sched(rt) handle PQ.EmptyPriorityQueue => () + + fun linearize table = + DA.foldr (fn (instrs,l) => List.revAppend(instrs,l)) [] table + + in linearize instructions + end + in sched + end + +end diff --git a/MLRISC/scheduling/buildDDG.sig b/MLRISC/scheduling/buildDDG.sig new file mode 100644 index 0000000..9ad5679 --- /dev/null +++ b/MLRISC/scheduling/buildDDG.sig @@ -0,0 +1,18 @@ +(* + * Signature of data dependence graph builder. + *) + +signature SCHEDULER_DDG_BUILDER = +sig + structure CFG : CONTROL_FLOW_GRAPH + structure DDG : SCHEDULER_DDG + sharing DDG.I = CFG.I + + val buildDDG : + { cpu_info : DDG.SchedProps.cpu_info, + cfg : CFG.cfg, + numberOfInstructions : int, + blockIdTbl : int Array.array + } -> (DDG.node, DDG.edge) DDG.ddg + +end diff --git a/MLRISC/scheduling/buildDDG.sml b/MLRISC/scheduling/buildDDG.sml new file mode 100644 index 0000000..14fb91e --- /dev/null +++ b/MLRISC/scheduling/buildDDG.sml @@ -0,0 +1,331 @@ +(* + * This module builds the data dependence graph for acyclic scheduling. + * + * Notes: + * 1. Special source and sink nodes are added to each basic block. + * These nodes anchor live-in and live-out values. + * 2. If a block has a branch, then it is control dependent on the live-in + * node. + * + * -- Allen + *) + +functor SchedulerDDGBuilder + (structure DDG : SCHEDULER_DDG + structure CFG : CONTROL_FLOW_GRAPH + structure RTLProps : RTL_PROPERTIES + structure InsnProps : INSN_PROPERTIES + sharing InsnProps.I = RTLProps.I = DDG.I = CFG.I + ) : SCHEDULER_DDG_BUILDER = +struct + + structure DDG = DDG + structure CFG = CFG + structure RTL = RTLProps.RTL + structure G = Graph + structure I = CFG.I + structure C = I.C + structure SchedProps = DDG.SchedProps + structure HA = HashArray + structure A = Array + structure W8A = Word8Array + structure SL = SortedList + + exception BuildDDG + + fun error msg = MLRiscErrorMsg.error("BuildDDG",msg) + + val i2s = Int.toString + + (* Zero register magic! *) + val zeroTbl = W8A.array(C.firstPseudo, 0w0) + val _ = List.app (fn k => + case C.zeroReg k of + SOME r => W8A.update(zeroTbl, r, 0w1) + | NONE => () + ) C.cellkinds + fun isZero r = W8A.sub(zeroTbl, r) <> 0w0 handle _ => false + + exception Nothing + + fun buildDDG{cpu_info, cfg, numberOfInstructions, blockIdTbl} = + let val CFG as G.GRAPH cfg = cfg + (* The number of nodes <= instructions + livein + liveout per block *) + val M = numberOfInstructions + #order cfg () * 2 + val DDG as G.GRAPH ddg = DDG.newDDG M + val globalInfo = DDG.globalInfo DDG + + (* Extract instruction properties *) + val SchedProps.CPU_INFO{defUse, ...} = cpu_info + + (* Regmap magic! *) + val regmap = C.lookup(CFG.regmap CFG) + val regmapDefs = map (fn (r,l) => (regmap r,l)) + val regmapUses = map regmap + fun simplifyCopy(instr, dst, src) = + let fun loop([], [], dst', src') = (dst', src') + | loop((d,l)::dst, s::src, dst', src') = + let val d = regmap d and s = regmap s + in if d = s then loop(dst, src, dst', src') + else loop(dst, src, (d,l)::dst', s::src') + end + | loop _ = error "simplifyCopy" + val (dst, src) = loop(dst, src, [], []) + + (* add the copy temporary! *) + val dst = case dst of + [] => dst + | _ => case InsnProps.moveTmpR instr of + SOME r => (regmap r,~1)::dst + | _ => dst + in (dst, src) + end + + (* Edge constructors *) + (* memory *) + fun m_flow(m,l) = DDG.EDGE{l=l, r=m, d=DDG.MEM_FLOW} + val m_anti = DDG.EDGE{l= ~1, r= ~1, d=DDG.MEM_ANTI} + val m_output = DDG.EDGE{l=0, r= ~1, d=DDG.MEM_OUTPUT} + (* register *) + fun flow(r,l) = DDG.EDGE{l=l, r=r, d=DDG.FLOW} + val output = DDG.EDGE{l=0, r= ~1, d=DDG.OUTPUT} + val anti = DDG.EDGE{l= ~1, r= ~1, d=DDG.ANTI} + (* control dependence *) + fun c_flow(r,l) = DDG.EDGE{l=l, r=r, d=DDG.CTRL} + val c_dep = DDG.EDGE{l= ~1, r= ~1, d=DDG.CTRL} + val c_output = DDG.EDGE{l=0, r= ~1, d=DDG.CTRL} + val c_anti = DDG.EDGE{l= ~1, r= ~1, d=DDG.CTRL_ANTI} + + (* How to make a new edge *) + val newEdge = #add_edge ddg + (* val newEdge = fn (i,j,e) => (print(i2s i^"->"^i2s j^" "^DDG.edgeToString e^"\n"); newEdge(i,j,e)) handle e => raise e *) + + (* A table of definitions and uses indexed by block *) + val defUseTbl = HA.array'(13, fn _ => raise BuildDDG) + + (* Create nodes for block b *) + fun createNodes(id, b, [], ops) = (id, ops) + | createNodes(id, b, instr::instrs, ops) = + let val (d, u) = defUse instr + fun newNode(defs, uses) = + let val node = DDG.NODE{b=b,instr=instr,defs=defs,uses=uses} + in #add_node ddg (id, node); + createNodes(id+1, b, instrs, (id, node)::ops) + end + in case InsnProps.instrKind instr of + InsnProps.IK_COPY => + (case simplifyCopy(instr, d, u) of + ([], []) => createNodes(id, b, instrs, ops) + | (d, u) => newNode(d, u) + ) + | _ => newNode(regmapDefs d, regmapUses u) + end + + (* Scan one block; ops are in forward order *) + fun scanBlock{ops,liveIn=(liveIn,_),defTbl,useTbl} = + let fun addOutputAndAnti j (r,_) = + (app (fn i => newEdge(i,j,anti)) (HA.sub(useTbl,r)); + app (fn (i,e) => newEdge(i,j,output)) (HA.sub(defTbl,r)) + ) + + fun addFlow j r = + app (fn (i,e) => newEdge(i,j,e)) (HA.sub(defTbl,r)) + + (* Update def/use *) + fun addDef i (r,l) = + if isZero r then () + else + (HA.update(defTbl,r,[(i,flow(r,l))]); HA.update(useTbl,r,[])) + + fun addUse i r = + if isZero r then () + else HA.update(useTbl,r,i::HA.sub(useTbl,r)) + + fun scan [] = () + | scan((i,DDG.NODE{instr, defs, uses,...})::rest) = + let val rtl = RTLProps.rtl instr + in if RTL.can'tMoveUp rtl then + newEdge(liveIn, i, c_dep) + else (); + app (addOutputAndAnti i) defs; + app (addFlow i) uses; + (* update defs/uses *) + app (addUse i) uses; + app (addDef i) defs; + scan rest + end + in scan ops + end + + + val blockId = ref 0 + val nodeId = ref 0 + val blockMap = A.array(#order cfg (), 0) + val liveInMap = IntHashTable.mkTable(13, Nothing) + val liveOutMap = IntHashTable.mkTable(13, Nothing) + val specialMap = IntHashTable.mkTable(32, Nothing) + val addSpecial = IntHashTable.insert specialMap + val isSpecial = IntHashTable.find specialMap + val isSpecial = fn b => case isSpecial b of SOME _ => true + | NONE => false + + (* Process a basic block in topological order of the region: + * 1. create all the nodes in the DDG + * 2. add the edges + *) + fun processBlock(b,b' as CFG.BLOCK{insns,...}) = + let val bid = !blockId (* block id *) + val _ = A.update(blockMap, bid, b) + val _ = A.update(blockIdTbl, b, bid) + val _ = blockId := bid + 1 + + fun createNode(instr, defs, uses) = + let val node = (!nodeId, + DDG.NODE{instr=instr,b=bid,defs=defs,uses=uses}) + in nodeId := !nodeId + 1; + #add_node ddg node; + node + end + + (* Create the nodes *) + val (newNodeId, ops) = createNodes(!nodeId, bid, !insns, []) + val _ = nodeId := newNodeId + + val revAppend = List.revAppend + + val defs = HA.array(13, []) + val uses = HA.array(13, []) + + (* edge Y->X is an internal region edge + * merge definition and uses from Y => X + *) + fun mergeDefUse(Y,X,_) = + let val {defTbl, useTbl} = HA.sub(defUseTbl, Y) + in HA.appi (fn (r,es) => + HA.update(defs, r, revAppend(es, HA.sub(defs, r)))) + (defTbl, 0, NONE); + HA.appi (fn (r,is) => + HA.update(uses, r, revAppend(is, HA.sub(uses, r)))) + (useTbl, 0, NONE) + end + + fun addCtrlDepEdge(i, j) = newEdge(i,j,c_dep) + + (* Add a live-in node for a block that summarizes the + * values that are coming live-in from side-exits + *) + fun addLiveIn X = + let val entry_edges = #entry_edges cfg X + val liveIn = + SL.uniq(foldr (fn ((Y,_,_),S) => + let val CFG.BLOCK{annotations,...} = #node_info cfg Y + in case #get DDG.LIVENESS (!annotations) of + SOME{liveOut, ...} => revAppend(liveOut,S) + | NONE => S + end) [] entry_edges) + val liveInNode as (i,_) = + createNode(SchedProps.source, + map (fn r => (r,~1)) liveIn, []) + val _ = IntHashTable.insert liveInMap (bid, liveInNode) + val _ = addSpecial(i, true) + fun addOutputAndAnti j r = + (app (fn i => if isSpecial j then () + else newEdge(i,j,anti)) (HA.sub(uses,r)); + app (fn (i,e) => + if isSpecial i then () + else newEdge(i,j,output)) (HA.sub(defs,r)) + ) + in app (addOutputAndAnti i) liveIn; + app (fn r => HA.update(defs, r, + (i,DDG.EDGE{l= ~1,r=r,d=DDG.LIVEIN})::HA.sub(defs, r))) + liveIn; + liveInNode + end + + val _ = app mergeDefUse (#in_edges cfg b) + val liveInNode = addLiveIn b + + (* Add a live-out node for a block that summarizes the + * values that are going live-out from side-exits + *) + fun addLiveOut X = + (case #exit_edges cfg X of + exit_edges => + let fun createLiveOutNode(liveOut) = + let val node as (i, _) = + createNode(SchedProps.sink, [], liveOut) + in IntHashTable.insert liveOutMap (bid, node); + addSpecial(i, true); + node + end + val liveOut = + if List.exists + (fn (_,_,CFG.EDGE{k,...}) => k = CFG.EXIT) + exit_edges + then + let val CFG.BLOCK{annotations,...} = #node_info cfg X + in case #get DDG.LIVENESS (!annotations) of + SOME{liveOut, ...} => liveOut + | NONE => error "missing live out" + end + else + SL.uniq(foldr (fn ((_,Y,_),S) => + let val CFG.BLOCK{annotations,...} = #node_info cfg Y + in case #get DDG.LIVENESS (!annotations) of + SOME{liveIn, ...} => revAppend(liveIn,S) + | NONE => S + end) [] exit_edges) + + val liveOutNode as (i,_) = + case !insns of + [] => createLiveOutNode(liveOut) + | jmp::_ => + case InsnProps.instrKind jmp of + InsnProps.IK_JUMP => + (* add a control dependence edge to the liveIn *) + let val jmpNode as (j,_) = List.last ops + in addCtrlDepEdge(#1 liveInNode, j); + jmpNode + end + | _ => createLiveOutNode(liveOut) + fun addUse i r = + if isZero r then () + else HA.update(uses,r,i::HA.sub(uses,r)) + fun addLiveOut j r = + app (fn (i,DDG.EDGE{l,r,...}) => + newEdge(i,j,DDG.EDGE{l=l,r=r,d=DDG.LIVEOUT})) + (HA.sub(defs,r)) + + in app (addLiveOut i) liveOut; + addLiveOutCtrlDep i; + app (addUse i) liveOut + end + ) + + (* Add control dependences edges from all the instructions + * to the live-out node + *) + and addLiveOutCtrlDep(j) = + app (fn node as (i,_) => + if i = j then () else addCtrlDepEdge(i,j) + ) ops + + val _ = scanBlock{ops=ops, liveIn=liveInNode, + defTbl=defs, useTbl=uses}; + + in addLiveOut b; + HA.update(defUseTbl, b, {defTbl=defs, useTbl=uses}) + end + + (* Build the entire dag *) + fun buildDag() = + let val allNodes = #nodes cfg () (* must be in topological order! *) + in app processBlock allNodes + end + + in buildDag(); + globalInfo := + SOME{blockMap=blockMap, liveInMap=liveInMap, liveOutMap=liveOutMap}; + DDG + end +end diff --git a/MLRISC/scheduling/buildLocalDDG.sig b/MLRISC/scheduling/buildLocalDDG.sig new file mode 100644 index 0000000..c89962d --- /dev/null +++ b/MLRISC/scheduling/buildLocalDDG.sig @@ -0,0 +1,16 @@ +signature BASIC_BLOCK_SCHEDULER_DDG_BUILDER = +sig + + structure I : INSTRUCTIONS + structure C : CELLS + structure DDG : SCHEDULER_DDG + sharing I = DDG.I + sharing C = I.C + + (* instructions in reverse order *) + type ddg = (I.instruction,DDG.latency) DDG.ddg + val buildDDG : { cpu_info : DDG.SchedProps.cpu_info, + ddg : ddg + } -> I.instruction list -> unit + +end diff --git a/MLRISC/scheduling/buildLocalDDG.sml b/MLRISC/scheduling/buildLocalDDG.sml new file mode 100644 index 0000000..7df351a --- /dev/null +++ b/MLRISC/scheduling/buildLocalDDG.sml @@ -0,0 +1,96 @@ +(* + * Build a DDG from a basic block + *) +functor BasicBlockSchedulerDDGBuilder + (structure DDG : SCHEDULER_DDG + structure InsnProps : INSN_PROPERTIES + sharing DDG.I = InsnProps.I + ) : BASIC_BLOCK_SCHEDULER_DDG_BUILDER = +struct + + structure DDG = DDG + structure I = DDG.I + structure C = I.C + structure SchedProps = DDG.SchedProps + structure H = C.ColorTable + structure G = Graph + + type architecture = string + type ddg = (I.instruction,DDG.latency) DDG.ddg + + fun error msg = MLRiscErrorMsg.error("BasicBlockSchedulerDDGBuilder",msg) + + val COPY_LATENCY = 0 + + exception NotThere + + (* + * Build a DAG from a list of instructions (in reverse order) + * This is just a simple def/use analysis. + *) + fun buildDDG{cpu_info,ddg=G.GRAPH ddg} = + let val SchedProps.CPU_INFO{defUse,...} = cpu_info + fun buildDAG insns = + let val defMap = H.mkTable(31,NotThere) + val useMap = H.mkTable(31,NotThere) + val findUse = H.find useMap + val findDef = H.find defMap + val rmvUse = H.remove useMap + val rmvDef = H.remove defMap + fun lookupUse r = case findUse r of NONE => [] | SOME x => x + fun lookupDef r = case findDef r of NONE => [] | SOME x => x + val insertUse = H.insert useMap + val insertDef = H.insert defMap + + fun flowDep i (r,latency) = + app (fn j => #add_edge ddg (i,j,latency)) (lookupUse r) + fun outputDep i (r,_) = + app (fn j => #add_edge ddg (i,j,~1)) (lookupDef r) + fun antiDep i r = + app (fn j => #add_edge ddg (i,j,~1)) (lookupDef r) + fun ctrlDep i j = #add_edge ddg (i,j,~1) + fun addDefs n (r,l) = (rmvUse r; insertDef(r, [n])) + fun addUses n r = insertUse(r,n::lookupUse r) + + fun copyDstSrc i' = + let val (dst, src) = InsnProps.moveDstSrc i' + fun coalesce(d::ds, s::ss, dst, src) = + if C.sameColor(d,s) then coalesce(ds, ss, dst, src) + else coalesce(ds, ss, (d,COPY_LATENCY)::dst, s::src) + | coalesce([], [], dst, src) = (dst, src) + | coalesce _ = error "coalesce" + + val (dst, src) = coalesce(dst, src, [], []) + val dst = case InsnProps.moveTmpR i' of + NONE => dst + | SOME tmp => (tmp,~1)::dst + in (dst, src) end + + fun scan(i,[],branches,succs) = () + | scan(i,i'::insns,branches,succs) = + let val _ = #add_node ddg (i,i') (* create node *) + val kind = InsnProps.instrKind i' + val (defs,uses) = + case kind of + InsnProps.IK_COPY => copyDstSrc i' + | _ => defUse i' + val _ = #add_node ddg (i,i') + val _ = app (flowDep i) defs + val _ = app (outputDep i) defs + val _ = app (antiDep i) uses + val _ = app (ctrlDep i) branches + val _ = app (addDefs i) defs + val _ = app (addUses i) uses + val branches = + case kind of + InsnProps.IK_JUMP => [i] + | InsnProps.IK_CALL => (app (ctrlDep i) succs; [i]) + | _ => branches + in scan(i+1,insns,branches,i::succs) + end + in scan(0,insns,[],[]) + end + in buildDAG + end + +end diff --git a/MLRISC/scheduling/clusterBBScheduler.sml b/MLRISC/scheduling/clusterBBScheduler.sml new file mode 100644 index 0000000..07dccf0 --- /dev/null +++ b/MLRISC/scheduling/clusterBBScheduler.sml @@ -0,0 +1,30 @@ +(* + * Simple minded basic block scheduling + *) +functor ClusterBasicBlockScheduler + (structure Flowgraph : FLOWGRAPH + structure BBSched : BASIC_BLOCK_SCHEDULER + sharing Flowgraph.I = BBSched.I + val cpu : string ref + ) : CLUSTER_OPTIMIZATION = +struct + + structure F = Flowgraph + type flowgraph = F.cluster + + val name = "Basic Block Scheduling" + + fun run(cluster as F.CLUSTER{blocks, annotations, ...}) = + if #contains MLRiscAnnotations.NO_OPTIMIZATION (!annotations) + then cluster + else + let val schedule = BBSched.schedule {cpu= !cpu} + fun sched(F.BBLOCK{annotations, insns, ...}) = + if #contains MLRiscAnnotations.NO_OPTIMIZATION (!annotations) + then () + else insns := schedule(! insns) + | sched _ = () + in app sched blocks; + cluster + end +end diff --git a/MLRISC/scheduling/compensation.sig b/MLRISC/scheduling/compensation.sig new file mode 100644 index 0000000..a4bf89c --- /dev/null +++ b/MLRISC/scheduling/compensation.sig @@ -0,0 +1,13 @@ +(* + * This is the signature for inserting compensation code. + * + * -- Allen + *) + +signature SCHEDULING_COMPENSATION = +sig + + structure CFG : CONTROL_FLOW_GRAPH + structure DDG : SCHEDULER_DDG + +end diff --git a/MLRISC/scheduling/compensation.sml b/MLRISC/scheduling/compensation.sml new file mode 100644 index 0000000..c5ab841 --- /dev/null +++ b/MLRISC/scheduling/compensation.sml @@ -0,0 +1,14 @@ +(* + * Module for inserting compensation code. + * + * -- Allen + *) + +functor SchedulingCompensation + (structure CFG : CONTROL_FLOW_GRAPH + structure DDG : SCHEDULER_DAG + sharing CFG.I = DDG.I + ) : SCHEDULING_COMPENSATION = +struct + +end diff --git a/MLRISC/scheduling/dagScheduling.sml b/MLRISC/scheduling/dagScheduling.sml new file mode 100644 index 0000000..0eb9771 --- /dev/null +++ b/MLRISC/scheduling/dagScheduling.sml @@ -0,0 +1,47 @@ +(* + * A region based scheduler. + * + * -- Allen + *) + +functor DAGScheduling + (structure ListScheduler : LIST_SCHEDULER + structure DDGBuilder : SCHEDULER_DDG_BUILDER + structure Ranks : SCHEDULING_RANKS + where type edge = DDGBuilder.DDG.edge + structure Viewer : GLOBAL_SCHEDULER_DDG_VIEWER + sharing DDGBuilder.DDG = ListScheduler.DDG = Ranks.DDG + sharing DDGBuilder.CFG = ListScheduler.CFG + sharing Viewer.IR = ListScheduler.IR + sharing Viewer.DDG = DDGBuilder.DDG + ) : GLOBAL_SCHEDULING_ALGORITHM = +struct + structure IR = ListScheduler.IR + structure CFG = ListScheduler.CFG + structure DDG = ListScheduler.DDG + structure G = Graph + + val i2s = Int.toString + + val view_IR = MLRiscControl.getFlag "view-IR" + + fun schedule cpu_info { ir, region, numberOfInstructions, blockIdTbl } = + let val DDG as G.GRAPH ddg = + DDGBuilder.buildDDG {cpu_info=cpu_info, + cfg=region, blockIdTbl=blockIdTbl, + numberOfInstructions=numberOfInstructions} + val _ = print("V(ddg)="^i2s(#order ddg ())^ + " E(ddg)="^i2s(#size ddg ())^"\n") + val _ = if !view_IR then Viewer.view ir DDG else () + val ranking = Ranks.rank DDG + in ListScheduler.listScheduler + {cpu_info=cpu_info, + blockIdTbl=blockIdTbl, + cfg=ir, + region=region, + ddg=DDG, + ranking=ranking + } + end + +end diff --git a/MLRISC/scheduling/globalCP.sml b/MLRISC/scheduling/globalCP.sml new file mode 100644 index 0000000..631fe2c --- /dev/null +++ b/MLRISC/scheduling/globalCP.sml @@ -0,0 +1,31 @@ +functor GlobalCriticalPath + (DDG : SCHEDULER_DDG) : SCHEDULING_RANKS where type edge = DDG.edge = +struct + + structure DDG = DDG + structure I = DDG.I + structure G = Graph + structure A = Array + + type edge = DDG.edge + + fun rank(DDG as G.GRAPH ddg) = + let val N = #capacity ddg () + val len = A.array(N,0) + val children = A.tabulate(N,fn i => length(#out_edges ddg i)) + fun process i = + let fun g((i,j,DDG.EDGE{l,...})::es,n) = + g(es,Int.max(A.sub(len,j) + l + 1,n)) + | g([],n) = n + in A.update(len,i,g(#out_edges ddg i,0)) + end + fun order((i,_),(j,_)) = + case Int.compare(A.sub(len,i),A.sub(len,j)) of + EQUAL => A.sub(children,i) > A.sub(children,j) + | LESS => false + | GREATER => true + in app process (rev (GraphTopsort.topsort DDG (map #1 (#nodes ddg ())))); + order + end + +end diff --git a/MLRISC/scheduling/globalDDGViewer.sig b/MLRISC/scheduling/globalDDGViewer.sig new file mode 100644 index 0000000..7297ace --- /dev/null +++ b/MLRISC/scheduling/globalDDGViewer.sig @@ -0,0 +1,17 @@ +(* + * View a scheduler DDG constructed for basic block scheduling + * + * -- Allen + *) +signature GLOBAL_SCHEDULER_DDG_VIEWER = +sig + + structure IR : MLRISC_IR + structure DDG : SCHEDULER_DDG + structure I : INSTRUCTIONS + sharing DDG.I = I + + val view : IR.IR -> (DDG.node,DDG.edge) DDG.ddg -> unit + +end + diff --git a/MLRISC/scheduling/globalDDGViewer.sml b/MLRISC/scheduling/globalDDGViewer.sml new file mode 100644 index 0000000..7d322e9 --- /dev/null +++ b/MLRISC/scheduling/globalDDGViewer.sml @@ -0,0 +1,39 @@ +(* + * View a scheduler DDG constructed for basic block scheduling + * + * -- Allen + *) +functor GlobalSchedulerDDGViewer + (structure GraphViewer : GRAPH_VIEWER + structure IR : MLRISC_IR + structure DDG : SCHEDULER_DDG + structure FormatInsn : FORMAT_INSTRUCTION + sharing IR.I = FormatInsn.I = DDG.I + ) : GLOBAL_SCHEDULER_DDG_VIEWER = +struct + + structure IR = IR + structure DDG = DDG + structure I = DDG.I + structure L = GraphLayout + + val edgeColor = L.COLOR "red" + + val i2s = Int.toString + + + fun view IR ddg = + let val regmap = IR.CFG.regmap IR + val toString = FormatInsn.toString [] (I.C.lookup regmap) + in GraphViewer.view + (GraphLayout.makeLayout + {graph = fn _ => [], + node = fn (_,DDG.NODE{instr,b,...}) => + [L.LABEL("["^i2s b^"] "^toString instr)], + edge = fn (_,_,e) => [L.LABEL(DDG.edgeToString e), edgeColor] + } + ddg + ) + end + +end diff --git a/MLRISC/scheduling/globalScheduler.sml b/MLRISC/scheduling/globalScheduler.sml new file mode 100644 index 0000000..f7d7cf3 --- /dev/null +++ b/MLRISC/scheduling/globalScheduler.sml @@ -0,0 +1,116 @@ +(* + * A top level functor and ties all the modules for global scheduling + * together. + *) + +functor GlobalScheduler + (structure IR : MLRISC_IR + structure DDG : SCHEDULER_DDG + structure InsnProps : INSN_PROPERTIES + structure RTLProps : RTL_PROPERTIES + structure Viewer : GLOBAL_SCHEDULER_DDG_VIEWER + structure FormatInsn : FORMAT_INSTRUCTION + (* structure Rewrite : REWRITE_INSTRUCTIONS *) + sharing InsnProps.I = IR.I = DDG.I = (* = Rewrite.I *) + FormatInsn.I = RTLProps.I + sharing Viewer.IR = IR + sharing Viewer.DDG = DDG + ) : MLRISC_IR_OPTIMIZATION = +struct + structure IR = IR + structure CFG = IR.CFG + structure I = IR.I + structure SchedProps = DDG.SchedProps + structure SL = SortedList + structure G = Graph + structure A = Array + + structure RegionBuilder = RegionBuilder(IR) + + structure DDGBuilder = + SchedulerDDGBuilder + (structure DDG = DDG + structure CFG = CFG + structure InsnProps = InsnProps + structure RTLProps = RTLProps + ) + + structure ListScheduler = + ListScheduler + (structure IR = IR + structure DDG = DDG + structure InsnProps = InsnProps + structure FormatInsn = FormatInsn + (* structure Rewrite = Rewrite *) + ) + + structure GlobalCP = GlobalCriticalPath(DDG) + + structure DAGScheduling = + DAGScheduling + (structure ListScheduler = ListScheduler + structure DDGBuilder = DDGBuilder + structure Viewer = Viewer + structure Ranks = GlobalCP + ) + + structure Liveness = LivenessAnalysis(CFG) + + type flowgraph = IR.IR + + val name = "global scheduling" + + val cpu = MLRiscControl.getString "cpu" + val _ = cpu := "default" + + fun computeLiveness(cpu_info,CFG as G.GRAPH cfg) = + let val SchedProps.CPU_INFO{defUse=insnDefUse, ...} = cpu_info + val regmap = I.C.lookup(CFG.regmap CFG) + fun defUse(_,CFG.BLOCK{insns, ...}) = + let fun scan([], def, use) = (def, use) + | scan(i::is, def, use) = + let val (d,u) = insnDefUse i + val u = SL.uniq(map regmap u) + val u' = SL.difference(u, def) + val use' = SL.merge(u', use) + val d = SL.uniq(map (fn (r,l) => regmap r) d) + val d' = SL.difference(d, use') + val def' = SL.merge(d', def) + in scan(is, def', use') + end + in scan(rev(!insns), [], []) + end + + fun liveOut(_, block) = + map regmap (I.C.CellSet.toCellList (CFG.liveOut block)) + + fun result{block=(_,CFG.BLOCK{annotations, ...}), liveIn, liveOut} = + annotations := + #set DDG.LIVENESS ({liveIn=liveIn, liveOut=liveOut}, !annotations) + + in Liveness.liveness{cfg=CFG,defUse=defUse,liveOut=liveOut, result=result} + end + + fun run IR = + let val maxBlocks=100 + val maxInstrs=1000 + val minFreqRatio=0.01 + val traceOnly=false + val sideEntries=true + val internalBackEdges=false + val insertDummyBlocks=false + val params = {maxBlocks = maxBlocks, + maxInstrs = maxInstrs, + traceOnly = traceOnly, + minFreqRatio = minFreqRatio, + sideEntries = sideEntries, + internalBackEdges = internalBackEdges, + insertDummyBlocks = insertDummyBlocks + } + val cpu_info = SchedProps.info{backward=false,cpu=SchedProps.cpu(!cpu)} + in computeLiveness(cpu_info,IR); + RegionBuilder.regionBuilder params IR (DAGScheduling.schedule cpu_info); + IR + end + +end diff --git a/MLRISC/scheduling/globalSchedulingAlgo.sig b/MLRISC/scheduling/globalSchedulingAlgo.sig new file mode 100644 index 0000000..de4768c --- /dev/null +++ b/MLRISC/scheduling/globalSchedulingAlgo.sig @@ -0,0 +1,23 @@ +(* + * Various global scheduling algorithm have this signature + * + * -- Allen + *) + +signature GLOBAL_SCHEDULING_ALGORITHM = +sig + + structure IR : MLRISC_IR + structure CFG : CONTROL_FLOW_GRAPH + structure DDG : SCHEDULER_DDG + sharing DDG.I = CFG.I + sharing IR.CFG = CFG + + val schedule : DDG.SchedProps.cpu_info (* architecture *) -> + { ir : IR.IR, (* The overall IR *) + region : IR.cfg, (* Subregion for scheduling *) + numberOfInstructions : int, + blockIdTbl : int Array.array + } -> unit + +end diff --git a/MLRISC/scheduling/listScheduler.sig b/MLRISC/scheduling/listScheduler.sig new file mode 100644 index 0000000..5f3986a --- /dev/null +++ b/MLRISC/scheduling/listScheduler.sig @@ -0,0 +1,26 @@ +(* + * A customizable list scheduler that works on a region at a time. + * A region is a subset of the control flow graph. + * + * -- Allen + *) +signature LIST_SCHEDULER = +sig + + structure I : INSTRUCTIONS + structure IR : MLRISC_IR + structure DDG : SCHEDULER_DDG + structure CFG : CONTROL_FLOW_GRAPH + sharing DDG.I = IR.I = I + sharing IR.CFG = CFG + + val listScheduler : + { cpu_info : DDG.SchedProps.cpu_info, + blockIdTbl : int Array.array, + ranking : DDG.node Graph.node * DDG.node Graph.node -> bool, + cfg : CFG.cfg, (* the entire program! *) + region : CFG.cfg, (* current region *) + ddg : (DDG.node, DDG.edge) DDG.ddg + } -> unit + +end diff --git a/MLRISC/scheduling/listScheduler.sml b/MLRISC/scheduling/listScheduler.sml new file mode 100644 index 0000000..58269b8 --- /dev/null +++ b/MLRISC/scheduling/listScheduler.sml @@ -0,0 +1,765 @@ +(* Disclaimer... + * ============= + * + * I've written and re-written many global schedulers thru the years. + * It is always hard to get right. Hopefully this is the last time I have + * to write/rewrite one for a long while... + * + * A parameterizable list scheduler. + * ================================ + * This list scheduler does a few things: + * 1. Works on a region at a time instead of one basic block + * 2. Can perform replication + * 3. Can perform register renaming to get around of anti-/output- dependences + * 4. Recognizes the distinction between initializations (which can be + * speculation) versus stores (which cannot be). + * + * Some notes on how the list scheduling algorithm work: + * 1. (Side)-entries and (side)-exits are cfg edges that come into and out of + * the current region. + * 2. The region to be scheduled has to be acyclic. Cyclic edges are cut + * arbitrarily (by the region forming combinator.) + * 3. Every block that has side-entries has an "live-in" node that summaries + * all the values that are defined coming in the side-entires. Similarly, + * for all blocks with side-exits we have "live-out" nodes. + * 4. During list scheduling, multiple blocks may be "open" at the same time. + * Instructions can only be placed within open blocks. + * 5. Once every instruction (that appears in the block originally) + * has been scheduled, the block is then "closed". + * 6. A new block is opened if all its predecessors is closed. + * 7. "Ready" instructions, i.e. instructions with all its predecessors + * schedules are put onto a priority list. The priority list is ranked + * by the execution frequency of the instruction. + * 8. At each step, an instruction i is chosen from the priority list to + * be scheduled. This instruction has to be placed at "all" open blocks + * that reaches the block where instruction originates. This may involve + * replicating the instruction. For this transformation to be legal, + * structural and profitability checks have to be performed. + * + * a. Structural check determines whether it is semantics preserving + * to put this instruction into the set of open blocks. + * b. Profitability check determines whether it is profitable, i.e. is + * it okay to put this instruction to these blocks or should we delay + * it. + * + * Instructions that fail these criteria are moved into a pending queue. + * 9. Instructions from the pending queue are released back into the ready + * queue whenever the set of open blocks change. + * 10. BUT ... this is not the entire story. When scheduling dags, the + * dependency graph initially built is insufficient to summarize all + * dependences. For that to work, incremental liveness computation + * must also be performed. This is how it works: + * + * a. Each open block keeps track of what registers are live at the + * current time. Liveness can be inferred via the dependence dag + * + * -- Allen (leunga@cs.nyu.edu) 6/1/00 + *) +functor ListScheduler + (structure DDG : SCHEDULER_DDG + structure IR : MLRISC_IR + structure InsnProps : INSN_PROPERTIES + structure FormatInsn : FORMAT_INSTRUCTION + (* structure Rewrite : REWRITE_INSTRUCTIONS *) + sharing DDG.I = InsnProps.I = IR.I = (* = Rewrite.I *) + FormatInsn.I + ) : LIST_SCHEDULER = +struct + + structure IR = IR + structure CFG = IR.CFG + structure DDG = DDG + structure I = DDG.I + structure SchedProps = DDG.SchedProps + structure G = Graph + structure A = Array + structure DA = DynArray + structure W8A = Word8Array + structure PQ = PriorityQueue + + fun error msg = MLRiscErrorMsg.error("ListScheduler",msg) + + val debug = true + val verbose = true + val safetyCheck = true + + val i2s = Int.toString + + exception NotOpened + exception NotLive + + val dummyJump = (~1,DDG.NODE{instr=InsnProps.nop(),defs=[],uses=[],b= ~1}) + + (* data structure to hold info about a block *) + datatype openBlock = + OPEN_BLOCK of + {bid : int, (* block id *) + reachables : W8A.array, (* reachable set *) + rt : SchedProps.reservation_table, (* reservation table *) + sigma : I.instruction list DA.array, + liveSet : DDG.edge G.edge list IntHashTable.hash_table, + jumpScheduled : bool ref, + jumpTime : int ref, + jumpNode : DDG.node G.node ref + } + + val profitabilityRatio = 0.5 + + fun listScheduler{ranking, cpu_info, blockIdTbl, cfg, region, ddg} = + let + (* Extract architecture info from the data base *) + val SchedProps.CPU_INFO{newTable, findSlot, pipeline, insert, ...} = + cpu_info + + (* The data structures: + * succ, pred --- adjacency lists + * blockMap --- mapping from internal block id -> real block id + * liveInMap --- mapping from internal block id -> live in node + * liveOutMap --- mapping from internal block id -> live out node + * issueTimeTbl --- node id --> its issue time + * inDegsTbl --- node id --> its current in-degree + * insnCountTbl --- internal block id --> number of unscheduled instrs + *) + val DDG as G.GRAPH ddg = ddg + val CFG as G.GRAPH cfg = cfg + val Region as G.GRAPH region = region + val {succ, pred, ...} = DDG.internalInfo DDG + val SOME{blockMap, liveInMap, liveOutMap, ...} = !(DDG.globalInfo DDG) + val N = #capacity ddg () (* number of instructions *) + val M = #order region () (* number of blocks in the region *) + + (* Internal tables indexed by instruction id *) + val issueTimeTbl = A.array(N,~1) (* issue times of instructions *) + val inDegsTbl = A.array(N,0) (* in-degree of a node *) + + (* Internal tables indexed by block id *) + val insnCountTbl = A.array(M, 0) (* number of instructions per block *) + val freqTbl = A.array(M, 0) (* execution frequency of blocks *) + val predCountTbl = A.array(M, 0) (* in degree of blocks *) + val rtTbl = A.array(M,newTable 0) + val startTimeTbl = A.array(M, 0) + val maxTimeTbl = A.array(M, 0) + val isLegalTbl = A.array(M, 0) (* is it legal to schedule + block id at this time *) + val isProfitableTbl = A.array(M, 0) + val profitabilityTbl = A.array(M, 0.0) (* priority of block *) + val liveSetTbl = A.array(M, IntHashTable.mkTable(0, NotLive)) + + val stampCounter = ref 0 + fun newStamp() = + let val st = !stampCounter + 1 + in stampCounter := st; st end + + (* Linearize the schedule *) + fun linearize sigma = + DA.foldl (fn (instrs,l) => instrs @ l) [] sigma + + (* It is okay to move an instruction from block id *) + fun isLegalMove id = A.sub(isLegalTbl, id) = !stampCounter + fun isProfitableMove id = A.sub(isProfitableTbl, id) = !stampCounter + + val showInsn = FormatInsn.toString [] (I.C.lookup (CFG.regmap CFG)) + fun showOp(DDG.NODE{instr,b,...}) = + showInsn instr^" ["^i2s(A.sub(blockMap,b))^"]" + + fun isJump instr = + case InsnProps.instrKind instr of + InsnProps.IK_JUMP => true + | _ => false + + (* Real priority function *) + fun priorityFun(I as (i,DDG.NODE{b=b_i,...}), + J as (j,DDG.NODE{b=b_j,...})) = + let val p_i = A.sub(profitabilityTbl,b_i) + val p_j = A.sub(profitabilityTbl,b_j) + in case Real.compare(p_i,p_j) of + EQUAL => ranking(I,J) + | GREATER => true + | LESS => false + end + + + (* Initialization steps: + * 1. Initialize the frequency array + * 2. Count the number of predecessors of each block in the region + * 3. Count the number of non-special instructions + * 4. Initialize the pending queue + *) + fun initialize() = + let (* Initialize the frequencies *) + val _ = + A.appi (fn (id,b) => + let val CFG.BLOCK{freq, ...} = #node_info region b + in A.update(freqTbl, id, !freq); + A.update(predCountTbl, id, length(#in_edges region b)) + end) (blockMap, 0, NONE) + val pendingNodes = + foldr + (fn ((i,i'),pending) => + let val inEdges = #in_edges ddg i + val n = length inEdges + val DDG.NODE{b, instr, ...} = i' + fun addToPending() = + if n = 0 + then (i, i')::pending + else (A.update(inDegsTbl, i, n); pending) + in case InsnProps.instrKind instr of + InsnProps.IK_SINK => pending + | InsnProps.IK_SOURCE => pending + | _ => + (A.update(insnCountTbl, b, A.sub(insnCountTbl, b) + 1); + addToPending() + ) + end + ) [] (#nodes ddg ()) + in pendingNodes + end + + (* Queues *) + val readyQueue = PQ.create priorityFun + val enqueue = PQ.insert readyQueue + val pending = ref(initialize()) + (* + val enqueue = if debug then + (fn (i,i') => (print("QUEUEING "^showOp i'^"\n"); enqueue (i,i'))) + else enqueue + *) + + (* === Incremental liveness computation routines === *) + + (* + * Add an instruction into the current live set of block bid. + *) + fun addInstrToLiveSet(i, i' as DDG.NODE{defs, uses, ...}, liveSet) = + let val lookupLiveSet = IntHashTable.find liveSet + val lookupLiveSet = fn b => case lookupLiveSet b of SOME x => x + | NONE => [] + val updateLiveSet = IntHashTable.insert liveSet + + fun rmvUse r = + let fun loop([], es') = es' + | loop((e as (j,k,_))::es, es') = + if i = k then loop(es, es') else loop(es, e::es') + val es = lookupLiveSet r + val es = loop(es, []) + in updateLiveSet(r, es) end + + fun rmvUses [] = () + | rmvUses(r::uses) = (rmvUse r; rmvUses uses) + + fun addDef(r, e) = updateLiveSet(r, e::lookupLiveSet r) + + fun addDefs [] = () + | addDefs((edge as (i,j,e as DDG.EDGE{r,d,...}))::es) = + ((* print(i2s i^" -> "^i2s j^" "^DDG.edgeToString e^"\n"); *) + if r >= 0 then addDef(r, edge) else (); + addDefs es + ) + + in rmvUses uses; + addDefs (A.sub(succ, i)) + end + + (* + * Check whether it is a legal code motion to move an instruction i + * from block "from" to block "to". Instruction i must have no + * unscheduled predecessors at this point. + *) + fun isIllegalCodeMotion(i, i' as DDG.NODE{defs, ...}, liveSet) = + let (* Check whether instruction i defines a register r + * that is currently live. If so, the associated code motion is + * illegal (without renaming) + *) + val lookupLiveSet = IntHashTable.find liveSet + val lookupLiveSet = fn b => case lookupLiveSet b of SOME x => x + | NONE => [] + (* + * Add an output- dependence edge between two nodes + *) + fun addOutputDepEdge(i,j,r) = + (#add_edge ddg (i,j, DDG.EDGE{l= ~1, d=DDG.OUTPUT, r=r}); + A.update(inDegsTbl, j, A.sub(inDegsTbl, j) + 1) + ) + + fun isLiveReg r = + let fun loop [] = false + | loop((j,k:int,e)::es) = + if i = k then loop es else + (if debug then + print("BAD: "^i2s j^" -> "^i2s k^" "^ + DDG.edgeToString e^ + " "^showOp(#node_info ddg j)^" -> "^ + " "^showOp(#node_info ddg k)^"\n" + ) + else (); + true + ) + (* if i = k then i is the use of r so it doesn't count *) + in loop(lookupLiveSet r) + end + fun canKillLiveValues [] = false + | canKillLiveValues((r,_)::defs) = + isLiveReg r orelse canKillLiveValues defs + in canKillLiveValues defs + end + + (* Find out the time slot to insert the instruction j in + * reservation table rt (from block id) + *) + fun findScheduleSlot(bid, rt, p, j, j') = + let fun earliest([], t) = t + | earliest((i,j,e as DDG.EDGE{l,...})::es, t) = + let val t' = A.sub(issueTimeTbl,i) + val t'' = t' + l + 1 + in (* if debug then + print(i2s i^" -> "^i2s j^" "^DDG.edgeToString e^ + " t'="^i2s t'^" t''="^i2s t''^"\n") + else (); *) + earliest(es, Int.max(t, t'')) + end + val t_min = earliest(A.sub(pred, j), A.sub(startTimeTbl, bid)) + in findSlot(rt, t_min, p) + end + + (* Release an instruction when all its predecessors + * have been scheduled. Note that fake sink and source instructions + * must be treated specially and so we don't release them onto the queue. + *) + fun releaseInstr j = + let val j' as DDG.NODE{instr,b,...} = #node_info ddg j + in case InsnProps.instrKind instr of + InsnProps.IK_SOURCE => () + | _ => if isProfitableMove b then enqueue(j,j') + else pending := (j,j') :: !pending + end + + (* Release the successors of an instruction + * after it has been scheduled + *) + fun updateSucc(i) = + let fun loop [] = () + | loop((i,j,_)::es) = + let val n = A.sub(inDegsTbl, j) + in A.update(inDegsTbl, j, n-1); + if n = 1 then releaseInstr j else (); + loop es + end + in loop(A.sub(succ, i)) + end + + (* Release the live-in node for block id *) + fun releaseLiveIn(bid,liveSet) = + let val liveInNode as (j,j') = IntHashTable.lookup liveInMap bid + in if A.sub(issueTimeTbl, j) < 0 then + (addInstrToLiveSet(j,j',liveSet); + A.update(issueTimeTbl, j, 0); + updateSucc j; + if debug then print("LIVEIN "^showOp j'^"\n") else () + ) + else () + end handle _ => () (* no live-in node, so don't bother *) + + (* Release the live-out node for block id *) + fun releaseLiveOut(bid, liveSet) = + let val liveOutNode as (j,j' as DDG.NODE{instr,...}) = + IntHashTable.lookup liveOutMap bid + in case InsnProps.instrKind instr of + InsnProps.IK_SINK => + (addInstrToLiveSet(j,j',liveSet); + A.update(issueTimeTbl, j, 0); + updateSucc j; + if debug then print("LIVEOUT "^showOp j'^"\n") else () + ) + | _ => error("releaseLiveOut "^showOp j') + end handle _ => () (* no live-out node, so don't bother *) + + fun printOpenBlocks blocks = + "[ "^ + foldr (fn (OPEN_BLOCK{bid,...},l) => + i2s(A.sub(blockMap,bid))^" "^l) "" blocks + ^"]" + + (* Move legal pending nodes from the pending queue and the ready queue + * to the ready queue. + *) + fun moveLegalPendingToReady() = + let fun scan([], pending) = pending + | scan((node as (j,DDG.NODE{b,...}))::nodes, pending) = + if isProfitableMove b then + (enqueue node; scan(nodes, pending)) + else + scan(nodes, node::pending) + val waiting = List.revAppend(PQ.toList readyQueue, !pending) + in PQ.clear readyQueue; + pending := scan(waiting, []) + end + + + (* Given a set of openBlocks, compute the set of legal blocks + * and profitable blocks that can be scheduled at the current time. + * At this point, we also compute the profitability of moving + * an instruction from bid to the openBlockList. + * Move instructions from pending queue to the priority queue. + *) + fun updatePermittableCodeMotion openBlockList = + let val stamp = newStamp() + + (* What is the cost of moving an instruction from block source to + * the blocks in openBlockList? + *) + fun codeMotionCost(source) = + let fun loop([], C) = C + | loop(OPEN_BLOCK{reachables, bid=target, ...}::L, C) = + if W8A.sub(reachables, source) = 0w0 then loop(L, C) + else let val freq = A.sub(freqTbl, target) + in loop(L, C+freq) end + in loop(openBlockList, 0) + end + + (* Check whether it is profitable to move an instruction from + * block source. 1.0 means non-speculative. < 1.0 means + * speculative + *) + fun isProfitable(source) = + let val origCost = A.sub(freqTbl, source) + val moveCost = codeMotionCost(source) + val profitability = real origCost / real moveCost + in A.update(profitabilityTbl,source,profitability); + profitability >= profitabilityRatio + end + + fun markLegal([]) = () + | markLegal(bid::Xs) = + if A.sub(isLegalTbl, bid) = stamp then markLegal Xs else + (A.update(isLegalTbl, bid, stamp); + if debug then print(i2s(A.sub(blockMap,bid))) else (); + if isProfitable bid then + (if debug then print "+" else (); + A.update(isProfitableTbl, bid, stamp) + ) + else (); + if debug then print " " else (); + markLegal + (markSucc(#out_edges region (A.sub(blockMap, bid)), Xs)) + ) + and markSucc([], Xs) = Xs + | markSucc((_,Y,_)::es, Xs) = + if predAllLegal Y then markSucc(es, A.sub(blockIdTbl,Y)::Xs) + else markSucc(es, Xs) + + and predAllLegal X = + let fun loop [] = true + | loop((Y,_,_)::es) = + A.sub(isLegalTbl, A.sub(blockIdTbl, Y)) = stamp + andalso loop es + in (* IMPORTANT: prevent hoisting past side entries! *) + case #entry_edges region X of + [] => loop(#in_edges region X) + | _ => false + end + in if debug then print("LEGAL: ") else (); + markLegal(map (fn OPEN_BLOCK{bid,...} => bid) openBlockList); + if debug then print("\n") else (); + moveLegalPendingToReady(); + openBlockList + end + + (* Open a new block b. + * Mark all blocks that b reaches. + *) + fun openBlock(b, openBlockList) = + let val bid = A.sub(blockIdTbl, b) + in if A.sub(isLegalTbl, bid) < 0 then (* closed permenantly! *) + openBlockList + else openBlock'(bid, b, openBlockList) + end + + and openBlock'(bid, b, openBlockList) = + let val reachables = W8A.array(M,0w0) + fun markReachables b = + let val bid = A.sub(blockIdTbl, b) + in if W8A.sub(reachables, bid) = 0w0 then + (W8A.update(reachables, bid, 0w1); + app markReachables (#succ region b) + ) + else () + end + val _ = markReachables b + fun mergeIncomingBlocks() = + let val liveSet = IntHashTable.mkTable(32,NotLive) + val lookupLiveSet = IntHashTable.find liveSet + val lookupLiveSet = + fn b => case lookupLiveSet b of SOME x => x | NONE => [] + val addLiveSet = IntHashTable.insert liveSet + fun merge([], NONE) = (newTable 5, 0) + | merge([], SOME(_,t,rt)) = (rt, t+1) + | merge((Y,_,CFG.EDGE{w,...})::es,rt) = + let val Y_id = A.sub(blockIdTbl, Y) + val liveSet_Y = A.sub(liveSetTbl, Y_id) + val rt = + case rt of + NONE => SOME(!w, + A.sub(maxTimeTbl,Y_id), + A.sub(rtTbl,Y_id)) + | SOME(w',_,rt') => + if !w > w' then + SOME(!w, + A.sub(maxTimeTbl,Y_id), + A.sub(rtTbl,Y_id)) + else rt + in IntHashTable.appi (fn (r,es) => + addLiveSet(r, List.revAppend(es, lookupLiveSet r))) + liveSet_Y; + merge(es, rt) + end + val (rt, startTime) = merge (#in_edges region b, NONE) + in A.update(rtTbl, bid, rt); + A.update(startTimeTbl, bid, startTime); + A.update(liveSetTbl, bid, liveSet); + (liveSet, rt) + end + val _ = if debug then + print("OPENING "^i2s b^" "^printOpenBlocks openBlockList^ + "("^i2s(A.sub(insnCountTbl,bid))^" insns)\n") + else (); + val (liveSet,rt) = mergeIncomingBlocks() + (* release live-in anchor of block b *) + val _ = releaseLiveIn(bid, liveSet) + val openBlock = + OPEN_BLOCK{bid=bid, rt=rt, + reachables=reachables, + liveSet=liveSet, + jumpScheduled=ref false, + sigma=DA.array(5,[]), + jumpTime=ref 10000000, + jumpNode=ref dummyJump + } + val openBlockList = + updatePermittableCodeMotion(openBlock::openBlockList) + in if A.sub(insnCountTbl, bid) = 0 then + closeBlock(bid, openBlockList) + else + openBlockList + end + + (* Close a block *) + and closeBlock(bid, openBlockList) = + let fun rmv((x as OPEN_BLOCK{bid=bid',rt,jumpScheduled,jumpTime,liveSet, + jumpNode=ref(j,j'),sigma,...})::L, L') = + if bid = bid' then + let val b = A.sub(blockMap, bid) + val CFG.BLOCK{insns, ...} = #node_info region b + val instrs = linearize sigma + val instrs = if !jumpScheduled then + let val DDG.NODE{instr=jmp,...} = j' + in addInstrToLiveSet(j, j', liveSet); + jmp :: instrs + end + else instrs + val _ = insns := instrs; + (* release live-in anchor of block id if it hasn't already + been released *) + val _ = releaseLiveIn(bid, liveSet); + (* release live-out anchor of block id *) + val _ = releaseLiveOut(bid, liveSet); + val n = A.sub(insnCountTbl, bid) + in if n > 0 then + print("WARNING block "^i2s b^" has "^i2s n^ + " instruction(s) left over\n") + else (); + List.revAppend(L',L) + end + else rmv(L, x::L') + | rmv([], _) = raise NotOpened (* not found, it's okay *) + fun decCounts([], openBlockList) = openBlockList + | decCounts((_,Y,_)::es, openBlockList) = + let val bid_Y = A.sub(blockIdTbl, Y) + val n = A.sub(predCountTbl, bid_Y) - 1 + in A.update(predCountTbl, bid_Y, n); + if n = 0 then decCounts(es, openBlock(Y, openBlockList)) + else decCounts(es, openBlockList) + end + val openBlockList = rmv(openBlockList, []) + val _ = if debug then + print("CLOSING "^i2s(A.sub(blockMap, bid))^" "^ + printOpenBlocks openBlockList^"\n") + else () + val out_edges = #out_edges region (A.sub(blockMap, bid)) + + val openBlockList = decCounts(out_edges, openBlockList) + in (* mark this block as closed forever *) + A.update(isLegalTbl, bid, ~1); + updatePermittableCodeMotion openBlockList + end handle NotOpened => openBlockList + + (* Close all blocks that have jump instruction scheduled *) + fun closeAllJumpedBlocks openBlockList = + let fun loop([], L') = L' + | loop((B as OPEN_BLOCK{bid, jumpScheduled,...})::L, L') = + if !jumpScheduled then loop(L, closeBlock(bid, B::L')) + else loop(L, B::L') + in loop(openBlockList, []) end + + (* Schedule an instruction: + * Given an instruction and a set of openBlocks, find out where + * the instruction has to be inserted at. + *) + fun scheduleInstr(openBlockList, j, j' as DDG.NODE{instr,b,...}) = + let val isJump = isJump instr + (* val blockName = #create MLRiscAnnotations.COMMENT + (i2s(A.sub(blockMap, bid))) *) + + val pipeline = pipeline instr + + (* Pass one: find out where to perform the code motion + * and whether it is legal + *) + fun pass1([], insertionPoints) = pass2(insertionPoints, 0) + | pass1((B as OPEN_BLOCK{rt,bid,reachables,liveSet,jumpTime,...}):: + openBlocks, insertionPoints) = + if W8A.sub(reachables, b) = 0w0 (* unreachable! *) + then pass1(openBlocks, insertionPoints) + else if bid <> b andalso + isIllegalCodeMotion(j, j', liveSet) then + (* this is illegal; put instruction back to the + * pending queue. + *) + (if debug then print("ILLEGAL "^showOp j'^"\n") else (); + pending := (j,j') :: !pending; + openBlockList + ) + else let val time = findScheduleSlot(bid, rt, pipeline, j, j') + in if time > !jumpTime then + (* Can't schedule this instruction because + * it must follow the jump instruction! + * Close this block instead. + *) + (pending := (j,j') :: !pending; + closeBlock(bid, openBlockList) + ) + else + pass1(openBlocks, (time, B)::insertionPoints) + end + + + (* Pass two: perform the actual insertion *) + and pass2([], replicationCount) = finish() + | pass2((time, + OPEN_BLOCK{bid, rt, reachables, liveSet, sigma, + jumpScheduled, jumpTime, jumpNode, ...}):: + insertionPoints, replicationCount) = + (* a copy of instruction j has to be placed in reservation + * table rt. + *) + let val instr = if replicationCount > 0 then + InsnProps.replicate instr else instr + (* val instr = if bid <> b then + InsnProps.annotate(instr,blockName) + else instr *) + in insert(rt, time, pipeline); + A.update(issueTimeTbl, j, + Int.max(time, A.sub(issueTimeTbl, j))); + A.update(maxTimeTbl, bid, + Int.max(time, A.sub(maxTimeTbl, bid))); + if debug andalso (verbose orelse b <> bid) then + print( + "Time "^i2s time^ + (if replicationCount > 0 then + " ("^i2s replicationCount^")" + else "")^ + " "^showInsn instr^ + " ["^i2s(A.sub(blockMap,b))^ + "] scheduled in block "^i2s(A.sub(blockMap,bid))^ + (if b <> bid then " ***" else "")^ + (if !jumpScheduled then "!!!" else "")^ + "\n") + else (); + (* Jump processing *) + if isJump then + (jumpScheduled := true; + jumpTime := time; + jumpNode := (j, j') + ) + else + (addInstrToLiveSet(j, j', liveSet); + DA.update(sigma, time, instr::DA.sub(sigma, time)) + ); + pass2(insertionPoints, replicationCount+1) + end + + (* Do these things after successfully scheduled an instruction *) + and finish() = + let val _ = updateSucc j; + val n = A.sub(insnCountTbl, b) - 1 + in A.update(insnCountTbl, b, n); + (* if we have run out instructions or else + * we have scheduled the jump instruction, we can + * close the current block. + * At this point we wait until we can't find any instructions + * that be scheduled ahead of the current jump instruction. + *) + if isJump then openBlockList + else if n = 0 then closeBlock(b, openBlockList) + else openBlockList + end + + in pass1(openBlockList, []) + end + + (* Main loop *) + fun schedule(openBlockList) = + if PQ.isEmpty readyQueue then + let val L = closeAllJumpedBlocks openBlockList + val L = updatePermittableCodeMotion L + in case L of + [] => () + | _ => schedule L + end + else + let val (j, j' as DDG.NODE{b,...}) = PQ.deleteMin readyQueue + val openBlockList = scheduleInstr(openBlockList,j,j') + in schedule openBlockList + end + + fun scheduleAll() = + let val entries = #entries region () + (* find blocks without predecessors in the region *) + val openBlockList = foldr + (fn (b,L) => if A.sub(predCountTbl, A.sub(blockIdTbl, b)) = 0 + then openBlock(b,L) else L + ) [] entries + in case openBlockList of + [] => error "cyclic region" + | _ => schedule(updatePermittableCodeMotion openBlockList) + end + + fun sanityCheck() = + let val ok = ref true + in #forall_nodes ddg + (fn (i,i') => + if A.sub(issueTimeTbl,i) < 0 then + (print("UNSCHEDULED "^showOp i'^ + " |pred|="^i2s(A.sub(inDegsTbl, i))^"\n"); + app (fn (j,i,e) => + if A.sub(issueTimeTbl,j) < 0 then + (print("\t"^i2s j^" -> "^i2s i^" "^ + DDG.edgeToString e); + print("\t"^showOp(#node_info ddg j)^"\n") + ) + else ()) (#in_edges ddg i); + print "\n"; + ok := false + ) + else () + ); + if !ok then () else error "Scheduling error" + end + + in (* #forall_edges ddg (fn (i,j,e) => + print(showOp(#node_info ddg i)^" -> "^showOp(#node_info ddg j)^" "^ + DDG.edgeToString e^"\n")); *) + scheduleAll(); + if safetyCheck then sanityCheck() else () + end + +end diff --git a/MLRISC/scheduling/localCP.sml b/MLRISC/scheduling/localCP.sml new file mode 100644 index 0000000..4101c4e --- /dev/null +++ b/MLRISC/scheduling/localCP.sml @@ -0,0 +1,30 @@ +functor LocalCriticalPath + (DDG : SCHEDULER_DDG) : SCHEDULING_RANKS where type edge = DDG.latency = +struct + + structure DDG = DDG + structure I = DDG.I + structure G = Graph + structure A = Array + + type edge = DDG.latency + + fun rank(DDG as G.GRAPH ddg) = + let val N = #capacity ddg () + val len = A.array(N,0) + val parents = A.tabulate(N,fn i => length(#in_edges ddg i)) + fun process i = + let fun g((i,j,l)::es,n) = g(es,Int.max(A.sub(len,j) + l + 1,n)) + | g([],n) = n + in A.update(len,i,g(#out_edges ddg i,0)) + end + fun order((i,_),(j,_)) = + case Int.compare(A.sub(len,i),A.sub(len,j)) of + EQUAL => A.sub(parents,i) > A.sub(parents,j) + | LESS => false + | GREATER => true + in app process (rev (GraphTopsort.topsort DDG (map #1 (#nodes ddg ())))); + order + end + +end diff --git a/MLRISC/scheduling/regionBuilder.sig b/MLRISC/scheduling/regionBuilder.sig new file mode 100644 index 0000000..d8f0372 --- /dev/null +++ b/MLRISC/scheduling/regionBuilder.sig @@ -0,0 +1,29 @@ +(* + * This module partitions the IR according to some partitioning criteria + * and frequency. This is used mainly for global scheduling. + * + * -- Allen + *) + +signature REGION_BUILDER = +sig + structure IR : MLRISC_IR + + val regionBuilder : + { maxBlocks : int, + maxInstrs : int, + minFreqRatio : real, + sideEntries : bool, (* can the region has side entries *) + traceOnly : bool, (* no splits or merges? *) + internalBackEdges : bool, (* can the region has internal back edges*) + insertDummyBlocks : bool + } -> + IR.IR -> + ({ir : IR.IR, (* The entire program *) + region : IR.cfg, (* The subregion in question *) + numberOfInstructions : int, + blockIdTbl : int Array.array + } -> unit + ) -> unit + +end diff --git a/MLRISC/scheduling/regionBuilder.sml b/MLRISC/scheduling/regionBuilder.sml new file mode 100644 index 0000000..5549d1c --- /dev/null +++ b/MLRISC/scheduling/regionBuilder.sml @@ -0,0 +1,205 @@ +(* + * Partition the IR into regions according to partition criteria + * and frequencies. Then feed the regions into the instruction + * scheduler. + * + * The partitional criteria can be: + * 1. The maximum number of blocks + * 2. The maximum number of instructions + *) + +functor RegionBuilder(IR : MLRISC_IR) : REGION_BUILDER = +struct + structure IR = IR + structure CFG = IR.CFG + structure Util = IR.Util + structure G = Graph + structure A = Array + structure PQ = PriorityQueue + structure DA = DynArray + + fun error msg = MLRiscErrorMsg.error("RegionBuilder",msg) + + val view_IR = MLRiscControl.getFlag "view-IR" + + val i2s = Int.toString + + fun regionBuilder {maxBlocks, maxInstrs, sideEntries, minFreqRatio, + traceOnly, internalBackEdges, insertDummyBlocks + } (IR as G.GRAPH cfg) schedule = + let val N = #capacity cfg () + val G.GRAPH loop = IR.loop IR + + (* Note: tables must be dynamic because the cfg may be changed + * while scheduling is being performed + *) + val processed = DA.array(N, false) + fun isProcessed i = DA.sub(processed, i) + fun markAsProcessed i = DA.update(processed, i, true) + val blockIdTbl = DA.array(N, 0) + + (* A queue of all the blocks ranked by priority + * Give loop headers extra priority + *) + fun freqOf(CFG.BLOCK{freq,...}) = !freq + fun highestFreqFirst((i,i'),(j,j')) = + let val f_i = freqOf i' + val f_j = freqOf j' + in if f_i = f_j then #has_node loop i + else f_i > f_j + end + val seeds = PQ.fromList highestFreqFirst (#nodes cfg ()) + + (* Initialization *) + fun initialization() = + (app markAsProcessed (#entries cfg ()); + app markAsProcessed (#exits cfg ()) + ) + + (* Locate an unprocessed seed block; raises exception if everything + * is done. + *) + fun newSeed() = + let val (i,i') = PQ.deleteMin seeds + in if isProcessed i then newSeed() + else if freqOf i' = 0 then raise PQ.EmptyPriorityQueue + else (i,i') + end + + (* Grow a region according to the various parameters *) + fun grow(seed as (s,s')) = + let val freq = real(freqOf s') + val minFreq = freq * minFreqRatio + + (* Remove non candidates *) + fun prune(j,j') = isProcessed j orelse real(freqOf j') < minFreq + + fun pruneEdge(w) = real(!w) < minFreq + + fun followSucc([], blocks) = blocks + | followSucc((_,j,CFG.EDGE{w,...})::es, blocks) = + let val j' = #node_info cfg j + in if pruneEdge w orelse prune(j,j') then followSucc(es, blocks) + else followSucc(es, (j,j')::blocks) + end + + fun followPred([], blocks) = blocks + | followPred((j,_,CFG.EDGE{w,...})::es, blocks) = + let val j' = #node_info cfg j + in if pruneEdge w orelse prune(j,j') then followPred(es, blocks) + else followPred(es, (j,j')::blocks) + end + + + val queue = PQ.fromList highestFreqFirst [seed] + val enqueue = PQ.insert queue + + fun chooseBest [] = [] + | chooseBest ((j,j')::rest) = + let val w = freqOf j' + fun find([],j,j',w) = [(j,j')] + | find((k,k')::rest, j, j', w) = + let val w' = freqOf k' + in if w' > w then find(rest, k, k', w') + else find(rest, j, j', w) + end + in find(rest, j, j', w) end + + fun add([], blocks, blockCount) = (blocks, blockCount) + | add((j,j')::rest, blocks, blockCount) = + if isProcessed j then add(rest, blocks, blockCount) + else (markAsProcessed j; + enqueue (j,j'); + add(rest, j::blocks, blockCount+1) + ) + + (* Find the region using best first search *) + fun collect(front, back, blockCount) = + if PQ.isEmpty queue orelse blockCount >= maxBlocks then + front @ rev back + else + let val node as (j,j') = PQ.deleteMin queue + val succs = followSucc(#out_edges cfg j, []) + val succs = if traceOnly then chooseBest succs else succs + val (back, blockCount) = add(succs, back, blockCount) + (* val preds = followPred(#in_edges cfg j, []) + val preds = if traceOnly then chooseBest preds else preds + val (front, blockCount) = add(preds, front, blockCount) *) + in collect(front, back, blockCount) + end + + val _ = markAsProcessed s (* mark the seed block as processed *) + val blocks = collect([s], [], 1) + (* The blocks collected are not in linear order *) + in blocks + end + + (* Create a new subgraph from the blocks *) + fun makeSubgraph blocks = + if traceOnly then TraceView.trace_view blocks IR + else AcyclicSubgraphView.acyclic_view blocks IR + + (* + * Perform tail duplication if no side entries are allowed + * BUG: make sure liveness information is kept up-to-date! XXX + *) + fun tailDuplication(root, subgraph) = + let val {nodes, edges} = Util.tailDuplicate IR + {subgraph=subgraph, root=root} + val ins = PQ.insert seeds + fun newNode (b,b') = (ins(b,b'); DA.update(blockIdTbl, b, 0)) + in (* add new nodes created as a consequence of tail duplication + * onto the queue so that they will be properly processed later. + *) + app newNode nodes + end + + + (* Create a new region *) + fun createRegion() = + let val seed = newSeed() + val blocks = grow seed + val subgraph = makeSubgraph blocks; + in if sideEntries then () else tailDuplication(hd blocks, subgraph); + subgraph + end + + (* Number of instructions *) + fun numberOfInstructions(G.GRAPH cfg) = + let val size = ref 0 + in #forall_nodes cfg (fn (_,CFG.BLOCK{insns, ...}) => + size := !size + length(!insns)); + !size + end + + fun sizeOf(G.GRAPH cfg) = #order cfg () + + (* Main loop *) + fun main() = + let val region as G.GRAPH R = createRegion() + val size = sizeOf region + in if size <= 1 then () + else + let val numberOfInstructions = numberOfInstructions region + in if numberOfInstructions <= 2 then () + else + let val _ = + (print("REGION["^i2s(#order R ())^"] "); + app (fn (X,_) => print(i2s X^" ")) (#nodes R ()); + print "\n") + + in if !view_IR then IR.viewSubgraph IR region else (); + schedule{ir=IR, region=region, + blockIdTbl=DA.baseArray blockIdTbl, + numberOfInstructions=numberOfInstructions}; + if !view_IR then IR.viewSubgraph IR region else () + end + end; + main() + end + + in initialization(); + main() handle PQ.EmptyPriorityQueue => () + end + +end diff --git a/MLRISC/scheduling/schedulerDDG.sig b/MLRISC/scheduling/schedulerDDG.sig new file mode 100644 index 0000000..c3d88dd --- /dev/null +++ b/MLRISC/scheduling/schedulerDDG.sig @@ -0,0 +1,72 @@ +(* + * This interface describes a DDG for acyclic global scheduling + * (for non-predicated architectures.) + * Hyperblock scheduling uses another data structure. + * + * -- Allen + *) +signature SCHEDULER_DDG = +sig + + structure I : INSTRUCTIONS + structure C : CELLS + structure SchedProps : SCHEDULING_PROPERTIES + sharing SchedProps.I = I + sharing I.C = C + + (* Dependence type *) + datatype dependence = + FLOW | OUTPUT | ANTI (* register based dependence *) + | MEM_FLOW | MEM_OUTPUT | MEM_ANTI (* memory based dependence *) + | CTRL | CTRL_ANTI (* control dependence *) + | LIVEIN | LIVEOUT + + type latency = SchedProps.latency + + datatype edge = EDGE of {l : latency, (* latency *) + r : C.cell, (* register *) + d : dependence (* dependence type *) + } + + datatype node = NODE of {instr: I.instruction, b:int, + defs:(C.cell * latency) list, uses:C.cell list} + + type ('node,'edge) info + + (* The actual ddg is parameterized with respect to the node and edge type. + * For local scheduling 'node = instruction and 'edge = latency + * For global scheduling 'node = node and 'edge = edge + *) + type ('node,'edge) ddg = ('node,'edge,('node,'edge) info) Graph.graph + type block = int + type blockMap = block Array.array (* mapping from block id -> block *) + type liveInMap = node Graph.node IntHashTable.hash_table + type liveOutMap = node Graph.node IntHashTable.hash_table + + type ('node,'edge) internalInfo = + {succ : 'edge Graph.edge list Array.array, + pred : 'edge Graph.edge list Array.array, + nodes : 'node option Array.array + } + + type globalInfo = + {liveInMap : liveInMap, + liveOutMap : liveOutMap, + blockMap : blockMap + } + + (* Create an empty DDG with a maximum number of nodes. + * At the same time return its internal adjlist representation. + * Just in we want to make the scheduler work fast. + *) + val newDDG : int -> ('node,'edge) ddg + val internalInfo : ('node,'edge) ddg -> ('node,'edge) internalInfo + val globalInfo : ('node,'edge) ddg -> globalInfo option ref + + (* pretty print an edge (useful for graphical display) *) + val edgeToString : edge -> string + + (* liveness annotation *) + val LIVENESS : {liveIn:C.cell list, liveOut:C.cell list} Annotations.property + +end diff --git a/MLRISC/scheduling/schedulerDDG.sml b/MLRISC/scheduling/schedulerDDG.sml new file mode 100644 index 0000000..f678065 --- /dev/null +++ b/MLRISC/scheduling/schedulerDDG.sml @@ -0,0 +1,114 @@ +(* + * This module describes a DDG for acyclic global scheduling + * (for non-predicated architectures.) + * Hyperblock scheduling uses another data structure. + * + * -- Allen + *) +functor SchedulerDDG(SchedProps : SCHEDULING_PROPERTIES) : SCHEDULER_DDG = +struct + + structure SchedProps = SchedProps + structure I = SchedProps.I + structure C = I.C + structure A = Array + structure GI = DirectedGraph(A) + structure G = Graph + + datatype dependence = + FLOW | OUTPUT | ANTI (* register based dependence *) + | MEM_FLOW | MEM_OUTPUT | MEM_ANTI (* memory based dependence *) + | CTRL | CTRL_ANTI (* control dependence *) + | LIVEIN | LIVEOUT + + type latency = SchedProps.latency + + datatype edge = EDGE of {l : latency, (* latency *) + r : C.cell, (* register *) + d : dependence (* dependence type *) + } + + datatype node = NODE of {instr: I.instruction, b:int, + defs:(C.cell * latency) list, uses:C.cell list} + + type liveInMap = node Graph.node IntHashTable.hash_table + type liveOutMap = node Graph.node IntHashTable.hash_table + type block = int + type blockMap = block Array.array (* mapping from block id -> block *) + type liveInMap = node Graph.node IntHashTable.hash_table + type liveOutMap = node Graph.node IntHashTable.hash_table + + type ('node,'edge) internalInfo = + {succ : 'edge Graph.edge list Array.array, + pred : 'edge Graph.edge list Array.array, + nodes : 'node option Array.array + } + + type globalInfo = + {liveInMap : liveInMap, + liveOutMap : liveOutMap, + blockMap : blockMap + } + + datatype ('node,'edge) info = + INFO of {internalInfo: ('node,'edge) internalInfo, + globalInfo : globalInfo option ref + } + + withtype ('node,'edge) ddg = ('node,'edge,('node,'edge) info) Graph.graph + + fun newDDG(n) = + let val succ = A.array(n,[]) + val pred = A.array(n,[]) + val nodes= A.array(n,NONE) + val info = INFO{internalInfo={succ=succ,pred=pred,nodes=nodes}, + globalInfo=ref NONE} + val ddg = GI.newGraph{name="DDG",info=info, + pred=pred,succ=succ,nodes=nodes} + in ddg + end + + fun internalInfo(G.GRAPH ddg) = + let val INFO{internalInfo, ...} = #graph_info ddg + in internalInfo end + + fun globalInfo(G.GRAPH ddg) = + let val INFO{globalInfo, ...} = #graph_info ddg + in globalInfo end + + fun latToString i = if i < 0 then "-"^Int.toString(~i) else Int.toString i + + (* Slow but pretty way of pretty printing registers *) + fun showReg(prefix,r) = prefix^C.toString r + + fun edgeToString(EDGE{l,d,r}) = + let val (dep,prefix) = + case d of + FLOW => ("","r") + | OUTPUT => ("out","r") + | ANTI => ("anti","r") + | MEM_FLOW => ("","m") + | MEM_OUTPUT => ("out","m") + | MEM_ANTI => ("anti","m") + | CTRL => ("ctrl","c") + | CTRL_ANTI => ("anti","c") + | LIVEIN => ("livein","r") + | LIVEOUT => ("liveout","r") + val lat = if l = 0 then "" else " "^latToString l + + val reg = "("^showReg(prefix,r)^")" + in dep ^ lat ^ reg end + + fun cellsToString S = + let fun pr r = showReg("r",r) + in LineBreak.lineBreak 50 + (List.foldr (fn (r,l) => if l = "" then pr r else pr r^" "^l) "" S) + end + + val LIVENESS = Annotations.new + (SOME(fn {liveIn,liveOut} => + "liveIn: "^cellsToString liveIn^"\n"^ + "liveOut: "^cellsToString liveOut^"\n" + )) + +end diff --git a/MLRISC/scheduling/schedulingAliasing.sig b/MLRISC/scheduling/schedulingAliasing.sig new file mode 100644 index 0000000..1d0c2da --- /dev/null +++ b/MLRISC/scheduling/schedulingAliasing.sig @@ -0,0 +1,7 @@ +signature SCHEDULING_ALIASING = +sig + structure Region : REGION + + val write : Region.region -> (int * int) list (* def/use *) + val read : Region.region -> int list +end diff --git a/MLRISC/scheduling/schedulingProps.sig b/MLRISC/scheduling/schedulingProps.sig new file mode 100644 index 0000000..37f2c13 --- /dev/null +++ b/MLRISC/scheduling/schedulingProps.sig @@ -0,0 +1,74 @@ +(* + * This signature describes the machine properties needed by the + * global schedulers. + * + * -- Allen + *) +signature SCHEDULING_PROPERTIES = +sig + + structure I : INSTRUCTIONS + + (* + * Type reservation_table is used to represent the state + * of the pipeline as a partial schedule is constructed. + * Type resource represents the resources used by an instruction + * during its execution. Normally this is represented by a + * reservation table. These are kept abstract so that the + * client can have freedom on how to implement these things. + *) + type reservation_table + type pipeline + type latency = int + type time = int + + type cpu (* name identifying the implementaton *) + + (* special instructions *) + val source : I.instruction + val sink : I.instruction + + (* convert a name to a specific implementation *) + val cpu : string -> cpu + + datatype cpu_info = + CPU_INFO of + { (* maximum number of instructions issued per cycle *) + maxIssues : int, + (* + * Definition/use. Definitions contain latencies. + *) + defUse : I.instruction -> (I.C.cell * latency) list * I.C.cell list, + + (* + * Create a new reservation table of at most n time steps. + * If the backward flag is on then we are actually building + * the schedule in backwards manner. + *) + newTable : int -> reservation_table, + + (* + * Extract the pipeline characteristics of an instruction + *) + pipeline : I.instruction -> pipeline, + (* + * Take a reservation table, a time step t, and an instruction. + * Find a slot to insert the instruction into the reservation + * table at the earliest (latest) feasible time no earlier (later) + * than t. + *) + findSlot : reservation_table * time * pipeline -> time, + insert : reservation_table * time * pipeline -> unit + } + + (* This function takes an architecture name and returns + * a bunch of properties specific to the architecture. + * It is structured this way so that we can dynamically change the + * architecture parameter. + *) + val info : {cpu:cpu, backward:bool (* backward scheduling? *)} -> cpu_info + + val splitCopies : (I.C.cell -> I.C.cell) -> + I.instruction -> I.instruction list + +end diff --git a/MLRISC/scheduling/schedulingRanks.sig b/MLRISC/scheduling/schedulingRanks.sig new file mode 100644 index 0000000..7ba67b5 --- /dev/null +++ b/MLRISC/scheduling/schedulingRanks.sig @@ -0,0 +1,18 @@ +(* + * Rank functions used for scheduling. + * + * -- Allen + *) + +signature SCHEDULING_RANKS = +sig + + structure I : INSTRUCTIONS + structure DDG : SCHEDULER_DDG + sharing DDG.I = I + + type edge + val rank : ('node,edge) DDG.ddg -> + 'node Graph.node * 'node Graph.node -> bool + +end diff --git a/MLRISC/scheduling/sources.cm b/MLRISC/scheduling/sources.cm new file mode 100644 index 0000000..8f56e5e --- /dev/null +++ b/MLRISC/scheduling/sources.cm @@ -0,0 +1,11 @@ +Group is + ../library/sources.cm + ../graphs/sources.cm + ../visualization/sources.cm + PalemSimons.sig + PalemSimons.sml + LeungPalemPnueli.sig + LeungPalemPnueli.sml + test1.sml + test2.sml + wuhui.sml diff --git a/MLRISC/scheduling/test1.sml b/MLRISC/scheduling/test1.sml new file mode 100644 index 0000000..dcb90bc --- /dev/null +++ b/MLRISC/scheduling/test1.sml @@ -0,0 +1,54 @@ +(* + * This is the example I used in the PACT '98 paper + *) +structure Test1 = +struct + + structure G = DirectedGraph + + fun makeDag nodes edges = + let val dag as Graph.GRAPH G = G.graph("Test1",(),10) + in app (fn n => #add_node G (n,n)) nodes; + app (#add_edge G) edges; + dag + end + + val dag = makeDag [1,2,3,4,5,6,7,8] + [(1,2,0), + (1,4,0), + (2,3,0), + (2,5,0), + (4,3,1), + (4,5,1), + (3,6,1), + (7,6,0) + ] + + fun close dag = + TransitiveClosure.acyclic_transitive_closure2 + {+ = fn(i,j) => i+j+1, + max = Int.max + } dag + + fun palemSimons (dag as Graph.GRAPH G) = + let val dag' as Graph.GRAPH G' = G.graph("Tmp",(),10) + val _ = #forall_nodes G (#add_node G') + val _ = #forall_edges G (#add_edge G') + in PalemSimons.rank{dag = dag', + l = fn(_,_,l) => l, + d = fn _ => 10, + m = 1 + } + end + + structure View = GraphViewerFn(daVinci) + structure L = GraphLayout + + fun view dag = + View.view( + L.makeLayout{node=fn(n,_)=>[L.LABEL(Int.toString n)], + edge=fn(i,j,l)=>[L.LABEL(Int.toString l),L.COLOR "red"], + graph=fn _ =>[]} dag + ) + +end diff --git a/MLRISC/scheduling/test2.sml b/MLRISC/scheduling/test2.sml new file mode 100644 index 0000000..a2c00f5 --- /dev/null +++ b/MLRISC/scheduling/test2.sml @@ -0,0 +1,68 @@ +(* + * This is the example I used in the PACT '98 paper + *) + +structure Test2 = +struct + + structure G = DirectedGraph + + fun makeDag nodes edges = + let val dag as Graph.GRAPH G = G.graph("Test1",(),10) + in app (#add_node G) nodes; + app (#add_edge G) edges; + dag + end + + val dag = makeDag [(1,(0,2)), + (2,(1,2)), + (3,(0,9)), + (4,(0,9)), + (5,(0,7)), + (6,(0,9)), + (7,(6,9)), + (8,(6,8)) + ] + [(1,2,0), + (1,4,0), + (2,3,0), + (2,5,0), + (4,3,1), + (4,5,1), + (3,6,1), + (7,6,0) + ] + + fun close dag = + TransitiveClosure.acyclic_transitive_closure2 + {+ = fn(i,j) => i+j+1, + max = Int.max + } dag + + fun leung (dag as Graph.GRAPH G) = + let val dag' as Graph.GRAPH G' = G.graph("Tmp",(),10) + val _ = #forall_nodes G (#add_node G') + val _ = #forall_edges G (#add_edge G') + in LeungPalemPnueli.rank + {dag = dag', + l = fn(_,_,l) => l, + d = fn(_,(_,d)) => d, + r = fn(_,(r,_)) => r, + m = 1 + } + end + + structure View = GraphViewerFn(daVinci) + structure L = GraphLayout + + fun view dag = + View.view( + L.makeLayout{node=fn(n,(r,d))=> + [L.LABEL(Int.toString n^" r="^Int.toString r^ + " d="^Int.toString d) + ], + edge=fn(i,j,l)=>[L.LABEL(Int.toString l),L.COLOR "red"], + graph=fn _ =>[]} dag + ) + +end diff --git a/MLRISC/scheduling/wuhui.sml b/MLRISC/scheduling/wuhui.sml new file mode 100644 index 0000000..bdcd333 --- /dev/null +++ b/MLRISC/scheduling/wuhui.sml @@ -0,0 +1,66 @@ +(* + * This is Wu Hui's example. + *) + +structure WuHui = +struct + + structure G = DirectedGraph + + fun makeDag nodes edges = + let val dag as Graph.GRAPH G = G.graph("Test1",(),10) + in app (#add_node G) nodes; + app (#add_edge G) edges; + dag + end + + val dag as Graph.GRAPH G = + makeDag [(1,(0,6)), + (2,(0,6)), + (3,(1,6)), + (4,(1,6)), + (5,(1,6)), + (6,(3,6)) + ] + [(1,3,1), + (1,4,1), + (2,4,1), + (2,5,1), + (3,6,0), + (4,6,1), + (5,6,1) + ] + + fun close dag = + TransitiveClosure.acyclic_transitive_closure2 + {+ = fn(i,j) => i+j+1, + max = Int.max + } dag + + fun leung (dag as Graph.GRAPH G) = + let val dag' as Graph.GRAPH G' = G.graph("Tmp",(),10) + val _ = #forall_nodes G (#add_node G') + val _ = #forall_edges G (#add_edge G') + in LeungPalemPnueli.rank + {dag = dag', + l = fn(_,_,l) => l, + d = fn(_,(_,d)) => d, + r = fn(_,(r,_)) => r, + m = 1 + } + end + + structure View = GraphViewerFn(daVinci) + structure L = GraphLayout + + fun view dag = + View.view( + L.makeLayout{node=fn(n,(r,d))=> + [L.LABEL(Int.toString n^" r="^Int.toString r^ + " d="^Int.toString d) + ], + edge=fn(i,j,l)=>[L.LABEL(Int.toString l),L.COLOR "red"], + graph=fn _ =>[]} dag + ) + +end diff --git a/MLRISC/sparc/README.sparc b/MLRISC/sparc/README.sparc new file mode 100644 index 0000000..42a29dc --- /dev/null +++ b/MLRISC/sparc/README.sparc @@ -0,0 +1,32 @@ +Changes to the instruction set +============================== + +1. The cc bit in ARITH ops have been removed. The CC option + is now merged with the arithmetic opcode. I think this saves + a bit of space since most of the time the cc bit is false. + +2. The following V9 instructions have been added + + MULX + SMULX + DIVX + SLLX + SRLX + SRAX + LDX + STX + MOVcc (* conditional moves on condition code *) + FMOVcc (* conditional moves on condition code *) + MOVR (* conditional moves on integer condition *) + BR (* branch on integer register with prediction *) + BP (* branch on integer condition with prediction *) + + Not everything is generated by the instruction selection module yet. + +New optimizations in the Sparc backend +====================================== +1. Strength reduction for multiply/division by a constant. +2. Propagation of annotations +3. There is a flag is determines whether we should use BR instructions. + These branch instructions branches on the value of an integer register. +4. NOTB folding is implemented diff --git a/MLRISC/sparc/backpatch/sparcDelaySlotProps.sml b/MLRISC/sparc/backpatch/sparcDelaySlotProps.sml new file mode 100644 index 0000000..e552cd8 --- /dev/null +++ b/MLRISC/sparc/backpatch/sparcDelaySlotProps.sml @@ -0,0 +1,135 @@ +functor SparcDelaySlots + (structure I : SPARCINSTR + structure P : INSN_PROPERTIES where I = I + (* sharing/defn conflict: sharing P.I = I*) + ) : DELAY_SLOT_PROPERTIES = +struct + structure I = I + structure C = I.C + structure SL = CellsBasis.SortedCells + + fun error msg = MLRiscErrorMsg.error("SparcDelaySlotProps",msg) + + datatype delay_slot = D_NONE | D_ERROR | D_ALWAYS | D_TAKEN | D_FALLTHRU + + val delaySlotSize = 4 + + fun delaySlot{instr, backward} = + case instr of + I.INSTR(I.CALL{nop,...}) => {n=false,nOn=D_ERROR,nOff=D_ALWAYS,nop=nop} + | I.INSTR(I.JMP{nop,...}) => {n=false,nOn=D_ERROR,nOff=D_ALWAYS,nop=nop} + | I.INSTR(I.JMPL{nop,...}) => {n=false,nOn=D_ERROR,nOff=D_ALWAYS,nop=nop} + | I.INSTR(I.RET{nop,...}) => {n=false,nOn=D_ERROR,nOff=D_ALWAYS,nop=nop} + | I.INSTR(I.Bicc{b=I.BA,a,nop,...}) => {n=false,nOn=D_NONE,nOff=D_ALWAYS,nop=nop} + | I.INSTR(I.Bicc{a,nop,...}) => {n=a,nOn=D_TAKEN,nOff=D_ALWAYS,nop=nop} + | I.INSTR(I.FBfcc{a,nop,...}) => {n=a,nOn=D_TAKEN,nOff=D_ALWAYS,nop=nop} + | I.INSTR(I.BR{a,nop,...}) => {n=a,nOn=D_TAKEN,nOff=D_ALWAYS,nop=nop} + | I.INSTR(I.BP{a,nop,...}) => {n=a,nOn=D_TAKEN,nOff=D_ALWAYS,nop=nop} + | I.INSTR(I.FCMP{nop,...}) => {n=false,nOn=D_ERROR,nOff=D_ALWAYS,nop=nop} + | I.ANNOTATION{i,...} => delaySlot{instr=i,backward=backward} + | _ => {n=false,nOn=D_ERROR,nOff=D_NONE,nop=false} + + fun enableDelaySlot{instr, n, nop} = + case (instr,n) of + (I.INSTR(I.CALL{defs,uses,label,cutsTo,mem,...}),false) => + I.call{defs=defs,uses=uses,label=label,cutsTo=cutsTo, + nop=nop,mem=mem} + | (I.INSTR(I.JMPL{r,i,d,defs,uses,mem,cutsTo,...}),false) => + I.jmpl{r=r,i=i,d=d,defs=defs,uses=uses,cutsTo=cutsTo, + nop=nop,mem=mem} + | (I.INSTR(I.JMP{r,i,labs,...}),false) => + I.jmp{r=r,i=i,labs=labs,nop=nop} + | (I.INSTR(I.RET{leaf,...}),false) => I.ret{leaf=leaf,nop=nop} + | (I.INSTR(I.Bicc{b,a,label,...}),_) => I.bicc{b=b,a=n,nop=nop,label=label} + | (I.INSTR(I.FBfcc{b,a,label,...}),_) => I.fbfcc{b=b,a=n,nop=nop,label=label} + | (I.INSTR(I.BR{nop,label,p,r,rcond,...}),_) => + I.br{rcond=rcond,r=r,a=n,nop=nop,label=label,p=p} + | (I.INSTR(I.BP{nop,label,p,cc,b,...}),_) => + I.bp{b=b,cc=cc,a=n,nop=nop,label=label,p=p} + | (I.INSTR(I.FCMP{cmp,r1,r2,...}),false) => I.fcmp{cmp=cmp,r1=r1,r2=r2,nop=nop} + | (I.ANNOTATION{i,a},n) => + I.ANNOTATION{i=enableDelaySlot{instr=i,n=n,nop=nop},a=a} + | _ => error "enableDelaySlot" + + val defUseI = P.defUse CellsBasis.GP + val defUseF = P.defUse CellsBasis.FP + val psr = [C.psr] + val fsr = [C.fsr] + val y = [C.y] + val zeroR = Option.valOf(C.zeroReg CellsBasis.GP) + val everything = [C.y,C.psr,C.fsr] + fun conflict{src=i,dst=j} = + let fun cc I.ANDCC = true + | cc I.ANDNCC = true + | cc I.ORCC = true + | cc I.ORNCC = true + | cc I.XORCC = true + | cc I.XNORCC = true + | cc I.ADDCC = true + | cc I.TADDCC = true + | cc I.TADDTVCC = true + | cc I.SUBCC = true + | cc I.TSUBCC = true + | cc I.TSUBTVCC= true + | cc I.UMULCC = true + | cc I.SMULCC = true + | cc I.UDIVCC = true + | cc I.SDIVCC = true + | cc _ = false + fun defUseOther(I.INSTR(I.Ticc _)) = ([],psr) + | defUseOther(I.INSTR(I.ARITH{a,...})) = + if cc a then (psr,[]) else ([],[]) + | defUseOther(I.INSTR(I.WRY _)) = (y,[]) + | defUseOther(I.INSTR(I.RDY _)) = ([],y) + | defUseOther(I.INSTR(I.FCMP _)) = (fsr,[]) + | defUseOther(I.INSTR(I.Bicc{b=I.BA,...})) = ([],[]) + | defUseOther(I.INSTR(I.Bicc _)) = ([],psr) + | defUseOther(I.INSTR(I.FBfcc _)) = ([],fsr) + | defUseOther(I.INSTR(I.MOVicc _)) = ([],psr) + | defUseOther(I.INSTR(I.MOVfcc _)) = ([],fsr) + | defUseOther(I.INSTR(I.FMOVicc _)) = ([],psr) + | defUseOther(I.INSTR(I.FMOVfcc _)) = ([],fsr) + | defUseOther(I.INSTR(I.CALL _)) = (everything,[]) + | defUseOther(I.INSTR(I.JMPL _)) = (everything,[]) + | defUseOther(I.ANNOTATION{i,...}) = defUseOther i + | defUseOther _ = ([],[]) + fun clash(defUse) = + let val (di,ui) = defUse i + val (dj,uj) = defUse j + in SL.nonEmptyIntersection(di,uj) orelse + SL.nonEmptyIntersection(di,dj) orelse + SL.nonEmptyIntersection(ui,dj) + end + fun toSL f i = let val (d,u) = f i + in (SL.uniq d, SL.uniq u) end + fun defUseInt i = + let val (d,u) = defUseI i + val d = SL.uniq d + val u = SL.uniq u + (* no dependence on register 0! *) + in (SL.rmv(zeroR,d), SL.rmv(zeroR,u)) end + in clash(defUseInt) orelse + clash(toSL defUseF) orelse + clash(toSL defUseOther) + end + + fun delaySlotCandidate{jmp,delaySlot= + ( I.INSTR(I.CALL _) | I.INSTR(I.Bicc _) | I.INSTR(I.FBfcc _) + | I.INSTR(I.Ticc _) | I.INSTR(I.BR _) | I.INSTR(I.JMP _) | I.INSTR(I.JMPL _) + | I.INSTR(I.RET _) | I.INSTR(I.BP _) | I.INSTR(I.FCMP _))} = false + | delaySlotCandidate{jmp=I.ANNOTATION{i,...},delaySlot} = + delaySlotCandidate{jmp=i,delaySlot=delaySlot} + | delaySlotCandidate{jmp,delaySlot=I.ANNOTATION{i,...}} = + delaySlotCandidate{jmp=jmp,delaySlot=i} + | delaySlotCandidate _ = true + + fun setTarget(I.INSTR(I.Bicc{b,a,nop,...}),lab) = I.bicc{b=b,a=a,nop=nop,label=lab} + | setTarget(I.INSTR(I.FBfcc{b,a,nop,...}),lab) = I.fbfcc{b=b,a=a,nop=nop,label=lab} + | setTarget(I.INSTR(I.BR{rcond,p,r,a,nop,...}),lab) = + I.br{rcond=rcond,p=p,r=r,a=a,nop=nop,label=lab} + | setTarget(I.INSTR(I.BP{b,p,cc,a,nop,...}),lab) = + I.bp{b=b,p=p,cc=cc,a=a,nop=nop,label=lab} + | setTarget(I.ANNOTATION{i,a},lab) = I.ANNOTATION{i=setTarget(i,lab),a=a} + | setTarget _ = error "setTarget" + +end diff --git a/MLRISC/sparc/backpatch/sparcDelaySlots.sml b/MLRISC/sparc/backpatch/sparcDelaySlots.sml new file mode 100644 index 0000000..bbd63ba --- /dev/null +++ b/MLRISC/sparc/backpatch/sparcDelaySlots.sml @@ -0,0 +1,58 @@ +(* + * This file was automatically generated by MDGen (v3.0) + * from the machine description file "sparc/sparc.md". + *) + + +functor SparcDelaySlots(structure I : SparcINSTR + structure P : INSN_PROPERTIES + where I = I + ) : DELAY_SLOT_PROPERTIES = +struct + structure I = I + datatype delay_slot = D_NONE | D_ERROR | D_ALWAYS | D_TAKEN | D_FALLTHRU + + fun error msg = MLRiscErrorMsg.error("SparcDelaySlots",msg) + fun delaySlot {instr, backward} = let + fun delaySlot instr = + ( + case instr of + I.Bicc{b, a, label, nop} => {nop=nop, n=a andalso + ( + case b of + I.BA => false + | _ => true + ), nOn=D_NONE, nOff=D_ALWAYS} + | I.FBfcc{b, a, label, nop} => {nop=nop, n=a, nOn=D_NONE, nOff=D_ALWAYS} + | I.JMP{r, i, labs, nop} => {nop=nop, n=false, nOn=D_NONE, nOff=D_ALWAYS} + | I.JMPL{r, i, d, defs, uses, nop, mem} => {nop=nop, n=false, nOn=D_NONE, nOff=D_ALWAYS} + | I.CALL{defs, uses, label, nop, mem} => {nop=nop, n=false, nOn=D_NONE, nOff=D_ALWAYS} + | I.FCMP{cmp, r1, r2, nop} => {nop=nop, n=false, nOn=D_NONE, nOff=D_ALWAYS} + | I.RET{leaf, nop} => {nop=nop, n=false, nOn=D_NONE, nOff=D_ALWAYS} + | _ => {nop=true, n=false, nOn=D_ERROR, nOff=D_NONE} + ) + in delaySlot instr + end + + fun enableDelaySlot _ = error "enableDelaySlot" + fun conflict _ = error "conflict" + fun delaySlotCandidate {jmp, delaySlot} = let + fun delaySlotCandidate delaySlot = + ( + case delaySlot of + I.Bicc{b, a, label, nop} => false + | I.FBfcc{b, a, label, nop} => false + | I.JMP{r, i, labs, nop} => false + | I.JMPL{r, i, d, defs, uses, nop, mem} => false + | I.CALL{defs, uses, label, nop, mem} => false + | I.Ticc{t, cc, r, i} => false + | I.FCMP{cmp, r1, r2, nop} => false + | I.RET{leaf, nop} => false + | _ => true + ) + in delaySlotCandidate delaySlot + end + + fun setTarget _ = error "setTarget" +end + diff --git a/MLRISC/sparc/backpatch/sparcJumps.sml b/MLRISC/sparc/backpatch/sparcJumps.sml new file mode 100644 index 0000000..43763bd --- /dev/null +++ b/MLRISC/sparc/backpatch/sparcJumps.sml @@ -0,0 +1,245 @@ +(* sparcJumps.sml --- information to resolve jumps. + * + * COPYRIGHT (c) 1996 Bell Laboratories. + * + *) +functor SparcJumps + (structure Instr:SPARCINSTR + structure Shuffle:SPARCSHUFFLE where I = Instr + structure MLTreeEval : MLTREE_EVAL where T = Instr.T + ) : SDI_JUMPS = +struct + structure I = Instr + structure C = Instr.C + structure Const = I.Constant + structure CB = CellsBasis + + fun error msg = MLRiscErrorMsg.error("SparcJumps",msg) + + val branchDelayedArch = true + + + fun isSdi(I.ANNOTATION{i,...}) = isSdi i + | isSdi(I.LIVE _) = true + | isSdi(I.KILL _) = true + | isSdi(I.COPY _) = true + | isSdi(I.INSTR instr) = let + fun oper(I.IMMED n) = false + | oper(I.REG _) = false + | oper(I.HI _) = false + | oper(I.LO _) = false + | oper(I.LAB _) = true + in case instr of + I.ARITH{i,...} => oper i + | I.SHIFT{i,...} => oper i + | I.LOAD{i,...} => oper i + | I.STORE{i,...} => oper i + | I.FLOAD{i,...} => oper i + | I.FSTORE{i,...} => oper i + | I.JMPL{i,...} => oper i + | I.JMP{i,...} => oper i + | I.MOVicc{i,...} => oper i + | I.MOVfcc{i,...} => oper i + | I.MOVR{i,...} => oper i + | I.CALL _ => true + | I.Bicc _ => true + | I.FBfcc _ => true + | I.BR _ => true + | I.BP _ => true + | I.Ticc{i,...} => oper i + | I.WRY{i,...} => oper i + | I.SAVE{i,...} => oper i + | I.RESTORE{i,...} => oper i + (* The following is only true of Version 8 *) + | I.FPop1{a=(I.FMOVd | I.FNEGd | I.FABSd), ...} => true + | _ => false + end + + fun minSize(I.ANNOTATION{i,...}) = minSize i + | minSize(I.LIVE _) = 0 + | minSize(I.KILL _) = 0 + | minSize (I.COPY _) = 0 (* ? *) + | minSize(I.INSTR instr) = + (case instr + of (I.Bicc{nop=true,...}) => 8 + | (I.FBfcc{nop=true,...}) => 8 + | (I.JMP{nop=true,...}) => 8 + | (I.JMPL{nop=true,...}) => 8 + | (I.CALL{nop=true,...}) => 8 + | (I.BR{nop=true,...}) => 8 + | (I.BP{nop=true,...}) => 8 + | (I.RET{nop=true,...}) => 8 + | (I.FCMP{nop=true,...}) => 8 + | (I.FPop1{a=(I.FMOVd | I.FNEGd | I.FABSd),...}) => 8 + | _ => 4 + (*esac*)) + + fun maxSize (I.INSTR(I.FPop1{a=(I.FMOVd | I.FNEGd | I.FABSd),...})) = 8 + | maxSize (I.ANNOTATION{i,...}) = maxSize i + | maxSize _ = 4 + + fun immed13 n = ~4096 <= n andalso n < 4096 + fun immed22 n = ~0x200000 <= n andalso n < 0x1fffff + fun immed16 n = ~0x8000 <= n andalso n < 0x8000 + fun immed19 n = ~0x40000 <= n andalso n < 0x40000 + fun immed30 n = ~0x4000000 <= n andalso n < 0x3ffffff + + fun instrLength([],n) = n + | instrLength(I.INSTR(I.FPop1{a=(I.FMOVd | I.FNEGd | I.FABSd),...})::is,n) = + instrLength(is,n+8) + | instrLength(_::is,n) = instrLength(is,n+4) + + fun sdiSize(I.ANNOTATION{i, ...}, labmap, loc) = sdiSize(i, labmap, loc) + | sdiSize(I.LIVE _, _, _) = 0 + | sdiSize(I.KILL _, _, _) = 0 + | sdiSize(I.COPY{k=CB.GP, src, dst, tmp, ...}, _, _) = + 4 * length(Shuffle.shuffle{tmp=tmp, dst=dst, src=src}) + | sdiSize(I.COPY{k=CB.FP, src, dst, tmp, ...}, _, _) = let + val instrs = Shuffle.shufflefp{src=src, dst=dst, tmp=tmp} + in instrLength(instrs, 0) + end + | sdiSize(instr as I.INSTR i, labMap, loc) = let + fun oper(I.IMMED n,_) = 4 + | oper(I.REG _,_) = 4 + | oper(I.HI _,_) = 4 + | oper(I.LO _,_) = 4 + | oper(I.LAB lexp,hi) = if immed13(MLTreeEval.valueOf lexp) then 4 else hi + fun displacement lab = ((labMap lab) - loc) div 4 + fun branch22 lab = if immed22(displacement lab) then 4 else 16 + fun branch19 lab = if immed19(displacement lab) then 4 else 16 + fun branch16 lab = if immed16(displacement lab) then 4 else 16 + fun call lab = if immed30(displacement lab) then 4 else 20 + fun delaySlot false = 0 + | delaySlot true = 4 + in + case i + of I.ARITH{a=I.OR,r,i,...} => + if CellsBasis.cellId r = 0 then oper(i,8) else oper(i,12) + | I.ARITH{i,...} => oper(i,12) + | I.SHIFT{i,...} => oper(i,12) + | I.LOAD{i,...} => oper(i,12) + | I.STORE{i,...} => oper(i,12) + | I.FLOAD{i,...} => oper(i,12) + | I.FSTORE{i,...} => oper(i,12) + | I.Ticc{i,...} => oper(i,12) + | I.SAVE{i,...} => oper(i,12) + | I.RESTORE{i,...} => oper(i,12) + | I.MOVicc{i,...} => oper(i,12) + | I.MOVfcc{i,...} => oper(i,12) + | I.MOVR{i,...} => oper(i,12) + | I.JMPL{i,nop,...} => oper(i,12) + delaySlot nop + | I.JMP{i,nop,...} => oper(i,12) + delaySlot nop + | I.Bicc{label,nop,...} => branch22 label + delaySlot nop + | I.FBfcc{label,nop,...} => branch22 label + delaySlot nop + | I.BR{label,nop,...} => branch16 label + delaySlot nop + | I.BP{label,nop,...} => branch19 label + delaySlot nop + | I.CALL{label,...} => call label + | I.WRY{i,...} => oper(i,12) + | I.FPop1{a=(I.FMOVd | I.FNEGd | I.FABSd),...} => 8 + | _ => error "sdiSize" + end + | sdiSize _ = error "sdiSize" + + fun split22_10 n = + let val w = Word32.fromInt n + in {hi=Word32.toInt(Word32.>>(w,0w10)), + lo=Word32.toInt(Word32.andb(w,0wx3ff)) + } + end + + fun split(I.LAB lexp) = split22_10(MLTreeEval.valueOf lexp) + | split _ = error "split" + + (* Expand the immediate constant into two instructions *) + fun expandImm(immed,instr) = + let val {lo,hi} = split immed + in + [I.sethi{i=hi,d=C.asmTmpR}, + I.arith{a=I.OR,r=C.asmTmpR,i=I.IMMED lo,d=C.asmTmpR}, + I.INSTR instr + ] + end + + (* Expand a span dependent instruction *) + fun expand(I.ANNOTATION{i, ...}, size, pos) = expand(i, size, pos) + | expand(I.LIVE _, _, _) = [] + | expand(I.KILL _, _, _) = [] + | expand(I.COPY{k=CB.GP, src, tmp, dst, ...}, _, _) = + Shuffle.shuffle{src=src, dst=dst, tmp=tmp} + | expand(I.COPY{k=CB.FP, src, tmp, dst, ...}, _, _) = + Shuffle.shufflefp{src=src, dst=dst, tmp=tmp} + | expand(instr as (I.INSTR i), size, pos) = + (case (i,size) + of (_,4) => [instr] + | (I.ARITH{a=I.OR,r,i,d},8) => + if CellsBasis.cellId r = 0 then + let val {lo,hi} = split i + in [I.sethi{i=hi,d=C.asmTmpR}, + I.arith{a=I.OR,r=C.asmTmpR,i=I.IMMED lo,d=d} + ] + end + else error "ARITH" + | (I.ARITH{a,r,i,d},12) => + expandImm(i,I.ARITH{a=a,r=r,i=I.REG C.asmTmpR,d=d}) + | (I.SHIFT{s,r,i,d},12) => + expandImm(i,I.SHIFT{s=s,r=r,i=I.REG C.asmTmpR,d=d}) + | (I.SAVE{r,i,d},12) => + expandImm(i,I.SAVE{r=r,i=I.REG C.asmTmpR,d=d}) + | (I.RESTORE{r,i,d},12) => + expandImm(i,I.RESTORE{r=r,i=I.REG C.asmTmpR,d=d}) + | (I.LOAD{l,r,i,d,mem},12) => + expandImm(i,I.LOAD{l=l,r=r,i=I.REG C.asmTmpR,d=d,mem=mem}) + | (I.STORE{s,r,i,d,mem},12) => + expandImm(i,I.STORE{s=s,r=r,i=I.REG C.asmTmpR,d=d,mem=mem}) + | (I.FLOAD{l,r,i,d,mem},12) => + expandImm(i,I.FLOAD{l=l,r=r,i=I.REG C.asmTmpR,d=d,mem=mem}) + | (I.FSTORE{s,r,i,d,mem},12) => + expandImm(i,I.FSTORE{s=s,r=r,i=I.REG C.asmTmpR,d=d,mem=mem}) + | (I.MOVicc{b,i,d},12) => + expandImm(i,I.MOVicc{b=b,i=I.REG C.asmTmpR,d=d}) + | (I.MOVfcc{b,i,d},12) => + expandImm(i,I.MOVfcc{b=b,i=I.REG C.asmTmpR,d=d}) + | (I.MOVR{rcond,r,i,d},12) => + expandImm(i,I.MOVR{rcond=rcond,r=r,i=I.REG C.asmTmpR,d=d}) + | (I.JMPL _,8) => [instr] + | (I.JMP _,8) => [instr] + | (I.Bicc _,8) => [instr] + | (I.FBfcc _,8) => [instr] + | (I.BR _,8) => [instr] + | (I.BP _,8) => [instr] + | (I.JMPL{r,i,d,defs,uses,cutsTo,nop,mem},(12 | 16)) => + expandImm(i,I.JMPL{r=r,i=I.REG C.asmTmpR,d=d,defs=defs,uses=uses, + cutsTo=cutsTo,nop=nop,mem=mem}) + | (I.JMP{r,i,labs,nop},(12 | 16)) => + expandImm(i,I.JMP{r=r,i=I.REG C.asmTmpR,labs=labs,nop=nop}) + | (I.Ticc{t,cc,r,i},12) => + expandImm(i,I.Ticc{t=t,cc=cc,r=r,i=I.REG C.asmTmpR}) + (* + * The sparc uses 22bits signed extended displacement offsets + * Let's hope it's enough + *) + | (I.Bicc{b,a,label,nop},_) => error "Bicc" + | (I.FBfcc{b,a,label,nop},_) => error "FBfcc" + | (I.FPop1{a, r, d}, _) => let + fun nextRegNum c = C.FPReg(CellsBasis.registerNum c + 1) + (* Note: if r=d then the move is not required. + * This needs to be factored into the size before it + * can be done here. + *) + fun doDouble(oper) = + [I.fpop1{a=oper, r=r, d=d}, + I.fpop1{a=I.FMOVs, r=nextRegNum r, d=nextRegNum d}] + in + case a + of I.FMOVd => doDouble(I.FMOVs) + | I.FNEGd => doDouble(I.FNEGs) + | I.FABSd => doDouble(I.FABSs) + | _ => error "expand: FPop1" + end + | (I.WRY{r,i},12) => expandImm(i,I.WRY{r=r,i=I.REG C.asmTmpR}) + | _ => error "expand" + (*esac*)) + | expand _ = error "expand" + +end + diff --git a/MLRISC/sparc/c-calls/sparc-c-calls.sml b/MLRISC/sparc/c-calls/sparc-c-calls.sml new file mode 100644 index 0000000..6a27f75 --- /dev/null +++ b/MLRISC/sparc/c-calls/sparc-c-calls.sml @@ -0,0 +1,495 @@ +(* sparc-c-calls.sml + * + * COPYRIGHT (c) 2001 Bell Labs, Lucent Technologies + * + * author: Matthias Blume (blume@reseach.bell-labs.com) + * + * Comment: This is a first cut. It might be quite sub-optimal for some cases. + * (For example, I make no attempt at using ldd/ldx for + * copying stuff around because this would require keeping + * more track of alignment issues.) + * + * C function calls for the Sparc + * + * Register conventions: + * + * ? + * + * Calling convention: + * + * Return result: + * + Integer and pointer results are returned in %o0 + * + 64-bit integers (long long) returned in %o1/%o1 + * + float results are returned in %f0; double in %f0/%f1 + * + Struct results are returned in space provided by the caller. + * The address of this space is passed to the callee as a hidden + * implicit argument on the stack (in the caller's frame). It + * gets stored at [%sp+64] (from the caller's point of view). + * An UNIMP instruction must be placed after the call instruction, + * indicating how much space has been reserved for the return value. + * + long double results are returned like structs + * + * Function arguments: + * + Arguments that are smaller than a word are promoted to word-size. + * + Up to six argument words (words 0-5) are passed in registers + * %o0...%o5. This includes doubles and long longs. Alignment for + * those types is NOT maintained, i.e., it is possible for an 8-byte + * quantity to end up in an odd-even register pair. + * * Arguments beyond 6 words are passed on the stack in the caller's + * frame. For this, the caller must reserve space in its frame + * prior to the call. Argument word 6 appears at [%sp+92], word 7 + * at [%sp+96], ... + * + struct arguments are passed as pointers to a copy of the struct. + * The copy itself is allocated by the caller in its stack frame. + * + long double arguments are passed like structs (i.e., via pointer + * to temp copy) + * + Space for argument words 0-5 is already allocated in the + * caller's frame. This space might be used by the callee to + * save those arguments that must be addressable. %o0 corresponds + * to [%sp+68], %o1 to [%sp+72], ... + *) +functor Sparc_CCalls + (structure T : MLTREE + val ix : (T.stm, T.rexp, T.fexp, T.ccexp) SparcInstrExt.sext + -> T.sext): C_CALLS = +struct + structure T = T + structure Ty = CTypes + structure C = SparcCells + structure IX = SparcInstrExt + + fun error msg = MLRiscErrorMsg.error ("SparcCompCCalls", msg) + + datatype c_arg = + ARG of T.rexp + | FARG of T.fexp + | ARGS of c_arg list + + val mem = T.Region.memory + val stack = T.Region.memory + + val maxRegArgs = 6 + val paramAreaOffset = 68 + + fun LI i = T.LI (T.I.fromInt (32, i)) + + val GP = C.GPReg + val FP = C.FPReg + + fun greg r = GP r + fun oreg r = GP (r + 8) + fun ireg r = GP (r + 24) + fun freg r = FP r + + fun reg32 r = T.REG (32, r) + fun freg64 r = T.FREG (64, r) + + val sp = oreg 6 + val spreg = reg32 sp + + fun addli (x, 0) = x + | addli (x, d) = let + val d' = T.I.fromInt (32, d) + in + case x of + T.ADD (_, r, T.LI d) => + T.ADD (32, r, T.LI (T.I.ADD (32, d, d'))) + | _ => T.ADD (32, x, T.LI d') + end + + fun argaddr n = addli (spreg, paramAreaOffset + 4*n) + + (* temp location for transfers through memory *) + val tmpaddr = argaddr 1 + + fun roundup (i, a) = a * ((i + a - 1) div a) + + (* calculate size and alignment for a C type *) + fun szal (Ty.C_void | Ty.C_float | Ty.C_PTR | + Ty.C_signed (Ty.I_int | Ty.I_long) | + Ty.C_unsigned (Ty.I_int | Ty.I_long)) = (4, 4) + | szal (Ty.C_double | + Ty.C_signed Ty.I_long_long | + Ty.C_unsigned Ty.I_long_long) = (8, 8) + | szal (Ty.C_long_double) = (16, 8) + | szal (Ty.C_signed Ty.I_char | Ty.C_unsigned Ty.I_char) = (1, 1) + | szal (Ty.C_signed Ty.I_short | Ty.C_unsigned Ty.I_short) = (2, 2) + | szal (Ty.C_ARRAY (t, n)) = let val (s, a) = szal t in (n * s, a) end + | szal (Ty.C_STRUCT l) = + let (* i: next free memory address (relative to struct start); + * a: current total alignment, + * l: list of struct member types *) + fun pack (i, a, []) = + (* when we are done with all elements, the total size + * of the struct must be padded out to its own alignment *) + (roundup (i, a), a) + | pack (i, a, t :: tl) = let + val (ts, ta) = szal t (* size and alignment for member *) + in + (* member must be aligned according to its own + * alignment requirement; the next free position + * is then at "aligned member-address plus member-size"; + * new total alignment is max of current alignment + * and member alignment (assuming all alignments are + * powers of 2) *) + pack (roundup (i, ta) + ts, Int.max (a, ta), tl) + end + in + pack (0, 1, l) + end + | szal (Ty.C_UNION l) = + let (* m: current max size + * a: current total alignment *) + fun overlay (m, a, []) = (roundup (m, a), a) + | overlay (m, a, t :: tl) = + let val (ts, ta) = szal t + in + overlay (Int.max (m, ts), Int.max (a, ta), tl) + end + in + overlay (0, 1, l) + end + +(**** START NEW CODE ****) + + (* shorts and chars are promoted to 32-bits *) + val naturalIntSz = 32 + + (* the location of arguments/parameters; offsets are given with respect to the + * low end of the parameter area (see paramAreaOffset above). + *) + datatype arg_location + = Reg of T.ty * T.reg * T.I.machine_int option + (* integer/pointer argument in register *) + | FReg of T.fty * T.reg * T.I.machine_int option + (* floating-point argument in register *) + | Stk of T.ty * T.I.machine_int (* integer/pointer argument in parameter area *) + | FStk of T.fty * T.I.machine_int (* floating-point argument in parameter area *) + | Args of arg_location list + + fun layout {conv, retTy, paramTys} = let + in + raise Fail "layout not implemented yet" + end + + (* C callee-save registers *) + val calleeSaveRegs = (* %l0-%l7 and %i0-%i7 *) + List.tabulate (16, fn r => GP(r+16)) + val calleeSaveFRegs = [] + +(**** END NEW CODE ****) + + fun genCall { name, proto, paramAlloc, structRet, saveRestoreDedicated, + callComment, args } = let + val { conv, retTy, paramTys } = proto + val _ = case conv of + ("" | "ccall") => () + | _ => error (concat ["unknown calling convention \"", + String.toString conv, "\""]) + val res_szal = + case retTy of + (Ty.C_long_double | Ty.C_STRUCT _ | Ty.C_UNION _) => + SOME (szal retTy) + | _ => NONE + + val nargwords = let + fun loop ([], n) = n + | loop (t :: tl, n) = + loop (tl, (case t of + (Ty.C_double | Ty.C_signed Ty.I_long_long | + Ty.C_unsigned Ty.I_long_long) => 2 + | _ => 1) + n) + in + loop (paramTys, 0) + end + + val regargwords = Int.min (nargwords, maxRegArgs) + val stackargwords = Int.max (nargwords, maxRegArgs) - maxRegArgs + + val stackargsstart = paramAreaOffset + 4 * maxRegArgs + + val scratchstart = stackargsstart + 4 * stackargwords + + (* Copy struct or part thereof to designated area on the stack. + * An already properly aligned address (relative to %sp) is + * in to_off. *) + fun struct_copy (sz, al, ARG a, t, to_off, cpc) = + (* Two main cases here: + * 1. t is C_STRUCT _ or C_UNION _; + * in this case "a" computes the address + * of the struct to be copied. + * 2. t is some other non-floating type; "a" computes the + * the corresponding value (i.e., not its address). + *) + let fun ldst ty = + T.STORE (ty, addli (spreg, to_off), a, stack) :: cpc + in + case t of + (Ty.C_void | Ty.C_PTR | + Ty.C_signed (Ty.I_int | Ty.I_long) | + Ty.C_unsigned (Ty.I_int | Ty.I_long)) => ldst 32 + | (Ty.C_signed Ty.I_char | Ty.C_unsigned Ty.I_char) => ldst 8 + | (Ty.C_signed Ty.I_short | Ty.C_unsigned Ty.I_short) => + ldst 16 + | (Ty.C_signed Ty.I_long_long | + Ty.C_unsigned Ty.I_long_long) => ldst 64 + | (Ty.C_ARRAY _) => + error "ARRAY within gather/scatter struct" + | (Ty.C_STRUCT _ | Ty.C_UNION _) => + (* Here we have to do the equivalent of a "memcpy". *) + let val from = a (* argument is address of struct *) + fun cp (ty, incr) = let + fun load_from from_off = + T.LOAD (32, addli (from, from_off), mem) + (* from_off is relative to from, + * to_off is relative to %sp *) + fun loop (i, from_off, to_off, cpc) = + if i <= 0 then cpc + else loop (i - incr, + from_off + incr, to_off + incr, + T.STORE (ty, addli (spreg, to_off), + load_from from_off, + stack) + :: cpc) + in + loop (sz, 0, to_off, cpc) + end + in + case al of + 1 => cp (8, 1) + | 2 => cp (16, 2) + | _ => (* 4 or more *) cp (32, 4) + end + | (Ty.C_float | Ty.C_double | Ty.C_long_double) => + error "floating point type does not match ARG" + end +(* + | struct_copy (_, _, ARGS args, Ty.C_STRUCT tl, to_off, cpc) = + (* gather/scatter case *) + let fun loop ([], [], _, cpc) = cpc + | loop (t :: tl, a :: al, to_off, cpc) = let + val (tsz, tal) = szal t + val to_off' = roundup (to_off, tal) + val cpc' = struct_copy (tsz, tal, a, t, to_off', cpc) + in + loop (tl, al, to_off' + tsz, cpc') + end + | loop _ = + error "number of types does not match number of arguments" + in + loop (tl, args, to_off, cpc) + end +*) + | struct_copy (_, _, ARGS _, _, _, _) = + error "gather/scatter (ARGS) not supported (obsolete)" + | struct_copy (sz, al, FARG a, t, to_off, cpc) = + let fun fldst ty = + T.FSTORE (ty, addli (spreg, to_off), a, stack) :: cpc + in + case t of + Ty.C_float => fldst 32 + | Ty.C_double => fldst 64 + | Ty.C_long_double => fldst 128 + | _ => error "non-floating-point type does not match FARG" + end + + val (stackdelta, argsetupcode, copycode) = let + fun loop ([], [], _, ss, asc, cpc) = + (roundup (Int.max (0, ss - stackargsstart), 8), asc, cpc) + | loop (t :: tl, a :: al, n, ss, asc, cpc) = let + fun wordassign a = + if n < 6 then T.MV (32, oreg n, a) + else T.STORE (32, argaddr n, a, stack) + fun wordarg (a, cpc, ss) = + loop (tl, al, n + 1, ss, wordassign a :: asc, cpc) + + fun dwordmemarg (addr, region, tmpstore) = let + fun toreg (n, addr) = + T.MV (32, oreg n, T.LOAD (32, addr, region)) + fun tomem (n, addr) = + T.STORE (32, + argaddr n, + T.LOAD (32, addr, region), + stack) + fun toany (n, addr) = + if n < 6 then toreg (n, addr) else tomem (n, addr) + in + (* if n < 6 andalso n div 2 = 0 then + * use ldd here once MLRISC gets its usage right + * else + * ... *) + loop (tl, al, n+2, ss, + tmpstore @ + toany (n, addr) + :: toany (n+1, addli (addr, 4)) + :: asc, + cpc) + end + fun dwordarg mkstore = + if n > 6 andalso n div 2 = 1 then + (* 8-byte aligned memory *) + loop (tl, al, n+2, ss, + mkstore (argaddr n) :: asc, + cpc) + else dwordmemarg (tmpaddr, stack, [mkstore tmpaddr]) + in + case (t, a) of + ((Ty.C_void | Ty.C_PTR | Ty.C_ARRAY _ | + Ty.C_unsigned (Ty.I_int | Ty.I_long) | + Ty.C_signed (Ty.I_int | Ty.I_long)), + ARG a) => wordarg (a, cpc, ss) + | (Ty.C_signed Ty.I_char, ARG a) => + wordarg (T.SX (32, 8, a), cpc, ss) + | (Ty.C_unsigned Ty.I_char, ARG a) => + wordarg (T.ZX (32, 8, a), cpc, ss) + | (Ty.C_signed Ty.I_short, ARG a) => + wordarg (T.SX (32, 16, a), cpc, ss) + | (Ty.C_unsigned Ty.I_short, ARG a) => + wordarg (T.ZX (32, 16, a), cpc, ss) + | ((Ty.C_signed Ty.I_long_long | + Ty.C_unsigned Ty.I_long_long), ARG a) => + (case a of + T.LOAD (_, addr, region) => + dwordmemarg (addr, region, []) + | _ => dwordarg (fn addr => + T.STORE (64, addr, a, stack))) + | (Ty.C_float, FARG a) => + (* we use the stack region reserved for storing + * %o0-%o5 as temporary storage for transferring + * floating point values *) + (case a of + T.FLOAD (_, addr, region) => + wordarg (T.LOAD (32, addr, region), cpc, ss) + | _ => + if n < 6 then let + val ld = T.MV (32, oreg n, + T.LOAD (32, tmpaddr, stack)) + val cp = T.FSTORE (32, tmpaddr, a, stack) + in + loop (tl, al, n + 1, ss, cp :: ld :: asc, cpc) + end + else loop (tl, al, n + 1, ss, + T.FSTORE (32, argaddr n, a, stack) + :: asc, + cpc)) + | (Ty.C_double, FARG a) => + (case a of + T.FLOAD (_, addr, region) => + dwordmemarg (addr, region, []) + | _ => dwordarg (fn addr => + T.FSTORE (64, addr, a, stack))) + | (Ty.C_long_double, FARG a) => let + (* Copy 128-bit floating point value (16 bytes) + * into scratch space (aligned at 8-byte boundary). + * The address of the scratch copy is then + * passed as a regular 32-bit argument. *) + val ss' = roundup (ss, 8) + val ssaddr = addli (spreg, ss') + in + wordarg (ssaddr, + T.FSTORE (128, ssaddr, a, stack) :: cpc, + ss' + 16) + end + | (t as (Ty.C_STRUCT _ | Ty.C_UNION _), a) => let + (* copy entire struct into scratch space + * (aligned according to struct's alignment + * requirements). The address of the scratch + * copy is then passed as a regular 32-bit + * argument. *) + val (sz, al) = szal t + val ss' = roundup (ss, al) + val ssaddr = addli (spreg, ss') + val cpc' = struct_copy (sz, al, a, t, ss', cpc) + in + wordarg (ssaddr, cpc', ss' + sz) + end + | _ => error "argument/type mismatch" + end + | loop _ = error "wrong number of arguments" + in + loop (paramTys, args, 0, scratchstart, [], []) + end + + val (defs, uses) = let + val gp = T.GPR o reg32 + val fp = T.FPR o freg64 + val g_regs = map (gp o greg) [1, 2, 3, 4, 5, 6, 7] + val a_regs = map (gp o oreg) [0, 1, 2, 3, 4, 5] + val l_reg = gp (oreg 7) + val f_regs = map (fp o freg) + [0, 2, 4, 6, 8, 10, 12, 14, + 16, 18, 20, 22, 24, 26, 28, 30] + (* a call instruction defines all caller-save registers: + * - %g1 - %g7 + * - %o0 - %o5 (argument registers) + * - %o7 (link register) + * - all fp registers *) + + val defs = g_regs @ a_regs @ l_reg :: f_regs + (* A call instruction "uses" just the argument registers. *) + val uses = List.take (a_regs, regargwords) + in + (defs, uses) + end + + val result = + case retTy of + Ty.C_float => [T.FPR (T.FREG (32, FP 0))] + | Ty.C_double => [T.FPR (T.FREG (64, FP 0))] (* %f0/%f1 *) + | Ty.C_long_double => [] + | (Ty.C_STRUCT _ | Ty.C_UNION _) => [] + | Ty.C_ARRAY _ => error "array return type" + | (Ty.C_PTR | Ty.C_void | + Ty.C_signed (Ty.I_int | Ty.I_long) | + Ty.C_unsigned (Ty.I_int | Ty.I_long)) => + [T.GPR (T.REG (32, oreg 0))] + | (Ty.C_signed Ty.I_char | Ty.C_unsigned Ty.I_char) => + [T.GPR (T.REG (8, oreg 0))] + | (Ty.C_signed Ty.I_short | Ty.C_unsigned Ty.I_short) => + [T.GPR (T.REG (16, oreg 0))] + | (Ty.C_signed Ty.I_long_long | Ty.C_unsigned Ty.I_long_long) => + [T.GPR (T.REG (64, oreg 0))] + + val { save, restore } = saveRestoreDedicated defs + + val (sretsetup, srethandshake) = + case res_szal of + NONE => ([], []) + | SOME (sz, al) => let + val addr = structRet { szb = sz, align = al } + in + ([T.STORE (32, addli (spreg, 64), addr, stack)], + [T.EXT (ix (IX.UNIMP sz))]) + end + + val call = T.CALL { funct = name, targets = [], + defs = defs, uses = uses, + region = mem, pops = 0 } + + val call = + case callComment of + NONE => call + | SOME c => + T.ANNOTATION (call, #create MLRiscAnnotations.COMMENT c) + + val (sp_sub, sp_add) = + if stackdelta = 0 then ([], []) else + if paramAlloc { szb = stackdelta, align = 4 } then ([], []) + else ([T.MV (32, sp, T.SUB (32, spreg, LI stackdelta))], + [T.MV (32, sp, addli (spreg, stackdelta))]) + + val callseq = + List.concat [sp_sub, + copycode, + argsetupcode, + sretsetup, + save, + [call], + srethandshake, + restore, + sp_add] + + in + { callseq = callseq, result = result } + end +end diff --git a/MLRISC/sparc/emit/sparcAsm.sml b/MLRISC/sparc/emit/sparcAsm.sml new file mode 100644 index 0000000..510b7bb --- /dev/null +++ b/MLRISC/sparc/emit/sparcAsm.sml @@ -0,0 +1,604 @@ +(* + * WARNING: This file was automatically generated by MDLGen (v3.1) + * from the machine description file "sparc/sparc.mdl". + * DO NOT EDIT this file directly + *) + + +functor SparcAsmEmitter(structure S : INSTRUCTION_STREAM + structure Instr : SPARCINSTR + where T = S.P.T + structure Shuffle : SPARCSHUFFLE + where I = Instr + structure MLTreeEval : MLTREE_EVAL + where T = Instr.T + +(*#line 466.21 "sparc/sparc.mdl"*) + val V9 : bool + ) : INSTRUCTION_EMITTER = +struct + structure I = Instr + structure C = I.C + structure T = I.T + structure S = S + structure P = S.P + structure Constant = I.Constant + + open AsmFlags + + fun error msg = MLRiscErrorMsg.error("SparcAsmEmitter",msg) + + fun makeStream formatAnnotations = + let val stream = !AsmStream.asmOutStream + fun emit' s = TextIO.output(stream,s) + val newline = ref true + val tabs = ref 0 + fun tabbing 0 = () + | tabbing n = (emit' "\t"; tabbing(n-1)) + fun emit s = (tabbing(!tabs); tabs := 0; newline := false; emit' s) + fun nl() = (tabs := 0; if !newline then () else (newline := true; emit' "\n")) + fun comma() = emit "," + fun tab() = tabs := 1 + fun indent() = tabs := 2 + fun ms n = let val s = Int.toString n + in if n<0 then "-"^String.substring(s,1,size s-1) + else s + end + fun emit_label lab = emit(P.Client.AsmPseudoOps.lexpToString(T.LABEL lab)) + fun emit_labexp le = emit(P.Client.AsmPseudoOps.lexpToString (T.LABEXP le)) + fun emit_const c = emit(Constant.toString c) + fun emit_int i = emit(ms i) + fun paren f = (emit "("; f(); emit ")") + fun defineLabel lab = emit(P.Client.AsmPseudoOps.defineLabel lab^"\n") + fun entryLabel lab = defineLabel lab + fun comment msg = (tab(); emit("/* " ^ msg ^ " */"); nl()) + fun annotation a = comment(Annotations.toString a) + fun getAnnotations() = error "getAnnotations" + fun doNothing _ = () + fun fail _ = raise Fail "AsmEmitter" + fun emit_region mem = comment(I.Region.toString mem) + val emit_region = + if !show_region then emit_region else doNothing + fun pseudoOp pOp = (emit(P.toString pOp); emit "\n") + fun init size = (comment("Code Size = " ^ ms size); nl()) + val emitCellInfo = AsmFormatUtil.reginfo + (emit,formatAnnotations) + fun emitCell r = (emit(CellsBasis.toString r); emitCellInfo r) + fun emit_cellset(title,cellset) = + (nl(); comment(title^CellsBasis.CellSet.toString cellset)) + val emit_cellset = + if !show_cellset then emit_cellset else doNothing + fun emit_defs cellset = emit_cellset("defs: ",cellset) + fun emit_uses cellset = emit_cellset("uses: ",cellset) + val emit_cutsTo = + if !show_cutsTo then AsmFormatUtil.emit_cutsTo emit + else doNothing + fun emitter instr = + let + fun asm_load (I.LDSB) = "ldsb" + | asm_load (I.LDSH) = "ldsh" + | asm_load (I.LDUB) = "ldub" + | asm_load (I.LDUH) = "lduh" + | asm_load (I.LD) = "ld" + | asm_load (I.LDX) = "ldx" + | asm_load (I.LDD) = "ldd" + and emit_load x = emit (asm_load x) + and asm_store (I.STB) = "stb" + | asm_store (I.STH) = "sth" + | asm_store (I.ST) = "st" + | asm_store (I.STX) = "stx" + | asm_store (I.STD) = "std" + and emit_store x = emit (asm_store x) + and asm_fload (I.LDF) = "ldf" + | asm_fload (I.LDDF) = "lddf" + | asm_fload (I.LDQF) = "ldqf" + | asm_fload (I.LDFSR) = "ldfsr" + | asm_fload (I.LDXFSR) = "ldxfsr" + and emit_fload x = emit (asm_fload x) + and asm_fstore (I.STF) = "stf" + | asm_fstore (I.STDF) = "stdf" + | asm_fstore (I.STFSR) = "stfsr" + and emit_fstore x = emit (asm_fstore x) + and asm_arith (I.AND) = "and" + | asm_arith (I.ANDCC) = "andcc" + | asm_arith (I.ANDN) = "andn" + | asm_arith (I.ANDNCC) = "andncc" + | asm_arith (I.OR) = "or" + | asm_arith (I.ORCC) = "orcc" + | asm_arith (I.ORN) = "orn" + | asm_arith (I.ORNCC) = "orncc" + | asm_arith (I.XOR) = "xor" + | asm_arith (I.XORCC) = "xorcc" + | asm_arith (I.XNOR) = "xnor" + | asm_arith (I.XNORCC) = "xnorcc" + | asm_arith (I.ADD) = "add" + | asm_arith (I.ADDCC) = "addcc" + | asm_arith (I.TADD) = "tadd" + | asm_arith (I.TADDCC) = "taddcc" + | asm_arith (I.TADDTV) = "taddtv" + | asm_arith (I.TADDTVCC) = "taddtvcc" + | asm_arith (I.SUB) = "sub" + | asm_arith (I.SUBCC) = "subcc" + | asm_arith (I.TSUB) = "tsub" + | asm_arith (I.TSUBCC) = "tsubcc" + | asm_arith (I.TSUBTV) = "tsubtv" + | asm_arith (I.TSUBTVCC) = "tsubtvcc" + | asm_arith (I.UMUL) = "umul" + | asm_arith (I.UMULCC) = "umulcc" + | asm_arith (I.SMUL) = "smul" + | asm_arith (I.SMULCC) = "smulcc" + | asm_arith (I.UDIV) = "udiv" + | asm_arith (I.UDIVCC) = "udivcc" + | asm_arith (I.SDIV) = "sdiv" + | asm_arith (I.SDIVCC) = "sdivcc" + | asm_arith (I.MULX) = "mulx" + | asm_arith (I.SDIVX) = "sdivx" + | asm_arith (I.UDIVX) = "udivx" + and emit_arith x = emit (asm_arith x) + and asm_shift (I.SLL) = "sll" + | asm_shift (I.SRL) = "srl" + | asm_shift (I.SRA) = "sra" + | asm_shift (I.SLLX) = "sllx" + | asm_shift (I.SRLX) = "srlx" + | asm_shift (I.SRAX) = "srax" + and emit_shift x = emit (asm_shift x) + and asm_farith1 (I.FiTOs) = "fitos" + | asm_farith1 (I.FiTOd) = "fitod" + | asm_farith1 (I.FiTOq) = "fitoq" + | asm_farith1 (I.FsTOi) = "fstoi" + | asm_farith1 (I.FdTOi) = "fdtoi" + | asm_farith1 (I.FqTOi) = "fqtoi" + | asm_farith1 (I.FsTOd) = "fstod" + | asm_farith1 (I.FsTOq) = "fstoq" + | asm_farith1 (I.FdTOs) = "fdtos" + | asm_farith1 (I.FdTOq) = "fdtoq" + | asm_farith1 (I.FqTOs) = "fqtos" + | asm_farith1 (I.FqTOd) = "fqtod" + | asm_farith1 (I.FMOVs) = "fmovs" + | asm_farith1 (I.FNEGs) = "fnegs" + | asm_farith1 (I.FABSs) = "fabss" + | asm_farith1 (I.FMOVd) = "fmovd" + | asm_farith1 (I.FNEGd) = "fnegd" + | asm_farith1 (I.FABSd) = "fabsd" + | asm_farith1 (I.FMOVq) = "fmovq" + | asm_farith1 (I.FNEGq) = "fnegq" + | asm_farith1 (I.FABSq) = "fabsq" + | asm_farith1 (I.FSQRTs) = "fsqrts" + | asm_farith1 (I.FSQRTd) = "fsqrtd" + | asm_farith1 (I.FSQRTq) = "fsqrtq" + and emit_farith1 x = emit (asm_farith1 x) + and asm_farith2 (I.FADDs) = "fadds" + | asm_farith2 (I.FADDd) = "faddd" + | asm_farith2 (I.FADDq) = "faddq" + | asm_farith2 (I.FSUBs) = "fsubs" + | asm_farith2 (I.FSUBd) = "fsubd" + | asm_farith2 (I.FSUBq) = "fsubq" + | asm_farith2 (I.FMULs) = "fmuls" + | asm_farith2 (I.FMULd) = "fmuld" + | asm_farith2 (I.FMULq) = "fmulq" + | asm_farith2 (I.FsMULd) = "fsmuld" + | asm_farith2 (I.FdMULq) = "fdmulq" + | asm_farith2 (I.FDIVs) = "fdivs" + | asm_farith2 (I.FDIVd) = "fdivd" + | asm_farith2 (I.FDIVq) = "fdivq" + and emit_farith2 x = emit (asm_farith2 x) + and asm_fcmp (I.FCMPs) = "fcmps" + | asm_fcmp (I.FCMPd) = "fcmpd" + | asm_fcmp (I.FCMPq) = "fcmpq" + | asm_fcmp (I.FCMPEs) = "fcmpes" + | asm_fcmp (I.FCMPEd) = "fcmped" + | asm_fcmp (I.FCMPEq) = "fcmpeq" + and emit_fcmp x = emit (asm_fcmp x) + and asm_branch (I.BN) = "n" + | asm_branch (I.BE) = "e" + | asm_branch (I.BLE) = "le" + | asm_branch (I.BL) = "l" + | asm_branch (I.BLEU) = "leu" + | asm_branch (I.BCS) = "cs" + | asm_branch (I.BNEG) = "neg" + | asm_branch (I.BVS) = "vs" + | asm_branch (I.BA) = "" + | asm_branch (I.BNE) = "ne" + | asm_branch (I.BG) = "g" + | asm_branch (I.BGE) = "ge" + | asm_branch (I.BGU) = "gu" + | asm_branch (I.BCC) = "cc" + | asm_branch (I.BPOS) = "pos" + | asm_branch (I.BVC) = "vs" + and emit_branch x = emit (asm_branch x) + and asm_rcond (I.RZ) = "rz" + | asm_rcond (I.RLEZ) = "rlez" + | asm_rcond (I.RLZ) = "rlz" + | asm_rcond (I.RNZ) = "rnz" + | asm_rcond (I.RGZ) = "rgz" + | asm_rcond (I.RGEZ) = "rgez" + and emit_rcond x = emit (asm_rcond x) + and asm_prediction (I.PT) = "pt" + | asm_prediction (I.PN) = "pn" + and emit_prediction x = emit (asm_prediction x) + and asm_fbranch (I.FBN) = "fbn" + | asm_fbranch (I.FBNE) = "fbne" + | asm_fbranch (I.FBLG) = "fblg" + | asm_fbranch (I.FBUL) = "fbul" + | asm_fbranch (I.FBL) = "fbl" + | asm_fbranch (I.FBUG) = "fbug" + | asm_fbranch (I.FBG) = "fbg" + | asm_fbranch (I.FBU) = "fbu" + | asm_fbranch (I.FBA) = "fb" + | asm_fbranch (I.FBE) = "fbe" + | asm_fbranch (I.FBUE) = "fbue" + | asm_fbranch (I.FBGE) = "fbge" + | asm_fbranch (I.FBUGE) = "fbuge" + | asm_fbranch (I.FBLE) = "fble" + | asm_fbranch (I.FBULE) = "fbule" + | asm_fbranch (I.FBO) = "fbo" + and emit_fbranch x = emit (asm_fbranch x) + and asm_fsize (I.S) = "s" + | asm_fsize (I.D) = "d" + | asm_fsize (I.Q) = "q" + and emit_fsize x = emit (asm_fsize x) + and emit_operand (I.REG GP) = emitCell GP + | emit_operand (I.IMMED int) = emit_int int + | emit_operand (I.LAB labexp) = emit_labexp labexp + | emit_operand (I.LO labexp) = + ( emit "%lo("; + emit_labexp labexp; + emit ")" ) + | emit_operand (I.HI labexp) = + ( emit "%hi("; + emit_labexp labexp; + emit ")" ) + +(*#line 469.7 "sparc/sparc.mdl"*) + fun emit_leaf false = () + | emit_leaf true = emit "l" + +(*#line 470.7 "sparc/sparc.mdl"*) + fun emit_nop false = () + | emit_nop true = emit "\n\tnop" + +(*#line 471.7 "sparc/sparc.mdl"*) + fun emit_a false = () + | emit_a true = emit ",a" + +(*#line 472.7 "sparc/sparc.mdl"*) + fun emit_cc false = () + | emit_cc true = emit "cc" + fun emitInstr' instr = + (case instr of + I.LOAD{l, d, r, i, mem} => + ( emit_load l; + emit "\t["; + emitCell r; + emit "+"; + emit_operand i; + emit "], "; + emitCell d; + emit_region mem ) + | I.STORE{s, d, r, i, mem} => + ( emit_store s; + emit "\t"; + emitCell d; + emit ", ["; + emitCell r; + emit "+"; + emit_operand i; + emit "]"; + emit_region mem ) + | I.FLOAD{l, r, i, d, mem} => + ( emit_fload l; + emit "\t["; + emitCell r; + emit "+"; + emit_operand i; + emit "], "; + emitCell d; + emit_region mem ) + | I.FSTORE{s, d, r, i, mem} => + ( emit_fstore s; + emit "\t["; + emitCell r; + emit "+"; + emit_operand i; + emit "], "; + emitCell d; + emit_region mem ) + | I.UNIMP{const22} => + ( emit "unimp "; + emit_int const22 ) + | I.SETHI{i, d} => + let +(*#line 656.18 "sparc/sparc.mdl"*) + val i = Word32.toString (Word32.<< (Word32.fromInt i, 0wxA)) + in + ( emit "sethi\t%hi(0x"; + emit i; + emit "), "; + emitCell d ) + end + | I.ARITH{a, r, i, d} => + (case (a, CellsBasis.registerId r, CellsBasis.registerId d, i) of + (I.OR, 0, _, I.REG _) => + ( emit "mov\t"; + emit_operand i; + emit ", "; + emitCell d ) + | (I.OR, 0, _, _) => + ( emit "set\t"; + emit_operand i; + emit ", "; + emitCell d ) + | (I.SUBCC, _, 0, _) => + ( emit "cmp\t"; + emitCell r; + emit ", "; + emit_operand i ) + | _ => + ( emit_arith a; + emit "\t"; + emitCell r; + emit ", "; + emit_operand i; + emit ", "; + emitCell d ) + ) + | I.SHIFT{s, r, i, d} => + ( emit_shift s; + emit "\t"; + emitCell r; + emit ", "; + emit_operand i; + emit ", "; + emitCell d ) + | I.MOVicc{b, i, d} => + ( emit "mov"; + emit_branch b; + emit "\t"; + emit_operand i; + emit ", "; + emitCell d ) + | I.MOVfcc{b, i, d} => + ( emit "mov"; + emit_fbranch b; + emit "\t"; + emit_operand i; + emit ", "; + emitCell d ) + | I.MOVR{rcond, r, i, d} => + ( emit "movr"; + emit_rcond rcond; + emit "\t"; + emitCell r; + emit ", "; + emit_operand i; + emit ", "; + emitCell d ) + | I.FMOVicc{sz, b, r, d} => + ( emit "fmov"; + emit_fsize sz; + emit_branch b; + emit "\t"; + emitCell r; + emit ", "; + emitCell d ) + | I.FMOVfcc{sz, b, r, d} => + ( emit "fmov"; + emit_fsize sz; + emit_fbranch b; + emit "\t"; + emitCell r; + emit ", "; + emitCell d ) + | I.Bicc{b, a, label, nop} => + ( emit "b"; + emit_branch b; + emit_a a; + emit "\t"; + emit_label label; + emit_nop nop ) + | I.FBfcc{b, a, label, nop} => + ( emit_fbranch b; + emit_a a; + emit "\t"; + emit_label label; + emit_nop nop ) + | I.BR{rcond, p, r, a, label, nop} => + ( emit "b"; + emit_rcond rcond; + emit_a a; + emit_prediction p; + emit "\t"; + emitCell r; + emit ", "; + emit_label label; + emit_nop nop ) + | I.BP{b, p, cc, a, label, nop} => + ( emit "bp"; + emit_branch b; + emit_a a; + emit_prediction p; + emit "\t%"; + emit (if (cc = I.ICC) + then "i" + else "x"); + emit "cc, "; + emit_label label; + emit_nop nop ) + | I.JMP{r, i, labs, nop} => + ( emit "jmp\t"; + emitCell r; + emit "+"; + emit_operand i; + emit_nop nop ) + | I.JMPL{r, i, d, defs, uses, cutsTo, nop, mem} => + ( emit "jmpl\t"; + emitCell r; + emit "+"; + emit_operand i; + emit ", "; + emitCell d; + emit_region mem; + emit_defs defs; + emit_uses uses; + emit_cutsTo cutsTo; + emit_nop nop ) + | I.CALL{defs, uses, label, cutsTo, nop, mem} => + ( emit "call\t"; + emit_label label; + emit_region mem; + emit_defs defs; + emit_uses uses; + emit_cutsTo cutsTo; + emit_nop nop ) + | I.Ticc{t, cc, r, i} => + ( emit "t"; + emit_branch t; + emit "\t"; + (if (cc = I.ICC) + then () + else (emit "%xcc, ")); + emitCell r; + emit "+"; + emit_operand i ) + | I.FPop1{a, r, d} => + let +(*#line 764.18 "sparc/sparc.mdl"*) + fun f (a, r, d) = + ( emit a; + emit "\t"; + emit (C.showFP r); + emit ", "; + emit (C.showFP d)) + +(*#line 769.18 "sparc/sparc.mdl"*) + fun g (a, r, d) = + let +(*#line 770.22 "sparc/sparc.mdl"*) + val r = CellsBasis.registerNum r + and d = CellsBasis.registerNum d + in f (a, r, d); + emit "\n\t"; + f ("fmovs", r + 1, d + 1) + end + +(*#line 774.18 "sparc/sparc.mdl"*) + fun h (a, r, d) = + let +(*#line 775.22 "sparc/sparc.mdl"*) + val r = CellsBasis.registerNum r + and d = CellsBasis.registerNum d + in f (a, r, d); + emit "\n\t"; + f ("fmovs", r + 1, d + 1); + emit "\n\t"; + f ("fmovs", r + 2, d + 2); + emit "\n\t"; + f ("fmovs", r + 3, d + 3) + end + in (if V9 + then + ( emit_farith1 a; + emit "\t"; + emitCell r; + emit ", "; + emitCell d ) + else + (case a of + I.FMOVd => g ("fmovs", r, d) + | I.FNEGd => g ("fnegs", r, d) + | I.FABSd => g ("fabss", r, d) + | I.FMOVq => h ("fmovs", r, d) + | I.FNEGq => h ("fnegs", r, d) + | I.FABSq => h ("fabss", r, d) + | _ => + ( emit_farith1 a; + emit "\t"; + emitCell r; + emit ", "; + emitCell d ) + )) + end + | I.FPop2{a, r1, r2, d} => + ( emit_farith2 a; + emit "\t"; + emitCell r1; + emit ", "; + emitCell r2; + emit ", "; + emitCell d ) + | I.FCMP{cmp, r1, r2, nop} => + ( emit_fcmp cmp; + emit "\t"; + emitCell r1; + emit ", "; + emitCell r2; + emit_nop nop ) + | I.SAVE{r, i, d} => + ( emit "save\t"; + emitCell r; + emit ", "; + emit_operand i; + emit ", "; + emitCell d ) + | I.RESTORE{r, i, d} => + ( emit "restore\t"; + emitCell r; + emit ", "; + emit_operand i; + emit ", "; + emitCell d ) + | I.RDY{d} => + ( emit "rd\t%y, "; + emitCell d ) + | I.WRY{r, i} => + ( emit "wr\t"; + emitCell r; + emit ", "; + emit_operand i; + emit ", %y" ) + | I.RET{leaf, nop} => + ( emit "ret"; + emit_leaf leaf; + emit_nop nop ) + | I.SOURCE{} => emit "source" + | I.SINK{} => emit "sink" + | I.PHI{} => emit "phi" + ) + in tab(); emitInstr' instr; nl() + end (* emitter *) + and emitInstrIndented i = (indent(); emitInstr i; nl()) + and emitInstrs instrs = + app (if !indent_copies then emitInstrIndented + else emitInstr) instrs + + and emitInstr(I.ANNOTATION{i,a}) = + ( comment(Annotations.toString a); + nl(); + emitInstr i ) + | emitInstr(I.LIVE{regs, spilled}) = + comment("live= " ^ CellsBasis.CellSet.toString regs ^ + "spilled= " ^ CellsBasis.CellSet.toString spilled) + | emitInstr(I.KILL{regs, spilled}) = + comment("killed:: " ^ CellsBasis.CellSet.toString regs ^ + "spilled:: " ^ CellsBasis.CellSet.toString spilled) + | emitInstr(I.INSTR i) = emitter i + | emitInstr(I.COPY{k=CellsBasis.GP, sz, src, dst, tmp}) = + emitInstrs(Shuffle.shuffle{tmp=tmp, src=src, dst=dst}) + | emitInstr(I.COPY{k=CellsBasis.FP, sz, src, dst, tmp}) = + emitInstrs(Shuffle.shufflefp{tmp=tmp, src=src, dst=dst}) + | emitInstr _ = error "emitInstr" + + in S.STREAM{beginCluster=init, + pseudoOp=pseudoOp, + emit=emitInstr, + endCluster=fail, + defineLabel=defineLabel, + entryLabel=entryLabel, + comment=comment, + exitBlock=doNothing, + annotation=annotation, + getAnnotations=getAnnotations + } + end +end + diff --git a/MLRISC/sparc/emit/sparcMC.sml b/MLRISC/sparc/emit/sparcMC.sml new file mode 100644 index 0000000..eeea673 --- /dev/null +++ b/MLRISC/sparc/emit/sparcMC.sml @@ -0,0 +1,543 @@ +(* + * WARNING: This file was automatically generated by MDLGen (v3.1) + * from the machine description file "sparc/sparc.mdl". + * DO NOT EDIT this file directly + *) + + +functor SparcMCEmitter(structure Instr : SPARCINSTR + structure MLTreeEval : MLTREE_EVAL where T = Instr.T + structure Stream : INSTRUCTION_STREAM + structure CodeString : CODE_STRING + ) : INSTRUCTION_EMITTER = +struct + structure I = Instr + structure C = I.C + structure Constant = I.Constant + structure T = I.T + structure S = Stream + structure P = S.P + structure W = Word32 + + (* Sparc is big endian *) + + fun error msg = MLRiscErrorMsg.error("SparcMC",msg) + fun makeStream _ = + let infix && || << >> ~>> + val op << = W.<< + val op >> = W.>> + val op ~>> = W.~>> + val op || = W.orb + val op && = W.andb + val itow = W.fromInt + fun emit_bool false = 0w0 : W.word + | emit_bool true = 0w1 : W.word + val emit_int = itow + fun emit_word w = w + fun emit_label l = itow(Label.addrOf l) + fun emit_labexp le = itow(MLTreeEval.valueOf le) + fun emit_const c = itow(Constant.valueOf c) + val w32ToByte = Word8.fromLarge o Word32.toLarge + val loc = ref 0 + + (* emit a byte *) + fun eByte b = + let val i = !loc in loc := i + 1; CodeString.update(i,b) end + + (* emit the low order byte of a word *) + (* note: fromLargeWord strips the high order bits! *) + fun eByteW w = + let val i = !loc + in loc := i + 1; CodeString.update(i, w32ToByte w) end + + fun doNothing _ = () + fun fail _ = raise Fail "MCEmitter" + fun getAnnotations () = error "getAnnotations" + + fun pseudoOp pOp = P.emitValue{pOp=pOp, loc= !loc,emit=eByte} + + fun init n = (CodeString.init n; loc := 0) + + + fun eWord32 w = + let val b8 = w + val w = w >> 0wx8 + val b16 = w + val w = w >> 0wx8 + val b24 = w + val w = w >> 0wx8 + val b32 = w + in + ( eByteW b32; + eByteW b24; + eByteW b16; + eByteW b8 ) + end + fun emit_GP r = itow (CellsBasis.physicalRegisterNum r) + and emit_FP r = itow (CellsBasis.physicalRegisterNum r) + and emit_Y r = itow (CellsBasis.physicalRegisterNum r) + and emit_PSR r = itow (CellsBasis.physicalRegisterNum r) + and emit_FSR r = itow (CellsBasis.physicalRegisterNum r) + and emit_CC r = itow (CellsBasis.physicalRegisterNum r) + and emit_MEM r = itow (CellsBasis.physicalRegisterNum r) + and emit_CTRL r = itow (CellsBasis.physicalRegisterNum r) + and emit_CELLSET r = itow (CellsBasis.physicalRegisterNum r) + fun emit_load (I.LDSB) = (0wx9 : Word32.word) + | emit_load (I.LDSH) = (0wxA : Word32.word) + | emit_load (I.LDUB) = (0wx1 : Word32.word) + | emit_load (I.LDUH) = (0wx2 : Word32.word) + | emit_load (I.LD) = (0wx0 : Word32.word) + | emit_load (I.LDX) = (0wxB : Word32.word) + | emit_load (I.LDD) = (0wx3 : Word32.word) + and emit_store (I.STB) = (0wx5 : Word32.word) + | emit_store (I.STH) = (0wx6 : Word32.word) + | emit_store (I.ST) = (0wx4 : Word32.word) + | emit_store (I.STX) = (0wxE : Word32.word) + | emit_store (I.STD) = (0wx7 : Word32.word) + and emit_fload (I.LDF) = (0wx20 : Word32.word) + | emit_fload (I.LDDF) = (0wx23 : Word32.word) + | emit_fload (I.LDQF) = (0wx22 : Word32.word) + | emit_fload (I.LDFSR) = (0wx21 : Word32.word) + | emit_fload (I.LDXFSR) = (0wx21 : Word32.word) + and emit_fstore (I.STF) = (0wx24 : Word32.word) + | emit_fstore (I.STDF) = (0wx27 : Word32.word) + | emit_fstore (I.STFSR) = (0wx25 : Word32.word) + and emit_arith (I.AND) = (0wx1 : Word32.word) + | emit_arith (I.ANDCC) = (0wx11 : Word32.word) + | emit_arith (I.ANDN) = (0wx5 : Word32.word) + | emit_arith (I.ANDNCC) = (0wx15 : Word32.word) + | emit_arith (I.OR) = (0wx2 : Word32.word) + | emit_arith (I.ORCC) = (0wx12 : Word32.word) + | emit_arith (I.ORN) = (0wx6 : Word32.word) + | emit_arith (I.ORNCC) = (0wx16 : Word32.word) + | emit_arith (I.XOR) = (0wx3 : Word32.word) + | emit_arith (I.XORCC) = (0wx13 : Word32.word) + | emit_arith (I.XNOR) = (0wx7 : Word32.word) + | emit_arith (I.XNORCC) = (0wx17 : Word32.word) + | emit_arith (I.ADD) = (0wx0 : Word32.word) + | emit_arith (I.ADDCC) = (0wx10 : Word32.word) + | emit_arith (I.TADD) = (0wx20 : Word32.word) + | emit_arith (I.TADDCC) = (0wx30 : Word32.word) + | emit_arith (I.TADDTV) = (0wx22 : Word32.word) + | emit_arith (I.TADDTVCC) = (0wx32 : Word32.word) + | emit_arith (I.SUB) = (0wx4 : Word32.word) + | emit_arith (I.SUBCC) = (0wx14 : Word32.word) + | emit_arith (I.TSUB) = (0wx21 : Word32.word) + | emit_arith (I.TSUBCC) = (0wx31 : Word32.word) + | emit_arith (I.TSUBTV) = (0wx23 : Word32.word) + | emit_arith (I.TSUBTVCC) = (0wx33 : Word32.word) + | emit_arith (I.UMUL) = (0wxA : Word32.word) + | emit_arith (I.UMULCC) = (0wx1A : Word32.word) + | emit_arith (I.SMUL) = (0wxB : Word32.word) + | emit_arith (I.SMULCC) = (0wx1B : Word32.word) + | emit_arith (I.UDIV) = (0wxE : Word32.word) + | emit_arith (I.UDIVCC) = (0wx1E : Word32.word) + | emit_arith (I.SDIV) = (0wxF : Word32.word) + | emit_arith (I.SDIVCC) = (0wx1F : Word32.word) + | emit_arith (I.MULX) = (0wx9 : Word32.word) + | emit_arith (I.SDIVX) = (0wx2D : Word32.word) + | emit_arith (I.UDIVX) = (0wxD : Word32.word) + and emit_shift (I.SLL) = (0wx25, 0wx0) + | emit_shift (I.SRL) = (0wx26, 0wx0) + | emit_shift (I.SRA) = (0wx27, 0wx0) + | emit_shift (I.SLLX) = (0wx25, 0wx1) + | emit_shift (I.SRLX) = (0wx26, 0wx1) + | emit_shift (I.SRAX) = (0wx27, 0wx1) + and emit_farith1 (I.FiTOs) = (0wxC4 : Word32.word) + | emit_farith1 (I.FiTOd) = (0wxC8 : Word32.word) + | emit_farith1 (I.FiTOq) = (0wxCC : Word32.word) + | emit_farith1 (I.FsTOi) = (0wxD1 : Word32.word) + | emit_farith1 (I.FdTOi) = (0wxD2 : Word32.word) + | emit_farith1 (I.FqTOi) = (0wxD3 : Word32.word) + | emit_farith1 (I.FsTOd) = (0wxC9 : Word32.word) + | emit_farith1 (I.FsTOq) = (0wxD5 : Word32.word) + | emit_farith1 (I.FdTOs) = (0wxC6 : Word32.word) + | emit_farith1 (I.FdTOq) = (0wxCE : Word32.word) + | emit_farith1 (I.FqTOs) = (0wxC7 : Word32.word) + | emit_farith1 (I.FqTOd) = (0wxCB : Word32.word) + | emit_farith1 (I.FMOVs) = (0wx1 : Word32.word) + | emit_farith1 (I.FNEGs) = (0wx5 : Word32.word) + | emit_farith1 (I.FABSs) = (0wx9 : Word32.word) + | emit_farith1 (I.FMOVd) = error "FMOVd" + | emit_farith1 (I.FNEGd) = error "FNEGd" + | emit_farith1 (I.FABSd) = error "FABSd" + | emit_farith1 (I.FMOVq) = error "FMOVq" + | emit_farith1 (I.FNEGq) = error "FNEGq" + | emit_farith1 (I.FABSq) = error "FABSq" + | emit_farith1 (I.FSQRTs) = (0wx29 : Word32.word) + | emit_farith1 (I.FSQRTd) = (0wx2A : Word32.word) + | emit_farith1 (I.FSQRTq) = (0wx2B : Word32.word) + and emit_farith2 (I.FADDs) = (0wx41 : Word32.word) + | emit_farith2 (I.FADDd) = (0wx42 : Word32.word) + | emit_farith2 (I.FADDq) = (0wx43 : Word32.word) + | emit_farith2 (I.FSUBs) = (0wx45 : Word32.word) + | emit_farith2 (I.FSUBd) = (0wx46 : Word32.word) + | emit_farith2 (I.FSUBq) = (0wx47 : Word32.word) + | emit_farith2 (I.FMULs) = (0wx49 : Word32.word) + | emit_farith2 (I.FMULd) = (0wx4A : Word32.word) + | emit_farith2 (I.FMULq) = (0wx4B : Word32.word) + | emit_farith2 (I.FsMULd) = (0wx69 : Word32.word) + | emit_farith2 (I.FdMULq) = (0wx6E : Word32.word) + | emit_farith2 (I.FDIVs) = (0wx4D : Word32.word) + | emit_farith2 (I.FDIVd) = (0wx4E : Word32.word) + | emit_farith2 (I.FDIVq) = (0wx4F : Word32.word) + and emit_fcmp (I.FCMPs) = (0wx51 : Word32.word) + | emit_fcmp (I.FCMPd) = (0wx52 : Word32.word) + | emit_fcmp (I.FCMPq) = (0wx53 : Word32.word) + | emit_fcmp (I.FCMPEs) = (0wx55 : Word32.word) + | emit_fcmp (I.FCMPEd) = (0wx56 : Word32.word) + | emit_fcmp (I.FCMPEq) = (0wx57 : Word32.word) + and emit_branch (I.BN) = (0wx0 : Word32.word) + | emit_branch (I.BE) = (0wx1 : Word32.word) + | emit_branch (I.BLE) = (0wx2 : Word32.word) + | emit_branch (I.BL) = (0wx3 : Word32.word) + | emit_branch (I.BLEU) = (0wx4 : Word32.word) + | emit_branch (I.BCS) = (0wx5 : Word32.word) + | emit_branch (I.BNEG) = (0wx6 : Word32.word) + | emit_branch (I.BVS) = (0wx7 : Word32.word) + | emit_branch (I.BA) = (0wx8 : Word32.word) + | emit_branch (I.BNE) = (0wx9 : Word32.word) + | emit_branch (I.BG) = (0wxA : Word32.word) + | emit_branch (I.BGE) = (0wxB : Word32.word) + | emit_branch (I.BGU) = (0wxC : Word32.word) + | emit_branch (I.BCC) = (0wxD : Word32.word) + | emit_branch (I.BPOS) = (0wxE : Word32.word) + | emit_branch (I.BVC) = (0wxF : Word32.word) + and emit_rcond (I.RZ) = (0wx1 : Word32.word) + | emit_rcond (I.RLEZ) = (0wx2 : Word32.word) + | emit_rcond (I.RLZ) = (0wx3 : Word32.word) + | emit_rcond (I.RNZ) = (0wx5 : Word32.word) + | emit_rcond (I.RGZ) = (0wx6 : Word32.word) + | emit_rcond (I.RGEZ) = (0wx7 : Word32.word) + and emit_cc (I.ICC) = (0wx0 : Word32.word) + | emit_cc (I.XCC) = (0wx2 : Word32.word) + and emit_fbranch (I.FBN) = (0wx0 : Word32.word) + | emit_fbranch (I.FBNE) = (0wx1 : Word32.word) + | emit_fbranch (I.FBLG) = (0wx2 : Word32.word) + | emit_fbranch (I.FBUL) = (0wx3 : Word32.word) + | emit_fbranch (I.FBL) = (0wx4 : Word32.word) + | emit_fbranch (I.FBUG) = (0wx5 : Word32.word) + | emit_fbranch (I.FBG) = (0wx6 : Word32.word) + | emit_fbranch (I.FBU) = (0wx7 : Word32.word) + | emit_fbranch (I.FBA) = (0wx8 : Word32.word) + | emit_fbranch (I.FBE) = (0wx9 : Word32.word) + | emit_fbranch (I.FBUE) = (0wxA : Word32.word) + | emit_fbranch (I.FBGE) = (0wxB : Word32.word) + | emit_fbranch (I.FBUGE) = (0wxC : Word32.word) + | emit_fbranch (I.FBLE) = (0wxD : Word32.word) + | emit_fbranch (I.FBULE) = (0wxE : Word32.word) + | emit_fbranch (I.FBO) = (0wxF : Word32.word) + and emit_fsize (I.S) = (0wx4 : Word32.word) + | emit_fsize (I.D) = (0wx6 : Word32.word) + | emit_fsize (I.Q) = (0wx7 : Word32.word) + fun opn {i} = + let +(*#line 478.11 "sparc/sparc.mdl"*) + fun hi22 w = (itow w) ~>> 0wxA + +(*#line 479.11 "sparc/sparc.mdl"*) + fun lo10 w = ((itow w) && 0wx3FF) + in + (case i of + I.REG rs2 => error "opn" + | I.IMMED i => itow i + | I.LAB l => itow (MLTreeEval.valueOf l) + | I.LO l => lo10 (MLTreeEval.valueOf l) + | I.HI l => hi22 (MLTreeEval.valueOf l) + ) + end + and rr {op1, rd, op3, rs1, rs2} = + let val rs1 = emit_GP rs1 + val rs2 = emit_GP rs2 + in eWord32 ((op1 << 0wx1E) + ((rd << 0wx19) + ((op3 << 0wx13) + ((rs1 << 0wxE) + rs2)))) + end + and ri {op1, rd, op3, rs1, simm13} = + let val rs1 = emit_GP rs1 + in eWord32 ((op1 << 0wx1E) + ((rd << 0wx19) + ((op3 << 0wx13) + ((rs1 << 0wxE) + ((simm13 && 0wx1FFF) + 0wx2000))))) + end + and rix {op1, op3, r, i, d} = + (case i of + I.REG rs2 => rr {op1=op1, op3=op3, rs1=r, rs2=rs2, rd=d} + | _ => ri {op1=op1, op3=op3, rs1=r, rd=d, simm13=opn {i=i}} + ) + and rir {op1, op3, r, i, d} = + let val d = emit_GP d + in rix {op1=op1, op3=op3, r=r, i=i, d=d} + end + and rif {op1, op3, r, i, d} = + let val d = emit_FP d + in rix {op1=op1, op3=op3, r=r, i=i, d=d} + end + and load {l, r, i, d} = + let val l = emit_load l + in rir {op1=0wx3, op3=l, r=r, i=i, d=d} + end + and store {s, r, i, d} = + let val s = emit_store s + in rir {op1=0wx3, op3=s, r=r, i=i, d=d} + end + and fload {l, r, i, d} = + let val l = emit_fload l + in rif {op1=0wx3, op3=l, r=r, i=i, d=d} + end + and fstore {s, r, i, d} = + let val s = emit_fstore s + in rif {op1=0wx3, op3=s, r=r, i=i, d=d} + end + and sethi {rd, imm22} = + let val rd = emit_GP rd + val imm22 = emit_int imm22 + in eWord32 ((rd << 0wx19) + ((imm22 && 0wx3FFFFF) + 0wx1000000)) + end + and NOP {} = eWord32 0wx1000000 + and unimp {const22} = + let val const22 = emit_int const22 + in eWord32 const22 + end + and delay {nop} = (if nop + then (NOP {}) + else ()) + and arith {a, r, i, d} = + let val a = emit_arith a + in rir {op1=0wx2, op3=a, r=r, i=i, d=d} + end + and shiftr {rd, op3, rs1, x, rs2} = + let val rs2 = emit_GP rs2 + in eWord32 ((rd << 0wx19) + ((op3 << 0wx13) + ((rs1 << 0wxE) + ((x << 0wxC) + (rs2 + 0wx80000000))))) + end + and shifti {rd, op3, rs1, x, cnt} = eWord32 ((rd << 0wx19) + ((op3 << 0wx13) + ((rs1 << 0wxE) + ((x << 0wxC) + ((cnt && 0wx3F) + 0wx80002000))))) + and shift {s, r, i, d} = + let val s = emit_shift s + val r = emit_GP r + val d = emit_GP d + in + let +(*#line 517.13 "sparc/sparc.mdl"*) + val (op3, x) = s + in + (case i of + I.REG rs2 => shiftr {op3=op3, rs1=r, rs2=rs2, rd=d, x=x} + | _ => shifti {op3=op3, rs1=r, cnt=opn {i=i}, rd=d, x=x} + ) + end + end + and save {r, i, d} = rir {op1=0wx2, op3=0wx3C, r=r, i=i, d=d} + and restore {r, i, d} = rir {op1=0wx2, op3=0wx3D, r=r, i=i, d=d} + and bicc {a, b, disp22} = + let val a = emit_bool a + val b = emit_branch b + in eWord32 ((a << 0wx1D) + ((b << 0wx19) + ((disp22 && 0wx3FFFFF) + 0wx800000))) + end + and fbfcc {a, b, disp22} = + let val a = emit_bool a + val b = emit_fbranch b + in eWord32 ((a << 0wx1D) + ((b << 0wx19) + ((disp22 && 0wx3FFFFF) + 0wx1800000))) + end + and call {disp30} = eWord32 ((disp30 && 0wx3FFFFFFF) + 0wx40000000) + and jmpl {r, i, d} = rir {op1=0wx2, op3=0wx38, r=r, i=i, d=d} + and jmp {r, i} = rix {op1=0wx2, op3=0wx38, r=r, i=i, d=0wx0} + and ticcr {op1, rd, op3, rs1, cc, rs2} = + let val rs1 = emit_GP rs1 + val cc = emit_cc cc + val rs2 = emit_GP rs2 + in eWord32 ((op1 << 0wx1E) + ((rd << 0wx19) + ((op3 << 0wx13) + ((rs1 << 0wxE) + ((cc << 0wxB) + rs2))))) + end + and ticci {op1, rd, op3, rs1, cc, sw_trap} = + let val rs1 = emit_GP rs1 + val cc = emit_cc cc + in eWord32 ((op1 << 0wx1E) + ((rd << 0wx19) + ((op3 << 0wx13) + ((rs1 << 0wxE) + ((cc << 0wxB) + ((sw_trap && 0wx7F) + 0wx2000)))))) + end + and ticcx {op1, op3, cc, r, i, d} = + (case i of + I.REG rs2 => ticcr {op1=op1, op3=op3, cc=cc, rs1=r, rs2=rs2, rd=d} + | _ => ticci {op1=op1, op3=op3, cc=cc, rs1=r, rd=d, sw_trap=opn {i=i}} + ) + and ticc {t, cc, r, i} = + let val t = emit_branch t + in ticcx {op1=0wx2, d=t, op3=0wx3A, cc=cc, r=r, i=i} + end + and rdy {d} = + let val d = emit_GP d + in eWord32 ((d << 0wx19) + 0wx81400000) + end + and wdy {r, i} = rix {op1=0wx2, op3=0wx30, r=r, i=i, d=0wx0} + and fop_1 {d, a, r} = eWord32 ((d << 0wx19) + ((a << 0wx5) + (r + 0wx81A00000))) + and fop1 {a, r, d} = + let val a = emit_farith1 a + val r = emit_FP r + val d = emit_FP d + in fop_1 {a=a, r=r, d=d} + end + and fdouble {a, r, d} = + let val a = emit_farith1 a + val r = emit_FP r + val d = emit_FP d + in + ( fop_1 {a=a, r=r, d=d}; + fop_1 {a=0wx1, r=r + 0wx1, d=d + 0wx1}) + end + and fquad {a, r, d} = + let val a = emit_farith1 a + val r = emit_FP r + val d = emit_FP d + in + ( fop_1 {a=a, r=r, d=d}; + fop_1 {a=0wx1, r=r + 0wx1, d=d + 0wx1}; + fop_1 {a=0wx1, r=r + 0wx2, d=d + 0wx2}; + fop_1 {a=0wx1, r=r + 0wx3, d=d + 0wx3}) + end + and fop2 {d, r1, a, r2} = + let val d = emit_FP d + val r1 = emit_FP r1 + val a = emit_farith2 a + val r2 = emit_FP r2 + in eWord32 ((d << 0wx19) + ((r1 << 0wxE) + ((a << 0wx5) + (r2 + 0wx81A00000)))) + end + and fcmp {rs1, opf, rs2} = + let val rs1 = emit_FP rs1 + val opf = emit_fcmp opf + val rs2 = emit_FP rs2 + in eWord32 ((rs1 << 0wxE) + ((opf << 0wx5) + (rs2 + 0wx81A80000))) + end + and cmovr {op3, rd, cc2, cond, cc1, cc0, rs2} = eWord32 ((op3 << 0wx18) + ((rd << 0wx13) + ((cc2 << 0wx12) + ((cond << 0wxE) + ((cc1 << 0wxC) + ((cc0 << 0wxB) + (rs2 + 0wx80000000))))))) + and cmovi {op3, rd, cc2, cond, cc1, cc0, simm11} = eWord32 ((op3 << 0wx18) + ((rd << 0wx13) + ((cc2 << 0wx12) + ((cond << 0wxE) + ((cc1 << 0wxC) + ((cc0 << 0wxB) + ((simm11 && 0wx7FF) + 0wx80002000))))))) + and cmov {op3, cond, cc2, cc1, cc0, i, rd} = + (case i of + I.REG rs2 => cmovr {op3=op3, cond=cond, rs2=emit_GP rs2, rd=rd, cc0=cc0, + cc1=cc1, cc2=cc2} + | _ => cmovi {op3=op3, cond=cond, rd=rd, cc0=cc0, cc1=cc1, cc2=cc2, + simm11=opn {i=i}} + ) + and movicc {b, i, d} = + let val b = emit_branch b + val d = emit_GP d + in cmov {op3=0wx2C, cond=b, i=i, rd=d, cc2=0wx1, cc1=0wx0, cc0=0wx0} + end + and movfcc {b, i, d} = + let val b = emit_fbranch b + val d = emit_GP d + in cmov {op3=0wx2C, cond=b, i=i, rd=d, cc2=0wx0, cc1=0wx0, cc0=0wx0} + end + and fmovicc {sz, b, r, d} = + let val sz = emit_fsize sz + val b = emit_branch b + val r = emit_FP r + val d = emit_FP d + in cmovr {op3=0wx2C, cond=b, rs2=r, rd=d, cc2=0wx1, cc1=0wx0, cc0=0wx0} + end + and fmovfcc {sz, b, r, d} = + let val sz = emit_fsize sz + val b = emit_fbranch b + val r = emit_FP r + val d = emit_FP d + in cmovr {op3=0wx2C, cond=b, rs2=r, rd=d, cc2=0wx0, cc1=0wx0, cc0=0wx0} + end + and movrr {rd, rs1, rcond, rs2} = + let val rd = emit_GP rd + val rs1 = emit_GP rs1 + val rs2 = emit_GP rs2 + in eWord32 ((rd << 0wx19) + ((rs1 << 0wxE) + ((rcond << 0wxA) + (rs2 + 0wx81780000)))) + end + and movri {rd, rs1, rcond, simm10} = + let val rd = emit_GP rd + val rs1 = emit_GP rs1 + in eWord32 ((rd << 0wx19) + ((rs1 << 0wxE) + ((rcond << 0wxA) + ((simm10 && 0wx3FF) + 0wx81782000)))) + end + and movr {rcond, r, i, d} = + let val rcond = emit_rcond rcond + in + (case i of + I.REG rs2 => movrr {rcond=rcond, rs1=r, rs2=rs2, rd=d} + | _ => movri {rcond=rcond, rs1=r, rd=d, simm10=opn {i=i}} + ) + end + +(*#line 596.7 "sparc/sparc.mdl"*) + fun disp label = (itow ((Label.addrOf label) - ( ! loc))) ~>> 0wx2 + +(*#line 597.7 "sparc/sparc.mdl"*) + val r15 = C.Reg CellsBasis.GP 15 + and r31 = C.Reg CellsBasis.GP 31 + fun emitter instr = + let + fun emitInstr (I.LOAD{l, d, r, i, mem}) = load {l=l, r=r, i=i, d=d} + | emitInstr (I.STORE{s, d, r, i, mem}) = store {s=s, r=r, i=i, d=d} + | emitInstr (I.FLOAD{l, r, i, d, mem}) = fload {l=l, r=r, i=i, d=d} + | emitInstr (I.FSTORE{s, d, r, i, mem}) = fstore {s=s, r=r, i=i, d=d} + | emitInstr (I.UNIMP{const22}) = unimp {const22=const22} + | emitInstr (I.SETHI{i, d}) = sethi {imm22=i, rd=d} + | emitInstr (I.ARITH{a, r, i, d}) = arith {a=a, r=r, i=i, d=d} + | emitInstr (I.SHIFT{s, r, i, d}) = shift {s=s, r=r, i=i, d=d} + | emitInstr (I.MOVicc{b, i, d}) = movicc {b=b, i=i, d=d} + | emitInstr (I.MOVfcc{b, i, d}) = movfcc {b=b, i=i, d=d} + | emitInstr (I.MOVR{rcond, r, i, d}) = movr {rcond=rcond, r=r, i=i, d=d} + | emitInstr (I.FMOVicc{sz, b, r, d}) = fmovicc {sz=sz, b=b, r=r, d=d} + | emitInstr (I.FMOVfcc{sz, b, r, d}) = fmovfcc {sz=sz, b=b, r=r, d=d} + | emitInstr (I.Bicc{b, a, label, nop}) = + ( bicc {b=b, a=a, disp22=disp label}; + delay {nop=nop}) + | emitInstr (I.FBfcc{b, a, label, nop}) = + ( fbfcc {b=b, a=a, disp22=disp label}; + delay {nop=nop}) + | emitInstr (I.BR{rcond, p, r, a, label, nop}) = error "BR" + | emitInstr (I.BP{b, p, cc, a, label, nop}) = error "BP" + | emitInstr (I.JMP{r, i, labs, nop}) = + ( jmp {r=r, i=i}; + delay {nop=nop}) + | emitInstr (I.JMPL{r, i, d, defs, uses, cutsTo, nop, mem}) = + ( jmpl {r=r, i=i, d=d}; + delay {nop=nop}) + | emitInstr (I.CALL{defs, uses, label, cutsTo, nop, mem}) = + ( call {disp30=disp label}; + delay {nop=nop}) + | emitInstr (I.Ticc{t, cc, r, i}) = ticc {t=t, r=r, cc=cc, i=i} + | emitInstr (I.FPop1{a, r, d}) = + (case a of + I.FMOVd => fdouble {a=I.FMOVs, r=r, d=d} + | I.FNEGd => fdouble {a=I.FNEGs, r=r, d=d} + | I.FABSd => fdouble {a=I.FABSs, r=r, d=d} + | I.FMOVq => fquad {a=I.FMOVs, r=r, d=d} + | I.FNEGq => fquad {a=I.FNEGs, r=r, d=d} + | I.FABSq => fquad {a=I.FABSs, r=r, d=d} + | _ => fop1 {a=a, r=r, d=d} + ) + | emitInstr (I.FPop2{a, r1, r2, d}) = fop2 {a=a, r1=r1, r2=r2, d=d} + | emitInstr (I.FCMP{cmp, r1, r2, nop}) = + ( fcmp {opf=cmp, rs1=r1, rs2=r2}; + delay {nop=nop}) + | emitInstr (I.SAVE{r, i, d}) = save {r=r, i=i, d=d} + | emitInstr (I.RESTORE{r, i, d}) = restore {r=r, i=i, d=d} + | emitInstr (I.RDY{d}) = rdy {d=d} + | emitInstr (I.WRY{r, i}) = wdy {r=r, i=i} + | emitInstr (I.RET{leaf, nop}) = + ( jmp {r=(if leaf + then r31 + else r15), i=I.IMMED 8}; + delay {nop=nop}) + | emitInstr (I.SOURCE{}) = () + | emitInstr (I.SINK{}) = () + | emitInstr (I.PHI{}) = () + in + emitInstr instr + end + + fun emitInstruction(I.ANNOTATION{i, ...}) = emitInstruction(i) + | emitInstruction(I.INSTR(i)) = emitter(i) + | emitInstruction(I.LIVE _) = () + | emitInstruction(I.KILL _) = () + | emitInstruction _ = error "emitInstruction" + + in S.STREAM{beginCluster=init, + pseudoOp=pseudoOp, + emit=emitInstruction, + endCluster=fail, + defineLabel=doNothing, + entryLabel=doNothing, + comment=doNothing, + exitBlock=doNothing, + annotation=doNothing, + getAnnotations=getAnnotations + } + end +end + diff --git a/MLRISC/sparc/flowgraph/sparcGasPseudoOps.sml b/MLRISC/sparc/flowgraph/sparcGasPseudoOps.sml new file mode 100644 index 0000000..3675d5f --- /dev/null +++ b/MLRISC/sparc/flowgraph/sparcGasPseudoOps.sml @@ -0,0 +1,33 @@ +functor SparcGasPseudoOps + ( structure T : MLTREE + structure MLTreeEval : MLTREE_EVAL where T = T + ) : PSEUDO_OPS_BASIS = + +struct + structure T = T + structure PB = PseudoOpsBasisTyp + structure Fmt = Format + + structure Endian = + PseudoOpsBig + (structure T = T + structure MLTreeEval=MLTreeEval + val icache_alignment = 16 + val max_alignment = SOME 7 + val nop = {sz=4, en=0wx1000000: Word32.word}) + + structure GasPseudoOps = + GasPseudoOps(structure T = T + val labFmt = {gPrefix="", aPrefix="L"}) + + type 'a pseudo_op = (T.labexp, 'a) PB.pseudo_op + + fun error msg = MLRiscErrorMsg.error ("GasPseudoOps.", msg) + + val sizeOf = Endian.sizeOf + val emitValue = Endian.emitValue + val lexpToString = GasPseudoOps.lexpToString + val toString = GasPseudoOps.toString + val defineLabel = GasPseudoOps.defineLabel + val wordSize = 32 +end diff --git a/MLRISC/sparc/instructions/sparcCells.sml b/MLRISC/sparc/instructions/sparcCells.sml new file mode 100644 index 0000000..881a41c --- /dev/null +++ b/MLRISC/sparc/instructions/sparcCells.sml @@ -0,0 +1,163 @@ +(* + * WARNING: This file was automatically generated by MDLGen (v3.1) + * from the machine description file "sparc/sparc.mdl". + * DO NOT EDIT this file directly + *) + + +signature SPARCCELLS = +sig + include CELLS + val Y : CellsBasis.cellkind + val PSR : CellsBasis.cellkind + val FSR : CellsBasis.cellkind + val CELLSET : CellsBasis.cellkind + val showGP : CellsBasis.register_id -> string + val showFP : CellsBasis.register_id -> string + val showY : CellsBasis.register_id -> string + val showPSR : CellsBasis.register_id -> string + val showFSR : CellsBasis.register_id -> string + val showCC : CellsBasis.register_id -> string + val showMEM : CellsBasis.register_id -> string + val showCTRL : CellsBasis.register_id -> string + val showCELLSET : CellsBasis.register_id -> string + val showGPWithSize : CellsBasis.register_id * CellsBasis.sz -> string + val showFPWithSize : CellsBasis.register_id * CellsBasis.sz -> string + val showYWithSize : CellsBasis.register_id * CellsBasis.sz -> string + val showPSRWithSize : CellsBasis.register_id * CellsBasis.sz -> string + val showFSRWithSize : CellsBasis.register_id * CellsBasis.sz -> string + val showCCWithSize : CellsBasis.register_id * CellsBasis.sz -> string + val showMEMWithSize : CellsBasis.register_id * CellsBasis.sz -> string + val showCTRLWithSize : CellsBasis.register_id * CellsBasis.sz -> string + val showCELLSETWithSize : CellsBasis.register_id * CellsBasis.sz -> string + val frameptrR : CellsBasis.cell + val linkReg : CellsBasis.cell + val y : CellsBasis.cell + val psr : CellsBasis.cell + val fsr : CellsBasis.cell + val r0 : CellsBasis.cell + val addGP : CellsBasis.cell * cellset -> cellset + val addFP : CellsBasis.cell * cellset -> cellset + val addY : CellsBasis.cell * cellset -> cellset + val addPSR : CellsBasis.cell * cellset -> cellset + val addFSR : CellsBasis.cell * cellset -> cellset + val addCC : CellsBasis.cell * cellset -> cellset + val addMEM : CellsBasis.cell * cellset -> cellset + val addCTRL : CellsBasis.cell * cellset -> cellset + val addCELLSET : CellsBasis.cell * cellset -> cellset +end + +structure SparcCells : SPARCCELLS = +struct + exception SparcCells + fun error msg = MLRiscErrorMsg.error("SparcCells",msg) + open CellsBasis + fun showGPWithSize (r, ty) = (fn (r, _) => (if (r < 8) + then ("%g" ^ (Int.toString r)) + else (if (r = 14) + then "%sp" + else (if (r < 16) + then ("%o" ^ (Int.toString (r - 8))) + else (if (r < 24) + then ("%l" ^ (Int.toString (r - 16))) + else (if (r = 30) + then "%fp" + else (if (r < 32) + then ("%i" ^ (Int.toString (r - 24))) + else ("%r" ^ (Int.toString r)))))))) + ) (r, ty) + and showFPWithSize (r, ty) = (fn (f, _) => "%f" ^ (Int.toString f) + ) (r, ty) + and showYWithSize (r, ty) = (fn _ => "%y" + ) (r, ty) + and showPSRWithSize (r, ty) = (fn (0, _) => "%psr" + | (n, _) => "%psr" ^ (Int.toString n) + ) (r, ty) + and showFSRWithSize (r, ty) = (fn (0, _) => "%fsr" + | (n, _) => "%fsr" ^ (Int.toString n) + ) (r, ty) + and showCCWithSize (r, ty) = (fn _ => "%cc" + ) (r, ty) + and showMEMWithSize (r, ty) = (fn (r, _) => "m" ^ (Int.toString r) + ) (r, ty) + and showCTRLWithSize (r, ty) = (fn (r, _) => "ctrl" ^ (Int.toString r) + ) (r, ty) + and showCELLSETWithSize (r, ty) = (fn _ => "CELLSET" + ) (r, ty) + fun showGP r = showGPWithSize (r, 64) + fun showFP r = showFPWithSize (r, 32) + fun showY r = showYWithSize (r, 64) + fun showPSR r = showPSRWithSize (r, 64) + fun showFSR r = showFSRWithSize (r, 64) + fun showCC r = showCCWithSize (r, 64) + fun showMEM r = showMEMWithSize (r, 8) + fun showCTRL r = showCTRLWithSize (r, 0) + fun showCELLSET r = showCELLSETWithSize (r, 0) + val Y = CellsBasis.newCellKind {name="Y", nickname="y"} + and PSR = CellsBasis.newCellKind {name="PSR", nickname="psr"} + and FSR = CellsBasis.newCellKind {name="FSR", nickname="fsr"} + and CELLSET = CellsBasis.newCellKind {name="CELLSET", nickname="cellset"} + structure MyCells = Cells + (exception Cells = SparcCells + val firstPseudo = 256 + val desc_GP = CellsBasis.DESC {low=0, high=31, kind=CellsBasis.GP, defaultValues=[(0, + 0)], zeroReg=SOME 0, toString=showGP, toStringWithSize=showGPWithSize, + counter=ref 0, dedicated=ref 0, physicalRegs=ref CellsBasis.array0} + and desc_FP = CellsBasis.DESC {low=32, high=63, kind=CellsBasis.FP, + defaultValues=[], zeroReg=NONE, toString=showFP, toStringWithSize=showFPWithSize, + counter=ref 0, dedicated=ref 0, physicalRegs=ref CellsBasis.array0} + and desc_Y = CellsBasis.DESC {low=64, high=64, kind=Y, defaultValues=[], + zeroReg=NONE, toString=showY, toStringWithSize=showYWithSize, + counter=ref 0, dedicated=ref 0, physicalRegs=ref CellsBasis.array0} + and desc_PSR = CellsBasis.DESC {low=65, high=65, kind=PSR, defaultValues=[], + zeroReg=NONE, toString=showPSR, toStringWithSize=showPSRWithSize, + counter=ref 0, dedicated=ref 0, physicalRegs=ref CellsBasis.array0} + and desc_FSR = CellsBasis.DESC {low=66, high=66, kind=FSR, defaultValues=[], + zeroReg=NONE, toString=showFSR, toStringWithSize=showFSRWithSize, + counter=ref 0, dedicated=ref 0, physicalRegs=ref CellsBasis.array0} + and desc_MEM = CellsBasis.DESC {low=67, high=66, kind=CellsBasis.MEM, + defaultValues=[], zeroReg=NONE, toString=showMEM, toStringWithSize=showMEMWithSize, + counter=ref 0, dedicated=ref 0, physicalRegs=ref CellsBasis.array0} + and desc_CTRL = CellsBasis.DESC {low=67, high=66, kind=CellsBasis.CTRL, + defaultValues=[], zeroReg=NONE, toString=showCTRL, toStringWithSize=showCTRLWithSize, + counter=ref 0, dedicated=ref 0, physicalRegs=ref CellsBasis.array0} + and desc_CELLSET = CellsBasis.DESC {low=67, high=66, kind=CELLSET, defaultValues=[], + zeroReg=NONE, toString=showCELLSET, toStringWithSize=showCELLSETWithSize, + counter=ref 0, dedicated=ref 0, physicalRegs=ref CellsBasis.array0} + val cellKindDescs = [(CellsBasis.GP, desc_GP), (CellsBasis.FP, desc_FP), + (Y, desc_Y), (PSR, desc_PSR), (FSR, desc_FSR), (CellsBasis.CC, + desc_GP), (CellsBasis.MEM, desc_MEM), (CellsBasis.CTRL, desc_CTRL), + (CELLSET, desc_CELLSET)] + val cellSize = 8 + ) + + open MyCells + val addGP = CellSet.add + and addFP = CellSet.add + and addY = CellSet.add + and addPSR = CellSet.add + and addFSR = CellSet.add + and addCC = CellSet.add + and addMEM = CellSet.add + and addCTRL = CellSet.add + and addCELLSET = CellSet.add + val RegGP = Reg GP + and RegFP = Reg FP + and RegY = Reg Y + and RegPSR = Reg PSR + and RegFSR = Reg FSR + and RegCC = Reg CC + and RegMEM = Reg MEM + and RegCTRL = Reg CTRL + and RegCELLSET = Reg CELLSET + val stackptrR = RegGP 14 + val frameptrR = RegGP 30 + val asmTmpR = RegGP 10 + val linkReg = RegGP 15 + val fasmTmp = RegFP 30 + val y = RegY 0 + val psr = RegPSR 0 + val fsr = RegFSR 0 + val r0 = RegGP 0 +end + diff --git a/MLRISC/sparc/instructions/sparcFreqProps.sml b/MLRISC/sparc/instructions/sparcFreqProps.sml new file mode 100644 index 0000000..ff4e6d4 --- /dev/null +++ b/MLRISC/sparc/instructions/sparcFreqProps.sml @@ -0,0 +1,46 @@ +(* sparcFreqProps.sml + * + * COPYRIGHT (c) 2002 Bell Labs, Lucent Technologies + * + * Extract frequency information from the sparc architecture + * + * -- Allen + *) + +functor SparcFreqProps(SparcInstr : SPARCINSTR) : FREQUENCY_PROPERTIES = +struct + + structure I = SparcInstr + + val p10 = Probability.percent 10 + val p50 = Probability.percent 50 + val p90 = Probability.percent 90 + val p100 = Probability.always + + fun cond I.BA = p100 + | cond I.BE = p10 + | cond I.BNE = p90 + | cond _ = p50 + + fun fcond I.FBA = p100 + | fcond I.FBE = p10 + | fcond I.FBNE = p90 + | fcond _ = p50 + + fun sparcBranchProb(I.Bicc{b,...}) = cond b + | sparcBranchProb(I.FBfcc{b,...}) = fcond b + | sparcBranchProb(I.BP{b,...}) = cond b + | sparcBranchProb(I.BR _) = p50 + | sparcBranchProb(I.JMP _) = p100 + | sparcBranchProb(I.RET _) = p100 + | sparcBranchProb _ = Probability.never (* non-branch *) + + fun branchProb(I.ANNOTATION{a, i, ...}) = + (case #peek MLRiscAnnotations.BRANCH_PROB a of + SOME b => b + | NONE => branchProb i + ) + | branchProb(I.INSTR(i)) = sparcBranchProb(i) + | branchProb _ = Probability.never + +end diff --git a/MLRISC/sparc/instructions/sparcInstr.sml b/MLRISC/sparc/instructions/sparcInstr.sml new file mode 100644 index 0000000..dd319d5 --- /dev/null +++ b/MLRISC/sparc/instructions/sparcInstr.sml @@ -0,0 +1,542 @@ +(* + * WARNING: This file was automatically generated by MDLGen (v3.1) + * from the machine description file "sparc/sparc.mdl". + * DO NOT EDIT this file directly + *) + + +signature SPARCINSTR = +sig + structure C : SPARCCELLS + structure CB : CELLS_BASIS = CellsBasis + structure T : MLTREE + structure Constant: CONSTANT + structure Region : REGION + sharing Constant = T.Constant + sharing Region = T.Region + datatype load = + LDSB + | LDSH + | LDUB + | LDUH + | LD + | LDX + | LDD + datatype store = + STB + | STH + | ST + | STX + | STD + datatype fload = + LDF + | LDDF + | LDQF + | LDFSR + | LDXFSR + datatype fstore = + STF + | STDF + | STFSR + datatype arith = + AND + | ANDCC + | ANDN + | ANDNCC + | OR + | ORCC + | ORN + | ORNCC + | XOR + | XORCC + | XNOR + | XNORCC + | ADD + | ADDCC + | TADD + | TADDCC + | TADDTV + | TADDTVCC + | SUB + | SUBCC + | TSUB + | TSUBCC + | TSUBTV + | TSUBTVCC + | UMUL + | UMULCC + | SMUL + | SMULCC + | UDIV + | UDIVCC + | SDIV + | SDIVCC + | MULX + | SDIVX + | UDIVX + datatype shift = + SLL + | SRL + | SRA + | SLLX + | SRLX + | SRAX + datatype farith1 = + FiTOs + | FiTOd + | FiTOq + | FsTOi + | FdTOi + | FqTOi + | FsTOd + | FsTOq + | FdTOs + | FdTOq + | FqTOs + | FqTOd + | FMOVs + | FNEGs + | FABSs + | FMOVd + | FNEGd + | FABSd + | FMOVq + | FNEGq + | FABSq + | FSQRTs + | FSQRTd + | FSQRTq + datatype farith2 = + FADDs + | FADDd + | FADDq + | FSUBs + | FSUBd + | FSUBq + | FMULs + | FMULd + | FMULq + | FsMULd + | FdMULq + | FDIVs + | FDIVd + | FDIVq + datatype fcmp = + FCMPs + | FCMPd + | FCMPq + | FCMPEs + | FCMPEd + | FCMPEq + datatype branch = + BN + | BE + | BLE + | BL + | BLEU + | BCS + | BNEG + | BVS + | BA + | BNE + | BG + | BGE + | BGU + | BCC + | BPOS + | BVC + datatype rcond = + RZ + | RLEZ + | RLZ + | RNZ + | RGZ + | RGEZ + datatype cc = + ICC + | XCC + datatype prediction = + PT + | PN + datatype fbranch = + FBN + | FBNE + | FBLG + | FBUL + | FBL + | FBUG + | FBG + | FBU + | FBA + | FBE + | FBUE + | FBGE + | FBUGE + | FBLE + | FBULE + | FBO + datatype ea = + Direct of CellsBasis.cell + | FDirect of CellsBasis.cell + | Displace of {base:CellsBasis.cell, disp:T.labexp, mem:Region.region} + datatype fsize = + S + | D + | Q + datatype operand = + REG of CellsBasis.cell + | IMMED of int + | LAB of T.labexp + | LO of T.labexp + | HI of T.labexp + type addressing_mode = CellsBasis.cell * operand + datatype instr = + LOAD of {l:load, d:CellsBasis.cell, r:CellsBasis.cell, i:operand, mem:Region.region} + | STORE of {s:store, d:CellsBasis.cell, r:CellsBasis.cell, i:operand, mem:Region.region} + | FLOAD of {l:fload, r:CellsBasis.cell, i:operand, d:CellsBasis.cell, mem:Region.region} + | FSTORE of {s:fstore, d:CellsBasis.cell, r:CellsBasis.cell, i:operand, + mem:Region.region} + | UNIMP of {const22:int} + | SETHI of {i:int, d:CellsBasis.cell} + | ARITH of {a:arith, r:CellsBasis.cell, i:operand, d:CellsBasis.cell} + | SHIFT of {s:shift, r:CellsBasis.cell, i:operand, d:CellsBasis.cell} + | MOVicc of {b:branch, i:operand, d:CellsBasis.cell} + | MOVfcc of {b:fbranch, i:operand, d:CellsBasis.cell} + | MOVR of {rcond:rcond, r:CellsBasis.cell, i:operand, d:CellsBasis.cell} + | FMOVicc of {sz:fsize, b:branch, r:CellsBasis.cell, d:CellsBasis.cell} + | FMOVfcc of {sz:fsize, b:fbranch, r:CellsBasis.cell, d:CellsBasis.cell} + | Bicc of {b:branch, a:bool, label:Label.label, nop:bool} + | FBfcc of {b:fbranch, a:bool, label:Label.label, nop:bool} + | BR of {rcond:rcond, p:prediction, r:CellsBasis.cell, a:bool, label:Label.label, + nop:bool} + | BP of {b:branch, p:prediction, cc:cc, a:bool, label:Label.label, nop:bool} + | JMP of {r:CellsBasis.cell, i:operand, labs:Label.label list, nop:bool} + | JMPL of {r:CellsBasis.cell, i:operand, d:CellsBasis.cell, defs:C.cellset, + uses:C.cellset, cutsTo:Label.label list, nop:bool, mem:Region.region} + | CALL of {defs:C.cellset, uses:C.cellset, label:Label.label, cutsTo:Label.label list, + nop:bool, mem:Region.region} + | Ticc of {t:branch, cc:cc, r:CellsBasis.cell, i:operand} + | FPop1 of {a:farith1, r:CellsBasis.cell, d:CellsBasis.cell} + | FPop2 of {a:farith2, r1:CellsBasis.cell, r2:CellsBasis.cell, d:CellsBasis.cell} + | FCMP of {cmp:fcmp, r1:CellsBasis.cell, r2:CellsBasis.cell, nop:bool} + | SAVE of {r:CellsBasis.cell, i:operand, d:CellsBasis.cell} + | RESTORE of {r:CellsBasis.cell, i:operand, d:CellsBasis.cell} + | RDY of {d:CellsBasis.cell} + | WRY of {r:CellsBasis.cell, i:operand} + | RET of {leaf:bool, nop:bool} + | SOURCE of {} + | SINK of {} + | PHI of {} + and instruction = + LIVE of {regs: C.cellset, spilled: C.cellset} + | KILL of {regs: C.cellset, spilled: C.cellset} + | COPY of {k: CellsBasis.cellkind, + sz: int, (* in bits *) + dst: CellsBasis.cell list, + src: CellsBasis.cell list, + tmp: ea option (* NONE if |dst| = {src| = 1 *)} + | ANNOTATION of {i:instruction, a:Annotations.annotation} + | INSTR of instr + val load : {l:load, d:CellsBasis.cell, r:CellsBasis.cell, i:operand, mem:Region.region} -> instruction + val store : {s:store, d:CellsBasis.cell, r:CellsBasis.cell, i:operand, mem:Region.region} -> instruction + val fload : {l:fload, r:CellsBasis.cell, i:operand, d:CellsBasis.cell, mem:Region.region} -> instruction + val fstore : {s:fstore, d:CellsBasis.cell, r:CellsBasis.cell, i:operand, + mem:Region.region} -> instruction + val unimp : {const22:int} -> instruction + val sethi : {i:int, d:CellsBasis.cell} -> instruction + val arith : {a:arith, r:CellsBasis.cell, i:operand, d:CellsBasis.cell} -> instruction + val shift : {s:shift, r:CellsBasis.cell, i:operand, d:CellsBasis.cell} -> instruction + val movicc : {b:branch, i:operand, d:CellsBasis.cell} -> instruction + val movfcc : {b:fbranch, i:operand, d:CellsBasis.cell} -> instruction + val movr : {rcond:rcond, r:CellsBasis.cell, i:operand, d:CellsBasis.cell} -> instruction + val fmovicc : {sz:fsize, b:branch, r:CellsBasis.cell, d:CellsBasis.cell} -> instruction + val fmovfcc : {sz:fsize, b:fbranch, r:CellsBasis.cell, d:CellsBasis.cell} -> instruction + val bicc : {b:branch, a:bool, label:Label.label, nop:bool} -> instruction + val fbfcc : {b:fbranch, a:bool, label:Label.label, nop:bool} -> instruction + val br : {rcond:rcond, p:prediction, r:CellsBasis.cell, a:bool, label:Label.label, + nop:bool} -> instruction + val bp : {b:branch, p:prediction, cc:cc, a:bool, label:Label.label, nop:bool} -> instruction + val jmp : {r:CellsBasis.cell, i:operand, labs:Label.label list, nop:bool} -> instruction + val jmpl : {r:CellsBasis.cell, i:operand, d:CellsBasis.cell, defs:C.cellset, + uses:C.cellset, cutsTo:Label.label list, nop:bool, mem:Region.region} -> instruction + val call : {defs:C.cellset, uses:C.cellset, label:Label.label, cutsTo:Label.label list, + nop:bool, mem:Region.region} -> instruction + val ticc : {t:branch, cc:cc, r:CellsBasis.cell, i:operand} -> instruction + val fpop1 : {a:farith1, r:CellsBasis.cell, d:CellsBasis.cell} -> instruction + val fpop2 : {a:farith2, r1:CellsBasis.cell, r2:CellsBasis.cell, d:CellsBasis.cell} -> instruction + val fcmp : {cmp:fcmp, r1:CellsBasis.cell, r2:CellsBasis.cell, nop:bool} -> instruction + val save : {r:CellsBasis.cell, i:operand, d:CellsBasis.cell} -> instruction + val restore : {r:CellsBasis.cell, i:operand, d:CellsBasis.cell} -> instruction + val rdy : {d:CellsBasis.cell} -> instruction + val wry : {r:CellsBasis.cell, i:operand} -> instruction + val ret : {leaf:bool, nop:bool} -> instruction + val source : {} -> instruction + val sink : {} -> instruction + val phi : {} -> instruction +end + +functor SparcInstr(T: MLTREE + ) : SPARCINSTR = +struct + structure C = SparcCells + structure CB = CellsBasis + structure T = T + structure Region = T.Region + structure Constant = T.Constant + datatype load = + LDSB + | LDSH + | LDUB + | LDUH + | LD + | LDX + | LDD + datatype store = + STB + | STH + | ST + | STX + | STD + datatype fload = + LDF + | LDDF + | LDQF + | LDFSR + | LDXFSR + datatype fstore = + STF + | STDF + | STFSR + datatype arith = + AND + | ANDCC + | ANDN + | ANDNCC + | OR + | ORCC + | ORN + | ORNCC + | XOR + | XORCC + | XNOR + | XNORCC + | ADD + | ADDCC + | TADD + | TADDCC + | TADDTV + | TADDTVCC + | SUB + | SUBCC + | TSUB + | TSUBCC + | TSUBTV + | TSUBTVCC + | UMUL + | UMULCC + | SMUL + | SMULCC + | UDIV + | UDIVCC + | SDIV + | SDIVCC + | MULX + | SDIVX + | UDIVX + datatype shift = + SLL + | SRL + | SRA + | SLLX + | SRLX + | SRAX + datatype farith1 = + FiTOs + | FiTOd + | FiTOq + | FsTOi + | FdTOi + | FqTOi + | FsTOd + | FsTOq + | FdTOs + | FdTOq + | FqTOs + | FqTOd + | FMOVs + | FNEGs + | FABSs + | FMOVd + | FNEGd + | FABSd + | FMOVq + | FNEGq + | FABSq + | FSQRTs + | FSQRTd + | FSQRTq + datatype farith2 = + FADDs + | FADDd + | FADDq + | FSUBs + | FSUBd + | FSUBq + | FMULs + | FMULd + | FMULq + | FsMULd + | FdMULq + | FDIVs + | FDIVd + | FDIVq + datatype fcmp = + FCMPs + | FCMPd + | FCMPq + | FCMPEs + | FCMPEd + | FCMPEq + datatype branch = + BN + | BE + | BLE + | BL + | BLEU + | BCS + | BNEG + | BVS + | BA + | BNE + | BG + | BGE + | BGU + | BCC + | BPOS + | BVC + datatype rcond = + RZ + | RLEZ + | RLZ + | RNZ + | RGZ + | RGEZ + datatype cc = + ICC + | XCC + datatype prediction = + PT + | PN + datatype fbranch = + FBN + | FBNE + | FBLG + | FBUL + | FBL + | FBUG + | FBG + | FBU + | FBA + | FBE + | FBUE + | FBGE + | FBUGE + | FBLE + | FBULE + | FBO + datatype ea = + Direct of CellsBasis.cell + | FDirect of CellsBasis.cell + | Displace of {base:CellsBasis.cell, disp:T.labexp, mem:Region.region} + datatype fsize = + S + | D + | Q + datatype operand = + REG of CellsBasis.cell + | IMMED of int + | LAB of T.labexp + | LO of T.labexp + | HI of T.labexp + type addressing_mode = CellsBasis.cell * operand + datatype instr = + LOAD of {l:load, d:CellsBasis.cell, r:CellsBasis.cell, i:operand, mem:Region.region} + | STORE of {s:store, d:CellsBasis.cell, r:CellsBasis.cell, i:operand, mem:Region.region} + | FLOAD of {l:fload, r:CellsBasis.cell, i:operand, d:CellsBasis.cell, mem:Region.region} + | FSTORE of {s:fstore, d:CellsBasis.cell, r:CellsBasis.cell, i:operand, + mem:Region.region} + | UNIMP of {const22:int} + | SETHI of {i:int, d:CellsBasis.cell} + | ARITH of {a:arith, r:CellsBasis.cell, i:operand, d:CellsBasis.cell} + | SHIFT of {s:shift, r:CellsBasis.cell, i:operand, d:CellsBasis.cell} + | MOVicc of {b:branch, i:operand, d:CellsBasis.cell} + | MOVfcc of {b:fbranch, i:operand, d:CellsBasis.cell} + | MOVR of {rcond:rcond, r:CellsBasis.cell, i:operand, d:CellsBasis.cell} + | FMOVicc of {sz:fsize, b:branch, r:CellsBasis.cell, d:CellsBasis.cell} + | FMOVfcc of {sz:fsize, b:fbranch, r:CellsBasis.cell, d:CellsBasis.cell} + | Bicc of {b:branch, a:bool, label:Label.label, nop:bool} + | FBfcc of {b:fbranch, a:bool, label:Label.label, nop:bool} + | BR of {rcond:rcond, p:prediction, r:CellsBasis.cell, a:bool, label:Label.label, + nop:bool} + | BP of {b:branch, p:prediction, cc:cc, a:bool, label:Label.label, nop:bool} + | JMP of {r:CellsBasis.cell, i:operand, labs:Label.label list, nop:bool} + | JMPL of {r:CellsBasis.cell, i:operand, d:CellsBasis.cell, defs:C.cellset, + uses:C.cellset, cutsTo:Label.label list, nop:bool, mem:Region.region} + | CALL of {defs:C.cellset, uses:C.cellset, label:Label.label, cutsTo:Label.label list, + nop:bool, mem:Region.region} + | Ticc of {t:branch, cc:cc, r:CellsBasis.cell, i:operand} + | FPop1 of {a:farith1, r:CellsBasis.cell, d:CellsBasis.cell} + | FPop2 of {a:farith2, r1:CellsBasis.cell, r2:CellsBasis.cell, d:CellsBasis.cell} + | FCMP of {cmp:fcmp, r1:CellsBasis.cell, r2:CellsBasis.cell, nop:bool} + | SAVE of {r:CellsBasis.cell, i:operand, d:CellsBasis.cell} + | RESTORE of {r:CellsBasis.cell, i:operand, d:CellsBasis.cell} + | RDY of {d:CellsBasis.cell} + | WRY of {r:CellsBasis.cell, i:operand} + | RET of {leaf:bool, nop:bool} + | SOURCE of {} + | SINK of {} + | PHI of {} + and instruction = + LIVE of {regs: C.cellset, spilled: C.cellset} + | KILL of {regs: C.cellset, spilled: C.cellset} + | COPY of {k: CellsBasis.cellkind, + sz: int, (* in bits *) + dst: CellsBasis.cell list, + src: CellsBasis.cell list, + tmp: ea option (* NONE if |dst| = {src| = 1 *)} + | ANNOTATION of {i:instruction, a:Annotations.annotation} + | INSTR of instr + val load = INSTR o LOAD + and store = INSTR o STORE + and fload = INSTR o FLOAD + and fstore = INSTR o FSTORE + and unimp = INSTR o UNIMP + and sethi = INSTR o SETHI + and arith = INSTR o ARITH + and shift = INSTR o SHIFT + and movicc = INSTR o MOVicc + and movfcc = INSTR o MOVfcc + and movr = INSTR o MOVR + and fmovicc = INSTR o FMOVicc + and fmovfcc = INSTR o FMOVfcc + and bicc = INSTR o Bicc + and fbfcc = INSTR o FBfcc + and br = INSTR o BR + and bp = INSTR o BP + and jmp = INSTR o JMP + and jmpl = INSTR o JMPL + and call = INSTR o CALL + and ticc = INSTR o Ticc + and fpop1 = INSTR o FPop1 + and fpop2 = INSTR o FPop2 + and fcmp = INSTR o FCMP + and save = INSTR o SAVE + and restore = INSTR o RESTORE + and rdy = INSTR o RDY + and wry = INSTR o WRY + and ret = INSTR o RET + and source = INSTR o SOURCE + and sink = INSTR o SINK + and phi = INSTR o PHI +end + diff --git a/MLRISC/sparc/instructions/sparcPeephole.peep b/MLRISC/sparc/instructions/sparcPeephole.peep new file mode 100644 index 0000000..7e49dff --- /dev/null +++ b/MLRISC/sparc/instructions/sparcPeephole.peep @@ -0,0 +1,40 @@ +local + + structure I = + struct + include "sparcInstr.sml" + end + +in + +functor SparcPeephole + (structure Instr : SPARCINSTR + structure Eval : MLTREE_EVAL + sharing Eval.T = Instr.T + ) : PEEPHOLE = +struct + structure I = Instr + structure CB = CellsBasis + + (* IMPORTANT: instructions are now given in forward order *) + fun peephole instrs = + let fun isZero(I.LAB le) = ((Eval.valueOf le = 0) handle _ => false) + | isZero(I.REG r) = CB.registerNum r = 0 + | isZero(I.IMMED i) = i = 0 + | isZero _ = false + + fun removable(I.INSTR(I.ARITH{a=(I.ADD | I.SUB), r, i, d})) = + CB.sameColor(r,d) andalso isZero i + | removable(I.ANNOTATION{i,a}) = removable i + | removable _ = false + + fun loop(current, instrs) = + case current of + [] => instrs + | i::rest where removable i => loop(rest, instrs) + | i::rest => loop(rest, i::instrs) + in loop(instrs, []) + end +end + +end diff --git a/MLRISC/sparc/instructions/sparcPeephole.sml b/MLRISC/sparc/instructions/sparcPeephole.sml new file mode 100644 index 0000000..82424c2 --- /dev/null +++ b/MLRISC/sparc/instructions/sparcPeephole.sml @@ -0,0 +1,100 @@ +(* WARNING: this is generated by running 'nowhere sparcPeephole.peep'. + * Do not edit this file directly. + * Version 1.2.2 + *) + +(*#line 10.1 "sparcPeephole.peep"*) +functor SparcPeephole( +(*#line 11.5 "sparcPeephole.peep"*) + structure Instr : SPARCINSTR + +(*#line 12.5 "sparcPeephole.peep"*) + structure Eval : MLTREE_EVAL + +(*#line 13.7 "sparcPeephole.peep"*) + sharing Eval.T = Instr.T + ): PEEPHOLE = +struct + +(*#line 16.4 "sparcPeephole.peep"*) + structure I = Instr + +(*#line 17.4 "sparcPeephole.peep"*) + structure CB = CellsBasis + +(*#line 20.4 "sparcPeephole.peep"*) + fun peephole instrs = + let +(*#line 21.8 "sparcPeephole.peep"*) + fun isZero (I.LAB le) = (((Eval.valueOf le) = 0) handle _ => false +) + | isZero (I.REG r) = (CB.registerNum r) = 0 + | isZero (I.IMMED i) = i = 0 + | isZero _ = false + +(*#line 26.8 "sparcPeephole.peep"*) + fun removable p_0 = + let val v_9 = p_0 + fun state_5 () = false + fun state_2 (v_0, v_1, v_2) = + let val d = v_0 + and i = v_1 + and r = v_2 + in (CB.sameColor (r, d)) andalso (isZero i) + end + in + let val v_8 = v_9 + in + (case v_8 of + I.ANNOTATION v_5 => + let val {a=v_7, i=v_6, ...} = v_5 + in + let val a = v_7 + and i = v_6 + in removable i + end + end + | I.INSTR v_5 => + (case v_5 of + I.ARITH v_4 => + let val {a=v_3, d=v_0, i=v_1, r=v_2, ...} = v_4 + in + (case v_3 of + I.ADD => state_2 (v_0, v_1, v_2) + | I.SUB => state_2 (v_0, v_1, v_2) + | _ => state_5 () + ) + end + | _ => state_5 () + ) + | _ => state_5 () + ) + end + end + +(*#line 31.8 "sparcPeephole.peep"*) + fun loop (current, instrs) = + let val v_13 = current + in + (case v_13 of + op :: v_12 => + let val (v_11, v_10) = v_12 + in + let val i = v_11 + and rest = v_10 + in (if (removable i) + then (loop (rest, instrs)) + else + let val i = v_11 + and rest = v_10 + in loop (rest, i :: instrs) + end) + end + end + | nil => instrs + ) + end + in loop (instrs, []) + end +end + diff --git a/MLRISC/sparc/instructions/sparcProps.sml b/MLRISC/sparc/instructions/sparcProps.sml new file mode 100644 index 0000000..8f6c283 --- /dev/null +++ b/MLRISC/sparc/instructions/sparcProps.sml @@ -0,0 +1,294 @@ +(* sparcProps.sml + * + * COPYRIGHT (c) 2002 Bell Labs, Lucent Technologies + *) + +functor SparcProps + (structure SparcInstr : SPARCINSTR + structure MLTreeEval : MLTREE_EVAL where T = SparcInstr.T + structure MLTreeHash : MLTREE_HASH where T = SparcInstr.T + ) : INSN_PROPERTIES = +struct + structure I = SparcInstr + structure C = I.C + structure T = I.T + structure CB = CellsBasis + + exception NegateConditional + + fun error msg = MLRiscErrorMsg.error("SparcProps",msg) + + datatype kind = IK_JUMP | IK_NOP | IK_INSTR | IK_COPY | IK_CALL + | IK_CALL_WITH_CUTS | IK_PHI | IK_SOURCE | IK_SINK + datatype target = LABELLED of Label.label | FALLTHROUGH | ESCAPES + + val zeroR = Option.valOf(C.zeroReg CB.GP) + val r15 = C.Reg CB.GP 15 + val r31 = C.Reg CB.GP 31 + + (*======================================================================== + * Instruction Kinds + *========================================================================*) + fun instrKind(I.ANNOTATION{i, ...}) = instrKind i + | instrKind(I.COPY _) = IK_COPY + | instrKind(I.INSTR instr) = + (case instr + of (I.Bicc _) => IK_JUMP + | (I.FBfcc _) => IK_JUMP + | (I.JMP _) => IK_JUMP + | (I.RET _) => IK_JUMP + | (I.BR _) => IK_JUMP + | (I.BP _) => IK_JUMP + | (I.Ticc {t=I.BA, ...}) => IK_JUMP (* trap always *) + | (I.CALL{cutsTo=_::_,...}) => IK_CALL_WITH_CUTS + | (I.CALL _) => IK_CALL + | (I.JMPL{cutsTo=_::_,...}) => IK_CALL_WITH_CUTS + | (I.JMPL _) => IK_CALL + | (I.PHI _) => IK_PHI + | (I.SOURCE _) => IK_SOURCE + | (I.SINK _) => IK_SINK + | _ => IK_INSTR + (*esac*)) + | instrKind _ = error "instrKind" + + fun branchTargets(I.ANNOTATION{i,...}) = branchTargets i + | branchTargets(I.INSTR(instr)) = + (case instr + of (I.Bicc{b=I.BA,label,...}) => [LABELLED label] + | (I.Bicc{label,...}) => [LABELLED label, FALLTHROUGH] + | (I.FBfcc{b=I.FBA,label,...}) => [LABELLED label] + | (I.FBfcc{label,...}) => [LABELLED label, FALLTHROUGH] + | (I.BR{label,...}) => [LABELLED label, FALLTHROUGH] + | (I.BP{label,...}) => [LABELLED label, FALLTHROUGH] + | (I.JMP{labs=[],...}) => [ESCAPES] + | (I.RET _) => [ESCAPES] + | (I.JMP{labs,...}) => map LABELLED labs + | (I.CALL{cutsTo,...}) => FALLTHROUGH::map LABELLED cutsTo + | (I.JMPL{cutsTo,...}) => FALLTHROUGH::map LABELLED cutsTo + | (I.Ticc{t=I.BA, ...}) => [ESCAPES] + | _ => error "branchTargets" + (*esac*)) + | branchTargets _ = error "branchTargets" + + + fun setJumpTarget(I.ANNOTATION{a,i}, l) = I.ANNOTATION{a=a, i=setJumpTarget(i,l)} + | setJumpTarget(I.INSTR(I.Bicc{b=I.BA,a,nop,...}), L) = + I.bicc{b=I.BA,a=a,label=L,nop=nop} + | setJumpTarget _ = error "setJumpTarget" + + + fun setBranchTargets{i=I.ANNOTATION{a,i}, t, f} = + I.ANNOTATION{a=a, i=setBranchTargets{i=i, t=t, f=f}} + | setBranchTargets{i=I.INSTR(I.Bicc{b=I.BA,a,nop,...}), ...} = + error "setBranchTargets: Bicc" + | setBranchTargets{i=I.INSTR(I.Bicc{b,a,nop,...}), t, f} = + I.bicc{b=b,a=a,label=t,nop=nop} + | setBranchTargets{i=I.INSTR(I.FBfcc{b,a,nop,...}), t=T, ...} = + I.fbfcc{b=b, a=a, label=T, nop=nop} + | setBranchTargets{i=I.INSTR(I.BR{rcond,p,r,a,nop,...}), t=T, ...} = + I.br{rcond=rcond, p=p, r=r, a=a, label=T, nop=nop} + | setBranchTargets{i=I.INSTR(I.BP{b,cc,p,a,nop,...}), t=T, ...} = + I.bp{b=b, cc=cc, p=p, a=a, label=T, nop=nop} + | setBranchTargets _ = error "setBranchTargets" + + fun revCond I.BA = I.BN + | revCond I.BN = I.BA + | revCond I.BNE = I.BE + | revCond I.BE = I.BNE + | revCond I.BG = I.BLE + | revCond I.BLE = I.BG + | revCond I.BGE = I.BL + | revCond I.BL = I.BGE + | revCond I.BGU = I.BLEU + | revCond I.BLEU = I.BGU + | revCond I.BCC = I.BCS + | revCond I.BCS = I.BCC + | revCond I.BPOS = I.BNEG + | revCond I.BNEG = I.BPOS + | revCond I.BVC = I.BVS + | revCond I.BVS = I.BVC + + fun revFcond I.FBA = I.FBN + | revFcond I.FBN = I.FBA + | revFcond I.FBU = I.FBO + | revFcond I.FBG = I.FBULE + | revFcond I.FBUG = I.FBLE + | revFcond I.FBL = I.FBUGE + | revFcond I.FBUL = I.FBGE + | revFcond I.FBLG = I.FBUE + | revFcond I.FBNE = I.FBE + | revFcond I.FBE = I.FBNE + | revFcond I.FBUE = I.FBLG + | revFcond I.FBGE = I.FBUL + | revFcond I.FBUGE = I.FBL + | revFcond I.FBLE = I.FBUG + | revFcond I.FBULE = I.FBG + | revFcond I.FBO = I.FBU + + fun revRcond I.RZ = I.RNZ + | revRcond I.RLEZ = I.RGZ + | revRcond I.RLZ = I.RGEZ + | revRcond I.RNZ = I.RZ + | revRcond I.RGZ = I.RLEZ + | revRcond I.RGEZ = I.RLZ + + fun revP I.PT = I.PN + | revP I.PN = I.PT + + fun negateConditional (I.INSTR(I.Bicc{b,a,nop,...}), lab) = + I.bicc{b=revCond b,a=a,label=lab,nop=nop} + | negateConditional (I.INSTR(I.FBfcc{b,a,nop,...}), lab) = + I.fbfcc{b=revFcond b,a=a,label=lab,nop=nop} + | negateConditional (I.INSTR(I.BR{p,r,rcond,a,nop,...}), lab) = + I.br{p=revP p,a=a,r=r,rcond=revRcond rcond,label=lab,nop=nop} + | negateConditional (I.INSTR(I.BP{b,cc,p,a,nop,...}), lab) = + I.bp{p=revP p,a=a,b=revCond b,cc=cc,label=lab,nop=nop} + | negateConditional (I.ANNOTATION{i,a}, lab) = + I.ANNOTATION{i=negateConditional(i, lab), a=a} + | negateConditional _ = raise NegateConditional + + fun jump label = I.bicc{b=I.BA,a=true,label=label,nop=true} + + val immedRange = {lo= ~4096, hi = 4095} + + fun loadImmed{immed,t} = + I.arith{a=I.OR,r=zeroR,i= + if #lo immedRange <= immed andalso immed <= #hi immedRange + then I.IMMED immed else I.LAB(T.LI(IntInf.fromInt immed)),d=t} + fun loadOperand{opn, t} = I.arith{a=I.OR,r=zeroR,i=opn, d=t} + + fun moveInstr(I.ANNOTATION{i,...}) = moveInstr i + | moveInstr(I.COPY _) = true + | moveInstr(I.LIVE _) = false + | moveInstr(I.KILL _) = false + | moveInstr _ = false + + fun nop() = I.sethi{d=zeroR, i=0} + + (*======================================================================== + * Parallel Move + *========================================================================*) + fun moveTmpR(I.COPY{tmp, ...}) = + (case tmp + of SOME(I.Direct r) => SOME r + | SOME(I.FDirect f) => SOME f + | _ => NONE + (*esac*)) + | moveTmpR(I.ANNOTATION{i,...}) = moveTmpR i + | moveTmpR _ = NONE + + + fun moveDstSrc(I.COPY{dst,src,...}) = (dst,src) + | moveDstSrc(I.ANNOTATION{i,...}) = moveDstSrc i + | moveDstSrc _ = error "moveDstSrc" + + (*======================================================================== + * Equality and hashing + *========================================================================*) + fun hashOpn(I.REG r) = CB.hashCell r + | hashOpn(I.IMMED i) = Word.fromInt i + | hashOpn(I.LAB l) = MLTreeHash.hash l + | hashOpn(I.LO l) = MLTreeHash.hash l + | hashOpn(I.HI l) = MLTreeHash.hash l + fun eqOpn(I.REG a,I.REG b) = CB.sameColor(a,b) + | eqOpn(I.IMMED a,I.IMMED b) = a = b + | eqOpn(I.LAB a,I.LAB b) = MLTreeEval.==(a,b) + | eqOpn(I.LO a,I.LO b) = MLTreeEval.==(a,b) + | eqOpn(I.HI a,I.HI b) = MLTreeEval.==(a,b) + | eqOpn _ = false + + fun defUseR instr = let + fun oper (I.REG r,def,use) = (def,r::use) + | oper (_,def,use) = (def,use) + fun sparcDU instr = + (case instr + of I.LOAD {r,d,i,...} => oper(i,[d],[r]) + | I.STORE {r,d,i,...} => oper(i,[],[r,d]) + | I.FLOAD {r,d,i,...} => oper(i,[],[r]) + | I.FSTORE {r,d,i,...} => oper(i,[],[r]) + | I.SETHI {d,...} => ([d],[]) + | I.ARITH {r,i,d,...} => oper(i,[d],[r]) + | I.SHIFT {r,i,d,...} => oper(i,[d],[r]) + | I.JMPL{defs,uses,d,r,i,...} => + oper(i,d:: C.getReg defs,r:: C.getReg uses) + | I.BR{r,...} => ([],[r]) + | I.MOVicc{i,d,...} => oper(i,[d],[d]) + | I.MOVfcc{i,d,...} => oper(i,[d],[d]) + | I.MOVR{r,i,d,...} => oper(i,[d],[r,d]) + | I.CALL{defs,uses,...} => (r15 :: C.getReg defs, C.getReg uses) + | I.JMP{r,i,...} => oper(i,[],[r]) + | I.RET{leaf=false,...} => ([],[r31]) + | I.RET{leaf=true,...} => ([],[r15]) + | I.SAVE{r,i,d} => oper(i,[d],[r]) + | I.RESTORE{r,i,d} => oper(i,[d],[r]) + | I.Ticc{r,i,...} => oper(i,[],[r]) + | I.RDY{d,...} => ([d],[]) + | I.WRY{r,i,...} => oper(i,[],[r]) + | _ => ([],[]) + (*esac*)) + in + case instr + of I.ANNOTATION{i, ...} => defUseR i + | I.LIVE{regs, ...} => ([], C.getReg regs) + | I.KILL{regs, ...} => (C.getReg regs, []) + | I.INSTR(i) => sparcDU(i) + | I.COPY{k, dst, src, tmp, ...} => let + val (d,u) = case k of CB.GP => (dst, src) | _ => ([], []) + in + case tmp + of SOME(I.Direct r) => (r::d, u) + | SOME(I.Displace{base, ...}) => (d, base::u) + | _ => (d,u) + end + end + + (* Use of FP registers *) + fun defUseF instr = let + fun sparcDU instr = + (case instr of + I.FLOAD{r,d,i,...} => ([d],[]) + | I.FSTORE{r,d,i,...} => ([],[d]) + | I.FPop1{r,d,...} => ([d],[r]) + | I.FPop2{r1,r2,d,...} => ([d],[r1,r2]) + | I.FCMP{r1,r2,...} => ([],[r1,r2]) + | I.JMPL{defs,uses,...} => (C.getFreg defs,C.getFreg uses) + | I.CALL{defs,uses,...} => (C.getFreg defs,C.getFreg uses) + | I.FMOVicc{r,d,...} => ([d],[r,d]) + | I.FMOVfcc{r,d,...} => ([d],[r,d]) + | _ => ([],[]) + (*esac*)) + in + case instr + of I.ANNOTATION{i, ...} => defUseF i + | I.LIVE{regs, ...} => ([], C.getFreg regs) + | I.KILL{regs, ...} => (C.getFreg regs, []) + | I.COPY{k, dst, src, tmp, ...} => let + val (d, u) = case k of CB.FP => (dst, src) | _ => ([],[]) + in + case tmp + of SOME(I.FDirect f) => (f::d, u) + | _ => (d, u) + end + | I.INSTR(i) => sparcDU(i) + end + + fun defUse CB.GP = defUseR + | defUse CB.FP = defUseF + | defUse _ = error "defUse" + + (*======================================================================== + * Annotations + *========================================================================*) + fun getAnnotations(I.ANNOTATION{i,a}) = + let val (i,an) = getAnnotations i in (i,a::an) end + | getAnnotations i = (i,[]) + fun annotate(i,a) = I.ANNOTATION{i=i,a=a} + + (*======================================================================== + * Replicate an instruction + *========================================================================*) + fun replicate(I.ANNOTATION{i,a}) = I.ANNOTATION{i=replicate i,a=a} + | replicate(I.COPY{k, sz, tmp=SOME _, dst, src}) = + I.COPY{k=k, sz=sz, tmp=SOME(I.Direct(C.newReg())), dst=dst, src=src} + | replicate i = i +end diff --git a/MLRISC/sparc/instructions/sparcShuffle.sig b/MLRISC/sparc/instructions/sparcShuffle.sig new file mode 100644 index 0000000..564abd9 --- /dev/null +++ b/MLRISC/sparc/instructions/sparcShuffle.sig @@ -0,0 +1,11 @@ +(* sparcShuffle.sig -- shuffle src registers into destination registers *) + +signature SPARCSHUFFLE = sig + structure I : SPARCINSTR + + type t = {tmp:I.ea option, dst:CellsBasis.cell list, src:CellsBasis.cell list} + + val shuffle : t -> I.instruction list + val shufflefp : t -> I.instruction list +end + diff --git a/MLRISC/sparc/instructions/sparcShuffle.sml b/MLRISC/sparc/instructions/sparcShuffle.sml new file mode 100644 index 0000000..2433b70 --- /dev/null +++ b/MLRISC/sparc/instructions/sparcShuffle.sml @@ -0,0 +1,33 @@ +functor SparcShuffle(I:SPARCINSTR) : SPARCSHUFFLE = +struct + structure I = I + structure W = Word32 + structure Shuffle = Shuffle(I) + structure CB = CellsBasis + type t = {tmp:I.ea option, dst:CB.cell list, src:CB.cell list} + + fun error msg = MLRiscErrorMsg.error("SparcShuffle",msg) + val zeroR = Option.valOf(I.C.zeroReg CB.GP) + + fun move{src=I.Direct rs, dst=I.Direct rt} = + [I.arith{a=I.OR, r=zeroR, i=I.REG rs, d=rt}] + | move{src=I.Displace{base, disp, mem}, dst=I.Direct rt} = + [I.load{l=I.LD, r=base, i=I.LAB disp, d=rt, mem=mem}] + | move{src=I.Direct rs, dst=I.Displace{base, disp, mem}} = + [I.store{s=I.ST, r=base, i=I.LAB disp, d=rs, mem=mem}] + | move _ = error "move" + + fun fmove{src=I.FDirect fs, dst=I.FDirect fd} = + [I.fpop1{a=I.FMOVd, r=fs, d=fd}] + | fmove{src=I.Displace{base, disp, mem}, dst=I.FDirect ft} = + [I.fload{l=I.LDDF, r=base, i=I.LAB disp, d=ft, mem=mem}] + | fmove{src=I.FDirect fs, dst=I.Displace{base, disp, mem}} = + [I.fstore{s=I.STDF, r=base, i=I.LAB disp, d=fs, mem=mem}] + | fmove _ = error "fmove" + + val shuffle = Shuffle.shuffle{mvInstr = move, ea=I.Direct} + + val shufflefp = Shuffle.shuffle {mvInstr=fmove, ea=I.FDirect} +end + + diff --git a/MLRISC/sparc/instructions/sparccomp-instr-ext.sml b/MLRISC/sparc/instructions/sparccomp-instr-ext.sml new file mode 100644 index 0000000..918dbaa --- /dev/null +++ b/MLRISC/sparc/instructions/sparccomp-instr-ext.sml @@ -0,0 +1,57 @@ +(* sparccomp-instr-ext.sml + * + * COPYRIGHT (c) 2001 Bell Labs, Lucent Technologies + * + * compiling a trivial extensions to the Sparc instruction set + * (UNIMP instruction) + *) +signature SPARCCOMP_INSTR_EXT = sig + structure T : MLTREE + structure I : SPARCINSTR + where T = T + structure TS : MLTREE_STREAM + where T = I.T + structure CFG : CONTROL_FLOW_GRAPH + where I = I + + + type reducer = + (I.instruction, I.C.cellset, I.operand, I.addressing_mode, CFG.cfg) TS.reducer + + val compileSext : + reducer + -> { stm: (T.stm, T.rexp, T.fexp, T.ccexp) SparcInstrExt.sext, + an: T.an list } + -> unit +end + +functor SparcCompInstrExt + (structure I : SPARCINSTR + structure TS : MLTREE_STREAM + where T = I.T + structure CFG : CONTROL_FLOW_GRAPH + where I = I + and P = TS.S.P + ) : SPARCCOMP_INSTR_EXT = +struct + structure CFG = CFG + structure T = TS.T + structure TS = TS + structure I = I + structure C = I.C + structure X = SparcInstrExt + + type stm = (T.stm, T.rexp, T.fexp, T.ccexp) X.sext + + type reducer = + (I.instruction, I.C.cellset, I.operand, I.addressing_mode, CFG.cfg) TS.reducer + + fun compileSext reducer { stm: stm, an: T.an list } = let + val TS.REDUCER { emit, operand, reduceOperand, ... } = reducer + in + case stm + of X.UNIMP i => emit (I.unimp {const22 = i}, an) + | X.SAVE (r, i, d) => emit(I.save{r=reduceOperand(operand r), i=operand i, d=reduceOperand(operand d)}, an) + | X.RESTORE (r, i, d) => emit(I.restore{r=reduceOperand(operand r), i=operand i, d=reduceOperand(operand d)}, an) + end +end diff --git a/MLRISC/sparc/instructions/sparcinstr-ext.sml b/MLRISC/sparc/instructions/sparcinstr-ext.sml new file mode 100644 index 0000000..33c4241 --- /dev/null +++ b/MLRISC/sparc/instructions/sparcinstr-ext.sml @@ -0,0 +1,13 @@ +(* sparcinstr-ext.sml + * + * COPYRIGHT (c) 2001 Bell Labs, Lucent Technologies + * + * a trivial extension to the Sparc instruction set. + *) +structure SparcInstrExt = struct + datatype ('s, 'r, 'f, 'c) sext = + UNIMP of int + | SAVE of ('r * 'r * 'r) + | RESTORE of ('r * 'r * 'r) + +end diff --git a/MLRISC/sparc/mltree/sparc.sml b/MLRISC/sparc/mltree/sparc.sml new file mode 100644 index 0000000..b8e7e40 --- /dev/null +++ b/MLRISC/sparc/mltree/sparc.sml @@ -0,0 +1,785 @@ +(* sparc.sml + * + * COPYRIGHT (c) 2002 Bell Labs, Lucent Technologies + * + * This is a new instruction selection module for Sparc, + * using the new instruction representation and the new MLTREE representation. + * Support for V9 has been added. + * + * The cc bit in arithmetic op are now embedded within the arithmetic + * opcode. This should save some space. + * + * -- Allen + *) + +functor Sparc + (structure SparcInstr : SPARCINSTR + structure PseudoInstrs : SPARC_PSEUDO_INSTR + where I = SparcInstr + structure ExtensionComp : MLTREE_EXTENSION_COMP + where I = SparcInstr + and T = SparcInstr.T + + + (* + * The client should also specify these parameters. + * These are the estimated cost of these instructions. + * The code generator will use alternative sequences that are + * cheaper when their costs are lower. + *) + val muluCost : int ref (* cost of unsigned multiplication in cycles *) + val divuCost : int ref (* cost of unsigned division in cycles *) + val multCost : int ref (* cost of trapping/signed multiplication in cycles *) + val divtCost : int ref (* cost of trapping/signed division in cycles *) + + (* + * If you don't want to use register windows at all, set this to false. + *) + val registerwindow : bool ref (* should we use register windows? *) + + val V9 : bool (* should we use V9 instruction set? *) + val useBR : bool ref + (* should we use the BR instruction (when in V9)? + * I think it is a good idea to use it. + *) + ) : MLTREECOMP = +struct + structure I = SparcInstr + structure T = I.T + structure TS = ExtensionComp.TS + structure R = T.Region + structure C = I.C + structure CB = CellsBasis + structure W = Word32 + structure P = PseudoInstrs + structure A = MLRiscAnnotations + structure CFG = ExtensionComp.CFG + + type instrStream = (I.instruction, C.cellset, CFG.cfg) TS.stream + type mltreeStream = (T.stm, T.mlrisc list, CFG.cfg) TS.stream + + fun toInt n = T.I.toInt(32, n) + fun LI i = T.LI(T.I.fromInt(32, i)) + fun LT (n,m) = T.I.LT(32, n, m) + fun LE (n,m) = T.I.LE(32, n, m) + fun COPY{dst, src, tmp} = + I.COPY{k=CB.GP, sz=32, dst=dst, src=src, tmp=tmp} + fun FCOPY{dst, src, tmp} = + I.COPY{k=CB.FP, sz=64, dst=dst, src=src, tmp=tmp} + + val intTy = if V9 then 64 else 32 + structure Gen = MLTreeGen(structure T = T + structure Cells = C + val intTy = intTy + val naturalWidths = if V9 then [32,64] else [32] + datatype rep = SE | ZE | NEITHER + val rep = NEITHER + ) + + functor Multiply32 = MLTreeMult + (structure I = I + structure T = T + structure CB = CellsBasis + type arg = {r1:CB.cell,r2:CB.cell,d:CB.cell} + type argi = {r:CB.cell,i:int,d:CB.cell} + + val intTy = 32 + fun mov{r,d} = COPY{dst=[d],src=[r],tmp=NONE} + fun add{r1,r2,d} = I.arith{a=I.ADD,r=r1,i=I.REG r2,d=d} + fun slli{r,i,d} = [I.shift{s=I.SLL,r=r,i=I.IMMED i,d=d}] + fun srli{r,i,d} = [I.shift{s=I.SRL,r=r,i=I.IMMED i,d=d}] + fun srai{r,i,d} = [I.shift{s=I.SRA,r=r,i=I.IMMED i,d=d}] + ) + + functor Multiply64 = MLTreeMult + (structure I = I + structure T = T + structure CB = CellsBasis + type arg = {r1:CB.cell,r2:CB.cell,d:CB.cell} + type argi = {r:CB.cell,i:int,d:CB.cell} + + val intTy = 64 + fun mov{r,d} = COPY{dst=[d],src=[r],tmp=NONE} + fun add{r1,r2,d} = I.arith{a=I.ADD,r=r1,i=I.REG r2,d=d} + fun slli{r,i,d} = [I.shift{s=I.SLLX,r=r,i=I.IMMED i,d=d}] + fun srli{r,i,d} = [I.shift{s=I.SRLX,r=r,i=I.IMMED i,d=d}] + fun srai{r,i,d} = [I.shift{s=I.SRAX,r=r,i=I.IMMED i,d=d}] + ) + + (* signed, trapping version of multiply and divide *) + structure Mult32 = Multiply32 + (val trapping = true + val multCost = multCost + fun addv{r1,r2,d} = + I.arith{a=I.ADDCC,r=r1,i=I.REG r2,d=d}::PseudoInstrs.overflowtrap32 + fun subv{r1,r2,d} = + I.arith{a=I.SUBCC,r=r1,i=I.REG r2,d=d}::PseudoInstrs.overflowtrap32 + val sh1addv = NONE + val sh2addv = NONE + val sh3addv = NONE + ) + (val signed = true) + + (* unsigned, non-trapping version of multiply and divide *) + functor Mul32 = Multiply32 + (val trapping = false + val multCost = muluCost + fun addv{r1,r2,d} = [I.arith{a=I.ADD,r=r1,i=I.REG r2,d=d}] + fun subv{r1,r2,d} = [I.arith{a=I.SUB,r=r1,i=I.REG r2,d=d}] + val sh1addv = NONE + val sh2addv = NONE + val sh3addv = NONE + ) + structure Mulu32 = Mul32(val signed = false) + + structure Muls32 = Mul32(val signed = true) + + (* signed, trapping version of multiply and divide *) + structure Mult64 = Multiply64 + (val trapping = true + val multCost = multCost + fun addv{r1,r2,d} = + I.arith{a=I.ADDCC,r=r1,i=I.REG r2,d=d}::PseudoInstrs.overflowtrap64 + fun subv{r1,r2,d} = + I.arith{a=I.SUBCC,r=r1,i=I.REG r2,d=d}::PseudoInstrs.overflowtrap64 + val sh1addv = NONE + val sh2addv = NONE + val sh3addv = NONE + ) + (val signed = true) + + (* unsigned, non-trapping version of multiply and divide *) + functor Mul64 = Multiply64 + (val trapping = false + val multCost = muluCost + fun addv{r1,r2,d} = [I.arith{a=I.ADD,r=r1,i=I.REG r2,d=d}] + fun subv{r1,r2,d} = [I.arith{a=I.SUB,r=r1,i=I.REG r2,d=d}] + val sh1addv = NONE + val sh2addv = NONE + val sh3addv = NONE + ) + structure Mulu64 = Mul64(val signed = false) + + structure Muls64 = Mul64(val signed = true) + + datatype commutative = COMMUTE | NOCOMMUTE + datatype cc = REG (* write to register *) + | CC (* set condition code *) + | CC_REG (* do both *) + + fun error msg = MLRiscErrorMsg.error("Sparc",msg) + + + + fun selectInstructions + (instrStream as + TS.S.STREAM{emit=emitInstruction,defineLabel,entryLabel,pseudoOp,annotation,getAnnotations, + beginCluster,endCluster,exitBlock,comment,...}) = + let + val emit = emitInstruction o I.INSTR + (* Flags *) + val useBR = !useBR + val registerwindow = !registerwindow + + val trap32 = PseudoInstrs.overflowtrap32 + val trap64 = PseudoInstrs.overflowtrap64 + val zeroR = C.r0 + val newReg = C.newReg + val newFreg = C.newFreg + fun immed13 n = LE(~4096, n) andalso LT(n, 4096) + fun immed13w w = let val x = W.~>>(w,0w12) + in x = 0w0 orelse (W.notb x) = 0w0 end + fun splitw w = {hi=W.toInt(W.>>(w,0w10)),lo=W.toInt(W.andb(w,0wx3ff))} + fun split n = splitw(T.I.toWord32(32, n)) + + + val zeroOpn = I.REG zeroR (* zero value operand *) + + fun cond T.LT = I.BL + | cond T.LTU = I.BCS + | cond T.LE = I.BLE + | cond T.LEU = I.BLEU + | cond T.EQ = I.BE + | cond T.NE = I.BNE + | cond T.GE = I.BGE + | cond T.GEU = I.BCC + | cond T.GT = I.BG + | cond T.GTU = I.BGU + | cond _ = error "cond" + + fun rcond T.LT = I.RLZ + | rcond T.LE = I.RLEZ + | rcond T.EQ = I.RZ + | rcond T.NE = I.RNZ + | rcond T.GE = I.RGEZ + | rcond T.GT = I.RGZ + | rcond _ = error "rcond" + + fun signedCmp(T.LT | T.LE | T.EQ | T.NE | T.GE | T.GT) = true + | signedCmp _ = false + + fun fcond T.== = I.FBE + | fcond T.?<> = I.FBNE + | fcond T.? = I.FBU + | fcond T.<=> = I.FBO + | fcond T.> = I.FBG + | fcond T.>= = I.FBGE + | fcond T.?> = I.FBUG + | fcond T.?>= = I.FBUGE + | fcond T.< = I.FBL + | fcond T.<= = I.FBLE + | fcond T.?< = I.FBUL + | fcond T.?<= = I.FBULE + | fcond T.<> = I.FBLG + | fcond T.?= = I.FBUE + | fcond fc = error("fcond "^T.Basis.fcondToString fc) + + fun annotate(i,[]) = i + | annotate(i,a::an) = annotate(I.ANNOTATION{i=i,a=a},an) + fun mark'(i,an) = emitInstruction(annotate(i,an)) + fun mark(i,an) = emitInstruction(annotate(I.INSTR i,an)) + + (* convert an operand into a register *) + fun reduceOpn(I.REG r) = r + | reduceOpn(I.IMMED 0) = zeroR + | reduceOpn i = + let val d = newReg() + in emit(I.ARITH{a=I.OR,r=zeroR,i=i,d=d}); d end + + (* emit parallel copies *) + fun copy(dst,src,an) = + mark'(COPY{dst=dst,src=src, + tmp=case dst of [_] => NONE + | _ => SOME(I.Direct(newReg()))},an) + fun fcopy(dst,src,an) = + mark'(FCOPY{dst=dst,src=src, + tmp=case dst of [_] => NONE + | _ => SOME(I.FDirect(newFreg()))},an) + + (* move register s to register d *) + fun move(s,d,an) = + if CB.sameColor(s,d) orelse CB.registerId d = 0 then () + else mark'(COPY{dst=[d],src=[s],tmp=NONE},an) + + (* move floating point register s to register d *) + fun fmoved(s,d,an) = + if CB.sameColor(s,d) then () + else mark'(FCOPY{dst=[d],src=[s],tmp=NONE},an) + fun fmoves(s,d,an) = fmoved(s,d,an) (* error "fmoves" for now!!! XXX *) + fun fmoveq(s,d,an) = error "fmoveq" + + (* load immediate *) + and loadImmed(n,d,cc,an) = + let val or = if cc <> REG then I.ORCC else I.OR + in if immed13 n then mark(I.ARITH{a=or,r=zeroR,i=I.IMMED(toInt n),d=d},an) + else let val {hi,lo} = split n + in if lo = 0 then + (mark(I.SETHI{i=hi,d=d},an); genCmp0(cc,d)) + else let val t = newReg() + in emit(I.SETHI{i=hi,d=t}); + mark(I.ARITH{a=or,r=t,i=I.IMMED lo,d=d},an) + end + end + end + + (* load label expression *) + and loadLabel(lab,d,cc,an) = + let val or = if cc <> REG then I.ORCC else I.OR + in mark(I.ARITH{a=or,r=zeroR,i=I.LAB lab,d=d},an) end + + (* emit an arithmetic op *) + and arith(a,acc,e1,e2,d,cc,comm,trap,an) = + let val (a,d) = case cc of + REG => (a,d) + | CC => (acc,zeroR) + | CC_REG => (acc,d) + in case (opn e1,opn e2,comm) of + (i,I.REG r,COMMUTE)=> mark(I.ARITH{a=a,r=r,i=i,d=d},an) + | (I.REG r,i,_) => mark(I.ARITH{a=a,r=r,i=i,d=d},an) + | (r,i,_) => mark(I.ARITH{a=a,r=reduceOpn r,i=i,d=d},an) + ; + case trap of [] => () | _ => app emitInstruction trap + end + + (* emit a shift op *) + and shift(s,e1,e2,d,cc,an) = + (mark(I.SHIFT{s=s,r=expr e1,i=opn e2,d=d},an); + genCmp0(cc,d) + ) + + (* emit externally defined multiply or division operation (V8) *) + and extarith(gen,genConst,e1,e2,d,cc,comm) = + let fun nonconst(e1,e2) = + case (opn e1,opn e2,comm) of + (i,I.REG r,COMMUTE) => gen({r=r,i=i,d=d},reduceOpn) + | (I.REG r,i,_) => gen({r=r,i=i,d=d},reduceOpn) + | (r,i,_) => gen({r=reduceOpn r,i=i,d=d},reduceOpn) + fun const(e,i) = + let val r = expr e + in genConst{r=r,i=toInt i,d=d} + handle _ => gen({r=r,i=opn(T.LI i),d=d},reduceOpn) + end + val instrs = + case (comm,e1,e2) of + (_,e1,T.LI i) => const(e1,i) + | (COMMUTE,T.LI i,e2) => const(e2,i) + | _ => nonconst(e1,e2) + in app emitInstruction instrs; + genCmp0(cc,d) + end + + (* emit 64-bit multiply or division operation (V9) *) + and muldiv64(a,genConst,e1,e2,d,cc,comm,an) = + let fun nonconst(e1,e2) = + [annotate( + case (opn e1,opn e2,comm) of + (i,I.REG r,COMMUTE) => I.arith{a=a,r=r,i=i,d=d} + | (I.REG r,i,_) => I.arith{a=a,r=r,i=i,d=d} + | (r,i,_) => I.arith{a=a,r=reduceOpn r,i=i,d=d},an) + ] + fun const(e,i) = + let val r = expr e + in genConst{r=r,i=toInt i,d=d} + handle _ => [annotate(I.arith{a=a,r=r,i=opn(T.LI i),d=d},an)] + end + val instrs = + case (comm,e1,e2) of + (_,e1,T.LI i) => const(e1,i) + | (COMMUTE,T.LI i,e2) => const(e2,i) + | _ => nonconst(e1,e2) + in app emitInstruction instrs; + genCmp0(cc,d) + end + + (* divisions *) + and divu32 x = Mulu32.divide{mode=T.TO_ZERO,stm=doStmt} x + and divs32 x = Muls32.divide{mode=T.TO_ZERO,stm=doStmt} x + and divt32 x = Mult32.divide{mode=T.TO_ZERO,stm=doStmt} x + and divu64 x = Mulu64.divide{mode=T.TO_ZERO,stm=doStmt} x + and divs64 x = Muls64.divide{mode=T.TO_ZERO,stm=doStmt} x + and divt64 x = Mult64.divide{mode=T.TO_ZERO,stm=doStmt} x + + (* emit an unary floating point op *) + and funary(a,e,d,an) = mark(I.FPop1{a=a,r=fexpr e,d=d},an) + + (* emit a binary floating point op *) + and farith(a,e1,e2,d,an) = + mark(I.FPop2{a=a,r1=fexpr e1,r2=fexpr e2,d=d},an) + + (* convert an expression into an addressing mode *) + and addr(T.ADD(ty, (T.ADD (_, e, T.LI n)| + T.ADD (_, T.LI n, e)), T.LI n')) = + addr(T.ADD (ty, e, T.LI (T.I.ADD (ty, n, n')))) + | addr(T.ADD(ty, T.SUB (_, e, T.LI n), T.LI n')) = + addr(T.ADD (ty, e, T.LI (T.I.SUB (ty, n', n)))) + | addr(T.ADD(_,e,T.LI n)) = + if immed13 n then (expr e,I.IMMED(toInt n)) + else let val d = newReg() + in loadImmed(n,d,REG,[]); (d,opn e) end + | addr(T.ADD(_,e,x as T.CONST c)) = (expr e,I.LAB x) + | addr(T.ADD(_,e,x as T.LABEL l)) = (expr e,I.LAB x) + | addr(T.ADD(_,e,T.LABEXP x)) = (expr e,I.LAB x) + | addr(T.ADD(ty,i as T.LI _,e)) = addr(T.ADD(ty,e,i)) + | addr(T.ADD(_,x as T.CONST c,e)) = (expr e,I.LAB x) + | addr(T.ADD(_,x as T.LABEL l,e)) = (expr e,I.LAB x) + | addr(T.ADD(_,T.LABEXP x,e)) = (expr e,I.LAB x) + | addr(T.ADD(_,e1,e2)) = (expr e1,I.REG(expr e2)) + | addr(T.SUB(ty,e,T.LI n)) = addr(T.ADD(ty,e,T.LI(T.I.NEG(32,n)))) + | addr(x as T.LABEL l) = (zeroR,I.LAB x) + | addr(T.LABEXP x) = (zeroR,I.LAB x) + | addr a = (expr a,zeroOpn) + + (* emit an integer load *) + and load(l,a,d,mem,cc,an) = + let val (r,i) = addr a + in mark(I.LOAD{l=l,r=r,i=i,d=d,mem=mem},an); + genCmp0(cc,d) + end + + (* emit an integer store *) + and store(s,a,d,mem,an) = + let val (r,i) = addr a + in mark(I.STORE{s=s,r=r,i=i,d=expr d,mem=mem},an) end + + (* emit a floating point load *) + and fload(l,a,d,mem,an) = + let val (r,i) = addr a + in mark(I.FLOAD{l=l,r=r,i=i,d=d,mem=mem},an) end + + (* emit a floating point store *) + and fstore(s,a,d,mem,an) = + let val (r,i) = addr a + in mark(I.FSTORE{s=s,r=r,i=i,d=fexpr d,mem=mem},an) end + + (* emit a jump *) + and jmp(a,labs,an) = + let val (r,i) = addr a + in mark(I.JMP{r=r,i=i,labs=labs,nop=true},an) end + + (* convert mlrisc to cellset *) + and cellset mlrisc = + let fun g([],set) = set + | g(T.GPR(T.REG(_,r))::regs,set) = g(regs,CB.CellSet.add(r,set)) + | g(T.FPR(T.FREG(_,f))::regs,set) = g(regs,CB.CellSet.add(f,set)) + | g(T.CCR(T.CC(_,cc))::regs,set) = g(regs,CB.CellSet.add(cc,set)) + | g(_::regs, set) = g(regs,set) + in g(mlrisc, C.empty) end + + (* emit a function call *) + and call(a,flow,defs,uses,mem,cutsTo,an,0) = + let val (r,i) = addr a + val defs=cellset(defs) + val uses=cellset(uses) + in case (CB.registerId r,i) of + (0,I.LAB(T.LABEL l)) => + mark(I.CALL{label=l,defs=C.addReg(C.linkReg,defs),uses=uses, + cutsTo=cutsTo,mem=mem,nop=true},an) + | _ => mark(I.JMPL{r=r,i=i,d=C.linkReg,defs=defs,uses=uses, + cutsTo=cutsTo,mem=mem,nop=true},an) + end + | call _ = error "pops<>0 not implemented" + + (* emit an integer branch instruction *) + and branch(T.CMP(ty,cond,a,b),lab,an) = + let val (cond,a,b) = + case a of + (T.LI _ | T.CONST _ | T.LABEL _) => + (T.Basis.swapCond cond,b,a) + | _ => (cond,a,b) + in if V9 then + branchV9(cond,a,b,lab,an) + else + (doExpr(T.SUB(ty,a,b),newReg(),CC,[]); br(cond,lab,an)) + end + | branch(T.CC(cond,r),lab,an) = + if CB.sameColor(r, C.psr) then br(cond,lab,an) + else (genCmp0(CC,r); br(cond,lab,an)) + | branch(T.FCMP(fty,cond,a,b),lab,an) = + let val cmp = case fty of + 32 => I.FCMPs + | 64 => I.FCMPd + | _ => error "fbranch" + in emit(I.FCMP{cmp=cmp,r1=fexpr a,r2=fexpr b,nop=true}); + mark(I.FBfcc{b=fcond cond,a=false,label=lab,nop=true},an) + end + | branch _ = error "branch" + + and branchV9(cond,a,b,lab,an) = + let val size = Gen.Size.size a + in if useBR andalso signedCmp cond then + let val r = newReg() + in doExpr(T.SUB(size,a,b),r,REG,[]); + brcond(cond,r,lab,an) + end + else + let val cc = case size of 32 => I.ICC + | 64 => I.XCC + | _ => error "branchV9" + in doExpr(T.SUB(size,a,b),newReg(),CC,[]); + bp(cond,cc,lab,an) + end + end + + and br(c,lab,an) = mark(I.Bicc{b=cond c,a=true,label=lab,nop=true},an) + + and brcond(c,r,lab,an) = + mark(I.BR{rcond=rcond c,r=r,p=I.PT,a=true,label=lab,nop=true},an) + + and bp(c,cc,lab,an) = + mark(I.BP{b=cond c,cc=cc,p=I.PT,a=true,label=lab,nop=true},an) + + (* generate code for a statement *) + and stmt(T.MV(_,d,e),an) = doExpr(e,d,REG,an) + | stmt(T.FMV(_,d,e),an) = doFexpr(e,d,an) + | stmt(T.CCMV(d,e),an) = doCCexpr(e,d,an) + | stmt(T.COPY(_,dst,src),an) = copy(dst,src,an) + | stmt(T.FCOPY(_,dst,src),an) = fcopy(dst,src,an) + | stmt(T.JMP(T.LABEL l,_),an) = + mark(I.Bicc{b=I.BA,a=true,label=l,nop=false},an) + | stmt(T.JMP(e,labs),an) = jmp(e,labs,an) + | stmt(T.CALL{funct,targets,defs,uses,region,pops,...},an) = + call(funct,targets,defs,uses,region,[],an,pops) + | stmt(T.FLOW_TO + (T.CALL{funct,targets,defs,uses,region,pops,...},cutsTo),an) = + call(funct,targets,defs,uses,region,cutsTo,an,pops) + | stmt(T.RET _,an) = mark(I.RET{leaf=not registerwindow,nop=true},an) + | stmt(T.STORE(8,a,d,mem),an) = store(I.STB,a,d,mem,an) + | stmt(T.STORE(16,a,d,mem),an) = store(I.STH,a,d,mem,an) + | stmt(T.STORE(32,a,d,mem),an) = store(I.ST,a,d,mem,an) + | stmt(T.STORE(64,a,d,mem),an) = + store(if V9 then I.STX else I.STD,a,d,mem,an) + | stmt(T.FSTORE(32,a,d,mem),an) = fstore(I.STF,a,d,mem,an) + | stmt(T.FSTORE(64,a,d,mem),an) = fstore(I.STDF,a,d,mem,an) + | stmt(T.BCC(cc,lab),an) = branch(cc,lab,an) + | stmt(T.DEFINE l,_) = defineLabel l + | stmt(T.ANNOTATION(s,a),an) = stmt(s,a::an) + | stmt(T.EXT s,an) = ExtensionComp.compileSext(reducer()) {stm=s, an=an} + | stmt(s,an) = doStmts(Gen.compileStm s) + + and doStmt s = stmt(s,[]) + + and doStmts ss = app doStmt ss + + (* convert an expression into a register *) + and expr e = let + fun comp() = let + val d = newReg() + in doExpr(e, d, REG, []); d + end + in case e + of T.REG(_,r) => r + | T.LI z => if z = 0 then zeroR else comp() + | _ => comp() + end + + (* compute an integer expression and put the result in register d + * If cc is set then set the condition code with the result. + *) + and doExpr(e,d,cc,an) = + case e of + T.REG(_,r) => (move(r,d,an); genCmp0(cc,r)) + | T.LI n => loadImmed(n,d,cc,an) + | T.LABEL l => loadLabel(e,d,cc,an) + | T.CONST c => loadLabel(e,d,cc,an) + | T.LABEXP x => loadLabel(x,d,cc,an) + + (* generic 32/64 bit support *) + | T.ADD(_,a,b) => arith(I.ADD,I.ADDCC,a,b,d,cc,COMMUTE,[],an) + | T.SUB(_,a,b) => let + fun default() = arith(I.SUB,I.SUBCC,a,b,d,cc,NOCOMMUTE,[],an) + in + case b + of T.LI z => + if z = 0 then doExpr(a,d,cc,an) else default() + | _ => default() + (*esac*) + end + + | T.ANDB(_,a,T.NOTB(_,b)) => + arith(I.ANDN,I.ANDNCC,a,b,d,cc,NOCOMMUTE,[],an) + | T.ORB(_,a,T.NOTB(_,b)) => + arith(I.ORN,I.ORNCC,a,b,d,cc,NOCOMMUTE,[],an) + | T.XORB(_,a,T.NOTB(_,b)) => + arith(I.XNOR,I.XNORCC,a,b,d,cc,COMMUTE,[],an) + | T.ANDB(_,T.NOTB(_,a),b) => + arith(I.ANDN,I.ANDNCC,b,a,d,cc,NOCOMMUTE,[],an) + | T.ORB(_,T.NOTB(_,a),b) => + arith(I.ORN,I.ORNCC,b,a,d,cc,NOCOMMUTE,[],an) + | T.XORB(_,T.NOTB(_,a),b) => + arith(I.XNOR,I.XNORCC,b,a,d,cc,COMMUTE,[],an) + | T.NOTB(_,T.XORB(_,a,b)) => + arith(I.XNOR,I.XNORCC,a,b,d,cc,COMMUTE,[],an) + + | T.ANDB(_,a,b) => arith(I.AND,I.ANDCC,a,b,d,cc,COMMUTE,[],an) + | T.ORB(_,a,b) => arith(I.OR,I.ORCC,a,b,d,cc,COMMUTE,[],an) + | T.XORB(_,a,b) => arith(I.XOR,I.XORCC,a,b,d,cc,COMMUTE,[],an) + | T.NOTB(_,a) => arith(I.XNOR,I.XNORCC,a,LI 0,d,cc,COMMUTE,[],an) + + (* 32 bit support *) + | T.SRA(32,a,b) => shift(I.SRA,a,b,d,cc,an) + | T.SRL(32,a,b) => shift(I.SRL,a,b,d,cc,an) + | T.SLL(32,a,b) => shift(I.SLL,a,b,d,cc,an) + | T.ADDT(32,a,b)=> + arith(I.ADDCC,I.ADDCC,a,b,d,CC_REG,COMMUTE,trap32,an) + | T.SUBT(32,a,b)=> + arith(I.SUBCC,I.SUBCC,a,b,d,CC_REG,NOCOMMUTE,trap32,an) + | T.MULU(32,a,b) => extarith(P.umul32, + Mulu32.multiply,a,b,d,cc,COMMUTE) + | T.MULS(32,a,b) => extarith(P.smul32, + Muls32.multiply,a,b,d,cc,COMMUTE) + | T.MULT(32,a,b) => extarith(P.smul32trap, + Mult32.multiply,a,b,d,cc,COMMUTE) + | T.DIVU(32,a,b) => extarith(P.udiv32,divu32,a,b,d,cc,NOCOMMUTE) + | T.DIVS(T.DIV_TO_ZERO,32,a,b) => + extarith(P.sdiv32,divs32,a,b,d,cc,NOCOMMUTE) + | T.DIVT(T.DIV_TO_ZERO,32,a,b) => + extarith(P.sdiv32trap,divt32,a,b,d,cc,NOCOMMUTE) + + (* 64 bit support *) + | T.SRA(64,a,b) => shift(I.SRAX,a,b,d,cc,an) + | T.SRL(64,a,b) => shift(I.SRLX,a,b,d,cc,an) + | T.SLL(64,a,b) => shift(I.SLLX,a,b,d,cc,an) + | T.ADDT(64,a,b)=> + arith(I.ADDCC,I.ADDCC,a,b,d,CC_REG,COMMUTE,trap64,an) + | T.SUBT(64,a,b)=> + arith(I.SUBCC,I.SUBCC,a,b,d,CC_REG,NOCOMMUTE,trap64,an) + | T.MULU(64,a,b) => + muldiv64(I.MULX,Mulu64.multiply,a,b,d,cc,COMMUTE,an) + | T.MULS(64,a,b) => + muldiv64(I.MULX,Muls64.multiply,a,b,d,cc,COMMUTE,an) + | T.MULT(64,a,b) => + (muldiv64(I.MULX,Mult64.multiply,a,b,d,CC_REG,COMMUTE,an); + app emitInstruction trap64) + | T.DIVU(64,a,b) => muldiv64(I.UDIVX,divu64,a,b,d,cc,NOCOMMUTE,an) + | T.DIVS(T.DIV_TO_ZERO,64,a,b) => + muldiv64(I.SDIVX,divs64,a,b,d,cc,NOCOMMUTE,an) + | T.DIVT(T.DIV_TO_ZERO,64,a,b) => + muldiv64(I.SDIVX,divt64,a,b,d,cc,NOCOMMUTE,an) + + (* loads *) + | T.LOAD(8,a,mem) => load(I.LDUB,a,d,mem,cc,an) + | T.SX(_,_,T.LOAD(8,a,mem)) => load(I.LDSB,a,d,mem,cc,an) + | T.LOAD(16,a,mem) => load(I.LDUH,a,d,mem,cc,an) + | T.SX(_,_,T.LOAD(16,a,mem)) => load(I.LDSH,a,d,mem,cc,an) + | T.LOAD(32,a,mem) => load(I.LD,a,d,mem,cc,an) + | T.LOAD(64,a,mem) => + load(if V9 then I.LDX else I.LDD,a,d,mem,cc,an) + + (* conditional expression *) + | T.COND exp => doStmts (Gen.compileCond{exp=exp,rd=d,an=an}) + + (* misc *) + | T.LET(s,e) => (doStmt s; doExpr(e, d, cc, an)) + | T.MARK(e,A.MARKREG f) => (f d; doExpr(e,d,cc,an)) + | T.MARK(e,a) => doExpr(e,d,cc,a::an) + | T.PRED(e,c) => doExpr(e,d,cc,A.CTRLUSE c::an) + | T.REXT e => ExtensionComp.compileRext (reducer()) {e=e, rd=d, an=an} + | e => doExpr(Gen.compileRexp e,d,cc,an) + + (* generate a comparison with zero *) + and genCmp0(REG,_) = () + | genCmp0(_,d) = emit(I.ARITH{a=I.SUBCC,r=d,i=zeroOpn,d=zeroR}) + + (* convert an expression into a floating point register *) + and fexpr(T.FREG(_,r)) = r + | fexpr e = let val d = newFreg() in doFexpr(e,d,[]); d end + + (* compute a floating point expression and put the result in d *) + and doFexpr(e,d,an) = + case e of + (* single precision *) + T.FREG(32,r) => fmoves(r,d,an) + | T.FLOAD(32,ea,mem) => fload(I.LDF,ea,d,mem,an) + | T.FADD(32,a,b) => farith(I.FADDs,a,b,d,an) + | T.FSUB(32,a,b) => farith(I.FSUBs,a,b,d,an) + | T.FMUL(32,a,b) => farith(I.FMULs,a,b,d,an) + | T.FDIV(32,a,b) => farith(I.FDIVs,a,b,d,an) + | T.FABS(32,a) => funary(I.FABSs,a,d,an) + | T.FNEG(32,a) => funary(I.FNEGs,a,d,an) + | T.FSQRT(32,a) => funary(I.FSQRTs,a,d,an) + + (* double precision *) + | T.FREG(64,r) => fmoved(r,d,an) + | T.FLOAD(64,ea,mem) => fload(I.LDDF,ea,d,mem,an) + | T.FADD(64,a,b) => farith(I.FADDd,a,b,d,an) + | T.FSUB(64,a,b) => farith(I.FSUBd,a,b,d,an) + | T.FMUL(64,a,b) => farith(I.FMULd,a,b,d,an) + | T.FDIV(64,a,b) => farith(I.FDIVd,a,b,d,an) + | T.FABS(64,a) => funary(I.FABSd,a,d,an) + | T.FNEG(64,a) => funary(I.FNEGd,a,d,an) + | T.FSQRT(64,a) => funary(I.FSQRTd,a,d,an) + + (* quad precision *) + | T.FREG(128,r) => fmoveq(r,d,an) + | T.FADD(128,a,b) => farith(I.FADDq,a,b,d,an) + | T.FSUB(128,a,b) => farith(I.FSUBq,a,b,d,an) + | T.FMUL(128,a,b) => farith(I.FMULq,a,b,d,an) + | T.FDIV(128,a,b) => farith(I.FDIVq,a,b,d,an) + | T.FABS(128,a) => funary(I.FABSq,a,d,an) + | T.FNEG(128,a) => funary(I.FNEGq,a,d,an) + | T.FSQRT(128,a) => funary(I.FSQRTq,a,d,an) + + (* floating point to floating point *) + | T.CVTF2F(ty,ty',e) => + (case (ty,ty') of + (32,32) => doFexpr(e,d,an) + | (64,32) => funary(I.FsTOd,e,d,an) + | (128,32) => funary(I.FsTOq,e,d,an) + | (32,64) => funary(I.FdTOs,e,d,an) + | (64,64) => doFexpr(e,d,an) + | (128,64) => funary(I.FdTOq,e,d,an) + | (32,128) => funary(I.FqTOs,e,d,an) + | (64,128) => funary(I.FqTOd,e,d,an) + | (128,128) => doFexpr(e,d,an) + | _ => error "CVTF2F" + ) + + (* integer to floating point *) + | T.CVTI2F(32,32,e) => app emitInstruction (P.cvti2s({i=opn e,d=d},reduceOpn)) + | T.CVTI2F(64,32,e) => app emitInstruction (P.cvti2d({i=opn e,d=d},reduceOpn)) + | T.CVTI2F(128,32,e) => app emitInstruction (P.cvti2q({i=opn e,d=d},reduceOpn)) + + | T.FMARK(e,A.MARKREG f) => (f d; doFexpr(e,d,an)) + | T.FMARK(e,a) => doFexpr(e,d,a::an) + | T.FPRED(e,c) => doFexpr(e,d,A.CTRLUSE c::an) + | T.FEXT e => ExtensionComp.compileFext (reducer()) {e=e, fd=d, an=an} + | e => doFexpr(Gen.compileFexp e,d,an) + + and doCCexpr(T.CMP(ty,cond,e1,e2),cc,an) = + if CB.sameColor(cc,C.psr) then + doExpr(T.SUB(ty,e1,e2),newReg(),CC,an) + else error "doCCexpr" + | doCCexpr(T.CC(_,r),d,an) = + if CB.sameColor(r,C.psr) then error "doCCexpr" + else move(r,d,an) + | doCCexpr(T.CCMARK(e,A.MARKREG f),d,an) = (f d; doCCexpr(e,d,an)) + | doCCexpr(T.CCMARK(e,a),d,an) = doCCexpr(e,d,a::an) + | doCCexpr(T.CCEXT e,d,an) = + ExtensionComp.compileCCext (reducer()) {e=e, ccd=d, an=an} + | doCCexpr e = error "doCCexpr" + + and ccExpr e = let val d = newReg() in doCCexpr(e,d,[]); d end + + (* convert an expression into an operand *) + and opn(x as T.CONST c) = I.LAB x + | opn(x as T.LABEL l) = I.LAB x + | opn(T.LABEXP x) = I.LAB x + | opn(e as T.LI n) = + if n = 0 then zeroOpn + else if immed13 n then I.IMMED(toInt n) + else I.REG(expr e) + | opn e = I.REG(expr e) + + and reducer() = + TS.REDUCER{reduceRexp = expr, + reduceFexp = fexpr, + reduceCCexp = ccExpr, + reduceStm = stmt, + operand = opn, + reduceOperand = reduceOpn, + addressOf = addr, + emit = emitInstruction o annotate, + instrStream = instrStream, + mltreeStream = self() + } + and self() = + TS.S.STREAM + { beginCluster = beginCluster, + endCluster = endCluster, + emit = doStmt, + pseudoOp = pseudoOp, + defineLabel = defineLabel, + entryLabel = entryLabel, + comment = comment, + annotation = annotation, + getAnnotations = getAnnotations, + exitBlock = fn regs => exitBlock(cellset regs) + } + in self() + end + +end + +(* + * Machine code generator for SPARC. + * + * The SPARC architecture has 32 general purpose registers (%g0 is always 0) + * and 32 single precision floating point registers. + * + * Some Ugliness: double precision floating point registers are + * register pairs. There are no double precision moves, negation and absolute + * values. These require two single precision operations. I've created + * composite instructions FMOVd, FNEGd and FABSd to stand for these. + * + * All integer arithmetic instructions can optionally set the condition + * code register. We use this to simplify certain comparisons with zero. + * + * Integer multiplication, division and conversion from integer to floating + * go thru the pseudo instruction interface, since older sparcs do not + * implement these instructions in hardware. + * + * In addition, the trap instruction for detecting overflow is a parameter. + * This allows different trap vectors to be used. + * + * -- Allen + *) diff --git a/MLRISC/sparc/mltree/sparcPseudoInstr.sig b/MLRISC/sparc/mltree/sparcPseudoInstr.sig new file mode 100644 index 0000000..9e0df86 --- /dev/null +++ b/MLRISC/sparc/mltree/sparcPseudoInstr.sig @@ -0,0 +1,38 @@ +(* + * sparcPseudoInstr.sig --- Sparc pseudo instructions + *) + +signature SPARC_PSEUDO_INSTR = sig + structure I : SPARCINSTR + + type format1 = + {r:CellsBasis.cell, i:I.operand, d:CellsBasis.cell} * + (I.operand -> CellsBasis.cell) -> I.instruction list + + type format2 = + {i:I.operand, d:CellsBasis.cell} * + (I.operand -> CellsBasis.cell) -> I.instruction list + (* + * Signed and unsigned multiplications. + * These are all 32 bit operations + *) + val umul32 : format1 (* unsigned/non-trapping *) + val smul32 : format1 (* signed/non-trapping *) + val smul32trap : format1 (* trap on overflow *) + val udiv32 : format1 (* unsigned/non-trapping *) + val sdiv32 : format1 (* signed/non-trapping *) + val sdiv32trap : format1 (* trap on overflow/zero *) + + (* convert integer into floating point *) + val cvti2d : format2 + val cvti2s : format2 + val cvti2q : format2 + + (* 32-bit overflow detection *) + val overflowtrap32 : I.instruction list + + (* 64-bit overflow detection *) + val overflowtrap64 : I.instruction list + +end + diff --git a/MLRISC/sparc/ra/sparcRegAlloc.sml b/MLRISC/sparc/ra/sparcRegAlloc.sml new file mode 100644 index 0000000..d24ca23 --- /dev/null +++ b/MLRISC/sparc/ra/sparcRegAlloc.sml @@ -0,0 +1,75 @@ +(* sparcRegAlloc.sml --- sparc integer and floating register allocator + * + * COPYRIGHT (c) 1996 AT&T Bell Laboratories. + * + *) + +(* Integer and floating register allocators are a partial application + * of a curried functor. + *) + +functor SparcRegAlloc(structure I : INSTRUCTIONS where C = SparcCells + structure P : INSN_PROPERTIES where I = I + structure F : FLOWGRAPH where I = I + structure Asm : INSTRUCTION_EMITTER where I = I and P=F.P + ) : + sig + structure I : INSTRUCTIONS + functor IntRa (structure RaUser : RA_USER_PARAMS + where I = I and B = F.B) : RA + functor FloatRa (structure RaUser : RA_USER_PARAMS + where I = I and B = F.B) : RA + end= +struct + + structure I = I + structure C=I.C + + (* liveness analysis for general purpose registers *) + structure RegLiveness = + Liveness(structure Flowgraph=F + structure Instruction=I + val defUse = P.defUse C.GP + val regSet = C.getCell C.GP + val cellset = C.updateCell C.GP) + + + functor IntRa = + RegAllocator + (structure RaArch = struct + structure InsnProps = P + structure AsmEmitter = Asm + structure I = I + structure Liveness=RegLiveness + + val defUse = P.defUse C.GP + val firstPseudoR = 32 + val maxPseudoR = SparcCells.maxCell + val numRegs = SparcCells.numCell SparcCells.GP + val regSet = C.getCell C.GP + end) + + (* liveness analysis for floating point registers *) + structure FregLiveness = + Liveness(structure Flowgraph=F + structure Instruction=I + val defUse = P.defUse C.FP + val regSet = C.getCell C.FP + val cellset = C.updateCell C.FP) + + functor FloatRa = + RegAllocator + (structure RaArch = struct + structure InsnProps = P + structure AsmEmitter = Asm + structure I = I + structure Liveness=FregLiveness + + val defUse = P.defUse C.FP + val firstPseudoR = 64 + val maxPseudoR = SparcCells.maxCell + val numRegs = SparcCells.numCell SparcCells.FP + val regSet = C.getCell C.FP + end) +end + diff --git a/MLRISC/sparc/ra/sparcRewrite.sml b/MLRISC/sparc/ra/sparcRewrite.sml new file mode 100644 index 0000000..93452ac --- /dev/null +++ b/MLRISC/sparc/ra/sparcRewrite.sml @@ -0,0 +1,158 @@ +functor SparcRewrite(Instr:SPARCINSTR) = +struct + structure I = Instr + structure C = I.C + structure CB = CellsBasis + structure CS = CB.CellSet + + fun error msg = MLRiscErrorMsg.error ("SparcRewrite", msg) + + fun rewriteUse(instr,rs,rt) = let + fun match r = CB.sameColor(r,rs) + fun R r = if match r then rt else r + fun O(i as I.REG r) = if match r then I.REG rt else i + | O i = i + fun EA(SOME(I.Displace{base, disp, mem})) = + SOME(I.Displace{base=R base, disp=disp, mem=mem}) + | EA ea = ea + fun sparcUse(instr) = + (case instr of + I.LOAD{l,r,i,d,mem} => I.LOAD{l=l,r=R r,i=O i,d=d,mem=mem} + | I.STORE{s,d,r,i,mem} => I.STORE{s=s,d=R d,r=R r,i=O i,mem=mem} + | I.FLOAD{l,r,i,d,mem} => I.FLOAD{l=l,r=R r,i=O i,d=d,mem=mem} + | I.FSTORE{s,d,r,i,mem} => I.FSTORE{s=s,d=d,r=R r,i=O i,mem=mem} + | I.ARITH{a,r,i,d} => I.ARITH{a=a,r=R r,i=O i,d=d} + | I.SHIFT{s,r,i,d} => I.SHIFT{s=s,r=R r,i=O i,d=d} + | I.BR{r,p,rcond,a,nop,label} => + I.BR{r=R r,p=p,rcond=rcond,a=a,nop=nop,label=label} + | I.MOVicc{b,i,d} => I.MOVicc{b=b,i=O i,d=R d} + | I.MOVfcc{b,i,d} => I.MOVfcc{b=b,i=O i,d=R d} + | I.MOVR{rcond,r,i,d} => I.MOVR{rcond=rcond,r=R r,i=O i,d=R d} + | I.JMP{r,i,labs,nop} => I.JMP{r=R r,i=O i,labs=labs,nop=nop} + | I.JMPL{r,i,d,defs,uses,cutsTo,nop,mem} => + I.JMPL{r=R r,i=O i,d=d,defs=defs, + uses=CS.map {from=rs,to=rt} uses, + cutsTo=cutsTo,nop=nop,mem=mem} + | I.CALL{defs,uses,label,cutsTo,nop,mem} => + I.CALL{defs=defs,uses=CS.map {from=rs,to=rt} uses, + label=label,cutsTo=cutsTo,nop=nop,mem=mem} + | I.SAVE{r,i,d} => I.SAVE{r=R r,i=O i,d=d} + | I.RESTORE{r,i,d} => I.RESTORE{r=R r,i=O i,d=d} + | I.WRY{r,i} => I.WRY{r=R r,i=O i} + | I.Ticc{t,cc,r,i} => I.Ticc{t=t,cc=cc,r=R r,i=O i} + | _ => instr + (*esac*)) + in + case instr + of (I.ANNOTATION{i, ...}) => rewriteUse(i, rs, rt) + | I.LIVE{regs, spilled} => I.LIVE{regs=C.addReg(rt, C.rmvReg(rs, regs)), + spilled=spilled} + | I.INSTR(i) => I.INSTR(sparcUse(i)) + | I.COPY{k as CB.GP, sz, src,dst,tmp} => + I.COPY{k=k, sz=sz, src=map R src,dst=dst,tmp=EA tmp} + | _ => error "rewriteUse" + end + + fun rewriteDef(instr,rs,rt) = let + fun match r = CB.sameColor(r,rs) + fun R r = if match r then rt else r + fun ea(SOME(I.Direct r)) = SOME(I.Direct(R r)) + | ea x = x + fun sparcDef(instr) = + (case instr + of I.LOAD{l,r,i,d,mem} => I.LOAD{l=l,r=r,i=i,d=R d,mem=mem} + | I.ARITH{a,r,i,d} => I.ARITH{a=a,r=r,i=i,d=R d} + | I.SHIFT{s,r,i,d} => I.SHIFT{s=s,r=r,i=i,d=R d} + | I.SETHI{i,d} => I.SETHI{i=i,d=R d} + | I.MOVicc{b,i,d} => I.MOVicc{b=b,i=i,d=R d} + | I.MOVfcc{b,i,d} => I.MOVfcc{b=b,i=i,d=R d} + | I.MOVR{rcond,r,i,d} => I.MOVR{rcond=rcond,r=r,i=i,d=R d} + | I.JMPL{r,i,d,defs,uses,cutsTo,nop,mem} => + I.JMPL{r=r,i=i,d=R d,defs=CS.map {from=rs,to=rt} defs, + uses=uses,cutsTo=cutsTo,nop=nop,mem=mem} + | I.CALL{defs,uses,label,cutsTo,nop,mem} => + I.CALL{defs=CS.map {from=rs,to=rt} defs, + uses=uses,label=label,cutsTo=cutsTo,nop=nop,mem=mem} + | I.SAVE{r,i,d} => I.SAVE{r=r,i=i,d=R d} + | I.RESTORE{r,i,d} => I.RESTORE{r=r,i=i,d=R d} + | I.RDY{d} => I.RDY{d=R d} + | _ => instr + (*esac*)) + in + case instr + of I.ANNOTATION{i, ...} => rewriteDef(i, rs, rt) + | I.KILL{regs, spilled} => + I.KILL{regs=C.addReg(rt, C.rmvReg(rs, regs)), spilled=spilled} + | I.INSTR(i) => I.INSTR(sparcDef(i)) + | I.COPY{k as CB.GP, sz, src,dst,tmp} => + I.COPY{k=k, sz=sz, src=src, dst=map R dst,tmp=ea tmp} + | _ => error "rewriteDef" + end + + + fun frewriteUse(instr,rs,rt) = let + fun match r = CB.sameColor(r,rs) + fun R r = if match r then rt else r + fun sparcUse(instr) = + (case instr of + I.FPop1{a,r,d} => I.FPop1{a=a,r=R r,d=d} + | I.FPop2{a,r1,r2,d} => I.FPop2{a=a,r1=R r1,r2=R r2,d=d} + | I.FCMP{cmp,r1,r2,nop} => I.FCMP{cmp=cmp,r1=R r1,r2=R r2,nop=nop} + | I.FSTORE{s,r,i,d,mem} => I.FSTORE{s=s,r=r,i=i,d=R d,mem=mem} + | I.FMOVicc{sz,b,r,d} => I.FMOVicc{sz=sz,b=b,r=R r,d=R d} + | I.FMOVfcc{sz,b,r,d} => I.FMOVfcc{sz=sz,b=b,r=R r,d=R d} + | I.JMPL{r,i,d,defs,uses,cutsTo,nop,mem} => + I.JMPL{r=r,i=i,d=d,defs=defs, + uses=CS.map {from=rs,to=rt} uses, + cutsTo=cutsTo,nop=nop,mem=mem} + | I.CALL{defs,uses,label,cutsTo,nop,mem} => + I.CALL{defs=defs,uses=CS.map {from=rs,to=rt} uses, + label=label,cutsTo=cutsTo,nop=nop,mem=mem} + | _ => instr + (*esac*)) + in + case instr + of I.ANNOTATION{i, ...} => frewriteUse(i, rs, rt) + | I.INSTR(i) => I.INSTR(sparcUse(i)) + | I.LIVE{regs, spilled} => + I.LIVE{regs=C.addFreg(rt, C.rmvFreg(rs, regs)), spilled=spilled} + | I.COPY{k as CB.FP, sz, src,dst,tmp} => + I.COPY{k=k, sz=sz, src=map R src,dst=dst,tmp=tmp} + | _ => error "frewriteUse" + + end + + + fun frewriteDef(instr,rs,rt) = let + fun match r = CB.sameColor(r,rs) + fun R r = if match r then rt else r + fun ea(SOME(I.FDirect r)) = SOME(I.FDirect(R r)) + | ea x = x + fun sparcDef(instr) = + (case instr of + I.FPop1{a,r,d} => I.FPop1{a=a,r=r,d=R d} + | I.FPop2{a,r1,r2,d} => I.FPop2{a=a,r1=r1,r2=r2,d=R d} + | I.FLOAD{l,r,i,d,mem} => I.FLOAD{l=l,r=r,i=i,d=R d,mem=mem} + | I.FMOVicc{sz,b,r,d} => I.FMOVicc{sz=sz,b=b,r=r,d=R d} + | I.FMOVfcc{sz,b,r,d} => I.FMOVfcc{sz=sz,b=b,r=r,d=R d} + | I.JMPL{r,i,d,defs,uses,cutsTo,nop,mem} => + I.JMPL{r=r,i=i,d=d,defs=CS.map {from=rs,to=rt} defs, + uses=uses,cutsTo=cutsTo,nop=nop,mem=mem} + | I.CALL{defs,uses,label,cutsTo,nop,mem} => + I.CALL{defs=CS.map {from=rs,to=rt} defs, + uses=uses,label=label,cutsTo=cutsTo,nop=nop,mem=mem} + | _ => instr + (*esac*)) + in + case instr + of I.ANNOTATION{i, ...} => frewriteDef(i, rs, rt) + | I.KILL{regs, spilled} => + I.KILL{regs=C.addFreg(rt, C.rmvFreg(rs, regs)), spilled=spilled} + | I.INSTR(i) => I.INSTR(sparcDef(i)) + | I.COPY{k as CB.FP, sz, src,dst,tmp} => + I.COPY{k=k, sz=sz, src=src,dst=map R dst,tmp=ea tmp} + | _ => error "frewriteUse" + + end +end + diff --git a/MLRISC/sparc/ra/sparcSpillInstr.sml b/MLRISC/sparc/ra/sparcSpillInstr.sml new file mode 100644 index 0000000..6cf39e0 --- /dev/null +++ b/MLRISC/sparc/ra/sparcSpillInstr.sml @@ -0,0 +1,86 @@ +(* sparcSpillInstr.sml + * + * COPYRIGHT (c) 2002 Bell Labs, Lucent Technologies + * + * Sparc instructions to emit when spilling an instruction. + *) + +functor SparcSpillInstr(Instr : SPARCINSTR) : ARCH_SPILL_INSTR = struct + structure I = Instr + structure C = I.C + structure Rewrite = SparcRewrite(I) + structure CB = CellsBasis + + fun error msg = MLRiscErrorMsg.error ("SparcSpillInstr", msg) + + fun storeAtEA CB.GP (reg, I.Displace{base, disp, mem}) = + I.store{s=I.ST, r=base, i=I.LAB disp, d=reg, mem=mem} + | storeAtEA CB.FP (reg, I.Displace{base, disp, mem}) = + I.fstore{s=I.STDF, r=base, i=I.LAB disp, d=reg, mem=mem} + | storeAtEA _ _ = error "storeAtEA" + + fun loadFromEA CB.GP (reg, I.Displace{base, disp, mem}) = + I.load{l=I.LD, d=reg, r=base, i=I.LAB disp, mem=mem} + | loadFromEA CB.FP (reg, I.Displace{base, disp, mem}) = + I.fload{l=I.LDDF, d=reg, r=base, i=I.LAB disp, mem=mem} + | loadFromEA _ _ = error "loadFromEA" + + + fun spillToEA ck reg_ea = + {code=[storeAtEA ck reg_ea], proh=[], newReg=NONE} + + fun reloadFromEA ck reg_ea = + {code=[loadFromEA ck reg_ea], proh=[], newReg=NONE} + + (* spill a register to spillLoc *) + fun spillR (instr, reg, spillLoc) = let + val newR = C.newReg() + val instr' = Rewrite.rewriteDef(instr, reg, newR) + in + {code=[instr', storeAtEA CB.GP (newR, spillLoc)], + proh=[newR], + newReg=SOME newR} + end + + fun spillF (instr, reg, spillLoc) = let + val newR = C.newFreg() + val instr' = Rewrite.frewriteDef(instr, reg, newR) + in + {code=[instr', storeAtEA CB.FP (newR, spillLoc)], + proh=[newR], + newReg=SOME newR} + end + + + + + (* reload a register from spillLoc *) + fun reloadR(instr, reg, spillLoc) = let + val newR = C.newReg() + val instr' = Rewrite.rewriteUse(instr, reg, newR) + in + {code=[loadFromEA CB.GP (newR, spillLoc), instr'], + proh=[newR], + newReg=SOME newR} + end + + fun reloadF(instr, reg, spillLoc) = let + val newR = C.newFreg() + val instr' = Rewrite.frewriteUse(instr, reg, newR) + in + {code=[loadFromEA CB.FP (newR, spillLoc), instr'], + proh=[newR], + newReg=SOME newR} + end + + + + fun spill CellsBasis.GP = spillR + | spill CellsBasis.FP = spillF + | spill _ = error "spill" + + fun reload CellsBasis.GP = reloadR + | reload CellsBasis.FP = reloadF + | reload _ = error "reload" +end + diff --git a/MLRISC/sparc/sparc.mdl b/MLRISC/sparc/sparc.mdl new file mode 100644 index 0000000..5a2dc39 --- /dev/null +++ b/MLRISC/sparc/sparc.mdl @@ -0,0 +1,863 @@ +(* + * This has been upgraded to V9. + *) + +architecture Sparc = +struct + + superscalar + + big endian + + lowercase assembly + + storage + GP = $r[32] of 64 bits where $r[0] = 0 + asm: (fn (r,_) => + if r < 8 then "%g"^Int.toString r + else if r = 14 then "%sp" + else if r < 16 then "%o"^Int.toString(r-8) + else if r < 24 then "%l"^Int.toString(r-16) + else if r = 30 then "%fp" + else if r < 32 then "%i"^Int.toString(r-24) + else "%r"^Int.toString r + ) + | FP = $f[32] of 32 bits asm: (fn (f,_) => "%f"^Int.toString f) + | Y = $y[1] of 64 bits asm: "%y" + | PSR = $psr[1] of 64 bits + asm: (fn (0,_) => "%psr" + | (n,_) => "%psr"^Int.toString n) + | FSR = $fsr[1] of 64 bits + asm: (fn (0,_) => "%fsr" + | (n,_) => "%fsr"^Int.toString n) + | CC = $cc[] of 64 bits aliasing GP asm: "%cc" + | MEM = $m[] of 8 aggregable bits asm: (fn (r,_) => "m"^Int.toString r) + | CTRL = $ctrl[] asm: (fn (r,_) => "ctrl"^Int.toString r) + + locations + stackptrR = $r[14] (* %o6 = %sp *) + and frameptrR = $r[30] (* %i6 = %fp *) + and asmTmpR = $r[10] (* %o2 *) + and linkReg = $r[15] + and fasmTmp = $f[30] + and y = $y[0] + and psr = $psr[0] + and fsr = $fsr[0] + and r0 = $r[0] + + structure RTL = + struct + include "Tools/basis.mdl" + open Basis + infix 1 || + infix 3 << >> ~>> + + fun %% l = (l : #64 bits) + + (* Updates condition code *) + fun cc{} = Kill $psr[0] + + fun byte x = (x : #8 bits) + fun hword x = (x : #16 bits) + fun word x = (x : #32 bits) + fun dword x = (x : #64 bits) + fun single x = (x : #32 bits) + fun double x = (x : #64 bits) + fun quad x = (x : #128 bits) + + fun disp(r,i) = $r[r] + i + + (* read from/write to the y register *) + rtl RDY{d} = $r[d] := $y[0] + rtl WRY{r,i} = $y[0] := disp(r,i) + + rtl SETHI{i,d} = $r[d] := i << 10 + + (* Integer load/store *) + rtl LDSB{r,i,d,mem} = $r[d] := sx(byte $m[disp(r,i):mem]) + rtl LDSH{r,i,d,mem} = $r[d] := sx(hword $m[disp(r,i):mem]) + rtl LDUB{r,i,d,mem} = $r[d] := zx(byte $m[disp(r,i):mem]) + rtl LDUH{r,i,d,mem} = $r[d] := zx(hword $m[disp(r,i):mem]) + rtl LD{r,i,d,mem} = $r[d] := zx(word $m[disp(r,i):mem]) + rtl LDX{r,i,d,mem} = $r[d] := zx(quad $m[disp(r,i):mem]) + rtl STB{r,i,d,mem} = $m[disp(r,i):mem] := $r[d] at [0..7] + rtl STH{r,i,d,mem} = $m[disp(r,i):mem] := $r[d] at [0..15] + rtl ST{r,i,d,mem} = $m[disp(r,i):mem] := $r[d] at [0..31] + rtl STX{r,i,d,mem} = $m[disp(r,i):mem] := $r[d] at [0..63] + + (* Integer opcodes *) + + (* These are built-in sparc bitops *) + fun andn(x,y) = andb(x,notb y) + fun orn(x,y) = orb(x,notb y) + fun xnor(x,y) = notb(xorb(x,y)) + + (* Tagged additions operators. We just fake these by + * generating new operators. + *) + rtl tadd taddtv tsub tsubtv : #n bits * #n bits -> #n bits + + fun multiply opc {r,i,d} = + $r[d] := opc($r[r],i) || + Kill $y[0] + fun multiplycc opc {r,i,d} = multiply opc {r,i,d} || cc{} + fun divide opc {r,i,d} = + $r[d] := opc($r[r],i) (* XXX *) + fun dividecc opc {r,i,d} = divide opc {r,i,d} || cc{} + + fun logical opc {r,i,d} = $r[d] := opc($r[r],i) + fun logicalcc opc {r,i,d} = $r[d] := opc($r[r],i) || cc{} + fun arith opc {r,i,d} = $r[d] := opc($r[r],i) + fun arithcc opc {r,i,d} = $r[d] := opc($r[r],i) || cc{} + + rtl li{i,d} = $r[d] := i (* load immediate *) + + rtl [AND,ANDN,OR,ORN,XOR,XNOR] = + map logical [andb, andn, orb, orn, xorb, xnor] + + rtl [ANDCC, ANDNCC, ORCC, ORNCC, XORCC, XNORCC] = + map logicalcc [andb, andn, orb, orn, xorb, xnor] + + rtl [ADD, TADD, TADDTV, SUB, TSUB, TSUBTV] = + map arith [(+), tadd, taddtv, (-), tsub, tsubtv] + + rtl [ADDCC, TADDCC, TADDTVCC, SUBCC, TSUBCC, TSUBTVCC] = + map arithcc [(+), tadd, taddtv, (-), tsub, tsubtv] + + rtl [UMUL,SMUL] = map multiply [mulu,muls] + rtl [UMULCC,SMULCC] = map multiplycc [mulu,muls] + rtl [UDIV,SDIV] = map divide [divu,divs] + rtl [UDIVCC,SDIVCC] = map dividecc [divu,divs] + + rtl [MULX, SDIVX, UDIVX] = map arith [muls, divs, divu] + rtl [SLL, SRL, SRA] = map logical [(<<), (>>), (~>>)] (* XXX *) + rtl [SLLX, SRLX, SRAX] = map logical [(<<), (>>), (~>>)] (* XXX *) + + local fun xor(a, b) = a == b + (* Extract bits from the $psr *) + val N = ($psr[0] at [23]) == 1 + val Z = ($psr[0] at [22]) == 1 + val V = ($psr[0] at [21]) == 1 + val C = ($psr[0] at [20]) == 1 + in val [A, E, LE, L, LEU, CS, NEG, VS, (* XXX *) + N, NE, G, GE, GU, CC, POS, VC] = + [true, Z, Z, Z, Z, C, N, V, + false, not Z, Z, Z, Z, not C,not N,not V + ] + end + + val integer_tests = + [N, E, LE, L, LEU, CS, NEG, VS, + A, NE, G, GE, GU, CC, POS, VC] + + (* Integer branches *) + fun branch status {label} = if status then Jmp(%%label) else () + rtl [BN, BE, BLE, BL, BLEU, BCS, BNEG, BVS, + BA, BNE, BG, BGE, BGU, BCC, BPOS, BVC] = + map branch integer_tests + + rtl JMP{r,i} = Jmp(disp(r,i)) + + (* Conditional moves *) + fun MOVicc icc {i, d} = if icc then $r[d] := i else () + fun FMOVicc icc {r, d} = if icc then $f[d] := $f[r] else () + + val MOV ^^ + [E, LE, L, LEU, CS, NEG, VS, + NE, G, GE, GU, CC, POS, VC] = + map MOVicc + [E, LE, L, LEU, CS, NEG, VS, + NE, G, GE, GU, CC, POS, VC] + val FMOV ^^ + [E, LE, L, LEU, CS, NEG, VS, + NE, G, GE, GU, CC, POS, VC] = + map FMOVicc + [E, LE, L, LEU, CS, NEG, VS, + NE, G, GE, GU, CC, POS, VC] + + fun MOVR rcc {r, i, d} = if rcc($r[r], 0) then $r[d] := i else () + rtl MOVR ^^ [Z, LEZ, LZ, NZ, GZ, GEZ] = + map MOVR [(==), (<=), (<), (<>), (>), (>=)] + + (* Floating point load/store *) + rtl LDF{r,i,d,mem} = $f[d] := $m[disp(r,i):mem] + rtl LDDF{r,i,d,mem} = $f[d] := $m[disp(r,i):mem] + rtl LDQF{r,i,d,mem} = $f[d] := $m[disp(r,i):mem] + rtl STF{r,i,d,mem} = $m[disp(r,i):mem] := $f[d] + rtl STDF{r,i,d,mem} = $m[disp(r,i):mem] := $f[d] + + rtl LDFSR{r,i,mem} = $fsr[0] := $m[disp(r,i):mem] (* XXX *) + rtl LDXFSR{r,i,mem} = $fsr[0] := $m[disp(r,i):mem] (* XXX *) + rtl STFSR{r,i,mem} = $m[disp(r,i):mem] := $fsr[0] (* XXX *) + + (* conversions *) + rtl fitos fitod fitoq fstoi fdtoi fqtoi fsqrt + fstod fstoq fdtos fdtoq fqtos fqtod : #n bits -> #n bits + + fun fmovs x = x + fun fmovd x = x + fun fmovq x = x + + fun funary opc {r,d} = $f[d] := opc $f[r] + + (* Floating point unary operations *) + rtl [FiTOs, FiTOd, FiTOq, FsTOi, FdTOi, FqTOi, + FsTOd, FsTOq, FdTOs, FdTOq, FqTOs, FqTOd, + FMOVs, FNEGs, FABSs, FMOVd, FNEGd, FABSd, + FMOVq, FNEGq, FABSq, FSQRTs, FSQRTd, FSQRTq] = (* XXX *) + map funary + [fitos, fitod, fitoq, fstoi, fdtoi, fqtoi, + fstod, fstoq, fdtos, fdtoq, fqtos, fqtod, + fmovs, fneg, fabs, fmovd, fneg, fabs, + fmovq, fneg, fabs, fsqrt, fsqrt, fsqrt] + + (* Floating point binary operations *) + fun fbinary opc {r1,r2,d} = $f[d] := opc($f[r1],$f[r2]) + rtl fsmuld fdmulq : #n bits * #n bits -> #n bits (* XXX *) + rtl [FADDs, FADDd, FADDq, FSUBs, FSUBd, FSUBq, (* XXX *) + FMULs, FMULd, FMULq, FsMULd, FdMULq, + FDIVs, FDIVd, FDIVq] = + map fbinary + [fadd, fadd, fadd, fsub, fsub, fsub, + fmul, fmul, fmul, fsmuld, fdmulq, + fdiv, fdiv, fdiv] + + (* Floating point comparisons *) + rtl Nan : #32 bits -> #32 bits + fun nan(r) = (* if Nan($f[r]) == 0 then () else () *) () + rtl cmps cmpd cmpq : #n bits * #n bits -> #n bits + rtl FCMPs{r1,r2} = $fsr[0] := cmps($f[r1],$f[r2]) + rtl FCMPd{r1,r2} = $fsr[0] := cmpd($f[r1],$f[r2]) + rtl FCMPq{r1,r2} = $fsr[0] := cmpq($f[r1],$f[r2]) + rtl FCMPEs{r1,r2} = $fsr[0] := cmps($f[r1],$f[r2]) || nan(r1) || nan(r2) + rtl FCMPEd{r1,r2} = $fsr[0] := cmpd($f[r1],$f[r2]) || nan(r1) || nan(r2) + rtl FCMPEq{r1,r2} = $fsr[0] := cmpq($f[r1],$f[r2]) || nan(r1) || nan(r2) + + local val X = $fsr[0] == 0 + in val floating_point_tests as + [FN, FNE, FLG, FUL, FL, FUG, FG, FU, + FA, FE, FUE, FGE, FUGE, FLE, FULE, FO] = + [X, X, X, X, X, X, X, X, + X, X, X, X, X, X, X, X + ] + end + fun fbranch fcc {label} = if fcc then Jmp(%%label) else () + rtl [FBN, FBNE, FBLG, FBUL, FBL, FBUG, FBG, FBU, + FBA, FBE, FBUE, FBGE, FBUGE, FBLE, FBULE, FBO] = + map fbranch floating_point_tests + + (* Floating point conditional moves *) + fun MOVfcc fcc {i, d} = if fcc then $r[d] := i else () + fun FMOVfcc fcc { r, d} = if fcc then $f[d] := $f[r] else () + + rtl MOV ^^ [N, NE, LG, UL, L, UG, G, U, + A, E, UE, GE, UGE, LE, ULE, O] = + map MOVfcc floating_point_tests + + rtl FMOV ^^ [N, NE, LG, UL, L, UG, G, U, + A, E, UE, GE, UGE, LE, ULE, O] = + map FMOVfcc floating_point_tests + + (* Traps *) + fun Trap x = Jmp x (* XXX *) + fun Ticc cc {r,i} = if cc then Trap(disp(r,i)) else () + fun Txcc cc {r,i} = if cc then Trap(disp(r,i)) else () + + rtl TICC ^^ [BN,BE, BLE,BL, BLEU,BCS,BNEG,BVS, + BA,BNE,BG, BGE,BGU, BCC,BPOS,BVC] = + map Ticc integer_tests + rtl TXCC ^^ [BN,BE, BLE,BL, BLEU,BCS,BNEG,BVS, + BA,BNE,BG, BGE,BGU, BCC,BPOS,BVC] = + map Txcc integer_tests + + (* Jmps, calls and returns *) + rtl JMP{r,i} = Jmp(disp(r,i)) + rtl JMPL{r,i,d,defs,uses} = + Call(disp(r,i)) || + $r[d] := ??? || + Kill $cellset[defs] || + Use $cellset[uses] + rtl CALL{label,defs,uses} = + Call(%%label) || + Kill $cellset[defs] || + Use $cellset[uses] + rtl RET{} = Ret + + end (* RTL *) + + structure Instruction = + struct + datatype load : op3! = LDSB 0b001001 + | LDSH 0b001010 + | LDUB 0b000001 + | LDUH 0b000010 + | LD 0b000000 + | LDX 0b001011 (* v9 *) + | LDD 0b000011 + datatype store : op3! = STB 0b000101 + | STH 0b000110 + | ST 0b000100 + | STX 0b001110 (* v9 *) + | STD 0b000111 + datatype fload : op3! = LDF 0b100000 + | LDDF 0b100011 + | LDQF 0b100010 (* v9 *) + | LDFSR 0b100001 (* rd = 0 *) + | LDXFSR 0b100001 (* v9 *) (* rd = 1 *) + datatype fstore : op3! = STF 0b100100 + | STDF 0b100111 + | STFSR 0b100101 + datatype arith : op3! = AND 0b000001 + | ANDCC 0b010001 + | ANDN 0b000101 + | ANDNCC 0b010101 + | OR 0b000010 + | ORCC 0b010010 + | ORN 0b000110 + | ORNCC 0b010110 + | XOR 0b000011 + | XORCC 0b010011 + | XNOR 0b000111 + | XNORCC 0b010111 + | ADD 0b000000 + | ADDCC 0b010000 + | TADD 0b100000 + | TADDCC 0b110000 + | TADDTV 0b100010 + | TADDTVCC 0b110010 + | SUB 0b000100 + | SUBCC 0b010100 + | TSUB 0b100001 + | TSUBCC 0b110001 + | TSUBTV 0b100011 + | TSUBTVCC 0b110011 + | UMUL 0b001010 + | UMULCC 0b011010 + | SMUL 0b001011 + | SMULCC 0b011011 + | UDIV 0b001110 + | UDIVCC 0b011110 + | SDIV 0b001111 + | SDIVCC 0b011111 + (* v9 extensions *) + | MULX 0b001001 + | SDIVX 0b101101 + | UDIVX 0b001101 + (* op3, x *) + datatype shift : op3! = SLL (0wb100101,0w0) + | SRL (0wb100110,0w0) + | SRA (0wb100111,0w0) + (* v9 extensions *) + | SLLX (0wb100101,0w1) + | SRLX (0wb100110,0w1) + | SRAX (0wb100111,0w1) + datatype farith1 : opf! = FiTOs 0b011000100 + | FiTOd 0b011001000 + | FiTOq 0b011001100 + | FsTOi 0b011010001 + | FdTOi 0b011010010 + | FqTOi 0b011010011 + | FsTOd 0b011001001 + | FsTOq 0b011010101 + | FdTOs 0b011000110 + | FdTOq 0b011001110 + | FqTOs 0b011000111 + | FqTOd 0b011001011 + | FMOVs 0b000000001 + | FNEGs 0b000000101 + | FABSs 0b000001001 + | FMOVd | FNEGd | FABSd (* composite instr *) + | FMOVq | FNEGq | FABSq (* composite instr *) + | FSQRTs 0b000101001 + | FSQRTd 0b000101010 + | FSQRTq 0b000101011 + datatype farith2 :opf! = FADDs 0b001000001 + | FADDd 0b001000010 + | FADDq 0b001000011 + | FSUBs 0b001000101 + | FSUBd 0b001000110 + | FSUBq 0b001000111 + | FMULs 0b001001001 + | FMULd 0b001001010 + | FMULq 0b001001011 + | FsMULd 0b001101001 + | FdMULq 0b001101110 + | FDIVs 0b001001101 + | FDIVd 0b001001110 + | FDIVq 0b001001111 + datatype fcmp : opf! = FCMPs 0b001010001 + | FCMPd 0b001010010 + | FCMPq 0b001010011 + | FCMPEs 0b001010101 + | FCMPEd 0b001010110 + | FCMPEq 0b001010111 + + datatype branch [0..15] : cond! = + BN "n" + | BE "e" + | BLE "le" + | BL "l" + | BLEU "leu" + | BCS "cs" + | BNEG "neg" + | BVS "vs" + | BA "" + | BNE "ne" + | BG "g" + | BGE "ge" + | BGU "gu" + | BCC "cc" + | BPOS "pos" + | BVC "vs" + + datatype rcond! = (* V9 integer conditions *) + RZ 0b001 + | RLEZ 0b010 + | RLZ 0b011 + | RNZ 0b101 + | RGZ 0b110 + | RGEZ 0b111 + + datatype cc = (* V9 condition register *) + ICC 0b00 + | XCC 0b10 + + datatype prediction! = (* V9 branch prediction bit *) + PT | PN + + datatype fbranch [0..15] : cond! = + FBN + | FBNE + | FBLG + | FBUL + | FBL + | FBUG + | FBG + | FBU + | FBA "fb" + | FBE + | FBUE + | FBGE + | FBUGE + | FBLE + | FBULE + | FBO + + datatype ea = Direct of $GP + | FDirect of $GP + | Displace of {base: $GP, disp: T.labexp, mem: Region.region} + + (* used to encode the opf_low field V9 *) + datatype fsize! = S 0b00100 + | D 0b00110 + | Q 0b00111 + + datatype operand = + REG of $GP ``'' rtl: $r[GP] + | IMMED of int ``'' rtl: immed int + | LAB of T.labexp ``'' rtl: labexp + | LO of T.labexp ``%lo()'' rtl: lo(labexp) + | HI of T.labexp ``%hi()'' rtl: hi(labexp) + + type addressing_mode = CellsBasis.cell * operand + + end (* Instruction *) + + functor Assembly(val V9 : bool) = + struct + (* Some helper functions for assembly generation *) + fun emit_leaf false = () | emit_leaf true = emit "l" + fun emit_nop false = () | emit_nop true = emit "\n\tnop" + fun emit_a false = () | emit_a true = emit ",a" + fun emit_cc false = () | emit_cc true = emit "cc" + end + + instruction formats 32 bits + (* Extract the value of an operand *) + opn{i} = + let fun hi22 w = (itow w) ~>> 0w10 + fun lo10 w = (itow w) at [0..9] + in case i of + I.REG rs2 => error "opn" + | I.IMMED i => itow i + | I.LAB l => itow(MLTreeEval.valueOf l) + | I.LO l => lo10(MLTreeEval.valueOf l) + | I.HI l => hi22(MLTreeEval.valueOf l) + end + + (* basic formats, integer source registers, target type not determined.*) + | rr {op1:2, rd:5, op3:6, rs1:GP 5, i:1=0, asi:8=0, rs2:GP 5} + | ri {op1:2, rd:5, op3:6, rs1:GP 5, i:1=1, simm13:signed 13} + | rix{op1,op3,r,i,d} = + (case i of + I.REG rs2 => rr{op1,op3,rs1=r,rs2=rs2,rd=d} + | _ => ri{op1,op3,rs1=r,rd=d,simm13=opn{i}} + ) + + (* GP + imm/GP -> GP *) + | rir{op1,op3,r,i,d:GP} = rix{op1,op3,r,i,d} + (* GP + imm/GP -> FP *) + | rif{op1,op3,r,i,d:FP} = rix{op1,op3,r,i,d} + + (* formats found in the Sparc architecture manual *) + | load{l:load,r,i,d} = rir{op1=0w3,op3=l,r,i,d} (* p90 *) + | store{s:store,r,i,d} = rir{op1=0w3,op3=s,r,i,d} (* p95 *) + | fload{l:fload,r,i,d} = rif{op1=0w3,op3=l,r,i,d} (* p92 *) + | fstore{s:fstore,r,i,d} = rif{op1=0w3,op3=s,r,i,d} (* p97 *) + | sethi {op1:2=0, rd:GP 5, op2:3=0b100, imm22:int signed 22} (* p104 *) + | NOP {op1:2=0, rd:5=0, op2:3=0b100, imm22:22=0} (* p105 *) + | unimp {op1:2=0, rd:5=0, op2:3=0, const22:int unsigned 22} (* p137 *) + | delay {nop} = if nop then NOP{} else () (* delay slot *) + | arith {a:arith,r,i,d} = (* p106 *) + rir{op1=0w2,op3=a,r,i,d} + + | shiftr {op1:2=2, rd:5, op3:6, rs1:5, i:1=0, x:1, asi:7=0, rs2:GP 5} + | shifti {op1:2=2, rd:5, op3:6, rs1:5, i:1=1, x:1, asi:6=0, cnt:signed 6} + | shift {s:shift,r:GP,i,d:GP} = + let val (op3,x) = s + in case i of + I.REG rs2 => shiftr{op3,rs1=r,rs2=rs2,rd=d,x=x} (* p218 v9 *) + | _ => shifti{op3,rs1=r,cnt=opn{i},rd=d,x=x} + end + | save {r,i,d} = rir{op1=0w2,op3=0wb111100,r,i,d} (* p117 *) + | restore {r,i,d} = rir{op1=0w2,op3=0wb111101,r,i,d} (* p117 *) + | bicc{op1:2=0,a:bool 1, b:branch 4, op2:3=0b010, disp22:signed 22} + | fbfcc{op1:2=0,a:bool 1, b:fbranch 4, op2:3=0b110, disp22:signed 22} + | call {op1:2=1, disp30:signed 30} (* p125 *) + | jmpl {r,i,d} = rir{op1=0w2,op3=0wb111000,r,i,d} (* p126 *) + | jmp {r,i} = rix{op1=0w2,op3=0wb111000,r,i,d=0w0} + + | ticcr {op1:2, rd:5, op3:6, rs1:GP 5, i:1=0, cc:cc 2, _:6=0, rs2:GP 5} + | ticci {op1:2, rd:5, op3:6, rs1:GP 5, i:1=1, cc:cc 2, _:4=0, + sw_trap:signed 7} + | ticcx{op1,op3,cc,r,i,d} = + (case i of + I.REG rs2 => ticcr{op1,op3,cc,rs1=r,rs2=rs2,rd=d} + | _ => ticci{op1,op3,cc,rs1=r,rd=d,sw_trap=opn{i}} + ) + | ticc {t:branch,cc,r,i} = + ticcx{op1=0w2,d=t,op3=0wb111010,cc,r,i} (* p237 (V9) *) + + | rdy {op2:2=2,d:GP 5,op3:6=0b101000,rs1:5=0,x:0..13=0} (* p131 *) + | wdy {r,i} = rix{op1=0w2,op3=0wb110000,r,i,d=0w0} (* p133 *) + + (* one input floating point format *) + | fop_1 {op1:2=2, d:5, op3:6=0b110100, rs1:5=0, a:9, r:5} + | fop1 {a:farith1,r:FP,d:FP} = fop_1{a,r,d} + + (* generate composite instruction *) + | fdouble{a:farith1,r:FP,d:FP} = + (fop_1{a,r,d}; + fop_1{a=0w1,r=r+0w1,d=d+0w1} + ) + | fquad{a:farith1,r:FP,d:FP} = + (fop_1{a,r,d}; + fop_1{a=0w1,r=r+0w1,d=d+0w1}; + fop_1{a=0w1,r=r+0w2,d=d+0w2}; + fop_1{a=0w1,r=r+0w3,d=d+0w3} + ) + + (* two inputs floating point format *) + | fop2 {op1:2=2, d:FP 5, op3:6=0b110100, r1:FP 5, a:farith2 9, r2:FP 5} + | fcmp {op1:2=2, rd:25..29=0, op3:6=0b110101, rs1:FP 5, opf:fcmp 9,rs2:FP 5} + + (* conditional moves formats (V9) *) + | cmovr{op1:2=2,op3:6,rd:5,cc2:1,cond:4,i:1=0,cc1:1,cc0:1,_:6=0,rs2:5} + | cmovi{op1:2=2,op3:6,rd:5,cc2:1,cond:4,i:1=1,cc1:1,cc0:1,simm11:signed 11} + | cmov{op3,cond,cc2,cc1,cc0,i,rd} = + (case i of + I.REG rs2 => cmovr{op3,cond,rs2=emit_GP rs2,rd,cc0,cc1,cc2} + | _ => cmovi{op3,cond,rd,cc0,cc1,cc2,simm11=opn{i}} + ) + + | movicc {b:branch,i,d:GP} = + cmov{op3=0wb101100,cond=b,i,rd=d,cc2=0w1,cc1=0w0,cc0=0w0} + | movfcc {b:fbranch,i,d:GP} = (* use fcc0 *) + cmov{op3=0wb101100,cond=b,i,rd=d,cc2=0w0,cc1=0w0,cc0=0w0} + | fmovicc{sz:fsize,b:branch,r:FP,d:FP} = + cmovr{op3=0wb101100,cond=b,rs2=r,rd=d,cc2=0w1,cc1=0w0,cc0=0w0} + | fmovfcc{sz:fsize,b:fbranch,r:FP,d:FP} = (* use fcc0 *) + cmovr{op3=0wb101100,cond=b,rs2=r,rd=d,cc2=0w0,cc1=0w0,cc0=0w0} + + (* move integer register on register condition format *) + | movrr {op1:2=2, rd:GP 5, op3:6=0b101111, rs1:GP 5, i:1=0, rcond:3, + asi:5=0, rs2:GP 5} + | movri {op1:2=2, rd:GP 5, op3:6=0b101111, rs1:GP 5, i:1=1, rcond:3, + simm10:signed 10} + | movr{rcond:rcond,r,i,d} = + (case i of + I.REG rs2 => movrr{rcond,rs1=r,rs2=rs2,rd=d} + | _ => movri{rcond,rs1=r,rd=d,simm10=opn{i}} + ) + + structure MC = + struct + (* this computes the displacement address *) + fun disp label = itow((Label.addrOf label - !loc)) ~>> 0w2 + val r15 = C.Reg CellsBasis.GP 15 + and r31 = C.Reg CellsBasis.GP 31 + end + + + (* + * Reservation tables and pipeline definitions for scheduling + *) + + (* Function units *) + resource issue and mem and alu and falu and fmul and fdiv and branch + + (* Different implementations of cpus *) + cpu default 2 [2 issue, 2 mem, 1 alu] (* 2 issue machine *) + + (* Definitions of various reservation tables *) + pipeline NOP _ = [issue] + and ARITH _ = [issue^^alu] + and LOAD _ = [issue^^mem] + and STORE _ = [issue^^mem,mem,mem] + and FARITH _ = [issue^^falu] + and FMUL _ = [issue^^fmul,fmul] + and FDIV _ = [issue^^fdiv,fdiv*50] + and BRANCH _ = [issue^^branch] + + (* + * Notation: + * r -- source register + * i -- source operand (immed or register) + * d -- destination register (or data register in store instructions) + *) + instruction + LOAD of { l:load, d: $GP, r: $GP, i:operand, mem:Region.region } + asm: ``\t[+], '' + mc: load{l,r,i,d} + rtl: ``'' + latency: 1 + + | STORE of { s:store, d: $GP, r: $GP, i:operand, mem:Region.region } + asm: ``\t, [+]'' + mc: store{s,r,i,d} + rtl: ``'' + + | FLOAD of { l:fload, r: $GP, i:operand, d: $FP, mem:Region.region } + asm: ``\t[+], '' + mc: fload{l,r,i,d} + rtl: ``'' + latency: 1 + + | FSTORE of { s:fstore, d: $FP, r: $GP, i:operand, mem:Region.region } + asm: ``\t[+], '' + mc: fstore{s,r,i,d} + rtl: ``'' + + | UNIMP of { const22: int } + asm: ``unimp '' + mc: unimp{const22} + + | SETHI of { i:int, d: $GP } + asm: let val i = Word32.toString(Word32.<<(Word32.fromInt i,0w10)) + in ``sethi\t%hi(0x), '' + end + mc: sethi{imm22=i,rd=d} + rtl: ``SETHI'' + + | ARITH of { a:arith, r: $GP, i:operand, d: $GP } + asm: (case (a,CellsBasis.registerId r,CellsBasis.registerId d, i) of + (* generate abbreviations! *) + (I.OR,0,_,I.REG _) => ``mov\t, '' + | (I.OR,0,_,_) => ``set\t, '' + | (I.SUBCC,_,0,_) => ``cmp\t, '' + | _ => ``\t, , '' + ) + mc: arith{a,r,i,d} + rtl: (case (a,CellsBasis.registerId r) of + (I.OR, 0) => ``

  • '' + | _ => ``'' + ) + + | SHIFT of { s:shift, r: $GP, i:operand, d: $GP } + asm: ``\t, , '' + mc: shift{s,r,i,d} + rtl: ``'' + + (* Conditional moves! *) + | MOVicc of {b:branch, i:operand, d: $GP } (* V9 *) + asm: ``mov\t, '' + mc: movicc{b,i,d} + + | MOVfcc of {b:fbranch, i:operand, d: $GP } (* V9 *) + asm: ``mov\t, '' + mc: movfcc{b,i,d} + + | MOVR of {rcond:rcond, r: $GP, i: operand, d: $GP} (* V9 *) + asm: ``movr\t, , '' + mc: movr{rcond,r,i,d} + + | FMOVicc of {sz:fsize, b:branch, r: $FP, d: $FP } (* V9 *) + asm: ``fmov\t, '' + mc: fmovicc{sz,b,r,d} + + | FMOVfcc of {sz:fsize, b:fbranch, r: $FP, d: $FP } (* V9 *) + asm: ``fmov\t, '' + mc: fmovfcc{sz,b,r,d} + + | Bicc of { b:branch, a:bool, label:Label.label, nop:bool} + asm: ``b\t\t

    \t,

    \t%cc, \t, '' + else + case a of + I.FMOVd => g("fmovs",r,d) + | I.FNEGd => g("fnegs",r,d) + | I.FABSd => g("fabss",r,d) + | I.FMOVq => h("fmovs",r,d) + | I.FNEGq => h("fnegs",r,d) + | I.FABSq => h("fabss",r,d) + | _ => ``\t, '' + end + mc: (case a of + (* composite instructions *) + I.FMOVd => fdouble{a=I.FMOVs,r,d} + | I.FNEGd => fdouble{a=I.FNEGs,r,d} + | I.FABSd => fdouble{a=I.FABSs,r,d} + | I.FMOVq => fquad{a=I.FMOVs,r,d} + | I.FNEGq => fquad{a=I.FNEGs,r,d} + | I.FABSq => fquad{a=I.FABSs,r,d} + | _ => fop1{a,r,d} + ) + rtl: ``'' + + | FPop2 of { a:farith2, r1: $FP, r2: $FP, d: $FP } + asm: ``\t, , '' + mc: fop2{a,r1,r2,d} + rtl: ``'' + + | FCMP of { cmp:fcmp, r1: $FP, r2: $FP, nop:bool } + asm: ``\t, '' + mc: (fcmp{opf=cmp,rs1=r1,rs2=r2}; delay{nop}) + rtl: ``'' + padding: nop = true + nullified: false + delayslot candidate: false + latency: 1 + + | SAVE of {r: $GP, i:operand, d: $GP} + asm: ``save\t, , '' + mc: save{r,i,d} + + | RESTORE of {r: $GP, i:operand, d: $GP} + asm: ``restore\t, , '' + mc: restore{r,i,d} + + | RDY of {d: $GP} + asm: ``rd\t%y, '' + mc: rdy{d} + rtl: ``RDY'' + + | WRY of {r: $GP,i:operand} + asm: ``wr\t, , %y'' + mc: wdy{r,i} + rtl: ``WRY'' + + | RET of {leaf:bool,nop:bool} + asm: ``ret'' + mc: (jmp{r=if leaf then r31 else r15,i=I.IMMED 8}; delay{nop}) + rtl: ``RET'' + padding: nop = true + nullified: false + + | SOURCE of {} + asm: ``source'' + mc: () + + | SINK of {} + asm: ``sink'' + mc: () + + | PHI of {} + asm: ``phi'' + mc: () + + structure SSA = + struct + fun operand(ty,I.REG r) = T.REG(ty, r) + | operand(ty,I.IMMED i) = T.LI(IntInf.fromInt i) + (*| operand(ty,I.LAB le) = T.LABEL le*) + | operand(ty,_) = error "operand" + end + +end diff --git a/MLRISC/staged-alloc/README b/MLRISC/staged-alloc/README new file mode 100644 index 0000000..24b4b68 --- /dev/null +++ b/MLRISC/staged-alloc/README @@ -0,0 +1,16 @@ +Staged Allocation for MLRISC +Mike Rainey (mrainey@cs.uchicago.edu) + +Staged allocation is a technique for handling calling conventions for different +architectures and languages. The basic idea is to use a domain-specific language +for calling conventions and an allocator machine. The allocator machine takes +a convention encoded in this language and a function signature and returns the +machine locations for passing and returning values. From there, it is trivial +to generate the calling sequence. For the full treatment, see the original +paper by Olinsky et. al. + + * Staged allocation: a compositional technique for specifying and implementing procedure calling conventions - http://www.eecs.harvard.edu/~nr/pubs/staged-abstract.html + +Our library consists of two parts: + * allocator - follows the operational semantics given in the paper + * conventions - several calling conventions \ No newline at end of file diff --git a/MLRISC/staged-alloc/allocator/.cm/GUID/staged-allocation-fn.sml b/MLRISC/staged-alloc/allocator/.cm/GUID/staged-allocation-fn.sml new file mode 100644 index 0000000..f55681c --- /dev/null +++ b/MLRISC/staged-alloc/allocator/.cm/GUID/staged-allocation-fn.sml @@ -0,0 +1 @@ +guid-$OTHER-MLRISC/(StagedAlloc.cm):../staged-alloc/allocator/staged-allocation-fn.sml-1714016102.352 diff --git a/MLRISC/staged-alloc/allocator/.cm/GUID/staged-allocation-sig.sml b/MLRISC/staged-alloc/allocator/.cm/GUID/staged-allocation-sig.sml new file mode 100644 index 0000000..7ba8168 --- /dev/null +++ b/MLRISC/staged-alloc/allocator/.cm/GUID/staged-allocation-sig.sml @@ -0,0 +1 @@ +guid-$OTHER-MLRISC/(StagedAlloc.cm):../staged-alloc/allocator/staged-allocation-sig.sml-1714016102.323 diff --git a/MLRISC/staged-alloc/allocator/.cm/SKEL/staged-allocation-fn.sml b/MLRISC/staged-alloc/allocator/.cm/SKEL/staged-allocation-fn.sml new file mode 100644 index 0000000..1f6a1eb --- /dev/null +++ b/MLRISC/staged-alloc/allocator/.cm/SKEL/staged-allocation-fn.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f2d"List"d"Int"ae"StagedAllocationFn"jh1ad"Store"gp1d"IntBinaryMap"gp1c"STAGED_ALLOCATION" \ No newline at end of file diff --git a/MLRISC/staged-alloc/allocator/.cm/SKEL/staged-allocation-sig.sml b/MLRISC/staged-alloc/allocator/.cm/SKEL/staged-allocation-sig.sml new file mode 100644 index 0000000..8b2030c --- /dev/null +++ b/MLRISC/staged-alloc/allocator/.cm/SKEL/staged-allocation-sig.sml @@ -0,0 +1,2 @@ +Skeleton 5 +ac"STAGED_ALLOCATION"h0 \ No newline at end of file diff --git a/MLRISC/staged-alloc/allocator/.cm/amd64-unix/staged-allocation-fn.sml b/MLRISC/staged-alloc/allocator/.cm/amd64-unix/staged-allocation-fn.sml new file mode 100644 index 0000000..ffa182a Binary files /dev/null and b/MLRISC/staged-alloc/allocator/.cm/amd64-unix/staged-allocation-fn.sml differ diff --git a/MLRISC/staged-alloc/allocator/.cm/amd64-unix/staged-allocation-sig.sml b/MLRISC/staged-alloc/allocator/.cm/amd64-unix/staged-allocation-sig.sml new file mode 100644 index 0000000..78672fb Binary files /dev/null and b/MLRISC/staged-alloc/allocator/.cm/amd64-unix/staged-allocation-sig.sml differ diff --git a/MLRISC/staged-alloc/allocator/staged-allocation-fn.sml b/MLRISC/staged-alloc/allocator/staged-allocation-fn.sml new file mode 100644 index 0000000..12e741c --- /dev/null +++ b/MLRISC/staged-alloc/allocator/staged-allocation-fn.sml @@ -0,0 +1,284 @@ +(* staged-allocation-fn.sml + * + * This code implements the Staged Allocation technique for calling conventions. + * You can find the POPL06 paper describing this technique at + * http://www.eecs.harvard.edu/~nr/pubs/staged-abstract.html + * + * Mike Rainey (mrainey@cs.uchicago.edu) + * + *) + +functor StagedAllocationFn ( + type reg_id + type loc_kind + val memSize : int (* number of bytes addressable in the target machine *) + ) :> STAGED_ALLOCATION + where type loc_kind = loc_kind + where type reg_id = reg_id + = + struct + + exception StagedAlloc of string + + type loc_kind = loc_kind + type width = int + + type req = (width * loc_kind * int) + + (* locations consist of machine registers, offsets in to overflow blocks, combinations of + * locations, and narrowed locations (Figure 3). + *) + type reg_id = reg_id + type reg = (int * loc_kind * reg_id) + datatype loc + = REG of reg + | BLOCK_OFFSET of (width * loc_kind * int) + | COMBINE of (loc * loc) + | NARROW of (loc * width * loc_kind) (* specifies a coercion to the given width and kind *) + + (* the store + * the store keeps three pieces of information + * - a map from counters to their values + * - the overflow block + * - the list of allocated registers + *) + type counter = int + structure Store = IntBinaryMap + type store = (int Store.map * loc option * loc list) + fun insert ((store, ob, regs), c, n) = (Store.insert (store, c, n), ob, regs) + fun init cs = List.foldl (fn (c, store) => insert(store, c, 0)) (Store.empty, NONE, []) cs + fun find ((store, _, _), c) = (case Store.find (store, c) + of SOME v => v + | NONE => raise StagedAlloc "missing store location" + (* end case *)) + fun setOverflowBlock ((store, _, regs), ob) = (store, SOME ob, regs) + fun addReg ((store, ob, regs), reg) = (store, ob, reg :: regs) + + datatype block_direction = UP | DOWN + + (* language for specifying calling conventions (Figure 7) *) + datatype stage + = OVERFLOW of { (* overflow block (usually corresponds to a runtime stack) *) + counter : counter, + blockDirection : block_direction, + maxAlign : int + } + | WIDEN of (width -> width) + | CHOICE of ( (req -> bool) * stage) list (* choose the first stage whose corresponding + * predicate is true. *) + | REGS_BY_ARGS of (counter * reg list) (* the first n arguments go into the first n + * registers *) + | ARGCOUNTER of counter + | REGS_BY_BITS of (counter * reg list) (* the first n bits arguments go into the first + * n bits of registers *) + | BITCOUNTER of counter + | SEQ of stage list (* sequence of stages *) + | PAD of counter (* specifies an alignment (this rule applies even + * for registers) *) + | ALIGN_TO of (width -> width) (* specifies an alignment *) + + (* source for globally unique counter values *) + local + val globalCounter = ref 0 + in + fun freshCounter () = let + val c = !globalCounter + in + globalCounter := c + 1; + c + end + end (* local *) + + (* bit width of a machine location *) + fun width (REG (w, _, _)) = w + | width (BLOCK_OFFSET (w, _, _)) = w + | width (COMBINE (l1, l2)) = width l1 + width l2 + | width (NARROW (_, w, _)) = w + + fun useRegs rs = let + val c = freshCounter () + in + (c, SEQ [BITCOUNTER c, REGS_BY_BITS (c, rs)]) + end + + fun divides (x, y) = Int.mod (x, y) = 0 + fun toMemSize sz = sz div memSize + val roundUp = Int.max + + (* Figure 8 *) + fun dropBits (0, rs) = rs + | dropBits (n, []) = [] + | dropBits (n, r as (w, _, _) :: rs) = if (n >= w) + then dropBits (n - w, rs) + else rs + + (* Figure 8 *) + fun drop (0, rs) = rs + | drop (n, []) = [] + | drop (n, r :: rs) = drop (n - 1, rs) + + (* Figure 6: allocator machine *) + fun step stages ((w, k, al), store) = (case stages + of [] => (NONE, store) + (* allocate upwards on the overflow block *) + | OVERFLOW{counter, blockDirection=UP, maxAlign} :: stages => + if (divides(maxAlign, al) andalso divides(w, memSize)) + then let + val n = find(store, counter) + val n' = roundUp(n, al) + val store = insert(store, counter, n + toMemSize w) + val ob = BLOCK_OFFSET (w, k, n) + val store = setOverflowBlock(store, ob) + in + (SOME ob, store) + end + else raise StagedAlloc "overflow up" + (* allocate downwards on the overflow block *) + | OVERFLOW{counter, blockDirection=DOWN, maxAlign} :: stages => + if (divides(maxAlign, al) andalso divides(w, memSize)) + then let + val n = find(store, counter) + val n' = roundUp(n, al) + w div memSize + val store = insert(store, counter, n') + val ob = BLOCK_OFFSET (w, k, n) + val store = setOverflowBlock(store, ob) + in + (SOME ob, store) + end + else raise StagedAlloc "overflow down" + (* widen a location *) + | WIDEN f :: stages => + if (w <= f w) + then let + val (SOME loc, store') = step stages ((f w, k, al), store) + val loc' = if w = f w + then loc (* eliminate unnecessary narrowed locations *) + else NARROW(loc, w, k) + in + (SOME loc', store') + end + else raise StagedAlloc "widen" + (* choose the first stage whose corresponding predicate is true. *) + | CHOICE choices :: stages => let + fun choose [] = raise StagedAlloc "choose" + | choose ((p, c) :: choices) = if (p (w, k, al)) + then c + else choose choices + val choice = choose choices + in + step (choice :: stages) ((w, k, al), store) + end + (* the first n arguments go into the first n registers *) + | REGS_BY_ARGS (c, rs) :: stages => let + val n = find(store, c) + val rs' = drop(n, rs) + in + case rs' + of [] => step stages ((w, k, al), store) + | (r as (w', _, _)) :: _ => if (w' = w) + then let + val loc = REG r + val store = addReg(store, loc) + in + (SOME loc, store) + end + else raise StagedAlloc "regs by args" + end + (* increment the argument counter *) + | ARGCOUNTER c :: stages => let + val (SOME loc, store) = step stages ((w, k, al), store) + val n = find(store, c) + val store = insert(store, c, n + 1) + in + (SOME loc, store) + end + (* the first n bits arguments go into the first n bits of registers *) + | REGS_BY_BITS (c, rs) :: stages => let + val n = find(store, c) + val rs' = dropBits(n, rs) + in + case rs' + of [] => (* insufficient bits *) + step stages ((w, k, al), store) + | (r as (w', _, _)) :: _ => if (w' = w) + then let (* the arg fits into the regs *) + val loc = REG r + val store = addReg(store, loc) + in + (SOME loc, store) + end + else if w' < w + then let (* some of the arg's bits fit into the regs *) + val store = insert (store, c, n + w') + val loc = REG r + val store = addReg(store, loc) + val (SOME loc', store) = + step (REGS_BY_BITS (c, rs) :: stages) ((w - w', k, al), store) + val store = addReg(store, loc') + val loc'' = COMBINE (loc, loc') + val n' = find(store, c) + val store = insert(store, c, n' - w') + in + (SOME loc'', store) + end + else raise Fail "incorrect number of bits" + end + | BITCOUNTER c :: stages => let + val (SOME loc, store) = step stages ((w, k, al), store) + val n = find(store, c) + val store = insert(store, c, n + w) + in + (SOME loc, store) + end + | SEQ ss :: stages => step (ss @ stages) ((w, k, al), store) + | PAD c :: stages => let + val n = find(store, c) + val n' = roundUp(n, al * memSize) + val store = insert(store, c, n') + val (SOME loc, store) = step stages ((w, k, al), store) + in + (SOME loc, store) + end + | ALIGN_TO f :: stages => step stages ((w, k, f al), store) + (* end case *)) + + fun allocate stages (req, store) = let + val (SOME loc, store) = step stages (req, store) + in + (loc, store) + end + handle Match => raise StagedAlloc "failed to allocate" + + fun allocate' stages (req, (locs, store)) = let + val (loc, store) = allocate stages (req, store) + in + (loc :: locs, store) + end + + fun allocateSeq stages (reqs, store) = let + val (locs, store') = List.foldl (allocate' stages) ([], store) reqs + in + (List.rev locs, store') + end + + fun allocateSeqs stages (reqss, store) = let + fun alloc (reqs, (locss, store)) = let + val (locs, store) = allocateSeq stages (reqs, store) + in + (locs :: locss, store) + end + val (locss, store') = List.foldl alloc ([], store) reqss + in + (List.rev locss, store') + end + + fun freeze (stages, (_, ob, regs)) = + {overflowBlock=ob, allocatedRegs=regs} + + (* extract the kind of a location *) + fun kindOfLoc (REG(_, k, _)) = k + | kindOfLoc (BLOCK_OFFSET(_, k, _)) = k + | kindOfLoc (COMBINE(l1, l2)) = kindOfLoc l1 + | kindOfLoc (NARROW(_, _, k)) = k + + end (* StagedAllocationFn *) diff --git a/MLRISC/staged-alloc/allocator/staged-allocation-sig.sml b/MLRISC/staged-alloc/allocator/staged-allocation-sig.sml new file mode 100644 index 0000000..e73a1c3 --- /dev/null +++ b/MLRISC/staged-alloc/allocator/staged-allocation-sig.sml @@ -0,0 +1,106 @@ + (* staged-allocation-sig.sml + * + * This code implements the Staged Allocation technique for calling conventions. + * You can find the POPL06 paper describing this technique at + * http://www.eecs.harvard.edu/~nr/pubs/staged-abstract.html + * + * Mike Rainey (mrainey@cs.uchicago.edu) + * + * + * Terminology for staged allocation (see the paper for more details): + * counter - stores of current the number of bits allocated to the call + * location - a mechanism for passing a parameter, e.g., machine registers, stack locations, etc. + * req - corresponds to a parameter + * alignment - alignment in bits for a location + * width - width in bits for a location + * stage - one rule for specifying calling conventions + * + *) + +signature STAGED_ALLOCATION = + sig + + type loc_kind (* gprs, fprs, stack locations, etc. *) + type width = int (* bit width *) + + type req = (width * loc_kind * int) (* the last field is the alignment *) + + (* locations consist of machine registers, offsets in to overflow blocks, combinations of + * locations, and narrowed locations. + *) + type reg_id + type reg = (int * loc_kind * reg_id) + datatype loc + = REG of reg + | BLOCK_OFFSET of (width * loc_kind * int) + | COMBINE of (loc * loc) + | NARROW of (loc * width * loc_kind) (* specifies a coercion to the given width and kind *) + + type counter (* abstract counter for a convention *) + type store (* counter -> "bit offset" *) + + datatype block_direction = UP | DOWN (* direction in which the overflow block grows *) + + (* language for specifying calling conventions *) + datatype stage + = OVERFLOW of { (* overflow block (usually corresponds to a runtime stack) *) + counter : counter, + blockDirection : block_direction, + maxAlign : int + } + | WIDEN of (width -> width) + | CHOICE of ( (req -> bool) * stage) list (* choose the first stage whose corresponding + * predicate is true. *) + | REGS_BY_ARGS of (counter * reg list) (* the first n arguments go into the first n + * registers *) + | ARGCOUNTER of counter + | REGS_BY_BITS of (counter * reg list) (* the first n bits arguments go into the first + * n bits of registers *) + | BITCOUNTER of counter + | SEQ of stage list (* sequence of stages *) + | PAD of counter (* specifies an alignment (this rule applies even + * for registers) *) + | ALIGN_TO of (width -> width) (* specifies an alignment *) + + exception StagedAlloc of string + + (* bit width of a machine location *) + val width : loc -> int + + (* Create a counter. *) + val freshCounter : unit -> counter + + (* helper function that creates a counter c, and returns the sequence: + * [BITCOUNTER c, REGS_BY_BITS (c, regs)] (this function is taken from + * the paper). + *) + val useRegs : reg list -> (counter * stage) + + (* find the value stored at a counter. *) + val find : (store * counter) -> int + + (* initialize a list of counters for a calling convention. *) + val init : counter list -> store + + (* takes an spec and automaton and allocates a location *) + val allocate : stage list -> (req * store) + -> (loc * store) + + (* allocate lifted to sequences of requests *) + val allocateSeq : stage list -> (req list * store) + -> (loc list * store) + + (* allocateSeq lifted to sequences of sequences of requests *) + val allocateSeqs : stage list -> (req list list * store) + -> (loc list list * store) + + (* takes an automaton (the first two parameters) and returns the overflow block and the set of + * registers used by previous calls to allocate + *) + val freeze : (stage list * store) + -> {overflowBlock : loc option, allocatedRegs : loc list} + + (* extract the kind of a location *) + val kindOfLoc : loc -> loc_kind + + end (* STAGED_ALLOCATION *) diff --git a/MLRISC/staged-alloc/conventions/.cm/GUID/c-loc-kind.sml b/MLRISC/staged-alloc/conventions/.cm/GUID/c-loc-kind.sml new file mode 100644 index 0000000..92b8bff --- /dev/null +++ b/MLRISC/staged-alloc/conventions/.cm/GUID/c-loc-kind.sml @@ -0,0 +1 @@ +guid-$OTHER-MLRISC/(StagedAlloc.cm):../staged-alloc/conventions/c-loc-kind.sml-1714016102.320 diff --git a/MLRISC/staged-alloc/conventions/.cm/GUID/sparc-c-fn.sml b/MLRISC/staged-alloc/conventions/.cm/GUID/sparc-c-fn.sml new file mode 100644 index 0000000..a7408d9 --- /dev/null +++ b/MLRISC/staged-alloc/conventions/.cm/GUID/sparc-c-fn.sml @@ -0,0 +1 @@ +guid-$OTHER-MLRISC/(StagedAlloc.cm):../staged-alloc/conventions/sparc-c-fn.sml-1714016102.426 diff --git a/MLRISC/staged-alloc/conventions/.cm/GUID/x86-64-c-fn.sml b/MLRISC/staged-alloc/conventions/.cm/GUID/x86-64-c-fn.sml new file mode 100644 index 0000000..95168a1 --- /dev/null +++ b/MLRISC/staged-alloc/conventions/.cm/GUID/x86-64-c-fn.sml @@ -0,0 +1 @@ +guid-$OTHER-MLRISC/(StagedAlloc.cm):../staged-alloc/conventions/x86-64-c-fn.sml-1714016102.451 diff --git a/MLRISC/staged-alloc/conventions/.cm/GUID/x86-c-fn.sml b/MLRISC/staged-alloc/conventions/.cm/GUID/x86-c-fn.sml new file mode 100644 index 0000000..f731533 --- /dev/null +++ b/MLRISC/staged-alloc/conventions/.cm/GUID/x86-c-fn.sml @@ -0,0 +1 @@ +guid-$OTHER-MLRISC/(StagedAlloc.cm):../staged-alloc/conventions/x86-c-fn.sml-1714016102.329 diff --git a/MLRISC/staged-alloc/conventions/.cm/SKEL/c-loc-kind.sml b/MLRISC/staged-alloc/conventions/.cm/SKEL/c-loc-kind.sml new file mode 100644 index 0000000..49493e4 --- /dev/null +++ b/MLRISC/staged-alloc/conventions/.cm/SKEL/c-loc-kind.sml @@ -0,0 +1,2 @@ +Skeleton 5 +ad"CLocKind"h0 \ No newline at end of file diff --git a/MLRISC/staged-alloc/conventions/.cm/SKEL/sparc-c-fn.sml b/MLRISC/staged-alloc/conventions/.cm/SKEL/sparc-c-fn.sml new file mode 100644 index 0000000..c2d5abd --- /dev/null +++ b/MLRISC/staged-alloc/conventions/.cm/SKEL/sparc-c-fn.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f1CLocKind"ae"SparcCConventionFn"i2aSA"gp1c"STAGED_ALLOCATION"f4(d"List"d"Int"h0 \ No newline at end of file diff --git a/MLRISC/staged-alloc/conventions/.cm/SKEL/x86-64-c-fn.sml b/MLRISC/staged-alloc/conventions/.cm/SKEL/x86-64-c-fn.sml new file mode 100644 index 0000000..76e9e73 --- /dev/null +++ b/MLRISC/staged-alloc/conventions/.cm/SKEL/x86-64-c-fn.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f1CLocKind"ae"X86_64CConventionFn"i2aSA"gp1c"STAGED_ALLOCATION"f4)d"List"d"Int"h0 \ No newline at end of file diff --git a/MLRISC/staged-alloc/conventions/.cm/SKEL/x86-c-fn.sml b/MLRISC/staged-alloc/conventions/.cm/SKEL/x86-c-fn.sml new file mode 100644 index 0000000..765c1d5 --- /dev/null +++ b/MLRISC/staged-alloc/conventions/.cm/SKEL/x86-c-fn.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f1CLocKind"ae"X86CConventionFn"i2aSA"gp1c"STAGED_ALLOCATION"f4&d"List"d"Int"h0 \ No newline at end of file diff --git a/MLRISC/staged-alloc/conventions/.cm/amd64-unix/c-loc-kind.sml b/MLRISC/staged-alloc/conventions/.cm/amd64-unix/c-loc-kind.sml new file mode 100644 index 0000000..1a392ab Binary files /dev/null and b/MLRISC/staged-alloc/conventions/.cm/amd64-unix/c-loc-kind.sml differ diff --git a/MLRISC/staged-alloc/conventions/.cm/amd64-unix/sparc-c-fn.sml b/MLRISC/staged-alloc/conventions/.cm/amd64-unix/sparc-c-fn.sml new file mode 100644 index 0000000..f69d56f Binary files /dev/null and b/MLRISC/staged-alloc/conventions/.cm/amd64-unix/sparc-c-fn.sml differ diff --git a/MLRISC/staged-alloc/conventions/.cm/amd64-unix/x86-64-c-fn.sml b/MLRISC/staged-alloc/conventions/.cm/amd64-unix/x86-64-c-fn.sml new file mode 100644 index 0000000..cd96152 Binary files /dev/null and b/MLRISC/staged-alloc/conventions/.cm/amd64-unix/x86-64-c-fn.sml differ diff --git a/MLRISC/staged-alloc/conventions/.cm/amd64-unix/x86-c-fn.sml b/MLRISC/staged-alloc/conventions/.cm/amd64-unix/x86-c-fn.sml new file mode 100644 index 0000000..d60811b Binary files /dev/null and b/MLRISC/staged-alloc/conventions/.cm/amd64-unix/x86-c-fn.sml differ diff --git a/MLRISC/staged-alloc/conventions/c-loc-kind.sml b/MLRISC/staged-alloc/conventions/c-loc-kind.sml new file mode 100644 index 0000000..ee9c802 --- /dev/null +++ b/MLRISC/staged-alloc/conventions/c-loc-kind.sml @@ -0,0 +1,11 @@ +structure CLocKind = + struct + + (* kinds of locations for passing C arguments *) + datatype loc_kind + = GPR (* general-purpose registers *) + | FPR (* floating-point registers *) + | STK (* stack locations *) + | FSTK (* floating-point stack locations *) + + end diff --git a/MLRISC/staged-alloc/conventions/power-pc-fn.sml b/MLRISC/staged-alloc/conventions/power-pc-fn.sml new file mode 100644 index 0000000..a032b39 --- /dev/null +++ b/MLRISC/staged-alloc/conventions/power-pc-fn.sml @@ -0,0 +1,79 @@ +(* power-pc-fn.sml + * + * C calling convention for the OS X Power PC. + * + *) + +functor PowerPCCConventionFn ( + type reg_id + + (* parameter GPRs *) + val r3 : reg_id + val r4 : reg_id + val r5 : reg_id + val r6 : reg_id + val r7 : reg_id + val r8 : reg_id + val r9 : reg_id + val r10 : reg_id + (* parameter FPRs *) + val f1 : reg_id + val f2 : reg_id + val f3 : reg_id + val f4 : reg_id + val f5 : reg_id + val f6 : reg_id + val f7 : reg_id + val f8 : reg_id + val f9 : reg_id + val f10 : reg_id + val f11 : reg_id + val f12 : reg_id + val f13 : reg_id + + structure SA : STAGED_ALLOCATION + where type reg_id = reg_id + where type loc_kind = CLocKind.loc_kind + + ) = struct + + datatype loc_kind = datatype CLocKind.loc_kind + + fun gpr r = (32, GPR, r) + fun gprs rs = List.map gpr rs + fun fpr r = (64, FPR, r) + fun fprs rs = List.map fpr rs + + val useRegs = #2 o SA.useRegs + + val paramFprs = [f1, f2, f3, f4, f5, f6, f7, f8, f9, f10, f11, f12, f13] + val paramGprs = [r3, r4, r5, r6, r7, r8, r9, r10] + + (* conventions for calling a C function *) + val cStack = SA.freshCounter() + val cCallGpr = SA.freshCounter() + val params = [ + SA.WIDEN (fn w => Int.max(32, w)), + SA.BITCOUNTER cCallGpr, + SA.CHOICE [ + (fn (w, k, store) => k = FPR, + SA.SEQ [SA.WIDEN(fn w => 64), SA.USEREGS_RESERVE (List.map fpr paramFprs)]) + (fn (w, k, store) => true, + SA.USEREGS_RESERVE (List.map gpr paramGprs)) + ] + ] + + (* rules for returning values *) + val returns = [ + SA.CHOICE [ + (fn (w, k, store) => k = FPR, SA.SEQ [ + SA.WIDEN(fn w => 64), useRegs [fpr f1] + ]), + (fn (w, k, store) => true, SA.SEQ [ + SA.WIDEN(fn w => 32), useRegs [gpr r3, gpr r4] + ]) + + ] + ] + + end diff --git a/MLRISC/staged-alloc/conventions/sparc-c-fn.sml b/MLRISC/staged-alloc/conventions/sparc-c-fn.sml new file mode 100644 index 0000000..ced43fd --- /dev/null +++ b/MLRISC/staged-alloc/conventions/sparc-c-fn.sml @@ -0,0 +1,87 @@ +(* sparc-c-fn.sml + * + * Calling convention for the Sparc: + * + * Return result: + * + Integer and pointer results are returned in %o0 + * + 64-bit integers (long long) returned in %o1/%o1 + * + float results are returned in %f0; double in %f0/%f1 + * + Struct results are returned in space provided by the caller. + * The address of this space is passed to the callee as a hidden + * implicit argument on the stack (in the caller's frame). It + * gets stored at [%sp+64] (from the caller's point of view). + * An UNIMP instruction must be placed after the call instruction, + * indicating how much space has been reserved for the return value. + * + long double results are returned like structs + * + * Function arguments: + * + Arguments that are smaller than a word are promoted to word-size. + * + Up to six argument words (words 0-5) are passed in registers + * %o0...%o5. This includes doubles and long longs. Alignment for + * those types is NOT maintained, i.e., it is possible for an 8-byte + * quantity to end up in an odd-even register pair. + * * Arguments beyond 6 words are passed on the stack in the caller's + * frame. For this, the caller must reserve space in its frame + * prior to the call. Argument word 6 appears at [%sp+92], word 7 + * at [%sp+96], ... + * + struct arguments are passed as pointers to a copy of the struct. + * The copy itself is allocated by the caller in its stack frame. + * + long double arguments are passed like structs (i.e., via pointer + * to temp copy) + * + Space for argument words 0-5 is already allocated in the + * caller's frame. This space might be used by the callee to + * save those arguments that must be addressable. %o0 corresponds + * to [%sp+68], %o1 to [%sp+72], ... + *) + +functor SparcCConventionFn ( + type reg_id + + (* parameter GPRs*) + val r8 : reg_id + val r9 : reg_id + val r10 : reg_id + val r11 : reg_id + val r12 : reg_id + val r13 : reg_id + (* parameter FPRs *) + val f0 : reg_id + val f1 : reg_id + + structure SA : STAGED_ALLOCATION + where type reg_id = reg_id + where type loc_kind = CLocKind.loc_kind + + ) = struct + + datatype loc_kind = datatype CLocKind.loc_kind + + fun gpr r = (32, GPR, r) + fun gprs rs = List.map gpr rs + fun fpr r = (64, FPR, r) + fun fprs rs = List.map fpr rs + + (* convention for calling a C function *) + val (cParam, paramRegs) = SA.useRegs (List.map gpr [r8, r9, r10, r12, r13]) + val cStack = SA.freshCounter() + val params = [ + SA.WIDEN (fn w => Int.max(32, w)), + paramRegs, + SA.OVERFLOW{counter=cStack, blockDirection=SA.UP, maxAlign=8} + ] + + (* convention for returning values *) + val (cFRet, retFlt) = SA.useRegs (List.map fpr [f0, f1]) + val (cRet, retGpr) = SA.useRegs (List.map gpr [r8]) + val return = [ + SA.WIDEN (fn w => Int.max(32, w)), + SA.CHOICE [ + (fn (w, k, store) => k = FPR, retFlt), + (fn (w, k, store) => true, retGpr) + ] + ] + + (* initial store *) + val store0 = SA.init[cStack, cParam, cFRet, cRet] + + end diff --git a/MLRISC/staged-alloc/conventions/x86-64-c-fn.sml b/MLRISC/staged-alloc/conventions/x86-64-c-fn.sml new file mode 100644 index 0000000..9b2ee85 --- /dev/null +++ b/MLRISC/staged-alloc/conventions/x86-64-c-fn.sml @@ -0,0 +1,129 @@ +(* x86-64-c-fn.sml + * + * C calling convention for the X86-64 using the System V ABI + * + * Register conventions: + * %rax return value (caller save) + * %rbx optional base pointer (callee save) + * %rbp optional frame pointer (callee save) + * %rdi arg 1 (caller save) + * %rsi arg 2 (caller save) + * %rdx arg 3 (caller save) + * %rcx arg 4 (caller save) + * %r8 arg 5 (caller save) + * %r9 arg 6 (caller save) + * %r10 chain pointer (caller save) + * %r11 scratch (caller save) + * %r12-r15 general purpose (callee save) + * %xmm0-xmm1 pass/return fp (caller save) + * %xmm2-xmm7 pass fp (caller save) + * %xmm8-xmm15 scratch (caller save) + * + * Calling conventions: + * + * Return result: + * + Integer and pointer results are returned in %rax. + * + 128-bit integers returned in %rax/%rdx. + * + Floating-point results returned in %xmm0/%xmm1. + * + Small structs returned in %rax/%rdx. Otherwise, returned + * in space provided by the caller. + * + * Function arguments: + * + The first 6 integer arguments go in the argument registers. + * + The first 8 floating-point registers go in %xmm0-xmm8. + * + Otherwise, arguments are pushed on the stack right to left. + * + The stack is 16-byte aligned. + * + Struct arguments are padded out to word length. + *) + + +functor X86_64CConventionFn ( + type reg_id + + (* relevant GPRs *) + val rax : reg_id + val rdi : reg_id + val rsi : reg_id + val rdx : reg_id + val rcx : reg_id + val r8 : reg_id + val r9 : reg_id + (* relevant FPRs*) + val xmm0 : reg_id + val xmm1 : reg_id + val xmm2 : reg_id + val xmm3 : reg_id + val xmm4 : reg_id + val xmm5 : reg_id + val xmm6 : reg_id + val xmm7 : reg_id + + structure SA : STAGED_ALLOCATION + where type reg_id = reg_id + where type loc_kind = CLocKind.loc_kind + + ) = struct + + datatype loc_kind = datatype CLocKind.loc_kind + + fun gpr r = (64, GPR, r) + fun gprs rs = List.map gpr rs + fun fpr r = (64, FPR, r) + fun fprs rs = List.map fpr rs + + (* registers *) + val gprRetRegs = gprs [rax, rdx] + val fprRetRegs = fprs [xmm0, xmm1] + + val gprParamRegs = gprs [rdi, rsi, rdx, rcx, r8, r9] + val fprParamRegs = fprs [xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7] + + (* staged-allocation counters *) + val cCallStk = SA.freshCounter () + val cCallGpr = SA.freshCounter () + val cCallFpr = SA.freshCounter () + val cRetGpr = SA.freshCounter () + val cRetFpr = SA.freshCounter () + + (* max frame alignment in bytes *) + val maxAlign = 16 + + (* initial store *) + val store0 = SA.init [cCallStk, cCallGpr, cCallFpr, cRetFpr, cRetGpr] + + (* rules for passing arguments *) + val params = [ + SA.WIDEN (fn w => Int.max (64, w)), + SA.CHOICE [ + (fn (w, k, store) => (k = GPR), SA.SEQ [ + SA.BITCOUNTER cCallGpr, + SA.REGS_BY_BITS (cCallGpr, gprParamRegs) + ]), + (fn (w, k, store) => (k = FPR), SA.SEQ [ + SA.BITCOUNTER cCallFpr, + SA.REGS_BY_BITS (cCallFpr, fprParamRegs) + ]), + (fn (w, k, store) => (k = STK orelse k = FSTK), + SA.OVERFLOW {counter=cCallStk, + blockDirection=SA.UP, + maxAlign=maxAlign}) + ], + SA.OVERFLOW {counter=cCallStk, + blockDirection=SA.UP, + maxAlign=maxAlign} + ] + + (* rules for returning values *) + val returns = [ + SA.WIDEN (fn w => Int.max (64, w)), + SA.CHOICE [ + (fn (w, k, store) => (k = GPR), + SA.SEQ [ + SA.BITCOUNTER cRetGpr, + SA.REGS_BY_BITS (cRetGpr, gprRetRegs)]), + (fn (w, k, store) => (k = FPR), + SA.SEQ [ + SA.BITCOUNTER cRetFpr, + SA.REGS_BY_BITS (cRetGpr, fprRetRegs)])] + ] + end diff --git a/MLRISC/staged-alloc/conventions/x86-c-fn.sml b/MLRISC/staged-alloc/conventions/x86-c-fn.sml new file mode 100644 index 0000000..1931d6b --- /dev/null +++ b/MLRISC/staged-alloc/conventions/x86-c-fn.sml @@ -0,0 +1,90 @@ +(* x86-c-fn.sml + * + * + * C function calls for the X86 using the System V ABI + * + * Register conventions: + * + * %eax return value (caller save) + * %ebx global offset for PIC (callee save) + * %ecx scratch (caller save) + * %edx extra return/scratch (caller save) + * %ebp optional frame pointer (callee save) + * %esp stack pointer (callee save) + * %esi locals (callee save) + * %edi locals (callee save) + * + * %st(0) top of FP stack; FP return value + * %st(1..7) FP stack; must be empty on entry and return + * + * Calling convention: + * + * Return result: + * + Integer and pointer results are returned in %eax. Small + * integer results are not promoted. + * + 64-bit integers (long long) returned in %eax/%edx + * + Floating point results are returned in %st(0) (all types). + * + Struct results are returned in space provided by the caller. + * The address of this space is passed to the callee as an + * implicit 0th argument, and on return %eax contains this + * address. The called function is responsible for removing + * this argument from the stack using a "ret $4" instruction. + * NOTE: the MacOS X ABI returns small structs in %eax/%edx. + * + * Function arguments: + * + Arguments are pushed on the stack right to left. + * + Integral and pointer arguments take one word on the stack. + * + float arguments take one word on the stack. + * + double arguments take two words on the stack. The i386 ABI does + * not require double word alignment for these arguments. + * + long double arguments take three words on the stack. + * + struct arguments are padded out to word length. + * + *) + +functor X86CConventionFn ( + type reg_id + val eax : reg_id + val edx : reg_id + val st0 : reg_id + structure SA : STAGED_ALLOCATION + where type reg_id = reg_id + where type loc_kind = CLocKind.loc_kind + ) = struct + + datatype loc_kind = datatype CLocKind.loc_kind + + fun gpr r = (32, GPR, r) + fun gprs rs = List.map gpr rs + fun fpr r = (80, FPR, r) + fun fprs rs = List.map fpr rs + + (* conventions for calling a C function *) + val maxAlign = 16 + val cStack = SA.freshCounter() + val params = [ + SA.SEQ[ + SA.WIDEN (fn ty => Int.max(32, ty)), + SA.OVERFLOW {counter=cStack, blockDirection=SA.UP, maxAlign=maxAlign} + ] + ] + + (* conventions for returning from a C call *) + val (cInt, retInGpr) = SA.useRegs (gprs [eax, edx]) + val (cFloat, retInFpr) = SA.useRegs (fprs [st0]) + val returns = [ + SA.CHOICE [ + (* return in general-purpose register *) + (fn (ty, k, store) => k = GPR, + SA.SEQ [SA.WIDEN (fn ty => Int.max(ty, 32)), + retInGpr]), + (* return in floating-point register *) + (fn (ty, k, store) => k = FPR, + SA.SEQ [SA.WIDEN (fn ty => 80), retInFpr]) + ] + ] + + (* initial store *) + val store0 = SA.init [cInt, cFloat, cStack] + +end (* X86CConventionsFn *) diff --git a/MLRISC/visualization/allDisplays.sml b/MLRISC/visualization/allDisplays.sml new file mode 100644 index 0000000..3fc8766 --- /dev/null +++ b/MLRISC/visualization/allDisplays.sml @@ -0,0 +1,36 @@ +(* + * This module ties together all the visualization backends. + * + * -- Allen + *) + +structure AllDisplays : GRAPH_DISPLAY = +struct + + val viewer = MLRiscControl.getString "viewer" + + fun visualize print = + (case !viewer of + "daVinci" => daVinci.visualize print + | "vcg" => VCG.visualize print + | "dot" => Dot.visualize print + | _ => daVinci.visualize print + ) + + fun program() = + (case !viewer of + "daVinci" => daVinci.program() + | "vcg" => VCG.program() + | "dot" => Dot.program() + | _ => daVinci.program() + ) + + fun suffix() = + (case !viewer of + "daVinci" => daVinci.suffix() + | "vcg" => VCG.suffix() + | "dot" => Dot.suffix() + | _ => daVinci.suffix() + ) + +end diff --git a/MLRISC/visualization/cfgViewer.sml b/MLRISC/visualization/cfgViewer.sml new file mode 100644 index 0000000..65c6251 --- /dev/null +++ b/MLRISC/visualization/cfgViewer.sml @@ -0,0 +1,93 @@ +functor CFGViewer + (structure CFG : CONTROL_FLOW_GRAPH + structure GraphViewer : GRAPH_VIEWER + structure Asm : INSTRUCTION_EMITTER where I = CFG.I) + : sig + val view : CFG.cfg -> unit + end = +struct + structure CFG = CFG + structure L = GraphLayout + structure FMT = FormatInstruction(Asm) + structure G = Graph + + val outline = MLRiscControl.getFlag "view-outline" + + fun view(cfg as G.GRAPH g) = let + val CFG.INFO{annotations, ...} = #graph_info g + val toString = FMT.toString (!annotations) + fun graph _ = [] + val colorScale = + Array.fromList + ["#ccffff", "#99ffff", "#66ccff", "#54a9ff", "#ccff99", + "#ffff99", "#ffcc66", "#ff9966", "#cc6666", "#d14949", + "#d14949"] + + fun weightRange([], min, max) = (min, max-min) + | weightRange((_,_,CFG.EDGE{w, ...})::rest, min, max) = let + val wt = !w + in + if wt > max then weightRange(rest, min, wt) + else if wt < min then weightRange(rest, wt, max) + else weightRange(rest, min, max) + end + + val (loWt, range) = weightRange( #edges g (), ~1.0, ~1.0) + + fun color w = let + val pos = + if range < 100.0 + then floor(((w-loWt) * 10.0) / range) + else floor (Math.log10(w-loWt) * 10.0 / Math.log10 range) + in + Array.sub(colorScale, pos) + end + + val ENTRY = hd(#entries g ()) + val EXIT = hd(#exits g ()) + + val red = L.COLOR "#ff0000" + val yellow = L.COLOR "yellow" + val green = L.COLOR "green" + + fun edge(i,j,CFG.EDGE{w, ...}) = + let val label = L.LABEL(Real.toString (!w)) + in [label, L.COLOR(color(!w))] + end + + fun title(blknum,ref freq) = + " "^Int.toString blknum ^ " freq="^Real.toString freq + + fun ann(annotations) = + List.foldl(fn (a,l) => "/* "^Annotations.toString a^" */\n"^l) "" + (!annotations) + + fun node(_, CFG.BLOCK{kind, labels, id, freq, insns, annotations, ...}) = + (case kind + of CFG.START => + [L.LABEL("entry"^title(id,freq)^"\n"^ann(annotations))] + | CFG.STOP => + [L.LABEL("exit"^title(id,freq))] + | _ => + [L.LABEL("BLK"^title(id,freq)^"\n"^ + (case !labels + of [] => "" + | labs => + String.concatWith ":\n" (map Label.toString labs) ^ ":\n" + (*easc*)) ^ + ann(annotations)^ + (if !outline then "" else + List.foldl + (fn (i,t) => let val text = toString i + in if text = "" then t else text^"\n"^t + end) + "" + (!insns)))] + (*esac*)) + + in + GraphViewer.view + (L.makeLayout{graph=graph, edge=edge, node=node} cfg) + end + +end diff --git a/MLRISC/visualization/daVinci.sml b/MLRISC/visualization/daVinci.sml new file mode 100644 index 0000000..9875f8d --- /dev/null +++ b/MLRISC/visualization/daVinci.sml @@ -0,0 +1,121 @@ +(* + * This backend communicates with the daVinci tool. + * + * -- Allen + *) + +structure daVinci : GRAPH_DISPLAY = +struct + + structure L = GraphLayout + structure G = Graph + + fun suffix() = ".daVinci" + fun program() = "daVinci" + + fun visualize out (G.GRAPH G) = + let val l = ref 0 + fun newLabel() = (l := !l + 1; "L" ^ Int.toString(!l)) + val spaces = " "; + fun int n = out (Int.toString n) + fun nl() = out "\n" + fun tab t = out(String.substring(spaces,0,t)) handle _ => out spaces + fun nice l = String.toString (String.map (fn #"\t" => #" " + | c => c) l) + fun quote s = (out "\""; out s; out "\"") + fun comma() = out ", " + fun atom(a,b) = (out "a("; quote a; comma(); quote b; out ")") + fun OBJECT l = atom("OBJECT",nice l) + fun FONTFAMILY f = atom("FONTFAMILY",f) + fun FONTSTYLE s = atom("FONTSTYLE",s) + fun COLOR c = atom("COLOR",c) + fun EDGECOLOR c = atom("EDGECOLOR",c) + fun Dir () = atom("_DIR","none") + fun label l = (OBJECT l; comma(); + FONTFAMILY "courier"; comma(); + FONTSTYLE "normal" + ) + + exception FOUND of string + + fun nodeAttrib (L.LABEL l) = label l + | nodeAttrib (L.COLOR c) = COLOR c + | nodeAttrib (L.BORDERLESS) = atom("_GO","text") + | nodeAttrib (L.BORDER_COLOR c) = COLOR c + | nodeAttrib _ = () + + and isNodeAttrib (L.LABEL l) = true + | isNodeAttrib (L.COLOR c) = true + | isNodeAttrib (L.BORDERLESS) = true + | isNodeAttrib (L.BORDER_COLOR c) = true + | isNodeAttrib _ = false + + and edgeAttrib (L.COLOR c) = EDGECOLOR c + | edgeAttrib (L.ARROW_COLOR c) = EDGECOLOR c + | edgeAttrib (L.EDGEPATTERN p) = atom("EDGEPATTERN",p) + | edgeAttrib L.DIR = Dir() + | edgeAttrib _ = () + + and isEdgeAttrib (L.COLOR c) = true + | isEdgeAttrib (L.ARROW_COLOR c) = true + | isEdgeAttrib (L.EDGEPATTERN p) = true + | isEdgeAttrib (L.DIR) = true + | isEdgeAttrib _ = false + + and findEdgeLabel ((L.LABEL "")::l) = findEdgeLabel l + | findEdgeLabel ((L.LABEL l)::_) = raise FOUND l + | findEdgeLabel (_::l) = findEdgeLabel l + | findEdgeLabel [] = () + + and listify comma f [] = () + | listify comma f [x] = f x + | listify comma f (x::xs) = (f x; comma(); listify comma f xs) + + and attribs t (p,gen) a = + (tab t; out "[\n"; + tab (t+2); listify comma gen (List.filter p a); nl(); + tab t; out "]\n" + ) + + fun doNode t (n,a) = + ( tab t; + out "l(\""; int n; out "\",n(\"\",\n"; + attribs (t+2) (isNodeAttrib,nodeAttrib) a; + comma(); + tab (t+2); out "[\n"; + listify comma (doEdge (t+2)) (#out_edges G n); + tab (t+2); out "]))\n" + ) + + and doEdge t (i,j,a) = + ((findEdgeLabel a; + tab t; out "l(\""; + int i; out "->"; int j; + (* dummy label; daVinci chokes on duplicated edge names *) + out "-"; out(newLabel()); + out "\",e(\"\",\n"; + attribs (t+2) (isEdgeAttrib,edgeAttrib) a; + tab t; out ",r(\""; int j; out "\")))") + handle FOUND l => + let val x = newLabel() + in + (tab t; out "l(\""; int i; out("->"^x^"\",e(\"\","); + attribs (t+2) (isEdgeAttrib,edgeAttrib) (L.DIR::a); + out ",l(\""; out(newLabel()); + out "\",n(\"\",[a(\"OBJECT\",\""; + out l; out "\"),a(\"_GO\",\"text\")],"; + out("[l(\""^x^"->"); int j; out "\",e(\"\","; + attribs (t+2) (isEdgeAttrib,edgeAttrib) a; + tab t; out ",r(\""; int j; out "\")))]))))" + ) + end + ) + + in out "[\n"; + listify comma (doNode 2) (#nodes G ()); + out "]\n" + end + + +end + diff --git a/MLRISC/visualization/dot.sml b/MLRISC/visualization/dot.sml new file mode 100644 index 0000000..3b94367 --- /dev/null +++ b/MLRISC/visualization/dot.sml @@ -0,0 +1,48 @@ +(* + * This communicates with the dot tool + * + * -- Allen + *) + +structure Dot : GRAPH_DISPLAY = +struct + + structure L = GraphLayout + structure G = Graph + + fun suffix() = ".dot" + fun program() = "dotty" + + fun visualize out (G.GRAPH G) = + let val spaces = " "; + fun int n = out (Int.toString n) + fun tab t = out(String.substring(spaces,0,t)) handle _ => out spaces + fun semi() = out ";" + fun name n = if n < 0 then (out "XX"; int(~n)) + else (out "X"; int n) + fun attribs t a = (out "[ shape=box"; doAttribs t "," a; out "]") + + and doAttrib t comma (L.LABEL "") = false + | doAttrib t comma (L.LABEL l) = (out comma; tab t; label l; true) + | doAttrib t comma (L.COLOR c) = + (out comma; tab t; out "color=\""; out c; out "\""; true) + | doAttrib t comma _ = false + + and doAttribs t comma [] = () + | doAttribs t comma (l::ls) = + doAttribs t (if doAttrib t comma l then ",\n" else comma) ls + + and label l = (out "label=\""; out(String.toString l); out "\"\n") + + fun doNode t (n,a) = (tab t; name n; attribs t a; semi()) + + fun doEdge t (i,j,a) = + (tab t; name i; out "-> "; name j; attribs t a; semi()) + + in out("digraph " ^ #name G ^ " {\n"); + #forall_nodes G (doNode 2); + #forall_edges G (doEdge 2); + out "}\n" + end + +end diff --git a/MLRISC/visualization/graphDisplay.sig b/MLRISC/visualization/graphDisplay.sig new file mode 100644 index 0000000..4055ccf --- /dev/null +++ b/MLRISC/visualization/graphDisplay.sig @@ -0,0 +1,15 @@ +(* + * This is the signature of a visualization backend. + * + * -- Allen + *) + +signature GRAPH_DISPLAY = +sig + + val suffix : unit -> string + val program : unit -> string + val visualize : (string -> unit) -> GraphLayout.layout -> unit + +end + diff --git a/MLRISC/visualization/graphLayout.sml b/MLRISC/visualization/graphLayout.sml new file mode 100644 index 0000000..61980db --- /dev/null +++ b/MLRISC/visualization/graphLayout.sml @@ -0,0 +1,46 @@ +(* + * Here are some graph layout annotations. + * + * -- Allen + *) + +structure GraphLayout = +struct + + datatype format = + LABEL of string + | COLOR of string + | NODE_COLOR of string + | EDGE_COLOR of string + | TEXT_COLOR of string + | ARROW_COLOR of string + | BACKARROW_COLOR of string + | BORDER_COLOR of string + | BORDERLESS + | SHAPE of string + | ALGORITHM of string + | EDGEPATTERN of string + | DIR (* for internal use only! *) + + val STYLE = Annotations.new(SOME(fn _ => "STYLE")) : + format list Annotations.property + + type ('n,'e,'g) style = + { edge : 'e Graph.edge -> format list, + node : 'n Graph.node -> format list, + graph : 'g -> format list + } + + type layout = (format list, format list, format list) Graph.graph + + fun makeLayout {node,edge,graph} G = + IsomorphicGraphView.map node edge graph G + + fun makeLayout' G = + let val edgeColor = [COLOR "red"] + in makeLayout {node=fn (i,_) => [LABEL(Int.toString i)], + edge=fn _ => edgeColor, + graph=fn _ => []} G + end +end + diff --git a/MLRISC/visualization/graphViewer.sig b/MLRISC/visualization/graphViewer.sig new file mode 100644 index 0000000..f4d67dd --- /dev/null +++ b/MLRISC/visualization/graphViewer.sig @@ -0,0 +1,13 @@ +(* + * Graph viewer signature. + * + * -- Allen + *) + +signature GRAPH_VIEWER = +sig + + val view : GraphLayout.layout -> unit + +end + diff --git a/MLRISC/visualization/graphViewer.sml b/MLRISC/visualization/graphViewer.sml new file mode 100644 index 0000000..1809aa6 --- /dev/null +++ b/MLRISC/visualization/graphViewer.sml @@ -0,0 +1,46 @@ +(* + * This module starts a graph viewer. + * + * -- Allen + *) + +functor GraphViewer(D : GRAPH_DISPLAY) : GRAPH_VIEWER = +struct + + structure L = GraphLayout + structure G = Graph + structure FileSys = OS.FileSys + + val tmpName = MLRiscControl.getString "tmpName" + + fun display exec (layout as G.GRAPH l) filename = + let val filename = filename ^ D.suffix() + val _ = print("[ "^ #name l^": "^ + D.program() ^ " " ^ filename ^ + " "^Int.toString(#order l ())^" nodes"^ + " "^Int.toString(#size l ())^" edges"); + val file = TextIO.openOut filename + val out = fn s => TextIO.output(file,s) + val _ = D.visualize out layout + val _ = TextIO.closeOut file + val _ = print(" ]\n") + val _ = exec filename + in + () + end handle e => + (print("[Uncaught exception in "^exnName e^" graph viewer]\n"); raise e) + + fun system filename = (OS.Process.system + ((D.program()) ^ " " ^ filename); + FileSys.remove filename) + + fun fork filename = (OS.Process.system( + "(" ^ (D.program()) ^ " " ^ filename ^ + "; /bin/rm " ^ filename ^ ") &")) + + fun getTmpName() = + if !tmpName = "" then FileSys.tmpName() else !tmpName + + fun view layout = display system layout (getTmpName()) +end + diff --git a/MLRISC/visualization/mlrisc-format-insn.sml b/MLRISC/visualization/mlrisc-format-insn.sml new file mode 100644 index 0000000..0a6c04a --- /dev/null +++ b/MLRISC/visualization/mlrisc-format-insn.sml @@ -0,0 +1,47 @@ +(* + * This just provide a very simple pretty printing function. + * It is used for visualization. + * + * -- Allen + * + *) + +signature FORMAT_INSTRUCTION = +sig + structure I : INSTRUCTIONS + + val toString : Annotations.annotations -> I.instruction -> string + +end + +functor FormatInstruction(Asm : INSTRUCTION_EMITTER) : FORMAT_INSTRUCTION = +struct + structure I = Asm.I + + fun toString an insn = + let val buffer = StringOutStream.mkStreamBuf() + val S = StringOutStream.openStringOut buffer + val () = AsmStream.withStream S + (fn insn => + let val Asm.S.STREAM{emit,...} = Asm.makeStream an + in emit insn + end) insn + val text = StringOutStream.getString buffer + fun isSpace #" " = true + | isSpace #"\t" = true + | isSpace _ = false + val text = foldr (fn (x,"") => x | (x,y) => x^" "^y) "" + (String.tokens isSpace text) + fun stripNL "" = "" + | stripNL s = + let fun f(0) = "" + | f(i) = + case String.sub(s,i) of + #"\n" => f(i-1) + | #" " => f(i-1) + | _ => String.extract(s,0,SOME(i+1)) + in f(size s - 1) end + in stripNL text end + +end + diff --git a/MLRISC/visualization/sources.cm b/MLRISC/visualization/sources.cm new file mode 100644 index 0000000..188df75 --- /dev/null +++ b/MLRISC/visualization/sources.cm @@ -0,0 +1,25 @@ +Library + signature GRAPH_DISPLAY + signature GRAPH_VIEWER + structure daVinci + structure Dot + structure GraphLayout + structure VCG + functor GraphViewerFn + functor AllDisplaysFn +is + ../library/sources.cm + ../graphs/sources.cm + allDisplays.sml + daVinci.sml + dot.sml + graphDisplay.sig + graphLayout.sml + graphViewer.sig + graphViewer.sml + vcg.sml + +#if defined(NEW_CM) + $basis.cm +#endif + diff --git a/MLRISC/visualization/vcg.sml b/MLRISC/visualization/vcg.sml new file mode 100644 index 0000000..59d9985 --- /dev/null +++ b/MLRISC/visualization/vcg.sml @@ -0,0 +1,71 @@ +(* + * This module communicates with the vcg tool. + * + * -- Allen + *) + +structure VCG : GRAPH_DISPLAY = +struct + + structure L = GraphLayout + structure G = Graph + + fun suffix() = ".vcg" + fun program() = "xvcg" + + fun visualize out (G.GRAPH G) = + let val spaces = " "; + fun int n = out (Int.toString n) + fun nl() = out "\n" + fun tab t = out(String.substring(spaces,0,t)) handle _ => out spaces + fun color k c = (out k; out c; nl()) + fun openBrace t k = (tab t; out k; out ": {\n") + fun closeBrace t = (tab t; out "}\n") + + fun doStyle t (L.ALGORITHM a) = + (tab t; out "layoutalgorithm: "; out a; nl()) + | doStyle t (L.NODE_COLOR c) = (tab t; color "node.color: " c) + | doStyle t (L.EDGE_COLOR c) = (tab t; color "edge.color: " c) + | doStyle t (L.TEXT_COLOR c) = (tab t; color "textcolor: " c) + | doStyle t (L.ARROW_COLOR c) = (tab t; color "arrowcolor: " c) + | doStyle t (L.BACKARROW_COLOR c) = (tab t; color "backarrowcolor: " c) + | doStyle t (L.BORDER_COLOR c) = (tab t; color "bordercolor: " c) + | doStyle t _ = () + + fun label l = (out "label: \""; out(String.toString l); out "\"") + + fun doAttrib t (L.LABEL "") = () + | doAttrib t (L.LABEL l) = (tab t; label l; nl()) + | doAttrib t (L.COLOR c) = (tab t; color "color: " c) + | doAttrib t (L.BORDERLESS) = (tab t; color "bordercolor: " "white") + | doAttrib t _ = () + + fun doNode t (n,a) = + (openBrace t "node"; + tab (t+2); out "title: \""; int n; out "\"\n"; + app (doAttrib (t+2)) a; + closeBrace t) + + fun doEdge t kind (i,j,a) = + (openBrace t kind; + tab (t+2); out "sourcename: \""; int i; out "\"\n"; + tab (t+2); out "targetname: \""; int j; out "\"\n"; + app (doAttrib (t+2)) a; + closeBrace t) + + fun defaultStyle t = + (tab t; out "display_edge_labels: yes\n"; + tab t; out "layoutalgorithm: minbackward\n" + ) + + in out "graph: {\n"; + defaultStyle 2; + app (doStyle 2) (#graph_info G); + #forall_nodes G (doNode 2); + #forall_edges G (doEdge 2 "edge"); + out "}\n" + end + + +end + diff --git a/MLRISC/x86/README.x86 b/MLRISC/x86/README.x86 new file mode 100644 index 0000000..5c164fc --- /dev/null +++ b/MLRISC/x86/README.x86 @@ -0,0 +1,18 @@ +Changes to the instruction set +============================== + +1. The operands Displaced and Indexed now take an extra Region argument. + This is necessary for propagating alias analysis info to the backend. +2. The CALL instruction also takes an additional Region argument. +3. The assembly is generated by the MDGen tool. The machine code emitter + is not, since it is doing a lot of funny things. + +New optimizations in the x86 backend +====================================== + + +Changes to the instruction selection module +=========================================== + +1. It now uses the new MLTREE +2. It also propagates annotations to the backend. diff --git a/MLRISC/x86/backpatch/x86Jumps.sml b/MLRISC/x86/backpatch/x86Jumps.sml new file mode 100644 index 0000000..120792e --- /dev/null +++ b/MLRISC/x86/backpatch/x86Jumps.sml @@ -0,0 +1,116 @@ +(* X86Jumps.sml --- information to resolve jumps for runtime code generation. + * + * COPYRIGHT (c) 1997 Bell Laboratories. + *) + +functor X86Jumps + (structure Instr : X86INSTR + structure Eval : MLTREE_EVAL where T = Instr.T + structure Shuffle : X86SHUFFLE where I = Instr + structure MCEmitter : MC_EMIT where I = Instr) : SDI_JUMPS = +struct + structure I = Instr + structure C = I.C + structure Const = I.Constant + + fun error msg = MLRiscErrorMsg.error("X86Jumps",msg) + + val esp = 4 + val ebp = 5 + val branchDelayedArch = false + + fun imm8 i = ~128 <= i andalso i < 128 + + fun isSdi(I.ANNOTATION{i, ...}) = isSdi i + | isSdi(I.LIVE _) = true + | isSdi(I.KILL _) = true + | isSdi(I.COPY _) = false + | isSdi(I.INSTR instr) = let + fun operand(I.ImmedLabel _) = true + | operand(I.LabelEA _) = true + | operand(I.Displace{disp, ...}) = operand disp + | operand(I.Indexed{disp, ...}) = operand disp + | operand _ = false + fun cmptest{lsrc, rsrc} = operand lsrc orelse operand rsrc + in + case instr + of I.JMP(opnd, _) => operand opnd + | I.JCC{opnd, ...} => operand opnd + | I.BINARY{src, dst, ...} => operand src orelse operand dst + | I.MOVE{src, dst, ...} => operand src orelse operand dst + | I.LEA{addr, ...} => operand addr + | ( I.CMPL arg | I.CMPW arg | I.CMPB arg + | I.TESTL arg | I.TESTW arg | I.TESTB arg) => cmptest arg + | I.MULTDIV{src, ...} => operand src + | I.MUL3{src1, ...} => operand src1 + | I.UNARY{opnd, ...} => operand opnd + | I.SET{opnd, ...} => operand opnd + | I.CMOV{src, dst, ...} => operand src + | (I.PUSHL opnd | I.PUSHW opnd | I.PUSHB opnd) => operand opnd + | I.POP opnd => operand opnd + | I.FSTPT opnd => operand opnd + | I.FSTPL opnd => operand opnd + | I.FSTPS opnd => operand opnd + | I.FSTL opnd => operand opnd + | I.FSTS opnd => operand opnd + | I.FLDT opnd => operand opnd + | I.FLDL opnd => operand opnd + | I.FLDS opnd => operand opnd + | I.FBINARY{src, dst, ...} => operand src orelse operand dst + | I.FIBINARY{src, ...} => operand src + | I.FILD opnd => operand opnd + | I.FILDL opnd => operand opnd + | I.FILDLL opnd => operand opnd + | _ => false + end + + fun minSize(I.ANNOTATION{i, ...}) = minSize i + | minSize(I.LIVE _) = 0 + | minSize(I.KILL _) = 0 + | minSize(I.INSTR i) = + (case i + of I.JMP _ => 2 + | I.JCC _ => 2 + | I.LEA _ => 2 + | _ => 1) + | minSize _ = error"minSize" + + + fun maxSize _ = 12 + + (* value of span-dependent operand *) + fun operand(I.ImmedLabel le) = Eval.valueOf le + | operand(I.LabelEA le) = Eval.valueOf le + | operand _ = error "operand" + + val encode = MCEmitter.emitInstr + + fun sdiSize(I.ANNOTATION{i, ...}, labmap, loc) = sdiSize(i, labmap, loc) + | sdiSize(I.LIVE _, _, _) = 0 + | sdiSize(I.KILL _, _, _) = 0 + + | sdiSize(I.INSTR instr, labmap, loc) = let + fun branch(opnd, short, long) = let + val offset = operand opnd - loc + in if imm8(offset - 2) then short else long + end + in + case instr + of I.JMP(opnd, _) => branch(opnd, 2, 5) + | I.JCC{opnd, ...} => branch(opnd, 2, 6) + | _ => Word8Vector.length(encode(I.INSTR instr)) + end (*sdiSize*) + | sdiSize _ = error "sdiSize" + + fun expand(I.ANNOTATION{i,...}, size, loc) = expand(i, size, loc) + | expand(I.LIVE _, _, _) = [] + | expand(I.KILL _, _, _) = [] + | expand(I.INSTR instr, size, loc) = + (case instr + of I.JMP(opnd, labs) => [I.jmp(I.Relative(operand opnd-loc), labs)] + | I.JCC{cond, opnd} => + [I.jcc{cond=cond, opnd=I.Relative(operand opnd-loc)}] + | opnd => [I.INSTR opnd]) + | expand _ = error "expand" +end + diff --git a/MLRISC/x86/c-calls/ia32-svid.sml b/MLRISC/x86/c-calls/ia32-svid.sml new file mode 100644 index 0000000..a9bb61c --- /dev/null +++ b/MLRISC/x86/c-calls/ia32-svid.sml @@ -0,0 +1,471 @@ +(* ia32-svid.sml + * + * COPYRIGHT (c) 2000 Bell Labs, Lucent Technologies + * + * C function calls for the IA32 using the System V ABI + * + * Register conventions: + * + * %eax return value (caller save) + * %ebx global offset for PIC (callee save) + * %ecx scratch (caller save) + * %edx extra return/scratch (caller save) + * %ebp optional frame pointer (callee save) + * %esp stack pointer (callee save) + * %esi locals (callee save) + * %edi locals (callee save) + * + * %st(0) top of FP stack; FP return value + * %st(1..7) FP stack; must be empty on entry and return + * + * Calling convention: + * + * Return result: + * + Integer and pointer results are returned in %eax. Small + * integer results are not promoted. + * + 64-bit integers (long long) returned in %eax/%edx + * + Floating point results are returned in %st(0) (all types). + * + Struct results are returned in space provided by the caller. + * The address of this space is passed to the callee as an + * implicit 0th argument, and on return %eax contains this + * address. The called function is responsible for removing + * this argument from the stack using a "ret $4" instruction. + * NOTE: the MacOS X ABI returns small structs in %eax/%edx. + * + * Function arguments: + * + Arguments are pushed on the stack right to left. + * + Integral and pointer arguments take one word on the stack. + * + float arguments take one word on the stack. + * + double arguments take two words on the stack. The i386 ABI does + * not require double word alignment for these arguments. + * + long double arguments take three words on the stack. + * + struct arguments are padded out to word length. + * + * Questions: + * - what about stack frame alignment? + *) + +functor IA32SVID_CCalls ( + structure T : MLTREE + val ix : (T.stm,T.rexp,T.fexp,T.ccexp) X86InstrExt.sext -> T.sext + (* Note that the fast_loating_point flag must match the one passed + * to the code generator module. + *) + val fast_floating_point : bool ref + (* alignment requirement for stack frames; should be a power of two + * that is at least four. + *) + val frameAlign : int + (* Should small structs/unions be returned in %eax/%edx? *) + val returnSmallStructsInRegs : bool + ) : C_CALLS = struct + + structure T = T + structure Ty = CTypes + structure C = X86Cells + structure IX = X86InstrExt + + fun error msg = MLRiscErrorMsg.error ("IA32SVID_CCalls", msg) + + datatype c_arg + = ARG of T.rexp + | FARG of T.fexp + | ARGS of c_arg list + + val mem = T.Region.memory + val stack = T.Region.stack + + (* MLRISC types *) + val wordTy = 32 + val fltTy = 32 + val dblTy = 64 + val xdblTy = 80 + + (* shorts and chars are promoted to 32-bits *) + val naturalIntSz = wordTy + + val paramAreaOffset = 0 (* stack offset to param area *) + + (* This annotation is used to indicate that a call returns a fp value + * in %st(0) + *) + val fpReturnValueInST0 = #create MLRiscAnnotations.RETURN_ARG C.ST0 + + val sp = C.esp + val spR = T.REG(wordTy, sp) + + fun fpr(sz,f) = T.FPR(T.FREG(sz, f)) + fun gpr(sz,r) = T.GPR(T.REG(sz, r)) + val eax = C.eax + val st0 = C.ST(0) + + (* the C calling convention requires that the FP stack be empty on function + * entry. We add the fpStk list to the defs when the fast_floating_point flag + * is set. + *) + val fpStk = List.tabulate(8, fn i => fpr(xdblTy, C.ST i)) + + (* note that the caller saves includes the result register (%eax) *) + val callerSaves = [gpr(wordTy, eax), gpr(wordTy, C.ecx), gpr(wordTy, C.edx)] + + (* C callee-save registers *) + val calleeSaveRegs = [C.ebx, C.esi, C.edi] (* C callee-save registers *) + val calleeSaveFRegs = [] (* C callee-save floating-point registers *) + + (* align the address to the given alignment, which must be a power of 2 *) + fun alignAddr (addr, align) = let + val mask = Word.fromInt(align-1) + in + Word.toIntX(Word.andb(Word.fromInt addr + mask, Word.notb mask)) + end + + fun align4 addr = Word.toIntX(Word.andb(Word.fromInt addr + 0w3, Word.notb 0w3)) + + (* size and natural alignment for integer types. *) + fun sizeOfInt Ty.I_char = {ty = 8, sz = 1, align = 1} + | sizeOfInt Ty.I_short = {ty = 16, sz = 2, align = 2} + | sizeOfInt Ty.I_int = {ty = 32, sz = 4, align = 4} + | sizeOfInt Ty.I_long = {ty = 32, sz = 4, align = 4} + | sizeOfInt Ty.I_long_long = {ty = 64, sz = 8, align = 4} + + (* sizes of other C types *) + val sizeOfPtr = {ty = 32, sz = 4, align = 4} + + (* compute the size and alignment information for a struct; tys is the list + * of member types. + * The total size is padded to agree with the struct's alignment. + *) + fun sizeOfStruct tys = let + fun ssz ([], maxAlign, offset) = + {sz = alignAddr(offset, maxAlign), align = maxAlign} + | ssz (ty::tys, maxAlign, offset) = let + val {sz, align} = sizeOfTy ty + val offset = alignAddr(offset, align) + in + ssz (tys, Int.max(maxAlign, align), offset+sz) + end + in + ssz (tys, 1, 0) + end + + (* the size alignment of a union type is the maximum of the sizes and alignments of the + * members. The final size is padded to agree with the alignment. + *) + and sizeOfUnion tys = let + fun usz ([], maxAlign, maxSz) = + {sz = alignAddr(maxSz, maxAlign), align = maxAlign} + | usz (ty::tys, maxAlign, maxSz) = let + val {sz, align} = sizeOfTy ty + in + usz (tys, Int.max(maxAlign, align), Int.max(sz, maxSz)) + end + in + usz (tys, 1, 0) + end + + and sizeOfTy Ty.C_void = error "unexpected void argument type" + | sizeOfTy Ty.C_float = {sz = 4, align = 4} + | sizeOfTy Ty.C_double = {sz = 8, align = 4} + | sizeOfTy Ty.C_long_double = {sz = 12, align = 4} + | sizeOfTy (Ty.C_unsigned isz) = let + val {sz, align, ...} = sizeOfInt isz + in + {sz = sz, align = align} + end + | sizeOfTy (Ty.C_signed isz) = let + val {sz, align, ...} = sizeOfInt isz + in + {sz = sz, align = align} + end + | sizeOfTy Ty.C_PTR = {sz = 4, align = 4} + | sizeOfTy (Ty.C_ARRAY(ty, n)) = let + val {sz, align} = sizeOfTy ty + in + {sz = n*sz, align = align} + end + | sizeOfTy (Ty.C_STRUCT tys) = sizeOfStruct tys + | sizeOfTy (Ty.C_UNION tys) = sizeOfUnion tys + + (* the location of arguments/parameters; offsets are given with respect to the + * low end of the parameter area (see paramAreaOffset above). + *) + datatype arg_location + = Reg of T.ty * T.reg * T.I.machine_int option + (* integer/pointer argument in register *) + | FReg of T.fty * T.reg * T.I.machine_int option + (* floating-point argument in register *) + | Stk of T.ty * T.I.machine_int (* integer/pointer argument in parameter area *) + | FStk of T.fty * T.I.machine_int (* floating-point argument in parameter area *) + | Args of arg_location list + + fun intResult iTy = (case #ty(sizeOfInt iTy) + of 64 => raise Fail "register pair result" + | ty => (SOME(Reg(ty, eax, NONE)), NONE, 0) + (* end case *)) + + fun layout {conv, retTy, paramTys} = let + (* get the location of the result (resLoc) and the offset of the first + * parameter/argument. If the result is a struct or union, then we also + * compute the size and alignment of the result type (structRetLoc). + *) + val (resLoc, structRetLoc, argOffset) = (case retTy + of Ty.C_void => (NONE, NONE, 0) + | Ty.C_float => (SOME(FReg(fltTy, st0, NONE)), NONE, 0) + | Ty.C_double => (SOME(FReg(dblTy, st0, NONE)), NONE, 0) + | Ty.C_long_double => (SOME(FReg(xdblTy, st0, NONE)), NONE, 0) + | Ty.C_unsigned iTy => intResult iTy + | Ty.C_signed iTy => intResult iTy + | Ty.C_PTR => (SOME(Reg(wordTy, eax, NONE)), NONE, 0) + | Ty.C_ARRAY _ => error "array return type" + | Ty.C_STRUCT tys => let + val {sz, align} = sizeOfStruct tys + in + if (sz > 8) orelse (not returnSmallStructsInRegs) + then (SOME(Reg(wordTy, eax, NONE)), SOME{szb=sz, align=align}, 4) + else raise Fail "small struct return not implemented yet" + end + | Ty.C_UNION tys => let + val {sz, align} = sizeOfUnion tys + in + if (sz > 8) orelse (not returnSmallStructsInRegs) + then (SOME(Reg(wordTy, eax, NONE)), SOME{szb=sz, align=align}, 4) + else raise Fail "small union return not implemented yet" + end + (* end case *)) + fun assign ([], offset, locs) = (List.rev locs, align4 offset) + | assign (paramTy::params, offset, locs) = let + fun next {ty, align, sz} = let + val offset = alignAddr (offset, align) + in + assign (params, offset+sz, Stk(ty, IntInf.fromInt offset)::locs) + end + fun nextFlt (ty, szb) = let + val offset = alignAddr (offset, 4) + in + assign (params, offset+szb, FStk(ty, IntInf.fromInt offset)::locs) + end + fun assignMem {sz, align} = let + fun f (nb, offset, locs') = + if (nb >= 4) + then f(nb-4, offset+4, Stk(wordTy, IntInf.fromInt offset)::locs') + else if (nb >= 2) + then f(nb-2, offset+2, Stk(16, IntInf.fromInt offset)::locs') + else if (nb = 1) + then f(nb, offset+1, Stk(8, IntInf.fromInt offset)::locs') + else assign(params, align4 offset, Args(List.rev locs')::locs) + in + f (sz, offset, []) + end + in + case paramTy + of Ty.C_void => error "void argument type" + | Ty.C_float => nextFlt (fltTy, 4) + | Ty.C_double => nextFlt (dblTy, 8) + | Ty.C_long_double => nextFlt (xdblTy, 12) + | Ty.C_unsigned iTy => next (sizeOfInt iTy) + | Ty.C_signed iTy => next (sizeOfInt iTy) + | Ty.C_PTR => next sizeOfPtr + | Ty.C_ARRAY _ => next sizeOfPtr + | Ty.C_STRUCT tys => assignMem(sizeOfStruct tys) + | Ty.C_UNION tys => assignMem(sizeOfUnion tys) + (* end case *) + end + val (argLocs, argSz) = assign (paramTys, argOffset, []) + val argMem = {szb = alignAddr (argSz, frameAlign), align = frameAlign} + in { + argLocs = argLocs, argMem = argMem, + resLoc = resLoc, structRetLoc = structRetLoc + } end + + (* List of registers defined by a C Call with the given return type; this list + * is the result registers plus the caller-save registers. + *) + fun definedRegs resTy = if !fast_floating_point + then let + val defs = callerSaves @ fpStk + in + case resTy + of (Ty.C_unsigned(Ty.I_long_long)) => gpr(wordTy, C.edx) :: defs + | (Ty.C_signed(Ty.I_long_long)) => gpr(wordTy, C.edx) :: defs + | _ => defs + (* end case *) + end + else (case resTy + of (Ty.C_float) => fpr(fltTy, st0) :: callerSaves + | (Ty.C_double) => fpr(dblTy, st0) :: callerSaves + | (Ty.C_long_double) => fpr(xdblTy, st0) :: callerSaves + | (Ty.C_unsigned(Ty.I_long_long)) => gpr(wordTy, C.edx) :: callerSaves + | (Ty.C_signed(Ty.I_long_long)) => gpr(wordTy, C.edx) :: callerSaves + | _ => callerSaves + (* end case *)) + + fun fstp (32, f) = T.EXT(ix(IX.FSTPS(f))) + | fstp (64, f) = T.EXT(ix(IX.FSTPL(f))) + | fstp (80, f) = T.EXT(ix(IX.FSTPT(f))) + | fstp (sz, f) = error ("fstp(" ^ Int.toString sz ^ ",_)") + + fun genCall { + name, proto, paramAlloc, structRet, saveRestoreDedicated, callComment, args + } = let + val {argLocs, argMem, resLoc, structRetLoc} = layout proto + (* instruction to allocate space for arguments *) + val argAlloc = if ((#szb argMem = 0) orelse paramAlloc argMem) + then [] + else [T.MV(wordTy, sp, T.SUB(wordTy, spR, T.LI(IntInf.fromInt(#szb argMem))))] + (* for functions that return a struct/union, pass the location as an + * implicit first argument. Because the callee removes this implicit + * argument from the stack, we must also keep track of the size of the + * explicit arguments. + *) + val (args, argLocs, explicitArgSzB) = (case structRetLoc + of SOME pos => + (ARG(structRet pos)::args, Stk(wordTy, 0)::argLocs, #szb argMem - 4) + | NONE => (args, argLocs, #szb argMem) + (* end case *)) + (* generate instructions to copy arguments into argument area + * using %esp to address the argument area. + *) + val copyArgs = let + fun offSP 0 = spR + | offSP offset = T.ADD(wordTy, spR, T.LI offset) + fun f ([], [], stms) = List.rev stms + | f (arg::args, loc::locs, stms) = let + val stms = (case (arg, loc) + of (ARG(rexp as T.REG _), Stk(mty, offset)) => + T.STORE(mty, offSP offset, rexp, stack) + :: stms + | (ARG rexp, Stk(mty, offset)) => let + val tmp = C.newReg() + in + T.STORE(wordTy, offSP offset, T.REG(wordTy, tmp), stack) + :: T.MV(wordTy, tmp, rexp) + :: stms + end + | (ARG rexp, Args memLocs) => let + (* addrR is used to address the source of the memory object + * being passed to the memLocs. loadAddr is the code to + * initialize addrR. + *) + val (loadAddr, addrR) = (case rexp + of T.REG _ => ([], rexp) + | _ => let + val r = C.newReg() + in + ([T.MV(wordTy, r, rexp)], T.REG(wordTy, r)) + end + (* end case *)) + fun addr 0 = addrR + | addr offset = T.ADD(wordTy, addrR, T.LI offset) + (* stack offset of first destination word *) + val baseOffset = (case memLocs + of Stk(ty, offset)::_ => offset + | _ => error "bogus Args" + (* end case *)) + fun copy ([], stms) = stms + | copy (Stk(ty, offset)::locs, stms) = let + val tmp = C.newReg() + val stms = + T.STORE(ty, offSP offset, T.REG(ty, tmp), stack) + :: T.MV(ty, tmp, T.LOAD(ty, addr(offset - baseOffset), mem)) + :: stms + in + copy (locs, stms) + end + | copy _ = error "bogus memory location" + in + copy (memLocs, loadAddr @ stms) + end + | (FARG(fexp as T.FREG _), FStk(ty, offset)) => + T.FSTORE(ty, offSP offset, fexp, stack) :: stms + | (FARG fexp, FStk(ty, offset)) => let + val tmp = C.newFreg() + in + T.FSTORE(ty, offSP offset, T.FREG(ty, tmp), stack) + :: T.FMV(ty, tmp, fexp) + :: stms + end + | (ARGS _, _) => raise Fail "ARGS obsolete" + | _ => error "impossible location" + (* end case *)) + in + f (args, locs, stms) + end + | f _ = error "argument arity error" + in + f (args, argLocs, []) + end + (* the SVID specifies that the caller pops arguments, but the callee + * pops the arguments in a stdcall on Windows. I'm not sure what other + * differences there might be between the SVID and Windows ABIs. (JHR) + *) + val calleePops = (case #conv proto + of (""|"ccall") => false + | "stdcall" => true + | conv => error (concat [ + "unknown calling convention \"", String.toString conv, "\"" + ]) + (* end case *)) + val defs = definedRegs(#retTy proto) + val { save, restore } = saveRestoreDedicated defs + val callStm = T.CALL{ + funct=name, targets=[], defs=defs, uses=[], + region = mem, + pops = if calleePops + then Int32.fromInt(#szb argMem) + else Int32.fromInt(#szb argMem - explicitArgSzB) + } + val callStm = (case callComment + of NONE => callStm + | SOME c => T.ANNOTATION (callStm, #create MLRiscAnnotations.COMMENT c) + (* end case *)) + (* If return type is floating point then add an annotation RETURN_ARG + * This is currently a hack. Eventually MLTREE *should* support + * return arguments for CALLs. + * --- Allen + *) + val callStm = if !fast_floating_point + andalso ((#retTy proto = Ty.C_float) + orelse (#retTy proto = Ty.C_double) + orelse (#retTy proto = Ty.C_long_double)) + then T.ANNOTATION(callStm, fpReturnValueInST0) + else callStm + (* code to pop the arguments from the stack *) + val popArgs = if calleePops orelse (explicitArgSzB = 0) + then [] + else [T.MV(wordTy, sp, T.ADD(wordTy, spR, T.LI(IntInf.fromInt explicitArgSzB)))] + (* code to copy the result into fresh pseudo registers *) + val (resultRegs, copyResult) = (case resLoc + of NONE => ([], []) + | SOME(Reg(ty, r, _)) => let + val resReg = C.newReg() + in +(* FIXME: the ABI specifies that the high bits of %eax are undefined when the return + * type is an 8 or 16-bit integer type (see https://github.com/smlnj/legacy/issues/272). + * We should be zero/sign extending the result in those cases. This means that we + * also need to know if the result is signed so we may need a different type for + * result locations. + *) + ([T.GPR(T.REG(ty, resReg))], [T.COPY(ty, [resReg], [r])]) + end + | SOME(FReg(ty, r, _)) => let + val resReg = C.newFreg() + val res = [T.FPR(T.FREG(ty, resReg))] + in + (* If we are using fast floating point mode then do NOT + * generate FSTP. + * --- Allen + *) + if !fast_floating_point + then (res, [T.FCOPY(ty, [resReg], [r])]) + else (res, [fstp(ty, T.FREG(ty, resReg))]) + end + | _ => error "bogus result location" + (* end case *)) + (* assemble the call sequence *) + val callSeq = argAlloc @ copyArgs @ save @ [callStm] @ restore @ popArgs @ copyResult + in + {callseq=callSeq, result=resultRegs} + end + + end + diff --git a/MLRISC/x86/emit/x86Asm.sml b/MLRISC/x86/emit/x86Asm.sml new file mode 100644 index 0000000..1e46642 --- /dev/null +++ b/MLRISC/x86/emit/x86Asm.sml @@ -0,0 +1,792 @@ +(* + * WARNING: This file was automatically generated by MDLGen (v3.1) + * from the machine description file "x86/x86.mdl". + * DO NOT EDIT this file directly + *) + + +functor X86AsmEmitter(structure S : INSTRUCTION_STREAM + structure Instr : X86INSTR + where T = S.P.T + structure Shuffle : X86SHUFFLE + where I = Instr + structure MLTreeEval : MLTREE_EVAL + where T = Instr.T + +(*#line 512.7 "x86/x86.mdl"*) + structure MemRegs : MEMORY_REGISTERS where I=Instr + +(*#line 513.7 "x86/x86.mdl"*) + val memRegBase : CellsBasis.cell option + ) : INSTRUCTION_EMITTER = +struct + structure I = Instr + structure C = I.C + structure T = I.T + structure S = S + structure P = S.P + structure Constant = I.Constant + + open AsmFlags + + fun error msg = MLRiscErrorMsg.error("X86AsmEmitter",msg) + + fun makeStream formatAnnotations = + let val stream = !AsmStream.asmOutStream + fun emit' s = TextIO.output(stream,s) + val newline = ref true + val tabs = ref 0 + fun tabbing 0 = () + | tabbing n = (emit' "\t"; tabbing(n-1)) + fun emit s = (tabbing(!tabs); tabs := 0; newline := false; emit' s) + fun nl() = (tabs := 0; if !newline then () else (newline := true; emit' "\n")) + fun comma() = emit "," + fun tab() = tabs := 1 + fun indent() = tabs := 2 + fun ms n = let val s = Int.toString n + in if n<0 then "-"^String.substring(s,1,size s-1) + else s + end + fun emit_label lab = emit(P.Client.AsmPseudoOps.lexpToString(T.LABEL lab)) + fun emit_labexp le = emit(P.Client.AsmPseudoOps.lexpToString (T.LABEXP le)) + fun emit_const c = emit(Constant.toString c) + fun emit_int i = emit(ms i) + fun paren f = (emit "("; f(); emit ")") + fun defineLabel lab = emit(P.Client.AsmPseudoOps.defineLabel lab^"\n") + fun entryLabel lab = defineLabel lab + fun comment msg = (tab(); emit("/* " ^ msg ^ " */"); nl()) + fun annotation a = comment(Annotations.toString a) + fun getAnnotations() = error "getAnnotations" + fun doNothing _ = () + fun fail _ = raise Fail "AsmEmitter" + fun emit_region mem = comment(I.Region.toString mem) + val emit_region = + if !show_region then emit_region else doNothing + fun pseudoOp pOp = (emit(P.toString pOp); emit "\n") + fun init size = (comment("Code Size = " ^ ms size); nl()) + val emitCellInfo = AsmFormatUtil.reginfo + (emit,formatAnnotations) + fun emitCell r = (emit(CellsBasis.toString r); emitCellInfo r) + fun emit_cellset(title,cellset) = + (nl(); comment(title^CellsBasis.CellSet.toString cellset)) + val emit_cellset = + if !show_cellset then emit_cellset else doNothing + fun emit_defs cellset = emit_cellset("defs: ",cellset) + fun emit_uses cellset = emit_cellset("uses: ",cellset) + val emit_cutsTo = + if !show_cutsTo then AsmFormatUtil.emit_cutsTo emit + else doNothing + fun emitter instr = + let + fun asm_cond (I.EQ) = "e" + | asm_cond (I.NE) = "ne" + | asm_cond (I.LT) = "l" + | asm_cond (I.LE) = "le" + | asm_cond (I.GT) = "g" + | asm_cond (I.GE) = "ge" + | asm_cond (I.B) = "b" + | asm_cond (I.BE) = "be" + | asm_cond (I.A) = "a" + | asm_cond (I.AE) = "ae" + | asm_cond (I.C) = "c" + | asm_cond (I.NC) = "nc" + | asm_cond (I.P) = "p" + | asm_cond (I.NP) = "np" + | asm_cond (I.O) = "o" + | asm_cond (I.NO) = "no" + and emit_cond x = emit (asm_cond x) + and asm_binaryOp (I.ADDL) = "addl" + | asm_binaryOp (I.SUBL) = "subl" + | asm_binaryOp (I.ANDL) = "andl" + | asm_binaryOp (I.ORL) = "orl" + | asm_binaryOp (I.XORL) = "xorl" + | asm_binaryOp (I.SHLL) = "shll" + | asm_binaryOp (I.SARL) = "sarl" + | asm_binaryOp (I.SHRL) = "shrl" + | asm_binaryOp (I.IMULL) = "imull" + | asm_binaryOp (I.ADCL) = "adcl" + | asm_binaryOp (I.SBBL) = "sbbl" + | asm_binaryOp (I.ADDW) = "addw" + | asm_binaryOp (I.SUBW) = "subw" + | asm_binaryOp (I.ANDW) = "andw" + | asm_binaryOp (I.ORW) = "orw" + | asm_binaryOp (I.XORW) = "xorw" + | asm_binaryOp (I.SHLW) = "shlw" + | asm_binaryOp (I.SARW) = "sarw" + | asm_binaryOp (I.SHRW) = "shrw" + | asm_binaryOp (I.IMULW) = "imulw" + | asm_binaryOp (I.ADDB) = "addb" + | asm_binaryOp (I.SUBB) = "subb" + | asm_binaryOp (I.ANDB) = "andb" + | asm_binaryOp (I.ORB) = "orb" + | asm_binaryOp (I.XORB) = "xorb" + | asm_binaryOp (I.SHLB) = "shlb" + | asm_binaryOp (I.SARB) = "sarb" + | asm_binaryOp (I.SHRB) = "shrb" + | asm_binaryOp (I.IMULB) = "imulb" + | asm_binaryOp (I.BTSW) = "btsw" + | asm_binaryOp (I.BTCW) = "btcw" + | asm_binaryOp (I.BTRW) = "btrw" + | asm_binaryOp (I.BTSL) = "btsl" + | asm_binaryOp (I.BTCL) = "btcl" + | asm_binaryOp (I.BTRL) = "btrl" + | asm_binaryOp (I.ROLW) = "rolw" + | asm_binaryOp (I.RORW) = "rorw" + | asm_binaryOp (I.ROLL) = "roll" + | asm_binaryOp (I.RORL) = "rorl" + | asm_binaryOp (I.XCHGB) = "xchgb" + | asm_binaryOp (I.XCHGW) = "xchgw" + | asm_binaryOp (I.XCHGL) = "xchgl" + | asm_binaryOp (I.LOCK_ADCW) = "lock\n\tadcw" + | asm_binaryOp (I.LOCK_ADCL) = "lock\n\tadcl" + | asm_binaryOp (I.LOCK_ADDW) = "lock\n\taddw" + | asm_binaryOp (I.LOCK_ADDL) = "lock\n\taddl" + | asm_binaryOp (I.LOCK_ANDW) = "lock\n\tandw" + | asm_binaryOp (I.LOCK_ANDL) = "lock\n\tandl" + | asm_binaryOp (I.LOCK_BTSW) = "lock\n\tbtsw" + | asm_binaryOp (I.LOCK_BTSL) = "lock\n\tbtsl" + | asm_binaryOp (I.LOCK_BTRW) = "lock\n\tbtrw" + | asm_binaryOp (I.LOCK_BTRL) = "lock\n\tbtrl" + | asm_binaryOp (I.LOCK_BTCW) = "lock\n\tbtcw" + | asm_binaryOp (I.LOCK_BTCL) = "lock\n\tbtcl" + | asm_binaryOp (I.LOCK_ORW) = "lock\n\torw" + | asm_binaryOp (I.LOCK_ORL) = "lock\n\torl" + | asm_binaryOp (I.LOCK_SBBW) = "lock\n\tsbbw" + | asm_binaryOp (I.LOCK_SBBL) = "lock\n\tsbbl" + | asm_binaryOp (I.LOCK_SUBW) = "lock\n\tsubw" + | asm_binaryOp (I.LOCK_SUBL) = "lock\n\tsubl" + | asm_binaryOp (I.LOCK_XORW) = "lock\n\txorw" + | asm_binaryOp (I.LOCK_XORL) = "lock\n\txorl" + | asm_binaryOp (I.LOCK_XADDB) = "lock\n\txaddb" + | asm_binaryOp (I.LOCK_XADDW) = "lock\n\txaddw" + | asm_binaryOp (I.LOCK_XADDL) = "lock\n\txaddl" + and emit_binaryOp x = emit (asm_binaryOp x) + and asm_multDivOp (I.IMULL1) = "imull" + | asm_multDivOp (I.MULL1) = "mull" + | asm_multDivOp (I.IDIVL1) = "idivl" + | asm_multDivOp (I.DIVL1) = "divl" + and emit_multDivOp x = emit (asm_multDivOp x) + and asm_unaryOp (I.DECL) = "decl" + | asm_unaryOp (I.INCL) = "incl" + | asm_unaryOp (I.NEGL) = "negl" + | asm_unaryOp (I.NOTL) = "notl" + | asm_unaryOp (I.DECW) = "decw" + | asm_unaryOp (I.INCW) = "incw" + | asm_unaryOp (I.NEGW) = "negw" + | asm_unaryOp (I.NOTW) = "notw" + | asm_unaryOp (I.DECB) = "decb" + | asm_unaryOp (I.INCB) = "incb" + | asm_unaryOp (I.NEGB) = "negb" + | asm_unaryOp (I.NOTB) = "notb" + | asm_unaryOp (I.LOCK_DECL) = "lock\n\tdecl" + | asm_unaryOp (I.LOCK_INCL) = "lock\n\tincl" + | asm_unaryOp (I.LOCK_NEGL) = "lock\n\tnegl" + | asm_unaryOp (I.LOCK_NOTL) = "lock\n\tnotl" + and emit_unaryOp x = emit (asm_unaryOp x) + and asm_shiftOp (I.SHLDL) = "shldl" + | asm_shiftOp (I.SHRDL) = "shrdl" + and emit_shiftOp x = emit (asm_shiftOp x) + and asm_bitOp (I.BTW) = "btw" + | asm_bitOp (I.BTL) = "btl" + | asm_bitOp (I.LOCK_BTW) = "lock\n\tbtw" + | asm_bitOp (I.LOCK_BTL) = "lock\n\tbtl" + and emit_bitOp x = emit (asm_bitOp x) + and asm_move (I.MOVL) = "movl" + | asm_move (I.MOVB) = "movb" + | asm_move (I.MOVW) = "movw" + | asm_move (I.MOVSWL) = "movswl" + | asm_move (I.MOVZWL) = "movzwl" + | asm_move (I.MOVSBL) = "movsbl" + | asm_move (I.MOVZBL) = "movzbl" + and emit_move x = emit (asm_move x) + and asm_fbinOp (I.FADDP) = "faddp" + | asm_fbinOp (I.FADDS) = "fadds" + | asm_fbinOp (I.FMULP) = "fmulp" + | asm_fbinOp (I.FMULS) = "fmuls" + | asm_fbinOp (I.FCOMS) = "fcoms" + | asm_fbinOp (I.FCOMPS) = "fcomps" + | asm_fbinOp (I.FSUBP) = "fsubp" + | asm_fbinOp (I.FSUBS) = "fsubs" + | asm_fbinOp (I.FSUBRP) = "fsubrp" + | asm_fbinOp (I.FSUBRS) = "fsubrs" + | asm_fbinOp (I.FDIVP) = "fdivp" + | asm_fbinOp (I.FDIVS) = "fdivs" + | asm_fbinOp (I.FDIVRP) = "fdivrp" + | asm_fbinOp (I.FDIVRS) = "fdivrs" + | asm_fbinOp (I.FADDL) = "faddl" + | asm_fbinOp (I.FMULL) = "fmull" + | asm_fbinOp (I.FCOML) = "fcoml" + | asm_fbinOp (I.FCOMPL) = "fcompl" + | asm_fbinOp (I.FSUBL) = "fsubl" + | asm_fbinOp (I.FSUBRL) = "fsubrl" + | asm_fbinOp (I.FDIVL) = "fdivl" + | asm_fbinOp (I.FDIVRL) = "fdivrl" + and emit_fbinOp x = emit (asm_fbinOp x) + and asm_fibinOp (I.FIADDS) = "fiadds" + | asm_fibinOp (I.FIMULS) = "fimuls" + | asm_fibinOp (I.FICOMS) = "ficoms" + | asm_fibinOp (I.FICOMPS) = "ficomps" + | asm_fibinOp (I.FISUBS) = "fisubs" + | asm_fibinOp (I.FISUBRS) = "fisubrs" + | asm_fibinOp (I.FIDIVS) = "fidivs" + | asm_fibinOp (I.FIDIVRS) = "fidivrs" + | asm_fibinOp (I.FIADDL) = "fiaddl" + | asm_fibinOp (I.FIMULL) = "fimull" + | asm_fibinOp (I.FICOML) = "ficoml" + | asm_fibinOp (I.FICOMPL) = "ficompl" + | asm_fibinOp (I.FISUBL) = "fisubl" + | asm_fibinOp (I.FISUBRL) = "fisubrl" + | asm_fibinOp (I.FIDIVL) = "fidivl" + | asm_fibinOp (I.FIDIVRL) = "fidivrl" + and emit_fibinOp x = emit (asm_fibinOp x) + and asm_funOp (I.FCHS) = "fchs" + | asm_funOp (I.FABS) = "fabs" + | asm_funOp (I.FTST) = "ftst" + | asm_funOp (I.FXAM) = "fxam" + | asm_funOp (I.FPTAN) = "fptan" + | asm_funOp (I.FPATAN) = "fpatan" + | asm_funOp (I.FXTRACT) = "fxtract" + | asm_funOp (I.FPREM1) = "fprem1" + | asm_funOp (I.FDECSTP) = "fdecstp" + | asm_funOp (I.FINCSTP) = "fincstp" + | asm_funOp (I.FPREM) = "fprem" + | asm_funOp (I.FYL2XP1) = "fyl2xp1" + | asm_funOp (I.FSQRT) = "fsqrt" + | asm_funOp (I.FSINCOS) = "fsincos" + | asm_funOp (I.FRNDINT) = "frndint" + | asm_funOp (I.FSCALE) = "fscale" + | asm_funOp (I.FSIN) = "fsin" + | asm_funOp (I.FCOS) = "fcos" + and emit_funOp x = emit (asm_funOp x) + and asm_fenvOp (I.FLDENV) = "fldenv" + | asm_fenvOp (I.FNLDENV) = "fnldenv" + | asm_fenvOp (I.FSTENV) = "fstenv" + | asm_fenvOp (I.FNSTENV) = "fnstenv" + and emit_fenvOp x = emit (asm_fenvOp x) + and asm_fsize (I.FP32) = "s" + | asm_fsize (I.FP64) = "l" + | asm_fsize (I.FP80) = "t" + and emit_fsize x = emit (asm_fsize x) + and asm_isize (I.I8) = "8" + | asm_isize (I.I16) = "16" + | asm_isize (I.I32) = "32" + | asm_isize (I.I64) = "64" + and emit_isize x = emit (asm_isize x) + +(*#line 515.6 "x86/x86.mdl"*) + fun memReg r = MemRegs.memReg {reg=r, base=Option.valOf memRegBase} + +(*#line 516.6 "x86/x86.mdl"*) + fun emitInt32 i = + let +(*#line 517.10 "x86/x86.mdl"*) + val s = Int32.toString i + +(*#line 518.10 "x86/x86.mdl"*) + val s = (if (i >= 0) + then s + else ("-" ^ (String.substring (s, 1, (size s) - 1)))) + in emit s + end + +(*#line 521.6 "x86/x86.mdl"*) + val {low=SToffset, ...} = C.cellRange CellsBasis.FP + +(*#line 523.6 "x86/x86.mdl"*) + fun emitScale 0 = emit "1" + | emitScale 1 = emit "2" + | emitScale 2 = emit "4" + | emitScale 3 = emit "8" + | emitScale _ = error "emitScale" + and eImmed (I.Immed i) = emitInt32 i + | eImmed (I.ImmedLabel lexp) = emit_labexp lexp + | eImmed _ = error "eImmed" + and emit_operand opn = + (case opn of + I.Immed i => + ( emit "$"; + emitInt32 i ) + | I.ImmedLabel lexp => + ( emit "$"; + emit_labexp lexp ) + | I.LabelEA le => emit_labexp le + | I.Relative _ => error "emit_operand" + | I.Direct r => emitCell r + | I.MemReg r => emit_operand (memReg opn) + | I.ST f => emitCell f + | I.FPR f => + ( emit "%f"; + emit (Int.toString (CellsBasis.registerNum f))) + | I.FDirect f => emit_operand (memReg opn) + | I.Displace{base, disp, mem, ...} => + ( emit_disp disp; + emit "("; + emitCell base; + emit ")"; + emit_region mem ) + | I.Indexed{base, index, scale, disp, mem, ...} => + ( emit_disp disp; + emit "("; + + (case base of + NONE => () + | SOME base => emitCell base + ); + comma (); + emitCell index; + comma (); + emitScale scale; + emit ")"; + emit_region mem ) + ) + and emit_operand8 (I.Direct r) = emit (CellsBasis.toStringWithSize (r, 8)) + | emit_operand8 opn = emit_operand opn + and emit_disp (I.Immed 0) = () + | emit_disp (I.Immed i) = emitInt32 i + | emit_disp (I.ImmedLabel lexp) = emit_labexp lexp + | emit_disp _ = error "emit_disp" + +(*#line 568.7 "x86/x86.mdl"*) + fun stupidGas (I.ImmedLabel lexp) = emit_labexp lexp + | stupidGas opnd = + ( emit "*"; + emit_operand opnd ) + +(*#line 572.7 "x86/x86.mdl"*) + fun isMemOpnd (I.MemReg _) = true + | isMemOpnd (I.FDirect f) = true + | isMemOpnd (I.LabelEA _) = true + | isMemOpnd (I.Displace _) = true + | isMemOpnd (I.Indexed _) = true + | isMemOpnd _ = false + +(*#line 578.7 "x86/x86.mdl"*) + fun chop fbinOp = + let +(*#line 579.15 "x86/x86.mdl"*) + val n = size fbinOp + in + (case Char.toLower (String.sub (fbinOp, n - 1)) of + (#"s" | #"l") => String.substring (fbinOp, 0, n - 1) + | _ => fbinOp + ) + end + +(*#line 585.7 "x86/x86.mdl"*) + fun isST0 (I.ST r) = (CellsBasis.registerNum r) = 0 + | isST0 _ = false + +(*#line 589.7 "x86/x86.mdl"*) + fun emit_fbinaryOp (binOp, src, dst) = (if (isMemOpnd src) + then + ( emit_fbinOp binOp; + emit "\t"; + emit_operand src ) + else + ( emit (chop (asm_fbinOp binOp)); + emit "\t"; + + (case (isST0 src, isST0 dst) of + (_, true) => + ( emit_operand src; + emit ", %st" ) + | (true, _) => + ( emit "%st, "; + emit_operand dst ) + | _ => error "emit_fbinaryOp" + ))) + +(*#line 599.7 "x86/x86.mdl"*) + val emit_dst = emit_operand + +(*#line 600.7 "x86/x86.mdl"*) + val emit_src = emit_operand + +(*#line 601.7 "x86/x86.mdl"*) + val emit_opnd = emit_operand + +(*#line 602.7 "x86/x86.mdl"*) + val emit_opnd8 = emit_operand8 + +(*#line 603.7 "x86/x86.mdl"*) + val emit_rsrc = emit_operand + +(*#line 604.7 "x86/x86.mdl"*) + val emit_lsrc = emit_operand + +(*#line 605.7 "x86/x86.mdl"*) + val emit_addr = emit_operand + +(*#line 606.7 "x86/x86.mdl"*) + val emit_src1 = emit_operand + +(*#line 607.7 "x86/x86.mdl"*) + val emit_ea = emit_operand + +(*#line 608.7 "x86/x86.mdl"*) + val emit_count = emit_operand + fun emitInstr' instr = + (case instr of + I.NOP => emit "nop" + | I.JMP(operand, list) => + ( emit "jmp\t"; + stupidGas operand ) + | I.JCC{cond, opnd} => + ( emit "j"; + emit_cond cond; + emit "\t"; + stupidGas opnd ) + | I.CALL{opnd, defs, uses, return, cutsTo, mem, pops} => + ( emit "call\t"; + stupidGas opnd; + emit_region mem; + emit_defs defs; + emit_uses uses; + emit_cellset ("return", return); + emit_cutsTo cutsTo ) + | I.ENTER{src1, src2} => + ( emit "enter\t"; + emit_operand src1; + emit ", "; + emit_operand src2 ) + | I.LEAVE => emit "leave" + | I.RET option => + ( emit "ret"; + + (case option of + NONE => () + | SOME e => + ( emit "\t"; + emit_operand e ) + )) + | I.MOVE{mvOp, src, dst} => + ( emit_move mvOp; + emit "\t"; + emit_src src; + emit ", "; + emit_dst dst ) + | I.LEA{r32, addr} => + ( emit "leal\t"; + emit_addr addr; + emit ", "; + emitCell r32 ) + | I.CMPL{lsrc, rsrc} => + ( emit "cmpl\t"; + emit_rsrc rsrc; + emit ", "; + emit_lsrc lsrc ) + | I.CMPW{lsrc, rsrc} => + ( emit "cmpb\t"; + emit_rsrc rsrc; + emit ", "; + emit_lsrc lsrc ) + | I.CMPB{lsrc, rsrc} => + ( emit "cmpb\t"; + emit_rsrc rsrc; + emit ", "; + emit_lsrc lsrc ) + | I.TESTL{lsrc, rsrc} => + ( emit "testl\t"; + emit_rsrc rsrc; + emit ", "; + emit_lsrc lsrc ) + | I.TESTW{lsrc, rsrc} => + ( emit "testw\t"; + emit_rsrc rsrc; + emit ", "; + emit_lsrc lsrc ) + | I.TESTB{lsrc, rsrc} => + ( emit "testb\t"; + emit_rsrc rsrc; + emit ", "; + emit_lsrc lsrc ) + | I.BITOP{bitOp, lsrc, rsrc} => + ( emit_bitOp bitOp; + emit "\t"; + emit_rsrc rsrc; + emit ", "; + emit_lsrc lsrc ) + | I.BINARY{binOp, src, dst} => + (case (src, binOp) of + (I.Direct _, (I.SARL | I.SHRL | I.SHLL | I.SARW | I.SHRW | I.SHLW | I.SARB | I.SHRB | I.SHLB)) => + + ( emit_binaryOp binOp; + emit "\t%cl, "; + emit_dst dst ) + | _ => + ( emit_binaryOp binOp; + emit "\t"; + emit_src src; + emit ", "; + emit_dst dst ) + ) + | I.SHIFT{shiftOp, src, dst, count} => + (case count of + I.Direct ecx => + ( emit_shiftOp shiftOp; + emit "\t"; + emit_src src; + emit ", "; + emit_dst dst ) + | _ => + ( emit_shiftOp shiftOp; + emit "\t"; + emit_src src; + emit ", "; + emit_count count; + emit ", "; + emit_dst dst ) + ) + | I.CMPXCHG{lock, sz, src, dst} => + ( (if lock + then (emit "lock\n\t") + else ()); + emit "cmpxchg"; + + (case sz of + I.I8 => emit "b" + | I.I16 => emit "w" + | I.I32 => emit "l" + | I.I64 => error "CMPXCHG: I64" + ); + + ( emit "\t"; + emit_src src; + emit ", "; + emit_dst dst ) ) + | I.MULTDIV{multDivOp, src} => + ( emit_multDivOp multDivOp; + emit "\t"; + emit_src src ) + | I.MUL3{dst, src2, src1} => + ( emit "imull\t$"; + emitInt32 src2; + emit ", "; + emit_src1 src1; + emit ", "; + emitCell dst ) + | I.UNARY{unOp, opnd} => + ( emit_unaryOp unOp; + emit "\t"; + emit_opnd opnd ) + | I.SET{cond, opnd} => + ( emit "set"; + emit_cond cond; + emit "\t"; + emit_opnd8 opnd ) + | I.CMOV{cond, src, dst} => + ( emit "cmov"; + emit_cond cond; + emit "\t"; + emit_src src; + emit ", "; + emitCell dst ) + | I.PUSHL operand => + ( emit "pushl\t"; + emit_operand operand ) + | I.PUSHW operand => + ( emit "pushw\t"; + emit_operand operand ) + | I.PUSHB operand => + ( emit "pushb\t"; + emit_operand operand ) + | I.PUSHFD => emit "pushfd" + | I.POPFD => emit "popfd" + | I.POP operand => + ( emit "popl\t"; + emit_operand operand ) + | I.CDQ => emit "cdq" + | I.INTO => emit "into" + | I.FBINARY{binOp, src, dst} => emit_fbinaryOp (binOp, src, dst) + | I.FIBINARY{binOp, src} => + ( emit_fibinOp binOp; + emit "\t"; + emit_src src ) + | I.FUNARY funOp => emit_funOp funOp + | I.FUCOM operand => + ( emit "fucom\t"; + emit_operand operand ) + | I.FUCOMP operand => + ( emit "fucomp\t"; + emit_operand operand ) + | I.FUCOMPP => emit "fucompp" + | I.FCOMPP => emit "fcompp" + | I.FCOMI operand => + ( emit "fcomi\t"; + emit_operand operand; + emit ", %st" ) + | I.FCOMIP operand => + ( emit "fcomip\t"; + emit_operand operand; + emit ", %st" ) + | I.FUCOMI operand => + ( emit "fucomi\t"; + emit_operand operand; + emit ", %st" ) + | I.FUCOMIP operand => + ( emit "fucomip\t"; + emit_operand operand; + emit ", %st" ) + | I.FXCH{opnd} => + ( emit "fxch\t"; + emitCell opnd ) + | I.FSTPL operand => + (case operand of + I.ST _ => + ( emit "fstp\t"; + emit_operand operand ) + | _ => + ( emit "fstpl\t"; + emit_operand operand ) + ) + | I.FSTPS operand => + ( emit "fstps\t"; + emit_operand operand ) + | I.FSTPT operand => + ( emit "fstps\t"; + emit_operand operand ) + | I.FSTL operand => + (case operand of + I.ST _ => + ( emit "fst\t"; + emit_operand operand ) + | _ => + ( emit "fstl\t"; + emit_operand operand ) + ) + | I.FSTS operand => + ( emit "fsts\t"; + emit_operand operand ) + | I.FLD1 => emit "fld1" + | I.FLDL2E => emit "fldl2e" + | I.FLDL2T => emit "fldl2t" + | I.FLDLG2 => emit "fldlg2" + | I.FLDLN2 => emit "fldln2" + | I.FLDPI => emit "fldpi" + | I.FLDZ => emit "fldz" + | I.FLDL operand => + (case operand of + I.ST _ => + ( emit "fld\t"; + emit_operand operand ) + | _ => + ( emit "fldl\t"; + emit_operand operand ) + ) + | I.FLDS operand => + ( emit "flds\t"; + emit_operand operand ) + | I.FLDT operand => + ( emit "fldt\t"; + emit_operand operand ) + | I.FILD operand => + ( emit "fild\t"; + emit_operand operand ) + | I.FILDL operand => + ( emit "fildl\t"; + emit_operand operand ) + | I.FILDLL operand => + ( emit "fildll\t"; + emit_operand operand ) + | I.FNSTSW => emit "fnstsw" + | I.FENV{fenvOp, opnd} => + ( emit_fenvOp fenvOp; + emit "\t"; + emit_opnd opnd ) + | I.FMOVE{fsize, src, dst} => + ( emit "fmove"; + emit_fsize fsize; + emit "\t"; + emit_src src; + emit ", "; + emit_dst dst ) + | I.FILOAD{isize, ea, dst} => + ( emit "fiload"; + emit_isize isize; + emit "\t"; + emit_ea ea; + emit ", "; + emit_dst dst ) + | I.FBINOP{fsize, binOp, lsrc, rsrc, dst} => + ( emit_fbinOp binOp; + emit_fsize fsize; + emit "\t"; + emit_lsrc lsrc; + emit ", "; + emit_rsrc rsrc; + emit ", "; + emit_dst dst ) + | I.FIBINOP{isize, binOp, lsrc, rsrc, dst} => + ( emit_fibinOp binOp; + emit_isize isize; + emit "\t"; + emit_lsrc lsrc; + emit ", "; + emit_rsrc rsrc; + emit ", "; + emit_dst dst ) + | I.FUNOP{fsize, unOp, src, dst} => + ( emit_funOp unOp; + emit_fsize fsize; + emit "\t"; + emit_src src; + emit ", "; + emit_dst dst ) + | I.FCMP{i, fsize, lsrc, rsrc} => + ( (if i + then (emit "fcmpi") + else (emit "fcmp")); + + ( emit_fsize fsize; + emit "\t"; + emit_lsrc lsrc; + emit ", "; + emit_rsrc rsrc ) ) + | I.SAHF => emit "sahf" + | I.LFENCE => emit "lfence" + | I.MFENCE => emit "mfence" + | I.SFENCE => emit "sfence" + | I.PAUSE => emit "pause" + | I.LAHF => emit "lahf" + | I.SOURCE{} => emit "source" + | I.SINK{} => emit "sink" + | I.PHI{} => emit "phi" + ) + in tab(); emitInstr' instr; nl() + end (* emitter *) + and emitInstrIndented i = (indent(); emitInstr i; nl()) + and emitInstrs instrs = + app (if !indent_copies then emitInstrIndented + else emitInstr) instrs + + and emitInstr(I.ANNOTATION{i,a}) = + ( comment(Annotations.toString a); + nl(); + emitInstr i ) + | emitInstr(I.LIVE{regs, spilled}) = + comment("live= " ^ CellsBasis.CellSet.toString regs ^ + "spilled= " ^ CellsBasis.CellSet.toString spilled) + | emitInstr(I.KILL{regs, spilled}) = + comment("killed:: " ^ CellsBasis.CellSet.toString regs ^ + "spilled:: " ^ CellsBasis.CellSet.toString spilled) + | emitInstr(I.INSTR i) = emitter i + | emitInstr(I.COPY{k=CellsBasis.GP, sz, src, dst, tmp}) = + emitInstrs(Shuffle.shuffle{tmp=tmp, src=src, dst=dst}) + | emitInstr(I.COPY{k=CellsBasis.FP, sz, src, dst, tmp}) = + emitInstrs(Shuffle.shufflefp{tmp=tmp, src=src, dst=dst}) + | emitInstr _ = error "emitInstr" + + in S.STREAM{beginCluster=init, + pseudoOp=pseudoOp, + emit=emitInstr, + endCluster=fail, + defineLabel=defineLabel, + entryLabel=entryLabel, + comment=comment, + exitBlock=doNothing, + annotation=annotation, + getAnnotations=getAnnotations + } + end +end + diff --git a/MLRISC/x86/emit/x86MC.sml b/MLRISC/x86/emit/x86MC.sml new file mode 100644 index 0000000..6199649 --- /dev/null +++ b/MLRISC/x86/emit/x86MC.sml @@ -0,0 +1,298 @@ +(* + * WARNING: This file was automatically generated by MDLGen (v3.1) + * from the machine description file "x86/x86.mdl". + * DO NOT EDIT this file directly + *) + + +functor X86MCEmitter(structure Instr : X86INSTR + structure MLTreeEval : MLTREE_EVAL where T = Instr.T + structure Stream : INSTRUCTION_STREAM + structure CodeString : CODE_STRING + ) : INSTRUCTION_EMITTER = +struct + structure I = Instr + structure C = I.C + structure Constant = I.Constant + structure T = I.T + structure S = Stream + structure P = S.P + structure W = Word32 + + (* X86 is little endian *) + + fun error msg = MLRiscErrorMsg.error("X86MC",msg) + fun makeStream _ = + let infix && || << >> ~>> + val op << = W.<< + val op >> = W.>> + val op ~>> = W.~>> + val op || = W.orb + val op && = W.andb + val itow = W.fromInt + fun emit_bool false = 0w0 : W.word + | emit_bool true = 0w1 : W.word + val emit_int = itow + fun emit_word w = w + fun emit_label l = itow(Label.addrOf l) + fun emit_labexp le = itow(MLTreeEval.valueOf le) + fun emit_const c = itow(Constant.valueOf c) + val w32ToByte = Word8.fromLarge o Word32.toLarge + val loc = ref 0 + + (* emit a byte *) + fun eByte b = + let val i = !loc in loc := i + 1; CodeString.update(i,b) end + + (* emit the low order byte of a word *) + (* note: fromLargeWord strips the high order bits! *) + fun eByteW w = + let val i = !loc + in loc := i + 1; CodeString.update(i, w32ToByte w) end + + fun doNothing _ = () + fun fail _ = raise Fail "MCEmitter" + fun getAnnotations () = error "getAnnotations" + + fun pseudoOp pOp = P.emitValue{pOp=pOp, loc= !loc,emit=eByte} + + fun init n = (CodeString.init n; loc := 0) + + + fun eWord8 w = + let val b8 = w + in eByteW b8 + end + and eWord16 w = + let val b8 = w + val w = w >> 0wx8 + val b16 = w + in + ( eByteW b8; + eByteW b16 ) + end + and eWord32 w = + let val b8 = w + val w = w >> 0wx8 + val b16 = w + val w = w >> 0wx8 + val b24 = w + val w = w >> 0wx8 + val b32 = w + in + ( eByteW b8; + eByteW b16; + eByteW b24; + eByteW b32 ) + end + fun emit_GP r = itow (CellsBasis.physicalRegisterNum r) + and emit_FP r = itow (CellsBasis.physicalRegisterNum r) + and emit_CC r = itow (CellsBasis.physicalRegisterNum r) + and emit_EFLAGS r = itow (CellsBasis.physicalRegisterNum r) + and emit_FFLAGS r = itow (CellsBasis.physicalRegisterNum r) + and emit_MEM r = itow (CellsBasis.physicalRegisterNum r) + and emit_CTRL r = itow (CellsBasis.physicalRegisterNum r) + and emit_CELLSET r = itow (CellsBasis.physicalRegisterNum r) + fun emit_cond (I.EQ) = (0wx4 : Word32.word) + | emit_cond (I.NE) = (0wx5 : Word32.word) + | emit_cond (I.LT) = (0wxC : Word32.word) + | emit_cond (I.LE) = (0wxE : Word32.word) + | emit_cond (I.GT) = (0wxF : Word32.word) + | emit_cond (I.GE) = (0wxD : Word32.word) + | emit_cond (I.B) = (0wx2 : Word32.word) + | emit_cond (I.BE) = (0wx6 : Word32.word) + | emit_cond (I.A) = (0wx7 : Word32.word) + | emit_cond (I.AE) = (0wx3 : Word32.word) + | emit_cond (I.C) = (0wx2 : Word32.word) + | emit_cond (I.NC) = (0wx3 : Word32.word) + | emit_cond (I.P) = (0wxA : Word32.word) + | emit_cond (I.NP) = (0wxB : Word32.word) + | emit_cond (I.O) = (0wx0 : Word32.word) + | emit_cond (I.NO) = (0wx1 : Word32.word) + and emit_fibinOp (I.FIADDS) = (0wxDE, 0) + | emit_fibinOp (I.FIMULS) = (0wxDE, 1) + | emit_fibinOp (I.FICOMS) = (0wxDE, 2) + | emit_fibinOp (I.FICOMPS) = (0wxDE, 3) + | emit_fibinOp (I.FISUBS) = (0wxDE, 4) + | emit_fibinOp (I.FISUBRS) = (0wxDE, 5) + | emit_fibinOp (I.FIDIVS) = (0wxDE, 6) + | emit_fibinOp (I.FIDIVRS) = (0wxDE, 7) + | emit_fibinOp (I.FIADDL) = (0wxDA, 0) + | emit_fibinOp (I.FIMULL) = (0wxDA, 1) + | emit_fibinOp (I.FICOML) = (0wxDA, 2) + | emit_fibinOp (I.FICOMPL) = (0wxDA, 3) + | emit_fibinOp (I.FISUBL) = (0wxDA, 4) + | emit_fibinOp (I.FISUBRL) = (0wxDA, 5) + | emit_fibinOp (I.FIDIVL) = (0wxDA, 6) + | emit_fibinOp (I.FIDIVRL) = (0wxDA, 7) + and emit_funOp (I.FCHS) = (0wxE0 : Word32.word) + | emit_funOp (I.FABS) = (0wxE1 : Word32.word) + | emit_funOp (I.FTST) = (0wxE4 : Word32.word) + | emit_funOp (I.FXAM) = (0wxE5 : Word32.word) + | emit_funOp (I.FPTAN) = (0wxF2 : Word32.word) + | emit_funOp (I.FPATAN) = (0wxF3 : Word32.word) + | emit_funOp (I.FXTRACT) = (0wxF4 : Word32.word) + | emit_funOp (I.FPREM1) = (0wxF5 : Word32.word) + | emit_funOp (I.FDECSTP) = (0wxF6 : Word32.word) + | emit_funOp (I.FINCSTP) = (0wxF7 : Word32.word) + | emit_funOp (I.FPREM) = (0wxF8 : Word32.word) + | emit_funOp (I.FYL2XP1) = (0wxF9 : Word32.word) + | emit_funOp (I.FSQRT) = (0wxFA : Word32.word) + | emit_funOp (I.FSINCOS) = (0wxFB : Word32.word) + | emit_funOp (I.FRNDINT) = (0wxFC : Word32.word) + | emit_funOp (I.FSCALE) = (0wxFD : Word32.word) + | emit_funOp (I.FSIN) = (0wxFE : Word32.word) + | emit_funOp (I.FCOS) = (0wxFF : Word32.word) + fun modrm {mod, reg, rm} = eWord8 ((op mod << 0wx6) + ((reg << 0wx3) + rm)) + and reg {opc, reg} = eWord8 ((opc << 0wx3) + reg) + and sib {ss, index, base} = eWord8 ((ss << 0wx6) + ((index << 0wx3) + base)) + and immed8 {imm} = eWord8 imm + and immed32 {imm} = eWord32 imm + and immedOpnd {opnd} = + (case opnd of + I.Immed i32 => i32 + | I.ImmedLabel le => lexp le + | I.LabelEA le => lexp le + | _ => error "immedOpnd" + ) + and extension {opc, opnd} = + (case opnd of + I.Direct r => modrm {mod=3, reg=opc, rm=r} + | I.MemReg _ => extension {opc=opc, opnd=memReg opnd} + | I.FDirect _ => extension {opc=opc, opnd=memReg opnd} + | I.Displace{base, disp, ...} => + let +(*#line 475.13 "x86/x86.mdl"*) + val immed = immedOpnd {opnd=disp} + in () + end + | I.Indexed{base=NONE, index, scale, disp, ...} => () + | I.Indexed{base=SOME b, index, scale, disp, ...} => () + | _ => error "immedExt" + ) + and encodeST {prefix, opc, st} = + let val st = emit_FP st + in eWord16 ((prefix << 0wx8) + ((opc << 0wx3) + st)) + end + and encodeReg {prefix, reg, opnd} = + let val reg = emit_GP reg + in + ( emit prefix; + immedExt {opc=reg, opnd=opnd}) + end + and arith {opc1, opc2, src, dst} = + (case (src, dst) of + (I.ImmedLabel le, dst) => arith {opc1=opc1, opc2=opc2, src=I.Immed (lexp le), + dst=dst} + | (I.LabelEA le, dst) => arith {opc1=opc1, opc2=opc2, src=I.Immed (lexp le), + dst=dst} + | (I.Immed i, dst) => () + | (src, I.Direct r) => encodeReg {prefix=opc1 + op3, reg=reg, opnd=src} + | (I.Direct r, dst) => encodeReg {prefix=opc1 + 0wx1, reg=reg, opnd=dst} + | _ => error "arith" + ) + fun emitter instr = + let + fun emitInstr (I.NOP) = error "NOP" + | emitInstr (I.JMP(operand, list)) = error "JMP" + | emitInstr (I.JCC{cond, opnd}) = error "JCC" + | emitInstr (I.CALL{opnd, defs, uses, return, cutsTo, mem, pops}) = error "CALL" + | emitInstr (I.ENTER{src1, src2}) = error "ENTER" + | emitInstr (I.LEAVE) = error "LEAVE" + | emitInstr (I.RET option) = error "RET" + | emitInstr (I.MOVE{mvOp, src, dst}) = error "MOVE" + | emitInstr (I.LEA{r32, addr}) = error "LEA" + | emitInstr (I.CMPL{lsrc, rsrc}) = error "CMPL" + | emitInstr (I.CMPW{lsrc, rsrc}) = error "CMPW" + | emitInstr (I.CMPB{lsrc, rsrc}) = error "CMPB" + | emitInstr (I.TESTL{lsrc, rsrc}) = error "TESTL" + | emitInstr (I.TESTW{lsrc, rsrc}) = error "TESTW" + | emitInstr (I.TESTB{lsrc, rsrc}) = error "TESTB" + | emitInstr (I.BITOP{bitOp, lsrc, rsrc}) = error "BITOP" + | emitInstr (I.BINARY{binOp, src, dst}) = error "BINARY" + | emitInstr (I.SHIFT{shiftOp, src, dst, count}) = error "SHIFT" + | emitInstr (I.CMPXCHG{lock, sz, src, dst}) = error "CMPXCHG" + | emitInstr (I.MULTDIV{multDivOp, src}) = error "MULTDIV" + | emitInstr (I.MUL3{dst, src2, src1}) = error "MUL3" + | emitInstr (I.UNARY{unOp, opnd}) = error "UNARY" + | emitInstr (I.SET{cond, opnd}) = error "SET" + | emitInstr (I.CMOV{cond, src, dst}) = error "CMOV" + | emitInstr (I.PUSHL operand) = error "PUSHL" + | emitInstr (I.PUSHW operand) = error "PUSHW" + | emitInstr (I.PUSHB operand) = error "PUSHB" + | emitInstr (I.PUSHFD) = error "PUSHFD" + | emitInstr (I.POPFD) = error "POPFD" + | emitInstr (I.POP operand) = error "POP" + | emitInstr (I.CDQ) = error "CDQ" + | emitInstr (I.INTO) = error "INTO" + | emitInstr (I.FBINARY{binOp, src, dst}) = error "FBINARY" + | emitInstr (I.FIBINARY{binOp, src}) = error "FIBINARY" + | emitInstr (I.FUNARY funOp) = error "FUNARY" + | emitInstr (I.FUCOM operand) = error "FUCOM" + | emitInstr (I.FUCOMP operand) = error "FUCOMP" + | emitInstr (I.FUCOMPP) = error "FUCOMPP" + | emitInstr (I.FCOMPP) = error "FCOMPP" + | emitInstr (I.FCOMI operand) = error "FCOMI" + | emitInstr (I.FCOMIP operand) = error "FCOMIP" + | emitInstr (I.FUCOMI operand) = error "FUCOMI" + | emitInstr (I.FUCOMIP operand) = error "FUCOMIP" + | emitInstr (I.FXCH{opnd}) = error "FXCH" + | emitInstr (I.FSTPL operand) = error "FSTPL" + | emitInstr (I.FSTPS operand) = error "FSTPS" + | emitInstr (I.FSTPT operand) = error "FSTPT" + | emitInstr (I.FSTL operand) = error "FSTL" + | emitInstr (I.FSTS operand) = error "FSTS" + | emitInstr (I.FLD1) = error "FLD1" + | emitInstr (I.FLDL2E) = error "FLDL2E" + | emitInstr (I.FLDL2T) = error "FLDL2T" + | emitInstr (I.FLDLG2) = error "FLDLG2" + | emitInstr (I.FLDLN2) = error "FLDLN2" + | emitInstr (I.FLDPI) = error "FLDPI" + | emitInstr (I.FLDZ) = error "FLDZ" + | emitInstr (I.FLDL operand) = error "FLDL" + | emitInstr (I.FLDS operand) = error "FLDS" + | emitInstr (I.FLDT operand) = error "FLDT" + | emitInstr (I.FILD operand) = error "FILD" + | emitInstr (I.FILDL operand) = error "FILDL" + | emitInstr (I.FILDLL operand) = error "FILDLL" + | emitInstr (I.FNSTSW) = error "FNSTSW" + | emitInstr (I.FENV{fenvOp, opnd}) = error "FENV" + | emitInstr (I.FMOVE{fsize, src, dst}) = error "FMOVE" + | emitInstr (I.FILOAD{isize, ea, dst}) = error "FILOAD" + | emitInstr (I.FBINOP{fsize, binOp, lsrc, rsrc, dst}) = error "FBINOP" + | emitInstr (I.FIBINOP{isize, binOp, lsrc, rsrc, dst}) = error "FIBINOP" + | emitInstr (I.FUNOP{fsize, unOp, src, dst}) = error "FUNOP" + | emitInstr (I.FCMP{i, fsize, lsrc, rsrc}) = error "FCMP" + | emitInstr (I.SAHF) = error "SAHF" + | emitInstr (I.LFENCE) = error "LFENCE" + | emitInstr (I.MFENCE) = error "MFENCE" + | emitInstr (I.SFENCE) = error "SFENCE" + | emitInstr (I.PAUSE) = error "PAUSE" + | emitInstr (I.LAHF) = error "LAHF" + | emitInstr (I.SOURCE{}) = () + | emitInstr (I.SINK{}) = () + | emitInstr (I.PHI{}) = () + in + emitInstr instr + end + + fun emitInstruction(I.ANNOTATION{i, ...}) = emitInstruction(i) + | emitInstruction(I.INSTR(i)) = emitter(i) + | emitInstruction(I.LIVE _) = () + | emitInstruction(I.KILL _) = () + | emitInstruction _ = error "emitInstruction" + + in S.STREAM{beginCluster=init, + pseudoOp=pseudoOp, + emit=emitInstruction, + endCluster=fail, + defineLabel=doNothing, + entryLabel=doNothing, + comment=doNothing, + exitBlock=doNothing, + annotation=doNothing, + getAnnotations=getAnnotations + } + end +end + diff --git a/MLRISC/x86/flowgraph/x86-darwin-pseudo-ops.sml b/MLRISC/x86/flowgraph/x86-darwin-pseudo-ops.sml new file mode 100644 index 0000000..2b3db45 --- /dev/null +++ b/MLRISC/x86/flowgraph/x86-darwin-pseudo-ops.sml @@ -0,0 +1,39 @@ +(* x86-darwin-pseudo-ops.sml + * + * COPYRIGHT (c) 2006 The SML/NJ Fellowship (www.smlnj.org) + * All rights reserved. + *) + +functor X86DarwinPseudoOps ( + + structure T : MLTREE + structure MLTreeEval : MLTREE_EVAL where T = T + + ) : PSEUDO_OPS_BASIS = struct + + structure T = T + structure PB = PseudoOpsBasisTyp + structure Fmt = Format + + structure Endian = + PseudoOpsLittle + (structure T = T + structure MLTreeEval=MLTreeEval + val icache_alignment = 16 + val max_alignment = SOME 7 + val nop = {sz=1, en=0wx90: Word32.word}) + + structure POps = DarwinPseudoOps(T) + + type 'a pseudo_op = (T.labexp, 'a) PB.pseudo_op + + fun error msg = MLRiscErrorMsg.error ("X86DarwinPseudoOps.", msg) + + val sizeOf = Endian.sizeOf + val emitValue = Endian.emitValue + val lexpToString = POps.lexpToString + val toString = POps.toString + val defineLabel = POps.defineLabel + + val wordSize = 32 + end diff --git a/MLRISC/x86/flowgraph/x86GasPseudoOps.sml b/MLRISC/x86/flowgraph/x86GasPseudoOps.sml new file mode 100644 index 0000000..5b441e1 --- /dev/null +++ b/MLRISC/x86/flowgraph/x86GasPseudoOps.sml @@ -0,0 +1,38 @@ +(* x86GasPseudoOps.sml + * + * COPYRIGHT (c) 2002 Bell Labs, Lucent Technologies + * + *) +functor X86GasPseudoOps + ( structure T : MLTREE + structure MLTreeEval : MLTREE_EVAL where T = T + ) : PSEUDO_OPS_BASIS = + +struct + structure T = T + structure PB = PseudoOpsBasisTyp + structure Fmt = Format + + structure Endian = + PseudoOpsLittle + (structure T = T + structure MLTreeEval=MLTreeEval + val icache_alignment = 16 + val max_alignment = SOME 7 + val nop = {sz=1, en=0wx90: Word32.word}) + + structure GasPseudoOps = + GasPseudoOps(structure T = T + val labFmt = {gPrefix="", aPrefix="L"}) + + type 'a pseudo_op = (T.labexp, 'a) PB.pseudo_op + + fun error msg = MLRiscErrorMsg.error ("GasPseudoOps.", msg) + + val sizeOf = Endian.sizeOf + val emitValue = Endian.emitValue + val lexpToString = GasPseudoOps.lexpToString + val toString = GasPseudoOps.toString + val defineLabel = GasPseudoOps.defineLabel + val wordSize = 32 +end diff --git a/MLRISC/x86/instructions/.cm/GUID/x86Peephole.sml b/MLRISC/x86/instructions/.cm/GUID/x86Peephole.sml new file mode 100644 index 0000000..31f60c3 --- /dev/null +++ b/MLRISC/x86/instructions/.cm/GUID/x86Peephole.sml @@ -0,0 +1 @@ +guid-$OTHER-MLRISC/(IA32-Peephole.cm):../x86/instructions/x86Peephole.sml-1714016098.084 diff --git a/MLRISC/x86/instructions/.cm/SKEL/x86Peephole.sml b/MLRISC/x86/instructions/.cm/SKEL/x86Peephole.sml new file mode 100644 index 0000000..2218820 --- /dev/null +++ b/MLRISC/x86/instructions/.cm/SKEL/x86Peephole.sml @@ -0,0 +1,2 @@ +Skeleton 5 +ae"X86Peephole"i3aInstr"gp1c"X86INSTR"aEval"gp1c"MLTREE_EVAL"f1)jh3aI"gp1aC"gp2 ad"CBase"gp1d"CellsBasis"gp1c"PEEPHOLE" \ No newline at end of file diff --git a/MLRISC/x86/instructions/.cm/amd64-unix/x86Peephole.sml b/MLRISC/x86/instructions/.cm/amd64-unix/x86Peephole.sml new file mode 100644 index 0000000..9e3e55a Binary files /dev/null and b/MLRISC/x86/instructions/.cm/amd64-unix/x86Peephole.sml differ diff --git a/MLRISC/x86/instructions/x86-leaf-opt.sml b/MLRISC/x86/instructions/x86-leaf-opt.sml new file mode 100644 index 0000000..ded8592 --- /dev/null +++ b/MLRISC/x86/instructions/x86-leaf-opt.sml @@ -0,0 +1,179 @@ +(* Stolen from John Reppy's Moby compiler: + * + * x86-leaf-opt.sml + * + * COPYRIGHT (c) 2001 Bell Labs, Lucent Technologies + * + * Optimization of leaf procedures for the IA32. We define a leaf procedure + * to be one that does not make calls and does not allocate any extra stack + * space (other than the usual linkage). We optimize by removing the saved + * frame-pointer and rewriting instructions that use the frame-pointer to + * ones that use the stack pointer. + * + * Eventually, we may support tail calls from leaf procedures. + * + *) + +functor X86LeafOpt + (structure X86Instr : X86INSTR + structure FlowGraph : FLOWGRAPH where I = X86Instr + val isLeaf : FlowGraph.cluster -> bool + ) : CLUSTER_OPTIMIZATION = +struct + + structure F = FlowGraph + structure I = X86Instr + structure C = I.C + + type flowgraph = F.cluster + + val name = "X86LeafOpt" + + (* is a register the frame pointer? *) + fun isFP reg = C.sameColor(reg, C.ebp) + (* is a register the stack pointer? *) + fun isSP reg = C.sameColor(reg, C.esp) + + fun error msg = MLRiscErrorMsg.error("X86LeafOpt",msg) + + fun err (blknum, msg) = error(concat[ + "BLOCK ", Int.toString blknum, ": ", msg + ]) + + fun optimize (F.CLUSTER cluster) = let + fun rewriteOpnd (opnd as I.Displace{base, disp, mem}) = + if (isFP base) + then (case disp + of I.Immed n => + I.Displace{base = C.esp, disp = I.Immed(n-4), mem = mem} + | _ => error "unable to rewrite displacement operand" + (* end case *)) + else opnd + | rewriteOpnd (opnd as I.Indexed{base=SOME r, index, scale, disp, mem}) = + if (isFP r) + then (case disp + of I.Immed n => I.Indexed{ + base = SOME C.esp, index = index, scale = scale, + disp = I.Immed(n-4), mem = mem + } + | _ => error "unable to rewrite indexed operand" + (* end case *)) + else opnd + | rewriteOpnd opnd = opnd + fun rewriteInsn insn = (case insn + of I.JMP(opnd, labs) => I.JMP(rewriteOpnd opnd, labs) + | I.JCC{cond, opnd} => I.JCC{cond = cond, opnd = rewriteOpnd opnd} + | I.CALL _ => error "unexpected call" + | I.MOVE{mvOp, src, dst} => I.MOVE{ + mvOp = mvOp, + src = rewriteOpnd src, + dst = rewriteOpnd dst + } + | I.LEA{r32, addr} => I.LEA{r32 = r32, addr = rewriteOpnd addr} + | I.CMPL{lsrc, rsrc} => + I.CMPL{lsrc = rewriteOpnd lsrc, rsrc = rewriteOpnd rsrc} + | I.CMPW{lsrc, rsrc} => + I.CMPW{lsrc = rewriteOpnd lsrc, rsrc = rewriteOpnd rsrc} + | I.CMPB{lsrc, rsrc} => + I.CMPB{lsrc = rewriteOpnd lsrc, rsrc = rewriteOpnd rsrc} + | I.TESTL{lsrc, rsrc} => + I.TESTL{lsrc = rewriteOpnd lsrc, rsrc = rewriteOpnd rsrc} + | I.TESTW{lsrc, rsrc} => + I.TESTW{lsrc = rewriteOpnd lsrc, rsrc = rewriteOpnd rsrc} + | I.TESTB{lsrc, rsrc} => + I.TESTB{lsrc = rewriteOpnd lsrc, rsrc = rewriteOpnd rsrc} + | I.BITOP{bitOp, lsrc, rsrc} => I.BITOP{ + bitOp = bitOp, + lsrc = rewriteOpnd lsrc, + rsrc = rewriteOpnd rsrc + } + | I.BINARY{binOp, src, dst} => I.BINARY{ + binOp = binOp, + src = rewriteOpnd src, + dst = rewriteOpnd dst + } + | I.MULTDIV{multDivOp, src} => I.MULTDIV{ + multDivOp = multDivOp, src = rewriteOpnd src + } + | I.MUL3{dst, src2, src1} => I.MUL3{ + dst = dst, src2 = src2, src1 = rewriteOpnd src1 + } + | I.UNARY{unOp, opnd} => + I.UNARY{unOp = unOp, opnd = rewriteOpnd opnd} + | I.SET{cond, opnd} => I.SET{cond = cond, opnd = rewriteOpnd opnd} + | I.CMOV{cond, src, dst} => I.CMOV{ + cond = cond, src = rewriteOpnd src, dst = dst + } + | I.PUSHL _ => error "unexpected pushl" + | I.PUSHW _ => error "unexpected pushw" + | I.PUSHB _ => error "unexpected pushb" + | I.POP _ => error "unexpected popl" + | I.COPY _ => error "unexpected copy" + | I.FCOPY _ => error "unexpected fcopy" + | I.FBINARY{binOp, src, dst} => I.FBINARY{ + binOp = binOp, src = rewriteOpnd src, dst = rewriteOpnd dst + } + | I.FIBINARY{binOp, src} => I.FIBINARY{ + binOp = binOp, src = rewriteOpnd src + } + | I.FUCOM opnd => I.FUCOM(rewriteOpnd opnd) + | I.FUCOMP opnd => I.FUCOMP(rewriteOpnd opnd) + | I.FSTPL opnd => I.FSTPL(rewriteOpnd opnd) + | I.FSTPS opnd => I.FSTPS(rewriteOpnd opnd) + | I.FSTPT opnd => I.FSTPT(rewriteOpnd opnd) + | I.FSTL opnd => I.FSTL(rewriteOpnd opnd) + | I.FSTS opnd => I.FSTS(rewriteOpnd opnd) + | I.FLDL opnd => I.FLDL(rewriteOpnd opnd) + | I.FLDS opnd => I.FLDS(rewriteOpnd opnd) + | I.FLDT opnd => I.FLDT(rewriteOpnd opnd) + | I.FILD opnd => I.FILD(rewriteOpnd opnd) + | I.FILDL opnd => I.FILDL(rewriteOpnd opnd) + | I.FILDLL opnd => I.FILDLL(rewriteOpnd opnd) + | I.FENV{fenvOp, opnd} => + I.FENV{fenvOp = fenvOp, opnd = rewriteOpnd opnd} + | I.ANNOTATION{i, a} => I.ANNOTATION{i = rewriteInsn i, a = a} + | _ => insn + (* end case *)) + (* rewrite the instructions of a block *) + fun rewriteBlock (F.BBLOCK{insns, ...}) = + insns := List.map rewriteInsn (!insns) + | rewriteBlock _ = () + (* rewrite the exit protocol of an exit block *) + fun rewriteExit (F.BBLOCK{blknum, insns, ...}, _) = ( + case !insns + of (ret as I.RET _)::I.LEAVE::rest => + insns := ret :: rest + | (I.JMP _ :: _) => () (* non-local control flow *) + | _ => err(blknum,"unable to rewrite exit protocol") + (* end case *)) + (* rewrite the entry protocol of an entry block *) + fun rewriteEntry (F.BBLOCK{blknum, insns, ...}, _) = let + fun rewrite [ + I.BINARY{binOp=I.SUBL, src=I.ImmedLabel _, dst=I.Direct a}, + I.MOVE{mvOp=I.MOVL, src=I.Direct b, dst=I.Direct c}, + I.PUSHL(I.Direct d) + ] = if ((isSP a) andalso (isSP b) + andalso (isFP c) andalso (isFP d)) + then [] + else err(blknum, "unable to rewrite entry protocol") + | rewrite (insn::rest) = insn :: rewrite rest + | rewrite [] = err(blknum, "unable to rewrite entry protocol") + in + insns := rewrite(!insns) + end + in + (* first, we rewrite the exit and entry blocks *) + case #exit cluster + of F.EXIT{pred, ...} => List.app rewriteExit (!pred) + (* end case *); + case #entry cluster + of F.ENTRY{succ, ...} => List.app rewriteEntry (!succ) + (* end case *); + (* then rewrite the instructions to use the %esp instead of %ebp *) + List.app rewriteBlock (#blocks cluster) + end + + fun run cluster = + (if isLeaf cluster then optimize cluster else (); cluster) + + end diff --git a/MLRISC/x86/instructions/x86Cells.sml b/MLRISC/x86/instructions/x86Cells.sml new file mode 100644 index 0000000..9c4ea71 --- /dev/null +++ b/MLRISC/x86/instructions/x86Cells.sml @@ -0,0 +1,169 @@ +(* + * WARNING: This file was automatically generated by MDLGen (v3.1) + * from the machine description file "x86/x86.mdl". + * DO NOT EDIT this file directly + *) + + +signature X86CELLS = +sig + include CELLS + val EFLAGS : CellsBasis.cellkind + val FFLAGS : CellsBasis.cellkind + val CELLSET : CellsBasis.cellkind + val showGP : CellsBasis.register_id -> string + val showFP : CellsBasis.register_id -> string + val showCC : CellsBasis.register_id -> string + val showEFLAGS : CellsBasis.register_id -> string + val showFFLAGS : CellsBasis.register_id -> string + val showMEM : CellsBasis.register_id -> string + val showCTRL : CellsBasis.register_id -> string + val showCELLSET : CellsBasis.register_id -> string + val showGPWithSize : CellsBasis.register_id * CellsBasis.sz -> string + val showFPWithSize : CellsBasis.register_id * CellsBasis.sz -> string + val showCCWithSize : CellsBasis.register_id * CellsBasis.sz -> string + val showEFLAGSWithSize : CellsBasis.register_id * CellsBasis.sz -> string + val showFFLAGSWithSize : CellsBasis.register_id * CellsBasis.sz -> string + val showMEMWithSize : CellsBasis.register_id * CellsBasis.sz -> string + val showCTRLWithSize : CellsBasis.register_id * CellsBasis.sz -> string + val showCELLSETWithSize : CellsBasis.register_id * CellsBasis.sz -> string + val eax : CellsBasis.cell + val ecx : CellsBasis.cell + val edx : CellsBasis.cell + val ebx : CellsBasis.cell + val esp : CellsBasis.cell + val ebp : CellsBasis.cell + val esi : CellsBasis.cell + val edi : CellsBasis.cell + val ST : int -> CellsBasis.cell + val ST0 : CellsBasis.cell + val eflags : CellsBasis.cell + val addGP : CellsBasis.cell * cellset -> cellset + val addFP : CellsBasis.cell * cellset -> cellset + val addCC : CellsBasis.cell * cellset -> cellset + val addEFLAGS : CellsBasis.cell * cellset -> cellset + val addFFLAGS : CellsBasis.cell * cellset -> cellset + val addMEM : CellsBasis.cell * cellset -> cellset + val addCTRL : CellsBasis.cell * cellset -> cellset + val addCELLSET : CellsBasis.cell * cellset -> cellset +end + +structure X86Cells : X86CELLS = +struct + exception X86Cells + fun error msg = MLRiscErrorMsg.error("X86Cells",msg) + open CellsBasis + fun showGPWithSize (r, ty) = (fn (0, 8) => "%al" + | (0, 16) => "%ax" + | (0, 32) => "%eax" + | (1, 8) => "%cl" + | (1, 16) => "%cx" + | (1, 32) => "%ecx" + | (2, 8) => "%dl" + | (2, 16) => "%dx" + | (2, 32) => "%edx" + | (3, 8) => "%bl" + | (3, 16) => "%bx" + | (3, 32) => "%ebx" + | (4, 16) => "%sp" + | (4, 32) => "%esp" + | (5, 16) => "%bp" + | (5, 32) => "%ebp" + | (6, 16) => "%si" + | (6, 32) => "%esi" + | (7, 16) => "%di" + | (7, 32) => "%edi" + | (r, _) => "%" ^ (Int.toString r) + ) (r, ty) + and showFPWithSize (r, ty) = (fn (f, _) => (if (f < 8) + then (("%st(" ^ (Int.toString f)) ^ ")") + else ("%f" ^ (Int.toString f))) + ) (r, ty) + and showCCWithSize (r, ty) = (fn _ => "cc" + ) (r, ty) + and showEFLAGSWithSize (r, ty) = (fn _ => "$eflags" + ) (r, ty) + and showFFLAGSWithSize (r, ty) = (fn _ => "$fflags" + ) (r, ty) + and showMEMWithSize (r, ty) = (fn _ => "mem" + ) (r, ty) + and showCTRLWithSize (r, ty) = (fn _ => "ctrl" + ) (r, ty) + and showCELLSETWithSize (r, ty) = (fn _ => "CELLSET" + ) (r, ty) + fun showGP r = showGPWithSize (r, 32) + fun showFP r = showFPWithSize (r, 64) + fun showCC r = showCCWithSize (r, 32) + fun showEFLAGS r = showEFLAGSWithSize (r, 32) + fun showFFLAGS r = showFFLAGSWithSize (r, 32) + fun showMEM r = showMEMWithSize (r, 8) + fun showCTRL r = showCTRLWithSize (r, 0) + fun showCELLSET r = showCELLSETWithSize (r, 0) + val EFLAGS = CellsBasis.newCellKind {name="EFLAGS", nickname="eflags"} + and FFLAGS = CellsBasis.newCellKind {name="FFLAGS", nickname="fflags"} + and CELLSET = CellsBasis.newCellKind {name="CELLSET", nickname="cellset"} + structure MyCells = Cells + (exception Cells = X86Cells + val firstPseudo = 256 + val desc_GP = CellsBasis.DESC {low=0, high=31, kind=CellsBasis.GP, defaultValues=[], + zeroReg=NONE, toString=showGP, toStringWithSize=showGPWithSize, + counter=ref 0, dedicated=ref 0, physicalRegs=ref CellsBasis.array0} + and desc_FP = CellsBasis.DESC {low=32, high=63, kind=CellsBasis.FP, + defaultValues=[], zeroReg=NONE, toString=showFP, toStringWithSize=showFPWithSize, + counter=ref 0, dedicated=ref 0, physicalRegs=ref CellsBasis.array0} + and desc_EFLAGS = CellsBasis.DESC {low=64, high=64, kind=EFLAGS, defaultValues=[], + zeroReg=NONE, toString=showEFLAGS, toStringWithSize=showEFLAGSWithSize, + counter=ref 0, dedicated=ref 0, physicalRegs=ref CellsBasis.array0} + and desc_FFLAGS = CellsBasis.DESC {low=65, high=65, kind=FFLAGS, defaultValues=[], + zeroReg=NONE, toString=showFFLAGS, toStringWithSize=showFFLAGSWithSize, + counter=ref 0, dedicated=ref 0, physicalRegs=ref CellsBasis.array0} + and desc_MEM = CellsBasis.DESC {low=66, high=65, kind=CellsBasis.MEM, + defaultValues=[], zeroReg=NONE, toString=showMEM, toStringWithSize=showMEMWithSize, + counter=ref 0, dedicated=ref 0, physicalRegs=ref CellsBasis.array0} + and desc_CTRL = CellsBasis.DESC {low=66, high=65, kind=CellsBasis.CTRL, + defaultValues=[], zeroReg=NONE, toString=showCTRL, toStringWithSize=showCTRLWithSize, + counter=ref 0, dedicated=ref 0, physicalRegs=ref CellsBasis.array0} + and desc_CELLSET = CellsBasis.DESC {low=66, high=65, kind=CELLSET, defaultValues=[], + zeroReg=NONE, toString=showCELLSET, toStringWithSize=showCELLSETWithSize, + counter=ref 0, dedicated=ref 0, physicalRegs=ref CellsBasis.array0} + val cellKindDescs = [(CellsBasis.GP, desc_GP), (CellsBasis.FP, desc_FP), + (CellsBasis.CC, desc_GP), (EFLAGS, desc_EFLAGS), (FFLAGS, desc_FFLAGS), + (CellsBasis.MEM, desc_MEM), (CellsBasis.CTRL, desc_CTRL), (CELLSET, + desc_CELLSET)] + val cellSize = 4 + ) + + open MyCells + val addGP = CellSet.add + and addFP = CellSet.add + and addCC = CellSet.add + and addEFLAGS = CellSet.add + and addFFLAGS = CellSet.add + and addMEM = CellSet.add + and addCTRL = CellSet.add + and addCELLSET = CellSet.add + val RegGP = Reg GP + and RegFP = Reg FP + and RegCC = Reg CC + and RegEFLAGS = Reg EFLAGS + and RegFFLAGS = Reg FFLAGS + and RegMEM = Reg MEM + and RegCTRL = Reg CTRL + and RegCELLSET = Reg CELLSET + val eax = RegGP 0 + val ecx = RegGP 1 + val edx = RegGP 2 + val ebx = RegGP 3 + val esp = RegGP 4 + val ebp = RegGP 5 + val esi = RegGP 6 + val edi = RegGP 7 + val stackptrR = RegGP 4 + val ST = (fn x => RegFP x + ) + val ST0 = RegFP 0 + val asmTmpR = RegGP 0 + val fasmTmp = RegFP 0 + val eflags = RegEFLAGS 0 +end + diff --git a/MLRISC/x86/instructions/x86FreqProps.sml b/MLRISC/x86/instructions/x86FreqProps.sml new file mode 100644 index 0000000..d7a5198 --- /dev/null +++ b/MLRISC/x86/instructions/x86FreqProps.sml @@ -0,0 +1,39 @@ +(* x86FreqProps.sml + * + * COPYRIGHT (c) 2002 Bell Labs, Lucent Technologies + * + * Extract frequency information from the X86 architecture + * + * -- Allen + *) +functor X86FreqProps(X86Instr : X86INSTR) : FREQUENCY_PROPERTIES = +struct + + structure I = X86Instr + + val p0_001 = Probability.prob(1,1000) + val p10 = Probability.percent 10 + val p50 = Probability.percent 50 + val p90 = Probability.percent 90 + val p100 = Probability.always + + fun x86BranchProb(I.JCC{cond=I.EQ,...}) = p10 + | x86BranchProb(I.JCC{cond=I.O,...}) = p0_001 + | x86BranchProb(I.JCC{cond=I.NE,...}) = p90 + | x86BranchProb(I.JCC{cond=I.NO,...}) = p100 + | x86BranchProb(I.JCC{cond=I.P,...}) = p0_001 (* fp unordered test *) + | x86BranchProb(I.JCC{cond=I.NP,...}) = p100 + | x86BranchProb(I.JCC _) = p50 (* default *) + | x86BranchProb(I.JMP _) = p100 + | x86BranchProb _ = Probability.never (* non-branch *) + + and branchProb(I.ANNOTATION{a, i, ...}) = + (case #peek MLRiscAnnotations.BRANCH_PROB a of + SOME b => b + | NONE => branchProb i + ) + | branchProb (I.INSTR i) = x86BranchProb i + | branchProb _ = Probability.never + +end + diff --git a/MLRISC/x86/instructions/x86Instr.sml b/MLRISC/x86/instructions/x86Instr.sml new file mode 100644 index 0000000..1d125e6 --- /dev/null +++ b/MLRISC/x86/instructions/x86Instr.sml @@ -0,0 +1,787 @@ +(* + * WARNING: This file was automatically generated by MDLGen (v3.1) + * from the machine description file "x86/x86.mdl". + * DO NOT EDIT this file directly + *) + + +signature X86INSTR = +sig + structure C : X86CELLS + structure CB : CELLS_BASIS = CellsBasis + structure T : MLTREE + structure Constant: CONSTANT + structure Region : REGION + sharing Constant = T.Constant + sharing Region = T.Region + datatype operand = + Immed of Int32.int + | ImmedLabel of T.labexp + | Relative of int + | LabelEA of T.labexp + | Direct of CellsBasis.cell + | FDirect of CellsBasis.cell + | FPR of CellsBasis.cell + | ST of CellsBasis.cell + | MemReg of CellsBasis.cell + | Displace of {base:CellsBasis.cell, disp:operand, mem:Region.region} + | Indexed of {base:(CellsBasis.cell) option, index:CellsBasis.cell, scale:int, + disp:operand, mem:Region.region} + type addressing_mode = operand + type ea = operand + datatype cond = + EQ + | NE + | LT + | LE + | GT + | GE + | B + | BE + | A + | AE + | C + | NC + | P + | NP + | O + | NO + datatype binaryOp = + ADDL + | SUBL + | ANDL + | ORL + | XORL + | SHLL + | SARL + | SHRL + | IMULL + | ADCL + | SBBL + | ADDW + | SUBW + | ANDW + | ORW + | XORW + | SHLW + | SARW + | SHRW + | IMULW + | ADDB + | SUBB + | ANDB + | ORB + | XORB + | SHLB + | SARB + | SHRB + | IMULB + | BTSW + | BTCW + | BTRW + | BTSL + | BTCL + | BTRL + | ROLW + | RORW + | ROLL + | RORL + | XCHGB + | XCHGW + | XCHGL + | LOCK_ADCW + | LOCK_ADCL + | LOCK_ADDW + | LOCK_ADDL + | LOCK_ANDW + | LOCK_ANDL + | LOCK_BTSW + | LOCK_BTSL + | LOCK_BTRW + | LOCK_BTRL + | LOCK_BTCW + | LOCK_BTCL + | LOCK_ORW + | LOCK_ORL + | LOCK_SBBW + | LOCK_SBBL + | LOCK_SUBW + | LOCK_SUBL + | LOCK_XORW + | LOCK_XORL + | LOCK_XADDB + | LOCK_XADDW + | LOCK_XADDL + datatype multDivOp = + IMULL1 + | MULL1 + | IDIVL1 + | DIVL1 + datatype unaryOp = + DECL + | INCL + | NEGL + | NOTL + | DECW + | INCW + | NEGW + | NOTW + | DECB + | INCB + | NEGB + | NOTB + | LOCK_DECL + | LOCK_INCL + | LOCK_NEGL + | LOCK_NOTL + datatype shiftOp = + SHLDL + | SHRDL + datatype bitOp = + BTW + | BTL + | LOCK_BTW + | LOCK_BTL + datatype move = + MOVL + | MOVB + | MOVW + | MOVSWL + | MOVZWL + | MOVSBL + | MOVZBL + datatype fbinOp = + FADDP + | FADDS + | FMULP + | FMULS + | FCOMS + | FCOMPS + | FSUBP + | FSUBS + | FSUBRP + | FSUBRS + | FDIVP + | FDIVS + | FDIVRP + | FDIVRS + | FADDL + | FMULL + | FCOML + | FCOMPL + | FSUBL + | FSUBRL + | FDIVL + | FDIVRL + datatype fibinOp = + FIADDS + | FIMULS + | FICOMS + | FICOMPS + | FISUBS + | FISUBRS + | FIDIVS + | FIDIVRS + | FIADDL + | FIMULL + | FICOML + | FICOMPL + | FISUBL + | FISUBRL + | FIDIVL + | FIDIVRL + datatype funOp = + FCHS + | FABS + | FTST + | FXAM + | FPTAN + | FPATAN + | FXTRACT + | FPREM1 + | FDECSTP + | FINCSTP + | FPREM + | FYL2XP1 + | FSQRT + | FSINCOS + | FRNDINT + | FSCALE + | FSIN + | FCOS + datatype fenvOp = + FLDENV + | FNLDENV + | FSTENV + | FNSTENV + datatype fsize = + FP32 + | FP64 + | FP80 + datatype isize = + I8 + | I16 + | I32 + | I64 + datatype instr = + NOP + | JMP of operand * Label.label list + | JCC of {cond:cond, opnd:operand} + | CALL of {opnd:operand, defs:C.cellset, uses:C.cellset, return:C.cellset, + cutsTo:Label.label list, mem:Region.region, pops:Int32.int} + | ENTER of {src1:operand, src2:operand} + | LEAVE + | RET of operand option + | MOVE of {mvOp:move, src:operand, dst:operand} + | LEA of {r32:CellsBasis.cell, addr:operand} + | CMPL of {lsrc:operand, rsrc:operand} + | CMPW of {lsrc:operand, rsrc:operand} + | CMPB of {lsrc:operand, rsrc:operand} + | TESTL of {lsrc:operand, rsrc:operand} + | TESTW of {lsrc:operand, rsrc:operand} + | TESTB of {lsrc:operand, rsrc:operand} + | BITOP of {bitOp:bitOp, lsrc:operand, rsrc:operand} + | BINARY of {binOp:binaryOp, src:operand, dst:operand} + | SHIFT of {shiftOp:shiftOp, src:operand, dst:operand, count:operand} + | CMPXCHG of {lock:bool, sz:isize, src:operand, dst:operand} + | MULTDIV of {multDivOp:multDivOp, src:operand} + | MUL3 of {dst:CellsBasis.cell, src2:Int32.int, src1:operand} + | UNARY of {unOp:unaryOp, opnd:operand} + | SET of {cond:cond, opnd:operand} + | CMOV of {cond:cond, src:operand, dst:CellsBasis.cell} + | PUSHL of operand + | PUSHW of operand + | PUSHB of operand + | PUSHFD + | POPFD + | POP of operand + | CDQ + | INTO + | FBINARY of {binOp:fbinOp, src:operand, dst:operand} + | FIBINARY of {binOp:fibinOp, src:operand} + | FUNARY of funOp + | FUCOM of operand + | FUCOMP of operand + | FUCOMPP + | FCOMPP + | FCOMI of operand + | FCOMIP of operand + | FUCOMI of operand + | FUCOMIP of operand + | FXCH of {opnd:CellsBasis.cell} + | FSTPL of operand + | FSTPS of operand + | FSTPT of operand + | FSTL of operand + | FSTS of operand + | FLD1 + | FLDL2E + | FLDL2T + | FLDLG2 + | FLDLN2 + | FLDPI + | FLDZ + | FLDL of operand + | FLDS of operand + | FLDT of operand + | FILD of operand + | FILDL of operand + | FILDLL of operand + | FNSTSW + | FENV of {fenvOp:fenvOp, opnd:operand} + | FMOVE of {fsize:fsize, src:operand, dst:operand} + | FILOAD of {isize:isize, ea:operand, dst:operand} + | FBINOP of {fsize:fsize, binOp:fbinOp, lsrc:operand, rsrc:operand, dst:operand} + | FIBINOP of {isize:isize, binOp:fibinOp, lsrc:operand, rsrc:operand, dst:operand} + | FUNOP of {fsize:fsize, unOp:funOp, src:operand, dst:operand} + | FCMP of {i:bool, fsize:fsize, lsrc:operand, rsrc:operand} + | SAHF + | LFENCE + | MFENCE + | SFENCE + | PAUSE + | LAHF + | SOURCE of {} + | SINK of {} + | PHI of {} + and instruction = + LIVE of {regs: C.cellset, spilled: C.cellset} + | KILL of {regs: C.cellset, spilled: C.cellset} + | COPY of {k: CellsBasis.cellkind, + sz: int, (* in bits *) + dst: CellsBasis.cell list, + src: CellsBasis.cell list, + tmp: ea option (* NONE if |dst| = {src| = 1 *)} + | ANNOTATION of {i:instruction, a:Annotations.annotation} + | INSTR of instr + val nop : instruction + val jmp : operand * Label.label list -> instruction + val jcc : {cond:cond, opnd:operand} -> instruction + val call : {opnd:operand, defs:C.cellset, uses:C.cellset, return:C.cellset, + cutsTo:Label.label list, mem:Region.region, pops:Int32.int} -> instruction + val enter : {src1:operand, src2:operand} -> instruction + val leave : instruction + val ret : operand option -> instruction + val move : {mvOp:move, src:operand, dst:operand} -> instruction + val lea : {r32:CellsBasis.cell, addr:operand} -> instruction + val cmpl : {lsrc:operand, rsrc:operand} -> instruction + val cmpw : {lsrc:operand, rsrc:operand} -> instruction + val cmpb : {lsrc:operand, rsrc:operand} -> instruction + val testl : {lsrc:operand, rsrc:operand} -> instruction + val testw : {lsrc:operand, rsrc:operand} -> instruction + val testb : {lsrc:operand, rsrc:operand} -> instruction + val bitop : {bitOp:bitOp, lsrc:operand, rsrc:operand} -> instruction + val binary : {binOp:binaryOp, src:operand, dst:operand} -> instruction + val shift : {shiftOp:shiftOp, src:operand, dst:operand, count:operand} -> instruction + val cmpxchg : {lock:bool, sz:isize, src:operand, dst:operand} -> instruction + val multdiv : {multDivOp:multDivOp, src:operand} -> instruction + val mul3 : {dst:CellsBasis.cell, src2:Int32.int, src1:operand} -> instruction + val unary : {unOp:unaryOp, opnd:operand} -> instruction + val set : {cond:cond, opnd:operand} -> instruction + val cmov : {cond:cond, src:operand, dst:CellsBasis.cell} -> instruction + val pushl : operand -> instruction + val pushw : operand -> instruction + val pushb : operand -> instruction + val pushfd : instruction + val popfd : instruction + val pop : operand -> instruction + val cdq : instruction + val into : instruction + val fbinary : {binOp:fbinOp, src:operand, dst:operand} -> instruction + val fibinary : {binOp:fibinOp, src:operand} -> instruction + val funary : funOp -> instruction + val fucom : operand -> instruction + val fucomp : operand -> instruction + val fucompp : instruction + val fcompp : instruction + val fcomi : operand -> instruction + val fcomip : operand -> instruction + val fucomi : operand -> instruction + val fucomip : operand -> instruction + val fxch : {opnd:CellsBasis.cell} -> instruction + val fstpl : operand -> instruction + val fstps : operand -> instruction + val fstpt : operand -> instruction + val fstl : operand -> instruction + val fsts : operand -> instruction + val fld1 : instruction + val fldl2e : instruction + val fldl2t : instruction + val fldlg2 : instruction + val fldln2 : instruction + val fldpi : instruction + val fldz : instruction + val fldl : operand -> instruction + val flds : operand -> instruction + val fldt : operand -> instruction + val fild : operand -> instruction + val fildl : operand -> instruction + val fildll : operand -> instruction + val fnstsw : instruction + val fenv : {fenvOp:fenvOp, opnd:operand} -> instruction + val fmove : {fsize:fsize, src:operand, dst:operand} -> instruction + val fiload : {isize:isize, ea:operand, dst:operand} -> instruction + val fbinop : {fsize:fsize, binOp:fbinOp, lsrc:operand, rsrc:operand, dst:operand} -> instruction + val fibinop : {isize:isize, binOp:fibinOp, lsrc:operand, rsrc:operand, dst:operand} -> instruction + val funop : {fsize:fsize, unOp:funOp, src:operand, dst:operand} -> instruction + val fcmp : {i:bool, fsize:fsize, lsrc:operand, rsrc:operand} -> instruction + val sahf : instruction + val lfence : instruction + val mfence : instruction + val sfence : instruction + val pause : instruction + val lahf : instruction + val source : {} -> instruction + val sink : {} -> instruction + val phi : {} -> instruction +end + +functor X86Instr(T: MLTREE + ) : X86INSTR = +struct + structure C = X86Cells + structure CB = CellsBasis + structure T = T + structure Region = T.Region + structure Constant = T.Constant + datatype operand = + Immed of Int32.int + | ImmedLabel of T.labexp + | Relative of int + | LabelEA of T.labexp + | Direct of CellsBasis.cell + | FDirect of CellsBasis.cell + | FPR of CellsBasis.cell + | ST of CellsBasis.cell + | MemReg of CellsBasis.cell + | Displace of {base:CellsBasis.cell, disp:operand, mem:Region.region} + | Indexed of {base:(CellsBasis.cell) option, index:CellsBasis.cell, scale:int, + disp:operand, mem:Region.region} + type addressing_mode = operand + type ea = operand + datatype cond = + EQ + | NE + | LT + | LE + | GT + | GE + | B + | BE + | A + | AE + | C + | NC + | P + | NP + | O + | NO + datatype binaryOp = + ADDL + | SUBL + | ANDL + | ORL + | XORL + | SHLL + | SARL + | SHRL + | IMULL + | ADCL + | SBBL + | ADDW + | SUBW + | ANDW + | ORW + | XORW + | SHLW + | SARW + | SHRW + | IMULW + | ADDB + | SUBB + | ANDB + | ORB + | XORB + | SHLB + | SARB + | SHRB + | IMULB + | BTSW + | BTCW + | BTRW + | BTSL + | BTCL + | BTRL + | ROLW + | RORW + | ROLL + | RORL + | XCHGB + | XCHGW + | XCHGL + | LOCK_ADCW + | LOCK_ADCL + | LOCK_ADDW + | LOCK_ADDL + | LOCK_ANDW + | LOCK_ANDL + | LOCK_BTSW + | LOCK_BTSL + | LOCK_BTRW + | LOCK_BTRL + | LOCK_BTCW + | LOCK_BTCL + | LOCK_ORW + | LOCK_ORL + | LOCK_SBBW + | LOCK_SBBL + | LOCK_SUBW + | LOCK_SUBL + | LOCK_XORW + | LOCK_XORL + | LOCK_XADDB + | LOCK_XADDW + | LOCK_XADDL + datatype multDivOp = + IMULL1 + | MULL1 + | IDIVL1 + | DIVL1 + datatype unaryOp = + DECL + | INCL + | NEGL + | NOTL + | DECW + | INCW + | NEGW + | NOTW + | DECB + | INCB + | NEGB + | NOTB + | LOCK_DECL + | LOCK_INCL + | LOCK_NEGL + | LOCK_NOTL + datatype shiftOp = + SHLDL + | SHRDL + datatype bitOp = + BTW + | BTL + | LOCK_BTW + | LOCK_BTL + datatype move = + MOVL + | MOVB + | MOVW + | MOVSWL + | MOVZWL + | MOVSBL + | MOVZBL + datatype fbinOp = + FADDP + | FADDS + | FMULP + | FMULS + | FCOMS + | FCOMPS + | FSUBP + | FSUBS + | FSUBRP + | FSUBRS + | FDIVP + | FDIVS + | FDIVRP + | FDIVRS + | FADDL + | FMULL + | FCOML + | FCOMPL + | FSUBL + | FSUBRL + | FDIVL + | FDIVRL + datatype fibinOp = + FIADDS + | FIMULS + | FICOMS + | FICOMPS + | FISUBS + | FISUBRS + | FIDIVS + | FIDIVRS + | FIADDL + | FIMULL + | FICOML + | FICOMPL + | FISUBL + | FISUBRL + | FIDIVL + | FIDIVRL + datatype funOp = + FCHS + | FABS + | FTST + | FXAM + | FPTAN + | FPATAN + | FXTRACT + | FPREM1 + | FDECSTP + | FINCSTP + | FPREM + | FYL2XP1 + | FSQRT + | FSINCOS + | FRNDINT + | FSCALE + | FSIN + | FCOS + datatype fenvOp = + FLDENV + | FNLDENV + | FSTENV + | FNSTENV + datatype fsize = + FP32 + | FP64 + | FP80 + datatype isize = + I8 + | I16 + | I32 + | I64 + datatype instr = + NOP + | JMP of operand * Label.label list + | JCC of {cond:cond, opnd:operand} + | CALL of {opnd:operand, defs:C.cellset, uses:C.cellset, return:C.cellset, + cutsTo:Label.label list, mem:Region.region, pops:Int32.int} + | ENTER of {src1:operand, src2:operand} + | LEAVE + | RET of operand option + | MOVE of {mvOp:move, src:operand, dst:operand} + | LEA of {r32:CellsBasis.cell, addr:operand} + | CMPL of {lsrc:operand, rsrc:operand} + | CMPW of {lsrc:operand, rsrc:operand} + | CMPB of {lsrc:operand, rsrc:operand} + | TESTL of {lsrc:operand, rsrc:operand} + | TESTW of {lsrc:operand, rsrc:operand} + | TESTB of {lsrc:operand, rsrc:operand} + | BITOP of {bitOp:bitOp, lsrc:operand, rsrc:operand} + | BINARY of {binOp:binaryOp, src:operand, dst:operand} + | SHIFT of {shiftOp:shiftOp, src:operand, dst:operand, count:operand} + | CMPXCHG of {lock:bool, sz:isize, src:operand, dst:operand} + | MULTDIV of {multDivOp:multDivOp, src:operand} + | MUL3 of {dst:CellsBasis.cell, src2:Int32.int, src1:operand} + | UNARY of {unOp:unaryOp, opnd:operand} + | SET of {cond:cond, opnd:operand} + | CMOV of {cond:cond, src:operand, dst:CellsBasis.cell} + | PUSHL of operand + | PUSHW of operand + | PUSHB of operand + | PUSHFD + | POPFD + | POP of operand + | CDQ + | INTO + | FBINARY of {binOp:fbinOp, src:operand, dst:operand} + | FIBINARY of {binOp:fibinOp, src:operand} + | FUNARY of funOp + | FUCOM of operand + | FUCOMP of operand + | FUCOMPP + | FCOMPP + | FCOMI of operand + | FCOMIP of operand + | FUCOMI of operand + | FUCOMIP of operand + | FXCH of {opnd:CellsBasis.cell} + | FSTPL of operand + | FSTPS of operand + | FSTPT of operand + | FSTL of operand + | FSTS of operand + | FLD1 + | FLDL2E + | FLDL2T + | FLDLG2 + | FLDLN2 + | FLDPI + | FLDZ + | FLDL of operand + | FLDS of operand + | FLDT of operand + | FILD of operand + | FILDL of operand + | FILDLL of operand + | FNSTSW + | FENV of {fenvOp:fenvOp, opnd:operand} + | FMOVE of {fsize:fsize, src:operand, dst:operand} + | FILOAD of {isize:isize, ea:operand, dst:operand} + | FBINOP of {fsize:fsize, binOp:fbinOp, lsrc:operand, rsrc:operand, dst:operand} + | FIBINOP of {isize:isize, binOp:fibinOp, lsrc:operand, rsrc:operand, dst:operand} + | FUNOP of {fsize:fsize, unOp:funOp, src:operand, dst:operand} + | FCMP of {i:bool, fsize:fsize, lsrc:operand, rsrc:operand} + | SAHF + | LFENCE + | MFENCE + | SFENCE + | PAUSE + | LAHF + | SOURCE of {} + | SINK of {} + | PHI of {} + and instruction = + LIVE of {regs: C.cellset, spilled: C.cellset} + | KILL of {regs: C.cellset, spilled: C.cellset} + | COPY of {k: CellsBasis.cellkind, + sz: int, (* in bits *) + dst: CellsBasis.cell list, + src: CellsBasis.cell list, + tmp: ea option (* NONE if |dst| = {src| = 1 *)} + | ANNOTATION of {i:instruction, a:Annotations.annotation} + | INSTR of instr + val nop = INSTR NOP + and jmp = INSTR o JMP + and jcc = INSTR o JCC + and call = INSTR o CALL + and enter = INSTR o ENTER + and leave = INSTR LEAVE + and ret = INSTR o RET + and move = INSTR o MOVE + and lea = INSTR o LEA + and cmpl = INSTR o CMPL + and cmpw = INSTR o CMPW + and cmpb = INSTR o CMPB + and testl = INSTR o TESTL + and testw = INSTR o TESTW + and testb = INSTR o TESTB + and bitop = INSTR o BITOP + and binary = INSTR o BINARY + and shift = INSTR o SHIFT + and cmpxchg = INSTR o CMPXCHG + and multdiv = INSTR o MULTDIV + and mul3 = INSTR o MUL3 + and unary = INSTR o UNARY + and set = INSTR o SET + and cmov = INSTR o CMOV + and pushl = INSTR o PUSHL + and pushw = INSTR o PUSHW + and pushb = INSTR o PUSHB + and pushfd = INSTR PUSHFD + and popfd = INSTR POPFD + and pop = INSTR o POP + and cdq = INSTR CDQ + and into = INSTR INTO + and fbinary = INSTR o FBINARY + and fibinary = INSTR o FIBINARY + and funary = INSTR o FUNARY + and fucom = INSTR o FUCOM + and fucomp = INSTR o FUCOMP + and fucompp = INSTR FUCOMPP + and fcompp = INSTR FCOMPP + and fcomi = INSTR o FCOMI + and fcomip = INSTR o FCOMIP + and fucomi = INSTR o FUCOMI + and fucomip = INSTR o FUCOMIP + and fxch = INSTR o FXCH + and fstpl = INSTR o FSTPL + and fstps = INSTR o FSTPS + and fstpt = INSTR o FSTPT + and fstl = INSTR o FSTL + and fsts = INSTR o FSTS + and fld1 = INSTR FLD1 + and fldl2e = INSTR FLDL2E + and fldl2t = INSTR FLDL2T + and fldlg2 = INSTR FLDLG2 + and fldln2 = INSTR FLDLN2 + and fldpi = INSTR FLDPI + and fldz = INSTR FLDZ + and fldl = INSTR o FLDL + and flds = INSTR o FLDS + and fldt = INSTR o FLDT + and fild = INSTR o FILD + and fildl = INSTR o FILDL + and fildll = INSTR o FILDLL + and fnstsw = INSTR FNSTSW + and fenv = INSTR o FENV + and fmove = INSTR o FMOVE + and fiload = INSTR o FILOAD + and fbinop = INSTR o FBINOP + and fibinop = INSTR o FIBINOP + and funop = INSTR o FUNOP + and fcmp = INSTR o FCMP + and sahf = INSTR SAHF + and lfence = INSTR LFENCE + and mfence = INSTR MFENCE + and sfence = INSTR SFENCE + and pause = INSTR PAUSE + and lahf = INSTR LAHF + and source = INSTR o SOURCE + and sink = INSTR o SINK + and phi = INSTR o PHI +end + diff --git a/MLRISC/x86/instructions/x86MemRegs.sig b/MLRISC/x86/instructions/x86MemRegs.sig new file mode 100644 index 0000000..317456a --- /dev/null +++ b/MLRISC/x86/instructions/x86MemRegs.sig @@ -0,0 +1,4 @@ +signature MEMORY_REGISTERS = sig + structure I : X86INSTR + val memReg : {reg:I.operand, base: CellsBasis.cell} -> I.ea +end diff --git a/MLRISC/x86/instructions/x86Peephole.peep b/MLRISC/x86/instructions/x86Peephole.peep new file mode 100644 index 0000000..0859bd3 --- /dev/null +++ b/MLRISC/x86/instructions/x86Peephole.peep @@ -0,0 +1,121 @@ +(* + * Note, this file contains conditional pattern matching rules. + * You'll have to run it thru the tool wheregen + * (source for this is in the directory Tools/WhereGen) + * to generate the output. + * + * -- Allen + *) + +local + + + structure I = + struct + include "x86Instr.sml" (* import instruction definitions *) + end + +in + +functor X86Peephole + (structure Instr : X86INSTR + structure Eval : MLTREE_EVAL + sharing Instr.T = Eval.T + ) : PEEPHOLE = +struct + structure I = Instr + structure C = I.C + structure CBase = CellsBasis + + (* IMPORTANT: instructions are given in forward order *) + fun peephole instrs = + let fun isStackPtr(I.Direct r) = CBase.sameColor(r, C.esp) + | isStackPtr _ = false + + fun isZeroLE le = (Eval.valueOf le = 0) handle _ => false + + fun isZero(I.Immed n) = n = 0 + | isZero(I.ImmedLabel le) = isZeroLE le + | isZero _ = false + + fun isZeroOpt NONE = true + | isZeroOpt (SOME opn) = isZero opn + + fun loop(code, instrs) = + (case code of + [] => instrs + + (* x <- x +/- 0; + *) + | I.INSTR(I.BINARY{binOp=(I.ADDL | I.SUBL), + src=I.ImmedLabel le, ...})::rest + where isZeroLE le => loop(rest, instrs) + + (* remove lea 0(r), r *) + | I.INSTR(I.LEA{r32, + addr=I.Displace{base, disp=I.ImmedLabel le,...}})::rest + where (isZeroLE le) andalso + CBase.sameColor(r32,base) => loop(rest, instrs) + + (* addl n, %esp; subl m, %esp + * => addl (n-m), %esp ;; when m < n + * => - ;; when m = n + * => subl (m-n), %esp ;; when m > n + *) + | I.INSTR(I.BINARY{binOp=I.ADDL, src=I.Immed n, dst=I.Direct d_i}):: + I.INSTR(I.BINARY{binOp=I.SUBL, src=I.Immed m, dst=I.Direct d_j}):: + rest + where CBase.sameColor(d_i, C.esp) andalso + CBase.sameColor(d_j, C.esp) => + if (m = n) then loop (rest, instrs) + else if (m < n) then + loop(rest, + I.binary{binOp=I.ADDL, src=I.Immed(n-m), + dst=I.Direct(C.esp)}::instrs) + else + loop(rest, + I.binary{binOp=I.SUBL, src=I.Immed(m-n), + dst=I.Direct(C.esp)}::instrs) + + (* push folding: + * subl 4, %esp + * movl src, 0(%esp) (where src <> %esp !!! ) + * => + * pushl src + *) + | I.INSTR(I.BINARY{binOp=I.SUBL,src=I.Immed 4,dst=I.Direct dst_i}):: + I.INSTR(I.MOVE{mvOp=I.MOVL,src, + dst=I.Displace{base,disp=I.Immed 0,...}}) + ::rest + where CBase.sameColor(base, C.esp) andalso + CBase.sameColor(dst_i, C.esp) andalso + not(isStackPtr src) => + loop(rest, I.pushl src::instrs) + + (* pop folding: + * movl 0(%esp), dst (where dst <> %esp!!!!) + * addl 4, %esp + * => + * popl dst + *) + | I.INSTR(I.MOVE{mvOp=I.MOVL, + src=I.Displace{base, disp=I.Immed 0, ...}, dst}):: + I.INSTR(I.BINARY{binOp=I.ADDL, src=I.Immed 4, + dst=I.Direct dst_i}):: + rest + where CBase.sameColor(base, C.esp) andalso + CBase.sameColor(dst_i,C.esp) andalso + not(isStackPtr dst) => + loop(rest, I.pop dst::instrs) + + | I.INSTR(I.MOVE{mvOp=I.MOVL, src, dst as I.Direct _})::rest + where isZero src => + loop(rest, I.binary{binOp=I.XORL, src=dst, dst=dst}::instrs) + + | i::rest => loop(rest, i::instrs) + ) + in loop(instrs, []) + end +end + +end diff --git a/MLRISC/x86/instructions/x86Peephole.sml b/MLRISC/x86/instructions/x86Peephole.sml new file mode 100644 index 0000000..66670c1 --- /dev/null +++ b/MLRISC/x86/instructions/x86Peephole.sml @@ -0,0 +1,301 @@ +(* WARNING: this is generated by running 'nowhere x86Peephole.peep'. + * Do not edit this file directly. + * Version 1.2.2 + *) + +(*#line 20.1 "x86Peephole.peep"*) +functor X86Peephole( +(*#line 21.5 "x86Peephole.peep"*) + structure Instr : X86INSTR + +(*#line 22.5 "x86Peephole.peep"*) + structure Eval : MLTREE_EVAL + +(*#line 23.7 "x86Peephole.peep"*) + sharing Instr.T = Eval.T + ): PEEPHOLE = +struct + +(*#line 26.4 "x86Peephole.peep"*) + structure I = Instr + +(*#line 27.4 "x86Peephole.peep"*) + structure C = I.C + +(*#line 28.4 "x86Peephole.peep"*) + structure CBase = CellsBasis + +(*#line 31.4 "x86Peephole.peep"*) + fun peephole instrs = + let +(*#line 32.8 "x86Peephole.peep"*) + fun isStackPtr (I.Direct r) = CBase.sameColor (r, C.esp) + | isStackPtr _ = false + +(*#line 35.8 "x86Peephole.peep"*) + fun isZeroLE le = (((Eval.valueOf le) = 0) handle _ => false +) + +(*#line 37.8 "x86Peephole.peep"*) + fun isZero (I.Immed n) = n = 0 + | isZero (I.ImmedLabel le) = isZeroLE le + | isZero _ = false + +(*#line 41.8 "x86Peephole.peep"*) + fun isZeroOpt NONE = true + | isZeroOpt (SOME opn) = isZero opn + +(*#line 44.8 "x86Peephole.peep"*) + fun loop (code, instrs) = + let val v_34 = code + fun state_9 (v_0, v_3) = + let val i = v_0 + and rest = v_3 + in loop (rest, i :: instrs) + end + fun state_22 (v_0, v_17, v_3) = + let val le = v_17 + and rest = v_3 + in (if (isZeroLE le) + then (loop (rest, instrs)) + else (state_9 (v_0, v_3))) + end + fun state_51 (v_0, v_1, v_2, v_3) = + (case v_1 of + I.Direct v_26 => + let val dst = v_1 + and rest = v_3 + and src = v_2 + in (if (isZero src) + then (loop (rest, (I.binary {binOp=I.XORL, src=dst, dst=dst}) :: instrs)) + else (state_9 (v_0, v_3))) + end + | _ => state_9 (v_0, v_3) + ) + in + (case v_34 of + op :: v_33 => + let val (v_0, v_3) = v_33 + in + (case v_0 of + I.INSTR v_32 => + (case v_32 of + I.BINARY v_19 => + let val {binOp=v_31, dst=v_1, src=v_2, ...} = v_19 + in + (case v_31 of + I.ADDL => + (case v_2 of + I.Immed v_17 => + (case v_1 of + I.Direct v_26 => + (case v_3 of + op :: v_14 => + let val (v_13, v_4) = v_14 + in + (case v_13 of + I.INSTR v_12 => + (case v_12 of + I.BINARY v_11 => + let val {binOp=v_10, dst=v_9, src=v_8, ...} = v_11 + in + (case v_10 of + I.SUBL => + (case v_9 of + I.Direct v_5 => + (case v_8 of + I.Immed v_7 => + let val d_i = v_26 + and d_j = v_5 + and m = v_7 + and n = v_17 + and rest = v_4 + in (if ((CBase.sameColor (d_i, C.esp)) andalso (CBase.sameColor (d_j, C.esp))) + then (if (m = n) + then (loop (rest, instrs)) + else (if (m < n) + then (loop (rest, (I.binary {binOp=I.ADDL, src=I.Immed (n - m), dst=I.Direct C.esp}) :: instrs)) + else (loop (rest, (I.binary {binOp=I.SUBL, src=I.Immed (m - n), dst=I.Direct C.esp}) :: instrs)))) + else (state_9 (v_0, v_3))) + end + | _ => state_9 (v_0, v_3) + ) + | _ => state_9 (v_0, v_3) + ) + | _ => state_9 (v_0, v_3) + ) + end + | _ => state_9 (v_0, v_3) + ) + | _ => state_9 (v_0, v_3) + ) + end + | nil => state_9 (v_0, v_3) + ) + | _ => state_9 (v_0, v_3) + ) + | I.ImmedLabel v_17 => state_22 (v_0, v_17, v_3) + | _ => state_9 (v_0, v_3) + ) + | I.SUBL => + (case v_2 of + I.Immed v_17 => + (case v_1 of + I.Direct v_26 => + (case v_17 of + 4 => + (case v_3 of + op :: v_14 => + let val (v_13, v_4) = v_14 + in + (case v_13 of + I.INSTR v_12 => + (case v_12 of + I.MOVE v_11 => + let val {dst=v_9, mvOp=v_28, src=v_8, ...} = v_11 + in + (case v_9 of + I.Displace v_5 => + let val {base=v_27, disp=v_30, ...} = v_5 + in + (case v_30 of + I.Immed v_29 => + (case v_29 of + 0 => + (case v_28 of + I.MOVL => + let val base = v_27 + and dst_i = v_26 + and rest = v_4 + and src = v_8 + in (if (((CBase.sameColor (base, C.esp)) andalso (CBase.sameColor (dst_i, C.esp))) andalso (not (isStackPtr src))) + then (loop (rest, (I.pushl src) :: instrs)) + else (state_9 (v_0, v_3))) + end + | _ => state_9 (v_0, v_3) + ) + | _ => state_9 (v_0, v_3) + ) + | _ => state_9 (v_0, v_3) + ) + end + | _ => state_9 (v_0, v_3) + ) + end + | _ => state_9 (v_0, v_3) + ) + | _ => state_9 (v_0, v_3) + ) + end + | nil => state_9 (v_0, v_3) + ) + | _ => state_9 (v_0, v_3) + ) + | _ => state_9 (v_0, v_3) + ) + | I.ImmedLabel v_17 => state_22 (v_0, v_17, v_3) + | _ => state_9 (v_0, v_3) + ) + | _ => state_9 (v_0, v_3) + ) + end + | I.LEA v_19 => + let val {addr=v_25, r32=v_20, ...} = v_19 + in + (case v_25 of + I.Displace v_24 => + let val {base=v_22, disp=v_23, ...} = v_24 + in + (case v_23 of + I.ImmedLabel v_21 => + let val base = v_22 + and le = v_21 + and r32 = v_20 + and rest = v_3 + in (if ((isZeroLE le) andalso (CBase.sameColor (r32, base))) + then (loop (rest, instrs)) + else (state_9 (v_0, v_3))) + end + | _ => state_9 (v_0, v_3) + ) + end + | _ => state_9 (v_0, v_3) + ) + end + | I.MOVE v_19 => + let val {dst=v_1, mvOp=v_18, src=v_2, ...} = v_19 + in + (case v_18 of + I.MOVL => + (case v_2 of + I.Displace v_17 => + let val {base=v_6, disp=v_16, ...} = v_17 + in + (case v_16 of + I.Immed v_15 => + (case v_15 of + 0 => + (case v_3 of + op :: v_14 => + let val (v_13, v_4) = v_14 + in + (case v_13 of + I.INSTR v_12 => + (case v_12 of + I.BINARY v_11 => + let val {binOp=v_10, dst=v_9, src=v_8, ...} = v_11 + in + (case v_10 of + I.ADDL => + (case v_9 of + I.Direct v_5 => + (case v_8 of + I.Immed v_7 => + (case v_7 of + 4 => + let val base = v_6 + and dst = v_1 + and dst_i = v_5 + and rest = v_4 + in (if (((CBase.sameColor (base, C.esp)) andalso (CBase.sameColor (dst_i, C.esp))) andalso (not (isStackPtr dst))) + then (loop (rest, (I.pop dst) :: instrs)) + else (state_51 (v_0, v_1, v_2, v_3))) + end + | _ => state_51 (v_0, v_1, v_2, v_3) + ) + | _ => state_51 (v_0, v_1, v_2, v_3) + ) + | _ => state_51 (v_0, v_1, v_2, v_3) + ) + | _ => state_51 (v_0, v_1, v_2, v_3) + ) + end + | _ => state_51 (v_0, v_1, v_2, v_3) + ) + | _ => state_51 (v_0, v_1, v_2, v_3) + ) + end + | nil => state_51 (v_0, v_1, v_2, v_3) + ) + | _ => state_51 (v_0, v_1, v_2, v_3) + ) + | _ => state_51 (v_0, v_1, v_2, v_3) + ) + end + | _ => state_51 (v_0, v_1, v_2, v_3) + ) + | _ => state_9 (v_0, v_3) + ) + end + | _ => state_9 (v_0, v_3) + ) + | _ => state_9 (v_0, v_3) + ) + end + | nil => instrs + ) + end + in loop (instrs, []) + end +end + diff --git a/MLRISC/x86/instructions/x86Props.sml b/MLRISC/x86/instructions/x86Props.sml new file mode 100644 index 0000000..6cd50da --- /dev/null +++ b/MLRISC/x86/instructions/x86Props.sml @@ -0,0 +1,415 @@ +(* x86Props.sml -- 32bit, x86 instruction semantic properties + * + * COPYRIGHT (c) 1997 Bell Laboratories. + *) + +functor X86Props + (structure Instr : X86INSTR + structure MLTreeHash : MLTREE_HASH where T = Instr.T + structure MLTreeEval : MLTREE_EVAL where T = Instr.T + ) : INSN_PROPERTIES = +struct + structure I = Instr + structure C = I.C + structure T = I.T + structure CB = CellsBasis + + exception NegateConditional + + fun error msg = MLRiscErrorMsg.error("X86Props",msg) + + datatype kind = IK_JUMP | IK_NOP | IK_INSTR | IK_COPY | IK_CALL + | IK_CALL_WITH_CUTS | IK_PHI | IK_SOURCE | IK_SINK + datatype target = LABELLED of Label.label | FALLTHROUGH | ESCAPES + (*======================================================================== + * Instruction Kinds + *========================================================================*) + fun instrKind (I.ANNOTATION{i, ...}) = instrKind i + | instrKind (I.COPY _) = IK_COPY + | instrKind (I.INSTR i) = + (case i + of I.JMP _ => IK_JUMP + | I.JCC _ => IK_JUMP + | I.CALL{cutsTo=_::_,...} => IK_CALL_WITH_CUTS + | I.CALL _ => IK_CALL + | I.PHI _ => IK_PHI + | I.SOURCE _ => IK_SOURCE + | I.SINK _ => IK_SINK + | I.RET _ => IK_JUMP + | I.INTO => IK_JUMP + | _ => IK_INSTR) + | instrKind _ = IK_INSTR + + fun moveInstr(I.ANNOTATION{i, ...}) = moveInstr i + | moveInstr(I.LIVE _) = false + | moveInstr(I.KILL _) = false + | moveInstr(I.COPY _) = true + | moveInstr(I.INSTR i) = + (case i + of I.MOVE{mvOp=I.MOVL, src=I.Direct _, dst=I.MemReg _, ...} => true + | I.MOVE{mvOp=I.MOVL, src=I.MemReg _, dst=I.Direct _, ...} => true + | I.FMOVE{fsize=I.FP64,src=I.FPR _,dst=I.FPR _, ...} => true + | I.FMOVE{fsize=I.FP64,src=I.FPR _,dst=I.FDirect _, ...} => true + | I.FMOVE{fsize=I.FP64,src=I.FDirect _,dst=I.FPR _, ...} => true + | I.FMOVE{fsize=I.FP64,src=I.FDirect _,dst=I.FDirect _, ...} => true + | _ => false ) + + + fun isMemMove(I.INSTR(i)) = + (case i + of I.MOVE{mvOp=I.MOVL, src=I.Direct _, dst=I.MemReg _, ...} => true + | I.MOVE{mvOp=I.MOVL, src=I.MemReg _, dst=I.Direct _, ...} => true + | I.FMOVE{fsize=I.FP64,src=I.FPR _,dst=I.FPR _, ...} => true + | I.FMOVE{fsize=I.FP64,src=I.FPR _,dst=I.FDirect _, ...} => true + | I.FMOVE{fsize=I.FP64,src=I.FDirect _,dst=I.FPR _, ...} => true + | I.FMOVE{fsize=I.FP64,src=I.FDirect _,dst=I.FDirect _, ...} => true + | _ => false + (*esac*)) + | isMemMove _ = false + + + fun memMove(I.INSTR(i)) = + (case i + of I.MOVE{src=I.Direct rs, dst=I.MemReg rd, ...} => ([rd], [rs]) + | I.MOVE{src=I.MemReg rs, dst=I.Direct rd, ...} => ([rd], [rs]) + | I.FMOVE{src=I.FPR rs, dst=I.FPR rd, ...} => ([rd], [rs]) + | I.FMOVE{src=I.FDirect rs, dst=I.FPR rd, ...} => ([rd], [rs]) + | I.FMOVE{src=I.FPR rs, dst=I.FDirect rd, ...} => ([rd], [rs]) + | I.FMOVE{src=I.FDirect rs, dst=I.FDirect rd, ...} => ([rd], [rs]) + | _ => error "memMove: INSTR" + (*esac*)) + | memMove _ = error "memMove" + + val nop = fn () => I.nop + + + (*======================================================================== + * Parallel Move + *========================================================================*) + fun moveTmpR(I.ANNOTATION{i,...}) = moveTmpR i + | moveTmpR(I.COPY{k=CB.GP, tmp=SOME(I.Direct r), ...}) = SOME r + | moveTmpR(I.COPY{k=CB.FP, tmp=SOME(I.FDirect f), ...}) = SOME f + | moveTmpR(I.COPY{k=CB.FP, tmp=SOME(I.FPR f), ...}) = SOME f + | moveTmpR _ = NONE + + fun moveDstSrc(I.ANNOTATION{i,...}) = moveDstSrc i + | moveDstSrc(I.COPY{src, dst, ...}) = (dst, src) + | moveDstSrc(I.INSTR i) = + (case i + of I.MOVE{src=I.Direct rs, dst=I.MemReg rd, ...} => ([rd], [rs]) + | I.MOVE{src=I.MemReg rs, dst=I.Direct rd, ...} => ([rd], [rs]) + | I.FMOVE{src=I.FPR rs, dst=I.FPR rd, ...} => ([rd], [rs]) + | I.FMOVE{src=I.FDirect rs, dst=I.FPR rd, ...} => ([rd], [rs]) + | I.FMOVE{src=I.FPR rs, dst=I.FDirect rd, ...} => ([rd], [rs]) + | I.FMOVE{src=I.FDirect rs, dst=I.FDirect rd, ...} => ([rd], [rs]) + | _ => error "moveDstSrc") + | moveDstSrc _ = error "moveDstSrc2" + (*===================================================================== + * Branches and Calls/Returns + *=====================================================================*) + fun branchTargets(I.ANNOTATION{i,...}) = branchTargets i + | branchTargets(I.INSTR i) = + (case i + of I.JMP(_, []) => [ESCAPES] + | I.JMP(_, labs) => map LABELLED labs + | I.RET _ => [ESCAPES] + | I.JCC{opnd=I.ImmedLabel(T.LABEL(lab)), ...} => + [FALLTHROUGH, LABELLED lab] + | I.CALL{cutsTo, ...} => FALLTHROUGH :: map LABELLED cutsTo + | I.INTO => [ESCAPES] + | _ => error "branchTargets") + | branchTargets _ = error "branchTargets" + + fun jump label = I.jmp (I.ImmedLabel(T.LABEL label), [label]) + + exception NotImplemented + + fun setJumpTarget(I.ANNOTATION{a,i}, l) = I.ANNOTATION{a=a, i=setJumpTarget(i,l)} + | setJumpTarget(I.INSTR(I.JMP(I.ImmedLabel _, _)), lab) = jump lab + | setJumpTarget _ = error "setJumpTarget" + + fun setBranchTargets{i=I.ANNOTATION{a,i}, t, f} = + I.ANNOTATION{a=a, i=setBranchTargets{i=i, t=t, f=f}} + | setBranchTargets{i=I.INSTR(I.JCC{cond,opnd=I.ImmedLabel _}), t, ...} = + I.jcc{cond=cond,opnd=I.ImmedLabel(T.LABEL t)} + | setBranchTargets _ = error "setBranchTargets" + + fun negateConditional (I.ANNOTATION{i,a}, lab) = + I.ANNOTATION{i=negateConditional(i,lab), a=a} + | negateConditional (I.INSTR(I.JCC{cond,opnd=I.ImmedLabel(T.LABEL _)}), lab) = + let + val cond' = (case cond + of I.EQ => I.NE + | I.NE => I.EQ + | I.LT => I.GE + | I.LE => I.GT + | I.GT => I.LE + | I.GE => I.LT + | I.B => I.AE + | I.BE => I.A + | I.A => I.BE + | I.AE => I.B + | I.C => I.NC + | I.NC => I.C + | I.P => I.NP + | I.NP => I.P + | I.O => I.NO + | I.NO => I.O + (* end case *)) + in + I.INSTR(I.JCC{cond=cond', opnd=I.ImmedLabel(T.LABEL lab)}) + end + | negateConditional _ = error "negateConditional" + + val immedRange={lo= ~1073741824, hi=1073741823} + val toInt32 = Int32.fromLarge o Int.toLarge + fun loadImmed{immed,t} = + I.move{mvOp=I.MOVL,src=I.Immed(toInt32 immed),dst=I.Direct t} + fun loadOperand{opn,t} = I.move{mvOp=I.MOVL,src=opn,dst=I.Direct t} + + (*===================================================================== + * Hashing and Equality on operands + *=====================================================================*) + fun hashOpn(I.Immed i) = Word.fromInt(Int32.toInt i) + | hashOpn(I.ImmedLabel le) = MLTreeHash.hash le + 0w123 + | hashOpn(I.Relative i) = Word.fromInt i + 0w1232 + | hashOpn(I.LabelEA le) = MLTreeHash.hash le + 0w44444 + | hashOpn(I.Direct r) = CB.hashCell r + | hashOpn(I.MemReg r) = CB.hashCell r + 0w2123 + | hashOpn(I.ST f) = CB.hashCell f + 0w88 + | hashOpn(I.FPR f) = CB.hashCell f + 0w881 + | hashOpn(I.FDirect f) = CB.hashCell f + 0w31245 + | hashOpn(I.Displace {base, disp, ...}) = + hashOpn disp + CB.hashCell base + | hashOpn(I.Indexed {base, index, scale, disp, ...}) = + CB.hashCell index + Word.fromInt scale + hashOpn disp + fun eqOpn(I.Immed a,I.Immed b) = a = b + | eqOpn(I.ImmedLabel a,I.ImmedLabel b) = MLTreeEval.==(a,b) + | eqOpn(I.Relative a,I.Relative b) = a = b + | eqOpn(I.LabelEA a,I.LabelEA b) = MLTreeEval.==(a,b) + | eqOpn(I.Direct a,I.Direct b) = CB.sameColor(a,b) + | eqOpn(I.MemReg a,I.MemReg b) = CB.sameColor(a,b) + | eqOpn(I.FDirect a,I.FDirect b) = CB.sameColor(a,b) + | eqOpn(I.ST a,I.ST b) = CB.sameColor(a,b) + | eqOpn(I.FPR a,I.FPR b) = CB.sameColor(a,b) + | eqOpn(I.Displace{base=a,disp=b,...},I.Displace{base=c,disp=d,...}) = + CB.sameColor(a,c) andalso eqOpn(b,d) + | eqOpn(I.Indexed{base=a,index=b,scale=c,disp=d,...}, + I.Indexed{base=e,index=f,scale=g,disp=h,...}) = + CB.sameColor(b,f) andalso c = g + andalso sameCellOption(a,e) andalso eqOpn(d,h) + | eqOpn _ = false + and sameCellOption(NONE, NONE) = true + | sameCellOption(SOME x, SOME y) = CB.sameColor(x,y) + | sameCellOption _ = false + + (*======================================================================== + * Definition and use (for register allocation mainly) + *========================================================================*) + val eaxPair = [C.edx, C.eax] + + fun defUseR instr = let + fun operandAcc(I.Direct r, acc) = r::acc + | operandAcc(I.MemReg r, acc) = r::acc + | operandAcc(I.Displace{base, ...}, acc) = base::acc + | operandAcc(I.Indexed{base=SOME b, index, ...}, acc) = b::index::acc + | operandAcc(I.Indexed{base=NONE, index, ...}, acc) = index::acc + | operandAcc(_, acc) = acc + + fun x86DefUseR instr = let + fun operandUse opnd = operandAcc(opnd, []) + + fun operandUse2(src1, src2) = ([], operandAcc(src1, operandUse src2)) + fun operandUse3(x, y, z) = ([], operandAcc(x, operandAcc(y, operandUse y))) + + fun operandDef(I.Direct r) = [r] + | operandDef(I.MemReg r) = [r] + | operandDef _ = [] + + fun multdiv{src, multDivOp} = let + val uses = operandUse src + in + case multDivOp + of (I.IDIVL1 | I.DIVL1) => (eaxPair, C.edx::C.eax::uses) + | (I.IMULL1 | I.MULL1) => (eaxPair, C.eax::uses) + end + + fun unary opnd = (operandDef opnd, operandUse opnd) + fun cmptest{lsrc, rsrc} = ([], operandAcc(lsrc, operandUse rsrc)) + fun espOnly() = let val sp = [C.stackptrR] in (sp, sp) end + fun push arg = ([C.stackptrR], operandAcc(arg, [C.stackptrR])) + fun float opnd = ([], operandUse opnd) + in + case instr + of I.JMP(opnd, _) => ([], operandUse opnd) + | I.JCC{opnd, ...} => ([], operandUse opnd) + | I.CALL{opnd,defs,uses,...} => + (C.getReg defs, operandAcc(opnd, C.getReg uses)) + | I.MOVE{src, dst=I.Direct r, ...} => ([r], operandUse src) + | I.MOVE{src, dst=I.MemReg r, ...} => ([r], operandUse src) + | I.MOVE{src, dst, ...} => ([], operandAcc(dst, operandUse src)) + | I.LEA{r32, addr} => ([r32], operandUse addr) + | ( I.CMPL arg | I.CMPW arg | I.CMPB arg + | I.TESTL arg | I.TESTW arg | I.TESTB arg ) => cmptest arg + | I.BITOP{lsrc, rsrc, ...} => cmptest{lsrc=lsrc,rsrc=rsrc} + | I.BINARY{binOp=I.XORL,src=I.Direct rs,dst=I.Direct rd,...} => + if CB.sameColor(rs,rd) then ([rd],[]) else ([rd],[rs,rd]) + | I.BINARY{src,dst,...} => + (operandDef dst, operandAcc(src, operandUse dst)) + | I.SHIFT{src,dst,count,...} => + (operandDef dst, + operandAcc(count, operandAcc(src, operandUse dst))) + | I.CMPXCHG{src, dst, ...} => + (C.eax::operandDef dst, C.eax::operandAcc(src, operandUse dst)) + | I.ENTER _ => ([C.esp, C.ebp], [C.esp, C.ebp]) + | I.LEAVE => ([C.esp, C.ebp], [C.esp, C.ebp]) + | I.MULTDIV arg => multdiv arg + | I.MUL3{src1, dst, ...}=> ([dst], operandUse src1) + + | I.UNARY{opnd, ...} => unary opnd + | I.SET{opnd, ...} => unary opnd + | ( I.PUSHL arg | I.PUSHW arg | I.PUSHB arg ) => push arg + | I.POP arg => (C.stackptrR::operandDef arg, [C.stackptrR]) + | I.PUSHFD => espOnly() + | I.POPFD => espOnly() + | I.CDQ => ([C.edx], [C.eax]) + | I.FSTPT opnd => float opnd + | I.FSTPL opnd => float opnd + | I.FSTPS opnd => float opnd + | I.FSTL opnd => float opnd + | I.FSTS opnd => float opnd + | I.FLDL opnd => float opnd + | I.FLDS opnd => float opnd + | I.FILD opnd => float opnd + | I.FILDL opnd => float opnd + | I.FILDLL opnd => float opnd + | I.FBINARY{src, ...} => ([], operandUse src) + | I.FIBINARY{src, ...} => ([], operandUse src) + | I.FENV{opnd, ...} => ([], operandUse opnd) + | I.FNSTSW => ([C.eax], []) + | I.FUCOM opnd => float opnd + | I.FUCOMP opnd => float opnd + | I.FCOMI opnd => float opnd + | I.FCOMIP opnd => float opnd + | I.FUCOMI opnd => float opnd + | I.FUCOMIP opnd => float opnd + + | I.FMOVE{src, dst, ...} => operandUse2(src, dst) + | I.FILOAD{ea, dst, ...} => operandUse2(ea, dst) + | I.FCMP{lsrc, rsrc, ...} => operandUse2(lsrc, rsrc) + | I.FBINOP{lsrc, rsrc, dst, ...} => operandUse3(lsrc, rsrc, dst) + | I.FIBINOP{lsrc, rsrc, dst, ...} => operandUse3(lsrc, rsrc, dst) + | I.FUNOP{src, dst, ...} => operandUse2(src, dst) + + | I.SAHF => ([], [C.eax]) + | I.LAHF => ([C.eax], []) + (* This sets the low order byte, + * do potentially it may define *and* use + *) + | I.CMOV{src,dst,...} => ([dst], operandAcc(src, [dst])) + | _ => ([], []) + end + in + case instr + of I.ANNOTATION{i, ...} => defUseR i + | I.LIVE{regs, ...} => ([], C.getReg regs) + | I.KILL{regs, ...} => (C.getReg regs, []) + | I.COPY{k=CB.GP, dst, src, tmp, ...} => + (case tmp + of NONE => (dst, src) + | SOME(I.Direct r) => (r::dst, src) + | SOME(I.MemReg r) => (r::dst, src) + | SOME(ea) => (dst, operandAcc(ea, src)) + (*esac*)) + | I.COPY _ => ([], []) + | I.INSTR i => x86DefUseR(i) + end + + fun defUseF instr = let + + fun x86DefUseF instr = let + fun operand(I.FDirect f) = [f] + | operand(I.FPR f) = [f] + | operand _ = [] + + fun operandAcc(I.FDirect f, acc) = f::acc + | operandAcc(I.FPR f, acc) = f::acc + | operandAcc(_ , acc) = acc + + fun fbinop(lsrc, rsrc, dst) = + let val def = operand dst + val use = operandAcc(lsrc, operand rsrc) + in (def, use) + end + + val fcmpTmp = [C.ST 0] + + in + case instr + of I.FSTPT opnd => (operand opnd, []) + | I.FSTPL opnd => (operand opnd, []) + | I.FSTPS opnd => (operand opnd, []) + | I.FSTL opnd => (operand opnd, []) + | I.FSTS opnd => (operand opnd, []) + | I.FLDT opnd => ([], operand opnd) + | I.FLDL opnd => ([], operand opnd) + | I.FLDS opnd => ([], operand opnd) + | I.FUCOM opnd => ([], operand opnd) + | I.FUCOMP opnd => ([], operand opnd) + | I.FCOMI opnd => ([], operand opnd) + | I.FCOMIP opnd => ([], operand opnd) + | I.FUCOMI opnd => ([], operand opnd) + | I.FUCOMIP opnd => ([], operand opnd) + | I.CALL{defs, uses, ...} => (C.getFreg defs, C.getFreg uses) + | I.FBINARY{dst, src, ...}=> (operand dst, operand dst @ operand src) + + | I.FMOVE{src, dst, ...} => (operand dst, operand src) + | I.FILOAD{ea, dst, ...} => (operand dst, []) + | I.FCMP{lsrc, rsrc, ...} => (fcmpTmp, operandAcc(lsrc, operand rsrc)) + | I.FBINOP{lsrc, rsrc, dst, ...} => fbinop(lsrc, rsrc, dst) + | I.FIBINOP{lsrc, rsrc, dst, ...} => fbinop(lsrc, rsrc, dst) + | I.FUNOP{src, dst, ...} => (operand dst, operand src) + | _ => ([], []) + end + in + case instr + of (I.ANNOTATION{i, ...}) => defUseF(i) + | I.LIVE{regs, ...} => ([], C.getFreg regs) + | I.KILL{regs, ...} => (C.getFreg regs, []) + | I.COPY{k=CB.FP, dst, src, tmp, ...} => + (case tmp + of NONE => (dst, src) + | SOME(I.FDirect f) => (f::dst, src) + | SOME(I.FPR f) => (f::dst, src) + | _ => (dst, src) + (*esac*)) + | I.COPY _ => ([], []) + | (I.INSTR i) => x86DefUseF(i) + end + + fun defUse CB.GP = defUseR + | defUse CB.FP = defUseF + | defUse _ = error "defUse" + + (*======================================================================== + * Annotations + *========================================================================*) + fun getAnnotations(I.ANNOTATION{i,a}) = + let val (i,an) = getAnnotations i in (i,a::an) end + | getAnnotations i = (i,[]) + + fun annotate(i,a) = I.ANNOTATION{i=i,a=a} + + (*======================================================================== + * Replicate an instruction + *========================================================================*) + fun replicate(I.ANNOTATION{i,a}) = I.ANNOTATION{i=replicate i,a=a} +(* + | replicate(I.COPY{tmp=SOME _, dst, src}) = + I.COPY{tmp=SOME(I.Direct(C.newReg())), dst=dst, src=src} + | replicate(I.FCOPY{tmp=SOME _, dst, src}) = + I.FCOPY{tmp=SOME(I.FDirect(C.newFreg())), dst=dst, src=src} +*) + | replicate i = i +end + diff --git a/MLRISC/x86/instructions/x86Shuffle.sig b/MLRISC/x86/instructions/x86Shuffle.sig new file mode 100644 index 0000000..29622d0 --- /dev/null +++ b/MLRISC/x86/instructions/x86Shuffle.sig @@ -0,0 +1,8 @@ +signature X86SHUFFLE = sig + structure I : X86INSTR + + type t = {tmp:I.operand option, dst:CellsBasis.cell list, src:CellsBasis.cell list} + + val shuffle : t -> I.instruction list + val shufflefp : t -> I.instruction list +end diff --git a/MLRISC/x86/instructions/x86Shuffle.sml b/MLRISC/x86/instructions/x86Shuffle.sml new file mode 100644 index 0000000..4448008 --- /dev/null +++ b/MLRISC/x86/instructions/x86Shuffle.sml @@ -0,0 +1,139 @@ +(* NOTE on xchg on the x86 + * + * From Allen Leung: + * Here's why I didn't use xchg: + * + * o According to the optimization guide xchg mem, reg is complex, + * cannot be pipelined or paired at all. xchg reg, reg requires 3 uops. + * In contrast, mov mem, reg requires 1 or 2 uops. + * So xchgs loses out, at least on paper. + * [I haven't done any measurements though] + * + * o Secondly, unlike other architectures, parallel copies are split + * into individual copies during instruction selection. Here's why + * I did this: I found that more copies are retained and more spills + * are generated when keeping the parallel copies. My guess on this is + * that the copy temporary for parallel copies create addition + * interferences [even when they are not needed.] + * This is not a problem on RISC machines, because of plentiful registers. + * + * o Spilling of parallel copies is also a very complex business when + * memory coalescing is turned on. I think I have implemented a solution + * to this, but not using parallel copies keep life simple. This problem + * could be simpler with xchg...but I haven't thought about it much. + * + * From Fermin Reig: + * In the java@gcc.gnu.org, GC mailing lists there's been a discussion about + * the costs of xcgh. Here's some extracts of it: + * + * ---------------- + * > From: Emery Berger [mailto:emery@cs.utexas.edu] + * > + * > http://developer.intel.com/design/pentium4/manuals/24547203.pdf + * > + * > See Chapter 7.1. "For the P6 family processors, locked + * > operations serialize + * > all outstanding load and store operations (that is, wait for them to + * > complete). This rule is also true for the Pentium 4 + * > processor, with one + * > exception: load operations that reference weakly ordered + * > memory types (such + * > as the WC memory type) may not be serialized. " + * > + * ----------------- + * I just tried this on a 500 MHz Pentium III. I get about 23 cycles for + * + * lock; cmpxchg + * + * : + * and about 19 or 20 cycles for xchg (which has an implicit lock prefix). + * + * I got consistent results by timing a loop and by looking at an instruction + * level profile. Putting other stuff in the loop didn't seem to affect the + * time taken by xchg much. Here's the code in case someone else wants to try. + * (This requires Linux/gcc) + * ------------------- + * Chris Dodd pointed out on the GC mailing list that on recent Intel X86 + * processors: + * + * - cmpxchg without a lock prefix is much faster (roughly 3x or close to 15 + * cycles by my measurements) than either xchg (implied lock prefix) or lock; + * cmpxchg . + * + * - cmpxchg without the lock prefix is atomic on uniprocessors, i.e. it's not + * interruptable. + * + * As far as I can tell, none of the GNU libraries currently take advantage of + * this fact. Should they? + * + * This argues, for example, that I could get noticable additional speedup from + * Java hash synchronization on X86 by overwriting a few strategic "lock" + * prefixes with "nop"s when I notice that there's only one processor + * + * + * From John Reppy: + * + * Disregard what I said. The xchg instruction has an implicit lock prefix, + * so it is not useful for normal programming tasks. + *) + +functor X86Shuffle(I : X86INSTR) : X86SHUFFLE = +struct + structure I = I + structure C = I.C + structure CB = CellsBasis + structure Shuffle = Shuffle(I) + + type t = {tmp:I.operand option, dst:CellsBasis.cell list, src:CellsBasis.cell list} + + exception foo + val shuffle = + Shuffle.shuffle + {mvInstr=fn{dst, src} => [I.move{mvOp=I.MOVL, src=src, dst=dst}], + ea=I.Direct} + + (* + * These assume that the ''registers'' are mapped onto the memory + *) + + (* Note, this only works with double precision floating point *) + val shufflefpNormalAndSlow = + Shuffle.shuffle + {mvInstr=fn{dst, src} => [I.fldl src, I.fstpl dst], + ea = I.FDirect} + + (* + * This version makes use of the x86 floating point stack for hardware + * renaming! + *) + fun shufflefpNormal{tmp, src, dst} = let + val n = length src + in + if n <= 7 then let + fun gen(s::ss, d::ds, pushes, pops) = + if CB.sameColor(s,d) then gen(ss, ds, pushes, pops) + else + gen(ss, ds, + I.fldl(I.FDirect s)::pushes, + I.fstpl(I.FDirect d)::pops) + | gen(_, _, pushes, pops) = List.revAppend(pushes, pops) + in gen(src, dst, [], []) + end + else shufflefpNormalAndSlow{tmp=tmp, src=src, dst=dst} + end + + (* + * These assume that the ''registers'' are mapped onto the pseudo + * %fpr register. Only works with double precision floating point for + * now... + *) + val shufflefpFast = + Shuffle.shuffle + {mvInstr=fn{dst, src} => [I.fmove{fsize=I.FP64,src=src, dst=dst}], + ea = I.FPR} + + fun shufflefp(x as {tmp=SOME(I.FPR _), ...}) = shufflefpFast x + | shufflefp x = shufflefpNormal x + +end + diff --git a/MLRISC/x86/instructions/x86comp-instr-ext.sml b/MLRISC/x86/instructions/x86comp-instr-ext.sml new file mode 100644 index 0000000..d846378 --- /dev/null +++ b/MLRISC/x86/instructions/x86comp-instr-ext.sml @@ -0,0 +1,89 @@ +(* x86comp-instr-ext.sml + * + * COPYRIGHT (c) 2000 Bell Labs, Lucent Technologies + * + * emit code for extensions to the x86 instruction set. + *) +signature X86COMP_INSTR_EXT = sig + structure I : X86INSTR + structure TS : MLTREE_STREAM + where T = I.T + structure CFG : CONTROL_FLOW_GRAPH + where I = I + and P = TS.S.P + + type reducer = + (I.instruction, I.C.cellset, I.operand, I.addressing_mode, CFG.cfg) TS.reducer + + val compileSext : + reducer + -> {stm: (I.T.stm, I.T.rexp, I.T.fexp, I.T.ccexp) X86InstrExt.sext, + an: I.T.an list} + -> unit +end + + + + +functor X86CompInstrExt + ( structure I : X86INSTR + structure TS : MLTREE_STREAM + where T = I.T + structure CFG : CONTROL_FLOW_GRAPH + where P = TS.S.P + and I = I + ) : X86COMP_INSTR_EXT = +struct + structure CFG = CFG + structure T = TS.T + structure I = I + structure C = I.C + structure X = X86InstrExt + structure TS = TS + + type stm = (T.stm, T.rexp, T.fexp, T.ccexp) X.sext + + type reducer = + (I.instruction, I.C.cellset, I.operand, I.addressing_mode, CFG.cfg) TS.reducer + + val esp = C.esp + val espOpnd = I.Direct(esp) + + fun error msg = MLRiscErrorMsg.error("X86CompInstrExt", msg) + + val stackArea = I.Region.stack + + fun compileSext reducer {stm: stm, an:T.an list} = let + val TS.REDUCER{operand, emit, reduceFexp, instrStream, reduceOperand, + ...} = reducer + val TS.S.STREAM{emit=emitI, ...} = instrStream + fun fstp(sz, fstpInstr, fexp) = + (case fexp + of T.FREG(sz', f) => + if sz <> sz' then error "fstp: sz" + else emitI(I.INSTR(fstpInstr(I.FDirect f))) + | _ => error "fstp: fexp" + (*esac*)) + in + case stm + of X.PUSHL(rexp) => emit(I.pushl(operand rexp), an) + | X.POP(rexp) => emit(I.pop(operand rexp), an) + + | X.FSTPS(fexp) => fstp(32, I.FSTPS, fexp) + | X.FSTPL(fexp) => fstp(64, I.FSTPL, fexp) + | X.FSTPT(fexp) => fstp(80, I.FSTPT, fexp) + + | X.LEAVE => emit(I.leave, an) + | X.RET(rexp) => emit(I.ret(SOME(operand rexp)), an) + | X.LOCK_CMPXCHGL(src, dst) => + (* src must in a register *) + emit(I.cmpxchg{lock=true,sz=I.I32, + src=I.Direct(reduceOperand(operand src)), + dst=operand dst},an) + | X.PAUSE => emit(I.pause, an) + | X.MFENCE => emit(I.mfence, an) + | X.LFENCE => emit(I.lfence, an) + | X.SFENCE => emit(I.sfence, an) + + end +end diff --git a/MLRISC/x86/instructions/x86instr-ext.sml b/MLRISC/x86/instructions/x86instr-ext.sml new file mode 100644 index 0000000..575865b --- /dev/null +++ b/MLRISC/x86/instructions/x86instr-ext.sml @@ -0,0 +1,43 @@ +(* x86instr-ext.sml + * + * COPYRIGHT (c) 2000 Bell Labs, Lucent Technologies + * + * extensions to the x86 instruction set. + *) + +structure X86InstrExt = struct + datatype fsz = single | double | extended + + datatype ('s, 'r, 'f, 'c) sext + (* push an integer value onto the H/W stack *) + = PUSHL of 'r + | POP of 'r + + (* FSTPS/L/T is a way of pulling things off the floating point + * stack and must therefore take FREG f as argument + *) + | FSTPS of 'f + | FSTPL of 'f + | FSTPT of 'f + + | LEAVE + | RET of 'r + + | LOCK_CMPXCHGL of ('r * 'r) + + | PAUSE (* improves performance of spin-wait loops *) + (* performs a serializing operation on all load-from-memory and store-to-memory + * operations issued prior to the mfence instruction. + *) + | MFENCE + (* performs a serializing operation on all load-to-memory operations issued prior to + * the lfence instruction. + *) + | LFENCE + (* performs a serializing operation on all store-to-memory operations issued prior to + * the sfence instruction. + *) + | SFENCE + + +end diff --git a/MLRISC/x86/mltree/orig-x86-fp.sml b/MLRISC/x86/mltree/orig-x86-fp.sml new file mode 100644 index 0000000..f626953 --- /dev/null +++ b/MLRISC/x86/mltree/orig-x86-fp.sml @@ -0,0 +1,1674 @@ +(* x86-fp.sml + * + * COPYRIGHT (c) 2001 Bell Labs, Lucent Technologies + * + * This phase takes a cluster with pseudo x86 fp instructions, performs + * liveness analysis to determine their live ranges, and rewrite the + * program into the correct stack based code. + * + * The Basics + * ---------- + * o We assume there are 7 pseudo fp registers, %fp(0), ..., %fp(6), + * which are mapped onto the %st stack. One stack location is reserved + * for holding temporaries. + * o Important: for floating point comparisons, we actually need + * two extra stack locations in the worst case. We handle this by + * specifying that the instruction define an extra temporary fp register + * when necessary. + * o The mapping between %fp <-> %st may change from program point to + * program point. We keep track of this lazy renaming and try to minimize + * the number of FXCH that we insert. + * o At split and merge points, we may get inconsistent %fp <-> %st mappings. + * We handle this by inserting the appropriate renaming code. + * o Parallel copies (renaming) are rewritten into a sequence of FXCHs! + * + * Pseudo fp instructions Semantics + * -------------------------------------- + * FMOVE src, dst dst := src + * FILOAD ea, dst dst := cvti2f(mem[ea]) + * FBINOP lsrc, rsrc, dst dst := lsrc * rsrc + * FIBINOP lsrc, rsrc, dst dst := lsrc * cvti2f(rsrc) + * FUNOP src, dst dst := unaryOp src + * FCMP lsrc, rsrc fp condition code := fcmp(lsrc, rsrc) + * + * An instruction may use its source operand(s) destructively. + * We find this info using a global liveness analysis. + * + * The Translation + * --------------- + * o We keep track of the bindings between %fp registers and the + * %st(..) staack locations. + * o FXCH and FLDL are inserted at the appropriate places to move operands + * to %st(0). FLDL is used if the operand is not dead. FXCH is used + * if the operand is the last use. + * o FCOPY's between pseudo %fp registers are done by software renaming + * and generate no code by itself! + * o FSTL %st(1) are also generated to pop the stack after the last use + * of an operand. + * + * Note + * ---- + * 1. This module should be run after floating point register allocation. + * + * -- Allen Leung (leunga@cs.nyu.edu) + *) + +local + val debug = false (* set this to true to debug this module + * set this to false for production use. + *) + val debugLiveness = true (* debug liveness analysis *) + val debugDead = false (* debug dead code removal *) + val sanityCheck = true +in +functor X86FP + (structure X86Instr : X86INSTR + structure X86Props : INSN_PROPERTIES + where I = X86Instr + structure Flowgraph : CONTROL_FLOW_GRAPH + where I = X86Instr + structure Liveness : LIVENESS + where CFG = Flowgraph + structure Asm : INSTRUCTION_EMITTER + where I = X86Instr + and S.P = Flowgraph.P + ) : CFG_OPTIMIZATION = +struct + structure CFG = Flowgraph + structure G = Graph + structure I = X86Instr + structure T = I.T + structure P = X86Props + structure C = I.C + structure A = Array + structure L = Label + structure An = Annotations + structure CB = CellsBasis + structure SL = CB.SortedCells + structure HT = IntHashTable + structure IM = IntRedBlackMap + + type flowgraph = CFG.cfg + type an = An.annotations + + val name = "X86 floating point rewrite" + + val debugOn = MLRiscControl.mkFlag ("x86-fp-debug", "x86 fp debug mode") + val traceOn = MLRiscControl.mkFlag ("x86-fp-trace", "x86 fp trace mode") + + fun error msg = MLRiscErrorMsg.error("X86FP",msg) + fun pr msg = TextIO.output(!MLRiscControl.debug_stream,msg) + + val i2s = Int.toString + + (* + * No overflow checking is needed for integer arithmetic in this module + *) + + fun celllistToCellset l = List.foldr CB.CellSet.add CB.CellSet.empty l + fun celllistToString l = CB.CellSet.toString(celllistToCellset l) + + (* Annotation to mark split edges *) + exception TargetMovedTo of G.node_id + + (*----------------------------------------------------------------------- + * Primitive instruction handling routines + *-----------------------------------------------------------------------*) + + (* Annotation an instruction *) + fun mark(instr, []) = instr + | mark(instr, a::an) = mark(I.ANNOTATION{i=instr,a=a}, an) + + (* Add pop suffix to a binary operator *) + fun pop I.FADDL = I.FADDP | pop I.FADDS = I.FADDP + | pop I.FSUBL = I.FSUBP | pop I.FSUBS = I.FSUBP + | pop I.FSUBRL = I.FSUBRP | pop I.FSUBRS = I.FSUBRP + | pop I.FMULL = I.FMULP | pop I.FMULS = I.FMULP + | pop I.FDIVL = I.FDIVP | pop I.FDIVS = I.FDIVP + | pop I.FDIVRL = I.FDIVRP | pop I.FDIVRS = I.FDIVRP + | pop _ = error "fbinop.pop" + + (* Invert the operator *) + fun invert I.FADDL = I.FADDL | invert I.FADDS = I.FADDS + | invert I.FSUBL = I.FSUBRL | invert I.FSUBS = I.FSUBRS + | invert I.FSUBRL = I.FSUBL | invert I.FSUBRS = I.FSUBS + | invert I.FMULL = I.FMULL | invert I.FMULS = I.FMULS + | invert I.FDIVL = I.FDIVRL | invert I.FDIVS = I.FDIVRS + | invert I.FDIVRL = I.FDIVL | invert I.FDIVRS = I.FDIVS + | invert I.FADDP = I.FADDP | invert I.FMULP = I.FMULP + | invert I.FSUBP = I.FSUBRP | invert I.FSUBRP = I.FSUBP + | invert I.FDIVP = I.FDIVRP | invert I.FDIVRP = I.FDIVP + | invert _ = error "invert" + + (* Pseudo instructions *) + fun FLD(I.FP32, ea) = I.flds ea + | FLD(I.FP64, ea) = I.fldl ea + | FLD(I.FP80, ea) = I.fldt ea + + fun FILD(I.I8, ea) = error "FILD" + | FILD(I.I16, ea) = I.fild ea + | FILD(I.I32, ea) = I.fildl ea + | FILD(I.I64, ea) = I.fildll ea + + fun FSTP(I.FP32, ea) = I.fstps ea + | FSTP(I.FP64, ea) = I.fstpl ea + | FSTP(I.FP80, ea) = I.fstpt ea + + fun FST(I.FP32, ea) = I.fsts ea + | FST(I.FP64, ea) = I.fstl ea + | FST(I.FP80, ea) = error "FSTT" + + (*----------------------------------------------------------------------- + * Pretty print routines + *-----------------------------------------------------------------------*) + fun fregToString f = "%f"^i2s(CB.registerNum f) + fun fregsToString s = + List.foldr (fn (r,"") => fregToString r | + (r,s) => fregToString r^" "^s) "" s + + fun blknumOf(CFG.BLOCK{id, ...}) = id + + (*----------------------------------------------------------------------- + * A stack datatype that mimics the x86 floating point stack + * and keeps track of bindings between %st(n) and %fp(n). + *-----------------------------------------------------------------------*) + structure ST :> + sig + type stack + type stnum = int (* 0 -- 7 *) + val create : unit -> stack + val stack0 : stack + val copy : stack -> stack + val clear : stack -> unit + val fp : stack * CB.register_id -> stnum + val st : stack * stnum -> CB.register_id + val set : stack * stnum * CB.register_id -> unit + val push : stack * CB.register_id -> unit + val xch : stack * stnum * stnum -> unit + val pop : stack -> unit + val depth : stack -> int + val nonFull : stack -> unit + val kill : stack * CellsBasis.cell -> unit + val stackToString : stack -> string + val equal : stack * stack -> bool + end = + struct + type stnum = int + datatype stack = + STACK of + { st : CB.register_id A.array, (* mapping %st -> %fp registers *) + fp : stnum A.array, (* mapping %fp -> %st registers *) + sp : int ref (* stack pointer *) + } + + (* Create a new stack *) + fun create() = STACK{st=A.array(8,~1), fp=A.array(7,16), sp=ref ~1} + + val stack0 = create() + + (* Copy a stack *) + fun copy(STACK{st, fp, sp}) = + let val st' = A.array(8, ~1) + val fp' = A.array(7, 16) + in A.copy{src=st,dst=st',si=0,di=0,len=NONE}; + A.copy{src=fp,dst=fp',si=0,di=0,len=NONE}; + STACK{st=st', fp=fp', sp=ref(!sp)} + end + + (* Depth of stack *) + fun depth(STACK{sp, ...}) = !sp + 1 + + fun nonFull(STACK{sp, ...}) = + if !sp >= 7 then error "stack overflow" else () + + (* Given %st(n), lookup the corresponding %fp(n) *) + fun st(STACK{st, sp, ...}, n) = A.sub(st, !sp - n) + + (* Given %fp(n), lookup the corresponding %st(n) *) + fun fp(STACK{fp, sp, ...}, n) = !sp - A.sub(fp, n) + + fun stackToString stack = + let val depth = depth stack + fun f i = if i >= depth then " ]" + else "%st("^i2s i^")=%f"^i2s(st(stack,i))^" "^f(i+1) + in "[ "^f 0 end + + fun clear(STACK{st, fp, sp, ...}) = + (sp := ~1; A.modify(fn _ => ~1) st; A.modify(fn _ => 16) fp) + + (* Set %st(n) := %f *) + fun set(STACK{st, fp, sp, ...}, n, f) = + (A.update(st, !sp - n, f); + if f >= 0 then A.update(fp, f, !sp - n) else () + ) + + (* Pop one entry *) + fun pop(STACK{sp, st, fp, ...}) = sp := !sp - 1 + + (* Push %fp(f) onto %st(0) *) + fun push(stack as STACK{sp, ...}, f) = (sp := !sp + 1; set(stack, 0, f)) + + (* Exchange the contents of %st(m) and %st(n) *) + fun xch(stack, m, n) = + let val f_m = st(stack, m) + val f_n = st(stack, n) + in set(stack, m, f_n); + set(stack, n, f_m) + end + + fun kill(STACK{fp, ...}, f) = A.update(fp, CB.registerNum f, 16) + + fun equal(st1, st2) = + let val m = depth st1 + val n = depth st2 + fun loop i = + i >= m orelse (st(st1, i) = st(st2, i) andalso loop(i+1)) + in m = n andalso loop(0) + end + + end (* struct *) + + (*----------------------------------------------------------------------- + * Module to handle forward propagation. + * Forward propagation does the following: + * Given an instruction + * fmove mem, %fp(n) + * We delay the generation of the load until the first use of %fp(n), + * which we can further optimize by folding the load into the operand + * of the instruction, if it is the last use of this operand. + * If %fp(n) is dead then no load is necessary. + * Of course, we have to be careful whenever we encounter other + * instruction with a write. + *-----------------------------------------------------------------------*) + (* + structure ForwardPropagation :> + sig + type readbuffer + val create : ST.stack -> readbuffer + val load : readbuffer * C.cell * I.fsize * I.ea -> unit + val getreg : readbuffer * bool * C.cell * I.instruction list -> + I.operand * I.instruction list + val flush : readbuffer * I.instruction list -> I.instruction list + end = + struct + + datatype readbuffer = + READ of { stack : ST.stack, + loads : (I.fsize * I.ea) option A.array, + pending : int ref + } + + fun create stack = + READ{stack =stack, + loads =A.array(8, NONE), + pending =ref 0 + } + + fun load(READ{pending, loads, ...}, fd, fsize, mem) = + (A.update(loads, fd, SOME(fsize, mem)); + pending := !pending + 1 + ) + + (* Extract the operand for a register + * If it has a delayed load associated with it then + * we perform the load at this time. + *) + fun getreg(READ{pending, loads, stack, ...}, isLastUse, fs, code) = + case A.sub(loads, fs) of + NONE => + let val n = ST.st(stack, fs) + in if isLastUse + then (ST n, code) + else let val code = I.FLDL(ST n)::code + in ST.push(stack, fs); (ST0, code) + end + end + | SOME(fsize, mem) => + let val code = FLD(fsize, mem)::code + in A.update(loads, fs, NONE); (* delete load *) + pending := !pending - 1; + ST.push(stack, fs); (* fs is now in place *) + (ST0, code) + end + + (* Extract a binary operand. + * We'll try to fold this into the operand + *) + fun getopnd(READ{pending, loads, stack,...}, isLastUse, I.FPR fs, code) = + (case A.sub(loads, fs) of + NONE => + let val n = ST.st(stack, fs) + in if isLastUse fs (* regmap XXX *) + then (ST n, code) + else let val code = I.FLDL(ST n)::code + in ST.push(stack, fs); (ST0, code) + end + end + | SOME(fsize, mem) => + (A.update(loads, fs, NONE); (* delete load *) + pending := !pending - 1; + if isLastUse fs then (mem, code) + else let val code = FLD(fsize, mem)::code + in ST.push(stack, fs); + (ST0, code) + end + ) + ) + | getopnd(_, _, ea, code) = (ea, code) + + fun flush(READ{pending=ref 0,...}, code) = code + + end (* struct *) + *) + + (*----------------------------------------------------------------------- + * Module to handle delayed stores. + * Delayed store does the following: + * Given an instruction + * fstore %fp(n), %mem + * We delay the generation of the store until necessary. + * This gives us an opportunity to rearrange the order of the stores + * to eliminate unnecessary fxch. + *-----------------------------------------------------------------------*) + (* + structure DelayStore :> + sig + type writebuffer + val create : ST.stack -> writebuffer + val flush : writebuffer * I.instruction list -> I.instruction list + end = + struct + datatype writebuffer = + WRITE of { front : (I.ea * C.cell) list ref, + back : (I.ea * C.cell) list ref, + stack : ST.stack, + pending : int ref + } + fun create stack = WRITE{front=ref [], back=ref [], + stack=stack, pending=ref 0} + fun flush(WRITE{pending=ref 0,...}, code) = code + end (* struct *) + *) + + (*----------------------------------------------------------------------- + * Main routine. + * + * Algorithm: + * 1. Perform liveness analysis. + * 2. For each fp register, mark all its last use point(s). + * Registers are popped at their last uses. + * 3. Rewrite the instructions basic block by basic block. + * 4. Insert shuffle code at basic block boundaries. + * When necessary, split critical edges. + * 5. Sacrifice a goat to make sure things don't go wrong. + *-----------------------------------------------------------------------*) + fun run(Cfg as G.GRAPH cfg) = + let + val numberOfBlks = #capacity cfg () + val ENTRY = List.hd (#entries cfg ()) + val EXIT = List.hd (#exits cfg ()) + + val getCell = C.getCellsByKind CB.FP + (*extract the fp component of cellset*) + + val stTable = A.tabulate(8, fn n => I.ST(C.ST n)) + + fun ST n = (if sanityCheck andalso (n < 0 orelse n >= 8) then + pr("WARNING BAD %st("^i2s n^")\n") + else (); + A.sub(stTable, n) + ) + + fun FXCH n = I.fxch{opnd=C.ST n} + + val ST0 = ST 0 + val ST1 = ST 1 + val POP_ST = I.fstpl ST0 (* Instruction to pop an entry *) + + (* Dump instructions *) + fun dump instrs = + let val Asm.S.STREAM{emit, ...} = + AsmStream.withStream (!MLRiscControl.debug_stream) + Asm.makeStream [] + in app emit (rev instrs) + end + + (* Create assembly of instruction *) + fun assemble instr = + let val buf = StringOutStream.mkStreamBuf() + val stream = StringOutStream.openStringOut buf + val Asm.S.STREAM{emit, ...} = + AsmStream.withStream stream Asm.makeStream [] + val _ = emit instr + val s = StringOutStream.getString buf + val n = String.size s + in if n = 0 then s else String.substring(s, 0, n - 1) + end + + (*------------------------------------------------------------------ + * Perform liveness analysis on the floating point variables + * P.S. I'm glad I didn't throw away the code liveness code. + *------------------------------------------------------------------*) + val defUse = P.defUse CB.FP (* def/use properties *) + val {liveIn=liveInTable, liveOut=liveOutTable} = Liveness.liveness { + defUse=defUse, + (* updateCell=C.updateCellsByKind CB.FP, *) + getCell=getCell + } Cfg + (*------------------------------------------------------------------ + * Scan the instructions compute the last uses and dead definitions + * at each program point. Ideally we can do this during the code + * rewriting phase. But that's probably too error prone for now. + *------------------------------------------------------------------*) + fun computeLastUse(blknum, insns, liveOut) = + let fun scan([], _, lastUse) = lastUse + | scan(i::instrs, live, lastUse) = + let val (d, u) = defUse i + val d = SL.uniq(d)(* definitions *) + val u = SL.uniq(u)(* uses *) + val dead = SL.return(SL.difference(d, live)) + val live = SL.difference(live, d) + val last = SL.return(SL.difference(u, live)) + val live = SL.union(live, u) + val _ = + if debug andalso debugLiveness then + (case last of + [] => () + | _ => print(assemble i^"\tlast use="^ + fregsToString last^"\n") + ) + else () + in scan(instrs, live, (last,dead)::lastUse) + end + val liveOutSet = SL.uniq liveOut + val _ = + if debug andalso debugLiveness then + print("LiveOut("^i2s blknum^") = "^ + fregsToString(SL.return liveOutSet)^"\n") + else () + in scan(!insns, liveOutSet, []) + end + + (*------------------------------------------------------------------ + * Temporary work space + *------------------------------------------------------------------*) + val {high, low} = C.cellRange CB.FP + val n = high+1 + val lastUseTbl = A.array(n,~1) (* table for marking last uses *) + val useTbl = A.array(n,~1) (* table for marking uses *) + + (* %fp register bindings before and after a basic block *) + val bindingsIn = A.array(numberOfBlks, NONE) + val bindingsOut = A.array(numberOfBlks, NONE) + val stampCounter = ref ~4096 + + (* Edges that need splitting *) + exception NoEdgesToSplit + val edgesToSplit = IntHashTable.mkTable(32, NoEdgesToSplit) + val addEdgesToSplit = IntHashTable.insert edgesToSplit + fun lookupEdgesToSplit b = + getOpt(IntHashTable.find edgesToSplit b, []) + + (*------------------------------------------------------------------ + * Code for handling bindings between basic block + *------------------------------------------------------------------*) + + fun splitEdge(title, source, target, e) = + (if debug andalso !traceOn then + pr(title^" SPLITTING "^i2s source^"->"^ i2s target^"\n") + else (); + addEdgesToSplit(target,(source,target,e)::lookupEdgesToSplit target) + ) + + fun computeFreq(_,_,CFG.EDGE{w,...}) = !w + + (* Given a cellset, return a sorted and unique + * list of elements with all non-physical registers removed + *) + fun removeNonPhysical celllist = + let fun loop([], S) = SL.return(SL.uniq S) + | loop(f::fs, S) = + let val fx = CB.registerNum f + in loop(fs,if fx <= 7 then f::S else S) + end + in loop(celllist, []) + end + + (* Given a sorted and unique list of registers, + * Return a stack with these elements + *) + fun newStack fregs = + let val stack = ST.create() + in app (fn f => ST.push(stack, CB.registerNum f)) (rev fregs); + stack + end + + (* + * This function looks at all the entries on the stack, + * and generate code to deallocate all the dead values. + * The stack is updated. + *) + fun removeDeadValues(stack, liveSet, code) = + let val stamp = !stampCounter + val _ = stampCounter := !stampCounter - 1 + fun markLive [] = () + | markLive(r::rs) = + (A.update(useTbl, CB.registerNum r, stamp); markLive rs) + fun isLive f = A.sub(useTbl, f) = stamp + fun loop(i, depth, code) = + if i >= depth then code else + let val f = ST.st(stack, i) + in if isLive f (* live? *) + then loop(i+1, depth, code) + else + (if debug andalso !traceOn then + pr("REMOVING %f"^i2s f^" in %st("^i2s i^")"^ + " current stack="^ST.stackToString stack^"\n") + else (); + if i = 0 then + (ST.pop stack; + loop(0, depth-1, POP_ST::code) + ) + else (ST.xch(stack,0,i); + ST.pop stack; + loop(0, depth-1, I.fstpl(ST i)::code) + ) + ) + end + in markLive liveSet; + loop(0, ST.depth stack, code) + end + + + (*------------------------------------------------------------------ + * Given two stacks, source and target, where the bindings are + * permutation of each other, generate the minimal number of + * fxchs to match source with target. + * + * Important: source and target MUST be permutations of each other. + * + * Essentially, we first decompose the permutation into cycles, + * and process each cycle. + *------------------------------------------------------------------*) + fun shuffle(source, target, code) = + let val stamp = !stampCounter + val _ = stampCounter := !stampCounter - 1 + val permutation = lastUseTbl (* reuse the space *) + + val _ = if debug andalso !traceOn then + pr("SHUFFLE "^ST.stackToString source^ + "->"^ST.stackToString target^"\n") + else () + + (* Compute the initial permutation *) + val n = ST.depth source + fun computeInitialPermutation(i) = + if i >= n + then () + else let val f = ST.st(source, i) + val j = ST.fp(target, f) + in A.update(permutation, j, i); + computeInitialPermutation(i+1) + end + val _ = computeInitialPermutation 0 + + (* Decompose the initial permutation into cycles. + * The cycle involving 0 is treated specially. + *) + val visited = useTbl + fun isVisited i = A.sub(visited,i) = stamp + fun markAsVisited i = A.update(visited,i,stamp) + fun decomposeCycles(i, cycle0, cycles) = + if i >= n then (cycle0, cycles) + else if isVisited i orelse + A.sub(permutation, i) = i (* trivial cycle *) + then decomposeCycles(i+1, cycle0, cycles) + else let fun makeCycle(j, cycle, zero) = + let val k = A.sub(permutation, j) + val cycle = j::cycle + val zero = zero orelse j = 0 + in markAsVisited j; + if k = i then (cycle, zero) + else makeCycle(k, cycle, zero) + end + val (cycle, zero) = makeCycle(i, [], false) + in if zero then decomposeCycles(i+1, [cycle], cycles) + else decomposeCycles(i+1, cycle0, cycle::cycles) + end + + val (cycle0, cycles) = decomposeCycles(0, [], []) + + (* + * Generate shuffle for a cycle that does not involve 0. + * Given a cycle (c_1, ..., c_k), we generate this code: + * fxch %st(c_1), + * fxch %st(c_2), + * ... + * fxch %st(c_k), + * fxch %st(c_1) + *) + fun genxch([], code) = code + | genxch(c::cs, code) = genxch(cs, FXCH c::code) + + fun gen([], code) = error "shuffle.gen" + | gen(cs as (c::_), code) = FXCH c::genxch(cs, code) + + (* + * Generate shuffle for a cycle that involves 0. + * Given a cycle (c_1,...,c_k) we first shuffle this to + * an equivalent cycle (c_1, ..., c_k) where c'_k = 0, + * then we generate this code: + * fxch %st(c'_1), + * fxch %st(c'_2), + * ... + * fxch %st(c'_{k-1}), + *) + fun gen0([], code) = error "shuffle.gen0" + | gen0(cs, code) = + let fun rearrange(0::cs, cs') = cs@rev cs' + | rearrange(c::cs, cs') = rearrange(cs, c::cs') + | rearrange([], _) = error "shuffle.rearrange" + val cs = rearrange(cs, []) + in genxch(cs, code) + end + + (* + * Generate code. Must process the non-zero cycles first. + *) + val code = List.foldr gen code cycles + val code = List.foldr gen0 code cycle0 + in code + end (* shuffle *) + + (*------------------------------------------------------------------ + * Insert code at the end of a basic block. + * Make sure we put code in front of a transfer instruction + *------------------------------------------------------------------*) + fun insertAtEnd(insns, code) = + (case insns of + [] => code + | jmp::rest => + if P.instrKind jmp = P.IK_JUMP then + jmp::code@rest + else + code@insns + ) + + (*------------------------------------------------------------------ + * Magic for inserting shuffle code at the end of a basic block + *------------------------------------------------------------------*) + fun shuffleOut(stackOut, insns, b, block, liveOut) = + let + val liveOut = removeNonPhysical(liveOut) + + (* Generate code that remove unnecessary values *) + val code = removeDeadValues(stackOut, liveOut, []) + + fun done(stackOut, insns, code) = + (A.update(bindingsOut,b,SOME stackOut); + insertAtEnd(insns, code) + ) + + (* Generate code that shuffle values from source to target *) + fun match(source, target) = + done(target, insns, shuffle(source, target, [])) + + (* Generate code that shuffle values from source to liveOut *) + fun matchLiveOut() = + case liveOut of + [] => done(stackOut, insns, code) + | _ => match(stackOut, newStack liveOut) + + (* With multiple successors, find out which one we + * should connect to. Choose the one from the block that + * follows from this one, if that exists, or else choose + * from the edge with the highest frequency. + *) + fun find([], _, id, best) = (id, best) + | find((_, target, _)::edges, highestFreq, id, best) = + let val CFG.BLOCK{freq, ...} = #node_info cfg target + in if target = b+1 then (target, A.sub(bindingsIn, target)) + else (case A.sub(bindingsIn, target) of + NONE => find(edges, highestFreq, id, best) + | this as SOME stack => + if highestFreq < !freq then + find(edges, !freq, target, this) + else + find(edges, highestFreq, id, best) + ) + end + + (* + * Split all edges source->target except omitThis. + *) + fun splitAllEdgesExcept([], omitThis) = () + | splitAllEdgesExcept((source,target,e)::edges, omitThis) = + if target = EXIT then error "can't split exit edge!" + else + (if target <> omitThis andalso + target <= b andalso (* XXX *) + target <> ENTRY + then splitEdge("ShuffleOut",source,target,e) else (); + splitAllEdgesExcept(edges, omitThis) + ) + + (* Just one successor; + * try to match the bindings of the successor if it exist. + *) + fun matchIt succ = + let val (succBlock, target) = find(succ, ~1.0, ~1, NONE) + in splitAllEdgesExcept(succ, succBlock); + case target of + SOME stackIn => match(stackOut, stackIn) + | NONE => done(stackOut,insns,code) + end + + in case #out_edges cfg b of + [] => matchLiveOut() + | succ as [(_,target,_)] => + if target = EXIT then matchLiveOut() + else matchIt succ + | succ => matchIt succ + end (* shuffleOut *) + + (*------------------------------------------------------------------ + * Compute the initial fp stack bindings for basic block b. + *------------------------------------------------------------------*) + fun shuffleIn(b, block, liveIn) = + let + val liveInSet = removeNonPhysical liveIn + + (* With multiple predecessors, find out which one we + * should connect to. Choose the one from the block that + * falls into this one, if that exists, or else choose + * from the edge with the highest frequency. + *) + fun find([], _, best) = best + | find((source, _, _)::edges, highestFreq, best) = + let val CFG.BLOCK{freq, ...} = #node_info cfg source + in case A.sub(bindingsOut, source) of + NONE => find(edges, highestFreq, best) + | this as SOME stack => + if source = b-1 + then this (* falls into b *) + else if highestFreq < !freq then find(edges, !freq, this) + else find(edges, highestFreq, best) + end + + fun splitAllDoneEdges [] = () + | splitAllDoneEdges ((source, target, e)::edges) = + (if source < b andalso + source <> ENTRY andalso + source <> EXIT + then splitEdge("ShuffleIn", source, target, e) else (); + splitAllDoneEdges edges + ) + + (* The initial stack bindings are determined by the live set. + * No compensation code is needed. + *) + fun fromLiveIn() = + let val stackIn = + case liveInSet of + [] => ST.stack0 + | _ => + (pr("liveIn="^celllistToString liveIn^"\n"); + newStack liveInSet + ) + val stackOut = ST.copy stackIn + in (stackIn, stackOut, []) + end + + val pred = #in_edges cfg b + + val (stackIn, stackOut, code) = + case find(pred, ~1.0, NONE) of + NONE => (splitAllDoneEdges(pred); fromLiveIn()) + | SOME stackIn' => + (case pred of + [_] => (* one predecessor *) + (* Use the bindings as from the previous block + * We first have to deallocate all unused values. + *) + let val stackOut = ST.copy stackIn' + (* Clean the stack of unused entries *) + val code = removeDeadValues(stackOut, liveInSet, []) + in (stackIn', stackOut, code) end + | pred => (* more than one predecessors *) + let val stackIn = ST.copy stackIn' + val code = removeDeadValues(stackIn, liveInSet, []) + val stackOut = ST.copy stackIn + in (* If we have to generate code to deallocate + * the stack then we have split the edge. + *) + case code of + [] => () + | _ => splitAllDoneEdges(pred); + (stackIn, stackOut, []) + end + ) + in A.update(bindingsIn, b, SOME stackIn); + A.update(bindingsOut, b, SOME stackOut); + (stackIn, stackOut, code) + end + + (*------------------------------------------------------------------ + * Code for patching up critical edges. + * The trick is finding a good place to insert the critical edges. + * Let's call an edge x->y that requires compensation + * code c to be inserted an candidate edge. We write this as x->y(c) + * + * Here are the heuristics that we use to improve the final code: + * + * 1. Given two candidate edges a->x(c1) and b->x(c2) where c1=c2 + * then we can merge the two copies of compensation code. + * This is quite common. This generalizes to any number of edges. + * + * 2. Given two candidate edges a->x(c1) and b->x(c2) and where + * c1 and c2 are pops, we can partially share c1 and c2. + * Currently, I think I only recognize this case when + * x has no fp registers live-in. + * + * 3. Given two candidate edges a->x(c1) and b->x(c2), + * if a->x has a higher frequency then put the compensation + * code in front of x (so that it falls through into x) + * whenever possible. + * + * As you can see, the voodoo is strong here. + * + * The routine has two main phases: + * 1. Determine the compensation code by applying the heuristics + * above. + * 2. Then insert them and rebuild the cfg by renaming all block + * ids. This is currently necessary to keep the layout order + * consistent with the order of the id. + *------------------------------------------------------------------*) + fun repairCriticalEdges(Cfg as G.GRAPH cfg) = + let + val cleanup = [#create MLRiscAnnotations.COMMENT "cleanup edge"] + val critical = [#create MLRiscAnnotations.COMMENT "critical edge"] + + fun annotate(gen, an) = + app (fn ((_,CFG.BLOCK{annotations, ...}),_) => annotations := an) + gen + + (* + * Special case: target block has stack depth of 0. + * Just generate code that pop entries from the sources. + * To make things interesting, we try to share code among + * all the critical edges. + *) + fun genPoppingCode(_, []) = () + | genPoppingCode(targetId, edges) = + let (* Edges annotated with the source stack depth + * Ordered by increasing stack height + *) + val edges = + IM.listItemsi + (foldr (fn (edge as (sourceId, _, _), M) => + let val n = ST.depth(valOf(A.sub(bindingsOut,sourceId))) + in IM.insert(M, n, edge :: getOpt(IM.find(M, n), [])) + end) IM.empty edges) + + (* Generate n pops *) + fun pops(0, code) = code + | pops(n, code) = pops(n-1, POP_ST::code) + + (* Create the chain of blocks *) + fun makeChain(depth, [], chain) = chain + | makeChain(depth, (d, es)::es', chain) = + let val code = pops(d - depth, []) + in makeChain(d, es', (es, code)::chain) + end + + val chain = makeChain(0, edges, []) + + in annotate(CFG.splitEdges Cfg {groups=chain, jump=false}, cleanup) + end + + (* + * Generate repair code. + *) + fun genRepairCode(targetId, stackIn, edges) = + let val liveIn = IntHashTable.lookup liveInTable targetId + val liveInSet = removeNonPhysical liveIn + val _ = if debug then + pr("LiveIn = "^celllistToString liveIn^"\n") + else () + + (* Group all edges whose output stack configurations + * are the same. Each group is merged together into + * a single compensation block + *) + fun partition([], S) = S + | partition((e as (src,_,_))::es, S) = + let val stackOut = ST.copy(valOf(A.sub(bindingsOut,src))) + fun find([], S) = partition(es, ([e],stackOut)::S) + | find((x as (es',st'))::S', S) = + if ST.equal(stackOut,st') then + partition(es, (e::es',st')::S' @ S) + else + find(S', x::S) + in find(S, []) + end + + (* Partition by the source bindings *) + val S = partition(edges, []) + + (* Compute frequencies *) + val S = map (fn (es,st) => (CFG.sumEdgeFreqs es,es,st)) S + + (* Ordered by non-increasing frequencies *) + val S = ListMergeSort.sort (fn ((x,_,_),(y,_,_)) => x < y) S + + (* Generate code *) + fun gen(freq, edges, stackOut) = + let (* deallocate unused values *) + val code = removeDeadValues(stackOut,liveInSet,[]) + (* shuffle values *) + val code = shuffle(stackOut, stackIn, code) + in annotate( + CFG.splitEdges Cfg {groups=[(edges,code)], jump=false}, + critical) + end + + in app gen S + end + + (* Split all edges entering targetId *) + fun split(targetId, edges) = + let val stackIn = valOf(A.sub(bindingsIn,targetId)) + fun log(s, t, e) = + case A.sub (bindingsOut, s) of + SOME stackOut => + (pr("SPLIT "^i2s s^"->"^i2s t^" "^ + ST.stackToString stackOut^"->"^ + ST.stackToString stackIn^"\n")) + | NONE => error "split:stackOut" + val _ = if debug andalso !traceOn then app log edges else () + in if ST.depth stackIn = 0 then genPoppingCode(targetId, edges) + else genRepairCode(targetId, stackIn, edges) + end + + in IntHashTable.appi split edgesToSplit; + CFG.changed Cfg; + Cfg + end + + (*------------------------------------------------------------------ + * Process all blocks which are not the entry or the exit + *------------------------------------------------------------------*) + val stamp = ref 0 + fun rewriteAllBlocks (_, CFG.BLOCK{kind=CFG.START, ...}) = () + | rewriteAllBlocks (_, CFG.BLOCK{kind=CFG.STOP, ...}) = () + | rewriteAllBlocks + (blknum, block as CFG.BLOCK{insns, labels, annotations, ...}) = + let val _ = + if debug andalso !debugOn then + app (fn l => pr(L.toString l^":\n")) (!labels) + else (); + val liveIn = HT.lookup liveInTable blknum + val liveOut = HT.lookup liveOutTable blknum + val st = rewrite(!stamp, blknum, block, + insns, liveIn, liveOut, + annotations) + in stamp := st (* update stamp *) + end + + (*------------------------------------------------------------------ + * Translate code within a basic block. + * Each instruction is given a unique stamp for identifying last + * uses. + *------------------------------------------------------------------*) + and rewrite(stamp, blknum, block, insns, liveIn, liveOut, + annotations) = + let val (stackIn, stack, code) = shuffleIn(blknum, block, liveIn) + + (* Dump instructions when encountering a bug *) + fun bug msg = + (pr("-------- bug in block "^i2s blknum^" ----\n"); + dump(!insns); + error msg + ) + + fun loop(stamp, [], [], code) = (stamp, code) + | loop(stamp, instr::rest, (lastUse,dead)::lastUses, code) = + let fun mark(tbl, []) = () + | mark(tbl, r::rs) = + (A.update(tbl, CB.registerNum r, stamp); mark(tbl, rs)) + in mark(lastUseTbl,lastUse); (* mark all last uses *) + trans(stamp, instr, [], rest, dead, lastUses, code) + end + | loop _ = error "loop" + + (* + * Main routine that does the actual translation. + * A few reminders: + * o The instructions are processed in normal order + * and generated in the reversed order. + * o (Local) liveness is computed at the same time. + * o For each use, we have to find out whether it is + * the last use. If so, we can kill it and reclaim + * the stack entry at the same time. + *) + and trans(stamp, instr, an, rest, dead, lastUses, code) = + let (* Call this continuation when done with code generation *) + fun FINISH code = loop(stamp+1, rest, lastUses, code) + + fun KILL_THE_DEAD(dead, code) = + let fun kill([], code) = FINISH code + | kill(f::fs, code) = + let val fx = CB.registerNum f + in if debug andalso debugDead then + pr("DEAD "^fregToString f^" in "^ + ST.stackToString stack^"\n") + else (); + (* not a physical register *) + if fx >= 8 then kill(fs, code) + else + let val i = ST.fp(stack, fx) + in if debug andalso debugDead then + pr("KILLING "^fregToString f^ + "=%st("^i2s i^")\n") + else (); + if i < 0 then kill(fs, code) (* dead already *) + else if i = 0 then + (ST.pop stack; kill(fs, POP_ST::code)) + else + (ST.xch(stack,0,i); ST.pop stack; + kill(fs, I.fstpl(ST i)::code) + ) + end + end + in kill(dead, code) + end + + (* Call this continuation when done with floating point + * code generation. Remove all dead code first. + *) + fun DONE code = KILL_THE_DEAD(dead, code) + + (* Is this the last use of register f? *) + fun isLastUse f = A.sub(lastUseTbl, f) = stamp + + (* Is this value dead? *) + fun isDead f = + let fun loop [] = false + | loop(r::rs) = CB.sameColor(f,r) orelse loop rs + in loop dead end + + (* Dump the stack before each intruction for debugging *) + fun log() = if debug andalso !traceOn then + pr(ST.stackToString stack^assemble instr^"...\n") + else () + + (* Find the location of a source register *) + fun getfs(f) = + let val fx = CB.registerNum f + val s = ST.fp(stack, fx) + in (isLastUse fx,s) end + + (* Generate memory to memory move *) + fun mmmove(fsize,src,dst) = + let val _ = ST.nonFull stack + val code = FLD(fsize,src)::code + val code = mark(FSTP(fsize,dst),an)::code + in DONE code end + + (* Allocate a new register in %st(0) *) + fun alloc(f,code) = (ST.push(stack,CB.registerNum f); code) + + (* register -> register move *) + fun rrmove(fs,fd) = + if CB.sameColor(fs,fd) then DONE code + else + let val (dead,ss) = getfs fs + in if dead then (* fs is dead *) + (ST.set(stack,ss,CB.registerNum fd); (* rename fd to fs *) + DONE code (* no code is generated *) + ) + else (* fs is not dead; push it onto %st(0); + * set fd to %st(0) + *) + let val code = alloc(fd, code) + in DONE(mark(I.fldl(ST ss),an)::code) + end + end + + (* memory -> register move. + * Do dead code elimination here. + *) + fun mrmove(fsize,src,fd) = + if isDead fd + then FINISH code (* value has been killed *) + else + let val code = alloc(fd, code) + in DONE(mark(FLD(fsize,src),an)::code) + end + + (* exchange %st(n) and %st(0) *) + fun xch(n) = (ST.xch(stack,0,n); FXCH n) + + (* push %st(n) onto the stack *) + fun push(n) = (ST.push(stack,~2); I.fldl(ST n)) + + + (* push mem onto the stack *) + fun pushmem(src) = (ST.push(stack,~2); I.fldl(src)) + + (* register -> memory move. + * Use pop version of the opcode if it is the last use. + *) + fun rmmove(fsize,fs,dst) = + let fun fstp(code) = + (ST.pop stack; DONE(mark(FSTP(fsize,dst),an)::code)) + fun fst(code) = DONE(mark(FST(fsize,dst),an)::code) + in case getfs fs of + (true, 0) => fstp code + | (true, n) => fstp(xch n::code) + | (false, 0) => fst(code) + | (false, n) => fst(xch n::code) + end + + (* Floating point move *) + fun fmove{fsize,src=I.FPR fs,dst=I.FPR fd} = rrmove(fs,fd) + | fmove{fsize,src,dst=I.FPR fd} = mrmove(fsize,src,fd) + | fmove{fsize,src=I.FPR fs,dst} = rmmove(fsize,fs,dst) + | fmove{fsize,src,dst} = mmmove(fsize,src,dst) + + (* Floating point integer load operator *) + fun fiload{isize,ea,dst=I.FPR fd} = + let val code = alloc(fd, code) + val code = mark(FILD(isize,ea),an)::code + in DONE code + end + | fiload{isize,ea,dst} = + let val code = mark(FILD(isize,ea),an)::code + val code = I.fstpl(dst)::code (* XXX *) + in DONE code + end + + (* Make a copy of register fs to %st(0). *) + fun moveregtotop(fs, code) = + (case getfs fs of + (true, 0) => code + | (true, n) => xch n::code + | (false, n) => push n::code + ) + + fun movememtotop(fsize, mem, code) = + (ST.push(stack, ~2); FLD(fsize, mem)::code) + + (* Move an operand to top of stack *) + fun movetotop(fsize, I.FPR fs, code) = moveregtotop(fs, code) + | movetotop(fsize, mem, code) = movememtotop(fsize, mem, code) + + fun storeResult(fsize, dst, n, code) = + case dst of + I.FPR fd => (ST.set(stack, n, CB.registerNum fd); DONE code) + | mem => + let val code = if n = 0 then code else xch n::code + in ST.pop stack; DONE(FSTP(fsize, mem)::code) end + + (* Floating point unary operator *) + fun funop{fsize,unOp,src,dst} = + let val code = movetotop(fsize, src, code) + val code = mark(I.funary unOp,an)::code + + (* Moronic hack to deal with partial tangent! *) + val code = + case unOp of + I.FPTAN => + (if ST.depth stack >= 7 then error "FPTAN" + else (); + POP_ST::code (* pop the useless 1.0 *) + ) + | _ => code + in storeResult(fsize, dst, 0, code) + end + + (* Floating point binary operator. + * Note: + * binop src, dst + * means dst := dst binop src + * (lsrc := lsrc binop rsrc) + * on the x86 + *) + fun fbinop{fsize,binOp,lsrc,rsrc,dst} = + let (* generate code and set %st(n) = fd *) + (* op2 := op1 - op2 *) + fun oper(binOp,op1,op2,n,code) = + let val code = + mark(I.fbinary{binOp=binOp,src=op1,dst=op2},an) + ::code + in storeResult(I.FP64, dst, n, code) + end + + fun operR(binOp,op1,op2,n,code) = + oper(invert binOp,op1,op2,n,code) + + fun operP(binOp,op1,op2,n,code) = + (ST.pop stack; oper(pop binOp,op1,op2,n-1,code)) + + fun operRP(binOp,op1,op2,n,code) = + (ST.pop stack; operR(pop binOp,op1,op2,n-1,code)) + + (* Many special cases to consider. + * Basically, try to reuse stack space as + * much as possible by taking advantage of last uses. + * + * Stack=[st(0)=3.0 st(1)=2.0] + * fsub %st(1), %st [1,2.0] + * fsubr %st(1), %st [-1,2.0] + * fsub %st, %st(1) [3.0,1.0] + * fsubr %st, %st(1) [3.0,-1.0] + * + * fsubp %st, %st(1) [1] + * fsubrp %st, %st(1) [-1] + * So, + * fsub %st(n), %st (means %st - %st(n) -> %st) + * fsub %st, %st(n) (means %st - %st(n) -> %st(n)) + * fsubr %st(n), %st (means %st(n) - %st -> %st) + * fsubr %st, %st(n) (means %st(n) - %st -> %st(n)) + *) + fun reg2(fx, fy) = + let val (dx, sx) = getfs fx + val (dy, sy) = getfs fy + fun loop(dx, sx, dy, sy, code) = + (* op1, op2 (dst) *) + case (dx, sx, dy, sy) of + (true, 0, false, n) => oper(binOp,ST n,ST0,0,code) + | (false, n, true, 0) => operR(binOp,ST n,ST0,0,code) + | (true, n, true, 0) => operRP(binOp,ST0,ST n,n,code) + | (true, 0, true, n) => operP(binOp,ST0,ST n,n,code) + | (false, 0, true, n) => oper(binOp,ST0,ST n,n,code) + | (true, n, false, 0) => operR(binOp,ST0,ST n,n,code) + | (true, sx, dy, sy) => + loop(true, 0, dy, sy, xch sx::code) + | (dx, sx, true, sy) => + loop(dx, sx, true, 0, xch sy::code) + | (false, sx, false, sy) => + loop(true, 0, false, sy+1, push sx::code) + in if sx = sy then (* same register *) + let val code = + case (dx, sx) of + (true, 0) => code + | (true, n) => xch n::code + | (false, n) => push n::code + in oper(binOp,ST0,ST0,0,code) + end + else loop(dx, sx, dy, sy, code) + end + + (* reg/mem operands *) + fun regmem(binOp, fx, mem) = + case getfs fx of + (true, 0) => oper(binOp,mem,ST0,0,code) + | (true, n) => oper(binOp,mem,ST0,0,xch n::code) + | (false, n) => oper(binOp,mem,ST0,0,push n::code) + + (* Two memory operands. Optimize the case when + * the two operands are identical. + *) + fun mem2(lsrc, rsrc) = + let val _ = ST.push(stack,~2) + val code = FLD(fsize,lsrc)::code + val rsrc = if P.eqOpn(lsrc, rsrc) then ST0 else rsrc + in oper(binOp,rsrc,ST0,0,code) + end + + fun process(I.FPR fx, I.FPR fy) = reg2(fx, fy) + | process(I.FPR fx, mem) = regmem(binOp, fx, mem) + | process(mem, I.FPR fy) = regmem(invert binOp, fy, mem) + | process(lsrc, rsrc) = mem2(lsrc, rsrc) + + in process(lsrc, rsrc) + end + + (* Floating point binary operator with integer conversion *) + fun fibinop{isize,binOp,lsrc,rsrc,dst} = + let fun oper(binOp,src,code) = + let val code = mark(I.fibinary{binOp=binOp,src=src},an) + ::code + in storeResult(I.FP64, dst, 0, code) + end + + fun regmem(binOp, fx, mem) = + case getfs fx of + (true, 0) => oper(binOp, mem, code) + | (true, n) => oper(binOp, mem, xch n::code) + | (false, n) => oper(binOp, mem, push n::code) + + in case (lsrc, rsrc) of + (I.FPR fx, mem) => regmem(binOp, fx, mem) + | (lsrc, rsrc) => oper(binOp, rsrc, pushmem lsrc::code) + end + + (* Floating point comparison + * We have to make sure there are enough registers. + * The trick is that tmp is always a physical register. + * So we can always use it as temporary space if we + * have run out. + *) + fun fcmp{i,fsize,lsrc,rsrc} = + let fun fucompp code = + (ST.pop stack; ST.pop stack; + if i then + POP_ST :: mark(I.fucomip(ST 1), an) :: code + else + mark(I.fucompp,an) :: code + ) + fun fucomp(n) = + (ST.pop stack; + mark((if i then I.fucomip else I.fucomp)(ST n),an)) + fun fucom(n) = + mark((if i then I.fucomi else I.fucom)(ST n),an) + + fun genmemcmp() = + let val code = movememtotop(fsize, rsrc, code) + val code = movememtotop(fsize, lsrc, code) + in FINISH(fucompp(code)) + end + + fun genmemregcmp(lsrc, fy) = + case getfs fy of + (false, n) => + let val code = movememtotop(fsize, lsrc, code) + in FINISH(fucomp(n+1)::code) end + | (true, n) => + let val code = if n = 0 then code else xch n::code + val code = movememtotop(fsize, lsrc, code) + in FINISH(fucompp(code)) + end + + fun genregmemcmp(fx, rsrc) = + let val code = + case getfs fx of + (true, n) => + let val code = if n = 0 then code + else xch n::code + val code = movememtotop(fsize, rsrc, code) + in xch 1::code end + | (false, n) => + let val code = movememtotop(fsize, rsrc, code) + in push(n+1)::code + end + in FINISH(fucompp(code)) + end + + (* Deal with the special case when both sources are + * in the same register + *) + fun regsame(dx, sx) = + let val (code, cmp) = + case (dx, sx) of + (true, 0) => (code, fucomp 0) (* pop once! *) + | (false, 0) => (code, fucom 0) (* don't pop! *) + | (true, n) => (xch n::code, fucomp 0) + | (false, n) => (xch n::code, fucom 0) + in FINISH(cmp::code) end + + fun reg2(fx, fy) = + (* special case is when things are already in place. + * Note: should also generate FUCOM and FUCOMP!!! + *) + let val (dx, sx) = getfs fx + val (dy, sy) = getfs fy + fun fstp(n) = + (ST.xch(stack,n,0); ST.pop stack; I.fstpl(ST n)) + in if sx = sy then regsame(dx, sx) (* same register!*) + else + (* first, move sx to %st(0) *) + let val (sy, code) = + if sx = 0 then (sy, code) (* there already *) + else (if sy = 0 then sx else sy, + xch sx::code) + + (* Generate the appropriate comparison op *) + val (sy, code, popY) = + case (dx, dy, sy) of + (true, true, 0) => (~1,fucompp code, false) + | (true, _, _) => (sy-1,fucomp sy::code,dy) + | (false, _, _) => (sy, fucom sy::code, dy) + + (* Pop fy if it is dead and hasn't already + * been popped. + *) + val code = if popY then fstp sy::code else code + in FINISH code + end + end + + in case (lsrc, rsrc) of + (I.FPR x, I.FPR y) => reg2(x, y) + | (I.FPR x, mem) => genregmemcmp(x, mem) + | (mem, I.FPR y) => genmemregcmp(mem, y) + | _ => genmemcmp() + end + + + fun prCopy(dst, src) = + ListPair.app(fn (fd, fs) => + pr(fregToString(fd)^"<-"^fregToString fs^" ")) + (dst, src) + + (* Parallel copy magic. + * For each src registers, we find out + * 1. whether it is the last use, and if so, + * 2. whether it is used more than once. + * If a source is a last and unique use, then we + * can simply rename it to appropriate destination register. + *) + fun fcopy(I.COPY{dst,src,tmp,...}) = let + fun loop([], [], copies, renames) = (copies, renames) + | loop(fd::fds, fs::fss, copies, renames) = + let val fsx = CB.registerNum fs + in if isLastUse fsx then + if A.sub(useTbl,fsx) <> stamp + (* unused *) + then (A.update(useTbl,fsx,stamp); + loop(fds, fss, copies, + if CB.sameColor(fd,fs) then renames + else (fd, fs)::renames) + ) + else loop(fds, fss, (fd, fs)::copies, renames) + else loop(fds, fss, (fd, fs)::copies, renames) + end + | loop _ = error "fcopy.loop" + + (* generate code for the copies *) + fun genCopy([], code) = code + | genCopy((fd, fs)::copies, code) = + let val ss = ST.fp(stack, CB.registerNum fs) + val _ = ST.push(stack, CB.registerNum fd) + val code = I.fldl(ST ss)::code + in genCopy(copies, code) end + + (* perform the renaming; it must be done in parallel! *) + fun renaming(renames) = + let val ss = map (fn (_,fs) => + ST.fp(stack,CB.registerNum fs)) renames + in ListPair.app (fn ((fd,_),ss) => + ST.set(stack,ss,CB.registerNum fd)) + (renames, ss) + end + + (* val _ = if debug then + (ListPair.app (fn (fd, fs) => + pr(fregToString(regmap fd)^"<-"^ + fregToString(regmap fs)^" ") + ) (dst, src); + pr "\n") + else () *) + + val (copies, renames) = loop(dst, src, [], []) + val code = genCopy(copies, code) + in renaming renames; + case tmp of + SOME(I.FPR f) => + (if debug andalso debugDead + then pr("KILLING tmp "^fregToString f^"\n") + else (); + ST.kill(stack, f) + ) + | _ => (); + DONE code + end + | fcopy _ = error "fcopy" + + fun call(instr, return) = let + val code = mark(I.INSTR instr, an)::code + val returnSet = SL.return(SL.uniq(getCell return)) + in + case returnSet of + [] => () + | [r] => ST.push(stack, CB.registerNum r) + | _ => + error "can't return more than one fp argument (yet)"; + KILL_THE_DEAD(List.filter isDead returnSet, code) + end + fun x86trans instr = + (case instr + of I.FMOVE x => (log(); fmove x) + | I.FBINOP x => (log(); fbinop x) + | I.FIBINOP x => (log(); fibinop x) + | I.FUNOP x => (log(); funop x) + | I.FILOAD x => (log(); fiload x) + | I.FCMP x => (log(); fcmp x) + + (* handle calling convention *) + | I.CALL{return, ...} => (log(); call(instr,return)) + + (* + * Catch instructions that absolutely + * should not have been generated at this point. + *) + | (I.FLD1 | I.FLDL2E | I.FLDLG2 | I.FLDLN2 | I.FLDPI | + I.FLDZ | I.FLDL _ | I.FLDS _ | I.FLDT _ | + I.FILD _ | I.FILDL _ | I.FILDLL _ | + I.FENV _ | I.FBINARY _ | I.FIBINARY _ | I.FUNARY _ | + I.FUCOMPP | I.FUCOM _ | I.FUCOMP _ | I.FCOMPP | I.FXCH _ | + I.FCOMI _ | I.FCOMIP _ | I.FUCOMI _ | I.FUCOMIP _ | + I.FSTPL _ | I.FSTPS _ | I.FSTPT _ | I.FSTL _ | I.FSTS _ + ) => bug("Illegal FP instructions") + + (* Other instructions are untouched *) + | instr => FINISH(mark(I.INSTR instr, an)::code) + (*esac*)) + in + case instr + of I.ANNOTATION{a,i} => + trans(stamp, i, a::an, rest, dead, lastUses, code) + | I.COPY{k=CB.FP, ...} => (log(); fcopy instr) + | I.LIVE _ => DONE(mark(instr, an)::code) + | I.INSTR instr => x86trans(instr) + | _ => FINISH(mark(instr, an)::code) + end (* trans *) + + (* + * Check the translation result to see if it matches the original + * code. + *) + fun checkTranslation(stackIn, stackOut, insns) = + let val n = ref(ST.depth stackIn) + fun push() = n := !n + 1 + fun pop() = n := !n - 1 + fun scan(I.INSTR(I.FBINARY{binOp, ...})) = + (case binOp of + ( I.FADDP | I.FSUBP | I.FSUBRP | I.FMULP + | I.FDIVP | I.FDIVRP) => pop() + | _ => () + ) + | scan(I.INSTR(I.FIBINARY{binOp, ...})) = () + | scan(I.INSTR(I.FUNARY I.FPTAN)) = push() + | scan(I.INSTR(I.FUNARY _)) = () + | scan(I.INSTR(I.FLDL(I.ST n))) = push() + | scan(I.INSTR(I.FLDL mem)) = push() + | scan(I.INSTR(I.FLDS mem)) = push() + | scan(I.INSTR(I.FLDT mem)) = push() + | scan(I.INSTR(I.FSTL(I.ST n))) = () + | scan(I.INSTR(I.FSTPL(I.ST n))) = pop() + | scan(I.INSTR(I.FSTL mem)) = () + | scan(I.INSTR(I.FSTS mem)) = () + | scan(I.INSTR(I.FSTPL mem)) = pop() + | scan(I.INSTR(I.FSTPS mem)) = pop() + | scan(I.INSTR(I.FSTPT mem)) = pop() + | scan(I.INSTR(I.FXCH{opnd=i,...})) = () + | scan(I.INSTR(I.FUCOM _)) = () + | scan(I.INSTR(I.FUCOMP _)) = pop() + | scan(I.INSTR(I.FUCOMPP)) = (pop(); pop()) + | scan(I.INSTR(I.FILD mem)) = push() + | scan(I.INSTR(I.FILDL mem)) = push() + | scan(I.INSTR(I.FILDLL mem)) = push() + | scan(I.INSTR(I.CALL{return, ...})) = + (n := 0; (* clear the stack *) + (* Simulate the pushing of arguments *) + let val returnSet = SL.return(SL.uniq(getCell return)) + in app (fn _ => push()) returnSet + end + ) + | scan _ = () + val _ = app scan (rev insns); + val n = !n + val m = ST.depth stackOut + in + if n <> m then + (dump(insns); + bug("Bad translation n="^i2s n^ " expected="^i2s m^"\n") + ) + else () + end + + + (* Dump the initial code *) + val _ = if debug andalso !debugOn then + (pr("-------- block "^i2s blknum^" ----"^ + celllistToString liveIn^" "^ + ST.stackToString stackIn^"\n"); + dump (!insns); + pr("succ="); + app (fn b => pr(i2s b^" ")) (#succ cfg blknum); + pr("\n") + ) + else () + + (* Compute the last uses *) + val lastUse = computeLastUse(blknum, insns, liveOut) + + (* Rewrite the code *) + val (stamp, insns') = loop(stamp, rev(!insns), lastUse, code) + + (* Insert shuffle code at the end if necessary *) + val insns' = shuffleOut(stack, insns', blknum, block, liveOut) + + (* Dump translation *) + val _ = if debug andalso !debugOn then + (pr("-------- translation "^i2s blknum^"----"^ + celllistToString liveIn^" "^ + ST.stackToString stackIn^"\n"); + dump insns'; + pr("-------- done "^i2s blknum^"----"^ + celllistToString liveOut^" "^ + ST.stackToString stack^"\n") + ) + else () + + (* Check if things are okay *) + val _ = if debug andalso sanityCheck then + checkTranslation(stackIn, stack, insns') + else () + + in insns := insns'; (* update the instructions *) + stamp + end (* process *) + + in (* Translate all blocks *) + stamp := C.firstPseudo; + #forall_nodes cfg rewriteAllBlocks; + (* If we found critical edges, then we have to split them... *) + if IntHashTable.numItems edgesToSplit = 0 then Cfg + else repairCriticalEdges(Cfg) + end +end (* functor *) + +end (* local *) diff --git a/MLRISC/x86/mltree/x86-fp.sml b/MLRISC/x86/mltree/x86-fp.sml new file mode 100644 index 0000000..60a8610 --- /dev/null +++ b/MLRISC/x86/mltree/x86-fp.sml @@ -0,0 +1,1674 @@ +(* x86-fp.sml + * + * COPYRIGHT (c) 2001 Bell Labs, Lucent Technologies + * + * This phase takes a cluster with pseudo x86 fp instructions, performs + * liveness analysis to determine their live ranges, and rewrite the + * program into the correct stack based code. + * + * The Basics + * ---------- + * o We assume there are 7 pseudo fp registers, %fp(0), ..., %fp(6), + * which are mapped onto the %st stack. One stack location is reserved + * for holding temporaries. + * o Important: for floating point comparisons, we actually need + * two extra stack locations in the worst case. We handle this by + * specifying that the instruction define an extra temporary fp register + * when necessary. + * o The mapping between %fp <-> %st may change from program point to + * program point. We keep track of this lazy renaming and try to minimize + * the number of FXCH that we insert. + * o At split and merge points, we may get inconsistent %fp <-> %st mappings. + * We handle this by inserting the appropriate renaming code. + * o Parallel copies (renaming) are rewritten into a sequence of FXCHs! + * + * Pseudo fp instructions Semantics + * -------------------------------------- + * FMOVE src, dst dst := src + * FILOAD ea, dst dst := cvti2f(mem[ea]) + * FBINOP lsrc, rsrc, dst dst := lsrc * rsrc + * FIBINOP lsrc, rsrc, dst dst := lsrc * cvti2f(rsrc) + * FUNOP src, dst dst := unaryOp src + * FCMP lsrc, rsrc fp condition code := fcmp(lsrc, rsrc) + * + * An instruction may use its source operand(s) destructively. + * We find this info using a global liveness analysis. + * + * The Translation + * --------------- + * o We keep track of the bindings between %fp registers and the + * %st(..) staack locations. + * o FXCH and FLDL are inserted at the appropriate places to move operands + * to %st(0). FLDL is used if the operand is not dead. FXCH is used + * if the operand is the last use. + * o FCOPY's between pseudo %fp registers are done by software renaming + * and generate no code by itself! + * o FSTL %st(1) are also generated to pop the stack after the last use + * of an operand. + * + * Note + * ---- + * 1. This module should be run after floating point register allocation. + * + * -- Allen Leung (leunga@cs.nyu.edu) + *) + +local + val debug = false (* set this to true to debug this module + * set this to false for production use. + *) + val debugLiveness = true (* debug liveness analysis *) + val debugDead = false (* debug dead code removal *) + val sanityCheck = true +in +functor X86FP + (structure X86Instr : X86INSTR + structure X86Props : INSN_PROPERTIES + where I = X86Instr + structure Flowgraph : CONTROL_FLOW_GRAPH + where I = X86Instr + structure Liveness : LIVENESS + where CFG = Flowgraph + structure Asm : INSTRUCTION_EMITTER + where I = X86Instr + and S.P = Flowgraph.P + ) : CFG_OPTIMIZATION = +struct + structure CFG = Flowgraph + structure G = Graph + structure I = X86Instr + structure T = I.T + structure P = X86Props + structure C = I.C + structure A = Array + structure L = Label + structure An = Annotations + structure CB = CellsBasis + structure SL = CB.SortedCells + structure HT = IntHashTable + structure IM = IntRedBlackMap + + type flowgraph = CFG.cfg + type an = An.annotations + + val name = "X86 floating point rewrite" + + val debugOn = MLRiscControl.mkFlag ("x86-fp-debug", "x86 fp debug mode") + val traceOn = MLRiscControl.mkFlag ("x86-fp-trace", "x86 fp trace mode") + + fun error msg = MLRiscErrorMsg.error("X86FP",msg) + fun pr msg = TextIO.output(!MLRiscControl.debug_stream,msg) + + val i2s = Int.toString + + (* + * No overflow checking is needed for integer arithmetic in this module + *) + + fun celllistToCellset l = List.foldr CB.CellSet.add CB.CellSet.empty l + fun celllistToString l = CB.CellSet.toString(celllistToCellset l) + + (* Annotation to mark split edges *) + exception TargetMovedTo of G.node_id + + (*----------------------------------------------------------------------- + * Primitive instruction handling routines + *-----------------------------------------------------------------------*) + + (* Annotation an instruction *) + fun mark(instr, []) = instr + | mark(instr, a::an) = mark(I.ANNOTATION{i=instr,a=a}, an) + + (* Add pop suffix to a binary operator *) + fun pop I.FADDL = I.FADDP | pop I.FADDS = I.FADDP + | pop I.FSUBL = I.FSUBP | pop I.FSUBS = I.FSUBP + | pop I.FSUBRL = I.FSUBRP | pop I.FSUBRS = I.FSUBRP + | pop I.FMULL = I.FMULP | pop I.FMULS = I.FMULP + | pop I.FDIVL = I.FDIVP | pop I.FDIVS = I.FDIVP + | pop I.FDIVRL = I.FDIVRP | pop I.FDIVRS = I.FDIVRP + | pop _ = error "fbinop.pop" + + (* Invert the operator *) + fun invert I.FADDL = I.FADDL | invert I.FADDS = I.FADDS + | invert I.FSUBL = I.FSUBRL | invert I.FSUBS = I.FSUBRS + | invert I.FSUBRL = I.FSUBL | invert I.FSUBRS = I.FSUBS + | invert I.FMULL = I.FMULL | invert I.FMULS = I.FMULS + | invert I.FDIVL = I.FDIVRL | invert I.FDIVS = I.FDIVRS + | invert I.FDIVRL = I.FDIVL | invert I.FDIVRS = I.FDIVS + | invert I.FADDP = I.FADDP | invert I.FMULP = I.FMULP + | invert I.FSUBP = I.FSUBRP | invert I.FSUBRP = I.FSUBP + | invert I.FDIVP = I.FDIVRP | invert I.FDIVRP = I.FDIVP + | invert _ = error "invert" + + (* Pseudo instructions *) + fun FLD(I.FP32, ea) = I.flds ea + | FLD(I.FP64, ea) = I.fldl ea + | FLD(I.FP80, ea) = I.fldt ea + + fun FILD(I.I8, ea) = error "FILD" + | FILD(I.I16, ea) = I.fild ea + | FILD(I.I32, ea) = I.fildl ea + | FILD(I.I64, ea) = I.fildll ea + + fun FSTP(I.FP32, ea) = I.fstps ea + | FSTP(I.FP64, ea) = I.fstpl ea + | FSTP(I.FP80, ea) = I.fstpt ea + + fun FST(I.FP32, ea) = I.fsts ea + | FST(I.FP64, ea) = I.fstl ea + | FST(I.FP80, ea) = error "FSTT" + + (*----------------------------------------------------------------------- + * Pretty print routines + *-----------------------------------------------------------------------*) + fun fregToString f = "%f"^i2s(CB.registerNum f) + fun fregsToString s = + List.foldr (fn (r,"") => fregToString r | + (r,s) => fregToString r^" "^s) "" s + + fun blknumOf(CFG.BLOCK{id, ...}) = id + + (*----------------------------------------------------------------------- + * A stack datatype that mimics the x86 floating point stack + * and keeps track of bindings between %st(n) and %fp(n). + *-----------------------------------------------------------------------*) + structure ST :> + sig + type stack + type stnum = int (* 0 -- 7 *) + val create : unit -> stack + val stack0 : stack + val copy : stack -> stack + val clear : stack -> unit + val fp : stack * CB.register_id -> stnum + val st : stack * stnum -> CB.register_id + val set : stack * stnum * CB.register_id -> unit + val push : stack * CB.register_id -> unit + val xch : stack * stnum * stnum -> unit + val pop : stack -> unit + val depth : stack -> int + val nonFull : stack -> unit + val kill : stack * CellsBasis.cell -> unit + val stackToString : stack -> string + val equal : stack * stack -> bool + end = + struct + type stnum = int + datatype stack = + STACK of + { st : CB.register_id A.array, (* mapping %st -> %fp registers *) + fp : stnum A.array, (* mapping %fp -> %st registers *) + sp : int ref (* stack pointer *) + } + + (* Create a new stack *) + fun create() = STACK{st=A.array(8,~1), fp=A.array(7,16), sp=ref ~1} + + val stack0 = create() + + (* Copy a stack *) + fun copy(STACK{st, fp, sp}) = + let val st' = A.array(8, ~1) + val fp' = A.array(7, 16) + in A.copy{src=st,dst=st',di=0}; + A.copy{src=fp,dst=fp',di=0}; + STACK{st=st', fp=fp', sp=ref(!sp)} + end + + (* Depth of stack *) + fun depth(STACK{sp, ...}) = !sp + 1 + + fun nonFull(STACK{sp, ...}) = + if !sp >= 7 then error "stack overflow" else () + + (* Given %st(n), lookup the corresponding %fp(n) *) + fun st(STACK{st, sp, ...}, n) = A.sub(st, !sp - n) + + (* Given %fp(n), lookup the corresponding %st(n) *) + fun fp(STACK{fp, sp, ...}, n) = !sp - A.sub(fp, n) + + fun stackToString stack = + let val depth = depth stack + fun f i = if i >= depth then " ]" + else "%st("^i2s i^")=%f"^i2s(st(stack,i))^" "^f(i+1) + in "[ "^f 0 end + + fun clear(STACK{st, fp, sp, ...}) = + (sp := ~1; A.modify(fn _ => ~1) st; A.modify(fn _ => 16) fp) + + (* Set %st(n) := %f *) + fun set(STACK{st, fp, sp, ...}, n, f) = + (A.update(st, !sp - n, f); + if f >= 0 then A.update(fp, f, !sp - n) else () + ) + + (* Pop one entry *) + fun pop(STACK{sp, st, fp, ...}) = sp := !sp - 1 + + (* Push %fp(f) onto %st(0) *) + fun push(stack as STACK{sp, ...}, f) = (sp := !sp + 1; set(stack, 0, f)) + + (* Exchange the contents of %st(m) and %st(n) *) + fun xch(stack, m, n) = + let val f_m = st(stack, m) + val f_n = st(stack, n) + in set(stack, m, f_n); + set(stack, n, f_m) + end + + fun kill(STACK{fp, ...}, f) = A.update(fp, CB.registerNum f, 16) + + fun equal(st1, st2) = + let val m = depth st1 + val n = depth st2 + fun loop i = + i >= m orelse (st(st1, i) = st(st2, i) andalso loop(i+1)) + in m = n andalso loop(0) + end + + end (* struct *) + + (*----------------------------------------------------------------------- + * Module to handle forward propagation. + * Forward propagation does the following: + * Given an instruction + * fmove mem, %fp(n) + * We delay the generation of the load until the first use of %fp(n), + * which we can further optimize by folding the load into the operand + * of the instruction, if it is the last use of this operand. + * If %fp(n) is dead then no load is necessary. + * Of course, we have to be careful whenever we encounter other + * instruction with a write. + *-----------------------------------------------------------------------*) + (* + structure ForwardPropagation :> + sig + type readbuffer + val create : ST.stack -> readbuffer + val load : readbuffer * C.cell * I.fsize * I.ea -> unit + val getreg : readbuffer * bool * C.cell * I.instruction list -> + I.operand * I.instruction list + val flush : readbuffer * I.instruction list -> I.instruction list + end = + struct + + datatype readbuffer = + READ of { stack : ST.stack, + loads : (I.fsize * I.ea) option A.array, + pending : int ref + } + + fun create stack = + READ{stack =stack, + loads =A.array(8, NONE), + pending =ref 0 + } + + fun load(READ{pending, loads, ...}, fd, fsize, mem) = + (A.update(loads, fd, SOME(fsize, mem)); + pending := !pending + 1 + ) + + (* Extract the operand for a register + * If it has a delayed load associated with it then + * we perform the load at this time. + *) + fun getreg(READ{pending, loads, stack, ...}, isLastUse, fs, code) = + case A.sub(loads, fs) of + NONE => + let val n = ST.st(stack, fs) + in if isLastUse + then (ST n, code) + else let val code = I.FLDL(ST n)::code + in ST.push(stack, fs); (ST0, code) + end + end + | SOME(fsize, mem) => + let val code = FLD(fsize, mem)::code + in A.update(loads, fs, NONE); (* delete load *) + pending := !pending - 1; + ST.push(stack, fs); (* fs is now in place *) + (ST0, code) + end + + (* Extract a binary operand. + * We'll try to fold this into the operand + *) + fun getopnd(READ{pending, loads, stack,...}, isLastUse, I.FPR fs, code) = + (case A.sub(loads, fs) of + NONE => + let val n = ST.st(stack, fs) + in if isLastUse fs (* regmap XXX *) + then (ST n, code) + else let val code = I.FLDL(ST n)::code + in ST.push(stack, fs); (ST0, code) + end + end + | SOME(fsize, mem) => + (A.update(loads, fs, NONE); (* delete load *) + pending := !pending - 1; + if isLastUse fs then (mem, code) + else let val code = FLD(fsize, mem)::code + in ST.push(stack, fs); + (ST0, code) + end + ) + ) + | getopnd(_, _, ea, code) = (ea, code) + + fun flush(READ{pending=ref 0,...}, code) = code + + end (* struct *) + *) + + (*----------------------------------------------------------------------- + * Module to handle delayed stores. + * Delayed store does the following: + * Given an instruction + * fstore %fp(n), %mem + * We delay the generation of the store until necessary. + * This gives us an opportunity to rearrange the order of the stores + * to eliminate unnecessary fxch. + *-----------------------------------------------------------------------*) + (* + structure DelayStore :> + sig + type writebuffer + val create : ST.stack -> writebuffer + val flush : writebuffer * I.instruction list -> I.instruction list + end = + struct + datatype writebuffer = + WRITE of { front : (I.ea * C.cell) list ref, + back : (I.ea * C.cell) list ref, + stack : ST.stack, + pending : int ref + } + fun create stack = WRITE{front=ref [], back=ref [], + stack=stack, pending=ref 0} + fun flush(WRITE{pending=ref 0,...}, code) = code + end (* struct *) + *) + + (*----------------------------------------------------------------------- + * Main routine. + * + * Algorithm: + * 1. Perform liveness analysis. + * 2. For each fp register, mark all its last use point(s). + * Registers are popped at their last uses. + * 3. Rewrite the instructions basic block by basic block. + * 4. Insert shuffle code at basic block boundaries. + * When necessary, split critical edges. + * 5. Sacrifice a goat to make sure things don't go wrong. + *-----------------------------------------------------------------------*) + fun run(Cfg as G.GRAPH cfg) = + let + val numberOfBlks = #capacity cfg () + val ENTRY = List.hd (#entries cfg ()) + val EXIT = List.hd (#exits cfg ()) + + val getCell = C.getCellsByKind CB.FP + (*extract the fp component of cellset*) + + val stTable = A.tabulate(8, fn n => I.ST(C.ST n)) + + fun ST n = (if sanityCheck andalso (n < 0 orelse n >= 8) then + pr("WARNING BAD %st("^i2s n^")\n") + else (); + A.sub(stTable, n) + ) + + fun FXCH n = I.fxch{opnd=C.ST n} + + val ST0 = ST 0 + val ST1 = ST 1 + val POP_ST = I.fstpl ST0 (* Instruction to pop an entry *) + + (* Dump instructions *) + fun dump instrs = + let val Asm.S.STREAM{emit, ...} = + AsmStream.withStream (!MLRiscControl.debug_stream) + Asm.makeStream [] + in app emit (rev instrs) + end + + (* Create assembly of instruction *) + fun assemble instr = + let val buf = StringOutStream.mkStreamBuf() + val stream = StringOutStream.openStringOut buf + val Asm.S.STREAM{emit, ...} = + AsmStream.withStream stream Asm.makeStream [] + val _ = emit instr + val s = StringOutStream.getString buf + val n = String.size s + in if n = 0 then s else String.substring(s, 0, n - 1) + end + + (*------------------------------------------------------------------ + * Perform liveness analysis on the floating point variables + * P.S. I'm glad I didn't throw away the code liveness code. + *------------------------------------------------------------------*) + val defUse = P.defUse CB.FP (* def/use properties *) + val {liveIn=liveInTable, liveOut=liveOutTable} = Liveness.liveness { + defUse=defUse, + (* updateCell=C.updateCellsByKind CB.FP, *) + getCell=getCell + } Cfg + (*------------------------------------------------------------------ + * Scan the instructions compute the last uses and dead definitions + * at each program point. Ideally we can do this during the code + * rewriting phase. But that's probably too error prone for now. + *------------------------------------------------------------------*) + fun computeLastUse(blknum, insns, liveOut) = + let fun scan([], _, lastUse) = lastUse + | scan(i::instrs, live, lastUse) = + let val (d, u) = defUse i + val d = SL.uniq(d)(* definitions *) + val u = SL.uniq(u)(* uses *) + val dead = SL.return(SL.difference(d, live)) + val live = SL.difference(live, d) + val last = SL.return(SL.difference(u, live)) + val live = SL.union(live, u) + val _ = + if debug andalso debugLiveness then + (case last of + [] => () + | _ => print(assemble i^"\tlast use="^ + fregsToString last^"\n") + ) + else () + in scan(instrs, live, (last,dead)::lastUse) + end + val liveOutSet = SL.uniq liveOut + val _ = + if debug andalso debugLiveness then + print("LiveOut("^i2s blknum^") = "^ + fregsToString(SL.return liveOutSet)^"\n") + else () + in scan(!insns, liveOutSet, []) + end + + (*------------------------------------------------------------------ + * Temporary work space + *------------------------------------------------------------------*) + val {high, low} = C.cellRange CB.FP + val n = high+1 + val lastUseTbl = A.array(n,~1) (* table for marking last uses *) + val useTbl = A.array(n,~1) (* table for marking uses *) + + (* %fp register bindings before and after a basic block *) + val bindingsIn = A.array(numberOfBlks, NONE) + val bindingsOut = A.array(numberOfBlks, NONE) + val stampCounter = ref ~4096 + + (* Edges that need splitting *) + exception NoEdgesToSplit + val edgesToSplit = IntHashTable.mkTable(32, NoEdgesToSplit) + val addEdgesToSplit = IntHashTable.insert edgesToSplit + fun lookupEdgesToSplit b = + getOpt(IntHashTable.find edgesToSplit b, []) + + (*------------------------------------------------------------------ + * Code for handling bindings between basic block + *------------------------------------------------------------------*) + + fun splitEdge(title, source, target, e) = + (if debug andalso !traceOn then + pr(title^" SPLITTING "^i2s source^"->"^ i2s target^"\n") + else (); + addEdgesToSplit(target,(source,target,e)::lookupEdgesToSplit target) + ) + + fun computeFreq(_,_,CFG.EDGE{w,...}) = !w + + (* Given a cellset, return a sorted and unique + * list of elements with all non-physical registers removed + *) + fun removeNonPhysical celllist = + let fun loop([], S) = SL.return(SL.uniq S) + | loop(f::fs, S) = + let val fx = CB.registerNum f + in loop(fs,if fx <= 7 then f::S else S) + end + in loop(celllist, []) + end + + (* Given a sorted and unique list of registers, + * Return a stack with these elements + *) + fun newStack fregs = + let val stack = ST.create() + in app (fn f => ST.push(stack, CB.registerNum f)) (rev fregs); + stack + end + + (* + * This function looks at all the entries on the stack, + * and generate code to deallocate all the dead values. + * The stack is updated. + *) + fun removeDeadValues(stack, liveSet, code) = + let val stamp = !stampCounter + val _ = stampCounter := !stampCounter - 1 + fun markLive [] = () + | markLive(r::rs) = + (A.update(useTbl, CB.registerNum r, stamp); markLive rs) + fun isLive f = A.sub(useTbl, f) = stamp + fun loop(i, depth, code) = + if i >= depth then code else + let val f = ST.st(stack, i) + in if isLive f (* live? *) + then loop(i+1, depth, code) + else + (if debug andalso !traceOn then + pr("REMOVING %f"^i2s f^" in %st("^i2s i^")"^ + " current stack="^ST.stackToString stack^"\n") + else (); + if i = 0 then + (ST.pop stack; + loop(0, depth-1, POP_ST::code) + ) + else (ST.xch(stack,0,i); + ST.pop stack; + loop(0, depth-1, I.fstpl(ST i)::code) + ) + ) + end + in markLive liveSet; + loop(0, ST.depth stack, code) + end + + + (*------------------------------------------------------------------ + * Given two stacks, source and target, where the bindings are + * permutation of each other, generate the minimal number of + * fxchs to match source with target. + * + * Important: source and target MUST be permutations of each other. + * + * Essentially, we first decompose the permutation into cycles, + * and process each cycle. + *------------------------------------------------------------------*) + fun shuffle(source, target, code) = + let val stamp = !stampCounter + val _ = stampCounter := !stampCounter - 1 + val permutation = lastUseTbl (* reuse the space *) + + val _ = if debug andalso !traceOn then + pr("SHUFFLE "^ST.stackToString source^ + "->"^ST.stackToString target^"\n") + else () + + (* Compute the initial permutation *) + val n = ST.depth source + fun computeInitialPermutation(i) = + if i >= n + then () + else let val f = ST.st(source, i) + val j = ST.fp(target, f) + in A.update(permutation, j, i); + computeInitialPermutation(i+1) + end + val _ = computeInitialPermutation 0 + + (* Decompose the initial permutation into cycles. + * The cycle involving 0 is treated specially. + *) + val visited = useTbl + fun isVisited i = A.sub(visited,i) = stamp + fun markAsVisited i = A.update(visited,i,stamp) + fun decomposeCycles(i, cycle0, cycles) = + if i >= n then (cycle0, cycles) + else if isVisited i orelse + A.sub(permutation, i) = i (* trivial cycle *) + then decomposeCycles(i+1, cycle0, cycles) + else let fun makeCycle(j, cycle, zero) = + let val k = A.sub(permutation, j) + val cycle = j::cycle + val zero = zero orelse j = 0 + in markAsVisited j; + if k = i then (cycle, zero) + else makeCycle(k, cycle, zero) + end + val (cycle, zero) = makeCycle(i, [], false) + in if zero then decomposeCycles(i+1, [cycle], cycles) + else decomposeCycles(i+1, cycle0, cycle::cycles) + end + + val (cycle0, cycles) = decomposeCycles(0, [], []) + + (* + * Generate shuffle for a cycle that does not involve 0. + * Given a cycle (c_1, ..., c_k), we generate this code: + * fxch %st(c_1), + * fxch %st(c_2), + * ... + * fxch %st(c_k), + * fxch %st(c_1) + *) + fun genxch([], code) = code + | genxch(c::cs, code) = genxch(cs, FXCH c::code) + + fun gen([], code) = error "shuffle.gen" + | gen(cs as (c::_), code) = FXCH c::genxch(cs, code) + + (* + * Generate shuffle for a cycle that involves 0. + * Given a cycle (c_1,...,c_k) we first shuffle this to + * an equivalent cycle (c_1, ..., c_k) where c'_k = 0, + * then we generate this code: + * fxch %st(c'_1), + * fxch %st(c'_2), + * ... + * fxch %st(c'_{k-1}), + *) + fun gen0([], code) = error "shuffle.gen0" + | gen0(cs, code) = + let fun rearrange(0::cs, cs') = cs@rev cs' + | rearrange(c::cs, cs') = rearrange(cs, c::cs') + | rearrange([], _) = error "shuffle.rearrange" + val cs = rearrange(cs, []) + in genxch(cs, code) + end + + (* + * Generate code. Must process the non-zero cycles first. + *) + val code = List.foldr gen code cycles + val code = List.foldr gen0 code cycle0 + in code + end (* shuffle *) + + (*------------------------------------------------------------------ + * Insert code at the end of a basic block. + * Make sure we put code in front of a transfer instruction + *------------------------------------------------------------------*) + fun insertAtEnd(insns, code) = + (case insns of + [] => code + | jmp::rest => + if P.instrKind jmp = P.IK_JUMP then + jmp::code@rest + else + code@insns + ) + + (*------------------------------------------------------------------ + * Magic for inserting shuffle code at the end of a basic block + *------------------------------------------------------------------*) + fun shuffleOut(stackOut, insns, b, block, liveOut) = + let + val liveOut = removeNonPhysical(liveOut) + + (* Generate code that remove unnecessary values *) + val code = removeDeadValues(stackOut, liveOut, []) + + fun done(stackOut, insns, code) = + (A.update(bindingsOut,b,SOME stackOut); + insertAtEnd(insns, code) + ) + + (* Generate code that shuffle values from source to target *) + fun match(source, target) = + done(target, insns, shuffle(source, target, [])) + + (* Generate code that shuffle values from source to liveOut *) + fun matchLiveOut() = + case liveOut of + [] => done(stackOut, insns, code) + | _ => match(stackOut, newStack liveOut) + + (* With multiple successors, find out which one we + * should connect to. Choose the one from the block that + * follows from this one, if that exists, or else choose + * from the edge with the highest frequency. + *) + fun find([], _, id, best) = (id, best) + | find((_, target, _)::edges, highestFreq, id, best) = + let val CFG.BLOCK{freq, ...} = #node_info cfg target + in if target = b+1 then (target, A.sub(bindingsIn, target)) + else (case A.sub(bindingsIn, target) of + NONE => find(edges, highestFreq, id, best) + | this as SOME stack => + if highestFreq < !freq then + find(edges, !freq, target, this) + else + find(edges, highestFreq, id, best) + ) + end + + (* + * Split all edges source->target except omitThis. + *) + fun splitAllEdgesExcept([], omitThis) = () + | splitAllEdgesExcept((source,target,e)::edges, omitThis) = + if target = EXIT then error "can't split exit edge!" + else + (if target <> omitThis andalso + target <= b andalso (* XXX *) + target <> ENTRY + then splitEdge("ShuffleOut",source,target,e) else (); + splitAllEdgesExcept(edges, omitThis) + ) + + (* Just one successor; + * try to match the bindings of the successor if it exist. + *) + fun matchIt succ = + let val (succBlock, target) = find(succ, ~1.0, ~1, NONE) + in splitAllEdgesExcept(succ, succBlock); + case target of + SOME stackIn => match(stackOut, stackIn) + | NONE => done(stackOut,insns,code) + end + + in case #out_edges cfg b of + [] => matchLiveOut() + | succ as [(_,target,_)] => + if target = EXIT then matchLiveOut() + else matchIt succ + | succ => matchIt succ + end (* shuffleOut *) + + (*------------------------------------------------------------------ + * Compute the initial fp stack bindings for basic block b. + *------------------------------------------------------------------*) + fun shuffleIn(b, block, liveIn) = + let + val liveInSet = removeNonPhysical liveIn + + (* With multiple predecessors, find out which one we + * should connect to. Choose the one from the block that + * falls into this one, if that exists, or else choose + * from the edge with the highest frequency. + *) + fun find([], _, best) = best + | find((source, _, _)::edges, highestFreq, best) = + let val CFG.BLOCK{freq, ...} = #node_info cfg source + in case A.sub(bindingsOut, source) of + NONE => find(edges, highestFreq, best) + | this as SOME stack => + if source = b-1 + then this (* falls into b *) + else if highestFreq < !freq then find(edges, !freq, this) + else find(edges, highestFreq, best) + end + + fun splitAllDoneEdges [] = () + | splitAllDoneEdges ((source, target, e)::edges) = + (if source < b andalso + source <> ENTRY andalso + source <> EXIT + then splitEdge("ShuffleIn", source, target, e) else (); + splitAllDoneEdges edges + ) + + (* The initial stack bindings are determined by the live set. + * No compensation code is needed. + *) + fun fromLiveIn() = + let val stackIn = + case liveInSet of + [] => ST.stack0 + | _ => + (pr("liveIn="^celllistToString liveIn^"\n"); + newStack liveInSet + ) + val stackOut = ST.copy stackIn + in (stackIn, stackOut, []) + end + + val pred = #in_edges cfg b + + val (stackIn, stackOut, code) = + case find(pred, ~1.0, NONE) of + NONE => (splitAllDoneEdges(pred); fromLiveIn()) + | SOME stackIn' => + (case pred of + [_] => (* one predecessor *) + (* Use the bindings as from the previous block + * We first have to deallocate all unused values. + *) + let val stackOut = ST.copy stackIn' + (* Clean the stack of unused entries *) + val code = removeDeadValues(stackOut, liveInSet, []) + in (stackIn', stackOut, code) end + | pred => (* more than one predecessors *) + let val stackIn = ST.copy stackIn' + val code = removeDeadValues(stackIn, liveInSet, []) + val stackOut = ST.copy stackIn + in (* If we have to generate code to deallocate + * the stack then we have split the edge. + *) + case code of + [] => () + | _ => splitAllDoneEdges(pred); + (stackIn, stackOut, []) + end + ) + in A.update(bindingsIn, b, SOME stackIn); + A.update(bindingsOut, b, SOME stackOut); + (stackIn, stackOut, code) + end + + (*------------------------------------------------------------------ + * Code for patching up critical edges. + * The trick is finding a good place to insert the critical edges. + * Let's call an edge x->y that requires compensation + * code c to be inserted an candidate edge. We write this as x->y(c) + * + * Here are the heuristics that we use to improve the final code: + * + * 1. Given two candidate edges a->x(c1) and b->x(c2) where c1=c2 + * then we can merge the two copies of compensation code. + * This is quite common. This generalizes to any number of edges. + * + * 2. Given two candidate edges a->x(c1) and b->x(c2) and where + * c1 and c2 are pops, we can partially share c1 and c2. + * Currently, I think I only recognize this case when + * x has no fp registers live-in. + * + * 3. Given two candidate edges a->x(c1) and b->x(c2), + * if a->x has a higher frequency then put the compensation + * code in front of x (so that it falls through into x) + * whenever possible. + * + * As you can see, the voodoo is strong here. + * + * The routine has two main phases: + * 1. Determine the compensation code by applying the heuristics + * above. + * 2. Then insert them and rebuild the cfg by renaming all block + * ids. This is currently necessary to keep the layout order + * consistent with the order of the id. + *------------------------------------------------------------------*) + fun repairCriticalEdges(Cfg as G.GRAPH cfg) = + let + val cleanup = [#create MLRiscAnnotations.COMMENT "cleanup edge"] + val critical = [#create MLRiscAnnotations.COMMENT "critical edge"] + + fun annotate(gen, an) = + app (fn ((_,CFG.BLOCK{annotations, ...}),_) => annotations := an) + gen + + (* + * Special case: target block has stack depth of 0. + * Just generate code that pop entries from the sources. + * To make things interesting, we try to share code among + * all the critical edges. + *) + fun genPoppingCode(_, []) = () + | genPoppingCode(targetId, edges) = + let (* Edges annotated with the source stack depth + * Ordered by increasing stack height + *) + val edges = + IM.listItemsi + (foldr (fn (edge as (sourceId, _, _), M) => + let val n = ST.depth(valOf(A.sub(bindingsOut,sourceId))) + in IM.insert(M, n, edge :: getOpt(IM.find(M, n), [])) + end) IM.empty edges) + + (* Generate n pops *) + fun pops(0, code) = code + | pops(n, code) = pops(n-1, POP_ST::code) + + (* Create the chain of blocks *) + fun makeChain(depth, [], chain) = chain + | makeChain(depth, (d, es)::es', chain) = + let val code = pops(d - depth, []) + in makeChain(d, es', (es, code)::chain) + end + + val chain = makeChain(0, edges, []) + + in annotate(CFG.splitEdges Cfg {groups=chain, jump=false}, cleanup) + end + + (* + * Generate repair code. + *) + fun genRepairCode(targetId, stackIn, edges) = + let val liveIn = IntHashTable.lookup liveInTable targetId + val liveInSet = removeNonPhysical liveIn + val _ = if debug then + pr("LiveIn = "^celllistToString liveIn^"\n") + else () + + (* Group all edges whose output stack configurations + * are the same. Each group is merged together into + * a single compensation block + *) + fun partition([], S) = S + | partition((e as (src,_,_))::es, S) = + let val stackOut = ST.copy(valOf(A.sub(bindingsOut,src))) + fun find([], S) = partition(es, ([e],stackOut)::S) + | find((x as (es',st'))::S', S) = + if ST.equal(stackOut,st') then + partition(es, (e::es',st')::S' @ S) + else + find(S', x::S) + in find(S, []) + end + + (* Partition by the source bindings *) + val S = partition(edges, []) + + (* Compute frequencies *) + val S = map (fn (es,st) => (CFG.sumEdgeFreqs es,es,st)) S + + (* Ordered by non-increasing frequencies *) + val S = ListMergeSort.sort (fn ((x,_,_),(y,_,_)) => x < y) S + + (* Generate code *) + fun gen(freq, edges, stackOut) = + let (* deallocate unused values *) + val code = removeDeadValues(stackOut,liveInSet,[]) + (* shuffle values *) + val code = shuffle(stackOut, stackIn, code) + in annotate( + CFG.splitEdges Cfg {groups=[(edges,code)], jump=false}, + critical) + end + + in app gen S + end + + (* Split all edges entering targetId *) + fun split(targetId, edges) = + let val stackIn = valOf(A.sub(bindingsIn,targetId)) + fun log(s, t, e) = + case A.sub (bindingsOut, s) of + SOME stackOut => + (pr("SPLIT "^i2s s^"->"^i2s t^" "^ + ST.stackToString stackOut^"->"^ + ST.stackToString stackIn^"\n")) + | NONE => error "split:stackOut" + val _ = if debug andalso !traceOn then app log edges else () + in if ST.depth stackIn = 0 then genPoppingCode(targetId, edges) + else genRepairCode(targetId, stackIn, edges) + end + + in IntHashTable.appi split edgesToSplit; + CFG.changed Cfg; + Cfg + end + + (*------------------------------------------------------------------ + * Process all blocks which are not the entry or the exit + *------------------------------------------------------------------*) + val stamp = ref 0 + fun rewriteAllBlocks (_, CFG.BLOCK{kind=CFG.START, ...}) = () + | rewriteAllBlocks (_, CFG.BLOCK{kind=CFG.STOP, ...}) = () + | rewriteAllBlocks + (blknum, block as CFG.BLOCK{insns, labels, annotations, ...}) = + let val _ = + if debug andalso !debugOn then + app (fn l => pr(L.toString l^":\n")) (!labels) + else (); + val liveIn = HT.lookup liveInTable blknum + val liveOut = HT.lookup liveOutTable blknum + val st = rewrite(!stamp, blknum, block, + insns, liveIn, liveOut, + annotations) + in stamp := st (* update stamp *) + end + + (*------------------------------------------------------------------ + * Translate code within a basic block. + * Each instruction is given a unique stamp for identifying last + * uses. + *------------------------------------------------------------------*) + and rewrite(stamp, blknum, block, insns, liveIn, liveOut, + annotations) = + let val (stackIn, stack, code) = shuffleIn(blknum, block, liveIn) + + (* Dump instructions when encountering a bug *) + fun bug msg = + (pr("-------- bug in block "^i2s blknum^" ----\n"); + dump(!insns); + error msg + ) + + fun loop(stamp, [], [], code) = (stamp, code) + | loop(stamp, instr::rest, (lastUse,dead)::lastUses, code) = + let fun mark(tbl, []) = () + | mark(tbl, r::rs) = + (A.update(tbl, CB.registerNum r, stamp); mark(tbl, rs)) + in mark(lastUseTbl,lastUse); (* mark all last uses *) + trans(stamp, instr, [], rest, dead, lastUses, code) + end + | loop _ = error "loop" + + (* + * Main routine that does the actual translation. + * A few reminders: + * o The instructions are processed in normal order + * and generated in the reversed order. + * o (Local) liveness is computed at the same time. + * o For each use, we have to find out whether it is + * the last use. If so, we can kill it and reclaim + * the stack entry at the same time. + *) + and trans(stamp, instr, an, rest, dead, lastUses, code) = + let (* Call this continuation when done with code generation *) + fun FINISH code = loop(stamp+1, rest, lastUses, code) + + fun KILL_THE_DEAD(dead, code) = + let fun kill([], code) = FINISH code + | kill(f::fs, code) = + let val fx = CB.registerNum f + in if debug andalso debugDead then + pr("DEAD "^fregToString f^" in "^ + ST.stackToString stack^"\n") + else (); + (* not a physical register *) + if fx >= 8 then kill(fs, code) + else + let val i = ST.fp(stack, fx) + in if debug andalso debugDead then + pr("KILLING "^fregToString f^ + "=%st("^i2s i^")\n") + else (); + if i < 0 then kill(fs, code) (* dead already *) + else if i = 0 then + (ST.pop stack; kill(fs, POP_ST::code)) + else + (ST.xch(stack,0,i); ST.pop stack; + kill(fs, I.fstpl(ST i)::code) + ) + end + end + in kill(dead, code) + end + + (* Call this continuation when done with floating point + * code generation. Remove all dead code first. + *) + fun DONE code = KILL_THE_DEAD(dead, code) + + (* Is this the last use of register f? *) + fun isLastUse f = A.sub(lastUseTbl, f) = stamp + + (* Is this value dead? *) + fun isDead f = + let fun loop [] = false + | loop(r::rs) = CB.sameColor(f,r) orelse loop rs + in loop dead end + + (* Dump the stack before each intruction for debugging *) + fun log() = if debug andalso !traceOn then + pr(ST.stackToString stack^assemble instr^"...\n") + else () + + (* Find the location of a source register *) + fun getfs(f) = + let val fx = CB.registerNum f + val s = ST.fp(stack, fx) + in (isLastUse fx,s) end + + (* Generate memory to memory move *) + fun mmmove(fsize,src,dst) = + let val _ = ST.nonFull stack + val code = FLD(fsize,src)::code + val code = mark(FSTP(fsize,dst),an)::code + in DONE code end + + (* Allocate a new register in %st(0) *) + fun alloc(f,code) = (ST.push(stack,CB.registerNum f); code) + + (* register -> register move *) + fun rrmove(fs,fd) = + if CB.sameColor(fs,fd) then DONE code + else + let val (dead,ss) = getfs fs + in if dead then (* fs is dead *) + (ST.set(stack,ss,CB.registerNum fd); (* rename fd to fs *) + DONE code (* no code is generated *) + ) + else (* fs is not dead; push it onto %st(0); + * set fd to %st(0) + *) + let val code = alloc(fd, code) + in DONE(mark(I.fldl(ST ss),an)::code) + end + end + + (* memory -> register move. + * Do dead code elimination here. + *) + fun mrmove(fsize,src,fd) = + if isDead fd + then FINISH code (* value has been killed *) + else + let val code = alloc(fd, code) + in DONE(mark(FLD(fsize,src),an)::code) + end + + (* exchange %st(n) and %st(0) *) + fun xch(n) = (ST.xch(stack,0,n); FXCH n) + + (* push %st(n) onto the stack *) + fun push(n) = (ST.push(stack,~2); I.fldl(ST n)) + + + (* push mem onto the stack *) + fun pushmem(src) = (ST.push(stack,~2); I.fldl(src)) + + (* register -> memory move. + * Use pop version of the opcode if it is the last use. + *) + fun rmmove(fsize,fs,dst) = + let fun fstp(code) = + (ST.pop stack; DONE(mark(FSTP(fsize,dst),an)::code)) + fun fst(code) = DONE(mark(FST(fsize,dst),an)::code) + in case getfs fs of + (true, 0) => fstp code + | (true, n) => fstp(xch n::code) + | (false, 0) => fst(code) + | (false, n) => fst(xch n::code) + end + + (* Floating point move *) + fun fmove{fsize,src=I.FPR fs,dst=I.FPR fd} = rrmove(fs,fd) + | fmove{fsize,src,dst=I.FPR fd} = mrmove(fsize,src,fd) + | fmove{fsize,src=I.FPR fs,dst} = rmmove(fsize,fs,dst) + | fmove{fsize,src,dst} = mmmove(fsize,src,dst) + + (* Floating point integer load operator *) + fun fiload{isize,ea,dst=I.FPR fd} = + let val code = alloc(fd, code) + val code = mark(FILD(isize,ea),an)::code + in DONE code + end + | fiload{isize,ea,dst} = + let val code = mark(FILD(isize,ea),an)::code + val code = I.fstpl(dst)::code (* XXX *) + in DONE code + end + + (* Make a copy of register fs to %st(0). *) + fun moveregtotop(fs, code) = + (case getfs fs of + (true, 0) => code + | (true, n) => xch n::code + | (false, n) => push n::code + ) + + fun movememtotop(fsize, mem, code) = + (ST.push(stack, ~2); FLD(fsize, mem)::code) + + (* Move an operand to top of stack *) + fun movetotop(fsize, I.FPR fs, code) = moveregtotop(fs, code) + | movetotop(fsize, mem, code) = movememtotop(fsize, mem, code) + + fun storeResult(fsize, dst, n, code) = + case dst of + I.FPR fd => (ST.set(stack, n, CB.registerNum fd); DONE code) + | mem => + let val code = if n = 0 then code else xch n::code + in ST.pop stack; DONE(FSTP(fsize, mem)::code) end + + (* Floating point unary operator *) + fun funop{fsize,unOp,src,dst} = + let val code = movetotop(fsize, src, code) + val code = mark(I.funary unOp,an)::code + + (* Moronic hack to deal with partial tangent! *) + val code = + case unOp of + I.FPTAN => + (if ST.depth stack >= 7 then error "FPTAN" + else (); + POP_ST::code (* pop the useless 1.0 *) + ) + | _ => code + in storeResult(fsize, dst, 0, code) + end + + (* Floating point binary operator. + * Note: + * binop src, dst + * means dst := dst binop src + * (lsrc := lsrc binop rsrc) + * on the x86 + *) + fun fbinop{fsize,binOp,lsrc,rsrc,dst} = + let (* generate code and set %st(n) = fd *) + (* op2 := op1 - op2 *) + fun oper(binOp,op1,op2,n,code) = + let val code = + mark(I.fbinary{binOp=binOp,src=op1,dst=op2},an) + ::code + in storeResult(I.FP64, dst, n, code) + end + + fun operR(binOp,op1,op2,n,code) = + oper(invert binOp,op1,op2,n,code) + + fun operP(binOp,op1,op2,n,code) = + (ST.pop stack; oper(pop binOp,op1,op2,n-1,code)) + + fun operRP(binOp,op1,op2,n,code) = + (ST.pop stack; operR(pop binOp,op1,op2,n-1,code)) + + (* Many special cases to consider. + * Basically, try to reuse stack space as + * much as possible by taking advantage of last uses. + * + * Stack=[st(0)=3.0 st(1)=2.0] + * fsub %st(1), %st [1,2.0] + * fsubr %st(1), %st [-1,2.0] + * fsub %st, %st(1) [3.0,1.0] + * fsubr %st, %st(1) [3.0,-1.0] + * + * fsubp %st, %st(1) [1] + * fsubrp %st, %st(1) [-1] + * So, + * fsub %st(n), %st (means %st - %st(n) -> %st) + * fsub %st, %st(n) (means %st - %st(n) -> %st(n)) + * fsubr %st(n), %st (means %st(n) - %st -> %st) + * fsubr %st, %st(n) (means %st(n) - %st -> %st(n)) + *) + fun reg2(fx, fy) = + let val (dx, sx) = getfs fx + val (dy, sy) = getfs fy + fun loop(dx, sx, dy, sy, code) = + (* op1, op2 (dst) *) + case (dx, sx, dy, sy) of + (true, 0, false, n) => oper(binOp,ST n,ST0,0,code) + | (false, n, true, 0) => operR(binOp,ST n,ST0,0,code) + | (true, n, true, 0) => operRP(binOp,ST0,ST n,n,code) + | (true, 0, true, n) => operP(binOp,ST0,ST n,n,code) + | (false, 0, true, n) => oper(binOp,ST0,ST n,n,code) + | (true, n, false, 0) => operR(binOp,ST0,ST n,n,code) + | (true, sx, dy, sy) => + loop(true, 0, dy, sy, xch sx::code) + | (dx, sx, true, sy) => + loop(dx, sx, true, 0, xch sy::code) + | (false, sx, false, sy) => + loop(true, 0, false, sy+1, push sx::code) + in if sx = sy then (* same register *) + let val code = + case (dx, sx) of + (true, 0) => code + | (true, n) => xch n::code + | (false, n) => push n::code + in oper(binOp,ST0,ST0,0,code) + end + else loop(dx, sx, dy, sy, code) + end + + (* reg/mem operands *) + fun regmem(binOp, fx, mem) = + case getfs fx of + (true, 0) => oper(binOp,mem,ST0,0,code) + | (true, n) => oper(binOp,mem,ST0,0,xch n::code) + | (false, n) => oper(binOp,mem,ST0,0,push n::code) + + (* Two memory operands. Optimize the case when + * the two operands are identical. + *) + fun mem2(lsrc, rsrc) = + let val _ = ST.push(stack,~2) + val code = FLD(fsize,lsrc)::code + val rsrc = if P.eqOpn(lsrc, rsrc) then ST0 else rsrc + in oper(binOp,rsrc,ST0,0,code) + end + + fun process(I.FPR fx, I.FPR fy) = reg2(fx, fy) + | process(I.FPR fx, mem) = regmem(binOp, fx, mem) + | process(mem, I.FPR fy) = regmem(invert binOp, fy, mem) + | process(lsrc, rsrc) = mem2(lsrc, rsrc) + + in process(lsrc, rsrc) + end + + (* Floating point binary operator with integer conversion *) + fun fibinop{isize,binOp,lsrc,rsrc,dst} = + let fun oper(binOp,src,code) = + let val code = mark(I.fibinary{binOp=binOp,src=src},an) + ::code + in storeResult(I.FP64, dst, 0, code) + end + + fun regmem(binOp, fx, mem) = + case getfs fx of + (true, 0) => oper(binOp, mem, code) + | (true, n) => oper(binOp, mem, xch n::code) + | (false, n) => oper(binOp, mem, push n::code) + + in case (lsrc, rsrc) of + (I.FPR fx, mem) => regmem(binOp, fx, mem) + | (lsrc, rsrc) => oper(binOp, rsrc, pushmem lsrc::code) + end + + (* Floating point comparison + * We have to make sure there are enough registers. + * The trick is that tmp is always a physical register. + * So we can always use it as temporary space if we + * have run out. + *) + fun fcmp{i,fsize,lsrc,rsrc} = + let fun fucompp code = + (ST.pop stack; ST.pop stack; + if i then + POP_ST :: mark(I.fucomip(ST 1), an) :: code + else + mark(I.fucompp,an) :: code + ) + fun fucomp(n) = + (ST.pop stack; + mark((if i then I.fucomip else I.fucomp)(ST n),an)) + fun fucom(n) = + mark((if i then I.fucomi else I.fucom)(ST n),an) + + fun genmemcmp() = + let val code = movememtotop(fsize, rsrc, code) + val code = movememtotop(fsize, lsrc, code) + in FINISH(fucompp(code)) + end + + fun genmemregcmp(lsrc, fy) = + case getfs fy of + (false, n) => + let val code = movememtotop(fsize, lsrc, code) + in FINISH(fucomp(n+1)::code) end + | (true, n) => + let val code = if n = 0 then code else xch n::code + val code = movememtotop(fsize, lsrc, code) + in FINISH(fucompp(code)) + end + + fun genregmemcmp(fx, rsrc) = + let val code = + case getfs fx of + (true, n) => + let val code = if n = 0 then code + else xch n::code + val code = movememtotop(fsize, rsrc, code) + in xch 1::code end + | (false, n) => + let val code = movememtotop(fsize, rsrc, code) + in push(n+1)::code + end + in FINISH(fucompp(code)) + end + + (* Deal with the special case when both sources are + * in the same register + *) + fun regsame(dx, sx) = + let val (code, cmp) = + case (dx, sx) of + (true, 0) => (code, fucomp 0) (* pop once! *) + | (false, 0) => (code, fucom 0) (* don't pop! *) + | (true, n) => (xch n::code, fucomp 0) + | (false, n) => (xch n::code, fucom 0) + in FINISH(cmp::code) end + + fun reg2(fx, fy) = + (* special case is when things are already in place. + * Note: should also generate FUCOM and FUCOMP!!! + *) + let val (dx, sx) = getfs fx + val (dy, sy) = getfs fy + fun fstp(n) = + (ST.xch(stack,n,0); ST.pop stack; I.fstpl(ST n)) + in if sx = sy then regsame(dx, sx) (* same register!*) + else + (* first, move sx to %st(0) *) + let val (sy, code) = + if sx = 0 then (sy, code) (* there already *) + else (if sy = 0 then sx else sy, + xch sx::code) + + (* Generate the appropriate comparison op *) + val (sy, code, popY) = + case (dx, dy, sy) of + (true, true, 0) => (~1,fucompp code, false) + | (true, _, _) => (sy-1,fucomp sy::code,dy) + | (false, _, _) => (sy, fucom sy::code, dy) + + (* Pop fy if it is dead and hasn't already + * been popped. + *) + val code = if popY then fstp sy::code else code + in FINISH code + end + end + + in case (lsrc, rsrc) of + (I.FPR x, I.FPR y) => reg2(x, y) + | (I.FPR x, mem) => genregmemcmp(x, mem) + | (mem, I.FPR y) => genmemregcmp(mem, y) + | _ => genmemcmp() + end + + + fun prCopy(dst, src) = + ListPair.app(fn (fd, fs) => + pr(fregToString(fd)^"<-"^fregToString fs^" ")) + (dst, src) + + (* Parallel copy magic. + * For each src registers, we find out + * 1. whether it is the last use, and if so, + * 2. whether it is used more than once. + * If a source is a last and unique use, then we + * can simply rename it to appropriate destination register. + *) + fun fcopy(I.COPY{dst,src,tmp,...}) = let + fun loop([], [], copies, renames) = (copies, renames) + | loop(fd::fds, fs::fss, copies, renames) = + let val fsx = CB.registerNum fs + in if isLastUse fsx then + if A.sub(useTbl,fsx) <> stamp + (* unused *) + then (A.update(useTbl,fsx,stamp); + loop(fds, fss, copies, + if CB.sameColor(fd,fs) then renames + else (fd, fs)::renames) + ) + else loop(fds, fss, (fd, fs)::copies, renames) + else loop(fds, fss, (fd, fs)::copies, renames) + end + | loop _ = error "fcopy.loop" + + (* generate code for the copies *) + fun genCopy([], code) = code + | genCopy((fd, fs)::copies, code) = + let val ss = ST.fp(stack, CB.registerNum fs) + val _ = ST.push(stack, CB.registerNum fd) + val code = I.fldl(ST ss)::code + in genCopy(copies, code) end + + (* perform the renaming; it must be done in parallel! *) + fun renaming(renames) = + let val ss = map (fn (_,fs) => + ST.fp(stack,CB.registerNum fs)) renames + in ListPair.app (fn ((fd,_),ss) => + ST.set(stack,ss,CB.registerNum fd)) + (renames, ss) + end + + (* val _ = if debug then + (ListPair.app (fn (fd, fs) => + pr(fregToString(regmap fd)^"<-"^ + fregToString(regmap fs)^" ") + ) (dst, src); + pr "\n") + else () *) + + val (copies, renames) = loop(dst, src, [], []) + val code = genCopy(copies, code) + in renaming renames; + case tmp of + SOME(I.FPR f) => + (if debug andalso debugDead + then pr("KILLING tmp "^fregToString f^"\n") + else (); + ST.kill(stack, f) + ) + | _ => (); + DONE code + end + | fcopy _ = error "fcopy" + + fun call(instr, return) = let + val code = mark(I.INSTR instr, an)::code + val returnSet = SL.return(SL.uniq(getCell return)) + in + case returnSet of + [] => () + | [r] => ST.push(stack, CB.registerNum r) + | _ => + error "can't return more than one fp argument (yet)"; + KILL_THE_DEAD(List.filter isDead returnSet, code) + end + fun x86trans instr = + (case instr + of I.FMOVE x => (log(); fmove x) + | I.FBINOP x => (log(); fbinop x) + | I.FIBINOP x => (log(); fibinop x) + | I.FUNOP x => (log(); funop x) + | I.FILOAD x => (log(); fiload x) + | I.FCMP x => (log(); fcmp x) + + (* handle calling convention *) + | I.CALL{return, ...} => (log(); call(instr,return)) + + (* + * Catch instructions that absolutely + * should not have been generated at this point. + *) + | (I.FLD1 | I.FLDL2E | I.FLDLG2 | I.FLDLN2 | I.FLDPI | + I.FLDZ | I.FLDL _ | I.FLDS _ | I.FLDT _ | + I.FILD _ | I.FILDL _ | I.FILDLL _ | + I.FENV _ | I.FBINARY _ | I.FIBINARY _ | I.FUNARY _ | + I.FUCOMPP | I.FUCOM _ | I.FUCOMP _ | I.FCOMPP | I.FXCH _ | + I.FCOMI _ | I.FCOMIP _ | I.FUCOMI _ | I.FUCOMIP _ | + I.FSTPL _ | I.FSTPS _ | I.FSTPT _ | I.FSTL _ | I.FSTS _ + ) => bug("Illegal FP instructions") + + (* Other instructions are untouched *) + | instr => FINISH(mark(I.INSTR instr, an)::code) + (*esac*)) + in + case instr + of I.ANNOTATION{a,i} => + trans(stamp, i, a::an, rest, dead, lastUses, code) + | I.COPY{k=CB.FP, ...} => (log(); fcopy instr) + | I.LIVE _ => DONE(mark(instr, an)::code) + | I.INSTR instr => x86trans(instr) + | _ => FINISH(mark(instr, an)::code) + end (* trans *) + + (* + * Check the translation result to see if it matches the original + * code. + *) + fun checkTranslation(stackIn, stackOut, insns) = + let val n = ref(ST.depth stackIn) + fun push() = n := !n + 1 + fun pop() = n := !n - 1 + fun scan(I.INSTR(I.FBINARY{binOp, ...})) = + (case binOp of + ( I.FADDP | I.FSUBP | I.FSUBRP | I.FMULP + | I.FDIVP | I.FDIVRP) => pop() + | _ => () + ) + | scan(I.INSTR(I.FIBINARY{binOp, ...})) = () + | scan(I.INSTR(I.FUNARY I.FPTAN)) = push() + | scan(I.INSTR(I.FUNARY _)) = () + | scan(I.INSTR(I.FLDL(I.ST n))) = push() + | scan(I.INSTR(I.FLDL mem)) = push() + | scan(I.INSTR(I.FLDS mem)) = push() + | scan(I.INSTR(I.FLDT mem)) = push() + | scan(I.INSTR(I.FSTL(I.ST n))) = () + | scan(I.INSTR(I.FSTPL(I.ST n))) = pop() + | scan(I.INSTR(I.FSTL mem)) = () + | scan(I.INSTR(I.FSTS mem)) = () + | scan(I.INSTR(I.FSTPL mem)) = pop() + | scan(I.INSTR(I.FSTPS mem)) = pop() + | scan(I.INSTR(I.FSTPT mem)) = pop() + | scan(I.INSTR(I.FXCH{opnd=i,...})) = () + | scan(I.INSTR(I.FUCOM _)) = () + | scan(I.INSTR(I.FUCOMP _)) = pop() + | scan(I.INSTR(I.FUCOMPP)) = (pop(); pop()) + | scan(I.INSTR(I.FILD mem)) = push() + | scan(I.INSTR(I.FILDL mem)) = push() + | scan(I.INSTR(I.FILDLL mem)) = push() + | scan(I.INSTR(I.CALL{return, ...})) = + (n := 0; (* clear the stack *) + (* Simulate the pushing of arguments *) + let val returnSet = SL.return(SL.uniq(getCell return)) + in app (fn _ => push()) returnSet + end + ) + | scan _ = () + val _ = app scan (rev insns); + val n = !n + val m = ST.depth stackOut + in + if n <> m then + (dump(insns); + bug("Bad translation n="^i2s n^ " expected="^i2s m^"\n") + ) + else () + end + + + (* Dump the initial code *) + val _ = if debug andalso !debugOn then + (pr("-------- block "^i2s blknum^" ----"^ + celllistToString liveIn^" "^ + ST.stackToString stackIn^"\n"); + dump (!insns); + pr("succ="); + app (fn b => pr(i2s b^" ")) (#succ cfg blknum); + pr("\n") + ) + else () + + (* Compute the last uses *) + val lastUse = computeLastUse(blknum, insns, liveOut) + + (* Rewrite the code *) + val (stamp, insns') = loop(stamp, rev(!insns), lastUse, code) + + (* Insert shuffle code at the end if necessary *) + val insns' = shuffleOut(stack, insns', blknum, block, liveOut) + + (* Dump translation *) + val _ = if debug andalso !debugOn then + (pr("-------- translation "^i2s blknum^"----"^ + celllistToString liveIn^" "^ + ST.stackToString stackIn^"\n"); + dump insns'; + pr("-------- done "^i2s blknum^"----"^ + celllistToString liveOut^" "^ + ST.stackToString stack^"\n") + ) + else () + + (* Check if things are okay *) + val _ = if debug andalso sanityCheck then + checkTranslation(stackIn, stack, insns') + else () + + in insns := insns'; (* update the instructions *) + stamp + end (* process *) + + in (* Translate all blocks *) + stamp := C.firstPseudo; + #forall_nodes cfg rewriteAllBlocks; + (* If we found critical edges, then we have to split them... *) + if IntHashTable.numItems edgesToSplit = 0 then Cfg + else repairCriticalEdges(Cfg) + end +end (* functor *) + +end (* local *) diff --git a/MLRISC/x86/mltree/x86.sml b/MLRISC/x86/mltree/x86.sml new file mode 100644 index 0000000..16c7103 --- /dev/null +++ b/MLRISC/x86/mltree/x86.sml @@ -0,0 +1,2018 @@ +(* x86.sml + * + * COPYRIGHT (c) 1998 Bell Laboratories. + * + * This is a revised version that takes into account of + * the extended x86 instruction set, and has better handling of + * non-standard types. I've factored out the integer/floating point + * comparison code, added optimizations for conditional moves. + * The latter generates SETcc and CMOVcc (Pentium Pro only) instructions. + * To avoid problems, I have tried to incorporate as much of + * Lal's original magic incantations as possible. + * + * Some changes: + * + * 1. REMU/REMS are now supported + * 2. COND is supported by generating SETcc and/or CMOVcc; this + * may require at least a Pentium II to work. + * 3. Division by a constant has been optimized. Division by + * a power of 2 generates SHRL or SARL. + * 4. Better addressing mode selection has been implemented. This should + * improve array indexing on SML/NJ. + * 5. Generate testl/testb instead of andl whenever appropriate. This + * is recommended by the Intel Optimization Guide and seems to improve + * boxity tests on SML/NJ. + * + * More changes for floating point: + * A new mode is implemented which generates pseudo 3-address instructions + * for floating point. These instructions are register allocated the + * normal way, with the virtual registers mapped onto a set of pseudo + * %fp registers. These registers are then mapped onto the %st registers + * with a new postprocessing phase. + * + * -- Allen + *) +local + val rewriteMemReg = true (* should we rewrite memRegs *) + val enableFastFPMode = true (* set this to false to disable the mode *) +in + +functor X86 + (structure X86Instr : X86INSTR + structure MLTreeUtils : MLTREE_UTILS + where T = X86Instr.T + structure ExtensionComp : MLTREE_EXTENSION_COMP + where I = X86Instr and T = X86Instr.T + structure MLTreeStream : MLTREE_STREAM + where T = ExtensionComp.T + datatype arch = Pentium | PentiumPro | PentiumII | PentiumIII + val arch : arch ref + val cvti2f : + {ty: X86Instr.T.ty, + src: X86Instr.operand, + (* source operand, guaranteed to be non-memory! *) + an: Annotations.annotations ref (* cluster annotations *) + } -> + {instrs : X86Instr.instruction list,(* the instructions *) + tempMem: X86Instr.operand, (* temporary for CVTI2F *) + cleanup: X86Instr.instruction list (* cleanup code *) + } + (* When the following flag is set, we allocate floating point registers + * directly on the floating point stack + *) + val fast_floating_point : bool ref + ) : sig include MLTREECOMP + val rewriteMemReg : bool + end = +struct + structure I = X86Instr + structure T = I.T + structure TS = ExtensionComp.TS + structure C = I.C + structure Shuffle = Shuffle(I) + structure W32 = Word32 + structure A = MLRiscAnnotations + structure CFG = ExtensionComp.CFG + structure CB = CellsBasis + + type instrStream = (I.instruction,C.cellset,CFG.cfg) TS.stream + type mltreeStream = (T.stm,T.mlrisc list,CFG.cfg) TS.stream + + datatype kind = REAL | INTEGER + + structure Gen = MLTreeGen + (structure T = T + structure Cells = C + val intTy = 32 + val naturalWidths = [32] + datatype rep = SE | ZE | NEITHER + val rep = NEITHER + ) + + fun error msg = MLRiscErrorMsg.error("X86",msg) + + (* Should we perform automatic MemReg translation? + * If this is on, we can avoid doing RewritePseudo phase entirely. + *) + val rewriteMemReg = rewriteMemReg + + (* The following hardcoded *) + fun isMemReg r = rewriteMemReg andalso + let val r = CB.registerNum r + in r >= 8 andalso r < 32 + end + fun isFMemReg r = if enableFastFPMode andalso !fast_floating_point + then let val r = CB.registerNum r + in r >= 8 andalso r < 32 end + else true + val isAnyFMemReg = List.exists (fn r => + let val r = CB.registerNum r + in r >= 8 andalso r < 32 end + ) + + val ST0 = C.ST 0 + val ST7 = C.ST 7 + + val opcodes8 = {INC=I.INCB,DEC=I.DECB,ADD=I.ADDB,SUB=I.SUBB, + NOT=I.NOTB,NEG=I.NEGB, + SHL=I.SHLB,SHR=I.SHRB,SAR=I.SARB, + OR=I.ORB,AND=I.ANDB,XOR=I.XORB} + val opcodes16 = {INC=I.INCW,DEC=I.DECW,ADD=I.ADDW,SUB=I.SUBW, + NOT=I.NOTW,NEG=I.NEGW, + SHL=I.SHLW,SHR=I.SHRW,SAR=I.SARW, + OR=I.ORW,AND=I.ANDW,XOR=I.XORW} + val opcodes32 = {INC=I.INCL,DEC=I.DECL,ADD=I.ADDL,SUB=I.SUBL, + NOT=I.NOTL,NEG=I.NEGL, + SHL=I.SHLL,SHR=I.SHRL,SAR=I.SARL, + OR=I.ORL,AND=I.ANDL,XOR=I.XORL} + + (* + * The code generator + *) + fun selectInstructions + (instrStream as + TS.S.STREAM{emit=emitInstruction,defineLabel,entryLabel,pseudoOp, + annotation,getAnnotations,beginCluster,endCluster,exitBlock,comment,...}) = + let + val emit = emitInstruction o I.INSTR + exception EA + + (* label where a trap is generated -- one per cluster *) + val trapLabel = ref (NONE: (I.instruction * Label.label) option) + + (* flag floating point generation *) + val floatingPointUsed = ref false + + (* effective address of an integer register *) + fun IntReg r = if isMemReg r then I.MemReg r else I.Direct r + and RealReg r = if isFMemReg r then I.FDirect r else I.FPR r + + (* Add an overflow trap *) + fun trap() = + let + val jmp = + case !trapLabel of + NONE => let val label = Label.label "trap" () + val jmp = + I.ANNOTATION{i=I.jcc{cond=I.O, + opnd=I.ImmedLabel(T.LABEL label)}, + a=MLRiscAnnotations.BRANCHPROB (Probability.unlikely)} + in trapLabel := SOME(jmp, label); jmp end + | SOME(jmp, _) => jmp + in emitInstruction jmp end + + val newReg = C.newReg + val newFreg = C.newFreg + + fun fsize 32 = I.FP32 + | fsize 64 = I.FP64 + | fsize 80 = I.FP80 + | fsize _ = error "fsize" + + (* mark an expression with a list of annotations *) + fun mark'(i,[]) = emitInstruction(i) + | mark'(i,a::an) = mark'(I.ANNOTATION{i=i,a=a},an) + + (* annotate an expression and emit it *) + fun mark(i,an) = mark'(I.INSTR i,an) + + val emits = app emitInstruction + + (* emit parallel copies for integers + * Translates parallel copies that involve memregs into + * individual copies. + *) + fun copy([], [], an) = () + | copy(dst, src, an) = + let fun mvInstr{dst as I.MemReg rd, src as I.MemReg rs} = + if CB.sameColor(rd,rs) then [] else + let val tmpR = I.Direct(newReg()) + in [I.move{mvOp=I.MOVL, src=src, dst=tmpR}, + I.move{mvOp=I.MOVL, src=tmpR, dst=dst}] + end + | mvInstr{dst=I.Direct rd, src=I.Direct rs} = + if CB.sameColor(rd,rs) then [] + else [I.COPY{k=CB.GP, sz=32, dst=[rd], src=[rs], tmp=NONE}] + | mvInstr{dst, src} = [I.move{mvOp=I.MOVL, src=src, dst=dst}] + in + emits (Shuffle.shuffle{mvInstr=mvInstr, ea=IntReg} + {tmp=SOME(I.Direct(newReg())), + dst=dst, src=src}) + end + + (* conversions *) + val itow = Word.fromInt + val wtoi = Word.toInt + fun toInt32 i = T.I.toInt32(32, i) + val w32toi32 = Word32.toLargeIntX + val i32tow32 = Word32.fromLargeInt + + (* One day, this is going to bite us when precision(LargeInt)>32 *) + fun wToInt32 w = Int32.fromLarge(Word32.toLargeIntX w) + + (* some useful registers *) + val eax = I.Direct(C.eax) + val ecx = I.Direct(C.ecx) + val edx = I.Direct(C.edx) + + fun immedLabel lab = I.ImmedLabel(T.LABEL lab) + + (* Is the expression zero? *) + fun isZero(T.LI z) = z = 0 + | isZero(T.MARK(e,a)) = isZero e + | isZero _ = false + (* Does the expression set the zero bit? + * WARNING: we assume these things are not optimized out! + *) + fun setZeroBit(T.ANDB _) = true + | setZeroBit(T.ORB _) = true + | setZeroBit(T.XORB _) = true + | setZeroBit(T.SRA _) = true + | setZeroBit(T.SRL _) = true + | setZeroBit(T.SLL _) = true + | setZeroBit(T.SUB _) = true + | setZeroBit(T.ADDT _) = true + | setZeroBit(T.SUBT _) = true + | setZeroBit(T.MARK(e, _)) = setZeroBit e + | setZeroBit _ = false + + fun setZeroBit2(T.ANDB _) = true + | setZeroBit2(T.ORB _) = true + | setZeroBit2(T.XORB _) = true + | setZeroBit2(T.SRA _) = true + | setZeroBit2(T.SRL _) = true + | setZeroBit2(T.SLL _) = true + | setZeroBit2(T.ADD(32, _, _)) = true (* can't use leal! *) + | setZeroBit2(T.SUB _) = true + | setZeroBit2(T.ADDT _) = true + | setZeroBit2(T.SUBT _) = true + | setZeroBit2(T.MARK(e, _)) = setZeroBit2 e + | setZeroBit2 _ = false + + (* emit parallel copies for floating point + * Normal version. + *) + fun fcopy'(fty, [], [], _) = () + | fcopy'(fty, dst as [_], src as [_], an) = + mark'(I.COPY{k=CB.FP, sz=fty, dst=dst,src=src,tmp=NONE}, an) + | fcopy'(fty, dst, src, an) = + mark'(I.COPY{k=CB.FP, sz=fty, dst=dst,src=src,tmp=SOME(I.FDirect(newFreg()))}, an) + + (* emit parallel copies for floating point. + * Fast version. + * Translates parallel copies that involve memregs into + * individual copies. + *) + + fun fcopy''(fty, [], [], _) = () + | fcopy''(fty, dst, src, an) = + if true orelse isAnyFMemReg dst orelse isAnyFMemReg src then + let val fsize = fsize fty + fun mvInstr{dst, src} = [I.fmove{fsize=fsize, src=src, dst=dst}] + in + emits (Shuffle.shuffle{mvInstr=mvInstr, ea=RealReg} + {tmp=case dst of + [_] => NONE + | _ => SOME(I.FPR(newReg())), + dst=dst, src=src}) + end + else + mark'(I.COPY{k=CB.FP, sz=fty, dst=dst, + src=src,tmp= + case dst of + [_] => NONE + | _ => SOME(I.FPR(newFreg()))}, an) + + fun fcopy x = if enableFastFPMode andalso !fast_floating_point + then fcopy'' x else fcopy' x + + (* Translates MLTREE condition code to x86 condition code *) + fun cond T.LT = I.LT | cond T.LTU = I.B + | cond T.LE = I.LE | cond T.LEU = I.BE + | cond T.EQ = I.EQ | cond T.NE = I.NE + | cond T.GE = I.GE | cond T.GEU = I.AE + | cond T.GT = I.GT | cond T.GTU = I.A + | cond cc = error(concat["cond(", T.Basis.condToString cc, ")"]) + + fun zero dst = emit(I.BINARY{binOp=I.XORL, src=dst, dst=dst}) + + (* Move and annotate *) + fun move'(src as I.Direct s, dst as I.Direct d, an) = + if CB.sameColor(s,d) then () + else mark'(I.COPY{k=CB.GP, sz=32, dst=[d], src=[s], tmp=NONE}, an) + | move'(I.Immed 0, dst as I.Direct d, an) = + mark(I.BINARY{binOp=I.XORL, src=dst, dst=dst}, an) + | move'(src, dst, an) = mark(I.MOVE{mvOp=I.MOVL, src=src, dst=dst}, an) + + (* Move only! *) + fun move(src, dst) = move'(src, dst, []) + + val readonly = I.Region.readonly + + (* + * Compute an effective address. + *) + fun address(ea, mem) = let + (* Keep building a bigger and bigger effective address expressions + * The input is a list of trees + * b -- base + * i -- index + * s -- scale + * d -- immed displacement + *) + fun doEA([], b, i, s, d) = makeAddressingMode(b, i, s, d) + | doEA(t::trees, b, i, s, d) = + (case t of + T.LI n => doEAImmed(trees, toInt32 n, b, i, s, d) + | T.CONST _ => doEALabel(trees, t, b, i, s, d) + | T.LABEL _ => doEALabel(trees, t, b, i, s, d) + | T.LABEXP le => doEALabel(trees, le, b, i, s, d) + | T.ADD(32, t1, t2 as T.REG(_,r)) => + if isMemReg r then doEA(t2::t1::trees, b, i, s, d) + else doEA(t1::t2::trees, b, i, s, d) + | T.ADD(32, t1, t2) => doEA(t1::t2::trees, b, i, s, d) + | T.SUB(32, t1, T.LI n) => + doEA(t1::T.LI(T.I.NEG(32,n))::trees, b, i, s, d) + | T.SLL(32, t1, T.LI n) => let + val n = T.I.toInt(32, n) + in + case n + of 0 => displace(trees, t1, b, i, s, d) + | 1 => indexed(trees, t1, t, 1, b, i, s, d) + | 2 => indexed(trees, t1, t, 2, b, i, s, d) + | 3 => indexed(trees, t1, t, 3, b, i, s, d) + | _ => displace(trees, t, b, i, s, d) + end + | t => displace(trees, t, b, i, s, d) + ) + + (* Add an immed constant *) + and doEAImmed(trees, 0, b, i, s, d) = doEA(trees, b, i, s, d) + | doEAImmed(trees, n, b, i, s, I.Immed m) = + doEA(trees, b, i, s, I.Immed(n+m)) + | doEAImmed(trees, n, b, i, s, I.ImmedLabel le) = + doEA(trees, b, i, s, + I.ImmedLabel(T.ADD(32,le,T.LI(T.I.fromInt32(32, n))))) + | doEAImmed(trees, n, b, i, s, _) = error "doEAImmed" + + (* Add a label expression *) + and doEALabel(trees, le, b, i, s, I.Immed 0) = + doEA(trees, b, i, s, I.ImmedLabel le) + | doEALabel(trees, le, b, i, s, I.Immed m) = + doEA(trees, b, i, s, + I.ImmedLabel(T.ADD(32,le,T.LI(T.I.fromInt32(32, m)))) + handle Overflow => error "doEALabel: constant too large") + | doEALabel(trees, le, b, i, s, I.ImmedLabel le') = + doEA(trees, b, i, s, I.ImmedLabel(T.ADD(32,le,le'))) + | doEALabel(trees, le, b, i, s, _) = error "doEALabel" + + and makeAddressingMode (NONE, NONE, _, disp) = disp + | makeAddressingMode (SOME base, NONE, _, disp) = + I.Displace{base=base, disp=disp, mem=mem} + | makeAddressingMode (base, SOME index, scale, disp) = + I.Indexed{base=base, index=index, scale=scale, + disp=disp, mem=mem} + + (* generate code for tree and ensure that it is not in %esp *) + and exprNotEsp tree = + let val r = expr tree + in if CB.sameColor(r, C.esp) then + let val tmp = newReg() + in move(I.Direct r, I.Direct tmp); tmp end + else r + end + + (* Add a base register *) + and displace(trees, t, NONE, i, s, d) = (* no base yet *) + doEA(trees, SOME(expr t), i, s, d) + | displace(trees, t, b as SOME base, NONE, _, d) = (* no index *) + (* make t the index, but make sure that it is not %esp! *) + let val i = expr t + in if CB.sameColor(i, C.esp) then + (* swap base and index *) + if CB.sameColor(base, C.esp) then + doEA(trees, SOME i, b, 0, d) + else (* base and index = %esp! *) + let val index = newReg() + in move(I.Direct i, I.Direct index); + doEA(trees, b, SOME index, 0, d) + end + else + doEA(trees, b, SOME i, 0, d) + end + | displace(trees, t, SOME base, i, s, d) = (* base and index *) + let val b = expr(T.ADD(32,T.REG(32,base),t)) + in doEA(trees, SOME b, i, s, d) end + + (* Add an indexed register *) + and indexed(trees, t, t0, scale, b, NONE, _, d) = (* no index yet *) + doEA(trees, b, SOME(exprNotEsp t), scale, d) + | indexed(trees, _, t0, _, NONE, i, s, d) = (* no base *) + doEA(trees, SOME(expr t0), i, s, d) + | indexed(trees, _, t0, _, SOME base, i, s, d) = (*base and index*) + let val b = expr(T.ADD(32, t0, T.REG(32, base))) + in doEA(trees, SOME b, i, s, d) end + + in case doEA([ea], NONE, NONE, 0, I.Immed 0) of + I.Immed _ => raise EA + | I.ImmedLabel le => I.LabelEA le + | ea => ea + end (* address *) + + (* reduce an expression into an operand *) + and operand(T.LI i) = I.Immed(toInt32(i)) + | operand(x as (T.CONST _ | T.LABEL _)) = I.ImmedLabel x + | operand(T.LABEXP le) = I.ImmedLabel le + | operand(T.REG(_,r)) = IntReg r + | operand(T.LOAD(32,ea,mem)) = address(ea, mem) + | operand(t) = I.Direct(expr t) + + and moveToReg(opnd) = + let val dst = I.Direct(newReg()) + in move(opnd, dst); dst + end + + and reduceOpnd(I.Direct r) = r + | reduceOpnd opnd = + let val dst = newReg() + in move(opnd, I.Direct dst); dst + end + + (* ensure that the operand is either an immed or register *) + and immedOrReg(opnd as I.Displace _) = moveToReg opnd + | immedOrReg(opnd as I.Indexed _) = moveToReg opnd + | immedOrReg(opnd as I.MemReg _) = moveToReg opnd + | immedOrReg(opnd as I.LabelEA _) = moveToReg opnd + | immedOrReg opnd = opnd + + and isImmediate(I.Immed _) = true + | isImmediate(I.ImmedLabel _) = true + | isImmediate _ = false + + and regOrMem opnd = if isImmediate opnd then moveToReg opnd else opnd + + and isMemOpnd opnd = + (case opnd of + I.Displace _ => true + | I.Indexed _ => true + | I.MemReg _ => true + | I.LabelEA _ => true + | I.FDirect f => true + | _ => false + ) + + (* + * Compute an integer expression and put the result in + * the destination register rd. + *) + and doExpr(exp, rd : CB.cell, an) = + let val rdOpnd = IntReg rd + + fun equalRd(I.Direct r) = CB.sameColor(r, rd) + | equalRd(I.MemReg r) = CB.sameColor(r, rd) + | equalRd _ = false + + (* Emit a binary operator. If the destination is + * a memReg, do something smarter. + *) + fun genBinary(binOp, opnd1, opnd2) = + if isMemReg rd andalso + (isMemOpnd opnd1 orelse isMemOpnd opnd2) orelse + equalRd(opnd2) + then + let val tmpR = newReg() + val tmp = I.Direct tmpR + in move(opnd1, tmp); + mark(I.BINARY{binOp=binOp, src=opnd2, dst=tmp}, an); + move(tmp, rdOpnd) + end + else + (move(opnd1, rdOpnd); + mark(I.BINARY{binOp=binOp, src=opnd2, dst=rdOpnd}, an) + ) + + (* Generate a binary operator; it may commute *) + fun binaryComm(binOp, e1, e2) = + let val (opnd1, opnd2) = + case (operand e1, operand e2) of + (x as I.Immed _, y) => (y, x) + | (x as I.ImmedLabel _, y) => (y, x) + | (x, y as I.Direct _) => (y, x) + | (x, y) => (x, y) + in genBinary(binOp, opnd1, opnd2) + end + + (* Generate a binary operator; non-commutative *) + fun binary(binOp, e1, e2) = + genBinary(binOp, operand e1, operand e2) + + (* Generate a unary operator *) + fun unary(unOp, e) = + let val opnd = operand e + in if isMemReg rd andalso isMemOpnd opnd then + let val tmp = I.Direct(newReg()) + in move(opnd, tmp); move(tmp, rdOpnd) + end + else move(opnd, rdOpnd); + mark(I.UNARY{unOp=unOp, opnd=rdOpnd}, an) + end + + (* Generate shifts; the shift + * amount must be a constant or in %ecx *) + fun shift(opcode, e1, e2) = + let val (opnd1, opnd2) = (operand e1, operand e2) + in case opnd2 of + I.Immed _ => genBinary(opcode, opnd1, opnd2) + | _ => + if equalRd(opnd2) then + let val tmpR = newReg() + val tmp = I.Direct tmpR + in move(opnd1, tmp); + move(opnd2, ecx); + mark(I.BINARY{binOp=opcode, src=ecx, dst=tmp},an); + move(tmp, rdOpnd) + end + else + (move(opnd1, rdOpnd); + move(opnd2, ecx); + mark(I.BINARY{binOp=opcode, src=ecx, dst=rdOpnd},an) + ) + end + + (* Division or remainder: divisor must be in %edx:%eax pair *) + fun divrem (signed, e1, e2, resultReg) = + let val (opnd1, opnd2) = (operand e1, operand e2) + val _ = move(opnd1, eax) + val oper = if signed then (emit(I.CDQ); I.IDIVL1) + else (zero edx; I.DIVL1) + in mark(I.MULTDIV{multDivOp=oper, src=regOrMem opnd2},an); + move(resultReg, rdOpnd) +(* NOTE: on the x86, the IDIV instruction traps on overflow + if overflow then trap() else () +*) + end + + (* division with rounding towards negative infinity *) + fun divinf0 (e1, e2) = let + val o1 = operand e1 + val o2 = operand e2 + val l = Label.anon () + in + move (o1, eax); + emit I.CDQ; + mark (I.MULTDIV { multDivOp = I.IDIVL1, src = regOrMem o2 }, + an); +(* NOTE: on the x86, the IDIV instruction traps on overflow + if overflow then trap() else (); +*) + app emit [I.CMPL { lsrc = edx, rsrc = I.Immed 0 }, + I.JCC { cond = I.EQ, opnd = immedLabel l }, + I.BINARY { binOp = I.XORL, + src = regOrMem o2, + dst = edx }, + I.JCC { cond = I.GE, opnd = immedLabel l }, + I.UNARY { unOp = I.DECL, opnd = eax }]; + defineLabel l; + move (eax, rdOpnd) + end + + (* analyze for power-of-two-ness *) + fun analyze i' = let + val i = toInt32 i' + in + let val (isneg, a, w) = + if i >= 0 then (false, i, T.I.toWord32 (32, i')) + else (true, ~i, T.I.toWord32 (32, T.I.NEG (32, i'))) + fun log2 (0w1, p) = p + | log2 (w, p) = log2 (W32.>> (w, 0w1), p + 1) + in + if w > 0w1 andalso W32.andb (w - 0w1, w) = 0w0 then + (i, SOME (isneg, a, + T.LI (T.I.fromInt32 (32, log2 (w, 0))))) + else (i, NONE) + end handle _ => (i, NONE) + end + + (* Division by a power of two when rounding to neginf is the + * same as an arithmetic right shift. *) + fun divinf (e1, e2 as T.LI n') = + (case analyze n' of + (_, NONE) => divinf0 (e1, e2) + | (_, SOME (false, _, p)) => + shift (I.SARL, T.REG (32, expr e1), p) + | (_, SOME (true, _, p)) => let + val reg = expr e1 + in + emit(I.UNARY { unOp = I.NEGL, opnd = I.Direct reg }); + shift (I.SARL, T.REG (32, reg), p) + end) + | divinf (e1, e2) = divinf0 (e1, e2) + + fun reminf0 (e1, e2) = let + val o1 = operand e1 + val o2 = operand e2 + val l = Label.anon () + in + move (o1, eax); + emit I.CDQ; + mark (I.MULTDIV { multDivOp = I.IDIVL1, src = regOrMem o2 }, + an); + app emit [I.CMPL { lsrc = edx, rsrc = I.Immed 0 }, + I.JCC { cond = I.EQ, opnd = immedLabel l }]; + move (edx, eax); + app emit [I.BINARY { binOp = I.XORL, + src = regOrMem o2, dst = eax }, + I.JCC { cond = I.GE, opnd = immedLabel l }, + I.BINARY { binOp = I.ADDL, + src = regOrMem o2, dst = edx }]; + defineLabel l; + move (edx, rdOpnd) + end + + (* n mod (power-of-2) corresponds to a bitmask (AND). + * If the power is negative, then we must first negate + * the argument and then again negate the result. *) + fun reminf (e1, e2 as T.LI n') = + (case analyze n' of + (_, NONE) => reminf0 (e1, e2) + | (_, SOME (false, a, _)) => + binaryComm (I.ANDL, e1, + T.LI (T.I.fromInt32 (32, a - 1))) + | (_, SOME (true, a, _)) => let + val r1 = expr e1 + val o1 = I.Direct r1 + in + emit (I.UNARY { unOp = I.NEGL, opnd = o1 }); + emit (I.BINARY { binOp = I.ANDL, + src = I.Immed (a - 1), + dst = o1 }); + unary (I.NEGL, T.REG (32, r1)) + end) + | reminf (e1, e2) = reminf0 (e1, e2) + + (* Optimize the special case for division *) + fun divide (signed, e1, e2 as T.LI n') = + (case analyze n' of + (n, SOME (isneg, a, p)) => + if signed then + let val label = Label.anon () + val reg1 = expr e1 + val opnd1 = I.Direct reg1 + in + if isneg then + emit (I.UNARY { unOp = I.NEGL, + opnd = opnd1 }) + else if setZeroBit e1 then () + else emit (I.CMPL { lsrc = opnd1, + rsrc = I.Immed 0 }); + emit (I.JCC { cond = I.GE, + opnd = immedLabel label }); + emit (if a = 2 then + I.UNARY { unOp = I.INCL, + opnd = opnd1 } + else + I.BINARY { binOp = I.ADDL, + src = I.Immed (a - 1), + dst = opnd1 }); + defineLabel label; + shift (I.SARL, T.REG (32, reg1), p) + end + else shift (I.SHRL, e1, p) + | (n, NONE) => divrem(signed, e1, e2, eax)) + | divide (signed, e1, e2) = divrem (signed, e1, e2, eax) + + (* rem never causes overflow *) + fun rem (signed, e1, e2 as T.LI n') = + (case analyze n' of + (n, SOME (isneg, a, _)) => + if signed then + (* The following logic should work uniformely + * for both isneg and not isneg. It only uses + * the absolute value (a) of the divisor. + * Here is the formula: + * let p be a power of two and a = abs(p): + * + * x % p = x - ((x < 0 ? x + a - 1 : x) & (-a)) + * + * (That's what GCC seems to do.) + *) + let val r1 = expr e1 + val o1 = I.Direct r1 + val rt = newReg () + val tmp = I.Direct rt + val l = Label.anon () + in + move (o1, tmp); + if setZeroBit e1 then () + else emit (I.CMPL { lsrc = o1, + rsrc = I.Immed 0 }); + emit (I.JCC { cond = I.GE, + opnd = immedLabel l }); + emit (I.BINARY { binOp = I.ADDL, + src = I.Immed (a - 1), + dst = tmp }); + defineLabel l; + emit (I.BINARY { binOp = I.ANDL, + src = I.Immed (~a), + dst = tmp }); + binary (I.SUBL, T.REG (32, r1), T.REG (32, rt)) + end + else + if isneg then + (* this is really strange... *) + divrem (false, e1, e2, edx) + else + binaryComm (I.ANDL, e1, + T.LI (T.I.fromInt32 (32, n - 1))) + | (_, NONE) => divrem (signed, e1, e2, edx)) + | rem(signed, e1, e2) = divrem(signed, e1, e2, edx) + + (* Makes sure the destination must be a register *) + fun dstMustBeReg f = + if isMemReg rd then + let val tmpR = newReg() + val tmp = I.Direct(tmpR) + in f(tmpR, tmp); move(tmp, rdOpnd) end + else f(rd, rdOpnd) + + (* unsigned integer multiplication *) + fun uMultiply0 (e1, e2) = + (* note e2 can never be (I.Direct edx) *) + (move(operand e1, eax); + mark(I.MULTDIV{multDivOp=I.MULL1, + src=regOrMem(operand e2)},an); + move(eax, rdOpnd) + ) + + fun uMultiply (e1, e2 as T.LI n') = + (case analyze n' of + (_, SOME (false, _, p)) => shift (I.SHLL, e1, p) + | _ => uMultiply0 (e1, e2)) + | uMultiply (e1 as T.LI _, e2) = uMultiply (e2, e1) + | uMultiply (e1, e2) = uMultiply0 (e1, e2) + + (* signed integer multiplication: + * The only forms that are allowed that also sets the + * OF and CF flags are: + * + * (dst) (src1) (src2) + * imul r32, r32/m32, imm8 + * (dst) (src) + * imul r32, imm8 + * imul r32, imm32 + * imul r32, r32/m32 + * Note: destination must be a register! + *) + fun multiply (e1, e2) = + dstMustBeReg(fn (rd, rdOpnd) => + let fun doit(i1 as I.Immed _, i2 as I.Immed _) = + (move(i1, rdOpnd); + mark(I.BINARY{binOp=I.IMULL, dst=rdOpnd, src=i2},an)) + | doit(rm, i2 as I.Immed _) = doit(i2, rm) + | doit(imm as I.Immed(i), rm) = + mark(I.MUL3{dst=rd, src1=rm, src2=i},an) + | doit(r1 as I.Direct _, r2 as I.Direct _) = + (move(r1, rdOpnd); + mark(I.BINARY{binOp=I.IMULL, dst=rdOpnd, src=r2},an)) + | doit(r1 as I.Direct _, rm) = + (move(r1, rdOpnd); + mark(I.BINARY{binOp=I.IMULL, dst=rdOpnd, src=rm},an)) + | doit(rm, r as I.Direct _) = doit(r, rm) + | doit(rm1, rm2) = + if equalRd rm2 then + let val tmpR = newReg() + val tmp = I.Direct tmpR + in move(rm1, tmp); + mark(I.BINARY{binOp=I.IMULL, dst=tmp, src=rm2},an); + move(tmp, rdOpnd) + end + else + (move(rm1, rdOpnd); + mark(I.BINARY{binOp=I.IMULL, dst=rdOpnd, src=rm2},an) + ) + val (opnd1, opnd2) = (operand e1, operand e2) + in doit(opnd1, opnd2) + end + ) + + fun multiply_notrap (e1, e2 as T.LI n') = + (case analyze n' of + (_, SOME (isneg, _, p)) => let + val r1 = expr e1 + val o1 = I.Direct r1 + in + if isneg then + emit (I.UNARY { unOp = I.NEGL, opnd = o1 }) + else (); + shift (I.SHLL, T.REG (32, r1), p) + end + | _ => multiply (e1, e2)) + | multiply_notrap (e1 as T.LI _, e2) = multiply_notrap (e2, e1) + | multiply_notrap (e1, e2) = multiply (e1, e2) + + (* Emit a load instruction; makes sure that the destination + * is a register + *) + fun genLoad(mvOp, ea, mem) = + dstMustBeReg(fn (_, dst) => + mark(I.MOVE{mvOp=mvOp, src=address(ea, mem), dst=dst},an)) + + (* Generate a zero extended loads *) + fun load8(ea, mem) = genLoad(I.MOVZBL, ea, mem) + fun load16(ea, mem) = genLoad(I.MOVZWL, ea, mem) + fun load8s(ea, mem) = genLoad(I.MOVSBL, ea, mem) + fun load16s(ea, mem) = genLoad(I.MOVSWL, ea, mem) + fun load32(ea, mem) = genLoad(I.MOVL, ea, mem) + fun load64(ea, mem) = genLoad(I.MOVL, ea, mem) (* XXX64 *) + + (* Generate a sign extended loads *) + + (* Generate setcc instruction: + * semantics: MV(rd, COND(_, T.CMP(ty, cc, t1, t2), yes, no)) + * Bug, if eax is either t1 or t2 then problem will occur!!! + * Note that we have to use eax as the destination of the + * setcc because it only works on the registers + * %al, %bl, %cl, %dl and %[abcd]h. The last four registers + * are inaccessible in 32 bit mode. + *) + fun setcc(ty, cc, t1, t2, yes, no) = + let val (cc, yes, no) = + if yes > no then (cc, yes, no) + else (T.Basis.negateCond cc, no, yes) + in (* Clear the destination first. + * This this because stupid SETcc + * only writes to the low order + * byte. That's Intel architecture, folks. + *) + case (yes, no, cc) of + (1, 0, T.LT) => + let val tmp = I.Direct(expr(T.SUB(32,t1,t2))) + in move(tmp, rdOpnd); + emit(I.BINARY{binOp=I.SHRL,src=I.Immed 31,dst=rdOpnd}) + end + | (1, 0, T.GT) => + let val tmp = I.Direct(expr(T.SUB(32,t1,t2))) + in emit(I.UNARY{unOp=I.NOTL,opnd=tmp}); + move(tmp, rdOpnd); + emit(I.BINARY{binOp=I.SHRL,src=I.Immed 31,dst=rdOpnd}) + end + | (1, 0, _) => (* normal case *) + let val cc = cmp(true, ty, cc, t1, t2, []) + in mark(I.SET{cond=cond cc, opnd=eax}, an); + emit(I.BINARY{binOp=I.ANDL,src=I.Immed 255, dst=eax}); + move(eax, rdOpnd) + end + | (C1, C2, _) => + (* general case; + * from the Intel optimization guide p3-5 + *) + let val _ = zero eax; + val cc = cmp(true, ty, cc, t1, t2, []) + fun c19 (base, scale) = let + val addr = I.Indexed{base=base, + index=C.eax, + scale=scale, + disp=I.Immed C2, + mem=readonly} + val tmpR = newReg() + val tmp = I.Direct tmpR + in emit(I.SET{cond=cond cc, opnd=eax}); + mark(I.LEA{r32=tmpR, addr=addr}, an); + move(tmp, rdOpnd) + end + in + case C1-C2 of + 1 => c19 (NONE, 0) + | 2 => c19 (NONE, 1) + | 3 => c19 (SOME C.eax, 1) + | 4 => c19 (NONE, 2) + | 5 => c19 (SOME C.eax, 2) + | 8 => c19 (NONE, 3) + | 9 => c19 (SOME C.eax, 3) + | D => + (emit(I.SET{cond=cond(T.Basis.negateCond cc), + opnd=eax}); + emit(I.UNARY{unOp=I.DECL, opnd=eax}); + emit(I.BINARY{binOp=I.ANDL, + src=I.Immed D, dst=eax}); + if C2 = 0 then + move(eax, rdOpnd) + else + let val tmpR = newReg() + val tmp = I.Direct tmpR + in mark(I.LEA{addr= + I.Displace{ + base=C.eax, + disp=I.Immed C2, + mem=readonly}, + r32=tmpR}, an); + move(tmp, rdOpnd) + end) + end + end (* setcc *) + + (* Generate cmovcc instruction. + * on Pentium Pro and Pentium II only + *) + fun cmovcc(ty, cc, t1, t2, yes, no) = + let fun genCmov(dstR, _) = + let val _ = doExpr(no, dstR, []) (* false branch *) + val cc = cmp(true, ty, cc, t1, t2, []) (* compare *) + in mark(I.CMOV{cond=cond cc, src=regOrMem(operand yes), + dst=dstR}, an) + end + in dstMustBeReg genCmov + end + + fun unknownExp exp = doExpr(Gen.compileRexp exp, rd, an) + + (* Add n to rd *) + fun addN n = + let val n = operand n + val src = if isMemReg rd then immedOrReg n else n + in mark(I.BINARY{binOp=I.ADDL, src=src, dst=rdOpnd}, an) end + + (* Generate addition *) + fun addition(e1, e2) = + case e1 of + T.REG(_,rs) => if CB.sameColor(rs,rd) then addN e2 + else addition1(e1,e2) + | _ => addition1(e1,e2) + and addition1(e1, e2) = + case e2 of + T.REG(_,rs) => if CB.sameColor(rs,rd) then addN e1 + else addition2(e1,e2) + | _ => addition2(e1,e2) + and addition2(e1,e2) = + (dstMustBeReg(fn (dstR, _) => + mark(I.LEA{r32=dstR, addr=address(exp, readonly)}, an)) + handle EA => binaryComm(I.ADDL, e1, e2)) + + + in case exp of + T.REG(_,rs) => + if isMemReg rs andalso isMemReg rd then + let val tmp = I.Direct(newReg()) + in move'(I.MemReg rs, tmp, an); + move'(tmp, rdOpnd, []) + end + else move'(IntReg rs, rdOpnd, an) + | T.LI z => let + val n = toInt32 z + in + if n=0 then + (* As per Fermin's request, special optimization for rd := 0. + * Currently we don't bother with the size. + *) + if isMemReg rd then move'(I.Immed 0, rdOpnd, an) + else mark(I.BINARY{binOp=I.XORL, src=rdOpnd, dst=rdOpnd}, an) + else + move'(I.Immed(n), rdOpnd, an) + end + | (T.CONST _ | T.LABEL _) => + move'(I.ImmedLabel exp, rdOpnd, an) + | T.LABEXP le => move'(I.ImmedLabel le, rdOpnd, an) + + (* 32-bit addition *) + | T.ADD(32, e1, e2 as T.LI n) => let + val n = toInt32 n + in + case n + of 1 => unary(I.INCL, e1) + | ~1 => unary(I.DECL, e1) + | _ => addition(e1, e2) + end + | T.ADD(32, e1 as T.LI n, e2) => let + val n = toInt32 n + in + case n + of 1 => unary(I.INCL, e2) + | ~1 => unary(I.DECL, e2) + | _ => addition(e1, e2) + end + | T.ADD(32, e1, e2) => addition(e1, e2) + + (* 32-bit addition but set the flag! + * This is a stupid hack for now. + *) + | T.ADD(0, e, e1 as T.LI n) => let + val n = T.I.toInt(32, n) + in + if n=1 then unary(I.INCL, e) + else if n = ~1 then unary(I.DECL, e) + else binaryComm(I.ADDL, e, e1) + end + | T.ADD(0, e1 as T.LI n, e) => let + val n = T.I.toInt(32, n) + in + if n=1 then unary(I.INCL, e) + else if n = ~1 then unary(I.DECL, e) + else binaryComm(I.ADDL, e1, e) + end + | T.ADD(0, e1, e2) => binaryComm(I.ADDL, e1, e2) + + (* 32-bit subtraction *) + | T.SUB(32, e1, e2 as T.LI n) => let + val n = toInt32 n + in + case n + of 0 => doExpr(e1, rd, an) + | 1 => unary(I.DECL, e1) + | ~1 => unary(I.INCL, e1) + | _ => binary(I.SUBL, e1, e2) + end + | T.SUB(32, e1 as T.LI n, e2) => + if n = 0 then unary(I.NEGL, e2) + else binary(I.SUBL, e1, e2) + | T.SUB(32, e1, e2) => binary(I.SUBL, e1, e2) + + | T.MULU(32, x, y) => uMultiply(x, y) + | T.DIVU(32, x, y) => divide(false, x, y) + | T.REMU(32, x, y) => rem(false, x, y) + + | T.MULS(32, x, y) => multiply_notrap (x, y) + | T.DIVS(T.DIV_TO_ZERO, 32, x, y) => divide(true, x, y) + | T.DIVS(T.DIV_TO_NEGINF, 32, x, y) => divinf (x, y) + | T.REMS(T.DIV_TO_ZERO, 32, x, y) => rem(true, x, y) + | T.REMS(T.DIV_TO_NEGINF, 32, x, y) => reminf (x, y) + + | T.ADDT(32, x, y) => (binaryComm(I.ADDL, x, y); trap()) + | T.SUBT(32, T.LI 0, y) => (unary(I.NEGL, y); trap()) + | T.SUBT(32, x, y) => (binary(I.SUBL, x, y); trap()) + | T.MULT(32, x, y) => (multiply (x, y); trap ()) + | T.DIVT(T.DIV_TO_ZERO, 32, x, y) => divide(true, x, y) + | T.DIVT(T.DIV_TO_NEGINF, 32, x, y) => divinf (x, y) + + | T.ANDB(32, x, y) => binaryComm(I.ANDL, x, y) + | T.ORB(32, x, y) => binaryComm(I.ORL, x, y) + | T.XORB(32, x, y) => binaryComm(I.XORL, x, y) + | T.NOTB(32, x) => unary(I.NOTL, x) + + | T.SRA(32, x, y) => shift(I.SARL, x, y) + | T.SRL(32, x, y) => shift(I.SHRL, x, y) + | T.SLL(32, x, y) => shift(I.SHLL, x, y) + + | T.LOAD(8, ea, mem) => load8(ea, mem) + | T.LOAD(16, ea, mem) => load16(ea, mem) + | T.LOAD(32, ea, mem) => load32(ea, mem) + | T.LOAD(64, ea, mem) => load64(ea, mem) + + | T.SX(32,8,T.LOAD(8,ea,mem)) => load8s(ea, mem) + | T.SX(32,16,T.LOAD(16,ea,mem)) => load16s(ea, mem) + | T.ZX(32,8,T.LOAD(8,ea,mem)) => load8(ea, mem) + | T.ZX(32,16,T.LOAD(16,ea,mem)) => load16(ea, mem) + + | T.COND(32, T.CMP(ty, cc, t1, t2), y as T.LI yes, n as T.LI no) => + (case !arch of (* PentiumPro and higher has CMOVcc *) + Pentium => setcc(ty, cc, t1, t2, toInt32 yes, toInt32 no) + | _ => cmovcc(ty, cc, t1, t2, y, n) + ) + | T.COND(32, T.CMP(ty, cc, t1, t2), yes, no) => + (case !arch of (* PentiumPro and higher has CMOVcc *) + Pentium => unknownExp exp + | _ => cmovcc(ty, cc, t1, t2, yes, no) + ) + | T.LET(s,e) => (doStmt s; doExpr(e, rd, an)) + | T.MARK(e, A.MARKREG f) => (f rd; doExpr(e, rd, an)) + | T.MARK(e, a) => doExpr(e, rd, a::an) + | T.PRED(e,c) => doExpr(e, rd, A.CTRLUSE c::an) + | T.REXT e => + ExtensionComp.compileRext (reducer()) {e=e, rd=rd, an=an} + (* simplify and try again *) + | exp => unknownExp exp + end (* doExpr *) + + (* generate an expression and return its result register + * If rewritePseudo is on, the result is guaranteed to be in a + * non memReg register + *) + and expr(exp as T.REG(_, rd)) = + if isMemReg rd then genExpr exp else rd + | expr exp = genExpr exp + + and genExpr exp = + let val rd = newReg() in doExpr(exp, rd, []); rd end + + (* Compare an expression with zero. + * On the x86, TEST is superior to AND for doing the same thing, + * since it doesn't need to write out the result in a register. + *) + and cmpWithZero(cc as (T.EQ | T.NE), e as T.ANDB(ty, a, b), an) = + (case ty of + 8 => test(I.TESTB, a, b, an) + | 16 => test(I.TESTW, a, b, an) + | 32 => test(I.TESTL, a, b, an) + | _ => doExpr(e, newReg(), an); + cc) + | cmpWithZero(cc, e, an) = + let val e = + case e of (* hack to disable the lea optimization XXX *) + T.ADD(_, a, b) => T.ADD(0, a, b) + | e => e + in doExpr(e, newReg(), an); cc end + + (* Emit a test. + * The available modes are + * r/m, r + * r/m, imm + * On selecting the right instruction: TESTL/TESTW/TESTB. + * When anding an operand with a constant + * that fits within 8 (or 16) bits, it is possible to use TESTB, + * (or TESTW) instead of TESTL. Because x86 is little endian, + * this works for memory operands too. However, with TESTB, it is + * not possible to use registers other than + * AL, CL, BL, DL, and AH, CH, BH, DH. So, the best way is to + * perform register allocation first, and if the operand registers + * are one of EAX, ECX, EBX, or EDX, replace the TESTL instruction + * by TESTB. + *) + and test(testopcode, a, b, an) = + let val (_, opnd1, opnd2) = commuteComparison(T.EQ, true, a, b) + (* translate r, r/m => r/m, r *) + val (opnd1, opnd2) = + if isMemOpnd opnd2 then (opnd2, opnd1) else (opnd1, opnd2) + in mark(testopcode{lsrc=opnd1, rsrc=opnd2}, an) + end + + (* %eflags <- src *) + and moveToEflags src = + if CB.sameColor(src, C.eflags) then () + else (move(I.Direct src, eax); emit(I.LAHF)) + + (* dst <- %eflags *) + and moveFromEflags dst = + if CB.sameColor(dst, C.eflags) then () + else (emit(I.SAHF); move(eax, I.Direct dst)) + + (* generate a condition code expression + * The zero is for setting the condition code! + * I have no idea why this is used. + *) + and doCCexpr(T.CMP(ty, cc, t1, t2), rd, an) = + (cmp(false, ty, cc, t1, t2, an); + moveFromEflags rd + ) + | doCCexpr(T.CC(cond,rs), rd, an) = + if CB.sameColor(rs,C.eflags) orelse CB.sameColor(rd,C.eflags) then + (moveToEflags rs; moveFromEflags rd) + else + move'(I.Direct rs, I.Direct rd, an) + | doCCexpr(T.CCMARK(e,A.MARKREG f),rd,an) = (f rd; doCCexpr(e,rd,an)) + | doCCexpr(T.CCMARK(e,a), rd, an) = doCCexpr(e,rd,a::an) + | doCCexpr(T.CCEXT e, cd, an) = + ExtensionComp.compileCCext (reducer()) {e=e, ccd=cd, an=an} + | doCCexpr _ = error "doCCexpr" + + and ccExpr e = error "ccExpr" + + (* generate a comparison and sets the condition code; + * return the actual cc used. If the flag swapable is true, + * we can also reorder the operands. + *) + and cmp(swapable, ty, cc, t1, t2, an) = + (* == and <> can be always be reordered *) + let val swapable = swapable orelse cc = T.EQ orelse cc = T.NE + in (* Sometimes the comparison is not necessary because + * the bits are already set! + *) + if isZero t1 andalso setZeroBit2 t2 then + if swapable then + cmpWithZero(T.Basis.swapCond cc, t2, an) + else (* can't reorder the comparison! *) + genCmp(ty, false, cc, t1, t2, an) + else if isZero t2 andalso setZeroBit2 t1 then + cmpWithZero(cc, t1, an) + else genCmp(ty, swapable, cc, t1, t2, an) + end + + (* Give a and b which are the operands to a comparison (or test) + * Return the appropriate condition code and operands. + * The available modes are: + * r/m, imm + * r/m, r + * r, r/m + *) + and commuteComparison(cc, swapable, a, b) = + let val (opnd1, opnd2) = (operand a, operand b) + in (* Try to fold in the operands whenever possible *) + case (isImmediate opnd1, isImmediate opnd2) of + (true, true) => (cc, moveToReg opnd1, opnd2) + | (true, false) => + if swapable then (T.Basis.swapCond cc, opnd2, opnd1) + else (cc, moveToReg opnd1, opnd2) + | (false, true) => (cc, opnd1, opnd2) + | (false, false) => + (case (opnd1, opnd2) of + (_, I.Direct _) => (cc, opnd1, opnd2) + | (I.Direct _, _) => (cc, opnd1, opnd2) + | (_, _) => (cc, moveToReg opnd1, opnd2) + ) + end + + (* generate a real comparison; return the real cc used *) + and genCmp(ty, swapable, cc, a, b, an) = + let val (cc, opnd1, opnd2) = commuteComparison(cc, swapable, a, b) + in mark(I.CMPL{lsrc=opnd1, rsrc=opnd2}, an); cc + end + + (* generate code for jumps *) + and jmp(lexp as T.LABEL lab, labs, an) = + mark(I.JMP(I.ImmedLabel lexp, [lab]), an) + | jmp(T.LABEXP le, labs, an) = mark(I.JMP(I.ImmedLabel le, labs), an) + | jmp(ea, labs, an) = mark(I.JMP(operand ea, labs), an) + + (* convert mlrisc to cellset: + *) + and cellset mlrisc = + let val addCCReg = CB.CellSet.add + fun g([],acc) = acc + | g(T.GPR(T.REG(_,r))::regs,acc) = g(regs,C.addReg(r,acc)) + | g(T.FPR(T.FREG(_,f))::regs,acc) = g(regs,C.addFreg(f,acc)) + | g(T.CCR(T.CC(_,cc))::regs,acc) = g(regs,addCCReg(cc,acc)) + | g(T.CCR(T.FCC(_,cc))::regs,acc) = g(regs,addCCReg(cc,acc)) + | g(_::regs, acc) = g(regs, acc) + in g(mlrisc, C.empty) end + + (* generate code for calls *) + and call(ea, flow, def, use, mem, cutsTo, an, pops) = + let fun return(set, []) = set + | return(set, a::an) = + case #peek A.RETURN_ARG a of + SOME r => return(CB.CellSet.add(r, set), an) + | NONE => return(set, an) + in + mark(I.CALL{opnd=operand ea,defs=cellset(def),uses=cellset(use), + return=return(C.empty,an),cutsTo=cutsTo,mem=mem, + pops=pops},an) + end + + (* generate code for integer stores; first move data to %eax + * This is mainly because we can't allocate to registers like + * ah, dl, dx etc. + *) + and genStore(mvOp, ea, d, mem, an) = + let val src = + case immedOrReg(operand d) of + src as I.Direct r => + if CB.sameColor(r,C.eax) + then src else (move(src, eax); eax) + | src => src + in mark(I.MOVE{mvOp=mvOp, src=src, dst=address(ea,mem)},an) + end + + (* generate code for 8-bit integer stores *) + (* movb has to use %eax as source. Stupid x86! *) + and store8(ea, d, mem, an) = genStore(I.MOVB, ea, d, mem, an) + and store16(ea, d, mem, an) = + mark(I.MOVE{mvOp=I.MOVW, src=immedOrReg(operand d), dst=address(ea, mem)}, an) + and store32(ea, d, mem, an) = + move'(immedOrReg(operand d), address(ea, mem), an) + + (* generate code for branching *) + and branch(T.CMP(ty, cc, t1, t2), lab, an) = + (* allow reordering of operands *) + let val cc = cmp(true, ty, cc, t1, t2, []) + in mark(I.JCC{cond=cond cc, opnd=immedLabel lab}, an) end + | branch(T.FCMP(fty, fcc, t1, t2), lab, an) = + fbranch(fty, fcc, t1, t2, lab, an) + | branch(ccexp, lab, an) = + (doCCexpr(ccexp, C.eflags, []); + mark(I.JCC{cond=cond(Gen.condOf ccexp), opnd=immedLabel lab}, an) + ) + + (* generate code for floating point compare and branch *) + and fbranch(fty, fcc, t1, t2, lab, an) = + let fun j cc = mark(I.JCC{cond=cc, opnd=immedLabel lab},an) + in fbranching(fty, fcc, t1, t2, j) + end + + and fbranching(fty, fcc, t1, t2, j) = + let fun ignoreOrder (T.FREG _) = true + | ignoreOrder (T.FLOAD _) = true + | ignoreOrder (T.FMARK(e,_)) = ignoreOrder e + | ignoreOrder _ = false + + fun compare'() = (* Sethi-Ullman style *) + (if ignoreOrder t1 orelse ignoreOrder t2 then + (reduceFexp(fty, t2, []); reduceFexp(fty, t1, [])) + else (reduceFexp(fty, t1, []); reduceFexp(fty, t2, []); + emit(I.FXCH{opnd=C.ST(1)})); + emit(I.FUCOMPP); + fcc + ) + + fun compare''() = + (* direct style *) + (* Try to make lsrc the memory operand *) + let val lsrc = foperand(fty, t1) + val rsrc = foperand(fty, t2) + val fsize = fsize fty + fun cmp(lsrc, rsrc, fcc) = + let val i = !arch <> Pentium + in emit(I.FCMP{i=i,fsize=fsize,lsrc=lsrc,rsrc=rsrc}); + fcc + end + in case (lsrc, rsrc) of + (I.FPR _, I.FPR _) => cmp(lsrc, rsrc, fcc) + | (I.FPR _, mem) => cmp(mem,lsrc,T.Basis.swapFcond fcc) + | (mem, I.FPR _) => cmp(lsrc, rsrc, fcc) + | (lsrc, rsrc) => (* can't be both memory! *) + let val ftmpR = newFreg() + val ftmp = I.FPR ftmpR + in emit(I.FMOVE{fsize=fsize,src=rsrc,dst=ftmp}); + cmp(lsrc, ftmp, fcc) + end + end + + fun compare() = + if enableFastFPMode andalso !fast_floating_point + then compare''() else compare'() + + fun andil i = emit(I.BINARY{binOp=I.ANDL,src=I.Immed(i),dst=eax}) + fun testil i = emit(I.TESTL{lsrc=eax,rsrc=I.Immed(i)}) + fun xoril i = emit(I.BINARY{binOp=I.XORL,src=I.Immed(i),dst=eax}) + fun cmpil i = emit(I.CMPL{rsrc=I.Immed(i), lsrc=eax}) + fun sahf() = emit(I.SAHF) + fun branch(fcc) = + case fcc + of T.== => (andil 0x4400; xoril 0x4000; j(I.EQ)) + | T.?<> => (andil 0x4400; xoril 0x4000; j(I.NE)) + | T.? => (sahf(); j(I.P)) + | T.<=> => (sahf(); j(I.NP)) + | T.> => (testil 0x4500; j(I.EQ)) + | T.?<= => (testil 0x4500; j(I.NE)) + | T.>= => (testil 0x500; j(I.EQ)) + | T.?< => (testil 0x500; j(I.NE)) + | T.< => (andil 0x4500; cmpil 0x100; j(I.EQ)) + | T.?>= => (andil 0x4500; cmpil 0x100; j(I.NE)) + | T.<= => (andil 0x4100; cmpil 0x100; j(I.EQ); + cmpil 0x4000; j(I.EQ)) + | T.?> => (sahf(); j(I.P); testil 0x4100; j(I.EQ)) + | T.<> => (testil 0x4400; j(I.EQ)) + | T.?= => (testil 0x4400; j(I.NE)) + | _ => error(concat[ + "fbranch(", T.Basis.fcondToString fcc, ")" + ]) + (*esac*) + + (* + * P Z C + * x < y 0 0 1 + * x > y 0 0 0 + * x = y 0 1 0 + * unordered 1 1 1 + * When it's unordered, all three flags, P, Z, C are set. + *) + + fun fast_branch(fcc) = + case fcc + of T.== => orderedOnly(I.EQ) + | T.?<> => (j(I.P); j(I.NE)) + | T.? => j(I.P) + | T.<=> => j(I.NP) + | T.> => j(I.A) (* "JA" tests that _both_ Z and C are zero, so + * we do not need a separate test of P. + *) + | T.?<= => j(I.BE) + | T.>= => orderedOnly(I.AE) + | T.?< => j(I.B) + | T.< => orderedOnly(I.B) + | T.?>= => (j(I.P); j(I.AE)) + | T.<= => orderedOnly(I.BE) + | T.?> => (j(I.P); j(I.A)) + | T.<> => orderedOnly(I.NE) + | T.?= => j(I.EQ) + | _ => error(concat[ + "fbranch(", T.Basis.fcondToString fcc, ")" + ]) + (*esac*) + and orderedOnly fcc = + let val label = Label.anon() + in emit(I.JCC{cond=I.P, opnd=immedLabel label}); + j fcc; + defineLabel label + end + + val fcc = compare() + in if !arch <> Pentium andalso + (enableFastFPMode andalso !fast_floating_point) then + fast_branch(fcc) + else + (emit I.FNSTSW; + branch(fcc) + ) + end + + (*======================================================== + * Floating point code generation starts here. + * Some generic fp routines first. + *========================================================*) + + (* Can this tree be folded into the src operand of a floating point + * operations? + *) + and foldableFexp(T.FREG _) = true + | foldableFexp(T.FLOAD _) = true + | foldableFexp(T.CVTI2F(_, (16 | 32), _)) = true + | foldableFexp(T.CVTF2F(_, _, t)) = foldableFexp t + | foldableFexp(T.FMARK(t, _)) = foldableFexp t + | foldableFexp _ = false + + (* Move integer e of size ty into a memory location. + * Returns a quadruple: + * (INTEGER,return ty,effect address of memory location,cleanup code) + *) + and convertIntToFloat(ty, e) = + let val opnd = operand e + in if isMemOpnd opnd andalso (ty = 16 orelse ty = 32) + then (INTEGER, ty, opnd, []) + else + let val {instrs, tempMem, cleanup} = + cvti2f{ty=ty, src=opnd, an=getAnnotations()} + in emits instrs; + (INTEGER, 32, tempMem, cleanup) + end + end + + (*======================================================== + * Sethi-Ullman based floating point code generation as + * implemented by Lal + *========================================================*) + + and fld(32, opnd) = I.FLDS opnd + | fld(64, opnd) = I.FLDL opnd + | fld(80, opnd) = I.FLDT opnd + | fld _ = error "fld" + + and fild(16, opnd) = I.FILD opnd + | fild(32, opnd) = I.FILDL opnd + | fild(64, opnd) = I.FILDLL opnd + | fild _ = error "fild" + + and fxld(INTEGER, ty, opnd) = fild(ty, opnd) + | fxld(REAL, fty, opnd) = fld(fty, opnd) + + and fstp(32, opnd) = I.FSTPS opnd + | fstp(64, opnd) = I.FSTPL opnd + | fstp(80, opnd) = I.FSTPT opnd + | fstp _ = error "fstp" + + (* generate code for floating point stores *) + and fstore'(fty, ea, d, mem, an) = + (case d of + T.FREG(fty, fs) => emit(fld(fty, I.FDirect fs)) + | _ => reduceFexp(fty, d, []); + mark(fstp(fty, address(ea, mem)), an) + ) + + (* generate code for floating point loads *) + and fload'(fty, ea, mem, fd, an) = + let val ea = address(ea, mem) + in mark(fld(fty, ea), an); + if CB.sameColor(fd,ST0) then () + else emit(fstp(fty, I.FDirect fd)) + end + + and fexpr' e = (reduceFexp(64, e, []); C.ST(0)) + + (* generate floating point expression and put the result in fd *) + and doFexpr'(fty, T.FREG(_, fs), fd, an) = + (if CB.sameColor(fs,fd) then () + else mark'(I.COPY{k=CB.FP, sz=64, dst=[fd], src=[fs], tmp=NONE}, an) + ) + | doFexpr'(_, T.FLOAD(fty, ea, mem), fd, an) = + fload'(fty, ea, mem, fd, an) + | doFexpr'(fty, T.FEXT fexp, fd, an) = + (ExtensionComp.compileFext (reducer()) {e=fexp, fd=fd, an=an}; + if CB.sameColor(fd,ST0) then () else emit(fstp(fty, I.FDirect fd)) + ) + | doFexpr'(fty, e, fd, an) = + (reduceFexp(fty, e, []); + if CB.sameColor(fd,ST0) then () + else mark(fstp(fty, I.FDirect fd), an) + ) + + (* + * Generate floating point expression using Sethi-Ullman's scheme: + * This function evaluates a floating point expression, + * and put result in %ST(0). + *) + and reduceFexp(fty, fexp, an) = + let val ST = I.ST(C.ST 0) + val ST1 = I.ST(C.ST 1) + val cleanupCode = ref [] : I.instruction list ref + + datatype su_tree = + LEAF of int * T.fexp * ans + | BINARY of int * T.fty * fbinop * su_tree * su_tree * ans + | UNARY of int * T.fty * I.funOp * su_tree * ans + and fbinop = FADD | FSUB | FMUL | FDIV + | FIADD | FISUB | FIMUL | FIDIV + withtype ans = Annotations.annotations + + fun label(LEAF(n, _, _)) = n + | label(BINARY(n, _, _, _, _, _)) = n + | label(UNARY(n, _, _, _, _)) = n + + fun annotate(LEAF(n, x, an), a) = LEAF(n,x,a::an) + | annotate(BINARY(n,t,b,x,y,an), a) = BINARY(n,t,b,x,y,a::an) + | annotate(UNARY(n,t,u,x,an), a) = UNARY(n,t,u,x,a::an) + + (* Generate expression tree with sethi-ullman numbers *) + fun su(e as T.FREG _) = LEAF(1, e, []) + | su(e as T.FLOAD _) = LEAF(1, e, []) + | su(e as T.CVTI2F _) = LEAF(1, e, []) + | su(T.CVTF2F(_, _, t)) = su t + | su(T.FMARK(t, a)) = annotate(su t, a) + | su(T.FABS(fty, t)) = suUnary(fty, I.FABS, t) + | su(T.FNEG(fty, t)) = suUnary(fty, I.FCHS, t) + | su(T.FSQRT(fty, t)) = suUnary(fty, I.FSQRT, t) + | su(T.FADD(fty, t1, t2)) = suComBinary(fty,FADD,FIADD,t1,t2) + | su(T.FMUL(fty, t1, t2)) = suComBinary(fty,FMUL,FIMUL,t1,t2) + | su(T.FSUB(fty, t1, t2)) = suBinary(fty,FSUB,FISUB,t1,t2) + | su(T.FDIV(fty, t1, t2)) = suBinary(fty,FDIV,FIDIV,t1,t2) + | su _ = error "su" + + (* Try to fold the the memory operand or integer conversion *) + and suFold(e as T.FREG _) = (LEAF(0, e, []), false) + | suFold(e as T.FLOAD _) = (LEAF(0, e, []), false) + | suFold(e as T.CVTI2F(_,(16 | 32),_)) = (LEAF(0, e, []), true) + | suFold(T.CVTF2F(_, _, t)) = suFold t + | suFold(T.FMARK(t, a)) = + let val (t, integer) = suFold t + in (annotate(t, a), integer) end + | suFold e = (su e, false) + + (* Form unary tree *) + and suUnary(fty, funary, t) = + let val t = su t + in UNARY(label t, fty, funary, t, []) + end + + (* Form binary tree *) + and suBinary(fty, binop, ibinop, t1, t2) = + let val t1 = su t1 + val (t2, integer) = suFold t2 + val n1 = label t1 + val n2 = label t2 + val n = if n1=n2 then n1+1 else Int.max(n1,n2) + val myOp = if integer then ibinop else binop + in BINARY(n, fty, myOp, t1, t2, []) + end + + (* Try to fold in the operand if possible. + * This only applies to commutative operations. + *) + and suComBinary(fty, binop, ibinop, t1, t2) = + let val (t1, t2) = if foldableFexp t2 + then (t1, t2) else (t2, t1) + in suBinary(fty, binop, ibinop, t1, t2) end + + and sameTree(LEAF(_, T.FREG(t1,f1), []), + LEAF(_, T.FREG(t2,f2), [])) = + t1 = t2 andalso CB.sameColor(f1,f2) + | sameTree _ = false + + (* Traverse tree and generate code *) + fun gencode(LEAF(_, t, an)) = mark(fxld(leafEA t), an) + | gencode(BINARY(_, _, binop, x, t2 as LEAF(0, y, a1), a2)) = + let val _ = gencode x + val (_, fty, src) = leafEA y + fun gen(code) = mark(code, a1 @ a2) + fun binary(oper32, oper64) = + if sameTree(x, t2) then + gen(I.FBINARY{binOp=oper64, src=ST, dst=ST}) + else + let val oper = + if isMemOpnd src then + case fty of + 32 => oper32 + | 64 => oper64 + | _ => error "gencode: BINARY" + else oper64 + in gen(I.FBINARY{binOp=oper, src=src, dst=ST}) end + fun ibinary(oper16, oper32) = + let val oper = case fty of + 16 => oper16 + | 32 => oper32 + | _ => error "gencode: IBINARY" + in gen(I.FIBINARY{binOp=oper, src=src}) end + in case binop of + FADD => binary(I.FADDS, I.FADDL) + | FSUB => binary(I.FDIVS, I.FSUBL) + | FMUL => binary(I.FMULS, I.FMULL) + | FDIV => binary(I.FDIVS, I.FDIVL) + | FIADD => ibinary(I.FIADDS, I.FIADDL) + | FISUB => ibinary(I.FIDIVS, I.FISUBL) + | FIMUL => ibinary(I.FIMULS, I.FIMULL) + | FIDIV => ibinary(I.FIDIVS, I.FIDIVL) + end + | gencode(BINARY(_, fty, binop, t1, t2, an)) = + let fun doit(t1, t2, oper, operP, operRP) = + let (* oper[P] => ST(1) := ST oper ST(1); [pop] + * operR[P] => ST(1) := ST(1) oper ST; [pop] + *) + val n1 = label t1 + val n2 = label t2 + in if n1 < n2 andalso n1 <= 7 then + (gencode t2; + gencode t1; + mark(I.FBINARY{binOp=operP, src=ST, dst=ST1}, an)) + else if n2 <= n1 andalso n2 <= 7 then + (gencode t1; + gencode t2; + mark(I.FBINARY{binOp=operRP, src=ST, dst=ST1}, an)) + else + let (* both labels > 7 *) + val fs = I.FDirect(newFreg()) + in gencode t2; + emit(fstp(fty, fs)); + gencode t1; + mark(I.FBINARY{binOp=oper, src=fs, dst=ST}, an) + end + end + in case binop of + FADD => doit(t1,t2,I.FADDL,I.FADDP,I.FADDP) + | FMUL => doit(t1,t2,I.FMULL,I.FMULP,I.FMULP) + | FSUB => doit(t1,t2,I.FSUBL,I.FSUBP,I.FSUBRP) + | FDIV => doit(t1,t2,I.FDIVL,I.FDIVP,I.FDIVRP) + | _ => error "gencode.BINARY" + end + | gencode(UNARY(_, _, unaryOp, su, an)) = + (gencode(su); mark(I.FUNARY(unaryOp),an)) + + (* Generate code for a leaf. + * Returns the type and an effective address + *) + and leafEA(T.FREG(fty, f)) = (REAL, fty, I.FDirect f) + | leafEA(T.FLOAD(fty, ea, mem)) = (REAL, fty, address(ea, mem)) + | leafEA(T.CVTI2F(_, 32, t)) = int2real(32, t) + | leafEA(T.CVTI2F(_, 16, t)) = int2real(16, t) + | leafEA(T.CVTI2F(_, 8, t)) = int2real(8, t) + | leafEA _ = error "leafEA" + + and int2real(ty, e) = + let val (_, ty, ea, cleanup) = convertIntToFloat(ty, e) + in cleanupCode := !cleanupCode @ cleanup; + (INTEGER, ty, ea) + end + + in gencode(su fexp); + emits(!cleanupCode) + end (*reduceFexp*) + + (*======================================================== + * This section generates 3-address style floating + * point code. + *========================================================*) + + and isize 16 = I.I16 + | isize 32 = I.I32 + | isize _ = error "isize" + + and fstore''(fty, ea, d, mem, an) = + (floatingPointUsed := true; + mark(I.FMOVE{fsize=fsize fty, dst=address(ea,mem), + src=foperand(fty, d)}, + an) + ) + + and fload''(fty, ea, mem, d, an) = + (floatingPointUsed := true; + mark(I.FMOVE{fsize=fsize fty, src=address(ea,mem), + dst=RealReg d}, an) + ) + + and fiload''(ity, ea, d, an) = + (floatingPointUsed := true; + mark(I.FILOAD{isize=isize ity, ea=ea, dst=RealReg d}, an) + ) + + and fexpr''(e as T.FREG(_,f)) = + if isFMemReg f then transFexpr e else f + | fexpr'' e = transFexpr e + + and transFexpr e = + let val fd = newFreg() in doFexpr''(64, e, fd, []); fd end + + (* + * Process a floating point operand. Put operand in register + * when possible. The operand should match the given fty. + *) + and foperand(fty, e as T.FREG(fty', f)) = + if fty = fty' then RealReg f else I.FPR(fexpr'' e) + | foperand(fty, T.CVTF2F(_, _, e)) = + foperand(fty, e) (* nop on the x86 *) + | foperand(fty, e as T.FLOAD(fty', ea, mem)) = + (* fold operand when the precison matches *) + if fty = fty' then address(ea, mem) else I.FPR(fexpr'' e) + | foperand(fty, e) = I.FPR(fexpr'' e) + + (* + * Process a floating point operand. + * Try to fold in a memory operand or conversion from an integer. + *) + and fioperand(T.FREG(fty,f)) = (REAL, fty, RealReg f, []) + | fioperand(T.FLOAD(fty, ea, mem)) = + (REAL, fty, address(ea, mem), []) + | fioperand(T.CVTF2F(_, _, e)) = fioperand(e) (* nop on the x86 *) + | fioperand(T.CVTI2F(_, ty, e)) = convertIntToFloat(ty, e) + | fioperand(T.FMARK(e,an)) = fioperand(e) (* XXX *) + | fioperand(e) = (REAL, 64, I.FPR(fexpr'' e), []) + + (* Generate binary operator. Since the real binary operators + * does not take memory as destination, we also ensure this + * does not happen. + *) + and fbinop(targetFty, + binOp, binOpR, ibinOp, ibinOpR, lsrc, rsrc, fd, an) = + (* Put the mem operand in rsrc *) + let + fun isMemOpnd(T.FREG(_, f)) = isFMemReg f + | isMemOpnd(T.FLOAD _) = true + | isMemOpnd(T.CVTI2F(_, (16 | 32), _)) = true + | isMemOpnd(T.CVTF2F(_, _, t)) = isMemOpnd t + | isMemOpnd(T.FMARK(t, _)) = isMemOpnd t + | isMemOpnd _ = false + val (binOp, ibinOp, lsrc, rsrc) = + if isMemOpnd lsrc then (binOpR, ibinOpR, rsrc, lsrc) + else (binOp, ibinOp, lsrc, rsrc) + val lsrc = foperand(targetFty, lsrc) + val (kind, fty, rsrc, code) = fioperand(rsrc) + fun dstMustBeFreg f = + if targetFty <> 64 then + let val tmpR = newFreg() + val tmp = I.FPR tmpR + in mark(f tmp, an); + emit(I.FMOVE{fsize=fsize targetFty, + src=tmp, dst=RealReg fd}) + end + else mark(f(RealReg fd), an) + in case kind of + REAL => + dstMustBeFreg(fn dst => + I.FBINOP{fsize=fsize fty, binOp=binOp, + lsrc=lsrc, rsrc=rsrc, dst=dst}) + | INTEGER => + (dstMustBeFreg(fn dst => + I.FIBINOP{isize=isize fty, binOp=ibinOp, + lsrc=lsrc, rsrc=rsrc, dst=dst}); + emits code + ) + end + + and funop(fty, unOp, src, fd, an) = + let val src = foperand(fty, src) + in mark(I.FUNOP{fsize=fsize fty, + unOp=unOp, src=src, dst=RealReg fd},an) + end + + and doFexpr''(fty, e, fd, an) = + (floatingPointUsed := true; + case e of + T.FREG(_,fs) => if CB.sameColor(fs,fd) then () + else fcopy''(fty, [fd], [fs], an) + (* Stupid x86 does everything as 80-bits internally. *) + + (* Binary operators *) + | T.FADD(_, a, b) => fbinop(fty, + I.FADDL, I.FADDL, I.FIADDL, I.FIADDL, + a, b, fd, an) + | T.FSUB(_, a, b) => fbinop(fty, + I.FSUBL, I.FSUBRL, I.FISUBL, I.FISUBRL, + a, b, fd, an) + | T.FMUL(_, a, b) => fbinop(fty, + I.FMULL, I.FMULL, I.FIMULL, I.FIMULL, + a, b, fd, an) + | T.FDIV(_, a, b) => fbinop(fty, + I.FDIVL, I.FDIVRL, I.FIDIVL, I.FIDIVRL, + a, b, fd, an) + + (* Unary operators *) + | T.FNEG(_, a) => funop(fty, I.FCHS, a, fd, an) + | T.FABS(_, a) => funop(fty, I.FABS, a, fd, an) + | T.FSQRT(_, a) => funop(fty, I.FSQRT, a, fd, an) + + (* Load *) + | T.FLOAD(fty,ea,mem) => fload''(fty, ea, mem, fd, an) + + (* Type conversions *) + | T.CVTF2F(_, _, e) => doFexpr''(fty, e, fd, an) + | T.CVTI2F(_, ty, e) => + let val (_, ty, ea, cleanup) = convertIntToFloat(ty, e) + in fiload''(ty, ea, fd, an); + emits cleanup + end + + | T.FMARK(e,A.MARKREG f) => (f fd; doFexpr''(fty, e, fd, an)) + | T.FMARK(e, a) => doFexpr''(fty, e, fd, a::an) + | T.FPRED(e, c) => doFexpr''(fty, e, fd, A.CTRLUSE c::an) + | T.FEXT fexp => + ExtensionComp.compileFext (reducer()) {e=fexp, fd=fd, an=an} + | _ => error("doFexpr''") + ) + + (*======================================================== + * Tie the two styles of fp code generation together + *========================================================*) + and fstore(fty, ea, d, mem, an) = + if enableFastFPMode andalso !fast_floating_point + then fstore''(fty, ea, d, mem, an) + else fstore'(fty, ea, d, mem, an) + and fload(fty, ea, d, mem, an) = + if enableFastFPMode andalso !fast_floating_point + then fload''(fty, ea, d, mem, an) + else fload'(fty, ea, d, mem, an) + and fexpr e = + if enableFastFPMode andalso !fast_floating_point + then fexpr'' e else fexpr' e + and doFexpr(fty, e, fd, an) = + if enableFastFPMode andalso !fast_floating_point + then doFexpr''(fty, e, fd, an) + else doFexpr'(fty, e, fd, an) + + (*================================================================ + * Optimizations for x := x op y + * Special optimizations: + * Generate a binary operator, result must in memory. + * The source must not be in memory + *================================================================*) + and binaryMem(binOp, src, dst, mem, an) = + mark(I.BINARY{binOp=binOp, src=immedOrReg(operand src), + dst=address(dst,mem)}, an) + and unaryMem(unOp, opnd, mem, an) = + mark(I.UNARY{unOp=unOp, opnd=address(opnd,mem)}, an) + + and isOne(T.LI n) = n = 1 + | isOne _ = false + + (* + * Perform optimizations based on recognizing + * x := x op y or + * x := y op x + * first. + *) + and store(ty, ea, d, mem, an, + {INC,DEC,ADD,SUB,NOT,NEG,SHL,SHR,SAR,OR,AND,XOR}, + doStore + ) = + let fun default() = doStore(ea, d, mem, an) + fun binary1(t, t', unary, binary, ea', x) = + if t = ty andalso t' = ty then + if MLTreeUtils.eqRexp(ea, ea') then + if isOne x then unaryMem(unary, ea, mem, an) + else binaryMem(binary, x, ea, mem, an) + else default() + else default() + fun unary(t,unOp, ea') = + if t = ty andalso MLTreeUtils.eqRexp(ea, ea') then + unaryMem(unOp, ea, mem, an) + else default() + fun binary(t,t',binOp,ea',x) = + if t = ty andalso t' = ty andalso + MLTreeUtils.eqRexp(ea, ea') then + binaryMem(binOp, x, ea, mem, an) + else default() + + fun binaryCom1(t,unOp,binOp,x,y) = + if t = ty then + let fun again() = + case y of + T.LOAD(ty',ea',_) => + if ty' = ty andalso MLTreeUtils.eqRexp(ea, ea') then + if isOne x then unaryMem(unOp, ea, mem, an) + else binaryMem(binOp,x,ea,mem,an) + else default() + | _ => default() + in case x of + T.LOAD(ty',ea',_) => + if ty' = ty andalso MLTreeUtils.eqRexp(ea, ea') then + if isOne y then unaryMem(unOp, ea, mem, an) + else binaryMem(binOp,y,ea,mem,an) + else again() + | _ => again() + end + else default() + + fun binaryCom(t,binOp,x,y) = + if t = ty then + let fun again() = + case y of + T.LOAD(ty',ea',_) => + if ty' = ty andalso MLTreeUtils.eqRexp(ea, ea') then + binaryMem(binOp,x,ea,mem,an) + else default() + | _ => default() + in case x of + T.LOAD(ty',ea',_) => + if ty' = ty andalso MLTreeUtils.eqRexp(ea, ea') then + binaryMem(binOp,y,ea,mem,an) + else again() + | _ => again() + end + else default() + + in case d of + T.ADD(t,x,y) => binaryCom1(t,INC,ADD,x,y) + | T.SUB(t,T.LOAD(t',ea',_),x) => binary1(t,t',DEC,SUB,ea',x) + | T.ORB(t,x,y) => binaryCom(t,OR,x,y) + | T.ANDB(t,x,y) => binaryCom(t,AND,x,y) + | T.XORB(t,x,y) => binaryCom(t,XOR,x,y) + | T.SLL(t,T.LOAD(t',ea',_),x) => binary(t,t',SHL,ea',x) + | T.SRL(t,T.LOAD(t',ea',_),x) => binary(t,t',SHR,ea',x) + | T.SRA(t,T.LOAD(t',ea',_),x) => binary(t,t',SAR,ea',x) + | T.NEG(t,T.LOAD(t',ea',_)) => unary(t,NEG,ea') + | T.NOTB(t,T.LOAD(t',ea',_)) => unary(t,NOT,ea') + | _ => default() + end (* store *) + + (* generate code for a statement *) + and stmt(T.MV(_, rd, e), an) = doExpr(e, rd, an) + | stmt(T.FMV(fty, fd, e), an) = doFexpr(fty, e, fd, an) + | stmt(T.CCMV(ccd, e), an) = doCCexpr(e, ccd, an) + | stmt(T.COPY(_, dst, src), an) = copy(dst, src, an) + | stmt(T.FCOPY(fty, dst, src), an) = fcopy(fty, dst, src, an) + | stmt(T.JMP(e, labs), an) = jmp(e, labs, an) + | stmt(T.CALL{funct, targets, defs, uses, region, pops, ...}, an) = + call(funct,targets,defs,uses,region,[],an, pops) + | stmt(T.FLOW_TO(T.CALL{funct, targets, defs, uses, region, pops, ...}, + cutTo), an) = + call(funct,targets,defs,uses,region,cutTo,an, pops) + | stmt(T.RET _, an) = mark(I.RET NONE, an) + | stmt(T.STORE(8, ea, d, mem), an) = + store(8, ea, d, mem, an, opcodes8, store8) + | stmt(T.STORE(16, ea, d, mem), an) = + store(16, ea, d, mem, an, opcodes16, store16) + | stmt(T.STORE(32, ea, d, mem), an) = + store(32, ea, d, mem, an, opcodes32, store32) + | stmt(T.STORE(64, ea, d, mem), an) = + store(32, ea, d, mem, an, opcodes32, store32) (* XXX64 *) + + | stmt(T.FSTORE(fty, ea, d, mem), an) = fstore(fty, ea, d, mem, an) + | stmt(T.BCC(cc, lab), an) = branch(cc, lab, an) + | stmt(T.DEFINE l, _) = defineLabel l + | stmt(T.LIVE S, an) = mark'(I.LIVE{regs=cellset S,spilled=C.empty},an) + | stmt(T.KILL S, an) = mark'(I.KILL{regs=cellset S,spilled=C.empty},an) + | stmt(T.ANNOTATION(s, a), an) = stmt(s, a::an) + | stmt(T.EXT s, an) = + ExtensionComp.compileSext (reducer()) {stm=s, an=an} + | stmt(s, _) = doStmts(Gen.compileStm s) + + and doStmt s = stmt(s, []) + + and doStmts ss = app doStmt ss + + and beginCluster' _ = + ((* Must be cleared by the client. + * if rewriteMemReg then memRegsUsed := 0w0 else (); + *) + floatingPointUsed := false; + trapLabel := NONE; + beginCluster 0 + ) + and endCluster' a = + (case !trapLabel + of NONE => () + | SOME(_, lab) => (defineLabel lab; emit(I.INTO)) + (*esac*); + (* If floating point has been used allocate an extra + * register just in case we didn't use any explicit register + *) + if !floatingPointUsed then (newFreg(); ()) + else (); + endCluster(a) + ) + + and reducer() = + TS.REDUCER{reduceRexp = expr, + reduceFexp = fexpr, + reduceCCexp = ccExpr, + reduceStm = stmt, + operand = operand, + reduceOperand = reduceOpnd, + addressOf = fn e => address(e, I.Region.memory), (*XXX*) + emit = mark', + instrStream = instrStream, + mltreeStream = self() + } + + and self() = + TS.S.STREAM + { beginCluster = beginCluster', + endCluster = endCluster', + emit = doStmt, + pseudoOp = pseudoOp, + defineLabel = defineLabel, + entryLabel = entryLabel, + comment = comment, + annotation = annotation, + getAnnotations = getAnnotations, + exitBlock = fn mlrisc => exitBlock(cellset mlrisc) + } + + in self() + end + +end (* functor *) + +end (* local *) diff --git a/MLRISC/x86/mltree/x86RA.sml b/MLRISC/x86/mltree/x86RA.sml new file mode 100644 index 0000000..71d62ec --- /dev/null +++ b/MLRISC/x86/mltree/x86RA.sml @@ -0,0 +1,499 @@ +(* + * X86 specific register allocator. + * This module abstracts out all the nasty RA business on the x86. + * So you should only have to write the callbacks. + *) + +functor X86RA + ( structure I : X86INSTR + structure InsnProps : INSN_PROPERTIES where I = I + structure F : FLOWGRAPH where I = I + structure Asm : INSTRUCTION_EMITTER where I = I + + (* Spilling heuristics determines which node should be spilled. + * You can use Chaitin, ChowHenessey, or one of your own. + *) + structure SpillHeur : RA_SPILL_HEURISTICS + + (* The Spill module figures out the strategies for inserting + * spill code. You can use RASpill, or RASpillWithRenaming, + * or write your own if you are feeling adventurous. + *) + structure Spill : RA_SPILL where I = I + + sharing F.P = Asm.P + + (* Should we use allocate register on the floating point stack? + * Note that this flag must match the one passed to the code generator + * module. + *) + val fast_floating_point : bool ref + + datatype raPhase = SPILL_PROPAGATION + | SPILL_COLORING + + (* Called before register allocation; perform your initialization here. *) + val beforeRA : unit -> unit + + (* Integer register allocation parameters *) + structure Int : + sig + val avail : I.C.cell list + val dedicated : I.C.cell list + val memRegs : I.C.cell list + val phases : raPhase list + + val spillLoc : Annotations.annotations ref * + RAGraph.logical_spill_id -> I.operand + + (* This function is called once before spilling begins *) + val spillInit : RAGraph.interferenceGraph -> unit + + end + + (* Floating point register allocation parameters *) + structure Float : + sig + (* Sethi-Ullman mode *) + val avail : I.C.cell list + val dedicated : I.C.cell list + val memRegs : I.C.cell list + val phases : raPhase list + + val spillLoc : Annotations.annotations ref * + RAGraph.logical_spill_id -> I.operand + + (* This function is called once before spilling begins *) + val spillInit : RAGraph.interferenceGraph -> unit + + (* When fast_floating_point is on, use these instead: *) + val fastMemRegs : I.C.cell list + val fastPhases : raPhase list + end + + ) : CLUSTER_OPTIMIZATION = +struct + + structure F = F + structure I = I + structure C = I.C + + val name = "X86RA" + + type flowgraph = F.cluster + + val intSpillCnt = MLRiscControl.getCounter "ra-int-spills" + val floatSpillCnt = MLRiscControl.getCounter "ra-float-spills" + val intReloadCnt = MLRiscControl.getCounter "ra-int-reloads" + val floatReloadCnt = MLRiscControl.getCounter "ra-float-reloads" + val intRenameCnt = MLRiscControl.getCounter "ra-int-renames" + val floatRenameCnt = MLRiscControl.getCounter "ra-float-renames" + val x86CfgDebugFlg = MLRiscControl.getFlag "x86-cfg-debug" + +(* + val deadcode = MLRiscControl.getCounter "x86-dead-code" + val deadblocks = MLRiscControl.getCounter "x86-dead-blocks" + *) + + structure PrintFlowGraph= + PrintCluster(structure Flowgraph=F + structure Asm = Asm) + + structure X86FP = + X86FP(structure X86Instr = I + structure X86Props = InsnProps + structure Flowgraph = F + structure Liveness = Liveness(F) + structure Asm = Asm + ) + + structure X86Spill = X86Spill(structure Instr=I structure Props=InsnProps) + + (* + * Dead code elimination + *) + exception X86DeadCode + val affectedBlocks = + IntHashTable.mkTable(32,X86DeadCode) : bool IntHashTable.hash_table + val deadRegs = + IntHashTable.mkTable(32,X86DeadCode) : bool IntHashTable.hash_table + fun removeDeadCode(F.CLUSTER{blocks, ...}) = + let val find = IntHashTable.find deadRegs + fun isDead r = + case find (C.registerId r) of + SOME _ => true + | NONE => false + fun isAffected i = getOpt (IntHashTable.find affectedBlocks i, false) + fun isDeadInstr(I.ANNOTATION{i, ...}) = isDeadInstr i + | isDeadInstr(I.MOVE{dst=I.Direct rd, ...}) = isDead rd + | isDeadInstr(I.MOVE{dst=I.MemReg rd, ...}) = isDead rd + | isDeadInstr(I.COPY{dst=[rd], ...}) = isDead rd + | isDeadInstr _ = false + fun scan [] = () + | scan(F.BBLOCK{blknum, insns, ...}::rest) = + (if isAffected blknum then + ((* deadblocks := !deadblocks + 1; *) + insns := elim(!insns, []) + ) else (); + scan rest) + | scan(_::rest) = scan rest + and elim([], code) = rev code + | elim(i::instrs, code) = + if isDeadInstr i then + ((* deadcode := !deadcode + 1; *) elim(instrs, code)) + else elim(instrs, i::code) + in if IntHashTable.numItems affectedBlocks > 0 then + (scan blocks; + IntHashTable.clear deadRegs; + IntHashTable.clear affectedBlocks) + else () + end + + (* This function finds out which pseudo memory registers are unused. + * Those that are unused are made available for spilling. + * The register allocator calls this function right before spilling + * a set of nodes. + *) + val firstSpill = ref true + val firstFPSpill = ref true + + fun spillInit(graph, I.C.GP) = + if !firstSpill then (* only do this once! *) + (Int.spillInit graph; + firstSpill := false + ) + else () + | spillInit(graph, I.C.FP) = + if !firstFPSpill then + (Float.spillInit graph; + firstFPSpill := false + ) + else () + + (* This is the generic register allocator *) + structure Ra = + RegisterAllocator + (SpillHeur) + (MemoryRA (* for memory coalescing *) + (RADeadCodeElim (* do the funky dead code elimination stuff *) + (ClusterRA + (structure Flowgraph = F + structure Asm = Asm + structure InsnProps = InsnProps + structure Spill = Spill + ) + ) + (fun cellkind I.C.GP = true | cellkind _ = false + val deadRegs = deadRegs + val affectedBlocks = affectedBlocks + val spillInit = spillInit + ) + ) + ) + + + (* ------------------------------------------------------------------- + * Floating point stuff + * -------------------------------------------------------------------*) + val KF32 = length Float.avail + structure FR32 = GetReg(val nRegs=KF32 + val available=map C.registerId Float.avail + val first=C.registerId(I.C.ST 8)) + + val availF8 = C.Regs C.FP {from=0, to=6, step=1} + val KF8 = length availF8 + structure FR8 = GetReg(val nRegs=KF8 + val available=map C.registerId availF8 + val first=C.registerId(I.C.ST 0)) + + (* ------------------------------------------------------------------- + * Callbacks for floating point K=32 + * -------------------------------------------------------------------*) + fun copyInstrF((rds as [_], rss as [_]), _) = + I.FCOPY{dst=rds, src=rss, tmp=NONE} + | copyInstrF((rds, rss), I.FCOPY{tmp, ...}) = + I.FCOPY{dst=rds, src=rss, tmp=tmp} + | copyInstrF(x, I.ANNOTATION{i,a}) = + I.ANNOTATION{i=copyInstrF(x, i), a=a} + + val copyInstrF = fn x => [copyInstrF x] + + fun getFregLoc(an, Ra.FRAME loc) = Float.spillLoc(an, loc) + | getFregLoc(an, Ra.MEM_REG r) = I.FDirect r + + (* spill floating point *) + fun spillF{instr, reg, spillLoc, kill, annotations=an} = + (floatSpillCnt := !floatSpillCnt + 1; + X86Spill.fspill(instr, reg, getFregLoc(an, spillLoc)) + ) + + fun spillFreg{src, reg, spillLoc, annotations=an} = + (floatSpillCnt := !floatSpillCnt + 1; + [I.FLDL(I.FDirect(src)), I.FSTPL(getFregLoc(an, spillLoc))] + ) + + fun spillFcopyTmp{copy=I.FCOPY{dst, src, ...}, spillLoc, + annotations=an} = + (floatSpillCnt := !floatSpillCnt + 1; + I.FCOPY{dst=dst, src=src, tmp=SOME(getFregLoc(an, spillLoc))} + ) + | spillFcopyTmp{copy=I.ANNOTATION{i,a}, spillLoc, annotations} = + let val i = spillFcopyTmp{copy=i, spillLoc=spillLoc, + annotations=annotations} + in I.ANNOTATION{i=i, a=a} end + + (* rename floating point *) + fun renameF{instr, fromSrc, toSrc} = + (floatRenameCnt := !floatRenameCnt + 1; + X86Spill.freload(instr, fromSrc, I.FDirect toSrc) + ) + + (* reload floating point *) + fun reloadF{instr, reg, spillLoc, annotations=an} = + (floatReloadCnt := !floatReloadCnt + 1; + X86Spill.freload(instr, reg, getFregLoc(an, spillLoc)) + ) + + fun reloadFreg{dst, reg, spillLoc, annotations=an} = + (floatReloadCnt := !floatReloadCnt + 1; + [I.FLDL(getFregLoc(an, spillLoc)), I.FSTPL(I.FDirect dst)] + ) + + (* ------------------------------------------------------------------- + * Callbacks for floating point K=7 + * -------------------------------------------------------------------*) + fun FMemReg f = let val fx = C.registerNum f + in if fx >= 8 andalso fx < 32 + then I.FDirect f else I.FPR f + end + + fun copyInstrF'((rds as [d], rss as [s]), _) = + I.FMOVE{fsize=I.FP64,src=FMemReg s,dst=FMemReg d} + | copyInstrF'((rds, rss), I.FCOPY{tmp, ...}) = + I.FCOPY{dst=rds, src=rss, tmp=tmp} + | copyInstrF'(x, I.ANNOTATION{i, a}) = + I.ANNOTATION{i=copyInstrF'(x,i), a=a} + + val copyInstrF' = fn x => [copyInstrF' x] + + fun spillFreg'{src, reg, spillLoc, annotations=an} = + (floatSpillCnt := !floatSpillCnt + 1; + [I.FMOVE{fsize=I.FP64, src=FMemReg src, dst=getFregLoc(an,spillLoc)}] + ) + + fun renameF'{instr, fromSrc, toSrc} = + (floatRenameCnt := !floatRenameCnt + 1; + X86Spill.freload(instr, fromSrc, I.FPR toSrc) + ) + + fun reloadFreg'{dst, reg, spillLoc, annotations=an} = + (floatReloadCnt := !floatReloadCnt + 1; + [I.FMOVE{fsize=I.FP64, dst=FMemReg dst, src=getFregLoc(an,spillLoc)}] + ) + + (* ------------------------------------------------------------------- + * Integer 8 stuff + * -------------------------------------------------------------------*) + fun memToMemMove{dst, src} = + let val tmp = I.C.newReg() + in [I.MOVE{mvOp=I.MOVL,src=src,dst=I.Direct tmp}, + I.MOVE{mvOp=I.MOVL,src=I.Direct tmp,dst=dst} + ] + end + + fun copyInstrR((rds as [d], rss as [s]), _) = + if C.sameColor(d,s) then [] else + let val dx = C.registerNum d and sx = C.registerNum s + in case (dx >= 8 andalso dx < 32, sx >= 8 andalso sx < 32) of + (false, false) => [I.COPY{dst=rds, src=rss, tmp=NONE}] + | (true, false) => [I.MOVE{mvOp=I.MOVL,src=I.Direct s, + dst=I.MemReg d}] + | (false, true) => [I.MOVE{mvOp=I.MOVL,src=I.MemReg s, + dst=I.Direct d}] + | (true, true) => memToMemMove{src=I.MemReg s, dst=I.MemReg d} + end + | copyInstrR((rds, rss), I.COPY{tmp, ...}) = + [I.COPY{dst=rds, src=rss, tmp=tmp}] + | copyInstrR(x, I.ANNOTATION{i, a}) = + copyInstrR(x, i) (* XXX *) + + + fun getRegLoc(an, Ra.FRAME loc) = Int.spillLoc(an, loc) + | getRegLoc(an, Ra.MEM_REG r) = I.MemReg r + + (* No, logical spill locations... *) + + structure GR8 = GetReg(val nRegs=8 + val available=map C.registerId Int.avail + val first=0) + + val K8 = length Int.avail + + (* register allocation for general purpose registers *) + fun spillR8{instr, reg, spillLoc, kill, annotations=an} = + (intSpillCnt := !intSpillCnt + 1; + X86Spill.spill(instr, reg, getRegLoc(an, spillLoc)) + ) + + fun isMemReg r = let val x = C.registerNum r + in x >= 8 andalso x < 32 end + + fun spillReg{src, reg, spillLoc, annotations=an} = + let val _ = intSpillCnt := !intSpillCnt + 1; + val dstLoc = getRegLoc(an,spillLoc) + val isMemReg = isMemReg src + val srcLoc = if isMemReg then I.MemReg src else I.Direct src + in if InsnProps.eqOpn(srcLoc, dstLoc) then [] + else if isMemReg then memToMemMove{dst=dstLoc, src=srcLoc} + else [I.MOVE{mvOp=I.MOVL, src=srcLoc, dst=dstLoc}] + end + + fun spillCopyTmp{copy=I.COPY{src, dst,...}, spillLoc, annotations=an} = + (intSpillCnt := !intSpillCnt + 1; + I.COPY{dst=dst, src=src, tmp=SOME(getRegLoc(an, spillLoc))} + ) + + fun renameR8{instr, fromSrc, toSrc} = + (intRenameCnt := !intRenameCnt + 1; + X86Spill.reload(instr, fromSrc, I.Direct toSrc) + ) + + fun reloadR8{instr, reg, spillLoc, annotations=an} = + (intReloadCnt := !intReloadCnt + 1; + X86Spill.reload(instr, reg, getRegLoc(an,spillLoc)) + ) + + fun reloadReg{dst, reg, spillLoc, annotations=an} = + let val _ = intReloadCnt := !intReloadCnt + 1 + val srcLoc = getRegLoc(an, spillLoc) + val isMemReg = isMemReg dst + val dstLoc = if isMemReg then I.MemReg dst else I.Direct dst + in if InsnProps.eqOpn(srcLoc,dstLoc) then [] + else if isMemReg then memToMemMove{dst=dstLoc, src=srcLoc} + else [I.MOVE{mvOp=I.MOVL, src=srcLoc, dst=dstLoc}] + end + + fun resetRA() = + (firstSpill := true; + firstFPSpill := true; + IntHashTable.clear affectedBlocks; + IntHashTable.clear deadRegs; + if !fast_floating_point then FR8.reset() else FR32.reset(); + GR8.reset() + ) + + (* Dedicated + available registers *) + fun mark(a, l) = app (fn r => Array.update(a, C.registerId r, true)) l + + val dedicatedR = Array.array(32,false) + val dedicatedF32 = Array.array(64,false) + val dedicatedF8 = Array.array(64,false) + val _ = mark(dedicatedR, Int.dedicated) + val _ = mark(dedicatedF32, Float.dedicated) + + + fun phases ps = + let fun f([], m) = m + | f(SPILL_PROPAGATION::ps, m) = f(ps, Ra.SPILL_PROPAGATION+m) + | f(SPILL_COLORING::ps, m) = f(ps, Ra.SPILL_COLORING+m) + in f(ps, Ra.NO_OPTIMIZATION) + end + + (* RA parameters *) + + (* How to allocate integer registers: + * Perform register alocation + memory allocation + *) + val RAInt = {spill = spillR8, + spillSrc = spillReg, + spillCopyTmp= spillCopyTmp, + reload = reloadR8, + reloadDst = reloadReg, + renameSrc = renameR8, + copyInstr = copyInstrR, + K = K8, + getreg = GR8.getreg, + cellkind = I.C.GP, + dedicated = dedicatedR, + spillProh = [], + memRegs = Int.memRegs, + mode = phases(Int.phases) + } : Ra.raClient + + (* How to allocate floating point registers: + * Allocate all fp registers on the stack. This is the easy way. + *) + val RAFP32 ={spill = spillF, + spillSrc = spillFreg, + spillCopyTmp= spillFcopyTmp, + reload = reloadF, + reloadDst = reloadFreg, + renameSrc = renameF, + copyInstr = copyInstrF, + K = KF32, + getreg = FR32.getreg, + cellkind = I.C.FP, + dedicated = dedicatedF32, + spillProh = [], + memRegs = Float.memRegs, + mode = phases(Float.phases) + } : Ra.raClient + + (* How to allocate floating point registers: + * Allocate fp registers on the %st stack. Also perform + * memory allcoation. + *) + val RAFP8 ={spill = spillF, + spillSrc = spillFreg', + spillCopyTmp= spillFcopyTmp, + reload = reloadF, + reloadDst = reloadFreg', + renameSrc = renameF', + copyInstr = copyInstrF', + K = KF8, + getreg = FR8.getreg, + cellkind = I.C.FP, + dedicated = dedicatedF8, + spillProh = [], + memRegs = Float.fastMemRegs, + mode = phases(Float.fastPhases) + } : Ra.raClient + + (* Two RA modes, fast and normal *) + val fast_fp = [RAInt, RAFP8] + val normal_fp = [RAInt, RAFP32] + + (* The main ra routine *) + fun run cluster = + let val printGraph = + if !x86CfgDebugFlg then + PrintFlowGraph.printCluster(!MLRiscControl.debug_stream) + else fn msg => fn _ => () + + val _ = beforeRA() + val _ = resetRA() + + (* generic register allocator *) + + val cluster = Ra.ra + (if !fast_floating_point then fast_fp else normal_fp) + cluster + + val _ = removeDeadCode cluster + + val _ = printGraph "\t---After register allocation K=8---\n" cluster + + (* Run the FP translation phase when fast floating point has + * been enabled + *) + val cluster = + if !fast_floating_point andalso I.C.numCell I.C.FP () > 0 then + let val cluster = X86FP.run cluster + in printGraph "\t---After X86 FP translation ---\n" cluster; + cluster + end + else cluster + in cluster + end + +end diff --git a/MLRISC/x86/omit-frameptr/x86omit-frameptr.sml b/MLRISC/x86/omit-frameptr/x86omit-frameptr.sml new file mode 100644 index 0000000..f8d5bce --- /dev/null +++ b/MLRISC/x86/omit-frameptr/x86omit-frameptr.sml @@ -0,0 +1,385 @@ +(* x86omit-frameptr.sml + * + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * replaces uses and definitions of a virtual frame pointer (vfp) with the appropriate + * operation on the stack pointer. + * + * Invariant: fp = sp + delta && stack grows from high to low && fp >= sp + * + * Assumptions: At the entry node fp = sp + idelta + * + * The tricky business is to recognize that things that look like register may + * really be memory registers. + *) + +functor X86OmitFramePointer ( + structure I : X86INSTR + structure CFG : CONTROL_FLOW_GRAPH where I = I + structure MemRegs : MEMORY_REGISTERS where I=I + val memRegBase : CellsBasis.cell option): OMIT_FRAME_POINTER = +struct + structure CFG = CFG + structure I = I + structure C = I.C + structure CB = CellsBasis + structure HT = IntHashTable + val sp = C.esp + + val dumpCfg = MLRiscControl.mkFlag ("dump-cfg-after-omit-frame-pointer", + "whether CFG is shown after omit-framepointer phase") + + fun error msg = MLRiscErrorMsg.error("X86OmitFramePointer", msg) + + fun omitframeptr{vfp:CB.cell, idelta:Int32.int option, cfg as Graph.GRAPH graph} = let + + (* rewrite a list of instructions where the gap between fp and sp is delta *) + fun rewrite(instrs, idelta) = let + + (* What kind of register? *) + datatype which = SP | FP | OTHER + fun isSp cell = CB.sameColor(cell, sp) + fun isVfp cell = CB.sameColor(cell, vfp) + fun which(cell) = if isSp(cell) then SP else if isVfp(cell) then FP else OTHER + fun either(cell) = isSp(cell) orelse isVfp(cell) + + + (* Has the instruction been rewritten? *) + val changedFlag = ref false + + + (* + * rewrite a single instruction assuming gap (fp=sp+delta) + * returns NONE is instruction is deleted and SOME(instruction) otherwise. + *) + + fun doInstr(instr, delta:Int32.int option) = let + (* if a delta exists then add to it, + * otherwise maintain that there is no delta + *) + fun addToDelta i = + (case delta + of SOME d => SOME(i+d) + | NONE => NONE + (*esac*)) + + fun incOffset(i) = + (case delta + of NONE => error "incOffset" + | SOME k => k+i + (*esac*)) + + fun incDisp(I.Immed i) = I.Immed(incOffset(i)) + | incDisp _ = error "incDisp" (* CONSTANTS? *) + + fun operand(opnd as I.Displace{base, disp, mem}) = + if isVfp base then + (changedFlag := true; + I.Displace{base=sp, mem=mem, disp=incDisp(disp)}) + else opnd + | operand(opnd as I.Indexed{base, index, scale, disp, mem}) = + if isVfp index then + error "operand: frame pointer used in index" + else (case base + of NONE => opnd + | SOME b => + if isVfp b then + (changedFlag := true; + I.Indexed{base=SOME(sp), index=index, scale=scale, mem=mem, + disp=incDisp(disp)}) + else opnd + (*esac*)) + | operand(opnd as I.MemReg _) = + operand(MemRegs.memReg{reg=opnd, base=Option.valOf memRegBase}) + | operand(opnd as I.FDirect _) = + operand(MemRegs.memReg{reg=opnd, base=Option.valOf memRegBase}) + | operand(opnd) = opnd + + + fun annotate(i, k:Int32.int option) = let + val instr = + if !changedFlag then + (changedFlag := false; + case k + of NONE => i + | SOME d => + if d <> 0 then let + val cmt = "offset adjusted to " ^ Int32.toString d + val ann = #create MLRiscAnnotations.COMMENT cmt + in I.ANNOTATION{i=i, a=ann} + end + else i + (*esac*)) + else i + in (SOME(instr),k) + end + + fun unchanged(i:I.instr) = annotate(I.INSTR i, delta) + fun changedto(i, k) = annotate(I.INSTR i, k) + + fun compare(test, lsrc, rsrc) = unchanged(test{lsrc=operand(lsrc), rsrc=operand(rsrc)}) + fun float(oper, opnd) = unchanged(oper(operand(opnd))) + + fun doX86Instr (instr: I.instr) = + (case instr + of I.JMP(opnd,labs) => unchanged(I.JMP(operand opnd, labs)) + | I.JCC{cond:I.cond, opnd:I.operand} => + unchanged(I.JCC{cond=cond, opnd=operand(opnd)}) + | I.CALL{opnd, defs, uses, cutsTo, mem, return, pops=0} => + unchanged(I.CALL{opnd=operand(opnd), defs=defs, uses=uses, + cutsTo=cutsTo, mem=mem, pops=0, + return=return}) + | I.CALL{opnd, defs, uses, cutsTo, mem, return, pops} => + changedto(I.CALL{opnd=operand(opnd), defs=defs, uses=uses, + cutsTo=cutsTo, mem=mem, pops=pops, + return=return}, + addToDelta(~pops)) + | I.ENTER{src1=I.Immed i1, src2=I.Immed i2} => changedto(instr, addToDelta(i1 + i2*4)) + | I.LEAVE => (SOME(I.INSTR instr), NONE) + | I.RET opnd => (SOME(I.INSTR instr), NONE) + | I.MOVE{mvOp:I.move, src=I.Direct s, dst=I.Direct d} => + (case (which d, which s) + of (FP, SP) => (NONE, SOME 0) + | (SP, FP) => (case delta + of NONE => error "MOVE: (SP, FP)" + | SOME 0 => (NONE, SOME 0) + | SOME n => let + val addr = I.Displace{base=sp, disp=I.Immed(n), mem=I.Region.stack} + in + (SOME(I.lea{r32=sp, addr=addr}), SOME 0) + end + (*esac*)) + | (OTHER, OTHER) => unchanged(instr) + | (FP, FP) => (NONE, delta) + | (SP, SP) => (NONE, delta) + | (FP, _) => error "MOVE: to FP" + | (SP, _) => error "MOVE: to SP" + | (OTHER, SP) => unchanged(instr) + | (OTHER, FP) => error "MOVE: FP to OTHER" (* d:=sp+delta; lazy!*) + (*esac*)) + | I.MOVE{mvOp, src, dst as I.Direct d} => + if either(d) then error "MOVE: assignment to FP/SP" + else unchanged(I.MOVE{mvOp=mvOp, src=operand(src), dst=dst}) + | I.MOVE{mvOp, src, dst} => + unchanged(I.MOVE{mvOp=mvOp, src=operand(src), dst=operand(dst)}) + | I.LEA{r32:CB.cell, addr as I.Displace{base, disp=I.Immed d, mem}} => + (case (which r32, which base) + of (SP, SP) => + (* assumes stack grows from high to low. + * if sp is incremented by a positive delta, then the gap is + * reduced by delta-d; + * if sp is decremented, the the gap is increased and d is negative. + *) + changedto(instr, addToDelta(~d)) + | (SP, FP) => + (* sp = fp + d + * or sp = sp + delta + d + *) + changedto(I.LEA{r32=r32, addr=operand(addr)}, SOME(incOffset(d))) + | (FP, FP) => + (* fp = fp + d + * if d is positive, then the gap is increased to delta+d, + * if d is negative, then the gap is reduced. + *) + (NONE, SOME(incOffset(d))) + | (FP, SP) => (NONE, addToDelta(d)) + | (SP, OTHER) => error "LEA: sp changed by non-immed" + | (FP, OTHER) => error "LEA: fp changed by non-immed" + | _ => unchanged(instr) + (*esac*)) + | I.LEA{r32, addr} => + if either(r32) then error "LEA: SP/FP changed by non-immed" + else unchanged(I.LEA{r32=r32, addr=operand(addr)}) + | I.CMPL{lsrc: I.operand, rsrc: I.operand} => compare(I.CMPL, lsrc, rsrc) + | I.CMPW{lsrc: I.operand, rsrc: I.operand} => compare(I.CMPW, lsrc, rsrc) + | I.CMPB{lsrc: I.operand, rsrc: I.operand} => compare(I.CMPB, lsrc, rsrc) + | I.TESTL{lsrc: I.operand, rsrc: I.operand} => compare(I.TESTL, lsrc, rsrc) + | I.TESTW{lsrc: I.operand, rsrc: I.operand} => compare(I.TESTW, lsrc, rsrc) + | I.TESTB{lsrc: I.operand, rsrc: I.operand} => compare(I.TESTB, lsrc, rsrc) + | I.BITOP{bitOp:I.bitOp, lsrc: I.operand, rsrc: I.operand} => + unchanged(I.BITOP{bitOp=bitOp, lsrc=operand(lsrc), rsrc=operand(rsrc)}) + | I.BINARY{binOp=I.ADDL, src=I.Immed(k), dst=I.Direct(d)} => + (case which d + of SP => changedto(instr, addToDelta(~k)) + | FP => (NONE, SOME(incOffset(k))) + | OTHER => unchanged(instr) + (*esac*)) + | I.BINARY{binOp=I.SUBL, src=I.Immed(k), dst=I.Direct(d)} => + (case which d + of SP => changedto(instr, addToDelta(k)) + | FP => (NONE, SOME(incOffset(~k))) + | OTHER => unchanged(instr) + (*esac*)) + | I.BINARY{binOp, dst as I.Direct(d), src} => + if either(d) then error "binary: assignment to SP | FP" + else unchanged(I.BINARY{binOp=binOp, src=operand(src), dst=dst}) + | I.BINARY{binOp, src, dst} => + unchanged(I.BINARY{binOp=binOp, src=operand(src), dst=operand(dst)}) + | I.CMPXCHG{lock:bool, sz:I.isize, src:I.operand, dst:I.operand} => + unchanged(I.CMPXCHG{lock=lock, sz=sz, src=operand(src), dst=operand(dst)}) + | I.MULTDIV{multDivOp:I.multDivOp, src:I.operand} => + unchanged(I.MULTDIV{multDivOp=multDivOp, src=operand(src)}) + | I.MUL3{dst:CB.cell, src2:Int32.int, src1:I.operand} => + if either(dst) then error "MUL3: assignment to FP/SP" + else unchanged(I.MUL3{dst=dst, src2=src2, src1=operand(src1)}) + | I.UNARY{unOp=I.INCL, opnd as I.Direct(r)} => + (case (which r) + of SP => changedto(instr, addToDelta(~1)) + | FP => (NONE, SOME(incOffset(1))) + | OTHER => unchanged(I.UNARY{unOp=I.INCL, opnd=opnd}) + (*esac*)) + | I.UNARY{unOp=I.DECL, opnd as I.Direct(r)} => + (case (which r) + of SP => changedto(instr, addToDelta(1)) + | FP => (NONE, SOME(incOffset(~1))) + | OTHER => unchanged(I.UNARY{unOp=I.DECL, opnd=opnd}) + (*esac*)) + | I.UNARY{unOp, opnd} => unchanged(I.UNARY{unOp=unOp, opnd=operand(opnd)}) + | I.SET{cond:I.cond, opnd:I.operand} => + unchanged(I.SET{cond=cond, opnd=operand(opnd)}) + | I.CMOV{cond:I.cond, src as I.Direct(s), dst:CB.cell} => + if either(s) orelse either(dst) then + error "CMOV: FP/SP in conditional move" + else unchanged(I.CMOV{cond=cond, src=operand(src), dst=dst}) + | I.PUSHL opnd => changedto(I.PUSHL(operand(opnd)), addToDelta(4)) + | I.PUSHW opnd => changedto(I.PUSHW(operand(opnd)), addToDelta(2)) + | I.PUSHB opnd => changedto(I.PUSHB(operand(opnd)), addToDelta(1)) + | I.POP opnd => changedto(I.POP(operand(opnd)), addToDelta(~4)) + | I.FBINARY{binOp:I.fbinOp, src:I.operand, dst:I.operand} => + unchanged(I.FBINARY{binOp=binOp, src=operand(src), dst=operand(dst)}) + | I.FIBINARY{binOp:I.fibinOp, src:I.operand} => + unchanged(I.FIBINARY{binOp=binOp, src=operand(src)}) + | I.FUCOM opnd => unchanged(I.FUCOM(operand opnd)) + | I.FUCOMP opnd => unchanged(I.FUCOMP(operand (opnd))) + | I.FCOMI opnd => unchanged(I.FCOMI(operand opnd)) + | I.FCOMIP opnd => unchanged(I.FCOMIP(operand (opnd))) + | I.FUCOMI opnd => unchanged(I.FUCOMI(operand opnd)) + | I.FUCOMIP opnd => unchanged(I.FUCOMIP(operand (opnd))) + | I.FSTPL opnd => float(I.FSTPL, opnd) + | I.FSTPS opnd => float(I.FSTPS, opnd) + | I.FSTPT opnd => float(I.FSTPT, opnd) + | I.FSTL opnd => float(I.FSTL, opnd) + | I.FSTS opnd => float(I.FSTS, opnd) + | I.FLDL opnd => float(I.FLDL, opnd) + | I.FLDS opnd => float(I.FLDS, opnd) + | I.FLDT opnd => float(I.FLDT, opnd) + | I.FILD opnd => float(I.FILD, opnd) + | I.FILDL opnd => float(I.FILDLL, opnd) + | I.FILDLL opnd => float(I.FILDLL, opnd) + | I.FENV{fenvOp:I.fenvOp, opnd:I.operand} => + unchanged(I.FENV{fenvOp=fenvOp, opnd=operand(opnd)}) + | I.FMOVE{fsize:I.fsize, src:I.operand, dst:I.operand} => + unchanged(I.FMOVE{fsize=fsize, src=operand(src), dst=operand(dst)}) + | I.FILOAD{isize:I.isize, ea:I.operand, dst:I.operand} => + unchanged(I.FILOAD{isize=isize, ea=operand(ea), dst=operand(dst)}) + | I.FBINOP{fsize, binOp, lsrc, rsrc, dst} => + unchanged(I.FBINOP{fsize=fsize, binOp=binOp, lsrc=operand(lsrc), + rsrc=operand(rsrc), dst=operand(dst)}) + | I.FIBINOP{isize, binOp, lsrc, rsrc, dst} => + unchanged(I.FIBINOP{isize=isize, binOp=binOp, lsrc=operand(lsrc), + rsrc=operand(rsrc), dst=operand(dst)}) + | I.FUNOP{fsize:I.fsize, unOp:I.funOp, src:I.operand, dst:I.operand} => + unchanged(I.FUNOP{fsize=fsize, unOp=unOp, src=operand(src), + dst=operand(dst)}) + | I.FCMP{i,fsize:I.fsize, lsrc:I.operand, rsrc:I.operand} => + unchanged(I.FCMP{i=i,fsize=fsize, lsrc=operand(lsrc), rsrc=operand(rsrc)}) + | _ => unchanged(instr) + (*esac*)) + in + case instr + of I.ANNOTATION{i,a} => let + val (instr, delta) = doInstr(i, delta) + in + case instr + of NONE => (NONE, delta) + | SOME(i) => annotate(I.ANNOTATION{i=i, a=a}, delta) + end + | I.COPY{k=CB.GP, dst, src, ...} => let + (* the situation where SP <- FP is somewhat complicated. + * The copy must be extracted, and a lea generated. + * Should it be before or after the parallel copy? Depends on if SP is used. + * However, will such a thing ever exist in a parallel copy!? + *) + fun okay(s, d, acc) = + (case (which s, which d) + of (FP, SP) => true + | (SP, FP) => error "COPY:SP<-FP; lazy!" + | (SP, OTHER) => error "COPY:SP<-OTHER" + | (FP, OTHER) => error "COPY:FP<-OTHER" + | (OTHER, SP) => error "COPY:OTHER<-SP" + | (OTHER, FP) => error "COPY:OTHER<-FP" + | _ => acc + (*esac*)) + in annotate(instr, if ListPair.foldl okay false (dst, src) then SOME 0 else delta) + end + | I.INSTR instr => doX86Instr instr + | _ => annotate(instr, delta) (* unchanged *) + end (*doInstr*) + + (* rewrite instructions *) + fun doInstrs([], instrs, delta) = (instrs, delta) + | doInstrs(instr::rest, acc, delta) = let + val (instr, delta2) = doInstr(instr, delta) + in + case instr + of NONE => doInstrs(rest, acc, delta2) + | SOME(i) => doInstrs(rest, i::acc, delta2) + end + + + in doInstrs(instrs, [], idelta) + end (* rewrite *) + + + + + (* rewrite blocks using a depth first traversal of the blocks *) + val info : {visited:bool, delta: Int32.int option} HT.hash_table = + HT.mkTable(32, General.Fail "X86OmitFramePtr: Not Found") + val noInfo = {visited=false, delta=NONE} + + fun dfs (nid, delta) = let + fun doSucc(delta) = + app (fn snid => dfs(snid, delta)) (#succ graph nid) + val CFG.BLOCK{insns, kind, ...} = #node_info graph nid + in + case kind + of CFG.STOP => () + | CFG.START => doSucc(delta) + | CFG.NORMAL => let + val {visited, delta=d} = Option.getOpt(HT.find info nid, noInfo) + fun sameDelta(NONE, NONE) = true + | sameDelta(SOME i1: Int32.int option, SOME i2) = i1 = i2 + | sameDelta _ = false + in + if visited then (if sameDelta(d, delta) then () else error "dfs") + else let + val (instrs, delta2) = rewrite(rev(!insns), delta) + in + insns := instrs; + HT.insert info (nid, {visited=true, delta=delta}); + doSucc(delta2) + end + end + (*esac*) + end + + val CB.CELL{col, ...} = vfp + in + (* + * check that virtual frame pointer is a pseudo register or + * aliased to the stack pointer. + *) + case !col + of CB.PSEUDO => app (fn nid => dfs(nid, idelta)) (#entries graph ()) + | _ => error "virtual frame pointer not a pseudo register" + (*esac*) + + (* output cluster *) +(* if !dumpCfg then + PC.printCluster TextIO.stdOut "after omit frame pointer" cl + else () *) + end +end + diff --git a/MLRISC/x86/ra/x86PseudoR.sml b/MLRISC/x86/ra/x86PseudoR.sml new file mode 100644 index 0000000..7e38b5b --- /dev/null +++ b/MLRISC/x86/ra/x86PseudoR.sml @@ -0,0 +1,345 @@ +signature X86REWRITE_PSEUDO = sig + structure F : FLOWGRAPH + (* + * Takes a cluster and returns a range of registers to prohibit + * from spilling. The arguments are: + * 1. The first pseudo register + * 2. The regmap before RA32. If this is guaranteed to be + * an identity you can use the identity function. + * I use I.C.lookup regmap. + * + * 3. The cluster. + * + * It returns a range of registers. + * + * NOTE: this version does not assume that the original regmap + * is an identity. So there is some ugly regmap business to + * take care of. + * + *) + val rewrite : + { firstPseudo : F.I.C.cell, + originalRegmap : F.I.C.cell -> F.I.C.cell, + pruneCellSets : bool (* should we remove references to memory + * registers from all cell sets? + *) + } -> F.cluster -> F.I.C.cell * F.I.C.cell +end + + +functor X86RewritePseudo + (structure Instr : X86INSTR + structure Flowgraph : FLOWGRAPH where I = Instr + val ea : int -> Instr.operand) : X86REWRITE_PSEUDO = +struct + structure C = X86Cells + structure I = Instr + structure F = Flowgraph + + fun error msg = MLRiscErrorMsg.error("X86RewritePseudo",msg) + + fun rewrite {firstPseudo, originalRegmap, pruneCellSets} + (F.CLUSTER{blocks, regmap, ...}) = + let + val first = C.newReg() + val lookup = C.lookup regmap + fun shuffle(dests, srcs, tmp) = let + fun move(rd,rs) = I.MOVE{mvOp=I.MOVL, src=rs, dst=rd} + fun loop((p as (rd, dst, rs, src))::rest, changed, used, done, instrs) = + if List.exists (fn (r : I.C.cell) => dst=r) used then + loop(rest, changed, used, p::done, instrs) + else + loop(rest, true, used, done, move(I.Direct rd, I.Direct rs)::instrs) + | loop([], false, _, done, instrs) = (done, instrs) + | loop([], true, _, done, instrs) = + loop(done, false, map #4 done, [], instrs) + + fun cycle([], instrs) = instrs + | cycle(moves, instrs) = + (case loop(moves, false, map #4 moves, [], instrs) + of ([], instrs) => instrs + | ((rd, rd', rs, rs')::nonCyclic, instrs) => let + val SOME tmpR = tmp + val instrs' = move(tmpR, I.Direct rs)::instrs + val (cyclic, instrs'') = + loop(nonCyclic, false, map #4 nonCyclic, [], instrs') + in cycle(cyclic, move(I.Direct rd, Option.valOf tmp)::instrs'') + end + (*esac*)) + fun rmCoalesced([], [], remain, coalesced) = (remain, coalesced) + | rmCoalesced(rd::rds, rs::rss, remain, coalesced) = let + val dst = lookup rd + val src = lookup rs + in + if dst = ~1 then (* eliminate dead copies *) + rmCoalesced(rds, rss, remain, coalesced) + else if dst = src then + rmCoalesced(rds, rss, remain, + move(I.Direct rd, I.Direct rs)::coalesced) + else rmCoalesced(rds, rss, (rd, dst, rs, src)::remain, coalesced) + end + in rev (cycle (rmCoalesced(dests, srcs, [], []))) + end + + fun doBlock(F.BBLOCK{blknum, insns, liveOut, succ, ...}) = let + fun pseudoR r = (r >= 8 andalso r < firstPseudo) + fun resetLiveOut() = let + fun reset(gp, fp, cc) = + liveOut := (List.filter (not o pseudoR) gp, fp, cc) + in + case !succ + of [] => reset(!liveOut) + | [(F.EXIT _,_)] => reset(!liveOut) + | _ => () + end + + (* subst: hd(acc) is the last instruction in the stream. *) + fun subst(instr, acc) = let + fun mark(i,[]) = i + | mark(i,a::an) = mark(I.ANNOTATION{i=i,a=a},an) + + fun movl{src, dst, acc} = + I.MOVE{mvOp=I.MOVL, src=src, dst=dst}::acc + + fun displace(base, disp, acc, mem) = + let val base' = originalRegmap base + in if pseudoR base' then + let val tmpR = C.newReg() + val newDisp = I.Displace{base=tmpR, disp=disp, mem=mem} + in (newDisp, movl{src=ea base', dst=I.Direct tmpR, acc=acc}) + end + else (I.Displace{base=base, disp=disp, mem=mem}, acc) + end + + fun indexedEa(base, index, scale, disp, mem) = + I.Indexed{base=base, index=index, scale=scale, disp=disp, mem=mem} + + fun indexed(NONE, index, scale, disp, acc, mem) = + let val index' = originalRegmap index + in if pseudoR index' then + let val tmpR = C.newReg() + val newIndx = indexedEa(NONE, tmpR, scale, disp, mem) + in (newIndx,movl{src=ea index', dst=I.Direct tmpR, acc=acc}) + end + else (indexedEa(NONE, index, scale, disp, mem), acc) + end + | indexed(ba as SOME base, index, scale, disp, acc, mem) = + let val base' = originalRegmap base + val index' = originalRegmap index + val b = pseudoR base' + val i = pseudoR index' + in if b andalso i then + let val tmpB = C.newReg() + val tmpI = C.newReg() + val opnd = indexedEa(SOME tmpB, tmpI, scale, disp, mem) + in (opnd, movl{src=ea base', dst=I.Direct tmpB, + acc=movl{src=ea index', + dst=I.Direct tmpI, acc=acc}}) + end + else if b then let + val tmpB = C.newReg() + in (indexedEa(SOME tmpB, index, scale, disp, mem), + movl{src=ea base', dst=I.Direct tmpB, acc=acc}) + end + else if i then let + val tmpI = C.newReg() + in (indexedEa(ba, tmpI, scale, disp, mem), + movl{src=ea index', dst=I.Direct tmpI, acc=acc}) + end + else (indexedEa(ba, index, scale, disp, mem), acc) + + end + fun direct(r, acc) = + let val r' = originalRegmap r + in if pseudoR r' then (ea r', acc) else (I.Direct r, acc) + end + + fun operand(I.Direct r, acc) = direct(r, acc) + | operand(I.Indexed{base, index, scale, disp, mem}, acc) = + indexed(base, index, scale, disp, acc, mem) + | operand(I.Displace{base, disp, mem}, acc) = + displace(base, disp, acc, mem) + | operand arg = arg + + fun done(opnd, f, an) = + let val (opnd', acc') = operand(opnd, acc) + in mark(f opnd', an) :: acc' + end + + fun memArg(I.Displace _) = true + | memArg(I.Indexed _) = true + | memArg(I.MemReg _) = true + | memArg(I.LabelEA _) = true + | memArg _ = false + + fun withTmp f = + let val t = C.newReg() + in f t + end + + fun rewriteCmpTest(cmptest, lsrc, rsrc, an) = + let val (lsrcOpnd, acc1) = operand(lsrc, acc) + val (rsrcOpnd, acc2) = operand(rsrc, acc1) + in if memArg lsrcOpnd andalso memArg rsrcOpnd then + withTmp(fn t => + mark(cmptest{lsrc=I.Direct t, rsrc=rsrcOpnd},an):: + movl{src=lsrcOpnd, dst=I.Direct t, acc=acc2}) + else + mark(cmptest{lsrc=lsrcOpnd, rsrc=rsrcOpnd},an)::acc2 + end + + fun rewrite(instr,an) = + case instr + of I.JMP(opnd, labs) => done(opnd,fn opnd => I.JMP(opnd, labs),an) + | I.JCC{opnd, cond} => + done(opnd,fn opnd => I.JCC{opnd=opnd, cond=cond}, an) + | I.MOVE{src, dst, mvOp} => let + val (srcOpnd, acc1) = operand(src, acc) + val (dstOpnd, acc2) = operand(dst, acc1) + in + if memArg srcOpnd andalso memArg dstOpnd then + withTmp(fn t => + mark(I.MOVE{src=I.Direct t, dst=dstOpnd, mvOp=mvOp},an):: + movl{src=srcOpnd, dst=I.Direct t, acc=acc2}) + else + mark(I.MOVE{src=srcOpnd, dst=dstOpnd, mvOp=mvOp},an)::acc2 + end + | I.LEA{r32, addr} => let + val (srcOpnd, acc1) = operand(addr, acc) + val r32' = originalRegmap r32 + in + if pseudoR r32' then + withTmp(fn t => + movl{dst=ea r32', src=I.Direct t, + acc=mark(I.LEA{r32=t, addr=srcOpnd},an)::acc1}) + else mark(I.LEA{r32=r32, addr=srcOpnd},an)::acc1 + end + | I.CMPL{lsrc, rsrc} => rewriteCmpTest(I.CMPL, lsrc, rsrc, an) + | I.CMPW{lsrc, rsrc} => rewriteCmpTest(I.CMPW, lsrc, rsrc, an) + | I.CMPB{lsrc, rsrc} => rewriteCmpTest(I.CMPB, lsrc, rsrc, an) + | I.TESTL{lsrc, rsrc} => rewriteCmpTest(I.TESTL, lsrc, rsrc, an) + | I.TESTW{lsrc, rsrc} => rewriteCmpTest(I.TESTW, lsrc, rsrc, an) + | I.TESTB{lsrc, rsrc} => rewriteCmpTest(I.TESTB, lsrc, rsrc, an) + | I.BINARY{binOp, src, dst} => let + val (srcOpnd, acc1) = operand(src, acc) + val (dstOpnd, acc2) = operand(dst, acc1) + in + if memArg srcOpnd andalso memArg dstOpnd then + withTmp(fn t => + mark(I.BINARY{binOp=binOp,src=I.Direct t,dst=dstOpnd},an):: + movl{src=srcOpnd, dst=I.Direct t, acc=acc2}) + else + mark(I.BINARY{binOp=binOp,src=srcOpnd,dst=dstOpnd},an)::acc2 + end + | I.CALL(opnd,def,use,mem) => let + val (opnd1, acc1) = operand(opnd, acc) + fun cellset(gp, fp, cc) = + if pruneCellSets then + (List.filter (not o pseudoR) gp, fp, cc) + else + (gp, fp, cc) + in mark(I.CALL(opnd1, cellset def, cellset use, mem),an)::acc1 + end + | I.MULTDIV{multDivOp, src} => + done(src, + fn opnd => I.MULTDIV{multDivOp=multDivOp, src=opnd}, an) + | I.MUL3{dst, src1, src2} => let + val (src1Opnd, acc1) = operand(src1, acc) + val dst' = originalRegmap dst + in + if pseudoR dst' then + withTmp(fn t => + movl{dst=ea dst', src=I.Direct t, acc= + mark(I.MUL3{dst=t, src1=src1Opnd, src2=src2},an)::acc1}) + else mark(I.MUL3{dst=dst, src1=src1Opnd, src2=src2},an)::acc1 + end + | I.UNARY{unOp, opnd} => + done(opnd, fn opnd => I.UNARY{unOp=unOp, opnd=opnd}, an) + | I.SET{cond, opnd} => + done(opnd, fn opnd => I.SET{cond=cond, opnd=opnd}, an) + | I.PUSHL opnd => done(opnd, I.PUSHL, an) + | I.PUSHW opnd => done(opnd, I.PUSHW, an) + | I.PUSHB opnd => done(opnd, I.PUSHB, an) + | I.POP opnd => done(opnd, I.POP, an) + | I.CMOV{cond, src, dst} => + let val (srcOpnd, acc1) = operand(src, acc) + val dst' = originalRegmap dst + in if pseudoR dst then + withTmp(fn t => + movl{dst=ea dst', src=I.Direct t, acc= + mark(I.CMOV{cond=cond, dst=t, src=srcOpnd},an):: + acc1}) + else + mark(I.CMOV{cond=cond, dst=dst, src=srcOpnd},an)::acc1 + end + | I.COPY{dst, src, tmp} => let + (* Note: + * Parallel copies are not allowed after this point. + * Consider: + * (r8, r9, edx) <- (566, 567, 560) + * + * RA32 may well decide to allocate 560 to r8. + * After the rewrite we will get: + * + * mem[r8] <- 566 + * mem[r9] <- 567 + * edx <- 560 + * + * If 560 should spill, we all of a sudden have the + * incorrect value being read from the spill location. + *) + fun f((instr as I.MOVE{mvOp, src, dst})::rest, acc) = + (case (src, dst) + of (I.Direct s, I.Direct d) => + let val d' = originalRegmap d + val s' = originalRegmap s + in if s'=d' then f(rest, acc) + else if pseudoR d' andalso pseudoR s' then + f(rest, withTmp(fn t => + (movl{src=I.Direct t, dst=ea d', + acc=movl{src=ea s', + dst=I.Direct t, acc=acc}}))) + else if pseudoR d' then + f(rest, withTmp(fn t => + (movl{src=I.Direct s, dst=ea d', acc=acc}))) + else if pseudoR s' then + f(rest, withTmp(fn t => + (movl{src=ea s', dst=I.Direct d, acc=acc}))) + else f(rest,I.COPY{src=[s], dst=[d],tmp=NONE}::acc) + end + + | _ => f(rest, instr::acc) + (*esac*)) + + | f([], acc) = acc + in f(shuffle (dst, src, tmp), acc) + end + | I.FSTPT opnd => done(opnd, I.FSTPT, an) + | I.FSTPL opnd => done(opnd, I.FSTPL, an) + | I.FSTPS opnd => done(opnd, I.FSTPS, an) + | I.FSTL opnd => done(opnd, I.FSTL, an) + | I.FSTS opnd => done(opnd, I.FSTS, an) + | I.FLDT opnd => done(opnd, I.FLDT, an) + | I.FLDL opnd => done(opnd, I.FLDL, an) + | I.FLDS opnd => done(opnd, I.FLDS, an) + | I.FILD opnd => done(opnd, I.FILD, an) + | I.FILDL opnd => done(opnd, I.FILDL, an) + | I.FILDLL opnd => done(opnd, I.FILDLL, an) + | I.FENV{fenvOp, opnd} => done(opnd, + fn opnd => I.FENV{fenvOp=fenvOp,opnd=opnd}, an) + | I.FBINARY{src,dst,binOp} => + done(src, + fn opnd => I.FBINARY{binOp=binOp, src=opnd, dst=dst},an) + | I.FIBINARY{src,binOp} => + done(src, fn opnd => I.FIBINARY{binOp=binOp, src=opnd},an) + | I.ANNOTATION{i,a} => rewrite(i,a::an) + | _ => mark(instr,an)::acc + in rewrite(instr,[]) + end (* subst *) + in insns := List.foldl subst [] (rev(!insns)); + if pruneCellSets then resetLiveOut() else () + end (*doBlock*) + | doBlock _ = () + in app doBlock blocks; (first, C.newReg()) + end (* rewrite *) +end diff --git a/MLRISC/x86/ra/x86RA.sml b/MLRISC/x86/ra/x86RA.sml new file mode 100644 index 0000000..806296f --- /dev/null +++ b/MLRISC/x86/ra/x86RA.sml @@ -0,0 +1,727 @@ +(* + * X86 specific register allocator. + * This module abstracts out all the nasty RA business on the x86. + * So you should only have to write the callbacks. + * + * Here's more some info on the x86 functor. + *Basically the new functor encapsulates all the features in the + *x86 register allocator, including things like memory pseudo registers, + *and the new floating point allocator that maps things onto the %st registers. + *For floating point, we can also switch between the sethi-ullman mode and + *the %st register mode. + * + * Notes on the parameters of the functor: + * + *> structure SpillHeur : RA_SPILL_HEURISTICS + * + * This should be one of the spill heuristic module like ChaitinSpillHeur or + * Command ('i' to return to index): you can also roll your own. + * + *> structure Spill : RA_SPILL + * + * This should be either RASpill or RASpillWithRenaming. + * + *> val fast_floating_point : bool ref + * + * This flag is used to turn on the new x86 fp mode. The same flag + * is also passed to the x86 instruction selection module. + * + *> datatype raPhase = SPILL_PROPAGATION | SPILL_COLORING + * + * This datatype specifies which additional phases we should run. + * + *> val beforeRA : flowgraph -> spill_info + * + * This callback is invoked before each call to RA. The RA may have + * to perform both integer and floating point RA. This is called before + * integer RA. + * + * The callbacks for integer and floating point are separated into + * the substructures Int and Float. + * + *> structure Int : + *> sig + *> val avail : I.C.cell list + *> val dedicated : I.C.cell list + *> val memRegs : I.C.cell list + *> val phases : raPhase list + *> val spillLoc : spill_info * Annotations.annotations ref * + *> RAGraph.logical_spill_id -> I.operand + *> val spillInit : RAGraph.interferenceGraph -> unit + *> end + * + * avail is the list of registers available for allocation + * memRegs is the list of memory registers that may appear in the program + * phases is a list of additional RA phases. I recommend turning on + * everything: + * + * [SPILL_PROPAGATION, SPILL_COLORING] + * + * spillInit is called once before spilling occurs. + * + * spillLoc is a callback that maps logical_spill_ids into an x86 + * effective address. The list of allocations is from the block in which + * the spilled instruction occurs. The client should keep track of + * existing ids, and allocate a new effective address when a new id occurs. + * In general, the client should keep track of a single table of free + * spill space for both integer and floating point registers. + * + * Previously, the spill/reload routines have to do special things in the + * presence of memory registers, but that stuff is taken care of in the + * new module, so all spillLoc has to do is map logical_spill_ids into + * effective address. + * + *> structure Float : + *> sig + *> val avail : I.C.cell list + *> val dedicated : I.C.cell list + *> val memRegs : I.C.cell list + *> val phases : raPhase list + *> val spillLoc : spill_info * Annotations.annotations ref * + *> RAGraph.logical_spill_id -> I.operand + *> val spillInit : RAGraph.interferenceGraph -> unit + *> end + * + * For floating point, it is similar. + * + *> + *> val fastMemRegs : I.C.cell list + *> val fastPhases : raPhase list + * + * When fast_floating_point is turned on, we use different parameters: + * + * avail is set to [%st(0), ..., %st(6)] + * dedicated is set to [] + * memRegs is set to fastMemRegs + * + * In general, the flow of the module is like this: + * + * ra: + * call beforeRA() + * integer RA --- call Int.spillInit() once if spilling is needed + * floating fp RA --- call Real.spillInit() once if spilling is needed + * if !fast_floating_point then + * invoke the module X86FP to convert fake %fp registers + * into real %st registers + * endif + * + *) + +functor X86RA + ( structure I : X86INSTR + structure InsnProps : INSN_PROPERTIES + where I = I + structure CFG : CONTROL_FLOW_GRAPH + where I = I + structure Asm : INSTRUCTION_EMITTER + where I = I + and S.P = CFG.P + + (* Spilling heuristics determines which node should be spilled + * You can use Chaitin, ChowHenessey, or one of your own. + *) + structure SpillHeur : RA_SPILL_HEURISTICS + + (* The Spill module figures out the strategies for inserting + * spill code. You can use RASpill, or RASpillWithRenaming, + * or write your own if you are feeling adventurous. + *) + structure Spill : RA_SPILL where I = I + + + type spill_info (* user-defined abstract type *) + + (* Should we use allocate register on the floating point stack? + * Note that this flag must match the one passed to the code generator + * module. + *) + val fast_floating_point : bool ref + + datatype raPhase = SPILL_PROPAGATION + | SPILL_COLORING + + datatype spillOperandKind = SPILL_LOC | CONST_VAL + + (* Called before register allocation; perform your initialization here. *) + val beforeRA : CFG.cfg -> spill_info + + (* Integer register allocation parameters *) + structure Int : + sig + val avail : CellsBasis.cell list + val dedicated : CellsBasis.cell list + val memRegs : CellsBasis.cell list + val phases : raPhase list + + val spillLoc : {info:spill_info, + an :Annotations.annotations ref, + cell:CellsBasis.cell, (* spilled cell *) + id :RAGraph.logical_spill_id + } -> + { opnd: I.ea, + kind: spillOperandKind + } + + (* This function is called once before spilling begins *) + val spillInit : RAGraph.interferenceGraph -> unit + + end + + (* Floating point register allocation parameters *) + structure Float : + sig + (* Sethi-Ullman mode *) + val avail : CellsBasis.cell list + val dedicated : CellsBasis.cell list + val memRegs : CellsBasis.cell list + val phases : raPhase list + + val spillLoc : spill_info * Annotations.annotations ref * RAGraph.logical_spill_id + -> I.ea + + (* This function is called once before spilling begins *) + val spillInit : RAGraph.interferenceGraph -> unit + + (* When fast_floating_point is on, use these instead: *) + val fastMemRegs : CellsBasis.cell list + val fastPhases : raPhase list + end + + ) : CFG_OPTIMIZATION = +struct + + structure CFG = CFG + structure I = I + structure C = I.C + structure CB = CellsBasis + + val name = "X86RA" + + type flowgraph = CFG.cfg + + val intSpillCnt = MLRiscControl.mkCounter ("ra-int-spills", "RA int spill count") + val intReloadCnt = MLRiscControl.mkCounter ("ra-int-reloads", "RA int reload count") + val intRenameCnt = MLRiscControl.mkCounter ("ra-int-renames", "RA int rename count") + val floatSpillCnt = MLRiscControl.mkCounter ("ra-float-spills", "RA float spill count") + val floatReloadCnt = MLRiscControl.mkCounter ("ra-float-reloads", "RA float reload count") + val floatRenameCnt = MLRiscControl.mkCounter ("ra-float-renames", "RA float rename count") + + fun inc c = c := !c + 1 + + val x86CfgDebugFlg = MLRiscControl.mkFlag ("x86-cfg-debug", "x86 CFG debug mode") + + fun error msg = MLRiscErrorMsg.error("X86RA",msg) + +(* + val deadcode = MLRiscControl.getCounter "x86-dead-code" + val deadblocks = MLRiscControl.getCounter "x86-dead-blocks" + *) + + structure PrintFlowgraph= + PrintFlowgraph(structure CFG=CFG + structure Asm = Asm) + + structure X86FP = + X86FP(structure X86Instr = I + structure X86Props = InsnProps + structure Flowgraph = CFG + structure Liveness = Liveness(CFG) + structure Asm = Asm + ) + + structure X86SpillInstr = X86SpillInstr(structure Instr=I structure Props=InsnProps) + val spillFInstr = X86SpillInstr.spill CB.FP + val reloadFInstr = X86SpillInstr.reload CB.FP + val spillInstr = X86SpillInstr.spill CB.GP + val reloadInstr = X86SpillInstr.reload CB.GP + + fun annotate([], i) = i + | annotate(a::an, i) = annotate(an, I.ANNOTATION{a=a, i=i}) + + (* + * Dead code elimination + *) + exception X86DeadCode + val affectedBlocks = + IntHashTable.mkTable(32,X86DeadCode) : bool IntHashTable.hash_table + val deadRegs = + IntHashTable.mkTable(32,X86DeadCode) : bool IntHashTable.hash_table + + fun removeDeadCode(cfg as Graph.GRAPH graph) = let + val blocks = #nodes graph () + val find = IntHashTable.find deadRegs + fun isDead r = + case find (CB.cellId r) of + SOME _ => true + | NONE => false + fun isAffected i = getOpt (IntHashTable.find affectedBlocks i, false) + fun isDeadInstr(I.ANNOTATION{i, ...}) = isDeadInstr i + | isDeadInstr(I.INSTR(I.MOVE{dst=I.Direct rd, ...})) = isDead rd + | isDeadInstr(I.INSTR(I.MOVE{dst=I.MemReg rd, ...})) = isDead rd + | isDeadInstr(I.COPY{k=CB.GP, dst=[rd], ...}) = isDead rd + | isDeadInstr _ = false + fun scan [] = () + | scan((blknum, CFG.BLOCK{insns, ...})::rest) = + (if isAffected blknum then + ((* deadblocks := !deadblocks + 1; *) + insns := elim(!insns, []) + ) else (); + scan rest) + and elim([], code) = rev code + | elim(i::instrs, code) = + if isDeadInstr i then + ((* deadcode := !deadcode + 1; *) elim(instrs, code)) + else elim(instrs, i::code) + in if IntHashTable.numItems affectedBlocks > 0 then + (scan blocks; + IntHashTable.clear deadRegs; + IntHashTable.clear affectedBlocks) + else () + end + + (* This function finds out which pseudo memory registers are unused. + * Those that are unused are made available for spilling. + * The register allocator calls this function right before spilling + * a set of nodes. + *) + val firstSpill = ref true + val firstFPSpill = ref true + + fun spillInit(graph, CB.GP) = + if !firstSpill then (* only do this once! *) + (Int.spillInit graph; + firstSpill := false + ) + else () + | spillInit(graph, CB.FP) = + if !firstFPSpill then + (Float.spillInit graph; + firstFPSpill := false + ) + else () + | spillInit _ = error "spillInit" + + (* This is the generic register allocator *) + structure Ra = + RegisterAllocator + (SpillHeur) + (MemoryRA (* for memory coalescing *) + (RADeadCodeElim (* do the funky dead code elimination stuff *) + (ClusterRA + (structure Flowgraph = CFG + structure Asm = Asm + structure InsnProps = InsnProps + structure Spill = Spill + ) + ) + (fun cellkind CB.GP = true | cellkind _ = false + val deadRegs = deadRegs + val affectedBlocks = affectedBlocks + val spillInit = spillInit + ) + ) + ) + + + (* ------------------------------------------------------------------- + * Floating point stuff + * -------------------------------------------------------------------*) + val KF32 = length Float.avail + structure FR32 = GetReg(val nRegs=KF32 + val available=map CB.registerId Float.avail + val first=CB.registerId(I.C.ST 8)) + + val availF8 = C.Regs CB.FP {from=0, to=6, step=1} + val KF8 = length availF8 + structure FR8 = GetReg(val nRegs=KF8 + val available=map CB.registerId availF8 + val first=CB.registerId(I.C.ST 0)) + + (* ------------------------------------------------------------------- + * Callbacks for floating point K=32 + * -------------------------------------------------------------------*) + fun fcopy{dst, src, tmp} = + I.COPY{k=CB.FP, sz=64, dst=dst, src=src, tmp=tmp} + + fun copyInstrF((rds as [_], rss as [_]), _) = + fcopy{dst=rds, src=rss, tmp=NONE} + | copyInstrF((rds, rss), I.COPY{k=CB.FP, tmp, ...}) = + fcopy{dst=rds, src=rss, tmp=tmp} + | copyInstrF(x, I.ANNOTATION{i,a}) = + I.ANNOTATION{i=copyInstrF(x, i), a=a} + | copyInstrF _ = error "copyInstrF" + + val copyInstrF = fn x => [copyInstrF x] + + fun getFregLoc(S, an, Ra.FRAME loc) = Float.spillLoc(S, an, loc) + | getFregLoc(S, an, Ra.MEM_REG r) = I.FDirect r + + (* spill floating point *) + fun spillF S {annotations=an, kill, reg, spillLoc, instr} = let + (* preserve annotation on instruction *) + fun spill(instrAn, I.ANNOTATION{a, i}) = spill(a::instrAn, i) + | spill(instrAn, I.KILL{regs, spilled}) = + {code= + [annotate + (instrAn, + I.KILL {regs=C.rmvFreg(reg, regs), + spilled=C.addFreg(reg, spilled)})], + proh = [], + newReg=NONE} + | spill(instrAn, I.LIVE _) = error "spillF: LIVE" + | spill(_, I.COPY _) = error "spillF: COPY" + | spill(instrAn, I.INSTR _) = + (inc floatSpillCnt; + spillFInstr(instr, reg, getFregLoc(S, an, spillLoc))) + in spill([], instr) + end + + fun spillFreg S {src, reg, spillLoc, annotations=an} = + (inc floatSpillCnt; + let val fstp = [I.fstpl(getFregLoc(S, an, spillLoc))] + in if CB.sameColor(src,C.ST0) then fstp + else I.fldl(I.FDirect(src))::fstp + end + ) + + fun spillFcopyTmp S {copy=I.COPY{k=CB.FP, dst, src, ...}, spillLoc, reg, + annotations=an} = + (inc floatSpillCnt; + fcopy{dst=dst, src=src, tmp=SOME(getFregLoc(S, an, spillLoc))} + ) + | spillFcopyTmp S {copy=I.ANNOTATION{i,a}, spillLoc, reg, annotations} = + let val i = spillFcopyTmp S {copy=i, spillLoc=spillLoc, reg=reg, + annotations=annotations} + in I.ANNOTATION{i=i, a=a} end + | spillFcopyTmp _ _ = error "spillFcopyTmp" + + (* rename floating point *) + fun renameF{instr, fromSrc, toSrc} = + (inc floatRenameCnt; + reloadFInstr(instr, fromSrc, I.FDirect toSrc) + ) + + (* reload floating point *) + fun reloadF S {annotations=an,reg,spillLoc,instr} = let + fun reload(instrAn, I.ANNOTATION{a,i}) = reload(a::instrAn, i) + | reload(instrAn, I.LIVE{regs, spilled}) = + {code=[I.LIVE{regs=C.rmvFreg(reg, regs), spilled=C.addFreg(reg, spilled)}], + proh=[], + newReg=NONE} + | reload(_, I.KILL _) = error "reloadF: KILL" + | reload (_, I.COPY _) = error "reloadF: COPY" + | reload(instrAn, instr as I.INSTR _) = + (inc floatReloadCnt; + reloadFInstr(instr, reg, getFregLoc(S, an, spillLoc))) + in reload([], instr) + end + + fun reloadFreg S {dst, reg, spillLoc, annotations=an} = + (inc floatReloadCnt; + if CB.sameColor(dst,C.ST0) then + [I.fldl(getFregLoc(S, an, spillLoc))] + else + [I.fldl(getFregLoc(S, an, spillLoc)), I.fstpl(I.FDirect dst)] + ) + + (* ------------------------------------------------------------------- + * Callbacks for floating point K=7 + * -------------------------------------------------------------------*) + fun FMemReg f = let val fx = CB.registerNum f + in if fx >= 8 andalso fx < 32 + then I.FDirect f else I.FPR f + end + + fun copyInstrF'((rds as [d], rss as [s]), _) = + I.fmove{fsize=I.FP64,src=FMemReg s,dst=FMemReg d} + | copyInstrF'((rds, rss), I.COPY{k=CB.FP, tmp, ...}) = + fcopy{dst=rds, src=rss, tmp=tmp} + | copyInstrF'(x, I.ANNOTATION{i, a}) = + I.ANNOTATION{i=copyInstrF'(x,i), a=a} + | copyInstrF' _ = error "copyInstrF'" + + val copyInstrF' = fn x => [copyInstrF' x] + + fun spillFreg' S {src, reg, spillLoc, annotations=an} = + (inc floatSpillCnt; + [I.fmove{fsize=I.FP64, src=FMemReg src, + dst=getFregLoc(S, an,spillLoc)}] + ) + + fun renameF'{instr, fromSrc, toSrc} = + (inc floatRenameCnt; + reloadFInstr(instr, fromSrc, I.FPR toSrc) + ) + + fun reloadFreg' S {dst, reg, spillLoc, annotations=an} = + (inc floatReloadCnt; + [I.fmove{fsize=I.FP64, dst=FMemReg dst, + src=getFregLoc(S,an,spillLoc)}] + ) + + (* ------------------------------------------------------------------- + * Integer 8 stuff + * -------------------------------------------------------------------*) + fun copy{dst, src, tmp} = I.COPY{k=CB.GP, sz=32, dst=dst, src=src, tmp=tmp} + fun memToMemMove{dst, src} = + let val tmp = I.C.newReg() + in [I.move{mvOp=I.MOVL,src=src,dst=I.Direct tmp}, + I.move{mvOp=I.MOVL,src=I.Direct tmp,dst=dst} + ] + end + + fun copyInstrR((rds as [d], rss as [s]), _) = + if CB.sameColor(d,s) then [] else + let val dx = CB.registerNum d and sx = CB.registerNum s + in case (dx >= 8 andalso dx < 32, sx >= 8 andalso sx < 32) of + (false, false) => [copy{dst=rds, src=rss, tmp=NONE}] + | (true, false) => [I.move{mvOp=I.MOVL,src=I.Direct s, + dst=I.MemReg d}] + | (false, true) => [I.move{mvOp=I.MOVL,src=I.MemReg s, + dst=I.Direct d}] + | (true, true) => memToMemMove{src=I.MemReg s, dst=I.MemReg d} + end + | copyInstrR((rds, rss), I.COPY{k=CB.GP, tmp, ...}) = + [copy{dst=rds, src=rss, tmp=tmp}] + | copyInstrR(x, I.ANNOTATION{i, a}) = + copyInstrR(x, i) (* XXX *) + | copyInstrR _ = error "copyInstrR" + + + fun getRegLoc(S, an, cell, Ra.FRAME loc) = + Int.spillLoc{info=S, an=an, cell=cell, id=loc} + | getRegLoc(S, an, cell, Ra.MEM_REG r) = {opnd=I.MemReg r,kind=SPILL_LOC} + + (* No, logical spill locations... *) + + structure GR8 = GetReg(val nRegs=8 + val available=map CB.registerId Int.avail + val first=0) + + val K8 = length Int.avail + + (* register allocation for general purpose registers *) + fun spillR8 S {annotations=an, kill, reg, spillLoc, instr} = let + fun annotate([], i) = i + | annotate(a::an, i) = annotate(an, I.ANNOTATION{a=a, i=i}) + + (* preserve annotation on instruction *) + fun spill(instrAn, I.ANNOTATION{a,i}) = spill(a::instrAn, i) + | spill(instrAn, I.KILL{regs, spilled}) = + {code= + [annotate + (instrAn, + I.KILL {regs=C.rmvReg(reg, regs), + spilled=C.addReg(reg, spilled)})], + proh = [], + newReg=NONE} + | spill(instrAn, I.LIVE _) = error "spill: LIVE" + | spill(_, I.COPY _) = error "spill: COPY" + | spill(instrAn, I.INSTR _) = + (case getRegLoc(S, an, reg, spillLoc) + of {opnd=spillLoc, kind=SPILL_LOC} => + ( inc intSpillCnt; + spillInstr(annotate(instrAn, instr), reg, spillLoc) + ) + | _ => (* don't have to spill a constant *) + {code=[], newReg=NONE, proh=[]} + (*esac*)) + in spill([], instr) + end + + fun isMemReg r = let val x = CB.registerNum r + in x >= 8 andalso x < 32 end + + fun spillReg S {src, reg, spillLoc, annotations=an} = + let val _ = inc intSpillCnt + val {opnd=dstLoc,kind} = getRegLoc(S,an,reg,spillLoc) + val isMemReg = isMemReg src + val srcLoc = if isMemReg then I.MemReg src else I.Direct src + in if kind=CONST_VAL orelse InsnProps.eqOpn(srcLoc, dstLoc) then [] + else if isMemReg then memToMemMove{dst=dstLoc, src=srcLoc} + else [I.move{mvOp=I.MOVL, src=srcLoc, dst=dstLoc}] + end + + fun spillCopyTmp S {copy=I.COPY{k=CB.GP, src, dst,...}, + reg, spillLoc, annotations=an} = + (case getRegLoc(S, an, reg, spillLoc) of + {opnd=tmp, kind=SPILL_LOC} => + (inc intSpillCnt; + copy{dst=dst, src=src, tmp=SOME tmp} + ) + | _ => error "spillCopyTmp" + ) + | spillCopyTmp S {copy=I.ANNOTATION{i, a}, reg, spillLoc, annotations} = + I.ANNOTATION{i=spillCopyTmp S {copy=i, reg=reg, spillLoc=spillLoc, + annotations=annotations}, a=a} + | spillCopyTmp _ _ = error "spillCopyTmp(2)" + + fun renameR8{instr, fromSrc, toSrc} = + (inc intRenameCnt; + reloadInstr(instr, fromSrc, I.Direct toSrc) + ) + + + fun reloadR8 S {annotations=an, reg, spillLoc, instr} = let + fun reload(instrAn, I.ANNOTATION{a,i}) = reload(a::instrAn, i) + | reload(instrAn, I.LIVE{regs, spilled}) = + {code=[I.LIVE{regs=C.rmvReg(reg, regs), spilled=C.addReg(reg, spilled)}], + proh=[], + newReg=NONE} + | reload(_, I.KILL _) = error "reload: KILL" + | reload (_, I.COPY _) = error "reload: COPY" + | reload(instrAn, instr as I.INSTR _) = + ( inc intReloadCnt; + reloadInstr(annotate(instrAn, instr), reg, #opnd(getRegLoc(S,an,reg,spillLoc))) + ) + in reload([], instr) + end + + fun reloadReg S {dst, reg, spillLoc, annotations=an} = + let val _ = inc intReloadCnt + val srcLoc = #opnd(getRegLoc(S, an, reg, spillLoc)) + val isMemReg = isMemReg dst + val dstLoc = if isMemReg then I.MemReg dst else I.Direct dst + in if InsnProps.eqOpn(srcLoc,dstLoc) then [] + else if isMemReg then memToMemMove{dst=dstLoc, src=srcLoc} + else [I.move{mvOp=I.MOVL, src=srcLoc, dst=dstLoc}] + end + + fun resetRA() = + (firstSpill := true; + firstFPSpill := true; + IntHashTable.clear affectedBlocks; + IntHashTable.clear deadRegs; + if !fast_floating_point then FR8.reset() else FR32.reset(); + GR8.reset() + ) + + (* Dedicated + available registers *) + local + fun mark(arr, _, [], others) = others + | mark(arr, len, r::rs, others) = let + val r = CB.registerId r + in + if r >= len then mark(arr, len, rs, r::others) + else (Array.update(arr, r, true); mark(arr, len, rs, others)) + end + val dedicatedR = Array.array(32,false) + val dedicatedF32 = Array.array(64,false) + val otherR = mark(dedicatedR, 32, Int.dedicated, []) + val otherF32 = mark(dedicatedF32, 64, Float.dedicated, []) + fun isDedicated (len, arr, other) r = + (r < len andalso Array.sub(arr, r)) orelse List.exists (fn d => r = d) other + in + val isDedicatedR : int -> bool = isDedicated (32, dedicatedR, otherR) + val isDedicatedF32 : int -> bool = isDedicated (64, dedicatedF32, otherF32) + val isDedicatedF8 : int -> bool = fn _ => false + end + + fun phases ps = + let fun f([], m) = m + | f(SPILL_PROPAGATION::ps, m) = f(ps, Ra.SPILL_PROPAGATION+m) + | f(SPILL_COLORING::ps, m) = f(ps, Ra.SPILL_COLORING+m) + in f(ps, Ra.NO_OPTIMIZATION) + end + + (* RA parameters *) + + (* How to allocate integer registers: + * Perform register alocation + memory allocation + *) + fun RAInt S = + {spill = spillR8 S, + spillSrc = spillReg S, + spillCopyTmp= spillCopyTmp S, + reload = reloadR8 S, + reloadDst = reloadReg S, + renameSrc = renameR8, + copyInstr = copyInstrR, + K = K8, + getreg = GR8.getreg, + cellkind = CB.GP, + dedicated = isDedicatedR, + spillProh = [], + memRegs = Int.memRegs, + mode = phases(Int.phases) + } : Ra.raClient + + (* How to allocate floating point registers: + * Allocate all fp registers on the stack. This is the easy way. + *) + fun RAFP32 S = + {spill = spillF S, + spillSrc = spillFreg S, + spillCopyTmp= spillFcopyTmp S, + reload = reloadF S, + reloadDst = reloadFreg S, + renameSrc = renameF, + copyInstr = copyInstrF, + K = KF32, + getreg = FR32.getreg, + cellkind = CB.FP, + dedicated = isDedicatedF32, + spillProh = [], + memRegs = Float.memRegs, + mode = phases(Float.phases) + } : Ra.raClient + + (* How to allocate floating point registers: + * Allocate fp registers on the %st stack. Also perform + * memory allcoation. + *) + fun RAFP8 S = + {spill = spillF S, + spillSrc = spillFreg' S, + spillCopyTmp= spillFcopyTmp S, + reload = reloadF S, + reloadDst = reloadFreg' S, + renameSrc = renameF', + copyInstr = copyInstrF', + K = KF8, + getreg = FR8.getreg, + cellkind = CB.FP, + dedicated = isDedicatedF8, + spillProh = [], + memRegs = Float.fastMemRegs, + mode = phases(Float.fastPhases) + } : Ra.raClient + + (* Two RA modes, fast and normal *) + fun fast_fp S = [RAInt S, RAFP8 S] + fun normal_fp S = [RAInt S, RAFP32 S] + + (* The main ra routine *) + fun run cluster = + let val printGraph = + if !x86CfgDebugFlg then + PrintFlowgraph.printCFG(!MLRiscControl.debug_stream) + else fn msg => fn _ => () + + val S = beforeRA cluster + val _ = resetRA() + + (* generic register allocator *) + + val cluster = Ra.ra + (if !fast_floating_point then fast_fp S else normal_fp S) + cluster + + val _ = removeDeadCode cluster + + val _ = printGraph "\t---After register allocation K=8---\n" cluster + + (* Run the FP translation phase when fast floating point has + * been enabled + *) + val cluster = + if !fast_floating_point andalso I.C.numCell CB.FP () > 0 then + let val cluster = X86FP.run cluster + in printGraph "\t---After X86 FP translation ---\n" cluster; + cluster + end + else cluster + in cluster + end + +end diff --git a/MLRISC/x86/ra/x86RegAlloc.sml b/MLRISC/x86/ra/x86RegAlloc.sml new file mode 100644 index 0000000..4904d46 --- /dev/null +++ b/MLRISC/x86/ra/x86RegAlloc.sml @@ -0,0 +1,71 @@ +functor X86RegAlloc + (structure I : INSTRUCTIONS where C = X86Cells + structure P : INSN_PROPERTIES where I = I + structure F : FLOWGRAPH where I = I + structure Asm : INSTRUCTION_EMITTER where I = I and P = F.P + ) : + sig + + functor IntRa (structure RaUser : RA_USER_PARAMS + where I = I + and B = F.B) : RA + + functor FloatRa (structure RaUser : RA_USER_PARAMS + where I = I + and B = F.B) : RA + end = +struct + structure C = I.C + + (* liveness analysis for general purpose registers *) + structure RegLiveness = + Liveness(structure Flowgraph=F + structure Instruction=I + val defUse = P.defUse C.GP + val regSet = C.getCell C.GP + val cellset = C.updateCell C.GP) + + + (* integer register allocator *) + functor IntRa = + RegAllocator + (structure RaArch = struct + + structure InsnProps = P + structure AsmEmitter = Asm + structure I = I + structure Liveness=RegLiveness + val defUse = P.defUse C.GP + val firstPseudoR = 32 + val maxPseudoR = X86Cells.maxCell + val numRegs = X86Cells.numCell C.GP + val regSet = C.getCell C.GP + end) + + + + (* liveness analysis for floating point registers *) + structure FregLiveness = + Liveness(structure Flowgraph=F + structure Instruction=I + val defUse = P.defUse C.FP + val regSet = C.getCell C.FP + val cellset = C.updateCell C.FP) + + (* floating register allocator *) + functor FloatRa = + RegAllocator + (structure RaArch = struct + + structure InsnProps = P + structure AsmEmitter = Asm + structure Liveness=FregLiveness + structure I = I + + val defUse = P.defUse C.FP + val firstPseudoR = 64 + val maxPseudoR = X86Cells.maxCell + val numRegs = X86Cells.numCell C.FP + val regSet = C.getCell C.FP + end) +end diff --git a/MLRISC/x86/ra/x86Rewrite.sig b/MLRISC/x86/ra/x86Rewrite.sig new file mode 100644 index 0000000..6574b41 --- /dev/null +++ b/MLRISC/x86/ra/x86Rewrite.sig @@ -0,0 +1,15 @@ +(* x86Rewrite.sig + * + * COPYRIGHT (c) 2018 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + *) + +signature X86REWRITE = + sig + structure I : X86INSTR + structure CB : CELLS_BASIS = CellsBasis + val rewriteUse : I.instruction * CB.cell * CB.cell -> I.instruction + val rewriteDef : I.instruction * CB.cell * CB.cell -> I.instruction + val frewriteUse : I.instruction * CB.cell * CB.cell -> I.instruction + val frewriteDef : I.instruction * CB.cell * CB.cell -> I.instruction + end diff --git a/MLRISC/x86/ra/x86Rewrite.sml b/MLRISC/x86/ra/x86Rewrite.sml new file mode 100644 index 0000000..4d6846b --- /dev/null +++ b/MLRISC/x86/ra/x86Rewrite.sml @@ -0,0 +1,266 @@ +(* x86Rewrite.sml -- rewrite an alpha instruction + * + * COPYRIGHT (c) 1997 Bell Labs + *) +functor X86Rewrite(Instr : X86INSTR) : X86REWRITE = struct + structure I=Instr + structure C=I.C + structure CB = CellsBasis + fun error msg = MLRiscErrorMsg.error("X86Rewrite", msg) + + fun operand (rs,rt) opnd = + (case opnd + of I.Direct r => if CB.sameColor(r,rs) then I.Direct rt else opnd + | I.Displace{base, disp, mem} => + if CB.sameColor(base,rs) then I.Displace{base=rt, disp=disp, mem=mem} + else opnd + | I.Indexed{base as SOME b, index, scale, disp, mem} => let + val base'= if CB.sameColor(b,rs) then SOME rt else base + val index'=if CB.sameColor(index,rs) then rt else index + in I.Indexed{base=base', index=index', scale=scale, disp=disp, mem=mem} + end + | I.Indexed{base, index, scale, disp, mem=mem} => + if CB.sameColor(index,rs) then + I.Indexed{base=base, index=rt, scale=scale, disp=disp, mem=mem} + else opnd + | _ => opnd + (*esac*)) + + fun rewriteUse(instr, rs, rt) = let + val operand = operand (rs, rt) + fun replace r = if CB.sameColor(r,rs) then rt else r + fun rewriteX86Use(instr) = + (case instr + of I.JMP(opnd, labs) => I.JMP(operand opnd, labs) + | I.JCC{cond, opnd} => I.JCC{cond=cond, opnd = operand opnd} + | I.CALL{opnd, defs, uses, return, cutsTo, mem, pops} => + I.CALL{opnd=operand opnd, defs=defs, return=return, + uses=CB.CellSet.map {from=rs,to=rt} uses, cutsTo=cutsTo, + mem=mem, pops=pops} + | I.MOVE{mvOp, src, dst as I.Direct _} => + I.MOVE{mvOp=mvOp, src=operand src, dst=dst} + | I.MOVE{mvOp, src, dst} => + I.MOVE{mvOp=mvOp, src=operand src, dst=operand dst} + | I.LEA{r32, addr} => I.LEA{r32=r32, addr=operand addr} + | I.CMPL{lsrc, rsrc} => I.CMPL{lsrc=operand lsrc, rsrc=operand rsrc} + | I.CMPW{lsrc, rsrc} => I.CMPW{lsrc=operand lsrc, rsrc=operand rsrc} + | I.CMPB{lsrc, rsrc} => I.CMPB{lsrc=operand lsrc, rsrc=operand rsrc} + | I.TESTL{lsrc, rsrc} => I.TESTL{lsrc=operand lsrc, rsrc=operand rsrc} + | I.TESTW{lsrc, rsrc} => I.TESTW{lsrc=operand lsrc, rsrc=operand rsrc} + | I.TESTB{lsrc, rsrc} => I.TESTB{lsrc=operand lsrc, rsrc=operand rsrc} + | I.BITOP{bitOp, lsrc, rsrc} => + I.BITOP{bitOp=bitOp, lsrc=operand lsrc, rsrc=operand rsrc} + | I.BINARY{binOp, src, dst} => + I.BINARY{binOp=binOp, src=operand src, dst=operand dst} + | I.SHIFT{shiftOp, src, dst, count} => + I.SHIFT{shiftOp=shiftOp, src=operand src, dst=operand dst, + count=operand src} + | I.CMPXCHG{lock, sz, src, dst} => + I.CMPXCHG{lock=lock, sz=sz, src=operand src, dst=operand dst} + | I.MULTDIV{multDivOp, src} => + I.MULTDIV{multDivOp=multDivOp, src=operand src} + | I.MUL3{dst, src1, src2} => + I.MUL3{dst=dst, src1=operand src1, src2=src2} + | I.UNARY{unOp, opnd} => I.UNARY{unOp=unOp, opnd=operand opnd} + | I.SET{cond, opnd} => I.SET{cond=cond, opnd=operand opnd} + | I.PUSHL opnd => I.PUSHL(operand opnd) + | I.PUSHW opnd => I.PUSHW(operand opnd) + | I.PUSHB opnd => I.PUSHB(operand opnd) + | I.POP opnd => I.POP(operand opnd) + | I.FSTPT opnd => I.FSTPT(operand opnd) + | I.FSTPL opnd => I.FSTPL(operand opnd) + | I.FSTPS opnd => I.FSTPS(operand opnd) + | I.FSTL opnd => I.FSTL(operand opnd) + | I.FSTS opnd => I.FSTS(operand opnd) + | I.FLDT opnd => I.FLDT(operand opnd) + | I.FLDL opnd => I.FLDL(operand opnd) + | I.FLDS opnd => I.FLDS(operand opnd) + | I.FUCOM opnd => I.FUCOM(operand opnd) + | I.FUCOMP opnd => I.FUCOMP(operand opnd) + | I.FCOMI opnd => I.FCOMI(operand opnd) + | I.FCOMIP opnd => I.FCOMIP(operand opnd) + | I.FUCOMI opnd => I.FUCOMI(operand opnd) + | I.FUCOMIP opnd => I.FUCOMIP(operand opnd) + | I.FENV{fenvOp,opnd} => I.FENV{fenvOp=fenvOp, opnd=operand opnd} + | I.FBINARY{binOp, src, dst} => + I.FBINARY{binOp=binOp, src=operand src, dst=dst} + | I.FIBINARY{binOp, src} => + I.FIBINARY{binOp=binOp, src=operand src} + + (* Pseudo floating point instructions *) + | I.FMOVE{fsize,src,dst} => + I.FMOVE{fsize=fsize,src=operand src,dst=operand dst} + | I.FILOAD{isize,ea,dst} => + I.FILOAD{isize=isize,ea=operand ea,dst=operand dst} + | I.FBINOP{fsize,binOp,lsrc,rsrc,dst} => + I.FBINOP{fsize=fsize,binOp=binOp, + lsrc=operand lsrc,rsrc=operand rsrc,dst=operand dst} + | I.FIBINOP{isize,binOp,lsrc,rsrc,dst} => + I.FIBINOP{isize=isize,binOp=binOp, + lsrc=operand lsrc,rsrc=operand rsrc,dst=operand dst} + | I.FUNOP{fsize,unOp,src,dst} => + I.FUNOP{fsize=fsize,unOp=unOp,src=operand src,dst=operand dst} + | I.FCMP{i,fsize,lsrc,rsrc} => + I.FCMP{i=i,fsize=fsize,lsrc=operand lsrc,rsrc=operand rsrc} + + | I.CMOV{cond, src, dst} => I.CMOV{cond=cond, src=operand src, dst=dst} + | _ => instr + (*esac*)) + + fun f(I.ANNOTATION{a,i}) = + I.ANNOTATION{i=rewriteUse(i, rs, rt), + a = case a of + CB.DEF_USE{cellkind=CB.GP,defs,uses} => + CB.DEF_USE{cellkind=CB.GP,uses=map replace uses, + defs=defs} + | _ => a} + | f(I.INSTR i) = I.INSTR(rewriteX86Use(i)) + | f(I.COPY{k as CB.GP, sz, dst, src, tmp}) = + I.COPY{k=k, sz=sz, dst=dst, src=map replace src, tmp=tmp} + | f _ = error "rewriteUse:f" + in f (instr:I.instruction) + end + + fun rewriteDef(instr, rs, rt) = let + fun operand(opnd as I.Direct r) = + if CB.sameColor(r,rs) then I.Direct rt else opnd + | operand _ = error "operand: not I.Direct" + fun replace r = if CB.sameColor(r,rs) then rt else r + fun rewriteX86Def(instr) = + (case instr + of I.CALL{opnd, defs, uses, return, cutsTo, mem, pops} => + I.CALL{opnd=opnd, cutsTo=cutsTo, + return=CB.CellSet.map {from=rs,to=rt} return, pops=pops, + defs=CB.CellSet.map {from=rs,to=rt} defs, uses=uses, mem=mem} + | I.MOVE{mvOp, src, dst} => I.MOVE{mvOp=mvOp, src=src, dst=operand dst} + | I.LEA{r32, addr} => I.LEA{r32=replace r32, addr=addr} + | I.BINARY{binOp, src, dst} => + I.BINARY{binOp=binOp, src=src, dst=operand dst} + | I.SHIFT{shiftOp, src, dst, count} => + I.SHIFT{shiftOp=shiftOp, src=src, count=count, dst=operand dst} + | I.CMPXCHG{lock, sz, src, dst} => + I.CMPXCHG{lock=lock, sz=sz, src=src, dst=operand dst} + | I.MUL3{dst, src1, src2} => I.MUL3{dst=replace dst, src1=src1, src2=src2} + | I.UNARY{unOp, opnd} => I.UNARY{unOp=unOp, opnd=operand opnd} + | I.SET{cond, opnd} => I.SET{cond=cond, opnd=operand opnd} + | I.CMOV{cond, src, dst} => I.CMOV{cond=cond, src=src, dst=replace dst} + | _ => instr + (*esac*)) + + fun f (I.ANNOTATION{a,i}) = + I.ANNOTATION{i=rewriteDef(i,rs,rt), + a=(case a of + CB.DEF_USE{cellkind=CB.GP,defs,uses} => + CB.DEF_USE{cellkind=CB.GP,uses=uses, + defs=map replace defs} + | _ => a)} + | f (I.INSTR i) = I.INSTR(rewriteX86Def(i)) + | f (I.COPY{k as CB.GP, sz, dst, src, tmp}) = + I.COPY{k=k, sz=sz, dst=map replace dst, src=src, tmp=tmp} + | f _ = error "rewriteDef:f" + in f(instr) + end + + fun frewriteUse(instr, fs, ft) = let + fun foperand(opnd as I.FDirect f) = + if CB.sameColor(f,fs) then I.FDirect ft else opnd + | foperand(opnd as I.FPR f) = + if CB.sameColor(f,fs) then I.FPR ft else opnd + | foperand opnd = opnd + + fun replace f = if CB.sameColor(f,fs) then ft else f + fun frewriteX86Use(instr) = + (case instr + of I.FLDL opnd => I.FLDL(foperand opnd) + | I.FLDS opnd => I.FLDS(foperand opnd) + | I.CALL{opnd, defs, uses, return, cutsTo, mem, pops} => + I.CALL{opnd=opnd, defs=defs, return=return, cutsTo=cutsTo, + uses=CB.CellSet.map {from=fs, to=ft} uses, mem=mem, pops=pops } + | I.FBINARY{binOp, src, dst} => + I.FBINARY{binOp=binOp, src=foperand src, dst=foperand dst} + | I.FUCOM opnd => I.FUCOM(foperand opnd) + | I.FUCOMP opnd => I.FUCOMP(foperand opnd) + | I.FCOMI opnd => I.FCOMI(foperand opnd) + | I.FCOMIP opnd => I.FCOMIP(foperand opnd) + | I.FUCOMI opnd => I.FUCOMI(foperand opnd) + | I.FUCOMIP opnd => I.FUCOMIP(foperand opnd) + + (* Pseudo floating point instructions *) + | I.FMOVE{fsize,dst,src} => + I.FMOVE{fsize=fsize,dst=dst,src=foperand src} + | I.FBINOP{fsize,binOp,lsrc,rsrc,dst} => + I.FBINOP{fsize=fsize,binOp=binOp, + lsrc=foperand lsrc,rsrc=foperand rsrc,dst=dst} + | I.FIBINOP{isize,binOp,lsrc,rsrc,dst} => + I.FIBINOP{isize=isize,binOp=binOp, + lsrc=foperand lsrc,rsrc=foperand rsrc,dst=dst} + | I.FUNOP{fsize,unOp,src,dst} => + I.FUNOP{fsize=fsize,unOp=unOp,src=foperand src,dst=dst} + | I.FCMP{i,fsize,lsrc,rsrc} => + I.FCMP{i=i,fsize=fsize,lsrc=foperand lsrc,rsrc=foperand rsrc} + | _ => instr + (*esac*)) + + fun f(I.ANNOTATION{a, i}) = + I.ANNOTATION{i=frewriteUse(i,fs,ft), + a=case a of + CB.DEF_USE{cellkind=CB.FP,defs,uses} => + CB.DEF_USE{cellkind=CB.FP,uses=map replace uses, + defs=defs} + | _ => a} + | f(I.INSTR i) = I.INSTR(frewriteX86Use(i)) + | f(I.COPY{k as CB.FP, sz, dst, src, tmp}) = + I.COPY{k=k, sz=sz, dst=dst, src=map replace src, tmp=tmp} + | f _ = error "frewrite" + in f(instr) + end + + fun frewriteDef(instr, fs, ft) = let + fun foperand(opnd as I.FDirect r) = + if CB.sameColor(r,fs) then I.FDirect ft else opnd + | foperand(opnd as I.FPR r) = + if CB.sameColor(r,fs) then I.FPR ft else opnd + | foperand opnd = opnd + fun replace f = if CB.sameColor(f,fs) then ft else f + fun frewriteX86Def(instr) = + (case instr + of I.FSTPT opnd => I.FSTPT(foperand opnd) + | I.FSTPL opnd => I.FSTPL(foperand opnd) + | I.FSTPS opnd => I.FSTPS(foperand opnd) + | I.FSTL opnd => I.FSTL(foperand opnd) + | I.FSTS opnd => I.FSTS(foperand opnd) + | I.CALL{opnd, defs, uses, return, cutsTo, mem, pops} => + I.CALL{opnd=opnd, defs=CB.CellSet.map {from=fs, to=ft} defs, + return=CB.CellSet.map {from=fs, to=ft} return, + uses=uses, cutsTo=cutsTo, mem=mem, pops=pops} + | I.FBINARY{binOp, src, dst} => I.FBINARY{binOp=binOp, src=src, dst=foperand dst} + + (* Pseudo floating point instructions *) + | I.FMOVE{fsize,src,dst} => + I.FMOVE{fsize=fsize,src=src,dst=foperand dst} + | I.FILOAD{isize,ea,dst} => + I.FILOAD{isize=isize,ea=ea,dst=foperand dst} + | I.FBINOP{fsize,binOp,lsrc,rsrc,dst} => + I.FBINOP{fsize=fsize,binOp=binOp,lsrc=lsrc,rsrc=rsrc,dst=foperand dst} + | I.FIBINOP{isize,binOp,lsrc,rsrc,dst} => + I.FIBINOP{isize=isize,binOp=binOp,lsrc=lsrc,rsrc=rsrc,dst=foperand dst} + | I.FUNOP{fsize,unOp,src,dst} => + I.FUNOP{fsize=fsize,unOp=unOp,src=src,dst=foperand dst} + | _ => instr + (*esac*)) + + fun f(I.ANNOTATION{i,a}) = + I.ANNOTATION{i=frewriteDef(i,fs,ft), + a=case a of + CB.DEF_USE{cellkind=CB.FP,defs,uses} => + CB.DEF_USE{cellkind=CB.FP,uses=uses, + defs=map replace defs} + | _ => a} + | f(I.INSTR(i)) = I.INSTR(frewriteX86Def(i)) + | f(I.COPY{k as CB.FP, dst, src, tmp, sz}) = + I.COPY{k=k, sz=sz, dst=map replace dst, src=src, tmp=tmp} + | f _ = error "frewriteDef" + in f(instr) + end +end + diff --git a/MLRISC/x86/ra/x86SpillInstr.sml b/MLRISC/x86/ra/x86SpillInstr.sml new file mode 100644 index 0000000..3783a6a --- /dev/null +++ b/MLRISC/x86/ra/x86SpillInstr.sml @@ -0,0 +1,670 @@ +(* X86Spill.sml + * + * X86 spilling is complicated business. + * Allen: and it just got more complicated; now we have to recognize the regmap. + * I've also improved the spilling code so that more instructions are + * recognized. Addressing modes are now folded into the existing instruction + * whenever possible. This eliminates some redundant temporaries which were + * introduced before. + *) +functor X86SpillInstr(structure Instr: X86INSTR + structure Props: INSN_PROPERTIES where I = Instr + ) : ARCH_SPILL_INSTR = struct + + structure I = Instr + structure C = I.C + structure CB = CellsBasis + + fun error msg = MLRiscErrorMsg.impossible("X86Spill: "^ msg) + + fun immed(I.Immed _) = true + | immed(I.ImmedLabel _) = true + | immed _ = false + + fun immedOrReg(I.Direct r) = true + | immedOrReg(I.Immed _) = true + | immedOrReg(I.ImmedLabel _) = true + | immedOrReg _ = false + + fun isMemory(I.MemReg _) = true + | isMemory(I.Displace _) = true + | isMemory(I.Indexed _) = true + | isMemory(I.LabelEA _) = true + | isMemory _ = false + + (* Annotate instruction *) + fun annotate(instr,[]) = instr + | annotate(instr,a::an) = annotate(I.ANNOTATION{i=instr,a=a},an) + + fun mark(instr, an) = annotate(I.INSTR instr, an) + + fun liveKill(add, rmv) ({regs, spilled}, reg) = + {regs=rmv(reg, regs), spilled=add(reg, spilled)} + + val fLiveKill = liveKill (C.addFreg, C.rmvFreg) + val rLiveKill = liveKill (C.addReg, C.rmvReg) + + val newReg = C.newReg + + + fun spillR(instr, reg, spillLoc) = let + fun x86Spill(instr, an) = let + fun done(instr, an) = {code=[mark(instr, an)], proh=[], newReg=NONE} + in + case instr of + I.CALL{opnd=addr, defs, uses, return, cutsTo, mem, pops} => + done(I.CALL{opnd=addr, defs=C.rmvReg(reg,defs), + return=return, uses=uses, + cutsTo=cutsTo, mem=mem, pops=pops}, an) + | I.MOVE{mvOp as (I.MOVZBL|I.MOVSBL|I.MOVZWL|I.MOVSWL), src, dst} => + let val tmpR = newReg() val tmp = I.Direct tmpR + in {proh=[tmpR], newReg=SOME tmpR, + code=[mark(I.MOVE{mvOp=mvOp, src=src, dst=tmp}, an), + I.move{mvOp=I.MOVL, src=tmp, dst=spillLoc}] + } + end + | I.MOVE{mvOp, src as I.Direct rs, dst} => + if CB.sameColor(rs,reg) then {code=[], proh=[], newReg=NONE} + else done(I.MOVE{mvOp=mvOp, src=src, dst=spillLoc}, an) + | I.MOVE{mvOp, src, dst=I.Direct _} => + if Props.eqOpn(src, spillLoc) then {code=[], proh=[], newReg=NONE} + else if immed src then + done(I.MOVE{mvOp=mvOp, src=src, dst=spillLoc}, an) + else + let val tmpR = newReg() + val tmp = I.Direct tmpR + in {proh=[tmpR], + newReg=SOME tmpR, + code=[mark(I.MOVE{mvOp=mvOp, src=src, dst=tmp}, an), + I.move{mvOp=mvOp, src=tmp, dst=spillLoc}] + } + end + | I.LEA{addr, r32} => + let val tmpR = newReg() + in {proh=[tmpR], + newReg=SOME tmpR, + code=[mark(I.LEA{addr=addr, r32=tmpR}, an), + I.move{mvOp=I.MOVL, src=I.Direct tmpR, dst=spillLoc}] + } + end + | I.BINARY{binOp=I.XORL, src as I.Direct rs, dst=I.Direct rd} => + if CB.sameColor(rs,rd) then + {proh=[], + code=[mark(I.MOVE{mvOp=I.MOVL, src=I.Immed 0, dst=spillLoc}, an)], + newReg=NONE + } + else + {proh=[], + code=[mark(I.BINARY{binOp=I.XORL, src=src, dst=spillLoc}, an)], + newReg=NONE + } + | I.BINARY{binOp, src, dst} => let (* note: dst = reg *) + fun multBinOp(I.IMULL|I.IMULW|I.IMULB) = true + | multBinOp _ = false + in + if multBinOp binOp then let + (* destination must remain a register *) + val tmpR = newReg() + val tmp = I.Direct tmpR + in + {proh=[tmpR], + code= [I.move{mvOp=I.MOVL, src=spillLoc, dst=tmp}, + I.binary{binOp=binOp, src=src, dst=tmp}, + I.move{mvOp=I.MOVL, src=tmp, dst=spillLoc}], + newReg=SOME tmpR + } + end + else if immedOrReg src then + (* can replace the destination directly *) + done(I.BINARY{binOp=binOp, src=src, dst=spillLoc}, an) + else let + (* a memory src and non multBinOp + * --- cannot have two memory operands + *) + val tmpR = newReg() + val tmp = I.Direct tmpR + in + { proh=[tmpR], + code=[I.move{mvOp=I.MOVL, src=src, dst=tmp}, + I.binary{binOp=binOp, src=tmp, dst=spillLoc}], + newReg=NONE + } + end + end + | I.SHIFT{shiftOp, count, src, dst} => error "go and implement SHIFT" + | I.CMOV{cond, src, dst} => + (* note: dst must be a register *) + (case spillLoc of + I.Direct r => + {proh=[], + newReg=NONE, + code=[mark(I.CMOV{cond=cond,src=src,dst=r},an)] + } + | _ => + let val tmpR = newReg() + val tmp = I.Direct tmpR + in {proh=[tmpR], + newReg=SOME tmpR, + code=[I.move{mvOp=I.MOVL, src=spillLoc, dst=tmp}, + mark(I.CMOV{cond=cond,src=src,dst=tmpR},an), + I.move{mvOp=I.MOVL, src=tmp, dst=spillLoc}] + } + end + ) + + | I.CMPXCHG{lock,sz,src,dst} => + if immedOrReg src then + {proh=[], + code=[mark(I.CMPXCHG{lock=lock,sz=sz,src=src,dst=spillLoc},an)], + newReg=NONE + } + else + let val tmpR = newReg() + val tmp = I.Direct tmpR + in {proh=[], + code=[I.move{mvOp=I.MOVL, src=src, dst=tmp}, + mark(I.CMPXCHG{lock=lock,sz=sz,src=tmp,dst=spillLoc},an)], + newReg=NONE + } + end + | I.MULTDIV _ => error "spill: MULTDIV" + | I.MUL3{src1, src2, dst} => + let val tmpR = newReg() + in {proh=[tmpR], newReg=SOME tmpR, + code=[mark(I.MUL3{src1=src1, src2=src2, dst=tmpR}, an), + I.move{mvOp=I.MOVL, src=I.Direct tmpR, dst=spillLoc}] + } + end + | I.UNARY{unOp, opnd} => done(I.UNARY{unOp=unOp, opnd=spillLoc}, an) + | I.SET{cond, opnd} => done(I.SET{cond=cond, opnd=spillLoc}, an) + | I.POP _ => done(I.POP spillLoc, an) + | I.FNSTSW => error "spill: FNSTSW" + | _ => error "spill" + end (* x86Spill *) + + fun f(I.INSTR instr, an) = x86Spill(instr, an) + | f(I.ANNOTATION{a, i}, an) = f(i, a::an) + | f(I.KILL lk, an) = + {code=[annotate(I.KILL(rLiveKill (lk, reg)), an)], + proh=[], + newReg=NONE} + | f _ = error "spill:f" + in f(instr, []) + end + + fun reloadR(instr, reg, spillLoc) = let + fun x86Reload(instr, reg, spillLoc, an) = let + fun operand(rt, opnd) = + (case opnd + of I.Direct r => if CB.sameColor(r,reg) then I.Direct rt else opnd + | I.Displace{base, disp, mem} => + if CB.sameColor(base,reg) + then I.Displace{base=rt, disp=disp, mem=mem} + else opnd + | I.Indexed{base=NONE, index, scale, disp, mem=mem} => + if CB.sameColor(index,reg) then + I.Indexed{base=NONE, index=rt, scale=scale, disp=disp, mem=mem} + else opnd + | I.Indexed{base as SOME b, index, scale, disp, mem=mem} => + if CB.sameColor(b,reg) then + operand(rt, I.Indexed{base=SOME rt, index=index, + scale=scale, disp=disp, mem=mem}) + else if CB.sameColor(index,reg) then + I.Indexed{base=base, index=rt, scale=scale, disp=disp, mem=mem} + else opnd + | opnd => opnd + (*esac*)) + + fun done(instr, an) = {code=[mark(instr, an)], proh=[], newReg=NONE} + + fun isReloading (I.Direct r) = CB.sameColor(r,reg) + | isReloading _ = false + + (* This version assumes that the value of tmpR is killed *) + fun withTmp(f, an) = + case spillLoc of + I.Direct tmpR => + {newReg=NONE, + proh=[], + code=[mark(f tmpR, an)] + } + | _ => + let val tmpR = newReg() + in {newReg=NONE, + proh=[tmpR], + code=[I.move{mvOp=I.MOVL, src=spillLoc, dst=I.Direct tmpR}, + mark(f tmpR, an) + ] + } + end + + (* This version assumes that the value of tmpR is available afterwards *) + fun withTmpAvail(f, an) = + case spillLoc of + I.Direct tmpR => + {newReg=SOME tmpR, + proh=[tmpR], + code=[mark(f tmpR, an)] + } + | _ => + let val tmpR = newReg() + val tmp = I.Direct tmpR + in {newReg=SOME tmpR, + proh=[tmpR], + code=[I.move{mvOp=I.MOVL, src=spillLoc, dst=I.Direct tmpR}, + mark(f tmpR, an) + ] + } + end + + fun replace(opn as I.Direct r) = + if CB.sameColor(r,reg) then spillLoc else opn + | replace opn = opn + + (* Fold in a memory operand if possible. Makes sure that both operands + * are not in memory. lsrc cannot be immediate. + *) + fun reloadCmp(cmp, lsrc, rsrc, an) = + let fun reloadIt() = + withTmp(fn tmpR => + cmp{lsrc=operand(tmpR, lsrc), rsrc=operand(tmpR, rsrc)}, an) + in if immedOrReg lsrc andalso immedOrReg rsrc then + let val lsrc' = replace lsrc + val rsrc' = replace rsrc + in if isMemory lsrc' andalso isMemory rsrc' then + reloadIt() + else + done(cmp{lsrc=lsrc', rsrc=rsrc'}, an) + end + else reloadIt() + end + + fun reloadBT(bitOp, lsrc, rsrc, an) = + reloadCmp(fn {lsrc,rsrc} => I.BITOP{bitOp=bitOp,lsrc=lsrc,rsrc=rsrc}, + lsrc, rsrc, an) + + (* Fold in a memory operand if possible. Makes sure that the right + * operand is not in memory and left operand is not an immediate. + * lsrc rsrc + * AL, imm8 opc1 A8 + * EAX, imm32 opc1 A9 + * r/m8, imm8 opc2 F6/0 ib + * r/m32, imm32 opc2 F7/0 id + * r/m32, r32 opc3 85/r + *) + fun reloadTest(test, lsrc, rsrc, an) = + let fun reloadIt() = + withTmp(fn tmpR => + test{lsrc=operand(tmpR, lsrc), rsrc=operand(tmpR, rsrc)}, an) + in if immedOrReg lsrc andalso immedOrReg rsrc then + let val lsrc = replace lsrc + val rsrc = replace rsrc + in if isMemory rsrc then + if isMemory lsrc then reloadIt() + else (* it is commutative! *) + done(test{lsrc=rsrc, rsrc=lsrc}, an) + else + done(test{lsrc=lsrc, rsrc=rsrc}, an) + end + else reloadIt() + end + + fun reloadPush(push, arg as I.Direct _, an) = + done(push(replace arg), an) + | reloadPush(push, arg, an) = + withTmpAvail(fn tmpR => push(operand(tmpR, arg)), an) + + fun reloadReal(realOp, opnd, an) = + withTmpAvail(fn tmpR => realOp(operand(tmpR, opnd)), an) + in + case instr + of I.JMP(I.Direct _, labs) => done(I.JMP(spillLoc, labs), an) + | I.JMP(opnd, labs) => withTmp(fn t => I.JMP(operand(t, opnd), labs), an) + | I.JCC{opnd=I.Direct _, cond} => done(I.JCC{opnd=spillLoc, cond=cond}, an) + | I.JCC{opnd, cond} => + withTmp(fn t => I.JCC{opnd=operand(t,opnd), cond=cond}, an) + | I.CALL{opnd, defs, uses, return, cutsTo, mem, pops} => + withTmp(fn t => + I.CALL{opnd=operand(t, opnd), defs=defs, return=return,pops=pops, + uses=C.rmvReg(reg, uses), cutsTo=cutsTo, mem=mem}, an) + | I.MOVE{mvOp, src as I.Direct _, dst as I.Direct _} => + done(I.MOVE{mvOp=mvOp, src=replace src, dst=dst},an) + | I.MOVE{mvOp, src, dst as I.Direct _} => + withTmpAvail(fn t =>I.MOVE{mvOp=mvOp, src=operand(t, src), dst=dst},an) + | I.MOVE{mvOp, src as I.Direct _, dst} => + if Props.eqOpn(dst, spillLoc) then {code=[], proh=[], newReg=NONE} + else withTmpAvail (* dst is not the spill reg *) + (fn t => + I.MOVE{mvOp=mvOp, src=operand(t, src), dst=operand(t, dst)}, an) + | I.MOVE{mvOp, src, dst} => + withTmpAvail (* dst is not the spill reg *) + (fn t => + I.MOVE{mvOp=mvOp, src=operand(t, src), dst=operand(t, dst)}, an) + | I.LEA{r32, addr} => + withTmpAvail(fn tmpR => I.LEA{r32=r32, addr=operand(tmpR, addr)}, an) + | I.CMPL{lsrc, rsrc} => reloadCmp(I.CMPL, lsrc, rsrc, an) + | I.CMPW{lsrc, rsrc} => reloadCmp(I.CMPW, lsrc, rsrc, an) + | I.CMPB{lsrc, rsrc} => reloadCmp(I.CMPB, lsrc, rsrc, an) + | I.TESTL{lsrc, rsrc} => reloadTest(I.TESTL, lsrc, rsrc, an) + | I.TESTW{lsrc, rsrc} => reloadTest(I.TESTW, lsrc, rsrc, an) + | I.TESTB{lsrc, rsrc} => reloadTest(I.TESTB, lsrc, rsrc, an) + | I.BITOP{bitOp,lsrc, rsrc} => reloadBT(bitOp, lsrc, rsrc, an) + | I.BINARY{binOp, src, dst as I.Direct _} => + (case src of + I.Direct _ => + done(I.BINARY{binOp=binOp, src=replace src, dst=dst},an) + | _ => withTmp(fn tmpR => + I.BINARY{binOp=binOp, src=operand(tmpR, src), dst=dst}, an) + ) + | I.BINARY{binOp, src, dst} => + withTmp(fn tmpR => I.BINARY{binOp=binOp, src=operand(tmpR, src), + dst=operand(tmpR, dst)}, an) + | I.CMOV{cond, src, dst} => + if CB.sameColor(dst,reg) then + error "CMOV" + else + done(I.CMOV{cond=cond, src=spillLoc, dst=dst}, an) + | I.SHIFT{shiftOp, count, src, dst} => error "go and implement SHIFT" + | I.CMPXCHG{lock,sz,src,dst} => + withTmp(fn tmpR => I.CMPXCHG{lock=lock, sz=sz, + src=operand(tmpR, src), + dst=operand(tmpR, dst)},an) + | I.MULTDIV{multDivOp, src as I.Direct _} => + done(I.MULTDIV{multDivOp=multDivOp, src=replace src}, an) + | I.MULTDIV{multDivOp, src} => + withTmp(fn tmpR => + I.MULTDIV{multDivOp=multDivOp, src=operand(tmpR, src)}, an) + | I.MUL3{src1, src2, dst} => + withTmp(fn tmpR => + I.MUL3{src1=operand(tmpR, src1), src2=src2, + dst=if CB.sameColor(dst,reg) + then error "reload:MUL3" else dst}, an) + | I.UNARY{unOp, opnd} => + withTmpAvail + (fn tmpR => I.UNARY{unOp=unOp, opnd=operand(tmpR, opnd)}, an) + | I.SET{cond, opnd} => + withTmpAvail(fn tmpR => I.SET{cond=cond, opnd=operand(tmpR, opnd)}, an) + | I.PUSHL arg => reloadPush(I.PUSHL, arg, an) + | I.PUSHW arg => reloadPush(I.PUSHW, arg, an) + | I.PUSHB arg => reloadPush(I.PUSHB, arg, an) + | I.FILD opnd => reloadReal(I.FILD, opnd, an) + | I.FILDL opnd => reloadReal(I.FILDL, opnd, an) + | I.FILDLL opnd => reloadReal(I.FILDLL, opnd, an) + | I.FLDT opnd => reloadReal(I.FLDT, opnd, an) + | I.FLDL opnd => reloadReal(I.FLDL, opnd, an) + | I.FLDS opnd => reloadReal(I.FLDS, opnd, an) + | I.FSTPT opnd => reloadReal(I.FSTPT, opnd, an) + | I.FSTPL opnd => reloadReal(I.FSTPL, opnd, an) + | I.FSTPS opnd => reloadReal(I.FSTPS, opnd, an) + | I.FSTL opnd => reloadReal(I.FSTL, opnd, an) + | I.FSTS opnd => reloadReal(I.FSTS, opnd, an) + | I.FUCOM opnd => reloadReal(I.FUCOM, opnd, an) + | I.FUCOMP opnd => reloadReal(I.FUCOMP, opnd, an) + | I.FCOMI opnd => reloadReal(I.FCOMI, opnd, an) + | I.FCOMIP opnd => reloadReal(I.FCOMIP, opnd, an) + | I.FUCOMI opnd => reloadReal(I.FUCOMI, opnd, an) + | I.FUCOMIP opnd => reloadReal(I.FUCOMIP, opnd, an) + | I.FENV{fenvOp, opnd} => reloadReal(fn opnd => + I.FENV{fenvOp=fenvOp,opnd=opnd}, opnd, an) + | I.FBINARY{binOp, src, dst} => + withTmpAvail(fn tmpR => + I.FBINARY{binOp=binOp, src=operand(tmpR, src), dst=dst}, an) + | I.FIBINARY{binOp, src} => + withTmpAvail + (fn tmpR => I.FIBINARY{binOp=binOp, src=operand(tmpR, src)}, an) + + (* Pseudo fp instrctions *) + | I.FMOVE{fsize,src,dst} => + withTmpAvail + (fn tmpR => I.FMOVE{fsize=fsize, src=operand(tmpR, src), + dst=operand(tmpR, dst)}, an) + | I.FILOAD{isize,ea,dst} => + withTmpAvail + (fn tmpR => I.FILOAD{isize=isize, ea=operand(tmpR, ea), + dst=operand(tmpR, dst)}, an) + | I.FBINOP{fsize,binOp,lsrc,rsrc,dst} => + withTmpAvail(fn tmpR => + I.FBINOP{fsize=fsize, binOp=binOp, lsrc=operand(tmpR, lsrc), + rsrc=operand(tmpR, rsrc), dst=operand(tmpR, dst)}, an) + | I.FIBINOP{isize,binOp,lsrc,rsrc,dst} => + withTmpAvail(fn tmpR => + I.FIBINOP{isize=isize, binOp=binOp, lsrc=operand(tmpR, lsrc), + rsrc=operand(tmpR, rsrc), dst=operand(tmpR, dst)}, an) + | I.FUNOP{fsize,unOp,src,dst} => + withTmpAvail(fn tmpR => + I.FUNOP{fsize=fsize, unOp=unOp, src=operand(tmpR, src), + dst=operand(tmpR, dst)}, an) + | I.FCMP{i,fsize,lsrc,rsrc} => + withTmpAvail(fn tmpR => + I.FCMP{i=i,fsize=fsize, + lsrc=operand(tmpR, lsrc), rsrc=operand(tmpR, rsrc) + }, an) + + | _ => error "reload" + end (*x86Reload*) + + fun f(I.ANNOTATION{a, i}, an) = f(i, a::an) + | f(I.INSTR i, an) = x86Reload(i, reg, spillLoc, an) + | f(I.LIVE lk, an) = + {code=[annotate(I.LIVE(rLiveKill (lk, reg)), an)], + proh=[], + newReg=NONE} + | f _ = error "reload: f" + in f(instr, []) + end (* reload *) + + + + + fun spillF(instr, reg, spillLoc) = let + fun x86Fspill(instr, reg, spillLoc, an) = let + fun withTmp(f, fsize, an) = let + val tmpR = C.newFreg() + val tmp = I.FPR tmpR + in + { proh=[tmpR], + code=[mark(f tmp, an), + I.fmove{fsize=fsize, src=tmp, dst=spillLoc}], + newReg=SOME tmpR (* XXX Should we propagate the definition? *) + } + end + in + case instr + of I.FSTPL _ => {proh=[], code=[mark(I.FSTPL spillLoc, an)], newReg=NONE} + | I.FSTPS _ => {proh=[], code=[mark(I.FSTPS spillLoc, an)], newReg=NONE} + | I.FSTPT _ => {proh=[], code=[mark(I.FSTPT spillLoc, an)], newReg=NONE} + | I.FSTL _ => {proh=[], code=[mark(I.FSTL spillLoc, an)], newReg=NONE} + | I.FSTS _ => {proh=[], code=[mark(I.FSTS spillLoc, an)], newReg=NONE} + | I.CALL{opnd, defs, uses, return, cutsTo, mem, pops} => + {proh=[], + code=[mark(I.CALL{opnd=opnd, defs=C.rmvFreg(reg,defs), + return=return, uses=uses, + cutsTo=cutsTo, mem=mem, pops=pops}, an)], + newReg=NONE} + + (* Pseudo fp instrctions *) + | I.FMOVE{fsize,src,dst} => + if Props.eqOpn(src,spillLoc) then + {proh=[], code=[], newReg=NONE} + else + {proh=[],code=[mark(I.FMOVE{fsize=fsize,src=src,dst=spillLoc},an)], + newReg=NONE} + | I.FILOAD{isize,ea,dst} => + {proh=[],code=[mark(I.FILOAD{isize=isize,ea=ea,dst=spillLoc},an)], + newReg=NONE} (* XXX bad for single precision *) + | I.FBINOP{fsize as I.FP64,binOp,lsrc,rsrc,dst} => + {proh=[],code=[mark(I.FBINOP{fsize=fsize,binOp=binOp, + lsrc=lsrc, rsrc=rsrc, + dst=spillLoc},an)], + newReg=NONE} + | I.FBINOP{fsize,binOp,lsrc,rsrc,dst} => + withTmp(fn tmpR => + I.FBINOP{fsize=fsize, binOp=binOp, + lsrc=lsrc, rsrc=rsrc, dst=tmpR}, + fsize, an) + | I.FIBINOP{isize,binOp,lsrc,rsrc,dst} => + withTmp(fn tmpR => + I.FIBINOP{isize=isize, binOp=binOp, + lsrc=lsrc, rsrc=rsrc, dst=tmpR}, + I.FP64, an) (* XXX *) + | I.FUNOP{fsize,unOp,src,dst} => + {proh=[],code=[mark(I.FUNOP{fsize=fsize,unOp=unOp, + src=src,dst=spillLoc},an)], + newReg=NONE} + | _ => error "fspill" + (*esac*) + end (* x86Fspill *) + fun f(I.ANNOTATION{a,i}, an) = f(i, a::an) + | f(I.INSTR(i), an) = x86Fspill(i, reg, spillLoc, an) + | f(I.KILL lk, an) = + {code=[annotate(I.KILL(fLiveKill (lk, reg)), an)], + proh=[], + newReg=NONE} + | f _ = error "fspill:f" + in f(instr, []) + end + + + fun reloadF(instr, reg, spillLoc) = let + fun x86Freload(instr, reg, spillLoc, an) = let + fun rename(src as I.FDirect f) = + if CB.sameColor(f,reg) then spillLoc else src + | rename(src as I.FPR f) = + if CB.sameColor(f,reg) then spillLoc else src + | rename src = src + + fun withTmp(fsize, f, an) = + case spillLoc of + I.FDirect _ => {newReg=NONE, proh=[], code=[mark(f spillLoc, an)]} + | I.FPR _ => {newReg=NONE, proh=[], code=[mark(f spillLoc, an)]} + | _ => + let val ftmpR = C.newFreg() + val ftmp = I.FPR(ftmpR) + in {newReg=NONE, + proh=[ftmpR], + code=[I.fmove{fsize=fsize, src=spillLoc, dst=ftmp}, + mark(f ftmp, an) + ] + } + end + in + (case instr of + I.FLDT opnd => {code=[mark(I.FLDT spillLoc, an)], proh=[], newReg=NONE} + | I.FLDL opnd => {code=[mark(I.FLDL spillLoc, an)], proh=[], newReg=NONE} + | I.FLDS opnd => {code=[mark(I.FLDS spillLoc, an)], proh=[], newReg=NONE} + | I.FUCOM opnd => {code=[mark(I.FUCOM spillLoc, an)],proh=[],newReg=NONE} + | I.FUCOMP opnd => {code=[mark(I.FUCOMP spillLoc, an)],proh=[],newReg=NONE} + | I.FCOMI opnd => {code=[mark(I.FCOMI spillLoc, an)],proh=[],newReg=NONE} + | I.FCOMIP opnd => {code=[mark(I.FCOMIP spillLoc, an)],proh=[],newReg=NONE} + | I.FUCOMI opnd => {code=[mark(I.FUCOMI spillLoc, an)],proh=[],newReg=NONE} + | I.FUCOMIP opnd => {code=[mark(I.FUCOMIP spillLoc, an)],proh=[],newReg=NONE} + | I.FBINARY{binOp, src=I.FDirect f, dst} => + if CB.sameColor(f,reg) then + {code=[mark(I.FBINARY{binOp=binOp, src=spillLoc, dst=dst}, an)], + proh=[], + newReg=NONE} + else error "reloadF:FBINARY" + + (* Pseudo fp instructions. + *) + | I.FMOVE{fsize,src,dst} => + if Props.eqOpn(dst,spillLoc) then + {code=[], proh=[], newReg=NONE} + else + {code=[mark(I.FMOVE{fsize=fsize,src=spillLoc,dst=dst},an)], + proh=[], newReg=NONE} + | I.FBINOP{fsize,binOp,lsrc,rsrc,dst} => + {code=[mark(I.FBINOP{fsize=fsize,binOp=binOp, + lsrc=rename lsrc, rsrc=rename rsrc,dst=dst},an)], + proh=[], newReg=NONE} + | I.FIBINOP{isize,binOp,lsrc,rsrc,dst} => + {code=[mark(I.FIBINOP{isize=isize,binOp=binOp, + lsrc=rename lsrc,rsrc=rename rsrc,dst=dst},an)], + proh=[], newReg=NONE} + | I.FUNOP{fsize,unOp,src,dst} => + {code=[mark(I.FUNOP{fsize=fsize,unOp=unOp, + src=rename src, dst=dst},an)], + proh=[], newReg=NONE} + | I.FCMP{i,fsize,lsrc,rsrc} => + (* Make sure that both the lsrc and rsrc cannot be in memory *) + (case (lsrc, rsrc) of + (I.FPR fs1, I.FPR fs2) => + (case (CB.sameColor(fs1,reg), CB.sameColor(fs2,reg)) of + (true, true) => + withTmp(fsize, + fn tmp => I.FCMP{i=i,fsize=fsize,lsrc=tmp, rsrc=tmp}, an) + | (true, false) => + {code=[mark(I.FCMP{i=i,fsize=fsize,lsrc=spillLoc,rsrc=rsrc},an)], + proh=[], newReg=NONE} + | (false, true) => + {code=[mark(I.FCMP{i=i,fsize=fsize,lsrc=lsrc,rsrc=spillLoc},an)], + proh=[], newReg=NONE} + | _ => error "fcmp.1" + ) + | (I.FPR _, _) => + withTmp(fsize, + fn tmp => I.FCMP{i=i,fsize=fsize,lsrc=tmp, rsrc=rsrc}, an) + | (_, I.FPR _) => + withTmp(fsize, + fn tmp => I.FCMP{i=i,fsize=fsize,lsrc=lsrc, rsrc=tmp}, an) + | _ => error "fcmp.2" + ) + | I.CALL{opnd, defs, uses, return, cutsTo, mem, pops} => + {proh=[], + code=[mark(I.CALL{opnd=opnd, defs=C.rmvFreg(reg,defs), + return=return, pops=pops, + uses=uses, cutsTo=cutsTo, mem=mem}, an)], + newReg=NONE} + | _ => error "reloadF" + (*esac*)) + end (* x86Freload *) + + fun f(I.ANNOTATION{a, i}, an) = f(i, a::an) + | f(I.INSTR i, an) = x86Freload(i, reg, spillLoc, an) + | f(I.LIVE lk, an) = + {code=[annotate(I.LIVE(fLiveKill (lk, reg)), an)], + proh=[], + newReg=NONE} + | f _ = error "freload.f" + + in f(instr, []) + end + + fun spillToEA CB.GP (reg, ea) = let + fun returnMove() = + {code=[I.move{mvOp=I.MOVL, src=I.Direct reg, dst=ea}], + proh=[], newReg=NONE} + in + case ea + of I.MemReg _ => returnMove() + | I.Displace _ => returnMove() + | I.Indexed _ => returnMove() + | _ => error "spillToEA: GP" + end + | spillToEA CB.FP (freg, ea) = error "spillToEA: FP" + | spillToEA _ _ = error "spillToEA" + + fun reloadFromEA CB.GP (reg, ea) = let + fun returnMove() = + {code=[I.move{mvOp=I.MOVL, dst=I.Direct reg, src=ea}], + proh=[], + newReg=NONE} + in + case ea + of I.MemReg _ => returnMove() + | I.Displace _ => returnMove() + | I.Indexed _ => returnMove() + | _ => error "reloadFromEA: GP" + end + | reloadFromEA CB.FP (freg, ea) = error "spillToEA: FP" + | reloadFromEA _ _ = error "spillToEA" + + + fun reload CB.GP = reloadR + | reload CB.FP = reloadF + | reload _ = error "reload" + + fun spill CB.GP = spillR + | spill CB.FP = spillF + | spill _ = error "spill" +end diff --git a/MLRISC/x86/x86.mdl b/MLRISC/x86/x86.mdl new file mode 100644 index 0000000..2bd56c8 --- /dev/null +++ b/MLRISC/x86/x86.mdl @@ -0,0 +1,1019 @@ +(* + * 32bit, x86 instruction set. + * + * Note: + * 1. Segmentation registers and other weird stuff are not modelled. + * 2. The instruction set that I model is 32-bit oriented. + * I don't try to fit that 16-bit mode stuff in. + * 3. BCD arithmetic is missing + * 4. Multi-precision stuff is incomplete + * 5. No MMX (maybe we'll add this in later) + * 6. Slegdehammer extensions from AMD (more later) + * + * Allen Leung (leunga@cs.nyu.edu) + * + *) +architecture X86 = +struct + + superscalar (* superscalar machine *) + + little endian (* little endian architecture *) + + lowercase assembly (* print assembly in lower case *) + + (*------------------------------------------------------------------------ + * Note: While the x86 has only 8 integer and 8 floating point registers, + * the SMLNJ compiler fakes it by assuming that it has 32 integer + * and 32 floating point registers. That's why we have 32 integer + * and 32 floating point registers in this description. + * Probably pseudo memory registers should understood directly by + * the md tool. + * + *------------------------------------------------------------------------*) + + storage + GP = $r[32] of 32 bits + asm: (fn (0,8) => "%al" | (0,16) => "%ax" | (0,32) => "%eax" + | (1,8) => "%cl" | (1,16) => "%cx" | (1,32) => "%ecx" + | (2,8) => "%dl" | (2,16) => "%dx" | (2,32) => "%edx" + | (3,8) => "%bl" | (3,16) => "%bx" | (3,32) => "%ebx" + | (4,16) => "%sp" | (4,32) => "%esp" + | (5,16) => "%bp" | (5,32) => "%ebp" + | (6,16) => "%si" | (6,32) => "%esi" + | (7,16) => "%di" | (7,32) => "%edi" + | (r,_) => "%"^Int.toString r + ) + | FP = $f[32] of 64 bits + asm: (fn (f,_) => + if f < 8 then "%st("^Int.toString f^")" + else "%f"^Int.toString f (* pseudo register *) + ) + | CC = $cc[] of 32 bits aliasing GP asm: "cc" + | EFLAGS = $eflags[1] of 32 bits asm: "$eflags" + | FFLAGS = $fflags[1] of 32 bits asm: "$fflags" + | MEM = $m[] of 8 aggregable bits asm: "mem" + | CTRL = $ctrl[] asm: "ctrl" + + locations + eax = $r[0] + and ecx = $r[1] + and edx = $r[2] + and ebx = $r[3] + and esp = $r[4] + and ebp = $r[5] + and esi = $r[6] + and edi = $r[7] + and stackptrR = $r[4] + and ST(x) = $f[x] + and ST0 = $f[0] + and asmTmpR = $r[0] (* not used *) + and fasmTmp = $f[0] (* not used *) + and eflags = $eflags[0] + + (*------------------------------------------------------------------------ + * + * Representation for various opcodes. + * + *------------------------------------------------------------------------*) + structure Instruction = + struct + (* An effective address can be any combination of + * base + index*scale + disp + * or + * B + I*SCALE + DISP + * + * where any component is optional. The operand datatype captures + * all these combinations. + * + * DISP == Immed | ImmedLabel | Const + * B == Displace{base=B, disp=0} + * B+DISP == Displace{base=B, disp=DISP} + * I*SCALE+DISP == Indexed{base=NONE,index=I,scale=SCALE,disp=D} + * B+I*SCALE+DISP == Indexed{base=SOME B,index=I,scale=SCALE,disp=DISP} + * Note1: The index register cannot be EBP. + * The disp field must be one of Immed, ImmedLabel, or Const. + *) + + (* Note: Relative is only generated after sdi resolution *) + datatype operand = + Immed of Int32.int rtl: int + | ImmedLabel of T.labexp rtl: labexp + | Relative of int (* no semantics given *) + | LabelEA of T.labexp rtl: labexp (* XXX *) + | Direct of $GP rtl: $r[GP] + (* pseudo memory register for floating point *) + | FDirect of $FP rtl: $f[FP] + (* virtual floating point register *) + | FPR of $FP rtl: $f[FP] + | ST of $FP rtl: $f[FP] + (* pseudo memory register *) + | MemReg of $GP rtl: $r[GP] + | Displace of {base: $GP, disp:operand, mem:Region.region} + rtl: $m[$r[base] + disp : mem] + | Indexed of {base: $GP option, index: $GP, scale:int, + disp:operand, mem:Region.region} + rtl: $m[$r[base] + $r[index] << scale + disp : mem] + + type addressing_mode = operand + + type ea = operand + + datatype cond! = + EQ "e" 0w4 | NE 0w5 | LT "l" 0w12 | LE 0w14 | GT "g" 0w15 | GE 0w13 + | B 0w2 | BE (* below *) 0w6 | A 0w7 | AE (* above *) 0w3 + | C 0w2 | NC (* if carry *) 0w3 | P 0wxa | NP (* if parity *) 0wxb + | O 0w0 | NO (* overflow *) 0w1 + + (* LOCK can only be used in front of + * (Intel ordering, not gasm ordering) + * ADC, ADD, AND, BT mem, reg/imm + * BTS, BTR, BTC, OR mem, reg/imm + * SBB, SUB, XOR mem, reg/imm + * XCHG reg, mem + * XCHG mem, reg + * DEC, INC, NEG, NOT mem + *) + + datatype binaryOp! = + ADDL | SUBL | ANDL | ORL | XORL | SHLL | SARL | SHRL | IMULL + | ADCL | SBBL + | ADDW | SUBW | ANDW | ORW | XORW | SHLW | SARW | SHRW | IMULW + | ADDB | SUBB | ANDB | ORB | XORB | SHLB | SARB | SHRB | IMULB + | BTSW | BTCW | BTRW | BTSL | BTCL | BTRL + | ROLW | RORW | ROLL | RORL + | XCHGB | XCHGW | XCHGL + + (* Moby need these but I'm not going to handle them in the optimzer + * until Moby starts generating these things + *) + | LOCK_ADCW "lock\n\tadcw" + | LOCK_ADCL "lock\n\tadcl" + | LOCK_ADDW "lock\n\taddw" + | LOCK_ADDL "lock\n\taddl" + | LOCK_ANDW "lock\n\tandw" + | LOCK_ANDL "lock\n\tandl" + | LOCK_BTSW "lock\n\tbtsw" + | LOCK_BTSL "lock\n\tbtsl" + | LOCK_BTRW "lock\n\tbtrw" + | LOCK_BTRL "lock\n\tbtrl" + | LOCK_BTCW "lock\n\tbtcw" + | LOCK_BTCL "lock\n\tbtcl" + | LOCK_ORW "lock\n\torw" + | LOCK_ORL "lock\n\torl" + | LOCK_SBBW "lock\n\tsbbw" + | LOCK_SBBL "lock\n\tsbbl" + | LOCK_SUBW "lock\n\tsubw" + | LOCK_SUBL "lock\n\tsubl" + | LOCK_XORW "lock\n\txorw" + | LOCK_XORL "lock\n\txorl" + | LOCK_XADDB "lock\n\txaddb" + | LOCK_XADDW "lock\n\txaddw" + | LOCK_XADDL "lock\n\txaddl" + + (* One operand opcodes *) + datatype multDivOp! = + IMULL1 "imull" | MULL1 "mull" | IDIVL1 "idivl" | DIVL1 "divl" + + datatype unaryOp! = DECL | INCL | NEGL | NOTL + | DECW | INCW | NEGW | NOTW + | DECB | INCB | NEGB | NOTB + | LOCK_DECL "lock\n\tdecl" + | LOCK_INCL "lock\n\tincl" + | LOCK_NEGL "lock\n\tnegl" + | LOCK_NOTL "lock\n\tnotl" + + datatype shiftOp! = SHLDL | SHRDL + + datatype bitOp! = BTW + | BTL + | LOCK_BTW "lock\n\tbtw" + | LOCK_BTL "lock\n\tbtl" + + datatype move! = MOVL + | MOVB + | MOVW + | MOVSWL (* sx(word) -> long *) + | MOVZWL (* zx(word) -> long *) + | MOVSBL (* sx(byte) -> long *) + | MOVZBL (* zx(byte) -> long *) + + (* The Intel manual is incorrect on the description of FDIV and FDIVR *) + datatype fbinOp! = + FADDP | FADDS + | FMULP | FMULS + | FCOMS + | FCOMPS + | FSUBP | FSUBS (* ST(1) := ST-ST(1); [pop] *) + | FSUBRP | FSUBRS (* ST(1) := ST(1)-ST; [pop] *) + | FDIVP | FDIVS (* ST(1) := ST/ST(1); [pop] *) + | FDIVRP | FDIVRS (* ST(1) := ST(1)/ST; [pop] *) + | FADDL + | FMULL + | FCOML + | FCOMPL + | FSUBL (* ST(1) := ST-ST(1); [pop] *) + | FSUBRL (* ST(1) := ST(1)-ST; [pop] *) + | FDIVL (* ST(1) := ST/ST(1); [pop] *) + | FDIVRL (* ST(1) := ST(1)/ST; [pop] *) + + datatype fibinOp! = + FIADDS (0wxde,0) | FIMULS (0wxde,1) + | FICOMS (0wxde,2) | FICOMPS (0wxde,3) + | FISUBS (0wxde,4) | FISUBRS (0wxde,5) + | FIDIVS (0wxde,6) | FIDIVRS (0wxde,7) + | FIADDL (0wxda,0) | FIMULL (0wxda,1) + | FICOML (0wxda,2) | FICOMPL (0wxda,3) + | FISUBL (0wxda,4) | FISUBRL (0wxda,5) + | FIDIVL (0wxda,6) | FIDIVRL (0wxda,7) + + datatype funOp! = + (* the first byte is always d9; the second byte is listed *) + FCHS 0wxe0 + | FABS 0wxe1 + | FTST 0wxe4 + | FXAM 0wxe5 + | FPTAN 0wxf2 + | FPATAN 0wxf3 + | FXTRACT 0wxf4 + | FPREM1 0wxf5 + | FDECSTP 0wxf6 + | FINCSTP 0wxf7 + | FPREM 0wxf8 + | FYL2XP1 0wxf9 + | FSQRT 0wxfa + | FSINCOS 0wxfb + | FRNDINT 0wxfc + | FSCALE 0wxfd + | FSIN 0wxfe + | FCOS 0wxff + + datatype fenvOp! = FLDENV | FNLDENV | FSTENV | FNSTENV + + (* Intel floating point precision *) + datatype fsize = FP32 "s" | FP64 "l" | FP80 "t" + + (* Intel integer precision *) + datatype isize = I8 "8" | I16 "16" | I32 "32" | I64 "64" + + end (* Instruction *) + + (*------------------------------------------------------------------------ + * + * Here, I'm going to define the semantics of the instructions + * + *------------------------------------------------------------------------*) + structure RTL = + struct + + (* Get the basis *) + include "Tools/basis.mdl" + open Basis + infix 1 || (* parallel effects *) + infix 2 := (* assignment *) + + (* Some type abbreviations *) + fun byte x = (x : #8 bits) + fun word x = (x : #16 bits) + fun long x = (x : #32 bits) + fun float x = (x : #32 bits) + fun double x = (x : #64 bits) + fun real80 x = (x : #80 bits) + + (* Intel register abbreviations *) + val eax = $r[0] and ecx = $r[1] and edx = $r[2] and ebx = $r[3] + and esp = $r[4] and ebp = $r[5] and esi = $r[6] and edi = $r[7] + + (* Condition codes bits in eflag. + * Let's give symbolic name for each bit as per the Intel doc. + *) + rtl setFlag : #n bits -> #n bits + fun flag b = andb($eflags[0] >> b, 1) + val CF = flag 0 and PF = flag 2 + and ZF = flag 6 and SF = flag 7 and OF = flag 11 + + (* Now gets use the bits to express the conditions. Again from Intel. *) + (* conditions *) (* aliases *) + val B = CF == 1 val C = B and NAE = B + val BE = CF == 1 orelse ZF == 1 val NA = BE + val E = ZF == 1 val Z = E + val L = SF <> OF val NGE = L + val LE = SF <> OF orelse ZF == 1 val NG = LE + val NB = CF == 0 val AE = NB and NC = NB + val NBE = CF == 0 andalso ZF == 0 val A = NBE + val NE = ZF == 0 val NZ = NE + val NL = SF == OF val GE = NL + val NLE = ZF == 0 andalso SF == OF val G = NLE + val NO = OF == 0 + val NP = PF == 0 val PO = NP + val NS = SF == 0 + val O = OF == 1 + val P = PF == 1 val PE = P + val S = SF == 1 + + rtl NOP{} = () (* duh! *) + rtl LEA{addr, r32} = $r[r32] := addr (* this is completely wrong! XXX *) + + (* moves with type conversion *) + rtl MOVL{src,dst} = dst := long src + rtl MOVW{src,dst} = dst := word src + rtl MOVB{src,dst} = dst := byte src + rtl MOVSWL{src,dst} = dst := long(sx(word src)) + rtl MOVZWL{src,dst} = dst := long(zx(word src)) + rtl MOVSBL{src,dst} = dst := long(sx(byte src)) + rtl MOVZBL{src,dst} = dst := long(zx(byte src)) + + (* semantics of integer arithmetic; + * all instructions sets the condition code + *) + fun binop typ oper {dst,src} = dst := typ(oper(dst,src)) + fun arith typ oper {dst,src} = dst := typ(oper(dst,src)) + || $eflags[0] := ??? (* XXX *) + fun unary typ oper {opnd} = opnd := typ(oper opnd) + + fun inc x = x + 1 + fun dec x = x - 1 + + (* I'm too lazy to specify the semantics of these for now *) + rtl adc sbb bts btc btr rol ror xchg xadd cmpxchg + : #n bits * #n bits -> #n bits + + rtl [ADD,SUB,AND,OR,XOR]^^B = map (arith byte) [(+),(-),andb,orb,xorb] + rtl [ADD,SUB,AND,OR,XOR]^^W = map (arith word) [(+),(-),andb,orb,xorb] + rtl [ADD,SUB,AND,OR,XOR]^^L = map (arith long) [(+),(-),andb,orb,xorb] + rtl [SHR,SHL,SAR]^^B = map (binop byte) [(>>),(<<),(~>>)] + rtl [SHR,SHL,SAR]^^W = map (binop word) [(>>),(<<),(~>>)] + rtl [SHR,SHL,SAR]^^L = map (binop long) [(>>),(<<),(~>>)] + rtl [NEG,NOT,INC,DEC]^^B = map (unary byte) [(~),notb,inc,dec] + rtl [NEG,NOT,INC,DEC]^^W = map (unary word) [(~),notb,inc,dec] + rtl [NEG,NOT,INC,DEC]^^L = map (unary long) [(~),notb,inc,dec] + + + rtl [ADC,SBB,BTS,BTC,BTR,ROL,ROR,XCHG]^^B = + map (arith byte) [adc,sbb,bts,btc,btr,rol,ror,xchg] + rtl [ADC,SBB,BTS,BTC,BTR,ROL,ROR,XCHG]^^W = + map (arith word) [adc,sbb,bts,btc,btr,rol,ror,xchg] + rtl [ADC,SBB,BTS,BTC,BTR,ROL,ROR,XCHG]^^L = + map (arith long) [adc,sbb,bts,btc,btr,rol,ror,xchg] + + fun lockarith typ oper {src,dst}= + dst := typ(oper(dst,src)) + || Kill $eflags[0] (* XXX *) + fun lockunary typ oper {opnd} = + opnd := typ(oper(opnd)) + || Kill $eflags[0] (* XXX *) + + rtl LOCK_^^[ADD,SUB,AND,OR,XOR,XADD]^^B = + map (lockarith byte) [(+),(-),andb,orb,xorb,xadd] + rtl LOCK_^^[ADD,SUB,AND,OR,XOR,XADD]^^W = + map (lockarith word) [(+),(-),andb,orb,xorb,xadd] + rtl LOCK_^^[ADD,SUB,AND,OR,XOR,XADD]^^L = + map (lockarith long) [(+),(-),andb,orb,xorb,xadd] + rtl LOCK_^^[ADC,SBB,BTS,BTC,BTR,ROL,ROR,XCHG]^^B = + map (lockarith byte) [adc,sbb,bts,btc,btr,rol,ror,xchg] + rtl LOCK_^^[ADC,SBB,BTS,BTC,BTR,ROL,ROR,XCHG]^^W = + map (lockarith word) [adc,sbb,bts,btc,btr,rol,ror,xchg] + rtl LOCK_^^[ADC,SBB,BTS,BTC,BTR,ROL,ROR,XCHG]^^L = + map (lockarith long) [adc,sbb,bts,btc,btr,rol,ror,xchg] + rtl LOCK_^^[DEC,INC,NEG,NOT]^^L = + map (lockunary long) [dec,inc,(~),notb] + rtl LOCK_^^[CMPXCHG]^^B = map (lockarith byte) [cmpxchg] + rtl LOCK_^^[CMPXCHG]^^W = map (lockarith word) [cmpxchg] + rtl LOCK_^^[CMPXCHG]^^L = map (lockarith long) [cmpxchg] + + (* Multiplication/division *) + rtl upperMultiply : #n bits * #n bits -> #n bits + rtl MULL1{src} = eax := muls(eax, src) || + edx := upperMultiply(eax, src) || + $eflags[0] := ??? + rtl IDIVL1{src} = eax := divs(eax, src) || + edx := rems(eax, src) || + $eflags[0] := ??? + rtl DIVL1{src} = edx := divu(eax, src) || + edx := remu(eax, src) || + $eflags[0] := ??? + + (* test[b,w,l] *) + rtl TESTB {lsrc,rsrc} = $eflags[0] := setFlag(andb(byte lsrc, rsrc)) + rtl TESTW {lsrc,rsrc} = $eflags[0] := setFlag(andb(word lsrc, rsrc)) + rtl TESTL {lsrc,rsrc} = $eflags[0] := setFlag(andb(long lsrc, rsrc)) + + (* setcc *) + fun set cc {opnd} = opnd := byte(cond(cc, 0xff, 0x0)) + rtl SET^^ [EQ,NE,LT,LE,GT,GE,B,BE,A,AE,C,NC,P,NP,O,NO] = + map set [E ,NE,L, LE,G ,GE,B,BE,A,AE,C,NC,P,NP,O,NO] + + (* conditional move *) + fun cmov cc {src,dst} = if cc then $r[dst] := long src else () + rtl CMOV^^ [EQ,NE,LT,LE,GT,GE,B,BE,A,AE,C,NC,P,NP,O,NO] = + map cmov [E ,NE,L, LE,G ,GE,B,BE,A,AE,C,NC,P,NP,O,NO] + + (* push and pops *) + rtl PUSHL {operand} = $m[esp - 4] := long(operand) || esp := esp - 4 + rtl PUSHW {operand} = $m[esp - 2] := word(operand) || esp := esp - 2 + rtl PUSHB {operand} = $m[esp - 1] := byte(operand) || esp := esp - 1 + rtl POP {operand} = operand := long($m[esp]) || esp := esp + 4 + + (* semantics of branches and jumps *) + rtl JMP{operand} = Jmp(long operand) + fun jcc cc {opnd} = if cc then Jmp(long opnd) else () + rtl J^^ [EQ,NE,LT,LE,GT,GE,B,BE,A,AE,C,NC,P,NP,O,NO] = + map jcc [E ,NE,L, LE,G ,GE,B,BE,A,AE,C,NC,P,NP,O,NO] + rtl CALL{opnd,defs,uses} = + Call(long opnd) || + Kill $cellset[defs] || + Use $cellset[uses] + + (* semantics of floating point operators + * The 3-address fake operators first. + *) + fun fbinop typ oper {lsrc, rsrc, dst} = dst := typ(oper(lsrc, rsrc)) + fun funary typ oper {src, dst} = dst := typ(oper src) + rtl F^^[ADD,SUB,MUL,DIV]^^L = map (fbinop double) f^^[add,sub,mul,div] + rtl F^^[ADD,SUB,MUL,DIV]^^S = map (fbinop float) f^^[add,sub,mul,div] + rtl F^^[ADD,SUB,MUL,DIV]^^T = map (fbinop real80) f^^[add,sub,mul,div] + + (* semantics of trig/transendental functions are abstract *) + rtl fsqrt fsin fcos ftan fasin facos fatan fln fexp : #n bits -> #n bits + rtl F^^[CHS,ABS,SQRT,SIN,COS,TAN,ASIN,ACOS,ATAN,LN,EXP] = + map (funary real80) + f^^[neg,abs,sqrt,sin,cos,tan,asin,acos,atan,ln,exp] + end (* RTL *) + + (*------------------------------------------------------------------------ + * Machine Instruction encoding on the x86 + * Because of variable width instructions. + * We decompose each byte field into a seperate format first, then combine + * then to form the real instructions + *------------------------------------------------------------------------*) + instruction formats 8 bits + modrm{mod:2, reg:3, rm:3} + | reg{opc:5, reg:3} + | sib{ss:2, index:3, base:3} + | immed8{imm:8} + + instruction formats 32 bits + immed32{imm:32} + + (* + * Variable format instructions + *) + instruction formats + immedOpnd{opnd} = + (case opnd of + I.Immed i32 => i32 + | I.ImmedLabel le => lexp le + | I.LabelEA le => lexp le + | _ => error "immedOpnd" + ) + | extension{opc, opnd} = (* generate an extension *) + (case opnd of + I.Direct r => modrm{mod=3, reg=opc, rm=r} + | I.MemReg _ => extension{opc,opnd=memReg opnd} + | I.FDirect _ => extension{opc,opnd=memReg opnd} + | I.Displace{base, disp, ...} => + let val immed = immedOpnd{opnd=disp} + in () (* XXX *) + end + | I.Indexed{base=NONE, index, scale, disp, ...} => () + | I.Indexed{base=SOME b, index, scale, disp, ...} => () + | _ => error "immedExt" + ) + + instruction formats 16 bits + encodeST{prefix:8, opc:5, st: $FP 3} + + instruction formats + encodeReg{prefix:8, reg: $GP 3, opnd} = + (emit prefix; immedExt{opc=reg, opnd=opnd}) + | arith{opc1,opc2,src,dst} = + (case (src, dst) of + (I.ImmedLabel le, dst) => arith{opc1,opc2,src=I.Immed(lexp le),dst} + | (I.LabelEA le, dst) => arith{opc1,opc2,src=I.Immed(lexp le),dst} + | (I.Immed i,dst) => () + | (src, I.Direct r) => encodeReg{prefix=opc1+op3,reg,opnd=src} + | (I.Direct r,dst) => encodeReg{prefix=opc1+0w1,reg,opnd=dst} + | _ => error "arith" + ) + + (*------------------------------------------------------------------------ + * A bunch of routines for emitting assembly on the x86. + * This is a headache because the syntax is quite non-orthorgonal. + * So we have to write some code to help out the md tool + * Assembly note: + * Note: we are using the AT&T syntax (for Linux) and not the intel syntax + * memory operands have the form: + * section:disp(base, index, scale) + * Most of the complication is actually in emiting the correct + * operand syntax. + *------------------------------------------------------------------------*) + + functor Assembly + (structure MemRegs : MEMORY_REGISTERS where I = Instr + val memRegBase : CellsBasis.cell option) = + struct + fun memReg r = MemRegs.memReg {reg=r, base=Option.valOf memRegBase} + fun emitInt32 i = + let val s = Int32.toString i + val s = if i >= 0 then s else "-"^String.substring(s,1,size s-1) + in emit s end + + val {low=SToffset, ...} = C.cellRange CellsBasis.FP + + fun emitScale 0 = emit "1" + | emitScale 1 = emit "2" + | emitScale 2 = emit "4" + | emitScale 3 = emit "8" + | emitScale _ = error "emitScale" + + and eImmed(I.Immed (i)) = emitInt32 i + | eImmed(I.ImmedLabel lexp) = emit_labexp lexp + | eImmed _ = error "eImmed" + + + and emit_operand opn = + case opn of + I.Immed i => (emit "$"; emitInt32 i) + | I.ImmedLabel lexp => (emit "$"; emit_labexp lexp) + | I.LabelEA le => emit_labexp le + | I.Relative _ => error "emit_operand" + | I.Direct r => emitCell r + | I.MemReg r => emit_operand(memReg opn) + | I.ST f => emitCell f + | I.FPR f => (emit "%f"; emit(Int.toString(CellsBasis.registerNum f))) + | I.FDirect f => emit_operand(memReg opn) + | I.Displace{base,disp,mem,...} => + (emit_disp disp; emit "("; emitCell base; emit ")"; + emit_region mem) + | I.Indexed{base,index,scale,disp,mem,...} => + (emit_disp disp; emit "("; + case base of + NONE => () + | SOME base => emitCell base; + comma(); + emitCell index; comma(); + emitScale scale; emit ")"; emit_region mem) + + and emit_operand8(I.Direct r) = emit(CellsBasis.toStringWithSize(r,8)) + | emit_operand8 opn = emit_operand opn + + and emit_disp(I.Immed 0) = () + | emit_disp(I.Immed i) = emitInt32 i + | emit_disp(I.ImmedLabel lexp) = emit_labexp lexp + | emit_disp _ = error "emit_disp" + + (* The gas assembler does not like the "$" prefix for immediate + * labels in certain instructions. + *) + fun stupidGas(I.ImmedLabel lexp) = emit_labexp lexp + | stupidGas opnd = (emit "*"; emit_operand opnd) + + (* Display the floating point binary opcode *) + fun isMemOpnd(I.MemReg _) = true + | isMemOpnd(I.FDirect f) = true + | isMemOpnd(I.LabelEA _) = true + | isMemOpnd(I.Displace _) = true + | isMemOpnd(I.Indexed _) = true + | isMemOpnd _ = false + fun chop fbinOp = + let val n = size fbinOp + in case Char.toLower(String.sub(fbinOp,n-1)) of + (#"s" | #"l") => String.substring(fbinOp,0,n-1) + | _ => fbinOp + end + + fun isST0 (I.ST r) = CellsBasis.registerNum r = 0 + | isST0 _ = false + + (* Special syntax for binary operators *) + fun emit_fbinaryOp(binOp,src,dst) = + if isMemOpnd src then + (emit_fbinOp binOp; emit "\t"; emit_operand src) + else (emit(chop(asm_fbinOp binOp)); emit "\t"; + case (isST0 src, isST0 dst) of + (_, true) => (emit_operand src; emit ", %st") + | (true, _) => (emit "%st, "; emit_operand dst) + | _ => error "emit_fbinaryOp" + ) + + val emit_dst = emit_operand + val emit_src = emit_operand + val emit_opnd = emit_operand + val emit_opnd8 = emit_operand8 + val emit_rsrc = emit_operand + val emit_lsrc = emit_operand + val emit_addr = emit_operand + val emit_src1 = emit_operand + val emit_ea = emit_operand + val emit_count = emit_operand + end (* Assembly *) + + + (*------------------------------------------------------------------------ + * + * Reservation tables and pipeline definitions for scheduling. + * Faked for now as I don't have to time to look up the definitions + * from the Intel doc. + * + *------------------------------------------------------------------------*) + + (* Function units *) + resource issue and mem and alu and falu and fmul and fdiv and branch + + (* Different implementations of cpus *) + cpu default 2 [2 issue, 2 mem, 1 alu, 1 falu, 1 fmul] (* 2 issue machine *) + + (* Definitions of various reservation tables *) + pipeline NOP _ = [issue] + and ARITH _ = [issue^^alu] + and LOAD _ = [issue^^mem] + and STORE _ = [issue^^mem,mem,mem] + and FARITH _ = [issue^^falu] + and FMUL _ = [issue^^fmul,fmul] + and FDIV _ = [issue^^fdiv,fdiv*50] + and BRANCH _ = [issue^^branch] + + (*------------------------------------------------------------------------ + * + * Compiler representation of the instruction set. + * + *------------------------------------------------------------------------*) + instruction + NOP + asm: ``nop'' + rtl: ``NOP'' + + | JMP of operand * Label.label list + asm: ``jmp\t'' + rtl: ``JMP'' + + | JCC of {cond:cond, opnd:operand} + asm: ``j\t'' + rtl: ``J'' + + | CALL of {opnd: operand, defs: $cellset, uses: $cellset, + return: $cellset, cutsTo: Label.label list, mem: Region.region, + pops:Int32.int} + asm: ``call\t< + emit_defs(defs)>< + emit_uses(uses)>< + emit_cellset("return",return)>< + emit_cutsTo cutsTo>'' + rtl: ``CALL'' + + | ENTER of {src1:operand, src2:operand} + asm: ``enter\t, '' + + | LEAVE + asm: ``leave'' + + | RET of operand option + asm: ``ret () + | SOME e => (emit "\t"; emit_operand e)>'' + + (* integer *) + | MOVE of {mvOp:move, src:operand, dst:operand} + asm: ``\t, '' + rtl: ``'' + + | LEA of {r32: $GP, addr: operand} + asm: ``leal\t, '' + rtl: ``LEA'' + + | CMPL of {lsrc: operand, rsrc: operand} + asm: ``cmpl\t, '' + + | CMPW of {lsrc: operand, rsrc: operand} + ``cmpb\t, '' + + | CMPB of {lsrc: operand, rsrc: operand} + ``cmpb\t, '' + + | TESTL of {lsrc: operand, rsrc: operand} + asm: ``testl\t, '' + rtl: ``TESTL'' + + | TESTW of {lsrc: operand, rsrc: operand} + asm: ``testw\t, '' + rtl: ``TESTW'' + + | TESTB of {lsrc: operand, rsrc: operand} + asm: ``testb\t, '' + rtl: ``TESTB'' + + | BITOP of {bitOp:bitOp, lsrc: operand, rsrc: operand} + ``\t, '' + + | BINARY of {binOp:binaryOp, src:operand, dst:operand} + asm: (case (src,binOp) of + (I.Direct _, (* tricky business here for shifts *) + (I.SARL | I.SHRL | I.SHLL | + I.SARW | I.SHRW | I.SHLW | + I.SARB | I.SHRB | I.SHLB)) => ``\t%cl, '' + | _ => ``\t, '' + ) + (*rtl: ``''*) + | SHIFT of {shiftOp:shiftOp, src:operand, dst:operand, count:operand} + asm: (case count of (* must be %ecx if it is a register *) + I.Direct ecx => ``\t, '' + | _ => ``\t, , '' + ) + + | CMPXCHG of {lock:bool, sz:isize, src: operand, dst:operand} + asm: (if lock then ``lock\n\t'' else (); + ``cmpxchg''; + case sz of + I.I8 => ``b'' + | I.I16 => ``w'' + | I.I32 => ``l'' + | I.I64 => error "CMPXCHG: I64"; + ``\t, '' + ) + + | MULTDIV of {multDivOp:multDivOp, src:operand} + asm: ``\t'' + + | MUL3 of {dst: $GP, src2: Int32.int, src1:operand} + (* Fermin: constant operand must go first *) + asm: ``imull\t$, , '' + + | UNARY of {unOp:unaryOp, opnd:operand} + asm: ``\t'' + rtl: ``'' + + (* set byte on condition code; note that + * this only sets the low order byte, so it also + * uses its operand. + *) + | SET of {cond:cond, opnd:operand} + asm: ``set\t'' + rtl: ``SET'' + + (* conditional move; Pentium Pro or higher only + * Destination must be a register. + *) + | CMOV of {cond:cond, src:operand, dst: $GP} + asm: ``cmov\t, '' + rtl: ``CMOV'' + + | PUSHL of operand + asm: ``pushl\t'' + rtl: ``PUSHL'' + + | PUSHW of operand + asm: ``pushw\t'' + rtl: ``PUSHW'' + + | PUSHB of operand + asm: ``pushb\t'' + rtl: ``PUSHB'' + + | PUSHFD (* push $eflags onto stack *) + ``pushfd'' + + | POPFD (* pop $eflags onto stack *) + ``popfd'' + + | POP of operand + asm: ``popl\t'' + rtl: ``POP'' + + | CDQ + ``cdq'' + + | INTO + ``into'' + + (* floating *) + | FBINARY of {binOp:fbinOp, src:operand, dst:operand} + asm: (emit_fbinaryOp(binOp,src,dst)) + + | FIBINARY of {binOp:fibinOp, src:operand} + asm: ``\t'' (* the implied destination is %ST(0) *) + + | FUNARY of funOp + ``'' + + | FUCOM of operand + ``fucom\t'' + + | FUCOMP of operand + ``fucomp\t'' + + | FUCOMPP + ``fucompp'' + + | FCOMPP + ``fcompp'' + + | FCOMI of operand + ``fcomi\t, %st'' + + | FCOMIP of operand + ``fcomip\t, %st'' + + | FUCOMI of operand + ``fucomi\t, %st'' + + | FUCOMIP of operand + ``fucomip\t, %st'' + + | FXCH of {opnd: $FP} + ``fxch\t'' + + | FSTPL of operand + asm: (case operand of + I.ST _ => ``fstp\t'' + | _ => ``fstpl\t'' + ) + + | FSTPS of operand + ``fstps\t'' + + | FSTPT of operand + ``fstps\t'' + + | FSTL of operand + asm: (case operand of + I.ST _ => ``fst\t'' + | _ => ``fstl\t'' + ) + + | FSTS of operand + ``fsts\t'' + + | FLD1 + ``fld1'' + + | FLDL2E + ``fldl2e'' + + | FLDL2T + ``fldl2t'' + + | FLDLG2 + ``fldlg2'' + + | FLDLN2 + ``fldln2'' + + | FLDPI + ``fldpi'' + + | FLDZ + ``fldz'' + + | FLDL of operand + asm: (case operand of + I.ST _ => ``fld\t'' + | _ => ``fldl\t'' + ) + + | FLDS of operand + ``flds\t'' + + | FLDT of operand + ``fldt\t'' + + | FILD of operand + ``fild\t'' + + | FILDL of operand + ``fildl\t'' + + | FILDLL of operand + ``fildll\t'' + + | FNSTSW + ``fnstsw'' + + | FENV of {fenvOp:fenvOp, opnd:operand} (* load/store environment *) + ``\t'' + + (* pseudo floating ops *) + | FMOVE of {fsize:fsize, src:operand, dst:operand} + ``fmove\t, '' + + | FILOAD of {isize:isize, ea:operand, dst:operand} + ``fiload\t, '' + + | FBINOP of {fsize:fsize, + binOp:fbinOp, lsrc:operand, rsrc:operand, dst:operand} + ``\t, , '' + (* rtl: ``'' *) + + | FIBINOP of {isize:isize, + binOp:fibinOp, lsrc:operand, rsrc:operand, dst:operand} + ``\t, , '' + (* rtl: ``'' *) + + | FUNOP of {fsize:fsize, unOp:funOp, src:operand, dst:operand} + ``\t, '' + (* rtl: [[unOp fsize]] *) + + | FCMP of {i:bool,fsize:fsize, lsrc:operand, rsrc:operand} + asm: (if i then ``fcmpi'' else ``fcmp''; ``\t, '') + (* rtl: [["FCMP" fsize]] *) + + (* misc *) + | SAHF (* %flags -> %ah *) + ``sahf'' + + (*** concurrency operations ****) + + (* performs a serializing operation on all load-to-memory operations issued prior to + * the lfence instruction. + *) + | LFENCE + asm: ``lfence'' + rtl: ``LFENCE'' + + (* performs a serializing operation on all load-from-memory and store-to-memory + * operations issued prior to the mfence instruction. + *) + | MFENCE + asm: ``mfence'' + rtl: ``MFENCE'' + + (* performs a serializing operation on all store-to-memory operations issued prior to + * the sfence instruction. + *) + | SFENCE + asm: ``sfence'' + rtl: ``SFENCE'' + + (* improves performance of spin-wait loops *) + | PAUSE + asm: ``pause'' + rtl: ``PAUSE'' + + | LAHF (* %ah -> %flags *) + ``lahf'' + + | SOURCE of {} + asm: ``source'' + mc: () + + | SINK of {} + asm: ``sink'' + mc: () + + | PHI of {} + asm: ``phi'' + mc: () + + (*------------------------------------------------------------------------ + * Some helper routines for the SSA optimizer. + * These should go away soon. + *------------------------------------------------------------------------*) + structure SSA = + struct + fun operand(ty, I.Immed i) = T.LI(T.I.fromInt32(32,i)) + (*| operand(ty, I.ImmedLabel le) = T.LABEL le*) + | operand(ty, I.Direct r) = T.REG(ty, r) + | operand _ = error "operand" + end + (*------------------------------------------------------------------------ + * Some helper routines for the rewriting module. + * These should go away soon. + *------------------------------------------------------------------------*) + structure Rewrite = + struct + fun rewriteOperandUse (rs,rt,opnd) = + (case opnd + of I.Direct r => if C.sameColor(r,rs) then I.Direct rt else opnd + | I.Displace{base, disp, mem} => + if C.sameColor(base,rs) + then I.Displace{base=rt, disp=disp, mem=mem} + else opnd + | I.Indexed{base as SOME b, index, scale, disp, mem} => let + val base'= if C.sameColor(b,rs) then SOME rt else base + val index'=if C.sameColor(index,rs) then rt else index + in I.Indexed{base=base', index=index', scale=scale, + disp=disp, mem=mem} + end + | I.Indexed{base, index, scale, disp, mem=mem} => + if C.sameColor(index,rs) then + I.Indexed{base=base, index=rt, scale=scale, disp=disp, mem=mem} + else opnd + | _ => opnd + (*esac*)) + + fun rewriteOperandDef (rs,rt,opnd as I.Direct r) = + if C.sameColor(r,rs) then I.Direct rt else opnd + + fun frewriteOperandDef(fs,ft,opnd as I.FDirect f) = + if C.sameColor(f,fs) then I.FDirect ft else opnd + | frewriteOperandDef(fs,ft,opnd as I.FPR f) = + if C.sameColor(f,fs) then I.FPR ft else opnd + | frewriteOperandDef opnd = opnd + + fun frewriteOperandUse(fs,ft,opnd as I.FDirect r) = + if C.sameColor(r,fs) then I.FDirect ft else opnd + | frewriteOperandUse(fs,ft,opnd as I.FPR r) = + if C.sameColor(r,fs) then I.FPR ft else opnd + | frewriteOperandUse(fs,ft, opnd) = opnd + end + +end + diff --git a/MLRISC/x86/x86MC.sml b/MLRISC/x86/x86MC.sml new file mode 100644 index 0000000..f8a67ab --- /dev/null +++ b/MLRISC/x86/x86MC.sml @@ -0,0 +1,554 @@ +(* + * + * COPYRIGHT (c) 1996 Bell Laboratories. + * + * IMPORTANT NOTE: + * Integer registers are numbered from 0 - 31 (0-7 are physical) + * Floating point registers are numbered from 32-63 (32-39 are physical) + *) +functor X86MCEmitter + (structure Instr : X86INSTR + structure Shuffle : X86SHUFFLE where I = Instr + structure MLTreeEval : MLTREE_EVAL where T = Instr.T + structure MemRegs : MEMORY_REGISTERS where I = Instr + val memRegBase : CellsBasis.cell option + structure AsmEmitter : INSTRUCTION_EMITTER where I = Instr) : MC_EMIT = +struct + structure I = Instr + structure C = I.C + structure Const = I.Constant + structure W32 = Word32 + structure W8 = Word8 + structure W = LargeWord + structure CB = CellsBasis + structure LE = MLTreeEval + + val itow = Word.fromInt + val wtoi = Word.toInt + + fun error msg = MLRiscErrorMsg.impossible ("X86MCEmitter." ^ msg) + + (* + * Sanity check! + *) + + val eax = 0 val esp = 4 + val ecx = 1 val ebp = 5 + val edx = 2 val esi = 6 + val ebx = 3 val edi = 7 + + val opnd16Prefix = 0x66 + + fun const c = Int32.fromInt(Const.valueOf c) + fun lexp le = Int32.fromInt(LE.valueOf le) + + val toWord8 = Word8.fromLargeWord o LargeWord.fromLargeInt o Int32.toLarge + val eBytes = Word8Vector.fromList + fun eByte i = eBytes [W8.fromInt i] + local + val toLWord = (W.fromLargeInt o Int32.toLarge) + fun shift (w,cnt) = W8.fromLargeWord(W.>>(w, cnt)) + in + fun eShort i16 = let + val w = toLWord i16 + in [shift(w, 0w0), shift(w,0w8)] + end + fun eLong i32 = let + val w = toLWord i32 + in [shift(w, 0w0), shift(w,0w8), shift(w,0w16), shift(w,0w24)] + end + end + + fun emitInstrs(instrs) = Word8Vector.concat(map emitInstr instrs) + + and emitX86Instr(instr: I.instr) = let + val error = + fn msg => + let val AsmEmitter.S.STREAM{emit,...} = AsmEmitter.makeStream [] + in emit (I.INSTR instr); error msg end + + val rNum = CB.physicalRegisterNum + val fNum = CB.physicalRegisterNum + + fun memReg r = MemRegs.memReg{reg=r, base=Option.valOf memRegBase} + + datatype size = Zero | Bits8 | Bits32 + fun size i = + if i = 0 then Zero + else if Int32.<(i, 128) andalso Int32.<=(~128, i) then Bits8 + else Bits32 + + fun immedOpnd(I.Immed(i32)) = i32 + | immedOpnd(I.ImmedLabel le) = lexp le + | immedOpnd(I.LabelEA le) = lexp le + | immedOpnd _ = error "immedOpnd" + + nonfix mod + + fun scale(n, m) = Word.toIntX(Word.<<(Word.fromInt n, Word.fromInt m)) + fun modrm{mod, reg, rm} = W8.fromInt(scale(mod,6) + scale(reg,3) + rm) + fun sib{ss, index, base} = W8.fromInt(scale(ss,6) + scale(index,3) + base) + fun reg{opc, reg} = W8.fromInt(scale(opc,3) + reg) + + fun eImmedExt(opc, I.Direct r) = [modrm{mod=3, reg=opc, rm=rNum r}] + | eImmedExt(opc, opn as I.MemReg _) = eImmedExt(opc, memReg opn) + | eImmedExt(opc, I.Displace{base, disp, ...}) = let + val base = rNum base (* XXX rNum may be done twice *) + val immed = immedOpnd disp + fun displace(mod, eDisp) = + if base=esp then + modrm{mod=mod, reg=opc, rm=4}:: + sib{ss=0, index=4, base=esp}::eDisp immed + else + modrm{mod=mod, reg=opc, rm=base} :: eDisp immed + in + case size immed + of Zero => + if base=esp then + [modrm{mod=0, reg=opc, rm=4}, sib{ss=0,index=4,base=esp}] + else if base=ebp then + [modrm{mod=1, reg=opc, rm=ebp}, 0w0] + else + [modrm{mod=0, reg=opc, rm=base}] + | Bits8 => displace(1, fn i => [toWord8 i]) + | Bits32 => displace(2, eLong) + (*esac*) + end + | eImmedExt(opc, I.Indexed{base=NONE, index, scale, disp, ...}) = + (modrm{mod=0, reg=opc, rm=4} :: + sib{base=5, ss=scale, index=rNum index} :: + eLong(immedOpnd disp)) + | eImmedExt(opc, I.Indexed{base=SOME b, index, scale, disp, ...}) = let + val index = rNum index + val base = rNum b + val immed = immedOpnd disp + fun indexed(mod, eDisp) = + modrm{mod=mod, reg=opc, rm=4} :: + sib{ss=scale, index=index, base=base} :: eDisp immed + in + case size immed + of Zero => + if base=ebp then + [modrm{mod=1, reg=opc, rm=4}, + sib{ss=scale, index=index, base=5}, 0w0] + else + [modrm{mod=0, reg=opc, rm=4}, + sib{ss=scale, index=index, base=base}] + | Bits8 => indexed(1, fn i => [toWord8 i]) + | Bits32 => indexed(2, eLong) + (*esac*) + end + | eImmedExt(opc, opnd as I.FDirect f) = eImmedExt(opc, memReg opnd) + | eImmedExt(_, I.Immed _) = error "eImmedExt: Immed" + | eImmedExt(_, I.ImmedLabel _) = error "eImmedExt: ImmedLabel" + | eImmedExt(_, I.Relative _) = error "eImmedExt: Relative" + | eImmedExt(_, I.LabelEA _) = error "eImmedExt: LabelEA" + | eImmedExt(_, I.FPR _) = error "eImmedExt: FPR" + | eImmedExt(_, I.ST _) = error "eImmedExt: ST" + + (* Short hands for various encodings *) + fun encode(byte1, opc, opnd) = eBytes(byte1 :: eImmedExt(opc, opnd)) + fun encodeST(byte1, opc, STn) = eBytes[byte1, reg{opc=opc,reg=fNum STn}] + fun encode2(byte1, byte2, opc, opnd) = + eBytes(byte1 :: byte2 :: eImmedExt(opc, opnd)) + fun encodeReg(byte1, reg, opnd) = encode(byte1, rNum reg, opnd) + fun encodeLongImm(byte1, opc, opnd, i) = + eBytes(byte1 :: (eImmedExt(opc, opnd) @ eLong i)) + fun encodeShortImm(byte1, opc, opnd, w) = + eBytes(byte1 :: (eImmedExt(opc, opnd) @ eShort w)) + fun encodeByteImm(byte1, opc, opnd, b) = + eBytes(byte1 :: (eImmedExt(opc, opnd) @ [toWord8 b])) + + fun condCode cond = + (case cond + of I.EQ => 0w4 | I.NE => 0w5 + | I.LT => 0w12 | I.LE => 0w14 + | I.GT => 0w15 | I.GE => 0w13 + | I.A => 0w7 | I.AE => 0w3 + | I.B => 0w2 | I.BE => 0w6 + | I.C => 0w2 | I.NC => 0w3 + | I.P => 0wxa | I.NP => 0wxb + | I.O => 0w0 | I.NO => 0w1 + (*esac*)) + + (* arith: only 5 cases need be considered: + * dst, src + * ----------- + * EAX, imm32 + * r/m32, imm32 + * r/m32, imm8 + * r/m32, r32 + * r32, r/m32 + *) + fun arith(opc1, opc2) = let + fun f(I.ImmedLabel le, dst) = f(I.Immed(lexp le), dst) + | f(I.LabelEA le, dst) = f(I.Immed(lexp le), dst) + | f(I.Immed(i), dst) = + (case size i + of Bits32 => + (case dst + of I.Direct r => + if CB.physicalRegisterNum r = eax then + eBytes(W8.fromInt(8*opc2 + 5) :: eLong(i)) + else + encodeLongImm(0wx81, opc2, dst, i) + | _ => encodeLongImm(0wx81, opc2, dst, i) + (*esac*)) + | _ => encodeByteImm(0wx83, opc2, dst, i) (* 83 /digit ib *) + (*esac*)) + | f(src, I.Direct r) = encodeReg(opc1+0w3, r, src) + | f(I.Direct r, dst) = encodeReg(opc1+0w1, r, dst) + | f _ = error "arith.f" + in f + end + + (* test: the following cases need be considered: + * lsrc, rsrc + * ----------- + * AL, imm8 opc1 A8 + * EAX, imm32 opc1 A9 + * r/m8, imm8 opc2 F6/0 ib + * r/m32, imm32 opc2 F7/0 id + * r/m8, r8 opc3 84/r + * r/m32, r32 opc3 85/r + *) + fun test(bits, I.ImmedLabel le, lsrc) = test(bits, I.Immed(lexp le), lsrc) + | test(bits, I.LabelEA le, lsrc) = test(bits, I.Immed(lexp le), lsrc) + | test(bits, I.Immed(i), lsrc) = + (case (lsrc, i >= 0 andalso i < 255) of + (I.Direct r, false) => + if CB.physicalRegisterNum r = eax then eBytes(0wxA9 :: eLong i) + else encodeLongImm(0wxF7, 0, lsrc, i) + | (_, false) => encodeLongImm(0wxF7, 0, lsrc, i) + | (I.Direct r, true) => (* 8 bit *) + let val r = CB.physicalRegisterNum r + in if r = eax then eBytes[0wxA8, toWord8 i] + else if r < 4 then + (* unfortunately, only CL, DL, BL can be encoded *) + encodeByteImm(0wxF6, 0, lsrc, i) + else if bits = 8 then error "test.8" + else encodeLongImm(0wxF7, 0, lsrc, i) + end + | (_, true) => encodeByteImm(0wxF6, 0, lsrc, i) + ) + | test(8, rsrc as I.Direct r, lsrc) = + if rNum r < 4 then encodeReg(0wx84, r, lsrc) + else error "test.8" + | test(32, I.Direct r, lsrc) = encodeReg(0wx85, r, lsrc) + | test _ = error "test" + + in + case instr + of I.NOP => eByte 0x90 + | I.JMP(I.Relative i, _) => ((let + fun shortJmp() = eBytes[0wxeb, Word8.fromInt(i-2)] + in + case size(Int32.fromInt (i-2)) + of Bits32 => eBytes(0wxe9 :: eLong(Int32.fromInt(i-5))) + | _ => shortJmp() + (*esac*) + end + ) handle e => (print "JMP\n"; raise e)) + | I.JMP(opnd, _) => encode(0wxff, 4, opnd) + | I.JCC{cond, opnd=I.Relative i} => + let val code = condCode cond + in case size (Int32.fromInt(i-2)) + of Bits32 => + eBytes(0wx0f :: Word8.+(0wx80,code) :: eLong(Int32.fromInt(i-6))) + | _ => + eBytes[Word8.+(0wx70,code), Word8.fromInt(i-2)] + end + | I.CALL{opnd=I.Relative i,...} => eBytes(0wxe8::eLong(Int32.fromInt(i-5))) + | I.CALL{opnd, ...} => encode(0wxff, 2, opnd) + | I.RET NONE => eByte 0xc3 + (* integer *) + | I.MOVE{mvOp=I.MOVL, src, dst} => + let fun mv(I.Immed(i), I.Direct r) = + eBytes(Word8.+(0wxb8, Word8.fromInt(rNum r))::eLong(i)) + | mv(I.Immed(i), _) = encodeLongImm(0wxc7, 0, dst, i) + | mv(I.ImmedLabel le,dst) = mv(I.Immed(lexp le),dst) + | mv(I.LabelEA le,dst) = error "MOVL: LabelEA" + | mv(src as I.MemReg _, dst) = mv(memReg src, dst) + | mv(src, dst as I.MemReg _) = mv(src, memReg dst) + | mv(src,dst) = arith(0wx88, 0) (src, dst) + in mv(src,dst) end + | I.MOVE{mvOp=I.MOVW, src, dst} => let + fun immed16 i = Int32.<(i, 32768) andalso Int32.<=(~32768, i) + fun prefix v = Word8Vector.concat[eByte(opnd16Prefix), v] + fun mv(I.Immed(i), _) = + (case dst + of I.Direct r => + if immed16 i then + prefix(eBytes(W8.+(0wxb8, W8.fromInt(rNum r)):: eShort(i))) + else error "MOVW: Immediate too large" + | _ => prefix(encodeShortImm(0wxc7, 0, dst, i)) + (*esac*)) + | mv(src as I.MemReg _, dst) = mv(memReg src, dst) + | mv(src, dst as I.MemReg _) = mv(src, memReg dst) + | mv(src, dst) = prefix(arith(0wx88, 0) (src, dst)) + in mv(src, dst) + end + | I.MOVE{mvOp=I.MOVB, dst, src=I.Immed(i)} => + encodeByteImm(0wxc6, 0, dst, i) + (* 2007/02/20 AKL: just store the low order 8 bits. + Forget about checking for the range. This is because of + sign-extension issues in sml/nj's code generator. + (case size i + of Bits32 => error "MOVE: MOVB: imm8" + | _ => encodeByteImm(0wxc6, 0, dst, i) + (*esac*)) + *) + | I.MOVE{mvOp=I.MOVB, dst, src=I.Direct r} => encodeReg(0wx88, r, dst) + | I.MOVE{mvOp=I.MOVB, dst=I.Direct r, src} => encodeReg(0wx8a, r, src) + | I.MOVE{mvOp, src=I.Immed _, ...} => error "MOVE: Immed" + | I.MOVE{mvOp, src, dst=I.Direct r} => + let val byte2 = + case mvOp of + I.MOVZBL => 0wxb6 + | I.MOVZWL => 0wxb7 + | I.MOVSBL => 0wxbe + | I.MOVSWL => 0wxbf + | _ => error "MOV[SZ]X" + in eBytes(0wx0f :: byte2 :: eImmedExt(rNum r, src)) end + | I.MOVE _ => error "MOVE" + | I.CMOV{cond,src,dst} => + let val cond = condCode cond + in eBytes(0wx0f :: Word8.+(cond,0wx40) :: eImmedExt(rNum dst, src)) + end + | I.LEA{r32, addr} => encodeReg(0wx8d, r32, addr) + | I.CMPL{lsrc, rsrc} => arith(0wx38, 7) (rsrc, lsrc) + | (I.CMPW _ | I.CMPB _) => error "CMP" + | I.TESTL{lsrc, rsrc} => test(32, rsrc, lsrc) + | I.TESTB{lsrc, rsrc} => test(8, rsrc, lsrc) + | I.TESTW _ => error "TEST" + | I.BINARY{binOp, src, dst} => let + fun shift(code, src) = + (case src + of I.Immed (1) => encode(0wxd1, code, dst) + | I.Immed (n) => encodeByteImm(0wxc1, code, dst, n) + | I.Direct r => + if rNum r <> ecx then error "shift: Direct" + else encode(0wxd3, code, dst) + | I.MemReg _ => shift(code, memReg src) + | _ => error "shift" + (*esac*)) + in + case binOp + of I.ADDL => arith(0w0, 0) (src, dst) + | I.SUBL => arith(0wx28, 5) (src, dst) + | I.ANDL => arith(0wx20, 4) (src, dst) + | I.ORL => arith(0w8, 1) (src, dst) + | I.XORL => arith(0wx30, 6) (src, dst) + | I.SHLL => shift(4,src) + | I.SARL => shift(7,src) + | I.SHRL => shift(5,src) + | I.IMULL => + (case (src, dst) + of (I.Immed(i), I.Direct dstR) => + (case size i + of Bits32 => encodeLongImm(0wx69, rNum dstR, dst, i) + | _ => encodeByteImm(0wx6b, rNum dstR, dst, i) + ) + | (_, I.Direct dstR) => + eBytes(0wx0f::0wxaf::(eImmedExt(rNum dstR, src))) + | _ => error "imull" + ) + | _ => error "binary" + end + | I.MULTDIV{multDivOp, src} => let + val mulOp = + (case multDivOp of + I.MULL1 => 4 | I.IDIVL1 => 7 | I.DIVL1 => 6 + | I.IMULL1 => error "imull1") + in encode(0wxf7, mulOp, src) + end + | I.MUL3{dst, src1, src2=i} => + (case src1 + of I.Immed _ => error "mul3: Immed" + | I.ImmedLabel _ => error "mul3: ImmedLabel" + | _ => + (case size i + of Bits32 => encodeLongImm(0wx69, rNum dst, src1, i) + | _ => encodeByteImm(0wx6b, rNum dst, src1, i) + (*esac*)) + (*esac*)) + | I.UNARY{unOp, opnd} => + (case unOp + of I.DECL => + (case opnd + of I.Direct d => eByte(0x48 + rNum d) + | _ => encode(0wxff, 1, opnd) + (*esac*)) + | I.INCL => + (case opnd + of I.Direct d => eByte(0x40 + rNum d) + | _ => encode(0wxff, 0, opnd) + (*esac*)) + | I.NEGL => encode(0wxf7, 3, opnd) + | I.NOTL => encode(0wxf7, 2, opnd) + | _ => error "UNARY is not in DECL/INCL/NEGL,NOTL" + (*esac*)) + | I.SET{cond,opnd} => + eBytes(0wx0f :: Word8.+(0wx90,condCode cond) :: eImmedExt(0, opnd)) + | I.PUSHL(I.Immed(i)) => + (case size i + of Bits32 => eBytes(0wx68 :: eLong(i)) + | _ => eBytes[0wx6a, toWord8 i] + (*esac*)) + | I.PUSHL(I.Direct r) => eByte(0x50+rNum r) + | I.PUSHL opnd => encode(0wxff, 6, opnd) + | I.POP(I.Direct r) => eByte(0x58+rNum r) + | I.POP(opnd) => encode(0wx8f, 0, opnd) + | I.CDQ => eByte(0x99) + | I.INTO => eByte(0xce) + + (* floating *) + | I.FBINARY{binOp, src=I.ST src, dst=I.ST dst} => + let val src = W8.fromInt(fNum src) + val dst = W8.fromInt(fNum dst) + val (opc1, opc2) = + case (src, dst) of + (_, 0w0) => + (case binOp + of I.FADDL => (0wxd8, 0wxc0 + src) + | I.FMULL => (0wxd8, 0wxc8 + src) + | I.FSUBRL => (0wxd8, 0wxe8 + src) + | I.FSUBL => (0wxd8, 0wxe0 + src) (* gas XXX *) + | I.FDIVRL => (0wxd8, 0wxf8 + src) + | I.FDIVL => (0wxd8, 0wxf0 + src) (* gas XXX *) + | _ => error "FBINARY:pop:src=%st(n),dst=%st" + ) + | (0w0, _) => + (case binOp + of I.FADDP => (0wxde, 0wxc0 + dst) + | I.FMULP => (0wxde, 0wxc8 + dst) + | I.FSUBRP => (0wxde, 0wxe8 + dst) (* gas XXX *) + | I.FSUBP => (0wxde, 0wxe0 + dst) + | I.FDIVRP => (0wxde, 0wxf8 + dst) (* gas XXX *) + | I.FDIVP => (0wxde, 0wxf0 + dst) + + | I.FADDL => (0wxdc, 0wxc0 + dst) + | I.FMULL => (0wxdc, 0wxc8 + dst) + | I.FSUBRL => (0wxdc, 0wxe8 + dst) (* gas XXX *) + | I.FSUBL => (0wxdc, 0wxe0 + dst) + | I.FDIVRL => (0wxdc, 0wxf8 + dst) (* gas XXX *) + | I.FDIVL => (0wxdc, 0wxf0 + dst) + + | _ => error "FBINARY (0w0,_)" + ) + | (_, _) => error "FBINARY (src, dst) non %st(0)" + in eBytes[opc1, opc2] + end + | I.FBINARY{binOp, src, dst=I.ST dst} => + if CB.physicalRegisterNum dst = 0 then + let + val (opc, code) = + (case binOp of + I.FADDL => (0wxdc, 0) + | I.FMULL => (0wxdc, 1) + | I.FCOML => (0wxdc, 2) + | I.FCOMPL => (0wxdc, 3) + | I.FSUBL => (0wxdc, 4) + | I.FSUBRL => (0wxdc, 5) + | I.FDIVL => (0wxdc, 6) + | I.FDIVRL => (0wxdc, 7) + | I.FADDS => (0wxd8, 0) + | I.FMULS => (0wxd8, 1) + | I.FCOMS => (0wxd8, 2) + | I.FCOMPS => (0wxd8, 3) + | I.FSUBS => (0wxd8, 4) + | I.FSUBRS => (0wxd8, 5) + | I.FDIVS => (0wxd8, 6) + | I.FDIVRS => (0wxd8, 7) + | _ => error "FBINARY:pop:dst=%st" + (*esac*)) + in encode(opc, code, src) + end + else error "FBINARY" + | I.FIBINARY{binOp, src} => + let val (opc, code) = + case binOp of + I.FIADDL => (0wxda, 0) + | I.FIMULL => (0wxda, 1) + | I.FICOML => (0wxda, 2) + | I.FICOMPL => (0wxda, 3) + | I.FISUBL => (0wxda, 4) + | I.FISUBRL => (0wxda, 5) + | I.FIDIVL => (0wxda, 6) + | I.FIDIVRL => (0wxda, 7) + | I.FIADDS => (0wxde, 0) + | I.FIMULS => (0wxde, 1) + | I.FICOMS => (0wxde, 2) + | I.FICOMPS => (0wxde, 3) + | I.FISUBS => (0wxde, 4) + | I.FISUBRS => (0wxde, 5) + | I.FIDIVS => (0wxde, 6) + | I.FIDIVRS => (0wxde, 7) + in encode(opc, code, src) end + | I.FUNARY unOp => + eBytes[0wxd9, + case unOp + of I.FABS => 0wxe1 + | I.FCHS => 0wxe0 + | I.FSQRT => 0wxfa + | I.FSIN => 0wxfe + | I.FCOS => 0wxff + | I.FPTAN => 0wxf2 + | I.FPATAN => 0wxf3 + | I.FDECSTP => 0wxf6 + | I.FINCSTP => 0wxf7 + | _ => error "FUNARY" + ] + | I.FXCH{opnd} => encodeST(0wxd9, 25, opnd) + + | I.FUCOM(I.ST n) => encodeST(0wxdd, 28, n) + | I.FUCOMP(I.ST n) => encodeST(0wxdd, 29, n) + | I.FUCOMPP => eBytes[0wxda, 0wxe9] + | I.FCOMI(I.ST n) => encodeST(0wxdb, 0x1e, n) + | I.FCOMIP(I.ST n) => encodeST(0wxdf, 0x1e, n) + | I.FUCOMI(I.ST n) => encodeST(0wxdb, 0x1d, n) + | I.FUCOMIP(I.ST n) => encodeST(0wxdf, 0x1d, n) + + | I.FSTS opnd => encode(0wxd9, 2, opnd) + | I.FSTL(I.ST n) => encodeST(0wxdd, 26, n) + | I.FSTL opnd => encode(0wxdd, 2, opnd) + + | I.FSTPS opnd => encode(0wxd9, 3, opnd) + | I.FSTPL(I.ST n) => encodeST(0wxdd, 27, n) + | I.FSTPL opnd => encode(0wxdd, 3, opnd) + | I.FSTPT opnd => encode(0wxdb, 7, opnd) + + | I.FLD1 => eBytes[0wxd9,0wxe8] + | I.FLDL2T => eBytes[0wxd9,0wxe9] + | I.FLDL2E => eBytes[0wxd9,0wxea] + | I.FLDPI => eBytes[0wxd9,0wxeb] + | I.FLDLG2 => eBytes[0wxd9,0wxec] + | I.FLDLN2 => eBytes[0wxd9,0wxed] + | I.FLDZ => eBytes[0wxd9,0wxee] + | I.FLDS opnd => encode(0wxd9, 0, opnd) + + | I.FLDL(I.ST n) => encodeST(0wxd9, 24, n) + | I.FLDL opnd => encode(0wxdd, 0, opnd) + + | I.FILD opnd => encode(0wxdf, 0, opnd) + | I.FILDL opnd => encode(0wxdb, 0, opnd) + | I.FILDLL opnd => encode(0wxdf, 5, opnd) + + | I.FNSTSW => eBytes[0wxdf, 0wxe0] + + (* misc *) + | I.SAHF => eByte(0x9e) + | _ => error "emitInstr" + end + and emitInstr (I.LIVE _) = Word8Vector.fromList [] + | emitInstr (I.KILL _) = Word8Vector.fromList [] + | emitInstr(I.COPY{k, dst, src, tmp, ...}) = + (case k + of CB.GP => emitInstrs(Shuffle.shuffle {tmp=tmp, dst=dst, src=src}) + | CB.FP => emitInstrs(Shuffle.shufflefp {tmp=tmp, dst=dst, src=src}) + | _ => error "COPY" + (*esac*)) + | emitInstr (I.INSTR instr) = emitX86Instr instr + | emitInstr (I.ANNOTATION{i,...}) = emitInstr i + +end diff --git a/base/old-basis/2004/.cm/GUID/basis-unix.sml b/base/old-basis/2004/.cm/GUID/basis-unix.sml new file mode 100644 index 0000000..83e0eab --- /dev/null +++ b/base/old-basis/2004/.cm/GUID/basis-unix.sml @@ -0,0 +1 @@ +guid-$/(basis-2004.cm):basis-unix.sml-1714016081.862 diff --git a/base/old-basis/2004/.cm/GUID/basis.sml b/base/old-basis/2004/.cm/GUID/basis.sml new file mode 100644 index 0000000..7d5e62e --- /dev/null +++ b/base/old-basis/2004/.cm/GUID/basis.sml @@ -0,0 +1 @@ +guid-$/(basis-2004.cm):basis.sml-1714016081.158 diff --git a/base/old-basis/2004/.cm/SKEL/basis-unix.sml b/base/old-basis/2004/.cm/SKEL/basis-unix.sml new file mode 100644 index 0000000..3557bd3 --- /dev/null +++ b/base/old-basis/2004/.cm/SKEL/basis-unix.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d3aPOSIX"gp1c"POSIX_2004"ac"POSIX_TTY"gp1c"POSIX_TTY_2004"aPosix"jgp1=gp1 \ No newline at end of file diff --git a/base/old-basis/2004/.cm/SKEL/basis.sml b/base/old-basis/2004/.cm/SKEL/basis.sml new file mode 100644 index 0000000..c2a0562 --- /dev/null +++ b/base/old-basis/2004/.cm/SKEL/basis.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d9aARRAY"gp1c"ARRAY_2004"aARRAY_SLICE"gp1c"ARRAY_SLICE_2004"aLIST"gp1c"LIST_2004"aLIST_PAIR"gp1c"LIST_PAIR_2004"CaMATH"gp1c"MATH_2004"aMONO_ARRAY"gp1c"MONO_ARRAY_2004"aMONO_ARRAY_SLICE"gp1c"MONO_ARRAY_SLICE_2004"aMONO_VECTOR"gp1c"MONO_VECTOR_2004"aMONO_VECTOR_SLICE"gp1c"MONO_VECTOR_SLICE_2004"CaOPTION"gp1c"OPTION_2004"aREAL"gp1c"REAL_2004"aSTRING"gp1c"STRING_2004"aTEXT"gp1c"TEXT_2004"aVECTOR"gp1c"VECTOR_2004"CaVECTOR_SLICE"gp1c"VECTOR_SLICE_2004"aWORD"gp1c"WORD_2004"aArray"jgp18gp1aArraySlice"jgp1 gp1aList"jgp1'gp1CaListPair"jgp1=gp1aOption"jgp1gp1:aMath"jgp1-gp1:aReal"jgp1aReal64"jgp1CaReal64Array"jgp1,aReal64ArraySlice"jgp14aReal64Vector"jgp1)#aReal64VectorSlice"jgp1aText"jgp1(gp1CaVector"jgp1>gp1aVectorSlice"jgp1gp1:aWord"jgp11!aWord8"jgp1?aWord32"jgp1?CaWord64"jgp1.?aWord8Array"jgp1aWord8ArraySlice"jgp1"aWord8Vector"jgp17?aWord8VectorSlice"jgp1!CaCharArray"jgp2(/aCharArraySlice"jgp2( "aCharVector"jgp2((?aCharVectorSlice"jgp2(!aString"jgp2(#gp1,N \ No newline at end of file diff --git a/base/old-basis/2004/.cm/SKEL/target32-basis.sml b/base/old-basis/2004/.cm/SKEL/target32-basis.sml new file mode 100644 index 0000000..73e080a --- /dev/null +++ b/base/old-basis/2004/.cm/SKEL/target32-basis.sml @@ -0,0 +1,2 @@ +Skeleton 5 +aWord31"jgp1gp1c"WORD" \ No newline at end of file diff --git a/base/old-basis/2004/.cm/amd64-unix/basis-unix.sml b/base/old-basis/2004/.cm/amd64-unix/basis-unix.sml new file mode 100644 index 0000000..045425b Binary files /dev/null and b/base/old-basis/2004/.cm/amd64-unix/basis-unix.sml differ diff --git a/base/old-basis/2004/.cm/amd64-unix/basis.sml b/base/old-basis/2004/.cm/amd64-unix/basis.sml new file mode 100644 index 0000000..2c7e7ad Binary files /dev/null and b/base/old-basis/2004/.cm/amd64-unix/basis.sml differ diff --git a/base/old-basis/2004/basis-2004.cm b/base/old-basis/2004/basis-2004.cm new file mode 100644 index 0000000..bb5fe8e --- /dev/null +++ b/base/old-basis/2004/basis-2004.cm @@ -0,0 +1,56 @@ +(* basis-2004.cm + * + * The 2004 version of the Standard ML Basis Library. + * + * Copyright (c) 2022 by The Fellowship of SML/NJ + *) + +Library + + library($SMLNJ-BASIS/basis-common.cm) + - ( + (* new modules *) + signature EITHER (* proposal 2015-002 *) + signature FN (* proposal 2015-005 *) + signature REF (* proposal 2015-007 *) + structure Either (* proposal 2015-002 *) + structure Fn (* proposal 2015-005 *) + structure Ref (* proposal 2015-007 *) + (* backward compatible signatures *) + signature ARRAY_2004 + signature ARRAY_SLICE_2004 + signature LIST_2004 + signature LIST_PAIR_2004 + signature MATH_2004 + signature MONO_ARRAY_2004 + signature MONO_ARRAY_SLICE_2004 + signature MONO_VECTOR_2004 + signature MONO_VECTOR_SLICE_2004 + signature OPTION_2004 +#if defined(OPSYS_UNIX) + signature POSIX_2004 + signature POSIX_TTY_2004 +#endif + signature REAL_2004 + signature STRING_2004 + signature TEXT_2004 + signature VECTOR_2004 + signature VECTOR_SLICE_2004 + signature WORD_2004 + ) + +is + +(* the basis with the old versions of the APIs *) + $SMLNJ-BASIS/basis-common.cm + +(* code to rebind modules and signatures to the old APIs *) + basis.sml +#if defined(OPSYS_UNIX) + basis-unix.sml +#endif +#if defined(SIZE_64) + target32-basis.sml +#else (* SIZE_32 *) + target64-basis.sml +#endif diff --git a/base/old-basis/2004/basis-unix.sml b/base/old-basis/2004/basis-unix.sml new file mode 100644 index 0000000..1914050 --- /dev/null +++ b/base/old-basis/2004/basis-unix.sml @@ -0,0 +1,11 @@ +(* basis-unix.sml + * + * COPYRIGHT (c) 2022 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * This file rebinds Unix-specific basis module names to their 2004 versions. + *) + +signature POSIX = POSIX_2004 +signature POSIX_TTY = POSIX_TTY_2004 +structure Posix : POSIX = Posix diff --git a/base/old-basis/2004/basis.sml b/base/old-basis/2004/basis.sml new file mode 100644 index 0000000..ab41e6e --- /dev/null +++ b/base/old-basis/2004/basis.sml @@ -0,0 +1,57 @@ +(* basis.sml + * + * COPYRIGHT (c) 2022 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * This file rebinds various basis module names to their 2004 versions. + *) + +(* rebind basis signatures to their 2004 versions *) +signature ARRAY = ARRAY_2004 +signature ARRAY_SLICE = ARRAY_SLICE_2004 +signature LIST = LIST_2004 +signature LIST_PAIR = LIST_PAIR_2004 +signature MATH = MATH_2004 +signature MONO_ARRAY = MONO_ARRAY_2004 +signature MONO_ARRAY_SLICE = MONO_ARRAY_SLICE_2004 +signature MONO_VECTOR = MONO_VECTOR_2004 +signature MONO_VECTOR_SLICE = MONO_VECTOR_SLICE_2004 +signature OPTION = OPTION_2004 +signature REAL = REAL_2004 +signature STRING = STRING_2004 +signature TEXT = TEXT_2004 +signature VECTOR = VECTOR_2004 +signature VECTOR_SLICE = VECTOR_SLICE_2004 +signature WORD = WORD_2004 + +(* rebind basis structures using 2004 signatures *) +structure Array : ARRAY = Array +structure ArraySlice : ARRAY_SLICE = ArraySlice +structure List : LIST = List +structure ListPair : LIST_PAIR = ListPair +structure Option : OPTION = Option +structure Math : MATH = Math +structure Real : REAL = Real +structure Real64 : REAL = Real64 +structure Real64Array : MONO_ARRAY = Real64Array +structure Real64ArraySlice : MONO_ARRAY_SLICE = Real64ArraySlice +structure Real64Vector : MONO_VECTOR = Real64Vector +structure Real64VectorSlice : MONO_VECTOR_SLICE = Real64VectorSlice +structure Text : TEXT = Text +structure Vector : VECTOR = Vector +structure VectorSlice : VECTOR_SLICE = VectorSlice +structure Word : WORD = Word +structure Word8 : WORD = Word8 +structure Word32 : WORD = Word32 +structure Word64 : WORD = Word64 +structure Word8Array : MONO_ARRAY = Word8Array +structure Word8ArraySlice : MONO_ARRAY_SLICE = Word8ArraySlice +structure Word8Vector : MONO_VECTOR = Word8Vector +structure Word8VectorSlice : MONO_VECTOR_SLICE = Word8VectorSlice + +(* the Text modules are extracted from the Text structure *) +structure CharArray : MONO_ARRAY = Text.CharArray +structure CharArraySlice : MONO_ARRAY_SLICE = Text.CharArraySlice +structure CharVector : MONO_VECTOR = Text.CharVector +structure CharVectorSlice : MONO_VECTOR_SLICE = Text.CharVectorSlice +structure String : STRING = Text.String diff --git a/base/old-basis/2004/target32-basis.sml b/base/old-basis/2004/target32-basis.sml new file mode 100644 index 0000000..6a6f44e --- /dev/null +++ b/base/old-basis/2004/target32-basis.sml @@ -0,0 +1,10 @@ +(* target32-basis.sml + * + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * This file rebinds various target-dependent basis module names to + * their 2004 versions (32-bit version) + *) + +structure Word31 : WORD = Word31 diff --git a/base/old-basis/2004/target64-basis.sml b/base/old-basis/2004/target64-basis.sml new file mode 100644 index 0000000..414923b --- /dev/null +++ b/base/old-basis/2004/target64-basis.sml @@ -0,0 +1,10 @@ +(* target64-basis.sml + * + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * This file rebinds various target-dependent basis module names to + * their 2004 versions (32-bit version) + *) + +structure Word63 : WORD = Word63 diff --git a/base/runtime/c-libs/README b/base/runtime/c-libs/README new file mode 100644 index 0000000..9b7610a --- /dev/null +++ b/base/runtime/c-libs/README @@ -0,0 +1,34 @@ +This is the root directory of the ML callable C library source. +Versions of SML/NJ up to 106, used a single monolithic library +of C functions that were callable from ML, with a single table +of C function bindings. This meant that to add a C function, +required changing the run-time system source. In the new scheme, +the runtime system supports an extensible set of libraries that +are configured at build time. Note, however, that since the run- +time system and ML heap images are separate files, one can extend +the run-time system with additional libraries and then use existing +ML heap images with the extended run-time system. See the file +HOWTO-ADD-C-CODE in the notes directory for more details. + +The decentralization of the libraries represents a first step in +an ongoing process to make the ML/C interface easier to extend. +There will be continue to be changes in the way that these +libraries are organized and supported. Here is a tentative list +of anticipated changes: + + 1) as we move towards the new SML basis, the libraries will be + reorganized and redefined to reflect the new semantics. + + 2) support for non UNIX systems (Windows NT, Windows 95, OS/2, + and MacOS), will result in different versions of libraries + for different systems. + + 3) the generation of library interfaces will be automated. We plan + an extension to SML that we will feed into a preprocessor to + generate both the ML code that binds the C functions, and the + C function prototype declarations and library tables (the stuff + that is in "*/cfun-list.h" and "*/*-lib.c"). + + 4) support for dynamic loading of C libraries on machines that have + dynamic linking. + diff --git a/base/runtime/c-libs/c-libraries.c b/base/runtime/c-libs/c-libraries.c new file mode 100644 index 0000000..89209f2 --- /dev/null +++ b/base/runtime/c-libs/c-libraries.c @@ -0,0 +1,98 @@ +/*! \file c-libraries.c + * + * \author John Reppy + * + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * This is the home of the CLibrary table, C library initialization code, + * and C function lookup code. It is part of the run-time proper (not part + * of libcfuns.a). + */ + +#ifdef OPSYS_UNIX +# include "ml-unixdep.h" /* for the HAS_POSIX_LIBRARIES option flag */ +#endif +#include "ml-base.h" +#include "ml-values.h" +#include "c-library.h" +#include "c-globals-tbl.h" +#include + +#define C_LIBRARY(lib) extern c_library_t lib; +#include "clib-list.h" +#undef C_LIBRARY + +PVT c_library_t *CLibs[] = { +#define C_LIBRARY(lib) &lib, +# include "clib-list.h" +#undef C_LIBRARY + NIL(c_library_t *) + }; + +/* InitCFunList: + * Initialize the list of C functions callable from ML. + */ +void InitCFunList () +{ + int i, j, libNameLen; + char *nameBuf; + + for (i = 0; CLibs[i] != NIL(c_library_t *); i++) { + c_library_t *clib = CLibs[i]; + cfunc_binding_t *cfuns = CLibs[i]->cfuns; + + if (clib->initFn != NIL(clib_init_fn_t)) { + /* call the libraries initialization function */ + (*(clib->initFn)) (0, 0/** argc, argv **/); + } + + /* register the C functions in the C symbol table */ + libNameLen = strlen(clib->libName) + 2; /* incl "." and "\0" */ + for (j = 0; cfuns[j].name != NIL(char *); j++) { + nameBuf = NEW_VEC(char, strlen(cfuns[j].name) + libNameLen); + sprintf (nameBuf, "%s.%s", clib->libName, cfuns[j].name); +#ifdef INDIRECT_CFUNC + RecordCSymbol (nameBuf, PTR_CtoML(&(cfuns[j]))); +#else + RecordCSymbol (nameBuf, PTR_CtoML(cfuns[j].cfunc)); +#endif + } + } + +} /* end of InitCFunList */ + +/* BindCFun: + * + * Search the C function table for the given function; return ML_unit, if + * not found. + * NOTE: eventually, we will raise an exception when the function isn't found. + */ +ml_val_t BindCFun (char *moduleName, char *funName) +{ + int i, j; + +#ifdef DEBUG_TRACE_CCALL + SayDebug("BindCFun: %s.%s\n", moduleName, funName); +#endif + for (i = 0; CLibs[i] != NIL(c_library_t *); i++) { + if (strcmp(CLibs[i]->libName, moduleName) == 0) { + cfunc_binding_t *cfuns = CLibs[i]->cfuns; + for (j = 0; cfuns[j].name != NIL(char *); j++) { + if (strcmp(cfuns[j].name, funName) == 0) +#ifdef INDIRECT_CFUNC + return PTR_CtoML(&(cfuns[j])); +#else + return PTR_CtoML(cfuns[j].cfunc); +#endif + } + /* here, we didn't find the library so we return ML_unit */ + return ML_unit; + } + } + + /* here, we didn't find the library so we return ML_unit */ + return ML_unit; + +} /* end of BindCFun */ + diff --git a/base/runtime/c-libs/clib-list.h b/base/runtime/c-libs/clib-list.h new file mode 100644 index 0000000..80e9616 --- /dev/null +++ b/base/runtime/c-libs/clib-list.h @@ -0,0 +1,51 @@ +/*! \file clib-list.h + * + * \author John Reppy + */ + +/* + * COPYRIGHT (c) 2021 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + */ + +C_LIBRARY(SMLNJ_RunT_Library) +C_LIBRARY(SMLNJ_Sig_Library) +C_LIBRARY(SMLNJ_Prof_Library) + +/* basis libraries */ +C_LIBRARY(SMLNJ_Time_Library) +C_LIBRARY(SMLNJ_Date_Library) +C_LIBRARY(SMLNJ_Math_Library) +C_LIBRARY(SMLNJ_Sock_Library) + +#ifdef HAS_POSIX_LIBRARIES +C_LIBRARY(POSIX_Error_Library) +C_LIBRARY(POSIX_FileSys_Library) +C_LIBRARY(POSIX_IO_Library) +C_LIBRARY(POSIX_ProcEnv_Library) +C_LIBRARY(POSIX_Process_Library) +C_LIBRARY(POSIX_Signal_Library) +C_LIBRARY(POSIX_SysDB_Library) +C_LIBRARY(POSIX_TTY_Library) +#endif + +#ifdef OPSYS_UNIX +C_LIBRARY(POSIX_OS_Library) +#elif defined(OPSYS_WIN32) +C_LIBRARY(WIN32_Library) +C_LIBRARY(WIN32_IO_Library) +C_LIBRARY(WIN32_FileSys_Library) +C_LIBRARY(WIN32_Process_Library) +#endif + +#ifdef MP_SUPPORT +C_LIBRARY(SMLNJ_MP_Library) +#endif + +#ifdef C_CALLS +C_LIBRARY(SMLNJ_CCalls_Library) +#endif + +#ifdef DLOPEN +C_LIBRARY(UNIX_Dynload_Library) +#endif diff --git a/base/runtime/c-libs/dl/cfun-list.h b/base/runtime/c-libs/dl/cfun-list.h new file mode 100644 index 0000000..585900c --- /dev/null +++ b/base/runtime/c-libs/dl/cfun-list.h @@ -0,0 +1,17 @@ +/* cfun-list.h + * + * COPYRIGHT (c) 1995 AT&T Bell Laboratories. + * + * This file lists the directory library of C functions that are callable by ML. + */ + +#ifndef CLIB_NAME +#define CLIB_NAME "UNIX-Dynload" +#define CLIB_VERSION "1.0" +#define CLIB_DATE "January 1, 2001" +#endif + +CFUNC("dlopen", _ml_U_Dynload_dlopen, "string option * bool * bool -> Word32.word") +CFUNC("dlsym", _ml_U_Dynload_dlsym, "Word32.word * string -> Word32.word") +CFUNC("dlclose", _ml_U_Dynload_dlclose, "Word32.word -> unit") +CFUNC("dlerror", _ml_U_Dynload_dlerror, "unit -> string option") diff --git a/base/runtime/c-libs/dl/cfun-proto-list.h b/base/runtime/c-libs/dl/cfun-proto-list.h new file mode 100644 index 0000000..16b3e96 --- /dev/null +++ b/base/runtime/c-libs/dl/cfun-proto-list.h @@ -0,0 +1,18 @@ +/* cfun-proto-list.h + * + * COPYRIGHT (c) 1194 AT&T Bell Laboratories. + */ + +#ifndef _CFUN_PROTO_LIST_ +#define _CFUN_PROTO_LIST_ + +#ifndef _C_LIBRARY_ +# include "c-library.h" +#endif + +/* the external definitions for the C functions */ +#define CFUNC(NAME, FUNC, MLTYPE) CFUNC_PROTO(NAME, FUNC, MLTYPE) +#include "cfun-list.h" +#undef CFUNC + +#endif /* !_CFUN_PROTO_LIST_ */ diff --git a/base/runtime/c-libs/dl/dlclose.c b/base/runtime/c-libs/dl/dlclose.c new file mode 100644 index 0000000..73fe1aa --- /dev/null +++ b/base/runtime/c-libs/dl/dlclose.c @@ -0,0 +1,35 @@ +/*! \file dlclose.c + * + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + */ + +#ifdef OPSYS_WIN32 +# include +#else +# include "ml-unixdep.h" +# include +#endif +#include "ml-base.h" +#include "ml-values.h" +#include "ml-objects.h" +#include "ml-c.h" +#include "cfun-proto-list.h" + +/* 64BIT: use c_pointer type for handles */ +/* _ml_P_Dynload_dlclose : Word32.word -> unit + * + * Close dynamically loaded library. + */ +ml_val_t _ml_U_Dynload_dlclose (ml_state_t *msp, ml_val_t ml_handle) +{ + void *handle = (void *) (WORD_MLtoC (ml_handle)); + +#ifdef OPSYS_WIN32 + (void) FreeLibrary (handle); +#else + (void) dlclose (handle); +#endif + + return ML_unit; +} diff --git a/base/runtime/c-libs/dl/dlerror.c b/base/runtime/c-libs/dl/dlerror.c new file mode 100644 index 0000000..8205ea9 --- /dev/null +++ b/base/runtime/c-libs/dl/dlerror.c @@ -0,0 +1,70 @@ +/*! \file dlerror.c + * + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + */ + +#ifndef OPSYS_WIN32 +# include "ml-unixdep.h" +# include +#endif +#include "ml-base.h" +#include "ml-values.h" +#include "ml-objects.h" +#include "ml-c.h" +#include "cfun-proto-list.h" + +#ifdef OPSYS_WIN32 + +#include +#include +#include + +/* roll-your-own dlerror... */ +static int dl_error_read = 0; +static char *dl_error = NULL; + +void dlerror_set (const char *fmt, const char *s) +{ + if (dl_error != NIL(const char *)) { + FREE (dl_error); + } + dl_error = MALLOC (strlen (fmt) + strlen (s) + 1); + sprintf (dl_error, fmt, s); + dl_error_read = 0; +} + +char *dlerror (void) +{ + if (dl_error) { + if (dl_error_read) { + FREE (dl_error); + dl_error = NIL(char *); + } + else { + dl_error_read = 1; + } + } + + return dl_error; +} +#endif + +/* _ml_P_Dynload_dlerror : unit -> string option + * + * Extract error after unsuccessful dlopen/dlsym/dlclose. + */ +ml_val_t _ml_U_Dynload_dlerror (ml_state_t *msp, ml_val_t ml_handle) +{ + const char *e = dlerror (); + ml_val_t r, s; + + if (e == NULL) { + r = OPTION_NONE; + } + else { + s = ML_CString (msp, e); + OPTION_SOME (msp, r, s); + } + return r; +} diff --git a/base/runtime/c-libs/dl/dlopen.c b/base/runtime/c-libs/dl/dlopen.c new file mode 100644 index 0000000..99e365f --- /dev/null +++ b/base/runtime/c-libs/dl/dlopen.c @@ -0,0 +1,54 @@ +/* dlopen.c + * + * COPYRIGHT (c) 2000 by Lucent Technologies, Bell Laboratories + */ + +#ifdef OPSYS_WIN32 +# include +extern void dlerror_set (const char *fmt, const char *s); +#else +# include "ml-unixdep.h" +# include +#endif +#include "ml-base.h" +#include "ml-values.h" +#include "ml-objects.h" +#include "ml-c.h" +#include "cfun-proto-list.h" + +/* 64BIT: use c_pointer type for handles */ +/* _ml_P_Dynload_dlopen : string * bool * bool -> Word32.word + * + * Open a dynamically loaded library. + */ +ml_val_t _ml_U_Dynload_dlopen (ml_state_t *msp, ml_val_t arg) +{ + ml_val_t ml_libname = REC_SEL (arg, 0); + int lazy = REC_SEL (arg, 1) == ML_true; + int global = REC_SEL (arg, 2) == ML_true; + char *libname = NULL; + void *handle; + ml_val_t res; + + if (ml_libname != OPTION_NONE) + libname = STR_MLtoC (OPTION_get (ml_libname)); + +#ifdef OPSYS_WIN32 + + handle = (void *) LoadLibrary (libname); + if (handle == NULL && libname != NULL) + dlerror_set ("Library `%s' not found", libname); + +#else + { + int flag = (lazy ? RTLD_LAZY : RTLD_NOW); + + if (global) flag |= RTLD_GLOBAL; + + handle = dlopen (libname, flag); + } +#endif + + WORD_ALLOC (msp, res, (Word_t) handle); + return res; +} diff --git a/base/runtime/c-libs/dl/dlsym.c b/base/runtime/c-libs/dl/dlsym.c new file mode 100644 index 0000000..333ecec --- /dev/null +++ b/base/runtime/c-libs/dl/dlsym.c @@ -0,0 +1,44 @@ +/* dlsym.c + * + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + */ + +#ifdef OPSYS_WIN32 +# include +extern void dlerror_set (const char *fmt, const char *s); +#else +# include "ml-unixdep.h" +# include +#endif +#include "ml-base.h" +#include "ml-values.h" +#include "ml-objects.h" +#include "ml-c.h" +#include "cfun-proto-list.h" + +/* 64BIT: use c_pointer type for handles */ +/* _ml_P_Dynload_dlsym : Word32.word * string -> Word32.word + * + * Extract symbol from dynamically loaded library. + */ +ml_val_t _ml_U_Dynload_dlsym (ml_state_t *msp, ml_val_t arg) +{ + ml_val_t ml_handle = REC_SEL (arg, 0); + char *symname = STR_MLtoC (REC_SEL (arg, 1)); + void *handle = (void *) (WORD_MLtoC (ml_handle)); + void *addr; + ml_val_t res; + +#ifdef OPSYS_WIN32 + addr = GetProcAddress (handle, symname); + if (addr == NULL && symname != NULL) { + dlerror_set ("Symbol `%s' not found", symname); + } +#else + addr = dlsym (handle, symname); +#endif + + WORD_ALLOC (msp, res, (Word_t) addr); + return res; +} diff --git a/base/runtime/c-libs/dl/dynload-lib.c b/base/runtime/c-libs/dl/dynload-lib.c new file mode 100644 index 0000000..7bff037 --- /dev/null +++ b/base/runtime/c-libs/dl/dynload-lib.c @@ -0,0 +1,27 @@ +/* unix-dynload-lib.c + * + * COPYRIGHT (c) 1995 AT&T Bell Laboratories. + */ + +#include "ml-base.h" +#include "c-library.h" +#include "cfun-proto-list.h" + + +/* the table of C functions and ML names */ +#define CFUNC(NAME, FUNC, MLTYPE) CFUNC_BIND(NAME, FUNC, MLTYPE) +PVT cfunc_binding_t CFunTable[] = { +#include "cfun-list.h" + CFUNC_NULL_BIND + }; +#undef CFUNC + + +/* the dynload library */ +c_library_t UNIX_Dynload_Library = { + CLIB_NAME, + CLIB_VERSION, + CLIB_DATE, + NIL(clib_init_fn_t), + CFunTable + }; diff --git a/base/runtime/c-libs/dl/makefile b/base/runtime/c-libs/dl/makefile new file mode 100644 index 0000000..428d093 --- /dev/null +++ b/base/runtime/c-libs/dl/makefile @@ -0,0 +1,41 @@ +# +# a template makefile for a C function library +# + +SHELL = /bin/sh + +INC_DIR = ../../include +CLIB_DIR = ../ + +INCLUDES = -I$(INC_DIR) -I$(CLIB_DIR) -I../../objs + +MAKE = make +AR = ar +ARFLAGS = rcv +RANLIB = ranlib + +LIBRARY = libunix-dynload.a + +VERSION = v-dummy + +OBJS = unix-dynload-lib.o \ + dlopen.o \ + dlclose.o \ + dlsym.o \ + dlerror.o + +$(LIBRARY) : $(VERSION) $(OBJS) + rm -rf $(LIBRARY) + $(AR) $(ARFLAGS) $(LIBRARY) $(OBJS) + $(RANLIB) $(LIBRARY) + +$(VERSION) : + ($(MAKE) MAKE="$(MAKE)" clean) + echo "$(VERSION)" > $(VERSION) + +.c.o: $(INC_DIR)/ml-unixdep.h $(INC_DIR)/ml-base.h $(INC_DIR)/ml-values.h \ + $(CLIB_DIR)/ml-c.h cfun-proto-list.h cfun-list.h + $(CC) $(CFLAGS) $(DEFS) $(INCLUDES) -c $< + +clean : + rm -f v-* *.o $(LIBRARY) diff --git a/base/runtime/c-libs/dl/makefile.win32 b/base/runtime/c-libs/dl/makefile.win32 new file mode 100644 index 0000000..4e21ad3 --- /dev/null +++ b/base/runtime/c-libs/dl/makefile.win32 @@ -0,0 +1,57 @@ +# +# the makefile for the dynload library +# win32 specific + +SHELL = + +INC_DIR = ..\..\include +CLIB_DIR = ..\ + +INCLUDES = /I$(INC_DIR) /I$(CLIB_DIR) /I..\..\objs + +MAKEFILE = makefile.win32 +MAKE = nmake /F$(MAKEFILE) +AR = lib +ARFLAGS = +RANLIB = lib + +LIBRARY = dynload.lib + +VERSION = v-dummy + +OBJS = unix-dynload-lib.obj \ + dlopen.obj \ + dlclose.obj \ + dlsym.obj \ + dlerror.obj + +$(LIBRARY) : $(VERSION) $(OBJS) + del /F /Q $(LIBRARY) + $(AR) $(ARFLAGS) /out:$(LIBRARY) $(OBJS) + $(RANLIB) /out:$(LIBRARY) + +$(VERSION) : + ($(MAKE) MAKE="$(MAKE)" clean) + echo "$(VERSION)" > $(VERSION) + +DEPENDENTS = $(INC_DIR)\ml-unixdep.h $(INC_DIR)\ml-base.h \ + $(INC_DIR)\ml-values.h \ + $(CLIB_DIR)\ml-c.h cfun-proto-list.h cfun-list.h + +unix-dynload-lib.obj: unix-dynload-lib.c $(DEPENDENTS) + $(CC) $(CFLAGS) $(DEFS) $(INCLUDES) /c unix-dynload-lib.c + +dlopen.obj: dlopen.c $(DEPENDENTS) + $(CC) $(CFLAGS) $(DEFS) $(INCLUDES) /c dlopen.c + +dlclose.obj: dlclose.c $(DEPENDENTS) + $(CC) $(CFLAGS) $(DEFS) $(INCLUDES) /c dlclose.c + +dlerror.obj: dlerror.c $(DEPENDENTS) + $(CC) $(CFLAGS) $(DEFS) $(INCLUDES) /c dlerror.c + +dlsym.obj: dlsym.c $(DEPENDENTS) + $(CC) $(CFLAGS) $(DEFS) $(INCLUDES) /c dlsym.c + +clean : + del /F /Q v-* *.obj *.pdb $(LIBRARY) diff --git a/base/runtime/c-libs/dl/unix-dynload-lib.c b/base/runtime/c-libs/dl/unix-dynload-lib.c new file mode 100644 index 0000000..d7d4d54 --- /dev/null +++ b/base/runtime/c-libs/dl/unix-dynload-lib.c @@ -0,0 +1,28 @@ +/* dynload-lib.c + * + * COPYRIGHT (c) 1995 AT&T Bell Laboratories. + */ + +#include "ml-base.h" +#include "c-library.h" +#include "cfun-proto-list.h" + + +/* the table of C functions and ML names */ +#define CFUNC(NAME, FUNC, MLTYPE) CFUNC_BIND(NAME, FUNC, MLTYPE) +PVT cfunc_binding_t CFunTable[] = { +#include "cfun-list.h" + CFUNC_NULL_BIND + }; +#undef CFUNC + + +/* the dynload library */ +c_library_t UNIX_Dynload_Library = { + CLIB_NAME, + CLIB_VERSION, + CLIB_DATE, + NIL(clib_init_fn_t), + CFunTable + }; + diff --git a/base/runtime/c-libs/makefile b/base/runtime/c-libs/makefile new file mode 100644 index 0000000..0f7b324 --- /dev/null +++ b/base/runtime/c-libs/makefile @@ -0,0 +1,94 @@ +# +# the makefile for the C libraries +# +# Currently, this is only used to clean the library directories. +# + +CC = cc +CFLAGS = -O +MAKE = make +AR = ar +ARFLAGS = rcv +RANLIB = ranlib + +VERSION = v-dummy + +LIBS = posix-os/libposix-os.a \ + smlnj-runtime/libsmlnj-runt.a \ + smlnj-signals/libsmlnj-sig.a \ + smlnj-prof/libsmlnj-prof.a \ + smlnj-sockets/libsmlnj-sock.a \ + smlnj-time/libsmlnj-time.a \ + smlnj-date/libsmlnj-date.a \ + smlnj-math/libsmlnj-math.a \ + smlnj-mp/libsmlnj-mp.a \ + posix-error/libposix-error.a \ + posix-filesys/libposix-filesys.a \ + posix-io/libposix-io.a \ + posix-procenv/libposix-procenv.a \ + posix-process/libposix-process.a \ + posix-signal/libposix-signal.a \ + posix-sysdb/libposix-sysdb.a \ + posix-sysdb/libposix-tty.a \ + smlnj-ccalls/libsmlnj-ccalls.a \ + dl/libunix-dynload.a + +LIB_DIRS = posix-os \ + smlnj-runtime \ + smlnj-signals \ + smlnj-sockets \ + smlnj-prof \ + smlnj-time \ + smlnj-date \ + smlnj-math \ + smlnj-mp \ + posix-error \ + posix-filesys \ + posix-io \ + posix-procenv \ + posix-process \ + posix-signal \ + posix-sysdb \ + posix-tty \ + smlnj-ccalls \ + dl + + +# include directories for this level +# +OBJS_DIR = ../objs +INC_DIR = ../include +INCLUDES = -I$(OBJS_DIR) -I$(INC_DIR) + +# include directories for the library sub-directories +# +LIB_OBJS_DIR = ../../objs +LIB_INC_DIR = ../../include +LIB_INCLUDES = -I$(LIB_OBJS_DIR) -I$(LIB_INC_DIR) -I.. + +# +# arguments to recursive make +# +LIB_MK_ARGS = VERSION="$(VERSION)" \ + MAKE="$(MAKE)" \ + CC="$(CC)" CFLAGS="$(CFLAGS)" DEFS="$(DEFS)" \ + AR="$(AR)" ARFLAGS="$(ARFLAGS)" \ + RANLIB="$(RANLIB)" \ + INCLUDES="$(LIB_INCLUDES)" + +all: $(VERSION) + for dir in $(LIB_DIRS); do \ + (cd $$dir; echo "building $$dir"; $(MAKE) $(LIB_MK_ARGS)) ; \ + done + rm -rf libcfuns.a + +$(VERSION): + rm -f v-* *.o libcfuns.a + echo "$(VERSION)" > $(VERSION) + +clean : + for dir in $(LIB_DIRS); do \ + (cd $$dir; echo "cleaning $$dir"; $(MAKE) MAKE="$(MAKE)" clean) ; \ + done + rm -f v-* *.o + diff --git a/base/runtime/c-libs/makefile.win32 b/base/runtime/c-libs/makefile.win32 new file mode 100644 index 0000000..36951ac --- /dev/null +++ b/base/runtime/c-libs/makefile.win32 @@ -0,0 +1,92 @@ +# +# the makefile for the C libraries +# +# Currently, this is only used to clean the library directories. +# +# win32 specific + +CC = cl /nologo +CFLAGS = +MAKEFILE = makefile.win32 +MAKE = nmake /nologo /F$(MAKEFILE) +AR = lib +ARFLAGS = +RANLIB = lib + +VERSION = v-dummy + +LIB_DIRS = smlnj-runtime \ + smlnj-signals \ + smlnj-sockets \ + smlnj-prof \ + smlnj-time \ + smlnj-date \ + smlnj-math \ +# smlnj-mp \ +# smlnj-ccalls \ + win32 \ + win32-io \ + win32-filesys \ + win32-process + +#clean : +# for dir in $(LIB_DIRS); do \ +# (cd $$dir; echo "cleaning $$dir"; $(MAKE) MAKE="$(MAKE)" clean) \ +# done +# rm -f v-* *.o + +# ugh! +clean : + del /F /Q v-* *.obj *.pdb + + cd smlnj-runtime + $(MAKE) MAKE="$(MAKE)" clean + cd $(MAKEDIR) + + cd smlnj-signals + $(MAKE) MAKE="$(MAKE)" clean + cd $(MAKEDIR) + + cd smlnj-sockets + $(MAKE) MAKE="$(MAKE)" clean + cd $(MAKEDIR) + + cd smlnj-prof + $(MAKE) MAKE="$(MAKE)" clean + cd $(MAKEDIR) + + cd smlnj-time + $(MAKE) MAKE="$(MAKE)" clean + cd $(MAKEDIR) + + cd smlnj-date + $(MAKE) MAKE="$(MAKE)" clean + cd $(MAKEDIR) + + cd smlnj-math + $(MAKE) MAKE="$(MAKE)" clean + cd $(MAKEDIR) + +# cd smlnj-mp +# $(MAKE) MAKE="$(MAKE)" clean +# cd $(MAKEDIR) + +# cd smlnj-ccalls +# $(MAKE) MAKE="$(MAKE)" clean +# cd $(MAKEDIR) + + cd win32 + $(MAKE) MAKE="$(MAKE)" clean + cd $(MAKEDIR) + + cd win32-io + $(MAKE) MAKE="$(MAKE)" clean + cd $(MAKEDIR) + + cd win32-filesys + $(MAKE) MAKE="$(MAKE)" clean + cd $(MAKEDIR) + + cd win32-process + $(MAKE) MAKE="$(MAKE)" clean + cd $(MAKEDIR) diff --git a/base/runtime/c-libs/ml-c.h b/base/runtime/c-libs/ml-c.h new file mode 100644 index 0000000..516307f --- /dev/null +++ b/base/runtime/c-libs/ml-c.h @@ -0,0 +1,74 @@ +/* ml-c.h + * + * COPYRIGHT (c) 1993 by AT&T Bell Laboratories. + * + * Header file for C functions that are callable from ML. This defines + * a number of macros for checking return results and for raising the SysErr + * exception: + * + * RAISE_SYSERR(msp, sts) Raise the SysErr exception using the + * appropriate system error message (on + * some systems, sts may be an error code). + * + * RAISE_ERROR(msp, msg) Raise the SysErr exception using the + * given message (with NONE for the system + * error part). + * + * CHK_RETURN_VAL(msp, sts, val) Check sts for an error (< 0); if okay, + * then return val. Otherwise raise + * SysErr with the appropriate system + * error message. + * + * CHK_RETURN(msp, sts) Check sts for an error (< 0); if okay, + * then return it as the result (after + * converting to an ML int). + * + * CHK_RETURN_UNIT(msp, sts) Check sts for an error (< 0); if okay, + * then return unit. + */ + +#ifndef _ML_C_ +#define _ML_C_ + +#ifndef _ML_OSDEP_ +#include "ml-osdep.h" +#endif + + +#ifdef SYSCALL_RET_ERR +ml_val_t RaiseSysError (ml_state_t *msp, int err, const char *alt_msg, const char *at); +#define RAISE_SYSERR(msp, sts) \ + RaiseSysError((msp), (sts), NIL(char *), "<" __FILE__ ">") +#define RAISE_ERROR(msp, msg) \ + RaiseSysError((msp), 0, (msg), "<" __FILE__ ">") + +#else +ml_val_t RaiseSysError (ml_state_t *msp, const char *alt_msg, const char *at); +#define RAISE_SYSERR(msp, sts) \ + RaiseSysError((msp), NIL(char *), "<" __FILE__ ">") +#define RAISE_ERROR(msp, msg) \ + RaiseSysError((msp), (msg), "<" __FILE__ ">") + +#endif + +/* return a value to the calling ML code, but raise an exception if an error + * occured. + */ +#define CHK_RETURN_VAL(msp,sts,val) { \ + if ((sts) < 0) \ + return RAISE_SYSERR(msp, sts); \ + else \ + return (val); \ + } + +/* return sts to the calling ML code, but raise an exception if an error occured */ +#define CHK_RETURN(msp,sts) { \ + int __sts = (sts); \ + CHK_RETURN_VAL((msp), __sts, INT_CtoML(__sts)) \ + } + +/* return unit to the calling ML code, but raise an exception if an error occured */ +#define CHK_RETURN_UNIT(msp,sts) \ + CHK_RETURN_VAL(msp, sts, ML_unit) + +#endif /* !_ML_C_ */ diff --git a/base/runtime/c-libs/posix-error/cfun-list.h b/base/runtime/c-libs/posix-error/cfun-list.h new file mode 100644 index 0000000..76b1da4 --- /dev/null +++ b/base/runtime/c-libs/posix-error/cfun-list.h @@ -0,0 +1,17 @@ +/* cfun-list.h + * + * COPYRIGHT (c) 1995 AT&T Bell Laboratories. + * + * This file lists the directory library of C functions that are callable by ML. + */ + +#ifndef CLIB_NAME +#define CLIB_NAME "POSIX-Error" +#define CLIB_VERSION "1.1" +#define CLIB_DATE "December 31, 1996" +#endif + +CFUNC("errmsg", _ml_P_Error_errmsg, "word -> string") +CFUNC("geterror", _ml_P_Error_geterror, "word -> sys_const") +CFUNC("listerrors", _ml_P_Error_listerrors, "unit -> sys_const list") + diff --git a/base/runtime/c-libs/posix-error/cfun-proto-list.h b/base/runtime/c-libs/posix-error/cfun-proto-list.h new file mode 100644 index 0000000..16b3e96 --- /dev/null +++ b/base/runtime/c-libs/posix-error/cfun-proto-list.h @@ -0,0 +1,18 @@ +/* cfun-proto-list.h + * + * COPYRIGHT (c) 1194 AT&T Bell Laboratories. + */ + +#ifndef _CFUN_PROTO_LIST_ +#define _CFUN_PROTO_LIST_ + +#ifndef _C_LIBRARY_ +# include "c-library.h" +#endif + +/* the external definitions for the C functions */ +#define CFUNC(NAME, FUNC, MLTYPE) CFUNC_PROTO(NAME, FUNC, MLTYPE) +#include "cfun-list.h" +#undef CFUNC + +#endif /* !_CFUN_PROTO_LIST_ */ diff --git a/base/runtime/c-libs/posix-error/errmsg.c b/base/runtime/c-libs/posix-error/errmsg.c new file mode 100644 index 0000000..3f77266 --- /dev/null +++ b/base/runtime/c-libs/posix-error/errmsg.c @@ -0,0 +1,47 @@ +/* errmsg.c + * + * COPYRIGHT (c) 1995 by AT&T Bell Laboratories. + */ + +#include "ml-unixdep.h" +#include +#include +#include +#include "ml-base.h" +#include "ml-values.h" +#include "tags.h" +#include "ml-objects.h" +#include "ml-c.h" +#include "cfun-proto-list.h" + +/* _ml_P_Error_errmsg : int -> string + * + * Return the OS-dependent error message associated with error. + */ +ml_val_t _ml_P_Error_errmsg (ml_state_t *msp, ml_val_t arg) +{ + int errnum = INT_MLtoC(arg); + ml_val_t s; + +#if defined(HAS_STRERROR) + char *msg = strerror(errnum); + if (msg != 0) + s = ML_CString (msp, msg); + else { + char buf[64]; + sprintf(buf, "", errnum); + s = ML_CString (msp, buf); + } +#else + if ((0 <= errnum) && (errnum < sys_nerr)) + s = ML_CString (msp, sys_errlist[errnum]); + else { + char buf[64]; + sprintf(buf, "", errnum); + s = ML_CString (msp, buf); + } +#endif + + return s; + +} /* end of _ml_P_Error_errmsg */ diff --git a/base/runtime/c-libs/posix-error/geterror.c b/base/runtime/c-libs/posix-error/geterror.c new file mode 100644 index 0000000..89d516d --- /dev/null +++ b/base/runtime/c-libs/posix-error/geterror.c @@ -0,0 +1,24 @@ +/* geterror.c + * + * COPYRIGHT (c) 1996 AT&T Research. + * + * Return the system constant that corresponds to the given error name. + */ + +#include "ml-base.h" +#include "ml-values.h" +#include "ml-objects.h" +#include "ml-c.h" +#include "cfun-proto-list.h" + +extern sysconst_tbl_t _ErrorNo; + + +/* _ml_P_Error_geterror : int -> sys_const + */ +ml_val_t _ml_P_Error_geterror (ml_state_t *msp, ml_val_t arg) +{ + return ML_SysConst (msp, &_ErrorNo, INT_MLtoC(arg)); + +} /* end of _ml_P_Error_geterror */ + diff --git a/base/runtime/c-libs/posix-error/listerrors.c b/base/runtime/c-libs/posix-error/listerrors.c new file mode 100644 index 0000000..d73ddfe --- /dev/null +++ b/base/runtime/c-libs/posix-error/listerrors.c @@ -0,0 +1,25 @@ +/* listerrors.c + * + * COPYRIGHT (c) 1996 AT&T Research. + * + * Return the list of system constants that represents the known error + * codes. + */ + +#include "ml-base.h" +#include "ml-values.h" +#include "ml-objects.h" +#include "ml-c.h" +#include "cfun-proto-list.h" + +extern sysconst_tbl_t _ErrorNo; + + +/* _ml_P_Error_listerrors : int -> sys_const list + */ +ml_val_t _ml_P_Error_listerrors (ml_state_t *msp, ml_val_t arg) +{ + return ML_SysConstList (msp, &_ErrorNo); + +} /* end of _ml_P_Error_listerrors */ + diff --git a/base/runtime/c-libs/posix-error/makefile b/base/runtime/c-libs/posix-error/makefile new file mode 100644 index 0000000..4965c39 --- /dev/null +++ b/base/runtime/c-libs/posix-error/makefile @@ -0,0 +1,43 @@ +# +# a template makefile for a C function library +# + +SHELL = /bin/sh + +INC_DIR = ../../include +CLIB_DIR = ../ + +INCLUDES = -I$(INC_DIR) -I$(CLIB_DIR) -I../../objs + +MAKE = make +AR = ar +ARFLAGS = rcv +RANLIB = ranlib + +LIBRARY = libposix-error.a + +VERSION = v-dummy + +OBJS = posix-error-lib.o \ + posix-name-val.o \ + errmsg.o \ + geterror.o \ + listerrors.o \ + tbl-errno.o + +$(LIBRARY) : $(VERSION) $(OBJS) + rm -rf $(LIBRARY) + $(AR) $(ARFLAGS) $(LIBRARY) $(OBJS) + $(RANLIB) $(LIBRARY) + +$(VERSION) : + ($(MAKE) MAKE="$(MAKE)" clean) + echo "$(VERSION)" > $(VERSION) + +.c.o: $(INC_DIR)/ml-unixdep.h $(INC_DIR)/ml-base.h $(INC_DIR)/ml-values.h \ + $(CLIB_DIR)/ml-c.h cfun-proto-list.h cfun-list.h + $(CC) $(CFLAGS) $(DEFS) $(INCLUDES) -c $< + +clean : + rm -f v-* *.o $(LIBRARY) + diff --git a/base/runtime/c-libs/posix-error/posix-error-lib.c b/base/runtime/c-libs/posix-error/posix-error-lib.c new file mode 100644 index 0000000..678ec08 --- /dev/null +++ b/base/runtime/c-libs/posix-error/posix-error-lib.c @@ -0,0 +1,28 @@ +/* posix-error-lib.c + * + * COPYRIGHT (c) 1995 AT&T Bell Laboratories. + */ + +#include "ml-base.h" +#include "c-library.h" +#include "cfun-proto-list.h" + + +/* the table of C functions and ML names */ +#define CFUNC(NAME, FUNC, MLTYPE) CFUNC_BIND(NAME, FUNC, MLTYPE) +PVT cfunc_binding_t CFunTable[] = { +#include "cfun-list.h" + CFUNC_NULL_BIND + }; +#undef CFUNC + + +/* the POSIX Error library */ +c_library_t POSIX_Error_Library = { + CLIB_NAME, + CLIB_VERSION, + CLIB_DATE, + NIL(clib_init_fn_t), + CFunTable + }; + diff --git a/base/runtime/c-libs/posix-error/posix-name-val.c b/base/runtime/c-libs/posix-error/posix-name-val.c new file mode 100644 index 0000000..e3f479c --- /dev/null +++ b/base/runtime/c-libs/posix-error/posix-name-val.c @@ -0,0 +1,32 @@ +/* posix-name-val.c + * + * COPYRIGHT (c) 1995 by AT&T Bell Laboratories. + * + * Support for string to int lookup. + */ + +#include +#include +#include "posix-name-val.h" + +static int cmp (const void *key, const void *item) +{ + return strcmp(((name_val_t*)key)->name, ((name_val_t*)item)->name); +} + +/* _ml_posix_nv_lookup + * + * Given a string key, an array of name/value pairs and the size of the + * array, find element in the array with matching key and return a pointer + * to it. If not found, return NULL. We use binary search, so we assume + * the array is sorted. + */ +name_val_t *_ml_posix_nv_lookup (char *key, name_val_t *array, int numelms) +{ + name_val_t k; + + k.name = key; + return ((name_val_t *)bsearch(&k,array,numelms,sizeof (name_val_t),cmp)); + +} /* end of _ml_posix_nv_lookup */ + diff --git a/base/runtime/c-libs/posix-error/posix-name-val.h b/base/runtime/c-libs/posix-error/posix-name-val.h new file mode 100644 index 0000000..6d8bf76 --- /dev/null +++ b/base/runtime/c-libs/posix-error/posix-name-val.h @@ -0,0 +1,18 @@ +/* posix-name-val.h + * + * COPYRIGHT (c) 1995 by AT&T Bell Laboratories. + * + * Header file for handling string-to-int lookup. + */ + +#ifndef _ML_POSIX_NV_ +#define _ML_POSIX_NV_ + +typedef struct { + char* name; + int val; +} name_val_t; + +extern name_val_t *_ml_posix_nv_lookup (char *, name_val_t *, int); + +#endif /* !_ML_POSIX_NV_ */ diff --git a/base/runtime/c-libs/posix-error/tbl-errno.c b/base/runtime/c-libs/posix-error/tbl-errno.c new file mode 100644 index 0000000..5263175 --- /dev/null +++ b/base/runtime/c-libs/posix-error/tbl-errno.c @@ -0,0 +1,76 @@ +/* tbl-errno.c + * + * COPYRIGHT (c) 1996 AT&T Research. + * + * The table of system constants representing the Posix error codes. + */ + +#include "ml-unixdep.h" +#include "ml-base.h" +#include + +PVT sys_const_t tbl[] = { + {EACCES, "acces"}, + {EAGAIN, "again"}, +#if (defined(EWOULDBLOCK) && (EWOULDBLOCK != EAGAIN)) + {EWOULDBLOCK, "wouldblock"}, +#endif + {EBADF, "badf"}, +#ifdef EBADMSG + {EBADMSG, "badmsg"}, +#else + {0, "badmsg"}, +#endif + {EBUSY, "busy"}, +#ifdef ECANCELED + {ECANCELED, "canceled"}, +#else + {0, "canceled"}, +#endif + {ECHILD, "child"}, + {EDEADLK, "deadlk"}, + {EDOM, "dom"}, + {EEXIST, "exist"}, + {EFAULT, "fault"}, + {EFBIG, "fbig"}, + {EINPROGRESS, "inprogress"}, + {EINTR, "intr"}, + {EINVAL, "inval"}, + {EIO, "io"}, + {EISDIR, "isdir"}, + {ELOOP, "loop"}, + {EMFILE, "mfile"}, + {EMLINK, "mlink"}, + {EMSGSIZE, "msgsize"}, + {ENAMETOOLONG, "nametoolong"}, + {ENFILE, "nfile"}, + {ENODEV, "nodev"}, + {ENOENT, "noent"}, + {ENOEXEC, "noexec"}, + {ENOLCK, "nolck"}, + {ENOMEM, "nomem"}, + {ENOSPC, "nospc"}, + {ENOSYS, "nosys"}, + {ENOTDIR, "notdir"}, + {ENOTEMPTY, "notempty"}, +#ifdef ENOTSUP + {ENOTSUP, "notsup"}, +#else + {0, "notsup"}, +#endif + {ENOTTY, "notty"}, + {ENXIO, "nxio"}, + {EPERM, "perm"}, + {EPIPE, "pipe"}, + {ERANGE, "range"}, + {EROFS, "rofs"}, + {ESPIPE, "spipe"}, + {ESRCH, "srch"}, + {E2BIG, "toobig"}, + {EXDEV, "xdev"}, + }; + +sysconst_tbl_t _ErrorNo = { + sizeof(tbl) / sizeof(sys_const_t), + tbl + }; diff --git a/base/runtime/c-libs/posix-filesys/access.c b/base/runtime/c-libs/posix-filesys/access.c new file mode 100644 index 0000000..e192b2a --- /dev/null +++ b/base/runtime/c-libs/posix-filesys/access.c @@ -0,0 +1,32 @@ +/* access.c + * + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + */ + +#include "ml-unixdep.h" +#include INCLUDE_TYPES_H +#include +#include "ml-base.h" +#include "ml-objects.h" +#include "ml-c.h" +#include "cfun-proto-list.h" + + +/* _ml_P_FileSys_access : (string * SysWord.word) -> bool + * name access_mode + * + * Determine accessibility of a file. + */ +ml_val_t _ml_P_FileSys_access (ml_state_t *msp, ml_val_t arg) +{ + ml_val_t path = REC_SEL(arg, 0); + ml_val_t ml_mode = REC_SEL(arg, 1); + mode_t mode = SYSWORD_MLtoC(ml_mode); + int sts; + + sts = access (STR_MLtoC(path), mode); + + return (sts == 0) ? ML_true : ML_false; + +} /* end of _ml_P_FileSys_access */ diff --git a/base/runtime/c-libs/posix-filesys/cfun-list.h b/base/runtime/c-libs/posix-filesys/cfun-list.h new file mode 100644 index 0000000..a826e0a --- /dev/null +++ b/base/runtime/c-libs/posix-filesys/cfun-list.h @@ -0,0 +1,43 @@ +/* cfun-list.h + * + * COPYRIGHT (c) 1995 AT&T Bell Laboratories. + * + * This file lists the directory library of C functions that are callable by ML. + */ + +#ifndef CLIB_NAME +#define CLIB_NAME "POSIX-FileSys" +#define CLIB_VERSION "1.0" +#define CLIB_DATE "February 16, 1995" +#endif + +CFUNC("osval", _ml_P_FileSys_osval, "string -> int") +CFUNC("chdir", _ml_P_FileSys_chdir, "string -> unit") +CFUNC("getcwd", _ml_P_FileSys_getcwd, "unit -> string") +CFUNC("openf", _ml_P_FileSys_openf, "string * word * word -> int") +CFUNC("umask", _ml_P_FileSys_umask, "word -> word") +CFUNC("link", _ml_P_FileSys_link, "string * string -> unit") +CFUNC("rename", _ml_P_FileSys_rename, "string * string -> unit") +CFUNC("symlink", _ml_P_FileSys_symlink, "string * string -> unit") +CFUNC("mkdir", _ml_P_FileSys_mkdir, "string * word -> unit") +CFUNC("mkfifo", _ml_P_FileSys_mkfifo, "string * word -> unit") +CFUNC("unlink", _ml_P_FileSys_unlink, "string -> unit") +CFUNC("rmdir", _ml_P_FileSys_rmdir, "string -> unit") +CFUNC("readlink", _ml_P_FileSys_readlink, "string -> string") +CFUNC("stat", _ml_P_FileSys_stat, "string -> statrep") +CFUNC("lstat", _ml_P_FileSys_lstat, "string -> statrep") +CFUNC("fstat", _ml_P_FileSys_fstat, "word -> statrep") +CFUNC("access", _ml_P_FileSys_access, "string * word -> bool") +CFUNC("chmod", _ml_P_FileSys_chmod, "string * word -> unit") +CFUNC("fchmod", _ml_P_FileSys_fchmod, "int * word -> unit") +CFUNC("ftruncate", _ml_P_FileSys_ftruncate, "int * int -> unit") +CFUNC("chown", _ml_P_FileSys_chown, "string * word * word -> unit") +CFUNC("fchown", _ml_P_FileSys_fchown, "int * word * word -> unit") +CFUNC("utime", _ml_P_FileSys_utime, "string * int * int -> unit") +CFUNC("pathconf", _ml_P_FileSys_pathconf, "(string * string) -> word option") +CFUNC("fpathconf", _ml_P_FileSys_fpathconf, "(int * string) -> word option") +CFUNC("opendir", _ml_P_FileSys_opendir, "string -> object") +CFUNC("readdir", _ml_P_FileSys_readdir, "object -> string") +CFUNC("rewinddir", _ml_P_FileSys_rewinddir, "object -> unit") +CFUNC("closedir", _ml_P_FileSys_closedir, "object -> unit") + diff --git a/base/runtime/c-libs/posix-filesys/cfun-proto-list.h b/base/runtime/c-libs/posix-filesys/cfun-proto-list.h new file mode 100644 index 0000000..16b3e96 --- /dev/null +++ b/base/runtime/c-libs/posix-filesys/cfun-proto-list.h @@ -0,0 +1,18 @@ +/* cfun-proto-list.h + * + * COPYRIGHT (c) 1194 AT&T Bell Laboratories. + */ + +#ifndef _CFUN_PROTO_LIST_ +#define _CFUN_PROTO_LIST_ + +#ifndef _C_LIBRARY_ +# include "c-library.h" +#endif + +/* the external definitions for the C functions */ +#define CFUNC(NAME, FUNC, MLTYPE) CFUNC_PROTO(NAME, FUNC, MLTYPE) +#include "cfun-list.h" +#undef CFUNC + +#endif /* !_CFUN_PROTO_LIST_ */ diff --git a/base/runtime/c-libs/posix-filesys/chdir.c b/base/runtime/c-libs/posix-filesys/chdir.c new file mode 100644 index 0000000..607795a --- /dev/null +++ b/base/runtime/c-libs/posix-filesys/chdir.c @@ -0,0 +1,27 @@ +/* chdir.c + * + * COPYRIGHT (c) 1995 by AT&T Bell Laboratories. + */ + +#include "ml-unixdep.h" +#include "ml-base.h" +#include "ml-values.h" +#include "ml-objects.h" +#include "ml-c.h" +#include "cfun-proto-list.h" +#include + + +/* _ml_P_FileSys_chdir : string -> unit + * + * Change working directory + */ +ml_val_t _ml_P_FileSys_chdir (ml_state_t *msp, ml_val_t arg) +{ + int sts; + + sts = chdir(STR_MLtoC(arg)); + + CHK_RETURN_UNIT(msp, sts) + +} /* end of _ml_P_FileSys_chdir */ diff --git a/base/runtime/c-libs/posix-filesys/chmod.c b/base/runtime/c-libs/posix-filesys/chmod.c new file mode 100644 index 0000000..55c816c --- /dev/null +++ b/base/runtime/c-libs/posix-filesys/chmod.c @@ -0,0 +1,28 @@ +/* chmod.c + * + * COPYRIGHT (c) 1995 by AT&T Bell Laboratories. + */ + +#include "ml-unixdep.h" +#include "ml-objects.h" +#include "ml-c.h" +#include "cfun-proto-list.h" +#include +#include + +/* _ml_P_FileSys_chmod : (string * word) -> unit + * name mode + * + * Change mode of file + */ +ml_val_t _ml_P_FileSys_chmod (ml_state_t *msp, ml_val_t arg) +{ + ml_val_t path = REC_SEL(arg, 0); + mode_t mode = REC_SELWORD(arg, 1); + int sts; + + sts = chmod (STR_MLtoC(path), mode); + + CHK_RETURN_UNIT(msp, sts) + +} /* end of _ml_P_FileSys_chmod */ diff --git a/base/runtime/c-libs/posix-filesys/chown.c b/base/runtime/c-libs/posix-filesys/chown.c new file mode 100644 index 0000000..f6559ef --- /dev/null +++ b/base/runtime/c-libs/posix-filesys/chown.c @@ -0,0 +1,31 @@ +/* chown.c + * + * COPYRIGHT (c) 1995 by AT&T Bell Laboratories. + */ + +#include "ml-unixdep.h" +#include "ml-base.h" +#include "ml-values.h" +#include "ml-objects.h" +#include "ml-c.h" +#include "cfun-proto-list.h" +#include +#include + +/* _ml_P_FileSys_chown : (string * word * word) -> unit + * name uid gid + * + * Change owner and group of file given its name. + */ +ml_val_t _ml_P_FileSys_chown (ml_state_t *msp, ml_val_t arg) +{ + ml_val_t path = REC_SEL(arg, 0); + uid_t uid = REC_SELWORD(arg, 1); + gid_t gid = REC_SELWORD(arg, 2); + int sts; + + sts = chown (STR_MLtoC(path), uid, gid); + + CHK_RETURN_UNIT(msp, sts) + +} /* end of _ml_P_FileSys_chown */ diff --git a/base/runtime/c-libs/posix-filesys/closedir.c b/base/runtime/c-libs/posix-filesys/closedir.c new file mode 100644 index 0000000..7da4360 --- /dev/null +++ b/base/runtime/c-libs/posix-filesys/closedir.c @@ -0,0 +1,28 @@ +/* closedir.c + * + * COPYRIGHT (c) 1995 by AT&T Bell Laboratories. + */ + +#include "ml-unixdep.h" +#include "ml-base.h" +#include "ml-values.h" +#include "tags.h" +#include "ml-objects.h" +#include "ml-c.h" +#include "cfun-proto-list.h" +#include +#include + +/* _ml_P_FileSys_closedir : object -> unit + * + * Close a directory stream. + */ +ml_val_t _ml_P_FileSys_closedir (ml_state_t *msp, ml_val_t arg) +{ + int sts; + + sts = closedir(PTR_MLtoC(DIR, arg)); + + CHK_RETURN_UNIT(msp,sts) + +} /* end of _ml_P_FileSys_closedir */ diff --git a/base/runtime/c-libs/posix-filesys/fchmod.c b/base/runtime/c-libs/posix-filesys/fchmod.c new file mode 100644 index 0000000..511c646 --- /dev/null +++ b/base/runtime/c-libs/posix-filesys/fchmod.c @@ -0,0 +1,30 @@ +/* fchmod.c + * + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + */ + +#include "ml-unixdep.h" +#include "ml-objects.h" +#include "ml-c.h" +#include "cfun-proto-list.h" +#include +#include + +/* _ml_P_FileSys_fchmod : (fd * SysWord.word) -> unit + * fd mode + * + * Change mode of file + */ +ml_val_t _ml_P_FileSys_fchmod (ml_state_t *msp, ml_val_t arg) +{ + int fd = REC_SELINT(arg, 0); + ml_val_t ml_mode = REC_SEL(arg, 1); + mode_t mode = SYSWORD_MLtoC(ml_mode); + int sts; + + sts = fchmod (fd, mode); + + CHK_RETURN_UNIT(msp, sts) + +} /* end of _ml_P_FileSys_fchmod */ diff --git a/base/runtime/c-libs/posix-filesys/fchown.c b/base/runtime/c-libs/posix-filesys/fchown.c new file mode 100644 index 0000000..6aa890b --- /dev/null +++ b/base/runtime/c-libs/posix-filesys/fchown.c @@ -0,0 +1,34 @@ +/* fchown.c + * + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + */ + +#include "ml-unixdep.h" +#include "ml-base.h" +#include "ml-values.h" +#include "ml-objects.h" +#include "ml-c.h" +#include "cfun-proto-list.h" +#include +#include + +/* _ml_P_FileSys_fchown : (int * SysWord.word * SysWord.word) -> unit + * fd uid gid + * + * Change owner and group of file given a file descriptor for it. + */ +ml_val_t _ml_P_FileSys_fchown (ml_state_t *msp, ml_val_t arg) +{ + int fd = REC_SELINT(arg, 0); + ml_val_t ml_uid = REC_SEL(arg, 1); + uid_t uid = SYSWORD_MLtoC(ml_uid); + ml_val_t ml_gid = REC_SEL(arg, 2); + gid_t gid = SYSWORD_MLtoC(ml_gid); + int sts; + + sts = fchown (fd, uid, gid); + + CHK_RETURN_UNIT(msp, sts) + +} /* end of _ml_P_FileSys_fchown */ diff --git a/base/runtime/c-libs/posix-filesys/ftruncate.c b/base/runtime/c-libs/posix-filesys/ftruncate.c new file mode 100644 index 0000000..990e9d8 --- /dev/null +++ b/base/runtime/c-libs/posix-filesys/ftruncate.c @@ -0,0 +1,29 @@ +/* ftruncate.c + * + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + */ + +#include "ml-unixdep.h" +#include "ml-objects.h" +#include "ml-c.h" +#include "cfun-proto-list.h" +#include +#include + +/* _ml_P_FileSys_ftruncate_64 : (int * Position.int) -> unit + * + * Truncate or extend a file to a specified length + */ +ml_val_t _ml_P_FileSys_ftruncate (ml_state_t *msp, ml_val_t arg) +{ + int fd = REC_SELINT(arg, 0); + ml_val_t boxed_len = REC_SEL(arg, 1); + off_t len = (off_t)INT64_MLtoC(boxed_len); + int sts; + + sts = ftruncate (fd, len); + + CHK_RETURN_UNIT(msp, sts) + +} /* end of _ml_P_FileSys_ftruncate */ diff --git a/base/runtime/c-libs/posix-filesys/getcwd.c b/base/runtime/c-libs/posix-filesys/getcwd.c new file mode 100644 index 0000000..2edf66f --- /dev/null +++ b/base/runtime/c-libs/posix-filesys/getcwd.c @@ -0,0 +1,62 @@ +/* getcwd.c + * + * COPYRIGHT (c) 1995 by AT&T Bell Laboratories. + */ + +#include "ml-unixdep.h" +#include "ml-base.h" +#include "ml-values.h" +#include "ml-objects.h" +#include "ml-c.h" +#include "cfun-proto-list.h" +#include +#include +#include +#include + + +/* _ml_P_FileSys_getcwd : unit -> string + * + * Get current working directory pathname + * + * Should this be written to avoid the extra copy? + */ +ml_val_t _ml_P_FileSys_getcwd (ml_state_t *msp, ml_val_t arg) +{ + char path[MAXPATHLEN]; + char* sts; + ml_val_t p; + int buflen; + char *buf; + + sts = getcwd(path, MAXPATHLEN); + + if (sts != NIL(char *)) + return ML_CString (msp, path); + + if (errno != ERANGE) + return RAISE_SYSERR(msp, sts); + + buflen = 2*MAXPATHLEN; + buf = MALLOC(buflen); + if (buf == NIL(char*)) + return RAISE_ERROR(msp, "no malloc memory"); + + while ((sts = getcwd(buf, buflen)) == NIL(char *)) { + FREE (buf); + if (errno != ERANGE) + return RAISE_SYSERR(msp, sts); + else { + buflen = 2*buflen; + buf = MALLOC(buflen); + if (buf == NIL(char*)) + return RAISE_ERROR(msp, "no malloc memory"); + } + } + + p = ML_CString (msp, buf); + FREE (buf); + + return p; + +} /* end of _ml_P_FileSys_getcwd */ diff --git a/base/runtime/c-libs/posix-filesys/link.c b/base/runtime/c-libs/posix-filesys/link.c new file mode 100644 index 0000000..e900829 --- /dev/null +++ b/base/runtime/c-libs/posix-filesys/link.c @@ -0,0 +1,30 @@ +/* link.c + * + * COPYRIGHT (c) 1995 by AT&T Bell Laboratories. + */ + +#include "ml-unixdep.h" +#include "ml-base.h" +#include "ml-values.h" +#include "ml-objects.h" +#include "ml-c.h" +#include "cfun-proto-list.h" +#include + + +/* _ml_P_FileSys_link : string * string -> unit + * existing newname + * + * Creates a hard link from newname to existing file. + */ +ml_val_t _ml_P_FileSys_link (ml_state_t *msp, ml_val_t arg) +{ + int sts; + ml_val_t existing = REC_SEL(arg, 0); + ml_val_t newname = REC_SEL(arg, 1); + + sts = link(STR_MLtoC(existing), STR_MLtoC(newname)); + + CHK_RETURN_UNIT (msp, sts) + +} /* end of _ml_P_FileSys_link */ diff --git a/base/runtime/c-libs/posix-filesys/makefile b/base/runtime/c-libs/posix-filesys/makefile new file mode 100644 index 0000000..5b14ea1 --- /dev/null +++ b/base/runtime/c-libs/posix-filesys/makefile @@ -0,0 +1,72 @@ +# +# a template makefile for a C function library +# + +SHELL = /bin/sh + +ROOT_DIR = ../.. +INC_DIR = $(ROOT_DIR)/include +CONFIG_DIR = $(ROOT_DIR)/config +CLIB_DIR = ../ + +INCLUDES = -I$(INC_DIR) -I$(CLIB_DIR) -I../../objs + +MAKE = make +AR = ar +ARFLAGS = rcv +CPP = /lib/cpp +RANLIB = ranlib + +LIBRARY = libposix-filesys.a + +VERSION = v-dummy + +OBJS = posix-filesys-lib.o \ + osval.o \ + chdir.o \ + getcwd.o \ + openf.o \ + umask.o \ + link.o \ + rename.o \ + symlink.o \ + mkdir.o \ + mkfifo.o \ + unlink.o \ + rmdir.o \ + readlink.o \ + stat.o \ + access.o \ + chmod.o \ + fchmod.o \ + chown.o \ + fchown.o \ + ftruncate.o \ + utime.o \ + opendir.o \ + readdir.o \ + rewinddir.o \ + closedir.o \ + pathconf.o + +$(LIBRARY) : $(VERSION) $(OBJS) + rm -rf $(LIBRARY) + $(AR) $(ARFLAGS) $(LIBRARY) $(OBJS) + $(RANLIB) $(LIBRARY) + +$(VERSION) : + ($(MAKE) MAKE="$(MAKE)" clean) + echo "$(VERSION)" > $(VERSION) + +.c.o: $(INC_DIR)/ml-unixdep.h $(INC_DIR)/ml-base.h $(INC_DIR)/ml-values.h \ + $(CLIB_DIR)/ml-c.h cfun-proto-list.h cfun-list.h + $(CC) $(CFLAGS) $(DEFS) $(INCLUDES) -c $< + +pathconf.o : ml_pathconf.h + +ml_pathconf.h : + VERSION=$(VERSION) CPP="$(CPP)" $(CONFIG_DIR)/gen-posix-names.sh _PC_ ml_pathconf.h + +clean : + rm -f v-* *.o ml_pathconf.h $(LIBRARY) + diff --git a/base/runtime/c-libs/posix-filesys/mkdir.c b/base/runtime/c-libs/posix-filesys/mkdir.c new file mode 100644 index 0000000..86ded1e --- /dev/null +++ b/base/runtime/c-libs/posix-filesys/mkdir.c @@ -0,0 +1,29 @@ +/* mkdir.c + * + * COPYRIGHT (c) 1995 by AT&T Bell Laboratories. + */ + +#include "ml-unixdep.h" +#include +#include +#include "ml-objects.h" +#include "ml-c.h" +#include "cfun-proto-list.h" + +/* _ml_P_FileSys_mkdir : (string * SysWord.word) -> unit + * name mode + * + * Make a directory + */ +ml_val_t _ml_P_FileSys_mkdir (ml_state_t *msp, ml_val_t arg) +{ + ml_val_t path = REC_SEL(arg, 0); + ml_val_t ml_mode = REC_SEL(arg, 1); + mode_t mode = SYSWORD_MLtoC(ml_mode); + int sts; + + sts = mkdir (STR_MLtoC(path), mode); + + CHK_RETURN_UNIT(msp, sts) + +} /* end of _ml_P_FileSys_mkdir */ diff --git a/base/runtime/c-libs/posix-filesys/mkfifo.c b/base/runtime/c-libs/posix-filesys/mkfifo.c new file mode 100644 index 0000000..6533ad5 --- /dev/null +++ b/base/runtime/c-libs/posix-filesys/mkfifo.c @@ -0,0 +1,33 @@ +/* mkfifo.c + * + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + */ + +#include "ml-unixdep.h" +#include +#include +#include "ml-base.h" +#include "ml-values.h" +#include "ml-objects.h" +#include "ml-c.h" +#include "cfun-proto-list.h" + + +/* _ml_P_FileSys_mkfifo : (string * word) -> unit + * name mode + * + * Make a FIFO special file. + */ +ml_val_t _ml_P_FileSys_mkfifo (ml_state_t *msp, ml_val_t arg) +{ + ml_val_t path = REC_SEL(arg, 0); + ml_val_t ml_mode = REC_SEL(arg, 1); + mode_t mode = SYSWORD_MLtoC(ml_mode); + int sts; + + sts = mkfifo (STR_MLtoC(path), mode); + + CHK_RETURN_UNIT(msp, sts) + +} /* end of _ml_P_FileSys_mkfifo */ diff --git a/base/runtime/c-libs/posix-filesys/opendir.c b/base/runtime/c-libs/posix-filesys/opendir.c new file mode 100644 index 0000000..baf489d --- /dev/null +++ b/base/runtime/c-libs/posix-filesys/opendir.c @@ -0,0 +1,30 @@ +/* opendir.c + * + * COPYRIGHT (c) 1995 by AT&T Bell Laboratories. + */ + +#include "ml-unixdep.h" +#include +#include +#include "ml-base.h" +#include "ml-values.h" +#include "tags.h" +#include "ml-objects.h" +#include "ml-c.h" +#include "cfun-proto-list.h" + +/* _ml_P_FileSys_opendir : string -> object + * + * Open and return a directory stream. + */ +ml_val_t _ml_P_FileSys_opendir (ml_state_t *msp, ml_val_t arg) +{ + DIR *dir; + + dir = opendir(STR_MLtoC(arg)); + if (dir == NIL(DIR *)) + return RAISE_SYSERR(msp, -1); + else + return PTR_CtoML(dir); + +} /* end of _ml_P_FileSys_opendir */ diff --git a/base/runtime/c-libs/posix-filesys/openf.c b/base/runtime/c-libs/posix-filesys/openf.c new file mode 100644 index 0000000..0aff768 --- /dev/null +++ b/base/runtime/c-libs/posix-filesys/openf.c @@ -0,0 +1,31 @@ +/* openf.c + * + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + */ + +#include "ml-unixdep.h" +#include +#include "ml-objects.h" +#include "ml-c.h" +#include "cfun-proto-list.h" + +/* _ml_P_FileSys_openf : (string * SysWord.word * SysWord.word) -> int + * name flags mode + * + * Open a file and return the file descriptor. + */ +ml_val_t _ml_P_FileSys_openf (ml_state_t *msp, ml_val_t arg) +{ + ml_val_t path = REC_SEL(arg, 0); + ml_val_t ml_flags = REC_SEL(arg, 1); + int flags = SYSWORD_MLtoC(ml_flags); + ml_val_t ml_mode = REC_SEL(arg, 2); + int mode = SYSWORD_MLtoC(ml_mode); + int fd; + + fd = open (STR_MLtoC(path), flags, mode); + + CHK_RETURN(msp, fd) + +} /* end of _ml_P_FileSys_openf */ diff --git a/base/runtime/c-libs/posix-filesys/osval.c b/base/runtime/c-libs/posix-filesys/osval.c new file mode 100644 index 0000000..a37c778 --- /dev/null +++ b/base/runtime/c-libs/posix-filesys/osval.c @@ -0,0 +1,82 @@ +/* osval.c + * + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + */ + +#include "ml-unixdep.h" +#include +#include +#include +#include "ml-base.h" +#include "ml-values.h" +#include "tags.h" +#include "ml-objects.h" +#include "ml-c.h" +#include "cfun-proto-list.h" +#include "../posix-error/posix-name-val.h" + +/* NOTE: the following table must be in alphabetical order!!! */ +PVT name_val_t values [] = { + {"A_EXEC", X_OK}, + {"A_FILE", F_OK}, + {"A_READ", R_OK}, + {"A_WRITE", W_OK}, + {"O_APPEND", O_APPEND}, + {"O_CREAT", O_CREAT}, +#ifdef O_DSYNC + {"O_DSYNC", O_DSYNC}, +#else + {"O_DSYNC", 0}, +#endif + {"O_EXCL", O_EXCL}, + {"O_NOCTTY", O_NOCTTY}, + {"O_NONBLOCK", O_NONBLOCK}, + {"O_RDONLY", O_RDONLY}, + {"O_RDWR", O_RDWR}, +#ifdef O_RSYNC + {"O_RSYNC", O_RSYNC}, +#else + {"O_RSYNC", 0}, +#endif +#ifdef O_SYNC + {"O_SYNC", O_SYNC}, +#else + {"O_SYNC", 0}, +#endif + {"O_TRUNC", O_TRUNC}, + {"O_WRONLY", O_WRONLY}, + {"irgrp", S_IRGRP}, + {"iroth", S_IROTH}, + {"irusr", S_IRUSR}, + {"irwxg", S_IRWXG}, + {"irwxo", S_IRWXO}, + {"irwxu", S_IRWXU}, + {"isgid", S_ISGID}, + {"isuid", S_ISUID}, + {"iwgrp", S_IWGRP}, + {"iwoth", S_IWOTH}, + {"iwusr", S_IWUSR}, + {"ixgrp", S_IXGRP}, + {"ixoth", S_IXOTH}, + {"ixusr", S_IXUSR}, +}; + +#define NUMELMS ((sizeof values)/(sizeof (name_val_t))) + +/* _ml_P_FileSys_osval : string -> int + * + * Return the OS-dependent, compile-time constant specified by the string. + */ +ml_val_t _ml_P_FileSys_osval (ml_state_t *msp, ml_val_t arg) +{ + name_val_t *res; + + res = _ml_posix_nv_lookup (STR_MLtoC(arg), values, NUMELMS); + if (res) + return INT_CtoML(res->val); + else { + return RAISE_ERROR(msp, "system constant not defined"); + } + +} /* end of _ml_P_FileSys_osval */ diff --git a/base/runtime/c-libs/posix-filesys/pathconf.c b/base/runtime/c-libs/posix-filesys/pathconf.c new file mode 100644 index 0000000..066bac1 --- /dev/null +++ b/base/runtime/c-libs/posix-filesys/pathconf.c @@ -0,0 +1,119 @@ +/* pathconf.c + * + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + */ + +#include "ml-unixdep.h" +#include +#include +#include "ml-base.h" +#include "ml-values.h" +#include "ml-objects.h" +#include "ml-c.h" +#include "cfun-proto-list.h" +#include "../posix-error/posix-name-val.h" + + /* The following table is generated from all _PC_ values + * in unistd.h. For most systems, this will include + _PC_CHOWN_RESTRICTED + _PC_LINK_MAX + _PC_MAX_CANON + _PC_MAX_INPUT + _PC_NAME_MAX + _PC_NO_TRUNC + _PC_PATH_MAX + _PC_PIPE_BUF + _PC_VDISABLE + * + * The full POSIX list is given in section 5.7.1 of Std 1003.1b-1993. + * + * The SML string used to look up these values has the same + * form but without the prefix, e.g., to lookup _PC_LINK_MAX, + * use pathconf (path, "LINK_MAX") + */ +static name_val_t values[] = { +#include "ml_pathconf.h" +}; + +#define NUMELMS ((sizeof values)/(sizeof (name_val_t))) + +/* mkValue : int -> SysWord.word option + * + * Convert return value from (f)pathconf to ML value. + */ +STATIC_INLINE ml_val_t mkValue (ml_state_t *msp, int val) +{ + ml_val_t p, obj; + + if (val >= 0) { + SYSWORD_ALLOC (msp, p, val); + OPTION_SOME(msp, obj, p); + } + else if (errno == 0) { + obj = OPTION_NONE; + } + else { + obj = RAISE_SYSERR(msp, val); + } + + return obj; + +} /* end of mkValue */ + +/* _ml_P_FileSys_pathconf : string * string -> SysWord.word option + * filename attribute + * + * Get configurable pathname attribute given pathname + */ +ml_val_t _ml_P_FileSys_pathconf (ml_state_t *msp, ml_val_t arg) +{ + int val; + ml_val_t mlPathname = REC_SEL(arg, 0); + ml_val_t mlAttr = REC_SEL(arg, 1); + char *pathname = STR_MLtoC(mlPathname); + name_val_t *attr; + + attr = _ml_posix_nv_lookup (STR_MLtoC(mlAttr), values, NUMELMS); + if (!attr) { + errno = EINVAL; + return RAISE_SYSERR(msp, -1); + } + + errno = 0; + while (((val = pathconf (pathname, attr->val)) == -1) && (errno == EINTR)) { + errno = 0; + continue; + } + + return (mkValue (msp, val)); + +} /* end of _ml_P_FileSys_pathconf */ + +/* _ml_P_FileSys_fpathconf : int * string -> SysWord.word option + * fd attribute + * + * Get configurable pathname attribute given pathname + */ +ml_val_t _ml_P_FileSys_fpathconf (ml_state_t *msp, ml_val_t arg) +{ + int val; + int fd = REC_SELINT(arg, 0); + ml_val_t mlAttr = REC_SEL(arg, 1); + name_val_t *attr; + + attr = _ml_posix_nv_lookup (STR_MLtoC(mlAttr), values, NUMELMS); + if (!attr) { + errno = EINVAL; + return RAISE_SYSERR(msp, -1); + } + + errno = 0; + while (((val = fpathconf (fd, attr->val)) == -1) && (errno == EINTR)) { + errno = 0; + continue; + } + + return mkValue (msp, val); + +} /* end of _ml_P_FileSys_fpathconf */ diff --git a/base/runtime/c-libs/posix-filesys/posix-filesys-lib.c b/base/runtime/c-libs/posix-filesys/posix-filesys-lib.c new file mode 100644 index 0000000..1755ff2 --- /dev/null +++ b/base/runtime/c-libs/posix-filesys/posix-filesys-lib.c @@ -0,0 +1,28 @@ +/* posix-filesys-lib.c + * + * COPYRIGHT (c) 1995 AT&T Bell Laboratories. + */ + +#include "ml-base.h" +#include "c-library.h" +#include "cfun-proto-list.h" + + +/* the table of C functions and ML names */ +#define CFUNC(NAME, FUNC, MLTYPE) CFUNC_BIND(NAME, FUNC, MLTYPE) +PVT cfunc_binding_t CFunTable[] = { +#include "cfun-list.h" + CFUNC_NULL_BIND + }; +#undef CFUNC + + +/* the POSIX Signal library */ +c_library_t POSIX_FileSys_Library = { + CLIB_NAME, + CLIB_VERSION, + CLIB_DATE, + NIL(clib_init_fn_t), + CFunTable + }; + diff --git a/base/runtime/c-libs/posix-filesys/readdir.c b/base/runtime/c-libs/posix-filesys/readdir.c new file mode 100644 index 0000000..08d1ac1 --- /dev/null +++ b/base/runtime/c-libs/posix-filesys/readdir.c @@ -0,0 +1,44 @@ +/* readdir.c + * + * COPYRIGHT (c) 1995 by AT&T Bell Laboratories. + */ + +#include "ml-unixdep.h" +#include +#include +#include +#include "ml-base.h" +#include "ml-values.h" +#include "tags.h" +#include "ml-objects.h" +#include "ml-c.h" +#include "cfun-proto-list.h" + +/* _ml_P_FileSys_readdir : object -> string + * + * Return the next filename from the directory stream. + */ +ml_val_t _ml_P_FileSys_readdir (ml_state_t *msp, ml_val_t arg) +{ + struct dirent *dirent; + + while (TRUE) { + errno = 0; + dirent = readdir(PTR_MLtoC(DIR, arg)); + if (dirent == NIL(struct dirent *)) { + if (errno != 0) /* Error occurred */ + return RAISE_SYSERR(msp, -1); + else /* End of stream */ + return ML_string0; + } + else { + char *cp = dirent->d_name; + if ((cp[0] == '.') + && ((cp[1] == '\0') || ((cp[1] == '.') && (cp[2] == '\0')))) + continue; + else + return ML_CString (msp, cp); + } + } + +} /* end of _ml_P_FileSys_readdir */ diff --git a/base/runtime/c-libs/posix-filesys/readlink.c b/base/runtime/c-libs/posix-filesys/readlink.c new file mode 100644 index 0000000..f23ff08 --- /dev/null +++ b/base/runtime/c-libs/posix-filesys/readlink.c @@ -0,0 +1,80 @@ +/* readlink.c + * + * COPYRIGHT (c) 1995 by AT&T Bell Laboratories. + */ + +#include "ml-unixdep.h" +#include +#include +#include +#include +#include "ml-base.h" +#include "ml-values.h" +#include "ml-objects.h" +#include "ml-c.h" +#include "cfun-proto-list.h" + + +/* _ml_P_FileSys_readlink : string -> string + * + * Read the value of a symbolic link. + * + * The following implementation assumes that the system readlink + * fills the given buffer as much as possible, without nul-termination, + * and returns the number of bytes copied. If the buffer is not large + * enough, the return value will be at least the buffer size. In that + * case, we find out how big the link really is, allocate a buffer to + * hold it, and redo the readlink. + * + * Note that the above semantics are not those of POSIX, which requires + * null-termination on success, and only fills the buffer up to as most + * the penultimate byte even on failure. + * + * Should this be written to avoid the extra copy, using heap memory? + */ +ml_val_t _ml_P_FileSys_readlink (ml_state_t *msp, ml_val_t arg) +{ + char *path = STR_MLtoC(arg); + char buf[MAXPATHLEN]; + int len; + + len = readlink(path, buf, MAXPATHLEN); + + if (len < 0) + return RAISE_SYSERR(msp, len); + else if (len < MAXPATHLEN) { + buf[len] = '\0'; + return ML_CString (msp, buf); + } + else { /* buffer not big enough */ + char *nbuf; + ml_val_t obj; + struct stat sbuf; + int res; + int nlen; + + /* Determine how big the link text is and allocate a buffer */ + res = lstat (path, &sbuf); + if (res < 0) + return RAISE_SYSERR(msp, res); + nlen = sbuf.st_size + 1; + nbuf = MALLOC(nlen); + if (nbuf == 0) + return RAISE_ERROR(msp, "out of malloc memory"); + + /* Try the readlink again. Give up on error or if len is still bigger + * than the buffer size. + */ + len = readlink(path, buf, len); + if (len < 0) + return RAISE_SYSERR(msp, len); + else if (len >= nlen) + return RAISE_ERROR(msp, "readlink failure"); + + nbuf[len] = '\0'; + obj = ML_CString (msp, nbuf); + FREE (nbuf); + return obj; + } + +} /* end of _ml_P_FileSys_readlink */ diff --git a/base/runtime/c-libs/posix-filesys/rename.c b/base/runtime/c-libs/posix-filesys/rename.c new file mode 100644 index 0000000..8206c1c --- /dev/null +++ b/base/runtime/c-libs/posix-filesys/rename.c @@ -0,0 +1,30 @@ +/* rename.c + * + * COPYRIGHT (c) 1995 by AT&T Bell Laboratories. + */ + +#include "ml-unixdep.h" +#include +#include +#include "ml-base.h" +#include "ml-values.h" +#include "ml-objects.h" +#include "ml-c.h" +#include "cfun-proto-list.h" + +/* _ml_P_FileSys_rename : string * string -> unit + * oldname newname + * + * Change the name of a file + */ +ml_val_t _ml_P_FileSys_rename (ml_state_t *msp, ml_val_t arg) +{ + int sts; + ml_val_t oldname = REC_SEL(arg, 0); + ml_val_t newname = REC_SEL(arg, 1); + + sts = rename(STR_MLtoC(oldname), STR_MLtoC(newname)); + + CHK_RETURN_UNIT (msp, sts) + +} /* end of _ml_P_FileSys_rename */ diff --git a/base/runtime/c-libs/posix-filesys/rewinddir.c b/base/runtime/c-libs/posix-filesys/rewinddir.c new file mode 100644 index 0000000..8bc5004 --- /dev/null +++ b/base/runtime/c-libs/posix-filesys/rewinddir.c @@ -0,0 +1,27 @@ +/* rewinddir.c + * + * COPYRIGHT (c) 1995 by AT&T Bell Laboratories. + */ + +#include "ml-unixdep.h" +#include +#include +#include "ml-base.h" +#include "ml-values.h" +#include "tags.h" +#include "ml-objects.h" +#include "ml-c.h" +#include "cfun-proto-list.h" + +/* _ml_P_FileSys_rewinddir : object -> unit + * + * Rewind a directory stream. + */ +ml_val_t _ml_P_FileSys_rewinddir (ml_state_t *msp, ml_val_t arg) +{ + + rewinddir(PTR_MLtoC(DIR, arg)); + + return ML_unit; + +} /* end of _ml_P_FileSys_rewinddir */ diff --git a/base/runtime/c-libs/posix-filesys/rmdir.c b/base/runtime/c-libs/posix-filesys/rmdir.c new file mode 100644 index 0000000..019b09a --- /dev/null +++ b/base/runtime/c-libs/posix-filesys/rmdir.c @@ -0,0 +1,27 @@ +/* rmdir.c + * + * COPYRIGHT (c) 1995 by AT&T Bell Laboratories. + */ + +#include "ml-unixdep.h" +#include +#include "ml-base.h" +#include "ml-values.h" +#include "ml-objects.h" +#include "ml-c.h" +#include "cfun-proto-list.h" + + +/* _ml_P_FileSys_rmdir : string -> unit + * + * Remove a directory + */ +ml_val_t _ml_P_FileSys_rmdir (ml_state_t *msp, ml_val_t arg) +{ + int sts; + + sts = rmdir(STR_MLtoC(arg)); + + CHK_RETURN_UNIT(msp, sts) + +} /* end of _ml_P_FileSys_rmdir */ diff --git a/base/runtime/c-libs/posix-filesys/stat.c b/base/runtime/c-libs/posix-filesys/stat.c new file mode 100644 index 0000000..f01aaa1 --- /dev/null +++ b/base/runtime/c-libs/posix-filesys/stat.c @@ -0,0 +1,176 @@ +/* stat.c + * + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + */ + +#include "ml-unixdep.h" +#include +#include +#include "ml-base.h" +#include "ml-values.h" +#include "ml-objects.h" +#include "ml-c.h" +#include "cfun-proto-list.h" + +#define MODE_BITS (S_IRWXU | S_IRWXG | S_IRWXO | S_ISUID | S_ISGID) + +#if defined(STAT_HAS_TIMESPEC) +/* convert struct timespec to nanoseconds */ +STATIC_INLINE Unsigned64_t timespec_to_ns (struct timespec *ts) +{ + return NS_PER_SEC * (Unsigned64_t)ts->tv_sec + (Unsigned64_t)ts->tv_nsec; +} +#endif + +/* mkStatRep: + * + * This makes a representation of the struct stat to be returned + * to the SML side. It is a tuple with the following fields: + * + * file_type : int + * mode : SysWord.word + * ino : SysWord.word -- should be Word64.word + * dev : SysWord.word + * nlink : SysWord.word + * uid : SysWord.word + * gid : SysWord.word + * size : Position.int (aka Int64.int) + * atime : Word64.word + * mtime : Word64.word + * ctime : Word64.word + */ +PVT ml_val_t mkStatRep (ml_state_t *msp, struct stat *buf) +{ + int ftype; + Unsigned64_t aTim, mTim, cTim; + ml_val_t mode, ino, dev, uid, gid, nlink, sr, atime, mtime, ctime, size; + +#if ((S_IFDIR != 0x4000) || (S_IFCHR != 0x2000) || (S_IFBLK != 0x6000) || (S_IFREG != 0x8000) || (S_IFIFO != 0x1000) || (S_IFLNK != 0xA000) || (S_IFSOCK != 0xC000)) + if (S_ISDIR(buf->st_mode)) ftype = 0x4000; + else if (S_ISCHR(buf->st_mode)) ftype = 0x2000; + else if (S_ISBLK(buf->st_mode)) ftype = 0x6000; + else if (S_ISREG(buf->st_mode)) ftype = 0x8000; + else if (S_ISFIFO(buf->st_mode)) ftype = 0x1000; +#ifdef S_ISLNK + else if (S_ISLNK(buf->st_mode)) ftype = 0xA000; +#endif +#ifdef S_ISSOCK + else if (S_ISSOCK(buf->st_mode)) ftype = 0xC000; +#endif + else ftype = 0; +#else + ftype = buf->st_mode & 0xF000; +#endif + + SYSWORD_ALLOC (msp, mode, (Word_t)((buf->st_mode) & MODE_BITS)); +/* + WORD64_ALLOC (msp, ino, buf->st_ino); +*/ + SYSWORD_ALLOC (msp, ino, (Word_t)(buf->st_ino)); + SYSWORD_ALLOC (msp, dev, (Word_t)(buf->st_dev)); + SYSWORD_ALLOC (msp, nlink, (Word_t)(buf->st_nlink)); + SYSWORD_ALLOC (msp, uid, (Word_t)(buf->st_uid)); + SYSWORD_ALLOC (msp, gid, (Word_t)(buf->st_gid)); + INT64_ALLOC (msp, size, buf->st_size); + +#if !defined(STAT_HAS_TIMESPEC) + /* the old API with second-level granularity */ + aTim = NS_PER_SEC * (Unsigned64_t)buf->st_atime; + mTim = NS_PER_SEC * (Unsigned64_t)buf->st_mtime; + cTim = NS_PER_SEC * (Unsigned64_t)buf->st_ctime; +#elif defined(OPSYS_DARWIN) + /* macOS uses non-standard names for the fields */ + aTim = timespec_to_ns (&buf->st_atimespec); + mTim = timespec_to_ns (&buf->st_mtimespec); + cTim = timespec_to_ns (&buf->st_ctimespec); +#else + aTim = timespec_to_ns (&buf->st_atim); + mTim = timespec_to_ns (&buf->st_mtim); + cTim = timespec_to_ns (&buf->st_ctim); +#endif + WORD64_ALLOC (msp, atime, aTim); + WORD64_ALLOC (msp, mtime, mTim); + WORD64_ALLOC (msp, ctime, cTim); + + /* allocate the stat record */ + ML_AllocWrite(msp, 0, MAKE_DESC(11, DTAG_record)); + ML_AllocWrite(msp, 1, INT_CtoML(ftype)); + ML_AllocWrite(msp, 2, mode); + ML_AllocWrite(msp, 3, ino); + ML_AllocWrite(msp, 4, dev); + ML_AllocWrite(msp, 5, nlink); + ML_AllocWrite(msp, 6, uid); + ML_AllocWrite(msp, 7, gid); + ML_AllocWrite(msp, 8, size); + ML_AllocWrite(msp, 9, atime); + ML_AllocWrite(msp, 10, mtime); + ML_AllocWrite(msp, 11, ctime); + sr = ML_Alloc(msp, 11); + + return sr; + +} /* end of mkStatRep */ + +/* _ml_P_FileSys_stat : string -> statrep + * + * Query file status given file name. + */ +ml_val_t _ml_P_FileSys_stat (ml_state_t *msp, ml_val_t arg) +{ + char *path = STR_MLtoC(arg); + int sts; + struct stat buf; + + sts = stat(path, &buf); + + if (sts < 0) { + return RAISE_SYSERR(msp, sts); + } + + return (mkStatRep(msp, &buf)); + +} /* end of _ml_P_FileSys_stat */ + +/* _ml_P_FileSys_fstat : int -> statrep + * + * Query file status given file descriptor. + */ +ml_val_t _ml_P_FileSys_fstat (ml_state_t *msp, ml_val_t arg) +{ + int fd = INT_MLtoC(arg); + int sts; + struct stat buf; + + sts = fstat(fd, &buf); + + if (sts < 0) { + return RAISE_SYSERR(msp, sts); + } + else { + return mkStatRep(msp, &buf); + } + +} /* end of _ml_P_FileSys_fstat */ + +/* _ml_P_FileSys_lstat : string -> statrep + * + * Query file status given file name, but do not follow + * symbolic links. + */ +ml_val_t _ml_P_FileSys_lstat (ml_state_t *msp, ml_val_t arg) +{ + char *path = STR_MLtoC(arg); + int sts; + struct stat buf; + + sts = lstat(path, &buf); + + if (sts < 0) { + return RAISE_SYSERR(msp, sts); + } + else { + return mkStatRep(msp, &buf); + } + +} /* end of _ml_P_FileSys_lstat */ diff --git a/base/runtime/c-libs/posix-filesys/symlink.c b/base/runtime/c-libs/posix-filesys/symlink.c new file mode 100644 index 0000000..304e7af --- /dev/null +++ b/base/runtime/c-libs/posix-filesys/symlink.c @@ -0,0 +1,30 @@ +/* symlink.c + * + * COPYRIGHT (c) 1995 by AT&T Bell Laboratories. + */ + +#include "ml-unixdep.h" +#include +#include "ml-base.h" +#include "ml-values.h" +#include "ml-objects.h" +#include "ml-c.h" +#include "cfun-proto-list.h" + + +/* _ml_P_FileSys_symlink : string * string -> unit + * existing newname + * + * Creates a symbolic link from newname to existing file. + */ +ml_val_t _ml_P_FileSys_symlink (ml_state_t *msp, ml_val_t arg) +{ + int sts; + ml_val_t existing = REC_SEL(arg, 0); + ml_val_t newname = REC_SEL(arg, 1); + + sts = symlink(STR_MLtoC(existing), STR_MLtoC(newname)); + + CHK_RETURN_UNIT (msp, sts) + +} /* end of _ml_P_FileSys_symlink */ diff --git a/base/runtime/c-libs/posix-filesys/umask.c b/base/runtime/c-libs/posix-filesys/umask.c new file mode 100644 index 0000000..bdd97af --- /dev/null +++ b/base/runtime/c-libs/posix-filesys/umask.c @@ -0,0 +1,29 @@ +/* umask.c + * + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + */ + +#include "ml-unixdep.h" +#include +#include +#include "ml-objects.h" +#include "ml-c.h" +#include "cfun-proto-list.h" + +/* _ml_P_FileSys_umask : SysWord.word -> SysWord.word + * + * Set and get file creation mask + * Assumes umask never fails. + */ +ml_val_t _ml_P_FileSys_umask (ml_state_t *msp, ml_val_t arg) +{ + mode_t omask; + ml_val_t p; + + omask = umask(SYSWORD_MLtoC(arg)); + SYSWORD_ALLOC (msp, p, (SysWord_t)omask); + + return p; + +} /* end of _ml_P_FileSys_umask */ diff --git a/base/runtime/c-libs/posix-filesys/unlink.c b/base/runtime/c-libs/posix-filesys/unlink.c new file mode 100644 index 0000000..67443bd --- /dev/null +++ b/base/runtime/c-libs/posix-filesys/unlink.c @@ -0,0 +1,27 @@ +/* unlink.c + * + * COPYRIGHT (c) 1995 by AT&T Bell Laboratories. + */ + +#include "ml-unixdep.h" +#include +#include "ml-base.h" +#include "ml-values.h" +#include "ml-objects.h" +#include "ml-c.h" +#include "cfun-proto-list.h" + + +/* _ml_P_FileSys_unlink : string -> unit + * + * Remove directory entry + */ +ml_val_t _ml_P_FileSys_unlink (ml_state_t *msp, ml_val_t arg) +{ + int sts; + + sts = unlink(STR_MLtoC(arg)); + + CHK_RETURN_UNIT(msp, sts) + +} /* end of _ml_P_FileSys_unlink */ diff --git a/base/runtime/c-libs/posix-filesys/utime.c b/base/runtime/c-libs/posix-filesys/utime.c new file mode 100644 index 0000000..a47f6af --- /dev/null +++ b/base/runtime/c-libs/posix-filesys/utime.c @@ -0,0 +1,47 @@ +/*! \file utime.c + * + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + */ + +#include "ml-unixdep.h" +#include +#include +#include "ml-base.h" +#include "ml-values.h" +#include "ml-objects.h" +#include "ml-c.h" +#include "cfun-proto-list.h" + +/* FIXME: utimensat is the newest way to implement this mechanism */ + +/* _ml_P_FileSys_utime : (string * Word64.word * Word64.word) -> unit + * name actime(ns) modtime(ns) + * + * Sets file access and modification times. If + * actime = -1, then set both to current time. + */ +ml_val_t _ml_P_FileSys_utime (ml_state_t *msp, ml_val_t arg) +{ + ml_val_t path = REC_SEL(arg, 0); + Unsigned64_t actime = WORD64_MLtoC(REC_SEL(arg, 1)); + Unsigned64_t modtime = WORD64_MLtoC(REC_SEL(arg, 2)); + int sts; + + if (actime == 0xffffffffffffffff) { + sts = utimes (STR_MLtoC(path), NIL(struct timeval *)); + } + else { + struct timeval times[2]; + Unsigned64_t us = actime / 1000; /* convert to microseconds */ + times[0].tv_sec = us / 1000000; + times[0].tv_usec = us % 1000000; + us = modtime / 1000; /* convert to microseconds */ + times[1].tv_usec = us % 1000000; + times[1].tv_sec = us / 1000000; + sts = utimes (STR_MLtoC(path), times); + } + + CHK_RETURN_UNIT(msp, sts) + +} /* end of _ml_P_FileSys_utime */ diff --git a/base/runtime/c-libs/posix-io/cfun-list.h b/base/runtime/c-libs/posix-io/cfun-list.h new file mode 100644 index 0000000..6a6aae3 --- /dev/null +++ b/base/runtime/c-libs/posix-io/cfun-list.h @@ -0,0 +1,30 @@ +/* cfun-list.h + * + * COPYRIGHT (c) 1995 AT&T Bell Laboratories. + * + * This file lists the directory library of C functions that are callable by ML. + */ + +#ifndef CLIB_NAME +#define CLIB_NAME "POSIX-IO" +#define CLIB_VERSION "1.0" +#define CLIB_DATE "February 16, 1995" +#endif + +CFUNC("osval", _ml_P_IO_osval, "string -> int") +CFUNC("pipe", _ml_P_IO_pipe, "unit -> int * int") +CFUNC("dup", _ml_P_IO_dup, "int -> int") +CFUNC("dup2", _ml_P_IO_dup2, "int * int -> unit") +CFUNC("close", _ml_P_IO_close, "int -> unit") +CFUNC("read", _ml_P_IO_read, "int * int -> Word8Vector.vector") +CFUNC("readbuf", _ml_P_IO_readbuf, "int * Word8Array.array * int -> int") +CFUNC("write", _ml_P_IO_write, "int * Word8Vector.vector * int -> int") +CFUNC("writebuf", _ml_P_IO_writebuf, "int * Word8Array.array * int * int -> int") +CFUNC("fcntl_d", _ml_P_IO_fcntl_d, "int * int -> int") +CFUNC("fcntl_gfd", _ml_P_IO_fcntl_gfd, "int -> word") +CFUNC("fcntl_sfd", _ml_P_IO_fcntl_sfd, "int * word -> unit") +CFUNC("fcntl_gfl", _ml_P_IO_fcntl_gfl, "int -> word * word") +CFUNC("fcntl_sfl", _ml_P_IO_fcntl_sfl, "int * word -> unit") +CFUNC("fcntl_l", _ml_P_IO_fcntl_l, "int * int * flock_rep -> flock_rep") +CFUNC("lseek", _ml_P_IO_lseek, "int * Position.int * int -> int") +CFUNC("fsync", _ml_P_IO_fsync, "int -> unit") diff --git a/base/runtime/c-libs/posix-io/cfun-proto-list.h b/base/runtime/c-libs/posix-io/cfun-proto-list.h new file mode 100644 index 0000000..16b3e96 --- /dev/null +++ b/base/runtime/c-libs/posix-io/cfun-proto-list.h @@ -0,0 +1,18 @@ +/* cfun-proto-list.h + * + * COPYRIGHT (c) 1194 AT&T Bell Laboratories. + */ + +#ifndef _CFUN_PROTO_LIST_ +#define _CFUN_PROTO_LIST_ + +#ifndef _C_LIBRARY_ +# include "c-library.h" +#endif + +/* the external definitions for the C functions */ +#define CFUNC(NAME, FUNC, MLTYPE) CFUNC_PROTO(NAME, FUNC, MLTYPE) +#include "cfun-list.h" +#undef CFUNC + +#endif /* !_CFUN_PROTO_LIST_ */ diff --git a/base/runtime/c-libs/posix-io/clib-list.h b/base/runtime/c-libs/posix-io/clib-list.h new file mode 100644 index 0000000..e69de29 diff --git a/base/runtime/c-libs/posix-io/close.c b/base/runtime/c-libs/posix-io/close.c new file mode 100644 index 0000000..839a401 --- /dev/null +++ b/base/runtime/c-libs/posix-io/close.c @@ -0,0 +1,25 @@ +/* close.c + * + * COPYRIGHT (c) 1995 by AT&T Bell Laboratories. + */ + +#include "ml-base.h" +#include "ml-values.h" +#include "ml-objects.h" +#include "ml-c.h" +#include "cfun-proto-list.h" +#include + +/* _ml_P_IO_close : int -> unit + * + * Duplicate an open file descriptor + */ +ml_val_t _ml_P_IO_close (ml_state_t *msp, ml_val_t arg) +{ + int sts, fd = INT_MLtoC(arg); + + sts = close(fd); + + CHK_RETURN_UNIT(msp, sts) + +} /* end of _ml_P_IO_close */ diff --git a/base/runtime/c-libs/posix-io/dup.c b/base/runtime/c-libs/posix-io/dup.c new file mode 100644 index 0000000..d484f69 --- /dev/null +++ b/base/runtime/c-libs/posix-io/dup.c @@ -0,0 +1,26 @@ +/* dup.c + * + * COPYRIGHT (c) 1995 by AT&T Bell Laboratories. + */ + +#include "ml-base.h" +#include "ml-values.h" +#include "ml-objects.h" +#include "ml-c.h" +#include "cfun-proto-list.h" +#include + +/* _ml_P_IO_dup : int -> int + * + * Duplicate an open file descriptor + */ +ml_val_t _ml_P_IO_dup (ml_state_t *msp, ml_val_t arg) +{ + int fd0 = INT_MLtoC(arg); + int fd1; + + fd1 = dup(fd0); + + CHK_RETURN(msp, fd1) + +} /* end of _ml_P_IO_dup */ diff --git a/base/runtime/c-libs/posix-io/dup2.c b/base/runtime/c-libs/posix-io/dup2.c new file mode 100644 index 0000000..8a9030e --- /dev/null +++ b/base/runtime/c-libs/posix-io/dup2.c @@ -0,0 +1,27 @@ +/* dup2.c + * + * COPYRIGHT (c) 1995 by AT&T Bell Laboratories. + */ + +#include "ml-base.h" +#include "ml-values.h" +#include "ml-objects.h" +#include "ml-c.h" +#include "cfun-proto-list.h" +#include + +/* _ml_P_IO_dup2 : int * int -> unit + * + * Duplicate an open file descriptor + */ +ml_val_t _ml_P_IO_dup2 (ml_state_t *msp, ml_val_t arg) +{ + int sts; + int fd0 = REC_SELINT(arg, 0); + int fd1 = REC_SELINT(arg, 1); + + sts = dup2(fd0, fd1); + + CHK_RETURN_UNIT(msp,sts) + +} /* end of _ml_P_IO_dup2 */ diff --git a/base/runtime/c-libs/posix-io/fcntl_d.c b/base/runtime/c-libs/posix-io/fcntl_d.c new file mode 100644 index 0000000..b6650e9 --- /dev/null +++ b/base/runtime/c-libs/posix-io/fcntl_d.c @@ -0,0 +1,29 @@ +/* fcntl_d.c + * + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + */ + +#include "ml-unixdep.h" +#include +#include "ml-base.h" +#include "ml-values.h" +#include "ml-objects.h" +#include "ml-c.h" +#include "cfun-proto-list.h" + +/* _ml_P_IO_fcntl_d : int * int -> int + * + * Duplicate an open file descriptor + */ +ml_val_t _ml_P_IO_fcntl_d (ml_state_t *msp, ml_val_t arg) +{ + int fd; + int fd0 = REC_SELINT(arg, 0); + int fd1 = REC_SELINT(arg, 1); + + fd = fcntl(fd0, F_DUPFD, fd1); + + CHK_RETURN(msp, fd) + +} /* end of _ml_P_IO_fcntl_d */ diff --git a/base/runtime/c-libs/posix-io/fcntl_gfd.c b/base/runtime/c-libs/posix-io/fcntl_gfd.c new file mode 100644 index 0000000..7821d83 --- /dev/null +++ b/base/runtime/c-libs/posix-io/fcntl_gfd.c @@ -0,0 +1,32 @@ +/* fcntl_gfd.c + * + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + */ + +#include "ml-unixdep.h" +#include +#include "ml-objects.h" +#include "ml-c.h" +#include "cfun-proto-list.h" + +/* _ml_P_IO_fcntl_gfd : int -> SysWord.word + * + * Get the close-on-exec flag associated with the file descriptor. + */ +ml_val_t _ml_P_IO_fcntl_gfd (ml_state_t *msp, ml_val_t arg) +{ + int flag; + ml_val_t v; + + flag = fcntl(INT_MLtoC(arg), F_GETFD); + + if (flag == -1) { + return RAISE_SYSERR(msp, flag); + } + else { + SYSWORD_ALLOC (msp, v, (SysWord_t)flag); + return v; + } + +} /* end of _ml_P_IO_fcntl_gfd */ diff --git a/base/runtime/c-libs/posix-io/fcntl_gfl.c b/base/runtime/c-libs/posix-io/fcntl_gfl.c new file mode 100644 index 0000000..4d75c7c --- /dev/null +++ b/base/runtime/c-libs/posix-io/fcntl_gfl.c @@ -0,0 +1,34 @@ +/* fcntl_gfl.c + * + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + */ + +#include "ml-unixdep.h" +#include +#include "ml-objects.h" +#include "ml-c.h" +#include "cfun-proto-list.h" + +/* _ml_P_IO_fcntl_gfl : int -> SysWord.word * SysWord.word + * + * Get file status flags and file access modes. + */ +ml_val_t _ml_P_IO_fcntl_gfl (ml_state_t *msp, ml_val_t arg) +{ + int fd = INT_MLtoC(arg); + int flag; + ml_val_t flags, mode, obj; + + flag = fcntl(fd, F_GETFD); + + if (flag < 0) + return RAISE_SYSERR(msp, flag); + + SYSWORD_ALLOC (msp, flags, (flag & (~O_ACCMODE))); + SYSWORD_ALLOC (msp, mode, (flag & O_ACCMODE)); + REC_ALLOC2(msp, obj, flags, mode); + + return obj; + +} /* end of _ml_P_IO_fcntl_gfl */ diff --git a/base/runtime/c-libs/posix-io/fcntl_l.c b/base/runtime/c-libs/posix-io/fcntl_l.c new file mode 100644 index 0000000..96ff7d6 --- /dev/null +++ b/base/runtime/c-libs/posix-io/fcntl_l.c @@ -0,0 +1,58 @@ +/* fcntl_l.c + * + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + */ + +#include "ml-unixdep.h" +#include +#include "ml-base.h" +#include "ml-values.h" +#include "ml-objects.h" +#include "ml-c.h" +#include "cfun-proto-list.h" + +/* _ml_P_IO_fcntl_l : int * int * flock_rep -> flock_rep + * + * where + * + * flock_rep = int * int * Position.int * Position.int * int + * + * Handle record locking. + */ +ml_val_t _ml_P_IO_fcntl_l (ml_state_t *msp, ml_val_t arg) +{ + int fd = REC_SELINT(arg, 0); + int cmd = REC_SELINT(arg, 1); + ml_val_t flock_rep = REC_SEL(arg, 2), obj; + ml_val_t start = REC_SEL(flock_rep, 2); + ml_val_t length = REC_SEL(flock_rep, 3); + struct flock flock; + int sts; + + flock.l_type = REC_SELINT(flock_rep, 0); + flock.l_whence = REC_SELINT(flock_rep, 1); + flock.l_start = (off_t)INT64_MLtoC(start); + flock.l_len = (off_t)INT64_MLtoC(length); + + sts = fcntl(fd, cmd, &flock); + + if (sts < 0) { + return RAISE_SYSERR(msp, sts); + } + + /* allocate the 64-bit start and length values */ + INT64_ALLOC(msp, start, flock.l_start) + INT64_ALLOC(msp, length, flock.l_len) + + ML_AllocWrite (msp, 0, MAKE_DESC (DTAG_record, 5)); + ML_AllocWrite (msp, 1, INT_CtoML(flock.l_type)); + ML_AllocWrite (msp, 2, INT_CtoML(flock.l_whence)); + ML_AllocWrite (msp, 3, start); + ML_AllocWrite (msp, 4, length); + ML_AllocWrite (msp, 5, INT_CtoML(flock.l_pid)); + obj = ML_Alloc (msp, 5); + + return obj; + +} /* end of _ml_P_IO_fcntl_l */ diff --git a/base/runtime/c-libs/posix-io/fcntl_sfd.c b/base/runtime/c-libs/posix-io/fcntl_sfd.c new file mode 100644 index 0000000..23e3648 --- /dev/null +++ b/base/runtime/c-libs/posix-io/fcntl_sfd.c @@ -0,0 +1,28 @@ +/* fcntl_sfd.c + * + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + */ + +#include "ml-unixdep.h" +#include +#include "ml-objects.h" +#include "ml-c.h" +#include "cfun-proto-list.h" + +/* _ml_P_IO_fcntl_sfd : int * SysWord.word -> unit + * + * Set the close-on-exec flag associated with the file descriptor. + */ +ml_val_t _ml_P_IO_fcntl_sfd (ml_state_t *msp, ml_val_t arg) +{ + int sts; + int fd0 = REC_SELINT(arg, 0); + ml_val_t ml_flag = REC_SEL(arg, 1); + SysWord_t flag = SYSWORD_MLtoC(ml_flag); + + sts = fcntl(fd0, F_SETFD, flag); + + CHK_RETURN_UNIT(msp, sts) + +} /* end of _ml_P_IO_fcntl_sfd */ diff --git a/base/runtime/c-libs/posix-io/fcntl_sfl.c b/base/runtime/c-libs/posix-io/fcntl_sfl.c new file mode 100644 index 0000000..32f3bec --- /dev/null +++ b/base/runtime/c-libs/posix-io/fcntl_sfl.c @@ -0,0 +1,30 @@ +/* fcntl_sfl.c + * + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + */ + +#include "ml-unixdep.h" +#include +#include "ml-base.h" +#include "ml-values.h" +#include "ml-objects.h" +#include "ml-c.h" +#include "cfun-proto-list.h" + +/* _ml_P_IO_fcntl_sfl : int * word -> unit + * + * Set file status flags + */ +ml_val_t _ml_P_IO_fcntl_sfl (ml_state_t *msp, ml_val_t arg) +{ + int sts; + int fd0 = REC_SELINT(arg, 0); + ml_val_t ml_flag = REC_SEL(arg, 1); + SysWord_t flag = SYSWORD_MLtoC(ml_flag); + + sts = fcntl(fd0, F_SETFL, flag); + + CHK_RETURN_UNIT(msp,sts) + +} /* end of _ml_P_IO_fcntl_sfl */ diff --git a/base/runtime/c-libs/posix-io/fsync.c b/base/runtime/c-libs/posix-io/fsync.c new file mode 100644 index 0000000..6ca9f0c --- /dev/null +++ b/base/runtime/c-libs/posix-io/fsync.c @@ -0,0 +1,25 @@ +/* fsync.c + * + * COPYRIGHT (c) 1995 by AT&T Bell Laboratories. + */ + +#include "ml-base.h" +#include "ml-values.h" +#include "ml-objects.h" +#include "ml-c.h" +#include "cfun-proto-list.h" +#include + +/* _ml_P_IO_fsync : int -> unit + * + * Duplicate an open file descriptor + */ +ml_val_t _ml_P_IO_fsync (ml_state_t *msp, ml_val_t arg) +{ + int sts, fd = INT_MLtoC(arg); + + sts = fsync(fd); + + CHK_RETURN_UNIT(msp, sts) + +} /* end of _ml_P_IO_fsync */ diff --git a/base/runtime/c-libs/posix-io/lseek.c b/base/runtime/c-libs/posix-io/lseek.c new file mode 100644 index 0000000..0b14853 --- /dev/null +++ b/base/runtime/c-libs/posix-io/lseek.c @@ -0,0 +1,38 @@ +/* lseek.c + * + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + */ + +#include "ml-unixdep.h" +#include INCLUDE_TYPES_H +#include "ml-base.h" +#include "ml-values.h" +#include "ml-objects.h" +#include "ml-c.h" +#include "cfun-proto-list.h" + +/* _ml_P_IO_lseek : int * Position.int * int -> Position.int + * + * Move read/write file pointer. + */ +ml_val_t _ml_P_IO_lseek (ml_state_t *msp, ml_val_t arg) +{ + Int_t fd = REC_SELINT(arg, 0); + ml_val_t box_offset = REC_SEL(arg, 1); + off_t offset = (off_t)INT64_MLtoC(box_offset); + off_t pos; + Int_t whence = REC_SELINT(arg, 2); + ml_val_t box_pos; + + pos = lseek(fd, offset, whence); + + if (pos < 0) { + RAISE_SYSERR (msp, (int)pos); + } + + INT64_ALLOC(msp, box_pos, pos); + + return box_pos; + +} /* end of _ml_P_IO_lseek */ diff --git a/base/runtime/c-libs/posix-io/makefile b/base/runtime/c-libs/posix-io/makefile new file mode 100644 index 0000000..c871d15 --- /dev/null +++ b/base/runtime/c-libs/posix-io/makefile @@ -0,0 +1,55 @@ +# +# a template makefile for a C function library +# + +SHELL = /bin/sh + +INC_DIR = ../../include +CLIB_DIR = ../ + +INCLUDES = -I$(INC_DIR) -I$(CLIB_DIR) -I../../objs + +MAKE = make +AR = ar +ARFLAGS = rcv +RANLIB = ranlib + +LIBRARY = libposix-io.a + +VERSION = v-dummy + +OBJS = posix-io-lib.o \ + osval.o \ + pipe.o \ + dup.o \ + dup2.o \ + close.o \ + read.o \ + readbuf.o \ + write.o \ + writebuf.o \ + fcntl_d.o \ + fcntl_gfd.o \ + fcntl_sfd.o \ + fcntl_gfl.o \ + fcntl_sfl.o \ + fcntl_l.o \ + lseek.o \ + fsync.o + +$(LIBRARY) : $(VERSION) $(OBJS) + rm -rf $(LIBRARY) + $(AR) $(ARFLAGS) $(LIBRARY) $(OBJS) + $(RANLIB) $(LIBRARY) + +$(VERSION) : + ($(MAKE) MAKE="$(MAKE)" clean) + echo "$(VERSION)" > $(VERSION) + +.c.o: $(INC_DIR)/ml-unixdep.h $(INC_DIR)/ml-base.h $(INC_DIR)/ml-values.h \ + $(CLIB_DIR)/ml-c.h cfun-proto-list.h cfun-list.h + $(CC) $(CFLAGS) $(DEFS) $(INCLUDES) -c $< + +clean : + rm -f v-* *.o $(LIBRARY) + diff --git a/base/runtime/c-libs/posix-io/osval.c b/base/runtime/c-libs/posix-io/osval.c new file mode 100644 index 0000000..f5ef45a --- /dev/null +++ b/base/runtime/c-libs/posix-io/osval.c @@ -0,0 +1,64 @@ +/* osval.c + * + * COPYRIGHT (c) 1995 by AT&T Bell Laboratories. + */ + +#include "ml-unixdep.h" +#include +#include +#include "ml-base.h" +#include "ml-values.h" +#include "tags.h" +#include "ml-objects.h" +#include "ml-c.h" +#include "cfun-proto-list.h" +#include "../posix-error/posix-name-val.h" + +static name_val_t values [] = { + {"F_GETLK", F_GETLK}, + {"F_RDLCK", F_RDLCK}, + {"F_SETLK", F_SETLK}, + {"F_SETLKW", F_SETLKW}, + {"F_UNLCK", F_UNLCK}, + {"F_WRLCK", F_WRLCK}, + {"SEEK_CUR", SEEK_CUR}, + {"SEEK_END", SEEK_END}, + {"SEEK_SET", SEEK_SET}, + {"append", O_APPEND}, + {"cloexec", FD_CLOEXEC}, +#ifdef O_DSYNC + {"dsync", O_DSYNC}, +#else + {"dsync", 0}, +#endif + {"nonblock", O_NONBLOCK}, +#ifdef O_RSYNC + {"rsync", O_RSYNC}, +#else + {"rsync", 0}, +#endif +#ifdef O_SYNC + {"sync", O_SYNC}, +#else + {"sync", 0}, +#endif +}; + +#define NUMELMS ((sizeof values)/(sizeof (name_val_t))) + +/* _ml_P_IO_osval : string -> int + * + * Return the OS-dependent, compile-time constant specified by the string. + */ +ml_val_t _ml_P_IO_osval (ml_state_t *msp, ml_val_t arg) +{ + name_val_t *res; + + res = _ml_posix_nv_lookup (STR_MLtoC(arg), values, NUMELMS); + if (res) + return INT_CtoML(res->val); + else { + return RAISE_ERROR(msp, "system constant not defined"); + } + +} /* end of _ml_P_IO_osval */ diff --git a/base/runtime/c-libs/posix-io/pipe.c b/base/runtime/c-libs/posix-io/pipe.c new file mode 100644 index 0000000..e66fe62 --- /dev/null +++ b/base/runtime/c-libs/posix-io/pipe.c @@ -0,0 +1,29 @@ +/* pipe.c + * + * COPYRIGHT (c) 1995 by AT&T Bell Laboratories. + */ + +#include "ml-base.h" +#include "ml-values.h" +#include "ml-objects.h" +#include "ml-c.h" +#include "cfun-proto-list.h" +#include + +/* _ml_P_IO_pipe : unit -> int * int + * + * Create a pipe and return its input and output descriptors. + */ +ml_val_t _ml_P_IO_pipe (ml_state_t *msp, ml_val_t arg) +{ + int fds[2]; + + if (pipe(fds) == -1) + return RAISE_SYSERR(msp, -1); + else { + ml_val_t obj; + REC_ALLOC2 (msp, obj, INT_CtoML(fds[0]), INT_CtoML(fds[1])); + return obj; + } + +} /* end of _ml_P_IO_pipe */ diff --git a/base/runtime/c-libs/posix-io/posix-io-lib.c b/base/runtime/c-libs/posix-io/posix-io-lib.c new file mode 100644 index 0000000..501d411 --- /dev/null +++ b/base/runtime/c-libs/posix-io/posix-io-lib.c @@ -0,0 +1,28 @@ +/* posix-io-lib.c + * + * COPYRIGHT (c) 1995 AT&T Bell Laboratories. + */ + +#include "ml-base.h" +#include "c-library.h" +#include "cfun-proto-list.h" + + +/* the table of C functions and ML names */ +#define CFUNC(NAME, FUNC, MLTYPE) CFUNC_BIND(NAME, FUNC, MLTYPE) +PVT cfunc_binding_t CFunTable[] = { +#include "cfun-list.h" + CFUNC_NULL_BIND + }; +#undef CFUNC + + +/* the POSIX Signal library */ +c_library_t POSIX_IO_Library = { + CLIB_NAME, + CLIB_VERSION, + CLIB_DATE, + NIL(clib_init_fn_t), + CFunTable + }; + diff --git a/base/runtime/c-libs/posix-io/read.c b/base/runtime/c-libs/posix-io/read.c new file mode 100644 index 0000000..f2c3ceb --- /dev/null +++ b/base/runtime/c-libs/posix-io/read.c @@ -0,0 +1,47 @@ +/* read.c + * + * COPYRIGHT (c) 1995 by AT&T Bell Laboratories. + */ + +#include "ml-unixdep.h" +#include +#include "ml-base.h" +#include "ml-values.h" +#include "ml-objects.h" +#include "ml-c.h" +#include "cfun-proto-list.h" + +/* _ml_P_IO_read : (int * int) -> Word8Vector.vector + * fd nbytes + * + * Read the specified number of bytes from the specified file, + * returning them in a vector. + */ +ml_val_t _ml_P_IO_read (ml_state_t *msp, ml_val_t arg) +{ + int fd = REC_SELINT(arg, 0); + int nbytes = REC_SELINT(arg, 1); + ml_val_t vec, res; + int n; + + if (nbytes == 0) + return ML_string0; + + /* allocate the vector; note that this might cause a GC */ + vec = ML_AllocRaw (msp, BYTES_TO_WORDS(nbytes)); + n = read (fd, PTR_MLtoC(char, vec), nbytes); + if (n < 0) + return RAISE_SYSERR(msp, n); + else if (n == 0) + return ML_string0; + + if (n < nbytes) { + /* we need to shrink the vector */ + ML_ShrinkRaw (msp, vec, BYTES_TO_WORDS(n)); + } + + SEQHDR_ALLOC (msp, res, DESC_string, vec, n); + + return res; + +} /* end of _ml_P_IO_read */ diff --git a/base/runtime/c-libs/posix-io/readbuf.c b/base/runtime/c-libs/posix-io/readbuf.c new file mode 100644 index 0000000..6ed1664 --- /dev/null +++ b/base/runtime/c-libs/posix-io/readbuf.c @@ -0,0 +1,31 @@ +/* readbuf.c + * + * COPYRIGHT (c) 1995 by AT&T Bell Laboratories. + */ + +#include +#include "ml-objects.h" +#include "ml-c.h" +#include "cfun-proto-list.h" + +/* _ml_P_IO_readbuf : (int * Word8Array.array * int * int) -> int + * fd data nbytes start + * + * Read nbytes of data from the specified file into the given array, + * starting at start. Return the number of bytes read. Assume bounds + * have been checked. + */ +ml_val_t _ml_P_IO_readbuf (ml_state_t *msp, ml_val_t arg) +{ + int fd = REC_SELINT(arg, 0); + ml_val_t buf = REC_SEL(arg, 1); + int nbytes = REC_SELINT(arg, 2); + char *start = STR_MLtoC(buf) + REC_SELINT(arg, 3); + int n; + + n = read (fd, start, nbytes); + + CHK_RETURN (msp, n) + +} /* end of _ml_P_IO_readbuf */ + diff --git a/base/runtime/c-libs/posix-io/write.c b/base/runtime/c-libs/posix-io/write.c new file mode 100644 index 0000000..56daf44 --- /dev/null +++ b/base/runtime/c-libs/posix-io/write.c @@ -0,0 +1,30 @@ +/* write.c + * + * COPYRIGHT (c) 1995 by AT&T Bell Laboratories. + */ + +#include "ml-unixdep.h" +#include +#include "ml-objects.h" +#include "ml-c.h" +#include "cfun-proto-list.h" + + +/* _ml_P_IO_write : (int * Word8Vector.vector * int) -> int + * + * Write the number of bytes of data from the given vector, + * starting at index 0, to the specified file. Return the + * number of bytes written. Assume bounds checks have been done. + */ +ml_val_t _ml_P_IO_write (ml_state_t *msp, ml_val_t arg) +{ + int fd = REC_SELINT(arg, 0); + ml_val_t data = REC_SEL(arg, 1); + size_t nbytes = REC_SELINT(arg, 2); + ssize_t n; + + n = write (fd, STR_MLtoC(data), nbytes); + + CHK_RETURN (msp, n) + +} /* end of _ml_P_IO_write */ diff --git a/base/runtime/c-libs/posix-io/writebuf.c b/base/runtime/c-libs/posix-io/writebuf.c new file mode 100644 index 0000000..fa908dd --- /dev/null +++ b/base/runtime/c-libs/posix-io/writebuf.c @@ -0,0 +1,32 @@ +/* writebuf.c + * + * COPYRIGHT (c) 1995 by AT&T Bell Laboratories. + */ + +#include "ml-unixdep.h" +#include +#include "ml-base.h" +#include "ml-objects.h" +#include "ml-c.h" +#include "cfun-proto-list.h" + +/* _ml_P_IO_writebuf : (int * Word8Array.array * int * int) -> int + * fd data nbytes start + * + * Write nbytes of data from the given array to the specified file, + * starting at the given offset. Assume bounds have been checked. + */ +ml_val_t _ml_P_IO_writebuf (ml_state_t *msp, ml_val_t arg) +{ + int fd = REC_SELINT(arg, 0); + ml_val_t start = REC_SEL(arg, 1); + size_t nbytes = REC_SELINT(arg, 2); + char *data = STR_MLtoC(start) + REC_SELINT(arg, 3); + ssize_t n; + + n = write (fd, data, nbytes); + + CHK_RETURN (msp, n) + +} /* end of _ml_P_IO_writebuf */ + diff --git a/base/runtime/c-libs/posix-os/OLDselect.c b/base/runtime/c-libs/posix-os/OLDselect.c new file mode 100644 index 0000000..7aee8be --- /dev/null +++ b/base/runtime/c-libs/posix-os/OLDselect.c @@ -0,0 +1,224 @@ +/* select.c + * + * COPYRIGHT (c) 1994 by AT&T Bell Laboratories. + */ + +#include "ml-osdep.h" +#if defined(HAS_SELECT) +#include +#include +#elif defined(HAS_POLL) +#include +#include +#endif +#include +#include +#include "ml-base.h" +#include "ml-values.h" +#include "ml-objects.h" +#include "tags.h" +#include "ml-state.h" +#include "ml-signal.h" +#include "ml-c.h" +#include "cfun-proto-list.h" + +#ifdef HAS_SELECT +PVT fd_set *ListToFDSet (ml_val_t fdl, fd_set *fds, int *width); +PVT ml_val_t FDSetToList (ml_state_t *msp, fd_set *fds, int width); +#endif + + +/* _ml_IO_select : (int list * int list * int list * (int * int) option) + * -> (int list * int list * int list) + * + * Check file descriptors for the readiness of I/O operations. + */ +ml_val_t _ml_IO_select (ml_state_t *msp, ml_val_t arg) +{ +#if ((! defined(HAS_SELECT)) && (! defined(HAS_POLL))) + return RAISE_ERROR (msp, "SMLNJ-IO.select unsupported"); +#else + ml_val_t rl = REC_SEL(arg, 0); + ml_val_t wl = REC_SEL(arg, 1); + ml_val_t el = REC_SEL(arg, 2); + ml_val_t timeout = REC_SEL(arg, 3); +#ifdef HAS_SELECT + fd_set rset, wset, eset; + fd_set *rfds, *wfds, *efds; + int width = 0, sts; + struct timeval t, *tp; + + rfds = ListToFDSet (rl, &rset, &width); + wfds = ListToFDSet (wl, &wset, &width); + efds = ListToFDSet (el, &eset, &width); + + if (isBOXED(timeout)) { + timeout = REC_SEL(timeout, 0); /* strip the SOME */ + t.tv_sec = REC_SELINT(timeout, 0); + t.tv_usec = REC_SELINT(timeout, 1); + tp = &t; + } + else + tp = 0; + +#else /* HAS_POLL */ + struct pollfd *fds; + int nr, nw, ne, nfds, i, t, sts; + +#define COUNT(cntr, l) { \ + ml_val_t __p = (l); \ + for (cntr = 0; __p != LIST_nil; __p = LIST_tl(__p)) \ + cntr++; \ + } +#define INSERT(req, l) { \ + ml_val_t __p = (l); \ + while (__p != LIST_nil) { \ + fds[i].fd = INT_MLtoC(LIST_hd(__p)); \ + fds[i].events = (req); \ + i++; \ + __p = LIST_tl(__p); \ + } \ + } + + COUNT(nr, rl); + COUNT(nw, wl); + COUNT(ne, el); + nfds = nr+nw+ne; + fds = NEW_VEC(struct pollfd, nfds); + i = 0; + INSERT(POLLIN, rl); + INSERT(POLLOUT, wl); +#ifdef POLLMSG + INSERT(POLLRDBAND|POLLPRI|POLLMSG|POLLHUP, el); +#else + INSERT(POLLRDBAND|POLLPRI|POLLHUP, el); +#endif + + if (isBOXED(timeout)) { + long sec, usec; + timeout = REC_SEL(timeout, 0); /* strip the SOME */ + sec = REC_SELINT(timeout, 0); + usec = REC_SELINT(timeout, 1); + t = (usec/1000 + sec*1000); + } + else + t = INFTIM; +#endif + + if (msp->ml_inSigHandler || msp->ml_maskSignals + || ((! SETJMP (msp->ml_syscallEnv)) && + (((msp->ml_ioWaitFlag = TRUE), (msp->ml_numPendingSigs == 0))))) + { +#ifdef HAS_SELECT + DO_SYSCALL (select (width, rfds, wfds, efds, tp), sts); +#else /* HAS_POLL */ + DO_SYSCALL (poll (fds, nfds, t), sts); +#endif + msp->ml_ioWaitFlag = FALSE; + } + else { +#ifdef HAS_POLL + FREE (fds); +#endif + BackupMLCont(msp); + /* re-enable signals */ + RESET_SIG_MASK(); + return msp->ml_arg; + } + + if (sts == -1) { +#ifdef HAS_POLL + FREE (fds); +#endif + return RAISE_SYSERR (msp, sts); + } + else { + ml_val_t rfdl, wfdl, efdl, res; + + if (sts == 0) + rfdl = wfdl = efdl = LIST_nil; + else { +#ifdef HAS_SELECT + rfdl = FDSetToList (msp, rfds, width); + wfdl = FDSetToList (msp, wfds, width); + efdl = FDSetToList (msp, efds, width); +#else /* HAS_POLL */ +#define BUILD_RESULT(l,n) { \ + l = LIST_nil; \ + while ((sts > 0) && (n > 0)) { \ + if (fds[i].revents != 0) { \ + sts--; \ + LIST_cons(msp, l, INT_CtoML(fds[i].fd), l); \ + } \ + n--; i++; \ + } \ + } + i = 0; + BUILD_RESULT(rfdl, nr); + BUILD_RESULT(wfdl, nw); + BUILD_RESULT(efdl, ne); +#endif + } + REC_ALLOC3 (msp, res, rfdl, wfdl, efdl); + +#ifdef HAS_POLL + FREE (fds); +#endif + + return res; + } +#endif +} /* end of _ml_IO_select */ + + +#ifdef HAS_SELECT + +/* ListToFDSet: + * + * Map a ML list of file descriptors to a fd_set. + */ +PVT fd_set *ListToFDSet (ml_val_t fdl, fd_set *fds, int *width) +{ + register int fd, maxfd = -1; + + FD_ZERO(fds); + while (fdl != LIST_nil) { + fd = INT_MLtoC(LIST_hd(fdl)); + if (fd > maxfd) + maxfd = fd; + FD_SET (fd, fds); + fdl = LIST_tl(fdl); + } + + if (maxfd >= 0) { + if (maxfd >= *width) + *width = maxfd+1; + return fds; + } + else + return (fd_set *)0; + +} /* end of ListToFDSet */ + +/* FDSetToList: + * + * Map a fd_set to a ML list of ready file descriptors. + */ +PVT ml_val_t FDSetToList (ml_state_t *msp, fd_set *fds, int width) +{ + register ml_val_t p; + register int i; + + if (fds == NIL(fd_set *)) + return LIST_nil; + + for (i = 0, p = LIST_nil; i < width; i++) { + if (FD_ISSET(i, fds)) + LIST_cons (msp, p, INT_CtoML(i), p); + } + + return p; + +} /* end of FDSetToList */ + +#endif /* HAS_SELECT */ diff --git a/base/runtime/c-libs/posix-os/README b/base/runtime/c-libs/posix-os/README new file mode 100644 index 0000000..0a9a4fe --- /dev/null +++ b/base/runtime/c-libs/posix-os/README @@ -0,0 +1,4 @@ +This directory contains C functions for implementing parts of the generic +OS interface on Unix systems that support Posix 1003.1b. These functions +support operations that are not part of Posix, but are present on most Unix +systems. diff --git a/base/runtime/c-libs/posix-os/cfun-list.h b/base/runtime/c-libs/posix-os/cfun-list.h new file mode 100644 index 0000000..a56d47a --- /dev/null +++ b/base/runtime/c-libs/posix-os/cfun-list.h @@ -0,0 +1,16 @@ +/* cfun-list.h + * + * COPYRIGHT (c) 1994 AT&T Bell Laboratories. + * + * This file lists the directory library of C functions that are callable by ML. + */ + +#ifndef CLIB_NAME +#define CLIB_NAME "POSIX-OS" +#define CLIB_VERSION "1.0" +#define CLIB_DATE "December 21, 1995" +#endif + +CFUNC("poll", _ml_OS_poll, "((int * word) list * (int * int) option) -> (int * word) list") +CFUNC("tmpname", _ml_OS_tmpname, "unit -> string") + diff --git a/base/runtime/c-libs/posix-os/cfun-proto-list.h b/base/runtime/c-libs/posix-os/cfun-proto-list.h new file mode 100644 index 0000000..16b3e96 --- /dev/null +++ b/base/runtime/c-libs/posix-os/cfun-proto-list.h @@ -0,0 +1,18 @@ +/* cfun-proto-list.h + * + * COPYRIGHT (c) 1194 AT&T Bell Laboratories. + */ + +#ifndef _CFUN_PROTO_LIST_ +#define _CFUN_PROTO_LIST_ + +#ifndef _C_LIBRARY_ +# include "c-library.h" +#endif + +/* the external definitions for the C functions */ +#define CFUNC(NAME, FUNC, MLTYPE) CFUNC_PROTO(NAME, FUNC, MLTYPE) +#include "cfun-list.h" +#undef CFUNC + +#endif /* !_CFUN_PROTO_LIST_ */ diff --git a/base/runtime/c-libs/posix-os/makefile b/base/runtime/c-libs/posix-os/makefile new file mode 100644 index 0000000..7e50736 --- /dev/null +++ b/base/runtime/c-libs/posix-os/makefile @@ -0,0 +1,41 @@ +# +# the makefile for the generic OS support library +# + +SHELL = /bin/sh + +INC_DIR = ../../include +CLIB_DIR = ../ + +INCLUDES = -I$(INC_DIR) -I$(CLIB_DIR) -I../../objs + +MAKE = make +AR = ar +ARFLAGS = rcv +RANLIB = ranlib + +LIBRARY = libposix-os.a + +VERSION = v-dummy + +OBJS = posix-os-lib.o \ + poll.o \ + tmpname.o + +$(LIBRARY) : $(VERSION) $(OBJS) + rm -rf $(LIBRARY) + $(AR) $(ARFLAGS) $(LIBRARY) $(OBJS) + $(RANLIB) $(LIBRARY) + +$(VERSION) : + ($(MAKE) MAKE="$(MAKE)" clean) + echo "$(VERSION)" > $(VERSION) + +.c.o: $(INC_DIR)/ml-unixdep.h $(INC_DIR)/ml-base.h $(INC_DIR)/ml-values.h \ + $(INC_DIR)/ml-objects.h \ + $(CLIB_DIR)/ml-c.h cfun-proto-list.h cfun-list.h + $(CC) $(CFLAGS) $(DEFS) $(INCLUDES) -c $< + +clean : + rm -f v-* *.o $(LIBRARY) + diff --git a/base/runtime/c-libs/posix-os/poll.c b/base/runtime/c-libs/posix-os/poll.c new file mode 100644 index 0000000..e207621 --- /dev/null +++ b/base/runtime/c-libs/posix-os/poll.c @@ -0,0 +1,220 @@ +/* poll.c + * + * COPYRIGHT (c) 1994 by AT&T Bell Laboratories. + * + * The run-time code for OS.IO.poll. Note that this implementation should + * satisfy the following two properties: + * + * 1) the list of return items should be in the same order as the + * corresponding list of arguments. + * + * 2) return items should contain no more information than was queried for + * (this matters when the same descriptor is covered by multiple items). + */ + +#include "ml-unixdep.h" +#if defined(HAS_SELECT) +# include INCLUDE_TYPES_H +# include INCLUDE_TIME_H +#elif defined(HAS_POLL) +# include +# include +#else +# error no support for I/O polling +#endif +#include INCLUDE_TIME_H +#include "ml-base.h" +#include "ml-c.h" +#include "ml-values.h" +#include "ml-objects.h" +#include "cfun-proto-list.h" + +/* bit masks for polling descriptors (see src/sml-nj/boot/Unix/os-io.sml) */ +#define RD_BIT 0x1 +#define WR_BIT 0x2 +#define ERR_BIT 0x4 + +PVT ml_val_t ML_Poll (ml_state_t *msp, ml_val_t pollList, struct timeval *timeout); + + +/* _ml_OS_poll : ((int * word) list * (Int32.int * int) option) -> (int * word) list + */ +ml_val_t _ml_OS_poll (ml_state_t *msp, ml_val_t arg) +{ + ml_val_t pollList = REC_SEL(arg, 0); + ml_val_t timeout = REC_SEL(arg, 1); + struct timeval tv, *tvp; + + if (timeout == OPTION_NONE) + tvp = NIL(struct timeval *); + else { + timeout = OPTION_get(timeout); + tv.tv_sec = REC_SELINT32(timeout, 0); + tv.tv_usec = REC_SELINT(timeout, 1); + tvp = &tv; + } + + return ML_Poll (msp, pollList, tvp); + +} /* end of _ml_OS_poll */ + + +#ifdef HAS_POLL + +#ifdef POLLMSG +#define POLL_ERROR (POLLRDBAND|POLLPRI|POLLHUP|POLLMSG) +#else +#define POLL_ERROR (POLLRDBAND|POLLPRI|POLLHUP) +#endif + +/* ML_Poll: + * + * The version of the polling operation for systems that provide SVR4 polling. + */ +PVT ml_val_t ML_Poll (ml_state_t *msp, ml_val_t pollList, struct timeval *timeout) +{ + int tout, sts; + struct pollfd *fds, *fdp; + int nfds, i, flag; + ml_val_t l, item; + + if (timeout == NIL(struct timeval *)) + tout = -1; + else + /* convert to miliseconds */ + tout = (timeout->tv_sec * 1000) + (timeout->tv_usec / 1000); + + /* count the number of polling items */ + for (l = pollList, nfds = 0; l != LIST_nil; l = LIST_tl(l)) + nfds++; + + /* allocate the fds vector */ + fds = NEW_VEC(struct pollfd, nfds); + CLEAR_MEM (fds, sizeof(struct pollfd)*nfds); + + /* initialize the polling descriptors */ + for (l = pollList, fdp = fds; l != LIST_nil; l = LIST_tl(l), fdp++) { + item = LIST_hd(l); + fdp->fd = REC_SELINT(item, 0); + flag = REC_SELINT(item, 1); + if ((flag & RD_BIT) != 0) + fdp->events |= POLLIN; + if ((flag & WR_BIT) != 0) + fdp->events |= POLLOUT; + if ((flag & ERR_BIT) != 0) + fdp->events |= POLL_ERROR; + } + + sts = poll (fds, nfds, tout); + + if (sts < 0) { + FREE(fds); + return RAISE_SYSERR(msp, sts); + } + else { + for (i = nfds-1, l = LIST_nil; i >= 0; i--) { + fdp = &(fds[i]); + if (fdp->revents != 0) { + flag = 0; + if ((fdp->revents & POLLIN) != 0) + flag |= RD_BIT; + if ((fdp->revents & POLLOUT) != 0) + flag |= WR_BIT; + if ((fdp->revents & POLL_ERROR) != 0) + flag |= ERR_BIT; + REC_ALLOC2(msp, item, INT_CtoML(fdp->fd), INT_CtoML(flag)); + LIST_cons(msp, l, item, l); + } + } + FREE(fds); + return l; + } + +} /* end of ML_Poll */ + +#else /* HAS_SELECT */ + +/* ML_Poll: + * + * The version of the polling operation for systems that provide BSD select. + */ +PVT ml_val_t ML_Poll (ml_state_t *msp, ml_val_t pollList, struct timeval *timeout) +{ + fd_set rset, wset, eset; + fd_set *rfds, *wfds, *efds; + int maxFD, sts, fd, flag; + ml_val_t l, item; + + rfds = wfds = efds = NIL(fd_set *); + maxFD = 0; + for (l = pollList; l != LIST_nil; l = LIST_tl(l)) { + item = LIST_hd(l); + fd = REC_SELINT(item, 0); + flag = REC_SELINT(item, 1); + if ((flag & RD_BIT) != 0) { + if (rfds == NIL(fd_set *)) { + rfds = &rset; + FD_ZERO(rfds); + } + FD_SET (fd, rfds); + } + if ((flag & WR_BIT) != 0) { + if (wfds == NIL(fd_set *)) { + wfds = &wset; + FD_ZERO(wfds); + } + FD_SET (fd, wfds); + } + if ((flag & ERR_BIT) != 0) { + if (efds == NIL(fd_set *)) { + efds = &eset; + FD_ZERO(efds); + } + FD_SET (fd, efds); + } + if (fd > maxFD) maxFD = fd; + } + + sts = select (maxFD+1, rfds, wfds, efds, timeout); + + if (sts < 0) + return RAISE_SYSERR(msp, sts); + else if (sts == 0) + return LIST_nil; + else { + ml_val_t *resVec = NEW_VEC(ml_val_t, sts); + int i, resFlag; + + for (i = 0, l = pollList; l != LIST_nil; l = LIST_tl(l)) { + item = LIST_hd(l); + fd = REC_SELINT(item, 0); + flag = REC_SELINT(item, 1); + resFlag = 0; + if (((flag & RD_BIT) != 0) && FD_ISSET(fd, rfds)) + resFlag |= RD_BIT; + if (((flag & WR_BIT) != 0) && FD_ISSET(fd, wfds)) + resFlag |= WR_BIT; + if (((flag & ERR_BIT) != 0) && FD_ISSET(fd, efds)) + resFlag |= ERR_BIT; + if (resFlag != 0) { + REC_ALLOC2 (msp, item, INT_CtoML(fd), INT_CtoML(resFlag)); + resVec[i++] = item; + } + } + + ASSERT(i == sts); + + for (i = sts-1, l = LIST_nil; i >= 0; i--) { + item = resVec[i]; + LIST_cons (msp, l, item, l); + } + + FREE(resVec); + + return l; + } + +} /* end of ML_Poll */ + +#endif + diff --git a/base/runtime/c-libs/posix-os/posix-os-lib.c b/base/runtime/c-libs/posix-os/posix-os-lib.c new file mode 100644 index 0000000..80795c4 --- /dev/null +++ b/base/runtime/c-libs/posix-os/posix-os-lib.c @@ -0,0 +1,28 @@ +/* posix-os-lib.c + * + * COPYRIGHT (c) 1995 AT&T Bell Laboratories. + */ + +#include "ml-base.h" +#include "c-library.h" +#include "cfun-proto-list.h" + + +/* the table of C functions and ML names */ +#define CFUNC(NAME, FUNC, MLTYPE) CFUNC_BIND(NAME, FUNC, MLTYPE) +PVT cfunc_binding_t CFunTable[] = { +#include "cfun-list.h" + CFUNC_NULL_BIND + }; +#undef CFUNC + + +/* the OSectory library */ +c_library_t POSIX_OS_Library = { + CLIB_NAME, + CLIB_VERSION, + CLIB_DATE, + NIL(clib_init_fn_t), + CFunTable + }; + diff --git a/base/runtime/c-libs/posix-os/tmpname.c b/base/runtime/c-libs/posix-os/tmpname.c new file mode 100644 index 0000000..82f262b --- /dev/null +++ b/base/runtime/c-libs/posix-os/tmpname.c @@ -0,0 +1,50 @@ +/* tmpname.c + * + * COPYRIGHT (c) 2014 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + */ + +#include "ml-unixdep.h" +#include +#include "ml-base.h" +#include "ml-c.h" +#include "ml-values.h" +#include "ml-objects.h" +#include "cfun-proto-list.h" + +#if defined(HAS_MKSTEMP) && defined(P_tmpdir) +# define TEMPLATE P_tmpdir "/SMLNJ-XXXXXX" +#endif + +/* _ml_OS_tmpname: + */ +ml_val_t _ml_OS_tmpname (ml_state_t *msp, ml_val_t arg) +{ +#if defined(HAS_MKSTEMP) && defined(P_tmpdir) + + /* mkstemp was added to the IEEE Std 1003.1 in 2004, so most systems should support it */ + char template[sizeof(TEMPLATE)]; + int sts; + + strcpy (template, TEMPLATE); + sts = mkstemp (template); + + if (sts < 0) { + return RAISE_SYSERR(msp, sts); + } + else { + close (sts); /* close the file descriptor */ + return ML_CString (msp, template); + } + +#else /* for old systems */ + char buf[L_tmpnam]; + + tmpnam (buf); + + return ML_CString (msp, buf); + +#endif + +} /* end of _ml_OS_tmpname */ + diff --git a/base/runtime/c-libs/posix-procenv/cfun-list.h b/base/runtime/c-libs/posix-procenv/cfun-list.h new file mode 100644 index 0000000..42622e2 --- /dev/null +++ b/base/runtime/c-libs/posix-procenv/cfun-list.h @@ -0,0 +1,36 @@ +/* cfun-list.h + * + * COPYRIGHT (c) 1995 AT&T Bell Laboratories. + * + * This file lists the directory library of C functions that are callable by ML. + */ + +#ifndef CLIB_NAME +#define CLIB_NAME "POSIX-ProcEnv" +#define CLIB_VERSION "1.0" +#define CLIB_DATE "February 16, 1995" +#endif + +CFUNC("getpid", _ml_P_ProcEnv_getpid, "unit -> int") +CFUNC("getppid", _ml_P_ProcEnv_getppid, "unit -> int") +CFUNC("getuid", _ml_P_ProcEnv_getuid, "unit -> word") +CFUNC("geteuid", _ml_P_ProcEnv_geteuid, "unit -> word") +CFUNC("getgid", _ml_P_ProcEnv_getgid, "unit -> word") +CFUNC("getegid", _ml_P_ProcEnv_getegid, "unit -> word") +CFUNC("setuid", _ml_P_ProcEnv_setuid, "word -> unit") +CFUNC("setgid", _ml_P_ProcEnv_setgid, "word -> unit") +CFUNC("getgroups", _ml_P_ProcEnv_getgroups, "unit -> word list") +CFUNC("getlogin", _ml_P_ProcEnv_getlogin, "unit -> string") +CFUNC("getpgrp", _ml_P_ProcEnv_getpgrp, "unit -> int") +CFUNC("setsid", _ml_P_ProcEnv_setsid, "unit -> int") +CFUNC("setpgid", _ml_P_ProcEnv_setpgid, "int * int -> unit") +CFUNC("uname", _ml_P_ProcEnv_uname, "unit -> (string * string) list") +CFUNC("sysconf", _ml_P_ProcEnv_sysconf, "string -> word") +CFUNC("time", _ml_P_ProcEnv_time, "unit -> int") +CFUNC("times", _ml_P_ProcEnv_times, "unit -> int * int * int * int * int") +CFUNC("getenv", _ml_P_ProcEnv_getenv, "string -> string option") +CFUNC("environ", _ml_P_ProcEnv_environ, "unit -> string list") +CFUNC("ctermid", _ml_P_ProcEnv_ctermid, "unit -> string") +CFUNC("ttyname", _ml_P_ProcEnv_ttyname, "int -> string") +CFUNC("isatty", _ml_P_ProcEnv_isatty, "int -> bool") + diff --git a/base/runtime/c-libs/posix-procenv/cfun-proto-list.h b/base/runtime/c-libs/posix-procenv/cfun-proto-list.h new file mode 100644 index 0000000..16b3e96 --- /dev/null +++ b/base/runtime/c-libs/posix-procenv/cfun-proto-list.h @@ -0,0 +1,18 @@ +/* cfun-proto-list.h + * + * COPYRIGHT (c) 1194 AT&T Bell Laboratories. + */ + +#ifndef _CFUN_PROTO_LIST_ +#define _CFUN_PROTO_LIST_ + +#ifndef _C_LIBRARY_ +# include "c-library.h" +#endif + +/* the external definitions for the C functions */ +#define CFUNC(NAME, FUNC, MLTYPE) CFUNC_PROTO(NAME, FUNC, MLTYPE) +#include "cfun-list.h" +#undef CFUNC + +#endif /* !_CFUN_PROTO_LIST_ */ diff --git a/base/runtime/c-libs/posix-procenv/ctermid.c b/base/runtime/c-libs/posix-procenv/ctermid.c new file mode 100644 index 0000000..9a454ab --- /dev/null +++ b/base/runtime/c-libs/posix-procenv/ctermid.c @@ -0,0 +1,30 @@ +/* ctermid.c + * + * COPYRIGHT (c) 1995 by AT&T Bell Laboratories. + */ + +#include "ml-unixdep.h" +#include +#include "ml-base.h" +#include "ml-values.h" +#include "ml-objects.h" +#include "ml-c.h" +#include "cfun-proto-list.h" + +/* _ml_P_ProcEnv_ctermid: unit -> string + * + * Return pathname of controlling terminal. + */ +ml_val_t _ml_P_ProcEnv_ctermid (ml_state_t *msp, ml_val_t arg) +{ + char name[L_ctermid]; + char *sts; + + sts = ctermid(name); + if (sts == NIL(char *) || *sts == '\0') + return RAISE_ERROR(msp, "cannot determine controlling terminal"); + + return ML_CString (msp, name); + +} /* end of _ml_P_ProcEnv_ctermid */ + diff --git a/base/runtime/c-libs/posix-procenv/environ.c b/base/runtime/c-libs/posix-procenv/environ.c new file mode 100644 index 0000000..31ba282 --- /dev/null +++ b/base/runtime/c-libs/posix-procenv/environ.c @@ -0,0 +1,19 @@ +/* environ.c + * + * COPYRIGHT (c) 1995 by AT&T Bell Laboratories. + */ + +#include "ml-base.h" +#include "ml-values.h" +#include "ml-objects.h" + +/* _ml_P_ProcEnv_environ : unit -> string list + */ +ml_val_t _ml_P_ProcEnv_environ (ml_state_t *msp, ml_val_t arg) +{ + extern char **environ; + + return ML_CStringList (msp, environ); + +} /* end of _ml_P_ProcEnv_environ */ + diff --git a/base/runtime/c-libs/posix-procenv/getegid.c b/base/runtime/c-libs/posix-procenv/getegid.c new file mode 100644 index 0000000..310e1be --- /dev/null +++ b/base/runtime/c-libs/posix-procenv/getegid.c @@ -0,0 +1,23 @@ +/* getegid.c + * + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + */ + +#include +#include "ml-objects.h" +#include "cfun-proto-list.h" + +/* _ml_P_ProcEnv_getegid: unit -> SysWord.word + * + * Return effective group id + */ +ml_val_t _ml_P_ProcEnv_getegid (ml_state_t *msp, ml_val_t arg) +{ + ml_val_t p; + + SYSWORD_ALLOC (msp, p, (SysWord_t)(getegid())); + return p; + +} /* end of _ml_P_ProcEnv_getegid */ + diff --git a/base/runtime/c-libs/posix-procenv/getenv.c b/base/runtime/c-libs/posix-procenv/getenv.c new file mode 100644 index 0000000..97ceb3b --- /dev/null +++ b/base/runtime/c-libs/posix-procenv/getenv.c @@ -0,0 +1,32 @@ +/* getenv.c + * + * COPYRIGHT (c) 1995 by AT&T Bell Laboratories. + */ + +#include "ml-base.h" +#include "ml-values.h" +#include "ml-objects.h" +#include "cfun-proto-list.h" +#include + +/* _ml_P_ProcEnv_getenv: string -> string option + * + * Return value for environment name + */ +ml_val_t _ml_P_ProcEnv_getenv (ml_state_t *msp, ml_val_t arg) +{ + char *sts; + ml_val_t r, s; + + sts = getenv(STR_MLtoC(arg)); + if (sts == NIL(char *)) + r = OPTION_NONE; + else { + s = ML_CString(msp,sts); + OPTION_SOME(msp, r, s) + } + + return r; + +} /* end of _ml_P_ProcEnv_getenv */ + diff --git a/base/runtime/c-libs/posix-procenv/geteuid.c b/base/runtime/c-libs/posix-procenv/geteuid.c new file mode 100644 index 0000000..fffdf8f --- /dev/null +++ b/base/runtime/c-libs/posix-procenv/geteuid.c @@ -0,0 +1,23 @@ +/* geteuid.c + * + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + */ + +#include +#include "ml-objects.h" +#include "cfun-proto-list.h" + +/* _ml_P_ProcEnv_geteuid: unit -> SysWord.word + * + * Return effective user id + */ +ml_val_t _ml_P_ProcEnv_geteuid (ml_state_t *msp, ml_val_t arg) +{ + ml_val_t p; + + SYSWORD_ALLOC (msp, p, (SysWord_t)(geteuid())); + return p; + +} /* end of _ml_P_ProcEnv_geteuid */ + diff --git a/base/runtime/c-libs/posix-procenv/getgid.c b/base/runtime/c-libs/posix-procenv/getgid.c new file mode 100644 index 0000000..a5228ff --- /dev/null +++ b/base/runtime/c-libs/posix-procenv/getgid.c @@ -0,0 +1,23 @@ +/* getgid.c + * + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + */ + +#include +#include "ml-objects.h" +#include "cfun-proto-list.h" + +/* _ml_P_ProcEnv_getgid: unit -> SysWord.word + * + * Return group id + */ +ml_val_t _ml_P_ProcEnv_getgid (ml_state_t *msp, ml_val_t arg) +{ + ml_val_t p; + + SYSWORD_ALLOC (msp, p, (SysWord_t)(getgid())); + return p; + +} /* end of _ml_P_ProcEnv_getgid */ + diff --git a/base/runtime/c-libs/posix-procenv/getgroups.c b/base/runtime/c-libs/posix-procenv/getgroups.c new file mode 100644 index 0000000..9196b32 --- /dev/null +++ b/base/runtime/c-libs/posix-procenv/getgroups.c @@ -0,0 +1,89 @@ +/* getgroups.c + * + * COPYRIGHT (c) 1995 by AT&T Bell Laboratories. + */ + +#include "ml-unixdep.h" +#include +#include +#include +#include "ml-base.h" +#include "ml-values.h" +#include "ml-objects.h" +#include "ml-c.h" +#include "cfun-proto-list.h" + +/* some OSs use int[] as the second argument to getgroups(), when gid_t + * is not int. + */ +#ifdef INT_GIDLIST +typedef int gid; +#else +typedef gid_t gid; +#endif + + +/* mkList: + * + * Convert array of gid_t into a list of gid_t + */ +PVT ml_val_t mkList (ml_state_t *msp, int ngrps, gid gidset[]) +{ + ml_val_t p, w; + +/** NOTE: we should do something about possible GC!!! **/ + + p = LIST_nil; + while (ngrps-- > 0) { + WORD_ALLOC (msp, w, (Word_t)(gidset[ngrps])); + LIST_cons(msp, p, w, p); + } + + return p; +} + +/* _ml_P_ProcEnv_getgroups: unit -> word list + * + * Return supplementary group access list ids. + */ +ml_val_t _ml_P_ProcEnv_getgroups (ml_state_t *msp, ml_val_t arg) +{ + gid gidset[NGROUPS_MAX]; + int ngrps; + ml_val_t p; + + ngrps = getgroups (NGROUPS_MAX, gidset); + + if (ngrps == -1) { + gid *gp; + + /* If the error was not due to too small buffer size, + * raise exception. + */ + if (errno != EINVAL) + return RAISE_SYSERR(msp, -1); + + /* Find out how many groups there are and allocate enough space. */ + ngrps = getgroups (0, gidset); + gp = (gid *)MALLOC(ngrps * (sizeof (gid))); + if (gp == 0) { + errno = ENOMEM; + return RAISE_SYSERR(msp, -1); + } + + ngrps = getgroups (ngrps, gp); + + if (ngrps == -1) + p = RAISE_SYSERR(msp, -1); + else + p = mkList (msp, ngrps, gp); + + FREE ((void *)gp); + } + else + p = mkList (msp, ngrps, gidset); + + return p; + +} /* end of _ml_P_ProcEnv_getgroups */ + diff --git a/base/runtime/c-libs/posix-procenv/getlogin.c b/base/runtime/c-libs/posix-procenv/getlogin.c new file mode 100644 index 0000000..bc8ceef --- /dev/null +++ b/base/runtime/c-libs/posix-procenv/getlogin.c @@ -0,0 +1,28 @@ +/* getlogin.c + * + * COPYRIGHT (c) 1995 by AT&T Bell Laboratories. + */ + +#include "ml-base.h" +#include "ml-values.h" +#include "ml-objects.h" +#include "ml-c.h" +#include "cfun-proto-list.h" +#include + +/* _ml_P_ProcEnv_getlogin: unit -> string + * + * Return login name + */ +ml_val_t _ml_P_ProcEnv_getlogin (ml_state_t *msp, ml_val_t arg) +{ + char* name; + + name = getlogin(); + if (name == NIL(char *)) + return RAISE_ERROR(msp, "no login name"); + + return ML_CString (msp, name); + +} /* end of _ml_P_ProcEnv_getlogin */ + diff --git a/base/runtime/c-libs/posix-procenv/getpgrp.c b/base/runtime/c-libs/posix-procenv/getpgrp.c new file mode 100644 index 0000000..7fe85c0 --- /dev/null +++ b/base/runtime/c-libs/posix-procenv/getpgrp.c @@ -0,0 +1,20 @@ +/* getpgrp.c + * + * COPYRIGHT (c) 1995 by AT&T Bell Laboratories. + */ + +#include "ml-base.h" +#include "ml-values.h" +#include "cfun-proto-list.h" +#include + +/* _ml_P_ProcEnv_getpgrp: unit -> int + * + * Return process group + */ +ml_val_t _ml_P_ProcEnv_getpgrp (ml_state_t *msp, ml_val_t arg) +{ + return INT_CtoML(getpgrp()); + +} /* end of _ml_P_ProcEnv_getpgrp */ + diff --git a/base/runtime/c-libs/posix-procenv/getpid.c b/base/runtime/c-libs/posix-procenv/getpid.c new file mode 100644 index 0000000..6343204 --- /dev/null +++ b/base/runtime/c-libs/posix-procenv/getpid.c @@ -0,0 +1,20 @@ +/* getpid.c + * + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + */ + +#include "ml-base.h" +#include "ml-values.h" +#include "cfun-proto-list.h" +#include + +/* _ml_P_ProcEnv_getpid : unit -> int + * + * Return the process id of the current process. + */ +ml_val_t _ml_P_ProcEnv_getpid (ml_state_t *msp, ml_val_t arg) +{ + return INT_CtoML(getpid()); + +} /* end of _ml_P_ProcEnv_getpid */ diff --git a/base/runtime/c-libs/posix-procenv/getppid.c b/base/runtime/c-libs/posix-procenv/getppid.c new file mode 100644 index 0000000..4a4dfd4 --- /dev/null +++ b/base/runtime/c-libs/posix-procenv/getppid.c @@ -0,0 +1,20 @@ +/* getppid.c + * + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + */ + +#include "ml-base.h" +#include "ml-values.h" +#include "cfun-proto-list.h" +#include + +/* _ml_P_ProcEnv_getppid : unit -> int + * + * Return the process id of the parent process. + */ +ml_val_t _ml_P_ProcEnv_getppid (ml_state_t *msp, ml_val_t arg) +{ + return INT_CtoML(getppid()); + +} /* end of _ml_P_ProcEnv_getppid */ diff --git a/base/runtime/c-libs/posix-procenv/getuid.c b/base/runtime/c-libs/posix-procenv/getuid.c new file mode 100644 index 0000000..b24a687 --- /dev/null +++ b/base/runtime/c-libs/posix-procenv/getuid.c @@ -0,0 +1,23 @@ +/* getuid.c + * + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + */ + +#include "ml-objects.h" +#include "cfun-proto-list.h" +#include + +/* _ml_P_ProcEnv_getuid: unit -> SysWord.word + * + * Return user id + */ +ml_val_t _ml_P_ProcEnv_getuid (ml_state_t *msp, ml_val_t arg) +{ + ml_val_t p; + + SYSWORD_ALLOC (msp, p, (Word_t)(getuid())); + return p; + +} /* end of _ml_P_ProcEnv_getuid */ + diff --git a/base/runtime/c-libs/posix-procenv/isatty.c b/base/runtime/c-libs/posix-procenv/isatty.c new file mode 100644 index 0000000..3ca7fac --- /dev/null +++ b/base/runtime/c-libs/posix-procenv/isatty.c @@ -0,0 +1,20 @@ +/* isatty.c + * + * COPYRIGHT (c) 1995 by AT&T Bell Laboratories. + */ + +#include "ml-base.h" +#include "ml-values.h" +#include "cfun-proto-list.h" +#include + +/* _ml_P_ProcEnv_isatty: int -> bool + * + * Is file descriptor associated with a terminal device + */ +ml_val_t _ml_P_ProcEnv_isatty (ml_state_t *msp, ml_val_t arg) +{ + return (isatty(INT_MLtoC(arg)) ? ML_true : ML_false); + +} /* end of _ml_P_ProcEnv_isatty */ + diff --git a/base/runtime/c-libs/posix-procenv/makefile b/base/runtime/c-libs/posix-procenv/makefile new file mode 100644 index 0000000..5d5505a --- /dev/null +++ b/base/runtime/c-libs/posix-procenv/makefile @@ -0,0 +1,68 @@ +# +# a template makefile for a C function library +# + +SHELL = /bin/sh + +ROOT_DIR = ../.. +INC_DIR = $(ROOT_DIR)/include +CONFIG_DIR = $(ROOT_DIR)/config +CLIB_DIR = ../ + +INCLUDES = -I$(INC_DIR) -I$(CLIB_DIR) -I../../objs + +MAKE = make +AR = ar +ARFLAGS = rcv +CPP = /lib/cpp +RANLIB = ranlib + +LIBRARY = libposix-procenv.a + +VERSION = v-dummy + +OBJS = posix-procenv-lib.o \ + getpid.o \ + getppid.o \ + getuid.o \ + geteuid.o \ + getgid.o \ + getegid.o \ + setuid.o \ + setgid.o \ + getgroups.o \ + getlogin.o \ + getpgrp.o \ + setsid.o \ + setpgid.o \ + uname.o \ + time.o \ + times.o \ + getenv.o \ + environ.o \ + ctermid.o \ + ttyname.o \ + sysconf.o \ + isatty.o + +$(LIBRARY) : $(VERSION) $(OBJS) + rm -rf $(LIBRARY) + $(AR) $(ARFLAGS) $(LIBRARY) $(OBJS) + $(RANLIB) $(LIBRARY) + +$(VERSION) : + ($(MAKE) MAKE="$(MAKE)" clean) + echo "$(VERSION)" > $(VERSION) + +.c.o: $(INC_DIR)/ml-unixdep.h $(INC_DIR)/ml-base.h $(INC_DIR)/ml-values.h \ + $(CLIB_DIR)/ml-c.h cfun-proto-list.h cfun-list.h + $(CC) $(CFLAGS) $(DEFS) $(INCLUDES) -c $< + +sysconf.o : ml_sysconf.h + +ml_sysconf.h : + VERSION=$(VERSION) CPP="$(CPP)" $(CONFIG_DIR)/gen-posix-names.sh _SC_ ml_sysconf.h + +clean : + rm -f v-* *.o ml_sysconf.h $(LIBRARY) + diff --git a/base/runtime/c-libs/posix-procenv/posix-procenv-lib.c b/base/runtime/c-libs/posix-procenv/posix-procenv-lib.c new file mode 100644 index 0000000..bf3da16 --- /dev/null +++ b/base/runtime/c-libs/posix-procenv/posix-procenv-lib.c @@ -0,0 +1,28 @@ +/* posix-procenv-lib.c + * + * COPYRIGHT (c) 1995 AT&T Bell Laboratories. + */ + +#include "ml-base.h" +#include "c-library.h" +#include "cfun-proto-list.h" + + +/* the table of C functions and ML names */ +#define CFUNC(NAME, FUNC, MLTYPE) CFUNC_BIND(NAME, FUNC, MLTYPE) +PVT cfunc_binding_t CFunTable[] = { +#include "cfun-list.h" + CFUNC_NULL_BIND + }; +#undef CFUNC + + +/* the POSIX Signal library */ +c_library_t POSIX_ProcEnv_Library = { + CLIB_NAME, + CLIB_VERSION, + CLIB_DATE, + NIL(clib_init_fn_t), + CFunTable + }; + diff --git a/base/runtime/c-libs/posix-procenv/setgid.c b/base/runtime/c-libs/posix-procenv/setgid.c new file mode 100644 index 0000000..b1cc89a --- /dev/null +++ b/base/runtime/c-libs/posix-procenv/setgid.c @@ -0,0 +1,25 @@ +/* setgid.c + * + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + */ + +#include "ml-objects.h" +#include "ml-c.h" +#include "cfun-proto-list.h" +#include + +/* _ml_P_ProcEnv_setgid: SysWord.word -> unit + * + * Set group id + */ +ml_val_t _ml_P_ProcEnv_setgid (ml_state_t *msp, ml_val_t arg) +{ + int sts; + + sts = setgid(SYSWORD_MLtoC(arg)); + + CHK_RETURN_UNIT(msp, sts) + +} /* end of _ml_P_ProcEnv_setgid */ + diff --git a/base/runtime/c-libs/posix-procenv/setpgid.c b/base/runtime/c-libs/posix-procenv/setpgid.c new file mode 100644 index 0000000..f97793e --- /dev/null +++ b/base/runtime/c-libs/posix-procenv/setpgid.c @@ -0,0 +1,26 @@ +/* setpgid.c + * + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + */ + +#include "ml-base.h" +#include "ml-values.h" +#include "ml-c.h" +#include "cfun-proto-list.h" +#include + +/* _ml_P_ProcEnv_setpgid: int * int -> unit + * + * Set user id + */ +ml_val_t _ml_P_ProcEnv_setpgid (ml_state_t *msp, ml_val_t arg) +{ + int sts; + + sts = setpgid(REC_SELINT(arg, 0),REC_SELINT(arg, 1)); + + CHK_RETURN_UNIT(msp, sts) + +} /* end of _ml_P_ProcEnv_setpgid */ + diff --git a/base/runtime/c-libs/posix-procenv/setsid.c b/base/runtime/c-libs/posix-procenv/setsid.c new file mode 100644 index 0000000..e5f1772 --- /dev/null +++ b/base/runtime/c-libs/posix-procenv/setsid.c @@ -0,0 +1,26 @@ +/* setsid.c + * + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + */ + +#include "ml-unixdep.h" +#include "ml-base.h" +#include "ml-values.h" +#include "ml-c.h" +#include "cfun-proto-list.h" + +/* _ml_P_ProcEnv_setsid: unit -> int + * + * Set session id + */ +ml_val_t _ml_P_ProcEnv_setsid (ml_state_t *msp, ml_val_t arg) +{ + pid_t pid; + + pid = setsid (); + + CHK_RETURN(msp, pid) + +} /* end of _ml_P_ProcEnv_setsid */ + diff --git a/base/runtime/c-libs/posix-procenv/setuid.c b/base/runtime/c-libs/posix-procenv/setuid.c new file mode 100644 index 0000000..bd3a3e4 --- /dev/null +++ b/base/runtime/c-libs/posix-procenv/setuid.c @@ -0,0 +1,25 @@ +/* setuid.c + * + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + */ + +#include "ml-objects.h" +#include "ml-c.h" +#include "cfun-proto-list.h" +#include + +/* _ml_P_ProcEnv_setuid: SysWord.word -> unit + * + * Set user id + */ +ml_val_t _ml_P_ProcEnv_setuid (ml_state_t *msp, ml_val_t arg) +{ + int sts; + + sts = setuid(SYSWORD_MLtoC(arg)); + + CHK_RETURN_UNIT(msp, sts) + +} /* end of _ml_P_ProcEnv_setuid */ + diff --git a/base/runtime/c-libs/posix-procenv/sysconf.c b/base/runtime/c-libs/posix-procenv/sysconf.c new file mode 100644 index 0000000..c7ae47d --- /dev/null +++ b/base/runtime/c-libs/posix-procenv/sysconf.c @@ -0,0 +1,76 @@ +/* sysconf.c + * + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + */ + +#include "ml-unixdep.h" +#include +#include +#include "ml-base.h" +#include "ml-values.h" +#include "ml-objects.h" +#include "ml-c.h" +#include "cfun-proto-list.h" +#include "../posix-error/posix-name-val.h" + + /* The following table is generated from all _SC_ values + * in unistd.h. For most systems, this will include + * _SC_ARG_MAX + * _SC_CHILD_MAX + * _SC_CLK_TCK + * _SC_JOB_CONTROL + * _SC_NGROUPS_MAX + * _SC_OPEN_MAX + * _SC_SAVED_IDS + * _SC_STREAM_MAX + * _SC_TZNAME_MAX + * _SC_VERSION + * + * The full POSIX list is given in section 4.8.1 of Std 1003.1b-1993. + * + * The SML string used to look up these values has the same + * form but without the prefix, e.g., to lookup _SC_ARG_MAX, + * use sysconf "ARG_MAX" + */ +static name_val_t values[] = { +#include "ml_sysconf.h" +}; + +#define NUMELMS ((sizeof values)/(sizeof (name_val_t))) + +/* _ml_P_ProcEnv_sysconf : string -> SysWord.word + * + * + * Get configurable system variables + */ +ml_val_t _ml_P_ProcEnv_sysconf (ml_state_t *msp, ml_val_t arg) +{ + long val; + name_val_t *attr; + ml_val_t p; + + attr = _ml_posix_nv_lookup (STR_MLtoC(arg), values, NUMELMS); + if (!attr) { + errno = EINVAL; + return RAISE_SYSERR(msp, -1); + } + + errno = 0; + while (((val = sysconf(attr->val)) == -1) && (errno == EINTR)) { + errno = 0; + continue; + } + + if (val >= 0) { + SYSWORD_ALLOC (msp, p, val); + return p; + } + else if (errno == 0) { + return RAISE_ERROR(msp, "unsupported POSIX feature"); + } + else { + return RAISE_SYSERR(msp, -1); + } + +} /* end of _ml_P_ProcEnv_sysconf */ diff --git a/base/runtime/c-libs/posix-procenv/time.c b/base/runtime/c-libs/posix-procenv/time.c new file mode 100644 index 0000000..d0e59c0 --- /dev/null +++ b/base/runtime/c-libs/posix-procenv/time.c @@ -0,0 +1,26 @@ +/* time.c + * + * COPYRIGHT (c) 1995 by AT&T Bell Laboratories. + */ + +#include "ml-base.h" +#include "ml-values.h" +#include "ml-objects.h" +#include "ml-c.h" +#include "cfun-proto-list.h" +#include + +/* _ml_P_ProcEnv_time: unit -> Int32.int + * + * Return time in seconds from 00:00:00 UTC, January 1, 1970 + */ +ml_val_t _ml_P_ProcEnv_time (ml_state_t *msp, ml_val_t arg) +{ + time_t t; + + t = time (NIL(time_t*)); + + return INT32_CtoML(msp, t); + +} /* end of _ml_P_ProcEnv_time */ + diff --git a/base/runtime/c-libs/posix-procenv/times.c b/base/runtime/c-libs/posix-procenv/times.c new file mode 100644 index 0000000..1cf6a93 --- /dev/null +++ b/base/runtime/c-libs/posix-procenv/times.c @@ -0,0 +1,41 @@ +/* times.c + * + * COPYRIGHT (c) 1995 by AT&T Bell Laboratories. + */ + +#include "ml-unixdep.h" +#include +#include "ml-base.h" +#include "ml-values.h" +#include "ml-objects.h" +#include "ml-c.h" +#include "cfun-proto-list.h" + +/* _ml_P_ProcEnv_times: unit -> int * int * int * int * int + * + * Return process and child process times, in clock ticks. + */ +ml_val_t _ml_P_ProcEnv_times (ml_state_t *msp, ml_val_t arg) +{ + clock_t t; + struct tms ts; + ml_val_t v, e, u, s, cu, cs; + + t = times (&ts); + + if (t == -1) + return RAISE_SYSERR(msp, -1); + +/* FIXME: we should do the conversion to 64-bit nanoseconds here and then + * return the result as a tuple of 64-bit values + */ + e = INT32_CtoML(msp, t); + u = INT32_CtoML(msp, ts.tms_utime); + s = INT32_CtoML(msp, ts.tms_stime); + cu = INT32_CtoML(msp, ts.tms_cutime); + cs = INT32_CtoML(msp, ts.tms_cstime); + REC_ALLOC5(msp, v, e, u, s, cu, cs); + + return v; + +} /* end of _ml_P_ProcEnv_times */ diff --git a/base/runtime/c-libs/posix-procenv/ttyname.c b/base/runtime/c-libs/posix-procenv/ttyname.c new file mode 100644 index 0000000..2c2eace --- /dev/null +++ b/base/runtime/c-libs/posix-procenv/ttyname.c @@ -0,0 +1,28 @@ +/* ttyname.c + * + * COPYRIGHT (c) 1995 by AT&T Bell Laboratories. + */ + +#include "ml-base.h" +#include "ml-values.h" +#include "ml-objects.h" +#include "ml-c.h" +#include "cfun-proto-list.h" +#include + +/* _ml_P_ProcEnv_ttyname: int -> string + * + * Return terminal name associated with file descriptor, if any. + */ +ml_val_t _ml_P_ProcEnv_ttyname (ml_state_t *msp, ml_val_t arg) +{ + char* name; + + name = ttyname(INT_MLtoC(arg)); + if (name == NIL(char *)) + return RAISE_ERROR(msp, "not a terminal device"); + + return ML_CString (msp, name); + +} /* end of _ml_P_ProcEnv_ttyname */ + diff --git a/base/runtime/c-libs/posix-procenv/uname.c b/base/runtime/c-libs/posix-procenv/uname.c new file mode 100644 index 0000000..d7a3370 --- /dev/null +++ b/base/runtime/c-libs/posix-procenv/uname.c @@ -0,0 +1,62 @@ +/* uname.c + * + * COPYRIGHT (c) 1995 by AT&T Bell Laboratories. + */ + +#include "ml-unixdep.h" +#include +#include "ml-base.h" +#include "ml-values.h" +#include "ml-objects.h" +#include "ml-c.h" +#include "cfun-proto-list.h" + +/* _ml_P_ProcEnv_uname: unit -> (string * string) list + * + * Return names of current system. + */ +ml_val_t _ml_P_ProcEnv_uname (ml_state_t *msp, ml_val_t arg) +{ + struct utsname name; + int sts; + ml_val_t l, p, s; + ml_val_t field; + + sts = uname (&name); + + if (sts == -1) + RAISE_SYSERR(msp, sts); + +/** NOTE: we should do something about possible GC!!! **/ + + l = LIST_nil; + + field = ML_CString(msp, "machine"); + s = ML_CString(msp, name.machine); + REC_ALLOC2(msp, p, field, s); + LIST_cons(msp, l, p, l); + + field = ML_CString(msp, "version"); + s = ML_CString(msp, name.version); + REC_ALLOC2(msp, p, field, s); + LIST_cons(msp, l, p, l); + + field = ML_CString(msp, "release"); + s = ML_CString(msp, name.release); + REC_ALLOC2(msp, p, field, s); + LIST_cons(msp, l, p, l); + + field = ML_CString(msp, "nodename"); + s = ML_CString(msp, name.nodename); + REC_ALLOC2(msp, p, field, s); + LIST_cons(msp, l, p, l); + + field = ML_CString(msp, "sysname"); + s = ML_CString(msp, name.sysname); + REC_ALLOC2(msp, p, field, s); + LIST_cons(msp, l, p, l); + + return l; + +} /* end of _ml_P_ProcEnv_uname */ + diff --git a/base/runtime/c-libs/posix-process/alarm.c b/base/runtime/c-libs/posix-process/alarm.c new file mode 100644 index 0000000..46dc04a --- /dev/null +++ b/base/runtime/c-libs/posix-process/alarm.c @@ -0,0 +1,33 @@ +/*! \file alarm.c + * + * \brief Implementation of OS.Process.alarm function. + * + * \author John Reppy + */ + +/* + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + */ + +#include "ml-base.h" +#include "ml-values.h" +#include "ml-objects.h" +#include "ml-c.h" +#include "cfun-proto-list.h" +#include + +/* _ml_P_Process_alarm : word64 -> word64 + * + * Set a process alarm clock + */ +ml_val_t _ml_P_Process_alarm (ml_state_t *msp, ml_val_t arg) +{ +/* TODO: use setitimer to get finer-grain periods */ + unsigned int t = (unsigned int)(WORD64_MLtoC(arg) / NS_PER_SEC); + + t = alarm(t); + + return ML_AllocWord64(msp, NS_PER_SEC * (Unsigned64_t)t); + +} /* end of _ml_P_Process_alarm */ diff --git a/base/runtime/c-libs/posix-process/cfun-list.h b/base/runtime/c-libs/posix-process/cfun-list.h new file mode 100644 index 0000000..169e85e --- /dev/null +++ b/base/runtime/c-libs/posix-process/cfun-list.h @@ -0,0 +1,25 @@ +/* cfun-list.h + * + * COPYRIGHT (c) 1995 AT&T Bell Laboratories. + * + * This file lists the directory library of C functions that are callable by ML. + */ + +#ifndef CLIB_NAME +#define CLIB_NAME "POSIX-Process" +#define CLIB_VERSION "1.0" +#define CLIB_DATE "February 16, 1995" +#endif + +CFUNC("osval", _ml_P_Process_osval, "string -> int") +CFUNC("fork", _ml_P_Process_fork, "unit -> int") +CFUNC("exec", _ml_P_Process_exec, "(string * string list) -> 'a") +CFUNC("exece", _ml_P_Process_exece, "(string * string list * string list) -> 'a") +CFUNC("execp", _ml_P_Process_execp, "(string * string list) -> 'a") +CFUNC("waitpid", _ml_P_Process_waitpid, "int * word -> int * int * int") +CFUNC("exit", _ml_P_Process_exit, "int -> 'a") +CFUNC("kill", _ml_P_Process_kill, "int * int -> unit") +CFUNC("alarm", _ml_P_Process_alarm, "int -> int") +CFUNC("pause", _ml_P_Process_pause, "unit -> unit") +CFUNC("sleep", _ml_P_Process_sleep, "int -> int") + diff --git a/base/runtime/c-libs/posix-process/cfun-proto-list.h b/base/runtime/c-libs/posix-process/cfun-proto-list.h new file mode 100644 index 0000000..16b3e96 --- /dev/null +++ b/base/runtime/c-libs/posix-process/cfun-proto-list.h @@ -0,0 +1,18 @@ +/* cfun-proto-list.h + * + * COPYRIGHT (c) 1194 AT&T Bell Laboratories. + */ + +#ifndef _CFUN_PROTO_LIST_ +#define _CFUN_PROTO_LIST_ + +#ifndef _C_LIBRARY_ +# include "c-library.h" +#endif + +/* the external definitions for the C functions */ +#define CFUNC(NAME, FUNC, MLTYPE) CFUNC_PROTO(NAME, FUNC, MLTYPE) +#include "cfun-list.h" +#undef CFUNC + +#endif /* !_CFUN_PROTO_LIST_ */ diff --git a/base/runtime/c-libs/posix-process/exec.c b/base/runtime/c-libs/posix-process/exec.c new file mode 100644 index 0000000..954a561 --- /dev/null +++ b/base/runtime/c-libs/posix-process/exec.c @@ -0,0 +1,40 @@ +/* exec.c + * + * COPYRIGHT (c) 1995 by AT&T Bell Laboratories. + */ + +#include "ml-base.h" +#include "ml-values.h" +#include "ml-objects.h" +#include "ml-c.h" +#include "cfun-proto-list.h" +#include + +/* _ml_P_Process_exec : string * string list -> 'a + * + * Overlay a new process image + */ +ml_val_t _ml_P_Process_exec (ml_state_t *msp, ml_val_t arg) +{ + int sts; + ml_val_t path = REC_SEL(arg, 0); + ml_val_t arglst = REC_SEL(arg, 1); + char **argv; + ml_val_t p; + char **cp; + + /* use the heap for temp space for the argv[] vector */ + cp = (char **)(msp->ml_allocPtr); +#ifdef SIZES_C64_ML32 + /* must 8-byte align this */ + cp = (char **)ROUNDUP((Unsigned64_t)cp, ADDR_SZB); +#endif + argv = cp; + for (p = arglst; p != LIST_nil; p = LIST_tl(p)) + *cp++ = STR_MLtoC(LIST_hd(p)); + *cp++ = 0; /* terminate the argv[] */ + + sts = execv(STR_MLtoC(path), argv); + CHK_RETURN (msp, sts) + +} /* end of _ml_P_Process_exec */ diff --git a/base/runtime/c-libs/posix-process/exece.c b/base/runtime/c-libs/posix-process/exece.c new file mode 100644 index 0000000..3ee707f --- /dev/null +++ b/base/runtime/c-libs/posix-process/exece.c @@ -0,0 +1,47 @@ +/* exece.c + * + * COPYRIGHT (c) 1995 by AT&T Bell Laboratories. + */ + +#include "ml-base.h" +#include "ml-values.h" +#include "ml-objects.h" +#include "ml-c.h" +#include "cfun-proto-list.h" +#include + +/* _ml_P_Process_exece : string * string list * string list -> 'a + * + * Overlay a new process image, using specified environment. + */ +ml_val_t _ml_P_Process_exece (ml_state_t *msp, ml_val_t arg) +{ + int sts; + ml_val_t path = REC_SEL(arg, 0); + ml_val_t arglst = REC_SEL(arg, 1); + ml_val_t envlst = REC_SEL(arg, 2); + char **argv, **envp; + ml_val_t p; + char **cp; + + /* use the heap for temp space for the argv[] and envp[] vectors */ + cp = (char **)(msp->ml_allocPtr); +#ifdef SIZES_C64_ML32 + /* must 8-byte align this */ + cp = (char **)ROUNDUP((Unsigned64_t)cp, ADDR_SZB); +#endif + argv = cp; + for (p = arglst; p != LIST_nil; p = LIST_tl(p)) + *cp++ = STR_MLtoC(LIST_hd(p)); + *cp++ = 0; /* terminate the argv[] */ + + envp = cp; + for (p = envlst; p != LIST_nil; p = LIST_tl(p)) + *cp++ = STR_MLtoC(LIST_hd(p)); + *cp++ = 0; /* terminate the envp[] */ + + sts = execve(STR_MLtoC(path), argv, envp); + + CHK_RETURN (msp, sts) + +} /* end of _ml_P_Process_exece */ diff --git a/base/runtime/c-libs/posix-process/execp.c b/base/runtime/c-libs/posix-process/execp.c new file mode 100644 index 0000000..de9d889 --- /dev/null +++ b/base/runtime/c-libs/posix-process/execp.c @@ -0,0 +1,41 @@ +/* execp.c + * + * COPYRIGHT (c) 1995 by AT&T Bell Laboratories. + */ + +#include "ml-base.h" +#include "ml-values.h" +#include "ml-objects.h" +#include "ml-c.h" +#include "cfun-proto-list.h" +#include + +/* _ml_P_Process_execp : string * string list -> 'a + * + * Overlay a new process image; resolve file to pathname using PATH + */ +ml_val_t _ml_P_Process_execp (ml_state_t *msp, ml_val_t arg) +{ + int sts; + ml_val_t file = REC_SEL(arg, 0); + ml_val_t arglst = REC_SEL(arg, 1); + char **argv; + ml_val_t p; + char **cp; + + /* use the heap for temp space for the argv[] vector */ + cp = (char **)(msp->ml_allocPtr); +#ifdef SIZES_C64_ML32 + /* must 8-byte align this */ + cp = (char **)ROUNDUP((Unsigned64_t)cp, ADDR_SZB); +#endif + argv = cp; + for (p = arglst; p != LIST_nil; p = LIST_tl(p)) + *cp++ = STR_MLtoC(LIST_hd(p)); + *cp++ = 0; /* terminate the argv[] */ + + sts = execvp(STR_MLtoC(file), argv); + + CHK_RETURN (msp, sts) + +} /* end of _ml_P_Process_execp */ diff --git a/base/runtime/c-libs/posix-process/exit.c b/base/runtime/c-libs/posix-process/exit.c new file mode 100644 index 0000000..eda4455 --- /dev/null +++ b/base/runtime/c-libs/posix-process/exit.c @@ -0,0 +1,23 @@ +/* exit.c + * + * COPYRIGHT (c) 1995 by AT&T Bell Laboratories. + */ + +#include "ml-base.h" +#include "ml-values.h" +#include "ml-objects.h" +#include "ml-c.h" +#include "cfun-proto-list.h" +#include + +/* _ml_P_Process_exit : int -> 'a + * + * Exit from process + */ +ml_val_t _ml_P_Process_exit (ml_state_t *msp, ml_val_t arg) +{ + Exit (INT_MLtoC(arg)); + + /*NOTREACHED*/ + +} /* end of _ml_P_Process_exit */ diff --git a/base/runtime/c-libs/posix-process/fork.c b/base/runtime/c-libs/posix-process/fork.c new file mode 100644 index 0000000..5b6406b --- /dev/null +++ b/base/runtime/c-libs/posix-process/fork.c @@ -0,0 +1,25 @@ +/* fork.c + * + * COPYRIGHT (c) 1995 by AT&T Bell Laboratories. + */ + +#include "ml-base.h" +#include "ml-values.h" +#include "ml-objects.h" +#include "ml-c.h" +#include "cfun-proto-list.h" +#include + +/* _ml_P_Process_fork : unit -> int + * + * Fork a new process. + */ +ml_val_t _ml_P_Process_fork (ml_state_t *msp, ml_val_t arg) +{ + int sts; + + sts = fork(); + + CHK_RETURN (msp, sts) + +} /* end of _ml_P_Process_fork */ diff --git a/base/runtime/c-libs/posix-process/kill.c b/base/runtime/c-libs/posix-process/kill.c new file mode 100644 index 0000000..7ef2ea0 --- /dev/null +++ b/base/runtime/c-libs/posix-process/kill.c @@ -0,0 +1,25 @@ +/* kill.c + * + * COPYRIGHT (c) 1995 by AT&T Bell Laboratories. + */ + +#include "ml-base.h" +#include "ml-values.h" +#include "ml-objects.h" +#include "ml-c.h" +#include "cfun-proto-list.h" +#include + +/* _ml_P_Process_kill : int * int -> unit + * + * Send a signal to a process or a group of processes + */ +ml_val_t _ml_P_Process_kill (ml_state_t *msp, ml_val_t arg) +{ + int sts; + + sts = kill(REC_SELINT(arg, 0),REC_SELINT(arg, 1)); + + CHK_RETURN_UNIT (msp, sts) + +} /* end of _ml_P_Process_kill */ diff --git a/base/runtime/c-libs/posix-process/makefile b/base/runtime/c-libs/posix-process/makefile new file mode 100644 index 0000000..3755444 --- /dev/null +++ b/base/runtime/c-libs/posix-process/makefile @@ -0,0 +1,49 @@ +# +# a template makefile for a C function library +# + +SHELL = /bin/sh + +INC_DIR = ../../include +CLIB_DIR = ../ + +INCLUDES = -I$(INC_DIR) -I$(CLIB_DIR) -I../../objs + +MAKE = make +AR = ar +ARFLAGS = rcv +RANLIB = ranlib + +LIBRARY = libposix-process.a + +VERSION = v-dummy + +OBJS = posix-process-lib.o \ + osval.o \ + fork.o \ + exec.o \ + execp.o \ + exece.o \ + waitpid.o \ + exit.o \ + kill.o \ + alarm.o \ + pause.o \ + sleep.o + +$(LIBRARY) : $(VERSION) $(OBJS) + rm -rf $(LIBRARY) + $(AR) $(ARFLAGS) $(LIBRARY) $(OBJS) + $(RANLIB) $(LIBRARY) + +$(VERSION) : + ($(MAKE) MAKE="$(MAKE)" clean) + echo "$(VERSION)" > $(VERSION) + +.c.o: $(INC_DIR)/ml-unixdep.h $(INC_DIR)/ml-base.h $(INC_DIR)/ml-values.h \ + $(CLIB_DIR)/ml-c.h cfun-proto-list.h cfun-list.h + $(CC) $(CFLAGS) $(DEFS) $(INCLUDES) -c $< + +clean : + rm -f v-* *.o $(LIBRARY) + diff --git a/base/runtime/c-libs/posix-process/osval.c b/base/runtime/c-libs/posix-process/osval.c new file mode 100644 index 0000000..a758ea5 --- /dev/null +++ b/base/runtime/c-libs/posix-process/osval.c @@ -0,0 +1,41 @@ +/* osval.c + * + * COPYRIGHT (c) 1995 by AT&T Bell Laboratories. + */ + +#include "ml-unixdep.h" +#include +#include +#include "ml-base.h" +#include "ml-values.h" +#include "tags.h" +#include "ml-objects.h" +#include "ml-c.h" +#include "cfun-proto-list.h" +#include "../posix-error/posix-name-val.h" + +static name_val_t values [] = { + {"WNOHANG", WNOHANG}, +#ifdef WUNTRACED + {"WUNTRACED", WUNTRACED}, +#endif +}; + +#define NUMELMS ((sizeof values)/(sizeof (name_val_t))) + +/* _ml_P_Process_osval : string -> int + * + * Return the OS-dependent, compile-time constant specified by the string. + */ +ml_val_t _ml_P_Process_osval (ml_state_t *msp, ml_val_t arg) +{ + name_val_t *res; + + res = _ml_posix_nv_lookup (STR_MLtoC(arg), values, NUMELMS); + if (res) + return INT_CtoML(res->val); + else { + return RAISE_ERROR(msp, "system constant not defined"); + } + +} /* end of _ml_P_Process_osval */ diff --git a/base/runtime/c-libs/posix-process/pause.c b/base/runtime/c-libs/posix-process/pause.c new file mode 100644 index 0000000..8c7a155 --- /dev/null +++ b/base/runtime/c-libs/posix-process/pause.c @@ -0,0 +1,23 @@ +/* pause.c + * + * COPYRIGHT (c) 1995 by AT&T Bell Laboratories. + */ + +#include "ml-base.h" +#include "ml-values.h" +#include "ml-objects.h" +#include "ml-c.h" +#include "cfun-proto-list.h" +#include + +/* _ml_P_Process_pause : unit -> unit + * + * Set a process alarm clock + */ +ml_val_t _ml_P_Process_pause (ml_state_t *msp, ml_val_t arg) +{ + pause(); + + return ML_unit; + +} /* end of _ml_P_Process_pause */ diff --git a/base/runtime/c-libs/posix-process/posix-process-lib.c b/base/runtime/c-libs/posix-process/posix-process-lib.c new file mode 100644 index 0000000..460dfd4 --- /dev/null +++ b/base/runtime/c-libs/posix-process/posix-process-lib.c @@ -0,0 +1,28 @@ +/* posix-process-lib.c + * + * COPYRIGHT (c) 1995 AT&T Bell Laboratories. + */ + +#include "ml-base.h" +#include "c-library.h" +#include "cfun-proto-list.h" + + +/* the table of C functions and ML names */ +#define CFUNC(NAME, FUNC, MLTYPE) CFUNC_BIND(NAME, FUNC, MLTYPE) +PVT cfunc_binding_t CFunTable[] = { +#include "cfun-list.h" + CFUNC_NULL_BIND + }; +#undef CFUNC + + +/* the POSIX Signal library */ +c_library_t POSIX_Process_Library = { + CLIB_NAME, + CLIB_VERSION, + CLIB_DATE, + NIL(clib_init_fn_t), + CFunTable + }; + diff --git a/base/runtime/c-libs/posix-process/sleep.c b/base/runtime/c-libs/posix-process/sleep.c new file mode 100644 index 0000000..126458d --- /dev/null +++ b/base/runtime/c-libs/posix-process/sleep.c @@ -0,0 +1,54 @@ +/*! \file sleep.c + * + * \author John Reppy + * + * Support for Posix.Process.sleep function + */ + +/* + * COPYRIGHT (c) 2017 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + */ + +#include "ml-base.h" +#include "ml-values.h" +#include "ml-objects.h" +#include "ml-c.h" +#include "cfun-proto-list.h" +#include +#include + +/* _ml_P_Process_sleep : Word64.word -> Word64.word + * + * Suspend execution for interval, which is given in nanoseconds. Returns 0 on + * normal completion, or else returns the amount of remaining time when signaled. + * + * TODO: generalize to finer-grain sleeping (bug #173) + */ +ml_val_t _ml_P_Process_sleep (ml_state_t *msp, ml_val_t arg) +{ + Unsigned64_t t = WORD64_MLtoC(arg); + +#if defined(HAS_NANOSLEEP) + struct timespec sleepTime, remainingTime; + sleepTime.tv_sec = (time_t)(t / NS_PER_SEC); + sleepTime.tv_nsec = (long)(t % NS_PER_SEC); + remainingTime.tv_sec = 0; + remainingTime.tv_nsec = 0; + if (nanosleep(&sleepTime, &remainingTime) == 0) { + t = 0; + } + else { + t = NS_PER_SEC * (Unsigned64_t)remainingTime.tv_sec + + (Unsigned64_t)remainingTime.tv_nsec; + } +#else + unsigned int sleepTime, remainingTime; + sleepTime = (unsigned int)(t / NS_PER_SEC); + remainingTime = sleep(sleepTime); + t = NS_PER_SEC * (Unsigned64_t)remainingTime; +#endif + + return ML_AllocWord64(msp, t); + +} /* end of _ml_P_Process_sleep */ diff --git a/base/runtime/c-libs/posix-process/waitpid.c b/base/runtime/c-libs/posix-process/waitpid.c new file mode 100644 index 0000000..de72cad --- /dev/null +++ b/base/runtime/c-libs/posix-process/waitpid.c @@ -0,0 +1,51 @@ +/* waitpid.c + * + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + */ + +#include "ml-unixdep.h" +#include +#include "ml-base.h" +#include "ml-values.h" +#include "ml-objects.h" +#include "ml-c.h" +#include "cfun-proto-list.h" + +/* _ml_P_Process_waitpid : int * SysWord.word -> int * int * int + * + * Wait for child processes to stop or terminate + */ +ml_val_t _ml_P_Process_waitpid (ml_state_t *msp, ml_val_t arg) +{ + ml_val_t ml_options = REC_SEL(arg, 1); + int options = SYSWORD_MLtoC(ml_options); + int pid; + int status, how, val; + ml_val_t r; + + pid = waitpid(REC_SELINT(arg, 0), &status, options); + if (pid < 0) { + return RAISE_SYSERR(msp, pid); + } + + if (WIFEXITED(status)) { + how = 0; + val = WEXITSTATUS(status); + } + else if (WIFSIGNALED(status)) { + how = 1; + val = WTERMSIG(status); + } + else if (WIFSTOPPED(status)) { + how = 2; + val = WSTOPSIG(status); + } + else + return RAISE_ERROR(msp, "unknown child status"); + + REC_ALLOC3(msp, r, INT_CtoML(pid), INT_CtoML(how), INT_CtoML(val)); + + return r; + +} /* end of _ml_P_Process_waitpid */ diff --git a/base/runtime/c-libs/posix-signal/cfun-list.h b/base/runtime/c-libs/posix-signal/cfun-list.h new file mode 100644 index 0000000..b1c8205 --- /dev/null +++ b/base/runtime/c-libs/posix-signal/cfun-list.h @@ -0,0 +1,15 @@ +/* cfun-list.h + * + * COPYRIGHT (c) 1995 AT&T Bell Laboratories. + * + * This file lists the directory library of C functions that are callable by ML. + */ + +#ifndef CLIB_NAME +#define CLIB_NAME "POSIX-Signal" +#define CLIB_VERSION "1.0" +#define CLIB_DATE "February 16, 1995" +#endif + +CFUNC("osval", _ml_P_Signal_osval, "string -> word") + diff --git a/base/runtime/c-libs/posix-signal/cfun-proto-list.h b/base/runtime/c-libs/posix-signal/cfun-proto-list.h new file mode 100644 index 0000000..16b3e96 --- /dev/null +++ b/base/runtime/c-libs/posix-signal/cfun-proto-list.h @@ -0,0 +1,18 @@ +/* cfun-proto-list.h + * + * COPYRIGHT (c) 1194 AT&T Bell Laboratories. + */ + +#ifndef _CFUN_PROTO_LIST_ +#define _CFUN_PROTO_LIST_ + +#ifndef _C_LIBRARY_ +# include "c-library.h" +#endif + +/* the external definitions for the C functions */ +#define CFUNC(NAME, FUNC, MLTYPE) CFUNC_PROTO(NAME, FUNC, MLTYPE) +#include "cfun-list.h" +#undef CFUNC + +#endif /* !_CFUN_PROTO_LIST_ */ diff --git a/base/runtime/c-libs/posix-signal/makefile b/base/runtime/c-libs/posix-signal/makefile new file mode 100644 index 0000000..ffddfd8 --- /dev/null +++ b/base/runtime/c-libs/posix-signal/makefile @@ -0,0 +1,39 @@ +# +# a template makefile for a C function library +# + +SHELL = /bin/sh + +INC_DIR = ../../include +CLIB_DIR = ../ + +INCLUDES = -I$(INC_DIR) -I$(CLIB_DIR) -I../../objs + +MAKE = make +AR = ar +ARFLAGS = rcv +RANLIB = ranlib + +LIBRARY = libposix-signal.a + +VERSION = v-dummy + +OBJS = posix-signal-lib.o \ + osval.o + +$(LIBRARY) : $(VERSION) $(OBJS) + rm -rf $(LIBRARY) + $(AR) $(ARFLAGS) $(LIBRARY) $(OBJS) + $(RANLIB) $(LIBRARY) + +$(VERSION) : + ($(MAKE) MAKE="$(MAKE)" clean) + echo "$(VERSION)" > $(VERSION) + +.c.o: $(INC_DIR)/ml-unixdep.h $(INC_DIR)/ml-base.h $(INC_DIR)/ml-values.h \ + $(CLIB_DIR)/ml-c.h cfun-proto-list.h cfun-list.h + $(CC) $(CFLAGS) $(DEFS) $(INCLUDES) -c $< + +clean : + rm -f v-* *.o $(LIBRARY) + diff --git a/base/runtime/c-libs/posix-signal/osval.c b/base/runtime/c-libs/posix-signal/osval.c new file mode 100644 index 0000000..ae81fc6 --- /dev/null +++ b/base/runtime/c-libs/posix-signal/osval.c @@ -0,0 +1,56 @@ +/* osval.c + * + * COPYRIGHT (c) 1995 by AT&T Bell Laboratories. + */ + +#include "ml-unixdep.h" +#include +#include "ml-base.h" +#include "ml-values.h" +#include "tags.h" +#include "ml-objects.h" +#include "ml-c.h" +#include "cfun-proto-list.h" +#include "../posix-error/posix-name-val.h" + +static name_val_t values [] = { + {"abrt", SIGABRT}, + {"alrm", SIGALRM}, + {"bus", SIGBUS}, + {"chld", SIGCHLD}, + {"cont", SIGCONT}, + {"fpe", SIGFPE}, + {"hup", SIGHUP}, + {"ill", SIGILL}, + {"int", SIGINT}, + {"kill", SIGKILL}, + {"pipe", SIGPIPE}, + {"quit", SIGQUIT}, + {"segv", SIGSEGV}, + {"stop", SIGSTOP}, + {"term", SIGTERM}, + {"tstp", SIGTSTP}, + {"ttin", SIGTTIN}, + {"ttou", SIGTTOU}, + {"usr1", SIGUSR1}, + {"usr2", SIGUSR2}, +}; + +#define NUMELMS ((sizeof values)/(sizeof (name_val_t))) + +/* _ml_P_Signal_osval : string -> word + * + * Return the OS-dependent, compile-time constant specified by the string. + */ +ml_val_t _ml_P_Signal_osval (ml_state_t *msp, ml_val_t arg) +{ + name_val_t *res; + + res = _ml_posix_nv_lookup (STR_MLtoC(arg), values, NUMELMS); + if (res) + return INT_CtoML(res->val); + else { + return RAISE_ERROR(msp, "system constant not defined"); + } + +} /* end of _ml_P_Signal_osval */ diff --git a/base/runtime/c-libs/posix-signal/posix-signal-lib.c b/base/runtime/c-libs/posix-signal/posix-signal-lib.c new file mode 100644 index 0000000..7a7e29e --- /dev/null +++ b/base/runtime/c-libs/posix-signal/posix-signal-lib.c @@ -0,0 +1,28 @@ +/* posix-signal-lib.c + * + * COPYRIGHT (c) 1995 AT&T Bell Laboratories. + */ + +#include "ml-base.h" +#include "c-library.h" +#include "cfun-proto-list.h" + + +/* the table of C functions and ML names */ +#define CFUNC(NAME, FUNC, MLTYPE) CFUNC_BIND(NAME, FUNC, MLTYPE) +PVT cfunc_binding_t CFunTable[] = { +#include "cfun-list.h" + CFUNC_NULL_BIND + }; +#undef CFUNC + + +/* the POSIX Signal library */ +c_library_t POSIX_Signal_Library = { + CLIB_NAME, + CLIB_VERSION, + CLIB_DATE, + NIL(clib_init_fn_t), + CFunTable + }; + diff --git a/base/runtime/c-libs/posix-sysdb/cfun-list.h b/base/runtime/c-libs/posix-sysdb/cfun-list.h new file mode 100644 index 0000000..9bb0273 --- /dev/null +++ b/base/runtime/c-libs/posix-sysdb/cfun-list.h @@ -0,0 +1,18 @@ +/* cfun-list.h + * + * COPYRIGHT (c) 1995 AT&T Bell Laboratories. + * + * This file lists the directory library of C functions that are callable by ML. + */ + +#ifndef CLIB_NAME +#define CLIB_NAME "POSIX-SysDB" +#define CLIB_VERSION "1.0" +#define CLIB_DATE "February 16, 1995" +#endif + +CFUNC("getgrgid", _ml_P_SysDB_getgrgid, "word -> string * word * string list") +CFUNC("getgrnam", _ml_P_SysDB_getgrnam, "string -> string * word * string list") +CFUNC("getpwuid", _ml_P_SysDB_getpwuid, "word -> string * word * word * string * string") +CFUNC("getpwnam", _ml_P_SysDB_getpwnam, "string -> string * word * word * string * string") + diff --git a/base/runtime/c-libs/posix-sysdb/cfun-proto-list.h b/base/runtime/c-libs/posix-sysdb/cfun-proto-list.h new file mode 100644 index 0000000..16b3e96 --- /dev/null +++ b/base/runtime/c-libs/posix-sysdb/cfun-proto-list.h @@ -0,0 +1,18 @@ +/* cfun-proto-list.h + * + * COPYRIGHT (c) 1194 AT&T Bell Laboratories. + */ + +#ifndef _CFUN_PROTO_LIST_ +#define _CFUN_PROTO_LIST_ + +#ifndef _C_LIBRARY_ +# include "c-library.h" +#endif + +/* the external definitions for the C functions */ +#define CFUNC(NAME, FUNC, MLTYPE) CFUNC_PROTO(NAME, FUNC, MLTYPE) +#include "cfun-list.h" +#undef CFUNC + +#endif /* !_CFUN_PROTO_LIST_ */ diff --git a/base/runtime/c-libs/posix-sysdb/getgrgid.c b/base/runtime/c-libs/posix-sysdb/getgrgid.c new file mode 100644 index 0000000..6eaa30f --- /dev/null +++ b/base/runtime/c-libs/posix-sysdb/getgrgid.c @@ -0,0 +1,38 @@ +/* getgrgid.c + * + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + */ + +#include "ml-unixdep.h" +#include +#include +#include "ml-base.h" +#include "ml-values.h" +#include "tags.h" +#include "ml-objects.h" +#include "ml-c.h" +#include "cfun-proto-list.h" + +/* _ml_P_SysDB_getgrgid : SysWord.word -> string * SysWord.word * string list + * + * Get group file entry by gid. + */ +ml_val_t _ml_P_SysDB_getgrgid (ml_state_t *msp, ml_val_t arg) +{ + struct group* info; + ml_val_t gr_name, gr_gid, gr_mem, r; + + info = getgrgid(SYSWORD_MLtoC(arg)); + if (info == NIL(struct group *)) + return RAISE_SYSERR(msp, -1); + + gr_name = ML_CString (msp, info->gr_name); + SYSWORD_ALLOC (msp, gr_gid, (SysWord_t)(info->gr_gid)); + gr_mem = ML_CStringList(msp, info->gr_mem); + + REC_ALLOC3(msp, r, gr_name, gr_gid, gr_mem); + + return r; + +} /* end of _ml_P_SysDB_getgrgid */ diff --git a/base/runtime/c-libs/posix-sysdb/getgrnam.c b/base/runtime/c-libs/posix-sysdb/getgrnam.c new file mode 100644 index 0000000..22a35bc --- /dev/null +++ b/base/runtime/c-libs/posix-sysdb/getgrnam.c @@ -0,0 +1,38 @@ +/* getgrnam.c + * + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + */ + +#include "ml-unixdep.h" +#include +#include +#include "ml-base.h" +#include "ml-values.h" +#include "tags.h" +#include "ml-objects.h" +#include "ml-c.h" +#include "cfun-proto-list.h" + +/* _ml_P_SysDB_getgrnam : string -> string * SysWord.word * string list + * + * Get group file entry by name. + */ +ml_val_t _ml_P_SysDB_getgrnam (ml_state_t *msp, ml_val_t arg) +{ + struct group* info; + ml_val_t gr_name, gr_gid, gr_mem, r; + + info = getgrnam(STR_MLtoC(arg)); + if (info == NIL(struct group *)) + return RAISE_SYSERR(msp, -1); + + gr_name = ML_CString (msp, info->gr_name); + SYSWORD_ALLOC (msp, gr_gid, (Word_t)(info->gr_gid)); + gr_mem = ML_CStringList(msp, info->gr_mem); + + REC_ALLOC3(msp, r, gr_name, gr_gid, gr_mem); + + return r; + +} /* end of _ml_P_SysDB_getgrnam */ diff --git a/base/runtime/c-libs/posix-sysdb/getpwnam.c b/base/runtime/c-libs/posix-sysdb/getpwnam.c new file mode 100644 index 0000000..287b2ba --- /dev/null +++ b/base/runtime/c-libs/posix-sysdb/getpwnam.c @@ -0,0 +1,39 @@ +/* getpwnam.c + * + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + */ + +#include "ml-unixdep.h" +#include "ml-base.h" +#include "ml-values.h" +#include "tags.h" +#include "ml-objects.h" +#include "ml-c.h" +#include "cfun-proto-list.h" +#include + +/* _ml_P_SysDB_getpwnam : string -> string * SysWord.word * SysWord.word * string * string + * + * Get password file entry by name. + */ +ml_val_t _ml_P_SysDB_getpwnam (ml_state_t *msp, ml_val_t arg) +{ + struct passwd* info; + ml_val_t pw_name, pw_uid, pw_gid, pw_dir, pw_shell, r; + + info = getpwnam(STR_MLtoC(arg)); + if (info == NIL(struct passwd *)) + return RAISE_SYSERR(msp, -1); + + pw_name = ML_CString (msp, info->pw_name); + SYSWORD_ALLOC (msp, pw_uid, (Word_t)(info->pw_uid)); + SYSWORD_ALLOC (msp, pw_gid, (Word_t)(info->pw_gid)); + pw_dir = ML_CString (msp, info->pw_dir); + pw_shell = ML_CString (msp, info->pw_shell); + + REC_ALLOC5(msp, r, pw_name, pw_uid, pw_gid, pw_dir, pw_shell); + + return r; + +} /* end of _ml_P_SysDB_getpwnam */ diff --git a/base/runtime/c-libs/posix-sysdb/getpwuid.c b/base/runtime/c-libs/posix-sysdb/getpwuid.c new file mode 100644 index 0000000..4e63c3b --- /dev/null +++ b/base/runtime/c-libs/posix-sysdb/getpwuid.c @@ -0,0 +1,40 @@ +/* getpwuid.c + * + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + */ + +#include "ml-unixdep.h" +#include +#include "ml-base.h" +#include "ml-values.h" +#include "tags.h" +#include "ml-objects.h" +#include "ml-c.h" +#include "cfun-proto-list.h" + +/* _ml_P_SysDB_getpwuid : SysWord.word -> string * SysWord.word * SysWord.word * string * string + * + * Get password file entry by uid. + */ +ml_val_t _ml_P_SysDB_getpwuid (ml_state_t *msp, ml_val_t arg) +{ + struct passwd* info; + ml_val_t pw_name, pw_uid, pw_gid, pw_dir, pw_shell, r; + + info = getpwuid(SYSWORD_MLtoC(arg)); + if (info == NIL(struct passwd *)) { + return RAISE_SYSERR(msp, -1); + } + + pw_name = ML_CString (msp, info->pw_name); + SYSWORD_ALLOC (msp, pw_uid, (Word_t)(info->pw_uid)); + SYSWORD_ALLOC (msp, pw_gid, (Word_t)(info->pw_gid)); + pw_dir = ML_CString (msp, info->pw_dir); + pw_shell = ML_CString (msp, info->pw_shell); + + REC_ALLOC5(msp, r, pw_name, pw_uid, pw_gid, pw_dir, pw_shell); + + return r; + +} /* end of _ml_P_SysDB_getpwuid */ diff --git a/base/runtime/c-libs/posix-sysdb/makefile b/base/runtime/c-libs/posix-sysdb/makefile new file mode 100644 index 0000000..f4002ff --- /dev/null +++ b/base/runtime/c-libs/posix-sysdb/makefile @@ -0,0 +1,42 @@ +# +# a template makefile for a C function library +# + +SHELL = /bin/sh + +INC_DIR = ../../include +CLIB_DIR = ../ + +INCLUDES = -I$(INC_DIR) -I$(CLIB_DIR) -I../../objs + +MAKE = make +AR = ar +ARFLAGS = rcv +RANLIB = ranlib + +LIBRARY = libposix-sysdb.a + +VERSION = v-dummy + +OBJS = posix-sysdb-lib.o \ + getgrgid.o \ + getgrnam.o \ + getpwuid.o \ + getpwnam.o + +$(LIBRARY) : $(VERSION) $(OBJS) + rm -rf $(LIBRARY) + $(AR) $(ARFLAGS) $(LIBRARY) $(OBJS) + $(RANLIB) $(LIBRARY) + +$(VERSION) : + ($(MAKE) MAKE="$(MAKE)" clean) + echo "$(VERSION)" > $(VERSION) + +.c.o: $(INC_DIR)/ml-unixdep.h $(INC_DIR)/ml-base.h $(INC_DIR)/ml-values.h \ + $(CLIB_DIR)/ml-c.h cfun-proto-list.h cfun-list.h + $(CC) $(CFLAGS) $(DEFS) $(INCLUDES) -c $< + +clean : + rm -f v-* *.o $(LIBRARY) + diff --git a/base/runtime/c-libs/posix-sysdb/posix-sysdb-lib.c b/base/runtime/c-libs/posix-sysdb/posix-sysdb-lib.c new file mode 100644 index 0000000..08a79d3 --- /dev/null +++ b/base/runtime/c-libs/posix-sysdb/posix-sysdb-lib.c @@ -0,0 +1,28 @@ +/* posix-sysdb-lib.c + * + * COPYRIGHT (c) 1995 AT&T Bell Laboratories. + */ + +#include "ml-base.h" +#include "c-library.h" +#include "cfun-proto-list.h" + + +/* the table of C functions and ML names */ +#define CFUNC(NAME, FUNC, MLTYPE) CFUNC_BIND(NAME, FUNC, MLTYPE) +PVT cfunc_binding_t CFunTable[] = { +#include "cfun-list.h" + CFUNC_NULL_BIND + }; +#undef CFUNC + + +/* the POSIX Signal library */ +c_library_t POSIX_SysDB_Library = { + CLIB_NAME, + CLIB_VERSION, + CLIB_DATE, + NIL(clib_init_fn_t), + CFunTable + }; + diff --git a/base/runtime/c-libs/posix-tty/cfun-list.h b/base/runtime/c-libs/posix-tty/cfun-list.h new file mode 100644 index 0000000..0cfc270 --- /dev/null +++ b/base/runtime/c-libs/posix-tty/cfun-list.h @@ -0,0 +1,29 @@ +/*! \file cfun-list.h + * + * This file lists the directory library of C functions that are callable by ML. + * + * \author John Reppy + */ + +/* + * COPYRIGHT (c) 2022 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + */ + +#ifndef CLIB_NAME +#define CLIB_NAME "POSIX-TTY" +#define CLIB_VERSION "1.1" +#define CLIB_DATE "July 25, 2022" +#endif + +CFUNC("osval", _ml_P_TTY_osval, "string -> int") +CFUNC("tcgetattr", _ml_P_TTY_tcgetattr, "int -> termio_rep") +CFUNC("tcsetattr", _ml_P_TTY_tcsetattr, "int * int * termio_rep -> unit") +CFUNC("tcsendbreak", _ml_P_TTY_tcsendbreak, "int * int -> unit") +CFUNC("tcdrain", _ml_P_TTY_tcdrain, "int -> unit") +CFUNC("tcflush", _ml_P_TTY_tcflush, "int * int -> unit") +CFUNC("tcflow", _ml_P_TTY_tcflow, "int * int -> unit") +CFUNC("tcgetpgrp", _ml_P_TTY_tcgetpgrp, "int -> int") +CFUNC("tcsetpgrp", _ml_P_TTY_tcsetpgrp, "int * int -> unit") +/* Basis Library proposal 2021-001 */ +CFUNC("getwinsz", _ml_P_TTY_getwinsz, "int -> (int * int) option") diff --git a/base/runtime/c-libs/posix-tty/cfun-proto-list.h b/base/runtime/c-libs/posix-tty/cfun-proto-list.h new file mode 100644 index 0000000..16b3e96 --- /dev/null +++ b/base/runtime/c-libs/posix-tty/cfun-proto-list.h @@ -0,0 +1,18 @@ +/* cfun-proto-list.h + * + * COPYRIGHT (c) 1194 AT&T Bell Laboratories. + */ + +#ifndef _CFUN_PROTO_LIST_ +#define _CFUN_PROTO_LIST_ + +#ifndef _C_LIBRARY_ +# include "c-library.h" +#endif + +/* the external definitions for the C functions */ +#define CFUNC(NAME, FUNC, MLTYPE) CFUNC_PROTO(NAME, FUNC, MLTYPE) +#include "cfun-list.h" +#undef CFUNC + +#endif /* !_CFUN_PROTO_LIST_ */ diff --git a/base/runtime/c-libs/posix-tty/getwinsz.c b/base/runtime/c-libs/posix-tty/getwinsz.c new file mode 100644 index 0000000..5776739 --- /dev/null +++ b/base/runtime/c-libs/posix-tty/getwinsz.c @@ -0,0 +1,46 @@ +/*! \file getwinsz.c + * + * \author John Reppy + * + * Runtime support for Basis Library proposal 2021-001 (Add `getWindowSz` + * function to `Posix.TTY` structure). + */ + +/* + * COPYRIGHT (c) 2022 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + */ + +#include "ml-unixdep.h" +#include +#include +#include "ml-base.h" +#include "ml-objects.h" +#include "ml-values.h" + +/* _ml_P_TTY_getwinsz : int -> (int * int) option + */ +ml_val_t _ml_P_TTY_getwinsz (ml_state_t *msp, ml_val_t arg) +{ +#ifdef TIOCGWINSZ + int fd, sts; + struct winsize wsz; + + fd = INT_MLtoC(arg); + sts = ioctl (fd, TIOCGWINSZ, &wsz); + if (sts == 0) { + ml_val_t result; + OPTION_SOME( + msp, + result, + ML_Alloc2(msp, INT_CtoML(wsz.ws_row), INT_CtoML(wsz.ws_col))); + return result; + } + else { + return OPTION_NONE; + } +#else + return OPTION_NONE; +#endif + +} /* end of _ml_P_TTY_getwinsz */ diff --git a/base/runtime/c-libs/posix-tty/makefile b/base/runtime/c-libs/posix-tty/makefile new file mode 100644 index 0000000..b0bda5b --- /dev/null +++ b/base/runtime/c-libs/posix-tty/makefile @@ -0,0 +1,48 @@ +# +# a template makefile for a C function library +# + +SHELL = /bin/sh + +INC_DIR = ../../include +CLIB_DIR = ../ + +INCLUDES = -I$(INC_DIR) -I$(CLIB_DIR) -I../../objs + +MAKE = make +AR = ar +ARFLAGS = rcv +RANLIB = ranlib + +LIBRARY = libposix-tty.a + +VERSION = v-dummy + +OBJS = posix-tty-lib.o \ + osval.o \ + tcdrain.o \ + tcflow.o \ + tcflush.o \ + tcgetattr.o \ + tcgetpgrp.o \ + tcsendbreak.o \ + tcsetattr.o \ + tcsetpgrp.o \ + getwinsz.o + +$(LIBRARY) : $(VERSION) $(OBJS) + rm -rf $(LIBRARY) + $(AR) $(ARFLAGS) $(LIBRARY) $(OBJS) + $(RANLIB) $(LIBRARY) + +$(VERSION) : + ($(MAKE) MAKE="$(MAKE)" clean) + echo "$(VERSION)" > $(VERSION) + +.c.o: $(INC_DIR)/ml-osdep.h $(INC_DIR)/ml-base.h $(INC_DIR)/ml-values.h \ + $(CLIB_DIR)/ml-c.h cfun-proto-list.h cfun-list.h + $(CC) $(CFLAGS) $(DEFS) $(INCLUDES) -c $< + +clean : + rm -f v-* *.o $(LIBRARY) + diff --git a/base/runtime/c-libs/posix-tty/osval.c b/base/runtime/c-libs/posix-tty/osval.c new file mode 100644 index 0000000..bfdda61 --- /dev/null +++ b/base/runtime/c-libs/posix-tty/osval.c @@ -0,0 +1,106 @@ +/* osval.c + * + * COPYRIGHT (c) 1995 by AT&T Bell Laboratories. + */ + +#include "ml-unixdep.h" +#include +#include "ml-base.h" +#include "ml-values.h" +#include "tags.h" +#include "ml-objects.h" +#include "ml-c.h" +#include "cfun-proto-list.h" +#include "../posix-error/posix-name-val.h" + +PVT name_val_t values [] = { + {"B0", B0}, + {"B110", B110}, + {"B1200", B1200}, + {"B134", B134}, + {"B150", B150}, + {"B1800", B1800}, + {"B19200", B19200}, + {"B200", B200}, + {"B2400", B2400}, + {"B300", B300}, + {"B38400", B38400}, + {"B4800", B4800}, + {"B50", B50}, + {"B600", B600}, + {"B75", B75}, + {"B9600", B9600}, + {"BRKINT", BRKINT}, + {"CLOCAL", CLOCAL}, + {"CREAD", CREAD}, + {"CS5", CS5}, + {"CS6", CS6}, + {"CS7", CS7}, + {"CS8", CS8}, + {"CSIZE", CSIZE}, + {"CSTOPB", CSTOPB}, + {"ECHO", ECHO}, + {"ECHOE", ECHOE}, + {"ECHOK", ECHOK}, + {"ECHONL", ECHONL}, + {"EOF", VEOF}, + {"EOL", VEOL}, + {"ERASE", VERASE}, + {"HUPCL", HUPCL}, + {"ICANON", ICANON}, + {"ICRNL", ICRNL}, + {"IEXTEN", IEXTEN}, + {"IGNBRK", IGNBRK}, + {"IGNCR", IGNCR}, + {"IGNPAR", IGNPAR}, + {"INLCR", INLCR}, + {"INPCK", INPCK}, + {"INTR", VINTR}, + {"ISIG", ISIG}, + {"ISTRIP", ISTRIP}, + {"IXOFF", IXOFF}, + {"IXON", IXON}, + {"KILL", VKILL}, + {"MIN", VMIN}, + {"NCCS", NCCS}, + {"NOFLSH", NOFLSH}, + {"OPOST", OPOST}, + {"PARENB", PARENB}, + {"PARMRK", PARMRK}, + {"PARODD", PARODD}, + {"QUIT", VQUIT}, + {"START", VSTART}, + {"STOP", VSTOP}, + {"SUSP", VSUSP}, + {"TCIFLUSH", TCIFLUSH}, + {"TCIOFF", TCIOFF}, + {"TCIOFLUSH", TCIOFLUSH}, + {"TCION", TCION}, + {"TCOFLUSH", TCOFLUSH}, + {"TCOOFF", TCOOFF}, + {"TCOON", TCOON}, + {"TCSADRAIN", TCSADRAIN}, + {"TCSAFLUSH", TCSAFLUSH}, + {"TCSANOW", TCSANOW}, + {"TIME", VTIME}, + {"TOSTOP", TOSTOP}, +}; + +#define NUMELMS ((sizeof values)/(sizeof (name_val_t))) + +/* _ml_P_TTY_osval : string -> word + * + * Return the OS-dependent, compile-time constant specified by the string. + */ +ml_val_t _ml_P_TTY_osval (ml_state_t *msp, ml_val_t arg) +{ + name_val_t *res; + + res = _ml_posix_nv_lookup (STR_MLtoC(arg), values, NUMELMS); + if (res) + return INT_CtoML(res->val); + else { + return RAISE_ERROR(msp, "system constant not defined"); + } + +} /* end of _ml_P_TTY_osval */ diff --git a/base/runtime/c-libs/posix-tty/posix-tty-lib.c b/base/runtime/c-libs/posix-tty/posix-tty-lib.c new file mode 100644 index 0000000..045032d --- /dev/null +++ b/base/runtime/c-libs/posix-tty/posix-tty-lib.c @@ -0,0 +1,27 @@ +/* posix-io-lib.c + * + * COPYRIGHT (c) 1995 AT&T Bell Laboratories. + */ + +#include "ml-base.h" +#include "c-library.h" +#include "cfun-proto-list.h" + + +/* the table of C functions and ML names */ +#define CFUNC(NAME, FUNC, MLTYPE) CFUNC_BIND(NAME, FUNC, MLTYPE) +PVT cfunc_binding_t CFunTable[] = { +#include "cfun-list.h" + CFUNC_NULL_BIND + }; +#undef CFUNC + + +/* the POSIX Signal library */ +c_library_t POSIX_TTY_Library = { + CLIB_NAME, + CLIB_VERSION, + CLIB_DATE, + NIL(clib_init_fn_t), + CFunTable + }; diff --git a/base/runtime/c-libs/posix-tty/tcdrain.c b/base/runtime/c-libs/posix-tty/tcdrain.c new file mode 100644 index 0000000..ad5d455 --- /dev/null +++ b/base/runtime/c-libs/posix-tty/tcdrain.c @@ -0,0 +1,26 @@ +/* tcdrain.c + * + * COPYRIGHT (c) 1995 by AT&T Bell Laboratories. + */ + +#include "ml-unixdep.h" +#include +#include "ml-base.h" +#include "ml-values.h" +#include "ml-objects.h" +#include "ml-c.h" +#include "cfun-proto-list.h" + +/* _ml_P_TTY_tcdrain : int -> unit + * + * Wait for all output to be transmitted. + */ +ml_val_t _ml_P_TTY_tcdrain (ml_state_t *msp, ml_val_t arg) +{ + int sts, fd = INT_MLtoC(arg); + + sts = tcdrain(fd); + + CHK_RETURN_UNIT(msp, sts) + +} /* end of _ml_P_TTY_tcdrain */ diff --git a/base/runtime/c-libs/posix-tty/tcflow.c b/base/runtime/c-libs/posix-tty/tcflow.c new file mode 100644 index 0000000..d156779 --- /dev/null +++ b/base/runtime/c-libs/posix-tty/tcflow.c @@ -0,0 +1,26 @@ +/* tcflow.c + * + * COPYRIGHT (c) 1995 by AT&T Bell Laboratories. + */ + +#include "ml-unixdep.h" +#include +#include "ml-base.h" +#include "ml-values.h" +#include "ml-objects.h" +#include "ml-c.h" +#include "cfun-proto-list.h" + +/* _ml_P_TTY_tcflow : int * int -> unit + * + * Suspend transmission or receipt of data. + */ +ml_val_t _ml_P_TTY_tcflow (ml_state_t *msp, ml_val_t arg) +{ + int sts; + + sts = tcflow(REC_SELINT(arg, 0),REC_SELINT(arg, 1)); + + CHK_RETURN_UNIT(msp, sts) + +} /* end of _ml_P_TTY_tcflow */ diff --git a/base/runtime/c-libs/posix-tty/tcflush.c b/base/runtime/c-libs/posix-tty/tcflush.c new file mode 100644 index 0000000..d7e615a --- /dev/null +++ b/base/runtime/c-libs/posix-tty/tcflush.c @@ -0,0 +1,26 @@ +/* tcflush.c + * + * COPYRIGHT (c) 1995 by AT&T Bell Laboratories. + */ + +#include "ml-unixdep.h" +#include +#include "ml-base.h" +#include "ml-values.h" +#include "ml-objects.h" +#include "ml-c.h" +#include "cfun-proto-list.h" + +/* _ml_P_TTY_tcflush : int * int -> unit + * + * Discard data that is written but not sent, or received but not read. + */ +ml_val_t _ml_P_TTY_tcflush (ml_state_t *msp, ml_val_t arg) +{ + int sts; + + sts = tcflush(REC_SELINT(arg, 0),REC_SELINT(arg, 1)); + + CHK_RETURN_UNIT(msp, sts) + +} /* end of _ml_P_TTY_tcflush */ diff --git a/base/runtime/c-libs/posix-tty/tcgetattr.c b/base/runtime/c-libs/posix-tty/tcgetattr.c new file mode 100644 index 0000000..371df11 --- /dev/null +++ b/base/runtime/c-libs/posix-tty/tcgetattr.c @@ -0,0 +1,60 @@ +/* tcgetattr.c + * + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + */ + +#include "ml-unixdep.h" +#include INCLUDE_TIME_H +#include +#include "ml-base.h" +#include "ml-values.h" +#include "ml-objects.h" +#include "ml-c.h" +#include "cfun-proto-list.h" + +/* _ml_P_TTY_tcgetattr : int -> termio_rep + * termio_rep = (SysWord.word * SysWord.word * SysWord.word * SysWord.word * string * SysWord.word * SysWord.word) + * + * Get parameters associated with tty. + * + * NOTE: the calls to cfget[io]speed by making the code more OS-dependent + * and using the structure of struct termios. + */ +ml_val_t _ml_P_TTY_tcgetattr (ml_state_t *msp, ml_val_t arg) +{ + int sts, fd = INT_MLtoC(arg); + ml_val_t iflag, oflag, cflag, lflag; + ml_val_t cc, ispeed, ospeed, obj; + struct termios data; + + sts = tcgetattr(fd, &data); + + if (sts < 0) { + return RAISE_SYSERR(msp, sts); + } + + /* allocate the vector; note that this might cause a GC */ + cc = ML_AllocString (msp, NCCS); + memcpy (GET_SEQ_DATAPTR(void, cc), data.c_cc, NCCS); + + SYSWORD_ALLOC (msp, iflag, data.c_iflag); + SYSWORD_ALLOC (msp, oflag, data.c_oflag); + SYSWORD_ALLOC (msp, cflag, data.c_cflag); + SYSWORD_ALLOC (msp, lflag, data.c_lflag); + SYSWORD_ALLOC (msp, ispeed, cfgetispeed (&data)); + SYSWORD_ALLOC (msp, ospeed, cfgetospeed (&data)); + + ML_AllocWrite (msp, 0, MAKE_DESC(DTAG_record, 7)); + ML_AllocWrite (msp, 1, iflag); + ML_AllocWrite (msp, 2, oflag); + ML_AllocWrite (msp, 3, cflag); + ML_AllocWrite (msp, 4, lflag); + ML_AllocWrite (msp, 5, cc); + ML_AllocWrite (msp, 6, ispeed); + ML_AllocWrite (msp, 7, ospeed); + obj = ML_Alloc(msp, 7); + + return obj; + +} /* end of _ml_P_TTY_tcgetattr */ diff --git a/base/runtime/c-libs/posix-tty/tcgetpgrp.c b/base/runtime/c-libs/posix-tty/tcgetpgrp.c new file mode 100644 index 0000000..9da3719 --- /dev/null +++ b/base/runtime/c-libs/posix-tty/tcgetpgrp.c @@ -0,0 +1,24 @@ +/* tcgetpgrp.c + * + * COPYRIGHT (c) 1995 by AT&T Bell Laboratories. + */ + +#include "ml-unixdep.h" +#include +#include "ml-base.h" +#include "ml-values.h" +#include "ml-objects.h" +#include "ml-c.h" +#include "cfun-proto-list.h" + +/* _ml_P_TTY_tcgetpgrp : int -> int + * + * Get foreground process group id of tty. + */ +ml_val_t _ml_P_TTY_tcgetpgrp (ml_state_t *msp, ml_val_t arg) +{ + int fd = INT_MLtoC(arg); + + return INT_CtoML(tcgetpgrp(fd)); + +} /* end of _ml_P_TTY_tcgetpgrp */ diff --git a/base/runtime/c-libs/posix-tty/tcsendbreak.c b/base/runtime/c-libs/posix-tty/tcsendbreak.c new file mode 100644 index 0000000..72679f4 --- /dev/null +++ b/base/runtime/c-libs/posix-tty/tcsendbreak.c @@ -0,0 +1,26 @@ +/* tcsendbreak.c + * + * COPYRIGHT (c) 1995 by AT&T Bell Laboratories. + */ + +#include "ml-unixdep.h" +#include +#include "ml-base.h" +#include "ml-values.h" +#include "ml-objects.h" +#include "ml-c.h" +#include "cfun-proto-list.h" + +/* _ml_P_TTY_tcsendbreak : int * int -> unit + * + * Send break condition on tty line. + */ +ml_val_t _ml_P_TTY_tcsendbreak (ml_state_t *msp, ml_val_t arg) +{ + int sts; + + sts = tcsendbreak(REC_SELINT(arg, 0),REC_SELINT(arg, 1)); + + CHK_RETURN_UNIT(msp, sts) + +} /* end of _ml_P_TTY_tcsendbreak */ diff --git a/base/runtime/c-libs/posix-tty/tcsetattr.c b/base/runtime/c-libs/posix-tty/tcsetattr.c new file mode 100644 index 0000000..6bf4f2d --- /dev/null +++ b/base/runtime/c-libs/posix-tty/tcsetattr.c @@ -0,0 +1,58 @@ +/* tcsetattr.c + * + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + */ + +#include "ml-unixdep.h" +#include +#include "ml-base.h" +#include "ml-values.h" +#include "ml-objects.h" +#include "ml-c.h" +#include "cfun-proto-list.h" + +/* _ml_P_TTY_tcsetattr : int * int * termio_rep -> unit + * termio_rep = (SysWord.word * SysWord.word * SysWord.word * SysWord.word * string * SysWord.word * SysWord.word) + * + * Set parameters associated with tty. + * + * NOTE: the calls to cfset[io]speed by making the code more OS-dependent + * and using the structure of struct termios. + */ +ml_val_t _ml_P_TTY_tcsetattr (ml_state_t *msp, ml_val_t arg) +{ + int sts, fd = REC_SELINT(arg, 0); + int action = REC_SELINT(arg, 1); + ml_val_t termio_rep = REC_SEL(arg, 2); + struct termios data; + ml_val_t tmp; + + tmp = REC_SEL(termio_rep, 0); + data.c_iflag = SYSWORD_MLtoC(tmp); + tmp = REC_SEL(termio_rep, 1); + data.c_oflag = SYSWORD_MLtoC(tmp); + tmp = REC_SEL(termio_rep, 2); + data.c_cflag = SYSWORD_MLtoC(tmp); + tmp = REC_SEL(termio_rep, 3); + data.c_lflag = SYSWORD_MLtoC(tmp); + + memcpy (data.c_cc, GET_SEQ_DATAPTR(void, REC_SEL(termio_rep, 4)), NCCS); + + tmp = REC_SEL(termio_rep, 5); + sts = cfsetispeed (&data, SYSWORD_MLtoC(tmp)); + if (sts < 0) { + return RAISE_SYSERR(msp, sts); + } + + tmp = REC_SEL(termio_rep, 6); + sts = cfsetospeed (&data, SYSWORD_MLtoC(tmp)); + if (sts < 0) { + return RAISE_SYSERR(msp, sts); + } + + sts = tcsetattr(fd, action, &data); + + CHK_RETURN_UNIT(msp, sts) + +} /* end of _ml_P_TTY_tcsetattr */ diff --git a/base/runtime/c-libs/posix-tty/tcsetpgrp.c b/base/runtime/c-libs/posix-tty/tcsetpgrp.c new file mode 100644 index 0000000..f89d072 --- /dev/null +++ b/base/runtime/c-libs/posix-tty/tcsetpgrp.c @@ -0,0 +1,26 @@ +/* tcsetpgrp.c + * + * COPYRIGHT (c) 1995 by AT&T Bell Laboratories. + */ + +#include "ml-unixdep.h" +#include +#include "ml-base.h" +#include "ml-values.h" +#include "ml-objects.h" +#include "ml-c.h" +#include "cfun-proto-list.h" + +/* _ml_P_TTY_tcsetpgrp : int * int -> unit + * + * Set foreground process group id of tty. + */ +ml_val_t _ml_P_TTY_tcsetpgrp (ml_state_t *msp, ml_val_t arg) +{ + int sts; + + sts = tcsetpgrp(REC_SELINT(arg, 0),REC_SELINT(arg, 1)); + + CHK_RETURN_UNIT(msp, sts) + +} /* end of _ml_P_TTY_tcsetpgrp */ diff --git a/base/runtime/c-libs/smlnj-ccalls/COPYRIGHT b/base/runtime/c-libs/smlnj-ccalls/COPYRIGHT new file mode 100644 index 0000000..10fc9c8 --- /dev/null +++ b/base/runtime/c-libs/smlnj-ccalls/COPYRIGHT @@ -0,0 +1,5 @@ +/* + * + * COPYRIGHT (c) 1995 by AT&T Bell Laboratories. + * + */ diff --git a/base/runtime/c-libs/smlnj-ccalls/c-calls-fns.c b/base/runtime/c-libs/smlnj-ccalls/c-calls-fns.c new file mode 100644 index 0000000..68f5741 --- /dev/null +++ b/base/runtime/c-libs/smlnj-ccalls/c-calls-fns.c @@ -0,0 +1,323 @@ +/* c-calls-fns.c + * + * COPYRIGHT (c) 1995 by AT&T Bell Laboratories. + * + */ + +#include +#include +#include +#include "ml-base.h" +#include "ml-values.h" +#include "ml-state.h" +#include "tags.h" +#include "ml-objects.h" +#include "ml-globals.h" +#include "cache-flush.h" + +#include "c-calls.h" + +/* layout of a code_header must match offsets in c-entry.asm */ +typedef struct code_header { + ml_val_t the_fn; + char *argtypes[N_ARGS]; + char *rettype; + int nargs; +} code_header_t; + +extern Addr_t grabPC(); +extern Addr_t grabPCend(); + +Word_t *last_entry; /* points to the beginning of the last c-entry */ + /* executed set by grabPC in c-entry.asm */ +PVT code_header_t *last_code_hdr = NULL; /* last code hdr used */ + +#define CODE_HDR_START(p) \ + ((code_header_t *)((Byte_t *)(p)-sizeof(code_header_t))) + + +/* code for maintaining ML objects (currently only functions) potentially + * only reachable from C. Currently, we use a list. Objects on this + * list persist until the program completes; this is a known space leak... + */ + +ml_val_t CInterfaceRootList = LIST_nil;/* see gc/call-gc.c and gc/major-gc.c */ + +PVT void recordFnAsRoot(ml_state_t *msp,ml_val_t *rp) +{ + LIST_cons(msp,CInterfaceRootList,(ml_val_t) rp,CInterfaceRootList); +#ifdef DEBUG_C_CALLS + printf("recordFnAsRoot: added %x\n", rp); +#endif +} + +PVT ml_val_t saveState(ml_state_t *msp,ml_val_t cont) +{ + int n, i, j; + Word_t mask; + + /* compute space for save record */ + n = 0; + /* link, closure, arg, cont, and misc regs are in mask ... */ + mask = msp->ml_liveRegMask; /* should also be mask from REQ_CALLC */ + for (i = 0; mask != 0; i++, mask >>= 1) { + if ((mask & 1) != 0) + n++; + } + /* ... but pc, exnCont, varreg, and basereg (if defined) aren't */ + n += 3; +#ifdef BASE_INDX + n++; +#endif + /* also need to save the liveRegMask. we'll do this first */ + /* others?? */ + n++; +#if defined(SOFT_POLL) +#error +#endif +#ifdef DEBUG_C_CALLS + printf("saveState: size %d\n", n); +#endif + if (cont == (ml_val_t) NULL) { + ML_AllocWrite (msp, 0, MAKE_DESC(n, DTAG_record)); + j = 1; + } else { + n++; + ML_AllocWrite (msp, 0, MAKE_DESC(n, DTAG_record)); + ML_AllocWrite (msp, 1, cont); + j = 2; + } + ML_AllocWrite (msp, j++, INT_CtoML(msp->ml_liveRegMask)); + ML_AllocWrite (msp, j++, msp->ml_pc); + ML_AllocWrite (msp, j++, msp->ml_exnCont); + ML_AllocWrite (msp, j++, msp->ml_varReg); +#ifdef BASE_INDX + ML_AllocWrite (msp, j++, msp->ml_baseReg); +#endif + mask = msp->ml_liveRegMask; + for (i = 0; mask != 0; i++, mask >>= 1) { + if ((mask & 1) != 0) + ML_AllocWrite (msp, j++, msp->ml_roots[ArgRegMap[i]]); + } + ASSERT(j-1 == n); + return ML_Alloc(msp, n); +} + +PVT void restoreState(ml_state_t *msp,ml_val_t state,int holds_cont) +{ + int n, i, j; + Word_t mask; + + n = OBJ_LEN(state); +#ifdef DEBUG_C_CALLS + printf("restoreState: state size %d\n", n); +#endif + /* link, closure, arg, cont, and misc regs are in mask ... */ + /* ... but pc, exnCont, varreg, and basereg (if defined) aren't */ + /* and also need the liveRegMask. get this first */ + /* others?? */ + if (!holds_cont) { + j = 0; + } else { + /* skip function ptr */ + j = 1; + } + msp->ml_liveRegMask = REC_SELINT(state,j++); + msp->ml_pc = REC_SEL(state,j++); + msp->ml_exnCont = REC_SEL(state,j++); + msp->ml_varReg = REC_SEL(state,j++); +#ifdef BASE_INDX + msp->ml_baseReg = REC_SEL(state,j++); +#endif + mask = msp->ml_liveRegMask; + for (i = 0; mask != 0; i++, mask >>= 1) { + if ((mask & 1) != 0) + msp->ml_roots[ArgRegMap[i]] = REC_SEL(state,j++); + } + ASSERT(j == n); +} + +PVT void setup_msp(ml_state_t *msp,ml_val_t f, ml_val_t arg) +{ +#if (CALLEESAVE == 0) + extern ml_val_t return_a[]; +#endif + + /* save necessary state from current msp in calleesave register */ +#if (CALLEESAVE > 0) + msp->ml_calleeSave(1) = saveState(msp,NULL); + msp->ml_cont = PTR_CtoML(return_c); +#else + msp->ml_cont = saveState(msp,PTR_CtoML(return_a)); +#endif + + /* inherit exnCont (?) */ + /* leave msp->ml_exnCon as is */ + msp->ml_varReg = ML_unit; + msp->ml_arg = arg; + msp->ml_closure = f; + msp->ml_pc = + msp->ml_linkReg = GET_CODE_ADDR(f); +} + +PVT void restore_msp(ml_state_t *msp) +{ + /* restore previous msp */ +#if (CALLEESAVE > 0) + restoreState(visible_msp,visible_msp->ml_calleeSave(1),FALSE); +#else + restoreState(visible_msp,visible_msp->ml_cont,TRUE); +#endif +} + +/* convert result to C */ +PVT Word_t convert_result(ml_state_t *msp,code_header_t *chp,ml_val_t val) +{ + Word_t p, *q = &p; + char *t = chp->rettype; + int err; + + /* front-end of interface guarantees that ret is a valid + * return value for a C function: Word_t or some pointer + */ + err = datumMLtoC(msp,&t,&q,val); + if (err) + /* need better error reporting here ... */ + Die("convert_result: error converting return value to C"); + /* return C result*/ + return p; +} + + +/* entry points; must be visible to c-entry.asm + */ + +int no_args_entry() +{ + ml_val_t ret; + +#ifdef DEBUG_C_CALLS + printf("no_args_entry: entered\n"); +#endif + last_code_hdr = CODE_HDR_START(last_entry); +#ifdef DEBUG_C_CALLS + printf("no_args_entry: nargs in header is %d\n", last_code_hdr->nargs); +#endif + + /* setup msp for RunML evaluation of (f LIST_nil) */ + setup_msp(visible_msp, last_code_hdr->the_fn, LIST_nil); + + /* call ML fn, returns an ml_val_t (which is cdata) */ +#ifdef DEBUG_C_CALLS + printf("no_arg_entry: calling ML from C\n"); +#endif + RunML (visible_msp); + + +#ifdef DEBUG_C_CALLS + printf("no_args_entry: return value is %d\n", visible_msp->ml_arg); +#endif + + ret = visible_msp->ml_arg; + + restore_msp(visible_msp); + + return convert_result(visible_msp,last_code_hdr,ret); +} + +int some_args_entry(Word_t first,...) +{ + va_list ap; + ml_val_t lp = LIST_nil, ret; + Word_t next; + int i; + ml_val_t args[N_ARGS]; + +#ifdef DEBUG_C_CALLS + printf("some_args_entry: entered\n"); +#endif + last_code_hdr = CODE_HDR_START(last_entry); +#ifdef DEBUG_C_CALLS + printf("some_args_entry: nargs in header is %d\n", last_code_hdr->nargs); + printf("arg 0: %x\n",first); +#endif + ret = datumCtoML(visible_msp,last_code_hdr->argtypes[0],first,&lp); + LIST_cons(visible_msp,lp,ret,lp); + va_start(ap,first); + for (i = 1; i < last_code_hdr->nargs; i++) { + next = va_arg(ap,Word_t); +#ifdef DEBUG_C_CALLS + printf("arg %d: %x\n",i,next); +#endif + ret = datumCtoML(visible_msp,last_code_hdr->argtypes[i],next,&lp); + LIST_cons(visible_msp,lp,ret,lp); + } + va_end(ap); + + /* lp is backwards */ + lp = revMLList(lp,LIST_nil); + + /* setup msp for RunML evaluation of (f lp) */ + setup_msp(visible_msp, last_code_hdr->the_fn, lp); + + /* call ML fn, returns an ml_val_t (which is cdata) */ +#ifdef DEBUG_C_CALLS + printf("some_arg_entry: calling ML from C\n"); +#endif + RunML (visible_msp); + + +#ifdef DEBUG_C_CALLS + printf("some_args_entry: return value is %d\n", visible_msp->ml_arg); +#endif + + ret = visible_msp->ml_arg; + + restore_msp(visible_msp); + + return convert_result(visible_msp,last_code_hdr,ret); +} + +PVT void *build_entry(ml_state_t *msp,code_header_t h) +{ + int szb = ((Byte_t *)grabPCend) - ((Byte_t *)grabPC); + Byte_t *p; + + +#ifdef DEBUG_C_CALLS + printf ("grabPC=%lx, grabPCend=%lx, code size is %d\n", + grabPC, grabPCend, szb); + printf ("code_header size is %d\n", sizeof(code_header_t)); +#endif + ASSERT((sizeof(code_header_t) & 0x3) == 0); + p = (Byte_t *) memalign(sizeof(Word_t),szb+sizeof(code_header_t)); + *(code_header_t *)p = h; + recordFnAsRoot(msp,&(((code_header_t *)p)->the_fn)); + /* NB: to free this thing, we'll have to subtract sizeof(code_header_t) */ + p += sizeof(code_header_t); +#ifdef DEBUG_C_CALLS + printf ("new code starts at %x and ends at %x\n", p, p+szb); +#endif + memcpy (p, (void *)grabPC, szb); + FlushICache(p,szb); + return p; +} + +Word_t mk_C_function(ml_state_t *msp, + ml_val_t f,int nargs,char *argtypes[],char *rettype) +{ + code_header_t ch; + int i; + + /* create a code header; this will be copied by build entry */ + ch.the_fn = f; + ch.nargs = nargs; + for (i = 0; i < nargs; i++) + ch.argtypes[i] = argtypes[i]; /* argtypes[i] is a copy we can have */ + ch.rettype = rettype; /* rettype is a copy we can have */ + + /* build and return a C entry for f */ + return (Word_t) build_entry(msp,ch); +} + +/* end of c-calls-fns.c */ diff --git a/base/runtime/c-libs/smlnj-ccalls/c-calls-lib.c b/base/runtime/c-libs/smlnj-ccalls/c-calls-lib.c new file mode 100644 index 0000000..05aa35e --- /dev/null +++ b/base/runtime/c-libs/smlnj-ccalls/c-calls-lib.c @@ -0,0 +1,30 @@ +/* c-calls-lib.c + * + * COPYRIGHT (c) 1994 AT&T Bell Laboratories. + */ + +#include "ml-base.h" +#include "c-library.h" +#include "cfun-proto-list.h" + + +/* the table of C functions and ML names */ +#define C_CALLS_CFUNC(NAME, FUNC, CTYPE, CARGS) \ + CFUNC_BIND(NAME, (cfunc_t) FUNC, "") +#define CFUNC(NAME, FUNC, MLTYPE) CFUNC_BIND(NAME, FUNC, MLTYPE) +PVT cfunc_binding_t CFunTable[] = { +#include "cfun-list.h" + CFUNC_NULL_BIND + }; +#undef CFUNC + + +/* the c-calls library */ +c_library_t SMLNJ_CCalls_Library = { + CLIB_NAME, + CLIB_VERSION, + CLIB_DATE, + NIL(clib_init_fn_t), + CFunTable + }; + diff --git a/base/runtime/c-libs/smlnj-ccalls/c-calls.c b/base/runtime/c-libs/smlnj-ccalls/c-calls.c new file mode 100644 index 0000000..b471cce --- /dev/null +++ b/base/runtime/c-libs/smlnj-ccalls/c-calls.c @@ -0,0 +1,1203 @@ +/* c-calls.c + * + * COPYRIGHT (c) 1995 by AT&T Bell Laboratories. + * + * C-side support for calling user C functions from SML/NJ. + * + */ + +#include +#include "ml-base.h" +#include "ml-values.h" +#include "ml-objects.h" +#if defined(INDIRECT_CFUNC) +# include "c-library.h" +#endif +#include "ml-c.h" +#include "c-calls.h" + + +/* assumptions: + * + * Word_t fits in a machine word + * + * restrictions: + * C function args must fit in Word_t + * C's double is the largest return value from a function + */ + +ml_val_t dummyRoot = ML_unit; /* empty root for GC */ + +/* visible_msp used to expose msp to C code */ +ml_state_t *visible_msp = NULL; + +#define CONS_SZB (3*WORD_SZB) /* desc + car + cdr */ +#define CADDR_SZB (2*WORD_SZB) /* string desc + ptr */ + +#define MK_SOME(msp,v) recAlloc1(msp,v) + +#define NULLARY_DATACON INT_CtoML(0) + +/* this map must correspond to the layout of the type cdata and ctype + datatypes defined in ML_FILES +*/ + +#define MLADDR_CODE '@' +#define MLARRAY_CODE 'A' +#define MLCHAR_CODE 'C' +#define MLDOUBLE_CODE 'D' +#define MLFLOAT_CODE 'R' +#define MLFUNCTION_CODE 'F' +#define MLINT_CODE 'I' +#define MLLONG_CODE 'L' +#define MLPTR_CODE 'P' +#define MLSHORT_CODE 'i' +#define MLSTRING_CODE 'S' +#define MLOPENSTRUCT_CODE '(' +#define MLCLOSESTRUCT_CODE ')' +#define MLOPENUNION_CODE '<' +#define MLCLOSEUNION_CODE '>' +#define MLVECTOR_CODE 'B' +#define MLVOID_CODE 'V' +#define MLPAD_CODE '#' + +#define MLSTRUCT_CODE MLOPENSTRUCT_CODE +#define MLUNION_CODE MLOPENUNION_CODE + + +/* this enumeration must match the sml/nj tags on the cdata datatype */ +/* see c-calls.sml */ + +#define MLADDR_TAG 0 +#define MLARRAY_TAG 1 +#define MLCHAR_TAG 2 +#define MLDOUBLE_TAG 3 +#define MLFLOAT_TAG 4 +#define MLFUNCTION_TAG 5 +#define MLINT_TAG 6 +#define MLLONG_TAG 7 +#define MLPTR_TAG 8 +#define MLSHORT_TAG 9 +#define MLSTRING_TAG 10 +#define MLSTRUCT_TAG 11 +#define MLUNION_TAG 12 +#define MLVECTOR_TAG 13 +/* #define MLVOID_TAG not used */ + +/* map from datatype tags to single char descriptor (aka code) */ +char typeMap[] = {MLADDR_CODE, + MLARRAY_CODE, + MLCHAR_CODE, + MLDOUBLE_CODE, + MLFLOAT_CODE, + MLFUNCTION_CODE, + MLINT_CODE, + MLLONG_CODE, + MLPTR_CODE, + MLSHORT_CODE, + MLSTRING_CODE, + MLSTRUCT_CODE, + MLUNION_CODE, + MLVECTOR_CODE, + MLVOID_CODE}; + +/* utility functions */ + +#define CHAR_RANGE 255 /* must agree with CharRange in c-calls.sml */ + +PVT int extractUnsigned(unsigned char **s,int bytes) +{ + int r = 0; + + while (bytes--) + r = r * CHAR_RANGE + (int) *((*s)++) - 1; + return r; +} + + + +/* could (should) use stdlib's strdup instead of this */ +PVT char *mk_strcpy(char *s) +{ + char *p; + + if ((p = (char *) MALLOC(strlen(s)+1)) == NULL) + Die("couldn't make string copy during C call\n"); + return strcpy(p,s); +} + +Word_t *checked_memalign(int n,int align) +{ + Word_t *p; + + if (align < sizeof(Word_t)) + align = sizeof(Word_t); + if ((p = (Word_t *)MALLOC(n)) == NULL) + Die("couldn't alloc memory for C call\n"); + + ASSERT(((Word_t)p & (Word_t)(align-1)) != 0); + + return p; +} + +PVT ml_val_t recAlloc1(ml_state_t *msp,ml_val_t v) +{ + ml_val_t ret; + + REC_ALLOC1(msp,ret,v); + return ret; +} + +PVT ml_val_t mkWord32(ml_state_t *msp, Word_t p) +{ + ML_AllocWrite(msp, 0, MAKE_DESC(sizeof(Word_t), DTAG_string)); + ML_AllocWrite(msp, 1, (ml_val_t)p); + return ML_Alloc(msp, sizeof(Word_t)); +} + +PVT Word_t getWord32(ml_val_t v) +{ + return (Word_t) REC_SEL(v,0); +} + +#define MK_CADDR(msp,p) mkWord32(msp,(Word_t) (p)) +#define GET_CADDR(v) (Word_t *)getWord32(v) + +PVT ml_val_t double_CtoML(ml_state_t *msp,double g) +{ + ml_val_t res; + +#ifdef DEBUG_C_CALLS +SayDebug("double_CtoML: building an ML double %l.15f\n", g); +#endif + /* Force REALD_SZB alignment */ + msp->ml_allocPtr = (ml_val_t *)((Addr_t)(msp->ml_allocPtr) | WORD_SZB); + ML_AllocWrite(msp,0,DESC_reald); + res = ML_Alloc(msp,(sizeof(double)>>2)); + memcpy (res, &g, sizeof(double)); + return res; +} + +/* ptrs to storage alloc'd by the interface. */ +typedef struct ptr_desc { + Word_t *ptr; + struct ptr_desc *next; +} ptrlist_t; + +PVT ptrlist_t *ptrlist = NULL; + +#ifdef DEBUG_C_CALLS +PVT int ptrlist_len() +{ + int i = 0; + ptrlist_t *p = ptrlist; + + while (p != NULL) { + i++; + p = p->next; + } + return i; +} +#endif + + +PVT void keep_ptr(Word_t *p) +{ + ptrlist_t *q = (ptrlist_t *) checked_alloc(sizeof(ptrlist_t)); + +#ifdef DEBUG_C_CALLS + SayDebug("keeping ptr %x, |ptrlist|=%d\n", p, ptrlist_len()); +#endif + + q->ptr = p; + q->next = ptrlist; + ptrlist = q; +} + +PVT void free_ptrlist() +{ + ptrlist_t *p; + +#ifdef DEBUG_C_CALLS + SayDebug("freeing ptr list, |ptrlist|=%d\n",ptrlist_len()); +#endif + p = ptrlist; + while (p != NULL) { + ptrlist = ptrlist->next; + FREE(p->ptr); /* the block */ + FREE(p); /* the block's descriptor */ + p = ptrlist; + } +} + +PVT ml_val_t ptrlist_to_MLlist(ml_state_t *msp) +{ + ml_val_t lp = LIST_nil; + ml_val_t v; + ptrlist_t *p; + +#ifdef DEBUG_C_CALLS + int i = 0; + SayDebug("converting ptrlist (|ptrlist|=%d) to ML list ",ptrlist_len()); +#endif + p = ptrlist; + while (p != NULL) { +#ifdef DEBUG_C_CALLS + i++; +#endif + ptrlist = p->next; + v = MK_CADDR(msp,p->ptr); + LIST_cons(msp, lp, v, lp); + FREE(p); + p = ptrlist; + } +#ifdef DEBUG_C_CALLS + SayDebug("of length %d\n", i); +#endif + return lp; +} + +/* return the number of bytes the ptrlist will occupy in the ML heap */ +PVT int ptrlist_space() +{ + int n = 0; + ptrlist_t *p; + + p = ptrlist; + while (p != NULL) { + p = p->next; + n += CONS_SZB + CADDR_SZB; + } +#ifdef DEBUG_C_CALLS + SayDebug("space for ptrlist is %d, |ptrlist|=%d\n",n,ptrlist_len()); +#endif + return n; +} + +PVT void save_ptrlist(ptrlist_t **save) +{ +#ifdef DEBUG_C_CALLS + SayDebug("saving ptrlist, |ptrlist|=%d\n", ptrlist_len()); +#endif + *save = ptrlist; + ptrlist = NULL; +} + +PVT void restore_ptrlist(ptrlist_t *save) +{ + ptrlist = save; +#ifdef DEBUG_C_CALLS + SayDebug("restoring ptrlist, |ptrlist|=%d\n", ptrlist_len()); +#endif +} + +ml_val_t revMLList(ml_val_t l,ml_val_t r) +{ + if (l == LIST_nil) + return r; + else { + ml_val_t tmp = LIST_tl(l); + + LIST_tl(l) = r; + return revMLList(tmp,l); + } +} + + +#define SMALL_SPACE 0 /* size to 'NeedGC' for a small obj, say <10 words */ + +PVT void spaceCheck(ml_state_t *msp, int bytes, ml_val_t *one_root) +{ + /* assume the ONE_K buffer will absorb descriptors, '\0' terminators */ + if (NeedGC(msp,bytes + ONE_K)) { +#ifdef DEBUG_C_CALLS +SayDebug("spaceCheck: invoking GC\n"); +#endif + InvokeGCWithRoots(msp,0,one_root,NIL(ml_val_t *)); + if (NeedGC(msp,bytes + ONE_K)) + Error("spaceCheck: cannot alloc ML space for ML-C conversion\n"); + } +} + + +/* interface functions */ + +PVT char *too_many_args = "c-calls with more than 15 args not supported\n"; + +/* call_word_fn + * used when the return type fits into a machine word (Word_t) + */ +PVT Word_t call_word_fn(Word_t (*f)(),int n,Word_t *args) +{ + Word_t ret = 0; + + switch(n) { + case 0: + ret = (*f)(); + break; + case 1: + ret = (*f)(args[0]); + break; + case 2: + ret = (*f)(args[0],args[1]); + break; + case 3: + ret = (*f)(args[0],args[1],args[2]); + break; + case 4: + ret = (*f)(args[0],args[1],args[2],args[3]); + break; + case 5: + ret = (*f)(args[0],args[1],args[2],args[3],args[4]); + break; + case 6: + ret = (*f)(args[0],args[1],args[2],args[3],args[4], + args[5]); + break; + case 7: + ret = (*f)(args[0],args[1],args[2],args[3],args[4], + args[5],args[6]); + break; + case 8: + ret = (*f)(args[0],args[1],args[2],args[3],args[4], + args[5],args[6],args[7]); + break; + case 9: + ret = (*f)(args[0],args[1],args[2],args[3],args[4], + args[5],args[6],args[7],args[8]); + break; + case 10: + ret = (*f)(args[0],args[1],args[2],args[3],args[4], + args[5],args[6],args[7],args[8],args[9]); + break; + case 11: + ret = (*f)(args[0],args[1],args[2],args[3],args[4], + args[5],args[6],args[7],args[8],args[9], + args[10]); + break; + case 12: + ret = (*f)(args[0],args[1],args[2],args[3],args[4], + args[5],args[6],args[7],args[8],args[9], + args[10],args[11]); + break; + case 13: + ret = (*f)(args[0],args[1],args[2],args[3],args[4], + args[5],args[6],args[7],args[8],args[9], + args[10],args[11],args[12]); + break; + case 14: + ret = (*f)(args[0],args[1],args[2],args[3],args[4], + args[5],args[6],args[7],args[8],args[9], + args[10],args[11],args[12],args[13]); + break; + case 15: + ret = (*f)(args[0],args[1],args[2],args[3],args[4], + args[5],args[6],args[7],args[8],args[9], + args[10],args[11],args[12],args[13],args[14]); + break; + default: + /* shouldn't happen; ML side assures this */ + Error(too_many_args); + } + +#ifdef DEBUG_C_CALLS + SayDebug("call_word_fn: return=0x%x\n",ret); +#endif + return ret; +} + +/* call_double_fn + */ +PVT double call_double_fn(double (*f)(),int n,Word_t *args) +{ + double ret; + + switch(n) { + case 0: + ret = (*f)(); + break; + case 1: + ret = (*f)(args[0]); + break; + case 2: + ret = (*f)(args[0],args[1]); + break; + case 3: + ret = (*f)(args[0],args[1],args[2]); + break; + case 4: + ret = (*f)(args[0],args[1],args[2],args[3]); + break; + case 5: + ret = (*f)(args[0],args[1],args[2],args[3],args[4]); + break; + case 6: + ret = (*f)(args[0],args[1],args[2],args[3],args[4], + args[5]); + break; + case 7: + ret = (*f)(args[0],args[1],args[2],args[3],args[4], + args[5],args[6]); + break; + case 8: + ret = (*f)(args[0],args[1],args[2],args[3],args[4], + args[5],args[6],args[7]); + break; + case 9: + ret = (*f)(args[0],args[1],args[2],args[3],args[4], + args[5],args[6],args[7],args[8]); + break; + case 10: + ret = (*f)(args[0],args[1],args[2],args[3],args[4], + args[5],args[6],args[7],args[8],args[9]); + break; + default: + /* shouldn't happen; ML side assures this */ + Error(too_many_args); + } + return ret; +} + +/* call_float_fn + */ +PVT float call_float_fn(float (*f)(),int n,Word_t *args) +{ + float ret; + + switch(n) { + case 0: + ret = (*f)(); + break; + case 1: + ret = (*f)(args[0]); + break; + case 2: + ret = (*f)(args[0],args[1]); + break; + case 3: + ret = (*f)(args[0],args[1],args[2]); + break; + case 4: + ret = (*f)(args[0],args[1],args[2],args[3]); + break; + case 5: + ret = (*f)(args[0],args[1],args[2],args[3],args[4]); + break; + case 6: + ret = (*f)(args[0],args[1],args[2],args[3],args[4], + args[5]); + break; + case 7: + ret = (*f)(args[0],args[1],args[2],args[3],args[4], + args[5],args[6]); + break; + case 8: + ret = (*f)(args[0],args[1],args[2],args[3],args[4], + args[5],args[6],args[7]); + break; + case 9: + ret = (*f)(args[0],args[1],args[2],args[3],args[4], + args[5],args[6],args[7],args[8]); + break; + case 10: + ret = (*f)(args[0],args[1],args[2],args[3],args[4], + args[5],args[6],args[7],args[8],args[9]); + break; + default: + /* shouldn't happen; ML side assures this */ + Error(too_many_args); + } + return ret; +} + +/* error handling */ + +#define NO_ERR 0 +#define ERR_TYPEMISMATCH 1 +#define ERR_EMPTY_AGGREGATE 2 +#define ERR_SZ_MISMATCH 3 +#define ERR_WRONG_ARG_COUNT 4 +#define ERR_TOO_MANY_ARGS 5 + +PVT char *errtbl[] = { + "no error", + "type mismatch", + "empty aggregate", + "array/vector size does not match registered size", + "wrong number of args in C call", + "current max of 10 args to C fn", + }; + +PVT char errbuf[100]; + +PVT ml_val_t RaiseError(ml_state_t *msp,int err) +{ + sprintf(errbuf,"SML/NJ-C-Interface: %s",errtbl[err]); + return RAISE_ERROR(msp, errbuf); +} + + +/* char *nextdatum(char *t) + * + * must match typeToCtl in c-calls.sml + */ +PVT char *nextdatum(char *t) +{ + int level = 0; + + do { + switch(*t) { + case MLFUNCTION_CODE: { + int nargs, i; + + t++; /* skip code */ + nargs = extractUnsigned((unsigned char **)&t,1); + /* skip arg types AND return type */ + for (i = 0; i < nargs+1; i++) { + t = nextdatum(t); + } + } + break; + case MLPTR_CODE: + /* can fall through as long as Cptr has 4 bytes of sz info */ + case MLARRAY_CODE: + case MLVECTOR_CODE: + t = nextdatum(t+5); /* skip 4 bytes of sz info & code */ + break; + case MLOPENUNION_CODE: + t++; /* skip 1 byte of sz info ; fall through */ + case MLOPENSTRUCT_CODE: + t++; /* skip code */ + level++; + break; + case MLCLOSEUNION_CODE: + case MLCLOSESTRUCT_CODE: + t++; /* skip code */ + level--; + break; + case MLINT_CODE: + case MLSHORT_CODE: + case MLLONG_CODE: + /* skip 1 byte of size; fall through */ + t++; + default: + t++; /* skip simple type */ + break; + } + } while (level); + return t; +} + +PVT void mkCint(Word_t src,Word_t **dst,int bytes) +{ +#ifdef DEBUG_C_CALLS + SayDebug("mkCint: placing integer into %d bytes at %x\n", bytes, *dst); +#endif + +#ifdef BYTE_ORDER_BIG + src <<= (sizeof(Word_t) - bytes)*8; +#endif + memcpy (*dst, &src, bytes); + (*(Byte_t **)dst) += bytes; +} + +PVT void mkMLint(Word_t **src,Word_t *dst,int bytes) +{ +#ifdef DEBUG_C_CALLS + SayDebug("mkMLint: reading integer from %x into %d bytes\n", *src, bytes); +#endif + + memcpy (dst, *src, bytes); +#ifdef BYTE_ORDER_BIG + *dst >>= (sizeof(Word_t) - bytes)*8; +#endif + *(Byte_t **)src += bytes; +} + + +#define DO_PAD(p,t) (*(Byte_t **)(p) += extractUnsigned((unsigned char **)(t),1)) +#define IF_PAD_DO_PAD(p,t) {if (**t == MLPAD_CODE) {++(*t); DO_PAD(p,t);}} + +int datumMLtoC(ml_state_t *msp,char **t,Word_t **p,ml_val_t datum) +{ + int tag = REC_SELINT(datum,0); + ml_val_t val = REC_SEL(datum,1); + int err = NO_ERR; + int sz = 0; + + while (**t == MLPAD_CODE) { + ++(*t); /* advance past code */ +#ifdef DEBUG_C_CALLS + SayDebug("datumMLtoC: adding pad from %x ", *p); +#endif + DO_PAD(p,t); +#ifdef DEBUG_C_CALLS + SayDebug(" to %x\n", *p); +#endif + } + if (typeMap[tag] != **t) { +#ifdef DEBUG_C_CALLS + SayDebug("datumMLtoC: type mismatch %c != %d\n",**t,tag); +#endif + return ERR_TYPEMISMATCH; + } + switch(*(*t)++) { + case MLFUNCTION_CODE: { + char *argtypes[N_ARGS], *rettype; + char *this_arg, *next_arg; + int nargs, len, i; + + nargs = extractUnsigned((unsigned char **)t,1); +#ifdef DEBUG_C_CALLS + SayDebug("datumMLtoC: function with %d args\n", nargs); +#endif + this_arg = *t; + for (i = 0; i < nargs; i++) { + next_arg = nextdatum(this_arg); + len = next_arg - this_arg; + argtypes[i] = (char *)checked_alloc(len+1); /* len plus null */ + strncpy(argtypes[i],this_arg,len); + argtypes[i][len] = '\0'; + this_arg = next_arg; +#ifdef DEBUG_C_CALLS + SayDebug("datumMLtoC: function arg[%d] is \"%s\"\n", + i,argtypes[i]); +#endif + } + /* get the return type */ + next_arg = nextdatum(this_arg); + len = next_arg - this_arg; + rettype = (char *)checked_alloc(len+1); /* len plus null */ + strncpy(rettype,this_arg,len); + rettype[len] = '\0'; +#ifdef DEBUG_C_CALLS + SayDebug("datumMLtoC: function returns \"%s\"\n", + rettype); +#endif + *t = next_arg; + *(*p)++ = mk_C_function(msp,val,nargs,argtypes,rettype); +#ifdef DEBUG_C_CALLS + SayDebug("datumMLtoC: made C function\n"); +#endif + } + break; + case MLPTR_CODE: { + int szb, align; + Word_t *q; + + szb = extractUnsigned((unsigned char **)t,4); + align = extractUnsigned((unsigned char **)t,1); +#ifdef DEBUG_C_CALLS + SayDebug("datumMLtoC: ptr szb=%d, align=%d\n", szb, align); +#endif + q = checked_memalign(szb,align); + keep_ptr(q); + *(*p)++ = (Word_t) q; +#ifdef DEBUG_C_CALLS + SayDebug("datumMLtoC: ptr substructure at %x\n", q); +#endif + if (err = datumMLtoC(msp,t,&q,val)) + return err; + } + break; + case MLCHAR_CODE: + *(*(Byte_t **)p)++ = (Byte_t) INT_MLtoC(val); + break; + case MLFLOAT_CODE: + sz = sizeof(float); + /* fall through */ + case MLDOUBLE_CODE: { + double g; + + if (!sz) { + /* came in through MLDOUBLE_CODE */ + sz = sizeof(double); + } + memcpy (&g, (Word_t *)val, sizeof(double)); +#ifdef DEBUG_C_CALLS +SayDebug("datumMLtoC: ML real %l.15f:%l.15f %.15f\n", *(double *)val, g, (float) g); +#endif + if (sz == sizeof(float)) + *(*(float **)p)++ = (float) g; + else + *(*(double **)p)++ = g; + } + break; + case MLINT_CODE: + case MLSHORT_CODE: + case MLLONG_CODE: +#ifdef DEBUG_C_CALLS + SayDebug("datumMLtoC: integer %d\n", getWord32(val)); +#endif + mkCint(getWord32(val),p,extractUnsigned((unsigned char **)t,1)); + break; + case MLADDR_CODE: +#ifdef DEBUG_C_CALLS + SayDebug("datumMLtoC: addr %x\n", GET_CADDR(val)); +#endif + *(*p)++ = (Word_t) GET_CADDR(val); + break; + case MLSTRING_CODE: { + char *r, *s; + + s = PTR_MLtoC(char,val); +#ifdef DEBUG_C_CALLS + SayDebug("datumMLtoC: string \"%s\"\n",s); +#endif + r = (char *) checked_alloc(strlen(s)+1); + strcpy(r,s); + keep_ptr((Word_t *) r); + *(*p)++ = (Word_t) r; +#ifdef DEBUG_C_CALLS + SayDebug("datumMLtoC: copied string \"%s\"=%x\n",r,r); +#endif + } + break; + case MLOPENSTRUCT_CODE: { + ml_val_t lp = val; + ml_val_t hd; + +#ifdef DEBUG_C_CALLS + SayDebug("datumMLtoC: struct\n"); +#endif + while (**t != MLCLOSESTRUCT_CODE) { + hd = LIST_hd(lp); + if (err = datumMLtoC(msp,t,p,hd)) + return err; + lp = LIST_tl(lp); + IF_PAD_DO_PAD(p,t); + } + (*t)++; /* advance past MLCLOSESTRUCT_CODE */ + } + break; + case MLOPENUNION_CODE: { + Byte_t *init_p = (Byte_t *) *p; + char *next_try; + + sz = extractUnsigned((unsigned char **)t,1); +#ifdef DEBUG_C_CALLS + SayDebug("datumMLtoC: union of size %d\n", sz); +#endif + if ((**t) == MLCLOSEUNION_CODE) + return ERR_EMPTY_AGGREGATE; + next_try = nextdatum(*t); + /* try union types until one matches or all fail */ + while ((err = datumMLtoC(msp,t,p,val)) == ERR_TYPEMISMATCH) { + *t = next_try; + if ((**t) == MLCLOSEUNION_CODE) { + err = ERR_TYPEMISMATCH; + break; + } + next_try = nextdatum(*t); + *p = (Word_t *) init_p; + } + if (err) + return err; + while (**t != MLCLOSEUNION_CODE) + *t = nextdatum(*t); + (*t)++; /* advance past MLCLOSEUNION_CODE */ + *p = (Word_t *) (init_p + sz); + } + break; + case MLARRAY_CODE: + case MLVECTOR_CODE: { + int nelems,elem_sz, i; + char *saved_t; + + nelems = extractUnsigned((unsigned char **)t,2); + elem_sz = extractUnsigned((unsigned char **)t,2); +#ifdef DEBUG_C_CALLS + SayDebug("datumMLtoC: array/vector of %d elems of size %d\n", + nelems, elem_sz); +#endif + i = sz = OBJ_LEN(val); +#ifdef DEBUG_C_CALLS + SayDebug("datumMLtoC: array/vector size is %d\n", sz); +#endif + + if (sz != nelems) + return ERR_SZ_MISMATCH; + saved_t = *t; + while (!err && i--) { + *t = saved_t; + err = datumMLtoC(msp,t,p,*(ml_val_t *)val++); + } + if (err) + return err; + } + break; + case MLCLOSESTRUCT_CODE: + case MLCLOSEUNION_CODE: + return ERR_EMPTY_AGGREGATE; + break; + default: + Die("datumMLtoC: cannot yet handle type\n"); + } + return err; +} + +/* ML entry point for 'datumMLtoC' */ +ml_val_t ml_datumMLtoC(ml_state_t *msp, ml_val_t arg) +{ + /* no GCs can occur since no allocation on ML heap */ + /* guaranteed that datum is a pointer (Cptr or Cstring) */ + char *type = REC_SELPTR(char,arg,0); + ml_val_t datum = REC_SEL(arg,1); + int err = 0; + Word_t p, *q = &p; + ml_val_t lp, ret; + ptrlist_t *saved_pl; + + save_ptrlist(&saved_pl); + err = datumMLtoC(msp,&type,&q,datum); + if (err) { + free_ptrlist(); + restore_ptrlist(saved_pl); + return RaiseError(msp,err); + } + /* return (result,list of pointers to alloc'd C objects) */ + spaceCheck(msp,ptrlist_space(),&dummyRoot); + lp = ptrlist_to_MLlist(msp); /* this frees the ptr descriptors */ + restore_ptrlist(saved_pl); + ret = MK_CADDR(msp,(Word_t *)p); + REC_ALLOC2(msp, ret, ret, lp); + return ret; +} + +PVT ml_val_t word_CtoML(ml_state_t *msp,char **t,Word_t **p, ml_val_t *root) +{ + ml_val_t ret = ML_unit; + ml_val_t mlval = ML_unit; + int tag; + char code; + + switch(code = *(*t)++) { + case MLPAD_CODE: +#ifdef DEBUG_C_CALLS + SayDebug("word_CtoML: skipping pad %x ", *p); +#endif + DO_PAD(p,t); +#ifdef DEBUG_C_CALLS + SayDebug(" to %x\n", *p); +#endif + return word_CtoML(msp,t,p,root); + case MLVOID_CODE: + return NULLARY_DATACON; + case MLCHAR_CODE: + tag = MLCHAR_TAG; + mlval = INT_CtoML(**(Byte_t **)p); + (*(Byte_t **)p)++; + break; + case MLPTR_CODE: { + Word_t q; +#ifdef DEBUG_C_CALLS + SayDebug("word_CtoML: ptr %x\n", **(Word_t ****)p); +#endif + tag = MLPTR_TAG; +#ifdef DEBUG_C_CALLS + SayDebug("word_CtoML: size is %d\n", + extractUnsigned((unsigned char **)t,4)); + SayDebug("word_CtoML: align is %d\n", + extractUnsigned((unsigned char **)t,1)); +#else + *t += 5; /* 5 bytes of size */ +#endif + q = **p; + mlval = word_CtoML(msp,t,(Word_t **) &q,root); + (*p)++; + } + break; + case MLINT_CODE: + tag = MLINT_TAG; + goto handle_int; + case MLSHORT_CODE: + tag = MLSHORT_TAG; + goto handle_int; + case MLLONG_CODE: + tag = MLLONG_TAG; +handle_int: + { + Word_t w; + + mkMLint(p,&w,extractUnsigned((unsigned char **)t,1)); + mlval = mkWord32(msp,w); + } + break; + case MLADDR_CODE: { + Word_t *cp = ** (Word_t ***) p; + +#ifdef DEBUG_C_CALLS + SayDebug("word_CtoML: C addr %x\n", cp); +#endif + tag = MLADDR_TAG; + mlval = MK_CADDR(msp,cp); + (*p)++; + } + break; + case MLFLOAT_CODE: { + /* C floats become ML reals, which are doubles... */ + tag = MLFLOAT_TAG; + mlval = double_CtoML(msp,(double) *(*(float **)p)++); +#ifdef DEBUG_C_CALLS + SayDebug("word_CtoML: made float %l.15f\n", *(double*)mlval); +#endif + } + break; + case MLDOUBLE_CODE: { + tag = MLDOUBLE_TAG; + mlval = double_CtoML(msp,*(*(double **)p)++); +#ifdef DEBUG_C_CALLS + SayDebug("word_CtoML: made double %l.15f\n", *(double*)mlval); +#endif + } + break; + case MLSTRING_CODE: +#ifdef DEBUG_C_CALLS + SayDebug("word_CtoML: string \"%s\"\n", (char *)**p); +#endif + tag = MLSTRING_TAG; + spaceCheck(msp,strlen((char *)**p),root); + mlval = ML_CString(msp,(char *) **p); + (*p)++; + break; + case MLOPENSTRUCT_CODE: { + ml_val_t local_root; + + tag = MLSTRUCT_TAG; + mlval = LIST_nil; + +#ifdef DEBUG_C_CALLS + SayDebug("word_CtoML: open struct\n"); +#endif + while (**t != MLCLOSESTRUCT_CODE) { + LIST_cons(msp,local_root,mlval,*root); + ret = word_CtoML(msp,t,p,&local_root); + mlval = LIST_hd(local_root); + *root = LIST_tl(local_root); + LIST_cons(msp,mlval,ret,mlval); + IF_PAD_DO_PAD(p,t); + } + (*t)++; /* advance past MLCLOSESTRUCT_CODE */ + mlval = revMLList(mlval,LIST_nil); + } + break; + case MLCLOSESTRUCT_CODE: + Die("word_CtoML: found lone MLCLOSESTRUCT_CODE"); + case MLARRAY_CODE: + case MLVECTOR_CODE: { + int szb; + char *saved_t; + ml_val_t res,local_root; + int n,i; + Word_t dtag; + + tag = (code == MLARRAY_CODE) ? MLARRAY_TAG : MLVECTOR_TAG; + dtag = (code == MLARRAY_CODE) ? DTAG_array : DTAG_vector; + n = extractUnsigned((unsigned char **)t,2); /* number of elements */ + szb = extractUnsigned((unsigned char **)t,2);/* element sz (bytes)*/ +#ifdef DEBUG_C_CALLS + SayDebug("word_CtoML: array/vector with %d elems of size %d\n", n, szb); +#endif + saved_t = *t; + spaceCheck(msp,szb*n,root); + /* ML_AllocArray isn't used here since it might call GC */ + ML_AllocWrite (msp, 0, MAKE_DESC(n,dtag)); + mlval = ML_Alloc (msp, n); + /* clear the array/vector so that it can be GC'd if necessary */ + for (i = 0; i < n; i++) { + PTR_MLtoC(ml_val_t,mlval)[i] = ML_unit; + } + for (i = 0; i < n; i++) { + *t = saved_t; + LIST_cons(msp,local_root,mlval,*root); + res = word_CtoML(msp,t,p,&local_root); + mlval = LIST_hd(local_root); + *root = LIST_tl(local_root); + PTR_MLtoC(ml_val_t,mlval)[i] = res; + } + } + break; + default: +#ifdef DEBUG_C_CALLS + SayDebug("word_CtoML: bad type is '%c'\n", *(*t-1)); +#endif + Die("word_CtoML: cannot yet handle type\n"); + } + REC_ALLOC2(msp,ret,INT_CtoML(tag),mlval); + return ret; +} + +/* static c-calls-fns.c needs to see this */ +ml_val_t datumCtoML(ml_state_t *msp, char *type, Word_t p, ml_val_t *root) +{ + ml_val_t ret; + +#ifdef DEBUG_C_CALLS + SayDebug("datumCtoML: C address is %x\n", p); +#endif + +#ifdef DEBUG_C_CALLS + SayDebug("datumCtoML: type is %s\n", type); +#endif + + switch (*type) { + case MLDOUBLE_CODE: + ret = double_CtoML(msp, *(double *)p); + REC_ALLOC2(msp,ret,INT_CtoML(MLDOUBLE_TAG),ret); + break; + case MLFLOAT_CODE: + ret = double_CtoML(msp, (double) (*(float *)p)); + REC_ALLOC2(msp,ret,INT_CtoML(MLFLOAT_TAG),ret); + break; + default: { + Word_t *q = &p; + ret = word_CtoML(msp,&type,&q,root); + } + break; + } +#ifdef DEBUG_C_CALLS + SayDebug("datumCtoML: returning\n"); +#endif + return ret; +} + + +/* ML entry point for 'datumCtoML' */ +ml_val_t ml_datumCtoML(ml_state_t *msp, ml_val_t arg) +{ + /* make copies of things that GC may move */ + char *type = mk_strcpy(REC_SELPTR(char,arg,0)); + Word_t *caddr = GET_CADDR(REC_SEL(arg,1)); + ml_val_t ret; + + ret = datumCtoML(msp,type,(Word_t) caddr,&arg); + FREE(type); + return ret; +} + + +/* ML entry point for 'c_call' */ +ml_val_t ml_c_call(ml_state_t *msp, ml_val_t arg) +{ +#if !defined(INDIRECT_CFUNC) + Word_t (*f)() = (Word_t (*)()) + REC_SELPTR(Word_t,arg,0); +#else + Word_t (*f)() = (Word_t (*)()) + ((cfunc_binding_t *)REC_SELPTR(Word_t,arg,0))->cfunc; +#endif + int n_cargs = REC_SELINT(arg,1); + ml_val_t carg_types = REC_SEL(arg,2); /* string list */ + char *cret_type = REC_SELPTR(char,arg,3); + ml_val_t cargs = REC_SEL(arg,4); /* cdata list */ + bool_t auto_free = REC_SELINT(arg,5); + ptrlist_t *saved_pl; + + ml_val_t p,q; + ml_val_t ret; + int i; + Word_t vals[N_ARGS]; + Word_t w; + int err = NO_ERR; + + if (n_cargs > N_ARGS) /* shouldn't see this; ML side insures this */ + return RaiseError(msp,ERR_TOO_MANY_ARGS); + + /* save the ptrlist since C can call ML can call C ... */ + save_ptrlist(&saved_pl); + + p = carg_types; + q = cargs; + i = 0; + while (p != LIST_nil && q != LIST_nil) { + char *carg_type = PTR_MLtoC(char,LIST_hd(p)); + Word_t *vp; + +#ifdef DEBUG_C_CALLS + SayDebug("ml_c_call: arg %d:\"%s\"\n",i,carg_type); +#endif + + vp = &vals[i]; + if (err = datumMLtoC(msp,&carg_type,&vp,LIST_hd(q))) + break; + i++; + p = LIST_tl(p); + q = LIST_tl(q); + } +#ifdef DEBUG_C_CALLS + SayDebug("ml_c_call: rettype is \"%s\"\n", cret_type); +#endif + + /* within ml_c_call, no ML allocation occurs above this point */ + + if (!err && (i != n_cargs)) + err = ERR_WRONG_ARG_COUNT; + if (err) { + free_ptrlist(); + restore_ptrlist(saved_pl); + return RaiseError(msp,err); + } +#ifdef DEBUG_C_CALLS + SayDebug("ml_c_call: calling C function at %x\n", f); +#endif + + /* expose msp so C has access to it */ + visible_msp = msp; + switch (*cret_type) { + case MLDOUBLE_CODE: + ret = double_CtoML(msp,call_double_fn((double (*)())f,n_cargs,vals)); + REC_ALLOC2(msp,ret,INT_CtoML(MLDOUBLE_TAG),ret); + break; + case MLFLOAT_CODE: + ret = double_CtoML(msp, + (double) call_float_fn((float(*)())f,n_cargs,vals)); + REC_ALLOC2(msp,ret,INT_CtoML(MLFLOAT_TAG),ret); + break; + case MLCHAR_CODE: { + Byte_t b = (Byte_t) call_word_fn(f,n_cargs,vals); + Byte_t *bp = &b; + + ret = word_CtoML(msp,&cret_type,(Word_t **)&bp,&dummyRoot); + } + break; + default: { + Word_t w = call_word_fn(f,n_cargs,vals); + Word_t *wp = &w; + + ret = word_CtoML(msp,&cret_type,&wp,&dummyRoot); + } + } +#ifdef DEBUG_C_CALLS + SayDebug("ml_c_call: returned from C function\n"); +#endif + +#ifdef DEBUG_C_CALLS + SayDebug("ml_c_call: auto_free is %d\n",auto_free); +#endif + + /* setup the return value, always a pair */ + { + ml_val_t lp = LIST_nil; + + if (auto_free) { +#ifdef DEBUG_C_CALLS + SayDebug("ml_c_call: performing auto-free\n"); +#endif + + free_ptrlist(); + } else { + /* return (result,list of pointers to alloc'd C objects) */ +#ifdef DEBUG_C_CALLS + SayDebug("ml_c_call: returning list of caddrs\n"); +#endif + spaceCheck(msp,ptrlist_space(),&ret); + lp = ptrlist_to_MLlist(msp); /* this frees the ptr descriptors */ + } + REC_ALLOC2(msp, ret, ret, lp); + } + restore_ptrlist(saved_pl); /* restore the previous ptrlist */ + return ret; +} + + +/* end of c-calls.c */ diff --git a/base/runtime/c-libs/smlnj-ccalls/c-calls.h b/base/runtime/c-libs/smlnj-ccalls/c-calls.h new file mode 100644 index 0000000..e90ad38 --- /dev/null +++ b/base/runtime/c-libs/smlnj-ccalls/c-calls.h @@ -0,0 +1,42 @@ +/* c-calls.h + * + * COPYRIGHT (c) 1995 by AT&T Bell Laboratories. + * + */ + +#ifndef _C_CALLS_ +#define _C_CALLS_ + +#define N_ARGS 15 /* max number of args a ML callable C function may have */ + +#ifndef _ASM_ + +#include "ml-sizes.h" + +/* malloc's should return sufficiently aligned blocks */ +#define HAS_ALIGNED_MALLOC +#if defined(HAS_ALIGNED_MALLOC) +#include +#include + +#define memalign(align,sz) malloc(sz) +#endif + +#include + +extern Word_t *checked_memalign(int n,int align); +#define checked_alloc(n) checked_memalign((n),(1)) + +extern Word_t mk_C_function(ml_state_t *msp, + ml_val_t f, + int nargs,char *argtypes[],char *rettype); + +extern ml_val_t datumCtoML(ml_state_t *msp,char *type,Word_t p,ml_val_t *root); +extern int datumMLtoC(ml_state_t *msp,char **t,Word_t **p,ml_val_t ret); +extern ml_val_t revMLList(ml_val_t l,ml_val_t acc); + +extern ml_state_t *visible_msp; +#endif + +#endif /* !_C_CALLS_ */ + diff --git a/base/runtime/c-libs/smlnj-ccalls/c-entry.asm b/base/runtime/c-libs/smlnj-ccalls/c-entry.asm new file mode 100644 index 0000000..f8d9355 --- /dev/null +++ b/base/runtime/c-libs/smlnj-ccalls/c-entry.asm @@ -0,0 +1,195 @@ +/*! \file c-entry.asm + * + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + */ + +#include "asm-base.h" + +#if defined(ARCH_X86) +#define CALL_BIAS 5 +#define cresult %eax + +#elif defined(ARCH_MIPS) + +#define CALL_BIAS 20 + +#define cresult $2 +#define carg0 $4 +#define carg1 $5 +#define carg2 $6 +#define carg3 $7 +/* #define ra $31 */ + +#define tmp0 $3 +#define tmp1 $8 + +#elif defined(ARCH_SPARC) + +#define CALL_BIAS 8 +#define DELAY nop + +#endif + +/* offsets must match the C declaration of code_header_t */ +#define NARGS_OFFSET -4 + +/* grabPC: + * routine to return the PC at entry to this function + * + * NOTE: this code must relocatable using bcopy. + */ + +#ifdef OPSYS_WIN32 + +ENTRY_M MACRO name + PUBLIC &name + &name LABEL FAR + EVEN +ENDM + + .386 + .MODEL FLAT + + + EXTRN _last_entry:DWORD + EXTRN _no_args_entry:FAR + EXTRN _some_args_entry:FAR + + TEXT + EVEN + +#else + + .text + + .align 2 + .globl CSYM(grabPC) + .globl CSYM(grabPCend) + +#endif + +#if defined(ARCH_X86) +#if defined(OPSYS_LINUX) +CSYM(grabPC): +/*->*/ call grabPCaux /* put pc in %eax */ + subl $CALL_BIAS,%eax /* adjust pc to point at "->" */ + lea CSYM(last_entry),%ecx /* save it */ + movl %eax,(%ecx) + cmpl $0,NARGS_OFFSET(%eax) + jne some_args + lea CSYM(no_args_entry),%ecx + jmp %ecx + /* should never get here */ +some_args: + lea CSYM(some_args_entry),%ecx + jmp %ecx + /* should never get here */ + +/* WARNING: this is x86-linux assembler specific! + * Above call must be relative. + */ +grabPCaux: + pop %eax /* grab return address */ + push %eax /* put it back */ + ret +CSYM(grabPCend): + nop +#elif defined(OPSYS_WIN32) + PUBLIC CSYM(grabPCend) + PUBLIC CSYM(grabPC) +CSYM(grabPC) LABEL FAR +/*->*/ call grabPCaux /* put pc in %eax */ + sub eax,CALL_BIAS /* adjust pc to point at "->" */ + lea ecx,CSYM(last_entry) /* save it */ + mov dword ptr 0 [ecx],eax + cmp dword ptr (NARGS_OFFSET) [eax],0 + jne some_args + lea ecx,CSYM(no_args_entry) + jmp ecx + /* should never get here */ +some_args: + lea ecx,CSYM(some_args_entry) + jmp ecx + /* should never get here */ + +grabPCaux: + pop eax /* grab return address */ + push eax /* put it back */ + ret +CSYM(grabPCend) LABEL FAR + + DATA + PUBLIC CSYM(asm_entry_szb) +CSYM(asm_entry_szb) DWORD CSYM(grabPCend) - CSYM(grabPC) + +#else +#error unknown x86 opsys +#endif +#elif defined(ARCH_MIPS) + +grabPCaux: + j tmp1 + + .align 2 + .ent grabPC 2 +CSYM(grabPC): +/*->*/ la tmp0,grabPCaux /* load address of grabPCaux */ + jalr tmp1,tmp0 /* call it, putting pc in tmp1 */ + subu tmp1,CALL_BIAS /* adjust pc to point at "->" */ + la tmp0,CSYM(last_entry) /* save it */ + sw tmp1,0(tmp0) + lw tmp0,NARGS_OFFSET(tmp1) + bnez tmp0,some_args + la t9,CSYM(no_args_entry) + j t9 /* call C, must use t9 here */ + /* should never get here */ +some_args: + la t9,CSYM(some_args_entry) + j t9 /* call C, must use t9 here */ + /* should never get here */ +CSYM(grabPCend): + +#elif defined(ARCH_SPARC) + .align 4 +CSYM(grabPC): +/*->*/ st %o0,[%sp-4] /* get some temps */ + mov %o7,%g1 /* save ret addr in %g1 */ + call grabPCaux /* call leaves pc in %o7 */ + DELAY + mov %o7,%o0 /* restore ret addr */ + mov %g1,%o7 + sub %o0,CALL_BIAS,%o0 /* unbias saved pc */ + set CSYM(last_entry),%g1 /* store it */ + st %o0,[%g1] + ld [%o0+NARGS_OFFSET],%o0 /* get # of args */ + tst %o0 + ld [%sp-4],%o0 /* relinquish temps */ + bnz some_args + nop + set CSYM(no_args_entry),%g1 + jmp %g1 + nop + /* should never get here */ +some_args: + set CSYM(some_args_entry),%g1 + jmp %g1 + nop + /* should never get here */ + +grabPCaux: + /* return address is in %o7 */ + retl + DELAY +CSYM(grabPCend): +#else +#error unknown target arch +#endif + +#ifdef OPSYS_WIN32 + END +#elif !defined(ARCH_SPARC) + .end +#endif + +/* end of c-entry.asm */ diff --git a/base/runtime/c-libs/smlnj-ccalls/cfun-list.h b/base/runtime/c-libs/smlnj-ccalls/cfun-list.h new file mode 100644 index 0000000..d20aff3 --- /dev/null +++ b/base/runtime/c-libs/smlnj-ccalls/cfun-list.h @@ -0,0 +1,19 @@ +/* cfun-list.h + * + * COPYRIGHT (c) 1994 AT&T Bell Laboratories. + * + * This file lists the directory library of C functions that are callable by ML. + */ + +#ifndef CLIB_NAME +#define CLIB_NAME "SMLNJ-CCalls" +#define CLIB_VERSION "0.0" +#define CLIB_DATE "March 3, 1995" +#endif + +CFUNC("c_call", ml_c_call, "") +CFUNC("datumMLtoC", ml_datumMLtoC, "") +CFUNC("datumCtoML", ml_datumCtoML, "") + +#include "cutil-cfuns.h" + diff --git a/base/runtime/c-libs/smlnj-ccalls/cfun-proto-list.h b/base/runtime/c-libs/smlnj-ccalls/cfun-proto-list.h new file mode 100644 index 0000000..f3bdf99 --- /dev/null +++ b/base/runtime/c-libs/smlnj-ccalls/cfun-proto-list.h @@ -0,0 +1,25 @@ +/* cfun-proto-list.h + * + * COPYRIGHT (c) 1994 AT&T Bell Laboratories. + */ + +#ifndef _CFUN_PROTO_LIST_ +#define _CFUN_PROTO_LIST_ + +#ifndef _C_LIBRARY_ +# include "c-library.h" +#endif + + +#define C_CALLS_CFUNC_PROTO(NAME, FUNC, CTYPE, CARGS) \ + extern CTYPE FUNC CARGS; + +/* the external definitions for the C functions */ +#define C_CALLS_CFUNC(NAME, FUNC, CTYPE, CARGS) \ + C_CALLS_CFUNC_PROTO(NAME,FUNC,CTYPE,CARGS) +#define CFUNC(NAME, FUNC, MLTYPE) CFUNC_PROTO(NAME, FUNC, MLTYPE) +#include "cfun-list.h" +#undef CFUNC +#undef C_CALLS_CFUNC + +#endif /* !_CFUN_PROTO_LIST_ */ diff --git a/base/runtime/c-libs/smlnj-ccalls/cutil-cfuns.h b/base/runtime/c-libs/smlnj-ccalls/cutil-cfuns.h new file mode 100644 index 0000000..255d856 --- /dev/null +++ b/base/runtime/c-libs/smlnj-ccalls/cutil-cfuns.h @@ -0,0 +1,14 @@ +/* cutil-cfuns.h + * + * COPYRIGHT (c) 1995 by AT&T Bell Laboratories. + * + * Declarations for some useful user-level C functions. + * + * This file is included by cfun-list.h; it should only have C_CALLS_CFUNCs. + */ + +C_CALLS_CFUNC("ptos", ptos, char *, (void *)) +C_CALLS_CFUNC("ptoi", ptoi, int, (void *)) +C_CALLS_CFUNC("free", _FREE, void, (void *)) + +/* end of cutil-cfuns.h */ diff --git a/base/runtime/c-libs/smlnj-ccalls/cutil.c b/base/runtime/c-libs/smlnj-ccalls/cutil.c new file mode 100644 index 0000000..3dacb5c --- /dev/null +++ b/base/runtime/c-libs/smlnj-ccalls/cutil.c @@ -0,0 +1,27 @@ +/* cutil.c + * + * COPYRIGHT (c) 1995 by AT&T Bell Laboratories. + * + * Some useful user-level C functions. + * Declared and registered on the C side by cutil-cfuns.h + */ + +char *ptos(void *p) +{ +#ifdef DEBUG_C_CALLS + printf("in ptos with string \"%s\"",(char *)p); +#endif + return (char *) p; +} + +int ptoi(int *p) +{ + int i; + + /* p may not be pointing to an aligned int, hence the memcpy */ + memcpy (&i, p, sizeof(int)); + return i; +} + + +/* end of cutil.c */ diff --git a/base/runtime/c-libs/smlnj-ccalls/makefile b/base/runtime/c-libs/smlnj-ccalls/makefile new file mode 100644 index 0000000..d77ff51 --- /dev/null +++ b/base/runtime/c-libs/smlnj-ccalls/makefile @@ -0,0 +1,58 @@ +# +# makefile for the C library that provides ML-callable C functions +# + +SHELL = /bin/sh + +INC_DIR = ../../include +CLIB_DIR = ../ + +INCLUDES = -I$(INC_DIR) -I$(CLIB_DIR) -I../../objs + +MAKE = make +AR = ar +ARFLAGS = rcv +RANLIB = ranlib + +LIBRARY = libsmlnj-ccalls.a + +VERSION = v-dummy + + + +OBJS = c-calls-lib.o \ + c-calls.o \ + c-calls-fns.o \ + c-entry.o \ + cutil.o + +C_CFUNS = cutil-cfuns.h + +$(LIBRARY) : $(VERSION) $(OBJS) + rm -rf $(LIBRARY) + $(AR) $(ARFLAGS) $(LIBRARY) $(OBJS) + $(RANLIB) $(LIBRARY) + +$(VERSION) : + ($(MAKE) MAKE="$(MAKE)" clean) + echo "$(VERSION)" > $(VERSION) + +c-calls-lib.o: $(INC_DIR)/ml-osdep.h $(INC_DIR)/ml-base.h \ + $(INC_DIR)/ml-values.h \ + $(CLIB_DIR)/ml-c.h cfun-proto-list.h cfun-list.h $(C_CFUNS) + $(CC) $(CFLAGS) $(DEFS) $(INCLUDES) $(LOCAL_INCLUDES) -c c-calls-lib.c + +c-entry.o: c-entry.asm c-calls.h + $(CPP) -D_ASM_ $(DEFS) $(INCLUDES) c-entry.asm > c-entry.s + $(AS) -o c-entry.o c-entry.s + +.c.o: $(INC_DIR)/ml-osdep.h $(INC_DIR)/ml-base.h $(INC_DIR)/ml-values.h \ + $(CLIB_DIR)/ml-c.h \ + $(INC_DIR)/cache-flush.h \ + c-calls.h \ + cfun-proto-list.h cfun-list.h + $(CC) $(CFLAGS) $(DEFS) $(INCLUDES) $(LOCAL_INCLUDES) -c $< + +clean : + rm -f v-* *.o *.s $(LIBRARY) + diff --git a/base/runtime/c-libs/smlnj-ccalls/makefile.win32 b/base/runtime/c-libs/smlnj-ccalls/makefile.win32 new file mode 100644 index 0000000..fe8f9c5 --- /dev/null +++ b/base/runtime/c-libs/smlnj-ccalls/makefile.win32 @@ -0,0 +1,66 @@ +# +# makefile for the C library that provides ML-callable C functions +# win32 specific + +SHELL = + +INC_DIR = ..\..\include +CLIB_DIR = ..\ + +INCLUDES = /I$(INC_DIR) /I$(CLIB_DIR) /I..\..\objs + +MAKEFILE = makefile.win32 +MAKE = nmake /F$(MAKEFILE) +AS = ml /Cx /Zd /Zi /Fr +AR = lib +ARFLAGS = +RANLIB = lib + +LIBRARY = libsmlnj-ccalls.lib + +VERSION = v-dummy + + + +OBJS = c-calls-lib.obj \ + c-calls.obj \ + c-calls-fns.obj \ + c-entry.obj \ + cutil.obj + +C_CFUNS = cutil-cfuns.h + +$(LIBRARY) : $(VERSION) $(OBJS) + del /F /Q $(LIBRARY) + $(AR) $(ARFLAGS) /out:$(LIBRARY) $(OBJS) + $(RANLIB) /out:$(LIBRARY) + +$(VERSION) : + ($(MAKE) MAKE="$(MAKE)" clean) + echo "$(VERSION)" > $(VERSION) + +DEPENDENTS = $(CLIB_DIR)\ml-c.h $(CLIB_DIR)\ml-c.h \ + $(INC_DIR)\ml-osdep.h $(INC_DIR)\ml-base.h \ + $(INC_DIR)\ml-values.h $(INC_DIR)\cache-flush.h \ + c-calls.h \ + cfun-proto-list.h cfun-list.h + +c-calls-lib.obj: $(INC_DIR)\ml-osdep.h $(DEPENDENTS) $(C_CFUNS) + $(CC) $(CFLAGS) $(DEFS) $(INCLUDES) $(LOCAL_INCLUDES) /c c-calls-lib.c + +c-entry.obj: c-entry.asm c-calls.h + $(CPP) /D_ASM_ $(DEFS) $(INCLUDES) c-entry.asm > c-entry.s + $(AS) /c c-entry.s + +c-calls.obj: $(DEPENDENTS) + $(CC) $(CFLAGS) $(DEFS) $(INCLUDES) $(LOCAL_INCLUDES) /c c-calls.c + +c-calls-fns.obj: $(DEPENDENTS) + $(CC) $(CFLAGS) $(DEFS) $(INCLUDES) $(LOCAL_INCLUDES) /c c-calls-fns.c + +cutil.obj: $(DEPENDENTS) + $(CC) $(CFLAGS) $(DEFS) $(INCLUDES) $(LOCAL_INCLUDES) /c cutil.c + +clean: + del /F /Q v-* *.obj *.pdb *.s $(LIBRARY) + diff --git a/base/runtime/c-libs/smlnj-date/cfun-list.h b/base/runtime/c-libs/smlnj-date/cfun-list.h new file mode 100644 index 0000000..609074d --- /dev/null +++ b/base/runtime/c-libs/smlnj-date/cfun-list.h @@ -0,0 +1,20 @@ +/* cfun-list.h + * + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * This file lists the directory library of C functions that are callable by ML. + */ + +#ifndef CLIB_NAME +#define CLIB_NAME "SMLNJ-Date" +#define CLIB_VERSION "1.3" +#define CLIB_DATE "June 9, 2019" +#endif + +CFUNC("localOffset", _ml_Date_localOffset, "") +CFUNC("localOffsetForTime", _ml_Date_localOffsetForTime, "") +CFUNC("localTime", _ml_Date_localtime, "") +CFUNC("gmTime", _ml_Date_gmtime, "") +CFUNC("mkTime", _ml_Date_mktime, "") +CFUNC("strfTime", _ml_Date_strftime, "") diff --git a/base/runtime/c-libs/smlnj-date/cfun-proto-list.h b/base/runtime/c-libs/smlnj-date/cfun-proto-list.h new file mode 100644 index 0000000..16b3e96 --- /dev/null +++ b/base/runtime/c-libs/smlnj-date/cfun-proto-list.h @@ -0,0 +1,18 @@ +/* cfun-proto-list.h + * + * COPYRIGHT (c) 1194 AT&T Bell Laboratories. + */ + +#ifndef _CFUN_PROTO_LIST_ +#define _CFUN_PROTO_LIST_ + +#ifndef _C_LIBRARY_ +# include "c-library.h" +#endif + +/* the external definitions for the C functions */ +#define CFUNC(NAME, FUNC, MLTYPE) CFUNC_PROTO(NAME, FUNC, MLTYPE) +#include "cfun-list.h" +#undef CFUNC + +#endif /* !_CFUN_PROTO_LIST_ */ diff --git a/base/runtime/c-libs/smlnj-date/gmtime.c b/base/runtime/c-libs/smlnj-date/gmtime.c new file mode 100644 index 0000000..f1c0688 --- /dev/null +++ b/base/runtime/c-libs/smlnj-date/gmtime.c @@ -0,0 +1,61 @@ +/*! \file gmtime.c + * + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + */ + +#include "ml-base.h" +#include "ml-objects.h" +#include "cfun-proto-list.h" +#include "ml-c.h" + +#if !defined(OPSYS_WIN32) + +#include "unix-date.h" + +/* _ml_Date_gmtime : Word64.word -> (int * int * int * int * int * int * int * int * int) + * + * Takes a UTC time value (in seconds), and converts it to a 9-tuple with + * the fields: tm_sec, tm_min, tm_hour, tm_mday, tm_mon, tm_year, tm_wday, + * tm_yday, and tm_isdst. + */ +ml_val_t _ml_Date_gmtime (ml_state_t *msp, ml_val_t arg) +{ + time_t t = ns_to_time(WORD64_MLtoC(arg)); + struct tm tmbuf; + + if (gmtime_r (&t, &tmbuf) == NULL) { + return RAISE_SYSERR(msp, 0); + } + else { + return _ml_alloc_tm (msp, &tmbuf); + } + +} /* end of _ml_Date_gmtime */ + +#else /* OPSYS_WIN32 */ + +#include "win32-date.h" + +/* _ml_Date_gmtime : Word64.word -> (int * int * int * int * int * int * int * int * int) + * + * Takes a UTC time value (in nanoseconds), and converts it to a 9-tuple with + * the fields: tm_sec, tm_min, tm_hour, tm_mday, tm_mon, tm_year, tm_wday, + * tm_yday, and tm_isdst. + */ +ml_val_t _ml_Date_gmtime (ml_state_t *msp, ml_val_t arg) +{ + FILETIME utcFT; + SYSTEMTIME utcST; + + ns_to_filetime (WORD64_MLtoC(arg), &utcFT); + + if (! FileTimeToSystemTime (&utcFT, &utcST)) { + return RAISE_SYSERR(msp, 0); + } + + return _ml_alloc_tm (msp, &utcST, 0); /* UTC is never adjusted for DST */ + +} /* end of _ml_Date_gmtime */ + +#endif diff --git a/base/runtime/c-libs/smlnj-date/localoffset.c b/base/runtime/c-libs/smlnj-date/localoffset.c new file mode 100644 index 0000000..62a27e5 --- /dev/null +++ b/base/runtime/c-libs/smlnj-date/localoffset.c @@ -0,0 +1,146 @@ +/*! \file localoffset.c + * + * \author John Reppy + * + * Runtime support for determining the local offset in seconds from UTC. + */ + +/* + * COPYRIGHT (c) 2015 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + */ + +#include "ml-base.h" +#include "ml-osdep.h" +#include "ml-base.h" +#include "ml-objects.h" +#include "cfun-proto-list.h" +#include "ml-c.h" + +#if !defined(OPSYS_WIN32) + +#include "unix-date.h" + +#if defined(HAS_GETTIMEOFDAY) +# include +#else +# error no timeofday mechanism +#endif + +/* LocalOffset: + * + * Helper function that takes a time t in seconds and returns the offset from UTC + * of t in the local timezone as an ML Int32.int value. This value reflects + * not only the geographical location of the host system, but + * also daylight savings time (if it is in effect at time t). + */ +PVT ml_val_t LocalOffset (ml_state_t *msp, time_t t) +{ + struct tm tmbuf; + int isDST; + time_t t2; + + /* get the local timezone's daylight saving's time info */ + if (localtime_r (&t, &tmbuf) == NULL) { + return RAISE_SYSERR(msp, 0); + } + isDST = tmbuf.tm_isdst; + + /* convert to UTC and local tm structs */ + if (gmtime_r (&t, &tmbuf) == NULL) { + return RAISE_SYSERR(msp, 0); + } + + /* convert the UTC tm struct back into seconds using the local timezone info (including + * the daylight savings time field from localTM). The local offset will be the difference + * between this value and the original time. + */ + tmbuf.tm_isdst = isDST; + t2 = mktime (&tmbuf); + + return INT32_CtoML(msp, t2 - t); + +} + +/* _ml_Date_localOffset : unit -> Int32.int + * + * Returns the offset from UTC of the current time in the local timezone. + * This value reflects not only the geographical location of the host system, but + * also daylight savings time (if it is in effect). + */ +ml_val_t _ml_Date_localOffset (ml_state_t *msp, ml_val_t arg) +{ + return LocalOffset (msp, time (NIL(time_t *))); + +} /* end of _ml_Date_localoffset */ + +/* _ml_Date_localOffsetForTime : Word64.word -> Int32.int + * + * Returns the offset from UTC of the given time in the local timezone. + * This value reflects not only the geographical location of the host system, but + * also daylight savings time (if it is in effect). + */ +ml_val_t _ml_Date_localOffsetForTime (ml_state_t *msp, ml_val_t arg) +{ + return LocalOffset (msp, ns_to_time(WORD64_MLtoC(arg))); + +} /* end of _ml_Date_localoffset */ + +#else /* OPSYS_WIN32 */ + +#include "win32-date.h" + +/* _ml_Date_localOffset : unit -> Int32.int + * + * Returns the offset from UTC of the current time in the local timezone. + * This value reflects not only the geographical location of the host system, but + * also daylight savings time (if it is in effect). + */ +ml_val_t _ml_Date_localOffset (ml_state_t *msp, ml_val_t arg) +{ + SYSTEMTIME localST; + FILETIME localFT, utcFT; + + GetLocalTime (&localST); + if (! SystemTimeToFileTime (&localST, &localFT)) { + return RAISE_SYSERR(msp, 0); + } + + if (LocalFileTimeToFileTime (&localFT, &utcFT)) { + /* compute offset (local - UTC) in seconds. */ + Int64_t localSec = (Int64_t)(filetime_to_100ns (&localFT) / 10000000); + Int64_t utcSec = (Int64_t)(filetime_to_100ns (&utcFT) / 10000000); + return INT32_CtoML(msp, (Int32_t)(localSec - utcSec)); + } + else { + return RAISE_SYSERR(msp, 0); + } + +} /* end of _ml_Date_localoffset */ + +/* _ml_Date_localOffsetForTime : Word64.word -> Int32.int + * + * Returns the offset from UTC of the given time in the local timezone. + * This value reflects not only the geographical location of the host system, but + * also daylight savings time (if it is in effect). + */ +ml_val_t _ml_Date_localOffsetForTime (ml_state_t *msp, ml_val_t arg) +{ + FILETIME localFT, utcFT; + + Unsigned64_t utcNsec = WORD64_MLtoC(arg); + ns_to_filetime (utcNsec, &utcFT); + + if (FileTimeToLocalFileTime (&utcFT, &localFT)) { + /* compute offset (local - UTC) in seconds. */ + Int64_t localSec = (Int64_t)(filetime_to_100ns (&localFT)); + Int64_t utcSec = (Int64_t)(utcNsec / NS_PER_SEC); + return INT32_CtoML(msp, (Int32_t)(localSec - utcSec)); + } + else { + return RAISE_SYSERR(msp, 0); + } + +} /* end of _ml_Date_localoffset */ + +#endif diff --git a/base/runtime/c-libs/smlnj-date/localtime.c b/base/runtime/c-libs/smlnj-date/localtime.c new file mode 100644 index 0000000..cbf0e42 --- /dev/null +++ b/base/runtime/c-libs/smlnj-date/localtime.c @@ -0,0 +1,83 @@ +/*! \file localtime.c + * + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + */ + +#include "ml-base.h" +#include "ml-objects.h" +#include "cfun-proto-list.h" +#include "ml-c.h" + +#if !defined(OPSYS_WIN32) + +#include "unix-date.h" + +/* _ml_Date_localtime : Word64.word -> (int * int * int * int * int * int * int * int * int) + * + * Takes a UTC time value (in seconds), and converts it to local time represented + * as a 9-tuple with the fields: tm_sec, tm_min, tm_hour, tm_mday, tm_mon, tm_year, + * tm_wday, tm_yday, and tm_isdst. + */ +ml_val_t _ml_Date_localtime (ml_state_t *msp, ml_val_t arg) +{ + time_t t = ns_to_time(WORD64_MLtoC(arg)); + struct tm tmbuf; + + if (localtime_r (&t, &tmbuf) == NULL) { + RAISE_SYSERR(msp,0); + } + else { + return _ml_alloc_tm (msp, &tmbuf); + } + +} /* end of _ml_Date_localtime */ + +#else /* OPSYS_WIN32 */ + +#include "win32-date.h" + +/* _ml_Date_localtime : Word64.word -> (int * int * int * int * int * int * int * int * int) + * + * Takes a UTC time value (in seconds), and converts it to local time represented + * as a 9-tuple with the fields: tm_sec, tm_min, tm_hour, tm_mday, tm_mon, tm_year, + * tm_wday, tm_yday, and tm_isdst. + */ +ml_val_t _ml_Date_localtime (ml_state_t *msp, ml_val_t arg) +{ + FILETIME utcFT, localFT; + SYSTEMTIME localST, utcST; + TIME_ZONE_INFORMATION tzInfo; + BOOL isDST; + + ns_to_filetime (WORD64_MLtoC(arg), &utcFT); + + /* convert to local system time */ + if (! FileTimeToLocalFileTime(&utcFT, &localFT)) { + return RAISE_SYSERR(msp, 0); + } + + /* convert to system time */ + if (! FileTimeToSystemTime(&localFT, &localST)) { + return RAISE_SYSERR(msp, 0); + } + + /* need to figure out if localST is in DST; we do this by getting the local + * timezone info for the given year and then check the range of dates + */ + if (! GetTimeZoneInformationForYear(localST.wYear, NULL, &tzInfo)) { + return RAISE_SYSERR(msp, 0); + } + if (tzInfo.StandardDate.wMonth == 0) { + /* timezone does not support DST */ + isDST = FALSE; + } + else { + /* TODO: test localST against tzInfo.StandardState and tzInfo.DaylightDate */ + } + + return _ml_alloc_tm (msp, &localST, isDST); + +} /* end of _ml_Date_localtime */ + +#endif diff --git a/base/runtime/c-libs/smlnj-date/makefile b/base/runtime/c-libs/smlnj-date/makefile new file mode 100644 index 0000000..333954b --- /dev/null +++ b/base/runtime/c-libs/smlnj-date/makefile @@ -0,0 +1,45 @@ +# +# the makefile for the Date library +# + +SHELL = /bin/sh + +INC_DIR = ../../include +CLIB_DIR = ../ + +INCLUDES = -I$(INC_DIR) -I$(CLIB_DIR) -I../../objs + +MAKE = make +AR = ar +ARFLAGS = rcv +RANLIB = ranlib + +LIBRARY = libsmlnj-date.a + +VERSION = v-dummy + +OBJS = smlnj-date-lib.o \ + gmtime.o \ + localoffset.o \ + localtime.o \ + mktime.o \ + strftime.o \ + unix-date.o + +$(OBJS) : $(VERSION) +$(LIBRARY) : $(VERSION) $(OBJS) + rm -rf $(LIBRARY) + $(AR) $(ARFLAGS) $(LIBRARY) $(OBJS) + $(RANLIB) $(LIBRARY) + +$(VERSION) : + ($(MAKE) MAKE="$(MAKE)" clean) + echo "$(VERSION)" > $(VERSION) + +.c.o: $(INC_DIR)/ml-unixdep.h $(INC_DIR)/ml-base.h $(INC_DIR)/ml-values.h \ + $(CLIB_DIR)/ml-c.h cfun-proto-list.h cfun-list.h + $(CC) $(CFLAGS) $(DEFS) $(INCLUDES) -c $< + +clean : + rm -f v-* *.o $(LIBRARY) + diff --git a/base/runtime/c-libs/smlnj-date/makefile.win32 b/base/runtime/c-libs/smlnj-date/makefile.win32 new file mode 100644 index 0000000..6124ca9 --- /dev/null +++ b/base/runtime/c-libs/smlnj-date/makefile.win32 @@ -0,0 +1,66 @@ +# +# the makefile for the Date library +# win32 specific + +SHELL = + +INC_DIR = ..\..\include +CLIB_DIR = ..\ + +INCLUDES = /I$(INC_DIR) /I$(CLIB_DIR) /I..\..\objs + +MAKEFILE = makefile.win32 +MAKE = nmake /F$(MAKEFILE) +AR = lib +ARFLAGS = +RANLIB = lib + +LIBRARY = libsmlnj-date.lib + +VERSION = v-dummy + +OBJS = smlnj-date-lib.obj \ + gmtime.obj \ + localoffset.obj \ + localtime.obj \ + mktime.obj \ + strftime.obj \ + win32-date.obj + +$(LIBRARY) : $(VERSION) $(OBJS) + del /F /Q $(LIBRARY) + $(AR) $(ARFLAGS) /out:$(LIBRARY) $(OBJS) + $(RANLIB) /out:$(LIBRARY) + +$(VERSION) : + ($(MAKE) MAKE="$(MAKE)" clean) + echo "$(VERSION)" > $(VERSION) + +DEPENDENTS = $(INC_DIR)\ml-unixdep.h $(INC_DIR)\ml-base.h \ + $(INC_DIR)\ml-values.h \ + $(CLIB_DIR)\ml-c.h cfun-proto-list.h cfun-list.h \ + win32-date.h + +smlnj-date-lib.obj: smlnj-date-lib.c $(DEPENDENTS) + $(CC) $(CFLAGS) $(DEFS) $(INCLUDES) /c smlnj-date-lib.c + +gmtime.obj: gmtime.c $(DEPENDENTS) + $(CC) $(CFLAGS) $(DEFS) $(INCLUDES) /c gmtime.c + +localoffset.obj: localoffset.c $(DEPENDENTS) + $(CC) $(CFLAGS) $(DEFS) $(INCLUDES) /c localoffset.c + +localtime.obj: localtime.c $(DEPENDENTS) + $(CC) $(CFLAGS) $(DEFS) $(INCLUDES) /c localtime.c + +mktime.obj: mktime.c $(DEPENDENTS) + $(CC) $(CFLAGS) $(DEFS) $(INCLUDES) /c mktime.c + +strftime.obj: strftime.c $(DEPENDENTS) + $(CC) $(CFLAGS) $(DEFS) $(INCLUDES) /c strftime.c + +win32-date.obj: strftime.c $(DEPENDENTS) + $(CC) $(CFLAGS) $(DEFS) $(INCLUDES) /c win32-date.c + +clean : + del /F /Q v-* *.obj *.pdb $(LIBRARY) diff --git a/base/runtime/c-libs/smlnj-date/mktime.c b/base/runtime/c-libs/smlnj-date/mktime.c new file mode 100644 index 0000000..b7aaabc --- /dev/null +++ b/base/runtime/c-libs/smlnj-date/mktime.c @@ -0,0 +1,88 @@ +/*! \file mktime.c + * + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + */ + +#include "ml-base.h" +#include "ml-c.h" +#include "ml-objects.h" +#include "cfun-proto-list.h" + +#if !defined(OPSYS_WIN32) + +#include +#include "unix-date.h" + +/* _ml_Date_mktime : (int * int * int * int * int * int * int * int * int) -> Word64.word + * + * This takes a 9-tuple with the fields: tm_sec, tm_min, tm_hour, tm_mday, + * tm_mon, tm_year, tm_wday, tm_yday, tm_isdst, and returns the corresponding + * localtime value (in nanoseconds). + */ +ml_val_t _ml_Date_mktime (ml_state_t *msp, ml_val_t arg) +{ + struct tm tm; + time_t t; + + memset (&tm, 0, sizeof(tm)); + tm.tm_sec = REC_SELINT(arg, 0); + tm.tm_min = REC_SELINT(arg, 1); + tm.tm_hour = REC_SELINT(arg, 2); + tm.tm_mday = REC_SELINT(arg, 3); + tm.tm_mon = REC_SELINT(arg, 4); + tm.tm_year = REC_SELINT(arg, 5) - 1900; + /* tm.tm_wday = REC_SELINT(arg, 6); */ /* ignored by mktime */ + /* tm.tm_yday = REC_SELINT(arg, 7); */ /* ignored by mktime */ + tm.tm_isdst = REC_SELINT(arg, 8); + + t = mktime (&tm); + + if (t < 0) { + return RAISE_ERROR(msp, "Invalid date"); + } + else { + return ML_AllocWord64(msp, time_to_ns(t)); + } + +} /* end of _ml_Date_mktime */ + +#else /* OPSYS_WIN32 */ + +#include "win32-date.h" + +/* _ml_Date_mktime : (int * int * int * int * int * int * int * int * int) -> Word64.word + * + * This takes a 9-tuple with the fields: tm_sec, tm_min, tm_hour, tm_mday, + * tm_mon, tm_year, tm_wday, tm_yday, tm_isdst, and returns the corresponding + * localtime value (in nanoseconds). + */ +ml_val_t _ml_Date_mktime (ml_state_t *msp, ml_val_t arg) +{ + SYSTEMTIME localST; + FILETIME localFT, utcFT; + + localST.wSecond = REC_SELINT(arg, 0); + localST.wMinute = REC_SELINT(arg, 1); + localST.wHour = REC_SELINT(arg, 2); + localST.wDay = REC_SELINT(arg, 3); + localST.wMonth = REC_SELINT(arg, 4); + localST.wYear = REC_SELINT(arg, 5); + localST.wDayOfWeek = REC_SELINT(arg, 6); + localST.wMilliseconds = 0; + + /* convert to UTC FILETIME */ + if (! TzSpecificLocalTimeToSystemTime(NULL, &localST, &utcFT)) { + return RAISE_ERROR(msp, "Invalid date"); + } + + /* convert UTC FILETIME to local FILETIME */ + if (! FileTimeToLocalFileTime(&utcFT, &localFT)) { + return RAISE_ERROR(msp, "Invalid date"); + } + + return ML_AllocWord64(msp, filetime_to_ns(&localFT)); + +} /* end of _ml_Date_mktime */ + +#endif diff --git a/base/runtime/c-libs/smlnj-date/smlnj-date-lib.c b/base/runtime/c-libs/smlnj-date/smlnj-date-lib.c new file mode 100644 index 0000000..f7eb475 --- /dev/null +++ b/base/runtime/c-libs/smlnj-date/smlnj-date-lib.c @@ -0,0 +1,28 @@ +/* smlnj-date-lib.c + * + * COPYRIGHT (c) 1995 AT&T Bell Laboratories. + */ + +#include "ml-base.h" +#include "c-library.h" +#include "cfun-proto-list.h" + + +/* the table of C functions and ML names */ +#define CFUNC(NAME, FUNC, MLTYPE) CFUNC_BIND(NAME, FUNC, MLTYPE) +PVT cfunc_binding_t CFunTable[] = { +#include "cfun-list.h" + CFUNC_NULL_BIND + }; +#undef CFUNC + + +/* the Date library */ +c_library_t SMLNJ_Date_Library = { + CLIB_NAME, + CLIB_VERSION, + CLIB_DATE, + NIL(clib_init_fn_t), + CFunTable + }; + diff --git a/base/runtime/c-libs/smlnj-date/strftime.c b/base/runtime/c-libs/smlnj-date/strftime.c new file mode 100644 index 0000000..f9116b2 --- /dev/null +++ b/base/runtime/c-libs/smlnj-date/strftime.c @@ -0,0 +1,54 @@ +/*! \file strftime.c + * + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + */ + +#include +#include +#include "ml-base.h" +#include "ml-values.h" +#include "ml-objects.h" +#include "ml-c.h" +#include "cfun-proto-list.h" + +/* FIXME: for Windows, we should use GetTimeFormatEx */ + +/* _ml_Date_strftime : + * (string * (int * int * int * int * int * int * int * int * int)) -> string + * + * This takes a format field and nine integer fields (sec, min, hour, mday, mon, + * year, wday, yday, and isdst), and converts it into a string representation + * according to the format string. + */ +ml_val_t _ml_Date_strftime (ml_state_t *msp, ml_val_t arg) +{ + ml_val_t fmt = REC_SEL(arg, 0); + ml_val_t res, date; + struct tm tm; + char buf[512]; + size_t sz; + + date = REC_SEL(arg, 1); + memset (&tm, 0, sizeof(tm)); + tm.tm_sec = REC_SELINT(date, 0); + tm.tm_min = REC_SELINT(date, 1); + tm.tm_hour = REC_SELINT(date, 2); + tm.tm_mday = REC_SELINT(date, 3); + tm.tm_mon = REC_SELINT(date, 4); + tm.tm_year = REC_SELINT(date, 5) - 1900; + tm.tm_wday = REC_SELINT(date, 6); + tm.tm_yday = REC_SELINT(date, 7); + tm.tm_isdst = REC_SELINT(date, 8); + + sz = strftime (buf, sizeof(buf), STR_MLtoC(fmt), &tm); + if (sz > 0) { + res = ML_AllocString(msp, sz); + strncpy (STR_MLtoC(res), buf, sz); + return res; + } + else { + return RAISE_ERROR(msp, "strftime failed"); + } + +} /* end of _ml_Date_strftime */ diff --git a/base/runtime/c-libs/smlnj-date/unix-date.c b/base/runtime/c-libs/smlnj-date/unix-date.c new file mode 100644 index 0000000..e6cafad --- /dev/null +++ b/base/runtime/c-libs/smlnj-date/unix-date.c @@ -0,0 +1,28 @@ +/*! \file unix-date.c + * + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + */ + +#include "ml-base.h" +#include "ml-objects.h" +#include "unix-date.h" + +/* allocate a 9-tuple for a `struct tm` value */ +ml_val_t _ml_alloc_tm (ml_state_t *msp, const struct tm *tm) +{ + + ML_AllocWrite(msp, 0, MAKE_DESC(DTAG_record, 9)); + ML_AllocWrite(msp, 1, INT_CtoML(tm->tm_sec)); + ML_AllocWrite(msp, 2, INT_CtoML(tm->tm_min)); + ML_AllocWrite(msp, 3, INT_CtoML(tm->tm_hour)); + ML_AllocWrite(msp, 4, INT_CtoML(tm->tm_mday)); + ML_AllocWrite(msp, 5, INT_CtoML(tm->tm_mon)); + ML_AllocWrite(msp, 6, INT_CtoML(tm->tm_year + 1900)); + ML_AllocWrite(msp, 7, INT_CtoML(tm->tm_wday)); + ML_AllocWrite(msp, 8, INT_CtoML(tm->tm_yday)); + ML_AllocWrite(msp, 9, INT_CtoML(tm->tm_isdst)); + + return ML_Alloc(msp, 9); + +} diff --git a/base/runtime/c-libs/smlnj-date/unix-date.h b/base/runtime/c-libs/smlnj-date/unix-date.h new file mode 100644 index 0000000..1821fb3 --- /dev/null +++ b/base/runtime/c-libs/smlnj-date/unix-date.h @@ -0,0 +1,29 @@ +/*! \file unix-date.h + * + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * Common definitions for UNIX versions of the Date functions. + */ + +#ifndef _UNIX_DATE_H_ +#define _UNIX_DATE_H_ + +#include + +/* convert time_t value (in seconds) to 64-bit unsigned nanoseconds */ +STATIC_INLINE Unsigned64_t time_to_ns (time_t t) +{ + return NS_PER_SEC * (Unsigned64_t)t; +} + +/* convert 64-bit unsigned nanoseconds to a time_t value (in seconds) */ +STATIC_INLINE time_t ns_to_time (Unsigned64_t ns) +{ + return (time_t)(ns / NS_PER_SEC); +} + +/* allocate a 9-tuple for a `struct tm` value */ +ml_val_t _ml_alloc_tm (ml_state_t *msp, const struct tm *tm); + +#endif /* _UNIX_DATE_H_ */ diff --git a/base/runtime/c-libs/smlnj-date/win32-date.c b/base/runtime/c-libs/smlnj-date/win32-date.c new file mode 100644 index 0000000..b6edb30 --- /dev/null +++ b/base/runtime/c-libs/smlnj-date/win32-date.c @@ -0,0 +1,84 @@ +/*! \file win32-date.c + * + * \author John Reppy + */ + +/* + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * Utility code for date functions on Windows + */ + +#include "ml-base.h" +#include "ml-objects.h" +#include "ml-c.h" +#include "win32-date.h" + +STATIC_INLINE BOOL divisable (WORD a, WORD b) { return (a % b) == 0; } + +STATIC_INLINE BOOL isLeapYear (WORD y) +{ + return ((divisable(y, 4) && (! divisable(y, 100))) || divisable(y, 400)); +} + +/* compute the day of the year for a given SYSTEMTIME struct */ +int _ml_year_day (const SYSTEMTIME *st) +{ + int yday = st->wDay; + + /* January */ + if (st->wMonth == 1) return yday; + yday += 31; + /* February */ + if (st->wMonth == 2) return yday; + yday += (isLeapYear(st->wYear) ? 29 : 28); + /* March */ + if (st->wMonth == 3) return yday; + yday += 31; + /* April */ + if (st->wMonth == 4) return yday; + yday += 30; + /* May */ + if (st->wMonth == 5) return yday; + yday += 31; + /* June */ + if (st->wMonth == 6) return yday; + yday += 30; + /* July */ + if (st->wMonth == 7) return yday; + yday += 31; + /* August */ + if (st->wMonth == 8) return yday; + yday += 31; + /* September */ + if (st->wMonth == 9) return yday; + yday += 30; + /* October */ + if (st->wMonth == 10) return yday; + yday += 31; + /* November */ + if (st->wMonth == 11) return yday; + yday += 30; + + return yday; + +} + +ml_val_t _ml_alloc_tm (ml_state_t *msp, const SYSTEMTIME *st, BOOL isDST) +{ + /* The SYSTEMTIME struct has everything that we need except the day of the year */ + ML_AllocWrite(msp, 0, MAKE_DESC(DTAG_record, 9)); + ML_AllocWrite(msp, 1, INT_CtoML(st->wSecond)); + ML_AllocWrite(msp, 2, INT_CtoML(st->wMinute)); + ML_AllocWrite(msp, 3, INT_CtoML(st->wHour)); + ML_AllocWrite(msp, 4, INT_CtoML(st->wDay)); + ML_AllocWrite(msp, 5, INT_CtoML(st->wMonth - 1)); /* convert to 0..11 */ + ML_AllocWrite(msp, 6, INT_CtoML(st->wYear)); + ML_AllocWrite(msp, 7, INT_CtoML(st->wDayOfWeek)); + ML_AllocWrite(msp, 8, INT_CtoML(_ml_year_day(st))); + ML_AllocWrite(msp, 9, INT_CtoML(isDST)); + + return ML_Alloc(msp, 9); + +} diff --git a/base/runtime/c-libs/smlnj-date/win32-date.h b/base/runtime/c-libs/smlnj-date/win32-date.h new file mode 100644 index 0000000..49b7d51 --- /dev/null +++ b/base/runtime/c-libs/smlnj-date/win32-date.h @@ -0,0 +1,40 @@ +/*! \file win32-date.h + * + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * Common definitions for Windows versions of the Date functions. + */ + +#ifndef _WIN32_DATE_H_ +#define _WIN32_DATE_H_ + +#include "ml-base.h" +#include + +/* convert a FILETIME to a 64-bit unsigned integer */ +STATIC_INLINE Unsigned64_t filetime_to_100ns (FILETIME *ft) +{ + return ((Unsigned64_t)ft->dwHighDateTime << 32) | (Unsigned64_t)ft->dwLowDateTime; +} + +/* convert an unsigned 64-bit nanoseconds value to a FILETIME value. */ +STATIC_INLINE void ns_to_filetime (Unsigned64_t ns, FILETIME *ft) +{ + ns /= 100; /* convert to 100ns units */ + + ft->dwLowDateTime = (DWORD)ns; + ft->dwHighDateTime = (DWORD)(ns >> 32); +} + +/* convert a FILETIME in 100ns units to unsigned nanoseconds */ +STATIC_INLINE Unsigned64_t filetime_to_ns (const FILETIME *ft) +{ + return 100 * filetime_to_100ns(ft); /* convert to nanoseconds */ +} + +/* compute the day of the year from a SYSTEMTIME struct */ +int _ml_year_day (const SYSTEMTIME *st); +ml_val_t _ml_alloc_tm (ml_state_t *msp, const SYSTEMTIME *st, BOOL isDST); + +#endif /* !_WIN32_DATE_H_ */ diff --git a/base/runtime/c-libs/smlnj-math/README b/base/runtime/c-libs/smlnj-math/README new file mode 100644 index 0000000..722a0b8 --- /dev/null +++ b/base/runtime/c-libs/smlnj-math/README @@ -0,0 +1,16 @@ +These are template files for the glue needed to put an ML callable +library together. See the "HOWTO-ADD-C-CODE" file in the "notes" +directory for complete details. The files are used as follows: + + makefile a makefile template; define the make variables LIBRARY + and OBJS. + + cfun-proto-list.h copy this file unchanged + + cfun-list.h copy this file, and replace the fields in <<>>. Add + a CFUNC entry for each C function that is callable from + ML. + + library-template.c copy and rename this file; change the <> + to the name of your library table. + diff --git a/base/runtime/c-libs/smlnj-math/atan64.c b/base/runtime/c-libs/smlnj-math/atan64.c new file mode 100644 index 0000000..ac83d93 --- /dev/null +++ b/base/runtime/c-libs/smlnj-math/atan64.c @@ -0,0 +1,23 @@ +/* atan64.c + * + * COPYRIGHT (c) 1996 AT&T Research. + */ + +#include +#include "ml-base.h" +#include "ml-values.h" +#include "ml-objects.h" +#include "cfun-proto-list.h" + +/* _ml_Math_atan64: + */ +ml_val_t _ml_Math_atan64 (ml_state_t *msp, ml_val_t arg) +{ + double d = *(PTR_MLtoC(double, arg)); + ml_val_t res; + + REAL64_ALLOC(msp, res, atan(d)); + + return res; + +} /* end of _ml_Math_atan64 */ diff --git a/base/runtime/c-libs/smlnj-math/cfun-list.h b/base/runtime/c-libs/smlnj-math/cfun-list.h new file mode 100644 index 0000000..1aa1aac --- /dev/null +++ b/base/runtime/c-libs/smlnj-math/cfun-list.h @@ -0,0 +1,20 @@ +/* cfun-list.h + * + * COPYRIGHT (c) 1996 AT&T Bell Laboratories. + * + * This file lists the directory library of C functions that are callable by ML. + */ + +#ifndef CLIB_NAME +#define CLIB_NAME "SMLNJ-Math" +#define CLIB_VERSION "1.1" +#define CLIB_DATE "November 1, 1996" +#endif + +CFUNC("ctlRoundingMode", _ml_Math_ctlrndmode, "int option -> int") +CFUNC("cos64", _ml_Math_cos64, "real -> real") +CFUNC("sin64", _ml_Math_sin64, "real -> real") +CFUNC("exp64", _ml_Math_exp64, "real -> (real * int)") +CFUNC("log64", _ml_Math_log64, "real -> real") +CFUNC("sqrt64", _ml_Math_sqrt64, "real -> real") +CFUNC("atan64", _ml_Math_atan64, "real -> real") diff --git a/base/runtime/c-libs/smlnj-math/cfun-proto-list.h b/base/runtime/c-libs/smlnj-math/cfun-proto-list.h new file mode 100644 index 0000000..16b3e96 --- /dev/null +++ b/base/runtime/c-libs/smlnj-math/cfun-proto-list.h @@ -0,0 +1,18 @@ +/* cfun-proto-list.h + * + * COPYRIGHT (c) 1194 AT&T Bell Laboratories. + */ + +#ifndef _CFUN_PROTO_LIST_ +#define _CFUN_PROTO_LIST_ + +#ifndef _C_LIBRARY_ +# include "c-library.h" +#endif + +/* the external definitions for the C functions */ +#define CFUNC(NAME, FUNC, MLTYPE) CFUNC_PROTO(NAME, FUNC, MLTYPE) +#include "cfun-list.h" +#undef CFUNC + +#endif /* !_CFUN_PROTO_LIST_ */ diff --git a/base/runtime/c-libs/smlnj-math/cos64.c b/base/runtime/c-libs/smlnj-math/cos64.c new file mode 100644 index 0000000..70c4806 --- /dev/null +++ b/base/runtime/c-libs/smlnj-math/cos64.c @@ -0,0 +1,23 @@ +/* cos64.c + * + * COPYRIGHT (c) 1996 AT&T Research. + */ + +#include +#include "ml-base.h" +#include "ml-values.h" +#include "ml-objects.h" +#include "cfun-proto-list.h" + +/* _ml_Math_cos64: + */ +ml_val_t _ml_Math_cos64 (ml_state_t *msp, ml_val_t arg) +{ + double d = *(PTR_MLtoC(double, arg)); + ml_val_t res; + + REAL64_ALLOC(msp, res, cos(d)); + + return res; + +} /* end of _ml_Math_cos64 */ diff --git a/base/runtime/c-libs/smlnj-math/ctlrndmode.c b/base/runtime/c-libs/smlnj-math/ctlrndmode.c new file mode 100644 index 0000000..0895d4c --- /dev/null +++ b/base/runtime/c-libs/smlnj-math/ctlrndmode.c @@ -0,0 +1,61 @@ +/*! \file ctlrndmode.c + * + * \author John Reppy + */ + +/* + * COPYRIGHT (c) 2022 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + */ + +#include "ml-base.h" +#include "fp-dep.h" +#include "ml-objects.h" +#include "cfun-proto-list.h" +#include "ml-c.h" + +#ifndef NO_ROUNDING_MODE_CTL +/* Mapping between the ML and C representations of rounding modes. */ +#if defined(RMODE_C_EQ_ML) +# define RMODE_CtoML(m) INT_CtoML(m) +# define RMODE_MLtoC(m) INT_MLtoC(m) +#else +# define RMODE_CtoML(m) \ + (RMODE_EQ(m, FE_TONEAREST) ? INT_CtoML(0) \ + : (RMODE_EQ(m, FE_TOWARDZERO) ? INT_CtoML(1) \ + : (RMODE_EQ(m, FE_UPWARD) ? INT_CtoML(2) : INT_CtoML(3)))) +PVT fe_rnd_mode_t ModeMap[4] = { + FE_TONEAREST, FE_TOWARDZERO, FE_UPWARD, FE_DOWNWARD + }; +# define RMODE_MLtoC(m) ModeMap[INT_MLtoC(m)] +#endif +#endif /* !NO_ROUNDING_MODE_CTL */ + +/* _ml_Math_ctlrndmode : int option -> int + * + * Get/set the rounding mode; the values are interpreted as follows: + * + * 0 To nearest + * 1 To zero + * 2 To +Inf + * 3 To -Inf + */ +ml_val_t _ml_Math_ctlrndmode (ml_state_t *msp, ml_val_t arg) +{ +#ifdef NO_ROUNDING_MODE_CTL + return RAISE_ERROR(msp, "Rounding mode control not supported"); + +#else + if (arg == OPTION_NONE) { + fe_rnd_mode_t res = fegetround(); + return RMODE_CtoML(res); + } + else { + fe_rnd_mode_t m = RMODE_MLtoC(OPTION_get(arg)); + fe_rnd_mode_t res = fesetround(m); + return RMODE_CtoML(res); + } +#endif + +} /* end of _ml_Math_ctlrndmode */ + diff --git a/base/runtime/c-libs/smlnj-math/dtoa.c b/base/runtime/c-libs/smlnj-math/dtoa.c new file mode 100644 index 0000000..ac96dff --- /dev/null +++ b/base/runtime/c-libs/smlnj-math/dtoa.c @@ -0,0 +1,2706 @@ +/**************************************************************** + * + * The author of this software is David M. Gay. + * + * Copyright (c) 1991 by Lucent Technologies. + * + * Permission to use, copy, modify, and distribute this software for any + * purpose without fee is hereby granted, provided that this entire notice + * is included in all copies of any software which is or includes a copy + * or modification of this software and in all copies of the supporting + * documentation for such software. + * + * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED + * WARRANTY. IN PARTICULAR, NEITHER THE AUTHOR NOR LUCENT MAKES ANY + * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY + * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE. + * + ***************************************************************/ + +/* Please send bug reports to + David M. Gay + Bell Laboratories, Room 2C-463 + 600 Mountain Avenue + Murray Hill, NJ 07974-0636 + U.S.A. + dmg@bell-labs.com + */ + +/* On a machine with IEEE extended-precision registers, it is + * necessary to specify double-precision (53-bit) rounding precision + * before invoking strtod or dtoa. If the machine uses (the equivalent + * of) Intel 80x87 arithmetic, the call + * _control87(PC_53, MCW_PC); + * does this with many compilers. Whether this or another call is + * appropriate depends on the compiler; for this to work, it may be + * necessary to #include "float.h" or another system-dependent header + * file. + */ + +/* strtod for IEEE-, VAX-, and IBM-arithmetic machines. + * + * This strtod returns a nearest machine number to the input decimal + * string (or sets errno to ERANGE). With IEEE arithmetic, ties are + * broken by the IEEE round-even rule. Otherwise ties are broken by + * biased rounding (add half and chop). + * + * Inspired loosely by William D. Clinger's paper "How to Read Floating + * Point Numbers Accurately" [Proc. ACM SIGPLAN '90, pp. 92-101]. + * + * Modifications: + * + * 1. We only require IEEE, IBM, or VAX double-precision + * arithmetic (not IEEE double-extended). + * 2. We get by with floating-point arithmetic in a case that + * Clinger missed -- when we're computing d * 10^n + * for a small integer d and the integer n is not too + * much larger than 22 (the maximum integer k for which + * we can represent 10^k exactly), we may be able to + * compute (d*10^k) * 10^(e-k) with just one roundoff. + * 3. Rather than a bit-at-a-time adjustment of the binary + * result in the hard case, we use floating-point + * arithmetic to determine the adjustment to within + * one bit; only in really hard cases do we need to + * compute a second residual. + * 4. Because of 3., we don't need a large table of powers of 10 + * for ten-to-e (just some small tables, e.g. of 10^k + * for 0 <= k <= 22). + */ + +/* + * #define IEEE_8087 for IEEE-arithmetic machines where the least + * significant byte has the lowest address. + * #define IEEE_MC68k for IEEE-arithmetic machines where the most + * significant byte has the lowest address. + * #define Long int on machines with 32-bit ints and 64-bit longs. + * #define Sudden_Underflow for IEEE-format machines without gradual + * underflow (i.e., that flush to zero on underflow). + * #define IBM for IBM mainframe-style floating-point arithmetic. + * #define VAX for VAX-style floating-point arithmetic (D_floating). + * #define Unsigned_Shifts if >> does treats its left operand as unsigned. + * #define No_leftright to omit left-right logic in fast floating-point + * computation of dtoa. + * #define Check_FLT_ROUNDS if FLT_ROUNDS can assume the values 2 or 3. + * #define RND_PRODQUOT to use rnd_prod and rnd_quot (assembly routines + * that use extended-precision instructions to compute rounded + * products and quotients) with IBM. + * #define ROUND_BIASED for IEEE-format with biased rounding. + * #define Inaccurate_Divide for IEEE-format with correctly rounded + * products but inaccurate quotients, e.g., for Intel i860. + * #define Just_16 to store 16 bits per 32-bit Long when doing high-precision + * integer arithmetic. Whether this speeds things up or slows things + * down depends on the machine and the number being converted. + * #define KR_headers for old-style C function headers. + * #define Bad_float_h if your system lacks a float.h or if it does not + * define some or all of DBL_DIG, DBL_MAX_10_EXP, DBL_MAX_EXP, + * FLT_RADIX, FLT_ROUNDS, and DBL_MAX. + * #define MALLOC your_malloc, where your_malloc(n) acts like malloc(n) + * if memory is available and otherwise does something you deem + * appropriate. If MALLOC is undefined, malloc will be invoked + * directly -- and assumed always to succeed. + * #define INFNAN_CHECK on IEEE systems to cause strtod to check for + * Infinity and NaN (case insensitively). On some systems (e.g., + * some HP systems), it may be necessary to #define NAN_WORD0 + * appropriately -- to the most significant word of a quiet NaN. + * (On HP Series 700/800 machines, -DNAN_WORD0=0x7ff40000 works.) + * #define MULTIPLE_THREADS if the system offers preemptively scheduled + * multiple threads. In this case, you must provide (or suitably + * #define) two locks, acquired by ACQUIRE_DTOA_LOCK(n) and freed + * by FREE_DTOA_LOCK(n) for n = 0 or 1. (The second lock, accessed + * in pow5mult, ensures lazy evaluation of only one copy of high + * powers of 5; omitting this lock would introduce a small + * probability of wasting memory, but would otherwise be harmless.) + * You must also invoke freedtoa(s) to free the value s returned by + * dtoa. You may do so whether or not MULTIPLE_THREADS is #defined. + * #define NO_IEEE_Scale to disable new (Feb. 1997) logic in strtod that + * avoids underflows on inputs whose result does not underflow. + */ + +#ifndef Long +#define Long long +#endif +#ifndef ULong +typedef unsigned Long ULong; +#endif + +#ifdef DEBUG +#include "stdio.h" +#define Bug(x) {fprintf(stderr, "%s\n", x); exit(1);} +#endif + +#ifdef __cplusplus +#include "malloc.h" +#include "memory.h" +#else +#ifndef KR_headers +#include "stdlib.h" +#include "string.h" +#else +#include "malloc.h" +#include "memory.h" +#endif +#endif + +#ifdef MALLOC +#ifdef KR_headers +extern char *MALLOC(); +#else +extern void *MALLOC(size_t); +#endif +#else +#define MALLOC malloc +#endif + +#undef IEEE_Arith +#undef Avoid_Underflow +#ifdef IEEE_MC68k +#define IEEE_Arith +#endif +#ifdef IEEE_8087 +#define IEEE_Arith +#endif + +#include "errno.h" +#ifdef Bad_float_h +#undef __STDC__ + +#ifdef IEEE_Arith +#define DBL_DIG 15 +#define DBL_MAX_10_EXP 308 +#define DBL_MAX_EXP 1024 +#define FLT_RADIX 2 +#define FLT_ROUNDS 1 +#define DBL_MAX 1.7976931348623157e+308 +#endif + +#ifdef IBM +#define DBL_DIG 16 +#define DBL_MAX_10_EXP 75 +#define DBL_MAX_EXP 63 +#define FLT_RADIX 16 +#define FLT_ROUNDS 0 +#define DBL_MAX 7.2370055773322621e+75 +#endif + +#ifdef VAX +#define DBL_DIG 16 +#define DBL_MAX_10_EXP 38 +#define DBL_MAX_EXP 127 +#define FLT_RADIX 2 +#define FLT_ROUNDS 1 +#define DBL_MAX 1.7014118346046923e+38 +#endif + +#ifndef LONG_MAX +#define LONG_MAX 2147483647 +#endif + +#else /* ifndef Bad_float_h */ +#include "float.h" +#endif /* Bad_float_h */ + +#ifndef __MATH_H__ +#include "math.h" +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef CONST +#ifdef KR_headers +#define CONST /* blank */ +#else +#define CONST const +#endif +#endif + +#ifdef Unsigned_Shifts +#define Sign_Extend(a,b) if (b < 0) a |= 0xffff0000; +#else +#define Sign_Extend(a,b) /*no-op*/ +#endif + +#if defined(IEEE_8087) + defined(IEEE_MC68k) + defined(VAX) + defined(IBM) != 1 +Exactly one of IEEE_8087, IEEE_MC68k, VAX, or IBM should be defined. +#endif + +#ifdef IEEE_8087 +#define word0(x) ((ULong *)&x)[1] +#define word1(x) ((ULong *)&x)[0] +#else +#define word0(x) ((ULong *)&x)[0] +#define word1(x) ((ULong *)&x)[1] +#endif + +/* The following definition of Storeinc is appropriate for MIPS processors. + * An alternative that might be better on some machines is + * #define Storeinc(a,b,c) (*a++ = b << 16 | c & 0xffff) + */ +#if defined(IEEE_8087) + defined(VAX) +#define Storeinc(a,b,c) (((unsigned short *)a)[1] = (unsigned short)b, \ +((unsigned short *)a)[0] = (unsigned short)c, a++) +#else +#define Storeinc(a,b,c) (((unsigned short *)a)[0] = (unsigned short)b, \ +((unsigned short *)a)[1] = (unsigned short)c, a++) +#endif + +/* #define P DBL_MANT_DIG */ +/* Ten_pmax = floor(P*log(2)/log(5)) */ +/* Bletch = (highest power of 2 < DBL_MAX_10_EXP) / 16 */ +/* Quick_max = floor((P-1)*log(FLT_RADIX)/log(10) - 1) */ +/* Int_max = floor(P*log(FLT_RADIX)/log(10) - 1) */ + +#ifdef IEEE_Arith +#define Exp_shift 20 +#define Exp_shift1 20 +#define Exp_msk1 0x100000 +#define Exp_msk11 0x100000 +#define Exp_mask 0x7ff00000 +#define P 53 +#define Bias 1023 +#define Emin (-1022) +#define Exp_1 0x3ff00000 +#define Exp_11 0x3ff00000 +#define Ebits 11 +#define Frac_mask 0xfffff +#define Frac_mask1 0xfffff +#define Ten_pmax 22 +#define Bletch 0x10 +#define Bndry_mask 0xfffff +#define Bndry_mask1 0xfffff +#define LSB 1 +#define Sign_bit 0x80000000 +#define Log2P 1 +#define Tiny0 0 +#define Tiny1 1 +#define Quick_max 14 +#define Int_max 14 +#define Infinite(x) (word0(x) == 0x7ff00000) /* sufficient test for here */ +#ifndef NO_IEEE_Scale +#define Avoid_Underflow +#endif + +#else /* ifndef IEEE_Arith */ +#undef Sudden_Underflow +#define Sudden_Underflow +#ifdef IBM +#define Exp_shift 24 +#define Exp_shift1 24 +#define Exp_msk1 0x1000000 +#define Exp_msk11 0x1000000 +#define Exp_mask 0x7f000000 +#define P 14 +#define Bias 65 +#define Exp_1 0x41000000 +#define Exp_11 0x41000000 +#define Ebits 8 /* exponent has 7 bits, but 8 is the right value in b2d */ +#define Frac_mask 0xffffff +#define Frac_mask1 0xffffff +#define Bletch 4 +#define Ten_pmax 22 +#define Bndry_mask 0xefffff +#define Bndry_mask1 0xffffff +#define LSB 1 +#define Sign_bit 0x80000000 +#define Log2P 4 +#define Tiny0 0x100000 +#define Tiny1 0 +#define Quick_max 14 +#define Int_max 15 +#else /* VAX */ +#define Exp_shift 23 +#define Exp_shift1 7 +#define Exp_msk1 0x80 +#define Exp_msk11 0x800000 +#define Exp_mask 0x7f80 +#define P 56 +#define Bias 129 +#define Exp_1 0x40800000 +#define Exp_11 0x4080 +#define Ebits 8 +#define Frac_mask 0x7fffff +#define Frac_mask1 0xffff007f +#define Ten_pmax 24 +#define Bletch 2 +#define Bndry_mask 0xffff007f +#define Bndry_mask1 0xffff007f +#define LSB 0x10000 +#define Sign_bit 0x8000 +#define Log2P 1 +#define Tiny0 0x80 +#define Tiny1 0 +#define Quick_max 15 +#define Int_max 15 +#endif /* IBM, VAX */ +#endif /* IEEE_Arith */ + +#ifndef IEEE_Arith +#define ROUND_BIASED +#endif + +#ifdef RND_PRODQUOT +#define rounded_product(a,b) a = rnd_prod(a, b) +#define rounded_quotient(a,b) a = rnd_quot(a, b) +#ifdef KR_headers +extern double rnd_prod(), rnd_quot(); +#else +extern double rnd_prod(double, double), rnd_quot(double, double); +#endif +#else +#define rounded_product(a,b) a *= b +#define rounded_quotient(a,b) a /= b +#endif + +#define Big0 (Frac_mask1 | Exp_msk1*(DBL_MAX_EXP+Bias-1)) +#define Big1 0xffffffff + +#ifndef Just_16 +/* When Pack_32 is not defined, we store 16 bits per 32-bit Long. + * This makes some inner loops simpler and sometimes saves work + * during multiplications, but it often seems to make things slightly + * slower. Hence the default is now to store 32 bits per Long. + */ +#ifndef Pack_32 +#define Pack_32 +#endif +#endif + +#ifndef MULTIPLE_THREADS +#define ACQUIRE_DTOA_LOCK(n) /*nothing*/ +#define FREE_DTOA_LOCK(n) /*nothing*/ +#endif + +#define Kmax 15 + +#ifdef __cplusplus +extern "C" double strtod(const char *s00, char **se); +extern "C" char *dtoa(double d, int mode, int ndigits, + int *decpt, int *sign, char **rve); +#endif + + struct +Bigint { + struct Bigint *next; + int k, maxwds, sign, wds; + ULong x[1]; + }; + + typedef struct Bigint Bigint; + + static Bigint *freelist[Kmax+1]; + + static Bigint * +Balloc +#ifdef KR_headers + (k) int k; +#else + (int k) +#endif +{ + int x; + Bigint *rv; + + ACQUIRE_DTOA_LOCK(0); + if (rv = freelist[k]) { + freelist[k] = rv->next; + } + else { + x = 1 << k; + rv = (Bigint *)MALLOC(sizeof(Bigint) + (x-1)*sizeof(ULong)); + rv->k = k; + rv->maxwds = x; + } + FREE_DTOA_LOCK(0); + rv->sign = rv->wds = 0; + return rv; + } + + static void +Bfree +#ifdef KR_headers + (v) Bigint *v; +#else + (Bigint *v) +#endif +{ + if (v) { + ACQUIRE_DTOA_LOCK(0); + v->next = freelist[v->k]; + freelist[v->k] = v; + FREE_DTOA_LOCK(0); + } + } + +#define Bcopy(x,y) memcpy((char *)&x->sign, (char *)&y->sign, \ +y->wds*sizeof(Long) + 2*sizeof(int)) + + static Bigint * +multadd +#ifdef KR_headers + (b, m, a) Bigint *b; int m, a; +#else + (Bigint *b, int m, int a) /* multiply by m and add a */ +#endif +{ + int i, wds; + ULong *x, y; +#ifdef Pack_32 + ULong xi, z; +#endif + Bigint *b1; + + wds = b->wds; + x = b->x; + i = 0; + do { +#ifdef Pack_32 + xi = *x; + y = (xi & 0xffff) * m + a; + z = (xi >> 16) * m + (y >> 16); + a = (int)(z >> 16); + *x++ = (z << 16) + (y & 0xffff); +#else + y = *x * m + a; + a = (int)(y >> 16); + *x++ = y & 0xffff; +#endif + } + while(++i < wds); + if (a) { + if (wds >= b->maxwds) { + b1 = Balloc(b->k+1); + Bcopy(b1, b); + Bfree(b); + b = b1; + } + b->x[wds++] = a; + b->wds = wds; + } + return b; + } + + static Bigint * +s2b +#ifdef KR_headers + (s, nd0, nd, y9) CONST char *s; int nd0, nd; ULong y9; +#else + (CONST char *s, int nd0, int nd, ULong y9) +#endif +{ + Bigint *b; + int i, k; + Long x, y; + + x = (nd + 8) / 9; + for(k = 0, y = 1; x > y; y <<= 1, k++) ; +#ifdef Pack_32 + b = Balloc(k); + b->x[0] = y9; + b->wds = 1; +#else + b = Balloc(k+1); + b->x[0] = y9 & 0xffff; + b->wds = (b->x[1] = y9 >> 16) ? 2 : 1; +#endif + + i = 9; + if (9 < nd0) { + s += 9; + do b = multadd(b, 10, *s++ - '0'); + while(++i < nd0); + s++; + } + else + s += 10; + for(; i < nd; i++) + b = multadd(b, 10, *s++ - '0'); + return b; + } + + static int +hi0bits +#ifdef KR_headers + (x) register ULong x; +#else + (register ULong x) +#endif +{ + register int k = 0; + + if (!(x & 0xffff0000)) { + k = 16; + x <<= 16; + } + if (!(x & 0xff000000)) { + k += 8; + x <<= 8; + } + if (!(x & 0xf0000000)) { + k += 4; + x <<= 4; + } + if (!(x & 0xc0000000)) { + k += 2; + x <<= 2; + } + if (!(x & 0x80000000)) { + k++; + if (!(x & 0x40000000)) + return 32; + } + return k; + } + + static int +lo0bits +#ifdef KR_headers + (y) ULong *y; +#else + (ULong *y) +#endif +{ + register int k; + register ULong x = *y; + + if (x & 7) { + if (x & 1) + return 0; + if (x & 2) { + *y = x >> 1; + return 1; + } + *y = x >> 2; + return 2; + } + k = 0; + if (!(x & 0xffff)) { + k = 16; + x >>= 16; + } + if (!(x & 0xff)) { + k += 8; + x >>= 8; + } + if (!(x & 0xf)) { + k += 4; + x >>= 4; + } + if (!(x & 0x3)) { + k += 2; + x >>= 2; + } + if (!(x & 1)) { + k++; + x >>= 1; + if (!x & 1) + return 32; + } + *y = x; + return k; + } + + static Bigint * +i2b +#ifdef KR_headers + (i) int i; +#else + (int i) +#endif +{ + Bigint *b; + + b = Balloc(1); + b->x[0] = i; + b->wds = 1; + return b; + } + + static Bigint * +mult +#ifdef KR_headers + (a, b) Bigint *a, *b; +#else + (Bigint *a, Bigint *b) +#endif +{ + Bigint *c; + int k, wa, wb, wc; + ULong carry, y, z; + ULong *x, *xa, *xae, *xb, *xbe, *xc, *xc0; +#ifdef Pack_32 + ULong z2; +#endif + + if (a->wds < b->wds) { + c = a; + a = b; + b = c; + } + k = a->k; + wa = a->wds; + wb = b->wds; + wc = wa + wb; + if (wc > a->maxwds) + k++; + c = Balloc(k); + for(x = c->x, xa = x + wc; x < xa; x++) + *x = 0; + xa = a->x; + xae = xa + wa; + xb = b->x; + xbe = xb + wb; + xc0 = c->x; +#ifdef Pack_32 + for(; xb < xbe; xb++, xc0++) { + if (y = *xb & 0xffff) { + x = xa; + xc = xc0; + carry = 0; + do { + z = (*x & 0xffff) * y + (*xc & 0xffff) + carry; + carry = z >> 16; + z2 = (*x++ >> 16) * y + (*xc >> 16) + carry; + carry = z2 >> 16; + Storeinc(xc, z2, z); + } + while(x < xae); + *xc = carry; + } + if (y = *xb >> 16) { + x = xa; + xc = xc0; + carry = 0; + z2 = *xc; + do { + z = (*x & 0xffff) * y + (*xc >> 16) + carry; + carry = z >> 16; + Storeinc(xc, z, z2); + z2 = (*x++ >> 16) * y + (*xc & 0xffff) + carry; + carry = z2 >> 16; + } + while(x < xae); + *xc = z2; + } + } +#else + for(; xb < xbe; xc0++) { + if (y = *xb++) { + x = xa; + xc = xc0; + carry = 0; + do { + z = *x++ * y + *xc + carry; + carry = z >> 16; + *xc++ = z & 0xffff; + } + while(x < xae); + *xc = carry; + } + } +#endif + for(xc0 = c->x, xc = xc0 + wc; wc > 0 && !*--xc; --wc) ; + c->wds = wc; + return c; + } + + static Bigint *p5s; + + static Bigint * +pow5mult +#ifdef KR_headers + (b, k) Bigint *b; int k; +#else + (Bigint *b, int k) +#endif +{ + Bigint *b1, *p5, *p51; + int i; + static int p05[3] = { 5, 25, 125 }; + + if (i = k & 3) + b = multadd(b, p05[i-1], 0); + + if (!(k >>= 2)) + return b; + if (!(p5 = p5s)) { + /* first time */ +#ifdef MULTIPLE_THREADS + ACQUIRE_DTOA_LOCK(1); + if (!(p5 = p5s)) { + p5 = p5s = i2b(625); + p5->next = 0; + } + FREE_DTOA_LOCK(1); +#else + p5 = p5s = i2b(625); + p5->next = 0; +#endif + } + for(;;) { + if (k & 1) { + b1 = mult(b, p5); + Bfree(b); + b = b1; + } + if (!(k >>= 1)) + break; + if (!(p51 = p5->next)) { +#ifdef MULTIPLE_THREADS + ACQUIRE_DTOA_LOCK(1); + if (!(p51 = p5->next)) { + p51 = p5->next = mult(p5,p5); + p51->next = 0; + } + FREE_DTOA_LOCK(1); +#else + p51 = p5->next = mult(p5,p5); + p51->next = 0; +#endif + } + p5 = p51; + } + return b; + } + + static Bigint * +lshift +#ifdef KR_headers + (b, k) Bigint *b; int k; +#else + (Bigint *b, int k) +#endif +{ + int i, k1, n, n1; + Bigint *b1; + ULong *x, *x1, *xe, z; + +#ifdef Pack_32 + n = k >> 5; +#else + n = k >> 4; +#endif + k1 = b->k; + n1 = n + b->wds + 1; + for(i = b->maxwds; n1 > i; i <<= 1) + k1++; + b1 = Balloc(k1); + x1 = b1->x; + for(i = 0; i < n; i++) + *x1++ = 0; + x = b->x; + xe = x + b->wds; +#ifdef Pack_32 + if (k &= 0x1f) { + k1 = 32 - k; + z = 0; + do { + *x1++ = *x << k | z; + z = *x++ >> k1; + } + while(x < xe); + if (*x1 = z) + ++n1; + } +#else + if (k &= 0xf) { + k1 = 16 - k; + z = 0; + do { + *x1++ = *x << k & 0xffff | z; + z = *x++ >> k1; + } + while(x < xe); + if (*x1 = z) + ++n1; + } +#endif + else do + *x1++ = *x++; + while(x < xe); + b1->wds = n1 - 1; + Bfree(b); + return b1; + } + + static int +cmp +#ifdef KR_headers + (a, b) Bigint *a, *b; +#else + (Bigint *a, Bigint *b) +#endif +{ + ULong *xa, *xa0, *xb, *xb0; + int i, j; + + i = a->wds; + j = b->wds; +#ifdef DEBUG + if (i > 1 && !a->x[i-1]) + Bug("cmp called with a->x[a->wds-1] == 0"); + if (j > 1 && !b->x[j-1]) + Bug("cmp called with b->x[b->wds-1] == 0"); +#endif + if (i -= j) + return i; + xa0 = a->x; + xa = xa0 + j; + xb0 = b->x; + xb = xb0 + j; + for(;;) { + if (*--xa != *--xb) + return *xa < *xb ? -1 : 1; + if (xa <= xa0) + break; + } + return 0; + } + + static Bigint * +diff +#ifdef KR_headers + (a, b) Bigint *a, *b; +#else + (Bigint *a, Bigint *b) +#endif +{ + Bigint *c; + int i, wa, wb; + Long borrow, y; /* We need signed shifts here. */ + ULong *xa, *xae, *xb, *xbe, *xc; +#ifdef Pack_32 + Long z; +#endif + + i = cmp(a,b); + if (!i) { + c = Balloc(0); + c->wds = 1; + c->x[0] = 0; + return c; + } + if (i < 0) { + c = a; + a = b; + b = c; + i = 1; + } + else + i = 0; + c = Balloc(a->k); + c->sign = i; + wa = a->wds; + xa = a->x; + xae = xa + wa; + wb = b->wds; + xb = b->x; + xbe = xb + wb; + xc = c->x; + borrow = 0; +#ifdef Pack_32 + do { + y = (*xa & 0xffff) - (*xb & 0xffff) + borrow; + borrow = y >> 16; + Sign_Extend(borrow, y); + z = (*xa++ >> 16) - (*xb++ >> 16) + borrow; + borrow = z >> 16; + Sign_Extend(borrow, z); + Storeinc(xc, z, y); + } + while(xb < xbe); + while(xa < xae) { + y = (*xa & 0xffff) + borrow; + borrow = y >> 16; + Sign_Extend(borrow, y); + z = (*xa++ >> 16) + borrow; + borrow = z >> 16; + Sign_Extend(borrow, z); + Storeinc(xc, z, y); + } +#else + do { + y = *xa++ - *xb++ + borrow; + borrow = y >> 16; + Sign_Extend(borrow, y); + *xc++ = y & 0xffff; + } + while(xb < xbe); + while(xa < xae) { + y = *xa++ + borrow; + borrow = y >> 16; + Sign_Extend(borrow, y); + *xc++ = y & 0xffff; + } +#endif + while(!*--xc) + wa--; + c->wds = wa; + return c; + } + + static double +ulp +#ifdef KR_headers + (x) double x; +#else + (double x) +#endif +{ + register Long L; + double a; + + L = (word0(x) & Exp_mask) - (P-1)*Exp_msk1; +#ifndef Sudden_Underflow + if (L > 0) { +#endif +#ifdef IBM + L |= Exp_msk1 >> 4; +#endif + word0(a) = L; + word1(a) = 0; +#ifndef Sudden_Underflow + } + else { + L = -L >> Exp_shift; + if (L < Exp_shift) { + word0(a) = 0x80000 >> L; + word1(a) = 0; + } + else { + word0(a) = 0; + L -= Exp_shift; + word1(a) = L >= 31 ? 1 : 1 << 31 - L; + } + } +#endif + return a; + } + + static double +b2d +#ifdef KR_headers + (a, e) Bigint *a; int *e; +#else + (Bigint *a, int *e) +#endif +{ + ULong *xa, *xa0, w, y, z; + int k; + double d; +#ifdef VAX + ULong d0, d1; +#else +#define d0 word0(d) +#define d1 word1(d) +#endif + + xa0 = a->x; + xa = xa0 + a->wds; + y = *--xa; +#ifdef DEBUG + if (!y) Bug("zero y in b2d"); +#endif + k = hi0bits(y); + *e = 32 - k; +#ifdef Pack_32 + if (k < Ebits) { + d0 = Exp_1 | y >> Ebits - k; + w = xa > xa0 ? *--xa : 0; + d1 = y << (32-Ebits) + k | w >> Ebits - k; + goto ret_d; + } + z = xa > xa0 ? *--xa : 0; + if (k -= Ebits) { + d0 = Exp_1 | y << k | z >> 32 - k; + y = xa > xa0 ? *--xa : 0; + d1 = z << k | y >> 32 - k; + } + else { + d0 = Exp_1 | y; + d1 = z; + } +#else + if (k < Ebits + 16) { + z = xa > xa0 ? *--xa : 0; + d0 = Exp_1 | y << k - Ebits | z >> Ebits + 16 - k; + w = xa > xa0 ? *--xa : 0; + y = xa > xa0 ? *--xa : 0; + d1 = z << k + 16 - Ebits | w << k - Ebits | y >> 16 + Ebits - k; + goto ret_d; + } + z = xa > xa0 ? *--xa : 0; + w = xa > xa0 ? *--xa : 0; + k -= Ebits + 16; + d0 = Exp_1 | y << k + 16 | z << k | w >> 16 - k; + y = xa > xa0 ? *--xa : 0; + d1 = w << k + 16 | y << k; +#endif + ret_d: +#ifdef VAX + word0(d) = d0 >> 16 | d0 << 16; + word1(d) = d1 >> 16 | d1 << 16; +#else +#undef d0 +#undef d1 +#endif + return d; + } + + static Bigint * +d2b +#ifdef KR_headers + (d, e, bits) double d; int *e, *bits; +#else + (double d, int *e, int *bits) +#endif +{ + Bigint *b; + int de, i, k; + ULong *x, y, z; +#ifdef VAX + ULong d0, d1; + d0 = word0(d) >> 16 | word0(d) << 16; + d1 = word1(d) >> 16 | word1(d) << 16; +#else +#define d0 word0(d) +#define d1 word1(d) +#endif + +#ifdef Pack_32 + b = Balloc(1); +#else + b = Balloc(2); +#endif + x = b->x; + + z = d0 & Frac_mask; + d0 &= 0x7fffffff; /* clear sign bit, which we ignore */ +#ifdef Sudden_Underflow + de = (int)(d0 >> Exp_shift); +#ifndef IBM + z |= Exp_msk11; +#endif +#else + if (de = (int)(d0 >> Exp_shift)) + z |= Exp_msk1; +#endif +#ifdef Pack_32 + if (y = d1) { + if (k = lo0bits(&y)) { + x[0] = y | z << 32 - k; + z >>= k; + } + else + x[0] = y; + i = b->wds = (x[1] = z) ? 2 : 1; + } + else { +#ifdef DEBUG + if (!z) + Bug("Zero passed to d2b"); +#endif + k = lo0bits(&z); + x[0] = z; + i = b->wds = 1; + k += 32; + } +#else + if (y = d1) { + if (k = lo0bits(&y)) + if (k >= 16) { + x[0] = y | z << 32 - k & 0xffff; + x[1] = z >> k - 16 & 0xffff; + x[2] = z >> k; + i = 2; + } + else { + x[0] = y & 0xffff; + x[1] = y >> 16 | z << 16 - k & 0xffff; + x[2] = z >> k & 0xffff; + x[3] = z >> k+16; + i = 3; + } + else { + x[0] = y & 0xffff; + x[1] = y >> 16; + x[2] = z & 0xffff; + x[3] = z >> 16; + i = 3; + } + } + else { +#ifdef DEBUG + if (!z) + Bug("Zero passed to d2b"); +#endif + k = lo0bits(&z); + if (k >= 16) { + x[0] = z; + i = 0; + } + else { + x[0] = z & 0xffff; + x[1] = z >> 16; + i = 1; + } + k += 32; + } + while(!x[i]) + --i; + b->wds = i + 1; +#endif +#ifndef Sudden_Underflow + if (de) { +#endif +#ifdef IBM + *e = (de - Bias - (P-1) << 2) + k; + *bits = 4*P + 8 - k - hi0bits(word0(d) & Frac_mask); +#else + *e = de - Bias - (P-1) + k; + *bits = P - k; +#endif +#ifndef Sudden_Underflow + } + else { + *e = de - Bias - (P-1) + 1 + k; +#ifdef Pack_32 + *bits = 32*i - hi0bits(x[i-1]); +#else + *bits = (i+2)*16 - hi0bits(x[i]); +#endif + } +#endif + return b; + } +#undef d0 +#undef d1 + + static double +ratio +#ifdef KR_headers + (a, b) Bigint *a, *b; +#else + (Bigint *a, Bigint *b) +#endif +{ + double da, db; + int k, ka, kb; + + da = b2d(a, &ka); + db = b2d(b, &kb); +#ifdef Pack_32 + k = ka - kb + 32*(a->wds - b->wds); +#else + k = ka - kb + 16*(a->wds - b->wds); +#endif +#ifdef IBM + if (k > 0) { + word0(da) += (k >> 2)*Exp_msk1; + if (k &= 3) + da *= 1 << k; + } + else { + k = -k; + word0(db) += (k >> 2)*Exp_msk1; + if (k &= 3) + db *= 1 << k; + } +#else + if (k > 0) + word0(da) += k*Exp_msk1; + else { + k = -k; + word0(db) += k*Exp_msk1; + } +#endif + return da / db; + } + + static CONST double +tens[] = { + 1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9, + 1e10, 1e11, 1e12, 1e13, 1e14, 1e15, 1e16, 1e17, 1e18, 1e19, + 1e20, 1e21, 1e22 +#ifdef VAX + , 1e23, 1e24 +#endif + }; + + static CONST double +#ifdef IEEE_Arith +bigtens[] = { 1e16, 1e32, 1e64, 1e128, 1e256 }; +static CONST double tinytens[] = { 1e-16, 1e-32, 1e-64, 1e-128, +#ifdef Avoid_Underflow + 9007199254740992.e-256 +#else + 1e-256 +#endif + }; +/* The factor of 2^53 in tinytens[4] helps us avoid setting the underflow */ +/* flag unnecessarily. It leads to a song and dance at the end of strtod. */ +#define Scale_Bit 0x10 +#define n_bigtens 5 +#else +#ifdef IBM +bigtens[] = { 1e16, 1e32, 1e64 }; +static CONST double tinytens[] = { 1e-16, 1e-32, 1e-64 }; +#define n_bigtens 3 +#else +bigtens[] = { 1e16, 1e32 }; +static CONST double tinytens[] = { 1e-16, 1e-32 }; +#define n_bigtens 2 +#endif +#endif + +#ifndef IEEE_Arith +#undef INFNAN_CHECK +#endif + +#ifdef INFNAN_CHECK + +#ifndef NAN_WORD0 +#define NAN_WORD0 0x7ff80000 +#endif + +#ifndef NAN_WORD1 +#define NAN_WORD1 0 +#endif + + static int +match +#ifdef KR_headers + (sp, t) char **sp, *t; +#else + (CONST char **sp, char *t) +#endif +{ + int c, d; + CONST char *s = *sp; + + while(d = *t++) { + if ((c = *++s) >= 'A' && c <= 'Z') + c += 'a' - 'A'; + if (c != d) + return 0; + } + *sp = s + 1; + return 1; + } +#endif /* INFNAN_CHECK */ + + double +strtod +#ifdef KR_headers + (s00, se) CONST char *s00; char **se; +#else + (CONST char *s00, char **se) +#endif +{ +#ifdef IEEE_Arith + int scale; +#endif + int bb2, bb5, bbe, bd2, bd5, bbbits, bs2, c, dsign, + e, e1, esign, i, j, k, nd, nd0, nf, nz, nz0, sign; + CONST char *s, *s0, *s1; + double aadj, aadj1, adj, rv, rv0; + Long L; + ULong y, z; + Bigint *bb, *bb1, *bd, *bd0, *bs, *delta; + sign = nz0 = nz = 0; + rv = 0.; + for(s = s00;;s++) switch(*s) { + case '-': + sign = 1; + /* no break */ + case '+': + if (*++s) + goto break2; + /* no break */ + case 0: + s = s00; + goto ret; + case '\t': + case '\n': + case '\v': + case '\f': + case '\r': + case ' ': + continue; + default: + goto break2; + } + break2: + if (*s == '0') { + nz0 = 1; + while(*++s == '0') ; + if (!*s) + goto ret; + } + s0 = s; + y = z = 0; + for(nd = nf = 0; (c = *s) >= '0' && c <= '9'; nd++, s++) + if (nd < 9) + y = 10*y + c - '0'; + else if (nd < 16) + z = 10*z + c - '0'; + nd0 = nd; + if (c == '.') { + c = *++s; + if (!nd) { + for(; c == '0'; c = *++s) + nz++; + if (c > '0' && c <= '9') { + s0 = s; + nf += nz; + nz = 0; + goto have_dig; + } + goto dig_done; + } + for(; c >= '0' && c <= '9'; c = *++s) { + have_dig: + nz++; + if (c -= '0') { + nf += nz; + for(i = 1; i < nz; i++) + if (nd++ < 9) + y *= 10; + else if (nd <= DBL_DIG + 1) + z *= 10; + if (nd++ < 9) + y = 10*y + c; + else if (nd <= DBL_DIG + 1) + z = 10*z + c; + nz = 0; + } + } + } + dig_done: + e = 0; + if (c == 'e' || c == 'E') { + if (!nd && !nz && !nz0) { + s = s00; + goto ret; + } + s00 = s; + esign = 0; + switch(c = *++s) { + case '-': + esign = 1; + case '+': + c = *++s; + } + if (c >= '0' && c <= '9') { + while(c == '0') + c = *++s; + if (c > '0' && c <= '9') { + L = c - '0'; + s1 = s; + while((c = *++s) >= '0' && c <= '9') + L = 10*L + c - '0'; + if (s - s1 > 8 || L > 19999) + /* Avoid confusion from exponents + * so large that e might overflow. + */ + e = 19999; /* safe for 16 bit ints */ + else + e = (int)L; + if (esign) + e = -e; + } + else + e = 0; + } + else + s = s00; + } + if (!nd) { + if (!nz && !nz0) { +#ifdef INFNAN_CHECK + /* Check for Nan and Infinity */ + switch(c) { + case 'i': + case 'I': + if (match(&s,"nfinity")) { + word0(rv) = 0x7ff00000; + word1(rv) = 0; + goto ret; + } + break; + case 'n': + case 'N': + if (match(&s, "an")) { + word0(rv) = NAN_WORD0; + word1(rv) = NAN_WORD1; + goto ret; + } + } +#endif /* INFNAN_CHECK */ + s = s00; + } + goto ret; + } + e1 = e -= nf; + + /* Now we have nd0 digits, starting at s0, followed by a + * decimal point, followed by nd-nd0 digits. The number we're + * after is the integer represented by those digits times + * 10**e */ + + if (!nd0) + nd0 = nd; + k = nd < DBL_DIG + 1 ? nd : DBL_DIG + 1; + rv = y; + if (k > 9) + rv = tens[k - 9] * rv + z; + bd0 = 0; + if (nd <= DBL_DIG +#ifndef RND_PRODQUOT + && FLT_ROUNDS == 1 +#endif + ) { + if (!e) + goto ret; + if (e > 0) { + if (e <= Ten_pmax) { +#ifdef VAX + goto vax_ovfl_check; +#else + /* rv = */ rounded_product(rv, tens[e]); + goto ret; +#endif + } + i = DBL_DIG - nd; + if (e <= Ten_pmax + i) { + /* A fancier test would sometimes let us do + * this for larger i values. + */ + e -= i; + rv *= tens[i]; +#ifdef VAX + /* VAX exponent range is so narrow we must + * worry about overflow here... + */ + vax_ovfl_check: + word0(rv) -= P*Exp_msk1; + /* rv = */ rounded_product(rv, tens[e]); + if ((word0(rv) & Exp_mask) + > Exp_msk1*(DBL_MAX_EXP+Bias-1-P)) + goto ovfl; + word0(rv) += P*Exp_msk1; +#else + /* rv = */ rounded_product(rv, tens[e]); +#endif + goto ret; + } + } +#ifndef Inaccurate_Divide + else if (e >= -Ten_pmax) { + /* rv = */ rounded_quotient(rv, tens[-e]); + goto ret; + } +#endif + } + e1 += nd - k; + +#ifdef IEEE_Arith + scale = 0; +#endif + + /* Get starting approximation = rv * 10**e1 */ + + if (e1 > 0) { + if (i = e1 & 15) + rv *= tens[i]; + if (e1 &= ~15) { + if (e1 > DBL_MAX_10_EXP) { + ovfl: + errno = ERANGE; +#ifdef __STDC__ + rv = HUGE_VAL; +#else + /* Can't trust HUGE_VAL */ +#ifdef IEEE_Arith + word0(rv) = Exp_mask; + word1(rv) = 0; +#else + word0(rv) = Big0; + word1(rv) = Big1; +#endif +#endif + if (bd0) + goto retfree; + goto ret; + } + if (e1 >>= 4) { + for(j = 0; e1 > 1; j++, e1 >>= 1) + if (e1 & 1) + rv *= bigtens[j]; + /* The last multiplication could overflow. */ + word0(rv) -= P*Exp_msk1; + rv *= bigtens[j]; + if ((z = word0(rv) & Exp_mask) + > Exp_msk1*(DBL_MAX_EXP+Bias-P)) + goto ovfl; + if (z > Exp_msk1*(DBL_MAX_EXP+Bias-1-P)) { + /* set to largest number */ + /* (Can't trust DBL_MAX) */ + word0(rv) = Big0; + word1(rv) = Big1; + } + else + word0(rv) += P*Exp_msk1; + } + + } + } + else if (e1 < 0) { + e1 = -e1; + if (i = e1 & 15) + rv /= tens[i]; + if (e1 &= ~15) { + e1 >>= 4; + if (e1 >= 1 << n_bigtens) + goto undfl; +#ifdef Avoid_Underflow + if (e1 & Scale_Bit) + scale = P; + for(j = 0; e1 > 0; j++, e1 >>= 1) + if (e1 & 1) + rv *= tinytens[j]; +#else + for(j = 0; e1 > 1; j++, e1 >>= 1) + if (e1 & 1) + rv *= tinytens[j]; + /* The last multiplication could underflow. */ + rv0 = rv; + rv *= tinytens[j]; + if (!rv) { + rv = 2.*rv0; + rv *= tinytens[j]; +#endif + if (!rv) { + undfl: + rv = 0.; + errno = ERANGE; + if (bd0) + goto retfree; + goto ret; + } +#ifndef Avoid_Underflow + word0(rv) = Tiny0; + word1(rv) = Tiny1; + /* The refinement below will clean + * this approximation up. + */ + } +#endif + } + } + + /* Now the hard part -- adjusting rv to the correct value.*/ + + /* Put digits into bd: true value = bd * 10^e */ + + bd0 = s2b(s0, nd0, nd, y); + + for(;;) { + bd = Balloc(bd0->k); + Bcopy(bd, bd0); + bb = d2b(rv, &bbe, &bbbits); /* rv = bb * 2^bbe */ + bs = i2b(1); + + if (e >= 0) { + bb2 = bb5 = 0; + bd2 = bd5 = e; + } + else { + bb2 = bb5 = -e; + bd2 = bd5 = 0; + } + if (bbe >= 0) + bb2 += bbe; + else + bd2 -= bbe; + bs2 = bb2; +#ifdef Sudden_Underflow +#ifdef IBM + j = 1 + 4*P - 3 - bbbits + ((bbe + bbbits - 1) & 3); +#else + j = P + 1 - bbbits; +#endif +#else + i = bbe + bbbits - 1; /* logb(rv) */ + if (i < Emin) /* denormal */ + j = bbe + (P-Emin); + else + j = P + 1 - bbbits; +#endif + bb2 += j; + bd2 += j; +#ifdef Avoid_Underflow + bd2 += scale; +#endif + i = bb2 < bd2 ? bb2 : bd2; + if (i > bs2) + i = bs2; + if (i > 0) { + bb2 -= i; + bd2 -= i; + bs2 -= i; + } + if (bb5 > 0) { + bs = pow5mult(bs, bb5); + bb1 = mult(bs, bb); + Bfree(bb); + bb = bb1; + } + if (bb2 > 0) + bb = lshift(bb, bb2); + if (bd5 > 0) + bd = pow5mult(bd, bd5); + if (bd2 > 0) + bd = lshift(bd, bd2); + if (bs2 > 0) + bs = lshift(bs, bs2); + delta = diff(bb, bd); + dsign = delta->sign; + delta->sign = 0; + i = cmp(delta, bs); + if (i < 0) { + /* Error is less than half an ulp -- check for + * special case of mantissa a power of two. + */ + if (dsign || word1(rv) || word0(rv) & Bndry_mask +#ifdef IEEE_Arith + || (word0(rv) & Exp_mask) <= Exp_msk1 +#endif + ) { +#ifdef Avoid_Underflow + if (!delta->x[0] && delta->wds == 1) + dsign = 2; +#endif + break; + } + delta = lshift(delta,Log2P); + if (cmp(delta, bs) > 0) + goto drop_down; + break; + } + if (i == 0) { + /* exactly half-way between */ + if (dsign) { + if ((word0(rv) & Bndry_mask1) == Bndry_mask1 + && word1(rv) == 0xffffffff) { + /*boundary case -- increment exponent*/ + word0(rv) = (word0(rv) & Exp_mask) + + Exp_msk1 +#ifdef IBM + | Exp_msk1 >> 4 +#endif + ; + word1(rv) = 0; +#ifdef Avoid_Underflow + dsign = 0; +#endif + break; + } + } + else if (!(word0(rv) & Bndry_mask) && !word1(rv)) { +#ifdef Avoid_Underflow + dsign = 2; +#endif + drop_down: + /* boundary case -- decrement exponent */ +#ifdef Sudden_Underflow + L = word0(rv) & Exp_mask; +#ifdef IBM + if (L < Exp_msk1) +#else + if (L <= Exp_msk1) +#endif + goto undfl; + L -= Exp_msk1; +#else + L = (word0(rv) & Exp_mask) - Exp_msk1; +#endif + word0(rv) = L | Bndry_mask1; + word1(rv) = 0xffffffff; +#ifdef IBM + goto cont; +#else + break; +#endif + } +#ifndef ROUND_BIASED + if (!(word1(rv) & LSB)) + break; +#endif + if (dsign) + rv += ulp(rv); +#ifndef ROUND_BIASED + else { + rv -= ulp(rv); +#ifndef Sudden_Underflow + if (!rv) + goto undfl; +#endif + } +#ifdef Avoid_Underflow + dsign = 1 - dsign; +#endif +#endif + break; + } + if ((aadj = ratio(delta, bs)) <= 2.) { + if (dsign) + aadj = aadj1 = 1.; + else if (word1(rv) || word0(rv) & Bndry_mask) { +#ifndef Sudden_Underflow + if (word1(rv) == Tiny1 && !word0(rv)) + goto undfl; +#endif + aadj = 1.; + aadj1 = -1.; + } + else { + /* special case -- power of FLT_RADIX to be */ + /* rounded down... */ + + if (aadj < 2./FLT_RADIX) + aadj = 1./FLT_RADIX; + else + aadj *= 0.5; + aadj1 = -aadj; + } + } + else { + aadj *= 0.5; + aadj1 = dsign ? aadj : -aadj; +#ifdef Check_FLT_ROUNDS + switch(FLT_ROUNDS) { + case 2: /* towards +infinity */ + aadj1 -= 0.5; + break; + case 0: /* towards 0 */ + case 3: /* towards -infinity */ + aadj1 += 0.5; + } +#else + if (FLT_ROUNDS == 0) + aadj1 += 0.5; +#endif + } + y = word0(rv) & Exp_mask; + + /* Check for overflow */ + + if (y == Exp_msk1*(DBL_MAX_EXP+Bias-1)) { + rv0 = rv; + word0(rv) -= P*Exp_msk1; + adj = aadj1 * ulp(rv); + rv += adj; + if ((word0(rv) & Exp_mask) >= + Exp_msk1*(DBL_MAX_EXP+Bias-P)) { + if (word0(rv0) == Big0 && word1(rv0) == Big1) + goto ovfl; + word0(rv) = Big0; + word1(rv) = Big1; + goto cont; + } + else + word0(rv) += P*Exp_msk1; + } + else { +#ifdef Sudden_Underflow + if ((word0(rv) & Exp_mask) <= P*Exp_msk1) { + rv0 = rv; + word0(rv) += P*Exp_msk1; + adj = aadj1 * ulp(rv); + rv += adj; +#ifdef IBM + if ((word0(rv) & Exp_mask) < P*Exp_msk1) +#else + if ((word0(rv) & Exp_mask) <= P*Exp_msk1) +#endif + { + if (word0(rv0) == Tiny0 + && word1(rv0) == Tiny1) + goto undfl; + word0(rv) = Tiny0; + word1(rv) = Tiny1; + goto cont; + } + else + word0(rv) -= P*Exp_msk1; + } + else { + adj = aadj1 * ulp(rv); + rv += adj; + } +#else + /* Compute adj so that the IEEE rounding rules will + * correctly round rv + adj in some half-way cases. + * If rv * ulp(rv) is denormalized (i.e., + * y <= (P-1)*Exp_msk1), we must adjust aadj to avoid + * trouble from bits lost to denormalization; + * example: 1.2e-307 . + */ + if (y <= (P-1)*Exp_msk1 && aadj >= 1.) { + aadj1 = (double)(int)(aadj + 0.5); + if (!dsign) + aadj1 = -aadj1; + } + adj = aadj1 * ulp(rv); + rv += adj; +#endif + } + z = word0(rv) & Exp_mask; +#ifdef Avoid_Underflow + if (!scale) +#endif + if (y == z) { + /* Can we stop now? */ + L = aadj; + aadj -= L; + /* The tolerances below are conservative. */ + if (dsign || word1(rv) || word0(rv) & Bndry_mask) { + if (aadj < .4999999 || aadj > .5000001) + break; + } + else if (aadj < .4999999/FLT_RADIX) + break; + } + cont: + Bfree(bb); + Bfree(bd); + Bfree(bs); + Bfree(delta); + } +#ifdef Avoid_Underflow + if (scale) { + if ((word0(rv) & Exp_mask) <= P*Exp_msk1 + && word1(rv) & 1 + && dsign != 2) + if (dsign) + rv += ulp(rv); + else + word1(rv) &= ~1; + word0(rv0) = Exp_1 - P*Exp_msk1; + word1(rv0) = 0; + rv *= rv0; + } +#endif + retfree: + Bfree(bb); + Bfree(bd); + Bfree(bs); + Bfree(bd0); + Bfree(delta); + ret: + if (se) + *se = (char *)s; + return sign ? -rv : rv; + } + + static int +quorem +#ifdef KR_headers + (b, S) Bigint *b, *S; +#else + (Bigint *b, Bigint *S) +#endif +{ + int n; + Long borrow, y; + ULong carry, q, ys; + ULong *bx, *bxe, *sx, *sxe; +#ifdef Pack_32 + Long z; + ULong si, zs; +#endif + + n = S->wds; +#ifdef DEBUG + /*debug*/ if (b->wds > n) + /*debug*/ Bug("oversize b in quorem"); +#endif + if (b->wds < n) + return 0; + sx = S->x; + sxe = sx + --n; + bx = b->x; + bxe = bx + n; + q = *bxe / (*sxe + 1); /* ensure q <= true quotient */ +#ifdef DEBUG + /*debug*/ if (q > 9) + /*debug*/ Bug("oversized quotient in quorem"); +#endif + if (q) { + borrow = 0; + carry = 0; + do { +#ifdef Pack_32 + si = *sx++; + ys = (si & 0xffff) * q + carry; + zs = (si >> 16) * q + (ys >> 16); + carry = zs >> 16; + y = (*bx & 0xffff) - (ys & 0xffff) + borrow; + borrow = y >> 16; + Sign_Extend(borrow, y); + z = (*bx >> 16) - (zs & 0xffff) + borrow; + borrow = z >> 16; + Sign_Extend(borrow, z); + Storeinc(bx, z, y); +#else + ys = *sx++ * q + carry; + carry = ys >> 16; + y = *bx - (ys & 0xffff) + borrow; + borrow = y >> 16; + Sign_Extend(borrow, y); + *bx++ = y & 0xffff; +#endif + } + while(sx <= sxe); + if (!*bxe) { + bx = b->x; + while(--bxe > bx && !*bxe) + --n; + b->wds = n; + } + } + if (cmp(b, S) >= 0) { + q++; + borrow = 0; + carry = 0; + bx = b->x; + sx = S->x; + do { +#ifdef Pack_32 + si = *sx++; + ys = (si & 0xffff) + carry; + zs = (si >> 16) + (ys >> 16); + carry = zs >> 16; + y = (*bx & 0xffff) - (ys & 0xffff) + borrow; + borrow = y >> 16; + Sign_Extend(borrow, y); + z = (*bx >> 16) - (zs & 0xffff) + borrow; + borrow = z >> 16; + Sign_Extend(borrow, z); + Storeinc(bx, z, y); +#else + ys = *sx++ + carry; + carry = ys >> 16; + y = *bx - (ys & 0xffff) + borrow; + borrow = y >> 16; + Sign_Extend(borrow, y); + *bx++ = y & 0xffff; +#endif + } + while(sx <= sxe); + bx = b->x; + bxe = bx + n; + if (!*bxe) { + while(--bxe > bx && !*bxe) + --n; + b->wds = n; + } + } + return q; + } + +#ifndef MULTIPLE_THREADS + static char *dtoa_result; +#endif + + static char * +#ifdef KR_headers +rv_alloc(i) int i; +#else +rv_alloc(int i) +#endif +{ + int j, k, *r; + + j = sizeof(ULong); + for(k = 0; + sizeof(Bigint) - sizeof(ULong) - sizeof(int) + j <= i; + j <<= 1) + k++; + r = (int*)Balloc(k); + *r = k; + return +#ifndef MULTIPLE_THREADS + dtoa_result = +#endif + (char *)(r+1); + } + + static char * +#ifdef KR_headers +nrv_alloc(s, rve, n) char *s, **rve; int n; +#else +nrv_alloc(char *s, char **rve, int n) +#endif +{ + char *rv, *t; + + t = rv = rv_alloc(n); + while(*t = *s++) t++; + if (rve) + *rve = t; + return rv; + } + +/* freedtoa(s) must be used to free values s returned by dtoa + * when MULTIPLE_THREADS is #defined. It should be used in all cases, + * but for consistency with earlier versions of dtoa, it is optional + * when MULTIPLE_THREADS is not defined. + */ + + void +#ifdef KR_headers +freedtoa(s) char *s; +#else +freedtoa(char *s) +#endif +{ + Bigint *b = (Bigint *)((int *)s - 1); + b->maxwds = 1 << (b->k = *(int*)b); + Bfree(b); +#ifndef MULTIPLE_THREADS + if (s == dtoa_result) + dtoa_result = 0; +#endif + } + +/* dtoa for IEEE arithmetic (dmg): convert double to ASCII string. + * + * Inspired by "How to Print Floating-Point Numbers Accurately" by + * Guy L. Steele, Jr. and Jon L. White [Proc. ACM SIGPLAN '90, pp. 92-101]. + * + * Modifications: + * 1. Rather than iterating, we use a simple numeric overestimate + * to determine k = floor(log10(d)). We scale relevant + * quantities using O(log2(k)) rather than O(k) multiplications. + * 2. For some modes > 2 (corresponding to ecvt and fcvt), we don't + * try to generate digits strictly left to right. Instead, we + * compute with fewer bits and propagate the carry if necessary + * when rounding the final digit up. This is often faster. + * 3. Under the assumption that input will be rounded nearest, + * mode 0 renders 1e23 as 1e23 rather than 9.999999999999999e22. + * That is, we allow equality in stopping tests when the + * round-nearest rule will give the same floating-point value + * as would satisfaction of the stopping test with strict + * inequality. + * 4. We remove common factors of powers of 2 from relevant + * quantities. + * 5. When converting floating-point integers less than 1e16, + * we use floating-point arithmetic rather than resorting + * to multiple-precision integers. + * 6. When asked to produce fewer than 15 digits, we first try + * to get by with floating-point arithmetic; we resort to + * multiple-precision integer arithmetic only if we cannot + * guarantee that the floating-point calculation has given + * the correctly rounded result. For k requested digits and + * "uniformly" distributed input, the probability is + * something like 10^(k-15) that we must resort to the Long + * calculation. + */ + + char * +dtoa +#ifdef KR_headers + (d, mode, ndigits, decpt, sign, rve) + double d; int mode, ndigits, *decpt, *sign; char **rve; +#else + (double d, int mode, int ndigits, int *decpt, int *sign, char **rve) +#endif +{ + /* Arguments ndigits, decpt, sign are similar to those + of ecvt and fcvt; trailing zeros are suppressed from + the returned string. If not null, *rve is set to point + to the end of the return value. If d is +-Infinity or NaN, + then *decpt is set to 9999. + + mode: + 0 ==> shortest string that yields d when read in + and rounded to nearest. + 1 ==> like 0, but with Steele & White stopping rule; + e.g. with IEEE P754 arithmetic , mode 0 gives + 1e23 whereas mode 1 gives 9.999999999999999e22. + 2 ==> max(1,ndigits) significant digits. This gives a + return value similar to that of ecvt, except + that trailing zeros are suppressed. + 3 ==> through ndigits past the decimal point. This + gives a return value similar to that from fcvt, + except that trailing zeros are suppressed, and + ndigits can be negative. + 4-9 should give the same return values as 2-3, i.e., + 4 <= mode <= 9 ==> same return as mode + 2 + (mode & 1). These modes are mainly for + debugging; often they run slower but sometimes + faster than modes 2-3. + 4,5,8,9 ==> left-to-right digit generation. + 6-9 ==> don't try fast floating-point estimate + (if applicable). + + Values of mode other than 0-9 are treated as mode 0. + + Sufficient space is allocated to the return value + to hold the suppressed trailing zeros. + */ + + int bbits, b2, b5, be, dig, i, ieps, ilim, ilim0, ilim1, + j, j1, k, k0, k_check, leftright, m2, m5, s2, s5, + spec_case, try_quick; + Long L; +#ifndef Sudden_Underflow + int denorm; + ULong x; +#endif + Bigint *b, *b1, *delta, *mlo, *mhi, *S; + double d2, ds, eps; + char *s, *s0; + +#ifndef MULTIPLE_THREADS + if (dtoa_result) { + freedtoa(dtoa_result); + dtoa_result = 0; + } +#endif + + if (word0(d) & Sign_bit) { + /* set sign for everything, including 0's and NaNs */ + *sign = 1; + word0(d) &= ~Sign_bit; /* clear sign bit */ + } + else + *sign = 0; + +#if defined(IEEE_Arith) + defined(VAX) +#ifdef IEEE_Arith + if ((word0(d) & Exp_mask) == Exp_mask) +#else + if (word0(d) == 0x8000) +#endif + { + /* Infinity or NaN */ + *decpt = 9999; +#ifdef IEEE_Arith + if (!word1(d) && !(word0(d) & 0xfffff)) + return nrv_alloc("Infinity", rve, 8); +#endif + return nrv_alloc("NaN", rve, 3); + } +#endif +#ifdef IBM + d += 0; /* normalize */ +#endif + if (!d) { + *decpt = 1; + return nrv_alloc("0", rve, 1); + } + + b = d2b(d, &be, &bbits); +#ifdef Sudden_Underflow + i = (int)(word0(d) >> Exp_shift1 & (Exp_mask>>Exp_shift1)); +#else + if (i = (int)(word0(d) >> Exp_shift1 & (Exp_mask>>Exp_shift1))) { +#endif + d2 = d; + word0(d2) &= Frac_mask1; + word0(d2) |= Exp_11; +#ifdef IBM + if (j = 11 - hi0bits(word0(d2) & Frac_mask)) + d2 /= 1 << j; +#endif + + /* log(x) ~=~ log(1.5) + (x-1.5)/1.5 + * log10(x) = log(x) / log(10) + * ~=~ log(1.5)/log(10) + (x-1.5)/(1.5*log(10)) + * log10(d) = (i-Bias)*log(2)/log(10) + log10(d2) + * + * This suggests computing an approximation k to log10(d) by + * + * k = (i - Bias)*0.301029995663981 + * + ( (d2-1.5)*0.289529654602168 + 0.176091259055681 ); + * + * We want k to be too large rather than too small. + * The error in the first-order Taylor series approximation + * is in our favor, so we just round up the constant enough + * to compensate for any error in the multiplication of + * (i - Bias) by 0.301029995663981; since |i - Bias| <= 1077, + * and 1077 * 0.30103 * 2^-52 ~=~ 7.2e-14, + * adding 1e-13 to the constant term more than suffices. + * Hence we adjust the constant term to 0.1760912590558. + * (We could get a more accurate k by invoking log10, + * but this is probably not worthwhile.) + */ + + i -= Bias; +#ifdef IBM + i <<= 2; + i += j; +#endif +#ifndef Sudden_Underflow + denorm = 0; + } + else { + /* d is denormalized */ + + i = bbits + be + (Bias + (P-1) - 1); + x = i > 32 ? word0(d) << 64 - i | word1(d) >> i - 32 + : word1(d) << 32 - i; + d2 = x; + word0(d2) -= 31*Exp_msk1; /* adjust exponent */ + i -= (Bias + (P-1) - 1) + 1; + denorm = 1; + } +#endif + ds = (d2-1.5)*0.289529654602168 + 0.1760912590558 + i*0.301029995663981; + k = (int)ds; + if (ds < 0. && ds != k) + k--; /* want k = floor(ds) */ + k_check = 1; + if (k >= 0 && k <= Ten_pmax) { + if (d < tens[k]) + k--; + k_check = 0; + } + j = bbits - i - 1; + if (j >= 0) { + b2 = 0; + s2 = j; + } + else { + b2 = -j; + s2 = 0; + } + if (k >= 0) { + b5 = 0; + s5 = k; + s2 += k; + } + else { + b2 -= k; + b5 = -k; + s5 = 0; + } + if (mode < 0 || mode > 9) + mode = 0; + try_quick = 1; + if (mode > 5) { + mode -= 4; + try_quick = 0; + } + leftright = 1; + switch(mode) { + case 0: + case 1: + ilim = ilim1 = -1; + i = 18; + ndigits = 0; + break; + case 2: + leftright = 0; + /* no break */ + case 4: + if (ndigits <= 0) + ndigits = 1; + ilim = ilim1 = i = ndigits; + break; + case 3: + leftright = 0; + /* no break */ + case 5: + i = ndigits + k + 1; + ilim = i; + ilim1 = i - 1; + if (i <= 0) + i = 1; + } + s = s0 = rv_alloc(i); + + if (ilim >= 0 && ilim <= Quick_max && try_quick) { + + /* Try to get by with floating-point arithmetic. */ + + i = 0; + d2 = d; + k0 = k; + ilim0 = ilim; + ieps = 2; /* conservative */ + if (k > 0) { + ds = tens[k&0xf]; + j = k >> 4; + if (j & Bletch) { + /* prevent overflows */ + j &= Bletch - 1; + d /= bigtens[n_bigtens-1]; + ieps++; + } + for(; j; j >>= 1, i++) + if (j & 1) { + ieps++; + ds *= bigtens[i]; + } + d /= ds; + } + else if (j1 = -k) { + d *= tens[j1 & 0xf]; + for(j = j1 >> 4; j; j >>= 1, i++) + if (j & 1) { + ieps++; + d *= bigtens[i]; + } + } + if (k_check && d < 1. && ilim > 0) { + if (ilim1 <= 0) + goto fast_failed; + ilim = ilim1; + k--; + d *= 10.; + ieps++; + } + eps = ieps*d + 7.; + word0(eps) -= (P-1)*Exp_msk1; + if (ilim == 0) { + S = mhi = 0; + d -= 5.; + if (d > eps) + goto one_digit; + if (d < -eps) + goto no_digits; + goto fast_failed; + } +#ifndef No_leftright + if (leftright) { + /* Use Steele & White method of only + * generating digits needed. + */ + eps = 0.5/tens[ilim-1] - eps; + for(i = 0;;) { + L = d; + d -= L; + *s++ = '0' + (int)L; + if (d < eps) + goto ret1; + if (1. - d < eps) + goto bump_up; + if (++i >= ilim) + break; + eps *= 10.; + d *= 10.; + } + } + else { +#endif + /* Generate ilim digits, then fix them up. */ + eps *= tens[ilim-1]; + for(i = 1;; i++, d *= 10.) { + L = d; + d -= L; + *s++ = '0' + (int)L; + if (i == ilim) { + if (d > 0.5 + eps) + goto bump_up; + else if (d < 0.5 - eps) { + while(*--s == '0'); + s++; + goto ret1; + } + break; + } + } +#ifndef No_leftright + } +#endif + fast_failed: + s = s0; + d = d2; + k = k0; + ilim = ilim0; + } + + /* Do we have a "small" integer? */ + + if (be >= 0 && k <= Int_max) { + /* Yes. */ + ds = tens[k]; + if (ndigits < 0 && ilim <= 0) { + S = mhi = 0; + if (ilim < 0 || d <= 5*ds) + goto no_digits; + goto one_digit; + } + for(i = 1;; i++) { + L = d / ds; + d -= L*ds; +#ifdef Check_FLT_ROUNDS + /* If FLT_ROUNDS == 2, L will usually be high by 1 */ + if (d < 0) { + L--; + d += ds; + } +#endif + *s++ = '0' + (int)L; + if (i == ilim) { + d += d; + if (d > ds || d == ds && L & 1) { + bump_up: + while(*--s == '9') + if (s == s0) { + k++; + *s = '0'; + break; + } + ++*s++; + } + break; + } + if (!(d *= 10.)) + break; + } + goto ret1; + } + + m2 = b2; + m5 = b5; + mhi = mlo = 0; + if (leftright) { + if (mode < 2) { + i = +#ifndef Sudden_Underflow + denorm ? be + (Bias + (P-1) - 1 + 1) : +#endif +#ifdef IBM + 1 + 4*P - 3 - bbits + ((bbits + be - 1) & 3); +#else + 1 + P - bbits; +#endif + } + else { + j = ilim - 1; + if (m5 >= j) + m5 -= j; + else { + s5 += j -= m5; + b5 += j; + m5 = 0; + } + if ((i = ilim) < 0) { + m2 -= i; + i = 0; + } + } + b2 += i; + s2 += i; + mhi = i2b(1); + } + if (m2 > 0 && s2 > 0) { + i = m2 < s2 ? m2 : s2; + b2 -= i; + m2 -= i; + s2 -= i; + } + if (b5 > 0) { + if (leftright) { + if (m5 > 0) { + mhi = pow5mult(mhi, m5); + b1 = mult(mhi, b); + Bfree(b); + b = b1; + } + if (j = b5 - m5) + b = pow5mult(b, j); + } + else + b = pow5mult(b, b5); + } + S = i2b(1); + if (s5 > 0) + S = pow5mult(S, s5); + + /* Check for special case that d is a normalized power of 2. */ + + spec_case = 0; + if (mode < 2) { + if (!word1(d) && !(word0(d) & Bndry_mask) +#ifndef Sudden_Underflow + && word0(d) & Exp_mask +#endif + ) { + /* The special case */ + b2 += Log2P; + s2 += Log2P; + spec_case = 1; + } + } + + /* Arrange for convenient computation of quotients: + * shift left if necessary so divisor has 4 leading 0 bits. + * + * Perhaps we should just compute leading 28 bits of S once + * and for all and pass them and a shift to quorem, so it + * can do shifts and ors to compute the numerator for q. + */ +#ifdef Pack_32 + if (i = ((s5 ? 32 - hi0bits(S->x[S->wds-1]) : 1) + s2) & 0x1f) + i = 32 - i; +#else + if (i = ((s5 ? 32 - hi0bits(S->x[S->wds-1]) : 1) + s2) & 0xf) + i = 16 - i; +#endif + if (i > 4) { + i -= 4; + b2 += i; + m2 += i; + s2 += i; + } + else if (i < 4) { + i += 28; + b2 += i; + m2 += i; + s2 += i; + } + if (b2 > 0) + b = lshift(b, b2); + if (s2 > 0) + S = lshift(S, s2); + if (k_check) { + if (cmp(b,S) < 0) { + k--; + b = multadd(b, 10, 0); /* we botched the k estimate */ + if (leftright) + mhi = multadd(mhi, 10, 0); + ilim = ilim1; + } + } + if (ilim <= 0 && mode > 2) { + if (ilim < 0 || cmp(b,S = multadd(S,5,0)) <= 0) { + /* no digits, fcvt style */ + no_digits: + k = -1 - ndigits; + goto ret; + } + one_digit: + *s++ = '1'; + k++; + goto ret; + } + if (leftright) { + if (m2 > 0) + mhi = lshift(mhi, m2); + + /* Compute mlo -- check for special case + * that d is a normalized power of 2. + */ + + mlo = mhi; + if (spec_case) { + mhi = Balloc(mhi->k); + Bcopy(mhi, mlo); + mhi = lshift(mhi, Log2P); + } + + for(i = 1;;i++) { + dig = quorem(b,S) + '0'; + /* Do we yet have the shortest decimal string + * that will round to d? + */ + j = cmp(b, mlo); + delta = diff(S, mhi); + j1 = delta->sign ? 1 : cmp(b, delta); + Bfree(delta); +#ifndef ROUND_BIASED + if (j1 == 0 && !mode && !(word1(d) & 1)) { + if (dig == '9') + goto round_9_up; + if (j > 0) + dig++; + *s++ = dig; + goto ret; + } +#endif + if (j < 0 || j == 0 && !mode +#ifndef ROUND_BIASED + && !(word1(d) & 1) +#endif + ) { + if (j1 > 0) { + b = lshift(b, 1); + j1 = cmp(b, S); + if ((j1 > 0 || j1 == 0 && dig & 1) + && dig++ == '9') + goto round_9_up; + } + *s++ = dig; + goto ret; + } + if (j1 > 0) { + if (dig == '9') { /* possible if i == 1 */ + round_9_up: + *s++ = '9'; + goto roundoff; + } + *s++ = dig + 1; + goto ret; + } + *s++ = dig; + if (i == ilim) + break; + b = multadd(b, 10, 0); + if (mlo == mhi) + mlo = mhi = multadd(mhi, 10, 0); + else { + mlo = multadd(mlo, 10, 0); + mhi = multadd(mhi, 10, 0); + } + } + } + else + for(i = 1;; i++) { + *s++ = dig = quorem(b,S) + '0'; + if (i >= ilim) + break; + b = multadd(b, 10, 0); + } + + /* Round off last digit */ + + b = lshift(b, 1); + j = cmp(b, S); + if (j > 0 || j == 0 && dig & 1) { + roundoff: + while(*--s == '9') + if (s == s0) { + k++; + *s++ = '1'; + goto ret; + } + ++*s++; + } + else { + while(*--s == '0'); + s++; + } + ret: + Bfree(S); + if (mhi) { + if (mlo && mlo != mhi) + Bfree(mlo); + Bfree(mhi); + } + ret1: + Bfree(b); + *s = 0; + *decpt = k + 1; + if (rve) + *rve = s; + return s0; + } +#ifdef __cplusplus +} +#endif diff --git a/base/runtime/c-libs/smlnj-math/exp64.c b/base/runtime/c-libs/smlnj-math/exp64.c new file mode 100644 index 0000000..5c9c2ed --- /dev/null +++ b/base/runtime/c-libs/smlnj-math/exp64.c @@ -0,0 +1,23 @@ +/* exp64.c + * + * COPYRIGHT (c) 1996 AT&T Research. + */ + +#include +#include "ml-base.h" +#include "ml-values.h" +#include "ml-objects.h" +#include "cfun-proto-list.h" + +/* _ml_Math_exp64: + */ +ml_val_t _ml_Math_exp64 (ml_state_t *msp, ml_val_t arg) +{ + double d = *(PTR_MLtoC(double, arg)); + ml_val_t res; + + REAL64_ALLOC(msp, res, exp(d)); + + return res; + +} /* end of _ml_Math_exp64 */ diff --git a/base/runtime/c-libs/smlnj-math/fp-dep.h b/base/runtime/c-libs/smlnj-math/fp-dep.h new file mode 100644 index 0000000..5e28d4d --- /dev/null +++ b/base/runtime/c-libs/smlnj-math/fp-dep.h @@ -0,0 +1,158 @@ +/*! \file fp-dep.h + * + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * System dependencies for floating-point interface. This header file defines + * a subset of the X3J11 specification for Floating-point C extensions (March + * 29, 1995 version). Eventually, we expect that most C compilers will support + * this, and this header file won't be necessary. + * + * The following things are defined: + * + * Rounding modes: + * FE_TONEAREST to nearest + * FE_UPWARD to +Inf + * FE_DOWNWARD to -Inf + * FE_TOWARDZERO to 0.0 + * + * fe_rnd_mode_t The representation type of rounding modes. + * + * fegetround() Get rounding mode + * fesetround(rm) Set rounding mode (return old mode) + * + * RMODE_EQ(rm1, rm2) Compare two rounding modes for equality. + * + * RMODE_C_EQ_ML Set, if the rounding mode values for C are + * the same as for SML (ignoring representation). + */ + +#ifndef _FP_DEP_H_ +#define _FP_DEP_H_ + +#include "ml-osdep.h" + +#if defined(HAS_ANSI_C_FP_EXT) +# include +/* some compilers may ignore the fesetround() function if this is not ON */ +#pragma STDC FENV_ACCESS ON + +typedef int fe_rnd_mode_t; + +#elif defined(OPSYS_AIX) +# include +# ifndef FP_RND_RN + /** Some gcc installations screw up the header files, so that + ** doesn't contain these definitions. + **/ +# define FP_RND_RZ 0 +# define FP_RND_RN 1 +# define FP_RND_RP 2 +# define FP_RND_RM 3 +# endif +# define FE_TONEAREST FP_RND_RN +# define FE_TOWARDZERO FP_RND_RZ +# define FE_UPWARD FP_RND_RP +# define FE_DOWNWARD FP_RND_RM +typedef int fe_rnd_mode_t; +# define fegetround() fp_read_rnd() +# define fesetround(RM) fp_swap_rnd(RM) + +#elif defined(OPSYS_FREEBSD) +# include +# define FE_TONEAREST FP_RN +# define FE_TOWARDZERO FP_RZ +# define FE_UPWARD FP_RP +# define FE_DOWNWARD FP_RM +typedef int fe_rnd_mode_t; +# define fegetround() fpgetround() +# define fesetround(RM) fpsetround(RM) + +#elif (defined(OPSYS_NETBSD) || defined(OPSYS_NETBSD2) || defined(OPSYS_OPENBSD)) +# include +# define FE_TONEAREST FP_RN +# define FE_TOWARDZERO FP_RZ +# define FE_UPWARD FP_RP +# define FE_DOWNWARD FP_RM +typedef int fe_rnd_mode_t; +# define fegetround() fpgetround() +# define fesetround(RM) fpsetround(RM) + +#elif (defined(OPSYS_WIN32) || defined(OPSYS_CYGWIN)) +/** + ** Win32 can set (some) alternate math paramters, but then only by re-linking + ** with different objects. Best to do it by hand here as well. + **/ +# define FE_TONEAREST 0 +# define FE_TOWARDZERO 3 +# define FE_UPWARD 2 +# define FE_DOWNWARD 1 +typedef int fe_rnd_mode_t; +extern int fegetround (void); +extern int fesetround (int); + +#elif defined(OPSYS_MKLINUX) +/* we will probably have to write some assembler to support this. */ +# define NO_ROUNDING_MODE_CTL + +#elif defined(OPSYS_OSF1) +/* because of bugs in Digital's OS (versions earlier than V4.0), rounding + * mode control cannot be supported. Note that later versions of Digital + * Unix (V4.0+) are called OPSYS_DUNIX. + */ +# define NO_ROUNDING_MODE_CTL + +#elif defined(OPSYS_SOLARIS) +# include +# define FE_TONEAREST FP_RN +# define FE_TOWARDZERO FP_RZ +# define FE_UPWARD FP_RP +# define FE_DOWNWARD FP_RM +typedef int fe_rnd_mode_t; +# if defined(ARCH_X86) + /* There is a bug in the Solaris X86 implementation of + * fpgetround() and fpsetround(); we use the assembler code instead. + */ +extern int fegetround (void); +extern int fesetround (int); + +# else +# define fegetround() fpgetround() +# define fesetround(RM) fpsetround(RM) +# endif + +#elif defined(OPSYS_DARWIN) && defined(ARCH_PPC) +# include +# define FE_TONEAREST RN_NEAREST +# define FE_UPWARD RN_TOWARD_PLUS +# define FE_DOWNWARD RN_TOWARD_MINUS +# define FE_TOWARDZERO RN_TOWARD_ZERO + typedef ppc_fp_rn_t fe_rnd_mode_t; + PVT fe_rnd_mode_t fegetround() { return(get_fp_scr()).rn; } + PVT fe_rnd_mode_t fesetround(fe_rnd_mode_t rm) { + ppc_fp_scr_t fpstate = get_fp_scr(); + fe_rnd_mode_t old = fpstate.rn; + fpstate.rn = rm; + set_fp_scr(fpstate); + return (old); + } +#else +# error do not know about FP dependencies +#endif + + +#ifndef RMODE_EQ +# define RMODE_EQ(RM1, RM2) ((RM1) == (RM2)) +#endif + + +#ifndef RMODE_C_NEQ_ML +# if ((FE_TONEAREST == 0) && (FE_TOWARDZERO == 1) && (FE_UPWARD == 2) && (FE_DOWNWARD == 3)) +# define RMODE_C_EQ_ML +# endif +#endif + +#endif /* !_FP_DEP_H_ */ + + + diff --git a/base/runtime/c-libs/smlnj-math/g_fmt.c b/base/runtime/c-libs/smlnj-math/g_fmt.c new file mode 100644 index 0000000..543430a --- /dev/null +++ b/base/runtime/c-libs/smlnj-math/g_fmt.c @@ -0,0 +1,104 @@ +/**************************************************************** + * + * The author of this software is David M. Gay. + * + * Copyright (c) 1991, 1996 by Lucent Technologies. + * + * Permission to use, copy, modify, and distribute this software for any + * purpose without fee is hereby granted, provided that this entire notice + * is included in all copies of any software which is or includes a copy + * or modification of this software and in all copies of the supporting + * documentation for such software. + * + * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED + * WARRANTY. IN PARTICULAR, NEITHER THE AUTHOR NOR LUCENT MAKES ANY + * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY + * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE. + * + ***************************************************************/ + +/* g_fmt(buf,x) stores the closest decimal approximation to x in buf; + * it suffices to declare buf + * char buf[32]; + */ + +#ifdef __cplusplus +extern "C" { +#endif + extern char *dtoa(double, int, int, int *, int *, char **); + extern char *g_fmt(char *, double); + extern void freedtoa(char*); +#ifdef __cplusplus + } +#endif + + char * +g_fmt(register char *b, double x) +{ + register int i, k; + register char *s; + int decpt, j, sign; + char *b0, *s0, *se; + + b0 = b; +#ifdef IGNORE_ZERO_SIGN + if (!x) { + *b++ = '0'; + *b = 0; + goto done; + } +#endif + s = s0 = dtoa(x, 0, 0, &decpt, &sign, &se); + if (sign) + *b++ = '-'; + if (decpt == 9999) /* Infinity or Nan */ { + while(*b++ = *s++); + goto done0; + } + if (decpt <= -4 || decpt > se - s + 5) { + *b++ = *s++; + if (*s) { + *b++ = '.'; + while(*b = *s++) + b++; + } + *b++ = 'e'; + /* sprintf(b, "%+.2d", decpt - 1); */ + if (--decpt < 0) { + *b++ = '-'; + decpt = -decpt; + } + else + *b++ = '+'; + for(j = 2, k = 10; 10*k <= decpt; j++, k *= 10); + for(;;) { + i = decpt / k; + *b++ = i + '0'; + if (--j <= 0) + break; + decpt -= i*k; + decpt *= 10; + } + *b = 0; + } + else if (decpt <= 0) { + *b++ = '.'; + for(; decpt < 0; decpt++) + *b++ = '0'; + while(*b++ = *s++); + } + else { + while(*b = *s++) { + b++; + if (--decpt == 0 && *s) + *b++ = '.'; + } + for(; decpt > 0; decpt--) + *b++ = '0'; + *b = 0; + } + done0: + freedtoa(s0); + done: + return b0; + } diff --git a/base/runtime/c-libs/smlnj-math/log64.c b/base/runtime/c-libs/smlnj-math/log64.c new file mode 100644 index 0000000..d42021c --- /dev/null +++ b/base/runtime/c-libs/smlnj-math/log64.c @@ -0,0 +1,23 @@ +/* log64.c + * + * COPYRIGHT (c) 1996 AT&T Research. + */ + +#include +#include "ml-base.h" +#include "ml-values.h" +#include "ml-objects.h" +#include "cfun-proto-list.h" + +/* _ml_Math_log64: + */ +ml_val_t _ml_Math_log64 (ml_state_t *msp, ml_val_t arg) +{ + double d = *(PTR_MLtoC(double, arg)); + ml_val_t res; + + REAL64_ALLOC(msp, res, log(d)); + + return res; + +} /* end of _ml_Math_log64 */ diff --git a/base/runtime/c-libs/smlnj-math/makefile b/base/runtime/c-libs/smlnj-math/makefile new file mode 100644 index 0000000..1ef7474 --- /dev/null +++ b/base/runtime/c-libs/smlnj-math/makefile @@ -0,0 +1,48 @@ +# +# makefile for SMLNJ-Math function library +# + +SHELL = /bin/sh + +INC_DIR = ../../include +CLIB_DIR = ../ +MACH_DIR = ../../mach-dep + +INCLUDES = -I$(INC_DIR) -I$(CLIB_DIR) -I$(MACH_DIR) -I../../objs + +MAKE = make +AR = ar +ARFLAGS = rcv +RANLIB = ranlib + +LIBRARY = libsmlnj-math.a + +VERSION = v-dummy + +OBJS = smlnj-math-lib.o \ + ctlrndmode.o \ + atan64.o \ + cos64.o \ + exp64.o \ + log64.o \ + sin64.o \ + sqrt64.o + +$(LIBRARY) : $(VERSION) $(OBJS) + rm -rf $(LIBRARY) + $(AR) $(ARFLAGS) $(LIBRARY) $(OBJS) + $(RANLIB) $(LIBRARY) + +$(VERSION) : + ($(MAKE) MAKE="$(MAKE)" clean) + echo "$(VERSION)" > $(VERSION) + +.c.o: $(INC_DIR)/ml-unixdep.h $(INC_DIR)/ml-base.h $(INC_DIR)/ml-values.h \ + $(INC_DIR)/ml-osdep.h $(CLIB_DIR)/ml-c.h \ + fp-dep.h cfun-proto-list.h cfun-list.h \ + $(MACH_DIR)/ml-fp.h + $(CC) $(CFLAGS) $(DEFS) $(INCLUDES) -c $< + +clean : + rm -f v-* *.o $(LIBRARY) + diff --git a/base/runtime/c-libs/smlnj-math/makefile.win32 b/base/runtime/c-libs/smlnj-math/makefile.win32 new file mode 100644 index 0000000..66b32aa --- /dev/null +++ b/base/runtime/c-libs/smlnj-math/makefile.win32 @@ -0,0 +1,91 @@ +# +# makefile for SMLNJ-Math function library +# win32 specific + +SHELL = + +INC_DIR = ..\..\include +CLIB_DIR = ..\. +MACH_DIR = ..\..\mach-dep + +INCLUDES = /I$(INC_DIR) /I$(CLIB_DIR) /I$(MACH_DIR) /I..\..\objs + +MAKEFILE = makefile.win32 +MAKE = nmake /F$(MAKEFILE) +AR = lib +ARFLAGS = +RANLIB = lib + +LIBRARY = libsmlnj-math.lib + +VERSION = v-dummy + +OBJS = smlnj-math-lib.obj \ + ctlrndmode.obj \ + atan64.obj \ + cos64.obj \ + exp64.obj \ + log64.obj \ + sin64.obj \ + sqrt64.obj + +$(LIBRARY) : $(VERSION) $(OBJS) + del /F /Q $(LIBRARY) + $(AR) $(ARFLAGS) /out:$(LIBRARY) $(OBJS) + $(RANLIB) /out:$(LIBRARY) + +$(VERSION) : + ($(MAKE) MAKE="$(MAKE)" clean) + echo "$(VERSION)" > $(VERSION) + +smlnj-math-lib.obj: smlnj-math-lib.c \ + $(INC_DIR)\ml-base.h \ + $(INC_DIR)\c-library.h \ + cfun-proto-list.h cfun-list.h + $(CC) $(CFLAGS) $(DEFS) $(INCLUDES) /c smlnj-math-lib.c + +ctlrndmode.obj: ctlrndmode.c \ + $(INC_DIR)\ml-base.h \ + $(INC_DIR)\ml-objects.h \ + fp-dep.h cfun-proto-list.h + $(CC) $(CFLAGS) $(DEFS) $(INCLUDES) /c ctlrndmode.c + +atan64.obj: atan64.c \ + $(INC_DIR)\ml-base.h $(INC_DIR)\ml-values.h \ + $(INC_DIR)\ml-objects.h \ + cfun-proto-list.h + $(CC) $(CFLAGS) $(DEFS) $(INCLUDES) /c atan64.c + +cos64.obj: cos64.c \ + $(INC_DIR)\ml-base.h $(INC_DIR)\ml-values.h \ + $(INC_DIR)\ml-objects.h \ + cfun-proto-list.h + $(CC) $(CFLAGS) $(DEFS) $(INCLUDES) /c cos64.c + +exp64.obj: exp64.c \ + $(INC_DIR)\ml-base.h $(INC_DIR)\ml-values.h \ + $(INC_DIR)\ml-objects.h \ + cfun-proto-list.h + $(CC) $(CFLAGS) $(DEFS) $(INCLUDES) /c exp64.c + +log64.obj: log64.c \ + $(INC_DIR)\ml-base.h $(INC_DIR)\ml-values.h \ + $(INC_DIR)\ml-objects.h \ + cfun-proto-list.h + $(CC) $(CFLAGS) $(DEFS) $(INCLUDES) /c log64.c + +sin64.obj: sin64.c \ + $(INC_DIR)\ml-base.h $(INC_DIR)\ml-values.h \ + $(INC_DIR)\ml-objects.h \ + cfun-proto-list.h + $(CC) $(CFLAGS) $(DEFS) $(INCLUDES) /c sin64.c + +sqrt64.obj: sqrt64.c \ + $(INC_DIR)\ml-base.h $(INC_DIR)\ml-values.h \ + $(INC_DIR)\ml-objects.h \ + cfun-proto-list.h + $(CC) $(CFLAGS) $(DEFS) $(INCLUDES) /c sqrt64.c + +clean : + del /F /Q v-* *.obj *.pdb $(LIBRARY) + diff --git a/base/runtime/c-libs/smlnj-math/math.c b/base/runtime/c-libs/smlnj-math/math.c new file mode 100644 index 0000000..dc4a900 --- /dev/null +++ b/base/runtime/c-libs/smlnj-math/math.c @@ -0,0 +1,109 @@ +/* math.c + * + * COPYRIGHT (c) 1995 AT&T Bell Laboratories. + * + */ + +#include "ml-base.h" +#include "ml-values.h" +#include "ml-state.h" +#include "tags.h" +#include "ml-objects.h" +#include "ml-fp.h" + +#include + +#define REAL_ALLOC(msp, r, d) { \ + ml_state_t *__msp = (msp); \ + ml_val_t *__p = __msp->ml_allocPtr; \ + double *__dp; \ + *__p++ = DESC_reald; \ + __dp = (double *) __p; \ + *__dp++ = (d); \ + (r) = PTR_CtoML(__msp->ml_allocPtr + 1); \ + __msp->ml_allocPtr = (ml_val_t *) __dp; \ + } + +ml_val_t c_cos(ml_state_t *msp, ml_val_t arg) +{ + double d; + ml_val_t res; + + Save_ML_FPState(); + Restore_C_FPState(); + d = cos(*(PTR_MLtoC(double,arg))); + REAL_ALLOC(msp,res,d); + Restore_ML_FPState(); + return res; +} + +ml_val_t c_sin(ml_state_t *msp, ml_val_t arg) +{ + double d; + ml_val_t res; + + Save_ML_FPState(); + Restore_C_FPState(); + d = sin(*(PTR_MLtoC(double,arg))); + REAL_ALLOC(msp,res,d); + Restore_ML_FPState(); + return res; +} + +ml_val_t c_exp(ml_state_t *msp, ml_val_t arg) +{ + double d; + ml_val_t res; + extern int errno; + + Save_ML_FPState(); + Restore_C_FPState(); + errno = 0; + d = exp(*(PTR_MLtoC(double,arg))); + REAL_ALLOC(msp,res,d); + REC_ALLOC2(msp,res,res,INT_CtoML(errno)); + Restore_ML_FPState(); + return res; +} + +ml_val_t c_log(ml_state_t *msp, ml_val_t arg) +{ + double d; + ml_val_t res; + + Save_ML_FPState(); + Restore_C_FPState(); + d = log(*(PTR_MLtoC(double,arg))); + REAL_ALLOC(msp,res,d); + Restore_ML_FPState(); + return res; +} + +ml_val_t c_atan(ml_state_t *msp, ml_val_t arg) +{ + double d; + ml_val_t res; + + Save_ML_FPState(); + Restore_C_FPState(); + d = atan(*(PTR_MLtoC(double,arg))); + REAL_ALLOC(msp,res,d); + Restore_ML_FPState(); + return res; +} + +ml_val_t c_sqrt(ml_state_t *msp, ml_val_t arg) +{ + double d; + ml_val_t res; + + Save_ML_FPState(); + Restore_C_FPState(); + d = sqrt(*(PTR_MLtoC(double,arg))); + REAL_ALLOC(msp,res,d); + Restore_ML_FPState(); + return res; +} + +/* end of math.c */ + diff --git a/base/runtime/c-libs/smlnj-math/sin64.c b/base/runtime/c-libs/smlnj-math/sin64.c new file mode 100644 index 0000000..cd29702 --- /dev/null +++ b/base/runtime/c-libs/smlnj-math/sin64.c @@ -0,0 +1,23 @@ +/* sin64.c + * + * COPYRIGHT (c) 1996 AT&T Research. + */ + +#include +#include "ml-base.h" +#include "ml-values.h" +#include "ml-objects.h" +#include "cfun-proto-list.h" + +/* _ml_Math_sin64: + */ +ml_val_t _ml_Math_sin64 (ml_state_t *msp, ml_val_t arg) +{ + double d = *(PTR_MLtoC(double, arg)); + ml_val_t res; + + REAL64_ALLOC(msp, res, sin(d)); + + return res; + +} /* end of _ml_Math_sin64 */ diff --git a/base/runtime/c-libs/smlnj-math/smlnj-math-lib.c b/base/runtime/c-libs/smlnj-math/smlnj-math-lib.c new file mode 100644 index 0000000..8a62914 --- /dev/null +++ b/base/runtime/c-libs/smlnj-math/smlnj-math-lib.c @@ -0,0 +1,28 @@ +/* smlnj-math-lib.c + * + * COPYRIGHT (c) 1994 AT&T Bell Laboratories. + */ + +#include "ml-base.h" +#include "c-library.h" +#include "cfun-proto-list.h" + + +/* the table of C functions and ML names */ +#define CFUNC(NAME, FUNC, MLTYPE) CFUNC_BIND(NAME, FUNC, MLTYPE) +PVT cfunc_binding_t CFunTable[] = { +#include "cfun-list.h" + CFUNC_NULL_BIND + }; +#undef CFUNC + + +/* the Math library */ +c_library_t SMLNJ_Math_Library = { + CLIB_NAME, + CLIB_VERSION, + CLIB_DATE, + NIL(clib_init_fn_t), + CFunTable + }; + diff --git a/base/runtime/c-libs/smlnj-math/sqrt64.c b/base/runtime/c-libs/smlnj-math/sqrt64.c new file mode 100644 index 0000000..f71fdf8 --- /dev/null +++ b/base/runtime/c-libs/smlnj-math/sqrt64.c @@ -0,0 +1,23 @@ +/* sqrt64.c + * + * COPYRIGHT (c) 1996 AT&T Research. + */ + +#include +#include "ml-base.h" +#include "ml-values.h" +#include "ml-objects.h" +#include "cfun-proto-list.h" + +/* _ml_Math_sqrt64: + */ +ml_val_t _ml_Math_sqrt64 (ml_state_t *msp, ml_val_t arg) +{ + double d = *(PTR_MLtoC(double, arg)); + ml_val_t res; + + REAL64_ALLOC(msp, res, sqrt(d)); + + return res; + +} /* end of _ml_Math_sqrt64 */ diff --git a/base/runtime/c-libs/smlnj-mp/acquire-proc.c b/base/runtime/c-libs/smlnj-mp/acquire-proc.c new file mode 100644 index 0000000..2b20413 --- /dev/null +++ b/base/runtime/c-libs/smlnj-mp/acquire-proc.c @@ -0,0 +1,21 @@ +/* acquire-proc.c + * + * COPYRIGHT (c) 1994 by AT&T Bell Laboratories. + */ + +#include "ml-base.h" +#include "ml-mp.h" +#include "cfun-proto-list.h" + +/* _ml_MP_acquire_proc: + */ +ml_val_t _ml_MP_acquire_proc (ml_state_t *msp, ml_val_t arg) +{ + +#ifdef MP_SUPPORT + return MP_AcquireProc (msp, arg); +#else + Die ("ml_acquire_proc: no mp support\n"); +#endif + +} /* end of _ml_MP_acquire_proc */ diff --git a/base/runtime/c-libs/smlnj-mp/cfun-list.h b/base/runtime/c-libs/smlnj-mp/cfun-list.h new file mode 100644 index 0000000..1879117 --- /dev/null +++ b/base/runtime/c-libs/smlnj-mp/cfun-list.h @@ -0,0 +1,18 @@ +/* cfun-list.h + * + * COPYRIGHT (c) 1994 AT&T Bell Laboratories. + * + * This file lists the directory library of C functions that are callable by ML. + */ + +#ifndef CLIB_NAME +#define CLIB_NAME "SMLNJ-MP" +#define CLIB_VERSION "1.0" +#define CLIB_DATE "December 18, 1994" +#endif + +CFUNC("acquire_proc", _ml_MP_acquire_proc, "") +CFUNC("max_procs", _ml_MP_max_procs, "") +CFUNC("release_proc", _ml_MP_release_proc, "") +CFUNC("spin_lock", _ml_MP_spin_lock, "") + diff --git a/base/runtime/c-libs/smlnj-mp/cfun-proto-list.h b/base/runtime/c-libs/smlnj-mp/cfun-proto-list.h new file mode 100644 index 0000000..16b3e96 --- /dev/null +++ b/base/runtime/c-libs/smlnj-mp/cfun-proto-list.h @@ -0,0 +1,18 @@ +/* cfun-proto-list.h + * + * COPYRIGHT (c) 1194 AT&T Bell Laboratories. + */ + +#ifndef _CFUN_PROTO_LIST_ +#define _CFUN_PROTO_LIST_ + +#ifndef _C_LIBRARY_ +# include "c-library.h" +#endif + +/* the external definitions for the C functions */ +#define CFUNC(NAME, FUNC, MLTYPE) CFUNC_PROTO(NAME, FUNC, MLTYPE) +#include "cfun-list.h" +#undef CFUNC + +#endif /* !_CFUN_PROTO_LIST_ */ diff --git a/base/runtime/c-libs/smlnj-mp/libsmlnj-mp.c b/base/runtime/c-libs/smlnj-mp/libsmlnj-mp.c new file mode 100644 index 0000000..c28dbf7 --- /dev/null +++ b/base/runtime/c-libs/smlnj-mp/libsmlnj-mp.c @@ -0,0 +1,28 @@ +/* libsmlnj-mp.c + * + * COPYRIGHT (c) 1994 AT&T Bell Laboratories. + */ + +#include "ml-base.h" +#include "c-library.h" +#include "cfun-proto-list.h" + + +/* the table of C functions and ML names */ +#define CFUNC(NAME, FUNC, MLTYPE) CFUNC_BIND(NAME, FUNC, MLTYPE) +PVT cfunc_binding_t CFunTable[] = { +#include "cfun-list.h" + CFUNC_NULL_BIND + }; +#undef CFUNC + + +/* the MP library */ +c_library_t SMLNJ_MP_Library = { + CLIB_NAME, + CLIB_VERSION, + CLIB_DATE, + NIL(clib_init_fn_t), + CFunTable + }; + diff --git a/base/runtime/c-libs/smlnj-mp/makefile b/base/runtime/c-libs/smlnj-mp/makefile new file mode 100644 index 0000000..de0c643 --- /dev/null +++ b/base/runtime/c-libs/smlnj-mp/makefile @@ -0,0 +1,43 @@ +# +# a template makefile for a C function library +# + +SHELL = /bin/sh + +INC_DIR = ../../include +CLIB_DIR = ../ + +INCLUDES = -I$(INC_DIR) -I$(CLIB_DIR) -I../../objs + +MAKE = make +AR = ar +ARFLAGS = rcv +RANLIB = ranlib + +LIBRARY = libsmlnj-mp.a + +VERSION = v-dummy + +OBJS = libsmlnj-mp.o \ + acquire-proc.o \ + max-procs.o \ + release-proc.o \ + spin-lock.o + +$(LIBRARY) : $(VERSION) $(OBJS) + rm -rf $(LIBRARY) + $(AR) $(ARFLAGS) $(LIBRARY) $(OBJS) + $(RANLIB) $(LIBRARY) + +$(VERSION) : + ($(MAKE) MAKE="$(MAKE)" clean) + echo "$(VERSION)" > $(VERSION) + +.c.o: $(INC_DIR)/ml-unixdep.h $(INC_DIR)/ml-base.h $(INC_DIR)/ml-values.h \ + $(INC_DIR)/ml-mp.h \ + $(CLIB_DIR)/ml-c.h cfun-proto-list.h cfun-list.h + $(CC) $(CFLAGS) $(DEFS) $(INCLUDES) -c $< + +clean : + rm -f v-* *.o $(LIBRARY) + diff --git a/base/runtime/c-libs/smlnj-mp/max-procs.c b/base/runtime/c-libs/smlnj-mp/max-procs.c new file mode 100644 index 0000000..35b0767 --- /dev/null +++ b/base/runtime/c-libs/smlnj-mp/max-procs.c @@ -0,0 +1,23 @@ +/* max-procs.c + * + * COPYRIGHT (c) 1994 by AT&T Bell Laboratories. + */ + +#include "ml-base.h" +#include "ml-mp.h" +#include "cfun-proto-list.h" + + +/* _ml_MP_release_proc: + */ +ml_val_t _ml_MP_release_proc (ml_state_t *msp, ml_val_t arg) +{ + +#ifdef MP_SUPPORT + MP_ReleaseProc(msp); /* should not return */ + Die ("_ml_MP_release_proc: call unexpectedly returned\n"); +#else + Die ("_ml_MP_release_proc: no mp support\n"); +#endif + +} /* end of _ml_MP_release_proc */ diff --git a/base/runtime/c-libs/smlnj-mp/release-proc.c b/base/runtime/c-libs/smlnj-mp/release-proc.c new file mode 100644 index 0000000..cbb46d4 --- /dev/null +++ b/base/runtime/c-libs/smlnj-mp/release-proc.c @@ -0,0 +1,22 @@ +/* release-proc.c + * + * COPYRIGHT (c) 1994 by AT&T Bell Laboratories. + */ + +#include "ml-base.h" +#include "ml-values.h" +#include "ml-mp.h" +#include "cfun-proto-list.h" + + +/* _ml_MP_max_procs: + */ +ml_val_t _ml_MP_max_procs (ml_state_t *msp, ml_val_t arg) +{ +#ifdef MP_SUPPORT + return INT_CtoML(MP_MaxProcs ()); +#else + Die ("_ml_MP_max_procs: no mp support\n"); +#endif + +} /* end of _ml_MP_max_procs */ diff --git a/base/runtime/c-libs/smlnj-mp/spin-lock.c b/base/runtime/c-libs/smlnj-mp/spin-lock.c new file mode 100644 index 0000000..76c6b36 --- /dev/null +++ b/base/runtime/c-libs/smlnj-mp/spin-lock.c @@ -0,0 +1,28 @@ +/* spin-lock.c + * + * COPYRIGHT (c) 1994 by AT&T Bell Laboratories. + */ + +#include "ml-base.h" +#include "ml-values.h" +#include "ml-state.h" +#include "ml-objects.h" +#include "ml-mp.h" +#include "cfun-proto-list.h" + + +/* _ml_MP_spin_lock: + */ +ml_val_t _ml_MP_spin_lock (ml_state_t *msp, ml_val_t arg) +{ +#ifdef MP_SUPPORT + /* this code is for use the assembly (MIPS.prim.asm) try_lock and lock */ + ml_val_t r; + + REF_ALLOC(msp, r, ML_false); + return r; +#else + Die ("ml_spin_lock: no mp support\n"); +#endif + +} /* end of _ml_MP_spin_lock */ diff --git a/base/runtime/c-libs/smlnj-prof/README b/base/runtime/c-libs/smlnj-prof/README new file mode 100644 index 0000000..be64da9 --- /dev/null +++ b/base/runtime/c-libs/smlnj-prof/README @@ -0,0 +1,2 @@ +This library contains code to support profiling SML. + diff --git a/base/runtime/c-libs/smlnj-prof/cfun-list.h b/base/runtime/c-libs/smlnj-prof/cfun-list.h new file mode 100644 index 0000000..eb17967 --- /dev/null +++ b/base/runtime/c-libs/smlnj-prof/cfun-list.h @@ -0,0 +1,16 @@ +/* cfun-list.h + * + * COPYRIGHT (c) 1994 AT&T Bell Laboratories. + * + * This file lists the directory library of C functions that are callable by ML. + */ + +#ifndef CLIB_NAME +#define CLIB_NAME "SMLNJ-Prof" +#define CLIB_VERSION "1.0" +#define CLIB_DATE "December 15, 1994" +#endif + +CFUNC("setTimer", _ml_Prof_setptimer, "bool -> unit") +CFUNC("getQuantum", _ml_Prof_getpquantum, "unit -> int") +CFUNC("setTimeArray", _ml_Prof_setpref, "word array option -> unit") diff --git a/base/runtime/c-libs/smlnj-prof/cfun-proto-list.h b/base/runtime/c-libs/smlnj-prof/cfun-proto-list.h new file mode 100644 index 0000000..0dec52c --- /dev/null +++ b/base/runtime/c-libs/smlnj-prof/cfun-proto-list.h @@ -0,0 +1,18 @@ +/* cfun-proto-list.h + * + * COPYRIGHT (c) 1996 AT&T Research + */ + +#ifndef _CFUN_PROTO_LIST_ +#define _CFUN_PROTO_LIST_ + +#ifndef _C_LIBRARY_ +# include "c-library.h" +#endif + +/* the external definitions for the C functions */ +#define CFUNC(NAME, FUNC, MLTYPE) CFUNC_PROTO(NAME, FUNC, MLTYPE) +#include "cfun-list.h" +#undef CFUNC + +#endif /* !_CFUN_PROTO_LIST_ */ diff --git a/base/runtime/c-libs/smlnj-prof/getpquantum.c b/base/runtime/c-libs/smlnj-prof/getpquantum.c new file mode 100644 index 0000000..3a35ca5 --- /dev/null +++ b/base/runtime/c-libs/smlnj-prof/getpquantum.c @@ -0,0 +1,19 @@ +/* getpquantum.c + * + * COPYRIGHT (c) 1996 AT&T Research. + */ + +#include "ml-base.h" +#include "ml-values.h" +#include "cfun-proto-list.h" +#include "profile.h" + +/* _ml_Prof_getpquantum : unit -> int + * + * Return the profile timer quantim in microseconds. + */ +ml_val_t _ml_Prof_getpquantum (ml_state_t *msp, ml_val_t arg) +{ + return INT_CtoML(PROFILE_QUANTUM_US); + +} /* end of _ml_Prof_getpquantum */ diff --git a/base/runtime/c-libs/smlnj-prof/makefile b/base/runtime/c-libs/smlnj-prof/makefile new file mode 100644 index 0000000..7c7c814 --- /dev/null +++ b/base/runtime/c-libs/smlnj-prof/makefile @@ -0,0 +1,43 @@ +# +# the makefile for the profile support library +# + +SHELL = /bin/sh + +INC_DIR = ../../include +CLIB_DIR = ../ + +INCLUDES = -I$(INC_DIR) -I$(CLIB_DIR) -I../../objs +GC_INCLUDES = $(INCLUDES) -I../../gc + +MAKE = make +AR = ar +ARFLAGS = rcv +RANLIB = ranlib + +LIBRARY = libsmlnj-prof.a + +VERSION = v-dummy + +OBJS = smlnj-prof-lib.o \ + getpquantum.o \ + setpref.o \ + setptimer.o + +$(LIBRARY) : $(VERSION) $(OBJS) + rm -rf $(LIBRARY) + $(AR) $(ARFLAGS) $(LIBRARY) $(OBJS) + $(RANLIB) $(LIBRARY) + +$(VERSION) : + ($(MAKE) MAKE="$(MAKE)" clean) + echo "$(VERSION)" > $(VERSION) + +.c.o: $(INC_DIR)/ml-unixdep.h $(INC_DIR)/ml-base.h $(INC_DIR)/ml-values.h \ + $(INC_DIR)/ml-objects.h $(INC_DIR)/profile.h \ + $(CLIB_DIR)/ml-c.h cfun-proto-list.h cfun-list.h + $(CC) $(CFLAGS) $(DEFS) $(GC_INCLUDES) -c $< + +clean : + rm -f v-* *.o $(LIBRARY) + diff --git a/base/runtime/c-libs/smlnj-prof/makefile.win32 b/base/runtime/c-libs/smlnj-prof/makefile.win32 new file mode 100644 index 0000000..186e4e3 --- /dev/null +++ b/base/runtime/c-libs/smlnj-prof/makefile.win32 @@ -0,0 +1,67 @@ +# +# the makefile for the profile support library +# win32 specific + +SHELL = + +INC_DIR = ..\..\include +CLIB_DIR = ..\. + +INCLUDES = /I$(INC_DIR) /I$(CLIB_DIR) /I..\..\objs +GC_INCLUDES = $(INCLUDES) /I..\..\gc + +MAKEFILE = makefile.win32 +MAKE = nmake /F$(MAKEFILE) +AR = lib +ARFLAGS = +RANLIB = lib + +LIBRARY = libsmlnj-prof.lib + +VERSION = v-dummy + +OBJS = smlnj-prof-lib.obj \ + getpquantum.obj \ + setpref.obj \ + setptimer.obj + +$(LIBRARY) : $(VERSION) $(OBJS) + del /F /Q $(LIBRARY) + $(AR) $(ARFLAGS) /out:$(LIBRARY) $(OBJS) + $(RANLIB) /out:$(LIBRARY) + +$(VERSION) : + ($(MAKE) MAKE="$(MAKE)" clean) + echo "$(VERSION)" > $(VERSION) + +smlnj-prof-lib.obj: smlnj-prof-lib.c \ + $(INC_DIR)\ml-base.h \ + $(INC_DIR)\c-library.h \ + cfun-proto-list.h cfun-list.h + $(CC) $(CFLAGS) $(DEFS) $(INCLUDES) /c smlnj-prof-lib.c + +getpquantum.obj: getpquantum.c \ + cfun-proto-list.h \ + $(INC_DIR)\ml-base.h $(INC_DIR)\ml-values.h \ + $(INC_DIR)\profile.h + $(CC) $(CFLAGS) $(DEFS) $(INCLUDES) /c getpquantum.c + +setpref.obj: setpref.c \ + cfun-proto-list.h \ + $(INC_DIR)\ml-base.h $(INC_DIR)\ml-values.h \ + $(INC_DIR)\ml-objects.h $(INC_DIR)\ml-globals.h \ + $(CLIB_DIR)\ml-c.h \ + $(INC_DIR)\profile.h + $(CC) $(CFLAGS) $(DEFS) $(INCLUDES) /c setpref.c + +setptimer.obj: setptimer.c \ + cfun-proto-list.h \ + $(INC_DIR)\ml-base.h $(INC_DIR)\ml-values.h \ + $(INC_DIR)\ml-objects.h \ + $(CLIB_DIR)\ml-c.h \ + $(INC_DIR)\profile.h + $(CC) $(CFLAGS) $(DEFS) $(INCLUDES) /c setptimer.c + +clean : + del /F /Q v-* *.obj *.pdb $(LIBRARY) + diff --git a/base/runtime/c-libs/smlnj-prof/setpref.c b/base/runtime/c-libs/smlnj-prof/setpref.c new file mode 100644 index 0000000..f3b9b5f --- /dev/null +++ b/base/runtime/c-libs/smlnj-prof/setpref.c @@ -0,0 +1,59 @@ +/* setpref.c + * + * COPYRIGHT (c) 1996 AT&T Research. + */ + +#include "ml-base.h" +#include "ml-c.h" +#include "ml-values.h" +#include "ml-objects.h" +#include "ml-globals.h" +#include "cfun-proto-list.h" +#include "profile.h" + +extern void EnableProfSignals (void); +extern void DisableProfSignals (void); + +/* _ml_Prof_setpref : word array option -> unit + * + * Set the profile array reference; NONE means that there is no array. + */ +ml_val_t _ml_Prof_setpref (ml_state_t *msp, ml_val_t arg) +{ +#if defined(OPSYS_UNIX) || defined(OPSYS_WIN32) + bool_t enabled = (ProfCntArray != ML_unit); + int i; + + if (arg != OPTION_NONE) { + ProfCntArray = OPTION_get(arg); + if (! enabled) { + /* add ProfCntArray to the C roots */ + CRoots[NumCRoots++] = &ProfCntArray; +#ifdef OPSYS_UNIX + /* enable profiling signals */ + EnableProfSignals (); +#endif + } + } + else if (enabled) { + /* remove ProfCntArray from the C roots */ + for (i = 0; i < NumCRoots; i++) { + if (CRoots[i] == &ProfCntArray) { + CRoots[i] = CRoots[--NumCRoots]; + break; + } + } +#ifdef OPSYS_UNIX + /* disable profiling signals */ + DisableProfSignals (); +#endif + ProfCntArray = ML_unit; + } + + return ML_unit; +#else + return RAISE_ERROR(msp, "time profiling not supported"); +#endif + +} /* end of _ml_Prof_setpref */ + diff --git a/base/runtime/c-libs/smlnj-prof/setptimer.c b/base/runtime/c-libs/smlnj-prof/setptimer.c new file mode 100644 index 0000000..58accd7 --- /dev/null +++ b/base/runtime/c-libs/smlnj-prof/setptimer.c @@ -0,0 +1,191 @@ +/* setptimer.c + * + * COPYRIGHT (c) 1995 AT&T Bell Laboratories. + * + * NOTE: this implementation is UNIX specific right now; I would like to + * define an OS abstraction layer for interval timers, which would cover + * both alarm timers and profiling, but I need to look at what other systems + * do first. + */ + +#ifdef OPSYS_UNIX +# include "ml-unixdep.h" +# include +#elif OPSYS_WIN32 +# include +#endif +#include "ml-base.h" +#include "ml-c.h" +#include "ml-values.h" +#include "ml-objects.h" +#include "ml-globals.h" +#include "cfun-proto-list.h" +#include "profile.h" + +extern void Enable (void); +extern void Disable (void); + +/* _ml_Prof_setptimer : bool -> unit + * + * Turn the profile timer on/off. + */ +ml_val_t _ml_Prof_setptimer (ml_state_t *msp, ml_val_t arg) +{ +#ifdef HAS_SETITIMER + struct itimerval new_itv; + int sts; + + + if (arg == ML_false) { + new_itv.it_interval.tv_sec = + new_itv.it_value.tv_sec = + new_itv.it_interval.tv_usec = + new_itv.it_value.tv_usec = 0; + } + else if (ProfCntArray == ML_unit) { + return RAISE_ERROR(msp, "no count array set"); + } + else { + new_itv.it_interval.tv_sec = + new_itv.it_value.tv_sec = 0; + new_itv.it_interval.tv_usec = + new_itv.it_value.tv_usec = PROFILE_QUANTUM_US; + } + + sts = setitimer (ITIMER_VIRTUAL, &new_itv, NIL(struct itimerval *)); + + CHK_RETURN_UNIT(msp, sts); +#elif OPSYS_WIN32 + if (arg == ML_false) + { + Disable(); + } + else if (ProfCntArray == ML_unit) + { + return RAISE_ERROR(msp, "no count array set"); + } + else + { + Enable(); + } + + return ML_unit; +#else + return RAISE_ERROR(msp, "time profiling not supported"); +#endif + +} /* end of _ml_Prof_setptimer */ + +#ifdef OPSYS_WIN32 +/* The pointer to the heap allocated array of call counts. +* When this pointer is ML_unit, then profiling is disabled. +*/ +ml_val_t ProfCntArray = ML_unit; +HANDLE g_hTimer = NULL; +HANDLE g_hPumpThread = NULL; +HANDLE g_hQueryThread = NULL; +#define TIMEOUT_VALUE 1000 + +/* local routines */ +VOID CALLBACK TimerAPCProc( + LPVOID lpArg, // Data value + DWORD dwTimerLowValue, // Timer low value + DWORD dwTimerHighValue ); // Timer high value + +/* This thread exists to provide a spot for the APC messages to be run */ +DWORD WINAPI PumpThread( LPVOID lpParam ) +{ + LARGE_INTEGER liDueTime; + + if (g_hTimer == NULL) + { + g_hTimer = CreateWaitableTimer(NULL, FALSE, TEXT("SMLNJ_PROF")); + + // Set it to go off in one quantum for the first time. + liDueTime.QuadPart = -1 * PROFILE_QUANTUM_US; + + SetWaitableTimer( + g_hTimer, // Handle to the timer object + &liDueTime, // When timer will become signaled + (PROFILE_QUANTUM_US / 1000 ), // Profile quantum in uS; convert to mS + TimerAPCProc, // Completion routine + lpParam, // Argument to the completion routine + FALSE ); // Do not restore a suspended system + } + + while(g_hTimer != NULL) + { + SleepEx(TIMEOUT_VALUE, TRUE); + } + return 0; +} + +FILETIME current, unused1, unused2, unused3; +LARGE_INTEGER oldTime, newTime; + +void Enable () +{ + if (g_hPumpThread == NULL) + { + HANDLE hThread = GetCurrentThread(); + + oldTime.LowPart = 0; + oldTime.HighPart = 0; + + DuplicateHandle(GetCurrentProcess(), + hThread, + GetCurrentProcess(), + &g_hQueryThread, + THREAD_QUERY_INFORMATION, + FALSE, + DUPLICATE_CLOSE_SOURCE); + + g_hPumpThread = CreateThread(NULL, 0, PumpThread, g_hQueryThread, 0, NULL); + } +} + +void Disable () +{ + if (g_hTimer != NULL) + { + CancelWaitableTimer(g_hTimer); + CloseHandle(g_hTimer); + g_hTimer = NULL; + if (WaitForSingleObject(g_hPumpThread, TIMEOUT_VALUE*2) != WAIT_TIMEOUT) + { + CloseHandle(g_hPumpThread); + g_hPumpThread = NULL; + CloseHandle(g_hQueryThread); + g_hQueryThread = NULL; + } + } +} + +/* + * Since there's no good way on a base Windows installation to create a thread-runtime + * specific timer, what we do instead is create a timer that checks to see if the elapsed + * quantum amount has expired on the VM thread's execution time. + */ +VOID CALLBACK TimerAPCProc( + LPVOID lpArg, // Data value + DWORD dwTimerLowValue, // Timer low value + DWORD dwTimerHighValue ) // Timer high value +{ + HANDLE hThread = (HANDLE)lpArg; + GetThreadTimes(hThread, &unused1, &unused2, &unused3, ¤t); + + newTime.LowPart = current.dwLowDateTime; + newTime.HighPart = current.dwHighDateTime; + // Have to divide by ten because the thread times are in 100-ns units, not 10us. + if ((newTime.QuadPart - oldTime.QuadPart) > (PROFILE_QUANTUM_US/10)) + { + Word_t *arr = GET_SEQ_DATAPTR(Word_t, ProfCntArray); + int indx = INT_MLtoC(DEREF(ProfCurrent)); + + arr[indx]++; + + oldTime = newTime; + } +} + +#endif \ No newline at end of file diff --git a/base/runtime/c-libs/smlnj-prof/smlnj-prof-lib.c b/base/runtime/c-libs/smlnj-prof/smlnj-prof-lib.c new file mode 100644 index 0000000..d2ad44f --- /dev/null +++ b/base/runtime/c-libs/smlnj-prof/smlnj-prof-lib.c @@ -0,0 +1,28 @@ +/* smlnj-runt-lib.c + * + * COPYRIGHT (c) 1994 AT&T Bell Laboratories. + */ + +#include "ml-base.h" +#include "c-library.h" +#include "cfun-proto-list.h" + + +/* the table of C functions and ML names */ +#define CFUNC(NAME, FUNC, MLTYPE) CFUNC_BIND(NAME, FUNC, MLTYPE) +PVT cfunc_binding_t CFunTable[] = { +#include "cfun-list.h" + CFUNC_NULL_BIND + }; +#undef CFUNC + + +/* the Runtime library */ +c_library_t SMLNJ_Prof_Library = { + CLIB_NAME, + CLIB_VERSION, + CLIB_DATE, + NIL(clib_init_fn_t), + CFunTable + }; + diff --git a/base/runtime/c-libs/smlnj-runtime/alloc-code.c b/base/runtime/c-libs/smlnj-runtime/alloc-code.c new file mode 100644 index 0000000..8dba401 --- /dev/null +++ b/base/runtime/c-libs/smlnj-runtime/alloc-code.c @@ -0,0 +1,31 @@ +/* alloc-code.c + * + * COPYRIGHT (c) 1994 by AT&T Bell Laboratories. + */ + +#include "cache-flush.h" +#include "ml-base.h" +#include "ml-values.h" +#include "ml-state.h" +#include "ml-objects.h" +#include "cfun-proto-list.h" + + +/* _ml_RunT_alloc_code : int -> Word8Array.array + * + * Allocate a code object of the given size. + * + * Note: Generating the name string within the code object is now + * part of the code generator's responsibility. + */ +ml_val_t _ml_RunT_alloc_code (ml_state_t *msp, ml_val_t arg) +{ + int nbytes = INT_MLtoC(arg); + ml_val_t code, res; + + code = ML_AllocCode (msp, nbytes); + + SEQHDR_ALLOC(msp, res, DESC_word8arr, code, nbytes); + + return res; +} /* end of _ml_RunT_alloc_code */ diff --git a/base/runtime/c-libs/smlnj-runtime/argv.c b/base/runtime/c-libs/smlnj-runtime/argv.c new file mode 100644 index 0000000..9468f86 --- /dev/null +++ b/base/runtime/c-libs/smlnj-runtime/argv.c @@ -0,0 +1,18 @@ +/* argv.c + * + * COPYRIGHT (c) 1992 by AT&T Bell Laboratories. + */ + +#include "ml-base.h" +#include "ml-values.h" +#include "ml-objects.h" +#include "cfun-proto-list.h" + +/* _ml_Proc_argv: + */ +ml_val_t _ml_Proc_argv (ml_state_t *msp, ml_val_t arg) +{ + return ML_CStringList (msp, CmdLineArgs); + +} /* end of _ml_Proc_argv */ + diff --git a/base/runtime/c-libs/smlnj-runtime/blast-in.c b/base/runtime/c-libs/smlnj-runtime/blast-in.c new file mode 100644 index 0000000..e8b4ebe --- /dev/null +++ b/base/runtime/c-libs/smlnj-runtime/blast-in.c @@ -0,0 +1,30 @@ +/* blast_in.c + * + * COPYRIGHT (c) 1994 by AT&T Bell Laboratories. + */ + +#include "ml-base.h" +#include "ml-values.h" +#include "ml-objects.h" +#include "ml-c.h" +#include "heap-io.h" +#include "cfun-proto-list.h" + +/* _ml_RunT_blast_in : string -> 'a + * + * Build an ML object from a string. + */ +ml_val_t _ml_RunT_blast_in (ml_state_t *msp, ml_val_t arg) +{ + bool_t errFlg = FALSE; + ml_val_t obj; + + obj = BlastIn (msp, GET_SEQ_DATAPTR(Byte_t, arg), GET_SEQ_LEN(arg), &errFlg); + + if (errFlg) + return RAISE_ERROR(msp, "blast_in"); + else + return obj; + +} /* end of _ml_RunT_blast_in */ + diff --git a/base/runtime/c-libs/smlnj-runtime/blast-out.c b/base/runtime/c-libs/smlnj-runtime/blast-out.c new file mode 100644 index 0000000..d495855 --- /dev/null +++ b/base/runtime/c-libs/smlnj-runtime/blast-out.c @@ -0,0 +1,28 @@ +/* blast_out.c + * + * COPYRIGHT (c) 1994 by AT&T Bell Laboratories. + */ + +#include "ml-base.h" +#include "ml-values.h" +#include "ml-c.h" +#include "heap-io.h" +#include "cfun-proto-list.h" + +/* _ml_RunT_blast_out : 'a -> Word8Vector.vector + * + * Translate a heap object into a linear representation (vector of bytes). + */ +ml_val_t _ml_RunT_blast_out (ml_state_t *msp, ml_val_t arg) +{ + ml_val_t data; + + data = BlastOut (msp, arg); + + if (data == ML_unit) + return RAISE_ERROR(msp, "attempt to blast object failed"); + else + return data; + +} /* end of _ml_RunT_blast_out */ + diff --git a/base/runtime/c-libs/smlnj-runtime/cfun-list.h b/base/runtime/c-libs/smlnj-runtime/cfun-list.h new file mode 100644 index 0000000..2f917d1 --- /dev/null +++ b/base/runtime/c-libs/smlnj-runtime/cfun-list.h @@ -0,0 +1,33 @@ +/* cfun-list.h + * + * COPYRIGHT (c) 1994 AT&T Bell Laboratories. + * + * This file lists the directory library of C functions that are callable by ML. + */ + +#ifndef CLIB_NAME +#define CLIB_NAME "SMLNJ-RunT" +#define CLIB_VERSION "1.0" +#define CLIB_DATE "December 15, 1994" +#endif + +CFUNC("argv", _ml_Proc_argv, "unit -> string list") +CFUNC("rawArgv", _ml_Proc_raw_argv, "unit -> string list") +CFUNC("cmdName", _ml_Proc_cmd_name, "unit -> string") +CFUNC("shiftArgv", _ml_Proc_shift_argv, "unit -> unit") +CFUNC("blastIn", _ml_RunT_blast_in, "Word8Vector.vector -> 'a") +CFUNC("blastOut", _ml_RunT_blast_out, "'a -> Word8Vector.vector") +CFUNC("debug", _ml_RunT_debug, "string -> unit") +CFUNC("dummy", _ml_RunT_dummy, "string -> unit") +CFUNC("exportHeap", _ml_RunT_export_heap, "string -> bool") +CFUNC("exportFn", _ml_RunT_export_fun, "(string * (string list -> unit)) -> unit") +CFUNC("gcControl", _ml_RunT_gc_ctl, "(string * int ref) list -> unit") +CFUNC("itick", _ml_RunT_itick, "unit -> word64") +CFUNC("allocCode", _ml_RunT_alloc_code, "") +CFUNC("mkExec", _ml_RunT_mkexec, "Word8Vector * int -> object -> object") +CFUNC("mkLiterals", _ml_RunT_mkliterals, "Word8Vector.vector -> ovec") +CFUNC("sysInfo", _ml_RunT_sysinfo, "string -> string option") +CFUNC("record1", _ml_RunT_record1, "object -> object") +CFUNC("recordConcat", _ml_RunT_recordconcat, "(object * object) -> object") +CFUNC("setIntTimer", _ml_RunT_setitimer, "word64 option -> unit") + diff --git a/base/runtime/c-libs/smlnj-runtime/cfun-proto-list.h b/base/runtime/c-libs/smlnj-runtime/cfun-proto-list.h new file mode 100644 index 0000000..16b3e96 --- /dev/null +++ b/base/runtime/c-libs/smlnj-runtime/cfun-proto-list.h @@ -0,0 +1,18 @@ +/* cfun-proto-list.h + * + * COPYRIGHT (c) 1194 AT&T Bell Laboratories. + */ + +#ifndef _CFUN_PROTO_LIST_ +#define _CFUN_PROTO_LIST_ + +#ifndef _C_LIBRARY_ +# include "c-library.h" +#endif + +/* the external definitions for the C functions */ +#define CFUNC(NAME, FUNC, MLTYPE) CFUNC_PROTO(NAME, FUNC, MLTYPE) +#include "cfun-list.h" +#undef CFUNC + +#endif /* !_CFUN_PROTO_LIST_ */ diff --git a/base/runtime/c-libs/smlnj-runtime/cmd-name.c b/base/runtime/c-libs/smlnj-runtime/cmd-name.c new file mode 100644 index 0000000..dbcd5c5 --- /dev/null +++ b/base/runtime/c-libs/smlnj-runtime/cmd-name.c @@ -0,0 +1,18 @@ +/* cmd-name.c + * + * COPYRIGHT (c) 1997 Bell Labs, Lucent Technologies. + */ + +#include "ml-base.h" +#include "ml-values.h" +#include "ml-objects.h" +#include "cfun-proto-list.h" + +/* _ml_Proc_cmd_name : unit -> string + */ +ml_val_t _ml_Proc_cmd_name (ml_state_t *msp, ml_val_t arg) +{ + return ML_CString (msp, MLCmdName); + +} /* end of _ml_Proc_cmd_name */ + diff --git a/base/runtime/c-libs/smlnj-runtime/debug.c b/base/runtime/c-libs/smlnj-runtime/debug.c new file mode 100644 index 0000000..6b7a42c --- /dev/null +++ b/base/runtime/c-libs/smlnj-runtime/debug.c @@ -0,0 +1,23 @@ +/* debug.c + * + * COPYRIGHT (c) 1995 by AT&T Bell Laboratories. + * + * Print a string out to the debug stream. + */ + +#include "ml-base.h" +#include "ml-values.h" +#include "cfun-proto-list.h" + + +/* _ml_RunT_debug : string -> unit + * + */ +ml_val_t _ml_RunT_debug (ml_state_t *msp, ml_val_t arg) +{ + SayDebug (STR_MLtoC(arg)); + + return ML_unit; + +} /* end of _ml_RunT_debug */ + diff --git a/base/runtime/c-libs/smlnj-runtime/dummy.c b/base/runtime/c-libs/smlnj-runtime/dummy.c new file mode 100644 index 0000000..20f616a --- /dev/null +++ b/base/runtime/c-libs/smlnj-runtime/dummy.c @@ -0,0 +1,26 @@ +/* dummy.c + * + * COPYRIGHT (c) 1995 by AT&T Bell Laboratories. + * + * This is a dummy run-time routine for when we would like to call + * a null C function. + */ + +#include "ml-base.h" +#include "ml-values.h" +#include "cfun-proto-list.h" + +/* _ml_RunT_dummy : string -> unit + * + * The string argument can be used as a unique marker. + */ +ml_val_t _ml_RunT_dummy (ml_state_t *msp, ml_val_t arg) +{ + /* + char *s = STR_MLtoC(arg); + */ + + return ML_unit; + +} /* end of _ml_RunT_dummy */ + diff --git a/base/runtime/c-libs/smlnj-runtime/export-fun.c b/base/runtime/c-libs/smlnj-runtime/export-fun.c new file mode 100644 index 0000000..7dac946 --- /dev/null +++ b/base/runtime/c-libs/smlnj-runtime/export-fun.c @@ -0,0 +1,49 @@ +/*! \file export-fun.c + * + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + */ + +#include "ml-osdep.h" +#include +#include +#include "ml-base.h" +#include "ml-values.h" +#include "ml-state.h" +#include "heap-io.h" +#include "ml-c.h" +#include "cfun-proto-list.h" + + +/* _ml_RunT_export_fun : (string * ((string * string list) -> int)) -> unit + * + * Export the given ML function. + */ +ml_val_t _ml_RunT_export_fun (ml_state_t *msp, ml_val_t arg) +{ + char fname[1024]; + ml_val_t mlName = REC_SEL(arg, 0); + ml_val_t funct = REC_SEL(arg, 1); + FILE *file; + int sts; + + QualifyImageName (strcpy(fname, STR_MLtoC(mlName))); + + if ((file = fopen(fname, "wb")) == NULL) + return RAISE_ERROR(msp, "unable to open file for writing"); + + sts = ExportFnImage (msp, funct, file); + fclose (file); + + if (sts == SUCCESS) { + Exit (0); + } + else { + Die ("export failed"); + } +/* NOTE: while it would be nice to raise a SysErr exception here, the ML state + * has been trashed as a side-effect of the export operation. + return RAISE_ERROR(msp, "export failed"); + */ + +} /* end of _ml_RunT_export_fun */ diff --git a/base/runtime/c-libs/smlnj-runtime/export-heap.c b/base/runtime/c-libs/smlnj-runtime/export-heap.c new file mode 100644 index 0000000..a854fc7 --- /dev/null +++ b/base/runtime/c-libs/smlnj-runtime/export-heap.c @@ -0,0 +1,43 @@ +/* export-heap.c + * + * COPYRIGHT (c) 1995 by AT&T Bell Laboratories. + */ + +#include "ml-osdep.h" +#include +#include +#include "ml-base.h" +#include "ml-values.h" +#include "ml-state.h" +#include "heap-io.h" +#include "ml-c.h" +#include "cfun-proto-list.h" + + +/* _ml_RunT_export_heap : string -> bool + * + * Export the world to the given file and return false (the exported version + * returns true). + */ +ml_val_t _ml_RunT_export_heap (ml_state_t *msp, ml_val_t arg) +{ + char fname[1024]; + FILE *file; + int sts; + + QualifyImageName (strcpy(fname, STR_MLtoC(arg))); + + if ((file = fopen(fname, "wb")) == NULL) + return RAISE_ERROR(msp, "unable to open file for writing"); + + msp->ml_arg = ML_true; + sts = ExportHeapImage (msp, file); + fclose (file); + + if (sts == SUCCESS) + return ML_false; + else + return RAISE_ERROR(msp, "export failed"); + +} /* end of _ml_RunT_export_heap */ + diff --git a/base/runtime/c-libs/smlnj-runtime/gc-ctl.c b/base/runtime/c-libs/smlnj-runtime/gc-ctl.c new file mode 100644 index 0000000..2467873 --- /dev/null +++ b/base/runtime/c-libs/smlnj-runtime/gc-ctl.c @@ -0,0 +1,133 @@ +/* gc-ctl.c + * + * COPYRIGHT (c) 2022 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * General interface for GC control functions. + */ + +#include "ml-base.h" +#include +#include "ml-values.h" +#include "ml-state.h" +#include "vproc-state.h" +#include "memory.h" +#include "heap.h" +#include "ml-objects.h" +#include "cfun-proto-list.h" + +#define STREQ(s1, s2) (strcmp((s1), STR_MLtoC(s2)) == 0) + +PVT void SetVMCache (ml_state_t *msp, ml_val_t cell); +PVT void DoGC (ml_state_t *msp, ml_val_t cell, ml_val_t *next); +PVT void AllGC (ml_state_t *msp, ml_val_t *next); + + +/* _ml_RunT_gc_ctl : (string * int ref) list -> unit + * + * Current control operations: + * + * ("SetVMCache", ref n) - sets VM cache level to n; returns old cache + * level. + * ("DoGC", ref n) - does a GC of the first "n" generations + * ("AllGC", _) - collects all generations. + * ("Messages", ref 0) - turn GC messages off + * ("Messages", ref n) - turn GC messages on (n > 0) + * ("SigThreshold", ref n) - set GC signal threshold to n (>= 0) + */ +ml_val_t _ml_RunT_gc_ctl (ml_state_t *msp, ml_val_t arg) +{ + while (arg != LIST_nil) { + ml_val_t cmd = LIST_hd(arg); + ml_val_t oper = REC_SEL(cmd, 0); + ml_val_t cell = REC_SEL(cmd, 1); + + arg = LIST_tl(arg); + + if (STREQ("SetVMCache", oper)) + SetVMCache (msp, cell); + else if (STREQ("DoGC", oper)) + DoGC (msp, cell, &arg); + else if (STREQ("AllGC", oper)) + AllGC (msp, &arg); + else if (STREQ("Messages", oper)) { + if (INT_MLtoC(DEREF(cell)) > 0) + GCMessages = TRUE; + else + GCMessages = FALSE; + } + else if (STREQ("LimitHeap", oper)) { + /* NOTE: this control is not needed once we have dynamically sized areans! */ + if (INT_MLtoC(DEREF(cell)) > 0) + UnlimitedHeap = FALSE; + else + UnlimitedHeap = TRUE; + } + else if (STREQ("SigThreshold", oper)) { + int threshold = INT_MLtoC(DEREF(cell)); + if (threshold < 0) threshold = 0; + msp->ml_vproc->vp_gcSigThreshold = threshold; + } + } + + return ML_unit; + +} /* end of _ml_RunT_gc_ctl */ + + +/* SetVMCache: + * + * Set the VM cache generation, return the old level. + */ +PVT void SetVMCache (ml_state_t *msp, ml_val_t arg) +{ + int level = INT_MLtoC(DEREF(arg)); + heap_t *heap = msp->ml_heap; + + if (level < 0) + level = 0; + else if (level > MAX_NUM_GENS) + level = MAX_NUM_GENS; + + if (level < heap->cacheGen) { + /* Free any cached memory objects. */ + int i; + for (i = level; i < heap->cacheGen; i++) + MEM_FreeMemObj (heap->gen[i]->cacheObj); + } + + ASSIGN(arg, INT_CtoML(heap->cacheGen)); + heap->cacheGen = level; + +} /* end of SetVMCache */ + + +/* DoGC: + * + * Force a garbage collection of the given level. + */ +PVT void DoGC (ml_state_t *msp, ml_val_t arg, ml_val_t *next) +{ + heap_t *heap = msp->ml_heap; + int level = INT_MLtoC(DEREF(arg)); + + if (level < 0) + level = 0; + else if (heap->numGens < level) + level = heap->numGens; + + InvokeGCWithRoots (msp, level, next, NIL(ml_val_t *)); + +} /* end of DoGC */ + + +/* AllGC: + * + * Force a garbage collection of all generations. + */ +PVT void AllGC (ml_state_t *msp, ml_val_t *next) +{ + InvokeGCWithRoots (msp, msp->ml_heap->numGens, next, NIL(ml_val_t *)); + +} /* end of AllGC */ + diff --git a/base/runtime/c-libs/smlnj-runtime/itick.c b/base/runtime/c-libs/smlnj-runtime/itick.c new file mode 100644 index 0000000..433f2f6 --- /dev/null +++ b/base/runtime/c-libs/smlnj-runtime/itick.c @@ -0,0 +1,50 @@ +/*! \file itick.c + * + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + */ + +#include "ml-base.h" +#ifdef OPSYS_UNIX +# include "ml-unixdep.h" +# include +# include +#elif defined(OPSYS_WIN32) +# include "win32-timers.h" +#endif +#include "ml-objects.h" +#include "ml-c.h" +#include "cfun-proto-list.h" + +/* _ml_RunT_itick : unit -> Word64.word + * + * Return the minimum interval supported by the interval timer. + */ +ml_val_t _ml_RunT_itick (ml_state_t *msp, ml_val_t arg) +{ +#if defined(HAS_CLOCK_GETRES) + struct timespec ts; + + if (clock_getres(CLOCK_REALTIME, &ts) == 0) { + Unsigned64_t t = NS_PER_SEC * (Unsigned64_t)ts.tv_sec + (Unsigned64_t)ts.tv_nsec; + return ML_AllocWord64(msp, t); + } + else { + return RAISE_SYSERR(msp, 0); + } + +#elif defined(HAS_SETITIMER) + /* we guess at 10ms, since that is what is documented. It might be smaller than + * that, but there doesn't seem to be a way to tell. + */ + return ML_AllocWord64(msp, 10000000); + +#elif defined(OPSYS_WIN32) + /* 1 ms == 1000000 ns */ + return ML_AllocWord64(msp, 1000000); + +#else + return RAISE_ERROR(msp, "itick unimplemented"); +#endif + +} /* end of _ml_RunT_itick */ diff --git a/base/runtime/c-libs/smlnj-runtime/makefile b/base/runtime/c-libs/smlnj-runtime/makefile new file mode 100644 index 0000000..194677a --- /dev/null +++ b/base/runtime/c-libs/smlnj-runtime/makefile @@ -0,0 +1,59 @@ +# +# the makefile for the runtime library +# + +SHELL = /bin/sh + +INC_DIR = ../../include +CLIB_DIR = ../ + +INCLUDES = -I$(INC_DIR) -I$(CLIB_DIR) -I../../objs +GC_INCLUDES = $(INCLUDES) -I../../gc + +MAKE = make +AR = ar +ARFLAGS = rcv +RANLIB = ranlib + +LIBRARY = libsmlnj-runt.a + +VERSION = v-dummy + +OBJS = smlnj-runt-lib.o \ + alloc-code.o \ + argv.o \ + raw-argv.o \ + shift-argv.o \ + cmd-name.o \ + blast-in.o \ + blast-out.o \ + debug.o \ + dummy.o \ + export-heap.o \ + export-fun.o \ + gc-ctl.o \ + itick.o \ + mkexec.o \ + mkliterals.o \ + record1.o \ + record-concat.o \ + sysinfo.o \ + setitimer.o + +$(LIBRARY) : $(VERSION) $(OBJS) + rm -rf $(LIBRARY) + $(AR) $(ARFLAGS) $(LIBRARY) $(OBJS) + $(RANLIB) $(LIBRARY) + +$(VERSION) : + ($(MAKE) MAKE="$(MAKE)" clean) + echo "$(VERSION)" > $(VERSION) + +.c.o: $(INC_DIR)/ml-unixdep.h $(INC_DIR)/ml-base.h $(INC_DIR)/ml-values.h \ + $(INC_DIR)/ml-objects.h \ + $(CLIB_DIR)/ml-c.h cfun-proto-list.h cfun-list.h + $(CC) $(CFLAGS) $(DEFS) $(GC_INCLUDES) -c $< + +clean : + rm -f v-* *.o $(LIBRARY) + diff --git a/base/runtime/c-libs/smlnj-runtime/makefile.win32 b/base/runtime/c-libs/smlnj-runtime/makefile.win32 new file mode 100644 index 0000000..931422c --- /dev/null +++ b/base/runtime/c-libs/smlnj-runtime/makefile.win32 @@ -0,0 +1,122 @@ +# +# the makefile for the runtime library +# win32 specific +# + +SHELL = + +INC_DIR = ..\..\include +MACH_DIR = ..\..\mach-dep +CLIB_DIR = ..\ + +INCLUDES = /I$(INC_DIR) /I$(CLIB_DIR) /I..\..\objs /I$(MACH_DIR) +GC_INCLUDES = $(INCLUDES) /I..\..\gc + +MAKEFILE = makefile.win32 +MAKE = nmake /F$(MAKEFILE) +AR = lib +ARFLAGS = +RANLIB = lib + +LIBRARY = libsmlnj-runt.lib + +VERSION = v-dummy + +OBJS = smlnj-runt-lib.obj \ + alloc-code.obj \ + argv.obj \ + raw-argv.obj \ + cmd-name.obj \ + shift-argv.obj \ + blast-in.obj \ + blast-out.obj \ + debug.obj \ + dummy.obj \ + export-heap.obj \ + export-fun.obj \ + gc-ctl.obj \ + itick.obj \ + mkexec.obj \ + mkliterals.obj \ + record1.obj \ + record-concat.obj \ + sysinfo.obj \ + setitimer.obj + +$(LIBRARY) : $(VERSION) $(OBJS) + del /Q /F $(LIBRARY) + $(AR) $(ARFLAGS) /out:$(LIBRARY) $(OBJS) + $(RANLIB) $(LIBRARY) + +$(VERSION) : + $(MAKE) MAKE="$(MAKE)" clean + echo "$(VERSION)" > $(VERSION) + +DEPENDENTS=$(INC_DIR)\ml-unixdep.h $(INC_DIR)\ml-base.h \ + $(INC_DIR)\ml-values.h \ + $(INC_DIR)\ml-objects.h \ + $(CLIB_DIR)\ml-c.h cfun-proto-list.h cfun-list.h + +smlnj-runt-lib.obj: smlnj-runt-lib.c $(DEPNEDENTS) + $(CC) $(CFLAGS) $(DEFS) $(GC_INCLUDES) /c smlnj-runt-lib.c + +alloc-code.obj: alloc-code.c $(DEPENDENTS) + $(CC) $(CFLAGS) $(DEFS) $(GC_INCLUDES) /c alloc-code.c + +argv.obj: argv.c $(DEPENDENTS) + $(CC) $(CFLAGS) $(DEFS) $(GC_INCLUDES) /c argv.c + +raw-argv.obj: raw-argv.c $(DEPENDENTS) + $(CC) $(CFLAGS) $(DEFS) $(GC_INCLUDES) /c raw-argv.c + +cmd-name.obj: cmd-name.c $(DEPENDENTS) + $(CC) $(CFLAGS) $(DEFS) $(GC_INCLUDES) /c cmd-name.c + +shift-argv.obj: shift-argv.c $(DEPENDENTS) + $(CC) $(CFLAGS) $(DEFS) $(GC_INCLUDES) /c shift-argv.c + +blast-in.obj: blast-in.c $(DEPENDENTS) + $(CC) $(CFLAGS) $(DEFS) $(GC_INCLUDES) /c blast-in.c + +blast-out.obj: blast-out.c $(DEPENDENTS) + $(CC) $(CFLAGS) $(DEFS) $(GC_INCLUDES) /c blast-out.c + +debug.obj: debug.c $(DEPENDENTS) + $(CC) $(CFLAGS) $(DEFS) $(GC_INCLUDES) /c debug.c + +dummy.obj: dummy.c $(DEPENDENTS) + $(CC) $(CFLAGS) $(DEFS) $(GC_INCLUDES) /c dummy.c + +export-heap.obj: export-heap.c $(DEPENDENTS) + $(CC) $(CFLAGS) $(DEFS) $(GC_INCLUDES) /c export-heap.c + +export-fun.obj: export-fun.c $(DEPENDENTS) + $(CC) $(CFLAGS) $(DEFS) $(GC_INCLUDES) /c export-fun.c + +gc-ctl.obj: gc-ctl.c $(DEPENDENTS) + $(CC) $(CFLAGS) $(DEFS) $(GC_INCLUDES) /c gc-ctl.c + +itick.obj: itick.c $(DEPENDENTS) + $(CC) $(CFLAGS) $(DEFS) $(GC_INCLUDES) /c itick.c + +mkexec.obj: mkexec.c $(DEPENDENTS) + $(CC) $(CFLAGS) $(DEFS) $(GC_INCLUDES) /c mkexec.c + +mkliterals.obj: mkliterals.c $(DEPENDENTS) + $(CC) $(CFLAGS) $(DEFS) $(GC_INCLUDES) /c mkliterals.c + +record1.obj: record1.c $(DEPENDENTS) + $(CC) $(CFLAGS) $(DEFS) $(GC_INCLUDES) /c record1.c + +record-concat.obj: record-concat.c $(DEPENDENTS) + $(CC) $(CFLAGS) $(DEFS) $(GC_INCLUDES) /c record-concat.c + +sysinfo.obj: sysinfo.c $(DEPENDENTS) + $(CC) $(CFLAGS) $(DEFS) $(GC_INCLUDES) /c sysinfo.c + +setitimer.obj: setitimer.c $(INC_DIR)\win32-timers.h $(DEPENDENTS) + $(CC) $(CFLAGS) $(DEFS) $(GC_INCLUDES) /c setitimer.c + +clean : + del /Q /F v-* *.obj *.pdb $(LIBRARY) + diff --git a/base/runtime/c-libs/smlnj-runtime/mkexec.c b/base/runtime/c-libs/smlnj-runtime/mkexec.c new file mode 100644 index 0000000..7d67056 --- /dev/null +++ b/base/runtime/c-libs/smlnj-runtime/mkexec.c @@ -0,0 +1,33 @@ +/* mkexec.c + * + * COPYRIGHT (c) 1994 by AT&T Bell Laboratories. + */ + +#include "cache-flush.h" +#include "ml-base.h" +#include "ml-objects.h" +#include "ml-state.h" +#include "cfun-proto-list.h" + + +/* _ml_RunT_mkexec : Word8Array.array * int -> (object -> object) + * + * Turn a previously allocated code object into a closure. This means + * flushing the I-cache. + */ +ml_val_t _ml_RunT_mkexec (ml_state_t *msp, ml_val_t arg) +{ + ml_val_t seq = REC_SEL(arg, 0); + int entrypoint = REC_SELINT(arg, 1); + char *code = GET_SEQ_DATAPTR(char, seq); + Word_t nbytes = GET_SEQ_LEN(seq); + ml_val_t res; + + FlushICache (code, nbytes); + + REC_ALLOC1(msp, res, PTR_CtoML(code + entrypoint)); + + return res; + +} /* end of _ml_RunT_mkexec */ + diff --git a/base/runtime/c-libs/smlnj-runtime/mkliterals.c b/base/runtime/c-libs/smlnj-runtime/mkliterals.c new file mode 100644 index 0000000..0f019e8 --- /dev/null +++ b/base/runtime/c-libs/smlnj-runtime/mkliterals.c @@ -0,0 +1,23 @@ +/*! \file mkliterals.c + * + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + */ + +#include "ml-base.h" +#include "ml-values.h" +#include "ml-state.h" +#include "ml-objects.h" +#include "cfun-proto-list.h" + + +/* _ml_RunT_mkliterals : Word8Vector.vector -> object vector + * + */ +ml_val_t _ml_RunT_mkliterals (ml_state_t *msp, ml_val_t arg) +{ + + return BuildLiterals (msp, GET_SEQ_DATAPTR(Byte_t, arg), GET_SEQ_LEN(arg)); + +} /* end of _ml_RunT_mkcode */ + diff --git a/base/runtime/c-libs/smlnj-runtime/raw-argv.c b/base/runtime/c-libs/smlnj-runtime/raw-argv.c new file mode 100644 index 0000000..dfeb421 --- /dev/null +++ b/base/runtime/c-libs/smlnj-runtime/raw-argv.c @@ -0,0 +1,18 @@ +/* raw-argv.c + * + * COPYRIGHT (c) 1992 by AT&T Bell Laboratories. + */ + +#include "ml-base.h" +#include "ml-values.h" +#include "ml-objects.h" +#include "cfun-proto-list.h" + +/* _ml_Proc_raw_argv: + */ +ml_val_t _ml_Proc_raw_argv (ml_state_t *msp, ml_val_t arg) +{ + return ML_CStringList (msp, RawArgs); + +} /* end of _ml_Proc_raw_argv */ + diff --git a/base/runtime/c-libs/smlnj-runtime/record-concat.c b/base/runtime/c-libs/smlnj-runtime/record-concat.c new file mode 100644 index 0000000..17d48e8 --- /dev/null +++ b/base/runtime/c-libs/smlnj-runtime/record-concat.c @@ -0,0 +1,41 @@ +/* record-concat.c + * + * COPYRIGHT (c) 1998 Bell Labs, Lucent Technologies. + * + * Concatenation for records. + */ + + +#include "ml-base.h" +#include "ml-values.h" +#include "ml-state.h" +#include "ml-objects.h" +#include "gc.h" +#include "cfun-proto-list.h" +#include "ml-c.h" + + + +/* _ml_RunT_recordconcat : (object * object) -> object + * + */ +ml_val_t _ml_RunT_recordconcat (ml_state_t *msp, ml_val_t arg) +{ + ml_val_t r1 = REC_SEL(arg,0); + ml_val_t r2 = REC_SEL(arg,1); + + if (r1 == ML_unit) + return r2; + else if (r2 == ML_unit) + return r1; + else { + ml_val_t res = RecordConcat (msp, r1, r2); + + if (res == ML_unit) + return RAISE_ERROR(msp, "recordconcat: not a record"); + else + return res; + } + +} /* end of _ml_RunT_recordconcat */ + diff --git a/base/runtime/c-libs/smlnj-runtime/record1.c b/base/runtime/c-libs/smlnj-runtime/record1.c new file mode 100644 index 0000000..abf0d90 --- /dev/null +++ b/base/runtime/c-libs/smlnj-runtime/record1.c @@ -0,0 +1,28 @@ +/* record1.c + * + * COPYRIGHT (c) 1998 Bell Labs, Lucent Technologies. + * + * Create a singleton record. + */ + + +#include "ml-base.h" +#include "ml-values.h" +#include "ml-state.h" +#include "ml-objects.h" +#include "cfun-proto-list.h" + + +/* _ml_RunT_record1 : object -> object + * + */ +ml_val_t _ml_RunT_record1 (ml_state_t *msp, ml_val_t arg) +{ + ml_val_t res; + + REC_ALLOC1(msp, res, arg); + + return res; + +} /* end of _ml_RunT_record1 */ + diff --git a/base/runtime/c-libs/smlnj-runtime/setitimer.c b/base/runtime/c-libs/smlnj-runtime/setitimer.c new file mode 100644 index 0000000..313d5a1 --- /dev/null +++ b/base/runtime/c-libs/smlnj-runtime/setitimer.c @@ -0,0 +1,87 @@ +/*! \file setitimer.c + * + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * NOTE: this implementation is UNIX specific right now; I would like to + * define an OS abstraction layer for interval timers, which would cover + * both alarm timers and profiling, but I need to look at what other systems + * do first. + */ + +#include "ml-base.h" +#ifdef OPSYS_UNIX +# include "ml-unixdep.h" +# include +#elif defined(OPSYS_WIN32) +# include "win32-timers.h" +#endif +#include "ml-c.h" +#include "ml-values.h" +#include "ml-objects.h" +#include "cfun-proto-list.h" + +/* _ml_RunT_setitimer : Word64.word option -> unit + * + * Set the interval timer; NONE means disable the timer, otherwise + * the time is specified in nanoseconds. + */ +ml_val_t _ml_RunT_setitimer (ml_state_t *msp, ml_val_t arg) +{ +#ifdef HAS_SETITIMER + struct itimerval new_itv; + int sts; + + if (arg == OPTION_NONE) { + /* turn the timer off */ + new_itv.it_interval.tv_sec = + new_itv.it_value.tv_sec = + new_itv.it_interval.tv_usec = + new_itv.it_value.tv_usec = 0; + } + else { + /* turn the timer on */ + Unsigned64_t t = WORD64_MLtoC(OPTION_get(arg)); + /* converto to microseconds */ + t /= 1000; + new_itv.it_interval.tv_sec = + new_itv.it_value.tv_sec = t / 1000000; + new_itv.it_interval.tv_usec = + new_itv.it_value.tv_usec = t % 1000000; + } + + sts = setitimer (ITIMER_REAL, &new_itv, NIL(struct itimerval *)); + + CHK_RETURN_UNIT(msp, sts); + +#elif defined(OPSYS_WIN32) + if (arg == OPTION_NONE) { + if (win32StopTimer()) { + return ML_unit; + } else { + return RAISE_ERROR(msp,"win32 setitimer: couldn't stop timer"); + } + } + else { + Unsigned64_t t = WORD64_MLtoC(OPTION_get(arg)); + /* convert to milliseconds */ + t /= 1000000; + + if (t <= 0) { + return RAISE_ERROR(msp, "win32 setitimer: invalid resolution"); + } + else { + if (win32StartTimer(t)) { + return ML_unit; + } else { + return RAISE_ERROR(msp,"win32 setitimer: couldn't start timer"); + } + } + } + +#else + return RAISE_ERROR(msp, "setitimer not supported"); +#endif + +} /* end of _ml_RunT_setitimer */ + diff --git a/base/runtime/c-libs/smlnj-runtime/shift-argv.c b/base/runtime/c-libs/smlnj-runtime/shift-argv.c new file mode 100644 index 0000000..3492301 --- /dev/null +++ b/base/runtime/c-libs/smlnj-runtime/shift-argv.c @@ -0,0 +1,20 @@ +/* shift-argv.c + * + * COPYRIGHT (c) 2007 by The Fellowship of SML/NJ + */ + +#include "ml-base.h" +#include "ml-values.h" +#include "ml-objects.h" +#include "cfun-proto-list.h" + +/* _ml_Proc_shift_argv: + */ +ml_val_t _ml_Proc_shift_argv (ml_state_t *msp, ml_val_t arg) +{ + if (*CmdLineArgs != NIL(char *)) + ++CmdLineArgs; + + return ML_unit; + +} /* end of _ml_Proc_shift_argv */ diff --git a/base/runtime/c-libs/smlnj-runtime/smlnj-runt-lib.c b/base/runtime/c-libs/smlnj-runtime/smlnj-runt-lib.c new file mode 100644 index 0000000..3f43357 --- /dev/null +++ b/base/runtime/c-libs/smlnj-runtime/smlnj-runt-lib.c @@ -0,0 +1,28 @@ +/* smlnj-runt-lib.c + * + * COPYRIGHT (c) 1994 AT&T Bell Laboratories. + */ + +#include "ml-base.h" +#include "c-library.h" +#include "cfun-proto-list.h" + + +/* the table of C functions and ML names */ +#define CFUNC(NAME, FUNC, MLTYPE) CFUNC_BIND(NAME, FUNC, MLTYPE) +PVT cfunc_binding_t CFunTable[] = { +#include "cfun-list.h" + CFUNC_NULL_BIND + }; +#undef CFUNC + + +/* the Runtime library */ +c_library_t SMLNJ_RunT_Library = { + CLIB_NAME, + CLIB_VERSION, + CLIB_DATE, + NIL(clib_init_fn_t), + CFunTable + }; + diff --git a/base/runtime/c-libs/smlnj-runtime/sysinfo.c b/base/runtime/c-libs/smlnj-runtime/sysinfo.c new file mode 100644 index 0000000..4dca864 --- /dev/null +++ b/base/runtime/c-libs/smlnj-runtime/sysinfo.c @@ -0,0 +1,84 @@ +/*! \file sysinfo.c + * + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * General interface to query system properties. + */ + +#include "ml-base.h" +#include "ml-values.h" +#include "ml-objects.h" +#include "cfun-proto-list.h" +#include "machine-id.h" + +#if defined(OPSYS_UNIX) +# include "ml-unixdep.h" /* for OS_NAME */ +#elif defined(OPSYS_WIN32) +# define OS_NAME "Win32" +#endif + +#define STREQ(s1, s2) (strcmp((s1), (s2)) == 0) + + +#define FALSE_VALUE "NO" +#define TRUE_VALUE "YES" + + +/* _ml_RunT_sysinfo : string -> string option + * + * Current queries: + * "OS_NAME" + * "OS_VERSION" (not supported) + * "ARCH" + * "ARCH_ARCH" (deprecated; use "ARCH") + * "TARGET_ARCH" (deprecated; use "ARCH") + * "HAS_SOFT_POLL" + * "HAS_MP" + * "HEAP_SUFFIX" + */ +ml_val_t _ml_RunT_sysinfo (ml_state_t *msp, ml_val_t arg) +{ + char *name = STR_MLtoC(arg); + ml_val_t res; + + if (STREQ("OS_NAME", name)) + res = ML_CString(msp, OS_NAME); + else if (STREQ("OS_VERSION", name)) + res = ML_CString(msp, ""); + else if (STREQ("HEAP_SUFFIX", name)) + res = ML_CString(msp, MACHINE_ID "-" OPSYS_ID); + else if (STREQ("ARCH_NAME", name) + || STREQ("HOST_ARCH", name) /* DEPRECATED; remove in 110.97 */ + || STREQ("TARGET_ARCH", name)) /* DEPRECATED; remove in 110.97 */ +#if defined(ARCH_AMD64) + res = ML_CString(msp, "AMD64"); +#elif defined(ARCH_PPC) + res = ML_CString(msp, "PPC"); +#elif defined(ARCH_SPARC) + res = ML_CString(msp, "SPARC"); +#elif defined(ARCH_X86) + res = ML_CString(msp, "X86"); +#else + res = ML_CString(msp, ""); +#endif + else if (STREQ("HAS_SOFT_POLL", name)) +#ifdef SOFT_POLL + res = ML_CString(msp, TRUE_VALUE); +#else + res = ML_CString(msp, FALSE_VALUE); +#endif + else if (STREQ("HAS_MP", name)) +#ifdef MP_SUPPORT + res = ML_CString(msp, TRUE_VALUE); +#else + res = ML_CString(msp, FALSE_VALUE); +#endif + else + return OPTION_NONE; + + OPTION_SOME(msp, res, res); + + return res; + +} /* end of _ml_RunT_sysinfo */ diff --git a/base/runtime/c-libs/smlnj-signals/README b/base/runtime/c-libs/smlnj-signals/README new file mode 100644 index 0000000..ea66bae --- /dev/null +++ b/base/runtime/c-libs/smlnj-signals/README @@ -0,0 +1,3 @@ +This directory contains the SML/C interface stubs for the signals +support. The implementation of these operations can be found in +the directory ../../mach-dep diff --git a/base/runtime/c-libs/smlnj-signals/cfun-list.h b/base/runtime/c-libs/smlnj-signals/cfun-list.h new file mode 100644 index 0000000..47c8810 --- /dev/null +++ b/base/runtime/c-libs/smlnj-signals/cfun-list.h @@ -0,0 +1,20 @@ +/* cfun-list.h + * + * COPYRIGHT (c) 1994 AT&T Bell Laboratories. + * + * This file lists the signals library of C functions that are callable by ML. + */ + +#ifndef CLIB_NAME +#define CLIB_NAME "SMLNJ-Signals" +#define CLIB_VERSION "1.1" +#define CLIB_DATE "October 29, 1995" +#endif + +CFUNC("listSignals", _ml_Sig_listsigs, "unit -> sysconst list") +CFUNC("getSigState", _ml_Sig_getsigstate, "sysconst -> int") +CFUNC("setSigState", _ml_Sig_setsigstate, "(sysconst * int) -> int") +CFUNC("getSigMask", _ml_Sig_getsigmask, "unit -> sysconst list option") +CFUNC("setSigMask", _ml_Sig_setsigmask, "sysconst list option -> unit") +CFUNC("pause", _ml_Sig_pause, "unit -> unit") + diff --git a/base/runtime/c-libs/smlnj-signals/cfun-proto-list.h b/base/runtime/c-libs/smlnj-signals/cfun-proto-list.h new file mode 100644 index 0000000..3f7ae7f --- /dev/null +++ b/base/runtime/c-libs/smlnj-signals/cfun-proto-list.h @@ -0,0 +1,18 @@ +/* cfun-proto-list.h + * + * COPYRIGHT (c) 1994 AT&T Bell Laboratories. + */ + +#ifndef _CFUN_PROTO_LIST_ +#define _CFUN_PROTO_LIST_ + +#ifndef _C_LIBRARY_ +# include "c-library.h" +#endif + +/* the external definitions for the C functions */ +#define CFUNC(NAME, FUNC, MLTYPE) CFUNC_PROTO(NAME, FUNC, MLTYPE) +#include "cfun-list.h" +#undef CFUNC + +#endif /* !_CFUN_PROTO_LIST_ */ diff --git a/base/runtime/c-libs/smlnj-signals/getsigmask.c b/base/runtime/c-libs/smlnj-signals/getsigmask.c new file mode 100644 index 0000000..0fab5b6 --- /dev/null +++ b/base/runtime/c-libs/smlnj-signals/getsigmask.c @@ -0,0 +1,18 @@ +/* getsigmask.c + * + * COPYRIGHT (c) 1995 by AT&T Bell Laboratories. + */ + +#include "ml-base.h" +#include "ml-signals.h" +#include "cfun-proto-list.h" + +/* _ml_Sig_getsigmask : unit -> sysconst list option + * + */ +ml_val_t _ml_Sig_getsigmask (ml_state_t *msp, ml_val_t arg) +{ + return GetSignalMask (msp); + +} /* end of _ml_Sig_getsigmask */ + diff --git a/base/runtime/c-libs/smlnj-signals/getsigstate.c b/base/runtime/c-libs/smlnj-signals/getsigstate.c new file mode 100644 index 0000000..87b30db --- /dev/null +++ b/base/runtime/c-libs/smlnj-signals/getsigstate.c @@ -0,0 +1,22 @@ +/* getsigstate.c + * + * COPYRIGHT (c) 1995 by AT&T Bell Laboratories. + */ + +#include "ml-base.h" +#include "ml-values.h" +#include "ml-state.h" +#include "ml-signals.h" +#include "cfun-proto-list.h" + +/* _ml_Sig_getsigstate : sysconst -> int + * + */ +ml_val_t _ml_Sig_getsigstate (ml_state_t *msp, ml_val_t arg) +{ + int state = GetSignalState (msp->ml_vproc, REC_SELINT(arg, 0)); + + return INT_CtoML(state); + +} /* end of _ml_Sig_getsigstate */ + diff --git a/base/runtime/c-libs/smlnj-signals/listsignals.c b/base/runtime/c-libs/smlnj-signals/listsignals.c new file mode 100644 index 0000000..dc57f8c --- /dev/null +++ b/base/runtime/c-libs/smlnj-signals/listsignals.c @@ -0,0 +1,19 @@ +/* listsignals.c + * + * COPYRIGHT (c) 1995 by AT&T Bell Laboratories. + */ + +#include "ml-base.h" +#include "ml-signals.h" +#include "cfun-proto-list.h" + +/* _ml_Sig_listsignals : unit -> sysconst list + * + * List the supported signals. + */ +ml_val_t _ml_Sig_listsigs (ml_state_t *msp, ml_val_t arg) +{ + return ListSignals (msp); + +} /* end of _ml_Sig_pause */ + diff --git a/base/runtime/c-libs/smlnj-signals/makefile b/base/runtime/c-libs/smlnj-signals/makefile new file mode 100644 index 0000000..1abfb04 --- /dev/null +++ b/base/runtime/c-libs/smlnj-signals/makefile @@ -0,0 +1,45 @@ +# +# the makefile for the Signals library +# + +SHELL = /bin/sh + +INC_DIR = ../../include +CLIB_DIR = ../ + +INCLUDES = -I$(INC_DIR) -I$(CLIB_DIR) -I../../objs + +MAKE = make +AR = ar +ARFLAGS = rcv +RANLIB = ranlib + +LIBRARY = libsmlnj-sig.a + +VERSION = v-dummy + +OBJS = smlnj-sig-lib.o \ + getsigmask.o \ + getsigstate.o \ + listsignals.o \ + pause.o \ + setsigmask.o \ + setsigstate.o + +$(LIBRARY) : $(VERSION) $(OBJS) + rm -rf $(LIBRARY) + $(AR) $(ARFLAGS) $(LIBRARY) $(OBJS) + $(RANLIB) $(LIBRARY) + +$(VERSION) : + ($(MAKE) MAKE="$(MAKE)" clean) + echo "$(VERSION)" > $(VERSION) + +.c.o: $(INC_DIR)/ml-unixdep.h $(INC_DIR)/ml-base.h $(INC_DIR)/ml-values.h \ + $(INC_DIR)/ml-state.h $(INC_DIR)/ml-signals.h \ + $(CLIB_DIR)/ml-c.h cfun-proto-list.h cfun-list.h + $(CC) $(CFLAGS) $(DEFS) $(INCLUDES) -c $< + +clean : + rm -f v-* *.o $(LIBRARY) + diff --git a/base/runtime/c-libs/smlnj-signals/makefile.win32 b/base/runtime/c-libs/smlnj-signals/makefile.win32 new file mode 100644 index 0000000..b9e4bc2 --- /dev/null +++ b/base/runtime/c-libs/smlnj-signals/makefile.win32 @@ -0,0 +1,70 @@ +# +# the makefile for the Signals library +# win32 specific +# +# NOTE: currently, the signal functions in mach-dep/win32-signal.c are +# dummy stubs + +SHELL = + +INC_DIR = ..\..\include +CLIB_DIR = ..\ + +INCLUDES = /I$(INC_DIR) /I$(CLIB_DIR) /I..\..\objs + +MAKEFILE = makefile.win32 +MAKE = nmake /F$(MAKEFILE) +AR = lib +ARFLAGS = +RANLIB = lib + +LIBRARY = libsmlnj-sig.lib + +VERSION = v-dummy + +OBJS = smlnj-sig-lib.obj \ + getsigmask.obj \ + getsigstate.obj \ + listsignals.obj \ + pause.obj \ + setsigmask.obj \ + setsigstate.obj + +$(LIBRARY) : $(VERSION) $(OBJS) + del /F /Q $(LIBRARY) + $(AR) $(ARFLAGS) /out:$(LIBRARY) $(OBJS) + $(RANLIB) /out:$(LIBRARY) + +$(VERSION) : + ($(MAKE) MAKE="$(MAKE)" clean) + echo "$(VERSION)" > $(VERSION) + +DEPENDENTS = $(INC_DIR)\ml-unixdep.h $(INC_DIR)\ml-base.h \ + $(INC_DIR)\ml-values.h \ + $(INC_DIR)\ml-state.h $(INC_DIR)\ml-signals.h \ + $(CLIB_DIR)\ml-c.h cfun-proto-list.h cfun-list.h + +smlnj-sig-lib.obj: smlnj-sig-lib.c $(DEPENDENTS) + $(CC) $(CFLAGS) $(DEFS) $(INCLUDES) /c smlnj-sig-lib.c + +getsigmask.obj: getsigmask.c $(DEPENDENTS) + $(CC) $(CFLAGS) $(DEFS) $(INCLUDES) /c getsigmask.c + +getsigstate.obj: getsigstate.c $(DEPENDENTS) + $(CC) $(CFLAGS) $(DEFS) $(INCLUDES) /c getsigstate.c + +listsignals.obj: listsignals.c $(DEPENDENTS) + $(CC) $(CFLAGS) $(DEFS) $(INCLUDES) /c listsignals.c + +pause.obj: pause.c $(DEPENDENTS) + $(CC) $(CFLAGS) $(DEFS) $(INCLUDES) /c pause.c + +setsigmask.obj: setsigmask.c $(DEPENDENTS) + $(CC) $(CFLAGS) $(DEFS) $(INCLUDES) /c setsigmask.c + +setsigstate.obj: setsigstate.c $(DEPENDENTS) + $(CC) $(CFLAGS) $(DEFS) $(INCLUDES) /c setsigstate.c + +clean : + del /F /Q v-* *.obj *.pdb $(LIBRARY) + diff --git a/base/runtime/c-libs/smlnj-signals/pause.c b/base/runtime/c-libs/smlnj-signals/pause.c new file mode 100644 index 0000000..dcd7823 --- /dev/null +++ b/base/runtime/c-libs/smlnj-signals/pause.c @@ -0,0 +1,24 @@ +/* pause.c + * + * COPYRIGHT (c) 1995 by AT&T Bell Laboratories. + */ + +#include "ml-base.h" +#include "ml-values.h" +#include "ml-state.h" +#include "ml-signals.h" +#include "cfun-proto-list.h" + +/* _ml_Sig_pause : unit -> unit + * + * Pause until the next signal. + */ +ml_val_t _ml_Sig_pause (ml_state_t *msp, ml_val_t arg) +{ + + PauseUntilSignal (msp->ml_vproc); + + return ML_unit; + +} /* end of _ml_Sig_pause */ + diff --git a/base/runtime/c-libs/smlnj-signals/setsigmask.c b/base/runtime/c-libs/smlnj-signals/setsigmask.c new file mode 100644 index 0000000..a74baa2 --- /dev/null +++ b/base/runtime/c-libs/smlnj-signals/setsigmask.c @@ -0,0 +1,22 @@ +/* setsigmask.c + * + * COPYRIGHT (c) 1995 by AT&T Bell Laboratories. + */ + +#include "ml-base.h" +#include "ml-values.h" +#include "ml-signals.h" +#include "cfun-proto-list.h" + +/* _ml_Sig_setsigmask : sysconst list option -> unit + * + * Mask the listed signals. + */ +ml_val_t _ml_Sig_setsigmask (ml_state_t *msp, ml_val_t arg) +{ + SetSignalMask (arg); + + return ML_unit; + +} /* end of _ml_Sig_setsigmask */ + diff --git a/base/runtime/c-libs/smlnj-signals/setsigstate.c b/base/runtime/c-libs/smlnj-signals/setsigstate.c new file mode 100644 index 0000000..66617e0 --- /dev/null +++ b/base/runtime/c-libs/smlnj-signals/setsigstate.c @@ -0,0 +1,24 @@ +/* setsigstate.c + * + * COPYRIGHT (c) 1995 by AT&T Bell Laboratories. + */ + +#include "ml-base.h" +#include "ml-values.h" +#include "ml-state.h" +#include "ml-signals.h" +#include "cfun-proto-list.h" + +/* _ml_Sig_setsigstate : (sysconst * int) -> unit + * + */ +ml_val_t _ml_Sig_setsigstate (ml_state_t *msp, ml_val_t arg) +{ + ml_val_t sig = REC_SEL(arg, 0); + + SetSignalState (msp->ml_vproc, REC_SELINT(sig, 0), REC_SELINT(arg, 1)); + + return ML_unit; + +} /* end of _ml_Sig_setsigstate */ + diff --git a/base/runtime/c-libs/smlnj-signals/smlnj-sig-lib.c b/base/runtime/c-libs/smlnj-signals/smlnj-sig-lib.c new file mode 100644 index 0000000..597aaa0 --- /dev/null +++ b/base/runtime/c-libs/smlnj-signals/smlnj-sig-lib.c @@ -0,0 +1,28 @@ +/* smlnj-sig-lib.c + * + * COPYRIGHT (c) 1994 AT&T Bell Laboratories. + */ + +#include "ml-base.h" +#include "c-library.h" +#include "cfun-proto-list.h" + + +/* the table of C functions and ML names */ +#define CFUNC(NAME, FUNC, MLTYPE) CFUNC_BIND(NAME, FUNC, MLTYPE) +PVT cfunc_binding_t CFunTable[] = { +#include "cfun-list.h" + CFUNC_NULL_BIND + }; +#undef CFUNC + + +/* the Signals library */ +c_library_t SMLNJ_Sig_Library = { + CLIB_NAME, + CLIB_VERSION, + CLIB_DATE, + NIL(clib_init_fn_t), + CFunTable + }; + diff --git a/base/runtime/c-libs/smlnj-sockets/accept.c b/base/runtime/c-libs/smlnj-sockets/accept.c new file mode 100644 index 0000000..5c7d014 --- /dev/null +++ b/base/runtime/c-libs/smlnj-sockets/accept.c @@ -0,0 +1,36 @@ +/* accept.c + * + * COPYRIGHT (c) 1995 AT&T Bell Laboratories. + */ + +#include "sockets-osdep.h" +#include INCLUDE_SOCKET_H +#include "ml-base.h" +#include "ml-values.h" +#include "ml-objects.h" +#include "ml-c.h" +#include "cfun-proto-list.h" + +/* _ml_Sock_accept : sock -> (sock * addr) + */ +ml_val_t _ml_Sock_accept (ml_state_t *msp, ml_val_t arg) +{ + int sock = INT_MLtoC(arg); + char addrBuf[MAX_SOCK_ADDR_SZB]; + socklen_t addrLen = MAX_SOCK_ADDR_SZB; + int newSock; + + newSock = accept (sock, (struct sockaddr *)addrBuf, &addrLen); + + if (newSock == -1) + return RAISE_SYSERR(msp, newSock); + else { + ml_val_t data = ML_CData (msp, addrBuf, addrLen); + ml_val_t addr, res; + + SEQHDR_ALLOC(msp, addr, DESC_word8vec, data, addrLen); + REC_ALLOC2(msp, res, INT_CtoML(newSock), addr); + return res; + } + +} /* end of _ml_Sock_accept */ diff --git a/base/runtime/c-libs/smlnj-sockets/bind.c b/base/runtime/c-libs/smlnj-sockets/bind.c new file mode 100644 index 0000000..b981f65 --- /dev/null +++ b/base/runtime/c-libs/smlnj-sockets/bind.c @@ -0,0 +1,29 @@ +/* bind.c + * + * COPYRIGHT (c) 1995 AT&T Bell Laboratories. + */ + +#include "sockets-osdep.h" +#include INCLUDE_SOCKET_H +#include "ml-base.h" +#include "ml-values.h" +#include "ml-objects.h" +#include "ml-c.h" +#include "cfun-proto-list.h" + +/* _ml_Sock_bind : (sock * addr) -> unit + */ +ml_val_t _ml_Sock_bind (ml_state_t *msp, ml_val_t arg) +{ + int sock = REC_SELINT(arg, 0); + ml_val_t addr = REC_SEL(arg, 1); + int sts; + + sts = bind ( + sock, + GET_SEQ_DATAPTR(struct sockaddr, addr), + GET_SEQ_LEN(addr)); + + CHK_RETURN_UNIT(msp, sts); + +} /* end of _ml_Sock_bind */ diff --git a/base/runtime/c-libs/smlnj-sockets/cfun-list.h b/base/runtime/c-libs/smlnj-sockets/cfun-list.h new file mode 100644 index 0000000..7590507 --- /dev/null +++ b/base/runtime/c-libs/smlnj-sockets/cfun-list.h @@ -0,0 +1,70 @@ +/* cfun-list.h + * + * COPYRIGHT (c) 1994 AT&T Bell Laboratories. + * + * This file lists the directory library of C functions that are callable by ML. + */ + +#ifndef CLIB_NAME +#define CLIB_NAME "SMLNJ-Sockets" +#define CLIB_VERSION "1.0" +#define CLIB_DATE "June 10, 1995" +#endif + +/* Network database functions */ +CFUNC("getHostName", _ml_NetDB_gethostname, "unit -> string") +CFUNC("getNetByName", _ml_NetDB_getnetbyname, "") +CFUNC("getNetByAddr", _ml_NetDB_getnetbyaddr, "") +CFUNC("getHostByName", _ml_NetDB_gethostbyname, "") +CFUNC("getHostByAddr", _ml_NetDB_gethostbyaddr, "") +CFUNC("getProtByName", _ml_NetDB_getprotbyname, "") +CFUNC("getProtByNum", _ml_NetDB_getprotbynum, "") +CFUNC("getServByName", _ml_NetDB_getservbyname, "") +CFUNC("getServByPort", _ml_NetDB_getservbyport, "") + +CFUNC("ctlDEBUG", _ml_Sock_ctlDEBUG, "(sock * bool option) -> bool") +CFUNC("ctlREUSEADDR", _ml_Sock_ctlREUSEADDR, "") +CFUNC("ctlKEEPALIVE", _ml_Sock_ctlKEEPALIVE, "") +CFUNC("ctlDONTROUTE", _ml_Sock_ctlDONTROUTE, "") +CFUNC("ctlLINGER", _ml_Sock_ctlLINGER, "") +CFUNC("ctlBROADCAST", _ml_Sock_ctlBROADCAST, "") +CFUNC("ctlOOBINLINE", _ml_Sock_ctlOOBINLINE, "") +CFUNC("ctlSNDBUF", _ml_Sock_ctlSNDBUF, "") +CFUNC("ctlRCVBUF", _ml_Sock_ctlRCVBUF, "") +CFUNC("ctlNODELAY", _ml_Sock_ctlNODELAY, "") +CFUNC("getTYPE", _ml_Sock_getTYPE, "") +CFUNC("getERROR", _ml_Sock_getERROR, "") +CFUNC("setNBIO", _ml_Sock_setNBIO, "(sock * int) -> unit") +CFUNC("getNREAD", _ml_Sock_getNREAD, "sock -> int") +CFUNC("getATMARK", _ml_Sock_getATMARK, "sock -> bool") +CFUNC("getPeerName", _ml_Sock_getpeername, "") +CFUNC("getSockName", _ml_Sock_getsockname, "") + +CFUNC("getAddrFamily", _ml_Sock_getaddrfamily, "addr -> af") +CFUNC("listAddrFamilies", _ml_Sock_listaddrfamilies, "") +CFUNC("listSockTypes", _ml_Sock_listsocktypes, "") +CFUNC("inetany", _ml_Sock_inetany, "int -> addr") +CFUNC("fromInetAddr", _ml_Sock_frominetaddr, "addr -> (in_addr*int)") +CFUNC("toInetAddr", _ml_Sock_toinetaddr, "(in_addr*int) -> addr") + +CFUNC("accept", _ml_Sock_accept, "sock -> (sock * Word8Vector.vector)") +CFUNC("bind", _ml_Sock_bind, "") +CFUNC("connect", _ml_Sock_connect, "") +CFUNC("listen", _ml_Sock_listen, "") +CFUNC("close", _ml_Sock_close, "") +CFUNC("shutdown", _ml_Sock_shutdown, "") +CFUNC("sendBuf", _ml_Sock_sendbuf, "") +CFUNC("sendBufTo", _ml_Sock_sendbufto, "") +CFUNC("recv", _ml_Sock_recv, "") +CFUNC("recvBuf", _ml_Sock_recvbuf, "") +CFUNC("recvFrom", _ml_Sock_recvfrom, "") +CFUNC("recvBufFrom", _ml_Sock_recvbuffrom, "") + +CFUNC("socket", _ml_Sock_socket, "(int * int * int) -> sock") + +#ifdef HAS_UNIX_DOMAIN +CFUNC("socketPair", _ml_Sock_socketpair, "(int * int * int) -> (sock * sock)") +CFUNC("fromUnixAddr", _ml_Sock_fromunixaddr, "addr -> string") +CFUNC("toUnixAddr", _ml_Sock_tounixaddr, "string -> addr") +#endif + diff --git a/base/runtime/c-libs/smlnj-sockets/cfun-proto-list.h b/base/runtime/c-libs/smlnj-sockets/cfun-proto-list.h new file mode 100644 index 0000000..16b3e96 --- /dev/null +++ b/base/runtime/c-libs/smlnj-sockets/cfun-proto-list.h @@ -0,0 +1,18 @@ +/* cfun-proto-list.h + * + * COPYRIGHT (c) 1194 AT&T Bell Laboratories. + */ + +#ifndef _CFUN_PROTO_LIST_ +#define _CFUN_PROTO_LIST_ + +#ifndef _C_LIBRARY_ +# include "c-library.h" +#endif + +/* the external definitions for the C functions */ +#define CFUNC(NAME, FUNC, MLTYPE) CFUNC_PROTO(NAME, FUNC, MLTYPE) +#include "cfun-list.h" +#undef CFUNC + +#endif /* !_CFUN_PROTO_LIST_ */ diff --git a/base/runtime/c-libs/smlnj-sockets/close.c b/base/runtime/c-libs/smlnj-sockets/close.c new file mode 100644 index 0000000..3500d39 --- /dev/null +++ b/base/runtime/c-libs/smlnj-sockets/close.c @@ -0,0 +1,29 @@ +/* close.c + * + * COPYRIGHT (c) 1995 AT&T Bell Laboratories. + */ + +#include "sockets-osdep.h" +#include INCLUDE_SOCKET_H +#include "ml-base.h" +#include "ml-values.h" +#include "ml-c.h" +#include "cfun-proto-list.h" + +/* _ml_Sock_close : sock -> unit + */ +ml_val_t _ml_Sock_close (ml_state_t *msp, ml_val_t arg) +{ + int status, fd = INT_MLtoC(arg); + + /* FIXME: Architecture dependencies code should probably moved to + sockets-osdep.h */ +#if defined(OPSYS_WIN32) + status = closesocket(fd); +#else + status = close(fd); +#endif + + CHK_RETURN_UNIT(msp, status); + +} /* end of _ml_Sock_close */ diff --git a/base/runtime/c-libs/smlnj-sockets/connect.c b/base/runtime/c-libs/smlnj-sockets/connect.c new file mode 100644 index 0000000..d59e115 --- /dev/null +++ b/base/runtime/c-libs/smlnj-sockets/connect.c @@ -0,0 +1,29 @@ +/* connect.c + * + * COPYRIGHT (c) 1995 AT&T Bell Laboratories. + */ + +#include "sockets-osdep.h" +#include INCLUDE_SOCKET_H +#include "ml-base.h" +#include "ml-values.h" +#include "ml-objects.h" +#include "ml-c.h" +#include "cfun-proto-list.h" + +/* _ml_Sock_connect : (sock * addr) -> unit + */ +ml_val_t _ml_Sock_connect (ml_state_t *msp, ml_val_t arg) +{ + int sock = REC_SELINT(arg, 0); + ml_val_t addr = REC_SEL(arg, 1); + int sts; + + sts = connect ( + sock, + GET_SEQ_DATAPTR(struct sockaddr, addr), + GET_SEQ_LEN(addr)); + + CHK_RETURN_UNIT(msp, sts); + +} /* end of _ml_Sock_connect */ diff --git a/base/runtime/c-libs/smlnj-sockets/ctlBROADCAST.c b/base/runtime/c-libs/smlnj-sockets/ctlBROADCAST.c new file mode 100644 index 0000000..e7d1eb9 --- /dev/null +++ b/base/runtime/c-libs/smlnj-sockets/ctlBROADCAST.c @@ -0,0 +1,22 @@ +/* ctlBROADCAST.c + * + * COPYRIGHT (c) 1995 AT&T Bell Laboratories. + */ + +#include "sockets-osdep.h" +#include INCLUDE_SOCKET_H +#include "ml-base.h" +#include "ml-values.h" +#include "ml-objects.h" +#include "ml-c.h" +#include "cfun-proto-list.h" +#include "sock-util.h" + +/* _ml_Sock_ctlBROADCAST : (sock * bool option) -> bool + */ +ml_val_t _ml_Sock_ctlBROADCAST (ml_state_t *msp, ml_val_t arg) +{ + return _util_Sock_ControlFlg (msp, arg, SO_BROADCAST); + +} /* end of _ml_Sock_ctlBROADCAST */ + diff --git a/base/runtime/c-libs/smlnj-sockets/ctlDEBUG.c b/base/runtime/c-libs/smlnj-sockets/ctlDEBUG.c new file mode 100644 index 0000000..803bc74 --- /dev/null +++ b/base/runtime/c-libs/smlnj-sockets/ctlDEBUG.c @@ -0,0 +1,20 @@ +/* ctlDEBUG.c + * + * COPYRIGHT (c) 1995 AT&T Bell Laboratories. + */ + +#include "sockets-osdep.h" +#include INCLUDE_SOCKET_H +#include "ml-base.h" +#include "ml-values.h" +#include "ml-c.h" +#include "sock-util.h" +#include "cfun-proto-list.h" + +/* _ml_Sock_ctlDEBUG : (sock * bool option) -> bool + */ +ml_val_t _ml_Sock_ctlDEBUG (ml_state_t *msp, ml_val_t arg) +{ + return _util_Sock_ControlFlg (msp, arg, SO_DEBUG); + +} /* end of _ml_Sock_ctlDEBUG */ diff --git a/base/runtime/c-libs/smlnj-sockets/ctlDONTROUTE.c b/base/runtime/c-libs/smlnj-sockets/ctlDONTROUTE.c new file mode 100644 index 0000000..94a7ae2 --- /dev/null +++ b/base/runtime/c-libs/smlnj-sockets/ctlDONTROUTE.c @@ -0,0 +1,20 @@ +/* ctlDONTROUTE.c + * + * COPYRIGHT (c) 1995 AT&T Bell Laboratories. + */ + +#include "sockets-osdep.h" +#include INCLUDE_SOCKET_H +#include "ml-base.h" +#include "ml-values.h" +#include "ml-c.h" +#include "cfun-proto-list.h" +#include "sock-util.h" + +/* _ml_Sock_ctlDONTROUTE : (sock * bool option) -> bool + */ +ml_val_t _ml_Sock_ctlDONTROUTE (ml_state_t *msp, ml_val_t arg) +{ + return _util_Sock_ControlFlg (msp, arg, SO_DONTROUTE); + +} /* end of _ml_Sock_ctlDONTROUTE */ diff --git a/base/runtime/c-libs/smlnj-sockets/ctlKEEPALIVE.c b/base/runtime/c-libs/smlnj-sockets/ctlKEEPALIVE.c new file mode 100644 index 0000000..c08939d --- /dev/null +++ b/base/runtime/c-libs/smlnj-sockets/ctlKEEPALIVE.c @@ -0,0 +1,20 @@ +/* ctlKEEPALIVE.c + * + * COPYRIGHT (c) 1995 AT&T Bell Laboratories. + */ + +#include "sockets-osdep.h" +#include INCLUDE_SOCKET_H +#include "ml-base.h" +#include "ml-values.h" +#include "ml-c.h" +#include "sock-util.h" +#include "cfun-proto-list.h" + +/* _ml_Sock_ctlKEEPALIVE : (sock * bool option) -> bool + */ +ml_val_t _ml_Sock_ctlKEEPALIVE (ml_state_t *msp, ml_val_t arg) +{ + return _util_Sock_ControlFlg (msp, arg, SO_KEEPALIVE); + +} /* end of _ml_Sock_ctlKEEPALIVE */ diff --git a/base/runtime/c-libs/smlnj-sockets/ctlLINGER.c b/base/runtime/c-libs/smlnj-sockets/ctlLINGER.c new file mode 100644 index 0000000..41b6598 --- /dev/null +++ b/base/runtime/c-libs/smlnj-sockets/ctlLINGER.c @@ -0,0 +1,58 @@ +/* ctlLINGER.c + * + * COPYRIGHT (c) 1995 AT&T Bell Laboratories. + */ + +#include "sockets-osdep.h" +#include INCLUDE_SOCKET_H +#include "ml-base.h" +#include "ml-values.h" +#include "ml-objects.h" +#include "ml-c.h" +#include "sock-util.h" +#include "cfun-proto-list.h" + +/* _ml_Sock_ctlLINGER : (sock * int option option) -> int option + * + * Set/get the SO_LINGER option as follows: + * NONE => get current setting + * SOME(NONE) => disable linger + * SOME(SOME t) => enable linger with timeout t. + */ +ml_val_t _ml_Sock_ctlLINGER (ml_state_t *msp, ml_val_t arg) +{ + int sock = REC_SELINT(arg, 0); + ml_val_t ctl = REC_SEL(arg, 1); + struct linger optVal; + int sts; + + if (ctl == OPTION_NONE) { + socklen_t optSz = sizeof(struct linger); + sts = getsockopt (sock, SOL_SOCKET, SO_LINGER, (sockoptval_t)&optVal, &optSz); + ASSERT((sts < 0) || (optSz == sizeof(struct linger))); + } + else { + ctl = OPTION_get(ctl); + if (ctl == OPTION_NONE) { + /* argument is SOME(NONE); disable linger */ + optVal.l_onoff = 0; + } + else { + /* argument is SOME t; enable linger */ + optVal.l_onoff = 1; + optVal.l_linger = INT_MLtoC(OPTION_get(ctl)); + } + sts = setsockopt (sock, SOL_SOCKET, SO_LINGER, (sockoptval_t)&optVal, sizeof(struct linger)); + } + + if (sts < 0) + return RAISE_SYSERR(msp, sts); + else if (optVal.l_onoff == 0) + return OPTION_NONE; + else { + ml_val_t res; + OPTION_SOME(msp, res, INT_CtoML(optVal.l_linger)); + return res; + } + +} /* end of _ml_Sock_ctlLINGER */ diff --git a/base/runtime/c-libs/smlnj-sockets/ctlNODELAY.c b/base/runtime/c-libs/smlnj-sockets/ctlNODELAY.c new file mode 100644 index 0000000..4b5afd0 --- /dev/null +++ b/base/runtime/c-libs/smlnj-sockets/ctlNODELAY.c @@ -0,0 +1,42 @@ +/* ctlNODELAY.c + * + * COPYRIGHT (c) 1995 AT&T Bell Laboratories. + */ + +#include "sockets-osdep.h" +#include INCLUDE_SOCKET_H +#include "ml-base.h" +#include "ml-values.h" +#include "ml-objects.h" +#include "ml-c.h" +#include "cfun-proto-list.h" +#include "sock-util.h" + +/* _ml_Sock_ctlNODELAY : (sock * bool option) -> bool + * + * NOTE: this is a TCP level option, so we cannot use the utility function. + */ +ml_val_t _ml_Sock_ctlNODELAY (ml_state_t *msp, ml_val_t arg) +{ + int sock = REC_SELINT(arg, 0); + ml_val_t ctl = REC_SEL(arg, 1); + bool_t flg; + int sts; + + if (ctl == OPTION_NONE) { + socklen_t optSz = sizeof(int); + sts = getsockopt (sock, IPPROTO_TCP, TCP_NODELAY, (sockoptval_t)&flg, &optSz); + ASSERT((sts < 0) || (optSz == sizeof(int))); + } + else { + flg = (bool_t)INT_MLtoC(OPTION_get(ctl)); + sts = setsockopt (sock, IPPROTO_TCP, TCP_NODELAY, (sockoptval_t)&flg, sizeof(int)); + } + + if (sts < 0) + return RAISE_SYSERR(msp, sts); + else + return (flg ? ML_true : ML_false); + +} /* end of _ml_Sock_ctlNODELAY */ + diff --git a/base/runtime/c-libs/smlnj-sockets/ctlOOBINLINE.c b/base/runtime/c-libs/smlnj-sockets/ctlOOBINLINE.c new file mode 100644 index 0000000..adaae3e --- /dev/null +++ b/base/runtime/c-libs/smlnj-sockets/ctlOOBINLINE.c @@ -0,0 +1,20 @@ +/* ctlOOBINLINE.c + * + * COPYRIGHT (c) 1995 AT&T Bell Laboratories. + */ + +#include "sockets-osdep.h" +#include INCLUDE_SOCKET_H +#include "ml-base.h" +#include "ml-values.h" +#include "ml-c.h" +#include "cfun-proto-list.h" +#include "sock-util.h" + +/* _ml_Sock_ctlOOBINLINE : (sock * bool option) -> bool + */ +ml_val_t _ml_Sock_ctlOOBINLINE (ml_state_t *msp, ml_val_t arg) +{ + return _util_Sock_ControlFlg (msp, arg, SO_OOBINLINE); + +} /* end of _ml_Sock_ctlOOBINLINE */ diff --git a/base/runtime/c-libs/smlnj-sockets/ctlRCVBUF.c b/base/runtime/c-libs/smlnj-sockets/ctlRCVBUF.c new file mode 100644 index 0000000..3904095 --- /dev/null +++ b/base/runtime/c-libs/smlnj-sockets/ctlRCVBUF.c @@ -0,0 +1,38 @@ +/* ctlRCVBUF.c + * + * COPYRIGHT (c) 1995 AT&T Bell Laboratories. + */ + +#include "sockets-osdep.h" +#include INCLUDE_SOCKET_H +#include "ml-base.h" +#include "ml-values.h" +#include "ml-objects.h" +#include "ml-c.h" +#include "cfun-proto-list.h" +#include "sock-util.h" + +/* _ml_Sock_ctlRCVBUF : (sock * int option) -> int + */ +ml_val_t _ml_Sock_ctlRCVBUF (ml_state_t *msp, ml_val_t arg) +{ + int sock = REC_SELINT(arg, 0); + ml_val_t ctl = REC_SEL(arg, 1); + int sz, sts; + + if (ctl == OPTION_NONE) { + socklen_t optSz = sizeof(int); + sts = getsockopt (sock, SOL_SOCKET, SO_RCVBUF, (sockoptval_t)&sz, &optSz); + ASSERT((sts < 0) || (optSz == sizeof(int))); + } + else { + sz = INT_MLtoC(OPTION_get(ctl)); + sts = setsockopt (sock, SOL_SOCKET, SO_RCVBUF, (sockoptval_t)&sz, sizeof(int)); + } + + if (sts < 0) + return RAISE_SYSERR(msp, sts); + else + return INT_CtoML(sz); + +} /* end of _ml_Sock_ctlRCVBUF */ diff --git a/base/runtime/c-libs/smlnj-sockets/ctlREUSEADDR.c b/base/runtime/c-libs/smlnj-sockets/ctlREUSEADDR.c new file mode 100644 index 0000000..33fe1cc --- /dev/null +++ b/base/runtime/c-libs/smlnj-sockets/ctlREUSEADDR.c @@ -0,0 +1,20 @@ +/* ctlREUSEADDR.c + * + * COPYRIGHT (c) 1995 AT&T Bell Laboratories. + */ + +#include "sockets-osdep.h" +#include INCLUDE_SOCKET_H +#include "ml-base.h" +#include "ml-values.h" +#include "ml-c.h" +#include "cfun-proto-list.h" +#include "sock-util.h" + +/* _ml_Sock_ctlREUSEADDR : (sock * bool option) -> bool + */ +ml_val_t _ml_Sock_ctlREUSEADDR (ml_state_t *msp, ml_val_t arg) +{ + return _util_Sock_ControlFlg (msp, arg, SO_REUSEADDR); + +} /* end of _ml_Sock_ctlREUSEADDR */ diff --git a/base/runtime/c-libs/smlnj-sockets/ctlSNDBUF.c b/base/runtime/c-libs/smlnj-sockets/ctlSNDBUF.c new file mode 100644 index 0000000..1c4741a --- /dev/null +++ b/base/runtime/c-libs/smlnj-sockets/ctlSNDBUF.c @@ -0,0 +1,38 @@ +/* ctlSNDBUF.c + * + * COPYRIGHT (c) 1995 AT&T Bell Laboratories. + */ + +#include "sockets-osdep.h" +#include INCLUDE_SOCKET_H +#include "ml-base.h" +#include "ml-values.h" +#include "ml-objects.h" +#include "ml-c.h" +#include "cfun-proto-list.h" +#include "sock-util.h" + +/* _ml_Sock_ctlSNDBUF : (sock * int option) -> int + */ +ml_val_t _ml_Sock_ctlSNDBUF (ml_state_t *msp, ml_val_t arg) +{ + int sock = REC_SELINT(arg, 0); + ml_val_t ctl = REC_SEL(arg, 1); + int sz, sts; + + if (ctl == OPTION_NONE) { + socklen_t optSz = sizeof(int); + sts = getsockopt (sock, SOL_SOCKET, SO_SNDBUF, (sockoptval_t)&sz, &optSz); + ASSERT((sts < 0) || (optSz == sizeof(int))); + } + else { + sz = INT_MLtoC(OPTION_get(ctl)); + sts = setsockopt (sock, SOL_SOCKET, SO_SNDBUF, (sockoptval_t)&sz, sizeof(int)); + } + + if (sts < 0) + return RAISE_SYSERR(msp, sts); + else + return INT_CtoML(sz); + +} /* end of _ml_Sock_ctlSNDBUF */ diff --git a/base/runtime/c-libs/smlnj-sockets/from-inetaddr.c b/base/runtime/c-libs/smlnj-sockets/from-inetaddr.c new file mode 100644 index 0000000..e73f064 --- /dev/null +++ b/base/runtime/c-libs/smlnj-sockets/from-inetaddr.c @@ -0,0 +1,33 @@ +/* from-inetaddr.c + * + * COPYRIGHT (c) 1996 AT&T Research. + */ + +#include "sockets-osdep.h" +#include INCLUDE_SOCKET_H +#include "ml-base.h" +#include "ml-values.h" +#include "ml-objects.h" +#include "ml-c.h" +#include "cfun-proto-list.h" + + +/* _ml_Sock_frominetaddr : addr -> (in_addr * int) + * + * Given a INET-domain socket address, return the INET address and port number. + */ +ml_val_t _ml_Sock_frominetaddr (ml_state_t *msp, ml_val_t arg) +{ + struct sockaddr_in *addr = GET_SEQ_DATAPTR(struct sockaddr_in, arg); + ml_val_t data, inAddr, res; + + ASSERT (addr->sin_family == AF_INET); + + data = ML_CData (msp, &(addr->sin_addr), sizeof(struct in_addr)); + SEQHDR_ALLOC (msp, inAddr, DESC_word8vec, data, sizeof(struct in_addr)); + REC_ALLOC2 (msp, res, inAddr, INT_CtoML(ntohs(addr->sin_port))); + + return res; + +} /* end of _ml_Sock_frominetaddr */ + diff --git a/base/runtime/c-libs/smlnj-sockets/from-unixaddr.c b/base/runtime/c-libs/smlnj-sockets/from-unixaddr.c new file mode 100644 index 0000000..fecee4b --- /dev/null +++ b/base/runtime/c-libs/smlnj-sockets/from-unixaddr.c @@ -0,0 +1,30 @@ +/* from-unixaddr.c + * + * COPYRIGHT (c) 1996 AT&T Research. + */ + +#include "ml-unixdep.h" +#include "sockets-osdep.h" +#include INCLUDE_TYPES_H +#include INCLUDE_SOCKET_H +#include INCLUDE_UN_H +#include "ml-base.h" +#include "ml-values.h" +#include "ml-objects.h" +#include "cfun-proto-list.h" + + +/* _ml_Sock_fromunixaddr : addr -> string + * + * Given a UNIX-domain socket address, return the string. + */ +ml_val_t _ml_Sock_fromunixaddr (ml_state_t *msp, ml_val_t arg) +{ + struct sockaddr_un *addr = GET_SEQ_DATAPTR(struct sockaddr_un, arg); + + ASSERT(addr->sun_family == AF_UNIX); + + return ML_CString(msp, addr->sun_path); + +} /* end of _ml_Sock_fromunixaddr */ + diff --git a/base/runtime/c-libs/smlnj-sockets/getATMARK.c b/base/runtime/c-libs/smlnj-sockets/getATMARK.c new file mode 100644 index 0000000..7617fd8 --- /dev/null +++ b/base/runtime/c-libs/smlnj-sockets/getATMARK.c @@ -0,0 +1,30 @@ +/* getATMARK.c + * + * COPYRIGHT (c) 1995 AT&T Bell Laboratories. + */ + +#include "sockets-osdep.h" +#include INCLUDE_SOCKET_H +#include "ml-base.h" +#include "ml-values.h" +#include "ml-objects.h" +#include "ml-c.h" +#include "cfun-proto-list.h" +#include "sock-util.h" + +/* _ml_Sock_getATMARK : sock -> int + */ +ml_val_t _ml_Sock_getATMARK (ml_state_t *msp, ml_val_t arg) +{ + int n, sts; + + sts = ioctl (INT_MLtoC(arg), SIOCATMARK, (unsigned long *)&n); + + if (sts < 0) + return RAISE_SYSERR(msp, sts); + else if (n == 0) + return ML_false; + else + return ML_true; + +} /* end of _ml_Sock_getATMARK */ diff --git a/base/runtime/c-libs/smlnj-sockets/getERROR.c b/base/runtime/c-libs/smlnj-sockets/getERROR.c new file mode 100644 index 0000000..ba13919 --- /dev/null +++ b/base/runtime/c-libs/smlnj-sockets/getERROR.c @@ -0,0 +1,28 @@ +/* getERROR.c + * + * COPYRIGHT (c) 1995 AT&T Bell Laboratories. + */ + +#include "sockets-osdep.h" +#include INCLUDE_SOCKET_H +#include "ml-base.h" +#include "ml-values.h" +#include "ml-c.h" +#include "cfun-proto-list.h" + +/* _ml_Sock_getERROR : sock -> bool + */ +ml_val_t _ml_Sock_getERROR (ml_state_t *msp, ml_val_t arg) +{ + int sock = INT_MLtoC(arg); + int flg, sts; + socklen_t optSz = sizeof(int); + + sts = getsockopt (sock, SOL_SOCKET, SO_ERROR, (sockoptval_t)&flg, &optSz); + + if (sts < 0) + return RAISE_SYSERR(msp, sts); + else + return (flg ? ML_true : ML_false); + +} /* end of _ml_Sock_getERROR */ diff --git a/base/runtime/c-libs/smlnj-sockets/getNREAD.c b/base/runtime/c-libs/smlnj-sockets/getNREAD.c new file mode 100644 index 0000000..3bf5cc6 --- /dev/null +++ b/base/runtime/c-libs/smlnj-sockets/getNREAD.c @@ -0,0 +1,28 @@ +/* getNREAD.c + * + * COPYRIGHT (c) 1995 AT&T Bell Laboratories. + */ + +#include "sockets-osdep.h" +#include INCLUDE_SOCKET_H +#include "ml-base.h" +#include "ml-values.h" +#include "ml-objects.h" +#include "ml-c.h" +#include "cfun-proto-list.h" +#include "sock-util.h" + +/* _ml_Sock_getNREAD : sock -> int + */ +ml_val_t _ml_Sock_getNREAD (ml_state_t *msp, ml_val_t arg) +{ + int n, sts; + + sts = ioctl (INT_MLtoC(arg), FIONREAD, (unsigned long *)&n); + + if (sts < 0) + return RAISE_SYSERR(msp, sts); + else + return INT_CtoML(n); + +} /* end of _ml_Sock_getNREAD */ diff --git a/base/runtime/c-libs/smlnj-sockets/getTYPE.c b/base/runtime/c-libs/smlnj-sockets/getTYPE.c new file mode 100644 index 0000000..3ba72c8 --- /dev/null +++ b/base/runtime/c-libs/smlnj-sockets/getTYPE.c @@ -0,0 +1,30 @@ +/* getTYPE.c + * + * COPYRIGHT (c) 1995 AT&T Bell Laboratories. + */ + +#include "sockets-osdep.h" +#include INCLUDE_SOCKET_H +#include "ml-base.h" +#include "ml-values.h" +#include "ml-objects.h" +#include "ml-c.h" +#include "cfun-proto-list.h" +#include "sock-util.h" + +/* _ml_Sock_getTYPE : sock -> sock_type + */ +ml_val_t _ml_Sock_getTYPE (ml_state_t *msp, ml_val_t arg) +{ + int sock = INT_MLtoC(arg); + int flg, sts; + socklen_t optSz = sizeof(int); + + sts = getsockopt (sock, SOL_SOCKET, SO_TYPE, (sockoptval_t)&flg, &optSz); + + if (sts < 0) + return RAISE_SYSERR(msp, sts); + else + return ML_SysConst (msp, &_Sock_Type, flg); + +} /* end of _ml_Sock_getTYPE */ diff --git a/base/runtime/c-libs/smlnj-sockets/getaddrfamily.c b/base/runtime/c-libs/smlnj-sockets/getaddrfamily.c new file mode 100644 index 0000000..a469480 --- /dev/null +++ b/base/runtime/c-libs/smlnj-sockets/getaddrfamily.c @@ -0,0 +1,25 @@ +/* getaddrfamily.c + * + * COPYRIGHT (c) 1995 AT&T Bell Laboratories. + */ + +#include "sockets-osdep.h" +#include INCLUDE_SOCKET_H +#include "ml-base.h" +#include "ml-values.h" +#include "ml-objects.h" +#include "ml-c.h" +#include "cfun-proto-list.h" +#include "sock-util.h" + +/* _ml_Sock_getaddrfamily : addr -> af + * + * Extract the family field, convert to host byteorder, and return it. + */ +ml_val_t _ml_Sock_getaddrfamily (ml_state_t *msp, ml_val_t arg) +{ + struct sockaddr *addr = GET_SEQ_DATAPTR(struct sockaddr, arg); + + return ML_SysConst (msp, &_Sock_AddrFamily, ntohs(addr->sa_family)); + +} /* end of _ml_Sock_getaddrfamily */ diff --git a/base/runtime/c-libs/smlnj-sockets/gethostbyaddr.c b/base/runtime/c-libs/smlnj-sockets/gethostbyaddr.c new file mode 100644 index 0000000..f578fda --- /dev/null +++ b/base/runtime/c-libs/smlnj-sockets/gethostbyaddr.c @@ -0,0 +1,25 @@ +/* gethostbyaddr.c + * + * COPYRIGHT (c) 1995 AT&T Bell Laboratories. + */ + +#include "sockets-osdep.h" +#include INCLUDE_SOCKET_H +#include "ml-base.h" +#include "ml-values.h" +#include "ml-objects.h" +#include "ml-c.h" +#include "cfun-proto-list.h" +#include "sock-util.h" + +/* _ml_NetDB_gethostbyaddr + * : addr -> (string * string list * addr_family * addr list) option + */ +ml_val_t _ml_NetDB_gethostbyaddr (ml_state_t *msp, ml_val_t arg) +{ + ASSERT (sizeof(struct in_addr) == GET_SEQ_LEN(arg)); + + return _util_NetDB_mkhostent ( + msp, + gethostbyaddr (STR_MLtoC(arg), sizeof(struct in_addr), AF_INET)); +} /* end of _ml_NetDB_gethostbyaddr */ diff --git a/base/runtime/c-libs/smlnj-sockets/gethostbyname.c b/base/runtime/c-libs/smlnj-sockets/gethostbyname.c new file mode 100644 index 0000000..b6eacc4 --- /dev/null +++ b/base/runtime/c-libs/smlnj-sockets/gethostbyname.c @@ -0,0 +1,22 @@ +/* gethostbyname.c + * + * COPYRIGHT (c) 1995 AT&T Bell Laboratories. + */ + +#include "sockets-osdep.h" +#include INCLUDE_SOCKET_H +#include "ml-base.h" +#include "ml-values.h" +#include "ml-c.h" +#include "cfun-proto-list.h" +#include "sock-util.h" + + +/* _ml_NetDB_gethostbyname + * : string -> (string * string list * addr_family * addr list) option + */ +ml_val_t _ml_NetDB_gethostbyname (ml_state_t *msp, ml_val_t arg) +{ + return _util_NetDB_mkhostent (msp, gethostbyname (STR_MLtoC(arg))); + +} /* end of _ml_NetDB_gethostbyname */ diff --git a/base/runtime/c-libs/smlnj-sockets/gethostname.c b/base/runtime/c-libs/smlnj-sockets/gethostname.c new file mode 100644 index 0000000..437ef5c --- /dev/null +++ b/base/runtime/c-libs/smlnj-sockets/gethostname.c @@ -0,0 +1,29 @@ +/* gethostname.c + * + * COPYRIGHT (c) 1995 AT&T Bell Laboratories. + */ + +#include "sockets-osdep.h" +#include INCLUDE_SOCKET_H +#include "ml-base.h" +#include "ml-values.h" +#include "ml-objects.h" +#include "ml-c.h" +#include "cfun-proto-list.h" + +#ifndef MAXHOSTNAMELEN +#define MAXHOSTNAMELEN 256 +#endif + +/* _ml_NetDB_gethostname : unit -> string + */ +ml_val_t _ml_NetDB_gethostname (ml_state_t *msp, ml_val_t arg) +{ + char hostname[MAXHOSTNAMELEN]; + + if (gethostname (hostname, MAXHOSTNAMELEN) == -1) + return RAISE_SYSERR(msp, sts); + else + return ML_CString(msp, hostname); + +} /* end of _ml_NetDB_gethostname */ diff --git a/base/runtime/c-libs/smlnj-sockets/getnetbyaddr.c b/base/runtime/c-libs/smlnj-sockets/getnetbyaddr.c new file mode 100644 index 0000000..09f94db --- /dev/null +++ b/base/runtime/c-libs/smlnj-sockets/getnetbyaddr.c @@ -0,0 +1,35 @@ +/* getnetbyaddr.c + * + * COPYRIGHT (c) 1995 AT&T Bell Laboratories. + */ + +#include "sockets-osdep.h" +#include INCLUDE_SOCKET_H +#include "ml-base.h" +#include "ml-values.h" +#include "ml-objects.h" +#include "ml-c.h" +#include "cfun-proto-list.h" +#include "sock-util.h" + +#if defined(__CYGWIN32__) +#undef getnetbyaddr +#define getnetbyaddr(x,y) NULL +#endif + +/* _ml_NetDB_getnetbyaddr + * : (sysword * addr_family) -> (string * string list * addr_family * sysword) option + */ +ml_val_t _ml_NetDB_getnetbyaddr (ml_state_t *msp, ml_val_t arg) +{ +#if defined(OPSYS_WIN32) + /* FIXME: getnetbyaddr() does not seem to exist under Windows. What is + the equivalent? */ + return RAISE_ERROR(msp, ""); +#else + unsigned long net = REC_SELWORD(arg, 0); + int type = REC_SELINT(arg, 1); + + return _util_NetDB_mknetent (msp, getnetbyaddr(net, type)); +#endif +} /* end of _ml_NetDB_getnetbyaddr */ diff --git a/base/runtime/c-libs/smlnj-sockets/getnetbyname.c b/base/runtime/c-libs/smlnj-sockets/getnetbyname.c new file mode 100644 index 0000000..1421761 --- /dev/null +++ b/base/runtime/c-libs/smlnj-sockets/getnetbyname.c @@ -0,0 +1,30 @@ +/* getnetbyname.c + * + * COPYRIGHT (c) 1995 AT&T Bell Laboratories. + */ + +#include "sockets-osdep.h" +#include INCLUDE_SOCKET_H +#include "ml-base.h" +#include "ml-values.h" +#include "ml-c.h" +#include "cfun-proto-list.h" +#include "sock-util.h" + +#if defined(__CYGWIN32__) +#undef getnetbyname +#define getnetbyname(x) NULL +#endif + +/* _ml_NetDB_getnetbyname : string -> (string * string list * addr_family * sysword) option + */ +ml_val_t _ml_NetDB_getnetbyname (ml_state_t *msp, ml_val_t arg) +{ +#if defined(OPSYS_WIN32) + /* FIXME: getnetbyname() does not seem to exist under Windows. What is + the equivalent? */ + return RAISE_ERROR(msp, ""); +#else + return _util_NetDB_mknetent (msp, getnetbyname (STR_MLtoC(arg))); +#endif +} /* end of _ml_NetDB_getnetbyname */ diff --git a/base/runtime/c-libs/smlnj-sockets/getpeername.c b/base/runtime/c-libs/smlnj-sockets/getpeername.c new file mode 100644 index 0000000..4319597 --- /dev/null +++ b/base/runtime/c-libs/smlnj-sockets/getpeername.c @@ -0,0 +1,32 @@ +/* getpeername.c + * + * COPYRIGHT (c) 1995 AT&T Bell Laboratories. + */ + +#include "sockets-osdep.h" +#include INCLUDE_SOCKET_H +#include "ml-base.h" +#include "ml-values.h" +#include "ml-objects.h" +#include "ml-c.h" +#include "cfun-proto-list.h" +#include "sock-util.h" + +/* _ml_Sock_getpeername : sock -> (af * addr) + */ +ml_val_t _ml_Sock_getpeername (ml_state_t *msp, ml_val_t arg) +{ + char addr[MAX_SOCK_ADDR_SZB]; + socklen_t addrLen = MAX_SOCK_ADDR_SZB; + + if (getpeername (INT_MLtoC(arg), (struct sockaddr *)addr, &addrLen) < 0) + return RAISE_SYSERR(msp, sts); + else { + ml_val_t cdata = ML_CData(msp, addr, addrLen); + ml_val_t res; + + SEQHDR_ALLOC (msp, res, DESC_word8vec, cdata, addrLen); + return res; + } + +} /* end of _ml_Sock_getpeername */ diff --git a/base/runtime/c-libs/smlnj-sockets/getprotbyname.c b/base/runtime/c-libs/smlnj-sockets/getprotbyname.c new file mode 100644 index 0000000..60696b8 --- /dev/null +++ b/base/runtime/c-libs/smlnj-sockets/getprotbyname.c @@ -0,0 +1,34 @@ +/* getprotbyname.c + * + * COPYRIGHT (c) 1995 AT&T Bell Laboratories. + */ + +#include "sockets-osdep.h" +#include INCLUDE_SOCKET_H +#include "ml-base.h" +#include "ml-values.h" +#include "ml-objects.h" +#include "ml-c.h" +#include "cfun-proto-list.h" +#include "sock-util.h" + +/* _ml_NetDB_getprotbyname : string -> (string * string list * int) option + */ +ml_val_t _ml_NetDB_getprotbyname (ml_state_t *msp, ml_val_t arg) +{ + ml_val_t name, aliases, res; + struct protoent *pentry; + + pentry = getprotobyname (STR_MLtoC(arg)); + + if (pentry == NIL(struct protoent *)) + return OPTION_NONE; + else { + name = ML_CString (msp, pentry->p_name); + aliases = ML_CStringList (msp, pentry->p_aliases); + REC_ALLOC3 (msp, res, name, aliases, INT_CtoML(pentry->p_proto)); + OPTION_SOME (msp, res, res); + return res; + } + +} /* end of _ml_NetDB_getprotbyname */ diff --git a/base/runtime/c-libs/smlnj-sockets/getprotbynum.c b/base/runtime/c-libs/smlnj-sockets/getprotbynum.c new file mode 100644 index 0000000..98dff50 --- /dev/null +++ b/base/runtime/c-libs/smlnj-sockets/getprotbynum.c @@ -0,0 +1,34 @@ +/* getprotbynum.c + * + * COPYRIGHT (c) 1995 AT&T Bell Laboratories. + */ + +#include "sockets-osdep.h" +#include INCLUDE_SOCKET_H +#include "ml-base.h" +#include "ml-values.h" +#include "ml-objects.h" +#include "ml-c.h" +#include "cfun-proto-list.h" +#include "sock-util.h" + +/* _ml_NetDB_getprotbynum : int -> (string * string list * int) option + */ +ml_val_t _ml_NetDB_getprotbynum (ml_state_t *msp, ml_val_t arg) +{ + ml_val_t name, aliases, res; + struct protoent *pentry; + + pentry = getprotobynumber (INT_MLtoC(arg)); + + if (pentry == NIL(struct protoent *)) + return OPTION_NONE; + else { + name = ML_CString (msp, pentry->p_name); + aliases = ML_CStringList (msp, pentry->p_aliases); + REC_ALLOC3 (msp, res, name, aliases, INT_CtoML(pentry->p_proto)); + OPTION_SOME (msp, res, res); + return res; + } + +} /* end of _ml_NetDB_getprotbynum */ diff --git a/base/runtime/c-libs/smlnj-sockets/getrpcbyname.c b/base/runtime/c-libs/smlnj-sockets/getrpcbyname.c new file mode 100644 index 0000000..41710ed --- /dev/null +++ b/base/runtime/c-libs/smlnj-sockets/getrpcbyname.c @@ -0,0 +1,38 @@ +/* getrpcbyname.c + * + * COPYRIGHT (c) 1995 AT&T Bell Laboratories. + */ + +#include "ml-unixdep.h" +#include "sockets-osdep.h" +#include +#ifdef INCLUDE_RPCENT_H +# include INCLUDE_RPCENT_H +#endif +#include "ml-base.h" +#include "ml-values.h" +#include "ml-objects.h" +#include "ml-c.h" +#include "cfun-proto-list.h" +#include "sock-util.h" + +/* _ml_NetDB_getrpcbyname : string -> (string * string list * int) option + */ +ml_val_t _ml_NetDB_getrpcbyname (ml_state_t *msp, ml_val_t arg) +{ + ml_val_t name, aliases, res; + struct rpcent *rentry; + + rentry = getrpcbyname (STR_MLtoC(arg)); + + if (rentry == NIL(struct rpcent *)) + return OPTION_NONE; + else { + name = ML_CString (msp, rentry->r_name); + aliases = ML_CStringList (msp, rentry->r_aliases); + REC_ALLOC3 (msp, res, name, aliases, INT_CtoML(rentry->r_number)); + OPTION_SOME (msp, res, res); + return res; + } + +} /* end of _ml_NetDB_getrpcbyname */ diff --git a/base/runtime/c-libs/smlnj-sockets/getrpcbynum.c b/base/runtime/c-libs/smlnj-sockets/getrpcbynum.c new file mode 100644 index 0000000..f48c4bc --- /dev/null +++ b/base/runtime/c-libs/smlnj-sockets/getrpcbynum.c @@ -0,0 +1,41 @@ +/* getrpcbynum.c + * + * COPYRIGHT (c) 1995 AT&T Bell Laboratories. + */ + +#include "ml-unixdep.h" +#include "sockets-osdep.h" +#include +#ifdef INCLUDE_RPCENT_H +# include INCLUDE_RPCENT_H +# ifdef bool_t /* NetBSD hack */ +# undef bool_t +# endif +#endif +#include "ml-base.h" +#include "ml-values.h" +#include "ml-objects.h" +#include "ml-c.h" +#include "cfun-proto-list.h" +#include "sock-util.h" + +/* _ml_NetDB_getrpcbynum : int -> (string * string list * int) option + */ +ml_val_t _ml_NetDB_getrpcbynum (ml_state_t *msp, ml_val_t arg) +{ + ml_val_t name, aliases, res; + struct rpcent *rentry; + + rentry = getrpcbynumber (INT_MLtoC(arg)); + + if (rentry == NIL(struct rpcent *)) + return OPTION_NONE; + else { + name = ML_CString (msp, rentry->r_name); + aliases = ML_CStringList (msp, rentry->r_aliases); + REC_ALLOC3 (msp, res, name, aliases, INT_CtoML(rentry->r_number)); + OPTION_SOME (msp, res, res); + return res; + } + +} /* end of _ml_NetDB_getrpcbynum */ diff --git a/base/runtime/c-libs/smlnj-sockets/getservbyname.c b/base/runtime/c-libs/smlnj-sockets/getservbyname.c new file mode 100644 index 0000000..4c1fb9e --- /dev/null +++ b/base/runtime/c-libs/smlnj-sockets/getservbyname.c @@ -0,0 +1,33 @@ +/* getservbyname.c + * + * COPYRIGHT (c) 1995 AT&T Bell Laboratories. + */ + +#include "sockets-osdep.h" +#include INCLUDE_SOCKET_H +#include "ml-base.h" +#include "ml-values.h" +#include "ml-objects.h" +#include "ml-c.h" +#include "cfun-proto-list.h" +#include "sock-util.h" + +/* _ml_NetDB_getservbyname + * : (string * string option) -> (string * string list * int * string) option + */ +ml_val_t _ml_NetDB_getservbyname (ml_state_t *msp, ml_val_t arg) +{ + ml_val_t mlServ = REC_SEL(arg, 0); + ml_val_t mlProto = REC_SEL(arg, 1); + char *proto; + + if (mlProto == OPTION_NONE) + proto = NIL(char *); + else + proto = STR_MLtoC(OPTION_get(mlProto)); + + return _util_NetDB_mkservent ( + msp, + getservbyname (STR_MLtoC(mlServ), proto)); + +} /* end of _ml_NetDB_getservbyname */ diff --git a/base/runtime/c-libs/smlnj-sockets/getservbyport.c b/base/runtime/c-libs/smlnj-sockets/getservbyport.c new file mode 100644 index 0000000..c3c4af9 --- /dev/null +++ b/base/runtime/c-libs/smlnj-sockets/getservbyport.c @@ -0,0 +1,30 @@ +/* getservbyport.c + * + * COPYRIGHT (c) 1995 AT&T Bell Laboratories. + */ + +#include "sockets-osdep.h" +#include INCLUDE_SOCKET_H +#include "ml-base.h" +#include "ml-values.h" +#include "ml-objects.h" +#include "ml-c.h" +#include "cfun-proto-list.h" +#include "sock-util.h" + +/* _ml_NetDB_getservbyport + * : (int * string option) -> (string * string list * int * string) option + */ +ml_val_t _ml_NetDB_getservbyport (ml_state_t *msp, ml_val_t arg) +{ + ml_val_t mlProto = REC_SEL(arg, 1); + char *proto; + + if (mlProto == OPTION_NONE) + proto = NIL(char *); + else + proto = STR_MLtoC(OPTION_get(mlProto)); + + return _util_NetDB_mkservent (msp, getservbyport (REC_SELINT(arg, 0), proto)); + +} /* end of _ml_NetDB_getservbyport */ diff --git a/base/runtime/c-libs/smlnj-sockets/getsockname.c b/base/runtime/c-libs/smlnj-sockets/getsockname.c new file mode 100644 index 0000000..818b7a3 --- /dev/null +++ b/base/runtime/c-libs/smlnj-sockets/getsockname.c @@ -0,0 +1,34 @@ +/* getsockname.c + * + * COPYRIGHT (c) 1995 AT&T Bell Laboratories. + */ + +#include "sockets-osdep.h" +#include INCLUDE_SOCKET_H +#include "ml-base.h" +#include "ml-values.h" +#include "ml-objects.h" +#include "ml-c.h" +#include "cfun-proto-list.h" + +/* _ml_Sock_getsockname : sock -> addr + */ +ml_val_t _ml_Sock_getsockname (ml_state_t *msp, ml_val_t arg) +{ + int sock = INT_MLtoC(arg); + char addrBuf[MAX_SOCK_ADDR_SZB]; + socklen_t addrLen = MAX_SOCK_ADDR_SZB; + int sts; + + sts = getsockname (sock, (struct sockaddr *)addrBuf, &addrLen); + + if (sts == -1) + return RAISE_SYSERR(msp, sts); + else { + ml_val_t data = ML_CData (msp, addrBuf, addrLen); + ml_val_t addr; + SEQHDR_ALLOC (msp, addr, DESC_word8vec, data, addrLen); + return addr; + } + +} /* end of _ml_Sock_getsockname */ diff --git a/base/runtime/c-libs/smlnj-sockets/inetany.c b/base/runtime/c-libs/smlnj-sockets/inetany.c new file mode 100644 index 0000000..0fcd795 --- /dev/null +++ b/base/runtime/c-libs/smlnj-sockets/inetany.c @@ -0,0 +1,36 @@ +/* inetany.c + * + * COPYRIGHT (c) 1995 AT&T Bell Laboratories. + */ + +#include "sockets-osdep.h" +#include INCLUDE_SOCKET_H +#include "ml-base.h" +#include "ml-values.h" +#include "ml-objects.h" +#include "ml-c.h" +#include "cfun-proto-list.h" + + +/* _ml_Sock_inetany : int -> addr + * + * Make an INET_ANY INET socket address, with the given port ID. + */ +ml_val_t _ml_Sock_inetany (ml_state_t *msp, ml_val_t arg) +{ + struct sockaddr_in addr; + ml_val_t data, res; + + memset(&addr, 0, sizeof(struct sockaddr_in)); + + addr.sin_family = AF_INET; + addr.sin_addr.s_addr = htonl(INADDR_ANY); + addr.sin_port = htons(INT_MLtoC(arg)); + + data = ML_CData (msp, &addr, sizeof(struct sockaddr_in)); + SEQHDR_ALLOC (msp, res, DESC_word8vec, data, sizeof(struct sockaddr_in)); + + return res; + +} /* end of _ml_Sock_inetany */ + diff --git a/base/runtime/c-libs/smlnj-sockets/list-addr-families.c b/base/runtime/c-libs/smlnj-sockets/list-addr-families.c new file mode 100644 index 0000000..12d352e --- /dev/null +++ b/base/runtime/c-libs/smlnj-sockets/list-addr-families.c @@ -0,0 +1,21 @@ +/* list-addr-families.c + * + * COPYRIGHT (c) 1995 AT&T Bell Laboratories. + */ + +#include "ml-base.h" +#include "ml-objects.h" +#include "sock-util.h" +#include "cfun-proto-list.h" +#include "ml-c.h" + +/* _ml_Sock_listaddrfamilies: + * + * Return a list of the known address famlies (this may contain unsupported + * families). + */ +ml_val_t _ml_Sock_listaddrfamilies (ml_state_t *msp, ml_val_t arg) +{ + return ML_SysConstList (msp, &_Sock_AddrFamily); + +} /* end of _ml_Sock_listaddrfamilies */ diff --git a/base/runtime/c-libs/smlnj-sockets/list-sock-types.c b/base/runtime/c-libs/smlnj-sockets/list-sock-types.c new file mode 100644 index 0000000..f27a84c --- /dev/null +++ b/base/runtime/c-libs/smlnj-sockets/list-sock-types.c @@ -0,0 +1,21 @@ +/* list-sock-types.c + * + * COPYRIGHT (c) 1995 AT&T Bell Laboratories. + */ + +#include "ml-base.h" +#include "ml-objects.h" +#include "sock-util.h" +#include "cfun-proto-list.h" +#include "ml-c.h" + +/* _ml_Sock_listsocktypes + * + * Return a list of the known socket types (this may contain unsupported + * families). + */ +ml_val_t _ml_Sock_listsocktypes (ml_state_t *msp, ml_val_t arg) +{ + return ML_SysConstList (msp, &_Sock_Type); + +} /* end of _ml_Sock_listsocktypes */ diff --git a/base/runtime/c-libs/smlnj-sockets/listen.c b/base/runtime/c-libs/smlnj-sockets/listen.c new file mode 100644 index 0000000..7db4480 --- /dev/null +++ b/base/runtime/c-libs/smlnj-sockets/listen.c @@ -0,0 +1,25 @@ +/* listen.c + * + * COPYRIGHT (c) 1995 AT&T Bell Laboratories. + */ + +#include "sockets-osdep.h" +#include INCLUDE_SOCKET_H +#include "ml-base.h" +#include "ml-values.h" +#include "ml-c.h" +#include "cfun-proto-list.h" + +/* _ml_Sock_listen : (sock * int) -> unit + */ +ml_val_t _ml_Sock_listen (ml_state_t *msp, ml_val_t arg) +{ + int sock = REC_SELINT(arg, 0); + int backlog = REC_SELINT(arg, 1); + int sts; + + sts = listen (sock, backlog); + + CHK_RETURN_UNIT(msp, sts); + +} /* end of _ml_Sock_listen */ diff --git a/base/runtime/c-libs/smlnj-sockets/makefile b/base/runtime/c-libs/smlnj-sockets/makefile new file mode 100644 index 0000000..d56d537 --- /dev/null +++ b/base/runtime/c-libs/smlnj-sockets/makefile @@ -0,0 +1,103 @@ +# +# the makefile for the sockets library +# + +SHELL = /bin/sh + +INC_DIR = ../../include +CLIB_DIR = ../ + +INCLUDES = -I$(INC_DIR) -I$(CLIB_DIR) -I../../objs + +MAKE = make +AR = ar +ARFLAGS = rcv +RANLIB = ranlib + +LIBRARY = libsmlnj-sock.a + +VERSION = v-dummy + +NETDB_OBJS = gethostname.o \ + getnetbyname.o \ + getnetbyaddr.o \ + gethostbyname.o \ + gethostbyaddr.o \ + getprotbyname.o \ + getprotbynum.o \ + getservbyname.o \ + getservbyport.o + +CTL_OBJS = ctlBROADCAST.o \ + ctlDEBUG.o \ + ctlDONTROUTE.o \ + ctlKEEPALIVE.o \ + ctlLINGER.o \ + ctlNODELAY.o \ + ctlOOBINLINE.o \ + ctlRCVBUF.o \ + ctlREUSEADDR.o \ + ctlSNDBUF.o \ + getERROR.o \ + getTYPE.o \ + setNBIO.o \ + getNREAD.o \ + getATMARK.o \ + getpeername.o \ + getsockname.o + +# the following are UNIX specific +UNIXSOCK = from-unixaddr.o \ + to-unixaddr.o \ + socketpair.o + +SOCK_OBJS = accept.o \ + bind.o \ + close.o \ + connect.o \ + listen.o \ + socket.o \ + shutdown.o + +IO_OBJS = recv.o \ + recvbuf.o \ + recvfrom.o \ + recvbuffrom.o \ + sendbuf.o \ + sendbufto.o + +MISC_OBJS = list-addr-families.o \ + list-sock-types.o \ + getaddrfamily.o \ + inetany.o \ + from-inetaddr.o \ + to-inetaddr.o + +UTIL_OBJS = util-mkhostent.o \ + util-mknetent.o \ + util-mkservent.o \ + util-sockopt.o \ + tbl-addr-family.o \ + tbl-sock-type.o + +OBJS = smlnj-sock-lib.o \ + $(NETDB_OBJS) $(CTL_OBJS) $(SOCK_OBJS) $(MISC_OBJS) \ + $(IO_OBJS) $(UNIXSOCK) $(UTIL_OBJS) + +$(LIBRARY) : $(VERSION) $(OBJS) + rm -rf $(LIBRARY) + $(AR) $(ARFLAGS) $(LIBRARY) $(OBJS) + $(RANLIB) $(LIBRARY) + +$(VERSION) : + ($(MAKE) MAKE="$(MAKE)" clean) + echo "$(VERSION)" > $(VERSION) + +.c.o: $(INC_DIR)/ml-unixdep.h $(INC_DIR)/ml-base.h $(INC_DIR)/ml-values.h \ + $(CLIB_DIR)/ml-c.h \ + sockets-osdep.h sock-util.h cfun-proto-list.h cfun-list.h + $(CC) $(CFLAGS) $(DEFS) $(INCLUDES) -c $< + +clean : + rm -f v-* *.o $(LIBRARY) + diff --git a/base/runtime/c-libs/smlnj-sockets/makefile.win32 b/base/runtime/c-libs/smlnj-sockets/makefile.win32 new file mode 100644 index 0000000..a621797 --- /dev/null +++ b/base/runtime/c-libs/smlnj-sockets/makefile.win32 @@ -0,0 +1,256 @@ +# +# the makefile for the Sockets library +# win32 specific + +SHELL = + +INC_DIR = ..\..\include +CLIB_DIR = ..\ + +INCLUDES = /I$(INC_DIR) /I$(CLIB_DIR) /I..\..\objs + +MAKEFILE = makefile.win32 +MAKE = nmake /F$(MAKEFILE) +AR = lib +ARFLAGS = +RANLIB = lib + +LIBRARY = libsmlnj-sock.lib + +VERSION = v-dummy + +NETDB_OBJS = gethostname.obj \ + getnetbyname.obj \ + getnetbyaddr.obj \ + gethostbyname.obj \ + gethostbyaddr.obj \ + getprotbyname.obj \ + getprotbynum.obj \ + getservbyname.obj \ + getservbyport.obj + +CTL_OBJS = ctlBROADCAST.obj \ + ctlDEBUG.obj \ + ctlDONTROUTE.obj \ + ctlKEEPALIVE.obj \ + ctlLINGER.obj \ + ctlNODELAY.obj \ + ctlOOBINLINE.obj \ + ctlRCVBUF.obj \ + ctlREUSEADDR.obj \ + ctlSNDBUF.obj \ + getERROR.obj \ + getTYPE.obj \ + setNBIO.obj \ + getNREAD.obj \ + getATMARK.obj \ + getpeername.obj \ + getsockname.obj + +SOCK_OBJS = accept.obj \ + bind.obj \ + close.obj \ + connect.obj \ + listen.obj \ + socket.obj \ + shutdown.obj + +IO_OBJS = recv.obj \ + recvbuf.obj \ + recvfrom.obj \ + recvbuffrom.obj \ + sendbuf.obj \ + sendbufto.obj + +MISC_OBJS = list-addr-families.obj \ + list-sock-types.obj \ + getaddrfamily.obj \ + inetany.obj \ + from-inetaddr.obj \ + to-inetaddr.obj + +UTIL_OBJS = util-mkhostent.obj \ + util-mknetent.obj \ + util-mkservent.obj \ + util-sockopt.obj \ + tbl-sock-type.obj \ + tbl-addr-family.obj + +OBJS = smlnj-sock-lib.obj \ + $(NETDB_OBJS) $(CTL_OBJS) $(SOCK_OBJS) $(MISC_OBJS) \ + $(IO_OBJS) $(UTIL_OBJS) + + +$(LIBRARY) : $(VERSION) $(OBJS) + del /F /Q $(LIBRARY) + $(AR) $(ARFLAGS) /out:$(LIBRARY) $(OBJS) + $(RANLIB) /out:$(LIBRARY) + +$(VERSION) : + ($(MAKE) MAKE="$(MAKE)" clean) + echo "$(VERSION)" > $(VERSION) + +DEPENDENTS = $(INC_DIR)\ml-unixdep.h $(INC_DIR)\ml-base.h \ + $(INC_DIR)\ml-values.h \ + $(CLIB_DIR)\ml-c.h cfun-proto-list.h cfun-list.h + + +smlnj-sock-lib.obj: smlnj-sock-lib.c $(DEPENDENTS) + $(CC) $(CFLAGS) $(DEFS) $(INCLUDES) /c smlnj-sock-lib.c + +list-addr-families.obj: list-addr-families.c $(DEPENDENTS) + $(CC) $(CFLAGS) $(DEFS) $(INCLUDES) /c list-addr-families.c + +list-sock-types.obj: list-sock-types.c $(DEPENDENTS) + $(CC) $(CFLAGS) $(DEFS) $(INCLUDES) /c list-sock-types.c + +socket.obj: socket.c $(DEPENDENTS) + $(CC) $(CFLAGS) $(DEFS) $(INCLUDES) /c socket.c + +recvbuffrom.obj: recvbuffrom.c $(DEPENDENTS) + $(CC) $(CFLAGS) $(DEFS) $(INCLUDES) /c recvbuffrom.c + +recvfrom.obj: recvfrom.c $(DEPENDENTS) + $(CC) $(CFLAGS) $(DEFS) $(INCLUDES) /c recvfrom.c + +recvbuf.obj: recvbuf.c $(DEPENDENTS) + $(CC) $(CFLAGS) $(DEFS) $(INCLUDES) /c recvbuf.c + +recv.obj: recv.c $(DEPENDENTS) + $(CC) $(CFLAGS) $(DEFS) $(INCLUDES) /c recv.c + +sendbufto.obj: sendbufto.c $(DEPENDENTS) + $(CC) $(CFLAGS) $(DEFS) $(INCLUDES) /c sendbufto.c + +sendbuf.obj: sendbuf.c $(DEPENDENTS) + $(CC) $(CFLAGS) $(DEFS) $(INCLUDES) /c sendbuf.c + +shutdown.obj: shutdown.c $(DEPENDENTS) + $(CC) $(CFLAGS) $(DEFS) $(INCLUDES) /c shutdown.c + +listen.obj: listen.c $(DEPENDENTS) + $(CC) $(CFLAGS) $(DEFS) $(INCLUDES) /c listen.c + +connect.obj: connect.c $(DEPENDENTS) + $(CC) $(CFLAGS) $(DEFS) $(INCLUDES) /c connect.c + +bind.obj: bind.c $(DEPENDENTS) + $(CC) $(CFLAGS) $(DEFS) $(INCLUDES) /c bind.c + +accept.obj: accept.c $(DEPENDENTS) + $(CC) $(CFLAGS) $(DEFS) $(INCLUDES) /c accept.c + +close.obj: close.c $(DEPENDENTS) + $(CC) $(CFLAGS) $(DEFS) $(INCLUDES) /c close.c + +getaddrfamily.obj: getaddrfamily.c $(DEPENDENTS) + $(CC) $(CFLAGS) $(DEFS) $(INCLUDES) /c getaddrfamily.c + +inetany.obj: inetany.c $(DEPENDENTS) + $(CC) $(CFLAGS) $(DEFS) $(INCLUDES) /c inetany.c + +from-inetaddr.obj: from-inetaddr.c $(DEPENDENTS) + $(CC) $(CFLAGS) $(DEFS) $(INCLUDES) /c from-inetaddr.c + +to-inetaddr.obj: to-inetaddr.c $(DEPENDENTS) + $(CC) $(CFLAGS) $(DEFS) $(INCLUDES) /c to-inetaddr.c + +getsockname.obj: getsockname.c $(DEPENDENTS) + $(CC) $(CFLAGS) $(DEFS) $(INCLUDES) /c getsockname.c + +getpeername.obj: getpeername.c $(DEPENDENTS) + $(CC) $(CFLAGS) $(DEFS) $(INCLUDES) /c getpeername.c + +getservbyport.obj: getservbyport.c $(DEPENDENTS) + $(CC) $(CFLAGS) $(DEFS) $(INCLUDES) /c getservbyport.c + +getservbyname.obj: getservbyname.c $(DEPENDENTS) + $(CC) $(CFLAGS) $(DEFS) $(INCLUDES) /c getservbyname.c + +getprotbynum.obj: getprotbynum.c $(DEPENDENTS) + $(CC) $(CFLAGS) $(DEFS) $(INCLUDES) /c getprotbynum.c + +getprotbyname.obj: getprotbyname.c $(DEPENDENTS) + $(CC) $(CFLAGS) $(DEFS) $(INCLUDES) /c getprotbyname.c + +gethostbyname.obj: gethostbyname.c $(DEPENDENTS) + $(CC) $(CFLAGS) $(DEFS) $(INCLUDES) /c gethostbyname.c + +gethostbyaddr.obj: gethostbyaddr.c $(DEPENDENTS) + $(CC) $(CFLAGS) $(DEFS) $(INCLUDES) /c gethostbyaddr.c + +getnetbyname.obj: getnetbyname.c $(DEPENDENTS) + $(CC) $(CFLAGS) $(DEFS) $(INCLUDES) /c getnetbyname.c + +getnetbyaddr.obj: getnetbyaddr.c $(DEPENDENTS) + $(CC) $(CFLAGS) $(DEFS) $(INCLUDES) /c getnetbyaddr.c + +gethostname.obj: gethostname.c $(DEPENDENTS) + $(CC) $(CFLAGS) $(DEFS) $(INCLUDES) /c gethostname.c + +util-mkhostent.obj: util-mkhostent.c $(DEPENDENTS) + $(CC) $(CFLAGS) $(DEFS) $(INCLUDES) /c util-mkhostent.c + +util-mknetent.obj: util-mknetent.c $(DEPENDENTS) + $(CC) $(CFLAGS) $(DEFS) $(INCLUDES) /c util-mknetent.c + +util-mkservent.obj: util-mkservent.c $(DEPENDENTS) + $(CC) $(CFLAGS) $(DEFS) $(INCLUDES) /c util-mkservent.c + +util-sockopt.obj: util-sockopt.c $(DEPENDENTS) + $(CC) $(CFLAGS) $(DEFS) $(INCLUDES) /c util-sockopt.c + +getATMARK.obj: getATMARK.c $(DEPENDENTS) + $(CC) $(CFLAGS) $(DEFS) $(INCLUDES) /c getATMARK.c + +getNREAD.obj: getNREAD.c $(DEPENDENTS) + $(CC) $(CFLAGS) $(DEFS) $(INCLUDES) /c getNREAD.c + +setNBIO.obj: setNBIO.c $(DEPENDENTS) + $(CC) $(CFLAGS) $(DEFS) $(INCLUDES) /c setNBIO.c + +getERROR.obj: getERROR.c $(DEPENDENTS) + $(CC) $(CFLAGS) $(DEFS) $(INCLUDES) /c getERROR.c + +getTYPE.obj: getTYPE.c $(DEPENDENTS) + $(CC) $(CFLAGS) $(DEFS) $(INCLUDES) /c getTYPE.c + +ctlNODELAY.obj: ctlNODELAY.c $(DEPENDENTS) + $(CC) $(CFLAGS) $(DEFS) $(INCLUDES) /c ctlNODELAY.c + +ctlRCVBUF.obj: ctlRCVBUF.c $(DEPENDENTS) + $(CC) $(CFLAGS) $(DEFS) $(INCLUDES) /c ctlRCVBUF.c + +ctlSNDBUF.obj: ctlSNDBUF.c $(DEPENDENTS) + $(CC) $(CFLAGS) $(DEFS) $(INCLUDES) /c ctlSNDBUF.c + +ctlOOBINLINE.obj: ctlOOBINLINE.c $(DEPENDENTS) + $(CC) $(CFLAGS) $(DEFS) $(INCLUDES) /c ctlOOBINLINE.c + +ctlBROADCAST.obj: ctlBROADCAST.c $(DEPENDENTS) + $(CC) $(CFLAGS) $(DEFS) $(INCLUDES) /c ctlBROADCAST.c + +ctlLINGER.obj: ctlLINGER.c $(DEPENDENTS) + $(CC) $(CFLAGS) $(DEFS) $(INCLUDES) /c ctlLINGER.c + +ctlDONTROUTE.obj: ctlDONTROUTE.c $(DEPENDENTS) + $(CC) $(CFLAGS) $(DEFS) $(INCLUDES) /c ctlDONTROUTE.c + +ctlKEEPALIVE.obj: ctlKEEPALIVE.c $(DEPENDENTS) + $(CC) $(CFLAGS) $(DEFS) $(INCLUDES) /c ctlKEEPALIVE.c + +ctlREUSEADDR.obj: ctlREUSEADDR.c $(DEPENDENTS) + $(CC) $(CFLAGS) $(DEFS) $(INCLUDES) /c ctlREUSEADDR.c + +ctlDEBUG.obj: ctlDEBUG.c $(DEPENDENTS) + $(CC) $(CFLAGS) $(DEFS) $(INCLUDES) /c ctlDEBUG.c + +tbl-sock-type.obj: tbl-sock-type.c $(DEPENDENTS) + $(CC) $(CFLAGS) $(DEFS) $(INCLUDES) /c tbl-sock-type.c + +tbl-addr-family.obj: tbl-addr-family.c $(DEPENDENTS) + $(CC) $(CFLAGS) $(DEFS) $(INCLUDES) /c tbl-addr-family.c + + +clean : + del /F /Q v-* *.obj *.pdb $(LIBRARY) diff --git a/base/runtime/c-libs/smlnj-sockets/recv.c b/base/runtime/c-libs/smlnj-sockets/recv.c new file mode 100644 index 0000000..a3f4180 --- /dev/null +++ b/base/runtime/c-libs/smlnj-sockets/recv.c @@ -0,0 +1,59 @@ +/* recv.c + * + * COPYRIGHT (c) 1995 AT&T Bell Laboratories. + */ + +#include "sockets-osdep.h" +#include INCLUDE_SOCKET_H +#include "ml-base.h" +#include "ml-values.h" +#include "ml-objects.h" +#include "ml-c.h" +#include "cfun-proto-list.h" + +/* _ml_Sock_recv : (sock * int * bool * bool) -> int + * + * The arguments are: socket, number of bytes, OOB flag and peek flag; the + * result is the vector of bytes received. + */ +ml_val_t _ml_Sock_recv (ml_state_t *msp, ml_val_t arg) +{ + int sock = REC_SELINT(arg, 0); + int nbytes = REC_SELINT(arg, 1); + int flag = 0; + ml_val_t vec, res; + int m, n; + char *s; + + if (REC_SEL(arg, 2) == ML_true) flag |= MSG_OOB; + if (REC_SEL(arg, 3) == ML_true) flag |= MSG_PEEK; + + /* allocate the vector; note that this might cause a GC */ + vec = ML_AllocRaw (msp, BYTES_TO_WORDS(nbytes)); + + s = PTR_MLtoC(char, vec); + n = recv (sock, s, nbytes, flag); + + if (n < 0) + return RAISE_SYSERR(msp, sts); + else if (n == 0) + return ML_string0; + + /* pad the last word of the vector with zero bytes so that string pattern + * matching on the result will work. + */ + for (m = n; (m & (WORD_SZB-1)) != 0; m++) { + s[m] = '\0'; + } + + if (n < nbytes) { + /* we need to shrink the vector */ + ML_ShrinkRaw (msp, vec, BYTES_TO_WORDS(n)); + } + + SEQHDR_ALLOC (msp, res, DESC_string, vec, n); + + return res; + +} /* end of _ml_Sock_recv */ + diff --git a/base/runtime/c-libs/smlnj-sockets/recvbuf.c b/base/runtime/c-libs/smlnj-sockets/recvbuf.c new file mode 100644 index 0000000..2264c80 --- /dev/null +++ b/base/runtime/c-libs/smlnj-sockets/recvbuf.c @@ -0,0 +1,35 @@ +/* recvbuf.c + * + * COPYRIGHT (c) 1995 AT&T Bell Laboratories. + */ + +#include "sockets-osdep.h" +#include INCLUDE_SOCKET_H +#include "ml-base.h" +#include "ml-values.h" +#include "ml-c.h" +#include "cfun-proto-list.h" + +/* _ml_Sock_recvbuf : (sock * Word8Array.array * int * int * bool * bool) -> int + * + * The arguments are: socket, data buffer, start position, number of + * bytes, OOB flag and peek flag. + */ +ml_val_t _ml_Sock_recvbuf (ml_state_t *msp, ml_val_t arg) +{ + int sock = REC_SELINT(arg, 0); + ml_val_t buf = REC_SEL(arg, 1); + int nbytes = REC_SELINT(arg, 3); + char *start = STR_MLtoC(buf) + REC_SELINT(arg, 2); + int flag = 0; + int n; + + if (REC_SEL(arg, 4) == ML_true) flag |= MSG_OOB; + if (REC_SEL(arg, 5) == ML_true) flag |= MSG_PEEK; + + n = recv (sock, start, nbytes, flag); + + CHK_RETURN (msp, n) + +} /* end of _ml_Sock_recvbuf */ + diff --git a/base/runtime/c-libs/smlnj-sockets/recvbuffrom.c b/base/runtime/c-libs/smlnj-sockets/recvbuffrom.c new file mode 100644 index 0000000..feff4cb --- /dev/null +++ b/base/runtime/c-libs/smlnj-sockets/recvbuffrom.c @@ -0,0 +1,49 @@ +/* recvbuffrom.c + * + * COPYRIGHT (c) 1995 AT&T Bell Laboratories. + */ + +#include "sockets-osdep.h" +#include INCLUDE_SOCKET_H +#include "ml-base.h" +#include "ml-values.h" +#include "ml-objects.h" +#include "ml-c.h" +#include "cfun-proto-list.h" + +/* _ml_Sock_recvbuffrom + * : (sock * Word8Array.array * int * int * bool * bool) -> (int * addr) + * + * The arguments are: socket, data buffer, start position, number of + * bytes, OOB flag and peek flag. The result is number of bytes read and + * the source address. + */ +ml_val_t _ml_Sock_recvbuffrom (ml_state_t *msp, ml_val_t arg) +{ + char addrBuf[MAX_SOCK_ADDR_SZB]; + socklen_t addrLen = MAX_SOCK_ADDR_SZB; + int sock = REC_SELINT(arg, 0); + ml_val_t buf = REC_SEL(arg, 1); + int nbytes = REC_SELINT(arg, 3); + char *start = STR_MLtoC(buf) + REC_SELINT(arg, 2); + int flag = 0; + int n; + + if (REC_SEL(arg, 4) == ML_true) flag |= MSG_OOB; + if (REC_SEL(arg, 5) == ML_true) flag |= MSG_PEEK; + + n = recvfrom (sock, start, nbytes, flag, (struct sockaddr *)addrBuf, &addrLen); + + if (n < 0) + return RAISE_SYSERR(msp, sts); + else { + ml_val_t data = ML_CData (msp, addrBuf, addrLen); + ml_val_t addr, res; + + SEQHDR_ALLOC (msp, addr, DESC_word8vec, data, addrLen); + REC_ALLOC2(msp, res, INT_CtoML(n), addr); + return res; + } + +} /* end of _ml_Sock_recvbuffrom */ + diff --git a/base/runtime/c-libs/smlnj-sockets/recvfrom.c b/base/runtime/c-libs/smlnj-sockets/recvfrom.c new file mode 100644 index 0000000..585af1d --- /dev/null +++ b/base/runtime/c-libs/smlnj-sockets/recvfrom.c @@ -0,0 +1,61 @@ +/* recvbuffrom.c + * + * COPYRIGHT (c) 1995 AT&T Bell Laboratories. + */ + +#include "sockets-osdep.h" +#include INCLUDE_SOCKET_H +#include "ml-base.h" +#include "ml-values.h" +#include "ml-objects.h" +#include "ml-c.h" +#include "cfun-proto-list.h" + +/* _ml_Sock_recvfrom : (sock * int * bool * bool) -> (Word8Vector.vector * addr) + * + * The arguments are: socket, number of bytes, OOB flag and peek flag. The + * result is the vector of bytes read and the source address. + */ +ml_val_t _ml_Sock_recvfrom (ml_state_t *msp, ml_val_t arg) +{ + char addrBuf[MAX_SOCK_ADDR_SZB]; + socklen_t addrLen = MAX_SOCK_ADDR_SZB; + int sock = REC_SELINT(arg, 0); + int nbytes = REC_SELINT(arg, 1); + int flag = 0; + ml_val_t vec; + int n; + + if (REC_SEL(arg, 2) == ML_true) flag |= MSG_OOB; + if (REC_SEL(arg, 3) == ML_true) flag |= MSG_PEEK; + + /* allocate the vector; note that this might cause a GC */ + vec = ML_AllocRaw (msp, BYTES_TO_WORDS(nbytes)); + + n = recvfrom ( + sock, PTR_MLtoC(char, vec), nbytes, flag, + (struct sockaddr *)addrBuf, &addrLen); + + if (n < 0) + return RAISE_SYSERR(msp, sts); + else { + ml_val_t data = ML_CData (msp, addrBuf, addrLen); + ml_val_t addr, res; + + if (n == 0) + res = ML_string0; + else { + if (n < nbytes) + /* we need to shrink the vector */ + ML_ShrinkRaw (msp, vec, BYTES_TO_WORDS(n)); + SEQHDR_ALLOC (msp, res, DESC_string, vec, n); + } + + SEQHDR_ALLOC (msp, addr, DESC_word8vec, data, addrLen); + REC_ALLOC2(msp, res, res, addr); + + return res; + } + +} /* end of _ml_Sock_recvfrom */ + diff --git a/base/runtime/c-libs/smlnj-sockets/sendbuf.c b/base/runtime/c-libs/smlnj-sockets/sendbuf.c new file mode 100644 index 0000000..635531f --- /dev/null +++ b/base/runtime/c-libs/smlnj-sockets/sendbuf.c @@ -0,0 +1,36 @@ +/* sendbuf.c + * + * COPYRIGHT (c) 1995 AT&T Bell Laboratories. + */ + +#include "sockets-osdep.h" +#include INCLUDE_SOCKET_H +#include "ml-base.h" +#include "ml-values.h" +#include "ml-c.h" +#include "cfun-proto-list.h" + +/* _ml_Sock_sendbuf : (sock * bytes * int * int * bool * bool) -> int + * + * Send data from the buffer; bytes is either a Word8Array.array, or + * a Word8Vector.vector. The arguemnts are: socket, data buffer, start + * position, number of bytes, OOB flag, and don't_route flag. + */ +ml_val_t _ml_Sock_sendbuf (ml_state_t *msp, ml_val_t arg) +{ + int sock = REC_SELINT(arg, 0); + ml_val_t buf = REC_SEL(arg, 1); + int nbytes = REC_SELINT(arg, 3); + char *data = STR_MLtoC(buf) + REC_SELINT(arg, 2); + int flgs, n; + + /* initialize the flags */ + flgs = 0; + if (REC_SEL(arg, 4) == ML_true) flgs |= MSG_OOB; + if (REC_SEL(arg, 5) == ML_true) flgs |= MSG_DONTROUTE; + + n = send (sock, data, nbytes, flgs); + + CHK_RETURN (msp, n); + +} /* end of _ml_Sock_sendbuf */ diff --git a/base/runtime/c-libs/smlnj-sockets/sendbufto.c b/base/runtime/c-libs/smlnj-sockets/sendbufto.c new file mode 100644 index 0000000..f928b65 --- /dev/null +++ b/base/runtime/c-libs/smlnj-sockets/sendbufto.c @@ -0,0 +1,40 @@ +/* sendbufto.c + * + * COPYRIGHT (c) 1995 AT&T Bell Laboratories. + */ + +#include "sockets-osdep.h" +#include INCLUDE_SOCKET_H +#include "ml-base.h" +#include "ml-values.h" +#include "ml-objects.h" +#include "ml-c.h" +#include "cfun-proto-list.h" + +/* _ml_Sock_sendbufto : (sock * bytes * int * int * bool * bool * addr) -> int + * + * Send data from the buffer; bytes is either a Word8Array.array, or + * a Word8Vector.vector. The arguments are: socket, data buffer, start + * position, number of bytes, OOB flag, don't_route flag, and destination address. + */ +ml_val_t _ml_Sock_sendbufto (ml_state_t *msp, ml_val_t arg) +{ + int sock = REC_SELINT(arg, 0); + ml_val_t buf = REC_SEL(arg, 1); + int nbytes = REC_SELINT(arg, 3); + char *data = STR_MLtoC(buf) + REC_SELINT(arg, 2); + ml_val_t addr = REC_SEL(arg, 6); + int flgs, n; + + /* initialize the flags. */ + flgs = 0; + if (REC_SEL(arg, 4) == ML_true) flgs |= MSG_OOB; + if (REC_SEL(arg, 5) == ML_true) flgs |= MSG_DONTROUTE; + + n = sendto ( + sock, data, nbytes, flgs, + GET_SEQ_DATAPTR(struct sockaddr, addr), GET_SEQ_LEN(addr)); + + CHK_RETURN (msp, n); + +} /* end of _ml_Sock_sendbufto */ diff --git a/base/runtime/c-libs/smlnj-sockets/setNBIO.c b/base/runtime/c-libs/smlnj-sockets/setNBIO.c new file mode 100644 index 0000000..dfd67cf --- /dev/null +++ b/base/runtime/c-libs/smlnj-sockets/setNBIO.c @@ -0,0 +1,38 @@ +/* setNBIO.c + * + * COPYRIGHT (c) 1995 AT&T Bell Laboratories. + */ + +#include "sockets-osdep.h" +#include INCLUDE_SOCKET_H +#include "ml-base.h" +#include "ml-values.h" +#include "ml-objects.h" +#include "ml-c.h" +#include "cfun-proto-list.h" +#include "sock-util.h" + +/* _ml_Sock_setNBIO : (sock * bool) -> unit + */ +ml_val_t _ml_Sock_setNBIO (ml_state_t *msp, ml_val_t arg) +{ + int n, sts; + int sock = REC_SELINT(arg, 0); + +#ifdef USE_FCNTL_FOR_NBIO + n = fcntl(F_GETFL, sock); + if (n < 0) + return RAISE_SYSERR (msp, n); + if (REC_SEL(arg, 1) == ML_true) + n |= O_NONBLOCK; + else + n &= ~O_NONBLOCK; + sts = fcntl(F_SETFL, sock, n); +#else + n = (REC_SEL(arg, 1) == ML_true); + sts = ioctl (sock, FIONBIO, (unsigned long *)&n); +#endif + + CHK_RETURN_UNIT(msp, sts); + +} /* end of _ml_Sock_setNBIO */ diff --git a/base/runtime/c-libs/smlnj-sockets/shutdown.c b/base/runtime/c-libs/smlnj-sockets/shutdown.c new file mode 100644 index 0000000..653f675 --- /dev/null +++ b/base/runtime/c-libs/smlnj-sockets/shutdown.c @@ -0,0 +1,22 @@ +/* shutdown.c + * + * COPYRIGHT (c) 1995 AT&T Bell Laboratories. + */ + +#include "sockets-osdep.h" +#include INCLUDE_SOCKET_H +#include "ml-base.h" +#include "ml-values.h" +#include "ml-c.h" +#include "cfun-proto-list.h" + +/* _ml_Sock_shutdown : (sock * int) -> unit + */ +ml_val_t _ml_Sock_shutdown (ml_state_t *msp, ml_val_t arg) +{ + if (shutdown (REC_SELINT(arg, 0), REC_SELINT(arg, 1)) < 0) + return RAISE_SYSERR(msp, sts); + else + return ML_unit; + +} /* end of _ml_Sock_shutdown */ diff --git a/base/runtime/c-libs/smlnj-sockets/smlnj-sock-lib.c b/base/runtime/c-libs/smlnj-sockets/smlnj-sock-lib.c new file mode 100644 index 0000000..8ca3214 --- /dev/null +++ b/base/runtime/c-libs/smlnj-sockets/smlnj-sock-lib.c @@ -0,0 +1,44 @@ +/* smlnj-sock-lib.c + * + * COPYRIGHT (c) 1994 AT&T Bell Laboratories. + */ + +#include "sockets-osdep.h" +#include INCLUDE_SOCKET_H +#include "ml-base.h" +#include "c-library.h" +#include "cfun-proto-list.h" + + +/* the table of C functions and ML names */ +#define CFUNC(NAME, FUNC, MLTYPE) CFUNC_BIND(NAME, FUNC, MLTYPE) +PVT cfunc_binding_t CFunTable[] = { +#include "cfun-list.h" + CFUNC_NULL_BIND + }; +#undef CFUNC + + +void init_fn(int argc, char **argv) +{ +#if defined(OPSYS_WIN32) + static int nCode = -1; + if( nCode!=0 ) + { + WSADATA wsaData; + nCode = WSAStartup(MAKEWORD(1, 1), &wsaData); + /* FIXME: what to do if WSAStartup fails (nCode!=0)? */ + } +#endif +} + + +/* the Sockets library */ +c_library_t SMLNJ_Sock_Library = { + CLIB_NAME, + CLIB_VERSION, + CLIB_DATE, + init_fn, + CFunTable + }; + diff --git a/base/runtime/c-libs/smlnj-sockets/sock-util.h b/base/runtime/c-libs/smlnj-sockets/sock-util.h new file mode 100644 index 0000000..dc93195 --- /dev/null +++ b/base/runtime/c-libs/smlnj-sockets/sock-util.h @@ -0,0 +1,24 @@ +/* sock-util.h + * + * COPYRIGHT (c) 1995 AT&T Bell Laboratories. + * + * Utility functions for the network database and socket routines. + */ + +#ifndef _SOCK_UTIL_ +#define _SOCK_UTIL_ + +typedef struct hostent *hostent_ptr_t; +typedef struct netent *netent_ptr_t; +typedef struct servent *servent_ptr_t; + +extern ml_val_t _util_NetDB_mkhostent (ml_state_t *msp, hostent_ptr_t hentry); +extern ml_val_t _util_NetDB_mknetent (ml_state_t *msp, netent_ptr_t nentry); +extern ml_val_t _util_NetDB_mkservent (ml_state_t *msp, servent_ptr_t sentry); +extern ml_val_t _util_Sock_ControlFlg (ml_state_t *msp, ml_val_t arg, int option); + +extern sysconst_tbl_t _Sock_AddrFamily; +extern sysconst_tbl_t _Sock_Type; + +#endif /* !_SOCK_UTIL_ */ + diff --git a/base/runtime/c-libs/smlnj-sockets/socket.c b/base/runtime/c-libs/smlnj-sockets/socket.c new file mode 100644 index 0000000..736b034 --- /dev/null +++ b/base/runtime/c-libs/smlnj-sockets/socket.c @@ -0,0 +1,28 @@ +/* socket.c + * + * COPYRIGHT (c) 1995 AT&T Bell Laboratories. + */ + +#include "sockets-osdep.h" +#include INCLUDE_SOCKET_H +#include "ml-base.h" +#include "ml-values.h" +#include "ml-c.h" +#include "cfun-proto-list.h" + +/* _ml_Sock_socket : (int * int * int) -> sock + */ +ml_val_t _ml_Sock_socket (ml_state_t *msp, ml_val_t arg) +{ + int domain = REC_SELINT(arg, 0); + int type = REC_SELINT(arg, 1); + int protocol = REC_SELINT(arg, 2); + int sock; + + sock = socket (domain, type, protocol); + if (sock < 0) + return RAISE_SYSERR(msp, sts); + else + return INT_CtoML(sock); + +} /* end of _ml_Sock_socket */ diff --git a/base/runtime/c-libs/smlnj-sockets/socketpair.c b/base/runtime/c-libs/smlnj-sockets/socketpair.c new file mode 100644 index 0000000..f2614c9 --- /dev/null +++ b/base/runtime/c-libs/smlnj-sockets/socketpair.c @@ -0,0 +1,40 @@ +/* socketpair.c + * + * COPYRIGHT (c) 1995 AT&T Bell Laboratories. + * + * NOTE: this file is UNIX specific. + */ + +#include "ml-unixdep.h" +#include "sockets-osdep.h" +#include INCLUDE_TYPES_H +#include INCLUDE_SOCKET_H +#include "ml-base.h" +#include "ml-values.h" +#include "ml-objects.h" +#include "ml-c.h" +#include "cfun-proto-list.h" + +/* _ml_Sock_socketpair : (int * int * int) -> (sock * sock) + * + * Create a pair of sockets. The arguments are: domain (should be + * AF_UNIX), type, and protocol. + */ +ml_val_t _ml_Sock_socketpair (ml_state_t *msp, ml_val_t arg) +{ + int domain = REC_SELINT(arg, 0); + int type = REC_SELINT(arg, 1); + int protocol = REC_SELINT(arg, 2); + int sts, sock[2]; + + sts = socketpair (domain, type, protocol, sock); + + if (sts < 0) + return RAISE_SYSERR(msp, sts); + else { + ml_val_t res; + REC_ALLOC2(msp, res, INT_CtoML(sock[0]), INT_CtoML(sock[1])); + return res; + } + +} /* end of _ml_Sock_socketpair */ diff --git a/base/runtime/c-libs/smlnj-sockets/sockets-osdep.h b/base/runtime/c-libs/smlnj-sockets/sockets-osdep.h new file mode 100644 index 0000000..8c58dd5 --- /dev/null +++ b/base/runtime/c-libs/smlnj-sockets/sockets-osdep.h @@ -0,0 +1,69 @@ +/* sockets-osdep.h + * + * COPYRIGHT (c) 1995 AT&T Bell Laboratories. + * + * O.S. specific dependencies needed by the sockets library. + */ + +#ifndef _SOCKETS_OSDEP_ +#define _SOCKETS_OSDEP_ + +#if defined(OPSYS_UNIX) +# define HAS_UNIX_DOMAIN +# define INCLUDE_SOCKET_H +# define INCLUDE_IN_H +# define INCLUDE_TCP_H +# define INCLUDE_UN_H + +# if defined(OPSYS_SOLARIS) +# define INCLUDE_RPCENT_H + +typedef char *sockoptval_t; /* The pointer type used to pass values to */ + /* getsockopt/setsockopt */ + +# define BSD_COMP /* needed to include FION* in ioctl.h */ + +# else +typedef void *sockoptval_t; /* The pointer type used to pass values to */ + /* getsockopt/setsockopt */ +# endif + +# if (defined(OPSYS_AIX)) +# define _SUN /* to get the rpcent definitions */ +# define SOCKADDR_HAS_LEN /* socket address has a length field */ +# endif + +# if (defined(OPSYS_FREEBSD) || defined (OPSYS_NETBSD) || defined (OPSYS_NETBSD2)) +# define i386 1 /* to avoid a bug in system header files */ +# define INCLUDE_RPCENT_H +# endif + +#include "ml-unixdep.h" +/* FIXME: The following includes are not needed in every file, yet they + cannot be moved to where they are used since that would break compilation + under Windows */ +#include INCLUDE_TYPES_H +#include INCLUDE_IN_H +#include INCLUDE_TCP_H +#include +#include +#include + +#elif defined(OPSYS_WIN32) || defined(OPSYS_CYGWIN) +# define INCLUDE_SOCKET_H + +/* This type is not defined in winsock2.h */ +typedef int socklen_t; + +/* FIXME: Is ioctlsocket() on Windows really the same as ioctl() on Unix? + It does seem so, yet the second parameter is of a different type */ +# define ioctl ioctlsocket + +typedef char *sockoptval_t; /* The pointer type used to pass values to */ + /* getsockopt/setsockopt */ +#endif + +#define MAX_SOCK_ADDR_SZB 1024 + +#endif /* !_SOCKETS_OSDEP_ */ + diff --git a/base/runtime/c-libs/smlnj-sockets/tbl-addr-family.c b/base/runtime/c-libs/smlnj-sockets/tbl-addr-family.c new file mode 100644 index 0000000..513ace1 --- /dev/null +++ b/base/runtime/c-libs/smlnj-sockets/tbl-addr-family.c @@ -0,0 +1,92 @@ +/* tbl-addr-family.c + * + * COPYRIGHT (c) 1995 AT&T Bell Laboratories. + */ + +#include "sockets-osdep.h" +#include INCLUDE_SOCKET_H +#include "ml-base.h" +#include "sock-util.h" + +/** The table of address-family names **/ +PVT sys_const_t tbl[] = { + {AF_UNIX, "UNIX"}, + {AF_INET, "INET"}, +#ifdef AF_IMPLINK + {AF_IMPLINK, "IMPLINK"}, +#endif +#ifdef AF_PUP + {AF_PUP, "PUP"}, +#endif +#ifdef AF_CHAOS + {AF_CHAOS, "CHAOS"}, +#endif +#ifdef AF_NS + {AF_NS, "NS"}, +#endif +#ifdef AF_ISO + {AF_ISO, "ISO"}, +#endif +#ifdef AF_ECMA + {AF_ECMA, "ECMA"}, +#endif +#ifdef AF_DATAKIT + {AF_DATAKIT, "DATAKIT"}, +#endif +#ifdef AF_CCITT + {AF_CCITT, "CCITT"}, +#endif +#ifdef AF_SNA + {AF_SNA, "SNA"}, +#endif +#ifdef AF_DECnet + {AF_DECnet, "DECnet"}, +#endif +#ifdef AF_DLI + {AF_DLI, "DLI"}, +#endif +#ifdef AF_LAT + {AF_LAT, "LAT"}, +#endif +#ifdef AF_HYLINK + {AF_HYLINK, "HYLINK"}, +#endif +#ifdef AF_APPLETALK + {AF_APPLETALK, "APPLETALK"}, +#endif +#ifdef AF_ROUTE + {AF_ROUTE, "ROUTE"}, +#endif +#ifdef AF_RAW + {AF_RAW, "RAW"}, +#endif +#ifdef AF_LINK + {AF_LINK, "LINK"}, +#endif +#ifdef AF_NIT + {AF_NIT, "NIT"}, +#endif +#ifdef AF_802 + {AF_802, "802"}, +#endif +#ifdef AF_OSI + {AF_OSI, "OSI"}, +#endif +#ifdef AF_X25 + {AF_X25, "X25"}, +#endif +#ifdef AF_OSINET + {AF_OSINET, "OSINET"}, +#endif +#ifdef AF_GOSIP + {AF_GOSIP, "GOSIP"}, +#endif +#ifdef AF_SDL + {AF_SDL, "SDL"}, +#endif + }; + +sysconst_tbl_t _Sock_AddrFamily = { + sizeof(tbl) / sizeof(sys_const_t), + tbl + }; diff --git a/base/runtime/c-libs/smlnj-sockets/tbl-sock-type.c b/base/runtime/c-libs/smlnj-sockets/tbl-sock-type.c new file mode 100644 index 0000000..76e2ff1 --- /dev/null +++ b/base/runtime/c-libs/smlnj-sockets/tbl-sock-type.c @@ -0,0 +1,30 @@ +/* tbl-sock-type.c + * + * COPYRIGHT (c) 1995 AT&T Bell Laboratories. + */ + +#include "sockets-osdep.h" +#include INCLUDE_SOCKET_H +#include "ml-base.h" +#include "sock-util.h" + +/** The table of socket-type names **/ +PVT sys_const_t tbl[] = { + {SOCK_STREAM, "STREAM"}, + {SOCK_DGRAM, "DGRAM"}, +#ifdef SOCK_RAW + {SOCK_RAW, "RAW"}, +#endif +#ifdef SOCK_RDM + {SOCK_RDM, "RDM"}, +#endif +#ifdef SOCK_SEQPACKET + {SOCK_SEQPACKET, "SEQPACKET"}, +#endif + }; + +sysconst_tbl_t _Sock_Type = { + sizeof(tbl) / sizeof(sys_const_t), + tbl + }; + diff --git a/base/runtime/c-libs/smlnj-sockets/to-inetaddr.c b/base/runtime/c-libs/smlnj-sockets/to-inetaddr.c new file mode 100644 index 0000000..38f1f17 --- /dev/null +++ b/base/runtime/c-libs/smlnj-sockets/to-inetaddr.c @@ -0,0 +1,40 @@ +/* to-inetaddr.c + * + * COPYRIGHT (c) 1995 AT&T Bell Laboratories. + */ + +#include "sockets-osdep.h" +#include INCLUDE_SOCKET_H +#include "ml-base.h" +#include "ml-values.h" +#include "ml-objects.h" +#include "ml-c.h" +#include "cfun-proto-list.h" + + +/* _ml_Sock_toinetaddr : (in_addr * int) -> addr + * + * Given a INET address and port number, allocate a INET-domain socket address. + */ +ml_val_t _ml_Sock_toinetaddr (ml_state_t *msp, ml_val_t arg) +{ + ml_val_t inAddr = REC_SEL(arg, 0); + ml_val_t data, res; + struct sockaddr_in addr; + + memset(&addr, 0, sizeof(struct sockaddr_in)); + + addr.sin_family = AF_INET; + memcpy ( + &addr.sin_addr, + GET_SEQ_DATAPTR(char, inAddr), + sizeof(struct in_addr)); + addr.sin_port = htons(REC_SELINT(arg, 1)); + + data = ML_CData (msp, &addr, sizeof(struct sockaddr_in)); + SEQHDR_ALLOC (msp, res, DESC_word8vec, data, sizeof(struct sockaddr_in)); + + return res; + +} /* end of _ml_Sock_toinetaddr */ + diff --git a/base/runtime/c-libs/smlnj-sockets/to-unixaddr.c b/base/runtime/c-libs/smlnj-sockets/to-unixaddr.c new file mode 100644 index 0000000..43110c7 --- /dev/null +++ b/base/runtime/c-libs/smlnj-sockets/to-unixaddr.c @@ -0,0 +1,46 @@ +/* to-unixaddr.c + * + * COPYRIGHT (c) 1995 AT&T Bell Laboratories. + */ + +#include "ml-unixdep.h" +#include "sockets-osdep.h" +#include INCLUDE_TYPES_H +#include INCLUDE_SOCKET_H +#include INCLUDE_UN_H +#include "ml-base.h" +#include "ml-values.h" +#include "ml-objects.h" +#include "ml-c.h" +#include "cfun-proto-list.h" + + +/* _ml_Sock_tounixaddr : string -> addr + * + * Given a path, allocate a UNIX-domain socket address. + */ +ml_val_t _ml_Sock_tounixaddr (ml_state_t *msp, ml_val_t arg) +{ + char *path = STR_MLtoC(arg); + struct sockaddr_un addr; + int len; + ml_val_t data, res; + + memset(&addr, 0, sizeof(struct sockaddr_un)); + + addr.sun_family = AF_UNIX; + strcpy (addr.sun_path, path); +#ifdef SOCKADDR_HAS_LEN + len = strlen(path)+sizeof(addr.sun_len)+sizeof(addr.sun_family)+1; + addr.sun_len = len; +#else + len = strlen(path)+sizeof(addr.sun_family)+1; +#endif + + data = ML_CData (msp, &addr, len); + SEQHDR_ALLOC (msp, res, DESC_word8vec, data, len); + + return res; + +} /* end of _ml_Sock_tounixaddr */ + diff --git a/base/runtime/c-libs/smlnj-sockets/util-mkhostent.c b/base/runtime/c-libs/smlnj-sockets/util-mkhostent.c new file mode 100644 index 0000000..4fab8f0 --- /dev/null +++ b/base/runtime/c-libs/smlnj-sockets/util-mkhostent.c @@ -0,0 +1,48 @@ +/* util-mkhostent.c + * + * COPYRIGHT (c) 1995 AT&T Bell Laboratories. + */ + +#include "sockets-osdep.h" +#include INCLUDE_SOCKET_H +#include "ml-base.h" +#include "ml-values.h" +#include "ml-objects.h" +#include "ml-c.h" +#include "sock-util.h" + +/* _util_NetDB_mkhostent: + * + * Allocate an ML value of type + * (string * string list * addr_family * addr list) option + * to represent a struct hostent value. + * + * NOTE: we should probably be passing back the value of h_errno, but this + * will require an API change at the SML level. + */ +ml_val_t _util_NetDB_mkhostent (ml_state_t *msp, struct hostent *hentry) +{ + if (hentry == NIL(struct hostent *)) + return OPTION_NONE; + else { + /* build the return result */ + ml_val_t name, aliases, af, addr, addrs, res; + int nAddrs, i; + + name = ML_CString(msp, hentry->h_name); + aliases = ML_CStringList(msp, hentry->h_aliases); + af = ML_SysConst (msp, &_Sock_AddrFamily, hentry->h_addrtype); + for (nAddrs = 0; hentry->h_addr_list[nAddrs] != NIL(char *); nAddrs++) + continue; + for (i = nAddrs, addrs = LIST_nil; --i >= 0; ) { + addr = ML_AllocString (msp, hentry->h_length); + memcpy (GET_SEQ_DATAPTR(void, addr), hentry->h_addr_list[i], + hentry->h_length); + LIST_cons(msp, addrs, addr, addrs); + } + REC_ALLOC4 (msp, res, name, aliases, af, addrs); + OPTION_SOME (msp, res, res); + return res; + } + +} /* end of _util_NetDB_mkhostent */ diff --git a/base/runtime/c-libs/smlnj-sockets/util-mknetent.c b/base/runtime/c-libs/smlnj-sockets/util-mknetent.c new file mode 100644 index 0000000..18f0d5a --- /dev/null +++ b/base/runtime/c-libs/smlnj-sockets/util-mknetent.c @@ -0,0 +1,36 @@ +/* util-mknetent.c + * + * COPYRIGHT (c) 1995 AT&T Bell Laboratories. + */ + +#include "sockets-osdep.h" +#include INCLUDE_SOCKET_H +#include "ml-base.h" +#include "ml-values.h" +#include "ml-objects.h" +#include "ml-c.h" +#include "sock-util.h" + +/* _util_NetDB_mknetent: + * + * Allocate an ML value of type + * (string * string list * addr_family * sysword) option + * to represent a struct netent value. + */ +ml_val_t _util_NetDB_mknetent (ml_state_t *msp, struct netent *nentry) +{ + if (nentry == NIL(struct netent *)) + return OPTION_NONE; + else { + /* build the return result */ + ml_val_t name, aliases, af, net, res; + + name = ML_CString(msp, nentry->n_name); + aliases = ML_CStringList(msp, nentry->n_aliases); + af = ML_SysConst (msp, &_Sock_AddrFamily, nentry->n_addrtype); + WORD_ALLOC(msp, net, (Word_t)(nentry->n_net)); + REC_ALLOC4 (msp, res, name, aliases, af, net); + OPTION_SOME (msp, res, res); + return res; + } +} /* end of _util_NetDB_mknetent */ diff --git a/base/runtime/c-libs/smlnj-sockets/util-mkservent.c b/base/runtime/c-libs/smlnj-sockets/util-mkservent.c new file mode 100644 index 0000000..1a659e5 --- /dev/null +++ b/base/runtime/c-libs/smlnj-sockets/util-mkservent.c @@ -0,0 +1,38 @@ +/* util-mkservent.c + * + * COPYRIGHT (c) 1995 AT&T Bell Laboratories. + */ + +#include "sockets-osdep.h" +#include INCLUDE_SOCKET_H +#include "ml-base.h" +#include "ml-values.h" +#include "ml-objects.h" +#include "ml-c.h" +#include "sock-util.h" + +/* _util_NetDB_mkservent: + * + * Allocate an ML value of type: + * (string * string list * int * string) option + * to represent a struct servent value. Note that the port number is returned + * in network byteorder, so we need to map it to host order. + */ +ml_val_t _util_NetDB_mkservent (ml_state_t *msp, struct servent *sentry) +{ + if (sentry == NIL(struct servent *)) + return OPTION_NONE; + else { + /* build the return result */ + ml_val_t name, aliases, port, proto, res; + + name = ML_CString(msp, sentry->s_name); + aliases = ML_CStringList(msp, sentry->s_aliases); + port = INT_CtoML(ntohs(sentry->s_port)); + proto = ML_CString(msp, sentry->s_proto); + REC_ALLOC4 (msp, res, name, aliases, port, proto); + OPTION_SOME (msp, res, res); + return res; + } + +} /* end of _util_NetDB_mkservent */ diff --git a/base/runtime/c-libs/smlnj-sockets/util-sockopt.c b/base/runtime/c-libs/smlnj-sockets/util-sockopt.c new file mode 100644 index 0000000..34a3a34 --- /dev/null +++ b/base/runtime/c-libs/smlnj-sockets/util-sockopt.c @@ -0,0 +1,40 @@ +/* util-sockopt.c + * + * COPYRIGHT (c) 1995 AT&T Bell Laboratories. + */ + +#include "sockets-osdep.h" +#include INCLUDE_SOCKET_H +#include "ml-base.h" +#include "ml-values.h" +#include "ml-objects.h" +#include "ml-c.h" +#include "cfun-proto-list.h" + +/* _util_Sock_ControlFlg: + * + * This utility routine gets/sets a boolean socket option. + */ +ml_val_t _util_Sock_ControlFlg (ml_state_t *msp, ml_val_t arg, int option) +{ + int sock = REC_SELINT(arg, 0); + ml_val_t ctl = REC_SEL(arg, 1); + int flg, sts; + + if (ctl == OPTION_NONE) { + socklen_t optSz = sizeof(int); + sts = getsockopt (sock, SOL_SOCKET, option, (sockoptval_t)&flg, &optSz); + ASSERT((sts < 0) || (optSz == sizeof(int))); + } + else { + flg = INT_MLtoC(OPTION_get(ctl)); + sts = setsockopt (sock, SOL_SOCKET, option, (sockoptval_t)&flg, sizeof(int)); + } + + if (sts < 0) + return RAISE_SYSERR(msp, sts); + else + return (flg ? ML_true : ML_false); + +} /* end of _util_Sock_ControlFlg */ + diff --git a/base/runtime/c-libs/smlnj-time/cfun-list.h b/base/runtime/c-libs/smlnj-time/cfun-list.h new file mode 100644 index 0000000..9c8ab57 --- /dev/null +++ b/base/runtime/c-libs/smlnj-time/cfun-list.h @@ -0,0 +1,16 @@ +/*! \file cfun-list.h + * + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * This file lists the directory library of C functions that are callable by ML. + */ + +#ifndef CLIB_NAME +#define CLIB_NAME "SMLNJ-Time" +#define CLIB_VERSION "1.1" +#define CLIB_DATE "June 9, 2019" +#endif + +CFUNC("gettime", _ml_Time_gettime, "") +CFUNC("timeofday", _ml_Time_timeofday, "") diff --git a/base/runtime/c-libs/smlnj-time/cfun-proto-list.h b/base/runtime/c-libs/smlnj-time/cfun-proto-list.h new file mode 100644 index 0000000..16b3e96 --- /dev/null +++ b/base/runtime/c-libs/smlnj-time/cfun-proto-list.h @@ -0,0 +1,18 @@ +/* cfun-proto-list.h + * + * COPYRIGHT (c) 1194 AT&T Bell Laboratories. + */ + +#ifndef _CFUN_PROTO_LIST_ +#define _CFUN_PROTO_LIST_ + +#ifndef _C_LIBRARY_ +# include "c-library.h" +#endif + +/* the external definitions for the C functions */ +#define CFUNC(NAME, FUNC, MLTYPE) CFUNC_PROTO(NAME, FUNC, MLTYPE) +#include "cfun-list.h" +#undef CFUNC + +#endif /* !_CFUN_PROTO_LIST_ */ diff --git a/base/runtime/c-libs/smlnj-time/gettime.c b/base/runtime/c-libs/smlnj-time/gettime.c new file mode 100644 index 0000000..ca1ea86 --- /dev/null +++ b/base/runtime/c-libs/smlnj-time/gettime.c @@ -0,0 +1,36 @@ +/* gettime.c + * + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + */ + +#include "ml-base.h" +#include "ml-values.h" +#include "ml-objects.h" +#include "vproc-state.h" +#include "ml-state.h" +#include "ml-timer.h" +#include "cfun-proto-list.h" + +/* _ml_Time_gettime : unit -> Int64.int * Int64.int * Int64.int + * + * Return the total CPU time, system time and garbage collection time used by this + * process so far. + */ +ml_val_t _ml_Time_gettime (ml_state_t *msp, ml_val_t arg) +{ + Time_t t, s; + ml_val_t cpuT, sysT, gcT, res; + vproc_state_t *vsp = msp->ml_vproc; + + GetCPUTime (&t, &s); + + cpuT = ML_AllocNanoseconds(msp, t.seconds, t.uSeconds); + sysT = ML_AllocNanoseconds(msp, s.seconds, s.uSeconds); + gcT = ML_AllocNanoseconds(msp, vsp->vp_gcTime->seconds, vsp->vp_gcTime->uSeconds); + + REC_ALLOC3(msp, res, cpuT, sysT, gcT); + + return res; + +} /* end of _ml_Time_gettime */ diff --git a/base/runtime/c-libs/smlnj-time/makefile b/base/runtime/c-libs/smlnj-time/makefile new file mode 100644 index 0000000..7d8a3e1 --- /dev/null +++ b/base/runtime/c-libs/smlnj-time/makefile @@ -0,0 +1,40 @@ +# +# the makefile for the Time library +# + +SHELL = /bin/sh + +INC_DIR = ../../include +CLIB_DIR = ../ + +INCLUDES = -I$(INC_DIR) -I$(CLIB_DIR) -I../../objs + +MAKE = make +AR = ar +ARFLAGS = rcv +RANLIB = ranlib + +LIBRARY = libsmlnj-time.a + +VERSION = v-dummy + +OBJS = smlnj-time-lib.o \ + gettime.o \ + timeofday.o + +$(LIBRARY) : $(VERSION) $(OBJS) + rm -rf $(LIBRARY) + $(AR) $(ARFLAGS) $(LIBRARY) $(OBJS) + $(RANLIB) $(LIBRARY) + +$(VERSION) : + ($(MAKE) MAKE="$(MAKE)" clean) + echo "$(VERSION)" > $(VERSION) + +.c.o: $(INC_DIR)/ml-unixdep.h $(INC_DIR)/ml-base.h $(INC_DIR)/ml-values.h \ + $(CLIB_DIR)/ml-c.h cfun-proto-list.h cfun-list.h + $(CC) $(CFLAGS) $(DEFS) $(INCLUDES) -c $< + +clean : + rm -f v-* *.o $(LIBRARY) + diff --git a/base/runtime/c-libs/smlnj-time/makefile.win32 b/base/runtime/c-libs/smlnj-time/makefile.win32 new file mode 100644 index 0000000..6b676cc --- /dev/null +++ b/base/runtime/c-libs/smlnj-time/makefile.win32 @@ -0,0 +1,50 @@ +# +# the makefile for the Time library +# win32 specific + +SHELL = + +INC_DIR = ..\..\include +CLIB_DIR = ..\ + +INCLUDES = /I$(INC_DIR) /I$(CLIB_DIR) /I..\..\objs + +MAKEFILE = makefile.win32 +MAKE = nmake /F$(MAKEFILE) +AR = lib +ARFLAGS = +RANLIB = lib + +LIBRARY = libsmlnj-time.lib + +VERSION = v-dummy + +OBJS = smlnj-time-lib.obj \ + gettime.obj \ + timeofday.obj + +$(LIBRARY) : $(VERSION) $(OBJS) + del /F /Q $(LIBRARY) + $(AR) $(ARFLAGS) /out:$(LIBRARY) $(OBJS) + $(RANLIB) /out:$(LIBRARY) + +$(VERSION) : + ($(MAKE) MAKE="$(MAKE)" clean) + echo "$(VERSION)" > $(VERSION) + +DEPENDENTS = $(INC_DIR)\ml-unixdep.h $(INC_DIR)\ml-base.h \ + $(INC_DIR)\ml-values.h \ + $(CLIB_DIR)\ml-c.h cfun-proto-list.h cfun-list.h + +smlnj-time-lib.obj: smlnj-time-lib.c $(DEPENDENTS) + $(CC) $(CFLAGS) $(DEFS) $(INCLUDES) /c smlnj-time-lib.c + +gettime.obj: gettime.c $(DEPENDENTS) + $(CC) $(CFLAGS) $(DEFS) $(INCLUDES) /c gettime.c + +timeofday.obj: timeofday.c $(DEPENDENTS) + $(CC) $(CFLAGS) $(DEFS) $(INCLUDES) /c timeofday.c + +clean : + del /F /Q v-* *.obj *.pdb $(LIBRARY) + diff --git a/base/runtime/c-libs/smlnj-time/smlnj-time-lib.c b/base/runtime/c-libs/smlnj-time/smlnj-time-lib.c new file mode 100644 index 0000000..aac5cc5 --- /dev/null +++ b/base/runtime/c-libs/smlnj-time/smlnj-time-lib.c @@ -0,0 +1,28 @@ +/* smlnj-time-lib.c + * + * COPYRIGHT (c) 1994 AT&T Bell Laboratories. + */ + +#include "ml-base.h" +#include "c-library.h" +#include "cfun-proto-list.h" + + +/* the table of C functions and ML names */ +#define CFUNC(NAME, FUNC, MLTYPE) CFUNC_BIND(NAME, FUNC, MLTYPE) +PVT cfunc_binding_t CFunTable[] = { +#include "cfun-list.h" + CFUNC_NULL_BIND + }; +#undef CFUNC + + +/* the Time library */ +c_library_t SMLNJ_Time_Library = { + CLIB_NAME, + CLIB_VERSION, + CLIB_DATE, + NIL(clib_init_fn_t), + CFunTable + }; + diff --git a/base/runtime/c-libs/smlnj-time/timeofday.c b/base/runtime/c-libs/smlnj-time/timeofday.c new file mode 100644 index 0000000..5b9c455 --- /dev/null +++ b/base/runtime/c-libs/smlnj-time/timeofday.c @@ -0,0 +1,49 @@ +/* timeofday.c + * + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + */ + +# include "ml-osdep.h" +#if defined(OPSYS_WIN32) +# include +#elif defined(HAS_GETTIMEOFDAY) +# include +#else +# error no timeofday mechanism +#endif +#include "ml-base.h" +#include "ml-values.h" +#include "ml-objects.h" +#include "cfun-proto-list.h" + +/* _ml_Time_timeofday : unit -> Word64.word + * + * Return the UTC time of day in nanoseconds. + */ +ml_val_t _ml_Time_timeofday (ml_state_t *msp, ml_val_t arg) +{ +#if defined(OPSYS_UNIX) + struct timeval t; + + gettimeofday (&t, NIL(struct timezone *)); + + return ML_AllocNanoseconds(msp, t.tv_sec, t.tv_usec); +#elif defined(OPSYS_WIN32) + FILETIME ft; + ULARGE_INTEGER uli; + Unsigned64_t ns; + + GetSystemTimeAsFileTime (&ft); + + /* convert to nanoseconds; FILETIME is in units of 100ns */ + uli.HighPart = ft.dwHighDateTime; + uli.LowPart = ft.dwLowDateTime; + ns = 100 * uli.QuadPart; + + return ML_AllocWord64(msp, ns); +#else +# error no timeofday mechanism +#endif + +} /* end of _ml_Time_timeofday */ diff --git a/base/runtime/c-libs/templates/README b/base/runtime/c-libs/templates/README new file mode 100644 index 0000000..722a0b8 --- /dev/null +++ b/base/runtime/c-libs/templates/README @@ -0,0 +1,16 @@ +These are template files for the glue needed to put an ML callable +library together. See the "HOWTO-ADD-C-CODE" file in the "notes" +directory for complete details. The files are used as follows: + + makefile a makefile template; define the make variables LIBRARY + and OBJS. + + cfun-proto-list.h copy this file unchanged + + cfun-list.h copy this file, and replace the fields in <<>>. Add + a CFUNC entry for each C function that is callable from + ML. + + library-template.c copy and rename this file; change the <> + to the name of your library table. + diff --git a/base/runtime/c-libs/templates/cfun-list.h b/base/runtime/c-libs/templates/cfun-list.h new file mode 100644 index 0000000..1a359a9 --- /dev/null +++ b/base/runtime/c-libs/templates/cfun-list.h @@ -0,0 +1,15 @@ +/* cfun-list.h + * + * COPYRIGHT (c) 1994 AT&T Bell Laboratories. + * + * This file lists the directory library of C functions that are callable by ML. + */ + +#ifndef CLIB_NAME +#define CLIB_NAME "<>" +#define CLIB_VERSION "<>" +#define CLIB_DATE "<>" +#endif + +CFUNC("<>", <>, "<>") + diff --git a/base/runtime/c-libs/templates/cfun-proto-list.h b/base/runtime/c-libs/templates/cfun-proto-list.h new file mode 100644 index 0000000..16b3e96 --- /dev/null +++ b/base/runtime/c-libs/templates/cfun-proto-list.h @@ -0,0 +1,18 @@ +/* cfun-proto-list.h + * + * COPYRIGHT (c) 1194 AT&T Bell Laboratories. + */ + +#ifndef _CFUN_PROTO_LIST_ +#define _CFUN_PROTO_LIST_ + +#ifndef _C_LIBRARY_ +# include "c-library.h" +#endif + +/* the external definitions for the C functions */ +#define CFUNC(NAME, FUNC, MLTYPE) CFUNC_PROTO(NAME, FUNC, MLTYPE) +#include "cfun-list.h" +#undef CFUNC + +#endif /* !_CFUN_PROTO_LIST_ */ diff --git a/base/runtime/c-libs/templates/makefile b/base/runtime/c-libs/templates/makefile new file mode 100644 index 0000000..76ba760 --- /dev/null +++ b/base/runtime/c-libs/templates/makefile @@ -0,0 +1,38 @@ +# +# a template makefile for a C function library +# + +SHELL = /bin/sh + +INC_DIR = ../../include +CLIB_DIR = ../ + +INCLUDES = -I$(INC_DIR) -I$(CLIB_DIR) -I../../objs + +MAKE = make +AR = ar +ARFLAGS = rcv +RANLIB = ranlib + +LIBRARY = <> + +VERSION = v-dummy + +OBJS = <> + +$(LIBRARY) : $(VERSION) $(OBJS) + rm -rf $(LIBRARY) + $(AR) $(ARFLAGS) $(LIBRARY) $(OBJS) + $(RANLIB) $(LIBRARY) + +$(VERSION) : + ($(MAKE) MAKE="$(MAKE)" clean) + echo "$(VERSION)" > $(VERSION) + +.c.o: $(INC_DIR)/ml-unixdep.h $(INC_DIR)/ml-base.h $(INC_DIR)/ml-values.h \ + $(CLIB_DIR)/ml-c.h cfun-proto-list.h cfun-list.h + $(CC) $(CFLAGS) $(DEFS) $(INCLUDES) -c $< + +clean : + rm -f v-* *.o $(LIBRARY) + diff --git a/base/runtime/c-libs/templates/template-lib.c b/base/runtime/c-libs/templates/template-lib.c new file mode 100644 index 0000000..fe775bd --- /dev/null +++ b/base/runtime/c-libs/templates/template-lib.c @@ -0,0 +1,28 @@ +/* <> + * + * COPYRIGHT (c) 1994 AT&T Bell Laboratories. + */ + +#include "ml-base.h" +#include "c-library.h" +#include "cfun-proto-list.h" + + +/* the table of C functions and ML names */ +#define CFUNC(NAME, FUNC, MLTYPE) CFUNC_BIND(NAME, FUNC, MLTYPE) +PVT cfunc_binding_t CFunTable[] = { +#include "cfun-list.h" + CFUNC_NULL_BIND + }; +#undef CFUNC + + +/* the <> library */ +c_library_t <> = { + CLIB_NAME, + CLIB_VERSION, + CLIB_DATE, + NIL(clib_init_fn_t), + CFunTable + }; + diff --git a/base/runtime/c-libs/unix-raise-syserr.c b/base/runtime/c-libs/unix-raise-syserr.c new file mode 100644 index 0000000..85ae7cb --- /dev/null +++ b/base/runtime/c-libs/unix-raise-syserr.c @@ -0,0 +1,85 @@ +/* unix-raise-syserr.c + * + * COPYRIGHT (c) 1995 by AT&T Bell Laboratories. + */ + +#include "ml-unixdep.h" +#ifdef HAS_STRERROR +# include +#endif +#include +#include +#include "ml-base.h" +#include "ml-state.h" +#include "ml-objects.h" +#include "ml-globals.h" +#include "ml-c.h" + + +#ifndef HAS_STRERROR +/* strerror: + * An implementation of strerror for those systems that do not provide it. + */ +PVT char *strerror (int errnum) +{ + extern int sys_nerr; + extern char *sys_errlist[]; + + if ((errnum < 0) || (sys_nerr <= errnum)) + return ""; + else + return sys_errlist[errnum]; + +} /* end of strerror */ +#endif + + +/* RaiseSysError: + * + * Raise the ML exception SysErr, which has the spec: + * + * exception SysErr of (string * syserror option) + * + * For the time being, we use the errno value as the syserror; eventually that + * will be represented by an (int * string) pair. If alt_msg is non-zero, + * then use it as the error string and use NONE for the syserror. + */ +ml_val_t RaiseSysError (ml_state_t *msp, const char *altMsg, const char *at) +{ + ml_val_t s, atStk, syserror, arg, exn; + const char *msg; + char buf[32]; + + if (altMsg != NIL(char *)) { + msg = altMsg; + syserror = OPTION_NONE; + } + else if ((msg = strerror(errno)) != NIL(char *)) { + OPTION_SOME(msp, syserror, INT_CtoML(errno)) + } + else { + sprintf(buf, "", errno); + msg = buf; + OPTION_SOME(msp, syserror, INT_CtoML(errno)); + } + +#if (defined(DEBUG_OS_INTERFACE) || defined(DEBUG_TRACE_CCALL)) + SayDebug ("RaiseSysError: errno = %d, msg = \"%s\"\n", + (altMsg != NIL(char *)) ? -1 : errno, msg); +#endif + + s = ML_CString (msp, msg); + if (at != NIL(char *)) { + ml_val_t atMsg = ML_CString (msp, at); + LIST_cons(msp, atStk, atMsg, LIST_nil); + } + else + atStk = LIST_nil; + REC_ALLOC2 (msp, arg, s, syserror); + EXN_ALLOC (msp, exn, PTR_CtoML(SysErrId), arg, atStk); + + RaiseMLExn (msp, exn); + + return exn; + +} /* end of RaiseSysError */ diff --git a/base/runtime/c-libs/win32-filesys/README b/base/runtime/c-libs/win32-filesys/README new file mode 100644 index 0000000..d41f8be --- /dev/null +++ b/base/runtime/c-libs/win32-filesys/README @@ -0,0 +1,2 @@ +this directory contains C functions that interface to the win32 filesystem +and are required for supporting the basis diff --git a/base/runtime/c-libs/win32-filesys/cfun-list.h b/base/runtime/c-libs/win32-filesys/cfun-list.h new file mode 100644 index 0000000..94b6ec0 --- /dev/null +++ b/base/runtime/c-libs/win32-filesys/cfun-list.h @@ -0,0 +1,31 @@ +/* cfun-list.h + * + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * win32 C functions for IO + */ + +#ifndef CLIB_NAME +#define CLIB_NAME "WIN32-FILESYS" +#define CLIB_VERSION "0.1" +#define CLIB_DATE "October 15, 1996" +#endif + +CFUNC("find_first_file", _ml_win32_FS_find_first_file, "") +CFUNC("find_next_file", _ml_win32_FS_find_next_file, "") +CFUNC("find_close", _ml_win32_FS_find_close, "") +CFUNC("set_current_directory", _ml_win32_FS_set_current_directory, "") +CFUNC("get_current_directory", _ml_win32_FS_get_current_directory, "") +CFUNC("create_directory", _ml_win32_FS_create_directory, "") +CFUNC("remove_directory", _ml_win32_FS_remove_directory, "") +CFUNC("get_file_attributes", _ml_win32_FS_get_file_attributes, "") +CFUNC("get_file_attributes_by_handle", _ml_win32_FS_get_file_attributes_by_handle, "") +CFUNC("get_full_path_name", _ml_win32_FS_get_full_path_name, "") +CFUNC("get_file_size", _ml_win32_FS_get_file_size, "") +CFUNC("get_file_size_by_name", _ml_win32_FS_get_file_size_by_name, "") +CFUNC("get_file_time", _ml_win32_FS_get_file_time, "") +CFUNC("set_file_time", _ml_win32_FS_set_file_time, "") +CFUNC("delete_file", _ml_win32_FS_delete_file, "") +CFUNC("move_file", _ml_win32_FS_move_file, "") +CFUNC("get_temp_file_name", _ml_win32_FS_get_temp_file_name, "") diff --git a/base/runtime/c-libs/win32-filesys/cfun-proto-list.h b/base/runtime/c-libs/win32-filesys/cfun-proto-list.h new file mode 100644 index 0000000..a7d5d07 --- /dev/null +++ b/base/runtime/c-libs/win32-filesys/cfun-proto-list.h @@ -0,0 +1,18 @@ +/* cfun-proto-list.h + * + * COPYRIGHT (c) 1996 Bell Laboratories, Lucent Technologies + */ + +#ifndef _CFUN_PROTO_LIST_ +#define _CFUN_PROTO_LIST_ + +#ifndef _C_LIBRARY_ +# include "c-library.h" +#endif + +/* the external definitions for the C functions */ +#define CFUNC(NAME, FUNC, MLTYPE) CFUNC_PROTO(NAME, FUNC, MLTYPE) +#include "cfun-list.h" +#undef CFUNC + +#endif /* !_CFUN_PROTO_LIST_ */ diff --git a/base/runtime/c-libs/win32-filesys/makefile.win32 b/base/runtime/c-libs/win32-filesys/makefile.win32 new file mode 100644 index 0000000..6e2a00c --- /dev/null +++ b/base/runtime/c-libs/win32-filesys/makefile.win32 @@ -0,0 +1,47 @@ +# +# the makefile for the Win32 filesystem library +# + +SHELL = + +INC_DIR = ..\..\include +CLIB_DIR = ..\ + +INCLUDES = /I$(INC_DIR) /I$(CLIB_DIR) /I..\..\objs + +MAKEFILE = makefile.win32 +MAKE = nmake /F$(MAKEFILE) +AR = lib +ARFLAGS = +RANLIB = lib + +LIBRARY = libwin32-filesys.lib + +VERSION = v-dummy + +OBJS = win32-filesys-lib.obj \ + win32-filesys.obj + +$(LIBRARY) : $(VERSION) $(OBJS) + del /F /Q $(LIBRARY) + $(AR) $(ARFLAGS) /out:$(LIBRARY) $(OBJS) + $(RANLIB) /out:$(LIBRARY) + +$(VERSION) : + ($(MAKE) MAKE="$(MAKE)" clean) + echo "$(VERSION)" > $(VERSION) + +DEPENDENTS = $(CLIB_DIR)\ml-c.h cfun-proto-list.h cfun-list.h $(INC_DIR)\c-library.h + + +win32-filesys-lib.obj: win32-filesys-lib.c $(DEPENDENTS) + $(CC) $(CFLAGS) $(DEFS) $(INCLUDES) /c win32-filesys-lib.c + +win32-filesys.obj: win32-filesys.c $(DEPENDENTS) \ + $(INC_DIR)\ml-base.h $(INC_DIR)\ml-values.h $(INC_DIR)\ml-objects.h + $(CC) $(CFLAGS) $(DEFS) $(INCLUDES) /c win32-filesys.c + +clean: + del /F /Q v-* *.obj *.pdb $(LIBRARY) + + diff --git a/base/runtime/c-libs/win32-filesys/win32-filesys-lib.c b/base/runtime/c-libs/win32-filesys/win32-filesys-lib.c new file mode 100644 index 0000000..9c462cb --- /dev/null +++ b/base/runtime/c-libs/win32-filesys/win32-filesys-lib.c @@ -0,0 +1,28 @@ +/* win32-io-lib.c + * + * COPYRIGHT (c) 1996 Bell Laboratories, Lucent Technologies + */ + +#include "ml-base.h" +#include "c-library.h" +#include "cfun-proto-list.h" + + +/* the table of C functions and ML names */ +#define CFUNC(NAME, FUNC, MLTYPE) CFUNC_BIND(NAME, FUNC, MLTYPE) +PVT cfunc_binding_t CFunTable[] = { +#include "cfun-list.h" + CFUNC_NULL_BIND + }; +#undef CFUNC + + +/* the Signals library */ +c_library_t WIN32_FileSys_Library = { + CLIB_NAME, + CLIB_VERSION, + CLIB_DATE, + NIL(clib_init_fn_t), + CFunTable + }; + diff --git a/base/runtime/c-libs/win32-filesys/win32-filesys.c b/base/runtime/c-libs/win32-filesys/win32-filesys.c new file mode 100644 index 0000000..44e68e8 --- /dev/null +++ b/base/runtime/c-libs/win32-filesys/win32-filesys.c @@ -0,0 +1,368 @@ +/*! \file win32-filesys.c + * + * Interface to win32 filesys functions + * + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + */ + +#include +#include "ml-base.h" +#include "ml-values.h" +#include "ml-objects.h" +#include "ml-c.h" + +#define TMP_PREFIX "TMP-SMLNJ" + +#define IS_DOTDIR(c) ((c)[0] == '.' && (!(c)[1] || ((c)[1] == '.' && !(c)[2]))) + +static WIN32_FIND_DATA wfd; + +static ml_val_t find_next_file (ml_state_t *msp, HANDLE h) +{ + ml_val_t fname_opt,fname; + + loop: + if (FindNextFile(h,&wfd)) { + if (IS_DOTDIR(wfd.cFileName)) + /* skip "." and ".." */ + goto loop; + fname = ML_CString(msp,wfd.cFileName); + OPTION_SOME(msp,fname_opt,fname); + } + else { + fname_opt = OPTION_NONE; + } + return fname_opt; +} + +/* _ml_win32_FS_find_next_file : handle -> (string option) + */ +ml_val_t _ml_win32_FS_find_next_file (ml_state_t *msp, ml_val_t arg) +{ + return find_next_file(msp, HANDLE_MLtoC(arg)); +} + +/* _ml_win32_FS_find_first_file : string -> (handle * string option) + */ +ml_val_t _ml_win32_FS_find_first_file (ml_state_t *msp, ml_val_t arg) +{ + HANDLE h = FindFirstFile(STR_MLtoC(arg), &wfd); + ml_val_t fname_opt, fname, ml_h, res; + + if (h != INVALID_HANDLE_VALUE) { + if (IS_DOTDIR(wfd.cFileName)) { + fname_opt = find_next_file(msp, h); + } + else { + fname = ML_CString(msp, wfd.cFileName); + OPTION_SOME(msp, fname_opt, fname); + } + } + else { + fname_opt = OPTION_NONE; + } + + ml_h = HANDLE_CtoML(msp, h); + REC_ALLOC2(msp, res, ml_h, fname_opt); + + return res; +} + +/* _ml_win32_FS_find_close : handle -> bool + */ +ml_val_t _ml_win32_FS_find_close (ml_state_t *msp, ml_val_t arg) +{ + return FindClose(HANDLE_MLtoC(arg)) ? ML_true : ML_false; +} + +/* _ml_win32_FS_set_current_directory : string -> bool + */ +ml_val_t _ml_win32_FS_set_current_directory (ml_state_t *msp, ml_val_t arg) +{ + return SetCurrentDirectory(STR_MLtoC(arg)) ? ML_true : ML_false; +} + +/* _ml_win32_FS_get_current_directory : unit -> string + */ +ml_val_t _ml_win32_FS_get_current_directory (ml_state_t *msp, ml_val_t arg) +{ + char buf[MAX_PATH]; + DWORD r = GetCurrentDirectory(MAX_PATH, buf); + + if (r == 0 || r > MAX_PATH) { + return RAISE_SYSERR(msp, -1); + } + else { + return ML_CString(msp,buf); + } +} + + +/* _ml_win32_FS_create_directory : string -> bool + */ +ml_val_t _ml_win32_FS_create_directory (ml_state_t *msp, ml_val_t arg) +{ +#ifdef DEBUG_WIN32 + BOOL sts = CreateDirectory(STR_MLtoC(arg), NULL); + if (sts) { + return ML_true; + } else { + SayDebug("create_directory(%s) failed; error = %d\n", STR_MLtoC(arg), GetLastError()); + return ML_false; + } +#else + return CreateDirectory(STR_MLtoC(arg), NULL) ? ML_true : ML_false; +#endif +} + +/* _ml_win32_FS_remove_directory : string -> bool + */ +ml_val_t _ml_win32_FS_remove_directory (ml_state_t *msp, ml_val_t arg) +{ + return RemoveDirectory(STR_MLtoC(arg)) ? ML_true : ML_false; +} + +/* _ml_win32_FS_get_file_attributes : string -> (word32 option) + */ +ml_val_t _ml_win32_FS_get_file_attributes (ml_state_t *msp, ml_val_t arg) +{ + DWORD w = GetFileAttributes(STR_MLtoC(arg)); + ml_val_t res, ml_w; + + if (w != INVALID_FILE_ATTRIBUTES) { + ml_w = INT32_CtoML(msp, w); + OPTION_SOME(msp, res, ml_w); + } + else { +#ifdef DEBUG_WIN32 + SayDebug("get_file_attributes(%s): error = %d\n", STR_MLtoC(arg), GetLastError()); +#endif + res = OPTION_NONE; + } + return res; +} + +/* _ml_win32_FS_get_file_attributes_by_handle : handle -> (word32 option) + */ +ml_val_t _ml_win32_FS_get_file_attributes_by_handle (ml_state_t *msp, ml_val_t arg) +{ + BY_HANDLE_FILE_INFORMATION bhfi; + ml_val_t ml_w, res; + + if (GetFileInformationByHandle(HANDLE_MLtoC(arg), &bhfi)) { + ml_w = INT32_CtoML(msp, bhfi.dwFileAttributes); + OPTION_SOME(msp,res,ml_w); + } + else { +#ifdef DEBUG_WIN32 + SayDebug("get_file_attributes_by_handle(%#x): error = %d\n", HANDLE_MLtoC(arg), GetLastError()); +#endif + res = OPTION_NONE; + } + return res; +} + +/* _ml_win32_FS_get_full_path_name : string -> string + */ +ml_val_t _ml_win32_FS_get_full_path_name (ml_state_t *msp, ml_val_t arg) +{ + char buf[MAX_PATH], *dummy; + DWORD r; + ml_val_t res; + + r = GetFullPathName(STR_MLtoC(arg), MAX_PATH, buf, &dummy); + if ((r == 0) || (r > MAX_PATH)) { +#ifdef DEBUG_WIN32 + SayDebug("get_full_path(%s): error = %d\n", STR_MLtoC(arg), GetLastError()); +#endif + return RAISE_SYSERR(msp, -1); + } + res = ML_CString(msp, buf); + return res; +} + +/* _ml_win32_FS_get_file_size : handle -> Position.int + */ +ml_val_t _ml_win32_FS_get_file_size (ml_state_t *msp, ml_val_t arg) +{ + LARGE_INTEGER sz; + + if (GetFileSizeEx(HANDLE_MLtoC(arg), &sz)) { + return ML_AllocInt64(msp, sz.QuadPart); + } + else { +#ifdef DEBUG_WIN32 + SayDebug("get_file_size(%#x): error = %d\n", HANDLE_MLtoC(arg), GetLastError()); +#endif + return RAISE_SYSERR(msp, -1); + } +} + +/* _ml_win32_FS_get_file_size_by_name : string -> (Position.int option) + */ +ml_val_t _ml_win32_FS_get_file_size_by_name (ml_state_t *msp, ml_val_t arg) +{ + HANDLE h; + + h = CreateFile ( + STR_MLtoC(arg), 0, 0, NULL, + OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, INVALID_HANDLE_VALUE); + + if (h != INVALID_HANDLE_VALUE) { + ml_val_t res = OPTION_NONE; + LARGE_INTEGER sz; + + if (GetFileSizeEx(h, &sz)) { + ml_val_t ml_sz = ML_AllocInt64(msp, sz.QuadPart); + OPTION_SOME(msp, res, ml_sz); + } + + CloseHandle(h); + + return res; + } + else { +#ifdef DEBUG_WIN32 + SayDebug("get_file_size_by_name(%s): error = %d\n", STR_MLtoC(arg), GetLastError()); +#endif + return OPTION_NONE; + } + +} + +/* _ml_win32_FS_get_file_time : string -> Word64.word option + */ +ml_val_t _ml_win32_FS_get_file_time (ml_state_t *msp, ml_val_t arg) +{ + HANDLE h; + ml_val_t ml_ns, res; + + h = CreateFile( + STR_MLtoC(arg), 0, 0, NULL, + OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, INVALID_HANDLE_VALUE); + + if (h != INVALID_HANDLE_VALUE) { + FILETIME ft; + if (GetFileTime(h, NULL, NULL, &ft)) { /* request time of "last write" */ + /* convert to 100-nanosecond units (FILETIME units) */ + Unsigned64_t ns = ((Unsigned64_t)ft.dwHighDateTime << 32) + (Unsigned64_t)ft.dwLowDateTime; + /* return nanoseconds */ + ml_ns = ML_AllocWord64(msp, 100 * ns); + OPTION_SOME(msp, res, ml_ns); + } + + CloseHandle(h); + } + else { +#ifdef DEBUG_WIN32 + SayDebug("get_file_time(%s) failed; error = %d\n", STR_MLtoC(arg), GetLastError()); +#endif + res = OPTION_NONE; + } + return res; +} + +/* _ml_win32_FS_set_file_time : (string * Word64.word option) -> bool + */ +ml_val_t _ml_win32_FS_set_file_time (ml_state_t *msp, ml_val_t arg) +{ + HANDLE h; + ml_val_t res = ML_false; + ml_val_t fname = REC_SEL(arg,0); + Unsigned64_t ns = WORD64_MLtoC(REC_SEL(arg,1)); + + h = CreateFile ( + STR_MLtoC(fname), GENERIC_WRITE, 0 ,NULL, + OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, INVALID_HANDLE_VALUE); + + if (h != INVALID_HANDLE_VALUE) { + FILETIME ft; + + ns /= 100; /* FILETIME is in units of 100ns */ + ft.dwHighDateTime = (DWORD)(ns >> 32); + ft.dwLowDateTime = (DWORD)ns; + + if (SetFileTime(h, NULL, NULL, &ft)) { + res = ML_true; + } +#ifdef DEBUG_WIN32 + else { + SayDebug("set_file_time(%s, %llu) failed; error = %d\n", + STR_MLtoC(fname), 100*ns, GetLastError()); + } +#endif + + CloseHandle (h); + } +#ifdef DEBUG_WIN32 + else { + SayDebug("set_file_time(%s, %llu) failed to get handle; error = %d\n", + STR_MLtoC(fname), ns, GetLastError()); + } +#endif + + return res; +} + +/* _ml_win32_FS_delete_file : string -> bool + */ +ml_val_t _ml_win32_FS_delete_file (ml_state_t *msp, ml_val_t arg) +{ +#ifdef DEBUG_WIN32 + BOOL sts = DeleteFile(STR_MLtoC(arg)); + if (sts) { + return ML_true; + } else { + SayDebug("DeleteFile(%s); error = %d\n", STR_MLtoC(arg), GetLastError()); + return ML_false; + } +#else + return DeleteFile (STR_MLtoC(arg)) ? ML_true : ML_false; +#endif +} + +/* _ml_win32_FS_move_file : (string * string) -> bool + */ +ml_val_t _ml_win32_FS_move_file (ml_state_t *msp, ml_val_t arg) +{ + ml_val_t f1 = REC_SEL(arg, 0); + ml_val_t f2 = REC_SEL(arg, 1); + + if (MoveFile (STR_MLtoC(f1), STR_MLtoC(f2))) { + return ML_true; + } + else { +#ifdef DEBUG_WIN32 + SayDebug ("move_file (%s, %s) failed; error = %d\n", + STR_MLtoC(f1), STR_MLtoC(f2), GetLastError()); +#endif + return ML_false; + } +} + +/* _ml_win32_FS_get_temp_file_name : unit -> string option + */ +ml_val_t _ml_win32_FS_get_temp_file_name (ml_state_t *msp, ml_val_t arg) +{ + ml_val_t res = OPTION_NONE; + char name_buf[MAX_PATH]; + char path_buf[MAX_PATH]; + DWORD pblen; + + pblen = GetTempPath(MAX_PATH, path_buf); + if ((pblen <= MAX_PATH) && (GetTempFileName(path_buf, TMP_PREFIX, 0, name_buf) != 0)) { + ml_val_t tfn = ML_CString(msp, name_buf); + + OPTION_SOME(msp, res, tfn); + } +#ifdef DEBUG_WIN32 + else { + SayDebug ("get_temp_file_name () failed; error = %d\n", GetLastError()); + } +#endif + + return res; +} + +/* end of win32-filesys.c */ diff --git a/base/runtime/c-libs/win32-io/README b/base/runtime/c-libs/win32-io/README new file mode 100644 index 0000000..456cdae --- /dev/null +++ b/base/runtime/c-libs/win32-io/README @@ -0,0 +1,2 @@ +this directory contains C functions that interface to the win32 io api +and are required for supporting the basis diff --git a/base/runtime/c-libs/win32-io/cfun-list.h b/base/runtime/c-libs/win32-io/cfun-list.h new file mode 100644 index 0000000..1a3f060 --- /dev/null +++ b/base/runtime/c-libs/win32-io/cfun-list.h @@ -0,0 +1,52 @@ +/* cfun-list.h + * + * COPYRIGHT (c) 1996 Bell Laboratories, Lucent Technologies + * + * win32 C functions for IO + */ + +#ifndef CLIB_NAME +#define CLIB_NAME "WIN32-IO" +#define CLIB_VERSION "0.2" +#define CLIB_DATE "May 22, 1998" +#endif + +CFUNC("get_std_handle",\ + _ml_win32_IO_get_std_handle,\ + "word32->word32") +CFUNC("set_file_pointer",\ + _ml_win32_IO_set_file_pointer,\ + "(word32*word32*word32)->word32") +CFUNC("read_vec",\ + _ml_win32_IO_read_vec,\ + "(word32*int)->word8vector.vector") +CFUNC("read_arr",\ + _ml_win32_IO_read_arr,\ + "(word32*word8array.array*int*int)->int") +CFUNC("read_vec_txt",\ + _ml_win32_IO_read_vec_txt,\ + "(word32*int)->char8vector.vector") +CFUNC("read_arr_txt",\ + _ml_win32_IO_read_arr_txt,\ + "(word32*char8array.array*int*int)->int") +CFUNC("close",\ + _ml_win32_IO_close,\ + "word32->unit") +CFUNC("create_file",\ + _ml_win32_IO_create_file,\ + "(string*word32*word32*word32*word32)->word32") +CFUNC("write_vec",\ + _ml_win32_IO_write_vec,\ + "(word32*word8vector.vector*int*int)->int") +CFUNC("write_arr",\ + _ml_win32_IO_write_arr,\ + "(word32*word8array.array*int*int)->int") +CFUNC("write_vec_txt",\ + _ml_win32_IO_write_vec_txt,\ + "(word32*word8vector.vector*int*int)->int") +CFUNC("write_arr_txt",\ + _ml_win32_IO_write_arr_txt,\ + "(word32*word8array.array*int*int)->int") + +CFUNC("poll", _ml_win32_OS_poll, "((word32 * word) list * (int * word) list * (int * int) option) -> ((word32 * word) list * (int * word) list)") + diff --git a/base/runtime/c-libs/win32-io/cfun-proto-list.h b/base/runtime/c-libs/win32-io/cfun-proto-list.h new file mode 100644 index 0000000..a7d5d07 --- /dev/null +++ b/base/runtime/c-libs/win32-io/cfun-proto-list.h @@ -0,0 +1,18 @@ +/* cfun-proto-list.h + * + * COPYRIGHT (c) 1996 Bell Laboratories, Lucent Technologies + */ + +#ifndef _CFUN_PROTO_LIST_ +#define _CFUN_PROTO_LIST_ + +#ifndef _C_LIBRARY_ +# include "c-library.h" +#endif + +/* the external definitions for the C functions */ +#define CFUNC(NAME, FUNC, MLTYPE) CFUNC_PROTO(NAME, FUNC, MLTYPE) +#include "cfun-list.h" +#undef CFUNC + +#endif /* !_CFUN_PROTO_LIST_ */ diff --git a/base/runtime/c-libs/win32-io/makefile.win32 b/base/runtime/c-libs/win32-io/makefile.win32 new file mode 100644 index 0000000..ec733a8 --- /dev/null +++ b/base/runtime/c-libs/win32-io/makefile.win32 @@ -0,0 +1,51 @@ +# +# the makefile for the Win32 IO library +# + +SHELL = + +INC_DIR = ..\..\include +CLIB_DIR = ..\. +MACH_DIR = ..\..\mach-dep + +INCLUDES = /I$(INC_DIR) /I$(CLIB_DIR) /I..\..\objs + +MAKEFILE = makefile.win32 +MAKE = nmake /F$(MAKEFILE) +AR = lib +ARFLAGS = +RANLIB = lib + +LIBRARY = libwin32-io.lib + +VERSION = v-dummy + +OBJS = win32-io-lib.obj \ + win32-io.obj \ + poll.obj + +$(LIBRARY) : $(VERSION) $(OBJS) + del /F /Q $(LIBRARY) + $(AR) $(ARFLAGS) /out:$(LIBRARY) $(OBJS) + $(RANLIB) /out:$(LIBRARY) + +$(VERSION) : + ($(MAKE) MAKE="$(MAKE)" clean) + echo "$(VERSION)" > $(VERSION) + +DEPENDENTS = $(CLIB_DIR)\ml-c.h cfun-proto-list.h cfun-list.h $(INC_DIR)\c-library.h + +win32-io-lib.obj: win32-io-lib.c $(DEPENDENTS) + $(CC) $(CFLAGS) $(DEFS) $(INCLUDES) /c win32-io-lib.c + +win32-io.obj: win32-io.c $(MACH_DIR)\win32-fault.h $(DEPENDENTS) \ + $(INC_DIR)\ml-base.h $(INC_DIR)\ml-values.h $(INC_DIR)\ml-objects.h + $(CC) $(CFLAGS) $(DEFS) $(INCLUDES) /c win32-io.c + +poll.obj: poll.c $(MACH_DIR)\win32-fault.h $(DEPENDENTS) \ + $(INC_DIR)\ml-base.h $(INC_DIR)\ml-values.h $(INC_DIR)\ml-objects.h + $(CC) $(CFLAGS) $(DEFS) $(INCLUDES) /c poll.c +clean: + del /F /Q v-* *.obj *.pdb $(LIBRARY) + + diff --git a/base/runtime/c-libs/win32-io/poll.c b/base/runtime/c-libs/win32-io/poll.c new file mode 100644 index 0000000..a7e3e9e --- /dev/null +++ b/base/runtime/c-libs/win32-io/poll.c @@ -0,0 +1,153 @@ +/*! \file poll.c + * + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * crude implementation of a polling function on Windows + */ + +#include + +#include "ml-base.h" +#include "ml-values.h" +#include "ml-objects.h" +#include "ml-c.h" + +#include "win32-fault.h" + +/* bit masks for polling descriptors (see src/sml-nj/boot/Unix/os-io.sml) */ +#define RD_BIT 0x1 +#define WR_BIT 0x2 +#define ERR_BIT 0x4 + +/* _ml_win32_OS_poll : (handle * word) list * (int * word) list * Word32.int option + * -> (handle * word) list * (int * word) list + */ +ml_val_t _ml_win32_OS_poll (ml_state_t *msp, ml_val_t arg) +{ + DWORD dwMilliseconds; + ml_val_t pollList = REC_SEL(arg,0); + ml_val_t pollSockList = REC_SEL(arg,1); + ml_val_t timeout = REC_SEL(arg,2); + int sec,usec; + ml_val_t l,item; + ml_val_t hList, sList, resTuple; + HANDLE handle, *hArray; + fd_set read, write, err; + int fd, flag; + struct timeval tv, *tvp; + int result; + + int count, index; + + /* first, convert timeout to milliseconds */ + if (timeout == OPTION_NONE) { + dwMilliseconds = INFINITE; + } + else { + timeout = OPTION_get(timeout); + dwMilliseconds = WORD32_MLtoC(timeout); + } + + /* count number of handles */ + for (l = pollList, count = 0; l != LIST_nil; l = LIST_tl(l)) { + count++; + } + + /* allocate array of handles */ + hArray = NEW_VEC (HANDLE,count); + + /* initialize the array */ + for (l = pollList, index = 0; l != LIST_nil; l = LIST_tl(l)) { + item = LIST_hd (l); + handle = HANDLE_MLtoC(REC_SEL(item, 0)); + hArray[index++] = handle; + } + + /* generalized poll to see if anything is available */ + result = WaitForMultipleObjects (count, hArray, FALSE,dwMilliseconds); + hList = LIST_nil; + if (!((result==WAIT_FAILED) || (result==WAIT_TIMEOUT))) { + /* at least one handle was ready. Find all that are */ + for (l=pollList; l!=LIST_nil; l=LIST_tl(l)) { + item = LIST_hd (l); + handle = HANDLE_MLtoC(REC_SEL(item, 0)); + result = WaitForSingleObject (handle, 0); + if ((result == WAIT_FAILED) || (result == WAIT_TIMEOUT)) continue; + LIST_cons (msp, hList, item, hList); + } + } + + FREE(hArray); + + /* SOCKETS */ + /* count number of handles and init the fdsets */ + FD_ZERO(&read); + FD_ZERO(&write); + FD_ZERO(&err); + for (l=pollSockList,count=0; l!=LIST_nil; l=LIST_tl(l)) { + count++; + item = LIST_hd (l); + fd = REC_SELINT(item, 0); + flag = REC_SELINT(item, 1); + if ((flag & RD_BIT) != 0) { + FD_SET(fd, &read); + } + if ((flag & WR_BIT) != 0) { + FD_SET(fd, &write); + } + if ((flag & ERR_BIT) != 0) { + FD_SET(fd, &err); + } + } + + if (timeout == OPTION_NONE) { + tvp = NIL(struct timeval *); + } else { + tv.tv_sec = dwMilliseconds / 1000; + tv.tv_usec = (dwMilliseconds % 1000) * 1000; + tvp = &tv; + } + + sList = LIST_nil; + + if (count > 0) { + result = select (count, &read, &write, &err, tvp); + if (result < 0) { + return RAISE_SYSERR(msp, sts); + } + else if (result > 0) { + ml_val_t *resVec = NEW_VEC(ml_val_t, result); + int i, resFlag; + + for (i = 0, l = pollSockList; l != LIST_nil; l = LIST_tl(l)) { + item = LIST_hd(l); + fd = REC_SELINT(item, 0); + flag = REC_SELINT(item, 1); + resFlag = 0; + if (((flag & RD_BIT) != 0) && FD_ISSET(fd, &read)) resFlag |= RD_BIT; + if (((flag & WR_BIT) != 0) && FD_ISSET(fd, &write)) resFlag |= WR_BIT; + if (((flag & ERR_BIT) != 0) && FD_ISSET(fd, &err)) resFlag |= ERR_BIT; + if (resFlag != 0) { + REC_ALLOC2 (msp, item, INT_CtoML(fd), INT_CtoML(resFlag)); + resVec[i++] = item; + } + } + + ASSERT(i == result); + + for (i = result-1, sList = LIST_nil; i >= 0; i--) { + item = resVec[i]; + LIST_cons (msp, sList, item, sList); + } + + FREE(resVec); + } + } + + + REC_ALLOC2(msp, resTuple, hList, sList) + return resTuple; +} + +/* end of poll.c */ diff --git a/base/runtime/c-libs/win32-io/win32-io-lib.c b/base/runtime/c-libs/win32-io/win32-io-lib.c new file mode 100644 index 0000000..c5bf22f --- /dev/null +++ b/base/runtime/c-libs/win32-io/win32-io-lib.c @@ -0,0 +1,28 @@ +/* win32-io-lib.c + * + * COPYRIGHT (c) 1996 Bell Laboratories, Lucent Technologies + */ + +#include "ml-base.h" +#include "c-library.h" +#include "cfun-proto-list.h" + + +/* the table of C functions and ML names */ +#define CFUNC(NAME, FUNC, MLTYPE) CFUNC_BIND(NAME, FUNC, MLTYPE) +PVT cfunc_binding_t CFunTable[] = { +#include "cfun-list.h" + CFUNC_NULL_BIND + }; +#undef CFUNC + + +/* the Signals library */ +c_library_t WIN32_IO_Library = { + CLIB_NAME, + CLIB_VERSION, + CLIB_DATE, + NIL(clib_init_fn_t), + CFunTable + }; + diff --git a/base/runtime/c-libs/win32-io/win32-io.c b/base/runtime/c-libs/win32-io/win32-io.c new file mode 100644 index 0000000..2026651 --- /dev/null +++ b/base/runtime/c-libs/win32-io/win32-io.c @@ -0,0 +1,461 @@ +/* win32-io.c + * + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * interface to win32 io functions + */ + +#include + +#include "ml-base.h" +#include "ml-values.h" +#include "ml-objects.h" +#include "ml-c.h" + +#include "win32-fault.h" + +#define EOF_char '\x01a' /* ^Z is win32 eof */ +#define CTRL_C_char '\x003' + +/* macro to check if h is a console that hasn't been redirected */ +#define IS_CONIN(h) (((h) == win32_stdin_handle) && \ + (GetFileType(h) == FILE_TYPE_CHAR)) + +/* _ml_win32_IO_get_std_handle : word32 -> handle + * interface to win32 GetStdHandle + */ +ml_val_t _ml_win32_IO_get_std_handle (ml_state_t *msp, ml_val_t arg) +{ + DWORD w = WORD32_MLtoC(arg); + HANDLE h = GetStdHandle(w); + ml_val_t res; + +#ifdef DEBUG_WIN32 + SayDebug("getting std handle for %x as %p\n", w, h); +#endif + + return HANDLE_CtoML(msp, h); +} + +/* _ml_win32_IO_close : handle -> unit + * close a handle + */ +ml_val_t _ml_win32_IO_close (ml_state_t *msp, ml_val_t arg) +{ + HANDLE h = HANDLE_MLtoC(arg); + + if (CloseHandle(h)) { + return ML_unit; + } else { +#ifdef DEBUG_WIN32 + SayDebug("_ml_win32_IO_close(%p): failing: error = %d\n", h, GetLastError()); +#endif + return RAISE_SYSERR(msp,-1); + } +} + + +/* _ml_win32_IO_set_file_pointer : (handle * Position.int * word32) -> Position.int + * handle dist how + */ +ml_val_t _ml_win32_IO_set_file_pointer (ml_state_t *msp, ml_val_t arg) +{ + HANDLE h = HANDLE_MLtoC(REC_SEL(arg,0)); + DWORD how = WORD32_MLtoC(REC_SEL(arg,2)); + LARGE_INTEGER dist, pos; + + dist.QuadPart = INT64_MLtoC(REC_SEL(arg,1)); + + if (SetFilePointerEx(h, dist, &pos, how)) { + return ML_AllocInt64(msp, pos.QuadPart); + } + else { +#ifdef DEBUG_WIN32 + SayDebug("_ml_win32_IO_set_file_pointer(%p, %lld, %d): failing: error = %d\n", + h, dist.QuadPart, how, GetLastError()); +#endif + return RAISE_SYSERR(msp, -1); + } +} + +/* remove CRs ('\r') from buf of size *np; sets *np to be the new buf size + */ +PVT rm_CRs (char *buf,int *np) +{ + int i, j = 0; + int n = *np; + + for (i = 0; i < n; i++) { + if (buf[i] != '\r') { + buf[j++] = buf[i]; + } + } + *np = j; +} + + +/* translate CRs ('\r') to newlines ('\n'), removing existing LFs (also '\n'). + * process backspace (BS) + * sets *np to the new buffer size + * returns TRUE if the buffer contains the EOF character + */ +PVT bool_t CRLF_EOFscan (char *buf, int *np) +{ + int i, j = 0; + int n = *np; + bool_t sawEOF = FALSE; + + for (i = 0; i word8vector.vector + * handle nbytes + * + * Read the specified number of bytes from the specified handle, + * returning them in a vector. + * + * Note: Read operations on console devices do not trap ctrl-C. + * ctrl-Cs are placed in the input buffer. + */ +ml_val_t _ml_win32_IO_read_vec (ml_state_t *msp, ml_val_t arg) +{ + HANDLE h = HANDLE_MLtoC(REC_SEL(arg, 0)); + DWORD nbytes = (DWORD) REC_SELINT(arg, 1); + ml_val_t vec, res; + DWORD n; + + /* allocate the vector; note that this might cause a GC */ + vec = ML_AllocRaw (msp, BYTES_TO_WORDS(nbytes)); + if (ReadFile(h, PTR_MLtoC(void, vec), nbytes, &n, NULL)) { + if (n == 0) { +#ifdef DEBUG_WIN32 + SayDebug("_ml_win32_IO_read_vec(%p, %d): eof\n", h, nbytes); +#endif + return ML_string0; + } + if (n < nbytes) { + /* we need to shrink the vector */ + ML_ShrinkRaw (msp, vec, BYTES_TO_WORDS(n)); + } + /* allocate header */ + SEQHDR_ALLOC (msp, res, DESC_string, vec, n); + return res; + } + else { +#ifdef DEBUG_WIN32 + SayDebug("_ml_win32_IO_read_vec(%p, %d) failed; n = %d, error = %d\n", + h, nbytes, n, GetLastError()); +#endif + return RAISE_SYSERR(msp,-1); + } +} + +PVT bool_t check_cntrl_c (BOOL read_OK, int bytes_read) +{ + /* this is a rude hack */ + /* under NT and default console mode, on + * EOF: read_OK is true, and n > 0 + * ^C: read_OK is true, and n == 0. However, the cntrl_c handler is + * not always invoked before ReadConsole returns. + */ + /* under 95 and default console mode, on + * EOF: read_OK is true and n is 0 + * ^C: read_OK is true, n is 0, but handler seems to always have been run + */ + if (read_OK && (bytes_read == 0) && win32_isNT) { + /* guaranteed that a cntrl_c has occurred and has not been reset */ + /* wait for it to happen */ + wait_for_cntrl_c(); + return TRUE; + } + return FALSE; +} + +/* + * Since we're not setting console mode to processed input (as that + * causes other issues around no longer getting an async event while + * executing), we need to append the ^C into the input stream + * manually. But since ^C isn't handled nicely (illegal character), we + * instead just prepend a space into the stream. + */ +PVT void append_cntrl_c (char *buf, int *np, int max) +{ + /* Out of space in buffer; exit without adding a character. This should + * be fine (provided max>0, which it always is), as all we're trying to + * prevent is returning zero bytes and causing the runtime to think we + * got an EOF on the input stream. + */ + if (*np == max) return; + + buf[(*np)++] = ' '; +} + +/* _ml_win32_IO_read_vec_txt : (handle * int) -> char8vector + * handle nbytes + * + * Read the specified number of bytes from the specified handle, + * returning them in a vector. + * + * reflect changes in _ml_win32_IO_read_arr_txt + */ +ml_val_t _ml_win32_IO_read_vec_txt(ml_state_t *msp, ml_val_t arg) +{ + HANDLE h = HANDLE_MLtoC(REC_SEL(arg, 0)); + DWORD nbytes = (DWORD) REC_SELINT(arg, 1); + ml_val_t vec, res; + DWORD n; + BOOL flag = FALSE; + + /* allocate the vector; note that this might cause a GC */ + vec = ML_AllocRaw (msp, BYTES_TO_WORDS (nbytes)); + + if (IS_CONIN(h)) { + flag = ReadConsole(h, PTR_MLtoC(void,vec), nbytes, &n, NULL); + if (check_cntrl_c(flag, n)) { + append_cntrl_c(PTR_MLtoC(void,vec), &n, nbytes); + } + } else { + flag = ReadFile(h,PTR_MLtoC(void,vec),nbytes,&n,NULL); + } + if (flag) { + if (IS_CONIN(h)) { + if (CRLF_EOFscan((char *)vec,&n)) { + n = 0; + } + } + else { + rm_CRs((char *)vec,&n); + } + + if (n == 0) { +#ifdef DEBUG_WIN32 + SayDebug("_ml_win32_IO_read_vec_txt(%p, %d): eof\n", h, nbytes); +#endif + return ML_string0; + } + if (n < nbytes) { + /* shrink buffer */ + ML_ShrinkRaw (msp, vec, BYTES_TO_WORDS(n)); + } + /* allocate header */ + SEQHDR_ALLOC (msp, res, DESC_string, vec, n); +#ifdef DEBUG_WIN32 + SayDebug("_ml_win32_IO_read_vec_txt: read %d\n",n); +#endif + return res; + } + else if ((h == win32_stdin_handle) /* input from stdin */ + && (GetFileType(h) == FILE_TYPE_PIPE) /* but not console */ + && (GetLastError() == ERROR_BROKEN_PIPE)) /* and pipe broken */ + { + /* this is an EOF on redirected stdin (ReadFile failed) */ + return ML_string0; + } + else { +#ifdef DEBUG_WIN32 + SayDebug("_ml_win32_IO_read_vec_txt: failing on handle %p\n", h); +#endif + return RAISE_SYSERR(msp,-1); + } +} + +/* _ml_win32_IO_read_arr : (handle * word8array * int * int) -> int + * handle buffer n start + * + * Read n bytes of data from the specified handle into the given array, + * starting at start. Return the number of bytes read. Assume bounds + * have been checked. + * + * Note: Read operations on console devices do not trap ctrl-C. + * ctrl-Cs are placed in the input buffer. + */ +ml_val_t _ml_win32_IO_read_arr (ml_state_t *msp, ml_val_t arg) +{ + HANDLE h = HANDLE_MLtoC(REC_SEL(arg, 0)); + ml_val_t buf = REC_SEL(arg,1); + DWORD nbytes = (DWORD) REC_SELINT(arg, 2); + Byte_t *start = STR_MLtoC(buf) + REC_SELINT(arg,3); + DWORD n; + + if (ReadFile(h, PTR_MLtoC(void,start), nbytes, &n, NULL)) { +#ifdef DEBUG_WIN32 + if (n == 0) SayDebug("_ml_win32_IO_read_arr(%p, %d): eof\n", h, nbytes); +#endif + return INT_CtoML(n); + } +#ifdef DEBUG_WIN32 + SayDebug("_ml_win32_IO_read_arr: failing\n"); +#endif + return RAISE_SYSERR(msp,-1); +} + +/* _ml_win32_IO_read_arr_txt : (handle * char8array * int * int) -> int + * handle buffer n start + * + * Read n bytes of data from the specified handle into the given array, + * starting at start. Return the number of bytes read. Assume bounds + * have been checked. + * + * reflect changes in _ml_win32_IO_read_vec_txt + */ +ml_val_t _ml_win32_IO_read_arr_txt (ml_state_t *msp, ml_val_t arg) +{ + HANDLE h = HANDLE_MLtoC(REC_SEL(arg, 0)); + ml_val_t buf = REC_SEL(arg,1); + DWORD nbytes = (DWORD) REC_SELINT(arg, 2); + Byte_t *start = STR_MLtoC(buf) + REC_SELINT(arg,3); + DWORD n; + BOOL flag; + + if (IS_CONIN(h)) { + flag = ReadConsole(h,PTR_MLtoC(void,start),nbytes,&n,NULL); + if (check_cntrl_c(flag,n)) { + append_cntrl_c(PTR_MLtoC(void,start),&n,nbytes); + } + } else { + flag = ReadFile(h,PTR_MLtoC(void,start),nbytes,&n,NULL); + } + if (flag) { + if (IS_CONIN(h)) { + if (CRLF_EOFscan((char *)start,&n)) { + n = 0; + } + } + else { + rm_CRs((char *)buf,&n); + } +#ifdef DEBUG_WIN32 + SayDebug("_ml_win32_IO_read_arr_txt(%p, %d): eof\n", h, nbytes); +#endif + return INT_CtoML(n); + } else { + if ((h == win32_stdin_handle) /* input from stdin */ + && (GetFileType(h) == FILE_TYPE_PIPE) /* but not console */ + && (GetLastError() == ERROR_BROKEN_PIPE)) /* and pipe broken */ + { + /* this is an EOF on redirected stdin (ReadFile failed) */ + return INT_CtoML(0); + } + } + +#ifdef DEBUG_WIN32 + SayDebug("_ml_win32_IO_read_arr_txt: failing\n"); +#endif + return RAISE_SYSERR(msp, -1); +} + + +/* _ml_win32_IO_create_file : (string * word32 * word32 * word32 * word32) -> handle + * name access share create attr handle + * + * create file "name" with access, share, create, and attr flags + */ +ml_val_t _ml_win32_IO_create_file (ml_state_t *msp, ml_val_t arg) +{ + ml_val_t fname = REC_SEL(arg,0); + char *name = STR_MLtoC(fname); + DWORD access = WORD32_MLtoC(REC_SEL(arg,1)); + DWORD share = WORD32_MLtoC(REC_SEL(arg,2)); + DWORD create = WORD32_MLtoC(REC_SEL(arg,3)); + DWORD attr = WORD32_MLtoC(REC_SEL(arg,4)); + HANDLE h = CreateFile(name, access, share, NULL, create, attr, INVALID_HANDLE_VALUE); + ml_val_t res; + +#ifdef DEBUG_WIN32 + if (h == INVALID_HANDLE_VALUE) { + SayDebug("create_file(\"%s\", %x, %x, %x, %x) failed; error = %d\n", + name, access, share, create, attr, GetLastError()); + } + else { + SayDebug("create_file(\"%s\", %x, %x, %x, %x) = %p\n", + name, access, share, create, attr, h); + } +#endif + + return HANDLE_CtoML(msp, h); +} + +/* _ml_win32_IO_write_buf : (handle * word8vector.vector * int * int) -> int + * handle buf n offset + * + * generic routine for writing n byes from buf to handle starting at offset + * + * A maximum print size is used to avoid exceeding maximum buffer thresholds + * with handles corresponding to console output. Technically, we can use + * larger values, but this will also support several other devices that have + * output limits if we decide to open up the range of supported file handles + * through interop. + */ +#define MAX_PRINT_SIZE 30000 +ml_val_t _ml_win32_IO_write_buf (ml_state_t *msp, ml_val_t arg) +{ + HANDLE h = HANDLE_MLtoC(REC_SEL(arg,0)); + ml_val_t buf = REC_SEL(arg,1); + size_t nbytes = REC_SELINT(arg,2); + Byte_t *start = (Byte_t *) (STR_MLtoC(buf) + REC_SELINT(arg, 3)); + DWORD n, remaining, total; + char *buffer = PTR_MLtoC(void,start); + int err; + +#ifdef DEBUG_WIN32 + SayDebug("_ml_win32_IO_write_buf(%p, -, %d, %d)\n", h, nbytes, REC_SELINT(arg, 3)); +#endif + + remaining = nbytes; + total = 0; + + while (remaining > 0) { + nbytes = min (MAX_PRINT_SIZE, remaining); + if (WriteFile(h, buffer, nbytes, &n, NULL)) { +#ifdef DEBUG_WIN32 + if (n == 0) SayDebug("_ml_win32_IO_write_buf(%h, %d): eof\n", h, nbytes); +#endif + total += n; + remaining -= n; + buffer += n; + } else { +#ifdef DEBUG_WIN32 + SayDebug("_ml_win32_IO_write_buf: failing\n"); +#endif + return RAISE_SYSERR(msp,-1); + } + } + + return INT_CtoML(total); +} + +ml_val_t _ml_win32_IO_write_vec (ml_state_t *msp, ml_val_t arg) +{ + return _ml_win32_IO_write_buf(msp, arg); +} + +ml_val_t _ml_win32_IO_write_arr (ml_state_t *msp, ml_val_t arg) +{ + return _ml_win32_IO_write_buf(msp, arg); +} + +ml_val_t _ml_win32_IO_write_vec_txt (ml_state_t *msp, ml_val_t arg) +{ + return _ml_win32_IO_write_buf(msp, arg); +} + +ml_val_t _ml_win32_IO_write_arr_txt (ml_state_t *msp, ml_val_t arg) +{ + return _ml_win32_IO_write_buf(msp, arg); +} + +/* end of win32-io.c */ diff --git a/base/runtime/c-libs/win32-process/README b/base/runtime/c-libs/win32-process/README new file mode 100644 index 0000000..cb033f6 --- /dev/null +++ b/base/runtime/c-libs/win32-process/README @@ -0,0 +1,2 @@ +this directory contains C functions that interface to the win32 process api +and are required for supporting the basis diff --git a/base/runtime/c-libs/win32-process/cfun-list.h b/base/runtime/c-libs/win32-process/cfun-list.h new file mode 100644 index 0000000..8384051 --- /dev/null +++ b/base/runtime/c-libs/win32-process/cfun-list.h @@ -0,0 +1,24 @@ +/* cfun-list.h + * + * COPYRIGHT (c) 1996 Bell Laboratories, Lucent Technologies + * + * win32 C functions for processes + */ + +#ifndef CLIB_NAME +#define CLIB_NAME "WIN32-PROCESS" +#define CLIB_VERSION "0.2" +#define CLIB_DATE "May 22, 1998" +#endif + +CFUNC("system",_ml_win32_PS_system,"string->word32") +CFUNC("exit_process",_ml_win32_PS_exit_process,"word32->'a") +CFUNC("get_environment_variable",_ml_win32_PS_get_environment_variable,"string->string option") +CFUNC("create_process",_ml_win32_PS_create_process,"string->word32") +CFUNC("wait_for_single_object",_ml_win32_PS_wait_for_single_object,"word32->word32 option") + +CFUNC("sleep",_ml_win32_PS_sleep,"word32->unit") +CFUNC("find_executable", _ml_win32_PS_find_executable, "string -> string option") +CFUNC("launch_application", _ml_win32_PS_launch_application, "string -> string -> unit") +CFUNC("open_document", _ml_win32_PS_open_document, "string -> unit") +CFUNC("create_process_redirect_handles",_ml_win32_PS_create_process_redirect_handles,"string->word32*word32*word32") diff --git a/base/runtime/c-libs/win32-process/cfun-proto-list.h b/base/runtime/c-libs/win32-process/cfun-proto-list.h new file mode 100644 index 0000000..a7d5d07 --- /dev/null +++ b/base/runtime/c-libs/win32-process/cfun-proto-list.h @@ -0,0 +1,18 @@ +/* cfun-proto-list.h + * + * COPYRIGHT (c) 1996 Bell Laboratories, Lucent Technologies + */ + +#ifndef _CFUN_PROTO_LIST_ +#define _CFUN_PROTO_LIST_ + +#ifndef _C_LIBRARY_ +# include "c-library.h" +#endif + +/* the external definitions for the C functions */ +#define CFUNC(NAME, FUNC, MLTYPE) CFUNC_PROTO(NAME, FUNC, MLTYPE) +#include "cfun-list.h" +#undef CFUNC + +#endif /* !_CFUN_PROTO_LIST_ */ diff --git a/base/runtime/c-libs/win32-process/makefile.win32 b/base/runtime/c-libs/win32-process/makefile.win32 new file mode 100644 index 0000000..f51d7cc --- /dev/null +++ b/base/runtime/c-libs/win32-process/makefile.win32 @@ -0,0 +1,46 @@ +# +# the makefile for the Win32 process library +# + +SHELL = + +INC_DIR = ..\..\include +CLIB_DIR = ..\ + +INCLUDES = /I$(INC_DIR) /I$(CLIB_DIR) /I..\..\objs + +MAKEFILE = makefile.win32 +MAKE = nmake /F$(MAKEFILE) +AR = lib +ARFLAGS = +RANLIB = lib + +LIBRARY = libwin32-process.lib + +VERSION = v-dummy + +OBJS = win32-process-lib.obj \ + win32-process.obj + +$(LIBRARY) : $(VERSION) $(OBJS) + del /F /Q $(LIBRARY) + $(AR) $(ARFLAGS) /out:$(LIBRARY) $(OBJS) + $(RANLIB) /out:$(LIBRARY) + +$(VERSION) : + ($(MAKE) MAKE="$(MAKE)" clean) + echo "$(VERSION)" > $(VERSION) + +DEPENDENTS = $(CLIB_DIR)\ml-c.h cfun-proto-list.h cfun-list.h $(INC_DIR)\c-library.h + +win32-process-lib.obj: win32-process-lib.c $(DEPENDENTS) + $(CC) $(CFLAGS) $(DEFS) $(INCLUDES) /c win32-process-lib.c + +win32-process.obj: win32-process.c $(DEPENDENTS) \ + $(INC_DIR)\ml-base.h $(INC_DIR)\ml-values.h $(INC_DIR)\ml-objects.h + $(CC) $(CFLAGS) $(DEFS) $(INCLUDES) /c win32-process.c + +clean: + del /F /Q v-* *.obj *.pdb $(LIBRARY) + + diff --git a/base/runtime/c-libs/win32-process/win32-process-lib.c b/base/runtime/c-libs/win32-process/win32-process-lib.c new file mode 100644 index 0000000..dac1411 --- /dev/null +++ b/base/runtime/c-libs/win32-process/win32-process-lib.c @@ -0,0 +1,28 @@ +/* win32-process-lib.c + * + * COPYRIGHT (c) 1996 Bell Laboratories, Lucent Technologies + */ + +#include "ml-base.h" +#include "c-library.h" +#include "cfun-proto-list.h" + + +/* the table of C functions and ML names */ +#define CFUNC(NAME, FUNC, MLTYPE) CFUNC_BIND(NAME, FUNC, MLTYPE) +PVT cfunc_binding_t CFunTable[] = { +#include "cfun-list.h" + CFUNC_NULL_BIND + }; +#undef CFUNC + + +/* the Signals library */ +c_library_t WIN32_Process_Library = { + CLIB_NAME, + CLIB_VERSION, + CLIB_DATE, + NIL(clib_init_fn_t), + CFunTable + }; + diff --git a/base/runtime/c-libs/win32-process/win32-process.c b/base/runtime/c-libs/win32-process/win32-process.c new file mode 100644 index 0000000..47490e1 --- /dev/null +++ b/base/runtime/c-libs/win32-process/win32-process.c @@ -0,0 +1,243 @@ +/*! \file win32-process.c + * + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * interface to win32 process functions + */ + +#include +#include +#include +#include + +#include "ml-base.h" +#include "ml-values.h" +#include "ml-objects.h" +#include "ml-c.h" + +/* _ml_win32_PS_create_process : string -> handle + * + * Note: This function returns the handle to the created process + * This handle will need to be freed before the system releases + * the memory associated to the process. + * We will take care of this in the wait_for_single_object + * call. This is for the time being only used by CML. + * It could also cause problems later on. + */ +ml_val_t _ml_win32_PS_create_process_internal (ml_state_t *msp, ml_val_t arg, STARTUPINFO *pStartup) +{ + char *str = STR_MLtoC(arg); + PROCESS_INFORMATION pi; + STARTUPINFO si; + ml_val_t res; + BOOL fSuccess; + ZeroMemory (&si,sizeof(si)); + si.cb = sizeof(si); + + if (pStartup == NULL) { + pStartup = &si; + } + fSuccess = CreateProcess (NULL,str,NULL,NULL,TRUE,CREATE_NEW_CONSOLE,NULL,NULL,pStartup,&pi); + if (fSuccess) { + HANDLE hProcess = pi.hProcess; + CloseHandle (pi.hThread); + return HANDLE_CtoML(msp, hProcess); + } + else { + return RAISE_SYSERR(msp,-1); + } +} + +/* _ml_win32_PS_create_process : string -> handle + */ +ml_val_t _ml_win32_PS_create_process (ml_state_t *msp, ml_val_t arg) +{ + return _ml_win32_PS_create_process_internal(msp, arg, NULL); +} + +/* _ml_win32_PS_create_process_redirect_handles : string -> handle * handle * handle + */ +ml_val_t _ml_win32_PS_create_process_redirect_handles (ml_state_t *msp, ml_val_t arg) +{ + SECURITY_ATTRIBUTES sa; + SECURITY_DESCRIPTOR sd; //security information for pipes + STARTUPINFO si; + HANDLE hStdoutRd, hStdoutWr, hStdinRd, hStdinWr = NULL; + ml_val_t res, procHandle, in, out; + + InitializeSecurityDescriptor(&sd,SECURITY_DESCRIPTOR_REVISION); + SetSecurityDescriptorDacl(&sd, TRUE, NULL, FALSE); + sa.lpSecurityDescriptor = &sd; + sa.nLength = sizeof(SECURITY_ATTRIBUTES); + sa.bInheritHandle = TRUE; + + ZeroMemory(&si, sizeof(si)); + si.cb = sizeof(si); + + // Create a pipe for the child process's STDOUT. + if (!CreatePipe(&hStdoutRd, &hStdoutWr, &sa, 0)) + return RAISE_SYSERR(msp,-1); + + // Create a pipe for the child process's STDIN. + if (!CreatePipe(&hStdinRd, &hStdinWr, &sa, 0)) + return RAISE_SYSERR(msp,-1); + + si.dwFlags = STARTF_USESTDHANDLES|STARTF_USESHOWWINDOW; + si.wShowWindow = SW_HIDE; + si.hStdInput = hStdinRd; // The child process READS from this + si.hStdOutput = si.hStdError = hStdoutWr; // And it WRITES to this one + + procHandle = _ml_win32_PS_create_process_internal(msp, arg, &si); + in = HANDLE_CtoML(msp, hStdoutRd); + out = HANDLE_CtoML(msp, hStdinWr); + REC_ALLOC3(msp, res, procHandle, in, out); + return res; +} + +/* _ml_win32_PS_wait_for_single_object : handle -> word option + */ +ml_val_t _ml_win32_PS_wait_for_single_object (ml_state_t *msp, ml_val_t arg) +{ + HANDLE hProcess = HANDLE_MLtoC(arg); + DWORD exit_code; + int res; + ml_val_t p,obj; + res = WaitForSingleObject (hProcess,0); + if ((res == WAIT_TIMEOUT) || (res == WAIT_FAILED)) { + /* information is not ready, or error */ + obj = OPTION_NONE; + } + else { + /* WAIT_OBJECT_0 ... done, finished */ + /* get info and return SOME(exit_status) */ + GetExitCodeProcess (hProcess,&exit_code); + CloseHandle (hProcess); /* decrease ref count */ + p = WORD32_CtoML (msp, exit_code); + OPTION_SOME(msp,obj,p); + } + return obj; +} + + +/* _ml_win32_PS_system : string -> word32 + * command + * + */ +ml_val_t _ml_win32_PS_system (ml_state_t *msp, ml_val_t arg) +{ + const char *unquoted = STR_MLtoC(arg); + int unquotedlen = strnlen (unquoted, GET_SEQ_LEN(arg)); + char *quoted = (char*)MALLOC((unquotedlen+3)*sizeof(char)); + int ret; + + if (quoted == (char *)0) { + Die ("_ml_win32_PS_system: unable to allocate memory\n"); + } + quoted[0] = '\"'; + strcpy(&(quoted[1]), unquoted); + quoted[unquotedlen+1] = '\"'; + quoted[unquotedlen+2] = (char)0; + ret = system(quoted); + FREE(quoted); + + return WORD32_CtoML(msp, ret); +} + +/* _ml_win32_PS_exit_process : word32 -> 'a + * exit code + * + */ +void _ml_win32_PS_exit_process (ml_state_t *msp, ml_val_t arg) +{ + ExitProcess ((UINT)WORD32_MLtoC(arg)); +} + +/* _ml_win32_PS_get_environment_variable : string -> string option + * var + * + */ +ml_val_t _ml_win32_PS_get_environment_variable (ml_state_t *msp, ml_val_t arg) +{ +#define GEV_BUF_SZ 4096 + char buf[GEV_BUF_SZ]; + int ret = GetEnvironmentVariable(STR_MLtoC(arg), buf, GEV_BUF_SZ); + ml_val_t ml_s,res = OPTION_NONE; + + if (ret > GEV_BUF_SZ) { + return RAISE_SYSERR(msp, -1); + } + if (ret > 0) { + ml_s = ML_CString(msp, buf); + OPTION_SOME(msp, res, ml_s); + } + return res; +#undef GEV_BUF_SZ +} + +/* _ml_win32_PS_sleep : word64 -> unit + * + * Suspend execution for interval in nanoseconds. + */ +ml_val_t _ml_win32_PS_sleep (ml_state_t *msp, ml_val_t arg) +{ + DWORD t = WORD64_MLtoC(arg); + /* convert to milliseconds */ + t = t / 1000; + Sleep (t); + return ML_unit; +} + +/* _ml_win32_PS_find_executable : string -> string option + */ +ml_val_t _ml_win32_PS_find_executable (ml_state_t *msp, ml_val_t arg) +{ + Byte_t *fileName = STR_MLtoC(arg); + TCHAR szResultPath[MAX_PATH]; + int length; + ml_val_t res, vec, obj; + BOOL found = FALSE; + + strcpy_s(szResultPath, max(strlen(fileName), MAX_PATH-1), fileName); + found = PathFindOnPath(szResultPath, NULL); + + if (!found) { + return OPTION_NONE; + } + + length = strlen(szResultPath); + vec = ML_AllocRaw (msp, BYTES_TO_WORDS (length + 1)); + strcpy_s(PTR_MLtoC(void, vec), length+1, szResultPath); + SEQHDR_ALLOC (msp, obj, DESC_string, vec, length); + OPTION_SOME(msp, res, obj); + return res; +} + +ml_val_t _ml_win32_PS_launch_application(ml_state_t *msp, ml_val_t arg) +{ + Byte_t *fileName = STR_MLtoC(REC_SEL(arg,0)); + Byte_t *argument = STR_MLtoC(REC_SEL(arg,1)); + + int result = (int)ShellExecute(NULL, NULL, fileName, argument, NULL, SW_SHOWNORMAL); + + if (result < 32) { + return RAISE_SYSERR(msp,-1); + } + + return ML_unit; +} + +ml_val_t _ml_win32_PS_open_document(ml_state_t *msp, ml_val_t arg) +{ + Byte_t *document = STR_MLtoC(arg); + + int result = (int)ShellExecute(NULL, NULL, document, NULL, NULL, SW_SHOWNORMAL); + + if (result < 32) { + return RAISE_SYSERR(msp,-1); + } + + return ML_unit; +} + +/* end of win32-process.c */ diff --git a/base/runtime/c-libs/win32-raise-syserr.c b/base/runtime/c-libs/win32-raise-syserr.c new file mode 100644 index 0000000..5c25dff --- /dev/null +++ b/base/runtime/c-libs/win32-raise-syserr.c @@ -0,0 +1,55 @@ +/* win32-raise-syserr.c + * + * COPYRIGHT (c) 1996 Bell Laboratories, Lucent Technologies + */ + +#include +#include "ml-base.h" +#include "ml-state.h" +#include "ml-objects.h" +#include "ml-globals.h" +#include "ml-c.h" + +/* RaiseSysError: + * + * Raise the ML exception SysErr, which has the spec: + * + * exception SysErr of (string * syserror option) + * + * We use the last win32-api error value as the syserror; eventually that + * will be represented by an (int * string) pair. If alt_msg is non-zero, + * then use it as the error string and use NONE for the syserror. + */ +ml_val_t RaiseSysError (ml_state_t *msp, const char *altMsg, const char *at) +{ + ml_val_t s, syserror, arg, exn, atStk; + const char *msg; + char buf[32]; + int ml_errno = -1; + + if (altMsg != NIL(char *)) { + msg = altMsg; + syserror = OPTION_NONE; + } + else { + ml_errno = (int) GetLastError(); + sprintf(buf, "", ml_errno); + msg = buf; + OPTION_SOME(msp, syserror, INT_CtoML(ml_errno)); + } + + s = ML_CString (msp, msg); + if (at != NIL(char *)) { + ml_val_t atMsg = ML_CString (msp, at); + LIST_cons(msp, atStk, atMsg, LIST_nil); + } + else + atStk = LIST_nil; + REC_ALLOC2 (msp, arg, s, syserror); + EXN_ALLOC (msp, exn, PTR_CtoML(SysErrId), arg, atStk); + + RaiseMLExn (msp, exn); + + return exn; + +} /* end of RaiseSysError */ diff --git a/base/runtime/c-libs/win32/README b/base/runtime/c-libs/win32/README new file mode 100644 index 0000000..f16d171 --- /dev/null +++ b/base/runtime/c-libs/win32/README @@ -0,0 +1,2 @@ +this directory contains C functions that interface to win32 +and are required for supporting the basis diff --git a/base/runtime/c-libs/win32/cfun-list.h b/base/runtime/c-libs/win32/cfun-list.h new file mode 100644 index 0000000..c62783c --- /dev/null +++ b/base/runtime/c-libs/win32/cfun-list.h @@ -0,0 +1,42 @@ +/* cfun-list.h + * + * COPYRIGHT (c) 1996 Bell Laboratories, Lucent Technologies + * + * utility win32 C functions + */ + +#ifndef CLIB_NAME +#define CLIB_NAME "WIN32" +#define CLIB_VERSION "0.1" +#define CLIB_DATE "October 11, 1996" +#endif + +CFUNC("get_const", _ml_win32_get_const, "string -> word32") +CFUNC("get_last_error", _ml_win32_get_last_error, "unit -> word32") +CFUNC("reg_open_key", _ml_win32_REG_open_key_ex, "word32 * string * word32 -> word32") +CFUNC("reg_create_key", _ml_win32_REG_create_key_ex, "word32 * string * word32 -> word32") +CFUNC("reg_close_key", _ml_win32_REG_close_key_ex, "word32 -> unit") +CFUNC("reg_delete_key", _ml_win32_REG_delete_key, "word32 * string -> unit") +CFUNC("reg_delete_value", _ml_win32_REG_delete_value, "word32 * string -> unit") +CFUNC("reg_enum_key", _ml_win32_REG_enum_key_ex, "word32 * word32 -> string option") +CFUNC("reg_enum_value", _ml_win32_REG_enum_value_ex, "word32 * word32 -> string option") +CFUNC("reg_query_value_type", _ml_win32_REG_query_value_type, "word32 * string -> word32") +CFUNC("reg_query_value_string", _ml_win32_REG_query_value_string, "word32 * string -> string") +CFUNC("reg_query_value_multi_string", _ml_win32_REG_query_value_multi_string, "word32 * string -> string") +CFUNC("reg_query_value_expand_string", _ml_win32_REG_query_value_expand_string, "word32 * string -> string") +CFUNC("reg_query_value_dword", _ml_win32_REG_query_value_dword, "word32 * string -> word32") +CFUNC("reg_query_value_binary", _ml_win32_REG_query_value_binary, "word32 * string -> Word8Vector.vector") +CFUNC("reg_set_value_dword", _ml_win32_REG_set_value_dword, "word32 * string * word32 -> unit") +CFUNC("reg_set_value_string", _ml_win32_REG_set_value_string, "word32 * string * string -> unit") +CFUNC("reg_set_value_expand_string", _ml_win32_REG_set_value_expand_string, "word32 * string * string -> unit") +CFUNC("reg_set_value_multi_string", _ml_win32_REG_set_value_multi_string, "word32 * string * string -> unit") +CFUNC("reg_set_value_binary", _ml_win32_REG_set_value_binary, "word32 * string * Word8Vector.vector -> unit") +CFUNC("config_get_version_ex", _ml_win32_CONFIG_get_version_ex, "unit -> word32 * word32 * word32 * word32 * string ") +CFUNC("config_get_windows_directory", _ml_win32_CONFIG_get_windows_directory, "unit -> string") +CFUNC("config_get_system_directory", _ml_win32_CONFIG_get_system_directory, "unit -> string") +CFUNC("config_get_computer_name", _ml_win32_CONFIG_get_computer_name, "unit -> string") +CFUNC("config_get_user_name", _ml_win32_CONFIG_get_user_name, "unit -> string") +CFUNC("config_get_volume_information", _ml_win32_CONFIG_get_volume_information, "string -> string * string * SysWord.word * int") +CFUNC("dde_start_dialog", _ml_win32_DDE_start_dialog, "string * string -> word32") +CFUNC("dde_execute_string", _ml_win32_DDE_execute_string, "word32 * string * int * int -> unit") +CFUNC("dde_stop_dialog", _ml_win32_DDE_stop_dialog, "word32 -> unit") diff --git a/base/runtime/c-libs/win32/cfun-proto-list.h b/base/runtime/c-libs/win32/cfun-proto-list.h new file mode 100644 index 0000000..a7d5d07 --- /dev/null +++ b/base/runtime/c-libs/win32/cfun-proto-list.h @@ -0,0 +1,18 @@ +/* cfun-proto-list.h + * + * COPYRIGHT (c) 1996 Bell Laboratories, Lucent Technologies + */ + +#ifndef _CFUN_PROTO_LIST_ +#define _CFUN_PROTO_LIST_ + +#ifndef _C_LIBRARY_ +# include "c-library.h" +#endif + +/* the external definitions for the C functions */ +#define CFUNC(NAME, FUNC, MLTYPE) CFUNC_PROTO(NAME, FUNC, MLTYPE) +#include "cfun-list.h" +#undef CFUNC + +#endif /* !_CFUN_PROTO_LIST_ */ diff --git a/base/runtime/c-libs/win32/makefile.win32 b/base/runtime/c-libs/win32/makefile.win32 new file mode 100644 index 0000000..e8d264a --- /dev/null +++ b/base/runtime/c-libs/win32/makefile.win32 @@ -0,0 +1,70 @@ +# +# the makefile for the Win32 core library +# + +SHELL = + +INC_DIR = ..\..\include +CLIB_DIR = ..\ + +INCLUDES = /I$(INC_DIR) /I$(CLIB_DIR) /I..\..\objs + +MAKEFILE = makefile.win32 +MAKE = nmake /F$(MAKEFILE) +AR = lib +ARFLAGS = +RANLIB = lib + +LIBRARY = libwin32.lib + +VERSION = v-dummy + +OBJS = win32-lib.obj \ + win32-constants.obj \ + win32-errors.obj \ + win32-dde.obj \ + win32-reg.obj \ + win32-config.obj \ + name-val.obj + +$(LIBRARY) : $(VERSION) $(OBJS) + del /F /Q $(LIBRARY) + $(AR) $(ARFLAGS) /out:$(LIBRARY) $(OBJS) + $(RANLIB) /out:$(LIBRARY) + +$(VERSION) : + ($(MAKE) MAKE="$(MAKE)" clean) + echo "$(VERSION)" > $(VERSION) + +DEPENDENTS = $(CLIB_DIR)\ml-c.h cfun-proto-list.h cfun-list.h + +win32-lib.obj: win32-lib.c $(DEPENDENTS) \ + $(INC_DIR)\c-library.h $(INC_DIR)\ml-base.h + $(CC) $(CFLAGS) $(DEFS) $(INCLUDES) /c win32-lib.c + +win32-constants.obj: win32-constants.c $(DEPENDENTS) \ + $(INC_DIR)\ml-base.h $(INC_DIR)\ml-values.h $(INC_DIR)\ml-objects.h + $(CC) $(CFLAGS) $(DEFS) $(INCLUDES) /c win32-constants.c + +win32-errors.obj: win32-errors.c $(DEPENDENTS) \ + $(INC_DIR)\ml-base.h $(INC_DIR)\ml-values.h $(INC_DIR)\ml-objects.h + $(CC) $(CFLAGS) $(DEFS) $(INCLUDES) /c win32-errors.c + +win32-dde.obj: win32-dde.c $(DEPENDENTS) \ + $(INC_DIR)\ml-base.h $(INC_DIR)\ml-values.h $(INC_DIR)\ml-objects.h + $(CC) $(CFLAGS) $(DEFS) $(INCLUDES) /c win32-dde.c + +win32-reg.obj: win32-reg.c $(DEPENDENTS) \ + $(INC_DIR)\ml-base.h $(INC_DIR)\ml-values.h $(INC_DIR)\ml-objects.h + $(CC) $(CFLAGS) $(DEFS) $(INCLUDES) /c win32-reg.c + +win32-config.obj: win32-config.c $(DEPENDENTS) \ + $(INC_DIR)\ml-base.h $(INC_DIR)\ml-values.h $(INC_DIR)\ml-objects.h + $(CC) $(CFLAGS) $(DEFS) $(INCLUDES) /c win32-config.c + +name-val.obj: name-val.c name-val.h + $(CC) $(CFLAGS) /c name-val.c +clean: + del /F /Q v-* *.obj *.pdb $(LIBRARY) + + diff --git a/base/runtime/c-libs/win32/name-val.c b/base/runtime/c-libs/win32/name-val.c new file mode 100644 index 0000000..734ab42 --- /dev/null +++ b/base/runtime/c-libs/win32/name-val.c @@ -0,0 +1,32 @@ +/* name-val.c + * + * COPYRIGHT (c) 1995 by AT&T Bell Laboratories. + * COPYRIGHT (c) 1996 Bell Laboratories, Lucent Technologies + * + * support for string-to-int lookup. + */ + +#include +#include +#include "name-val.h" + +static int cmp (const void *key, const void *item) +{ + return strcmp(((name_val_t*)key)->name, ((name_val_t*)item)->name); +} + +/* nv_lookup + * + * Given a string key, an array of name/value pairs and the size of the + * array, find element in the array with matching key and return a pointer + * to it. If not found, return NULL. We use binary search, so we assume + * the array is sorted. + */ +name_val_t *nv_lookup (char *key, name_val_t *array, int numelms) +{ + name_val_t k; + + k.name = key; + return ((name_val_t *)bsearch(&k,array,numelms,sizeof (name_val_t),cmp)); +} + diff --git a/base/runtime/c-libs/win32/name-val.h b/base/runtime/c-libs/win32/name-val.h new file mode 100644 index 0000000..a12bef5 --- /dev/null +++ b/base/runtime/c-libs/win32/name-val.h @@ -0,0 +1,24 @@ +/* name-val.h + * + * COPYRIGHT (c) 1996 Bell Laboratories, Lucent Technologies + * + * Header file for handling string-to-int lookup. + */ + +#ifndef _NAME_VAL_ +#define _NAME_VAL_ + +#include + +typedef DWORD data_t; + +typedef struct { + char* name; + data_t data; +} name_val_t; + +extern name_val_t *nv_lookup (char *, name_val_t *, int); + +#endif /* !_NAME_VAL__ */ + +/* end of name-val.h */ diff --git a/base/runtime/c-libs/win32/win32-config.c b/base/runtime/c-libs/win32/win32-config.c new file mode 100644 index 0000000..72be116 --- /dev/null +++ b/base/runtime/c-libs/win32/win32-config.c @@ -0,0 +1,155 @@ +/*! \file win32-config.c + * + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * interface to win32 system configuration information + */ + +#include + +#include "ml-base.h" +#include "ml-values.h" +#include "ml-objects.h" +#include "ml-c.h" + +ml_val_t _ml_win32_CONFIG_get_version_ex (ml_state_t *msp, ml_val_t arg) +{ + OSVERSIONINFOEX versionInfo; + long result = 0; + int length = 0; + ml_val_t res, major, minor, build, platform, csd, vec; + + ZeroMemory(&versionInfo, sizeof(OSVERSIONINFOEX)); + versionInfo.dwOSVersionInfoSize = sizeof(OSVERSIONINFOEX); + + result = GetVersionEx((OSVERSIONINFO *)&versionInfo); + if (result == 0) { + return RAISE_SYSERR(msp,-1); + } + + major = WORD32_CtoML(msp, versionInfo.dwMajorVersion); + minor = WORD32_CtoML(msp, versionInfo.dwMinorVersion); + build = WORD32_CtoML(msp, versionInfo.dwBuildNumber); + platform = WORD32_CtoML(msp, versionInfo.dwPlatformId); + + length = strlen(versionInfo.szCSDVersion); + vec = ML_AllocRaw (msp, BYTES_TO_WORDS (length + 1)); + strcpy_s(PTR_MLtoC(void, vec), length+1, versionInfo.szCSDVersion); + SEQHDR_ALLOC (msp, csd, DESC_string, vec, length); + + REC_ALLOC5(msp, res, major, minor, build, platform, csd); + + return res; +} +/* +val getVolumeInformation : string -> { + volumeName : string, + systemName : string, + serialNumber : SysWord.word, + maximumComponentLength : int + } +*/ +ml_val_t _ml_win32_CONFIG_get_volume_information (ml_state_t *msp, ml_val_t arg) +{ + TCHAR szVolumeName[MAX_PATH+1]; + DWORD serialNumber; + DWORD maxComponentLength; + DWORD fileSystemFlags; + TCHAR szFilesystemName[MAX_PATH+1]; + Byte_t *subKey = STR_MLtoC(arg); + int length; + ml_val_t res, volume, system, serial, maxcomponent, vec1, vec2; + + if (!GetVolumeInformation( + subKey, szVolumeName, MAX_PATH+1, &serialNumber, &maxComponentLength, + &fileSystemFlags, szFilesystemName, MAX_PATH+1)) + { + return RAISE_SYSERR(msp, -1); + } + + SYSWORD_ALLOC(msp, serial, serialNumber); + maxcomponent = INT_CtoML(maxComponentLength); + + length = strlen(szVolumeName); + vec1 = ML_AllocRaw (msp, BYTES_TO_WORDS (length + 1)); + strcpy_s(PTR_MLtoC(void, vec1), length+1, szVolumeName); + SEQHDR_ALLOC (msp, volume, DESC_string, vec1, length); + + length = strlen(szFilesystemName); + vec2 = ML_AllocRaw (msp, BYTES_TO_WORDS (length + 1)); + strcpy_s(PTR_MLtoC(void, vec2), length+1, szFilesystemName); + SEQHDR_ALLOC (msp, system, DESC_string, vec2, length); + + REC_ALLOC4(msp, res, volume, system, serial, maxcomponent); + return res; +} + + +ml_val_t _ml_win32_CONFIG_get_windows_directory (ml_state_t *msp, ml_val_t arg) +{ + TCHAR directory[MAX_PATH+1]; + DWORD dwSize = MAX_PATH+1; + ml_val_t res, vec; + + if ((dwSize = GetWindowsDirectory(directory, dwSize)) == 0) { + return RAISE_SYSERR(msp,-1); + } + + vec = ML_AllocRaw (msp, BYTES_TO_WORDS (dwSize+1)); + strcpy_s(PTR_MLtoC(void, vec), dwSize+1, directory); + SEQHDR_ALLOC (msp, res, DESC_string, vec, dwSize); + + return res; +} + +ml_val_t _ml_win32_CONFIG_get_system_directory (ml_state_t *msp, ml_val_t arg) +{ + TCHAR directory[MAX_PATH+1]; + DWORD dwSize = MAX_PATH+1; + ml_val_t res, vec; + + if ((dwSize = GetSystemDirectory(directory, dwSize)) == 0) { + return RAISE_SYSERR(msp,-1); + } + + vec = ML_AllocRaw (msp, BYTES_TO_WORDS (dwSize+1)); + strcpy_s(PTR_MLtoC(void, vec), dwSize+1, directory); + SEQHDR_ALLOC (msp, res, DESC_string, vec, dwSize); + + return res; +} + +ml_val_t _ml_win32_CONFIG_get_computer_name(ml_state_t *msp, ml_val_t arg) +{ + TCHAR name[MAX_PATH+1]; + DWORD dwSize = MAX_PATH+1; + ml_val_t res, vec; + + if (!GetComputerName(name, &dwSize)) { + return RAISE_SYSERR(msp,-1); + } + + vec = ML_AllocRaw (msp, BYTES_TO_WORDS (dwSize+1)); + strcpy_s(PTR_MLtoC(void, vec), dwSize+1, name); + SEQHDR_ALLOC (msp, res, DESC_string, vec, dwSize); + + return res; +} + +ml_val_t _ml_win32_CONFIG_get_user_name(ml_state_t *msp, ml_val_t arg) +{ + TCHAR name[MAX_PATH+1]; + DWORD dwSize = MAX_PATH+1; + ml_val_t res, vec; + + if (!GetUserName(name, &dwSize)) { + return RAISE_SYSERR(msp,-1); + } + + vec = ML_AllocRaw (msp, BYTES_TO_WORDS (dwSize)); + strcpy_s(PTR_MLtoC(void, vec), dwSize, name); + SEQHDR_ALLOC (msp, res, DESC_string, vec, dwSize-1); + + return res; +} diff --git a/base/runtime/c-libs/win32/win32-constants.c b/base/runtime/c-libs/win32/win32-constants.c new file mode 100644 index 0000000..b28f139 --- /dev/null +++ b/base/runtime/c-libs/win32/win32-constants.c @@ -0,0 +1,142 @@ +/*! \file win32-constants.c + * + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * interface to win32 constants + */ + +#include +#include "ml-base.h" +#include "ml-values.h" +#include "ml-objects.h" +#include "ml-c.h" + +#include "name-val.h" + +#define TAB_SZ(t) ((sizeof t)/(sizeof(name_val_t))) + +typedef struct { + name_val_t *ptab; + int sz; +} tab_desc_t; + +/* general table */ +PVT name_val_t general_tab[] = { + {"INVALID_HANDLE_VALUE", (Word_t)INVALID_HANDLE_VALUE} +}; + +/* FILE_ table */ +PVT name_val_t file_tab[] = { + {"BEGIN", FILE_BEGIN}, + {"CURRENT", FILE_CURRENT}, + {"END", FILE_END}, +}; + +/* FILE_ATTRIBUTE_ table */ +PVT name_val_t file_attr_tab[] = { + {"ARCHIVE", FILE_ATTRIBUTE_ARCHIVE}, +/** future win32 use + {"ATOMIC_WRITE", FILE_ATTRIBUTE_ATOMIC_WRITE}, +**/ + {"DIRECTORY", FILE_ATTRIBUTE_DIRECTORY}, + {"HIDDEN", FILE_ATTRIBUTE_HIDDEN}, + {"NORMAL", FILE_ATTRIBUTE_NORMAL}, + {"READONLY", FILE_ATTRIBUTE_READONLY}, + {"SYSTEM", FILE_ATTRIBUTE_SYSTEM}, + {"TEMPORARY", FILE_ATTRIBUTE_TEMPORARY}, +/** future win32 use + {"XACTION_WRITE", FILE_ATTRIBUTE_XACTION_WRITE}, +**/ +}; + +/* FILE_FLAG__ table */ +PVT name_val_t file_flag_tab[] = { + {"BACKUP_SEMANTICS", FILE_FLAG_BACKUP_SEMANTICS}, + {"DELETE_ON_CLOSE", FILE_FLAG_DELETE_ON_CLOSE}, + {"NO_BUFFERING", FILE_FLAG_NO_BUFFERING}, + {"OVERLAPPED", FILE_FLAG_OVERLAPPED}, + {"POSIX_SEMANTICS", FILE_FLAG_POSIX_SEMANTICS}, + {"RANDOM_ACCESS", FILE_FLAG_RANDOM_ACCESS}, + {"SEQUENTIAL_SCAN", FILE_FLAG_SEQUENTIAL_SCAN}, + {"WRITE_THROUGH", FILE_FLAG_WRITE_THROUGH}, +}; + +/* FILE_MODE__ table */ +PVT name_val_t file_mode_tab[] = { + {"CREATE_ALWAYS", CREATE_ALWAYS}, + {"CREATE_NEW", CREATE_NEW}, + {"OPEN_ALWAYS", OPEN_ALWAYS}, + {"OPEN_EXISTING", OPEN_EXISTING}, + {"TRUNCATE_EXISTING", TRUNCATE_EXISTING}, +}; + +/* FILE_SHARE_ table */ +PVT name_val_t file_share_tab[] = { + {"READ", FILE_SHARE_READ}, + {"WRITE", FILE_SHARE_WRITE}, +}; + +/* GENERIC__ table */ +PVT name_val_t generic_tab[] = { + {"READ", GENERIC_READ}, + {"WRITE", GENERIC_WRITE}, +}; + +/* STD_HANDLE table */ +PVT name_val_t std_handle_tab[] = { + {"ERROR", STD_ERROR_HANDLE}, + {"INPUT", STD_INPUT_HANDLE}, + {"OUTPUT", STD_OUTPUT_HANDLE}, +}; + +/* every constant table must have an entry in the descriptor table */ +PVT tab_desc_t table[] = { + {file_tab, TAB_SZ(file_tab)}, + {file_attr_tab, TAB_SZ(file_attr_tab)}, + {file_flag_tab, TAB_SZ(file_flag_tab)}, + {file_mode_tab, TAB_SZ(file_mode_tab)}, + {file_share_tab, TAB_SZ(file_share_tab)}, + {general_tab, TAB_SZ(general_tab)}, + {generic_tab, TAB_SZ(generic_tab)}, + {std_handle_tab, TAB_SZ(std_handle_tab)}, +}; + +/* constant classes */ +PVT name_val_t class[] = { + {"FILE", 0}, + {"FILE_ATTRIBUTE", 1}, + {"FILE_FLAG", 2}, + {"FILE_MODE", 3}, + {"FILE_SHARE", 4}, + {"GENERAL", 5}, + {"GENERIC", 6}, + {"STD_HANDLE",7}, +}; +#define N_CLASSES TAB_SZ(class) + + +/* _ml_win32_get_const: (string * string) -> word32 + * lookup (class,constant) pair + */ +ml_val_t _ml_win32_get_const(ml_state_t *msp, ml_val_t arg) +{ + char *s1 = STR_MLtoC(REC_SEL(arg,0)); + char *s2 = STR_MLtoC(REC_SEL(arg,1)); + name_val_t *ptab, *res; + int index; + ml_val_t v; + + ptab = nv_lookup(s1, class, N_CLASSES); + if (ptab) { + index = ptab->data; + ASSERT(index < TAB_SZ(table)); + if (res = nv_lookup(s2, table[index].ptab, table[index].sz)) { + return WORD32_CtoML(msp, res->data); + } + return RAISE_ERROR(msp,"win32_cconst: unknown constant"); + } + return RAISE_ERROR(msp,"win32_cconst: unknown constant class"); +} + +/* end of win32-constants.c */ diff --git a/base/runtime/c-libs/win32/win32-dde.c b/base/runtime/c-libs/win32/win32-dde.c new file mode 100644 index 0000000..0c428b6 --- /dev/null +++ b/base/runtime/c-libs/win32/win32-dde.c @@ -0,0 +1,86 @@ +/*! \file win32-dde.c + * + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * interface to win32 dynamic data exchange. Note that HCONV is just an alias + * for HANDLE. + */ + +#include + +#include "ml-base.h" +#include "ml-values.h" +#include "ml-objects.h" +#include "ml-c.h" + +HDDEDATA CALLBACK DdeCallback (UINT uType, UINT uFmt, HCONV hconv, HSZ hsz1, + HSZ hsz2, HDDEDATA hdata, ULONG_PTR dwData1, + ULONG_PTR dwData2) +{ + /* As a DDE client, we will only get notification messages that + ignore the return value anyway */ + return (HDDEDATA)0; +} + +static DWORD InstanceId = 0; + +void InitializeIfNeeded () +{ + if (InstanceId == 0) { + DdeInitialize(&InstanceId, DdeCallback, APPCMD_CLIENTONLY, 0); + } +} + +ml_val_t _ml_win32_DDE_start_dialog (ml_state_t *msp, ml_val_t arg) +{ + Byte_t* service = STR_MLtoC(REC_SEL(arg,0)); + Byte_t* topic = STR_MLtoC(REC_SEL(arg,1)); + HCONV conversation = NULL; + HSZ hszService, hszTopic; + ml_val_t res; + + InitializeIfNeeded(); + + hszService = DdeCreateStringHandle(InstanceId, service, CP_WINANSI); + hszTopic = DdeCreateStringHandle(InstanceId, topic, CP_WINANSI); + + conversation = DdeConnect(InstanceId, hszService, hszTopic, NULL); + + DdeFreeStringHandle(InstanceId, hszService); + DdeFreeStringHandle(InstanceId, hszTopic); + + return HANDLE_CtoML(msp, conversation); +} + +ml_val_t _ml_win32_DDE_execute_string (ml_state_t *msp, ml_val_t arg) +{ + HCONV conversation = (HCONV)HANDLE_MLtoC(REC_SEL(arg, 0)); + Byte_t* command = STR_MLtoC(REC_SEL(arg,1)); + Word_t retry = INT_MLtoC(REC_SEL(arg,2)); + Word_t delay = INT_MLtoC(REC_SEL(arg,3)); + DWORD dwResult = 0; + HDDEDATA retval = 0; + + do { + retval = DdeClientTransaction( + command, strlen(command)+1, conversation, 0, 0, + XTYP_EXECUTE, delay, &dwResult); + retry--; + } while (retval == 0 && DdeGetLastError(InstanceId) == DMLERR_BUSY && retry >= 0); + + if (!retval) { + return RAISE_SYSERR(msp,-1); + } + + return ML_unit; +} + +ml_val_t _ml_win32_DDE_stop_dialog (ml_state_t *msp, ml_val_t arg) +{ + HCONV conversation = (HCONV)HANDLE_MLtoC(REC_SEL(arg, 0)); + + DdeDisconnect(conversation); + + return ML_unit; +} diff --git a/base/runtime/c-libs/win32/win32-errors.c b/base/runtime/c-libs/win32/win32-errors.c new file mode 100644 index 0000000..86b2c84 --- /dev/null +++ b/base/runtime/c-libs/win32/win32-errors.c @@ -0,0 +1,25 @@ +/* win32-errors.c + * + * COPYRIGHT (c) 1996 Bell Laboratories, Lucent Technologies + * + * interface to win32 error functions + */ + +#include +#include "ml-base.h" +#include "ml-values.h" +#include "ml-objects.h" +#include "ml-c.h" + +/* _ml_win32_get_last_error: unit -> word + */ +ml_val_t _ml_win32_get_last_error(ml_state_t *msp, ml_val_t arg) +{ + Word_t err = (Word_t)GetLastError(); + ml_val_t res; + + return WORD32_CtoML(msp, err); +} + +/* end of win32-errors.c */ + diff --git a/base/runtime/c-libs/win32/win32-lib.c b/base/runtime/c-libs/win32/win32-lib.c new file mode 100644 index 0000000..283b692 --- /dev/null +++ b/base/runtime/c-libs/win32/win32-lib.c @@ -0,0 +1,28 @@ +/* win32-lib.c + * + * COPYRIGHT (c) 1996 Bell Laboratories, Lucent Technologies + */ + +#include "ml-base.h" +#include "c-library.h" +#include "cfun-proto-list.h" + + +/* the table of C functions and ML names */ +#define CFUNC(NAME, FUNC, MLTYPE) CFUNC_BIND(NAME, FUNC, MLTYPE) +PVT cfunc_binding_t CFunTable[] = { +#include "cfun-list.h" + CFUNC_NULL_BIND + }; +#undef CFUNC + + +/* the Signals library */ +c_library_t WIN32_Library = { + CLIB_NAME, + CLIB_VERSION, + CLIB_DATE, + NIL(clib_init_fn_t), + CFunTable + }; + diff --git a/base/runtime/c-libs/win32/win32-reg.c b/base/runtime/c-libs/win32/win32-reg.c new file mode 100644 index 0000000..8098119 --- /dev/null +++ b/base/runtime/c-libs/win32/win32-reg.c @@ -0,0 +1,396 @@ +/* win32-reg.c + * + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * interface to win32 registry functions; note that HKEY is just an alias + * for HANDLE + */ + +#include + +#include "ml-base.h" +#include "ml-values.h" +#include "ml-objects.h" +#include "ml-c.h" + +ml_val_t _ml_win32_REG_open_key_ex(ml_state_t *msp, ml_val_t arg) +{ + HKEY key = (HKEY)HANDLE_MLtoC(REC_SEL(arg,0)); + Byte_t *subKey = STR_MLtoC(REC_SEL(arg,1)); + Word_t flags = REC_SELWORD(arg,2); + HKEY target = NULL; + + LONG result = RegOpenKeyEx(key, subKey, 0, flags, &target); + + if (result == ERROR_SUCCESS) { + ml_val_t res; + return HANDLE_CtoML(msp, target); + } + + return RAISE_SYSERR(msp,-1); +} + +ml_val_t _ml_win32_REG_create_key_ex(ml_state_t *msp, ml_val_t arg) +{ + HKEY key = (HKEY)HANDLE_MLtoC(REC_SEL(arg,0)); + Byte_t *subKey = STR_MLtoC(REC_SEL(arg,1)); + Word_t flags = REC_SELWORD(arg,2); + HKEY target = NULL; + DWORD dwDisposition = 0; + ml_val_t res; + + LONG result = RegCreateKeyEx(key, subKey, 0, NULL, 0, flags, NULL, &target, &dwDisposition); + + if (result == ERROR_SUCCESS) { + RegCloseKey(target); + target = NULL; + + /* Safe, as can only ever be 1 or 2 */ + return WORD32_CtoML(msp, dwDisposition); + } + + return RAISE_SYSERR(msp,-1); +} + +ml_val_t _ml_win32_REG_close_key_ex(ml_state_t *msp, ml_val_t arg) +{ + HKEY key = (HKEY)HANDLE_MLtoC(arg); + LONG result = RegCloseKey(key); + + if (result == ERROR_SUCCESS) { + return ML_unit; + } + + return RAISE_SYSERR(msp,-1); +} + +ml_val_t _ml_win32_REG_delete_key(ml_state_t *msp, ml_val_t arg) +{ + HKEY key = (HKEY)HANDLE_MLtoC(REC_SEL(arg,0)); + Byte_t *subKey = STR_MLtoC(REC_SEL(arg,1)); + LONG result = RegDeleteKey(key, subKey); + + if (result == ERROR_SUCCESS) { + return ML_unit; + } + + return RAISE_SYSERR(msp,-1); +} + +ml_val_t _ml_win32_REG_delete_value(ml_state_t *msp, ml_val_t arg) +{ + HKEY key = (HKEY)HANDLE_MLtoC(REC_SEL(arg,0)); + Byte_t *subKey = STR_MLtoC(REC_SEL(arg,1)); + LONG result = RegDeleteValue(key, subKey); + + if (result == ERROR_SUCCESS) { + return ML_unit; + } + + return RAISE_SYSERR(msp,-1); +} + +ml_val_t _ml_win32_REG_enum_key_ex(ml_state_t *msp, ml_val_t arg) +{ + HKEY key = (HKEY)HANDLE_MLtoC(REC_SEL(arg,0)); + Word_t index = INT_MLtoC(REC_SEL(arg,1)); + ml_val_t vec; + HKEY target = NULL; + LONG result = 0; + LONG characters = 256; /* 255 is the max key name size */ + + vec = ML_AllocRaw (msp, BYTES_TO_WORDS (characters)); + result = RegEnumKeyEx(key, index, PTR_MLtoC(void, vec), &characters, 0, NULL, NULL, NULL); + + /* return string option */ + if (result == ERROR_SUCCESS) { + ml_val_t obj, res; + + /* allocate string header */ + SEQHDR_ALLOC (msp, obj, DESC_string, vec, characters); + /* put together the option string */ + OPTION_SOME(msp, res, obj); + return res; + } + + /* return NONE */ + if (result == ERROR_NO_MORE_ITEMS) { + return OPTION_NONE; + } + + return RAISE_SYSERR(msp,-1); +} + +ml_val_t _ml_win32_REG_enum_value_ex(ml_state_t *msp, ml_val_t arg) +{ + HKEY key = (HKEY)HANDLE_MLtoC(REC_SEL(arg,0)); + Word_t index = INT_MLtoC(REC_SEL(arg,1)); + ml_val_t vec; + HKEY target = NULL; + LONG result = 0; + DWORD nameLen = 0; + + result = RegQueryInfoKey(key, NULL, NULL, 0, NULL, NULL, NULL, NULL, &nameLen, NULL, NULL, NULL); + if (result != ERROR_SUCCESS) { + return RAISE_SYSERR(msp,-1); + } + + nameLen += 1; + vec = ML_AllocRaw (msp, BYTES_TO_WORDS (nameLen)); + result = RegEnumValue(key, index, PTR_MLtoC(void, vec), &nameLen, 0, NULL, NULL, NULL); + + /* return string option */ + if (result == ERROR_SUCCESS) { + ml_val_t obj, res; + + /* allocate string header */ + SEQHDR_ALLOC (msp, obj, DESC_string, vec, nameLen); + /* put together the option string */ + OPTION_SOME(msp, res, obj); + return res; + } + + /* return NONE */ + if (result == ERROR_NO_MORE_ITEMS) { + return OPTION_NONE; + } + + return RAISE_SYSERR(msp,-1); +} + +/* + * This is a helper method to get the underlying type stored in the + * registry value, so that the calling ML code can type select to the + * appropriately-typed getter. Note that we have to go to these + * lengths because it's not currently straightforward to store + * values into datatypes. + */ +ml_val_t _ml_win32_REG_query_value_type(ml_state_t *msp, ml_val_t arg) +{ + HKEY key = (HKEY)HANDLE_MLtoC(REC_SEL(arg,0)); + Byte_t *valueName = STR_MLtoC(REC_SEL(arg,1)); + LONG result = 0; + DWORD dwType = 0; + ml_val_t res; + + result = RegQueryValueEx(key, valueName, 0, &dwType, NULL, NULL); + if (result != ERROR_SUCCESS) { + return RAISE_SYSERR(msp,-1); + } + + return WORD32_CtoML(msp, dwType); +} + +ml_val_t _ml_win32_REG_QueryString(ml_state_t *msp, ml_val_t arg) +{ + HKEY key = (HKEY)HANDLE_MLtoC(REC_SEL(arg,0)); + Byte_t *valueName = STR_MLtoC(REC_SEL(arg,1)); + LONG result = 0; + DWORD dwSize = 0; + ml_val_t res, vec; + + result = RegQueryValueEx(key, valueName, 0, NULL, NULL, &dwSize); + if (result != ERROR_SUCCESS) { + return RAISE_SYSERR(msp,-1); + } + + vec = ML_AllocRaw (msp, BYTES_TO_WORDS (dwSize)); + result = RegQueryValueEx(key, valueName, 0, NULL, PTR_MLtoC(void, vec), &dwSize); + if (result != ERROR_SUCCESS) { + return RAISE_SYSERR(msp,-1); + } + + /* allocate string header */ + /* note that we subtract one, as it comes back with a trailing null included in the count */ + SEQHDR_ALLOC (msp, res, DESC_string, vec, dwSize-1); + return res; +} + +ml_val_t _ml_win32_REG_query_value_string(ml_state_t *msp, ml_val_t arg) +{ + return _ml_win32_REG_QueryString(msp, arg); +} + +ml_val_t _ml_win32_REG_query_value_multi_string(ml_state_t *msp, ml_val_t arg) +{ + HKEY key = (HKEY)HANDLE_MLtoC(REC_SEL(arg,0)); + Byte_t *valueName = STR_MLtoC(REC_SEL(arg,1)); + LONG result = 0; + DWORD dwSize = 0; + ml_val_t res, vec, str, tail; + char *concatenated = NULL; + char *ptr = NULL; + int nextToCopy = 0; + + result = RegQueryValueEx(key, valueName, 0, NULL, NULL, &dwSize); + if (result != ERROR_SUCCESS) { + return RAISE_SYSERR(msp,-1); + } + + concatenated = (char *)MALLOC(dwSize); + result = RegQueryValueEx(key, valueName, 0, NULL, concatenated , &dwSize); + if (result != ERROR_SUCCESS) { + return RAISE_SYSERR(msp,-1); + } + + + res = LIST_nil; + ptr = concatenated; + while (dwSize > 0) { + nextToCopy = strlen(ptr); + vec = ML_AllocRaw (msp, BYTES_TO_WORDS (nextToCopy+1)); + strcpy_s((PTR_MLtoC(char, vec)), nextToCopy+1, ptr); + SEQHDR_ALLOC (msp, str, DESC_string, vec, nextToCopy); + ptr += strlen(ptr)+1; + + tail = res; + LIST_cons(msp, res, str, tail); + dwSize -= (nextToCopy + 1); + } + + FREE(concatenated); + return res; +} + +ml_val_t _ml_win32_REG_query_value_expand_string(ml_state_t *msp, ml_val_t arg) +{ + return _ml_win32_REG_QueryString(msp, arg); +} + +ml_val_t _ml_win32_REG_query_value_dword(ml_state_t *msp, ml_val_t arg) +{ + HKEY key = (HKEY)HANDLE_MLtoC(REC_SEL(arg,0)); + Byte_t *valueName = STR_MLtoC(REC_SEL(arg,1)); + LONG result = 0; + DWORD dwValue = 0; + DWORD dwSize = sizeof(DWORD); + ml_val_t res, vec; + + result = RegQueryValueEx(key, valueName, 0, NULL, (LPBYTE)&dwValue, &dwSize); + if (result != ERROR_SUCCESS) { + return RAISE_SYSERR(msp,-1); + } + + return WORD32_CtoML(msp, dwValue); +} + +ml_val_t _ml_win32_REG_query_value_binary(ml_state_t *msp, ml_val_t arg) +{ + HKEY key = (HKEY)HANDLE_MLtoC(REC_SEL(arg,0)); + Byte_t *valueName = STR_MLtoC(REC_SEL(arg,1)); + void *pData = NULL; + LONG result = 0; + DWORD dwSize = 0; + ml_val_t res, vec, zero; + + result = RegQueryValueEx(key, valueName, 0, NULL, NULL, &dwSize); + if (result != ERROR_SUCCESS) { + return RAISE_SYSERR(msp,-1); + } + + vec = ML_AllocBytearray (msp, dwSize); + pData = GET_SEQ_DATAPTR(void, vec); + result = RegQueryValueEx(key, valueName, 0, NULL, pData, &dwSize); + if (result != ERROR_SUCCESS) { + return RAISE_SYSERR(msp,-1); + } + + return vec; +} + +ml_val_t _ml_win32_REG_set_value_dword(ml_state_t *msp, ml_val_t arg) +{ + HKEY key = (HKEY)HANDLE_MLtoC(REC_SEL(arg,0)); + Byte_t *valueName = STR_MLtoC(REC_SEL(arg,1)); + DWORD dwValue = REC_SELWORD(arg,2); + LONG result = 0; + DWORD dwSize = sizeof(DWORD); + + result = RegSetValueEx(key, valueName, 0, REG_DWORD, (const BYTE *)&dwValue, dwSize); + if (result != ERROR_SUCCESS) { + return RAISE_SYSERR(msp,-1); + } + + return ML_unit; +} + +ml_val_t _ml_win32_REG_SetStringValue(ml_state_t *msp, ml_val_t arg, DWORD dwStringType) +{ + HKEY key = (HKEY)HANDLE_MLtoC(REC_SEL(arg,0)); + Byte_t *valueName = STR_MLtoC(REC_SEL(arg,1)); + Byte_t *value = STR_MLtoC(REC_SEL(arg,2)); + LONG result = 0; + DWORD dwSize = strlen(value)+1; + + result = RegSetValueEx(key, valueName, 0, dwStringType, (const BYTE *)value, dwSize); + if (result != ERROR_SUCCESS) { + return RAISE_SYSERR(msp,-1); + } + + return ML_unit; +} + +ml_val_t _ml_win32_REG_set_value_string(ml_state_t *msp, ml_val_t arg) +{ + return _ml_win32_REG_SetStringValue(msp, arg, REG_SZ); +} + +ml_val_t _ml_win32_REG_set_value_expand_string(ml_state_t *msp, ml_val_t arg) +{ + return _ml_win32_REG_SetStringValue(msp, arg, REG_EXPAND_SZ); +} + +ml_val_t _ml_win32_REG_set_value_multi_string(ml_state_t *msp, ml_val_t arg) +{ + HKEY key = (HKEY)HANDLE_MLtoC(REC_SEL(arg,0)); + Byte_t *valueName = STR_MLtoC(REC_SEL(arg,1)); + ml_val_t stringList = REC_SEL(arg,2); + LONG result = 0; + DWORD dwSize = 0; + ml_val_t iter = stringList; + char *concatenated = NULL; + char *ptr = NULL; + + while (iter != LIST_nil) { + /* need to add one to each for room for extra NULLs */ + dwSize += strlen(STR_MLtoC(LIST_hd(iter))) + 1; + iter = LIST_tl(iter); + } + + /* extra second NULL terimator at end */ + concatenated = (char *)MALLOC((dwSize+1) * sizeof(char)); + ptr = concatenated; + iter = stringList; + while (iter != LIST_nil) { + strcpy_s(ptr, dwSize+1, STR_MLtoC(LIST_hd(iter))); + ptr += strlen(ptr)+1; + iter = LIST_tl(iter); + } + (*ptr) = '\0'; + + result = RegSetValueEx(key, valueName, 0, REG_MULTI_SZ, (const BYTE *)concatenated, dwSize); + FREE(concatenated); + if (result != ERROR_SUCCESS) { + return RAISE_SYSERR(msp,-1); + } + + return ML_unit; +} + +ml_val_t _ml_win32_REG_set_value_binary(ml_state_t *msp, ml_val_t arg) +{ + HKEY key = (HKEY)HANDLE_MLtoC(REC_SEL(arg,0)); + Byte_t *valueName = STR_MLtoC(REC_SEL(arg,1)); + Byte_t *dwValue = GET_SEQ_DATAPTR(Byte_t, REC_SEL(arg,2)); + DWORD dwSize = GET_SEQ_LEN(REC_SEL(arg,2)); + + LONG result = 0; + + result = RegSetValueEx(key, valueName, 0, REG_BINARY, (const BYTE *)dwValue, dwSize); + if (result != ERROR_SUCCESS) { + return RAISE_SYSERR(msp,-1); + } + + return ML_unit; +} diff --git a/base/runtime/config/gen-common.c b/base/runtime/config/gen-common.c new file mode 100644 index 0000000..72c3b78 --- /dev/null +++ b/base/runtime/config/gen-common.c @@ -0,0 +1,58 @@ +/* gen-common.c + * + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * Common code for generating header files. + */ + +#include +#include +#include "gen.h" + +/* OpenFile: + * + * Open a generated file, and generate its header comment. + */ +FILE *OpenFile (char *fname, char *flag) +{ + FILE *f; + + if ((f = fopen(fname, "w")) == NULL) { + fprintf (stderr, "unable to open file \"%s\"\n", fname); + exit (1); + } + + fprintf (f, "/* %s\n", fname); + fprintf (f, " *\n"); + fprintf (f, " * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org)\n"); + fprintf (f, " * All rights reserved.\n"); + fprintf (f, " *\n"); + fprintf (f, " * NOTE: this file is generated --- do not edit!!!\n"); + fprintf (f, " */\n"); + fprintf (f, "\n"); + if (flag != (char *)0) { + fprintf (f, "#ifndef %s\n", flag); + fprintf (f, "#define %s\n", flag); + fprintf (f, "\n"); + } + + return f; + +} /* end of OpenFile */ + + +/* CloseFile: + * + * Generate the file trailer, and close the generated file. + */ +void CloseFile (FILE *f, char *flag) +{ + if (flag != (char *)0) { + fprintf (f, "\n"); + fprintf (f, "#endif /* !%s */\n", flag); + } + + fclose (f); + +} /* CloseFile */ diff --git a/base/runtime/config/gen-offsets.c b/base/runtime/config/gen-offsets.c new file mode 100644 index 0000000..172d34a --- /dev/null +++ b/base/runtime/config/gen-offsets.c @@ -0,0 +1,72 @@ +/*! \file gen-offsets.c + * + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * This C program generates a header file for the *.prim.asm files, + * which gives the offset values in the VProc and ML state vectors. + * + * Note that we only generate offsets for three miscregs; this is because + * only the first three miscregs are marked as callee-save by the compiler + * and are thus live when saveregs is called. See compiler/CodeGen/main/machspec.sig + * and compiler/CodeGen/cpscompile/invokegc.sml. + */ + +#include "ml-base.h" +#include "vproc-state.h" +#include "ml-state.h" +#include "gen.h" + +#define MOFFSET(fld) (((Addr_t)&(M.s.fld)) - (Addr_t)&(M.b[0])) +#define VOFFSET(fld) (((Addr_t)&(V.s.fld)) - (Addr_t)&(V.b[0])) + +#define PVOFFSET(sym, fld) \ + fprintf(f, "#define %sOffVSP %ld\n", (sym), (long int) VOFFSET(fld)) +#define PMOFFSET(sym, fld) \ + fprintf(f, "#define %sOffMSP %ld\n", (sym), (long int) MOFFSET(fld)) + + +int main (void) +{ + union { + vproc_state_t s; + char b[sizeof(vproc_state_t)]; + } V; + union { + ml_state_t s; + char b[sizeof(ml_state_t)]; + } M; + FILE *f; + + f = OpenFile ("mlstate-offsets.h", "_MLSTATE_OFFSETS_"); + + PMOFFSET("VProc", ml_vproc); + PMOFFSET("AllocPtr", ml_allocPtr); + PMOFFSET("LimitPtr", ml_limitPtr); + PMOFFSET("StorePtr", ml_storePtr); + PMOFFSET("StdArg", ml_arg); + PMOFFSET("StdCont", ml_cont); + PMOFFSET("StdClos", ml_closure); + PMOFFSET("LinkReg", ml_linkReg); + PMOFFSET("PC", ml_pc); + PMOFFSET("ExnPtr", ml_exnCont); + PMOFFSET("VarPtr", ml_varReg); + PMOFFSET("Misc0", ml_calleeSave[0]); + PMOFFSET("Misc1", ml_calleeSave[1]); + PMOFFSET("Misc2", ml_calleeSave[2]); +#ifdef SOFT_POLL + PMOFFSET("RealLimit", ml_realLimit); + PMOFFSET("PollPending", ml_pollPending); + PMOFFSET("InPollHandler", ml_inPollHandler); +#endif + PVOFFSET("InML", vp_inMLFlag); + PVOFFSET("HandlerPending", vp_handlerPending); + PVOFFSET("InSigHandler", vp_inSigHandler); + PVOFFSET("SigsRecv", vp_totalSigCount.nReceived); + PVOFFSET("SigsHandled", vp_totalSigCount.nHandled); + PVOFFSET("LimitPtrMask", vp_limitPtrMask); + + CloseFile (f, "_MLSTATE_OFFSETS_"); + + exit (0); +} diff --git a/base/runtime/config/gen-posix-names.sh b/base/runtime/config/gen-posix-names.sh new file mode 100755 index 0000000..50e38f2 --- /dev/null +++ b/base/runtime/config/gen-posix-names.sh @@ -0,0 +1,145 @@ +#!/bin/sh +# +# COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) +# All rights reserved. +# +# Generate string-to-int tables for run-time POSIX values +# queried using sysconf and (f)pathconf. +# +# Usage: gen-posix-names.sh +# + +# redefine PATH so that we get the right versions of the various tools +# +PATH=/bin:/usr/bin + +# set locale variables so that sort works right +# +export LC_CTYPE LC_COLLATE +LC_CTYPE=C +LC_COLLATE=C + +CPP=${CPP:-/lib/cpp} + +PFIX=$1 # prefix: _SC_ or _PC_ +OUTF=$2 # name of output file + +INCLFILE="none" +USED_ENUMS="FALSE" + +# target specific workarounds +# +case "$VERSION" in + # linux uses enums for the _SC_ constants, so we cannot use the #ifdef check to avoid symbols + # that are not really defined in unistd.h. + *linux*) + USED_ENUMS="TRUE" + INCLFILE=tmp$$.h + SRCFILE=tmp$$.c + echo "#include " > $SRCFILE + $CPP $SRCFILE > $INCLFILE + rm -f $SRCFILE + ;; + # newer versions of the Mac OS X developer tools keep the include files inside the Xcode + # application bundle, so we add that as a possible path. + *x86-darwin*) + if test -r /usr/include/unistd.h ; then + INCLFILE=/usr/include/unistd.h + else + # some versions of the Mac OS X developer tools keep the include files inside the Xcode + # application bundle, so we add that as a possible path. + case `uname -r` in + 13.*) SDK=MacOSX10.9.sdk ;; + 14.*) SDK=MacOSX10.10.sdk ;; + 15.*) SDK=MacOSX10.11.sdk ;; + 16.*) SDK=MacOSX10.12.sdk ;; + 17.*) SDK=MacOSX10.13.sdk ;; + # Note that for Mojave (macOS 10.14; Darwin 18.x), we use the High Sierra SDK, since + # building 32-bit apps is no longer supported. + 18.*) SDK=MacOSX10.13.sdk ;; + 19.*) + echo "macOS 10.15 Catalina does not support 32-bit executables" + exit 1 + ;; + *) SDK=none ;; + esac + if test x$SDK != xnone ; then + # note: at some point, we might use "xcrun --show-sdk-path", but that only works + # with Xcode 5.x+ + XCODE_DEV_PATH=`xcode-select --print-path` + if [ x"$XCODE_DEV_PATH" = x/Library/Developer/CommandLineTools ] ; then + XCODE_SDK_PATH="$XCODE_DEV_PATH"/SDKs + else + XCODE_SDK_PATH=`xcode-select -p`/Platforms/MacOSX.platform/Developer/SDKs + fi + INCLFILE=$XCODE_SDK_PATH/$SDK/usr/include/unistd.h + # verify that unistd.h exists at the expected place + # + if test ! -r $INCLFILE ; then + echo "gen-posix-names.sh: unable to find " + exit 1 + fi + fi + fi + ;; + *amd64-darwin) + XCODE_SDK_PATH=`xcrun --show-sdk-path` + INCLFILE=$XCODE_SDK_PATH/usr/include/unistd.h + # verify that unistd.h exists at the expected place + # + if test ! -r $INCLFILE ; then + echo "gen-posix-names.sh: unable to find " + exit 1 + fi + ;; + *) ;; +esac + +if test x$INCLFILE = xnone ; then +# search the possible include files looking for a source +# of the constants. + FILES="\ + /usr/include/unistd.h \ + /usr/include/sys/unistd.h \ + /usr/include/bsd/unistd.h \ + /usr/include/confname.h \ + " + for f in $FILES ; do + if test -r $f ; then + grep $PFIX $f > /dev/null + if test $? -eq 0 ; then + INCLFILE=$f + break; + fi + fi + done + if test x$INCLFILE = xnone ; then + echo "gen-posix-names.sh: unable to find " + exit 1 + fi +fi + +echo "/* $OUTF" >> $OUTF +echo " *" >> $OUTF +echo " * This file is generated by gen-posix-names.sh from $INCLFILE" >> $OUTF +echo " */" >> $OUTF + +if [ "$USED_ENUMS" = "TRUE" ]; then + for i in `sed -n "s/.*$PFIX\([0-9A-Z_]*\).*/\1/p" $INCLFILE | sort -u` + do + echo " {\"$i\", $PFIX$i}," >> $OUTF + done +else + for i in `sed -n "s/.*$PFIX\([0-9A-Z_]*\).*/\1/p" $INCLFILE | sort -u` + do + echo "#ifdef $PFIX$i" >> $OUTF + echo " {\"$i\", $PFIX$i}," >> $OUTF + echo "#endif" >> $OUTF + done +fi + +if [ "$USED_ENUMS" = "TRUE" ]; then + rm -f $INCLFILE +fi + +exit 0 diff --git a/base/runtime/config/gen-regmask.c b/base/runtime/config/gen-regmask.c new file mode 100644 index 0000000..a6d400e --- /dev/null +++ b/base/runtime/config/gen-regmask.c @@ -0,0 +1,78 @@ +/* gen-regmask.c + * + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * This file generates default definitions of some compiler flags and + * various register masks. The masks define the registers that are + * live in the following situations: + * + * FUN_MASK -- polymorphic (wrapped) function entry. + * + * RET_MASK -- return continuation mask + * + * CONT_MASK -- wrapped callcc continuation entry. + * + * EXN_MASK -- exception handler entry + * + * The defined constants are: + * + * CALLEESAVE + * FLOATCALLEESAVE + */ + +#include "gen.h" + +#ifndef DST_FILE +#define DST_FILE "reg-mask.h" +#endif + +#ifndef CALLEESAVE +# define GEN_CALLEESAVE +# define CALLEESAVE 3 +#endif +#ifndef FLOAT_CALLEESAVE +# define GEN_FLOAT_CALLEESAVE +# define FLOAT_CALLEESAVE 0 +#endif + +#if (CALLEESAVE > 0) +# define FUN_MASK ((1 << (CALLEESAVE + 4)) - 1) +# define RET_MASK ((1 << (CALLEESAVE + 4)) - 0x10 + 0xc) +# define CONT_MASK FUN_MASK +# define EXN_MASK FUN_MASK +#else +# define FUN_MASK ((1 << (CALLEESAVE + 4)) - 1) +# define RET_MASK (0xd) +# define CONT_MASK FUN_MASK +# define EXN_MASK CONT_MASK +#endif + +main () +{ + FILE *f; + + f = OpenFile (DST_FILE, "_REG_MASK_"); + + fprintf (f, "\n"); +#ifdef GEN_CALLEESAVE + fprintf (f, "#define CALLEESAVE %d\n", CALLEESAVE); +#endif +#ifdef GEN_FLOAT_CALLEESAVE + fprintf (f, "#define FLOAT_CALLEESAVE %d\n", FLOAT_CALLEESAVE); +#endif + fprintf (f, "\n"); + fprintf (f, "#define FUN_MASK\t\t%d\t/*\t%#010x\t*/\n", + FUN_MASK, FUN_MASK); + fprintf (f, "#define RET_MASK\t\t%d\t/*\t%#010x\t*/\n", + RET_MASK, RET_MASK); + fprintf (f, "#define CONT_MASK\t\t%d\t/*\t%#010x\t*/\n", + CONT_MASK, CONT_MASK); + fprintf (f, "#define EXN_MASK\t\t%d\t/*\t%#010x\t*/\n", + EXN_MASK, EXN_MASK); + fprintf (f, "\n"); + CloseFile (f, "_REG_MASK_"); + + exit (0); + +} diff --git a/base/runtime/config/gen-sizes.c b/base/runtime/config/gen-sizes.c new file mode 100644 index 0000000..2b2c432 --- /dev/null +++ b/base/runtime/config/gen-sizes.c @@ -0,0 +1,128 @@ +/*! \file gen-sizes.c + * + * \author John Reppy + * + * This program generates the "ml-sizes.h" header file; this file is + * usable in both C and assembly files. + */ + +/* + * COPYRIGHT (c) 2017 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + */ + +#include +#include +#include "gen.h" + +#define NIL(ty) ((ty)0) + +#if SIZE_64 +# define WORD_SZB 8 +#else +# define WORD_SZB 4 +#endif +# define ADDR_SZB sizeof(char *) + +static union { + char bytes[sizeof(unsigned long)]; + unsigned long l; +} U; + +int ilog2 (int x) +{ + int i, j; + + for (i = 1, j = 2; j <= x; i++, j += j) + continue; + + return i-1; + +} /* end of ilog2 */ + +int main (void) +{ + char *i16, *i32, *i64; + FILE *f; + + i16 = i32 = i64 = NIL(char *); + + if (sizeof(short) == 2) { + i16 = "short"; + } + if (sizeof(int) == 4) { + i32 = "int"; + } else if (sizeof(long) == 4) { + i32 = "long"; + } + if (sizeof(long) == 8) { + i64 = "long"; + } else if (sizeof(long long) == 8) { + i64 = "long long"; + } + + if (i16 == NIL(char *)) { + fprintf(stderr, "gen-sizes: Error -- no 16-bit integer type\n"); + exit (1); + } + if (i32 == NIL(char *)) { + fprintf(stderr, "gen-sizes: Error -- no 32-bit integer type\n"); + exit (1); + } + if (i64 == NIL(char *)) { + fprintf(stderr, "gen-sizes: Error -- no 64-bit integer type\n"); + exit (1); + } + + f = OpenFile ("ml-sizes.h", "_ML_SIZES_"); + + fprintf (f, "#define WORD_SZB %d\n", (int)WORD_SZB); + fprintf (f, "#define ADDR_SZB %d\n", (int)ADDR_SZB); + fprintf (f, "#define REALD_SZB %d\n", (int)sizeof(double)); + fprintf (f, "#define BITS_PER_WORD %d\n", 8*WORD_SZB); + fprintf (f, "#define LOG_BITS_PER_WORD %d\n", ilog2(8*WORD_SZB)); + fprintf (f, "#define LOG_BYTES_PER_WORD %d\n", ilog2(WORD_SZB)); + fprintf (f, "\n"); + + U.bytes[0] = 0x01; + U.bytes[sizeof(unsigned long)-1] = 0x02; + switch (U.l & 0xFF) { + case 0x01: + fprintf(f, "#define BYTE_ORDER_LITTLE\n"); + break; + case 0x02: + fprintf(f, "#define BYTE_ORDER_BIG\n"); + break; + default: + fprintf(stderr, "gen-sizes: Error -- unable to determine endianess\n"); + exit(1); + } /* end of switch */ + fprintf (f, "\n"); + + /* the C part */ + fprintf (f, "#ifndef _ASM_\n"); + + fprintf (f, "typedef %s Int16_t;\n", i16); + fprintf (f, "typedef unsigned %s Unsigned16_t;\n", i16); + fprintf (f, "typedef %s Int32_t;\n", i32); + fprintf (f, "typedef unsigned %s Unsigned32_t;\n", i32); + fprintf (f, "typedef %s Int64_t;\n", i64); + fprintf (f, "typedef unsigned %s Unsigned64_t;\n", i64); + fprintf (f, "\n"); + fprintf (f, "typedef unsigned char Byte_t;\n"); +#if SIZE_64 + fprintf (f, "typedef Unsigned64_t Word_t;\n"); + fprintf (f, "typedef Int64_t Int_t;\n"); + fprintf (f, "typedef Unsigned64_t Addr_t;\n"); +#else /* SIZE_32 */ + fprintf (f, "typedef Unsigned32_t Word_t;\n"); + fprintf (f, "typedef Int32_t Int_t;\n"); + fprintf (f, "typedef Unsigned32_t Addr_t;\n"); +#endif + + fprintf (f, "#endif\n"); + + CloseFile (f, "_ML_SIZES_"); + + exit (0); +} diff --git a/base/runtime/config/gen-unix-signals.c b/base/runtime/config/gen-unix-signals.c new file mode 100644 index 0000000..de66dc4 --- /dev/null +++ b/base/runtime/config/gen-unix-signals.c @@ -0,0 +1,55 @@ +/* gen-unix-signals.c + * + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * Generate the "system-signals.h" file for UNIX systems. + */ + +#include +#include +#include +#include "gen.h" +#include "gen-unix-signals.h" + +#ifndef DST_FILE +#define DST_FILE "system-signals.h" +#endif + + +int main (void) +{ + sig_info_t *sigInfo; + int i; + int numSigs; + FILE *f; + + sigInfo = SortSignalTbl (); + + f = OpenFile (DST_FILE, "_SYSTEM_SIGNALS_"); + + numSigs = sigInfo->numSysSigs + sigInfo->numRunSigs; + + fprintf (f, "#define NUM_SYSTEM_SIGS %2d\n", sigInfo->numSysSigs); + fprintf (f, "#define MIN_SYSTEM_SIG %2d /* %s */\n", + sigInfo->minSysSig, sigInfo->sigs[0]->sigName); + fprintf (f, "#define MAX_SYSTEM_SIG %2d /* %s */\n", + sigInfo->maxSysSig, sigInfo->sigs[sigInfo->numSysSigs-1]->sigName); + fprintf (f, "#define NUM_SIGS %2d\n", numSigs); + fprintf (f, "#define SIGMAP_SZ %2d\n", + sigInfo->maxSysSig + sigInfo->numRunSigs + 1); + fprintf (f, "\n"); + for (i = sigInfo->numSysSigs; i < numSigs; i++) { + fprintf(f, "#define %s %2d\n", + sigInfo->sigs[i]->sigName, sigInfo->sigs[i]->sig); + } + fprintf (f, "\n"); + + fprintf (f, "#define IS_SYSTEM_SIG(S) ((S) <= MAX_SYSTEM_SIG)\n"); + + CloseFile (f, "_SYSTEM_SIGNALS_"); + + exit (0); + +} /* end of main */ + diff --git a/base/runtime/config/gen-unix-signals.h b/base/runtime/config/gen-unix-signals.h new file mode 100644 index 0000000..289d8ad --- /dev/null +++ b/base/runtime/config/gen-unix-signals.h @@ -0,0 +1,23 @@ +/* gen-unix-signals.h + * + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + */ + +typedef struct { + int sig; /* the UNIX signal code */ + char *sigName; /* the symbolic name of the signal (i.e., */ + /* the #define name). */ + char *shortName; /* the short name of the signal passed to ML */ +} sig_desc_t; + +typedef struct { + sig_desc_t **sigs; /* an ordered vector of signal descriptions */ + int numSysSigs; /* the number of system signals */ + int numRunSigs; /* the number of runtime signals */ + int minSysSig; /* the minimum system signal number. */ + int maxSysSig; /* the maximum system signal number. */ +} sig_info_t; + +extern sig_info_t *SortSignalTbl (); + diff --git a/base/runtime/config/gen-unix-sigtbl.c b/base/runtime/config/gen-unix-sigtbl.c new file mode 100644 index 0000000..78cafa3 --- /dev/null +++ b/base/runtime/config/gen-unix-sigtbl.c @@ -0,0 +1,53 @@ +/* gen-unix-sigtbl.c + * + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * Generate the "unix-sigtbl.c" file. + */ + +#include +#include +#include +#include "gen.h" +#include "gen-unix-signals.h" + +#ifndef DST_FILE +#define DST_FILE "unix-sigtbl.c" +#endif + +int main (void) +{ + sig_info_t *sigInfo; + int i; + int numSigs; + FILE *f; + + sigInfo = SortSignalTbl (); + numSigs = sigInfo->numSysSigs + sigInfo->numRunSigs; + + f = OpenFile (DST_FILE, NIL(char *)); + + fprintf (f, "\n"); + fprintf (f, "PVT sys_const_t SigInfo[NUM_SIGS] = {\n"); + for (i = 0; i < sigInfo->numSysSigs; i++) { + fprintf(f, " { %s, \"%s\" },\n", + sigInfo->sigs[i]->sigName, sigInfo->sigs[i]->shortName); + } + fprintf (f, " /* Run-time signals */\n"); + for (i = sigInfo->numSysSigs; i < numSigs; i++) { + fprintf(f, " { %s, \"%s\" },\n", + sigInfo->sigs[i]->sigName, sigInfo->sigs[i]->shortName); + } + fprintf (f, "};\n"); + fprintf (f, "PVT sysconst_tbl_t SigTbl = {\n"); + fprintf (f, " /* numConsts */ NUM_SIGS,\n"); + fprintf (f, " /* consts */ SigInfo\n"); + fprintf (f, "};\n"); + + CloseFile (f, NIL(char *)); + + exit (0); + +} /* end of main */ + diff --git a/base/runtime/config/gen-win32-signals.c b/base/runtime/config/gen-win32-signals.c new file mode 100644 index 0000000..307256d --- /dev/null +++ b/base/runtime/config/gen-win32-signals.c @@ -0,0 +1,50 @@ +/* gen-win32-signals.c + * + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * Generate the "system-signals.h" file for Win32 systems. + * signals aren't currently implemented (since Win32 doesn't have signals) + */ + +#include +#include +#include "gen.h" +#include "win32-sigtab.h" + +#ifndef DST_FILE +#define DST_FILE "system-signals.h" +#endif + +main () +{ + FILE *f; + int numSigs = 1; + int i; + + f = OpenFile (DST_FILE, "_SYSTEM_SIGNALS_"); + + fprintf (f, "#define NUM_SYSTEM_SIGS %2d\n", 0); + fprintf (f, "#define MIN_SYSTEM_SIG %2d /* %s */\n", + 0, "none"); + fprintf (f, "#define MAX_SYSTEM_SIG %2d /* %s */\n", + 0, "none"); + fprintf (f, "#define NUM_SIGS %2d\n", NUM_SIGS); + fprintf (f, "#define MAX_SIG %2d\n", NUM_SIGS); + fprintf (f, "#define SIGMAP_SZ %2d\n", NUM_SIGS+1); + fprintf (f, "\n"); + + /* the signals */ + for (i = 0; i < NUM_SIGS; i++) { + fprintf(f, "#define %s %2d\n", win32SigTab[i].lname, win32SigTab[i].n); + } + + fprintf (f, "#define IS_SYSTEM_SIG(S) (0)\n"); + + CloseFile (f, "_SYSTEM_SIGNALS_"); + + exit (0); + +} + +/* end of gen-win32-signals.c */ diff --git a/base/runtime/config/gen-win32-sigtbl.c b/base/runtime/config/gen-win32-sigtbl.c new file mode 100644 index 0000000..b0a5cd3 --- /dev/null +++ b/base/runtime/config/gen-win32-sigtbl.c @@ -0,0 +1,45 @@ +/* gen-win32-sigtbl.c + * + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * generate the "win32-sigtbl.c" file. + */ + +#include +#include +#include "gen.h" +#include "win32-sigtab.h" + +#ifndef DST_FILE +#define DST_FILE "win32-sigtbl.c" +#endif + +main () +{ + FILE *f; + int i; + + f = OpenFile (DST_FILE, NIL(char *)); + + fprintf (f, "\n"); + + fprintf (f, "PVT sys_const_t SigInfo[NUM_SIGS] = {\n"); + for (i = 0; i < NUM_SIGS; i++) { + fprintf(f, "\t{ %d, \"%s\" },\n", win32SigTab[i].n, win32SigTab[i].sname); + } + fprintf (f, "};\n"); + + fprintf (f, "PVT sysconst_tbl_t SigTbl = {\n"); + fprintf (f, " /* numConsts */ NUM_SIGS,\n"); + fprintf (f, " /* consts */ SigInfo\n"); + fprintf (f, "};\n"); + + CloseFile (f, NIL(char *)); + + exit (0); + +} + +/* end of gen-win32-sigtbl.c */ + diff --git a/base/runtime/config/gen.h b/base/runtime/config/gen.h new file mode 100644 index 0000000..eca9116 --- /dev/null +++ b/base/runtime/config/gen.h @@ -0,0 +1,30 @@ +/* gen.h + * + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + */ + +#ifndef _GEN_COMMON_ +#define _GEN_COMMON_ + +#include + +extern FILE *OpenFile (char *fname, char *flag); +extern void CloseFile (FILE *f, char *flag); + +#ifndef _ML_BASE_ +/* nil pointers */ +#define NIL(ty) ((ty)0) + +/* aliases for malloc/free, so that we can easily replace them */ +#define MALLOC(sz) malloc(sz) +#define FREE(p) free(p) + +/* Allocate a new C object of type t. */ +#define NEW_OBJ(t) ((t *)MALLOC(sizeof(t))) +/* Allocate a new C array of type t objects. */ +#define NEW_VEC(t,n) ((t *)MALLOC((n)*sizeof(t))) +#endif /* !_ML_BASE_ */ + +#endif /* !_GEN_COMMON_ */ + diff --git a/base/runtime/config/unix-signals.c b/base/runtime/config/unix-signals.c new file mode 100644 index 0000000..7d2ae0c --- /dev/null +++ b/base/runtime/config/unix-signals.c @@ -0,0 +1,133 @@ +/* unix-signals.c + * + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * Compute the signal table information for UNIX systems. This is used to + * generate the unix-signal-tbl.c file and the system-signals.h file. We + * assume that the signals SIGHUP, SIGINT, SIGQUIT, SIGALRM, and SIGTERM + * are (at least) provided. + */ + +#include "ml-unixdep.h" +#include +#include +#include +#include "gen.h" +#include "gen-unix-signals.h" + + +/** The UNIX signals **/ +sig_desc_t SigTable[] = { + { SIGHUP, "SIGHUP", "HUP"}, /* POSIX */ + { SIGINT, "SIGINT", "INT"}, /* POSIX */ + { SIGQUIT, "SIGQUIT", "QUIT"}, /* POSIX */ + { SIGALRM, "SIGALRM", "ALRM"}, /* POSIX */ + { SIGTERM, "SIGTERM", "TERM"}, /* POSIX */ +#ifdef SIGPIPE + { SIGPIPE, "SIGPIPE", "PIPE"}, /* POSIX */ +#endif +#ifdef SIGUSR1 + { SIGUSR1, "SIGUSR1", "USR1"}, /* POSIX */ +#endif +#ifdef SIGUSR2 + { SIGUSR2, "SIGUSR2", "USR2"}, /* POSIX */ +#endif +#if defined(SIGCHLD) + { SIGCHLD, "SIGCHLD", "CHLD"}, /* POSIX */ +#elif defined(SIGCLD) + { SIGCLD, "SIGCLD", "CHLD"}, +#endif +#if defined(SIGWINCH) + { SIGWINCH, "SIGWINCH", "WINCH"}, +#elif defined(SIGWINDOW) + { SIGWINDOW, "SIGWINDOW", "WINCH"}, +#endif +#ifdef SIGURG + { SIGURG, "SIGURG", "URG"}, +#endif +#ifdef SIGIO + { SIGIO, "SIGIO", "IO"}, +#endif +#ifdef SIGPOLL + { SIGPOLL, "SIGPOLL", "POLL"}, +#endif +#ifdef SIGTSTP + { SIGTSTP, "SIGTSTP", "TSTP"}, /* POSIX */ +#endif +#ifdef SIGCONT + { SIGCONT, "SIGCONT", "CONT"}, /* POSIX */ +#endif +#ifdef SIGTTIN + { SIGTTIN, "SIGTTIN", "TTIN"}, /* POSIX */ +#endif +#ifdef SIGTTOU + { SIGTTOU, "SIGTTOU", "TTOU"}, /* POSIX */ +#endif +#ifdef SIGVTALRM + { SIGVTALRM, "SIGVTALRM", "VTALRM"}, +#endif +}; +#define TABLE_SIZE (sizeof(SigTable)/sizeof(sig_desc_t)) + +/* the run-time system generated signals */ +sig_desc_t RunTSignals[] = { + { -1, "RUNSIG_GC", "GC" }, +}; +#define NUM_RUN_SIGS (sizeof(RunTSignals)/sizeof(sig_desc_t)) + +/* SortSignalTbl: + */ +sig_info_t *SortSignalTbl () +{ + int i, j, k, n; + sig_desc_t **signals; + sig_info_t *sigInfo; + + signals = NEW_VEC(sig_desc_t *, TABLE_SIZE + NUM_RUN_SIGS); + + /* sort the signal table by increaseing signal number; the sort removes + * duplicates by mapping to the first name. We need this because some + * systems alias signals. + */ + for (i = 0, n = 0; i < TABLE_SIZE; i++) { + /* invariant: signals[0..n-1] is sorted */ + sig_desc_t *p = &(SigTable[i]); + for (j = 0; j < n; j++) { + if (signals[j]->sig == p->sig) + /* a duplicate */ + break; + if (signals[j]->sig > p->sig) { + /* insert the signal at position j */ + for (k = n; k >= j; k--) + signals[k] = signals[k-1]; + signals[j] = p; n++; + break; + } + } + if (j == n) { + signals[n++] = p; + } + } + /* Here, n is the number of system signals and signals[n-1]->sig is the + * largest system signal code. + */ + + /* add the run-time system signals to the table */ + for (i = 0, j = n; i < NUM_RUN_SIGS; i++, j++) { + signals[j] = &(RunTSignals[i]); + signals[j]->sig = signals[n-1]->sig+i+1; + } + + + sigInfo = NEW_OBJ(sig_info_t); + sigInfo->sigs = signals; + sigInfo->numSysSigs = n; + sigInfo->numRunSigs = NUM_RUN_SIGS; + sigInfo->minSysSig = signals[0]->sig; + sigInfo->maxSysSig = signals[n-1]->sig; + + return sigInfo; + +} /* end of SortSignalTbl */ + diff --git a/base/runtime/config/unix-sigtbl.c b/base/runtime/config/unix-sigtbl.c new file mode 100644 index 0000000..6f2a1d3 --- /dev/null +++ b/base/runtime/config/unix-sigtbl.c @@ -0,0 +1,34 @@ +/* unix-sigtbl.c + * + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * NOTE: this file is generated --- do not edit!!! + */ + + +PVT sys_const_t SigInfo[NUM_SIGS] = { + { SIGHUP, "HUP" }, + { SIGINT, "INT" }, + { SIGQUIT, "QUIT" }, + { SIGPIPE, "PIPE" }, + { SIGALRM, "ALRM" }, + { SIGTERM, "TERM" }, + { SIGUSR1, "USR1" }, + { SIGUSR2, "USR2" }, + { SIGCHLD, "CHLD" }, + { SIGWINCH, "WINCH" }, + { SIGURG, "URG" }, + { SIGIO, "IO" }, + { SIGTSTP, "TSTP" }, + { SIGCONT, "CONT" }, + { SIGTTIN, "TTIN" }, + { SIGTTOU, "TTOU" }, + { SIGVTALRM, "VTALRM" }, + /* Run-time signals */ + { RUNSIG_GC, "GC" }, +}; +PVT sysconst_tbl_t SigTbl = { + /* numConsts */ NUM_SYSTEM_SIGS, + /* consts */ SigInfo +}; diff --git a/base/runtime/config/win32-sigtab.h b/base/runtime/config/win32-sigtab.h new file mode 100644 index 0000000..178cbb1 --- /dev/null +++ b/base/runtime/config/win32-sigtab.h @@ -0,0 +1,28 @@ +/* win32-sigtab.h + * + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * fake "signals" to make win32 go through. + * unlike the unix counterpart, this file is not generated -- do not delete! + */ + +#ifndef _WIN32_SIGTAB_ +#define _WIN32_SIGTAB_ + +struct { + int n; + char *sname,*lname; +} win32SigTab[] = { + {0, "INT", "SIGINT"}, + {1, "ALRM", "SIGALRM"}, + {2, "TERM", "SIGTERM"}, + {3, "GC", "RUNSIG_GC"} +}; + +#define NUM_SIGS 4 + + +#endif + +/* end of win32-sigtab.h */ diff --git a/base/runtime/gc/NEW-FORMAT b/base/runtime/gc/NEW-FORMAT new file mode 100644 index 0000000..c176f5e --- /dev/null +++ b/base/runtime/gc/NEW-FORMAT @@ -0,0 +1,23 @@ +Ideas for a new format for heap images. + +Number the exported arenas from 0..n (<= 255). +Whe writing a pointer p out, adjust it as follows: + + ((ArenaId << 24) | (p - arenaBase)) + +Adjusting the pointers on input is then simply a +matter of looking up the new base address in +a table and adding it to the low 24 bits. + +If an arena is bigger than 2^24 (16Mb), then use +multiple ArenaIds. Since the current number of +arenas is limited to be at most 70, this scheme +can support a minimum of 2976Mb. Since this +exceeds the capacity of most disks, we should be +okay. + +The tricky part of this is that when exporting, the +bigobject references must be mapped into a coalesced +big-object region. We can handle this by allocating +an offset word per big-object descriptor. + diff --git a/base/runtime/gc/NOTES b/base/runtime/gc/NOTES new file mode 100644 index 0000000..6c5da29 --- /dev/null +++ b/base/runtime/gc/NOTES @@ -0,0 +1,6 @@ +The way things are done now, the registration of runtime system addresses +is always done. For stand-alone systems, this shouldn't be necessary. +We might be able to avoid this by using function pointers to the CGlobals +functions, which get set to the right functions in the heap-io library. +Thus, if the heap-io stuff isn't included, then the CGlobals stuff won't +be linked either. diff --git a/base/runtime/gc/README b/base/runtime/gc/README new file mode 100644 index 0000000..cff8559 --- /dev/null +++ b/base/runtime/gc/README @@ -0,0 +1,7 @@ +This directory contains the garbage collector and a library for +reading and writing heap images. + +The two basic operations are importing/exporting an entire heap +image, and blasting in/out an ML data structure. In both cases, +references to addresses outside the heap must be marked on output +and patched on input. diff --git a/base/runtime/gc/addr-hash.c b/base/runtime/gc/addr-hash.c new file mode 100644 index 0000000..d7cc8e7 --- /dev/null +++ b/base/runtime/gc/addr-hash.c @@ -0,0 +1,148 @@ +/*! \file addr-hash.c + * + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * Hash tables for mapping addresses to objects. + */ + +#include "ml-base.h" +#include "addr-hash.h" + +typedef struct item { /* items in the hash table */ + Addr_t addr; /* the address the object is keyed on */ + void *obj; /* the object */ + struct item *next; /* the next item in the bucket */ +} item_t; + +struct addr_tbl { + int ignoreBits; /* how many low bits of a hashed address are ignored */ + int size; /* number of buckets in the table; will be a power of 2 */ + int numItems; /* the number of items in the table */ + Addr_t mask; /* mask to form table index (== size-1) */ + item_t **buckets; /* array of buckets */ +}; + +STATIC_INLINE int _AddrHash (addr_tbl_t *tbl, Addr_t addr) +{ + return (int)((addr >> tbl->ignoreBits) & tbl->mask); +} + +#define HASH(tbl,addr) _AddrHash(tbl, addr) + +/* MakeAddrTbl: + * + * Allocate an address hash table. + */ +addr_tbl_t *MakeAddrTbl (int ignoreBits, int size) +{ + unsigned int nBuckets; + int i; + addr_tbl_t *tbl; + + /* find smallest power of 2 (but at least 16) that is greater than size */ + for (nBuckets = 16; nBuckets < size; nBuckets <<= 1) + continue; + + tbl = NEW_OBJ(addr_tbl_t); + tbl->buckets = NEW_VEC(item_t *, nBuckets); + tbl->ignoreBits = ignoreBits; + tbl->size = nBuckets; + tbl->mask = nBuckets-1; + tbl->numItems = 0; + for (i = 0; i < nBuckets; i++) + tbl->buckets[i] = NIL(item_t *); + + return tbl; + +} /* end of MakeAddrTbl */ + +/* AddrTblInsert: + * + * Insert an object into a address hash table. + */ +void AddrTblInsert (addr_tbl_t *tbl, Addr_t addr, void *obj) +{ + int h = HASH(tbl,addr); + item_t *p; + + ASSERT((0 <= h) && (h < tbl->size)); + for (p = tbl->buckets[h]; (p != NIL(item_t *)) && (p->addr != addr); p = p->next) { + continue; + } + if (p == NIL(item_t *)) { + p = NEW_OBJ(item_t); + p->addr = addr; + p->obj = obj; + p->next = tbl->buckets[h]; + tbl->buckets[h] = p; + tbl->numItems++; + } + else if (p->obj != obj) { + Die ("AddrTblInsert: %#x mapped to multiple objects", addr); + } + +} /* end of AddrTblInsert */ + +/* AddrTblLookup: + * + * Return the object associated with the given address; return NIL, if not + * found. + */ +void *AddrTblLookup (addr_tbl_t *tbl, Addr_t addr) +{ + int h = HASH(tbl,addr); + item_t *p; + + ASSERT((0 <= h) && (h < tbl->size)); + for (p = tbl->buckets[h]; (p != NIL(item_t *)) && (p->addr != addr); p = p->next) + continue; + + if (p == NIL(item_t *)) + return NIL(void *); + else + return p->obj; + +} /* end of AddrTblLookup */ + +/* AddrTblApply: + * + * Apply the given function to the elements of the table. + */ +void AddrTblApply (addr_tbl_t *tbl, void *clos, void (*f) (Addr_t, void *, void *)) +{ + int i; + item_t *p; + + for (i = 0; i < tbl->size; i++) { + for (p = tbl->buckets[i]; p != NIL(item_t *); p = p->next) { + (*f) (p->addr, clos, p->obj); + } + } + +} /* end of AddrTblApply */ + +/* FreeAddrTbl: + * + * Deallocate the space for an address table; if freeObjs is true, also deallocate + * the objects. + */ +void FreeAddrTbl (addr_tbl_t *tbl, bool_t freeObjs) +{ + int i; + item_t *p, *q; + + for (i = 0; i < tbl->size; i++) { + for (p = tbl->buckets[i]; p != NIL(item_t *); ) { + q = p->next; + if (freeObjs) { + FREE (p->obj); + } + FREE (p); + p = q; + } + } + FREE (tbl->buckets); + FREE (tbl); + +} /* end of FreeAddrTbl. */ diff --git a/base/runtime/gc/addr-hash.h b/base/runtime/gc/addr-hash.h new file mode 100644 index 0000000..44a0875 --- /dev/null +++ b/base/runtime/gc/addr-hash.h @@ -0,0 +1,39 @@ +/*! \file addr-hash.h + * + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * Hash tables for mapping machine addresses to objects. + */ + +#ifndef _ADDR_HASH_ +#define _ADDR_HASH_ + +typedef struct addr_tbl addr_tbl_t; + +/* Allocate an address hash table. The `ignoreBits` parameter specifies + * how many lower-order bits are ignored by the hashing algorithm. + */ +extern addr_tbl_t *MakeAddrTbl (int ignoreBits, int size); + +/* Insert an object into a address hash table. + */ +extern void AddrTblInsert (addr_tbl_t *tbl, Addr_t addr, void *obj); + +/* Return the object associated with the given address; return NIL, if not + * found. + */ +extern void *AddrTblLookup (addr_tbl_t *tbl, Addr_t addr); + +/* Apply the given function to the elements of the table. The second + * argument to the function is the function's "closure," and the third is + * the associated info. + */ +extern void AddrTblApply (addr_tbl_t *tbl, void *clos, void (*f) (Addr_t, void *, void *)); + +/* Deallocate the space for an address table; if freeObjs is true, also deallocate + * the objects. + */ +extern void FreeAddrTbl (addr_tbl_t *tbl, bool_t freeObjs); + +#endif /* !_ADDR_HASH_ */ diff --git a/base/runtime/gc/arena-id.h b/base/runtime/gc/arena-id.h new file mode 100644 index 0000000..109ed38 --- /dev/null +++ b/base/runtime/gc/arena-id.h @@ -0,0 +1,122 @@ +/* arena-id.h + * + * COPYRIGHT (c) 1993 by AT&T Bell Laboratories. + * + * Definitions for the arena IDs and for mapping from addresses to arena IDs. + * + * An arena ID (aid_t) is an unsigned 16-bit value, with the following layout: + * + * bits 0-7: heap block ID (0xFF for unmapped objects) + * bits 8-11: object class: + * 0000 = new-space objects + * 1111 = unmapped objects + * bits 12-15: generation number (0 for new space, 1-14 for older generations, + * and 15 for non-heap memory) + * + * Heap pages in allocation space have the arena ID 0x0000, and unmapped heap + * pages have the arena ID 0xffff. The ID format is designed so that a + * from-space page can be detected by having a generation field less than or + * equal to the maximum generation being collected. + */ + +#ifndef _ARENA_ID_ +#define _ARENA_ID_ + +#ifndef _ML_BASE_ +#include "ml-base.h" +#endif + +#ifndef _BIBOP_ +#include "bibop.h" +#endif + +/* The indices of the different object arenas. WIth four bits for the + * object class, we have up to 7 regular objects and up to 7 big-object + * arenas. + */ +/* The different classes of objects; each class lives in a different arena */ +#define RECORD_INDX 0 +#define PAIR_INDX 1 +#define STRING_INDX 2 +#define ARRAY_INDX 3 +#define NUM_ARENAS 4 + +/* the different classes of big-objects, which live in big-object regions */ +#define CODE_INDX 0 +#define NUM_BIGOBJ_KINDS 1 + +#define NUM_OBJ_KINDS (NUM_ARENAS+NUM_BIGOBJ_KINDS) + +/* arena IDs */ +typedef page_id_t aid_t; + +/* The number of bits in the arena ID fields. The number of bits should add + * up to sizeof(aid_t)*8. + */ +#define HBLK_BITS 8 +#define OBJC_BITS 4 +#define GEN_BITS 4 + +#define HBLK_new 0 +#define MAX_HBLK 0xff +#define HBLK_MASK ((1<> OBJC_SHIFT)&OBJC_MASK) +#define EXTRACT_GEN(AID) ((AID) >> GEN_SHIFT) +#define IS_FROM_SPACE(AID,MAX_AID) \ + ((AID) <= (MAX_AID)) + +/* the arena IDs for new-space and unmapped heap pages, and for free big-objects */ +#define AID_NEW MAKE_AID(ALLOC_GEN,OBJC_new,HBLK_new) +#define AID_UNMAPPED PAGEID_unmapped +#define AID_MAX MAKE_MAX_AID(MAX_NUM_GENS) + +#ifdef TOSPACE_ID /* for debugging */ +#define TOSPACE_AID(OBJC,BLK) MAKE_AID(0xf,(OBJC),BLK) +#define TOSPACE_GEN(AID) EXTRACT_OBJC(AID) +#define IS_TOSPACE_AID(AID) (((AID) != AID_UNMAPPED) && (EXTRACT_GEN(AID) == 0xf)) +#endif + +/* AIds for big-object regions. These are always marked as from-space, since + * both from-space and two-space objects of different generations can occupy + * the same big-object region. + */ +#define AID_BIGOBJ(GEN) MAKE_AID(GEN,OBJC_bigobj,HBLK_bigobj) +#define AID_BIGOBJ_HDR(GEN) MAKE_AID(GEN,OBJC_bigobj,HBLK_bigobjhdr) + +/* return true if the AID is a AID_BIGOBJ_HDR (we assume that it is + * either an AID_BIGOBJ or an AID_BIGOBJ_HDR id). + */ +#define BO_IS_HDR(AID) (EXTRACT_HBLK(AID) == HBLK_bigobjhdr) + +/* return true, if AID is a big-object AID */ +#define IS_BIGOBJ_AID(ID) (EXTRACT_OBJC(ID) == OBJC_bigobj) + +#endif /* !_ARENA_ID_ */ + diff --git a/base/runtime/gc/big-objects.c b/base/runtime/gc/big-objects.c new file mode 100644 index 0000000..d8c6bb5 --- /dev/null +++ b/base/runtime/gc/big-objects.c @@ -0,0 +1,343 @@ +/*! \file big-objects.c + * + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * Code for managing big-object regions. + */ + +#include "ml-base.h" +#include "memory.h" +#include "heap.h" +#include "heap-monitor.h" +#include + +#ifdef BO_DEBUG +/* PrintRegionMap: + */ +void PrintRegionMap (bigobj_region_t *r) +{ + bigobj_desc_t *dp, *dq; + int i; + + SayDebug ("[%d] %d/%d, @%p: ", r->minGen, r->nFree, r->nPages, (void *)(r->firstPage)); + for (i = 0, dq = NIL(bigobj_desc_t *); i < r->nPages; i++) { + dp = r->objMap[i]; + if (dp != dq) { + SayDebug ("|"); + dq = dp; + } + if (BO_IS_FREE(dp)) + SayDebug ("_"); + else + SayDebug ("X"); + } + SayDebug ("|\n"); + +} /* end of PrintRegionMap */ +#endif + + +/* BO_AllocRegion: + * + * Allocate a big object region that is large enough to hold an object of at + * least reqSzB bytes. It returns the descriptor for the free big-object that + * is the region. + * NOTE: it does not mark the BIBOP entries for the region; this should be + * done by the caller. + */ +bigobj_desc_t *BO_AllocRegion (heap_t *heap, Addr_t reqSzB) +{ + int npages, i; + Addr_t hdrSzB, memObjSzB; + bigobj_region_t *region; + mem_obj_t *memObj; + bigobj_desc_t *desc; + + /* Compute the memory-object size for the region. A region consists of + * a header followed by big-object pages. The size of the header depends + * on the number of pages and the number of pages should be rounded up + * to fill the memory object, which will be a multiple of BIBOP_PAGE_SZB + * bytes. We use an iterative algorithm to determine the number of pages + * and the size of the memory object. + */ + { + Addr_t szb; + int hdrSlop, slop; + /* minimum number of pages to hold requested size */ + npages = ROUNDUP(reqSzB, BIGOBJ_PAGE_SZB) >> BIGOBJ_PAGE_SHIFT; + /* size of header for npages */ + szb = BOREGION_HDR_SZB(npages); + /* round up to bigobject page size */ + hdrSzB = ROUNDUP(szb, BIGOBJ_PAGE_SZB); + /* amount of slop in header (measured in per-page space cost) */ + hdrSlop = (hdrSzB - szb) / sizeof(bigobj_desc_t *); + /* amount of memory needed to hold the header and pages */ + szb = hdrSzB + npages*BIGOBJ_PAGE_SZB; + /* round up to BIBOP page size */ + memObjSzB = ROUNDUP(szb, BIBOP_PAGE_SZB); + /* amount of slop in memory object (measured in per-page space cost) */ + slop = (memObjSzB - szb) / BIGOBJ_PAGE_SZB; + /* while the page slop is bigger than the header slop, reallocate a page to the header */ + while (hdrSlop < slop) { + slop -= 1; + hdrSlop += BIGOBJ_PAGE_SZB / sizeof(bigobj_desc_t *); + } + /* we can increase the number of pages without increasing the rounded + * size of the header. + */ + npages += slop; + /* recompute the header and request sizes based on the actual number of + * big-object pages being allocated. + */ + hdrSzB = ROUNDUP(BOREGION_HDR_SZB(npages), BIGOBJ_PAGE_SZB); + reqSzB = npages * BIGOBJ_PAGE_SZB; + } + + if ((memObj = MEM_AllocMemObj (memObjSzB)) == NIL(mem_obj_t *)) { + Die ("unable to allocate memory object for bigobject region"); + } + region = (bigobj_region_t *)MEMOBJ_BASE(memObj); + + if ((desc = NEW_OBJ(bigobj_desc_t)) == NIL(bigobj_desc_t *)) { + Die ("unable to allocate big-object descriptor"); + } + + /* initialize the region header */ + region->firstPage = ((Addr_t)region + hdrSzB); + region->nPages = npages; + region->nFree = npages; + region->minGen = MAX_NUM_GENS; + region->memObj = memObj; + region->next = heap->bigRegions; + heap->bigRegions = region; + heap->numBORegions++; + for (i = 0; i < npages; i++) { + region->objMap[i] = desc; + } + + /* initialize the descriptor for the region's memory */ + desc->obj = region->firstPage; + desc->sizeB = reqSzB; + desc->state = BO_FREE; + desc->region = region; + +#ifdef BO_DEBUG +SayDebug ("BO_AllocRegion: %d pages @ %p\n", npages, (void *)(region->firstPage)); +#endif + return desc; + +} /* end of BO_AllocRegion */ + + +/* BO_Alloc: + * + * Allocate a big object of the given size. + */ +bigobj_desc_t *BO_Alloc (heap_t *heap, int gen, Addr_t objSzB) +{ + bigobj_desc_t *hdr, *dp, *newDesc; + bigobj_region_t *region; + Addr_t totSzB; + int i, npages, firstPage; + + totSzB = ROUNDUP(objSzB, BIGOBJ_PAGE_SZB); + npages = (totSzB >> BIGOBJ_PAGE_SHIFT); + + /* search for a free object that is big enough (first-fit) */ + hdr = heap->freeBigObjs; + for (dp = hdr->next; (dp != hdr) && (dp->sizeB < totSzB); dp = dp->next) { + continue; + } + + if (dp == hdr) { + /* no free object fits, so allocate a new region */ + dp = BO_AllocRegion (heap, totSzB); + region = dp->region; + if (dp->sizeB == totSzB) { + /* allocate the whole region to the object */ + newDesc = dp; + } + else { + /* split the free object */ + newDesc = NEW_OBJ(bigobj_desc_t); + newDesc->obj = dp->obj; + newDesc->region = region; + dp->obj = (Addr_t)(dp->obj) + totSzB; + dp->sizeB -= totSzB; + AddBODesc(heap->freeBigObjs, dp); + firstPage = ADDR_TO_BOPAGE(region, newDesc->obj); + for (i = 0; i < npages; i++) { + region->objMap[firstPage+i] = newDesc; + } + } + } + else if (dp->sizeB == totSzB) { + RemoveBODesc(dp); + newDesc = dp; + region = dp->region; + } + else { + ASSERT(totSzB < dp->sizeB); + /* split the free object, leaving dp in the free list. */ + region = dp->region; + newDesc = NEW_OBJ(bigobj_desc_t); + newDesc->obj = dp->obj; + newDesc->region = region; + dp->obj = (Addr_t)(dp->obj) + totSzB; + dp->sizeB -= totSzB; + firstPage = ADDR_TO_BOPAGE(region, newDesc->obj); + for (i = 0; i < npages; i++) { + dp->region->objMap[firstPage+i] = newDesc; + } + } + + newDesc->sizeB = objSzB; + newDesc->state = BO_YOUNG; + newDesc->gen = gen; + region->nFree -= npages; + + if (region->minGen > gen) { + /* update the generation part of the descriptor */ + region->minGen = gen; + MarkRegion (BIBOP, (ml_val_t *)region, MEMOBJ_SZB(region->memObj), + AID_BIGOBJ(gen)); + ADDR_TO_PAGEID(BIBOP, region) = AID_BIGOBJ_HDR(gen); + } + +#ifdef BO_DEBUG +SayDebug ("BO_Alloc: %d bytes @ %p\n", objSzB, (void *)(newDesc->obj)); +PrintRegionMap(region); +#endif + return newDesc; + +} /* end of BO_Alloc */ + + +/* BO_Free: + * + * Mark a big object as free and add it to the free list. + */ +void BO_Free (heap_t *heap, bigobj_desc_t *desc) +{ + bigobj_region_t *region = desc->region; + bigobj_desc_t *dp; + int firstPage, lastPage, i, j; + Addr_t totSzB = ROUNDUP(desc->sizeB, BIGOBJ_PAGE_SZB); + + firstPage = ADDR_TO_BOPAGE(region, desc->obj); + lastPage = firstPage + (totSzB >> BIGOBJ_PAGE_SHIFT); + +#ifdef BO_DEBUG +SayDebug ("BO_Free: @ %#x, bibop gen = %x, gen = %d, state = %d, pages=[%d..%d)\n", +desc->obj, (unsigned)EXTRACT_GEN(ADDR_TO_PAGEID(BIBOP, desc->obj)), desc->gen, desc->state, +firstPage, lastPage); +PrintRegionMap(region); +#endif + if ((firstPage > 0) && BO_IS_FREE(region->objMap[firstPage-1])) { + /* coalesce with adjacent free object */ + dp = region->objMap[firstPage-1]; + RemoveBODesc(dp); + for (i = ADDR_TO_BOPAGE(region, dp->obj); i < firstPage; i++) + region->objMap[i] = desc; + desc->obj = dp->obj; + totSzB += dp->sizeB; + FREE (dp); + } + + if ((lastPage < region->nPages) && BO_IS_FREE(region->objMap[lastPage])) { + /* coalesce with adjacent free object */ + dp = region->objMap[lastPage]; + RemoveBODesc(dp); + for (i = lastPage, j = i+(dp->sizeB >> BIGOBJ_PAGE_SHIFT); i < j; i++) + region->objMap[i] = desc; + totSzB += dp->sizeB; + FREE (dp); + } + + desc->sizeB = totSzB; + desc->state = BO_FREE; + + region->nFree += (lastPage - firstPage); + /** what if (region->nFree == region->nPages) ??? **/ + + /* add desc to the free list */ + AddBODesc(heap->freeBigObjs, desc); + +} /* end of BO_Free */ + + +/* BO_GetDesc: + * + * Given an address into a big object, return the object's descriptor. + */ +bigobj_desc_t *BO_GetDesc (ml_val_t addr) +{ + bibop_t bibop = BIBOP; + int i; + aid_t aid; + bigobj_region_t *rp; + + /* find the beginning of the region containing the code object */ + i = BIBOP_ADDR_TO_INDEX(addr); + aid = INDEX_TO_PAGEID(bibop, i); + while (! BO_IS_HDR(aid)) { + --i; + aid = INDEX_TO_PAGEID(bibop, i); + } + + rp = (bigobj_region_t *)BIBOP_INDEX_TO_ADDR(i); + + return ADDR_TO_BODESC(rp, addr); + +} /* end of BO_GetDesc */ + + +/* BO_AddrToCodeObjTag: + * + * Return the tag of the code object containing the given PC (or else + * NIL). + */ +Byte_t *BO_AddrToCodeObjTag (Word_t pc) +{ + bigobj_region_t *region; + aid_t aid; + + aid = ADDR_TO_PAGEID(BIBOP, pc); + + if (IS_BIGOBJ_AID(aid)) { + int indx = BIBOP_ADDR_TO_INDEX(pc); + while (!BO_IS_HDR(aid)) { + --indx; + aid = INDEX_TO_PAGEID(BIBOP,indx); + ASSERT(IS_BIGOBJ_AID(aid)); + } + region = (bigobj_region_t *)BIBOP_INDEX_TO_ADDR(indx); + return BO_GetCodeObjTag (ADDR_TO_BODESC(region, pc)); + } + else + return NIL(Byte_t *); + +} /* end of BO_AddrToCodeObjTag */ + + +/* BO_GetCodeObjTag: + * + * Return the tag of the given code object. See + * + * compiler/CodeGen/cpscompile/smlnj-pseudoOps.sml + * + * for details on the tag layout. + */ +Byte_t *BO_GetCodeObjTag (bigobj_desc_t *bdp) +{ + Byte_t *lastByte; + int kx; + + lastByte = (Byte_t *)(bdp->obj) + bdp->sizeB - 1; + kx = *lastByte * WORD_SZB; + + return lastByte - kx + 1; + +} /* end of BO_GetCodeObjTag */ + diff --git a/base/runtime/gc/blast-gc.c b/base/runtime/gc/blast-gc.c new file mode 100644 index 0000000..583ee2c --- /dev/null +++ b/base/runtime/gc/blast-gc.c @@ -0,0 +1,907 @@ +/*! \file blast-gc.c + * + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * This is the garbage collector for compacting a blasted object. + * + * NOTE: the extraction of literals could cause a space overflow. + */ + +#include +#include "ml-base.h" +#include "ml-limits.h" +#include "ml-state.h" +#include "ml-values.h" +#include "memory.h" +#include "card-map.h" +#include "heap.h" +#include "tags.h" +#include "copy-loop.h" +#include "heap-monitor.h" +#include "ml-timer.h" +#include "ml-heap-image.h" +#include "blast-out.h" +#include "addr-hash.h" +#include "c-globals-tbl.h" +#include "ml-objects.h" +#include "ml-globals.h" + + +PVT bool_t repairHeap; /* this is TRUE, as long as it is cheaper */ + /* to repair the heap, than to complete */ + /* the collection */ +PVT bool_t finishGC; /* this is TRUE, when we are finishing a */ + /* garbage collection after blasting. */ +PVT int maxCollectedGen; /* the oldest generation being collected */ +PVT ml_val_t *savedTop /* save to-space top pointers */ + [MAX_NUM_GENS][NUM_ARENAS]; +PVT export_table_t *ExportTbl; /* the table of exported symbols */ +PVT addr_tbl_t *EmbObjTbl; /* the table of embedded object references */ + +/* typedef struct repair repair_t; */ /* in heap.h */ +struct repair { + ml_val_t *loc; /* the location to repair */ + ml_val_t val; /* the old value */ +}; + +/* record a location in a given arena for repair */ +#define NOTE_REPAIR(ap, location, value) { \ + arena_t *__ap = (ap); \ + if (repairHeap) { \ + repair_t *__rp = __ap->repairList - 1; \ + if ((ml_val_t *)__rp > __ap->nextw) { \ + __rp->loc = (location); \ + __rp->val = (value); \ + __ap->repairList = __rp; \ + } \ + else \ + repairHeap = FALSE; \ + } \ + } + +/* local routines */ +PVT void BlastGC_RepairHeap (ml_state_t *msp, int maxGen); +PVT void BlastGC_FinishGC (ml_state_t *msp, int maxGen); +PVT void BlastGC_Flip (heap_t *heap, int gen); +PVT status_t BlastGC_SweepToSpace (heap_t *heap, aid_t maxAid); +/* +PVT bool_t BlastGC_SweepToSpArrays (heap_t *heap, arena_t *tosp, card_map_t *cm); +*/ +PVT ml_val_t BlastGC_ForwardObj (heap_t *heap, ml_val_t obj, aid_t id); +PVT bigobj_desc_t *BlastGC_ForwardBigObj ( + heap_t *heap, ml_val_t *p, ml_val_t obj, aid_t aid); +PVT embobj_info_t *EmbObjLookup (addr_tbl_t *tbl, Addr_t addr, embobj_kind_t kind); +PVT void BlastGC_AssignLits (Addr_t addr, void *_closure, void *_info); +PVT void BlastGC_ExtractLits (Addr_t addr, void *_closure, void *_info); + +struct assignlits_clos { /* the closure for BlastGC_AssignLits */ + Word_t id; /* the heap image chunk index for */ + /* embedded literals */ + Word_t offset; /* the offset of the next literal */ +}; + +struct extractlits_clos { /* the closure for BlastGC_ExtractLits */ + writer_t *wr; + Word_t offset; /* the offset of the next literal; this is */ + /* used to align reals. */ +}; + + +/* check to see if we need to extend the number of flipped generations */ +#define CHECK_GEN(heap, g) { \ + int __g = (g); \ + if (__g > maxCollectedGen) \ + BlastGC_Flip ((heap), __g); \ + } + +/* BlastGC_CheckWord: + * + * Check an ML value for external references, etc. + */ +#define BlastGC_CheckWord(heap, bibop, p, maxAid, errFlg) { \ + ml_val_t __w = *(p); \ +/*SayDebug ("CheckWord @ %#x --> %#x: ", p, __w);*/\ + if (isBOXED(__w)) { \ + aid_t __aid = ADDR_TO_PAGEID(bibop, __w); \ + if (isUNMAPPED(__aid)) { \ + /* an external reference */ \ +/*SayDebug ("external reference\n");*/\ + if ((! finishGC) && (ExportCSymbol(ExportTbl, __w) == ML_unit)) \ + (errFlg) = TRUE; \ + } \ + else if (IS_BIGOBJ_AID(__aid)) \ +/*{SayDebug ("big-object\n");*/\ + BlastGC_ForwardBigObj(heap, p, __w, __aid); \ +/*}*/\ + else if (IS_FROM_SPACE(__aid, maxAid)) \ +/*{SayDebug ("regular object\n");*/\ + *(p) = BlastGC_ForwardObj(heap, __w, __aid); \ +/*}*/\ + } \ +/*else SayDebug ("unboxed \n");*/\ + } + + +/* BlastGC: + * + */ +blast_res_t BlastGC (ml_state_t *msp, ml_val_t *root, int gen) +{ + heap_t *heap = msp->ml_heap; + bibop_t bibop = BIBOP; + blast_res_t result; + bool_t errFlg = FALSE; + + /* Allocates the export and embedded object tables */ + ExportTbl = NewExportTbl(); + EmbObjTbl = MakeAddrTbl(LOG_BYTES_PER_WORD, 64); + + result.exportTbl = ExportTbl; + result.embobjTbl = EmbObjTbl; + + /* Initialize, by flipping the generations upto the one including the object */ + repairHeap = TRUE; + finishGC = FALSE; + maxCollectedGen = 0; + BlastGC_Flip (heap, gen); + + /* Scan the object root */ + BlastGC_CheckWord (heap, bibop, root, AID_MAX, errFlg); + if (errFlg) { + result.error = TRUE; + return result; + } + + /* Sweep to-space */ + if (BlastGC_SweepToSpace(heap, AID_MAX) == FAILURE) { + result.error = TRUE; + return result; + } + + result.error = FALSE; + result.needsRepair = repairHeap; + result.maxGen = maxCollectedGen; + + return result; + +} /* end of BlastGC. */ + + +/* BlastGC_AssignLitAddrs: + * + * Assign relocation addresses to the embedded literals that are going to be + * extracted. The arguments to this are the blast result (containing the + * embedded literal table), the ID of the heap image chunk that the string + * literals are to be stored in, and the starting offset in that chunk. + * This returns the address immediately following the last embedded literal. + * + * NOTE: this code will break if the size of the string space, plus embedded + * literals exceeds 16Mb. + */ +Addr_t BlastGC_AssignLitAddrs (blast_res_t *res, int id, Addr_t offset) +{ + struct assignlits_clos closure; + + closure.offset = offset; + closure.id = id; + AddrTblApply (EmbObjTbl, &closure, BlastGC_AssignLits); + + return closure.offset; + +} /* end of BlastGC_AssignLitAddrs */ + + +/* BlastGC_BlastLits: + * + * Blast out the embedded literals. + */ +void BlastGC_BlastLits (writer_t *wr) +{ + struct extractlits_clos closure; + + closure.wr = wr; + closure.offset = 0; + AddrTblApply (EmbObjTbl, &closure, BlastGC_ExtractLits); + +} /* end of BlastGC_BlastLits */ + + +/* BlastGC_FinishUp: + * + * Finish up the blast-out operation. This means either repairing the heap, + * or completing the GC. + */ +void BlastGC_FinishUp (ml_state_t *msp, blast_res_t *res) +{ + if (res->needsRepair) + BlastGC_RepairHeap (msp, res->maxGen); + else + BlastGC_FinishGC (msp, res->maxGen); + + FreeExportTbl (ExportTbl); + FreeAddrTbl (EmbObjTbl, TRUE); + +} /* BlastGC_FinishUp */ + +/* BlastGC_RepairHeap: + */ +PVT void BlastGC_RepairHeap (ml_state_t *msp, int maxGen) +{ + heap_t *heap = msp->ml_heap; + int i, j; + +#ifdef VERBOSE +SayDebug ("Repairing blast GC (maxGen = %d of %d)\n", maxGen, heap->numGens); +#endif + for (i = 0; i < maxGen; i++) { + gen_t *gen = heap->gen[i]; + +#define REPAIR(INDX) { \ + arena_t *__ap = gen->arena[INDX]; \ + if (isACTIVE(__ap)) { \ + repair_t *__stop, *__rp; \ + __stop = (repair_t *)(__ap->tospTop); \ + for (__rp = __ap->repairList; __rp < __stop; __rp++) { \ + ml_val_t *__p = __rp->loc; \ + if (INDX != PAIR_INDX) \ + __p[-1] = FOLLOW_FWDOBJ(__p)[-1]; \ + __p[0] = __rp->val; \ + } \ + } \ + } /* end of REPAIR */ + + /* repair the arenas */ + REPAIR(RECORD_INDX); + REPAIR(PAIR_INDX); + REPAIR(STRING_INDX); + REPAIR(ARRAY_INDX); + + /* free the to-space object, and reset the BIBOP marks */ + for (j = 0; j < NUM_ARENAS; j++) { + arena_t *ap = gen->arena[j]; + if (isACTIVE(ap)) { + /* un-flip the spaces; note that FreeGeneration needs the from-space + * information. + */ + ml_val_t *tmpBase = ap->tospBase; + Addr_t tmpSizeB = ap->tospSizeB; + ml_val_t *tmpTop = ap->tospTop; + ap->nextw = + ap->sweep_nextw = ap->frspTop; + ap->tospBase = ap->frspBase; + ap->frspBase = tmpBase; + ap->tospSizeB = ap->frspSizeB; + ap->frspSizeB = tmpSizeB; + ap->tospTop = savedTop[i][j]; + ap->frspTop = tmpTop; + } + } /* end of for */ + /* free the to-space memory object */ + { + mem_obj_t *tmpObj = gen->fromObj; + gen->fromObj = gen->toObj; + gen->toObj = tmpObj; + FreeGeneration (heap, i); + } + } /* end of for */ + +} /* end of BlastGC_RepairHeap */ + + +/* BlastGC_FinishGC: + * + * Complete the partial garbage collection. + */ +PVT void BlastGC_FinishGC (ml_state_t *msp, int maxGen) +{ + heap_t *heap = msp->ml_heap; + bibop_t bibop = BIBOP; + bool_t dummy = FALSE; + int i, j; + aid_t maxAid; + +#ifdef VERBOSE +SayDebug ("Completing blast GC (maxGen = %d of %d)\n", maxGen, heap->numGens); +#endif + finishGC = TRUE; + maxAid = MAKE_MAX_AID(maxGen); + + /* allocate new dirty vectors for the flipped generations */ + for (i = 0; i < maxGen; i++) { + gen_t *gen = heap->gen[i]; + if (isACTIVE(gen->arena[ARRAY_INDX])) + NewDirtyVector(gen); + } + + /* collect the roots */ +#define CheckRoot(p) { \ + ml_val_t *__p = (p); \ + BlastGC_CheckWord (heap, bibop, __p, maxAid, dummy); \ + } + + for (i = 0; i < NumCRoots; i++) + CheckRoot(CRoots[i]); + + CheckRoot(&(msp->ml_arg)); + CheckRoot(&(msp->ml_cont)); + CheckRoot(&(msp->ml_closure)); + CheckRoot(&(msp->ml_linkReg)); + CheckRoot(&(msp->ml_pc)); + CheckRoot(&(msp->ml_exnCont)); + CheckRoot(&(msp->ml_varReg)); + CheckRoot(&(msp->ml_calleeSave[0])); + CheckRoot(&(msp->ml_calleeSave[1])); + CheckRoot(&(msp->ml_calleeSave[2])); + + /* sweep the dirty pages of generations over maxGen */ + for (i = maxGen; i < heap->numGens; i++) { + gen_t *gen = heap->gen[i]; + if (isACTIVE(gen->arena[ARRAY_INDX])) { + card_map_t *cm = gen->dirty; + if (cm != NIL(card_map_t *)) { + ml_val_t *maxSweep = gen->arena[ARRAY_INDX]->sweep_nextw; + int card; +#if (!defined(BIT_CARDS) && defined(TOSPACE_ID)) + FOR_DIRTY_CARD (cm, maxGen, card, { + ml_val_t *p = (cm->baseAddr + (card*CARD_SZW)); + ml_val_t *q = p + CARD_SZW; + int mark = i+1; + if (q > maxSweep) + /* don't sweep above the allocation high-water mark */ + q = maxSweep; + for (; p < q; p++) { + ml_val_t w = *p; + if (isBOXED(w)) { + aid_t aid = ADDR_TO_PAGEID(bibop, w); + int targetGen; + if (IS_FROM_SPACE(aid, maxAid)) { + /* this is a from-space object */ + if (IS_BIGOBJ_AID(aid)) { + bigobj_desc_t *dp; + dp = BlastGC_ForwardBigObj (heap, p, w, aid); + targetGen = dp->gen; + } + else { + *p = + w = BlastGC_ForwardObj(heap, w, aid); + aid = ADDR_TO_PAGEID(bibop, w); + if (IS_TOSPACE_AID(aid)) + targetGen = TOSPACE_GEN(aid); + else + targetGen = EXTRACT_GEN(aid); + } + if (targetGen < mark) + mark = targetGen; + } + } + } /* end of for */ + /* re-mark the card */ + ASSERT(cm->map[card] <= mark); + if (mark <= i) + cm->map[card] = mark; + else if (i == maxGen) + cm->map[card] = CARD_CLEAN; + }); +#elif (!defined(BIT_CARDS)) + FOR_DIRTY_CARD (cm, maxGen, card, { + ml_val_t *p = (cm->baseAddr + (card*CARD_SZW)); + ml_val_t *q = p + CARD_SZW; + int mark = i+1; + if (q > maxSweep) + /* don't sweep above the allocation high-water mark */ + q = maxSweep; + for (; p < q; p++) { + ml_val_t w = *p; + if (isBOXED(w)) { + aid_t aid = ADDR_TO_PAGEID(bibop, w); + int targetGen; + if (IS_FROM_SPACE(aid, maxAid)) { + /* this is a from-space object */ + if (IS_BIGOBJ_AID(aid)) { + bigobj_desc_t *dp; + dp = BlastGC_ForwardBigObj (heap, p, w, aid); + targetGen = dp->gen; + } + else { + *p = + w = BlastGC_ForwardObj(heap, w, aid); + targetGen = EXTRACT_GEN(ADDR_TO_PAGEID(bibop, w)); + } + if (targetGen < mark) + mark = targetGen; + } + } + } /* end of for */ + /* re-mark the card */ + ASSERT(cm->map[card] <= mark); + if (mark <= i) + cm->map[card] = mark; + else if (i == maxGen) + cm->map[card] = CARD_CLEAN; + }); +#else + /* BIT_CARDS */ +#endif + } + } + } + + /* sweep to-space */ + BlastGC_SweepToSpace (heap, maxAid); + + /* Scan the array spaces of the flipped generations, marking dirty pages */ + for (i = 1; i < maxGen; i++) { + gen_t *gen = heap->gen[i]; + arena_t *ap = gen->arena[ARRAY_INDX]; + if (isACTIVE(ap)) { + card_map_t *cm = gen->dirty; + int card; + ml_val_t *p, *stop, w; + + p = ap->tospBase; + card = 0; + while (p < ap->nextw) { + int mark = i+1; + stop = (ml_val_t *)(((Addr_t)p + CARD_SZB) & ~(CARD_SZB - 1)); + if (stop > ap->nextw) + stop = ap->nextw; + while (p < stop) { + if (isBOXED(w = *p++)) { + aid_t aid = ADDR_TO_PAGEID(bibop, w); + int targetGen; + + if (IS_BIGOBJ_AID(aid)) { + bigobj_desc_t *dp = BO_GetDesc(w); + targetGen = dp->gen; + } + else + targetGen = EXTRACT_GEN(aid); + if (targetGen < mark) { + mark = targetGen; + if (mark == 1) { + p = stop; + break; /* nothing dirtier than 1st generation */ + } + } + } + } + if (mark <= i) + cm->map[card] = mark; + else + cm->map[card] = CARD_CLEAN; + card++; + } + } + } + + /* reclaim space */ + for (i = 0; i < maxGen; i++) { + FreeGeneration (heap, i); +#ifdef TOSPACE_ID + for (j = 0; j < NUM_ARENAS; j++) { + arena_t *ap = heap->gen[i]->arena[j]; + if (isACTIVE(ap)) + MarkRegion (bibop, ap->tospBase, ap->tospSizeB, ap->id); + } +#endif + } + + /* remember the top of to-space in the collected generations */ + for (i = 0; i < maxGen; i++) { + gen_t *g = heap->gen[i]; + if (i == heap->numGens-1) { + /* the oldest generation has only "young" objects */ + for (j = 0; j < NUM_ARENAS; j++) { + if (isACTIVE(g->arena[j])) + g->arena[j]->oldTop = g->arena[j]->tospBase; + else + g->arena[j]->oldTop = NIL(ml_val_t *); + } + } + else { + for (j = 0; j < NUM_ARENAS; j++) { + if (isACTIVE(g->arena[j])) + g->arena[j]->oldTop = g->arena[j]->nextw; + else + g->arena[j]->oldTop = NIL(ml_val_t *); + } + } + } + + HeapMon_UpdateHeap (heap, maxSweptGen); + +#ifdef GC_STATS + /* Count the number of forwarded bytes */ + for (i = 0; i < maxGen; i++) { + for (j = 0; j < NUM_ARENAS; j++) { + arena_t *ap = heap->gen[i]->arena[j]; + if (isACTIVE(ap)) { + CNTR_INCR(&(heap->numCopied[i][j]), ap->nextw - ap->tospBase); + } + } + } +#endif + +} /* end of BlastGC_FinishGC */ + + +/* BlastGC_Flip: + * + * Flip additional generations from maxCollectedGen+1 .. gen. We allocate + * a to-space that is the same size as the existing from-space. + */ +PVT void BlastGC_Flip (heap_t *heap, int gen) +{ + int i, j; + Addr_t newSz; + + for (i = maxCollectedGen; i < gen; i++) { + gen_t *g = heap->gen[i]; + for (j = 0; j < NUM_ARENAS; j++) { + arena_t *ap = g->arena[j]; + if (isACTIVE(ap)) { + ASSERT ((j == STRING_INDX) || (ap->nextw == ap->sweep_nextw)); + savedTop[i][j] = ap->tospTop; + FLIP_ARENA(ap); + newSz = (Addr_t)(ap->frspTop) - (Addr_t)(ap->frspBase); + if (i == 0) + /* need to guarantee space for future minor collections */ + newSz += heap->allocSzB; + if (j == PAIR_INDX) + newSz += 2*WORD_SZB; + ap->tospSizeB = RND_MEMOBJ_SZB(newSz); + } + } + g->fromObj = g->toObj; +#ifdef VERBOSE +SayDebug ("New Generation %d:\n", i+1); +#endif + if (NewGeneration(g) == FAILURE) + Die ("unable to allocate to-space for generation %d\n", i+1); + /* initialize the repair lists */ + for (j = 0; j < NUM_ARENAS; j++) { + arena_t *ap = g->arena[j]; +#ifdef VERBOSE +if (isACTIVE(ap)) SayDebug (" %#x: [%#x, %#x)\n", ap->id, ap->tospBase, ap->tospTop); +#endif + if (isACTIVE(ap)) + ap->repairList = (repair_t *)(ap->tospTop); + } + } + + maxCollectedGen = gen; + +} /* end of BlastGC_Flip */ + +/* BlastGC_SweepToSpace: + * Sweep the to-space arenas. Because there are few references forward in time, we + * try to completely scavenge a younger generation before moving on to the + * next oldest. + */ +PVT status_t BlastGC_SweepToSpace (heap_t *heap, aid_t maxAid) +{ + int i; + bool_t swept; + bibop_t bibop = BIBOP; + bool_t errFlg = FALSE; + +#define SweepToSpArena(gen, indx) { \ + arena_t *__ap = (gen)->arena[(indx)]; \ + if (isACTIVE(__ap)) { \ + ml_val_t *__p, *__q; \ + __p = __ap->sweep_nextw; \ + if (__p < __ap->nextw) { \ + swept = TRUE; \ + do { \ + for (__q = __ap->nextw; __p < __q; __p++) { \ + BlastGC_CheckWord(heap, bibop, __p, maxAid, errFlg); \ + } \ + } while (__q != __ap->nextw); \ + __ap->sweep_nextw = __q; \ + } \ + } \ + } /* SweepToSpArena */ + + do { + swept = FALSE; + for (i = 0; i < maxCollectedGen; i++) { + gen_t *gen = heap->gen[i]; + + /* Sweep the record and pair arenas */ + SweepToSpArena(gen, RECORD_INDX); + SweepToSpArena(gen, PAIR_INDX); + SweepToSpArena(gen, ARRAY_INDX); + } + } while (swept && (!errFlg)); + + return (errFlg ? FAILURE : SUCCESS); + +} /* end of SweepToSpace */ + + +/* BlastGC_ForwardObj: + * + * Forward an object. + */ +PVT ml_val_t BlastGC_ForwardObj (heap_t *heap, ml_val_t v, aid_t id) +{ + ml_val_t *obj = PTR_MLtoC(ml_val_t, v); + int gen = EXTRACT_GEN(id); + ml_val_t *new_obj; + ml_val_t desc; + Word_t len; + arena_t *arena; + + if (! finishGC) + CHECK_GEN(heap, gen); + + switch (EXTRACT_OBJC(id)) { + case OBJC_record: { + desc = obj[-1]; + switch (GET_TAG(desc)) { + case DTAG_vec_hdr: + case DTAG_arr_hdr: + len = 2; + break; + case DTAG_forward: + /* This object has already been forwarded */ + return PTR_CtoML(FOLLOW_FWDOBJ(obj)); + default: + len = GET_LEN(desc); + } + arena = heap->gen[gen-1]->arena[RECORD_INDX]; + } break; + + case OBJC_pair: { + ml_val_t w; + + w = obj[0]; + if (isDESC(w)) + return PTR_CtoML(FOLLOW_FWDPAIR(w, obj)); + else { + /* forward the pair */ + arena = heap->gen[gen-1]->arena[PAIR_INDX]; + new_obj = arena->nextw; + arena->nextw += 2; + new_obj[0] = w; + new_obj[1] = obj[1]; + /* setup the forward pointer in the old pair */ + NOTE_REPAIR(arena, obj, w); + obj[0] = MAKE_PAIR_FP(new_obj); + return PTR_CtoML(new_obj); + } + } break; + + case OBJC_string: { + arena = heap->gen[gen-1]->arena[STRING_INDX]; + desc = obj[-1]; + switch (GET_TAG(desc)) { + case DTAG_forward: + return PTR_CtoML(FOLLOW_FWDOBJ(obj)); +/* 64BIT: on 64-bit machines, DTAG_raw and DTAG_raw64 can be handled in the same way */ + case DTAG_raw: + len = GET_LEN(desc); + break; + case DTAG_raw64: + len = GET_LEN(desc); +#ifdef ALIGN_REALDS +# ifdef CHECK_HEAP + if (((Addr_t)arena->nextw & WORD_SZB) == 0) { + *(arena->nextw) = (ml_val_t)0; + arena->nextw++; + } +# else + arena->nextw = (ml_val_t *)(((Addr_t)arena->nextw) | WORD_SZB); +# endif +#endif + break; + default: + Die ("bad string tag %d, obj = %#x, desc = %#x", + GET_TAG(desc), obj, desc); + } + } break; + + case OBJC_array: { + desc = obj[-1]; + switch (GET_TAG(desc)) { + case DTAG_forward: + /* This object has already been forwarded */ + return PTR_CtoML(FOLLOW_FWDOBJ(obj)); + case DTAG_arr_data: + len = GET_LEN(desc); + break; + case DTAG_special: + /* we are conservative here, and never nullify special objects */ + len = 1; + break; + default: + Die ("bad array tag %d, obj = %#x, desc = %#x", + GET_TAG(desc), obj, desc); + } /* end of switch */ + arena = heap->gen[gen-1]->arena[ARRAY_INDX]; + } break; + + case OBJC_bigobj: + default: + Die("BlastGC_ForwardObj: unknown object class %d @ %#x", + EXTRACT_OBJC(id), obj); + } /* end of switch */ + + /* Allocate and initialize a to-space copy of the object */ + new_obj = arena->nextw; + arena->nextw += (len + 1); + *new_obj++ = desc; + COPYLOOP(obj, new_obj, len); + + /* set up the forward pointer, and return the new object. */ + NOTE_REPAIR(arena, obj, *obj); + obj[-1] = DESC_forwarded; + obj[0] = (ml_val_t)(Addr_t)new_obj; + return PTR_CtoML(new_obj); + +} /* end of BlastGC_ForwardObj */ + + +/* BlastGC_ForwardBigObj: + * + * Forward a big-object obj, where id is the BIBOP entry for obj, and return + * the big-object descriptor. + * NOTE: we do not ``promote'' big-objects here, because are not reclaimed + * when completing th collection. + */ +PVT bigobj_desc_t *BlastGC_ForwardBigObj ( + heap_t *heap, + ml_val_t *p, + ml_val_t obj, + aid_t aid) +{ + int i; + bigobj_region_t *region; + bigobj_desc_t *dp; + embobj_info_t *codeInfo; + + /* find the beginning of the region containing the code object */ + i = BIBOP_ADDR_TO_INDEX(obj); + while (! BO_IS_HDR(aid)) { + --i; + aid = INDEX_TO_PAGEID(BIBOP, i); + } + + region = (bigobj_region_t *)BIBOP_INDEX_TO_ADDR(i); + dp = ADDR_TO_BODESC(region, obj); + + if (! finishGC) { + CHECK_GEN(heap, dp->gen); + codeInfo = EmbObjLookup (EmbObjTbl, dp->obj, UNUSED_CODE); + codeInfo->kind = USED_CODE; + } + + return dp; + +} /* end of BlastGC_ForwardBigObj */ + + +/* EmbObjLookup: + */ +PVT embobj_info_t *EmbObjLookup (addr_tbl_t *tbl, Addr_t addr, embobj_kind_t kind) +{ + embobj_info_t *p = FindEmbObj(tbl, addr); + + if (p == NIL(embobj_info_t *)) { + p = NEW_OBJ(embobj_info_t); + p->kind = kind; + p->codeObj = NIL(embobj_info_t *); + AddrTblInsert(tbl, addr, p); + } + + ASSERT(kind == p->kind); + + return p; + +} /* end of EmbObjLookup */ + +/* BlastGC_AssignLits: + * + * Calculate the location of the extracted literal strings in the blasted + * image, and record their addresses. This function is passed as an argument + * to AddrTblApply; its second argument is its "closure," and its third + * argument is the embedded object info. + */ +PVT void BlastGC_AssignLits (Addr_t addr, void *_closure, void *_info) +{ +#ifdef XXX + struct assignlits_clos *closure = (struct assignlits_clos *) _closure; + embobj_info_t *info = (embobj_info_t *) _info; + int objSzB; + + switch (info->kind) { + case UNUSED_CODE: + case USED_CODE: + info->relAddr = (ml_val_t)0; + return; + case EMB_STRING: { + int nChars = OBJ_LEN(PTR_CtoML(addr)); + int nWords = BYTES_TO_WORDS(nChars); + if ((nChars != 0) && ((nChars & 0x3) == 0)) + nWords++; + objSzB = nWords * WORD_SZB; + } break; + case EMB_REALD: + objSzB = OBJ_LEN(PTR_CtoML(addr)) * REALD_SZB; +#ifdef ALIGN_REALDS + closure->offset |= WORD_SZB; +#endif + break; + default: + Die("BlastGC_AssignLits: unexpected kind %d\n", info->kind); + } + + if (info->codeObj->kind == USED_CODE) { + /* the containing code object is also being exported */ + info->relAddr = (ml_val_t)0; + return; + } + + if (objSzB == 0) { + info->relAddr = ExportCSymbol (ExportTbl, + (info->kind == EMB_STRING) ? ML_string0 : ML_realarray0); + } + else { + /* assign a relocation address to the object, and bump the offset counter */ + closure->offset += WORD_SZB; /* space for the descriptor */ + info->relAddr = HIO_TAG_PTR(closure->id, closure->offset); + closure->offset += objSzB; + } +#else +Die ("BlastGC_AssignLits"); +#endif +} /* end of BlastGC_AssignLits */ + +/* BlastGC_ExtractLits: + * + * Extract the embedded literals that are in otherwise unreferenced code + * blocks. This function is passed as an argument to AddrTblApply; its + * second argument is its "closure," and its third argument is the + * embedded object info. + */ +PVT void BlastGC_ExtractLits (Addr_t addr, void *_closure, void *_info) +{ + struct extractlits_clos *closure = (struct extractlits_clos *) _closure; + embobj_info_t *info = (embobj_info_t *) _info; + int objSzB; + + if (info->relAddr == (ml_val_t)0) + return; + + switch (info->kind) { + case EMB_STRING: { + int nChars = OBJ_LEN(PTR_CtoML(addr)); + int nWords = BYTES_TO_WORDS(nChars); + if ((nChars != 0) && ((nChars & 0x3) == 0)) + nWords++; + objSzB = nWords * WORD_SZB; + } break; + case EMB_REALD: + objSzB = OBJ_LEN(PTR_CtoML(addr)) * REALD_SZB; +#ifdef ALIGN_REALDS + if ((closure->offset & (REALD_SZB-1)) == 0) { + /* the descriptor would be 8-byte aligned, which means that the + * real number would not be, so add some padding. + */ + WR_Put(closure->wr, 0); + closure->offset += 4; + } +#endif + break; + default: + Die("BlastGC_ExtractLits: unexpected kind %d\n", info->kind); + } + + if (objSzB != 0) { + /* extract the object into the blast buffer (including the descriptor) */ + WR_Write(closure->wr, (void *)(addr - WORD_SZB), objSzB + WORD_SZB); + closure->offset += (objSzB + WORD_SZB); + } + +} /* end of BlastGC_ExtractLits */ diff --git a/base/runtime/gc/blast-in.c b/base/runtime/gc/blast-in.c new file mode 100644 index 0000000..aa24ea3 --- /dev/null +++ b/base/runtime/gc/blast-in.c @@ -0,0 +1,209 @@ +/*! \file blast-in.c + * + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + */ + +#include +#include "ml-base.h" +#include "ml-values.h" +#include "ml-state.h" +#include "heap.h" +#include "ml-heap-image.h" +#include "c-globals-tbl.h" +#include "heap-input.h" + + +/* local routines */ +PVT status_t ReadImage (ml_state_t *msp, inbuf_t *bp, ml_val_t *objRef); + + +/* BlastIn: + * + * Build an ML heap object from a sequence of bytes; the fd is the underlying + * file descriptor (== -1, if blasting from a string), buf is any pre-read + * bytes of data, and nbytesP points to the number of bytes in buf. + */ +ml_val_t BlastIn (ml_state_t *msp, Byte_t *buf, long len, bool_t *errFlg) +{ + inbuf_t inBuf; + ml_image_hdr_t hdr; + ml_val_t obj; + + inBuf.needsSwap = FALSE; + inBuf.file = NULL; + inBuf.base = buf; + inBuf.buf = buf; + inBuf.nbytes = len; + + /* read the object header */ + if (HeapIO_ReadBlock (&inBuf, &hdr, sizeof(hdr)) == FAILURE) { + *errFlg = TRUE; + return ML_unit; + } + if (hdr.byteOrder != ORDER) { + if (BIGENDIAN_TO_HOST32(hdr.byteOrder) != ORDER) { + *errFlg = TRUE; + return ML_unit; + } + hdr.magic = BIGENDIAN_TO_HOST32(hdr.magic); + hdr.kind = BIGENDIAN_TO_HOST32(hdr.kind); + inBuf.needsSwap = TRUE; + } + if (hdr.magic != BLAST_MAGIC) { + *errFlg = TRUE; + return ML_unit; + } + + switch (hdr.kind) { + case BLAST_IMAGE: + if (ReadImage(msp, &inBuf, &obj) == FAILURE) { + *errFlg = TRUE; + return ML_unit; + } + break; + case BLAST_UNBOXED: { + ml_blast_hdr_t bhdr; + if (HeapIO_ReadBlock(&inBuf, &bhdr, sizeof(bhdr)) == FAILURE) { + *errFlg = TRUE; + return ML_unit; + } + else + obj = bhdr.rootObj; + } break; + default: + *errFlg = TRUE; + return ML_unit; + } + + return obj; + +} /* end of BlastIn */ + + +/* ReadImage: + */ +PVT status_t ReadImage (ml_state_t *msp, inbuf_t *bp, ml_val_t *objRef) +{ + ml_blast_hdr_t blastHdr; + ml_val_t *externs; + heap_arena_hdr_t *arenaHdrs[NUM_OBJ_KINDS], *arenaHdrsBuf; + int arenaHdrsSize, i; + gen_t *gen1 = msp->ml_heap->gen[0]; + + if ((HeapIO_ReadBlock(bp, &blastHdr, sizeof(blastHdr)) == FAILURE) + || (blastHdr.numArenas > NUM_ARENAS) + || (blastHdr.numBOKinds > NUM_BIGOBJ_KINDS)) + return FAILURE; + + /* read the externals table */ + externs = HeapIO_ReadExterns (bp); + + /* read the arena headers. */ + arenaHdrsSize = (blastHdr.numArenas + blastHdr.numBOKinds) + * sizeof(heap_arena_hdr_t); + arenaHdrsBuf = (heap_arena_hdr_t *) MALLOC (arenaHdrsSize); + if (HeapIO_ReadBlock (bp, arenaHdrsBuf, arenaHdrsSize) == FAILURE) { + FREE (arenaHdrsBuf); + return FAILURE; + } + for (i = 0; i < NUM_OBJ_KINDS; i++) + arenaHdrs[i] = NIL(heap_arena_hdr_t *); + for (i = 0; i < blastHdr.numArenas; i++) { + heap_arena_hdr_t *p = &(arenaHdrsBuf[i]); + arenaHdrs[p->objKind] = p; + } + /** DO BIG OBJECT HEADERS TOO **/ + + /* check the heap to see if there is enough free space in the 1st generation */ + { + Addr_t allocSzB = msp->ml_heap->allocSzB; + bool_t needsGC = FALSE; + + for (i = 0; i < NUM_ARENAS; i++) { + arena_t *ap = gen1->arena[i]; + if ((arenaHdrs[i] != NIL(heap_arena_hdr_t *)) && ((! isACTIVE(ap)) + || (AVAIL_SPACE(ap) < arenaHdrs[i]->info.o.sizeB + allocSzB))) { + needsGC = TRUE; + ap->reqSizeB = arenaHdrs[i]->info.o.sizeB; + } + } + if (needsGC) { + if (bp->nbytes > 0) { + /* the GC may cause the buffer to move */ + ml_val_t buffer = PTR_CtoML(bp->base); + InvokeGCWithRoots (msp, 1, &buffer, NIL(ml_val_t *)); + if (buffer != PTR_CtoML(bp->base)) { + /* the buffer moved, so adjust the buffer pointers */ + Byte_t *newBase = PTR_MLtoC(Byte_t, buffer); + bp->buf = newBase + (bp->buf - bp->base); + bp->base = newBase; + } + } + else + InvokeGC (msp, 1); + } + } + + /** Read the blasted objects **/ + { + Addr_t arenaBase[NUM_ARENAS]; + + for (i = 0; i < NUM_ARENAS; i++) { + if (arenaHdrs[i] != NIL(heap_arena_hdr_t *)) { + arena_t *ap = gen1->arena[i]; + arenaBase[i] = (Addr_t)(ap->nextw); + HeapIO_ReadBlock (bp, (ap->nextw), arenaHdrs[i]->info.o.sizeB); +/*SayDebug ("[%2d] Read [%#x..%#x)\n", i+1, ap->nextw,*/ +/*(Addr_t)(ap->nextw)+arenaHdrs[i]->info.o.sizeB);*/ + } + } + + /* adjust the pointers */ + for (i = 0; i < NUM_ARENAS; i++) { + if (arenaHdrs[i] != NIL(heap_arena_hdr_t *)) { + arena_t *ap = gen1->arena[i]; + if (i != STRING_INDX) { + ml_val_t *p, *stop; + p = ap->nextw; + stop = (ml_val_t *)((Addr_t)p + arenaHdrs[i]->info.o.sizeB); + while (p < stop) { + ml_val_t w = *p; + if (! isUNBOXED(w)) { + if (isEXTERNTAG(w)) { + w = externs[EXTERNID(w)]; + } + else if (! isDESC(w)) { +/*SayDebug ("adjust (@%#x) %#x --> ", p, w);*/ + w = PTR_CtoML(arenaBase[HIO_GET_ID(w)] + HIO_GET_OFFSET(w)); +/*SayDebug ("%#x\n", w);*/ + } + *p = w; + } + p++; + } + ap->nextw = + ap->sweep_nextw = stop; + } + else + ap->nextw = (ml_val_t *)((Addr_t)(ap->nextw) + + arenaHdrs[i]->info.o.sizeB); + } + } /* end of for */ + + /* adjust the root object pointer */ + if (isEXTERNTAG(blastHdr.rootObj)) + *objRef = externs[EXTERNID(blastHdr.rootObj)]; + else + *objRef = PTR_CtoML( + arenaBase[HIO_GET_ID(blastHdr.rootObj)] + + HIO_GET_OFFSET(blastHdr.rootObj)); +/*SayDebug ("root = %#x, adjusted = %#x\n", blastHdr.rootObj, *objRef);*/ + } + + FREE (arenaHdrsBuf); + FREE (externs); + + return SUCCESS; + +} /* end of ReadImage */ diff --git a/base/runtime/gc/blast-out.c b/base/runtime/gc/blast-out.c new file mode 100644 index 0000000..d1fb93e --- /dev/null +++ b/base/runtime/gc/blast-out.c @@ -0,0 +1,332 @@ +/*! \file blast-out.c + * + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + */ + +#include "ml-osdep.h" +#include "ml-base.h" +#include "ml-values.h" +#include "ml-objects.h" +#include "ml-heap-image.h" +#include "c-globals-tbl.h" +#include "addr-hash.h" +#include "gc.h" +#include "blast-out.h" +#include "heap-output.h" +#include "heap-io.h" + +#define BLAST_ERROR ML_unit + +/* local routines */ +PVT ml_val_t BlastUnboxed (ml_state_t *msp, ml_val_t obj); +PVT ml_val_t BlastHeap (ml_state_t *msp, ml_val_t obj, blast_res_t *info); +PVT ml_val_t AllocBlastData (ml_state_t *msp, Addr_t sizeB); + + +/* BlastOut: + * + * Linearize an ML object into a vector of bytes; return ML_unit on errors. + */ +ml_val_t BlastOut (ml_state_t *msp, ml_val_t obj) +{ + blast_res_t res; + int gen; + ml_val_t blastedObj; + + /* Collect allocation space */ + InvokeGCWithRoots (msp, 0, &obj, NIL(ml_val_t *)); + + gen = GetObjGen (obj); + + if (gen == -1) { + /* unboxed */ + blastedObj = BlastUnboxed (msp, obj); + } + else { /* a regular ML object */ + /* do the blast GC */ +/* DEBUG CheckHeap (msp->ml_heap, msp->ml_heap->numGens); */ + res = BlastGC (msp, &obj, gen); + + /* blast out the image */ + blastedObj = BlastHeap (msp, obj, &res); + + /* repair the heap or finish the GC */ + BlastGC_FinishUp (msp, &res); + +/* DEBUG CheckHeap (msp->ml_heap, res.maxGen); */ + } + + return blastedObj; + +} /* end of BlastOut */ + + +/* BlastUnboxed: + * + * Blast out an unboxed value. + */ +PVT ml_val_t BlastUnboxed (ml_state_t *msp, ml_val_t obj) +{ + ml_blast_hdr_t blastHdr; + int szB = sizeof(ml_image_hdr_t) + sizeof(ml_blast_hdr_t); + ml_val_t blastedObj; + writer_t *wr; + + /* allocate space for the object */ + blastedObj = AllocBlastData (msp, szB); + wr = WR_OpenMem (PTR_MLtoC(Byte_t, blastedObj), szB); + + HeapIO_WriteImageHeader (wr, BLAST_UNBOXED); + + blastHdr.numArenas = 0; + blastHdr.numBOKinds = 0; + blastHdr.numBORegions = 0; + blastHdr.hasCode = FALSE; + blastHdr.rootObj = obj; + + WR_Write(wr, &blastHdr, sizeof(blastHdr)); + + if (WR_Error(wr)) + return ML_unit; + else { + WR_Free(wr); + SEQHDR_ALLOC (msp, blastedObj, DESC_string, blastedObj, szB); + return blastedObj; + } + +} /* end of BlastUnboxed */ + + +/* BlastHeap: + * + * Blast out the heap image. + */ +PVT ml_val_t BlastHeap (ml_state_t *msp, ml_val_t obj, blast_res_t *info) +{ + heap_t *heap = msp->ml_heap; + int maxGen = info->maxGen; + Addr_t totArenaSzB[NUM_ARENAS], totSzB; + struct { + Addr_t base; /* the base address of the arena in the heap */ + Addr_t offset; /* the relative position in the merged */ + /* arena. */ + } adjust[MAX_NUM_GENS][NUM_ARENAS]; + heap_arena_hdr_t *p, *arenaHdrs[NUM_OBJ_KINDS], *arenaHdrsBuf; + int arenaHdrSz, i, j, numArenas; + ml_val_t blastedObj; + writer_t *wr; + + /* compute the arena offsets in the heap image */ + for (i = 0; i < NUM_ARENAS; i++) + totArenaSzB[i] = 0; + /* the embedded literals go first */ + totArenaSzB[STRING_INDX] = BlastGC_AssignLitAddrs (info, STRING_INDX, 0); +/* DEBUG SayDebug("%d bytes of string literals\n", totArenaSzB[STRING_INDX]); */ + for (i = 0; i < maxGen; i++) { + for (j = 0; j < NUM_ARENAS; j++) { + arena_t *ap = heap->gen[i]->arena[j]; + adjust[i][j].offset = totArenaSzB[j]; + if (isACTIVE(ap)) { +/* DEBUG SayDebug("[%d][%d] base = %#x, nextw = %#x, %d bytes\n", */ +/* DEBUG i, j, ap->tospBase, ap->nextw, (Addr_t)(ap->nextw) - (Addr_t)(ap->tospBase)); */ + totArenaSzB[j] += (Addr_t)(ap->nextw) - (Addr_t)(ap->tospBase); + adjust[i][j].base = (Addr_t)(ap->tospBase); + } + else + adjust[i][j].base = 0; + } + } +/* DEBUG for (i = 0; i < NUM_ARENAS; i++) SayDebug ("arena %d: %d bytes\n", i+1, totArenaSzB[i]); */ + /** WHAT ABOUT THE BIG OBJECTS??? **/ + + /* Compute the total size of the blasted object */ + for (i = 0, numArenas = 0, totSzB = 0; i < NUM_ARENAS; i++) { + if (totArenaSzB[i] > 0) { + numArenas++; + totSzB += totArenaSzB[i]; + } + } + totSzB += (sizeof(ml_image_hdr_t) + sizeof(ml_blast_hdr_t) + + (numArenas * sizeof(heap_arena_hdr_t))); + /** COUNT SPACE FOR BIG OBJECTS **/ + + /* include the space for the external symbols */ + totSzB += sizeof(extern_tbl_hdr_t) + ExportTableSz(info->exportTbl); + + /* allocate the heap object for the blasted representation, and initialize + * the writer. + */ + blastedObj = AllocBlastData (msp, totSzB); + wr = WR_OpenMem (PTR_MLtoC(Byte_t, blastedObj), totSzB); + + /* initialize the arena headers */ + arenaHdrSz = numArenas * sizeof(heap_arena_hdr_t); + arenaHdrsBuf = (heap_arena_hdr_t *) MALLOC (arenaHdrSz); + for (p = arenaHdrsBuf, i = 0; i < NUM_ARENAS; i++) { + if (totArenaSzB[i] > 0) { + p->gen = 0; + p->objKind = i; + p->info.o.baseAddr = 0; /* not used */ + p->info.o.sizeB = totArenaSzB[i]; + p->info.o.roundedSzB = -1; /* not used */ + p->offset = -1; /* not used */ + arenaHdrs[i] = p; + p++; + } + else + arenaHdrs[i] = NIL(heap_arena_hdr_t *); + } + /** WHAT ABOUT BIG OBJECTS **/ + + /* blast out the image header */ + if (HeapIO_WriteImageHeader (wr, BLAST_IMAGE) == FAILURE) { + FREE (arenaHdrsBuf); + return BLAST_ERROR; + } + + /* blast out the blast header */ + { + ml_blast_hdr_t hdr; + + hdr.numArenas = numArenas; + hdr.numBOKinds = 0; /** FIX THIS **/ + hdr.numBORegions = 0; /** FIX THIS **/ + + if (isEXTERNTAG(obj)) { + ASSERT(numArenas == 0); + hdr.rootObj = obj; + } + else { + aid_t aid = ADDR_TO_PAGEID(BIBOP, obj); + + if (IS_BIGOBJ_AID(aid)) { + embobj_info_t *p = FindEmbObj(info->embobjTbl, obj); + + if ((p == NIL(embobj_info_t *)) || (p->kind == USED_CODE)) { + Error ("blasting big objects not implemented\n"); + FREE (arenaHdrsBuf); + return BLAST_ERROR; + } + else + hdr.rootObj = p->relAddr; + } + else { + Addr_t addr = PTR_MLtoADDR(obj); + int gen = EXTRACT_GEN(aid) - 1; + int kind = EXTRACT_OBJC(aid) - 1; + addr -= adjust[gen][kind].base; + addr += adjust[gen][kind].offset; + hdr.rootObj = HIO_TAG_PTR(kind, addr); + } + } + + WR_Write(wr, &hdr, sizeof(hdr)); + if (WR_Error(wr)) { + FREE (arenaHdrsBuf); + return BLAST_ERROR; + } + } + + /* blast out the externals table */ + if (HeapIO_WriteExterns(wr, info->exportTbl) == -1) { + FREE (arenaHdrsBuf); + return BLAST_ERROR; + } + + /* blast out the arena headers */ + WR_Write (wr, arenaHdrsBuf, arenaHdrSz); + if (WR_Error(wr)) { + FREE (arenaHdrsBuf); + return BLAST_ERROR; + } + + /* blast out the heap itself */ + for (i = 0; i < NUM_ARENAS; i++) { + if (i == STRING_INDX) { + /* blast out the embedded literals */ + BlastGC_BlastLits (wr); + /* blast out the rest of the strings */ + for (j = 0; j < maxGen; j++) { + arena_t *ap = heap->gen[j]->arena[i]; + if (isACTIVE(ap)) { + WR_Write(wr, ap->tospBase, + (Addr_t)(ap->nextw)-(Addr_t)(ap->tospBase)); + } + } /* end for */ + } + else { + for (j = 0; j < maxGen; j++) { + arena_t *ap = heap->gen[j]->arena[i]; + ml_val_t *p, *top; + if (isACTIVE(ap)) { + for (p = ap->tospBase, top = ap->nextw; p < top; p++) { + ml_val_t w = *p; + if (isBOXED(w)) { + aid_t aid = ADDR_TO_PAGEID(BIBOP, w); + if (isUNMAPPED(aid)) { + w = ExportCSymbol(info->exportTbl, w); + ASSERT (w != ML_unit); + } + else if (IS_BIGOBJ_AID(aid)) { + embobj_info_t *objInfo + = FindEmbObj(info->embobjTbl, w); + + if ((objInfo == NIL(embobj_info_t *)) + || (objInfo->kind == USED_CODE)) + Die("blast bigobj unimplemented"); + else + w = objInfo->relAddr; + } + else { + /* adjust the pointer */ + int gen = EXTRACT_GEN(aid)-1; + int kind = EXTRACT_OBJC(aid)-1; + Addr_t addr = PTR_MLtoADDR(w); + addr -= adjust[gen][kind].base; + addr += adjust[gen][kind].offset; + w = HIO_TAG_PTR(kind, addr); + } + } + WR_Put(wr, (Word_t)w); + } + } + } /* end for */ + } + } + + FREE (arenaHdrsBuf); + + if (WR_Error(wr)) + return BLAST_ERROR; + else { + SEQHDR_ALLOC (msp, blastedObj, DESC_string, blastedObj, totSzB); + return blastedObj; + } + +} /* end of BlastHeap */ + + +/* AllocBlastData: + * + * Allocate some heap memory for blasting an object. + */ +PVT ml_val_t AllocBlastData (ml_state_t *msp, Addr_t sizeB) +{ + heap_t *heap = msp->ml_heap; + int nWords = BYTES_TO_WORDS(sizeB); + ml_val_t desc = MAKE_DESC(nWords, DTAG_raw); + ml_val_t res; + +/** we probably should allocate space in the big-object region for these objects **/ + if (sizeB >= heap->allocSzB-(8*ONE_K)) { + Die ("blasting out of %d bytes not supported yet! Increase allocation arena size.", + sizeB); + } + + ML_AllocWrite (msp, 0, desc); + res = ML_Alloc (msp, nWords); + return res; + +} /* end of AllocBlastData */ + diff --git a/base/runtime/gc/blast-out.h b/base/runtime/gc/blast-out.h new file mode 100644 index 0000000..45e2385 --- /dev/null +++ b/base/runtime/gc/blast-out.h @@ -0,0 +1,60 @@ +/* blast-out.h + * + * COPYRIGHT (c) 1993 by AT&T Bell Laboratories. + * + */ + +#ifndef _BLAST_OUT_ +#define _BLAST_OUT_ + +#ifndef _ADDR_HASH_ +#include "addr-hash.h" +#endif + +#ifndef _C_GLOBALS_TBL_ +#include "c-globals-tbl.h" +#endif + +#ifndef _WRITER_ +#include "writer.h" +#endif + +/* the table of referenced code objects, and embedded literals */ + +typedef enum { + EMB_STRING, /* embedded string */ + EMB_REALD, /* embedded real */ + UNUSED_CODE, /* code object with only embedded references */ + USED_CODE /* code object with code references */ +} embobj_kind_t; + +typedef struct embobj_info { /* info about an embedded object */ + embobj_kind_t kind; + struct embobj_info *codeObj; /* points to entry for the code */ + /* object that this literal is */ + /* embedded in. */ + ml_val_t relAddr; /* the relocated address of the literal */ + /* in the blasted heap image. */ +} embobj_info_t; + +/* find an embedded object */ +#define FindEmbObj(tbl, addr) \ + ((embobj_info_t *)AddrTblLookup((tbl), (Addr_t)(addr))) + + +typedef struct { /* the result of blasting out an object */ + bool_t error; /* true, if there was an error during the */ + /* blast GC (e.g., unrecognized external obj) */ + bool_t needsRepair; /* true, if the heap needs repair; otherwise */ + /* the collection must be completed. */ + int maxGen; /* the oldest generation included in the blast. */ + export_table_t *exportTbl; /* the table of external objects */ + addr_tbl_t *embobjTbl; /* the table of embedded objects */ +} blast_res_t; + +extern blast_res_t BlastGC (ml_state_t *msp, ml_val_t *root, int gen); +Addr_t BlastGC_AssignLitAddrs (blast_res_t *res, int id, Addr_t offset); +void BlastGC_BlastLits (writer_t *wr); +extern void BlastGC_FinishUp (ml_state_t *msp, blast_res_t *res); + +#endif /* _BLAST_OUT_ */ diff --git a/base/runtime/gc/build-literals.c b/base/runtime/gc/build-literals.c new file mode 100644 index 0000000..0eff5db --- /dev/null +++ b/base/runtime/gc/build-literals.c @@ -0,0 +1,892 @@ +/*! \file build-literals.c + * + * This file implements a simple bytecode interpreter that implements a language + * for initializing a record of compile-time constant values. + * + * This code needs to agree with the code generator in base/CPS/main/new-literals.sml + * + * See dev-notes/new-literals.md for a description of the bytecode. + * + * \author John Reppy + */ + +/* + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + */ + +#include "ml-base.h" +#include "ml-objects.h" +#include "heap.h" +#include +#include + +/* printf formats for Int_t/Word_t types */ +#ifdef SIZE_64 +# define PRINT PRId64 +# define PRWORD PRIu64 +#else +# define PRINT PRId32 +# define PRWORD PRIu32 +#endif + +#define V1_MAGIC 0x19981022 +#define V2_MAGIC 0x20190921 + +/* Codes for literal machine instructions (version 2) */ +enum { + UNUSED = 0, + INT, + INT32, + INT64, + BIGINT, + IVEC, + IVEC8, + IVEC16, + IVEC32, + IVEC64, + REAL32, + REAL64, + RVEC32, + RVEC64, + STR8, + RECORD, + VECTOR, + RAW, /* word-sized raw data */ + RAW32, /* 32-bit aligned raw data; will be padded to 64-bits on 64-bit targets */ + RAW64, /* 64-bit aligned raw data */ + CONCAT, + SAVE, + LOAD, + RETURN, + INVALID /* invalid opcode */ +}; + +/* argument types; if present, the argument may either be a literal value or + * a count of number of values to include in a vector/record. In the latter + * case we specify the sign and size of the literal in the instruction stream + * and the size of that the instruction requires. + */ +enum { + NO_ARG, /* no argument */ + IMMED_ARG, /* get argument from immedArg field (32 bits) */ + I8_ARG, /* one-byte signed argument */ + U8_ARG, /* one-byte unsigned argument */ + I16_ARG, /* two-byte signed argument */ + U16_ARG, /* two-byte unsigned argument */ + I32_ARG, /* four-byte signed argument */ + U32_ARG, /* four-byte unsigned argument */ + I64_ARG, /* eight-byte signed argument (64 bits) */ + INT_ARG /* target-sized integer argument */ +}; + +static struct instr_info { + unsigned char oper; /* the type of operation */ + unsigned char argKind; /* the kind of argument */ + signed char immedArg; /* immediate argument (if present) */ + unsigned char pad; /* pad to four bytes */ + } InstrInfo[256] = { + /* 0x00 */ { INT, IMMED_ARG, 0, 0 }, + /* 0x01 */ { INT, IMMED_ARG, 1, 0 }, + /* 0x02 */ { INT, IMMED_ARG, 2, 0 }, + /* 0x03 */ { INT, IMMED_ARG, 3, 0 }, + /* 0x04 */ { INT, IMMED_ARG, 4, 0 }, + /* 0x05 */ { INT, IMMED_ARG, 5, 0 }, + /* 0x06 */ { INT, IMMED_ARG, 6, 0 }, + /* 0x07 */ { INT, IMMED_ARG, 7, 0 }, + /* 0x08 */ { INT, IMMED_ARG, 8, 0 }, + /* 0x09 */ { INT, IMMED_ARG, 9, 0 }, + /* 0x0A */ { INT, IMMED_ARG, 10, 0 }, + /* 0x0B */ { INT, IMMED_ARG, -1, 0 }, + /* 0x0C */ { INT, IMMED_ARG, -2, 0 }, + /* 0x0D */ { INT, IMMED_ARG, -3, 0 }, + /* 0x0E */ { INT, IMMED_ARG, -4, 0 }, + /* 0x0F */ { INT, IMMED_ARG, -5, 0 }, + /* 0x10 */ { INT, I8_ARG, 0, 0 }, + /* 0x11 */ { INT, I16_ARG, 0, 0 }, + /* 0x12 */ { INT, I32_ARG, 0, 0 }, +#ifdef SIZE_64 + /* 0x13 */ { INT, I64_ARG, 0, 0 }, +#else /* SIZE_32 */ + /* 0x13 */ { INVALID, NO_ARG, 0, 0 }, +#endif +#ifdef SIZE_64 + /* 0x14 */ { INVALID, NO_ARG, 0, 0 }, + /* 0x15 */ { INVALID, NO_ARG, 0, 0 }, + /* 0x16 */ { INVALID, NO_ARG, 0, 0 }, + /* 0x17 */ { INT64, I8_ARG, 0, 0 }, + /* 0x18 */ { INT64, I16_ARG, 0, 0 }, + /* 0x19 */ { INT64, I32_ARG, 0, 0 }, + /* 0x1A */ { INT64, I64_ARG, 0, 0 }, +#else /* SIZE_32 */ + /* 0x14 */ { INT32, I8_ARG, 0, 0 }, + /* 0x15 */ { INT32, I16_ARG, 0, 0 }, + /* 0x16 */ { INT32, I32_ARG, 0, 0 }, + /* 0x17 */ { INVALID, NO_ARG, 0, 0 }, + /* 0x18 */ { INVALID, NO_ARG, 0, 0 }, + /* 0x19 */ { INVALID, NO_ARG, 0, 0 }, + /* 0x1A */ { INVALID, NO_ARG, 0, 0 }, +#endif + /* 0x1B */ { BIGINT, U32_ARG, 0, 0 }, + /* 0x1C */ { IVEC, U8_ARG, 0, 0 }, + /* 0x1D */ { IVEC, U32_ARG, 0, 0 }, + /* 0x1E */ { IVEC8, U8_ARG, 0, 0 }, + /* 0x1F */ { IVEC8, U32_ARG, 0, 0 }, + /* 0x20 */ { IVEC16, U8_ARG, 0, 0 }, + /* 0x21 */ { IVEC16, U32_ARG, 0, 0 }, + /* 0x22 */ { IVEC32, U8_ARG, 0, 0 }, + /* 0x23 */ { IVEC32, U32_ARG, 0, 0 }, + /* 0x24 */ { IVEC64, U8_ARG, 0, 0 }, + /* 0x25 */ { IVEC64, U32_ARG, 0, 0 }, + /* 0x26 */ { REAL32, NO_ARG, 0, 0 }, + /* 0x27 */ { REAL64, NO_ARG, 0, 0 }, + /* 0x28 */ { RVEC32, U8_ARG, 0, 0 }, + /* 0x29 */ { RVEC32, U32_ARG, 0, 0 }, + /* 0x2A */ { RVEC64, U8_ARG, 0, 0 }, + /* 0x2B */ { RVEC64, U32_ARG, 0, 0 }, + /* 0x2C */ { STR8, U8_ARG, 0, 0 }, + /* 0x2D */ { STR8, INT_ARG, 0, 0 }, + /* 0x2E */ { UNUSED, 0, 0, 0 }, + /* 0x2F */ { UNUSED, 0, 0, 0 }, + /* 0x30 */ { RECORD, IMMED_ARG, 1, 0 }, + /* 0x31 */ { RECORD, IMMED_ARG, 2, 0 }, + /* 0x32 */ { RECORD, IMMED_ARG, 3, 0 }, + /* 0x33 */ { RECORD, IMMED_ARG, 4, 0 }, + /* 0x34 */ { RECORD, IMMED_ARG, 5, 0 }, + /* 0x35 */ { RECORD, IMMED_ARG, 6, 0 }, + /* 0x36 */ { RECORD, IMMED_ARG, 7, 0 }, + /* 0x37 */ { RECORD, U8_ARG, 0, 0 }, + /* 0x38 */ { RECORD, U32_ARG, 0, 0 }, + /* 0x39 */ { VECTOR, U8_ARG, 0, 0 }, + /* 0x3A */ { VECTOR, U32_ARG, 0, 0 }, + /* 0x3B */ { RAW, IMMED_ARG, 1, 0 }, + /* 0x3C */ { RAW, IMMED_ARG, 2, 0 }, + /* 0x3D */ { RAW, U8_ARG, 0, 0 }, + /* 0x3E */ { RAW, U32_ARG, 0, 0 }, + /* 0x3F */ { RAW32, U8_ARG, 0, 0 }, + /* 0x40 */ { RAW32, U32_ARG, 0, 0 }, + /* 0x41 */ { RAW64, U8_ARG, 0, 0 }, + /* 0x42 */ { RAW64, U32_ARG, 0, 0 }, + /* 0x43 */ { CONCAT, U16_ARG, 0, 0 }, + /* 0x44 */ { SAVE, U8_ARG, 0, 0 }, + /* 0x45 */ { SAVE, U16_ARG, 0, 0 }, + /* 0x46 */ { LOAD, U8_ARG, 0, 0 }, + /* 0x47 */ { LOAD, U16_ARG, 0, 0 }, + /* 0x48 */ { UNUSED, 0, 0, 0 }, + /* 0x49 */ { UNUSED, 0, 0, 0 }, + /* 0x4A */ { UNUSED, 0, 0, 0 }, + /* 0x4B */ { UNUSED, 0, 0, 0 }, + /* 0x4C */ { UNUSED, 0, 0, 0 }, + /* 0x4D */ { UNUSED, 0, 0, 0 }, + /* 0x4E */ { UNUSED, 0, 0, 0 }, + /* 0x4F */ { UNUSED, 0, 0, 0 }, + /* 0x50 */ { UNUSED, 0, 0, 0 }, + /* 0x51 */ { UNUSED, 0, 0, 0 }, + /* 0x52 */ { UNUSED, 0, 0, 0 }, + /* 0x53 */ { UNUSED, 0, 0, 0 }, + /* 0x54 */ { UNUSED, 0, 0, 0 }, + /* 0x55 */ { UNUSED, 0, 0, 0 }, + /* 0x56 */ { UNUSED, 0, 0, 0 }, + /* 0x57 */ { UNUSED, 0, 0, 0 }, + /* 0x58 */ { UNUSED, 0, 0, 0 }, + /* 0x59 */ { UNUSED, 0, 0, 0 }, + /* 0x5A */ { UNUSED, 0, 0, 0 }, + /* 0x5B */ { UNUSED, 0, 0, 0 }, + /* 0x5C */ { UNUSED, 0, 0, 0 }, + /* 0x5D */ { UNUSED, 0, 0, 0 }, + /* 0x5E */ { UNUSED, 0, 0, 0 }, + /* 0x5F */ { UNUSED, 0, 0, 0 }, + /* 0x60 */ { UNUSED, 0, 0, 0 }, + /* 0x61 */ { UNUSED, 0, 0, 0 }, + /* 0x62 */ { UNUSED, 0, 0, 0 }, + /* 0x63 */ { UNUSED, 0, 0, 0 }, + /* 0x64 */ { UNUSED, 0, 0, 0 }, + /* 0x65 */ { UNUSED, 0, 0, 0 }, + /* 0x66 */ { UNUSED, 0, 0, 0 }, + /* 0x67 */ { UNUSED, 0, 0, 0 }, + /* 0x68 */ { UNUSED, 0, 0, 0 }, + /* 0x69 */ { UNUSED, 0, 0, 0 }, + /* 0x6A */ { UNUSED, 0, 0, 0 }, + /* 0x6B */ { UNUSED, 0, 0, 0 }, + /* 0x6C */ { UNUSED, 0, 0, 0 }, + /* 0x6D */ { UNUSED, 0, 0, 0 }, + /* 0x6E */ { UNUSED, 0, 0, 0 }, + /* 0x6F */ { UNUSED, 0, 0, 0 }, + /* 0x70 */ { UNUSED, 0, 0, 0 }, + /* 0x71 */ { UNUSED, 0, 0, 0 }, + /* 0x72 */ { UNUSED, 0, 0, 0 }, + /* 0x73 */ { UNUSED, 0, 0, 0 }, + /* 0x74 */ { UNUSED, 0, 0, 0 }, + /* 0x75 */ { UNUSED, 0, 0, 0 }, + /* 0x76 */ { UNUSED, 0, 0, 0 }, + /* 0x77 */ { UNUSED, 0, 0, 0 }, + /* 0x78 */ { UNUSED, 0, 0, 0 }, + /* 0x79 */ { UNUSED, 0, 0, 0 }, + /* 0x7A */ { UNUSED, 0, 0, 0 }, + /* 0x7B */ { UNUSED, 0, 0, 0 }, + /* 0x7C */ { UNUSED, 0, 0, 0 }, + /* 0x7D */ { UNUSED, 0, 0, 0 }, + /* 0x7E */ { UNUSED, 0, 0, 0 }, + /* 0x7F */ { UNUSED, 0, 0, 0 }, + /* 0x80 */ { UNUSED, 0, 0, 0 }, + /* 0x81 */ { UNUSED, 0, 0, 0 }, + /* 0x82 */ { UNUSED, 0, 0, 0 }, + /* 0x83 */ { UNUSED, 0, 0, 0 }, + /* 0x84 */ { UNUSED, 0, 0, 0 }, + /* 0x85 */ { UNUSED, 0, 0, 0 }, + /* 0x86 */ { UNUSED, 0, 0, 0 }, + /* 0x87 */ { UNUSED, 0, 0, 0 }, + /* 0x88 */ { UNUSED, 0, 0, 0 }, + /* 0x89 */ { UNUSED, 0, 0, 0 }, + /* 0x8A */ { UNUSED, 0, 0, 0 }, + /* 0x8B */ { UNUSED, 0, 0, 0 }, + /* 0x8C */ { UNUSED, 0, 0, 0 }, + /* 0x8D */ { UNUSED, 0, 0, 0 }, + /* 0x8E */ { UNUSED, 0, 0, 0 }, + /* 0x8F */ { UNUSED, 0, 0, 0 }, + /* 0x90 */ { UNUSED, 0, 0, 0 }, + /* 0x91 */ { UNUSED, 0, 0, 0 }, + /* 0x92 */ { UNUSED, 0, 0, 0 }, + /* 0x93 */ { UNUSED, 0, 0, 0 }, + /* 0x94 */ { UNUSED, 0, 0, 0 }, + /* 0x95 */ { UNUSED, 0, 0, 0 }, + /* 0x96 */ { UNUSED, 0, 0, 0 }, + /* 0x97 */ { UNUSED, 0, 0, 0 }, + /* 0x98 */ { UNUSED, 0, 0, 0 }, + /* 0x99 */ { UNUSED, 0, 0, 0 }, + /* 0x9A */ { UNUSED, 0, 0, 0 }, + /* 0x9B */ { UNUSED, 0, 0, 0 }, + /* 0x9C */ { UNUSED, 0, 0, 0 }, + /* 0x9D */ { UNUSED, 0, 0, 0 }, + /* 0x9E */ { UNUSED, 0, 0, 0 }, + /* 0x9F */ { UNUSED, 0, 0, 0 }, + /* 0xA0 */ { UNUSED, 0, 0, 0 }, + /* 0xA1 */ { UNUSED, 0, 0, 0 }, + /* 0xA2 */ { UNUSED, 0, 0, 0 }, + /* 0xA3 */ { UNUSED, 0, 0, 0 }, + /* 0xA4 */ { UNUSED, 0, 0, 0 }, + /* 0xA5 */ { UNUSED, 0, 0, 0 }, + /* 0xA6 */ { UNUSED, 0, 0, 0 }, + /* 0xA7 */ { UNUSED, 0, 0, 0 }, + /* 0xA8 */ { UNUSED, 0, 0, 0 }, + /* 0xA9 */ { UNUSED, 0, 0, 0 }, + /* 0xAA */ { UNUSED, 0, 0, 0 }, + /* 0xAB */ { UNUSED, 0, 0, 0 }, + /* 0xAC */ { UNUSED, 0, 0, 0 }, + /* 0xAD */ { UNUSED, 0, 0, 0 }, + /* 0xAE */ { UNUSED, 0, 0, 0 }, + /* 0xAF */ { UNUSED, 0, 0, 0 }, + /* 0xB0 */ { UNUSED, 0, 0, 0 }, + /* 0xB1 */ { UNUSED, 0, 0, 0 }, + /* 0xB2 */ { UNUSED, 0, 0, 0 }, + /* 0xB3 */ { UNUSED, 0, 0, 0 }, + /* 0xB4 */ { UNUSED, 0, 0, 0 }, + /* 0xB5 */ { UNUSED, 0, 0, 0 }, + /* 0xB6 */ { UNUSED, 0, 0, 0 }, + /* 0xB7 */ { UNUSED, 0, 0, 0 }, + /* 0xB8 */ { UNUSED, 0, 0, 0 }, + /* 0xB9 */ { UNUSED, 0, 0, 0 }, + /* 0xBA */ { UNUSED, 0, 0, 0 }, + /* 0xBB */ { UNUSED, 0, 0, 0 }, + /* 0xBC */ { UNUSED, 0, 0, 0 }, + /* 0xBD */ { UNUSED, 0, 0, 0 }, + /* 0xBE */ { UNUSED, 0, 0, 0 }, + /* 0xBF */ { UNUSED, 0, 0, 0 }, + /* 0xC0 */ { UNUSED, 0, 0, 0 }, + /* 0xC1 */ { UNUSED, 0, 0, 0 }, + /* 0xC2 */ { UNUSED, 0, 0, 0 }, + /* 0xC3 */ { UNUSED, 0, 0, 0 }, + /* 0xC4 */ { UNUSED, 0, 0, 0 }, + /* 0xC5 */ { UNUSED, 0, 0, 0 }, + /* 0xC6 */ { UNUSED, 0, 0, 0 }, + /* 0xC7 */ { UNUSED, 0, 0, 0 }, + /* 0xC8 */ { UNUSED, 0, 0, 0 }, + /* 0xC9 */ { UNUSED, 0, 0, 0 }, + /* 0xCA */ { UNUSED, 0, 0, 0 }, + /* 0xCB */ { UNUSED, 0, 0, 0 }, + /* 0xCC */ { UNUSED, 0, 0, 0 }, + /* 0xCD */ { UNUSED, 0, 0, 0 }, + /* 0xCE */ { UNUSED, 0, 0, 0 }, + /* 0xCF */ { UNUSED, 0, 0, 0 }, + /* 0xD0 */ { UNUSED, 0, 0, 0 }, + /* 0xD1 */ { UNUSED, 0, 0, 0 }, + /* 0xD2 */ { UNUSED, 0, 0, 0 }, + /* 0xD3 */ { UNUSED, 0, 0, 0 }, + /* 0xD4 */ { UNUSED, 0, 0, 0 }, + /* 0xD5 */ { UNUSED, 0, 0, 0 }, + /* 0xD6 */ { UNUSED, 0, 0, 0 }, + /* 0xD7 */ { UNUSED, 0, 0, 0 }, + /* 0xD8 */ { UNUSED, 0, 0, 0 }, + /* 0xD9 */ { UNUSED, 0, 0, 0 }, + /* 0xDA */ { UNUSED, 0, 0, 0 }, + /* 0xDB */ { UNUSED, 0, 0, 0 }, + /* 0xDC */ { UNUSED, 0, 0, 0 }, + /* 0xDD */ { UNUSED, 0, 0, 0 }, + /* 0xDE */ { UNUSED, 0, 0, 0 }, + /* 0xDF */ { UNUSED, 0, 0, 0 }, + /* 0xE0 */ { UNUSED, 0, 0, 0 }, + /* 0xE1 */ { UNUSED, 0, 0, 0 }, + /* 0xE2 */ { UNUSED, 0, 0, 0 }, + /* 0xE3 */ { UNUSED, 0, 0, 0 }, + /* 0xE4 */ { UNUSED, 0, 0, 0 }, + /* 0xE5 */ { UNUSED, 0, 0, 0 }, + /* 0xE6 */ { UNUSED, 0, 0, 0 }, + /* 0xE7 */ { UNUSED, 0, 0, 0 }, + /* 0xE8 */ { UNUSED, 0, 0, 0 }, + /* 0xE9 */ { UNUSED, 0, 0, 0 }, + /* 0xEA */ { UNUSED, 0, 0, 0 }, + /* 0xEB */ { UNUSED, 0, 0, 0 }, + /* 0xEC */ { UNUSED, 0, 0, 0 }, + /* 0xED */ { UNUSED, 0, 0, 0 }, + /* 0xEE */ { UNUSED, 0, 0, 0 }, + /* 0xEF */ { UNUSED, 0, 0, 0 }, + /* 0xF0 */ { UNUSED, 0, 0, 0 }, + /* 0xF1 */ { UNUSED, 0, 0, 0 }, + /* 0xF2 */ { UNUSED, 0, 0, 0 }, + /* 0xF3 */ { UNUSED, 0, 0, 0 }, + /* 0xF4 */ { UNUSED, 0, 0, 0 }, + /* 0xF5 */ { UNUSED, 0, 0, 0 }, + /* 0xF6 */ { UNUSED, 0, 0, 0 }, + /* 0xF7 */ { UNUSED, 0, 0, 0 }, + /* 0xF8 */ { UNUSED, 0, 0, 0 }, + /* 0xF9 */ { UNUSED, 0, 0, 0 }, + /* 0xFA */ { UNUSED, 0, 0, 0 }, + /* 0xFB */ { UNUSED, 0, 0, 0 }, + /* 0xFC */ { UNUSED, 0, 0, 0 }, + /* 0xFD */ { UNUSED, 0, 0, 0 }, + /* 0xFE */ { UNUSED, 0, 0, 0 }, + /* 0xFF */ { RETURN, NO_ARG, 0, 0 } + }; + +/* copy bytes from the instruction stream in correct byte order (the + * instruction stream is in bigendian order) + */ +STATIC_INLINE void GetBytes (Byte_t *dst, Byte_t *code, int n) +{ + int i; + for (i = 0; i < n; i++) { +#ifdef BYTE_ORDER_LITTLE + dst[n-1-i] = code[i]; +#else + dst[i] = code[i]; +#endif + } +} + +/* inline functions for fetching arguments */ +STATIC_INLINE signed char GetI8Arg (Byte_t *code) +{ + signed char i = code[0]; + return i; +} +STATIC_INLINE unsigned char GetU8Arg (Byte_t *code) +{ + unsigned char i = code[0]; + return i; +} +STATIC_INLINE Int16_t GetI16Arg (Byte_t *code) +{ + union { Byte_t b[sizeof(Int16_t)]; Int16_t i; } arg; + GetBytes(arg.b, code, sizeof(Int16_t)); + return arg.i; +} +STATIC_INLINE Unsigned16_t GetU16Arg (Byte_t *code) +{ + union { Byte_t b[sizeof(Unsigned16_t)]; Unsigned16_t u; } arg; + GetBytes(arg.b, code, sizeof(Unsigned16_t)); + return arg.u; +} +STATIC_INLINE Int32_t GetI32Arg (Byte_t *code) +{ + union { Byte_t b[sizeof(Int32_t)]; Int32_t i; } arg; + GetBytes(arg.b, code, sizeof(Int32_t)); + return arg.i; +} +STATIC_INLINE Unsigned32_t GetU32Arg (Byte_t *code) +{ + union { Byte_t b[sizeof(Unsigned32_t)]; Unsigned32_t u; } arg; + GetBytes(arg.b, code, sizeof(Unsigned32_t)); + return arg.u; +} +#ifdef SIZE_64 +STATIC_INLINE Int64_t GetI64Arg (Byte_t *code) +{ + union { Byte_t b[sizeof(Int64_t)]; Int64_t i; } arg; + GetBytes(arg.b, code, sizeof(Int64_t)); + return arg.i; +} +STATIC_INLINE Unsigned64_t GetU64Arg (Byte_t *code) +{ + union { Byte_t b[sizeof(Unsigned64_t)]; Unsigned64_t u; } arg; + GetBytes(arg.b, code, sizeof(Unsigned64_t)); + return arg.u; +} +#endif /* SIZE_64 */ +STATIC_INLINE double GetR32Arg (Byte_t *code) +{ + union { Byte_t b[sizeof(float)]; float r; } arg; + GetBytes(arg.b, code, sizeof(float)); + return arg.r; +} +STATIC_INLINE double GetR64Arg (Byte_t *code) +{ + union { Byte_t b[sizeof(double)]; double r; } arg; + GetBytes(arg.b, code, sizeof(double)); + return arg.r; +} +#ifdef SIZE_64 +#define GetRawArg GetU64Arg +#else /* SIZE_32 */ +#define GetRawArg GetU32Arg +#endif + +/* the size of a list cons cell in bytes */ +#define CONS_SZB (WORD_SZB*3) + +/* the amount of free space that we want in the allocation arena; this value must be + * less than MIN_ALLOC_SZB (defined in include/ml-limits.h) + */ +#define FREE_REQ_SZB 64*ONE_K + +/* for backward compatibility */ +ml_val_t BuildLiteralsV1 (ml_state_t *msp, Byte_t *lits, int pc, int len); + +#ifdef DEBUG_LITERALS +# define GC_MESSAGE SayDebug("BuildLiterals: invoke GC\n"); +#else +# define GC_MESSAGE +#endif + +/* BuildLiterals: + * + * NOTE: we allocate all of the objects in the first generation, and allocate + * the vector of literals in the allocation space. + */ +ml_val_t BuildLiterals (ml_state_t *msp, Byte_t *code, int len) +{ + int pc = 0; +#ifdef DEBUG_LITERALS + int depth = 0; +#endif + Unsigned32_t magic, maxDepth, wordSz, maxSaved; + ml_val_t stk; + ml_val_t res; + Int32_t availSpace, spaceReq; + Unsigned32_t ui; + /* we represent the saved array using a mutable data array that is allocated in + * the heap. This means that we need to create store-list entries when we update + * it. + */ + ml_val_t saved; + +/* A check that the available space is sufficient for the literal object that + * we are about to allocate. Note that the cons cell has already been accounted + * for in availSpace (but not in spaceReq). + */ +#define GC_CHECK \ + do { \ + if (spaceReq > availSpace) { \ + GC_MESSAGE \ + InvokeGCWithRoots (msp, 0, (ml_val_t *)&code, &stk, &saved, NIL(ml_val_t *)); \ + availSpace = ((size_t)msp->ml_limitPtr - (size_t)msp->ml_allocPtr) - CONS_SZB; \ + } \ + else \ + availSpace -= spaceReq; \ + } while (0) + +#ifdef DEBUG_LITERALS + SayDebug("BuildLiterals: code = %p, len = %d\n", (void *)code, len); +#endif + if (len <= 8) return ML_nil; + + magic = GetU32Arg(code+pc); pc += 4; + maxDepth = GetU32Arg(code+pc); pc += 4; + + if (magic == V1_MAGIC) { +#ifdef DEBUG_LITERALS + SayDebug("BuildLiterals: VERSION 1\n"); +#endif + return BuildLiteralsV1 (msp, code, pc, len); + } + else if (magic != V2_MAGIC) { + Die("bogus literal magic number %#x", magic); + } +#ifdef DEBUG_LITERALS + SayDebug("BuildLiterals: VERSION 2\n"); +#endif + + /* get the rest of the V2 header */ + wordSz = GetU32Arg(code+pc); pc += 4; + maxSaved = GetU32Arg(code+pc); pc += 4; + +#ifdef SIZE_64 + if (wordSz != 64) { + Die("expected word size = 64, but found %d\n", wordSz); + } +#else /* SIZE_32 */ + if (wordSz != 32) { + Die("expected word size = 32, but found %d\n", wordSz); + } +#endif + + if (maxSaved > 0) { + saved = ML_AllocArrayData (msp, maxSaved, ML_nil); + } + else { + saved = ML_nil; + } + + stk = ML_nil; + availSpace = ((size_t)msp->ml_limitPtr - (size_t)msp->ml_allocPtr); +#ifdef DEBUG_LITERALS + SayDebug("BuildLiterals: avail = %d bytes; maxDepth = %d, maxSaved = %d\n", + (int)availSpace, (int)maxDepth, (int)maxSaved); +#endif + while (TRUE) { + ASSERT(pc < len); + ASSERT(availSpace <= (Int32_t)((size_t)msp->ml_limitPtr - (size_t)msp->ml_allocPtr)); + if (availSpace < 512 * WORD_SZB) { + if (NeedGC(msp, FREE_REQ_SZB)) { + GC_MESSAGE + InvokeGCWithRoots (msp, 0, (ml_val_t *)&code, &stk, &saved, NIL(ml_val_t *)); + } + availSpace = ((size_t)msp->ml_limitPtr - (size_t)msp->ml_allocPtr); + } + availSpace -= CONS_SZB; /* space for stack cons cell */ + +#ifdef DEBUG_LITERALS + int startPC = pc; +#endif + +#ifdef DEBUG_LITERALS +# define PUSH(arg) do { LIST_cons(msp, stk, (arg), stk); depth++; } while(0) +#else +# define PUSH(arg) LIST_cons(msp, stk, (arg), stk) +#endif + + /* get the next instruction */ + Byte_t opcode = code[pc++]; + + /* get the argument (if any) */ + union { + Word_t uArg; + Int_t iArg; + } arg; + switch (InstrInfo[opcode].argKind) { + case NO_ARG: + break; + case IMMED_ARG: + arg.iArg = (Int_t)InstrInfo[opcode].immedArg; + break; + case I8_ARG: + arg.iArg = (Int_t)GetI8Arg(&(code[pc])); pc += 1; + break; + case U8_ARG: + arg.uArg = (Word_t)GetU8Arg(&(code[pc])); pc += 1; + break; + case I16_ARG: + arg.iArg = (Int_t)GetI16Arg(&(code[pc])); pc += 2; + break; + case U16_ARG: + arg.uArg = (Word_t)GetU16Arg(&(code[pc])); pc += 2; + break; + case I32_ARG: + arg.iArg = (Int_t)GetI32Arg(&(code[pc])); pc += 4; + break; + case U32_ARG: + arg.uArg = (Word_t)GetU32Arg(&(code[pc])); pc += 4; + break; +#ifdef SIZE_64 + case I64_ARG: + arg.iArg = (Int_t)GetI64Arg(&(code[pc])); pc += 8; + break; +#endif + case INT_ARG: +#ifdef SIZE_64 + arg.iArg = (Int_t)GetI64Arg(&(code[pc])); pc += 8; +#else /* SIZE_32 */ + arg.iArg = (Int_t)GetI32Arg(&(code[pc])); pc += 4; +#endif + } + + /* handle the operation */ + switch (InstrInfo[opcode].oper) { + case UNUSED: + break; + + case INT: +#ifdef DEBUG_LITERALS + SayDebug("[%04d/%4d]: INT(%" PRINT ")\n", startPC, depth, arg.iArg); +#endif + PUSH (INT_CtoML(arg.iArg)); + break; + +#ifdef SIZE_32 + case INT32: +#ifdef DEBUG_LITERALS + SayDebug("[%04d/%4d]: INT32(%" PRINT ")\n", startPC, depth, arg.iArg); +#endif + res = INT32_CtoML(msp, arg.iArg); + PUSH (res); + availSpace -= 2*WORD_SZB; + break; +#endif + +#ifdef SIZE_64 + case INT64: +#ifdef DEBUG_LITERALS + SayDebug("[%04d/%4d]: INT64(" PRINT ")\n", startPC, depth, arg.iArg); +#endif + res = ML_AllocWord64(msp, arg.iArg); + PUSH (res); + availSpace -= 2*WORD_SZB; + break; +#endif + + case BIGINT: + Die("BIGINT -- not supported yet"); + break; + + case IVEC: + Die("IVEC -- not supported yet"); + break; + + case IVEC8: + Die("IVEC8 -- not supported yet"); + break; + + case IVEC16: + Die("IVEC16 -- not supported yet"); + break; + + case IVEC32: + Die("IVEC32 -- not supported yet"); + break; + + case IVEC64: + Die("IVEC64 -- not supported yet"); + break; + + case REAL32: + Die("REAL32 -- not supported yet"); + break; + + case REAL64: +#ifdef DEBUG_LITERALS + SayDebug("[%04d/%4d]: REAL64(%f)\n", startPC, depth, GetR64Arg(&(code[pc]))); +#endif + REAL64_ALLOC(msp, res, GetR64Arg(&(code[pc]))); pc += 8; + availSpace -= WORD_SZB + REALD_SZB; +#ifdef ALIGN_REALDS + availSpace -= WORD_SZB; +#endif + break; + + case RVEC32: + Die("RVEC32 -- not supported yet"); + break; + + case RVEC64: + Die("RVEC64 -- not supported yet"); + break; + + case STR8: +#ifdef DEBUG_LITERALS + SayDebug("[%04d/%4d]: STR8(%" PRWORD ") [...]", startPC, depth, arg.uArg); +#endif + if (arg.uArg == 0) { +#ifdef DEBUG_LITERALS + SayDebug("\n"); +#endif + PUSH (ML_string0); + break; + } + ui = BYTES_TO_WORDS(arg.uArg+1); /* include space for '\0' */ + /* the space request includes space for the data-object header word and + * the sequence header object. + */ + spaceReq = WORD_SZB*(ui+1+3); +/* FIXME: for large strings, we should be allocating them in the 1st generation */ + GC_CHECK; + /* allocate the data object */ + ML_AllocWrite(msp, 0, MAKE_DESC(ui, DTAG_raw)); + ML_AllocWrite (msp, ui, 0); /* so word-by-word string equality works */ + res = ML_Alloc (msp, ui); +#ifdef DEBUG_LITERALS + SayDebug(" @ %p (%d words)\n", (void *)res, ui); +#endif + memcpy (PTR_MLtoC(void, res), &(code[pc]), arg.uArg); pc += arg.uArg; + /* allocate the header object */ + SEQHDR_ALLOC(msp, res, DESC_string, res, arg.uArg); + /* push on stack */ + PUSH (res); + availSpace -= spaceReq; + break; + + case RECORD: +#ifdef DEBUG_LITERALS + SayDebug("[%04d/%4d]: RECORD(%" PRWORD ") [", startPC, depth, arg.uArg); +#endif + if (arg.uArg == 0) { +#ifdef DEBUG_LITERALS + SayDebug("]\n"); +#endif + PUSH (ML_unit); + break; + } + else { + spaceReq = WORD_SZB*(arg.uArg+1); + GC_CHECK; + ML_AllocWrite(msp, 0, MAKE_DESC(arg.uArg, DTAG_record)); + } + /* top of stack is last element in record */ + for (ui = arg.uArg; ui > 0; ui--) { + ML_AllocWrite(msp, ui, LIST_hd(stk)); + stk = LIST_tl(stk); +#ifdef DEBUG_LITERALS + depth--; +#endif + } + res = ML_Alloc(msp, arg.uArg); +#ifdef DEBUG_LITERALS + SayDebug("...] @ %p\n", (void *)res); +#endif + PUSH (res); + availSpace -= spaceReq; + break; + + case VECTOR: +#ifdef DEBUG_LITERALS + SayDebug("[%04d/%4d]: VECTOR(%" PRWORD ") [", startPC, depth, arg.uArg); +#endif + if (arg.uArg == 0) { +#ifdef DEBUG_LITERALS + SayDebug("]\n"); +#endif + PUSH (ML_vector0); + break; + } + /* the space request includes space for the data-object header word and + * the sequence header object. + */ + spaceReq = WORD_SZB*(arg.uArg + (1 + 3)); +/* FIXME: for large vectors, we should be allocating them in the 1st generation */ + GC_CHECK; + /* allocate the data object */ + ML_AllocWrite(msp, 0, MAKE_DESC(arg.uArg, DTAG_vec_data)); + /* top of stack is last element in vector */ + for (ui = arg.uArg; ui > 0; ui--) { + ML_AllocWrite(msp, ui, LIST_hd(stk)); + stk = LIST_tl(stk); +#ifdef DEBUG_LITERALS + depth--; +#endif + } + res = ML_Alloc(msp, arg.uArg); + /* allocate the header object */ + SEQHDR_ALLOC(msp, res, DESC_polyvec, res, arg.uArg); +#ifdef DEBUG_LITERALS + SayDebug("...] @ %p\n", (void *)res); +#endif + PUSH (res); + availSpace -= spaceReq; + break; + + case RAW: /* Word_t sized raw values */ +#ifdef DEBUG_LITERALS + { + int i, n; + SayDebug("[%04d/%4d]: RAW(%" PRWORD ") [%02xn", + startPC, depth, arg.uArg, code[pc]); + n = (WORD_SZB*arg.uArg > 8) ? 8 : WORD_SZB*arg.uArg; + for (i = 1; i < n; i++) { + SayDebug(" %02x", code[pc+i]); + } + if (n < WORD_SZB*arg.uArg) { + SayDebug(" ...]\n"); + } else { + SayDebug("]\n"); + } + } +#endif + ASSERT(arg.uArg > 0); + spaceReq = WORD_SZB*arg.uArg + WORD_SZB; +/* FIXME: for large objects, we should be allocating them in the 1st generation */ + GC_CHECK; + ML_AllocWrite (msp, 0, MAKE_DESC(arg.uArg, DTAG_raw)); + for (ui = 1; ui <= arg.uArg; ui++) { + ML_AllocWrite (msp, ui, (ml_val_t)GetRawArg(&(code[pc]))); pc += WORD_SZB; + } + res = ML_Alloc (msp, arg.uArg); + PUSH (res); + availSpace -= spaceReq; + break; + + case RAW32: + Die("RAW32 -- not supported yet"); + break; + + case RAW64: +#ifdef DEBUG_LITERALS + SayDebug("[%04d/%4d]: RAW64(%" PRWORD ") [...]\n", startPC, depth, arg.uArg); +#endif + ASSERT(arg.uArg > 0); + spaceReq = 8*(arg.uArg+1); +/* FIXME: for large objects, we should be allocating them in the 1st generation */ + GC_CHECK; +#ifdef ALIGN_REALDS + /* Force REALD_SZB alignment (descriptor is off by one word) */ + msp->ml_allocPtr = (ml_val_t *)((Addr_t)(msp->ml_allocPtr) | WORD_SZB); +#endif + /* ui is the number of words */ + ui = WORD64_SZW * arg.uArg; + ML_AllocWrite (msp, 0, MAKE_DESC(ui, DTAG_raw64)); + res = ML_Alloc (msp, ui); + for (ui = 0; ui < arg.uArg; ui++) { + PTR_MLtoC(double, res)[ui] = GetR64Arg(&(code[pc])); + pc += 8; + } + PUSH (res); + availSpace -= spaceReq; + break; + + case CONCAT: + break; + + case SAVE: { + ml_val_t *loc; +#ifdef DEBUG_LITERALS + SayDebug("[%04d/%4d]: SAVE(%" PRWORD ") %p\n", + startPC, depth, arg.uArg, (void*)LIST_hd(stk)); +#endif + ASSERT(saved != ML_nil); + ASSERT(stk != ML_nil); + loc = PTR_MLtoC(ml_val_t, saved) + arg.uArg; + *loc = LIST_hd(stk); + ML_RecordUpdate (msp, loc); + } break; + + case LOAD: +#ifdef DEBUG_LITERALS + SayDebug("[%04d/%4d]: LOAD(%" PRWORD ") %p\n", + startPC, depth, arg.uArg, PTR_MLtoC(ml_val_t *, saved)[arg.uArg]); +#endif + ASSERT(saved != ML_nil); + PUSH (PTR_MLtoC(ml_val_t, saved)[arg.uArg]); + break; + + case RETURN: +#ifdef DEBUG_LITERALS + SayDebug("[%04d/%4d]: RETURN(%p); depth = %d\n", + startPC, depth, (void *)LIST_hd(stk), depth); +#endif + ASSERT(pc == len); + ASSERT((stk != ML_nil) && (LIST_tl(stk) == ML_nil)); + return (LIST_hd(stk)); + + default: + Die ("BuildLiterals: bogus literal opcode #%x @ %d", opcode, pc-1); + } /* switch */ + } /* while */ + +} /* end of BuildLiterals */ diff --git a/base/runtime/gc/c-globals-tbl.c b/base/runtime/gc/c-globals-tbl.c new file mode 100644 index 0000000..d7139b1 --- /dev/null +++ b/base/runtime/gc/c-globals-tbl.c @@ -0,0 +1,388 @@ +/*! \file c-globals-tbl.c + * + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * This file implements a registry of global C symbols that may be referenced + * in the ML heap (e.g., references to C functions). + */ + +#include "ml-base.h" +#include "tags.h" +#include "ml-values.h" +#include "c-globals-tbl.h" +#include + +#define MAKE_EXTERN(index) MAKE_DESC(index, DTAG_extern) + +#define HASH_STRING(name, res) { \ + const char *__cp = (name); \ + int __hash = 0, __n; \ + for (; *__cp; __cp++) { \ + __n = (128*__hash) + (unsigned)*__cp; \ + __hash = __n - (8388593 * (__n / 8388593)); \ + } \ + (res) = __hash; \ + } + +typedef struct item { /* An item in the Symbol/Addr tables */ + ml_val_t addr; /* The address of the external reference */ + const char *name; /* The name of the reference */ + int stringHash; /* The hash sum of the name */ + struct item *nextSymb; /* The next item the SymbolTable bucket */ + struct item *nextAddr; /* The next item the AddrTable bucket */ +} item_t; + +typedef struct item_ref { /* an item in an export table */ + item_t *item; + int index; + struct item_ref *next; +} item_ref_t; + +struct export_table { /* A table of C symbols mapping strings to items, */ + /* which is used when loading a heap image. */ + item_ref_t **table; + int tableSize; + int numItems; + item_t **itemMap; /* A map from item #s to items */ + int itemMapSize; +}; + +/* hash key to index */ +#define STRHASH_INDEX(h, sz) ((h) & ((sz)-1)) +#define ADDRHASH_INDEX(a, sz) (((Word_t)(a) >> 3) & ((sz)-1)) + + +PVT item_t **SymbolTable = NIL(item_t **); /* Maps names to items */ +PVT item_t **AddrTable = NIL(item_t **); /* Maps addresses to items */ +PVT int TableSize = 0; /* The size of the tables; always */ + /* power of 2. */ +PVT int NumSymbols = 0; /* The number of entries in the tables */ + +/* local routines */ +PVT void GrowTable (export_table_t *tbl); + + +/* RecordCSymbol: + * + * Enter a global C symbol into the tables. + */ +void RecordCSymbol (const char *name, ml_val_t addr) +{ + int n, i, hash; + item_t *item, *p; + + ASSERT (((Word_t)addr & MAJOR_MASK) == 0); + + if (TableSize == NumSymbols) { + /* double the table size */ + int newTblSz = (TableSize ? 2*TableSize : 64); + item_t **newSTbl = NEW_VEC(item_t *, newTblSz); + item_t **newATbl = NEW_VEC(item_t *, newTblSz); + + memset ((char *)newSTbl, 0, sizeof(item_t *) * newTblSz); + memset ((char *)newATbl, 0, sizeof(item_t *) * newTblSz); + + for (i = 0; i < TableSize; i++) { + for (p = SymbolTable[i]; p != NIL(item_t *); ) { + item = p; + p = p->nextSymb; + n = STRHASH_INDEX(item->stringHash, newTblSz); + item->nextSymb = newSTbl[n]; + newSTbl[n] = item; + } + for (p = AddrTable[i]; p != NIL(item_t *); ) { + item = p; + p = p->nextAddr; + n = ADDRHASH_INDEX(item->addr, newTblSz); + item->nextAddr = newATbl[n]; + newATbl[n] = item; + } + } + + if (SymbolTable != NIL(item_t **)) { + FREE (SymbolTable); + FREE (AddrTable); + } + SymbolTable = newSTbl; + AddrTable = newATbl; + TableSize = newTblSz; + } + + /* compute the string hash function */ + HASH_STRING(name, hash); + + /* Allocate the item */ + item = NEW_OBJ(item_t); + item->name = name; + item->stringHash = hash; + item->addr = addr; + + /* insert the item into the symbol table. */ + n = STRHASH_INDEX(hash, TableSize); + for (p = SymbolTable[n]; p != NIL(item_t *); p = p->nextSymb) { + if ((p->stringHash == hash) && (strcmp(name, p->name) == 0)) { + if (p->addr != addr) + Die ("global C symbol \"%s\" defined twice", name); + else { + FREE (item); + return; + } + } + } + item->nextSymb = SymbolTable[n]; + SymbolTable[n] = item; + + /* insert the item into the addr table. */ + n = ADDRHASH_INDEX(addr, TableSize); + for (p = AddrTable[n]; p != NIL(item_t *); p = p->nextAddr) { + if (p->addr == addr) { + if ((p->stringHash != hash) || (strcmp(name, p->name) != 0)) + Die ("address %#x defined twice: \"%s\" and \"%s\"", + addr, p->name, name); + else { + FREE (item); + return; + } + } + } + item->nextAddr = AddrTable[n]; + AddrTable[n] = item; + NumSymbols++; + +} /* end of RecordCSymbol */ + +/* AddrToCSymbol: + * + * Return the name of the C symbol that labels the given address + * (or NIL). + */ +const char *AddrToCSymbol (ml_val_t addr) +{ + item_t *q; + + /* Find the symbol in the AddrTable */ + for (q = AddrTable[ADDRHASH_INDEX(addr, TableSize)]; + q != NIL(item_t *); + q = q->nextAddr) + { + if (q->addr == addr) + return q->name; + } + + return NIL(const char *); + +} /* end of AddrToCSymbol */ + +/* NewExportTbl: + */ +export_table_t *NewExportTbl () +{ + export_table_t *tbl; + + tbl = NEW_OBJ(export_table_t); + tbl->table = NIL(item_ref_t **); + tbl->tableSize = 0; + tbl->numItems = 0; + tbl->itemMap = NIL(item_t **); + tbl->itemMapSize = 0; + + return tbl; + +} /* end of NewExportTbl */ + +/* ExportCSymbol: + * + * Add an external address to an export table, returning its external reference + * descriptor. + */ +ml_val_t ExportCSymbol (export_table_t *tbl, ml_val_t addr) +{ + Addr_t a = PTR_MLtoADDR(addr); + item_ref_t *p; + item_t *q; + int h, index; + +/*SayDebug("ExportCSymbol: addr = %#x, ", addr);*/ + + if (tbl->numItems >= tbl->tableSize) + GrowTable (tbl); + + /* First check to see if addr is already in tbl */ + h = ADDRHASH_INDEX(a, tbl->tableSize); + for (p = tbl->table[h]; p != NIL(item_ref_t *); p = p->next) { + if (p->item->addr == addr) { +/*SayDebug("old name = \"%s\", index = %d\n", p->item->name, p->index);*/ + return MAKE_EXTERN(p->index); + } + } + + /* Find the symbol in the AddrTable */ + for (q = AddrTable[ADDRHASH_INDEX(a, TableSize)]; q != NIL(item_t *); q = q->nextAddr) { + if (q->addr == addr) + break; + } + if (q == NIL(item_t *)) { + Error("external address %#x not registered\n", addr); + return ML_unit; + } + + /* Insert the index into the address to index map. */ +/*SayDebug("new name = \"%s\", index = %d\n", q->name, tbl->numItems);*/ + index = tbl->numItems++; + if (tbl->itemMapSize <= index) { + int newSz = ((tbl->itemMapSize == 0) ? 64 : 2*tbl->itemMapSize); + item_t **newMap = NEW_VEC(item_t *, newSz); + int i; + + for (i = 0; i < tbl->itemMapSize; i++) + newMap[i] = tbl->itemMap[i]; + if (tbl->itemMap != NIL(item_t **)) + FREE (tbl->itemMap); + tbl->itemMap = newMap; + tbl->itemMapSize = newSz; + } + tbl->itemMap[index] = q; + + /* Insert the address into the export table */ + p = NEW_OBJ(item_ref_t); + p->item = q; + p->index = index; + p->next = tbl->table[h]; + tbl->table[h] = p; + + return MAKE_EXTERN(index); + +} /* end of ExportCSymbol */ + +/* AddrOfCSymbol: + * + * Given an external reference, return its address. + */ +ml_val_t AddrOfCSymbol (export_table_t *tbl, ml_val_t xref) +{ + int index; + + index = GET_LEN(xref); + +/*SayDebug("AddrOfCSymbol: %#x: %d --> %#x\n", xref, index, tbl->itemMap[index]->addr);*/ + if (index >= tbl->numItems) { + Die ("bad external object index %d", index); + } + + return tbl->itemMap[index]->addr; + +} /* end of AddrOfCSymbol */ + +/* ExportedSymbols: + */ +void ExportedSymbols (export_table_t *tbl, int *numSymbs, export_item_t **symbs) +{ + int i, n = tbl->numItems; + item_t **p; + export_item_t *ep; + + *numSymbs = n; + *symbs = ep = NEW_VEC(export_item_t, n); + for (p = tbl->itemMap, i = 0; i < n; i++) { + *ep = (*p)->name; + p++; ep++; + } + +} /* end of ExportedSymbols */ + + +/* FreeExportTbl: + * + * Free the storage used by a import/export table. + */ +void FreeExportTbl (export_table_t *tbl) +{ + int i; + item_ref_t *p, *q; + + for (i = 0; i < tbl->tableSize; i++) { + for (p = tbl->table[i]; p != NIL(item_ref_t *); ) { + q = p->next; + FREE (p); + p = q; + } + } + + if (tbl->itemMap != NIL(item_t **)) + FREE (tbl->itemMap); + + FREE (tbl); + +} /* end of FreeExportTbl */ + + +/* ImportCSymbol: + */ +ml_val_t ImportCSymbol (const char *name) +{ + int hash, index; + item_t *p; + + HASH_STRING(name, hash); + + /* insert the item into the symbol table. */ + index = STRHASH_INDEX(hash, TableSize); + for (p = SymbolTable[index]; p != NIL(item_t *); p = p->nextSymb) { + if ((p->stringHash == hash) && (strcmp(name, p->name) == 0)) { + return (p->addr); + } + } + + return ML_unit; + +} /* end of ImportCSymbol */ + + +/* ExportTableSz: + * + * Return the number of bytes required to represent the strings in an exported + * symbols table. + */ +Addr_t ExportTableSz (export_table_t *tbl) +{ + int i; + Addr_t nbytes; + + for (nbytes = 0, i = 0; i < tbl->numItems; i++) { + nbytes += (strlen(tbl->itemMap[i]->name) + 1); + } + nbytes = ROUNDUP(nbytes, WORD_SZB); + + return nbytes; + +} /* end of ExportTableSz */ + + +/* GrowTable: + */ +PVT void GrowTable (export_table_t *tbl) +{ + int newTblSz = (tbl->tableSize ? 2 * tbl->tableSize : 32); + item_ref_t **newTbl = NEW_VEC(item_ref_t *, newTblSz); + int i, n; + item_ref_t *p, *q; + + memset ((char *)newTbl, 0, newTblSz * sizeof(item_ref_t *)); + + for (i = 0; i < tbl->tableSize; i++) { + for (p = tbl->table[i]; p != NIL(item_ref_t *); ) { + q = p; + p = p->next; + n = ADDRHASH_INDEX(q->item->addr, newTblSz); + q->next = newTbl[n]; + newTbl[n] = q; + } + } + + if (tbl->table != NIL(item_ref_t **)) FREE (tbl->table); + tbl->table = newTbl; + tbl->tableSize = newTblSz; + +} /* end of GrowTable */ + diff --git a/base/runtime/gc/call-gc.c b/base/runtime/gc/call-gc.c new file mode 100644 index 0000000..98bd95e --- /dev/null +++ b/base/runtime/gc/call-gc.c @@ -0,0 +1,388 @@ +/* call-gc.c + * + * COPYRIGHT (c) 2022 The SML/NJ Fellowship. + * All rights reserved. + * + * The main interface between the GC and the rest of the run-time system. + * These are the routines used to invoke the GC. + */ + +#ifdef PAUSE_STATS /* GC pause statistics are UNIX dependent */ +# include "ml-unixdep.h" +#endif + +#include +#include "ml-base.h" +#include "ml-limits.h" +#include "memory.h" +#include "ml-state.h" +#include "ml-values.h" +#include "ml-objects.h" +#include "cntr.h" +#include "heap.h" +#include "heap-monitor.h" +#include "ml-globals.h" +#include "ml-timer.h" +#include "gc-stats.h" +#include "vproc-state.h" +#include "ml-signals.h" +#include "profile.h" + +#ifdef C_CALLS +/* This is a list of pointers into the C heap locations that hold + * pointers to ML functions. This list is not part of any ML data + * structure(s). (also see gc/major-gc.c and c-libs/c-calls/c-calls-fns.c) + */ +extern ml_val_t CInterfaceRootList; +#endif + + +/* InvokeGC: + * + * Invoke a garbage collection. A garbage collection always involves + * collecting the allocation space. In addition, if level is greater than + * 0, or if the first generation is full after the minor collection, then + * a major collection of one or more generations is performed (at least + * level generations are collected). + */ +void InvokeGC (ml_state_t *msp, int level) +{ + ml_val_t *roots[NUM_GC_ROOTS]; /* registers and globals */ + ml_val_t **rootsPtr = roots; + heap_t *heap; + int i; +#ifdef MP_SUPPORT + int nProcs; +#endif + + ASSIGN(ProfCurrent, PROF_MINOR_GC); + +#ifdef MP_SUPPORT +#ifdef MP_DEBUG + SayDebug ("igc %d\n", msp->ml_mpSelf); +#endif + if ((nProcs = MP_StartCollect (msp)) == 0) { + /* a waiting proc */ + ASSIGN(ProfCurrent, PROF_RUNTIME); + return; + } +#endif + + START_GC_PAUSE(msp->ml_heap); + +#ifdef C_CALLS + *rootsPtr++ = &CInterfaceRootList; +#endif + +#ifdef MP_SUPPORT + /* get extra roots from procs that entered through InvokeGCWithRoots */ + for (i = 0; mpExtraRoots[i] != NIL(ml_val_t *); i++) + *rootsPtr++ = mpExtraRoots[i]; +#endif + + /* Gather the roots */ + for (i = 0; i < NumCRoots; i++) + *rootsPtr++ = CRoots[i]; +#ifdef MP_SUPPORT + { + vproc_state_t *vsp; + ml_state_t *msp; + int j; + + for (j = 0; j < MAX_NUM_PROCS; j++) { + vsp = VProc[j]; + msp = vsp->vp_state; +#ifdef MP_DEBUG + SayDebug ("msp[%d] alloc/limit was %x/%x\n", + j, msp->ml_allocPtr, msp->ml_limitPtr); +#endif + if (vsp->vp_mpState == MP_PROC_RUNNING) { + *rootsPtr++ = &(msp->ml_arg); + *rootsPtr++ = &(msp->ml_cont); + *rootsPtr++ = &(msp->ml_closure); + *rootsPtr++ = &(msp->ml_exnCont); + *rootsPtr++ = &(msp->ml_varReg); + *rootsPtr++ = &(msp->ml_calleeSave[0]); + *rootsPtr++ = &(msp->ml_calleeSave[1]); + *rootsPtr++ = &(msp->ml_calleeSave[2]); + } + } /* for */ + } +#else /* !MP_SUPPORT */ + *rootsPtr++ = &(msp->ml_linkReg); + *rootsPtr++ = &(msp->ml_arg); + *rootsPtr++ = &(msp->ml_cont); + *rootsPtr++ = &(msp->ml_closure); + *rootsPtr++ = &(msp->ml_exnCont); + *rootsPtr++ = &(msp->ml_varReg); + *rootsPtr++ = &(msp->ml_calleeSave[0]); + *rootsPtr++ = &(msp->ml_calleeSave[1]); + *rootsPtr++ = &(msp->ml_calleeSave[2]); +#endif /* MP_SUPPORT */ + *rootsPtr = NIL(ml_val_t *); + + MinorGC (msp, roots); + + heap = msp->ml_heap; + + /* Check for major GC */ + if (level == 0) { + gen_t *gen1 = heap->gen[0]; + Word_t sz = msp->ml_allocArenaSzB; + + for (i = 0; i < NUM_ARENAS; i++) { + arena_t *arena = gen1->arena[i]; + if (isACTIVE(arena) && (AVAIL_SPACE(arena) < sz)) { + level = 1; + break; + } + } + } + + if (level > 0) { +#ifdef MP_SUPPORT + vproc_state_t *vsp; + ml_state_t *msp; + + for (i = 0; i < MAX_NUM_PROCS; i++) { + vsp = VProc[i]; + msp = vsp->vp_state; + if (vsp->vp_mpState == MP_PROC_RUNNING) + *rootsPtr++ = &(msp->ml_linkReg); + } +#else + ASSIGN(ProfCurrent, PROF_MAJOR_GC); +#endif + *rootsPtr = NIL(ml_val_t *); + MajorGC (msp, roots, level); + } + else { + HeapMon_UpdateHeap (heap, 1); + } + + /* reset the allocation space */ +#ifdef MP_SUPPORT + MP_FinishCollect (msp, nProcs); +#else + msp->ml_allocPtr = heap->allocBase; +#ifdef SOFT_POLL + ResetPollLimit (msp); +#else + msp->ml_limitPtr = HEAP_LIMIT(heap); +#endif +#endif + + STOP_GC_PAUSE(); + + /* conditionally signal a GC signal */ + GCSignal (msp->ml_vproc, level); + + ASSIGN(ProfCurrent, PROF_RUNTIME); + +} /* end of InvokeGC */ + + +/* InvokeGCWithRoots: + * + * Invoke a garbage collection with possible additional roots. The list of + * additional roots should be NIL terminated. A garbage collection always + * involves collecting the allocation space. In addition, if level is greater + * than 0, or if the first generation is full after the minor collection, then + * a major collection of one or more generations is performed (at least level + * generations are collected). + * + * NOTE: the MP version of this may be broken, since if a processor calls this + * but isn't the collecting process, then the extra roots are lost. + */ +void InvokeGCWithRoots (ml_state_t *msp, int level, ...) +{ + ml_val_t *roots[NUM_GC_ROOTS+NUM_EXTRA_ROOTS]; /* registers and globals */ + ml_val_t **rootsPtr = roots, *p; + heap_t *heap; + int i; + va_list ap; +#ifdef MP_SUPPORT + int nProcs; +#endif + + ASSIGN(ProfCurrent, PROF_MINOR_GC); + +#ifdef MP_SUPPORT +#ifdef MP_DEBUG + SayDebug ("igcwr %d\n", msp->ml_mpSelf); +#endif + va_start (ap, level); + nProcs = MP_StartCollectWithRoots (msp, ap); + va_end(ap); + if (nProcs == 0) + ASSIGN(ProfCurrent, PROF_RUNTIME); + return; /* a waiting proc */ +#endif + + START_GC_PAUSE(msp->ml_heap); + +#ifdef C_CALLS + *rootsPtr++ = &CInterfaceRootList; +#endif + +#ifdef MP_SUPPORT + /* get extra roots from procs that entered through InvokeGCWithRoots. + * Our extra roots were placed in mpExtraRoots by MP_StartCollectWithRoots. + */ + for (i = 0; mpExtraRoots[i] != NIL(ml_val_t *); i++) + *rootsPtr++ = mpExtraRoots[i]; +#else + /* record extra roots from param list */ + va_start (ap, level); + while ((p = va_arg(ap, ml_val_t *)) != NIL(ml_val_t *)) { + *rootsPtr++ = p; + } + va_end(ap); +#endif /* MP_SUPPORT */ + + /* Gather the roots */ + for (i = 0; i < NumCRoots; i++) + *rootsPtr++ = CRoots[i]; +#ifdef MP_SUPPORT + { + ml_state_t *msp; + vproc_state_t *vsp; + int j; + + for (j = 0; j < MAX_NUM_PROCS; j++) { + vsp = VProc[j]; + msp = vsp->vp_state; +#ifdef MP_DEBUG + SayDebug ("msp[%d] alloc/limit was %x/%x\n", + j, msp->ml_allocPtr, msp->ml_limitPtr); +#endif + if (vsp->vp_mpState == MP_PROC_RUNNING) { + *rootsPtr++ = &(msp->ml_arg); + *rootsPtr++ = &(msp->ml_cont); + *rootsPtr++ = &(msp->ml_closure); + *rootsPtr++ = &(msp->ml_exnCont); + *rootsPtr++ = &(msp->ml_varReg); + *rootsPtr++ = &(msp->ml_calleeSave[0]); + *rootsPtr++ = &(msp->ml_calleeSave[1]); + *rootsPtr++ = &(msp->ml_calleeSave[2]); + } + } /* for */ + } +#else /* !MP_SUPPORT */ + *rootsPtr++ = &(msp->ml_arg); + *rootsPtr++ = &(msp->ml_cont); + *rootsPtr++ = &(msp->ml_closure); + *rootsPtr++ = &(msp->ml_exnCont); + *rootsPtr++ = &(msp->ml_varReg); + *rootsPtr++ = &(msp->ml_calleeSave[0]); + *rootsPtr++ = &(msp->ml_calleeSave[1]); + *rootsPtr++ = &(msp->ml_calleeSave[2]); +#endif /* MP_SUPPORT */ + *rootsPtr = NIL(ml_val_t *); + + MinorGC (msp, roots); + + heap = msp->ml_heap; + + /* Check for major GC */ + if (level == 0) { + gen_t *gen1 = heap->gen[0]; + Word_t sz = msp->ml_allocArenaSzB; + + for (i = 0; i < NUM_ARENAS; i++) { + arena_t *arena = gen1->arena[i]; + if (isACTIVE(arena) && (AVAIL_SPACE(arena) < sz)) { + level = 1; + break; + } + } + } + + if (level > 0) { +#ifdef MP_SUPPORT + vproc_state_t *vsp; + + for (i = 0; i < MAX_NUM_PROCS; i++) { + vsp = VProc[i]; + if (vsp->vp_mpState == MP_PROC_RUNNING) + *rootsPtr++ = &(vsp->vp_state->ml_linkReg); + } +#else + ASSIGN(ProfCurrent, PROF_MAJOR_GC); + *rootsPtr++ = &(msp->ml_linkReg); + *rootsPtr++ = &(msp->ml_pc); +#endif + *rootsPtr = NIL(ml_val_t *); + MajorGC (msp, roots, level); + } + else { + HeapMon_UpdateHeap (heap, 1); + } + + /* reset the allocation space */ +#ifdef MP_SUPPORT + MP_FinishCollect (msp, nProcs); +#else + msp->ml_allocPtr = heap->allocBase; +#ifdef SOFT_POLL + ResetPollLimit (msp); +#else + msp->ml_limitPtr = HEAP_LIMIT(heap); +#endif +#endif + + STOP_GC_PAUSE(); + + /* conditionally signal a GC signal */ + GCSignal (msp->ml_vproc, level); + + ASSIGN(ProfCurrent, PROF_RUNTIME); + +} /* end of InvokeGCWithRoots */ + +/* NeedGC: + * + * Check to see if a GC is required, or if there is enough heap space for + * nbytes worth of allocation. Return TRUE, if GC is required, FALSE + * otherwise. + */ +bool_t NeedGC (ml_state_t *msp, Word_t nbytes) +{ +#if (defined(MP_SUPPORT) && defined(COMMENT_MP_GCPOLL)) + if ((((Addr_t)(msp->ml_allocPtr)+nbytes) >= (Addr_t)(msp->ml_limitPtr)) + || (INT_MLtoC(PollEvent) != 0)) +#elif defined(MP_SUPPORT) + if (((Addr_t)(msp->ml_allocPtr)+nbytes) >= (Addr_t)(msp->ml_limitPtr)) +#else + if (((Addr_t)(msp->ml_allocPtr)+nbytes) >= (Addr_t)HEAP_LIMIT(msp->ml_heap)) +#endif + return TRUE; + else + return FALSE; + +} /* end of NeedGC */ + + +#ifdef SOFT_POLL +/* ResetPollLimit: + * + * Reset the limit pointer according to the current polling frequency. + */ +void ResetPollLimit (ml_state_t *msp) +{ + int pollFreq = INT_MLtoC(DEREF(PollFreq)); + heap_t *heap = msp->ml_heap; + + /* assumes ml_allocPtr has been reset */ + msp->ml_realLimit = HEAP_LIMIT(heap); + if (pollFreq > 0) { + msp->ml_limitPtr = heap->allocBase + pollFreq*POLL_GRAIN_CPSI; + msp->ml_limitPtr = (msp->ml_limitPtr > msp->ml_realLimit) + ? msp->ml_realLimit + : msp->ml_limitPtr; + } + else + msp->ml_limitPtr = msp->ml_realLimit; + +} /* end ResetPollLimit */ +#endif /* SOFT_POLL */ diff --git a/base/runtime/gc/card-map.h b/base/runtime/gc/card-map.h new file mode 100644 index 0000000..0ac35dc --- /dev/null +++ b/base/runtime/gc/card-map.h @@ -0,0 +1,145 @@ +/* card-map.h + * + * COPYRIGHT (c) 1995 AT&T Bell Laboratories. + * + * Card maps for marking object updates. + */ + +#ifndef _CARD_MAP_ +#define _CARD_MAP_ + +#ifndef BIT_CARDS +typedef struct { /* A dirty card map */ + ml_val_t *baseAddr; /* The base address of the mapped region */ + Word_t numCards; /* The number of cards covered by the map */ + int mapSzB; /* The number of bytes allocated for this */ + /* map. */ + Byte_t map[WORD_SZB]; /* The card map */ +} card_map_t; + +#define CARD_CLEAN 0xff + +#define CARD_BITS 8 /* 256 byte cards */ +#define CARD_SZB (1<> LOG_BYTES_PER_WORD)-1)*WORD_SZB) + +/* Map an address to a card index */ +#define CARD_INDEX(cm, addr) \ + (((Addr_t)(addr) - (Addr_t)(cm->baseAddr)) >> CARD_BITS) + +/* Map a card index to its base address */ +#define CARD_TO_ADDR(cm, index) \ + ((Word_t *)((Addr_t)(cm->baseAddr) + ((index) << CARD_BITS))) + +/* Get the value of a card */ +#define CARD(cm, addr) ((cm)->map[CARD_INDEX(cm,addr)]) + +/* Mark the card containing addr */ +#define MARK_CARD(cm, addr, gen) { \ + card_map_t *__cm = (cm); \ + int __i = CARD_INDEX(__cm, (addr)); \ + int __g = (gen); \ + if (__g < __cm->map[__i]) \ + __cm->map[__i] = __g; \ + } + +/* test a card to see if it is marked */ +#define isDIRTY(cm, indx, maxGen) ((cm)->map[(indx)] <= (maxGen)) + +/* Iterate over the dirty cards of a card map. The argument indexVar + * should be an integer variable; it is used to pass the index of dirty + * cards to cmd. + */ +#ifdef COUNT_CARDS +#define COUNT_DIRTY(indexVar) \ + if(__cm->map[indexVar] != CARD_CLEAN) cardCnt2[i]++ +#else +#define COUNT_DIRTY(indexVar) /* null */ +#endif +#define FOR_DIRTY_CARD(cm, maxGen, indexVar, cmd) { \ + card_map_t *__cm = (cm); \ + int __n = __cm->numCards; \ + int __g = (maxGen); \ + for (indexVar = 0; indexVar < __n; indexVar++) { \ + COUNT_DIRTY(indexVar); \ + if (isDIRTY(__cm, indexVar, __g)) { \ + cmd \ + } \ + } \ + } + +#else +/** Memory cards ** + * The type of "cards" of memory. These are used to keep track of + * dirty regions in the array arenas. + * NOTE: we use bitvectors to implement card maps. It has been suggested that + * byte arrays are more efficient, since they avoid the read-modify-write + * required when setting a bit, but this doesn't seem to hold for SML. I + * think that this is because updates are less frequent, so that the savings + * on marking cards dirty doesn't offset the added cost of sweeping. + */ +typedef struct { /* A dirty card map */ + ml_val_t *baseAddr; /* The base address of the mapped region */ + Word_t numCards; /* The number of cards covered by the map */ + int mapSzB; /* The number of bytes allocated for this */ + /* map. */ + Word_t map[1]; /* The card map */ +} card_map_t; + +/* #define OLD_CARDS */ +#ifdef OLD_CARDS +#define CARD_BITS 10 /* 1024 byte cards */ +#else +#define CARD_BITS 8 /* 256 byte cards */ +#endif +#define CARD_SZB (1<> LOG_BITS_PER_WORD)-1)*WORD_SZB) + +/* Map an address to a card index */ +#define CARD_INDEX(cm, addr) \ + (((Addr_t)(addr) - (Addr_t)(cm->baseAddr)) >> CARD_BITS) + +/* Map a card index to its base address */ +#define CARD_TO_ADDR(cm, index) \ + ((Word_t *)((Addr_t)(cm->baseAddr) + ((index) << CARD_BITS))) + +/* Mark the card containing addr */ +#define MARK_CARD(cm, addr) { \ + card_map_t *__cm = (cm); \ + Word_t __offset = CARD_INDEX(__cm, addr); \ + __cm->map[__offset >> LOG_BITS_PER_WORD] |= \ + (1 << (__offset & (BITS_PER_WORD-1))); \ + } + +/* test a card to see if it is marked */ +#define isDIRTY(cm, indx) \ + ((cm)->map[(indx) >> LOG_BITS_PER_WORD] & (1 << ((indx) & (BITS_PER_WORD-1)))) + +/* Iterate over the dirty cards of a card map. The argument indexVar + * should be an integer variable; it is used to pass the index of dirty + * cards to cmd. + */ +#define FOR_DIRTY_CARD(cm, indexVar, cmd) { \ + card_map_t *__cm = (cm); \ + int __i = 0, __j = 0; \ + while (__j < __cm->numCards) { \ + Word_t __m = __cm->map[__i]; \ + indexVar = __j; \ + for (; __m != 0; __m >>= 1) { \ + if (__m & 1) { \ + cmd; \ + } \ + indexVar++; \ + } \ + __i++; __j += BITS_PER_WORD; \ + } \ + } +#endif + +#endif /* !_CARD_MAP_ */ diff --git a/base/runtime/gc/check-heap.c b/base/runtime/gc/check-heap.c new file mode 100644 index 0000000..e06cbca --- /dev/null +++ b/base/runtime/gc/check-heap.c @@ -0,0 +1,419 @@ +/*! \file check-heap.c + * + * Debugging code to check heap invariants. + * + * \author John Reppy + */ + +/* + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + */ + +#include "ml-base.h" +#include "card-map.h" +#include "heap.h" +#include "c-globals-tbl.h" + +#ifndef CHECK_HEAP +# error CHECK_HEAP must be defined too +#endif + +/* local routines */ +PVT void CheckRecordArena (arena_t *ap); +PVT void CheckPairArena (arena_t *ap); +PVT void CheckStringArena (arena_t *ap); +PVT void CheckArrayArena (arena_t *ap, card_map_t *cm); +PVT int CheckPtr (ml_val_t *p, ml_val_t w, int srcGen, int srcKind, int dstKind); + +PVT int ErrCount = 0; + +extern char *ArenaName[]; + +/* CheckPtr dstKind values */ +#define OBJC_NEWFLG (1 << OBJC_new) +#define OBJC_RECFLG (1 << OBJC_record) +#define OBJC_PAIRFLG (1 << OBJC_pair) +#define OBJC_STRFLG (1 << OBJC_string) +#define OBJC_ARRFLG (1 << OBJC_array) +#define OBJC_any \ + (OBJC_NEWFLG|OBJC_RECFLG|OBJC_PAIRFLG|OBJC_STRFLG|OBJC_ARRFLG) + +#define ERROR { \ + if (++ErrCount > 20) { \ + Die("CheckHeap: too many errors\n"); \ + } \ + } + +/* CheckBIBOP: + * + * Check that the heap and BIBOP agree. + */ +void CheckBIBOP (heap_t *heap) +{ + int i, j; + + ErrCount = 0; + + SayDebug ("Checking arena address ranges in BIBOP\n"); + for (i = 0; i < heap->numGens; i++) { + gen_t *g = heap->gen[i]; + /* check the small-object arenas */ + for (int j = 0; j < NUM_ARENAS; j++) { + arena_t *ap = g->arena[j]; + Addr_t p = (Addr_t)ap->tospBase; + Addr_t top = (Addr_t)ap->tospTop; + bool_t firstError = TRUE; + while (p < top) { + Addr_t aid = ADDR_TO_PAGEID(BIBOP, p); + if (aid != ap->id) { + ERROR; + if (firstError) { + SayDebug("** Generation %d, %s arena: inconsistent bibop\n", + i+1, ArenaName[j+1]); + firstError = FALSE; + } +#ifdef SIZE_64 + SayDebug("** %p: BIBOP[%d] = %p[%d] = %x:%x:%02x, but expected %x:%x:%02x\n", + p, BIBOP_ADDR_TO_L1_INDEX(p), + BIBOP[BIBOP_ADDR_TO_L1_INDEX(p)], + BIBOP_ADDR_TO_L2_INDEX(p), + EXTRACT_GEN(aid), EXTRACT_OBJC(aid), EXTRACT_HBLK(aid), + EXTRACT_GEN(ap->id), EXTRACT_OBJC(ap->id), EXTRACT_HBLK(ap->id)); +#else /* SIZE_32 */ + SayDebug("** %p: BIBOP[%d] = %x:%x:%02x, but expected %x:%x:%02x\n", + p, BIBOP_ADDR_TO_INDEX(p), + EXTRACT_GEN(aid), EXTRACT_OBJC(aid), EXTRACT_HBLK(aid), + EXTRACT_GEN(ap->id), EXTRACT_OBJC(ap->id), EXTRACT_HBLK(ap->id)); +#endif + } + p += BIBOP_PAGE_SZB; + } + } + /* check the big-objects */ + for (j = 0; j < NUM_BIGOBJ_KINDS; j++) { + bigobj_desc_t *bo = g->bigObjs[j]; +/* TODO */ + } + } + + if (ErrCount > 0) { + Die ("CheckBIBOP --- inconsistent heap\n"); + } + +} /* CheckBIBOP */ + +/* CheckHeap: + * + * Check the heap for consistency after a garbage collection (or blast out). + */ +void CheckHeap (heap_t *heap, int maxSweptGen) +{ + int i, j; + + ErrCount = 0; + + CheckBIBOP (heap); + + SayDebug ("Checking heap (%d generations) ...\n", maxSweptGen); + for (i = 0; i < maxSweptGen; i++) { + gen_t *g = heap->gen[i]; + + CheckRecordArena (g->arena[RECORD_INDX]); + CheckPairArena (g->arena[PAIR_INDX]); + CheckStringArena (g->arena[STRING_INDX]); + CheckArrayArena (g->arena[ARRAY_INDX], g->dirty); + } + SayDebug ("... done\n"); + + if (ErrCount > 0) { + Die ("CheckHeap --- inconsistent heap\n"); + } + +} /* end of CheckHeap */ + +/* CheckRecordArena: + * + * Check the record arena. + */ +PVT void CheckRecordArena (arena_t *ap) +{ + ml_val_t *p, *stop, desc, w; + int i, len; + int gen = EXTRACT_GEN(ap->id); + + if (! isACTIVE(ap)) + return; + + SayDebug (" records [%d]: [%p..%p:%p)\n", + gen, ap->tospBase, ap->nextw, ap->tospTop); + + p = ap->tospBase; + stop = ap->nextw; + while (p < stop) { + desc = *p++; + if (! isDESC(desc)) { + ERROR; + SayDebug ( + "** @%p: expected descriptor, but found %p in record arena\n", + p-1, desc); + return; + } + switch (GET_TAG(desc)) { + case DTAG_record: + len = GET_LEN(desc); + for (i = 0; i < len; i++, p++) { + w = *p; + if (isDESC(w)) { + ERROR; + SayDebug ( + "** @%p: unexpected descriptor %p in slot %d of %d\n", + p, w, i, GET_LEN(desc)); + return; + } + else if (isBOXED(w)) { + CheckPtr(p, w, gen, OBJC_record, OBJC_any); + } + } + break; + case DTAG_arr_hdr: + case DTAG_vec_hdr: + switch (GET_LEN(desc)) { + case SEQ_poly: + if (GET_TAG(desc) == DTAG_arr_hdr) + CheckPtr (p, *p, gen, OBJC_record, OBJC_ARRFLG); + else + CheckPtr (p, *p, gen, OBJC_record, OBJC_RECFLG|OBJC_PAIRFLG); + break; + case SEQ_word8: + case SEQ_word16: + case SEQ_word32: + case SEQ_word64: + case SEQ_real32: + case SEQ_real64: + CheckPtr (p, *p, gen, OBJC_record, OBJC_STRFLG); + break; + default: + ERROR; + SayDebug ("** @%p: strange sequence kind %d in record arena\n", + p-1, GET_LEN(desc)); + return; + } + if (! isUNBOXED(p[1])) { + ERROR; + SayDebug ("** @%p: sequence header length field not an int (%p)\n", + p+1, p[1]); + } + p += 2; + break; + default: + ERROR; + SayDebug ("** @%p: strange tag (%#x) in record arena\n", + p-1, GET_TAG(desc)); + return; + } /* end of switch */ + } + +} /* end of CheckRecordArena */ + +/* CheckPairArena: + */ +PVT void CheckPairArena (arena_t *ap) +{ + ml_val_t *p, *stop, w; + int gen = EXTRACT_GEN(ap->id); + + if (! isACTIVE(ap)) + return; + + SayDebug (" pairs [%d]: [%p..%p:%p)\n", + gen, ap->tospBase, ap->nextw, ap->tospTop); + + p = ap->tospBase + 2; + stop = ap->nextw; + while (p < stop) { + w = *p++; + if (isDESC(w)) { + ERROR; + SayDebug ( + "** @%p: unexpected descriptor %p in pair arena\n", + p-1, w); + return; + } + else if (isBOXED(w)) { + CheckPtr(p, w, gen, OBJC_pair, OBJC_any); + } + } + +} /* end of CheckPairArena */ + +/* CheckStringArena: + * + * Check a string arena for consistency. + */ +PVT void CheckStringArena (arena_t *ap) +{ + ml_val_t *p, *stop, *prevDesc, desc, next; + int len; + int gen = EXTRACT_GEN(ap->id); + + if (! isACTIVE(ap)) + return; + + SayDebug (" strings [%d]: [%p..%p:%p)\n", + gen, ap->tospBase, ap->nextw, ap->tospTop); + + p = ap->tospBase; + stop = ap->nextw; + prevDesc = NIL(ml_val_t *); + while (p < stop) { + desc = *p++; + if (isDESC(desc)) { + switch (GET_TAG(desc)) { + case DTAG_raw: + case DTAG_raw64: + len = GET_LEN(desc); + break; + default: + ERROR; + SayDebug ("** @%p: strange tag (%#x) in string arena\n", + p-1, GET_TAG(desc)); + if (prevDesc != NIL(ml_val_t *)) + SayDebug (" previous string started @ %p\n", prevDesc); + return; + } + prevDesc = p-1; + p += len; + } +#ifdef ALIGN_REALDS + else if ((desc == 0) && (((Addr_t)p & WORD_SZB) != 0)) + /* assume this is alignment padding */ + continue; +#endif + else { + ERROR; + SayDebug ( + "** @%p: expected descriptor, but found %p in string arena\n", + p-1, desc); + if (prevDesc != NIL(ml_val_t *)) + SayDebug (" previous string started @ %p\n", prevDesc); + return; + } + } + +} /* end of CheckStringArena */ + +/* CheckArrayArena: + */ +PVT void CheckArrayArena (arena_t *ap, card_map_t *cm) +{ + ml_val_t *p, *stop, desc, w; + int i, j, len; + int gen = EXTRACT_GEN(ap->id); + + if (! isACTIVE(ap)) + return; + + SayDebug (" arrays [%d]: [%p..%p:%p)\n", + gen, ap->tospBase, ap->nextw, ap->tospTop); + + p = ap->tospBase; + stop = ap->nextw; + while (p < stop) { + desc = *p++; + if (! isDESC(desc)) { + ERROR; + SayDebug ( + "** @%p: expected descriptor, but found %p in array arena\n", + p-1, desc); + return; + } + switch (GET_TAG(desc)) { + case DTAG_arr_data: + len = GET_LEN(desc); + break; + case DTAG_special: + len = 1; + break; + default: + ERROR; + SayDebug ("** @%p: strange tag (%#x) in array arena\n", + p-1, GET_TAG(desc)); + return; + } /* end of switch */ + for (i = 0; i < len; i++, p++) { + w = *p; + if (isDESC(w)) { + ERROR; + SayDebug ( + "** @%p: unexpected descriptor %p in array slot %d of %d\n", + p, w, i, GET_LEN(desc)); + for (p -= (i+1), j = 0; j <= len; j++, p++) { + SayDebug (" %p: %10p\n", p, *p); + } + return; + } + else if (isBOXED(w)) { + CheckPtr(p, w, gen, OBJC_array, OBJC_any); + } + } + } + +} /* end of CheckArrayArena */ + +/* CheckPtr: + */ +PVT int CheckPtr (ml_val_t *p, ml_val_t w, int srcGen, int srcKind, int dstKind) +{ + aid_t aid = ADDR_TO_PAGEID(BIBOP, w); + int dstGen = EXTRACT_GEN(aid); + int objc = EXTRACT_OBJC(aid); + + switch (objc) { + case OBJC_record: + case OBJC_pair: + case OBJC_string: + case OBJC_array: + if (!(dstKind & (1 << objc))) { + ERROR; + SayDebug ( + "** @%p: sequence data kind mismatch (expected %d, found %d)\n", + p, dstKind, objc); + } + if (dstGen < srcGen) { + if (srcKind != OBJC_array) { + ERROR; + SayDebug ( + "** @%p: reference to younger object @%p (gen = %d)\n", + p, (void *)w, dstGen); + } + } + if ((objc != OBJC_pair) && (! isDESC(((ml_val_t *)w)[-1]))) { + ERROR; + SayDebug ("** @%p: reference into object middle @%p\n", p, (void *)w); + } + break; + case OBJC_bigobj: + break; + case OBJC_new: + ERROR; + SayDebug ("** @%p: unexpected new-space reference\n", p); + dstGen = MAX_NUM_GENS; + break; + default: + if (aid == AID_UNMAPPED) { + if (AddrToCSymbol(w) == NIL(const char *)) { + ERROR; + SayDebug ( + "** @%p: reference to unregistered external address %p\n", + p, (void *)w); + } + dstGen = MAX_NUM_GENS; + } + else Die("bogus object class in BIBOP\n"); + break; + } /* end of switch */ + + return dstGen; + +} /* end of CheckPtr */ diff --git a/base/runtime/gc/copy-loop.h b/base/runtime/gc/copy-loop.h new file mode 100644 index 0000000..5dc9e4e --- /dev/null +++ b/base/runtime/gc/copy-loop.h @@ -0,0 +1,55 @@ +/* copy-loop.h + * + * COPYRIGHT (c) 1993 by AT&T Bell Laboratories. + * + * A dirty, but quick, copy loop for the GC. + */ + +#ifndef _COPY_LOOP_ +#define _COPY_LOOP_ + +#ifdef HAS_INCREMENT + +#define COPYLOOP(SRC,DST,LEN) { \ + Word_t *__src = (Word_t *)(SRC); \ + Word_t *__dst = (Word_t *)(DST); \ + int __len = (LEN); \ + int __m; \ + switch (__len & 0x3) { \ + case 3: *__dst++ = *__src++; \ + case 2: *__dst++ = *__src++; \ + case 1: *__dst++ = *__src++; \ + case 0: break; \ + } \ + __m = __len >> 2; \ + while (--__m >= 0) { \ + *__dst++ = *__src++; *__dst++ = *__src++; \ + *__dst++ = *__src++; *__dst++ = *__src++; \ + } \ + } + +#else + +#define COPYLOOP(SRC,DST,LEN) { \ + Word_t *__src = (Word_t *)(SRC); \ + Word_t *__dst = (Word_t *)(DST); \ + int __len = (LEN); \ + int __m; \ + switch (__len & 0x3) { \ + case 3: *__dst++ = *__src++; \ + case 2: *__dst++ = *__src++; \ + case 1: *__dst++ = *__src++; \ + case 0: break; \ + } \ + __m = __len >> 2; \ + while (--__m >= 0) { \ + __dst[0] = __src[0]; __dst[1] = __src[1]; \ + __dst[2] = __src[2]; __dst[3] = __src[3]; \ + __dst += 4; __src += 4; \ + } \ + } + +#endif + +#endif /* !_COPY_LOOP_ */ + diff --git a/base/runtime/gc/export-heap.c b/base/runtime/gc/export-heap.c new file mode 100644 index 0000000..0a693a8 --- /dev/null +++ b/base/runtime/gc/export-heap.c @@ -0,0 +1,394 @@ +/*! \file export-heap.c + * + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * Routines to export an ML heap image. The basic layout of the heap image is: + * + * Header (Image header + Heap header) + * External reference table + * ML state info + * ML Heap: + * Big-object region descriptors + * Generation descriptors + * Heap image + * + * + * Note that this will change once multiple VProcs are supported. + */ + +#include "ml-osdep.h" +#include "ml-base.h" +#include "ml-limits.h" +#include "memory.h" +#include "ml-state.h" +#include "ml-objects.h" +#include "ml-globals.h" +#include "ml-heap-image.h" +#include "heap.h" +#include "c-globals-tbl.h" +#include "writer.h" +#include "heap-io.h" +#include "heap-output.h" + +#define isEXTERN(bibop, w) (isBOXED(w) && (ADDR_TO_PAGEID(bibop, w) == AID_UNMAPPED)) +#define isEXTERNTAG(w) (isDESC(w) && (GET_TAG(w) == DTAG_extern)) + +/* local routines */ +PVT status_t ExportImage (ml_state_t *msp, int kind, FILE *file); +PVT export_table_t *ScanHeap (heap_t *heap); +PVT status_t WriteHeap (writer_t *wr, heap_t *heap); +PVT void RepairHeap (export_table_t *tbl, heap_t *heap); + + +/* ExportHeapImage: + */ +status_t ExportHeapImage (ml_state_t *msp, FILE *file) +{ + return ExportImage (msp, EXPORT_HEAP_IMAGE, file); + +} /* end of ExportHeapImage. */ + + +/* ExportFnImage: + */ +status_t ExportFnImage (ml_state_t *msp, ml_val_t funct, FILE *file) +{ + /* zero-out the saved parts of the ML state, and use the standard argument + * register to hold the exported function closure. + */ + msp->ml_arg = funct; + msp->ml_cont = ML_unit; + msp->ml_closure = ML_unit; + msp->ml_linkReg = ML_unit; + msp->ml_exnCont = ML_unit; + msp->ml_varReg = ML_unit; /* ??? */ + msp->ml_calleeSave[0] = ML_unit; + msp->ml_calleeSave[1] = ML_unit; + msp->ml_calleeSave[2] = ML_unit; + + return ExportImage (msp, EXPORT_FN_IMAGE, file); + +} /* end of ExportFnImage */ + + +/* ExportImage: + */ +PVT status_t ExportImage (ml_state_t *msp, int kind, FILE *file) +{ + heap_t *heap = msp->ml_heap; + /* + gen_t *oldestGen = heap->gen[heap->numGens-1]; + */ + status_t status = SUCCESS; + export_table_t *exportTbl; + writer_t *wr; + +#define SAVE_REG(dst, src) { \ + ml_val_t __src = (src); \ + if (isEXTERN(BIBOP, __src)) \ + __src = ExportCSymbol(exportTbl, __src); \ + (dst) = __src; \ + } + + if ((wr = WR_OpenFile(file)) == NIL(writer_t *)) + return FAILURE; + + /* Shed any and all garbage. */ + InvokeGC (msp, 0); /* minor collection */ + InvokeGC (msp, MAX_NGENS); + + exportTbl = ScanHeap(heap); + + { + ml_heap_hdr_t heapHdr; + + heapHdr.numVProcs = 1; + heapHdr.numGens = heap->numGens; + heapHdr.numArenas = NUM_ARENAS; + heapHdr.numBOKinds = NUM_BIGOBJ_KINDS; + heapHdr.numBORegions = heap->numBORegions; + heapHdr.cacheGen = heap->cacheGen; + heapHdr.allocSzB = heap->allocSzB / MAX_NUM_PROCS; + + SAVE_REG(heapHdr.pervStruct, *PTR_MLtoC(ml_val_t, PervStruct)); + SAVE_REG(heapHdr.runTimeCompUnit, RunTimeCompUnit); +#ifdef ASM_MATH + SAVE_REG(heapHdr.mathVec, MathVec); +#else + heapHdr.mathVec = ML_unit; +#endif + + HeapIO_WriteImageHeader(wr, kind); + WR_Write(wr, &heapHdr, sizeof(heapHdr)); + if (WR_Error(wr)) { + WR_Free(wr); + return FAILURE; + } + } + + /* export the ML state info */ + { + ml_vproc_image_t image; + + /* Save the live registers */ + SAVE_REG(image.sigHandler, DEREF(MLSignalHandler)); + SAVE_REG(image.stdArg, msp->ml_arg); + SAVE_REG(image.stdCont, msp->ml_cont); + SAVE_REG(image.stdClos, msp->ml_closure); + SAVE_REG(image.pc, msp->ml_pc); + SAVE_REG(image.exnCont, msp->ml_exnCont); + SAVE_REG(image.varReg, msp->ml_varReg); + SAVE_REG(image.calleeSave[0], msp->ml_calleeSave[0]); + SAVE_REG(image.calleeSave[1], msp->ml_calleeSave[1]); + SAVE_REG(image.calleeSave[2], msp->ml_calleeSave[2]); + + if (HeapIO_WriteExterns(wr, exportTbl) == FAILURE) { + status = FAILURE; + goto done; + } + + WR_Write(wr, &image, sizeof(image)); + if (WR_Error(wr)) { + status = FAILURE; + goto done; + } + } + + /* Write out the heap image */ + if (WriteHeap(wr, heap) == FAILURE) + status = FAILURE; + + done:; + if (kind != EXPORT_FN_IMAGE) + RepairHeap (exportTbl, heap); + + WR_Free(wr); + + return status; + +} /* end of ExportImage. */ + + +/* ScanHeap: + * + * Scan the heap looking for exported symbols and return an export table. + */ +PVT export_table_t *ScanHeap (heap_t *heap) +{ + export_table_t *tbl = NewExportTbl(); + bibop_t bibop = BIBOP; + int i; + + /* Scan the record, pair and array regions for references to external symbols */ + for (i = 0; i < heap->numGens; i++) { +#define PatchArena(indx) { \ + arena_t *__ap = heap->gen[i]->arena[(indx)]; \ + ml_val_t *__p, *__q; \ + bool_t needsRepair = FALSE; \ + __p = __ap->tospBase; \ + __q = __ap->nextw; \ + while (__p < __q) { \ + ml_val_t __w = *__p; \ + if (isEXTERN(bibop, __w)) { \ + *__p = ExportCSymbol(tbl, __w); \ + needsRepair = TRUE; \ + } \ + __p++; \ + } \ + __ap->needsRepair = needsRepair; \ + } /* PatchArena */ + + PatchArena(RECORD_INDX); + PatchArena(PAIR_INDX); + PatchArena(ARRAY_INDX); + } + + return tbl; + +} /* end of ScanHeap */ + + +/* WriteHeap: + * + */ +PVT status_t WriteHeap (writer_t *wr, heap_t *heap) +{ + heap_arena_hdr_t *p, *arenaHdrs; + bigobj_desc_t *bdp; + Addr_t offset; + int arenaHdrsSize, pagesize; + int i, j; + + pagesize = GETPAGESIZE(); + + /* write the big-object region descriptors */ + { + int sz; + bo_region_info_t *hdr; + bigobj_region_t *rp; + +#ifdef BO_DEBUG +SayDebug("%d bigobject regions\n", heap->numBORegions); +#endif + sz = heap->numBORegions * sizeof(bo_region_info_t); + hdr = (bo_region_info_t *) MALLOC (sz); + for (rp = heap->bigRegions, i = 0; rp != NIL(bigobj_region_t *); rp = rp->next, i++) { +#ifdef BO_DEBUG +PrintRegionMap(rp); +#endif + hdr[i].baseAddr = MEMOBJ_BASE(rp->memObj); + hdr[i].firstPage = rp->firstPage; + hdr[i].sizeB = MEMOBJ_SZB(rp->memObj); + } + + WR_Write(wr, hdr, sz); + if (WR_Error(wr)) { + FREE (hdr); + return FAILURE; + } + + FREE(hdr); + } + + /* initialize the arena headers. */ + arenaHdrsSize = heap->numGens * (NUM_OBJ_KINDS * sizeof(heap_arena_hdr_t)); + arenaHdrs = (heap_arena_hdr_t *) MALLOC (arenaHdrsSize); + offset = WR_Tell(wr) + arenaHdrsSize; + offset = ROUNDUP(offset, pagesize); + for (p = arenaHdrs, i = 0; i < heap->numGens; i++) { + for (j = 0; j < NUM_ARENAS; j++, p++) { + arena_t *ap = heap->gen[i]->arena[j]; + p->gen = i; + p->objKind = j; + p->info.o.baseAddr = (Addr_t)(ap->tospBase); + p->info.o.sizeB = (Addr_t)(ap->nextw) - p->info.o.baseAddr; + p->info.o.roundedSzB = ROUNDUP(p->info.o.sizeB, pagesize); + p->offset = offset; + offset += p->info.o.roundedSzB; + } + for (j = 0; j < NUM_BIGOBJ_KINDS; j++, p++) { + int nObjs, nBOPages; + bdp = heap->gen[i]->bigObjs[j]; + for (nObjs = nBOPages = 0; bdp != NIL(bigobj_desc_t *); bdp = bdp->next) { + nObjs++; + nBOPages += (BO_ROUNDED_SZB(bdp) >> BIGOBJ_PAGE_SHIFT); + } + p->gen = i; + p->objKind = j; + p->info.bo.numBigObjs = nObjs; + p->info.bo.numBOPages = nBOPages; + p->offset = offset; + offset += ((nObjs * sizeof(bigobj_hdr_t)) + + (nBOPages << BIGOBJ_PAGE_SHIFT)); + } + } + + /* write the arena headers out */ + WR_Write(wr, arenaHdrs, arenaHdrsSize); + if (WR_Error(wr)) { + FREE (arenaHdrs); + return FAILURE; + } + + /* write out the arenas */ + for (p = arenaHdrs, i = 0; i < heap->numGens; i++) { + for (j = 0; j < NUM_ARENAS; j++) { + if (GCMessages) { + SayDebug("write %d,%d: %d bytes [%#x..%#x) @ %#x\n", + i+1, j, p->info.o.sizeB, + p->info.o.baseAddr, p->info.o.baseAddr+p->info.o.sizeB, + p->offset); + } + if (p->info.o.sizeB > 0) { + WR_Seek(wr, p->offset); + WR_Write(wr, (void *)(p->info.o.baseAddr), p->info.o.sizeB); + if (WR_Error(wr)) { + FREE (arenaHdrs); + return FAILURE; + } + } + p++; + } + for (j = 0; j < NUM_BIGOBJ_KINDS; j++) { + int hdrSizeB; + bigobj_hdr_t *hdr, *q; + + if (p->info.bo.numBigObjs > 0) { + hdrSizeB = p->info.bo.numBigObjs * sizeof(bigobj_hdr_t); + hdr = (bigobj_hdr_t *) MALLOC (hdrSizeB); + if (GCMessages) { + SayDebug("write %d,%d: %d big objects (%d pages) @ %#x\n", + i+1, j, p->info.bo.numBigObjs, p->info.bo.numBOPages, + p->offset); + } + /* initialize the big-object headers */ + q = hdr; + for (bdp = heap->gen[i]->bigObjs[j]; bdp != NIL(bigobj_desc_t *); bdp = bdp->next) { + q->gen = bdp->gen; + q->objKind = j; + q->baseAddr = (Addr_t)(bdp->obj); + q->sizeB = bdp->sizeB; + q++; + } + /* write the big-object headers */ + WR_Write (wr, hdr, hdrSizeB); + if (WR_Error(wr)) { + FREE (hdr); + FREE (arenaHdrs); + return FAILURE; + } + /* write the big-objects */ + for (bdp = heap->gen[i]->bigObjs[j]; bdp != NIL(bigobj_desc_t *); bdp = bdp->next) { + WR_Write(wr, (char *)(bdp->obj), BO_ROUNDED_SZB(bdp)); + if (WR_Error(wr)) { + FREE (hdr); + FREE (arenaHdrs); + return FAILURE; + } + } + FREE (hdr); + } + p++; + } + } + + FREE (arenaHdrs); + + return SUCCESS; + +} /* end of WriteHeap. */ + +/* RepairHeap: + */ +PVT void RepairHeap (export_table_t *tbl, heap_t *heap) +{ + int i; + + /* repair the in-memory heap */ + for (i = 0; i < heap->numGens; i++) { +#define RepairArena(indx) { \ + arena_t *__ap = heap->gen[i]->arena[(indx)]; \ + if (__ap->needsRepair) { \ + ml_val_t *__p, *__q; \ + __p = __ap->tospBase; \ + __q = __ap->nextw; \ + while (__p < __q) { \ + ml_val_t __w = *__p; \ + if (isEXTERNTAG(__w)) { \ + *__p = AddrOfCSymbol(tbl, __w); \ + } \ + __p++; \ + } \ + } \ + __ap->needsRepair = FALSE; \ + } /* RepairArena */ + + RepairArena(RECORD_INDX); + RepairArena(PAIR_INDX); + RepairArena(ARRAY_INDX); + } + + FreeExportTbl (tbl); + +} /* end of RepairHeap */ diff --git a/base/runtime/gc/flip.c b/base/runtime/gc/flip.c new file mode 100644 index 0000000..d683161 --- /dev/null +++ b/base/runtime/gc/flip.c @@ -0,0 +1,188 @@ +/* flip.c + * + * COPYRIGHT (c) 1993 by AT&T Bell Laboratories. + * + * This code determines which generations to flip and what the + * to-space sizes should be. + */ + +#include "ml-base.h" +#include "ml-limits.h" +#include "ml-state.h" +#include "heap.h" +#include "heap-monitor.h" + +#if defined(VERBOSE) +extern char *ArenaName[NUM_ARENAS+1]; +#endif + + +/* Flip: + * + * Determine which generations need to be flipped and flip them. Return + * the number of flipped generations (which will be at least min_gc_level). + * It is assumed that the fist generation is always flipped (i.e., that + * min_gc_level > 1). + */ +int Flip (heap_t *heap, int min_gc_level) +{ + int i, j, prevGC, numGCs; + Addr_t newSz, prevOldSz[NUM_ARENAS], minSize[NUM_ARENAS]; + arena_t *ap; + +#ifdef VERBOSE +SayDebug ("Flip: min_gc_level = %d\n", min_gc_level); +#endif + + /* for the first generation, we make a worst-case assumption that all of the + * data could be forwarded to any arena. + */ + for (i = 0; i < NUM_ARENAS; i++) { + prevOldSz[i] = heap->allocSzB; + } + + prevGC = heap->numMinorGCs; + for (i = 0; i < heap->numGens; i++) { + gen_t *g = heap->gen[i]; + + /* Check to see if generation (i+1) should be flipped */ +#ifdef VERBOSE +SayDebug ("checking generation %d\n", i+1); +#endif + if (i >= min_gc_level) { + for (j = 0; j < NUM_ARENAS; j++) { + arena_t *ap = g->arena[j]; +#ifdef VERBOSE +SayDebug (" %s: avail = %u, prev = %u\n", +ArenaName[j+1], (isACTIVE(ap) ? AVAIL_SPACE(ap) : 0), prevOldSz[j]); +#endif + if ((isACTIVE(ap) ? AVAIL_SPACE(ap) : 0) < prevOldSz[j]) + goto flip; + } + /* Here we don't need to flip gen[i] */ + return i; + } + flip:; /* Here we need to flip gen[i] */ + + numGCs = prevGC - g->lastPrevGC; +#ifdef VERBOSE +SayDebug ("Flip generation %d: (%d GCs)\n", i+1, numGCs); +#endif + /* Compute the space requirements for this generation, make the old + * to-space into from-space, and allocate a new to-space. + */ + for (j = 0; j < NUM_ARENAS; j++) { + Addr_t minSz, thisMinSz; + ap = g->arena[j]; + if (isACTIVE(ap)) { + FLIP_ARENA(ap); + HeapMon_MarkFromSp (heap, ap->frspBase, ap->frspSizeB); + ASSERT((Addr_t)(ap->oldTop) <= (Addr_t)(ap->frspTop)); + thisMinSz = ((Addr_t)(ap->frspTop) - (Addr_t)(ap->oldTop)); + } + else { + ap->frspSizeB = 0; /* to ensure accurate stats */ + if ((ap->reqSizeB == 0) && (prevOldSz[j] == 0)) { + /* there will be no data copied into this arena, so skip it */ + minSize[j] = 0; + continue; + } + else { + thisMinSz = 0; + } + } + minSz = prevOldSz[j] + thisMinSz + ap->reqSizeB; + if (j == STRING_INDX) { +#ifdef SIZE_32 + /* Doubles can require aligment fixups, which in the worst + * case are 1/3 the size of the object. Round up the minimum + * size to avoid an overrun on the end of TO space. + */ + minSz *= 1.33; +#endif + } + else if (j == PAIR_INDX) { + /* the first slot isn't used, but may need the space for poly = */ + minSz += 2*WORD_SZB; + } + minSize[j] = minSz; + +#ifdef OLD_POLICY + /* The desired size is the minimum size times the ratio for the arena, + * but it shouldn't exceed the maximum size for the arena (unless + * minSz > maxSizeB). + */ + newSz = (ap->ratio * minSz) / RATIO_UNIT; + if (newSz < minSz+ap->reqSizeB) + newSz = minSz+ap->reqSizeB; +#endif + /* The desired size is one that will allow "ratio" GCs of the + * previous generation before this has to be collected again. + * We approximate this as ((f*ratio) / n), where + * f == # of bytes forwarded since the last collection of this generation + * n == # of collections of the previous generation since the last + * collection of this generation + * We also need to allow space for young objects in this generation, + * but the new size shouldn't exceed the maximum size for the arena + * (unless minSz > maxSizeB). + */ + newSz = prevOldSz[j] + ap->reqSizeB + (g->ratio * (thisMinSz / numGCs)); + if (newSz < minSz) { + newSz = minSz; + } +#ifdef VERBOSE +SayDebug (" %s: min = %u, prev = %u, thisMin = %u, req = %u, new = %u, max = %u\n", +ArenaName[j+1], minSz, prevOldSz[j], thisMinSz, ap->reqSizeB, newSz, ap->maxSizeB); +#endif + if (newSz > ap->maxSizeB) { + newSz = (minSz > ap->maxSizeB) ? minSz : ap->maxSizeB; + } + + if (newSz > 0) { + ap->tospSizeB = RND_MEMOBJ_SZB(newSz); +#ifdef VERBOSE +SayDebug (" alloc %u\n", ap->tospSizeB); +#endif + } + else { + ap->nextw = NIL(ml_val_t *); + ap->tospTop = NIL(ml_val_t *); + ap->tospSizeB = 0; + } + /* Note: any data between ap->oldTop and ap->nextw is "young", and + * should stay in this generation. + */ + if (ap->frspSizeB > 0) + prevOldSz[j] = (Addr_t)(ap->oldTop) - (Addr_t)(ap->frspBase); + else + prevOldSz[j] = 0; + } + + g->lastPrevGC = prevGC; + g->numGCs++; + prevGC = g->numGCs; + g->fromObj = g->toObj; + if (NewGeneration(g) == FAILURE) { + /* try to allocate the minimum size */ + Error ("unable to allocate to-space for generation %d; trying smaller size\n", i+1); + for (j = 0; j < NUM_ARENAS; j++) { + g->arena[j]->tospSizeB = RND_MEMOBJ_SZB(minSize[j]); + } + if (NewGeneration(g) == FAILURE) + Die("unable to allocate minimum size\n"); + } +#ifdef TOSPACE_ID + for (j = 0; j < NUM_ARENAS; j++) { + arena_t *ap = g->arena[j]; + if (isACTIVE(ap)) + MarkRegion (BIBOP, ap->tospBase, ap->tospSizeB, TOSPACE_AID(i+1)); + } +#endif + + if (isACTIVE(g->arena[ARRAY_INDX])) + NewDirtyVector(g); + } + + return heap->numGens; + +} /* end of Flip */ diff --git a/base/runtime/gc/gc-stats.c b/base/runtime/gc/gc-stats.c new file mode 100644 index 0000000..ff7ef02 --- /dev/null +++ b/base/runtime/gc/gc-stats.c @@ -0,0 +1,200 @@ +/* gc-stats.c + * + * COPYRIGHT (c) 1993 by AT&T Bell Laboratories. + * + * Support routines for gathering GC statistics. + */ + +#include "ml-osdep.h" +#include +#include +#include +#include "ml-base.h" +#include "ml-limits.h" +#include "ml-state.h" +#include "heap.h" +#include "cntr.h" +#include "gc-stats.h" + +FILE *DebugF; +FILE *StatsF; + +/** Virtual memory statistics **/ +#ifdef VM_STATS + +#define ROUND1K(X) (((X)+512)/1024) + +/* ReportVM: + * + */ +void ReportVM (ml_state_t *msp, int maxCollectedGen) +{ + heap_t *heap = msp->ml_heap; + int kbytesPerPage = GETPAGESIZE()/1024; + FILE *f = (StatsF == NULL) ? DebugF : StatsF; + struct rusage ru; + Addr_t bytesAllocated, oldBytes, vmAlloc; + int i, j; + + getrusage(RUSAGE_SELF, &ru); + +#ifdef XXX + bytesAllocated = ((Addr_t)(msp->ml_allocPtr) - (Addr_t)(heap->allocBase)); +#else + bytesAllocated = 0; +#endif + + /* count size of older generations */ + oldBytes = 0; + for (i = 0; i < heap->numGens; i++) { + for (j = 0; j < NUM_ARENAS; j++) { + arena_t *ap = heap->gen[i]->arena[j]; + if (i < maxCollectedGen) { + if (ap->frspSizeB > 0) + oldBytes += ((Addr_t)(ap->frspTop) - (Addr_t)(ap->frspBase)); + } + else { + if (isACTIVE(ap)) + oldBytes += ((Addr_t)(ap->nextw) - (Addr_t)(ap->tospBase)); + } + } + /* count code objects too! */ + for (j = 0; j < NUM_BIGOBJ_KINDS; j++) { + bigobj_desc_t *dp = heap->gen[i]->bigObjs[j]; + for (; dp != NIL(bigobj_desc_t *); dp = dp->next) { + oldBytes += dp->sizeB; + } + } + } + + /* get amount of allocated VM (in Kb) */ + vmAlloc = MEM_GetVMSize(); + + fprintf (f, "VM{alloc="); + CNTR_FPRINTF (f, &(heap->numAlloc), 10); + fprintf (f, ", new=%dk, old=%dk, tot=%dk, max_rss=%d}\n", + ROUND1K(bytesAllocated), + ROUND1K(oldBytes), + vmAlloc, + ru.ru_maxrss*kbytesPerPage); + fflush (f); + +} /* end of ReportVM */ + +#endif + + +/** Pause time statistics **/ +#ifdef PAUSE_STATS + +pause_info_t PauseTbl[MAX_NGENS+1]; + +/* InitPauseTbl: + */ +void InitPauseTbl () +{ + int i; + + for (i = 0; i <= MAX_NGENS; i++) { + PauseTbl[i].numGCs = 0; + PauseTbl[i].maxPause = 0; + PauseTbl[i].buckets = NIL(short *); + } + + GrowPauseTbl (0, MS_TO_BUCKET(500)); + GrowPauseTbl (1, MS_TO_BUCKET(1000)); + GrowPauseTbl (2, MS_TO_BUCKET(2000)); + GrowPauseTbl (3, MS_TO_BUCKET(3000)); + GrowPauseTbl (4, MS_TO_BUCKET(3000)); + GrowPauseTbl (5, MS_TO_BUCKET(4000)); + GrowPauseTbl (6, MS_TO_BUCKET(4000)); + +} /* end of InitPauseTbl */ + +/* GrowPauseTbl: + * + */ +void GrowPauseTbl (int gen, int pause) +{ + pause_info_t *p = &(PauseTbl[gen]); + short *buckets = p->buckets, *new; + int sz, i; + + for (sz = (p->maxPause ? p->maxPause : 16); sz < pause; sz = sz+sz) + continue; + new = NEW_VEC(short, sz); + + if (buckets != NIL(short *)) { + for (i = 0; i < p->maxPause; i++) + new[i] = buckets[i]; + for (; i < sz; i++) + new[i] = 0; + FREE (buckets); + } + else { + for (i = 0; i < sz; i++) + new[i] = 0; + } + + p->buckets = new; + p->maxPause = sz; + +} /* end of GrowPauseTbl */ + +/* ReportPauses: + * + */ +void ReportPauses (FILE *f) +{ + pause_info_t *infop; + int i, j, k, n, maxPause; + + /* compute the largest maxPause time */ + maxPause = 100; /* one second */ + for (i = MAX_NGENS; i > 0; i--) { + infop = &(PauseTbl[i]); + if ((infop->numGCs > 0) && (infop->maxPause > maxPause)) { + for (j = infop->maxPause-1; (j > maxPause) && (infop->buckets[j] == 0); j--) + continue; + if (j > maxPause) + maxPause = ((j+99)/100)*100; + } + } + + fprintf(f, "newgraph\n"); + fprintf(f, " xaxis\n"); + fprintf(f, " label : GC Pause Times (ms)\n"); + fprintf(f, " no_auto_hash_marks\n"); + fprintf(f, " size 4.5\n"); + fprintf(f, " min -10 max %d\n", maxPause); + for (i = 0; i <= maxPause; i += 50) + fprintf(f, " hash_at %4d hash_label at %4d : %4d\n", i, i, i*10); + fprintf(f, " yaxis\n"); + fprintf(f, " label : Number of pauses\n"); + fprintf(f, " min 0\n"); + + for (i = MAX_NGENS; i > 0; i--) { + infop = &(PauseTbl[i]); + if (infop->numGCs == 0) + continue; + fprintf(f, " (* generation %d pause times *)\n", i); + fprintf(f, " newcurve\n"); + fprintf(f, " label : Generation %d\n", i); + fprintf(f, " marktype xbar\n"); + fprintf(f, " pts\n"); + for (j = 0; j < infop->maxPause; j++) { + if (infop->buckets[j] == 0) + continue; + for (k = 1, n = 0; k <= i; k++) { + if (PauseTbl[k].maxPause >= j) + n += PauseTbl[k].buckets[j]; + } + fprintf(f, " %4d %3d\n", j*10, n); + } + } + + fflush (f); + +} /* end of ReportPauses */ + +#endif diff --git a/base/runtime/gc/gc-stats.h b/base/runtime/gc/gc-stats.h new file mode 100644 index 0000000..b4ef2c0 --- /dev/null +++ b/base/runtime/gc/gc-stats.h @@ -0,0 +1,49 @@ +/* gc-stats.h + * + * COPYRIGHT (c) 1993 by AT&T Bell Laboratories. + */ + +#ifndef _GC_STATS_ +#define _GC_STATS_ + +#include "stats-data.h" + +#ifdef VM_STATS +extern void ReportVM (ml_state_t *msp, int maxCollectedGen); +#endif + +#ifdef PAUSE_STATS + +#define START_GC_PAUSE(HEAP) { \ + if (StatsOn) { \ + heap_t *__heap = (HEAP); \ + stat_rec_t *__p = &(StatsBuf[NStatsRecs]); \ + Unsigned32_t __n = (Addr_t)(msp->ml_allocPtr) - \ + (Addr_t)(__heap->allocBase); \ + CNTR_INCR(&(__heap->numAlloc), __n); \ + __p->allocCnt = __heap->numAlloc; \ + __p->numGens = 0; \ + gettimeofday(&(__p->startTime), NIL(struct timezone *)); \ + } \ + } + +#define NUM_GC_GENS(NGENS) { \ + if (StatsOn) \ + StatsBuf[NStatsRecs].numGens = (NGENS); \ + } + +#define STOP_GC_PAUSE() { \ + if (StatsOn) { \ + gettimeofday(&(StatsBuf[NStatsRecs].stopTime), \ + NIL(struct timezone *)); \ + STATS_FINISH(); \ + } \ + } + +#else /* !PAUSE_STATS */ +#define START_GC_PAUSE(HEAP) +#define NUM_GC_GENS(NGENS) +#define STOP_GC_PAUSE() +#endif /* PAUSE_STATS */ + +#endif diff --git a/base/runtime/gc/gc-util.c b/base/runtime/gc/gc-util.c new file mode 100644 index 0000000..a1b6e33 --- /dev/null +++ b/base/runtime/gc/gc-util.c @@ -0,0 +1,307 @@ +/*! \file gc-util.c + * + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * Garbage collection utility routines. + */ + +#include +#include +#include "ml-base.h" +#include "ml-limits.h" +#include "ml-values.h" +#include "memory.h" +#include "card-map.h" +#include "heap.h" +#include "heap-monitor.h" + + +/* NewGeneration: + * + * Allocate and partition the space for a generation. + */ +status_t NewGeneration (gen_t *gen) +{ + int i; + Addr_t tot_sz; + ml_val_t *p; + mem_obj_t *memobj; + arena_t *ap; + + /* Compute the total size */ + for (tot_sz = 0, i = 0; i < NUM_ARENAS; i++) { + if (isACTIVE(gen->arena[i])) + tot_sz += gen->arena[i]->tospSizeB; + } + + if ((gen->cacheObj != NIL(mem_obj_t *)) && (MEMOBJ_SZB(gen->cacheObj) >= tot_sz)) { + memobj = gen->cacheObj; + gen->cacheObj = NIL(mem_obj_t *); + } + else if ((memobj = MEM_AllocMemObj(tot_sz)) == NIL(mem_obj_t *)) { + /** Eventually we should try to allocate the generation as separate + ** chunks instead of failing. + **/ + return FAILURE; + } + + /* Initialize the chunks */ + gen->toObj = memobj; +#ifdef VERBOSE +SayDebug ("NewGeneration[%d]: tot_sz = %d, [%p, %p)\n", +gen->genNum, tot_sz, MEMOBJ_BASE(memobj), MEMOBJ_BASE(memobj) + MEMOBJ_SZB(memobj)); +#endif + for (p = (ml_val_t *)MEMOBJ_BASE(memobj), i = 0; i < NUM_ARENAS; i++) { + ap = gen->arena[i]; + if (isACTIVE(ap)) { + ap->tospBase = p; + ap->nextw = p; + ap->sweep_nextw = p; + p = (ml_val_t *)((Addr_t)p + ap->tospSizeB); + ap->tospTop = p; + MarkRegion (BIBOP, ap->tospBase, ap->tospSizeB, ap->id); + HeapMon_MarkRegion (gen->heap, ap->tospBase, ap->tospSizeB, ap->id); +#ifdef VERBOSE +SayDebug (" %#x: [%p, %p)\n", ap->id, ap->nextw, p); +#endif + } + else { + ap->tospBase = NIL(ml_val_t *); + ap->nextw = NIL(ml_val_t *); + ap->sweep_nextw = NIL(ml_val_t *); + ap->tospTop = NIL(ml_val_t *); + } + } + + ap = gen->arena[PAIR_INDX]; + if (isACTIVE(ap)) { + /* The first slot of pair-space cannot be used, so that poly-equal won't fault */ + *(ap->nextw++) = ML_unit; + *(ap->nextw++) = ML_unit; + ap->tospBase = ap->nextw; + ap->sweep_nextw = ap->nextw; + } + + return SUCCESS; + +} /* end of NewGeneration */ + + +/* FreeGeneration: + */ +void FreeGeneration (heap_t *heap, int g) +{ + gen_t *gen = heap->gen[g]; + int i; + + if (gen->fromObj == NIL(mem_obj_t *)) + return; + +#ifdef VERBOSE +SayDebug ("FreeGeneration [%d]: [%p, %p)\n", g+1, MEMOBJ_BASE(gen->fromObj), +MEMOBJ_BASE(gen->fromObj) + MEMOBJ_SZB(gen->fromObj)); +#endif + if (g < heap->cacheGen) { + if (gen->cacheObj != NIL(mem_obj_t *)) { + if (MEMOBJ_SZB(gen->cacheObj) > MEMOBJ_SZB(gen->fromObj)) + MEM_FreeMemObj (gen->fromObj); + else { + MEM_FreeMemObj (gen->cacheObj); + gen->cacheObj = gen->fromObj; + } + } + else + gen->cacheObj = gen->fromObj; + } + else + MEM_FreeMemObj (gen->fromObj); + +/** NOTE: since the arenas are contiguous, we could do this in one call **/ + gen->fromObj = NIL(mem_obj_t *); + for (i = 0; i < NUM_ARENAS; i++) { + arena_t *ap = gen->arena[i]; + if (ap->frspBase != NIL(ml_val_t *)) { + MarkRegion (BIBOP, ap->frspBase, ap->frspSizeB, AID_UNMAPPED); + HeapMon_MarkRegion (heap, ap->frspBase, ap->frspSizeB, AID_UNMAPPED); + ap->frspBase = NIL(ml_val_t *); + ap->frspSizeB = 0; + ap->frspTop = NIL(ml_val_t *); + } + } + +} /* end of FreeGeneration */ + + +/* NewDirtyVector: + * Bind in a new dirty vector for the given generation, reclaiming the old + * vector. + */ +void NewDirtyVector (gen_t *gen) +{ + arena_t *ap = gen->arena[ARRAY_INDX]; + int vecSz = (ap->tospSizeB / CARD_SZB); + int allocSzB = CARD_MAP_SZ(vecSz); + + if (gen->dirty == NIL(card_map_t *)) { + gen->dirty = (card_map_t *)MALLOC(allocSzB); + gen->dirty->mapSzB = allocSzB; + } + else if (allocSzB > gen->dirty->mapSzB) { + FREE(gen->dirty); + gen->dirty = (card_map_t *)MALLOC(allocSzB); + gen->dirty->mapSzB = allocSzB; + } + if (gen->dirty == NIL(card_map_t *)) { + Die ("unable to allocate dirty vector"); + } + gen->dirty->baseAddr = ap->tospBase; + gen->dirty->numCards = vecSz; +#ifndef BIT_CARDS + memset (gen->dirty->map, CARD_CLEAN, allocSzB - (sizeof(card_map_t) - WORD_SZB)); +#else + memset (gen->dirty->map, 0, allocSzB - (sizeof(card_map_t) - WORD_SZB)); +#endif + +} /* end of NewDirtyVector. */ + + +/* MarkRegion: + * + * Mark the BIBOP entries corresponding to the range [baseAddr, baseAddr+szB) + * with aid. The `szb` parameter should be a multiple of the BIBOP page size + */ +void MarkRegion (bibop_t bibop, ml_val_t *baseAddr, Addr_t szB, aid_t aid) +{ + Addr_t start = BIBOP_ADDR_TO_INDEX(baseAddr); + Addr_t npages = BIBOP_ADDR_TO_INDEX(szB); + Addr_t end = start + npages; +#ifdef SIZE_64 + /* index range in top-level table */ + Unsigned32_t topStart = BIBOP_INDEX_TO_L1_INDEX(start); + Unsigned32_t topEnd = BIBOP_INDEX_TO_L1_INDEX(end); +#endif /* SIZE_64 */ + + ASSERT(npages * BIBOP_PAGE_SZB == szB); + +#ifdef SIZE_64 +#ifdef VERBOSE + SayDebug( + "MarkRegion(-, %p, %p, %x:%x:%02x); start = %d(top:%d), npages = %d, end = %d(top:%d)\n", + baseAddr, szB, EXTRACT_GEN(aid), EXTRACT_OBJC(aid), EXTRACT_HBLK(aid), + start, topStart, npages, end, topEnd); +#endif + ASSERT(BIBOP_ADDR_TO_L1_INDEX(baseAddr) == topStart); + + if (aid == AID_UNMAPPED) { + Unsigned32_t ix, jx, l2Start, l2End; + for (ix = topStart; ix <= topEnd; ix++) { + l2_bibop_t *l2Tbl = bibop[ix]; + ASSERT (l2Tbl != 0); + l2Start = (topStart < ix) ? 0 : (Unsigned32_t)BIBOP_INDEX_TO_L2_INDEX(start); + l2End = (ix < topEnd) ? BIBOP_L2_SZ : (Unsigned32_t)(BIBOP_INDEX_TO_L2_INDEX(end)); +/* FIXME: if l2Start == 0 and l2End == BIBOP_L2_SZ, then we can replace the table with + * L2_Unmapped. + */ + for (jx = l2Start; jx < l2End; jx++) { + l2Tbl->tbl[jx] = aid; + } + l2Tbl->numMapped -= (l2End - l2Start); + } + } + else { + Unsigned32_t ix, jx, l2Start, l2End; + for (ix = topStart; ix <= topEnd; ix++) { + l2_bibop_t *l2Tbl = bibop[ix]; + l2Start = (topStart < ix) ? 0 : (Unsigned32_t)BIBOP_INDEX_TO_L2_INDEX(start); + l2End = (ix < topEnd) ? BIBOP_L2_SZ : (Unsigned32_t)(BIBOP_INDEX_TO_L2_INDEX(end)); + if (l2Tbl == UNMAPPED_L2_TBL) { + bibop[ix] = + l2Tbl = NEW_OBJ(l2_bibop_t); + // initialize the part of the new block that is not being assigned + for (jx = 0; jx < l2Start; jx++) { + l2Tbl->tbl[jx] = AID_UNMAPPED; + } + for (jx = l2End; jx < BIBOP_L2_SZ; jx++) { + l2Tbl->tbl[jx] = AID_UNMAPPED; + } + l2Tbl->numMapped = (l2End - l2Start); + } + else { + l2Tbl->numMapped += (l2End - l2Start); + } + ASSERT((0 <= l2Start) && (l2End <= BIBOP_L2_SZ)); + for (jx = l2Start; jx < l2End; jx++) { + l2Tbl->tbl[jx] = aid; + } + } + } +#else /* 32-bit ML values */ +#ifdef VERBOSE +SayDebug("MarkRegion [%p..%p) (%d pages) as %#x\n", +baseAddr, ((Addr_t)baseAddr)+szB, npages, aid); +#endif + + while (start < end) { + BIBOP_UPDATE(bibop, start, aid); + start++; + } +#endif + +} /* end of MarkRegion */ + + +/* ScanWeakPtrs: + * + * Scan the list of weak pointers, nullifying those that refer to dead + * (i.e., from-space) objects. + */ +void ScanWeakPtrs (heap_t *heap) +{ + ml_val_t *p, *q, *obj, desc; + +/* SayDebug ("ScanWeakPtrs:\n"); */ + for (p = heap->weakList; p != NIL(ml_val_t *); p = q) { + q = PTR_MLtoC(ml_val_t, UNMARK_PTR(p[0])); + obj = (ml_val_t *)(Addr_t)UNMARK_PTR(p[1]); +/* SayDebug (" %p --> %p ", p+1, obj); */ + + switch (EXTRACT_OBJC(ADDR_TO_PAGEID(BIBOP, obj))) { + case OBJC_new: + case OBJC_record: + case OBJC_string: + case OBJC_array: + desc = obj[-1]; + if (desc == DESC_forwarded) { + p[0] = DESC_weak; + p[1] = PTR_CtoML(FOLLOW_FWDOBJ(obj)); +/* SayDebug ("forwarded to %p\n", FOLLOW_FWDOBJ(obj)); */ + } + else { + p[0] = DESC_null_weak; + p[1] = ML_unit; +/* SayDebug ("nullified\n"); */ + } + break; + case OBJC_pair: + if (isDESC(desc = obj[0])) { + p[0] = DESC_weak; + p[1] = PTR_CtoML(FOLLOW_FWDPAIR(desc, obj)); +/* SayDebug ("(pair) forwarded to %p\n", FOLLOW_FWDPAIR(desc, obj)); */ + } + else { + p[0] = DESC_null_weak; + p[1] = ML_unit; +/* SayDebug ("(pair) nullified\n"); */ + } + break; + case OBJC_bigobj: + Die ("weak big object"); + break; + } /* end of switch */ + } + + heap->weakList = NIL(ml_val_t *); + +} /* end of ScanWeakPtrs */ + diff --git a/base/runtime/gc/heap-in-util.c b/base/runtime/gc/heap-in-util.c new file mode 100644 index 0000000..f8cca96 --- /dev/null +++ b/base/runtime/gc/heap-in-util.c @@ -0,0 +1,136 @@ +/*! \file heap-in-util.c + * + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * Utility routines to import an ML heap image. + */ + +#include "ml-base.h" +#include "heap.h" +#include "ml-values.h" +#include "ml-heap-image.h" +#include "c-globals-tbl.h" +#include "heap-input.h" +#include + +#ifndef SEEK_SET +# define SEEK_SET 0 +# define SEEK_END 2 +#endif + +/* local routines */ +PVT status_t ReadBlock (FILE *file, void *blk, size_t len); + + +/* HeapIO_ReadExterns: + */ +ml_val_t *HeapIO_ReadExterns (inbuf_t *bp) +{ + extern_tbl_hdr_t hdr; + ml_val_t *externs; + Byte_t *buf, *cp; + int i; + + /* Read the header */ + HeapIO_ReadBlock(bp, &(hdr), sizeof(hdr)); + + externs = NEW_VEC(ml_val_t, hdr.numExterns); + + /* Read in the names of the exported symbols */ + buf = NEW_VEC(Byte_t, hdr.externSzB); + HeapIO_ReadBlock (bp, buf, hdr.externSzB); + + /* map the names of the external symbols to addresses in the run-time system */ + for (cp = buf, i = 0; i < hdr.numExterns; i++) { + if ((externs[i] = ImportCSymbol ((char *)cp)) == ML_unit) { + Die ("Run-time system does not provide \"%s\"", cp); + } + cp += (strlen((char *)cp) + 1); + } + FREE (buf); + + return externs; + +} /* end of HeapIO_ReadExterns */ + + +/* HeapIO_Seek: + * + * Adjust the next character position to the given position in the + * input stream. + */ +status_t HeapIO_Seek (inbuf_t *bp, off_t offset) +{ + if (bp->file == NULL) { + /* the stream is in-memory */ + Byte_t *newPos = bp->base + offset; + if (bp->buf + bp->nbytes <= newPos) { + return FAILURE; + } + else { + bp->nbytes -= (newPos - bp->buf); + bp->buf = newPos; + return SUCCESS; + } + } + else if (fseek (bp->file, offset, SEEK_SET) != 0) { + Die ("unable to seek on heap image\n"); + } + + bp->nbytes = 0; /* just in case? */ + + return SUCCESS; + +} /* end of HeapIO_Seek */ + + +/* HeapIO_ReadBlock: + */ +status_t HeapIO_ReadBlock (inbuf_t *bp, void *blk, off_t len) +{ + status_t sts = SUCCESS; + + if (bp->nbytes >= len) { + memcpy (blk, bp->buf, len); + bp->nbytes -= len; + bp->buf += len; + } + else if (bp->file != NULL) { + memcpy (blk, bp->buf, bp->nbytes); + sts = ReadBlock (bp->file, ((Byte_t *)blk) + bp->nbytes, len - bp->nbytes); + bp->nbytes = 0; + } + else { + Error ("missing data in memory blast object"); + return FAILURE; + } + + if (bp->needsSwap) { + Die ("byte-swapping not implemented yet"); + } + + return sts; + +} /* end of HeapIO_ReadBlock */ + +/* ReadBlock: + */ +PVT status_t ReadBlock (FILE *file, void *blk, size_t len) +{ + size_t sts; + Byte_t *bp = (Byte_t *)blk; + + while (len > 0) { + sts = fread (bp, 1, len, file); + len -= sts; + bp += sts; + if ((sts < len) && (ferror(file) || feof(file))) { + Error ("unable to read %d bytes from image\n", len); + return FAILURE; + } + } + + return SUCCESS; + +} /* end of ReadBlock. */ diff --git a/base/runtime/gc/heap-input.h b/base/runtime/gc/heap-input.h new file mode 100644 index 0000000..404c908 --- /dev/null +++ b/base/runtime/gc/heap-input.h @@ -0,0 +1,51 @@ +/* heap-input.h + * + * COPYRIGHT (c) 1993 by AT&T Bell Laboratories. + * + */ + +#ifndef _HEAP_INPUT_ +#define _HEAP_INPUT_ + +#include + +#ifndef _ADDR_HASH_ +#include "addr-hash.h" +#endif + +typedef struct { /* An input source for reading heap data. We need */ + /* this because the blaster may need to read from a */ + /* stream that has already had characters read from it. */ + bool_t needsSwap; /* true, if the input bytes need to be swapped */ + FILE *file; /* the file descriptor to read from, once the */ + /* buffered characters are exhausted */ + Byte_t *base; /* the start of the bufferec characters */ + Byte_t *buf; /* the current position in the buffer */ + size_t nbytes; +} inbuf_t; + + +/** Big-object relocation info **/ + +typedef struct { /* big-object relocation info */ + Addr_t oldAddr; /* address in imported heap */ + bigobj_desc_t *newObj; /* corresponding object in the new heap */ +} bo_reloc_t; + +typedef struct { /* big-object region relocation info */ + Addr_t firstPage; /* the address of the first page of the region */ + int nPages; /* the number of big-object pages in the region */ + bo_reloc_t **objMap; /* the map from pages to big-object relocation */ + /* info. */ +} bo_region_reloc_t; + +/* Big-object region hash table interface */ +#define LookupBORegion(tbl, bibopIndex) \ + ((bo_region_reloc_t *)AddrTblLookup(tbl, BIBOP_INDEX_TO_ADDR(bibopIndex))) + +/* Utility routines */ +extern ml_val_t *HeapIO_ReadExterns (inbuf_t *bp); +extern status_t HeapIO_Seek (inbuf_t *bp, off_t offset); +extern status_t HeapIO_ReadBlock (inbuf_t *bp, void *blk, off_t len); + +#endif /* !_HEAP_INPUT_ */ diff --git a/base/runtime/gc/heap-monitor.h b/base/runtime/gc/heap-monitor.h new file mode 100644 index 0000000..5702513 --- /dev/null +++ b/base/runtime/gc/heap-monitor.h @@ -0,0 +1,36 @@ +/* heap-monitor.h + * + * COPYRIGHT (c) 1993 by AT&T Bell Laboratories. + * + * The interface of an X-windows heap monitor. + */ + +#ifndef _HEAP_MONITOR_ +#define _HEAP_MONITOR_ + +#ifndef _ML_BASE_ +#include "ml-base.h" +#endif + + +#ifdef HEAP_MONITOR + +typedef struct monitor monitor_t; + +extern void HeapMon_StartGC (heap_t *heap, int maxCollectedGen); +extern void HeapMon_UpdateHeap (heap_t *heap, int MaxCollectedGen); +extern void HeapMon_MarkRegion (heap_t *heap, ml_val_t *base, Word_t szB, aid_t aid); +extern void HeapMon_MarkFromSp (heap_t *heap, ml_val_t *base, Word_t szB); + +#else + +/* Macros to nullify calls to the heap monitor routines. */ +#define HeapMon_StartGC(A,B) +#define HeapMon_UpdateHeap(A,B) +#define HeapMon_MarkRegion(A,B,C,D) +#define HeapMon_MarkFromSp(A,B,C) + +#endif + +#endif /* !_HEAP_MONITOR_ */ + diff --git a/base/runtime/gc/heap-out-util.c b/base/runtime/gc/heap-out-util.c new file mode 100644 index 0000000..79e7d46 --- /dev/null +++ b/base/runtime/gc/heap-out-util.c @@ -0,0 +1,91 @@ +/* heap-out-util.c + * + * COPYRIGHT (c) 1993 by AT&T Bell Laboratories. + * + * Utility routines to export (or blast) an ML heap image. + */ + +#include "ml-base.h" +#include "heap.h" +#include "ml-values.h" +#include "ml-heap-image.h" +#include "c-globals-tbl.h" +#include "heap-output.h" +#include + + +/* HeapIO_WriteImageHeader: + * + * Blast out the ml_image_hdr_t. + */ +status_t HeapIO_WriteImageHeader (writer_t *wr, int kind) +{ + ml_image_hdr_t hdr; + + hdr.byteOrder = ORDER; + hdr.magic = ((kind == EXPORT_HEAP_IMAGE) || (kind == EXPORT_FN_IMAGE)) + ? IMAGE_MAGIC : BLAST_MAGIC; + hdr.kind = kind; + /* hdr.arch[] */ + /* hdr.opsys[] */ + + WR_Write(wr, &hdr, sizeof(hdr)); + if (WR_Error(wr)) + return FAILURE; + else + return SUCCESS; + +} /* end of HeapIO_WriteImageHeader */ + + +/* HeapIO_WriteExterns: + * + * Write out the external symbol table, returning the number of bytes + * written (-1 on error). + */ +Addr_t HeapIO_WriteExterns (writer_t *wr, export_table_t *tbl) +{ + int i, numExterns; + export_item_t *externs; + extern_tbl_hdr_t hdr; + Addr_t strSize, nbytes = sizeof(extern_tbl_hdr_t), padSzB; + + ExportedSymbols (tbl, &numExterns, &externs); + + for (strSize = 0, i = 0; i < numExterns; i++) + strSize += (strlen((char *)(externs[i])) + 1); + /* include padding to WORD_SZB bytes */ + padSzB = ROUNDUP(strSize, WORD_SZB) - strSize; + strSize += padSzB; + nbytes += strSize; + + /* write out the header */ + hdr.numExterns = numExterns; + hdr.externSzB = strSize; + WR_Write(wr, &hdr, sizeof(hdr)); + + /* write out the external symbols */ + for (i = 0; i < numExterns; i++) { + WR_Write (wr, externs[i], strlen(externs[i])+1); + } + + /* write the padding */ + { + char pad[8] = {0, 0, 0, 0, 0, 0, 0, 0}; + if (padSzB != 0) { + WR_Write (wr, pad, padSzB); + } + } + + /* + done:; + */ + FREE (externs); + + if (WR_Error(wr)) + return -1; + else + return nbytes; + +} /* end of HeapIO_WriteExterns */ + diff --git a/base/runtime/gc/heap-output.h b/base/runtime/gc/heap-output.h new file mode 100644 index 0000000..e098caf --- /dev/null +++ b/base/runtime/gc/heap-output.h @@ -0,0 +1,16 @@ +/* heap-output.h + * + * COPYRIGHT (c) 1993 by AT&T Bell Laboratories. + */ + +#ifndef _HEAP_OUTPUT_ +#define _HEAP_OUTPUT_ + +#ifndef _WRITER_ +#include "writer.h" +#endif + +extern status_t HeapIO_WriteImageHeader (writer_t *wr, int kind); +extern Addr_t HeapIO_WriteExterns (writer_t *wr, export_table_t *tbl); + +#endif /* !_HEAP_OUTPUT_ */ diff --git a/base/runtime/gc/heap.h b/base/runtime/gc/heap.h new file mode 100644 index 0000000..8a53c51 --- /dev/null +++ b/base/runtime/gc/heap.h @@ -0,0 +1,302 @@ +/*! \file heap.h + * + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * These are the definitions for the heap structure. + */ + +#ifndef _HEAP_ +#define _HEAP_ + +#ifndef _ML_BASE_ +#include "ml-base.h" +#endif + +#ifndef _GC_ +#include "gc.h" +#endif + +#ifndef _ARENA_ID_ +#include "arena-id.h" +#endif + +#ifndef _TAGS_ +#include "tags.h" +#endif + +#ifndef _MEMORY_ +typedef struct mem_obj mem_obj_t; +#endif + +#ifndef _CARD_MAP_ +typedef struct card_map card_map_t; +#endif + +#if ((defined(COLLECT_STATS) || defined(GC_STATS)) && (! defined(_CNTR_))) +#include "cntr.h" +#endif + +struct heap_params { + Addr_t allocSz; /* the size of the allocation arena */ + int numGens; + int cacheGen; +}; + +typedef struct repair repair_t; +typedef struct arena arena_t; +typedef struct bigobj_region bigobj_region_t; +typedef struct bigobj_desc bigobj_desc_t; +typedef struct generation gen_t; +/* typedef struct heap heap_t; */ /** defined in ml-base.h **/ + + +/** A heap ** + * A heap consists of an allocation space and one or more generations. + */ +struct heap { + ml_val_t *allocBase; /* The base address of the allocation arena */ + Addr_t allocSzB; /* The size in bytes of the allocation arena */ + mem_obj_t *baseObj; /* The OS memory object that contains the */ + /* allocation arena. */ + int numGens; /* The number of active generations. */ + int cacheGen; /* Cache the from-space for gens 1..cacheGen. */ + int numMinorGCs; /* The number of times the allocation space */ + /* has been collected. */ + gen_t *gen[MAX_NUM_GENS]; /* generation #i is gen[i-1] */ + int numBORegions; /* the number of active big-object regions */ + bigobj_region_t *bigRegions; /* points to the list of big object regions. */ + bigobj_desc_t *freeBigObjs; /* points to the header of the free-list */ + /* of big objects. */ + ml_val_t *weakList; /* A list of weak pointers forwarded*/ + /* during GC. */ +#if (defined(COLLECT_STATS) || defined(GC_STATS)) + cntr_t numAlloc; /* Keep track of the number of bytes */ + /* allocated and the number copied into */ +#ifdef GC_STATS + cntr_t numCopied /* each arena. */ + [MAX_NUM_GENS][NUM_ARENAS]; +#endif +#endif +#ifdef HEAP_MONITOR + struct monitor *monitor; /* The various graphical data structures */ + /* for monitoring the heap. */ +#endif +}; + +#ifdef OLD +/* once we figure out multiple arenas for the MP version, we should + * be able to go back to the old version of this. + */ +#define HEAP_LIMIT(hp) \ + (ml_val_t *)((Addr_t)((hp)->allocBase) + (hp)->allocSzB - HEAP_BUF_SZB) +#else +#define HEAP_LIMIT_SIZE(base,size) \ + (ml_val_t *)((Addr_t)(base) + (size) - HEAP_BUF_SZB) + +#define HEAP_LIMIT(hp) HEAP_LIMIT_SIZE((hp)->allocBase,(hp)->allocSzB) +#endif + + +/** A generation **/ +struct generation { + heap_t *heap; /* A back pointer to the heap data structure */ + int genNum; /* Which generation this is (1..numGens) */ + int numGCs; /* The number of times this generation has been */ + /* collected. */ + int lastPrevGC; /* The number GCs of the previous (younger) generation */ + /* the last time this generation was collected. */ + int ratio; /* The desired number of collections of the previous */ + /* generation for one collection of this generation */ + arena_t *arena[NUM_ARENAS]; + bigobj_desc_t *bigObjs[NUM_BIGOBJ_KINDS]; + mem_obj_t *toObj; /* The O.S. memory objects that this generation is */ + mem_obj_t *fromObj; /* using for the to-space and from-space */ + mem_obj_t *cacheObj; /* For younger generations, we cache the virtual */ + /* memory of from-space, instead of giving it back. */ + card_map_t *dirty; /* The dirty cards in the array arena of this gen. */ +}; + + +/** An arena **/ +struct arena { + aid_t id; /* The to-space version of this arena's identifier */ + ml_val_t *nextw; /* The next word to allocate in this arena's to-space */ + ml_val_t *tospBase; /* the base address and size of to-space. */ + Addr_t tospSizeB; + ml_val_t *tospTop; /* The top of the to-space (tospBase+tospSizeB). */ + ml_val_t *sweep_nextw; /* The next word to sweep in the to-space arena */ + repair_t *repairList; /* points to the top of the repair list (for */ + /* blasting out objects). The repair list grows */ + /* down in to-space. */ + ml_val_t *frspBase; /* the base address and size of from-space. */ + Addr_t frspSizeB; + ml_val_t *frspTop; /* The top of the used portion of from-space. */ + ml_val_t *oldTop; /* The top of the "older" from-space region. Objects */ + /* below oldTop get promoted, those above don't. */ + arena_t *nextGen; /* The arena to promote objects to. */ + bool_t needsRepair; /* Set to TRUE when exporting, if the arena had */ + /* external references that require repair */ + /* Heap sizing parameters: */ + Addr_t reqSizeB; /* requested minimum size for this arena (this is */ + /* in addition to the required min. size). */ + Addr_t maxSizeB; /* a soft maximum size for this arena. */ +}; + +/* Make to-space into from-space */ +#define FLIP_ARENA(ap) { \ + arena_t *__ap = (ap); \ + __ap->frspBase = __ap->tospBase; \ + __ap->frspSizeB = __ap->tospSizeB; \ + __ap->frspTop = __ap->nextw; \ + } + +/* Return true if this arena has an allocated ToSpace */ +#define isACTIVE(a) ((a)->tospSizeB > 0) + +/* Return true if this arena's ToSpace needs sweeping */ +#define NEEDS_SWEEPING(a) ((a)->sweep_nextw < (a)->nextw) + +/* Return the amount of free space (in bytes) available in an arena */ +#define AVAIL_SPACE(a) ((Addr_t)((a)->tospTop) - (Addr_t)((a)->nextw)) + +/* Return the amount of allocated space (in bytes) in an arena */ +#define USED_SPACE(a) ((Addr_t)((a)->nextw) - (Addr_t)((a)->tospBase)) + +/* Return true if the address addr is an older object in this arena */ +#define isOLDER(a,addr) ((addr) < (a)->oldTop) + + +/** Big object regions ** + * + * Currently, the only big objects are code objects. + */ + +/*#define BIGOBJ_PAGE_SHIFT 12*/ /* 4Kb */ +#define BIGOBJ_PAGE_SHIFT 10 /* 1Kb */ +#define BIGOBJ_PAGE_SZB (1 << BIGOBJ_PAGE_SHIFT) + +/* the minimum size of a big-object region should be at least 128K and be a multiple of + * the BIBOP page size. + */ +#if (BIBOP_PAGE_SZB <= 128*ONE_K) +#define MIN_BOREGION_SZB (128*ONE_K) +#else +#define MIN_BOREGION_SZB BIBOP_PAGE_SZB +#endif + +struct bigobj_region { /* A big-object region header */ + Addr_t firstPage; /* the address of the first page of the region */ + int nPages; /* the number of big-object pages in this region */ + int nFree; /* the number of free pages */ + int minGen; /* the minimum generation of the live objects in */ + /* this region. */ + mem_obj_t *memObj; /* the memory object that this is allocated in */ + bigobj_region_t *next; /* the next region in the list of regions */ + bigobj_desc_t *objMap[1]; /* the map from pages to big-object descriptors */ +}; + +struct bigobj_desc { /* A big-object descriptor. */ + Addr_t obj; /* the actual object */ + Addr_t sizeB; /* the size of the object in bytes. When the object */ + /* is in the free list, this will be a multiple of */ + /* BIGOBJ_PAGE_SZB, otherwise it is the exact size. */ + unsigned char objc; /* the object class */ + unsigned char state; /* the state of the object */ + unsigned char gen; /* the object's generation */ + bigobj_region_t *region; /* the region this big object is in */ + bigobj_desc_t *prev; /* the prev and next links. The big-object free */ + bigobj_desc_t *next; /* list is a doubly linked list; the other lists */ + /* are singly linked lists */ +}; + +/* the size of a big-object region header */ +#define BOREGION_HDR_SZB(NPAGES) \ + (sizeof(bigobj_region_t) + ((NPAGES-1)*sizeof(bigobj_desc_t *))) + +/* map an address to a big-object page index */ +#define ADDR_TO_BOPAGE(R, ADDR) \ + (((Addr_t)(ADDR) - (R)->firstPage) >> BIGOBJ_PAGE_SHIFT) + +/* map an address to a big-object descriptor */ +#define ADDR_TO_BODESC(R, ADDR) \ + ((R)->objMap[ADDR_TO_BOPAGE(R, ADDR)]) + +/* the rounded size of a big-object */ +#define BO_ROUNDED_SZB(BDP) ROUNDUP((BDP)->sizeB, BIGOBJ_PAGE_SZB) + +/* the number of big-object pages occupied by a big-object */ +#define BO_NUM_BOPAGES(BDP) (BO_ROUNDED_SZB(BDP) >> BIGOBJ_PAGE_SHIFT) + +/* big-object descriptor states */ +#define BO_FREE 0 /* a free big-object */ +#define BO_YOUNG 1 /* a young object (i.e., one that has never */ + /* been forwarded in its generation */ +#define BO_FORWARD 2 /* a forwarded young object */ +#define BO_OLD 3 /* an old object */ +#define BO_PROMOTE 4 /* a promoted old object */ + +#define BO_IS_FROM_SPACE(dp) (((dp)->state & 0x1) != 0) +#define BO_IS_FREE(dp) ((dp)->state == BO_FREE) + +/* remove a descriptor from a doubly linked list */ +STATIC_INLINE void RemoveBODesc (bigobj_desc_t *dp) +{ + ASSERT((dp->prev != dp) && (dp->next != dp)); + bigobj_desc_t *p = dp->prev; + bigobj_desc_t *n = dp->next; + p->next = n; + n->prev = p; +} + +/* add a descriptor to a doubly linked list */ +STATIC_INLINE void AddBODesc (bigobj_desc_t *hdr, bigobj_desc_t *dp) +{ + bigobj_desc_t *n = hdr->next; + dp->next = n; + dp->prev = hdr; + n->prev = dp; + hdr->next = dp; +} + + +/** operations on forward pointers **/ + +/* follow a forward pointer. HDR is the object header, P is the pointer to + * the object. + * NOTE: we need the two type casts for 32/64 bit systems. + */ +#define FOLLOW_FWDOBJ(HDR) ((ml_val_t *)(((ml_val_t *)(HDR))[0])) +/* follow a pair-space forward pointer (this is tagged as a descriptor). */ +#define FOLLOW_FWDPAIR(DESC, HDR) \ + ((ml_val_t *)(((Addr_t)(DESC)) & ~MAJOR_MASK)) + +/* make a pair-space forward pointer (this is tagged as a descriptor). */ +#define MAKE_PAIR_FP(NEW_ADDR) ((ml_val_t)((Addr_t)(NEW_ADDR) | TAG_desc)) + + +/** External GC functions **/ +extern void MinorGC (ml_state_t *msp, ml_val_t **roots); +extern void MajorGC (ml_state_t *msp, ml_val_t **roots, int level); +extern int Flip (heap_t *heap, int min_gc_level); +extern status_t NewGeneration (gen_t *gen); +extern void FreeGeneration (heap_t *heap, int g); +extern void NewDirtyVector (gen_t *gen); +extern void MarkRegion (bibop_t bibop, ml_val_t *base, Addr_t sizeB, aid_t id); +extern void ScanWeakPtrs (heap_t *heap); + +extern bigobj_desc_t *BO_AllocRegion (heap_t *heap, Addr_t szB); +extern bigobj_desc_t *BO_Alloc (heap_t *heap, int gen, Addr_t objSzB); +extern void BO_Free (heap_t *heap, bigobj_desc_t *desc); +extern bigobj_desc_t *BO_GetDesc (ml_val_t addr); +extern Byte_t *BO_GetCodeObjTag (bigobj_desc_t *bdp); + +#ifdef BO_DEBUG +extern void PrintRegionMap (bigobj_region_t *r); +#endif +#ifdef CHECK_HEAP +extern void CheckBIBOP (heap_t *heap); +extern void CheckHeap (heap_t *heap, int maxSweptGen); +#endif + +#endif /* !_HEAP_ */ diff --git a/base/runtime/gc/import-heap.c b/base/runtime/gc/import-heap.c new file mode 100644 index 0000000..28af53b --- /dev/null +++ b/base/runtime/gc/import-heap.c @@ -0,0 +1,641 @@ +/*! \file import-heap.c + * + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * Routines to import an ML heap image. + */ + +#include +#include +#include "ml-base.h" +#include "machine-id.h" +#include "memory.h" +#include "cache-flush.h" +#include "ml-state.h" +#include "ml-values.h" +#include "ml-objects.h" +#include "ml-globals.h" +#include "card-map.h" +#include "heap.h" +#include "ml-heap-image.h" +#include "c-globals-tbl.h" +#include "addr-hash.h" +#include "heap-input.h" +#include "heap-io.h" + +#if defined(DLOPEN) && !defined(OPSYS_WIN32) +#include +#endif + +#ifdef DEBUG +PVT void PrintRelocMap (bo_region_reloc_t *r) +{ + bo_reloc_t *dp, *dq; + int i; + + SayDebug ("region @%#x: |", r->firstPage); + for (i = 0, dq = r->objMap[0]; i < r->nPages; i++) { + dp = r->objMap[i]; + if (dp != dq) { + SayDebug ("|"); + dq = dp; + } + if (dp == NIL(bo_reloc_t *)) + SayDebug ("_"); + else + SayDebug ("X"); + } + SayDebug ("|\n"); + +} /* end of PrintRelocMap */ +#endif + + +/* local routines */ +PVT void ReadHeap (inbuf_t *bp, ml_heap_hdr_t *hdr, ml_state_t *msp, ml_val_t *externs); +PVT bigobj_desc_t *AllocBODesc (bigobj_desc_t *, bigobj_hdr_t *, bo_region_reloc_t *); +PVT void RepairHeap ( + heap_t *, bibop_t, Addr_t [MAX_NUM_GENS][NUM_ARENAS], + addr_tbl_t *, ml_val_t *); +PVT ml_val_t RepairWord ( + ml_val_t w, bibop_t oldBIBOP, Addr_t addrOffset[MAX_NUM_GENS][NUM_ARENAS], + addr_tbl_t *boRegionTbl, ml_val_t *externs); +PVT bo_reloc_t *AddrToRelocInfo (bibop_t, addr_tbl_t *, aid_t, Addr_t); + +#define READ(bp,obj) HeapIO_ReadBlock(bp, &(obj), sizeof(obj)) + + +/* ImportHeapImage: + */ +ml_state_t *ImportHeapImage (const char *fname, heap_params_t *params) +{ + ml_state_t *msp; + ml_image_hdr_t imHdr; + ml_heap_hdr_t heapHdr; + ml_val_t *externs; + ml_vproc_image_t image; + inbuf_t inBuf; + + if (fname != NULL) { + /* Resolve the name of the image. If the file exists use it, otherwise try the + * pathname with the machine ID as an extension. + */ + if ((inBuf.file = fopen(fname, "rb")) != NULL) { + if (! SilentLoad) { + Say("loading %s ", fname); + } + } + else { + char buf[1024]; + + if (QualifyImageName(strcpy(buf, fname)) + && ((inBuf.file = fopen(buf, "rb")) != NULL)) { + if (! SilentLoad) { + Say("loading %s ", buf); + } + } + else { + Die ("unable to open heap image \"%s\"\n", fname); + } + } + + inBuf.needsSwap = FALSE; + inBuf.buf = NIL(Byte_t *); + inBuf.nbytes = 0; + } else { + /* fname == NULL, so try to find an in-core heap image */ +#if defined(DLOPEN) && !defined(OPSYS_WIN32) + void *lib = dlopen (NULL, RTLD_LAZY); + void *vimg, *vimglenptr; + if ((vimg = dlsym(lib, HEAP_IMAGE_SYMBOL)) == NULL) { + Die("no in-core heap image found\n"); + } + if ((vimglenptr = dlsym(lib, HEAP_IMAGE_LEN_SYMBOL)) == NULL) { + Die("unable to find length of in-core heap image\n"); + } + + inBuf.file = NULL; + inBuf.needsSwap = FALSE; + inBuf.base = vimg; + inBuf.buf = inBuf.base; + inBuf.nbytes = *(long *)vimglenptr; +#else + Die("in-core heap images not implemented\n"); +#endif + } + + READ(&inBuf, imHdr); + if (imHdr.byteOrder != ORDER) + Die ("incorrect byte order in heap image\n"); + if (imHdr.magic != IMAGE_MAGIC) + Die ("bad magic number (%#x) in heap image\n", imHdr.magic); + if ((imHdr.kind != EXPORT_HEAP_IMAGE) && (imHdr.kind != EXPORT_FN_IMAGE)) + Die ("bad image kind (%d) in heap image\n", imHdr.kind); + READ(&inBuf, heapHdr); + + /* check for command-line overrides of heap parameters. */ + if (params->allocSz == 0) params->allocSz = heapHdr.allocSzB; + if (params->numGens < heapHdr.numGens) params->numGens = heapHdr.numGens; + if (params->cacheGen < 0) params->cacheGen = heapHdr.cacheGen; + + msp = AllocMLState (FALSE, params); + + /* get the run-time pointers into the heap */ + *PTR_MLtoC(ml_val_t, PervStruct) = heapHdr.pervStruct; + RunTimeCompUnit = heapHdr.runTimeCompUnit; +#ifdef ASM_MATH + MathVec = heapHdr.mathVec; +#endif + + /* read the externals table */ + externs = HeapIO_ReadExterns (&inBuf); + + /* read and initialize the ML state info */ + READ(&inBuf, image); + if (imHdr.kind == EXPORT_HEAP_IMAGE) { + /* Load the live registers */ + ASSIGN(MLSignalHandler, image.sigHandler); + msp->ml_arg = image.stdArg; + msp->ml_cont = image.stdCont; + msp->ml_closure = image.stdClos; + msp->ml_pc = image.pc; + msp->ml_exnCont = image.exnCont; + msp->ml_varReg = image.varReg; + msp->ml_calleeSave[0] = image.calleeSave[0]; + msp->ml_calleeSave[1] = image.calleeSave[1]; + msp->ml_calleeSave[2] = image.calleeSave[2]; + /* read the ML heap */ + ReadHeap (&inBuf, &heapHdr, msp, externs); + /* GC message are on by default for interactive images */ + /* GCMessages = TRUE; */ + } + else { /* EXPORT_FN_IMAGE */ + ml_val_t funct, cmdName, args; + /* restore the signal handler */ + ASSIGN(MLSignalHandler, image.sigHandler); + /* read the ML heap */ + msp->ml_arg = image.stdArg; + ReadHeap (&inBuf, &heapHdr, msp, externs); + /* initialize the calling context (taken from ApplyMLFn) */ + funct = msp->ml_arg; + msp->ml_exnCont = PTR_CtoML(handle_v+1); + msp->ml_varReg = ML_unit; + msp->ml_cont = PTR_CtoML(return_c); + msp->ml_closure = funct; + msp->ml_pc = + msp->ml_linkReg = GET_CODE_ADDR(funct); + /* setup the arguments to the imported function */ + cmdName = ML_CString(msp, MLCmdName); + args = ML_CStringList (msp, CmdLineArgs); + REC_ALLOC2(msp, msp->ml_arg, cmdName, args); +/* +SayDebug("arg = %#x : [%#x, %#x]\n", msp->ml_arg, REC_SEL(msp->ml_arg, 0), REC_SEL(msp->ml_arg, 1)); +*/ + /* GC message are off by default for exportFn images */ + GCMessages = FALSE; + } + + FREE (externs); + if (inBuf.file != NULL) + fclose (inBuf.file); + + if (! SilentLoad) + Say(" done\n"); + + return msp; + +} /* end of ImportHeapImage */ + +/* ReadHeap: + */ +PVT void ReadHeap (inbuf_t *bp, ml_heap_hdr_t *hdr, ml_state_t *msp, ml_val_t *externs) +{ + heap_t *heap = msp->ml_heap; + heap_arena_hdr_t *arenaHdrs, *p, *q; + int arenaHdrsSize; + int i, j, k; + Addr_t prevSzB[NUM_ARENAS], sz; + bibop_t oldBIBOP; + Addr_t addrOffset[MAX_NUM_GENS][NUM_ARENAS]; + bo_region_reloc_t *boRelocInfo; + addr_tbl_t *boRegionTbl; + + /* Allocate a BIBOP for the imported heap image's address space. */ + oldBIBOP = InitBibop(); + + /* read in the big-object region descriptors for the old address space */ + { + size_t sz; + bo_region_info_t *boRgnHdr; + + boRegionTbl = MakeAddrTbl(BIBOP_PAGE_BITS+1, hdr->numBORegions); + sz = hdr->numBORegions * sizeof(bo_region_info_t); + boRgnHdr = NEW_VEC(bo_region_info_t, hdr->numBORegions); + HeapIO_ReadBlock (bp, boRgnHdr, sz); + +#ifdef VERBOSE + SayDebug ("Marking %d regions for imported big objects\n", hdr->numBORegions); +#endif + boRelocInfo = NEW_VEC(bo_region_reloc_t, hdr->numBORegions); + for (i = 0; i < hdr->numBORegions; i++) { + /* mark the big-object region as being in the `MAX_NUM_GENS` generation */ + MarkRegion(oldBIBOP, + (ml_val_t *)(boRgnHdr[i].baseAddr), + RND_MEMOBJ_SZB(boRgnHdr[i].sizeB), + AID_BIGOBJ(MAX_NUM_GENS)); + ADDR_TO_PAGEID(oldBIBOP,boRgnHdr[i].baseAddr) = AID_BIGOBJ_HDR(MAX_NUM_GENS); + /* set relocation info for the big-object region */ + boRelocInfo[i].firstPage = boRgnHdr[i].firstPage; + boRelocInfo[i].nPages = + (boRgnHdr[i].sizeB - (boRgnHdr[i].firstPage - boRgnHdr[i].baseAddr)) + >> BIGOBJ_PAGE_SHIFT; + boRelocInfo[i].objMap = NEW_VEC(bo_reloc_t *, boRelocInfo[i].nPages); + for (j = 0; j < boRelocInfo[i].nPages; j++) { + boRelocInfo[i].objMap[j] = NIL(bo_reloc_t *); + } + AddrTblInsert (boRegionTbl, boRgnHdr[i].baseAddr, &(boRelocInfo[i])); + } + FREE (boRgnHdr); + } + + /* read the arena headers. */ + arenaHdrsSize = hdr->numGens * NUM_OBJ_KINDS * sizeof(heap_arena_hdr_t); + arenaHdrs = (heap_arena_hdr_t *) MALLOC (arenaHdrsSize); + HeapIO_ReadBlock (bp, arenaHdrs, arenaHdrsSize); + + for (i = 0; i < NUM_ARENAS; i++) + prevSzB[i] = heap->allocSzB; + + /* allocate the arenas and read in the heap image. */ + for (p = arenaHdrs, i = 0; i < hdr->numGens; i++) { + gen_t *gen = heap->gen[i]; + + /* compute the space required for this generation, and mark the oldBIBOP + * to reflect the old address space. + */ + for (q = p, j = 0; j < NUM_ARENAS; j++) { + MarkRegion (oldBIBOP, + (ml_val_t *)(q->info.o.baseAddr), + RND_MEMOBJ_SZB(q->info.o.sizeB), + gen->arena[j]->id); + sz = q->info.o.sizeB + prevSzB[j]; + if ((j == PAIR_INDX) && (sz > 0)) + sz += 2*WORD_SZB; + gen->arena[j]->tospSizeB = RND_MEMOBJ_SZB(sz); + prevSzB[j] = q->info.o.sizeB; + q++; + } + + /* Allocate space for the generation */ + if (NewGeneration(gen) == FAILURE) + Die ("unable to allocated space for generation %d\n", i+1); + if (isACTIVE(gen->arena[ARRAY_INDX])) + NewDirtyVector (gen); + + /* read in the arenas for this generation and initialize the + * address offset table. + */ + for (j = 0; j < NUM_ARENAS; j++) { + arena_t *ap = gen->arena[j]; + + if (p->info.o.sizeB > 0) { + addrOffset[i][j] = (Addr_t)(ap->tospBase) - (Addr_t)(p->info.o.baseAddr); + HeapIO_Seek (bp, (off_t)(p->offset)); + HeapIO_ReadBlock(bp, (ap->tospBase), p->info.o.sizeB); + ap->nextw = (ml_val_t *)((Addr_t)(ap->tospBase) + p->info.o.sizeB); + ap->oldTop = ap->tospBase; + } + else if (isACTIVE(ap)) { + ap->oldTop = ap->tospBase; + } + if (! SilentLoad) { + Say("."); + } + p++; + } + /* read in the big-object arenas */ + for (j = 0; j < NUM_BIGOBJ_KINDS; j++) { + Addr_t totSizeB; + bigobj_desc_t *freeObj, *bdp; + bigobj_region_t *freeRegion; + bigobj_hdr_t *boHdrs; + int boHdrSizeB; + Addr_t indx; + bo_region_reloc_t *region; + + if (p->info.bo.numBOPages > 0) { + totSizeB = p->info.bo.numBOPages << BIGOBJ_PAGE_SHIFT; + freeObj = BO_AllocRegion (heap, totSizeB); + freeRegion = freeObj->region; + freeRegion->minGen = i; + MarkRegion (BIBOP, (ml_val_t *)freeRegion, + MEMOBJ_SZB(freeRegion->memObj), AID_BIGOBJ(i)); + ADDR_TO_PAGEID(BIBOP,freeRegion) = AID_BIGOBJ_HDR(i); + + /* read in the big-object headers */ + boHdrSizeB = p->info.bo.numBigObjs * sizeof(bigobj_hdr_t); + boHdrs = (bigobj_hdr_t *) MALLOC (boHdrSizeB); + HeapIO_ReadBlock (bp, boHdrs, boHdrSizeB); + + /* read in the big-objects */ + HeapIO_ReadBlock (bp, (void *)(freeObj->obj), totSizeB); + if (j == CODE_INDX) { + FlushICache ((void *)(freeObj->obj), totSizeB); + } + + /* setup the big-object descriptors and per-object relocation info */ + bdp = freeObj; + for (k = 0; k < p->info.bo.numBigObjs; k++) { + /* find the region relocation info for the object's region in + * the exported heap. + */ + for (indx = BIBOP_ADDR_TO_INDEX(boHdrs[k].baseAddr); + !BO_IS_HDR(INDEX_TO_PAGEID(oldBIBOP,indx)); + --indx) + { + continue; + } + region = LookupBORegion (boRegionTbl, indx); + ASSERT(region != NIL(bo_region_reloc_t *)); + /* allocate the big-object descriptor for the object, and + * link it into the list of big-objects for its generation. + */ + bdp = AllocBODesc (freeObj, &(boHdrs[k]), region); + bdp->next = gen->bigObjs[j]; + gen->bigObjs[j] = bdp; + ASSERT(bdp->gen == i+1); + + if (DumpObjectStrings && (j == CODE_INDX)) { + /* dump the comment string of the code object */ + char *namestring; + if ((namestring = (char *)BO_GetCodeObjTag(bdp)) != NIL(char *)) + SayDebug ("[%6d bytes] %s\n", bdp->sizeB, namestring); + } + } + + if (freeObj != bdp) { + /* there was some extra space left in the region */ + AddBODesc (heap->freeBigObjs, freeObj); + } + + FREE (boHdrs); + } + if (! SilentLoad) { + Say("."); + } + p++; + } + } + + RepairHeap (heap, oldBIBOP, addrOffset, boRegionTbl, externs); + +#ifdef CHECK_HEAP + SayDebug ("Checking imported heap...\n"); + CheckHeap (heap, hdr->numGens); +#endif + + /* Adjust the run-time globals that point into the heap */ + *PTR_MLtoC(ml_val_t, PervStruct) = RepairWord ( + *PTR_MLtoC(ml_val_t, PervStruct), + oldBIBOP, addrOffset, boRegionTbl, externs); + RunTimeCompUnit = RepairWord ( + RunTimeCompUnit, oldBIBOP, addrOffset, boRegionTbl, externs); +#ifdef ASM_MATH + MathVec = RepairWord (MathVec, oldBIBOP, addrOffset, boRegionTbl, externs); +#endif + + /* Adjust the ML registers to the new address space */ + ASSIGN(MLSignalHandler, RepairWord ( + DEREF(MLSignalHandler), oldBIBOP, addrOffset, boRegionTbl, externs)); + msp->ml_arg = RepairWord ( + msp->ml_arg, oldBIBOP, addrOffset, boRegionTbl, externs); + msp->ml_cont = RepairWord ( + msp->ml_cont, oldBIBOP, addrOffset, boRegionTbl, externs); + msp->ml_closure = RepairWord ( + msp->ml_closure, oldBIBOP, addrOffset, boRegionTbl, externs); + msp->ml_pc = RepairWord ( + msp->ml_pc, oldBIBOP, addrOffset, boRegionTbl, externs); + msp->ml_linkReg = RepairWord ( + msp->ml_linkReg, oldBIBOP, addrOffset, boRegionTbl, externs); + msp->ml_exnCont = RepairWord ( + msp->ml_exnCont, oldBIBOP, addrOffset, boRegionTbl, externs); + msp->ml_varReg = RepairWord ( + msp->ml_varReg, oldBIBOP, addrOffset, boRegionTbl, externs); + msp->ml_calleeSave[0] = RepairWord ( + msp->ml_calleeSave[0], oldBIBOP, addrOffset, boRegionTbl, externs); + msp->ml_calleeSave[1] = RepairWord ( + msp->ml_calleeSave[1], oldBIBOP, addrOffset, boRegionTbl, externs); + msp->ml_calleeSave[2] = RepairWord ( + msp->ml_calleeSave[2], oldBIBOP, addrOffset, boRegionTbl, externs); + + /* release storage */ + for (i = 0; i < hdr->numBORegions; i++) { + bo_reloc_t *p = NIL(bo_reloc_t *); + int nPages = boRelocInfo[i].nPages; + for (j = 0; j < nPages; j++) { + if ((boRelocInfo[i].objMap[j] != NIL(bo_reloc_t *)) + && (boRelocInfo[i].objMap[j] != p)) { + p = boRelocInfo[i].objMap[j]; + /* skip over all entries that map to `p` */ + while ((j < nPages) && (boRelocInfo[i].objMap[j] == p)) { + j++; + } + FREE (p); + } + } + } + FreeAddrTbl (boRegionTbl, FALSE); + FREE (boRelocInfo); + FREE (arenaHdrs); + FreeBibop (oldBIBOP); + + /* reset the sweep_nextw pointers */ + for (i = 0; i < heap->numGens; i++) { + gen_t *gen = heap->gen[i]; + for (j = 0; j < NUM_ARENAS; j++) { + arena_t *ap = gen->arena[j]; + if (isACTIVE(ap)) + ap->sweep_nextw = ap->nextw; + } + } + +} /* end of ReadHeap. */ + +/* AllocBODesc: + * + */ +PVT bigobj_desc_t *AllocBODesc ( + bigobj_desc_t *free, + bigobj_hdr_t *objHdr, + bo_region_reloc_t *oldRegion) +{ + bigobj_region_t *region; + bigobj_desc_t *newObj; + bo_reloc_t *relocInfo; + int i, totSzB, firstPage, npages; + + totSzB = ROUNDUP(objHdr->sizeB, BIGOBJ_PAGE_SZB); + npages = (totSzB >> BIGOBJ_PAGE_SHIFT); + region = free->region; + if (free->sizeB == totSzB) { + /* allocate the whole free area to the object */ + newObj = free; + } + else { + /* split the free object */ + newObj = NEW_OBJ(bigobj_desc_t); + newObj->obj = free->obj; + newObj->region = region; + free->obj = (Addr_t)(free->obj) + totSzB; + free->sizeB -= totSzB; + /* update region's big-object mapping for the new object */ + firstPage = ADDR_TO_BOPAGE(region, newObj->obj); + ASSERT(firstPage + npages <= region->nPages); + for (i = 0; i < npages; i++) { + region->objMap[firstPage+i] = newObj; + } + } + + newObj->sizeB = objHdr->sizeB; + newObj->state = BO_YOUNG; + newObj->gen = objHdr->gen; + newObj->objc = objHdr->objKind; + region->nFree -= npages; + + /* setup the relocation info */ + relocInfo = NEW_OBJ(bo_reloc_t); + relocInfo->oldAddr = objHdr->baseAddr; + relocInfo->newObj = newObj; + firstPage = ADDR_TO_BOPAGE(oldRegion, objHdr->baseAddr); + ASSERT(firstPage + npages <= oldRegion->nPages); + for (i = 0; i < npages; i++) { + oldRegion->objMap[firstPage+i] = relocInfo; + } + + return newObj; + +} /* end of AllocBODesc */ + +/* RepairHeap: + * + * Scan the heap, replacing external references with their addresses and + * adjusting pointers. + */ +PVT void RepairHeap ( + heap_t *heap, + bibop_t oldBIBOP, + Addr_t addrOffset[MAX_NUM_GENS][NUM_ARENAS], + addr_tbl_t *boRegionTbl, + ml_val_t *externs) +{ + int i; + + for (i = 0; i < heap->numGens; i++) { + gen_t *gen = heap->gen[i]; +#ifndef BIT_CARDS +#define MARK(cm, p, g) MARK_CARD(cm, p, g) +#else +#define MARK(cm, p, g) MARK_CARD(cm, p) +#endif +#define RepairArena(indx) { \ + arena_t *__ap = gen->arena[(indx)]; \ + ml_val_t *__p, *__q; \ + __p = __ap->tospBase; \ + __q = __ap->nextw; \ + while (__p < __q) { \ + ml_val_t __w = *__p; \ + int __gg, __objc; \ + if (isBOXED(__w)) { \ + Addr_t __obj = PTR_MLtoADDR(__w); \ + aid_t __aid = ADDR_TO_PAGEID(oldBIBOP, __obj); \ + if (IS_BIGOBJ_AID(__aid)) { \ + bo_reloc_t *__dp; \ + __dp = AddrToRelocInfo (oldBIBOP, boRegionTbl, \ + __aid, __obj); \ + *__p = PTR_CtoML((__obj - __dp->oldAddr) \ + + __dp->newObj->obj); \ + __gg = __dp->newObj->gen-1; \ + } \ + else { \ + __gg = EXTRACT_GEN(__aid)-1; \ + __objc = EXTRACT_OBJC(__aid)-1; \ + *__p = PTR_CtoML(__obj + addrOffset[__gg][__objc]); \ + } \ + if (((indx) == ARRAY_INDX) && (__gg < i)) { \ + MARK(gen->dirty, __p, __gg+1); /** **/ \ + } \ + } \ + else if (isEXTERNTAG(__w)) { \ + *__p = externs[EXTERNID(__w)]; \ + } \ + __p++; \ + } \ + } /* RepairArena */ + + RepairArena(RECORD_INDX); + RepairArena(PAIR_INDX); + RepairArena(ARRAY_INDX); + } + +} /* end of RepairHeap */ + +/* RepairWord: + */ +PVT ml_val_t RepairWord ( + ml_val_t w, + bibop_t oldBIBOP, + Addr_t addrOffset[MAX_NUM_GENS][NUM_ARENAS], + addr_tbl_t *boRegionTbl, + ml_val_t *externs) +{ + if (isBOXED(w)) { + Addr_t obj = PTR_MLtoADDR(w); + aid_t aid = ADDR_TO_PAGEID(oldBIBOP, obj); + if (IS_BIGOBJ_AID(aid)) { + bo_reloc_t *dp; + dp = AddrToRelocInfo (oldBIBOP, boRegionTbl, aid, obj); + return PTR_CtoML((obj - dp->oldAddr) + dp->newObj->obj); + } + else { + int g = EXTRACT_GEN(aid)-1; + int objc = EXTRACT_OBJC(aid)-1; + return PTR_CtoML(PTR_MLtoC(char, w) + addrOffset[g][objc]); + } + } + else if (isEXTERNTAG(w)) { + return externs[EXTERNID(w)]; + } + else + return w; + +} /* end of RepairWord */ + + +/* AddrToRelocInfo: + */ +PVT bo_reloc_t *AddrToRelocInfo ( + bibop_t oldBIBOP, + addr_tbl_t *boRegionTbl, + aid_t id, + Addr_t oldObj) +{ + Addr_t indx; + bo_region_reloc_t *region; + + indx = BIBOP_ADDR_TO_INDEX(oldObj); + while (!BO_IS_HDR(id)) { + --indx; + id = INDEX_TO_PAGEID(oldBIBOP,indx); + } + + /* find the old region descriptor */ + region = LookupBORegion (boRegionTbl, indx); + + if (region == NIL(bo_region_reloc_t *)) + Die ("unable to map big-object @ %#x; index = %#x, id = %#x\n", + oldObj, indx, (unsigned)id); + + return ADDR_TO_BODESC(region, oldObj); + +} /* end of AddrToRelocInfo */ diff --git a/base/runtime/gc/init-gc.c b/base/runtime/gc/init-gc.c new file mode 100644 index 0000000..63a422b --- /dev/null +++ b/base/runtime/gc/init-gc.c @@ -0,0 +1,341 @@ +/*! \file init-gc.c + * + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * The GC initialization code. + */ + +#ifdef PAUSE_STATS /* GC pause statistics are UNIX dependent */ +# include "ml-unixdep.h" +#endif + +#include +#include +#include "ml-base.h" +#include "ml-options.h" +#include "ml-limits.h" +#include "memory.h" +#include "ml-state.h" +#include "ml-values.h" +#include "ml-objects.h" +#include "cntr.h" +#include "heap.h" +#include "heap-monitor.h" +#include "ml-globals.h" +#include "ml-timer.h" +#include "gc-stats.h" +#include "ml-mp.h" + +PVT int DfltRatios[MAX_NUM_GENS] = { + DFLT_RATIO1, DFLT_RATIO2, DFLT_RATIO, DFLT_RATIO, + DFLT_RATIO, DFLT_RATIO, DFLT_RATIO + }; + +bibop_t BIBOP; +#ifdef SIZE_64 +l2_bibop_t UnmappedL2; +#endif + +#ifdef COLLECT_STATS /** should this go into gc-stats.c ??? **/ +bool_t StatsOn = TRUE; /* if TRUE, then generate stats */ +int StatsFD = -1; /* the file descriptor to write the data to */ +stat_rec_t StatsBuf[STATS_BUF_SZ]; /* buffer of data */ +int NStatsRecs; /* the number of records in the buffer */ +#endif + + +/* ParseHeapParams: + * + * Parse and heap parameters from the command-line arguments. + */ +heap_params_t *ParseHeapParams (char **argv) +{ + char option[MAX_OPT_LEN], *optionArg; + bool_t errFlg = FALSE; + char *arg; + heap_params_t *params; + + if ((params = NEW_OBJ(heap_params_t)) == NIL(heap_params_t *)) { + Die("unable to allocate heap_params"); + } + + /* We use 0 or "-1" to signify that the default value should be used. */ + params->allocSz = 0; + params->numGens = -1; + params->cacheGen = -1; + +#define MATCH(opt) (strcmp(opt, option) == 0) +#define CHECK(opt) { \ + if (optionArg[0] == '\0') { \ + errFlg = TRUE; \ + Error("missing argument for \"%s\" option\n", opt); \ + continue; \ + } \ + } /* CHECK */ + + while ((arg = *argv++) != NIL(char *)) { + if (isRuntimeOption(arg, option, &optionArg)) { + if (MATCH("alloc")) { /* set allocation size */ + int allocSz = 0; + CHECK("alloc"); + allocSz = GetSzOption(ONE_K, optionArg); + if (allocSz < 0) { + errFlg = TRUE; + Error ("bad argument for \"@SMLalloc\" option\n"); + } + if (allocSz < MIN_ALLOC_SZB) { + Error ("argument for \"@SMLalloc\" option too small; using %dk\n", + MIN_ALLOC_SZB/ONE_K); + params->allocSz = MIN_ALLOC_SZB; + } + else + params->allocSz = allocSz; + } + else if (MATCH("ngens")) { + CHECK("ngens"); + params->numGens = atoi(optionArg); + if (params->numGens < 1) + params->numGens = 1; + else if (params->numGens > MAX_NGENS) + params->numGens = MAX_NGENS; + } + else if (MATCH("vmcache")) { + CHECK("vmcache"); + params->cacheGen = atoi(optionArg); + if (params->cacheGen < 0) + params->cacheGen = 0; + else if (params->cacheGen > MAX_NGENS) + params->cacheGen = MAX_NGENS; + } + else if (MATCH("unlimited-heap")) + UnlimitedHeap = TRUE; + } + if (errFlg) + return NIL(heap_params_t *); + } /* while */ + + return params; + +} /* end of ParseHeapParams */ + +/* InitBibop: + * + * Initialize the big-bag-of-pages map. + */ +bibop_t InitBibop () +{ + bibop_t bibop; + size_t bibopSz; + int i; + +#ifdef SIZE_64 + /* initialize the level-2 table for unmapped regions */ + for (i = 0; i < BIBOP_L2_SZ; i++) { + UnmappedL2.tbl[i] = AID_UNMAPPED; + } + UnmappedL2.numMapped = 0; + + bibopSz = BIBOP_L1_SZ * sizeof(l2_bibop_t *); +#else + bibopSz = BIBOP_SZ * sizeof(aid_t); +#endif + + if ((bibop = MALLOC(bibopSz)) == NIL(bibop_t)) { + Die("InitBibop: unable to allocate Bibop"); + } + +#ifdef SIZE_64 +#ifdef VERBOSE + SayDebug("InitBibop: UnmappedL2 = %p\n", &UnmappedL2); +#endif + for (i = 0; i < BIBOP_L1_SZ; i++) { + bibop[i] = &UnmappedL2; + } +#else + for (i = 0; i < BIBOP_SZ; i++) { + bibop[i] = AID_UNMAPPED; + } +#endif + + return bibop; +} + +/* FreeBibop: + * + * deallocate memory for a Bibop. + */ +void FreeBibop (bibop_t bibop) +{ +#ifdef SIZE_64 + int i; + for (i = 0; i < BIBOP_L1_SZ; i++) { + if (bibop[i] != &UnmappedL2) { + FREE(bibop[i]); + } + } +#endif + + FREE(bibop); + +} + +/* InitHeap: + * + * Create and initialize the heap. + */ +void InitHeap (ml_state_t *msp, bool_t isBoot, heap_params_t *params) +{ + int i, j, ratio, max_sz; + heap_t *heap; + gen_t *gen; + mem_obj_t *baseObj; + ml_val_t *allocBase; + + if (params->allocSz == 0) params->allocSz = DFLT_ALLOC; + if (params->numGens < 0) params->numGens = DFLT_NGENS; + if (params->cacheGen < 0) params->cacheGen = DFLT_CACHE_GEN; + + /* First we initialize the underlying memory system */ + MEM_InitMemory (); + + /* allocate the base memory object that holds the allocation space */ + { + baseObj = MEM_AllocMemObj (MAX_NUM_PROCS*params->allocSz); + if (baseObj == NIL(mem_obj_t *)) { + Die ("unable to allocate memory object for allocation spaces"); + } + allocBase = (ml_val_t *)MEMOBJ_BASE(baseObj); + } + + /* initialize the BIBOP */ + BIBOP = InitBibop(); + + /* initialize heap descriptor */ + heap = NEW_OBJ(heap_t); + memset ((char *)heap, 0, sizeof(heap_t)); + for (i = 0; i < MAX_NUM_GENS; i++) { + ratio = DfltRatios[i]; + if (i == 0) + max_sz = MAX_SZ1(params->allocSz * MAX_NUM_PROCS); + else { + max_sz = (5*max_sz)/2; + if (max_sz > 64*ONE_MEG) max_sz = 64*ONE_MEG; + } + gen = + heap->gen[i] = NEW_OBJ(gen_t); + gen->heap = heap; + gen->genNum = i+1; + gen->numGCs = 0; + gen->lastPrevGC = 0; + gen->ratio = ratio; + gen->toObj = NIL(mem_obj_t *); + gen->fromObj = NIL(mem_obj_t *); + gen->cacheObj = NIL(mem_obj_t *); + gen->dirty = NIL(card_map_t *); + for (j = 0; j < NUM_ARENAS; j++) { + gen->arena[j] = NEW_OBJ(arena_t); + gen->arena[j]->tospSizeB = 0; + gen->arena[j]->reqSizeB = 0; + gen->arena[j]->maxSizeB = max_sz; + gen->arena[j]->id = MAKE_AID(i+1, j+1, 0); + } + for (j = 0; j < NUM_BIGOBJ_KINDS; j++) { + gen->bigObjs[j] = NIL(bigobj_desc_t *); + } + } + for (i = 0; i < params->numGens; i++) { + int k = (i == params->numGens-1) ? i : i+1; + for (j = 0; j < NUM_ARENAS; j++) { + heap->gen[i]->arena[j]->nextGen = heap->gen[k]->arena[j]; + } + } + heap->numGens = params->numGens; + heap->cacheGen = params->cacheGen; + heap->numMinorGCs = 0; + heap->numBORegions = 0; + heap->bigRegions = NIL(bigobj_region_t *); + heap->freeBigObjs = NEW_OBJ(bigobj_desc_t); + heap->freeBigObjs->obj = (Addr_t)0; + heap->freeBigObjs->sizeB = 0; + heap->freeBigObjs->state = BO_FREE; + heap->freeBigObjs->prev = heap->freeBigObjs; + heap->freeBigObjs->next = heap->freeBigObjs; + heap->weakList = NIL(ml_val_t *); +#ifdef VERBOSE + SayDebug("Free Big Objects list header = %p\n", heap->freeBigObjs); +#endif + + /* initialize new space */ + heap->baseObj = baseObj; + heap->allocBase = allocBase; + heap->allocSzB = MAX_NUM_PROCS*params->allocSz; + MarkRegion (BIBOP, (ml_val_t *)MEMOBJ_BASE(baseObj), MEMOBJ_SZB(heap->baseObj), AID_NEW); +#ifdef VERBOSE + SayDebug ("NewSpace = [%p, %p:%p), %d bytes\n", + heap->allocBase, HEAP_LIMIT(heap), + (void *)((Addr_t)heap->allocBase+params->allocSz), params->allocSz); +#endif + +#ifdef GC_STATS + ClearGCStats (heap); +#endif +#if defined(COLLECT_STATS) + if (StatsFD > 0) { + stat_hdr_t hdr; + CNTR_ZERO(&(heap->numAlloc)); + hdr.mask = STATMASK_ALLOC|STATMASK_NGENS|STATMASK_START|STATMASK_STOP; + hdr.isNewRuntime = 1; + hdr.allocSzB = params->allocSz; + hdr.numGens = params->numGens; + gettimeofday (&(hdr.startTime), NIL(struct timezone *)); + write (StatsFD, (char *)&hdr, sizeof(stat_hdr_t)); + } +#endif + + if (isBoot) { + /* Create the first generation's to-space. */ + for (i = 0; i < NUM_ARENAS; i++) { + heap->gen[0]->arena[i]->tospSizeB = RND_MEMOBJ_SZB(2 * heap->allocSzB); + } + if (NewGeneration(heap->gen[0]) == FAILURE) { + Die ("unable to allocate initial first generation space\n"); + } + for (i = 0; i < NUM_ARENAS; i++) { + heap->gen[0]->arena[i]->oldTop = heap->gen[0]->arena[i]->tospBase; + } + } + + /* initialize the GC related parts of the ML state */ + msp->ml_heap = heap; + msp->ml_allocPtr = (ml_val_t *)(msp->ml_allocArena); +#ifdef SOFT_POLL + ResetPollLimit (msp); +#else + msp->ml_limitPtr = HEAP_LIMIT(heap); +#endif + +#ifdef CHECK_HEAP + CheckBIBOP (heap); + SayDebug("****** GC initialization done ******\n"); +#endif /* CHECK_HEAP */ + +} /* end of InitHeap */ + + +#ifdef GC_STATS +/* ClearGCStats: + */ +void ClearGCStats (heap_t *heap) +{ + int i, j; + + CNTR_ZERO(&(heap->numAlloc)); + for (i = 0; i < MAX_NUM_GENS; i++) { + for (j = 0; j < NUM_ARENAS; j++) { + CNTR_ZERO(&(heap->numCopied[i][j])); + } + } + +} /* end of ClearStats */ +#endif diff --git a/base/runtime/gc/major-gc.c b/base/runtime/gc/major-gc.c new file mode 100644 index 0000000..beafacb --- /dev/null +++ b/base/runtime/gc/major-gc.c @@ -0,0 +1,1040 @@ +/*! \file major-gc.c + * + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * This is the regular garbage collector (for collecting the + * generations). + */ + +#ifdef PAUSE_STATS /* GC pause statistics are UNIX dependent */ +# include "ml-unixdep.h" +#endif + +#include "ml-base.h" +#include "ml-limits.h" +#include "ml-state.h" +#include "ml-values.h" +#include "ml-objects.h" +#include "memory.h" +#include "card-map.h" +#include "heap.h" +#include "tags.h" +#include "copy-loop.h" +#include "heap-monitor.h" +#include "ml-timer.h" +#include "gc-stats.h" + +#ifdef GC_STATS +long lastMinorGC = 0; +long numUpdates = 0; +long numBytesAlloc = 0; +long numBytesCopied = 0; +#endif + +#ifdef BO_REF_STATS +PVT long numBO1, numBO2, numBO3; +#define IFBO_COUNT1(aid) {if (IS_BIGOBJ_AID(aid)) numBO1++;} +#define BO2_COUNT (numBO2)++ +#define BO3_COUNT (numBO3)++ +#else +#define IFBO_COUNT1(aid) {} +#define BO2_COUNT {} +#define BO3_COUNT {} +#endif + +#ifdef COUNT_CARDS +#ifndef BIT_CARDS +PVT unsigned long cardCnt1[MAX_NUM_GENS], cardCnt2[MAX_NUM_GENS]; +#define COUNT_CARD1(i) (cardCnt1[i]++) +#define COUNT_CARD2(i) (cardCnt2[i]++) +#else +PVT unsigned long cardCnt[MAX_NUM_GENS]; +#define COUNT_CARD(i) (cardCnt[i]++) +#endif +#else +#define COUNT_CARD(i) {} +#define COUNT_CARD1(i) {} +#define COUNT_CARD2(i) {} +#endif + + +/** DEBUG **/ +#ifdef BO_DEBUG +PVT void ScanMem (Word_t *start, Word_t *stop, int gen, int objKind) +{ + bibop_t bibop = BIBOP; + Word_t w; + aid_t aid; + bigobj_region_t *region; + bigobj_desc_t *dp; + + while (start < stop) { + w = *start; + if (isBOXED(w)) { + Addr_t indx = BIBOP_ADDR_TO_INDEX(w); + aid_t id = INDEX_TO_PAGEID(bibop,indx); + switch (EXTRACT_OBJC(id)) { + case OBJC_bigobj: + while (!BO_IS_HDR(id)) { + indx--; + id = INDEX_TO_PAGEID(bibop,indx); + } + region = (bigobj_region_t *)BIBOP_INDEX_TO_ADDR(indx); + dp = ADDR_TO_BODESC(region, w); + if (dp->state == BO_FREE) { + SayDebug ("** [%d/%d]: %p --> %p; unexpected free big-object\n", + gen, objKind, start, w); + } + break; + case OBJC_record: + case OBJC_pair: + case OBJC_string: + case OBJC_array: + break; + default: + if (id != AID_UNMAPPED) + SayDebug ("** [%d/%d]: %p --> %p; strange object class %d\n", + gen, objKind, start, w, EXTRACT_OBJC(id)); + break; + } + } + start++; + } +} +#endif /** BO_DEBUG **/ + +/* local routines */ +PVT void MajorGC_ScanRoots ( + ml_state_t *msp, heap_t *heap, ml_val_t **roots, int maxCollectedGen); +PVT void MajorGC_SweepToSpace (heap_t *heap, int maxCollectedGen, int maxSweptGen); +PVT bool_t MajorGC_SweepToSpArrays ( + heap_t *heap, int maxGen, arena_t *tosp, card_map_t *cm); +PVT ml_val_t MajorGC_ForwardObj ( + heap_t *heap, aid_t maxAid, ml_val_t obj, aid_t id); +PVT bigobj_desc_t *MajorGC_ForwardBigObj ( + heap_t *heap, int maxGen, ml_val_t obj, aid_t id); +PVT ml_val_t MajorGC_FwdSpecial ( + heap_t *heap, aid_t maxAid, ml_val_t *obj, aid_t id, ml_val_t desc); +PVT void TrimHeap (heap_t *heap, int maxCollectedGen); + +/* the symbolic names of the arenas */ +char *ArenaName[NUM_ARENAS+1] = { + "new", "record", "pair", "string", "array" + }; + +/* Check a word for a from-space reference */ +#ifdef TOSPACE_ID +#define NO_GC_INLINE /* DEBUG */ +#endif +#ifndef NO_GC_INLINE +#define MajorGC_CheckWord(heap,bibop,maxAid,p) { \ + ml_val_t __w = *(p); \ + if (isBOXED(__w)) { \ + aid_t __aid = ADDR_TO_PAGEID(bibop, __w); \ +IFBO_COUNT1(__aid); \ + if (IS_FROM_SPACE(__aid,maxAid)) { \ + *(p) = MajorGC_ForwardObj(heap, maxAid, __w, __aid); \ + } \ + } \ + } +#else +PVT void MajorGC_CheckWord (heap_t *heap, bibop_t bibop, aid_t maxAid, ml_val_t *p) +{ + ml_val_t w = *(p); + if (isBOXED(w)) { + aid_t arena_id = ADDR_TO_PAGEID(bibop, w); +IFBO_COUNT1(arena_id); \ + if (IS_FROM_SPACE(arena_id, maxAid)) { + *(p) = MajorGC_ForwardObj(heap, maxAid, w, arena_id); + } +#ifdef TOSPACE_ID + else if (IS_TOSPACE_AID(arena_id)) { + Die ("CheckWord: TOSPACE reference: %p (%p) --> %p\n", + p, ADDR_TO_PAGEID(bibop, p), w); + } +#endif + } +} +#endif + + +/* MajorGC: + * + * Do a garbage collection of (at least) the first level generations. + * By definition, level should be at least 1. + */ +void MajorGC (ml_state_t *msp, ml_val_t **roots, int level) +{ + heap_t *heap = msp->ml_heap; + bibop_t bibop = BIBOP; + int i, j; + int maxCollectedGen; /* the oldest generation being collected */ + int maxSweptGen; +#ifdef GC_STATS + ml_val_t *tospTop[NUM_ARENAS]; /* for counting # of bytes forwarded */ +#endif + +#ifndef PAUSE_STATS /* don't do timing when collecting pause data */ + StartGCTimer(msp->ml_vproc); +#endif +#ifdef BO_REF_STATS +numBO1 = numBO2 = numBO3 = 0; +#endif + + /* Flip to-space and from-space */ + maxCollectedGen = Flip (heap, level); + if (maxCollectedGen < heap->numGens) { + maxSweptGen = maxCollectedGen+1; +#ifdef GC_STATS + /* Remember the top of to-space for maxSweptGen */ + for (i = 0; i < NUM_ARENAS; i++) + tospTop[i] = heap->gen[maxSweptGen-1]->arena[i]->nextw; +#endif /* GC_STATS */ + } + else { + maxSweptGen = maxCollectedGen; + } + NUM_GC_GENS(maxCollectedGen); /* record pause info */ + +#ifdef VM_STATS + ReportVM (msp, maxCollectedGen); +#endif + +#ifndef PAUSE_STATS /* don't do messages when collecting pause data */ + if (GCMessages) { + SayDebug ("GC #"); + for (i = heap->numGens-1; i >= 0; i--) { + SayDebug ("%d.", heap->gen[i]->numGCs); + } + SayDebug ("%d: ", heap->numMinorGCs); + } +#endif + + HeapMon_StartGC (heap, maxCollectedGen); + + /* Scan the roots */ + MajorGC_ScanRoots (msp, heap, roots, maxCollectedGen); + + /* Sweep to-space */ + MajorGC_SweepToSpace (heap, maxCollectedGen, maxSweptGen); + + /* Handle weak pointers */ + if (heap->weakList != NIL(ml_val_t *)) + ScanWeakPtrs (heap); + + /* reclaim from-space; we do this from oldest to youngest so that + * we can promote big objects. + */ + for (i = maxCollectedGen; i > 0; i--) { + gen_t *gen = heap->gen[i-1], *promoteGen; + int forwardState, promoteState; + + FreeGeneration (heap, i-1); +#ifdef TOSPACE_ID + for (j = 0; j < NUM_ARENAS; j++) { + arena_t *ap = gen->arena[j]; + if (isACTIVE(ap)) + MarkRegion (bibop, ap->tospBase, ap->tospSizeB, ap->id); + } +#endif + /* NOTE: there should never be any big-objects in the oldest generation + * with the BO_PROMOTE tag. + */ + if (i != heap->numGens) { + promoteGen = heap->gen[i]; + forwardState = BO_OLD; + /* the objects promoted from generation i to generation i+1, when + * generation i+1 is also being collected, are "OLD", thus we need + * to mark the corresponding big objects as old so that they do not + * get out of sync. Since the oldest generation has only YOUNG + * objects, we have to check for that case too. + */ + if ((i == maxCollectedGen) || (i == heap->numGens-1)) + promoteState = BO_YOUNG; + else + promoteState = BO_OLD; + } + else { + /* oldest generation objects are promoted to the same generation */ + promoteGen = heap->gen[i-1]; + forwardState = BO_YOUNG; /* oldest gen has only YOUNG objects */ + promoteState = BO_YOUNG; + } + for (j = 0; j < NUM_BIGOBJ_KINDS; j++) { + bigobj_desc_t *dp, *dq, *forward, *promote; + promote = promoteGen->bigObjs[j]; + forward = NIL(bigobj_desc_t *); + for (dp = gen->bigObjs[j]; dp != NIL(bigobj_desc_t *); ) { + dq = dp->next; + ASSERT(dp->gen == i); + switch (dp->state) { + case BO_YOUNG: + case BO_OLD: + BO_Free (heap, dp); + break; + case BO_FORWARD: + dp->state = forwardState; + dp->next = forward; + forward = dp; + break; + case BO_PROMOTE: + dp->state = promoteState; + dp->next = promote; + dp->gen++; + promote = dp; + break; + default: + Die ("strange bigobject state %d @ %p in generation %d\n", + dp->state, dp, i); + } /* end switch */ + dp = dq; + } + promoteGen->bigObjs[j] = promote; /* a nop for the oldest generation */ + gen->bigObjs[j] = forward; + } + } +#ifdef BO_DEBUG +/** DEBUG **/ +for (i = 0; i < heap->numGens; i++) { +gen_t *gen = heap->gen[i]; +ScanMem((Word_t *)(gen->arena[RECORD_INDX]->tospBase), (Word_t *)(gen->arena[RECORD_INDX]->nextw), i+1, RECORD_INDX); +ScanMem((Word_t *)(gen->arena[PAIR_INDX]->tospBase), (Word_t *)(gen->arena[PAIR_INDX]->nextw), i+1, PAIR_INDX); +ScanMem((Word_t *)(gen->arena[ARRAY_INDX]->tospBase), (Word_t *)(gen->arena[ARRAY_INDX]->nextw), i+1, ARRAY_INDX); +} +/** DEBUG **/ +#endif + + /* relabel BIBOP entries for big-object regions to reflect promotions */ + { + bigobj_region_t *rp; + bigobj_desc_t *dp; + int min; + + for (rp = heap->bigRegions; rp != NIL(bigobj_region_t *); rp = rp->next) { + /* if the minimum generation of the region is less than or equal + * to maxCollectedGen, then it is possible that it has increased + * as a result of promotions or freeing of objects. + */ + if (rp->minGen <= maxCollectedGen) { + min = MAX_NUM_GENS; + for (i = 0; i < rp->nPages; ) { + dp = rp->objMap[i]; + if ((! BO_IS_FREE(dp)) && (dp->gen < min)) + min = dp->gen; + i += BO_NUM_BOPAGES(dp); + } + if (rp->minGen != min) { + rp->minGen = min; + MarkRegion (bibop, (ml_val_t *)rp, MEMOBJ_SZB(rp->memObj), + AID_BIGOBJ(min)); + BIBOP_UPDATE(bibop, BIBOP_ADDR_TO_INDEX(rp), AID_BIGOBJ_HDR(min)); + } + } + } /* end for */ + } + + /* remember the top of to-space in the collected generations */ + for (i = 0; i < maxCollectedGen; i++) { + gen_t *g = heap->gen[i]; + if (i == heap->numGens-1) { + /* the oldest generation has only "young" objects */ + for (j = 0; j < NUM_ARENAS; j++) { + if (isACTIVE(g->arena[j])) + g->arena[j]->oldTop = g->arena[j]->tospBase; + else + g->arena[j]->oldTop = NIL(ml_val_t *); + } + } + else { + for (j = 0; j < NUM_ARENAS; j++) { + if (isACTIVE(g->arena[j])) + g->arena[j]->oldTop = g->arena[j]->nextw; + else + g->arena[j]->oldTop = NIL(ml_val_t *); + } + } + } + + HeapMon_UpdateHeap (heap, maxSweptGen); + +#ifdef GC_STATS + /* Count the number of forwarded bytes */ + if (maxSweptGen != maxCollectedGen) { + gen_t *gen = heap->gen[maxSweptGen-1]; + for (j = 0; j < NUM_ARENAS; j++) { + CNTR_INCR(&(heap->numCopied[maxSweptGen-1][j]), + gen->arena[j]->nextw - tospTop[j]); + } + } + for (i = 0; i < maxCollectedGen; i++) { + for (j = 0; j < NUM_ARENAS; j++) { + arena_t *ap = heap->gen[i]->arena[j]; + if (isACTIVE(ap)) { + CNTR_INCR(&(heap->numCopied[i][j]), ap->nextw - tospTop[j]); + } + } + } +#endif + +#ifdef BO_REF_STATS +SayDebug ("bigobj stats: %d seen, %d lookups, %d forwarded\n", +numBO1, numBO2, numBO3); +#endif +#ifndef PAUSE_STATS /* don't do timing when collecting pause data */ + if (GCMessages) { + long gcTime; + StopGCTimer (msp->ml_vproc, &gcTime); + SayDebug (" (%d ms)\n", gcTime); + } + else + StopGCTimer (msp->ml_vproc, NIL(long *)); +#endif + +#ifdef VM_STATS + ReportVM (msp, 0); +#endif + +#ifdef CHECK_HEAP + CheckHeap(heap, maxSweptGen); +#endif + + if (! UnlimitedHeap) + TrimHeap (heap, maxCollectedGen); + +} /* end of MajorGC. */ + + +/* MajorGC_ScanRoots: + */ +PVT void MajorGC_ScanRoots ( + ml_state_t *msp, + heap_t *heap, + ml_val_t **roots, + int maxCollectedGen) +{ + bibop_t bibop = BIBOP; + aid_t maxAid = MAKE_MAX_AID(maxCollectedGen); + ml_val_t *rp; + int i; + + while ((rp = *roots++) != NIL(ml_val_t *)) { + MajorGC_CheckWord(heap, bibop, maxAid, rp); + } + + /* Scan the dirty cards in the older generations */ + for (i = maxCollectedGen; i < heap->numGens; i++) { + gen_t *gen = heap->gen[i]; +#ifdef COUNT_CARDS +#ifndef BIT_CARDS +/*CARD*/cardCnt1[i]=cardCnt2[i]=0; +#else +/*CARD*/cardCnt[i]=0; +#endif +#endif + if (isACTIVE(gen->arena[ARRAY_INDX])) { + card_map_t *cm = gen->dirty; + if (cm != NIL(card_map_t *)) { + ml_val_t *maxSweep = gen->arena[ARRAY_INDX]->sweep_nextw; + int card; +#if (!defined(BIT_CARDS) && defined(TOSPACE_ID)) + FOR_DIRTY_CARD (cm, maxCollectedGen, card, { + ml_val_t *p = (cm->baseAddr + (card*CARD_SZW)); + ml_val_t *q = p + CARD_SZW; + int mark = i+1; +COUNT_CARD1(i); + if (q > maxSweep) + /* don't sweep above the allocation high-water mark */ + q = maxSweep; + for (; p < q; p++) { + ml_val_t w = *p; + if (isBOXED(w)) { + aid_t aid = ADDR_TO_PAGEID(bibop, w); + int targetGen; +IFBO_COUNT1(aid); + if (IS_FROM_SPACE(aid, maxAid)) { + /* this is a from-space object */ + if (IS_BIGOBJ_AID(aid)) { + bigobj_desc_t *dp; + dp = MajorGC_ForwardBigObj ( + heap, maxCollectedGen, w, aid); + targetGen = dp->gen; + } + else { + *p = + w = MajorGC_ForwardObj(heap, maxAid, w, aid); + aid = ADDR_TO_PAGEID(bibop, w); + if (IS_TOSPACE_AID(aid)) + targetGen = TOSPACE_GEN(aid); + else + targetGen = EXTRACT_GEN(aid); + } + if (targetGen < mark) + mark = targetGen; + } + } + } /* end of for */ + /* re-mark the card */ + ASSERT(cm->map[card] <= mark); + if (mark <= i) + cm->map[card] = mark; + else if (i == maxCollectedGen) + cm->map[card] = CARD_CLEAN; + }); +#elif (!defined(BIT_CARDS)) + FOR_DIRTY_CARD (cm, maxCollectedGen, card, { + ml_val_t *p = (cm->baseAddr + (card*CARD_SZW)); + ml_val_t *q = p + CARD_SZW; + int mark = i+1; +COUNT_CARD1(i); + if (q > maxSweep) + /* don't sweep above the allocation high-water mark */ + q = maxSweep; + for (; p < q; p++) { + ml_val_t w = *p; + if (isBOXED(w)) { + aid_t aid = ADDR_TO_PAGEID(bibop, w); + int targetGen; +IFBO_COUNT1(aid); + if (IS_FROM_SPACE(aid, maxAid)) { + /* this is a from-space object */ +COUNT_CARD2(i); + if (IS_BIGOBJ_AID(aid)) { + bigobj_desc_t *dp; + dp = MajorGC_ForwardBigObj ( + heap, maxCollectedGen, w, aid); + targetGen = dp->gen; + } + else { + *p = + w = MajorGC_ForwardObj(heap, maxAid, w, aid); + targetGen = EXTRACT_GEN(ADDR_TO_PAGEID(bibop, w)); + } + if (targetGen < mark) + mark = targetGen; + } + } + } /* end of for */ + /* re-mark the card */ + ASSERT(cm->map[card] <= mark); + if (mark <= i) + cm->map[card] = mark; + else if (i == maxCollectedGen) + cm->map[card] = CARD_CLEAN; + }); +#else + FOR_DIRTY_CARD (cm, card, { + ml_val_t *p = (cm->baseAddr + (card*CARD_SZW)); + ml_val_t *q = p + CARD_SZW; +COUNT_CARD(i); + if (q > maxSweep) + /* don't sweep above the allocation high-water mark */ + q = maxSweep; + for (; p < q; p++) { + MajorGC_CheckWord (heap, bibop, maxAid, p); + } + }); +#endif + } + } + } /* end of for */ + +#ifdef COUNT_CARDS +/*CARD*/SayDebug ("\n[%d] SWEEP: ", maxCollectedGen); +/*CARD*/for(i = maxCollectedGen; i < heap->numGens; i++) { +/*CARD*/ card_map_t *cm = heap->gen[i]->dirty; +/*CARD*/ if (i > maxCollectedGen) SayDebug (", "); +#ifndef BIT_CARDS +/*CARD*/ SayDebug ("[%d] %d/%d/%d", i+1, cardCnt1[i], cardCnt2[i], +/*CARD*/ (cm != NIL(card_map_t*)) ? cm->numCards : 0); +#else +/*CARD*/ SayDebug ("[%d] %d/%d", i+1, cardCnt[i], +/*CARD*/ (cm != NIL(card_map_t*)) ? cm->numCards : 0); +#endif +/*CARD*/} +/*CARD*/SayDebug ("\n"); +#endif + +} /* end of MajorGC_ScanRoots */ + + +/* MajorGC_SweepToSpace: + * Sweep the to-space arenas. Because there are few references forward in time, we + * try to completely scavenge a younger generation before moving on to the + * next oldest. + */ +PVT void MajorGC_SweepToSpace (heap_t *heap, int maxCollectedGen, int maxSweptGen) +{ + int i; + bool_t swept; + bibop_t bibop = BIBOP; + aid_t maxAid = MAKE_MAX_AID(maxCollectedGen); + +#define SweepToSpArena(gen, indx) { \ + arena_t *__ap = (gen)->arena[(indx)]; \ + if (isACTIVE(__ap)) { \ + ml_val_t *__p, *__q; \ + __p = __ap->sweep_nextw; \ + if (__p < __ap->nextw) { \ + swept = TRUE; \ + do { \ + for (__q = __ap->nextw; __p < __q; __p++) { \ + MajorGC_CheckWord(heap, bibop, maxAid, __p); \ + } \ + } while (__q != __ap->nextw); \ + __ap->sweep_nextw = __q; \ + } \ + } \ + } /* SweepToSpArena */ + + do { + swept = FALSE; + for (i = 0; i < maxSweptGen; i++) { + gen_t *gen = heap->gen[i]; + + /* Sweep the record and pair arenas */ + SweepToSpArena(gen, RECORD_INDX); + SweepToSpArena(gen, PAIR_INDX); + + /* Sweep the array arena */ + { + arena_t *ap = gen->arena[ARRAY_INDX]; + if (isACTIVE(ap) + && MajorGC_SweepToSpArrays (heap, maxCollectedGen, ap, gen->dirty)) + swept = TRUE; + } + } + } while (swept); + +}/* end of SweepToSpace */ + + +/* MajorGC_SweepToSpArrays: + * + * Sweep the to-space of the array arena, returning true if any objects + * are actually swept. + */ +PVT bool_t MajorGC_SweepToSpArrays ( + heap_t *heap, int maxGen, arena_t *tosp, card_map_t *cm) +{ + ml_val_t w, *p, *stop; + int thisGen; + Word_t cardMask = ~(CARD_SZB - 1); + bibop_t bibop = BIBOP; + aid_t maxAid = MAKE_MAX_AID(maxGen); +#ifndef BIT_CARDS + ml_val_t *cardStart; + int cardMark; +#endif + + /* Sweep a single card at a time, looking for references that need to + * be remembered. + */ + thisGen = EXTRACT_GEN(tosp->id); + p = tosp->sweep_nextw; + if (p == tosp->nextw) + return FALSE; + while (p < tosp->nextw) { + stop = (ml_val_t *)(((Addr_t)p + CARD_SZB) & cardMask); + if (stop > tosp->nextw) + stop = tosp->nextw; + /* Sweep the next page until we see a reference to a younger generation */ +#ifndef BIT_CARDS + cardStart = p; + cardMark = CARD(cm, cardStart); +#endif + while (p < stop) { + if (isBOXED(w = *p)) { + aid_t arena_id = ADDR_TO_PAGEID(bibop, w); + int targetGen; + +IFBO_COUNT1(arena_id); + if (IS_FROM_SPACE(arena_id, maxAid)) { + /* this is a from-space object */ + if (IS_BIGOBJ_AID(arena_id)) { + bigobj_desc_t *dp; + dp = MajorGC_ForwardBigObj (heap, maxGen, w, arena_id); + targetGen = dp->gen; + } + else { + *p = w = MajorGC_ForwardObj(heap, maxAid, w, arena_id); +#ifdef TOSPACE_ID + { aid_t aid = ADDR_TO_PAGEID(bibop, w); + if (IS_TOSPACE_AID(aid)) + targetGen = TOSPACE_GEN(aid); + else + targetGen = EXTRACT_GEN(aid); + } +#else + targetGen = EXTRACT_GEN(ADDR_TO_PAGEID(bibop, w)); +#endif + } +#ifndef BIT_CARDS + if (targetGen < cardMark) + cardMark = targetGen; +#else + if (targetGen < thisGen) { + /* the forwarded object is in a younger generation */ + MARK_CARD(cm, p); + /* finish the card up quickly */ + for (p++; p < stop; p++) { + MajorGC_CheckWord(heap, bibop, maxAid, p); + } + break; + } +#endif + } +#ifdef TOSPACE_ID + else if (IS_TOSPACE_AID(arena_id)) { + Die ("Sweep Arrays: TOSPACE reference: %p (%p) --> %p\n", + p, ADDR_TO_PAGEID(bibop, p), w); + } +#endif + } + p++; + } /* end of while */ +#ifndef BIT_CARDS + if (cardMark < thisGen) + MARK_CARD(cm, cardStart, cardMark); +#endif + } /* end of while */ + tosp->sweep_nextw = p; + + return TRUE; + +} /* end of MajorGC_SweepToSpArrays */ + + +/* MajorGC_ForwardObj: + * + * Forward an object. + */ +PVT ml_val_t MajorGC_ForwardObj (heap_t *heap, aid_t maxAid, ml_val_t v, aid_t id) +{ + ml_val_t *obj = PTR_MLtoC(ml_val_t, v); + ml_val_t *new_obj; + ml_val_t desc; + Word_t len; + arena_t *arena; + + switch (EXTRACT_OBJC(id)) { + case OBJC_record: { + desc = obj[-1]; + switch (GET_TAG(desc)) { + case DTAG_vec_hdr: + case DTAG_arr_hdr: + len = 2; + break; + case DTAG_forward: + /* This object has already been forwarded */ + return PTR_CtoML(FOLLOW_FWDOBJ(obj)); + case DTAG_record: + len = GET_LEN(desc); + break; + default: + Die ("bad record tag %d, obj = %p, desc = %p", + GET_TAG(desc), obj, desc); + } /* end of switch */ + arena = heap->gen[EXTRACT_GEN(id)-1]->arena[RECORD_INDX]; + if (isOLDER(arena, obj)) + arena = arena->nextGen; + } break; + + case OBJC_pair: { + ml_val_t w; + + w = obj[0]; + if (isDESC(w)) + return PTR_CtoML(FOLLOW_FWDPAIR(w, obj)); + else { + /* forward the pair */ + arena = heap->gen[EXTRACT_GEN(id)-1]->arena[PAIR_INDX]; + if (isOLDER(arena, obj)) + arena = arena->nextGen; + new_obj = arena->nextw; + arena->nextw += PAIR_SZW; + new_obj[0] = w; + new_obj[1] = obj[1]; + /* setup the forward pointer in the old pair */ + obj[0] = MAKE_PAIR_FP(new_obj); + return PTR_CtoML(new_obj); + } + } break; + + case OBJC_string: { + arena = heap->gen[EXTRACT_GEN(id)-1]->arena[STRING_INDX]; + if (isOLDER(arena, obj)) + arena = arena->nextGen; + desc = obj[-1]; + switch (GET_TAG(desc)) { + case DTAG_forward: + return PTR_CtoML(FOLLOW_FWDOBJ(obj)); + case DTAG_raw: + len = GET_LEN(desc); + break; + case DTAG_raw64: + len = GET_LEN(desc); +#ifdef ALIGN_REALDS +# ifdef CHECK_HEAP + if (((Addr_t)arena->nextw & WORD_SZB) == 0) { + *(arena->nextw) = (ml_val_t)0; + arena->nextw++; + } +# else + arena->nextw = (ml_val_t *)(((Addr_t)arena->nextw) | WORD_SZB); +# endif +#endif + break; + default: + Die ("bad string tag %d, obj = %p, desc = %p", + GET_TAG(desc), obj, desc); + } /* end of switch */ + } break; + + case OBJC_array: { + desc = obj[-1]; + switch (GET_TAG(desc)) { + case DTAG_forward: + /* This object has already been forwarded */ + return PTR_CtoML(FOLLOW_FWDOBJ(obj)); + case DTAG_arr_data: + len = GET_LEN(desc); + break; + case DTAG_special: + return MajorGC_FwdSpecial (heap, maxAid, obj, id, desc); + default: + Die ("bad array tag %d, obj = %p, desc = %p", + GET_TAG(desc), obj, desc); + } /* end of switch */ + arena = heap->gen[EXTRACT_GEN(id)-1]->arena[ARRAY_INDX]; + if (isOLDER(arena, obj)) + arena = arena->nextGen; + } break; + + case OBJC_bigobj: + MajorGC_ForwardBigObj (heap, EXTRACT_GEN(maxAid), v, id); + return v; + + default: + Die("unknown object class %d @ %p", EXTRACT_OBJC(id), obj); + } /* end of switch */ + + /* Allocate and initialize a to-space copy of the object */ + new_obj = arena->nextw; + arena->nextw += (len + 1); + *new_obj++ = desc; + ASSERT(arena->nextw <= arena->tospTop); + COPYLOOP(obj, new_obj, len); + + /* set up the forward pointer, and return the new object. */ + obj[-1] = DESC_forwarded; + obj[0] = (ml_val_t)(Addr_t)new_obj; + + return PTR_CtoML(new_obj); + +} /* end of MajorGC_ForwardObj */ + + +/* MajorGC_ForwardBigObj: + * + * Forward a big-object obj, where id is the BIBOP entry for obj. + * Return the descriptor for obj. + */ +PVT bigobj_desc_t *MajorGC_ForwardBigObj ( + heap_t *heap, int maxGen, ml_val_t obj, aid_t id) +{ + Addr_t i; + bigobj_region_t *region; + bigobj_desc_t *dp; + +BO2_COUNT; + i = BIBOP_ADDR_TO_INDEX(obj); + while (!BO_IS_HDR(id)) { + --i; + id = INDEX_TO_PAGEID(BIBOP,i); + } + region = (bigobj_region_t *)BIBOP_INDEX_TO_ADDR(i); + dp = ADDR_TO_BODESC(region, obj); + if ((dp->gen <= maxGen) && BO_IS_FROM_SPACE(dp)) { +BO3_COUNT; + /* forward the big-object; note that objects in the oldest generation + * will always be YOUNG, thus will never be promoted. + */ + if (dp->state == BO_YOUNG) + dp->state = BO_FORWARD; + else + dp->state = BO_PROMOTE; + } + + return dp; + +} /* end of MajorGC_ForwardBigObj */ + + +/* MajorGC_FwdSpecial: + * + * Forward a special object (suspension, weak pointer, ...). + */ +PVT ml_val_t MajorGC_FwdSpecial ( + heap_t *heap, + aid_t maxAid, + ml_val_t *obj, + aid_t id, + ml_val_t desc +) +{ + gen_t *gen = heap->gen[EXTRACT_GEN(id)-1]; + arena_t *arena = gen->arena[ARRAY_INDX]; + ml_val_t *new_obj; + + if (isOLDER(arena, obj)) + arena = arena->nextGen; + + /* allocate the new object */ + new_obj = arena->nextw; + arena->nextw += SPECIAL_SZW; /* all specials are two words */ + + switch (GET_LEN(desc)) { + case SPCL_evaled_susp: + case SPCL_unevaled_susp: + case SPCL_null_weak: + *new_obj++ = desc; + *new_obj = *obj; + break; + case SPCL_weak: { + ml_val_t v = *obj; +#ifdef DEBUG_WEAK_PTRS +SayDebug ("MajorGC: weak [%p ==> %p] --> %p", obj, new_obj+1, v); +#endif + if (! isBOXED(v)) { +#ifdef DEBUG_WEAK_PTRS +SayDebug (" unboxed\n"); +#endif + /* weak references to unboxed objects are never nullified */ + *new_obj++ = DESC_weak; + *new_obj = v; + } + else { + aid_t aid = ADDR_TO_PAGEID(BIBOP, v); + ml_val_t *vp = PTR_MLtoC(ml_val_t, v); + ml_val_t desc; + + if (IS_FROM_SPACE(aid, maxAid)) { + switch (EXTRACT_OBJC(aid)) { + case OBJC_record: + case OBJC_string: + case OBJC_array: + desc = vp[-1]; + if (desc == DESC_forwarded) { + /* Reference to an object that has already been forwarded. + * NOTE: we have to put the pointer to the non-forwarded + * copy of the object (i.e, v) into the to-space copy + * of the weak pointer, since the GC has the invariant + * it never sees to-space pointers during sweeping. + */ +#ifdef DEBUG_WEAK_PTRS +SayDebug (" already forwarded to %p\n", FOLLOW_FWDOBJ(vp)); +#endif + *new_obj++ = DESC_weak; + *new_obj = v; + } + else { + /* the forwarded version of weak objects are threaded + * via their descriptor fields. We mark the object + * reference field to make it look like an unboxed value, + * so that the to-space sweeper does not follow the weak + * reference. + */ +#ifdef DEBUG_WEAK_PTRS +SayDebug (" forward (start = %p)\n", vp); +#endif + *new_obj = MARK_PTR(PTR_CtoML(gen->heap->weakList)); + gen->heap->weakList = new_obj++; + *new_obj = MARK_PTR(vp); + } + break; + case OBJC_pair: + if (isDESC(desc = vp[0])) { + /* Reference to a pair that has already been forwarded. + * NOTE: we have to put the pointer to the non-forwarded + * copy of the pair (i.e, v) into the to-space copy + * of the weak pointer, since the GC has the invariant + * it never sees to-space pointers during sweeping. + */ +#ifdef DEBUG_WEAK_PTRS +SayDebug (" (pair) already forwarded to %p\n", FOLLOW_FWDPAIR(desc, vp)); +#endif + *new_obj++ = DESC_weak; + *new_obj = v; + } + else { + *new_obj = MARK_PTR(PTR_CtoML(gen->heap->weakList)); + gen->heap->weakList = new_obj++; + *new_obj = MARK_PTR(vp); + } + break; + case OBJC_bigobj: + Die ("weak big object"); + break; + } + } + else { + /* reference to an older object */ +#ifdef DEBUG_WEAK_PTRS +SayDebug (" old object\n"); +#endif + *new_obj++ = DESC_weak; + *new_obj = v; + } + } + } break; + default: + Die ("strange/unexpected special object @ %p; desc = %p\n", obj, desc); + } /* end of switch */ + + obj[-1] = DESC_forwarded; + obj[0] = (ml_val_t)(Addr_t)new_obj; + + return PTR_CtoML(new_obj); + +} /* end of MajorGC_FwdSpecial */ + + +/* TrimHeap: + * + * After a major collection, trim any arenas that are over their maximum + * size in allocated space, but under their maximum size in used space. + */ +PVT void TrimHeap (heap_t *heap, int maxCollectedGen) +{ + int i, j; + gen_t *gen; + arena_t *ap; + Word_t minSzB, newSzB; + + for (i = 0; i < maxCollectedGen; i++) { + gen = heap->gen[i]; + for (j = 0; j < NUM_ARENAS; j++) { + ap = gen->arena[j]; + if (isACTIVE(ap) && (ap->tospSizeB > ap->maxSizeB)) { + minSzB = (i == 0) + ? heap->allocSzB + : heap->gen[i-1]->arena[j]->tospSizeB; + minSzB += (USED_SPACE(ap) + ap->reqSizeB); + if (minSzB < ap->maxSizeB) + newSzB = ap->maxSizeB; + else { + newSzB = RND_MEMOBJ_SZB(minSzB); + /* the calculation of minSz here may return something bigger + * that what flip.c computed! + */ + if (newSzB > ap->tospSizeB) + newSzB = ap->tospSizeB; + } + ap->tospSizeB = newSzB; + ap->tospTop = (ml_val_t *)((Addr_t)ap->tospBase + ap->tospSizeB); + } + } + } + +} /* end of TrimHeap */ diff --git a/base/runtime/gc/major-gc.c.SAV b/base/runtime/gc/major-gc.c.SAV new file mode 100644 index 0000000..162d3db --- /dev/null +++ b/base/runtime/gc/major-gc.c.SAV @@ -0,0 +1,947 @@ +/* major-gc.c + * + * COPYRIGHT (c) 1993 by AT&T Bell Laboratories. + * + * This is the regular garbage collector (for collecting the + * generations). + */ + +#ifndef PAUSE_STATS /* GC pause statistics are UNIX dependent */ +# include "ml-unixdep.h" +#endif + +#include "ml-base.h" +#include "ml-limits.h" +#include "ml-state.h" +#include "ml-values.h" +#include "ml-objects.h" +#include "memory.h" +#include "heap.h" +#include "tags.h" +#include "copy-loop.h" +#include "heap-monitor.h" +#include "timer.h" +#include "gc-stats.h" + +#ifdef GC_STATS +long lastMinorGC = 0; +long numUpdates = 0; +long numBytesAlloc = 0; +long numBytesCopied = 0; +#endif + +#ifdef BO_REF_STATS +PVT long numBO1, numBO2, numBO3; +#endif + +#ifdef COUNT_CARDS +#ifndef BIT_CARDS +PVT unsigned long cardCnt1[MAX_NUM_GENS], cardCnt2[MAX_NUM_GENS]; +#else +PVT unsigned long cardCnt[MAX_NUM_GENS]; +#endif +#endif + + +/** DEBUG **/ +#ifdef BO_DEBUG +PVT void ScanMem (Word_t *start, Word_t *stop, int gen, int objKind) +{ + bibop_t bibop = BIBOP; + Word_t w; + int indx; + aid_t aid; + bigobj_region_t *region; + bigobj_desc_t *dp; + + while (start < stop) { + w = *start; + if (isBOXED(w)) { + int indx = BIBOP_ADDR_TO_INDEX(w); + aid_t id = bibop[indx]; + switch (EXTRACT_OBJC(id)) { + case OBJC_bigobj: + while (!BO_IS_HDR(id)) { + id = bibop[--indx]; + } + region = (bigobj_region_t *)BIBOP_INDEX_TO_ADDR(indx); + dp = ADDR_TO_BODESC(region, w); + if (dp->state == BO_FREE) { + SayDebug ("** [%d/%d]: %#x --> %#x; unexpected free big-object\n", + gen, objKind, start, w); + } + break; + case OBJC_record: + case OBJC_pair: + case OBJC_string: + case OBJC_array: + case OBJC_unmapped: + break; + default: + SayDebug ("** [%d/%d]: %#x --> %#x; strange object class %d\n", + gen, objKind, start, w, EXTRACT_OBJC(id)); + break; + } + } + start++; + } +} +#endif /** BO_DEBUG **/ + +/* local routines */ +PVT void MajorGC_ScanRoots ( + ml_state_t *msp, heap_t *heap, ml_val_t **roots, int maxCollectedGen); +PVT void MajorGC_SweepToSpace (heap_t *heap, int maxCollectedGen, int maxSweptGen); +PVT bool_t MajorGC_SweepToSpArrays ( + heap_t *heap, int maxGen, arena_t *tosp, card_map_t *cm); +PVT ml_val_t MajorGC_ForwardObj ( + heap_t *heap, aid_t maxAid, ml_val_t obj, aid_t id); +PVT bigobj_desc_t *MajorGC_ForwardBigObj ( + heap_t *heap, int maxGen, ml_val_t obj, aid_t id); +PVT ml_val_t MajorGC_FwdSpecial ( + heap_t *heap, aid_t maxAid, ml_val_t *obj, aid_t id, ml_val_t desc); + +/* the symbolic names of the arenas */ +char *ArenaName[NUM_ARENAS+1] = { + "new", "record", "pair", "string", "array" + }; +/* DEBUG */PVT char *StateName[] = {"FREE", "YOUNG", "FORWARD", "OLD", "PROMOTE"}; + +/* Check a word for a from-space reference */ +#ifdef TOSPACE_ID +#define NO_GC_INLINE /* DEBUG */ +#endif +#ifndef NO_GC_INLINE +#ifdef BO_REF_STATS +#define CNT_BOS if (IS_BIGOBJ_AID(__aid)) numBO1++; +#else +#define CNT_BOS +#endif +#define MajorGC_CheckWord(heap,bibop,maxAid,p) { \ + ml_val_t __w = *(p); \ + if (isBOXED(__w)) { \ + aid_t __aid = ADDR_TO_PAGEID(bibop, __w); \ +CNT_BOS\ + if (IS_FROM_SPACE(__aid,maxAid)) { \ + *(p) = MajorGC_ForwardObj(heap, maxAid, __w, __aid); \ + } \ + } \ + } +#else +PVT void MajorGC_CheckWord (heap_t *heap, bibop_t bibop, aid_t maxAid, ml_val_t *p) +{ + ml_val_t w = *(p); + if (isBOXED(w)) { + aid_t arena_id = ADDR_TO_PAGEID(bibop, w); +#ifdef BO_REF_STATS +if (IS_BIGOBJ_AID(arena_id)) numBO1++; +#endif + if (IS_FROM_SPACE(arena_id, maxAid)) { + *(p) = MajorGC_ForwardObj(heap, maxAid, w, arena_id); + } +#ifdef TOSPACE_ID + else if (IS_TOSPACE_AID(arena_id)) { + Die ("CheckWord: TOSPACE reference: %#x (%#x) --> %#x\n", + p, ADDR_TO_PAGEID(bibop, p), w); + } +#endif + } +} +#endif + + +/* MajorGC: + * + * Do a garbage collection of (at least) the first level generations. + * By definition, level should be at least 1. + */ +void MajorGC (ml_state_t *msp, ml_val_t **roots, int level) +{ + heap_t *heap = msp->ml_heap; + bibop_t bibop = BIBOP; + int i, j; + int maxCollectedGen; /* the oldest generation being collected */ + int maxSweptGen; +#ifdef GC_STATS + ml_val_t *tospTop[NUM_ARENAS]; /* for counting # of bytes forwarded */ +#endif + +#ifndef PAUSE_STATS /* don't do timing when collecting pause data */ + StartGCTimer(msp); +#endif +#ifdef BO_REF_STATS +numBO1 = numBO2 = numBO3 = 0; +#endif + + /* Flip to-space and from-space */ + maxCollectedGen = Flip (heap, level); + if (maxCollectedGen < heap->numGens) { + maxSweptGen = maxCollectedGen+1; +#ifdef GC_STATS + /* Remember the top of to-space for maxSweptGen */ + for (i = 0; i < NUM_ARENAS; i++) + tospTop[i] = heap->gen[maxSweptGen-1]->arena[i]->nextw; +#endif /* GC_STATS */ + } + else { + maxSweptGen = maxCollectedGen; + } + NUM_GC_GENS(maxCollectedGen); /* record pause info */ + +#ifdef VM_STATS + ReportVM (msp, maxCollectedGen); +#endif + +#ifndef PAUSE_STATS /* don't do messages when collecting pause data */ +SayDebug ("GC #"); +for (i = heap->numGens-1; i >= 0; i--) { + SayDebug ("%d.", heap->gen[i]->numGCs); +} +SayDebug ("%d: ", heap->numMinorGCs); +#endif + + HeapMon_StartGC (heap, maxCollectedGen); + + /* Scan the roots */ + MajorGC_ScanRoots (msp, heap, roots, maxCollectedGen); + + /* Sweep to-space */ + MajorGC_SweepToSpace (heap, maxCollectedGen, maxSweptGen); + + /* Handle weak pointers */ + if (heap->weakList != NIL(ml_val_t *)) + ScanWeakPtrs (heap); + + /* reclaim from-space; we do this from oldest to youngest so that + * we can promote big objects. + */ + for (i = maxCollectedGen; i > 0; i--) { + gen_t *gen = heap->gen[i-1], *promoteGen; + int forwardState, promoteState; + + FreeGeneration (heap, i-1); +#ifdef TOSPACE_ID + for (j = 0; j < NUM_ARENAS; j++) { + arena_t *ap = gen->arena[j]; + if (isACTIVE(ap)) + MarkRegion (bibop, ap->tospBase, ap->tospSizeB, ap->id); + } +#endif + /* NOTE: there should never be any big-objects in the oldest generation + * with the BO_PROMOTE tag. + */ + if (i != heap->numGens) { + promoteGen = heap->gen[i]; + forwardState = BO_OLD; + /* the objects promoted from generation i to generation i+1, when + * generation i+1 is also being collected, are "OLD", thus we need + * to mark the corresponding big objects as old so that they do not + * get out of sync. Since the oldest generation has only YOUNG + * objects, we have to check for that case too. + */ + if ((i == maxCollectedGen) || (i == heap->numGens-1)) + promoteState = BO_YOUNG; + else + promoteState = BO_OLD; + } + else { + promoteGen = heap->gen[i-1]; + forwardState = BO_YOUNG; /* oldest gen has only YOUNG objects */ + } + for (j = 0; j < NUM_BIGOBJ_KINDS; j++) { + bigobj_desc_t *dp, *dq, *forward, *promote; + promote = promoteGen->bigObjs[j]; + forward = NIL(bigobj_desc_t *); + for (dp = gen->bigObjs[j]; dp != NIL(bigobj_desc_t *); ) { + dq = dp->next; + switch (dp->state) { + case BO_YOUNG: + case BO_OLD: + BO_Free (heap, dp); + break; + case BO_FORWARD: + dp->state = forwardState; + dp->next = forward; + forward = dp; + break; + case BO_PROMOTE: + dp->state = promoteState; + dp->next = promote; + dp->gen++; + promote = dp; + break; + default: + Die ("strange bigobject state %d in generation %d\n", + dp->state, i); + } /* end switch */ + dp = dq; + } + promoteGen->bigObjs[j] = promote; /* a nop for the oldest generation */ + gen->bigObjs[j] = forward; + } + } +#ifdef BO_DEBUG +/** DEBUG **/ +for (i = 0; i < heap->numGens; i++) { +gen_t *gen = heap->gen[i]; +ScanMem((Word_t *)(gen->arena[RECORD_INDX]->tospBase), (Word_t *)(gen->arena[RECORD_INDX]->nextw), i+1, RECORD_INDX); +ScanMem((Word_t *)(gen->arena[PAIR_INDX]->tospBase), (Word_t *)(gen->arena[PAIR_INDX]->nextw), i+1, PAIR_INDX); +ScanMem((Word_t *)(gen->arena[ARRAY_INDX]->tospBase), (Word_t *)(gen->arena[ARRAY_INDX]->nextw), i+1, ARRAY_INDX); +} +/** DEBUG **/ +#endif + + /* relabel BIBOP entries for big-object regions to reflect promotions */ + { + bigobj_region_t *rp; + bigobj_desc_t *dp; + int min; + + for (rp = heap->bigRegions; rp != NIL(bigobj_region_t *); rp = rp->next) { + /* if the minimum generation of the region is less than or equal + * to maxCollectedGen, then it is possible that it has increased + * as a result of promotions or freeing of objects. + */ + if (rp->minGen <= maxCollectedGen) { + min = MAX_NUM_GENS; + for (i = 0; i < rp->nPages; ) { + dp = rp->objMap[i]; + if ((! BO_IS_FREE(dp)) && (dp->gen < min)) + min = dp->gen; + i += BO_NUM_BOPAGES(dp); + } + if (rp->minGen != min) { + rp->minGen = min; + MarkRegion (bibop, (ml_val_t *)rp, MEMOBJ_SZB(rp->memObj), + AID_BIGOBJ(min)); + bibop[BIBOP_ADDR_TO_INDEX(rp)] = AID_BIGOBJ_HDR(min); + } + } + } /* end for */ + } + + /* remember the top of to-space in the collected generations */ + for (i = 0; i < maxCollectedGen; i++) { + gen_t *g = heap->gen[i]; + if (i == heap->numGens-1) { + /* the oldest generation has only "young" objects */ + for (j = 0; j < NUM_ARENAS; j++) { + if (isACTIVE(g->arena[j])) + g->arena[j]->oldTop = g->arena[j]->tospBase; + else + g->arena[j]->oldTop = NIL(ml_val_t *); + } + } + else { + for (j = 0; j < NUM_ARENAS; j++) { + if (isACTIVE(g->arena[j])) + g->arena[j]->oldTop = g->arena[j]->nextw; + else + g->arena[j]->oldTop = NIL(ml_val_t *); + } + } + } + + HeapMon_UpdateHeap (heap, maxSweptGen); + +#ifdef GC_STATS + /* Count the number of forwarded bytes */ + if (maxSweptGen != maxCollectedGen) { + gen_t *gen = heap->gen[maxSweptGen-1]; + for (j = 0; j < NUM_ARENAS; j++) { + CNTR_INCR(&(heap->numCopied[maxSweptGen-1][j]), + gen->arena[j]->nextw - tospTop[j]); + } + } + for (i = 0; i < maxCollectedGen; i++) { + for (j = 0; j < NUM_ARENAS; j++) { + arena_t *ap = heap->gen[i]->arena[j]; + if (isACTIVE(ap)) { + CNTR_INCR(&(heap->numCopied[i][j]), ap->nextw - tospTop[j]); + } + } + } +#endif + +#ifdef BO_REF_STATS +SayDebug ("bigobj stats: %d seen, %d lookups, %d forwarded\n", +numBO1, numBO2, numBO3); +#endif +#ifndef PAUSE_STATS /* don't do timing when collecting pause data */ + { + long gcTime; + StopGCTimer (msp, &gcTime); + SayDebug (" (%d ms)\n", gcTime); + } +#endif + +#ifdef VM_STATS + ReportVM (msp, 0); +#endif + +/* DEBUG CheckHeap(heap, maxSweptGen); */ + +} /* end of MajorGC. */ + + +/* MajorGC_ScanRoots: + */ +PVT void MajorGC_ScanRoots ( + ml_state_t *msp, + heap_t *heap, + ml_val_t **roots, + int maxCollectedGen) +{ + bibop_t bibop = BIBOP; + aid_t maxAid = MAKE_MAX_AID(maxCollectedGen); + ml_val_t *rp; + int i; + + while ((rp = *roots++) != NIL(ml_val_t *)) { + MajorGC_CheckWord(heap, bibop, maxAid, rp); + } + + /* Scan the dirty cards in the older generations */ + for (i = maxCollectedGen; i < heap->numGens; i++) { + gen_t *gen = heap->gen[i]; +#ifdef COUNT_CARDS +#ifndef BIT_CARDS +/*CARD*/cardCnt1[i]=cardCnt2[i]=0; +#else +/*CARD*/cardCnt[i]=0; +#endif +#endif + if (isACTIVE(gen->arena[ARRAY_INDX])) { + card_map_t *cm = gen->dirty; + if (cm != NIL(card_map_t *)) { + ml_val_t *maxSweep = gen->arena[ARRAY_INDX]->sweep_nextw; + int card; +#ifndef BIT_CARDS + FOR_DIRTY_CARD (cm, maxCollectedGen, card, { + ml_val_t *p = (cm->baseAddr + (card*CARD_SZW)); + ml_val_t *q = p + CARD_SZW; + int mark = i+1; +#ifdef COUNT_CARDS +/*CARD*/cardCnt1[i]++; +#endif + if (q > maxSweep) + /* don't sweep above the allocation high-water mark */ + q = maxSweep; + for (; p < q; p++) { + ml_val_t w = *p; + if (isBOXED(w)) { + aid_t aid = ADDR_TO_PAGEID(bibop, w); + int targetGen; +#ifdef BO_REF_STATS +if (IS_BIGOBJ_AID(aid)) numBO1++; +#endif + if (IS_FROM_SPACE(aid, maxAid)) { + /* this is a from-space object */ + if (IS_BIGOBJ_AID(aid)) { + bigobj_desc_t *dp; + dp = MajorGC_ForwardBigObj ( + heap, maxCollectedGen, w, aid); + targetGen = dp->gen; + } + else { + *p = + w = MajorGC_ForwardObj(heap, maxAid, w, aid); +#ifdef TOSPACE_ID + { aid_t aid = ADDR_TO_PAGEID(bibop, w); + if (IS_TOSPACE_AID(aid)) + targetGen = TOSPACE_GEN(aid); + else + targetGen = EXTRACT_GEN(aid); + } +#else + targetGen = EXTRACT_GEN(ADDR_TO_PAGEID(bibop, w)); +#endif + } + if (targetGen < mark) + mark = targetGen; + } + } + } /* end of for */ + /* re-mark the card */ + ASSERT(cm->map[card] <= mark); + if (mark <= i) + cm->map[card] = mark; + else if (i == maxCollectedGen) + cm->map[card] = CARD_CLEAN; + }); +#else + FOR_DIRTY_CARD (cm, card, { + ml_val_t *p = (cm->baseAddr + (card*CARD_SZW)); + ml_val_t *q = p + CARD_SZW; +#ifdef COUNT_CARDS +/*CARD*/cardCnt[i]++; +#endif + if (q > maxSweep) + /* don't sweep above the allocation high-water mark */ + q = maxSweep; + for (; p < q; p++) { + MajorGC_CheckWord (heap, bibop, maxAid, p); + } + }); +#endif + } + } + } /* end of for */ + +#ifdef COUNT_CARDS +/*CARD*/SayDebug ("\n[%d] SWEEP: ", maxCollectedGen); +/*CARD*/for(i = maxCollectedGen; i < heap->numGens; i++) { +/*CARD*/ card_map_t *cm = heap->gen[i]->dirty; +/*CARD*/ if (i > maxCollectedGen) SayDebug (", "); +#ifndef BIT_CARDS +/*CARD*/ SayDebug ("[%d] %d/%d/%d", i+1, cardCnt1[i], cardCnt2[i], +/*CARD*/ (cm != NIL(card_map_t*)) ? cm->numCards : 0); +#else +/*CARD*/ SayDebug ("[%d] %d/%d", i+1, cardCnt[i], +/*CARD*/ (cm != NIL(card_map_t*)) ? cm->numCards : 0); +#endif +/*CARD*/} +/*CARD*/SayDebug ("\n"); +#endif + +} /* end of MajorGC_ScanRoots */ + + +/* MajorGC_SweepToSpace: + * Sweep the to-space arenas. Because there are few references forward in time, we + * try to completely scavenge a younger generation before moving on to the + * next oldest. + */ +PVT void MajorGC_SweepToSpace (heap_t *heap, int maxCollectedGen, int maxSweptGen) +{ + int i; + bool_t swept; + bibop_t bibop = BIBOP; + aid_t maxAid = MAKE_MAX_AID(maxCollectedGen); + +#define SweepToSpArena(gen, indx) { \ + arena_t *__ap = (gen)->arena[(indx)]; \ + if (isACTIVE(__ap)) { \ + ml_val_t *__p, *__q; \ + __p = __ap->sweep_nextw; \ + if (__p < __ap->nextw) { \ + swept = TRUE; \ + do { \ + for (__q = __ap->nextw; __p < __q; __p++) { \ + MajorGC_CheckWord(heap, bibop, maxAid, __p); \ + } \ + } while (__q != __ap->nextw); \ + __ap->sweep_nextw = __q; \ + } \ + } \ + } /* SweepToSpArena */ + + do { + swept = FALSE; + for (i = 0; i < maxSweptGen; i++) { + gen_t *gen = heap->gen[i]; + + /* Sweep the record and pair arenas */ + SweepToSpArena(gen, RECORD_INDX); + SweepToSpArena(gen, PAIR_INDX); + + /* Sweep the array arena */ + { + arena_t *ap = gen->arena[ARRAY_INDX]; + if (isACTIVE(ap) + && MajorGC_SweepToSpArrays (heap, maxCollectedGen, ap, gen->dirty)) + swept = TRUE; + } + } + } while (swept); + +}/* end of SweepToSpace */ + + +/* MajorGC_SweepToSpArrays: + * + * Sweep the to-space of the array arena, returning true if any objects + * are actually swept. + */ +PVT bool_t MajorGC_SweepToSpArrays ( + heap_t *heap, int maxGen, arena_t *tosp, card_map_t *cm) +{ + ml_val_t w, *p, *stop; + int thisGen; + Word_t cardMask = ~(CARD_SZB - 1); + aid_t *bibop = BIBOP; + aid_t maxAid = MAKE_MAX_AID(maxGen); +#ifndef BIT_CARDS + ml_val_t *cardStart; + int cardMark; +#endif + + /* Sweep a single card at a time, looking for references that need to + * be remembered. + */ + thisGen = EXTRACT_GEN(tosp->id); + p = tosp->sweep_nextw; + if (p == tosp->nextw) + return FALSE; + while (p < tosp->nextw) { + stop = (ml_val_t *)(((Addr_t)p + CARD_SZB) & cardMask); + if (stop > tosp->nextw) + stop = tosp->nextw; + /* Sweep the next page until we see a reference to a younger generation */ +#ifndef BIT_CARDS + cardStart = p; + cardMark = CARD(cm, cardStart); +#endif + while (p < stop) { + if (isBOXED(w = *p)) { + aid_t arena_id = ADDR_TO_PAGEID(bibop, w); + int targetGen; + +#ifdef BO_REF_STATS +if (IS_BIGOBJ_AID(arena_id)) numBO1++; +#endif + if (IS_FROM_SPACE(arena_id, maxAid)) { + /* this is a from-space object */ + if (IS_BIGOBJ_AID(arena_id)) { + bigobj_desc_t *dp; + dp = MajorGC_ForwardBigObj (heap, maxGen, w, arena_id); + targetGen = dp->gen; + } + else { + *p = w = MajorGC_ForwardObj(heap, maxAid, w, arena_id); +#ifdef TOSPACE_ID + { aid_t aid = ADDR_TO_PAGEID(bibop, w); + if (IS_TOSPACE_AID(aid)) + targetGen = TOSPACE_GEN(aid); + else + targetGen = EXTRACT_GEN(aid); + } +#else + targetGen = EXTRACT_GEN(ADDR_TO_PAGEID(bibop, w)); +#endif + } +#ifndef BIT_CARDS + if (targetGen < cardMark) + cardMark = targetGen; +#else + if (targetGen < thisGen) { + /* the forwarded object is in a younger generation */ + MARK_CARD(cm, p); + /* finish the card up quickly */ + for (p++; p < stop; p++) { + MajorGC_CheckWord(heap, bibop, maxAid, p); + } + break; + } +#endif + } +#ifdef TOSPACE_ID + else if (IS_TOSPACE_AID(arena_id)) { + Die ("Sweep Arrays: TOSPACE reference: %#x (%#x) --> %#x\n", + p, ADDR_TO_PAGEID(bibop, p), w); + } +#endif + } + p++; + } /* end of while */ +#ifndef BIT_CARDS + if (cardMark < thisGen) + MARK_CARD(cm, cardStart, cardMark); +#endif + } /* end of while */ + tosp->sweep_nextw = p; + + return TRUE; + +} /* end of MajorGC_SweepToSpArrays */ + + +/* MajorGC_ForwardObj: + * + * Forward an object. + */ +PVT ml_val_t MajorGC_ForwardObj (heap_t *heap, aid_t maxAid, ml_val_t v, aid_t id) +{ + ml_val_t *obj = PTR_MLtoC(ml_val_t, v); + ml_val_t *obj_start, *new_obj; + ml_val_t desc; + Word_t len; + arena_t *arena; + + switch (EXTRACT_OBJC(id)) { + case OBJC_record: { + for (obj_start = obj; !isDESC(desc = obj_start[-1]); obj_start--) + continue; + if (desc == DESC_forwarded) + /* This object has already been forwarded */ + return PTR_CtoML(FOLLOW_FWDOBJ(obj_start, obj)); + len = GET_LEN(desc); + arena = heap->gen[EXTRACT_GEN(id)-1]->arena[RECORD_INDX]; + if (isOLDER(arena, obj)) + arena = arena->nextGen; + } break; + + case OBJC_pair: { + ml_val_t w; + + obj_start = (ml_val_t *)((Addr_t)obj & ~(PAIR_SZB-1)); /* in case obj is derived */ + w = obj_start[0]; + if (isDESC(w)) + return PTR_CtoML(FOLLOW_FWDPAIR(w, obj_start, obj)); + else { + /* forward the pair */ + arena = heap->gen[EXTRACT_GEN(id)-1]->arena[PAIR_INDX]; + if (isOLDER(arena, obj)) + arena = arena->nextGen; + new_obj = arena->nextw; + arena->nextw += 2; + new_obj[0] = w; + new_obj[1] = obj_start[1]; + /* setup the forward pointer in the old pair */ + obj_start[0] = MAKE_PAIR_FP(new_obj); + return PTR_CtoML(new_obj + (obj - obj_start)); + } + } break; + + case OBJC_string: { +#ifdef ALIGN_REALDS + int align = 0; +#endif + obj_start = obj; + desc = obj_start[-1]; + switch (GET_TAG(desc)) { + case DTAG_forwarded: + return PTR_CtoML(FOLLOW_FWDOBJ(obj_start, obj)); + case DTAG_string: { + int nChars = GET_LEN(desc); + len = BYTES_TO_WORDS(nChars); + /* include the 0 termination bytes */ + if ((nChars & (WORD_SZB-1)) == 0) len++; + } break; + case DTAG_bytearray: + len = GET_STR_LEN(desc); + break; + case DTAG_reald: + len = REALD_SZW; +#ifdef ALIGN_REALDS + align = WORD_SZB; +#endif + break; + case DTAG_realdarray: + len = GET_REALDARR_LEN(desc); +#ifdef ALIGN_REALDS + align = WORD_SZB; +#endif + break; + default: + Die ("bad string tag %d, obj = %#x, desc = %#x", + GET_TAG(desc), obj, desc); + } + arena = heap->gen[EXTRACT_GEN(id)-1]->arena[STRING_INDX]; + if (isOLDER(arena, obj)) + arena = arena->nextGen; +#ifdef ALIGN_REALDS + arena->nextw = (ml_val_t *)(((Addr_t)arena->nextw) | align); +#endif + } break; + + case OBJC_bigobj: { + int i; + bigobj_region_t *region; + bigobj_desc_t *dp; + int gen; + + MajorGC_ForwardBigObj (heap, EXTRACT_GEN(maxAid), v, id); + return v; + } + + case OBJC_array: { + for (obj_start = obj; !isDESC(desc = obj_start[-1]); obj_start--) + continue; + switch (GET_TAG(desc)) { + case DTAG_forwarded: + /* This object has already been forwarded */ + return PTR_CtoML(FOLLOW_FWDOBJ(obj_start, obj)); + case DTAG_array: + len = GET_LEN(desc); + break; + case DTAG_special: + return MajorGC_FwdSpecial (heap, maxAid, obj, id, desc); + break; + default: + Die("unknown tag %#x @ %#x in array arena\n", + GET_TAG(desc), obj_start); + } /* end of switch */ + arena = heap->gen[EXTRACT_GEN(id)-1]->arena[ARRAY_INDX]; + if (isOLDER(arena, obj)) + arena = arena->nextGen; + } break; + + default: + Die("unknown object class %d @ %#x", EXTRACT_OBJC(id), obj); + } /* end of switch */ + + /* Allocate and initialize a to-space copy of the object */ + new_obj = arena->nextw; + arena->nextw += (len + 1); + *new_obj++ = desc; + ASSERT(arena->nextw <= arena->tospTop); + COPYLOOP(obj_start, new_obj, len); + + /* set up the forward pointer, and return the new object. */ + obj_start[-1] = DESC_forwarded; + obj_start[0] = (ml_val_t)(Addr_t)new_obj; + return PTR_CtoML(new_obj + (obj - obj_start)); + +} /* end of MajorGC_ForwardObj */ + + +/* MajorGC_ForwardBigObj: + * + * Forward a big-object obj, where id is the BIBOP entry for obj. + * Return the descriptor for obj. + */ +PVT bigobj_desc_t *MajorGC_ForwardBigObj ( + heap_t *heap, int maxGen, ml_val_t obj, aid_t id) +{ + int i, gen; + bigobj_region_t *region; + bigobj_desc_t *dp; + +#ifdef BO_REF_STATS +numBO2++; +#endif + for (i = BIBOP_ADDR_TO_INDEX(obj); !BO_IS_HDR(id); id = BIBOP[--i]) + continue; + region = (bigobj_region_t *)BIBOP_INDEX_TO_ADDR(i); + dp = ADDR_TO_BODESC(region, obj); + if (((gen = dp->gen) <= maxGen) && BO_IS_FROM_SPACE(dp)) { +#ifdef BO_REF_STATS +numBO3++; +#endif + /* forward the big-object; note that objects in the oldest generation + * will always be YOUNG, thus will never be promoted. + */ + if (dp->state == BO_YOUNG) + dp->state = BO_FORWARD; + else + dp->state = BO_PROMOTE; + } + + return dp; + +} /* end of MajorGC_ForwardBigObj */ + + +/* MajorGC_FwdSpecial: + * + * Forward a special object (suspension, weak pointer, ...). + */ +PVT ml_val_t MajorGC_FwdSpecial ( + heap_t *heap, + aid_t maxAid, + ml_val_t *obj, + aid_t id, + ml_val_t desc +) +{ + gen_t *gen = heap->gen[EXTRACT_GEN(id)-1]; + arena_t *arena = gen->arena[ARRAY_INDX]; + ml_val_t *new_obj; + + if (isOLDER(arena, obj)) + arena = arena->nextGen; + + /* allocate the new object */ + new_obj = arena->nextw; + arena->nextw += 2; /* all specials are two words */ + + switch (GET_LEN(desc)) { + case SPCL_evaled_susp: + case SPCL_unevaled_susp: + case SPCL_null_weak: + *new_obj++ = desc; + *new_obj = *obj; + break; + case SPCL_weak: { + ml_val_t v = *obj; +/* SayDebug ("MajorGC: weak [%#x ==> %#x] --> %#x", obj, new_obj+1, v); */ + if (! isBOXED(v)) { +/* SayDebug (" unboxed\n"); */ + /* weak references to unboxed objects are never nullified */ + *new_obj++ = DESC_weak; + *new_obj = v; + } + else { + aid_t aid = ADDR_TO_PAGEID(BIBOP, v); + ml_val_t *vp = PTR_MLtoC(ml_val_t, v); + ml_val_t *v_start, desc; + + if (IS_FROM_SPACE(aid, maxAid)) { + switch (EXTRACT_OBJC(aid)) { + case OBJC_record: + case OBJC_string: + case OBJC_array: + for (v_start = vp; !isDESC(desc = v_start[-1]); v_start--) + continue; + if (desc == DESC_forwarded) { + /* reference to an object that has already been + * forwarded. + */ +/* SayDebug (" already forwarded to %#x\n", FOLLOW_FWDOBJ(v_start, vp)); */ + *new_obj++ = DESC_weak; + *new_obj = v; + } + else { + /* the forwarded version of weak objects are threaded + * via their descriptor fields. We mark the object + * reference field to make it look like an unboxed value, + * so that the to-space sweeper does not follow the weak + * reference. + */ +/* SayDebug (" forward (start = %#x)\n", v_start); */ + *new_obj = MARK_PTR(PTR_CtoML(gen->heap->weakList)); + gen->heap->weakList = new_obj++; + *new_obj = MARK_PTR(vp); + } + break; + case OBJC_pair: + v_start = (ml_val_t *)((Addr_t)vp & ~(PAIR_SZB-1)); + if (isDESC(desc = v_start[0])) { + /* reference to a pair that has already been + * forwarded. + */ +/* SayDebug (" (pair) already forwarded to %#x\n", */ +/* FOLLOW_FWDPAIR(desc, v_start, vp)); */ + *new_obj++ = DESC_weak; + *new_obj = v; + } + else { + *new_obj = MARK_PTR(PTR_CtoML(gen->heap->weakList)); + gen->heap->weakList = new_obj++; + *new_obj = MARK_PTR(vp); + } + break; + case OBJC_bigobj: + Die ("weak big object"); + break; + } + } + else { + /* reference to an older object */ +/* SayDebug (" old object\n"); */ + *new_obj++ = DESC_weak; + *new_obj = v; + } + } + } break; + default: + Die ("strange/unexpected special object @ %#x; desc = %#x\n", obj, desc); + } /* end of switch */ + + obj[-1] = DESC_forwarded; + obj[0] = (ml_val_t)(Addr_t)new_obj; + + return PTR_CtoML(new_obj); + +} /* end of MajorGC_FwdSpecial */ diff --git a/base/runtime/gc/makefile b/base/runtime/gc/makefile new file mode 100644 index 0000000..f5b6c42 --- /dev/null +++ b/base/runtime/gc/makefile @@ -0,0 +1,194 @@ +# +# the makefile for the garbage collector and heap I/O library +# + +MAKE = make +AR = ar +ARFLAGS = rcv +RANLIB = ranlib + +LIB = libgc.a +MP_LIB = libmp-gc.a + +GC_OBJS = init-gc.o call-gc.o minor-gc.o major-gc.o flip.o gc-util.o \ + big-objects.o ml-objects.o obj-info.o build-literals.o \ + old-literals.o \ + record-ops.o \ + $(CHECK_HEAP) + +HEAP_IO_OBJS = import-heap.o export-heap.o \ + blast-in.o blast-out.o blast-gc.o \ + heap-in-util.o heap-out-util.o \ + writer.o mem-writer.o \ + addr-hash.o c-globals-tbl.o + +MP_GC_OBJS = mp-gc.o + +OBJS = $(GC_OBJS) $(HEAP_IO_OBJS) +MP_OBJS = $(OBJS) $(MP_GC_OBJS) + +VERSION = v-dummy + +OBJS_DIR = ../objs +INC_DIR = ../include +INCLUDES = -I$(OBJS_DIR) -I$(INC_DIR) + + +$(LIB) : $(VERSION) $(OBJS) + rm -rf $(LIB) + $(AR) $(ARFLAGS) $(LIB) $(OBJS) + $(RANLIB) $(LIB) + +$(MP_LIB) : $(VERSION) $(MP_OBJS) + rm -rf $(MP_LIB) + $(AR) $(ARFLAGS) $(MP_LIB) $(MP_OBJS) + $(RANLIB) $(MP_LIB) + +$(VERSION) : + ($(MAKE) MAKE="$(MAKE)" clean) + echo "$(VERSION)" > $(VERSION) + +# +# GC objects +# +init-gc.o: init-gc.c \ + $(OBJS_DIR)/ml-sizes.h \ + $(INC_DIR)/ml-base.h $(INC_DIR)/ml-limits.h \ + $(INC_DIR)/ml-state.h $(INC_DIR)/ml-values.h $(INC_DIR)/cntr.h \ + $(INC_DIR)/bibop.h $(INC_DIR)/memory.h $(INC_DIR)/ml-mp.h \ + heap.h arena-id.h heap-monitor.h \ + $(INC_DIR)/stats-data.h + $(CC) -c $(CFLAGS) $(DEFS) $(INCLUDES) init-gc.c + +call-gc.o: call-gc.c \ + $(OBJS_DIR)/ml-sizes.h \ + $(INC_DIR)/ml-base.h $(INC_DIR)/ml-limits.h \ + $(INC_DIR)/ml-state.h $(INC_DIR)/ml-values.h $(INC_DIR)/cntr.h \ + $(INC_DIR)/bibop.h $(INC_DIR)/memory.h $(INC_DIR)/ml-mp.h \ + heap.h arena-id.h heap-monitor.h \ + $(INC_DIR)/stats-data.h + $(CC) -c $(CFLAGS) $(DEFS) $(INCLUDES) call-gc.c + +minor-gc.o: minor-gc.c \ + $(OBJS_DIR)/ml-sizes.h \ + $(INC_DIR)/ml-base.h $(INC_DIR)/ml-state.h \ + $(INC_DIR)/ml-values.h $(INC_DIR)/ml-objects.h $(INC_DIR)/tags.h \ + $(INC_DIR)/bibop.h $(INC_DIR)/ml-globals.h \ + card-map.h heap.h arena-id.h copy-loop.h + $(CC) -c $(CFLAGS) $(DEFS) $(INCLUDES) minor-gc.c + +major-gc.o: major-gc.c \ + $(OBJS_DIR)/ml-sizes.h \ + $(INC_DIR)/ml-base.h $(INC_DIR)/ml-limits.h \ + $(INC_DIR)/ml-state.h $(INC_DIR)/ml-values.h $(INC_DIR)/ml-objects.h \ + $(INC_DIR)/bibop.h $(INC_DIR)/tags.h $(INC_DIR)/ml-globals.h \ + card-map.h heap.h arena-id.h copy-loop.h + $(CC) -c $(CFLAGS) $(DEFS) $(INCLUDES) major-gc.c + +flip.o: flip.c \ + $(OBJS_DIR)/ml-sizes.h \ + $(INC_DIR)/ml-base.h $(INC_DIR)/ml-limits.h \ + $(INC_DIR)/bibop.h $(INC_DIR)/ml-state.h \ + heap.h arena-id.h + $(CC) -c $(CFLAGS) $(DEFS) $(INCLUDES) flip.c + +gc-util.o: gc-util.c \ + $(OBJS_DIR)/ml-sizes.h \ + $(INC_DIR)/ml-base.h $(INC_DIR)/ml-limits.h \ + $(INC_DIR)/ml-values.h $(INC_DIR)/bibop.h $(INC_DIR)/memory.h \ + card-map.h heap.h arena-id.h heap-monitor.h + $(CC) -c $(CFLAGS) $(DEFS) $(INCLUDES) gc-util.c + +big-objects.o: big-objects.c \ + $(OBJS_DIR)/ml-sizes.h \ + $(INC_DIR)/ml-base.h $(INC_DIR)/memory.h $(INC_DIR)/bibop.h \ + heap.h arena-id.h + $(CC) -c $(CFLAGS) $(DEFS) $(INCLUDES) big-objects.c + +ml-objects.o: ml-objects.c \ + $(OBJS_DIR)/ml-sizes.h \ + $(INC_DIR)/ml-base.h $(INC_DIR)/ml-objects.h $(INC_DIR)/ml-limits.h \ + $(INC_DIR)/bibop.h $(INC_DIR)/tags.h \ + heap.h arena-id.h + $(CC) -c $(CFLAGS) $(DEFS) $(INCLUDES) ml-objects.c + +build-literals.o: \ + build-literals.c \ + $(OBJS_DIR)/ml-sizes.h \ + $(INC_DIR)/ml-base.h $(INC_DIR)/ml-objects.h $(INC_DIR)/tags.h \ + heap.h arena-id.h + $(CC) -c $(CFLAGS) $(DEFS) $(INCLUDES) build-literals.c + +old-literals.o: \ + old-literals.c \ + $(OBJS_DIR)/ml-sizes.h \ + $(INC_DIR)/ml-base.h $(INC_DIR)/ml-objects.h $(INC_DIR)/tags.h \ + heap.h arena-id.h + $(CC) -c $(CFLAGS) $(DEFS) $(INCLUDES) old-literals.c + +record-ops.o: \ + record-ops.c \ + $(OBJS_DIR)/ml-sizes.h \ + $(INC_DIR)/ml-base.h $(INC_DIR)/ml-objects.h $(INC_DIR)/tags.h \ + arena-id.h + $(CC) -c $(CFLAGS) $(DEFS) $(INCLUDES) record-ops.c + +obj-info.o: obj-info.c \ + $(INC_DIR)/ml-base.h $(INC_DIR)/ml-values.h $(INC_DIR)/gc.h \ + $(INC_DIR)/bibop.h \ + heap.h arena-id.h + $(CC) -c $(CFLAGS) $(DEFS) $(INCLUDES) obj-info.c + +xmonitor.o: xmonitor.c \ + $(OBJS_DIR)/ml-sizes.h $(INC_DIR)/ml-base.h \ + $(INC_DIR)/bibop.h \ + heap.h arena-id.h \ + xmonitor.h heap-monitor.h + $(CC) -c $(CFLAGS) $(DEFS) $(INCLUDES) xmonitor.c + +check-heap.o: check-heap.c \ + $(OBJS_DIR)/ml-sizes.h $(INC_DIR)/ml-base.h \ + $(INC_DIR)/bibop.h \ + card-map.h heap.h arena-id.h + $(CC) -c $(CFLAGS) $(DEFS) $(INCLUDES) check-heap.c + +gc-stats.o: gc-stats.c \ + $(OBJS_DIR)/ml-sizes.h $(INC_DIR)/ml-base.h \ + gc-stats.h + $(CC) -c $(CFLAGS) $(DEFS) $(INCLUDES) gc-stats.c + +mp-gc.o: mp-gc.c \ + $(OBJS_DIR)/ml-sizes.h \ + $(INC_DIR)/ml-base.h $(INC_DIR)/ml-limits.h \ + $(INC_DIR)/ml-state.h $(INC_DIR)/ml-values.h $(INC_DIR)/cntr.h \ + $(INC_DIR)/bibop.h $(INC_DIR)/memory.h $(INC_DIR)/ml-mp.h \ + heap.h arena-id.h heap-monitor.h \ + $(INC_DIR)/stats-data.h + $(CC) -c $(CFLAGS) $(DEFS) $(INCLUDES) mp-gc.c + + +# +# Heap I/O objects +# +c-globals-tbl.o: c-globals-tbl.c \ + $(INC_DIR)/ml-base.h $(INC_DIR)/c-globals-tbl.h + $(CC) -c $(CFLAGS) $(DEFS) $(INCLUDES) c-globals-tbl.c + +import-heap.o: import-heap.c \ + $(OBJS_DIR)/ml-sizes.h \ + $(INC_DIR)/ml-base.h $(INC_DIR)/ml-limits.h \ + $(INC_DIR)/ml-state.h $(INC_DIR)/c-globals-tbl.h \ + $(INC_DIR)/cache-flush.h \ + card-map.h arena-id.h heap.h writer.h \ + ml-heap-image.h addr-hash.h heap-input.h + +.c.o: $< \ + $(OBJS_DIR)/ml-sizes.h $(INC_DIR)/ml-base.h $(INC_DIR)/ml-limits.h \ + $(INC_DIR)/ml-state.h $(INC_DIR)/c-globals-tbl.h \ + arena-id.h heap.h writer.h \ + ml-heap-image.h addr-hash.h heap-input.h + $(CC) -c $(CFLAGS) $(DEFS) $(INCLUDES) $< + +clean : + rm -f v-* *.o $(LIB) $(MP_LIB) + diff --git a/base/runtime/gc/makefile.win32 b/base/runtime/gc/makefile.win32 new file mode 100644 index 0000000..8a224dc --- /dev/null +++ b/base/runtime/gc/makefile.win32 @@ -0,0 +1,221 @@ +# +# the makefile for the garbage collector and heap I/O library +# win32 specific + +MAKEFILE = makefile.win32 +MAKE = nmake /F$(MAKEFILE) +AR = lib +ARFLAGS = +RANLIB = lib + +LIB = libgc.lib +MP_LIB = libmp-gc.lib + +GC_OBJS = init-gc.obj call-gc.obj minor-gc.obj major-gc.obj flip.obj gc-util.obj \ + big-objects.obj ml-objects.obj obj-info.obj build-literals.obj \ + old-literals.obj \ + record-ops.obj \ + $(CHECK_HEAP) + +HEAP_IO_OBJS = import-heap.obj export-heap.obj \ + blast-in.obj blast-out.obj blast-gc.obj \ + heap-in-util.obj heap-out-util.obj \ + writer.obj mem-writer.obj \ + addr-hash.obj c-globals-tbl.obj + +MP_GC_OBJS = mp-gc.obj + +OBJS = $(GC_OBJS) $(HEAP_IO_OBJS) +MP_OBJS = $(OBJS) $(MP_GC_OBJS) + +VERSION = v-dummy + +OBJS_DIR = ..\objs +INC_DIR = ..\include +INCLUDES = -I$(OBJS_DIR) -I$(INC_DIR) + + +$(LIB) : $(VERSION) $(OBJS) + del /F /Q $(LIB) + $(AR) $(ARFLAGS) /out:$(LIB) $(OBJS) + $(RANLIB) /out:$(LIB) + +$(MP_LIB) : $(VERSION) $(MP_OBJS) + del /F /Q $(MP_LIB) + $(AR) $(ARFLAGS) /out:$(MP_LIB) $(MP_OBJS) + $(RANLIB) /out:$(MP_LIB) + +$(VERSION) : + ($(MAKE) MAKE="$(MAKE)" clean) + echo "$(VERSION)" > $(VERSION) + +# +# GC objects +# +init-gc.obj: init-gc.c \ + $(OBJS_DIR)\ml-sizes.h \ + $(INC_DIR)\ml-base.h $(INC_DIR)\ml-limits.h \ + $(INC_DIR)\ml-state.h $(INC_DIR)\ml-values.h $(INC_DIR)\cntr.h \ + $(INC_DIR)\bibop.h $(INC_DIR)\memory.h $(INC_DIR)\ml-mp.h \ + heap.h arena-id.h heap-monitor.h \ + $(INC_DIR)\stats-data.h + $(CC) /c $(CFLAGS) $(DEFS) $(INCLUDES) init-gc.c + +call-gc.obj: call-gc.c \ + $(OBJS_DIR)\ml-sizes.h \ + $(INC_DIR)\ml-base.h $(INC_DIR)\ml-limits.h \ + $(INC_DIR)\ml-state.h $(INC_DIR)\ml-values.h $(INC_DIR)\cntr.h \ + $(INC_DIR)\bibop.h $(INC_DIR)\memory.h $(INC_DIR)\ml-mp.h \ + heap.h arena-id.h heap-monitor.h \ + $(INC_DIR)\stats-data.h + $(CC) /c $(CFLAGS) $(DEFS) $(INCLUDES) call-gc.c + +minor-gc.obj: minor-gc.c \ + $(OBJS_DIR)\ml-sizes.h \ + $(INC_DIR)\ml-base.h $(INC_DIR)\ml-state.h \ + $(INC_DIR)\ml-values.h $(INC_DIR)\ml-objects.h $(INC_DIR)\tags.h \ + $(INC_DIR)\bibop.h $(INC_DIR)\ml-globals.h \ + card-map.h heap.h arena-id.h copy-loop.h + $(CC) /c $(CFLAGS) $(DEFS) $(INCLUDES) minor-gc.c + +major-gc.obj: major-gc.c \ + $(OBJS_DIR)\ml-sizes.h \ + $(INC_DIR)\ml-base.h $(INC_DIR)\ml-limits.h \ + $(INC_DIR)\ml-state.h $(INC_DIR)\ml-values.h $(INC_DIR)\ml-objects.h \ + $(INC_DIR)\bibop.h $(INC_DIR)\tags.h $(INC_DIR)\ml-globals.h \ + card-map.h heap.h arena-id.h copy-loop.h + $(CC) /c $(CFLAGS) $(DEFS) $(INCLUDES) major-gc.c + +flip.obj: flip.c \ + $(OBJS_DIR)\ml-sizes.h \ + $(INC_DIR)\ml-base.h $(INC_DIR)\ml-limits.h \ + $(INC_DIR)\bibop.h $(INC_DIR)\ml-state.h \ + heap.h arena-id.h + $(CC) /c $(CFLAGS) $(DEFS) $(INCLUDES) flip.c + +gc-util.obj: gc-util.c \ + $(OBJS_DIR)\ml-sizes.h \ + $(INC_DIR)\ml-base.h $(INC_DIR)\ml-limits.h \ + $(INC_DIR)\ml-values.h $(INC_DIR)\bibop.h $(INC_DIR)\memory.h \ + card-map.h heap.h arena-id.h heap-monitor.h + $(CC) /c $(CFLAGS) $(DEFS) $(INCLUDES) gc-util.c + +big-objects.obj: big-objects.c \ + $(OBJS_DIR)\ml-sizes.h \ + $(INC_DIR)\ml-base.h $(INC_DIR)\memory.h $(INC_DIR)\bibop.h \ + heap.h arena-id.h + $(CC) /c $(CFLAGS) $(DEFS) $(INCLUDES) big-objects.c + +ml-objects.obj: ml-objects.c \ + $(OBJS_DIR)\ml-sizes.h \ + $(INC_DIR)\ml-base.h $(INC_DIR)\ml-objects.h $(INC_DIR)\ml-limits.h \ + $(INC_DIR)\bibop.h $(INC_DIR)\tags.h \ + heap.h arena-id.h + $(CC) /c $(CFLAGS) $(DEFS) $(INCLUDES) ml-objects.c + +build-literals.obj: \ + build-literals.c \ + $(OBJS_DIR)\ml-sizes.h \ + $(INC_DIR)\ml-base.h $(INC_DIR)\ml-objects.h $(INC_DIR)\tags.h \ + heap.h arena-id.h + $(CC) /c $(CFLAGS) $(DEFS) $(INCLUDES) build-literals.c + +old-literals.obj: \ + old-literals.c \ + $(OBJS_DIR)\ml-sizes.h \ + $(INC_DIR)\ml-base.h $(INC_DIR)\ml-objects.h $(INC_DIR)\tags.h \ + heap.h arena-id.h + $(CC) /c $(CFLAGS) $(DEFS) $(INCLUDES) old-literals.c + +record-ops.obj: \ + record-ops.c \ + $(OBJS_DIR)/ml-sizes.h \ + $(INC_DIR)/ml-base.h $(INC_DIR)/ml-objects.h $(INC_DIR)/tags.h \ + arena-id.h + $(CC) /c $(CFLAGS) $(DEFS) $(INCLUDES) record-ops.c + +obj-info.obj: obj-info.c \ + $(INC_DIR)\ml-base.h $(INC_DIR)\ml-values.h $(INC_DIR)\gc.h \ + $(INC_DIR)\bibop.h \ + heap.h arena-id.h + $(CC) /c $(CFLAGS) $(DEFS) $(INCLUDES) obj-info.c + +xmonitor.obj: xmonitor.c \ + $(OBJS_DIR)\ml-sizes.h $(INC_DIR)\ml-base.h \ + $(INC_DIR)\bibop.h \ + heap.h arena-id.h \ + xmonitor.h heap-monitor.h + $(CC) /c $(CFLAGS) $(DEFS) $(INCLUDES) xmonitor.c + +check-heap.obj: check-heap.c \ + $(OBJS_DIR)\ml-sizes.h $(INC_DIR)\ml-base.h \ + $(INC_DIR)\bibop.h \ + card-map.h heap.h arena-id.h + $(CC) /c $(CFLAGS) $(DEFS) $(INCLUDES) check-heap.c + +gc-stats.obj: gc-stats.c \ + $(OBJS_DIR)\ml-sizes.h $(INC_DIR)\ml-base.h \ + gc-stats.h + $(CC) /c $(CFLAGS) $(DEFS) $(INCLUDES) gc-stats.c + +mp-gc.obj: mp-gc.c \ + $(OBJS_DIR)\ml-sizes.h \ + $(INC_DIR)\ml-base.h $(INC_DIR)\ml-limits.h \ + $(INC_DIR)\ml-state.h $(INC_DIR)\ml-values.h $(INC_DIR)\cntr.h \ + $(INC_DIR)\bibop.h $(INC_DIR)\memory.h $(INC_DIR)\ml-mp.h \ + heap.h arena-id.h heap-monitor.h \ + $(INC_DIR)\stats-data.h + $(CC) /c $(CFLAGS) $(DEFS) $(INCLUDES) mp-gc.c + + +# +# Heap I/O objects +# +c-globals-tbl.obj: c-globals-tbl.c \ + $(INC_DIR)\ml-base.h $(INC_DIR)\c-globals-tbl.h + $(CC) /c $(CFLAGS) $(DEFS) $(INCLUDES) c-globals-tbl.c + +import-heap.obj: import-heap.c \ + $(OBJS_DIR)\ml-sizes.h $(OBJS_DIR)\reg-mask.h \ + $(INC_DIR)\ml-base.h $(INC_DIR)\ml-limits.h \ + $(INC_DIR)\ml-state.h $(INC_DIR)\c-globals-tbl.h \ + $(INC_DIR)\cache-flush.h \ + card-map.h arena-id.h heap.h writer.h \ + ml-heap-image.h addr-hash.h heap-input.h + $(CC) /c $(CFLAGS) $(DEFS) $(INCLUDES) import-heap.c + +DEPENDENTS = $(OBJS_DIR)\ml-sizes.h $(INC_DIR)\ml-base.h $(INC_DIR)\ml-limits.h \ + $(INC_DIR)\ml-state.h $(INC_DIR)\c-globals-tbl.h \ + arena-id.h heap.h writer.h \ + ml-heap-image.h addr-hash.h heap-input.h + +export-heap.obj: export-heap.c $(DEPENDENTS) + $(CC) /c $(CFLAGS) $(DEFS) $(INCLUDES) export-heap.c + +blast-in.obj: blast-in.c $(DEPENDENTS) + $(CC) /c $(CFLAGS) $(DEFS) $(INCLUDES) blast-in.c + +blast-out.obj: blast-out.c $(DEPENDENTS) + $(CC) /c $(CFLAGS) $(DEFS) $(INCLUDES) blast-out.c + +blast-gc.obj: blast-gc.c $(DEPENDENTS) + $(CC) /c $(CFLAGS) $(DEFS) $(INCLUDES) blast-gc.c + +heap-in-util.obj: heap-in-util.c $(DEPENDENTS) + $(CC) /c $(CFLAGS) $(DEFS) $(INCLUDES) heap-in-util.c + +heap-out-util.obj: heap-out-util.c $(DEPENDENTS) + $(CC) /c $(CFLAGS) $(DEFS) $(INCLUDES) heap-out-util.c + +writer.obj: writer.c $(DEPENDENTS) + $(CC) /c $(CFLAGS) $(DEFS) $(INCLUDES) writer.c + +mem-writer.obj: mem-writer.c $(DEPENDENTS) + $(CC) /c $(CFLAGS) $(DEFS) $(INCLUDES) mem-writer.c + +addr-hash.obj: addr-hash.c $(DEPENDENTS) + $(CC) /c $(CFLAGS) $(DEFS) $(INCLUDES) addr-hash.c + +clean : + del /Q /F v-* *.obj *.pdb $(LIB) $(MP_LIB) + diff --git a/base/runtime/gc/mem-writer.c b/base/runtime/gc/mem-writer.c new file mode 100644 index 0000000..d5eaa5a --- /dev/null +++ b/base/runtime/gc/mem-writer.c @@ -0,0 +1,126 @@ +/*! \file mem-writer.c + * + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * An implementation of the abstract writers on top of memory regions. + */ + +#include "ml-base.h" +#include "writer.h" +#include + +#ifndef BUFSIZ +#define BUFSIZ 4096 +#endif + +typedef struct buffer { + Byte_t *base; + Byte_t *next; + Byte_t *top; +} wr_buffer_t; + +PVT void Put (writer_t *wr, Word_t w); +PVT void Write (writer_t *wr, const void *data, Addr_t nbytes); +PVT void Flush (writer_t *wr); +PVT off_t Tell (writer_t *wr); +PVT void Seek (writer_t *wr, off_t offset); +PVT void Free (writer_t *wr); + +#define BufOf(wr) ((wr_buffer_t *)((wr)->data)) + +/* WR_OpenMem: + * + * Open a file for writing, and make a writer for it. + */ +writer_t *WR_OpenMem (Byte_t *data, Addr_t len) +{ + wr_buffer_t *bp; + writer_t *wr; + + bp = NEW_OBJ(wr_buffer_t); + bp->base = data; + bp->next = data; + bp->top = (Byte_t *)(((Addr_t)data) + len); + + wr = NEW_OBJ(writer_t); + wr->errFlg = FALSE; + wr->data = (void *)bp; + wr->putWord = Put; + wr->write = Write; + wr->flush = Flush; + wr->tell = Tell; + wr->seek = Seek; + wr->free = Free; + + return wr; + +} /* end of WR_OpenMem */ + +/* Put: + */ +PVT void Put (writer_t *wr, Word_t w) +{ + wr_buffer_t *bp = BufOf(wr); + + ASSERT(bp->next+WORD_SZB <= bp->top); + + *((Word_t *)(bp->next)) = w; + bp->next += WORD_SZB; + +} /* end of Put */ + +/* Write: + */ +PVT void Write (writer_t *wr, const void *data, Addr_t nbytes) +{ + wr_buffer_t *bp = BufOf(wr); + + if (wr->errFlg) + return; + + ASSERT(bp->next+nbytes <= bp->top); + + memcpy (bp->next, data, nbytes); + bp->next += nbytes; + +} /* end of Write */ + +/* Flush: + */ +PVT void Flush (writer_t *wr) +{ + wr_buffer_t *bp = BufOf(wr); + + ASSERT(bp->next <= bp->top); + +} /* end of Flush */ + +/* Tell: + */ +PVT off_t Tell (writer_t *wr) +{ + Die ("Tell not supported on memory writers"); + +} /* end of Tell */ + +/* Seek: + */ +PVT void Seek (writer_t *wr, off_t offset) +{ + Die ("Tell not supported on memory writers"); + +} /* end of Seek */ + +/* Free: + */ +PVT void Free (writer_t *wr) +{ + wr_buffer_t *bp = BufOf(wr); + + ASSERT(bp->next == bp->top); + + FREE (BufOf(wr)); + FREE (wr); + +} /* end of Free */ diff --git a/base/runtime/gc/minor-gc.c b/base/runtime/gc/minor-gc.c new file mode 100644 index 0000000..ff64719 --- /dev/null +++ b/base/runtime/gc/minor-gc.c @@ -0,0 +1,472 @@ +/*! \file minor-gc.c + * + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * This is the code for doing minor collections (i.e., collecting the + * allocation arena). + */ + +#include "ml-base.h" +#include "ml-limits.h" +#include "ml-state.h" +#include "ml-values.h" +#include "ml-objects.h" +#include "card-map.h" +#include "heap.h" +#include "tags.h" +#include "copy-loop.h" +#ifdef MP_SUPPORT +#include "vproc-state.h" +#endif + +#ifdef GC_STATS +extern long numUpdates; +extern long numBytesAlloc; +extern long numBytesCopied; +#endif + +/** store list operations */ +#define STL_nil ML_unit +#define STL_hd(p) REC_SELPTR(ml_val_t, p, 0) +#define STL_tl(p) REC_SEL(p, 1) + +/* local routines */ +PVT void MinorGC_ScanStoreList (heap_t *heap, ml_val_t stl); +PVT void MinorGC_SweepToSpace (gen_t *gen1); +PVT ml_val_t MinorGC_ForwardObj (gen_t *gen1, ml_val_t v); +PVT ml_val_t MinorGC_FwdSpecial (gen_t *gen1, ml_val_t *obj, ml_val_t desc); + +#ifdef VERBOSE +extern char *ArenaName[]; +#endif + +/* Check a word for a allocation space reference */ +#ifndef NO_GC_INLINE +#define MinorGC_CheckWord(allocBase, allocSz, g1, p) { \ + ml_val_t __w = *(p); \ + if (isBOXED(__w) && (((Addr_t)__w - (allocBase)) < (allocSz))) { \ + *(p) = MinorGC_ForwardObj(g1, __w); \ + } \ + } +#else +PVT void MinorGC_CheckWord (Addr_t allocBase, Addr_t allocSz, gen_t *g1, ml_val_t *p) +{ + ml_val_t w = *(p); + if (isBOXED(w) && (((Addr_t)w - allocBase) < allocSz)) { + ASSERT(ADDR_TO_PAGEID(BIBOP, w) == AID_NEW); + *(p) = MinorGC_ForwardObj(g1, w); + } +} +#endif + + +/* MinorGC: + * + * Do a collection of the allocation space. + */ +void MinorGC (ml_state_t *msp, ml_val_t **roots) +{ + heap_t *heap = msp->ml_heap; + gen_t *gen1 = heap->gen[0]; +#ifdef GC_STATS + long nbytesAlloc, nbytesCopied, nUpdates=numUpdates; + Addr_t gen1Top[NUM_ARENAS]; + int i; + { + nbytesAlloc = (Addr_t)(msp->ml_allocPtr) - (Addr_t)(heap->allocBase); + CNTR_INCR(&(heap->numAlloc), nbytesAlloc); + for (i = 0; i < NUM_ARENAS; i++) + gen1Top[i] = (Addr_t)(gen1->arena[i]->nextw); + } +#elif defined(VM_STATS) + { + Addr_t nbytesAlloc; + nbytesAlloc = ((Addr_t)(msp->ml_allocPtr) - (Addr_t)(heap->allocBase)); + CNTR_INCR(&(heap->numAlloc), nbytesAlloc); + } +#endif + +#ifdef VERBOSE +{ + int i; + SayDebug ("Generation 1 before MinorGC:\n"); + for (i = 0; i < NUM_ARENAS; i++) { + SayDebug (" %s: base = %p, oldTop = %p, nextw = %p\n", + ArenaName[i+1], gen1->arena[i]->tospBase, + gen1->arena[i]->oldTop, gen1->arena[i]->nextw); + } +} +#endif + +#ifdef CHECK_HEAP + CheckBIBOP (heap); +#endif + + /* scan the standard roots */ + { + ml_val_t *rp; + Addr_t allocBase = (Addr_t)heap->allocBase; + Addr_t allocSz = heap->allocSzB; + + while ((rp = *roots++) != NIL(ml_val_t *)) { + MinorGC_CheckWord(allocBase, allocSz, gen1, rp); + } + } + + /* Scan the store list */ +#ifdef MP_SUPPORT + { + ml_val_t stl; + int i; + ml_state_t *msp; + vproc_state_t *vsp; + + for (i = 0; i < MAX_NUM_PROCS; i++) { + vsp = VProc[i]; + msp = vsp->vp_state; + if ((vsp->vp_mpState == MP_PROC_RUNNING) + && ((stl = msp->ml_storePtr) != STL_nil)) { + MinorGC_ScanStoreList (heap, stl); + msp->ml_storePtr = STL_nil; + } + } + } +#else + { + ml_val_t stl = msp->ml_storePtr; + if (stl != STL_nil) { + MinorGC_ScanStoreList (heap, stl); + msp->ml_storePtr = STL_nil; + } + } +#endif + + /* Sweep the first generation to-space */ + MinorGC_SweepToSpace (gen1); + heap->numMinorGCs++; + + /* Handle weak pointers */ + if (heap->weakList != NIL(ml_val_t *)) + ScanWeakPtrs (heap); + +#ifdef VERBOSE +{ + int i; + SayDebug ("Generation 1 after MinorGC:\n"); + for (i = 0; i < NUM_ARENAS; i++) { + SayDebug (" %s: base = %p, oldTop = %p, nextw = %p\n", + ArenaName[i+1], gen1->arena[i]->tospBase, + gen1->arena[i]->oldTop, gen1->arena[i]->nextw); + } +} +#endif + +#ifdef GC_STATS + { + int nbytes; + + nbytesCopied = 0; + for (i = 0; i < NUM_ARENAS; i++) { + nbytes = ((Word_t)(gen1->arena[i]->nextw) - gen1Top[i]); + nbytesCopied += nbytes; + CNTR_INCR(&(heap->numCopied[0][i]), nbytes); + } + } +#endif + +#ifdef CHECK_HEAP + CheckHeap(heap, 1); +#endif + +} /* end of MinorGC. */ + + +/* MinorGC_ScanStoreList: + * + * Scan the store list. The store list pointer (stl) is guaranteed to + * be non-null. + */ +PVT void MinorGC_ScanStoreList (heap_t *heap, ml_val_t stl) +{ + ml_val_t *addr, w; + gen_t *gen1 = heap->gen[0]; + bibop_t bibop = BIBOP; +#ifdef GC_STATS + int nUpdates = 0; +#endif + + /* Scan the store list */ + do { +#ifdef GC_STATS + nUpdates++; +#endif + addr = STL_hd(stl); + stl = STL_tl(stl); + w = *addr; + if (isBOXED(w)) { + aid_t srcId = ADDR_TO_PAGEID(bibop, addr); + /* We can ignore updates to objects in new-space, and to references + * in the runtime system references (ie, UNMAPPED) + */ + if ((srcId != AID_NEW) && (! isUNMAPPED(srcId))) { + /* srcGen is the generation of the updated cell; dstGen is the + * generation of the object that the cell points to. + */ + int srcGen = EXTRACT_GEN(srcId); + aid_t dstId = ADDR_TO_PAGEID(bibop, w); + int dstGen = EXTRACT_GEN(dstId); + + if (IS_BIGOBJ_AID(dstId)) { + int i; + bigobj_region_t *region; + bigobj_desc_t *dp; + if (dstGen >= srcGen) + continue; + /* find the beginning of the region containing the code object */ + i = BIBOP_ADDR_TO_INDEX(w); + while (! BO_IS_HDR(dstId)) { + --i; + dstId = INDEX_TO_PAGEID(bibop, i); + } + region = (bigobj_region_t *)BIBOP_INDEX_TO_ADDR(i); + dp = ADDR_TO_BODESC(region, w); + dstGen = dp->gen; + } + else { + if (dstGen == ALLOC_GEN) { + /* The refered to object is in allocation space, and will be + * forwarded to the first generation. + */ + dstGen = 1; + *addr = MinorGC_ForwardObj(gen1, w); + } + } + if (srcGen > dstGen) { + /* mark the card containing "addr" */ +#ifndef BIT_CARDS + MARK_CARD(heap->gen[srcGen-1]->dirty, addr, dstGen); +#else + MARK_CARD(heap->gen[srcGen-1]->dirty, addr); +#endif + } + } + } + } while (stl != STL_nil); + +#ifdef GC_STATS + numUpdates += nUpdates; +#endif + +} /* end MinorGC_ScanStoreList */ + + +/* MinorGC_SweepToSpace: + * + * Sweep the first generation's to-space. Note, that since there are + * no younger objects, we don't have to do anything special for the + * array space. + */ +PVT void MinorGC_SweepToSpace (gen_t *gen1) +{ + Addr_t allocBase = (Addr_t)gen1->heap->allocBase; + Addr_t allocSz = gen1->heap->allocSzB; + bool_t swept; + +#define MinorGC_SweepToSpArena(indx) { \ + arena_t *__ap = gen1->arena[(indx)]; \ + ml_val_t *__p, *__q; \ + __p = __ap->sweep_nextw; \ + if (__p < __ap->nextw) { \ + swept = TRUE; \ + do { \ + for (__q = __ap->nextw; __p < __q; __p++) { \ + MinorGC_CheckWord(allocBase, allocSz, gen1, __p); \ + } \ + } while (__q != __ap->nextw); \ + __ap->sweep_nextw = __q; \ + } \ + } /* MinorGC_SweepToSpArena */ + + do { + swept = FALSE; + + /* Sweep the record, pair and array arenas */ + MinorGC_SweepToSpArena(RECORD_INDX); + MinorGC_SweepToSpArena(PAIR_INDX); + MinorGC_SweepToSpArena(ARRAY_INDX); + + } while (swept); + +} /* end of MinorGC_SweepToSpace. */ + +/* MinorGC_ForwardObj: + * + * Forward an object from the allocation space to the first generation. + */ +PVT ml_val_t MinorGC_ForwardObj (gen_t *gen1, ml_val_t v) +{ + ml_val_t *obj = PTR_MLtoC(ml_val_t, v); + ml_val_t *new_obj, desc; + Word_t len; + arena_t *arena; + + desc = obj[-1]; + switch (GET_TAG(desc)) { + case DTAG_record: + len = GET_LEN(desc); +#ifdef NO_PAIR_STRIP + arena = gen1->arena[RECORD_INDX]; +#else + if (len == 2) { + arena = gen1->arena[PAIR_INDX]; + new_obj = arena->nextw; + arena->nextw += 2; + new_obj[0] = obj[0]; + new_obj[1] = obj[1]; + /* setup the forward pointer in the old pair */ + obj[-1] = DESC_forwarded; + obj[0] = (ml_val_t)(Addr_t)new_obj; + return PTR_CtoML(new_obj); + } + else + arena = gen1->arena[RECORD_INDX]; +#endif + break; + case DTAG_vec_hdr: + case DTAG_arr_hdr: + len = 2; + arena = gen1->arena[RECORD_INDX]; + break; + case DTAG_arr_data: + len = GET_LEN(desc); + arena = gen1->arena[ARRAY_INDX]; + break; +/* 64BIT: on 64-bit machines, we can treat DTAG_raw and DTAG_raw64 the same */ + case DTAG_raw: + len = GET_LEN(desc); + arena = gen1->arena[STRING_INDX]; + break; + case DTAG_raw64: + len = GET_LEN(desc); + arena = gen1->arena[STRING_INDX]; +#ifdef ALIGN_REALDS +# ifdef CHECK_HEAP + if (((Addr_t)arena->nextw & WORD_SZB) == 0) { + *(arena->nextw) = (ml_val_t)0; + arena->nextw++; + } +# else + arena->nextw = (ml_val_t *)(((Addr_t)arena->nextw) | WORD_SZB); +# endif +#endif + break; + case DTAG_special: + return MinorGC_FwdSpecial (gen1, obj, desc); + case DTAG_forward: + return PTR_CtoML(FOLLOW_FWDOBJ(obj)); + default: + Die ("bad object tag %d, obj = %p, desc = %p", GET_TAG(desc), obj, desc); + } /* end of switch */ + + /* Allocate and initialize a to-space copy of the object */ + new_obj = arena->nextw; + arena->nextw += (len + 1); + *new_obj++ = desc; + ASSERT(arena->nextw <= arena->tospTop); + + COPYLOOP(obj, new_obj, len); + + /* set up the forward pointer, and return the new object. */ + obj[-1] = DESC_forwarded; + obj[0] = (ml_val_t)(Addr_t)new_obj; + + return PTR_CtoML(new_obj); + +} /* end of MinorGC_ForwardObj */ + + +/* MinorGC_FwdSpecial: + * + * Forward a special object (suspension, weak pointer, ...). + */ +PVT ml_val_t MinorGC_FwdSpecial (gen_t *gen1, ml_val_t *obj, ml_val_t desc) +{ + arena_t *arena = gen1->arena[ARRAY_INDX]; + ml_val_t *new_obj = arena->nextw; + + arena->nextw += SPECIAL_SZW; /* all specials are two words */ + + switch (GET_LEN(desc)) { + case SPCL_evaled_susp: + case SPCL_unevaled_susp: + *new_obj++ = desc; + *new_obj = *obj; + break; + case SPCL_weak: { + ml_val_t v = *obj; +#ifdef DEBUG_WEAK_PTRS +SayDebug ("MinorGC: weak [%p ==> %p] --> %p", obj, new_obj+1, v); +#endif + if (! isBOXED(v)) { +#ifdef DEBUG_WEAK_PTRS +SayDebug (" unboxed\n"); +#endif + /* weak references to unboxed objects are never nullified */ + *new_obj++ = DESC_weak; + *new_obj = v; + } + else { + aid_t aid = ADDR_TO_PAGEID(BIBOP, v); + ml_val_t *vp = PTR_MLtoC(ml_val_t, v); + + if (aid == AID_NEW) { + if (vp[-1] == DESC_forwarded) { + /* Reference to an object that has already been forwarded. + * NOTE: we have to put the pointer to the non-forwarded + * copy of the object (i.e, v) into the to-space copy + * of the weak pointer, since the GC has the invariant + * it never sees to-space pointers during sweeping. + */ +#ifdef DEBUG_WEAK_PTRS +SayDebug (" already forwarded to %p\n", PTR_CtoML(FOLLOW_FWDOBJ(vp))); +#endif + *new_obj++ = DESC_weak; + *new_obj = v; + } + else { + /* the forwarded version of weak objects are threaded + * via their descriptor fields. We mark the object + * reference field to make it look like an unboxed value, + * so that the to-space sweeper does not follow the weak + * reference. + */ +#ifdef DEBUG_WEAK_PTRS +SayDebug (" forward\n"); +#endif + *new_obj = MARK_PTR(PTR_CtoML(gen1->heap->weakList)); + gen1->heap->weakList = new_obj++; + *new_obj = MARK_PTR(vp); + } + } + else { + /* reference to an older object */ +#ifdef DEBUG_WEAK_PTRS +SayDebug (" old object\n"); +#endif + *new_obj++ = DESC_weak; + *new_obj = v; + } + } + } break; + case SPCL_null_weak: /* shouldn't happen in the allocation arena */ + default: + Die ("strange/unexpected special object @ %p; desc = %p\n", obj, desc); + } /* end of switch */ + + obj[-1] = DESC_forwarded; + obj[0] = (ml_val_t)(Addr_t)new_obj; + + return PTR_CtoML(new_obj); + +} /* end of MinorGC_FwdSpecial */ diff --git a/base/runtime/gc/ml-heap-image.h b/base/runtime/gc/ml-heap-image.h new file mode 100644 index 0000000..1731282 --- /dev/null +++ b/base/runtime/gc/ml-heap-image.h @@ -0,0 +1,163 @@ +/* ml-heap-image.h + * + * COPYRIGHT (c) 1992 by AT&T Bell Laboratories. + * + * The definitions and typedefs that describe the layout of an ML + * heap image in a file. This can be either an exported heap, or + * a blasted object. + * + * These files have the following basic layout: + * + * Image header + * Heap/Blast header + * External reference table + * Image + * + * where the format of Image depends on the kind (heap vs. blast). + */ + +#ifndef _ML_IMAGE_ +#define _ML_IMAGE_ + +#ifndef _ML_SIZES_ +#include "ml-sizes.h" +#endif + +#ifndef _ML_STATE_ +#include "ml-state.h" +#endif + +#ifndef _HEAP_ +#include "heap.h" +#endif + +/* tag to identify image byte order */ +#define ORDER 0x00112233 + +/* heap image version identifier (date in mmddyyyy form) */ +#define IMAGE_MAGIC 0x09082004 + +/* blasted heap image version identifier (date in 00mmddyy form) */ +#define BLAST_MAGIC 0x00070995 + +/* the kind of heap image */ +#define EXPORT_HEAP_IMAGE 1 +#define EXPORT_FN_IMAGE 2 +#define BLAST_IMAGE 3 +#define BLAST_UNBOXED 4 /* a blasted unboxed value */ + +typedef struct { /* The magic number, and other version info */ + Unsigned32_t byteOrder; /* ORDER tag */ + Unsigned32_t magic; /* magic number */ + Unsigned32_t kind; /* EXPORT_HEAP_IMAGE, etc. */ + char arch[12]; /* the exporting machine's architecture */ + char opsys[12]; /* the exporting machine's operating system */ +} ml_image_hdr_t; + + +typedef struct { /* The header for a heap image */ + int numVProcs; /* The number of virtual processors */ + int numGens; /* The number of heap generations */ + int numArenas; /* The number of small-object arenas (one per kind) */ + int numBOKinds; /* The number of big-object kinds */ + int numBORegions; /* The number of big-object regions in the */ + /* exporting address space. */ + int cacheGen; /* The oldest cached generation */ + Addr_t allocSzB; /* The size of the allocation arena */ + /* heap objects that are referred to by the runtime */ + ml_val_t pervStruct; /* the contents of PervStruct */ + ml_val_t runTimeCompUnit; /* The run-time system compilation unit root */ + ml_val_t mathVec; /* The Math structure root (if defined) */ +} ml_heap_hdr_t; + +typedef struct { /* The header for a blasted object image */ + Unsigned32_t numArenas; /* The number of small-object arenas (one per kind) */ + Unsigned32_t numBOKinds; /* The number of big-object kinds */ + Unsigned32_t numBORegions;/* The number of big-object regions in the */ + /* exporting address space. */ + bool_t hasCode; /* true, if the blasted object contains code */ + ml_val_t rootObj; /* The root object */ +} ml_blast_hdr_t; + +typedef struct { /* The header for the extern table */ + int numExterns; /* The number of external symbols */ + int externSzB; /* The size (in bytes) of the string table area. */ +} extern_tbl_hdr_t; + + +typedef struct { /* The image of an ML virtual processor. The live */ + /* registers are those specified by RET_MASK, plus */ + /* the varReg, exnCont and pc. */ + ml_val_t sigHandler; /* the contents of MLSignalHandler */ + ml_val_t stdArg; + ml_val_t stdCont; + ml_val_t stdClos; + ml_val_t pc; + ml_val_t exnCont; + ml_val_t varReg; + ml_val_t calleeSave[CALLEESAVE]; +} ml_vproc_image_t; + + +/* The heap header consists of numGens generation descriptions, each of which + * consists of (numArenas+numBOKinds) heap_arena_hdr_t records. After the + * generation descriptors, there are numBORegions bo_region_info_t records, + * which are followed by the page aligned heap image follows the heap header. + */ + +typedef struct { /* An arena header. This is used for both the regular */ + /* arenas and the big-object arena of a generation. */ + int gen; /* the generation of this arena */ + int objKind; /* the kind of objects in this arena */ + Addr_t offset; /* the file position at which this arena starts. */ + union { /* additional info */ + struct { /* info for regular arenas */ + Addr_t baseAddr; /* the base address of this arena in the */ + /* exporting address space. */ + Addr_t sizeB; /* the size of the live data in this arena */ + Addr_t roundedSzB; /* the padded size of this arena in the */ + /* image file */ + } o; + struct { /* info for the big-object arena */ + int numBigObjs; /* the number of big-objects in this */ + /* generation. */ + int numBOPages; /* the number of big-object pages required. */ + } bo; + } info; +} heap_arena_hdr_t; + +typedef struct { /* a descriptor of a big-object region in the */ + /* exporting address space */ + Addr_t baseAddr; /* the base address of this big-object region in */ + /* the exporting address space. Note that this */ + /* is the address of the header, not of the */ + /* first page. */ + Addr_t firstPage; /* the address of the first page of the region in */ + /* the exporting address space. */ + Addr_t sizeB; /* the total size of this big-object region */ + /* (including the header). */ +} bo_region_info_t; + +typedef struct { /* a header for a big-object */ + int gen; /* the generation of this big-object */ + int objKind; /* the class of this big-object */ + Addr_t baseAddr; /* the base address of this big-object in the */ + /* exporting address space */ + Addr_t sizeB; /* the size of this big-object */ +} bigobj_hdr_t; + + +/** external references **/ +#define isEXTERNTAG(w) (isDESC(w) && (GET_TAG(w) == DTAG_extern)) +#define EXTERNID(w) GET_LEN(w) + +/** Pointer tagging operations **/ +#define HIO_ID_BITS 8 +#define HIO_ADDR_BITS (BITS_PER_WORD-HIO_ID_BITS) +#define HIO_ADDR_MASK (((Addr_t)1 << HIO_ADDR_BITS) - 1) + +#define HIO_TAG_PTR(id,offset) PTR_CtoML(((Addr_t)(id) << HIO_ADDR_BITS)|(Addr_t)(offset)) +#define HIO_GET_ID(p) (PTR_MLtoADDR(p)>>HIO_ADDR_BITS) +#define HIO_GET_OFFSET(p) (PTR_MLtoADDR(p) & HIO_ADDR_MASK) + +#endif /* !_ML_IMAGE_ */ diff --git a/base/runtime/gc/ml-objects.c b/base/runtime/gc/ml-objects.c new file mode 100644 index 0000000..f038542 --- /dev/null +++ b/base/runtime/gc/ml-objects.c @@ -0,0 +1,541 @@ +/*! \file ml-objects.c + * + * \author John Reppy + * + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * Code to allocate and manipulate ML objects. + * + * MP Note: when invoking the GC, we add the requested size to reqSizeB, + * so that multiple processors can request space at the same time. + */ + +#include "ml-base.h" +#include "heap.h" +#include "ml-objects.h" +#include "ml-limits.h" +#include "ml-mp.h" +#include + +/* A macro to check for necessary GC; on MP systems, this needs to be + * a loop, since other processors may steal the memory before the + * checking processor can use it. + */ +#ifdef MP_SUPPORT +#define IFGC(ap, szb) \ + while ((! isACTIVE(ap)) || (AVAIL_SPACE(ap) <= (szb))) +#else +#define IFGC(ap, szb) \ + if ((! isACTIVE(ap)) || (AVAIL_SPACE(ap) <= (szb))) +#endif + +#ifdef COLLECT_STATS +#define COUNT_ALLOC(msp, nbytes) { \ + heap_t *__h = msp->ml_heap; \ + CNTR_INCR(&(__h->numAlloc), (nbytes)); \ + } +#else +#define COUNT_ALLOC(msp, nbytes) /* null */ +#endif + + +/* ML_CString: + * + * Allocate an ML string using a C string as an initializer. We assume + * that the string is small and can be allocated in the allocation + * arena. + */ +ml_val_t ML_CString (ml_state_t *msp, const char *v) +{ + int len = ((v == NIL(char *)) ? 0 : strlen(v)); + + if (len == 0) + return ML_string0; + else { + Word_t n = BYTES_TO_WORDS(len+1); /* count "\0" too */ + ml_val_t res; + + res = ML_AllocRaw (msp, n); + /* zero the last word to allow fast (word) string comparisons, and to + * guarantee 0 termination. + */ + PTR_MLtoC(Word_t, res)[n-1] = 0; + strcpy (PTR_MLtoC(char, res), v); + + SEQHDR_ALLOC (msp, res, DESC_string, res, len); + + return res; + } + +} /* end of ML_CString */ + +/* ML_CStringList: + * + * Given a NIL terminated array of char *, build a list of ML strings. + */ +ml_val_t ML_CStringList (ml_state_t *msp, char **strs) +{ +/** NOTE: we should do something about possible GC!!! **/ + int i; + ml_val_t p, s; + + for (i = 0; strs[i] != NIL(char *); i++) + continue; + + p = LIST_nil; + while (i-- > 0) { + s = ML_CString(msp, strs[i]); + LIST_cons(msp, p, s, p); + } + + return p; + +} /* end of ML_CStringList */ + +/* ML_AllocString: + * + * Allocate an uninitialized ML string of length > 0. This string is + * guaranteed to be padded to word size with 0 bytes, and to be 0 terminated. + */ +ml_val_t ML_AllocString (ml_state_t *msp, Word_t len) +{ + Word_t nwords = BYTES_TO_WORDS(len+1); + ml_val_t res; + + ASSERT(len > 0); + + res = ML_AllocRaw (msp, nwords); + + /* zero the last word to allow fast (word) string comparisons, and to + * guarantee 0 termination. + */ + PTR_MLtoC(Word_t, res)[nwords-1] = 0; + + SEQHDR_ALLOC (msp, res, DESC_string, res, len); + + return res; + +} /* end of ML_AllocString. */ + +/* ML_AllocRaw: + * + * Allocate an uninitialized chunk of raw data. + */ +ml_val_t ML_AllocRaw (ml_state_t *msp, Word_t nwords) +{ + ml_val_t desc = MAKE_DESC(nwords, DTAG_raw); + ml_val_t res; + Word_t szb; + + ASSERT(nwords > 0); + + if (nwords > SMALL_OBJ_SZW) { + arena_t *ap = msp->ml_heap->gen[0]->arena[STRING_INDX]; + + szb = WORD_SZB*(nwords + 1); + BEGIN_CRITICAL_SECT(MP_GCGenLock) + IFGC (ap, szb+msp->ml_heap->allocSzB) { + /* we need to do a GC */ + ap->reqSizeB += szb; + RELEASE_LOCK(MP_GCGenLock); + InvokeGC (msp, 1); + ACQUIRE_LOCK(MP_GCGenLock); + ap->reqSizeB = 0; + } + *(ap->nextw++) = desc; + res = PTR_CtoML(ap->nextw); + ap->nextw += nwords; + ASSERT(ap->nextw < ap->tospTop); + END_CRITICAL_SECT(MP_GCGenLock) + COUNT_ALLOC(msp, szb); + } + else { + ML_AllocWrite (msp, 0, desc); + res = ML_Alloc (msp, nwords); + } + + return res; + +} /* end of ML_AllocRaw. */ + +/* ML_ShrinkRaw: + * + * Shrink a freshly allocated raw-data vector. This is used by the input routines + * that must allocate space for input that may be excessive. + */ +void ML_ShrinkRaw (ml_state_t *msp, ml_val_t v, Word_t nWords) +{ + int oldNWords = OBJ_LEN(v); + + if (nWords == oldNWords) + return; + + ASSERT((nWords > 0) && (nWords < oldNWords)); + + if (oldNWords > SMALL_OBJ_SZW) { + arena_t *ap = msp->ml_heap->gen[0]->arena[STRING_INDX]; + ASSERT(ap->nextw - oldNWords == PTR_MLtoC(ml_val_t, v)); + ap->nextw -= (oldNWords - nWords); + } + else { + ASSERT(msp->ml_allocPtr - oldNWords == PTR_MLtoC(ml_val_t, v)); + msp->ml_allocPtr -= (oldNWords - nWords); + } + + PTR_MLtoC(ml_val_t, v)[-1] = MAKE_DESC(nWords, DTAG_raw); + +} /* end of ML_ShrinkRaw */ + +/* ML_AllocRaw64: + * + * Allocate an uninitialized chunk of 64-bit aligned raw data. + */ +ml_val_t ML_AllocRaw64 (ml_state_t *msp, Word_t nelems) +{ + Word_t nwords = DOUBLES_TO_WORDS(nelems); + ml_val_t desc = MAKE_DESC(nwords, DTAG_raw64); + ml_val_t res; + Word_t szb; + + if (nwords > SMALL_OBJ_SZW) { + arena_t *ap = msp->ml_heap->gen[0]->arena[STRING_INDX]; + szb = WORD_SZB*(nwords + 1); +#ifdef ALIGN_REALDS + szb += WORD_SZB; /* alignment padding */ +#endif + BEGIN_CRITICAL_SECT(MP_GCGenLock) + IFGC (ap, szb+msp->ml_heap->allocSzB) { + /* we need to do a GC */ + ap->reqSizeB += szb; + RELEASE_LOCK(MP_GCGenLock); + InvokeGC (msp, 1); + ACQUIRE_LOCK(MP_GCGenLock); + ap->reqSizeB = 0; + } +#ifdef ALIGN_REALDS + /* Force REALD_SZB alignment (descriptor is off by one word) */ +# ifdef CHECK_HEAP + if (((Addr_t)ap->nextw & WORD_SZB) == 0) { + *(ap->nextw) = (ml_val_t)0; + ap->nextw++; + } +# else + ap->nextw = (ml_val_t *)(((Addr_t)ap->nextw) | WORD_SZB); +# endif +#endif + *(ap->nextw++) = desc; + res = PTR_CtoML(ap->nextw); + ap->nextw += nwords; + END_CRITICAL_SECT(MP_GCGenLock) + COUNT_ALLOC(msp, szb); + } + else { +#ifdef ALIGN_REALDS + /* Force REALD_SZB alignment */ + msp->ml_allocPtr = (ml_val_t *)((Addr_t)(msp->ml_allocPtr) | WORD_SZB); +#endif + ML_AllocWrite (msp, 0, desc); + res = ML_Alloc (msp, nwords); + } + + return res; + +} /* end of ML_AllocRaw64 */ + +/* ML_AllocCode: + * + * Allocate an uninitialized ML code object. Assume that len > 1. + */ +ml_val_t ML_AllocCode (ml_state_t *msp, Word_t len) +{ + heap_t *heap = msp->ml_heap; + int allocGen = (heap->numGens < CODE_ALLOC_GEN) + ? heap->numGens + : CODE_ALLOC_GEN; + gen_t *gen = heap->gen[allocGen-1]; + bigobj_desc_t *dp; + + BEGIN_CRITICAL_SECT(MP_GCGenLock) + dp = BO_Alloc (heap, allocGen, len); + ASSERT(dp->gen == allocGen); + dp->next = gen->bigObjs[CODE_INDX]; + gen->bigObjs[CODE_INDX] = dp; + dp->objc = CODE_INDX; + COUNT_ALLOC(msp, len); + END_CRITICAL_SECT(MP_GCGenLock) + + return PTR_CtoML(dp->obj); + +} /* end of ML_AllocCode. */ + +/* ML_AllocBytearray: + * + * Allocate an uninitialized ML bytearray. Assume that len > 0. + */ +ml_val_t ML_AllocBytearray (ml_state_t *msp, Word_t len) +{ + Word_t nwords = BYTES_TO_WORDS(len); + ml_val_t res; + + res = ML_AllocRaw (msp, nwords); + + /* zero the last word to allow fast (word) string comparisons, and to + * guarantee 0 termination. + */ + PTR_MLtoC(Word_t, res)[nwords-1] = 0; + + SEQHDR_ALLOC (msp, res, DESC_word8arr, res, len); + + return res; + +} /* end of ML_AllocBytearray. */ + +/* ML_AllocRealdarray: + * + * Allocate an uninitialized ML realarray. Assume that len > 0. + */ +ml_val_t ML_AllocRealdarray (ml_state_t *msp, Word_t len) +{ + ml_val_t res; + + res = ML_AllocRaw64 (msp, len); + + SEQHDR_ALLOC (msp, res, DESC_real64arr, res, len); + + return res; + +} /* end of ML_AllocRealdarray. */ + +/* ML_AllocArrayData: + * + * Allocate a mutable data array using initVal as an initial value. Assume + * that len > 0. + */ +ml_val_t ML_AllocArrayData (ml_state_t *msp, Word_t len, ml_val_t initVal) +{ + ml_val_t res, *p; + ml_val_t desc = MAKE_DESC(len, DTAG_arr_data); + int i; + Word_t szb; + + if (len > SMALL_OBJ_SZW) { + arena_t *ap = msp->ml_heap->gen[0]->arena[ARRAY_INDX]; + int gcLevel = (isBOXED(initVal) ? 0 : -1); + + szb = WORD_SZB*(len + 1); + BEGIN_CRITICAL_SECT(MP_GCGenLock) +#ifdef MP_SUPPORT + checkGC:; /* the MP version jumps to here to recheck for GC */ +#endif + if (! isACTIVE(ap) + || (AVAIL_SPACE(ap) <= szb+msp->ml_heap->allocSzB)) + gcLevel = 1; + if (gcLevel >= 0) { + /* we need to do a GC (and preserve initVal) */ + ml_val_t root = initVal; + ap->reqSizeB += szb; + RELEASE_LOCK(MP_GCGenLock); + InvokeGCWithRoots (msp, gcLevel, &root, NIL(ml_val_t *)); + initVal = root; + ACQUIRE_LOCK(MP_GCGenLock); + ap->reqSizeB = 0; +#ifdef MP_SUPPORT + /* check again to insure that we have sufficient space */ + gcLevel = -1; + goto checkGC; +#endif + } + ASSERT(ap->nextw == ap->sweep_nextw); + *(ap->nextw++) = desc; + res = PTR_CtoML(ap->nextw); + ap->nextw += len; + ap->sweep_nextw = ap->nextw; + END_CRITICAL_SECT(MP_GCGenLock) + COUNT_ALLOC(msp, szb); + } + else { + ML_AllocWrite (msp, 0, desc); + res = ML_Alloc (msp, len); + } + + for (p = PTR_MLtoC(ml_val_t, res), i = 0; i < len; i++) { + *p++ = initVal; + } + + return res; + +} /* end of ML_AllocArrayData. */ + +/* ML_AllocArray: + * + * Allocate an ML array using initVal as an initial value. Assume + * that len > 0. + */ +ml_val_t ML_AllocArray (ml_state_t *msp, Word_t len, ml_val_t initVal) +{ + ml_val_t res; + + res = ML_AllocArrayData (msp, len, initVal); + + SEQHDR_ALLOC (msp, res, DESC_polyarr, res, len); + + return res; + +} /* end of ML_AllocArray. */ + +/* ML_AllocVector: + * + * Allocate an ML vector, using the list initVal as an initializer. + * Assume that len > 0. + */ +ml_val_t ML_AllocVector (ml_state_t *msp, Word_t len, ml_val_t initVal) +{ + ml_val_t desc = MAKE_DESC(len, DTAG_vec_data); + ml_val_t res, *p; + + if (len > SMALL_OBJ_SZW) { + /* Since we want to avoid pointers from the 1st generation record space + * into the allocation space, we need to do a GC (and preserve initVal) + */ + arena_t *ap = msp->ml_heap->gen[0]->arena[RECORD_INDX]; + ml_val_t root = initVal; + int gcLevel = 0; + Word_t szb; + + szb = WORD_SZB*(len + 1); + BEGIN_CRITICAL_SECT(MP_GCGenLock) + if (! isACTIVE(ap) + || (AVAIL_SPACE(ap) <= szb+msp->ml_heap->allocSzB)) + gcLevel = 1; +#ifdef MP_SUPPORT + checkGC:; /* the MP version jumps to here to redo the GC */ +#endif + ap->reqSizeB += szb; + RELEASE_LOCK(MP_GCGenLock); + InvokeGCWithRoots (msp, gcLevel, &root, NIL(ml_val_t *)); + initVal = root; + ACQUIRE_LOCK(MP_GCGenLock); + ap->reqSizeB = 0; +#ifdef MP_SUPPORT + /* check again to insure that we have sufficient space */ + if (AVAIL_SPACE(ap) <= szb+msp->ml_heap->allocSzB) + goto checkGC; +#endif + ASSERT(ap->nextw == ap->sweep_nextw); + *(ap->nextw++) = desc; + res = PTR_CtoML(ap->nextw); + ap->nextw += len; + ap->sweep_nextw = ap->nextw; + END_CRITICAL_SECT(MP_GCGenLock) + COUNT_ALLOC(msp, szb); + } + else { + ML_AllocWrite (msp, 0, desc); + res = ML_Alloc (msp, len); + } + + for ( + p = PTR_MLtoC(ml_val_t, res); + initVal != LIST_nil; + initVal = LIST_tl(initVal) + ) + *p++ = LIST_hd(initVal); + + SEQHDR_ALLOC (msp, res, DESC_polyvec, res, len); + + return res; + +} /* end of ML_AllocVector. */ + + +/* ML_SysConst: + * + * Find the system constant with the given id in tbl, and allocate a pair + * to represent it. If the constant is not present, then return the + * pair (~1, ""). + */ +ml_val_t ML_SysConst (ml_state_t *msp, sysconst_tbl_t *tbl, int id) +{ + ml_val_t name, res; + int i; + + for (i = 0; i < tbl->numConsts; i++) { + if (tbl->consts[i].id == id) { + name = ML_CString (msp, tbl->consts[i].name); + REC_ALLOC2 (msp, res, INT_CtoML(id), name); + return res; + } + } + /* here, we did not find the constant */ + name = ML_CString (msp, ""); + REC_ALLOC2 (msp, res, INT_CtoML(-1), name); + return res; + +} /* end of ML_SysConst */ + + +/* ML_SysConstList: + * + * Generate a list of system constants from the given table. + */ +ml_val_t ML_SysConstList (ml_state_t *msp, sysconst_tbl_t *tbl) +{ + int i; + ml_val_t name, sysConst, list; + Addr_t availSpace, reqSpace; + + availSpace = ((size_t)msp->ml_limitPtr - (size_t)msp->ml_allocPtr); + for (list = LIST_nil, i = tbl->numConsts; --i >= 0; ) { + /* required space for string header+data (4 words + string bytes), pair (3 words), + * cons (3 words). + */ + reqSpace = (4 + 3 + 3) * WORD_SZB + BYTES_TO_WORDS(strlen(tbl->consts[i].name) + 1); + if (reqSpace >= availSpace) { + InvokeGCWithRoots (msp, 0, (ml_val_t *)&list, NIL(ml_val_t *)); + availSpace = ((size_t)msp->ml_limitPtr - (size_t)msp->ml_allocPtr); + } + name = ML_CString (msp, tbl->consts[i].name); + REC_ALLOC2 (msp, sysConst, INT_CtoML(tbl->consts[i].id), name); + LIST_cons(msp, list, sysConst, list); + availSpace -= reqSpace; + } + + return list; + +} /* end of ML_SysConstList */ + + +/* ML_AllocCData: + * + * Allocate a 64-bit aligned raw data object (to store abstract C data). + */ +ml_val_t ML_AllocCData (ml_state_t *msp, Word_t nbytes) +{ + ml_val_t obj; + + obj = ML_AllocRaw64 (msp, (nbytes+7) >> 3); + + return obj; + +} /* end of ML_AllocCData */ + + +/* ML_CData: + * + * Allocate a 64-bit aligned raw data object and initialize it to the given C data. + */ +ml_val_t ML_CData (ml_state_t *msp, void *data, Word_t nbytes) +{ + ml_val_t obj; + + if (nbytes == 0) + return ML_unit; + else { + obj = ML_AllocRaw64 (msp, (nbytes+7) >> 3); + memcpy (PTR_MLtoC(void, obj), data, nbytes); + + return obj; + } + +} /* end of ML_CData */ diff --git a/base/runtime/gc/mp-gc.c b/base/runtime/gc/mp-gc.c new file mode 100644 index 0000000..019c3d8 --- /dev/null +++ b/base/runtime/gc/mp-gc.c @@ -0,0 +1,267 @@ +/* mp-gc.c + * + * COPYRIGHT (c) 1994 by AT&T Bell Laboratories. + * + * Extra routines to support GC in the MP implementation. + * + */ + +#include +#include +#include "ml-base.h" +#include "ml-limits.h" +#include "memory.h" +#include "ml-state.h" +#include "ml-values.h" +#include "ml-objects.h" +#include "cntr.h" +#include "heap.h" +#include "heap-monitor.h" +#include "ml-globals.h" +#include "ml-timer.h" +#include "gc-stats.h" +#include "ml-mp.h" +#include "vproc-state.h" + +/* MP_SUPPORT */ + +/* PartitionAllocArena: + * + * Divide this allocation arena into smaller disjoint arenas for + * use by the parallel processors. + */ +void PartitionAllocArena (vproc_state_t *vsps[]) +{ + int indivSz; + ml_val_t *aBase; + int i; + int pollFreq = INT_MLtoC(DEREF(PollFreq)); + ml_state_t *msp, *msp0; + + msp0 = vsps[0]->vp_state; + indivSz = msp0->ml_heap->allocSzB / MAX_NUM_PROCS; + aBase = msp0->ml_heap->allocBase; + for (i = 0; i < MAX_NUM_PROCS; i++) { + msp = vsps[i]->vp_state; +#ifdef MP_DEBUG +SayDebug ("vsps[%d]->vp_state-> (ml_allocPtr %x/ml_limitPtr %x) changed to ", +i, msp->ml_allocPtr, msp->ml_limitPtr); +#endif + msp->ml_heap = msp0->ml_heap; + msp->ml_allocPtr = aBase; + msp->ml_realLimit = HEAP_LIMIT_SIZE(aBase, indivSz); + +#ifdef MP_GCPOLL + if (pollFreq > 0) { +#ifdef MP_DEBUG +SayDebug ("(with PollFreq=%d) ", pollFreq); +#endif + msp->ml_limitPtr = aBase + pollFreq*POLL_GRAIN_CPSI; + msp->ml_limitPtr = + (msp->ml_limitPtr > msp->ml_realLimit) + ? msp->ml_realLimit + : msp->ml_limitPtr; + + } + else { + msp->ml_limitPtr = msp->ml_realLimit; + } +#else + msp->ml_limitPtr = HEAP_LIMIT_SIZE(aBase,indivSz); +#endif + +#ifdef MP_DEBUG +SayDebug ("%x/%x\n",msp->ml_allocPtr, msp->ml_limitPtr); +#endif + aBase = (ml_val_t *) (((Addr_t) aBase) + indivSz); + } + +} /* end of PartitionAllocArena */ + + +PVT volatile int MP_RdyForGC = 0; /* the number of processors that are */ + /* ready for the GC. */ +PVT int MPCollectorProc; /* the processor that does the GC */ + +/* extra roots provided by InvokeGCWithRoots go here */ +ml_val_t *mpExtraRoots[NUM_EXTRA_ROOTS*MAX_NUM_PROCS]; +PVT ml_val_t **mpExtraRootsPtr; + +/* MP_StartCollect: + * + * Waits for all procs to check in and chooses one to do the + * collect (MPCollectorProc). MPCollectorProc returns to the invoking + * collection function and does the collect while the other procs + * wait at a barrier. MPCollectorProc will eventually check into this + * barrier releasing the waiting procs. + */ +int MP_StartCollect (ml_state_t *msp) +{ + int nProcs; + vproc_state_t *vsp = msp->ml_vproc; + + MP_SetLock(MP_GCLock); + if (MP_RdyForGC++ == 0) { + mpExtraRoots[0] = NIL(ml_val_t *); + mpExtraRootsPtr = mpExtraRoots; +#ifdef MP_GCPOLL + ASSIGN(PollEvent, ML_true); +#ifdef MP_DEBUG + SayDebug ("%d: set poll event\n", msp->ml_mpSelf); +#endif +#endif + /* we're the first one in, we'll do the collect */ + MPCollectorProc = vsp->vp_mpSelf; +#ifdef MP_DEBUG + SayDebug ("MPCollectorProc is %d\n",MPCollectorProc); +#endif + } + MP_UnsetLock(MP_GCLock); + + { +#ifdef MP_DEBUG + int n = 0; +#endif + /* nb: some other proc can be concurrently acquiring new processes */ + while (MP_RdyForGC != (nProcs = MP_ActiveProcs())) { + /* spin */ +#ifdef MP_DEBUG + if (n == 10000000) { + n = 0; + SayDebug ("%d spinning %d <> %d \n", + msp->ml_mpSelf, MP_RdyForGC, nProcs, msp->ml_allocPtr, + msp->ml_limitPtr); + } + else + n++; +#endif + } + } + + /* Here, all of the processors are ready to do GC */ + +#ifdef MP_GCPOLL + ASSIGN(PollEvent, ML_false); +#ifdef MP_DEBUG + SayDebug ("%d: cleared poll event\n", msp->ml_mpSelf); +#endif +#endif +#ifdef MP_DEBUG + SayDebug ("(%d) all %d/%d procs in\n", msp->ml_mpSelf, MP_RdyForGC, MP_ActiveProcs()); +#endif + if (MPCollectorProc != vsp->vp_mpSelf) { +#ifdef MP_DEBUG + SayDebug ("%d entering barrier %d\n",vsp->vp_mpSelf,nProcs); +#endif + MP_Barrier(MP_GCBarrier, nProcs); + +#ifdef MP_DEBUG + SayDebug ("%d left barrier\n", vsp->vp_mpSelf); +#endif + return 0; + } + + return nProcs; + +} /* end of MP_StartCollect */ + + +/* MP_StartCollectWithRoots: + * + * as above, but collects extra roots into mpExtraRoots + */ +int MP_StartCollectWithRoots (ml_state_t *msp, va_list ap) +{ + int nProcs; + ml_val_t *p; + vproc_state_t *vsp = msp->ml_vproc; + + MP_SetLock(MP_GCLock); + if (MP_RdyForGC++ == 0) { + mpExtraRootsPtr = mpExtraRoots; +#ifdef MP_GCPOLL + ASSIGN(PollEvent, ML_true); +#ifdef MP_DEBUG + SayDebug ("%d: set poll event\n", vsp->vp_mpSelf); +#endif +#endif + /* we're the first one in, we'll do the collect */ + MPCollectorProc = vsp->vp_mpSelf; +#ifdef MP_DEBUG + SayDebug ("MPCollectorProc is %d\n",MPCollectorProc); +#endif + } + while ((p = va_arg(ap, ml_val_t *)) != NIL(ml_val_t *)) { + *mpExtraRootsPtr++ = p; + } + *mpExtraRootsPtr = p; /* NIL(ml_val_t *) */ + MP_UnsetLock(MP_GCLock); + + { +#ifdef MP_DEBUG + int n = 0; +#endif + /* nb: some other proc can be concurrently acquiring new processes */ + while (MP_RdyForGC != (nProcs = MP_ActiveProcs())) { + /* spin */ +#ifdef MP_DEBUG + if (n == 10000000) { + n = 0; + SayDebug ("%d spinning %d <> %d \n", + vsp->vp_mpSelf, MP_RdyForGC, nProcs, msp->ml_allocPtr, + msp->ml_limitPtr); + } + else + n++; +#endif + } + } + + /* Here, all of the processors are ready to do GC */ + +#ifdef MP_GCPOLL + ASSIGN(PollEvent, ML_false); +#ifdef MP_DEBUG + SayDebug ("%d: cleared poll event\n", msp->ml_mpSelf); +#endif +#endif +#ifdef MP_DEBUG + SayDebug ("(%d) all %d/%d procs in\n", msp->ml_vproc->vp_mpSelf, MP_RdyForGC, MP_ActiveProcs()); +#endif + if (MPCollectorProc != vsp->vp_mpSelf) { +#ifdef MP_DEBUG + SayDebug ("%d entering barrier %d\n", vsp->vp_mpSelf, nProcs); +#endif + MP_Barrier(MP_GCBarrier, nProcs); + +#ifdef MP_DEBUG + SayDebug ("%d left barrier\n", vsp->vp_mpSelf); +#endif + return 0; + } + + return nProcs; + +} /* end of MP_StartCollectWithRoots */ + + +/* MP_FinishCollect: + */ +void MP_FinishCollect (ml_state_t *msp, int n) +{ + /* this works, but PartitionAllocArena is overkill */ + PartitionAllocArena(VProc); + MP_SetLock(MP_GCLock); +#ifdef MP_DEBUG + SayDebug ("%d entering barrier %d\n", msp->ml_vproc->vp_mpSelf,n); +#endif + MP_Barrier(MP_GCBarrier,n); + MP_RdyForGC = 0; + +#ifdef MP_DEBUG + SayDebug ("%d left barrier\n", msp->ml_vproc->vp_mpSelf); +#endif + MP_UnsetLock(MP_GCLock); + +} /* end of MP_FinishCollect */ + diff --git a/base/runtime/gc/obj-info.c b/base/runtime/gc/obj-info.c new file mode 100644 index 0000000..3c47f42 --- /dev/null +++ b/base/runtime/gc/obj-info.c @@ -0,0 +1,50 @@ +/*! \file obj-info.c + * + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + */ + +#include "ml-base.h" +#include "ml-values.h" +#include "heap.h" +#include "gc.h" + +/* GetObjGen: + * + * Get the generation of an object (return -1 for external/unboxed objects). + */ +int GetObjGen (ml_val_t obj) +{ + if (isBOXED(obj)) { + aid_t aid = ADDR_TO_PAGEID(BIBOP, obj); + if (IS_BIGOBJ_AID(aid)) { + int i; + bigobj_region_t *region; + bigobj_desc_t *dp; + + /* find the beginning of the region containing the code object */ + i = BIBOP_ADDR_TO_INDEX(obj); + while (! BO_IS_HDR(aid)) { + --i; + aid = INDEX_TO_PAGEID(BIBOP, i); + } + region = (bigobj_region_t *)BIBOP_INDEX_TO_ADDR(i); + dp = ADDR_TO_BODESC(region, obj); + + return dp->gen; + } + else if (aid == AID_NEW) { + return 0; + } + else if (isUNMAPPED(aid)) { + return -1; + } + else { + return EXTRACT_GEN(aid); + } + } + else + return -1; + +} /* end of GetObjGen */ + diff --git a/base/runtime/gc/old-literals.c b/base/runtime/gc/old-literals.c new file mode 100644 index 0000000..69c0b33 --- /dev/null +++ b/base/runtime/gc/old-literals.c @@ -0,0 +1,305 @@ +/*! \file old-literals.c + * + * This is the version 1 literals builder. We keep it around to ease + * the transition to the new scheme, but it can be removed from the runtime + * after the compiler switches over to the new scheme. + * + * \author John Reppy + */ + +/* + * COPYRIGHT (c) 2017 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + */ + +#include "ml-base.h" +#include "ml-objects.h" +#include "heap.h" +#include + +/* Codes for literal machine instructions (version 1): + * INT(i) 0x01 + * RAW32[i] 0x02 + * RAW32[i1,..,in] 0x03 ... + * RAW64[r] 0x04 + * RAW64[r1,..,rn] 0x05 ... + * STR[c1,..,cn] 0x06 ... + * LIT(k) 0x07 -- push stk[k] (for sharing) + * VECTOR(n) 0x08 + * RECORD(n) 0x09 + * RETURN 0xff + */ +#define I_INT 0x01 +#define I_RAW32 0x2 +#define I_RAW32L 0x3 +#define I_RAW64 0x4 +#define I_RAW64L 0x5 +#define I_STR 0x6 +#define I_LIT 0x7 +#define I_VECTOR 0x8 +#define I_RECORD 0x9 +#define I_RETURN 0xff + +#define _B0(p) ((p)[pc]) +#define _B1(p) ((p)[pc+1]) +#define _B2(p) ((p)[pc+2]) +#define _B3(p) ((p)[pc+3]) + +#define GET32(p) \ + ((_B0(p) << 24) | (_B1(p) << 16) | (_B2(p) << 8) | _B3(p)) + +/* the size of a list cons cell in bytes */ +#define CONS_SZB (WORD_SZB*3) + +/* the amount of free space that we want in the allocation arena; this value must be + * less than MIN_ALLOC_SZB (defined in include/ml-limits.h) + */ +#define FREE_REQ_SZB 64*ONE_K + +/* GetDouble: + */ +PVT double GetDouble (Byte_t *p) +{ + int i; + union { + double d; + Byte_t b[sizeof(double)]; + } u; + +#ifdef BYTE_ORDER_LITTLE + for (i = sizeof(double)-1; i >= 0; i--) { + u.b[i] = *p++; + } +#else + for (i = 0; i < sizeof(double); i++) { + u.b[i] = p[i]; + } +#endif + + return u.d; + +} /* end of GetDouble */ + +/* OldLiterals: + * + * The Version 1 build literals function. We assume that the header has already + * been consumed in the version check (see build-literals.c), which means that the + * `pc` argument should point to the first command. + */ +ml_val_t BuildLiteralsV1 (ml_state_t *msp, Byte_t *lits, int pc, int len) +{ + ml_val_t stk, res; + Int32_t i, j, n; + Int32_t availSpace, spaceReq; + double d; + +#ifdef DEBUG_LITERALS + SayDebug("BuildLiteralsV1: lits = %p, len = %d\n", (void *)lits, len); +#endif + if (len <= 0) return ML_nil; + +/* A check that the available space is sufficient for the literal object that + * we are about to allocate. Note that the cons cell has already been accounted + * for in availSpace (but not in spaceReq). + */ +#define GC_CHECK \ + do { \ + if (spaceReq > availSpace) { \ + InvokeGCWithRoots (msp, 0, (ml_val_t *)&lits, &stk, NIL(ml_val_t *)); \ + availSpace = ((size_t)msp->ml_limitPtr - (size_t)msp->ml_allocPtr) - CONS_SZB; \ + } \ + } while (0) + + stk = ML_nil; + while (TRUE) { + ASSERT(pc < len); + /* ensure that there is at least 1Kb of available space -- enough for fixed-size + * objects. + */ + availSpace = ((size_t)msp->ml_limitPtr - (size_t)msp->ml_allocPtr); + if (availSpace < ONE_K) { + if (NeedGC(msp, FREE_REQ_SZB)) + InvokeGCWithRoots (msp, 0, (ml_val_t *)&lits, &stk, NIL(ml_val_t *)); + availSpace = ((size_t)msp->ml_limitPtr - (size_t)msp->ml_allocPtr); + } + switch (lits[pc++]) { + case I_INT: + i = GET32(lits); pc += 4; +#ifdef DEBUG_LITERALS +SayDebug("[%2d]: INT(%d)\n", pc-5, i); +#endif + LIST_cons(msp, stk, INT_CtoML(i), stk); + break; + case I_RAW32: + i = GET32(lits); pc += 4; +#ifdef DEBUG_LITERALS +SayDebug("[%2d]: RAW32[%d]\n", pc-5, i); +#endif + res = INT32_CtoML(msp, i); + LIST_cons(msp, stk, res, stk); + break; + case I_RAW32L: + n = GET32(lits); pc += 4; +#ifdef DEBUG_LITERALS +SayDebug("[%2d]: RAW32L(%d) [...]\n", pc-5, n); +#endif + ASSERT(n > 0); + spaceReq = CONS_SZB + WORD_SZB + 4 * n; + ASSERT((spaceReq & (WORD_SZB-1)) == 0); +/* FIXME: for large objects, we should be allocating them in the 1st generation */ + GC_CHECK; + ML_AllocWrite (msp, 0, MAKE_DESC(n, DTAG_raw)); + for (j = WORD_SZB/4; j <= n; j++) { + i = GET32(lits); pc += 4; + ML_AllocWrite32 (msp, j, i); + } + res = ML_Alloc (msp, n); + LIST_cons(msp, stk, res, stk); + break; + case I_RAW64: + d = GetDouble(&(lits[pc])); pc += 8; + REAL64_ALLOC(msp, res, d); +#ifdef DEBUG_LITERALS +SayDebug("[%2d]: RAW64[%f] @ %#x\n", pc-5, d, res); +#endif + LIST_cons(msp, stk, res, stk); + break; + case I_RAW64L: + n = GET32(lits); pc += 4; +#ifdef DEBUG_LITERALS +SayDebug("[%2d]: RAW64L(%d) [...]\n", pc-5, n); +#endif + ASSERT(n > 0); + /* space request includes extra padding word */ + spaceReq = CONS_SZB + 2 * WORD_SZB + 8 * n; +/* FIXME: for large objects, we should be allocating them in the 1st generation */ + GC_CHECK; +#ifdef ALIGN_REALDS + /* Force REALD_SZB alignment (descriptor is off by one word) */ + msp->ml_allocPtr = (ml_val_t *)((Addr_t)(msp->ml_allocPtr) | WORD_SZB); +#endif + j = 2*n; /* number of words */ + ML_AllocWrite (msp, 0, MAKE_DESC(j, DTAG_raw64)); + res = ML_Alloc (msp, j); + for (j = 0; j < n; j++) { + PTR_MLtoC(double, res)[j] = GetDouble(&(lits[pc])); pc += 8; + } + LIST_cons(msp, stk, res, stk); + break; + case I_STR: + n = GET32(lits); pc += 4; +#ifdef DEBUG_LITERALS +SayDebug("[%2d]: STR(%d) [...]", pc-5, n); +#endif + if (n == 0) { +#ifdef DEBUG_LITERALS +SayDebug("\n"); +#endif + LIST_cons(msp, stk, ML_string0, stk); + break; + } + j = BYTES_TO_WORDS(n+1); /* include space for '\0' */ + /* the space request includes space for the data-object header word and + * the sequence header object. + */ + spaceReq = WORD_SZB*(j+1+3); +/* FIXME: for large strings, we should be allocating them in the 1st generation */ + GC_CHECK; + /* allocate the data object */ + ML_AllocWrite(msp, 0, MAKE_DESC(j, DTAG_raw)); + ML_AllocWrite (msp, j, 0); /* so word-by-word string equality works */ + res = ML_Alloc (msp, j); +#ifdef DEBUG_LITERALS +SayDebug(" @ %p (%d words)\n", (void *)res, j); +#endif + memcpy (PTR_MLtoC(void, res), &lits[pc], n); pc += n; + /* allocate the header object */ + SEQHDR_ALLOC(msp, res, DESC_string, res, n); + /* push on stack */ + LIST_cons(msp, stk, res, stk); + break; + case I_LIT: + n = GET32(lits); pc += 4; + for (j = 0, res = stk; j < n; j++) { + res = LIST_tl(res); + } +#ifdef DEBUG_LITERALS +SayDebug("[%2d]: LIT(%d) = %p\n", pc-5, n, (void *)LIST_hd(res)); +#endif + LIST_cons(msp, stk, LIST_hd(res), stk); + break; + case I_VECTOR: + n = GET32(lits); pc += 4; +#ifdef DEBUG_LITERALS +SayDebug("[%2d]: VECTOR(%d) [", pc-5, n); +#endif + if (n == 0) { +#ifdef DEBUG_LITERALS +SayDebug("]\n"); +#endif + LIST_cons(msp, stk, ML_vector0, stk); + break; + } + /* the space request includes space for the data-object header word and + * the sequence header object. + */ + spaceReq = WORD_SZB*(n+1+3); +/* FIXME: for large vectors, we should be allocating them in the 1st generation */ + GC_CHECK; + /* allocate the data object */ + ML_AllocWrite(msp, 0, MAKE_DESC(n, DTAG_vec_data)); + /* top of stack is last element in vector */ + for (j = n; j > 0; j--) { + ML_AllocWrite(msp, j, LIST_hd(stk)); + stk = LIST_tl(stk); + } + res = ML_Alloc(msp, n); + /* allocate the header object */ + SEQHDR_ALLOC(msp, res, DESC_polyvec, res, n); +#ifdef DEBUG_LITERALS +SayDebug("...] @ %p\n", (void *)res); +#endif + LIST_cons(msp, stk, res, stk); + break; + case I_RECORD: + n = GET32(lits); pc += 4; +#ifdef DEBUG_LITERALS +SayDebug("[%2d]: RECORD(%d) [", pc-5, n); +#endif + if (n == 0) { +#ifdef DEBUG_LITERALS +SayDebug("]\n"); +#endif + LIST_cons(msp, stk, ML_unit, stk); + break; + } + else { + spaceReq = WORD_SZB*(n+1); + GC_CHECK; + ML_AllocWrite(msp, 0, MAKE_DESC(n, DTAG_record)); + } + /* top of stack is the last element in the record */ + for (j = n; j > 0; j--) { + ML_AllocWrite(msp, j, LIST_hd(stk)); + stk = LIST_tl(stk); + } + res = ML_Alloc(msp, n); +#ifdef DEBUG_LITERALS +SayDebug("...] @ %p\n", (void *)res); +#endif + LIST_cons(msp, stk, res, stk); + break; + case I_RETURN: + ASSERT(pc == len); +#ifdef DEBUG_LITERALS +SayDebug("[%2d]: RETURN(%p)\n", pc-5, (void *)LIST_hd(stk)); +#endif + return (LIST_hd(stk)); + break; + default: + Die ("bogus literal opcode #%x @ %d", lits[pc-1], pc-1); + } /* switch */ + } /* while */ + +} /* end of BuildLiteralsV1 */ + diff --git a/base/runtime/gc/record-ops.c b/base/runtime/gc/record-ops.c new file mode 100644 index 0000000..5f17239 --- /dev/null +++ b/base/runtime/gc/record-ops.c @@ -0,0 +1,82 @@ +/* record-ops.c + * + * COPYRIGHT (c) 1998 Bell Labs, Lucent Technologies. + * + * Some (type unsafe) operations on records. + */ + + +#include "ml-base.h" +#include "ml-values.h" +#include "ml-state.h" +#include "ml-objects.h" +#include "arena-id.h" +#include "gc.h" + + +/* GetLen: + * + * Check that we really have a record object, and return its length. + */ +PVT int GetLen (ml_val_t r) +{ + ml_val_t d; + int t; + + if (! isBOXED(r)) + return -1; + + switch (EXTRACT_OBJC(ADDR_TO_PAGEID(BIBOP, r))) { + case OBJC_new: + d = OBJ_DESC(r); + t = GET_TAG(d); + if (t == DTAG_record) + return GET_LEN(d); + else + return -1; + case OBJC_pair: return 2; + case OBJC_record: + d = OBJ_DESC(r); + t = GET_TAG(d); + if (t == DTAG_record) + return GET_LEN(d); + else + return -1; + default: + return -1; + } + +} + +/* RecordConcat: + * + * Concatenate two records; returns unit if either argument is not + * a record of length at least one. + */ +ml_val_t RecordConcat (ml_state_t *msp, ml_val_t r1, ml_val_t r2) +{ + int l1 = GetLen(r1); + int l2 = GetLen(r2); + + if ((l1 > 0) && (l2 > 0)) { + int n = l1+l2; + int i, j; + ml_val_t *p, res; + + ML_AllocWrite (msp, 0, MAKE_DESC(n, DTAG_record)); + j = 1; + for (i = 0, p = PTR_MLtoC(ml_val_t, r1); i < l1; i++, j++) { + ML_AllocWrite (msp, j, p[i]); + } + for (i = 0, p = PTR_MLtoC(ml_val_t, r2); i < l2; i++, j++) { + ML_AllocWrite (msp, j, p[i]); + } + res = ML_Alloc(msp, n); + return res; + } + else { + return ML_unit; + } + +} /* end of RecordConcat */ + diff --git a/base/runtime/gc/writer.c b/base/runtime/gc/writer.c new file mode 100644 index 0000000..ab06a55 --- /dev/null +++ b/base/runtime/gc/writer.c @@ -0,0 +1,106 @@ +/* writer.c + * + * COPYRIGHT (c) 1995 AT&T Bell Laboratories. + * + * An implementation of the abstract writers on top of ANSI C streams. + */ + +#include +#include "ml-base.h" +#include "writer.h" + +#ifndef SEEK_SET +# define SEEK_SET 0 +#endif + +PVT void Put (writer_t *wr, Word_t w); +PVT void Write (writer_t *wr, const void *data, Addr_t nbytes); +PVT void Flush (writer_t *wr); +PVT off_t Tell (writer_t *wr); +PVT void Seek (writer_t *wr, off_t offset); +PVT void Free (writer_t *wr); + +#define FileOf(wr) ((FILE *)((wr)->data)) + +/* WR_OpenFile: + * + * Open a file for writing, and make a writer for it. + */ +writer_t *WR_OpenFile (FILE *f) +{ + writer_t *wr; + + if (f == NULL) + return NIL(writer_t *); + + wr = NEW_OBJ(writer_t); + wr->errFlg = FALSE; + wr->data = (void *)f; + wr->putWord = Put; + wr->write = Write; + wr->flush = Flush; + wr->tell = Tell; + wr->seek = Seek; + wr->free = Free; + + return wr; + +} /* end of WR_OpenFile */ + +/* Put: + */ +PVT void Put (writer_t *wr, Word_t w) +{ + FILE *f = FileOf(wr); + + if (fwrite((void *)&w, WORD_SZB, 1, f) != 1) { + wr->errFlg = TRUE; + } + +} /* end of Put */ + +/* Write: + */ +PVT void Write (writer_t *wr, const void *data, Addr_t nbytes) +{ + FILE *f = FileOf(wr); + + if (fwrite(data, 1, nbytes, f) != nbytes) { + wr->errFlg = TRUE; + } + +} /* end of Write */ + +/* Flush: + */ +PVT void Flush (writer_t *wr) +{ + fflush (FileOf(wr)); + +} /* end of Flush */ + +/* Tell: + */ +PVT off_t Tell (writer_t *wr) +{ + return ftell(FileOf(wr)); + +} /* end of Tell */ + +/* Seek: + */ +PVT void Seek (writer_t *wr, off_t offset) +{ + if (fseek(FileOf(wr), offset, SEEK_SET) != 0) + wr->errFlg = TRUE; + +} /* end of Seek */ + +/* Free: + */ +PVT void Free (writer_t *wr) +{ + fflush (FileOf(wr)); + FREE(wr); + +} /* end of Free */ diff --git a/base/runtime/gc/writer.h b/base/runtime/gc/writer.h new file mode 100644 index 0000000..0d15108 --- /dev/null +++ b/base/runtime/gc/writer.h @@ -0,0 +1,38 @@ +/* writer.h + * + * COPYRIGHT (c) 1995 AT&T Bell Laboratories. + * + * This is an abstraction of a buffered output device for writing + * heap data. + */ + +#ifndef _WRITER_ +#define _WRITER_ + +#include /* for FILE */ + +typedef struct writer { + bool_t errFlg; + void *data; + void (*putWord)(struct writer *, Word_t); + void (*write)(struct writer *, const void *, Addr_t); + void (*flush)(struct writer *); + off_t (*tell)(struct writer *); + void (*seek)(struct writer *, off_t offset); + void (*free)(struct writer *); +} writer_t; + +/* open a file for writing, and make a file for it */ +extern writer_t *WR_OpenFile (FILE *file); +/* make a writer from a region of memory */ +extern writer_t *WR_OpenMem (Byte_t *data, Addr_t len); + +#define WR_Error(wr) ((wr)->errFlg) +#define WR_Put(wr, w) ((wr)->putWord((wr), (w))) +#define WR_Write(wr, data, nbytes) ((wr)->write((wr), (data), (nbytes))) +#define WR_Flush(wr) ((wr)->flush(wr)) +#define WR_Tell(wr) ((wr)->tell(wr)) +#define WR_Seek(wr, offset) ((wr)->seek((wr), (offset))) +#define WR_Free(wr) ((wr)->free(wr)) + +#endif /* !_WRITER_ */ diff --git a/base/runtime/include/asm-base.h b/base/runtime/include/asm-base.h new file mode 100644 index 0000000..79bdc28 --- /dev/null +++ b/base/runtime/include/asm-base.h @@ -0,0 +1,126 @@ +/*! \file asm-base.h + * + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * Common definitions for assembly files in the SML/NJ system. + * Note that we do not include this file in either X86.prim.asm + * or AMD64.prim.asm; instead we use x86-syntax.h for those files. + */ + +#ifndef _ASM_BASE_ +#define _ASM_BASE_ + +#ifndef _ML_BASE_ +#include "ml-base.h" +#endif + +/* bool_t values for assembly code */ +#define FALSE 0 +#define TRUE 1 + +#if (!defined(GLOBALS_HAVE_UNDERSCORE)) && (((defined(OPSYS_FREEBSD) || defined(OPSYS_NETBSD2) || defined(OPSYS_OPENBSD)) && !defined(__ELF__)) || defined(OPSYS_WIN32) || defined(OPSYS_DARWIN) || defined(OPSYS_CYGWIN)) +# define GLOBALS_HAVE_UNDERSCORE +#endif + +/* we should probably consider factoring this out into ml-unixdep.h -- JHR */ +#ifdef GLOBALS_HAVE_UNDERSCORE +# define CSYM(ID) CONCAT(_,ID) +#else +# define CSYM(ID) ID +#endif + +#if defined(ARCH_SPARC) +# if defined(OPSYS_SOLARIS) +# define _ASM +# include +# include +# endif +# define GLOBAL(ID) .global ID +# define LABEL(ID) ID: +# define ALIGN4 .align 4 +# define WORD(W) .word W +# define TEXT .seg "text" +# define DATA .seg "data" +# define BEGIN_PROC(P) +# define END_PROC(P) + +#elif defined(ARCH_PPC) +# if defined(OPSYS_AIX) +# define CFUNSYM(ID) CONCAT(.,ID) +# define USE_TOC +# define GLOBAL(ID) .globl ID +# define TEXT .csect [PR] +# define DATA .csect [RW] +# define RO_DATA .csect [RO] +# define ALIGN4 .align 2 +# define ALIGN8 .align 3 +# define DOUBLE(V) .double V +# define LABEL(ID) ID: + +# elif defined(OPSYS_LINUX) +# define CFUNSYM(ID) ID +# define GLOBAL(ID) .globl ID +# define TEXT .section ".text" +# define DATA .section ".data" +# define RO_DATA .section ".rodata" +# define ALIGN4 .align 2 +# define ALIGN8 .align 3 +# define DOUBLE(V) .double V +# define LABEL(ID) ID: + +# elif defined(OPSYS_DARWIN) +# define CFUNSYM(ID) CSYM(ID) +# define GLOBAL(ID) .globl ID +# define TEXT .text +# define DATA .data +# define RO_DATA .data +# define ALIGN4 .align 2 +# define ALIGN8 .align 3 +# define DOUBLE(V) .double V +# define LABEL(ID) ID: +# define __SC__ @ + +# elif defined(OPSYS_OPENBSD) +# define CFUNSYM(ID) CSYM(ID) +# define GLOBAL(ID) .globl ID +# define TEXT .text +# define DATA .data +# define RO_DATA .data +# define ALIGN4 .align 2 +# define ALIGN8 .align 3 +# define DOUBLE(V) .double V +# define LABEL(ID) ID: +# endif + +# define CENTRY(ID) \ + .globl CFUNSYM(ID) __SC__ \ + LABEL(CFUNSYM(ID)) + +/* FIXME: move these definitions to the x86-prim.h file */ +#elif defined(ARCH_X86) || defined(ARCH_AMD64) +# error use x86-syntax.h instead if ml-base.h + +#else +# error missing asm definitions + +#endif + +#ifndef __SC__ +# define __SC__ ; +#endif + +# define CGLOBAL(ID) GLOBAL(CSYM(ID)) + +#define ENTRY(ID) \ + CGLOBAL(ID) __SC__ \ + LABEL(CSYM(ID)) + +#define ML_CODE_HDR(name) \ + CGLOBAL(name) __SC__ \ + ALIGN4 __SC__ \ + LABEL(CSYM(name)) +#define IMMED(x) CONST(x) + +#endif /* !_ASM_BASE_ */ + diff --git a/base/runtime/include/bibop.h b/base/runtime/include/bibop.h new file mode 100644 index 0000000..49d4331 --- /dev/null +++ b/base/runtime/include/bibop.h @@ -0,0 +1,132 @@ +/*! \file bibop.h + * + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * The BIBOP maps memory pages to page IDs. The interpretation of most + * of these IDs is defined by the GC (see ../gc/arena-id.h), but the + * IDs for unmapped memory are defined here. + */ + +#ifndef _BIBOP_ +#define _BIBOP_ + +typedef Unsigned16_t page_id_t; + +#define PAGEID_unmapped 0xffff + +#define isUNMAPPED(ID) ((ID) == PAGEID_unmapped) + + +/** The BIBOP **/ + +#ifdef SIZE_64 + +/* for 64-bit ML values, we use a two-level BIBOP to resolve heap pointers to + * BIBOP pages. We assume that virtual addresses are < 2^48 (true for current + * 64-bit hardware). The top-level (L1) table consists of pointers to L2 tables. + * We preallocate a special L2 table for unmapped regions. + * + * A 64-bit address is logically partitioned into four parts: + * + * |00000000|00000000|aaaaaaaa|aaaaaaaa|bbbbbbbb|bbbbbbcc|cccccccc|cccccccc| + * + * [63..48] -- assumed to be zero and ignored + * [47..32] (a) -- L1 index (16 bits) + * [31..18] (b) -- L2 index (14 bits) + * [17..00] (c) -- page bits + * + * The concatenation of the L1 and L2 indices define the flat BIBOP index, + * which is 30 bits. + */ + +/* we assume that the virtual address space is limited to 48 bits */ +#define BIBOP_ADDR_BITS 48 +/* the log2 size of the L1 bibop table */ +#define BIBOP_L1_BITS 16 +/* the log2 size of the L2 bibop table */ +#define BIBOP_L2_BITS 14 +/* the log2 size of a flat BIBOP index */ +#define BIBOP_BITS (BIBOP_L1_BITS + BIBOP_L2_BITS) +/* the log2 size of a BIBOP page in bytes */ +#define BIBOP_PAGE_BITS (BIBOP_ADDR_BITS - BIBOP_BITS) +/* L1 table size */ +#define BIBOP_L1_SZ (1 << BIBOP_L1_BITS) +/* L2 table size */ +#define BIBOP_L2_SZ (1 << BIBOP_L2_BITS) +/* shift amount to convert address to L1 index */ +#define BIBOP_L1_SHIFT (BIBOP_L2_BITS + BIBOP_PAGE_BITS) +/* shift amount to convert address to L2 index */ +#define BIBOP_L2_SHIFT BIBOP_PAGE_BITS +/* mask for L2 index */ +#define BIBOP_L2_MASK (BIBOP_L2_SZ - 1) + +/* convert an address to a flat BIBOP index */ +#define BIBOP_ADDR_TO_INDEX(a) ((Addr_t)(a) >> BIBOP_L2_SHIFT) +/* convert an address to its level-1 table index */ +#define BIBOP_ADDR_TO_L1_INDEX(a) ((Addr_t)(a) >> BIBOP_L1_SHIFT) +/* convert an address to its level-2 table index */ +#define BIBOP_ADDR_TO_L2_INDEX(a) (BIBOP_ADDR_TO_INDEX(a) & BIBOP_L2_MASK) +/* convert a flat BIBOP index to a L1 table index */ +#define BIBOP_INDEX_TO_L1_INDEX(ix) ((ix) >> BIBOP_L2_BITS) +/* convert a flat BIBOP index to a L2 table index */ +#define BIBOP_INDEX_TO_L2_INDEX(ix) ((ix) & BIBOP_L2_MASK) +/* convert a flat BIBOP index to a memory address */ +#define BIBOP_INDEX_TO_ADDR(i) ((Addr_t)(i) << BIBOP_L2_SHIFT) + +typedef struct { + page_id_t tbl[BIBOP_L2_SZ]; + Unsigned32_t numMapped; +} l2_bibop_t; + +/* The BIBOP is a L1 table of pointers to L2 tables */ +typedef l2_bibop_t **bibop_t; + +extern bibop_t BIBOP; +extern l2_bibop_t UnmappedL2; + +#define UNMAPPED_L2_TBL &UnmappedL2 + +#define ADDR_TO_PAGEID(bibop,a) \ + (bibop[BIBOP_ADDR_TO_L1_INDEX(a)]->tbl[BIBOP_ADDR_TO_L2_INDEX(a)]) +#define INDEX_TO_PAGEID(bibop,ix) \ + (bibop[BIBOP_INDEX_TO_L1_INDEX(ix)]->tbl[BIBOP_INDEX_TO_L2_INDEX(ix)]) + +/* update a BIBOP entry at the given index */ +#define BIBOP_UPDATE(bibop, ix, aid) \ + do { bibop[BIBOP_INDEX_TO_L1_INDEX(ix)]->tbl[BIBOP_INDEX_TO_L2_INDEX(ix)] = (aid); } while (0) + +#else /* SIZE_32 */ + +#define BIBOP_PAGE_BITS 16 /* log2(BIBOP_PAGE_SZB) */ +#define BIBOP_BITS (BITS_PER_WORD-BIBOP_PAGE_BITS) +#define BIBOP_SZ (1 << BIBOP_BITS) +#define BIBOP_ADDR_TO_INDEX(a) ((Addr_t)(a) >> BIBOP_PAGE_BITS) + +#define BIBOP_INDEX_TO_ADDR(i) ((Addr_t)(i) << BIBOP_PAGE_BITS) +#define BIBOP_NBLKS_TO_SZB(i) ((Addr_t)(i) << BIBOP_PAGE_BITS) + +typedef page_id_t *bibop_t; + +extern bibop_t BIBOP; + +#define ADDR_TO_PAGEID(bibop,a) ((bibop)[BIBOP_ADDR_TO_INDEX(a)]) +#define INDEX_TO_PAGEID(bibop,a) ((bibop)[a]) + +/* update a BIBOP entry at the given index */ +#define BIBOP_UPDATE(bibop, ix, aid) do { (bibop)[ix] = (aid); } while (0) + +#endif /* !SIZE_64 */ + +/* validate the BIBOP page size */ +#if (BIBOP_PAGE_SZB != (1 << BIBOP_PAGE_BITS)) +# error BIBOP_PAGE_SZB in ml-base.h does not equal (1 << BIBOP_PAGE_BITS) +#endif + +/* allocate and initialize a Bibop */ +extern bibop_t InitBibop (); + +/* free a Bibop */ +extern void FreeBibop (bibop_t bibop); + +#endif /* !_BIBOP_ */ diff --git a/base/runtime/include/bill-os.h b/base/runtime/include/bill-os.h new file mode 100644 index 0000000..6b52c2f --- /dev/null +++ b/base/runtime/include/bill-os.h @@ -0,0 +1,85 @@ +/* + * This file provides all the ugly Windows -> POSIX simulation code. + * For cygwin and mingw only. On all other platforms this file does nothing. + */ + +#ifndef __BILL_OS_H__ +#define __BILL_OS_H__ + + /* C++/C compatibility */ +#if defined(__cplusplus) +#define __BEGIN_CDECLS extern "C" { +#define __END_CDECLS } +#define __NO_THROW throw () +#else +#define __BEGIN_CDECLS +#define __END_CDECLS +#define __NO_THROW +#endif + + /* Beginning of the mess */ +#if defined(__CYGWIN__) || defined(__MINGW32__) + +#if defined(__CYGWIN__) +#include +#endif + +#if defined(__MINGW32__) +#include +#endif + +__BEGIN_CDECLS + +/* + * These are missing on Cygwin/Mingw. The file /etc/networks doesn't + * exists. + */ + +struct netent * getnetbyname(const char * name) __NO_THROW; +struct netent * getnetbyaddr(long net, int type) __NO_THROW; + +/* + * The following functions require simulation on mingw. + * Simulation includes: + * 1. symlinks support + * 2. signals + */ +#if defined(__MINGW32__) + +#include + +/* Symlinks sipport */ +int open(const char * filename, int, ...) __NO_THROW; +int symlink(const char * , const char *) __NO_THROW; +int readlink(const char * , const char *) __NO_THROW; +int stat(const char * filename, struct stat * buf) __NO_THROW; +int lstat(const char * filename, struct stat * buf) __NO_THROW; +FILE * winduh_fopen(const char * filename, const char * mode) __NO_THROW; +FILE * winduh_freopen(const char * filename, const char * mode, FILE *) + __NO_THROW; + +/* Signals */ +#ifndef SIGHUP +#define SIGHUP 1 +#endif +#ifndef SIGQUIT +#define SIGQUIT 3 +#endif +#ifndef SIGALRM +#define SIGALRM 14 +#endif + +/* Dlopen simulation */ +void * dlopen(const char * filename, int) __NO_THROW; +void dlclose(void *) __NO_THROW; +void * dlsym(void *,const char * name) __NO_THROW; +const char * dlerror(void) __NO_THROW; + + +#endif + +__END_CDECLS + +#endif + +#endif // __BILL_OS_H__ diff --git a/base/runtime/include/bin-file.h b/base/runtime/include/bin-file.h new file mode 100644 index 0000000..c1d3e99 --- /dev/null +++ b/base/runtime/include/bin-file.h @@ -0,0 +1,51 @@ +/*! \file bin-file.h + * + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * The layout is: + * header + * import PerIDs (16 bytes each) + * export PerIDs (16 bytes each) + * CM dependency information + * inlinable lambda expression + * reserved area 1 (typically empty) + * reserved area 2 (typically empty) + * code objects + * This section contains a sequence of code objects, each of + * which is lead by its size. The individual sizes must sum up to + * codeSzB. + * pickled static environment + */ + +#ifndef _BIN_FILE_ +#define _BIN_FILE_ + +#ifndef _ML_BASE_ +#include "ml-base.h" +#endif + + +/** Persistent IDs **/ +#define PERID_LEN 16 + +typedef struct { /* a persistent ID (PerID) */ + Byte_t bytes[PERID_LEN]; +} pers_id_t; + + +typedef struct { /* The header of a .bin file; note that the fields */ + /* are in big-endian representation. */ + Byte_t magic[16]; /* magic number */ + Int32_t importCnt; /* the number of imported PerIDs. */ + Int32_t exportCnt; /* the number of exported PerIDs. */ + Int32_t importSzB; /* size of import tree area */ + Int32_t cmInfoSzB; /* the size of the CM dependency information area */ + Int32_t lambdaSzB; /* the size of inlinable lambda expressions */ + Int32_t reserved; /* reserved for future use */ + Int32_t pad; /* padding for code segment alignment */ + Int32_t codeSzB; /* the number of bytes of code */ + Int32_t envSzB; /* the size of the environment */ +} binfile_hdr_t; + +#endif /* !_BIN_FILE_ */ diff --git a/base/runtime/include/c-globals-tbl.h b/base/runtime/include/c-globals-tbl.h new file mode 100644 index 0000000..3332d6c --- /dev/null +++ b/base/runtime/include/c-globals-tbl.h @@ -0,0 +1,28 @@ +/* c-globals-tbl.h + * + * COPYRIGHT (c) 1992 by AT&T Bell Laboratories. + */ + +#ifndef _C_GLOBALS_TBL_ +#define _C_GLOBALS_TBL_ + +typedef struct export_table export_table_t; + +/* info about an exported external reference */ +typedef const char *export_item_t; + +extern void RecordCSymbol (const char *name, ml_val_t addr); +extern const char *AddrToCSymbol (ml_val_t addr); + +extern export_table_t *NewExportTbl (); +extern void FreeExportTbl (export_table_t *tbl); + +extern ml_val_t ExportCSymbol (export_table_t *tbl, ml_val_t addr); +extern ml_val_t AddrOfCSymbol (export_table_t *tbl, ml_val_t xref); +extern void ExportedSymbols (export_table_t *tbl, int *numSymbs, export_item_t **symbs); + +extern ml_val_t ImportCSymbol (const char *name); + +extern Addr_t ExportTableSz (export_table_t *tbl); + +#endif /* !_C_GLOBALS_TBL_ */ diff --git a/base/runtime/include/c-library.h b/base/runtime/include/c-library.h new file mode 100644 index 0000000..769e7d9 --- /dev/null +++ b/base/runtime/include/c-library.h @@ -0,0 +1,45 @@ +/* c-library.h + * + * COPYRIGHT (c) 1994 AT&T Bell Laboratories. + */ + +#ifndef _C_LIBRARY_ +#define _C_LIBRARY_ + +/* a pointer to a library initialization function; it is passed the + * list of command-line arguments. + */ +typedef void (*clib_init_fn_t) (int, char **); + +/* a pointer to an ML callable C function */ +typedef ml_val_t (*cfunc_t) (ml_state_t *, ml_val_t); + +/* an element in the table of name/function pairs. */ +typedef struct { + const char *name; + cfunc_t cfunc; +} cfunc_binding_t; + +/* The representation of a library of ML callable C functions */ +typedef struct { + const char *libName; /* the library name */ + const char *version; + const char *date; + clib_init_fn_t initFn; /* an optional initialization function */ + cfunc_binding_t *cfuns; /* the list of C function bindings, which is */ + /* terminated by {0, 0}. */ +} c_library_t; + + +/* A C function prototype declaration */ +#define CFUNC_PROTO(NAME, FUNC, MLTYPE) \ + extern ml_val_t FUNC (ml_state_t *msp, ml_val_t arg); + +/* A C function binding */ +#define CFUNC_BIND(NAME, FUNC, MLTYPE) \ + { NAME, FUNC }, + +/* the terminator for a C function list */ +#define CFUNC_NULL_BIND { NIL(const char *), NIL(cfunc_t) } + +#endif /* !_C_LIBRARY_ */ diff --git a/base/runtime/include/cache-flush.h b/base/runtime/include/cache-flush.h new file mode 100644 index 0000000..4974eca --- /dev/null +++ b/base/runtime/include/cache-flush.h @@ -0,0 +1,31 @@ +/* cache-flush.h + * + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * System dependent includes and macros for flushing the cache. + */ + +#ifndef _CACHE_FLUSH_ +#define _CACHE_FLUSH_ + +#if defined(ARCH_X86) +/* 386 & 486 have unified caches and the pentium has hardware consistency */ +# define FlushICache(addr, size) + +#elif (defined(ARCH_PPC) && defined(OPSYS_AIX)) +# include +# define FlushICache(addr, size) _sync_cache_range((addr), (size)) + +#elif (defined(ARCH_SPARC) || defined(OPSYS_MKLINUX)) +extern FlushICache (void *addr, int nbytes); + +#elif (defined(ARCH_PPC) && (defined(OPSYS_LINUX) || defined(OPSYS_DARWIN) )) +extern FlushICache (void *addr, int nbytes); + +#else +# define FlushICache(addr, size) +#endif + +#endif /* !_CACHE_FLUSH_ */ + diff --git a/base/runtime/include/cntr.h b/base/runtime/include/cntr.h new file mode 100644 index 0000000..fe3b13b --- /dev/null +++ b/base/runtime/include/cntr.h @@ -0,0 +1,53 @@ +/*! \file cntr.h + * + * Large counters for large (> 2^31) values. + */ + +/* + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + */ + +#ifndef _CNTR_ +#define _CNTR_ + +#define ONE_MILLION 1000000 + +typedef struct { + Unsigned64_t cnt; +} cntr_t; + +#define CNTR_INCR(cp, i) { \ + cntr_t *__cp = (cp); \ + __cp->cnt += (i); \ + } + +#define CNTR_INCR1(cp) { \ + cntr_t *__cp = (cp); \ + __cp->cnt++; \ + } + +#define CNTR_ZERO(cp) { \ + (cp)->cnt = 0; \ + } + +#define CNTR_TO_REAL(cp) \ + ((double)((cp)->cnt)) + +/* Add cp2 to cp1 */ +#define CNTR_ADD(cp1, cp2) { \ + cntr_t *__cp1 = (cp1); \ + cntr_t *__cp2 = (cp2); \ + __cp1->cnt += __cp2->cnt; \ + } + +#define CNTR_PERCENT(cp1, cp2) ((100.0*CNTR_TO_REAL(cp1)) / CNTR_TO_REAL(cp2)) + +#define CNTR_FPRINTF(f,cp,wid) { \ + cntr_t *__cp = (cp); \ + int __w = (wid); \ + fprintf ((f), "%*llu", __w, (long long unsigned)(__cp->cnt)); \ + } + +#endif /* !_CNTR_ */ + diff --git a/base/runtime/include/gc.h b/base/runtime/include/gc.h new file mode 100644 index 0000000..276f3d3 --- /dev/null +++ b/base/runtime/include/gc.h @@ -0,0 +1,38 @@ +/* gc.h + * + * COPYRIGHT (c) 1992 AT&T Bell Laboratories + * + * The external interface to the garbage collector. + * + */ + +#ifndef _GC_ +#define _GC_ + +#ifndef _ML_BASE_ +#include "ml-base.h" +#endif + +/* typedef struct heap heap_t; */ /* from ml-base.h */ + +extern void InitHeap (ml_state_t *msp, bool_t isBoot, heap_params_t *params); +extern void InvokeGC (ml_state_t *msp, int level); +extern void InvokeGCWithRoots (ml_state_t *msp, int level, ...); +extern bool_t NeedGC (ml_state_t *msp, Word_t nbytes); + +extern int GetObjGen (ml_val_t obj); +extern ml_val_t RecordConcat (ml_state_t *msp, ml_val_t r1, ml_val_t r2); + +Byte_t *BO_AddrToCodeObjTag (Word_t pc); + +#ifdef HEAP_MONITOR +extern status_t HeapMon_Init (heap_t *heap); +#else +#define HeapMon_Init(A) +#endif + +#ifdef GC_STATS +extern void ClearGCStats (heap_t *heap); +#endif + +#endif /* !_GC_ */ diff --git a/base/runtime/include/heap-io.h b/base/runtime/include/heap-io.h new file mode 100644 index 0000000..952c115 --- /dev/null +++ b/base/runtime/include/heap-io.h @@ -0,0 +1,20 @@ +/* heap-io.h + * + * COPYRIGHT (c) 1993 by AT&T Bell Laboratories. + * + * Interface to heap-io library. + */ + +#ifndef _HEAP_IO_ +#define _HEAP_IO_ + +#include + +extern status_t ExportHeapImage (ml_state_t *msp, FILE *file); +extern status_t ExportFnImage (ml_state_t *msp, ml_val_t funct, FILE *file); +extern ml_state_t *ImportHeapImage (const char *fname, heap_params_t *heapParams); + +extern ml_val_t BlastOut (ml_state_t *msp, ml_val_t obj); +extern ml_val_t BlastIn (ml_state_t *msp, Byte_t *data, long len, bool_t *errFlg); + +#endif /* _HEAP_IO_ */ diff --git a/base/runtime/include/machine-id.h b/base/runtime/include/machine-id.h new file mode 100644 index 0000000..1f9eaa5 --- /dev/null +++ b/base/runtime/include/machine-id.h @@ -0,0 +1,50 @@ +/*! \file machine-id.h + * + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + */ + +#ifndef _MACHINE_ID_ +#define _MACHINE_ID_ + +#ifndef _ML_BASE_ +# include "ml-base.h" +#endif + +#if defined(ARCH_AMD64) +# define MACHINE_ID "amd64" +#elif defined(ARCH_PPC) +# define MACHINE_ID "ppc" +#elif defined(ARCH_SPARC) +# define MACHINE_ID "sparc" +#elif defined(ARCH_X86) +# define MACHINE_ID "x86" +#else +# error unknown architecture type +#endif + +#if defined(OPSYS_UNIX) +# if (defined(OPSYS_AIX)) +# define OPSYS_ID "aix" +# elif (defined(OPSYS_DARWIN)) +# define OPSYS_ID "darwin" +# elif (defined(OPSYS_FREEBSD) || defined(OPSYS_NETBSD) || defined(OPSYS_NETBSD2) || defined(OPSYS_OPENBSD)) +# define OPSYS_ID "bsd" +# elif (defined(OPSYS_LINUX)) +# define OPSYS_ID "linux" +# elif (defined(OPSYS_OSF1)) +# define OPSYS_ID "osf1" +# elif (defined(OPSYS_SOLARIS)) +# define OPSYS_ID "solaris" +# elif (defined(OPSYS_CYGWIN)) +# define OPSYS_ID "cygwin" +# else +# define OPSYS_ID "unix" +# endif +#elif defined(OPSYS_WIN32) +# define OPSYS_ID "win32" +#else +# error unknown operating system +#endif + +#endif /* _MACHINE_ID_ */ diff --git a/base/runtime/include/memory.h b/base/runtime/include/memory.h new file mode 100644 index 0000000..5203ba4 --- /dev/null +++ b/base/runtime/include/memory.h @@ -0,0 +1,35 @@ +/* memory.h + * + * COPYRIGHT (c) 1992 AT&T Bell Laboratories + * + * An OS independent view of memory. This supports allocation of + * memory objects aligned to BIBOP_PAGE_SZB byte boundries (see bibop.h). + */ + +#ifndef _MEMORY_ +#define _MEMORY_ + +/* The header of a mem_obj_t structure. The full representation + * of this depends on the underlying OS memory system, and thus is + * abstract. + */ +struct mem_obj_hdr { + Addr_t base; /* the base address of the object. */ + Addr_t sizeB; /* the object's size (in bytes) */ +}; + +typedef struct mem_obj mem_obj_t; + +extern void MEM_InitMemory (); +extern mem_obj_t *MEM_AllocMemObj (Word_t szb); +extern void MEM_FreeMemObj (mem_obj_t *obj); + +#define MEMOBJ_BASE(objPtr) (((struct mem_obj_hdr *)(objPtr))->base) +#define MEMOBJ_SZB(objPtr) (((struct mem_obj_hdr *)(objPtr))->sizeB) + +#ifdef _VM_STATS_ +extern long MEM_GetVMSize (); +#endif + +#endif /* !_MEMORY_ */ + diff --git a/base/runtime/include/ml-base.h b/base/runtime/include/ml-base.h new file mode 100644 index 0000000..5d4ab08 --- /dev/null +++ b/base/runtime/include/ml-base.h @@ -0,0 +1,244 @@ +/*! \file ml-base.h + * + * \author John Reppy + */ + +/* + * COPYRIGHT (c) 2017 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + */ + +#ifndef _ML_BASE_ +#define _ML_BASE_ + +/* macro concatenation (ANSI CPP) */ +#ifndef CONCAT /* assyntax.h also defines CONCAT */ +# if defined(__STDC__) || defined(OPSYS_WIN32) +# define CONCAT(x, y) x ## y +# define CONCAT3(a,b,c) a ## b ## c +# else +# define CONCAT(x, y) x/**/y +# define CONCAT3(a,b,c) a/**/b/**/c +# endif +#endif + +#define ONE_K 1024 +#define ONE_MEG (ONE_K*ONE_K) + +/* constants for converting between different amounts of seconds */ +#define MS_PER_SEC 1000 +#define US_PER_SEC 1000000 +#define NS_PER_SEC 1000000000 + +/* The generated file ml-sizes.h defines various size macros, and + * the following types: + * + * Int16_t -- 16-bit signed integer + * Int32_t -- 32-bit signed integer + * Int64_t -- 64-bit signed integer + * Unsigned16_t -- 16-bit unsigned integer + * Unsigned32_t -- 32-bit unsigned integer + * Unsigned64_t -- 64-bit unsigned integer + * Byte_t -- unsigned 8-bit integer + * Word_t -- unsigned integer that is large enough to hold an ML value. + * Int_t -- signed integer that is large enough to hold an ML value. + * Addr_t -- an unsigned integer that is large enough to hold an address. + */ +#ifndef _ML_SIZES_ +#include "ml-sizes.h" +#endif + +/* the size of a pair */ +#define PAIR_SZB (2*WORD_SZB) +/* the number of Word_t's per double */ +#define REALD_SZW (REALD_SZB / WORD_SZB) +/* the number of Word_t's per pair object */ +#define PAIR_SZW 2 +/* the number of Word_t's per special object */ +#define SPECIAL_SZW 2 +/* the number of Word_t's per 64-bit word */ +#define WORD64_SZW (8 / WORD_SZB) + +/* convert a number of bytes to an even number of words */ +#define BYTES_TO_WORDS(N) (((N)+(WORD_SZB-1)) >> LOG_BYTES_PER_WORD) + +/* convert a number of doubles to an even number of words */ +#define DOUBLES_TO_WORDS(N) ((N) * REALD_SZW) + +/* when ML values are 32-bits, it is useful to align doubles on 8-byte boundries */ +#ifndef SIZE_64 +# define ALIGN_REALDS +#endif + + +#ifndef _ASM_ + +#include + +#ifdef OPSYS_WIN32 +typedef long off_t; /* Windows does not define a file offset type */ +#endif + +#define PVT static + +#ifdef NDEBUG +# define STATIC_INLINE static +#else +# define STATIC_INLINE static inline +#endif + +typedef Int32_t bool_t; +#ifndef TRUE /* Some systems already define TRUE and FALSE */ +# define TRUE 1 +# define FALSE 0 +#endif + +typedef Int32_t status_t; +#define SUCCESS 1 +#define FAILURE 0 + +/* nil pointers */ +#define NIL(ty) ((ty)0) + +/* assertions for debugging */ +#ifdef ASSERT_ON +extern void AssertFail (const char *a, const char *file, int line); +/* #define ASSERT(A) ((A) ? ((void)0) : AssertFail(#A, __FILE__, __LINE__)) */ +#define ASSERT(A) do { if (!(A)) AssertFail(#A, __FILE__, __LINE__); } while(0) +#else +#define ASSERT(A) do { } while(0) +#endif + +/* Convert a bigendian 32-bit quantity into the host machine's representation. */ +#if defined(BYTE_ORDER_BIG) +# define BIGENDIAN_TO_HOST32(x) (x) +#elif defined(BYTE_ORDER_LITTLE) + extern Unsigned32_t SwapBytes32 (Unsigned32_t x); +# define BIGENDIAN_TO_HOST32(x) SwapBytes32(x) +#else +# error must define endianess +#endif + +/* round i up to the nearest multiple of n, where n is a power of 2 */ +#define ROUNDUP(i, n) (((i)+((n)-1)) & ~((n)-1)) + +/* extract the bitfield of width WID starting at position POS from I */ +#define XBITFIELD(I,POS,WID) (((I) >> (POS)) & ((1<<(WID))-1)) + +/* aliases for malloc/free, so that we can easily replace them */ +#define MALLOC(sz) malloc(sz) +#define _FREE free +#define FREE(p) _FREE(p) + +/* Allocate a new C object of type t. */ +#define NEW_OBJ(t) ((t *)MALLOC(sizeof(t))) +/* Allocate a new C array of type t objects. */ +#define NEW_VEC(t,n) ((t *)MALLOC((n)*sizeof(t))) + +/* clear memory */ +#define CLEAR_MEM(m, sz) (memset((m), 0, (sz))) + +/* The size of a page in the BIBOP memory map (in bytes). Note that this + * must agree with the size defined in `runtime/gc/bibop.h`. + */ +#ifdef SIZE_64 +#define BIBOP_PAGE_SZB (256*ONE_K) +#else /* SIZE_32 */ +#define BIBOP_PAGE_SZB (64*ONE_K) +#endif +#define RND_MEMOBJ_SZB(SZ) ROUNDUP(SZ,BIBOP_PAGE_SZB) + +/** C types used in the run-time system **/ +#ifdef SIZES_C64_ML32 +typedef Unsigned32_t ml_val_t; +#else /* ML values and pointers have the same size */ +typedef struct { Word_t v[1]; } ml_object_t; /* something for an ml_val_t to point to */ +typedef ml_object_t *ml_val_t; +#endif +typedef struct vproc_state vproc_state_t; +typedef struct ml_state ml_state_t; +typedef struct heap heap_t; + + +/* In C, system constants are usually integers. We represent these in the ML + * system as (int * string) pairs, where the integer is the C constant, and the + * string is a short version of the symbolic name used in C (e.g., the constant + * EINTR might be represented as (4, "INTR")). + */ +typedef struct { /* The representation of system constants */ + int id; + char *name; +} sys_const_t; + +typedef struct { /* a table of system constants. */ + int numConsts; + sys_const_t *consts; +} sysconst_tbl_t; + + +/* run-time system messages */ +extern void Say (const char *fmt, ...); +extern void SayDebug (const char *fmt, ...); +extern void Error (const char *, ...); +extern void Exit (int code); +extern void Die (const char *, ...); + +/* heap_params is an abstract type, whose representation depends on the + * particular GC being used. + */ +typedef struct heap_params heap_params_t; + +extern heap_params_t *ParseHeapParams (char **argv); +extern ml_state_t *AllocMLState (bool_t isBoot, heap_params_t *params); +extern void BootML (const char *bootlist, heap_params_t *params); +extern void LoadML (const char *loadImage, heap_params_t *params); + +extern bool_t QualifyImageName (char *buf); +extern void InitMLState (ml_state_t *msp); +extern void SaveCState (ml_state_t *msp, ...); +extern void RestoreCState (ml_state_t *msp, ...); +extern void InitTimers (); +extern void ResetTimers (vproc_state_t *vsp); +extern ml_val_t ApplyMLFn (ml_state_t *msp, ml_val_t f, ml_val_t arg, bool_t useCont); +extern void RunML (ml_state_t *msp); +extern void RaiseMLExn (ml_state_t *msp, ml_val_t exn); +extern void InitFaultHandlers (); + +#ifdef SOFT_POLL +extern void ResetPollLimit (ml_state_t *msp); +#endif + + +/* These are two views of the command line arguments; RawArgs is essentially + * argv[]. CmdLineArgs is argv[] with runtime system arguments stripped + * out (e.g., those of the form @SMLxxx[=yyy]). + */ +extern char **RawArgs; +extern char **CmdLineArgs; /* does not include the command name (argv[0]) */ +extern char *MLCmdName; /* the command name used to invoke the runtime */ +extern bool_t SilentLoad; +extern bool_t DumpObjectStrings; +extern bool_t GCMessages; +extern bool_t UnlimitedHeap; + +/* The table of virtual processor ML states */ +extern vproc_state_t *VProc[]; +extern int NumVProcs; + +#endif /* !_ASM_ */ + +/* macro to prepend an underscore if necessary and stringify a symbol */ +#if defined(OPSYS_FREEBSD) || defined(OPSYS_LINUX) +#define DLSYM_AUX1(S) #S +#define DLSYM_AUX2(S) DLSYM_AUX1(_ ## S) +#define DLSYM_STRING(SYM) DLSYM_AUX2(SYM) +#else +#define DLSYM_STRING(SYM) #SYM +#endif + +#ifndef HEAP_IMAGE_SYMBOL +#define HEAP_IMAGE_SYMBOL DLSYM_STRING(smlnj_heap_image) +#define HEAP_IMAGE_LEN_SYMBOL DLSYM_STRING(smlnj_heap_image_len) +#endif + +#endif /* !_ML_BASE_ */ diff --git a/base/runtime/include/ml-globals.h b/base/runtime/include/ml-globals.h new file mode 100644 index 0000000..0e49068 --- /dev/null +++ b/base/runtime/include/ml-globals.h @@ -0,0 +1,84 @@ +/* ml-globals.h + * + * COPYRIGHT (c) 1992 AT&T Bell Laboratories + * + * These are global reference variables allocated in the run-time system that + * are visible to the ML tasks. + */ + +#ifndef _ML_GLOBALS_ +#define _ML_GLOBALS_ + +#ifndef _ML_VALUES_ +#include "ml-values.h" +#endif + +extern ml_val_t *CRoots[]; +extern int NumCRoots; + +/* "current function" hook for profiling */ +extern ml_val_t _ProfCurrent[]; +#define ProfCurrent PTR_CtoML(_ProfCurrent+1) + +extern ml_val_t _PervStruct[]; /* Pointer to the pervasive structure */ +#define PervStruct PTR_CtoML(_PervStruct+1) + +extern ml_val_t _MLSignalHandler[]; +#define MLSignalHandler PTR_CtoML(_MLSignalHandler+1) + +extern ml_val_t SysErr_id0[]; +#define SysErrId PTR_CtoML(SysErr_id0+1) + +extern ml_val_t RunTimeCompUnit; +#ifdef ASM_MATH +extern ml_val_t MathVec; +#endif + +/* FIXME: we no longer need this reference! */ +extern ml_val_t _Div_id0[]; +#define DivId PTR_CtoML(_Div_id0+1) + +extern ml_val_t _Overflow_id0[]; +#define OverflowId PTR_CtoML(_Overflow_id0+1) + +#if defined(ASM_MATH) +extern ml_val_t _Ln_id0[]; +#define LnId PTR_CtoML(_Ln_id0+1) +extern ml_val_t _Sqrt_id0[]; +#define SqrtId PTR_CtoML(_Sqrt_id0+1) +#endif + +extern ml_val_t sigh_resume[]; +extern ml_val_t *sigh_return_c; +extern ml_val_t pollh_resume[]; +extern ml_val_t *pollh_return_c; +extern ml_val_t callc_v[]; +extern ml_val_t handle_v[]; +extern ml_val_t *return_c; + +extern ml_val_t _MLPollHandler[]; +#define MLPollHandler PTR_CtoML(_MLPollHandler+1) + +/** polling and MP references **/ +extern ml_val_t _PollFreq0[]; +#define PollFreq PTR_CtoML(_PollFreq0+1) +extern ml_val_t _PollEvent0[]; +#define PollEvent PTR_CtoML(_PollEvent0+1) +extern ml_val_t _ActiveProcs0[]; +#define ActiveProcs PTR_CtoML(_ActiveProcs0+1) + +/* Initialize the C function list */ +extern void InitCFunList (); +/* Record the C symbols that are visible to SML */ +extern void RecordGlobals (); +/* Initialize the ML globals that are supported by the runtime system */ +extern void AllocGlobals (ml_state_t *msp); +/* Bind a C function */ +extern ml_val_t BindCFun (char *moduleName, char *funName); + +#ifdef SIZES_C64_ML32 +/* patch the 32-bit addresses */ +extern void PatchAddrs (); +#endif + +#endif /* !_ML_GLOBALS_ */ diff --git a/base/runtime/include/ml-limits.h b/base/runtime/include/ml-limits.h new file mode 100644 index 0000000..f0ebfe2 --- /dev/null +++ b/base/runtime/include/ml-limits.h @@ -0,0 +1,111 @@ +/*! \file ml-limits.h + * + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * Various limits and default settings for the SML/NJ run-time system. + */ + +#ifndef _ML_DEFAULTS_ +#define _ML_DEFAULTS_ + +#include "ml-base.h" + +/* default image: NULL (means: try to find in-core image using dlopen/dlsym) */ +#ifndef DFLT_IMAGE +#define DFLT_IMAGE NULL +#endif + +/* the maximum length of a boot-file pathname */ +#ifndef MAX_BOOT_PATH_LEN +# define MAX_BOOT_PATH_LEN 512 +#endif + +/* the maximum number of boot files */ +#ifndef MAX_NUM_BOOT_FILES +# define MAX_NUM_BOOT_FILES 1024 +#endif + +/** Multiprocessor limits **/ +#ifdef MP_SUPPORT +# ifndef MAX_NUM_PROCS +# define MAX_NUM_PROCS 8 +# endif +#else +# define MAX_NUM_PROCS 1 +#endif + + +/** Default heap sizes **/ +#ifndef DFLT_NGENS +# define DFLT_NGENS 5 +#endif +#define MAX_NGENS 14 /* should agree with MAX_NUM_GENS in */ + /* arena-id.h. */ +#define DFLT_CACHE_GEN 2 /* Cache from-space for gens 1 & 2 */ +#ifndef DFLT_ALLOC +# define DFLT_ALLOC (512*ONE_K) +#endif +#define MIN_ALLOC_SZB (128*ONE_K) +#ifdef OLD_POLICY +#define RATIO_UNIT 16 /* ratios are measured in 1/16ths */ +#define DFLT_RATIO1 (7*(RATIO_UNIT/2)) /* gen-1 arenas are small */ +#define DFLT_RATIO (3*RATIO_UNIT) +#define MAX_SZ1(NSZ) (5*(NSZ)) +#endif +#define DFLT_RATIO1 20 +#define DFLT_RATIO2 10 +#define DFLT_RATIO 5 +#define MAX_SZ1(NSZ) (6*(NSZ)) + +/* the generation to allocate code objects in */ +#define CODE_ALLOC_GEN 2 + +/* the size (in words) of a "small object." The C allocation routines allocate + * small objects in the allocation space, while large objects are allocated + * in the first generation. + */ +#define SMALL_OBJ_SZW 512 + +/* This is the size (in bytes) of the allocation buffer. If A is the value + * of the limit pointer, then A[HEAP_BUF_SZ-1] is the address of the next + * store-vector location. + */ +#define HEAP_BUF_SZ (1024 + 128) +#define HEAP_BUF_SZB (HEAP_BUF_SZ*WORD_SZB) + +/* The maximum number of global C variables that can be roots. */ +#define MAX_C_ROOTS 8 + +/* maximum number of additional roots that can be passed to GC */ +#define NUM_EXTRA_ROOTS 16 + +/* The number of potential GC roots. This includes space for C global roots, + * ML roots, and the terminating null pointer. + */ +#ifdef N_PSEUDO_REGS +#define N_PSEUDO_ROOTS N_PSEUDO_REGS +#else +#define N_PSEUDO_ROOTS 0 +#endif +#ifdef MP_SUPPORT +/* + * must assume that all other procs are supplying NUM_EXTRA_ROOTS + * in addition to the standard roots + */ +# define NUM_GC_ROOTS \ + ROUNDUP(MAX_NUM_PROCS*(MAX_C_ROOTS+NROOTS+N_PSEUDO_ROOTS)+ \ + (MAX_NUM_PROCS-1)*NUM_EXTRA_ROOTS+1, 8) +#else +# define NUM_GC_ROOTS \ + ROUNDUP(MAX_NUM_PROCS*(MAX_C_ROOTS+NROOTS+N_PSEUDO_ROOTS)+1, 8) +#endif + +#ifdef SOFT_POLL +/* limits for polling */ +#define POLL_GRAIN_CPSI 1024 /* power of 2, in cps instructions */ +#define POLL_GRAIN_BITS 10 /* log_2 POLL_GRAIN_CPSI */ +#endif + +#endif /* !_ML_DEFAULTS_ */ + diff --git a/base/runtime/include/ml-mp.h b/base/runtime/include/ml-mp.h new file mode 100644 index 0000000..a17de84 --- /dev/null +++ b/base/runtime/include/ml-mp.h @@ -0,0 +1,84 @@ +/* ml-mp.h + * + * COPYRIGHT (c) 1994 by AT&T Bell Laboratories. + */ + +#ifndef _ML_MP_ +#define _ML_MP_ + +/* The status of a virtual processor */ +typedef enum { + MP_PROC_RUNNING, /* processor is running */ + MP_PROC_SUSPENDED, /* processor is suspended */ + MP_PROC_NO_PROC /* no processor allocated */ +} vproc_status_t; + +#ifdef MP_SUPPORT + +#if !defined(SOFT_POLL) || !defined(MP_GCPOLL) +# error MP runtime currently requires polling support +#endif + +/*** OS dependent stuff ***/ + +#if defined(OPSYS_IRIX5) +#include +#include +#include +#include + +typedef ulock_t mp_lock_t; /* A lock */ +typedef barrier_t mp_barrier_t; /* A barrier */ +typedef int mp_pid_t; /* A process id */ + +#else +# error MP not supported for this system +#endif + + +/*** Generic MP interface ***/ + +extern int MP_StartCollect(ml_state_t *); +extern void MP_FinishCollect(ml_state_t *,int); +extern ml_val_t *mpExtraRoots[]; + +extern void MP_SetLock (mp_lock_t lock); +extern void MP_UnsetLock (mp_lock_t lock); +extern bool_t MP_TryLock (mp_lock_t lock); +extern mp_lock_t MP_AllocLock (); +extern void MP_FreeLock (mp_lock_t lock); + +extern mp_barrier_t *MP_AllocBarrier (); +extern void MP_FreeBarrier (mp_barrier_t *barrierp); +extern void MP_Barrier (mp_barrier_t *barrierp, unsigned n); +extern void MP_ResetBarrier (mp_barrier_t *barrierp); + +extern mp_pid_t MP_ProcId (void); +extern int MP_MaxProcs (); +extern ml_val_t MP_AcquireProc (ml_state_t *msp, ml_val_t arg); +extern void MP_ReleaseProc (ml_state_t *msp); +extern int MP_ActiveProcs (); +extern void MP_Init (void); +extern void MP_Shutdown (void); + +extern mp_lock_t MP_GCLock; +extern mp_lock_t MP_GCGenLock; +extern mp_lock_t MP_TimerLock; +extern mp_barrier_t *MP_GCBarrier; + +#define BEGIN_CRITICAL_SECT(LOCK) { MP_SetLock(LOCK); { +#define END_CRITICAL_SECT(LOCK) } MP_UnsetLock(LOCK); } +#define ACQUIRE_LOCK(LOCK) MP_SetLock(LOCK); +#define RELEASE_LOCK(LOCK) MP_UnsetLock(LOCK); + +#else /* !MP_SUPPORT */ + +#define BEGIN_CRITICAL_SECT(LOCK) { +#define END_CRITICAL_SECT(LOCK) } +#define ACQUIRE_LOCK(LOCK) /* no operation */ +#define RELEASE_LOCK(LOCK) /* no operation */ + +#endif /* MP_SUPPORT */ + +#endif /* !_ML_MP_ */ + diff --git a/base/runtime/include/ml-objects.h b/base/runtime/include/ml-objects.h new file mode 100644 index 0000000..5683f19 --- /dev/null +++ b/base/runtime/include/ml-objects.h @@ -0,0 +1,368 @@ +/*! \file ml-objects.h + * + * Macros and routines for allocating heap objects. + * + * \author John Reppy + */ + +/* + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + */ + +#ifndef _ML_OBJECTS_ +#define _ML_OBJECTS_ + +#ifndef _ML_BASE_ +#include "ml-base.h" +#endif + +#ifndef _ML_VALUES_ +#include "ml-values.h" +#endif + +#ifndef _ML_STATE_ +#include "ml-state.h" +#endif + +#ifndef _TAGS_ +#include "tags.h" +#endif + +/* extract info from objects */ +#define OBJ_DESC(OBJ) REC_SEL((OBJ), -1) +#define OBJ_LEN(OBJ) GET_LEN(OBJ_DESC(OBJ)) +#define OBJ_TAG(OBJ) GET_TAG(OBJ_DESC(OBJ)) + + +/** The size of an ML record in bytes (including descriptor) **/ +#define REC_SZB(n) (((n)+1)*sizeof(ml_val_t)) + + +/** heap allocation macros **/ + +/* write an ml_val_t value `x` into the allocation space at offset `i`. */ +#define ML_AllocWrite(msp, i, x) ((((msp)->ml_allocPtr))[(i)] = (x)) + +/* write a 32-bit value `x` into the allocation space at offset `i`. Note that + * on 64-bit targets, the index is being scaled by 4 bytes (not 8)! + */ +#define ML_AllocWrite32(msp, i, x) \ + (((Unsigned32_t *)((msp)->ml_allocPtr))[(i)] = (Unsigned32_t)(x)) + +STATIC_INLINE ml_val_t ML_Alloc (ml_state_t *msp, int n) +{ + ml_val_t obj = PTR_CtoML(msp->ml_allocPtr + 1); + msp->ml_allocPtr += (n + 1); + return obj; +} + +/* inline allocation functions */ +STATIC_INLINE ml_val_t ML_RefAlloc (ml_state_t *msp, ml_val_t a) +{ + ml_val_t *p = msp->ml_allocPtr; + p[0] = DESC_ref; + p[1] = a; + return ML_Alloc(msp, 1); +} +STATIC_INLINE ml_val_t ML_Alloc1 (ml_state_t *msp, ml_val_t a) +{ + ml_val_t *p = msp->ml_allocPtr; + p[0] = MAKE_DESC(1, DTAG_record); + p[1] = a; + return ML_Alloc(msp, 1); +} +STATIC_INLINE ml_val_t ML_Alloc2 (ml_state_t *msp, ml_val_t a, ml_val_t b) +{ + ml_val_t *p = msp->ml_allocPtr; + p[0] = MAKE_DESC(2, DTAG_record); + p[1] = a; + p[2] = b; + return ML_Alloc(msp, 2); +} +STATIC_INLINE ml_val_t ML_Alloc3 (ml_state_t *msp, ml_val_t a, ml_val_t b, ml_val_t c) +{ + ml_val_t *p = msp->ml_allocPtr; + p[0] = MAKE_DESC(3, DTAG_record); + p[1] = a; + p[2] = b; + p[3] = c; + return ML_Alloc(msp, 3); +} +STATIC_INLINE ml_val_t ML_Alloc4 (ml_state_t *msp, ml_val_t a, ml_val_t b, ml_val_t c, ml_val_t d) +{ + ml_val_t *p = msp->ml_allocPtr; + p[0] = MAKE_DESC(4, DTAG_record); + p[1] = a; + p[2] = b; + p[3] = c; + p[4] = d; + return ML_Alloc(msp, 4); +} +STATIC_INLINE ml_val_t ML_Alloc5 (ml_state_t *msp, ml_val_t a, ml_val_t b, ml_val_t c, ml_val_t d, ml_val_t e) +{ + ml_val_t *p = msp->ml_allocPtr; + p[0] = MAKE_DESC(5, DTAG_record); + p[1] = a; + p[2] = b; + p[3] = c; + p[4] = d; + p[5] = e; + return ML_Alloc(msp, 5); +} +STATIC_INLINE ml_val_t ML_Alloc6 (ml_state_t *msp, ml_val_t a, ml_val_t b, ml_val_t c, ml_val_t d, ml_val_t e, ml_val_t f) +{ + ml_val_t *p = msp->ml_allocPtr; + p[0] = MAKE_DESC(6, DTAG_record); + p[1] = a; + p[2] = b; + p[3] = c; + p[4] = d; + p[5] = e; + p[6] = f; + return ML_Alloc(msp, 6); +} +STATIC_INLINE ml_val_t ML_AllocSeqHdr (ml_state_t *msp, ml_val_t desc, ml_val_t data, Int_t len) +{ + ml_val_t *p = msp->ml_allocPtr; + p[0] = desc; + p[1] = data; + p[2] = INT_CtoML(len); + return ML_Alloc(msp, 2); +} +STATIC_INLINE ml_val_t ML_AllocReal64 (ml_state_t *msp, double d) +{ + ml_val_t *p = msp->ml_allocPtr; +#ifdef ALIGN_REALDS + p = (ml_val_t *)((Addr_t)p | WORD_SZB); +#endif + *p++ = DESC_reald; + *(double *)p = d; + msp->ml_allocPtr = p + REALD_SZW; + return PTR_CtoML(p); +} +STATIC_INLINE ml_val_t ML_AllocWord (ml_state_t *msp, Word_t w) +{ + ml_val_t *p = msp->ml_allocPtr; + p[0] = MAKE_DESC(1, DTAG_raw); + p[1] = (ml_val_t)w; + return ML_Alloc(msp, 1); +} + +/* support for 32-bit integers and words, which are boxed on 32-bit systems + * and tagged on 64-bit systems. + */ +STATIC_INLINE ml_val_t INT32_CtoML (ml_state_t *msp, Int32_t n) +{ +#ifdef SIZE_64 + return INT_CtoML(n); /* tagged representation on 64-bit systems */ +#else /* 32-bit ML values */ + ml_val_t *p = msp->ml_allocPtr; + p[0] = MAKE_DESC(1, DTAG_raw); + p[1] = (ml_val_t)n; + return ML_Alloc(msp, 1); +#endif +} +STATIC_INLINE Int32_t INT32_MLtoC (ml_val_t n) +{ +#ifdef SIZE_64 + return INT_MLtoC(n); /* tagged representation on 64-bit systems */ +#else /* 32-bit ML values */ + return *PTR_MLtoC(Int32_t, n); +#endif +} +STATIC_INLINE Int32_t REC_SELINT32 (ml_val_t p, int i) +{ +#ifdef SIZE_64 + return REC_SELINT(p, i); /* tagged representation on 64-bit systems */ +#else /* 32-bit ML values */ + return *REC_SELPTR(Int32_t, p, i); +#endif +} +STATIC_INLINE ml_val_t WORD32_CtoML (ml_state_t *msp, Unsigned32_t n) +{ +#ifdef SIZE_64 + return INT_CtoML(n); /* tagged representation on 64-bit systems */ +#else /* 32-bit ML values */ + ml_val_t *p = msp->ml_allocPtr; + p[0] = MAKE_DESC(1, DTAG_raw); + p[1] = (ml_val_t)n; + return ML_Alloc(msp, 1); +#endif +} +STATIC_INLINE Unsigned32_t WORD32_MLtoC (ml_val_t n) +{ +#ifdef SIZE_64 + return (Unsigned32_t)INT_MLtoC(n); /* tagged representation on 64-bit systems */ +#else /* 32-bit ML values */ + return *PTR_MLtoC(Unsigned32_t, n); +#endif +} +STATIC_INLINE Unsigned32_t REC_SELWORD32 (ml_val_t p, int i) +{ +#ifdef SIZE_64 + return (Unsigned32_t)REC_SELINT(p, i); /* tagged representation on 64-bit systems */ +#else /* 32-bit ML values */ + return *REC_SELPTR(Unsigned32_t, p, i); +#endif +} + +/* support for 64-bit integers and words */ +STATIC_INLINE ml_val_t ML_AllocInt64 (ml_state_t *msp, Int64_t n) +{ + ml_val_t *p = msp->ml_allocPtr; + p[0] = DESC_word64; +#ifdef SIZE_64 + p[1] = (ml_val_t)n; +#else /* 32-bit ML values */ + p[1] = (ml_val_t)(Unsigned32_t)((Unsigned64_t)n >> 32); + p[2] = (ml_val_t)(Unsigned32_t)n; +#endif + return ML_Alloc(msp, WORD64_SZW); +} +STATIC_INLINE ml_val_t ML_AllocWord64 (ml_state_t *msp, Unsigned64_t w) +{ + ml_val_t *p = msp->ml_allocPtr; + p[0] = DESC_word64; +#ifdef SIZE_64 + p[1] = (ml_val_t)w; +#else /* 32-bit ML values */ + p[1] = (ml_val_t)(Unsigned32_t)(w >> 32); + p[2] = (ml_val_t)(Unsigned32_t)w; +#endif + return ML_Alloc(msp, WORD64_SZW); +} +STATIC_INLINE Int64_t INT64_MLtoC (ml_val_t n) +{ +#ifdef SIZE_64 + return *PTR_MLtoC(Unsigned64_t, n); +#else /* 32-bit ML values */ + Unsigned64_t hi = PTR_MLtoC(Unsigned32_t, n)[0]; + Unsigned64_t lo = PTR_MLtoC(Unsigned32_t, n)[1]; + return ((hi << 32) | lo); +#endif +} +STATIC_INLINE Unsigned64_t WORD64_MLtoC (ml_val_t n) +{ +#ifdef SIZE_64 + return *PTR_MLtoC(Unsigned64_t, n); +#else /* 32-bit ML values */ + Unsigned64_t hi = PTR_MLtoC(Unsigned32_t, n)[0]; + Unsigned64_t lo = PTR_MLtoC(Unsigned32_t, n)[1]; + return ((hi << 32) | lo); +#endif +} + +/* add a store-list entry */ +STATIC_INLINE void ML_RecordUpdate (ml_state_t *msp, ml_val_t *addr) +{ + ml_val_t *p = msp->ml_allocPtr; + p[0] = PTR_CtoML(addr); + p[1] = msp->ml_storePtr; + msp->ml_storePtr = PTR_CtoML(p); + msp->ml_allocPtr += 2; +} + +/* allocate a 64-bit integer number of nanoseconds, given seconds and + * microseconds. + */ +STATIC_INLINE ml_val_t ML_AllocNanoseconds (ml_state_t *msp, int sec, int usec) +{ + Unsigned64_t t = (NS_PER_SEC * (Unsigned64_t)sec) + (1000 * (Unsigned64_t)usec); + return ML_AllocWord64(msp, t); +} + +/* macros that wrap the inline allocation functions; these are for backward + * compatibility to the old macro-based allocation code. + */ +#define REF_ALLOC(msp, r, a) { (r) = ML_RefAlloc((msp), (a)); } +#define REC_ALLOC1(msp, r, a) { (r) = ML_Alloc1((msp), (a)); } +#define REC_ALLOC2(msp, r, a, b) { (r) = ML_Alloc2((msp), (a), (b)); } +#define REC_ALLOC3(msp, r, a, b, c) { (r) = ML_Alloc3((msp), (a), (b), (c)); } +#define REC_ALLOC4(msp, r, a, b, c, d) { (r) = ML_Alloc4((msp), (a), (b), (c), (d)); } +#define REC_ALLOC5(msp, r, a, b, c, d, e) { (r) = ML_Alloc5((msp), (a), (b), (c), (d), (e)); } +#define REC_ALLOC6(msp, r, a, b, c, d, e, f) { (r) = ML_Alloc6((msp), (a), (b), (c), (d), (e), (f)); } +#define SEQHDR_ALLOC(msp, r, desc, data, len) { (r) = ML_AllocSeqHdr((msp), (desc), (data), (len)); } +#define REAL64_ALLOC(msp, r, d) { (r) = ML_AllocReal64((msp), (d)); } +#define EXN_ALLOC(msp, ex, id, val, where) REC_ALLOC3(msp, ex, id, val, where) + +/** Boxed word values **/ +#define WORD_ALLOC(msp, r, w) { (r) = ML_AllocWord((msp), (w)); } +#define WORD_MLtoC(w) (*PTR_MLtoC(Word_t, w)) +#define REC_SELWORD(p, i) (*REC_SELPTR(Word_t, p, i)) + +/* temporary */ +#ifdef SIZE_32 +#define INT32_ALLOC(msp, p, i) WORD_ALLOC(msp, p, i) +#endif + +#define INT64_ALLOC(msp, r, i) { (r) = ML_AllocInt64((msp), (i)); } +#define WORD64_ALLOC(msp, r, w) { (r) = ML_AllocWord64((msp), (w)); } + +/** SysWord.word conversions */ +#ifdef SIZE_64 +typedef Unsigned64_t SysWord_t; +#define SYSWORD_ALLOC(msp, r, w) WORD64_ALLOC(msp, r, w) +#define SYSWORD_MLtoC(w) WORD64_MLtoC(w) +#else /* SIZE_32 */ +typedef Unsigned64_t SysWord_t; +#define SYSWORD_ALLOC(msp, r, w) WORD_ALLOC(msp, r, w) +#define SYSWORD_MLtoC(w) WORD32_MLtoC(w) +#endif + +/** ML lists **/ +#define LIST_hd(p) REC_SEL(p, 0) +#define LIST_tl(p) REC_SEL(p, 1) +#define LIST_nil INT_CtoML(0) +#define LIST_isNull(p) ((p) == LIST_nil) +#define LIST_cons(msp, r, a, b) REC_ALLOC2(msp, r, a, b) + +/** ML references **/ +#define DEREF(r) REC_SEL(r, 0) +#define ASSIGN(r, x) (PTR_MLtoC(ml_val_t, r)[0] = (x)) + +/** ML options **/ +#define OPTION_NONE INT_CtoML(0) +#define OPTION_SOME(msp, r, a) REC_ALLOC1(msp, r, a) +#define OPTION_get(r) REC_SEL(r, 0) + +/* the HANDLE type is an alias for `void *`, but HANDLE values are + * actually indices into internal tables in the OS. We could probably + * get away with representing them as tagged integers or words, but + * for now we use a pointer-sized boxed word. + */ +#if defined(_WIN32) +#define HANDLE_MLtoC(h) ((HANDLE)WORD32_MLtoC(h)) +#define HANDLE_CtoML(msp, h) WORD32_CtoML(msp, (Addr_t)h) +#elif defined(_WIN64) +#define HANDLE_MLtoC(h) ((HANDLE)WORD64_MLtoC(h)) +#define HANDLE_CtoML(msp, h) ML_AllocWord64(msp, (Addr_t)h) +#endif + +/** external routines **/ +extern ml_val_t ML_CString (ml_state_t *msp, const char *v); +extern ml_val_t ML_CStringList (ml_state_t *msp, char **strs); +extern ml_val_t ML_AllocString (ml_state_t *msp, Word_t len); +extern ml_val_t ML_AllocCode (ml_state_t *msp, Word_t len); +extern ml_val_t ML_AllocBytearray (ml_state_t *msp, Word_t len); +extern ml_val_t ML_AllocRealdarray (ml_state_t *msp, Word_t len); +extern ml_val_t ML_AllocArrayData (ml_state_t *msp, Word_t len, ml_val_t initVal); +extern ml_val_t ML_AllocArray (ml_state_t *msp, Word_t len, ml_val_t initVal); +extern ml_val_t ML_AllocVector (ml_state_t *msp, Word_t len, ml_val_t initVal); +extern ml_val_t ML_AllocRaw (ml_state_t *msp, Word_t len); +extern void ML_ShrinkRaw (ml_state_t *msp, ml_val_t v, Word_t nWords); +extern ml_val_t ML_AllocRaw64 (ml_state_t *msp, Word_t len); + +extern ml_val_t ML_SysConst (ml_state_t *msp, sysconst_tbl_t *tbl, int id); +extern ml_val_t ML_SysConstList (ml_state_t *msp, sysconst_tbl_t *tbl); +extern ml_val_t ML_AllocCData (ml_state_t *msp, Word_t nbytes); +extern ml_val_t ML_CData (ml_state_t *msp, void *data, Word_t nbytes); + +extern ml_val_t BuildLiterals (ml_state_t *msp, Byte_t *lits, int len); + +extern ml_val_t _ML_string0[]; +extern ml_val_t _ML_vector0[]; +#define ML_string0 PTR_CtoML(_ML_string0+1) +#define ML_vector0 PTR_CtoML(_ML_vector0+1) + +#endif /* !_ML_OBJECTS_ */ diff --git a/base/runtime/include/ml-options.h b/base/runtime/include/ml-options.h new file mode 100644 index 0000000..f685108 --- /dev/null +++ b/base/runtime/include/ml-options.h @@ -0,0 +1,18 @@ +/* ml-options.h + * + * COPYRIGHT (c) 1996 AT&T Research. + * + * Command-line argument processing. + */ + +#ifndef _ML_OPTIONS_ +#define _ML_OPTIONS_ + +/* maximum length of option and argument parts of command-line options */ +#define MAX_OPT_LEN 64 + +extern bool_t isRuntimeOption (char *cmdLineArg, char *option, char **arg); +extern int GetSzOption (int scale, char *sz); + +#endif /* !_ML_OPTIONS_ */ + diff --git a/base/runtime/include/ml-osdep.h b/base/runtime/include/ml-osdep.h new file mode 100644 index 0000000..5029523 --- /dev/null +++ b/base/runtime/include/ml-osdep.h @@ -0,0 +1,60 @@ +/*! \file ml-osdep.h + * + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * This file contains definitions to hide a few OS dependencies. It + * should be portable across both UNIX and non-UNIX systems (unlike the + * UNIX specific "ml-unixdep.h" header file). + * + * GETPAGESIZE() return the machine's pagesize in bytes + * PATH_ARC_SEP the pathname arc separator character + * SYSCALL_RET_ERR this is set, if system calls typically + * return an error code status (unlike UNIX, + * where the global errno is used). + */ + +#ifndef _ML_OSDEP_ +#define _ML_OSDEP_ + +#if defined(OPSYS_UNIX) +# include "ml-unixdep.h" +# if (defined(OPSYS_LINUX) || defined(OPSYS_AIX) || defined(OPSYS_FREEBSD) || defined(OPSYS_NETBSD) || defined(OPSYS_NETBSD2) || defined(OPSYS_OPENBSD) || defined(OPSYS_DARWIN) || defined(OPSYS_CYGWIN)) +# define GETPAGESIZE() (getpagesize()) +# else + /* POSIX 1003.1b interface */ +# ifdef _SC_PAGESIZE +# define GETPAGESIZE() (sysconf(_SC_PAGESIZE)) +# else + /* HPUX engineers can't read specs */ +# define GETPAGESIZE() (sysconf(_SC_PAGE_SIZE)) +# endif +# endif + +# define PATH_ARC_SEP '/' +# define HAS_GETTIMEOFDAY + +#elif defined(OPSYS_MACOS) +# define PATH_ARC_SEP ':' +# define SYSCALL_RET_ERR + +#elif defined(OPSYS_OS2) +# define PATH_ARC_SEP '\\' + +#elif defined(OPSYS_WIN32) +# define PATH_ARC_SEP '\\' + +extern int GetPageSize (void); + +# define GETPAGESIZE() GetPageSize() +# define HAS_GETTIMEOFDAY + +#endif + +/* support for ANSI C Floating-point extensions */ +#if defined(OPSYS_DARWIN) || defined(OPSYS_LINUX) +# define HAS_ANSI_C_FP_EXT +#endif + +#endif /* !_ML_OSDEP_ */ + diff --git a/base/runtime/include/ml-request.h b/base/runtime/include/ml-request.h new file mode 100644 index 0000000..b1a951f --- /dev/null +++ b/base/runtime/include/ml-request.h @@ -0,0 +1,29 @@ +/* ml-request.h + * + * COPYRIGHT (c) 1994 AT&T Bell Laboratories + * + * These are the service request codes used C/ML interface. + */ + +#ifndef _ML_REQUEST_ +#define _ML_REQUEST_ + +#define REQ_GC 0 +#define REQ_RETURN 1 +#define REQ_EXN 2 +#define REQ_FAULT 3 +#define REQ_BIND_CFUN 4 +#define REQ_CALLC 5 +#define REQ_ALLOC_STRING 6 +#define REQ_ALLOC_BYTEARRAY 7 +#define REQ_ALLOC_REALDARRAY 8 +#define REQ_ALLOC_ARRAY 9 +#define REQ_ALLOC_VECTOR 10 +#define REQ_SIG_RETURN 11 +#define REQ_SIG_RESUME 12 +#define REQ_POLL_RETURN 13 +#define REQ_POLL_RESUME 14 +#define REQ_BUILD_LITERALS 15 + +#endif /* !_ML_REQUEST_ */ + diff --git a/base/runtime/include/ml-roots.h b/base/runtime/include/ml-roots.h new file mode 100644 index 0000000..37af981 --- /dev/null +++ b/base/runtime/include/ml-roots.h @@ -0,0 +1,148 @@ +/* ml-roots.h + * + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * The root register indices for various machines. + * + * NROOTS gives the size of the variable-size portion (roots[]) of the + * ML state vector. Note that the name "roots" is slightly misleading; + * while every entry in the vector must be saved over calls to C, not + * every entry is a valid root on every entry to C. The valididity of + * most entries is indicated using the register map convention (via + * ArgRegMap); these entries are valid (and live) iff the corresponding + * bit in the register mask is set (see cps/generic.sml). N_ARG_REGS + * gives the number of such entries. The pc, exncont, varptr, and baseptr + * (if defined) are always valid roots, and the icounter (if defined) never is. + * + * NOTE: N_ARG_REGS is use to implement the old FFI mechanism that Lorenz + * Huelsbergen implemented (see c-libs/smlnj-ccalls). I suspect that this + * mechanism no longer works and it has been superseded by NLFFI. We should + * probably remove it at some time (JHR; 2019-11-10). + */ + +#ifndef _ML_ROOTS_ +#define _ML_ROOTS_ + +#if defined (ARCH_AMD64) + +# define NROOTS 15 +# define N_ARG_REGS 19 +# define N_PSEUDO_REGS 2 +# define EXN_INDX 0 /* 40(rsp) */ +# define ARG_INDX 1 /* rbp */ +# define CONT_INDX 2 /* rsi */ +# define CLOSURE_INDX 3 /* rd */ +# define VAR_INDX 4 /* 56(rsp) */ +# define LINK_INDX 5 /* r8 */ +# define BASE_INDX 6 /* 32(rsp) */ +# define PC_INDX 7 /* rip */ +# define MISC0_INDX 8 /* rbx */ +# define MISC1_INDX 9 /* rcx */ +# define MISC2_INDX 10 /* rdx */ +# define MISC3_INDX 11 /* r10 */ +# define MISC4_INDX 12 /* r11 */ +# define MISC5_INDX 13 /* r12 */ +# define MISC6_INDX 14 /* r13 */ + +#elif defined(ARCH_PPC) + +# define NROOTS 24 +# define N_ARG_REGS 19 +# define N_PSEUDO_REGS 2 + +# define LINK_INDX 0 +# define CLOSURE_INDX 1 +# define ARG_INDX 2 +# define CONT_INDX 3 +# define EXN_INDX 4 +# define VAR_INDX 5 +# define BASE_INDX 6 +# define PC_INDX 8 + +# define MISC0_INDX 9 /* 24 */ +# define MISC1_INDX 10 /* 25 */ +# define MISC2_INDX 11 /* 26 */ +# define MISC3_INDX 12 /* 27 */ +# define MISC4_INDX 13 /* 3 */ +# define MISC5_INDX 14 /* 4 */ +# define MISC6_INDX 15 /* 5 */ +# define MISC7_INDX 16 /* 6 */ +# define MISC8_INDX 17 /* 7 */ +# define MISC9_INDX 18 /* 8 */ +# define MISC10_INDX 19 /* 9 */ +# define MISC11_INDX 20 /* 10 */ +# define MISC12_INDX 21 /* 11 */ +# define MISC13_INDX 22 /* 12 */ +# define MISC14_INDX 23 /* 13 */ + +#elif defined(ARCH_SPARC) + +# define NROOTS 23 /* pc, %i0-i5, %g7, %g1-%g3, %l0-%l7, %o0-%o1 %o3-%o4 */ +# define N_ARG_REGS 19 /* exclude baseptr */ +# define N_PSEUDO_REGS 2 +# define PC_INDX 6 +# define EXN_INDX 7 /* %g7 */ +# define ARG_INDX 0 /* %i0 */ +# define CONT_INDX 1 /* %i1 */ +# define CLOSURE_INDX 2 /* %i2 */ +# define BASE_INDX 3 /* %i3 */ +# define VAR_INDX 5 /* %i5 */ +# define LINK_INDX 4 /* %g1 */ +# define MISC0_INDX 8 /* %g2 */ +# define MISC1_INDX 9 /* %g3 */ +# define MISC2_INDX 10 /* %o0 */ +# define MISC3_INDX 11 /* %o1 */ +# define MISC4_INDX 12 /* %l0 */ +# define MISC5_INDX 13 /* %l1 */ +# define MISC6_INDX 14 /* %l2 */ +# define MISC7_INDX 15 /* %l3 */ +# define MISC8_INDX 16 /* %l4 */ +# define MISC9_INDX 17 /* %l5 */ +# define MISC10_INDX 18 /* %l6 */ +# define MISC11_INDX 19 /* %l7 */ +# define MISC12_INDX 20 /* %i4 */ +# define MISC13_INDX 21 /* %o3 */ +# define MISC14_INDX 22 /* %o4 */ + +#elif defined (ARCH_X86) + +# define NROOTS 26 +# define N_ARG_REGS 23 +# define N_PSEUDO_REGS 2 +# define EXN_INDX 0 /* 8(esp) */ +# define ARG_INDX 1 /* ebp */ +# define CONT_INDX 2 /* esi */ +# define CLOSURE_INDX 3 /* 16(esp) */ +# define VAR_INDX 4 /* 28(esp) */ +# define LINK_INDX 5 /* 20(esp) */ +# define PC_INDX 6 /* eip */ +# define MISC0_INDX 7 /* ebx */ +# define MISC1_INDX 8 /* ecx */ +# define MISC2_INDX 9 /* edx */ + /* MISCn, where n > 2, is a virtual register */ +# define MISC3_INDX 10 /* 40(esp) */ +# define MISC4_INDX 11 /* 44(esp) */ +# define MISC5_INDX 12 /* 48(esp) */ +# define MISC6_INDX 13 /* 52(esp) */ +# define MISC7_INDX 14 /* 56(esp) */ +# define MISC8_INDX 15 /* 60(esp) */ +# define MISC9_INDX 16 /* 64(esp) */ +# define MISC10_INDX 17 /* 68(esp) */ +# define MISC11_INDX 18 /* 72(esp) */ +# define MISC12_INDX 19 /* 76(esp) */ +# define MISC13_INDX 20 /* 80(esp) */ +# define MISC14_INDX 21 /* 84(esp) */ +# define MISC15_INDX 22 /* 88(esp) */ +# define MISC16_INDX 23 /* 92(esp) */ +# define MISC17_INDX 24 /* 96(esp) */ +# define MISC18_INDX 25 /* 100(esp) */ + +#else + +# error unknown architecture + +#endif + +#endif /* !_ML_ROOTS_ */ + diff --git a/base/runtime/include/ml-signals.h b/base/runtime/include/ml-signals.h new file mode 100644 index 0000000..fcbbe75 --- /dev/null +++ b/base/runtime/include/ml-signals.h @@ -0,0 +1,42 @@ +/* ml-signals.h + * + * COPYRIGHT (c) 2022 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + */ + +#ifndef _ML_SIGNALS_ +#define _ML_SIGNALS_ + +typedef struct { /* counters for pending signals; we keep two counters */ + /* to avoid race conditions */ + Word_t nReceived; /* the count of how many signals of this variety */ + /* have been received. This counter is incremented */ + /* the signal handler */ + Word_t nHandled; /* the count of how many of this kind of */ + /* signal have been handled. This counter */ + /* is incremented by the main thread. */ +} sig_count_t; + +/* The state of ML signal handlers; these definitions must agree with + * the values used in src/sml-nj/boot/smlnj/signals.sml. + */ +#define ML_SIG_IGNORE 0 +#define ML_SIG_DEFAULT 1 +#define ML_SIG_ENABLED 2 + +/** Utility functions **/ +extern void GCSignal (vproc_state_t *vsp, int nGen); +extern void ChooseSignal (vproc_state_t *vsp); +extern ml_val_t MakeResumeCont (ml_state_t *msp, ml_val_t resume[]); +extern ml_val_t MakeHandlerArg (ml_state_t *msp, ml_val_t resume[]); +extern void LoadResumeState (ml_state_t *msp); + +/* OS dependent implementations of signal operations. */ +extern ml_val_t ListSignals (ml_state_t *msp); +extern void PauseUntilSignal (vproc_state_t *vsp); +extern void SetSignalState (vproc_state_t *vsp, int sigNum, int sigState); +extern int GetSignalState (vproc_state_t *vsp, int sigNum); +extern void SetSignalMask (ml_val_t sigList); +extern ml_val_t GetSignalMask (ml_state_t *msp); + +#endif /* !_ML_SIGNALS_ */ diff --git a/base/runtime/include/ml-state.h b/base/runtime/include/ml-state.h new file mode 100644 index 0000000..40c2e60 --- /dev/null +++ b/base/runtime/include/ml-state.h @@ -0,0 +1,82 @@ +/* ml-state.h + * + * COPYRIGHT (c) 1992 by AT&T Bell Laboratories. + * + * This is the C view of the state of a ML computation. + */ + +#ifndef _ML_STATE_ +#define _ML_STATE_ + +#ifndef _ML_BASE_ +#include "ml-base.h" +#endif + +#ifndef _ML_ROOTS_ +#include "ml-roots.h" +#endif + +#if (!defined(_CNTR_) && defined(ICOUNT)) +#include "cntr.h" +#endif + +#define CALLEESAVE 3 + + +/** The ML state vector ** + */ +/* typedef struct ml_state ml_state_t; */ /* defined in ml-base.h */ +struct ml_state { + /* ML task info */ + heap_t *ml_heap; /* The heap for this ML task */ +# define ml_allocArena ml_heap->allocBase +# define ml_allocArenaSzB ml_heap->allocSzB + vproc_state_t *ml_vproc; /* the VProc that this is running on */ + + /* ML registers */ + ml_val_t *ml_allocPtr; + ml_val_t *ml_limitPtr; + ml_val_t ml_arg; + ml_val_t ml_cont; + ml_val_t ml_closure; + ml_val_t ml_linkReg; + ml_val_t ml_pc; /* Address of ML code to execute; when */ + /* calling an ML function from C, this */ + /* holds the same value as the linkReg. */ + ml_val_t ml_exnCont; + ml_val_t ml_varReg; + ml_val_t ml_calleeSave[CALLEESAVE]; + + ml_val_t ml_storePtr; /* the list of store operations */ + + /* Linkage information */ + ml_val_t ml_faultExn; /* The exception packet for a hardware fault. */ + Word_t ml_faultPC; /* the PC of the faulting instruction */ +#ifdef SOFT_POLL + ml_val_t *ml_realLimit; /* real heap limit */ + bool_t ml_pollPending; /* poll event pending? */ + bool_t ml_inPollHandler; /* handling a poll event? */ +#endif +}; /* struct ml_state */ + + +/* set up the return linkage and continuation throwing in the ML state vector. */ +#define SETUP_RETURN(msp) { \ + ml_state_t *__msp = (msp); \ + __msp->ml_closure = ML_unit; \ + __msp->ml_pc = __msp->ml_cont; \ + } + +#define SETUP_THROW(msp, cont, val) { \ + ml_state_t *__msp = (msp); \ + ml_val_t __cont = (cont); \ + __msp->ml_closure = __cont; \ + __msp->ml_cont = ML_unit; \ + __msp->ml_pc = \ + __msp->ml_linkReg = GET_CODE_ADDR(__cont); \ + __msp->ml_exnCont = ML_unit; \ + __msp->ml_arg = (val); \ + } + +#endif /* !_ML_STATE_ */ + diff --git a/base/runtime/include/ml-timer.h b/base/runtime/include/ml-timer.h new file mode 100644 index 0000000..01b6976 --- /dev/null +++ b/base/runtime/include/ml-timer.h @@ -0,0 +1,27 @@ +/* ml-timer.h + * + * COPYRIGHT (c) 1992 by AT&T Bell Laboratories. + */ + +#ifndef _ML_TIMER_ +#define _ML_TIMER_ + +#ifndef _ML_BASE_ +#include "ml-base.h" +#endif + +/* TODO: switch to Unsigned64_t count of nanoseconds */ + +/* we define our own type to represent time values, since some systems have + * struct timeval, but others do not. + */ +typedef struct { + Int32_t seconds; + Int32_t uSeconds; +} Time_t; + +extern void GetCPUTime (Time_t *user_t, Time_t *sys_t); +extern void StartGCTimer (vproc_state_t *vsp); +extern void StopGCTimer (vproc_state_t *vsp, long *time); + +#endif /* !_ML_TIMER_ */ diff --git a/base/runtime/include/ml-unixdep.h b/base/runtime/include/ml-unixdep.h new file mode 100644 index 0000000..8b2a0bb --- /dev/null +++ b/base/runtime/include/ml-unixdep.h @@ -0,0 +1,265 @@ +/*! \file ml-unixdep.h + * + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * This file contains O.S. dependent paths, definitions and feature flags for + * various UNIX systems. It should not be included in files that are OS + * independent. See the file * "mach-dep/signal-sysdep.h" for machine/O.S. + * dependencies related to signal handling. + * + * Operating system features: + * + * The following feature symbols may be defined: + * + * HAS_POSIX_LIBRARIES if the ML Posix binding is supported. + * HAS_GETRUSAGE if OS provides getrusage(2) call + * HAS_SETITIMER if OS provides setitimer(2) call + * HAS_CLOCK_GETRES if OS provides clock_getres(3) call + * HAS_MMAP if OS provides both mmap(2) and /dev/zero. + * HAS_ANON_MMAP if OS provides anonymous mmap(2) + * HAS_PARTIAL_MUNMAP if OS allows unmapping of subranges of a mapped + * object + * HAS_VM_ALLOCATE if OS provides vm_allocate (MACH) + * HAS_SELECT if OS supports BSD style select(2) + * HAS_POLL if OS supports SystemV style poll(2) + * HAS_POSIX_SIGS if OS provides POSIX sigaction signal interface + * (including the full sigprocmask interface). + * HAS_BSD_SIGS if OS provides BSD sigvec interface (including + * sigsetmask). + * HAS_SIGCONTEXT if signal handlers have a struct sigcontext + * argument. + * INCLUDE_SIGINFO_H include file that contains siginfo (if needed). + * HAS_UCONTEXT if signal handlers have a ucontext_t argument. + * HAS_STRERROR if the system provides the ISO C strerror function. + * INT_GIDLIST if the second argument to getgroups is int[]. + * STAT_HAS_TIMESPEC if the time fields in the "struct stat" type have + * type "struct timespec". + * HAS_NANOSLEEP if the system provides the nanosleep(2) function. + * + * Note that only one of the following sets of symbols should be defined: + * { HAS_MMAP, HAS_ANON_MMAP, HAS_VM_ALLOCATE } + * { HAS_SELECT, HAS_POLL } + * { HAS_POSIX_SIGS, HAS_BSD_SIGS } + * { HAS_SIGCONTEXT, HAS_UCONTEXT } + * + * Some UNIX systems do not support the POSIX libraries (HAS_POSIX_LIBRARIES is + * not defined), in which case, some of the following feature falgs may be defined: + * + * HAS_ACCESS + * HAS_WAITPID if OS provides waitpid(2) call (POSIX) + * HAS_WAIT3 if OS provides the BSD wait3(2) call + * HAS_SYMLINKS if OS supports symbolic links; this includes + * the symlink(2) and readlink(2) calls. + * HAS_GETCWD if OS supports getcwd(3) (POSIX) + * HAS_GETWD if OS supports getwd(3) (BSD) + * HAS_CHMOD if OS supports chmod(2) and fchmod(2) + * HAS_TRUNCATE if OS supports truncate(2) and ftruncate(2) + * HAS_GETHOSTNAME if OS supports gethostname(2) + * HAS_GETHOSTID if OS supports gethostid(2) + * HAS_SYSINFO if OS supports SystemV style sysinfo(2) + * HAS_UNAME_ID if OS supports uname(2) with machine ID field + * + * { HAS_GETHOSTID, HAS_SYSINFO, HAS_UNAME_ID } + * { HAS_WAITPID, HAS_WAIT3 } + */ + +#ifndef _ML_UNIXDEP_ +#define _ML_UNIXDEP_ + +/** Include file paths **/ +#define INCLUDE_TYPES_H +#define INCLUDE_TIME_H + +#if defined(OPSYS_AIX) +#define INCLUDE_FCNTL_H +#else +#define INCLUDE_FCNTL_H +#endif + +#if defined(OPSYS_AIX) || defined(OPSYS_LINUX) || defined(OPSYS_FREEBSD) || defined(OPSYS_NETBSD) || defined(OPSYS_OPENBSD) || defined(OPSYS_CYGWIN) +# define INCLUDE_DIRENT_H +#else +# define INCLUDE_DIRENT_H +#endif + +#if defined(OPSYS_AIX) /** AIX 3.2 **/ +# define OS_NAME "AIX" +# define HAS_POSIX_LIBRARIES +# define HAS_POSIX_SIGS +# define HAS_GETRUSAGE +# define HAS_MMAP +# define HAS_PARTIAL_MUNMAP +# define HAS_POLL +# define HAS_SIGCONTEXT +# define HAS_STRERROR + +/* These declarations are not in */ +extern int sys_nerr; +extern char *sys_errlist[]; + +#elif defined(OPSYS_DARWIN) /** MacOS X 10.5 for PPC / 10.6+ for x86 / 10.10+ for x86_64 **/ +# define OS_NAME "Darwin" +# define HAS_POSIX_LIBRARIES +# define HAS_POSIX_SIGS +# define HAS_GETRUSAGE +# define HAS_SETITIMER +# define HAS_ANON_MMAP +# define HAS_UCONTEXT +# define HAS_STRERROR +# define HAS_SELECT +# define MAP_ANONYMOUS MAP_ANON +# define HAS_MKSTEMP +# define STAT_HAS_TIMESPEC +/* NOTE: macOS added clock_getres in 10.12 (Sierra). For now, we do not + * enable it, since we are supporting backward compatability to 10.6 (Snow Leopard). + */ +# define HAS_NANOSLEEP + +#elif defined(OPSYS_SOLARIS) /** SunOS 5.x **/ +# define OS_NAME "Solaris" +# define HAS_POSIX_LIBRARIES +# define HAS_POSIX_SIGS +# define HAS_SETITIMER +# define HAS_MMAP +# define HAS_PARTIAL_MUNMAP +# define HAS_POLL +# define HAS_UCONTEXT +# define INCLUDE_SIGINFO_H +# define HAS_STRERROR +# define HAS_MKSTEMP + +/* These declarations are not in */ +extern int sys_nerr; +extern char *sys_errlist[]; + +#elif (defined(ARCH_AMD64) && defined(OPSYS_LINUX)) +# define OS_NAME "Linux" +# define HAS_POSIX_LIBRARIES +# define HAS_POSIX_SIGS +# define HAS_GETRUSAGE +# define HAS_SETITIMER +# define HAS_CLOCK_GETRES +# define HAS_ANON_MMAP +# define HAS_PARTIAL_MUNMAP +# define HAS_SELECT +# define HAS_UCONTEXT +# define HAS_STRERROR +# define HAS_MKSTEMP +# ifndef __USE_GNU +# define __USE_GNU +# endif +# define STAT_HAS_TIMESPEC +# define HAS_NANOSLEEP + +#include + +#elif (defined(ARCH_X86) && defined(OPSYS_LINUX)) +# define OS_NAME "Linux" +# define HAS_POSIX_LIBRARIES +# define HAS_POSIX_SIGS +# define HAS_GETRUSAGE +# define HAS_SETITIMER +# define HAS_CLOCK_GETRES +# define HAS_ANON_MMAP +# define HAS_PARTIAL_MUNMAP +# define HAS_SELECT +# define HAS_UCONTEXT +# define HAS_STRERROR +# define HAS_MKSTEMP +# define STAT_HAS_TIMESPEC +# define _FILE_OFFSET_BITS 64 +# if _POSIX_C_SOURCE >= 199309L +# define HAS_NANOSLEEP +# endif + +#include + +#elif (defined(ARCH_PPC) && defined(OPSYS_LINUX)) +# define OS_NAME "Linux" +# define HAS_POSIX_LIBRARIES +# define HAS_POSIX_SIGS +# define HAS_GETRUSAGE +# define HAS_SETITIMER +# define HAS_ANON_MMAP +# define HAS_PARTIAL_MUNMAP +# define HAS_SELECT +# define HAS_STRERROR +# define HAS_MKSTEMP +# ifndef __USE_GNU +# define __USE_GNU +# endif +# define STAT_HAS_TIMESPEC +# if _POSIX_C_SOURCE >= 199309L +# define HAS_NANOSLEEP +# endif + +#include + +#elif defined(OPSYS_FREEBSD) +# define OS_NAME "BSD" +# define HAS_POSIX_LIBRARIES +# define HAS_POSIX_SIGS +# define HAS_GETRUSAGE +# define HAS_SETITIMER +# define HAS_CLOCK_GETRES +# define HAS_ANON_MMAP +# define HAS_PARTIAL_MUNMAP +# define HAS_SELECT +# define HAS_UCONTEXT +# define HAS_STRERROR +# define STAT_HAS_TIMESPEC +# define HAS_NANOSLEEP + +#elif defined(OPSYS_NETBSD) /* version 3.x */ +# define OS_NAME "BSD" +# define HAS_POSIX_LIBRARIES +# define HAS_POSIX_SIGS +# define HAS_GETRUSAGE +# define HAS_SETITIMER +# define HAS_MMAP +# define HAS_SELECT +# define HAS_UCONTEXT +# define HAS_STRERROR +# define HAS_MKSTEMP +# define STAT_HAS_TIMESPEC +# define HAS_NANOSLEEP + +#elif defined(OPSYS_OPENBSD) +# define OS_NAME "BSD" +# define HAS_POSIX_LIBRARIES +# define HAS_BSD_SIGS +# define HAS_GETRUSAGE +# define HAS_SETITIMER +# define HAS_MMAP +# define HAS_SELECT +# define HAS_SIGCONTEXT +# define HAS_STRERROR +# define HAS_MKSTEMP +# define STAT_HAS_TIMESPEC +# define HAS_NANOSLEEP + +#elif defined(OPSYS_CYGWIN) +# define OS_NAME "Cygwin" +# define HAS_POSIX_LIBRARIES +# define HAS_POSIX_SIGS +# define HAS_GETRUSAGE +# define HAS_SETITIMER +# define HAS_CLOCK_GETRES +# define HAS_MMAP +# define HAS_PARTIAL_MUNMAP +# define HAS_SELECT +# define HAS_SIGCONTEXT +# define HAS_STRERROR +# define STAT_HAS_TIMESPEC +# define HAS_NANOSLEEP + +#include + +#endif + +#include +#include +#include + +#endif /* !_ML_UNIXDEP_ */ diff --git a/base/runtime/include/ml-values.h b/base/runtime/include/ml-values.h new file mode 100644 index 0000000..e6af1e2 --- /dev/null +++ b/base/runtime/include/ml-values.h @@ -0,0 +1,78 @@ +/*! \file ml-values.h + * + * \author John Reppy + * + * Basic definitions for representing ML values in C. + * + * INT_MLtoC(v) -- convert an unboxed ML value to an Int_t. + * INT_CtoML(i) -- convert a Word_t to an unboxed ML value. + * PTR_MLtoC(ty, v) -- convert a boxed ML value to a (ty *). + * PTR_CtoML(p) -- convert (Word_t *p) to an boxed ML value. + */ + +/* + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + */ + +#ifndef _ML_VALUES_ +#define _ML_VALUES_ + +#ifndef _ML_BASE_ +#include "ml-base.h" +#endif + +/* typedef void *ml_val_t; */ /* defined in ml-base.h */ + +#ifndef _ASM_ + +/* When the size of a C pointer differs from the size of an ML value, the + * pointer cast should first convert to a address sized integer before + * the actual cast. This causes problems, however, for gcc when used in + * a static initialization; hence the PTR_CAST macro. + */ +#ifdef SIZES_C64_ML32 +#define PTR_CAST(ty, p) ((ty)(Addr_t)(p)) +#else +#define PTR_CAST(ty, p) ((ty)(p)) +#endif + +#define INT_MLtoC(n) (((Int_t)(n)) >> 1) +#define INT_CtoML(n) ((ml_val_t)(Int_t)(2*(n) + 1)) +#define PTR_MLtoC(ty,p) PTR_CAST(ty *, p) +#define PTR_CtoML(p) PTR_CAST(ml_val_t, p) +#else +#define INT_CtoML(n) (((n)*2)+1) +#endif /* !_ASM_ */ + +#ifndef _ASM_ + +/* convert an ML pointer to an Addr_t value */ +#define PTR_MLtoADDR(p) ((Addr_t)PTR_MLtoC(void, p)) + +/* ML record field selection */ +#define REC_SEL(p, i) ((PTR_MLtoC(ml_val_t, p))[(i)]) +#define REC_SELPTR(ty, p, i) PTR_MLtoC(ty, REC_SEL(p, i)) +#define REC_SELINT(p, i) INT_MLtoC(REC_SEL(p, i)) + +/* Extract the components of an array/vector header */ +#define GET_SEQ_DATA(p) REC_SEL(p, 0) +#define GET_SEQ_DATAPTR(ty, p) REC_SELPTR(ty, p, 0) +#define GET_SEQ_LEN(p) REC_SELINT(p, 1) + +/* Turn an ML string into a C string */ +#define STR_MLtoC(p) GET_SEQ_DATAPTR(char, p) + +/* Extract the code address from an ML closure */ +#define GET_CODE_ADDR(c) (REC_SEL(c, 0)) + +#endif /* !_ASM_ */ + +/** Some basic ML values **/ +#define ML_unit INT_CtoML(0) +#define ML_false INT_CtoML(0) +#define ML_true INT_CtoML(1) +#define ML_nil INT_CtoML(0) + +#endif /* !_ML_VALUES_ */ + diff --git a/base/runtime/include/profile.h b/base/runtime/include/profile.h new file mode 100644 index 0000000..6135442 --- /dev/null +++ b/base/runtime/include/profile.h @@ -0,0 +1,24 @@ +/* profile.h + * + * COPYRIGHT (c) 1996 AT&T Research. + */ + +#ifndef _PROFILE_ +#define _PROFILE_ + +#ifndef PROFILE_QUANTUM_US +# define PROFILE_QUANTUM_US 10000 /* profile timer quantum in uS */ +#endif + +extern ml_val_t ProfCntArray; + +/* Indices into the ProfCntArray for the run-time and GC; these need to + * track the definitions in sml-nj/boot/NJ/prof-control.sml. + */ +#define PROF_RUNTIME INT_CtoML(0) +#define PROF_MINOR_GC INT_CtoML(1) +#define PROF_MAJOR_GC INT_CtoML(2) +#define PROF_OTHER INT_CtoML(3) + +#endif /* _PROFILE_ */ + diff --git a/base/runtime/include/stats-data.h b/base/runtime/include/stats-data.h new file mode 100644 index 0000000..f8eb4c8 --- /dev/null +++ b/base/runtime/include/stats-data.h @@ -0,0 +1,72 @@ +/* stats-data.h + * + * COPYRIGHT (c) 1994 AT&T Bell Laboratories. + */ + +#ifndef _STATS_DATA_ +#define _STATS_DATA_ + +#ifndef _ML_BASE_ +typedef unsigned int Unsigned32_t; +#endif + +#include "ml-timer.h" +#include "cntr.h" + +typedef struct { + Time_t startTime; /* the time of initialization */ + Unsigned32_t mask; /* bitmask, telling which things were */ + /* measured */ + Unsigned32_t isNewRuntime; /* true, if this is the new runtime */ + /* new runtime parameters */ + Unsigned32_t allocSzB; /* the size of the allocation space */ + Unsigned32_t numGens; /* the number of generations. */ + /* old runtime parameters */ + Unsigned32_t softmax; + Unsigned32_t ratio; + Unsigned32_t pad[8]; /* pad to 64 bytes */ +} stat_hdr_t; + +typedef struct { + cntr_t allocCnt; /* allocation count (in bytes) */ + Unsigned32_t numGens; /* the number of generations collected */ + Time_t startTime; + Time_t stopTime; + Unsigned32_t pad[9]; /* pad to 64 bytes */ +} stat_rec_t; + +/* mask bits in header */ +#define STATMASK_ALLOC 0x01 +#define STATMASK_NGENS 0x02 +#define STATMASK_START 0x04 +#define STATMASK_STOP 0x08 + +#ifdef COLLECT_STATS + +#define STATS_BUF_SZ (2048/sizeof(stat_rec_t)) + +extern bool_t StatsOn; /* if TRUE, then generate stats */ +extern int StatsFD; /* the file descriptor to write the data to */ + +extern stat_rec_t StatsBuf[]; /* buffer of data */ +extern int NStatsRecs; /* the number of records in the buffer */ + +/* flush out any records in the buffer */ +#define STATS_FLUSH_BUF() { \ + if (NStatsRecs >= 0) { \ + write (StatsFD, (char *)StatsBuf, NStatsRecs*sizeof(stat_rec_t)); \ + NStatsRecs = 0; \ + } \ + } + +#define STATS_FINISH() { \ + if (++NStatsRecs >= STATS_BUF_SZ) { \ + write (StatsFD, (char *)StatsBuf, STATS_BUF_SZ*sizeof(stat_rec_t)); \ + NStatsRecs = 0; \ + } \ + } + +#endif + +#endif /* !_STATS_DATA_ */ + diff --git a/base/runtime/include/tags.h b/base/runtime/include/tags.h new file mode 100644 index 0000000..cabf833 --- /dev/null +++ b/base/runtime/include/tags.h @@ -0,0 +1,138 @@ +/*! \file tags.h + * + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * These are the macros for object tags and descriptors. They should agree + * with the values in the following compiler source files: + * + * compiler/CodeGen/main/object-desc.sml + * system/init/core.sml + * system/Basis/Implementation/Unsafe/object.sml + */ + +#ifndef _TAGS_ +#define _TAGS_ + +#if defined(_ASM_) && defined(OPSYS_WIN32) +#define HEXLIT(x) CONCAT3(0,x,h) +#define OROP OR +#define ANDOP AND +#else +#define HEXLIT(y) CONCAT(0x,y) +#define OROP | +#define ANDOP & +#endif + +#define MAJOR_MASK HEXLIT(3) /* bits 0-1 are the major tag */ + +#ifdef BOXED1 + /* Major tag: */ +#define TAG_boxed HEXLIT(1) /* 01 - pointers */ +#define TAG_desc HEXLIT(3) /* 11 - descriptors */ +#define TAG_unboxed_b0 HEXLIT(0) /* 00, 10 - unboxed (bit 0 is 0) */ + +/* mark/unmark an ML pointer to make it look like an unboxed object */ +#define MARK_PTR(p) ((ml_val_t)((Addr_t)(p) ANDOP ~HEXLIT(1))) +#define UNMARK_PTR(p) ((ml_val_t)((Addr_t)(p) OROP HEXLIT(1))) + +#else /* BOXED1 */ + /* Major tag: */ +#define TAG_boxed HEXLIT(0) /* 00 - pointers */ +#define TAG_desc HEXLIT(2) /* 10 - descriptors */ +#define TAG_unboxed_b0 HEXLIT(1) /* 01, 11 - unboxed (bit 0 is 1) */ + +/* mark/unmark an ML pointer to make it look like an unboxed object */ +#define MARK_PTR(p) ((ml_val_t)((Addr_t)(p) OROP HEXLIT(1))) +#define UNMARK_PTR(p) ((ml_val_t)((Addr_t)(p) ANDOP ~HEXLIT(1))) + +#endif /* BOXED1 */ + +/* Descriptors have five more tag bits (defined below). */ +#define DTAG_SHIFTW 2 +#define DTAG_WID 5 +#define DTAG_MASK (((1 << DTAG_WID)-1) << DTAG_SHIFTW) +#define TAG_SHIFTW (DTAG_SHIFTW+DTAG_WID) + +#define DTAG_record HEXLIT(0) /* records (including pairs) */ +#define DTAG_vec_hdr HEXLIT(1) /* vector header; length is kind */ +#define DTAG_vec_data DTAG_record /* polymorphic vector data */ +#define DTAG_arr_hdr HEXLIT(2) /* array header; length is kind */ +#define DTAG_arr_data HEXLIT(3) /* polymorphic array data */ +#define DTAG_ref DTAG_arr_data /* reference cell */ +#define DTAG_raw HEXLIT(4) /* word-size aligned non-pointer data */ +#define DTAG_raw64 HEXLIT(5) /* 64-bit aligned non-pointer data */ +#define DTAG_special HEXLIT(6) /* Special object; length is kind */ +#define DTAG_extern HEXLIT(10) /* external symbol reference (used in */ + /* exported heap images) */ +#define DTAG_forward HEXLIT(1F) /* a forwarded object */ + +/* Vector and array headers come in different kinds; the kind tag is stored + * in the length field of the descriptor. We need these codes for polymorphic + * equality and pretty-printing. + * + * NOTE: We need the SEQ_real64 tag for pretty printing because of the way that + * the type `real array`is currently handled (i.e., it is mapped to + * `Real64Array.array`). If we get rid of runtime type passing, then these + * tags can just be based on size. + * Also note that sequences of tagged integers use the next largest size + * (e.g., 31 ==> 32). + */ +#define SEQ_poly HEXLIT(0) /* one word per element; type unkonwn */ +#define SEQ_word8 HEXLIT(1) /* 8-bits per element */ +#define SEQ_word16 HEXLIT(2) /* 16-bits per element */ +#define SEQ_word32 HEXLIT(3) /* 32-bits per element */ +#define SEQ_word64 HEXLIT(4) /* 64-bits per element */ +#define SEQ_real32 HEXLIT(5) /* 32-bit floating-point values */ +#define SEQ_real64 HEXLIT(6) /* 64-bit floating-point values */ + +/* Build a descriptor from a descriptor tag and a length */ +#ifndef _ASM_ +#define MAKE_TAG(t) ((Word_t)(((t) << DTAG_SHIFTW) | TAG_desc)) +#define MAKE_DESC(l,t) ((ml_val_t)(Word_t)(((l) << TAG_SHIFTW) | MAKE_TAG(t))) +#else +#define MAKE_TAG(t) (((t)*4) + TAG_desc) +#define MAKE_DESC(l,t) (((l)*128) + MAKE_TAG(t)) +#endif + +#define DESC_pair MAKE_DESC(2, DTAG_record) +#define DESC_exn MAKE_DESC(3, DTAG_record) +#define DESC_ref MAKE_DESC(1, DTAG_ref) +#define DESC_reald MAKE_DESC(REALD_SZW, DTAG_raw64) +#define DESC_word64 MAKE_DESC(WORD64_SZW, DTAG_raw) +#define DESC_polyvec MAKE_DESC(SEQ_poly, DTAG_vec_hdr) +#define DESC_polyarr MAKE_DESC(SEQ_poly, DTAG_arr_hdr) +#define DESC_word8arr MAKE_DESC(SEQ_word8, DTAG_arr_hdr) +#define DESC_word8vec MAKE_DESC(SEQ_word8, DTAG_vec_hdr) +#define DESC_string MAKE_DESC(SEQ_word8, DTAG_vec_hdr) +#define DESC_real64arr MAKE_DESC(SEQ_real64, DTAG_arr_hdr) + +#define DESC_forwarded MAKE_DESC(0, DTAG_forward) + +/* There are two kinds of special objects: suspensions and weak pointers + * The length field of these defines the state and kind of special object: + */ +#define SPCL_evaled_susp 0 /* unevaluated suspension */ +#define SPCL_unevaled_susp 1 /* evaluated suspension */ +#define SPCL_weak 2 /* weak pointer */ +#define SPCL_null_weak 3 /* nulled weak pointer */ + +#define DESC_evaled_susp MAKE_DESC(SPCL_evaled_susp, DTAG_special) +#define DESC_unevaled_susp MAKE_DESC(SPCL_unevaled_susp, DTAG_special) +#define DESC_weak MAKE_DESC(SPCL_weak, DTAG_special) +#define DESC_null_weak MAKE_DESC(SPCL_null_weak, DTAG_special) + +/* tests on words: + * isBOXED(W) -- true if W is tagged as an boxed value + * isUNBOXED(W) -- true if W is tagged as an unboxed value + * isDESC(W) -- true if W is tagged as descriptor + */ +#define isBOXED(W) (((Word_t)(W) & MAJOR_MASK) == TAG_boxed) +#define isUNBOXED(W) (((Word_t)(W) & 1) == TAG_unboxed_b0) +#define isDESC(W) (((Word_t)(W) & MAJOR_MASK) == TAG_desc) + +/* extract descriptor fields */ +#define GET_LEN(D) (((Word_t)(D)) >> TAG_SHIFTW) +#define GET_TAG(D) ((((Word_t)(D)) ANDOP DTAG_MASK) >> DTAG_SHIFTW) + +#endif /* !_TAGS_ */ diff --git a/base/runtime/include/vproc-state.h b/base/runtime/include/vproc-state.h new file mode 100644 index 0000000..218eb02 --- /dev/null +++ b/base/runtime/include/vproc-state.h @@ -0,0 +1,67 @@ +/* vproc-state.h + * + * COPYRIGHT (c) 1992 by AT&T Bell Laboratories. + * + * This is the state of a virtual processor. + */ + +#ifndef _VPROC_STATE_ +#define _VPROC_STATE_ + +#ifndef _ML_BASE_ +#include "ml-base.h" +#endif + +#ifndef _ML_SIGNALS_ +#include "ml-signals.h" +#endif + +#ifndef _SYSTEM_SIGNALS_ +#include "system-signals.h" +#endif + +#ifndef _ML_TIMER_ +#include "ml-timer.h" +#endif + +#if defined(MP_SUPPORT) && (! defined(_ML_MP_)) +#include "ml-mp.h" +#endif + + +/** The Virtual processor state vector ** + * + * The fields that are accessed by the runtime assembly code are allocated at + * word size to keep the assembly code simpler. + */ +struct vproc_state { + heap_t *vp_heap; /* The heap for this ML task */ + ml_state_t *vp_state; /* The state of the ML task that is */ + /* running on this VProc. Eventually */ + /* we will support multiple ML tasks */ + /* per VProc. */ + /* Signal related fields: */ + Word_t vp_inMLFlag; /* True while executing ML code */ + Word_t vp_handlerPending; /* Is there a signal handler pending? */ + Word_t vp_inSigHandler; /* Is an ML signal handler active? */ + sig_count_t vp_totalSigCount; /* summary count for all signals */ + sig_count_t vp_sigCounts[SIGMAP_SZ]; /* counts of signals. */ + int vp_sigCode; /* the code and count of the next */ + int vp_sigCount; /* signal to handle. */ + int vp_nextPendingSig; /* the index in sigCounts of the next */ + /* signal to handle. */ + int vp_gcSigState; /* the state of the GC signal handler */ + int vp_gcSigThreshold; /* the generation threshold for generating a */ + /* GC signal (0 => all, 1 => gen 1, ...) */ + Time_t *vp_gcTime0; /* The cumulative CPU time at the start of */ + /* the last GC (see kernel/timers.c). */ + Time_t *vp_gcTime; /* The cumulative GC time. */ + Addr_t vp_limitPtrMask; /* for raw-C-call interface */ +#ifdef MP_SUPPORT + mp_pid_t vp_mpSelf; /* the owning process's ID */ + vproc_status_t vp_mpState; /* proc state (see ml-mp.h) */ +#endif +}; + +#endif /* !_VPROC_STATE_ */ + diff --git a/base/runtime/include/win32-timers.h b/base/runtime/include/win32-timers.h new file mode 100644 index 0000000..98477f3 --- /dev/null +++ b/base/runtime/include/win32-timers.h @@ -0,0 +1,16 @@ +/*! \file win32-timers.h + * + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * header for win32 specific interface to interval timers. See mach-dep/win32-timers.c + * for the implementation. + */ + +#ifndef _WIN32_TIMERS_H_ +#define _WIN32_TIMERS_H_ + +extern bool_t win32StopTimer (); +extern bool_t win32StartTimer (int milli_secs); + +#endif /* _WIN32_TIMERS_H_ */ diff --git a/base/runtime/kernel/bill-os.c b/base/runtime/kernel/bill-os.c new file mode 100644 index 0000000..6188dd3 --- /dev/null +++ b/base/runtime/kernel/bill-os.c @@ -0,0 +1,16 @@ +#include "bill-os.h" +#include + +#if defined(__CYGWIN__) || defined(__MINGW32__) + +struct netent * getnetbyname(const char * name) +{ + return NULL; +} + +struct netent * getnetbyaddr(long net, int type) +{ + return NULL; +} + +#endif diff --git a/base/runtime/kernel/boot.c b/base/runtime/kernel/boot.c new file mode 100644 index 0000000..f029178 --- /dev/null +++ b/base/runtime/kernel/boot.c @@ -0,0 +1,554 @@ +/*! \file boot.c + * + * COPYRIGHT (c) 2024 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * This is the bootstrap loader for booting from binfiles. + * + * See dev-notes/binfile.adoc for a description of the binfile format. + * This file must be kept in sync with compiler/Execution/binfile/binfile.sml. + */ + +#include "ml-osdep.h" +#include +#include +#include +#include +#include "ml-base.h" +#include "ml-limits.h" +#include "cache-flush.h" +#include "bin-file.h" +#include "ml-objects.h" +#include "gc.h" +#include "ml-globals.h" + +#ifndef SEEK_SET +# define SEEK_SET 0 +#endif + +/* The persistent ID list is stored in the PervStruct refcell. It has + * the following ML type: + * + * datatype runDynEnv + * = NILrde + * | CONSrde of (Word8Vector.vector * Object.object * runDynEnv) + */ +#define PerIDList (*PTR_MLtoC(ml_val_t, PervStruct)) + +PVT ml_val_t BinFileList = LIST_nil; /* A list of bin files to load */ + + +/* local routines */ +PVT ml_val_t BuildFileList (ml_state_t *msp, const char *bootlist, + int *max_boot_path_len_ptr); +PVT FILE *OpenBinFile (const char *fname, bool_t isBinary); +PVT void ReadBinFile (FILE *file, void *buf, int nbytes, const char *fname); +PVT void LoadBinFile (ml_state_t *msp, char *fname); +PVT void EnterPerID (ml_state_t *msp, pers_id_t *perID, ml_val_t obj); +PVT ml_val_t LookupPerID (pers_id_t *perID); +PVT void ShowPerID (char *buf, pers_id_t *perID); + +static int HEX(int c) +{ + if (isdigit(c)) return c - '0'; + if (c >= 'a' && c <= 'z') return c - 'a' + 10; + return c - 'A' + 10; +} + + +/* BootML: + * + * Boot the system using the items read from the "bootlist" file. + */ +void BootML (const char *bootlist, heap_params_t *heapParams) +{ + ml_state_t *msp; + int max_boot_path_len; + char *fname; + int rts_init = 0; + + msp = AllocMLState (TRUE, heapParams); + +#ifdef HEAP_MONITOR + if (HeapMon_Init(CmdLineArgs, msp->ml_heap) == FAILURE) + Die("unable to start heap monitor"); +#endif + + InitFaultHandlers (); + AllocGlobals (msp); + + /* construct the list of files to be loaded */ + BinFileList = BuildFileList (msp, bootlist, &max_boot_path_len); + + /* this space is ultimately wasted */ + if ((fname = MALLOC (max_boot_path_len)) == NULL) { + Die ("unable to allocate space for boot file names"); + } + + /* boot the system */ + while (BinFileList != LIST_nil) { + /* need to make a copy of the path name because LoadBinFile is + * going to scribble into it */ + strcpy(fname, STR_MLtoC(LIST_hd(BinFileList))); + BinFileList = LIST_tl(BinFileList); + if (fname[0] == '#') { + if (rts_init) { + Die ("runtime system registered more than once\n"); + } + else { + /* register the runtime system under the given pers id */ + pers_id_t pid; + int i, l = strlen (fname + 1); + for (i = 0; i < PERID_LEN; i++) { + int i2 = 2 * i; + if (i2 + 1 < l) { + int c1 = fname[i2+1]; + int c2 = fname[i2+2]; + pid.bytes[i] = (HEX(c1) << 4) + HEX(c2); + } + } + if (!SilentLoad) { + Say ("[Registering runtime system as %s]\n", fname+1); + } + EnterPerID (msp, &pid, RunTimeCompUnit); + rts_init = 1; /* make sure we do this only once */ + } + } + else { + LoadBinFile (msp, fname); + } + } + +} /* end of BootML */ + + +/* BuildFileList: + * + * Given the directory path, build a list of the .bin files in the + * heap. + */ +PVT ml_val_t BuildFileList (ml_state_t *msp, const char *bootlist, int *mbplp) +{ + FILE *listF; + ml_val_t *fileNames = NULL; + char *nameBuf = NULL; + int max_num_boot_files = MAX_NUM_BOOT_FILES; + int max_boot_path_len = MAX_BOOT_PATH_LEN; + int i, j, numFiles; + ml_val_t fileList; +# define SIZE_BUF_LEN 128 /* this should be plenty for two numbers */ + char sizeBuf[SIZE_BUF_LEN]; + char c; + + numFiles = 0; + + listF = OpenBinFile (bootlist, FALSE); + + if (listF != NULL) { + c = getc (listF); + if (c == EOF) { + Die ("bootlist file \"%s\" is empty", bootlist); + } + if (c == '%') { + if (fgets (sizeBuf, SIZE_BUF_LEN, listF) != NIL(char *)) { + /* hardly any checking here... */ + char *space = strchr (sizeBuf, ' '); + *space = '\0'; + max_num_boot_files = strtoul(sizeBuf, NULL, 0); + max_boot_path_len = strtoul(space+1, NULL, 0) + 2; + } + else { + Die ("unable to read first line in \"%s\" after %%", bootlist); + } + } + else { + /* size spec is missing -- use defaults */ + ungetc (c, listF); + } + + *mbplp = max_boot_path_len; /* tell the calling function... */ + + if ((nameBuf = MALLOC(max_boot_path_len)) == NIL(char *)) { + Die ("unable to allocate space for boot file names"); + } + + if ((fileNames = MALLOC(max_num_boot_files * sizeof(char *))) == NULL) { + Die ("unable to allocate space for boot file name table"); + } + + /* read in the file names, converting them to ML strings. */ + while (fgets (nameBuf, max_boot_path_len, listF) != NIL(char *)) { + j = strlen(nameBuf)-1; + if (nameBuf[j] == '\n') nameBuf[j] = '\0'; /* remove "\n" */ + if (numFiles < max_num_boot_files) { + fileNames[numFiles++] = ML_CString(msp, nameBuf); + } else { + Die ("too many files\n"); + } + } + fclose (listF); + } + + /* create the in-heap list */ + for (fileList = LIST_nil, i = numFiles; --i >= 0; ) { + LIST_cons(msp, fileList, fileNames[i], fileList); + } + + /* these guys are no longer needed from now on */ + if (fileNames) { + FREE (fileNames); + } + if (nameBuf) { + FREE (nameBuf); + } + + return fileList; + +} /* end of BuildFileList */ + + +/* OpenBinFile: + * + * Open a file in the bin file directory. + */ +PVT FILE *OpenBinFile (const char *fname, bool_t isBinary) +{ + FILE *file; + + if ((file = fopen (fname, isBinary ? "rb" : "r")) == NULL) { + Error ("unable to open \"%s\"\n", fname); + } + + return file; + +} /* end of OpenBinFile */ + +/* ReadBinFile: + */ +PVT void ReadBinFile (FILE *file, void *buf, int nbytes, const char *fname) +{ + /* When Apple Silicon platforms execute x64 code through Rosetta, + * fread (and POSIX read) sometimes reads incorrect bytes from a + * file and corrupts the permission of the memory region in which + * the buffer resides. The following code is a workaround that uses + * fgetc, which seems to behaves correctly. + * + * TODO: Remove this special case when Apple has fixed the issue or + * when we have a native Arm build. + */ + +#if (defined(OPSYS_DARWIN)) + char *bufc = buf; + for (size_t i = 0; i < (size_t) nbytes; i++) { + int byte = fgetc(file); + if (byte == EOF) { + Die("cannot read file \"%s\"", fname); + } + + bufc[i] = byte; + } +#else + if (fread(buf, nbytes, 1, file) == -1) { + Die ("cannot read file \"%s\"", fname); + } +#endif + +} /* end of ReadBinFile */ + +/* ReadPackedInt32: + * + * Read an integer in "packed" format. (Small numbers only require 1 byte.) + */ +PVT Int32_t ReadPackedInt32 (FILE *file, const char *fname) +{ + Unsigned32_t n; + Byte_t c; + + n = 0; + do { + ReadBinFile (file, &c, sizeof(c), fname); + n = (n << 7) | (c & 0x7f); + } while ((c & 0x80) != 0); + + return ((Int32_t)n); + +} /* end of ReadPackedInt32 */ + +/* ImportSelection: + * + * Select out the interesting bits from the imported object. + */ +PVT void ImportSelection (ml_state_t *msp, FILE *file, const char *fname, + int *importVecPos, ml_val_t tree) +{ + Int32_t cnt = ReadPackedInt32 (file, fname); + if (cnt == 0) { + ML_AllocWrite (msp, *importVecPos, tree); + (*importVecPos)++; + } + else { + while (cnt-- > 0) { + Int32_t selector = ReadPackedInt32 (file, fname); + ImportSelection (msp, file, fname, importVecPos, + REC_SEL(tree, selector)); + } + } + +} /* end of ImportSelection */ + +/* LoadBinFile: + */ +PVT void LoadBinFile (ml_state_t *msp, char *fname) +{ + FILE *file; + int i, remainingCode, importRecLen; + int exportSzB = 0; + ml_val_t codeObj, importRec, closure, val; + binfile_hdr_t hdr; + pers_id_t exportPerID; + Int32_t thisSzB, thisEntryPoint; + size_t archiveOffset; + char *atptr, *colonptr; + char *objname = fname; + + /* an entry in the boot-file list should have the following syntax: + * + * [ '@' [ ':' ] ] + * + * where is the name of the binfile, is an optional offset + * into the binfile (for stable libraries), and is the optional name + * of the object being loaded. The defaults to 0 and the + * defaults to the . Note that the typically includes both + * the filename and the offset, as well as the name of the source file being + * loaded. + */ + if ((atptr = strchr (fname, '@')) == NULL) { + archiveOffset = 0; + } else { + if ((colonptr = strchr (atptr + 1, ':')) != NULL) { + objname = colonptr + 1; + *colonptr = '\0'; + } + /* not a lot of extensive checking here... */ + archiveOffset = strtoul (atptr + 1, NULL, 0); + *atptr = '\0'; + } + + if (!SilentLoad) { + Say ("[Loading %s]\n", objname); + } + + /* open the file */ + file = OpenBinFile (fname, TRUE); + if (file == NULL) + Exit (1); + + /* if an offset is given (i.e., we are probably dealing with a stable + * archive), then seek to the beginning of the section that contains + * the binfile + */ + if (archiveOffset != 0) { + if (fseek (file, archiveOffset, SEEK_SET) == -1) + Die ("cannot seek on archive file \"%s@%ul\"", + fname, (unsigned long) archiveOffset); + } + + /* get the header */ + ReadBinFile (file, &hdr, sizeof(binfile_hdr_t), fname); + + /* get header byte order right */ + hdr.importCnt = BIGENDIAN_TO_HOST32(hdr.importCnt); + hdr.exportCnt = BIGENDIAN_TO_HOST32(hdr.exportCnt); + hdr.importSzB = BIGENDIAN_TO_HOST32(hdr.importSzB); + hdr.cmInfoSzB = BIGENDIAN_TO_HOST32(hdr.cmInfoSzB); + hdr.lambdaSzB = BIGENDIAN_TO_HOST32(hdr.lambdaSzB); + hdr.reserved = BIGENDIAN_TO_HOST32(hdr.reserved); + hdr.pad = BIGENDIAN_TO_HOST32(hdr.pad); + hdr.codeSzB = BIGENDIAN_TO_HOST32(hdr.codeSzB); + hdr.envSzB = BIGENDIAN_TO_HOST32(hdr.envSzB); + + /* read the import PerIDs, and create the import vector */ + { + int importVecPos; + + importRecLen = hdr.importCnt + 1; + + if (NeedGC (msp, REC_SZB(importRecLen))) + InvokeGCWithRoots (msp, 0, &BinFileList, NIL(ml_val_t *)); + + ML_AllocWrite (msp, 0, MAKE_DESC(importRecLen, DTAG_record)); + for (importVecPos = 1; importVecPos < importRecLen; ) { + pers_id_t importPid; + ReadBinFile (file, &importPid, sizeof(pers_id_t), fname); + ImportSelection (msp, file, fname, &importVecPos, + LookupPerID(&importPid)); + } + ML_AllocWrite(msp, importRecLen, ML_nil); + importRec = ML_Alloc(msp, importRecLen); + } + + /* read the export PerID */ + if (hdr.exportCnt == 1) { + exportSzB = sizeof(pers_id_t); + ReadBinFile (file, &exportPerID, exportSzB, fname); + } + else if (hdr.exportCnt != 0) { + Die ("# of export pids is %d (should be 0 or 1)", (int)hdr.exportCnt); + } + + /* seek to code section */ + { + long off = archiveOffset + + sizeof(binfile_hdr_t) + + hdr.importSzB + + exportSzB + + hdr.cmInfoSzB + + hdr.lambdaSzB + + hdr.reserved + + hdr.pad; + + if (fseek(file, off, SEEK_SET) == -1) { + Die ("cannot seek on bin file \"%s\"", fname); + } + } + + /* Read code objects and run them. The first code object will be the + * data segment. + */ + + remainingCode = hdr.codeSzB; + + /* read the size and the dummy entry point for the data object */ + ReadBinFile (file, &thisSzB, sizeof(Int32_t), fname); + thisSzB = BIGENDIAN_TO_HOST32(thisSzB); + ReadBinFile (file, &thisEntryPoint, sizeof(Int32_t), fname); + /* thisEntryPoint = BIGENDIAN_TO_HOST32(thisEntryPoint); */ + + remainingCode -= thisSzB + 2 * sizeof(Int32_t); + if (remainingCode < 0) { + Die ("format error (data size mismatch) in bin file \"%s\"", fname); + } + + if (thisSzB > 0) { + Byte_t *dataObj = NEW_VEC(Byte_t, thisSzB); + + ReadBinFile (file, dataObj, thisSzB, fname); + SaveCState (msp, &BinFileList, &importRec, NIL(ml_val_t *)); + val = BuildLiterals (msp, dataObj, thisSzB); + FREE(dataObj); + RestoreCState (msp, &BinFileList, &importRec, NIL(ml_val_t *)); + } + else { + val = ML_unit; + } + /* do a functional update of the last element of the importRec. */ + for (i = 0; i < importRecLen; i++) { + ML_AllocWrite(msp, i, PTR_MLtoC(ml_val_t, importRec)[i-1]); + } + ML_AllocWrite(msp, importRecLen, val); + val = ML_Alloc(msp, importRecLen); + /* do a GC, if necessary */ + if (NeedGC (msp, PERID_LEN+REC_SZB(5))) { + InvokeGCWithRoots (msp, 0, &BinFileList, &val, NIL(ml_val_t *)); + } + + if (remainingCode > 0) { + /* read the size and entry point for the code object */ + ReadBinFile (file, &thisSzB, sizeof(Int32_t), fname); + thisSzB = BIGENDIAN_TO_HOST32(thisSzB); + ReadBinFile (file, &thisEntryPoint, sizeof(Int32_t), fname); + thisEntryPoint = BIGENDIAN_TO_HOST32(thisEntryPoint); + + /* how much more? */ + remainingCode -= thisSzB + 2 * sizeof(Int32_t); + if (remainingCode != 0) { + Die ("format error (code size mismatch) in bin file \"%s\"", fname); + } + + /* allocate space and read code object */ + codeObj = ML_AllocCode (msp, thisSzB); + ReadBinFile (file, PTR_MLtoC(char, codeObj), thisSzB, fname); + + FlushICache (PTR_MLtoC(char, codeObj), thisSzB); + + if (!SilentLoad) { + Say (" [addr: %p, size: %d]\n", PTR_MLtoC(char, codeObj), thisSzB); + } + + /* create closure (taking entry point into account) */ + REC_ALLOC1 (msp, closure, + PTR_CtoML (PTR_MLtoC (char, codeObj) + thisEntryPoint)); + + /* apply the closure to the import PerID vector */ + SaveCState (msp, &BinFileList, NIL(ml_val_t *)); + val = ApplyMLFn (msp, closure, val, TRUE); + RestoreCState (msp, &BinFileList, NIL(ml_val_t *)); + + /* do a GC, if necessary */ + if (NeedGC (msp, PERID_LEN+REC_SZB(5))) + InvokeGCWithRoots (msp, 0, &BinFileList, &val, NIL(ml_val_t *)); + } + + /* record the resulting exported PerID */ + if (exportSzB != 0) + EnterPerID (msp, &exportPerID, val); + + fclose (file); + +} /* end of LoadBinFile */ + +/* EnterPerID: + * + * Enter a PerID/object binding in the heap allocated list of PerIDs. + */ +PVT void EnterPerID (ml_state_t *msp, pers_id_t *perID, ml_val_t obj) +{ + ml_val_t mlPerID; + + /* Allocate space for the PerID */ + mlPerID = ML_AllocString (msp, PERID_LEN); + memcpy (STR_MLtoC(mlPerID), (char *)perID, PERID_LEN); + + /* Allocate the list element */ + REC_ALLOC3(msp, PerIDList, mlPerID, obj, PerIDList); + +} + +/* LookupPerID: + */ +PVT ml_val_t LookupPerID (pers_id_t *perID) +{ + ml_val_t p, id; + + for (p = PerIDList; p != ML_unit; p = REC_SEL(p, 2)) { + id = REC_SEL(p, 0); + if (memcmp((char *)perID, STR_MLtoC(id), PERID_LEN) == 0) + return (REC_SEL(p, 1)); + } + + /* here we were unable to find the PerID */ + { + char buf[64]; + ShowPerID (buf, perID); + Die ("unable to find PerID %s", buf); + } + +} /* end of LookupPerID */ + + +/* ShowPerID: + */ +PVT void ShowPerID (char *buf, pers_id_t *perID) +{ + char *cp = buf; + int i; + + *cp++ = '['; + for (i = 0; i < PERID_LEN; i++) { + sprintf (cp, "%02x", perID->bytes[i]); + cp += 2; + } + *cp++ = ']'; + *cp++ = '\0'; + +} /* end of ShowPerID */ diff --git a/base/runtime/kernel/error.c b/base/runtime/kernel/error.c new file mode 100644 index 0000000..7c7c706 --- /dev/null +++ b/base/runtime/kernel/error.c @@ -0,0 +1,97 @@ +/* error.c + * + * COPYRIGHT (c) 1994 by AT&T Bell Laboratories. + * + * Run-time system error messages. + */ + +#include +#include +#include "ml-base.h" + +extern FILE *DebugF; + +/* Say: + * Print a message to the standard output. + */ +void Say (const char *fmt, ...) +{ + va_list ap; + + va_start (ap, fmt); + vfprintf (stdout, fmt, ap); + va_end(ap); + fflush (stdout); + +} /* end of Say */ + +/* SayDebug: + * Print a message to the debug output stream. + */ +void SayDebug (const char *fmt, ...) +{ + va_list ap; + + va_start (ap, fmt); + vfprintf (DebugF, fmt, ap); + va_end(ap); + fflush (DebugF); + +} /* end of SayDebug */ + +/* Error: + * Print an error message. + */ +void Error (const char *fmt, ...) +{ + va_list ap; + + va_start (ap, fmt); + fprintf (stderr, "%s: Error -- ", MLCmdName); + vfprintf (stderr, fmt, ap); + va_end(ap); + +} /* end of Error */ + + +/* Die: + * Print an error message and then exit. + */ +void Die (const char *fmt, ...) +{ + va_list ap; + + va_start (ap, fmt); + fprintf (stderr, "%s: Fatal error -- ", MLCmdName); + vfprintf (stderr, fmt, ap); + fprintf (stderr, "\n"); + va_end(ap); + +#ifdef MP_SUPPORT + MP_Shutdown (); +#endif + + Exit (1); + +} /* end of Die */ + + +#ifdef ASSERT_ON +/* AssertFail: + * + * Print an assertion failure message. + */ +void AssertFail (const char *a, const char *file, int line) +{ + fprintf (stderr, "%s: Assertion failure (%s) at \"%s:%d\"\n", + MLCmdName, a, file, line); + +#ifdef MP_SUPPORT + MP_Shutdown (); +#endif + + Exit (2); + +} /* end of AssertFail */ +#endif + diff --git a/base/runtime/kernel/globals.c b/base/runtime/kernel/globals.c new file mode 100644 index 0000000..ca8410c --- /dev/null +++ b/base/runtime/kernel/globals.c @@ -0,0 +1,330 @@ +/*! \file globals.c + * + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + */ + +#include "ml-base.h" +#include "machine-id.h" +#include "ml-values.h" +#include "tags.h" +#include "ml-globals.h" +#include "ml-objects.h" +#include "ml-limits.h" +#include "c-globals-tbl.h" + +#ifdef SIZES_C64_ML32 +void PatchAddrs (); +#endif + +#ifndef SIZES_C64_ML32 + +typedef struct { + ml_val_t desc; + char *s; + ml_val_t len; +} ml_string_t; + +#define ML_STRING(id, s) \ + ml_string_t id = { \ + DESC_string, \ + s, \ + INT_CtoML(sizeof(s)-1) \ + } + +/* Exceptions are identified by (string ref) values */ +#define ML_EXNID(ex,name) \ + ML_STRING(CONCAT(ex,_s), name); \ + ml_val_t CONCAT(ex,_id0) [2] = { \ + DESC_ref, \ + PTR_CtoML(&(CONCAT(ex,_s).s)) \ + } + +#define ASM_CLOSURE(name) \ + extern ml_val_t CONCAT(name,_a)[]; \ + ml_val_t CONCAT(name,_v)[2] = { \ + MAKE_DESC(1,DTAG_record), \ + PTR_CtoML(CONCAT(name,_a)) \ + } + +#else /* SIZES_C64_ML32 */ +/* When the size of Addr_t is bigger than the size of an Word_t, we need + * to dynamically patch the static ML objects. + */ + +typedef struct { + ml_val_t desc; + ml_val_t s; + ml_val_t len; +} ml_string_t; + +#define ML_STRING(id,s) \ + PVT char CONCAT(id,_data)[] = s; \ + ml_string_t id = { \ + DESC_string, ML_unit, INT_CtoML(sizeof(s)-1) \ + } + +#define PATCH_ML_STRING(id) \ + id.s = PTR_CtoML(CONCAT(id,_data)) + +/* Exceptions are identified by (string ref) values */ +#define ML_EXNID(ex,name) \ + ML_STRING(CONCAT(ex,_s),name); \ + ml_val_t CONCAT(ex,_id0) [2] = { DESC_ref, } + +#define PATCH_ML_EXNID(ex) \ + PATCH_ML_STRING(CONCAT(ex,_s)); \ + CONCAT(ex,_id0)[1] = PTR_CtoML(&(CONCAT(ex,_s).s)) + +#define ASM_CLOSURE(name) \ + extern ml_val_t CONCAT(name,_a)[]; \ + ml_val_t CONCAT(name,_v)[2] = { \ + MAKE_DESC(1, DTAG_record), \ + } + +#define PATCH_ASM_CLOSURE(name) \ + CONCAT(name,_v)[1] = PTR_CtoML(CONCAT(name,_a)) + +#endif + + +#if (CALLEESAVE > 0) +#define ASM_CONT(name) \ + extern ml_val_t CONCAT(name,_a)[]; \ + ml_val_t *CONCAT(name,_c) = (ml_val_t *)(CONCAT(name,_a)) +#else +#define ASM_CONT(name) \ + ASM_CLOSURE(name); \ + ml_val_t *CONCAT(name,_c) = (ml_val_t *)(CONCAT(name,_v)+1) +#endif + +ASM_CLOSURE(array); +ASM_CLOSURE(bind_cfun); +ASM_CLOSURE(callc); +ASM_CLOSURE(create_b); +ASM_CLOSURE(create_r); +ASM_CLOSURE(create_s); +ASM_CLOSURE(create_v); +ASM_CLOSURE(floor); +ASM_CLOSURE(logb); +ASM_CLOSURE(scalb); +ASM_CLOSURE(try_lock); +ASM_CLOSURE(unlock); +ASM_CLOSURE(handle); + +ASM_CONT(return); +ASM_CONT(sigh_return); +ASM_CONT(pollh_return); + + +/* A ref cell initialized to unit. */ +#define REFCELL(z) ml_val_t z[2] = {DESC_ref, ML_unit} + +REFCELL(_ProfCurrent); +REFCELL(_PervStruct); +REFCELL(_MLSignalHandler); +REFCELL(_MLPollHandler); +REFCELL(_PollEvent0); +REFCELL(_PollFreq0); +REFCELL(_ActiveProcs0); + +ml_val_t RunTimeCompUnit = ML_unit; +#ifdef ASM_MATH +ml_val_t MathVec = ML_unit; +#endif + +/* aggregate structures of length zero */ +const char _ML_string0_data[1] = {0}; +ml_val_t _ML_string0[3] = {DESC_string, PTR_CtoML(_ML_string0_data), INT_CtoML(0)}; +ml_val_t _ML_vector0[3] = {DESC_polyvec, ML_unit, INT_CtoML(0)}; + +ML_EXNID(_Div,"Div"); +ML_EXNID(_Overflow,"Overflow"); +ML_EXNID(SysErr, "SysErr"); + +extern ml_val_t externlist0[]; + +#ifdef ASM_MATH +ML_EXNID(_Ln,"Ln"); +ML_EXNID(_Sqrt,"Sqrt"); +#endif + + +/* A table of pointers to global C variables that are potential roots. */ +ml_val_t *CRoots[MAX_C_ROOTS] = { + &RunTimeCompUnit, + _PervStruct+1, + _MLSignalHandler+1, + _MLPollHandler+1, +#ifdef ASM_MATH + &MathVec, +#else + NIL(ml_val_t *), +#endif + NIL(ml_val_t *), NIL(ml_val_t *) +}; +#ifdef ASM_MATH +int NumCRoots = 5; +#else +int NumCRoots = 4; +#endif + + +/* AllocGlobals: + */ +void AllocGlobals (ml_state_t *msp) +{ + ml_val_t RunVec; + ml_val_t CStruct; + +#ifdef SIZES_C64_ML32 + PatchAddrs (); +#endif + + /* allocate the RunVec */ +#define RUNVEC_SZ 12 + ML_AllocWrite(msp, 0, MAKE_DESC(RUNVEC_SZ, DTAG_record)); + ML_AllocWrite(msp, 1, PTR_CtoML(array_v+1)); + ML_AllocWrite(msp, 2, PTR_CtoML(bind_cfun_v+1)); + ML_AllocWrite(msp, 3, PTR_CtoML(callc_v+1)); + ML_AllocWrite(msp, 4, PTR_CtoML(create_b_v+1)); + ML_AllocWrite(msp, 5, PTR_CtoML(create_r_v+1)); + ML_AllocWrite(msp, 6, PTR_CtoML(create_s_v+1)); + ML_AllocWrite(msp, 7, PTR_CtoML(create_v_v+1)); + ML_AllocWrite(msp, 8, PTR_CtoML(floor_v+1)); + ML_AllocWrite(msp, 9, PTR_CtoML(logb_v+1)); + ML_AllocWrite(msp, 10, PTR_CtoML(scalb_v+1)); + ML_AllocWrite(msp, 11, PTR_CtoML(try_lock_v+1)); + ML_AllocWrite(msp, 12, PTR_CtoML(unlock_v+1)); + RunVec = ML_Alloc(msp, RUNVEC_SZ); + + /* allocate the CStruct */ +#define CSTRUCT_SZ 12 + ML_AllocWrite(msp, 0, MAKE_DESC(CSTRUCT_SZ, DTAG_record)); + ML_AllocWrite(msp, 1, RunVec); + ML_AllocWrite(msp, 2, DivId); /* FIXME: we no longer need this field! */ + ML_AllocWrite(msp, 3, OverflowId); + ML_AllocWrite(msp, 4, SysErrId); + ML_AllocWrite(msp, 5, ProfCurrent); + ML_AllocWrite(msp, 6, PollEvent); + ML_AllocWrite(msp, 7, PollFreq); + ML_AllocWrite(msp, 8, MLPollHandler); + ML_AllocWrite(msp, 9, ActiveProcs); + ML_AllocWrite(msp, 10, PervStruct); + ML_AllocWrite(msp, 11, MLSignalHandler); + ML_AllocWrite(msp, 12, ML_vector0); + CStruct = ML_Alloc(msp, CSTRUCT_SZ); + + /* allocate 1-elem SRECORD just containing the CStruct */ + REC_ALLOC1(msp, RunTimeCompUnit, CStruct); + +#ifdef ASM_MATH +#define MATHVEC_SZ 8 + ML_AllocWrite(msp, 0, MAKE_DESC(MATHVEC_SZ, DTAG_record)); + ML_AllocWrite(msp, 1, LnId); + ML_AllocWrite(msp, 2, SqrtId); + ML_AllocWrite(msp, 3, PTR_CtoML(arctan_v+1)); + ML_AllocWrite(msp, 4, PTR_CtoML(cos_v+1)); + ML_AllocWrite(msp, 5, PTR_CtoML(exp_v+1)); + ML_AllocWrite(msp, 6, PTR_CtoML(ln_v+1)); + ML_AllocWrite(msp, 7, PTR_CtoML(sin_v+1)); + ML_AllocWrite(msp, 8, PTR_CtoML(sqrt_v+1)); + MathVec = ML_Alloc(msp, MATHVEC_SZ); +#endif + +} /* end of AllocGlobals */ + + +/* RecordGlobals: + * + * Record all global symbols that may be referenced from the ML heap. + */ +void RecordGlobals () +{ + /* Misc. */ + RecordCSymbol ("nullptr", PTR_CtoML(0)); + RecordCSymbol ("handle", PTR_CtoML(handle_v+1)); + RecordCSymbol ("return", PTR_CtoML(return_c)); +#if (CALLEESAVE == 0) + RecordCSymbol ("return_a", PTR_CtoML(return_a)); +#endif + + /* RunVec */ + RecordCSymbol ("RunVec.array", PTR_CtoML(array_v+1)); + RecordCSymbol ("RunVec.bind_cfun", PTR_CtoML(bind_cfun_v+1)); + RecordCSymbol ("RunVec.callc", PTR_CtoML(callc_v+1)); + RecordCSymbol ("RunVec.create_b", PTR_CtoML(create_b_v+1)); + RecordCSymbol ("RunVec.create_r", PTR_CtoML(create_r_v+1)); + RecordCSymbol ("RunVec.create_s", PTR_CtoML(create_s_v+1)); + RecordCSymbol ("RunVec.create_v", PTR_CtoML(create_v_v+1)); + RecordCSymbol ("RunVec.floor", PTR_CtoML(floor_v+1)); + RecordCSymbol ("RunVec.logb", PTR_CtoML(logb_v+1)); + RecordCSymbol ("RunVec.scalb", PTR_CtoML(scalb_v+1)); + RecordCSymbol ("RunVec.try_lock", PTR_CtoML(try_lock_v+1)); + RecordCSymbol ("RunVec.unlock", PTR_CtoML(unlock_v+1)); + + /* CStruct */ + RecordCSymbol ("CStruct.DivId", DivId); /* FIXME: we can remove this */ + RecordCSymbol ("CStruct.OverflowId", OverflowId); + RecordCSymbol ("CStruct.SysErrId", SysErrId); + RecordCSymbol ("CStruct.PervStruct", PervStruct); + RecordCSymbol ("CStruct.MLSignalHandler", MLSignalHandler); + RecordCSymbol ("CStruct.vector0", ML_vector0); + RecordCSymbol ("CStruct.profCurrent", ProfCurrent); + RecordCSymbol ("CStruct.MLPollHandler", MLPollHandler); + RecordCSymbol ("CStruct.pollEvent", PollEvent); + RecordCSymbol ("CStruct.pollFreq", PollFreq); + RecordCSymbol ("CStruct.activeProcs", ActiveProcs); + + /* null string */ + RecordCSymbol ("string0", ML_string0); + +#if defined(ASM_MATH) + /* MathVec */ + RecordCSymbol ("MathVec.LnId", LnId); + RecordCSymbol ("MathVec.SqrtId", SqrtId); + RecordCSymbol ("MathVec.arctan", PTR_CtoML(arctan_v+1)); + RecordCSymbol ("MathVec.cos", PTR_CtoML(cos_v+1)); + RecordCSymbol ("MathVec.exp", PTR_CtoML(exp_v+1)); + RecordCSymbol ("MathVec.ln", PTR_CtoML(ln_v+1)); + RecordCSymbol ("MathVec.sin", PTR_CtoML(sin_v+1)); + RecordCSymbol ("MathVec.sqrt", PTR_CtoML(sqrt_v+1)); +#endif + +} /* end of RecordGlobals. */ + +#ifdef SIZES_C64_ML32 + +/* PatchAddrs: + * + * On machines where the size of Addr_t is bigger than the size of an Word_t, + * we need to dynamically patch the static ML objects. + */ +void PatchAddrs () +{ + PATCH_ML_EXNID(_Div); + PATCH_ML_EXNID(_Overflow); + PATCH_ML_EXNID(SysErr); + + PATCH_ASM_CLOSURE(array); + PATCH_ASM_CLOSURE(bind_cfun); + PATCH_ASM_CLOSURE(callc); + PATCH_ASM_CLOSURE(create_b); + PATCH_ASM_CLOSURE(create_r); + PATCH_ASM_CLOSURE(create_s); + PATCH_ASM_CLOSURE(create_v); + PATCH_ASM_CLOSURE(floor); + PATCH_ASM_CLOSURE(logb); + PATCH_ASM_CLOSURE(scalb); + PATCH_ASM_CLOSURE(try_lock); + PATCH_ASM_CLOSURE(unlock); + PATCH_ASM_CLOSURE(handle); + +#if (CALLEESAVE <= 0) + PATCH_ASM_CLOSURE(return); + PATCH_ASM_CLOSURE(sigh_return); +#endif + +} /* end of PatchAddrs */ + +#endif /* SIZES_C64_ML32 */ diff --git a/base/runtime/kernel/load-ml.c b/base/runtime/kernel/load-ml.c new file mode 100644 index 0000000..ab31f61 --- /dev/null +++ b/base/runtime/kernel/load-ml.c @@ -0,0 +1,40 @@ +/* load-ml.c + * + * COPYRIGHT (c) 1993 by AT&T Bell Laboratories. + * + */ + +#include +#include "ml-base.h" +#include "ml-state.h" +#include "gc.h" +#include "heap-io.h" + + +/* LoadML: + * + * Load a heap image from a file and resume execution. The arguments allocSz, + * numGens and cacheGen are possible command-line overrides of the heap parameters + * specified in the image being imported (non-negative values signify override). + */ +void LoadML (const char *loadImage, heap_params_t *heapParams) +{ + ml_state_t *msp; + + msp = ImportHeapImage (loadImage, heapParams); + +#ifdef HEAP_MONITOR + if (HeapMon_Init(msp->ml_heap) == FAILURE) + Die("unable to start heap monitor"); +#endif + + InitFaultHandlers (); + +#ifdef SIZES_C64_ML32 + /* patch the 32-bit addresses */ + PatchAddrs (); +#endif + + RunML (msp); + +} /* end of LoadML */ diff --git a/base/runtime/kernel/main.c b/base/runtime/kernel/main.c new file mode 100644 index 0000000..ca6702e --- /dev/null +++ b/base/runtime/kernel/main.c @@ -0,0 +1,190 @@ +/* main.c + * + * COPYRIGHT (c) 1993 by AT&T Bell Laboratories. + * + * This is the main routine for the interactive version of SML/NJ. + */ + +#include +#include +#include +#include "ml-base.h" +#include "ml-options.h" +#include "ml-limits.h" +#include "ml-globals.h" + +#ifdef COLLECT_STATS +#include +#include +#include +#include "stats-data.h" +#endif + +FILE *DebugF = NULL; + +/* Runtime globals */ +bool_t SilentLoad = TRUE; +bool_t DumpObjectStrings = FALSE; +bool_t GCMessages = FALSE; +bool_t UnlimitedHeap = FALSE; +char **RawArgs; +char **CmdLineArgs; /* does not include the command name (argv[0]) */ +char *MLCmdName; /* the command name used to invoke the runtime */ + +/* local variables */ +PVT bool_t isBoot = FALSE; /* true if we should bootstrap a system */ +PVT char *LoadImage /* the path name of the image file to load */ + = DFLT_IMAGE; +PVT char *BootFrom /* the boot source (bin file list file). */ + = NULL; +# ifdef MP_SUPPORT +PVT int NumProcs = 1; /* not used */ +# endif + +PVT void ParseOptions (int argc, char **argv, heap_params_t **heapParams); + + +int main (int argc, char **argv) +{ + heap_params_t *heapParams; + + DebugF = stderr; + + /* process the command-line options */ + ParseOptions (argc, argv, &heapParams); + + InitTimers (); + RecordGlobals (); + InitCFunList (); + +#ifdef MP_SUPPORT + MP_Init(); +#endif + + /* start ML */ + if (isBoot) { + BootML (BootFrom, heapParams); + } + else { /* load an image */ + LoadML (LoadImage, heapParams); + } + + Exit (0); + +} /* end of main. */ + + +/* ParseOptions: + * + * Parse the command-line options. + */ +PVT void ParseOptions (int argc, char **argv, heap_params_t **heapParams) +{ + char option[MAX_OPT_LEN], *optionArg, **nextArg; + bool_t errFlg = FALSE; + + /* first scan for any heap/GC parameters */ + if ((*heapParams = ParseHeapParams(argv)) == NIL(heap_params_t *)) + errFlg = TRUE; + + RawArgs = argv; + CmdLineArgs = NEW_VEC(char *, argc); + MLCmdName = *argv++; + nextArg = CmdLineArgs; + while (--argc > 0) { + char *arg = *argv++; + +#define MATCH(opt) (strcmp(opt, option) == 0) +#define CHECK(opt) { \ + if (optionArg[0] == '\0') { \ + errFlg = TRUE; \ + Error("missing argument for \"%s\" option\n", opt); \ + continue; \ + } \ + } /* CHECK */ + + if (isRuntimeOption(arg, option, &optionArg)) { + if (MATCH("boot")) { + CHECK("boot"); + isBoot = TRUE; + BootFrom = optionArg; + } + else if (MATCH("load")) { + CHECK("load"); + LoadImage = optionArg; + } + else if (MATCH("cmdname")) { + CHECK("cmdname"); + MLCmdName = optionArg; + } +#ifdef MP_SUPPORT + else if (MATCH("nprocs")) { + CHECK("nprocs"); + NumProcs = atoi(optionArg); + if (NumProcs < 0) + NumProcs = 0; + else if (NumProcs > MAX_NUM_PROCS) + NumProcs = MAX_NUM_PROCS; + } +#endif + else if (MATCH("quiet")) { + SilentLoad = TRUE; + } + else if (MATCH("verbose")) { + SilentLoad = FALSE; + } + else if (MATCH("objects")) { + DumpObjectStrings = TRUE; + } + else if (MATCH("debug")) { + CHECK("debug"); + if ((DebugF = fopen(optionArg, "w")) == NULL) { + DebugF = stderr; /* restore the file pointer */ + errFlg = TRUE; + Error("unable to open debug output file \"%s\"\n", *(argv[-1])); + continue; + } + } +#ifdef COLLECT_STATS + else if (MATCH("stats")) { + CHECK("stats"); + StatsFD = open (optionArg, O_WRONLY|O_TRUNC|O_CREAT, 0666); + if (StatsFD == -1) { + errFlg = TRUE; + Error("unable to open statistics file \"%s\"\n", *(argv[-1])); + continue; + } + } +#endif + } + else { + *nextArg++ = arg; + } + } /* end while */ + + *nextArg = NIL(char *); + + if (errFlg) + Exit (1); + +} /* end of ParseOptions */ + + +/* Exit: + * Exit from the ML system. + */ +void Exit (int code) +{ +#if COUNT_REG_MASKS + DumpMasks(); +#endif +#ifdef COLLECT_STATS + if (StatsFD >= 0) { + STATS_FLUSH_BUF(); + close (StatsFD); + } +#endif + + exit (code); + +} /* end of Exit */ diff --git a/base/runtime/kernel/ml-options.c b/base/runtime/kernel/ml-options.c new file mode 100644 index 0000000..7ebfa02 --- /dev/null +++ b/base/runtime/kernel/ml-options.c @@ -0,0 +1,69 @@ +/* ml-options.c + * + * COPYRIGHT (c) 1996 AT&T Research. + * + * Command-line argument processing utilities. + */ + +#include +#include "ml-base.h" +#include "ml-options.h" + +/* isRuntimeOption: + * + * Check a command line argument to see if it is a possible runtime + * system argument (i.e., has the form "@SMLxxx" or "@SMLxxx=yyy"). + * If the command-line argument is a runtime-system argument, then + * return TRUE, and copy the "xxx" part into option, and set arg to + * point to the start of the "yyy" part. + */ +bool_t isRuntimeOption (char *cmdLineArg, char *option, char **arg) +{ + char *cp = cmdLineArg, c; + + if ((*cp++ == '@') && (*cp++ == 'S') && (*cp++ == 'M') && (*cp++ == 'L')) { + while (((c = *cp++) != '\0') && (c != '=')) + *option++ = c; + *option = '\0'; + *arg = cp; + return TRUE; + } + else + return FALSE; + +} /* end of isRuntimeOption */ + + +/* GetSzOption: + * Get a size specification (accepting K and M suffixes). + */ +int GetSzOption (int scale, char *sz) +{ + char *p; + + /* find first non-digit in the string */ + for (p = sz; isdigit(*p); p++) + continue; + + if (p == sz) + return -1; + else { + switch (*p) { + case '\0': + break; + case 'k': + case 'K': + scale = ONE_K; + break; + case 'm': + case 'M': + scale = ONE_MEG; + break; + default: + return -1; + } /* end of switch */ + return (scale * atoi(sz)); + } + +} /* end of GetSzOption */ + diff --git a/base/runtime/kernel/ml-state.c b/base/runtime/kernel/ml-state.c new file mode 100644 index 0000000..b229d73 --- /dev/null +++ b/base/runtime/kernel/ml-state.c @@ -0,0 +1,208 @@ +/* ml-state.c + * + * COPYRIGHT (c) 1993 by AT&T Bell Laboratories. + * + */ + +#include +#include "ml-base.h" +#include "vproc-state.h" +#include "ml-state.h" +#include "system-signals.h" +#include "tags.h" +#include "ml-values.h" +#include "ml-objects.h" +#include "ml-globals.h" +#include "gc.h" +#include "ml-timer.h" +#include "ml-limits.h" + + +vproc_state_t *VProc[MAX_NUM_PROCS]; +int NumVProcs; + + +/* local routines */ +PVT void InitVProcState (vproc_state_t *vsp); + + +/* AllocMLState: + */ +ml_state_t *AllocMLState (bool_t isBoot, heap_params_t *heapParams) +{ + ml_state_t *msp = NIL(ml_state_t *); +#ifdef MP_SUPPORT + int i; +#endif + +#ifdef MP_SUPPORT + + for (i = 0; i < MAX_NUM_PROCS; i++) { + if (((VProc[i] = NEW_OBJ(vproc_state_t)) == NIL(vproc_state_t *)) + || ((msp = NEW_OBJ(ml_state_t)) == NIL(ml_state_t *))) { + Die ("unable to allocate ML state vectors"); + } + VProc[i]->vp_state = msp; + } + msp = VProc[0]->vp_state; +#else + if (((VProc[0] = NEW_OBJ(vproc_state_t)) == NIL (vproc_state_t *)) + || ((msp = NEW_OBJ(ml_state_t)) == NIL(ml_state_t *))) { + Die ("unable to allocate ML state vector"); + } + VProc[0]->vp_state = msp; +#endif /* MP_SUPPORT */ + + /* allocate and initialize the heap data structures */ + InitHeap (msp, isBoot, heapParams); + +#ifdef MP_SUPPORT + /* partition the allocation arena given by InitHeap among the + * MAX_NUM_PROCS processors. + */ + NumVProcs = MAX_NUM_PROCS; + PartitionAllocArena(VProc); + /* initialize the per-processor ML state */ + for (i = 0; i < MAX_NUM_PROCS; i++) { + int j; + + InitVProcState (VProc[i]); + /* single timers are currently shared among multiple processors */ + if (i != 0) { + VProc[i]->vp_gcTime0 = VProc[0]->vp_gcTime0; + VProc[i]->vp_gcTime = VProc[0]->vp_gcTime; + } + } + /* initialize the first processor here */ + VProc[0]->vp_mpSelf = MP_ProcId (); + VProc[0]->vp_mpState = MP_PROC_RUNNING; +#else + InitVProcState (VProc[0]); + NumVProcs = 1; +#endif /* MP_SUPPORT */ + + /* initialize the timers */ + /** MP_SUPPORT note: for now, only proc 0 has timers **/ + ResetTimers (VProc[0]); + + return msp; + +} /* end of AllocMLState */ + +/* InitVProcState: + */ +PVT void InitVProcState (vproc_state_t *vsp) +{ + int i; + + vsp->vp_heap = vsp->vp_state->ml_heap; + vsp->vp_state->ml_vproc = vsp; + vsp->vp_inMLFlag = FALSE; + vsp->vp_handlerPending = FALSE; + vsp->vp_inSigHandler = FALSE; + vsp->vp_totalSigCount.nReceived = 0; + vsp->vp_totalSigCount.nHandled = 0; + vsp->vp_sigCode = 0; + vsp->vp_sigCount = 0; + vsp->vp_nextPendingSig = MIN_SYSTEM_SIG; + vsp->vp_gcSigState = ML_SIG_IGNORE; + vsp->vp_gcSigThreshold = 1; /* by default, we ignore minor collections */ + vsp->vp_gcTime0 = NEW_OBJ(Time_t); + vsp->vp_gcTime = NEW_OBJ(Time_t); + + for (i = 0; i < SIGMAP_SZ; i++) { + vsp->vp_sigCounts[i].nReceived = 0; + vsp->vp_sigCounts[i].nHandled = 0; + } + + /* initialize the ML state, including the roots */ + InitMLState (vsp->vp_state); + vsp->vp_state->ml_arg = ML_unit; + vsp->vp_state->ml_cont = ML_unit; + vsp->vp_state->ml_closure = ML_unit; + vsp->vp_state->ml_linkReg = ML_unit; + vsp->vp_state->ml_pc = ML_unit; + vsp->vp_state->ml_exnCont = ML_unit; + vsp->vp_state->ml_varReg = ML_unit; + vsp->vp_state->ml_calleeSave[0] = ML_unit; + vsp->vp_state->ml_calleeSave[1] = ML_unit; + vsp->vp_state->ml_calleeSave[2] = ML_unit; + +#ifdef MP_SUPPORT + vsp->vp_mpSelf = 0; + vsp->vp_mpState = MP_PROC_NO_PROC; +#endif + +} /* end of InitVProcState */ + +/* InitMLState: + * + * Initialize the ML State vector. Note that we do not initialize the root + * registers here, since this is sometimes called when the roots are live (from + * ML_ApplyFn). + */ +void InitMLState (ml_state_t *msp) +{ + msp->ml_storePtr = ML_unit; +#ifdef SOFT_POLL + msp->ml_pollPending = FALSE; + msp->ml_inPollHandler = FALSE; +#endif + +} /* end of InitMLState. */ + +/* SaveCState: + * + * Build a return closure that will save a collection of ML values + * being used by C. The ML values are passed by reference, with NIL + * as termination. + */ +void SaveCState (ml_state_t *msp, ...) +{ + va_list ap; + int n, i; + ml_val_t *vp; + + /* count the number of values to be saved */ + va_start (ap, msp); + for (n = 0; (vp = va_arg(ap, ml_val_t *)) != NIL(ml_val_t *); n++) + continue; + va_end (ap); + + va_start (ap, msp); + /* NOTE: we use a DTAG_arr_data to ensure that if n == 2, we don't lose our + * header in a GC before RestoreCState is called. + */ + ML_AllocWrite (msp, 0, MAKE_DESC(n, DTAG_arr_data)); + for (i = 1; i <= n; i++) { + vp = va_arg (ap, ml_val_t *); + ML_AllocWrite (msp, i, *vp); + } + msp->ml_calleeSave[0] = ML_Alloc(msp, n); + msp->ml_cont = PTR_CtoML(return_c); + va_end (ap); + +} /* end of SaveCState */ + +/* RestoreCState: + * + * Restore a collection of ML values from the return closure. + */ +void RestoreCState (ml_state_t *msp, ...) +{ + va_list ap; + int n, i; + ml_val_t *vp; + ml_val_t savedState; + + va_start (ap, msp); + savedState = msp->ml_calleeSave[0]; + n = OBJ_LEN(savedState); + for (i = 0; i < n; i++) { + vp = va_arg (ap, ml_val_t *); + *vp = REC_SEL(savedState, i); + } + va_end (ap); + +} /* end of RestoreCState */ + diff --git a/base/runtime/kernel/qualify-name.c b/base/runtime/kernel/qualify-name.c new file mode 100644 index 0000000..292546c --- /dev/null +++ b/base/runtime/kernel/qualify-name.c @@ -0,0 +1,33 @@ +/* qualify-name.c + * + * COPYRIGHT (c) 1996 AT&T Research. + */ + +#include +#include "ml-base.h" +#include "machine-id.h" + +#define SUFFIX MACHINE_ID "-" OPSYS_ID + +/* QualifyImageName: + * + * Given a pathname for an image file, this adds the architecture extension + * to the pathname (if it doesn't already have it). It returns TRUE, if the + * extension was added. + */ +bool_t QualifyImageName (char *buf) +{ + int len = strlen(buf); + int midLen = sizeof(SUFFIX); /* length of ID + 1 */ + + if ((midLen+1 < len) && (buf[len-midLen] == '.') + && (strcmp(&(buf[len-(midLen-1)]), SUFFIX) == 0)) + /* the pathname is already qualified by the machine ID and OPSYS */ + return FALSE; + + strcat (buf, "." SUFFIX); + + return TRUE; + +} /* end of QualifyImageName */ + diff --git a/base/runtime/kernel/run-ml.c b/base/runtime/kernel/run-ml.c new file mode 100644 index 0000000..ea781cf --- /dev/null +++ b/base/runtime/kernel/run-ml.c @@ -0,0 +1,318 @@ +/// \file run-ml.c +/// +/// \copyright 2021 The Fellowship of SML/NJ (http://www.smlnj.org) +/// All rights reserved. +/// +/// \brief The main dispatch function for running SML code and for +/// servicing requests for runtime-system services +/// +/// \author John Reppy +/// + +#include +#include +#include + +#include "ml-base.h" +#include "ml-limits.h" +#include "ml-values.h" +#include "vproc-state.h" +#include "ml-state.h" +#include "tags.h" +#include "ml-request.h" +#include "ml-objects.h" +#include "ml-globals.h" +#include "ml-signals.h" +#include "c-library.h" +#include "profile.h" +#include "gc.h" + +/* local functions */ +PVT void UncaughtExn (ml_val_t e); + + +/* ApplyMLFn: + * + * Apply the ML closure f to arg and return the result. If the flag useCont + * is set, then the ML state has already been initialized with a return + * continuation (by SaveCState). + */ +ml_val_t ApplyMLFn (ml_state_t *msp, ml_val_t f, ml_val_t arg, bool_t useCont) +{ + InitMLState (msp); + + /* initialize the calling context */ + msp->ml_exnCont = PTR_CtoML(handle_v+1); + msp->ml_varReg = ML_unit; + msp->ml_arg = arg; + if (! useCont) + msp->ml_cont = PTR_CtoML(return_c); + msp->ml_closure = f; + msp->ml_pc = + msp->ml_linkReg = GET_CODE_ADDR(f); + + RunML (msp); + + return msp->ml_arg; + +} /* end of ApplyMLFn */ + + +/* RaiseMLExn: + * + * Modify the ML state, so that the given exception will be raised + * when ML is resumed. + */ +void RaiseMLExn (ml_state_t *msp, ml_val_t exn) +{ + ml_val_t kont = msp->ml_exnCont; + +/** NOTE: we should have a macro defined in ml-state.h for this **/ + msp->ml_arg = exn; + msp->ml_closure = kont; + msp->ml_cont = ML_unit; + msp->ml_pc = + msp->ml_linkReg = GET_CODE_ADDR(kont); + +} /* end of RaiseMLExn. */ + +extern int restoreregs (ml_state_t *msp); + +/* RunML: + */ +#if defined(__CYGWIN32__) +void SystemRunML (ml_state_t *msp) +#else +void RunML (ml_state_t *msp) +#endif +{ + int request; + vproc_state_t *vsp = msp->ml_vproc; + ml_val_t prevProfIndex = PROF_OTHER; + + for (;;) { + + ASSIGN(ProfCurrent, prevProfIndex); + request = restoreregs(msp); + prevProfIndex = DEREF(ProfCurrent); + ASSIGN(ProfCurrent, PROF_RUNTIME); + + if (request == REQ_GC) { + if (vsp->vp_handlerPending) { /* this is really a signal */ +#ifdef SIGNAL_DEBUG + SayDebug("RunML: handler pending\n"); +#endif + /* check for GC */ + if (NeedGC (msp, ONE_K*WORD_SZB)) { + InvokeGC (msp, 0); + } + /* invoke the ML signal handler */ + ChooseSignal (vsp); + msp->ml_arg = MakeHandlerArg (msp, sigh_resume); + msp->ml_cont = PTR_CtoML(sigh_return_c); + msp->ml_exnCont = PTR_CtoML(handle_v+1); + msp->ml_closure = DEREF(MLSignalHandler); + msp->ml_pc = + msp->ml_linkReg = GET_CODE_ADDR(msp->ml_closure); + vsp->vp_inSigHandler = TRUE; + vsp->vp_handlerPending = FALSE; + } +#ifdef SOFT_POLL + else if (msp->ml_pollPending && !msp->ml_inPollHandler) { + /* this is a poll event */ +#if defined(MP_SUPPORT) && defined(MP_GCPOLL) + /* Note: under MP, polling is used for GC only */ +#ifdef POLL_DEBUG +SayDebug ("run-ml: poll event\n"); +#endif + msp->ml_pollPending = FALSE; + InvokeGC (msp,0); +#else + /* check for GC */ + if (NeedGC (msp, ONE_K*WORD_SZB)) + InvokeGC (msp, 0); + msp->ml_arg = MakeResumeCont(msp, pollh_resume); + msp->ml_cont = PTR_CtoML(pollh_return_c); + msp->ml_exnCont = PTR_CtoML(handle_v+1); + msp->ml_closure = DEREF(MLPollHandler); + msp->ml_pc = + msp->ml_linkReg = GET_CODE_ADDR(msp->ml_closure); + msp->ml_inPollHandler = TRUE; + msp->ml_pollPending = FALSE; +#endif /* MP_SUPPORT */ + } +#endif /* SOFT_POLL */ + else { + InvokeGC (msp, 0); + } + } + else { + switch (request) { + case REQ_RETURN: + /* do a minor collection to clear the store list */ + InvokeGC (msp, 0); + return; + + case REQ_EXN: /* an UncaughtExn exception */ + UncaughtExn (msp->ml_arg); + return; + + case REQ_FAULT: { /* a hardware fault */ + ml_val_t loc, traceStk, exn; + char *namestring; + if ((namestring = (char *)BO_AddrToCodeObjTag(msp->ml_faultPC)) != NIL(char *)) + { + char buf2[192]; + sprintf(buf2, "", namestring); + loc = ML_CString(msp, buf2); + } + else { + loc = ML_CString(msp, ""); + } + LIST_cons(msp, traceStk, loc, LIST_nil); + EXN_ALLOC(msp, exn, msp->ml_faultExn, ML_unit, traceStk); + RaiseMLExn (msp, exn); + } break; + + case REQ_BIND_CFUN: + msp->ml_arg = BindCFun ( + STR_MLtoC(REC_SEL(msp->ml_arg, 0)), + STR_MLtoC(REC_SEL(msp->ml_arg, 1))); + SETUP_RETURN(msp); + break; + + case REQ_CALLC: { + ml_val_t (*f)(), arg; + + SETUP_RETURN(msp); + if (NeedGC (msp, 8*ONE_K)) + InvokeGC (msp, 0); + +#ifdef INDIRECT_CFUNC + f = ((cfunc_binding_t *)REC_SELPTR(Word_t, msp->ml_arg, 0))->cfunc; +# ifdef DEBUG_TRACE_CCALL + SayDebug("CALLC: %s (%#x)\n", + ((cfunc_binding_t *)REC_SELPTR(Word_t, msp->ml_arg, 0))->name, + REC_SEL(msp->ml_arg, 1)); +# endif +#else + f = (cfunc_t) REC_SELPTR(Word_t, msp->ml_arg, 0); +# ifdef DEBUG_TRACE_CCALL + SayDebug("CALLC: %#x (%#x)\n", f, REC_SEL(msp->ml_arg, 1)); +# endif +#endif + arg = REC_SEL(msp->ml_arg, 1); + msp->ml_arg = (*f)(msp, arg); + } break; + + case REQ_ALLOC_STRING: + msp->ml_arg = ML_AllocString (msp, INT_MLtoC(msp->ml_arg)); + SETUP_RETURN(msp); + break; + + case REQ_ALLOC_BYTEARRAY: + msp->ml_arg = ML_AllocBytearray (msp, INT_MLtoC(msp->ml_arg)); + SETUP_RETURN(msp); + break; + + case REQ_ALLOC_REALDARRAY: + msp->ml_arg = ML_AllocRealdarray (msp, INT_MLtoC(msp->ml_arg)); + SETUP_RETURN(msp); + break; + + case REQ_ALLOC_ARRAY: + msp->ml_arg = ML_AllocArray (msp, + REC_SELINT(msp->ml_arg, 0), REC_SEL(msp->ml_arg, 1)); + SETUP_RETURN(msp); + break; + + case REQ_ALLOC_VECTOR: + msp->ml_arg = ML_AllocVector (msp, + REC_SELINT(msp->ml_arg, 0), REC_SEL(msp->ml_arg, 1)); + SETUP_RETURN(msp); + break; + + case REQ_SIG_RETURN: +#ifdef SIGNAL_DEBUG +SayDebug("REQ_SIG_RETURN: arg = %#x, pending = %d, inHandler = %d, nSigs = %d/%d\n", +msp->ml_arg, vsp->vp_handlerPending, vsp->vp_inSigHandler, +vsp->vp_totalSigCount.nHandled, vsp->vp_totalSigCount.nReceived); +#endif + /* throw to the continuation */ + SETUP_THROW(msp, msp->ml_arg, ML_unit); + /* note that we are exiting the handler */ + vsp->vp_inSigHandler = FALSE; + break; + +#ifdef SOFT_POLL + case REQ_POLL_RETURN: + /* throw to the continuation */ + SETUP_THROW(msp, msp->ml_arg, ML_unit); + /* note that we are exiting the handler */ + msp->ml_inPollHandler = FALSE; + ResetPollLimit (msp); + break; +#endif + +#ifdef SOFT_POLL + case REQ_POLL_RESUME: +#endif + case REQ_SIG_RESUME: +#ifdef SIGNAL_DEBUG +SayDebug("REQ_SIG_RESUME: arg = %#x\n", msp->ml_arg); +#endif + LoadResumeState (msp); + break; + + case REQ_BUILD_LITERALS: + Die ("BUILD_LITERALS request"); + break; + + default: + Die ("unknown request code = %d", request); + break; + } /* end switch */ + } + } /* end of while */ + +} /* end of RunML */ + + +/* UncaughtExn: + * Handle an uncaught exception. + */ +PVT void UncaughtExn (ml_val_t e) +{ + ml_val_t name = REC_SEL(REC_SEL(e, 0), 0); + ml_val_t val = REC_SEL(e, 1); + ml_val_t traceBack = REC_SEL(e, 2); + char buf[1024]; + + if (isUNBOXED(val)) + sprintf (buf, "%ld\n", (long int) INT_MLtoC(val)); + else { + ml_val_t desc = OBJ_DESC(val); + if (desc == DESC_string) + sprintf (buf, "\"%.*s\"", (int) GET_SEQ_LEN(val), STR_MLtoC(val)); + else + sprintf (buf, ""); + } + + if (traceBack != LIST_nil) { + /* find the information about where this exception was raised */ + ml_val_t next = traceBack; + do { + traceBack = next; + next = LIST_tl(traceBack); + } while (next != LIST_nil); + val = LIST_hd(traceBack); + sprintf (buf+strlen(buf), " raised at %.*s", + (int) GET_SEQ_LEN(val), STR_MLtoC(val)); + } + + Die ("Uncaught exception %.*s with %s\n", + GET_SEQ_LEN(name), GET_SEQ_DATAPTR(char, name), buf); + + Exit (1); + +} /* end of UncaughtExn */ diff --git a/base/runtime/kernel/standalone.c b/base/runtime/kernel/standalone.c new file mode 100644 index 0000000..65f653b --- /dev/null +++ b/base/runtime/kernel/standalone.c @@ -0,0 +1,17 @@ +/* standalone.c + * + * COPYRIGHT (c) 1993 by AT&T Bell Laboratories. + * + * This is the main routine for linking with stand-alone applications. + */ + +#include "ml-base.h" +#include "ml-limits.h" + +main (argc, **argv) + int arg; + char **argv; +{ + +} /* end of main. */ + diff --git a/base/runtime/kernel/swap-bytes.c b/base/runtime/kernel/swap-bytes.c new file mode 100644 index 0000000..d3f14c3 --- /dev/null +++ b/base/runtime/kernel/swap-bytes.c @@ -0,0 +1,28 @@ +/*! \file swap-bytes.c + * + * \author John Reppy + */ + +/* + * COPYRIGHT (c) 2016 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + */ + +#include "ml-base.h" + +#ifdef BYTE_ORDER_LITTLE + +/* SwapBytes32: + */ +Unsigned32_t SwapBytes32 (Unsigned32_t x) +{ + unsigned int b0 = x & 0x000000FF; + unsigned int b1 = x & 0x0000FF00; + unsigned int b2 = x & 0x00FF0000; + unsigned int b3 = x & 0xFF000000; + + return ((b0 << 24) | (b1 << 8) | (b2 >> 8) | (b3 >> 24)); + +} /* end of SwapBytes */ + +#endif /* BYTE_ORDER_LITTLE */ diff --git a/base/runtime/kernel/timers.c b/base/runtime/kernel/timers.c new file mode 100644 index 0000000..ee3ae40 --- /dev/null +++ b/base/runtime/kernel/timers.c @@ -0,0 +1,77 @@ +/* timers.c + * + * COPYRIGHT (c) 1995 by AT&T Bell Laboratories. + * + * OS independent timer routines; these rely on a OS dependent implementation + * of the following function: + * + * void GetCPUTime (Time_t *user_t, Time_t *sys_t); + */ + +#include "ml-base.h" +#include "vproc-state.h" +#include "ml-timer.h" + + +/* ResetTimers: + * + * Clear the GC timers. + */ +void ResetTimers (vproc_state_t *vsp) +{ + vsp->vp_gcTime->seconds = 0; + vsp->vp_gcTime->uSeconds = 0; + +} /* end of ResetTimers. */ + + +/* StartGCTimer: + */ +void StartGCTimer (vproc_state_t *vsp) +{ + GetCPUTime (vsp->vp_gcTime0, NIL(Time_t *)); + +} /* end of StartGCTimer */ + + +/* StopGCTimer: + * + * Stop the garbage collection timer and update the cumulative garbage collection + * time. If time is not NIL, then return the time (in ms.) spent since + * the start of the GC. + */ +void StopGCTimer (vproc_state_t *vsp, long *time) +{ + int sec, usec; + Time_t t1; + Time_t *gt0 = vsp->vp_gcTime0; + Time_t *gt = vsp->vp_gcTime; + + GetCPUTime (&t1, NIL(Time_t *)); + + sec = t1.seconds - gt0->seconds; + usec = t1.uSeconds - gt0->uSeconds; + + if (time != NIL(long *)) { + if (usec < 0) { + sec--; usec += 1000000; + } + else if (usec > 1000000) { + sec++; usec -= 1000000; + } + *time = (usec/1000 + sec*1000); + } + + sec = gt->seconds + sec; + usec = gt->uSeconds + usec; + if (usec < 0) { + sec--; usec += 1000000; + } + else if (usec > 1000000) { + sec++; usec -= 1000000; + } + gt->seconds = sec; + gt->uSeconds = usec; + +} /* end of StopGCTimer */ + diff --git a/base/runtime/kernel/unix-timers.c b/base/runtime/kernel/unix-timers.c new file mode 100644 index 0000000..7905673 --- /dev/null +++ b/base/runtime/kernel/unix-timers.c @@ -0,0 +1,97 @@ +/* unix-timers.c + * + * COPYRIGHT (c) 1995 by AT&T Bell Laboratories. + * + * A UNIX specific interface to the system timers. + */ + +#include "ml-unixdep.h" +#include "ml-base.h" +#include "ml-state.h" +#include "ml-timer.h" + +#ifdef HAS_GETRUSAGE + +#include +#include +typedef struct rusage time_struct_t; +#define GET_TIME(t) getrusage(RUSAGE_SELF, &(t)) +#define SYS_TIME(t) ((t).ru_stime) +#define USR_TIME(t) ((t).ru_utime) +#define SET_TIME(tp, t) do { tp->seconds = t.tv_sec; tp->uSeconds = t.tv_usec; } while(0) + +#else /* !HAS_GETRUSAGE */ + +#include +PVT long ClksPerSec = 0; +typedef struct tms time_struct_t; +#define GET_TIME(t) times(&(t)) +#define SYS_TIME(t) ((t).tms_stime) +#define USR_TIME(t) ((t).tms_utime) +#define SET_TIME(tp, t) do { \ + Time_t *__tp = (tp); \ + clock_t __t = (t); \ + __tp->uSeconds = ((__t % ClksPerSec) * 1000000) / ClksPerSec; \ + __tp->seconds = (__t / ClksPerSec); \ + } while(0) + +#endif /* HAS_GETRUSAGE */ + + + +/* Several versions of Unix seem to allow time values to decrease on successive + * calls to getrusage. To avoid problems in the SML code, which assumes that + * time is monotonically increasing, we latch the time values. + * + * NOTE: this should probably be move to the VProc structure to avoid problems + * on MP machines. + */ +PVT Time_t lastU, lastS; + + +/* InitTimers: + * + * Do any system specific timer initialization. + */ +void InitTimers () +{ + lastU.seconds = lastU.uSeconds = 0; + lastS.seconds = lastS.uSeconds = 0; + +#ifndef HAS_GETRUSAGE + if (ClksPerSec == 0) + ClksPerSec = sysconf(_SC_CLK_TCK); +#endif + +} /* end of InitTimers */ + + +/* GetCPUTime: + * + * Get the user and/or system cpu times in a system independent way. + */ +void GetCPUTime (Time_t *usrT, Time_t *sysT) +{ + time_struct_t ts; + + GET_TIME(ts); + + if (usrT != NIL(Time_t *)) { + SET_TIME(usrT, USR_TIME(ts)); + if (usrT->seconds < lastU.seconds) + usrT->seconds = lastU.seconds; + if ((usrT->seconds == lastU.seconds) && (usrT->uSeconds < lastU.uSeconds)) + usrT->uSeconds = lastU.uSeconds; + lastU = *usrT; + } + + if (sysT != NIL(Time_t *)) { + SET_TIME(sysT, SYS_TIME(ts)); + if (sysT->seconds < lastS.seconds) + sysT->seconds = lastS.seconds; + if ((sysT->seconds == lastS.seconds) && (sysT->uSeconds < lastS.uSeconds)) + sysT->uSeconds = lastS.uSeconds; + lastS = *sysT; + } + +} /* end of GetCPUTime. */ diff --git a/base/runtime/mach-dep/AMD64.prim.asm b/base/runtime/mach-dep/AMD64.prim.asm new file mode 100644 index 0000000..a8c4ffa --- /dev/null +++ b/base/runtime/mach-dep/AMD64.prim.asm @@ -0,0 +1,673 @@ +/*! \file AMD64.prim.asm + * + * COPYRIGHT (c) 2020 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + */ + +#include "ml-base.h" +#include "x86-syntax.h" +#include "ml-values.h" +#include "tags.h" +#include "ml-request.h" +#include "mlstate-offsets.h" /** this file is generated **/ +#include "ml-limits.h" + +#if defined(OPSYS_LINUX) && defined(__ELF__) +/* needed to disable the execution bit on the stack pages */ +.section .note.GNU-stack,"",%progbits +#endif + +/* + * AMD64 function call conventions (System V ABI): + * + * Caller save registers: rax, rcx, rdx, rsi, rdi, r8-r11 + * Callee save registers: rbx, rbp, r12-15. + * Save frame pointer (rbp) first to match standard function prelude + * Floating point state is caller-save. + * The first six integer arguments are passed in registers: rdi, rsi, + * rdx, rcx, r8, and r9. Additional arguments are passed on the + * stack (rightmost argument pushed first). + * Word-sized result returned in %rax. + * The stack frame must be multiple of 16 bytes + */ + +/* Registers (see compiler/CodeGen/amd64/amd64CpsRegs.sml): */ +#define temp RAX +#define misc0 RBX /* callee save */ +#define misc1 RCX /* callee save */ +#define misc2 RDX /* callee save */ +#define misc3 R10 +#define misc4 R11 +#define misc5 R12 +#define misc6 R13 +#define stdcont RSI +#define stdarg RBP +#define stdlink R8 +#define stdclos R9 +#define allocptr RDI +#define limitptr R14 +#define storeptr R15 +#define stackptr RSP + +/* other reg uses */ +#define creturn RAX + +/* Stack frame offsets are w.r.t. the stack pointer. See + * + * https://smlnj-gforge.cs.uchicago.edu/svn/smlnj/dev-notes/amd64-stack-frame.numbers + * + * for details. + */ +#define negateSignBit REGOFF(8264,RSP) +#define signBit REGOFF(8256,RSP) +#define overflowExn REGOFF(8248,RSP) +#define start_gc REGOFF(8240,RSP) /* holds address of saveregs */ +#define varptr REGOFF(8232,RSP) +#define exncont REGOFF(8224,RSP) +#define baseptr REGOFF(8216,RSP) /* start address of module */ +#define tempmem0 REGOFF(8192,RSP) +#define pc REGOFF(8208,RSP) /* gcLink */ +#define mlStatePtr REGOFF(8200,RSP) + +/* space reserved for spilling registers */ +#define ML_SPILL_SIZE 8192 + +/* size of stack-frame region where ML stuff is stored. */ +#define ML_AREA_SIZE 80 + +/* the amount to bump up the frame after the callee save registers have been + * pushed onto the stack. + */ +#define ML_FRAME_SIZE (ML_SPILL_SIZE+ML_AREA_SIZE) + +/* we put the request code in tempmem before jumping to set_request */ +#define request_w tempmem0 + +/* NOTE: this include must come after the definition of stdlink, etc. */ +#include "x86-macros.h" + +/* word-size related immediate operands */ +#define WORD_SZB_IM IM(8) +#define WORD_SHFT_IM IM(3) + +/**********************************************************************/ + TEXT + +/* sigh_return: + */ +ALIGNED_ENTRY(sigh_return_a) + MOV (IM(ML_unit),stdlink) + MOV (IM(ML_unit),stdclos) + MOV (IM(ML_unit),pc) + MOV (IM(REQ_SIG_RETURN), request_w) + JMP (CSYM(set_request)) + +/* sigh_resume: + * Resume execution at the point at which a handler trap occurred. This is a + * standard two-argument function, thus the closure is in ml_cont. + */ +ALIGNED_ENTRY(sigh_resume) + MOV (IM(REQ_SIG_RESUME), request_w) + JMP (CSYM(set_request)) + +/* pollh_return_a: + * The return continuation for the ML poll handler. + */ +ALIGNED_ENTRY(pollh_return_a) + MOV (IM(REQ_POLL_RETURN), request_w) + MOV (IM(ML_unit),stdlink) + MOV (IM(ML_unit),stdclos) + MOV (IM(ML_unit),pc) + JMP (CSYM(set_request)) + +/* pollh_resume: + * Resume execution at the point at which a poll event occurred. + */ +ALIGNED_ENTRY(pollh_resume) + MOV (IM(REQ_POLL_RESUME), request_w) + JMP (CSYM(set_request)) + +/* handle: + */ +ALIGNED_ENTRY(handle_a) + MOV (IM(REQ_EXN), request_w) + MOVE (stdlink,temp,pc) + JMP (CSYM(set_request)) + +/* return: + */ +ALIGNED_ENTRY(return_a) + MOV (IM(REQ_RETURN), request_w) + MOV (IM(ML_unit),stdlink) + MOV (IM(ML_unit),stdclos) + MOV (IM(ML_unit),pc) + JMP (CSYM(set_request)) + +/* Request a fault. */ +ALIGNED_ENTRY(request_fault) + MOV (IM(REQ_FAULT), request_w) + MOVE (stdlink,temp,pc) + JMP (CSYM(set_request)) + +/* bind_cfun : (string * string) -> c_function + */ +ALIGNED_ENTRY(bind_cfun_a) + CHECKLIMIT + MOV (IM(REQ_BIND_CFUN), request_w) + JMP (CSYM(set_request)) + +/* build_literals: + */ +ALIGNED_ENTRY(build_literals_a) + CHECKLIMIT + MOV (IM(REQ_BUILD_LITERALS), request_w) + JMP (CSYM(set_request)) + +/* callc: + */ +ALIGNED_ENTRY(callc_a) + CHECKLIMIT + MOV (IM(REQ_CALLC), request_w) + JMP (CSYM(set_request)) + +/* saveregs: + * Entry point for GC. Control is transfered using a `call` instruction, + * so the return address is on the top of the stack. + */ +ALIGNED_ENTRY(saveregs) + POP (pc) + MOV (IM(REQ_GC), request_w) + /* fall into set_request */ + +/* set_request: + * common code to switch execution from SML to runtime system. The request + * code will be in `tempmem` (on the stack). + */ +ENTRY(set_request) + /* temp holds mlStatePtr, valid request in request_w */ + /* Save registers */ + MOV (mlStatePtr, temp) + MOV (allocptr, REGOFF(AllocPtrOffMSP,temp)) + MOV (stdarg, REGOFF(StdArgOffMSP,temp)) + MOV (stdcont, REGOFF(StdContOffMSP,temp)) + +#define temp2 allocptr + /* note that we have left ML code */ + MOV (REGOFF(VProcOffMSP,temp), temp2) + MOV (IM(0), REGOFF(InMLOffVSP, temp2)) + + /* Save stack-allocated CPS registers before the stack frame is popped. */ + MOVE (exncont, temp2, REGOFF(ExnPtrOffMSP, temp)) + MOVE (varptr, temp2, REGOFF(VarPtrOffMSP, temp)) + MOVE (pc, temp2, REGOFF(PCOffMSP, temp)) +#undef temp2 + + /* Save remaining registers */ + MOV (limitptr, REGOFF(LimitPtrOffMSP, temp)) + MOV (storeptr, REGOFF(StorePtrOffMSP, temp)) + MOV (stdclos, REGOFF(StdClosOffMSP, temp)) + MOV (stdlink, REGOFF(LinkRegOffMSP, temp)) + MOV (misc0, REGOFF(Misc0OffMSP, temp)) + MOV (misc1, REGOFF(Misc1OffMSP, temp)) + MOV (misc2, REGOFF(Misc2OffMSP, temp)) + + /* return val of function is request code */ + MOV(request_w,creturn) + + /* Pop the stack frame and return to run_ml(). */ + ADD (IM(ML_FRAME_SIZE), RSP) + + /* restore C callee-save registers */ + POP (R15) + POP (R14) + POP (R13) + POP (R12) + POP (RBX) + POP (RBP) + RET + +/**********************************************************************/ + +/* restoreregs (ml_state_t *msp): + * + * Switch from C to SML. + */ +#ifdef OPSYS_WIN32 +/* on Windows, `restoreregs` is a C wrapper around `asm_restoreregs` that + * handles traps (see `runtime/mach-dep/win32-fault.c`) + */ +ALIGNED_ENTRY(asm_restoreregs) +#else +ALIGNED_ENTRY(restoreregs) +#endif + /* save C callee-save registers */ + PUSH (RBP) + PUSH (RBX) + PUSH (R12) + PUSH (R13) + PUSH (R14) + PUSH (R15) + /* allocate the rest of the stack frame */ + SUB (IM(ML_FRAME_SIZE), RSP) + + /* move the argument (MLState ptr) to the temp register */ + MOV (RDI, temp) + +#define temp2 RBX + /* Initialize the ML stack frame. */ + MOVE (REGOFF(ExnPtrOffMSP, temp), temp2, exncont) + MOVE (REGOFF(VarPtrOffMSP, temp), temp2, varptr) + MOVE (REGOFF(PCOffMSP, temp), temp2, pc) + LEA (CODEADDR(CSYM(saveregs)), temp2) + MOV (temp2, start_gc) + MOV (temp, mlStatePtr) + /* Store address of "Overflow" exception in stack */ +#if defined(OPSYS_DARWIN) + MOV (CSYM(_Overflow_id0)@GOTPCREL(%rip), temp2) + ADD (IM(8), temp2) + MOV (temp2, overflowExn) +#elif defined(OPSYS_LINUX) + LEA (CODEADDR(8+CSYM(_Overflow_id0)), temp2) + MOV (temp2, overflowExn) +#else + /* for now we do nothing, since we do not have LLVM support for this system */ +#endif + /* Store bitmasks to support floating-point "neg" and "abs" in stack */ + MOV ($0x8000000000000000, temp2) + MOV (temp2, signBit) + MOV ($0x7fffffffffffffff, temp2) + MOV (temp2, negateSignBit) +#undef temp2 + + /* Load ML registers. */ + MOV (REGOFF(AllocPtrOffMSP, temp), allocptr) + MOV (REGOFF(LimitPtrOffMSP, temp), limitptr) + MOV (REGOFF(StorePtrOffMSP, temp), storeptr) + MOV (REGOFF(LinkRegOffMSP, temp), stdlink) + MOV (REGOFF(StdClosOffMSP, temp), stdclos) + MOV (REGOFF(StdContOffMSP, temp), stdcont) + MOV (REGOFF(StdArgOffMSP, temp), stdarg) + MOV (REGOFF(Misc0OffMSP, temp), misc0) + MOV (REGOFF(Misc1OffMSP, temp), misc1) + MOV (REGOFF(Misc2OffMSP, temp), misc2) + + PUSH (misc2) /* free up a register */ + PUSH (temp) /* save msp temporarily */ + +#define tmpreg misc2 + + /* note that we are entering ML */ + MOV (REGOFF(VProcOffMSP,temp), temp) /* temp is now vsp */ +#define vsp temp + MOV (IM(1),REGOFF(InMLOffVSP,vsp)) + + /* handle signals */ + MOV (REGOFF(SigsRecvOffVSP,vsp),RDX) + CMP (REGOFF(SigsHandledOffVSP,vsp),RDX) + +#undef tmpreg + JNE (pending) + +restore_and_jmp_ml: + POP (temp) /* restore temp to msp */ + POP (misc2) + +jmp_ml: + CMP (limitptr, allocptr) + JMP (CODEPTR(REGOFF(PCOffMSP,temp))) /* Jump to ML code. */ + + +/* QUESTION: are these fields 32-bits? */ +pending: + /* Currently handling signal? */ + CMP (IM(0), REGOFF(InSigHandlerOffVSP,vsp)) + JNE (restore_and_jmp_ml) + /* handler trap is now pending */ + MOV (IM(1),HandlerPendingOffVSP(vsp)) + + /* must restore here because limitptr is on stack */ /* XXX */ + POP (temp) /* restore temp to msp */ + POP (misc2) + + MOV (allocptr,limitptr) + JMP (jmp_ml) /* Jump to ML code. */ +#undef vsp + +/* ---------------------------------------------------------------------- + * array : (int * 'a) -> 'a array + * Allocate and initialize a new array. This can cause GC. + */ +ALIGNED_ENTRY(array_a) + CHECKLIMIT + MOV (REGIND(stdarg),temp) /* temp := length in words */ + SAR (IM(1),temp) /* temp := length untagged */ + CMP (IM(SMALL_OBJ_SZW),temp) /* small object? */ + JGE (L_array_large) + /* use misc5 and misc6 as temporary registers */ +#define temp1 misc5 +#define temp2 misc6 + /* build data object descriptor in temp1 */ + MOV (temp,temp1) + SAL (IM(TAG_SHIFTW),temp1) + OR (IM(MAKE_TAG(DTAG_arr_data)),temp1) + /* store descriptor and bump allocation pointer */ + MOV (temp1,REGIND(allocptr)) + ADD (WORD_SZB_IM,allocptr) + /* allocate and initialize data object */ + MOV (allocptr,temp1) /* temp1 := array data ptr */ + MOV (REGOFF(8,stdarg),temp2) /* temp2 := initial value */ +LABEL(L_array_lp) + MOV (temp2,REGIND(allocptr)) /* init array */ + ADD (WORD_SZB_IM,allocptr) + SUB (IM(1),temp) + JNE (L_array_lp) + /* Allocate array header */ + MOV (IM(DESC_polyarr),REGIND(allocptr)) /* descriptor */ + ADD (WORD_SZB_IM,allocptr) + MOV (REGIND(stdarg),temp) /* temp := length */ + MOV (allocptr, stdarg) /* result := header addr */ + MOV (temp1, REGIND(allocptr)) /* store pointer to data */ + MOV (temp, REGOFF(8,allocptr)) /* store length */ + ADD (IM(16),allocptr) + CONTINUE +#undef temp1 +#undef temp2 + + /* large arrays are allocated in the runtime system */ +LABEL(L_array_large) + MOV (stdlink,pc) + MOV (IM(REQ_ALLOC_ARRAY),request_w) + JMP (CSYM(set_request)) + + +/* create_r : int -> realarray */ +ALIGNED_ENTRY(create_r_a) + CHECKLIMIT + MOV (stdarg,temp) /* temp := length */ + SAR (IM(1),temp) /* temp := untagged length in words */ + CMP (IM(SMALL_OBJ_SZW),temp) + JGE (L_create_r_large) + +#define temp1 misc0 + PUSH (misc0) /* use misc0 as temp1 */ + + /* allocate the data object */ + MOV (temp,temp1) + SAL (IM(TAG_SHIFTW),temp1) /* temp1 := descriptor */ + OR (IM(MAKE_TAG(DTAG_raw64)),temp1) + MOV (temp1,REGIND(allocptr)) /* store descriptor */ + ADD (WORD_SZB_IM,allocptr) /* allocptr++ */ + MOV (allocptr,temp1) /* temp1 := data object */ + SAL (WORD_SHFT_IM,temp) /* temp := length in bytes */ + ADD (temp,allocptr) /* allocptr += length */ + + /* allocate the header object */ + MOV (IM(DESC_real64arr),REGIND(allocptr)) + ADD (WORD_SZB_IM,allocptr) /* allocptr++ */ + MOV (temp1,REGIND(allocptr)) /* header data */ + MOV (stdarg,REGOFF(8,allocptr)) /* header length */ + MOV (allocptr,stdarg) /* stdarg := header obj */ + ADD (IM(16),allocptr) /* allocptr += 2 */ + + POP (misc0) + CONTINUE +#undef temp1 + +LABEL(L_create_r_large) + MOV (stdlink,pc) + MOV (IM(REQ_ALLOC_REALDARRAY),request_w) + JMP (CSYM(set_request)) + + +/* create_b : int -> bytearray */ +ALIGNED_ENTRY(create_b_a) + CHECKLIMIT + MOV (stdarg,temp) /* temp is tagged length */ + SAR (IM(1),temp) /* temp >>= 1; (untag length) */ + ADD (IM(7),temp) /* temp += 7; */ + SAR (WORD_SHFT_IM,temp) /* temp >>= 3; (length in 8-byte words) */ + CMP (IM(SMALL_OBJ_SZW),temp) + JGE (L_create_b_large) + +#define temp1 misc0 + PUSH (misc0) + + /* allocate the data object */ + MOV (temp,temp1) + SAL (IM(TAG_SHIFTW),temp1) + OR (IM(MAKE_TAG(DTAG_raw)),temp1) + MOV (temp1,REGIND(allocptr)) /* store descriptor */ + ADD (WORD_SZB_IM,allocptr) + MOV (allocptr,temp1) /* temp1 is data object */ + SAL (WORD_SHFT_IM,temp) /* temp is size in bytes */ + ADD (temp,allocptr) /* allocptr += length */ + + /* allocate the header object */ + MOV (IM(DESC_word8arr),REGIND(allocptr)) + ADD (WORD_SZB_IM,allocptr) + MOV (temp1,REGIND(allocptr)) + MOV (stdarg,REGOFF(8,allocptr)) + MOV (allocptr,stdarg) /* stdarg := header */ + ADD (IM(16),allocptr) /* allocptr += 2 */ + POP (misc0) + CONTINUE +#undef temp1 + +LABEL(L_create_b_large) + MOV (stdlink,pc) + MOV (IM(REQ_ALLOC_BYTEARRAY),request_w) + JMP (CSYM(set_request)) + + +/* create_s : int -> string */ +ALIGNED_ENTRY(create_s_a) + CHECKLIMIT + MOV (stdarg,temp) + SAR (IM(1),temp) /* untag length */ + ADD (IM(8),temp) /* 7 + extra byte */ + SAR (WORD_SHFT_IM,temp) /* length in words */ + CMP (IM(SMALL_OBJ_SZW),temp) + JGE (L_create_s_large) + + PUSH (misc0) +#define temp1 misc0 + + MOV (temp,temp1) + SAL (IM(TAG_SHIFTW),temp1) + OR (IM(MAKE_TAG(DTAG_raw)),temp1) + MOV (temp1,REGIND(allocptr)) /* store descriptor */ + ADD (WORD_SZB_IM,allocptr) + + MOV (allocptr,temp1) /* temp1 is data obj */ + SAL (WORD_SHFT_IM,temp) /* length in bytes */ + ADD (temp,allocptr) /* allocptr += length */ + MOV (IM(0),REGOFF((-8),allocptr)) /* zero out last word */ + + /* allocate header obj */ + MOV (IM(DESC_string),temp) /* hdr descr */ + MOV (temp,REGIND(allocptr)) + ADD (WORD_SZB_IM,allocptr) + MOV (temp1,REGIND(allocptr)) /* hdr data */ + MOV (stdarg,REGOFF(8,allocptr)) /* hdr length */ + MOV (allocptr, stdarg) /* stdarg is hdr obj */ + ADD (IM(16),allocptr) /* allocptr += 2 */ + + POP (misc0) +#undef temp1 + CONTINUE + +LABEL(L_create_s_large) + MOVE (stdlink, temp, pc) + MOV (IM(REQ_ALLOC_STRING),request_w) + JMP (CSYM(set_request)) + +/* create_v_a : int * 'a list -> 'a vector + * creates a vector with elements taken from a list. + * n.b. The frontend ensures that list cannot be nil. + */ +ALIGNED_ENTRY(create_v_a) + CHECKLIMIT + MOV (REGIND(stdarg),temp) /* temp = len tagged */ + PUSH (misc0) +#define temp1 misc0 + + MOV (temp,temp1) + SAR (IM(1),temp1) /* temp1 = untagged len */ + CMP (IM(SMALL_OBJ_SZW),temp1) + JGE (L_create_v_large) + + PUSH (misc1) +#define temp2 misc1 + + SAL (IM(TAG_SHIFTW),temp1) + OR (IM(MAKE_TAG(DTAG_vec_data)),temp1) + MOV (temp1,REGIND(allocptr)) + ADD (WORD_SZB_IM,allocptr) + MOV (REGOFF(8,stdarg),temp1) /* temp1 is list */ + MOV (allocptr,stdarg) /* stdarg is vector */ + +LABEL(L_create_v_lp) + MOV (REGIND(temp1),temp2) /* hd */ + MOV (temp2,REGIND(allocptr)) /* store into vector */ + ADD (WORD_SZB_IM,allocptr) + MOV (REGOFF(8,temp1),temp1) /* tl */ + CMP (IM(ML_nil),temp1) /* isNull? */ + JNE L_create_v_lp + + /* allocate header object */ + MOV (IM(DESC_polyvec),temp1) + MOV (temp1,REGIND(allocptr)) + ADD (WORD_SZB_IM,allocptr) + MOV (stdarg,REGIND(allocptr)) /* data */ + MOV (temp,REGOFF(8,allocptr)) /* len */ + MOV (allocptr,stdarg) /* result */ + ADD (IM(16),allocptr) /* allocptr += 2 */ + + POP (misc1) + POP (misc0) + CONTINUE +#undef temp1 +#undef temp2 + +LABEL(L_create_v_large) + POP (misc0) /* restore misc0 */ + MOVE (stdlink, temp, pc) + MOV (IM(REQ_ALLOC_VECTOR),request_w) + JMP (CSYM(set_request)) + +/* try_lock: spin_lock -> bool. + * low-level test-and-set style primitive for mutual-exclusion among + * processors. For now, we only provide a uni-processor trivial version. + */ +ALIGNED_ENTRY(try_lock_a) +#if (MAX_PROCS > 1) +# error multiple processors not supported +#else /* (MAX_PROCS == 1) */ + MOV (REGIND(stdarg), temp) /* Get old value of lock. */ + MOV (IM(1), REGIND(stdarg)) /* Set the lock to ML_false. */ + MOV (temp, stdarg) /* Return old value of lock. */ + CONTINUE +#endif + +/* unlock : releases a spin lock + */ +ALIGNED_ENTRY(unlock_a) +#if (MAX_PROCS > 1) +# error multiple processors not supported +#else /* (MAX_PROCS == 1) */ + MOV (IM(3), REGIND(stdarg)) /* Store ML_true into lock. */ + MOV (IM(1), stdarg) /* Return unit. */ + CONTINUE +#endif + + +/********************* Floating point functions. *********************/ + +/* rounding modes (see Table 4-14 in the Instruction Set Reference) */ +#define RND_TO_NEGINF IM(9) +#define RND_TO_POSINF IM(10) +#define RND_TO_ZERO IM(11) + + TEXT + .align 8 + +/* floor : real -> int + * Return the nearest integer that is less or equal to the argument. + * Caller's responsibility to make sure arg is in range. + */ +ALIGNED_ENTRY(floor_a) + MOVSD (REGIND(stdarg), XMM0) + ROUNDSD (RND_TO_NEGINF, XMM0, XMM0) + CVTTSD2SI (XMM0, stdarg) + SAL (IM(1),stdarg) /* convert result to tagged representation */ + INC (stdarg) + CONTINUE + +/* logb : real -> int + * Extract the unbiased exponent pointed to by stdarg. + * Note: Using fxtract, and fistl does not work for inf's and nan's. + */ +ALIGNED_ENTRY(logb_a) + /* DEPRECATED */ + CONTINUE + +#define EXP_MASK IM(0x7ff0000000000000) +#define NOT_EXP_MASK IM(0x800fffffffffffff) +#define SIGN_MASK IM(0x8000000000000000) +#define INFINITY EXP_MASK + +/* scalb : (real * int) -> real + * Scale the first argument by 2 raised to the second argument. + * Note that if we were guaranteed AVX512 support, then we could use + * the VSCALEFSD instruction, but since we are not, we implement this + * using integer operations. + */ +ALIGNED_ENTRY(scalb_a) + CHECKLIMIT + MOV (REGOFF(8,stdarg), temp) /* get second arg */ + SAR (IM(1), temp) /* untag second arg */ + MOV (REGIND(stdarg), stdarg) /* put pointer to real in stdarg */ +#define temp1 misc5 +#define temp2 misc6 + MOV (REGIND(stdarg), temp1) /* put real bits in temp1 */ + MOV (EXP_MASK, temp2) + AND (temp1, temp2) /* temp2 has shifted exponent */ + TEST (temp2, temp2) + JE (L_scalb_return) /* if temp2 == 0 then return first arg */ + SAR (IM(52), temp2) + ADD (temp, temp2) /* temp2 = exponent + scale */ + JLE (L_scalb_under) + CMP (IM(2047), temp2) + JGE (L_scalb_over) + MOV (NOT_EXP_MASK, temp) /* clear exponent field in original number */ + AND (temp, temp1) + SAL (IM(52), temp2) /* shift exponent into position */ + OR (temp2, temp1) /* temp1 := temp1 | temp2 */ + +L_scalb_alloc: + MOV (IM(DESC_reald),temp) /* hdr descr */ + MOV (temp,REGIND(allocptr)) + ADD (WORD_SZB_IM,allocptr) + MOV (temp1,REGIND(allocptr)) /* data = temp1 */ + MOV (allocptr, stdarg) /* stdarg is result */ + ADD (WORD_SZB_IM,allocptr) /* allocptr += 1 */ + +L_scalb_return: + CONTINUE + +L_scalb_under: + XOR (temp1,temp1) /* temp1 = 0 */ + JMP (L_scalb_alloc) + +L_scalb_over: /* Overflow, so return infinity */ + MOV (SIGN_MASK, temp) /* temp1 := sign bit of temp1 */ + AND (temp, temp1) + MOV (INFINITY, temp) /* temp1 := sign | infinity */ + OR (temp, temp1) + JMP L_scalb_alloc +#undef temp1 +#undef temp2 + +END + +/* end of AMD64.prim.asm */ diff --git a/base/runtime/mach-dep/PPC.prim.asm b/base/runtime/mach-dep/PPC.prim.asm new file mode 100644 index 0000000..2e081f9 --- /dev/null +++ b/base/runtime/mach-dep/PPC.prim.asm @@ -0,0 +1,879 @@ +/* PPC.prim.asm + * + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + */ + +#ifndef _ASM_ +#define _ASM_ +#endif + +#include "ml-base.h" +#include "asm-base.h" +#include "ml-values.h" +#include "tags.h" +#include "ml-request.h" +#include "ml-limits.h" +#include "mlstate-offsets.h" /** this file is generated **/ + +#if defined(OPSYS_LINUX) && defined(__ELF__) +/* needed to disable the execution bit on the stack pages */ +.section .note.GNU-stack,"",%progbits +#endif + +/* factor out variations in assembler synatax */ +#if defined(OPSYS_DARWIN) + /* use the macOS X names by default */ +# define HI(name) ha16(name) +# define LO(name) lo16(name) + +#else +# define HI(name) name@ha +# define LO(name) name@l + +# define cr0 0 +# define cr1 1 +# define r0 0 +# define r1 1 +# define r2 2 +# define r3 3 +# define r4 4 +# define r5 5 +# define r6 6 +# define r7 7 +# define r8 8 +# define r9 9 +# define r10 10 +# define r11 11 +# define r12 12 +# define r13 13 +# define r14 14 +# define r15 15 +# define r16 16 +# define r17 17 +# define r18 18 +# define r19 19 +# define r20 20 +# define r21 21 +# define r22 22 +# define r23 23 +# define r24 24 +# define r25 25 +# define r26 26 +# define r27 27 +# define r28 28 +# define r29 29 +# define r30 30 +# define r31 31 + +# define f0 0 +# define f1 1 +# define f2 2 +# define f3 3 +# define f4 4 +# define f5 5 +# define f6 6 +# define f7 7 +# define f8 8 +# define f9 9 +# define f10 10 +# define f11 11 +# define f12 12 +# define f13 13 +# define f14 14 +# define f15 15 +# define f16 16 +# define f17 17 +# define f18 18 +# define f19 19 +# define f20 20 +# define f21 21 +# define f22 22 +# define f23 23 +# define f24 24 +# define f25 25 +# define f26 26 +# define f27 27 +# define f28 28 +# define f29 29 +# define f30 30 +# define f31 31 + +#endif + + + +/** register usage **/ +#define sp r1 +#define stackptr sp + +#define allocptr r14 +#define limitptr r15 +#define storeptr r16 +#define stdlink r17 +#define stdclos r18 +#define stdarg r19 +#define stdcont r20 +#define exncont r21 +#define varptr r22 +#define miscreg0 r24 +#define miscreg1 r25 +#define miscreg2 r26 + +#define pc r28 + + +#define atmp1 r29 +#define atmp2 r30 +#define atmp3 r31 +#define atmp4 r13 + +/* old stackframe layout -- NO LONGER VALID: + * Note: 1. cvti2d stuff is used in CodeGen/ppc/ppcPseudoInstr.sml. + * (Orig: The offset of cvti2d tmp is used in rs6000.sml.) + * + * 2. Question: Where is the load/store offset used, if at all? + * (Orig: float load/store offset is hardwired in rs6000instr.sml.) + * + * +-------------------+ + * sp--> 0(sp) | mlstate addr | + * +-------------------+ + * 4(sp) | _startgc addr | + * +-------------------+ + * 8(sp) | cvti2d const | + * +-------------------+ + * 16(sp) | cvti2d tmp2 | + * +-------------------+ + * 24(sp) | float load/store | + * +-------------------+ + * 32(sp) | floor tmp | + * +-------------------+ + * 40(sp) | unused | + * +-------------------+ + * 44(sp) | unused | + * +-------------------+ + * argblock(sp) | C calleesave regs | + * . . + * . . + * +-------------------+ + * argblock+92(sp) | | + */ + +/* NEW stackframe layout -- with support for c-calls (nlffi): + * + * Note: cvti2d stuff is used in CodeGen/ppc/ppcPseudoInstr.sml. + * + * sp--> 0(sp) +-------------------+ + * | | + * | linkage area | + * | | + * 24(sp) +-------------------+ + * | 4072 (=4096-24) | + * | bytes for c-call | + * | arguments | + * . . + * . . + * +-------------------+ + * 4096(sp) | mlstate addr (4) | + * +-------------------+ + * 4100(sp) | _startgc addr (4) | + * +-------------------+ + * 4104(sp) | cvti2d const (8) | + * | | + * +-------------------+ + * 4112(sp) | cvti2d tmp2 (8) | + * | | + * +-------------------+ + * 4120(sp) | floor tmp (8) | + * +-------------------+ + * argblock(sp) | C calleesave regs | (argblock = 4128, mult. of 16) + * . . + * . . + * +-------------------+ + * argblock+92(sp) | | + */ + + +/** MLState offsets **/ +#define argblock 4128 +#define savearea (23*4+4) /* lr,cr,1,2,13-31,padding */ +#define framesize 8192 +#define MLSTATE_OFFSET 4096 +#define STARTGC_OFFSET 4100 +#define CVTI2D_OFFSET 4104 +#define FLOOR_OFFSET 4120 + +/** offsets in condition register CR.0 **/ + +#define CR0_LT 0 +#define CR0_GT 1 +#define CR0_EQ 2 +#define CR0_SO 3 + +#define CR0 cr0 + + +/** C parameter passing conventions **/ +#define CARG1 r3 +#define CRESULT1 r3 + + +#define CONTINUE \ + cmpl CR0,allocptr,limitptr __SC__ \ + mtlr stdcont __SC__ \ + blr + +#define CHECKLIMIT(label) \ + bt CR0_LT, label __SC__ \ + addi pc, stdlink,0 __SC__ \ + b CSYM(saveregs) __SC__ \ + label: + +#if defined (USE_TOC) +/* create table of contents entries for things we need the address of. */ + .extern CSYM(_PollFreq0) + .extern CSYM(_PollEvent0) + .extern CSYM(saveregs) + + .toc +T._PollFreq0: + .tc H._PollFreq0[TC],CSYM(_PollFreq0) +T._PollEvent0: + .tc H._PollEvent0[TC],CSYM(_PollEvent0) +T.saveregs: + .tc H.saveregs[TC],CSYM(saveregs) +T.cvti2d_CONST: + .tc H.cvti2d_CONST[TC],cvti2d_CONST +#endif + RO_DATA + ALIGN8 +cvti2d_CONST: + DOUBLE(4503601774854144.0) + + TEXT +/* sig_return : ('a cont * 'a) -> 'b + */ +ML_CODE_HDR(sigh_return_a) + li atmp4,REQ_SIG_RETURN + li stdlink, ML_unit + li stdclos, ML_unit + li pc, ML_unit + b set_request + +ENTRY(sigh_resume) + li atmp4, REQ_SIG_RESUME + b set_request + +/* pollh_return_a: + * The return continuation for the ML poll handler. + */ +ML_CODE_HDR(pollh_return_a) + li atmp4,REQ_POLL_RETURN + li stdlink, ML_unit + li stdclos, ML_unit + li pc, ML_unit + b set_request + +/* pollh_resume: + * Resume execution at the point at which a poll event occurred. + */ +ENTRY(pollh_resume) + li atmp4,REQ_POLL_RESUME + b set_request + + /* exception handler for ML functions called from C */ +ML_CODE_HDR(handle_a) + li atmp4,REQ_EXN + addi pc, stdlink, 0 + b set_request + + + /* continuation for ML functions called from C */ +ML_CODE_HDR(return_a) + li atmp4,REQ_RETURN + li stdlink, ML_unit + li stdclos, ML_unit + li pc, ML_unit + b set_request + + +ENTRY(request_fault) + li atmp4,REQ_FAULT + addi pc, stdlink, 0 + b set_request + + +/* bind_cfun : (string * string) -> c_function + */ +ML_CODE_HDR(bind_cfun_a) + CHECKLIMIT(bind_cfun_v_limit) + li atmp4,REQ_BIND_CFUN + b set_request + +ML_CODE_HDR(build_literals_a) + CHECKLIMIT(build_literals_v_limit) + li atmp4,REQ_BUILD_LITERALS + b set_request + +ML_CODE_HDR(callc_a) + CHECKLIMIT(callc_v_limit) + li atmp4,REQ_CALLC + b set_request + + +ENTRY(saveregs) + li atmp4, REQ_GC + mflr pc + /* fall through */ + +set_request: + lwz atmp3,MLSTATE_OFFSET(sp) /* save the minimal ML state */ + lwz atmp2,VProcOffMSP(atmp3) /* atmp2 := VProc State ptr */ + li r0,0 + stw r0,InMLOffVSP(atmp2) /* note that we have left ML */ + stw allocptr,AllocPtrOffMSP(atmp3) + stw limitptr,LimitPtrOffMSP(atmp3) + stw storeptr,StorePtrOffMSP(atmp3) + stw stdlink,LinkRegOffMSP(atmp3) + stw pc,PCOffMSP(atmp3) + stw stdarg,StdArgOffMSP(atmp3) + stw stdcont,StdContOffMSP(atmp3) + stw stdclos,StdClosOffMSP(atmp3) + stw varptr,VarPtrOffMSP(atmp3) + stw exncont,ExnPtrOffMSP(atmp3) + stw miscreg0,Misc0OffMSP(atmp3) + stw miscreg1,Misc1OffMSP(atmp3) + stw miscreg2,Misc2OffMSP(atmp3) + + addi r3,atmp4,0 /* request as argument */ + +restore_c_regs: + lwz r2, (argblock+4)(sp) + lwz r13, (argblock+8)(sp) + lwz r14, (argblock+12)(sp) + lwz r15, (argblock+16)(sp) + lwz r16, (argblock+20)(sp) + lwz r17, (argblock+24)(sp) + lwz r18, (argblock+28)(sp) + lwz r19, (argblock+32)(sp) + lwz r20, (argblock+36)(sp) + lwz r21, (argblock+40)(sp) + lwz r22, (argblock+44)(sp) + lwz r23, (argblock+48)(sp) + lwz r24, (argblock+52)(sp) + lwz r25, (argblock+56)(sp) + lwz r26, (argblock+60)(sp) + lwz r27, (argblock+64)(sp) + lwz r28, (argblock+68)(sp) + lwz r29, (argblock+72)(sp) + lwz r30, (argblock+76)(sp) + lwz r31, (argblock+80)(sp) + lwz r0, (argblock+84)(sp) + mtlr r0 + lwz r0, (argblock+88)(sp) + mtcrf 0x80, r0 + addi sp,sp,framesize + blr + + + +CENTRY(restoreregs) + stwu sp,-framesize(sp) +#if defined(USE_TOC) + lwz r0,T.saveregs(2) +#else +#ifdef BROKEN_CODE + lis r28, HI(CSYM(saveregs)) /* GPR0 <- addrof(saveregs) */ + addi r28, r28, LO(CSYM(saveregs)) + li r0, 0 + add r0, r28, r0 +#else + lis r11, HI(CSYM(saveregs)) /* GPR0 <- addrof(saveregs) */ + addi r11, r11, LO(CSYM(saveregs)) + li r0, 0 + add r0, r11, r0 +#endif +#endif + stw r3, MLSTATE_OFFSET(sp) + stw r0, STARTGC_OFFSET(sp) +#if defined(USE_TOC) + lwz r4, T.cvti2d_CONST(r2) /* GPR2 is RTOC */ + lfd f0, 0(r4) +#else + lis r4, HI(cvti2d_CONST) + lfd f0, LO(cvti2d_CONST)(r4) +#endif + stfd f0, CVTI2D_OFFSET(sp) + + stw r2, argblock+4(sp) + stw r13, argblock+8(sp) + stw r14, argblock+12(sp) + stw r15, argblock+16(sp) + stw r16, argblock+20(sp) + stw r17, argblock+24(sp) + stw r18, argblock+28(sp) + stw r19, argblock+32(sp) + stw r20, argblock+36(sp) + stw r21, argblock+40(sp) + stw r22, argblock+44(sp) + stw r23, argblock+48(sp) + stw r24, argblock+52(sp) + stw r25, argblock+56(sp) + stw r26, argblock+60(sp) + stw r27, argblock+64(sp) + stw r28, argblock+68(sp) + stw r29, argblock+72(sp) + stw r30, argblock+76(sp) + stw r31, argblock+80(sp) + mflr r0 + stw r0, argblock+84(sp) + mfcr r0 + stw r0, argblock+88(sp) + + and atmp1,r3,r3 /* atmp1 := MLState pointer */ + + lwz allocptr,AllocPtrOffMSP(atmp1) + lwz limitptr,LimitPtrOffMSP(atmp1) + lwz storeptr,StorePtrOffMSP(atmp1) + lwz atmp2,VProcOffMSP(atmp1) /* atmp2 := VProc State ptr */ + li atmp3,1 + stw atmp3,InMLOffVSP(atmp2) /* we are entering ML code */ + lwz stdarg,StdArgOffMSP(atmp1) + lwz stdcont,StdContOffMSP(atmp1) + lwz stdclos,StdClosOffMSP(atmp1) + lwz exncont,ExnPtrOffMSP(atmp1) + lwz miscreg0,Misc0OffMSP(atmp1) + lwz miscreg1,Misc1OffMSP(atmp1) + lwz miscreg2,Misc2OffMSP(atmp1) + lwz stdlink,LinkRegOffMSP(atmp1) + lwz varptr,VarPtrOffMSP(atmp1) + lwz atmp3,PCOffMSP(atmp1) + mtlr atmp3 + /* check for pending signals */ + lwz atmp1,SigsRecvOffVSP(atmp2) /* number of signals received */ + lwz atmp3,SigsHandledOffVSP(atmp2) /* number of signals handled */ + cmp CR0,atmp1,atmp3 + bne pending_sigs /* if not equal, then pending sigs */ + + +ENTRY(ml_go) + cmpl CR0,allocptr,limitptr + mtfsfi 3,0 /* Ensure that no exceptions are set */ + mtfsfi 2,0 + mtfsfi 1,0 + mtfsfi 0,0 + li r0,0 + mtxer r0 + blr /* jump to ML code */ + +pending_sigs: /* there are pending signals */ + /* check if currently handling a signal */ + lwz atmp1,InSigHandlerOffVSP(atmp2) + cmpi CR0,atmp1,0 + bf CR0_EQ,CSYM(ml_go) + + li r0,1 + stw r0,HandlerPendingOffVSP(atmp2) + addi limitptr,allocptr,0 + b CSYM(ml_go) + +/* array : (int * 'a) -> 'a array + * Allocate and initialize a new array. This can cause GC. + */ +ML_CODE_HDR(array_a) + CHECKLIMIT(array_a_limit) + + lwz atmp1,0(stdarg) /* atmp1 := length in words */ + srawi atmp2, atmp1, 1 /* atmp2 := length (untagged) */ + cmpi CR0,atmp2,SMALL_OBJ_SZW /* is this a small object */ + bf CR0_LT, array_a_large + + lwz stdarg,4(stdarg) /* initial value */ + slwi atmp3,atmp2,TAG_SHIFTW /* build descriptor in tmp3 */ + ori atmp3,atmp3,MAKE_TAG(DTAG_arr_data) + stw atmp3,0(allocptr) /* store descriptor */ + addi allocptr,allocptr,4 /* points to new object */ + addi atmp3,allocptr,0 /* array data ptr in atmp3 */ + +array_a_1: + stw stdarg,0(allocptr) /* initialize array */ + addi atmp2,atmp2,-1 + addi allocptr,allocptr,4 + cmpi CR0,atmp2,0 + bf CR0_EQ,array_a_1 + + /* allocate array header */ + li atmp2,DESC_polyarr /* descriptor in tmp2 */ + stw atmp2,0(allocptr) /* store descriptor */ + addi allocptr, allocptr, 4 /* allocptr++ */ + addi stdarg, allocptr, 0 /* result = header addr */ + stw atmp3,0(allocptr) /* store pointer to data */ + stw atmp1,4(allocptr) + addi allocptr,allocptr,8 + CONTINUE +array_a_large: /* off-line allocation */ + li atmp4,REQ_ALLOC_ARRAY + addi pc, stdlink,0 + b set_request + +/* create_b : int -> bytearray + * Create a bytearray of the given length. + */ +ML_CODE_HDR(create_b_a) + CHECKLIMIT(create_b_a_limit) + + srawi atmp2,stdarg,1 /* atmp2 = length (untagged int) */ + addi atmp2,atmp2,3 /* atmp2 = length in words */ + srawi atmp2,atmp2,2 + cmpi CR0,atmp2,SMALL_OBJ_SZW /* is this a small object */ + bf CR0_LT,create_b_a_large + + /* allocate the data object */ + slwi atmp1,atmp2,TAG_SHIFTW /* build descriptor in atmp1 */ + ori atmp1,atmp1,MAKE_TAG(DTAG_raw) + stw atmp1,0(allocptr) /* store the data descriptor */ + addi allocptr,allocptr,4 /* allocptr++ */ + addi atmp3, allocptr, 0 /* atmp3 = data object */ + slwi atmp2, atmp2, 2 /* atmp2 = length in bytes */ + add allocptr,allocptr,atmp2 /* allocptr += total length */ + + /* allocate the header object */ + li atmp1, DESC_word8arr /* header descriptor */ + stw atmp1,0(allocptr) + addi allocptr, allocptr, 4 /* allocptr++ */ + stw atmp3,0(allocptr) /* header data field */ + stw stdarg,4(allocptr) /* header length field */ + addi stdarg, allocptr, 0 /* stdarg = header object */ + addi allocptr,allocptr,8 /* allocptr += 2 */ + CONTINUE + +create_b_a_large: /* off-line allocation */ + li atmp4,REQ_ALLOC_BYTEARRAY + addi pc, stdlink,0 + b set_request + + +/* +** create_s_a: int -> string +*/ +ML_CODE_HDR(create_s_a) + CHECKLIMIT(create_s_a_limit) + + srawi atmp2,stdarg,1 /* atmp2 = length(untagged int) */ + addi atmp2,atmp2,4 + srawi atmp2,atmp2,2 /* length in words (including desc) */ + cmpi CR0,atmp2,SMALL_OBJ_SZW /* is this a small object */ + bf CR0_LT,create_s_a_large + + slwi atmp1,atmp2,TAG_SHIFTW /* build descriptor in atmp3 */ + ori atmp1,atmp1,MAKE_TAG(DTAG_raw) + stw atmp1,0(allocptr) /* store descriptor */ + addi allocptr,allocptr,4 /* allocptr++ */ + addi atmp3,allocptr,0 /* atmp3 = data object */ + slwi atmp2,atmp2,2 /* atmp2 = length in bytes */ + add allocptr,atmp2,allocptr /* allocptr += total length */ + stw r0,-4(allocptr) /* store zero in last word */ + + /* Allocate the header object */ + li atmp1, DESC_string /* header descriptor */ + stw atmp1, 0(allocptr) + addi allocptr,allocptr,4 /* allocptr++ */ + stw atmp3,0(allocptr) /* header data field */ + stw stdarg,4(allocptr) /* header length field */ + addi stdarg,allocptr,0 /* stdarg = header object */ + addi allocptr,allocptr,8 /* allocptr += 2 */ + CONTINUE + +create_s_a_large: /* off-line allocation */ + li atmp4,REQ_ALLOC_STRING + addi pc, stdlink,0 + b set_request + + + +ML_CODE_HDR(create_r_a) + CHECKLIMIT(create_r_a_limit) + + srawi atmp2,stdarg,1 /* atmp2 = length (untagged int) */ + slwi atmp2,atmp2,1 /* length in words */ + cmpi CR0,atmp2,SMALL_OBJ_SZW /* is this a small object */ + bf CR0_LT,create_r_a_large + + /* allocate the data object */ + slwi atmp1, atmp2, TAG_SHIFTW /* descriptor in atmp1 */ + ori atmp1, atmp1, MAKE_TAG(DTAG_raw64) +#ifdef ALIGN_REALDS + ori allocptr,allocptr,4 +#endif + stw atmp1,0(allocptr) /* store the descriptor */ + addi allocptr, allocptr, 4 /* allocptr++ */ + addi atmp3, allocptr, 0 /* atmp3 = data object */ + slwi atmp2, atmp2, 2 /* tmp2 = length in bytes */ + add allocptr,allocptr,atmp2 /* allocptr += length */ + + /* allocate the header object */ + li atmp1, DESC_real64arr + stw atmp1, 0(allocptr) /* header descriptor */ + addi allocptr,allocptr,4 /* allocptr++ */ + stw atmp3,0(allocptr) /* header data field */ + stw stdarg,4(allocptr) /* header length field */ + addi stdarg,allocptr,0 /* stdarg = header object */ + addi allocptr,allocptr,8 /* allocptr += 2 */ + CONTINUE +create_r_a_large: /* offline allocation */ + li atmp4,REQ_ALLOC_REALDARRAY + addi pc, stdlink,0 + b set_request + + +/* create_v_a : (int * 'a list) -> 'a vector + * Create a vector with elements taken from a list. + * NOTE: the front-end ensures that list cannot be nil. + */ +ML_CODE_HDR(create_v_a) + CHECKLIMIT(create_v_a_limit) + + lwz atmp1,0(stdarg) /* atmp1 = tagged length */ + srawi atmp2,atmp1,1 /* atmp2 = untagged length */ + cmpi CR0,atmp2,SMALL_OBJ_SZW /* is this a small object */ + bf CR0_LT,create_v_a_large + + slwi atmp2,atmp2,TAG_SHIFTW /* build descriptor in atmp2 */ + ori atmp2,atmp2,MAKE_TAG(DTAG_vec_data) + stw atmp2,0(allocptr) /* store descriptor */ + addi allocptr,allocptr,4 /* allocptr++ */ + lwz atmp2,4(stdarg) /* atmp2 := list */ + addi stdarg,allocptr,0 /* stdarg := vector */ + +create_v_a_1: + lwz atmp3,0(atmp2) /* atmp3:=hd(atmp2) */ + lwz atmp2,4(atmp2) /* atmp2:=tl(atmp2) */ + stw atmp3,0(allocptr) /* store word */ + addi allocptr,allocptr,4 /* allocptr++ */ + cmpi CR0,atmp2,ML_nil + bf CR0_EQ,create_v_a_1 + + /* allocate header object */ + li atmp3, DESC_polyvec /* descriptor in tmp3 */ + stw atmp3,0(allocptr) /* store descriptor */ + addi allocptr,allocptr,4 /* allocptr++ */ + stw stdarg,0(allocptr) /* header data field */ + stw atmp1,4(allocptr) /* header length */ + addi stdarg, allocptr, 0 /* result = header object */ + addi allocptr,allocptr,8 /* allocptr += 2 */ + CONTINUE + +create_v_a_large: + li atmp4,REQ_ALLOC_VECTOR + addi pc, stdlink,0 + b set_request + + +#if defined(USE_TOC) + .toc +T.floor_CONST: + .tc H.floor_CONST[TC],floor_CONST +#endif + RO_DATA + ALIGN8 +floor_CONST: + DOUBLE(4512395720392704.0) + + TEXT + /* + ** floor_a : real -> int + ** Do not test for overflow, it's the caller's + ** responsibility to be in range. + ** + ** This code essentially loads 1.0*2^52 into + ** register f3. A floating add will internally + ** perform an exponent alignment, which will + ** bring the required bits into the mantissa. + */ +ML_CODE_HDR(floor_a) + lfd f1, 0(stdarg) + /* + ** Neat thing here is that this code works for + ** both +ve and -ve floating point numbers. + */ + mffs f0 + stfd f0,0(allocptr) /* steal the allocptr for a second */ + lwz r0, 4(allocptr) + mtfsb1 30 + mtfsb1 31 +#ifdef USE_TOC + lwz atmp1, T.floor_CONST(r2) + lfd f3, 0(atmp1) +#else + lis atmp1, HI(floor_CONST) + lfd f3, LO(floor_CONST)(atmp1) +#endif + fadd f6,f1,f3 + stfd f6,FLOOR_OFFSET(sp) + lwz stdarg,FLOOR_OFFSET+4(sp) + add stdarg,stdarg,stdarg + addi stdarg,stdarg,1 + + andi. r0,r0, 0xf + mtfsf 0xff,f0 + CONTINUE + + +ML_CODE_HDR(logb_a) + lwz stdarg,0(stdarg) /* most significant part */ + srawi stdarg,stdarg,20 /* throw out 20 low bits */ + andi. stdarg,stdarg,0x07ff /* clear all but 11 low bits */ + addi stdarg,stdarg,-1023 /* subtract 1023 */ + slwi stdarg,stdarg,1 /* make room for tag bit */ + addi stdarg,stdarg,1 /* add the tag bit */ + CONTINUE + + +/* +** scalb : real * int -> real +** scalb(x,y) = x * 2^y +*/ +ML_CODE_HDR(scalb_a) + CHECKLIMIT(scalb_v_limit) + lwz atmp1,4(stdarg) /* atmp1 := y */ + srawi atmp1,atmp1,1 /* atmp1 := machine int y */ + lwz stdarg,0(stdarg) /* stdarg := x */ + lwz atmp2,0(stdarg) /* atmp2 := MSW(x) */ + lis r0,0x7ff0 /* r0 := 0x7ff0,0000 */ + and. atmp3,atmp2,r0 /* atmp3 := atmp2 & 0x7ff00000 */ + bt CR0_EQ,scalb_all_done + + srawi atmp3,atmp3,20 /* atmp3 := ieee(exp) */ + add. atmp1,atmp1,atmp3 /* scale exponent */ + bt CR0_LT,scalb_underflow + + cmpi CR0,atmp1,2047 /* max. ieee(exp) */ + bf CR0_LT,scalb_overflow + + not r0,r0 /* r0 := not(r0) */ + and atmp2,atmp2,r0 /* atmp2 := high mantessa bits + sign */ + slwi atmp1,atmp1,20 /* atmp1 := new exponent */ + or atmp1,atmp1,atmp2 /* atmp1 := new MSB(x) */ + lwz atmp2, 4(stdarg) + +scalb_write_out: + stw atmp1, 4(allocptr) + stw atmp2, 8(allocptr) + li atmp3, DESC_reald + stw atmp3, 0(allocptr) + addi stdarg,allocptr,4 + addi allocptr,allocptr,12 + +scalb_all_done: + CONTINUE + +scalb_underflow: + li atmp1,0 + li atmp2,0 + b scalb_write_out + +LABEL(scalb_overflow) + mtfsb1 3 + + + +ML_CODE_HDR(try_lock_a) + lwz atmp1,0(stdarg) + li atmp2,1 /* ML_false */ + stw atmp2,0(stdarg) + addi stdarg,atmp1,0 + CONTINUE + + +ML_CODE_HDR(unlock_a) + li atmp1,3 /* ML_true */ + stw atmp1,0(stdarg) + li stdarg,1 /* just return unit */ + CONTINUE + + + +CENTRY(set_fsr) + mtfsb0 24 /* disable invalid exception */ + mtfsb0 25 /* disable overflow exception */ + mtfsb0 26 /* disable underflow exception */ + mtfsb0 28 /* disable inexact exception */ + mtfsb0 30 /* round to nearest */ + mtfsb0 31 + blr /* return */ + +/* saveFPRegs and restoreFPRegs are called from C only. */ +#define ctmp1 12 +#define ctmp2 11 +#define ctmp3 10 + + +CENTRY(SaveFPRegs) + stfd f14, 4(r3) + stfd f15, 12(r3) + stfd f16, 20(r3) + stfd f17, 28(r3) + stfd f18, 36(r3) + stfd f19, 44(r3) + stfd f20, 52(r3) + stfd f21, 60(r3) + stfd f22, 68(r3) + stfd f23, 76(r3) + stfd f24, 84(r3) + stfd f25, 92(r3) + stfd f26, 100(r3) + stfd f27, 108(r3) + stfd f28, 116(r3) + stfd f29, 124(r3) + stfd f30, 132(r3) + stfd f31, 140(r3) + + blr + +CENTRY(RestoreFPRegs) + lfd f14, 0(r3) + lfd f15, 8(r3) + lfd f16, 16(r3) + lfd f17, 24(r3) + lfd f18, 32(r3) + lfd f19, 40(r3) + lfd f20, 48(r3) + lfd f21, 56(r3) + lfd f22, 64(r3) + lfd f23, 72(r3) + lfd f24, 80(r3) + lfd f25, 88(r3) + lfd f26, 96(r3) + lfd f27, 104(r3) + lfd f28, 112(r3) + lfd f29, 120(r3) + lfd f30, 128(r3) + lfd f31, 136(r3) + blr + +#if (defined(ARCH_PPC) && (defined(OPSYS_LINUX) || defined(OPSYS_DARWIN) )) + +#define CACHE_LINE_SZB 32 +#define CACHE_LINE_MASK (CACHE_LINE_SZB-1) +#define CACHE_LINE_BITS 26 + +/* FlushICache: + * + * void FlushICache (Addr_t addr, Addr_t nbytes) + */ +CENTRY(FlushICache) + add r4,r3,r4 /* stop := addr+nbytes */ + addic r4,r4,CACHE_LINE_MASK /* stop := stop + CACHE_LINE_MASK */ + rlwinm r4,r4,0,0,CACHE_LINE_BITS /* stop := stop & ~CACHE_LINE_MASK */ +L_FlushICache_1: + cmplw cr1,r3,r4 /* while (addr < stop) */ + bc 4,4,L_FlushICache_2 + dcbf 0,r3 /* flush addr */ + icbi 0,r3 /* invalidate addr */ + addi r3,r3,CACHE_LINE_SZB /* addr := addr + CACHE_LINE_SZB */ + b L_FlushICache_1 /* end while */ +L_FlushICache_2: + blr + +#endif + diff --git a/base/runtime/mach-dep/SPARC.prim.asm b/base/runtime/mach-dep/SPARC.prim.asm new file mode 100644 index 0000000..a5f4dcb --- /dev/null +++ b/base/runtime/mach-dep/SPARC.prim.asm @@ -0,0 +1,648 @@ +/*! \file SPARC.prim.asm + * + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + */ + +#include "asm-base.h" +#include "ml-base.h" +#include "ml-values.h" +#include "tags.h" +#include "ml-request.h" +#include "ml-limits.h" +#include "mlstate-offsets.h" /** this file is generated **/ + +/* SPARC runtime code for ML. Registers are used as follows: + * + * %g0 zero + * %g1 standard link + * %g2-3 misc regs + * %g4 heap limit pointer + * %l5 store pointer + * %l7 alloc pointer + * %g4 limit pointer + * %l6 exception handler + * + * %o0-1 misc regs + * %o2 asm tmp + * %o3-5 misc regs + * %o6 sp + * %o7 gcLink + * + * %l0-4 misc regs + * + * %i0 standard arg + * %i1 standard cont + * %i2 standard clos + * %i3 base ptr + * %i4 misc reg + * %i5 var ptr + * %i6 fp (don't touch) + * %i7 misc reg + * + */ + + +#define ZERO %g0 +#define EXNCONT %l6 /* exception handler (ml_exncont) */ +#define ALLOCPTR %l7 /* freespace pointer (ml_allocptr) */ +#define STOREPTR %l5 /* store pointer (ml_storeptr) */ +#define LIMITPTR %g4 /* heap limit pointer (ml_limitptr)*/ +#define STDARG %i0 /* standard argument (ml_arg) */ +#define STDCONT %i1 /* standard continuation (ml_cont) */ +#define STDCLOS %i2 /* standard closure (ml_clos) */ +#define VARPTR %i5 /* var pointer (ml_varptr) */ +#define STDLINK %g1 /* standard link (ml_linkptr) */ +#define MISCREG0 %g2 +#define MISCREG1 %g3 +#define MISCREG2 %o0 +#define PC %o7 /* ml_pc (aka gc_link) */ + +#define ASMTMP %o2 /* assembly temporary used in ML */ +#define TMPREG1 ASMTMP +#define TMPREG2 %o3 +#define TMPREG3 %o4 +#define TMPREG4 %o5 + +/* %o2 and %o3 are also used as for multiply and divide */ + +/* + * %o6 = %sp (not used by ML) + * %i6 = %fp (not used by ML) + * %i7 = return address to C code (not used by ML) + * + * The ML stack frame has the following layout (set up by restoreregs): + * + * %fp = %sp+4096 + * +-------------------+ + * | | + * . . + * | | + * %sp+116: | spill area | + * +-------------------+ + * %sp+112: | unused | + * %sp+108: | unused | + * +-------------------+ + * %sp+104: | saved %o7 | + * +-------------------+ + * %sp+100: | addr of _saveregs | + * +-------------------+ + * %sp+96: | ptr to MLState | + * +-------------------+ + * %sp+92: | temp for floor | + * +-------------------+ + * %sp+88: | temp for cvti2d | + * +-------------------+ + * %sp+84: | addr of _ml_udiv | -- unused + * +-------------------+ + * %sp+80: | addr of _ml_umul | -- unused + * +-------------------+ + * %sp+76: | addr of _ml_div | -- unused + * +-------------------+ + * %sp+72: | addr of _ml_mul | -- unused + * +-------------------+ + * %sp+68: | saved %g6 | + * +-------------------+ + * %sp+64: | saved %g7 | + * +-------------------+ + * | space to save | + * | in and local | + * %sp: | registers | + * +-------------------+ + * + * Note that this must be a multiple of 8 bytes. The size of the + * stack frame is: + */ +#define ML_FRAMESIZE 4096 + +#define FLOOR_OFFSET 92 +#define MLSTATE_OFFSET 96 +#define STARTGC_OFFSET 100 +#define i7_OFFSET 104 + +#define CONTINUE \ + jmp STDCONT; \ + subcc ALLOCPTR,LIMITPTR,%g0 + +#define CHECKLIMIT(label) \ + blu label; \ + nop; \ + mov STDLINK,PC; \ + ba CSYM(saveregs); \ + nop; \ + label: + + + TEXT + +/* sigh_return_a: + * The return continuation for the ML signal handler. + */ +ML_CODE_HDR(sigh_return_a) + set ML_unit,STDLINK + set ML_unit,STDCLOS + set ML_unit,PC + ba set_request + set REQ_SIG_RETURN,TMPREG3 /* (delay slot) */ + +/* sigh_resume: + * Resume execution at the point at which a handler trap occurred. This is a + * standard two-argument function, thus the closure is in ml_cont (stdcont). + */ +ENTRY(sigh_resume) + mov STDLINK,PC + ba set_request + set REQ_SIG_RESUME,TMPREG3 /* (delay slot) */ + +/* pollh_return_a: + * The return continuation for the ML poll handler. + */ +ML_CODE_HDR(pollh_return_a) + set ML_unit,STDLINK + set ML_unit,STDCLOS + set ML_unit,PC + ba set_request + set REQ_POLL_RETURN,TMPREG3 /* (delay slot) */ + +/* pollh_resume: + * Resume execution at the point at which a poll event occurred. + */ +ENTRY(pollh_resume) + mov STDLINK,PC + ba set_request + set REQ_POLL_RESUME,TMPREG3 /* (delay slot) */ + +ML_CODE_HDR(handle_a) + mov STDLINK,PC + ba set_request + set REQ_EXN,TMPREG3 /* (delay slot) */ + +ML_CODE_HDR(return_a) + set ML_unit,STDLINK + set ML_unit,STDCLOS + set ML_unit,PC + ba set_request + set REQ_RETURN,TMPREG3 /* (delay slot) */ + +ENTRY(request_fault) + mov STDLINK,PC + ba set_request + set REQ_FAULT,TMPREG3 /* (delay slot) */ + +/* bind_cfun : (string * string) -> c_function + */ +ML_CODE_HDR(bind_cfun_a) + CHECKLIMIT(bind_cfun_v_limit) + ba set_request + set REQ_BIND_CFUN,TMPREG3 /* (delay slot) */ + +ML_CODE_HDR(build_literals_a) + CHECKLIMIT(build_literals_a_limit) + ba set_request + set REQ_BUILD_LITERALS,TMPREG3 /* (delay slot) */ + +ML_CODE_HDR(callc_a) + CHECKLIMIT(callc_a_limit) + ba set_request + set REQ_CALLC,TMPREG3 /* (delay slot) */ + +/* + * This is the entry point for starting gc called from ML's code. + * I've added an adjustment to the return address. The generated ML code + * uses the JMPL instruction, which does not add an offset of 8 to the + * correct return address. + * + * Allen 6/5/1998 + */ +ENTRY(saveregs0) + add PC, 8, PC +ENTRY(saveregs) + set REQ_GC,TMPREG3 + /* fall through */ + +set_request: + ld [%sp+MLSTATE_OFFSET],TMPREG2 /* get MLState ptr from stack */ + ld [TMPREG2+VProcOffMSP],TMPREG1 /* TMPREG1 := VProcPtr */ + st %g0,[TMPREG1+InMLOffVSP] /* note that we have left ML code */ + st ALLOCPTR,[TMPREG2+AllocPtrOffMSP] + st LIMITPTR,[TMPREG2+LimitPtrOffMSP] + st STOREPTR,[TMPREG2+StorePtrOffMSP]/* save storeptr */ + st STDLINK,[TMPREG2+LinkRegOffMSP] + st PC,[TMPREG2+PCOffMSP] /* PC of called function */ + st STDARG,[TMPREG2+StdArgOffMSP] /* save stdarg */ + st STDCLOS,[TMPREG2+StdClosOffMSP] /* save closure */ + st STDCONT,[TMPREG2+StdContOffMSP] /* save stdcont */ + st VARPTR,[TMPREG2+VarPtrOffMSP] /* save varptr */ + st EXNCONT,[TMPREG2+ExnPtrOffMSP] /* save exncont */ + st MISCREG0,[TMPREG2+Misc0OffMSP] + st MISCREG1,[TMPREG2+Misc1OffMSP] + st MISCREG2,[TMPREG2+Misc2OffMSP] + + ldd [%sp+64],%g6 /* restore C registers %g6 & %g7.*/ + ld [%sp+i7_OFFSET],%i7 /* restore C return address */ + mov TMPREG3,%i0 /* return request code */ + ret + restore /* delay slot */ + + +#define MLState ASMTMP +#define VProcPtr TMPREG4 +ENTRY(restoreregs) + save %sp,-SA(ML_FRAMESIZE),%sp + st %i0,[%sp+MLSTATE_OFFSET] /* save MLState ptr on stack */ + set CSYM(saveregs0),ASMTMP + st ASMTMP,[%sp+STARTGC_OFFSET] + mov %i0,MLState /* transfer MLState ptr to tmpreg4 */ + std %g6,[%sp+64] /* save C registers %g6 & %g7 */ + st %i7, [%sp+i7_OFFSET] /* save C return address */ + ld [MLState+AllocPtrOffMSP],ALLOCPTR + ld [MLState+LimitPtrOffMSP],LIMITPTR + ld [MLState+StorePtrOffMSP],STOREPTR + ld [MLState+PCOffMSP],PC + ld [MLState+StdArgOffMSP],STDARG + ld [MLState+StdContOffMSP],STDCONT + ld [MLState+StdClosOffMSP],STDCLOS + ld [MLState+VarPtrOffMSP],VARPTR + ld [MLState+LinkRegOffMSP],STDLINK + ld [MLState+ExnPtrOffMSP],EXNCONT /* restore exnptr */ + ld [MLState+Misc0OffMSP],MISCREG0 + ld [MLState+Misc1OffMSP],MISCREG1 + ld [MLState+Misc2OffMSP],MISCREG2 + ld [MLState+VProcOffMSP],VProcPtr /* TMPREG4 := VProcPtr */ + set 1,TMPREG2 /* note that we have entered ML code */ + st TMPREG2,[VProcPtr+InMLOffVSP] + ld [VProcPtr+SigsRecvOffVSP],TMPREG2 /* check for pending signals */ + ld [VProcPtr+SigsHandledOffVSP],TMPREG3 + subcc TMPREG2,TMPREG3,%g0 + bne pending_sigs + nop +CSYM(ml_go): /* invoke the ML code */ + jmp PC + subcc ALLOCPTR,LIMITPTR,%g0 /* Heap limit test (delay slot) */ + +pending_sigs: /* there are pending signals */ + /* check if we are currently handling a signal */ + ld [VProcPtr+InSigHandlerOffVSP],TMPREG2 + tst TMPREG2 + bne ml_go + set 1,TMPREG2 /* (delay slot) */ + /* note that a handler trap is pending */ + st TMPREG2,[VProcPtr+HandlerPendingOffVSP] + ba ml_go + mov ALLOCPTR,LIMITPTR /* (delay slot) */ + +/* array : (int * 'a) -> 'a array + * Allocate and initialize a new array. This can cause GC. + */ +ML_CODE_HDR(array_a) + CHECKLIMIT(array_a_limit) + ld [STDARG],TMPREG1 /* tmp1 = length in words */ + sra TMPREG1,1,TMPREG2 /* tmp2 = length (untagged) */ + cmp TMPREG2,SMALL_OBJ_SZW /* is this a small object? */ + bgt 3f + nop + /** allocate and initialize array data **/ + ld [STDARG+4],STDARG /* stdarg = initial value */ + sll TMPREG2,TAG_SHIFTW,TMPREG3 /* build descriptor in tmp3 */ + or TMPREG3,MAKE_TAG(DTAG_arr_data),TMPREG3 + st TMPREG3,[ALLOCPTR] /* store the descriptor */ + inc 4,ALLOCPTR /* allocptr++ */ + mov ALLOCPTR,TMPREG3 /* array data ptr in tmp3 */ +1: /* loop */ + st STDARG,[ALLOCPTR] + deccc 1,TMPREG2 /* if (--length > 0) */ + bgt 1b /* then continue */ + inc 4,ALLOCPTR /* allocptr++ (delay slot) */ + /* end loop */ + /** allocate array header **/ + set DESC_polyarr,TMPREG2 /* descriptor in tmp2 */ + st TMPREG2,[ALLOCPTR] /* store the descriptor */ + inc 4,ALLOCPTR /* allocptr++ */ + mov ALLOCPTR,STDARG /* result = header addr */ + st TMPREG3,[ALLOCPTR] /* store pointer to data */ + st TMPREG1,[ALLOCPTR+4] + inc 8,ALLOCPTR /* allocptr += 2 */ + CONTINUE + +3: /* here we do off-line allocation for big arrays */ + mov STDLINK,PC + ba set_request + set REQ_ALLOC_ARRAY,TMPREG3 /* (delayslot) */ + +/* create_r : int -> realarray + * Create a new realarray. + */ +ML_CODE_HDR(create_r_a) + CHECKLIMIT(create_r_a_limit) + sra STDARG,1,TMPREG2 /* tmp2 = length (untagged int) */ + sll TMPREG2,1,TMPREG2 /* tmp2 = length in words */ + cmp TMPREG2,SMALL_OBJ_SZW /* is this a small object? */ + bgt 1f + nop + /* Allocate the data object */ + sll TMPREG2,TAG_SHIFTW,TMPREG1 /* build data desc in tmp1 */ + or TMPREG1,MAKE_TAG(DTAG_raw64),TMPREG1 +#ifdef ALIGN_REALDS + or ALLOCPTR,0x4,ALLOCPTR /* desc is unaliged */ +#endif + st TMPREG1,[ALLOCPTR] /* store the data descriptor */ + inc 4,ALLOCPTR /* allocptr++ */ + mov ALLOCPTR,TMPREG3 /* tmp3 = data object */ + sll TMPREG2,2,TMPREG2 /* tmp2 = length in bytes */ + add ALLOCPTR,TMPREG2,ALLOCPTR /* allocptr += length */ + /* Allocate the header object */ + set DESC_real64arr,TMPREG1 + st TMPREG1,[ALLOCPTR] /* header descriptor */ + inc 4,ALLOCPTR /* allocptr++ */ + st TMPREG3,[ALLOCPTR] /* header data field */ + st STDARG,[ALLOCPTR+4] /* header length field */ + mov ALLOCPTR,STDARG /* stdarg = header object */ + inc 8,ALLOCPTR /* allocptr += 2 */ + CONTINUE + +1: /* off-line allocation of big realarrays */ + mov STDLINK,PC + ba set_request + set REQ_ALLOC_REALDARRAY,TMPREG3 /* (delayslot) */ + +/* create_b : int -> bytearray + * Create a bytearray of the given length. + */ +ML_CODE_HDR(create_b_a) + CHECKLIMIT(create_b_a_limit) + sra STDARG,1,TMPREG2 /* tmp2 = length (sparc int) */ + add TMPREG2,3,TMPREG2 /* tmp2 = length in words */ + sra TMPREG2,2,TMPREG2 + cmp TMPREG2,SMALL_OBJ_SZW /* is this a small object? */ + bgt 1f + nop + /* Allocate the data object */ + sll TMPREG2,TAG_SHIFTW,TMPREG1 /* build data desc in tmp1 */ + or TMPREG1,MAKE_TAG(DTAG_raw),TMPREG1 + st TMPREG1,[ALLOCPTR] /* store the data descriptor */ + inc 4,ALLOCPTR /* allocptr++ */ + mov ALLOCPTR,TMPREG3 /* tmp3 = data object */ + sll TMPREG2,2,TMPREG2 /* tmp2 = length in bytes */ + add ALLOCPTR,TMPREG2,ALLOCPTR /* allocptr += length */ + /* Allocate the header object */ + set DESC_word8arr,TMPREG1 /* header descriptor */ + st TMPREG1,[ALLOCPTR] + inc 4,ALLOCPTR /* allocptr++ */ + st TMPREG3,[ALLOCPTR] /* header data field */ + st STDARG,[ALLOCPTR+4] /* header length field */ + mov ALLOCPTR,STDARG /* stdarg = header object */ + inc 8,ALLOCPTR /* allocptr += 2 */ + CONTINUE + +1: /* here we do off-line allocation for big bytearrays */ + mov STDLINK,PC + ba set_request + set REQ_ALLOC_BYTEARRAY,TMPREG3 /* (delayslot) */ + +/* create_s : int -> string + * Create a string of the given length (> 0). + */ +ML_CODE_HDR(create_s_a) + CHECKLIMIT(create_s_a_limit) + sra STDARG,1,TMPREG2 /* tmp2 = length (sparc int) */ + add TMPREG2,4,TMPREG2 /* tmp2 = length in words */ + /* (including zero at end). */ + sra TMPREG2,2,TMPREG2 + cmp TMPREG2,SMALL_OBJ_SZW /* is this a small object? */ + bgt 1f + nop + /* Allocate the data object */ + sll TMPREG2,TAG_SHIFTW,TMPREG1 /* build data desc in tmp1 */ + or TMPREG1,MAKE_TAG(DTAG_raw),TMPREG1 + st TMPREG1,[ALLOCPTR] /* store the data descriptor */ + inc 4,ALLOCPTR /* allocptr++ */ + mov ALLOCPTR,TMPREG3 /* tmp3 = data object */ + sll TMPREG2,2,TMPREG2 /* tmp2 = length in bytes */ + add ALLOCPTR,TMPREG2,ALLOCPTR /* allocptr += length */ + st %g0,[ALLOCPTR-4] /* store 0 in last word of data */ + /* Allocate the header object */ + set DESC_string,TMPREG1 /* header descriptor */ + st TMPREG1,[ALLOCPTR] + inc 4,ALLOCPTR /* allocptr++ */ + st TMPREG3,[ALLOCPTR] /* header data field */ + st STDARG,[ALLOCPTR+4] /* header length field */ + mov ALLOCPTR,STDARG /* stdarg = header object */ + inc 8,ALLOCPTR /* allocptr += 2 */ + CONTINUE + +1: /* here we do off-line allocation for big strings */ + mov STDLINK,PC + ba set_request + set REQ_ALLOC_STRING,TMPREG3 /* (delayslot) */ + +/* create_v_a : (int * 'a list) -> 'a vector + * Create a vector with elements taken from a list. + * NOTE: the front-end ensures that list cannot be nil. + */ +ML_CODE_HDR(create_v_a) + CHECKLIMIT(create_v_a_limit) + ld [STDARG],TMPREG1 /* tmp1 = length (tagged int) */ + sra TMPREG1,1,TMPREG2 /* tmp2 = length (untagged int) */ + cmp TMPREG2,SMALL_OBJ_SZW /* is this a small object? */ + bgt 1f + nop + /* allocate and initialize data object */ + sll TMPREG2,TAG_SHIFTW,TMPREG2 /* build descriptor in TMPREG2 */ + or TMPREG2,MAKE_TAG(DTAG_vec_data),TMPREG2 + st TMPREG2,[ALLOCPTR] /* store the descriptor */ + inc 4,ALLOCPTR /* allocptr++ */ + ld [STDARG+4],TMPREG2 /* tmp2 = list */ + mov ALLOCPTR,STDARG /* stdarg = data obj */ +2: /* loop */ + ld [TMPREG2],TMPREG3 /* tmp3 = hd(tmp2) */ + ld [TMPREG2+4],TMPREG2 /* tmp2 = tl(tmp2) */ + st TMPREG3,[ALLOCPTR] /* store element */ + cmp TMPREG2,ML_nil /* if (tmp2 <> nil) goto loop */ + bne 2b + inc 4,ALLOCPTR /* allocptr++ (delay slot) */ + /* end loop */ + /* allocate header object */ + set DESC_polyvec,TMPREG3 /* descriptor in TMPREG3 */ + st TMPREG3,[ALLOCPTR] /* header descriptor */ + inc 4,ALLOCPTR /* allocptr++ */ + st STDARG,[ALLOCPTR] /* header data field */ + st TMPREG1,[ALLOCPTR+4] /* header length field */ + mov ALLOCPTR,STDARG /* result = header object */ + inc 8,ALLOCPTR /* allocptr += 2 */ + CONTINUE + +1: /* off-line allocation of big vectors */ + mov STDLINK,PC + ba set_request + set REQ_ALLOC_VECTOR,TMPREG3 /* (delayslot) */ + + +/* floor : real -> int + * Return the floor of the argument ; do not check for out-of-range (it's + * the ML code's responsibility to check before calling. */ +ML_CODE_HDR(floor_a) + ld [STDARG],%f0 /* fetch arg into %f0, %f1. */ + ld [STDARG+4],%f1 + ld [STDARG],TMPREG2 /* tmpreg2 gets high word. */ + tst TMPREG2 /* negative ? */ + blt 1f + nop + /* handle positive case */ + fdtoi %f0,%f2 /* cvt to int (round towards 0) */ + st %f2,[%sp+FLOOR_OFFSET] + ld [%sp+FLOOR_OFFSET],TMPREG2 /* tmpreg2 gets int result (via stack temp). */ + add TMPREG2,TMPREG2,TMPREG2 + add TMPREG2,1,STDARG + CONTINUE + +1: /* handle negative case. */ + fdtoi %f0,%f2 /* cvt to int (round towards 0) */ + st %f2,[%sp+FLOOR_OFFSET] + fitod %f2,%f4 /* cvt back to real to check for fraction */ + fcmpd %f0,%f4 /* same value? */ + ld [%sp+FLOOR_OFFSET],TMPREG2 /* tmpreg2 gets int result (via stack temp). */ + fbe 2f /* check result of fcmpd */ + nop + dec TMPREG2 /* push one lower */ +2: /* cvt result to ML int, and continue */ + add TMPREG2,TMPREG2,TMPREG2 + add TMPREG2,1,STDARG + CONTINUE + +/* logb : real -> int + * Extract and unbias the exponent. + * The IEEE bias is 1023. + */ +ML_CODE_HDR(logb_a) + ld [STDARG],TMPREG2 /* extract exponent. */ + srl TMPREG2,19,TMPREG2 + and TMPREG2,2047*2,TMPREG2 /* unbias and cvt to ML int. */ + sub TMPREG2,2045,STDARG /* 2(n-1023)+1 == 2n-2045. */ + CONTINUE + + +/* scalb : (real * int) -> real + * Scale the first argument by 2 raised to the second argument. Raise + * Float("underflow") or Float("overflow") as appropriate. + */ +ML_CODE_HDR(scalb_a) + CHECKLIMIT(scalb_a_limit) + ld [STDARG+4],TMPREG1 /* tmpreg1 gets scale (second arg) */ + sra TMPREG1,1,TMPREG1 /* cvt scale to sparc int */ + ld [STDARG],STDARG /* stdarg gets real (first arg) */ + ld [STDARG],TMPREG4 /* tmpreg4 gets high word of real value. */ + set 0x7ff00000,TMPREG2 /* tmpreg2 gets exponent mask. */ + andcc TMPREG4,TMPREG2,TMPREG3 /* extract exponent into tmpreg3. */ + be 1f /* if 0 then return same */ + nop + srl TMPREG3,20,TMPREG3 /* cvt exp to int (delay slot). */ + addcc TMPREG3,TMPREG1,TMPREG1 /* tmpreg1 = exp + scale */ + ble under /* if new exp <= 0 then underflow */ + nop + cmp TMPREG1,2047 /* if new exp >= 2047 then overflow */ + bge over + nop + andn TMPREG4,TMPREG2,TMPREG4 /* mask out old exponent. */ + sll TMPREG1,20,TMPREG1 /* shift new exp to exponent position. */ + or TMPREG4,TMPREG1,TMPREG4 /* set new exponent. */ + ld [STDARG+4],TMPREG1 /* tmpreg1 gets low word of real value. */ +7: +#ifdef ALIGN_REALDS + or ALLOCPTR,0x4,ALLOCPTR /* desc is unaliged */ +#endif + st TMPREG4,[ALLOCPTR+4] /* allocate the new real value */ + st TMPREG1,[ALLOCPTR+8] + set DESC_reald,TMPREG1 + st TMPREG1,[ALLOCPTR] + add ALLOCPTR,4,STDARG /* set result. */ + inc 12,ALLOCPTR /* allocptr += 3 */ +1: CONTINUE + +over: /* handle overflow */ + t ST_INT_OVERFLOW /* generate an Overflow exn. We do this */ + /* never get here */ /* via a trap to produce a SIGOVFL */ + +under: /* handle underflow */ + set 0,TMPREG4 + set 0,TMPREG1 + ba 7b + nop + +/* try_lock : spin_lock -> bool + * low-level test-and-set style primitive for mutual-exclusion among + * processors. + */ +ML_CODE_HDR(try_lock_a) +#if (MAX_PROCS > 1) + ??? +#else (MAX_PROCS == 1) + ld [STDARG],TMPREG1 /* load previous value into tmpreg1 */ + set ML_false,TMPREG4 /* ML_false */ + st TMPREG4,[STDARG] /* store ML_false into the lock */ + mov TMPREG1,STDARG /* return previous value of lock */ + CONTINUE +#endif + +/* unlock : releases a spin lock + */ +ML_CODE_HDR(unlock_a) +#if (MAX_PROCS > 1) + ??? +#else (MAX_PROCS == 1) + set ML_true,TMPREG1 /* store ML_true ... */ + st TMPREG1,[STDARG] /* into the lock */ + set ML_unit,STDARG /* return unit */ + CONTINUE +#endif + + +/* SetFSR: + * Load the floating-point status register with the given word. + */ +ENTRY(SetFSR) + set fsrtmp,%o1 + st %o0,[%o1] + retl + ld [%o1],%fsr /* (delay slot) */ + DATA +fsrtmp: .word 0 + TEXT + + +/* void FlushICache (char *addr, int nbytes) + */ +ENTRY(FlushICache) + and %o1,0x1F,%o2 /* m <- (nbytes % (32-1)) >> 2 (use %o2 for m) */ + srl %o2,2,%o2 + srl %o1,5,%o1 /* i <- (nbytes >> 5) */ +/* FLUSH4 implements: if (m > 0) { FLUSH addr; addr += 4; m--;} else goto L_test */ +#define FLUSH4 \ + tst %o2; \ + ble L_test; \ + nop; \ + iflush %o0; \ + inc 4,%o0; \ + dec 1,%o2 + FLUSH4 + FLUSH4 + FLUSH4 + FLUSH4 + FLUSH4 + FLUSH4 + FLUSH4 + /* addr is 32-byte aligned here */ +L_test: + tst %o1 + be L_exit + nop +L_loop: /* flush 32 bytes per iteration */ + iflush %o0 + iflush %o0+8 + iflush %o0+16 + iflush %o0+24 + deccc 1,%o1 /* if (--i > 0) goto L_loop */ + bg L_loop + inc 32,%o0 /* addr += 32 (delay slot) */ +L_exit: + retl + nop diff --git a/base/runtime/mach-dep/Unsupported/ALPHA32.prim.asm b/base/runtime/mach-dep/Unsupported/ALPHA32.prim.asm new file mode 100644 index 0000000..52d8be7 --- /dev/null +++ b/base/runtime/mach-dep/Unsupported/ALPHA32.prim.asm @@ -0,0 +1,741 @@ +/* ALPHA32.prim.asm + * + * ALPHA32 runtime code for ML. + * + * ML register usage follows: + * + * register C callee ML use + * save? + * -------- --------- ------- + * $0 no standard arg + * $1 no standard continuation + * $2 no standard closure + * $3 no standard link register + * $4 no base address register + * $5-$8 no miscellaneous registers + * $9 yes heap limit pointer + * $10 yes var pointer + * $11 yes heap-limit comparison flag, and arith temporary + * $12 yes store list pointer + * $13 yes allocation pointer + * $14 yes exception continuation + * $15 yes miscellaneous register + * $16-$25 no miscellaneous registers + * $26 no ml-pc + * $27 no miscellaneous register + * $28 no assembler temporary + * $29 - reserved for C (global pointer) + * $30 - reserved for C (stack pointer) + * $31 - constant zero + */ + +#include +#include "ml-base.h" +#include "asm-base.h" +#include "ml-values.h" +#include "tags.h" +#include "ml-request.h" +#include "ml-limits.h" +#include "mlstate-offsets.h" /** this file is generated **/ + + +#define STDARG $0 /* standard arg (ml_arg) */ +#define STDCONT $1 /* standard continuation (ml_cont) */ +#define STDCLOS $2 /* standard closure (ml_closure) */ +#define STDLINK $3 /* ptr to just-entered std function (ml_link) */ +#define MISCREG0 $5 +#define MISCREG1 $6 +#define MISCREG2 $7 +#define LIMITPTR $9 /* end of heap - 4096 (ml_limitptr) */ +#define VARPTR $10 /* per-thread var pointer (ml_varptr) */ +#define NEEDGC $11 /* arith temp; also, heap-limit comparison flag */ +#define STOREPTR $12 /* store pointer (ml_storeptr) */ +#define ALLOCPTR $13 /* freespace pointer (ml_allocptr) */ +#define EXNCONT $14 /* exception handler (ml_exncont) */ +#define PC $26 /* address to return/goto in ML */ +/* assembler-temp $28 */ +/* globalptr $29 reserved for C and assembler */ +/* stackptr $30 stack pointer */ +/* zero $31 zero */ + +#define CRESULT $0 +#define CARG0 $16 + +#define ATMP1 $20 +#define ATMP2 $21 +#define ATMP3 $22 +#define ATMP4 $23 +#define PTRTMP $24 + +/* The ML stack frame has the following layout (set up by restoreregs): + * +-------------------+ + * sp+124 | ml_divlu | + * +-------------------+ + * sp+120 | ml_divlv | + * +-------------------+ + * sp+116 | pseudo reg 2 | + * +-------------------+ + * sp+112 | pseudo reg 1 | + * +-------------------+ + * sp+104: | temporary storage | temporary use by floating + * +-------------------+ point code. + * sp+96: | temporary storage | + * +-------------------+ + * sp+88: | saved $30 | + * +-------------------+ + * sp+80: | saved $29 | + * +-------------------+ + * sp+72: | saved $26 | + * +-------------------+ + * sp+64: | saved $15 | + * +-------------------+ + * sp+56: | saved $14 | + * +-------------------+ + * sp+48: | saved $13 | + * +-------------------+ + * sp+40: | saved $12 | + * +-------------------+ + * sp+32: | saved $11 | + * +-------------------+ + * sp+24: | saved $10 | + * +-------------------+ + * sp+16: | saved $9 | + * +-------------------+ + * sp+8: | addr of saveregs | + * +-------------------+ + * sp: | ptr to MLState | + * +-------------------+ + */ + +#define ML_FRAMESIZE 4096 +#define MLSTATE_OFFSET 0 +#define STARTGC_OFFSET 8 +#define REGSAVE_OFFSET 16 +#define PSEUDOREG_OFFSET 112 +#define ML_DIVLV_OFFSET 120 +#define ML_DIVLU_OFFSET 124 + +#ifdef ALLIGN_ALLOCATION +# define ALLOCALIGN \ + addl ALLOCPTR,4,ALLOCPTR; \ + bic ALLOCPTR,4,ALLOCPTR; +#else +# define ALLOCALIGN +#endif + +#define CONTINUE \ + ALLOCALIGN \ + cmplt LIMITPTR,ALLOCPTR,NEEDGC; \ + jmp (STDCONT); + +#define CHECKLIMIT \ + mov STDLINK, PC; \ + beq NEEDGC,3f; \ + br saveregs; \ + 3: + + + DATA + .align 3 +one_half: .t_floating 0.5 +fsr_bits: .quad 0x8a70000000000000 + + TEXT + + +/* sigh_return_a: + * The return continuation for the ML signal handler. + */ +ML_CODE_HDR(sigh_return_a) + mov REQ_SIG_RETURN,ATMP1 + mov ML_unit, STDLINK + mov ML_unit, STDCLOS + mov ML_unit, PC + br set_request + + +/* sigh_resume: + * Resume execution at the point at which a handler trap occurred. This is a + * standard two-argument function, thus the closure is in ml_cont (%stdcont). + */ +ENTRY(sigh_resume) + mov REQ_SIG_RESUME,ATMP1 + br set_request + +/* pollh_return_a + * The return continuation for the ML poll handler. + */ +ML_CODE_HDR(pollh_return_a) + mov REQ_POLL_RESUME,ATMP1 + mov ML_unit, STDLINK + mov ML_unit, STDCLOS + mov ML_unit, PC + br set_request + +/* pollh_resume: + * Resume execution at the point at which a poll event occurred. + */ +ENTRY(pollh_resume) + mov REQ_POLL_RETURN,ATMP1 + br set_request + +ML_CODE_HDR(handle_a) + mov REQ_EXN,ATMP1 + mov STDLINK, PC + br set_request + +ML_CODE_HDR(return_a) + mov REQ_RETURN,ATMP1 + mov ML_unit, STDLINK + mov ML_unit, STDCLOS + mov ML_unit, PC + br set_request + +ENTRY(request_fault) + mov REQ_FAULT,ATMP1 + mov STDLINK, PC + br set_request + +/* bind_cfun : (string * string) -> c_function + */ +ML_CODE_HDR(bind_cfun_a) + CHECKLIMIT + mov REQ_BIND_CFUN,ATMP1 + br set_request + +ML_CODE_HDR(build_literals_a) + CHECKLIMIT + mov REQ_BUILD_LITERALS,ATMP1 + br set_request + +ML_CODE_HDR(callc_a) + CHECKLIMIT + mov REQ_CALLC,ATMP1 + br set_request + + BEGIN_PROC(saveregs) +ENTRY(saveregs) + mov REQ_GC,ATMP1 + + /* fall through */ + +set_request: + ldq PTRTMP,MLSTATE_OFFSET(sp) /* get the ML state ptr */ + ldq NEEDGC,VProcOffMSP(PTRTMP) /* use NEEDGC for VProcPtr */ + stl zero,InMLOffVSP(NEEDGC) /* note that we have left ML */ + stl ALLOCPTR,AllocPtrOffMSP(PTRTMP) + stl LIMITPTR,LimitPtrOffMSP(PTRTMP) + stl STOREPTR,StorePtrOffMSP(PTRTMP) + stl STDLINK,LinkRegOffMSP(PTRTMP) + stl PC,PCOffMSP(PTRTMP) + stl STDARG,StdArgOffMSP(PTRTMP) + stl STDCLOS,StdClosOffMSP(PTRTMP) + stl STDCONT,StdContOffMSP(PTRTMP) + stl VARPTR,VarPtrOffMSP(PTRTMP) + stl EXNCONT,ExnPtrOffMSP(PTRTMP) + mov ATMP1,CRESULT /* return request */ + stl MISCREG0,Misc0OffMSP(PTRTMP) + stl MISCREG1,Misc1OffMSP(PTRTMP) + stl MISCREG2,Misc2OffMSP(PTRTMP) + /* fall through */ + .end saveregs + + /* restore callee-save C registers */ +restore_c_regs: + ldq $30,REGSAVE_OFFSET+72(sp) + ldq $29,REGSAVE_OFFSET+64(sp) + ldq $26,REGSAVE_OFFSET+56(sp) + ldq $15,REGSAVE_OFFSET+48(sp) + ldq $14,REGSAVE_OFFSET+40(sp) + ldq $13,REGSAVE_OFFSET+32(sp) + ldq $12,REGSAVE_OFFSET+24(sp) + ldq $11,REGSAVE_OFFSET+16(sp) + ldq $10,REGSAVE_OFFSET+8(sp) + ldq $9 ,REGSAVE_OFFSET(sp) + addq sp,ML_FRAMESIZE /* discard the stack frame */ + jmp ($26) /* return to run_ml() */ + + + BEGIN_PROC(restoreregs) +ENTRY(restoreregs) + subq sp,ML_FRAMESIZE /* allocate a stack frame */ + .frame sp,ML_FRAMESIZE,zero + .mask 0xe000fe00,0 + /* save the C registers */ + lda $3,saveregs + stq CARG0,MLSTATE_OFFSET(sp) /* save MLState ptr for return to C */ + stq $3,STARTGC_OFFSET(sp) /* so ML can find saveregs! */ + lda $3,ml_divlv /* address of ml_divlv */ + stl $3,ML_DIVLV_OFFSET(sp) + lda $3,ml_divlu /* address of ml_divlu */ + stl $3,ML_DIVLU_OFFSET(sp) + stq $30,REGSAVE_OFFSET+72(sp) + stq $29,REGSAVE_OFFSET+64(sp) + stq $26,REGSAVE_OFFSET+56(sp) + stq $15,REGSAVE_OFFSET+48(sp) + stq $14,REGSAVE_OFFSET+40(sp) + stq $13,REGSAVE_OFFSET+32(sp) + stq $12,REGSAVE_OFFSET+24(sp) + stq $11,REGSAVE_OFFSET+16(sp) + stq $10,REGSAVE_OFFSET+8(sp) + stq $9,REGSAVE_OFFSET(sp) + mov CARG0,PTRTMP /* put MLState ptr in ptrtmp */ + + ldl ALLOCPTR,AllocPtrOffMSP(PTRTMP) + ldl LIMITPTR,LimitPtrOffMSP(PTRTMP) + ldl STOREPTR,StorePtrOffMSP(PTRTMP) + ldl NEEDGC,VProcOffMSP(PTRTMP) /* use NEEDGC for VProc Ptr */ + mov 1,ATMP1 +.set noreorder /* the order here is important */ + stl ATMP1,InMLOffVSP(NEEDGC) /* note that we are entering ML code */ + ldl STDARG,StdArgOffMSP(PTRTMP) + ldl STDCONT,StdContOffMSP(PTRTMP) + ldl STDCLOS,StdClosOffMSP(PTRTMP) + ldl EXNCONT,ExnPtrOffMSP(PTRTMP) + ldl MISCREG0,Misc0OffMSP(PTRTMP) + ldl MISCREG1,Misc1OffMSP(PTRTMP) + ldl MISCREG2,Misc2OffMSP(PTRTMP) + ldl STDLINK,LinkRegOffMSP(PTRTMP) + ldl PC,PCOffMSP(PTRTMP) + ldl VARPTR,VarPtrOffMSP(PTRTMP) + /* check for pending signals */ + ldl PTRTMP,NPendingSysOffVSP(NEEDGC) +.set noat + ldl $28,SigsRecvOffVSP(NEEDGC) + ldl PTRTMP,SigsHandledOffVSP(NEEDGC) + cmpeq $28,PTRTMP,PTRTMP +.set at + bne PTRTMP,pending_sigs + .end restoreregs + .ent ml_go +ENTRY(ml_go) + ALLOCALIGN + cmplt LIMITPTR,ALLOCPTR,NEEDGC + jmp (PC) /* jump/return to ML code */ + .end ml_go + +pending_sigs: /* there are pending signals */ + /* check if we are currently handling a signal */ + ldl PTRTMP,InSigHandlerOffVSP(NEEDGC) + bne PTRTMP,ml_go + /* note that a handler trap is pending */ + mov 1,PTRTMP + stl PTRTMP,HandlerPendingOffVSP(NEEDGC) + mov ALLOCPTR,LIMITPTR + br ml_go +.set reorder + + +/* SaveFPRegs: + * + * void SaveFPRegs (Word_t *p) + * + * Save the C callee-save FP registers starting at the given address. + */ + TEXT + BEGIN_PROC(SaveFPRegs) +ENTRY(SaveFPRegs) + stt $f2,0(a0) + stt $f3,8(a0) + stt $f4,16(a0) + stt $f5,24(a0) + stt $f6,32(a0) + stt $f7,40(a0) + stt $f8,48(a0) + stt $f9,56(a0) + jmp (ra) /* return */ + END_PROC(SaveFPRegs) + +/* RestoreFPRegs: + * + * void RestoreFPRegs (Word_t *p) + * + * Restore the C callee-save FP registers from the given address. + */ + BEGIN_PROC(RestoreFPRegs) +ENTRY(RestoreFPRegs) /* floats address passed as parm */ + ldt $f2,0(a0) /* retrieve float registers */ + ldt $f3,8(a0) + ldt $f4,16(a0) + ldt $f5,24(a0) + ldt $f6,32(a0) + ldt $f7,40(a0) + ldt $f8,48(a0) + ldt $f9,56(a0) + jmp (ra) + END_PROC(RestoreFPRegs) + + +/** Primitive object allocation routines **/ + +/* array : (int * 'a) -> 'a array + * Allocate and initialize a new array. This can cause GC. + */ +ML_CODE_HDR(array_a) + CHECKLIMIT + + ldl ATMP1,0(STDARG) /* tmp1 := length in words */ + sra ATMP1,1,ATMP2 /* tmp2 := length (untagged) */ + subl ATMP2,SMALL_OBJ_SZW,ATMP3 /* is this a small object? */ + bgt ATMP3,2f /* branch if large object */ + + ldl STDARG,4(STDARG) /* initial value */ + sll ATMP2,TAG_SHIFTW,ATMP3 /* build descriptor in tmp3 */ + or ATMP3,MAKE_TAG(DTAG_arr_data),ATMP3 + stl ATMP3,0(ALLOCPTR) /* store descriptor */ + addq ALLOCPTR,4 /* allocptr++ */ + mov ALLOCPTR,ATMP3 /* array data ptr in tmp3 */ +1: + stl STDARG,0(ALLOCPTR) /* initialize array */ + subl ATMP2, 1, ATMP2 + addq ALLOCPTR,4 + bne ATMP2,1b + + /* allocate array header */ + mov DESC_polyarr,ATMP2 /* descriptor in tmp2 */ + stl ATMP2,0(ALLOCPTR) /* store descriptor */ + addq ALLOCPTR, 4 /* allocptr++ */ + mov ALLOCPTR,STDARG /* result = header addr */ + stl ATMP3, 0(ALLOCPTR) /* store pointer to data */ + stl ATMP1, 4(ALLOCPTR) + addq ALLOCPTR,8 + CONTINUE + +2: /* off-line allocation of big arrays */ + mov REQ_ALLOC_ARRAY,ATMP1 + mov STDLINK, PC + br set_request + +/* create_r : int -> realarray + * Create a new realarray. + */ +ML_CODE_HDR(create_r_a) + CHECKLIMIT + + sra STDARG,1,ATMP2 /* atmp2 = length (untagged int) */ + sll ATMP2,1,ATMP2 /* atmp2 = length in words */ + subl ATMP2,SMALL_OBJ_SZW,ATMP3 + bgt ATMP3,1f /* is this a small object? */ + /* allocate the data object */ + sll ATMP2,TAG_SHIFTW,ATMP1 /* build data descriptor in tmp1 */ + or ATMP1, MAKE_TAG(DTAG_raw64),ATMP1 +#ifdef ALIGN_REALDS + or ALLOCPTR,4,ALLOCPTR /* tag is unaligned, so that the */ + /* first element is 8-byte aligned */ +#endif + stl ATMP1,0(ALLOCPTR) /* store the descriptor */ + addq ALLOCPTR,4 /* allocptr++ */ + mov ALLOCPTR,ATMP3 /* tmp3 = data object */ + sll ATMP2, 2, ATMP2 /* tmp2 = length in bytes */ + addq ALLOCPTR, ATMP2,ALLOCPTR /* allocptr += length */ + /* allocate the header object */ + mov DESC_real64arr,ATMP1 + stl ATMP1,0(ALLOCPTR) /* header descriptor */ + addq ALLOCPTR,4 /* allocptr++ */ + stl ATMP3,0(ALLOCPTR) /* header data field */ + stl STDARG,4(ALLOCPTR) /* header length field */ + mov ALLOCPTR,STDARG /* stdarg = header object */ + addq ALLOCPTR,8 + CONTINUE + +1: /* off-line allocation of big realarrays */ + mov REQ_ALLOC_REALDARRAY,ATMP1 + mov STDLINK, PC + br set_request + +/* create_b : int -> bytearray + * Create a bytearray of the given length. + */ +ML_CODE_HDR(create_b_a) + CHECKLIMIT + + sra STDARG,1,ATMP2 /* atmp2 = length (untagged int) */ + addq ATMP2,3,ATMP2 /* atmp2 = length in words */ + sra ATMP2,2 + subq ATMP2,SMALL_OBJ_SZW,ATMP3 /* is this a small object? */ + bgt ATMP3,1f + /* allocate the data object */ + sll ATMP2,TAG_SHIFTW,ATMP1 /* build descriptor in atmp1 */ + or ATMP1,MAKE_TAG(DTAG_raw),ATMP1 + stl ATMP1,0(ALLOCPTR) /* store the data descriptor */ + addq ALLOCPTR,4 /* allocptr++ */ + mov ALLOCPTR,ATMP3 /* tmp3 = data object */ + sll ATMP2,2 /* tmp2 = length in bytes */ + addq ALLOCPTR,ATMP2,ALLOCPTR /* allocptr += total length */ + /* allocate the header object */ + mov DESC_word8arr,ATMP1 /* header descriptor */ + stl ATMP1,0(ALLOCPTR) + addq ALLOCPTR,4 /* allocptr++ */ + stl ATMP3,0(ALLOCPTR) /* header data field */ + stl STDARG,4(ALLOCPTR) /* header length field */ + mov ALLOCPTR,STDARG /* stdarg = header object */ + addq ALLOCPTR,8 /* allocptr += 2 */ + CONTINUE +1: /* off-line allocation of big bytearrays */ + mov REQ_ALLOC_BYTEARRAY,ATMP1 + mov STDLINK, PC + br set_request + +/* create_s : int -> string + * Create a string of the given length (assume length >0). + */ +ML_CODE_HDR(create_s_a) + CHECKLIMIT + + sra STDARG,1,ATMP2 /* tmp2 = length (untagged int) */ + addq ATMP2,4,ATMP2 /* atmp2 = length in words */ + sra ATMP2,2 + subq ATMP2,SMALL_OBJ_SZW,ATMP3 + bgt ATMP3,1f /* is this a small object? */ + + sll ATMP2,TAG_SHIFTW,ATMP1 /* build descriptor in atmp3 */ + or ATMP1,MAKE_TAG(DTAG_raw),ATMP1 + stl ATMP1,0(ALLOCPTR) /* store the data descriptor */ + addq ALLOCPTR,4 /* allocptr++ */ + mov ALLOCPTR,ATMP3 /* tmp3 = data object */ + sll ATMP2,2 /* tmp2 = length in bytes */ + addq ALLOCPTR,ATMP2,ALLOCPTR /* allocptr += total length */ + stl zero,-4(ALLOCPTR) /* store zero in last word */ + /* Allocate the header object */ + mov DESC_string, ATMP1 /* header descriptor */ + stl ATMP1,0(ALLOCPTR) + addq ALLOCPTR,4 /* allocptr++ */ + stl ATMP3,0(ALLOCPTR) /* header data field */ + stl STDARG,4(ALLOCPTR) /* heder length field */ + mov ALLOCPTR,STDARG /* stdarg = header object */ + addq ALLOCPTR,8 + CONTINUE +1: /* off-line allocation of big strings */ + mov REQ_ALLOC_STRING,ATMP1 + mov STDLINK, PC + br set_request + +/* create_v_a : (int * 'a list) -> 'a vector + * Create a vector with elements taken from a list. + * NOTE: the front-end ensures that list cannot be nil. + */ +ML_CODE_HDR(create_v_a) + CHECKLIMIT + + ldl ATMP1,0(STDARG) /* tmp1 := length (tagged int) */ + sra ATMP1,1,ATMP2 /* tmp2 := length (untagged) */ + subq ATMP2,SMALL_OBJ_SZW,ATMP3 + bgt ATMP3,1f /* is this a small object? */ + + sll ATMP2,TAG_SHIFTW,ATMP2 /* build descriptor in tmp2 */ + or ATMP2,MAKE_TAG(DTAG_vec_data),ATMP2 + stl ATMP2,0(ALLOCPTR) /* store descriptor */ + addq ALLOCPTR,4 /* allocptr++ */ + ldl ATMP2,4(STDARG) /* atmp2 := list */ + mov ALLOCPTR,STDARG /* stdarg := vector */ +2: /* loop: */ + ldl ATMP3,0(ATMP2) /* tmp3 := hd(tmp2) */ + ldl ATMP2,4(ATMP2) /* tmp2 := tl(tmp2) */ + stl ATMP3,0(ALLOCPTR) /* store word in vector */ + addq ALLOCPTR,4 /* allocptr++ */ + cmpeq ATMP2,ML_nil,ATMP3 /* tmp3 := 1 if tmp2=ML_nil */ + beq ATMP3, 2b + /* allocate header object */ + mov DESC_polyvec,ATMP3 /* descriptor in tmp3 */ + stl ATMP3,0(ALLOCPTR) /* store descriptor */ + addq ALLOCPTR,4 /* allocptr++ */ + stl STDARG,0(ALLOCPTR) /* header data field */ + stl ATMP1,4(ALLOCPTR) /* header length */ + mov ALLOCPTR, STDARG /* result = header object */ + addq ALLOCPTR, 8 /* allocptr += 2 */ + CONTINUE + +1: /* off-line allocation for large vectors */ + mov REQ_ALLOC_VECTOR,ATMP1 + mov STDLINK, PC + br set_request + +/* Floating exceptions raised (assuming ROP's are never passed to functions): + * DIVIDE BY ZERO - (div) + * OVERFLOW/UNDERFLOW - (add,div,sub,mul) as appropriate + * + * floor raises integer overflow if the float is out of 32-bit range, + * Does not check for out-of-range ; it's up to the ML code to do that first. */ +#ifdef NEW_FLOOR + DATA + .align 3 +floor_MAXFLOAT: .quad 0x4330080000000000 + + .text + +ML_CODE_HDR(floor_a) + /* check for overflow */ + ldgp gp, 0(STDLINK) + ldt $f0, 0(STDARG) + + subq $30, 16, $30 /* allocate stack space */ + /* Do floor; neat thing is that this works + ** for both +ve and -ve floating point numbers! + */ + ldt $f1, floor_MAXFLOAT + addtm $f0, $f1, $f1 + stt $f1, 0($30) + ldl ATMP1, 0($30) + addl ATMP1, ATMP1, ATMP1 + addl ATMP1, 1, STDARG + + addq $30, 16, $30 + CONTINUE + +#else /* !NEW_FLOOR */ + DATA + .align 3 +floor_HALF: .t_floating 0.5 + + .text + +ML_CODE_HDR(floor_a) + ldgp gp, 0(STDLINK) + ldt $f0, 0(STDARG) /* get argument */ + subq $30, 16, $30 /* allocate stack space */ + fblt $f0, floor_negative_arg + +floor_positive_arg: + cvttqc $f0, $f1 + stt $f1, 0($30) + ldl ATMP1, 0($30) + addl ATMP1, ATMP1, ATMP1 + addl ATMP1, 1, STDARG + + addq $30, 16, $30 + CONTINUE + + +floor_negative_arg: + + /* cvttqm (x) = cvttq (2*x - 0.5) / 2 */ + /* cvttq (x-0.5) loses for odd integers which IEEE round to evens */ + ldt $f1, floor_HALF + addt $f0, $f0, $f0 + subt $f0, $f1, $f0 + cvttq $f0, $f0 + stt $f0, 0($30) + ldl STDARG, 0($30) + /* STDARG now holds either 2*floor(x) or 2*floor(x)+1. */ + /* convert to ml int by setting least bit! */ + bis STDARG, 1, STDARG + + addq $30, 16, $30 + CONTINUE + +#endif + + +ML_CODE_HDR(logb_a) + ldq STDARG,(STDARG) /* get argument */ + srl STDARG,52 /* throw out 52 low bits */ + and STDARG,0x07ff /* clear all but 11 low bits */ + subq STDARG,1023 /* subtract 1023 */ + sll STDARG,1 /* make room for tag bit */ + addq STDARG,1 /* add the tag bit */ + CONTINUE + +ML_CODE_HDR(scalb_a) + CHECKLIMIT + ldl PTRTMP,0(STDARG) /* address of float */ + ldq ATMP2,0(PTRTMP) /* get float */ + ldl ATMP1,4(STDARG) /* get tagged n */ + sra ATMP1,1,ATMP1 /* real n */ + beq ATMP1,9f /* branch if n=0 */ + sra ATMP2,52,ATMP3 /* shift out fraction of float */ + and ATMP3,0xfff,ATMP3 /* just exponent of float */ + addq ATMP3,ATMP1,ATMP3 /* n + exponent */ + ble ATMP3,6f /* branch if underflow */ + sll ATMP1,52,ATMP1 /* n in exponent position */ + addqv ATMP2,ATMP1,ATMP1 /* add n to exponent, with overflow */ +3: /* return float in atmp1 */ + or ALLOCPTR,4,ALLOCPTR /* unalign allocptr to align float */ + mov DESC_reald,ATMP2 /* get desc */ + stl ATMP2,0(ALLOCPTR) /* store desc */ + stq ATMP1,4(ALLOCPTR) /* store float */ + addq ALLOCPTR,4,STDARG /* return boxed float */ + addq ALLOCPTR,12,ALLOCPTR /* set allocptr */ + CONTINUE +6: /* underflow -- return zero */ + mov 0,ATMP1 + br 3b +9: /* n=0 -- return original float */ + mov PTRTMP,STDARG + CONTINUE + +/* ml_divlv + * Incoming parameters in $16 and $17, result in $0 + */ +ENTRY(ml_divlv) /* divide longword */ + beq $17, divZero /* check for div-by-zero */ + ornot $31, $17, $0 /* is divisor -1 */ + bne $0, do_ml_divl /* NO */ + sublv $31, $16, $0 /* is dividend largest negative int */ + trapb /* YES */ +do_ml_divl: + divl $16, $17, $0 /* do divl */ + ret $31, ($26), 1 + + +/* ml_divlu + * Incoming parameters in $16 and $17, result in $0 + */ +ENTRY(ml_divlu) /* divide longwork unsigned */ + beq $17, divZero /* check for div-by-zero */ + divlu $16, $17, $0 /* do divlu */ + ret $31, ($26), 1 + +divZero: + lda $16, -2($31) /* generate div-by-zero */ + call_pal 0xaa /* gentrap */ + + +/* try_lock : spin_lock -> bool + * low-level test-and-set style primitive for mutual-exclusion among + * processors. + */ +ML_CODE_HDR(try_lock_a) +#if (MAX_PROCS > 1) + ??? +#else (MAX_PROCS == 1) + ldl ATMP1,0(STDARG) + mov ML_false,ATMP2 + stl ATMP2,0(STDARG) + mov ATMP1,STDARG + CONTINUE +#endif + +/* unlock : releases a spin lock + */ +ML_CODE_HDR(unlock_a) +#if (MAX_PROCS > 1) + ??? +#else (MAX_PROCS == 1) + mov ML_true,ATMP1 + stl ATMP1,0(STDARG) + mov ML_unit,STDARG + CONTINUE +#endif + +/* SetFSR: + * Turn on floating-point overflow, underflow and zero-divide exceptions. + */ + BEGIN_PROC(SetFSR) +ENTRY(SetFSR) + trapb /* trap barrier just in case */ + ldt $f1,fsr_bits /* normal rounding,iov,ovf,dze,inv */ + mt_fpcr $f1 /* set floating point control reg */ + jmp ($26) + END_PROC(SetFSR) + +/* FlushICache: + * C callable instruction cache flush function + */ + BEGIN_PROC(FlushICache) +ENTRY(FlushICache) + .frame $30,0,$26,0 + .prologue 0 + call_pal 0x86 /* imb */ + ret $31,($26),1 + END_PROC(FlushICache) + diff --git a/base/runtime/mach-dep/Unsupported/HPPA.prim.asm b/base/runtime/mach-dep/Unsupported/HPPA.prim.asm new file mode 100644 index 0000000..b904844 --- /dev/null +++ b/base/runtime/mach-dep/Unsupported/HPPA.prim.asm @@ -0,0 +1,847 @@ +/* HPPA.prim.asm + * + * COPYRIGHT (c) 1995 by AT&T Bell Laboratories. + */ + +#include "ml-base.h" +#include "asm-base.h" +#include "ml-values.h" +#include "tags.h" +#include "ml-request.h" +#include "ml-limits.h" +#include "mlstate-offsets.h" /** machine generated file **/ + +/* stack layout when executing in ML code. */ + +/* +Note: stack grows from low to high memory addresses. + +low address ... + sp-116: | spill area | + +---------------------+ + sp-112: | $$umul | + +---------------------+ + sp-44: | %r2-%r18 | + +---------------------+ + sp-40: | *ml_state | + +---------------------+ + sp-32: | unused[2] | + +---------------------+ + sp-28: | startgc | + +---------------------+ + sp-24: | $$mul | + +---------------------+ + sp-20: | $$div | + +---------------------+ + sp-16: | $$udiv | + +---------------------+ + sp-12: | cvti2dTmp | + +---------------------+ + sp-8: | float64Tmp | + +---------------------+ + sp: +high addresses ... + + */ + +#define UDIV_OFFSET -16 +#define DIV_OFFSET -20 +#define MUL_OFFSET -24 +#define STARTGC_OFFSET -28 +#define MLSTATE_OFFSET -40 +#define REGSAVE_OFFSET -44 +#define UMUL_OFFSET -112 +#define ML_FRAMESIZE 4096 + + +/* Note: + Use of the SavedStackPtr location + Lal George + 12/13/95 + + The SavedStackPtr location is used to restore the value of the stack + pointer so that the layout above can be accessed. This is particularly + relevant when an exception/trap is generated inside ml_mul, ml_div + and ml_udiv millicode. Registers that are trashed by the system + millicode are saved on top of the stack together with a + return address. When an exception occurs, the exception trap + handler forces the program counter to resume at the address + corresponding to request_fault. However, because of the extra stuff + on the stack, request_fault does not see the layout it expects! + The value in SavedStackPtr is used to restore the correct stack pointer. + The ML registers that were saved on the stack during the millicode + call, are ones that are not required to resume the trap handler + and can therefore be dropped on the floor. + + The value of the stack pointer is saved before branching into ML via + restoreregs, and is restored in saveregs and set_request---the two entry + points into C. + + This scheme adds 6 extra instructions to go from ML to C and back. +*/ + +#define zero %r0 +#define miscreg0 %r1 +#define allocptr %r3 +#define limitptr %r4 +#define storeptr %r5 +#define exnptr %r6 +#define varptr %r7 +#define stdlink %r9 +#define stdclos %r10 +#define stdarg %r11 +#define stdcont %r12 +#define miscreg1 %r13 +#define miscreg2 %r14 + +#define sp %r30 +#define pc %r31 + +#define tmp1 %r29 +#define tmp2 %r24 +#define tmp3 %r25 +#define tmp4 %r23 +#define carg0 %r26 +#define creturn %r28 + +#define RSHIFT(r,n,t) extrs r, 31-(n), 32-(n), t +#define LSHIFT(r,n,t) zdep r, 31-(n), 32-(n), t + +#define LARGECONST(c, t) ldil L%c, t ! ldo R%c(t), t + + +#define CONTINUE \ + bv,n zero(stdcont) + +#define CHECKLIMIT(name) \ + combt,<=,n allocptr, limitptr, CSYM(CONCAT(L$$, name)) !\ + copy stdlink, pc !\ + b,n saveregs1 !\ + nop !\ + .label CSYM(CONCAT(L$$, name)) + + + /****************************************************** + All code must be in the data segment, since we + cannot distinguish between a code and data segment + offset. + ******************************************************/ + + .data + +SavedStackPtr .word 0 + +ML_CODE_HDR(sigh_return_a) + ldi REQ_SIG_RETURN, tmp2 + ldi 0+ML_unit, stdlink + ldi 0+ML_unit, stdclos + ldi 0+ML_unit, pc + b,n set_request + +ENTRY(sigh_resume) + ldi REQ_SIG_RESUME, tmp2 + b,n set_request + + +ML_CODE_HDR(pollh_return_a) + ldi REQ_POLL_RETURN, tmp2 + ldi 0+ML_unit, stdlink + ldi 0+ML_unit, stdclos + ldi 0+ML_unit, pc + b,n set_request + + +ENTRY(pollh_resume) + ldi REQ_POLL_RESUME, tmp2 + b,n set_request + + +ML_CODE_HDR(handle_a) + ldi REQ_EXN, tmp2 + copy stdlink, pc + b,n set_request + +ML_CODE_HDR(return_a) + ldi REQ_RETURN, tmp2 + ldi 0+ML_unit, stdlink + ldi 0+ML_unit, stdclos + ldi 0+ML_unit, pc + b,n set_request + +ENTRY(request_fault) + ldi REQ_FAULT, tmp2 + copy stdlink, pc + b,n set_request + +ML_CODE_HDR(bind_cfun_a) + CHECKLIMIT(bind_cfun_check) + ldi REQ_BIND_CFUN, tmp2 + b,n set_request + +ML_CODE_HDR(build_literals_a) + CHECKLIMIT(build_literals_check) + ldi REQ_BUILD_LITERALS, tmp2 + b,n set_request + +ML_CODE_HDR(callc_a) + CHECKLIMIT(callc_check) + ldi REQ_CALLC, tmp2 + b,n set_request + +/* + There are two entry points for saveregs --- saveregs0 and saveregs1. + + Saveregs0 is called from inside ML to invoke a gc. This is + done using a BLE,n instruction. The return address (in pc) with + nullification set, is at the wrong place unless one puts a NOP after + the BLR,n. Saveregs0 is used to correct the off-by-four value in pc + or %r31. + + Saveregs1 is called internally (or everywhere else) where the return + address is standard link (stdlink) typically and needs no correction. + +*/ + .export saveregs0,ENTRY +ENTRY(saveregs0) + addi 0-4, pc, pc + ldi 0-4, tmp2 + and pc, tmp2, pc +saveregs1 + ldi REQ_GC, tmp2 + /* fall through */ + + +set_request + ldil L%SavedStackPtr, tmp1 + ldo R%SavedStackPtr(tmp1), tmp1 + ldw 0(tmp1), sp /* restore stack pointer */ + + ldw MLSTATE_OFFSET(sp), tmp1 + ldw VProcOffMSP(tmp1), tmp4 /* use tmp4 as VProc ptr */ + stw zero, InMLOffVSP(tmp4) /* leaving ML */ + stw allocptr, AllocPtrOffMSP(tmp1) + stw limitptr, LimitPtrOffMSP(tmp1) + stw storeptr, StorePtrOffMSP(tmp1) + stw stdlink, LinkRegOffMSP(tmp1) + stw pc, PCOffMSP(tmp1) /* address of called function */ + stw stdarg, StdArgOffMSP(tmp1) + stw stdclos, StdClosOffMSP(tmp1) + stw stdcont, StdContOffMSP(tmp1) + stw varptr, VarPtrOffMSP(tmp1) + stw exnptr, ExnPtrOffMSP(tmp1) + copy tmp2, creturn /* return request */ + stw miscreg0,Misc0OffMSP(tmp1) + stw miscreg1,Misc1OffMSP(tmp1) + stw miscreg2,Misc2OffMSP(tmp1) + /* fall through */ +restore_c_regs + ldw REGSAVE_OFFSET(sp), %r2 + ldw REGSAVE_OFFSET-4(sp), %r3 + ldw REGSAVE_OFFSET-8(sp), %r4 + ldw REGSAVE_OFFSET-12(sp), %r5 + ldw REGSAVE_OFFSET-16(sp), %r6 + ldw REGSAVE_OFFSET-20(sp), %r7 + ldw REGSAVE_OFFSET-24(sp), %r8 + ldw REGSAVE_OFFSET-28(sp), %r9 + ldw REGSAVE_OFFSET-32(sp), %r10 + ldw REGSAVE_OFFSET-36(sp), %r11 + ldw REGSAVE_OFFSET-40(sp), %r12 + ldw REGSAVE_OFFSET-44(sp), %r13 + ldw REGSAVE_OFFSET-48(sp), %r14 + ldw REGSAVE_OFFSET-52(sp), %r15 + ldw REGSAVE_OFFSET-56(sp), %r16 + ldw REGSAVE_OFFSET-60(sp), %r17 + ldw REGSAVE_OFFSET-64(sp), %r18 + LARGECONST(-ML_FRAMESIZE, tmp3) + add tmp3, sp, sp /* discard the stack frame */ + ldsid (%r2), tmp1 + mtsp tmp1, %sr1 + be,n 0(%sr1, %r2) + +/* We need to find a way of creating a table of these constant + * values, rather than computing them each time around. + */ +#define STORE_CODE_ADDR(proc, offset) \ + ldil L%proc, tmp2 !\ + ldo R%proc(tmp2), tmp2 !\ + stw tmp2, offset(sp) + +BEGIN_PROC(restoreregs) + .export restoreregs,ENTRY +restoreregs + LARGECONST(ML_FRAMESIZE, tmp3) + add tmp3, sp, sp + + ldil L%SavedStackPtr, tmp1 /* save stack to restore */ + ldo R%SavedStackPtr(tmp1), tmp1 + stw sp, 0(tmp1) + + /* save the C registers */ + stw %r2, REGSAVE_OFFSET(sp) + stw %r3, REGSAVE_OFFSET-4(sp) + stw %r4, REGSAVE_OFFSET-8(sp) + stw %r5, REGSAVE_OFFSET-12(sp) + stw %r6, REGSAVE_OFFSET-16(sp) + stw %r7, REGSAVE_OFFSET-20(sp) + stw %r8, REGSAVE_OFFSET-24(sp) + stw %r9, REGSAVE_OFFSET-28(sp) + stw %r10, REGSAVE_OFFSET-32(sp) + stw %r11, REGSAVE_OFFSET-36(sp) + stw %r12, REGSAVE_OFFSET-40(sp) + stw %r13, REGSAVE_OFFSET-44(sp) + stw %r14, REGSAVE_OFFSET-48(sp) + stw %r15, REGSAVE_OFFSET-52(sp) + stw %r16, REGSAVE_OFFSET-56(sp) + stw %r17, REGSAVE_OFFSET-60(sp) + stw %r18, REGSAVE_OFFSET-64(sp) + + /* create ML stack frame */ + stw carg0, MLSTATE_OFFSET(sp) + copy carg0, tmp1 + + STORE_CODE_ADDR(ml_udiv, UDIV_OFFSET) + STORE_CODE_ADDR(ml_div, DIV_OFFSET) + STORE_CODE_ADDR(ml_mul, MUL_OFFSET) + STORE_CODE_ADDR(ml_umul, UMUL_OFFSET) + STORE_CODE_ADDR(saveregs0, STARTGC_OFFSET) + + ldw AllocPtrOffMSP(tmp1), allocptr + ldw LimitPtrOffMSP(tmp1), limitptr + ldw StorePtrOffMSP(tmp1), storeptr + ldi 1, tmp2 + ldw VProcOffMSP(tmp1), tmp4 + stw tmp2,InMLOffVSP(tmp4) /* entering ML code */ + ldw StdArgOffMSP(tmp1), stdarg + ldw StdContOffMSP(tmp1), stdcont + ldw StdClosOffMSP(tmp1), stdclos + ldw ExnPtrOffMSP(tmp1), exnptr + ldw Misc0OffMSP(tmp1), miscreg0 + ldw Misc1OffMSP(tmp1), miscreg1 + ldw Misc2OffMSP(tmp1), miscreg2 + ldw LinkRegOffMSP(tmp1), stdlink + ldw VarPtrOffMSP(tmp1), varptr + ldw PCOffMSP(tmp1), pc + /* check for pending signals */ + ldw SigsRecvOffVSP(tmp4), tmp2 + ldw SigsHandledOffVSP(tmp4), tmp3 + combf,= tmp2, tmp3, pending_sigs + nop + +ml_go + mfsp %sr5, tmp2 /* for indexed loads */ + mtsp tmp2, %sr3 + /* The pc is used to compute the baseptr on return + * to ML. The privelege level bits (30 and 31) need to be + * zeroed out before making the call. + */ + ldi 0-4, tmp2 + and pc, tmp2, pc + bv,n 0(pc) + +pending_sigs + /* there are pending signals */ + /* check if signals are masked */ + ldw InSigHandlerOffVSP(tmp4), tmp2 + combf,= tmp2, zero, ml_go + nop + + /* note that a handler trap is pending */ + ldi 1, tmp2 + stw tmp2, HandlerPendingOffVSP(tmp4) + copy limitptr,allocptr + b,n ml_go +END_PROC(restoreregs) + + +ENTRY(SaveFPRegs) + break 0,0 /* should never be called */ + +ENTRY(RestoreFPRegs) + break 0,0 /* should never be called */ + +/* + * array : (int * 'a) -> 'a array + */ +ML_CODE_HDR(array_a) + CHECKLIMIT(array_check) + + ldw 0(stdarg), tmp1 /* tmp1 := length (tagged int) */ + RSHIFT(tmp1, 1, tmp2) /* tmp2 := length : untagged int */ + ldi SMALL_OBJ_SZW, tmp3 /* is this a small object */ + combt,< tmp3, tmp1, L$array_offline + nop + + /* allocate and initialize array data */ + ldw 4(stdarg), stdarg /* stdarg = initial value */ + LSHIFT(tmp2, TAG_SHIFTW, tmp3) /* build descriptor in tmp3 */ + ldi 0+MAKE_TAG(DTAG_arr_data), tmp4 + or tmp3, tmp4, tmp3 + stw tmp3,0(allocptr) /* store descriptor */ + addi 4, allocptr, allocptr /* allocptr++ */ + copy allocptr, tmp3 /* array data ptr in tmp3 */ + + LSHIFT(tmp2, 2, tmp2) /* tmp2 = number of bytes to allocate */ + add tmp2, allocptr, tmp2 /* tmp2 = address of end of array */ +L$array_loop + stw stdarg, 0(allocptr) + addi 4, allocptr, allocptr + combf,= allocptr, tmp2, L$array_loop + nop + + /* allocate array header */ + ldi 0+(DESC_polyarr), tmp2 /* descriptor in tmp2 */ + stw tmp2, 0(allocptr) /* store the descriptor */ + addi 4, allocptr, allocptr /* allocptr++ */ + copy allocptr, stdarg /* result = header addr */ + stw tmp3, 0(allocptr) /* store pointer to data */ + stw tmp1, 4(allocptr) + addi 8, allocptr, allocptr /* allocptr += 2 */ + CONTINUE +L$array_offline /* off-line allocation of big arrays */ + ldi REQ_ALLOC_ARRAY, tmp2 + copy stdlink, pc + b,n set_request + + + +ML_CODE_HDR(create_r_a) + CHECKLIMIT(creat_r_check) + + RSHIFT(stdarg, 1, tmp2) /* tmp2 = length (untagged int) */ + LSHIFT(tmp2, 1, tmp2) /* tmp2 = length in words */ + ldi SMALL_OBJ_SZW, tmp3 + combt,< tmp3, tmp2, L$realarray_offline + nop + + LSHIFT(tmp2, TAG_SHIFTW, tmp1) /* build descriptor in tmp1 */ + ldi 0+MAKE_TAG(DTAG_raw64), tmp3 + or tmp1, tmp3, tmp1 + + ldi 4, tmp3 /* align start floating addr */ + or allocptr, tmp3, allocptr + stw tmp1, 0(allocptr) /* store data descriptor */ + addi 4, allocptr, allocptr /* allocptr++ */ + copy allocptr, tmp3 /* tmp3 = data object */ + LSHIFT(tmp2, 2, tmp2) /* tmp2 = length in bytes */ + add allocptr, tmp2, allocptr/* allocptr += length */ + + /* allocate the header object */ + ldi 0+(DESC_real64arr), tmp1 + stw tmp1, 0(allocptr) /* header descriptor */ + addi 4, allocptr, allocptr /* allocptr++ */ + stw tmp3, 0(allocptr) /* header data field */ + stw stdarg,4(allocptr) /* header length field */ + copy allocptr, stdarg /* stdarg = header object */ + addi 8, allocptr, allocptr /* allocptr += 2 */ + CONTINUE + +L$realarray_offline + /* off-line allocation of big realarrays */ + ldi REQ_ALLOC_REALDARRAY, tmp2 + copy stdlink, pc + b,n set_request + + + +ML_CODE_HDR(create_b_a) + CHECKLIMIT(create_b_checked) + + RSHIFT(stdarg, 1, tmp2) /* tmp2 := length (untagged) */ + addi 3, tmp2, tmp2 /* tmp2 := length (words) */ + RSHIFT(tmp2, 2, tmp2) + ldi SMALL_OBJ_SZW, tmp3 /* is this a small object? */ + combt,< tmp3, tmp2, L$bytearray_offline /* no */ + nop + + /* allocate the data object */ + LSHIFT(tmp2, TAG_SHIFTW, tmp1) /* descriptor in tmp1 */ + ldi 0+MAKE_TAG(DTAG_raw), tmp3 + or tmp1, tmp3, tmp1 + stw tmp1, 0(allocptr) /* write out descriptor */ + addi 4, allocptr, allocptr /* allocptr++ */ + copy allocptr, tmp3 /* tmp3 = data object */ + LSHIFT(tmp2, 2, tmp2) /* length in bytes */ + add tmp2, allocptr, allocptr /* allocptr += length */ + + /* allocate the header object */ + ldi 0+(DESC_word8arr), tmp1 /* header descriptor */ + stw tmp1, 0(allocptr) + addi 4, allocptr, allocptr /* allocptr++ */ + stw tmp3, 0(allocptr) /* header data field */ + stw stdarg, 4(allocptr) /* header length field */ + copy allocptr, stdarg /* stdarg = header object */ + addi 8, allocptr, allocptr /* allocptr += 2 */ + CONTINUE + +L$bytearray_offline /* big object */ + ldi REQ_ALLOC_BYTEARRAY, tmp2 + copy stdlink, pc + b,n set_request + + + +ML_CODE_HDR(create_s_a) + CHECKLIMIT(create_s_checked) + + RSHIFT(stdarg, 1, tmp2) /* tmp2 := length: untagged int */ + addi 4, tmp2, tmp2 /* tmp2 := length in words */ + RSHIFT(tmp2, 2, tmp2) + ldi SMALL_OBJ_SZW, tmp3 /* is this a big object */ + combt,< tmp3, tmp2, L$string_offline /* no */ + nop + + /* allocate the data object */ + LSHIFT(tmp2, TAG_SHIFTW, tmp1) /* build descriptor in tmp1 */ + ldi 0+MAKE_TAG(DTAG_raw), tmp3 + or tmp1, tmp3, tmp1 + stw tmp1, 0(allocptr) /* store descriptor */ + addi 4, allocptr, allocptr /* allocptr++ */ + copy allocptr, tmp3 /* tmp3 = data object */ + LSHIFT(tmp2, 2, tmp2) /* length in bytes */ + add tmp2, allocptr, allocptr/* allocptr += length */ + stw zero, -4(allocptr) /* zero-terminate string */ + + /* allocate the header object */ + ldi 0+(DESC_string), tmp1 /* header descriptor */ + stw tmp1, 0(allocptr) /* header data field */ + addi 4, allocptr, allocptr /* allocptr++ */ + stw tmp3, 0(allocptr) /* header data field */ + stw stdarg, 4(allocptr) /* header length field */ + copy allocptr, stdarg /* stdarg = header object */ + addi 8, allocptr, allocptr /* allocptr += 2 */ + CONTINUE + +L$string_offline + ldi REQ_ALLOC_STRING, tmp2 + copy stdlink, pc + b,n set_request + + + +ML_CODE_HDR(create_v_a) + CHECKLIMIT(create_v_checked) + + ldw 0(stdarg), tmp1 /* tmp1 = tagged length */ + RSHIFT(tmp1, 1, tmp2) /* tmp2 = untagged length */ + ldi SMALL_OBJ_SZW, tmp3 /* is this a small object? */ + combt,< tmp3, tmp2, L$vector_offline /* no */ + nop + + /* allocate and initialize data object */ + LSHIFT(tmp2, TAG_SHIFTW, tmp2) /* build descriptor in tmp2 */ + ldi 0+MAKE_TAG(DTAG_vec_data), tmp3 + or tmp2, tmp3, tmp2 + stw tmp2, 0(allocptr) /* store descriptor */ + addi 4, allocptr, allocptr /* allocptr++ */ + ldw 4(stdarg), tmp2 /* tmp2 = list */ + copy allocptr, stdarg /* stdarg = data obj */ + ldi 0+ML_nil, tmp4 +L$vector_loop + ldw 0(tmp2), tmp3 /* tmp3 = hd(tmp2) */ + ldw 4(tmp2), tmp2 /* tmp2 = tl(tmp2) */ + stw tmp3, 0(allocptr) /* store word in vector */ + addi 4, allocptr, allocptr /* allocptr++ */ + combf,= tmp2, tmp4, L$vector_loop/* if (tmp2 <> nil) goto loop */ + nop + + /* allocate header object */ + ldi 0+(DESC_polyvec), tmp3 /* descriptor in tmp3 */ + stw tmp3, 0(allocptr) /* header descriptor */ + addi 4, allocptr, allocptr /* allocptr++ */ + stw stdarg, 0(allocptr) /* header data field */ + stw tmp1, 4(allocptr) /* header length field */ + copy allocptr, stdarg /* result = header object */ + addi 8, allocptr, allocptr /* allocptr += 2 */ + CONTINUE +L$vector_offline + ldi REQ_ALLOC_VECTOR, tmp2 + copy stdlink, pc + b,n set_request + +/* logb --- extract and unbias the exponent */ +ML_CODE_HDR(logb_a) + ldw 0(stdarg), stdarg /* msb */ + extru stdarg, 11, 12, stdarg /* throw out 20 low bits */ + ldo 0x7ff(%r0), tmp1 /* retain 11 bits */ + and stdarg, tmp1, stdarg + ldo 0-1023(stdarg), stdarg /* unbias */ + LSHIFT(stdarg, 1, stdarg) /* tag as ML int */ + addi 1, stdarg, stdarg + CONTINUE + + + +/* scalb(u:real,v:int) = u * 2 ^ v */ +ML_CODE_HDR(scalb_a) + CHECKLIMIT(scalb_a_checked) + ldw 4(stdarg),tmp1 /* tmp1 := v tagged */ + RSHIFT(tmp1, 1, tmp1) /* tmp1 := v */ + ldw 0(stdarg),stdarg /* stdarg := u */ + ldw 0(stdarg), tmp2 /* tmp2 := MSW(u) */ + ldil L%0x7ff00000, tmp3 /* mask */ + and tmp2, tmp3, tmp3 /* tmp3 := tmp2 & 0x7ff00000 */ + combt,=,n tmp3, %r0, scalb_all_done /* u == 0.0 */ + + RSHIFT(tmp3, 20, tmp3) /* tmp3 := ieee(exp) */ + addo tmp3, tmp1, tmp3 /* tmp3 := scaled exponent */ + combt,<,n tmp3, %r0, scalb_underflow + + ldi 2047, tmp1 /* max. ieee(exp) */ + combt,<,n tmp1, tmp3, scalb_overflow + + ldil L%0x800fffff, tmp1 /* tmp1 := sign bit + mantissa mask */ + ldo R%0x800fffff(tmp1), tmp1 + and tmp1, tmp2, tmp1 /* tmp1 := original sign and mantessa*/ + LSHIFT(tmp3, 20, tmp3) /* tmp3 := exp in right place*/ + or tmp1, tmp3, tmp1 /* tmp1 := MSW(u) */ + ldw 4(stdarg), tmp2 /* tmp2 := LSW(u) */ + /* fall through */ + +scalb_write_out /* {tmp1, tmp2} live on entry */ + ldi 4, tmp3 /* align allocation pointer */ + or tmp3, allocptr, allocptr + stw tmp1, 4(allocptr) /* store MSW */ + stw tmp2, 8(allocptr) /* store LSW */ + ldi 0+(DESC_reald),tmp3 /* store descriptor */ + stw tmp3,0(allocptr) + addi 0x4,allocptr,stdarg /* return pointer to float */ + addi 0xc,allocptr,allocptr /* bump allocation pointer */ + /* fall through */ + +scalb_all_done + /* BUG: The compiler supports arithmetic over denormalized + * numbers, but scalb barfs at them. Denormalized numbers + * are treated here as 0.0. + */ + CONTINUE + +scalb_underflow + /* BUG: Incorrect behaviour on underflow, should return the + * denormalized number. + */ + ldi 0, tmp1 + ldi 0, tmp2 + b,n scalb_write_out + +scalb_overflow + ldil L%0x7fffffff,tmp1 + ldo R%0x7fffffff(tmp1),tmp1 + addo tmp1,tmp1,0 /* generate trap */ + /* should never execute the next instruction */ + + +floor_MAXINT .double 1073741824.0 + +ML_CODE_HDR(floor_a) + fldds 0(stdarg), %fr4 /* fr4 := argument */ + + ldi 0x60e, tmp1 /* set rounding mode to -inf */ + stw tmp1, 0-4(sp) /* store in temp scratch */ + fldws 0-4(sp), %fr0L + fcnvfx,dbl,sgl %fr4, %fr4R + + stw zero,0-4(sp) + fldws 0-4(sp),%fr0L + fstws %fr4R,0-4(sp) + ldw 0-4(sp), stdarg + add stdarg, stdarg, stdarg + ldo 1(stdarg), stdarg + CONTINUE + + +/* try_lock_a */ +ML_CODE_HDR(try_lock_a) + CHECKLIMIT(try_lock_check) + ldw 0(stdarg), tmp1 + ldi 0+ML_true, tmp2 + stw tmp2, 0(stdarg) + copy tmp2, stdarg + CONTINUE + + +ML_CODE_HDR(unlock_a) + CHECKLIMIT(unlock_check) + ldi 0+ML_false, tmp1 + stw tmp1, 0(stdarg) + ldi 0+ML_unit, stdarg + CONTINUE + + + + + /* milli code routines */ + +/* + millicode: + inputs in %r26 (arg0) and %r25 (arg1) + result in %r29 (ret1) + + saved: %r25, %r26, %r1 --- trashed by millicode routines + %r31 --- trashed by BLE + + Note: If the millicode were inlined in this data segment then it would + not be necessary to do this cross-segment jump. +*/ + +/* Note: Offset -20(sp) is used by DoMillicode */ +#define MILLI_LOCAL_AREA 24 /* multiple of 8 */ +#define millicodeSave \ + addi MILLI_LOCAL_AREA, sp, sp !\ + stw %r1, 0-4(sp) !\ + stw %r25,0-8(sp) !\ + stw %r26,0-12(sp) !\ + stw %r31,0-16(sp) + +#define millicodeRestore \ + ldw 0-16(sp), %r31 !\ + ldw 0-12(sp), %r26 !\ + ldw 0-8(sp), %r25 !\ + ldw 0-4(sp), %r1 !\ + addi 0-MILLI_LOCAL_AREA, sp, sp !\ + addi 0-4, pc, pc !\ + bv,n 0(pc) + +#define InvokeMillicode(proc) \ + millicodeSave !\ + ldil L%proc, %r1 !\ + ldo R%proc(%r1), %r1 !\ + ldsid (%r1), %r29 !\ + mtsp %r29, %sr1 !\ + ble,n 0(%sr1, %r1) !\ + nop !\ + millicodeRestore + + .export ml_mul,ENTRY + .export ml_umul,ENTRY + .export ml_div,ENTRY + .export ml_udiv,ENTRY + +floatingZero .double 0.0 +floatingOne .double 1.0 + +/* The bogus addit,= below is to cause an immediate trap */ +#define divByZeroCheck(lab) \ + combf,= %r0, %r25, lab !\ + nop !\ + ldil L%floatingZero, %r29 !\ + ldo R%floatingZero(%r29), %r29 !\ + fldds 0(%r29), %fr4 !\ + ldil L%floatingOne, %r29 !\ + ldo R%floatingOne(%r29), %r29 !\ + fldds 0(%r29), %fr5 !\ + fdiv,dbl %fr5, %fr4, %fr4 !\ + fstds %fr4, 0(sp) !\ + .label lab + +ENTRY(ml_mul) + InvokeMillicode(do_mulI) +ENTRY(ml_umul) + InvokeMillicode(do_mulU) +ENTRY(ml_udiv) + divByZeroCheck(noUdivByZero) + InvokeMillicode(do_divU) + +ENTRY(ml_div) + divByZeroCheck(noDivByZero) + comibf,= 0-1, %r25, mlDivNoOverflow + nop + ldo 0x1, tmp1 + subo %r26, tmp1, tmp1 +mlDivNoOverflow + InvokeMillicode(do_divI) + +/*----------------------------------------------------------------*/ + .code + +#define DoMillicode(proc) \ + stw %r31, 0-20(sp) !\ + bl,n proc, %r31 !\ + nop !\ + ldw 0-20(sp), %r31 !\ + ldsid (%r31), %r1 !\ + mtsp %r1, %sr1 !\ + be,n 0(%sr1, %r31) + + .import $$divI,MILLICODE + .import $$divU,MILLICODE + .import $$muloI,MILLICODE + .import $$mulU,MILLICODE + + .export do_mulI,ENTRY + .export do_mulU,ENTRY + .export do_divI,ENTRY + .export do_divU,ENTRY + +do_mulI + DoMillicode($$muloI) +do_mulU + DoMillicode($$mulU) +do_divI + DoMillicode($$divI) +do_divU + DoMillicode($$divU) + + + .export FlushICache,ENTRY +FlushICache + .proc + .callinfo + + .enter + ldsid (26), 23 /* get space id from short pointer */ + mtsp 23, 2 /* stick it in scratch space reg */ + + depi 0,31,4,26 /* align address to cache line */ + addi 15,25,25 /* align size upwards */ + depi 0,31,4,25 + ldi 16,22 /* r22 := minimum cache line size */ + ldi -16,21 /* r21 := -(minimum cache line size) */ + +fic_loop + fdc 0(2,26) + sync + /* fic can't use short pointer so + * use the space reg set up above + */ + fic,m 22(2,26) + + nop /* 7 cycle delay. See programming note */ + nop /* for SYNC in arch. ref. manual */ + nop + nop + nop + nop + nop + + addb,>= 21,25,fic_loop /* add stride to count, branch */ + nop + .leave + .procend + + + + +/* set_fsr - set IEEE floating point enables. */ +/* saving and restoring tmp1 is temporary paranoia */ + + .export set_fsr,ENTRY +set_fsr + .proc + .callinfo FRAME=64 + .enter + stw zero,0-4(sp) + fldws 0-4(sp),%fr0L + .leave + .procend + + .export pointer2space +pointer2space + .proc + .callinfo + .entry + bv 0(2) + ldsid (26), 28 + .leave + .procend + + .end ; End of program diff --git a/base/runtime/mach-dep/Unsupported/X86.prim.asm b/base/runtime/mach-dep/Unsupported/X86.prim.asm new file mode 100644 index 0000000..3ed4896 --- /dev/null +++ b/base/runtime/mach-dep/Unsupported/X86.prim.asm @@ -0,0 +1,747 @@ +/* X86.prim.asm + * + * COPYRIGHT (c) 1995 AT&T Bell Laboratories. + * + * This was derived from I386.prim.s, by Mark Leone (mleone@cs.cmu.edu) + * + * Completely rewritten and changed to use assyntax.h, by Lal George. + */ + +#include "assyntax.h" +#include "ml-base.h" +#include "asm-base.h" +#include "ml-values.h" +#include "tags.h" +#include "ml-request.h" +#include "ml-limits.h" + +#if defined(OPSYS_LINUX) && defined(__ELF__) +/* needed to disable the execution bit on the stack pages */ +.section .note.GNU-stack,"",%progbits +#endif + +#if defined(OPSYS_DARWIN) +/* Note: although the MacOS assembler claims to be the GNU assembler, it appears to be + * an old version (1.38), which uses different alignment directives. + */ +#undef ALIGNTEXT4 +#undef ALIGNDATA4 +#define ALIGNTEXT4 .align 2 +#define ALIGNDATA4 .align 2 +#endif + +/* + * + * The 386 registers are used as follows: + * + * EAX - temp1 (see the code generator, x86/x86.sml) + * EBX - misc0 + * ECX - misc1 + * EDX - misc2 + * ESI - standard continuation (ml_cont, see ml_state.h) + * EBP - standard argument (ml_arg) + * EDI - free space pointer (ml_allocptr) + * ESP - stack pointer + * EIP - program counter (ml_pc) + */ + +/* Registers (see x86/x86CpsRegs.sml): */ +#define temp EAX +#define misc0 EBX +#define misc1 ECX +#define misc2 EDX +#define stdcont ESI +#define stdarg EBP +#define allocptr EDI +#define stackptr ESP + +/* other reg uses */ +#define creturn EAX + + /* Stack frame */ +#define tempmem REGOFF(0,ESP) +#define baseptr REGOFF(4,ESP) +#define exncont REGOFF(8,ESP) +#define limitptr REGOFF(12,ESP) +#define pc REGOFF(16,ESP) +#define unused_1 REGOFF(20,ESP) +#define storeptr REGOFF(24,ESP) +#define varptr REGOFF(28,ESP) +#define start_gc REGOFF(32,ESP) +#define unused_2 REGOFF(36,ESP) +#define eaxSpill REGOFF(40,ESP) /* eax=0 */ +#define ecxSpill REGOFF(44,ESP) /* ecx=1 */ +#define edxSpill REGOFF(48,ESP) /* edx=2 */ +#define ebxSpill REGOFF(52,ESP) /* ebx=3 */ +#define espSpill REGOFF(56,ESP) /* esp=4 */ +#define ebpSpill REGOFF(60,ESP) /* ebp=5 */ +#define esiSpill REGOFF(64,ESP) /* esi=6 */ +#define ediSpill REGOFF(68,ESP) /* edi=7 */ +#define stdlink REGOFF(72,ESP) +#define stdclos REGOFF(76,ESP) + +#define espsave REGOFF(500,ESP) + +#define ML_STATE_OFFSET 176 +#define mlstate_ptr REGOFF(ML_STATE_OFFSET, ESP) +#define freg8 184 /* double word aligned */ +#define freg9 192 +#define freg31 368 /* 152 + (31-8)*8 */ +#define fpTempMem 376 /* freg31 + 8 */ +#define SpillAreaStart 512 /* starting offset */ +#define ML_FRAME_SIZE (8192) + +#define via + + SEG_DATA + ALIGNDATA4 +request_w: /* place to put the request code */ + D_LONG 0 + GLOBL CSYM(ML_X86Frame) +LABEL(CSYM(ML_X86Frame)) /* ptr to the ml frame (gives C access to limitptr) */ + D_LONG 0 + + +#include "mlstate-offsets.h" /** this file is generated **/ + + +/* + * 386 function call conventions: + * [true for gcc and dynix3 cc; untested for others] + * + * Caller save registers: eax, ecx, edx + * Callee save registers: ebx, esi, edi, and ebp. + * Save frame pointer (ebx) first to match standard function prelude + * Floating point state is caller-save. + * Arguments passed on stack. Rightmost argument pushed first. + * Word-sized result returned in %eax. + * On Darwin, stack frame must be multiple of 16 bytes + */ + +#define cresult EAX + +#define CALLEE_SAVE_SZB 16 /* ebp, ebx, esi, edi */ + +#define CALLEE_SAVE \ + PUSH_L(EBP); \ + PUSH_L(EBX); \ + PUSH_L(ESI); \ + PUSH_L(EDI) + +#define CALLEE_RESTORE \ + POP_L(EDI); \ + POP_L(ESI); \ + POP_L(EBX); \ + POP_L(EBP) + +/* MOVE copies one memory location to another, using a specified temporary. */ + +#define MOVE(src,tmp,dest) \ + MOV_L(src, tmp); \ + MOV_L(tmp, dest) + +#define CONTINUE \ + JMP(CODEPTR(stdcont)) + +#define CHECKLIMIT \ + 1:; \ + MOVE(stdlink, temp, pc) ; \ + CMP_L(limitptr, allocptr); \ + JB(9f); \ + CALL(CSYM(saveregs)); \ + JMP(1b); \ + 9: + +/**********************************************************************/ + SEG_TEXT + ALIGNTEXT4 + +ML_CODE_HDR(sigh_return_a) + MOV_L(CONST(ML_unit),stdlink) + MOV_L(CONST(ML_unit),stdclos) + MOV_L(CONST(ML_unit),pc) + MOV_L(CONST(REQ_SIG_RETURN), request_w) + JMP(CSYM(set_request)) + +/* sigh_resume: + * Resume execution at the point at which a handler trap occurred. This is a + * standard two-argument function, thus the closure is in ml_cont. + */ + +ENTRY(sigh_resume) + MOV_L(CONST(REQ_SIG_RESUME), request_w) + JMP(CSYM(set_request)) + +/* pollh_return_a: + * The return continuation for the ML poll handler. + */ +ML_CODE_HDR(pollh_return_a) + MOV_L(CONST(REQ_POLL_RETURN), request_w) + MOV_L(CONST(ML_unit),stdlink) + MOV_L(CONST(ML_unit),stdclos) + MOV_L(CONST(ML_unit),pc) + JMP(CSYM(set_request)) + +/* pollh_resume: + * Resume execution at the point at which a poll event occurred. + */ +ENTRY(pollh_resume) + MOV_L(CONST(REQ_POLL_RESUME), request_w) + JMP(CSYM(set_request)) + +ML_CODE_HDR(handle_a) + MOV_L(CONST(REQ_EXN), request_w) + MOVE(stdlink,temp,pc) + JMP(CSYM(set_request)) + +ML_CODE_HDR(return_a) + MOV_L(CONST(REQ_RETURN), request_w) + MOV_L(CONST(ML_unit),stdlink) + MOV_L(CONST(ML_unit),stdclos) + MOV_L(CONST(ML_unit),pc) + JMP(CSYM(set_request)) + +/* Request a fault. The floating point coprocessor must be reset + * (thus trashing the FP registers) since we do not know whether a + * value has been pushed into the temporary "register". This is OK + * because no floating point registers will be live at the start of + * the exception handler. + */ +ENTRY(request_fault) + CALL(CSYM(FPEEnable)) /* Does not trash any general regs. */ + MOV_L(CONST(REQ_FAULT), request_w) + MOVE(stdlink,temp,pc) + JMP(CSYM(set_request)) + +/* bind_cfun : (string * string) -> c_function + */ +ML_CODE_HDR(bind_cfun_a) + CHECKLIMIT + MOV_L(CONST(REQ_BIND_CFUN), request_w) + JMP(CSYM(set_request)) + +ML_CODE_HDR(build_literals_a) + CHECKLIMIT + MOV_L(CONST(REQ_BUILD_LITERALS), request_w) + JMP(CSYM(set_request)) + +ML_CODE_HDR(callc_a) + CHECKLIMIT + MOV_L(CONST(REQ_CALLC), request_w) + JMP(CSYM(set_request)) + +ENTRY(saveregs) + POP_L(pc) + MOV_L(CONST(REQ_GC), request_w) + /* fall into set_request */ + +ENTRY(set_request) + /* temp holds mlstate_ptr, valid request in request_w */ + /* Save registers */ + MOV_L(mlstate_ptr, temp) + MOV_L(allocptr, REGOFF(AllocPtrOffMSP,temp)) + MOV_L(stdarg, REGOFF(StdArgOffMSP,temp)) + MOV_L(stdcont, REGOFF(StdContOffMSP,temp)) + +#define temp2 allocptr + /* note that we have left ML code */ + MOV_L(REGOFF(VProcOffMSP,temp), temp2) + MOV_L(CONST(0), REGOFF(InMLOffVSP,temp2)) + + MOV_L(misc0, REGOFF(Misc0OffMSP,temp)) + MOV_L(misc1, REGOFF(Misc1OffMSP,temp)) + MOV_L(misc2, REGOFF(Misc2OffMSP,temp)) + + /* Save vregs before the stack frame is popped. */ + MOVE(limitptr,temp2, REGOFF(LimitPtrOffMSP,temp)) + MOVE(exncont, temp2, REGOFF(ExnPtrOffMSP,temp)) + MOVE(stdclos, temp2, REGOFF(StdClosOffMSP,temp)) + MOVE(stdlink, temp2, REGOFF(LinkRegOffMSP,temp)) + MOVE(pc, temp2, REGOFF(PCOffMSP,temp)) + MOVE(storeptr,temp2, REGOFF(StorePtrOffMSP,temp)) + MOVE(varptr, temp2, REGOFF(VarPtrOffMSP,temp)) +#undef temp2 + + /* return val of function is request code */ + MOV_L(request_w,creturn) + + /* Pop the stack frame and return to run_ml(). */ +#if defined(OPSYS_DARWIN) + LEA_L(REGOFF(ML_FRAME_SIZE+12,ESP),ESP) +#else + MOV_L(espsave, ESP) +#endif + CALLEE_RESTORE + RET + + SEG_TEXT + ALIGNTEXT4 +ENTRY(restoreregs) + MOV_L(REGOFF(4,ESP), temp) /* Get argument (MLState ptr). */ + CALLEE_SAVE +#if defined(OPSYS_DARWIN) + /* MacOS X frames must be 16-byte aligned. We have 20 bytes on + * the stack for the return PC and callee-saves, so we need a + * 12-byte pad. + */ + SUB_L(CONST(ML_FRAME_SIZE+12), ESP) +#else + /* Align sp on 8 byte boundary. Assumes that the stack + * starts out being at least word aligned. But who knows ... + */ + MOV_L(ESP,EBX) + OR_L(CONST(4), ESP) + SUB_L(CONST(4), ESP) /* stack grows from high to low */ + SUB_L(CONST(ML_FRAME_SIZE), ESP) + MOV_L(EBX,espsave) +#endif + +#define temp2 EBX + /* Initialize the ML stack frame. */ + MOVE(REGOFF(ExnPtrOffMSP, temp), temp2, exncont) + MOVE(REGOFF(LimitPtrOffMSP, temp), temp2, limitptr) + MOVE(REGOFF(StorePtrOffMSP, temp), temp2, storeptr) + MOVE(REGOFF(VarPtrOffMSP, temp), temp2, varptr) + LEA_L(CSYM(saveregs), temp2) + MOV_L(temp2,start_gc) + MOV_L(temp, mlstate_ptr) + + /* vregs */ + MOVE (REGOFF(LinkRegOffMSP,temp), temp2, stdlink) + MOVE (REGOFF(StdClosOffMSP,temp), temp2, stdclos) + + /* PC */ + MOVE (REGOFF(PCOffMSP,temp), temp2, pc) +#undef temp2 + + /* Load ML registers. */ + MOV_L(REGOFF(AllocPtrOffMSP,temp), allocptr) + MOV_L(REGOFF(StdContOffMSP,temp), stdcont) + MOV_L(REGOFF(StdArgOffMSP,temp), stdarg) + MOV_L(REGOFF(Misc0OffMSP,temp), misc0) + MOV_L(REGOFF(Misc1OffMSP,temp), misc1) + MOV_L(REGOFF(Misc2OffMSP,temp), misc2) + + MOV_L(ESP,CSYM(ML_X86Frame)) /* frame ptr for signal handler. */ + + PUSH_L(misc2) /* free up a register */ + PUSH_L(temp) /* save msp temporarily */ + +#define tmpreg misc2 + + /* note that we are entering ML */ + MOV_L(REGOFF(VProcOffMSP,temp),temp) /* temp is now vsp */ +#define vsp temp + MOV_L(CONST(1),REGOFF(InMLOffVSP,vsp)) + + /* handle signals */ + MOV_L(REGOFF(SigsRecvOffVSP,vsp),tmpreg) + CMP_L(REGOFF(SigsHandledOffVSP,vsp),tmpreg) + +#undef tmpreg + JNE(pending) + +restore_and_jmp_ml: + POP_L(temp) /* restore temp to msp */ + POP_L(misc2) + +jmp_ml: + CMP_L(limitptr, allocptr) + JMP(CODEPTR(REGOFF(PCOffMSP,temp))) /* Jump to ML code. */ + + +pending: + /* Currently handling signal? */ + CMP_L(CONST(0), REGOFF(InSigHandlerOffVSP,vsp)) + JNE(restore_and_jmp_ml) + /* handler trap is now pending */ + MOV_L(IMMED(1),HandlerPendingOffVSP(vsp)) + + /* must restore here because limitptr is on stack */ /* XXX */ + POP_L(temp) /* restore temp to msp */ + POP_L(misc2) + + MOV_L(allocptr,limitptr) + JMP(jmp_ml) /* Jump to ML code. */ +#undef vsp + +/* ---------------------------------------------------------------------- + * array : (int * 'a) -> 'a array + * Allocate and initialize a new array. This can cause GC. + */ +ML_CODE_HDR(array_a) + CHECKLIMIT + MOV_L(REGIND(stdarg),temp) /* temp := length in words */ + SAR_L(CONST(1),temp) /* temp := length untagged */ + CMP_L(CONST(SMALL_OBJ_SZW),temp) /* is this a small object */ + JGE(3f) + +#define temp1 misc0 +#define temp2 misc1 + PUSH_L(misc0) /* save misc0 */ + PUSH_L(misc1) /* save misc1 */ + + MOV_L(temp, temp1) + SAL_L(CONST(TAG_SHIFTW),temp1) /* build descriptor in temp1 */ + OR_L(CONST(MAKE_TAG(DTAG_arr_data)),temp1) + MOV_L(temp1,REGIND(allocptr)) /* store descriptor */ + ADD_L(CONST(4),allocptr) /* allocptr++ */ + MOV_L(allocptr, temp1) /* temp1 := array data ptr */ + MOV_L(REGOFF(4,stdarg), temp2) /* temp2 := initial value */ +2: + MOV_L(temp2, REGIND(allocptr)) /* initialize array */ + ADD_L(CONST(4), allocptr) + SUB_L(CONST(1), temp) + JNE(2b) + + /* Allocate array header */ + MOV_L(CONST(DESC_polyarr),REGIND(allocptr)) /* descriptor in temp */ + ADD_L(CONST(4), allocptr) /* allocptr++ */ + MOV_L(REGIND(stdarg), temp) /* temp := length */ + MOV_L(allocptr, stdarg) /* result = header addr */ + MOV_L(temp1, REGIND(allocptr)) /* store pointer to data */ + MOV_L(temp, REGOFF(4,allocptr)) /* store length */ + ADD_L(CONST(8), allocptr) + + POP_L(misc1) + POP_L(misc0) + CONTINUE +#undef temp1 +#undef temp2 +3: + MOV_L(CONST(REQ_ALLOC_ARRAY), request_w) + MOVE (stdlink, temp, pc) + JMP(CSYM(set_request)) + + +/* create_r : int -> realarray */ +ML_CODE_HDR(create_r_a) + CHECKLIMIT +#define temp1 misc0 + PUSH_L(misc0) /* free temp1 */ + MOV_L(stdarg,temp) /* temp := length */ + SAR_L(CONST(1),temp) /* temp := untagged length */ + SAL_L(CONST(1),temp) /* temp := length in words */ + CMP_L(CONST(SMALL_OBJ_SZW),temp) + JGE(2f) + + OR_L(CONST(4),allocptr) /* align allocptr */ + + /* allocate the data object */ + MOV_L(temp, temp1) + SAL_L(CONST(TAG_SHIFTW),temp1) /* temp1 := descriptor */ + OR_L(CONST(MAKE_TAG(DTAG_raw64)),temp1) + MOV_L(temp1,REGIND(allocptr)) /* store descriptor */ + ADD_L(CONST(4), allocptr) /* allocptr++ */ + MOV_L(allocptr, temp1) /* temp1 := data object */ + SAL_L(CONST(2),temp) /* temp := length in bytes */ + ADD_L(temp, allocptr) /* allocptr += length */ + + /* allocate the header object */ + MOV_L(CONST(DESC_real64arr),REGIND(allocptr))/* header descriptor */ + ADD_L(CONST(4), allocptr) /* allocptr++ */ + MOV_L(temp1, REGIND(allocptr)) /* header data field */ + MOV_L(stdarg, REGOFF(4,allocptr)) /* header length field */ + MOV_L(allocptr, stdarg) /* stdarg := header object */ + ADD_L(CONST(8), allocptr) /* allocptr += 2 */ + + POP_L(misc0) /* restore temp1 */ + CONTINUE +2: + POP_L(misc0) /* restore temp1 */ + MOV_L(CONST(REQ_ALLOC_REALDARRAY), request_w) + MOVE (stdlink, temp, pc) + JMP(CSYM(set_request)) +#undef temp1 + + +/* create_b : int -> bytearray */ +ML_CODE_HDR(create_b_a) + CHECKLIMIT + MOV_L(stdarg,temp) /* temp := length(tagged int) */ + SAR_L(CONST(1),temp) /* temp := length(untagged) */ + ADD_L(CONST(3),temp) + SAR_L(CONST(2),temp) /* temp := length(words) */ + CMP_L(CONST(SMALL_OBJ_SZW),temp) /* small object? */ + JMP(2f) + JGE(2f) /* XXXXX */ + +#define temp1 misc0 + PUSH_L(misc0) + + /* allocate teh data object */ + MOV_L(temp, temp1) /* temp1 := descriptor */ + SAL_L(CONST(TAG_SHIFTW),temp1) + OR_L(CONST(MAKE_TAG(DTAG_raw)),temp1) + MOV_L(temp1, REGIND(allocptr)) /* store descriptor */ + ADD_L(CONST(4), allocptr) /* allocptr++ */ + MOV_L(allocptr, temp1) /* temp1 := data object */ + SAL_L(CONST(2), temp) /* temp := length in bytes */ + ADD_L(temp, allocptr) /* allocptr += length */ + + /* allocate the header object */ + MOV_L(CONST(DESC_word8arr), REGIND(allocptr))/* header descriptor */ + ADD_L(CONST(4),allocptr) /* allocptr++ */ + MOV_L(temp1, REGIND(allocptr)) /* header data field */ + MOV_L(stdarg, REGOFF(4,allocptr)) /* header length field */ + MOV_L(allocptr, stdarg) /* stdarg := header object */ + ADD_L(CONST(8),allocptr) /* allocptr := 2 */ + POP_L(misc0) + CONTINUE +#undef temp1 +2: + MOV_L(CONST(REQ_ALLOC_BYTEARRAY), request_w) + MOVE (stdlink, temp, pc) + JMP(CSYM(set_request)) + + +/* create_s : int -> string */ +ML_CODE_HDR(create_s_a) + CHECKLIMIT + MOV_L(stdarg,temp) + SAR_L(CONST(1),temp) /* temp := length(untagged) */ + ADD_L(CONST(4),temp) + SAR_L(CONST(2),temp) /* temp := length(words) */ + CMP_L(CONST(SMALL_OBJ_SZW),temp) + JGE(2f) + + PUSH_L(misc0) /* free misc0 */ +#define temp1 misc0 + + MOV_L(temp, temp1) + SAL_L(CONST(TAG_SHIFTW),temp1) /* build descriptor in temp1 */ + OR_L(CONST(MAKE_TAG(DTAG_raw)), temp1) + MOV_L(temp1, REGIND(allocptr))/* store the data pointer */ + ADD_L(CONST(4),allocptr) /* allocptr++ */ + + MOV_L(allocptr, temp1) /* temp1 := data object */ + SAL_L(CONST(2),temp) /* temp := length in bytes */ + ADD_L(temp, allocptr) /* allocptr += length */ + MOV_L(CONST(0),REGOFF(-4,allocptr)) /* zero out the last word */ + + /* allocate the header object */ + MOV_L(CONST(DESC_string), temp) /* header descriptor */ + MOV_L(temp, REGIND(allocptr)) + ADD_L(CONST(4), allocptr) /* allocptr++ */ + MOV_L(temp1, REGIND(allocptr))/* header data field */ + MOV_L(stdarg, REGOFF(4,allocptr)) /* header length field */ + MOV_L(allocptr, stdarg) /* stdarg := header object */ + ADD_L(CONST(8), allocptr) + + POP_L(misc0) /* restore misc0 */ +#undef temp1 + CONTINUE +2: + MOV_L(CONST(REQ_ALLOC_STRING), request_w) + MOVE (stdlink, temp, pc) + JMP(CSYM(set_request)) + +/* create_v_a : int * 'a list -> 'a vector + * creates a vector with elements taken from a list. + * n.b. The frontend ensures that list cannot be nil. + */ +ML_CODE_HDR(create_v_a) + CHECKLIMIT + PUSH_L(misc0) + PUSH_L(misc1) +#define temp1 misc0 +#define temp2 misc1 + MOV_L(REGIND(stdarg),temp) /* temp := length(tagged) */ + MOV_L(temp, temp1) + SAR_L(CONST(1),temp1) /* temp1 := length(untagged) */ + CMP_L(CONST(SMALL_OBJ_SZW),temp1) + JGE(3f) + + + SAL_L(CONST(TAG_SHIFTW),temp1) /* build descriptor in temp1 */ + OR_L(CONST(MAKE_TAG(DTAG_vec_data)),temp1) + MOV_L(temp1,REGIND(allocptr)) /* store descriptor */ + ADD_L(CONST(4),allocptr) /* allocptr++ */ + MOV_L(REGOFF(4,stdarg),temp1) /* temp1 := list */ + MOV_L(allocptr,stdarg) /* stdarg := vector */ + +2: + MOV_L(REGIND(temp1),temp2) /* temp2 := hd(temp1) */ + MOV_L(temp2, REGIND(allocptr)) /* store word in vector */ + ADD_L(CONST(4), allocptr) /* allocptr++ */ + MOV_L(REGOFF(4,temp1),temp1) /* temp1 := tl(temp1) */ + CMP_L(CONST(ML_nil),temp1) /* temp1 = nil? */ + JNE(2b) + + /* allocate header object */ + MOV_L(CONST(DESC_polyvec),temp1)/* descriptor in temp1 */ + MOV_L(temp1, REGIND(allocptr)) /* store descriptor */ + ADD_L(CONST(4),allocptr) /* allocptr++ */ + MOV_L(stdarg, REGIND(allocptr)) /* header data field */ + MOV_L(temp, REGOFF(4,allocptr)) /* header length */ + MOV_L(allocptr, stdarg) /* result = header object */ + ADD_L(CONST(8),allocptr) /* allocptr += 2 */ + + POP_L(misc1) + POP_L(misc0) + CONTINUE +3: + POP_L(misc1) + POP_L(misc0) + MOV_L(CONST(REQ_ALLOC_VECTOR), request_w) + MOVE (stdlink, temp, pc) + JMP(CSYM(set_request)) +#undef temp1 +#undef temp2 + +/* try_lock: spin_lock -> bool. + * low-level test-and-set style primitive for mutual-exclusion among + * processors. For now, we only provide a uni-processor trivial version. + */ +ML_CODE_HDR(try_lock_a) +#if (MAX_PROCS > 1) +# error multiple processors not supported +#else /* (MAX_PROCS == 1) */ + MOV_L(REGIND(stdarg), temp) /* Get old value of lock. */ + MOV_L(CONST(1), REGIND(stdarg)) /* Set the lock to ML_false. */ + MOV_L(temp, stdarg) /* Return old value of lock. */ + CONTINUE +#endif + +/* unlock : releases a spin lock + */ +ML_CODE_HDR(unlock_a) +#if (MAX_PROCS > 1) +# error multiple processors not supported +#else /* (MAX_PROCS == 1) */ + MOV_L(CONST(3), REGIND(stdarg)) /* Store ML_true into lock. */ + MOV_L(CONST(1), stdarg) /* Return unit. */ + CONTINUE +#endif + + +/********************* Floating point functions. *********************/ + +#define FPOP fstp %st /* Pop the floating point register stack. */ + + +/* Temporary storage for the old and new floating point control + word. We don't use the stack to for this, since doing so would + change the offsets of the pseudo-registers. */ + DATA + ALIGN4 +old_controlwd: + .word 0 +new_controlwd: + .word 0 + TEXT + ALIGN4 + +/* + * Initialize the 80387 floating point coprocessor. First, the floating + * point control word is initialized (undefined fields are left + * unchanged). Rounding control is set to "nearest" (although floor_a + * needs "toward negative infinity"). Precision control is set to + * "double". The precision, underflow, denormal + * overflow, zero divide, and invalid operation exceptions + * are masked. Next, seven of the eight available entries on the + * floating point register stack are claimed (see x86/x86.sml). + * + * NB: this cannot trash any registers because it's called from request_fault. + */ +ENTRY(FPEEnable) + FINIT + SUB_L(CONST(4), ESP) /* Temp space. Keep stack aligned. */ + FSTCW(REGIND(ESP)) /* Store FP control word. */ + /* Keep undefined fields, clear others. */ + AND_W(CONST(0xf0c0), REGIND(ESP)) + OR_W(CONST(0x023f), REGIND(ESP)) /* Set fields (see above). */ + FLDCW(REGIND(ESP)) /* Install new control word. */ + ADD_L(CONST(4), ESP) + RET + +/* NOTE: the following code is no longer required, since we are assuming C99 support */ +#ifdef XXXX +#if (defined(OPSYS_LINUX) || defined(OPSYS_CYGWIN) || defined(OPSYS_SOLARIS)) +ENTRY(fegetround) + SUB_L(CONST(4), ESP) /* allocate temporary space */ + FSTCW(REGIND(ESP)) /* store fp control word */ + SAR_L(CONST(10),REGIND(ESP))/* rounding mode is at bit 10 and 11 */ + AND_L(CONST(3), REGIND(ESP))/* mask two bits */ + MOV_L(REGIND(ESP),EAX) /* return rounding mode */ + ADD_L(CONST(4), ESP) /* deallocate space */ + RET + +ENTRY(fesetround) + SUB_L(CONST(4), ESP) /* allocate temporary space */ + FSTCW(REGIND(ESP)) /* store fp control word */ + AND_W(CONST(0xf3ff), REGIND(ESP)) /* Clear rounding field. */ + MOV_L(REGOFF(8,ESP), EAX) /* new rounding mode */ + SAL_L(CONST(10), EAX) /* move to right place */ + OR_L(EAX,REGIND(ESP)) /* new control word */ + FLDCW(REGIND(ESP)) /* load new control word */ + ADD_L(CONST(4), ESP) /* deallocate space */ + RET +#endif +#endif /* XXXX */ + + + +/* floor : real -> int + Return the nearest integer that is less or equal to the argument. + Caller's responsibility to make sure arg is in range. */ + +ML_CODE_HDR(floor_a) + FSTCW(old_controlwd) /* Get FP control word. */ + MOV_W(old_controlwd, AX) + AND_W(CONST(0xf3ff), AX) /* Clear rounding field. */ + OR_W(CONST(0x0400), AX) /* Round towards neg. infinity. */ + MOV_W(AX, new_controlwd) + FLDCW(new_controlwd) /* Install new control word. */ + + FLD_D(REGIND(stdarg)) + SUB_L(CONST(4), ESP) + FISTP_L(REGIND(ESP)) /* Round, store, and pop. */ + POP_L(stdarg) + SAL_L(CONST(1), stdarg) /* Tag the resulting integer. */ + INC_L(stdarg) + + FLDCW(old_controlwd) /* Restore old FP control word. */ + CONTINUE + +/* logb : real -> int + * Extract the unbiased exponent pointed to by stdarg. + * Note: Using fxtract, and fistl does not work for inf's and nan's. + */ +ML_CODE_HDR(logb_a) + MOV_L(REGOFF(4,stdarg),temp) /* msb for little endian arch */ + SAR_L(CONST(20), temp) /* throw out 20 bits */ + AND_L(CONST(0x7ff),temp) /* clear all but 11 low bits */ + SUB_L(CONST(1023), temp) /* unbias */ + SAL_L(CONST(1), temp) /* room for tag bit */ + ADD_L(CONST(1), temp) /* tag bit */ + MOV_L(temp, stdarg) + CONTINUE + + +/* scalb : (real * int) -> real + * Scale the first argument by 2 raised to the second argument. Raise + * Float("underflow") or Float("overflow") as appropriate. + * NB: We assume the first floating point "register" is + * caller-save, so we can use it here (see x86/x86.sml). */ + +ML_CODE_HDR(scalb_a) + CHECKLIMIT + PUSH_L(REGOFF(4,stdarg)) /* Get copy of scalar. */ + SAR_L(CONST(1), REGIND(ESP)) /* Untag it. */ + FILD_L(REGIND(ESP)) /* Load it ... */ +/* fstp %st(1) */ /* ... into 1st FP reg. */ + MOV_L(REGIND(stdarg), temp) /* Get pointer to real. */ + FLD_D(REGIND(temp)) /* Load it into temp. */ + + FSCALE /* Multiply exponent by scalar. */ + MOV_L(CONST(DESC_reald), REGIND(allocptr)) + FSTP_D(REGOFF(4,allocptr)) /* Store resulting float. */ + ADD_L(CONST(4), allocptr) /* Allocate word for tag. */ + MOV_L(allocptr, stdarg) /* Return a pointer to the float. */ + ADD_L(CONST(8), allocptr) /* Allocate room for float. */ + FSTP_D(REGIND(ESP)) + ADD_L(CONST(4), ESP) /* Discard copy of scalar. */ + CONTINUE + +/* end of X86.prim.asm */ diff --git a/base/runtime/mach-dep/Unsupported/X86.prim.masm b/base/runtime/mach-dep/Unsupported/X86.prim.masm new file mode 100644 index 0000000..d67878c --- /dev/null +++ b/base/runtime/mach-dep/Unsupported/X86.prim.masm @@ -0,0 +1,760 @@ +/* X86.prim.masm (MS assembler) + * + * COPYRIGHT (c) 1996 Bell Laboratories, Lucent Technologies + * + * derived from X86.prim.asm + * derived from I386.prim.s, by Mark Leone (mleone@cs.cmu.edu) + * + * new version derived from Lal George's completely rewritten + * X86.prim.asm, by Matthias Blume (blume@cs.uchicago.edu) + */ + +#include "ml-base.h" +#include "asm-base.h" +#include "ml-values.h" +#include "tags.h" +#include "ml-request.h" +#include "reg-mask.h" +#include "ml-limits.h" + +/* + * + * The 386 registers are used as follows: + * + * EAX - temp1 (see the code generator, x86/x86.sml) + * EBX - misc0 + * ECX - misc1 + * EDX - misc2 + * ESI - standard continuation (ml_cont, see ml_state.h) + * EBP - standard argument (ml_arg) + * EDI - free space pointer (ml_allocptr) + * ESP - stack pointer + * EIP - program counter (ml_pc) + */ + + + /* Registers (see x86/x86.sml): */ +#define temp REG(eax) +#define misc0 REG(ebx) +#define misc1 REG(ecx) +#define misc2 REG(edx) +#define stdcont REG(esi) +#define stdarg REG(ebp) +#define allocptr REG(edi) +#define stackptr REG(esp) + + + /* other reg uses */ +#define creturn REG(eax) + +#define REGOFF(o,r) IND_DW_OFF(r,o) + + /* Stack frame (see x86/x86.sml): */ +#define tempmem REGOFF(0,REG(esp)) +#define baseptr REGOFF(4,REG(esp)) +#define exncont REGOFF(8,REG(esp)) +#define limitptr REGOFF(12,REG(esp)) +#define pc REGOFF(16,REG(esp)) +#define unused_1 REGOFF(20,REG(esp)) +#define storeptr REGOFF(24,REG(esp)) +#define varptr REGOFF(28,REG(esp)) +#define start_gc REGOFF(32,REG(esp)) +#define unused_2 REGOFF(36,REG(esp)) +#define eaxSpill REGOFF(40,REG(esp)) /* eax=0 */ +#define ecxSpill REGOFF(44,REG(esp)) /* ecx=1 */ +#define edxSpill REGOFF(48,REG(esp)) /* edx=2 */ +#define ebxSpill REGOFF(52,REG(esp)) /* ebx=3 */ +#define espSpill REGOFF(56,REG(esp)) /* esp=4 */ +#define ebpSpill REGOFF(60,REG(esp)) /* ebp=5 */ +#define esiSpill REGOFF(64,REG(esp)) /* esi=6 */ +#define ediSpill REGOFF(68,REG(esp)) /* edi=7 */ +#define stdlink REGOFF(72,REG(esp)) +#define stdclos REGOFF(76,REG(esp)) + +#define ML_STATE_OFFSET 176 +#define mlstate_ptr REGOFF(ML_STATE_OFFSET,REG(esp)) +#define freg8 184 /* double word aligned */ +#define freg9 192 +#define freg31 368 /* 152 + (31-8)*8 */ +#define fpTempMem 376 /* freg31 + 8 */ +#define SpillAreaStart 512 /* starting offset */ +#define ML_FRAME_SIZE (8192) + +#define via + +.386 +.MODEL FLAT + + DATA + ALIGN4 +WORD32(request_w,0) /* place to put the request code */ + + GLOBAL(CSYM(ML_X86Frame)) +WORD32(CSYM(ML_X86Frame),0) /* ptr to the ml frame (gives C access to limitptr) */ + +WORD32(SavedSP,0) /* Value of stack pointer to restore */ + +#include "mlstate-offsets.h" /** this file is generated **/ + +/* + * 386 function call conventions: + * [true for gcc and dynix3 cc; untested for others] + * + * Caller save registers: eax, ecx, edx + * Callee save registers: ebx, esi, edi, and ebp. + * Floating point state is caller-save. + * Arguments passed on stack. Rightmost argument pushed first. + * Word-sized result returned in %eax. + */ + +#define cresult REG(eax) + +CALLEE_SAVE_M MACRO + PUSHL REG(ebx) + PUSHL REG(esi) + PUSHL REG(edi) + PUSHL REG(ebp) +ENDM +#define CALLEE_SAVE CALLEE_SAVE_M + +CALLEE_RESTORE_M MACRO + POPL REG(ebp) + POPL REG(edi) + POPL REG(esi) + POPL REG(ebx) +ENDM +#define CALLEE_RESTORE CALLEE_RESTORE_M + +/* MOVE copies one memory location to another, using a specified temporary. */ + +EXCHANGE_M MACRO src,tmp,dest + MOVL (src, tmp) + MOVL (tmp, dest) +ENDM +#define MOVE(a,b,c) EXCHANGE_M a, b, c + +CONTINUE_M MACRO + JMP via stdcont +ENDM +#define CONTINUE CONTINUE_M + +CHECKLIMIT_M MACRO + ANON_LAB: + MOVE(stdlink, temp, pc) + CMPL(limitptr, allocptr) + jb FLAB_ANON + CALL CSYM(saveregs) + JMP BLAB_ANON + ANON_LAB: +ENDM +#define CHECKLIMIT CHECKLIMIT_M + +ENTRY_M MACRO id + GLOBAL(CSYM(&id)) + LABEL(CSYM(&id)) +ENDM +#define ENTRY(id) ENTRY_M id + +ML_CODE_HDR_M MACRO name + GLOBAL(CSYM(&name)) + ALIGN4 + LABEL(CSYM(&name)) +ENDM +#define ML_CODE_HDR(name) ML_CODE_HDR_M name + + +/**********************************************************************/ + TEXT + ALIGN4 + +ML_CODE_HDR(sigh_return_a) + MOVL (IMMED(ML_unit),stdlink) + MOVL (IMMED(ML_unit),stdclos) + MOVL (IMMED(ML_unit),pc) + MOVL (IMMED(REQ_SIG_RETURN),request_w) + JMP CSYM(set_request) + +/* sigh_resume: + * Resume execution at the point at which a handler trap occurred. This is a + * standard two-argument function, thus the closure is in ml_cont. + */ + +ENTRY(sigh_resume) + MOVL (IMMED(REQ_SIG_RESUME),request_w) + JMP CSYM(set_request) + +/* pollh_return_a: + * The return continuation for the ML poll handler. + */ +ML_CODE_HDR(pollh_return_a) + MOVL (IMMED(REQ_POLL_RETURN),request_w) + MOVL (IMMED(ML_unit),stdlink) + MOVL (IMMED(ML_unit),stdclos) + MOVL (IMMED(ML_unit),pc) + JMP CSYM(set_request) + +/* pollh_resume: + * Resume execution at the point at which a poll event occurred. + */ +ENTRY(pollh_resume) + MOVL (IMMED(REQ_POLL_RESUME),request_w) + JMP CSYM(set_request) + +ML_CODE_HDR(handle_a) + MOVL (IMMED(REQ_EXN),request_w) + MOVE (stdlink,temp,pc) + JMP CSYM(set_request) + +ML_CODE_HDR(return_a) + MOVL (IMMED(REQ_RETURN),request_w) + MOVL (IMMED(ML_unit),stdlink) + MOVL (IMMED(ML_unit),stdclos) + MOVL (IMMED(ML_unit),pc) + JMP CSYM(set_request) + +/* Request a fault. The floating point coprocessor must be reset + * (thus trashing the FP registers) since we don't know whether a + * value has been pushed into the temporary "register". This is OK + * because no floating point registers will be live at the start of + * the exception handler. + */ +ENTRY(request_fault) + CALL CSYM(FPEEnable) + MOVL (IMMED(REQ_FAULT),request_w) + MOVE (stdlink,temp,pc) + JMP CSYM(set_request) + +/* bind_cfun : (string * string) -> c_function + */ +ML_CODE_HDR(bind_cfun_a) + CHECKLIMIT + MOVL (IMMED(REQ_BIND_CFUN),request_w) + JMP CSYM(set_request) + +ML_CODE_HDR(build_literals_a) + CHECKLIMIT + MOVL (IMMED(REQ_BUILD_LITERALS),request_w) + JMP CSYM(set_request) + +ML_CODE_HDR(callc_a) + CHECKLIMIT + MOVL (IMMED(REQ_CALLC),request_w) + JMP CSYM(set_request) + +ENTRY(saveregs) + POPL pc + MOVL (IMMED(REQ_GC),request_w) + /* fall into set_request */ + +ENTRY(set_request) + /* temp holds mlstate_ptr, valid request in request_w */ + /* Save registers */ + MOVL (mlstate_ptr, temp) + MOVL (allocptr, REGOFF(AllocPtrOffMSP,temp)) + MOVL (stdarg, REGOFF(StdArgOffMSP,temp)) + MOVL (stdcont, REGOFF(StdContOffMSP,temp)) + +#define temp2 allocptr + /* note that we have left ML code */ + MOVL (REGOFF(VProcOffMSP,temp),temp2) + MOVL (IMMED(0), REGOFF(InMLOffVSP,temp2)) + + MOVL (misc0, REGOFF(Misc0OffMSP,temp)) + MOVL (misc1, REGOFF(Misc1OffMSP,temp)) + MOVL (misc2, REGOFF(Misc2OffMSP,temp)) + + /* Save vregs before stack frame is popped. (?? - Blume) */ + MOVE (limitptr,temp2, REGOFF(LimitPtrOffMSP,temp)) + MOVE (exncont, temp2, REGOFF(ExnPtrOffMSP,temp)) + MOVE (stdclos, temp2, REGOFF(StdClosOffMSP,temp)) + MOVE (stdlink, temp2, REGOFF(LinkRegOffMSP,temp)) + MOVE (pc, temp2, REGOFF(PCOffMSP,temp)) + MOVE (storeptr,temp2, REGOFF(StorePtrOffMSP,temp)) + MOVE (varptr, temp2, REGOFF(VarPtrOffMSP,temp)) +#undef temp2 + + /* return val of function is request code */ + MOVL (request_w,creturn) + + /* Pop the stack frame and return to run_ml(). */ + MOVL (SavedSP, REG(esp)) + CALLEE_RESTORE + RET + + TEXT + ALIGN4 + +ENTRY(asm_restoreregs) + MOVL (REGOFF(4,REG(esp)), temp) /* get argument (MLState ptr) */ + CALLEE_SAVE + + MOVL (REG(esp), SavedSP) /* save stack pointer */ + + /* Align on 8 byte boundary. Assumes that the stack starts + * out being at least word aligned. But who knows... */ + + ORL (IMMED(4),REG(esp)) + SUBL (IMMED(4),REG(esp)) + +#define temp2 REG(ebx) + /* Allocate and initialize the ML stack frame. */ + SUBL (IMMED(ML_FRAME_SIZE), REG(esp)) + MOVE (REGOFF(ExnPtrOffMSP,temp), temp2, exncont) + MOVE (REGOFF(LimitPtrOffMSP,temp), temp2, limitptr) + MOVE (REGOFF(StorePtrOffMSP,temp), temp2, storeptr) + MOVE (REGOFF(VarPtrOffMSP,temp), temp2, varptr) + LEA (CSYM(saveregs),temp2) + MOVL (temp2,start_gc) + MOVL (temp,mlstate_ptr) + + /* vregs */ + MOVE (REGOFF(LinkRegOffMSP,temp),temp2,stdlink) + MOVE (REGOFF(StdClosOffMSP,temp),temp2,stdclos) + + /* PC */ + MOVE (REGOFF(PCOffMSP,temp),temp2,pc) +#undef temp2 + + /* Load ML registers */ + MOVL (REGOFF(AllocPtrOffMSP,temp),allocptr) + MOVL (REGOFF(StdContOffMSP,temp),stdcont) + MOVL (REGOFF(StdArgOffMSP,temp),stdarg) + MOVL (REGOFF(Misc0OffMSP,temp),misc0) + MOVL (REGOFF(Misc1OffMSP,temp),misc1) + MOVL (REGOFF(Misc2OffMSP,temp),misc2) + + MOVL(REG(esp),CSYM(ML_X86Frame)) /* frame ptr for signal handler. */ + + PUSHL misc2 /* free up a register */ + PUSHL temp /* save msp temporarily */ + +#define tmpreg misc2 + /* note that we're entering ML */ + MOVL (REGOFF(VProcOffMSP,temp),temp) /* temp is now vsp */ +#define vsp temp + MOVL (IMMED(1),REGOFF(InMLOffVSP,vsp)) + + /* handle signals */ + MOVL (REGOFF(SigsRecvOffVSP,vsp),tmpreg) + CMPL (REGOFF(SigsHandledOffVSP,vsp),tmpreg) +#undef tmpreg + + JNE pending + +restore_and_jmp_ml: + POPL temp /* restore temp to msp */ + POPL misc2 + +jmp_ml: + CMPL (limitptr,allocptr) + JMP (REGOFF(PCOffMSP,temp)) /* jump to ML code */ + +pending: + CMPL (IMMED(0),REGOFF(InSigHandlerOffVSP,vsp)) + JNE restore_and_jmp_ml + + MOVL (IMMED(1),REGOFF(HandlerPendingOffVSP,vsp)) + + /* must restore here because limitptr is on stack */ + POPL temp /* restore temp to msp */ + POPL misc2 + + MOVL (allocptr,limitptr) + JMP jmp_ml +#undef vsp + + +/* ---------------------------------------------------------------------- + * array : (int * 'a) -> 'a array + * Allocate and initialize a new array. This can cause GC. + */ + +ML_CODE_HDR(array_a) + CHECKLIMIT + MOVL (REGOFF(0,stdarg),temp) /* temp := length in words */ + SARL (IMMED(1),temp) /* temp := length untagged */ + CMPL (IMMED(SMALL_OBJ_SZW),temp) /* small object? */ + JGE FLAB(ARRAY_A_LARGE) + +#define temp1 misc0 +#define temp2 misc1 + PUSHL misc0 /* free up misc0 */ + PUSHL misc1 /* free up misc1 */ + + MOVL (temp,temp1) + SALL (IMMED(TAG_SHIFTW),temp1) /* build descriptor */ + ORL (IMMED(MAKE_TAG(DTAG_arr_data)),temp1) + MOVL (temp1,REGOFF(0,allocptr)) /* store descriptor */ + ADDL (IMMED(4),allocptr) /* allocptr++ */ + MOVL (allocptr,temp1) /* temp1 := array data ptr */ + MOVL (REGOFF(4,stdarg),temp2) /* temp2 := initial value */ +ANON_LAB: + MOVL (temp2,REGOFF(0,allocptr)) /* init array */ + ADDL (IMMED(4),allocptr) + SUBL (IMMED(1),temp) + JNE BLAB_ANON + + /* Allocate array header */ + MOVL (IMMED(DESC_polyarr),REGOFF(0,allocptr)) /* descriptor */ + ADDL (IMMED(4),allocptr) + MOVL (REGOFF(0,stdarg),temp) /* temp := length */ + MOVL (allocptr, stdarg) /* result := header addr */ + MOVL (temp1, REGOFF(0,allocptr)) /* store pointer to data */ + MOVL (temp, REGOFF(4,allocptr)) /* store length */ + ADDL (IMMED(8),allocptr) + + POPL misc1 + POPL misc0 + CONTINUE +#undef temp1 +#undef temp2 + +LABEL(ARRAY_A_LARGE) + MOVL (IMMED(REQ_ALLOC_ARRAY),request_w) + MOVE (stdlink,temp,pc) + JMP CSYM(set_request) + + +/* create_r : int -> realarray */ +ML_CODE_HDR(create_r_a) + CHECKLIMIT +#define temp1 misc0 + PUSHL misc0 /* free temp1 */ + MOVL (stdarg,temp) /* temp := length */ + SARL (IMMED(1),temp) /* temp := untagged length */ + SHLL (IMMED(1),temp) /* temp := length in words */ + CMPL (IMMED(SMALL_OBJ_SZW),temp) + JGE FLAB_ANON + + ORL (IMMED(4),allocptr) /* align allocptr */ + + /* allocate the data object */ + MOVL (temp,temp1) + SHLL (IMMED(TAG_SHIFTW),temp1) /* temp1 := descriptor */ + ORL (IMMED(MAKE_TAG(DTAG_raw64)),temp1) + MOVL (temp1,REGOFF(0,allocptr)) /* store descriptor */ + ADDL (IMMED(4),allocptr) /* allocptr++ */ + MOVL (allocptr,temp1) /* temp1 := data object */ + SHLL (IMMED(2),temp) /* temp := length in bytes */ + ADDL (temp,allocptr) /* allocptr += length */ + + /* allocate the header object */ + MOVL (IMMED(DESC_real64arr),REGOFF(0,allocptr)) + ADDL (IMMED(4),allocptr) /* allocptr++ */ + MOVL (temp1,REGOFF(0,allocptr)) /* header data */ + MOVL (stdarg,REGOFF(4,allocptr)) /* header length */ + MOVL (allocptr,stdarg) /* stdarg := header obj */ + ADDL (IMMED(8),allocptr) /* allocptr += 2 */ + + POPL misc0 + CONTINUE + +ANON_LAB: + POPL misc0 + MOVL (IMMED(REQ_ALLOC_REALDARRAY),request_w) + MOVE (stdlink,temp,pc) + JMP CSYM(set_request) +#undef temp1 + + +/* create_b : int -> bytearray */ +ML_CODE_HDR(create_b_a) + CHECKLIMIT + MOVL (stdarg,temp) /* temp is tagged length */ + SARL (IMMED(1),temp) /* temp is untagged length */ + ADDL (IMMED(3),temp) + SARL (IMMED(2),temp) /* temp is length in words */ + CMPL (IMMED(SMALL_OBJ_SZW),temp) + JMP FLAB_ANON + JGE FLAB_ANON /* XXXXX */ + +#define temp1 misc0 + PUSHL misc0 + + /* allocate the data object */ + MOVL (temp,temp1) + SHLL (IMMED(TAG_SHIFTW),temp1) + ORL (IMMED(MAKE_TAG(DTAG_raw)),temp1) + MOVL (temp1,REGOFF(0,allocptr)) /* store descriptor */ + ADDL (IMMED(4),allocptr) + MOVL (allocptr,temp1) /* temp1 is data object */ + SHLL (IMMED(2),temp) /* temp is size in bytes */ + ADDL (temp,allocptr) /* allocptr += length */ + + /* allocate the header object */ + MOVL (IMMED(DESC_word8arr),REGOFF(0,allocptr)) + ADDL (IMMED(4),allocptr) + MOVL (temp1,REGOFF(0,allocptr)) + MOVL (stdarg,REGOFF(4,allocptr)) + MOVL (allocptr,stdarg) /* stdarg := header */ + ADDL (IMMED(8),allocptr) /* allocptr += 2 */ + POPL misc0 + CONTINUE +#undef temp1 + +ANON_LAB: + MOVL (IMMED(REQ_ALLOC_BYTEARRAY),request_w) + MOVE (stdlink,temp,pc) + JMP CSYM(set_request) + + +/* create_s : int -> string */ +ML_CODE_HDR(create_s_a) + CHECKLIMIT + MOVL (stdarg,temp) + SARL (IMMED(1),temp) /* untag */ + ADDL (IMMED(4),temp) /* 3 + extra byte */ + SARL (IMMED(2),temp) /* length in words */ + CMPL (IMMED(SMALL_OBJ_SZW),temp) + JGE FLAB_ANON + + PUSHL misc0 +#define temp1 misc0 + + MOVL (temp,temp1) + SHLL (IMMED(TAG_SHIFTW),temp1) + ORL (IMMED(MAKE_TAG(DTAG_raw)),temp1) + MOVL (temp1,REGOFF(0,allocptr)) /* store descriptor */ + ADDL (IMMED(4),allocptr) + + MOVL (allocptr,temp1) /* temp1 is data obj */ + SHLL (IMMED(2),temp) /* bytes len */ + ADDL (temp,allocptr) /* allocptr += length */ + MOVL (IMMED(0),REGOFF((-4),allocptr)) /* zero out last word */ + + /* allocate header obj */ + MOVL (IMMED(DESC_string),temp) /* hdr descr */ + MOVL (temp,REGOFF(0,allocptr)) + ADDL (IMMED(4),allocptr) + MOVL (temp1,REGOFF(0,allocptr)) /* hdr data */ + MOVL (stdarg,REGOFF(4,allocptr)) /* hdr length */ + MOVL (allocptr, stdarg) /* stdarg is hdr obj */ + ADDL (IMMED(8),allocptr) /* allocptr += 2 */ + + POPL misc0 +#undef temp1 + CONTINUE + +ANON_LAB: + MOVL (IMMED(REQ_ALLOC_STRING),request_w) + MOVE (stdlink, temp, pc) + JMP CSYM(set_request) + + +/* create_v_a : int * 'a list -> 'a vector + * creates a vector with elements taken from a list. + * n.b. The frontend ensures that list cannot be nil. + */ +ML_CODE_HDR(create_v_a) + CHECKLIMIT + PUSHL misc0 + PUSHL misc1 +#define temp1 misc0 +#define temp2 misc1 + MOVL (REGOFF(0,stdarg),temp) /* len tagged */ + MOVL (temp,temp1) + SARL (IMMED(1),temp1) /* untag */ + CMPL (IMMED(SMALL_OBJ_SZW),temp1) + JGE FLAB(CREATE_V_A_LARGE) + + SHLL (IMMED(TAG_SHIFTW),temp1) + ORL (IMMED(MAKE_TAG(DTAG_vec_data)),temp1) + MOVL (temp1,REGOFF(0,allocptr)) + ADDL (IMMED(4),allocptr) + MOVL (REGOFF(4,stdarg),temp1) /* temp1 is list */ + MOVL (allocptr,stdarg) /* stdarg is vector */ + +ANON_LAB: + MOVL (REGOFF(0,temp1),temp2) /* hd */ + MOVL (temp2,REGOFF(0,allocptr)) /* store into vector */ + ADDL (IMMED(4),allocptr) + MOVL (REGOFF(4,temp1),temp1) /* tl */ + CMPL (IMMED(ML_nil),temp1) /* isNull */ + JNE BLAB_ANON + + /* allocate header object */ + MOVL (IMMED(DESC_polyvec),temp1) + MOVL (temp1,REGOFF(0,allocptr)) + ADDL (IMMED(4),allocptr) + MOVL (stdarg,REGOFF(0,allocptr)) /* data */ + MOVL (temp,REGOFF(4,allocptr)) /* len */ + MOVL (allocptr,stdarg) /* result */ + ADDL (IMMED(8),allocptr) /* allocptr += 2 */ + + POPL misc1 + POPL misc0 + CONTINUE + +LABEL(CREATE_V_A_LARGE) + POPL misc1 + POPL misc0 + MOVL (IMMED(REQ_ALLOC_VECTOR),request_w) + MOVE (stdlink, temp, pc) + JMP CSYM(set_request) +#undef temp1 +#undef temp2 + + +/* try_lock: spin_lock -> bool. + * low-level test-and-set style primitive for mutual-exclusion among + * processors. For now, we only provide a uni-processor trivial version. + */ +ML_CODE_HDR(try_lock_a) +#if (MAX_PROCS > 1) +# error multiple processors not supported +#else /* (MAX_PROCS == 1) */ + MOVL ((stdarg), temp) /* Get old value of lock. */ + MOVL (IMMED(1), (stdarg)) /* Set the lock to ML_false. */ + MOVL (temp, stdarg) /* Return old value of lock. */ + CONTINUE +#endif + +/* unlock : releases a spin lock + */ +ML_CODE_HDR(unlock_a) +#if (MAX_PROCS > 1) +# error multiple processors not supported +#else /* (MAX_PROCS == 1) */ + MOVL (IMMED(3), (stdarg)) /* Store ML_true into lock. */ + MOVL (IMMED(1), stdarg) /* Return unit. */ + CONTINUE +#endif + + +/********************* Floating point functions. *********************/ + +#define FPOP fstp FP_REG(st) /* Pop the floating point register stack. */ + +/* Temporary storage for the old and new floating point control + word. We don't use the stack to for this, since doing so would + change the offsets of the pseudo-registers. */ + DATA + ALIGN4 +GLOBAL(old_controlwd) +WORD16(old_controlwd,0) +GLOBAL(new_controlwd) +WORD16(new_controlwd,0) + + TEXT + ALIGN4 + +/* + * Initialize the 80387 floating point coprocessor. First, the floating + * point control word is initialized (undefined fields are left + * unchanged). Rounding control is set to "nearest" (although floor_a + * needs "toward negative infinity"). Precision control is set to + * "double". The precision, underflow, denormal + * overflow, zero divide, and invalid operation exceptions + * are masked. Next, seven of the eight available entries on the + * floating point register stack are claimed (see x86/x86.sml). + * + * NB: this cannot trash any registers because it's called from request_fault. + */ +ENTRY(FPEEnable) + finit + /* Temp space.Keep stack aligned. */ + SUBL (IMMED(4), REG(esp)) + /* Store FP control word. */ + fstcw IND_W_OFF(REG(esp),0) + /* Keep undefined fields, clear others. */ + ANDW (IMMED(HEXLIT(f0c0)), REGOFF(0,REG(esp))) + /* Set fields (see above). */ + ORW (IMMED(HEXLIT(023f)), REGOFF(0,REG(esp))) + fldcw IND_W_OFF(REG(esp),0) /* Install new control word. */ + ADDL (IMMED(4), REG(esp)) + RET + +ENTRY(fegetround) + SUBL (IMMED(4),REG(esp)) /* allocate temporary space */ + FSTCW IND_W_OFF(REG(esp),0) /* store fp control word */ + /* rounding mode is at bit 10 and 11 */ + SARL (IMMED(10),REGOFF(0,REG(esp))) + ANDL (IMMED(3),REGOFF(0,REG(esp))) /* mask two bits */ + MOVL (REGOFF(0,REG(esp)),REG(eax)) /* return rounding mode */ + ADDL (IMMED(4),REG(esp)) /* deallocate space */ + RET + +ENTRY(fesetround) + SUBL (IMMED(4),REG(esp)) /* allocate temporary space */ + FSTCW IND_W_OFF(REG(esp),0) /* store fp control word */ + /* Clear rounding field. */ + ANDW (IMMED(HEXLIT(f3ff)),REGOFF(0,REG(esp))) + MOVL (REGOFF(8,REG(esp)),REG(eax)) /* new rounding mode */ + SALL (IMMED(10),REG(eax)) /* move to right place */ + ORL (REG(eax),REGOFF(0,REG(esp))) /* new control word */ + FLDCW IND_W_OFF(REG(esp),0) /* load new control word */ + ADDL (IMMED(4),REG(esp)) /* deallocate space */ + RET + + +/* floor : real -> int + Return the nearest integer that is less or equal to the argument. + Caller's responsibility to make argument in range. */ + +ML_CODE_HDR(floor_a) + /* Get FP control word. */ + fstcw old_controlwd + MOVW (old_controlwd,REG(ax)) + /* Clear rounding field. */ + ANDW (IMMED(HEXLIT(f3ff)), REG(ax)) + /* Round towards neg. infinity. */ + ORW (IMMED(HEXLIT(0400)), REG(ax)) + MOVW (REG(ax), new_controlwd) + fldcw new_controlwd /* Install new control word. */ + + fld REAL8 PTR 0 [stdarg] + SUBL (IMMED(4),REG(esp)) + FISTPL REGOFF(0,REG(esp)) + POPL stdarg + SALL (IMMED(1),stdarg) + INCL stdarg + + FLDCW old_controlwd + CONTINUE + + +/* logb : real -> int + * Extract the unbiased exponent pointed to by stdarg. + * Note: Using fxtract, and fistl does not work for inf's and nan's. + */ +ML_CODE_HDR(logb_a) + MOVL (REGOFF(4,stdarg),temp) /* msb for little endian arch */ + SARL (IMMED(20),temp) /* throw out 20 bits */ + ANDL (IMMED(HEXLIT(7ff)),temp) /* clear all but 11 low bits */ + SUBL (IMMED(1023),temp) /* unbias */ + SALL (IMMED(1),temp) /* room for tag bit */ + ADDL (IMMED(1),temp) /* tag bit */ + MOVL (temp,stdarg) + CONTINUE + + +/* scalb : (real * int) -> real + * Scale the first argument by 2 raised to the second argument. Raise + * Float("underflow") or Float("overflow") as appropriate. + * NB: We assume the first floating point "register" is + * caller-save, so we can use it here (see x86/x86.sml). */ + +ML_CODE_HDR(scalb_a) + CHECKLIMIT + PUSHL REGOFF(4,stdarg) /* Get copy of scalar. */ + SARL (IMMED(1),REGOFF(0,REG(esp))) /* Untag it. */ + FILDL REGOFF(0,REG(esp)) /* Load it ... */ +/* fstp FP_REG(st)(1) */ /* ... into 1st FP reg. */ +/* ADDL (IMMED(4), REG(esp)) */ /* Discard copy of scalar. */ + + MOVL (REGOFF(0,stdarg), temp) /* Get pointer to real. */ + fld REAL8 PTR 0 [temp] /* Load it into temp. */ + + fscale /* Multiply exponent by scalar. */ + MOVL (IMMED(DESC_reald), REGOFF(0,allocptr)) + fstp REAL8 PTR 4 [allocptr] /* Store resulting float. */ + ADDL (IMMED(4),allocptr) /* Allocate word for tag. */ + MOVL (allocptr, stdarg) /* Return a pointer to the float. */ + ADDL (IMMED(8), allocptr) /* Allocate room for float. */ + fstp REAL8 PTR 0 [esp] /* ?? */ + ADDL (IMMED(4),REG(esp)) /* discard copy of scalar */ + CONTINUE + +END + +/* end of X86.prim.masm (MS assembler) */ diff --git a/base/runtime/mach-dep/Unsupported/assyntax.h b/base/runtime/mach-dep/Unsupported/assyntax.h new file mode 100644 index 0000000..4b500c3 --- /dev/null +++ b/base/runtime/mach-dep/Unsupported/assyntax.h @@ -0,0 +1,1688 @@ + +#ifndef __ASSYNTAX_H__ +#define __ASSYNTAX_H__ + +/* + * Copyright 1992 Vrije Universiteit, The Netherlands + * + * Permission to use, copy, modify, and distribute this software and its + * documentation for any purpose and without fee is hereby granted, provided + * that the above copyright notice appear in all copies and that both that + * copyright notice and this permission notice appear in supporting + * documentation, and that the name of the Vrije Universiteit not be used in + * advertising or publicity pertaining to distribution of the software without + * specific, written prior permission. The Vrije Universiteit makes no + * representations about the suitability of this software for any purpose. + * It is provided "as is" without express or implied warranty. + * + * The Vrije Universiteit DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS + * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, + * IN NO EVENT SHALL The Vrije Universiteit BE LIABLE FOR ANY SPECIAL, + * INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM + * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE + * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR + * PERFORMANCE OF THIS SOFTWARE. + */ +/* $XFree86: xc/extras/Mesa/src/X86/assyntax.h,v 1.10 2002/03/05 20:27:24 dawes Exp $ */ + +/* + * assyntax.h + * + * Select the syntax appropriate to the 386 assembler being used + * To add support for more assemblers add more columns to the CHOICE + * macro. Note that register names must also have uppercase names + * to avoid macro recursion. e.g., #define ah %ah recurses! + * + * NB 1. Some of the macros for certain assemblers imply that the code is to + * run in protected mode!! Caveat emptor. + * + * NB 2. 486 specific instructions are not included. This is to discourage + * their accidental use in code that is intended to run on 386 and 486 + * systems. + * + * Supported assemblers: + * + * (a) AT&T SysVr4 as(1): define ATT_ASSEMBLER + * (b) GNU Assembler gas: define GNU_ASSEMBLER (default) + * (c) Amsterdam Compiler kit: define ACK_ASSEMBLER + * (d) The Netwide Assembler: define NASM_ASSEMBLER + * (e) Microsoft Assembler: define MASM_ASSEMBLER (UNTESTED!) + * + * The following naming conventions have been used to identify the various + * data types: + * _SR = segment register version + * Integer: + * _Q = quadword = 64 bits + * _L = long = 32 bits + * _W = short = 16 bits + * _B = byte = 8 bits + * Floating-point: + * _X = m80real = 80 bits + * _D = double = 64 bits + * _S = single = 32 bits + * + * Author: Gregory J. Sharp, Sept 1992 + * Vrije Universiteit, Amsterdam, The Netherlands + * + * [support for Intel syntax added by Josh Vanderhoof, 1999] + */ + +#if !(defined(NASM_ASSEMBLER) || defined(MASM_ASSEMBLER)) + +/* Default to ATT_ASSEMBLER when SVR4 or SYSV are defined */ +#if (defined(SVR4) || defined(SYSV)) && !defined(GNU_ASSEMBLER) +#define ATT_ASSEMBLER +#endif + +#if !defined(ATT_ASSEMBLER) && !defined(GNU_ASSEMBLER) && !defined(ACK_ASSEMBLER) +#define GNU_ASSEMBLER +#endif + +#if (defined(__STDC__) && !defined(UNIXCPP)) || (defined (sun) && defined (i386) && defined (SVR4) && defined (__STDC__) && !defined (__GNUC__)) +#define CONCAT(x, y) x ## y +#else +#define CONCAT(x, y) x/**/y +#endif + +#ifdef ACK_ASSEMBLER + +/* Assume we write code for 32-bit protected mode! */ + +/* Redefine register names for GAS & AT&T assemblers */ +#define AL al +#define AH ah +#define AX ax +#define EAX ax +#define BL bl +#define BH bh +#define BX bx +#define EBX bx +#define CL cl +#define CH ch +#define CX cx +#define ECX cx +#define DL dl +#define DH dh +#define DX dx +#define EDX dx +#define BP bp +#define EBP bp +#define SI si +#define ESI si +#define DI di +#define EDI di +#define SP sp +#define ESP sp +#define CS cs +#define SS ss +#define DS ds +#define ES es +#define FS fs +#define GS gs +/* Control Registers */ +#define CR0 cr0 +#define CR1 cr1 +#define CR2 cr2 +#define CR3 cr3 +/* Debug Registers */ +#define DR0 dr0 +#define DR1 dr1 +#define DR2 dr2 +#define DR3 dr3 +#define DR4 dr4 +#define DR5 dr5 +#define DR6 dr6 +#define DR7 dr7 +/* Floating-point Stack */ +#define ST st + +#define AS_BEGIN .sect .text; .sect .rom; .sect .data; .sect .bss; .sect .text + + +#define _WTOG o16 /* word toggle for _W instructions */ +#define _LTOG /* long toggle for _L instructions */ +#define ADDR_TOGGLE a16 +#define OPSZ_TOGGLE o16 +#define USE16 .use16 +#define USE32 .use32 + +#define CHOICE(a,b,c) c + +#else /* AT&T or GAS */ + +/* Redefine register names for GAS & AT&T assemblers */ +#define AL %al +#define AH %ah +#define AX %ax +#define EAX %eax +#define BL %bl +#define BH %bh +#define BX %bx +#define EBX %ebx +#define CL %cl +#define CH %ch +#define CX %cx +#define ECX %ecx +#define DL %dl +#define DH %dh +#define DX %dx +#define EDX %edx +#define BP %bp +#define EBP %ebp +#define SI %si +#define ESI %esi +#define DI %di +#define EDI %edi +#define SP %sp +#define ESP %esp +#define CS %cs +#define SS %ss +#define DS %ds +#define ES %es +#define FS %fs +#define GS %gs +/* Control Registers */ +#define CR0 %cr0 +#define CR1 %cr1 +#define CR2 %cr2 +#define CR3 %cr3 +/* Debug Registers */ +#define DR0 %db0 +#define DR1 %db1 +#define DR2 %db2 +#define DR3 %db3 +#define DR4 %db4 +#define DR5 %db5 +#define DR6 %db6 +#define DR7 %db7 +/* Floating-point Stack */ +#define _STX0 %st(0) +#define _STX1 %st(1) +#define _STX2 %st(2) +#define _STX3 %st(3) +#define _STX4 %st(4) +#define _STX5 %st(5) +#define _STX6 %st(6) +#define _STX7 %st(7) +#define ST(x) CONCAT(_STX,x) +#ifdef GNU_ASSEMBLER +#define ST0 %st(0) +#else +#define ST0 %st +#endif +/* MMX Registers */ +#define MM0 %mm0 +#define MM1 %mm1 +#define MM2 %mm2 +#define MM3 %mm3 +#define MM4 %mm4 +#define MM5 %mm5 +#define MM6 %mm6 +#define MM7 %mm7 +/* SSE Registers */ +#define XMM0 %xmm0 +#define XMM1 %xmm1 +#define XMM2 %xmm2 +#define XMM3 %xmm3 +#define XMM4 %xmm4 +#define XMM5 %xmm5 +#define XMM6 %xmm6 +#define XMM7 %xmm7 + +#define AS_BEGIN +#define USE16 +#define USE32 + +#ifdef GNU_ASSEMBLER + +#define ADDR_TOGGLE aword +#define OPSZ_TOGGLE word + +#define CHOICE(a,b,c) b + +#else +/* + * AT&T ASSEMBLER SYNTAX + * ********************* + */ +#define CHOICE(a,b,c) a + +#define ADDR_TOGGLE addr16 +#define OPSZ_TOGGLE data16 + +#endif /* GNU_ASSEMBLER */ +#endif /* ACK_ASSEMBLER */ + + +#if defined(__QNX__) || defined(Lynx) || (defined(SYSV) || defined(SVR4)) && !defined(ACK_ASSEMBLER) || defined(__ELF__) || defined(__GNU__) || defined(__GNUC__) +#define GLNAME(a) a +#else +#define GLNAME(a) CONCAT(_,a) +#endif + + + /****************************************/ + /* */ + /* Select the various choices */ + /* */ + /****************************************/ + + +/* Redefine assembler directives */ +/*********************************/ +#define GLOBL CHOICE(.globl, .globl, .extern) +#define GLOBAL GLOBL +#define EXTERN GLOBL +/* +#define ALIGNTEXT32 CHOICE(.align 32, .align ARG2(5,0x90), .align 32) +*/ +#define ALIGNTEXT32 CHOICE(.align 32, .balign 32, .align 32) +#define ALIGNTEXT16 CHOICE(.align 16, .balign 16, .align 16) +#define ALIGNTEXT8 CHOICE(.align 8, .balign 8, .align 8) +#define ALIGNTEXT4 CHOICE(.align 4, .balign 4, .align 4) +#define ALIGNTEXT2 CHOICE(.align 2, .balign 2, .align 2) +/* ALIGNTEXT4ifNOP is the same as ALIGNTEXT4, but only if the space is + * guaranteed to be filled with NOPs. Otherwise it does nothing. + */ +#define ALIGNTEXT32ifNOP CHOICE(.align 32, .balign ARG2(32,0x90), /*can't do it*/) +#define ALIGNTEXT16ifNOP CHOICE(.align 16, .balign ARG2(16,0x90), /*can't do it*/) +#define ALIGNTEXT8ifNOP CHOICE(.align 8, .balign ARG2(8,0x90), /*can't do it*/) +#define ALIGNTEXT4ifNOP CHOICE(.align 4, .balign ARG2(4,0x90), /*can't do it*/) +#define ALIGNDATA32 CHOICE(.align 32, .balign ARG2(32,0x0), .align 32) +#define ALIGNDATA16 CHOICE(.align 16, .balign ARG2(16,0x0), .align 16) +#define ALIGNDATA8 CHOICE(.align 8, .balign ARG2(8,0x0), .align 8) +#define ALIGNDATA4 CHOICE(.align 4, .balign ARG2(4,0x0), .align 4) +#define ALIGNDATA2 CHOICE(.align 2, .balign ARG2(2,0x0), .align 2) +#define FILE(s) CHOICE(.file s, .file s, .file s) +#define STRING(s) CHOICE(.string s, .asciz s, .asciz s) +#define D_LONG CHOICE(.long, .long, .data4) +#define D_WORD CHOICE(.value, .short, .data2) +#define D_BYTE CHOICE(.byte, .byte, .data1) +#define SPACE CHOICE(.comm, .space, .space) +#define COMM CHOICE(.comm, .comm, .comm) +#define SEG_DATA CHOICE(.data, .data, .sect .data) +#define SEG_TEXT CHOICE(.text, .text, .sect .text) +#define SEG_BSS CHOICE(.bss, .bss, .sect .bss) + +#ifdef GNU_ASSEMBLER +#define D_SPACE(n) . = . + n +#else +#define D_SPACE(n) .space n +#endif + +/* Addressing Modes */ +/* Immediate Mode */ +#define ADDR(a) CHOICE(CONCAT($,a), CONCAT($,a), a) +#define CONST(a) CHOICE(CONCAT($,a), CONCAT($,a), a) + +/* Indirect Mode */ +#define CONTENT(a) CHOICE(a, a, (a)) /* take contents of variable */ +#define REGIND(a) CHOICE((a), (a), (a)) /* Register a indirect */ +/* Register b indirect plus displacement a */ +#define REGOFF(a, b) CHOICE(a(b), a(b), a(b)) +/* Reg indirect Base + Index + Displacement - this is mainly for 16-bit mode + * which has no scaling + */ +#define REGBID(b,i,d) CHOICE(d(b,i), d(b,i), d(b)(i)) +/* Reg indirect Base + (Index * Scale) */ +#define REGBIS(b,i,s) CHOICE((b,i,s), (b,i,s), (b)(i*s)) +/* Reg indirect Base + (Index * Scale) + Displacement */ +#define REGBISD(b,i,s,d) CHOICE(d(b,i,s), d(b,i,s), d(b)(i*s)) +/* Displaced Scaled Index: */ +#define REGDIS(d,i,s) CHOICE(d(,i,s), d(,i,s), d(i * s)) +/* Indexed Base: */ +#define REGBI(b,i) CHOICE((b,i), (b,i), (b)(i)) +/* Displaced Base: */ +#define REGDB(d,b) CHOICE(d(b), d(b), d(b)) +/* Variable indirect: */ +#define VARINDIRECT(var) CHOICE(*var, *var, (var)) +/* Use register contents as jump/call target: */ +#define CODEPTR(reg) CHOICE(*reg, *reg, reg) + +/* For expressions requiring bracketing + * eg. (CRT0_PM | CRT_EM) + */ + +#define EXPR(a) CHOICE([a], (a), [a]) +#define ENOT(a) CHOICE(0!a, ~a, ~a) +#define EMUL(a,b) CHOICE(a\*b, a*b, a*b) +#define EDIV(a,b) CHOICE(a\/b, a/b, a/b) + +/* + * We have to beat the problem of commas within arguments to choice. + * eg. choice (add a,b, add b,a) will get argument mismatch. Luckily ANSI + * and other known cpp definitions evaluate arguments before substitution + * so the following works. + */ +#define ARG2(a, b) a,b +#define ARG3(a,b,c) a,b,c + +/* Redefine assembler commands */ +#define AAA CHOICE(aaa, aaa, aaa) +#define AAD CHOICE(aad, aad, aad) +#define AAM CHOICE(aam, aam, aam) +#define AAS CHOICE(aas, aas, aas) +#define ADC_L(a, b) CHOICE(adcl ARG2(a,b), adcl ARG2(a,b), _LTOG adc ARG2(b,a)) +#define ADC_W(a, b) CHOICE(adcw ARG2(a,b), adcw ARG2(a,b), _WTOG adc ARG2(b,a)) +#define ADC_B(a, b) CHOICE(adcb ARG2(a,b), adcb ARG2(a,b), adcb ARG2(b,a)) +#define ADD_L(a, b) CHOICE(addl ARG2(a,b), addl ARG2(a,b), _LTOG add ARG2(b,a)) +#define ADD_W(a, b) CHOICE(addw ARG2(a,b), addw ARG2(a,b), _WTOG add ARG2(b,a)) +#define ADD_B(a, b) CHOICE(addb ARG2(a,b), addb ARG2(a,b), addb ARG2(b,a)) +#define AND_L(a, b) CHOICE(andl ARG2(a,b), andl ARG2(a,b), _LTOG and ARG2(b,a)) +#define AND_W(a, b) CHOICE(andw ARG2(a,b), andw ARG2(a,b), _WTOG and ARG2(b,a)) +#define AND_B(a, b) CHOICE(andb ARG2(a,b), andb ARG2(a,b), andb ARG2(b,a)) +#define ARPL(a,b) CHOICE(arpl ARG2(a,b), arpl ARG2(a,b), arpl ARG2(b,a)) +#define BOUND_L(a, b) CHOICE(boundl ARG2(a,b), boundl ARG2(b,a), _LTOG bound ARG2(b,a)) +#define BOUND_W(a, b) CHOICE(boundw ARG2(a,b), boundw ARG2(b,a), _WTOG bound ARG2(b,a)) +#define BSF_L(a, b) CHOICE(bsfl ARG2(a,b), bsfl ARG2(a,b), _LTOG bsf ARG2(b,a)) +#define BSF_W(a, b) CHOICE(bsfw ARG2(a,b), bsfw ARG2(a,b), _WTOG bsf ARG2(b,a)) +#define BSR_L(a, b) CHOICE(bsrl ARG2(a,b), bsrl ARG2(a,b), _LTOG bsr ARG2(b,a)) +#define BSR_W(a, b) CHOICE(bsrw ARG2(a,b), bsrw ARG2(a,b), _WTOG bsr ARG2(b,a)) +#define BT_L(a, b) CHOICE(btl ARG2(a,b), btl ARG2(a,b), _LTOG bt ARG2(b,a)) +#define BT_W(a, b) CHOICE(btw ARG2(a,b), btw ARG2(a,b), _WTOG bt ARG2(b,a)) +#define BTC_L(a, b) CHOICE(btcl ARG2(a,b), btcl ARG2(a,b), _LTOG btc ARG2(b,a)) +#define BTC_W(a, b) CHOICE(btcw ARG2(a,b), btcw ARG2(a,b), _WTOG btc ARG2(b,a)) +#define BTR_L(a, b) CHOICE(btrl ARG2(a,b), btrl ARG2(a,b), _LTOG btr ARG2(b,a)) +#define BTR_W(a, b) CHOICE(btrw ARG2(a,b), btrw ARG2(a,b), _WTOG btr ARG2(b,a)) +#define BTS_L(a, b) CHOICE(btsl ARG2(a,b), btsl ARG2(a,b), _LTOG bts ARG2(b,a)) +#define BTS_W(a, b) CHOICE(btsw ARG2(a,b), btsw ARG2(a,b), _WTOG bts ARG2(b,a)) +#define CALL(a) CHOICE(call a, call a, call a) +#define CALLF(s,a) CHOICE(lcall ARG2(s,a), lcall ARG2(s,a), callf s:a) +#define CBW CHOICE(cbtw, cbw, cbw) +#define CWDE CHOICE(cwtd, cwde, cwde) +#define CLC CHOICE(clc, clc, clc) +#define CLD CHOICE(cld, cld, cld) +#define CLI CHOICE(cli, cli, cli) +#define CLTS CHOICE(clts, clts, clts) +#define CMC CHOICE(cmc, cmc, cmc) +#define CMP_L(a, b) CHOICE(cmpl ARG2(a,b), cmpl ARG2(a,b), _LTOG cmp ARG2(b,a)) +#define CMP_W(a, b) CHOICE(cmpw ARG2(a,b), cmpw ARG2(a,b), _WTOG cmp ARG2(b,a)) +#define CMP_B(a, b) CHOICE(cmpb ARG2(a,b), cmpb ARG2(a,b), cmpb ARG2(b,a)) +#define CMPS_L CHOICE(cmpsl, cmpsl, _LTOG cmps) +#define CMPS_W CHOICE(cmpsw, cmpsw, _WTOG cmps) +#define CMPS_B CHOICE(cmpsb, cmpsb, cmpsb) +#define CWD CHOICE(cwtl, cwd, cwd) +#define CDQ CHOICE(cltd, cdq, cdq) +#define DAA CHOICE(daa, daa, daa) +#define DAS CHOICE(das, das, das) +#define DEC_L(a) CHOICE(decl a, decl a, _LTOG dec a) +#define DEC_W(a) CHOICE(decw a, decw a, _WTOG dec a) +#define DEC_B(a) CHOICE(decb a, decb a, decb a) +#define DIV_L(a) CHOICE(divl a, divl a, div a) +#define DIV_W(a) CHOICE(divw a, divw a, div a) +#define DIV_B(a) CHOICE(divb a, divb a, divb a) +#define ENTER(a,b) CHOICE(enter ARG2(a,b), enter ARG2(a,b), enter ARG2(b,a)) +#define HLT CHOICE(hlt, hlt, hlt) +#define IDIV_L(a) CHOICE(idivl a, idivl a, _LTOG idiv a) +#define IDIV_W(a) CHOICE(idivw a, idivw a, _WTOG idiv a) +#define IDIV_B(a) CHOICE(idivb a, idivb a, idivb a) +/* More forms than this for imul!! */ +#define IMUL_L(a, b) CHOICE(imull ARG2(a,b), imull ARG2(a,b), _LTOG imul ARG2(b,a)) +#define IMUL_W(a, b) CHOICE(imulw ARG2(a,b), imulw ARG2(a,b), _WTOG imul ARG2(b,a)) +#define IMUL_B(a) CHOICE(imulb a, imulb a, imulb a) +#define IN_L CHOICE(inl (DX), inl ARG2(DX,EAX), _LTOG in DX) +#define IN_W CHOICE(inw (DX), inw ARG2(DX,AX), _WTOG in DX) +#define IN_B CHOICE(inb (DX), inb ARG2(DX,AL), inb DX) +/* Please AS code writer: use the following ONLY, if you refer to ports<256 + * directly, but not in IN1_W(DX), for instance, even if IN1_ looks nicer + */ +#if defined (sun) +#define IN1_L(a) CHOICE(inl (a), inl ARG2(a,EAX), _LTOG in a) +#define IN1_W(a) CHOICE(inw (a), inw ARG2(a,AX), _WTOG in a) +#define IN1_B(a) CHOICE(inb (a), inb ARG2(a,AL), inb a) +#else +#define IN1_L(a) CHOICE(inl a, inl ARG2(a,EAX), _LTOG in a) +#define IN1_W(a) CHOICE(inw a, inw ARG2(a,AX), _WTOG in a) +#define IN1_B(a) CHOICE(inb a, inb ARG2(a,AL), inb a) +#endif +#define INC_L(a) CHOICE(incl a, incl a, _LTOG inc a) +#define INC_W(a) CHOICE(incw a, incw a, _WTOG inc a) +#define INC_B(a) CHOICE(incb a, incb a, incb a) +#define INS_L CHOICE(insl, insl, _LTOG ins) +#define INS_W CHOICE(insw, insw, _WTOG ins) +#define INS_B CHOICE(insb, insb, insb) +#define INT(a) CHOICE(int a, int a, int a) +#define INT3 CHOICE(int CONST(3), int3, int CONST(3)) +#define INTO CHOICE(into, into, into) +#define IRET CHOICE(iret, iret, iret) +#define IRETD CHOICE(iret, iret, iretd) +#define JA(a) CHOICE(ja a, ja a, ja a) +#define JAE(a) CHOICE(jae a, jae a, jae a) +#define JB(a) CHOICE(jb a, jb a, jb a) +#define JBE(a) CHOICE(jbe a, jbe a, jbe a) +#define JC(a) CHOICE(jc a, jc a, jc a) +#define JE(a) CHOICE(je a, je a, je a) +#define JG(a) CHOICE(jg a, jg a, jg a) +#define JGE(a) CHOICE(jge a, jge a, jge a) +#define JL(a) CHOICE(jl a, jl a, jl a) +#define JLE(a) CHOICE(jle a, jle a, jle a) +#define JNA(a) CHOICE(jna a, jna a, jna a) +#define JNAE(a) CHOICE(jnae a, jnae a, jnae a) +#define JNB(a) CHOICE(jnb a, jnb a, jnb a) +#define JNBE(a) CHOICE(jnbe a, jnbe a, jnbe a) +#define JNC(a) CHOICE(jnc a, jnc a, jnc a) +#define JNE(a) CHOICE(jne a, jne a, jne a) +#define JNG(a) CHOICE(jng a, jng a, jng a) +#define JNGE(a) CHOICE(jnge a, jnge a, jnge a) +#define JNL(a) CHOICE(jnl a, jnl a, jnl a) +#define JNLE(a) CHOICE(jnle a, jnle a, jnle a) +#define JNO(a) CHOICE(jno a, jno a, jno a) +#define JNP(a) CHOICE(jnp a, jnp a, jnp a) +#define JNS(a) CHOICE(jns a, jns a, jns a) +#define JNZ(a) CHOICE(jnz a, jnz a, jnz a) +#define JO(a) CHOICE(jo a, jo a, jo a) +#define JP(a) CHOICE(jp a, jp a, jp a) +#define JPE(a) CHOICE(jpe a, jpe a, jpe a) +#define JPO(a) CHOICE(jpo a, jpo a, jpo a) +#define JS(a) CHOICE(js a, js a, js a) +#define JZ(a) CHOICE(jz a, jz a, jz a) +#define JMP(a) CHOICE(jmp a, jmp a, jmp a) +#define JMPF(s,a) CHOICE(ljmp ARG2(s,a), ljmp ARG2(s,a), jmpf s:a) +#define LAHF CHOICE(lahf, lahf, lahf) +#if !defined(_REAL_MODE) && !defined(_V86_MODE) +#define LAR(a, b) CHOICE(lar ARG2(a, b), lar ARG2(a, b), lar ARG2(b, a)) +#endif +#define LEA_L(a, b) CHOICE(leal ARG2(a,b), leal ARG2(a,b), _LTOG lea ARG2(b,a)) +#define LEA_W(a, b) CHOICE(leaw ARG2(a,b), leaw ARG2(a,b), _WTOG lea ARG2(b,a)) +#define LEAVE CHOICE(leave, leave, leave) +#define LGDT(a) CHOICE(lgdt a, lgdt a, lgdt a) +#define LIDT(a) CHOICE(lidt a, lidt a, lidt a) +#define LDS(a, b) CHOICE(ldsl ARG2(a,b), lds ARG2(a,b), lds ARG2(b,a)) +#define LES(a, b) CHOICE(lesl ARG2(a,b), les ARG2(a,b), les ARG2(b,a)) +#define LFS(a, b) CHOICE(lfsl ARG2(a,b), lfs ARG2(a,b), lfs ARG2(b,a)) +#define LGS(a, b) CHOICE(lgsl ARG2(a,b), lgs ARG2(a,b), lgs ARG2(b,a)) +#define LSS(a, b) CHOICE(lssl ARG2(a,b), lss ARG2(a,b), lss ARG2(b,a)) +#define LLDT(a) CHOICE(lldt a, lldt a, lldt a) +#define LMSW(a) CHOICE(lmsw a, lmsw a, lmsw a) +#define LOCK CHOICE(lock, lock, lock) +#define LODS_L CHOICE(lodsl, lodsl, _LTOG lods) +#define LODS_W CHOICE(lodsw, lodsw, _WTOG lods) +#define LODS_B CHOICE(lodsb, lodsb, lodsb) +#define LOOP(a) CHOICE(loop a, loop a, loop a) +#define LOOPE(a) CHOICE(loope a, loope a, loope a) +#define LOOPZ(a) CHOICE(loopz a, loopz a, loopz a) +#define LOOPNE(a) CHOICE(loopne a, loopne a, loopne a) +#define LOOPNZ(a) CHOICE(loopnz a, loopnz a, loopnz a) +#if !defined(_REAL_MODE) && !defined(_V86_MODE) +#define LSL(a, b) CHOICE(lsl ARG2(a,b), lsl ARG2(a,b), lsl ARG2(b,a)) +#endif +#define LTR(a) CHOICE(ltr a, ltr a, ltr a) +#define MOV_SR(a, b) CHOICE(movw ARG2(a,b), mov ARG2(a,b), mov ARG2(b,a)) +#define MOV_L(a, b) CHOICE(movl ARG2(a,b), movl ARG2(a,b), _LTOG mov ARG2(b,a)) +#define MOV_W(a, b) CHOICE(movw ARG2(a,b), movw ARG2(a,b), _WTOG mov ARG2(b,a)) +#define MOV_B(a, b) CHOICE(movb ARG2(a,b), movb ARG2(a,b), movb ARG2(b,a)) +#define MOVS_L CHOICE(movsl, movsl, _LTOG movs) +#define MOVS_W CHOICE(movsw, movsw, _WTOG movs) +#define MOVS_B CHOICE(movsb, movsb, movsb) +#define MOVSX_BL(a, b) CHOICE(movsbl ARG2(a,b), movsbl ARG2(a,b), movsx ARG2(b,a)) +#define MOVSX_BW(a, b) CHOICE(movsbw ARG2(a,b), movsbw ARG2(a,b), movsx ARG2(b,a)) +#define MOVSX_WL(a, b) CHOICE(movswl ARG2(a,b), movswl ARG2(a,b), movsx ARG2(b,a)) +#define MOVZX_BL(a, b) CHOICE(movzbl ARG2(a,b), movzbl ARG2(a,b), movzx ARG2(b,a)) +#define MOVZX_BW(a, b) CHOICE(movzbw ARG2(a,b), movzbw ARG2(a,b), movzx ARG2(b,a)) +#define MOVZX_WL(a, b) CHOICE(movzwl ARG2(a,b), movzwl ARG2(a,b), movzx ARG2(b,a)) +#define MUL_L(a) CHOICE(mull a, mull a, _LTOG mul a) +#define MUL_W(a) CHOICE(mulw a, mulw a, _WTOG mul a) +#define MUL_B(a) CHOICE(mulb a, mulb a, mulb a) +#define NEG_L(a) CHOICE(negl a, negl a, _LTOG neg a) +#define NEG_W(a) CHOICE(negw a, negw a, _WTOG neg a) +#define NEG_B(a) CHOICE(negb a, negb a, negb a) +#define NOP CHOICE(nop, nop, nop) +#define NOT_L(a) CHOICE(notl a, notl a, _LTOG not a) +#define NOT_W(a) CHOICE(notw a, notw a, _WTOG not a) +#define NOT_B(a) CHOICE(notb a, notb a, notb a) +#define OR_L(a,b) CHOICE(orl ARG2(a,b), orl ARG2(a,b), _LTOG or ARG2(b,a)) +#define OR_W(a,b) CHOICE(orw ARG2(a,b), orw ARG2(a,b), _WTOG or ARG2(b,a)) +#define OR_B(a,b) CHOICE(orb ARG2(a,b), orb ARG2(a,b), orb ARG2(b,a)) +#define OUT_L CHOICE(outl (DX), outl ARG2(EAX,DX), _LTOG out DX) +#define OUT_W CHOICE(outw (DX), outw ARG2(AX,DX), _WTOG out DX) +#define OUT_B CHOICE(outb (DX), outb ARG2(AL,DX), outb DX) +/* Please AS code writer: use the following ONLY, if you refer to ports<256 + * directly, but not in OUT1_W(DX), for instance, even if OUT1_ looks nicer + */ +#define OUT1_L(a) CHOICE(outl (a), outl ARG2(EAX,a), _LTOG out a) +#define OUT1_W(a) CHOICE(outw (a), outw ARG2(AX,a), _WTOG out a) +#define OUT1_B(a) CHOICE(outb (a), outb ARG2(AL,a), outb a) +#define OUTS_L CHOICE(outsl, outsl, _LTOG outs) +#define OUTS_W CHOICE(outsw, outsw, _WTOG outs) +#define OUTS_B CHOICE(outsb, outsb, outsb) +#define POP_SR(a) CHOICE(pop a, pop a, pop a) +#define POP_L(a) CHOICE(popl a, popl a, _LTOG pop a) +#define POP_W(a) CHOICE(popw a, popw a, _WTOG pop a) +#define POPA_L CHOICE(popal, popal, _LTOG popa) +#define POPA_W CHOICE(popaw, popaw, _WTOG popa) +#define POPF_L CHOICE(popfl, popfl, _LTOG popf) +#define POPF_W CHOICE(popfw, popfw, _WTOG popf) +#define PUSH_SR(a) CHOICE(push a, push a, push a) +#define PUSH_L(a) CHOICE(pushl a, pushl a, _LTOG push a) +#define PUSH_W(a) CHOICE(pushw a, pushw a, _WTOG push a) +#define PUSH_B(a) CHOICE(push a, pushb a, push a) +#define PUSHA_L CHOICE(pushal, pushal, _LTOG pusha) +#define PUSHA_W CHOICE(pushaw, pushaw, _WTOG pusha) +#define PUSHF_L CHOICE(pushfl, pushfl, _LTOG pushf) +#define PUSHF_W CHOICE(pushfw, pushfw, _WTOG pushf) +#define RCL_L(a, b) CHOICE(rcll ARG2(a,b), rcll ARG2(a,b), _LTOG rcl ARG2(b,a)) +#define RCL_W(a, b) CHOICE(rclw ARG2(a,b), rclw ARG2(a,b), _WTOG rcl ARG2(b,a)) +#define RCL_B(a, b) CHOICE(rclb ARG2(a,b), rclb ARG2(a,b), rclb ARG2(b,a)) +#define RCR_L(a, b) CHOICE(rcrl ARG2(a,b), rcrl ARG2(a,b), _LTOG rcr ARG2(b,a)) +#define RCR_W(a, b) CHOICE(rcrw ARG2(a,b), rcrw ARG2(a,b), _WTOG rcr ARG2(b,a)) +#define RCR_B(a, b) CHOICE(rcrb ARG2(a,b), rcrb ARG2(a,b), rcrb ARG2(b,a)) +#define ROL_L(a, b) CHOICE(roll ARG2(a,b), roll ARG2(a,b), _LTOG rol ARG2(b,a)) +#define ROL_W(a, b) CHOICE(rolw ARG2(a,b), rolw ARG2(a,b), _WTOG rol ARG2(b,a)) +#define ROL_B(a, b) CHOICE(rolb ARG2(a,b), rolb ARG2(a,b), rolb ARG2(b,a)) +#define ROR_L(a, b) CHOICE(rorl ARG2(a,b), rorl ARG2(a,b), _LTOG ror ARG2(b,a)) +#define ROR_W(a, b) CHOICE(rorw ARG2(a,b), rorw ARG2(a,b), _WTOG ror ARG2(b,a)) +#define ROR_B(a, b) CHOICE(rorb ARG2(a,b), rorb ARG2(a,b), rorb ARG2(b,a)) +#define REP CHOICE(rep ;, rep ;, repe) +#define REPE CHOICE(repz ;, repe ;, repe) +#define REPNE CHOICE(repnz ;, repne ;, repne) +#define REPNZ REPNE +#define REPZ REPE +#define RET CHOICE(ret, ret, ret) +#define SAHF CHOICE(sahf, sahf, sahf) +#define SAL_L(a, b) CHOICE(sall ARG2(a,b), sall ARG2(a,b), _LTOG sal ARG2(b,a)) +#define SAL_W(a, b) CHOICE(salw ARG2(a,b), salw ARG2(a,b), _WTOG sal ARG2(b,a)) +#define SAL_B(a, b) CHOICE(salb ARG2(a,b), salb ARG2(a,b), salb ARG2(b,a)) +#define SAR_L(a, b) CHOICE(sarl ARG2(a,b), sarl ARG2(a,b), _LTOG sar ARG2(b,a)) +#define SAR_W(a, b) CHOICE(sarw ARG2(a,b), sarw ARG2(a,b), _WTOG sar ARG2(b,a)) +#define SAR_B(a, b) CHOICE(sarb ARG2(a,b), sarb ARG2(a,b), sarb ARG2(b,a)) +#define SBB_L(a, b) CHOICE(sbbl ARG2(a,b), sbbl ARG2(a,b), _LTOG sbb ARG2(b,a)) +#define SBB_W(a, b) CHOICE(sbbw ARG2(a,b), sbbw ARG2(a,b), _WTOG sbb ARG2(b,a)) +#define SBB_B(a, b) CHOICE(sbbb ARG2(a,b), sbbb ARG2(a,b), sbbb ARG2(b,a)) +#define SCAS_L CHOICE(scasl, scasl, _LTOG scas) +#define SCAS_W CHOICE(scasw, scasw, _WTOG scas) +#define SCAS_B CHOICE(scasb, scasb, scasb) +#define SETA(a) CHOICE(seta a, seta a, seta a) +#define SETAE(a) CHOICE(setae a, setae a, setae a) +#define SETB(a) CHOICE(setb a, setb a, setb a) +#define SETBE(a) CHOICE(setbe a, setbe a, setbe a) +#define SETC(a) CHOICE(setc a, setb a, setb a) +#define SETE(a) CHOICE(sete a, sete a, sete a) +#define SETG(a) CHOICE(setg a, setg a, setg a) +#define SETGE(a) CHOICE(setge a, setge a, setge a) +#define SETL(a) CHOICE(setl a, setl a, setl a) +#define SETLE(a) CHOICE(setle a, setle a, setle a) +#define SETNA(a) CHOICE(setna a, setna a, setna a) +#define SETNAE(a) CHOICE(setnae a, setnae a, setnae a) +#define SETNB(a) CHOICE(setnb a, setnb a, setnb a) +#define SETNBE(a) CHOICE(setnbe a, setnbe a, setnbe a) +#define SETNC(a) CHOICE(setnc a, setnb a, setnb a) +#define SETNE(a) CHOICE(setne a, setne a, setne a) +#define SETNG(a) CHOICE(setng a, setng a, setng a) +#define SETNGE(a) CHOICE(setnge a, setnge a, setnge a) +#define SETNL(a) CHOICE(setnl a, setnl a, setnl a) +#define SETNLE(a) CHOICE(setnle a, setnle a, setnle a) +#define SETNO(a) CHOICE(setno a, setno a, setno a) +#define SETNP(a) CHOICE(setnp a, setnp a, setnp a) +#define SETNS(a) CHOICE(setns a, setns a, setna a) +#define SETNZ(a) CHOICE(setnz a, setnz a, setnz a) +#define SETO(a) CHOICE(seto a, seto a, seto a) +#define SETP(a) CHOICE(setp a, setp a, setp a) +#define SETPE(a) CHOICE(setpe a, setpe a, setpe a) +#define SETPO(a) CHOICE(setpo a, setpo a, setpo a) +#define SETS(a) CHOICE(sets a, sets a, seta a) +#define SETZ(a) CHOICE(setz a, setz a, setz a) +#define SGDT(a) CHOICE(sgdt a, sgdt a, sgdt a) +#define SIDT(a) CHOICE(sidt a, sidt a, sidt a) +#define SHL_L(a, b) CHOICE(shll ARG2(a,b), shll ARG2(a,b), _LTOG shl ARG2(b,a)) +#define SHL_W(a, b) CHOICE(shlw ARG2(a,b), shlw ARG2(a,b), _WTOG shl ARG2(b,a)) +#define SHL_B(a, b) CHOICE(shlb ARG2(a,b), shlb ARG2(a,b), shlb ARG2(b,a)) +#define SHLD_L(a,b,c) CHOICE(shldl ARG3(a,b,c), shldl ARG3(a,b,c), _LTOG shld ARG3(c,b,a)) +#define SHLD2_L(a,b) CHOICE(shldl ARG2(a,b), shldl ARG3(CL,a,b), _LTOG shld ARG3(b,a,CL)) +#define SHLD_W(a,b,c) CHOICE(shldw ARG3(a,b,c), shldw ARG3(a,b,c), _WTOG shld ARG3(c,b,a)) +#define SHLD2_W(a,b) CHOICE(shldw ARG2(a,b), shldw ARG3(CL,a,b), _WTOG shld ARG3(b,a,CL)) +#define SHR_L(a, b) CHOICE(shrl ARG2(a,b), shrl ARG2(a,b), _LTOG shr ARG2(b,a)) +#define SHR_W(a, b) CHOICE(shrw ARG2(a,b), shrw ARG2(a,b), _WTOG shr ARG2(b,a)) +#define SHR_B(a, b) CHOICE(shrb ARG2(a,b), shrb ARG2(a,b), shrb ARG2(b,a)) +#define SHRD_L(a,b,c) CHOICE(shrdl ARG3(a,b,c), shrdl ARG3(a,b,c), _LTOG shrd ARG3(c,b,a)) +#define SHRD2_L(a,b) CHOICE(shrdl ARG2(a,b), shrdl ARG3(CL,a,b), _LTOG shrd ARG3(b,a,CL)) +#define SHRD_W(a,b,c) CHOICE(shrdw ARG3(a,b,c), shrdw ARG3(a,b,c), _WTOG shrd ARG3(c,b,a)) +#define SHRD2_W(a,b) CHOICE(shrdw ARG2(a,b), shrdw ARG3(CL,a,b), _WTOG shrd ARG3(b,a,CL)) +#define SLDT(a) CHOICE(sldt a, sldt a, sldt a) +#define SMSW(a) CHOICE(smsw a, smsw a, smsw a) +#define STC CHOICE(stc, stc, stc) +#define STD CHOICE(std, std, std) +#define STI CHOICE(sti, sti, sti) +#define STOS_L CHOICE(stosl, stosl, _LTOG stos) +#define STOS_W CHOICE(stosw, stosw, _WTOG stos) +#define STOS_B CHOICE(stosb, stosb, stosb) +#define STR(a) CHOICE(str a, str a, str a) +#define SUB_L(a, b) CHOICE(subl ARG2(a,b), subl ARG2(a,b), _LTOG sub ARG2(b,a)) +#define SUB_W(a, b) CHOICE(subw ARG2(a,b), subw ARG2(a,b), _WTOG sub ARG2(b,a)) +#define SUB_B(a, b) CHOICE(subb ARG2(a,b), subb ARG2(a,b), subb ARG2(b,a)) +#define TEST_L(a, b) CHOICE(testl ARG2(a,b), testl ARG2(a,b), _LTOG test ARG2(b,a)) +#define TEST_W(a, b) CHOICE(testw ARG2(a,b), testw ARG2(a,b), _WTOG test ARG2(b,a)) +#define TEST_B(a, b) CHOICE(testb ARG2(a,b), testb ARG2(a,b), testb ARG2(b,a)) +#define VERR(a) CHOICE(verr a, verr a, verr a) +#define VERW(a) CHOICE(verw a, verw a, verw a) +#define WAIT CHOICE(wait, wait, wait) +#define XCHG_L(a, b) CHOICE(xchgl ARG2(a,b), xchgl ARG2(a,b), _LTOG xchg ARG2(b,a)) +#define XCHG_W(a, b) CHOICE(xchgw ARG2(a,b), xchgw ARG2(a,b), _WTOG xchg ARG2(b,a)) +#define XCHG_B(a, b) CHOICE(xchgb ARG2(a,b), xchgb ARG2(a,b), xchgb ARG2(b,a)) +#define XLAT CHOICE(xlat, xlat, xlat) +#define XOR_L(a, b) CHOICE(xorl ARG2(a,b), xorl ARG2(a,b), _LTOG xor ARG2(b,a)) +#define XOR_W(a, b) CHOICE(xorw ARG2(a,b), xorw ARG2(a,b), _WTOG xor ARG2(b,a)) +#define XOR_B(a, b) CHOICE(xorb ARG2(a,b), xorb ARG2(a,b), xorb ARG2(b,a)) + + +/* Floating Point Instructions */ +#define F2XM1 CHOICE(f2xm1, f2xm1, f2xm1) +#define FABS CHOICE(fabs, fabs, fabs) +#define FADD_D(a) CHOICE(faddl a, faddl a, faddd a) +#define FADD_S(a) CHOICE(fadds a, fadds a, fadds a) +#define FADD2(a, b) CHOICE(fadd ARG2(a,b), fadd ARG2(a,b), fadd ARG2(b,a)) +#define FADDP(a, b) CHOICE(faddp ARG2(a,b), faddp ARG2(a,b), faddp ARG2(b,a)) +#define FIADD_L(a) CHOICE(fiaddl a, fiaddl a, fiaddl a) +#define FIADD_W(a) CHOICE(fiadd a, fiadds a, fiadds a) +#define FBLD(a) CHOICE(fbld a, fbld a, fbld a) +#define FBSTP(a) CHOICE(fbstp a, fbstp a, fbstp a) +#define FCHS CHOICE(fchs, fchs, fchs) +#define FCLEX CHOICE(fclex, wait; fnclex, wait; fclex) +#define FNCLEX CHOICE(fnclex, fnclex, fclex) +#define FCOM(a) CHOICE(fcom a, fcom a, fcom a) +#define FCOM_D(a) CHOICE(fcoml a, fcoml a, fcomd a) +#define FCOM_S(a) CHOICE(fcoms a, fcoms a, fcoms a) +#define FCOMP(a) CHOICE(fcomp a, fcomp a, fcomp a) +#define FCOMP_D(a) CHOICE(fcompl a, fcompl a, fcompd a) +#define FCOMP_S(a) CHOICE(fcomps a, fcomps a, fcomps a) +#define FCOMPP CHOICE(fcompp, fcompp, fcompp) +#define FCOS CHOICE(fcos, fcos, fcos) +#define FDECSTP CHOICE(fdecstp, fdecstp, fdecstp) +#define FDIV_D(a) CHOICE(fdivl a, fdivl a, fdivd a) +#define FDIV_S(a) CHOICE(fdivs a, fdivs a, fdivs a) +#define FDIV2(a, b) CHOICE(fdiv ARG2(a,b), fdiv ARG2(a,b), fdiv ARG2(b,a)) +#define FDIVP(a, b) CHOICE(fdivp ARG2(a,b), fdivp ARG2(a,b), fdivp ARG2(b,a)) +#define FIDIV_L(a) CHOICE(fidivl a, fidivl a, fidivl a) +#define FIDIV_W(a) CHOICE(fidiv a, fidivs a, fidivs a) +#define FDIVR_D(a) CHOICE(fdivrl a, fdivrl a, fdivrd a) +#define FDIVR_S(a) CHOICE(fdivrs a, fdivrs a, fdivrs a) +#define FDIVR2(a, b) CHOICE(fdivr ARG2(a,b), fdivr ARG2(a,b), fdivr ARG2(b,a)) +#define FDIVRP(a, b) CHOICE(fdivrp ARG2(a,b), fdivrp ARG2(a,b), fdivrp ARG2(b,a)) +#define FIDIVR_L(a) CHOICE(fidivrl a, fidivrl a, fidivrl a) +#define FIDIVR_W(a) CHOICE(fidivr a, fidivrs a, fidivrs a) +#define FFREE(a) CHOICE(ffree a, ffree a, ffree a) +#define FICOM_L(a) CHOICE(ficoml a, ficoml a, ficoml a) +#define FICOM_W(a) CHOICE(ficom a, ficoms a, ficoms a) +#define FICOMP_L(a) CHOICE(ficompl a, ficompl a, ficompl a) +#define FICOMP_W(a) CHOICE(ficomp a, ficomps a, ficomps a) +#define FILD_Q(a) CHOICE(fildll a, fildq a, fildq a) +#define FILD_L(a) CHOICE(fildl a, fildl a, fildl a) +#define FILD_W(a) CHOICE(fild a, filds a, filds a) +#define FINCSTP CHOICE(fincstp, fincstp, fincstp) +#define FINIT CHOICE(finit, wait; fninit, wait; finit) +#define FNINIT CHOICE(fninit, fninit, finit) +#define FIST_L(a) CHOICE(fistl a, fistl a, fistl a) +#define FIST_W(a) CHOICE(fist a, fists a, fists a) +#define FISTP_Q(a) CHOICE(fistpll a, fistpq a, fistpq a) +#define FISTP_L(a) CHOICE(fistpl a, fistpl a, fistpl a) +#define FISTP_W(a) CHOICE(fistp a, fistps a, fistps a) +#define FLD_X(a) CHOICE(fldt a, fldt a, fldx a) /* 80 bit data type! */ +#define FLD_D(a) CHOICE(fldl a, fldl a, fldd a) +#define FLD_S(a) CHOICE(flds a, flds a, flds a) +#define FLD1 CHOICE(fld1, fld1, fld1) +#define FLDL2T CHOICE(fldl2t, fldl2t, fldl2t) +#define FLDL2E CHOICE(fldl2e, fldl2e, fldl2e) +#define FLDPI CHOICE(fldpi, fldpi, fldpi) +#define FLDLG2 CHOICE(fldlg2, fldlg2, fldlg2) +#define FLDLN2 CHOICE(fldln2, fldln2, fldln2) +#define FLDZ CHOICE(fldz, fldz, fldz) +#define FLDCW(a) CHOICE(fldcw a, fldcw a, fldcw a) +#define FLDENV(a) CHOICE(fldenv a, fldenv a, fldenv a) +#define FMUL_S(a) CHOICE(fmuls a, fmuls a, fmuls a) +#define FMUL_D(a) CHOICE(fmull a, fmull a, fmuld a) +#define FMUL2(a, b) CHOICE(fmul ARG2(a,b), fmul ARG2(a,b), fmul ARG2(b,a)) +#define FMULP(a, b) CHOICE(fmulp ARG2(a,b), fmulp ARG2(a,b), fmulp ARG2(b,a)) +#define FIMUL_L(a) CHOICE(fimull a, fimull a, fimull a) +#define FIMUL_W(a) CHOICE(fimul a, fimuls a, fimuls a) +#define FNOP CHOICE(fnop, fnop, fnop) +#define FPATAN CHOICE(fpatan, fpatan, fpatan) +#define FPREM CHOICE(fprem, fprem, fprem) +#define FPREM1 CHOICE(fprem1, fprem1, fprem1) +#define FPTAN CHOICE(fptan, fptan, fptan) +#define FRNDINT CHOICE(frndint, frndint, frndint) +#define FRSTOR(a) CHOICE(frstor a, frstor a, frstor a) +#define FSAVE(a) CHOICE(fsave a, wait; fnsave a, wait; fsave a) +#define FNSAVE(a) CHOICE(fnsave a, fnsave a, fsave a) +#define FSCALE CHOICE(fscale, fscale, fscale) +#define FSIN CHOICE(fsin, fsin, fsin) +#define FSINCOS CHOICE(fsincos, fsincos, fsincos) +#define FSQRT CHOICE(fsqrt, fsqrt, fsqrt) +#define FST_D(a) CHOICE(fstl a, fstl a, fstd a) +#define FST_S(a) CHOICE(fsts a, fsts a, fsts a) +#define FSTP_X(a) CHOICE(fstpt a, fstpt a, fstpx a) +#define FSTP_D(a) CHOICE(fstpl a, fstpl a, fstpd a) +#define FSTP_S(a) CHOICE(fstps a, fstps a, fstps a) +#define FSTP(a) CHOICE(fstp a, fstp a, fstp a) +#define FSTCW(a) CHOICE(fstcw a, wait; fnstcw a, wait; fstcw a) +#define FNSTCW(a) CHOICE(fnstcw a, fnstcw a, fstcw a) +#define FSTENV(a) CHOICE(fstenv a, wait; fnstenv a, fstenv a) +#define FNSTENV(a) CHOICE(fnstenv a, fnstenv a, fstenv a) +#define FSTSW(a) CHOICE(fstsw a, wait; fnstsw a, wait; fstsw a) +#define FNSTSW(a) CHOICE(fnstsw a, fnstsw a, fstsw a) +#define FSUB_S(a) CHOICE(fsubs a, fsubs a, fsubs a) +#define FSUB_D(a) CHOICE(fsubl a, fsubl a, fsubd a) +#define FSUB2(a, b) CHOICE(fsub ARG2(a,b), fsub ARG2(a,b), fsub ARG2(b,a)) +#define FSUBP(a, b) CHOICE(fsubp ARG2(a,b), fsubp ARG2(a,b), fsubp ARG2(b,a)) +#define FISUB_L(a) CHOICE(fisubl a, fisubl a, fisubl a) +#define FISUB_W(a) CHOICE(fisub a, fisubs a, fisubs a) +#define FSUBR_S(a) CHOICE(fsubrs a, fsubrs a, fsubrs a) +#define FSUBR_D(a) CHOICE(fsubrl a, fsubrl a, fsubrd a) +#define FSUBR2(a, b) CHOICE(fsubr ARG2(a,b), fsubr ARG2(a,b), fsubr ARG2(b,a)) +#define FSUBRP(a, b) CHOICE(fsubrp ARG2(a,b), fsubrp ARG2(a,b), fsubrp ARG2(b,a)) +#define FISUBR_L(a) CHOICE(fisubrl a, fisubrl a, fisubrl a) +#define FISUBR_W(a) CHOICE(fisubr a, fisubrs a, fisubrs a) +#define FTST CHOICE(ftst, ftst, ftst) +#define FUCOM(a) CHOICE(fucom a, fucom a, fucom a) +#define FUCOMP(a) CHOICE(fucomp a, fucomp a, fucomp a) +#define FUCOMPP CHOICE(fucompp, fucompp, fucompp) +#define FWAIT CHOICE(wait, wait, wait) +#define FXAM CHOICE(fxam, fxam, fxam) +#define FXCH(a) CHOICE(fxch a, fxch a, fxch a) +#define FXTRACT CHOICE(fxtract, fxtract, fxtract) +#define FYL2X CHOICE(fyl2x, fyl2x, fyl2x) +#define FYL2XP1 CHOICE(fyl2xp1, fyl2xp1, fyl2xp1) + +/* New instructions */ +#define CPUID CHOICE(D_BYTE ARG2(15, 162), cpuid, D_BYTE ARG2(15, 162)) +#define RDTSC CHOICE(D_BYTE ARG2(15, 49), rdtsc, D_BYTE ARG2(15, 49)) + +#else /* NASM_ASSEMBLER || MASM_ASSEMBLER is defined */ + + /****************************************/ + /* */ + /* Intel style assemblers. */ + /* (NASM and MASM) */ + /* */ + /****************************************/ + +#define P_EAX EAX +#define L_EAX EAX +#define W_AX AX +#define B_AH AH +#define B_AL AL + +#define P_EBX EBX +#define L_EBX EBX +#define W_BX BX +#define B_BH BH +#define B_BL BL + +#define P_ECX ECX +#define L_ECX ECX +#define W_CX CX +#define B_CH CH +#define B_CL CL + +#define P_EDX EDX +#define L_EDX EDX +#define W_DX DX +#define B_DH DH +#define B_DL DL + +#define P_EBP EBP +#define L_EBP EBP +#define W_BP BP + +#define P_ESI ESI +#define L_ESI ESI +#define W_SI SI + +#define P_EDI EDI +#define L_EDI EDI +#define W_DI DI + +#define P_ESP ESP +#define L_ESP ESP +#define W_SP SP + +#define W_CS CS +#define W_SS SS +#define W_DS DS +#define W_ES ES +#define W_FS FS +#define W_GS GS + +#define X_ST ST +#define D_ST ST +#define L_ST ST + +#define P_MM0 mm0 +#define P_MM1 mm1 +#define P_MM2 mm2 +#define P_MM3 mm3 +#define P_MM4 mm4 +#define P_MM5 mm5 +#define P_MM6 mm6 +#define P_MM7 mm7 + +#define P_XMM0 xmm0 +#define P_XMM1 xmm1 +#define P_XMM2 xmm2 +#define P_XMM3 xmm3 +#define P_XMM4 xmm4 +#define P_XMM5 xmm5 +#define P_XMM6 xmm6 +#define P_XMM7 xmm7 + +#define CONCAT(x, y) x ## y + +#if defined(NASM_ASSEMBLER) + +#define ST(n) st ## n +#define ST0 st0 + +#define TBYTE_PTR tword +#define QWORD_PTR qword +#define DWORD_PTR dword +#define WORD_PTR word +#define BYTE_PTR byte + +#define OFFSET + +#define GLOBL GLOBAL +#define ALIGNTEXT32 ALIGN 32 +#define ALIGNTEXT16 ALIGN 16 +#define ALIGNTEXT8 ALIGN 8 +#define ALIGNTEXT4 ALIGN 4 +#define ALIGNTEXT2 ALIGN 2 +#define ALIGNTEXT32ifNOP ALIGN 32 +#define ALIGNTEXT16ifNOP ALIGN 16 +#define ALIGNTEXT8ifNOP ALIGN 8 +#define ALIGNTEXT4ifNOP ALIGN 4 +#define ALIGNDATA32 ALIGN 32 +#define ALIGNDATA16 ALIGN 16 +#define ALIGNDATA8 ALIGN 8 +#define ALIGNDATA4 ALIGN 4 +#define ALIGNDATA2 ALIGN 2 +#define FILE(s) +#define STRING(s) db s +#define D_LONG dd +#define D_WORD dw +#define D_BYTE db +/* #define SPACE */ +/* #define COMM */ +#if defined(__WATCOMC__) +SECTION _TEXT public align=16 class=CODE use32 flat +SECTION _DATA public align=16 class=DATA use32 flat +#define SEG_TEXT SECTION _TEXT +#define SEG_DATA SECTION _DATA +#define SEG_BSS SECTION .bss +#else +#define SEG_DATA SECTION .data +#define SEG_TEXT SECTION .text +#define SEG_BSS SECTION .bss +#endif + +#define D_SPACE(n) db n REP 0 + +#define AS_BEGIN + +/* Jcc's should be handled better than this... */ +#define NEAR near + +#else /* MASM */ + +#define TBYTE_PTR tbyte ptr +#define QWORD_PTR qword ptr +#define DWORD_PTR dword ptr +#define WORD_PTR word ptr +#define BYTE_PTR byte ptr + +#define OFFSET offset + +#define GLOBL GLOBAL +#define ALIGNTEXT32 ALIGN 32 +#define ALIGNTEXT16 ALIGN 16 +#define ALIGNTEXT8 ALIGN 8 +#define ALIGNTEXT4 ALIGN 4 +#define ALIGNTEXT2 ALIGN 2 +#define ALIGNTEXT32ifNOP ALIGN 32 +#define ALIGNTEXT16ifNOP ALIGN 16 +#define ALIGNTEXT8ifNOP ALIGN 8 +#define ALIGNTEXT4ifNOP ALIGN 4 +#define ALIGNDATA32 ALIGN 32 +#define ALIGNDATA16 ALIGN 16 +#define ALIGNDATA8 ALIGN 8 +#define ALIGNDATA4 ALIGN 4 +#define ALIGNDATA2 ALIGN 2 +#define FILE(s) +#define STRING(s) db s +#define D_LONG dd +#define D_WORD dw +#define D_BYTE db +/* #define SPACE */ +/* #define COMM */ +#define SEG_DATA .DATA +#define SEG_TEXT .CODE +#define SEG_BSS .DATA + +#define D_SPACE(n) db n REP 0 + +#define AS_BEGIN + +#define NEAR + +#endif + +#if defined(Lynx) || (defined(SYSV) || defined(SVR4)) \ + || (defined(linux) || defined(__OS2ELF__)) && defined(__ELF__) \ + || defined(__FreeBSD__) && __FreeBSD__ >= 3 +#define GLNAME(a) a +#else +#define GLNAME(a) _ ## a +#endif + +/* + * Addressing Modes + */ + +/* Immediate Mode */ +#define P_ADDR(a) OFFSET a +#define X_ADDR(a) OFFSET a +#define D_ADDR(a) OFFSET a +#define L_ADDR(a) OFFSET a +#define W_ADDR(a) OFFSET a +#define B_ADDR(a) OFFSET a + +#define P_CONST(a) a +#define X_CONST(a) a +#define D_CONST(a) a +#define L_CONST(a) a +#define W_CONST(a) a +#define B_CONST(a) a + +/* Indirect Mode */ +#define P_CONTENT(a) a +#define X_CONTENT(a) TBYTE_PTR a +#define D_CONTENT(a) QWORD_PTR a +#define L_CONTENT(a) DWORD_PTR a +#define W_CONTENT(a) WORD_PTR a +#define B_CONTENT(a) BYTE_PTR a + +/* Register a indirect */ +#define P_REGIND(a) [a] +#define X_REGIND(a) TBYTE_PTR [a] +#define D_REGIND(a) QWORD_PTR [a] +#define L_REGIND(a) DWORD_PTR [a] +#define W_REGIND(a) WORD_PTR [a] +#define B_REGIND(a) BYTE_PTR [a] + +/* Register b indirect plus displacement a */ +#define P_REGOFF(a, b) [b + a] +#define X_REGOFF(a, b) TBYTE_PTR [b + a] +#define D_REGOFF(a, b) QWORD_PTR [b + a] +#define L_REGOFF(a, b) DWORD_PTR [b + a] +#define W_REGOFF(a, b) WORD_PTR [b + a] +#define B_REGOFF(a, b) BYTE_PTR [b + a] + +/* Reg indirect Base + Index + Displacement - this is mainly for 16-bit mode + * which has no scaling + */ +#define P_REGBID(b, i, d) [b + i + d] +#define X_REGBID(b, i, d) TBYTE_PTR [b + i + d] +#define D_REGBID(b, i, d) QWORD_PTR [b + i + d] +#define L_REGBID(b, i, d) DWORD_PTR [b + i + d] +#define W_REGBID(b, i, d) WORD_PTR [b + i + d] +#define B_REGBID(b, i, d) BYTE_PTR [b + i + d] + +/* Reg indirect Base + (Index * Scale) */ +#define P_REGBIS(b, i, s) [b + i * s] +#define X_REGBIS(b, i, s) TBYTE_PTR [b + i * s] +#define D_REGBIS(b, i, s) QWORD_PTR [b + i * s] +#define L_REGBIS(b, i, s) DWORD_PTR [b + i * s] +#define W_REGBIS(b, i, s) WORD_PTR [b + i * s] +#define B_REGBIS(b, i, s) BYTE_PTR [b + i * s] + +/* Reg indirect Base + (Index * Scale) + Displacement */ +#define P_REGBISD(b, i, s, d) [b + i * s + d] +#define X_REGBISD(b, i, s, d) TBYTE_PTR [b + i * s + d] +#define D_REGBISD(b, i, s, d) QWORD_PTR [b + i * s + d] +#define L_REGBISD(b, i, s, d) DWORD_PTR [b + i * s + d] +#define W_REGBISD(b, i, s, d) WORD_PTR [b + i * s + d] +#define B_REGBISD(b, i, s, d) BYTE_PTR [b + i * s + d] + +/* Displaced Scaled Index: */ +#define P_REGDIS(d, i, s) [i * s + d] +#define X_REGDIS(d, i, s) TBYTE_PTR [i * s + d] +#define D_REGDIS(d, i, s) QWORD_PTR [i * s + d] +#define L_REGDIS(d, i, s) DWORD_PTR [i * s + d] +#define W_REGDIS(d, i, s) WORD_PTR [i * s + d] +#define B_REGDIS(d, i, s) BYTE_PTR [i * s + d] + +/* Indexed Base: */ +#define P_REGBI(b, i) [b + i] +#define X_REGBI(b, i) TBYTE_PTR [b + i] +#define D_REGBI(b, i) QWORD_PTR [b + i] +#define L_REGBI(b, i) DWORD_PTR [b + i] +#define W_REGBI(b, i) WORD_PTR [b + i] +#define B_REGBI(b, i) BYTE_PTR [b + i] + +/* Displaced Base: */ +#define P_REGDB(d, b) [b + d] +#define X_REGDB(d, b) TBYTE_PTR [b + d] +#define D_REGDB(d, b) QWORD_PTR [b + d] +#define L_REGDB(d, b) DWORD_PTR [b + d] +#define W_REGDB(d, b) WORD_PTR [b + d] +#define B_REGDB(d, b) BYTE_PTR [b + d] + +/* Variable indirect: */ +#define VARINDIRECT(var) var + +/* Use register contents as jump/call target: */ +#define CODEPTR(reg) reg + +/* + * Redefine assembler commands + */ + +#define P_(a) P_ ## a +#define X_(a) X_ ## a +#define D_(a) D_ ## a +#define S_(a) L_ ## a +#define L_(a) L_ ## a +#define W_(a) W_ ## a +#define B_(a) B_ ## a + +#define AAA aaa +#define AAD aad +#define AAM aam +#define AAS aas +#define ADC_L(a, b) adc L_(b), L_(a) +#define ADC_W(a, b) adc W_(b), W_(a) +#define ADC_B(a, b) adc B_(b), B_(a) +#define ADD_L(a, b) add L_(b), L_(a) +#define ADD_W(a, b) add W_(b), W_(a) +#define ADD_B(a, b) add B_(b), B_(a) +#define AND_L(a, b) and L_(b), L_(a) +#define AND_W(a, b) and W_(b), W_(a) +#define AND_B(a, b) and B_(b), B_(a) +#define ARPL(a,b) arpl W_(b), a +#define BOUND_L(a, b) bound L_(b), L_(a) +#define BOUND_W(a, b) bound W_(b), W_(a) +#define BSF_L(a, b) bsf L_(b), L_(a) +#define BSF_W(a, b) bsf W_(b), W_(a) +#define BSR_L(a, b) bsr L_(b), L_(a) +#define BSR_W(a, b) bsr W_(b), W_(a) +#define BT_L(a, b) bt L_(b), L_(a) +#define BT_W(a, b) bt W_(b), W_(a) +#define BTC_L(a, b) btc L_(b), L_(a) +#define BTC_W(a, b) btc W_(b), W_(a) +#define BTR_L(a, b) btr L_(b), L_(a) +#define BTR_W(a, b) btr W_(b), W_(a) +#define BTS_L(a, b) bts L_(b), L_(a) +#define BTS_W(a, b) bts W_(b), W_(a) +#define CALL(a) call a +#define CALLF(s,a) call far s:a +#define CBW cbw +#define CWDE cwde +#define CLC clc +#define CLD cld +#define CLI cli +#define CLTS clts +#define CMC cmc +#define CMP_L(a, b) cmp L_(b), L_(a) +#define CMP_W(a, b) cmp W_(b), W_(a) +#define CMP_B(a, b) cmp B_(b), B_(a) +#define CMPS_L cmpsd +#define CMPS_W cmpsw +#define CMPS_B cmpsb +#define CPUID cpuid +#define CWD cwd +#define CDQ cdq +#define DAA daa +#define DAS das +#define DEC_L(a) dec L_(a) +#define DEC_W(a) dec W_(a) +#define DEC_B(a) dec B_(a) +#define DIV_L(a) div L_(a) +#define DIV_W(a) div W_(a) +#define DIV_B(a) div B_(a) +#define ENTER(a,b) enter b, a +#define HLT hlt +#define IDIV_L(a) idiv L_(a) +#define IDIV_W(a) idiv W_(a) +#define IDIV_B(a) idiv B_(a) +#define IMUL_L(a, b) imul L_(b), L_(a) +#define IMUL_W(a, b) imul W_(b), W_(a) +#define IMUL_B(a) imul B_(a) +#define IN_L in EAX, DX +#define IN_W in AX, DX +#define IN_B in AL, DX +#define IN1_L(a) in1 L_(a) +#define IN1_W(a) in1 W_(a) +#define IN1_B(a) in1 B_(a) +#define INC_L(a) inc L_(a) +#define INC_W(a) inc W_(a) +#define INC_B(a) inc B_(a) +#define INS_L ins +#define INS_W ins +#define INS_B ins +#define INT(a) int B_(a) +#define INT3 int3 +#define INTO into +#define IRET iret +#define IRETD iretd +#define JA(a) ja NEAR a +#define JAE(a) jae NEAR a +#define JB(a) jb NEAR a +#define JBE(a) jbe NEAR a +#define JC(a) jc NEAR a +#define JE(a) je NEAR a +#define JG(a) jg NEAR a +#define JGE(a) jge NEAR a +#define JL(a) jl NEAR a +#define JLE(a) jle NEAR a +#define JNA(a) jna NEAR a +#define JNAE(a) jnae NEAR a +#define JNB(a) jnb NEAR a +#define JNBE(a) jnbe NEAR a +#define JNC(a) jnc NEAR a +#define JNE(a) jne NEAR a +#define JNG(a) jng NEAR a +#define JNGE(a) jnge NEAR a +#define JNL(a) jnl NEAR a +#define JNLE(a) jnle NEAR a +#define JNO(a) jno NEAR a +#define JNP(a) jnp NEAR a +#define JNS(a) jns NEAR a +#define JNZ(a) jnz NEAR a +#define JO(a) jo NEAR a +#define JP(a) jp NEAR a +#define JPE(a) jpe NEAR a +#define JPO(a) jpo NEAR a +#define JS(a) js NEAR a +#define JZ(a) jz NEAR a +#define JMP(a) jmp a +#define JMPF(s,a) jmpf +#define LAHF lahf +#define LAR(a, b) lar b, a +#define LEA_L(a, b) lea P_(b), P_(a) +#define LEA_W(a, b) lea P_(b), P_(a) +#define LEAVE leave +#define LGDT(a) lgdt a +#define LIDT(a) lidt a +#define LDS(a, b) lds b, a +#define LES(a, b) les b, a +#define LFS(a, b) lfs b, a +#define LGS(a, b) lgs b, a +#define LSS(a, b) lss b, a +#define LLDT(a) lldt a +#define LMSW(a) lmsw a +#define LOCK lock +#define LODS_L lodsd +#define LODS_W lodsw +#define LODS_B lodsb +#define LOOP(a) loop a +#define LOOPE(a) loope a +#define LOOPZ(a) loopz a +#define LOOPNE(a) loopne a +#define LOOPNZ(a) loopnz a +#define LSL(a, b) lsl b, a +#define LTR(a) ltr a +#define MOV_SR(a, b) mov S_(b), S_(a) +#define MOV_L(a, b) mov L_(b), L_(a) +#define MOV_W(a, b) mov W_(b), W_(a) +#define MOV_B(a, b) mov B_(b), B_(a) +#define MOVS_L movsd +#define MOVS_W movsw +#define MOVS_B movsb +#define MOVSX_BL(a, b) movsx B_(b), B_(a) +#define MOVSX_BW(a, b) movsx B_(b), B_(a) +#define MOVSX_WL(a, b) movsx W_(b), W_(a) +#define MOVZX_BL(a, b) movzx B_(b), B_(a) +#define MOVZX_BW(a, b) movzx B_(b), B_(a) +#define MOVZX_WL(a, b) movzx W_(b), W_(a) +#define MUL_L(a) mul L_(a) +#define MUL_W(a) mul W_(a) +#define MUL_B(a) mul B_(a) +#define NEG_L(a) neg L_(a) +#define NEG_W(a) neg W_(a) +#define NEG_B(a) neg B_(a) +#define NOP nop +#define NOT_L(a) not L_(a) +#define NOT_W(a) not W_(a) +#define NOT_B(a) not B_(a) +#define OR_L(a,b) or L_(b), L_(a) +#define OR_W(a,b) or W_(b), W_(a) +#define OR_B(a,b) or B_(b), B_(a) +#define OUT_L out DX, EAX +#define OUT_W out DX, AX +#define OUT_B out DX, AL +#define OUT1_L(a) out1 L_(a) +#define OUT1_W(a) out1 W_(a) +#define OUT1_B(a) out1 B_(a) +#define OUTS_L outsd +#define OUTS_W outsw +#define OUTS_B outsb +#define POP_SR(a) pop S_(a) +#define POP_L(a) pop L_(a) +#define POP_W(a) pop W_(a) +#define POPA_L popad +#define POPA_W popa +#define POPF_L popfd +#define POPF_W popf +#define PUSH_SR(a) push S_(a) +#define PUSH_L(a) push L_(a) +#define PUSH_W(a) push W_(a) +#define PUSH_B(a) push B_(a) +#define PUSHA_L pushad +#define PUSHA_W pusha +#define PUSHF_L pushfd +#define PUSHF_W pushf +#define RCL_L(a, b) rcl L_(b), L_(a) +#define RCL_W(a, b) rcl W_(b), W_(a) +#define RCL_B(a, b) rcl B_(b), B_(a) +#define RCR_L(a, b) rcr L_(b), L_(a) +#define RCR_W(a, b) rcr W_(b), W_(a) +#define RCR_B(a, b) rcr B_(b), B_(a) +#define RDTSC rdtsc +#define ROL_L(a, b) rol L_(b), L_(a) +#define ROL_W(a, b) rol W_(b), W_(a) +#define ROL_B(a, b) rol B_(b), B_(a) +#define ROR_L(a, b) ror L_(b), L_(a) +#define ROR_W(a, b) ror W_(b), W_(a) +#define ROR_B(a, b) ror B_(b), B_(a) +#define REP rep +#define REPE repe +#define REPNE repne +#define REPNZ REPNE +#define REPZ REPE +#define RET ret +#define SAHF sahf +#define SAL_L(a, b) sal L_(b), L_(a) +#define SAL_W(a, b) sal W_(b), W_(a) +#define SAL_B(a, b) sal B_(b), B_(a) +#define SAR_L(a, b) sar L_(b), L_(a) +#define SAR_W(a, b) sar W_(b), W_(a) +#define SAR_B(a, b) sar B_(b), B_(a) +#define SBB_L(a, b) sbb L_(b), L_(a) +#define SBB_W(a, b) sbb W_(b), W_(a) +#define SBB_B(a, b) sbb B_(b), B_(a) +#define SCAS_L scas +#define SCAS_W scas +#define SCAS_B scas +#define SETA(a) seta a +#define SETAE(a) setae a +#define SETB(a) setb a +#define SETBE(a) setbe a +#define SETC(a) setc a +#define SETE(a) sete a +#define SETG(a) setg a +#define SETGE(a) setge a +#define SETL(a) setl a +#define SETLE(a) setle a +#define SETNA(a) setna a +#define SETNAE(a) setnae a +#define SETNB(a) setnb a +#define SETNBE(a) setnbe a +#define SETNC(a) setnc a +#define SETNE(a) setne a +#define SETNG(a) setng a +#define SETNGE(a) setnge a +#define SETNL(a) setnl a +#define SETNLE(a) setnle a +#define SETNO(a) setno a +#define SETNP(a) setnp a +#define SETNS(a) setns a +#define SETNZ(a) setnz a +#define SETO(a) seto a +#define SETP(a) setp a +#define SETPE(a) setpe a +#define SETPO(a) setpo a +#define SETS(a) sets a +#define SETZ(a) setz a +#define SGDT(a) sgdt a +#define SIDT(a) sidt a +#define SHL_L(a, b) shl L_(b), L_(a) +#define SHL_W(a, b) shl W_(b), W_(a) +#define SHL_B(a, b) shl B_(b), B_(a) +#define SHLD_L(a,b,c) shld +#define SHLD2_L(a,b) shld L_(b), L_(a) +#define SHLD_W(a,b,c) shld +#define SHLD2_W(a,b) shld W_(b), W_(a) +#define SHR_L(a, b) shr L_(b), L_(a) +#define SHR_W(a, b) shr W_(b), W_(a) +#define SHR_B(a, b) shr B_(b), B_(a) +#define SHRD_L(a,b,c) shrd +#define SHRD2_L(a,b) shrd L_(b), L_(a) +#define SHRD_W(a,b,c) shrd +#define SHRD2_W(a,b) shrd W_(b), W_(a) +#define SLDT(a) sldt a +#define SMSW(a) smsw a +#define STC stc +#define STD std +#define STI sti +#define STOS_L stos +#define STOS_W stos +#define STOS_B stos +#define STR(a) str a +#define SUB_L(a, b) sub L_(b), L_(a) +#define SUB_W(a, b) sub W_(b), W_(a) +#define SUB_B(a, b) sub B_(b), B_(a) +#define TEST_L(a, b) test L_(b), L_(a) +#define TEST_W(a, b) test W_(b), W_(a) +#define TEST_B(a, b) test B_(b), B_(a) +#define VERR(a) verr a +#define VERW(a) verw a +#define WAIT wait +#define XCHG_L(a, b) xchg L_(b), L_(a) +#define XCHG_W(a, b) xchg W_(b), W_(a) +#define XCHG_B(a, b) xchg B_(b), B_(a) +#define XLAT xlat +#define XOR_L(a, b) xor L_(b), L_(a) +#define XOR_W(a, b) xor W_(b), W_(a) +#define XOR_B(a, b) xor B_(b), B_(a) + + +/* Floating Point Instructions */ +#define F2XM1 f2xm1 +#define FABS fabs +#define FADD_D(a) fadd D_(a) +#define FADD_S(a) fadd S_(a) +#define FADD2(a, b) fadd b, a +#define FADDP(a, b) faddp b, a +#define FIADD_L(a) fiadd L_(a) +#define FIADD_W(a) fiadd W_(a) +#define FBLD(a) fbld a +#define FBSTP(a) fbstp a +#define FCHS fchs +#define FCLEX fclex +#define FNCLEX fnclex +#define FCOM(a) fcom a +#define FCOM_D(a) fcom D_(a) +#define FCOM_S(a) fcom S_(a) +#define FCOMP(a) fcomp a +#define FCOMP_D(a) fcomp D_(a) +#define FCOMP_S(a) fcomp S_(a) +#define FCOMPP fcompp +#define FCOS fcos +#define FDECSTP fdecstp +#define FDIV_D(a) fdiv D_(a) +#define FDIV_S(a) fdiv S_(a) +#define FDIV2(a, b) fdiv b, a +#define FDIVP(a, b) fdivp b, a +#define FIDIV_L(a) fidiv L_(a) +#define FIDIV_W(a) fidiv W_(a) +#define FDIVR_D(a) fdivr D_(a) +#define FDIVR_S(a) fdivr S_(a) +#define FDIVR2(a, b) fdivr b, a +#define FDIVRP(a, b) fdivrp b, a +#define FIDIVR_L(a) fidivr L_(a) +#define FIDIVR_W(a) fidivr W_(a) +#define FFREE(a) ffree a +#define FICOM_L(a) ficom L_(a) +#define FICOM_W(a) ficom W_(a) +#define FICOMP_L(a) ficomp L_(a) +#define FICOMP_W(a) ficomp W_(a) +#define FILD_Q(a) fild D_(a) +#define FILD_L(a) fild L_(a) +#define FILD_W(a) fild W_(a) +#define FINCSTP fincstp +#define FINIT finit +#define FNINIT fninit +#define FIST_L(a) fist L_(a) +#define FIST_W(a) fist W_(a) +#define FISTP_Q(a) fistp D_(a) +#define FISTP_L(a) fistp L_(a) +#define FISTP_W(a) fistp W_(a) +#define FLD_X(a) fld X_(a) +#define FLD_D(a) fld D_(a) +#define FLD_S(a) fld S_(a) +#define FLD1 fld1 +#define FLDL2T fldl2t +#define FLDL2E fldl2e +#define FLDPI fldpi +#define FLDLG2 fldlg2 +#define FLDLN2 fldln2 +#define FLDZ fldz +#define FLDCW(a) fldcw a +#define FLDENV(a) fldenv a +#define FMUL_S(a) fmul S_(a) +#define FMUL_D(a) fmul D_(a) +#define FMUL2(a, b) fmul b, a +#define FMULP(a, b) fmulp b, a +#define FIMUL_L(a) fimul L_(a) +#define FIMUL_W(a) fimul W_(a) +#define FNOP fnop +#define FPATAN fpatan +#define FPREM fprem +#define FPREM1 fprem1 +#define FPTAN fptan +#define FRNDINT frndint +#define FRSTOR(a) frstor a +#define FSAVE(a) fsave a +#define FNSAVE(a) fnsave a +#define FSCALE fscale +#define FSIN fsin +#define FSINCOS fsincos +#define FSQRT fsqrt +#define FST_D(a) fst D_(a) +#define FST_S(a) fst S_(a) +#define FSTP_X(a) fstp X_(a) +#define FSTP_D(a) fstp D_(a) +#define FSTP_S(a) fstp S_(a) +#define FSTP(a) fstp a +#define FSTCW(a) fstcw a +#define FNSTCW(a) fnstcw a +#define FSTENV(a) fstenv a +#define FNSTENV(a) fnstenv a +#define FSTSW(a) fstsw a +#define FNSTSW(a) fnstsw a +#define FSUB_S(a) fsub S_(a) +#define FSUB_D(a) fsub D_(a) +#define FSUB2(a, b) fsub b, a +#define FSUBP(a, b) fsubp b, a +#define FISUB_L(a) fisub L_(a) +#define FISUB_W(a) fisub W_(a) +#define FSUBR_S(a) fsubr S_(a) +#define FSUBR_D(a) fsubr D_(a) +#define FSUBR2(a, b) fsubr b, a +#define FSUBRP(a, b) fsubrp b, a +#define FISUBR_L(a) fisubr L_(a) +#define FISUBR_W(a) fisubr W_(a) +#define FTST ftst +#define FUCOM(a) fucom a +#define FUCOMP(a) fucomp a +#define FUCOMPP fucompp +#define FWAIT fwait +#define FXAM fxam +#define FXCH(a) fxch a +#define FXTRACT fxtract +#define FYL2X fyl2x +#define FYL2XP1 fyl2xp1 + +#endif /* NASM_ASSEMBLER, MASM_ASSEMBLER */ + + /****************************************/ + /* */ + /* Extensions to x86 insn set - */ + /* MMX, 3DNow! */ + /* */ + /****************************************/ + +#if defined(NASM_ASSEMBLER) || defined(MASM_ASSEMBLER) +#define P_ARG1(a) P_ ## a +#define P_ARG2(a, b) P_ ## b, P_ ## a +#define P_ARG3(a, b, c) P_ ## c, P_ ## b, P_ ## a +#else +#define P_ARG1(a) a +#define P_ARG2(a, b) a, b +#define P_ARG3(a, b, c) a, b, c +#endif + +/* MMX */ +#define MOVD(a, b) movd P_ARG2(a, b) +#define MOVQ(a, b) movq P_ARG2(a, b) + +#define PADDB(a, b) paddb P_ARG2(a, b) +#define PADDW(a, b) paddw P_ARG2(a, b) +#define PADDD(a, b) paddd P_ARG2(a, b) + +#define PADDSB(a, b) paddsb P_ARG2(a, b) +#define PADDSW(a, b) paddsw P_ARG2(a, b) + +#define PADDUSB(a, b) paddusb P_ARG2(a, b) +#define PADDUSW(a, b) paddusw P_ARG2(a, b) + +#define PSUBB(a, b) psubb P_ARG2(a, b) +#define PSUBW(a, b) psubw P_ARG2(a, b) +#define PSUBD(a, b) psubd P_ARG2(a, b) + +#define PSUBSB(a, b) psubsb P_ARG2(a, b) +#define PSUBSW(a, b) psubsw P_ARG2(a, b) + +#define PSUBUSB(a, b) psubusb P_ARG2(a, b) +#define PSUBUSW(a, b) psubusw P_ARG2(a, b) + +#define PCMPEQB(a, b) pcmpeqb P_ARG2(a, b) +#define PCMPEQW(a, b) pcmpeqw P_ARG2(a, b) +#define PCMPEQD(a, b) pcmpeqd P_ARG2(a, b) + +#define PCMPGTB(a, b) pcmpgtb P_ARG2(a, b) +#define PCMPGTW(a, b) pcmpgtw P_ARG2(a, b) +#define PCMPGTD(a, b) pcmpgtd P_ARG2(a, b) + +#define PMULHW(a, b) pmulhw P_ARG2(a, b) +#define PMULLW(a, b) pmullw P_ARG2(a, b) + +#define PMADDWD(a, b) pmaddwd P_ARG2(a, b) + +#define PAND(a, b) pand P_ARG2(a, b) + +#define PANDN(a, b) pandn P_ARG2(a, b) + +#define POR(a, b) por P_ARG2(a, b) + +#define PXOR(a, b) pxor P_ARG2(a, b) + +#define PSRAW(a, b) psraw P_ARG2(a, b) +#define PSRAD(a, b) psrad P_ARG2(a, b) + +#define PSRLW(a, b) psrlw P_ARG2(a, b) +#define PSRLD(a, b) psrld P_ARG2(a, b) +#define PSRLQ(a, b) psrlq P_ARG2(a, b) + +#define PSLLW(a, b) psllw P_ARG2(a, b) +#define PSLLD(a, b) pslld P_ARG2(a, b) +#define PSLLQ(a, b) psllq P_ARG2(a, b) + +#define PACKSSWB(a, b) packsswb P_ARG2(a, b) +#define PACKSSDW(a, b) packssdw P_ARG2(a, b) +#define PACKUSWB(a, b) packuswb P_ARG2(a, b) + +#define PUNPCKHBW(a, b) punpckhbw P_ARG2(a, b) +#define PUNPCKHWD(a, b) punpckhwd P_ARG2(a, b) +#define PUNPCKHDQ(a, b) punpckhdq P_ARG2(a, b) +#define PUNPCKLBW(a, b) punpcklbw P_ARG2(a, b) +#define PUNPCKLWD(a, b) punpcklwd P_ARG2(a, b) +#define PUNPCKLDQ(a, b) punpckldq P_ARG2(a, b) + +#define EMMS emms + +/* AMD 3DNow! */ +#define PAVGUSB(a, b) pavgusb P_ARG2(a, b) +#define PFADD(a, b) pfadd P_ARG2(a, b) +#define PFSUB(a, b) pfsub P_ARG2(a, b) +#define PFSUBR(a, b) pfsubr P_ARG2(a, b) +#define PFACC(a, b) pfacc P_ARG2(a, b) +#define PFCMPGE(a, b) pfcmpge P_ARG2(a, b) +#define PFCMPGT(a, b) pfcmpgt P_ARG2(a, b) +#define PFCMPEQ(a, b) pfcmpeq P_ARG2(a, b) +#define PFMIN(a, b) pfmin P_ARG2(a, b) +#define PFMAX(a, b) pfmax P_ARG2(a, b) +#define PI2FD(a, b) pi2fd P_ARG2(a, b) +#define PF2ID(a, b) pf2id P_ARG2(a, b) +#define PFRCP(a, b) pfrcp P_ARG2(a, b) +#define PFRSQRT(a, b) pfrsqrt P_ARG2(a, b) +#define PFMUL(a, b) pfmul P_ARG2(a, b) +#define PFRCPIT1(a, b) pfrcpit1 P_ARG2(a, b) +#define PFRSQIT1(a, b) pfrsqit1 P_ARG2(a, b) +#define PFRCPIT2(a, b) pfrcpit2 P_ARG2(a, b) +#define PMULHRW(a, b) pmulhrw P_ARG2(a, b) + +#define FEMMS femms +#define PREFETCH(a) prefetch P_ARG1(a) +#define PREFETCHW(a) prefetchw P_ARG1(a) + +/* Intel SSE */ +#define ADDPS(a, b) addps P_ARG2(a, b) +#define ADDSS(a, b) addss P_ARG2(a, b) +#define ANDNPS(a, b) andnps P_ARG2(a, b) +#define ANDPS(a, b) andps P_ARG2(a, b) +/* NASM only knows the pseudo ops for these. +#define CMPPS(a, b, c) cmpps P_ARG3(a, b, c) +#define CMPSS(a, b, c) cmpss P_ARG3(a, b, c) +*/ +#define CMPEQPS(a, b) cmpeqps P_ARG2(a, b) +#define CMPLTPS(a, b) cmpltps P_ARG2(a, b) +#define CMPLEPS(a, b) cmpleps P_ARG2(a, b) +#define CMPUNORDPS(a, b) cmpunordps P_ARG2(a, b) +#define CMPNEQPS(a, b) cmpneqps P_ARG2(a, b) +#define CMPNLTPS(a, b) cmpnltps P_ARG2(a, b) +#define CMPNLEPS(a, b) cmpnleps P_ARG2(a, b) +#define CMPORDPS(a, b) cmpordps P_ARG2(a, b) +#define CMPEQSS(a, b) cmpeqss P_ARG2(a, b) +#define CMPLTSS(a, b) cmpltss P_ARG2(a, b) +#define CMPLESS(a, b) cmpless P_ARG2(a, b) +#define CMPUNORDSS(a, b) cmpunordss P_ARG2(a, b) +#define CMPNEQSS(a, b) cmpneqss P_ARG2(a, b) +#define CMPNLTSS(a, b) cmpnltss P_ARG2(a, b) +#define CMPNLESS(a, b) cmpnless P_ARG2(a, b) +#define CMPORDSS(a, b) cmpordss P_ARG2(a, b) +#define COMISS(a, b) comiss P_ARG2(a, b) +#define CVTPI2PS(a, b) cvtpi2ps P_ARG2(a, b) +#define CVTPS2PI(a, b) cvtps2pi P_ARG2(a, b) +#define CVTSI2SS(a, b) cvtsi2ss P_ARG2(a, b) +#define CVTSS2SI(a, b) cvtss2si P_ARG2(a, b) +#define CVTTPS2PI(a, b) cvttps2pi P_ARG2(a, b) +#define CVTTSS2SI(a, b) cvttss2si P_ARG2(a, b) +#define DIVPS(a, b) divps P_ARG2(a, b) +#define DIVSS(a, b) divss P_ARG2(a, b) +#define FXRSTOR(a) fxrstor P_ARG1(a) +#define FXSAVE(a) fxsave P_ARG1(a) +#define LDMXCSR(a) ldmxcsr P_ARG1(a) +#define MAXPS(a, b) maxps P_ARG2(a, b) +#define MAXSS(a, b) maxss P_ARG2(a, b) +#define MINPS(a, b) minps P_ARG2(a, b) +#define MINSS(a, b) minss P_ARG2(a, b) +#define MOVAPS(a, b) movaps P_ARG2(a, b) +#define MOVHLPS(a, b) movhlps P_ARG2(a, b) +#define MOVHPS(a, b) movhps P_ARG2(a, b) +#define MOVLHPS(a, b) movlhps P_ARG2(a, b) +#define MOVLPS(a, b) movlps P_ARG2(a, b) +#define MOVMSKPS(a, b) movmskps P_ARG2(a, b) +#define MOVNTPS(a, b) movntps P_ARG2(a, b) +#define MOVNTQ(a, b) movntq P_ARG2(a, b) +#define MOVSS(a, b) movss P_ARG2(a, b) +#define MOVUPS(a, b) movups P_ARG2(a, b) +#define MULPS(a, b) mulps P_ARG2(a, b) +#define MULSS(a, b) mulss P_ARG2(a, b) +#define ORPS(a, b) orps P_ARG2(a, b) +#define RCPPS(a, b) rcpps P_ARG2(a, b) +#define RCPSS(a, b) rcpss P_ARG2(a, b) +#define RSQRTPS(a, b) rsqrtps P_ARG2(a, b) +#define RSQRTSS(a, b) rsqrtss P_ARG2(a, b) +#define SHUFPS(a, b, c) shufps P_ARG3(a, b, c) +#define SQRTPS(a, b) sqrtps P_ARG2(a, b) +#define SQRTSS(a, b) sqrtss P_ARG2(a, b) +#define STMXCSR(a) stmxcsr P_ARG1(a) +#define SUBPS(a, b) subps P_ARG2(a, b) +#define UCOMISS(a, b) ucomiss P_ARG2(a, b) +#define UNPCKHPS(a, b) unpckhps P_ARG2(a, b) +#define UNPCKLPS(a, b) unpcklps P_ARG2(a, b) +#define XORPS(a, b) xorps P_ARG2(a, b) + +#define PREFETCHNTA(a) prefetchnta P_ARG1(a) +#define PREFETCHT0(a) prefetcht0 P_ARG1(a) +#define PREFETCHT1(a) prefetcht1 P_ARG1(a) +#define PREFETCHT2(a) prefetcht2 P_ARG1(a) +#define SFENCE sfence + +/* Added by BrianP for FreeBSD (per David Dawes) */ +#if !defined(NASM_ASSEMBLER) && !defined(MASM_ASSEMBLER) && !defined(__bsdi__) +#define LLBL(a) CONCAT(.L,a) +#else +#define LLBL(a) a +#endif + + +#endif /* __ASSYNTAX_H__ */ diff --git a/base/runtime/mach-dep/Unsupported/mklinux-regs.h b/base/runtime/mach-dep/Unsupported/mklinux-regs.h new file mode 100644 index 0000000..ef4ba32 --- /dev/null +++ b/base/runtime/mach-dep/Unsupported/mklinux-regs.h @@ -0,0 +1,45 @@ +/* mklinux-regs.h + * + * COPYRIGHT (c) 1997 Bell Labs, Lucent Technologies. + * + * This defines the layout of the machine registers as they are + * passed to a signal handler in MkLinux on the PowerPC. It was + * reverse engineered from the files: + * + * mklinux/src/arch/osfmach3_ppc/kernel/signal.c + * mklinux/src/include/asm-ppc/ptrace.h + * + * in the MkLinux sources (DR2.1 update 4). + * + * A MkLinux signal handler has the prototype + * + * void handler (int signr, struct mklinux_ppc_regs *rp); + */ + +#ifndef _MKLINUX_REGS_H_ +#define _MKLINUX_REGS_H_ + +#include + +struct mklinux_ppc_regs { + unsigned long gpr[32]; + unsigned long nip; /* aka PC */ + unsigned long msr; + unsigned long orig_r3; + unsigned long ctr; + unsigned long lnk; + unsigned long xer; + unsigned long ccr; + unsigned long mq; + unsigned long trap; + unsigned long dar; + unsigned long dsisr; + unsigned long result; + unsigned long pad1[4]; /* pad to 48 words */ + double fpr[32]; + unsigned long pad2; + unsigned long fpscr; +}; + +#endif /* !_MKLINUX_REGS_H_ */ + diff --git a/base/runtime/mach-dep/X86.prim.asm b/base/runtime/mach-dep/X86.prim.asm new file mode 100644 index 0000000..e692505 --- /dev/null +++ b/base/runtime/mach-dep/X86.prim.asm @@ -0,0 +1,722 @@ +/*! \file X86.prim.asm + * + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + */ + +#include "ml-base.h" +#include "x86-syntax.h" +#include "ml-values.h" +#include "tags.h" +#include "ml-request.h" +#include "mlstate-offsets.h" /** this file is generated **/ +#include "ml-limits.h" + +#if defined(OPSYS_LINUX) && defined(__ELF__) +/* needed to disable the execution bit on the stack pages */ +.section .note.GNU-stack,"",%progbits +#endif + +/* + * 386 C function call conventions: + * [true for gcc and dynix3 cc; untested for others] + * + * Caller save registers: eax, ecx, edx + * Callee save registers: ebx, esi, edi, and ebp. + * Save frame pointer (ebx) first to match standard function prelude + * Floating point state is caller-save. + * Arguments passed on stack. Rightmost argument pushed first. + * Word-sized result returned in %eax. + * On Darwin, stack frame must be multiple of 16 bytes + * + * The 386 registers are used in SML as follows: + * + * EAX - temp1 (see the code generator, x86/x86.sml) + * EBX - misc0 + * ECX - misc1 + * EDX - misc2 + * ESI - standard continuation (ml_cont, see ml_state.h) + * EBP - standard argument (ml_arg) + * EDI - free space pointer (ml_allocptr) + * ESP - stack pointer + * EIP - program counter (ml_pc) + */ + +/* SML registers (see compiler/CodeGen/x86/X86CpsRegs.sml): */ +#define temp EAX +#define misc0 EBX +#define misc1 ECX +#define misc2 EDX +#define stdcont ESI +#define stdarg EBP +#define allocptr EDI +#define stackptr ESP + +/* C function result register */ +#define creturn EAX + +/* SML Stack frame layout: */ +#define tempword1 REGOFF_W(0,ESP) +#define tempword2 REGOFF_W(2,ESP) +#define tempmem REGIND(ESP) +#define baseptr REGOFF(4,ESP) +#define exncont REGOFF(8,ESP) +#define limitptr REGOFF(12,ESP) +#define pc REGOFF(16,ESP) /* gcLink */ +#define unused_1 REGOFF(20,ESP) +#define storeptr REGOFF(24,ESP) +#define varptr REGOFF(28,ESP) +#define start_gc REGOFF(32,ESP) /* holds address of saveregs */ +#define unused_2 REGOFF(36,ESP) +#define eaxSpill REGOFF(40,ESP) /* eax=0 */ +#define ecxSpill REGOFF(44,ESP) /* ecx=1 */ +#define edxSpill REGOFF(48,ESP) /* edx=2 */ +#define ebxSpill REGOFF(52,ESP) /* ebx=3 */ +#define espSpill REGOFF(56,ESP) /* esp=4 */ +#define ebpSpill REGOFF(60,ESP) /* ebp=5 */ +#define esiSpill REGOFF(64,ESP) /* esi=6 */ +#define ediSpill REGOFF(68,ESP) /* edi=7 */ +#define stdlink REGOFF(72,ESP) +#define stdclos REGOFF(76,ESP) + +#define esp_save REGOFF(500,ESP) + +#define ML_STATE_OFFSET 176 +#define mlstate_ptr REGOFF(ML_STATE_OFFSET,ESP) +#define SpillAreaStart 512 /* starting offset */ +#define ML_FRAME_SIZE (8192) + +/* NOTE: this include must come after the definition of stdlink, etc. */ +#include "x86-macros.h" + +/**********************************************************************/ +#ifdef MASM_ASSEMBLER + .386 + .MODEL FLAT +#endif + + DATA + ALIGN4 + + GLOBAL(CSYM(ML_X86Frame)) + /* global to hold ptr to the ml frame (gives C access to limitptr) */ + WORD(CSYM(ML_X86Frame)) + +/**********************************************************************/ + TEXT + +/* use tempmem to hold the request word */ +#define request_w tempmem + +/* sigh_return: + */ +ALIGNED_ENTRY(sigh_return_a) + MOV(IM(ML_unit),stdlink) + MOV(IM(ML_unit),stdclos) + MOV(IM(ML_unit),pc) + MOV(IM(REQ_SIG_RETURN),request_w) + JMP(set_request) + +/* sigh_resume: + * Resume execution at the point at which a handler trap occurred. This is a + * standard two-argument function, thus the closure is in ml_cont. + */ +ALIGNED_ENTRY(sigh_resume) + MOV(IM(REQ_SIG_RESUME),request_w) + JMP(set_request) + +/* pollh_return_a: + * The return continuation for the ML poll handler. + */ +ALIGNED_ENTRY(pollh_return_a) + MOV (IM(ML_unit),stdlink) + MOV (IM(ML_unit),stdclos) + MOV (IM(ML_unit),pc) + MOV (IM(REQ_POLL_RETURN),request_w) + JMP (set_request) + +/* pollh_resume: + * Resume execution at the point at which a poll event occurred. + */ +ALIGNED_ENTRY(pollh_resume) + MOV (IM(REQ_POLL_RESUME),request_w) + JMP (set_request) + +/* handle: + */ +ALIGNED_ENTRY(handle_a) + MOVE (stdlink,temp,pc) + MOV (IM(REQ_EXN),request_w) + JMP (set_request) + +/* return: + */ +ALIGNED_ENTRY(return_a) + MOV (IM(ML_unit),stdlink) + MOV (IM(ML_unit),stdclos) + MOV (IM(ML_unit),pc) + MOV (IM(REQ_RETURN),request_w) + JMP (set_request) + +/* Request a fault. The floating point coprocessor must be reset + * (thus trashing the FP registers) since we don't know whether a + * value has been pushed into the temporary "register". This is OK + * because no floating point registers will be live at the start of + * the exception handler. + */ +ALIGNED_ENTRY(request_fault) + CALL (CSYM(FPEEnable)) + MOVE (stdlink,temp,pc) + MOV (IM(REQ_FAULT),request_w) + JMP (set_request) + +/* bind_cfun : (string * string) -> c_function + */ +ALIGNED_ENTRY(bind_cfun_a) + CHECKLIMIT + MOV (IM(REQ_BIND_CFUN),request_w) + JMP (set_request) + +/* build_literals: + */ +ALIGNED_ENTRY(build_literals_a) + CHECKLIMIT + MOV (IM(REQ_BUILD_LITERALS),request_w) + JMP (set_request) + +/* callc: + */ +ALIGNED_ENTRY(callc_a) + CHECKLIMIT + MOV (IM(REQ_CALLC),request_w) + JMP (set_request) + +/* saveregs: + * Entry point for GC. Control is transfered using a `call` instruction, + * so the return address is on the top of the stack. + */ +ALIGNED_ENTRY(saveregs) + POP (pc) + MOV (IM(REQ_GC),request_w) + /* fall into set_request */ + +/* set_request: + * common code to switch execution from SML to runtime system. The request + * code will be in `tempmem` (on the stack). + */ +LABEL(set_request) + /* temp holds mlstate_ptr, valid request in tempmem */ + /* Save registers */ + MOV (mlstate_ptr, temp) + MOV (allocptr, REGOFF(AllocPtrOffMSP,temp)) + MOV (stdarg, REGOFF(StdArgOffMSP,temp)) + MOV (stdcont, REGOFF(StdContOffMSP,temp)) + +#define temp2 allocptr + /* note that we have left ML code */ + MOV (REGOFF(VProcOffMSP,temp),temp2) + MOV (IM(0), REGOFF(InMLOffVSP,temp2)) + + MOV (misc0, REGOFF(Misc0OffMSP,temp)) + MOV (misc1, REGOFF(Misc1OffMSP,temp)) + MOV (misc2, REGOFF(Misc2OffMSP,temp)) + + /* Save vregs before stack frame is popped. (?? - Blume) */ + MOVE (limitptr,temp2, REGOFF(LimitPtrOffMSP,temp)) + MOVE (exncont, temp2, REGOFF(ExnPtrOffMSP,temp)) + MOVE (stdclos, temp2, REGOFF(StdClosOffMSP,temp)) + MOVE (stdlink, temp2, REGOFF(LinkRegOffMSP,temp)) + MOVE (pc, temp2, REGOFF(PCOffMSP,temp)) + MOVE (storeptr,temp2, REGOFF(StorePtrOffMSP,temp)) + MOVE (varptr, temp2, REGOFF(VarPtrOffMSP,temp)) +#undef temp2 + + /* return val of function is request code */ + MOV (request_w,creturn) + + /* Pop the stack frame */ +#if defined(ALIGN_STACK_16) + LEA (REGOFF(ML_FRAME_SIZE+12,ESP),ESP) +#else + MOV (esp_save, ESP) +#endif + /* restore C callee-save registers */ + POP (EDI) + POP (ESI) + POP (EBX) + POP (EBP) + /* return to run_ml() */ + RET + +/**********************************************************************/ + +/* restoreregs (ml_state_t *msp): + * + * Switch from C to SML. + */ +#ifdef OPSYS_WIN32 +/* on Windows, `restoreregs` is a C wrapper around `asm_restoreregs` that + * handles traps (see `runtime/mach-dep/win32-fault.c`) + */ +ALIGNED_ENTRY(asm_restoreregs) +#else +ALIGNED_ENTRY(restoreregs) +#endif + /* put ML state pointer in temp */ + MOV (REGOFF(4,ESP), temp) + /* save C callee-save registers */ + PUSH (EBP) + PUSH (EBX) + PUSH (ESI) + PUSH (EDI) + /* save stack pointer */ +#if defined(ALIGN_STACK_16) + /* Some operating systems (e.g., macOS and Linux) require that stack + * frames be 16-byte aligned. We have 20 bytes on the stack for the + * return PC and callee-saves, so we need a 12-byte pad. + */ + SUB(IM(ML_FRAME_SIZE+12), ESP) +#else + /* Align sp on 8 byte boundary. Assumes that the stack + * starts out being at least word aligned. But who knows ... + */ + MOV (ESP, EBX) + OR (IM(4), ESP) + SUB (IM(4), ESP) + /* Allocate and initialize the ML stack frame. */ + SUB (IM(ML_FRAME_SIZE), ESP) + MOV (EBX, esp_save) +#endif + +#define temp2 EBX + /* Initialize the ML stack frame. */ + MOVE (REGOFF(ExnPtrOffMSP,temp), temp2, exncont) + MOVE (REGOFF(LimitPtrOffMSP,temp), temp2, limitptr) + MOVE (REGOFF(StorePtrOffMSP,temp), temp2, storeptr) + MOVE (REGOFF(VarPtrOffMSP,temp), temp2, varptr) + LEA (CSYM(saveregs), temp2) + MOV (temp2, start_gc) + MOV (temp, mlstate_ptr) + + /* vregs */ + MOVE (REGOFF(LinkRegOffMSP,temp), temp2, stdlink) + MOVE (REGOFF(StdClosOffMSP,temp), temp2, stdclos) + + /* PC */ + MOVE (REGOFF(PCOffMSP,temp), temp2,pc) +#undef temp2 + + /* Load ML registers from the ML state */ + MOV (REGOFF(AllocPtrOffMSP,temp), allocptr) + MOV (REGOFF(StdContOffMSP,temp), stdcont) + MOV (REGOFF(StdArgOffMSP,temp), stdarg) + MOV (REGOFF(Misc0OffMSP,temp), misc0) + MOV (REGOFF(Misc1OffMSP,temp), misc1) + MOV (REGOFF(Misc2OffMSP,temp), misc2) + /* put stack pointer somewhere that signal handlers can get it */ + MOV (ESP, CSYM(ML_X86Frame)) + + PUSH (misc2) /* free up a register */ + PUSH (temp) /* save msp temporarily */ + +#define tmpreg misc2 + /* note that we're entering ML */ + MOV (REGOFF(VProcOffMSP,temp),temp) /* temp is now vsp */ +#define vsp temp + MOV (IM(1),REGOFF(InMLOffVSP,vsp)) + + /* check for any pending signals */ + MOV (REGOFF(SigsRecvOffVSP,vsp), tmpreg) + CMP (REGOFF(SigsHandledOffVSP,vsp), tmpreg) + JNE (pending) +#undef tmpreg + /* here there are no pending signals */ +LABEL(restore_and_jmp_ml) + /* restore temp to msp */ + POP (temp) + POP (misc2) + +LABEL(jmp_ml) + CMP (limitptr, allocptr) + JMP (CODEPTR(REGOFF(PCOffMSP, temp))) /* jump to ML code */ + + /* handle pending signals */ +LABEL(pending) + CMP (IM(0),REGOFF(InSigHandlerOffVSP,vsp)) + JNE (restore_and_jmp_ml) + + MOV (IM(1),REGOFF(HandlerPendingOffVSP,vsp)) + + /* must restore here because limitptr is on stack */ + POP (temp) /* restore temp to msp */ + POP (misc2) + + MOV (allocptr,limitptr) + JMP (jmp_ml) +#undef vsp + +/**********************************************************************/ + +/* array : (int * 'a) -> 'a array + * + * Allocate and initialize a new array. This function can cause GC. + */ +ALIGNED_ENTRY(array_a) + CHECKLIMIT + MOV (REGIND(stdarg),temp) /* temp := length in words */ + SAR (IM(1),temp) /* temp := length untagged */ + CMP (IM(SMALL_OBJ_SZW),temp) /* small object? */ + JGE (L_array_large) + /* use misc0 and misc1 as temporary registers */ +#define temp1 misc0 +#define temp2 misc1 + PUSH (misc0) + PUSH (misc1) + /* build data object descriptor in temp1 */ + MOV (temp,temp1) + SAL (IM(TAG_SHIFTW),temp1) /* build descriptor */ + OR (IM(MAKE_TAG(DTAG_arr_data)),temp1) + /* store descriptor and bump allocation pointer */ + MOV (temp1,REGIND(allocptr)) + ADD (IM(4),allocptr) + /* allocate and initialize data object */ + MOV (allocptr,temp1) /* temp1 := array data ptr */ + MOV (REGOFF(4,stdarg),temp2) /* temp2 := initial value */ +LABEL(L_array_lp) + MOV (temp2,REGIND(allocptr)) /* init array */ + ADD (IM(4),allocptr) + SUB (IM(1),temp) + JNE (L_array_lp) + /* Allocate array header */ + MOV (IM(DESC_polyarr),REGIND(allocptr)) /* descriptor */ + ADD (IM(4),allocptr) + MOV (REGIND(stdarg),temp) /* temp := length */ + MOV (allocptr, stdarg) /* result := header addr */ + MOV (temp1, REGIND(allocptr)) /* store pointer to data */ + MOV (temp, REGOFF(4,allocptr)) /* store length */ + ADD (IM(8),allocptr) + /* restore misc0 and misc1 */ + POP (misc1) + POP (misc0) + CONTINUE +#undef temp1 +#undef temp2 + + /* large arrays are allocated in the runtime system */ +LABEL(L_array_large) + MOVE (stdlink,temp,pc) + MOV (IM(REQ_ALLOC_ARRAY),request_w) + JMP (set_request) + + +/* create_r : int -> realarray + * + * Alocate an uninitialized packed array of 64-bit reals. + */ +ALIGNED_ENTRY(create_r_a) + CHECKLIMIT + MOV (stdarg,temp) /* temp := length */ + SAR (IM(1),temp) /* temp := untagged length */ + SAL (IM(1),temp) /* temp := length in words */ + CMP (IM(SMALL_OBJ_SZW),temp) + JGE (L_create_r_large) + +#define temp1 misc0 + PUSH (misc0) /* use misc0 as temp1 */ + + OR (IM(4),allocptr) /* align allocptr on 32-bit x86 */ + + /* allocate the data object */ + MOV (temp,temp1) + SAL (IM(TAG_SHIFTW),temp1) /* temp1 := descriptor */ + OR (IM(MAKE_TAG(DTAG_raw64)),temp1) + MOV (temp1,REGIND(allocptr)) /* store descriptor */ + ADD (IM(4),allocptr) /* allocptr++ */ + MOV (allocptr,temp1) /* temp1 := data object */ + SAL (IM(2),temp) /* temp := length in bytes */ + ADD (temp,allocptr) /* allocptr += length */ + + /* allocate the header object */ + MOV (IM(DESC_real64arr),REGIND(allocptr)) + ADD (IM(4),allocptr) /* allocptr++ */ + MOV (temp1,REGIND(allocptr)) /* header data */ + MOV (stdarg,REGOFF(4,allocptr)) /* header length */ + MOV (allocptr,stdarg) /* stdarg := header obj */ + ADD (IM(8),allocptr) /* allocptr += 2 */ + + POP (misc0) + CONTINUE +#undef temp1 + +LABEL(L_create_r_large) + MOVE (stdlink,temp,pc) + MOV (IM(REQ_ALLOC_REALDARRAY),request_w) + JMP (set_request) + + +/* create_b : int -> bytearray + * + * Allocate an uninitialized packed array of bytes. + */ +ALIGNED_ENTRY(create_b_a) + CHECKLIMIT + MOV (stdarg,temp) /* temp is tagged length */ + SAR (IM(1),temp) /* temp >>= 1; (untag length) */ + ADD (IM(3),temp) /* temp += 3; */ + SAR (IM(2),temp) /* temp >>= 2; (length in 4-byte words) */ + CMP (IM(SMALL_OBJ_SZW),temp) + JGE (L_create_b_large) + +#define temp1 misc0 + PUSH (misc0) + + /* allocate the data object */ + MOV (temp,temp1) + SAL (IM(TAG_SHIFTW),temp1) + OR (IM(MAKE_TAG(DTAG_raw)),temp1) + MOV (temp1,REGIND(allocptr)) /* store descriptor */ + ADD (IM(4),allocptr) + MOV (allocptr,temp1) /* temp1 is data object */ + SAL (IM(2),temp) /* temp is size in bytes */ + ADD (temp,allocptr) /* allocptr += length */ + + /* allocate the header object */ + MOV (IM(DESC_word8arr),REGIND(allocptr)) + ADD (IM(4),allocptr) + MOV (temp1,REGIND(allocptr)) + MOV (stdarg,REGOFF(4,allocptr)) + MOV (allocptr,stdarg) /* stdarg := header */ + ADD (IM(8),allocptr) /* allocptr += 2 */ + POP (misc0) + CONTINUE +#undef temp1 + +LABEL(L_create_b_large) + MOVE (stdlink,temp,pc) + MOV (IM(REQ_ALLOC_BYTEARRAY),request_w) + JMP (set_request) + + +/* create_s : int -> string + * + * Allocate an uninitialized packed vector of bytes. + */ +ALIGNED_ENTRY(create_s_a) + CHECKLIMIT + MOV (stdarg,temp) + SAR (IM(1),temp) /* untag */ + ADD (IM(4),temp) /* 3 + extra byte */ + SAR (IM(2),temp) /* length in words */ + CMP (IM(SMALL_OBJ_SZW),temp) + JGE (L_create_s_large) + + PUSH (misc0) +#define temp1 misc0 + + MOV (temp,temp1) + SAL (IM(TAG_SHIFTW),temp1) + OR (IM(MAKE_TAG(DTAG_raw)),temp1) + MOV (temp1,REGIND(allocptr)) /* store descriptor */ + ADD (IM(4),allocptr) + + MOV (allocptr,temp1) /* temp1 is data obj */ + SAL (IM(2),temp) /* bytes len */ + ADD (temp,allocptr) /* allocptr += length */ + MOV (IM(0),REGOFF((-4),allocptr)) /* zero out last word */ + + /* allocate header obj */ + MOV (IM(DESC_string),temp) /* hdr descr */ + MOV (temp,REGIND(allocptr)) + ADD (IM(4),allocptr) + MOV (temp1,REGIND(allocptr)) /* hdr data */ + MOV (stdarg,REGOFF(4,allocptr)) /* hdr length */ + MOV (allocptr, stdarg) /* stdarg is hdr obj */ + ADD (IM(8),allocptr) /* allocptr += 2 */ + + POP (misc0) +#undef temp1 + CONTINUE + +LABEL(L_create_s_large) + MOVE (stdlink, temp, pc) + MOV (IM(REQ_ALLOC_STRING),request_w) + JMP (set_request) + + +/* create_v_a : int * 'a list -> 'a vector + * creates a vector with elements taken from a list. + * n.b. The frontend ensures that list cannot be nil. + */ +ALIGNED_ENTRY(create_v_a) + CHECKLIMIT + MOV (REGIND(stdarg),temp) /* temp = len tagged */ + PUSH (misc0) + PUSH (misc1) +#define temp1 misc0 +#define temp2 misc1 + MOV (temp,temp1) + SAR (IM(1),temp1) /* temp1 = untagged len */ + CMP (IM(SMALL_OBJ_SZW),temp1) + JGE (L_create_v_large) + + + SAL (IM(TAG_SHIFTW),temp1) + OR (IM(MAKE_TAG(DTAG_vec_data)),temp1) + MOV (temp1,REGIND(allocptr)) + ADD (IM(4),allocptr) + MOV (REGOFF(4,stdarg),temp1) /* temp1 is list */ + MOV (allocptr,stdarg) /* stdarg is vector */ + +LABEL(L_create_v_lp) + MOV (REGIND(temp1),temp2) /* hd */ + MOV (temp2,REGIND(allocptr)) /* store into vector */ + ADD (IM(4),allocptr) + MOV (REGOFF(4,temp1),temp1) /* tl */ + CMP (IM(ML_nil),temp1) /* isNull */ + JNE L_create_v_lp + + /* allocate header object */ + MOV (IM(DESC_polyvec),temp1) + MOV (temp1,REGIND(allocptr)) + ADD (IM(4),allocptr) + MOV (stdarg,REGIND(allocptr)) /* data */ + MOV (temp,REGOFF(4,allocptr)) /* len */ + MOV (allocptr,stdarg) /* result */ + ADD (IM(8),allocptr) /* allocptr += 2 */ + + POP (misc1) + POP (misc0) + CONTINUE + +LABEL(L_create_v_large) + POP (misc1) + POP (misc0) + MOVE (stdlink, temp, pc) + MOV (IM(REQ_ALLOC_VECTOR),request_w) + JMP (set_request) +#undef temp1 +#undef temp2 + + +/* try_lock: spin_lock -> bool. + * low-level test-and-set style primitive for mutual-exclusion among + * processors. For now, we only provide a uni-processor trivial version. + */ +ALIGNED_ENTRY(try_lock_a) +#if (MAX_PROCS > 1) +# error multiple processors not supported +#else /* (MAX_PROCS == 1) */ + MOV ((stdarg), temp) /* Get old value of lock. */ + MOV (IM(1), (stdarg)) /* Set the lock to ML_false. */ + MOV (temp, stdarg) /* Return old value of lock. */ + CONTINUE +#endif + +/* unlock : releases a spin lock + */ +ALIGNED_ENTRY(unlock_a) +#if (MAX_PROCS > 1) +# error multiple processors not supported +#else /* (MAX_PROCS == 1) */ + MOV (IM(3), (stdarg)) /* Store ML_true into lock. */ + MOV (IM(1), stdarg) /* Return unit. */ + CONTINUE +#endif + + +/********************* Floating point functions. *********************/ + +/* + * Initialize the 80387 floating point coprocessor. First, the floating + * point control word is initialized (undefined fields are left + * unchanged). Rounding control is set to "nearest" (although floor_a + * needs "toward negative infinity"). Precision control is set to + * "double". The precision, underflow, denormal + * overflow, zero divide, and invalid operation exceptions + * are masked. Next, seven of the eight available entries on the + * floating point register stack are claimed (see x86/x86.sml). + * + * NB: this cannot trash any registers because it's called from request_fault. + */ +ALIGNED_ENTRY(FPEEnable) + FINIT + /* Temp space.Keep stack aligned. */ + SUB (IM(4), ESP) + /* Store FP control word. */ + FSTCW (REGIND_16(ESP)) + /* Keep undefined fields, clear others. */ + ANDW (IM(HEXLIT(f0c0)), REGIND(ESP)) + /* Set fields (see above). */ + ORW (IM(HEXLIT(023f)), REGIND(ESP)) + FLDCW (REGIND_16(ESP)) /* Install new control word. */ + ADD (IM(4), ESP) + RET + +/* floor : real -> int + Return the nearest integer that is less or equal to the argument. + Caller's responsibility to make argument in range. */ + +ALIGNED_ENTRY(floor_a) + /* Get FP control word. */ +/* FIXME: use the stack !!! */ + FSTCW (tempword1) + MOVW (tempword1,REG(ax)) + /* Clear rounding field. */ + ANDW (IM(HEXLIT(f3ff)), REG(ax)) + /* Round towards neg. infinity. */ + ORW (IM(HEXLIT(0400)), REG(ax)) + MOVW (REG(ax), tempword2) + FLDCW (tempword2) /* Install new control word. */ + FLD (REGIND_DBL(stdarg)) + SUB (IM(4),ESP) + FISTP (REGIND(ESP)) + POP (stdarg) + SAL (IM(1),stdarg) + INC (stdarg) + + FLDCW (tempword1) + CONTINUE + + + /* DEPRECATED */ +/* logb : real -> int + * Extract the unbiased exponent pointed to by stdarg. + * Note: Using fxtract, and fistl does not work for inf's and nan's. + */ +ALIGNED_ENTRY(logb_a) + MOV (REGOFF(4,stdarg),temp) /* msb for little endian arch */ + SAR (IM(20),temp) /* throw out 20 bits */ + AND (IM(HEXLIT(7ff)),temp) /* clear all but 11 low bits */ + SUB (IM(1023),temp) /* unbias */ + SAL (IM(1),temp) /* room for tag bit */ + ADD (IM(1),temp) /* tag bit */ + MOV (temp,stdarg) + CONTINUE + + +/* scalb : (real * int) -> real + * Scale the first argument by 2 raised to the second argument. + * NB: We assume the first floating point "register" is + * caller-save, so we can use it here (see x86/x86.sml). + */ +ALIGNED_ENTRY(scalb_a) + CHECKLIMIT + PUSH (REGOFF(4,stdarg)) /* Get copy of scalar. */ + SAR (IM(1),REGIND(ESP)) /* Untag it. */ + FILDL (REGIND(ESP)) /* Load it ... */ + MOV (REGIND(stdarg), temp) /* Get pointer to real. */ + FLD (REGOFF_DBL(0,temp)) /* Load it into temp. */ + FSCALE /* Multiply exponent by scalar. */ + MOV (IM(DESC_reald), REGIND(allocptr)) + OR (IM(4),allocptr) /* align allocptr on 32-bit x86 */ + FSTPL (REGOFF_DBL(4,allocptr)) /* Store resulting float. */ + ADD (IM(4),allocptr) /* Allocate word for tag. */ + MOV (allocptr, stdarg) /* Return a pointer to the float. */ + ADD (IM(8), allocptr) /* Allocate room for float. */ + FSTPL (REGIND_DBL(ESP)) + ADD (IM(4),ESP) /* discard copy of scalar */ + CONTINUE + +END + +/* end of X86.prim.asm */ diff --git a/base/runtime/mach-dep/cygwin-fault.c b/base/runtime/mach-dep/cygwin-fault.c new file mode 100644 index 0000000..3cbed9b --- /dev/null +++ b/base/runtime/mach-dep/cygwin-fault.c @@ -0,0 +1,259 @@ +/*! \file cygwin-fault.c + * + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * Special signal handling for cygwin on Windows. + * + * Even though cygwin behaves like "unix", its signal handling mechanism + * is crippled. I haven't been able to get/set the EIP addresses from + * the siginfo_t and related data structures. So here I'm using + * Windows and some gcc assembly hacks to get things done. + */ + +#if defined(__i386__) && defined(__CYGWIN32__) && defined(__GNUC__) + +#include "ml-unixdep.h" +#include "signal-sysdep.h" +#include "ml-base.h" +#include "vproc-state.h" +#include "ml-state.h" +#include "ml-globals.h" + +#include + +/******************** from exceptions.h ******************** + * + * Older versions of Cygwin had a file /usr/include/exceptions.h, but this + * file has disappeared in more recent versions. Here is the key bits + * taken from ftp://ftp.com.univ-mrs.fr/pub/cygwin/usr/include/exceptions.h + * + */ + +/* exceptions.h + + Copyright 1996, 1997, 1998, 2001 Red Hat, Inc. + +This file is part of Cygwin. + +This software is a copyrighted work licensed under the terms of the +Cygwin license. Please consult the file "CYGWIN_LICENSE" for +details. */ + +#ifndef _EXCEPTIONS_H +#define _EXCEPTIONS_H + +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ + +/* Documentation on the innards of exception handling (i.e. from the + perspective of a compiler implementor) apparently doesn't exist. Sigh. + However, the following came from Onno Hovers + +The first pointer to the chain of handlers is in the thread environment block +at FS:[0]. This chain has the following format: + +typedef struct __EXCEPTION_FRAME +{ + struct __EXCEPTION_FRAME *Prev; /-* pointer to the previous frame *-/ + PEXCEPTION_HANDLER Handler; /-* handler function *-/ +} + +You register an exception handler in your compiler with this simple ASM +sequence: + PUSH _MyExceptionHandler + PUSH FS:[0] + MOV FS:[0],ESP +An exception frame MUST be on the stack! The frame may have more fields and +both Visual C++ and Borland C++ use more fields for themselves. + +When an exception occurs the system calls all handlers starting with the +handler at FS:0, and then the previous etc. until one handler returns +ExceptionContinueExecution, which is 0. If a handler does not want to handle +the exception it should just return ExceptionContinueSearch, which is 1. + +The handler has the following parameters: +ehandler ( + PEXCEPTION_RECORD erecord, + PEXCEPTION_FRAME myframe, + PCONTEXT context, /-* context before and after *-/ + PVOID dispatch) /-* something *-/ + +When a handler wants to handle the exception, it has some alternatives: + +-one is to do do something about the exception condition, like emulating +an invalid instruction, mapping memory where there was a page fault, etc. +If the handler wants to have the context of the thread that causes the +exception changed, it should make that change in the context passed to the +handler. + +-the second alternative is to call all exception handlers again, indicating +that you want them to clean up. This way all the __finally blocks get +executed. After doing that you change the context passed to the handler so +the code starts executing in the except block. For this purpose you could +call RtlUnwind. This (undocumented) function calls all exception handlers +up to but not including the exception frame passed to it. If NULL is passed +as exception frame RtlUnwind calls all exception handlers and then exits the +process. The parameters to RtlUnwind are: + +RtlUnwind ( + PEXCEPTION_FRAME endframe, + PVOID unusedEip, + PEXCEPTION_RECORD erecord, + DWORD returnEax) + +You should set unusedEip to the address where RtlUnwind should return like +this: + PUSH 0 + PUSH OFFSET ReturnUnwind + PUSH 0 + PUSH 0 + CALL RtlUnwind +ReturnUnwind: + ..... + +If no EXCEPTION_RECORD is passed, RtlUnwind makes a default exception +record. In any case, the ExceptionFlags part of this record has the +EH_UNWINDING (=2), flag set. (and EH_EXIT_UNWIND (=4), when NULL is passed as the end +frame.). + +The handler for a exception as well as a for unwinds may be executed in the +thread causing the exception, but may also be executed in another (special +exception) thread. So it is not wise to make any assumptions about that! + +As an alternative you may consider the SetUnhandledExceptionFilter API +to install your own exception filter. This one is documented. +*/ + +/* The January 1994 MSJ has an article entitled "Clearer, More Comprehensive + Error Processing with Win32 Structured Exception Handling". It goes into + a teensy bit of detail of the innards of exception handling (i.e. what we + have to do). */ + +typedef int (exception_handler) + (EXCEPTION_RECORD *, void *, CONTEXT *, void *); + +typedef struct _exception_list +{ + struct _exception_list *prev; + exception_handler *handler; + + /* We're apparently free to add more stuff here. + At present we don't need any. */ +} exception_list; + +void init_exceptions (exception_list *); + +#ifdef __cplusplus +}; +#endif /* __cplusplus */ + +#endif /* _EXCEPTIONS_H */ + +/******************** end exceptions.h ********************/ + + +#define SELF_VPROC (VProc[0]) + +/* generic handler for cygwin "signals" such as interrupt, alarm */ +/* returns TRUE if the main thread is running ML code */ +BOOL cygwin_generic_handler(int code) +{ + vproc_state_t *vsp = SELF_VPROC; + + vsp->vp_sigCounts[code].nReceived++; + vsp->vp_totalSigCount.nReceived++; + + vsp->vp_limitPtrMask = 0; + + if (vsp->vp_inMLFlag && + (! vsp->vp_handlerPending) && + (! vsp->vp_inSigHandler)) + { + vsp->vp_handlerPending = TRUE; + SIG_ZeroLimitPtr(); + return TRUE; + } + return FALSE; +} + +PVT BOOL __stdcall ctrl_c_handler(DWORD type) +{ + switch (type) + { + case CTRL_C_EVENT: + case CTRL_BREAK_EVENT: + if (cygwin_generic_handler(SIGINT)) { + return TRUE; + } + return FALSE; + default: + return FALSE; + } +} + +void InitFaultHandlers() +{ + /* Install the control-C handler */ + if (! SetConsoleCtrlHandler(ctrl_c_handler, TRUE)) + { + Die("cygwin:InitFaultHandlers: can't install ctrl-c-handler\n"); + } + /* Initialize the floating-point unit */ + SIG_InitFPE (); +} + +/* + * This filter catches all exceptions. + */ +PVT int page_fault_handler + (EXCEPTION_RECORD * exn, void * foo, CONTEXT * c, void * bar) +{ + extern Word_t request_fault[]; + ml_state_t * msp = SELF_VPROC->vp_state; + int code = exn->ExceptionCode; + DWORD pc = (DWORD)exn->ExceptionAddress; + + if (! SELF_VPROC->vp_inMLFlag) { + Die("cygwin:fault_handler: bogus fault not in ML: %#x\n", code); + } + + switch (code) { + case EXCEPTION_INT_DIVIDE_BY_ZERO: + case EXCEPTION_INT_OVERFLOW: + /* all arithmetic exceptions get mapped to Overflow, since the compiler + * generates code to check for divide by zero. + */ + /* Say("Overflow at %p\n", pc); */ + msp->ml_faultExn = OverflowId; + msp->ml_faultPC = pc; + c->Eip = (DWORD)request_fault; + break; + default: + Die("cygwin:fault_handler: unexpected fault @%#x, code=%#x", pc, code); + } + return FALSE; +} + +asm (".equ __win32_exception_list,0"); +extern exception_list * + _win32_exception_list asm ("%fs:__win32_exception_list"); + +/* + * This overrides the default RunML. + * It just adds a new exception handler at the very beginning before + * ML is executed. + */ +void RunML(ml_state_t * msp) +{ + extern void SystemRunML(ml_state_t *); + + exception_list el; + el.handler = page_fault_handler; + el.prev = _win32_exception_list; + _win32_exception_list = ⪙ + return SystemRunML(msp); +} + +#endif diff --git a/base/runtime/mach-dep/ml-fp.h b/base/runtime/mach-dep/ml-fp.h new file mode 100644 index 0000000..4872403 --- /dev/null +++ b/base/runtime/mach-dep/ml-fp.h @@ -0,0 +1,28 @@ +/* ml-fp.h + * + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * NOTE: changes to this file must be tracked in ml-fp.c. + */ + + +#ifdef ARCH_X86 + +extern void Save_C_FPState(); +extern void Restore_C_FPState(); + +extern void Save_ML_FPState(); +extern void Restore_ML_FPState(); + +#else + +#define Save_C_FPState() +#define Restore_C_FPState() +#define Save_ML_FPState() +#define Restore_ML_FPState() + +#endif + +/* end of ml-fp.h */ + diff --git a/base/runtime/mach-dep/signal-sysdep.h b/base/runtime/mach-dep/signal-sysdep.h new file mode 100644 index 0000000..c55ae68 --- /dev/null +++ b/base/runtime/mach-dep/signal-sysdep.h @@ -0,0 +1,474 @@ +/* signal-sysdep.h + * + * COPYRIGHT (c) 2019 The SML/NJ Fellowship. + * All rights reserved. + * + * O.S. and machine dependent signal definitions for UNIX systems: + * + * typedef SigReturn_t the return type of a signal handler. + * typedef SigInfo_t the signal generation information passed to a + * a signal handler. + * typedef SigContext_t the context info passed to a signal handler. + * typedef SigMask_t the representation of a set of signals + * + * SIG_GetCode(info, scp) extract the signal generation information + * SIG_GetPC(scp) get the PC from the context + * SIG_SetPC(scp, addr) set the PC in the context to the address + * SIG_SetHandler(sig, h) set the signal handler + * SIG_SetDefault(sig) set the handler for sig to SIG_DFL + * SIG_SetIgnore(sig) set the handler for sig to SIG_IGN + * SIG_GetHandler(sig, h) get the current handler into h + * SIG_ClearMask(mask) clear the given signal mask. + * SIG_AddToMask(mask, sig) Add the given signal to the mask. + * SIG_isSet(mask, sig) Return true, if the signal is in the mask. + * SIG_SetMask(mask) Set the signal mask. + * SIG_GetMask(mask) Get the signal mask into the variable mask. + * + * SIG_FAULT[12] The signals used to detect faults. + * + * SIG_InitFPE() This macro is defined to be a routine for + * initializing the FPE hardware exception mechanism. + * + * SIG_ResetFPE(scp) This macro is defined to be a routine for resetting + * the signal handling state (or hardware status + * registers) on machines that require it; otherwise + * it is defined to the empty statement. + * + * There are two ways to force a GC when a signal occurs. For some machines, + * this is done in an assembly routine called ZeroLimitPtr; for others, this + * can be done directly by manipulating the signal context. The following + * macros are used for this purpose: + * + * USE_ZERO_LIMIT_PTR_FN If set, then we use the ZeroLimitPtr function. + * SIG_SavePC(msp, scp) Save the PC, so that ZeroLimitPtr can restore it. + * + * SIG_ZeroLimitPtr(scp) Set the limit pointer in the context to zero. + * + * NOTE: Currently SavedPC is a global (so that the asm code in adjust_limit + * can access it). Once we have a runtimeLink register that allows dynamic + * access to the MLState, we can move SavedPC to the ML State vector. + */ + +#ifndef _SIGNAL_SYSDEP_ +#define _SIGNAL_SYSDEP_ + +#ifndef _ML_OSDEP_ +#include "ml-osdep.h" +#endif + +#ifndef _ML_BASE_ +#include "ml-base.h" /* for Addr_t */ +#endif + +#if defined(OPSYS_UNIX) +# include +#endif + +#if defined(HAS_UCONTEXT) +#ifdef __APPLE__ +# include +#else +# include +#endif +#ifdef INCLUDE_SIGINFO_H +# include INCLUDE_SIGINFO_H +#endif + +typedef void SigReturn_t; +typedef siginfo_t *SigInfo_t; +typedef ucontext_t SigContext_t; + +#elif defined(HAS_SIGCONTEXT) + +typedef int SigInfo_t; +typedef struct sigcontext SigContext_t; +#endif + + +#if defined(HAS_POSIX_SIGS) +/** POSIX signals **/ +# if defined(HAS_UCONTEXT) +# define SIG_SetHandler(sig, h) { \ + struct sigaction __svec; \ + sigfillset(&(__svec.sa_mask)); \ + __svec.sa_flags = SA_SIGINFO; \ + __svec.sa_sigaction = (h); \ + sigaction ((sig), &__svec, 0); \ + } +# define SIG_SetIgnore(sig) { \ + struct sigaction __svec; \ + __svec.sa_flags = 0; \ + __svec.sa_handler = SIG_IGN; \ + sigaction ((sig), &__svec, 0); \ + } +# define SIG_SetDefault(sig) { \ + struct sigaction __svec; \ + __svec.sa_flags = 0; \ + __svec.sa_handler = SIG_DFL; \ + sigaction ((sig), &__svec, 0); \ + } +# else +# define SIG_SetHandler(sig, h) { \ + struct sigaction __svec; \ + sigfillset(&(__svec.sa_mask)); \ + __svec.sa_flags = 0; \ + __svec.sa_handler = (h); \ + sigaction ((sig), &__svec, 0); \ + } +# define SIG_SetIgnore(sig) SIG_SetHandler(sig, SIG_IGN) +# define SIG_SetDefault(sig) SIG_SetHandler(sig, SIG_DFL) +#endif +#define SIG_GetHandler(sig, h) { \ + struct sigaction __svec; \ + sigaction ((sig), NIL(struct sigaction *), &__svec); \ + (h) = __svec.sa_handler; \ + } +typedef sigset_t SigMask_t; +#define SIG_ClearMask(mask) sigemptyset(&(mask)) +#define SIG_AddToMask(mask, s) sigaddset(&(mask), (s)) +#define SIG_isSet(mask, s) sigismember(&(mask), (s)) +#define SIG_SetMask(mask) sigprocmask(SIG_SETMASK, &(mask), NIL(sigset_t *)) +#define SIG_GetMask(mask) sigprocmask(SIG_SETMASK, NIL(sigset_t *), &(mask)) + +#elif defined(HAS_BSD_SIGS) +/** BSD signals **/ +#define SIG_SetHandler(sig, h) { \ + struct sigvec __svec; \ + __svec.sv_mask = 0xFFFFFFFF; \ + __svec.sv_flags = SV_INTERRUPT; \ + __svec.sv_handler = (h); \ + sigvec ((sig), &__svec, 0); \ + } +#define SIG_SetIgnore(sig) SIG_SetHandler(sig, SIG_IGN) +#define SIG_SetDefault(sig) SIG_SetHandler(sig, SIG_DFL) +#define SIG_GetHandler(sig, h) { \ + struct sigvec __svec; \ + sigvec ((sig), NIL(struct sigvec *), &__svec); \ + (h) = __svec.sv_handler; \ + } +typedef int SigMask_t; +#define SIG_ClearMask(mask) ((mask) = 0) +#define SIG_AddToMask(mask, s) ((mask) |= sigmask(s)) +#define SIG_isSet(mask, s) (((mask) & sigmask(s)) != 0) +#define SIG_SetMask(mask) sigsetmask(mask) +#define SIG_GetMask(mask) { \ + int __tmpMask; \ + __tmpMask = 0xFFFFFFFF; \ + (mask) = sigsetmask(__tmpMask); \ + sigsetmask(mask); \ + } +#elif defined(OPSYS_WIN32) + /* no win32 signals yet */ +#else +# error no way to set signal handler +#endif + + +/** Machine/OS dependent stuff **/ + +#if defined(ARCH_SPARC) + +extern void SetFSR(int); + /* disable all FP exceptions */ +# define SIG_InitFPE() SetFSR(0) + +# if defined(OPSYS_SOLARIS) + /** SPARC, SOLARIS **/ +# define SIG_OVERFLOW SIGFPE + +# define SIG_GetCode(info,scp) ((info)->si_code) + +# define SIG_GetPC(scp) ((scp)->uc_mcontext.gregs[REG_PC]) +# define SIG_SetPC(scp, addr) { \ + (scp)->uc_mcontext.gregs[REG_PC] = (Addr_t)(addr); \ + (scp)->uc_mcontext.gregs[REG_nPC] = (Addr_t)(addr) + 4; \ + } +# define SIG_ZeroLimitPtr(scp) \ + { (scp)->uc_mcontext.gregs[REG_G4] = 0; } + +# endif + +#elif defined(ARCH_PPC) +# if defined (OPSYS_AIX) + /** RS6000 or PPC, AIX **/ +# include +# define SIG_OVERFLOW SIGTRAP + + PVT int SIG_GetCode (SigInfo_t info, SigContext_t *scp); +# define SIG_GetPC(scp) ((scp)->sc_jmpbuf.jmp_context.iar) +# define SIG_SetPC(scp, addr) \ + { (scp)->sc_jmpbuf.jmp_context.iar = (Addr_t)(addr); } +# define SIG_ZeroLimitPtr(scp) \ + { (scp)->sc_jmpbuf.jmp_context.gpr[15] = 0; } +# define SIG_ResetFPE(scp) { \ + SigContext_t *__scp = (scp); \ + struct mstsave *__scj = &(__scp->sc_jmpbuf.jmp_context); \ + fp_ctx_t __flt_ctx; \ + __scj->xer &= 0x3fffffff; \ + fp_sh_trap_info (__scp, &__flt_ctx); \ + fp_sh_set_stat (__scp, (__flt_ctx.fpscr & ~__flt_ctx.trap)); \ + } + typedef void SigReturn_t; + +# elif defined(OPSYS_DARWIN) + /* PPC, Darwin */ +# define SIG_InitFPE() set_fsr() +# define SIG_ResetFPE(scp) +# define SIG_OVERFLOW SIGTRAP + /* info about siginfo_t is missing in the include files 4/17/2001 */ +# define SIG_GetCode(info,scp) 0 + /* see /usr/include/mach/ppc/thread_status.h */ +# define SIG_GetPC(scp) ((scp)->uc_mcontext->ss.srr0) +# define SIG_SetPC(scp, addr) {(scp)->uc_mcontext->ss.srr0 = (Addr_t) addr;} + /* The offset of 17 is hardwired from reverse engineering the contents of + * sc_regs. 17 is the offset for register 15. + */ +# define SIG_ZeroLimitPtr(scp) { (scp)->uc_mcontext->ss.r15 = 0; } + +# elif (defined(ARCH_PPC) && defined(OPSYS_LINUX)) + /* PPC, Linux */ + +# include + typedef struct sigcontext_struct SigContext_t; + +# define SIG_OVERFLOW SIGTRAP + +# define SIG_GetPC(scp) ((scp)->regs->nip) +# define SIG_SetPC(scp, addr) { (scp)->regs->nip = (Addr_t)(addr); } +# define SIG_ZeroLimitPtr(scp) { ((scp)->regs->gpr[15] = 0); } /* limitptr = 15 (see src/runtime/mach-dep/PPC.prim.asm) */ +# define SIG_GetCode(info,scp) ((scp)->regs->gpr[PT_FPSCR]) +# define SIG_ResetFPE(scp) { (scp)->regs->gpr[PT_FPSCR] = 0x0; } + typedef void SigReturn_t; + +# elif defined(OPSYS_OPENBSD) + /** PPC, OpenBSD **/ + +# define SIG_OVERFLOW SIGTRAP +# define SIG_GetPC(scp) ((scp)->sc_frame.srr0) +# define SIG_SetPC(scp, addr) { (scp)->sc_frame.srr0 = (Addr_t)(addr); } +# define SIG_ZeroLimitPtr(scp) { ((scp)->sc_frame.fixreg[15] = 0); } /* limitptr = 15 (see src/runtime/mach-dep/PPC.prim.asm) */ +# define SIG_GetCode(info,scp) (info) + + typedef void SigReturn_t; + +# endif /* ARCH_PPC */ + +#elif defined(ARCH_X86) + +# define LIMITPTR_X86OFFSET 3 /* offset (words) of limitptr in ML stack */ + /* frame (see X86.prim.asm) */ + extern Addr_t *ML_X86Frame; /* used to get at limitptr */ + extern void FPEEnable (); /* defined in X86.prim.asm */ +# define SIG_InitFPE() FPEEnable() + + /** OS-specific definitions for x86 */ +# if defined(OPSYS_CYGWIN) + /** x86, Cygwin -- see mach-dep/cygwin-fault.c */ + +# define SIG_OVERFLOW SIGFPE + +# define SIG_ZeroLimitPtr(scp) { ML_X86Frame[LIMITPTR_X86OFFSET] = 0; } + + typedef void SigReturn_t; + +# elif defined(OPSYS_DARWIN) + /** x86, Darwin **/ +# define SIG_OVERFLOW SIGFPE + + /* see /usr/include/mach/i386/thread_status.h */ +# define SIG_GetCode(info,scp) ((info)->si_code) +# if ((__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ - 1040) <= 0) + /* Tiger */ +# define SIG_GetPC(scp) ((scp)->uc_mcontext->ss.eip) +# define SIG_SetPC(scp, addr) { (scp)->uc_mcontext->ss.eip = (Addr_t) addr; } +# else + /* Leopard or later */ +# define SIG_GetPC(scp) ((scp)->uc_mcontext->__ss.__eip) +# define SIG_SetPC(scp, addr) { (scp)->uc_mcontext->__ss.__eip = (Addr_t) addr; } +# endif +# define SIG_ZeroLimitPtr(scp) { ML_X86Frame[LIMITPTR_X86OFFSET] = 0; } + +# elif defined(OPSYS_FREEBSD) + /** x86, FreeBSD **/ +# define SIG_OVERFLOW SIGFPE + +# define SIG_GetCode(info, scp) (info) +# define SIG_GetPC(scp) ((scp)->uc_mcontext.mc_eip) +# define SIG_SetPC(scp, addr) { (scp)->uc_mcontext.mc_eip = (Addr_t)(addr); } +# define SIG_ZeroLimitPtr(scp) { ML_X86Frame[LIMITPTR_X86OFFSET] = 0; } + + typedef void SigReturn_t; + +# elif defined(OPSYS_LINUX) + /** X86, LINUX **/ +# define INTO_OPCODE 0xce /* the 'into' instruction is a single */ + /* instruction that signals Overflow */ + +# define SIG_OVERFLOW SIGSEGV + +# define SIG_GetCode(info,scp) ((scp)->uc_mcontext.gregs[REG_EIP]) +/* for linux, SIG_GetCode simply returns the address of the fault */ +# define SIG_GetPC(scp) ((scp)->uc_mcontext.gregs[REG_EIP]) +# define SIG_SetPC(scp,addr) { (scp)->uc_mcontext.gregs[REG_EIP] = (Addr_t)(addr); } +# define SIG_ZeroLimitPtr(scp) { ML_X86Frame[LIMITPTR_X86OFFSET] = 0; } + +/* macro to check if SIGSEGV was caused by `into` instruction */ +# define SIG_IS_OVERFLOW_TRAP(sig,pc) \ + (((Byte_t*)pc)[-1] == 0xce) + +# elif defined(OPSYS_NETBSD2) + /** x86, NetBSD (version 2.x) **/ +# define SIG_OVERFLOW SIGFPE /* maybe this should be SIGBUS? */ + +# define SIG_GetCode(info, scp) (info) +# define SIG_GetPC(scp) ((scp)->sc_pc) +# define SIG_SetPC(scp, addr) { (scp)->sc_pc = (Addr_t)(addr); } +# define SIG_ZeroLimitPtr(scp) { ML_X86Frame[LIMITPTR_X86OFFSET] = 0; } + + typedef void SigReturn_t; + +# elif defined(OPSYS_NETBSD) + /** x86, NetBSD (version 3.x) **/ +# define SIG_OVERFLOW SIGFPE /* maybe this should be SIGBUS? */ + +# define SIG_GetCode(info, scp) (info) +# define SIG_GetPC(scp) (_UC_MACHINE_PC(scp)) +# define SIG_SetPC(scp, addr) { _UC_MACHINE_SET_PC(scp, ((Addr_t) (addr))); } +# define SIG_ZeroLimitPtr(scp) { ML_X86Frame[LIMITPTR_X86OFFSET] = 0; } + +# elif defined(OPSYS_OPENBSD) + /** x86, OpenBSD **/ +# define SIG_OVERFLOW SIGFPE /* maybe this should be SIGBUS? */ + +# define SIG_GetCode(info, scp) (info) +# define SIG_GetPC(scp) ((scp)->sc_pc) +# define SIG_SetPC(scp, addr) { (scp)->sc_pc = (Addr_t)(addr); } +# define SIG_ZeroLimitPtr(scp) { ML_X86Frame[LIMITPTR_X86OFFSET] = 0; } + + typedef void SigReturn_t; + +# elif defined(OPSYS_SOLARIS) + /** x86, Solaris */ +# define SIG_OVERFLOW SIGFPE + +# define SIG_GetCode(info, scp) ((info)->si_code) +# define SIG_GetPC(scp) ((scp)->uc_mcontext.gregs[EIP]) +# define SIG_SetPC(scp, addr) { (scp)->uc_mcontext.gregs[EIP] = (Addr_t)(addr); } +# define SIG_ZeroLimitPtr(scp) { ML_X86Frame[LIMITPTR_X86OFFSET] = 0; } + +# elif defined(OPSYS_WIN32) +# define SIG_ZeroLimitPtr() { ML_X86Frame[LIMITPTR_X86OFFSET] = 0; } + +# else +# error "unknown OPSYS for x86" +# endif + +#elif defined(ARCH_AMD64) + +# define SIG_InitFPE() + +# if defined(OPSYS_CYGWIN) + /** amd64, Cygwin -- see mach-dep/cygwin-fault.c */ + +# define SIG_OVERFLOW SIGFPE + +# define SIG_GetPC(scp) ((scp)->uc_mcontext.rip) +# define SIG_SetPC(scp, addr) { (scp)->uc_mcontext.rip = (Addr_t) addr; } +# define SIG_ZeroLimitPtr(scp) { (scp)->uc_mcontext.r14 = 0; } + + typedef void SigReturn_t; + +# error Cygwin/AMD64 not supported yet + +# elif defined(OPSYS_DARWIN) + /** amd64, Darwin **/ +# define SIG_OVERFLOW SIGFPE + + /* see /usr/include/mach/i386/thread_status.h */ +# define SIG_GetCode(info,scp) ((info)->si_code) +# define SIG_GetPC(scp) ((scp)->uc_mcontext->__ss.__rip) +# define SIG_SetPC(scp, addr) { (scp)->uc_mcontext->__ss.__rip = (Addr_t) addr; } +# define SIG_ZeroLimitPtr(scp) { (scp)->uc_mcontext->__ss.__r14 = 0; } + +# elif defined(OPSYS_FREEBSD) + /** amd64, FreeBSD **/ +# define SIG_OVERFLOW SIGFPE + +# define SIG_GetCode(info, scp) (info) +# define SIG_GetPC(scp) ((scp)->uc_mcontext.mc_rip) +# define SIG_SetPC(scp, addr) { (scp)->uc_mcontext.mc_rip = (Addr_t)(addr); } +# define SIG_ZeroLimitPtr(scp) { (scp)->uc_mcontext.mc_r14 = 0; } + + typedef void SigReturn_t; + +# elif defined(OPSYS_LINUX) + /** amd64, LINUX **/ +/* on linux, overflow can occur in two ways: + * (1) "int 4" instruction, which is invoked for addition and multiplication + * overflow, causes a SIGSEGV. + * (2) Division of the most negative number by -1 causes a SIGFPE. + */ + +# define SIG_OVERFLOW SIGSEGV +# define SIG_OVERFLOW2 SIGFPE + +# define SIG_GetCode(info,scp) ((scp)->uc_mcontext.gregs[REG_RIP]) +/* for linux, SIG_GetCode simply returns the address of the fault */ +# define SIG_GetPC(scp) ((scp)->uc_mcontext.gregs[REG_RIP]) +# define SIG_SetPC(scp,addr) { (scp)->uc_mcontext.gregs[REG_RIP] = (Addr_t)(addr); } +# define SIG_ZeroLimitPtr(scp) { (scp)->uc_mcontext.gregs[REG_R14] = 0; } + +/* macro to check if SIGSEGV was caused by `int 4` instruction */ +# define SIG_IS_OVERFLOW_TRAP(sig,pc) \ + (((sig) == SIG_OVERFLOW2) || \ + ((((Byte_t*)pc)[-2] == 0xcd) && (((Byte_t*)pc)[-1] == 0x04))) + +# elif defined(OPSYS_NETBSD) + /** amd64, NetBSD (version 3.x) **/ +# define SIG_OVERFLOW SIGFPE + +# define SIG_GetCode(info, scp) (info) +# define SIG_GetPC(scp) ((uc)->uc_mcontext.__gregs[_REG_RIP]) +# define SIG_SetPC(scp, addr) { (uc)->uc_mcontext.__gregs[_REG_RIP] = (Addr_t)(addr); } +# define SIG_ZeroLimitPtr(scp) { (scp)->uc_mcontext.__gregs[_REG_R14] = 0; } + +# error NetBSD/AMD64 not supported yet + +# elif defined(OPSYS_OPENBSD) + /** amd64, OpenBSD **/ +# define SIG_OVERFLOW SIGFPE + +# define SIG_GetCode(info, scp) (info) +# define SIG_GetPC(scp) ((scp)->sc_rip) +# define SIG_SetPC(scp, addr) { (scp)->sc_rip = (Addr_t)(addr); } +# define SIG_SIG_ZeroLimitPtr(scp) { (scp)->sc_r14 = 0; } + + typedef void SigReturn_t; + +# error OpenBSD/AMD64 not supported yet + +# elif defined(OPSYS_SOLARIS) + /** amd64, Solaris */ + +# define SIG_GetPC(scp) ((scp)->uc_mcontext.gregs[EIP]) +# define SIG_SetPC(scp, addr) { (scp)->uc_mcontext.gregs[EIP] = (Addr_t)(addr); } +/*# define SIG_ZeroLimitPtr(scp) { ML_X86Frame[LIMITPTR_X86OFFSET] = 0; }*/ + +# error Solaris/AMD64 not supported yet + +# else +# error "unknown OPSYS for amd64" +# endif + +#endif + +#ifndef SIG_InitFPE +#define SIG_InitFPE() /* nop */ +#endif + +#ifndef SIG_ResetFPE +#define SIG_ResetFPE(SCP) /* nop */ +#endif + +#endif /* !_SIGNAL_SYSDEP_ */ + diff --git a/base/runtime/mach-dep/signal-util.c b/base/runtime/mach-dep/signal-util.c new file mode 100644 index 0000000..8c9f7f4 --- /dev/null +++ b/base/runtime/mach-dep/signal-util.c @@ -0,0 +1,173 @@ +/*! \file signal-util.c + * + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * System independent utility routines for supporting signals and + * software polling. + */ + +#include +#include "ml-base.h" +#include "ml-limits.h" +#include "ml-state.h" +#include "vproc-state.h" +#include "ml-objects.h" +#include "ml-signals.h" +#include "system-signals.h" + + +/* GCSignal: + * + * Conditionally record a GC signal. + */ +void GCSignal (vproc_state_t *vsp, int nGen) +{ + if ((vsp->vp_gcSigState != ML_SIG_ENABLED) || (nGen < vsp->vp_gcSigThreshold)) { + /* the default behavior is to not generate GC signals */ + return; + } + + if (vsp->vp_inSigHandler && (vsp->vp_sigCode == RUNSIG_GC)) { + /* avoid generating GC signals while we are processing a GC signal; otherwise + * things can get out of hand! + */ + return; + } + + vsp->vp_sigCounts[RUNSIG_GC].nReceived++; + vsp->vp_totalSigCount.nReceived++; + + if (vsp->vp_inMLFlag && (! vsp->vp_handlerPending) && (! vsp->vp_inSigHandler)) { + vsp->vp_handlerPending = TRUE; + } + +} /* end of GCSignal */ + + +/* ChooseSignal: + * + * Choose which signal to pass to the ML handler and setup the ML state + * vector accordingly. + * WARNING: This should be called with signals masked to avoid race + * conditions. + */ +void ChooseSignal (vproc_state_t *vsp) +{ + int i, j, delta; + + /* scan the signal counts looking for a signal that needs to be handled. */ + i = vsp->vp_nextPendingSig; + j = 0; + do { + ASSERT (j++ < NUM_SIGS); + i++; + if (i == SIGMAP_SZ) i = MIN_SYSTEM_SIG; + delta = vsp->vp_sigCounts[i].nReceived - vsp->vp_sigCounts[i].nHandled; + } while (delta == 0); + vsp->vp_nextPendingSig = i; + + /* record the signal and count */ + vsp->vp_sigCode = i; + vsp->vp_sigCount = delta; + vsp->vp_sigCounts[i].nHandled += delta; + vsp->vp_totalSigCount.nHandled += delta; + +#ifdef SIGNAL_DEBUG +SayDebug ("ChooseSignal: sig = %d, count = %d\n", +vsp->vp_sigCode, vsp->vp_sigCount); +#endif + +} /* end of ChooseSignal */ + + +/* MakeResumeCont: + * + * Build the resume continuation for a signal or poll event handler. + * This closure contains the address of the resume entry-point and + * the registers from the ML State. + * + * At least 4K avail. heap assumed. + */ +ml_val_t MakeResumeCont (ml_state_t *msp, ml_val_t resume[]) +{ + /* allocate the resumption closure */ + ML_AllocWrite(msp, 0, MAKE_DESC(10, DTAG_record)); + ML_AllocWrite(msp, 1, PTR_CtoML(resume)); + ML_AllocWrite(msp, 2, msp->ml_arg); + ML_AllocWrite(msp, 3, msp->ml_cont); + ML_AllocWrite(msp, 4, msp->ml_closure); + ML_AllocWrite(msp, 5, msp->ml_linkReg); + ML_AllocWrite(msp, 6, msp->ml_pc); + ML_AllocWrite(msp, 7, msp->ml_exnCont); + /* John (Reppy) says that varReg should not be included here... + ML_AllocWrite(msp, 8, msp->ml_varReg); + */ + ML_AllocWrite(msp, 8, msp->ml_calleeSave[0]); + ML_AllocWrite(msp, 9, msp->ml_calleeSave[1]); + ML_AllocWrite(msp, 10, msp->ml_calleeSave[2]); + + return ML_Alloc(msp, 10); + +} /* end of MakeResumeCont */ + + +/* MakeHandlerArg: + * + * Build the argument record for the ML signal handler. It has the type + * + * val sigHandler : (int * int * unit cont) -> 'a + * + * The first argument is the signal code, the second is the signal count and the + * third is the resumption continuation. The ML signal handler should never + * return. + * NOTE: maybe this should be combined with ChooseSignal??? + */ +ml_val_t MakeHandlerArg (ml_state_t *msp, ml_val_t resume[]) +{ + ml_val_t resumeCont, arg; + vproc_state_t *vsp = msp->ml_vproc; + + resumeCont = MakeResumeCont(msp, resume); + + /* allocate the ML signal handler's argument record */ + REC_ALLOC3(msp, arg, + INT_CtoML(vsp->vp_sigCode), INT_CtoML(vsp->vp_sigCount), + resumeCont); + +#ifdef SIGNAL_DEBUG +SayDebug ("MakeHandlerArg: resumeC = %#x, arg = %#x\n", resumeCont, arg); +#endif + return arg; + +} /* end of MakeHandlerArg */ + + +/* LoadResumeState: + * + * Load the ML state with the state preserved in resumption continuation + * made by MakeResumeCont. + */ +void LoadResumeState (ml_state_t *msp) +{ + ml_val_t *contClosure; +#ifdef SIGNAL_DEBUG +SayDebug ("LoadResumeState:\n"); +#endif + + contClosure = PTR_MLtoC(ml_val_t, msp->ml_closure); + + msp->ml_arg = contClosure[1]; + msp->ml_cont = contClosure[2]; + msp->ml_closure = contClosure[3]; + msp->ml_linkReg = contClosure[4]; + msp->ml_pc = contClosure[5]; + msp->ml_exnCont = contClosure[6]; + /* John says ... + msp->ml_varReg = contClosure[7]; + */ + msp->ml_calleeSave[0] = contClosure[7]; + msp->ml_calleeSave[1] = contClosure[8]; + msp->ml_calleeSave[2] = contClosure[9]; + +} /* end of LoadResumeState */ diff --git a/base/runtime/mach-dep/unix-fault.c b/base/runtime/mach-dep/unix-fault.c new file mode 100644 index 0000000..7d2fba1 --- /dev/null +++ b/base/runtime/mach-dep/unix-fault.c @@ -0,0 +1,167 @@ +/*! \file unix-fault.c + * + * Common code for handling arithmetic traps and signals. + * + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * Common code for handling arithmetic traps. + */ + +#if defined(__CYGWIN32__) + +#include "cygwin-fault.c" + +#else + +#include "ml-unixdep.h" +#include "signal-sysdep.h" +#include "ml-base.h" +#include "vproc-state.h" +#include "ml-state.h" +#include "ml-globals.h" + +#ifdef SIGNAL_DEBUG +#include "gc.h" /* for BO_AddrToCodeObjTag */ +#endif + +/* this is temporary */ +#define SELF_VPROC (VProc[0]) + + +/* local routines */ +#if defined(HAS_POSIX_SIGS) && defined(HAS_UCONTEXT) +PVT SigReturn_t FaultHandler (int sig, SigInfo_t code, void *scp); +#elif (defined(ARCH_PPC) && defined(OPSYS_LINUX)) +PVT SigReturn_t FaultHandler (int sig, SigContext_t *scp); +#else +PVT SigReturn_t FaultHandler (int sig, SigInfo_t code, SigContext_t *scp); +#endif + + +/* InitFaultHandlers: + */ +void InitFaultHandlers () +{ + + /** Set up the Overflow fault(s) **/ +#ifdef SIG_OVERFLOW + SIG_SetHandler (SIG_OVERFLOW, FaultHandler); +#else +# error now signal for Overflow specified +#endif +#ifdef SIG_OVERFLOW2 + SIG_SetHandler (SIG_OVERFLOW2, FaultHandler); +#endif + + /** Initialize the floating-point unit **/ + SIG_InitFPE (); + +} /* end of InitFaultHandlers */ + + +/* FaultHandler: + * + * Handle arithmetic faults. Note that since floating-point arithmetic + * is non-trapping in SML and since the compiler generates code to + * explicitly test for division by zero, and arithmetic trap should be + * mapped to Overflow. + */ +#if defined(HAS_POSIX_SIGS) && defined(HAS_UCONTEXT) + +PVT SigReturn_t FaultHandler (int signal, siginfo_t *si, void *uc) +{ + ucontext_t *scp = (ucontext_t *)uc; + Addr_t pc = (Addr_t)SIG_GetPC(scp); + ml_state_t *msp = SELF_VPROC->vp_state; + extern Word_t request_fault[]; + +#ifdef SIGNAL_DEBUG + SayDebug ("Fault handler: pc = %p, sig = %d, inML = %d\n", + (void*)pc, signal, SELF_VPROC->vp_inMLFlag); + if (SELF_VPROC->vp_inMLFlag) { + SayDebug (" source file: %s\n", (char *)BO_AddrToCodeObjTag(pc)); + } +#endif + + if (! SELF_VPROC->vp_inMLFlag) { + Die ("bogus fault not in ML: pc = %p, sig = %d\n", (void*)pc, signal); + } + +#ifdef SIG_IS_OVERFLOW_TRAP + /* verify that the signal actually comes from an overflow */ + if (! SIG_IS_OVERFLOW_TRAP(signal,pc)) { + Die ("bogus overflow fault: pc = %p, sig = %d\n", (void*)pc, signal); + } +#endif + + /* Map the signal to Overflow */ + msp->ml_faultExn = OverflowId; + msp->ml_faultPC = pc; + + SIG_SetPC (scp, request_fault); + + /* I don't think that this call is still necessary, since we are only + * dealing with integer overflow here! -- JHR (2019-10-10) + SIG_ResetFPE (scp); + */ + +} /* end of FaultHandler */ + +#else + +PVT SigReturn_t FaultHandler ( + int signal, +#if (defined(ARCH_PPC) && defined(OPSYS_LINUX)) + SigContext_t *scp) +#else + SigInfo_t info, + SigContext_t *scp) +#endif +{ + ml_state_t *msp = SELF_VPROC->vp_state; + extern Word_t request_fault[]; + int code = SIG_GetCode(info, scp); + +#ifdef SIGNAL_DEBUG + SayDebug ("Fault handler: sig = %d, inML = %d\n", + signal, SELF_VPROC->vp_inMLFlag); +#endif + + if (! SELF_VPROC->vp_inMLFlag) + Die ("bogus fault not in ML: sig = %d, code = %#x, pc = %#x)\n", + signal, SIG_GetCode(info, scp), SIG_GetPC(scp)); + + /* Map the signal to the appropriate ML exception. */ + /* Map the signal to Overflow */ + msp->ml_faultExn = OverflowId; + msp->ml_faultPC = (Word_t)SIG_GetPC(scp); + + SIG_SetPC (scp, request_fault); + + SIG_ResetFPE (scp); + +} /* end of FaultHandler */ + +#endif + +#if ((defined(ARCH_RS6000) || defined(ARCH_PPC)) && defined(OPSYS_AIX)) + +/* SIG_GetCode: + * + * For AIX, the overflow and divide by zero information is obtained + * from information contained in the sigcontext structure. + */ +PVT int SIG_GetCode (SigInfo_t code, SigContext_t *scp) +{ + struct fp_sh_info FPInfo; + + fp_sh_info (scp, &FPInfo, sizeof(struct fp_sh_info)); + + return FPInfo.trap; + +} /* end of SIG_GetCode */ + +#endif + +#endif /* !defined(__CYGWIN32__) */ diff --git a/base/runtime/mach-dep/unix-prof.c b/base/runtime/mach-dep/unix-prof.c new file mode 100644 index 0000000..3ea3e7c --- /dev/null +++ b/base/runtime/mach-dep/unix-prof.c @@ -0,0 +1,66 @@ +/*! \file unix-prof.c + * + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * SML Profiling support for Unix. + */ + +#include "ml-unixdep.h" +#include "signal-sysdep.h" +#include "ml-base.h" +#include "ml-values.h" +#include "ml-objects.h" +#include "ml-globals.h" +#include "profile.h" + + +/* The pointer to the heap allocated array of call counts. + * When this pointer is ML_unit, then profiling is disabled. + */ +ml_val_t ProfCntArray = ML_unit; + +/* local routines */ +#if defined(HAS_POSIX_SIGS) && defined(HAS_UCONTEXT) +PVT SigReturn_t ProfSigHandler (int sig, SigInfo_t info, void *scp); +#elif (defined(ARCH_PPC) && defined(OPSYS_LINUX)) +PVT SigReturn_t ProfSigHandler (int sig, SigContext_t *scp); +#else +PVT SigReturn_t ProfSigHandler (int sig, SigInfo_t info, SigContext_t *scp); +#endif + + +/* EnableProfSignals: + */ +void EnableProfSignals () +{ + SIG_SetHandler (SIGVTALRM, ProfSigHandler); + +} /* end of EnableProfSignals */ + +/* DisableProfSignals: + */ +void DisableProfSignals () +{ + SIG_SetDefault (SIGVTALRM); + +} /* end of DisableProfSignals */ + +/* ProfSigHandler: + * + * The handler for SIGVTALRM signals. + */ +#if defined(HAS_POSIX_SIGS) && defined(HAS_UCONTEXT) +PVT SigReturn_t ProfSigHandler (int sig, SigInfo_t info, void *scp) +#elif (defined(ARCH_PPC) && defined(OPSYS_LINUX)) +PVT SigReturn_t ProfSigHandler (int sig, SigContext_t *scp) +#else +PVT SigReturn_t ProfSigHandler (int sig, SigInfo_t info, SigContext_t *scp) +#endif +{ + Word_t *arr = GET_SEQ_DATAPTR(Word_t, ProfCntArray); + int indx = INT_MLtoC(DEREF(ProfCurrent)); + + arr[indx]++; + +} /* end of ProfSigHandler */ diff --git a/base/runtime/mach-dep/unix-signal.c b/base/runtime/mach-dep/unix-signal.c new file mode 100644 index 0000000..ff608e3 --- /dev/null +++ b/base/runtime/mach-dep/unix-signal.c @@ -0,0 +1,283 @@ +/*! \file unix-signal.c + * + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * Unix specific code to support ML signals. + */ + +#include "ml-unixdep.h" +#include "signal-sysdep.h" +#include "ml-base.h" +#include "ml-limits.h" +#include "ml-state.h" +#include "vproc-state.h" +#include "ml-objects.h" +#include "ml-signals.h" +#include "system-signals.h" +#include "ml-globals.h" + + +/* The generated sys_const_t table for UNIX signals */ +#include "unix-sigtbl.c" + + +#ifndef MP_SUPPORT +#define SELF_VPROC (VProc[0]) +#else +/** for MP_SUPPORT, we'll use SELF_VPROC for now **/ +#define SELF_VPROC (VProc[0]) +#endif + + +#ifdef USE_ZERO_LIMIT_PTR_FN +Addr_t SavedPC; +extern ZeroLimitPtr[]; +#endif + +/* local routines */ +#if defined(HAS_POSIX_SIGS) && defined(HAS_UCONTEXT) +PVT SigReturn_t CSigHandler (int sig, SigInfo_t info, void *scp); +#elif (defined(ARCH_PPC) && defined(OPSYS_LINUX)) +PVT SigReturn_t CSigHandler (int sig, SigContext_t *scp); +#else +PVT SigReturn_t CSigHandler (int sig, SigInfo_t info, SigContext_t *scp); +#endif + + +/* ListSignals: + */ +ml_val_t ListSignals (ml_state_t *msp) +{ + return ML_SysConstList (msp, &SigTbl); + +} /* end of ListSignals. */ + +/* PauseUntilSignal: + * + * Suspend the given VProc until a signal is received. + */ +void PauseUntilSignal (vproc_state_t *vsp) +{ + pause (); + +} /* end of PauseUntilSignal */ + +/* SetSignalState: + */ +void SetSignalState (vproc_state_t *vsp, int sigNum, int sigState) +{ +/* QUESTIONS: + * If we disable a signal that has pending signals, should the pending + * signals be discarded? + * How do we keep track of the state of non-UNIX signals (e.g., GC). + */ + switch (sigNum) { + case RUNSIG_GC: + vsp->vp_gcSigState = sigState; + break; + default: + if (IS_SYSTEM_SIG(sigNum)) { + switch (sigState) { + case ML_SIG_IGNORE: + SIG_SetIgnore (sigNum); + break; + case ML_SIG_DEFAULT: + SIG_SetDefault (sigNum); + break; + case ML_SIG_ENABLED: + SIG_SetHandler (sigNum, CSigHandler); + break; + default: + Die ("bogus signal state: sig = %d, state = %d\n", + sigNum, sigState); + } /* end switch */ + } + else Die ("SetSignalState: unknown signal %d\n", sigNum); + } /* end of switch */ + +} /* end of SetSignalState */ + + +/* GetSignalState: + */ +int GetSignalState (vproc_state_t *vsp, int sigNum) +{ + switch (sigNum) { + case RUNSIG_GC: + return vsp->vp_gcSigState; + default: + if (IS_SYSTEM_SIG(sigNum)) { + SigReturn_t (*handler)(); + SIG_GetHandler (sigNum, handler); + if (handler == SIG_IGN) + return ML_SIG_IGNORE; + else if (handler == SIG_DFL) + return ML_SIG_DEFAULT; + else + return ML_SIG_ENABLED; + } + else Die ("GetSignalState: unknown signal %d\n", sigNum); + } + +} /* end of GetSignalState. */ + + +/* CSigHandler: + * + * The C signal handler for signals that are to be passed to the ML handler. + */ +#if defined(HAS_POSIX_SIGS) && defined(HAS_UCONTEXT) + +PVT SigReturn_t CSigHandler (int sig, siginfo_t *si, void *c) +{ + ucontext_t *scp = (ucontext_t *)c; + vproc_state_t *vsp = SELF_VPROC; + + vsp->vp_sigCounts[sig].nReceived++; + vsp->vp_totalSigCount.nReceived++; + +#ifdef SIGNAL_DEBUG +SayDebug ("\nCSigHandler[1]: sig = %d, inML = %d, pending = %d, inHandler = %d\n", +sig, vsp->vp_inMLFlag, vsp->vp_handlerPending, vsp->vp_inSigHandler); +#endif + + /* The following line is needed only when currently executing + * "pure" C code. But doing it anyway in all other cases will + * not hurt... */ + vsp->vp_limitPtrMask = 0; + + if (vsp->vp_inMLFlag && (! vsp->vp_handlerPending) && (! vsp->vp_inSigHandler)) { + vsp->vp_handlerPending = TRUE; +#ifdef USE_ZERO_LIMIT_PTR_FN + SIG_SavePC(vsp->vp_state, scp); + SIG_SetPC(scp, ZeroLimitPtr); +#else /* we can adjust the heap limit directly */ + SIG_ZeroLimitPtr(scp); +#endif + } + +} /* end of CSigHandler */ + +#else + +PVT SigReturn_t CSigHandler ( + int sig, +#if (defined(ARCH_PPC) && defined(OPSYS_LINUX)) + SigContext_t *scp) +#else + SigInfo_t info, + SigContext_t *scp) +#endif +{ +#if defined(OPSYS_LINUX) && defined(ARCH_X86) && defined(USE_ZERO_LIMIT_PTR_FN) + SigContext_t *scp = ≻ +#endif + vproc_state_t *vsp = SELF_VPROC; + + vsp->vp_sigCounts[sig].nReceived++; + vsp->vp_totalSigCount.nReceived++; + +#ifdef SIGNAL_DEBUG +SayDebug ("\nCSigHandler[2]: sig = %d, inML = %d, pending = %d, inHandler = %d\n", +sig, vsp->vp_inMLFlag, vsp->vp_handlerPending, vsp->vp_inSigHandler); +#endif + + /* The following line is needed only when currently executing + * "pure" C code. But doing it anyway in all other cases will + * not hurt... */ + vsp->vp_limitPtrMask = 0; + + if (vsp->vp_inMLFlag && (! vsp->vp_handlerPending) && (! vsp->vp_inSigHandler)) { + vsp->vp_handlerPending = TRUE; +#ifdef USE_ZERO_LIMIT_PTR_FN + SIG_SavePC(vsp->vp_state, scp); + SIG_SetPC(scp, ZeroLimitPtr); +#else /* we can adjust the heap limit directly */ + SIG_ZeroLimitPtr(scp); +#endif + } + +} /* end of CSigHandler */ + +#endif + + +/* SetSignalMask: + * + * Set the signal mask to the given list of signals. The sigList has the + * type: "sysconst list option", with the following semantics (see + * sml-nj/boot/smlnj/signals.sml): + * NONE -- the empty mask + * SOME[] -- mask all signals + * SOME l -- the signals in l are the mask + */ +void SetSignalMask (ml_val_t sigList) +{ + SigMask_t mask; + int i; + + SIG_ClearMask(mask); + + if (sigList != OPTION_NONE) { + sigList = OPTION_get(sigList); + if (LIST_isNull(sigList)) { + /* SOME[] -- mask all signals */ + for (i = 0; i < NUM_SYSTEM_SIGS; i++) { + SIG_AddToMask(mask, SigInfo[i].id); + } + } + else { + while (sigList != LIST_nil) { + ml_val_t car = LIST_hd(sigList); + int sig = REC_SELINT(car, 0); + SIG_AddToMask(mask, sig); + sigList = LIST_tl(sigList); + } + } + } + + SIG_SetMask(mask); + +} /* end of SetSignalMask */ + + +/* GetSignalMask: + * + * Return the current signal mask (only those signals supported my ML); like + * SetSignalMask, the result has the following semantics: + * NONE -- the empty mask + * SOME[] -- mask all signals + * SOME l -- the signals in l are the mask + */ +ml_val_t GetSignalMask (ml_state_t *msp) +{ + SigMask_t mask; + ml_val_t name, sig, sigList, res; + int i, n; + + SIG_GetMask(mask); + + /* count the number of masked signals */ + for (i = 0, n = 0; i < NUM_SYSTEM_SIGS; i++) { + if (SIG_isSet(mask, SigInfo[i].id)) n++; + } + + if (n == 0) + return OPTION_NONE; + else if (n == NUM_SYSTEM_SIGS) + sigList = LIST_nil; + else { + for (i = 0, sigList = LIST_nil; i < NUM_SYSTEM_SIGS; i++) { + if (SIG_isSet(mask, SigInfo[i].id)) { + name = ML_CString (msp, SigInfo[i].name); + REC_ALLOC2(msp, sig, INT_CtoML(SigInfo[i].id), name); + LIST_cons(msp, sigList, sig, sigList); + } + } + } + + OPTION_SOME(msp, res, sigList); + return res; + +} /* end of GetSignalMask */ diff --git a/base/runtime/mach-dep/win32-fault.c b/base/runtime/mach-dep/win32-fault.c new file mode 100644 index 0000000..c4d9088 --- /dev/null +++ b/base/runtime/mach-dep/win32-fault.c @@ -0,0 +1,193 @@ +/*! \file win32-fault.c + * + * win32 code for handling traps (arithmetic overflow, div-by-0, ctrl-c, etc.). + */ + +/* + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + */ + +#include +#include + +#include "ml-base.h" +#include "vproc-state.h" +#include "ml-state.h" +#include "ml-globals.h" +#include "signal-sysdep.h" +#include "system-signals.h" + +#include "win32-fault.h" + +#define SELF_VPROC (VProc[0]) + +/* globals */ +HANDLE win32_stdin_handle; +HANDLE win32_console_handle; +HANDLE win32_stdout_handle; +HANDLE win32_stderr_handle; + +HANDLE win32_ML_thread_handle; +BOOL win32_isNT; + +/* static globals */ +PVT BOOL caught_cntrl_c = FALSE; + +void wait_for_cntrl_c () +{ + /* we know a cntrl_c is coming; wait for it */ + while (!caught_cntrl_c) { + continue; + } +} + +/* generic handler for win32 "signals" such as interrupt, alarm */ +/* returns TRUE if the main thread is running ML code */ +BOOL win32_generic_handler (int code) +{ + vproc_state_t *vsp = SELF_VPROC; + + vsp->vp_sigCounts[code].nReceived++; + vsp->vp_totalSigCount.nReceived++; + + vsp->vp_limitPtrMask = 0; + + if (vsp->vp_inMLFlag && (! vsp->vp_handlerPending) && (! vsp->vp_inSigHandler)) { + vsp->vp_handlerPending = TRUE; + SIG_ZeroLimitPtr(); + return TRUE; + } + return FALSE; +} + +/* cntrl_c_handler + * the win32 handler for ctrl-c + */ +PVT +BOOL cntrl_c_handler (DWORD fdwCtrlType) +{ + int ret = FALSE; + + /* SayDebug("event is %x\n", fdwCtrlType); */ + switch (fdwCtrlType) { + case CTRL_BREAK_EVENT: + case CTRL_C_EVENT: { + if (!win32_generic_handler(SIGINT)) { + caught_cntrl_c = TRUE; + } + ret = TRUE; /* we handled the event */ + break; + } + } + return ret; /* chain to other handlers */ +} + + +/* InitFaultHandlers: + */ +void InitFaultHandlers () +{ + /* some basic win32 initialization is done here */ + + /* determine if we're NT or 95 */ + win32_isNT = !(GetVersion() & 0x80000000); + + /* get the redirected handle; this is "stdin" */ + win32_stdin_handle = GetStdHandle(STD_INPUT_HANDLE); + /* get the actual handle of the console */ + win32_console_handle = CreateFile("CONIN$", + GENERIC_READ|GENERIC_WRITE, + FILE_SHARE_READ|FILE_SHARE_WRITE, + NULL, + OPEN_EXISTING, + 0,0); +#ifdef WIN32_DEBUG + if (win32_console_handle == INVALID_HANDLE_VALUE) { + SayDebug("win32: failed to get actual console handle"); + } +#endif + + win32_stdout_handle = GetStdHandle(STD_OUTPUT_HANDLE); + win32_stderr_handle = GetStdHandle(STD_ERROR_HANDLE); + +#ifdef WIN32_DEBUG + SayDebug("console input handle, %x\n", (unsigned int) win32_stdin_handle); + SayDebug("console output handle, %x\n", (unsigned int) win32_stdout_handle); + SayDebug("console error handle, %x\n", (unsigned int) win32_stderr_handle); +#endif + + /* create a thread id for the main thread */ + { + HANDLE cp_h = GetCurrentProcess(); + + if (!DuplicateHandle(cp_h, /* process with handle to dup */ + GetCurrentThread(), /* pseudohandle, hence the dup */ + cp_h, /* handle goes to current proc */ + &win32_ML_thread_handle, /* recipient */ + THREAD_ALL_ACCESS, + FALSE, + 0 /* no options */ + )) { + Die ("win32:InitFaultHandlers: cannot duplicate thread handle"); + } + } + + /* install the ctrl-C handler */ + if (!SetConsoleCtrlHandler((PHANDLER_ROUTINE)cntrl_c_handler,TRUE)) { + Die("win32:InitFaultHandlers: can't install cntrl_c_handler\n"); + } + + /* initialize the floating-point unit */ + SIG_InitFPE (); +} + +/* fault_handler: + * + * Handle arithmetic faults. Note that since floating-point arithmetic + * is non-trapping in SML and since the compiler generates code to + * explicitly test for division by zero, and arithmetic trap should be + * mapped to Overflow. + */ +PVT bool_t fault_handler (int code, Addr_t pc) +{ + ml_state_t *msp = SELF_VPROC->vp_state; + extern Word_t request_fault[]; + + if (! SELF_VPROC->vp_inMLFlag) { + Die ("win32:fault_handler: bogus fault not in ML: %#x\n", code); + } + + /* Map the signal to Overflow. */ + msp->ml_faultExn = OverflowId; + msp->ml_faultPC = pc; + + return TRUE; +} + +/* restoreregs + * this is where win32 handles traps + */ +int restoreregs (ml_state_t *msp) +{ + extern Word_t request_fault[]; + + caught_cntrl_c = FALSE; + __try{ + int request; + + request = asm_restoreregs(msp); + return request; + + } __except(fault_handler(GetExceptionCode(), (Addr_t)(GetExceptionInformation())->ContextRecord->Eip) ? +#ifdef ARCH_X86 + ((Word_t *)(GetExceptionInformation())->ContextRecord->Eip = request_fault, + EXCEPTION_CONTINUE_EXECUTION) : + EXCEPTION_CONTINUE_SEARCH) +#else +# error non-x86 win32 platforms need restoreregs support +#endif + { /* nothing */ } +} + +/* end of win32-fault.c */ diff --git a/base/runtime/mach-dep/win32-fault.h b/base/runtime/mach-dep/win32-fault.h new file mode 100644 index 0000000..6d3f1bc --- /dev/null +++ b/base/runtime/mach-dep/win32-fault.h @@ -0,0 +1,19 @@ +/* win32-fault.h + * + * COPYRIGHT (c) 1996 Bell Laboratories, Lucent Technologies + * + */ + +extern HANDLE win32_stdin_handle; +extern HANDLE win32_stdout_handle; +extern HANDLE win32_stderr_handle; + +extern HANDLE win32_ML_thread_handle; + +extern void wait_for_cntrl_c(void); +extern BOOL win32_generic_handler(int code); + +extern BOOL win32_isNT; + +/* end of win32-fault.h */ + diff --git a/base/runtime/mach-dep/win32-signal.c b/base/runtime/mach-dep/win32-signal.c new file mode 100644 index 0000000..5a6071a --- /dev/null +++ b/base/runtime/mach-dep/win32-signal.c @@ -0,0 +1,102 @@ +/* win32-signal.c + * + * COPYRIGHT (c) 1996 Bell Laboratories, Lucent Technologies + * + * when "signals" are supported in win32, they'll go here. + */ + +#include "signal-sysdep.h" +#include "ml-base.h" +#include "ml-limits.h" +#include "ml-state.h" +#include "vproc-state.h" +#include "ml-objects.h" +#include "ml-signals.h" +#include "system-signals.h" +#include "ml-globals.h" + +#include "win32-sigtbl.c" + +#ifndef MP_SUPPORT +#define SELF_VPROC (VProc[0]) +#else +/** for MP_SUPPORT, we'll use SELF_VPROC for now **/ +#define SELF_VPROC (VProc[0]) +#endif + +/* ListSignals: + */ +ml_val_t ListSignals (ml_state_t *msp) +{ +#ifdef WIN32_DEBUG + SayDebug("win32:ListSignals: returning dummy signal list\n"); +#endif + return ML_SysConstList (msp, &SigTbl); +} + +/* PauseUntilSignal: + * + * Suspend the given VProc until a signal is received. + */ +void PauseUntilSignal (vproc_state_t *vsp) +{ +#ifdef WIN32_DEBUG + SayDebug("win32:PauseUntilSignal: returning without pause\n"); +#endif +} + +/* SetSignalState: + */ +void SetSignalState (vproc_state_t *vsp, int sigNum, int sigState) +{ +#ifdef WIN32_DEBUG + SayDebug("win32:SetSignalState: not setting state for signal %d\n",sigNum); +#endif +} + + +/* GetSignalState: + */ +int GetSignalState (vproc_state_t *vsp, int sigNum) +{ +#ifdef WIN32_DEBUG + SayDebug("win32:GetSignalState: returning state for signal %d as ML_SIG_DEFAULT\n",sigNum); +#endif + return ML_SIG_DEFAULT; +} + + +/* SetSignalMask: + * + * Set the signal mask to the given list of signals. The sigList has the + * type: "sysconst list option", with the following semantics (see + * sml-nj/boot/smlnj/signals.sml): + * NONE -- the empty mask + * SOME[] -- mask all signals + * SOME l -- the signals in l are the mask + */ +void SetSignalMask (ml_val_t sigList) +{ +#ifdef WIN32_DEBUG + SayDebug("win32:SetSigMask: not setting mask\n"); +#endif +} + + +/* GetSignalMask: + * + * Return the current signal mask (only those signals supported my ML); like + * SetSignalMask, the result has the following semantics: + * NONE -- the empty mask + * SOME[] -- mask all signals + * SOME l -- the signals in l are the mask + */ +ml_val_t GetSignalMask (ml_state_t *msp) +{ +#ifdef WIN32_DEBUG + SayDebug("win32:GetSignalMask: returning mask as NONE\n"); +#endif + return OPTION_NONE; +} + +/* end of win32-signal.c */ diff --git a/base/runtime/mach-dep/win32-timers.c b/base/runtime/mach-dep/win32-timers.c new file mode 100644 index 0000000..10069d7 --- /dev/null +++ b/base/runtime/mach-dep/win32-timers.c @@ -0,0 +1,150 @@ +/*! \file win32-timers.c + * + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * win32 specific interface to times and interval timers. + */ + +#include +#include +#include +#include "ml-base.h" +#include "vproc-state.h" +#include "ml-state.h" +#include "ml-timer.h" +#include "win32-fault.h" +#include "win32-timers.h" + +#include "signal-sysdep.h" +#include "system-signals.h" + +#ifndef MP_SUPPORT +#define SELF_VPROC (VProc[0]) +#else +/** for MP_SUPPORT, we'll use SELF_VPROC for now **/ +#define SELF_VPROC (VProc[0]) +#endif + +/* for computing times */ +PVT struct _timeb start_timeb; + +/* for interval timers */ +#define WIN32_TIMER_DONE 0 +#define WIN32_TIMER_COUNTING 1 +#define WIN32_TIMER_HALTED 2 + +typedef struct { + HANDLE handle; + DWORD id; + int milli_secs; + void (*action)(); +} win32_timer_t; + + +PVT void timer (win32_timer_t *ct) +{ + while (1) { + (*ct->action)(); + Sleep(ct->milli_secs); + } +} + +PVT BOOL create_win32_timer (win32_timer_t *ct, void (*f)(), int mSec, BOOL suspend) +{ + /* create a thread */ + ct->milli_secs = mSec; + ct->action = f; + return ((ct->handle = CreateThread(NULL, + 0, /* default stack size */ + (LPTHREAD_START_ROUTINE) timer, + ct, + suspend ? CREATE_SUSPENDED : 0, + &ct->id)) != NULL); +} + +PVT BOOL destroy_win32_timer (win32_timer_t *ct) +{ + return TerminateThread(ct->handle,1); +} + +PVT BOOL halt_win32_timer (win32_timer_t *ct) +{ + return SuspendThread(ct->handle) != -1; +} + +PVT BOOL resume_win32_timer (win32_timer_t *ct) +{ + return ResumeThread(ct->handle) != -1; +} + +PVT win32_timer_t wt; + +bool_t win32StopTimer () +{ + return halt_win32_timer (&wt); +} + +bool_t win32StartTimer (int mSec) +{ + wt.milli_secs = mSec; + return resume_win32_timer (&wt); +} + +PVT void win32_fake_sigalrm() +{ + vproc_state_t *vsp = SELF_VPROC; + + if (SuspendThread (win32_ML_thread_handle) == -1) { + Die ("win32_fake_sigalrm: unable to suspend ML thread"); + } + + win32_generic_handler(SIGALRM); + + if (ResumeThread (win32_ML_thread_handle) == -1) { + Die ("win32_fake_sigalrm: unable to resume ML thread"); + } +} + + +/* InitTimers: + * + * system specific timer initialization + */ +void InitTimers () +{ + if (!create_win32_timer (&wt,win32_fake_sigalrm, 0, TRUE)) { + Die("InitTimers: unable to create_win32_timer"); + } + _ftime(&start_timeb); + +} /* end of InitTimers */ + + +/* GetCPUTime: + * + * Get the elapsed user and/or system cpu times in a system independent way. + */ +void GetCPUTime (Time_t *usrT, Time_t *sysT) +{ + struct _timeb now_timeb, elapsed_timeb; + + _ftime(&now_timeb); + if (now_timeb.millitm < start_timeb.millitm) { + now_timeb.time--; + ASSERT(now_timeb.time >= start_timeb.time); + now_timeb.millitm += 1000; + ASSERT(now_timeb.millitm > start_timeb.millitm); + } + elapsed_timeb.time = now_timeb.time - start_timeb.time; + elapsed_timeb.millitm = now_timeb.millitm - start_timeb.millitm; + if (usrT != NIL(Time_t *)) { + usrT->seconds = (Int32_t) elapsed_timeb.time; + usrT->uSeconds = ((Int32_t) elapsed_timeb.millitm) * 1000; + } + if (sysT != NIL(Time_t *)) { + sysT->seconds = sysT->uSeconds = 0; + } +} + +/* end of win32-timers.c */ diff --git a/base/runtime/mach-dep/win32-util.c b/base/runtime/mach-dep/win32-util.c new file mode 100644 index 0000000..d78642e --- /dev/null +++ b/base/runtime/mach-dep/win32-util.c @@ -0,0 +1,20 @@ +/* win32-util.c + * + * COPYRIGHT (c) 1996 Bell Laboratories, Lucent Technologies. + * + * win32 specific utility code + */ + +#include +#include "ml-osdep.h" +#include "ml-base.h" + +int GetPageSize() +{ + SYSTEM_INFO si; + + GetSystemInfo(&si); + return (int) si.dwPageSize; +} + +/* end of win32-util.c */ diff --git a/base/runtime/mach-dep/x86-macros.h b/base/runtime/mach-dep/x86-macros.h new file mode 100644 index 0000000..61d04ca --- /dev/null +++ b/base/runtime/mach-dep/x86-macros.h @@ -0,0 +1,69 @@ +/*! \file x86-macros.h + * + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * These are macros for the X86 and AMD64 assembly code. They need to be + * defined after the CPS register names are defined so that the Win32 macros + * are correct. + */ + +#ifndef _X86_MACROS_H_ +#define _X86_MACROS_H_ + +/* jump to the address held in the standard continuation register */ +#define CONTINUE JMP (CODEPTR(stdcont)) + +/* CHECKLIMIT, ENTRY, and ML_CODE_HDR macros */ +#ifdef MASM_ASSEMBLER + +CHECKLIMIT_M MACRO + @@: + MOVE (stdlink, temp, pc) + CMP (limitptr, allocptr) + jb @f + CALL (CSYM(saveregs)) + JMP @b + @@: +ENDM + +ENTRY_M MACRO id + GLOBAL (CSYM(&id)) + LABEL (CSYM(&id)) +ENDM + +ALIGNED_ENTRY_M MACRO name + GLOBAL (CSYM(&name)) + ALIGN_CODE + LABEL (CSYM(&name)) +ENDM + +#define CHECKLIMIT CHECKLIMIT_M +#define ENTRY(id) ENTRY_M id +#define ALIGNED_ENTRY(name) ALIGNED_ENTRY_M name + +#elif defined(GNU_ASSEMBLER) + +#define CHECKLIMIT \ + 1:; \ + MOVE (stdlink, temp, pc); \ + CMP (limitptr, allocptr); \ + JB (9f); \ + CALL (CSYM(saveregs)); \ + JMP (1b); \ + 9: + +#define ENTRY(ID) \ + CGLOBAL(ID); \ + LABEL(CSYM(ID)) + +#define ALIGNED_ENTRY(name) \ + CGLOBAL(name); \ + ALIGN_CODE; \ + LABEL(CSYM(name)) + +#else +# error must specify either GNU_ASSEMBLER or MASM_ASSEMBLER +#endif /* MASM_ASSEMBLER */ + +#endif /* !_X86_MACROS_H_ */ diff --git a/base/runtime/mach-dep/x86-syntax.h b/base/runtime/mach-dep/x86-syntax.h new file mode 100644 index 0000000..d794c13 --- /dev/null +++ b/base/runtime/mach-dep/x86-syntax.h @@ -0,0 +1,324 @@ +/*! \file x86-syntax.h + * + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * This file abstracts over the syntax of x86 (and x86-64) assembly + * directives and instructions. It supports two syntactic conventions: + * + * 1. "GNU Assember syntax", where arguments are `src,dst`. This + * syntax is used by BSD, Linux, macOS, etc. + * + * 2. "Microsoft Assember syntax", where arguments are `dst,src`. + * This syntax is used in the Intel reference manuals. + * + * 32-bit Registers + * EAX X86 only + * EBX X86 only + * ECX X86 only + * EDX X86 only + * EBP X86 only + * ESI X86 only + * EDI X86 only + * ESP X86 only + * + * 64-bit Registers + * RDI AMD64 only + * RSI AMD64 only + * RSP AMD64 only + * RAX AMD64 only + * RBX AMD64 only + * RCX AMD64 only + * RDX AMD64 only + * RBP AMD64 only + * R8 AMD64 only + * R9 AMD64 only + * R10 AMD64 only + * R11 AMD64 only + * R12 AMD64 only + * R13 AMD64 only + * R14 AMD64 only + * R15 AMD64 only + * + * 128-bit SSE Registers + * XMM0 AMD64 only + * XMM1 AMD64 only + * XMM2 AMD64 only + * XMM3 AMD64 only + * XMM4 AMD64 only + * XMM5 AMD64 only + * XMM6 AMD64 only + * XMM7 AMD64 only + * XMM8 AMD64 only + * XMM9 AMD64 only + * XMM10 AMD64 only + * XMM11 AMD64 only + * XMM12 AMD64 only + * XMM13 AMD64 only + * XMM14 AMD64 only + * XMM15 AMD64 only + * + * Operands + * IM(x) immediate value + * REGIND(r) register indirect + * REGOFF(d,r) register indirect with offset + * CODEPTR(r) register contents as jump target + * + * Instructions; operand are ordered src,dst and are the native machine size + * ADD(src,dst) `dst := dst + src` + * AND(src,dst) `dst := dst AND src` + * CALL(lab) procedure call + * CMP(src,dst) test of `dst - src` + * FILD(src) load 32-bit integer into ST(0) (X86 only) + * FINIT initialize x87 FPU (X86 only) + * FISTP store floating-point value as integer (X86 only) + * FLD_D(dst) load 64-bit floating-point value into ST(0) (X86 only) + * FLDCW load x87 FPU control word (X86 only) + * FSCALE scale ST(0) by ST(1) (X86 only) + * FSTCW store x87 FPU control word (X86 only) + * FSTP_D(dst) store ST(0) as 64-bit float (X86 only) + * INC(dst) `dst := dst + 1` + * LEA(src,dst) load effective address + * JB(lab) jump if below + * JGE(lab) jump if greater or equal (dst >= src) + * JMP(lab) jump + * JNE(lab) jump if not equal + * MOV(src,dst) move from src to dst + * MOVS_D(src,dst) move scalar 64-bit floating-point value (AMD64 only) + * OR(src,dst) `dst := src OR dst` + * POP(dst) pop value off stack into `dst` + * PUSH(src) push `src` onto stack + * RET return + * SAL(src,dst) `dst := dst << src` + * SAR(src,dst) `dst := dst ~>> src` + * SUB(src,dst) `dst := dst - src` + * XOR(src,dst) `dst := dst XOR src` (used for setting registers to 0) + */ + +#ifndef _X86_SYNTAX_H_ +#define _X86_SYNTAX_H_ + +#if !defined(GNU_ASSEMBLER) && !defined(MASM_ASSEMBLER) +# error must specify either GNU_ASSEMBLER or MASM_ASSEMBLER +#endif + +#ifdef GNU_ASSEMBLER + +/* arguments are in src,dst order for the GNU assembler */ +#define ARGS2(src,dst) src,dst +#define ARGS3(im,src,dst) im,src,dst + +#define CHOICE(gnu, masm) gnu + +/* directives */ +#define GLOBAL(ID) .globl ID +#define LABEL(ID) CONCAT(ID,:) +#define TEXT .text +#define DATA .data +#define ALIGN_CODE .p2align 4 + +#if defined(ARCH_X86) +#define ALIGN4 .p2align 2 +#define WORD(lab) LABEL(lab) .long 0 +#else /* ARCH_AMD_64 */ +#define ALIGN8 .p2align 3 +#define WORD(lab) LABEL(lab) .long 0, 0 +#endif /* ARCH_X86 */ + +/* operands */ +#define IM(x) CONCAT($,x) +#define REG(r) CONCAT(%,r) +#define REGIND(r) (r) +#define REGIND_16(r) (r) +#define REGOFF(d,r) d(r) +#define CODEPTR(r) *r +#if defined(ARCH_AMD64) +# define CODEADDR(lab) lab(REG(rip)) +#endif + +#else /* MASM_ASSEMBLER */ + +/* arguments are in dst,src order for the MASM assembler */ +#define ARGS2(src,dst) dst,src +#define ARGS3(im,src,dst) dst,src,im + +#define CHOICE(gnu, masm) masm + +/* directives */ +#define GLOBAL(ID) PUBLIC ID +#define LABEL(ID) CONCAT(ID,:) +#define TEXT .CODE +#define DATA .DATA +#define ALIGN_CODE ALIGN 4 + +#if defined(ARCH_X86) +# define ALIGN4 ALIGN 4 +# define WORD(lab) lab DWORD 0 +#else /* ARCH_AMD_64 */ +# define ALIGN8 ALIGN 8 +# define WORD(lab) lab QWORD 0 +#endif /* ARCH_X86 */ + +/* operands */ +#define IM(x) x +#define REG(r) r +#ifdef ARCH_X86 +# define REGIND(r) dword ptr [r] +# define REGIND_16(r) word ptr [r] +# define REGOFF(d,r) dword ptr [r + d] +#else /* ARCH_AMD64 */ +# define REGIND(r) qword ptr [r] +# define REGOFF(d,r) qword ptr [r + d] +#endif +#define CODEPTR(r) r + +#endif /* GNU_ASSEMBLER */ + +#define CALL(lab) call lab +#define JB(lab) jb lab +#define JE(lab) je lab +#define JGE(lab) jge lab +#define JLE(lab) jle lab +#define JMP(lab) jmp lab +#define JNE(lab) jne lab +#define RET ret + +#ifdef ARCH_X86 +/* 16-bit operations */ +#define ANDW(src,dst) CHOICE(andw ARGS2(src,dst), and ARGS2(src,dst)) +#define MOVW(src,dst) CHOICE(movw ARGS2(src,dst), mov ARGS2(src,dst)) +#define ORW(src,dst) CHOICE(orw ARGS2(src,dst), or ARGS2(src,dst)) +/* 32-bit operations */ +#define ADD(src,dst) CHOICE(addl ARGS2(src,dst), add ARGS2(src,dst)) +#define AND(src,dst) CHOICE(andl ARGS2(src,dst), and ARGS2(src,dst)) +#define CMP(src,dst) CHOICE(cmpl ARGS2(src,dst), cmp ARGS2(src,dst)) +#define INC(dst) CHOICE(incl dst, inc dst) +#define LEA(src,dst) CHOICE(leal ARGS2(src,dst), lea ARGS2(src,dst)) +#define MOV(src,dst) CHOICE(movl ARGS2(src,dst), mov ARGS2(src,dst)) +#define OR(src,dst) CHOICE(orl ARGS2(src,dst), or ARGS2(src,dst)) +#define POP(dst) CHOICE(popl dst, pop dst) +#define PUSH(src) CHOICE(pushl src, push src) +#define SAL(src,dst) CHOICE(sall ARGS2(src,dst), sal ARGS2(src,dst)) +#define SAR(src,dst) CHOICE(sarl ARGS2(src,dst), sar ARGS2(src,dst)) +#define SUB(src,dst) CHOICE(subl ARGS2(src,dst), sub ARGS2(src,dst)) +/* x87 floating-point instructions */ +#define FILDL(src) CHOICE(fildl src, fild src) +#define FINIT finit +#define FISTP(dst) CHOICE(fistpl dst, fistp dst) +#define FLD(dst) CHOICE(fldl dst, fld dst) +#define FLDCW(src) CHOICE(fldcw src, fldcw src) +#define FSCALE fscale +#define FSTCW(dst) CHOICE(fstcw dst, fstcw dst) +#define FSTPL(dst) CHOICE(fstpl dst, fstp dst) +/* 32-bit registers */ +#define EAX REG(eax) +#define EBX REG(ebx) +#define ECX REG(ecx) +#define EDX REG(edx) +#define EBP REG(ebp) +#define ESI REG(esi) +#define EDI REG(edi) +#define ESP REG(esp) +/* extra operand macros for word and double addressing using the MASM assembler */ +#ifdef GNU_ASSEMBLER +# define REGIND_DBL(r) (r) +# define REGOFF_W(d,r) d(r) +# define REGOFF_DBL(d,r) d(r) +#else /* MASM_ASSEMBLER */ +# define REGIND_DBL(r) real8 ptr [r] +# define REGOFF_W(d,r) word ptr [r + d] +# define REGOFF_DBL(d,r) real8 ptr [r + d] +#endif /* GNU_ASSEMBLER */ +#endif /* ARCH_X86 */ + +#ifdef ARCH_AMD64 +/* 64-bit sized operations */ +#define ADD(src,dst) CHOICE(addq ARGS2(src,dst), add ARGS2(src,dst)) +#define AND(src,dst) CHOICE(andq ARGS2(src,dst), and ARGS2(src,dst)) +#define CMP(src,dst) CHOICE(cmpq ARGS2(src,dst), cmp ARGS2(src,dst)) +#define INC(dst) CHOICE(incq dst, inc dst) +#define INT4 CHOICE(int $4, int $4) +#define LEA(src,dst) CHOICE(leaq ARGS2(src,dst), lea ARGS2(src,dst)) +#define MOV(src,dst) CHOICE(movq ARGS2(src,dst), mov ARGS2(src,dst)) +#define MOVS_D(src,dst) CHOICE(movsd ARGS2(src,dst), movs ARGS2(src,dst)) +#define MULS_D(src,dst) CHOICE(mulsd ARGS2(src,dst), mulsd ARGS2(src,dst)) +#define OR(src,dst) CHOICE(orq ARGS2(src,dst), or ARGS2(src,dst)) +#define POP(dst) CHOICE(popq dst, pop dst) +#define PUSH(src) CHOICE(pushq src, push src) +#define SAL(src,dst) CHOICE(salq ARGS2(src,dst), sal ARGS2(src,dst)) +#define SAR(src,dst) CHOICE(sarq ARGS2(src,dst), sar ARGS2(src,dst)) +#define SUB(src,dst) CHOICE(subq ARGS2(src,dst), sub ARGS2(src,dst)) +#define TEST(src,dst) CHOICE(testq ARGS2(src,dst), test ARGS2(src,dst)) +#define XOR(src,dst) CHOICE(xorq ARGS2(src,dst), xor ARGS2(src,dst)) +/* Scalar SSE operations */ +#define CVTTSD2SI(src,dst) CHOICE(cvttsd2si ARGS2(src,dst), cvttsd2si ARGS2(src,dst)) +#define CVTSI2SDQ(src,dst) CHOICE(cvtsi2sdq ARGS2(src,dst), cvtsd2si ARGS2(src,dst)) +#define MOVSD(src,dst) CHOICE(movsd ARGS2(src,dst), movs ARGS2(src,dst)) +#define ROUNDSD(dir,src,dst) CHOICE(roundsd ARGS3(dir,src,dst), rounds ARGS3(dir,src,dst)) +/* Vector SSE operations */ +#define c(src1,src2,dst) +/* 64-bit registers */ +#define RDI REG(rdi) +#define RSI REG(rsi) +#define RSP REG(rsp) +#define RAX REG(rax) +#define RBX REG(rbx) +#define RCX REG(rcx) +#define RDX REG(rdx) +#define RBP REG(rbp) +#define R8 REG(r8) +#define R9 REG(r9) +#define R10 REG(r10) +#define R11 REG(r11) +#define R12 REG(r12) +#define R13 REG(r13) +#define R14 REG(r14) +#define R15 REG(r15) +/* instruction pointer */ +#define RIP REG(rip) +/* 128-bit SSE Registers */ +#define XMM0 REG(xmm0) +#define XMM1 REG(xmm1) +#define XMM2 REG(xmm2) +#define XMM3 REG(xmm3) +#define XMM4 REG(xmm4) +#define XMM5 REG(xmm5) +#define XMM6 REG(xmm6) +#define XMM7 REG(xmm7) +#define XMM8 REG(xmm8) +#define XMM9 REG(xmm9) +#define XMM10 REG(xmm10) +#define XMM11 REG(xmm11) +#define XMM12 REG(xmm12) +#define XMM13 REG(xmm13) +#define XMM14 REG(xmm14) +#define XMM15 REG(xmm15) +#endif /* ARCH_AMD64 */ + +/* MOVE(src,tmp,dst) copies one memory location `src` to `dst``, using register `tmp`. */ +#ifdef GNU_ASSEMBLER +#define MOVE(src,tmp,dst) \ + MOV(src, tmp); \ + MOV(tmp, dst) +#else /* MASM_ASSEMBLER */ +MOVE_M MACRO src,tmp,dst + MOV (src, tmp) + MOV (tmp, dst) +ENDM +#define MOVE(a,b,c) MOVE_M a, b, c +#endif + +#ifdef GNU_ASSEMBLER +# define END +#endif + +/* TODO: make this property a dynamic test */ +#if ((defined(OPSYS_FREEBSD) || defined(OPSYS_NETBSD2) || defined(OPSYS_OPENBSD)) && !defined(__ELF__)) || defined(OPSYS_WIN32) || defined(OPSYS_DARWIN) || defined(OPSYS_CYGWIN) +# define CSYM(ID) CONCAT(_,ID) +#else +# define CSYM(ID) ID +#endif + +#define CGLOBAL(ID) GLOBAL(CSYM(ID)) + +#endif /* _X86_SYNTAX_H_ */ diff --git a/base/runtime/memory/README b/base/runtime/memory/README new file mode 100644 index 0000000..7e2b770 --- /dev/null +++ b/base/runtime/memory/README @@ -0,0 +1,14 @@ +This directory implements a memory management library that hides the +operating system dependencies. It supports the allocation of memory +objects aligned to MAP_PAGE_SZB byte boundries (see mapping.h). + +The following routines are exported: + void MEM_InitMemory (); + mem_obj_t *MEM_AllocMemObj (word_t szb); + void MEM_FreeMemObj (mem_obj_t *obj); + +There are two different implementations: + + 1) a version that uses the SVR4 mmap mechanism to allocate chunks of memory. + + 2) a version that uses the MACH vm_allocate call to allocate chunks of memory. diff --git a/base/runtime/memory/makefile b/base/runtime/memory/makefile new file mode 100644 index 0000000..c7ab0c0 --- /dev/null +++ b/base/runtime/memory/makefile @@ -0,0 +1,36 @@ +# +# The makefile for the memory management library. This get recursively +# invoked with the OBJS variable set to the appropriate value. +# + +MAKE = make +AR = ar +ARFLAGS = rcv +RANLIB = ranlib + +VERSION = v-dummy + +LIB = libmem.a + +MMAP_OBJS = mem-mmap.o + +MACH_OBJS = mem-mach.o + +$(LIB) : $(VERSION) $(OBJS) + $(AR) $(ARFLAGS) $(LIB) $(OBJS) + $(RANLIB) $(LIB) + +$(VERSION) : + ($(MAKE) MAKE="$(MAKE)" clean) + echo "$(VERSION)" > $(VERSION) + +mem-mmap.o mem-mach.o: mem-common.ins + +.o: ../include/ml-base.h ../include/ml-unixdep.h ../include/bibop.h + +.c.o: + $(CC) $(CFLAGS) $(DEFS) $(INCLUDES) -c $< + +clean : + rm -f v-* *.o $(LIB) + diff --git a/base/runtime/memory/makefile.win32 b/base/runtime/memory/makefile.win32 new file mode 100644 index 0000000..68d016e --- /dev/null +++ b/base/runtime/memory/makefile.win32 @@ -0,0 +1,55 @@ +# win32 specific, only mem-vmem currently supported. +# +## The makefile for the memory management library. This gets recursively +## invoked with the OBJS variable set to the appropriate value. +# + +MAKE = nmake +AR = lib +ARFLAGS = +RANLIB = lib + +VERSION = v-dummy + +LIB = libmem.lib + +MMAP_OBJS = mem-mmap.obj + +MACH_OBJS = mem-mach.obj + +BRK_OBJS = mem-brk.obj malloc.obj + +SIMPLE_OBJS = mem-win32.obj + +$(LIB) : $(VERSION) $(OBJS) + $(AR) $(ARFLAGS) /out:$(LIB) $(OBJS) + $(RANLIB) /out:$(LIB) + +$(VERSION) : + $(MAKE) MAKE="$(MAKE)" clean + echo "$(VERSION)" > $(VERSION) + +mem-mmap.obj mem-mach.obj mem-brk.obj mem-win32.obj: mem-common.ins + +DEPENDENTS = ..\include\ml-base.h ..\include\ml-unixdep.h ..\include\bibop.h + +mem-mmap.obj : mem-mmap.c $(DEPENDENTS) + $(CC) $(CFLAGS) $(DEFS) $(INCLUDES) /c mem-mmap.c + +mem-mach.obj : mem-mach.c $(DEPENDENTS) + $(CC) $(CFLAGS) $(DEFS) $(INCLUDES) /c mem-mach.c + +mem-brk.obj : mem-brk.c $(DEPENDENTS) + $(CC) $(CFLAGS) $(DEFS) $(INCLUDES) /c mem-brk.c + +malloc.obj : malloc.c $(DEPENDENTS) + $(CC) $(CFLAGS) $(DEFS) $(INCLUDES) /c malloc.c + +mem-win32.obj : mem-win32.c \ + mem-common.ins \ + ..\include\ml-base.h ..\include\ml-osdep.h ..\include\memory.h + $(CC) $(CFLAGS) $(DEFS) $(INCLUDES) /c mem-win32.c + +clean : + del /F /Q v-* *.obj *.pdb $(LIB) + diff --git a/base/runtime/memory/mem-common.ins b/base/runtime/memory/mem-common.ins new file mode 100644 index 0000000..a17e09a --- /dev/null +++ b/base/runtime/memory/mem-common.ins @@ -0,0 +1,89 @@ +/* mem-common.ins + * + * This is code that is common to all three implementations of the + * memory management library. + */ + +#ifndef _MEM_COMMON_INS_ +#define _MEM_COMMON_INS_ + +PVT Addr_t PageSize; /* the system page size. */ +PVT Addr_t PageShift; /* PageSize == (1 << PageShift) */ +PVT Addr_t VMSizeB; /* The amount of virtual memory allocated */ + +PVT status_t MapMemory (mem_obj_t *obj, Addr_t szb); +PVT void UnmapMemory (mem_obj_t *obj); + + +/* InitMemory: + * + * Initialize the common stuff. + * + */ +PVT void InitMemory () +{ + int i, j; + + VMSizeB = 0; + PageSize = GETPAGESIZE(); + for (i = 1, j = 0; i != PageSize; i <<= 1, j++) + continue; + PageShift = j; + +} /* end of InitMemory */ + + +/* MEM_GetVMSize: + * + * Return the amount of virtual memory (in K-bytes) allocated to the heap. + */ +long MEM_GetVMSize () +{ + return (VMSizeB / ONE_K); + +} /* end of MEM_GetVMSize */ + + +/* MEM_AllocMemObj: + * Get a new memory object from the O.S. Return NIL on failure, otherwise return + * a pointer to the object descriptor. + */ +mem_obj_t *MEM_AllocMemObj (Word_t szb) +{ + Word_t alloc_szb; + mem_obj_t *obj; + + if ((obj = ALLOC_MEMOBJ()) == NIL(mem_obj_t *)) { + Error ("unable to allocate chunk descriptor\n"); + return NIL(mem_obj_t *); + } + + alloc_szb = (szb <= BIBOP_PAGE_SZB) ? BIBOP_PAGE_SZB : RND_MEMOBJ_SZB(szb); + + if (MapMemory (obj, alloc_szb) == FAILURE) { + FREE_MEMOBJ (obj); + return NIL(mem_obj_t *); + } + + VMSizeB += alloc_szb; + + return obj; + +} /* end of AllocMemObj */ + +/* MEM_FreeMemObj: + */ +void MEM_FreeMemObj (mem_obj_t *obj) +{ + if (obj == NIL(mem_obj_t *)) + return; + + UnmapMemory(obj); + + VMSizeB -= obj->sizeB; + + FREE_MEMOBJ (obj); + +} /* end of MEM_FreeMemObj */ + +#endif /* !_MEM_COMMON_INS_ */ diff --git a/base/runtime/memory/mem-mach.c b/base/runtime/memory/mem-mach.c new file mode 100644 index 0000000..d40e3ac --- /dev/null +++ b/base/runtime/memory/mem-mach.c @@ -0,0 +1,100 @@ +/* mem-mach.c + * + * COPYRIGHT (c) 1993 by AT&T Bell Laboratories. + * + * Memory sub-system for the MACH operating system. + * + * The following routines are exported: + * void InitMemory (); + * mem_obj_t *AllocMemObj (Word_t szb); + * void FreeMemObj (mem_obj_t *obj); + * + */ + +#include "ml-unixdep.h" +#include +#include "ml-base.h" +#include "memory.h" + +#ifndef HAS_VM_ALLOCATE +# error expected HAS_VM_ALLOCATE +#endif + +struct mem_obj { + Word_t *base; /* the base address of the object. */ + Word_t sizeB; /* the object's size (in bytes) */ + Word_t *mapBase; /* base address of the mapped region containing */ + /* the object */ + Word_t mapSizeB; /* the size of the mapped region containing */ + /* the object */ +}; + +#define ALLOC_MEMOBJ() NEW_OBJ(mem_obj_t) +#define FREE_MEMOBJ(p) FREE(p) + +#include "mem-common.ins" + +/* MEM_InitMemory: + */ +void MEM_InitMemory () +{ + InitMemory(); + +} /* MEM_InitMemory */ + + +/* MapMemory: + * + * Map a BIBOP_PAGE_SZB aligned chunk of szb bytes of virtual memory. Return + * the address of the mapped memory (or NIL on failure). + */ +PVT status_t MapMemory (mem_obj_t *obj, Addr_t szb) +{ + Addr_t addr, offset; + kern_return_t sts; + + sts = vm_allocate(task_self(), &addr, szb+BIBOP_PAGE_SZB, TRUE); + + if (sts) { + errno = sts; + return FAILURE; + } + + /* insure BIBOP_PAGE_SZB alignment */ + offset = BIBOP_PAGE_SZB - (addr & (BIBOP_PAGE_SZB-1)); + if (offset != 0) { + /* align addr and discard unused portions of memory */ + vm_deallocate (task_self(), addr, offset); + addr += offset; + vm_deallocate (task_self(), addr+szb, BIBOP_PAGE_SZB-offset); + } + else { + vm_deallocate (task_self(), addr+szb, BIBOP_PAGE_SZB); + } + + obj->base = (Word_t *)addr; + obj->sizeB = szb; + + return SUCCESS; + +} /* end of MapMemory */ + +/* UnmapMemory: + * + * Unmap a chunk of virtual memory at addr. + */ +PVT void UnmapMemory (mem_obj_t *obj) +{ + kern_return_t sts; + + sts = vm_deallocate ( + task_self(), + (vm_address_t)(obj->base), + (vm_size_t)(obj->sizeB)); + + if (sts != KERN_SUCCESS) { + Die ("error unmapping [%#x, %#x), errno = %d\n", + obj->mapBase, (Addr_t)(obj->mapBase) + obj->mapSizeB, errno); + } + +} /* end of UnmapMemory */ diff --git a/base/runtime/memory/mem-mmap.c b/base/runtime/memory/mem-mmap.c new file mode 100644 index 0000000..2482404 --- /dev/null +++ b/base/runtime/memory/mem-mmap.c @@ -0,0 +1,131 @@ +/* mem-mmap.c + * + * COPYRIGHT (c) 1993 by AT&T Bell Laboratories. + * + * Memory sub-system for systems that provide mmap. + */ + +#include "ml-unixdep.h" +#include "ml-osdep.h" +#include INCLUDE_TYPES_H +#include +#include INCLUDE_FCNTL_H +#include "ml-base.h" +#include "memory.h" + +#if !(defined(HAS_MMAP) || defined(HAS_ANON_MMAP)) +# error expected HAS_MMAP or HAS_ANON_MMAP +#endif + +/* protection mode for mmap memory */ +#define PROT_ALL (PROT_READ|PROT_WRITE|PROT_EXEC) + +/* flags for mmap */ +#ifdef HAS_ANON_MMAP +# define MMAP_FLGS (MAP_ANONYMOUS|MAP_PRIVATE) +#else +# define MMAP_FLGS MAP_PRIVATE +#endif + +struct mem_obj { + Word_t *base; /* the base address of the object. */ + Addr_t sizeB; /* the object's size (in bytes) */ +#ifdef HAS_PARTIAL_MUNMAP +# define mapBase base +# define mapSizeB sizeB +#else + Word_t *mapBase; /* base address of the mapped region containing */ + /* the object */ + Addr_t mapSizeB; /* the size of the mapped region containing */ + /* the object */ +#endif +}; + +extern int errno; + +#define ALLOC_MEMOBJ() NEW_OBJ(mem_obj_t) +#define FREE_MEMOBJ(p) FREE(p) + +#include "mem-common.ins" + +/* MEM_InitMemory: + */ +void MEM_InitMemory () +{ + InitMemory(); + +} /* MEM_InitMemory */ + + +/* MapMemory: + * + * Map a BIBOP_PAGE_SZB aligned chunk of szb bytes of virtual memory. Return + * the address of the mapped memory (or NIL on failure). + */ +PVT status_t MapMemory (mem_obj_t *obj, Addr_t szb) +{ + int fd; + Addr_t addr, offset; + +#ifdef HAS_ANON_MMAP + fd = -1; +#else + /* Note: we use O_RDONLY, because some OS are configured such that /dev/zero + * is not writable. This works because we are using MAP_PRIVATE as the + * mapping mode. + */ + if ((fd = open("/dev/zero", O_RDONLY)) == -1) { + Error ("unable to open /dev/zero, errno = %d\n", errno); + return FAILURE; + } +#endif + + /* we grab an extra BIBOP_PAGE_SZB bytes to give us some room for alignment */ + addr = (Addr_t) mmap (0, szb+BIBOP_PAGE_SZB, PROT_ALL, MMAP_FLGS, fd, 0); + if (addr == -1) { + Error ("unable to map %d bytes, errno = %d\n", szb, errno); +#ifndef HAS_ANON_MMAP + close (fd); /* NOTE: this call clobbers errno */ +#endif + return FAILURE; + } +#ifndef HAS_ANON_MMAP + close (fd); +#endif + + /* insure BIBOP_PAGE_SZB alignment */ + offset = BIBOP_PAGE_SZB - (addr & (BIBOP_PAGE_SZB-1)); +#ifdef HAS_PARTIAL_MUNMAP + if (offset != BIBOP_PAGE_SZB) { + /* align addr and discard unused portions of memory */ + munmap ((void *)addr, offset); + addr += offset; + munmap ((void *)(addr+szb), BIBOP_PAGE_SZB-offset); + } + else { + munmap ((void *)(addr+szb), BIBOP_PAGE_SZB); + } +#else + obj->mapBase = (Word_t *)addr; + obj->mapSizeB = szb+BIBOP_PAGE_SZB; + addr += offset; +#endif + obj->base = (Word_t *)addr; + obj->sizeB = szb; + + return SUCCESS; + +} /* end of MapMemory */ + +/* UnmapMemory: + * + * Unmap a szb byte chunk of virtual memory at addr. + */ +PVT void UnmapMemory (mem_obj_t *obj) +{ + if (munmap((caddr_t)(obj->mapBase), obj->mapSizeB) == -1) { + Die ("error unmapping [%#x, %#x), errno = %d\n", + obj->mapBase, (Addr_t)(obj->mapBase) + obj->mapSizeB, errno); + } + +} /* end of UnmapMemory */ diff --git a/base/runtime/memory/mem-win32.c b/base/runtime/memory/mem-win32.c new file mode 100644 index 0000000..8ee7378 --- /dev/null +++ b/base/runtime/memory/mem-win32.c @@ -0,0 +1,98 @@ +/* mem-win32.c + * + * COPYRIGHT (c) 1996 Bell Laboratories, Lucent Technologies. + * + * A simple memory module built on top of vmem alloc/free. + * This is currently win32 specific. + */ + +#if defined(OPSYS_WIN32) +#include +#endif + +#include "ml-osdep.h" +#include "ml-base.h" +#include "memory.h" + +struct mem_obj { + Word_t *base; /* the base address of the object. */ + Word_t sizeB; /* the object's size (in bytes) */ + Word_t *mapBase; /* base address of the mapped region containing */ + /* the object */ + Addr_t mapSizeB; /* the size of the mapped region containing */ + /* the object */ +}; + +#define MEM_OBJ_SZB (sizeof(mem_obj_t)) + +PVT void *alloc_vmem(); +PVT void free_vmem(void *); + +#define ALLOC_MEMOBJ() alloc_vmem(MEM_OBJ_SZB) +#define FREE_MEMOBJ free_vmem + +#include "mem-common.ins" + +/* alloc_vmem: + * Allocate some virtual memory. + */ +PVT void *alloc_vmem(int nb) +{ + void *p; + + p = (void *) VirtualAlloc(NULL, + nb, + MEM_COMMIT|MEM_RESERVE, + PAGE_EXECUTE_READWRITE); + if (p == NULL) { + Die("VirtualAlloc failed on request of size %lx\n", nb); + } + return p; +} + +/* free_vmem: + * Return memory to OS. + */ +PVT void free_vmem (void *p) +{ + if (!VirtualFree((LPVOID)p, + 0, + MEM_RELEASE)) { + Die("unable to VirtualFree memory at %lx\n", p); + } + +} + +PVT status_t MapMemory (mem_obj_t *obj, Addr_t szb) +{ + Addr_t offset, addr; + + if ((addr = (Addr_t) alloc_vmem(szb+BIBOP_PAGE_SZB)) == NIL(Addr_t)) { + return FAILURE; + } + obj->mapBase = (Addr_t *) addr; + obj->mapSizeB = szb+BIBOP_PAGE_SZB; + obj->sizeB = szb; + offset = BIBOP_PAGE_SZB - (addr & (BIBOP_PAGE_SZB-1)); + addr += offset; + obj->base = (Addr_t *) addr; + + return SUCCESS; +} + +PVT void UnmapMemory (mem_obj_t *obj) +{ + free_vmem(obj->mapBase); + obj->base = obj->mapBase = NULL; + obj->sizeB = obj->mapSizeB = 0; +} + +/* MEM_InitMemory: + */ +void MEM_InitMemory () +{ + InitMemory(); +} /* MEM_InitMemory */ + +/* end of mem-vmem.c */ + diff --git a/base/runtime/mp/makefile b/base/runtime/mp/makefile new file mode 100644 index 0000000..80981cf --- /dev/null +++ b/base/runtime/mp/makefile @@ -0,0 +1,40 @@ +# +# the makefile for the MP support library +# + +MAKE = make +AR = ar +ARFLAGS = rcv +RANLIB = ranlib + +VERSION = v-dummy + +LIB = libmp.a + +INC_DIR = ../include +INCLUDES = -I$(INC_DIR) + +# +# each version of the MP library has its own list of object files: +# +SGI_OBJS = sgi-mp.o + + + +$(LIB) : $(VERSION) $(OBJS) + rm -rf $(LIB) + $(AR) $(ARFLAGS) $(LIB) $(OBJS) + $(RANLIB) $(LIB) + +$(VERSION) : + ($(MAKE) MAKE="$(MAKE)" clean) + echo "$(VERSION)" > $(VERSION) + +sgi-mp.o: sgi-mp.c \ + $(INC_DIR)/ml-base.h $(INC_DIR)/ml-limits.h \ + $(INC_DIR)/ml-values.h $(INC_DIR)/tags.h $(INC_DIR)/ml-mp.h \ + $(INC_DIR)/ml-state.h $(INC_DIR)/ml-globals.h + $(CC) -c $(CFLAGS) $(DEFS) $(INCLUDES) sgi-mp.c + +clean : + rm -f v-* *.o $(LIB) diff --git a/base/runtime/mp/makefile.win32 b/base/runtime/mp/makefile.win32 new file mode 100644 index 0000000..9915f32 --- /dev/null +++ b/base/runtime/mp/makefile.win32 @@ -0,0 +1,41 @@ +# +# the makefile for the MP support library +# +# win32 specific; nothing here yet + +MAKE = nmake +AR = lib +ARFLAGS = +RANLIB = lib + +VERSION = v-dummy + +LIB = libmp.lib + +INC_DIR = ..\include +INCLUDES = -I$(INC_DIR) + +# +# each version of the MP library has its own list of object files: +# +SGI_OBJS = sgi-mp.obj + + + +$(LIB) : $(VERSION) $(OBJS) + del /F /Q $(LIB) + $(AR) $(ARFLAGS) $(LIB) $(OBJS) + $(RANLIB) $(LIB) + +$(VERSION) : + ($(MAKE) MAKE="$(MAKE)" clean) + echo "$(VERSION)" > $(VERSION) + +sgi-mp.obj: sgi-mp.c \ + $(INC_DIR)\ml-base.h $(INC_DIR)\ml-limits.h \ + $(INC_DIR)\ml-values.h $(INC_DIR)\tags.h $(INC_DIR)\ml-mp.h \ + $(INC_DIR)\ml-state.h $(INC_DIR)\ml-globals.h + $(CC) -c $(CFLAGS) $(DEFS) $(INCLUDES) sgi-mp.c + +clean : + del /F /Q v-* *.obj *.pdb $(LIB) diff --git a/base/runtime/mp/sgi-mp.c b/base/runtime/mp/sgi-mp.c new file mode 100644 index 0000000..1a669a4 --- /dev/null +++ b/base/runtime/mp/sgi-mp.c @@ -0,0 +1,407 @@ +/* sgi-mp.c + * + * COPYRIGHT (c) 1994 AT&T Bell Laboratories. + * + * MP support for SGI Challenge machines (Irix 5.x). + */ + +#include +#include +#include +#include +#include "ml-base.h" +#include "ml-limits.h" +#include "ml-values.h" +#include "ml-objects.h" +#include "tags.h" +#include "ml-mp.h" +#include "ml-state.h" +#include "ml-globals.h" +#include "vproc-state.h" + +/* #define ARENA_FNAME tmpnam(0) */ +#define ARENA_FNAME "/tmp/sml-mp.lock-arena" + +#define INT_MLinc(n,i) ((ml_val_t)INT_CtoML(INT_MLtoC(n) + (i))) +#define INT_MLdec(n,i) (INT_MLinc(n,(-i))) + +/* forwards */ +PVT mp_lock_t AllocLock (); +PVT mp_barrier_t *AllocBarrier(); + +/* locals */ +PVT usptr_t *arena; /* arena for shared sync objects */ +PVT ulock_t MP_ArenaLock; /* must be held to alloc/free a lock */ +PVT ulock_t MP_ProcLock; /* must be held to acquire/release procs */ + +/* globals */ +mp_lock_t MP_GCLock; +mp_lock_t MP_GCGenLock; +mp_barrier_t *MP_GCBarrier; +mp_lock_t MP_TimerLock; + + +/* MP_Init: + */ +void MP_Init () +{ + /* set '_utrace = 1;' to debug shared arenas */ + if (usconfig(CONF_LOCKTYPE, US_NODEBUG) == -1) { + Die ("usconfig failed in MP_Init"); + } + usconfig(CONF_AUTOGROW, 0); + if (usconfig(CONF_INITSIZE, 65536) == -1) { + Die ("usconfig failed in MP_Init"); + } + if ((arena = usinit(ARENA_FNAME)) == NIL(usptr_t *)) { + Die ("usinit failed in MP_Init"); + } + + MP_ArenaLock = AllocLock(); + MP_ProcLock = AllocLock(); + MP_GCLock = AllocLock(); + MP_GCGenLock = AllocLock(); + MP_TimerLock = AllocLock(); + MP_GCBarrier = AllocBarrier(); + ASSIGN(ActiveProcs, INT_CtoML(1)); + +} /* end of MP_Init */ + + +/* MP_ProcId: + */ +mp_pid_t MP_ProcId () +{ + + return getpid (); + +} /* end of MP_ProcId */ + + +/* AllocLock: + * + * Allocate and initialize a system lock. + */ +PVT mp_lock_t AllocLock () +{ + ulock_t lock; + + if ((lock = usnewlock(arena)) == NIL(ulock_t)) { + Die ("AllocLock: cannot get lock with usnewlock\n"); + } + usinitlock(lock); + usunsetlock(lock); + + return lock; + +} /* end of AllocLock */ + + +/* MP_SetLock: + */ +void MP_SetLock (mp_lock_t lock) +{ + ussetlock(lock); + +} /* end of MP_SetLock */ + + +/* MP_UnsetLock: + */ +void MP_UnsetLock (mp_lock_t lock) +{ + usunsetlock(lock); + +} /* end of MP_UnsetLock */ + + +/* MP_TryLock: + */ +bool_t MP_TryLock (mp_lock_t lock) +{ + return ((bool_t) uscsetlock(lock, 1)); /* try once */ + +} /* end of MP_TryLock */ + + +/* MP_AllocLock: + */ +mp_lock_t MP_AllocLock () +{ + ulock_t lock; + + ussetlock(MP_ArenaLock); + lock = AllocLock (); + usunsetlock(MP_ArenaLock); + + return lock; + +} /* end of MP_AllocLock */ + + +/* MP_FreeLock: + */ +void MP_FreeLock (mp_lock_t lock) +{ + ussetlock(MP_ArenaLock); + usfreelock(lock,arena); + usunsetlock(MP_ArenaLock); + +} /* end of MP_FreeLock */ + + +/* AllocBarrier: + * + * Allocate and initialize a system barrier. + */ +PVT mp_barrier_t *AllocBarrier () +{ + barrier_t *barrierp; + + if ((barrierp = new_barrier(arena)) == NIL(barrier_t *)) { + Die ("cannot get barrier with new_barrier"); + } + init_barrier(barrierp); + + return barrierp; + +} /* end of AllocBarrier */ + +/* MP_AllocBarrier: + */ +mp_barrier_t *MP_AllocBarrier () +{ + barrier_t *barrierp; + + ussetlock(MP_ArenaLock); + barrierp = AllocBarrier (); + usunsetlock(MP_ArenaLock); + + return barrierp; + +} /* end of MP_AllocBarrier */ + +/* MP_FreeBarrier: + */ +void MP_FreeBarrier (mp_barrier_t *barrierp) +{ + ussetlock(MP_ArenaLock); + free_barrier(barrierp); + usunsetlock(MP_ArenaLock); + +} /* end of MP_FreeBarrier */ + +/* MP_Barrier: + */ +void MP_Barrier (mp_barrier_t *barrierp, unsigned n) +{ + barrier(barrierp, n); + +} /* end of MP_Barrier */ + +/* MP_ResetBarrier: + */ +void MP_ResetBarrier (mp_barrier_t *barrierp) +{ + init_barrier(barrierp); + +} /* end of MP_ResetBarrier */ + +/* ??? */ +PVT void fixPnum (int n) +{ + /* dummy for now */ +} + + +/* MP_MaxProcs: + */ +int MP_MaxProcs () +{ + return MAX_NUM_PROCS; + +} /* end of MP_MaxProcs */ + + +/* ProcMain: + */ +PVT void ProcMain (void *vmsp) +{ + ml_state_t *msp = (ml_state_t *) vmsp; + + /* needs to be done + fixPnum(msp->pnum); + setup_signals(msp, TRUE); + */ + /* spin until we get our id (from return of call to NewProc) */ + while (msp->ml_vproc->vp_mpSelf == NIL(mp_pid_t)) { +#ifdef MP_DEBUG + SayDebug("[waiting for self]\n"); +#endif + continue; + } +#ifdef MP_DEBUG + SayDebug ("[new proc main: releasing lock]\n"); +#endif + MP_UnsetLock (MP_ProcLock); /* implicitly handed to us by the parent */ + RunML (msp); /* should never return */ + Die ("proc returned after run_ml() in ProcMain().\n"); + +} /* end of ProcMain */ + + +/* NewProc: + */ +PVT int NewProc (ml_state_t *state) +{ + int ret, error; + + ret = sproc(ProcMain, PR_SALL, (void *)state); + if (ret == -1) { + extern int errno; + + error = oserror(); /* this is potentially a problem since */ + /* each thread should have its own errno. */ + /* see sgi man pages for sproc */ + Error ("error=%d,errno=%d\n", error, errno); + Error ("[warning NewProc: %s]\n",strerror(error)); + } + + return ret; +} + + +/* MP_AcquireProc: + */ +ml_val_t MP_AcquireProc (ml_state_t *msp, ml_val_t arg) +{ + ml_state_t *p; + vproc_state_t *vsp; + ml_val_t v = REC_SEL(arg, 0); + ml_val_t f = REC_SEL(arg, 1); + int i; + +#ifdef MP_DEBUG + SayDebug("[acquiring proc]\n"); +#endif + MP_SetLock(MP_ProcLock); + /* search for a suspended proc to reuse */ + for (i = 0; + (i < NumVProcs) && (VProc[i]->vp_mpState != MP_PROC_SUSPENDED); + i++ + ) + continue; +#ifdef MP_DEBUG + SayDebug("[checking for suspended processor]\n"); +#endif + if (i == NumVProcs) { + if (DEREF(ActiveProcs) == INT_CtoML(MAX_NUM_PROCS)) { + MP_UnsetLock(MP_ProcLock); + Error("[processors maxed]\n"); + return ML_false; + } +#ifdef MP_DEBUG + SayDebug("[checking for NO_PROC]\n"); +#endif + /* search for a slot in which to put a new proc */ + for (i = 0; + (i < NumVProcs) && (VProc[i]->vp_mpState != MP_PROC_NO_PROC); + i++ + ) + continue; + if (i == NumVProcs) { + MP_UnsetLock(MP_ProcLock); + Error("[no processor to allocate]\n"); + return ML_false; + } + } +#ifdef MP_DEBUG + SayDebug("[using processor at index %d]\n", i); +#endif + /* use processor at index i */ + vsp = VProc[i]; + p = vsp->vp_state; + + p->ml_exnCont = PTR_CtoML(handle_v+1); + p->ml_arg = ML_unit; + p->ml_cont = PTR_CtoML(return_c); + p->ml_closure = f; + p->ml_pc = + p->ml_linkReg = GET_CODE_ADDR(f); + p->ml_varReg = v; + + if (vsp->vp_mpState == MP_PROC_NO_PROC) { + /* assume we get one */ + ASSIGN(ActiveProcs, INT_MLinc(DEREF(ActiveProcs), 1)); + if ((vsp->vp_mpSelf = NewProc(p)) != -1) { +#ifdef MP_DEBUG + SayDebug ("[got a processor]\n"); +#endif + vsp->vp_mpState = MP_PROC_RUNNING; + /* NewProc will release MP_ProcLock */ + return ML_true; + } + else { + ASSIGN(ActiveProcs, INT_MLdec(DEREF(ActiveProcs), 1)); + MP_UnsetLock(MP_ProcLock); + return ML_false; + } + } + else { + vsp->vp_mpState = MP_PROC_RUNNING; +#ifdef MP_DEBUG + SayDebug ("[reusing a processor]\n"); +#endif + MP_UnsetLock(MP_ProcLock); + return ML_true; + } + +} /* end of MP_AcquireProc */ + +/* MP_ReleaseProc: + */ +void MP_ReleaseProc (ml_state_t *msp) +{ +#ifdef MP_DEBUG + SayDebug("[release_proc: suspending]\n"); +#endif + InvokeGC(msp,1); + MP_SetLock(MP_ProcLock); + msp->ml_vproc->vp_mpState = MP_PROC_SUSPENDED; + MP_UnsetLock(MP_ProcLock); + while (msp->ml_vproc->vp_mpState == MP_PROC_SUSPENDED) { + /* need to be continually available for gc */ + InvokeGC(msp,1); + } +#ifdef MP_DEBUG + SayDebug("[release_proc: resuming]\n"); +#endif + RunML(msp); + Die ("return after RunML(msp) in mp_release_proc\n"); + +} /* end of MP_ReleaseProc */ + + +/* MP_ActiveProcs: + */ +int MP_ActiveProcs () +{ + int ap; + + MP_SetLock(MP_ProcLock); + ap = INT_MLtoC(DEREF(ActiveProcs)); + MP_UnsetLock(MP_ProcLock); + + return ap; + +} /* end of MP_ActiveProcs */ + + +/* MP_Shutdown: + */ +void MP_Shutdown () +{ + usdetach(arena); + +} /* end of MP_Shutdown */ + diff --git a/base/runtime/mp/solaris-mp.c b/base/runtime/mp/solaris-mp.c new file mode 100644 index 0000000..1a8f0ec --- /dev/null +++ b/base/runtime/mp/solaris-mp.c @@ -0,0 +1,811 @@ +/* solaris-mp.c + * + * MP support for Sparc multiprocessor machines running Solaris 2.5 + * + * Solaris implementation of externals defined in $(INCLUDE)/ml-mp.h + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "ml-limits.h" +#include "ml-values.h" +#include "ml-objects.h" +#include "tags.h" +#include "ml-mp.h" +#include "ml-state.h" +#include "ml-globals.h" +#include "vproc-state.h" + + +#define INT_MLinc(n,i) ((ml_val_t)INT_CtoML(INT_MLtoC(n) + (i))) +#define INT_MLdec(n,i) (INT_MLinc(n,(-i))) + +/* local functions */ +PVT mp_lock_t AllocLock(); +PVT mp_barrier_t *AllocBarrier(); +PVT void *AllocArenaMem(int sz); +PVT void FreeArenaMem(void *, int); +PVT void *ProcMain(void *msp); +PVT void *ResumeProc(void *vmsp); +PVT void SuspendProc(ml_state_t *msp); +PVT ml_state_t **InitProcStatesArray(); +PVT void BindToRealProc(processorid_t *); + +/* locals */ +PVT caddr_t arena; /* arena for shared sync objects */ +PVT mp_lock_t arenaLock; /* must be held to alloc/free a lock */ +PVT mp_lock_t MP_ProcLock; /* must be used to acquire/release procs */ +PVT ml_state_t **procStates; /*[MAX_NUM_PROCS]*/ /* list of states of suspended + procs */ +#if defined(MP_PROFILE) +PVT int *doProfile; +#endif + +#define LEAST_PROCESSOR_ID 0 +#define GREATEST_PROCESSOR_ID 3 + +#define NextProcessorId(id) (((id) == GREATEST_PROCESSOR_ID) ? LEAST_PROCESSOR_ID : (id) + 1) + +PVT processorid_t *processorId; /* processor id of the next processor a lwp + will be bound to */ +/* globals */ +mp_lock_t MP_GCLock; +mp_lock_t MP_GCGenLock; +mp_barrier_t *MP_GCBarrier; +mp_lock_t MP_TimerLock; + +#if defined(MP_PROFILE) +int mutex_trylock_calls; +int trylock_calls; +#endif + +/* MP_Init: + */ +void MP_Init() +{ + int fd; + + if ((fd = open("/dev/zero",O_RDWR)) == -1) + Die("MP_Init:Couldn't open /dev/zero"); + + arena = mmap((caddr_t) 0, sysconf(_SC_PAGESIZE),PROT_READ | PROT_WRITE ,MAP_PRIVATE,fd,0); + + arenaLock = AllocLock(); + MP_ProcLock = AllocLock(); + MP_GCLock = AllocLock(); + MP_GCGenLock = AllocLock(); + MP_TimerLock = AllocLock(); + MP_GCBarrier = AllocBarrier(); + procStates = InitProcStatesArray(); + ASSIGN(ActiveProcs, INT_CtoML(1)); +#ifdef MP_NONBLOCKING_IO + MP_InitStdInReader (); +#endif + processorId = (processorid_t *) AllocArenaMem(sizeof(processorid_t)); + *processorId = -1; + BindToRealProc(processorId); + +#ifdef MP_PROFILE + doProfile = (int *) AllocArenaMem(sizeof(int)); + *doProfile = 0; +#endif + NextProcessorId(*processorId); + + /* thr_setconcurrency(MAX_NUM_PROCS); */ + +} /* end of MP_Init */ + +/************************************************************************* + * Function: PVT mp_state_t **InitProcStatesArray() + * Purpose: Initialize the array of pointers to ml states of suspended + * processors + * Return: The initialized array as a pointer to pointers. + *************************************************************************/ + PVT ml_state_t **InitProcStatesArray() +{ + ml_state_t **array; + ml_state_t **ptr; + int i; + + array = (ml_state_t **) AllocArenaMem(sizeof(ml_state_t *)); + + for (i=1; i < MAX_NUM_PROCS; i++) + { + ptr = (ml_state_t **) AllocArenaMem(sizeof(ml_state_t *)); + *ptr = (ml_state_t *) NULL; + ptr++; + } + + return (array); + +} /* end of InitProcStatesArray */ +/************************************************************************* + * Function: PVT mp_lock_t AllocLock() + * Purpose: Allocate a portion of the arena of synch objects for a spin + lock. + * Returns: returns a pointer to the allocated region. + * Created: 5-14-96 +*************************************************************************/ +PVT mp_lock_t AllocLock() +{ + mp_lock_t lock; + + lock = (mp_lock_t) AllocArenaMem(MP_LOCK_SZ); + + lock->value = UNSET; + + if (mutex_init(&lock->mutex, USYNC_THREAD, NULL) == -1) + Die("AllocLock: unable to initialize mutex"); + + return lock; + +} /* end of AllocLock */ + +/************************************************************************** + * Function: FreeLock + * Purpose : Destroy the mutex. In addition, if the lock was the last object + allocated in the arena then recapture the space occupied by the + lock. Otherwise, zero out the space occupied by the lock. + * Created : 5-14-96 + **************************************************************************/ +PVT void FreeLock(mp_lock_t lock) +{ +#if defined(MP_LOCK_DEBUG) + printf("arena = %ld\t lock = %ld\n",(int) arena, lock); +#endif + + mutex_destroy(&lock->mutex); + + FreeArenaMem(lock,MP_LOCK_SZ); + +} /* end of FreeLock */ + +/************************************************************************* + * Function: void BindToRealProc(processorid_t *) + * Purpose: Bind the current lwp to a real processor. Attempt to bind the + lwp to a processor different from the last processor a lwp was + bound to. + * Created: 7-22-96 +*************************************************************************/ +void BindToRealProc(processorid_t *processorId) +{ + processorid_t procId = *processorId; + processorid_t obind; + int lwpBoundP = 0; + + while (!lwpBoundP) + { + procId = NextProcessorId(procId); + if (procId == *processorId) /* attempts made to bind on all processors */ + { + fprintf(stderr, "lwp was not bound to a processor.\n"); + lwpBoundP = 1; + } + else + { + if (processor_bind(P_LWPID, P_MYID, procId, &obind) == -1) + { + fprintf(stderr, "error attempting to bind lwp to processor [%d]\n",(int) procId); + lwpBoundP = 1; + } + else + { + if (obind == PBIND_NONE) /* couldn't bind to lwp */ + fprintf(stderr, "couldn't bind current lwp to processor [%d]\n", (int) procId); + else + { + fprintf(stderr,"lwp bound to processor [%d]\n",procId); + lwpBoundP = 1; + *processorId = procId; + } + } + } + } + +} /* end of BindToRealProc */ + +/************************************************************************* + * Function: bool_t MP_TryLock(mp_lock_t lock) + * Purpose: Return FALSE if cannot set lock; otherwise set lock and return + TRUE. + * Created: 5-14-96 + * Invariant: If more than one processes calls MP_TryLock at the same time, + then only one of the processes will have TRUE returned. +*************************************************************************/ + +bool_t MP_TryLock(mp_lock_t lock) +{ +#if defined(MP_PROFILE) + long cpuTime; +#endif + +#if defined(MP_LOCK_DEBUG) + printf("MP_TryLock: lock value is %d\n",lock->value); +#endif + +#if defined(MP_PROFILE) + if (*doProfile) + { + cpuTime = (long) clock(); + printf("trylock_calls = %d\n",++trylock_calls); + } +#endif + + /* We test to see if the lock is set here so that we can reduce the number + of calls to mutex_trylock when we are waiting for the lock to be + released. Apparently repeated calls to mutex_trylock floods the bus. + I don't know why. I found this out from the Threads Primer book. + */ + if (lock->value == SET) +#if defined(MP_PROFILE) + { + if (*doProfile) + fprintf(stderr,"MP_Trylock:cpu time %ld\n",(long) clock() - cpuTime); + return(FALSE); + } +#else + return(FALSE); +#endif + else + { + #if defined(MP_LOCK_DEBUG) + printf("MP_TryLock: calling mutex_trylock\n"); + #endif + + #if defined(MP_PROFILE) + if (*doProfile) + printf("mutex_trylock_calls = %d\n",++mutex_trylock_calls); + #endif + + if (mutex_trylock(&lock->mutex) == EBUSY) +#if defined(MP_PROFILE) + if (*doProfile) + fprintf(stderr,"MP_Trylock:cpu time %ld\n",(long) clock() - cpuTime); +#else + return(FALSE); +#endif + else + { + if (lock->value == SET) + { + mutex_unlock(&lock->mutex); +#if defined(MP_PROFILE) + if (*doProfile) + fprintf(stderr,"MP_Trylock:cpu time %ld\n",(long) clock() - cpuTime); +#endif + return(FALSE); + } + + lock->value = SET; + mutex_unlock(&lock->mutex); +#if defined(MP_PROFILE) + if (*doProfile) + fprintf(stderr,"MP_Trylock:cpu time %ld\n",(long) clock() - cpuTime); +#endif + return(TRUE); + } + } +} /* end of MP_TryLock */ + + +/************************************************************************* + * Function: void MP_UnsetLock(mp_lock_t lock) + * Purpose: Assign lock->value the value of 0. + * Created: 5-14-96 +*************************************************************************/ + +void MP_UnsetLock(mp_lock_t lock) +{ + lock->value = UNSET; +} + +/************************************************************************* + * Function: void MP_SetLock(mp_lock_t lock) + * Purpose: Busy wait until able set the lock. + * Created: 5-14-96 +*************************************************************************/ +void MP_SetLock(mp_lock_t lock) +{ + while (MP_TryLock(lock) == FALSE) ; +} + + +/* MP_AllocLock: + */ +mp_lock_t MP_AllocLock() +{ + mp_lock_t lock; + + MP_SetLock(arenaLock); + lock = AllocLock(); + MP_UnsetLock(arenaLock); + + return lock; +} /* end of MP_AllocLock */ + +/************************************************************************* + * Function: void MP_FreeLock (mp_lock_t lock) + * Purpose: Destroy mutex of lock and free memory occupied by lock. + * Returns: returns non-negative int if OK, -1 on error + * Created: 5-13-96 +*************************************************************************/ + +void MP_FreeLock (mp_lock_t lock) +{ + MP_SetLock(arenaLock); + FreeLock(lock); + MP_UnsetLock(arenaLock); +} + +/************************************************************************* + * Function: AllocBarrier + * Purpose: Get a chunk of memory from the arena for a barrier and + initialize it. + * Returns: Return a pointer to the barrier. + * Created: 5-15-96 +*************************************************************************/ + +PVT mp_barrier_t *AllocBarrier () +{ + mp_barrier_t *barrierp; + + barrierp = (mp_barrier_t *) arena; + arena += MP_BARRIER_SZ; + + barrierp->n_waiting = 0; + barrierp->phase = 0; + + if (mutex_init(&barrierp->lock, USYNC_THREAD, NULL) == -1) + Die("MP_Barrier: could not init barrier mutex lock"); + + if (cond_init(&barrierp->wait_cv, USYNC_THREAD, NULL) == -1) + Die("MP_Barrier: could not init conditional var of barrier"); + + + return barrierp; + +} /* end of AllocBarrier */ + + +/************************************************************************* + * Function: MP_AllocBarrier + * Purpose: Allocate a barrier from the synch object arena. Allocation is + mutually exclusive. Note the barrier is not initialized. + * Returns: Return a pointer to the barrier. + * Created: 5-15-96 +*************************************************************************/ +mp_barrier_t *MP_AllocBarrier () +{ + mp_barrier_t *barrierp; + + MP_SetLock(arenaLock); + barrierp = AllocBarrier (); + MP_UnsetLock(arenaLock); + + return barrierp; + +} /* end of MP_AllocBarrier */ + +/************************************************************************* + * Function: MP_AllocBarrier + * Purpose: destroy mutex and conditional variables of the barrier. + Regain memory if barrier was last object allocated in arena; + otherwise zero out the memory occupied by the barrier. + * Returns: Nothing. + * Created: 5-15-96 +*************************************************************************/ +void FreeBarrier(mp_barrier_t *barrierp) +{ + mutex_destroy(&barrierp->lock); + cond_destroy(&barrierp->wait_cv); + + FreeArenaMem(barrierp, MP_BARRIER_SZ); +} /* end of FreeBarrier */ + +void MP_FreeBarrier(mp_barrier_t *barrierp) +{ + MP_SetLock(arenaLock); + FreeBarrier(barrierp); + MP_UnsetLock(arenaLock); +} /* end of MP_FreeBarrier */ + + +/************************************************************************* + * Function: MP_Barrier + * Purpose: Wait until the required number of threads enter the barrier. + * Returns: Nothing. + * Created: 5-15-96 + * Invariant: barrierp->n_waiting <= n_clients +*************************************************************************/ + +void MP_Barrier(mp_barrier_t *barrierp, unsigned n_clients) +{ + int my_phase; + + mutex_lock(&barrierp->lock); + + my_phase = barrierp->phase; + barrierp->n_waiting++; + + if (barrierp->n_waiting == n_clients) + { + barrierp->n_waiting = 0; + barrierp->phase = 1 - my_phase; + cond_broadcast(&barrierp->wait_cv); + } + + /* Wait for the end of this synchronization phase */ + while (barrierp->phase == my_phase) + { + cond_wait(&barrierp->wait_cv, &barrierp->lock); + } + + mutex_unlock(&barrierp->lock); + +} /* end of MP_Barrier */ + +/************************************************************************* + * Function: MP_ResetBarrier + * Purpose: Set the various values of the barrier to zero. + * Returns: Nothing. + * Created: 5-15-96 +*************************************************************************/ +void MP_ResetBarrier(mp_barrier_t *barrierp) +{ + barrierp->n_waiting = 0; + barrierp->phase = 0; + +} /* end of MP_ResetBarrier */ + + + +/************************************************************************* + * Function: AllocArenaMem + ************************************************************************/ + +PVT void *AllocArenaMem(int sz) +{ + void *obj; + + obj = arena; + arena += sz; + + return obj; +} + +/************************************************************************* + * Function: FreeArenaMem + ************************************************************************/ +PVT void FreeArenaMem(void *p, int sz) +{ + if (arena == (caddr_t) p + sz) + arena -= sz; + else + memset(p,0,sz); +} + +/************************************************************************* + * Function: ResumeProc(ml_state_t *msp) + * Purpose: Resumes a proc to either perform garbage collection or to + * run ml with the given ml state. + * Return: Nothing + ************************************************************************/ +PVT void *ResumeProc(void *vmsp) +{ + ml_state_t *msp = (ml_state_t *) vmsp; + + MP_SetLock(MP_ProcLock); + if (msp->ml_vproc->vp_mpState == MP_PROC_SUSPENDED) + { + /* proc only resumed to do a gc */ +#ifdef MP_DEBUG + SayDebug("resuming %d to perform a gc\n",msp->ml_vproc->vp_mpSelf); +#endif + msp->ml_vproc->vp_mpState == MP_PROC_GC; + MP_UnsetLock(MP_ProcLock); + + /* the GC will be performed when we call MP_ReleaseProc */ + + MP_ReleaseProc(msp); + } + else + { +#ifdef MP_DEBUG + SayDebug("[release_proc: resuming proc %d]\n",msp->ml_vproc->vp_mpSelf); +#endif + MP_UnsetLock(MP_ProcLock); + RunML(msp); + Die ("return after RunML(msp) in mp_release_proc\n"); + } +} /* end of ResumeProc */ + +/************************************************************************* + * Function: MP_ResumeVProcs(int n_procs) + * Purpose: Remove n_procs states from the list of states and spawn threads + * to execute them. + * Note: We assume that calls to this function are mutually exclusive. + * Return: Return a pointer to the last state resumed. + ************************************************************************/ +vproc_state_t *MP_ResumeVProcs(int n_procs) +{ + ml_state_t *statep; + int i = 0; + + while(i < MAX_NUM_PROCS && n_procs > 0) { + + if ((statep = procStates[i]) != (ml_state_t *) NULL) /* get a state */ + { + /* spawn a thread to execute the state */ +#ifdef MP_DEBUG + SayDebug("Resuming proc %d\n",statep->ml_vproc->vp_mpSelf); +#endif + if(thr_create(NULL,0,ResumeProc,(void *)statep,NULL,NULL) != 0) + Die("Could create a thread to resume processors"); + + procStates[i] = NULL; + i++; + n_procs--; + } + else + i++; + } + + if (statep == (ml_state_t *) NULL) + return (vproc_state_t *) NULL; + + return statep->ml_vproc; + +} /* end of MP_ResumeVProcs */ + +/************************************************************************* + * Function: SuspendProc(ml_state_t *msp) + * Purpose: Suspend the calling proc, add its state, msp, to the suspended + * proc state list, and kill the thread the proc is running on. + * Return: Nothing. + ************************************************************************/ +PVT void SuspendProc(ml_state_t *msp) +{ + int i=0; + + MP_SetLock(MP_ProcLock); + + /* check if proc has actually been suspended */ + + if (msp->ml_vproc->vp_mpState != MP_PROC_SUSPENDED) + { +#ifdef MP_DEBUG + SayDebug("proc state is not PROC_SUSPENDED; not suspended"); +#endif + MP_UnsetLock(MP_ProcLock); + return; + } + + + while (i < MAX_NUM_PROCS) { + if (procStates[i] == NULL) + { + procStates[i] = msp; + i = MAX_NUM_PROCS; + } + else + i++; + } + + MP_UnsetLock(MP_ProcLock); + + /* exit the thread */ + thr_exit(NULL); + +} /* end of SuspendProc */ + +/************************************************************************* + * Function: MP_ReleaseProc(ml_state_t *msp) + ************************************************************************/ +void MP_ReleaseProc(ml_state_t *msp) +{ + + + InvokeGC(msp,1); + + MP_SetLock(MP_ProcLock); + msp->ml_vproc->vp_mpState = MP_PROC_SUSPENDED; + MP_UnsetLock(MP_ProcLock); + + /* suspend the proc */ +#ifdef MP_DEBUG + SayDebug("suspending proc %d\n",msp->ml_vproc->vp_mpSelf); +#endif + SuspendProc(msp); + +} /* end of MP_ReleaseProc */ + +/************************************************************************* + * Function: ProcMain(ml_state_t *msp) + * Purpose: Invoke RunML on msp; die if RunML returns + ************************************************************************/ +PVT void *ProcMain(void *vmsp) +{ + ml_state_t *msp = (ml_state_t *) vmsp; + + /* spin until we get our id (from return of call to thr_create) */ + while (msp->ml_vproc->vp_mpSelf == NIL(mp_pid_t)) { +#ifdef MP_DEBUG + SayDebug("[waiting for self]\n"); +#endif + continue; + } +#ifdef MP_DEBUG + SayDebug ("[new proc main: releasing lock]\n"); +#endif + + BindToRealProc(processorId); + + MP_UnsetLock(MP_ProcLock); /* implicitly handed to us by the parent */ + RunML(msp); /* should never return */ + Die("proc returned after run_ml() in ProcMain().\n"); + +} /* end of ProcMain */ +/************************************************************************* + * Function: MP_AcquireProc(ml_state_t *msp, ml_val_t arg) + ************************************************************************/ +ml_val_t MP_AcquireProc(ml_state_t *msp, ml_val_t arg) +{ + ml_state_t *p; + vproc_state_t *vsp; + ml_val_t v = REC_SEL(arg, 0); + ml_val_t f = REC_SEL(arg, 1); + int i; + +#ifdef MP_DEBUG + SayDebug("[acquiring proc]\n"); +#endif + + MP_SetLock(MP_ProcLock); + + /* search for a suspended proc to reuse */ + for (i = 0; + (i < NumVProcs) && (VProc[i]->vp_mpState != MP_PROC_SUSPENDED); + i++) + continue; + +#ifdef MP_DEBUG + SayDebug("[checking for suspended processor]\n"); +#endif + if (i == NumVProcs) + { + if (DEREF(ActiveProcs) == INT_CtoML(MAX_NUM_PROCS)) + { + MP_UnsetLock(MP_ProcLock); + Error("[processors maxed]\n"); + return ML_false; + } +#ifdef MP_DEBUG + SayDebug("[checking for NO_PROC]\n"); +#endif + + /* search for a slot in which to put a new proc */ + for (i = 0; + (i < NumVProcs) && (VProc[i]->vp_mpState != MP_PROC_NO_PROC); + i++) + continue; + + if (i == NumVProcs) + { + MP_UnsetLock(MP_ProcLock); + Error("[no processor to allocate]\n"); + return ML_false; + } + + /* use processor at index i */ + vsp = VProc[i]; + + } /* end of then */ + + else /* using a suspended processor */ + { +#ifdef MP_DEBUG + SayDebug("[using a suspended processor]\n"); +#endif + vsp = MP_ResumeVProcs(1); + } + + p = vsp->vp_state; + + p->ml_exnCont = PTR_CtoML(handle_v+1); + p->ml_arg = ML_unit; + p->ml_cont = PTR_CtoML(return_c); + p->ml_closure = f; + p->ml_pc = + p->ml_linkReg = GET_CODE_ADDR(f); + p->ml_varReg = v; + + if (vsp->vp_mpState == MP_PROC_NO_PROC) + { + mp_pid_t procId; + + /* assume we get one */ + ASSIGN(ActiveProcs, INT_MLinc(DEREF(ActiveProcs), 1)); + if (thr_create(NULL,0,ProcMain,(void *)p,THR_NEW_LWP,&((thread_t) procId)) == 0) + { +#ifdef MP_DEBUG + SayDebug ("[got a processor: %d,]\n",procId); +#endif + vsp->vp_mpState = MP_PROC_RUNNING; + vsp->vp_mpSelf = procId; + /* NewProc will release MP_ProcLock */ + return ML_true; + } + else + { + ASSIGN(ActiveProcs, INT_MLdec(DEREF(ActiveProcs), 1)); + MP_UnsetLock(MP_ProcLock); + return ML_false; + } + } + else + { + /* the thread executing the processor has already been invoked */ + vsp->vp_mpState = MP_PROC_RUNNING; +#ifdef MP_DEBUG + SayDebug ("[reusing a processor %d]\n",vsp->vp_mpSelf); +#endif + MP_UnsetLock(MP_ProcLock); + return ML_true; + } + +} /* end of MP_AcquireProc */ + + +/************************************************************************* + * Function: MP_Shutdown + ************************************************************************/ +void MP_Shutdown () +{ + munmap(arena,sysconf(_SC_PAGESIZE)); +} /* end of MP_Shutdown */ + + +/************************************************************************* + * Function: MP_MaxProcs + ************************************************************************/ +int MP_MaxProcs () +{ + return MAX_NUM_PROCS; + +} /* end of MP_MaxProcs */ + +/************************************************************************* + * Function: MP_ProcId + ************************************************************************/ +mp_pid_t MP_ProcId () +{ + + return (thr_self()); + +} /* end of MP_ProcId */ + +/************************************************************************* + * Function: MP_ActiveProcs + ************************************************************************/ +int MP_ActiveProcs () +{ + int ap; + + MP_SetLock(MP_ProcLock); + ap = INT_MLtoC(DEREF(ActiveProcs)); + MP_UnsetLock(MP_ProcLock); + + return ap; + +} /* end of MP_ActiveProcs */ + + +/* EndSourceFile */ + diff --git a/base/runtime/objs/makefile b/base/runtime/objs/makefile new file mode 100644 index 0000000..6d7908d --- /dev/null +++ b/base/runtime/objs/makefile @@ -0,0 +1,442 @@ +# makefile +# +# COPYRIGHT (c) 2019 The SML/NJ Fellowship +# +# this is the main makefile for the SML/NJ runtime system on Unix systems +# + +SHELL = /bin/sh +MAKE = make +CC = cc +CPP = /lib/cpp +LD_LIBS = +AS = as +AR = ar +ARFLAGS = rcv +RANLIB = ranlib + +# +# dummy version +# +VERSION = v-dummy + +# +# target object +# +RUNTIME = interactive-run + +# +# source directories +# +ROOT_DIR = .. +OBJS_DIR = $(ROOT_DIR)/objs +GC_DIR = $(ROOT_DIR)/gc +INC_DIR = $(ROOT_DIR)/include +KERN_DIR = $(ROOT_DIR)/kernel +MACH_DIR = $(ROOT_DIR)/mach-dep +MEM_DIR = $(ROOT_DIR)/memory +MP_DIR = $(ROOT_DIR)/mp +CLIB_DIR = $(ROOT_DIR)/c-libs +CONFIG_DIR = $(ROOT_DIR)/config + +CFLAGS = -O + +INCLUDES = -I$(OBJS_DIR) -I$(INC_DIR) +GC_INCLUDES = $(INCLUDES) -I$(GC_DIR) +GEN_INCLUDES = -I$(CONFIG_DIR) $(INCLUDES) + +OBJS = c-libraries.o unix-raise-syserr.o ml-options.o \ + boot.o load-ml.o run-ml.o globals.o ml-state.o \ + error.o timers.o unix-timers.o \ + qualify-name.o swap-bytes.o \ + unix-fault.o signal-util.o unix-signal.o unix-prof.o prim.o \ + $(XOBJS) + +# +# libraries of ML callable C functions +# +CLIBS = $(XCLIBS) \ + $(CLIB_DIR)/posix-os/libposix-os.a \ + $(CLIB_DIR)/smlnj-runtime/libsmlnj-runt.a \ + $(CLIB_DIR)/smlnj-signals/libsmlnj-sig.a \ + $(CLIB_DIR)/smlnj-prof/libsmlnj-prof.a \ + $(CLIB_DIR)/smlnj-sockets/libsmlnj-sock.a \ + $(CLIB_DIR)/smlnj-time/libsmlnj-time.a \ + $(CLIB_DIR)/smlnj-date/libsmlnj-date.a \ + $(CLIB_DIR)/smlnj-math/libsmlnj-math.a \ + $(CLIB_DIR)/posix-process/libposix-process.a \ + $(CLIB_DIR)/posix-procenv/libposix-procenv.a \ + $(CLIB_DIR)/posix-filesys/libposix-filesys.a \ + $(CLIB_DIR)/posix-io/libposix-io.a \ + $(CLIB_DIR)/posix-sysdb/libposix-sysdb.a \ + $(CLIB_DIR)/posix-signal/libposix-signal.a \ + $(CLIB_DIR)/posix-tty/libposix-tty.a \ + $(CLIB_DIR)/posix-error/libposix-error.a + +# +# The mmap version is the default memory subsystem +# +LIBMEM_OBJS = 'OBJS=$$(MMAP_OBJS)' + +# +# The SGI version is the default MP library +# +LIBMP_OBJS = 'OBJS=$$(SGI_OBJS)' + +# +# the default GC library +# +GC_LIB = libgc.a + +# +# The various libraries; note that the order matters, since the C functions +# call GC library routines, and the GC library uses the memory library. +# +ML_LIBS = $(GC_DIR)/$(GC_LIB) \ + $(MEM_DIR)/libmem.a + +DEP_LIBS = $(ML_LIBS) $(XLIBS) + +ALL_LIBS = $(CLIBS) $(DEP_LIBS) + +clean: + rm -f v-* run.* interactive-run \ + gen-sizes gen-offsets gen-bc-instr-def \ + gen-unix-signals gen-unix-sigtbl \ + ml-sizes.h mlstate-offsets.h bc-instr-def.h \ + system-signals.h unix-sigtbl.c \ + *.o prim.s primops.s + rm -r -f *.dSYM + (cd $(MEM_DIR); $(MAKE) MAKE="$(MAKE)" clean) + (cd $(CLIB_DIR); $(MAKE) MAKE="$(MAKE)" clean) + (cd $(GC_DIR); $(MAKE) MAKE="$(MAKE)" clean) + (cd $(MP_DIR); $(MAKE) MAKE="$(MAKE)" clean) + + +$(RUNTIME): $(VERSION) main.o $(OBJS) $(ALL_LIBS) + $(CC) -o $(RUNTIME) $(CFLAGS) $(LDFLAGS) main.o $(OBJS) $(ALL_LIBS) $(LD_LIBS) + +$(RUNTIME_A): $(VERSION) main.o $(OBJS) $(ALL_LIBS) + rm -f $(RUNTIME_A) + $(AR) rc $(RUNTIME_A) main.o $(OBJS) + rm -rf tmp + mkdir tmp + for lib in $(ALL_LIBS) ; \ + do \ + cd tmp && \ + $(AR) x ../$$lib && \ + $(AR) q ../$(RUNTIME_A) *.o && \ + rm * && \ + cd .. ; \ + done + $(RANLIB) $(RUNTIME_A) + rmdir tmp + +$(VERSION): + ($(MAKE) MAKE="$(MAKE)" clean) + echo "$(VERSION)" > $(VERSION) + +# +# Sizes +# +ml-sizes.h: gen-sizes + ./gen-sizes + +# +# kernel objects +# +main.o: $(KERN_DIR)/main.c \ + ml-sizes.h $(INC_DIR)/ml-base.h $(INC_DIR)/ml-options.h \ + $(INC_DIR)/ml-limits.h $(INC_DIR)/ml-objects.h + $(CC) -c $(CFLAGS) $(DEFS) $(INCLUDES) $(KERN_DIR)/main.c + +ml-options.o: $(KERN_DIR)/error.c \ + $(INC_DIR)/ml-base.h $(INC_DIR)/ml-options.h + $(CC) -c $(CFLAGS) $(DEFS) $(INCLUDES) $(KERN_DIR)/ml-options.c + +error.o: $(KERN_DIR)/error.c $(INC_DIR)/ml-base.h + $(CC) -c $(CFLAGS) $(DEFS) $(INCLUDES) $(KERN_DIR)/error.c + +standalone.o: $(KERN_DIR)/standalone.c \ + ml-sizes.h $(INC_DIR)/ml-base.h $(INC_DIR)/ml-limits.h + $(CC) -c $(CFLAGS) $(DEFS) $(INCLUDES) $(KERN_DIR)/standalone.c + +boot.o: $(KERN_DIR)/boot.c \ + $(INC_DIR)/ml-osdep.h $(INC_DIR)/cache-flush.h \ + ml-sizes.h $(INC_DIR)/ml-base.h $(INC_DIR)/bin-file.h \ + $(INC_DIR)/ml-objects.h $(INC_DIR)/ml-globals.h $(INC_DIR)/gc.h \ + $(INC_DIR)/ml-limits.h + $(CC) -c $(CFLAGS) $(DEFS) $(INCLUDES) $(KERN_DIR)/boot.c + +load-ml.o: $(KERN_DIR)/load-ml.c \ + ml-sizes.h system-signals.h \ + $(INC_DIR)/ml-base.h $(INC_DIR)/ml-roots.h $(INC_DIR)/ml-state.h + $(CC) -c $(CFLAGS) $(DEFS) $(INCLUDES) $(KERN_DIR)/load-ml.c +# $(CC) -c $(CFLAGS) $(DEFS) $(GC_INCLUDES) $(KERN_DIR)/load-ml.c + +run-ml.o: $(KERN_DIR)/run-ml.c \ + ml-sizes.h system-signals.h \ + $(INC_DIR)/ml-base.h $(INC_DIR)/ml-values.h \ + $(INC_DIR)/vproc-state.h $(INC_DIR)/ml-roots.h \ + $(INC_DIR)/ml-state.h + $(CC) -c $(CFLAGS) $(DEFS) $(INCLUDES) $(KERN_DIR)/run-ml.c + +globals.o: $(KERN_DIR)/globals.c \ + ml-sizes.h $(INC_DIR)/ml-base.h $(INC_DIR)/ml-values.h $(INC_DIR)/ml-limits.h \ + $(INC_DIR)/c-globals-tbl.h $(INC_DIR)/machine-id.h + $(CC) -c $(CFLAGS) $(DEFS) $(INCLUDES) $(KERN_DIR)/globals.c + +ml-state.o: $(KERN_DIR)/ml-state.c \ + ml-sizes.h system-signals.h \ + $(INC_DIR)/ml-base.h $(INC_DIR)/tags.h \ + $(INC_DIR)/ml-values.h $(INC_DIR)/ml-roots.h \ + $(INC_DIR)/ml-state.h $(INC_DIR)/vproc-state.h \ + $(INC_DIR)/ml-globals.h $(INC_DIR)/ml-timer.h $(INC_DIR)/gc.h \ + $(INC_DIR)/ml-limits.h + $(CC) -c $(CFLAGS) $(DEFS) $(INCLUDES) $(KERN_DIR)/ml-state.c + +timers.o: $(KERN_DIR)/timers.c \ + ml-sizes.h system-signals.h \ + $(INC_DIR)/ml-base.h $(INC_DIR)/vproc-state.h \ + $(INC_DIR)/ml-timer.h + $(CC) -c $(CFLAGS) $(DEFS) $(INCLUDES) $(KERN_DIR)/timers.c + +unix-timers.o: $(KERN_DIR)/unix-timers.c \ + ml-sizes.h system-signals.h \ + $(INC_DIR)/ml-base.h $(INC_DIR)/ml-unixdep.h \ + $(INC_DIR)/vproc-state.h $(INC_DIR)/ml-timer.h + $(CC) -c $(CFLAGS) $(DEFS) $(INCLUDES) $(KERN_DIR)/unix-timers.c + +qualify-name.o: $(KERN_DIR)/qualify-name.c \ + ml-sizes.h $(INC_DIR)/ml-base.h $(INC_DIR)/machine-id.h + $(CC) -c $(CFLAGS) $(DEFS) $(INCLUDES) $(KERN_DIR)/qualify-name.c + +swap-bytes.o: $(KERN_DIR)/swap-bytes.c \ + ml-sizes.h $(INC_DIR)/ml-base.h + $(CC) -c $(CFLAGS) $(DEFS) $(INCLUDES) $(KERN_DIR)/swap-bytes.c + + +# +# C libraries +# +c-libraries.o: $(CLIB_DIR)/c-libraries.c \ + ml-sizes.h $(INC_DIR)/ml-base.h \ + $(CLIB_DIR)/clib-list.h + $(CC) -c $(CFLAGS) $(DEFS) $(INCLUDES) -I$(CLIB_DIR) $(CLIB_DIR)/c-libraries.c + +unix-raise-syserr.o: $(CLIB_DIR)/unix-raise-syserr.c \ + ml-sizes.h system-signals.h \ + $(INC_DIR)/ml-base.h $(INC_DIR)/ml-roots.h $(INC_DIR)/ml-state.h \ + $(INC_DIR)/ml-objects.h $(INC_DIR)/ml-globals.h \ + $(INC_DIR)/ml-unixdep.h + $(CC) -c $(CFLAGS) $(DEFS) $(INCLUDES) $(CLIB_DIR)/unix-raise-syserr.c + +# +# Machine dependent objects +# +unix-fault.o: $(MACH_DIR)/unix-fault.c \ + ml-sizes.h system-signals.h \ + $(INC_DIR)/ml-base.h $(INC_DIR)/ml-unixdep.h \ + $(INC_DIR)/vproc-state.h $(INC_DIR)/ml-roots.h \ + $(INC_DIR)/ml-state.h $(INC_DIR)/ml-globals.h \ + $(MACH_DIR)/signal-sysdep.h + $(CC) -c $(CFLAGS) $(DEFS) $(INCLUDES) $(MACH_DIR)/unix-fault.c + +signal-util.o: $(MACH_DIR)/signal-util.c \ + ml-sizes.h system-signals.h \ + $(INC_DIR)/ml-base.h $(INC_DIR)/ml-signals.h \ + $(INC_DIR)/vproc-state.h $(INC_DIR)/ml-roots.h $(INC_DIR)/ml-state.h \ + $(INC_DIR)/ml-limits.h $(INC_DIR)/ml-objects.h + $(CC) -c $(CFLAGS) $(DEFS) $(INCLUDES) $(MACH_DIR)/signal-util.c + +unix-signal.o: $(MACH_DIR)/unix-signal.c \ + ml-sizes.h unix-sigtbl.c system-signals.h \ + $(INC_DIR)/ml-base.h $(INC_DIR)/ml-unixdep.h \ + $(INC_DIR)/ml-signals.h $(INC_DIR)/vproc-state.h $(INC_DIR)/ml-roots.h $(INC_DIR)/ml-state.h \ + $(INC_DIR)/ml-limits.h $(INC_DIR)/ml-objects.h \ + $(INC_DIR)/ml-globals.h \ + $(MACH_DIR)/signal-sysdep.h + $(CC) -c $(CFLAGS) $(DEFS) $(INCLUDES) $(MACH_DIR)/unix-signal.c + +unix-prof.o: $(MACH_DIR)/unix-prof.c \ + ml-sizes.h \ + $(INC_DIR)/ml-base.h $(INC_DIR)/ml-unixdep.h \ + $(INC_DIR)/ml-signals.h $(INC_DIR)/ml-globals.h \ + $(MACH_DIR)/signal-sysdep.h + $(CC) -c $(CFLAGS) $(DEFS) $(INCLUDES) $(MACH_DIR)/unix-prof.c + +system-signals.h: gen-unix-signals + ./gen-unix-signals + +unix-sigtbl.c: gen-unix-sigtbl + ./gen-unix-sigtbl + +prim.o: $(MACH_DIR)/$(TARGET).prim.asm \ + ml-sizes.h $(INC_DIR)/ml-base.h $(INC_DIR)/tags.h \ + $(INC_DIR)/asm-base.h \ + $(MACH_DIR)/x86-syntax.h \ + mlstate-offsets.h + $(CPP) -D_ASM_ $(DEFS) $(INCLUDES) $(MACH_DIR)/$(TARGET).prim.asm > prim.s + $(AS) -o prim.o prim.s + +# +# + +mlstate-offsets.h: gen-offsets ml-sizes.h system-signals.h \ + $(INC_DIR)/ml-base.h $(INC_DIR)/vproc-state.h \ + $(INC_DIR)/ml-roots.h $(INC_DIR)/ml-state.h + ./gen-offsets + + +# +# arguments to recursive make +# +MK_ARGS = VERSION="$(VERSION)" \ + MAKE="$(MAKE)" \ + CC="$(CC)" CFLAGS="$(CFLAGS)" DEFS="$(DEFS)" \ + AR="$(AR)" ARFLAGS="$(ARFLAGS)" \ + RANLIB="$(RANLIB)" \ + INCLUDES="$(GC_INCLUDES)" + +# +# memory management library +# +$(MEM_DIR)/libmem.a: FORCE + (cd $(MEM_DIR); $(MAKE) $(LIBMEM_OBJS) $(MK_ARGS) libmem.a) + +# +# C functions library +# +$(CLIB_DIR)/libcfuns.a: FORCE + (cd $(CLIB_DIR); $(MAKE) $(MK_ARGS) libcfuns.a) + + +# +# GC and heap I/O library +# +$(GC_DIR)/$(GC_LIB): FORCE + (cd $(GC_DIR); $(MAKE) CHECK_HEAP="$(CHECK_HEAP)" $(MK_ARGS) $(GC_LIB)) + + +# +# multiprocessor library (optional) +# +$(MP_DIR)/libmp.a: FORCE + (cd $(MP_DIR); $(MAKE) $(LIBMP_OBJS) $(MK_ARGS) libmp.a) + + +# +# to force recursive makes +# +FORCE: + + +# +# Configuration tools +# +gen-sizes: $(CONFIG_DIR)/gen-sizes.c gen-common.o $(CONFIG_DIR)/gen.h \ + $(INC_DIR)/ml-base.h + $(CC) $(CFLAGS) $(DEFS) $(GEN_INCLUDES) -o gen-sizes $(CONFIG_DIR)/gen-sizes.c gen-common.o + +gen-regmask: $(CONFIG_DIR)/gen-regmask.c gen-common.o $(CONFIG_DIR)/gen.h + $(CC) $(CFLAGS) $(DEFS) $(GEN_INCLUDES) -o gen-regmask $(CONFIG_DIR)/gen-regmask.c gen-common.o + +gen-offsets: $(CONFIG_DIR)/gen-offsets.c gen-common.o $(CONFIG_DIR)/gen.h \ + ml-sizes.h system-signals.h \ + $(INC_DIR)/ml-base.h $(INC_DIR)/vproc-state.h \ + $(INC_DIR)/ml-roots.h $(INC_DIR)/ml-state.h + $(CC) $(CFLAGS) $(DEFS) $(GEN_INCLUDES) -o gen-offsets $(CONFIG_DIR)/gen-offsets.c gen-common.o + +gen-unix-signals: $(CONFIG_DIR)/gen-unix-signals.c \ + unix-signals.o gen-common.o \ + $(CONFIG_DIR)/gen.h $(CONFIG_DIR)/gen-unix-signals.h + $(CC) $(CFLAGS) $(GEN_INCLUDES) $(DEFS) -o gen-unix-signals $(CONFIG_DIR)/gen-unix-signals.c unix-signals.o gen-common.o + +gen-unix-sigtbl: $(CONFIG_DIR)/gen-unix-sigtbl.c \ + gen-common.o unix-signals.o \ + $(CONFIG_DIR)/gen.h $(CONFIG_DIR)/gen-unix-signals.h + $(CC) $(CFLAGS) $(GEN_INCLUDES) $(DEFS) -o gen-unix-sigtbl $(CONFIG_DIR)/gen-unix-sigtbl.c unix-signals.o gen-common.o + +gen-common.o: $(CONFIG_DIR)/gen-common.c $(CONFIG_DIR)/gen.h + $(CC) -c $(CFLAGS) $(GEN_INCLUDES) $(CONFIG_DIR)/gen-common.c + +unix-signals.o: $(CONFIG_DIR)/unix-signals.c $(CONFIG_DIR)/gen.h + $(CC) -c $(CFLAGS) $(GEN_INCLUDES) $(DEFS) $(CONFIG_DIR)/unix-signals.c + + +# +# Make rules for the C libraries +# + +# include directories for the library sub-directories +# +LIB_OBJS_DIR = ../../objs +LIB_INC_DIR = ../../include +LIB_INCLUDES = -I$(LIB_OBJS_DIR) -I$(LIB_INC_DIR) -I.. + +# +# arguments to recursive make +# +LIB_MK_ARGS = VERSION="$(VERSION)" \ + MAKE="$(MAKE)" \ + CC="$(CC)" CFLAGS="$(CFLAGS)" DEFS="$(DEFS)" \ + AR="$(AR)" ARFLAGS="$(ARFLAGS)" \ + RANLIB="$(RANLIB)" \ + INCLUDES="$(LIB_INCLUDES)" + +$(CLIB_DIR)/posix-os/libposix-os.a: FORCE + (cd $(CLIB_DIR)/posix-os; $(MAKE) $(LIB_MK_ARGS) libposix-os.a) + +$(CLIB_DIR)/smlnj-runtime/libsmlnj-runt.a: FORCE + (cd $(CLIB_DIR)/smlnj-runtime; $(MAKE) $(LIB_MK_ARGS) libsmlnj-runt.a) + +$(CLIB_DIR)/smlnj-signals/libsmlnj-sig.a: FORCE + (cd $(CLIB_DIR)/smlnj-signals; $(MAKE) $(LIB_MK_ARGS) libsmlnj-sig.a) + +$(CLIB_DIR)/smlnj-prof/libsmlnj-prof.a: FORCE + (cd $(CLIB_DIR)/smlnj-prof; $(MAKE) $(LIB_MK_ARGS) libsmlnj-prof.a) + +$(CLIB_DIR)/smlnj-sockets/libsmlnj-sock.a: FORCE + (cd $(CLIB_DIR)/smlnj-sockets; $(MAKE) $(LIB_MK_ARGS) libsmlnj-sock.a) + +$(CLIB_DIR)/smlnj-time/libsmlnj-time.a: FORCE + (cd $(CLIB_DIR)/smlnj-time; $(MAKE) $(LIB_MK_ARGS) libsmlnj-time.a) + +$(CLIB_DIR)/smlnj-date/libsmlnj-date.a: FORCE + (cd $(CLIB_DIR)/smlnj-date; $(MAKE) $(LIB_MK_ARGS) libsmlnj-date.a) + +$(CLIB_DIR)/smlnj-math/libsmlnj-math.a: FORCE + (cd $(CLIB_DIR)/smlnj-math; $(MAKE) $(LIB_MK_ARGS) libsmlnj-math.a) + +$(CLIB_DIR)/smlnj-mp/libsmlnj-mp.a: FORCE + (cd $(CLIB_DIR)/smlnj-mp; $(MAKE) $(LIB_MK_ARGS) libsmlnj-mp.a) + +$(CLIB_DIR)/posix-process/libposix-process.a: FORCE + (cd $(CLIB_DIR)/posix-process; $(MAKE) $(LIB_MK_ARGS) libposix-process.a) + +$(CLIB_DIR)/posix-procenv/libposix-procenv.a: FORCE + (cd $(CLIB_DIR)/posix-procenv; $(MAKE) $(LIB_MK_ARGS) libposix-procenv.a) + +$(CLIB_DIR)/posix-filesys/libposix-filesys.a: FORCE + (cd $(CLIB_DIR)/posix-filesys; $(MAKE) $(LIB_MK_ARGS) libposix-filesys.a) + +$(CLIB_DIR)/posix-io/libposix-io.a: FORCE + (cd $(CLIB_DIR)/posix-io; $(MAKE) $(LIB_MK_ARGS) libposix-io.a) + +$(CLIB_DIR)/posix-sysdb/libposix-sysdb.a: FORCE + (cd $(CLIB_DIR)/posix-sysdb; $(MAKE) $(LIB_MK_ARGS) libposix-sysdb.a) + +$(CLIB_DIR)/posix-signal/libposix-signal.a: FORCE + (cd $(CLIB_DIR)/posix-signal; $(MAKE) $(LIB_MK_ARGS) libposix-signal.a) + +$(CLIB_DIR)/posix-tty/libposix-tty.a: FORCE + (cd $(CLIB_DIR)/posix-tty; $(MAKE) $(LIB_MK_ARGS) libposix-tty.a) + +$(CLIB_DIR)/posix-error/libposix-error.a: FORCE + (cd $(CLIB_DIR)/posix-error; $(MAKE) $(LIB_MK_ARGS) libposix-error.a) + +$(CLIB_DIR)/dl/libunix-dynload.a: FORCE + (cd $(CLIB_DIR)/dl; $(MAKE) $(LIB_MK_ARGS) libunix-dynload.a) + +$(CLIB_DIR)/smlnj-ccalls/libsmlnj-ccalls.a: FORCE + (cd $(CLIB_DIR)/smlnj-ccalls; $(MAKE) $(LIB_MK_ARGS) libsmlnj-ccalls.a) diff --git a/base/runtime/objs/makefile.win32 b/base/runtime/objs/makefile.win32 new file mode 100644 index 0000000..f7c4d60 --- /dev/null +++ b/base/runtime/objs/makefile.win32 @@ -0,0 +1,469 @@ +# makefile.win32 +# +# COPYRIGHT (c) 2018 The Fellowship of SML/NJ (http://www.smlnj.org) +# All rights reserved. +# +# this is the win32-specific main makefile for the SML/NJ runtime system +# To actually build the runtime, use the command +# +# nmake -f mk.x86-win32 +# +# which will invoke this makefile with the correct arguments +# + +SHELL = +MAKEFILE = makefile.win32 +MAKE = nmake /nologo /F$(MAKEFILE) +CC = cl /nologo +CPP = cl /nologo /E /EP +LD_LIBS = +AS = ml /Cx /Zd /Zi /Fr +AR = lib +ARFLAGS = +RANLIB = lib + +# +# dummy version +# +VERSION = v-dummy + +# +# target object +# +RUNTIME = interactive-run + +# +# source directories +# +ROOT_DIR = .. +OBJS_DIR = $(ROOT_DIR)\objs +GC_DIR = $(ROOT_DIR)\gc +INC_DIR = $(ROOT_DIR)\include +KERN_DIR = $(ROOT_DIR)\kernel +MACH_DIR = $(ROOT_DIR)\mach-dep +MEM_DIR = $(ROOT_DIR)\memory +MP_DIR = $(ROOT_DIR)\mp +CLIB_DIR = $(ROOT_DIR)\c-libs +CONFIG_DIR = $(ROOT_DIR)\config + +CFLAGS = + +INCLUDES = /I$(OBJS_DIR) /I$(INC_DIR) /I$(MACH_DIR) +GC_INCLUDES = $(INCLUDES) /I$(GC_DIR) +GEN_INCLUDES = /I$(CONFIG_DIR) $(INCLUDES) + +OBJS = c-libraries.obj win32-raise-syserr.obj \ + ml-options.obj \ + boot.obj load-ml.obj run-ml.obj globals.obj ml-state.obj \ + error.obj timers.obj win32-timers.obj \ + qualify-name.obj swap-bytes.obj \ + signal-util.obj prim.obj \ + win32-util.obj win32-fault.obj win32-signal.obj \ + $(XOBJS) + +# +# libraries of ML callable C functions +# +CLIBS = $(XCLIBS) \ + $(CLIB_DIR)\win32\libwin32.lib \ + $(CLIB_DIR)\win32-io\libwin32-io.lib \ + $(CLIB_DIR)\win32-filesys\libwin32-filesys.lib \ + $(CLIB_DIR)\win32-process\libwin32-process.lib \ + $(CLIB_DIR)\dl\dynload.lib \ + $(CLIB_DIR)\smlnj-runtime\libsmlnj-runt.lib \ + $(CLIB_DIR)\smlnj-signals\libsmlnj-sig.lib \ + $(CLIB_DIR)\smlnj-sockets\libsmlnj-sock.lib \ + $(CLIB_DIR)\smlnj-prof\libsmlnj-prof.lib \ + $(CLIB_DIR)\smlnj-time\libsmlnj-time.lib \ + $(CLIB_DIR)\smlnj-date\libsmlnj-date.lib \ + $(CLIB_DIR)\smlnj-math\libsmlnj-math.lib + +# +# The mem-win32 version is the default (and currently only) +# memory subsystem for win32 +# +LIBMEM_OBJS = mem-win32.obj + +# +# The SGI version is the default MP library +# +LIBMP_OBJS = 'OBJS=$$(SGI_OBJS)' + +# +# the default GC library +# +GC_LIB = libgc.lib + +# +# The various libraries; note that the order matters, since the C functions +# call GC library routines, and the GC library uses the memory library. +# +ML_LIBS = $(GC_DIR)\$(GC_LIB) \ + $(MEM_DIR)\libmem.lib + +DEP_LIBS = $(ML_LIBS) $(XLIBS) + +ALL_LIBS = $(CLIBS) $(DEP_LIBS) + +clean: + del /Q /F v-* run.* interactive-run \ + win32-sigtbl.c \ + gen-sizes.exe gen-offsets.exe gen-regmask.exe \ + gen-bc-instr-def.exe \ + gen-win32-signals.exe \ + gen-win32-sigtbl.exe \ + ml-sizes.h reg-mask.h mlstate-offsets.h bc-instr-def.h \ + system-signals.h \ + *.obj prim.s primops.s \ + *.sbr *.pdb + cd $(MEM_DIR) + $(MAKE) MAKE="$(MAKE)" clean + + cd $(CLIB_DIR) + $(MAKE) MAKE="$(MAKE)" clean + + cd $(GC_DIR) + $(MAKE) MAKE="$(MAKE)" clean + + cd $(MP_DIR) + $(MAKE) MAKE="$(MAKE)" clean + + +$(RUNTIME): $(VERSION) main.obj $(OBJS) $(ALL_LIBS) + $(CC) /Fe$(RUNTIME) $(CFLAGS) $(LDFLAGS) main.obj $(OBJS) $(ALL_LIBS) $(LD_LIBS) wsock32.lib netapi32.lib advapi32.lib user32.lib shlwapi.lib shell32.lib + +$(VERSION): + ($(MAKE) MAKE="$(MAKE)" clean) + echo "$(VERSION)" > $(VERSION) + +# +# Sizes +# +ml-sizes.h: gen-sizes.exe + .\gen-sizes.exe + +# +# kernel objects +# +main.obj: $(KERN_DIR)\main.c \ + ml-sizes.h $(INC_DIR)\ml-base.h $(INC_DIR)\ml-options.h \ + $(INC_DIR)\ml-limits.h $(INC_DIR)\ml-objects.h + $(CC) /c $(CFLAGS) $(DEFS) $(INCLUDES) $(KERN_DIR)\main.c + +ml-options.obj: $(KERN_DIR)\error.c \ + $(INC_DIR)\ml-base.h $(INC_DIR)\ml-options.h + $(CC) /c $(CFLAGS) $(DEFS) $(INCLUDES) $(KERN_DIR)\ml-options.c + +error.obj: $(KERN_DIR)\error.c $(INC_DIR)\ml-base.h + $(CC) /c $(CFLAGS) $(DEFS) $(INCLUDES) $(KERN_DIR)\error.c + +standalone.obj: $(KERN_DIR)\standalone.c \ + ml-sizes.h $(INC_DIR)\ml-base.h $(INC_DIR)\ml-limits.h + $(CC) /c $(CFLAGS) $(DEFS) $(INCLUDES) $(KERN_DIR)\standalone.c + +boot.obj: $(KERN_DIR)\boot.c \ + $(INC_DIR)\ml-osdep.h $(INC_DIR)\cache-flush.h \ + ml-sizes.h $(INC_DIR)\ml-base.h $(INC_DIR)\bin-file.h \ + $(INC_DIR)\ml-objects.h $(INC_DIR)\ml-globals.h $(INC_DIR)\gc.h \ + $(INC_DIR)\ml-limits.h + $(CC) /c $(CFLAGS) $(DEFS) $(INCLUDES) $(KERN_DIR)\boot.c + +load-ml.obj: $(KERN_DIR)\load-ml.c \ + ml-sizes.h \ + system-signals.h \ + $(INC_DIR)\ml-base.h $(INC_DIR)\ml-roots.h $(INC_DIR)\ml-state.h + $(CC) /c $(CFLAGS) $(DEFS) $(INCLUDES) $(KERN_DIR)\load-ml.c + +run-ml.obj: $(KERN_DIR)\run-ml.c \ + ml-sizes.h reg-mask.h \ + system-signals.h \ + $(INC_DIR)\ml-base.h $(INC_DIR)\ml-values.h \ + $(INC_DIR)\vproc-state.h $(INC_DIR)\ml-roots.h \ + $(INC_DIR)\ml-state.h + $(CC) /c $(CFLAGS) $(DEFS) $(INCLUDES) $(KERN_DIR)\run-ml.c + +globals.obj: $(KERN_DIR)\globals.c \ + ml-sizes.h $(INC_DIR)\ml-base.h $(INC_DIR)\ml-values.h $(INC_DIR)\ml-limits.h \ + $(INC_DIR)\c-globals-tbl.h $(INC_DIR)\machine-id.h + $(CC) /c $(CFLAGS) $(DEFS) $(INCLUDES) $(KERN_DIR)\globals.c + +ml-state.obj: $(KERN_DIR)\ml-state.c \ + ml-sizes.h \ + system-signals.h \ + $(INC_DIR)\ml-base.h $(INC_DIR)\tags.h \ + $(INC_DIR)\ml-values.h $(INC_DIR)\ml-roots.h \ + $(INC_DIR)\ml-state.h $(INC_DIR)\vproc-state.h \ + $(INC_DIR)\ml-globals.h $(INC_DIR)\ml-timer.h $(INC_DIR)\gc.h \ + $(INC_DIR)\ml-limits.h + $(CC) /c $(CFLAGS) $(DEFS) $(INCLUDES) $(KERN_DIR)\ml-state.c + +timers.obj: $(KERN_DIR)\timers.c \ + ml-sizes.h \ + system-signals.h \ + $(INC_DIR)\ml-base.h $(INC_DIR)\vproc-state.h \ + $(INC_DIR)\ml-timer.h + $(CC) /c $(CFLAGS) $(DEFS) $(INCLUDES) $(KERN_DIR)\timers.c + +qualify-name.obj: $(KERN_DIR)\qualify-name.c \ + ml-sizes.h $(INC_DIR)\ml-base.h $(INC_DIR)\machine-id.h + $(CC) /c $(CFLAGS) $(DEFS) $(INCLUDES) $(KERN_DIR)\qualify-name.c + +swap-bytes.obj: $(KERN_DIR)\swap-bytes.c \ + ml-sizes.h $(INC_DIR)\ml-base.h + $(CC) /c $(CFLAGS) $(DEFS) $(INCLUDES) $(KERN_DIR)\swap-bytes.c + + +# +# C libraries +# +c-libraries.obj: $(CLIB_DIR)\c-libraries.c \ + ml-sizes.h $(INC_DIR)\ml-base.h \ + $(CLIB_DIR)\clib-list.h + $(CC) /c $(CFLAGS) $(DEFS) $(INCLUDES) /I$(CLIB_DIR) $(CLIB_DIR)\c-libraries.c + +win32-raise-syserr.obj: $(CLIB_DIR)\win32-raise-syserr.c \ + ml-sizes.h \ + $(INC_DIR)\ml-base.h \ + $(INC_DIR)\ml-state.h $(INC_DIR)\ml-roots.h \ + $(INC_DIR)\ml-objects.h $(INC_DIR)\ml-globals.h \ + $(CLIB_DIR)\ml-c.h + $(CC) /c $(CFLAGS) $(DEFS) $(INCLUDES) $(CLIB_DIR)\win32-raise-syserr.c + + +# +# Machine dependent objects +# + +win32-fault.obj: $(MACH_DIR)\win32-fault.c $(MACH_DIR)\win32-fault.h \ + $(MACH_DIR)\signal-sysdep.h \ + system-signals.h \ + ml-sizes.h \ + $(INC_DIR)\ml-base.h \ + $(INC_DIR)\vproc-state.h $(INC_DIR)\ml-roots.h \ + $(INC_DIR)\ml-state.h $(INC_DIR)\ml-globals.h + $(CC) /c $(CFLAGS) $(DEFS) $(INCLUDES) $(MACH_DIR)\win32-fault.c + +win32-util.obj: $(MACH_DIR)\win32-util.c + $(CC) /c $(CFLAGS) $(DEFS) $(INCLUDES) $(MACH_DIR)\win32-util.c + +signal-util.obj: $(MACH_DIR)\signal-util.c \ + ml-sizes.h system-signals.h \ + $(INC_DIR)\ml-base.h $(INC_DIR)\ml-signals.h \ + $(INC_DIR)\vproc-state.h $(INC_DIR)\ml-roots.h $(INC_DIR)\ml-state.h \ + $(INC_DIR)\ml-limits.h $(INC_DIR)\ml-objects.h + $(CC) /c $(CFLAGS) $(DEFS) $(INCLUDES) $(MACH_DIR)\signal-util.c + +win32-signal.obj: $(MACH_DIR)\win32-signal.c win32-sigtbl.c \ + ml-sizes.h \ + $(INC_DIR)\ml-base.h \ + $(INC_DIR)\ml-signals.h $(INC_DIR)\vproc-state.h $(INC_DIR)\ml-roots.h $(INC_DIR)\ml-state.h \ + $(INC_DIR)\ml-limits.h $(INC_DIR)\ml-objects.h \ + $(INC_DIR)\ml-globals.h \ + $(MACH_DIR)\signal-sysdep.h + $(CC) /c $(CFLAGS) $(DEFS) $(INCLUDES) $(MACH_DIR)\win32-signal.c + +win32-timers.obj: $(MACH_DIR)\win32-timers.c \ + ml-sizes.h \ + $(INC_DIR)\win32-timers.h \ + $(INC_DIR)\ml-base.h \ + $(INC_DIR)\ml-roots.h $(INC_DIR)\ml-state.h \ + $(INC_DIR)\ml-timer.h + $(CC) /c $(CFLAGS) $(DEFS) $(INCLUDES) $(MACH_DIR)\win32-timers.c + +system-signals.h: gen-win32-signals.exe + .\gen-win32-signals.exe + +win32-sigtbl.c: gen-win32-sigtbl.exe + .\gen-win32-sigtbl.exe + +reg-mask.h: gen-regmask.exe + .\gen-regmask.exe + +prim.obj: $(MACH_DIR)\$(TARGET).prim.asm \ + $(MACH_DIR)\x86-syntax.h \ + ml-sizes.h reg-mask.h $(INC_DIR)\ml-base.h $(INC_DIR)\tags.h \ + $(INC_DIR)\asm-base.h \ + mlstate-offsets.h + $(CPP) /D_ASM_ /DMASM_ASSEMBLER $(DEFS) /I$(MACH_DIR) $(INCLUDES) /Tc$(MACH_DIR)\$(TARGET).prim.asm > prim.s + $(AS) /c prim.s + + +# +# + +mlstate-offsets.h: gen-offsets.exe ml-sizes.h \ + system-signals.h \ + $(INC_DIR)\ml-base.h $(INC_DIR)\vproc-state.h \ + $(INC_DIR)\ml-roots.h $(INC_DIR)\ml-state.h + .\gen-offsets.exe + + +# +# arguments to recursive make +# +MK_ARGS = VERSION="$(VERSION)" \ + MAKE="$(MAKE)" \ + CC="$(CC)" CFLAGS="$(CFLAGS)" DEFS="$(DEFS)" \ + AR="$(AR)" ARFLAGS="$(ARFLAGS)" \ + RANLIB="$(RANLIB)" \ + INCLUDES="$(GC_INCLUDES) /I..\bytecode" + +# +# memory management library +# +$(MEM_DIR)\libmem.lib: FORCE + cd $(MEM_DIR) + $(MAKE) OBJS="$(LIBMEM_OBJS)" $(MK_ARGS) libmem.lib + cd $(MAKEDIR) + +# +# C functions library +# +$(CLIB_DIR)\libcfuns.lib: FORCE + cd $(CLIB_DIR) + $(MAKE) $(MK_ARGS) libcfuns.lib + cd $(MAKEDIR) + +# +# GC and heap I/O library +# +$(GC_DIR)\$(GC_LIB): FORCE + cd $(GC_DIR) + $(MAKE) CHECK_HEAP="$(CHECK_HEAP)" $(MK_ARGS) $(GC_LIB) + cd $(MAKEDIR) + +# +# multiprocessor library (optional) +# +$(MP_DIR)\libmp.lib: FORCE + cd $(MP_DIR) + $(MAKE) $(LIBMP_OBJS) $(MK_ARGS) libmp.lib + cd $(MAKEDIR) + +# +# to force recursive makes +# +FORCE: + + +# +# Configuration tools +# +gen-sizes.exe: $(CONFIG_DIR)\gen-sizes.c gen-common.obj $(CONFIG_DIR)\gen.h \ + $(INC_DIR)\ml-base.h + $(CC) $(DEFS) $(GEN_INCLUDES) /Fegen-sizes.exe $(CONFIG_DIR)\gen-sizes.c gen-common.obj + +gen-regmask.exe: $(CONFIG_DIR)\gen-regmask.c gen-common.obj $(CONFIG_DIR)\gen.h + $(CC) $(DEFS) $(GEN_INCLUDES) /Fegen-regmask.exe $(CONFIG_DIR)\gen-regmask.c gen-common.obj + +gen-offsets.exe: $(CONFIG_DIR)\gen-offsets.c gen-common.obj $(CONFIG_DIR)\gen.h \ + ml-sizes.h \ + system-signals.h \ + $(INC_DIR)\ml-base.h $(INC_DIR)\vproc-state.h \ + $(INC_DIR)\ml-roots.h $(INC_DIR)\ml-state.h + $(CC) $(DEFS) $(GEN_INCLUDES) /Fegen-offsets $(CONFIG_DIR)\gen-offsets.c gen-common.obj + +gen-win32-signals.exe: $(CONFIG_DIR)\gen-win32-signals.c $(CONFIG_DIR)\win32-sigtab.h \ + gen-common.obj \ + $(CONFIG_DIR)\gen.h + $(CC) $(GEN_INCLUDES) $(DEFS) /Fegen-win32-signals.exe $(CONFIG_DIR)\gen-win32-signals.c gen-common.obj + +gen-win32-sigtbl.exe: $(CONFIG_DIR)\gen-win32-sigtbl.c $(CONFIG_DIR)\win32-sigtab.h \ + gen-common.obj \ + $(CONFIG_DIR)\gen.h + $(CC) $(CFLAGS) $(GEN_INCLUDES) $(DEFS) /Fegen-win32-sigtbl.exe $(CONFIG_DIR)\gen-win32-sigtbl.c gen-common.obj + +gen-common.obj: $(CONFIG_DIR)\gen-common.c $(CONFIG_DIR)\gen.h + $(CC) /c $(GEN_INCLUDES) $(CONFIG_DIR)\gen-common.c + +# +# Make rules for the C libraries +# + +# include directories for the library sub-directories +# +LIB_OBJS_DIR = ..\..\objs +LIB_INC_DIR = ..\..\include +LIB_MACH_DIR = ..\..\mach-dep +LIB_INCLUDES = /I$(LIB_OBJS_DIR) /I$(LIB_INC_DIR) /I$(LIB_MACH_DIR) /I.. + +# +# arguments to recursive make +# +LIB_MK_ARGS = VERSION="$(VERSION)" \ + MAKE="$(MAKE)" \ + CC="$(CC)" CFLAGS="$(CFLAGS)" DEFS="$(DEFS)" \ + AR="$(AR)" ARFLAGS="$(ARFLAGS)" \ + RANLIB="$(RANLIB)" \ + INCLUDES="$(LIB_INCLUDES)" + +$(CLIB_DIR)\win32\libwin32.lib: FORCE + cd $(CLIB_DIR)\win32 + $(MAKE) $(LIB_MK_ARGS) libwin32.lib + cd $(MAKEDIR) + +$(CLIB_DIR)\win32-io\libwin32-io.lib: FORCE + cd $(CLIB_DIR)\win32-io + $(MAKE) $(LIB_MK_ARGS) libwin32-io.lib + cd $(MAKEDIR) + +$(CLIB_DIR)\win32-filesys\libwin32-filesys.lib: FORCE + cd $(CLIB_DIR)\win32-filesys + $(MAKE) $(LIB_MK_ARGS) libwin32-filesys.lib + cd $(MAKEDIR) + +$(CLIB_DIR)\win32-process\libwin32-process.lib: FORCE + cd $(CLIB_DIR)\win32-process + $(MAKE) $(LIB_MK_ARGS) libwin32-process.lib + cd $(MAKEDIR) + +$(CLIB_DIR)\smlnj-runtime\libsmlnj-runt.lib: FORCE + cd $(CLIB_DIR)\smlnj-runtime + $(MAKE) $(LIB_MK_ARGS) libsmlnj-runt.lib + cd $(MAKEDIR) + +$(CLIB_DIR)\smlnj-signals\libsmlnj-sig.lib: FORCE + cd $(CLIB_DIR)\smlnj-signals + $(MAKE) $(LIB_MK_ARGS) libsmlnj-sig.lib + cd $(MAKEDIR) + +$(CLIB_DIR)\smlnj-sockets\libsmlnj-sock.lib: FORCE + cd $(CLIB_DIR)\smlnj-sockets + $(MAKE) $(LIB_MK_ARGS) libsmlnj-sock.lib + cd $(MAKEDIR) + +$(CLIB_DIR)\dl\dynload.lib: FORCE + cd $(CLIB_DIR)\dl + $(MAKE) $(LIB_MK_ARGS) dynload.lib + cd $(MAKEDIR) + +$(CLIB_DIR)\smlnj-prof\libsmlnj-prof.lib: FORCE + cd $(CLIB_DIR)\smlnj-prof + $(MAKE) $(LIB_MK_ARGS) libsmlnj-prof.lib + cd $(MAKEDIR) + +$(CLIB_DIR)\smlnj-time\libsmlnj-time.lib: FORCE + cd $(CLIB_DIR)\smlnj-time + $(MAKE) $(LIB_MK_ARGS) libsmlnj-time.lib + cd $(MAKEDIR) + +$(CLIB_DIR)\smlnj-date\libsmlnj-date.lib: FORCE + cd $(CLIB_DIR)\smlnj-date + $(MAKE) $(LIB_MK_ARGS) libsmlnj-date.lib + cd $(MAKEDIR) + +$(CLIB_DIR)\smlnj-math\libsmlnj-math.lib: FORCE + cd $(CLIB_DIR)\smlnj-math + $(MAKE) $(LIB_MK_ARGS) libsmlnj-math.lib + cd $(MAKEDIR) + +$(CLIB_DIR)\smlnj-ccalls\libsmlnj-ccalls.lib: FORCE + cd $(CLIB_DIR)\smlnj-ccalls + $(MAKE) $(LIB_MK_ARGS) libsmlnj-ccalls.lib + cd $(MAKEDIR) + +#$(CLIB_DIR)\smlnj-mp\libsmlnj-mp.lib: FORCE +# cd $(CLIB_DIR)\smlnj-mp +# $(MAKE) $(LIB_MK_ARGS) libsmlnj-mp.lib +# cd $(MAKEDIR) + diff --git a/base/runtime/objs/mk.amd64-cygwin b/base/runtime/objs/mk.amd64-cygwin new file mode 100644 index 0000000..2fb63c6 --- /dev/null +++ b/base/runtime/objs/mk.amd64-cygwin @@ -0,0 +1,22 @@ +# mk.x86-cygwin +# + +SHELL = /bin/sh + +MAKE = make +CC = gcc -std=gnu99 +CFLAGS = -O2 +CPP = gcc -x assembler-with-cpp -E -P +XOBJS = +XLIBS = ../c-libs/dl/libunix-dynload.a +LD_LIBS = -lm +XDEFS = +BASE_DEFS = +DEFS = $(XDEFS) $(BASE_DEFS) -DARCH_AMD64 -DSIZE_64 \ + -DOPSYS_UNIX -DOPSYS_CYGWIN -DGNU_ASSEMBLER -DDLOPEN -DINDIRECT_CFUNC +TARGET = AMD64 +VERSION = v-amd64-cygwin +RUNTIME = run.amd64-cygwin + +all: + ($(MAKE) RUNTIME="$(RUNTIME)" VERSION="$(VERSION)" CC="$(CC)" CFLAGS="$(CFLAGS)" CPP="$(CPP)" TARGET=$(TARGET) DEFS="$(DEFS)" XOBJS="$(XOBJS)" XLIBS="$(XLIBS)" LD_LIBS="$(LD_LIBS)" $(RUNTIME)) diff --git a/base/runtime/objs/mk.amd64-darwin b/base/runtime/objs/mk.amd64-darwin new file mode 100644 index 0000000..018ff30 --- /dev/null +++ b/base/runtime/objs/mk.amd64-darwin @@ -0,0 +1,36 @@ +# mk.amd64-darwin +# +# COPYRIGHT (c) 2019 The SML/NJ Fellowship +# +# Makefile for macOS 10.10+ (Yosimite or later) on 64-bit Intel processors +# + +SDK = -mmacosx-version-min=10.10 + +SHELL = /bin/sh + +MAKE = make +# Explicitly stating the target to let Arm machines use x64 system headers +# and assembler. +AS = /usr/bin/as -arch x86_64 +CC = /usr/bin/clang -m64 -std=c99 -target x86_64-apple-darwin +CFLAGS = -g -O2 -D_DARWIN_C_SOURCE +CPP = /usr/bin/clang -x assembler-with-cpp -E -P -std=c99 +AR = /usr/bin/ar +RANLIB = /usr/bin/ranlib + +XOBJS = +XLIBS = ../c-libs/dl/libunix-dynload.a +LD_LIBS = +XDEFS = +BASE_DEFS = +DEFS = $(XDEFS) $(BASE_DEFS) -DARCH_AMD64 -DSIZE_64 \ + -DOPSYS_UNIX -DOPSYS_DARWIN -DGNU_ASSEMBLER -DDLOPEN -DINDIRECT_CFUNC +TARGET = AMD64 +VERSION = v-amd64-darwin +RUNTIME = run.amd64-darwin +RUNTIMEX = runx.amd64-darwin + +all: + ($(MAKE) RUNTIME="$(RUNTIME)" VERSION="$(VERSION)" CC="$(CC)" CFLAGS="$(CFLAGS)" CPP="$(CPP)" AS="$(AS)" AR="$(AR)" RANLIB="$(RANLIB)" TARGET=$(TARGET) DEFS="$(DEFS)" XOBJS="$(XOBJS)" XLIBS="$(XLIBS)" LD_LIBS="$(LD_LIBS)" $(RUNTIME)) + ($(MAKE) RUNTIME="$(RUNTIMEX)" VERSION="$(VERSION)" CC="$(CC)" CFLAGS="$(CFLAGS)" CPP="$(CPP)" AS="$(AS)" AR="$(AR)" RANLIB="$(RANLIB)" TARGET=$(TARGET) DEFS="$(DEFS)" XOBJS="$(XOBJS)" XLIBS="$(XLIBS)" LD_LIBS="$(LD_LIBS)" LDFLAGS="-Xlinker -r" $(RUNTIMEX)) diff --git a/base/runtime/objs/mk.amd64-freebsd b/base/runtime/objs/mk.amd64-freebsd new file mode 100644 index 0000000..6ee1b79 --- /dev/null +++ b/base/runtime/objs/mk.amd64-freebsd @@ -0,0 +1,31 @@ +# mk.amd64-freebsd +# +# makefile for "Free BSD" on the amd64 architecture, which is a BSD 4.4 clone. +# + +SHELL = /bin/sh + +MAKE = make +AS = as --64 +CC = cc -std=gnu99 +CFLAGS = -O2 -m64 +CPP = cc -x assembler-with-cpp -E -P +XOBJS = +XLIBS = ../c-libs/dl/libunix-dynload.a +LD_LIBS = -lm -ldl +#CHECK_HEAP = check-heap.o +XDEFS = +BASE_DEFS = +DEFS = $(XDEFS) $(BASE_DEFS) -DARCH_AMD64 -DSIZE_64 \ + -DOPSYS_UNIX -DOPSYS_FREEBSD -D_GNU_SOURCE -DGNU_ASSEMBLER -DDLOPEN \ + -DINDIRECT_CFUNC +TARGET = AMD64 +VERSION = v-amd64-freebsd +RUNTIME = run.amd64-freebsd +RUNTIME_SO = run.amd64-freebsd.so +RUNTIME_A = run.amd64-freebsd.a + +all: + ($(MAKE) CHECK_HEAP=$(CHECK_HEAP) RUNTIME="$(RUNTIME)" VERSION="$(VERSION)" AS="$(AS)" CC="$(CC)" CFLAGS="$(CFLAGS)" CPP="$(CPP)" TARGET=$(TARGET) DEFS="$(DEFS)" XOBJS="$(XOBJS)" XLIBS="$(XLIBS)" LD_LIBS="$(LD_LIBS)" $(RUNTIME)) +# ($(MAKE) RUNTIME="$(RUNTIME_SO)" VERSION="$(VERSION)" AS="$(AS)" CC="$(CC)" CFLAGS="$(CFLAGS)" CPP="$(CPP)" TARGET=$(TARGET) DEFS="$(DEFS)" XOBJS="$(XOBJS)" XLIBS="$(XLIBS)" LD_LIBS="$(LD_LIBS)" LDFLAGS="-shared" $(RUNTIME_SO)) +# ($(MAKE) RUNTIME_A="$(RUNTIME_A)" VERSION="$(VERSION)" AS="$(AS)" CC="$(CC)" CFLAGS="$(CFLAGS)" CPP="$(CPP)" TARGET=$(TARGET) DEFS="$(DEFS)" XOBJS="$(XOBJS)" XLIBS="$(XLIBS)" LD_LIBS="$(LD_LIBS)" LDFLAGS="" $(RUNTIME_A)) diff --git a/base/runtime/objs/mk.amd64-linux b/base/runtime/objs/mk.amd64-linux new file mode 100644 index 0000000..731e695 --- /dev/null +++ b/base/runtime/objs/mk.amd64-linux @@ -0,0 +1,29 @@ +# mk.amd64-linux +# + +SHELL = /bin/sh + +MAKE = make +AS = as --64 +CC = gcc -std=gnu99 -Wall +CFLAGS = -O2 -m64 +CPP = gcc -x assembler-with-cpp -E -P +XOBJS = +XLIBS = ../c-libs/dl/libunix-dynload.a +LD_LIBS = -lm -ldl +#CHECK_HEAP = check-heap.o +XDEFS = +BASE_DEFS = +DEFS = $(XDEFS) $(BASE_DEFS) -DARCH_AMD64 -DSIZE_64 \ + -DOPSYS_UNIX -DOPSYS_LINUX -D_GNU_SOURCE -DGNU_ASSEMBLER -DDLOPEN \ + -DINDIRECT_CFUNC +TARGET = AMD64 +VERSION = v-amd64-linux +RUNTIME = run.amd64-linux +RUNTIME_SO = run.amd64-linux.so +RUNTIME_A = run.amd64-linux.a + +all: + ($(MAKE) CHECK_HEAP=$(CHECK_HEAP) RUNTIME="$(RUNTIME)" VERSION="$(VERSION)" AS="$(AS)" CC="$(CC)" CFLAGS="$(CFLAGS)" CPP="$(CPP)" TARGET=$(TARGET) DEFS="$(DEFS)" XOBJS="$(XOBJS)" XLIBS="$(XLIBS)" LD_LIBS="$(LD_LIBS)" $(RUNTIME)) +# ($(MAKE) RUNTIME="$(RUNTIME_SO)" VERSION="$(VERSION)" AS="$(AS)" CC="$(CC)" CFLAGS="$(CFLAGS)" CPP="$(CPP)" TARGET=$(TARGET) DEFS="$(DEFS)" XOBJS="$(XOBJS)" XLIBS="$(XLIBS)" LD_LIBS="$(LD_LIBS)" LDFLAGS="-shared" $(RUNTIME_SO)) +# ($(MAKE) RUNTIME_A="$(RUNTIME_A)" VERSION="$(VERSION)" AS="$(AS)" CC="$(CC)" CFLAGS="$(CFLAGS)" CPP="$(CPP)" TARGET=$(TARGET) DEFS="$(DEFS)" XOBJS="$(XOBJS)" XLIBS="$(XLIBS)" LD_LIBS="$(LD_LIBS)" LDFLAGS="" $(RUNTIME_A)) diff --git a/base/runtime/objs/mk.ppc-aix b/base/runtime/objs/mk.ppc-aix new file mode 100644 index 0000000..a9cb34b --- /dev/null +++ b/base/runtime/objs/mk.ppc-aix @@ -0,0 +1,25 @@ +# mk.ppc-aix +# + +SHELL = /bin/sh + +# note: the vendor cpp isn't expanding nested macro applications correctly. +CC = cc +CFLAGS = -O2 +CPP = /lib/cpp + +#CC = gcc -ansi +#CFLAGS = -O2 +#CPP = gcc -x c -E -P -ansi + +XOBJS = +XLIBS = +LD_LIBS = -lm +BASE_DEFS = +DEFS = $(BASE_DEFS) -DARCH_PPC -DDSIZE_32 -DOPSYS_UNIX -DOPSYS_AIX -DUNBOXEDFLOAT=1 +TARGET = PPC +VERSION = v-ppc-aix +RUNTIME = run.ppc-aix + +all: + (make RUNTIME="$(RUNTIME)" VERSION="$(VERSION)" CC="$(CC)" CPP="$(CPP)" CFLAGS="$(CFLAGS)" AS="$(AS)" TARGET=$(TARGET) DEFS="$(DEFS)" XOBJS="$(XOBJS)" XLIBS="$(XLIBS)" LD_LIBS="$(LD_LIBS)" $(RUNTIME)) diff --git a/base/runtime/objs/mk.ppc-darwin b/base/runtime/objs/mk.ppc-darwin new file mode 100644 index 0000000..e6deb17 --- /dev/null +++ b/base/runtime/objs/mk.ppc-darwin @@ -0,0 +1,29 @@ +# mk.ppc-darwin +# +# COPYRIGHT (c) 2012 The SML/NJ Fellowship +# +# Makefile for MacOS X 10.5 on PPC +# +# the -D_NONSTD_SOURCE flag is needed for Mac OS X 10.5, because the +# mcontext field names changed. + +SHELL = /bin/sh + +MAKE = make +CC = cc -ansi +CFLAGS = -g -O2 -D_NONSTD_SOURCE +CPP = cc -x assembler-with-cpp -E -P -ansi +XOBJS = +XLIBS = ../c-libs/dl/libunix-dynload.a +LD_LIBS = -ldl +XDEFS = +BASE_DEFS = +DEFS = $(XDEFS) $(BASE_DEFS) -DARCH_PPC -DDSIZE_32 -DOPSYS_UNIX -DOPSYS_DARWIN -DDLOPEN +TARGET = PPC +VERSION = v-ppc-darwin +RUNTIME = run.ppc-darwin +RUNTIMEX = runx.ppc-darwin + +all: + ($(MAKE) RUNTIME="$(RUNTIME)" VERSION="$(VERSION)" CC="$(CC)" CFLAGS="$(CFLAGS)" CPP="$(CPP)" TARGET=$(TARGET) DEFS="$(DEFS)" XOBJS="$(XOBJS)" XLIBS="$(XLIBS)" LD_LIBS="$(LD_LIBS)" $(RUNTIME)) + ($(MAKE) RUNTIME="$(RUNTIMEX)" VERSION="$(VERSION)" CC="$(CC)" CFLAGS="$(CFLAGS)" CPP="$(CPP)" TARGET=$(TARGET) DEFS="$(DEFS)" XOBJS="$(XOBJS)" XLIBS="$(XLIBS)" LD_LIBS="$(LD_LIBS)" LDFLAGS="-Xlinker -r" $(RUNTIMEX)) diff --git a/base/runtime/objs/mk.ppc-linux b/base/runtime/objs/mk.ppc-linux new file mode 100644 index 0000000..53a1abb --- /dev/null +++ b/base/runtime/objs/mk.ppc-linux @@ -0,0 +1,26 @@ +# +# mk.ppc-linux +# + +SHELL = /bin/sh + +MAKE = make +CC = gcc -ansi +CFLAGS = -O2 +CPP = gcc -x c -E -P -ansi +#XOBJS = xmonitor.o +#LD_LIBS = -lX11 +#BASE_DEFS = -DHEAP_MONITOR +XOBJS = +XLIBS = +LD_LIBS = -lm +XDEFS = +BASE_DEFS = +DEFS = $(XDEFS) $(BASE_DEFS) -DARCH_PPC -DDSIZE_32 \ + -DOPSYS_UNIX -DOPSYS_LINUX -D_GNU_SOURCE -D_POSIX_SOURCE -D_BSD_SOURCE -DGNU_ASSEMBLER +TARGET = PPC +VERSION = v-ppc-linux +RUNTIME = run.ppc-linux + +all: + ($(MAKE) RUNTIME="$(RUNTIME)" VERSION="$(VERSION)" CC="$(CC)" CFLAGS="$(CFLAGS)" CPP="$(CPP)" TARGET=$(TARGET) DEFS="$(DEFS)" XOBJS="$(XOBJS)" XLIBS="$(XLIBS)" LD_LIBS="$(LD_LIBS)" $(RUNTIME)) diff --git a/base/runtime/objs/mk.ppc-openbsd b/base/runtime/objs/mk.ppc-openbsd new file mode 100644 index 0000000..4d720d7 --- /dev/null +++ b/base/runtime/objs/mk.ppc-openbsd @@ -0,0 +1,25 @@ +# mk.ppc-openbsd +# +# makefile for OpenBSD (version 4.x), which is a BSD 4.4 clone. +# + +SHELL = /bin/sh + +MAKE = gmake + +ARFLAGS = Trcv +CC ?= gcc -ansi +CFLAGS ?= -O2 +CPP = gcc -x assembler-with-cpp -E -P + +XOBJS = +XLIBS = ../c-libs/dl/libunix-dynload.a +LD_LIBS = -lm +BASE_DEFS = +DEFS = $(XDEFS) $(BASE_DEFS) -DARCH_PPC -DDSIZE_32 -DOPSYS_UNIX -DOPSYS_OPENBSD -DDLOPEN +TARGET = PPC +VERSION = v-ppc-openbsd +RUNTIME = run.ppc-openbsd + +all: + ($(MAKE) RUNTIME="$(RUNTIME)" VERSION="$(VERSION)" MAKE="$(MAKE)" CC="$(CC)" CFLAGS="$(CFLAGS)" CPP="$(CPP)" TARGET=$(TARGET) DEFS="$(DEFS)" XOBJS="$(XOBJS)" XLIBS="$(XLIBS)" LD_LIBS="$(LD_LIBS)" $(RUNTIME)) diff --git a/base/runtime/objs/mk.sparc-solaris b/base/runtime/objs/mk.sparc-solaris new file mode 100644 index 0000000..0da2ade --- /dev/null +++ b/base/runtime/objs/mk.sparc-solaris @@ -0,0 +1,28 @@ +# mk.sparc-solaris +# + +SHELL = /bin/sh + +MAKE = /usr/ccs/bin/make +AS = /usr/ccs/bin/as +AR = /usr/ccs/bin/ar +GCC = gcc +CC = $(GCC) +CFLAGS = -O3 +#CFLAGS = -g +CPP = $(GCC) -x c -E -P +RANLIB = $(AR) ts + +XOBJS = +XLIBS = ../c-libs/dl/libunix-dynload.a +LD_LIBS = -lm -lsocket -lnsl -ldl +XDEFS = +BASE_DEFS = -D__STDC__=0 +DEFS = $(XDEFS) $(BASE_DEFS) -DARCH_SPARC -DDSIZE_32 \ + -DOPSYS_UNIX -DOPSYS_SOLARIS -DUNBOXEDFLOAT=1 -DDLOPEN +TARGET = SPARC +VERSION = v-sparc-solaris +RUNTIME = run.sparc-solaris + +all: + ($(MAKE) RUNTIME="$(RUNTIME)" MAKE="$(MAKE)" VERSION="$(VERSION)" CC="$(CC)" CFLAGS="$(CFLAGS)" CPP="$(CPP)" AS="$(AS)" AR="$(AR)" RANLIB="$(RANLIB)" TARGET=$(TARGET) DEFS="$(DEFS)" XOBJS="$(XOBJS)" XLIBS="$(XLIBS)" LD_LIBS="$(LD_LIBS)" $(RUNTIME)) diff --git a/base/runtime/objs/mk.x86-cygwin b/base/runtime/objs/mk.x86-cygwin new file mode 100644 index 0000000..cc684bd --- /dev/null +++ b/base/runtime/objs/mk.x86-cygwin @@ -0,0 +1,22 @@ +# mk.x86-cygwin +# + +SHELL = /bin/sh + +MAKE = make +CC = gcc -std=gnu99 +CFLAGS = -O2 +CPP = gcc -x assembler-with-cpp -E -P +XOBJS = +XLIBS = ../c-libs/dl/libunix-dynload.a +LD_LIBS = -lm +XDEFS = +BASE_DEFS = +DEFS = $(XDEFS) $(BASE_DEFS) -DARCH_X86 -DDSIZE_32 -DALIGN_STACK_16 \ + -DOPSYS_UNIX -DOPSYS_CYGWIN -DGNU_ASSEMBLER -DDLOPEN -DINDIRECT_CFUNC +TARGET = X86 +VERSION = v-x86-cygwin +RUNTIME = run.x86-cygwin + +all: + ($(MAKE) RUNTIME="$(RUNTIME)" VERSION="$(VERSION)" CC="$(CC)" CFLAGS="$(CFLAGS)" CPP="$(CPP)" TARGET=$(TARGET) DEFS="$(DEFS)" XOBJS="$(XOBJS)" XLIBS="$(XLIBS)" LD_LIBS="$(LD_LIBS)" $(RUNTIME)) diff --git a/base/runtime/objs/mk.x86-darwin b/base/runtime/objs/mk.x86-darwin new file mode 100644 index 0000000..4965409 --- /dev/null +++ b/base/runtime/objs/mk.x86-darwin @@ -0,0 +1,54 @@ +# mk.x86-darwin +# +# COPYRIGHT (c) 2012 The SML/NJ Fellowship +# +# Makefile for MacOS X 10.6+ on Intel processors +# + +# +# The SDK variable is set to the SDK minimum Mac OS X version that you want +# to support. Note that since Lion, the 10.5 (and 10.6?) SDKs are not part +# of the Xcode install. +# +# to support 10.5 and later you will need to have the 10.5 SDK in /Developer/SDKs/MacOSX10.5.sdk +# +#SDK = -isysroot /Developer/SDKs/MacOSX10.5.sdk -mmacosx-version-min=10.5 +# +# to support 10.5 and later you will need to have the 10.6 SDK in /Developer/SDKs/MacOSX10.6.sdk; +# this should work on Lion, if you had Xcode 4.2 installed. +#SDK = -isysroot /Developer/SDKs/MacOSX10.6.sdk -mmacosx-version-min=10.6 +# +# On Mountain Lion, the earliest version is 10.7, which is to supported by the following line: +# +#SDK = -isysroot Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX10.7.sdk -mmacosx-version-min=10.7 + +SDK = -mmacosx-version-min=10.6 + +# the /usr/bin/as command does _not_ accept the -mmacosx-version-min +# command-line option prior to MacOS X 10.10 (Yosimite) +ifeq ($(AS_ACCEPTS_SDK),yes) +AS_SDK = $(SDK) +endif + +SHELL = /bin/sh + +MAKE = make +AS = /usr/bin/as -arch i386 $(AS_SDK) +CC = cc -m32 -std=c99 +CFLAGS = -g -O2 -D_DARWIN_C_SOURCE $(SDK) +CPP = cc -x assembler-with-cpp -E -P -std=c99 +XOBJS = +XLIBS = ../c-libs/dl/libunix-dynload.a +LD_LIBS = -ldl +XDEFS = +BASE_DEFS = +DEFS = $(XDEFS) $(BASE_DEFS) -DARCH_X86 -DDSIZE_32 -DALIGN_STACK_16 \ + -DOPSYS_UNIX -DOPSYS_DARWIN -DGNU_ASSEMBLER -DDLOPEN -DINDIRECT_CFUNC +TARGET = X86 +VERSION = v-x86-darwin +RUNTIME = run.x86-darwin +RUNTIMEX = runx.x86-darwin + +all: + ($(MAKE) RUNTIME="$(RUNTIME)" VERSION="$(VERSION)" CC="$(CC)" CFLAGS="$(CFLAGS)" CPP="$(CPP)" AS="$(AS)" TARGET=$(TARGET) DEFS="$(DEFS)" XOBJS="$(XOBJS)" XLIBS="$(XLIBS)" LD_LIBS="$(LD_LIBS)" $(RUNTIME)) + ($(MAKE) RUNTIME="$(RUNTIMEX)" VERSION="$(VERSION)" CC="$(CC)" CFLAGS="$(CFLAGS)" CPP="$(CPP)" TARGET=$(TARGET) DEFS="$(DEFS)" XOBJS="$(XOBJS)" XLIBS="$(XLIBS)" LD_LIBS="$(LD_LIBS)" LDFLAGS="-Xlinker -r" $(RUNTIMEX)) diff --git a/base/runtime/objs/mk.x86-darwin18 b/base/runtime/objs/mk.x86-darwin18 new file mode 100644 index 0000000..2941d8d --- /dev/null +++ b/base/runtime/objs/mk.x86-darwin18 @@ -0,0 +1,46 @@ +# mk.x86-darwin +# +# COPYRIGHT (c) 2018 The SML/NJ Fellowship +# +# Makefile for macOS 10.14 Mojave. Note that we assume that the SDK +# variable is set on the make command line. +# + +# Normally, the SDK variable is set on the command line (by install.sh), +# but if it is not, then we assume that the 10.13 SDK is in the usual place. +# +ifeq ($(origin SDK), undefined) + DEVDIR = $(shell xcode-select -p) + SDK = $(DEVDIR)/Platforms/MacOSX.platform/Developer/SDKs/MacOSX10.13.sdk +endif + +SYSROOT = -isysroot $(SDK) -mmacosx-version-min=10.7 + +# the /usr/bin/as command does _not_ accept the -mmacosx-version-min +# command-line option prior to MacOS X 10.10 (Yosimite) +ifeq ($(AS_ACCEPTS_SDK),yes) + AS_SDK = $(SYSROOT) +endif + +SHELL = /bin/sh + +MAKE = make +AS = /usr/bin/as -arch i386 $(AS_SDK) +CC = cc -m32 -std=c99 +CFLAGS = -g -O2 -D_DARWIN_C_SOURCE $(SYSROOT) +CPP = cc -x assembler-with-cpp -E -P -std=c99 +XOBJS = +XLIBS = ../c-libs/dl/libunix-dynload.a +LD_LIBS = +XDEFS = +BASE_DEFS = +DEFS = $(XDEFS) $(BASE_DEFS) -DARCH_X86 -DDSIZE_32 -DALIGN_STACK_16 \ + -DOPSYS_UNIX -DOPSYS_DARWIN -DGNU_ASSEMBLER -DDLOPEN -DINDIRECT_CFUNC +TARGET = X86 +VERSION = v-x86-darwin +RUNTIME = run.x86-darwin +RUNTIMEX = runx.x86-darwin + +all: + ($(MAKE) RUNTIME="$(RUNTIME)" VERSION="$(VERSION)" CC="$(CC)" CFLAGS="$(CFLAGS)" CPP="$(CPP)" AS="$(AS)" TARGET=$(TARGET) DEFS="$(DEFS)" XOBJS="$(XOBJS)" XLIBS="$(XLIBS)" LD_LIBS="$(LD_LIBS)" $(RUNTIME)) + ($(MAKE) RUNTIME="$(RUNTIMEX)" VERSION="$(VERSION)" CC="$(CC)" CFLAGS="$(CFLAGS)" CPP="$(CPP)" TARGET=$(TARGET) DEFS="$(DEFS)" XOBJS="$(XOBJS)" XLIBS="$(XLIBS)" LD_LIBS="$(LD_LIBS)" LDFLAGS="-Xlinker -r" $(RUNTIMEX)) diff --git a/base/runtime/objs/mk.x86-freebsd b/base/runtime/objs/mk.x86-freebsd new file mode 100644 index 0000000..a5ebf4c --- /dev/null +++ b/base/runtime/objs/mk.x86-freebsd @@ -0,0 +1,32 @@ +# mk.x86-freebsd +# +# makefile for "Free BSD" on the x86 architecture, which is a BSD 4.4 clone. +# + +SHELL = /bin/sh + +MAKE = make + +AS = as --32 +CC = cc -std=gnu99 +CFLAGS = -O2 -m32 +CPP = cc -x assembler-with-cpp -E -P +#CPP = /usr/bin/cpp -P +ARFLAGS = Trcv + +XOBJS = +XLIBS = ../c-libs/dl/libunix-dynload.a +LD_LIBS = -lm +BASE_DEFS = +DEFS = $(BASE_DEFS) -DARCH_X86 -DDSIZE_32 -DALIGN_STACK_16 \ + -DOPSYS_UNIX -DOPSYS_FREEBSD -DGNU_ASSEMBLER -DDLOPEN -DINDIRECT_CFUNC +TARGET = X86 +VERSION = v-x86-freebsd +RUNTIME = run.x86-freebsd +RUNTIME_SO = run.x86-freebsd.so +RUNTIME_A = run.x86-freebsd.a + +all: + ($(MAKE) RUNTIME="$(RUNTIME)" VERSION="$(VERSION)" MAKE="$(MAKE)" AS="$(AS)" CC="$(CC)" CFLAGS="$(CFLAGS)" CPP="$(CPP)" TARGET=$(TARGET) DEFS="$(DEFS)" XOBJS="$(XOBJS)" XLIBS="$(XLIBS)" LD_LIBS="$(LD_LIBS)" $(RUNTIME)) +# ($(MAKE) RUNTIME="$(RUNTIME_SO)" VERSION="$(VERSION)" MAKE="$(MAKE)" AS="$(AS)" CC="$(CC)" CFLAGS="$(CFLAGS)" CPP="$(CPP)" TARGET=$(TARGET) DEFS="$(DEFS)" XOBJS="$(XOBJS)" XLIBS="$(XLIBS)" LD_LIBS="$(LD_LIBS)" LDFLAGS="-shared" $(RUNTIME_SO)) +# ($(MAKE) RUNTIME_A="$(RUNTIME_A)" VERSION="$(VERSION)" MAKE="$(MAKE)" AS="$(AS)" CC="$(CC)" CFLAGS="$(CFLAGS)" CPP="$(CPP)" TARGET=$(TARGET) DEFS="$(DEFS)" XOBJS="$(XOBJS)" XLIBS="$(XLIBS)" LD_LIBS="$(LD_LIBS)" LDFLAGS="" $(RUNTIME_A)) diff --git a/base/runtime/objs/mk.x86-linux b/base/runtime/objs/mk.x86-linux new file mode 100644 index 0000000..7b2d86a --- /dev/null +++ b/base/runtime/objs/mk.x86-linux @@ -0,0 +1,30 @@ +# mk.x86-linux +# + +SHELL = /bin/sh + +MAKE = make +AS = as --32 +CC = gcc -std=gnu99 +CFLAGS = -O2 -m32 +CPP = gcc -x assembler-with-cpp -E -P +#XOBJS = xmonitor.o +#LD_LIBS = -lX11 +#BASE_DEFS = -DHEAP_MONITOR +XOBJS = +XLIBS = ../c-libs/dl/libunix-dynload.a +LD_LIBS = -lm -ldl +XDEFS = +BASE_DEFS = +DEFS = $(XDEFS) $(BASE_DEFS) -DARCH_X86 -DDSIZE_32 -DALIGN_STACK_16 \ + -DOPSYS_UNIX -DOPSYS_LINUX -D_GNU_SOURCE -DGNU_ASSEMBLER -DDLOPEN -DINDIRECT_CFUNC -DINDIRECT_CFUNC +TARGET = X86 +VERSION = v-x86-linux +RUNTIME = run.x86-linux +RUNTIME_SO = run.x86-linux.so +RUNTIME_A = run.x86-linux.a + +all: + ($(MAKE) RUNTIME="$(RUNTIME)" VERSION="$(VERSION)" AS="$(AS)" CC="$(CC)" CFLAGS="$(CFLAGS)" CPP="$(CPP)" TARGET=$(TARGET) DEFS="$(DEFS)" XOBJS="$(XOBJS)" XLIBS="$(XLIBS)" LD_LIBS="$(LD_LIBS)" $(RUNTIME)) + ($(MAKE) RUNTIME="$(RUNTIME_SO)" VERSION="$(VERSION)" AS="$(AS)" CC="$(CC)" CFLAGS="$(CFLAGS)" CPP="$(CPP)" TARGET=$(TARGET) DEFS="$(DEFS)" XOBJS="$(XOBJS)" XLIBS="$(XLIBS)" LD_LIBS="$(LD_LIBS)" LDFLAGS="-shared" $(RUNTIME_SO)) + ($(MAKE) RUNTIME_A="$(RUNTIME_A)" VERSION="$(VERSION)" AS="$(AS)" CC="$(CC)" CFLAGS="$(CFLAGS)" CPP="$(CPP)" TARGET=$(TARGET) DEFS="$(DEFS)" XOBJS="$(XOBJS)" XLIBS="$(XLIBS)" LD_LIBS="$(LD_LIBS)" LDFLAGS="" $(RUNTIME_A)) diff --git a/base/runtime/objs/mk.x86-linux-pthreads b/base/runtime/objs/mk.x86-linux-pthreads new file mode 100644 index 0000000..688641e --- /dev/null +++ b/base/runtime/objs/mk.x86-linux-pthreads @@ -0,0 +1,25 @@ +# mk.x86-linux +# + +SHELL = /bin/sh + +MAKE = make +CC = gcc -std=c99 +CFLAGS = -O2 -D_REENTRANT +CPP = gcc -x assembler-with-cpp -E -P +#XOBJS = xmonitor.o +#LD_LIBS = -lX11 +#BASE_DEFS = -DHEAP_MONITOR +XOBJS = +XLIBS = ../c-libs/dl/libunix-dynload.a +LD_LIBS = -lm -ldl -lpthread +XDEFS = +BASE_DEFS = +DEFS = $(XDEFS) $(BASE_DEFS) -DARCH_X86 -DDSIZE_32 -DALIGN_STACK_16 \ + -DOPSYS_UNIX -DOPSYS_LINUX -D_POSIX_SOURCE -D_BSD_SOURCE -DGNU_ASSEMBLER -DDLOPEN -DINDIRECT_CFUNC +TARGET = X86 +VERSION = v-x86-linux-pthreads +RUNTIME = run.x86-linux + +all: + ($(MAKE) RUNTIME="$(RUNTIME)" VERSION="$(VERSION)" CC="$(CC)" CFLAGS="$(CFLAGS)" CPP="$(CPP)" TARGET=$(TARGET) DEFS="$(DEFS)" XOBJS="$(XOBJS)" XLIBS="$(XLIBS)" LD_LIBS="$(LD_LIBS)" $(RUNTIME)) diff --git a/base/runtime/objs/mk.x86-netbsd b/base/runtime/objs/mk.x86-netbsd new file mode 100644 index 0000000..fc716e7 --- /dev/null +++ b/base/runtime/objs/mk.x86-netbsd @@ -0,0 +1,27 @@ +# mk.x86-netbsd +# +# makefile for NetBSD (version 3.x), which is a BSD 4.4 clone. +# + +SHELL = /bin/sh + +MAKE = gmake + +ARFLAGS = Trcv +CC = gcc -ansi +CFLAGS = -O2 +CPP = gcc -x assembler-with-cpp -E -P +#CPP = /usr/bin/cpp -P + +XOBJS = +XLIBS = ../c-libs/dl/libunix-dynload.a +LD_LIBS = -lm +BASE_DEFS = +DEFS = $(XDEFS) $(BASE_DEFS) -DARCH_X86 -DDSIZE_32 -DALIGN_STACK_16 \ + -DOPSYS_UNIX -DOPSYS_NETBSD -DDLOPEN -DINDIRECT_CFUNC +TARGET = X86 +VERSION = v-x86-netbsd +RUNTIME = run.x86-netbsd + +all: + ($(MAKE) RUNTIME="$(RUNTIME)" VERSION="$(VERSION)" MAKE="$(MAKE)" CC="$(CC)" CFLAGS="$(CFLAGS)" CPP="$(CPP)" TARGET=$(TARGET) DEFS="$(DEFS)" XOBJS="$(XOBJS)" XLIBS="$(XLIBS)" LD_LIBS="$(LD_LIBS)" $(RUNTIME)) diff --git a/base/runtime/objs/mk.x86-openbsd b/base/runtime/objs/mk.x86-openbsd new file mode 100644 index 0000000..fc6ee28 --- /dev/null +++ b/base/runtime/objs/mk.x86-openbsd @@ -0,0 +1,27 @@ +# mk.x86-openbsd +# +# makefile for OpenBSD (version 4.x), which is a BSD 4.4 clone. +# + +SHELL = /bin/sh + +MAKE = gmake + +ARFLAGS = Trcv +CC = gcc -ansi +CFLAGS = -O2 +CPP = gcc -x assembler-with-cpp -E -P +#CPP = /usr/bin/cpp -P + +XOBJS = +XLIBS = ../c-libs/dl/libunix-dynload.a +LD_LIBS = -lm +BASE_DEFS = +DEFS = $(XDEFS) $(BASE_DEFS) -DARCH_X86 -DDSIZE_32 -DALIGN_STACK_16 \ + -DOPSYS_UNIX -DOPSYS_OPENBSD -DDLOPEN -DINDIRECT_CFUNC +TARGET = X86 +VERSION = v-x86-openbsd +RUNTIME = run.x86-openbsd + +all: + ($(MAKE) RUNTIME="$(RUNTIME)" VERSION="$(VERSION)" MAKE="$(MAKE)" CC="$(CC)" CFLAGS="$(CFLAGS)" CPP="$(CPP)" TARGET=$(TARGET) DEFS="$(DEFS)" XOBJS="$(XOBJS)" XLIBS="$(XLIBS)" LD_LIBS="$(LD_LIBS)" $(RUNTIME)) diff --git a/base/runtime/objs/mk.x86-solaris b/base/runtime/objs/mk.x86-solaris new file mode 100644 index 0000000..bbbfc23 --- /dev/null +++ b/base/runtime/objs/mk.x86-solaris @@ -0,0 +1,30 @@ +# mk.x86-solaris +# 90% mk.sparc-solaris, 10% mk.x86-linux +# AS _MUST_ be gas. Sun's Solaris x86 /usr/ccs/bin/as doesn't cut it, +# due to its bizarre input syntax. +# CPP _MUST_ include -ansi if gcc -E is used, to prevent "$" from +# being considered part of identifers. + +SHELL = /bin/sh + +MAKE = /usr/ccs/bin/make +AS = gas +AR = /usr/ccs/bin/ar +GCC = gcc +CC = $(GCC) +CFLAGS = -O2 +CPP = gcc -x assembler-with-cpp -E -P +RANLIB = $(AR) ts + +XOBJS = +XLIBS = +LD_LIBS = -lm -lsocket -lnsl +BASE_DEFS = -D__STDC__=0 +DEFS = $(BASE_DEFS) -DARCH_X86 -DDSIZE_32 -DALIGN_STACK_16 \ + -DOPSYS_UNIX -DOPSYS_SOLARIS -DINDIRECT_CFUNC +TARGET = X86 +VERSION = v-x86-solaris +RUNTIME = run.x86-solaris + +all: + ($(MAKE) RUNTIME="$(RUNTIME)" MAKE="$(MAKE)" VERSION="$(VERSION)" CC="$(CC)" CFLAGS="$(CFLAGS)" CPP="$(CPP)" AS="$(AS)" AR="$(AR)" RANLIB="$(RANLIB)" TARGET=$(TARGET) DEFS="$(DEFS)" XOBJS="$(XOBJS)" XLIBS="$(XLIBS)" LD_LIBS="$(LD_LIBS)" $(RUNTIME)) diff --git a/base/runtime/objs/mk.x86-win32 b/base/runtime/objs/mk.x86-win32 new file mode 100644 index 0000000..9635ede --- /dev/null +++ b/base/runtime/objs/mk.x86-win32 @@ -0,0 +1,29 @@ +# mk.x86-win32 +# +# COPYRIGHT (c) 2018 The Fellowship of SML/NJ (http://www.smlnj.org) +# All rights reserved. +# + +SHELL = + +MAKEFILE = makefile.win32 +MAKE = nmake /nologo /F$(MAKEFILE) +CC = cl /nologo +CFLAGS = /Zi +CPP = cl /E /EP +XOBJS = +XLIBS = +XCLIBS = +#XCLIBS = ..\c-libs\smlnj-ccalls\libsmlnj-ccalls.lib +LD_LIBS = +#XDEFS = /DC_CALLS +#XDEFS = /DDEBUG_TRACE_CCALL +XDEFS = +BASE_DEFS = +DEFS = $(XDEFS) $(BASE_DEFS) /DARCH_X86 /DSIZE_32 /DALIGN_STACK_16 /DOPSYS_WIN32 /DVREGS /DINDIRECT_CFUNC /DDLOPEN +TARGET = X86 +VERSION = v-x86-win32 +RUNTIME = run.x86-win32.exe + +all: + ($(MAKE) MAKE="$(MAKE)" RUNTIME="$(RUNTIME)" VERSION="$(VERSION)" CC="$(CC)" CFLAGS="$(CFLAGS)" CPP="$(CPP)" TARGET=$(TARGET) DEFS="$(DEFS)" XOBJS="$(XOBJS)" XLIBS="$(XLIBS)" LD_LIBS="$(LD_LIBS)" XCLIBS="$(XCLIBS)" $(RUNTIME)) diff --git a/bin/.arch-n-opsys b/bin/.arch-n-opsys new file mode 100755 index 0000000..434a7e8 --- /dev/null +++ b/bin/.arch-n-opsys @@ -0,0 +1,204 @@ +#!/bin/sh +# +# .arch-n-opsys [-32 | -64] -- get architecture and system info +# +# Running `eval .arch-n-opsys` will define the following shell variables: +# +# ARCH -- one of ppc, sparc, or x86 +# OPSYS -- one of aix, cygwin, darwin, freebsd, linux, mklinux, netbsd, +# sunos, solaris, win32 +# HEAP_SUFFIX -- usually $ARCH-$OPSYS, but in some cases the OPSYS is replaced +# by $HEAP_OPSYS +# + +export PATH +PATH="/bin:/usr/bin" + +# the default size; this is set by the config/install.sh script +# +SIZE=64 + +# check for word-size override +# +case x"$1" in + x-32) SIZE=32 ;; + x-64) SIZE=64 ;; + *) ;; +esac + +# pick_arch arch32 arch64 +# +pick_arch() { + if [ $SIZE = 32 ] ; then + echo $1 + else + echo $2 + fi +} + +case `uname -s` in + SunOS) + case `uname -r` in + 4.*) + OPSYS=sunos + case `/usr/bin/arch` in + sun4) ARCH=sparc;; + *) exit 1;; + esac + ;; + 5.*) + OPSYS=solaris + case `uname -p` in + sparc) ARCH=sparc;; + *86) ARCH=x86;; + *) exit 1;; + esac + ;; + *) exit 1;; + esac + ;; + AIX) + OPSYS=aix + ARCH=$(pick_arch ppc ppc64) + ;; + Darwin) + case `uname -p` in + powerpc) + ARCH=$(pick_arch ppc ppc64) + case `uname -r` in + 8*) OPSYS=darwin; HEAP_OPSYS=darwin ;; # MacOS X 10.4 Tiger + 9*) OPSYS=darwin; HEAP_OPSYS=darwin ;; # MacOS X 10.5 Leopard + 10*) OPSYS=darwin; HEAP_OPSYS=darwin ;; # MacOS X 10.6 Snow Leopard + *) exit 1;; + esac;; + i386) + ARCH=$(pick_arch x86 amd64) + REQUIRE_64BIT=no + OPSYS=darwin + HEAP_OPSYS=darwin + case `uname -r` in + 9*) HEAP_OPSYS=darwinz ;; # MacOS X 10.5 Leopard + 10*) ;; # MacOS X 10.6 Snow Leopard + 11*) ;; # MacOS X 10.7 Lion + 12*) ;; # MacOS X 10.8 Mountain Lion + 13*) ;; # MacOS X 10.9 Mavericks + 14*) ;; # MacOS X 10.10 Yosemite + 15*) ;; # MacOS X 10.11 El Capitan + 16*) ;; # macOS 10.12 Sierra + 17*) ;; # macOS 10.13 High Sierra + 18*) ;; # macOS 10.14 Mojave + 19*) REQUIRE_64BIT=yes ;; # macOS 10.15 Catalina + 20*) REQUIRE_64BIT=yes ;; # macOS 11 Big Sur + 21*) REQUIRE_64BIT=yes ;; # macOS 12 Monterey + 22*) REQUIRE_64BIT=yes ;; # macOS 13 Ventura + 23*) REQUIRE_64BIT=yes ;; # macOS 14 Sonoma + *) exit 1 ;; + esac + if [ x"$REQUIRE_64BIT" = xyes -a $SIZE = 32 ] ; then + # only 64-bit executables are supported in recent macOS versions + exit 1; + fi + ;; + arm) + # we use Rosetta since we do not have native arm64 support in + # the legacy version of SML/NJ + ARCH="amd64" + OPSYS=darwin; + HEAP_OPSYS=darwin + ;; + esac + ;; + Linux) + OPSYS=linux + case `uname -m` in + *86) + ARCH=x86 + # version 4.9 is the oldest "supported" version of Linux, so we + # only go back to the 3.x versions + case `uname -r` in + 3.*) ;; # 2011 -- 2015 + 4.*) ;; # 2015 -- 2018 + 5.*) ;; # 2019 -- 2022 + 6.*) ;; # 2022 -- + *) exit 1 ;; + esac + ;; + x86_64) + ARCH=$(pick_arch x86 amd64) + ;; + ppc) + ARCH=$(pick_arch ppc ppc64) + case `uname -r` in + *osfmach*) OPSYS=mklinux ;; + *) ;; + esac + ;; + *) exit 1;; + esac + ;; + FreeBSD) + OPSYS=freebsd + HEAP_OPSYS=bsd + case `uname -m` in + *86) ARCH=x86 ;; + x86_64) ARCH=$(pick_arch x86 amd64) ;; + amd64) ARCH=$(pick_arch x86 amd64) ;; + *) exit 1 ;; + esac + ;; + NetBSD) + case `uname -r` in + 1.*) exit 1 ;; + 2.*) exit 1 ;; + *) OPSYS=netbsd ;; + esac + HEAP_OPSYS=bsd + case `uname -p` in + *86) ARCH=x86;; + x86_64) ARCH=$(pick_arch x86 amd64) ;; + powerpc) ARCH=ppc;; + sparc) ARCH=sparc;; + *) exit 1;; + esac + ;; + OpenBSD) + OPSYS=openbsd + HEAP_OPSYS=bsd + case `uname -p` in + *86) ARCH=x86;; + x86_64) ARCH=$(pick_arch x86 amd64) ;; + powerpc) ARCH=ppc;; + *) exit 1;; + esac + ;; + Windows_NT) + OPSYS=win32 + case `uname -m` in + *86) ARCH=x86;; + *) exit 1;; + esac + ;; + CYGWIN_NT*) + # If the environment variable SMLNJ_WINDOWS_RUNTIME is defined, + # then we use Win32 as the runtime environment. + if [ "$SMLNJ_WINDOWS_RUNTIME" != "" ]; then + OPSYS=win32 + else + OPSYS=cygwin + fi + case `uname -m` in + *86) ARCH=x86;; + x86_64) ARCH=$(pick_arch x86 amd64) ;; + *) exit 1;; + esac + ;; + *) exit 1;; +esac + +if [ "$HEAP_OPSYS" = "" ]; then + HEAP_SUFFIX="$ARCH-$OPSYS" +else + HEAP_SUFFIX="$ARCH-$HEAP_OPSYS" +fi + +echo "ARCH=$ARCH; OPSYS=$OPSYS; HEAP_SUFFIX=$HEAP_SUFFIX" diff --git a/bin/.heap/ml-antlr.amd64-linux b/bin/.heap/ml-antlr.amd64-linux new file mode 100644 index 0000000..0f49369 Binary files /dev/null and b/bin/.heap/ml-antlr.amd64-linux differ diff --git a/bin/.heap/ml-burg.amd64-linux b/bin/.heap/ml-burg.amd64-linux new file mode 100644 index 0000000..f7932ba Binary files /dev/null and b/bin/.heap/ml-burg.amd64-linux differ diff --git a/bin/.heap/ml-lex.amd64-linux b/bin/.heap/ml-lex.amd64-linux new file mode 100644 index 0000000..bb8033e Binary files /dev/null and b/bin/.heap/ml-lex.amd64-linux differ diff --git a/bin/.heap/ml-ulex.amd64-linux b/bin/.heap/ml-ulex.amd64-linux new file mode 100644 index 0000000..bae58cb Binary files /dev/null and b/bin/.heap/ml-ulex.amd64-linux differ diff --git a/bin/.heap/ml-yacc.amd64-linux b/bin/.heap/ml-yacc.amd64-linux new file mode 100644 index 0000000..12fb759 Binary files /dev/null and b/bin/.heap/ml-yacc.amd64-linux differ diff --git a/bin/.heap/sml.amd64-linux b/bin/.heap/sml.amd64-linux new file mode 100644 index 0000000..a274c8b Binary files /dev/null and b/bin/.heap/sml.amd64-linux differ diff --git a/bin/.link-sml b/bin/.link-sml new file mode 100755 index 0000000..4b6b399 --- /dev/null +++ b/bin/.link-sml @@ -0,0 +1,117 @@ +#!/bin/sh +# +# Copyright 2019 The Fellowship of SML/NJ (http://www.smlnj.org) +# All rights reserved. +# +# The standard "link" script for SML/NJ +# + +# +# for /bin/ksh, disable reading user's environment file +# +unset ENV + +# the default size; this is set by the config/install.sh script +# +SIZE_OPT="-"64 + +RUN="" +BOOT="" +HEAP="sml" + +# +# Process command line arguments +# +while [ "$#" != "0" ]; do + arg=$1 + case "$arg" in + -32) SIZE_OPT=$arg ; shift ;; + -64) SIZE_OPT=$arg ; shift ;; + @SMLrun=*) + RUN=`echo "$arg" | sed 's/@SMLrun=//'` + shift + ;; + @SMLboot=*) + BOOT=`echo "$arg" | sed 's/@SMLboot=//'` + shift + ;; + @SMLheap=*) + HEAP=`echo "$arg" | sed 's/@SMLheap=//'` + shift + ;; + @SMLversion) + echo "$CMD 110.99.5" + exit 0 + ;; + *) + break + ;; + esac +done + +############################################################################# +# +# BEGIN SITE SPECIFIC STUFF +# +############################################################################# + +# +# SITE SPECIFIC CONFIGURATION INFO +# + +# the path of the directory in which executables (like this file) are kept. +if [ x"$SMLNJ_HOME" = x ] ; then + BIN_DIR="/home/npease/COS301-HW5/bin" + if [ ! -d "$BIN_DIR" ]; then + cmddir=`dirname $0` + case "$cmddir" in + /* ) BIN_DIR="$cmddir";; + * ) BIN_DIR=`cd $cmddir; pwd` ;; + esac + fi +else + if [ x"$CM_PATHCONFIG" = x ] ; then + CM_PATHCONFIG="$SMLNJ_HOME"/lib/pathconfig + export CM_PATHCONFIG + fi + BIN_DIR="$SMLNJ_HOME"/bin +fi + +# the path of the directory in which the runtime system executables are kept. +RUN_DIR=$BIN_DIR/.run + +# +# the following could be replaced with some site specific code +# +ARCH_N_OPSYS=`"$BIN_DIR/.arch-n-opsys" $SIZE_OPT` +if [ "$?" != "0" ]; then + echo "$CMD: unable to determine architecture/operating system" + exit 1 +fi +eval $ARCH_N_OPSYS + +############################################################################# +# +# END SITE SPECIFIC STUFF +# +############################################################################# + +# if the runtime was not specified, use the default +if [ x"$RUN" = x ]; then + RUN="$RUN_DIR/run.$ARCH-$OPSYS" +fi + +if [ ! -x "$RUN" ]; then + echo "$CMD: cannot find runtime system $RUN" + exit 1 +fi + +if [ "$BOOT" = "" ]; then + echo "@SMLboot= must be specified." + exit 1 +fi + +# +# run the sucker! +# +exec "$RUN" @SMLboot="$BOOT" @SMLheap="$HEAP" "$@" diff --git a/bin/.run-sml b/bin/.run-sml new file mode 100755 index 0000000..0bc63a7 --- /dev/null +++ b/bin/.run-sml @@ -0,0 +1,265 @@ +#!/bin/sh +# +# Copyright 2019 The Fellowship of SML/NJ (http://www.smlnj.org) +# All rights reserved. +# +# The standard driver for SML/NJ under the new runtime system +# + +CMD=`basename "$0"` + +# +# for /bin/ksh, disable reading user's environment file +# +unset ENV + +############################################################################# +# +# BEGIN SITE SPECIFIC STUFF +# +############################################################################# + +# +# SITE SPECIFIC CONFIGURATION INFO +# + +# On cygwin, make sure SMLNJ_HOME is given a POSIX-style pathname. +if [ x"$SMLNJ_HOME" != x ] ; then + if [ x"$SMLNJ_CYGWIN_RUNTIME" != x ] ; then + if [ "`uname -o`" = "Cygwin" ] ; then + # Convert to Unix style, absolute pathname. + SMLNJ_HOME=`cygpath -u -a "$SMLNJ_HOME"` + fi + fi +fi + +# the path of the directory in which executables (like this file) are kept. +if [ x"$SMLNJ_HOME" = x ] ; then + BIN_DIR="/home/npease/COS301-HW5/bin" + if [ ! -d "$BIN_DIR" ]; then + cmddir=`dirname "$0"` + case "$cmddir" in + /* ) BIN_DIR="$cmddir";; + * ) BIN_DIR=`cd $cmddir; pwd` ;; + esac + fi +else + if [ x"$CM_PATHCONFIG" = x ] ; then + CM_PATHCONFIG=${SMLNJ_HOME}/lib/pathconfig + export CM_PATHCONFIG + fi + BIN_DIR=${SMLNJ_HOME}/bin +fi + +# the path of the directory in which the runtime system executables are kept. +RUN_DIR=$BIN_DIR/.run + +# the path of the directory in which the heap images are kept. +HEAP_DIR=$BIN_DIR/.heap + +############################################################################# +# +# END SITE SPECIFIC STUFF +# +############################################################################# + +# special shortcut for frequent use (and for Linux' binfmt) +if [ `basename "$0"` = sml ] ; then + case "$1" in + /* | ./* | ../* ) + case "$1" in + *.cm | *.sml | *.sig | *.fun ) + ;; + * ) + HEAP="@SMLload=$1" + shift + ;; + esac + ;; + esac +fi + +ALLOC="" + +# the default size; this is set by the config/install.sh script +# +SIZE_OPT="-"64 + +# if "yes" then we report the heap suffix and quit +# +REPORT_SUFFIX=no + +# +# Process command line arguments +# +while [ "$#" != "0" ]; do + arg=$1 + case "$arg" in + -32) shift; SIZE_OPT=$arg ;; + -64) shift; SIZE_OPT=$arg ;; + @SMLrun=*) + shift + RUN=`echo $arg | sed 's/@SMLrun=//'` + ;; + @SMLload=*) + shift + HEAP=$arg + ;; + @SMLappl) + shift + if [ "$#" = "0" ]; then + echo "$CMD: missing argument for @SMLappl option" + exit 1 + fi + APPL=$1 + shift + ;; + @SMLversion) + echo "$CMD 110.99.5" + exit 0 + ;; + @SMLwordsize) + case $SIZE_OPT in + -32) echo "32" ;; + -64) echo "64" ;; + esac + exit 0 + ;; + @SMLsuffix) + shift + REPORT_SUFFIX=yes + ;; + @SMLalloc=*) + shift + ALLOC=$arg + ;; + *) + break + ;; + esac +done + +# +# the following could be replaced with some site specific code +# +ARCH_N_OPSYS=`"$BIN_DIR/.arch-n-opsys" $SIZE_OPT` +if [ "$?" != "0" ]; then + echo "$CMD: unable to determine architecture/operating system" + exit 1 +fi +eval $ARCH_N_OPSYS + +# if the `@SMLsuffix` option was specified, then report the suffix and quit +# +if [ $REPORT_SUFFIX = yes ] ; then + echo $HEAP_SUFFIX + exit 0 +fi + +# +# Try to figure out the CPU's cache size and set the allocation area +# size accordingly. This is majorly important for Celeron systems +# which suffer badly when the allocation area is too big. +# +if [ "$ALLOC" = "" ] ; then + if [ -f /proc/cpuinfo ] ; then + # "head" is called to make sure we consider only one matching line. + # (On linux SMP systems there is more than one such line.) + cache=`grep -F 'cache size' $smlfile <$cmfile <$mlscript < ignore (OS.Process.exit OS.Process.failure) + | SOME l => let + val s = TextIO.openOut "$depends" + fun pr { derived = true, file, class } = () + | pr { file, ... } = TextIO.output (s, " \\\\\\n " ^ file) + in + TextIO.output (s, "${delims}\\n${target}:"); + app pr l; + TextIO.output (s, "\\n${delime}\\n"); + TextIO.closeOut s; + ignore (OS.Process.exit OS.Process.success) + end +end +stop + +if $SML $SIZE_OPT '$smlnj/cm.cm' $dulist $mlscript ; then + # + # remove previous result of ml-makedepend + # (other cmfile/target combinations are unaffected) + # + awk "BEGIN { c = 1; s = \"${delims}\"; e = \"${delime}\"; } + (\$0 == s) { c = 0; next; } + (\$0 == e) { c = 1; next; } + (c == 1) { print }" <$mf >$tmpmf + cat $tmpmf $depends >$mf + rm $tmpmf $depends +else + echo $thisscript: CM dependency analysis failed + exit 1 +fi + +exit 0 diff --git a/bin/ml-ulex b/bin/ml-ulex new file mode 120000 index 0000000..fe459c2 --- /dev/null +++ b/bin/ml-ulex @@ -0,0 +1 @@ +.run-sml \ No newline at end of file diff --git a/bin/ml-yacc b/bin/ml-yacc new file mode 120000 index 0000000..fe459c2 --- /dev/null +++ b/bin/ml-yacc @@ -0,0 +1 @@ +.run-sml \ No newline at end of file diff --git a/bin/sml b/bin/sml new file mode 120000 index 0000000..fe459c2 --- /dev/null +++ b/bin/sml @@ -0,0 +1 @@ +.run-sml \ No newline at end of file diff --git a/boot.amd64-unix.tgz b/boot.amd64-unix.tgz new file mode 100644 index 0000000..49771d8 Binary files /dev/null and b/boot.amd64-unix.tgz differ diff --git a/ckit.tgz b/ckit.tgz new file mode 100644 index 0000000..aad4157 Binary files /dev/null and b/ckit.tgz differ diff --git a/ckit/BUGS b/ckit/BUGS new file mode 100644 index 0000000..43203da --- /dev/null +++ b/ckit/BUGS @@ -0,0 +1,247 @@ +ckit Bug List +============= +[Last updated: 3/31/00] + +---------------------------------------------------------------------- +NUMBER: 11 +SUBMITTER: Kathleen Fisher +DATE: 12/10/99 +TEST: +STATUS: request for more complete example +DESCRIPTION: + +The code: +void write_myArray (char *f, char *space, int size) +{ + Sfio_t *fp; + int *temp_space = (int *) space; + int i; + + fp = openfile (f,"w"); + for (i = 0; i<2; i++) + sfprintf (fp,"%d",&(*temp_space)[i]); +} + +passes through the ckit compiler, but cc reports: + +"directory008.c", line 866: error(1138): expression must have + pointer-to-object type + sfprintf (fp,"%d",&(*temp_space)[i]); + +COMMENT: Could not compile example (it is incomplete); +Kathleen indicates she can't reproduce this behaviour. + + +---------------------------------------------------------------------- +NUMBER: 15 +SUBMITTER: Alexey Loginov +DATE: 4/17/00 +TEST: +STATUS: open +DESCRIPTION: union bitfields + +According to Harbison and Steele (top of p.141), bitfields in unions are +allowed in ISO C, but not "traditional" C. Neither "gcc -ansi -pedantic", +nor cc complains. + + input: union { int u16:12; } u; + + output: "test.c":1.2-3.4: error: union member has size spec + + union t1 { int u16; }; + union t1 u; + + +---------------------------------------------------------------------- +NUMBER: 16 +SUBMITTER: Alexey Loginov +DATE: 4/17/00 +TEST: +STATUS: open +DESCRIPTION: struct bitfields + +This is not a big deal, since ckit implements a superset of ANSI C, +but I thought I'd mention it anyway. Ckit allows the following: + +struct S { + char a:4; /* <-- Illegal field for ANSI C, but not "traditional" C */ + float f:4; /* <-- Illegal field */ +}; + + +---------------------------------------------------------------------- +NUMBER: 17 +SUBMITTER: Alexey Loginov +DATE: 4/14/00 +TEST: +STATUS: open +DESCRIPTION: large initializer value + + Input: + ------ + long i = 0xf8000000; + unsigned long i = 0xf8000000ul; + + Output: + ------- + "test.c":1.11-21: error: large int const + "test.c":2.19-31: error: large int const + "???": warning: additional errors suppressed. + "???": warning: additional warnings suppressed. + + long i=0; + unsigned long i=0; + + Note: + ----- + gcc -Wall -ansi -pedantic: no complaint + cc: warning: initializer does not fit or is out of range: 0xf8000000 + +COMMENT: +The problem seems to be due to the fact that LargeInt (i.e. Int32 - +signed integer type) is used for storing the value, and LargeInt +raises exception Overflow (in c.lex) in the above cases. IntInf does +not overflow on such numbers, and so may be the right thing to use, +however that requires changing the code to use IntInf for IntConst. + +Ironically, LargeInt parses hexadecimal strings fine but IntInf does +not handle the 0x prefix for some reason: + +- let val v = valOf(StringCvt.scanString(LargeInt.scan StringCvt.HEX) "0xff") += in print ((LargeInt.toString v)^"\n") end; +255 +val it = () : unit +- let val v = valOf(StringCvt.scanString(IntInf.scan StringCvt.HEX) "0xff") += in print ((IntInf.toString v)^"\n") end; +0 +val it = () : unit +- let val v = valOf(StringCvt.scanString(IntInf.scan StringCvt.HEX) "ff") += in print ((IntInf.toString v)^"\n") end; +255 + + +---------------------------------------------------------------------- +NUMBER: 18 +SUBMITTER: Alexey Loginov +DATE: 4/14/00 +TEST: +STATUS: open +DESCRIPTION: old style varargs (INCONVENIENCE) + +This is something we have an easy workaround for (i.e. using cc -E +instead of gcc -E to preprocess) but we mention it in case you think +other people might care about this. When varargs.h is included by +gcc -E, old style parameter passing is transformed to this: + + int foo(a) + a;... + { } + + This is not accepted by ckit. + Remark: non-ANSI (gcc gives warning) + Solution: use cc -E. + + +---------------------------------------------------------------------- +NUMBER: 19 +SUBMITTER: Alexey Loginov +DATE: 4/20/00 +TEST: +STATUS: fixed, DBM, 6/8/00 + Return fixed-up id at the same time as the fixed type (fun processDecr + in ast/build-ast.sml) +DESCRIPTION: incomplete types + +A change to build-ast.sml since ckit 1.0b3 is annotating types of ids +with incomplete types at times. Our problem is that we use the ast in +a separate pass, so we don't see the local symbol tables (unless we +redo the work of their construction). We collect types of declared +variables, which we then use in instrumentation. Incomplete types +give us compilation errors. + +The fixed up type is inserted into the symbol table under line 911, +however, the type in the id of the declaration is still the incomplete +type. Subsequent uses of the id will by typed correctly but not the +declaration. The id created on line 916 should be used instead of the +original id. + +Input: + +int ia[] = {1,2,3}; + +int main() { +int ib[] = {1,2,3,4,5}; +} + +Types I see in ids for ia and ib in the declarations above are both +int[]. :-) + + +---------------------------------------------------------------------- +NUMBER: 20 +SUBMITTER: Alexey Loginov +DATE: 5/24/00 +TEST: +STATUS: open +DESCRIPTION: long long + +1. Ckit doesn't really handle long long (it just recognizes the type, but + does not handle literals of "long long" size: seems to be due + to using LargeInt. See "unsigned long" bug above. + +2. Should keep around "L", "LL", "UL" suffix to int literals: + input: 1LL<<40 + output: 1<<40 /* compiler complains */ + +3. Tilde and negative: + input: 2e-5 + output: 2E~05 + +Negative numbers are represented with ~ in SML, so that Real.toString +called in ppReal prints the ~ in the exponent. + + +---------------------------------------------------------------------- +NUMBER: 21 +SUBMITTER: Alexey Loginov +DATE: 5/25/00 +TEST: +STATUS: open +DESCRIPTION: spurious error messages + +Input: + struct S { + float j; + int:24; + char * k; + } s = {1.2,"3"}; + +Spurious Error Messages: + "stst.c":1.2-5.16: error: type of initializer is incompatible with type of lval + "stst.c":1.2-5.16: error: badly formed struct initializer: not enough initializers + +FIX: +skip over unnamed members, as per C specs: diff of build-ast.sml (old) follows: +623c623 +< let fun f ((fieldType, _, _) :: l, expr :: exprs) = +--- +> let fun f ((fieldType, SOME mem, _) :: l, expr :: exprs) = +625a626,627 +> | f ((fieldType, NONE, _) :: l, exprs) = +> f (l, exprs) + + +---------------------------------------------------------------------- +NUMBER: 22 +SUBMITTER: Alexey Loginov +DATE: 5/25/00 +TEST: +STATUS: open +DESCRIPTION: const char */char * incompatibility (INCONVENIENCE) + + input: char c[100]; const char * d = &c[5]; + c - d; + output: error: Type Error: Unacceptable operands of "-" or "--". + + general: in general, the error occurs with incompatible pointer types. + the key is to not treat "const" as incompatible. diff --git a/ckit/HISTORY b/ckit/HISTORY new file mode 100644 index 0000000..eb767b8 --- /dev/null +++ b/ckit/HISTORY @@ -0,0 +1,1050 @@ +ckit history +============ + +Release Changes +=============== + +Version 0.x, 14/Sept/99 (nch) +----------------------------- +1. BuildAst flags combined into compiler-mode and source-to-source-mode + (control of scaling, insertion of explicit coersions, reduction of assign ops + reduction of sizeof). + + BuildAst now contains the following flags and major modes (collections of + flags settings). + + (* control of buildAst modes *) + val insert_explicit_coersions : bool ref + (* Insert explicit casts at points where there are implicit type conversions. + If true, then reduce_assign_ops should also be set. *) + val insert_scaling : bool ref + (* Insert scaling computations at pointer arithmetic. *) + val reduce_sizeof : bool ref + (* Replace sizeof expressions by integer constants. *) + val reduce_assign_ops : bool ref + (* Replace assignops by simple ops and assignments. *) + val multi_file_mode : bool ref + (* Analysis mode -- allow repeated definitions. *) + + val multiFileMode: unit -> unit (* was called analysis mode *) + val compilerMode: unit -> unit + val sourceToSourceMode: unit -> unit + + +2. Warning for zero size arrays added. + +3. Error messages are now bounded. + Error now contains Error.errorsLimit and Error.warningsLimit + which can be used to limit the printing of errors and warnings. + +4. Array sizes and sizeof. + BuildAst now maintains the expressions used to define array sizes. + When printed out, these expressions are now printed. + This allows sizeof calculations to be maintained. + + Note: In principle we should do the same thing for expressions in enums + and in bitfields. However sizeof is fairly unlikely to be used in these + situations. If reduce_sizeof is false and a sizeof is encountered in these + situations, a warning message will be printed. + +5. Fixed build-ast so that if we get parse errors, we don't print type-checking + errors. The philosophy here is: first get file to parse, and only then worry + about semantic errors. + +6. Better error messages: We had mentioned capturing error messages in a + data-structure so that a filter could be installed to print out more meaningful + error messages (e.g. instead of "ENUM inserted"). This turns out to be a rather + complex exercise. The problematic error messages involving ENUM are generated + deep within ml-yacc, and dealing with these would require non-trivial rewriting + of ml-yacc (in fact I suspect that the only way to do this would be to have our + own version of ml-yacc). + + So, instead I hacked the ml-yacc specification to do slightly better error + recovery -- we now substiture "TYPE_NAME" for "ID" if there is an error + involving "ID" tokens. + + To see how this might help, recall that the problematic case is when you miss a + typedef (or the related hancock-thingy). For example, with a missing + definition of mytype, the declaration: + + mytype x; + + is tokenized as: + + ID ID; + + Now, what used to happen is that the parser would try to insert an ENUM token + (a random bad choice). Instead, the parser now tries to interpret the ID as a + TYPE_NAME, and so in effect we get: + + TYPE ID; + + and an error message: + + "foo.c": error: syntax error: replacing ID with TYPE_NAME + + I've experimented with some support for inserting heuristic help messages (an + extension to the Error struct). I think it is rather ad hoc, but let me know + how useful it is. The actual message that is printed now for the above scenario + is + + "file.c": error: syntax error: replacing ID with TYPE_NAME + Likely cause: missing typedef declaration. + + This message can be customized (it appears as a function call in the grammar + file), and others could be added. + + I've also configured build-ast so that when there are parser errors, error + reporting during type checking is switched off, since such type checking errors + tend to be quite confusing. + + +---------------------------------------------------------------------- +Version 1.0b1, 7/Dec/1999 +------------- +Error interface changed. +Top level interfaces for build-ast changed. + + +---------------------------------------------------------------------- +Version 1.0b2, 13/Jan/2000 +-------------- +Bug fixes. +Revise regression test suite. + + +Version 1.0b3, 15/Mar/2000 +------------- +Bug fixes: Bugs 1,3,5,6,7,9,10 +Made sizes (Sizes.sizes) a record and parameterized makeAst on sizes. +Added various regression tests for bugs. + + +Version 1.0, 15/Mar/2000 +------------- +* Bug fixes: Bugs 2, 4, 8, 12, 13 +* Reworked size and alignment mechanism to support parameterization + (see src/ast/sizes[-sig].sml). A utility program (src/c-util/sizes.c) + can be used to generate sizes info for a given compiler/platform + combination. + +Version 1.x (x > 0), Halloween 2001 (boo! :-), M.Blume +----------- +* Changed the "Function" constructor of type Ast.ctype to carry optional + argument identifiers. +* Changed the return type of TypeUtil.getFunction accordingly. +* Type equality ignores the argument names. +* TypeUtil.composite tries to preserve argument names but gives up + if there is a mismatch. + +====================================================================== +Bug fix history +====================================================================== + +Test: valid-programs/a40.c +Status: fixed 15/june/99 +Fix: Changed (ty, ty, signedNum CT.INT, expop) + to (ty, ty, ty, expop) + in mulDivOp (build-ast.sml) + Also change similar code in integralOp. +Email: + + From: chandra@research.bell-labs.com (Satish Chandra) + Date: Fri, 21 May 1999 14:00:28 -0500 + + Nevin: + + Have you already caught this one? + + File build-ast-fn.sml: + + Function mulDivOp in case PT.Binop, about line 1216 in my version: + + then (case usualBinaryCnv env (ty1, ty2) of + SOME ty => (ty, ty, signedNum CT.INT, expop) + ^^^^^^ + If we were type checking a float multiplied by a float, we would + lose, right? + + The Wisconsin folks came up with this example, which does not + produce the correct adornments. + + float f = 5.6, f1; + + int main() { + f1 = f * f; + + return 0; + } + + -satish + +--------------------------------------------------------------------------- + +Test: valid-programs/a37.c +Status: fixed 15/june/99 +Fix: added case "| ([CT.Void], nil) => (nil, nil)" to isAssignableL (type-util.sml) +Email: + + From: chandra@research.bell-labs.com (Satish Chandra) + Date: Tue, 25 May 1999 15:00:52 -0500 + + A function with a 'void' argument as its only argument is basically + a function with no arguments. E.g.: + + void f(void); + + main() + { + f(); + } + + We issue a Type Warning: function call has too few args + + I can go fix these things myself, but it will make it harder to + synchronize our changes. Please give me a call so we can make a plan + on how to go about it. + + -satish + +------------------------------------------------------------------------------ + +Test: valid-programs/a38.c +Status: fixed sometime in early june/99 +Fix: fixed during major overhaul of frontend + + From: chandra@research.bell-labs.com (Satish Chandra) + Date: Thu, 03 Jun 1999 20:17:56 -0500 + + When perform_type_checking is on, it complains about + + for(;;) + + as "condition of for statement is not scalar". + + The problem stems from the use of isScalar, without checking for EmptyExp + first. + + -satish + +------------------------------------------------------------------------------- + +Test: valid-programs/a39.c +Status: fixed sometime before 15/june/99 (probably before major frontend overhaul?) +Fix: seems to have been fixed as side-effect of some other bug-fix or code change. +Email: + + Date: Thu, 20 May 1999 09:16:37 -0400 (EDT) + From: Kathleen Fisher + + + There seems to be a bug involving the ++ operator. + The following does not work: + p->count[i]++ /* generates an error. */ + p->count[i] = p->count[i]+1; /* OK */ + + Kathleen + + +--------------------------- Sat Jul 31 18:36:00 1999 -------------------------- + +Test: valid-programs/a60.c +Status: fixed 31/july/99 +Fix: add case for TCInitializer so that if type is not core, then apply getCoreType + (Alternatively, we could impose the invarient that TCInitializer must be + applied only to core types, but this would be a pain becuase TCInitializer + has a number of recursive calls.) +Email: + + build-ast.sml (TCInitializer) does not appear to look into typedefs. + Therefore, + + typedef struct {int x,y; } point; + + point x = {2,3}; + + fails to typecheck although gcc accepts it. + + +--------------------------- Sat Jul 31 19:12:59 1999 -------------------------- + +Test: valid-programs/a61.c +Status: fixed 31/july/99 +Fix: Shadow struct definitions were simply ignored. They generated nothing in + Ast, and in fact there was no way to represent these definitions. + The fix was to change the TypeDecal contructor from: + + TypeDecl of tid + + to + + TypeDecl of {shadow: {strct:bool} option, tid:tid} + + and then use TypeDecl{SOME{strct=true}, tid=....} to represent "struct x;" + TypeDecl{SOME{strct=false}, tid=....} to represent "union x;" + +Email: + Date: Thu, 29 Jul 1999 14:15:12 -0400 + From: Fred Smith + + I ran some tests and a cast is being inserted whenever + a typedef occurs. In fact the following code: + + typedef struct { int x,y,z; } w; + + void main() { + w foo; + foo = foo; + } + + is compiled to + struct t12 { int x,y,z; }; + + typedef struct t12 w_t13; + void main () + { + w_t13 foo_p17; + foo_p17 = ((struct t12) foo_p17); + } + modulo formatting. + + +--------------------------- Sat Jul 31 20:24:30 1999 -------------------------- + +Test: invalid-programs/r60.c +Status: fixed 31/july/99 +Fix: Get rid of extra loc args in definitions of checkAssignableTys and checkAssign. + + Date: Fri, 30 Jul 1999 10:15:36 -0400 + From: Fred Smith + + The following C program compiles and type-checks under Ckit but + obviously should not. + + struct w { int x,y,z; }; + + void main() { + struct w foo; + float x; + x = foo; + + } + + It took me a long time to track this one down since the code looks + absolutely correct. The problem was that all calls to checkAssign in + build-ast.sml failed to pass in a location. Since checkAssign was being + used solely for its side-effect, both occurences were in contexts like + (one case preceded a ; ) + val _ = checkAssign .... + + +--------------------------- Wed Sep 22 17:20:03 1999 -------------------------- + +Test: valid-programs/a63.c +Status: fixed 22/Sept/99 +Fix: apply preArgConv to parameter types before adding them to local symbol table + +The following C program does not type check under ckit and it should: + +void f(); + +main () { + int y[4]; + f(y); +} + +void f(int x[4]) { + int *y; + x = y; + x[3] = 1; +} + +C is a horrible language -- if you declare an array as an arg to a function, +then "array of type" is adjusted to "pointer to type". + + +--------------------------- Wed Sep 22 17:34:55 1999 -------------------------- + +Test: invalid-programs/r62.c +Status: fixed 22/Sept/99 +Fix: use lookLocalScope to check if parameter is locally defined before adding it. + +The following C program type checks under ckit and it should not: + +main () { + return(0); +} + +f(int x, int x) { + x = x; +} + + +--------------------------- Mon Sep 27 19:17:33 1999 -------------------------- + +Test: not available +Status: fixed 27/Sept/99 +Problem: Redeclarations did not inherit the pid of the previous declaration. + +e.g. + +extern int i; + +extern int i; + +would be allocated different pid's. +Bug was introduced during build-ast overhaul. + +Fix: checkIdRebinding now returns an extra parameter (a uid option). + + +--------------------------- Mon Sep 27 19:20:48 1999 -------------------------- + +Test: not available +Status: fixed (see a64.c, a65.c below) +Problem: We don't check for non-constant expressions in non-simple initializers. + +int f(int j) { + int x[4] = {0,1,2,3}; /* this is ok */ + int x[4] = {0,j,2*j,3*j}; /* this isn't */ +} + +but our frontend currently gives an unhesitant thumbs up for this code. + + +--------------------------- Fri Oct 15 14:24:36 1999 -------------------------- + +Test: not available +Status: fixed 15/Oct/99 +Fix: in sizeof.sml, function computeFieldListStruct, + change foldr to foldl and reverse final list. +Problem: +> From: chandra@research.bell-labs.com (Satish Chandra) +> Date: Thu, 14 Oct 1999 22:30:05 -0500 +> +> This has been quite a while, and the code might have changed much, but ... +> +> In function computeFieldListStruct, foldr computes offsets the wrong way. +> I think we need foldl, and later on reverse the accumulated list called +> "tab". + +--------------------------- Tue Sep 14 11:30:44 1999 -------------------------- + +TEST: a64.c, a65.c +STATUS: fixed 12/jan/99 +FIX: propagated isZeroExpr info for function args to checkFn (in type-utils), + so that zero test can be included when checking assignment of zero to + arg of pointer type. +EMAIL: +From: Kathleen Fisher +Date: Mon, 10 Jan 2000 15:28:02 -0500 (EST) + +The ckit compiler doesn't treat 0 as a legal function pointer. The +program: + +*************************************************** +void f(int(* goo)(int)){} + +void main(){ + f(0); +} +*************************************************** + +gives the error message: + +"/fs/smaug/home4/kfisher/hancock/tests/suite/test.hc":4.2-6: error: Bad +function call: arg 1 has type int but fn parameter has type int (*) (int) + + +Kathleen + +--------------------------- Tue Jan 11 23:12:50 2000 -------------------------- + +TEST: a66.c +STATUS: fixed 12/jan/99 +FIX: Pretty-printer bug. + pp-ast-fn.sml: + changed the code for the e0 case of QuestionColon to + ; ppExpr {nested=true} aidinfo tidtab pps e0 +EMAIL: +From: Alexey Loginov +Date: Sun, 28 Nov 1999 12:43:16 -0600 (CST) +Hi Nevin and Dave, + +We found another bug that isn't fixed in the version of ckit you sent +us. I also didn't see it in the bug list. Expressions containing the +"?:" operator are not parenthesized correctly to account for its +right-associativity. (In the following program, lines 5 and 7 mean +the same thing but line 6 is different.) + +void main() { + + char a, b, c, d, e; + + a?b:c?d:e; /* Line 5. */ + (a?b:c)?d:e; /* Line 6. */ + a?b:(c?d:e); /* Line 7. */ +} + + +Output C code: + +- ParseToAst.fileToC "/u/a/l/alexey/types/test/quest_col.c"; + +void main () +{ + char a; + char b; + char c; + char d; + char e; + a ? b : c ? d : e; + a ? b : c ? d : e; + a ? b : c ? d : e; +} + + +Thanks, +-Alexey + +--------------------------- Tue Jan 11 23:12:52 2000 -------------------------- + + +TEST: r64.c +STATUS: fixed 12/jan/00 +FIX: Add check of initializers to see if const (only non-const + case is an object of dynamic storage duration (i.e. non-global, non-static). + Notion of const is complex -- need to recurse through arithmetic, ?-: etc. + (Long term issue: constant expressions should really be reduced to constants.) +EMAIL: +Date: Tue, 7 Dec 1999 17:22:48 -0500 (EST) +From: Kathleen Fisher + +The following code: + + struct foo_t{ + int x; + int y; + }; + + void f(int x0, int y0){ + struct foo_t myfoo = {x0,y0}; + } + + void main(){ + f(0,0); + } + +passes through the ckit compiler without complaint, but cc reports the +following error: + +"bug.c", line 8: error(1028): expression must have a constant value + struct foo_t myfoo = {x0,y0}; + ^ + +"bug.c", line 8: error(1028): expression must have a constant value + struct foo_t myfoo = {x0,y0}; + + +====================================================================== +Numbered Bugs +====================================================================== + +NUMBER: 1 +SUBMITTER: Alexey Loginov +DATE: 2/10/00 +TEST: regression/valid-programs/a211.c,a212.c,a213.c,a214.c,a215.c +STATUS: fixed 3/10/00 (nch) +DESCRIPTION: + Enum constants which were not assigned values in original source + are assigned 0 in output. + + - enum { e1,e2,e3 } e; + + --> enum t1 { e1=0, e2=0, e3=0 }; + enum t1 e; + + +---------------------------------------------------------------------- +NUMBER: 2 +SUBMITTER: Alexey Loginov +DATE: 2/10/00 +TEST: +STATUS: fixed 3/31/00 +DESCRIPTION: + functions returning function pointers. Function signal is a good + real-life example. It's mentioned in Harbison and Steele p. 270 + or so (sorry I don't have the book with me). + + int (*fp(double))(float); + + --> int (*) (float) fp (double); + + which does not compile. + + +---------------------------------------------------------------------- +NUMBER: 3 +SUBMITTER: Alexey Loginov +DATE: 2/10/00 +TEST: regression/valid-programs/a216.c +STATUS: fixed 3/10/00 +FIX: The problem was that the type for the second variable was array(const char), + and the code did not strip off the const inside the array constructor. + The fix was to add an extra case to look for qualifiers inside arrays in initializer-normalizer. +DESCRIPTION: + inconsistent interpretation of initialization of constant + character arrays: + + char c[] = "abcdefg"; + const char cc[] = "abcdefg"; + + --> char c[8]={97,98,99,100,101,102,103,0}; + char const cc[1]={"abcdefg"}; + + +---------------------------------------------------------------------- +NUMBER: 4 +SUBMITTER: Alexey Loginov +DATE: 2/10/00 +TEST: a232.c +STATUS: fixed 24/Mar/00 +FIX: +The pretty-printer in fact has enough information to know +whether the original definition of a function was k&r or not: +just look at the function params and at the function type. +If the function type has no args, but the function has +params, then we have k&r style. +The case of FunctionDef in ppCoreExternalDecl now +has code to recognize this case and print out K&R defns. + +DESCRIPTION: + old style C function parameter declarations have different + semantics from new style (with respec to promotions) and + should be preserved. + + void foo(int); + void foo(c) + char c; { } + + --> compiles, but + + void foo(int); + void foo(char c) { } + + --> which is output by ckit, does not + + +---------------------------------------------------------------------- +NUMBER: 5 +SUBMITTER: Alexey Loginov +DATE: 2/10/00 +TEST: regression/valid-programs/a222.c (for both problems) +STATUS: fixed 3/10/00 +FIX for 1: change default value of flag local_externs_ok to true +FIX for 2: changed checking code for initializations (build-ast.sml and initializer-normalizer.sml) +DESCRIPTION: + Inconvinient Warnings: + + - int foo() { extern int bar; } + + --> error: `extern' not allowed in local declarations + + - struct S s = t; (in general, struct S = where is + not of the form {...}) + + --> error: badly formed union/struct initializer: expecting { + +COMMENT: + These last two errors don't seem to affect the output, so we just + ignore them. The others are causing problems on various utilities of + the GNU website. (We're ignoring the issues with the use of GNU C.) + + Local extern warning was fixed by making local_externs_ok contain true + in build-ast.sml. There remains a question about whether the semantics + is correct in all cases. (see regression/invalid-programs/r65.c, which + appears to be handled properly -- produces an error message.) [dbm, 3/10/00] + + +---------------------------------------------------------------------- +NUMBER: 6 +SUBMITTER: John Reppy +DATE: 2/10/00 +TEST: none +STATUS: fixed +FIX: introduced a sizes record type in sizes-sig.sml, and parameterized + makeAst, SizeOf fns, and ParseToAst.fileAst' with respect to sizes. + The Sizes structure provides a default value Sizes.defaultSizes. + sizes-sig.sml and sizes.sml moved from variants(/ansic) to ast. + [Plan is to provide a structure containing a set of sizes values for + various platform/compiler combinations.] + Removed bogus stale sizes-sig.sml file (the real signature contains longlong and longdouble). + Fixed sizeof so that it now: + a) provides the standard functionality for bitfields of char and short; + b) has flags to modify the standard behaviour + We can now simulate the behaviour of cc, lcc and gcc (wrt e.g. alignment + issues for unnamed bitfields). + +DESCRIPTION: + The type metrics in the CKit are broken in several ways. First, it seems + that the SIZES signature does not include longlong or longdouble. Second, + the sizes are hard-coded in, instead of being ABI dependent. I'd recommend + replacing the Sizes structure with a record type: + + type metrics = {bits : int, align : int} + + type interface = { + charMetric : metrics, + shortMetric : metrics, + intMetric : metrics, + longMetric : metrics, + longlongMetric : metrics, + floatMetric : metrics, + doubleMetric : metrics, + longdoubleMetric : metrics, + pointerMetric : metrics, + structAlign : int, + bitFieldAlignment : int option + } + + For the IA32/SVID, the values should be + + char 8 8 + short 16 16 + int 32 32 + long 32 32 + long long 64 32 + float 32 32 + double 64 32 + long double 96 32 + pointer 32 32 + struct align - 8 + + One can probably write a small C program that generates this information. + [jhr has given one to nch -dbm] + + [dbm, 3/8/00] The ast/sizes-sig.sml version of SIZES doesn't include + longlong or longdouble, but the variants/sizes-sig.sml version does. + That seems to be the version used in (e.g.) variants/*/config.sml. + + The variants/sizes-sig.sml also defines a "layout" type corresponding + to John's "metrics". The ast version of SIZES does not seem to be + mentioned anywhere except in ast/sizes-sig.sml, where it is defined, + so it looks like this is an old, vestigial version that has been superceded + by the variants version. + + [dbm, 3/14/00] John still advocates using a record. He anticipates wanting + to switch target architectures dynamically (either by passing the record + as a value, or by setting a global value) in the midst of processing. + + +---------------------------------------------------------------------- +NUMBER: 7 +SUBMITTER: Alexey Loginov +DATE: 2/18/00 +TEST: regression/valid-programs/a224.c +STATUS: fixed 3/10/00 (see bug 9) +DESCRIPTION: + '\0' is treated as '0' and not as a null character. We didn't check + for any other special characters. + + INPUT: + + char c_null = '\0'; + char c_zero = '0'; + + int main () { return 0; } + + OUTPUT: + + char c_null=48; + char c_zero=48; + int main () + { + return 0; + } + +COMMENT: + This is a special case of bug 9. + + +---------------------------------------------------------------------- +NUMBER: 8 +SUBMITTER: Alexey Loginov +DATE: 2/18/00 +TEST: a227.c +STATUS: fixed 24/Mar/00 +FIX: +The problem was in type-util.sml, +where the function conditionalExp is used to type check conditional expressions. +The relevant case statement in the body of the function is +where both arguments (after usualUnaryCnv) have Pointer type. +In this case, composite is called -- the problem is that the +call to composite reverts to the original arguments ty1 and ty2 of the call +to conditionalExp, rather than the results of applying usualUnaryCnv. + +(Note that the function composite can not and should not +apply usualUnaryCnv to its arguments.) + +It turns out that this same error appears in three other places in the code +(in isEquable, isSubtractable and isComparable). + +With insert_explicit_coersions set to true, you now obtain: + +original program (a227.c): + +int main() { + int *ip; + int *jp; + int ia[3]; + jp = (1 ? ia : ip); + } + +fileToC output: + +int main () +{ + int *ip; + int *jp; + int ia[3]; + jp = (1 ? (int *) ia : ip); +} + + +DESCRIPTION: + "?:" operator typing. + + int *ip; + int *jp; + int ia[3]; + jp = (1 ? ia : ip); + + --> error: Type Error: Unacceptable operands of question-colon. + + C semantics (we think): + --> jp = (1 ? (int *) ia : ip); + ^^^^^^^ + (implicit cast of ia to pointer) + + + Current implementation: + --> jp = (1 ? ia : (int[3]) ip); + ^^^^^^^^ + (implicit cast of ip to array) + +COMMENT: + Of course, code is still output correctly (without the implicit cast) + but since we actually use the implicit casts to materialize some + casts, we rely on their correctness. + + +---------------------------------------------------------------------- +NUMBER: 9 +SUBMITTER: Alexey Loginov +DATE: 2/22/00 +TEST: regression/valid-programs/a223.c +STATUS: fixed 3/10/00 +DESCRIPTION: + To follow up on this [bug 8] I wanted to mention that it looks like ckit + handles '\ooo' in three different ways depending on the number of + octal digits. In C the number of digits can be 1-3 but ckit only + handles 3 digit numbers correctly. Two digit numbers are processed as + decimal numbers. Single digit numbers are processed as if '\' weren't + there (i.e. taken as ascii values). + + INPUT: + + char c_octal_0 = '\0'; + char c_octal_51 = '\051'; + char c_octal_60 = '\60'; + char c_octal_7 = '\7'; + + int main () { return 0; } + + + OUTPUT: + + char c_octal_0=48; /* Should be 0 */ + char c_octal_51=41; /* Correct */ + char c_octal_60=60; /* Should be 48 */ + char c_octal_7=55; /* Should be 7 */ + int main () + { + return 0; + } + + Of course, '\0' is the most common of these. Do you have a time frame + for when you think the problems I mentioned before might be fixed? + +COMMENT: + [dbm] fixed by changing the rule in parser/grammar/c.lex to take 1 to 3 + octal digits instead of exactly 3 ({1,3} replaced {3} as the modifier). + + +---------------------------------------------------------------------- +NUMBER: 10 +SUBMITTER: Alexey Loginov +DATE: 2/23/00 +TEST: regression/valid-programs/a226.c +STATUS: fixed 3/10/00 +FIX: Stupid cut-and-paste problem: preincrements/predecrements were getting + transformed into postincrements/postdecrements in build-ast.sml. +DESCRIPTION: + preincrement and predecrement behavior + + INPUT: + + int main () { + int i = 10; + + int i1 = ++i; + int i2 = --i; + + return 0; } + + + OUTPUT: + + int main () + { + int i=10; + int i1=i++; + int i2=i--; + return 0; + } + +COMMENT: + +---------------------------------------------------------------------- +NUMBER: 12 +SUBMITTER: Kathleen Fisher +DATE: 3/15/00 +TEST: a230.c +STATUS: fixed 24/Mar/00 +FIX: +The bug was because the implementation of the non-default +behaviour of convert_function_args_to_pointers was not complete (there was +a missing case in isAssignable in type-utils.sml). To put it another way, +too much of the code was assuming that convert_function_args_to_pointers +was set to the standard value, and in particular, that certain coersions had +been performed *before* isAssignable was called. These coersions are not +performed when convert_function_args_to_pointers is false. + +DESCRIPTION: +We've run into a problem with ckit when we turn the flag +convert_function_args_to_pointers to false in the config.sml file. +The following program: + +******************************************************** +typedef int *windowTy[1]; + +int f (windowTy w) +{ + return 1; +} + +void main(){ + windowTy w; + f (w); +} +******************************************************** + +compiles just fine using cc, but it generates the following +error if we compile it with ckit: + +"array-param.hc":11.3-8: error: Bad function call: arg 1 has type windowTy +but fn parameter has type windowTy + + +---------------------------------------------------------------------- +NUMBER: 13 +SUBMITTER: Olivier Tardieu +DATE: 24/Mar/00 +TEST: a301.c +STATUS: fixed 24/Mar/00 +FIX: +! should be type checked like || and && instead of like a simple +unary operator. Extra code (a function logicalOp1) has +been added to do this. + +DESCRIPTION: + +main() { + void* p; + + !p; +} + +gives type error + error: Type Error: operand of unary op ! must be a number. + + +---------------------------------------------------------------------- +NUMBER: 14 +SUBMITTER: Alexey Loginov +DATE: 4/17/00 +TEST: a240.c +STATUS: fixed 16/June/00 +FIX: +There is a flag to control this behaviour in config.sml: + Config.TypeCheckControl.convert_function_args_to_pointers + +It should be set to true for standard behaviour (but was set to false +for some reason -- maybe temporarily for debugging something else, and then +not reverted back to true??). + +This is actually an ambiguous case: a strict reading of the standard would +suggest that this should be flagged as a type error, but most +compilers allow it, so it is "standard". + +DESCRIPTION: array formals + +This one is similar to the ?: bug we reported earlier (Bug number 8). + +void foo(int a[]); + +int main() { + int * ip; + foo(ip); +} + +Output: + +"tt.c":5.3-10: error: Bad function call: + arg 1 has type int * but fn parameter has type int [] + +Correct C output is still produced but the implicit type of ip inside +the call to foo has int array type. This is a problem for our +instrumentation. + + diff --git a/ckit/README b/ckit/README new file mode 100644 index 0000000..00e2538 --- /dev/null +++ b/ckit/README @@ -0,0 +1,101 @@ +ckit, a front end for C in SML +Version 1.0, 31 Mar 2000 +------------------------------ + +0. Contact information +---------------------- + + ckit mailing list: ckit@mailman.cs.uchicago.edu + Dave MacQueen: dbm@cs.uchicago.edu + Nevin Heintze: nch@google.com + + +1. How to build it +------------------ + +Let CKIT be the directory containing the CKit code. +Run SML in directory $CKIT/ast. +Execute "CM.make();". This gives you the parser/elaborator. + + +2. How to use it +---------------- +Top level driving functions are in file parse-to-ast.sml. Generally +use ParseToAst.fileToAst. It returns a record of type BuildAst.ProgramInfo. + +Example: + + val {ast: Ast.ast, + tidtab: Bindings.tidBinding Tidtab.uidtab, + errorCount: int, + warningCount: int, + auxiliaryInfo: {aidtab: Tables.aidtab, + implicits: Tables.aidtab, + env: State.symtab}} + = ParseToAst.fileToAst ("file"); + +Ast.ast is the abstract syntax type for translation units (a list of top-level +C declarations). For further information, read the code. + +To get ahold of parse trees (parser/parse-tree.sml), which is the raw data +structure produced by the parser: + + val errorState : Error.errorState = Error.mkErrState TextIO.stdOut; + val parseTree : ParseTree.externalDecl list = + Parser.parseFile errorState "file"; + +See parse/util/error.sml for the definition of the errorState type. + + +3. Directory map +---------------- + +parser/ + parser-tree-sig.sml, parser-tree.sml: definition of parse tree types + grammar/ + lex and yacc specifications + util/ + sourcemap-sig.sml, sourcemap.sml: mapping source file locations + error-sig.sml, error.sml: error reporting functions +ast/ + ast-sig.sml, ast.sml: definition of abstract syntax types + build-ast.sml: + translation from parse trees to abstract syntax, with type checking and other + static semantics processing + pp/* : pretty printing for ast + extensions/ + c/ -- dummy extension structures for C +variants/ + ansic/ + config.sml: various flags controlling error checking, type checking, etc. +ast-utils/ + copy/ + copying ast types + equality/ + equality for ast types + simplifier/ + ast simplifier + + +4. Notes: + +4.1 Pretty printing - suppressing underscores + +You can suppress underscores in pretty-printing using: + + PPLib.suppressPidUnderscores := true; + PPLib.suppressTidUnderscores := true; + +4.2 ast-util + +Much of this code is suspect and needs revision. + + +5. Todo: + +5.1. Implement checks on casts. + - explicit casts currently allow any change of type; + instead they should only allow permitted conversions. + +----------------------------------------------------------------------------------- + diff --git a/ckit/doc/overview b/ckit/doc/overview new file mode 100644 index 0000000..8c5a4e4 --- /dev/null +++ b/ckit/doc/overview @@ -0,0 +1,247 @@ +ckit: A FRONT END FOR C IN SML +------------------------------ + +1. GETTING STARTED +------------------ + +On unpacking the ckit sources, you should see a src directory, a doc directory +and a README file (and possibly other directories, depending on the distribution). + +The src directory contains the following subdirectories: + + parser: lexer and parser, parse trees. + ast: abstract syntax trees (Ast), type-checker, pretty-printer. + variants: flags for controlling the parser and type-checker. + +To build the system, cd to src, run SML/NJ and type + +- CM.make(); + +To test the parser on "test.c", type + +- ParseToAst.fileToAst "test.c"; + +This parses and typechecks "test.c" and returns an abstract syntax tree for +"test.c". Alternatively, to parse, type-check and then pretty-print "test.c", +type + +- ParseToAst.fileToC "test.c"; + + +2. USING THE FRONTEND +--------------------- + +The following describes some commonly used ckit functions. + +2.1: ParseToAst.fileToAst: string -> ParseToAst.astBundle + +This is the main function to parse a file and produce abstract syntax. +When applied to a string (file name), it produces a bundle of information of +type astBundle: + +type astBundle = + {ast: Ast.ast, + tidtab: Bindings.tidBinding Tidtab.uidtab, + errorCount: int, + warningCount: int, + auxiliaryInfo: {astTypes: Tables.astIndexTab, + implicits: Tables.astIndexTab, + env: State.symtab}} + +where: + ast is the abstract syntax tree. + tidtab is the type identifier table that maps type identifiers into their meanings. + errorCount is the count of all errors encountered during parsing and type checking. + warningCount is the count of all warnings encountered during parsing and type checking. + astTypes is a table mapping ast indexes into the types of the corresponding ast expressions. + env is used to carry over global symbol information in some mult-file parsing applications. + +2.2 ParseToAst.fileToC + + +2.3 Parser.parseFile + val parseFile : string -> ParseTree.externalDecl list * Error.errorStream + +2.4 errorStream + -- not used for much + -- contains count of warnings and errors + +Top level driving functions are in file parse-to-ast.sml. Generally +use ParseToAst.fileToAst. It returns a record of type BuildAst.ProgramInfo. + +Example: + + val {ast={ast: Ast.ast, + tidtab: Bindings.tidBinding Tidtab.uidtab, + errstrm: Error.errorStream}, + aidtab: Tables.aidtab, + opaidtab: Tables.aidtab, + env: State.symtab} = ParseToAst.fileToAst ("file"); + +Ast.ast is the abstract syntax type for translation units (a list of top-level +C declarations). For further information, read the code. + +To get a hold of parse trees (parser/parse-tree.sml), which is the raw data +structure produced by the parser: + + val (parseTree, errorStream) = Parser.parseFile "file"; + +See parse/util/error.sml for the definition of the errorStream type. + + + +3. SYSTEM STRUCTURE +------------------- + +The frontend consists of a number of phases. +The first phase consists of a lexer/parser (written using ml-lex and ml-yacc +respectively). The output of this phase is a data-structure (parse tree) +that is a simple "unprocessed" form that closely follows the structure of C's +grammar. The next phase inputs the parse tree data-structure, type checks it, +and produces a "processed" abstract syntax tree representation (Ast). + +3.1 THE LEXER AND PARSER + +These are built using ml-lex and ml-yacc. The lex and yacc files can be found +in src/parser/grammar/[c.lex,c.grm]. The parser performs only a minimal amount +of syntactic processing. Many syntactic restrictions are enforced during the +type-checking phase e.g restrictions on the number and combination of type +specifiers used in a type. + +Similarly, most scoping issues are addressed during type-checking. +One exception is typedef. This must be handled during parsing because typedefs +introduce new types and these can dramatically alter the shape of parse trees. +In principle, the scoping of typedefs could be delayed till later processing, +but in practice this is not feasible: in particular, if typedefs are not +processed during parsing, then we cannot distinguish between +declaration forms and expressions. Consider, the following program. + +char x; +f() { + typedef int x; + { + x * x; + } +} + +Here, "x * x" declares x as a pointer to an integer. +However, if the typedef is commented out, then +"x * x" is interpreted as an expression. + +The treatment of typedefs involves a subtle interaction between the parser and +lexer. When the parser recognizes a typedef for an identifier, it communicates +to the lexer that the identifier should now be treated as a "type". +Parser lookahead introduces additional complication: we cannot lex a token until +any preceding typedefs have been processed. In particular, we must limit +lookahead to one symbol. In fact, this only works because C's grammar requires +typedefs to end in a semicolon --- this semicolon acts as a buffer so that a +typedef will be completely processed before any use of the new type is lexed. +Note that typedefs can be scoped (e.g. see the above program), and so the parser +must tell the lexer to undo the effect of a typedef when the typedef's scope is +exited. Another complication is the error recovery mechanism of ml-yacc. + +The parser produces parse trees (see src/parser/parse-tree-sig.sml). +This data structure is a simple "unprocessed" form that closely follows the +structure of C's grammar. These parse trees are built up by the actions of the +ml-yacc grammar. + +Any language extensions is likely to involve extensions to the lexer, parser and +to the parse tree datatype. When extending the lexer and parser, care must be taken to +preserve the interaction between the lexer, the parser, and the use of one-token +lookahead. Extensions to the parse tree datatype are supported via a collection +of "Ext" constructors in the parse tree datatypes. The file +extensions/c/parse-tree-ext.sml contains the default "empty extension" for +standard C. + +Files: + parser/parser-tree-sig.sml, parser-tree.sml: definition of parse tree types + parser/grammar/c.lex, c.grm: lex and yacc specifications + parser/util/sourcemap-sig.sml, sourcemap.sml: mapping source file locations + parser/util/error-sig.sml, error.sml: error reporting functions + + +3.2 ABSTRACT SYNTAX TREES (AST'S) AND BUILD-AST + +BuildAst (src/ast/build-ast.sml) consumes parse trees and builds up abstract +syntax trees (Ast's) while performing type checking. Ast's (src/ast/ast.sml) +are defined so that each of the major syntactic categories (statements, +expressions, and declarations) have a unique integer index associated with them. +These indices are used to associate information with specific parts of the +code. Care must be taken to preserve their uniqueness when performing code +transformations. + +Objects (global variables, local variables, functions, etc) and struct/union +fields are assigned globally unique integers called program identifiers +(pids). This simplifies treatment of scope in Ast. Similarly, types introduced +by structs, unions, enums and typedefs are assigned globally unique +integers called type identifiers (tids). + +BuildAst performs the following tasks: + + 1. Scoping: scoping of variables, structs, unions, fields and enums + is resolved. + + 2. Type Checking: Full ANSIC C type checking is performed, and appropriate + errors and warnings are generated. Errors and warnings are suppressed + in the case where there are parse errors. The behaviour of the type + checker can be customized using a collection of flags in + the TypeCheckControl structure defined in src/variants/ansic/config.sml. + BuildAst incrementally constructs a mapping between expression indices and + types that records the type of each expression. + + 3. Type Sizes And Memory Layout: BuildAst computes the sizes of the objects + declared in the program. It also optionally reduces sizeof expressions to + integer constants (the flag BuildAst.reduce_sizeof can be used to enable + this feature; the default setting does not reduce sizeof constructs). + BuildAst also computes the layout and alignment properties of + all objects, including the offsets for fields of structs. + Type size and memory layout is architecture and compiler specific. + The behaviour of this aspect of BuildAst is specified in + Sizes structure defined in src/variants/ansic/config.sml. + + 4. Initializer Normalization: The meaning of an object initializer is partly + determined by the type of the object begin initialized. BuildAst + normalizes initializers so that they are easier to implement. + Moreover, certain aspects of the type of an object are inferred from + an initializer (e.g. int x[] = {1,2,3}). + +Files: + + ast/ast-sig.sml, ast.sml: definition of abstract syntax datatypes. + ast/build-ast.sml: translation from parse trees to abstract syntax, with type checking and other + static semantics processing. + extensions/ + c/ -- dummy extension structures for C +variants/ + ansic/ + config.sml: various flags controlling error checking, type checking, etc. + +3.3 PRETTY PRINTER FOR AST + Ast comes equipped with a pretty-printer (ast/pp/pp-ast-sig.sml). + Not only is this useful for debugging purposes, but it also is an integral + component of source-to-source applications of the frontend. + When pretty printing Ast, pids and tids can be optionally printed. + The following flags control this behavior: + + PPLib.suppressPidUnderscores: controls printing of pids + PPLib.suppressPidGlobalUnderscores: controls printing of pids for global objects + PPLib.suppressTidUnderscores: controls printing of tids. + +Files: + pp/pp-ast-fn.sml : the generic pretty printing code for ast. + pp/pp-ast-sig.sml: pretty printing signature. + pp/pp-ast.sml: default pretty printer + pp/pp-ast-adornment.sml: pretty printer for printing ast interspersed with adornment info. + pp/pp-lib.sml: pretty printing for identifiers; some pretty printing combinators. + + +3.4 AST-UTILS [Not distributed yet] + +Files: + ast-utils/copy/: copying ast types + ast-utils/equality/: equality for ast types + ast-utils/simplifier/: ast simplifier + + +5. LOCATION INFO +---------------- diff --git a/ckit/doc/overview.html b/ckit/doc/overview.html new file mode 100644 index 0000000..2cc5380 --- /dev/null +++ b/ckit/doc/overview.html @@ -0,0 +1,280 @@ + + + + ckit Overview + + + +

    +

    ckit: A Front End for C in SML

    +
    +

    1. Getting Started

    +

    +On unpacking the ckit sources, you should see a src directory, a doc directory +and a README file (and possibly other directories, depending on the distribution). +

    +The src directory contains the following subdirectories: +

    +
    parser/ +
    lexer and parser, parse trees. +
    ast/ +
    abstract syntax trees (Ast), type-checker, pretty-printer. +
    variants/ +
    flags for controlling the parser and type-checker. +
    +To build the system, cd to src, run SML/NJ and type +
    +- CM.make();
    +
    +To test the parser on "test.c", type +
    +- ParseToAst.fileToAst "test.c";
    +
    +This parses and typechecks "test.c" and returns an abstract syntax tree for +"test.c". Alternatively, to parse, type-check and then pretty-print "test.c", +type +
    +- ParseToAst.fileToC "test.c";
    +
    +

    +

    2. Using the Frontend

    +

    +C source programs are processed in two steps. The lexer and parser +translate the source to parse trees (Parser.parseFile), and the +"elaboration" or static semantics phase (BuildAst.makeAst) performs +type checking and translates to abstract syntax. The parse tree +datatypes are defined in parse/parse-tree-sig.sml and the abstract +syntax types in ast/ast-sig.sml. These definitions are fairly +straightforward and should be self-explanatory. +

    +Top level driving functions are in module ParseToAst (see +ast/parse-to-ast-sig.sml). The following subsections describe some +commonly used ckit functions. +

    +

    2.1. ParseToAst.fileToAst: string -> ParseToAst.astBundle

    +

    +This is the main function to parse a file and produce abstract syntax. +When applied to a string (the C source file name), it produces a +bundle of information of type astBundle: +

    +   type astBundle =
    +       {ast: Ast.ast,
    +	tidtab: Bindings.tidBinding Tidtab.uidtab,
    +	errorCount: int,
    +	warningCount: int,
    +	auxiliaryInfo: {astTypes: Tables.astIndexTab,
    +			implicits: Tables.astIndexTab,
    +			env: State.symtab}}
    +
    +where: + +
  • ast is the abstract syntax tree. +
  • tidtab is the type identifier table that maps type identifiers into their meanings. +
  • errorCount is the count of all errors encountered during parsing and type checking. +
  • warningCount is the count of all warnings encountered during parsing and type checking. +
  • astTypes is a table mapping ast indexes into the types of the corresponding ast expressions. +
  • env is used to carry over global symbol information in some mult-file parsing applications. +
  • + +

    2.2. ParseToAst.fileToC : string -> unit

    +

    +Process a file and pretty print the resulting ast. + +

    2.3. Parser.parseFile : Error.errorState -> string -> ParseTree.externalDecl list

    +To get a hold of a parse tree (parser/parse-tree-sig.sml), +use Parser.parseFile (see parser/parser-sig.sml). +This function takes an errorState and the +name of a (preprocessed) C source file and returns a list of external +declaration parse trees corresponding to the top-level declarations in +the source file. See parser/parse-tree-sig.sml for definitions of +the parse tree types and parser/util/error-sig.sml for documentation +on Error.errorState. +

    + +

    3. System Structure

    +

    +The frontend consists of a number of phases. +The first phase consists of a lexer/parser (written using ml-lex and ml-yacc +respectively). The output of this phase is a data-structure (parse tree) +that is a simple "unprocessed" form that closely follows the structure of C's +grammar. The next phase inputs the parse tree data-structure, type checks it, +and produces a "processed" abstract syntax tree representation (Ast). +

    +

    3.1. The Lexer and Parser

    +

    +These are built using ml-lex and ml-yacc. The lex and yacc files can be found +in src/parser/grammar/[c.lex,c.grm]. The parser performs only a minimal amount +of syntactic processing. Many syntactic restrictions are enforced during the +type-checking phase e.g restrictions on the number and combination of type +specifiers used in a type. +

    +Similarly, most scoping issues are addressed during type-checking. +One exception is typedef. This must be handled during parsing because typedefs +introduce new types and these can dramatically alter the shape of parse trees. +In principle, the scoping of typedefs could be delayed till later processing, +but in practice this is not feasible: in particular, if typedefs are not +processed during parsing, then we cannot distinguish between +declaration forms and expressions. Consider, the following program. +

    +   char x;
    +   f() {
    +     typedef int x;
    +     {
    +       x * x;
    +     }
    +   }
    +
    +Here, "x * x" declares x as a pointer to an integer. +However, if the typedef is commented out, then +"x * x" is interpreted as an expression. +

    +The treatment of typedefs involves a subtle interaction between the parser and +lexer. When the parser recognizes a typedef for an identifier, it communicates +to the lexer that the identifier should now be treated as a "type". +Parser lookahead introduces additional complication: we cannot lex a token until +any preceding typedefs have been processed. In particular, we must limit +lookahead to one symbol. In fact, this only works because C's grammar requires +typedefs to end in a semicolon --- this semicolon acts as a buffer so that a +typedef will be completely processed before any use of the new type is lexed. +Note that typedefs can be scoped (e.g. see the above program), and so the parser +must tell the lexer to undo the effect of a typedef when the typedef's scope is +exited. Another complication is the error recovery mechanism of ml-yacc. +

    +The parser produces parse trees (see src/parser/parse-tree-sig.sml). +This data structure is a simple "unprocessed" form that closely follows the +structure of C's grammar. These parse trees are built up by the actions of the +ml-yacc grammar. +

    +Any language extensions is likely to involve extensions to the lexer, +parser and to the parse tree datatype. When extending the lexer and +parser, care must be taken to preserve the interaction between the +lexer, the parser, and the use of one-token lookahead. Extensions to +the parse tree datatype are supported via a collection of "Ext" +constructors in the parse tree datatypes. The file +extensions/c/parse-tree-ext.sml contains the default "empty extension" +for standard C. +

    +Files: +

    +
    parser/parser-tree-sig.sml, parser-tree.sml +
    definition of parse tree types +
    parser/grammar/c.lex, c.grm +
    lex and yacc specifications +
    parser/util/sourcemap-sig.sml, sourcemap.sml +
    mapping source file locations +
    parser/util/error-sig.sml, error.sml +
    error reporting functions +
    +

    +

    3.2. Abstract Syntax Trees (AST'S) And BuildAst

    +

    +BuildAst (src/ast/build-ast.sml) consumes parse trees and builds up abstract +syntax trees (Ast's) while performing type checking. Ast's (src/ast/ast.sml) +are defined so that each of the major syntactic categories (statements, +expressions, and declarations) have a unique integer index associated with them. +These indices are used to associate information with specific parts of the +code. Care must be taken to preserve their uniqueness when performing code +transformations. +

    +Objects (global variables, local variables, functions, etc) and struct/union +fields are assigned globally unique integers called program identifiers +(pids). This simplifies treatment of scope in Ast. Similarly, types introduced +by structs, unions, enums and typedefs are assigned globally unique +integers called type identifiers (tids). +

    +BuildAst performs the following tasks: +

      +
    1. Scoping: scoping of variables, structs, unions, fields and enums +is resolved. +

      +

    2. Type Checking: Full ANSIC C type checking is performed, and +appropriate errors and warnings are generated. Errors and warnings +are suppressed in the case where there are parse errors. The +behaviour of the type checker can be customized using a collection of +flags in the TypeCheckControl structure defined in +src/variants/ansic/config.sml. BuildAst incrementally constructs a +mapping between expression indices and types that records the type of +each expression. +

      +

    3. Type Sizes And Memory Layout: BuildAst computes the sizes of the +objects declared in the program. It also optionally reduces sizeof +expressions to integer constants (the flag BuildAst.reduce_sizeof can +be used to enable this feature; the default setting does not reduce +sizeof constructs). BuildAst also computes the layout and alignment +properties of all objects, including the offsets for fields of +structs. Type size and memory layout is architecture and compiler +specific. The behaviour of this aspect of BuildAst is specified in +Sizes structure defined in src/variants/ansic/config.sml. +

      +

    4. Initializer Normalization: The meaning of an object initializer +is partly determined by the type of the object begin initialized. +BuildAst normalizes initializers so that they are easier to implement. +Moreover, certain aspects of the type of an object are inferred from +an initializer (e.g. int x[] = {1,2,3}). +
    +Files: +
    +
    ast/ast-sig.sml, ast.sml +
    definition of abstract syntax datatypes. +
    ast/build-ast.sml +
    translation from parse trees to abstract syntax, with type +checking and other static semantics processing. +
    extensions/c/ +
    dummy extension structures for C +
    variants/ansic/config.sml +
    various flags controlling error checking, type checking, etc. +
    +

    +

    3.3. Pretty Printer for AST

    +Ast comes equipped with a pretty-printer (ast/pp/pp-ast-sig.sml). Not +only is this useful for debugging purposes, but it also is an integral +component of source-to-source applications of the frontend. When +pretty printing Ast, pids and tids can be optionally printed. The +following flags control this behavior: +
            
    +    PPLib.suppressPidUnderscores: controls printing of pids
    +    PPLib.suppressPidGlobalUnderscores: controls printing of pids for global objects
    +    PPLib.suppressTidUnderscores: controls printing of tids.
    +
    +Files: +
    +
    pp/pp-ast-fn.sml +
    the generic pretty printing code for ast +
    pp/pp-ast-sig.sml +
    pretty printing signature +
    pp/pp-ast.sml +
    default pretty printer +
    pp/pp-ast-adornment.sml +
    pretty printer for printing ast interspersed with adornment info +
    pp/pp-lib.sml +
    pretty printing for identifiers; some pretty printing combinators. +
    +

    +

    3.4. AST-UTILS [Not distributed yet]

    +

    +Files: +

    +
    ast-utils/copy/ +
    copying ast types +
    ast-utils/equality/ +
    equality for ast types +
    ast-utils/simplifier/ +
    ast simplifier +
    +

    + +

    4. Location Info

    +

    +Program phrases (expressions, declarations, statements) are annotated +in the abstract syntax with source code locations, which are +represented by a data structure that determines a region within a +source file. See src/parser/sourcemap-sig.sml. + +


    +
    Dave MacQueen
    + + +Last modified: Tue Dec 7 15:39:13 EST 1999 + + + diff --git a/ckit/doc/todo b/ckit/doc/todo new file mode 100644 index 0000000..487d9ea --- /dev/null +++ b/ckit/doc/todo @@ -0,0 +1,53 @@ +1. document key signatures and other files + + parser ++ parse-tree-sig.sml (parse tree representation) ++ parser-sig.sml + + parser/util ++ error-sig.sml ++ sourcemap-sig.sml + + ast/pp (pretty printing ast) + pp-ast-sig.sml + + ast + ast-sig.sml (abstract syntax representation) ++ parse-to-ast-sig.sml + (tables: tid, pid, aid, corresponding tables) + + src + sources.cm + + +2. config + + variants (configuration) + config-sig.sml + sizes-sig.sml + parse-control-sig.sml + type-check-control-sig.sml + + +3. extensions + + parse/extensions/x/ (x = c, ...) + parse-tree-ext-sig.sml + + ast/extensions/x/ + ast-ext-sig.sml + + ast/ + cnv-ext-sig.sml + + lexing & parsing + c.lex + c.grm + token table + build-ast.sml + size ... + + +--------------------------- Wed Oct 27 15:25:42 1999 -------------------------- + +Document the regression test procedure.... diff --git a/ckit/regression/README b/ckit/regression/README new file mode 100644 index 0000000..eb68e8a --- /dev/null +++ b/ckit/regression/README @@ -0,0 +1,34 @@ +Regression README + +Map of regression directory +--------------------------- +Test directories + valid-programs/ + invalid-programs/ + output/ + valid-programs.obs/ + +SML code for running tests + sources.cm + test-fn.sml defines TestFn + tests/ instantiations of TestFn + simplify/ for simplify + [not active] + typecheck/ for typecheck + sources.cm + typecheck-test.sml defines structure TypecheckTest + + +Typical usage +------------- + +% cd regression/tests/typecheck +% mkdir results # if necessary +% sml +- CM.make(); (* defines TypecheckTest *) +- TypecheckTest.testAll(); + +Note: this will fail if you don't have a results directory +in the typecheck directory. + +Note: the regression/tests/simplify is not working at the moment. diff --git a/ckit/regression/invalid-programs/a208.c b/ckit/regression/invalid-programs/a208.c new file mode 100644 index 0000000..fc0566c --- /dev/null +++ b/ckit/regression/invalid-programs/a208.c @@ -0,0 +1,6 @@ +struct qqux {int x;}; + +main (){ + + struct qqux int x; +} diff --git a/ckit/regression/invalid-programs/a37.c b/ckit/regression/invalid-programs/a37.c new file mode 100644 index 0000000..e78dd0d --- /dev/null +++ b/ckit/regression/invalid-programs/a37.c @@ -0,0 +1,5 @@ +junk junk junk + +main(){ + +} diff --git a/ckit/regression/invalid-programs/counter-example.c b/ckit/regression/invalid-programs/counter-example.c new file mode 100644 index 0000000..8c25ebd --- /dev/null +++ b/ckit/regression/invalid-programs/counter-example.c @@ -0,0 +1,22 @@ +extern int foo (int); + +main () +{ + int i; + printf ("foo"); + + /* bad */ + default: printf ("bar\n"); + /* worse */ + case 3: printf ("baz\n"); + + switch (i) + default: + if (foo (i)) + case 2: case 3: case 5: case 7: + foo (i); + else + case 4: case 6: case 8: case 9: case 10: + printf ("1"); +} + diff --git a/ckit/regression/invalid-programs/r1.c b/ckit/regression/invalid-programs/r1.c new file mode 100644 index 0000000..d306e44 --- /dev/null +++ b/ckit/regression/invalid-programs/r1.c @@ -0,0 +1,21 @@ +struct S { + int x; + int y; +}; + +main () { + + int i; + + switch(i) { + case 1: + case 2: 45; + default: 45;;;; + } + +} + +struct S g () { + int i; + return(i); +} diff --git a/ckit/regression/invalid-programs/r10.c b/ckit/regression/invalid-programs/r10.c new file mode 100644 index 0000000..41bc165 --- /dev/null +++ b/ckit/regression/invalid-programs/r10.c @@ -0,0 +1,13 @@ +enum X { + x1, x2, x3 + }; + +enum Y { + y1, y2, y3 + }; + +main () { + enum X *i; + enum Y *k; + k = i; +} diff --git a/ckit/regression/invalid-programs/r11.c b/ckit/regression/invalid-programs/r11.c new file mode 100644 index 0000000..238737f --- /dev/null +++ b/ckit/regression/invalid-programs/r11.c @@ -0,0 +1,6 @@ +main () { + int *i, *j; + i != 0; + i > 0; + i >= 0; +} diff --git a/ckit/regression/invalid-programs/r12.c b/ckit/regression/invalid-programs/r12.c new file mode 100644 index 0000000..562ce5b --- /dev/null +++ b/ckit/regression/invalid-programs/r12.c @@ -0,0 +1,5 @@ +main () { + void *i; + int *j; + j >= i; +} diff --git a/ckit/regression/invalid-programs/r13.c b/ckit/regression/invalid-programs/r13.c new file mode 100644 index 0000000..b2bc87a --- /dev/null +++ b/ckit/regression/invalid-programs/r13.c @@ -0,0 +1,5 @@ +main () { + float d; + int *i; + i = i + d; +} diff --git a/ckit/regression/invalid-programs/r14.c b/ckit/regression/invalid-programs/r14.c new file mode 100644 index 0000000..7c06092 --- /dev/null +++ b/ckit/regression/invalid-programs/r14.c @@ -0,0 +1,15 @@ +struct X { + int x1; + }; + +struct Y { + int x2; + }; + +main () { + struct X x; + struct Y y; + int tmp; + + tmp = (x && y); +} diff --git a/ckit/regression/invalid-programs/r15.c b/ckit/regression/invalid-programs/r15.c new file mode 100644 index 0000000..b1d18e7 --- /dev/null +++ b/ckit/regression/invalid-programs/r15.c @@ -0,0 +1,6 @@ +main () { + int i; + i = 1; + i++ += 3; + printf("%d\n", i); +} diff --git a/ckit/regression/invalid-programs/r16.c b/ckit/regression/invalid-programs/r16.c new file mode 100644 index 0000000..aa3ca06 --- /dev/null +++ b/ckit/regression/invalid-programs/r16.c @@ -0,0 +1,9 @@ +enum X { + x1, x2, x3 +} + +main () { + int i=1; + + x1 = i; +} diff --git a/ckit/regression/invalid-programs/r17.c b/ckit/regression/invalid-programs/r17.c new file mode 100644 index 0000000..9b469c3 --- /dev/null +++ b/ckit/regression/invalid-programs/r17.c @@ -0,0 +1,8 @@ +main () { + int a[10]; + int *(p[10]); + + *p = a; + a = 4; + (*p) = 4; +} diff --git a/ckit/regression/invalid-programs/r18.c b/ckit/regression/invalid-programs/r18.c new file mode 100644 index 0000000..053cf19 --- /dev/null +++ b/ckit/regression/invalid-programs/r18.c @@ -0,0 +1,12 @@ +struct X { + int i; + int j; +} k; + +struct X f() { + return(k); +} + +main () { + (f()).i = 1; +} diff --git a/ckit/regression/invalid-programs/r19.c b/ckit/regression/invalid-programs/r19.c new file mode 100644 index 0000000..ed917be --- /dev/null +++ b/ckit/regression/invalid-programs/r19.c @@ -0,0 +1,10 @@ +f() { + int i = 1; +} + +main () { + float j, k; + int i; + + i = j & k; +} diff --git a/ckit/regression/invalid-programs/r2.c b/ckit/regression/invalid-programs/r2.c new file mode 100644 index 0000000..2ad41a0 --- /dev/null +++ b/ckit/regression/invalid-programs/r2.c @@ -0,0 +1,25 @@ +struct S { + int x; + int y; +}; + +main () { + + struct S i ; + + switch(i) { + case 1: + case 2: 45; + default: 45;;;; + } + + if(i) { 1; } + + while(i) { 2; } + + for(1; i; 2); + for(i; 1; 2); + + do {3;} while (i); +} + diff --git a/ckit/regression/invalid-programs/r20.c b/ckit/regression/invalid-programs/r20.c new file mode 100644 index 0000000..d5ac5cd --- /dev/null +++ b/ckit/regression/invalid-programs/r20.c @@ -0,0 +1,16 @@ +struct X { + int x1; + int x2; + int x3; +}; + +enum Y { + x1, x2, x3 +}; + +main() { + int x1 = x1; + float x2; + x2 >> x1; + +} diff --git a/ckit/regression/invalid-programs/r21.c b/ckit/regression/invalid-programs/r21.c new file mode 100644 index 0000000..67f9771 --- /dev/null +++ b/ckit/regression/invalid-programs/r21.c @@ -0,0 +1,18 @@ +struct X { + int x1; + int x2; + int x3; +}; + +enum Y { + x1, x2, x3 +}; + +main() { + struct X *y, *z; + float x2; + + y = 0; + y == 0; + y >= 0+1; +} diff --git a/ckit/regression/invalid-programs/r22.c b/ckit/regression/invalid-programs/r22.c new file mode 100644 index 0000000..82c50d9 --- /dev/null +++ b/ckit/regression/invalid-programs/r22.c @@ -0,0 +1,16 @@ +struct X { + int x1; + int x2; + int x3; +}; + +enum Y { + x1, x2, x3 +}; + +main() { + struct X y, z; + float x2; + + y == z; +} diff --git a/ckit/regression/invalid-programs/r23.c b/ckit/regression/invalid-programs/r23.c new file mode 100644 index 0000000..aed8df4 --- /dev/null +++ b/ckit/regression/invalid-programs/r23.c @@ -0,0 +1,20 @@ + +main () { +struct X { + int y; +}; + +struct X; +struct X; + +struct X { + int y; +}; + + + return(1); +} + + + + diff --git a/ckit/regression/invalid-programs/r24.c b/ckit/regression/invalid-programs/r24.c new file mode 100644 index 0000000..eca94cf --- /dev/null +++ b/ckit/regression/invalid-programs/r24.c @@ -0,0 +1,15 @@ +struct X { + int x1; + int x2; + int x3; +}; + +enum X { + a1, a2 +}; + +main() { + struct X y, z; + float x2; + return(0); +} diff --git a/ckit/regression/invalid-programs/r25.c b/ckit/regression/invalid-programs/r25.c new file mode 100644 index 0000000..ce1dd91 --- /dev/null +++ b/ckit/regression/invalid-programs/r25.c @@ -0,0 +1,22 @@ +struct t { + struct s *x; +}; + +struct s { + struct t *x; +}; + +struct s { + struct t *x; +}; + +main () { + struct t; + struct s { + struct t *y; + }; + struct t { + struct s *y; + }; + return(1); +} diff --git a/ckit/regression/invalid-programs/r26.c b/ckit/regression/invalid-programs/r26.c new file mode 100644 index 0000000..0bd629e --- /dev/null +++ b/ckit/regression/invalid-programs/r26.c @@ -0,0 +1,17 @@ +enum t { + g1, g2 +}; + + +struct t { + enum t *x; +}; + +struct s { + enum t *x; +}; + +main () { + + return(1); +} diff --git a/ckit/regression/invalid-programs/r27.c b/ckit/regression/invalid-programs/r27.c new file mode 100644 index 0000000..678a25f --- /dev/null +++ b/ckit/regression/invalid-programs/r27.c @@ -0,0 +1,18 @@ +struct t; +struct t *x; +main () { + + struct s { + struct t *y; + }; + + struct t { + struct s *y; + }; + x -> y; + return(1); +} + + + + diff --git a/ckit/regression/invalid-programs/r28.c b/ckit/regression/invalid-programs/r28.c new file mode 100644 index 0000000..14f0807 --- /dev/null +++ b/ckit/regression/invalid-programs/r28.c @@ -0,0 +1,17 @@ + +struct t *x; + +main () { + + struct t { + struct s *y; + }; + + (*x).y; + + return(1); +} + + + + diff --git a/ckit/regression/invalid-programs/r29.c b/ckit/regression/invalid-programs/r29.c new file mode 100644 index 0000000..3423410 --- /dev/null +++ b/ckit/regression/invalid-programs/r29.c @@ -0,0 +1,14 @@ +union X { + int y; +}; + + +main () { +int x; +int x; + return(1); +} + + + + diff --git a/ckit/regression/invalid-programs/r3.c b/ckit/regression/invalid-programs/r3.c new file mode 100644 index 0000000..6a65024 --- /dev/null +++ b/ckit/regression/invalid-programs/r3.c @@ -0,0 +1,25 @@ +union S { + int i; + int j; +}; + +main () { + + struct S i ; + + switch(i) { + case 1: + case 2: 45; + default: 45;;;; + } + + if(i) { 1; } + + while(i) { 2; } + + for(1; i; 2); + for(i; 1; 2); + + do {3;} while (i); +} + diff --git a/ckit/regression/invalid-programs/r30.c b/ckit/regression/invalid-programs/r30.c new file mode 100644 index 0000000..e5973c2 --- /dev/null +++ b/ckit/regression/invalid-programs/r30.c @@ -0,0 +1,41 @@ +struct X { + struct X x; + int y; +}; + +enum X { + a1, a2 +}; + + +int f(int); + +extern int f(); + +int f (int x) { + return(x); +} + +int tmp =0; + +int f (); + +main() { +/* + struct X { + int xxx; + }; +*/ + + int f, kkkkkkkkkkk; + struct { + int x; + } X; + int tmp; + double y; + int i = (int) y; + + f = f + 1; + +} + diff --git a/ckit/regression/invalid-programs/r31.c b/ckit/regression/invalid-programs/r31.c new file mode 100644 index 0000000..c483db9 --- /dev/null +++ b/ckit/regression/invalid-programs/r31.c @@ -0,0 +1,10 @@ +struct X aa; + +struct X { + struct X x; + int y; +}; + +main() { + return(0); +} diff --git a/ckit/regression/invalid-programs/r32.c b/ckit/regression/invalid-programs/r32.c new file mode 100644 index 0000000..49d9e1f --- /dev/null +++ b/ckit/regression/invalid-programs/r32.c @@ -0,0 +1,7 @@ + const volatile const int y; + const volatile volatile const volatile int z; + const int * const volatile const p; + +main() { + y; +} diff --git a/ckit/regression/invalid-programs/r33.c b/ckit/regression/invalid-programs/r33.c new file mode 100644 index 0000000..6d81e9d --- /dev/null +++ b/ckit/regression/invalid-programs/r33.c @@ -0,0 +1,4 @@ +main () { + const int y = 1; + y=2; +} diff --git a/ckit/regression/invalid-programs/r34.c b/ckit/regression/invalid-programs/r34.c new file mode 100644 index 0000000..a04a6fb --- /dev/null +++ b/ckit/regression/invalid-programs/r34.c @@ -0,0 +1,5 @@ +main () { + int j; + int * const y = &j; + y=&j; +} diff --git a/ckit/regression/invalid-programs/r35.c b/ckit/regression/invalid-programs/r35.c new file mode 100644 index 0000000..216c1ae --- /dev/null +++ b/ckit/regression/invalid-programs/r35.c @@ -0,0 +1,7 @@ +main () { + int j; + const int *y = &j; + y=&j; + *y=3; + +} diff --git a/ckit/regression/invalid-programs/r36.c b/ckit/regression/invalid-programs/r36.c new file mode 100644 index 0000000..3175ced --- /dev/null +++ b/ckit/regression/invalid-programs/r36.c @@ -0,0 +1,11 @@ +struct X { + int f; + const char c; +} x, *xx; + +main () { + const char c = 'c'; + x.c = c; + xx = &x; + xx -> c = c; +} diff --git a/ckit/regression/invalid-programs/r37.c b/ckit/regression/invalid-programs/r37.c new file mode 100644 index 0000000..e77d255 --- /dev/null +++ b/ckit/regression/invalid-programs/r37.c @@ -0,0 +1,9 @@ +main () { + char *p; + const char *q; + char *const r; + + q = p; /* ok */ + p=q; /* bad */ + r = q; /* bad */ +} diff --git a/ckit/regression/invalid-programs/r38.c b/ckit/regression/invalid-programs/r38.c new file mode 100644 index 0000000..8d0b1d0 --- /dev/null +++ b/ckit/regression/invalid-programs/r38.c @@ -0,0 +1,11 @@ +int f(); + +int f(float); + +int f(int x) { + return(x); +} + +main () { + return(0); +} diff --git a/ckit/regression/invalid-programs/r39.c b/ckit/regression/invalid-programs/r39.c new file mode 100644 index 0000000..304919a --- /dev/null +++ b/ckit/regression/invalid-programs/r39.c @@ -0,0 +1,9 @@ +int f(float); + +int f(int x) { + return(x); +} + +main () { + return(0); +} diff --git a/ckit/regression/invalid-programs/r4.c b/ckit/regression/invalid-programs/r4.c new file mode 100644 index 0000000..5e8fdcc --- /dev/null +++ b/ckit/regression/invalid-programs/r4.c @@ -0,0 +1,9 @@ +main () { + + int *i, i2, k[4]; + short *j; + + i = j; + i = i2; + i = k; +} \ No newline at end of file diff --git a/ckit/regression/invalid-programs/r40.c b/ckit/regression/invalid-programs/r40.c new file mode 100644 index 0000000..e7ab2da --- /dev/null +++ b/ckit/regression/invalid-programs/r40.c @@ -0,0 +1,8 @@ +int f(int x) { + return(0); +} + +main () { + int f; + f(3); +} diff --git a/ckit/regression/invalid-programs/r41.c b/ckit/regression/invalid-programs/r41.c new file mode 100644 index 0000000..5fec4de --- /dev/null +++ b/ckit/regression/invalid-programs/r41.c @@ -0,0 +1,17 @@ +int f(); + +int f(int c) { + return(c); +} + + + +main() +{ + char c; + f(c); + +} + + +int f(int,int); diff --git a/ckit/regression/invalid-programs/r42.c b/ckit/regression/invalid-programs/r42.c new file mode 100644 index 0000000..179ace8 --- /dev/null +++ b/ckit/regression/invalid-programs/r42.c @@ -0,0 +1,6 @@ +main () { + int (*x)[2]; + int (*y)[4]; + + x = y; +} diff --git a/ckit/regression/invalid-programs/r43.c b/ckit/regression/invalid-programs/r43.c new file mode 100644 index 0000000..cc0c449 --- /dev/null +++ b/ckit/regression/invalid-programs/r43.c @@ -0,0 +1,14 @@ +enum E1 { + x, y, z +}; + +enum E2 { + a, b +}; + +main () { + enum E1 *x; + enum E2 *y; + + x = y; +} diff --git a/ckit/regression/invalid-programs/r44.c b/ckit/regression/invalid-programs/r44.c new file mode 100644 index 0000000..af8ad13 --- /dev/null +++ b/ckit/regression/invalid-programs/r44.c @@ -0,0 +1,15 @@ +enum E1 { + x, y, z +}; + +enum E2 { + a, b +}; + +int f(enum E1); + +int f(enum E2); + +main () { + f(a); +} diff --git a/ckit/regression/invalid-programs/r45.c b/ckit/regression/invalid-programs/r45.c new file mode 100644 index 0000000..81646b1 --- /dev/null +++ b/ckit/regression/invalid-programs/r45.c @@ -0,0 +1,8 @@ +int f(int (*)[]); + +int f(int (*)[4]); + +main () { + int (*a)[5]; + f(a); +} diff --git a/ckit/regression/invalid-programs/r46.c b/ckit/regression/invalid-programs/r46.c new file mode 100644 index 0000000..da2c59f --- /dev/null +++ b/ckit/regression/invalid-programs/r46.c @@ -0,0 +1,8 @@ +int f(int (*)[4]); + +int f(int (*)[]); + +main () { + int (*a)[5]; + f(a); +} diff --git a/ckit/regression/invalid-programs/r47.c b/ckit/regression/invalid-programs/r47.c new file mode 100644 index 0000000..9a786af --- /dev/null +++ b/ckit/regression/invalid-programs/r47.c @@ -0,0 +1,12 @@ +int f(int (*)[]); + +int f(int (*x)[4]) { + return(0); +} + +main () { + int (*a)[5]; + f(a); +} + + diff --git a/ckit/regression/invalid-programs/r48.c b/ckit/regression/invalid-programs/r48.c new file mode 100644 index 0000000..3a9d2fe --- /dev/null +++ b/ckit/regression/invalid-programs/r48.c @@ -0,0 +1,14 @@ +int (*(f(int)))[]; +int (*(f(int)))[4]; + +int (*(f(int x)))[3] { + int (*a)[3]; + return(a); +} + +main () { + int (*a)[5]; + f(2); +} + + diff --git a/ckit/regression/invalid-programs/r49.c b/ckit/regression/invalid-programs/r49.c new file mode 100644 index 0000000..22a386d --- /dev/null +++ b/ckit/regression/invalid-programs/r49.c @@ -0,0 +1,14 @@ +int (*(f(int)))[]; +int (*(f(int)))[4]; + +int (*(f(int x)))[] { + int (*a)[3]; + return(a); +} + +main () { + int (*a)[5]; + f(2); +} + + diff --git a/ckit/regression/invalid-programs/r5.c b/ckit/regression/invalid-programs/r5.c new file mode 100644 index 0000000..203cfd3 --- /dev/null +++ b/ckit/regression/invalid-programs/r5.c @@ -0,0 +1,5 @@ +main () { + + int j[4]; + int i[4] = j; +} diff --git a/ckit/regression/invalid-programs/r50.c b/ckit/regression/invalid-programs/r50.c new file mode 100644 index 0000000..7b7823a --- /dev/null +++ b/ckit/regression/invalid-programs/r50.c @@ -0,0 +1,11 @@ +int f(int x) { + return(x); +} + +main () { + int (*g)(float x) = f; + + f(1.1); +} + + diff --git a/ckit/regression/invalid-programs/r51.c b/ckit/regression/invalid-programs/r51.c new file mode 100644 index 0000000..61637a1 --- /dev/null +++ b/ckit/regression/invalid-programs/r51.c @@ -0,0 +1,13 @@ +int f(const int x[2]); + +int f(int x[3]) { +/* x = 1; */ + return(x[0]); +} + +main () { + int i[4]; + f(i); +} + + diff --git a/ckit/regression/invalid-programs/r52.c b/ckit/regression/invalid-programs/r52.c new file mode 100644 index 0000000..4312367 --- /dev/null +++ b/ckit/regression/invalid-programs/r52.c @@ -0,0 +1,10 @@ +struct X { + int x; + int y; +} + +main(){ + register struct X p; + p.x = 1; + &p; +} diff --git a/ckit/regression/invalid-programs/r53.c b/ckit/regression/invalid-programs/r53.c new file mode 100644 index 0000000..aa8ee72 --- /dev/null +++ b/ckit/regression/invalid-programs/r53.c @@ -0,0 +1,9 @@ +struct X { + int t(int); + int y; +}; + + +main(){ + return(0); +} diff --git a/ckit/regression/invalid-programs/r54.c b/ckit/regression/invalid-programs/r54.c new file mode 100644 index 0000000..7936070 --- /dev/null +++ b/ckit/regression/invalid-programs/r54.c @@ -0,0 +1,6 @@ +int (f())[]; + + +main(){ + return(0); +} diff --git a/ckit/regression/invalid-programs/r55.c b/ckit/regression/invalid-programs/r55.c new file mode 100644 index 0000000..d6e6229 --- /dev/null +++ b/ckit/regression/invalid-programs/r55.c @@ -0,0 +1,6 @@ +int (f())(); + + +main(){ + return(0); +} diff --git a/ckit/regression/invalid-programs/r56.c b/ckit/regression/invalid-programs/r56.c new file mode 100644 index 0000000..0d32685 --- /dev/null +++ b/ckit/regression/invalid-programs/r56.c @@ -0,0 +1,7 @@ +int f(static int x) { + return(0); +} + +main(){ + return(0); +} diff --git a/ckit/regression/invalid-programs/r57.c b/ckit/regression/invalid-programs/r57.c new file mode 100644 index 0000000..c9ca01d --- /dev/null +++ b/ckit/regression/invalid-programs/r57.c @@ -0,0 +1,7 @@ +register int f(int x) { + return(0); +} + +main(){ + return(0); +} diff --git a/ckit/regression/invalid-programs/r59.c b/ckit/regression/invalid-programs/r59.c new file mode 100644 index 0000000..11157cc --- /dev/null +++ b/ckit/regression/invalid-programs/r59.c @@ -0,0 +1,15 @@ +void main() { + int x; + unsigned const int xx; + char y; + int *p; + +/* ++(*++p); */ +/* *(++p) += 6; */ +/* p++; */ +/* p--; */ +/* *(p--) += 6; */ + + xx = x; + +} diff --git a/ckit/regression/invalid-programs/r6.c b/ckit/regression/invalid-programs/r6.c new file mode 100644 index 0000000..158a892 --- /dev/null +++ b/ckit/regression/invalid-programs/r6.c @@ -0,0 +1,5 @@ +main () { + + const int j; + j = 5; +} diff --git a/ckit/regression/invalid-programs/r60.c b/ckit/regression/invalid-programs/r60.c new file mode 100644 index 0000000..5f91526 --- /dev/null +++ b/ckit/regression/invalid-programs/r60.c @@ -0,0 +1,9 @@ + + struct w { int x,y,z; }; + + void main() { + struct w foo; + float x; + x = foo; + + } diff --git a/ckit/regression/invalid-programs/r61.c b/ckit/regression/invalid-programs/r61.c new file mode 100644 index 0000000..11157cc --- /dev/null +++ b/ckit/regression/invalid-programs/r61.c @@ -0,0 +1,15 @@ +void main() { + int x; + unsigned const int xx; + char y; + int *p; + +/* ++(*++p); */ +/* *(++p) += 6; */ +/* p++; */ +/* p--; */ +/* *(p--) += 6; */ + + xx = x; + +} diff --git a/ckit/regression/invalid-programs/r62.c b/ckit/regression/invalid-programs/r62.c new file mode 100644 index 0000000..dc6e8b8 --- /dev/null +++ b/ckit/regression/invalid-programs/r62.c @@ -0,0 +1,8 @@ +main () { + return(0); +} + +f(int x, int x) { + x = x; +} + diff --git a/ckit/regression/invalid-programs/r63.c b/ckit/regression/invalid-programs/r63.c new file mode 100644 index 0000000..23fd437 --- /dev/null +++ b/ckit/regression/invalid-programs/r63.c @@ -0,0 +1,8 @@ +/* null function pointers */ + +void f(int x, void y); + +void main(){ + f(0); +} + diff --git a/ckit/regression/invalid-programs/r64.c b/ckit/regression/invalid-programs/r64.c new file mode 100644 index 0000000..a6bee96 --- /dev/null +++ b/ckit/regression/invalid-programs/r64.c @@ -0,0 +1,14 @@ + + struct foo_t{ + int x; + int y; + }; + + void f(int x0, int y0){ + struct foo_t myfoo = {x0,y0}; + } + + void main(){ + f(0,0); + } + diff --git a/ckit/regression/invalid-programs/r65.c b/ckit/regression/invalid-programs/r65.c new file mode 100644 index 0000000..242a7a0 --- /dev/null +++ b/ckit/regression/invalid-programs/r65.c @@ -0,0 +1,13 @@ +main () { + return 1; +} + +int foo () { + extern int a[10]; + return 1; +} + +int bar () { + extern int a[20]; + return 1; +} diff --git a/ckit/regression/invalid-programs/r66.c b/ckit/regression/invalid-programs/r66.c new file mode 100644 index 0000000..0cbe82e --- /dev/null +++ b/ckit/regression/invalid-programs/r66.c @@ -0,0 +1,3 @@ +main() { + x->y; +} diff --git a/ckit/regression/invalid-programs/r7.c b/ckit/regression/invalid-programs/r7.c new file mode 100644 index 0000000..3618b08 --- /dev/null +++ b/ckit/regression/invalid-programs/r7.c @@ -0,0 +1,9 @@ +f() { + int i = 1; +} + +main () { + int j; + float d; + j = sizeof(5) & d; +} diff --git a/ckit/regression/invalid-programs/r8.c b/ckit/regression/invalid-programs/r8.c new file mode 100644 index 0000000..3c3f70b --- /dev/null +++ b/ckit/regression/invalid-programs/r8.c @@ -0,0 +1,8 @@ +f() { + int i = 1; +} + +main () { + void *j; + j = 5+sizeof; +} diff --git a/ckit/regression/invalid-programs/r9.c b/ckit/regression/invalid-programs/r9.c new file mode 100644 index 0000000..ed917be --- /dev/null +++ b/ckit/regression/invalid-programs/r9.c @@ -0,0 +1,10 @@ +f() { + int i = 1; +} + +main () { + float j, k; + int i; + + i = j & k; +} diff --git a/ckit/regression/invalid-programs/t15.c b/ckit/regression/invalid-programs/t15.c new file mode 100644 index 0000000..e704b89 --- /dev/null +++ b/ckit/regression/invalid-programs/t15.c @@ -0,0 +1,13 @@ +junk junk junk + +void main () +{ + + while (1) { + struct {int x; int y;} point; + } + + return; +} + + diff --git a/ckit/regression/invalid-programs/t2.c b/ckit/regression/invalid-programs/t2.c new file mode 100644 index 0000000..c7cf349 --- /dev/null +++ b/ckit/regression/invalid-programs/t2.c @@ -0,0 +1,19 @@ +extern int foo (int); + +main () +{ + int i; + printf ("this is the end, my only friend the end"); + + default: printf ("bar\n"); + + switch (i) + default: + if (foo (i)) + case 2: case 3: case 5: case 7: + foo (i); + else + case 4: case 6: case 8: case 9: case 10: + printf ("1"); +} + diff --git a/ckit/regression/output/a1.c b/ckit/regression/output/a1.c new file mode 100644 index 0000000..58a6efb --- /dev/null +++ b/ckit/regression/output/a1.c @@ -0,0 +1,65 @@ + +enum S_t1 { + x1=0, + x2=0 +}; +int main () +{ + struct S_t2 { + int abdd; + }; + struct S_t2 j_p7; + int i_p8; + i_p8 = x1; + switch (i_p8) + { + + + case 1: + + case 2: + 45; + + default: + 45; + } + if (i_p8) + { + + 1; + } + goto whileCont_p11; + +whileTop_p10: + ; + 2; + +whileCont_p11: + ; + if (i_p8) + goto whileTop_p10; + goto forStart_p14; + +forTop_p13: + ; + +forStart_p14: + ; + if (i_p8) + goto forTop_p13; + goto forStart_p18; + +forTop_p17: + ; + +forStart_p18: + ; + if (1) + goto forTop_p17; + +doTop_p21: + ; + 3; + if (i_p8) + goto doTop_p21; +} diff --git a/ckit/regression/output/a10.c b/ckit/regression/output/a10.c new file mode 100644 index 0000000..fe15845 --- /dev/null +++ b/ckit/regression/output/a10.c @@ -0,0 +1,17 @@ + +enum X_t3 { + x1=0, + x2=0, + x3=0 +}; +enum Y_t4 { + y1=0, + y2=0, + y3=0 +}; +int main () +{ + enum X_t3 i_p33; + enum Y_t4 k_p34; + k_p34 = ((enum Y_t4) i_p33); +} diff --git a/ckit/regression/output/a100.c b/ckit/regression/output/a100.c new file mode 100644 index 0000000..e7019fa --- /dev/null +++ b/ckit/regression/output/a100.c @@ -0,0 +1,7 @@ + +extern int timer_create (struct t5 *); +int main () +{ + + return 3; +} diff --git a/ckit/regression/output/a101.c b/ckit/regression/output/a101.c new file mode 100644 index 0000000..55ad934 --- /dev/null +++ b/ckit/regression/output/a101.c @@ -0,0 +1,11 @@ + +enum t22_t6 { + p45_B_FALSE=0, + p46_B_TRUE=1 +}; +typedef enum t22_t6 boolean_t_t23_t7; +int main () +{ + boolean_t_t23_t7 j_p43; + return 3; +} diff --git a/ckit/regression/output/a102.c b/ckit/regression/output/a102.c new file mode 100644 index 0000000..8fce251 --- /dev/null +++ b/ckit/regression/output/a102.c @@ -0,0 +1,7 @@ + +int a[4]={1,2,3,4}; +int main () +{ + + return a[2]; +} diff --git a/ckit/regression/output/a103.c b/ckit/regression/output/a103.c new file mode 100644 index 0000000..9969357 --- /dev/null +++ b/ckit/regression/output/a103.c @@ -0,0 +1,16 @@ + +struct foo_t8 { + int a[10]; + int p; +}; +int b[100]; +int f (int a_p51[100]) +{ + + return a_p51[0]; +} +int main () +{ + struct foo_t8 j_p53; + return j_p53.p; +} diff --git a/ckit/regression/output/a104.c b/ckit/regression/output/a104.c new file mode 100644 index 0000000..0dd8948 --- /dev/null +++ b/ckit/regression/output/a104.c @@ -0,0 +1,15 @@ + +int a[4]; +int f (int x_p56,int y_p57,int z_p58) +{ + + return (x_p56+y_p57)+z_p58; +} +int main () +{ + int s_p60; + s_p60 = 3; + a[3] = 5; + a[3]; + return f (a[2],a[0],a[1]); +} diff --git a/ckit/regression/output/a105.c b/ckit/regression/output/a105.c new file mode 100644 index 0000000..c3335dc --- /dev/null +++ b/ckit/regression/output/a105.c @@ -0,0 +1,12 @@ + +typedef int (*Rsdefkey_f_t9) (int *,char *,int); +typedef char mystring_t10[10]; +int main () +{ + int j_p64; + mystring_t10 s_p65; + s_p65[0] = ((char) 97); + s_p65[9] = ((char) 106); + s_p65[10] = ((char) 107); + return j_p64; +} diff --git a/ckit/regression/output/a106.c b/ckit/regression/output/a106.c new file mode 100644 index 0000000..70331db --- /dev/null +++ b/ckit/regression/output/a106.c @@ -0,0 +1,10 @@ + +struct foo_t11 { + int a[10]; + int p:4; +}; +int main () +{ + struct foo_t11 j_p70; + return j_p70.p; +} diff --git a/ckit/regression/output/a107.c b/ckit/regression/output/a107.c new file mode 100644 index 0000000..53f8504 --- /dev/null +++ b/ckit/regression/output/a107.c @@ -0,0 +1,10 @@ + +int t (int *,int ); +int main () +{ + int j_p73[10]; + int i_p74; + int *volatile *p_p75; + i_p74 = 4; + return i_p74; +} diff --git a/ckit/regression/output/a108.c b/ckit/regression/output/a108.c new file mode 100644 index 0000000..5fa550a --- /dev/null +++ b/ckit/regression/output/a108.c @@ -0,0 +1,15 @@ + +int main () +{ + int j_p77; + int i_p78; + int quesCol_p80; + i_p78 = 3; + if (i_p78) + quesCol_p80 = 4; + else + quesCol_p80 = 5; + j_p77 = quesCol_p80; + printf ("i=%d, j=%d\n",i_p78,j_p77); + return j_p77; +} diff --git a/ckit/regression/output/a109.c b/ckit/regression/output/a109.c new file mode 100644 index 0000000..3d0e47b --- /dev/null +++ b/ckit/regression/output/a109.c @@ -0,0 +1,7 @@ + +struct t12 *x; +int main () +{ + + +} diff --git a/ckit/regression/output/a11.c b/ckit/regression/output/a11.c new file mode 100644 index 0000000..8631f7b --- /dev/null +++ b/ckit/regression/output/a11.c @@ -0,0 +1,10 @@ + +int main () +{ + int *i_p85; + int *j_p86; + i_p85!=0; + i_p85>j_p86; + i_p85>=j_p86; + i_p85<(j_p86+(1*4)); +} diff --git a/ckit/regression/output/a110.c b/ckit/regression/output/a110.c new file mode 100644 index 0000000..5f27721 --- /dev/null +++ b/ckit/regression/output/a110.c @@ -0,0 +1,17 @@ + +extern int printf (); +extern int i; +int foo () +{ + + return i; +} +int i=10; +int main () +{ + int call_p92; + call_p92 = foo (); + printf ("foo = %d\n",call_p92); + printf ("i = %d\n",i); + return i; +} diff --git a/ckit/regression/output/a111.c b/ckit/regression/output/a111.c new file mode 100644 index 0000000..36bb1cf --- /dev/null +++ b/ckit/regression/output/a111.c @@ -0,0 +1,17 @@ + +extern int printf (); +extern int i; +int foo () +{ + + return i; +} +int i=10; +int main () +{ + int call_p98; + call_p98 = foo (); + printf ("foo = %d\n",call_p98); + printf ("i = %d\n",i); + return i; +} diff --git a/ckit/regression/output/a112.c b/ckit/regression/output/a112.c new file mode 100644 index 0000000..1566712 --- /dev/null +++ b/ckit/regression/output/a112.c @@ -0,0 +1,9 @@ + +extern int i; +int i=5; +static int j; +int main () +{ + + return i; +} diff --git a/ckit/regression/output/a113.c b/ckit/regression/output/a113.c new file mode 100644 index 0000000..1fcfaaf --- /dev/null +++ b/ckit/regression/output/a113.c @@ -0,0 +1,9 @@ + +int main () +{ + char *cp_p104; + if (cp_p104==0) + return 1; + else + return 2; +} diff --git a/ckit/regression/output/a114.c b/ckit/regression/output/a114.c new file mode 100644 index 0000000..8028929 --- /dev/null +++ b/ckit/regression/output/a114.c @@ -0,0 +1,8 @@ + +int i=5; +static int j; +int main () +{ + + return i; +} diff --git a/ckit/regression/output/a115.c b/ckit/regression/output/a115.c new file mode 100644 index 0000000..fc47ae7 --- /dev/null +++ b/ckit/regression/output/a115.c @@ -0,0 +1,8 @@ + +extern int i; +static int j; +int main () +{ + + return j; +} diff --git a/ckit/regression/output/a116.c b/ckit/regression/output/a116.c new file mode 100644 index 0000000..0d354a9 --- /dev/null +++ b/ckit/regression/output/a116.c @@ -0,0 +1,11 @@ + +void foo (char **arg_p112) +{ + + +} +int main () +{ + + return 0; +} diff --git a/ckit/regression/output/a117.c b/ckit/regression/output/a117.c new file mode 100644 index 0000000..f8082be --- /dev/null +++ b/ckit/regression/output/a117.c @@ -0,0 +1,20 @@ + +extern int getopt (); +int main (int argc_p116,int argv_p117) +{ + int i_p118; + int call_p122; + goto whileCont_p120; + +whileTop_p119: + ; + goto whileCont_p120; + +whileCont_p120: + ; + call_p122 = getopt (argc_p116,argv_p117,"c:a:f:F:"); + i_p118 = call_p122; + if (i_p118!=(-1)) + goto whileTop_p119; + return 0; +} diff --git a/ckit/regression/output/a118.c b/ckit/regression/output/a118.c new file mode 100644 index 0000000..49b3833 --- /dev/null +++ b/ckit/regression/output/a118.c @@ -0,0 +1,10 @@ + +extern struct t13 *fp; +struct bar_t14 { + struct t13 *l; +}; +int main () +{ + int i_p128; + return 0; +} diff --git a/ckit/regression/output/a12.c b/ckit/regression/output/a12.c new file mode 100644 index 0000000..c81e322 --- /dev/null +++ b/ckit/regression/output/a12.c @@ -0,0 +1,7 @@ + +int main () +{ + void *i_p130; + int *j_p131; + j_p131==i_p130; +} diff --git a/ckit/regression/output/a13.c b/ckit/regression/output/a13.c new file mode 100644 index 0000000..e73df33 --- /dev/null +++ b/ckit/regression/output/a13.c @@ -0,0 +1,26 @@ + +enum X_t15 { + x1=0, + x2=0, + x3=0 +}; +enum Y_t16 { + y1=0, + y2=0, + y3=0 +}; +int main () +{ + enum X_t15 i_p141; + enum Y_t16 k_p142; + int tmp_p143; + int *tmp2_p144; + tmp_p143 = (k_p142%i_p141); + tmp_p143 = (k_p142^i_p141); + tmp_p143 = (k_p142|i_p141); + tmp_p143 = (k_p142&i_p141); + tmp_p143 = (k_p142>>i_p141); + tmp_p143 = (k_p142<>k_p155))||(k_p155<=(z_p197+(1*12)); +} diff --git a/ckit/regression/output/a200.c b/ckit/regression/output/a200.c new file mode 100644 index 0000000..b413cbf --- /dev/null +++ b/ckit/regression/output/a200.c @@ -0,0 +1,92 @@ + +struct foo_t23 { + int x; + float y; +}; +union bar_t24 { + int x; + float y; +}; +int main () +{ + static char *s0_p206="this string"; + static char s1_p207[19]={78,111,116,32,110,117,108,108,32,116,101,114,109,105,110,97,116,101,100}; + static char s2_p208[16]={78,117,108,108,32,116,101,114,109,105,110,97,116,101,100,0}; + static char s3_p209[16]={78,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; + char *s4_p210; + char s5_p211[19]; + char s6_p212[16]; + static int a1_p213[5]={0,1,2,3,4}; + static int a2_p214[5]; + int a3_p215[5]; + int a4_p216[5]; + static struct foo_t23 st1_p217={1,2.0}; + static struct foo_t23 st2_p218; + struct foo_t23 st3_p219; + static union bar_t24 u1_p220; + static union bar_t24 u2_p221={3}; + union bar_t24 u3_p222; + union bar_t24 u4_p223; + static struct foo_t23 ast1_p224[3]={{1,1.0},{2,2.0},{3,3.0}}; + static struct foo_t23 ast2_p225[3]={{1,1.0},{2,2.0},{3,3.0}}; + static struct foo_t23 ast3_p226[3]={{1,1.0},{2,2.0},{3,3.0}}; + static struct foo_t23 ast4_p227[3]={{1,1.0},{2,2.0},{3,0}}; + static struct foo_t23 ast5_p228[3]={{1,1.0},{2,3},{0,0}}; + char z_p229[2][3]; + s4_p210 = "this string"; + s5_p211[0] = 78; + s5_p211[1] = 111; + s5_p211[2] = 116; + s5_p211[3] = 32; + s5_p211[4] = 110; + s5_p211[5] = 117; + s5_p211[6] = 108; + s5_p211[7] = 108; + s5_p211[8] = 32; + s5_p211[9] = 116; + s5_p211[10] = 101; + s5_p211[11] = 114; + s5_p211[12] = 109; + s5_p211[13] = 105; + s5_p211[14] = 110; + s5_p211[15] = 97; + s5_p211[16] = 116; + s5_p211[17] = 101; + s5_p211[18] = 100; + s6_p212[0] = 78; + s6_p212[1] = 117; + s6_p212[2] = 108; + s6_p212[3] = 108; + s6_p212[4] = 32; + s6_p212[5] = 116; + s6_p212[6] = 101; + s6_p212[7] = 114; + s6_p212[8] = 109; + s6_p212[9] = 105; + s6_p212[10] = 110; + s6_p212[11] = 97; + s6_p212[12] = 116; + s6_p212[13] = 101; + s6_p212[14] = 100; + s6_p212[15] = 0; + a3_p215[0] = 0; + a3_p215[1] = 1; + a3_p215[2] = 2; + a3_p215[3] = 3; + a3_p215[4] = 4; + a4_p216[0] = 0; + a4_p216[1] = 1; + a4_p216[2] = 2; + a4_p216[3] = 0; + a4_p216[4] = 0; + st3_p219.x = 3; + st3_p219.y = 4.0; + u4_p223.x = 4; + z_p229[0][0] = 97; + z_p229[0][1] = 98; + z_p229[0][2] = 99; + z_p229[1][0] = 100; + z_p229[1][1] = 101; + z_p229[1][2] = 102; + return 0; +} diff --git a/ckit/regression/output/a201.c b/ckit/regression/output/a201.c new file mode 100644 index 0000000..8bb27d2 --- /dev/null +++ b/ckit/regression/output/a201.c @@ -0,0 +1,20 @@ + +struct foo_t25 { + int x; + float y; +}; +union bar_t26 { + int x; + float y; +}; +int main () +{ + char z_p237[2][3]; + z_p237[0][0] = 97; + z_p237[0][1] = 98; + z_p237[0][2] = 99; + z_p237[1][0] = 100; + z_p237[1][1] = 101; + z_p237[1][2] = 102; + return 0; +} diff --git a/ckit/regression/output/a202.c b/ckit/regression/output/a202.c new file mode 100644 index 0000000..d99fec2 --- /dev/null +++ b/ckit/regression/output/a202.c @@ -0,0 +1,17 @@ + +struct foo_t27 { + char x; + int y; +}; +struct foo_t27 a; +struct foo_t27 b; +int main () +{ + int *p_p244; + p_p244 = ((int *) (&a)); + *p_p244 = (-1); + b = a; + p_p244 = ((int *) (&b)); + printf ("*p=%x\n",*p_p244); + return 0; +} diff --git a/ckit/regression/output/a203.c b/ckit/regression/output/a203.c new file mode 100644 index 0000000..11e9f84 --- /dev/null +++ b/ckit/regression/output/a203.c @@ -0,0 +1,21 @@ + +struct foo_t28 { + char x; + int y; +}; +struct foo_t28 a; +struct foo_t28 b; +int main () +{ + char x_p252; + int y_p253; + x_p252 = ((char) 255); + x_p252 = ((char) (x_p252+1)); + y_p253 = ((int) x_p252); + printf ("y=%d\n",y_p253); + x_p252 = ((char) 255); + x_p252 = ((char) (x_p252+1)); + y_p253 = ((int) x_p252); + printf ("y=%d\n",y_p253); + return 0; +} diff --git a/ckit/regression/output/a204.c b/ckit/regression/output/a204.c new file mode 100644 index 0000000..4952d6f --- /dev/null +++ b/ckit/regression/output/a204.c @@ -0,0 +1,41 @@ + +struct foo_t29 { + int a[3]; + int b; +}; +struct foo_t29 w[2]={{{1,0,0},0},{{2,0,0},0}}; +struct foo_t29 z[9]={{{2,3,4},0},{{5,0,0},0},{{1,42,44},13},{{1,2,2},2},{{3,0,0},0},{{4,0,0},0},{{0,0,0},0},{{0,0,0},0},{{0,0,0},0}}; +int main () +{ + int i_p261; + int j_p262; + int post_p268; + int post_p273; + i_p261 = 0; + goto forStart_p265; + +forTop_p264: + ; + printf ("a="); + j_p262 = 0; + goto forStart_p270; + +forTop_p269: + ; + printf ("%10d ",z[i_p261].a[j_p262]); + post_p273 = j_p262; + j_p262 = (j_p262+1); + +forStart_p270: + ; + if (j_p262<3) + goto forTop_p269; + printf ("b=%d\n",z[i_p261].b); + post_p268 = i_p261; + i_p261 = (i_p261+1); + +forStart_p265: + ; + if (i_p261<(144/16)) + goto forTop_p264; +} diff --git a/ckit/regression/output/a205.c b/ckit/regression/output/a205.c new file mode 100644 index 0000000..c7e5608 --- /dev/null +++ b/ckit/regression/output/a205.c @@ -0,0 +1,10 @@ + +int main () +{ + int *p_p275; + int *post_p276; + post_p276 = p_p275; + p_p275 = ((int *) (((int) p_p275)+4)); + post_p276; + p_p275 = (p_p275+3); +} diff --git a/ckit/regression/output/a206.c b/ckit/regression/output/a206.c new file mode 100644 index 0000000..ff23048 --- /dev/null +++ b/ckit/regression/output/a206.c @@ -0,0 +1,17 @@ + +int main () +{ + int x_p278; + int pref_p281; + int y_p279; + int pref_p282; + int volatile j_p280; + x_p278 = 1; + x_p278 = (x_p278+1); + pref_p281 = x_p278; + y_p279 = pref_p281; + x_p278 = (x_p278+1); + pref_p282 = x_p278; + j_p280 = pref_p282; + return 0; +} diff --git a/ckit/regression/output/a207.c b/ckit/regression/output/a207.c new file mode 100644 index 0000000..ce13acd --- /dev/null +++ b/ckit/regression/output/a207.c @@ -0,0 +1,16 @@ + +int main () +{ + int x_p284; + int y_p285[3]; + int pref_p287; + int volatile j_p286; + x_p284 = 1; + y_p285[0] = 2; + y_p285[1] = 1; + y_p285[2] = 3; + x_p284 = (x_p284+1); + pref_p287 = x_p284; + j_p286 = pref_p287; + return x_p284+j_p286; +} diff --git a/ckit/regression/output/a208.c b/ckit/regression/output/a208.c new file mode 100644 index 0000000..498dd35 --- /dev/null +++ b/ckit/regression/output/a208.c @@ -0,0 +1,9 @@ + +struct qqux_t30 { + int x; +}; +int main () +{ + struct qqux_t30 x_p291; + +} diff --git a/ckit/regression/output/a209.c b/ckit/regression/output/a209.c new file mode 100644 index 0000000..5d5d23a --- /dev/null +++ b/ckit/regression/output/a209.c @@ -0,0 +1,9 @@ + +struct qqux_t31 { + int x; +}; +int main () +{ + unsigned int x_p298; + +} diff --git a/ckit/regression/output/a21.c b/ckit/regression/output/a21.c new file mode 100644 index 0000000..eb2a47e --- /dev/null +++ b/ckit/regression/output/a21.c @@ -0,0 +1,18 @@ + +struct X_t32 { + int x1; + int x2; + int x3; +}; +enum Y_t33 { + x1=0, + x2=0, + x3=0 +}; +int main () +{ + struct X_t32 y_p308; + struct X_t32 z_p309; + float x2_p310; + y_p308 = z_p309; +} diff --git a/ckit/regression/output/a210.c b/ckit/regression/output/a210.c new file mode 100644 index 0000000..ab5a996 --- /dev/null +++ b/ckit/regression/output/a210.c @@ -0,0 +1,7 @@ + +int main () +{ + int *x_p312; + int y_p313; + y_p313 = 3; +} diff --git a/ckit/regression/output/a22.c b/ckit/regression/output/a22.c new file mode 100644 index 0000000..9d2eead --- /dev/null +++ b/ckit/regression/output/a22.c @@ -0,0 +1,22 @@ + +struct X_t34 { + int x1; + int x2; + int x3; +}; +enum Y_t35 { + x1=0, + x2=0, + x3=0 +}; +int main () +{ + struct X_t34 y_p323; + struct X_t34 z_p324; + void *p_p325; + float x2_p326; + char *i_p327; + p_p325 = ((void *) (&x2_p326)); + p_p325 = ((void *) (&y_p323)); + i_p327 = ((char *) p_p325); +} diff --git a/ckit/regression/output/a23.c b/ckit/regression/output/a23.c new file mode 100644 index 0000000..fbd99cf --- /dev/null +++ b/ckit/regression/output/a23.c @@ -0,0 +1,8 @@ + +int main () +{ + struct X_t37 { + int y; + }; + return 1; +} diff --git a/ckit/regression/output/a24.c b/ckit/regression/output/a24.c new file mode 100644 index 0000000..894eb9e --- /dev/null +++ b/ckit/regression/output/a24.c @@ -0,0 +1,20 @@ + +struct X_t38 { + int x1; + int x2; + int x3; +}; +enum Y_t39 { + x1=0, + x2=0, + x3=0 +}; +int main () +{ + struct X_t40 { + int x2; + char x3; + }; + struct X_t40 pp_p344; + pp_p344.x2 = ((int) x1); +} diff --git a/ckit/regression/output/a25.c b/ckit/regression/output/a25.c new file mode 100644 index 0000000..4c60f0b --- /dev/null +++ b/ckit/regression/output/a25.c @@ -0,0 +1,17 @@ + +struct t_t41 { + struct s_t42 *x; +}; +struct s_t42 { + struct t_t41 *x; +}; +int main () +{ + struct s_t44 { + struct t_t43 *y; + }; + struct t_t43 { + struct s_t44 *y; + }; + return 1; +} diff --git a/ckit/regression/output/a26.c b/ckit/regression/output/a26.c new file mode 100644 index 0000000..0880878 --- /dev/null +++ b/ckit/regression/output/a26.c @@ -0,0 +1,16 @@ + +struct t_t46 { + enum e_t45 *x; +}; +enum e_t45 { + g1=0, + g2=0 +}; +struct s_t47 { + enum e_t45 *x; +}; +int main () +{ + + return 1; +} diff --git a/ckit/regression/output/a27.c b/ckit/regression/output/a27.c new file mode 100644 index 0000000..069ca2d --- /dev/null +++ b/ckit/regression/output/a27.c @@ -0,0 +1,10 @@ + +void *y; +void *x; +void *y=&x; +void *x=&y; +int main () +{ + + 1; +} diff --git a/ckit/regression/output/a28.c b/ckit/regression/output/a28.c new file mode 100644 index 0000000..9a7ca61 --- /dev/null +++ b/ckit/regression/output/a28.c @@ -0,0 +1,7 @@ + +int main () +{ + int y_p368; + y_p368 = 1; + y_p368; +} diff --git a/ckit/regression/output/a29.c b/ckit/regression/output/a29.c new file mode 100644 index 0000000..cb4136c --- /dev/null +++ b/ckit/regression/output/a29.c @@ -0,0 +1,16 @@ + +enum A_t48 { + x1=0, + x2=0 +}; +enum B_t49 { + y1=0, + y2=0 +}; +int i; +int i; +int main () +{ + + return 0; +} diff --git a/ckit/regression/output/a3.c b/ckit/regression/output/a3.c new file mode 100644 index 0000000..2ede53b --- /dev/null +++ b/ckit/regression/output/a3.c @@ -0,0 +1,6 @@ + +int main () +{ + int j_p379; + j_p379 = ((int) 4); +} diff --git a/ckit/regression/output/a30.c b/ckit/regression/output/a30.c new file mode 100644 index 0000000..19e49aa --- /dev/null +++ b/ckit/regression/output/a30.c @@ -0,0 +1,11 @@ + +void * f (int x_p381) +{ + + return (void *) (((int) f)+x_p381); +} +int main () +{ + + return 0; +} diff --git a/ckit/regression/output/a31.c b/ckit/regression/output/a31.c new file mode 100644 index 0000000..cb84f3d --- /dev/null +++ b/ckit/regression/output/a31.c @@ -0,0 +1,11 @@ + +void * f (int x_p384) +{ + + return (void *) (((int) f)+x_p384); +} +int main () +{ + + f (3); +} diff --git a/ckit/regression/output/a32.c b/ckit/regression/output/a32.c new file mode 100644 index 0000000..e9912de --- /dev/null +++ b/ckit/regression/output/a32.c @@ -0,0 +1,12 @@ + +int f (); +int f (int c_p388) +{ + + return c_p388; +} +int main () +{ + char c_p390; + f ((int) c_p390); +} diff --git a/ckit/regression/output/a33.c b/ckit/regression/output/a33.c new file mode 100644 index 0000000..b5c2800 --- /dev/null +++ b/ckit/regression/output/a33.c @@ -0,0 +1,12 @@ + +int f (int (*)[]); +int main () +{ + int (*a_p393)[5]; + f ((int (*)[]) a_p393); +} +int f (int (*a_p395)[4]) +{ + + return a_p395[0][3]; +} diff --git a/ckit/regression/output/a34.c b/ckit/regression/output/a34.c new file mode 100644 index 0000000..bfa93b2 --- /dev/null +++ b/ckit/regression/output/a34.c @@ -0,0 +1,16 @@ + +static int g () +{ + + return 3; +} +static int f (int h_p398 ()) +{ + + return h_p398 (); +} +int main () +{ + + return f (g); +} diff --git a/ckit/regression/output/a35.c b/ckit/regression/output/a35.c new file mode 100644 index 0000000..cbae7ca --- /dev/null +++ b/ckit/regression/output/a35.c @@ -0,0 +1,18 @@ + +typedef unsigned short USHORT_t50; +typedef char UCHAR_t51; +struct t52 { + USHORT_t50 size; + UCHAR_t51 type; + UCHAR_t51 class; + long retran:8; + long to_esid:24; + long fill:8; + long from_esid:24; +}; +typedef struct t52 MGIHDR_t53; +int main () +{ + int i_p411[12]; + +} diff --git a/ckit/regression/output/a36.c b/ckit/regression/output/a36.c new file mode 100644 index 0000000..b9a4f7a --- /dev/null +++ b/ckit/regression/output/a36.c @@ -0,0 +1,17 @@ + +typedef unsigned long ULONG_t54; +struct cpMSCID_t55 { + ULONG_t54 sid:16; + ULONG_t54 swno:8; + ULONG_t54 fill:8; +}; +typedef struct cpMSCID_t55 CP_MSCID_TYPE_t56; +struct A_t57 { + CP_MSCID_TYPE_t56 t; +}; +int b[4]; +int main () +{ + int i_p422; + +} diff --git a/ckit/regression/output/a37.c b/ckit/regression/output/a37.c new file mode 100644 index 0000000..0240694 --- /dev/null +++ b/ckit/regression/output/a37.c @@ -0,0 +1,12 @@ + +void f (void); +int main () +{ + + f (); +} +void f () +{ + + +} diff --git a/ckit/regression/output/a38.c b/ckit/regression/output/a38.c new file mode 100644 index 0000000..88ffcf7 --- /dev/null +++ b/ckit/regression/output/a38.c @@ -0,0 +1,12 @@ + +int main () +{ + + goto forStart_p425; + +forTop_p424: + ; + +forStart_p425: + ; +} diff --git a/ckit/regression/output/a39.c b/ckit/regression/output/a39.c new file mode 100644 index 0000000..7ca8238 --- /dev/null +++ b/ckit/regression/output/a39.c @@ -0,0 +1,15 @@ + +struct t248 { + int count[3]; +}; +struct t248 *p; +int main () +{ + int i_p1427; + int post_p1428; + i_p1427 = 0; + post_p1428 = ((*p).count)[i_p1427]; + (*p).count[i_p1427] = (((*p).count)[i_p1427]+1); + post_p1428; + (*p).count[i_p1427] = (((*p).count)[i_p1427]+1); +} diff --git a/ckit/regression/output/a4.c b/ckit/regression/output/a4.c new file mode 100644 index 0000000..b25af83 --- /dev/null +++ b/ckit/regression/output/a4.c @@ -0,0 +1,14 @@ + +int * f () +{ + int i_p429; + i_p429 = 1; + return &i_p429; +} +int main () +{ + void *j_p431; + int *call_p432; + call_p432 = f (); + j_p431 = ((void *) call_p432); +} diff --git a/ckit/regression/output/a40.c b/ckit/regression/output/a40.c new file mode 100644 index 0000000..d033498 --- /dev/null +++ b/ckit/regression/output/a40.c @@ -0,0 +1,9 @@ + +float f=5.6; +float f1; +int main () +{ + + f1 = (f*f); + return 0; +} diff --git a/ckit/regression/output/a43.c b/ckit/regression/output/a43.c new file mode 100644 index 0000000..9547611 --- /dev/null +++ b/ckit/regression/output/a43.c @@ -0,0 +1,7 @@ + +int main () +{ + int *i_p1786; + int *j_p1787; + i_p1786 = (j_p1787+(1*4)); +} diff --git a/ckit/regression/output/a44.c b/ckit/regression/output/a44.c new file mode 100644 index 0000000..fb2189e --- /dev/null +++ b/ckit/regression/output/a44.c @@ -0,0 +1,11 @@ + +void f () +{ + + +} +int main () +{ + + f (); +} diff --git a/ckit/regression/output/a5.c b/ckit/regression/output/a5.c new file mode 100644 index 0000000..6087fe7 --- /dev/null +++ b/ckit/regression/output/a5.c @@ -0,0 +1,11 @@ + +int f () +{ + int i_p434; + i_p434 = 1; +} +int main () +{ + int i_p436; + i_p436 = (f&&i_p436); +} diff --git a/ckit/regression/output/a50.c b/ckit/regression/output/a50.c new file mode 100644 index 0000000..d033498 --- /dev/null +++ b/ckit/regression/output/a50.c @@ -0,0 +1,9 @@ + +float f=5.6; +float f1; +int main () +{ + + f1 = (f*f); + return 0; +} diff --git a/ckit/regression/output/a6.c b/ckit/regression/output/a6.c new file mode 100644 index 0000000..73d48ef --- /dev/null +++ b/ckit/regression/output/a6.c @@ -0,0 +1,8 @@ + +int main () +{ + int *i_p438; + int *j_p439; + int k_p440; + k_p440 = (i_p438>x1); +} diff --git a/ckit/regression/output/a8.c b/ckit/regression/output/a8.c new file mode 100644 index 0000000..ce7604e --- /dev/null +++ b/ckit/regression/output/a8.c @@ -0,0 +1,11 @@ + +enum e_t59 { + x1=0, + x2=0, + x3=0 +}; +enum e_t59 main () +{ + enum e_t59 k_p454; + k_p454 = ((enum e_t59) 45); +} diff --git a/ckit/regression/output/a9.c b/ckit/regression/output/a9.c new file mode 100644 index 0000000..cafda22 --- /dev/null +++ b/ckit/regression/output/a9.c @@ -0,0 +1,12 @@ + +enum X_t60 { + x1=0, + x2=0, + x3=0 +}; +enum X_t60 main () +{ + enum X_t60 i_p460; + int k_p461; + k_p461 = (-((int) i_p460)); +} diff --git a/ckit/regression/output/b2.c b/ckit/regression/output/b2.c new file mode 100644 index 0000000..ba75236 --- /dev/null +++ b/ckit/regression/output/b2.c @@ -0,0 +1,38 @@ + +struct foo_t61 { + int m; +}; +int i; +struct foo_t61 j; +int * f () +{ + + return &i; +} +struct foo_t61 * g () +{ + + return &j; +} +int main () +{ + struct foo_t61 *p_p469; + struct foo_t61 pp_p470; + int *call_p471; + struct foo_t61 *post_p472; + struct foo_t61 *pref_p473; + struct foo_t61 *call_p474; + call_p471 = f (); + *call_p471 = ((*call_p471)+1); + (*p_p469).m = (((*p_p469).m)+1); + post_p472 = p_p469; + p_p469 = ((struct foo_t61 *) (((int) p_p469)+4)); + (*post_p472).m = (((*post_p472).m)+1); + p_p469 = ((struct foo_t61 *) (((int) p_p469)+4)); + pref_p473 = p_p469; + (*pref_p473).m = (((*pref_p473).m)+1); + (*p_p469).m = (((*p_p469).m)+1); + pp_p470.m = ((pp_p470.m)+1); + call_p474 = g (); + (*call_p474).m = (((*call_p474).m)+1); +} diff --git a/ckit/regression/output/b3.c b/ckit/regression/output/b3.c new file mode 100644 index 0000000..80f9a0a --- /dev/null +++ b/ckit/regression/output/b3.c @@ -0,0 +1,23 @@ + +struct t63 { + int n; +}; +struct bar_t62 { + struct t63 m; +}; +struct bar_t62 a; +struct bar_t62 b; +struct bar_t62 c; +int main () +{ + struct t65 { + int n; + }; + struct foo_t64 { + struct t65 m; + }; + typedef struct foo_t64 baz_t66; + struct foo_t64 *p_p486; + struct foo_t64 pp_p487; + +} diff --git a/ckit/regression/output/c100.c.c b/ckit/regression/output/c100.c.c new file mode 100644 index 0000000..9e74181 --- /dev/null +++ b/ckit/regression/output/c100.c.c @@ -0,0 +1,179 @@ + +typedef int __int32_t_t67; +typedef unsigned int __uint32_t_t68; +typedef long long __int64_t_t69; +typedef unsigned long long __uint64_t_t70; +typedef __int32_t_t67 __psint_t_t71; +typedef __uint32_t_t68 __psunsigned_t_t72; +typedef __int32_t_t67 __scint_t_t73; +typedef __uint32_t_t68 __scunsigned_t_t74; +typedef unsigned int size_t_t75; +typedef long fpos_t_t76; +typedef __int64_t_t69 off64_t_t77; +typedef __int64_t_t69 fpos64_t_t78; +typedef char *va_list_t79; +struct __file_s_t80 { + int _cnt; + char *_ptr; + char *_base; + char _flag; + char _file; +}; +typedef struct __file_s_t80 FILE_t81; +extern FILE_t81 __iob[100]; +extern FILE_t81 *_lastbuf; +extern char *_bufendtab[]; +extern char _sibuf[]; +extern char _sobuf[]; +extern int remove (char *); +extern int rename (char *,char *); +extern FILE_t81 * tmpfile (void); +extern char * tmpnam (char *); +extern int fclose (FILE_t81 *); +extern int fflush (FILE_t81 *); +extern FILE_t81 * fopen (char *,char *); +extern FILE_t81 * freopen (char *,char *,FILE_t81 *); +extern void setbuf (FILE_t81 *,char *); +extern int setvbuf (FILE_t81 *,char *,int,size_t_t75); +extern int fprintf (FILE_t81 *,char *,...); +extern int fscanf (FILE_t81 *,char *,...); +extern int printf (char *,...); +extern int scanf (char *,...); +extern int sprintf (char *,char *,...); +extern int sscanf (char *,char *,...); +extern int vfprintf (FILE_t81 *,char *,char *); +extern int vprintf (char *,char *); +extern int vsprintf (char *,char *,char *); +extern int fgetc (FILE_t81 *); +extern char * fgets (char *,int,FILE_t81 *); +extern int fputc (int,FILE_t81 *); +extern int fputs (char *,FILE_t81 *); +extern int getc (FILE_t81 *); +extern int getchar (void); +extern char * gets (char *); +extern int putc (int,FILE_t81 *); +extern int putchar (int); +extern int puts (char *); +extern int ungetc (int,FILE_t81 *); +extern size_t_t75 fread (void *,size_t_t75,size_t_t75,FILE_t81 *); +extern size_t_t75 fwrite (void *,size_t_t75,size_t_t75,FILE_t81 *); +extern int fgetpos (FILE_t81 *,fpos_t_t76 *); +extern int fseek (FILE_t81 *,long,int); +extern int fsetpos (FILE_t81 *,fpos_t_t76 *); +extern long ftell (FILE_t81 *); +extern void rewind (FILE_t81 *); +extern void clearerr (FILE_t81 *); +extern int feof (FILE_t81 *); +extern int ferror (FILE_t81 *); +extern void perror (char *); +extern int __filbuf (FILE_t81 *); +extern int __flsbuf (int,FILE_t81 *); +extern FILE_t81 * fdopen (int,char *); +extern int fileno (FILE_t81 *); +extern void flockfile (FILE_t81 *); +extern int ftrylockfile (FILE_t81 *); +extern void funlockfile (FILE_t81 *); +extern int getc_unlocked (FILE_t81 *); +extern int putc_unlocked (int,FILE_t81 *); +extern int getchar_unlocked (void); +extern int putchar_unlocked (int); +extern FILE_t81 * popen (char *,char *); +extern int pclose (FILE_t81 *); +extern int getopt (int,char **,char *); +extern char *optarg; +extern int opterr; +extern int optind; +extern int optopt; +extern int getsubopt (char **,char **,char **); +extern void getoptreset (void); +extern char * ctermid (char *); +extern char * cuserid (char *); +extern char * tempnam (char *,char *); +extern int getw (FILE_t81 *); +extern int putw (int,FILE_t81 *); +extern char * mktemp (char *); +extern int mkstemp (char *); +extern int setbuffer (FILE_t81 *,char *,int); +extern int setlinebuf (FILE_t81 *); +extern int system (char *); +extern int fgetpos64 (FILE_t81 *,fpos64_t_t78 *); +extern FILE_t81 * fopen64 (char *,char *); +extern FILE_t81 * freopen64 (char *,char *,FILE_t81 *); +extern int fseek64 (FILE_t81 *,off64_t_t77,int); +extern int fseeko64 (FILE_t81 *,off64_t_t77,int); +extern int fseeko (FILE_t81 *,__int64_t_t69,int); +extern int fsetpos64 (FILE_t81 *,fpos64_t_t78 *); +extern off64_t_t77 ftell64 (FILE_t81 *); +extern __int64_t_t69 ftello (FILE_t81 *); +extern off64_t_t77 ftello64 (FILE_t81 *); +extern FILE_t81 * tmpfile64 (void); +extern int __semputc (int,FILE_t81 *); +extern int __semgetc (FILE_t81 *); +extern int __us_rsthread_stdio; +extern char * ctermid_r (char *); +int main () +{ + int c_p600; + int i_p601; + int quesCol_p605; + int pref_p609; + int quesCol_p610; + i_p601 = 0; + goto whileCont_p603; + +whileTop_p602: + ; + i_p601 = (i_p601+1); + pref_p609 = i_p601; + if (pref_p609>100) + goto whileBrk_p604; + if (__us_rsthread_stdio) + quesCol_p610 = __semputc (c_p600,(FILE_t81 *) (&__iob[1])); + else + { + int quesCol_p611; + int pref_p612; + (*(&__iob[1]))._cnt = (((*(&__iob[1]))._cnt)-1); + pref_p612 = ((*(&__iob[1]))._cnt); + if (pref_p612<0) + quesCol_p611 = __flsbuf (c_p600,(FILE_t81 *) (&__iob[1])); + else + { + char *post_p613; + post_p613 = ((*(&__iob[1]))._ptr); + (*(&__iob[1]))._ptr = ((char *) (((int) ((*(&__iob[1]))._ptr))+1)); + *post_p613 = ((char) c_p600); + quesCol_p611 = ((int) (*post_p613)); + } + quesCol_p610 = quesCol_p611; + } + quesCol_p610; + +whileCont_p603: + ; + if (__us_rsthread_stdio) + quesCol_p605 = __semgetc ((FILE_t81 *) (&__iob[0])); + else + { + int quesCol_p606; + int pref_p607; + (*(&__iob[0]))._cnt = (((*(&__iob[0]))._cnt)-1); + pref_p607 = ((*(&__iob[0]))._cnt); + if (pref_p607<0) + quesCol_p606 = __filbuf ((FILE_t81 *) (&__iob[0])); + else + { + char *post_p608; + post_p608 = ((*(&__iob[0]))._ptr); + (*(&__iob[0]))._ptr = ((char *) (((int) ((*(&__iob[0]))._ptr))+1)); + quesCol_p606 = ((int) (*post_p608)); + } + quesCol_p605 = quesCol_p606; + } + c_p600 = quesCol_p605; + if (c_p600!=(-1)) + goto whileTop_p602; + +whileBrk_p604: + ; +} diff --git a/ckit/regression/output/fact1.c b/ckit/regression/output/fact1.c new file mode 100644 index 0000000..63b6d4a --- /dev/null +++ b/ckit/regression/output/fact1.c @@ -0,0 +1,50 @@ + +int main () +{ + int i_p615; + int a_p616; + int post_p622; + int post_p626; + int post_p630; + i_p615 = 1; + a_p616 = 1; + goto forStart_p619; + +forTop_p618: + ; + a_p616 = (a_p616*i_p615); + post_p622 = i_p615; + i_p615 = (i_p615+1); + +forStart_p619: + ; + if (i_p615<=6) + goto forTop_p618; + printf ("fact 6 = %d\n",a_p616); + i_p615 = 1; + a_p616 = 1; + goto whileCont_p624; + +whileTop_p623: + ; + post_p626 = i_p615; + i_p615 = (i_p615+1); + a_p616 = (a_p616*post_p626); + +whileCont_p624: + ; + if (i_p615<=7) + goto whileTop_p623; + printf ("fact 7 = %d\n",a_p616); + i_p615 = 1; + a_p616 = 1; + +doTop_p627: + ; + post_p630 = i_p615; + i_p615 = (i_p615+1); + a_p616 = (a_p616*post_p630); + if (i_p615<=8) + goto doTop_p627; + printf ("fact 8 = %d\n",a_p616); +} diff --git a/ckit/regression/output/i100.c.c b/ckit/regression/output/i100.c.c new file mode 100644 index 0000000..c800e6f --- /dev/null +++ b/ckit/regression/output/i100.c.c @@ -0,0 +1,182 @@ + +typedef int __int32_t_t82; +typedef unsigned int __uint32_t_t83; +typedef long long __int64_t_t84; +typedef unsigned long long __uint64_t_t85; +typedef __int32_t_t82 __psint_t_t86; +typedef __uint32_t_t83 __psunsigned_t_t87; +typedef __int32_t_t82 __scint_t_t88; +typedef __uint32_t_t83 __scunsigned_t_t89; +typedef unsigned int size_t_t90; +typedef long fpos_t_t91; +typedef __int64_t_t84 off64_t_t92; +typedef __int64_t_t84 fpos64_t_t93; +typedef char *va_list_t94; +struct __file_s_t95 { + int _cnt; + char *_ptr; + char *_base; + char _flag; + char _file; +}; +typedef struct __file_s_t95 FILE_t96; +extern FILE_t96 __iob[100]; +extern FILE_t96 *_lastbuf; +extern char *_bufendtab[]; +extern char _sibuf[]; +extern char _sobuf[]; +extern int remove (char *); +extern int rename (char *,char *); +extern FILE_t96 * tmpfile (void); +extern char * tmpnam (char *); +extern int fclose (FILE_t96 *); +extern int fflush (FILE_t96 *); +extern FILE_t96 * fopen (char *,char *); +extern FILE_t96 * freopen (char *,char *,FILE_t96 *); +extern void setbuf (FILE_t96 *,char *); +extern int setvbuf (FILE_t96 *,char *,int,size_t_t90); +extern int fprintf (FILE_t96 *,char *,...); +extern int fscanf (FILE_t96 *,char *,...); +extern int printf (char *,...); +extern int scanf (char *,...); +extern int sprintf (char *,char *,...); +extern int sscanf (char *,char *,...); +extern int vfprintf (FILE_t96 *,char *,char *); +extern int vprintf (char *,char *); +extern int vsprintf (char *,char *,char *); +extern int fgetc (FILE_t96 *); +extern char * fgets (char *,int,FILE_t96 *); +extern int fputc (int,FILE_t96 *); +extern int fputs (char *,FILE_t96 *); +extern int getc (FILE_t96 *); +extern int getchar (void); +extern char * gets (char *); +extern int putc (int,FILE_t96 *); +extern int putchar (int); +extern int puts (char *); +extern int ungetc (int,FILE_t96 *); +extern size_t_t90 fread (void *,size_t_t90,size_t_t90,FILE_t96 *); +extern size_t_t90 fwrite (void *,size_t_t90,size_t_t90,FILE_t96 *); +extern int fgetpos (FILE_t96 *,fpos_t_t91 *); +extern int fseek (FILE_t96 *,long,int); +extern int fsetpos (FILE_t96 *,fpos_t_t91 *); +extern long ftell (FILE_t96 *); +extern void rewind (FILE_t96 *); +extern void clearerr (FILE_t96 *); +extern int feof (FILE_t96 *); +extern int ferror (FILE_t96 *); +extern void perror (char *); +extern int __filbuf (FILE_t96 *); +extern int __flsbuf (int,FILE_t96 *); +extern FILE_t96 * fdopen (int,char *); +extern int fileno (FILE_t96 *); +extern void flockfile (FILE_t96 *); +extern int ftrylockfile (FILE_t96 *); +extern void funlockfile (FILE_t96 *); +extern int getc_unlocked (FILE_t96 *); +extern int putc_unlocked (int,FILE_t96 *); +extern int getchar_unlocked (void); +extern int putchar_unlocked (int); +extern FILE_t96 * popen (char *,char *); +extern int pclose (FILE_t96 *); +extern int getopt (int,char **,char *); +extern char *optarg; +extern int opterr; +extern int optind; +extern int optopt; +extern int getsubopt (char **,char **,char **); +extern void getoptreset (void); +extern char * ctermid (char *); +extern char * cuserid (char *); +extern char * tempnam (char *,char *); +extern int getw (FILE_t96 *); +extern int putw (int,FILE_t96 *); +extern char * mktemp (char *); +extern int mkstemp (char *); +extern int setbuffer (FILE_t96 *,char *,int); +extern int setlinebuf (FILE_t96 *); +extern int system (char *); +extern int fgetpos64 (FILE_t96 *,fpos64_t_t93 *); +extern FILE_t96 * fopen64 (char *,char *); +extern FILE_t96 * freopen64 (char *,char *,FILE_t96 *); +extern int fseek64 (FILE_t96 *,off64_t_t92,int); +extern int fseeko64 (FILE_t96 *,off64_t_t92,int); +extern int fseeko (FILE_t96 *,__int64_t_t84,int); +extern int fsetpos64 (FILE_t96 *,fpos64_t_t93 *); +extern off64_t_t92 ftell64 (FILE_t96 *); +extern __int64_t_t84 ftello (FILE_t96 *); +extern off64_t_t92 ftello64 (FILE_t96 *); +extern FILE_t96 * tmpfile64 (void); +extern int __semputc (int,FILE_t96 *); +extern int __semgetc (FILE_t96 *); +extern int __us_rsthread_stdio; +extern char * ctermid_r (char *); +int main () +{ + int c_p743; + int i_p744; + int quesCol_p748; + int pref_p752; + int quesCol_p753; + i_p744 = 0; + goto whileCont_p746; + +whileTop_p745: + ; + i_p744 = (i_p744+1); + pref_p752 = i_p744; + if (pref_p752>100) + { + + printf ((char *) "\n"); + i_p744 = 0; + } + if (c_p743==10) + i_p744 = 0; + if (__us_rsthread_stdio) + quesCol_p753 = __semputc (c_p743,(FILE_t96 *) (&__iob[1])); + else + { + int quesCol_p754; + int pref_p755; + (*(&__iob[1]))._cnt = (((*(&__iob[1]))._cnt)-1); + pref_p755 = ((*(&__iob[1]))._cnt); + if (pref_p755<0) + quesCol_p754 = __flsbuf (c_p743,(FILE_t96 *) (&__iob[1])); + else + { + char *post_p756; + post_p756 = ((*(&__iob[1]))._ptr); + (*(&__iob[1]))._ptr = ((char *) (((int) ((*(&__iob[1]))._ptr))+1)); + *post_p756 = ((char) c_p743); + quesCol_p754 = ((int) (*post_p756)); + } + quesCol_p753 = quesCol_p754; + } + quesCol_p753; + +whileCont_p746: + ; + if (__us_rsthread_stdio) + quesCol_p748 = __semgetc ((FILE_t96 *) (&__iob[0])); + else + { + int quesCol_p749; + int pref_p750; + (*(&__iob[0]))._cnt = (((*(&__iob[0]))._cnt)-1); + pref_p750 = ((*(&__iob[0]))._cnt); + if (pref_p750<0) + quesCol_p749 = __filbuf ((FILE_t96 *) (&__iob[0])); + else + { + char *post_p751; + post_p751 = ((*(&__iob[0]))._ptr); + (*(&__iob[0]))._ptr = ((char *) (((int) ((*(&__iob[0]))._ptr))+1)); + quesCol_p749 = ((int) (*post_p751)); + } + quesCol_p748 = quesCol_p749; + } + c_p743 = quesCol_p748; + if (c_p743!=(-1)) + goto whileTop_p745; +} diff --git a/ckit/regression/output/kf.c b/ckit/regression/output/kf.c new file mode 100644 index 0000000..db654f3 --- /dev/null +++ b/ckit/regression/output/kf.c @@ -0,0 +1,12 @@ + +typedef int date_t_t97; +typedef int Htime_t_t98; +struct t99 { + short npa; +}; +typedef struct t99 areacode_t_t100; +int main () +{ + int c_p762; + c_p762 = ((int) 4); +} diff --git a/ckit/regression/output/primes.c.c b/ckit/regression/output/primes.c.c new file mode 100644 index 0000000..71658f9 --- /dev/null +++ b/ckit/regression/output/primes.c.c @@ -0,0 +1,153 @@ + +typedef int __int32_t_t101; +typedef unsigned int __uint32_t_t102; +typedef long long __int64_t_t103; +typedef unsigned long long __uint64_t_t104; +typedef __int32_t_t101 __psint_t_t105; +typedef __uint32_t_t102 __psunsigned_t_t106; +typedef __int32_t_t101 __scint_t_t107; +typedef __uint32_t_t102 __scunsigned_t_t108; +typedef unsigned int size_t_t109; +typedef long fpos_t_t110; +typedef __int64_t_t103 off64_t_t111; +typedef __int64_t_t103 fpos64_t_t112; +typedef char *va_list_t113; +struct __file_s_t114 { + int _cnt; + char *_ptr; + char *_base; + char _flag; + char _file; +}; +typedef struct __file_s_t114 FILE_t115; +extern FILE_t115 __iob[100]; +extern FILE_t115 *_lastbuf; +extern char *_bufendtab[]; +extern char _sibuf[]; +extern char _sobuf[]; +extern int remove (char *); +extern int rename (char *,char *); +extern FILE_t115 * tmpfile (void); +extern char * tmpnam (char *); +extern int fclose (FILE_t115 *); +extern int fflush (FILE_t115 *); +extern FILE_t115 * fopen (char *,char *); +extern FILE_t115 * freopen (char *,char *,FILE_t115 *); +extern void setbuf (FILE_t115 *,char *); +extern int setvbuf (FILE_t115 *,char *,int,size_t_t109); +extern int fprintf (FILE_t115 *,char *,...); +extern int fscanf (FILE_t115 *,char *,...); +extern int printf (char *,...); +extern int scanf (char *,...); +extern int sprintf (char *,char *,...); +extern int sscanf (char *,char *,...); +extern int vfprintf (FILE_t115 *,char *,char *); +extern int vprintf (char *,char *); +extern int vsprintf (char *,char *,char *); +extern int fgetc (FILE_t115 *); +extern char * fgets (char *,int,FILE_t115 *); +extern int fputc (int,FILE_t115 *); +extern int fputs (char *,FILE_t115 *); +extern int getc (FILE_t115 *); +extern int getchar (void); +extern char * gets (char *); +extern int putc (int,FILE_t115 *); +extern int putchar (int); +extern int puts (char *); +extern int ungetc (int,FILE_t115 *); +extern size_t_t109 fread (void *,size_t_t109,size_t_t109,FILE_t115 *); +extern size_t_t109 fwrite (void *,size_t_t109,size_t_t109,FILE_t115 *); +extern int fgetpos (FILE_t115 *,fpos_t_t110 *); +extern int fseek (FILE_t115 *,long,int); +extern int fsetpos (FILE_t115 *,fpos_t_t110 *); +extern long ftell (FILE_t115 *); +extern void rewind (FILE_t115 *); +extern void clearerr (FILE_t115 *); +extern int feof (FILE_t115 *); +extern int ferror (FILE_t115 *); +extern void perror (char *); +extern int __filbuf (FILE_t115 *); +extern int __flsbuf (int,FILE_t115 *); +extern FILE_t115 * fdopen (int,char *); +extern int fileno (FILE_t115 *); +extern void flockfile (FILE_t115 *); +extern int ftrylockfile (FILE_t115 *); +extern void funlockfile (FILE_t115 *); +extern int getc_unlocked (FILE_t115 *); +extern int putc_unlocked (int,FILE_t115 *); +extern int getchar_unlocked (void); +extern int putchar_unlocked (int); +extern FILE_t115 * popen (char *,char *); +extern int pclose (FILE_t115 *); +extern int getopt (int,char **,char *); +extern char *optarg; +extern int opterr; +extern int optind; +extern int optopt; +extern int getsubopt (char **,char **,char **); +extern void getoptreset (void); +extern char * ctermid (char *); +extern char * cuserid (char *); +extern char * tempnam (char *,char *); +extern int getw (FILE_t115 *); +extern int putw (int,FILE_t115 *); +extern char * mktemp (char *); +extern int mkstemp (char *); +extern int setbuffer (FILE_t115 *,char *,int); +extern int setlinebuf (FILE_t115 *); +extern int system (char *); +extern int fgetpos64 (FILE_t115 *,fpos64_t_t112 *); +extern FILE_t115 * fopen64 (char *,char *); +extern FILE_t115 * freopen64 (char *,char *,FILE_t115 *); +extern int fseek64 (FILE_t115 *,off64_t_t111,int); +extern int fseeko64 (FILE_t115 *,off64_t_t111,int); +extern int fseeko (FILE_t115 *,__int64_t_t103,int); +extern int fsetpos64 (FILE_t115 *,fpos64_t_t112 *); +extern off64_t_t111 ftell64 (FILE_t115 *); +extern __int64_t_t103 ftello (FILE_t115 *); +extern off64_t_t111 ftello64 (FILE_t115 *); +extern FILE_t115 * tmpfile64 (void); +extern int __semputc (int,FILE_t115 *); +extern int __semgetc (FILE_t115 *); +extern int __us_rsthread_stdio; +extern char * ctermid_r (char *); +int is_prime (int); +int main () +{ + int i_p876; + int post_p884; + i_p876 = 2; + goto forStart_p881; + +forTop_p880: + ; + if (is_prime (i_p876)) + printf ((char *) "%d\n",i_p876); + post_p884 = i_p876; + i_p876 = (i_p876+1); + +forStart_p881: + ; + if (1) + goto forTop_p880; +} +int is_prime (int i_p878) +{ + int j_p879; + int post_p889; + j_p879 = 2; + goto forStart_p886; + +forTop_p885: + ; + if ((i_p878%j_p879)==0) + return 0; + post_p889 = j_p879; + j_p879 = (j_p879+1); + +forStart_p886: + ; + if ((j_p879*j_p879)<=i_p878) + goto forTop_p885; + return 1; +} diff --git a/ckit/regression/output/ps2ascii.c.c b/ckit/regression/output/ps2ascii.c.c new file mode 100644 index 0000000..6268904 --- /dev/null +++ b/ckit/regression/output/ps2ascii.c.c @@ -0,0 +1,1493 @@ + +typedef int __int32_t_t116; +typedef unsigned int __uint32_t_t117; +typedef long long __int64_t_t118; +typedef unsigned long long __uint64_t_t119; +typedef __int32_t_t116 __psint_t_t120; +typedef __uint32_t_t117 __psunsigned_t_t121; +typedef __int32_t_t116 __scint_t_t122; +typedef __uint32_t_t117 __scunsigned_t_t123; +typedef unsigned int size_t_t124; +typedef long fpos_t_t125; +typedef __int64_t_t118 off64_t_t126; +typedef __int64_t_t118 fpos64_t_t127; +typedef char *va_list_t128; +struct __file_s_t129 { + int _cnt; + char *_ptr; + char *_base; + char _flag; + char _file; +}; +typedef struct __file_s_t129 FILE_t130; +extern FILE_t130 __iob[100]; +extern FILE_t130 *_lastbuf; +extern char *_bufendtab[]; +extern char _sibuf[]; +extern char _sobuf[]; +extern int remove (char *); +extern int rename (char *,char *); +extern FILE_t130 * tmpfile (void); +extern char * tmpnam (char *); +extern int fclose (FILE_t130 *); +extern int fflush (FILE_t130 *); +extern FILE_t130 * fopen (char *,char *); +extern FILE_t130 * freopen (char *,char *,FILE_t130 *); +extern void setbuf (FILE_t130 *,char *); +extern int setvbuf (FILE_t130 *,char *,int,size_t_t124); +extern int fprintf (FILE_t130 *,char *,...); +extern int fscanf (FILE_t130 *,char *,...); +extern int printf (char *,...); +extern int scanf (char *,...); +extern int sprintf (char *,char *,...); +extern int sscanf (char *,char *,...); +extern int vfprintf (FILE_t130 *,char *,char *); +extern int vprintf (char *,char *); +extern int vsprintf (char *,char *,char *); +extern int fgetc (FILE_t130 *); +extern char * fgets (char *,int,FILE_t130 *); +extern int fputc (int,FILE_t130 *); +extern int fputs (char *,FILE_t130 *); +extern int getc (FILE_t130 *); +extern int getchar (void); +extern char * gets (char *); +extern int putc (int,FILE_t130 *); +extern int putchar (int); +extern int puts (char *); +extern int ungetc (int,FILE_t130 *); +extern size_t_t124 fread (void *,size_t_t124,size_t_t124,FILE_t130 *); +extern size_t_t124 fwrite (void *,size_t_t124,size_t_t124,FILE_t130 *); +extern int fgetpos (FILE_t130 *,fpos_t_t125 *); +extern int fseek (FILE_t130 *,long,int); +extern int fsetpos (FILE_t130 *,fpos_t_t125 *); +extern long ftell (FILE_t130 *); +extern void rewind (FILE_t130 *); +extern void clearerr (FILE_t130 *); +extern int feof (FILE_t130 *); +extern int ferror (FILE_t130 *); +extern void perror (char *); +extern int __filbuf (FILE_t130 *); +extern int __flsbuf (int,FILE_t130 *); +extern FILE_t130 * fdopen (int,char *); +extern int fileno (FILE_t130 *); +extern void flockfile (FILE_t130 *); +extern int ftrylockfile (FILE_t130 *); +extern void funlockfile (FILE_t130 *); +extern int getc_unlocked (FILE_t130 *); +extern int putc_unlocked (int,FILE_t130 *); +extern int getchar_unlocked (void); +extern int putchar_unlocked (int); +extern FILE_t130 * popen (char *,char *); +extern int pclose (FILE_t130 *); +extern int getopt (int,char **,char *); +extern char *optarg; +extern int opterr; +extern int optind; +extern int optopt; +extern int getsubopt (char **,char **,char **); +extern void getoptreset (void); +extern char * ctermid (char *); +extern char * cuserid (char *); +extern char * tempnam (char *,char *); +extern int getw (FILE_t130 *); +extern int putw (int,FILE_t130 *); +extern char * mktemp (char *); +extern int mkstemp (char *); +extern int setbuffer (FILE_t130 *,char *,int); +extern int setlinebuf (FILE_t130 *); +extern int system (char *); +extern int fgetpos64 (FILE_t130 *,fpos64_t_t127 *); +extern FILE_t130 * fopen64 (char *,char *); +extern FILE_t130 * freopen64 (char *,char *,FILE_t130 *); +extern int fseek64 (FILE_t130 *,off64_t_t126,int); +extern int fseeko64 (FILE_t130 *,off64_t_t126,int); +extern int fseeko (FILE_t130 *,__int64_t_t118,int); +extern int fsetpos64 (FILE_t130 *,fpos64_t_t127 *); +extern off64_t_t126 ftell64 (FILE_t130 *); +extern __int64_t_t118 ftello (FILE_t130 *); +extern off64_t_t126 ftello64 (FILE_t130 *); +extern FILE_t130 * tmpfile64 (void); +extern int __semputc (int,FILE_t130 *); +extern int __semgetc (FILE_t130 *); +extern int __us_rsthread_stdio; +extern char * ctermid_r (char *); +typedef char uchar_t_t131; +typedef unsigned short ushort_t_t132; +typedef unsigned int uint_t_t133; +typedef unsigned long ulong_t_t134; +typedef char *addr_t_t135; +typedef char *caddr_t_t136; +typedef long daddr_t_t137; +typedef long pgno_t_t138; +typedef __uint32_t_t117 pfn_t_t139; +typedef short cnt_t_t140; +typedef unsigned long basictime_t_t141; +typedef __int64_t_t118 micro_t_t142; +typedef __int32_t_t116 pgcnt_t_t143; +enum t144 { + B_FALSE=0, + B_TRUE=0 +}; +typedef enum t144 boolean_t_t145; +typedef long id_t_t146; +typedef ulong_t_t134 major_t_t147; +typedef ulong_t_t134 minor_t_t148; +typedef ushort_t_t132 o_mode_t_t149; +typedef short o_dev_t_t150; +typedef ushort_t_t132 o_uid_t_t151; +typedef o_uid_t_t151 o_gid_t_t152; +typedef short o_nlink_t_t153; +typedef short o_pid_t_t154; +typedef __uint32_t_t117 o_ino_t_t155; +typedef unsigned long mode_t_t156; +typedef unsigned long dev_t_t157; +typedef long uid_t_t158; +typedef long gid_t_t159; +typedef unsigned long nlink_t_t160; +typedef long pid_t_t161; +typedef dev_t_t157 vertex_hdl_t_t162; +typedef unsigned long ino_t_t163; +typedef __uint64_t_t119 ino64_t_t164; +typedef long off_t_t165; +typedef __scint_t_t122 __scoff_t_t166; +typedef __scoff_t_t166 scoff_t_t167; +typedef __int64_t_t118 blkcnt64_t_t168; +typedef __uint64_t_t119 fsblkcnt64_t_t169; +typedef __uint64_t_t119 fsfilcnt64_t_t170; +typedef long blkcnt_t_t171; +typedef ulong_t_t134 fsblkcnt_t_t172; +typedef ulong_t_t134 fsfilcnt_t_t173; +typedef long swblk_t_t174; +typedef unsigned long paddr_t_t175; +typedef unsigned long iopaddr_t_t176; +typedef int key_t_t177; +typedef char use_t_t178; +typedef long sysid_t_t179; +typedef short index_t_t180; +typedef short nasid_t_t181; +typedef short cnodeid_t_t182; +typedef signed char partid_t_t183; +typedef short moduleid_t_t184; +typedef unsigned int lock_t_t185; +typedef short cpuid_t_t186; +typedef char pri_t_t187; +typedef __uint64_t_t119 accum_t_t188; +typedef __int64_t_t118 prid_t_t189; +typedef __int64_t_t118 ash_t_t190; +typedef int cell_t_t191; +typedef int ssize_t_t192; +typedef long time_t_t193; +typedef long clock_t_t194; +typedef long wchar_t_t195; +typedef int clockid_t_t196; +typedef int timer_t_t197; +typedef unsigned int useconds_t_t198; +typedef __scunsigned_t_t123 bitnum_t_t199; +typedef __scunsigned_t_t123 bitlen_t_t200; +typedef int processorid_t_t201; +typedef int toid_t_t202; +typedef long *qaddr_t_t203; +typedef __uint32_t_t117 inst_t_t204; +typedef unsigned int machreg_t_t205; +typedef __uint32_t_t117 fpreg_t_t206; +typedef signed char int8_t_t207; +typedef char uint8_t_t208; +typedef short int16_t_t209; +typedef unsigned short uint16_t_t210; +typedef int int32_t_t211; +typedef unsigned int uint32_t_t212; +typedef __int64_t_t118 int64_t_t213; +typedef __uint64_t_t119 uint64_t_t214; +typedef __int64_t_t118 intmax_t_t215; +typedef __uint64_t_t119 uintmax_t_t216; +typedef long intptr_t_t217; +typedef unsigned long uintptr_t_t218; +typedef char u_int8_t_t219; +typedef unsigned short u_int16_t_t220; +typedef __uint32_t_t117 u_int32_t_t221; +typedef long hostid_t_t222; +struct t223 { + int r[1]; +}; +typedef struct t223 *physadr_t224; +typedef char unchar_t225; +typedef char u_char_t226; +typedef unsigned short ushort_t227; +typedef unsigned short u_short_t228; +typedef unsigned int uint_t229; +typedef unsigned int u_int_t230; +typedef unsigned long ulong_t231; +typedef unsigned long u_long_t232; +struct _quad_t233 { + long val[2]; +}; +typedef struct _quad_t233 quad_t234; +typedef long fd_mask_t_t235; +typedef unsigned long ufd_mask_t_t236; +struct fd_set_t237 { + fd_mask_t_t235 fds_bits[32]; +}; +typedef struct fd_set_t237 fd_set_t238; +extern void * memcpy (void *,void *,size_t_t124); +extern void * memmove (void *,void *,size_t_t124); +extern char * strcpy (char *,char *); +extern char * strncpy (char *,char *,size_t_t124); +extern char * strcat (char *,char *); +extern char * strncat (char *,char *,size_t_t124); +extern void * memccpy (void *,void *,int,size_t_t124); +extern int memcmp (void *,void *,size_t_t124); +extern int strcmp (char *,char *); +extern int strcoll (char *,char *); +extern int strncmp (char *,char *,size_t_t124); +extern size_t_t124 strxfrm (char *,char *,size_t_t124); +extern void * memchr (void *,int,size_t_t124); +extern char * strchr (char *,int); +extern size_t_t124 strcspn (char *,char *); +extern char * strpbrk (char *,char *); +extern char * strrchr (char *,int); +extern size_t_t124 strspn (char *,char *); +extern char * strstr (char *,char *); +extern char * strtok (char *,char *); +extern void * memset (void *,int,size_t_t124); +extern char * strerror (int); +extern size_t_t124 strlen (char *); +extern int ffs (int); +extern int strcasecmp (char *,char *); +extern int strncasecmp (char *,char *,size_t_t124); +extern char * strdup (char *); +extern char * strtok_r (char *,char *,char **); +typedef long fd_mask_t239; +struct t240 { + __uint32_t_t117 sigbits[2]; +}; +typedef struct t240 k_sigset_t_t241; +extern int bcmp (void *,void *,size_t_t124); +extern void bcopy (void *,void *,size_t_t124); +extern void bzero (void *,size_t_t124); +extern char * index (char *,int); +extern char * rindex (char *,int); +extern int isalnum (int); +extern int isalpha (int); +extern int iscntrl (int); +extern int isdigit (int); +extern int isgraph (int); +extern int islower (int); +extern int isprint (int); +extern int ispunct (int); +extern int isspace (int); +extern int isupper (int); +extern int isxdigit (int); +extern int tolower (int); +extern int toupper (int); +extern int isascii (int); +extern int toascii (int); +extern int _tolower (int); +extern int _toupper (int); +extern char __ctype[]; +struct t242 { + int quot; + int rem; +}; +typedef struct t242 div_t_t243; +struct t244 { + long quot; + long rem; +}; +typedef struct t244 ldiv_t_t245; +struct t246 { + long long quot; + long long rem; +}; +typedef struct t246 lldiv_t_t247; +extern char __ctype[]; +extern double atof (char *); +extern int atoi (char *); +extern long atol (char *); +extern double strtod (char *,char **); +extern long strtol (char *,char **,int); +extern unsigned long strtoul (char *,char **,int); +extern int rand (void); +extern void srand (unsigned int); +extern void * calloc (size_t_t124,size_t_t124); +extern void free (void *); +extern void * malloc (size_t_t124); +extern void * realloc (void *,size_t_t124); +extern void abort (void); +extern int atexit (void (*) (void)); +extern void exit (int); +extern char * getenv (char *); +extern int system (char *); +extern void * bsearch (void *,void *,size_t_t124,size_t_t124,int (*) (void *,void *)); +extern void qsort (void *,size_t_t124,size_t_t124,int (*) (void *,void *)); +extern int abs (int); +extern div_t_t243 div (int,int); +extern long labs (long); +extern ldiv_t_t245 ldiv (long,long); +extern int mbtowc (wchar_t_t195 *,char *,size_t_t124); +extern int mblen (char *,size_t_t124); +extern int wctomb (char *,wchar_t_t195); +extern size_t_t124 mbstowcs (wchar_t_t195 *,char *,size_t_t124); +extern size_t_t124 wcstombs (char *,wchar_t_t195 *,size_t_t124); +extern int putenv (char *); +extern double drand48 (void); +extern double erand48 (unsigned short *); +extern long lrand48 (void); +extern long nrand48 (unsigned short *); +extern long mrand48 (void); +extern long jrand48 (unsigned short *); +extern void srand48 (long); +extern void lcong48 (unsigned short *); +extern void setkey (char *); +extern unsigned short * seed48 (unsigned short *); +extern long a64l (char *); +extern char * ecvt (double,int,int *,int *); +extern char * fcvt (double,int,int *,int *); +extern char * gcvt (double,int,char *); +extern int getsubopt (char **,char **,char **); +extern int grantpt (int); +extern char * initstate (unsigned int,char *,size_t_t124); +extern char * l64a (long); +extern char * mktemp (char *); +extern int mkstemp (char *); +extern char * ptsname (int); +extern long random (void); +extern char * realpath (char *,char *); +extern char * setstate (char *); +extern void srandom (unsigned int); +extern int ttyslot (void); +extern int unlockpt (int); +extern void * valloc (size_t_t124); +extern int rand_r (unsigned int *); +extern int atcheckpoint (void (*) (void)); +extern int atrestart (void (*) (void)); +extern int getpw (int,char *); +extern void l3tol (long *,char *,int); +extern void ltol3 (char *,long *,int); +extern void * memalign (size_t_t124,size_t_t124); +extern int dup2 (int,int); +extern char * getcwd (char *,size_t_t124); +extern char * getlogin (void); +extern char * getpass (char *); +extern int isatty (int); +extern void swab (void *,void *,ssize_t_t192); +extern char * ttyname (int); +extern long long atoll (char *); +extern long long strtoll (char *,char **,int); +extern unsigned long long strtoull (char *,char **,int); +extern long long llabs (long long); +extern lldiv_t_t247 lldiv (long long,long long); +extern char * ecvt_r (double,int,int *,int *,char *); +extern char * fcvt_r (double,int,int *,int *,char *); +void dviparse (FILE_t130 *); +void psparse (); +void main (int argc_p1257,char *argv_p1258[]) +{ + int i_p1259; + int known_flag_p1260; + int dvi_file_p1261; + FILE_t130 *file_p1262; + FILE_t130 *source_p1263; + int post_p1286; + int call_p1287; + int call_p1288; + FILE_t130 *call_p1289; + dvi_file_p1261 = 0; + source_p1263 = ((FILE_t130 *) (&__iob[0])); + i_p1259 = 1; + goto forStart_p1283; + +forTop_p1282: + ; + known_flag_p1260 = 0; + call_p1287 = strcmp ((char *) argv_p1258[i_p1259],(char *) "-dvi"); + if (call_p1287==0) + { + + dvi_file_p1261 = 1; + known_flag_p1260 = 1; + } + call_p1288 = strcmp ((char *) argv_p1258[i_p1259],(char *) "-"); + if (call_p1288==0) + { + + source_p1263 = ((FILE_t130 *) (&__iob[0])); + known_flag_p1260 = 1; + } + if (!known_flag_p1260) + { + + call_p1289 = fopen ((char *) argv_p1258[i_p1259],(char *) "r"); + file_p1262 = call_p1289; + if (file_p1262!=0) + source_p1263 = file_p1262; + else + { + + fprintf ((FILE_t130 *) (&__iob[2]),(char *) "ps2txt: error opening file %s\n",argv_p1258[i_p1259]); + fprintf ((FILE_t130 *) (&__iob[2]),(char *) "usage: ps2txt [-dvi] [-] [input_file.ps]\n"); + exit (1); + } + } + post_p1286 = i_p1259; + i_p1259 = (i_p1259+1); + +forStart_p1283: + ; + if (i_p12590) + { + + if (__us_rsthread_stdio) + quesCol_p1377 = __semputc (ch_p1279,(FILE_t130 *) (&__iob[1])); + else + { + int quesCol_p1378; + int pref_p1379; + (*(&__iob[1]))._cnt = (((*(&__iob[1]))._cnt)-1); + pref_p1379 = ((*(&__iob[1]))._cnt); + if (pref_p1379<0) + quesCol_p1378 = __flsbuf (ch_p1279,(FILE_t130 *) (&__iob[1])); + else + { + char *post_p1380; + post_p1380 = ((*(&__iob[1]))._ptr); + (*(&__iob[1]))._ptr = ((char *) (((int) ((*(&__iob[1]))._ptr))+1)); + *post_p1380 = ((char) ch_p1279); + quesCol_p1378 = ((int) (*post_p1380)); + } + quesCol_p1377 = quesCol_p1378; + } + quesCol_p1377; + } + } + goto switchBrk_p1371; + + case 41: + { + + post_p1381 = para_p1280; + para_p1280 = (para_p1280-1); + if (post_p1381>1) + { + + if (__us_rsthread_stdio) + quesCol_p1382 = __semputc (ch_p1279,(FILE_t130 *) (&__iob[1])); + else + { + int quesCol_p1383; + int pref_p1384; + (*(&__iob[1]))._cnt = (((*(&__iob[1]))._cnt)-1); + pref_p1384 = ((*(&__iob[1]))._cnt); + if (pref_p1384<0) + quesCol_p1383 = __flsbuf (ch_p1279,(FILE_t130 *) (&__iob[1])); + else + { + char *post_p1385; + post_p1385 = ((*(&__iob[1]))._ptr); + (*(&__iob[1]))._ptr = ((char *) (((int) ((*(&__iob[1]))._ptr))+1)); + *post_p1385 = ((char) ch_p1279); + quesCol_p1383 = ((int) (*post_p1385)); + } + quesCol_p1382 = quesCol_p1383; + } + quesCol_p1382; + } + else + { + + if (__us_rsthread_stdio) + quesCol_p1386 = __semputc (32,(FILE_t130 *) (&__iob[1])); + else + { + int quesCol_p1387; + int pref_p1388; + (*(&__iob[1]))._cnt = (((*(&__iob[1]))._cnt)-1); + pref_p1388 = ((*(&__iob[1]))._cnt); + if (pref_p1388<0) + quesCol_p1387 = __flsbuf (32,(FILE_t130 *) (&__iob[1])); + else + { + char *post_p1389; + post_p1389 = ((*(&__iob[1]))._ptr); + (*(&__iob[1]))._ptr = ((char *) (((int) ((*(&__iob[1]))._ptr))+1)); + *post_p1389 = ((char) 32); + quesCol_p1387 = ((int) (*post_p1389)); + } + quesCol_p1386 = quesCol_p1387; + } + quesCol_p1386; + } + } + last_p1281 = 1; + goto switchBrk_p1371; + + case 92: + if (para_p1280>0) + { + + call_p1390 = fgetc (source_p1276); + switch (ch_p1279 = call_p1390) + { + + + case 40: + + case 41: + { + + if (__us_rsthread_stdio) + quesCol_p1392 = __semputc (ch_p1279,(FILE_t130 *) (&__iob[1])); + else + { + int quesCol_p1393; + int pref_p1394; + (*(&__iob[1]))._cnt = (((*(&__iob[1]))._cnt)-1); + pref_p1394 = ((*(&__iob[1]))._cnt); + if (pref_p1394<0) + quesCol_p1393 = __flsbuf (ch_p1279,(FILE_t130 *) (&__iob[1])); + else + { + char *post_p1395; + post_p1395 = ((*(&__iob[1]))._ptr); + (*(&__iob[1]))._ptr = ((char *) (((int) ((*(&__iob[1]))._ptr))+1)); + *post_p1395 = ((char) ch_p1279); + quesCol_p1393 = ((int) (*post_p1395)); + } + quesCol_p1392 = quesCol_p1393; + } + quesCol_p1392; + } + goto switchBrk_p1391; + + case 116: + { + + if (__us_rsthread_stdio) + quesCol_p1396 = __semputc (9,(FILE_t130 *) (&__iob[1])); + else + { + int quesCol_p1397; + int pref_p1398; + (*(&__iob[1]))._cnt = (((*(&__iob[1]))._cnt)-1); + pref_p1398 = ((*(&__iob[1]))._cnt); + if (pref_p1398<0) + quesCol_p1397 = __flsbuf (9,(FILE_t130 *) (&__iob[1])); + else + { + char *post_p1399; + post_p1399 = ((*(&__iob[1]))._ptr); + (*(&__iob[1]))._ptr = ((char *) (((int) ((*(&__iob[1]))._ptr))+1)); + *post_p1399 = ((char) 9); + quesCol_p1397 = ((int) (*post_p1399)); + } + quesCol_p1396 = quesCol_p1397; + } + quesCol_p1396; + } + goto switchBrk_p1391; + + case 110: + { + + if (__us_rsthread_stdio) + quesCol_p1400 = __semputc (10,(FILE_t130 *) (&__iob[1])); + else + { + int quesCol_p1401; + int pref_p1402; + (*(&__iob[1]))._cnt = (((*(&__iob[1]))._cnt)-1); + pref_p1402 = ((*(&__iob[1]))._cnt); + if (pref_p1402<0) + quesCol_p1401 = __flsbuf (10,(FILE_t130 *) (&__iob[1])); + else + { + char *post_p1403; + post_p1403 = ((*(&__iob[1]))._ptr); + (*(&__iob[1]))._ptr = ((char *) (((int) ((*(&__iob[1]))._ptr))+1)); + *post_p1403 = ((char) 10); + quesCol_p1401 = ((int) (*post_p1403)); + } + quesCol_p1400 = quesCol_p1401; + } + quesCol_p1400; + } + goto switchBrk_p1391; + + case 92: + { + + if (__us_rsthread_stdio) + quesCol_p1404 = __semputc (92,(FILE_t130 *) (&__iob[1])); + else + { + int quesCol_p1405; + int pref_p1406; + (*(&__iob[1]))._cnt = (((*(&__iob[1]))._cnt)-1); + pref_p1406 = ((*(&__iob[1]))._cnt); + if (pref_p1406<0) + quesCol_p1405 = __flsbuf (92,(FILE_t130 *) (&__iob[1])); + else + { + char *post_p1407; + post_p1407 = ((*(&__iob[1]))._ptr); + (*(&__iob[1]))._ptr = ((char *) (((int) ((*(&__iob[1]))._ptr))+1)); + *post_p1407 = ((char) 92); + quesCol_p1405 = ((int) (*post_p1407)); + } + quesCol_p1404 = quesCol_p1405; + } + quesCol_p1404; + } + goto switchBrk_p1391; + + case 48: + + case 49: + + case 50: + + case 51: + + case 52: + + case 53: + + case 54: + + case 55: + { + + if (__us_rsthread_stdio) + quesCol_p1408 = __semputc (92,(FILE_t130 *) (&__iob[1])); + else + { + int quesCol_p1409; + int pref_p1410; + (*(&__iob[1]))._cnt = (((*(&__iob[1]))._cnt)-1); + pref_p1410 = ((*(&__iob[1]))._cnt); + if (pref_p1410<0) + quesCol_p1409 = __flsbuf (92,(FILE_t130 *) (&__iob[1])); + else + { + char *post_p1411; + post_p1411 = ((*(&__iob[1]))._ptr); + (*(&__iob[1]))._ptr = ((char *) (((int) ((*(&__iob[1]))._ptr))+1)); + *post_p1411 = ((char) 92); + quesCol_p1409 = ((int) (*post_p1411)); + } + quesCol_p1408 = quesCol_p1409; + } + quesCol_p1408; + } + + default: + { + + if (__us_rsthread_stdio) + quesCol_p1412 = __semputc (ch_p1279,(FILE_t130 *) (&__iob[1])); + else + { + int quesCol_p1413; + int pref_p1414; + (*(&__iob[1]))._cnt = (((*(&__iob[1]))._cnt)-1); + pref_p1414 = ((*(&__iob[1]))._cnt); + if (pref_p1414<0) + quesCol_p1413 = __flsbuf (ch_p1279,(FILE_t130 *) (&__iob[1])); + else + { + char *post_p1415; + post_p1415 = ((*(&__iob[1]))._ptr); + (*(&__iob[1]))._ptr = ((char *) (((int) ((*(&__iob[1]))._ptr))+1)); + *post_p1415 = ((char) ch_p1279); + quesCol_p1413 = ((int) (*post_p1415)); + } + quesCol_p1412 = quesCol_p1413; + } + quesCol_p1412; + } + goto switchBrk_p1391; + } + + switchBrk_p1391: + ; + } + goto switchBrk_p1371; + + default: + if (para_p1280>0) + { + + if (__us_rsthread_stdio) + quesCol_p1416 = __semputc (ch_p1279,(FILE_t130 *) (&__iob[1])); + else + { + int quesCol_p1417; + int pref_p1418; + (*(&__iob[1]))._cnt = (((*(&__iob[1]))._cnt)-1); + pref_p1418 = ((*(&__iob[1]))._cnt); + if (pref_p1418<0) + quesCol_p1417 = __flsbuf (ch_p1279,(FILE_t130 *) (&__iob[1])); + else + { + char *post_p1419; + post_p1419 = ((*(&__iob[1]))._ptr); + (*(&__iob[1]))._ptr = ((char *) (((int) ((*(&__iob[1]))._ptr))+1)); + *post_p1419 = ((char) ch_p1279); + quesCol_p1417 = ((int) (*post_p1419)); + } + quesCol_p1416 = quesCol_p1417; + } + quesCol_p1416; + } + } + +switchBrk_p1371: + ; + +whileCont_p1368: + ; + call_p1370 = fgetc (source_p1276); + ch_p1279 = call_p1370; + if (ch_p1279!=(-1)) + goto whileTop_p1367; +} diff --git a/ckit/regression/output/summary b/ckit/regression/output/summary new file mode 100644 index 0000000..db058c7 --- /dev/null +++ b/ckit/regression/output/summary @@ -0,0 +1,104 @@ +a1.c [parsed] [succeeded] [cc succeeded] +a10.c [parsed] [succeeded] [cc succeeded] +a100.c [parsed] [succeeded] [cc succeeded] +a101.c [parsed] [succeeded] [cc succeeded] +a102.c [parsed] [succeeded] [cc succeeded] +a103.c [parsed] [succeeded] [cc succeeded] +a104.c [parsed] [succeeded] [cc succeeded] +a105.c [parsed] [succeeded] [cc succeeded] +a106.c [parsed] [succeeded] [cc succeeded] +a107.c [parsed] [succeeded] [cc succeeded] +a108.c [warnings] [succeeded] [cc succeeded] +a109.c [parsed] [succeeded] [cc succeeded] +a11.c [parsed] [succeeded] [cc succeeded] +a110.c [parsed] [succeeded] [cc succeeded] +a111.c [parsed] [succeeded] [cc succeeded] +a112.c [parsed] [succeeded] [cc succeeded] +a113.c [parsed] [succeeded] [cc succeeded] +a114.c [parsed] [succeeded] [cc succeeded] +a115.c [parsed] [succeeded] [cc succeeded] +a116.c [parsed] [succeeded] [cc succeeded] +a117.c [parsed] [succeeded] [cc succeeded] +a118.c [parsed] [succeeded] [cc succeeded] +a12.c [parsed] [succeeded] [cc succeeded] +a13.c [parsed] [succeeded] [cc succeeded] +a14.c [parsed] [succeeded] [cc succeeded] +a15.c [parsed] [succeeded] [cc succeeded] +a16.c [parsed] [succeeded] [cc succeeded] +a17.c [parsed] [succeeded] [cc succeeded] +a18.c [parsed] [succeeded] [cc succeeded] +a19.c [parsed] [succeeded] [cc succeeded] +a2.c [parsed] [succeeded] [cc succeeded] +a20.c [parsed] [succeeded] [cc succeeded] +a200.c [parsed] [succeeded] [cc succeeded] +a201.c [parsed] [succeeded] [cc succeeded] +a202.c [warnings] [succeeded] [cc succeeded] +a203.c [warnings] [succeeded] [cc succeeded] +a204.c [warnings] [succeeded] [cc succeeded] +a205.c [parsed] [succeeded] [cc succeeded] +a206.c [parsed] [succeeded] [cc succeeded] +a207.c [parsed] [succeeded] [cc succeeded] +a208.c [parsed] [succeeded] [cc succeeded] +a37.c [parsed] [succeeded] [cc succeeded] +a209.c [parsed] [succeeded] [cc succeeded] +a21.c [parsed] [succeeded] [cc succeeded] +a210.c [parsed] [succeeded] [cc succeeded] +a22.c [parsed] [succeeded] [cc succeeded] +a23.c [parsed] [succeeded] [cc succeeded] +a24.c [parsed] [succeeded] [cc succeeded] +a25.c [parsed] [succeeded] [cc succeeded] +a26.c [parsed] [succeeded] [cc succeeded] +a27.c [parsed] [succeeded] [cc succeeded] +a28.c [parsed] [succeeded] [cc succeeded] +a29.c [parsed] [succeeded] [cc succeeded] +a3.c [parsed] [succeeded] [cc succeeded] +a30.c [parsed] [succeeded] [cc succeeded] +a31.c [parsed] [succeeded] [cc succeeded] +a32.c [parsed] [succeeded] [cc succeeded] +a33.c [parsed] [succeeded] [cc succeeded] +a34.c [parsed] [succeeded] [cc succeeded] +a35.c [parsed] [succeeded] [cc succeeded] +a36.c [parsed] [succeeded] [cc succeeded] +a38.c [parsed] [succeeded] [cc succeeded] +a4.c [parsed] [succeeded] [cc succeeded] +a5.c [parsed] [succeeded] [cc succeeded] +a6.c [parsed] [succeeded] [cc succeeded] +a7.c [parsed] [succeeded] [cc succeeded] +a8.c [parsed] [succeeded] [cc succeeded] +a9.c [parsed] [succeeded] [cc succeeded] +b2.c [parsed] [succeeded] [cc succeeded] +b3.c [parsed] [succeeded] [cc succeeded] +c100.c.c [parsed] [succeeded] [orig cc failed] +fact1.c [warnings] [succeeded] [cc succeeded] +i100.c.c [parsed] [succeeded] [orig cc failed] +kf.c [parsed] [succeeded] [cc succeeded] +primes.c.c [parsed] [succeeded] [cc succeeded] +ps2ascii.c.c [parsed] [succeeded] [orig cc failed] +t1.c [parsed] [succeeded] [cc succeeded] +a39.c [parsed] [succeeded] [cc succeeded] +t10.c [warnings] [succeeded] [cc succeeded] +t11.c [parsed] [succeeded] [cc succeeded] +t12.c [warnings] [succeeded] [cc succeeded] +t13.c [warnings] [succeeded] [cc succeeded] +t14.c [warnings] [succeeded] [cc succeeded] +a40.c [parsed] [succeeded] [cc succeeded] +t3.c [parsed] [succeeded] [cc succeeded] +t4.c [parsed] [succeeded] [cc succeeded] +t5.c [parsed] [succeeded] [cc succeeded] +t6.c [warnings] [failed] +t7.c [parsed] [succeeded] [cc succeeded] +t8.c [parsed] [succeeded] [cc succeeded] +t9.c [parsed] [succeeded] [cc succeeded] +test.c [warnings] [succeeded] [cc succeeded] +test1.c [warnings] [succeeded] [cc succeeded] +test10.c [warnings] [succeeded] [cc succeeded] +test2.c [warnings] [succeeded] [cc succeeded] +test4.c [warnings] [succeeded] [cc succeeded] +test5.c [warnings] [succeeded] [cc succeeded] +typedef.c [parsed] [succeeded] [cc succeeded] +typedef2.c [parsed] [succeeded] [cc succeeded] +words.c.c [parsed] [succeeded] [orig cc failed] +a50.c [parsed] [succeeded] [cc succeeded] +a43.c [parsed] [succeeded] [cc succeeded] +a44.c [parsed] [succeeded] [cc succeeded] +t22.c [parsed] [failed] diff --git a/ckit/regression/output/t1.c b/ckit/regression/output/t1.c new file mode 100644 index 0000000..59c7af8 --- /dev/null +++ b/ckit/regression/output/t1.c @@ -0,0 +1,18 @@ + +extern int printf (); +int main () +{ + + printf ("this is the end, my only friend the end"); + goto label_1_p1422; + +label_1_p1422: + ; + printf ("this is the end, my only friend the end"); + goto label_2_p1423; + goto label_1_p1422; + +label_2_p1423: + ; + return 10; +} diff --git a/ckit/regression/output/t10.c b/ckit/regression/output/t10.c new file mode 100644 index 0000000..36b3cb2 --- /dev/null +++ b/ckit/regression/output/t10.c @@ -0,0 +1,19 @@ + +int main () +{ + struct t249 { + int x; + int y; + }; + struct t249 point_p1432; + goto whileCont_p1434; + +whileTop_p1433: + ; + +whileCont_p1434: + ; + if (1) + goto whileTop_p1433; + return ; +} diff --git a/ckit/regression/output/t11.c b/ckit/regression/output/t11.c new file mode 100644 index 0000000..06fd69f --- /dev/null +++ b/ckit/regression/output/t11.c @@ -0,0 +1,15 @@ + +static char mimbar (char c_p1437) +{ + + return c_p1437; +} +void main () +{ + static char c_p1439; + static char (*f) (char); + char call_p1441; + f = mimbar; + call_p1441 = f (c_p1439); + c_p1439 = call_p1441; +} diff --git a/ckit/regression/output/t12.c b/ckit/regression/output/t12.c new file mode 100644 index 0000000..d854f31 --- /dev/null +++ b/ckit/regression/output/t12.c @@ -0,0 +1,40 @@ + +void main () +{ + int i_p1443; + int post_p1449; + i_p1443 = 0; + goto forStart_p1446; + +forTop_p1445: + ; + switch (i_p1443) + { + + + case 0: + goto forCont_p1447; + + default: + goto switchBrk_p1450; + } + +switchBrk_p1450: + ; + if (i_p1443==4) + goto forBrk_p1448; + printf ("i = %d\n",i_p1443); + +forCont_p1447: + ; + post_p1449 = i_p1443; + i_p1443 = (i_p1443+1); + +forStart_p1446: + ; + if (i_p1443<10) + goto forTop_p1445; + +forBrk_p1448: + ; +} diff --git a/ckit/regression/output/t13.c b/ckit/regression/output/t13.c new file mode 100644 index 0000000..b506829 --- /dev/null +++ b/ckit/regression/output/t13.c @@ -0,0 +1,25 @@ + +void main () +{ + int i_p1452; + int post_p1457; + i_p1452 = 0; + +f1_p1456: + ; + post_p1457 = i_p1452; + i_p1452 = (i_p1452+1); + if (post_p1457<4) + goto f2_p1453; + else + goto f3_p1454; + +f2_p1453: + ; + printf ("i = %d\n",i_p1452); + goto f1_p1456; + +f3_p1454: + ; + printf ("i\'= %d\n",i_p1452); +} diff --git a/ckit/regression/output/t14.c b/ckit/regression/output/t14.c new file mode 100644 index 0000000..9c7cfc5 --- /dev/null +++ b/ckit/regression/output/t14.c @@ -0,0 +1,13 @@ + +struct t250 { + int x; + int y; +}; +struct t250 z={1,2}; +void main () +{ + int i_p1462; + i_p1462 = 0; + i_p1462 = (z.x); + printf ("i = %d\n",i_p1462); +} diff --git a/ckit/regression/output/t15.c b/ckit/regression/output/t15.c new file mode 100644 index 0000000..6d4c9ec --- /dev/null +++ b/ckit/regression/output/t15.c @@ -0,0 +1,19 @@ + +void main () +{ + struct t347 { + int x; + int y; + }; + struct t347 point_p1855; + goto whileCont_p1857; + +whileTop_p1856: + ; + +whileCont_p1857: + ; + if (1) + goto whileTop_p1856; + return ; +} diff --git a/ckit/regression/output/t22.c b/ckit/regression/output/t22.c new file mode 100644 index 0000000..e69de29 diff --git a/ckit/regression/output/t3.c b/ckit/regression/output/t3.c new file mode 100644 index 0000000..c6357ae --- /dev/null +++ b/ckit/regression/output/t3.c @@ -0,0 +1,18 @@ + +int main () +{ + int i_p1468; + +doTop_p1469: + ; + i_p1468 = (i_p1468+1); + i_p1468 = (i_p1468+1); + if (i_p1468<10) + goto doTop_p1469; + +doTop_p1472: + ; + i_p1468 = (i_p1468-1); + if (i_p1468>0) + goto doTop_p1472; +} diff --git a/ckit/regression/output/t4.c b/ckit/regression/output/t4.c new file mode 100644 index 0000000..3e76e05 --- /dev/null +++ b/ckit/regression/output/t4.c @@ -0,0 +1,14 @@ + +int main () +{ + int i_p1476; + int x1_p1477; + int *x2_p1478; + int *x3_p1479[3]; + int **x4_p1480; + int (*x5) (); + x1_p1477 = ((int) i_p1476); + x2_p1478 = ((int *) i_p1476); + x4_p1480 = ((int **) x4_p1480); + x5 = x5; +} diff --git a/ckit/regression/output/t5.c b/ckit/regression/output/t5.c new file mode 100644 index 0000000..b05fcd6 --- /dev/null +++ b/ckit/regression/output/t5.c @@ -0,0 +1,41 @@ + +int x; +long y; +extern unsigned long foo (int); +extern int main (); +unsigned long foo (int x_p1487) +{ + unsigned long i_p1488; + i_p1488 = ((unsigned long) x_p1487); + return x_p1487; +} +int main () +{ + int i_p1490; + switch (i_p1490) + { + + + case 2: + foo (i_p1490); + goto switchBrk_p1491; + + case 3: + foo (i_p1490); + goto switchBrk_p1491; + + case 5: + foo (i_p1490); + goto switchBrk_p1491; + + case 7: + foo (i_p1490); + goto switchBrk_p1491; + + default: + foo (i_p1490*2); + } + +switchBrk_p1491: + ; +} diff --git a/ckit/regression/output/t6.c b/ckit/regression/output/t6.c new file mode 100644 index 0000000..e69de29 diff --git a/ckit/regression/output/t7.c b/ckit/regression/output/t7.c new file mode 100644 index 0000000..2ea4242 --- /dev/null +++ b/ckit/regression/output/t7.c @@ -0,0 +1,37 @@ + +int (**x[4]) (int,int,int); +int (*(*y)[10]) (int,int,int); +extern int nar (int,int,int); +int nar (int x_p1510,int y_p1511,int z_p1512) +{ + + return 3; +} +typedef int bar_t253; +static int g; +static int narn (int x_p1516,int y_p1517,int z_p1518) +{ + + return (x_p1516+y_p1517)+z_p1518; +} +void main () +{ + int (**x1_p1520) (int,int,int); + int (*x2) (int,int,int); + int (**y1_p1522) (int,int,int); + int (*y2) (int,int,int); + int i_p1524; + int j_p1525; + int call_p1526; + int call_p1527; + *x[1] = nar; + x1_p1520 = x[1]; + x2 = (*x1_p1520); + y1_p1522 = ((int (**) (int,int,int)) (*y)); + y2 = y1_p1522[0]; + call_p1526 = x2 (4,5,6); + i_p1524 = call_p1526; + call_p1527 = y2 (4,5,6); + j_p1525 = call_p1527; + return ; +} diff --git a/ckit/regression/output/t8.c b/ckit/regression/output/t8.c new file mode 100644 index 0000000..dec786a --- /dev/null +++ b/ckit/regression/output/t8.c @@ -0,0 +1,23 @@ + +extern int narn (int,int,int); +int *i; +static char *c; +static char mimbar (char c_p1532,long l_p1533,double d_p1534) +{ + static int k_p1535; + int call_p1543; + call_p1543 = narn (k_p1535,k_p1535+2,k_p1535+3); + k_p1535 = call_p1543; + return c_p1532; +} +int narn (int x_p1537,int y_p1538,int z_p1539) +{ + + return (x_p1537+y_p1538)+z_p1539; +} +void main () +{ + register int j_p1541; + static int k_p1542; + return ; +} diff --git a/ckit/regression/output/t9.c b/ckit/regression/output/t9.c new file mode 100644 index 0000000..5265f17 --- /dev/null +++ b/ckit/regression/output/t9.c @@ -0,0 +1,18 @@ + +extern int narn (int,int,int); +int narn (int x_p1546,int y_p1547,int z_p1548) +{ + + return 1; +} +void main () +{ + register int j_p1550; + static int k_p1551; + int k_p1552; + int call_p1553; + call_p1553 = narn (k_p1552,j_p1550,j_p1550); + k_p1552 = call_p1553; + j_p1550 = k_p1552; + return ; +} diff --git a/ckit/regression/output/test.c b/ckit/regression/output/test.c new file mode 100644 index 0000000..2aefdf4 --- /dev/null +++ b/ckit/regression/output/test.c @@ -0,0 +1,20 @@ + +int main () +{ + long i_p1555; + long post_p1561; + i_p1555 = ((long) (-10)); + goto forStart_p1558; + +forTop_p1557: + ; + printf ("%lx ",((long) i_p1555)*3); + printf ("%lx\n",((unsigned long) i_p1555)*3); + post_p1561 = i_p1555; + i_p1555 = (i_p1555+((long) 1)); + +forStart_p1558: + ; + if (i_p1555<11) + goto forTop_p1557; +} diff --git a/ckit/regression/output/test1.c b/ckit/regression/output/test1.c new file mode 100644 index 0000000..89a59d6 --- /dev/null +++ b/ckit/regression/output/test1.c @@ -0,0 +1,30 @@ + +enum nevin_t254 { + X=3, + Y=10 +}; +int i; +int main () +{ + + p (); + printf ("%d\n",i); + q (); + printf ("%d\n",i); +} +int p () +{ + enum dino_t255 { + L=1, + S=2 + }; + i = ((int) L); +} +int q () +{ + enum dino_t256 { + L=3, + S=4 + }; + i = ((int) L); +} diff --git a/ckit/regression/output/test10.c b/ckit/regression/output/test10.c new file mode 100644 index 0000000..08cfb3e --- /dev/null +++ b/ckit/regression/output/test10.c @@ -0,0 +1,44 @@ + +int i=2; +int i; +struct t257 { + int a; + int b; +}; +struct t257 x; +struct t258 { + int a; + int b; +}; +struct t258 y; +struct S_t259 { + int a; + int b; +}; +struct S_t259 u; +struct S_t259 v; +int main () +{ + int i_p1592; + struct t260 { + int a; + int b; + }; + struct t260 x_p1595; + struct t261 { + int a; + int b; + }; + struct t261 y_p1598; + struct S_t262 { + int a; + int b; + }; + struct S_t262 u_p1602; + struct S_t262 v_p1603; + i_p1592 = 35; + v_p1603.a = 1; + v_p1603.b = 34; + u_p1602 = v_p1603; + printf ("%d\n",i_p1592); +} diff --git a/ckit/regression/output/test2.c b/ckit/regression/output/test2.c new file mode 100644 index 0000000..dd4653f --- /dev/null +++ b/ckit/regression/output/test2.c @@ -0,0 +1,33 @@ + +struct num_t263 { + short mantissa; + short exponent; +}; +typedef int mynum_t264; +struct numm_t265 { + mynum_t264 mantissaa; + short exponentt; +}; +typedef struct numm_t265 xxx_t266; +struct nummm_t267 { + mynum_t264 mantissaaa; + short exponenttt; +}; +struct nummm_t267 v; +typedef struct nummm_t267 nummmmm_t268; +union q1_t269 { + short qa; + short qb; +}; +typedef union q1_t269 q2_t270; +union q3_t271 { + short qaa; + short qbb; +}; +typedef union q3_t271 q4_t272; +typedef union t273 q7_t274; +mynum_t264 main () +{ + + printf (""); +} diff --git a/ckit/regression/output/test4.c b/ckit/regression/output/test4.c new file mode 100644 index 0000000..327e93c --- /dev/null +++ b/ckit/regression/output/test4.c @@ -0,0 +1,11 @@ + +int main () +{ + long i_p1631; + unsigned long j_p1632; + i_p1631 = 0; + j_p1632 = 0; + i_p1631 = ((long) 1); + j_p1632 = ((unsigned long) 2); + printf ("%d\n",i_p1631+j_p1632); +} diff --git a/ckit/regression/output/test5.c b/ckit/regression/output/test5.c new file mode 100644 index 0000000..85d056b --- /dev/null +++ b/ckit/regression/output/test5.c @@ -0,0 +1,25 @@ + +typedef int mynum_t275; +struct num_t276 { + short mantissa; + short exponent; +}; +union t277 { + short mantissaa; + short exponentt; +}; +typedef union t277 xxx_t278; +union t279 { + short mantissaa; + short exponentt; +}; +typedef union t279 xx_t280; +enum dino_t281 { + L=0, + S=0 +}; +mynum_t275 main () +{ + + printf (""); +} diff --git a/ckit/regression/output/typedef.c b/ckit/regression/output/typedef.c new file mode 100644 index 0000000..477649d --- /dev/null +++ b/ckit/regression/output/typedef.c @@ -0,0 +1,13 @@ + +typedef int foo_t282; +int f (int y_p1651) +{ + int foo_p1652; + foo_p1652 = 1; + return foo_p1652+y_p1651; +} +foo_t282 main () +{ + + return f (1); +} diff --git a/ckit/regression/output/typedef2.c b/ckit/regression/output/typedef2.c new file mode 100644 index 0000000..cd2af09 --- /dev/null +++ b/ckit/regression/output/typedef2.c @@ -0,0 +1,13 @@ + +typedef int foo_t283; +int f (int y_p1656) +{ + int foo_p1657; + foo_p1657 = 1; + return foo_p1657+y_p1656; +} +int main () +{ + + return f (1); +} diff --git a/ckit/regression/output/words.c.c b/ckit/regression/output/words.c.c new file mode 100644 index 0000000..2d5af68 --- /dev/null +++ b/ckit/regression/output/words.c.c @@ -0,0 +1,180 @@ + +typedef int __int32_t_t284; +typedef unsigned int __uint32_t_t285; +typedef long long __int64_t_t286; +typedef unsigned long long __uint64_t_t287; +typedef __int32_t_t284 __psint_t_t288; +typedef __uint32_t_t285 __psunsigned_t_t289; +typedef __int32_t_t284 __scint_t_t290; +typedef __uint32_t_t285 __scunsigned_t_t291; +typedef unsigned int size_t_t292; +typedef long fpos_t_t293; +typedef __int64_t_t286 off64_t_t294; +typedef __int64_t_t286 fpos64_t_t295; +typedef char *va_list_t296; +struct __file_s_t297 { + int _cnt; + char *_ptr; + char *_base; + char _flag; + char _file; +}; +typedef struct __file_s_t297 FILE_t298; +extern FILE_t298 __iob[100]; +extern FILE_t298 *_lastbuf; +extern char *_bufendtab[]; +extern char _sibuf[]; +extern char _sobuf[]; +extern int remove (char *); +extern int rename (char *,char *); +extern FILE_t298 * tmpfile (void); +extern char * tmpnam (char *); +extern int fclose (FILE_t298 *); +extern int fflush (FILE_t298 *); +extern FILE_t298 * fopen (char *,char *); +extern FILE_t298 * freopen (char *,char *,FILE_t298 *); +extern void setbuf (FILE_t298 *,char *); +extern int setvbuf (FILE_t298 *,char *,int,size_t_t292); +extern int fprintf (FILE_t298 *,char *,...); +extern int fscanf (FILE_t298 *,char *,...); +extern int printf (char *,...); +extern int scanf (char *,...); +extern int sprintf (char *,char *,...); +extern int sscanf (char *,char *,...); +extern int vfprintf (FILE_t298 *,char *,char *); +extern int vprintf (char *,char *); +extern int vsprintf (char *,char *,char *); +extern int fgetc (FILE_t298 *); +extern char * fgets (char *,int,FILE_t298 *); +extern int fputc (int,FILE_t298 *); +extern int fputs (char *,FILE_t298 *); +extern int getc (FILE_t298 *); +extern int getchar (void); +extern char * gets (char *); +extern int putc (int,FILE_t298 *); +extern int putchar (int); +extern int puts (char *); +extern int ungetc (int,FILE_t298 *); +extern size_t_t292 fread (void *,size_t_t292,size_t_t292,FILE_t298 *); +extern size_t_t292 fwrite (void *,size_t_t292,size_t_t292,FILE_t298 *); +extern int fgetpos (FILE_t298 *,fpos_t_t293 *); +extern int fseek (FILE_t298 *,long,int); +extern int fsetpos (FILE_t298 *,fpos_t_t293 *); +extern long ftell (FILE_t298 *); +extern void rewind (FILE_t298 *); +extern void clearerr (FILE_t298 *); +extern int feof (FILE_t298 *); +extern int ferror (FILE_t298 *); +extern void perror (char *); +extern int __filbuf (FILE_t298 *); +extern int __flsbuf (int,FILE_t298 *); +extern FILE_t298 * fdopen (int,char *); +extern int fileno (FILE_t298 *); +extern void flockfile (FILE_t298 *); +extern int ftrylockfile (FILE_t298 *); +extern void funlockfile (FILE_t298 *); +extern int getc_unlocked (FILE_t298 *); +extern int putc_unlocked (int,FILE_t298 *); +extern int getchar_unlocked (void); +extern int putchar_unlocked (int); +extern FILE_t298 * popen (char *,char *); +extern int pclose (FILE_t298 *); +extern int getopt (int,char **,char *); +extern char *optarg; +extern int opterr; +extern int optind; +extern int optopt; +extern int getsubopt (char **,char **,char **); +extern void getoptreset (void); +extern char * ctermid (char *); +extern char * cuserid (char *); +extern char * tempnam (char *,char *); +extern int getw (FILE_t298 *); +extern int putw (int,FILE_t298 *); +extern char * mktemp (char *); +extern int mkstemp (char *); +extern int setbuffer (FILE_t298 *,char *,int); +extern int setlinebuf (FILE_t298 *); +extern int system (char *); +extern int fgetpos64 (FILE_t298 *,fpos64_t_t295 *); +extern FILE_t298 * fopen64 (char *,char *); +extern FILE_t298 * freopen64 (char *,char *,FILE_t298 *); +extern int fseek64 (FILE_t298 *,off64_t_t294,int); +extern int fseeko64 (FILE_t298 *,off64_t_t294,int); +extern int fseeko (FILE_t298 *,__int64_t_t286,int); +extern int fsetpos64 (FILE_t298 *,fpos64_t_t295 *); +extern off64_t_t294 ftell64 (FILE_t298 *); +extern __int64_t_t286 ftello (FILE_t298 *); +extern off64_t_t294 ftello64 (FILE_t298 *); +extern FILE_t298 * tmpfile64 (void); +extern int __semputc (int,FILE_t298 *); +extern int __semgetc (FILE_t298 *); +extern int __us_rsthread_stdio; +extern char * ctermid_r (char *); +int main () +{ + int c_p1771; + int i_p1772; + char a_p1773[1000]; + int quesCol_p1777; + int post_p1781; + i_p1772 = 0; + goto whileCont_p1775; + +whileTop_p1774: + ; + if (((c_p1771>=97)&&(c_p1771<=122))||((c_p1771>=65)&&(c_p1771<=90))) + { + + post_p1781 = i_p1772; + i_p1772 = (i_p1772+1); + a_p1773[post_p1781] = ((char) c_p1771); + } + else + if (((c_p1771==10)||(c_p1771==32))||(c_p1771==9)) + { + + if (i_p1772>1) + { + + a_p1773[i_p1772] = ((char) 0); + printf ((char *) "%s\n",(char *) a_p1773); + } + i_p1772 = 0; + } + else + { + + i_p1772 = 0; + } + if (i_p1772>999) + { + + i_p1772 = 0; + } + +whileCont_p1775: + ; + if (__us_rsthread_stdio) + quesCol_p1777 = __semgetc ((FILE_t298 *) (&__iob[0])); + else + { + int quesCol_p1778; + int pref_p1779; + (*(&__iob[0]))._cnt = (((*(&__iob[0]))._cnt)-1); + pref_p1779 = ((*(&__iob[0]))._cnt); + if (pref_p1779<0) + quesCol_p1778 = __filbuf ((FILE_t298 *) (&__iob[0])); + else + { + char *post_p1780; + post_p1780 = ((*(&__iob[0]))._ptr); + (*(&__iob[0]))._ptr = ((char *) (((int) ((*(&__iob[0]))._ptr))+1)); + quesCol_p1778 = ((int) (*post_p1780)); + } + quesCol_p1777 = quesCol_p1778; + } + c_p1771 = quesCol_p1777; + if (c_p1771!=(-1)) + goto whileTop_p1774; +} diff --git a/ckit/regression/sources.cm b/ckit/regression/sources.cm new file mode 100644 index 0000000..44a57b3 --- /dev/null +++ b/ckit/regression/sources.cm @@ -0,0 +1,6 @@ +Group is + ../src/ast/sources.cm +(* ../src/ast-utils/simplifier/sources.cm *) + + test-fn.sml + diff --git a/ckit/regression/test-fn.sml b/ckit/regression/test-fn.sml new file mode 100644 index 0000000..7753a33 --- /dev/null +++ b/ckit/regression/test-fn.sml @@ -0,0 +1,128 @@ +(* Functorized regression testing for valid C code. + * + * trans - function which takes the output of buildAst (parsed/typechecked c code) + * and performs a transformation on it returning an Ast. + * + * testDir - the directory where the valid C code lives. + * + * outDir - directory where to place the prettyprinted transformed code. + * + * For each C file in the testDir, the following actions are performed + * + * - the file is parsed and typechecked + * + * - the transformation, trans, is applied to the resulting ast representation + * + * - the transformed ast is pretty-printed to outDir using the same name as + * the file from which it was read. + * + * - the original C file and the generated C file are compiled and run with the + * results saved in their respective directories. These output files are then + * compared using diff. + *) + +functor TestFn (val testDir : string; + val outDir : string; + val trans : BuildAst.astBundle -> Ast.ast + ) = struct + + fun isCFile s = + case rev (explode s) + of (#"c")::(#".")::_ => true + | (#"i")::(#".")::_ => true + | _ => false + + fun dirList dir = + let val ds = OS.FileSys.openDir dir + fun loop () = + case OS.FileSys.readDir ds + of "" => [] + | s => if isCFile s then s::(loop ()) else loop () + in loop () before OS.FileSys.closeDir ds end + + fun spaces n = + let fun loop 0 a = String.concat a + | loop n a = loop (n-1) (" "::a) + in loop n [] end + + + fun normalize file = + case 15 - (String.size file) + of 0 => file + | n => if n < 0 then String.substring (file,0,15) + else file^(spaces n) + + fun ppTrans os file = + let val pinfo as {ast, tidtab, errorCount, warningCount, ...} = + ParseToAst.fileToAst (testDir^"/"^file) + val ast = trans pinfo + val fileOs = TextIO.openOut (outDir^"/"^file) + in (PPLib.ppToStrm (PPAst.ppAst () tidtab) fileOs ast; + (case (errorCount, warningCount) of + (0, 0) => TextIO.output (os,"\t[success]") + | (i, 0) => + TextIO.output (os,"\t[" ^ (Int.toString i) ^ " errors]") + | (0, j) => + TextIO.output (os,"\t[" ^ (Int.toString j) ^ " warnings]") + | (i, j) => + TextIO.output (os,"\t[" ^ (Int.toString i) ^ + " errors (" ^ (Int.toString j) ^"w)]")); + TextIO.closeOut fileOs; + true) + handle _ => + (TextIO.output (os,"\t[failed]"); + TextIO.closeOut fileOs; + false) + end + + fun compileCommand dir file = "cc "^dir^"/"^file^" -o "^dir^"/"^file^".exe" + + fun compile os file = + case OS.Process.system (compileCommand testDir file) + of 0 => (case OS.Process.system (compileCommand outDir file) + of 0 => (TextIO.output (os,"\t[cc succeeded]"); true) + | _ => (TextIO.output (os,"\t[trans cc failed]"); false)) + + | _ => (TextIO.output (os,"\t[orig cc failed]"); false) + + fun executeCommand dir file = dir^"/"^file^".exe > "^dir^"/"^file^".out" + fun execute os file = + case OS.Process.system (executeCommand testDir file) + of 0 => (case OS.Process.system (executeCommand outDir file) + of 0 => (TextIO.output (os,"\t[execution succeeded]"); true) + | _ => (TextIO.output (os,"\t[trans execution failed]"); false)) + + | _ => (TextIO.output (os,"\t[orig execution failed]"); false) + + fun compare os file = + let val diffCommaind = "diff "^testDir^"/"^file^".out "^outDir^"/"^file^".out" + in case OS.Process.system (executeCommand testDir file) + of 0 => (TextIO.output (os,"\t[output the same]"); true) + | _ => (TextIO.output (os,"\t[output different]"); false) + end + + fun test os file = + ( TextIO.output (os,normalize file) + ; print ( (normalize file) ^ "\n" ) + ; TextIO.flushOut os + ; if not (ppTrans os file) then () + else if not (compile os file) then () + else if not (execute os file) then () + else if not (compare os file) then () + else () + ; TextIO.output (os,"\n") + ) + + fun testOne file = test TextIO.stdOut file + + fun testAll () = + let val os = TextIO.openOut (outDir^"/summary"); + fun loop [] = TextIO.closeOut os + | loop (file::files) = + ( test os file + handle _ => TextIO.output (os,"FAILED WITH EXTREME PREJUDICE\n") + ; loop files + ) + in loop (dirList (testDir^"/")) end +end + diff --git a/ckit/regression/tests/simplify/simplify-test.sml b/ckit/regression/tests/simplify/simplify-test.sml new file mode 100644 index 0000000..7acb8ac --- /dev/null +++ b/ckit/regression/tests/simplify/simplify-test.sml @@ -0,0 +1,14 @@ +local + val _ = BuildAst.compilerMode() (* make sure we are in compiler mode *) + fun trans ({ast={ast,tidtab,...},aidtab,implicits,...}: BuildAst.programInfo) = + let + val {ast,...} = SimplifyAst.simplifyAst(ast,tidtab,aidtab, implicits) + in ast + end +in + structure SimplifyTest = + TestFn (val testDir = "../../valid-programs" + val outDir = "results" + val trans = trans) +end + diff --git a/ckit/regression/tests/simplify/sources.cm b/ckit/regression/tests/simplify/sources.cm new file mode 100644 index 0000000..3efd31c --- /dev/null +++ b/ckit/regression/tests/simplify/sources.cm @@ -0,0 +1,5 @@ +Group is + ../../sources.cm + + simplify-test.sml + diff --git a/ckit/regression/tests/typecheck/sources.cm b/ckit/regression/tests/typecheck/sources.cm new file mode 100644 index 0000000..a72b124 --- /dev/null +++ b/ckit/regression/tests/typecheck/sources.cm @@ -0,0 +1,5 @@ +Group is + ../../sources.cm + + typecheck-test.sml + diff --git a/ckit/regression/tests/typecheck/typecheck-test.sml b/ckit/regression/tests/typecheck/typecheck-test.sml new file mode 100644 index 0000000..ad1d289 --- /dev/null +++ b/ckit/regression/tests/typecheck/typecheck-test.sml @@ -0,0 +1,4 @@ +structure TypecheckTest = + TestFn (val testDir = "../../valid-programs" + val outDir = "results" + val trans = (fn ({ast,...}: BuildAst.astBundle) => ast)) diff --git a/ckit/regression/valid-programs.obs/c100.c.c b/ckit/regression/valid-programs.obs/c100.c.c new file mode 100644 index 0000000..73779ed --- /dev/null +++ b/ckit/regression/valid-programs.obs/c100.c.c @@ -0,0 +1,783 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +typedef int __int32_t; +typedef unsigned __uint32_t; + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +typedef long long __int64_t; +typedef unsigned long long __uint64_t; + + + + + + +typedef __int32_t __psint_t; +typedef __uint32_t __psunsigned_t; + + + + + + + + + + + +typedef __int32_t __scint_t; +typedef __uint32_t __scunsigned_t; + + + + + + + + + + + + + +typedef unsigned int size_t; + + + + + +typedef long fpos_t; + + + + + +typedef __int64_t off64_t; + + + + + +typedef __int64_t fpos64_t; + + + + + + + + +typedef char *va_list; + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +typedef struct + + + + + + + + + +__file_s + +{ + + int _cnt; + + + unsigned char *_ptr; + unsigned char *_base; + + unsigned char _flag; + unsigned char _file; + +} FILE; + +extern FILE __iob[100 ]; +extern FILE *_lastbuf; +extern unsigned char *_bufendtab[]; +extern unsigned char _sibuf[], _sobuf[]; + +extern int remove(const char *); +extern int rename(const char *, const char *); +extern FILE *tmpfile(void); +extern char *tmpnam(char *); +extern int fclose(FILE *); +extern int fflush(FILE *); +extern FILE *fopen(const char *, const char *); +extern FILE *freopen(const char *, const char *, FILE *); +extern void setbuf(FILE *, char *); +extern int setvbuf(FILE *, char *, int, size_t); + +extern int fprintf(FILE *, const char *, ...); + +extern int fscanf(FILE *, const char *, ...); + +extern int printf(const char *, ...); + +extern int scanf(const char *, ...); + +extern int sprintf(char *, const char *, ...); + +extern int sscanf(const char *, const char *, ...); +extern int vfprintf(FILE *, const char *, char *); +extern int vprintf(const char *, char *); +extern int vsprintf(char *, const char *, char *); + + + +extern int fgetc(FILE *); +extern char *fgets(char *, int, FILE *); +extern int fputc(int, FILE *); +extern int fputs(const char *, FILE *); +extern int getc(FILE *); +extern int getchar(void); +extern char *gets(char *); +extern int putc(int, FILE *); +extern int putchar(int); +extern int puts(const char *); +extern int ungetc(int, FILE *); +extern size_t fread(void *, size_t, size_t, FILE *); +extern size_t fwrite(const void *, size_t, size_t, FILE *); +extern int fgetpos(FILE *, fpos_t *); +extern int fseek(FILE *, long, int); +extern int fsetpos(FILE *, const fpos_t *); +extern long ftell(FILE *); +extern void rewind(FILE *); +extern void clearerr(FILE *); +extern int feof(FILE *); +extern int ferror(FILE *); +extern void perror(const char *); + +extern int __filbuf(FILE *); +extern int __flsbuf(int, FILE *); + + + +extern FILE *fdopen(int, const char *); +extern int fileno(FILE *); + + + + +extern void flockfile(FILE *); +extern int ftrylockfile(FILE *); +extern void funlockfile(FILE *); +extern int getc_unlocked(FILE *); +extern int putc_unlocked(int, FILE *); +extern int getchar_unlocked(void); +extern int putchar_unlocked(int); + + + +extern FILE *popen(const char *, const char *); +extern int pclose(FILE *); + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +extern int getopt(int, char *const *, const char *); + +extern char *optarg; +extern int opterr; +extern int optind; +extern int optopt; + + + + + + + + + + +extern int getsubopt(char **, char *const *, char **); +extern void getoptreset(void); + + + + + +extern char *ctermid(char *); +extern char *cuserid(char *); +extern char *tempnam(const char *, const char *); +extern int getw(FILE *); +extern int putw(int, FILE *); + + + + + +extern char *mktemp(char *); +extern int mkstemp(char *); +extern int setbuffer(FILE *, char *, int); +extern int setlinebuf(FILE *); +extern int system(const char *); + + + + +extern int fgetpos64(FILE *, fpos64_t *); +extern FILE *fopen64(const char *, const char *); +extern FILE *freopen64(const char *, const char *, FILE *); +extern int fseek64(FILE *, off64_t, int); +extern int fseeko64(FILE *, off64_t, int); + +extern int fseeko(FILE *, __int64_t, int); +extern int fsetpos64(FILE *, const fpos64_t *); +extern off64_t ftell64(FILE *); + +extern __int64_t ftello(FILE *); +extern off64_t ftello64(FILE *); +extern FILE *tmpfile64(void); + + + +extern int __semputc(int, FILE *); +extern int __semgetc(FILE *); +extern int __us_rsthread_stdio; + + + +extern char *ctermid_r(char *); + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +main() +{ + int c, i; + + i = 0; + while((c = (__us_rsthread_stdio ? __semgetc((&__iob[0])) : (--((&__iob[0]))->_cnt < 0 ? __filbuf((&__iob[0])) : (int)*((&__iob[0]))->_ptr++)) ) != (-1) ) { + if (++i > 100) break; + (__us_rsthread_stdio ? __semputc((c), (&__iob[1])) : (--( (&__iob[1]))->_cnt < 0 ? __flsbuf(((c)), ( (&__iob[1]))) : (int)(*( (&__iob[1]))->_ptr++ = (unsigned char)((c))))) ; + } +} diff --git a/ckit/regression/valid-programs.obs/i100.c.c b/ckit/regression/valid-programs.obs/i100.c.c new file mode 100644 index 0000000..1c190ca --- /dev/null +++ b/ckit/regression/valid-programs.obs/i100.c.c @@ -0,0 +1,787 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +typedef int __int32_t; +typedef unsigned __uint32_t; + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +typedef long long __int64_t; +typedef unsigned long long __uint64_t; + + + + + + +typedef __int32_t __psint_t; +typedef __uint32_t __psunsigned_t; + + + + + + + + + + + +typedef __int32_t __scint_t; +typedef __uint32_t __scunsigned_t; + + + + + + + + + + + + + +typedef unsigned int size_t; + + + + + +typedef long fpos_t; + + + + + +typedef __int64_t off64_t; + + + + + +typedef __int64_t fpos64_t; + + + + + + + + +typedef char *va_list; + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +typedef struct + + + + + + + + + +__file_s + +{ + + int _cnt; + + + unsigned char *_ptr; + unsigned char *_base; + + unsigned char _flag; + unsigned char _file; + +} FILE; + +extern FILE __iob[100 ]; +extern FILE *_lastbuf; +extern unsigned char *_bufendtab[]; +extern unsigned char _sibuf[], _sobuf[]; + +extern int remove(const char *); +extern int rename(const char *, const char *); +extern FILE *tmpfile(void); +extern char *tmpnam(char *); +extern int fclose(FILE *); +extern int fflush(FILE *); +extern FILE *fopen(const char *, const char *); +extern FILE *freopen(const char *, const char *, FILE *); +extern void setbuf(FILE *, char *); +extern int setvbuf(FILE *, char *, int, size_t); + +extern int fprintf(FILE *, const char *, ...); + +extern int fscanf(FILE *, const char *, ...); + +extern int printf(const char *, ...); + +extern int scanf(const char *, ...); + +extern int sprintf(char *, const char *, ...); + +extern int sscanf(const char *, const char *, ...); +extern int vfprintf(FILE *, const char *, char *); +extern int vprintf(const char *, char *); +extern int vsprintf(char *, const char *, char *); + + + +extern int fgetc(FILE *); +extern char *fgets(char *, int, FILE *); +extern int fputc(int, FILE *); +extern int fputs(const char *, FILE *); +extern int getc(FILE *); +extern int getchar(void); +extern char *gets(char *); +extern int putc(int, FILE *); +extern int putchar(int); +extern int puts(const char *); +extern int ungetc(int, FILE *); +extern size_t fread(void *, size_t, size_t, FILE *); +extern size_t fwrite(const void *, size_t, size_t, FILE *); +extern int fgetpos(FILE *, fpos_t *); +extern int fseek(FILE *, long, int); +extern int fsetpos(FILE *, const fpos_t *); +extern long ftell(FILE *); +extern void rewind(FILE *); +extern void clearerr(FILE *); +extern int feof(FILE *); +extern int ferror(FILE *); +extern void perror(const char *); + +extern int __filbuf(FILE *); +extern int __flsbuf(int, FILE *); + + + +extern FILE *fdopen(int, const char *); +extern int fileno(FILE *); + + + + +extern void flockfile(FILE *); +extern int ftrylockfile(FILE *); +extern void funlockfile(FILE *); +extern int getc_unlocked(FILE *); +extern int putc_unlocked(int, FILE *); +extern int getchar_unlocked(void); +extern int putchar_unlocked(int); + + + +extern FILE *popen(const char *, const char *); +extern int pclose(FILE *); + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +extern int getopt(int, char *const *, const char *); + +extern char *optarg; +extern int opterr; +extern int optind; +extern int optopt; + + + + + + + + + + +extern int getsubopt(char **, char *const *, char **); +extern void getoptreset(void); + + + + + +extern char *ctermid(char *); +extern char *cuserid(char *); +extern char *tempnam(const char *, const char *); +extern int getw(FILE *); +extern int putw(int, FILE *); + + + + + +extern char *mktemp(char *); +extern int mkstemp(char *); +extern int setbuffer(FILE *, char *, int); +extern int setlinebuf(FILE *); +extern int system(const char *); + + + + +extern int fgetpos64(FILE *, fpos64_t *); +extern FILE *fopen64(const char *, const char *); +extern FILE *freopen64(const char *, const char *, FILE *); +extern int fseek64(FILE *, off64_t, int); +extern int fseeko64(FILE *, off64_t, int); + +extern int fseeko(FILE *, __int64_t, int); +extern int fsetpos64(FILE *, const fpos64_t *); +extern off64_t ftell64(FILE *); + +extern __int64_t ftello(FILE *); +extern off64_t ftello64(FILE *); +extern FILE *tmpfile64(void); + + + +extern int __semputc(int, FILE *); +extern int __semgetc(FILE *); +extern int __us_rsthread_stdio; + + + +extern char *ctermid_r(char *); + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +main() +{ + int c, i; + + i = 0; + while((c = (__us_rsthread_stdio ? __semgetc((&__iob[0])) : (--((&__iob[0]))->_cnt < 0 ? __filbuf((&__iob[0])) : (int)*((&__iob[0]))->_ptr++)) ) != (-1) ) { + if (++i > 100) { + printf("\n"); + i = 0; + } + if (c == '\n') i = 0; + (__us_rsthread_stdio ? __semputc((c), (&__iob[1])) : (--( (&__iob[1]))->_cnt < 0 ? __flsbuf(((c)), ( (&__iob[1]))) : (int)(*( (&__iob[1]))->_ptr++ = (unsigned char)((c))))) ; + } +} diff --git a/ckit/regression/valid-programs.obs/ps2ascii.c.c b/ckit/regression/valid-programs.obs/ps2ascii.c.c new file mode 100644 index 0000000..5ab0a4f --- /dev/null +++ b/ckit/regression/valid-programs.obs/ps2ascii.c.c @@ -0,0 +1,1935 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +typedef int __int32_t; +typedef unsigned __uint32_t; + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +typedef long long __int64_t; +typedef unsigned long long __uint64_t; + + + + + + +typedef __int32_t __psint_t; +typedef __uint32_t __psunsigned_t; + + + + + + + + + + + +typedef __int32_t __scint_t; +typedef __uint32_t __scunsigned_t; + + + + + + + + + + + + + +typedef unsigned int size_t; + + + + + +typedef long fpos_t; + + + + + +typedef __int64_t off64_t; + + + + + +typedef __int64_t fpos64_t; + + + + + + + + +typedef char *va_list; + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +typedef struct + + + + + + + + + +__file_s + +{ + + int _cnt; + + + unsigned char *_ptr; + unsigned char *_base; + + unsigned char _flag; + unsigned char _file; + +} FILE; + +extern FILE __iob[100 ]; +extern FILE *_lastbuf; +extern unsigned char *_bufendtab[]; +extern unsigned char _sibuf[], _sobuf[]; + +extern int remove(const char *); +extern int rename(const char *, const char *); +extern FILE *tmpfile(void); +extern char *tmpnam(char *); +extern int fclose(FILE *); +extern int fflush(FILE *); +extern FILE *fopen(const char *, const char *); +extern FILE *freopen(const char *, const char *, FILE *); +extern void setbuf(FILE *, char *); +extern int setvbuf(FILE *, char *, int, size_t); + +extern int fprintf(FILE *, const char *, ...); + +extern int fscanf(FILE *, const char *, ...); + +extern int printf(const char *, ...); + +extern int scanf(const char *, ...); + +extern int sprintf(char *, const char *, ...); + +extern int sscanf(const char *, const char *, ...); +extern int vfprintf(FILE *, const char *, char *); +extern int vprintf(const char *, char *); +extern int vsprintf(char *, const char *, char *); + + + +extern int fgetc(FILE *); +extern char *fgets(char *, int, FILE *); +extern int fputc(int, FILE *); +extern int fputs(const char *, FILE *); +extern int getc(FILE *); +extern int getchar(void); +extern char *gets(char *); +extern int putc(int, FILE *); +extern int putchar(int); +extern int puts(const char *); +extern int ungetc(int, FILE *); +extern size_t fread(void *, size_t, size_t, FILE *); +extern size_t fwrite(const void *, size_t, size_t, FILE *); +extern int fgetpos(FILE *, fpos_t *); +extern int fseek(FILE *, long, int); +extern int fsetpos(FILE *, const fpos_t *); +extern long ftell(FILE *); +extern void rewind(FILE *); +extern void clearerr(FILE *); +extern int feof(FILE *); +extern int ferror(FILE *); +extern void perror(const char *); + +extern int __filbuf(FILE *); +extern int __flsbuf(int, FILE *); + + + +extern FILE *fdopen(int, const char *); +extern int fileno(FILE *); + + + + +extern void flockfile(FILE *); +extern int ftrylockfile(FILE *); +extern void funlockfile(FILE *); +extern int getc_unlocked(FILE *); +extern int putc_unlocked(int, FILE *); +extern int getchar_unlocked(void); +extern int putchar_unlocked(int); + + + +extern FILE *popen(const char *, const char *); +extern int pclose(FILE *); + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +extern int getopt(int, char *const *, const char *); + +extern char *optarg; +extern int opterr; +extern int optind; +extern int optopt; + + + + + + + + + + +extern int getsubopt(char **, char *const *, char **); +extern void getoptreset(void); + + + + + +extern char *ctermid(char *); +extern char *cuserid(char *); +extern char *tempnam(const char *, const char *); +extern int getw(FILE *); +extern int putw(int, FILE *); + + + + + +extern char *mktemp(char *); +extern int mkstemp(char *); +extern int setbuffer(FILE *, char *, int); +extern int setlinebuf(FILE *); +extern int system(const char *); + + + + +extern int fgetpos64(FILE *, fpos64_t *); +extern FILE *fopen64(const char *, const char *); +extern FILE *freopen64(const char *, const char *, FILE *); +extern int fseek64(FILE *, off64_t, int); +extern int fseeko64(FILE *, off64_t, int); + +extern int fseeko(FILE *, __int64_t, int); +extern int fsetpos64(FILE *, const fpos64_t *); +extern off64_t ftell64(FILE *); + +extern __int64_t ftello(FILE *); +extern off64_t ftello64(FILE *); +extern FILE *tmpfile64(void); + + + +extern int __semputc(int, FILE *); +extern int __semgetc(FILE *); +extern int __us_rsthread_stdio; + + + +extern char *ctermid_r(char *); + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +typedef unsigned char uchar_t; +typedef unsigned short ushort_t; +typedef unsigned int uint_t; +typedef unsigned long ulong_t; + + +typedef char * addr_t; +typedef char * caddr_t; + +typedef long daddr_t; + +typedef long pgno_t; +typedef __uint32_t pfn_t; +typedef short cnt_t; +typedef unsigned long basictime_t; +typedef __int64_t micro_t; + + + + +typedef __int32_t pgcnt_t; + +typedef enum { B_FALSE, B_TRUE } boolean_t; + + + + + + + + + + + +typedef long id_t; + + + + + + + + + + +typedef ulong_t major_t; +typedef ulong_t minor_t; + + + + + + + + + + + + + + +typedef ushort_t o_mode_t; +typedef short o_dev_t; +typedef ushort_t o_uid_t; +typedef o_uid_t o_gid_t; +typedef short o_nlink_t; +typedef short o_pid_t; +typedef __uint32_t o_ino_t; + + +typedef unsigned long mode_t; +typedef unsigned long dev_t; +typedef long uid_t; +typedef long gid_t; +typedef unsigned long nlink_t; +typedef long pid_t; + + + +typedef dev_t vertex_hdl_t; + + +typedef unsigned long ino_t; + +typedef __uint64_t ino64_t; + + +typedef long off_t; + + + + +typedef __scint_t __scoff_t; + +typedef __scoff_t scoff_t; + + + + +typedef __int64_t blkcnt64_t; +typedef __uint64_t fsblkcnt64_t; +typedef __uint64_t fsfilcnt64_t; + + + +typedef long blkcnt_t; +typedef ulong_t fsblkcnt_t; +typedef ulong_t fsfilcnt_t; + + +typedef long swblk_t; +typedef unsigned long paddr_t; +typedef unsigned long iopaddr_t; +typedef int key_t; +typedef unsigned char use_t; +typedef long sysid_t; +typedef short index_t; + +typedef signed short nasid_t; +typedef signed short cnodeid_t; +typedef signed char partid_t; +typedef signed short moduleid_t; + +typedef unsigned int lock_t; +typedef signed short cpuid_t; +typedef unsigned char pri_t; +typedef __uint64_t accum_t; +typedef __int64_t prid_t; +typedef __int64_t ash_t; +typedef int cell_t; + + + + + + +typedef int ssize_t; + + + + + + + +typedef long time_t; + + + + + + + +typedef long clock_t; + + + + + + + +typedef long wchar_t; + + + + + + +typedef int clockid_t; + + + +typedef int timer_t; + + + + + + + + +typedef unsigned int useconds_t; + + + + + + +typedef __scunsigned_t bitnum_t; +typedef __scunsigned_t bitlen_t; + + +typedef int processorid_t; +typedef int toid_t; +typedef long *qaddr_t; +typedef __uint32_t inst_t; + + + + +typedef unsigned machreg_t; + + + + + + + +typedef __uint32_t fpreg_t; + + + + + + + + + + + + +typedef signed char int8_t; +typedef unsigned char uint8_t; +typedef signed short int16_t; +typedef unsigned short uint16_t; +typedef signed int int32_t; +typedef unsigned int uint32_t; +typedef __int64_t int64_t; +typedef __uint64_t uint64_t; +typedef __int64_t intmax_t; +typedef __uint64_t uintmax_t; +typedef signed long int intptr_t; +typedef unsigned long int uintptr_t; + + + + + + +typedef unsigned char u_int8_t; +typedef unsigned short u_int16_t; +typedef __uint32_t u_int32_t; + + + + + + + + + + + + + + + + + + + + +typedef long hostid_t; + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +typedef struct { int r[1]; } * physadr; +typedef unsigned char unchar; +typedef unsigned char u_char; +typedef unsigned short ushort; +typedef unsigned short u_short; +typedef unsigned int uint; +typedef unsigned int u_int; +typedef unsigned long ulong; +typedef unsigned long u_long; +typedef struct _quad { long val[2]; } quad; + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +typedef long fd_mask_t; +typedef unsigned long ufd_mask_t; + + + + + + +typedef struct fd_set { + fd_mask_t fds_bits[(((1024)+(( (int)(sizeof(fd_mask_t) * 8))-1))/( (int)(sizeof(fd_mask_t) * 8))) ]; +} fd_set; + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +extern void *memcpy(void *, const void *, size_t); +extern void *memmove(void *, const void *, size_t); +extern char *strcpy(char *, const char *); +extern char *strncpy(char *, const char *, size_t); +extern char *strcat(char *, const char *); +extern char *strncat(char *, const char *, size_t); +extern void *memccpy(void *, const void *, int, size_t); +extern int memcmp(const void *, const void *, size_t); +extern int strcmp(const char *, const char *); +extern int strcoll(const char *, const char *); +extern int strncmp(const char *, const char *, size_t); +extern size_t strxfrm(char *, const char *, size_t); +extern void *memchr(const void *, int, size_t); +extern char *strchr(const char *, int); +extern size_t strcspn(const char *, const char *); +extern char *strpbrk(const char *, const char *); +extern char *strrchr(const char *, int); +extern size_t strspn(const char *, const char *); +extern char *strstr(const char *, const char *); +extern char *strtok(char *, const char *); +extern void *memset(void *, int, size_t); +extern char *strerror(int); +extern size_t strlen(const char *); + + +extern int ffs(int); + +extern int strcasecmp(const char *, const char *); +extern int strncasecmp(const char *, const char *, size_t); + + + +extern char *strdup(const char *); + + + +extern char *strtok_r(char *, const char *, char **); + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +typedef long fd_mask; + + + + + + + + + + + + + +typedef struct { + __uint32_t sigbits[2]; +} k_sigset_t; + + + + + + + + + + + + + + +extern int bcmp(const void *, const void *, size_t); +extern void bcopy(const void *, void *, size_t); +extern void bzero(void *, size_t); +extern char *index(const char *, int); +extern char *rindex(const char *, int); + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +extern int isalnum(int); +extern int isalpha(int); +extern int iscntrl(int); +extern int isdigit(int); +extern int isgraph(int); +extern int islower(int); +extern int isprint(int); +extern int ispunct(int); +extern int isspace(int); +extern int isupper(int); +extern int isxdigit(int); +extern int tolower(int); +extern int toupper(int); + + +extern int isascii(int); +extern int toascii(int); + +extern int _tolower(int); +extern int _toupper(int); + + +extern unsigned char __ctype[]; + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +typedef struct { + int quot; + int rem; + } div_t; + +typedef struct { + long quot; + long rem; + } ldiv_t; + + +typedef struct { + long long quot; + long long rem; + } lldiv_t; + + + + + + + + + + +extern unsigned char __ctype[]; + + + + +extern double atof(const char *); +extern int atoi(const char *); +extern long int atol(const char *); +extern double strtod(const char *, char **); +extern long int strtol(const char *, char **, int); +extern unsigned long int strtoul(const char *, char **, int); +extern int rand(void); +extern void srand(unsigned int); +extern void *calloc(size_t, size_t); +extern void free(void *); +extern void *malloc(size_t); +extern void *realloc(void *, size_t); + +extern void abort(void); +extern int atexit(void (*)(void)); +extern void exit(int); + +extern char *getenv(const char *); +extern int system(const char *); +extern void *bsearch(const void *, const void *, size_t, size_t, + int (*)(const void *, const void *)); +extern void qsort(void *, size_t, size_t, + int (*)(const void *, const void *)); + + +extern int abs(int); + + + +extern div_t div(int, int); +extern long int labs(long); + +extern ldiv_t ldiv(long, long); +extern int mbtowc(wchar_t *, const char *, size_t); +extern int mblen(const char *, size_t); +extern int wctomb(char *, wchar_t); +extern size_t mbstowcs(wchar_t *, const char *, size_t); +extern size_t wcstombs(char *, const wchar_t *, size_t); + + + +extern int putenv(const char *); +extern double drand48(void); +extern double erand48(unsigned short [3]); +extern long lrand48(void); +extern long nrand48(unsigned short [3]); +extern long mrand48(void); +extern long jrand48(unsigned short [3]); +extern void srand48(long); +extern void lcong48(unsigned short int [7]); +extern void setkey(const char *); +extern unsigned short * seed48(unsigned short int [3]); + + + + +extern long a64l(const char *); +extern char *ecvt(double, int, int *, int *); +extern char *fcvt(double, int, int *, int *); +extern char *gcvt(double, int, char *); +extern int getsubopt(char **, char * const *, char **); +extern int grantpt(int); +extern char *initstate(unsigned int, char *, size_t); +extern char *l64a(long); +extern char *mktemp(char *); +extern int mkstemp(char *); +extern char *ptsname(int); +extern long random(void); +extern char *realpath(const char *, char *); +extern char *setstate(const char *); +extern void srandom(unsigned); +extern int ttyslot(void); +extern int unlockpt(int); +extern void *valloc(size_t); + + + + +extern int rand_r(unsigned int *); + + + + + +extern int atcheckpoint(void (*)(void)); +extern int atrestart(void (*)(void)); +extern int getpw(int, char *); +extern void l3tol(long *, const char *, int); +extern void ltol3(char *, const long *, int); +extern void *memalign(size_t, size_t); + + + + + + + +extern int dup2(int, int); +extern char *getcwd(char *, size_t); +extern char *getlogin(void); +extern char *getpass(const char *); +extern int isatty(int); +extern void swab(const void *, void *, ssize_t); +extern char *ttyname(int); +extern long long int atoll(const char *); +extern long long int strtoll(const char *, char **, int); +extern unsigned long long int strtoull(const char *, char **, int); +extern long long llabs(long long); + +extern lldiv_t lldiv(long long, long long); + + + +extern char *ecvt_r(double, int, int *, int *, char *); +extern char *fcvt_r(double, int, int *, int *, char *); + + + + + + + + + + + +void dviparse(FILE *); +void psparse(); + +void main(argc, argv) +int argc; +char *argv[]; +{ + int i, + known_flag, + dvi_file = 0 ; + FILE *file, *source; + + source = (&__iob[0]) ; + for(i=1; i_cnt < 0 ? __flsbuf(((' ')), ( (&__iob[1]))) : (int)(*( (&__iob[1]))->_ptr++ = (unsigned char)((' '))))) ; ; break; + case '-' : if((c = fgetc(source)) == ')') { + word_over_line=1 ; + } + else { + (__us_rsthread_stdio ? __semputc((ch), (&__iob[1])) : (--( (&__iob[1]))->_cnt < 0 ? __flsbuf(((ch)), ( (&__iob[1]))) : (int)(*( (&__iob[1]))->_ptr++ = (unsigned char)((ch))))) ; ; + } + ungetc(c, source); + break; + + (__us_rsthread_stdio ? __semputc((' '), (&__iob[1])) : (--( (&__iob[1]))->_cnt < 0 ? __flsbuf(((' ')), ( (&__iob[1]))) : (int)(*( (&__iob[1]))->_ptr++ = (unsigned char)((' '))))) ; ; break; + case '\\' : + switch(ch=fgetc(source)) + { + case '(' : + case ')' : (__us_rsthread_stdio ? __semputc((ch), (&__iob[1])) : (--( (&__iob[1]))->_cnt < 0 ? __flsbuf(((ch)), ( (&__iob[1]))) : (int)(*( (&__iob[1]))->_ptr++ = (unsigned char)((ch))))) ; ; break; + case 't' : (__us_rsthread_stdio ? __semputc(('\t'), (&__iob[1])) : (--( (&__iob[1]))->_cnt < 0 ? __flsbuf((('\t')), ( (&__iob[1]))) : (int)(*( (&__iob[1]))->_ptr++ = (unsigned char)(('\t'))))) ; ; break; + case '\n' : break; + case 'n' : break; + case '\\': (__us_rsthread_stdio ? __semputc(('"'), (&__iob[1])) : (--( (&__iob[1]))->_cnt < 0 ? __flsbuf((('"')), ( (&__iob[1]))) : (int)(*( (&__iob[1]))->_ptr++ = (unsigned char)(('"'))))) ; ; break; + case '0' : switch(ch=fgetc(source)) + { + case '1': switch(ch=fgetc(source)) + { + case '3' : fputs("ff",(&__iob[1]) ); break; + case '4' : fputs("fi",(&__iob[1]) ); break; + case '5' : fputs("fl",(&__iob[1]) ); break; + case '6' : fputs("ffi",(&__iob[1]) ); break; + case '7' : fputs("ffl",(&__iob[1]) ); break; + default: fputs("\\01",(&__iob[1]) ); (__us_rsthread_stdio ? __semputc((ch), (&__iob[1])) : (--( (&__iob[1]))->_cnt < 0 ? __flsbuf(((ch)), ( (&__iob[1]))) : (int)(*( (&__iob[1]))->_ptr++ = (unsigned char)((ch))))) ; ; + } break; + default: fputs("\\0",(&__iob[1]) ); (__us_rsthread_stdio ? __semputc((ch), (&__iob[1])) : (--( (&__iob[1]))->_cnt < 0 ? __flsbuf(((ch)), ( (&__iob[1]))) : (int)(*( (&__iob[1]))->_ptr++ = (unsigned char)((ch))))) ; ; + } break; + case '1' : case '2' : case '3' : case '4' : + case '5' : case '6' : case '7' : (__us_rsthread_stdio ? __semputc(('\\'), (&__iob[1])) : (--( (&__iob[1]))->_cnt < 0 ? __flsbuf((('\\')), ( (&__iob[1]))) : (int)(*( (&__iob[1]))->_ptr++ = (unsigned char)(('\\'))))) ; ; + default: (__us_rsthread_stdio ? __semputc((ch), (&__iob[1])) : (--( (&__iob[1]))->_cnt < 0 ? __flsbuf(((ch)), ( (&__iob[1]))) : (int)(*( (&__iob[1]))->_ptr++ = (unsigned char)((ch))))) ; ; + } break; + default: (__us_rsthread_stdio ? __semputc((ch), (&__iob[1])) : (--( (&__iob[1]))->_cnt < 0 ? __flsbuf(((ch)), ( (&__iob[1]))) : (int)(*( (&__iob[1]))->_ptr++ = (unsigned char)((ch))))) ; ; + } + else + switch(ch) + { + case '%' : fgets(junk, 80, source); break; + case '\n' : break; + case '-' : if (b_flag) + { + b_flag = 0; + b_space = 0; + + + } break; + case '(' : in_paren++; + if(!word_over_line) { + if(!((__ctype + 1)[next_to_prev_ch] & (01 | 02 )) ) { + switch(prev_ch) + { + case 'l' : case 'm' : case 'n' : case 'o' : + case 'q' : case 'r' : case 's' : case 't' : + break; + case 'y' : (__us_rsthread_stdio ? __semputc(('\n'), (&__iob[1])) : (--( (&__iob[1]))->_cnt < 0 ? __flsbuf((('\n')), ( (&__iob[1]))) : (int)(*( (&__iob[1]))->_ptr++ = (unsigned char)(('\n'))))) ; ; break; + case 'b' : if (b_space) (__us_rsthread_stdio ? __semputc((' '), (&__iob[1])) : (--( (&__iob[1]))->_cnt < 0 ? __flsbuf(((' ')), ( (&__iob[1]))) : (int)(*( (&__iob[1]))->_ptr++ = (unsigned char)((' '))))) ; ; break; + case 'a' : case 'c' : case 'd' : case 'e' : + case 'f' : case 'g' : case 'h' : case 'i' : + case 'j' : case 'k' : case 'x' : (__us_rsthread_stdio ? __semputc((' '), (&__iob[1])) : (--( (&__iob[1]))->_cnt < 0 ? __flsbuf(((' ')), ( (&__iob[1]))) : (int)(*( (&__iob[1]))->_ptr++ = (unsigned char)((' '))))) ; ; break; + default: break; + } + } + else { + (__us_rsthread_stdio ? __semputc((' '), (&__iob[1])) : (--( (&__iob[1]))->_cnt < 0 ? __flsbuf(((' ')), ( (&__iob[1]))) : (int)(*( (&__iob[1]))->_ptr++ = (unsigned char)((' '))))) ; ; + } + } + b_space = 1; + word_over_line = 0 ; + break; + default: b_flag = 0; break; + } + next_to_prev_ch=prev_ch; + prev_ch=ch; + + } +} + +void psparse(source) +FILE *source; +{ +char *str; +char junk[80]; +int ch, para=0, last=0; +while ((ch=fgetc(source)) != (-1) ) + { + switch (ch) + { + case '%' : if (para==0) fgets(junk, 80, source); + else (__us_rsthread_stdio ? __semputc((ch), (&__iob[1])) : (--( (&__iob[1]))->_cnt < 0 ? __flsbuf(((ch)), ( (&__iob[1]))) : (int)(*( (&__iob[1]))->_ptr++ = (unsigned char)((ch))))) ; + case '\n' : if (last==1) { puts(""); last=0; } break; + case '(' : if (para++>0) (__us_rsthread_stdio ? __semputc((ch), (&__iob[1])) : (--( (&__iob[1]))->_cnt < 0 ? __flsbuf(((ch)), ( (&__iob[1]))) : (int)(*( (&__iob[1]))->_ptr++ = (unsigned char)((ch))))) ; break; + case ')' : if (para-->1) (__us_rsthread_stdio ? __semputc((ch), (&__iob[1])) : (--( (&__iob[1]))->_cnt < 0 ? __flsbuf(((ch)), ( (&__iob[1]))) : (int)(*( (&__iob[1]))->_ptr++ = (unsigned char)((ch))))) ; + else (__us_rsthread_stdio ? __semputc((' '), (&__iob[1])) : (--( (&__iob[1]))->_cnt < 0 ? __flsbuf(((' ')), ( (&__iob[1]))) : (int)(*( (&__iob[1]))->_ptr++ = (unsigned char)((' '))))) ; + last=1; break; + + case '\\' : if (para>0) + switch(ch=fgetc(source)) + { + case '(' : + case ')' : (__us_rsthread_stdio ? __semputc((ch), (&__iob[1])) : (--( (&__iob[1]))->_cnt < 0 ? __flsbuf(((ch)), ( (&__iob[1]))) : (int)(*( (&__iob[1]))->_ptr++ = (unsigned char)((ch))))) ; break; + case 't' : (__us_rsthread_stdio ? __semputc(('\t'), (&__iob[1])) : (--( (&__iob[1]))->_cnt < 0 ? __flsbuf((('\t')), ( (&__iob[1]))) : (int)(*( (&__iob[1]))->_ptr++ = (unsigned char)(('\t'))))) ; break; + case 'n' : (__us_rsthread_stdio ? __semputc(('\n'), (&__iob[1])) : (--( (&__iob[1]))->_cnt < 0 ? __flsbuf((('\n')), ( (&__iob[1]))) : (int)(*( (&__iob[1]))->_ptr++ = (unsigned char)(('\n'))))) ; break; + case '\\': (__us_rsthread_stdio ? __semputc(('\\'), (&__iob[1])) : (--( (&__iob[1]))->_cnt < 0 ? __flsbuf((('\\')), ( (&__iob[1]))) : (int)(*( (&__iob[1]))->_ptr++ = (unsigned char)(('\\'))))) ; break; + case '0' : case '1' : case '2' : case '3' : + case '4' : case '5' : case '6' : case '7' : + (__us_rsthread_stdio ? __semputc(('\\'), (&__iob[1])) : (--( (&__iob[1]))->_cnt < 0 ? __flsbuf((('\\')), ( (&__iob[1]))) : (int)(*( (&__iob[1]))->_ptr++ = (unsigned char)(('\\'))))) ; + default: (__us_rsthread_stdio ? __semputc((ch), (&__iob[1])) : (--( (&__iob[1]))->_cnt < 0 ? __flsbuf(((ch)), ( (&__iob[1]))) : (int)(*( (&__iob[1]))->_ptr++ = (unsigned char)((ch))))) ; break; + } + break; + default: if (para>0) (__us_rsthread_stdio ? __semputc((ch), (&__iob[1])) : (--( (&__iob[1]))->_cnt < 0 ? __flsbuf(((ch)), ( (&__iob[1]))) : (int)(*( (&__iob[1]))->_ptr++ = (unsigned char)((ch))))) ; + } + } +} diff --git a/ckit/regression/valid-programs.obs/t22.c b/ckit/regression/valid-programs.obs/t22.c new file mode 100644 index 0000000..8e3c03c --- /dev/null +++ b/ckit/regression/valid-programs.obs/t22.c @@ -0,0 +1,12 @@ + +void myfunc (){ + return; +} + +void (*fp) () = &myfunc; + +main () +{ +} + + diff --git a/ckit/regression/valid-programs.obs/t7.c b/ckit/regression/valid-programs.obs/t7.c new file mode 100644 index 0000000..6691cec --- /dev/null +++ b/ckit/regression/valid-programs.obs/t7.c @@ -0,0 +1,44 @@ +int (**x[4]) (int,int,int); + +int (*(*y)[10]) (int,int,int); + +extern int (nar) (int x,int y,int z); + +int nar (int x,int y,int z) { + return 3; +} + +typedef int bar; + +static int g; + +static int narn (int x,int y,int z) { + return (x+y+z); +} + +void main () +{ int (**x1) (int,int,int); + int (*x2) (int,int,int); + + int (**y1) (int,int,int); + int (*y2) (int,int,int); + + int i,j; + + *x[1] = &nar; + x1 = x[1]; + x2 = *x1; + + + y1 = *y; + y2 = y1[0]; + + + i = (*x2) (4,5,6); + j = (*y2)(4,5,6); + return; +} + + + + diff --git a/ckit/regression/valid-programs/a1.c b/ckit/regression/valid-programs/a1.c new file mode 100644 index 0000000..58e4233 --- /dev/null +++ b/ckit/regression/valid-programs/a1.c @@ -0,0 +1,28 @@ +extern printf (); + +enum S { + x1, x2 +}; + +main () { + + struct S { + int abdd; + } j; + int i = x1; + + switch(i) { + case 1: + case 2: printf ("%d\n",45); + default: printf ("%d\n",45);;; + } + + if(i) { printf ("%d\n",i); } + + while(i--) { 2; } + + for(i; i; i++); + + do {3;} while (i); +} + diff --git a/ckit/regression/valid-programs/a10.c b/ckit/regression/valid-programs/a10.c new file mode 100644 index 0000000..12b4ffa --- /dev/null +++ b/ckit/regression/valid-programs/a10.c @@ -0,0 +1,13 @@ +enum X { + x1, x2, x3 + }; + +enum Y { + y1, y2, y3 + }; + +main () { + enum X i; + enum Y k; + k = i; +} diff --git a/ckit/regression/valid-programs/a100.c b/ckit/regression/valid-programs/a100.c new file mode 100644 index 0000000..0452607 --- /dev/null +++ b/ckit/regression/valid-programs/a100.c @@ -0,0 +1,8 @@ +struct sigevent; + +extern int timer_create(struct sigevent *); + +main () { + return 0; +} + diff --git a/ckit/regression/valid-programs/a101.c b/ckit/regression/valid-programs/a101.c new file mode 100644 index 0000000..de27d4b --- /dev/null +++ b/ckit/regression/valid-programs/a101.c @@ -0,0 +1,11 @@ +enum t22 { p45_B_FALSE = 0, + p46_B_TRUE = 1 + }; + +typedef enum t22 boolean_t_t23; + +main () { + boolean_t_t23 j; + return 0; +} + diff --git a/ckit/regression/valid-programs/a102.c b/ckit/regression/valid-programs/a102.c new file mode 100644 index 0000000..1f76a9e --- /dev/null +++ b/ckit/regression/valid-programs/a102.c @@ -0,0 +1,6 @@ +int a[4] = {-2,-1,0,1}; + +int main () { + return a[2]; +} + diff --git a/ckit/regression/valid-programs/a103.c b/ckit/regression/valid-programs/a103.c new file mode 100644 index 0000000..3bb2aed --- /dev/null +++ b/ckit/regression/valid-programs/a103.c @@ -0,0 +1,15 @@ +struct foo { int a[10]; + int p; + }; + +int b[100]; + +int f (int a[100]) { + return a[0]; +} + +main () { + struct foo j; + return j.p; +} + diff --git a/ckit/regression/valid-programs/a104.c b/ckit/regression/valid-programs/a104.c new file mode 100644 index 0000000..a8d4383 --- /dev/null +++ b/ckit/regression/valid-programs/a104.c @@ -0,0 +1,11 @@ +int a[4] /*= {1,2,3,4}*/; + +int f (int x, int y, int z){ + return x + y + z; +} + +int main () { + int s = 3; + return (f (a[2],(a[3] = 5,a[0]),a[1])); +} + diff --git a/ckit/regression/valid-programs/a105.c b/ckit/regression/valid-programs/a105.c new file mode 100644 index 0000000..3d88703 --- /dev/null +++ b/ckit/regression/valid-programs/a105.c @@ -0,0 +1,15 @@ +typedef int (*Rsdefkey_f) (int *,char *,int); + +typedef char mystring[10]; + +main () { + int j; + mystring s; + + s[0]='a'; + s[9]='j'; + s[10]='k'; + return j; + +} + diff --git a/ckit/regression/valid-programs/a106.c b/ckit/regression/valid-programs/a106.c new file mode 100644 index 0000000..366a913 --- /dev/null +++ b/ckit/regression/valid-programs/a106.c @@ -0,0 +1,9 @@ +struct foo { int a[10]; + int p : 4; + }; + +int main () { + struct foo j; + return j.p; +} + diff --git a/ckit/regression/valid-programs/a107.c b/ckit/regression/valid-programs/a107.c new file mode 100644 index 0000000..4021cd1 --- /dev/null +++ b/ckit/regression/valid-programs/a107.c @@ -0,0 +1,9 @@ +int t (int const *,const int); + +int main () { + int const volatile j[10]; + int const i = 4; + int const * volatile const *p; + return (i-i); +} + diff --git a/ckit/regression/valid-programs/a108.c b/ckit/regression/valid-programs/a108.c new file mode 100644 index 0000000..b3fdd9d --- /dev/null +++ b/ckit/regression/valid-programs/a108.c @@ -0,0 +1,13 @@ +void printf(); + +int main () { + int j; + int i; + + j = (i = 3) ? 4 : 5; + + printf ("i=%d, j=%d\n",i,j); + + return (j?0:j-j); +} + diff --git a/ckit/regression/valid-programs/a109.c b/ckit/regression/valid-programs/a109.c new file mode 100644 index 0000000..4ca00b9 --- /dev/null +++ b/ckit/regression/valid-programs/a109.c @@ -0,0 +1,5 @@ +struct foo *x; + +main (){ + +} diff --git a/ckit/regression/valid-programs/a11.c b/ckit/regression/valid-programs/a11.c new file mode 100644 index 0000000..3072adc --- /dev/null +++ b/ckit/regression/valid-programs/a11.c @@ -0,0 +1,7 @@ +main () { + int *i, *j; + i != 0; + i > j; + i >= j; + i < j+1; +} diff --git a/ckit/regression/valid-programs/a110.c b/ckit/regression/valid-programs/a110.c new file mode 100644 index 0000000..12efbf9 --- /dev/null +++ b/ckit/regression/valid-programs/a110.c @@ -0,0 +1,16 @@ +extern printf (); + +extern int i; + +int foo () { + + return i; +} + +int i = 10; + +main () { + printf ("foo = %d\n",foo ()); + printf ("i = %d\n",i); + return 0; +} diff --git a/ckit/regression/valid-programs/a111.c b/ckit/regression/valid-programs/a111.c new file mode 100644 index 0000000..f09192f --- /dev/null +++ b/ckit/regression/valid-programs/a111.c @@ -0,0 +1,16 @@ +extern printf (); + +extern int i; + +int foo () { + + return i; +} + +int i = 10; + +main () { + printf ("foo = %d\n",foo ()); + printf ("i = %d\n",i); + return i-i; +} diff --git a/ckit/regression/valid-programs/a112.c b/ckit/regression/valid-programs/a112.c new file mode 100644 index 0000000..fbaa198 --- /dev/null +++ b/ckit/regression/valid-programs/a112.c @@ -0,0 +1,9 @@ +extern int i; + +int i = 5; + +static int j; + +main () { + return i-i; +} diff --git a/ckit/regression/valid-programs/a113.c b/ckit/regression/valid-programs/a113.c new file mode 100644 index 0000000..9090979 --- /dev/null +++ b/ckit/regression/valid-programs/a113.c @@ -0,0 +1,6 @@ + +main () { + char *cp; + if (cp == 0) return 0; + else return 2; +} diff --git a/ckit/regression/valid-programs/a114.c b/ckit/regression/valid-programs/a114.c new file mode 100644 index 0000000..12ca826 --- /dev/null +++ b/ckit/regression/valid-programs/a114.c @@ -0,0 +1,7 @@ +int i = 5; + +static int j; + +main () { + return i-5; +} diff --git a/ckit/regression/valid-programs/a115.c b/ckit/regression/valid-programs/a115.c new file mode 100644 index 0000000..ca76bee --- /dev/null +++ b/ckit/regression/valid-programs/a115.c @@ -0,0 +1,7 @@ +extern int i; + +static int j; + +main () { + return j; +} diff --git a/ckit/regression/valid-programs/a116.c b/ckit/regression/valid-programs/a116.c new file mode 100644 index 0000000..2b5e67c --- /dev/null +++ b/ckit/regression/valid-programs/a116.c @@ -0,0 +1,11 @@ + +void foo(char * const * arg){ +} + + +main () { + +return 0; +} + + diff --git a/ckit/regression/valid-programs/a117.c b/ckit/regression/valid-programs/a117.c new file mode 100644 index 0000000..8566d60 --- /dev/null +++ b/ckit/regression/valid-programs/a117.c @@ -0,0 +1,18 @@ +extern int getopt (); + +main (int argc, int argv){ + int i; + + /* + while((i = getopt (argc, argv, "c:a:f:F:") != -1)){} + */ + + while ((i = getopt(argc, argv, "c:a:f:F:")) != -1) + { + continue; + } + + + return 0; + +} diff --git a/ckit/regression/valid-programs/a118.c b/ckit/regression/valid-programs/a118.c new file mode 100644 index 0000000..bdf2f95 --- /dev/null +++ b/ckit/regression/valid-programs/a118.c @@ -0,0 +1,12 @@ +extern struct foo *fp; + +struct bar { + struct foo *l; +}; + +main (){ + int i; + + return 0; + +} diff --git a/ckit/regression/valid-programs/a12.c b/ckit/regression/valid-programs/a12.c new file mode 100644 index 0000000..03b308a --- /dev/null +++ b/ckit/regression/valid-programs/a12.c @@ -0,0 +1,5 @@ +main () { + void *i; + int *j; + j == i; +} diff --git a/ckit/regression/valid-programs/a13.c b/ckit/regression/valid-programs/a13.c new file mode 100644 index 0000000..46fa88c --- /dev/null +++ b/ckit/regression/valid-programs/a13.c @@ -0,0 +1,22 @@ +enum X { + x1, x2, x3 + }; + +enum Y { + y1, y2, y3 + }; + +main () { + enum X i; + enum Y k; + int tmp; + int *tmp2; + tmp = k % i; + tmp = k ^ i; + tmp = k | i; + tmp = k & i; + tmp = k >> i; + tmp = k << i; + tmp2 = tmp2 + x1; + tmp2 = tmp2 + k; +} diff --git a/ckit/regression/valid-programs/a14.c b/ckit/regression/valid-programs/a14.c new file mode 100644 index 0000000..2c55a48 --- /dev/null +++ b/ckit/regression/valid-programs/a14.c @@ -0,0 +1,17 @@ +enum X { + x1, x2, x3 + }; + +enum Y { + y1, y2, y3 + }; + +main () { + enum X i; + enum Y k; + float d; + double dd; + int tmp; + int *tmp2; + tmp = (k && i) && tmp2 && (i >> k) || (k << i) || dd && d; +} diff --git a/ckit/regression/valid-programs/a15.c b/ckit/regression/valid-programs/a15.c new file mode 100644 index 0000000..71da180 --- /dev/null +++ b/ckit/regression/valid-programs/a15.c @@ -0,0 +1,6 @@ +main () { + int j=1; + int *i = &j; + + *(i++) = 3; +} diff --git a/ckit/regression/valid-programs/a16.c b/ckit/regression/valid-programs/a16.c new file mode 100644 index 0000000..dceec05 --- /dev/null +++ b/ckit/regression/valid-programs/a16.c @@ -0,0 +1,6 @@ +main () { + int i = 99999999, k; + char j; + + k = (i = j); +} diff --git a/ckit/regression/valid-programs/a17.c b/ckit/regression/valid-programs/a17.c new file mode 100644 index 0000000..3760f43 --- /dev/null +++ b/ckit/regression/valid-programs/a17.c @@ -0,0 +1,8 @@ +main() { + + int i; + + i += i; + i -= i; + i &= i; +} diff --git a/ckit/regression/valid-programs/a18.c b/ckit/regression/valid-programs/a18.c new file mode 100644 index 0000000..9f8e4ce --- /dev/null +++ b/ckit/regression/valid-programs/a18.c @@ -0,0 +1,8 @@ +f() { + int i = 1; +} + +main () { + int j; + j = sizeof(5); +} diff --git a/ckit/regression/valid-programs/a19.c b/ckit/regression/valid-programs/a19.c new file mode 100644 index 0000000..0d91f67 --- /dev/null +++ b/ckit/regression/valid-programs/a19.c @@ -0,0 +1,13 @@ +struct X { + int x1; + int x2; + int x3; +}; + +enum Y { + x1, x2, x3 +}; + +main() { + int x1 = x2; +} diff --git a/ckit/regression/valid-programs/a2.c b/ckit/regression/valid-programs/a2.c new file mode 100644 index 0000000..25eb682 --- /dev/null +++ b/ckit/regression/valid-programs/a2.c @@ -0,0 +1,4 @@ +main () { + + int i[4], *j = i; +} diff --git a/ckit/regression/valid-programs/a20.c b/ckit/regression/valid-programs/a20.c new file mode 100644 index 0000000..59395a3 --- /dev/null +++ b/ckit/regression/valid-programs/a20.c @@ -0,0 +1,18 @@ +struct X { + int x1; + int x2; + int x3; +}; + +enum Y { + x1, x2, x3 +}; + +main() { + struct X *y, *z; + float x2; + + y = 0; + y == 0; + y >= z+1; +} diff --git a/ckit/regression/valid-programs/a200.c b/ckit/regression/valid-programs/a200.c new file mode 100644 index 0000000..302c2f6 --- /dev/null +++ b/ckit/regression/valid-programs/a200.c @@ -0,0 +1,49 @@ +struct foo {int x; float y;}; +union bar {int x; float y;}; + +main (){ + + static char *s0 = "this string"; + static char s1[19] = "Not null terminated"; + static char s2[16] = "Null terminated"; + static char s3[16] = "N"; + + char *s4 = "this string"; + char s5[19] = "Not null terminated"; + char s6[16] = "Null terminated"; + + /* something weird happens here + char s7[] = "Also null terminated"; + */ + + /* should also work + static int a1[] = {0,1,2,3,4}; + */ + + static int a1[5] = {0,1,2,3,4}; + /* This is incorrect */ + static int a2[5]; + int a3[5] = {0,1,2,3,4}; + int a4[5] = {0,1,2}; + + static struct foo st1 = {1,2.0}; + static struct foo st2; + + struct foo st3 = {3,4.0}; + + static union bar u1; /* 1st component should be statically set to 0 */ + static union bar u2 = {3}; /* 1st component should be statically set to 3 */ + union bar u3; /* no initialization */ + union bar u4 = {4}; /* 1st component should be dynamically set to 4 */ + + static struct foo ast1[3] = {{1,1.0},{2,2.0},{3,3.0}}; + static struct foo ast2[3] = {1,1.0,2,2.0,3,3.0,}; + static struct foo ast3[3] = {1,1.0,2,2.0,{3,3.0,}}; + static struct foo ast4[3] = {1,1.0,2,2.0,{3,}}; + static struct foo ast5[3] = {1,1.0,2,{3,}}; + + char z[][3] = {"abc","def"}; + + + return 0; +} diff --git a/ckit/regression/valid-programs/a201.c b/ckit/regression/valid-programs/a201.c new file mode 100644 index 0000000..8320f35 --- /dev/null +++ b/ckit/regression/valid-programs/a201.c @@ -0,0 +1,9 @@ +struct foo {int x; float y;}; +union bar {int x; float y;}; + +main (){ + + char z[][3] = {{"abc"},{"def"}}; + + return 0; +} diff --git a/ckit/regression/valid-programs/a202.c b/ckit/regression/valid-programs/a202.c new file mode 100644 index 0000000..369f40c --- /dev/null +++ b/ckit/regression/valid-programs/a202.c @@ -0,0 +1,24 @@ +void printf(); + +struct foo {char x; int y;} a, b; + +main (){ + + int *p; + + p = (int *) &a; + + *p = -1; + + b = a; + /* + b.x = a.x; + b.y = a.y; +*/ + + p = (int *) &b; + + printf ("*p=%x\n",*p); + + return 0; +} diff --git a/ckit/regression/valid-programs/a203.c b/ckit/regression/valid-programs/a203.c new file mode 100644 index 0000000..2fa553d --- /dev/null +++ b/ckit/regression/valid-programs/a203.c @@ -0,0 +1,24 @@ +void printf(); + +struct foo {char x; int y;} a, b; + +main (){ + + char x; + int y; + + x = 255; + + y = (x = x + 1); + + printf ("y=%d\n",y); + + x = 255; + x = x + 1; + y = x; + + printf ("y=%d\n",y); + + + return 0; +} diff --git a/ckit/regression/valid-programs/a204.c b/ckit/regression/valid-programs/a204.c new file mode 100644 index 0000000..9e5ee5a --- /dev/null +++ b/ckit/regression/valid-programs/a204.c @@ -0,0 +1,21 @@ +void printf(); + +struct foo {int a[3]; int b;} w[] = {{1},2}; + +struct foo z[] = {{2,3,4},{5},{1,42,44,13},1,2,2,2,{3},4}; + +main (){ + + int i, j; + + for (i=0; i<(sizeof(z)/16); i++) + { + printf ("a=" ); + for (j=0;j<3;j++) + { + printf ("%10d ",z[i].a[j]); + } + printf ("b=%d\n",z[i].b); + } + +} diff --git a/ckit/regression/valid-programs/a205.c b/ckit/regression/valid-programs/a205.c new file mode 100644 index 0000000..cec75f5 --- /dev/null +++ b/ckit/regression/valid-programs/a205.c @@ -0,0 +1,8 @@ +main (){ + + int *p; + + p ++; + + p += 3; +} diff --git a/ckit/regression/valid-programs/a206.c b/ckit/regression/valid-programs/a206.c new file mode 100644 index 0000000..1f31ab5 --- /dev/null +++ b/ckit/regression/valid-programs/a206.c @@ -0,0 +1,8 @@ +main (){ + + int x = 1; + const int y = ++x; + volatile int j = ++x; + + return 0; +} diff --git a/ckit/regression/valid-programs/a207.c b/ckit/regression/valid-programs/a207.c new file mode 100644 index 0000000..28c2533 --- /dev/null +++ b/ckit/regression/valid-programs/a207.c @@ -0,0 +1,8 @@ +main (){ + + int x = 1; + const int y[3] = {2,1,3}; + volatile int j = ++x; + + return (x-j); +} diff --git a/ckit/regression/valid-programs/a209.c b/ckit/regression/valid-programs/a209.c new file mode 100644 index 0000000..9b195bc --- /dev/null +++ b/ckit/regression/valid-programs/a209.c @@ -0,0 +1,6 @@ +struct qqux {int x;}; + +main (){ + + unsigned int x; +} diff --git a/ckit/regression/valid-programs/a21.c b/ckit/regression/valid-programs/a21.c new file mode 100644 index 0000000..a3b6788 --- /dev/null +++ b/ckit/regression/valid-programs/a21.c @@ -0,0 +1,16 @@ +struct X { + int x1; + int x2; + int x3; +}; + +enum Y { + x1, x2, x3 +}; + +main() { + struct X y, z; + float x2; + + y = z; +} diff --git a/ckit/regression/valid-programs/a210.c b/ckit/regression/valid-programs/a210.c new file mode 100644 index 0000000..a8f112f --- /dev/null +++ b/ckit/regression/valid-programs/a210.c @@ -0,0 +1,6 @@ +main (){ + int * const x, y; + + y = 3; + +} diff --git a/ckit/regression/valid-programs/a211.c b/ckit/regression/valid-programs/a211.c new file mode 100644 index 0000000..11f529f --- /dev/null +++ b/ckit/regression/valid-programs/a211.c @@ -0,0 +1,10 @@ +/* "bug 1": enum constants */ +void printf(); + +main() { + + enum {e1,e2,e3} e; + printf ("e1=%d,e2=%d,e3=%d\n",e1,e2,e3); + return 1; + +} diff --git a/ckit/regression/valid-programs/a212.c b/ckit/regression/valid-programs/a212.c new file mode 100644 index 0000000..45554a2 --- /dev/null +++ b/ckit/regression/valid-programs/a212.c @@ -0,0 +1,10 @@ +/* "bug 1": enum constants */ +void printf(); + +main() { + + enum {e1=10,e2=e1+2,e3=e2+3} e; + printf ("e1=%d,e2=%d,e3=%d\n",e1,e2,e3); + return 1; + +} diff --git a/ckit/regression/valid-programs/a213.c b/ckit/regression/valid-programs/a213.c new file mode 100644 index 0000000..7935bee --- /dev/null +++ b/ckit/regression/valid-programs/a213.c @@ -0,0 +1,10 @@ +/* "bug 1": enum constants */ +void printf(); + +main() { + + enum {e1=10,e2=e1+2,e3=10} e; + printf ("e1=%d,e2=%d,e3=%d\n",e1,e2,e3); + return 1; + +} diff --git a/ckit/regression/valid-programs/a214.c b/ckit/regression/valid-programs/a214.c new file mode 100644 index 0000000..d70fb10 --- /dev/null +++ b/ckit/regression/valid-programs/a214.c @@ -0,0 +1,10 @@ +/* "bug 1": enum constants */ +void printf(); + +main() { + + enum {e1,e2=10,e3,e4=20} e; + printf ("e1=%d,e2=%d,e3=%d,e4=%d\n",e1,e2,e3,e4); + return 1; + +} diff --git a/ckit/regression/valid-programs/a215.c b/ckit/regression/valid-programs/a215.c new file mode 100644 index 0000000..20d0be4 --- /dev/null +++ b/ckit/regression/valid-programs/a215.c @@ -0,0 +1,10 @@ +/* "bug 1": enum constants */ +void printf(); + +main() { + + enum {e1,e2=10,e3=-e2,e4=20} e; + printf ("e1=%d,e2=%d,e3=%d,e4=%d\n",e1,e2,e3,e4); + return 1; + +} diff --git a/ckit/regression/valid-programs/a216.c b/ckit/regression/valid-programs/a216.c new file mode 100644 index 0000000..96bd791 --- /dev/null +++ b/ckit/regression/valid-programs/a216.c @@ -0,0 +1,16 @@ +void printf(); + +char d[] = "asdfasdf"; +const char dd[] = "asdfasdf"; + +main() { + int i; + char c[] = "asdfasdf"; + const char cc[] = "asdfasdf"; + + for(i=0; i<8; i++) { + printf("%c %c\n", c[i], cc[i]); + } + + return 1; +} diff --git a/ckit/regression/valid-programs/a217.c b/ckit/regression/valid-programs/a217.c new file mode 100644 index 0000000..5694f10 --- /dev/null +++ b/ckit/regression/valid-programs/a217.c @@ -0,0 +1,9 @@ +void printf(); + +main() { + int i; + char c[] = "5"; + const char cc[] = "5"; + + return 1; +} diff --git a/ckit/regression/valid-programs/a22.c b/ckit/regression/valid-programs/a22.c new file mode 100644 index 0000000..8aad01f --- /dev/null +++ b/ckit/regression/valid-programs/a22.c @@ -0,0 +1,20 @@ +struct X { + int x1; + int x2; + int x3; +}; + +enum Y { + x1, x2, x3 +}; + +main() { + struct X y, z; + void *p; + float x2; + char *i; + + p = &x2; + p = &y; + i = p; +} diff --git a/ckit/regression/valid-programs/a220.c b/ckit/regression/valid-programs/a220.c new file mode 100644 index 0000000..83d6da5 --- /dev/null +++ b/ckit/regression/valid-programs/a220.c @@ -0,0 +1,9 @@ +main () { + return 1; +} + +int foo () { + extern int bar; + return 1; +} + diff --git a/ckit/regression/valid-programs/a221.c b/ckit/regression/valid-programs/a221.c new file mode 100644 index 0000000..6bf9515 --- /dev/null +++ b/ckit/regression/valid-programs/a221.c @@ -0,0 +1,7 @@ +struct foo {int x; float y;}; + +main (){ + static struct foo ast1 = {1,1.0}; + struct foo y = ast1; + return 0; +} diff --git a/ckit/regression/valid-programs/a222.c b/ckit/regression/valid-programs/a222.c new file mode 100644 index 0000000..5e256e1 --- /dev/null +++ b/ckit/regression/valid-programs/a222.c @@ -0,0 +1,10 @@ +/* test for "bug 5": externs and union/struct initializers */ + +struct foo {int x; float y;}; + +main (){ + struct foo ast1 = {1,1.0}; + struct foo y = ast1; + extern int bar; + return 0; +} diff --git a/ckit/regression/valid-programs/a223.c b/ckit/regression/valid-programs/a223.c new file mode 100644 index 0000000..e99be61 --- /dev/null +++ b/ckit/regression/valid-programs/a223.c @@ -0,0 +1,14 @@ +struct foo {int x; float y;}; +struct foo ast1 = {1,1.0}; +int z[3]; + + +int main (); + +/* int (*f)() = main; */ + +int main (){ + struct foo y = ast1; + return 0; +} + diff --git a/ckit/regression/valid-programs/a224.c b/ckit/regression/valid-programs/a224.c new file mode 100644 index 0000000..c80a469 --- /dev/null +++ b/ckit/regression/valid-programs/a224.c @@ -0,0 +1,4 @@ +char c_null = '\0'; +char c_zero = '0'; + +int main () { return 0; } diff --git a/ckit/regression/valid-programs/a225.c b/ckit/regression/valid-programs/a225.c new file mode 100644 index 0000000..8727c2a --- /dev/null +++ b/ckit/regression/valid-programs/a225.c @@ -0,0 +1,13 @@ +struct foo {int x; float y;}; +struct foo ast1 = {1,1.0}; +int z[3]; + +int main(); + +int *i = z; +int *ii = {z}; + +int main (){ + return 0; +} + diff --git a/ckit/regression/valid-programs/a226.c b/ckit/regression/valid-programs/a226.c new file mode 100644 index 0000000..bde0a3a --- /dev/null +++ b/ckit/regression/valid-programs/a226.c @@ -0,0 +1,12 @@ +/* "bug 10": preincrements get turned into postincrements */ + +void printf(); + +int main() { + int i = 10; + int i1 = ++i; + int i2 = --i; + + printf("%d, %d, %d\n", i, i1, i2); + return 1; +} diff --git a/ckit/regression/valid-programs/a227.c b/ckit/regression/valid-programs/a227.c new file mode 100644 index 0000000..5a4470a --- /dev/null +++ b/ckit/regression/valid-programs/a227.c @@ -0,0 +1,10 @@ +/* "bug 8": promotion of arrays inside ?: */ + +void printf(); + +int main() { + int *ip; + int *jp; + int ia[3]; + jp = (1 ? ia : ip); + } diff --git a/ckit/regression/valid-programs/a228.c b/ckit/regression/valid-programs/a228.c new file mode 100644 index 0000000..0da9cc4 --- /dev/null +++ b/ckit/regression/valid-programs/a228.c @@ -0,0 +1,9 @@ +struct foo {int x; float y;}; +struct foo ast1 = {1,1.0}; +int z[3]; + +int main (){ + int (*f)() = main; + return 0; +} + diff --git a/ckit/regression/valid-programs/a229.c b/ckit/regression/valid-programs/a229.c new file mode 100644 index 0000000..2fa685e --- /dev/null +++ b/ckit/regression/valid-programs/a229.c @@ -0,0 +1,30 @@ +void printf(); + +struct X1 {char x; char z;} X1x[10]; +struct X9 {int :0; char x; char y;} X9x[10]; +struct X2 {char x; char y; char z;} X2x[10]; + +struct X3 {int :6; char x; char y;} X3x[10]; +struct X4 {char x; int :6; char y;} X4x[10]; + +struct X5 {int q:6; char x; char y;} X5x[10]; +struct X6 {char x; int q:6; char y;} X6x[10]; + +struct X10 {char x; int :0; char y;} X10x[10]; + +struct X7 {int x; char y; int z;} X7x[10]; +struct X8 {int x; char y; int z; char q;} X8x[10]; + +main () { + printf("%d, %d\n", sizeof(struct X1), sizeof(X1x)); + printf("%d, %d\n", sizeof(struct X9), sizeof(X9x)); + printf("%d, %d\n", sizeof(struct X2), sizeof(X2x)); + printf("%d, %d\n", sizeof(struct X3), sizeof(X3x)); + printf("%d, %d\n", sizeof(struct X4), sizeof(X4x)); + printf("%d, %d\n", sizeof(struct X5), sizeof(X5x)); + printf("%d, %d\n", sizeof(struct X6), sizeof(X6x)); + printf("%d, %d\n", sizeof(struct X10), sizeof(X10x)); + printf("%d, %d\n", sizeof(struct X7), sizeof(X7x)); + printf("%d, %d\n", sizeof(struct X8), sizeof(X8x)); +} + diff --git a/ckit/regression/valid-programs/a23.c b/ckit/regression/valid-programs/a23.c new file mode 100644 index 0000000..0a19b2b --- /dev/null +++ b/ckit/regression/valid-programs/a23.c @@ -0,0 +1,13 @@ +struct X; + +main () { + struct X { + int y; + }; + + return 0; +} + + + + diff --git a/ckit/regression/valid-programs/a230.c b/ckit/regression/valid-programs/a230.c new file mode 100644 index 0000000..16de724 --- /dev/null +++ b/ckit/regression/valid-programs/a230.c @@ -0,0 +1,41 @@ +/* +NUMBER: 12 +SUBMITTER: Kathleen Fisher +DATE: 3/15/00 + +We've run into a problem with ckit when we turn the flag +convert_function_args_to_pointers to false in the config.sml file. +The following program: + +******************************************************** +typedef int *windowTy[1]; + +int f (windowTy w) +{ + return 1; +} + +void main(){ + windowTy w; + f (w); +} +******************************************************** + +compiles just fine using cc, but it generates the following +error if we compile it with ckit: + +"array-param.hc":11.3-8: error: Bad function call: arg 1 has type windowTy +but fn parameter has type windowTy +**/ + +typedef int *windowTy[1]; + +int f (windowTy w) +{ + return 1; +} + +void main(){ + windowTy w; + f (w); +} diff --git a/ckit/regression/valid-programs/a231.c b/ckit/regression/valid-programs/a231.c new file mode 100644 index 0000000..3a93399 --- /dev/null +++ b/ckit/regression/valid-programs/a231.c @@ -0,0 +1,12 @@ +typedef int *windowTy[1]; + +int f (w) + windowTy w; +{ + return 1; +} + +void main(){ + windowTy w; + f (w); +} diff --git a/ckit/regression/valid-programs/a232.c b/ckit/regression/valid-programs/a232.c new file mode 100644 index 0000000..9ddfc12 --- /dev/null +++ b/ckit/regression/valid-programs/a232.c @@ -0,0 +1,33 @@ +/***** +NUMBER: 4 +SUBMITTER: Alexey Loginov +DATE: 2/10/00 +TEST: +STATUS: +DESCRIPTION: + old style C function parameter declarations have different + semantics from new style (with respec to promotions) and + should be preserved. + + void foo(int); + void foo(c) + char c; { } + + --> compiles, but + + void foo(int); + void foo(char c) { } + + --> which is output by ckit, does not +***/ + +void foo(int); +void foo(c) + char c; +{ + return; +} + +void main() { + return; +} diff --git a/ckit/regression/valid-programs/a233.c b/ckit/regression/valid-programs/a233.c new file mode 100644 index 0000000..5cc4948 --- /dev/null +++ b/ckit/regression/valid-programs/a233.c @@ -0,0 +1,12 @@ +/* Bug reported by Olivier; + used to give: + error: Type Error: operand of unary op ! must be a number. + Problem: +*/ + +main() { + void* p; + + !p; +} + diff --git a/ckit/regression/valid-programs/a234.c b/ckit/regression/valid-programs/a234.c new file mode 100644 index 0000000..96ab06e --- /dev/null +++ b/ckit/regression/valid-programs/a234.c @@ -0,0 +1,11 @@ +/* enum tree_code {a1, a2, a3}; */ + +struct tree_common +{ + enum tree_code code : 8; +}; + +struct tree_int_cst +{ + char common[sizeof (struct tree_common)]; +}; diff --git a/ckit/regression/valid-programs/a235.c b/ckit/regression/valid-programs/a235.c new file mode 100644 index 0000000..3b4ddac --- /dev/null +++ b/ckit/regression/valid-programs/a235.c @@ -0,0 +1,11 @@ +int fun(); + +int (*ptr)() = fun; + +int fun() { + return 0; +} + +void main () { + return; +} diff --git a/ckit/regression/valid-programs/a236.c b/ckit/regression/valid-programs/a236.c new file mode 100644 index 0000000..db501ba --- /dev/null +++ b/ckit/regression/valid-programs/a236.c @@ -0,0 +1,7 @@ +int a[1]; + +int *p = a; + +void main () { + return; +} diff --git a/ckit/regression/valid-programs/a237.c b/ckit/regression/valid-programs/a237.c new file mode 100644 index 0000000..a968f82 --- /dev/null +++ b/ckit/regression/valid-programs/a237.c @@ -0,0 +1,6 @@ +char * malloc(); + +main () { + char *reg_equiv_replace = (char *) malloc (sizeof *reg_equiv_replace); +} + diff --git a/ckit/regression/valid-programs/a238.c b/ckit/regression/valid-programs/a238.c new file mode 100644 index 0000000..dc37660 --- /dev/null +++ b/ckit/regression/valid-programs/a238.c @@ -0,0 +1,3 @@ +main () { + int x[] = {1,2,(int)&x}; +} diff --git a/ckit/regression/valid-programs/a239.c b/ckit/regression/valid-programs/a239.c new file mode 100644 index 0000000..84a9396 --- /dev/null +++ b/ckit/regression/valid-programs/a239.c @@ -0,0 +1,6 @@ +main () { + /* int y[2,3]; */ + int x; + x = x ? 1,2 : 3,4; + x = 1,2 + 4; +} diff --git a/ckit/regression/valid-programs/a24.c b/ckit/regression/valid-programs/a24.c new file mode 100644 index 0000000..e97d7be --- /dev/null +++ b/ckit/regression/valid-programs/a24.c @@ -0,0 +1,18 @@ +struct X { + int x1; + int x2; + int x3; +}; + +enum Y { + x1, x2, x3 +}; + +main() { + struct X { + int x2; + char x3; + } pp; + + pp.x2 = x1; +} diff --git a/ckit/regression/valid-programs/a25.c b/ckit/regression/valid-programs/a25.c new file mode 100644 index 0000000..8508f0c --- /dev/null +++ b/ckit/regression/valid-programs/a25.c @@ -0,0 +1,18 @@ +struct t { + struct s *x; +}; + +struct s { + struct t *x; +}; + +main () { + struct t; + struct s { + struct t *y; + }; + struct t { + struct s *y; + }; + return 0; +} diff --git a/ckit/regression/valid-programs/a26.c b/ckit/regression/valid-programs/a26.c new file mode 100644 index 0000000..2b4e733 --- /dev/null +++ b/ckit/regression/valid-programs/a26.c @@ -0,0 +1,18 @@ +enum e; + +struct t { + enum e *x; +}; + +enum e { + g1, g2 +}; + +struct s { + enum e *x; +}; + +main () { + + return 0; +} diff --git a/ckit/regression/valid-programs/a27.c b/ckit/regression/valid-programs/a27.c new file mode 100644 index 0000000..fe5ebe8 --- /dev/null +++ b/ckit/regression/valid-programs/a27.c @@ -0,0 +1,10 @@ +void *y; +void *x; + +void *y = &x; + +void *x = &y; + +main() { + 1; +} diff --git a/ckit/regression/valid-programs/a28.c b/ckit/regression/valid-programs/a28.c new file mode 100644 index 0000000..ab2ce2c --- /dev/null +++ b/ckit/regression/valid-programs/a28.c @@ -0,0 +1,4 @@ +main () { + const int y = 1; + y; +} diff --git a/ckit/regression/valid-programs/a29.c b/ckit/regression/valid-programs/a29.c new file mode 100644 index 0000000..3565fa8 --- /dev/null +++ b/ckit/regression/valid-programs/a29.c @@ -0,0 +1,16 @@ +enum A { + x1, x2 +}; + +enum B { + y1, y2 +}; + +int i; + +int i; + +main () { + return(0); +} + diff --git a/ckit/regression/valid-programs/a3.c b/ckit/regression/valid-programs/a3.c new file mode 100644 index 0000000..88a7905 --- /dev/null +++ b/ckit/regression/valid-programs/a3.c @@ -0,0 +1,6 @@ +main () { + int j; + j = sizeof(5); + { + } +} diff --git a/ckit/regression/valid-programs/a30.c b/ckit/regression/valid-programs/a30.c new file mode 100644 index 0000000..ce4a900 --- /dev/null +++ b/ckit/regression/valid-programs/a30.c @@ -0,0 +1,7 @@ +void *f(int x) { + return((void *) (((int) f) + x) ); +} + +main () { + return(0); +} diff --git a/ckit/regression/valid-programs/a31.c b/ckit/regression/valid-programs/a31.c new file mode 100644 index 0000000..ec56687 --- /dev/null +++ b/ckit/regression/valid-programs/a31.c @@ -0,0 +1,7 @@ +void *f(int x) { + return((void *) (((int) f) + x)); +} + +main () { + f(3); +} diff --git a/ckit/regression/valid-programs/a32.c b/ckit/regression/valid-programs/a32.c new file mode 100644 index 0000000..cd60054 --- /dev/null +++ b/ckit/regression/valid-programs/a32.c @@ -0,0 +1,17 @@ +int f(); + +int f(int c) { + return(c); +} + + + +main() +{ + char c; + f(c); + +} + + + diff --git a/ckit/regression/valid-programs/a33.c b/ckit/regression/valid-programs/a33.c new file mode 100644 index 0000000..cd05426 --- /dev/null +++ b/ckit/regression/valid-programs/a33.c @@ -0,0 +1,16 @@ +int f(int (*)[]); + +main () { + + int b [10][5]; + int (*a)[5]; + + a = b; + + f(a); +} + +int f(int (*a)[4]) { + return (a[0][3]); +} + diff --git a/ckit/regression/valid-programs/a34.c b/ckit/regression/valid-programs/a34.c new file mode 100644 index 0000000..ea69e97 --- /dev/null +++ b/ckit/regression/valid-programs/a34.c @@ -0,0 +1,14 @@ +static int g() { + return 0; +} + +static int f(int h()) { +/* x = 1; */ + return((************h)()); +} + +int main () { + return(f(g)); +} + + diff --git a/ckit/regression/valid-programs/a35.c b/ckit/regression/valid-programs/a35.c new file mode 100644 index 0000000..890b31c --- /dev/null +++ b/ckit/regression/valid-programs/a35.c @@ -0,0 +1,16 @@ +typedef unsigned short USHORT; +typedef unsigned char UCHAR; + +typedef struct { + USHORT size; + UCHAR type; + UCHAR class; + long retran :8; + long to_esid :24; + long fill :8; + long from_esid :24; +} MGIHDR; + +main () { + int i[sizeof(MGIHDR)]; +} diff --git a/ckit/regression/valid-programs/a36.c b/ckit/regression/valid-programs/a36.c new file mode 100644 index 0000000..9280a38 --- /dev/null +++ b/ckit/regression/valid-programs/a36.c @@ -0,0 +1,17 @@ +typedef unsigned long ULONG; + +typedef struct cpMSCID { + ULONG sid: 16, + swno: 8, + fill: 8; +} CP_MSCID_TYPE; + +struct A { + CP_MSCID_TYPE t; +}; + +int b[sizeof(struct A)]; + +main () { + int i; +} diff --git a/ckit/regression/valid-programs/a37.c b/ckit/regression/valid-programs/a37.c new file mode 100644 index 0000000..46dcd5b --- /dev/null +++ b/ckit/regression/valid-programs/a37.c @@ -0,0 +1,13 @@ +/* a37.c */ +/* Chandra bug, 5/25/99 */ + +void f(void); + +int main() +{ + f(); + return 0; +} + +void f() { +} diff --git a/ckit/regression/valid-programs/a38.c b/ckit/regression/valid-programs/a38.c new file mode 100644 index 0000000..014bf87 --- /dev/null +++ b/ckit/regression/valid-programs/a38.c @@ -0,0 +1,7 @@ +/* a38.c */ +/* Chandra bug, 6/3/99 */ + +int main(){ + for(;;){return 0;} + return 0; +} diff --git a/ckit/regression/valid-programs/a39.c b/ckit/regression/valid-programs/a39.c new file mode 100644 index 0000000..46e10a2 --- /dev/null +++ b/ckit/regression/valid-programs/a39.c @@ -0,0 +1,13 @@ +/* a39.c */ +/* Fisher bug, 5/20/99 */ + +struct { + int count[3]; +} *p; + +int main(){ + int i = 0; + p->count[i]++; /* generates an error. */ + p->count[i] = p->count[i]+1; /* OK */ + return 0; +} diff --git a/ckit/regression/valid-programs/a4.c b/ckit/regression/valid-programs/a4.c new file mode 100644 index 0000000..e13a8d7 --- /dev/null +++ b/ckit/regression/valid-programs/a4.c @@ -0,0 +1,9 @@ +int *f() { + int i = 1; + return(&i); +} + +main () { + void *j; + j = f(); +} diff --git a/ckit/regression/valid-programs/a40.c b/ckit/regression/valid-programs/a40.c new file mode 100644 index 0000000..8793629 --- /dev/null +++ b/ckit/regression/valid-programs/a40.c @@ -0,0 +1,9 @@ +/* Satish bug, 5/21/99 */ + +float f = 5.6, f1; + +int main() { + f1 = f * f; + + return 0; +} diff --git a/ckit/regression/valid-programs/a43.c b/ckit/regression/valid-programs/a43.c new file mode 100644 index 0000000..546647d --- /dev/null +++ b/ckit/regression/valid-programs/a43.c @@ -0,0 +1,6 @@ +int main () { + int *i, *j; + i = j+1; + + return 0; +} diff --git a/ckit/regression/valid-programs/a44.c b/ckit/regression/valid-programs/a44.c new file mode 100644 index 0000000..cdf1bd0 --- /dev/null +++ b/ckit/regression/valid-programs/a44.c @@ -0,0 +1,13 @@ +/* a37.c */ +/* Chandra bug, 5/25/99 */ + +void f(void) { +} + + +int main() +{ + f(); + + return 0; +} diff --git a/ckit/regression/valid-programs/a45.c b/ckit/regression/valid-programs/a45.c new file mode 100644 index 0000000..2530fda --- /dev/null +++ b/ckit/regression/valid-programs/a45.c @@ -0,0 +1,12 @@ +void printf(); + +main() { + + int y; + int x['\?']; + printf("\xAA\?\n\t\a\b\r\f %d eh???/n", '\xAA'); + printf("", '\001'); + + y = 1; + +} diff --git a/ckit/regression/valid-programs/a5.c b/ckit/regression/valid-programs/a5.c new file mode 100644 index 0000000..5e5b9e1 --- /dev/null +++ b/ckit/regression/valid-programs/a5.c @@ -0,0 +1,9 @@ +f() { + int i = 1; +} + +main () { + int i; + + i = f && i; +} diff --git a/ckit/regression/valid-programs/a50.c b/ckit/regression/valid-programs/a50.c new file mode 100644 index 0000000..8793629 --- /dev/null +++ b/ckit/regression/valid-programs/a50.c @@ -0,0 +1,9 @@ +/* Satish bug, 5/21/99 */ + +float f = 5.6, f1; + +int main() { + f1 = f * f; + + return 0; +} diff --git a/ckit/regression/valid-programs/a6.c b/ckit/regression/valid-programs/a6.c new file mode 100644 index 0000000..0edb9ec --- /dev/null +++ b/ckit/regression/valid-programs/a6.c @@ -0,0 +1,6 @@ +main () { + int *i; + int *j; + int k; + k = (i < j); +} diff --git a/ckit/regression/valid-programs/a60.c b/ckit/regression/valid-programs/a60.c new file mode 100644 index 0000000..f814e9f --- /dev/null +++ b/ckit/regression/valid-programs/a60.c @@ -0,0 +1,7 @@ +typedef struct {int x,y; } point; + +point x = {2,3}; + +void main () { + return; +} diff --git a/ckit/regression/valid-programs/a61.c b/ckit/regression/valid-programs/a61.c new file mode 100644 index 0000000..224f368 --- /dev/null +++ b/ckit/regression/valid-programs/a61.c @@ -0,0 +1,18 @@ +struct x { + int f; + int g; +} + +main() { + + struct x; + + struct x *k; + + struct x { + int w; + }; + + k->w = 5; + +} diff --git a/ckit/regression/valid-programs/a62.c b/ckit/regression/valid-programs/a62.c new file mode 100644 index 0000000..7101408 --- /dev/null +++ b/ckit/regression/valid-programs/a62.c @@ -0,0 +1,7 @@ + typedef struct { int x,y,z; } w; + + void main() { + w foo; + foo = foo; + } + diff --git a/ckit/regression/valid-programs/a63.c b/ckit/regression/valid-programs/a63.c new file mode 100644 index 0000000..2f529ad --- /dev/null +++ b/ckit/regression/valid-programs/a63.c @@ -0,0 +1,12 @@ +void f(int x[4]); + +main () { + int y[4]; + f(y); +} + +void f(int x[4]) { + int *y; + x = y; + x[3] = 1; +} diff --git a/ckit/regression/valid-programs/a64.c b/ckit/regression/valid-programs/a64.c new file mode 100644 index 0000000..d2a8001 --- /dev/null +++ b/ckit/regression/valid-programs/a64.c @@ -0,0 +1,8 @@ +/* null function pointers */ + +void f(int(* goo)(int)){} + +void main(){ + f(0); +} + diff --git a/ckit/regression/valid-programs/a65.c b/ckit/regression/valid-programs/a65.c new file mode 100644 index 0000000..ac1ea67 --- /dev/null +++ b/ckit/regression/valid-programs/a65.c @@ -0,0 +1,10 @@ +/* null function pointers */ + +void f(int *x) { + return; +} + +void main(){ + f(0); +} + diff --git a/ckit/regression/valid-programs/a66.c b/ckit/regression/valid-programs/a66.c new file mode 100644 index 0000000..ed33c0a --- /dev/null +++ b/ckit/regression/valid-programs/a66.c @@ -0,0 +1,8 @@ +void main() { + + char a, b, c, d, e; + + a?b:c?d:e; /* Line 5. */ + (a?b:c)?d:e; /* Line 6. */ + a?b:(c?d:e); /* Line 7. */ +} diff --git a/ckit/regression/valid-programs/a67.c b/ckit/regression/valid-programs/a67.c new file mode 100644 index 0000000..951d140 --- /dev/null +++ b/ckit/regression/valid-programs/a67.c @@ -0,0 +1,4 @@ +int a[20]; +int a[]; + +int main () { return(0);} diff --git a/ckit/regression/valid-programs/a7.c b/ckit/regression/valid-programs/a7.c new file mode 100644 index 0000000..86b2d81 --- /dev/null +++ b/ckit/regression/valid-programs/a7.c @@ -0,0 +1,9 @@ +enum e { + x1, x2, x3} + +main () { + enum e k; + int i, j; + + j = i >> x1; +} diff --git a/ckit/regression/valid-programs/a8.c b/ckit/regression/valid-programs/a8.c new file mode 100644 index 0000000..ad25607 --- /dev/null +++ b/ckit/regression/valid-programs/a8.c @@ -0,0 +1,7 @@ +enum e { + x1, x2, x3} + +main () { + enum e k; + k = 45; +} diff --git a/ckit/regression/valid-programs/a9.c b/ckit/regression/valid-programs/a9.c new file mode 100644 index 0000000..c1860c9 --- /dev/null +++ b/ckit/regression/valid-programs/a9.c @@ -0,0 +1,9 @@ +enum X { + x1, x2, x3 + } + +main () { + enum X i; + int k; + k = -i; +} diff --git a/ckit/regression/valid-programs/b2.c b/ckit/regression/valid-programs/b2.c new file mode 100644 index 0000000..3b36d02 --- /dev/null +++ b/ckit/regression/valid-programs/b2.c @@ -0,0 +1,33 @@ +struct foo {int m;}; + +int i; + +struct foo j; + +int *f (){ + + return &i; +} + +struct foo *g (){ + + return &j; +} + +main () { + struct foo *p,pp; + + *f() += 1; + + p->m += 1; + + (p++)->m += 1; + + (++p)->m += 1; + + (*p).m +=1; + + pp.m +=1; + + (*g()).m +=1; +} diff --git a/ckit/regression/valid-programs/b3.c b/ckit/regression/valid-programs/b3.c new file mode 100644 index 0000000..3ed3e88 --- /dev/null +++ b/ckit/regression/valid-programs/b3.c @@ -0,0 +1,11 @@ +struct bar {struct {int n;} m;} a, b, c; + +main () { + + typedef struct foo {struct {int n;} m;} baz; + + struct foo *p,pp; + + + +} diff --git a/ckit/regression/valid-programs/fact1.c b/ckit/regression/valid-programs/fact1.c new file mode 100644 index 0000000..e85a8f1 --- /dev/null +++ b/ckit/regression/valid-programs/fact1.c @@ -0,0 +1,34 @@ +void printf(); + +main (){ + int i,a; + + for (i=1,a=1; i<=6;i++) + { + a = a * i; + } + + printf ("fact 6 = %d\n",a); + + i = 1; + a = 1; + + while (i<=7) + { + a = a * i++; + } + + printf ("fact 7 = %d\n",a); + + + i = 1; + a = 1; + + do + { + a = a * i++; + } while (i<=8); + + printf ("fact 8 = %d\n",a); +} + diff --git a/ckit/regression/valid-programs/kf.c b/ckit/regression/valid-programs/kf.c new file mode 100644 index 0000000..b4e9bda --- /dev/null +++ b/ckit/regression/valid-programs/kf.c @@ -0,0 +1,24 @@ + +# 1 "../../tests/b.c" +# 1 "../../tests/myinc.h" + +typedef int date_t; +typedef int Htime_t; + + + + + + + +typedef struct { + short int npa; +} areacode_t; + + +# 2 "../../tests/b.c" +main(){ + int c; + c = sizeof(int); +} + diff --git a/ckit/regression/valid-programs/primes.c.c b/ckit/regression/valid-programs/primes.c.c new file mode 100644 index 0000000..1389d2b --- /dev/null +++ b/ckit/regression/valid-programs/primes.c.c @@ -0,0 +1,792 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +typedef int __int32_t; +typedef unsigned __uint32_t; + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +typedef long long __int64_t; +typedef unsigned long long __uint64_t; + + + + + + +typedef __int32_t __psint_t; +typedef __uint32_t __psunsigned_t; + + + + + + + + + + + +typedef __int32_t __scint_t; +typedef __uint32_t __scunsigned_t; + + + + + + + + + + + + + +typedef unsigned int size_t; + + + + + +typedef long fpos_t; + + + + + +typedef __int64_t off64_t; + + + + + +typedef __int64_t fpos64_t; + + + + + + + + +typedef char *va_list; + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +typedef struct + + + + + + + + + +__file_s + +{ + + int _cnt; + + + unsigned char *_ptr; + unsigned char *_base; + + unsigned char _flag; + unsigned char _file; + +} FILE; + +extern FILE __iob[100 ]; +extern FILE *_lastbuf; +extern unsigned char *_bufendtab[]; +extern unsigned char _sibuf[], _sobuf[]; + +extern int remove(const char *); +extern int rename(const char *, const char *); +extern FILE *tmpfile(void); +extern char *tmpnam(char *); +extern int fclose(FILE *); +extern int fflush(FILE *); +extern FILE *fopen(const char *, const char *); +extern FILE *freopen(const char *, const char *, FILE *); +extern void setbuf(FILE *, char *); +extern int setvbuf(FILE *, char *, int, size_t); + +extern int fprintf(FILE *, const char *, ...); + +extern int fscanf(FILE *, const char *, ...); + +extern int printf(const char *, ...); + +extern int scanf(const char *, ...); + +extern int sprintf(char *, const char *, ...); + +extern int sscanf(const char *, const char *, ...); +extern int vfprintf(FILE *, const char *, char *); +extern int vprintf(const char *, char *); +extern int vsprintf(char *, const char *, char *); + + + +extern int fgetc(FILE *); +extern char *fgets(char *, int, FILE *); +extern int fputc(int, FILE *); +extern int fputs(const char *, FILE *); +extern int getc(FILE *); +extern int getchar(void); +extern char *gets(char *); +extern int putc(int, FILE *); +extern int putchar(int); +extern int puts(const char *); +extern int ungetc(int, FILE *); +extern size_t fread(void *, size_t, size_t, FILE *); +extern size_t fwrite(const void *, size_t, size_t, FILE *); +extern int fgetpos(FILE *, fpos_t *); +extern int fseek(FILE *, long, int); +extern int fsetpos(FILE *, const fpos_t *); +extern long ftell(FILE *); +extern void rewind(FILE *); +extern void clearerr(FILE *); +extern int feof(FILE *); +extern int ferror(FILE *); +extern void perror(const char *); + +extern int __filbuf(FILE *); +extern int __flsbuf(int, FILE *); + + + +extern FILE *fdopen(int, const char *); +extern int fileno(FILE *); + + + + +extern void flockfile(FILE *); +extern int ftrylockfile(FILE *); +extern void funlockfile(FILE *); +extern int getc_unlocked(FILE *); +extern int putc_unlocked(int, FILE *); +extern int getchar_unlocked(void); +extern int putchar_unlocked(int); + + + +extern FILE *popen(const char *, const char *); +extern int pclose(FILE *); + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +extern int getopt(int, char *const *, const char *); + +extern char *optarg; +extern int opterr; +extern int optind; +extern int optopt; + + + + + + + + + + +extern int getsubopt(char **, char *const *, char **); +extern void getoptreset(void); + + + + + +extern char *ctermid(char *); +extern char *cuserid(char *); +extern char *tempnam(const char *, const char *); +extern int getw(FILE *); +extern int putw(int, FILE *); + + + + + +extern char *mktemp(char *); +extern int mkstemp(char *); +extern int setbuffer(FILE *, char *, int); +extern int setlinebuf(FILE *); +extern int system(const char *); + + + + +extern int fgetpos64(FILE *, fpos64_t *); +extern FILE *fopen64(const char *, const char *); +extern FILE *freopen64(const char *, const char *, FILE *); +extern int fseek64(FILE *, off64_t, int); +extern int fseeko64(FILE *, off64_t, int); + +extern int fseeko(FILE *, __int64_t, int); +extern int fsetpos64(FILE *, const fpos64_t *); +extern off64_t ftell64(FILE *); + +extern __int64_t ftello(FILE *); +extern off64_t ftello64(FILE *); +extern FILE *tmpfile64(void); + + + +extern int __semputc(int, FILE *); +extern int __semgetc(FILE *); +extern int __us_rsthread_stdio; + + + +extern char *ctermid_r(char *); + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +int is_prime(int); + +main() +{ + int i; + for(i = 2; i <100000; i++) + { + if (is_prime(i)) printf("%d\n", i); + } +} + +is_prime (i) +int i; +{ + int j; + for(j = 2; j * j <= i; j++) + { + if( (i % j) == 0 ) return(0); + } + return(1); +} diff --git a/ckit/regression/valid-programs/t1.c b/ckit/regression/valid-programs/t1.c new file mode 100644 index 0000000..78a7c1c --- /dev/null +++ b/ckit/regression/valid-programs/t1.c @@ -0,0 +1,16 @@ +extern int printf(); + +main () +{ + printf ("this is the end, my only friend the end"); + goto label_1; + +label_1: + printf ("this is the end, my only friend the end"); + goto label_2; + goto label_1; + +label_2: + return 0; +} + diff --git a/ckit/regression/valid-programs/t10.c b/ckit/regression/valid-programs/t10.c new file mode 100644 index 0000000..bf4bd47 --- /dev/null +++ b/ckit/regression/valid-programs/t10.c @@ -0,0 +1,15 @@ +extern printf (); + +main () +{ + int i = 10; + + while (i--) { + struct {int x; int y;} point; + printf ("i=%d\n",i); + } + + return 0; +} + + diff --git a/ckit/regression/valid-programs/t11.c b/ckit/regression/valid-programs/t11.c new file mode 100644 index 0000000..f5f07d5 --- /dev/null +++ b/ckit/regression/valid-programs/t11.c @@ -0,0 +1,18 @@ + +static char mimbar (char c) { + return (c); +} + +void main () +{ + static char c; + static char (*f) (char); + + f = mimbar; + + c = (*f) (c); +} + + + + diff --git a/ckit/regression/valid-programs/t12.c b/ckit/regression/valid-programs/t12.c new file mode 100644 index 0000000..56813f1 --- /dev/null +++ b/ckit/regression/valid-programs/t12.c @@ -0,0 +1,22 @@ +extern printf (); + +int main () +{ + int i; + + for (i=0; i<10; i++) { + switch (i) { + case 0: continue ; + default: break; + } + + if (i == 4) break; + printf ("i = %d\n",i); + } + + return 0; +} + + + + diff --git a/ckit/regression/valid-programs/t13.c b/ckit/regression/valid-programs/t13.c new file mode 100644 index 0000000..1298b0d --- /dev/null +++ b/ckit/regression/valid-programs/t13.c @@ -0,0 +1,23 @@ +extern printf (); + +int main () +{ + int i = 0; + + f1: + { + if (i++ < 4) goto f2; else goto f3; + { + f2: printf ("i = %d\n",i); + goto f1; + } + + f3: + printf ("i'= %d\n",i); + } + +} + + + + diff --git a/ckit/regression/valid-programs/t14.c b/ckit/regression/valid-programs/t14.c new file mode 100644 index 0000000..fd204f0 --- /dev/null +++ b/ckit/regression/valid-programs/t14.c @@ -0,0 +1,15 @@ +void printf(); + +struct {int x; int y;} z = {1,2}; + +int main () +{ + int i = 0; + + i = z.x; + printf ("i = %d\n",i); +} + + + + diff --git a/ckit/regression/valid-programs/t3.c b/ckit/regression/valid-programs/t3.c new file mode 100644 index 0000000..523ad27 --- /dev/null +++ b/ckit/regression/valid-programs/t3.c @@ -0,0 +1,14 @@ + +main () +{ + int i; + do { + i = i + 1; + i = i + 1; + } while (i<10); + + do { + i = i - 1; + } while (i>0); +} + diff --git a/ckit/regression/valid-programs/t4.c b/ckit/regression/valid-programs/t4.c new file mode 100644 index 0000000..e83dba4 --- /dev/null +++ b/ckit/regression/valid-programs/t4.c @@ -0,0 +1,21 @@ + +main () +{ + int i; + int x1; + int *x2; + int *x3[3]; + int **x4; + int (*x5)(); + + + + x1 = (int) i; + x2 = (int *) i; + /*x3 = (int *[3]) x3;*/ + x4 = (int **) x4; + x5 = x5; + + /* x = ((*[ ])(void)) i; */ +} + diff --git a/ckit/regression/valid-programs/t5.c b/ckit/regression/valid-programs/t5.c new file mode 100644 index 0000000..ccf3784 --- /dev/null +++ b/ckit/regression/valid-programs/t5.c @@ -0,0 +1,28 @@ +int x; + +long y; + +extern unsigned long foo (int x); +extern int main (); + +unsigned long foo (int x) { + unsigned long i; + + i = (unsigned long) x; + + return (x); +} + +main () +{ + int i; + + switch (i) { + case 2: foo (i); break; + case 3: foo (i); break; + case 5: foo (i); break; + case 7: foo (i); break; + default: foo (i*2); + } +} + diff --git a/ckit/regression/valid-programs/t6.c b/ckit/regression/valid-programs/t6.c new file mode 100644 index 0000000..f428b7a --- /dev/null +++ b/ckit/regression/valid-programs/t6.c @@ -0,0 +1,26 @@ +typedef struct {int x; int y;} point; + +int (*x)[10]; + +int *(y[42]); + +int (*(z[55]))[66][77]; + +int (*a) (int (*)(int),int,int); + + +void myfunc (int x, int y){ + return; +} + +void (*fp) (int,int) = myfunc; + +void main () +{ + point p; + int *l; + + return; +} + + diff --git a/ckit/regression/valid-programs/t8.c b/ckit/regression/valid-programs/t8.c new file mode 100644 index 0000000..ea49db4 --- /dev/null +++ b/ckit/regression/valid-programs/t8.c @@ -0,0 +1,29 @@ +extern int narn (int,int,int); + +int *i; + +static char *c; + +static char mimbar (char c,long l,double d) { + static int k; + + k = narn (k,k+2,k+3); + return c; +} + +int narn (int x,int y,int z) { + return (x+y+z); +} + +void main () +{ + register int j; + + static int k; + + return; +} + + + + diff --git a/ckit/regression/valid-programs/t9.c b/ckit/regression/valid-programs/t9.c new file mode 100644 index 0000000..bb9f631 --- /dev/null +++ b/ckit/regression/valid-programs/t9.c @@ -0,0 +1,24 @@ +extern int narn (int,int,int); + +int narn (int x, int y, int z) +{ + return 1; +} + +void main () +{ + register int j; + + static int k; + + {int k; + k = narn (k,j,j); + j = k; + } + + return; +} + + + + diff --git a/ckit/regression/valid-programs/test.c b/ckit/regression/valid-programs/test.c new file mode 100644 index 0000000..a2adddd --- /dev/null +++ b/ckit/regression/valid-programs/test.c @@ -0,0 +1,14 @@ +void printf(); + +main() +{ + + signed long i; + + for(i=-10; i<11; i++) { + printf("%lx ", ((signed long)i) * 0x3); + printf("%lx\n", ((unsigned long)i) * 0x3); + } + +} + diff --git a/ckit/regression/valid-programs/test1.c b/ckit/regression/valid-programs/test1.c new file mode 100644 index 0000000..3380f23 --- /dev/null +++ b/ckit/regression/valid-programs/test1.c @@ -0,0 +1,25 @@ +void p(); +void q(); +void printf(); + +enum nevin {X=3, Y=10}; + +int i; + +main () { + p(); + printf("%d\n", i); + q(); + printf("%d\n", i); +} + +void p() { + enum dino { L=1, S=2}; + + i = L; +} + +void q() { + enum dino { L=3, S=4}; + i = L; +} diff --git a/ckit/regression/valid-programs/test10.c b/ckit/regression/valid-programs/test10.c new file mode 100644 index 0000000..4c90dff --- /dev/null +++ b/ckit/regression/valid-programs/test10.c @@ -0,0 +1,23 @@ +void printf(); + +int i = 2; +signed int i; + +struct {int a; int b;} x; +struct {int a; int b;} y; +struct S {int a; int b; } u; +struct S v; + +main() { + { + int i=35; + struct {int a; int b;} x; + struct {int a; int b;} y; + struct S {int a; int b; } u; + struct S v; + v.a = 1; + v.b = 34; + u = v; + printf("%d\n", i); + } +} diff --git a/ckit/regression/valid-programs/test2.c b/ckit/regression/valid-programs/test2.c new file mode 100644 index 0000000..7776a16 --- /dev/null +++ b/ckit/regression/valid-programs/test2.c @@ -0,0 +1,37 @@ +void printf(); + +struct num { + short mantissa; + short exponent; +}; + + +typedef int mynum; + +typedef struct numm { + mynum mantissaa; + short exponentt; +} xxx; + +struct nummm { + mynum mantissaaa; + short exponenttt; +} v; + +typedef struct nummm nummmmm; + +typedef union q1 { + short qa; + short qb; +} q2; + +typedef union q3 { + short qaa; + short qbb; +} q4; + +typedef union q6 q7; + +mynum main () { + printf(""); + } diff --git a/ckit/regression/valid-programs/test4.c b/ckit/regression/valid-programs/test4.c new file mode 100644 index 0000000..c463397 --- /dev/null +++ b/ckit/regression/valid-programs/test4.c @@ -0,0 +1,13 @@ +void printf(); + +main() +{ + long i = 0; + unsigned long j = 0; + + i = 1; + j = 2; + + printf("%d\n", i + j); + +} diff --git a/ckit/regression/valid-programs/test5.c b/ckit/regression/valid-programs/test5.c new file mode 100644 index 0000000..c2e25e3 --- /dev/null +++ b/ckit/regression/valid-programs/test5.c @@ -0,0 +1,26 @@ +void printf(); + +typedef int mynum; + +struct num { + short mantissa; + short exponent; +}; + +typedef union { + short mantissaa; + short exponentt; +} xxx; + +typedef union { + short mantissaa; + short exponentt; +} xx; + + +enum dino { L, S}; + + +mynum main () { + printf(""); +} diff --git a/ckit/regression/valid-programs/typedef.c b/ckit/regression/valid-programs/typedef.c new file mode 100644 index 0000000..c772337 --- /dev/null +++ b/ckit/regression/valid-programs/typedef.c @@ -0,0 +1,12 @@ +typedef int foo; + +int f (int y) +{ + int foo = 1; + return (foo+y); +} + +foo main () +{ + return f(1); +} diff --git a/ckit/regression/valid-programs/typedef2.c b/ckit/regression/valid-programs/typedef2.c new file mode 100644 index 0000000..f6294ea --- /dev/null +++ b/ckit/regression/valid-programs/typedef2.c @@ -0,0 +1,13 @@ +typedef int foo; + +int f (int y) +{ + int foo = 1; + return (foo+y); +} + + +int main () +{ + return f(1); +} diff --git a/ckit/src/ast-utils/copy/copy-ast-sig.sml b/ckit/src/ast-utils/copy/copy-ast-sig.sml new file mode 100644 index 0000000..313c39a --- /dev/null +++ b/ckit/src/ast-utils/copy/copy-ast-sig.sml @@ -0,0 +1,36 @@ +(* Copyright (c) 1998 by Lucent Technologies *) + +local + type aidctx = Tables.aidtab + type 'a copier = aidctx -> 'a -> 'a + + type 'a extCopier = (Ast.expression copier * Ast.statement copier * Ast.externalDecl copier) -> 'a copier + + type expExt = + (Ast.expression,Ast.statement,Ast.binop,Ast.unop) AstExt.expressionExt + type stmtExt = + (Ast.expression,Ast.statement,Ast.binop,Ast.unop) AstExt.statementExt + type extDeclExt = + (Ast.expression,Ast.statement,Ast.binop,Ast.unop) AstExt.externalDeclExt +in + +signature COPYASTEXT = sig + val copyExprExt : expExt extCopier + val copyStmtExt : stmtExt extCopier + val copyExtDeclExt : extDeclExt extCopier +end + +signature COPYAST = sig + val copyAid : Aid.uid copier + val copyAst : Ast.ast copier + val copyExtDecl : Ast.externalDecl copier + val copyCoreExtDecl : Ast.coreExternalDecl copier + val copyDecl : Ast.declaration copier + val copyStmt : Ast.statement copier + val copyCoreStmt : Ast.coreStatement copier + val copyExpr : Ast.expression copier + val copyCoreExpr : Ast.coreExpression copier + val copyInitExpr : Ast.initExpression copier +end + +end diff --git a/ckit/src/ast-utils/copy/copy-ast.sml b/ckit/src/ast-utils/copy/copy-ast.sml new file mode 100644 index 0000000..70a1f70 --- /dev/null +++ b/ckit/src/ast-utils/copy/copy-ast.sml @@ -0,0 +1,103 @@ +(* Copyright (c) 1998 by Lucent Technologies *) + +(* Used to copy Ast structures while preserving unique adornments. + *) + +structure CopyAst : COPYAST = +struct + + open Ast + + type aidtab = Tables.aidtab + + fun copyOpt copier aidtab NONE = NONE + | copyOpt copier aidtab (SOME v) = SOME (copier aidtab v) + + fun copyAid aidtab oldaid = + let val newaid = Aid.new () + in case Aidtab.find (aidtab,oldaid) + of NONE => () (* DBM: is this anomalous, should there be a warning? *) + | SOME ctype => Aidtab.insert (aidtab, newaid, ctype); + newaid + end + + fun copyAst aidtab extdecls = + List.map (copyExtDecl aidtab) extdecls + + and copyExtDecl aidtab (DECL (coreExtDecl,aid,loc)) = + let val aid = copyAid aidtab aid + val coreExtDecl = copyCoreExtDecl aidtab coreExtDecl + in DECL (coreExtDecl,aid,loc) end + + and copyCoreExtDecl aidtab = + fn ExternalDecl decl => ExternalDecl (copyDecl aidtab decl) + | FunctionDef (id,ids,stmt) => FunctionDef (id,ids,copyStmt aidtab stmt) + | ExternalDeclExt ext => + ExternalDeclExt(CopyAstExt.copyExtDeclExt (copyExpr,copyStmt,copyExtDecl) aidtab ext) + + and copyDecl aidtab = + fn VarDecl (id,initExprOpt) => VarDecl (id,copyOpt copyInitExpr aidtab initExprOpt) + | typedecl => typedecl (* TypeDecl *) + + and copyStmt aidtab (stmt as STMT (coreStmt,aid,loc)) = + let val aid = copyAid aidtab aid + val coreStmt = copyCoreStmt aidtab coreStmt + in STMT (coreStmt,aid,loc) + end + + and copyCoreStmt aidtab = + fn Expr exprOpt => Expr (copyOpt copyExpr aidtab exprOpt) + | Compound (decls,stmts) => Compound (map (copyDecl aidtab) decls, List.map (copyStmt aidtab) stmts) + | While (expr,stmt) => While (copyExpr aidtab expr, copyStmt aidtab stmt) + | Do (expr,stmt) => Do (copyExpr aidtab expr, copyStmt aidtab stmt) + | For (exprOpt0,exprOpt1,exprOpt2,stmt) => + For (copyOpt copyExpr aidtab exprOpt0, + copyOpt copyExpr aidtab exprOpt1, + copyOpt copyExpr aidtab exprOpt2, + copyStmt aidtab stmt) + | Labeled (pid,stmt) => Labeled (pid,copyStmt aidtab stmt) + | CaseLabel (li,stmt) => CaseLabel (li,copyStmt aidtab stmt) + | DefaultLabel stmt => DefaultLabel (copyStmt aidtab stmt) + | Return exprOpt => Return (copyOpt copyExpr aidtab exprOpt) + | IfThen (expr,stmt) => IfThen (copyExpr aidtab expr,copyStmt aidtab stmt) + | IfThenElse (expr,stmt0,stmt1) => + IfThenElse (copyExpr aidtab expr,copyStmt aidtab stmt0,copyStmt aidtab stmt1) + | Switch (expr,stmt) => Switch (copyExpr aidtab expr,copyStmt aidtab stmt) + | StatExt ext => + StatExt(CopyAstExt.copyStmtExt (copyExpr,copyStmt,copyExtDecl) aidtab ext) + | stmt => stmt + + and copyExpr aidtab (EXPR (coreExpr,aid,loc)) = + let val aid = copyAid aidtab aid + val coreExpr = copyCoreExpr aidtab coreExpr + in EXPR (coreExpr,aid,loc) + end + + and copyCoreExpr aidtab = + fn Call (expr,exprs) => + Call (copyExpr aidtab expr,List.map (copyExpr aidtab) exprs) + | QuestionColon (expr0,expr1,expr2) => + QuestionColon (copyExpr aidtab expr0, + copyExpr aidtab expr1, + copyExpr aidtab expr2) + | Assign (expr0,expr1) => + Assign (copyExpr aidtab expr0, copyExpr aidtab expr1) + | Comma (expr0,expr1) => Comma (copyExpr aidtab expr0, copyExpr aidtab expr1) + | Sub (expr0,expr1) => Sub (copyExpr aidtab expr0,copyExpr aidtab expr1) + | Member (expr,pid) => Member (copyExpr aidtab expr,pid) + | Arrow (expr,pid) => Arrow (copyExpr aidtab expr,pid) + | Deref expr => Deref (copyExpr aidtab expr) + | AddrOf expr => AddrOf (copyExpr aidtab expr) + | Binop (binop,expr0,expr1) => + Binop (binop,copyExpr aidtab expr0, copyExpr aidtab expr1) + | Unop (unop,expr) => Unop (unop,copyExpr aidtab expr) + | Cast (ctype,expr) => Cast (ctype,copyExpr aidtab expr) + | ExprExt ext => + ExprExt(CopyAstExt.copyExprExt (copyExpr,copyStmt,copyExtDecl) aidtab ext) + | expr => expr + + and copyInitExpr aidtab = + fn Simple expr => Simple (copyExpr aidtab expr) + | Aggregate inits => Aggregate (map (copyInitExpr aidtab) inits) + +end (* functor CopyAstFn *) diff --git a/ckit/src/ast-utils/equality/eq-ast-ext-sig.sml b/ckit/src/ast-utils/equality/eq-ast-ext-sig.sml new file mode 100644 index 0000000..024046f --- /dev/null +++ b/ckit/src/ast-utils/equality/eq-ast-ext-sig.sml @@ -0,0 +1,16 @@ +(* Copyright (c) 1998 by Lucent Technologies *) + +local + type tables = Tables.tidtab * Tables.tidtab + type maps = Tid.uid Tidtab.uidtab * Pidtab.uid Pidtab.uidtab + + type expExt = (Ast.expression, Ast.statement, Ast.binop, Ast.unop) AstExt.expressionExt + type stmtExt = (Ast.expression, Ast.statement, Ast.binop, Ast.unop) AstExt.statementExt + type extDeclExt = (Ast.expression, Ast.statement, Ast.binop, Ast.unop) AstExt.externalDeclExt +in +signature EQASTEXT = sig + val eqExpressionExt : tables -> maps -> (expExt * expExt) -> unit + val eqStatementExt : tables -> maps -> (stmtExt * stmtExt) -> unit + val eqExternalDeclExt : tables -> maps -> (extDeclExt * extDeclExt) -> unit +end +end diff --git a/ckit/src/ast-utils/equality/eq-ast.sml b/ckit/src/ast-utils/equality/eq-ast.sml new file mode 100644 index 0000000..f61991e --- /dev/null +++ b/ckit/src/ast-utils/equality/eq-ast.sml @@ -0,0 +1,229 @@ +(* Copyright (c) 1998 by Lucent Technologies *) + +structure EqAst (*: EQAST*) = +struct + + structure Ast = Ast + structure CT = CType + structure ECT = EqCType + structure PPL = PPLib + structure PPA = PPAst + structure EAT = EqAstExt + structure PT = Pidtab + + open Ast + + exception internalFail + + val myFold = ECT.myFold + + val trace = ref false + + fun tracer pp (ttab1,ttab2) (v1,v2) = + ( print "\nChecking: " + ; PPL.ppToStrm (pp () ttab1) TextIO.stdOut v1 + ; print "\nand: " + ; PPL.ppToStrm (pp () ttab2) TextIO.stdOut v2 + ; print "\n" + ) + + fun PTinserts pidmap = + myFold (fn () => fn (v1,v2) => Pidtab.insert (pidmap,v1,v2)) () + + fun TTinserts tidmap = + myFold (fn () => fn (v1,v2) => Tidtab.insert (tidmap,v1,v2)) () + + fun eqOpt f tabs maps (NONE,NONE) = () + | eqOpt f tabs maps (SOME v1,SOME v2) = f tabs maps (v1,v2) + | eqOpt f tabs maps _ = raise internalFail + + fun eqAst (edecls1,ttab1,edecls2,ttab2) = + let val tl1 = Tidtab.listItems ttab1 + val tl2 = Tidtab.listItems ttab2 + in if List.length tl1 = List.length tl2 + then eqExternalDecls (ttab1,ttab2) (edecls1,edecls2) + else raise ECT.eqFail + end + + and eqExternalDecls tabs decs = + let val maps = (Tidtab.uidtab (),Pidtab.uidtab ()) + in getExternalTypeBindings tabs maps decs + ; myFold (eqExternalDecl tabs) maps decs + end + + and getExternalTypeBindings tabs maps decs = + myFold (getExternalTypeBinding tabs) maps decs + + (* dpo: this needs to be fixed to declare types/functions and then check *) + + and getExternalTypeBinding tabs maps edeclPair = () + + and eqExternalDecl tabs maps (DECL (coreDecl1,_,_),DECL (coreDecl2,_,_)) = + eqExternalCoreDecl tabs maps (coreDecl1,coreDecl2) + + and eqExternalCoreDecl (tabs as (ttab1,ttab2)) (maps as (tidmap,pidmap)) coreDeclPair = + ( if !trace then tracer PPA.ppCoreExternalDecl tabs coreDeclPair + else () + ; case coreDeclPair + of (ExternalDecl decl1,ExternalDecl decl2) => + eqDecl tabs maps (decl1,decl2) + | (FunctionDef (id1,ids1,stmt1),FuncDecl (id2,ids2,stmt2)) => + let val pids1 = map (fn {uid,...} => uid) (id1::ids1) + val pids2 = map (fn {uid,...} => uid) (id2::ids2) + in PTinserts pidmap (pids1,pids2) + ; eqStmt tabs maps (stmt1,stmt2) + end + | _ => raise ECT.eqFail + ) + + and eqStmt (tabs as (ttab1,ttab2)) maps (stmtPair as (STMT (coreStmt1,_,_),STMT (coreStmt2,_,_))) = + ( if !trace then tracer PPA.ppStatement tabs stmtPair else () + ; eqCoreStmt tabs maps (coreStmt1,coreStmt2) + ) + handle internalFail => + ( print "\nThese two statements are not condidered equal:" + ; PPL.ppToStrm (PPA.ppStatement () ttab1) TextIO.stdOut (#1 stmtPair) + ; print "\nand:" + ; PPL.ppToStrm (PPA.ppStatement () ttab2) TextIO.stdOut (#2 stmtPair) + ; print "\n" + ; raise ECT.eqFail + ) + + and eqDecl tabs (maps as (tidmap,pidmap)) declPair = + case declPair + of (TypeDecl tid1,TypeDecl tid2) => + ECT.getTidBindings tabs maps (tid1,tid2) + | (VarDecl ({uid=pid1,...},initExpOpt1) + ,VarDecl ({uid=pid2,...},initExpOpt2)) => + if eqInitExprOpt tabs maps (initExpOpt1,initExpOpt2) + then Pidtab.insert (pidmap,pid1,pid2) + else raise internalFail + | _ => raise internalFail + + and eqDecls tabs maps declsPair = + ECT.myFold + (fn () => fn declPair => eqDecl tabs maps declPair) + () + declsPair + + and eqInitExpr tabs maps initExpPair = + case initExpPair + of (Simple exp1,Simple exp2) => eqExpr tabs maps (exp1,exp2) + | (Aggregate initExps1,Aggregate initExps2) => + ECT.myFold + (fn () => fn iePair => eqInitExpr tabs maps iePair) + () + (initExps1,initExps2) + + and eqInitExprOpt tabs = eqOpt eqInitExpr tabs + + and eqCoreStmt (tabs as (ttab1,ttab2)) (maps as (tidmap,pidmap)) coreStmtPair = + (case coreStmtPair + of (Expr expOpt1,Expr expOpt2) => + if eqExpr tabs maps (expOpt1,expOpt2) then () + else raise internalFail + | (Compound (decls1,stmts1),Compound (decls2,stmts2)) => + ( eqDecls (ttab1,ttab2) maps (decls1,decls2) + ; eqStmts tabs maps (stmts1,stmts2) + ) + | (While (exp1,stmt1),While (exp2,stmt2)) => + if eqExpr tabs maps (exp1,exp2) + then eqStmt tabs maps (stmt1,stmt2) + else raise internalFail + | (Do (exp1,stmt1),Do (exp2,stmt2)) => + if eqExpr tabs maps (exp1,exp2) + then eqStmt tabs maps (stmt1,stmt2) + else raise internalFail + | (For (expOpt1_1,expOpt1_2,expOpt1_3,stmt1) + ,For (expOpt2_1,expOpt2_2,expOpt2_3,stmt2)) => + if eqExprOpt tabs maps (expOpt1_1,expOpt2_1) andalso + eqExprOpt tabs maps (expOpt1_2,expOpt2_2) andalso + eqExprOpt tabs maps (expOpt1_3,expOpt2_3) + then eqStmt tabs maps (stmt1,stmt2) + else raise internalFail + | (Labeled (pid1,stmt1),Labeled (pid2,stmt2)) => + let val pidmap = Pidtab.insert (pidmap,pid1,pid2) + in eqStmt tabs (tidmap,pidmap) (stmt1,stmt2) end + | (CaseLabel (li1,stmt1),CaseLabel (li2,stmt2)) => + if li1 = li2 then eqStmt tabs maps (stmt1,stmt2) + else raise internalFail + | (DefaultLabel stmt1,DefaultLabel stmt2) => + eqStmt tabs maps (stmt1,stmt2) + | (Goto pid1,Goto pid2) => + if ECT.eqPid pidmap (pid1,pid2) then maps + else raise internalFail + | (Break,Break) => maps + | (Continue,Continue) => maps + | (Return expOpt1,Return expOpt2) => + if eqExprOpt tabs maps (expOpt1,expOpt2) then () + else raise internalFail + | (IfThen (exp1,stmt1),IfThen (exp2,stmt2)) => + if eqExpr tabs maps (exp1,exp2) + then eqStmt tabs maps (stmt1,stmt2) + else raise internalFail + | (IfThenElse (exp1,stmt1_1,stmt1_2),IfThenElse (exp2,stmt2_1,stmt2_2)) => + if eqExpr tabs maps (exp1,exp2) + then eqStmts tabs maps ([stmt1_1,stmt1_2],[stmt2_1,stmt2_2]) + else raise internalFail + | (Switch (exp1,stmt1),Switch (exp2,stmt2)) => + if eqExpr tabs maps (exp1,exp2) + then eqStmt tabs maps (stmt1,stmt2) + else raise internalFail + | (StatExt se1,StatExt se2) => EAT.eqStatementExt tabs maps (se1,se2) + | _ => raise internalFail) + + and eqStmts tabs maps = myFold (eqStmt tabs) maps + + and eqExprOpt tabs = eqOpt eqExpr tabs + + and eqExpr (tabs as (ttab1,ttab2)) maps (expPair as (EXPR (coreExpr1,_,_),EXPR (coreExpr2,_,_))) = + ( if !trace then tracer PPA.ppExpression tabs expPair else () + ; if eqCoreExpr tabs maps (coreExpr1,coreExpr2) then true else raise internalFail + ) handle internalFail => + ( print "\nThese two expressions are not condidered equal:" + ; PPL.ppToStrm (PPA.ppExpression () ttab1) TextIO.stdOut (#1 expPair) + ; print "\n and" + ; PPL.ppToStrm (PPA.ppExpression () ttab2) TextIO.stdOut (#2 expPair) + ; print "\n" + ; raise ECT.eqFail + ) + + and eqCoreExpr tabs (maps as (tidmap,pidmap)) coreExprPair = + case coreExprPair + of (IntConst li1,IntConst li2) => li1 = li2 + | (RealConst r1,RealConst r2) => Real.== (r1,r2) + | (StringConst s1,StringConst s2) => s1=s2 + | (Call (exp1,exps1),Call (exp2,exps2)) => + eqExprs tabs maps (exp1::exps1,exp2::exps2) + | (QuestionColon (e1_1,e1_2,e1_3),QuestionColon (e2_1,e2_2,e2_3)) => + eqExprs tabs maps ([e1_1,e1_2,e1_3],[e2_1,e2_2,e2_3]) + | (Assign (e1_1,e1_2),Assign (e2_1,e2_2)) => + eqExprs tabs maps ([e1_1,e1_2],[e2_1,e2_2]) + | (Comma (e1_1,e1_2),Comma (e2_1,e2_2)) => + eqExprs tabs maps ([e1_1,e1_2],[e2_1,e2_2]) + | (Sub (e1_1,e1_2),Sub (e2_1,e2_2)) => + eqExprs tabs maps ([e1_1,e1_2],[e2_1,e2_2]) + | (Member (exp1,pid1),Member (exp2,pid2)) => + ECT.eqPid pidmap (pid1,pid2) andalso eqExpr tabs maps (exp1,exp2) + | (Arrow (exp1,pid1),Arrow (exp2,pid2)) => + ECT.eqPid pidmap (pid1,pid2) andalso eqExpr tabs maps (exp1,exp2) + | (Deref exp1,Deref exp2) => eqExpr tabs maps (exp1,exp2) + | (AddrOf exp1,AddrOf exp2) => eqExpr tabs maps (exp1,exp2) + | (Binop (binop1,e1_1,e1_2),Binop (binop2,e2_1,e2_2)) => + binop1 = binop2 andalso eqExprs tabs maps ([e1_1,e1_2],[e2_1,e2_2]) + | (Unop (unop1,exp1),Unop (unop2,exp2)) => + unop1 = unop2 andalso eqExpr tabs maps (exp1,exp2) + | (Cast (ctype1,exp1),Cast (ctype2,exp2)) => + (ECT.eqCtype tidmap (ctype1,ctype2) handle _ => false) + andalso eqExpr tabs maps (exp1,exp2) + | (Id pid1,Id pid2) => + ECT.eqPid pidmap (pid1,pid2) + | (EnumId (pid1,li1),EnumId (pid2,li2)) => + li1 = li2 andalso ECT.eqPid pidmap (pid1,pid2) + | (ExprExt ee1,ExprExt ee2) => EAT.eqExpressionExt tabs maps (ee1,ee2) + | (ErrorExpr,ErrorExpr) => true + | _ => raise internalFail + + and eqExprs tabs maps = ECT.eqList (eqExpr tabs maps) + +end (* structure EqAst *) diff --git a/ckit/src/ast-utils/equality/eq-ctype.sml b/ckit/src/ast-utils/equality/eq-ctype.sml new file mode 100644 index 0000000..f8cc262 --- /dev/null +++ b/ckit/src/ast-utils/equality/eq-ctype.sml @@ -0,0 +1,167 @@ +(* Copyright (c) 1998 by Lucent Technologies *) + +structure EqCType (*: EQCTYPE*) = struct + + structure Tid = Tid + structure Pid = Pid + structure B = Bindings + structure CT = CType + open CT + + exception eqFail + + fun warning s = (print "Warning: EqCType: "; print s; print "\n") + + fun myFold eq acc ([],[]) = acc + | myFold eq acc (f1::fs1,f2::fs2) = + myFold eq (eq acc (f1,f2)) (fs1,fs2) + | myFold eq acc _ = raise eqFail + + fun eqList eq = myFold (fn bool => fn fs => bool andalso eq fs) true + + fun getCtypeBindings tidtabs maps ctPair = + case ctPair + of (Void,Void) => () + | (Ellipses,Ellipses) => () + | (Qual (q1,ct1),Qual (q2,ct2)) => + getCtypeBindings tidtabs maps (ct1,ct2) + | (Array (li1,ct1),Array (li2,ct2)) => + getCtypeBindings tidtabs maps (ct1,ct2) + | (Pointer ct1,Pointer ct2) => + getCtypeBindings tidtabs maps (ct1,ct2) + | (Function (ct1,cts1), Function (ct2,cts2)) => + getCtypesBindings tidtabs maps (ct1::cts1,ct2::cts2) + | (EnumRef tid1,EnumRef tid2) => getTidBindings tidtabs maps (tid1,tid2) + | (StructRef tid1,StructRef tid2) => getTidBindings tidtabs maps (tid1,tid2) + | (UnionRef tid1,UnionRef tid2) => getTidBindings tidtabs maps (tid1,tid2) + | (TypeRef tid1,TypeRef tid2) => getTidBindings tidtabs maps (tid1,tid2) + | _ => () + + and getCtypesBindings tidtabs maps ctPairs = + (map (getCtypeBindings tidtabs maps) (ListPair.zip ctPairs); ()) + + and getTidBindings (tidtab1: Tables.tidtab,tidtab2: Tables.tidtab) + (maps as (tidmap,pidmap)) (tid1,tid2) = + case Tidtab.find (tidmap,tid1) + of SOME tid2' => () + | NONE => case (Tidtab.find (tidtab1,tid1),Tidtab.find (tidtab2,tid2)) + of (SOME {ntype=SOME nct1,...},SOME {ntype=SOME nct2,...}) => + ( Tidtab.insert (tidmap,tid1,tid2) + ; getNamedCtypeBindings (tidtab1,tidtab2) maps (nct1,nct2) + ) + | _ => Tidtab.insert (tidmap,tid1,tid2) + + and getNamedCtypeBindings tidtabs (maps as (tidmap,pidmap)) nctPair = + case nctPair + of (B.Struct (tid1,fields1),B.Struct (tid2,fields2)) => + let + fun getField () ((ct1,memOpt1:Ast.member option,_) + ,(ct2,memOpt2:Ast.member option,_)) = + ( getCtypeBindings tidtabs maps (ct1,ct2) + ; case (memOpt1,memOpt2) + of (SOME {uid=pid1,...},SOME {uid=pid2,...}) => + Pidtab.insert (pidmap,pid1,pid2) + | _ => () + ) + in + ( Tidtab.insert (tidmap,tid1,tid2) + ; myFold getField () (fields1,fields2) + ) + end + | (B.Union (tid1,fields1), B.Union (tid2,fields2)) => + let + fun getField () ((ct1,{uid=pid1,...}:Ast.member),(ct2,{uid=pid2,...}:Ast.member)) = + ( Pidtab.insert (pidmap,pid1,pid2) + ; getCtypeBindings tidtabs maps (ct1,ct2) + ) + in + ( Tidtab.insert (tidmap,tid1,tid2) + ; myFold getField () (fields1,fields2) + ) + end + | (B.Enum (tid1,fields1),B.Enum (tid2,fields2)) => + let fun getField () (({uid=pid1,...}:Ast.member,_) + ,({uid=pid2,...}:Ast.member,_)) = + Pidtab.insert (pidmap,pid1,pid2) + in + ( Tidtab.insert (tidmap,tid1,tid2) + ; myFold getField () (fields1,fields2) + ) + end + | (B.Typedef (tid1,ct1),B.Typedef (tid2,ct2)) => + ( Tidtab.insert (tidmap,tid1,tid2) + ; getCtypeBindings tidtabs (tidmap,pidmap)(ct1,ct2) + ) + | _ => () + + fun eqTid tidmap (tid1,tid2) = + case Tidtab.find (tidmap,tid1) + of NONE => ( warning ("tid ("^(Tid.toString tid1)^") not found, reverting to simple equality test") + ; Tid.equal (tid1,tid2) + ) + | SOME tid1' => Tid.equal (tid1',tid2) + + fun eqPid pidmap (pid1,pid2) = + case Pidtab.find (pidmap,pid1) + of NONE => ( warning ("pid ("^(Pid.toString pid1)^") not found, reverting to simple equality test") + ; Pid.equal (pid1,pid2) + ) + | SOME pid1' => Pid.equal (pid1',pid2) + + fun eqMem pidmap ({uid=pid1,...}:Ast.member ,{uid=pid2, ...}:Ast.member) = eqPid pidmap (pid1,pid2) + + fun eqMemOpt pidmap (NONE,NONE) = true + | eqMemOpt pidmap (SOME mem1,SOME mem2) = eqMem pidmap (mem1,mem2) + | eqMemOpt pidmap _ = false + + fun eqCtype tidmap ctPair = + case ctPair + of (Void,Void) => true + | (Ellipses,Ellipses) => true + | (Qual (q1,ct1),Qual (q2,ct2)) => + if q1 = q2 then eqCtype tidmap (ct1,ct2) + else false + | (Numeric quad1,Numeric quad2) => quad1 = quad2 + | (Array (li1,ct1),Array (li2,ct2)) => + if li1 = li2 then eqCtype tidmap (ct1,ct2) else false + | (Pointer ct1,Pointer ct2) => eqCtype tidmap (ct1,ct2) + | (Function (ct1,cts1), Function (ct2,cts2)) => + eqCtypes tidmap (ct1::cts1,ct2::cts2) + | (EnumRef tid1,EnumRef tid2) => eqTid tidmap (tid1,tid2) + | (StructRef tid1,StructRef tid2) => eqTid tidmap (tid1,tid2) + | (UnionRef tid1,UnionRef tid2) => eqTid tidmap (tid1,tid2) + | (TypeRef tid1,TypeRef tid2) => eqTid tidmap (tid1,tid2) + | _ => false + + and eqCtypes tidmap = eqList (eqCtype tidmap) + + and eqNamedCtype (pair as (tidmap,pidmap)) nctPair = + case nctPair + of (B.Struct (tid1,fields1),B.Struct (tid2,fields2)) => + let + fun eqField ((ct1,memOpt1,LIOpt1),(ct2,memOpt2,LIOpt2)) = + LIOpt1 = LIOpt2 + andalso eqMemOpt pidmap (memOpt1,memOpt2) + andalso eqCtype tidmap (ct1,ct2) + val eqFields = eqList eqField + in eqTid tidmap (tid1,tid2) andalso eqFields (fields1,fields2) + end + | (B.Union (tid1,fields1),B.Union (tid2,fields2)) => + let + fun eqField ((ct1,mem1),(ct2,mem2)) = + eqMem pidmap (mem1,mem2) andalso eqCtype tidmap (ct1,ct2) + val eqFields = eqList eqField + in eqTid tidmap (tid1,tid2) andalso eqFields (fields1,fields2) + end + | (B.Enum (tid1,fields1),B.Enum (tid2,fields2)) => + let fun eqField ((mem1,li1),(mem2,li2)) = + li1 = li2 andalso eqMem pidmap (mem1,mem2) + val eqFields = eqList eqField + in + eqTid tidmap (tid1,tid2) andalso eqFields (fields1,fields2) + end + | (B.Typedef (tid1,ct1),B.Typedef (tid2,ct2)) => + eqTid tidmap (tid1,tid2) andalso eqCtype tidmap (ct1,ct2) + | _ => false + +end diff --git a/ckit/src/ast-utils/simplifier/extensions/c/copy-ast-ext.sml b/ckit/src/ast-utils/simplifier/extensions/c/copy-ast-ext.sml new file mode 100644 index 0000000..ee8581a --- /dev/null +++ b/ckit/src/ast-utils/simplifier/extensions/c/copy-ast-ext.sml @@ -0,0 +1,8 @@ +(* Copyright (c) 1998 by Lucent Technologies *) + +structure CopyAstExt = struct + fun copyExprExt (copyExp,copyStmt,copyExtDecl) aidctx ext = ext + fun copyStmtExt (copyExp,copyStmt,copyExtDecl) aidctx ext = ext + fun copyExtDeclExt (copyExp,copyStmt,copyExtDecl) aidctx ext = ext +end + diff --git a/ckit/src/ast-utils/simplifier/extensions/c/simplify-ast-ext.sml b/ckit/src/ast-utils/simplifier/extensions/c/simplify-ast-ext.sml new file mode 100644 index 0000000..cbc6651 --- /dev/null +++ b/ckit/src/ast-utils/simplifier/extensions/c/simplify-ast-ext.sml @@ -0,0 +1,11 @@ +(* Copyright (c) 1998 by Lucent Technologies *) +structure SimplifyAstExt:SIMPLIFYASTEXT = struct + + exception SimplifyAstExtExn of string + + fun fail s = raise (SimplifyAstExtExn s) + + fun simplifyExtDeclExt tables simplifiers ext = fail "no external declaration extensions defined" + fun simplifyExpExt tables simplifiers ext = fail "no expression extensions defined" + fun simplifyStmtExt tables simplifiers ext = fail "no statement extensions defined" +end diff --git a/ckit/src/ast-utils/simplifier/simplify-ast-sig.sml b/ckit/src/ast-utils/simplifier/simplify-ast-sig.sml new file mode 100644 index 0000000..964ec2f --- /dev/null +++ b/ckit/src/ast-utils/simplifier/simplify-ast-sig.sml @@ -0,0 +1,57 @@ +(* Copyright (c) 1998 by Lucent Technologies *) + +local + type tidtab = Tables.tidtab + type aidtab = Tables.aidtab + type esctab = unit Pidtab.uidtab + + type extDeclExt = (Ast.expression, Ast.statement, Ast.binop, Ast.unop) + AstExt.externalDeclExt + + type expExt = (Ast.expression, Ast.statement, Ast.binop, Ast.unop) + AstExt.expressionExt + + type stmtExt = (Ast.expression, Ast.statement, Ast.binop, Ast.unop) + AstExt.statementExt + + type expSimplifier = + Ast.expression + -> {decs:Ast.declaration list,pre:Ast.statement list,exp:Ast.expression} + + type stmtSimplifier = + Ast.statement -> {decs:Ast.declaration list,stmts:Ast.statement list} +in + +signature SIMPLIFYASTEXT = +sig + + val simplifyExtDeclExt : + (tidtab * aidtab * aidtab) + -> (expSimplifier * stmtSimplifier) + -> extDeclExt + -> Ast.coreExternalDecl + + val simplifyExpExt : + (tidtab * aidtab * aidtab) + -> (expSimplifier * stmtSimplifier) + -> expExt + -> {decs:Ast.declaration list, pre:Ast.statement list, coreExp:Ast.coreExpression} + + val simplifyStmtExt : + (tidtab * aidtab * aidtab) + -> (expSimplifier * stmtSimplifier) + -> stmtExt + -> {decs:Ast.declaration list, coreStmt:Ast.coreStatement} + +end (* signature SIMPLIFYASTEXT *) + + +signature SIMPLIFYAST = +sig + + val simplifyAst : Ast.ast * tidtab * aidtab * aidtab (* opaidtab *) + -> {ast: Ast.ast, escapetab: esctab} + +end (* signature SIMPLIFYAST *) + +end (* local *) diff --git a/ckit/src/ast-utils/simplifier/simplify-ast.sml b/ckit/src/ast-utils/simplifier/simplify-ast.sml new file mode 100644 index 0000000..10b74c1 --- /dev/null +++ b/ckit/src/ast-utils/simplifier/simplify-ast.sml @@ -0,0 +1,747 @@ +(* Copyright (c) 1998 by Lucent Technologies *) + +(* The simplify transformation normalizes the C code by: + * o getting rid of pre and post-increments + * o getting rid of op='s + * o getting rid of nested assignments w/in expressions + * o getting rid of comma expressions + * o getting rid of questioncolon expressions + * o getting rid of arrows + * o translating all breaks and continues into jumps + * o translating all do, whiles, and fors into conditional jumps + * o translating all Label (id,stmt) into sequences Label (id,emptyStmt); stmt + * which allows for a more unified expression of control flow. + * o translating all global/static variables without (or with only partial) + * initializations into into explicit initializations to 0. + * o translating all local variable initializations into explicit assignments. + * + * The transformation introduces new identifiers, expressions, and statements. + * The pidtab and the aidtab are kept consistent but the opaidtab is not since it + * shouldn't be needed after this transformation. The tidtab is not affected by + * the transformation. + * + * The main transformation in the system is the simplicication of expressions into + * non-sideffecting forms. All side effects (op==,pre/pos-increment,assignment) + * expressions are lifted to statements. The grammar of expressions after this + * transformation is: + * + * exp ::= id + * | constant + * | primapp exp* - where the primapp does not have a side-effect + * | exp.exp + * | exp->exp + * | exp[exp] + * | *exp + * | &exp + * | sizeof exp - sizeof should have already been eliminated + * | {exp*} + * + * within the context of statements, where assignments and function calls must + * occur, top level expression have the following syntax: + * + * topExp ::= exp - ie the new, restricted form of expressions + * | exp (exp* ) - function calls + * | exp := exp - simple assignments + * | exp := exp (exp* ) - assignments of function call values + * + * + * Issues: + * Consider the following code: + * + * x->y->m += 12; + * + * This gets translated into: + * + * x->y->m = x->y->m + 12; + * + * by copying the arbitraily complex expression (x->y->m). + * The alternative is to introduce a temporary variable: + * + * temp = x->y + * temp->m = temp->m + 12; + * + * but this transformation is subtle as you can't write: + * + * temp = x->y->m + * temp = temp + 12; + * + *) + +structure SimplifyAst : SIMPLIFYAST = +struct + + structure Ast = Ast + structure Copy = CopyAst + open Ast + + exception simplifyExn + + fun warn msg = print msg + + fun fail msg = (print msg; raise simplifyExn) + + val strictlyPrintable = ref true (* try to make it acceptable C code: see handling of casts *) + + val sizeOf = Sizeof.byteSizeOf {warn=warn,err=fail, bug=fail} + + fun lookup looker id = + case looker id + of NONE => fail "trying to lookup id" + | SOME v => v + + fun simplifyAst (edecls,tidtab,aidtab,opaidtab) = + let + val esctab = Pidtab.uidtab () : unit Pidtab.uidtab + val getTid = lookup (fn tid => Tidtab.find (tidtab,tid)) + val getOpAid = lookup (fn aid => Aidtab.find (opaidtab,aid)) + + fun copyExp exp = Copy.copyExpr aidtab exp + + fun newLabel name = + { name=Symbol.label name + , uid = Pid.new () + , location=SourceMap.UNKNOWN + } + + fun newId name ctype = + { name=Symbol.object name + , uid=Pid.new() + , location=SourceMap.UNKNOWN + , ctype=ctype + , stClass=Ast.DEFAULT, + global=false, + status=DECLARED + , kind= if TypeUtil.isFunction tidtab ctype then Ast.FUNCTION{hasFunctionDef=false} + else Ast.NONFUN + } + + fun addEscape pid = + Pidtab.insert(esctab,pid,()) + + (* fix: this code is incomplete ... *) + (* lval ::= id | id.field | expr[expr'] | expr->field | *expr *) + fun escapes (EXPR (Id {uid=pid,...},_,_)) = addEscape pid + | escapes (EXPR (Member (expr, _))) = escapes expr + | escapes _ = () + + (* Generate a new aid, bind it to ty in aidtab, and return it *) + fun bindAid ctype = + let val aid = Aid.new () + in Aidtab.insert(aidtab,aid,ctype); + aid + end + + fun id2ctype (id: Ast.id) = #ctype id + + fun isStaticOrGlobal ({stClass=Ast.STATIC,...}: Ast.id) = true + | isStaticOrGlobal {global=true,...} = true + | isStaticOrGlobal _ = false + + fun aid2ctype aid = + case Aidtab.find (aidtab,aid) + of NONE => ( print "unknown type for aid " + ; print (Aid.toString aid) + ; print ",assuming its void\n" + ; Ast.Void + ) + | SOME ctype => ctype + + fun exp2ctype (EXPR (_,aid,_)) = aid2ctype aid + + fun coreExp2exp ctype coreExp = EXPR (coreExp,bindAid ctype,SourceMap.UNKNOWN) + + fun coreStmt2stmt coreStmt = STMT (coreStmt,Aid.new (),SourceMap.UNKNOWN) + + fun exp2stmt exp = coreStmt2stmt (Expr (SOME exp)) + + fun coreExp2stmt ctype coreExp = exp2stmt (coreExp2exp ctype coreExp) + + fun mkId id = + coreExp2exp (id2ctype id) (Id id) + + fun label id = + coreStmt2stmt (Labeled (id,coreStmt2stmt (Expr NONE))) + + fun assign id exp = + let val ctype = id2ctype id + in coreExp2stmt ctype (Assign (mkId id,exp)) end + + (* dpo: eqCtype this is potentially expensive, should we do this? *) + fun cast ctype exp = + if CTypeEq.eqCType(ctype, (exp2ctype exp)) then exp + else coreExp2exp ctype (Cast (ctype,exp)) + + fun decl id = + let val ctype = id2ctype id + in VarDecl (id,NONE) end + + fun compound decls stmts = + let fun filter [] = ([],[]) + | filter (stmt::stmts) = + let val (decls,stmts) = filter stmts + in case stmt + of STMT (Compound (cmpDecls,cmpStmts),_,_) => (cmpDecls@decls,cmpStmts@stmts) + | STMT (Expr NONE,_,_) => (decls,stmts) + | _ => (decls,stmt::stmts) + end + val (decls',stmts') = filter stmts + in Compound (decls@decls',stmts') end + + fun noEffect (EXPR (coreExpr,_,_)) = + case coreExpr + of IntConst _ => true + | RealConst _ => true + | StringConst _ => true + | Id _ => true + | _ => false + + fun stmts2stmt [] [stmt] = stmt + | stmts2stmt decls stmts = STMT (compound decls stmts,Aid.new (), + SourceMap.UNKNOWN) + + val intCt = Ast.Numeric (Ast.NONSATURATE,Ast.WHOLENUM,Ast.SIGNED,Ast.INT, Ast.SIGNASSUMED) + + val charCt = Ast.Numeric (Ast.NONSATURATE,Ast.WHOLENUM,Ast.UNSIGNED,Ast.CHAR, Ast.SIGNASSUMED) + + fun mkInt i = coreExp2exp intCt (IntConst (i:LargeInt.int)) + + fun mkChr c = coreExp2exp charCt (IntConst (Int32.fromInt (ord c))) + + fun simplifyExtDecls edecls = + map simplifyExtDecl edecls + + and simplifyExtDecl (DECL (coreExtDecl,aid,loc)) = + DECL (simplifyCoreExtDecl coreExtDecl,aid,loc) + + and simplifyCoreExtDecl coreExtDecl = + case coreExtDecl + of ExternalDecl decl => ExternalDecl decl + | FunctionDef (id,ids,stmt) => + (case simplifyStmt (NONE,NONE) stmt + of {decs=[],stmts=[stmt]} => FunctionDef (id,ids,stmt) + | {decs,stmts} => + FunctionDef (id,ids,coreStmt2stmt (compound decs stmts))) + | ExternalDeclExt ext => + SimplifyAstExt.simplifyExtDeclExt + (tidtab,aidtab,opaidtab) + (simplifyNestedExp, simplifyStmt (NONE,NONE)) + ext + + and simplifyDecls [] = {decs=[],stmts=[]} + | simplifyDecls (decl::decls) = + let val {decs=decs0,stmts=stmts0} = simplifyDecl decl + val {decs=decs1,stmts=stmts1} = simplifyDecls decls + in {decs=decs0@decs1,stmts=stmts0@stmts1} end + + and simplifyDecl decl = + case decl + of TypeDecl tid => {decs=[decl],stmts=[]} + | VarDecl (id,NONE) => {decs=[decl],stmts=[]} + | VarDecl (id,SOME initExpr) => + if isStaticOrGlobal id + then {decs=[VarDecl (id,SOME initExpr)],stmts=[]} + else let val ctype = id2ctype id + val dec = VarDecl (id, NONE) + val {decs,stmts} = simplifyAutoInit (mkId id) ctype initExpr + in {decs=decs@[dec],stmts=stmts} end + + and simplifyStmts pair [] = {decs=[],stmts=[]} + | simplifyStmts pair (stmt::stmts) = + let val {decs=decs0,stmts=stmts0} = simplifyStmt pair stmt + val {decs=decs1,stmts=stmts1} = simplifyStmts pair stmts + in {decs=decs0@decs1,stmts=stmts0@stmts1} end + + + and simplifyStmt (pair as (contOpt,brkOpt)) + (stmt as STMT (coreStmt,aid,loc)) = + let fun mkStmt coreStmt = STMT (coreStmt,aid,loc) + fun cs2stmt coreStmt = STMT (coreStmt,Aid.new (),loc) + in case coreStmt + of Expr expOpt => + let val {decs,pre,expOpt} = simplifyTopExpOpt expOpt + val stmt = mkStmt (Expr expOpt) + in {decs=decs,stmts=pre@[stmt]} end + | Compound (decls,stmts) => + let val {decs=decs0,stmts=stmts0}= simplifyDecls decls + val {decs=decs1,stmts=stmts1} = simplifyStmts pair stmts + in {decs=decs0@decs1,stmts=[mkStmt (compound [] (stmts0@stmts1))]} end + (* The translation of while minimizes the number of jumps + * in the body of the loop. + * + * while (exp,stmt) => + * goto startLabel + * topLabel: stmt + * start&contLabel: preExp + * if exp then goto topLabel + * brkLabel: + * + * NOTE: the brk label is added only if is used. + *) + | While (exp,stmt) => + let val topLab = newLabel "whileTop" + val contLab = newLabel "whileCont" + val brkLab = newLabel "whileBrk" + val contUsed = ref true + val brkUsed = ref false + val pair = (SOME (contUsed,contLab),SOME (brkUsed,brkLab)) + val {decs=expDecs,pre=preExp,exp=exp} = simplifyTopExp exp + val {decs=bodyDecs,stmts} = simplifyStmt pair stmt + val stmts = [cs2stmt (Goto contLab), + label topLab + ] + @ stmts + @ [label contLab] + @ preExp + @ [mkStmt (IfThen (exp,cs2stmt (Goto topLab)))] + @ (if !brkUsed then [label brkLab] else []) + in {decs=expDecs@bodyDecs,stmts=stmts} end + (* The translation of do minimizes the number of jumps + * in the body of the loop. + * + * do (exp,stmt) => + * topLabel: stmt + * contLabel: preExp + * if exp then goto topLabel + * brkLabel: + * + * NOTE: cont and brk labels are added only if they are used. + *) + | Do (exp,stmt) => + let val topLab = newLabel "doTop" + val contLab = newLabel "doCont" + val brkLab = newLabel "doBrk" + val contUsed = ref false + val brkUsed = ref false + val pair = (SOME (contUsed,contLab),SOME (brkUsed,brkLab)) + val {decs=expDecs,pre=preExp,exp} = simplifyTopExp exp + val {decs=bodyDecs,stmts} = simplifyStmt pair stmt + val stmts = [label topLab] + @ stmts + @ (if !contUsed then [label contLab] else []) + @ preExp + @ [mkStmt (IfThen (exp,mkStmt (Goto topLab)))] + @ (if !brkUsed then [label brkLab] else []) + in {decs=expDecs@bodyDecs,stmts=stmts} end + (* The translation of for minimizes the number of jumps + * in the body of the loop. + * + * for (e0,e1,e2,stmt) => + * preE0 + * e0 + * goto startLabel + * topLabel: stmt + * contLabel: preE2 + * e2 + * startLabel: preE1 + * if e1 then goto topLabel + * brkLabel: + * + * NOTE: cont and brk labels are added only if they are used. + *) + | For (eOpt0,eOpt1,eOpt2,stmt) => + let val topLab = newLabel "forTop" + val startLab = newLabel "forStart" + val contLab = newLabel "forCont" + val brkLab = newLabel "forBrk" + val contUsed = ref false + val brkUsed = ref false + val pair = (SOME (contUsed,contLab),SOME (brkUsed,brkLab)) + val {decs=e0Decs,pre=preE0,expOpt=eOpt0} = simplifyTopExpOpt eOpt0 + val {decs=e1Decs,pre=preE1,expOpt=eOpt1} = simplifyTopExpOpt eOpt1 + val {decs=e2Decs,pre=preE2,expOpt=eOpt2} = simplifyTopExpOpt eOpt2 + val {decs=bodyDecs,stmts} = simplifyStmt pair stmt + fun expOpt2stmt NONE = [] + | expOpt2stmt (SOME exp) = + if noEffect exp then [] else [exp2stmt exp] + + val stmts = preE0 + @ expOpt2stmt eOpt0 + @ [cs2stmt (Goto startLab) + ,label topLab + ] + @ stmts + @ (if !contUsed then [label contLab] else []) + @ preE2 + @ expOpt2stmt eOpt2 + @ [label startLab] + @ preE1 + @ (case eOpt1 + of SOME e1 => [mkStmt (IfThen (e1,cs2stmt (Goto topLab)))] + | NONE => []) + @ (if !brkUsed then [label brkLab] else []) + in {decs=e0Decs@e1Decs@e2Decs@bodyDecs,stmts=stmts} end + | Labeled (label,stmt) => + let val {decs,stmts} = simplifyStmt pair stmt + val stmt = mkStmt (Labeled (label,cs2stmt (Expr NONE))) + in {decs=decs,stmts=stmt::stmts} end + | CaseLabel (li,stmt) => + let val {decs,stmts} = simplifyStmt pair stmt + in {decs=decs,stmts=[mkStmt (CaseLabel (li,stmts2stmt [] stmts))]} + end + | DefaultLabel stmt => + let val {decs,stmts} = simplifyStmt pair stmt + in {decs=decs,stmts=[mkStmt (DefaultLabel (stmts2stmt [] stmts))]} + end + | Goto label => {decs=[],stmts=[mkStmt (Goto label)]} + | Break => + (case brkOpt + of NONE => fail "invalid context for break" + | SOME (brkUsed,label) => + ( brkUsed := true + ; {decs=[],stmts=[mkStmt (Goto label)]} + )) + | Continue => + (case contOpt + of NONE => fail "invalid context for continue" + | SOME (contUsed,label) => + (contUsed := true; + {decs=[],stmts=[mkStmt (Goto label)]})) + | Return expOpt => + let val {decs,pre,expOpt} = simplifyTopExpOpt expOpt + in {decs=decs,stmts=pre@[mkStmt (Return expOpt)]} + end + | IfThen (exp,stmt) => + let val {decs=decs0,pre,exp} = simplifyTopExp exp + val {decs=decs1,stmts} = simplifyStmt pair stmt + val stmts = pre@[mkStmt (IfThen (exp,stmts2stmt [] stmts))] + in {decs=decs0@decs1,stmts=stmts} + end + | IfThenElse (exp,stmt0,stmt1) => + let val {decs,pre,exp} = simplifyTopExp exp + val {decs=decs0,stmts=stmts0} = simplifyStmt pair stmt0 + val {decs=decs1,stmts=stmts1} = simplifyStmt pair stmt1 + val stmts = + pre@[mkStmt(IfThenElse(exp,stmts2stmt [] stmts0, + stmts2stmt [] stmts1))] + in {decs=decs@decs0@decs1,stmts=stmts} end + | Switch (exp,stmt) => + let val {decs=decs0,pre,exp} = simplifyTopExp exp + val brkLab = newLabel "switchBrk" + val brkUsed = ref false + val {decs=decs1,stmts} = + simplifyStmt (contOpt,SOME (brkUsed,brkLab)) stmt + val stmts = pre + @ [mkStmt (Switch (exp,stmts2stmt [] stmts))] + @ (if !brkUsed then [label brkLab] else []) + in {decs=decs0@decs1,stmts=stmts} + end + | ErrorStmt => {decs=nil, stmts=[mkStmt ErrorStmt]} + | StatExt ext => + let val {decs,coreStmt} = + SimplifyAstExt.simplifyStmtExt + (tidtab,aidtab,opaidtab) + (simplifyNestedExp,simplifyStmt (NONE,NONE)) + ext + in {decs=decs,stmts=[mkStmt coreStmt]} + end + end + + + and simplifyAutoInit lhs ctype initExp = + case initExp + of (Aggregate initExps) => + let val {stmts} = autoInit lhs ctype initExp + in {decs=[],stmts=stmts} end + | (Simple exp) => + let val {decs,pre,exp} = simplifyTopExp exp + val stmt = coreExp2stmt ctype (Assign (lhs,exp)) + in {decs=decs,stmts=pre@[stmt]} end + + and autoInit lhs ctype initExp = + let fun feed initer (Aggregate initExps) = initer initExps + | feed initer _ = fail "bad form for initializer" + + fun arrInit lhs ctype i [] = {stmts=[]} + | arrInit lhs ctype i (initExp::initExps) = + let val intConst = mkInt i + val arrLhs = coreExp2exp ctype (Sub (lhs,intConst)) + val {stmts} = autoInit arrLhs ctype initExp + val {stmts=stmts'} = arrInit lhs ctype (i+1) initExps + in {stmts=stmts@stmts'} end + + fun structInit lhs [] [] = {stmts=[]} + | structInit lhs [] initExps = fail "initializer too big" + | structInit lhs fields [] = fail "initializer too small" + | structInit lhs ((ctype,NONE,liOpt)::fields) initExps = + (* according to the standard, unnamed fields don't + * get initialized. + *) + structInit lhs fields initExps + | structInit lhs ((ctype,SOME mem,liOpt)::fields) (initExp::initExps) = + let val memLhs = coreExp2exp ctype (Member (lhs,mem)) + val {stmts} = autoInit memLhs ctype initExp + val {stmts=stmts'} = structInit lhs fields initExps + in {stmts=stmts@stmts'} end + + fun unionInit lhs [] initExps = {stmts=[]} + | unionInit lhs ((ctype,mem)::_) ([initExp]) = + let val lhs = coreExp2exp ctype (Member (lhs,mem)) + in autoInit lhs ctype initExp end + | unionInit lhs fields exp = fail "bad form for union" + + fun scalarInit lhs ctype (Simple exp) = + {stmts=[coreExp2stmt ctype (Assign (lhs,exp))]} + | scalarInit lhs ctype _ = + fail "bad form for initializer" + + in case ctype + of Ast.Qual (_,ctype) => autoInit lhs ctype initExp + | Ast.TypeRef tid => + (case getTid tid + of {ntype=SOME (Bindings.Typedef (tid,ctype)),...} => + autoInit lhs ctype initExp + | _ => fail "bad type for initializer") + | Ast.Array (_,ctype) => feed (arrInit lhs ctype 0) initExp + | Ast.StructRef tid => + (case getTid tid + of {ntype=SOME (Bindings.Struct (tid,fields)),...} => + feed (structInit lhs fields) initExp + | _ => fail "bad type for initializer") + | Ast.UnionRef tid => + (case getTid tid + of {ntype=SOME (Bindings.Union (tid,fields)),...} => + feed (unionInit lhs fields) initExp + | _ => fail "bad type for initializer") + | Ast.Numeric _ => scalarInit lhs ctype initExp + | Ast.Pointer _ => scalarInit lhs ctype initExp + | Ast.Function _ => scalarInit lhs ctype initExp + | Ast.EnumRef _ => scalarInit lhs ctype initExp + | _ => fail "bad type for initializer" + end + + and simplifyExps [] = {decs=[],pre=[],exps=[]} + | simplifyExps (exp::exps) = + let val {decs,pre,exp} = simplifyExp {nested=true} exp + val {decs=decs',pre=pre',exps} = simplifyExps exps + in {decs=decs@decs',pre=pre@pre',exps=exp::exps} end + + and simplifyNestedExp exp = simplifyExp {nested=true} exp + + and simplifyTopExp exp = simplifyExp {nested=false} exp + + and simplifyTopExpOpt NONE = {decs=[],pre=[],expOpt=NONE} + | simplifyTopExpOpt (SOME exp) = + let val {decs,pre,exp} = simplifyTopExp exp + in {decs=decs,pre=pre,expOpt=SOME exp} end + + + and simplifyExp {nested} (exp as EXPR (coreExp,aid,loc)) = + let fun mkExp coreExp = EXPR (coreExp,aid,loc) + val ctype = aid2ctype aid + in case coreExp + of IntConst _ => {decs=[],pre=[],exp=exp} + | RealConst _ => {decs=[],pre=[],exp=exp} + | StringConst _ => {decs=[],pre=[],exp=exp} + | Call (exp,exps) => + let val {decs=decs0,pre=pre0,exp} = simplifyNestedExp exp + val {decs=decs1,pre=pre1,exps} = simplifyExps exps + val callExp = mkExp (Call (exp,exps)) + in if nested + then let val id = newId "call" ctype + val dec = decl id + val stmt = assign id callExp + val exp = mkId id + in {decs=dec::decs0@decs1,pre=pre0@pre1@[stmt],exp=exp} end + else {decs=decs0@decs1,pre=pre0@pre1,exp=callExp} + end + | QuestionColon (exp0,exp1,exp2) => + let val id = newId "quesCol" ctype + val dec = decl id + val {decs=decs0,pre=pre0,exp=exp0} = simplifyNestedExp exp0 + val {decs=decs1,pre=pre1,exp=exp1} = simplifyTopExp exp1 + val {decs=decs2,pre=pre2,exp=exp2} = simplifyTopExp exp2 + val stmt = coreStmt2stmt + (IfThenElse + ( exp0 + , stmts2stmt decs1 (pre1@[assign id exp1]) + , stmts2stmt decs2 (pre2@[assign id exp2]) + ) + ) + val exp = mkId id + in {decs=dec::decs0,pre=pre0@[stmt],exp=exp} end + | Assign (exp0,exp1) => + let val {decs=decs0,pre=pre0,exp=exp0} = simplifyNestedExp exp0 + val {decs=decs1,pre=pre1,exp=exp1} = simplifyNestedExp exp1 + val exp = mkExp (Assign (exp0,exp1)) + in if nested + then {decs=decs0@decs1,pre=pre0@pre1@[exp2stmt exp],exp=exp0} + else{decs=decs0@decs1,pre=pre0@pre1,exp=exp} + end + | Comma (exp0,exp1) => + let val {decs=decs0,pre=pre0,exp=exp0} = simplifyNestedExp exp0 + val {decs=decs1,pre=pre1,exp=exp1} = simplifyNestedExp exp1 + val pre = if noEffect exp0 then pre0@pre1 else pre0@[exp2stmt exp0]@pre1 + in {decs=decs0@decs1,pre=pre,exp=exp1} end + | Sub (exp0,exp1) => + let val {decs=decs0,pre=pre0,exp=exp0} = simplifyNestedExp exp0 + val {decs=decs1,pre=pre1,exp=exp1} = simplifyNestedExp exp1 + val exp = mkExp (Sub (exp0,exp1)) + in {decs=decs0@decs1,pre=pre0@pre1,exp=exp} end + | Member (exp,mem) => + let val {decs,pre,exp} = simplifyNestedExp exp + val exp = mkExp (Member (exp,mem)) + in {decs=decs,pre=pre,exp=exp} end + | Arrow (exp,mem) => + (case exp2ctype exp + of Ast.Pointer ctype => + let val {decs,pre,exp} = simplifyNestedExp exp + val exp = coreExp2exp ctype (Deref exp) + val exp = mkExp (Member (exp,mem)) + in {decs=decs,pre=pre,exp=exp} end + | _ => fail "Arrow: type error") + | Deref exp => + let val {decs,pre,exp} = simplifyNestedExp exp + val exp = mkExp (Deref exp) + in {decs=decs,pre=pre,exp=exp} end +(*** + | AddrOf exp => + let val {decs,pre,exp} = simplifyNestedExp exp + val exp = mkExp (AddrOf exp) + in (escapes exp; {decs=decs,pre=pre,exp=exp}) end +***) + + (* notes on addrOf: + s: effect of simplifyNestedExp + a: effect of adrf + 1. x =s=> x =a=> &x + 2. x->field =s=> *(x+k) =a=> x+k where k is offset of field + 3. x.field =s=> *((&x)+k) =a=> &x+k where k is offset of field + 4. e[i] =s=> *(e+i*k) =a=> e+i*k where k is scaling for ( *e) + 5. x[i] =s=> *(&x+i*k) + **) + | AddrOf exp => + let + val {decs, pre, exp} = simplifyNestedExp exp + fun adrf(expr as EXPR(coreExpr, aid, loc)) = + (case coreExpr of + Id{uid=pid, ...} => + (addEscape pid; + exp=EXPR(AddrOf expr, aid, loc)) + | Member(memExpr, field) => + let val expr = adrf(memExpr) + val ctype = exp2ctype(memExpr) + fun nullErr _ = () + val errs = {err=nullErr, warn=nullErr, bug=nullErr} + val fieldOffs = Sizeof.fieldOffsets errs tidtab ctype + val byteOffset = #bitOffset(Sizeof.getField errs (field, fieldOffs)) + in + EXPR(Binop(Plus(exp, byteOffset)), aid, loc) + end + | Deref expr => expr + | (Sub _ | Arrow _) => fail "simplifyNestedExp returned Sub or Arrow") +(* x[4] -> *(x+16) -> x+16 *) + in + {decs=decs, pre=pre, exp=adrf exp} + end + + let val {decs,pre,exp} = simplifyNestedExp exp + val exp = mkExp (AddrOf exp) + in (escapes exp; {decs=decs,pre=pre,exp=exp}) end + + | Binop trip => + simplifyBinop mkExp {nested=nested} ctype trip + | Unop pair => + let val {decs,pre,coreExp} = simplifyUnop {nested=nested} ctype pair + in {decs=decs,pre=pre,exp=mkExp coreExp} end + | Cast (ctype,exp) => + let val {decs,pre,exp} = simplifyNestedExp exp + in + {decs=decs, pre=pre, exp= mkExp(Cast(ctype, exp))} + end + | Id _ => {decs=[],pre=[],exp=exp} + | EnumId _ => {decs=[],pre=[],exp=exp} + | SizeOf _ => {decs=[],pre=[],exp=exp} (* should not appear in compiler mode *) + | ExprExt ext => + let val {decs,pre,coreExp} = + SimplifyAstExt.simplifyExpExt + (tidtab,aidtab,opaidtab) + (simplifyNestedExp, simplifyStmt(NONE,NONE)) + ext + in {decs=decs,pre=pre,exp=mkExp coreExp} + end + | ErrorExpr => {decs=[],pre=[],exp=exp} + end + + and scale ctype i = + case ctype + of Ast.Qual (_,ctype) => scale ctype i + | Ast.Pointer ctype => let val {bytes,...} = sizeOf tidtab ctype + in LargeInt.fromInt (bytes * i) end + | _ => LargeInt.fromInt i + + and simplifyUnop {nested} ctype (unop,exp as EXPR (_,aid,_)) = + let val {decs,pre,exp} = simplifyNestedExp exp + fun mkUnop unop = {decs=decs,pre=pre,coreExp=Unop (unop,exp)} + fun mkAssign {prefixOp} binop = + (* opArgTy is type to which arg is converted + e.g. e++ where e has type ctype and opArgTy newTy + becomes e = (ctype)( (newTy)e + (newTy)1 ) + and if ctype is a pointer, then 1 gets scaled by sizeof( *ctype ) + *) + let val id = newId (if prefixOp then "pref" else "post") ctype + val dec = decl id + val newTy = getOpAid aid + val argExp = cast newTy exp + val one = cast newTy + (coreExp2exp intCt (IntConst (scale ctype 1))) + val binExp = coreExp2exp newTy (Binop (binop,argExp,one)) + val incrStmt = coreExp2stmt ctype + (Assign(copyExp exp, cast ctype binExp)) + val assignStmt = + coreExp2stmt ctype (Assign (mkId id,copyExp exp)) + val pre = if prefixOp then pre@[incrStmt,assignStmt] + else pre@[assignStmt,incrStmt] + in {decs=dec::decs, pre=pre, coreExp=Id id} + end + in case unop + (* the ++, --, cases are no longer dealt with in here; + there is now code in build-ast (which is + enabled when insert_explicit_coersions is set) + that simplifies ++ and -- *) + of PreInc => mkAssign {prefixOp=true} Plus + | PreDec => mkAssign {prefixOp=true} Minus + | PostInc => mkAssign {prefixOp=false} Plus + | PostDec => mkAssign {prefixOp=false} Minus + | _ => mkUnop unop + end + + and simplifyBinop mkExp {nested} ctype (binop,exp0 as EXPR (_,aid,_),exp1) = + let val {decs=decs0,pre=pre0,exp=exp0} = simplifyNestedExp exp0 + val {decs=decs1,pre=pre1,exp=exp1} = simplifyNestedExp exp1 + val decs = decs0@decs1 + val pre = pre0@pre1 + fun mkBinop binop = + {decs=decs,pre=pre,exp=mkExp (Binop (binop,exp0,exp1))} + fun mkAssign binop = + (* for e0 += e1, e0 -= e1 and their friends, opArgTy specifies + the type that e0 must be converted to + e.g. e0 += e1 becomes e0 = (ctype(e0)) ( (opArgTy e0) + e1 ) + *) + let val opArgTy = getOpAid aid + val binExp = coreExp2exp opArgTy + (Binop (binop,cast opArgTy exp0,exp1)) + val assign = mkExp (Assign (copyExp exp0,cast ctype binExp)) + in if nested + then {decs=decs,pre=pre@[exp2stmt assign],exp=copyExp exp0} + else {decs=decs,pre=pre,exp=assign} + end + in case binop + (* the +=, -=, etc., cases are no longer dealt with in here; + there is now code in build-ast (which is + enabled when insert_explicit_coersions is set) + that simplifies +=, -=, ... *) + of PlusAssign => mkAssign Plus + | MinusAssign => mkAssign Minus + | TimesAssign => mkAssign Times + | DivAssign => mkAssign Divide + | ModAssign => mkAssign Mod + | XorAssign => mkAssign BitXor + | OrAssign => mkAssign Or + | AndAssign => mkAssign And + | LshiftAssign => mkAssign Lshift + | RshiftAssign => mkAssign Rshift + | _ => mkBinop binop + end + in {ast=simplifyExtDecls edecls, escapetab = esctab} + end +end diff --git a/ckit/src/ast-utils/simplifier/sources.cm b/ckit/src/ast-utils/simplifier/sources.cm new file mode 100644 index 0000000..3b9ae81 --- /dev/null +++ b/ckit/src/ast-utils/simplifier/sources.cm @@ -0,0 +1,17 @@ +Group is + + ../../ast/sources.cm + + ../copy/copy-ast-sig.sml + ../copy/copy-ast.sml + + simplify-ast-sig.sml + simplify-ast.sml + +#if (defined(d)) + extensions/d/eq-ast-ext.sml +#else + extensions/c/copy-ast-ext.sml + extensions/c/simplify-ast-ext.sml + +#endif diff --git a/ckit/src/ast/.cm/GUID/aid.sml b/ckit/src/ast/.cm/GUID/aid.sml new file mode 100644 index 0000000..5d1e752 --- /dev/null +++ b/ckit/src/ast/.cm/GUID/aid.sml @@ -0,0 +1 @@ +guid-$/(ckit-lib.cm):ast/(group.cm):aid.sml-1714016103.759 diff --git a/ckit/src/ast/.cm/GUID/aidtab.sml b/ckit/src/ast/.cm/GUID/aidtab.sml new file mode 100644 index 0000000..7a6a38e --- /dev/null +++ b/ckit/src/ast/.cm/GUID/aidtab.sml @@ -0,0 +1 @@ +guid-$/(ckit-lib.cm):ast/(group.cm):aidtab.sml-1714016103.873 diff --git a/ckit/src/ast/.cm/GUID/anonymous-structs.sml b/ckit/src/ast/.cm/GUID/anonymous-structs.sml new file mode 100644 index 0000000..2529e9e --- /dev/null +++ b/ckit/src/ast/.cm/GUID/anonymous-structs.sml @@ -0,0 +1 @@ +guid-$/(ckit-lib.cm):ast/(group.cm):anonymous-structs.sml-1714016107.795 diff --git a/ckit/src/ast/.cm/GUID/ast-sig.sml b/ckit/src/ast/.cm/GUID/ast-sig.sml new file mode 100644 index 0000000..72b8af2 --- /dev/null +++ b/ckit/src/ast/.cm/GUID/ast-sig.sml @@ -0,0 +1 @@ +guid-$/(ckit-lib.cm):ast/(group.cm):ast-sig.sml-1714016103.809 diff --git a/ckit/src/ast/.cm/GUID/ast.sml b/ckit/src/ast/.cm/GUID/ast.sml new file mode 100644 index 0000000..acd9729 --- /dev/null +++ b/ckit/src/ast/.cm/GUID/ast.sml @@ -0,0 +1 @@ +guid-$/(ckit-lib.cm):ast/(group.cm):ast.sml-1714016103.837 diff --git a/ckit/src/ast/.cm/GUID/bindings.sml b/ckit/src/ast/.cm/GUID/bindings.sml new file mode 100644 index 0000000..22a2817 --- /dev/null +++ b/ckit/src/ast/.cm/GUID/bindings.sml @@ -0,0 +1 @@ +guid-$/(ckit-lib.cm):ast/(group.cm):bindings.sml-1714016103.847 diff --git a/ckit/src/ast/.cm/GUID/build-ast-sig.sml b/ckit/src/ast/.cm/GUID/build-ast-sig.sml new file mode 100644 index 0000000..bdc18a2 --- /dev/null +++ b/ckit/src/ast/.cm/GUID/build-ast-sig.sml @@ -0,0 +1 @@ +guid-$/(ckit-lib.cm):ast/(group.cm):build-ast-sig.sml-1714016107.949 diff --git a/ckit/src/ast/.cm/GUID/build-ast.sml b/ckit/src/ast/.cm/GUID/build-ast.sml new file mode 100644 index 0000000..f71f68e --- /dev/null +++ b/ckit/src/ast/.cm/GUID/build-ast.sml @@ -0,0 +1 @@ +guid-$/(ckit-lib.cm):ast/(group.cm):build-ast.sml-1714016107.957 diff --git a/ckit/src/ast/.cm/GUID/cnv-ext-sig.sml b/ckit/src/ast/.cm/GUID/cnv-ext-sig.sml new file mode 100644 index 0000000..e959ecc --- /dev/null +++ b/ckit/src/ast/.cm/GUID/cnv-ext-sig.sml @@ -0,0 +1 @@ +guid-$/(ckit-lib.cm):ast/(group.cm):cnv-ext-sig.sml-1714016107.836 diff --git a/ckit/src/ast/.cm/GUID/ctype-eq.sml b/ckit/src/ast/.cm/GUID/ctype-eq.sml new file mode 100644 index 0000000..4973839 --- /dev/null +++ b/ckit/src/ast/.cm/GUID/ctype-eq.sml @@ -0,0 +1 @@ +guid-$/(ckit-lib.cm):ast/(group.cm):ctype-eq.sml-1714016107.764 diff --git a/ckit/src/ast/.cm/GUID/initializer-normalizer-sig.sml b/ckit/src/ast/.cm/GUID/initializer-normalizer-sig.sml new file mode 100644 index 0000000..0fe8717 --- /dev/null +++ b/ckit/src/ast/.cm/GUID/initializer-normalizer-sig.sml @@ -0,0 +1 @@ +guid-$/(ckit-lib.cm):ast/(group.cm):initializer-normalizer-sig.sml-1714016107.903 diff --git a/ckit/src/ast/.cm/GUID/initializer-normalizer.sml b/ckit/src/ast/.cm/GUID/initializer-normalizer.sml new file mode 100644 index 0000000..b0cecef --- /dev/null +++ b/ckit/src/ast/.cm/GUID/initializer-normalizer.sml @@ -0,0 +1 @@ +guid-$/(ckit-lib.cm):ast/(group.cm):initializer-normalizer.sml-1714016107.906 diff --git a/ckit/src/ast/.cm/GUID/parse-to-ast-sig.sml b/ckit/src/ast/.cm/GUID/parse-to-ast-sig.sml new file mode 100644 index 0000000..b0ccf82 --- /dev/null +++ b/ckit/src/ast/.cm/GUID/parse-to-ast-sig.sml @@ -0,0 +1 @@ +guid-$/(ckit-lib.cm):ast/(group.cm):parse-to-ast-sig.sml-1714016109.268 diff --git a/ckit/src/ast/.cm/GUID/parse-to-ast.sml b/ckit/src/ast/.cm/GUID/parse-to-ast.sml new file mode 100644 index 0000000..77ac626 --- /dev/null +++ b/ckit/src/ast/.cm/GUID/parse-to-ast.sml @@ -0,0 +1 @@ +guid-$/(ckit-lib.cm):ast/(group.cm):parse-to-ast.sml-1714016109.274 diff --git a/ckit/src/ast/.cm/GUID/pid.sml b/ckit/src/ast/.cm/GUID/pid.sml new file mode 100644 index 0000000..ede288a --- /dev/null +++ b/ckit/src/ast/.cm/GUID/pid.sml @@ -0,0 +1 @@ +guid-$/(ckit-lib.cm):ast/(group.cm):pid.sml-1714016103.730 diff --git a/ckit/src/ast/.cm/GUID/simplify-assign-ops.sml b/ckit/src/ast/.cm/GUID/simplify-assign-ops.sml new file mode 100644 index 0000000..c819623 --- /dev/null +++ b/ckit/src/ast/.cm/GUID/simplify-assign-ops.sml @@ -0,0 +1 @@ +guid-$/(ckit-lib.cm):ast/(group.cm):simplify-assign-ops.sml-1714016107.862 diff --git a/ckit/src/ast/.cm/GUID/sizeof-sig.sml b/ckit/src/ast/.cm/GUID/sizeof-sig.sml new file mode 100644 index 0000000..7f01486 --- /dev/null +++ b/ckit/src/ast/.cm/GUID/sizeof-sig.sml @@ -0,0 +1 @@ +guid-$/(ckit-lib.cm):ast/(group.cm):sizeof-sig.sml-1714016104.770 diff --git a/ckit/src/ast/.cm/GUID/sizeof.sml b/ckit/src/ast/.cm/GUID/sizeof.sml new file mode 100644 index 0000000..1dbda19 --- /dev/null +++ b/ckit/src/ast/.cm/GUID/sizeof.sml @@ -0,0 +1 @@ +guid-$/(ckit-lib.cm):ast/(group.cm):sizeof.sml-1714016104.776 diff --git a/ckit/src/ast/.cm/GUID/sizes-sig.sml b/ckit/src/ast/.cm/GUID/sizes-sig.sml new file mode 100644 index 0000000..9674b23 --- /dev/null +++ b/ckit/src/ast/.cm/GUID/sizes-sig.sml @@ -0,0 +1 @@ +guid-$/(ckit-lib.cm):ast/(group.cm):sizes-sig.sml-1714016104.414 diff --git a/ckit/src/ast/.cm/GUID/sizes.sml b/ckit/src/ast/.cm/GUID/sizes.sml new file mode 100644 index 0000000..1cdd345 --- /dev/null +++ b/ckit/src/ast/.cm/GUID/sizes.sml @@ -0,0 +1 @@ +guid-$/(ckit-lib.cm):ast/(group.cm):sizes.sml-1714016104.417 diff --git a/ckit/src/ast/.cm/GUID/state-sig.sml b/ckit/src/ast/.cm/GUID/state-sig.sml new file mode 100644 index 0000000..9f6b5da --- /dev/null +++ b/ckit/src/ast/.cm/GUID/state-sig.sml @@ -0,0 +1 @@ +guid-$/(ckit-lib.cm):ast/(group.cm):state-sig.sml-1714016107.646 diff --git a/ckit/src/ast/.cm/GUID/state.sml b/ckit/src/ast/.cm/GUID/state.sml new file mode 100644 index 0000000..4bede19 --- /dev/null +++ b/ckit/src/ast/.cm/GUID/state.sml @@ -0,0 +1 @@ +guid-$/(ckit-lib.cm):ast/(group.cm):state.sml-1714016107.665 diff --git a/ckit/src/ast/.cm/GUID/symbol-sig.sml b/ckit/src/ast/.cm/GUID/symbol-sig.sml new file mode 100644 index 0000000..361f4b4 --- /dev/null +++ b/ckit/src/ast/.cm/GUID/symbol-sig.sml @@ -0,0 +1 @@ +guid-$/(ckit-lib.cm):ast/(group.cm):symbol-sig.sml-1714016103.741 diff --git a/ckit/src/ast/.cm/GUID/symbol.sml b/ckit/src/ast/.cm/GUID/symbol.sml new file mode 100644 index 0000000..1b4019e --- /dev/null +++ b/ckit/src/ast/.cm/GUID/symbol.sml @@ -0,0 +1 @@ +guid-$/(ckit-lib.cm):ast/(group.cm):symbol.sml-1714016103.745 diff --git a/ckit/src/ast/.cm/GUID/tables.sml b/ckit/src/ast/.cm/GUID/tables.sml new file mode 100644 index 0000000..c3bf35d --- /dev/null +++ b/ckit/src/ast/.cm/GUID/tables.sml @@ -0,0 +1 @@ +guid-$/(ckit-lib.cm):ast/(group.cm):tables.sml-1714016103.878 diff --git a/ckit/src/ast/.cm/GUID/tid.sml b/ckit/src/ast/.cm/GUID/tid.sml new file mode 100644 index 0000000..f5ae51d --- /dev/null +++ b/ckit/src/ast/.cm/GUID/tid.sml @@ -0,0 +1 @@ +guid-$/(ckit-lib.cm):ast/(group.cm):tid.sml-1714016103.737 diff --git a/ckit/src/ast/.cm/GUID/tidtab.sml b/ckit/src/ast/.cm/GUID/tidtab.sml new file mode 100644 index 0000000..12632f0 --- /dev/null +++ b/ckit/src/ast/.cm/GUID/tidtab.sml @@ -0,0 +1 @@ +guid-$/(ckit-lib.cm):ast/(group.cm):tidtab.sml-1714016103.868 diff --git a/ckit/src/ast/.cm/GUID/type-util-sig.sml b/ckit/src/ast/.cm/GUID/type-util-sig.sml new file mode 100644 index 0000000..82d4253 --- /dev/null +++ b/ckit/src/ast/.cm/GUID/type-util-sig.sml @@ -0,0 +1 @@ +guid-$/(ckit-lib.cm):ast/(group.cm):type-util-sig.sml-1714016104.422 diff --git a/ckit/src/ast/.cm/GUID/type-util.sml b/ckit/src/ast/.cm/GUID/type-util.sml new file mode 100644 index 0000000..79734e9 --- /dev/null +++ b/ckit/src/ast/.cm/GUID/type-util.sml @@ -0,0 +1 @@ +guid-$/(ckit-lib.cm):ast/(group.cm):type-util.sml-1714016104.428 diff --git a/ckit/src/ast/.cm/GUID/uid-fn.sml b/ckit/src/ast/.cm/GUID/uid-fn.sml new file mode 100644 index 0000000..57f47ff --- /dev/null +++ b/ckit/src/ast/.cm/GUID/uid-fn.sml @@ -0,0 +1 @@ +guid-$/(ckit-lib.cm):ast/(group.cm):uid-fn.sml-1714016103.722 diff --git a/ckit/src/ast/.cm/GUID/uid-sig.sml b/ckit/src/ast/.cm/GUID/uid-sig.sml new file mode 100644 index 0000000..874dc69 --- /dev/null +++ b/ckit/src/ast/.cm/GUID/uid-sig.sml @@ -0,0 +1 @@ +guid-$/(ckit-lib.cm):ast/(group.cm):uid-sig.sml-1714016103.718 diff --git a/ckit/src/ast/.cm/GUID/uidtabimp-fn.sml b/ckit/src/ast/.cm/GUID/uidtabimp-fn.sml new file mode 100644 index 0000000..69a1495 --- /dev/null +++ b/ckit/src/ast/.cm/GUID/uidtabimp-fn.sml @@ -0,0 +1 @@ +guid-$/(ckit-lib.cm):ast/(group.cm):uidtabimp-fn.sml-1714016103.855 diff --git a/ckit/src/ast/.cm/GUID/uidtabimp-sig.sml b/ckit/src/ast/.cm/GUID/uidtabimp-sig.sml new file mode 100644 index 0000000..8788b73 --- /dev/null +++ b/ckit/src/ast/.cm/GUID/uidtabimp-sig.sml @@ -0,0 +1 @@ +guid-$/(ckit-lib.cm):ast/(group.cm):uidtabimp-sig.sml-1714016103.852 diff --git a/ckit/src/ast/.cm/SKEL/aid.sml b/ckit/src/ast/.cm/SKEL/aid.sml new file mode 100644 index 0000000..d0305e4 --- /dev/null +++ b/ckit/src/ast/.cm/SKEL/aid.sml @@ -0,0 +1,2 @@ +Skeleton 5 +ad"Aid"jjh0gp1e"UidFn"gp1c"UID" \ No newline at end of file diff --git a/ckit/src/ast/.cm/SKEL/aidtab.sml b/ckit/src/ast/.cm/SKEL/aidtab.sml new file mode 100644 index 0000000..b34d269 --- /dev/null +++ b/ckit/src/ast/.cm/SKEL/aidtab.sml @@ -0,0 +1,2 @@ +Skeleton 5 +ad"Aidtab"jh1ad"Uid"gp1d"Aid"gp1e"UidtabImpFn" \ No newline at end of file diff --git a/ckit/src/ast/.cm/SKEL/anonymous-structs.sml b/ckit/src/ast/.cm/SKEL/anonymous-structs.sml new file mode 100644 index 0000000..f299a69 --- /dev/null +++ b/ckit/src/ast/.cm/SKEL/anonymous-structs.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d3f2ParseTree"d"Tid"ad"TyEq"h1begp1f1d"Real"ad"AnonymousStructs"h0 \ No newline at end of file diff --git a/ckit/src/ast/.cm/SKEL/ast-sig.sml b/ckit/src/ast/.cm/SKEL/ast-sig.sml new file mode 100644 index 0000000..1d7255b --- /dev/null +++ b/ckit/src/ast/.cm/SKEL/ast-sig.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f7d"Pid"d"AstExt"Cd"SourceMap"d"Symbol"d"LargeInt"d"Tid"d"Aid"Nac"AST"h0 \ No newline at end of file diff --git a/ckit/src/ast/.cm/SKEL/ast.sml b/ckit/src/ast/.cm/SKEL/ast.sml new file mode 100644 index 0000000..8b9849a --- /dev/null +++ b/ckit/src/ast/.cm/SKEL/ast.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f7d"Pid"d"AstExt"Cd"SourceMap"d"Symbol"d"LargeInt"d"Tid"d"Aid"Nad"Ast"jh0gp1c"AST" \ No newline at end of file diff --git a/ckit/src/ast/.cm/SKEL/bindings.sml b/ckit/src/ast/.cm/SKEL/bindings.sml new file mode 100644 index 0000000..c9ae993 --- /dev/null +++ b/ckit/src/ast/.cm/SKEL/bindings.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f6d"Pid"Cd"SourceMap"d"Symbol"d"LargeInt"d"Tid"d"Ast"Nad"Bindings"h0 \ No newline at end of file diff --git a/ckit/src/ast/.cm/SKEL/build-ast-sig.sml b/ckit/src/ast/.cm/SKEL/build-ast-sig.sml new file mode 100644 index 0000000..f5dafd1 --- /dev/null +++ b/ckit/src/ast/.cm/SKEL/build-ast-sig.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f8d"ParseTree"d"Tables"d"State"Cd"Bindings"d"Sizes"d"Tidtab"d"Error"d"Ast"Nac"BUILD_AST"h0 \ No newline at end of file diff --git a/ckit/src/ast/.cm/SKEL/build-ast.sml b/ckit/src/ast/.cm/SKEL/build-ast.sml new file mode 100644 index 0000000..8ee8ad8 --- /dev/null +++ b/ckit/src/ast/.cm/SKEL/build-ast.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f9d"Tables"State"List"Bindings"CTypeUtil"Tidtab"SourceMap"d"String"Ast"Nad"BuildAst"jh6aSM"gp1 true | _ => false) + | eqExpr(String i, String j) = (i=j) + | eqExpr(Id i, Id j) = (i = j) + | eqExpr(Unop(expOp, expr), Unop(expOp', expr')) = + eqExpOp(expOp,expOp') andalso eqExpr(expr, expr') + | eqExpr(Binop(expOp, expr1, expr2), Binop(expOp', expr1', expr2')) = + eqExpOp(expOp,expOp') andalso eqExpr(expr1, expr1') andalso eqExpr(expr2, expr2') + | eqExpr(QuestionColon(expr1, expr2, expr3), + QuestionColon(expr1', expr2', expr3')) = + eqExpr(expr1, expr1') andalso eqExpr(expr2, expr2') + andalso eqExpr(expr3, expr3') + | eqExpr(Call(expr1, exprl), Call(expr1', exprl')) = + eqExpr(expr1, expr1') andalso (eqList eqExpr (exprl, exprl')) + | eqExpr(Cast(ctype, expr), Cast(ctype', expr')) = eqExpr(expr, expr') + | eqExpr(InitList exprl, InitList exprl') = eqList eqExpr (exprl, exprl') + | eqExpr(ExprExt _, ExprExt _) = false + | eqExpr(_, _) = false + + (* dpo: some small changes to get eqType type correct but is the equality correct? *) + fun eqTy({qualifiers=[], specifiers=[Enum{tagOpt=sOpt, enumerators=sel, ...}]} + ,{qualifiers=[], specifiers=[Enum{tagOpt=sOpt',enumerators=sel', ...}]}) = + sOpt = sOpt' andalso eqList (eqPair (eqString, eqExpr)) (sel, sel') + | eqTy({qualifiers=[], specifiers=[Struct{isStruct=b, tagOpt=sOpt, members=cdell}]} + ,{qualifiers=[], specifiers=[Struct{isStruct=b',tagOpt=sOpt',members=cdell'}]}) = + (b = b') andalso sOpt = sOpt' andalso + eqList + (eqPair (eqCtype, eqList(eqPair(eqDeclarator, eqExpr)))) + (cdell, cdell') + | eqTy(_, _) = false + + end (* local *) + +end (* structure TyEq *) + + +structure AnonymousStructs = +struct + + (* ------------------------------------------------------------ + Resolving Anonymous Structs (for inter-file analysis) + The problem: need to resolve structurally equiv anonymous structs in + different files to same tid. + ------------------------------------------------------------ + *) + + val anonymousStructsEnumsList = ref (nil : (ParseTree.ctype * Tid.uid) list) + fun resetAnonymousStructsEnumsList () = (anonymousStructsEnumsList := nil) + + fun findAnonStructEnum ty = + let fun finder((ty', tid) :: l) = + if TyEq.eqTy(ty, ty') + then (SOME tid) + (* debugging code: + print ("recovered anon struct with tid " ^ (Tid.toString tid) + ^ "\n"); + (case ty of + ParseTree.Enum _ => print "Enum\n" + | ParseTree.Struct(_, _, (_, (dec, e) :: _) :: _) => + (case dec of + ParseTree.Name name => print("Struct " ^ name ^ ".. \n") + | _ => print("Struct ? .. \n")) + | _ => print "Something else ..\n"); *) + else finder l + | finder nil = NONE + in finder (!anonymousStructsEnumsList) + end + + fun addAnonTid (ty, tid) = + let val l = (ty, tid) :: (!anonymousStructsEnumsList) + in anonymousStructsEnumsList := l + end + +end (* structure AnonymousStructs *) diff --git a/ckit/src/ast/ast-sig.sml b/ckit/src/ast/ast-sig.sml new file mode 100644 index 0000000..29f5eb2 --- /dev/null +++ b/ckit/src/ast/ast-sig.sml @@ -0,0 +1,197 @@ +(* Copyright (c) 1998 by Lucent Technologies *) + +signature AST = +sig + + type pid = Pid.uid + type aid = Aid.uid + type tid = Tid.uid + + (* TYPES: preliminary definitions *) + datatype storageClass = AUTO | EXTERN | REGISTER | STATIC | DEFAULT + + datatype qualifier = CONST | VOLATILE + + datatype signedness = SIGNED | UNSIGNED + + (* signednessTag determines whether a type was declared signed or unsigned, or + * the type was assumed to be one or the other. *) + datatype signednessTag = SIGNDECLARED | SIGNASSUMED + + datatype intKind = CHAR | SHORT | INT | LONG | LONGLONG | FLOAT | DOUBLE | LONGDOUBLE + + (* BEGIN D *) + datatype fractionality = FRACTIONAL | WHOLENUM (* FRACTIONAL dominates WHOLENUM *) + + datatype saturatedness = SATURATE | NONSATURATE (* SATURATE dominates NONSATURATE *) + (* END D *) + + (* note: definition of ctype appears later, is in the mutual recursive clump. *) + + (* IDENTIFIERS: preliminary definitions *) + + (* labels *) + type label = + {name: Symbol.symbol, (* the name of the label *) + uid : Pid.uid, (* unique identifier *) + location : SourceMap.location} + + datatype declStatus + = IMPLICIT (* used, but not yet declared or defined *) + | DECLARED (* declared, but not yet defined *) + | DEFINED (* defined, i.e. there is a FunctionDef or + * initializer for this identifier *) + + (* identifiers - we call these "id"s *) + datatype idKind + = NONFUN (* is not of functional type *) + | FUNCTION of (* is of functional type *) + {hasFunctionDef: bool} (* was defined by a FunctionDef *) + + (* OPERATORS *) + datatype binop + = Plus | Minus | Times | Divide | Mod + | Gt | Lt | Gte | Lte | Eq | Neq | And | Or + | BitOr | BitAnd | BitXor | Lshift | Rshift + | PlusAssign | MinusAssign | TimesAssign | DivAssign + | ModAssign | XorAssign | OrAssign | AndAssign + | LshiftAssign | RshiftAssign | BinopExt of AstExt.binopExt + + datatype unop + = Uplus | Not | Negate | BitNot + | PreInc | PostInc | PreDec | PostDec | UnopExt of AstExt.unopExt + + + (* DECLARATIONS *) + + datatype declaration + = TypeDecl of {shadow: {strct:bool} option, tid:tid} + (* placeholder to indicate where typedefs/enums/structs should be printed *) + | VarDecl of id * initExpression option + + + (* STATEMENTS *) + + and statement = STMT of coreStatement * aid * SourceMap.location + + and coreStatement + = Expr of expression option + | Compound of declaration list * statement list + | While of expression * statement + | Do of expression * statement + | For of expression option * expression option * expression option * statement + | Labeled of label * statement + | CaseLabel of LargeInt.int * statement + | DefaultLabel of statement + | Goto of label + | Break + | Continue + | Return of expression option + | IfThen of expression * statement + | IfThenElse of expression * statement * statement + | Switch of expression * statement + | StatExt of (expression, statement, binop, unop) AstExt.statementExt + | ErrorStmt + + (* EXPRESSIONS *) + + and expression = EXPR of coreExpression * aid * SourceMap.location + + and coreExpression + = IntConst of LargeInt.int + | RealConst of real + | StringConst of string + | Call of expression * expression list + | QuestionColon of expression * expression * expression + | Assign of expression * expression + | Comma of expression * expression + | Sub of expression * expression + | Member of expression * member + | Arrow of expression * member + | Deref of expression + | AddrOf of expression + | Binop of binop * expression * expression + | Unop of unop * expression + | Cast of ctype * expression + | Id of id + | EnumId of member * LargeInt.int + | SizeOf of ctype (* not used in compiler mode; sizeof expr becomes sizeof (typeof expr) *) + | ExprExt of (expression, statement, binop, unop) AstExt.expressionExt + | ErrorExpr + + and initExpression + = Simple of expression + | Aggregate of initExpression list + + and ctype + = Void + | Ellipses + | Qual of qualifier * ctype + | Numeric of (* D *) saturatedness * (* D *) fractionality * signedness * intKind + * signednessTag + | Array of (LargeInt.int * expression) option * ctype + | Pointer of ctype + | Function of ctype * (ctype * id option) list + | StructRef of tid (* reference to a tid bound by a struct decl *) + | UnionRef of tid (* reference to a tid bound by a union decl *) + | EnumRef of tid (* reference to a tid bound by a enumeration decl *) + | TypeRef of tid (* reference to a tid bound by a typedef decl *) + | Error + + (* INVARIANT: whenever the Error ctype is introduced, an error message will be printed. + * Thus any downstream code processing the Error value does not need to (and should not) + * generate additional error messages. *) + + (* MEMBERS AND IDENTIFIERS *) + + (* Members in structs and unions. Also used for named constants in + * enumerations; the ISO Standard calls these "members". *) + and memberKind + = STRUCTmem + | UNIONmem + | ENUMmem of LargeInt.int + + withtype member = + {name: Symbol.symbol, (* the name of the member *) + uid : Pid.uid, (* unique identifier *) + location : SourceMap.location, + ctype: ctype, (* member type *) + kind: memberKind} (* member kind: struct, union, or enum *) + + and id = + {name: Symbol.symbol, + uid: Pid.uid, (* unique identifier *) + location: SourceMap.location, + ctype: ctype, (* associated type *) + stClass: storageClass, + status: declStatus, + global: bool, (* defined at top level *) + kind: idKind} + + (* the common fields of id and member could be factored out, but + * this would increase space usage and access time, and require + * nested patterns when accessing the common fields. E.g.: + + type id = + {base : basicId, + stClass: storageClass option, + kind: idKind} + *) + + + (* top-level program elements *) + datatype coreExternalDecl + = ExternalDecl of declaration + | FunctionDef of id * id list * statement + | ExternalDeclExt of (expression, statement, binop, unop) AstExt.externalDeclExt + + (* marked and (potentially) annotated external declarations *) + datatype externalDecl = DECL of coreExternalDecl * aid * SourceMap.location + + (* PROGRAMS *) + (* abstract syntax of "programs", i.e. the result of processing a source file + * also known as a "translation unit" *) + type ast = externalDecl list + + +end (* signature AST *) diff --git a/ckit/src/ast/ast.sml b/ckit/src/ast/ast.sml new file mode 100644 index 0000000..bb0e8b4 --- /dev/null +++ b/ckit/src/ast/ast.sml @@ -0,0 +1,197 @@ +(* Copyright (c) 1998 by Lucent Technologies *) + +structure Ast : AST = +struct + + type pid = Pid.uid + type aid = Aid.uid + type tid = Tid.uid + + (* TYPES: preliminary definitions *) + datatype storageClass = AUTO | EXTERN | REGISTER | STATIC | DEFAULT + + datatype qualifier = CONST | VOLATILE + + datatype signedness = SIGNED | UNSIGNED + + (* signednessTag determines whether a type was declared signed or unsigned, or + * the type was assumed to be one or the other. *) + datatype signednessTag = SIGNDECLARED | SIGNASSUMED + + datatype intKind = CHAR | SHORT | INT | LONG | LONGLONG | FLOAT | DOUBLE | LONGDOUBLE + + (* BEGIN D *) + datatype fractionality = FRACTIONAL | WHOLENUM (* FRACTIONAL dominates WHOLENUM *) + + datatype saturatedness = SATURATE | NONSATURATE (* SATURATE dominates NONSATURATE *) + (* END D *) + + (* note: definition of ctype appears later, is in the mutual recursive clump. *) + + (* IDENTIFIERS: preliminary definitions *) + + (* labels *) + type label = + {name: Symbol.symbol, (* the name of the label *) + uid : Pid.uid, (* unique identifier *) + location : SourceMap.location} + + datatype declStatus + = IMPLICIT (* used, but not yet declared or defined *) + | DECLARED (* declared, but not yet defined *) + | DEFINED (* defined, i.e. there is a FunctionDef or + * initializer for this identifier *) + + (* identifiers - we call these "id"s *) + datatype idKind + = NONFUN (* is not of functional type *) + | FUNCTION of (* is of functional type *) + {hasFunctionDef: bool} (* was defined by a FunctionDef *) + + (* OPERATORS *) + datatype binop + = Plus | Minus | Times | Divide | Mod + | Gt | Lt | Gte | Lte | Eq | Neq | And | Or + | BitOr | BitAnd | BitXor | Lshift | Rshift + | PlusAssign | MinusAssign | TimesAssign | DivAssign + | ModAssign | XorAssign | OrAssign | AndAssign + | LshiftAssign | RshiftAssign | BinopExt of AstExt.binopExt + + datatype unop + = Uplus | Not | Negate | BitNot + | PreInc | PostInc | PreDec | PostDec | UnopExt of AstExt.unopExt + + + (* DECLARATIONS *) + + datatype declaration + = TypeDecl of {shadow: {strct:bool} option, tid:tid} + (* placeholder to indicate where typedefs/enums/structs should be printed *) + | VarDecl of id * initExpression option + + + (* STATEMENTS *) + + and statement = STMT of coreStatement * aid * SourceMap.location + + and coreStatement + = Expr of expression option + | Compound of declaration list * statement list + | While of expression * statement + | Do of expression * statement + | For of expression option * expression option * expression option * statement + | Labeled of label * statement + | CaseLabel of LargeInt.int * statement + | DefaultLabel of statement + | Goto of label + | Break + | Continue + | Return of expression option + | IfThen of expression * statement + | IfThenElse of expression * statement * statement + | Switch of expression * statement + | StatExt of (expression, statement, binop, unop) AstExt.statementExt + | ErrorStmt + + (* EXPRESSIONS *) + + and expression = EXPR of coreExpression * aid * SourceMap.location + + and coreExpression + = IntConst of LargeInt.int + | RealConst of real + | StringConst of string + | Call of expression * expression list + | QuestionColon of expression * expression * expression + | Assign of expression * expression + | Comma of expression * expression + | Sub of expression * expression + | Member of expression * member + | Arrow of expression * member + | Deref of expression + | AddrOf of expression + | Binop of binop * expression * expression + | Unop of unop * expression + | Cast of ctype * expression + | Id of id + | EnumId of member * LargeInt.int + | SizeOf of ctype (* not used in compiler mode; sizeof expr becomes sizeof (typeof expr) *) + | ExprExt of (expression, statement, binop, unop) AstExt.expressionExt + | ErrorExpr + + and initExpression + = Simple of expression + | Aggregate of initExpression list + + and ctype + = Void + | Ellipses + | Qual of qualifier * ctype + | Numeric of (* D *) saturatedness * (* D *) fractionality * signedness * intKind + * signednessTag + | Array of (LargeInt.int * expression) option * ctype + | Pointer of ctype + | Function of ctype * (ctype * id option) list + | StructRef of tid (* reference to a tid bound by a struct decl *) + | UnionRef of tid (* reference to a tid bound by a union decl *) + | EnumRef of tid (* reference to a tid bound by a enumeration decl *) + | TypeRef of tid (* reference to a tid bound by a typedef decl *) + | Error + + (* INVARIANT: whenever the Error ctype is introduced, an error message will be printed. + * Thus any downstream code processing the Error value does not need to (and should not) + * generate additional error messages. *) + + (* MEMBERS AND IDENTIFIERS *) + + (* Members in structs and unions. Also used for named constants in + * enumerations; the ISO Standard calls these "members". *) + and memberKind + = STRUCTmem + | UNIONmem + | ENUMmem of LargeInt.int + + withtype member = + {name: Symbol.symbol, (* the name of the member *) + uid : Pid.uid, (* unique identifier *) + location : SourceMap.location, + ctype: ctype, + kind: memberKind} (* member type *) + + and id = + {name: Symbol.symbol, + uid: Pid.uid, (* unique identifier *) + location: SourceMap.location, + ctype: ctype, (* associated type *) + stClass: storageClass, + status: declStatus, + global: bool, (* defined at top level *) + kind: idKind} + + (* the common fields of id and member could be factored out, but + * this would increase space usage and access time, and require + * nested patterns when accessing the common fields. E.g.: + + type id = + {base : basicId, + stClass: storageClass option, + kind: idKind} + *) + + + (* top-level program elements *) + datatype coreExternalDecl + = ExternalDecl of declaration + | FunctionDef of id * id list * statement + | ExternalDeclExt of (expression, statement, binop, unop) AstExt.externalDeclExt + + (* marked and (potentially) annotated external declarations *) + datatype externalDecl = DECL of coreExternalDecl * aid * SourceMap.location + + (* PROGRAMS *) + (* abstract syntax of "programs", i.e. the result of processing a source file + * also known as a "translation unit" *) + type ast = externalDecl list + + +end (* structure Ast *) diff --git a/ckit/src/ast/bindings.sml b/ckit/src/ast/bindings.sml new file mode 100644 index 0000000..bda1a3f --- /dev/null +++ b/ckit/src/ast/bindings.sml @@ -0,0 +1,48 @@ +(* Copyright (c) 1999 by Lucent Technologies *) +(* bindings.sml *) + +(* types the bindings of program identifiers, including types, + * labels, objects (variables and function names) in environments, + * i.e. tidtabs and symbol tables *) + +(* the old pidInfo corresponds to the identifier types now in Ast, + * and the old symInfo to symBinding *) + +structure Bindings = +struct + + datatype namedCtype + = Struct of + Tid.uid * (Ast.ctype * Ast.member option * LargeInt.int option) list + (* pid is optional because of anonymous bit fields *) + | Union of Tid.uid * (Ast.ctype * Ast.member) list + (* pid is mandatory for unions *) + | Enum of Tid.uid * (Ast.member * LargeInt.int) list + | Typedef of Tid.uid * Ast.ctype + + (* type info contained in tidtabs bindings *) + (* name = NONE for anonymous structs, unions, enums -- can't refer to it *) + (* ntype = NONE means is a "partial" type -- has been used, but not defined *) + type tidBinding = + {name: string option, + ntype: namedCtype option, + global: bool, (* is it a top level definition? *) + location: SourceMap.location} + + (* info used in environment symbol tables *) + + (* coincidentally the same as Ast.member *) + type typeIdInfo = + {name: Symbol.symbol, + uid : Pid.uid, (* unique identifier *) + location : SourceMap.location, + ctype: Ast.ctype} + + (* type of bindings in symbol tables *) + datatype symBinding + = MEMBER of Ast.member + | ID of Ast.id (* objects and functions *) + | TYPEDEF of typeIdInfo + | TAG of typeIdInfo + +end (* structure Bindings *) diff --git a/ckit/src/ast/build-ast-sig.sml b/ckit/src/ast/build-ast-sig.sml new file mode 100644 index 0000000..0a35d8a --- /dev/null +++ b/ckit/src/ast/build-ast-sig.sml @@ -0,0 +1,44 @@ +(* Copyright (c) 1998 by Lucent Technologies *) + +signature BUILD_AST = +sig + + (* information returned by makeAst *) + type astBundle = + {ast: Ast.ast, + tidtab: Bindings.tidBinding Tidtab.uidtab, + errorCount: int, + warningCount: int, + auxiliaryInfo: {aidtab: Tables.aidtab, + implicits: Tables.aidtab, + env: State.symtab}} + + (* control of buildAst modes *) + val insert_explicit_coersions : bool ref + (* insert explicit casts at points where there are implicit type conversions? *) + val insert_scaling : bool ref + (* insert scaling computations at pointer arithmetic? *) + val reduce_sizeof : bool ref + (* replace sizeof expressions by integer constants? *) + val reduce_assign_ops : bool ref + (* replace assignops by simple ops and assignments? *) + val multi_file_mode : bool ref + (* analysis mode -- allow repeated definitions? *) + val local_externs_ok: bool ref + (* local declarations involving EXTERN are ok (usually false) *) + val default_signed_char: bool ref + (* is the type "char" implicitly regarded as signed? *) + + val multiFileMode: unit -> unit (* was called analysis mode *) + val compilerMode: unit -> unit + val sourceToSourceMode: unit -> unit + + (* convert a parse tree to an ast and associated map from expression + * adornments to types + *) + val makeAst : + Sizes.sizes * State.stateInfo * Error.errorState + -> ParseTree.externalDecl list + -> astBundle + +end (* signature BUILD_AST *) diff --git a/ckit/src/ast/build-ast.sml b/ckit/src/ast/build-ast.sml new file mode 100644 index 0000000..9e8e7c7 --- /dev/null +++ b/ckit/src/ast/build-ast.sml @@ -0,0 +1,3105 @@ +(* Copyright (c) 1998 by Lucent Technologies *) + +(* buildast.sml + * + * Input: a parser tree + * + * Output: a type checked abstract syntax tree, a map from + * expression adornments to types, and mappings from + * variables (uids) to types and type ids (uids) to types. + * + * AUTHORS: Michael Siff (siff@cs.wisc.edu) + * Satish Chandra (chandra@research.bell-labs.com) + * Nevin Heintze (nch@research.bell-labs.com) + * Dino Oliva (oliva@research.bell-labs.com) + * Dave MacQueen (dbm@research.bell-labs.com) + * + * TBD: + * - needs to be tested for robustness + * (particularly type table and expression-type map) + * - add casts to constant expr evaluator + *) + +(* Type checking: minor checks not implemented: + 3. no pointer or arrays of bitfields: most compiler (and lint) don't implement this. + 5. only storage-class specifier in a parameter declaration is register. + *) + + +(* Notes: Treatment of function pointers. + In C, the types Function(...) and Pointer(Function(...)) + are almost interchangeable. If f is a function, then + it can be called using ( *f )(args); if x is a function pointer, + then the function it points to can be called using x(args) + (Dennis R. says this was introduced by the pcc compiler, and then adopted by ANSI.) + The auto-promotion of Function(...) and Pointer(Function(...)) has some + strange consequences: ( ******f ) is just f. + + We deal with this as follows: + 1. all expressions of type Function(...) are immediately + promoted to type Pointer(Function(...)) + 2. exceptions to (1) involving sizeof and & + are handled as special cases in the code for unary operations. + 3. derefs of expressions of type Pointer(Function(...)) are eliminated. + 4. & of functions are eliminated. + 5. function parameters of type Function(...) are promoted to Pointer(Function(...)). +*) + +(* Changes to make sometime around April 1st, 99 + 2. get rid of redundancy relating to topLevel/global (i.e. remove topLevel param) + - once it's been tested. +*) + +structure BuildAst : BUILD_AST = +struct + + type astBundle = + {ast: Ast.ast, + tidtab: Bindings.tidBinding Tidtab.uidtab, + errorCount: int, + warningCount: int, + auxiliaryInfo: {aidtab: Tables.aidtab, + implicits: Tables.aidtab, + env: State.symtab}} + + (* imported structures w/abbreviations *) + (* ----------------------------------- *) + structure SM = SourceMap + + structure Aid = Aid + structure Tid = Tid + structure Pid = Pid + + structure PT = ParseTree + structure Sym = Symbol + structure B = Bindings + structure PPL = PPLib + structure S = State + structure W = Word + structure TU = TypeUtil + structure TT = Tidtab + structure AT = Aidtab + structure TypeCheckControl = Config.TypeCheckControl + + (* local structures *) + (* ---------------- *) + (* DBM: an inefficient version of string binary map *) + structure IdMap = BinaryMapFn (struct + type ord_key = string + val compare = String.compare + end) + + (* abstract syntax of translation unit in context *) + type astBundle = + {ast: Ast.ast, + tidtab: Bindings.tidBinding Tidtab.uidtab, + errorCount: int, + warningCount: int, + auxiliaryInfo: {aidtab: Tables.aidtab, + implicits: Tables.aidtab, + env: State.symtab}} + + val insert_explicit_coersions = ref false + val insert_scaling = ref false + val reduce_sizeof = ref false + val reduce_assign_ops = ref false + val multi_file_mode = ref false + val local_externs_ok = ref true + val default_signed_char = ref false + + fun multiFileMode () = + (insert_explicit_coersions := false; + insert_scaling := false; + reduce_sizeof := false; + reduce_assign_ops := false; + multi_file_mode := true; + local_externs_ok := true) + + fun compilerMode () = + (insert_explicit_coersions := true; + insert_scaling := true; + reduce_sizeof := true; + reduce_assign_ops := true; + multi_file_mode := false; + local_externs_ok := true) + + fun sourceToSourceMode () = + (insert_explicit_coersions := false; + insert_scaling := false; + reduce_sizeof := false; + reduce_assign_ops := false; + multi_file_mode := false; + local_externs_ok := true) + + val _ = sourceToSourceMode() (* default is sourceToSource mode *) + + val perform_type_checking = TypeCheckControl.perform_type_checking + (* true = do type checking; false = disable type checking; + Note: with type checking off, there is still some + rudimentary type processing, but no + usual unary conversions, usual binary conversions, etc. *) + + val undeclared_id_error = TypeCheckControl.undeclared_id_error + (* In ANSI C, an undeclared id is an error; + in older versions of C, undeclared ids are assumed integer. + Default value: true (for ANSI behavior) *) + val convert_function_args_to_pointers = + TypeCheckControl.convert_function_args_to_pointers + (* In ANSI C, arguments of functions goverened by prototype + definitions that have type function or array are not + promoted to pointer type; however many compilers do this + promotion. + Default value: true (to get standard behavior) *) + val storage_size_check = TypeCheckControl.storage_size_check + (* Declarations and structure fields must have known storage + size; maybe you want to turn this check off? + Default value: true (to get ANSI behavior). *) + + val allow_non_constant_local_initializer_lists = TypeCheckControl.allow_non_constant_local_initializer_lists + (* Allow non constant local inializers for aggregates and unions. + e.g. int x, y, z; + int a[] = {x, y, z}; + This is allowed gcc *) + + val (repeated_declarations_ok, resolve_anonymous_structs) = + if !multi_file_mode then (true, true) else (false, false) + + fun debugPrBinding (name: string, binding: B.symBinding) = + (print ("symbol binding: " ^ name ^ + (case binding + of B.MEMBER _ => " MEMBER" + | B.TAG _ => " TAG" + | B.TYPEDEF _ => " TYPEDEF" + | B.ID _ => " ID") + ^ "\n")) + + + (* some auxiliary functions *) + (* ---------------------- *) + fun toId tid = ".anon" ^ (Tid.toString tid) + + fun dt2ct {qualifiers,specifiers,storage} = + {qualifiers=qualifiers,specifiers=specifiers} + + fun signedNum ik = Ast.Numeric (Ast.NONSATURATE,Ast.WHOLENUM,Ast.SIGNED,ik,Ast.SIGNASSUMED) + fun unsignedNum ik = Ast.Numeric (Ast.NONSATURATE,Ast.WHOLENUM,Ast.UNSIGNED,ik,Ast.SIGNASSUMED) + val stdInt = TypeUtil.stdInt + + fun getBindingLoc(B.MEMBER{location,...}) = location + | getBindingLoc(B.ID{location,...}) = location + | getBindingLoc(B.TYPEDEF{location,...}) = location + | getBindingLoc(B.TAG{location,...}) = location + + + val bogusTid = Tid.new() + val bogusUid = Pid.new() + fun bogusMember sym = + {name = sym, uid = Pid.new(), location = SourceMap.UNKNOWN, + ctype = Ast.Error, kind = Ast.STRUCTmem} (* dbm: is this kind ok? *) + + fun isZeroExp(Ast.EXPR (Ast.IntConst 0, _, _)) = true + | isZeroExp _ = false + + fun isZeroCoreExp(Ast.IntConst 0) = true + | isZeroCoreExp _ = false + + fun getCoreExpr(Ast.EXPR (expr, _, _)) = expr + + (* check if a parse-tree type is of the `tagged' variety - i.e. it + * refers to a (struct, union, or enum) type defined elsewhere *) + fun isTagTy ({specifiers,...}: PT.decltype) = + let fun sTest (PT.StructTag _) = true + | sTest (PT.EnumTag _) = true + | sTest _ = false + in List.exists sTest specifiers + end + +local open Bindings in +(* main function *) +fun makeAst (sizes: Sizes.sizes, stateInfo: S.stateInfo, + errorState : Error.errorState) = +let + + (* if there are any parse errors, then don't print any type-checking errors *) + val _ = if Error.errorCount errorState > 0 + then (Error.noMoreErrors errorState; Error.noMoreWarnings errorState) + else () + + val globalState as {uidTables={ttab,atab,implicits},...} = + S.initGlobal(stateInfo, errorState) + + val localState = S.initLocal () + + val stateFuns = S.stateFuns(globalState, localState) + + val {locFuns = + {pushLoc, popLoc, getLoc, error, warn}, + tidsFuns = + {pushTids, resetTids}, + tmpVarsFuns = + {pushTmpVars, resetTmpVars}, + envFuns = + {topLevel, pushLocalEnv, popLocalEnv, lookSym, bindSym, + lookSymGlobal, bindSymGlobal, lookLocalScope, getGlobalEnv}, + uidTabFuns = + {bindAid, lookAid=lookAid0, bindTid, lookTid}, + funFuns = + {newFunction, getReturnTy, checkLabels, addLabel, addGoto}, + switchFuns = + {pushSwitchLabels, popSwitchLabels, addSwitchLabel, addDefaultLabel}, + ...} + = stateFuns + + val bug = Error.bug errorState + fun convFunError s _ = + raise Fail("Fatal Bug: extension conversion function " ^ s ^ " not installed yet!") + + + (* refs for extension conversion functions *) + val refCNVExp = ref(convFunError "CNVExp" : CnvExt.expressionExt -> Ast.ctype * Ast.expression) + val refCNVStat = ref(convFunError "CNVStat": CnvExt.statementExt -> Ast.statement) + val refCNVBinop = ref(convFunError "CNVBinop": {binop: ParseTreeExt.operatorExt, arg1Expr: ParseTree.expression, + arg2Expr: ParseTree.expression} + -> Ast.ctype * Ast.expression) + val refCNVUnop = ref(convFunError "CNVUnop": {unop: ParseTreeExt.operatorExt, argExpr: ParseTree.expression} + -> Ast.ctype * Ast.expression) + val refCNVExternalDecl = ref(convFunError "CNVExternalDecl" : CnvExt.externalDeclExt -> Ast.externalDecl list) + val refCNVSpecifier = ref(convFunError "CNVSpecifier": {isShadow: bool, rest : ParseTree.specifier list} + -> CnvExt.specifierExt + -> Ast.ctype) + val refCNVDeclarator = ref(convFunError "CNVDeclarator": Ast.ctype * CnvExt.declaratorExt + -> Ast.ctype * string option) + val refCNVDeclaration = ref(convFunError "CNVDeclaration": CnvExt.declarationExt -> Ast.declaration list) + + fun CNVExp x = !refCNVExp x + fun CNVStat x = !refCNVStat x + fun CNVBinop x = !refCNVBinop x + fun CNVUnop x = !refCNVUnop x + fun CNVExternalDecl x = !refCNVExternalDecl x + fun CNVSpecifier x = !refCNVSpecifier x + fun CNVDeclarator x = !refCNVDeclarator x + fun CNVDeclaration x = !refCNVDeclaration x + + (* miscellaneous utility functions *) + + (* could be a component of stateFuns *) + (* indicates a type used before it is defined: structs, unions, enums *) + (* should never happen for tid bound to a typedef *) + fun isPartial tid = + case lookTid tid + of SOME{ntype=NONE,...} => true + | _ => false + + fun isPartialTy(Ast.StructRef tid | Ast.UnionRef tid) = isPartial tid + | isPartialTy _ = false + + + fun isLocalScope sym = isSome(lookLocalScope sym) + + (* redefine lookAid with error recovery behavior *) + fun lookAid aid = + case lookAid0 aid + of NONE => + (bug ("lookAid: no type for this expression." + ^ Int.toString aid); + Ast.Void) + | SOME ct => ct + + (* pretty-printer utils *) (* DBM: not used *) + fun ppCt () = + PPL.ppToStrm (PPAst.ppCtype () ttab) TextIO.stdOut + + val ctToString = PPL.ppToString (PPAst.ppCtype () ttab) + + (* identifier convention: loc : Errors.location *) + + val isPointer = TU.isPointer ttab + val isFunction = TU.isFunction ttab (* is real function type; excludes pointer to function *) + val isNonPointerFunction = TU.isNonPointerFunction ttab + val isNumberOrPointer = TU.isNumberOrPointer ttab + val isNumber = TU.isNumber ttab + val isArray = TU.isArray ttab + fun deref v = + (case (TU.deref ttab v) + of SOME x => x + | NONE => (error + ("Cannot dereference type " ^ (ctToString v)); + Ast.Void)) + + val getFunction = TU.getFunction ttab + val isStructOrUnion= TU.isStructOrUnion ttab + val isEnum = TU.isEnum ttab + fun lookupEnum v = + (case (TU.lookupEnum ttab v) + of SOME x => x + | NONE => (bug "lookupEnum: invalid enum type"; + LargeInt.fromInt 0)) + + val equalType = TU.equalType ttab + val isScalar = TU.isScalar ttab + val isIntegral = TU.isIntegral ttab + val usualUnaryCnv = TU.usualUnaryCnv ttab + val usualBinaryCnv = TU.usualBinaryCnv ttab + val isConst = TU.isConst ttab + val isEquable = TU.isEquable ttab + val isAddable = TU.isAddable ttab + val isSubtractable = TU.isSubtractable ttab + val isComparable = TU.isComparable ttab + val conditionalExp = TU.conditionalExp ttab + val compatible = TU.compatible ttab + val functionArgConv = TU.functionArgConv ttab + val isFunctionPrototype = TU.isFunctionPrototype ttab + val getCoreType = TU.getCoreType ttab + + fun composite (ty1, ty2) = + case TU.composite ttab (ty1, ty2) + of (res, nil) => res + | (res, errL) => + (List.map error errL; + res) + + val hasKnownStorageSize = TU.hasKnownStorageSize ttab + val preArgConv = TU.preArgConv ttab + val cnvFunctionToPointer2Function = TU.cnvFunctionToPointer2Function ttab + + fun checkQuals ty = TU.checkQuals ttab ty + + fun wrapSTMT(coreStmt: Ast.coreStatement) : Ast.statement = + Ast.STMT (coreStmt, Aid.new (), getLoc()) + + fun wrapDECL(coreExtDecl: Ast.coreExternalDecl) : Ast.externalDecl = + Ast.DECL(coreExtDecl, Aid.new (), getLoc()) + + fun wrapEXPR (ty, coreExpr) = + let val ty = cnvFunctionToPointer2Function ty + (* all expressions of type Function are promoted to Pointer(Function) + * exceptions (&, sizeof) are handled in unops *) + (* Strictly speaking, arrays should also be converted to pointers here; + however code using array expressions deal with the array case directly (e.g. Sub, Deref); + Caution: if we were to make this change, we still need to know it was an array! + Where is the right place to do this conversion? *) + val adorn = bindAid ty + in (ty, Ast.EXPR (coreExpr, adorn, getLoc())) + end + + val simplifyAssignOps = SimplifyAssignOps.simplifyAssignOps + {lookAid=lookAid, getCoreType=getCoreType, wrapEXPR=wrapEXPR, + getLoc=getLoc, topLevel=topLevel, bindSym=bindSym, pushTmpVars=pushTmpVars} + + fun mkFunctionCt (retTy, argTys) = + (if isNonPointerFunction retTy + then error "Return type of function cannot be function type." + else (); + if isArray retTy + then error "Return type of function cannot be array type." + else (); + let fun withName f (t, n) = (f t, n) + val argTys = + if convert_function_args_to_pointers then + List.map (withName preArgConv) argTys + else List.map (withName cnvFunctionToPointer2Function) argTys + in + Ast.Function(retTy, argTys) + end) + + fun getStorageClass sym = + case lookSym sym + of SOME(B.ID{stClass,...}) => SOME stClass + | _ => NONE + + fun checkFn (funTy,argTys,exprs) = + let val isZeroExprs = List.map isZeroExp exprs + in + case TU.checkFn ttab (funTy, argTys, isZeroExprs) + of (res, nil, args) => (res, args) + | (res, errL, args) => + (List.map error errL; + (res, args)) + end + + (* DBM: should this go in State? or be defined in terms of a more + * primitive operation in State like the former insertOpAid? *) + fun noteImplicitConversion (Ast.EXPR (_, aid, _), ty) = AT.insert(implicits,aid,ty) + + fun wrapCast (ty, expr as (Ast.EXPR(_, aid', loc'))) = + if CTypeEq.eqCType(getCoreType(lookAid aid'), getCoreType ty) then expr (* DBM: gen. equality on types *) + (* 7/29/99: tentative fix for spurious casts + old code: if lookAid aid' = ty then expr (* DBM: gen. equality on types *) + *) + else let + val aid = bindAid ty + in + if !insert_explicit_coersions then + Ast.EXPR(Ast.Cast(ty, expr), aid, loc') + else + (noteImplicitConversion(expr, ty); + expr) + end + + fun sizeof ty = + LargeInt.fromInt (#bytes (Sizeof.byteSizeOf {sizes=sizes, err=error, warn=warn, bug=bug} ttab ty)) + + fun isLval (expr, ty) = + case expr + of Ast.Member(Ast.EXPR (expr'', aid, _), _) => + isLval (expr'', lookAid aid) + | (Ast.Id _ | Ast.Sub _ | Ast.Arrow _ | Ast.Deref _) => true + | _ => false + + fun checkAssignableLval (expr, ty, s) = + (* check we can assign to this expression, + * and generate error messages if not *) + if isLval (expr, ty) then + if isConst ty then + error + ("Type Error: lhs of assignment is const" + ^ (if s = "" then "." else (" in " ^ s ^ "."))) + else (case expr + of Ast.Id _ => + if isArray ty then + error + ("Type Error: lhs of assignment is an array (not a modifiable lval)" + ^ (if s = "" then "." else (" " ^ s ^ "."))) + else () + | _ => ()) + else + error + ("Type Error: lhs of assignment is not an lvalue" + ^ (if s = "" then "." else (" " ^ s ^ "."))) + + fun isAssignableTys {lhsTy, rhsTy, rhsExprOpt : Ast.coreExpression option} = + let val rhsExpr0 = (case rhsExprOpt + of SOME rhsExpr => isZeroCoreExp rhsExpr + | NONE => false) + in TU.isAssignable ttab {lhs=lhsTy, rhs=rhsTy, rhsExpr0=rhsExpr0} + end + + fun checkAssignableTys (x as {lhsTy, rhsTy, rhsExprOpt}) = + if not(isAssignableTys x) then + let val lhs = ctToString lhsTy + val rhs' = ctToString (usualUnaryCnv rhsTy) + val rhs = ctToString rhsTy + in error + ("Type Error: rval of type " ^ rhs + ^ " cannot be assigned to lval of type " ^ lhs ^ ".") + end + else () + + fun checkAssign {lhsTy,lhsExpr,rhsTy,rhsExprOpt : Ast.coreExpression option} = + if perform_type_checking then + (checkAssignableLval(lhsExpr, lhsTy, ""); + checkAssignableTys {lhsTy=lhsTy,rhsTy=rhsTy,rhsExprOpt=rhsExprOpt}) + else () + + fun isTYPEDEF({storage,...} : PT.decltype) = + if List.exists (fn PT.TYPEDEF => true | _ => false) storage (* any typedefs? *) + then (case storage of + [PT.TYPEDEF] => true (* must be exactly one typedef *) + | _ => (error "illegal use of TYPEDEF"; + true)) + else false + + fun declExprToDecl errorStr (decr, PT.EmptyExpr) = decr + | declExprToDecl errorStr (decr, _) = (error errorStr; decr) + + (* checks for illegal rebinding within current local scope, for other + * than objects and functions *) + fun checkNonIdRebinding (sym, ty, kind: string) : unit = + case lookLocalScope sym + of SOME(B.TYPEDEF{location=loc, ...}) => + (error ("illegal redeclaration of " ^ kind ^ (Sym.name sym) ^ + ";\n previously declared as typedef at " ^ + SM.locToString loc)) + | SOME(B.MEMBER{location=loc, ...}) => + (error ("illegal redeclaration of " ^ kind ^ (Sym.name sym) ^ + ";\n previously declared as member at " ^ + SM.locToString loc)) + | SOME(B.TAG{location=loc, ...}) => + (error ("illegal redeclaration of " ^ kind ^ (Sym.name sym) ^ + ";\n previously declared as tag at " ^ + SM.locToString loc)) + | NONE => () (* not previously bound in local scope *) + | _ => bug "checkNonIdRebinding: unexpected binding" + + + (* checks for illegal rebinding within current local scope + * only called in processDecr for "object" declaration *) + fun checkIdRebinding (sym, newTy, newStatus: Ast.declStatus, {globalBinding}) : Ast.declStatus * Ast.ctype * (Pid.uid option) = + case (if globalBinding then lookSymGlobal sym else lookLocalScope sym) + of SOME (B.ID{status=oldStatus,kind,location,ctype=oldTy,uid, ...}) => + if globalBinding orelse topLevel() + then let val status = + case (newStatus, oldStatus) + of (Ast.DEFINED,Ast.DEFINED) => + (error + (case kind + of Ast.FUNCTION _ => + ("illegal redefinition of identifier " + ^ (Sym.name sym) ^ + ";\n previously defined as function at " ^ + SM.locToString location) + | Ast.NONFUN => + ("illegal redefinition of identifier " + ^ (Sym.name sym) ^ + ";\n previously declared with initializer at " ^ + SM.locToString location)); + Ast.DEFINED) + | (Ast.DEFINED,_) => Ast.DEFINED + | (_,Ast.DEFINED) => Ast.DEFINED + | (Ast.DECLARED,_) => Ast.DECLARED + | (_,Ast.DECLARED) => Ast.DECLARED + | _ => Ast.IMPLICIT + val ty = + case kind + of Ast.FUNCTION _ => + if equalType (newTy, oldTy) then oldTy + else + (case composite(newTy,oldTy) + of SOME ty => ty + | NONE => + (error + ("illegal redeclaration of function " ^ + (Sym.name sym) ^ + " has type incompatible with previous " ^ + "declaration at " ^ + SM.locToString location); + newTy)) + | Ast.NONFUN => + if equalType (newTy, oldTy) then oldTy + else + (case composite(newTy, oldTy) + of SOME ty => ty + | NONE => + (error + ("illegal redeclaration of identifier " + ^ (Sym.name sym) ^ + ";\n type incompatible with previous \ + \declaration at " ^ + SM.locToString location); + newTy)) + in (status,ty,SOME uid) + end + else (* no redefinition *) + (error + ("illegal redeclaration of "^ (Sym.name sym) ^ + " in nested scope;\n previous declaration at " ^ + SM.locToString location); + (newStatus,newTy, NONE)) + | NONE => (newStatus,newTy, NONE) (* not previously bound in local scope *) + | _ => (error ((Sym.name sym)^" is not a variable"); + (newStatus,newTy, NONE)) (* not previously bound in local scope *) + + + (* code for calling initializer normalizer *) + fun normalize(ty, expr) = + InitializerNormalizer.normalize{lookTid=lookTid, bindAid=bindAid, + initType=ty, initExpr=expr} + + + (* type check initializer: + recursively descend into type and initializer, checking as we go. + NB 1: if type is unions and structs, then don't generate errors when initializer is simple + NB 2: if type is array then *do* generate errors when initializer is simple *) + + + fun TCInitializer(ctype as (Ast.TypeRef _ | Ast.Qual _), expr) = + TCInitializer(getCoreType ctype, expr) (* the following TCInitializer cases expect coretypes *) + | TCInitializer (Ast.Array(opt, ctype), Ast.Aggregate exprs) = + (case (opt, LargeInt.fromInt(List.length exprs)) + of (NONE, _) => + bug "TCInitializer: array size should be filled in by now?" + | (SOME(x, _), y) => + if x = y then () (* LargeInt equality *) + else if x < y then + error "TCInitializer: badly formed array initializer: \ + \too many initializers" + else error "TCInitializer: badly formed array initializer: \ + \not enough initializers"; + List.app (fn e => TCInitializer(ctype, e)) exprs) + | TCInitializer (Ast.Array _, _) = + error "badly formed array initializer: expected {" + | TCInitializer (Ast.StructRef tid, Ast.Aggregate exprs) = + (case lookTid tid + of SOME{ntype=SOME(B.Struct(tid,fields)),...} => + let fun f ((fieldType, _, _) :: l, expr :: exprs) = + (TCInitializer(fieldType, expr); + f (l, exprs)) + | f (nil, nil) = () + | f (_, nil) = + error + "badly formed struct initializer: not enough initializers" + | f (nil, _) = + error + "badly formed struct initializer: too many initializers" + in f (fields, exprs) + end + | NONE => bug "TCInitializer: lookTid failed" + | _ => error "TCInitializer: ill-formed StructRef type") + | TCInitializer (Ast.UnionRef tid, Ast.Aggregate exprs) = + (case lookTid tid + of SOME{ntype=SOME(B.Union(tid,(fieldTy, _)::fields)),...} => + (case exprs + of [expr] => TCInitializer(fieldTy, expr) + | _ :: _ => + error + "badly formed union initializer: \ + \initializer has too many elements" + | nil => + error "badly formed union initializer: empty initializer") + | SOME{ntype=SOME (B.Union(tid,_)), ...} => + error "empty union" + | NONE => bug "TCInitializer: lookTid failed" + | _ => error "TCInitializer: ill-formed UnionRef type") + | TCInitializer (ty as (Ast.StructRef _ | Ast.UnionRef _), Ast.Simple(Ast.EXPR(coreExp, aid, _))) = + if isAssignableTys {lhsTy=ty, rhsTy=lookAid aid, rhsExprOpt=SOME coreExp} + then () + else error "type of initializer is incompatible with type of lval" + | TCInitializer (Ast.Pointer(Ast.Numeric (_,_,_,Ast.CHAR,_)), + Ast.Simple(Ast.EXPR(Ast.StringConst _, _, _))) = () + | TCInitializer (ty, Ast.Aggregate([Ast.Simple(Ast.EXPR(coreExp, aid, _))])) = + if isScalar ty then + if isAssignableTys {lhsTy=ty, rhsTy=lookAid aid, + rhsExprOpt=SOME coreExp} + then () + else error "type of initializer is incompatible with type of lval" + else + error "illegal aggregate initializer" + + | TCInitializer (_, Ast.Aggregate _) = + error "illegal aggregate initializer" + | TCInitializer (ty, Ast.Simple(Ast.EXPR(coreExp, aid, _))) = + if isAssignableTys {lhsTy=ty, rhsTy=lookAid aid, + rhsExprOpt=SOME coreExp} + then () + else error "type of initializer is incompatible with type of lval" + + (* check form of initializer *) + fun checkInitializer (ty, initExpr, auto) = + let + val initExpr' = + case initExpr of + Ast.Aggregate _ => if isArray ty orelse (case isStructOrUnion ty of SOME _ => true | NONE => false) + then normalize(ty, initExpr) + else initExpr + | Ast.Simple(Ast.EXPR(Ast.StringConst _, _, _)) => normalize(ty, initExpr) + | _ => initExpr + (* the purpose of normalize is the handle the case of strings as initializers, + * and to pad out curly-brace initializers + *) + (* old code: 3/10/00 + * case (initExpr, auto) of + * (Ast.Aggregate _, _) => normalize(ty, initExpr) + * | (_, false) => normalize(ty, initExpr) + * | (Ast.Simple(Ast.EXPR(Ast.StringConst _, _, _)), _) => normalize(ty, initExpr) + * | (_, true) => initExpr + *) + val ty = case getCoreType ty + of Ast.Array(NONE, ctype) => + (case initExpr' + of Ast.Aggregate inits => + let val len = List.length inits + val i = LargeInt.fromInt len + val (_, expr) = wrapEXPR(stdInt, Ast.IntConst i) + in + if len=0 then warn "Array has zero size." else (); + Ast.Array(SOME(i, expr), ctype) + end + | _ => (error + "badly formed array initializer: missing \"{\""; + ty)) + | _ => ty + in TCInitializer(ty, initExpr'); + (initExpr', ty) + end + + (* processing declarator parse trees *) + + fun processDeclarator (typ as {qualifiers,specifiers,storage},decr) = + let fun vardeclToTypeNameLoc (typ as {qualifiers, specifiers},decr) = + let fun mkTyp spc = {qualifiers=[], specifiers=[spc]} + fun addQual q = {qualifiers=q::qualifiers, specifiers=specifiers} + in case decr + of PT.VarDecr x => (typ,SOME x,getLoc()) + | PT.PointerDecr x => + vardeclToTypeNameLoc (mkTyp (PT.Pointer typ),x) + | PT.ArrayDecr (x,sz) => + vardeclToTypeNameLoc (mkTyp (PT.Array (sz,typ)),x) + | PT.FuncDecr (x,lst) => + vardeclToTypeNameLoc (mkTyp (PT.Function{retType=typ,params=lst}),x) + | PT.QualDecr (q,decr) => + vardeclToTypeNameLoc (addQual q, decr) + | PT.EmptyDecr => (typ, NONE, getLoc()) + | PT.EllipsesDecr => (mkTyp PT.Ellipses, SOME("**ellipses**"), getLoc()) + | PT.MARKdeclarator(loc, decr) => + (pushLoc loc; + vardeclToTypeNameLoc(typ, decr) + before popLoc ()) + | PT.DecrExt _ => (typ, NONE, getLoc()) (* should call decr extension? *) + end + val ({qualifiers,specifiers},sOpt, loc) = + vardeclToTypeNameLoc ({qualifiers=qualifiers, + specifiers=specifiers}, + decr) + in ({qualifiers=qualifiers,specifiers=specifiers,storage=storage},sOpt, loc) + end + + (* processDecr : + * Ast.ctype * Ast.storageClass * bool + * -> (ParseTree.declarator * ParseTree.expression) + * * ((Ast.id * Ast.expression) list) + * -> ((Ast.id * Ast.expression) list) + * to be used by both external (global) decls and internal (statement + * level - within function body) decls. + * After type and storage class are specified, designed to be used with + * a fold function. + *) + + fun cnvInitExpression(PT.InitList exprs) = + Ast.Aggregate(map cnvInitExpression exprs) + | cnvInitExpression(PT.MARKexpression(loc, expr)) = + (pushLoc loc; + cnvInitExpression expr + before popLoc ()) + | cnvInitExpression(expr) = + Ast.Simple(#2(cnvExpression expr)) + + and processDecr (ty,sc,topLevel0) (decr,expr) = + let val (ty,varNameOpt,loc) = mungeTyDecr (ty, decr) + val varName = + case varNameOpt + of SOME name => name + | NONE => + (error + "missing declarator in declaration - \ + \filling with ."; + "") + + val hasInitializer = (case expr of + PT.EmptyExpr => false + | _ => true) + + val varSym = Sym.object varName + + val _ = if (topLevel0 = topLevel()) then () + else bug "inconsistency of topLevel!" + + val auto = case (topLevel0, sc) + of (true, Ast.AUTO) => + (error "`auto' not allowed in top-level declarations"; + false) + | (true, Ast.REGISTER) => + (error "`register' not allowed in top-level declarations"; + false) + | (true, _) => true + | (false, Ast.EXTERN) => + (if !local_externs_ok then () + else error "`extern' not allowed in local declarations"; + false) + | (false, Ast.STATIC) => false + | (false, _) => true + (* local declarations are auto unless declared static *) + + (* ISO p71: initExprs must be constant if + a) they are in an initilizer list for an object of aggregate or union type + b) the object has static storage duration + *) + (* Note: should really reduce constants arith exprs to simple constants *) + fun constCheck(Ast.EXPR((Ast.StringConst _ | Ast.IntConst _ | Ast.RealConst _),_,_)) = true + | constCheck(Ast.EXPR(Ast.QuestionColon(e1, e2, e3), _, _)) + = constCheck e1 andalso constCheck e2 andalso constCheck e3 + | constCheck(Ast.EXPR(Ast.Binop(_, e1, e2), _, _)) + = constCheck e1 andalso constCheck e2 + | constCheck(Ast.EXPR(Ast.Unop(_, e1), _, _)) = constCheck e1 + | constCheck(Ast.EXPR(Ast.Cast(_, e1), _, _)) = constCheck e1 + | constCheck(Ast.EXPR(Ast.EnumId _, _, _)) = true + | constCheck(Ast.EXPR(Ast.SizeOf _, _, _)) = true + | constCheck(Ast.EXPR(Ast.AddrOf _, _, _)) = true + | constCheck(Ast.EXPR(Ast.Id id, _, _)) = + (* id must be a function or an array (note: a function pointer won't do) *) + let val {ctype, ...} = id + in + isFunction ctype orelse isArray ctype + end + | constCheck _ = false + fun constCheckIE'(Ast.Simple expr) = constCheck expr + | constCheckIE'(Ast.Aggregate exprl) + = List.foldl (fn (x, y) => (constCheckIE' x) andalso y) true exprl + fun constCheckIE(Ast.Simple expr) = + (if topLevel0 orelse sc = Ast.STATIC orelse sc = Ast.EXTERN + then + if constCheck expr then () + else error("Illegal initializer: object has static storage duration, but initializer is not constant.") + else if isArray ty + then + if constCheck expr then () + else error("Illegal initializer: object is an array, but initializer is not constant.") + else ()) + + | constCheckIE x = if allow_non_constant_local_initializer_lists orelse constCheckIE' x then () + else error("Illegal initializer: initializer list elements must be constants.") + + (*** Checking initializers: from ISO p72. + 1. if toplevel or static or extern or array then initializer must be const + 2. case of type: + scalar: initializer must be a single expression, optionally enclosed in {} + aggregate or union: + a) apply normalize + b) type check + - but don't generate errors due to simple for unions and structs + - do generate errors due to simple for arrays + *) + + val (id, ty) = + if isFunction ty then (* declaring (NOT defining) a function *) + (* CHECK: sc should be either DEFAULT, or EXTERN or STATIC? *) + let val (status, newTy, uidOpt) = + checkIdRebinding(varSym, ty, Ast.DECLARED, {globalBinding=true}) + val uid = case uidOpt of + SOME uid => uid + | NONE => Pid.new() + val id = {name = varSym, uid = uid, location = loc, + ctype = newTy, stClass = sc, status = status, global = true, + kind = Ast.FUNCTION{hasFunctionDef=false}} + val binding = ID id + in bindSymGlobal(varSym, binding); + (id, newTy) + end + else (* not a function type *) + let val status = if hasInitializer then Ast.DEFINED else Ast.DECLARED + val hasExtern = (case sc of Ast.EXTERN => true | _ => false) + (* if hasExtern then force globalization of this binding *) + val (status,ty,uidOpt) = + checkIdRebinding(varSym, ty, status, {globalBinding=hasExtern}) + val uid = case uidOpt of SOME uid => uid | NONE => Pid.new() + + val id = {name = varSym, uid = uid, location = loc, + ctype = ty, stClass = sc, status = status, global = topLevel() orelse hasExtern, + kind = Ast.NONFUN} + (* always rebind, even if there was a previous binding in + * scope *) + in if hasExtern then bindSymGlobal (varSym, ID id) + else bindSym (varSym, ID id); + (id, ty) + end + + (* Delay processing of initializer until we've added a binding for + the variable. This implements the "left-to-right" processing + strategy of C -- i.e. we process the declaration before we process + the initializer. + This means that + int x=43; + main () { + int x = x+2; + } + does not have its intuitive meaning (at least for functional programmers). + In other words, initializers are not quite let statements! + + This does lead to a problem: sometimes we don't know the full type + of something until we've looked at the initializer + e.g. int [] = {1,2,3}; + So, we might have to fix up the type! + *) + + (* DBM: return fixed id as well, to fix Bug 19 *) + val (initExprOpt, ty, id) = + case expr + of PT.EmptyExpr => (NONE, ty, id) + | _ => + let + val e = cnvInitExpression expr + val _ = constCheckIE e + val (e',ty') = checkInitializer(ty, e, auto) + val id' = + if equalType(ty', ty) then id (* no fix for id required *) + else (* fix up type of id *) + (case lookSym varSym + of SOME(B.ID x) => + let val {name, uid, location, ctype, stClass, + status, global, kind} = x + val newid = {name=name, uid=uid, location=location, + ctype=ty', stClass=stClass, + status=status, global=global, + kind=kind} + in bindSym (varSym, ID newid); + newid + end + | _ => id) (* can never arise: id must have ID binding *) + in (SOME e', ty', id') + end + + (* Now do storage size check: can't do it earlier, because type might + be incomplete, and only completed by processing the initializer. *) + + val _ = + if storage_size_check then + if hasKnownStorageSize ty then () + else (case sc + of Ast.EXTERN => () + | _ => + error + ("Storage size of `" + ^ Sym.name varSym + ^ "' is not known (e.g. incomplete type, void)")) + else () + + in (id, initExprOpt) + end + + + (* processTypedef : + * Ast.ctype -> ParseTree.declarator -> () + * (storage class simply meant to discriminate between top-level (STATIC) and + * local (AUTO)) + *) + and processTypedef ty decr = + if !multi_file_mode then (* version of processTypede for multi_file_mode *) + let + val (ty,nameOpt,loc) = mungeTyDecr (ty, decr) + val name = + case nameOpt + of SOME name => name + | NONE => + (error + "Missing declarator in typedef - filling with missing_typedef_name"; + "missing_typedef_name") + + val sym = Sym.typedef name + + val tidOpt = + (case lookLocalScope sym + of SOME(TYPEDEF{ctype=ty, location=loc',...}) => + (case ty + of Ast.TypeRef tid => + if repeated_declarations_ok then SOME tid + else (error + ("Redeclaration of typedef `" ^ + (Sym.name sym) ^ + "'; previous declaration at " ^ + SM.locToString loc'); + NONE) + | _ => (error + ("Redeclaration of typedef `" ^ + (Sym.name sym) ^ + "'; previous declaration at " ^ + SM.locToString loc'); + NONE)) + | SOME binding => + (error + ("Redeclaration of `" ^ + (Sym.name sym) ^ + "' as a typedef; previous declaration at " ^ + SM.locToString (getBindingLoc binding)); + NONE) + | NONE => NONE) (* not bound locally *) + + + val tid = + case tidOpt + of SOME tid => tid + | NONE => Tid.new () (* create a new named type id *) + + val ty' = Ast.TypeRef tid + (* store actual typdef symbol mapped to named type id *) + val _ = checkNonIdRebinding(sym, ty', "typedef ") + + val binding = TYPEDEF{name = sym, uid = Pid.new(), location = loc, + ctype = ty'} + + (* store named type id mapped to typedef in named-type table *) + in bindSym(sym, binding); + bindTid (tid, {name=SOME name, ntype=SOME(B.Typedef (tid,ty)), + global = topLevel(), location=getLoc()}); + tid + end + else (* standard version of processTypedef *) + (* In time the two version should be combined. *) + let val (ty,nameOpt,loc) = mungeTyDecr (ty, decr) + val name = + case nameOpt + of SOME name => name + | NONE => + (error + "Missing declarator in typedef - filling with missing_typedef_name"; + "missing_typedef_name") + val sym = Sym.typedef name + + (* create a new named type id *) + val tid = Tid.new () + val ty' = Ast.TypeRef tid + + val _ = checkNonIdRebinding(sym, ty', "typedef ") + + val binding = TYPEDEF{name = sym, uid = Pid.new(), location = loc, + ctype = ty'} + + (* store named type id mapped to typedef in named-type table *) + in bindSym (sym, binding); + bindTid (tid, {name=SOME name, ntype=SOME (B.Typedef (tid,ty)), + global = topLevel(), location=getLoc()}); + tid + end + + + (* like processDeclarator, except it munges a Ast.ctype with + * a PT.declarator *) + and mungeTyDecr (ty: Ast.ctype, decr : PT.declarator) + : Ast.ctype * string option * SourceMap.location = + case decr + of PT.VarDecr str => (ty,SOME str,getLoc()) + | PT.PointerDecr decr => mungeTyDecr (Ast.Pointer ty, decr) + | PT.ArrayDecr (decr,PT.EmptyExpr) => mungeTyDecr(Ast.Array (NONE, ty), decr) + | PT.ArrayDecr (decr,sz) => + let val (i, aexpr) = case evalExpr sz (* cannot be EmptyExpr *) + of + (SOME i, _, aexpr, _) => (if i=0 then warn "Array has zero size." else (); + (i, aexpr)) + | (NONE, _, aexpr, _) => (error "Array must have constant size."; + (0, aexpr)) + in + mungeTyDecr(Ast.Array (SOME(i, aexpr), ty), decr) + end + + | PT.FuncDecr (decr,lst) => + let fun folder (dt,decr) = + let val (dty, argIdOpt, loc) = processDeclarator (dt, decr) + val (ty, sc) = cnvType (false, dty) + fun mkId n = { name = Sym.object n, + uid = Pid.new (), + location = loc, + ctype = ty, + stClass = sc, + status = Ast.DECLARED, + kind = Ast.NONFUN, + global = false } + in (ty, Option.map mkId argIdOpt) + end + val argTys = List.map folder lst + in mungeTyDecr(mkFunctionCt(ty, argTys), decr) + end + | PT.QualDecr (PT.CONST,decr) => + let val ty' = Ast.Qual (Ast.CONST,ty) + (* dpo: is this check necessary? + * Doesn't the 2nd call get the same info? *) + val {redundantConst, ...} = checkQuals ty + val {redundantConst=redundantConst', ...} = checkQuals ty' + in if not redundantConst andalso redundantConst' + then error "Duplicate `const'." + else (); + mungeTyDecr (ty', decr) + end + | PT.QualDecr (PT.VOLATILE,decr) => + let val ty' = Ast.Qual (Ast.VOLATILE,ty) + val {redundantVolatile, ...} = checkQuals ty + val {redundantVolatile=redundantVolatile', ...} = checkQuals ty' + in if not(redundantVolatile) andalso redundantVolatile' + then error "Duplicate `volatile'." + else (); + mungeTyDecr (ty', decr) + end + | PT.EllipsesDecr => (Ast.Ellipses, SOME "**ellipses**", getLoc()) + | PT.EmptyDecr => (ty, NONE, getLoc()) + | PT.MARKdeclarator(loc, decr) => + (pushLoc loc; + mungeTyDecr(ty, decr) + before popLoc ()) + | PT.DecrExt ext => + let val (t,n) = CNVDeclarator (ty, ext) in (t,n,getLoc()) end + + + (* -------------------------------------------------------------------- + * cnvExternalDecl : ParseTree.externalDecl -> Ast.externalDecl list + * + * Converts a parse-tree top-level declaration into an ast top-level + * declaration by adding the necessary symbols and types to the + * environment and recursively converting statements of function bodies. + * -------------------------------------------------------------------- *) + + and cnvExternalDecl (PT.ExternalDecl(PT.DeclarationExt ext)) = + let val declarations = CNVDeclaration ext + in + List.map (fn x => wrapDECL(Ast.ExternalDecl x)) declarations + end + + | cnvExternalDecl (PT.ExternalDecl(PT.MARKdeclaration (loc,decl))) = + (pushLoc loc; + cnvExternalDecl(PT.ExternalDecl decl) + before popLoc ()) + + | cnvExternalDecl (PT.ExternalDecl(PT.Declaration(dt as {qualifiers, specifiers, storage}, + declExprs))) : Ast.externalDecl list = + (* The following code is almost identical to corresponding case in processDecls ... + Any changes made here should very likely be reflected in changes to the processDecls code. *) + if isTYPEDEF dt then + let val ct = {qualifiers=qualifiers, specifiers=specifiers} + val decls = List.map (declExprToDecl "initializers in typedef") declExprs + in (* global typedefs *) + if List.null decls then (warn "empty typedef"; []) + else + let val ty = cnvCtype (false, ct) + val tidl = List.map (processTypedef ty) decls + in List.map (fn x => wrapDECL(Ast.ExternalDecl(Ast.TypeDecl{shadow=NONE, tid=x}))) tidl + end + end + else (* global variable and struct declarations *) + let val isShadow = List.null declExprs andalso isTagTy dt + (* isShadow does not necessarily mean "shadows a previous definition"; + rather, it refers to empty type declarations of the form + struct t; + enum e; + Of course, the real use of these declarations is + for defining mutually recursive structs/unions + that reuse previously defined ids i.e. for shadowing.... + Note: if we had + struct t x; + then this would not be a shadow, + hence the null declExprs test. + *) + val (ty,sc) = cnvType (isShadow, dt) + in if isShadow + then let fun getTid (Ast.StructRef tid) = SOME({strct=true}, tid) + | getTid(Ast.UnionRef tid) = SOME({strct=false}, tid) + | getTid(Ast.Qual(_, ct)) = getTid ct (* ignore qualifiers *) + | getTid _ = NONE (* don't deref typerefs *) + in + case getTid ty of + SOME(strct, tid) => [wrapDECL(Ast.ExternalDecl(Ast.TypeDecl{shadow=SOME strct, tid=tid}))] + | NONE => [] + end + else + let val idExprs = List.map (processDecr(ty,sc,true)) declExprs + in List.map (fn x => wrapDECL(Ast.ExternalDecl(Ast.VarDecl x))) idExprs + end + end + + | cnvExternalDecl (PT.FunctionDef {retType as {qualifiers,specifiers,storage}, + funDecr, krParams: PT.declaration list, body}) = + (* function definitions *) + let + val (funTy, tagOpt, funLoc) = processDeclarator (retType, funDecr) + val funName = case tagOpt + of SOME tag => tag + | NONE => + (bug + "Missing function name - \ + \filling with missing_function_name"; + "missing_function_name") + val (retType, args) = + case funTy + of {specifiers=[PT.Function {retType,params}],...} => (retType, params) + | _ =>(error "ill-formed function declaration"; + ({qualifiers=[],specifiers=[]}, nil)) + + val retType' = cnvCtype (false,retType) + + val sc = cnvStorage storage + + (* check validity of storage class *) + val _ = case sc + of Ast.DEFAULT => () + | Ast.EXTERN => () + | Ast.STATIC => () + | _ => (error "`auto' and `register' are not allowed \ + \in function declarations") + + val argTyIdOpts = List.map processDeclarator args + fun unzip3((x, y, z) :: l) = + let val (xl, yl, zl) = unzip3 l + in + (x :: xl, y :: yl, z :: zl) + end + | unzip3 nil = (nil,nil, nil) + + fun zip3(x :: xl, y :: yl, z :: zl) = (x, y, z) :: (zip3(xl, yl, zl)) + | zip3 _ = nil + + val (argTys, argIdOpts, locs) = unzip3 argTyIdOpts + + fun noDeclType{specifiers=nil,qualifiers=nil,storage=nil} = true + | noDeclType _ = false + + val krParamsAdmitted = List.all noDeclType argTys (* if true, K&R params are admitted *) + + (* enter a local scope - push a new symbol table *) + val _ = pushLocalEnv () + + (* insert (and convert) argument types in this symbol table *) + (* this needs to be done left to right because the first + * argument could define a type used in later args *) + val argTyScList = List.map (fn ty => cnvType(false,ty)) argTys + + (* create a (ctype * storageClass) IdMap.map *) + val argIds' = + let + fun iter ((SOME s) :: l) = (s :: (iter l)) + | iter (NONE :: l) = (warn "unnamed function argument"; + nil) + | iter nil = nil + in + case argTyIdOpts of + [({specifiers=[PT.Void], qualifiers=nil, storage=nil}, NONE, _)] => nil + (* special case of function definition f(void) {...} *) + | _ => iter argIdOpts + end + + (* zipped list will be size of shorter list - if one is shorter *) + val argTyScIdLocList = zip3 (argTyScList, argIds', locs) + fun folder ((tySc,id,loc),mp) = IdMap.insert (mp, id, (tySc,false,loc)) + (* false component means hasn't been matched with K&R parameters spec *) + val argMap = List.foldl folder IdMap.empty argTyScIdLocList + + (* check if krParams are ok *) + val _ = if null krParams orelse krParamsAdmitted then () + else error "mixing of K&R params and prototype style params not allowed" + + (* rectify additional types from K&R style parameters *) + val argMap = + let + fun folder (decl,argMap) = + (case decl + of PT.MARKdeclaration(loc,decl') => + (pushLoc loc; + folder(decl',argMap) before + popLoc()) + | PT.DeclarationExt _ => + (error "Declaration extensions not permitted in K&R parameter declarations"; + argMap) + | PT.Declaration(decltype as {storage,...}, decrExprs) => + if isTYPEDEF decltype then (error "typedef in function parameter declaration"; + argMap) + else let val decrs = List.map (declExprToDecl "initializer in function declaration") decrExprs + val (ty,sc) = cnvType (false, decltype) + fun folder' (decr, argMap) = + let val (ty, sOpt, loc) = mungeTyDecr (ty, decr) + val s = + case sOpt + of SOME s => + (case IdMap.find (argMap,s) + of NONE => + (error "K&R parameter not in function's identifier list"; + s) + | SOME (_,matched,_) => + if matched then + (error ("repeated K&R declaration for parameter "^ s); + s) + else s) + | NONE => + (error "Unnamed K&R style parameter - \ + \filling with unnamed_KR_parameter"; + "") + val argMap = IdMap.insert + (argMap, s, ((ty,sc),true,loc)) + in argMap + end + in List.foldl folder' argMap decrs + end) + in + List.foldl folder argMap krParams + end + + fun mapper id = + let val (p, loc) = + case IdMap.find (argMap, id) + of SOME (p,_,loc) => (p, loc) + | NONE => (bug "mapper: inconsistent arg map"; + ((Ast.Error, Ast.DEFAULT), SM.UNKNOWN)) + in (p, id, loc) + end + + val argTyScIdLocList' = List.map mapper argIds' + + fun checkStorageClass ((_,Ast.REGISTER),_, _) = () + | checkStorageClass ((_,Ast.DEFAULT),_, _) = () (* DBM: ??? *) + | checkStorageClass _ = + error "Only valid storage class for function parameters is `register'." + + val _ = List.map checkStorageClass argTyScIdLocList' + + (* insert function name in global scope *) + val argTys' = #1 (ListPair.unzip (#1 (unzip3 argTyScIdLocList'))) + + (* insert the arguments in the local symbol table *) + val argPids = + let fun bindArg ((ty,sc),name,loc) = + let + val ty = preArgConv ty (* array and function replaced by pointers *) + val sym = Sym.object name + val kind = Ast.NONFUN + (* argument types cannot have function type: + even if declared as function types, + they are treated as function pointers. *) + val id = {name = sym, uid = Pid.new(), location = loc, + ctype = ty, stClass = sc, status=Ast.DECLARED, + kind = kind, global = false} + val _ = case lookLocalScope sym of + NONE => () + | SOME _ => error ("Repeated function parameter " ^ (Sym.name sym)) + in bindSym(sym, ID id); + id + end + in List.map bindArg argTyScIdLocList' + end + + (* ASSERT: argument type list is null iff not a prototype style defn *) + val funTy' = mkFunctionCt (retType', + if null krParams then + ListPair.zip (argTys', map SOME argPids) + else nil) + val funSym = Sym.func funName + val (status, newTy, uidOpt) = + checkIdRebinding(funSym, funTy', Ast.DEFINED, {globalBinding=true}) + val uid = case uidOpt of + SOME uid => uid + | NONE => Pid.new() + val funId = {name = funSym, uid = uid, location = funLoc, + ctype = funTy', stClass = sc, status = status, + kind = Ast.FUNCTION{hasFunctionDef = true}, global = true} + val binding = ID funId + + val _ = bindSymGlobal(funSym, binding) + (* note: we've already pushed a local env for the function args, so + we are no longer at top level -- we must use bindSymGlobal here! *) + + (* set new function context (labels and returns) *) + val _ = newFunction retType' + (* get new type declarations (tids) from retType and argTys *) + val newtids = resetTids () + + val bodyStmt = cnvStatement body + (* note: what one might think of as an empty function body would + * actually be a compound statement consisting of an empty list + * of statements - thus all functions consist of one statement. *) + + in popLocalEnv (); + case checkLabels () + of NONE => () + | SOME (lab,loc) => + Error.error(errorState, loc, + "Label " ^ ((Sym.name lab)) + ^ "used but not defined."); + (List.map (fn x => wrapDECL(Ast.ExternalDecl(Ast.TypeDecl({shadow=NONE, tid=x})))) newtids) @ + [wrapDECL(Ast.FunctionDef (funId, argPids, bodyStmt))] + end + + | cnvExternalDecl (PT.MARKexternalDecl (loc,extDecl)) = + (pushLoc loc; + cnvExternalDecl extDecl + before popLoc ()) + | cnvExternalDecl (PT.ExternalDeclExt extDecl) = + CNVExternalDecl extDecl + + (* -------------------------------------------------------------------- + * cnvStatement : ParseTree.statement -> Ast.statement ternary_option + * + * Converts a parse-tree statement into an ast statement by adding the + * necessary symbols and types to the environment and recursively converting + * statements and expressions. + * + * A statement could be a type (or struct/union/enum) declaration which + * only effects the environment, so return type is Ast.statement list + * where the empty list is returned for such declarations. + * A parse-tree statement can also be a variable declaration which + * declares multiple variables in which case the result will be multiple + * Ast statements. All other cases will result in one Ast.statement + * being returned. + * + * In the parse tree, most (in principle all) statements have their + * locations marked by being wrapped in a MARKstatement constructor. + * In the ast, each core statement is wrapped by a STMT constructor + * which also contains the location in the source file from where + * the statement came. This is reflected in the structure of the + * function: each MARKstatement causes the marked location to pushed + * onto the stack in the environment, the wrapped statement is + * recursively converted, then wrapped in a STMT constructor with the + * location; finally the location is popped off the location stack in + * the environment. + * -------------------------------------------------------------------- *) + + and processDecls ((PT.Decl decl) :: rest, astdecls: Ast.declaration list list) + : Ast.declaration list * PT.statement list = + let fun processDeclaration (PT.Declaration(dt as {qualifiers, specifiers, ...}, declExprs)) = + (* The following code is almost identical to corresponding case in cnvExternalDecl *) + (* but we have deal with struct definitions -- cnvExternalDecl doesn't *) + (* have to deal with them because makeAst' catches these at top level *) + (* Any changes made here should very likely be reflected in changes to the cnvExternalDecl code. *) + if isTYPEDEF dt then + let val ct = {qualifiers=qualifiers, specifiers=specifiers} + val decrs = List.map (declExprToDecl "initializer in typedef") declExprs + in + if List.null decrs + then (warn "empty typedef"; + astdecls) + else + let val ty = cnvCtype (false, ct) + val tidl = List.map (processTypedef ty) decrs + val newtids = resetTids () + in (List.map (fn tid => Ast.TypeDecl{shadow=NONE, tid=tid}) tidl) :: + (List.map (fn tid => Ast.TypeDecl{shadow=NONE, tid=tid}) newtids) :: astdecls + (* note: must process declarations left to right since we + * could have e.g. int i=45, j = i; *) + end + end + else + let val isShadow = List.null declExprs andalso isTagTy dt + val (ty,sc) = cnvType (isShadow, dt) + (* ASSERT: null(tidsContext) *) + (* ASSERT: not at top level (i.e. topLevel() => false) *) + in if isShadow + then let fun getTid (Ast.StructRef tid) = SOME({strct=true}, tid) + | getTid(Ast.UnionRef tid) = SOME({strct=false}, tid) + | getTid(Ast.Qual(_, ct)) = getTid ct (* ignore qualifiers *) + | getTid _ = NONE (* don't deref typerefs *) + in + (case getTid ty of + SOME(strct, tid) => [Ast.TypeDecl{shadow=SOME strct, tid=tid}] + | NONE => []) :: + (List.map (fn tid => Ast.TypeDecl{shadow=NONE, tid=tid}) (resetTids ()))(*should always be null*) + :: astdecls + end + else let + val idExprs = + List.map (processDecr (ty,sc,false)) declExprs + (* note: must process declarations left to right since we + * could have e.g. int i=45, j = i; *) + val newtids = resetTids () + in (List.map Ast.VarDecl idExprs) :: + (List.map (fn tid => Ast.TypeDecl{shadow=NONE, tid=tid}) newtids) :: astdecls + (* DBM: push decl lists onto astdecls in reverse order since + * astdecls will be reversed before flattening *) + end + end + | processDeclaration(PT.DeclarationExt ext) = + let val declarations = CNVDeclaration ext + in declarations :: astdecls + end + | processDeclaration(PT.MARKdeclaration(newloc, decl)) = + (pushLoc newloc; + processDeclaration decl + before popLoc ()) + in + processDecls(rest, processDeclaration decl) + end + + | processDecls((PT.MARKstatement (newloc,stmt as PT.Decl _)) :: rest, + astdecls) = + (pushLoc newloc; + processDecls(stmt :: rest, astdecls) + before popLoc ()) + + | processDecls((PT.MARKstatement (newloc,stmt as PT.MARKstatement _)) :: rest, + astdecls ) = + processDecls(stmt :: rest, astdecls) + + | processDecls (rest, astdecls) = (List.concat(rev astdecls), rest) + + (* cnvStatement : PT.statement -> Ast.statement *) + and cnvStatement (stmt: PT.statement): Ast.statement = + (case stmt + of PT.Expr PT.EmptyExpr => wrapSTMT(Ast.Expr NONE) + | PT.Expr e => + let val (_, e') = cnvExpression e + in wrapSTMT(Ast.Expr(SOME e')) + end + | PT.Compound stmts => + (pushLocalEnv (); + let val (decls,rest) = processDecls(stmts,[]) + val stmts = List.map cnvStatement rest + val newtids = resetTids () + val newTmps = resetTmpVars() + val tmpdecls = List.map (fn pid => Ast.VarDecl(pid, NONE)) newTmps + val typedecls = List.map (fn tid => Ast.TypeDecl{shadow=NONE, tid=tid}) newtids + in wrapSTMT(Ast.Compound(decls@tmpdecls@typedecls,stmts)) + end + before popLocalEnv ()) + | PT.Decl _ => + (* shouldn't occur; process decls anyway, but discard them *) + (error "unexpected declaration"; + processDecls([stmt],[]); + (* may violate assertion topLevel() = false for processDecls *) + wrapSTMT(Ast.ErrorStmt)) + | PT.While (expr, stmt) => + let val (exprTy, expr') = cnvExpression expr + val stmt = cnvStatement stmt + in if perform_type_checking andalso not(isScalar exprTy) + then error + "Type Error: condition of while statement is not scalar." + else (); + wrapSTMT(Ast.While (expr',stmt)) + end + | PT.Do (expr, stmt) => + let val (exprTy, expr') = cnvExpression expr + val stmt = cnvStatement stmt + in if perform_type_checking andalso not(isScalar exprTy) + then error + "Type Error: condition of do statement is not scalar." + else (); + wrapSTMT(Ast.Do (expr',stmt)) + end + | PT.For (expr1,expr2,expr3,stmt) => + let val expr1' = + (case expr1 + of PT.EmptyExpr => NONE + | _ => SOME(#2 (cnvExpression expr1))) + val expr2' = + (case expr2 + of PT.EmptyExpr => NONE + | _ => + let val (exprTy,expr2') = cnvExpression expr2 + in if perform_type_checking andalso not(isScalar exprTy) + then error + "Type Error: condition of for statement is not scalar." + else (); + SOME expr2' + end) + val expr3' = + (case expr3 + of PT.EmptyExpr => NONE + | _ => SOME(#2 (cnvExpression expr3))) + val stmt = cnvStatement stmt + in wrapSTMT(Ast.For (expr1',expr2',expr3',stmt)) + end + | PT.Labeled (s,stmt) => + let val stmt = cnvStatement stmt + val labelSym = Sym.label s + val label = addLabel(labelSym, getLoc()) + in wrapSTMT(Ast.Labeled (label, stmt)) + end + | PT.CaseLabel (expr, stmt) => + let val n = case expr of + PT.EmptyExpr => (error "Non-constant case label."; 0) + | _ => (case evalExpr expr of (* cannot be EmptyExpr *) + (SOME i, _, _, sizeofFl) => + (if sizeofFl andalso not(!reduce_sizeof) + then warn("sizeof in case label not preserved in source-to-source mode.") + else (); + i) + | (NONE, _, _, _) => (error "Non-constant case label."; 0)) + in case addSwitchLabel n + of NONE => () + | SOME msg => error msg; + wrapSTMT(Ast.CaseLabel (n, (cnvStatement stmt))) + end + | PT.DefaultLabel stmt => + let val stmt = cnvStatement stmt + in case addDefaultLabel () + of NONE => () + | SOME msg => error msg; + wrapSTMT(Ast.DefaultLabel (stmt)) + end + | PT.Goto s => + let val labSym = Sym.label s + val label = addGoto(labSym, getLoc()) + in wrapSTMT(Ast.Goto label) + end + | PT.Break => wrapSTMT(Ast.Break) + | PT.Continue => wrapSTMT(Ast.Continue) + | PT.Return expr => + let val (exprTy, expr') = + case expr + of PT.EmptyExpr => (Ast.Void, NONE) + | _ => + let val (ty,expr) = cnvExpression expr + in (ty, SOME expr) + end + val returnTy = getReturnTy () + val _ = + if perform_type_checking then + (case returnTy + of SOME returnTy => + if isAssignableTys{lhsTy=returnTy, + rhsTy=exprTy, + rhsExprOpt=case expr' + of SOME expr'' => + SOME(getCoreExpr expr'') + | NONE => NONE} + then () + else + let val lhs = ctToString returnTy + val rhs = ctToString exprTy + in case expr of + PT.EmptyExpr => warn "missing return value." + (* lcc gives this a warning: check ISO standard... *) + | _ => error + ( "Type Error: returning expression has illegal type " ^ rhs + ^ ".\n Function has return type " ^ lhs ^ "." + ) + end + | NONE => ()) + else () + in wrapSTMT((Ast.Return expr')) + end + | PT.IfThen (expr,stmt) => + let val (exprTy, expr') = cnvExpression expr + val stmt = cnvStatement stmt + in if perform_type_checking andalso not(isScalar exprTy) + then error + "Type Error: condition of if statement is not scalar." + else (); + wrapSTMT(Ast.IfThen (expr',stmt)) + end + | PT.IfThenElse (expr, stmt1, stmt2) => + let val (exprTy, expr') = cnvExpression expr + val stmt1 = cnvStatement stmt1 + val stmt2 = cnvStatement stmt2 + in if perform_type_checking andalso not(isScalar exprTy) + then error + "Type Error: condition of if statement is not scalar." + else (); + wrapSTMT(Ast.IfThenElse (expr', stmt1, stmt2)) + end + | PT.Switch (expr, stmt) => + let val (exprTy, expr') = cnvExpression expr + val _ = + if perform_type_checking andalso not(isIntegral exprTy) + then error + "The controlling expression of switch statement \ + \is not of integral type." + else () + val _ = pushSwitchLabels () + val stmt = cnvStatement stmt + in popSwitchLabels (); + wrapSTMT(Ast.Switch(expr',stmt)) + end + | PT.StatExt stmt => + CNVStat stmt + | PT.MARKstatement (newloc,stmt) => + (pushLoc newloc; + cnvStatement stmt + before popLoc ())) + + + (* -------------------------------------------------------------------- + * cnvExpression : ParseTree.expression -> Ast.ctype * Ast.expression + * + * Converts a parse-tree expression into an ast expression by + * recursively converting subexpressions. + * + * In the ast, each core statement is wrapped by an EXPR constructor + * which also contains the nearest marked location in the source file + * from which the expression came. This is reflected in the structure + * of the function: each parse-tree expression is converted into an ast + * core expression and then wrapped in EXPR along with the current + * location indicated by the environment and a unique + * adornment. Subsequently each ast expression can be referred to by + * its adornment. Along the way, the type of each expression is + * calculated and stored in the environment in a map from expression + * adornments to types. + * + * The fact that types are computed for each expression does _not_ mean + * that this is a type checker. The bare minimum type checking is done + * to allow for the expression-adornment-type map to be built. (* DBM ??? *) + * -------------------------------------------------------------------- *) + + and cnvExpression expr = + let + fun numberOrPointer (ty, s) = + if isNumberOrPointer ty then () + else error ("Type Error: operand of " ^ s ^ + " must be a number or a pointer.") + + fun number (ty, s) = + if isNumber ty then () + else error("Type Error: operand of " ^ s ^ " must be a number.") + + fun mkBinopExp((ty1, ty2, resTy), expr1, expr2, binop) = + let val resTy = getCoreType resTy + in + wrapEXPR(resTy, Ast.Binop (binop, wrapCast(ty1, expr1), wrapCast(ty2, expr2))) + end + + fun mkUnopExp((ty, resTy), expr, unop) = + let val resTy = getCoreType resTy + in + wrapEXPR(resTy, Ast.Unop (unop, wrapCast(ty, expr))) + end + + fun mkBinaryAssignOpExp((newTy1, newTy2, resTy), ty1, expr1, ty2, expr2, assignOp, simpleOp) = + let val _ = checkAssign {lhsTy=ty1, lhsExpr=getCoreExpr expr1, rhsTy=resTy, rhsExprOpt=NONE} + fun getTy(Ast.EXPR(_, adorn, _)) = getCoreType(lookAid adorn) + in + if !reduce_assign_ops then + simplifyAssignOps(processBinop, simpleOp, {preOp=true}, expr1, expr2) + else + (if CTypeEq.eqCType(getTy expr1, getCoreType newTy1) then () + else noteImplicitConversion(expr1, newTy1); + if CTypeEq.eqCType(getTy expr2, getCoreType newTy2) then () + else noteImplicitConversion(expr2, newTy2); + mkBinopExp((ty1, ty2, ty1), expr1, expr2, assignOp)) (* result type is (getCoreType ty1) *) + end + + and mkUnaryAssignOpExp((newTy1, newTy2, resTy), ty1, expr1, preOp, assignOp, simpleOp) = + let + val (oneTy, one) = wrapEXPR(stdInt, Ast.IntConst 1) (* implicit one constant + -- all unaryassignops use one *) + val expr2 = one + val ty2 = oneTy + val _ = checkAssign {lhsTy=ty1, lhsExpr=getCoreExpr expr1, rhsTy=resTy, rhsExprOpt=NONE} + in + if !reduce_assign_ops then + simplifyAssignOps(processBinop, simpleOp, preOp, expr1, expr2) + else + mkUnopExp((ty1, ty1), expr1, assignOp) (* result type is (getCoreType ty1) *) + end + + and scaleExpr (size: LargeInt.int, expr as Ast.EXPR(_, adorn, _)) = + let + val ty1 = lookAid adorn + val expr1 = expr + val ty2 = stdInt + val (_, expr2) = wrapEXPR(ty2, Ast.IntConst size) + in + processBinop(ty1, expr1, ty2, expr2, PT.Times) + end + + and scalePlus(ty1, expr1, ty2, expr2) = (* scale integer added to pointer *) + case (!insert_scaling, isPointer ty1, isPointer ty2) of + (true, true, false) => let val (ty2, expr2) = scaleExpr(sizeof(deref ty1), expr2) + in + (ty1, expr1, ty2, expr2) + end + | (true, false, true) => let val (ty1, expr1) = scaleExpr(sizeof(deref ty2), expr1) + in + (ty1, expr1, ty2, expr2) + end + | _ => (ty1, expr1, ty2, expr2) (* no change *) + + and scaleMinus(ty1, ty2, expr2) = (* scale integer subtracted from pointer *) + case (!insert_scaling, isPointer ty1, isPointer ty2) of + (true, true, false) => let val (ty2, expr2) = scaleExpr(sizeof(deref ty1), expr2) + in + (ty2, expr2) + end + | _ => (ty2, expr2) (* no change *) + + and plusOp (ty1, ty2) = (* type check plus *) + if perform_type_checking then + (case isAddable {ty1=ty1, ty2=ty2} + of SOME{ty1, ty2, resTy} => (ty1, ty2, resTy) + | NONE => (error + "Type Error: Unacceptable operands of \"+\" or \"++\"."; + (ty1, ty2, ty1))) + else + (ty1, ty2, ty1) + + and minusOp (ty1, ty2) = + if perform_type_checking then + (case isSubtractable {ty1=ty1, ty2=ty2} of + SOME{ty1, ty2, resTy} => (ty1, ty2, resTy) + | NONE => (error + "Type Error: Unacceptable operands of \"-\" or \"--\"."; + (ty1, ty2, ty1))) + else + (ty1, ty2, ty1) + + and processBinop(ty1, expr1, ty2, expr2, expop) = + let + fun eqOp(ty1, exp1, ty2, exp2) = (* see H&S p208 *) + if perform_type_checking then + (case isEquable {ty1=ty1, exp1Zero=isZeroExp exp1, + ty2=ty2, exp2Zero=isZeroExp exp2} + of SOME ty => (ty, ty, signedNum Ast.INT) + | NONE => + (error + "Type Error: bad types for arguments of eq/neq operator."; + (ty1, ty2, signedNum Ast.INT))) + else (ty1, ty2, signedNum Ast.INT) + + fun comparisonOp(ty1, ty2) = (* see H&S p208 *) + if perform_type_checking then + (case isComparable {ty1=ty1, ty2=ty2} of + SOME ty => (ty, ty, signedNum Ast.INT) + | NONE => (error + "Type Error: bad types for arguments of \ + \comparison operator."; + (ty1, ty2, signedNum Ast.INT))) + else (ty1, ty2, signedNum Ast.INT) + + fun logicalOp2(ty1, ty2) = (* And and Or *) + let val stdInt = signedNum Ast.INT + in if perform_type_checking then + if isNumberOrPointer ty1 + andalso isNumberOrPointer ty2 + then (stdInt, stdInt, stdInt) + else + (error + "Type Error: Unacceptable argument of logical operator."; + (ty1, ty2, signedNum Ast.INT)) + else (ty1, ty2, signedNum Ast.INT) + end + + fun integralOp(ty1, ty2) = + if perform_type_checking then + if isIntegral ty1 andalso isIntegral ty2 + then (case usualBinaryCnv (ty1, ty2) of + SOME ty => (ty, ty, ty) + | NONE => + (bug "cnvExpression: integralOp."; + (ty1, ty2, signedNum Ast.INT))) + else + (error + "Type Error: arguments of mod, shift and \ + \bitwise operators must be integral numbers."; + (ty1, ty2, signedNum Ast.INT)) + else (ty1, ty2, signedNum Ast.INT) + + fun mulDivOp(ty1, ty2) = + if perform_type_checking then + if isNumber ty1 + andalso isNumber ty2 + then (case usualBinaryCnv (ty1, ty2) of + SOME ty => (ty, ty, ty) + | NONE => + (bug + "usualBinaryCnv should \ + \succeed for numeric types."; + (ty1, ty2, signedNum Ast.INT))) + else + (error + "Type Error: arguments of mul and div must be numbers."; + (ty1, ty2, signedNum Ast.INT)) + else + (ty1, ty2, ty1) + + in case expop + of PT.Plus => + let val (ty1, expr1, ty2, expr2) = scalePlus(ty1, expr1, ty2, expr2) + val resTy = plusOp(ty1, ty2) + in + mkBinopExp(resTy, expr1, expr2, Ast.Plus) + end + | PT.Minus => + let val (ty2, expr2) = scaleMinus(ty1, ty2, expr2) + val resTy = minusOp(ty1, ty2) + in + mkBinopExp(resTy, expr1, expr2, Ast.Minus) + end + | PT.Times => mkBinopExp(mulDivOp(ty1, ty2), expr1, expr2, Ast.Times) + | PT.Divide => mkBinopExp(mulDivOp(ty1, ty2), expr1, expr2, Ast.Divide) + | PT.Mod => mkBinopExp(integralOp(ty1, ty2), expr1, expr2, Ast.Mod) + | PT.Eq => mkBinopExp(eqOp(ty1, expr1, ty2, expr2), expr1, expr2, Ast.Eq) + | PT.Neq => mkBinopExp(eqOp(ty1, expr1, ty2, expr2), expr1, expr2, Ast.Neq) + | PT.Gt => mkBinopExp(comparisonOp(ty1, ty2), expr1, expr2, Ast.Gt) + | PT.Lt => mkBinopExp(comparisonOp(ty1, ty2), expr1, expr2, Ast.Lt) + | PT.Gte => mkBinopExp(comparisonOp(ty1, ty2), expr1, expr2, Ast.Gte) + | PT.Lte => mkBinopExp(comparisonOp(ty1, ty2), expr1, expr2, Ast.Lte) + | PT.And => mkBinopExp(logicalOp2(ty1, ty2), expr1, expr2, Ast.And) + | PT.Or => mkBinopExp(logicalOp2(ty1, ty2), expr1, expr2, Ast.Or) + | PT.BitOr => mkBinopExp(integralOp(ty1, ty2), expr1, expr2, Ast.BitOr) + | PT.BitAnd => mkBinopExp(integralOp(ty1, ty2), expr1, expr2, Ast.BitAnd) + | PT.BitXor => mkBinopExp(integralOp(ty1, ty2), expr1, expr2, Ast.BitXor) + | PT.Lshift => mkBinopExp(integralOp(ty1, ty2), expr1, expr2, Ast.Lshift) + | PT.Rshift => mkBinopExp(integralOp(ty1, ty2), expr1, expr2, Ast.Rshift) + | PT.PlusAssign => mkBinaryAssignOpExp(plusOp(ty1, ty2), ty1, expr1, ty2, expr2, Ast.PlusAssign, PT.Plus) + | PT.MinusAssign => mkBinaryAssignOpExp(minusOp(ty1, ty2), ty1, expr1, ty2, expr2, Ast.MinusAssign, PT.Minus) + | PT.TimesAssign => mkBinaryAssignOpExp(mulDivOp(ty1, ty2), ty1, expr1, ty2, expr2, Ast.TimesAssign, PT.Times) + | PT.DivAssign => mkBinaryAssignOpExp(mulDivOp(ty1, ty2), ty1, expr1, ty2, expr2, Ast.DivAssign, PT.Divide) + | PT.ModAssign => mkBinaryAssignOpExp(integralOp(ty1, ty2), ty1, expr1, ty2, expr2, Ast.ModAssign, PT.Mod) + | PT.XorAssign => mkBinaryAssignOpExp(integralOp(ty1, ty2), ty1, expr1, ty2, expr2, Ast.XorAssign, PT.BitXor) + | PT.OrAssign => mkBinaryAssignOpExp(integralOp(ty1, ty2), ty1, expr1, ty2, expr2, Ast.OrAssign, PT.BitOr) + | PT.AndAssign => mkBinaryAssignOpExp(integralOp(ty1, ty2), ty1, expr1, ty2, expr2, Ast.AndAssign, PT.BitAnd) + | PT.LshiftAssign => mkBinaryAssignOpExp(integralOp(ty1, ty2), ty1, expr1, ty2, expr2, Ast.LshiftAssign, PT.Lshift) + | PT.RshiftAssign => mkBinaryAssignOpExp(integralOp(ty1, ty2), ty1, expr1, ty2, expr2, Ast.RshiftAssign, PT.Rshift) + | PT.OperatorExt binop => + (bug "Operator extension (binop case) should be dealt with at top level case"; + wrapEXPR(Ast.Error, Ast.ErrorExpr)) + + | _ => (bug "[BuildAst.cnvExpression] \ + \Binary operator expected."; + wrapEXPR(Ast.Error, Ast.ErrorExpr)) + end + + fun processUnop(ty, expr, unop) = + let fun simpleUnOp(expop, s) = + let val newTy = usualUnaryCnv ty + in if perform_type_checking then + if isNumber newTy then () + else error ("Type Error: operand of " ^ s ^ " must be a number.") + else (); + mkUnopExp((ty, newTy), expr, expop) + end + fun logicalOp1 ty1 = (* Not *) + let val stdInt = signedNum Ast.INT + in if perform_type_checking then + if isNumberOrPointer ty1 + then (stdInt, stdInt) + else + (error + "Type Error: Unacceptable argument of logical operator."; + (ty1, signedNum Ast.INT)) + else (ty1, signedNum Ast.INT) + end + in + case unop of + PT.PostInc => mkUnaryAssignOpExp(plusOp(ty, stdInt), ty, expr, {preOp=false}, Ast.PostInc, PT.Plus) + | PT.PreInc => mkUnaryAssignOpExp(plusOp(ty, stdInt), ty, expr, {preOp=true}, Ast.PreInc, PT.Plus) + | PT.PostDec => mkUnaryAssignOpExp(minusOp(ty, stdInt), ty, expr, {preOp=false}, Ast.PostDec, PT.Minus) + | PT.PreDec => mkUnaryAssignOpExp(minusOp(ty, stdInt), ty, expr, {preOp=true}, Ast.PreDec, PT.Minus) + | PT.Uplus => simpleUnOp(Ast.Uplus, "unary op +") + | PT.Negate => simpleUnOp(Ast.Negate, "unary op +") + | PT.Not => mkUnopExp(logicalOp1 ty, expr, Ast.Not) + | PT.BitNot => simpleUnOp(Ast.BitNot, "unary op ~") + | _ => (bug "BuildAst.cnvExpression \ + \Unary operator expected"; + wrapEXPR(Ast.Error, Ast.ErrorExpr)) + end + + fun cnvExpr expr = (* returns (Ast.ctype * AST.CoreExpr) *) + (case expr + of PT.EmptyExpr => + (bug "cnvExpression: PT.EmptyExpr"; + wrapEXPR(Ast.Error, Ast.ErrorExpr)) + (* DBM: no more Ast.Empty_exp ??? *) + | PT.MARKexpression(loc, expr) => + (pushLoc loc; + cnvExpression expr + before popLoc ()) + | PT.IntConst i => + wrapEXPR(signedNum Ast.INT, Ast.IntConst i) + | PT.RealConst r => + wrapEXPR(signedNum Ast.DOUBLE, Ast.RealConst r) + | PT.String s => + let val t = if (!default_signed_char) + then signedNum Ast.CHAR + else unsignedNum Ast.CHAR + val ct = Ast.Pointer t + in wrapEXPR(ct,Ast.StringConst s) end + | PT.Id s => + (* should id of type function be immediately converted + * to pointer to function? *) + (case lookSym (Sym.object s) + of SOME(ID(id as {ctype=ty,...})) => + wrapEXPR(ty, Ast.Id id) + | SOME(MEMBER(member as {ctype=ty,kind,...})) => + (* could it be an enum constant? *) + (* note: an enum const is inserted as EnumConst, + * but is in same namespace as Object *) + (case kind + of Ast.ENUMmem i => + wrapEXPR(ty, Ast.EnumId(member,i)) + | Ast.STRUCTmem => + (error ("struct member used as id: " ^ s); + wrapEXPR(Ast.Error, Ast.ErrorExpr)) + | Ast.UNIONmem => + (error ("union member used as id: " ^ s); + wrapEXPR(Ast.Error, Ast.ErrorExpr))) + | NONE => (* implicit declaration *) + let val ty = signedNum Ast.INT + val sym = Sym.object s + val id = {name = sym, uid = Pid.new(), location = getLoc(), + ctype = ty, stClass = Ast.DEFAULT, status = Ast.IMPLICIT, + kind = Ast.NONFUN, global = topLevel()} + in bindSym(sym, B.ID(id(*,B.OBJ{final=false}*))); + (if undeclared_id_error then error else warn) + (s ^ " not declared"); + wrapEXPR(ty, Ast.Id id) + end + | SOME binding => + (bug ("cnvExpression: bad id binding for "^s) ; + debugPrBinding(s,binding); + wrapEXPR(Ast.Error, Ast.ErrorExpr))) + + | PT.Unop (PT.OperatorExt unop, expr) => + CNVUnop {unop=unop, argExpr=expr} + | PT.Unop (PT.SizeofType typeName, _) => + let val ty = cnvCtype (false,typeName) + in if storage_size_check then + if hasKnownStorageSize ty then () + else error "Cannot take sizeof an expression of unknown size." + else (); + if !reduce_sizeof then + let val ast = Ast.IntConst(sizeof ty) + in wrapEXPR(Ast.Numeric (Ast.NONSATURATE,Ast.WHOLENUM,Ast.UNSIGNED,Ast.INT,Ast.SIGNASSUMED), + ast) + end + else + wrapEXPR(Ast.Numeric (Ast.NONSATURATE,Ast.WHOLENUM,Ast.UNSIGNED,Ast.INT,Ast.SIGNASSUMED), + Ast.SizeOf ty) + end + | PT.Unop (expop, expr_parseTree) => + let val (ty, expr) = cnvExpression (expr_parseTree) + (* ASSERT: expr_parseTree cannot be PT.EmptyExpr *) + in case expop + of PT.Sizeof => + (let fun checkForFun(PT.Id s) = + (case lookSym (Sym.object s) + of SOME(B.ID{ctype=Ast.Function _,...}) => + error "Cannot take sizeof a function." + | _ => ()) + | checkForFun(PT.MARKexpression(loc, expr)) = checkForFun expr + | checkForFun _ = () + in + checkForFun expr_parseTree + end; + if storage_size_check then + if hasKnownStorageSize ty then () + else error + "Cannot take sizeof an expression of unknown size." + else (); + if !reduce_sizeof then + let val ast = Ast.IntConst(sizeof ty) + in + wrapEXPR(Ast.Numeric (Ast.NONSATURATE,Ast.WHOLENUM,Ast.UNSIGNED,Ast.INT,Ast.SIGNASSUMED), ast) + end + else + wrapEXPR(Ast.Numeric (Ast.NONSATURATE,Ast.WHOLENUM,Ast.UNSIGNED,Ast.INT,Ast.SIGNASSUMED), + Ast.SizeOf ty) + ) + | PT.AddrOf => + let val coreExpr = getCoreExpr expr + val ty = + if isLval(coreExpr, ty) then + case coreExpr of + Ast.Id {ctype=idCtype, stClass, ...} => + (if stClass = Ast.REGISTER + then error "Cannot take address of register variable." + else (); + if isFunction idCtype then ty (* ty already pointer to fn *) + else Ast.Pointer ty) + | _ => Ast.Pointer ty + else (error + "Cannot take address of non-lval expression."; + Ast.Pointer ty) + in + wrapEXPR(ty, Ast.AddrOf expr) + end + +(**** old code: delete in due course + let fun checkId(PT.Id s) = + (case getStorageClass (Sym.object s) + of SOME Ast.REGISTER => + error + "Cannot take address of register variable." + | _ => (); + if isFunction ty then + (case ty + of Ast.Pointer _ => wrapEXPR(ty, getCoreExpr expr) + | _ => wrapEXPR(Ast.Pointer ty, getCoreExpr expr)) + (* Bug fix from Satish: 2/4/99 + It should be just "ty" in place of "Pointer ty", because we convert + all function types to pointer types at the end of cnvExpr, by + calling cnvFunctionToPointer2Function. + Conservative coding: above deals with case when function may + *not* have pointer around it. + *) + else wrapEXPR(Ast.Pointer ty, Ast.AddrOf expr)) + | checkId(PT.MARKexpression(loc, expr)) = checkId expr + | checkId _ = wrapEXPR(Ast.Pointer ty, Ast.AddrOf expr) + in + checkId expr_parseTree + end + else + (error + "Cannot take address of non-lval expression."; + wrapEXPR(Ast.Pointer ty, Ast.AddrOf expr)) +end old code ******) + | PT.Star => wrapEXPR(deref ty, Ast.Deref expr) + (* Used to explicitly squash *f, but this is incorrect. + Note 1: this happens automatically for type. + If I have *f and f has type=pointer(function), + then deref ty give us type=function, + and then wrapEXPR gives us back pointer(function). + Note 2: the real semantic processing of what star + achieves operationally is defined in simplify. *) + | PT.OperatorExt unop => + (bug "Operator extension (unop case) should be dealt with at top level case"; + wrapEXPR(Ast.Error, Ast.ErrorExpr)) + + | _ => processUnop(ty, expr, expop) + end + + | PT.Binop (PT.OperatorExt binop, expr1, expr2) => + CNVBinop {binop=binop, + arg1Expr=expr1, + arg2Expr=expr2} + | PT.Binop (expop, expr1, expr2) => + let val (ty1, expr1') = cnvExpression (expr1) + in case expop + of PT.Dot => + let + val s = + let fun getId (PT.Id str) = str + | getId (PT.MARKexpression(loc, expr)) = getId expr + | getId _ = (error "Identifier expected - filling with missing_id"; + "") + in + getId expr2 + end + + val m as {ctype,...} = + (case isStructOrUnion ty1 + of SOME tid => + let val sym = Sym.member (tid, s) + in case lookSym sym + of SOME(MEMBER m) => m + | _ => + (if isPartial tid then + error + "Can't access fields in incomplete type." + else error ("Field " ^ s ^ " not found."); + (* get garbage pid to continue *) + bogusMember sym) + end + | NONE => + (error + ("Field " ^ s ^ + " not found; expression does not have structure \ + \or union type."); + (* get garbage pid to continue *) + bogusMember(Sym.member(bogusTid,"s")))) + in wrapEXPR(ctype, Ast.Member (expr1', m)) + end + | PT.Arrow => + let + val s = + let fun getId (PT.Id str) = str + | getId (PT.MARKexpression(loc, expr)) = getId expr + | getId _ = (error "Identifier expected - filling with missing_id"; + "") + in + getId expr2 + end + val tyDeref = deref ty1 + val m as ({ctype,...}: Ast.member) = + (case isStructOrUnion tyDeref + of SOME tid => + let val sym = Sym.member (tid, s) + in case lookSym sym + of SOME(B.MEMBER m) => m + | NONE => + (if isPartial tid then + error + "Can't access fields in incomplete type." + else error ("Field " ^ s ^ " not found."); + (* get garbage pid to continue *) + bogusMember sym) + | _ => (error (s^" is not a member"); + bogusMember sym) + end + | NONE => + (error + ("Field " ^ s ^ + " not found; expression does not have structure \ + \or union type."); + (* get garbage pid to continue *) + bogusMember(Sym.member(bogusTid,"s")))) + in wrapEXPR(ctype, Ast.Arrow (expr1', m)) + end + | PT.Sub => + let val (ty2, expr2') = cnvExpression (expr2) + val ty = + if isPointer ty1 then deref ty1 + else if isPointer ty2 then deref ty2 + else (error "Array/ptr expected."; + Ast.Error) + in wrapEXPR(ty, Ast.Sub (expr1', expr2')) + end + | PT.Comma => + let val (ty2, expr2') = cnvExpression (expr2) + in wrapEXPR(ty2, Ast.Comma (expr1', expr2')) + end + | PT.Assign => + let val (exprTy, expr2') = cnvExpression (expr2) + val _ = checkAssign {lhsTy=ty1, lhsExpr=getCoreExpr expr1', + rhsTy=exprTy, + rhsExprOpt=SOME(getCoreExpr expr2')} + val resultTy = getCoreType ty1 + val (expr2') = wrapCast (resultTy, expr2') + in wrapEXPR(resultTy, Ast.Assign (expr1', expr2')) + (* type of result is the unqualified type of the left + * operand: H&S p 221. *) + end + | _ => let val (ty2, expr2') = cnvExpression (expr2) + in processBinop (ty1, expr1', ty2, expr2', expop) + end + end + | PT.QuestionColon (expr1, expr2, expr3) => + let + val (exprTy, expr1') = cnvExpression (expr1) + val _ = + if perform_type_checking andalso not(isScalar exprTy) + then error + "Type Error: condition of question-colon statement is not scalar." + else () + val (ty2, expr2') = cnvExpression (expr2) + val (ty3, expr3') = cnvExpression (expr3) + val ty4 = (case conditionalExp {ty1=ty2,exp1Zero=isZeroExp expr2', + ty2=ty3,exp2Zero=isZeroExp expr3'} + of SOME ty => ty + | NONE => + (error + "Type Error: Unacceptable operands of question-colon."; + ty2)) + val (expr2') = wrapCast (ty4, expr2') + val (expr3') = wrapCast (ty4, expr3') + in + wrapEXPR(ty4, Ast.QuestionColon (expr1',expr2',expr3')) + end + | PT.Call (expr, exprs) => + let + val (funTy, expr', prototype) = + let fun checkId (PT.Id s) = + let val funId as ({ctype=funTy,...}: Ast.id) = + (case lookSym (Sym.func s) + of SOME(ID id) => id + | NONE => + (* if ANSI C then this should be an error... *) + let val ty = mkFunctionCt (signedNum Ast.INT,[]) + val varSym = Sym.object s + val id = {name = varSym, uid = Pid.new(), + location = getLoc(),status=Ast.IMPLICIT, + ctype = ty, stClass = Ast.EXTERN, + kind = Ast.FUNCTION{hasFunctionDef=false}, + global = true} (* is is a function, so it is global! *) + val binding = ID id + in (* force insertion of symbol at top level *) + bindSymGlobal(varSym, binding); + (if Config.TypeCheckControl.undeclared_fun_error + then error else warn) + ("function " ^ s ^ " not declared"); + id + end + | _ => (error (s^" is not a function"); + {name = Sym.func s, uid = Pid.new(), + location = SourceMap.UNKNOWN, + ctype = Ast.Error, global = topLevel(), + stClass = Ast.DEFAULT, status = Ast.IMPLICIT, + kind = Ast.FUNCTION{hasFunctionDef=false}})) + val adorn = bindAid funTy + in (funTy, Ast.EXPR (Ast.Id funId, adorn, getLoc()), + isFunctionPrototype funTy) + end + | checkId(PT.MARKexpression(loc, expr)) = + (pushLoc loc; + checkId expr + before popLoc ()) + | checkId _ = + let val (funTy, expr) = cnvExpression expr + val prototype = isFunctionPrototype funTy + in (funTy, expr, prototype) + end + in + checkId expr + end + + val tyExprList = List.map cnvExpression exprs + val (argTys, exprs) = ListPair.unzip tyExprList + + fun cnvArgs (expr :: exprs, ty :: tys) = + let val (expr) = wrapCast (ty, expr) + val (exprs) = cnvArgs (exprs, tys) + in expr :: exprs + end + | cnvArgs (nil, nil) = nil + | cnvArgs _ = + (bug "type list and expression list must be same size"; + nil) + + val (retTy, exprs) = + if perform_type_checking + then if prototype + then let val (retTy, cnvArgTys) = + checkFn (funTy, argTys, exprs) + val (exprs) = cnvArgs (exprs, cnvArgTys) + in (retTy, exprs) + end + else let val cnvArgTys = List.map (functionArgConv) argTys + val retTy = + case getFunction funTy + of SOME(retTy,_) => retTy + | NONE => + (error + "Called object is not a function."; + Ast.Error) + val (exprs) = cnvArgs (exprs, cnvArgTys) + in (retTy, exprs) + end + else let val retTy = case getFunction funTy + of SOME(retTy,_) => retTy + | NONE => Ast.Void + in (retTy, exprs) + end + in + wrapEXPR(retTy, Ast.Call(expr', exprs)) + end + | PT.Cast (ct, expr) => (* TODO: should check consistency of cast *) + let val ty = cnvCtype (false, ct) + val (_, expr') = cnvExpression expr + in wrapEXPR(ty, Ast.Cast (ty, expr')) + end + | PT.InitList exprs => + let fun process e = #2(cnvExpression e) + val exprs = List.map process exprs + in (* PT.InitList should only occur within declarators as + * an aggregate initializer. It is handled in processDecr. *) + bug "cnvExpression: unexpected InitList"; + wrapEXPR(Ast.Error, Ast.ErrorExpr) + end + + | PT.ExprExt expr => CNVExp expr + ) + in cnvExpr expr + end + + (* -------------------------------------------------------------------- + * cnvType : bool * PT.ctype -> Ast.ctype + * + * Converts a parse-tree type into an ast type, adding new type and + * symbol (e.g. enumerated values and field identifiers) into the + * environment. + * + * The boolean first argument is a flag indicating if this type is a + * `shadow' - that is a struct/enum/union tag type used to refer + * to a future struct/union/enum declaration rather than one defined in + * an outer scope. + * + * Named types (i.e. structs/unions/enums/typedefs) are represented by + * indexes into the named-type table. That table maps these indexes to + * the actual struct/union/enum/typedef. This allows for for such a + * type to be resolved without having to do multiple enquiries into the + * symbol-table stack. By convention, an explicitly tagged type will be + * stored redundantly in the symbol table: once as its explicit tag and + * once as a manufactured one corresponding to the unique named type id + * generated by Tidtab.new. + * -------------------------------------------------------------------- *) + + and cnvCtype (isShadow: bool, ty: PT.ctype) : Ast.ctype = + let fun cnvSpecifier specifiers = + let val signed = ref (NONE : Ast.signedness option) + val frac = ref (NONE : Ast.fractionality option) + val sat = ref (NONE : Ast.saturatedness option) + val kind = ref (NONE : Ast.intKind option) + fun cnvSpecList (spec :: specL) = + (case spec + of PT.Signed => + (case !kind + of SOME (Ast.FLOAT | Ast.DOUBLE | Ast.LONGDOUBLE) => + error "illegal combination of signed with float/double/long double" + | _ => (); + case !signed + of NONE => (signed := SOME Ast.SIGNED) + | SOME _ => error "Multiple signed/unsigned") + | PT.Unsigned => + (case !kind + of SOME (Ast.FLOAT | Ast.DOUBLE | Ast.LONGDOUBLE) => + error "illegal combination of unsigned with float/double/long double" + | _ => (); + case !signed + of NONE => (signed := SOME Ast.UNSIGNED) + | SOME _ => error "Multiple signed/unsigned") + | PT.Char => + (case !kind + of NONE => (kind := SOME Ast.CHAR) + | SOME ct => + error (case ct + of Ast.CHAR => "duplicate char specifier" + | _ => "illegal use of char specifier")) + | PT.Short => + (case !kind + of (NONE | SOME Ast.INT) => (kind := SOME Ast.SHORT) + | SOME ct => + error (case ct + of Ast.SHORT => "duplicate short specifier" + | _ => "illegal use of short specifier")) + | PT.Int => + (case !kind + of NONE => (kind := SOME Ast.INT) + | SOME (Ast.SHORT | Ast.LONG | Ast.LONGLONG) => () + | SOME ct => + error (case ct + of Ast.INT => "duplicate int specifier" + | _ => "illegal use of int specifier")) + | PT.Long => + (case !kind + of NONE => (kind := SOME Ast.LONG) + | SOME Ast.LONG => (kind := SOME Ast.LONGLONG) + | SOME Ast.INT => (kind := SOME Ast.LONG) + | SOME ct => + error (case ct + of Ast.LONGLONG => "triplicate long specifier" + | _ => "illegal use of long specifier")) + | PT.Float => + (case !signed + of NONE => () + | SOME _ => error "illegal combination of signed/unsigned with float"; + case !kind + of NONE => (kind := SOME Ast.FLOAT) + | SOME ct => + error (case ct + of Ast.FLOAT => "duplicate float specifier" + | _ => "illegal use of float specifier")) + | PT.Double => + (case !signed + of NONE => () + | SOME _ => error "illegal combination of signed/unsigned with double"; + case !kind + of NONE => (kind := SOME Ast.DOUBLE) + | SOME Ast.LONG => (kind := SOME Ast.LONGDOUBLE) + | SOME ct => + error (case ct + of Ast.DOUBLE => "duplicate double specifier" + | _ => "illegal use of double specifier")) + | PT.Fractional => + (case !frac + of NONE => (frac := SOME Ast.FRACTIONAL) + | SOME _ => error "Multiple fractional or wholenum") + | PT.Wholenum => + (case !frac + of NONE => (frac := SOME Ast.WHOLENUM) + | SOME _ => error "Multiple fractional or wholenum") + | PT.Saturate => + (case !sat + of NONE => (sat := SOME Ast.SATURATE) + | SOME _ => error "Multiple saturate or nonsaturate") + | PT.Nonsaturate => + (case !sat + of NONE => (sat := SOME Ast.NONSATURATE) + | SOME _ => error "Multiple saturate or nonsaturate") + | _ => error("Illegal combination of type specifiers."); + cnvSpecList specL) + | cnvSpecList [] = + let val numKind = case !kind + of NONE => Ast.INT + | SOME numKind => numKind + val frac = case !frac + of NONE => Ast.WHOLENUM + | SOME frac => frac + val (sign,decl) + = case (!signed, numKind) + of (NONE, Ast.CHAR) => + if (!default_signed_char) + then (Ast.SIGNED, Ast.SIGNASSUMED) + else (Ast.UNSIGNED, Ast.SIGNASSUMED) + (* according to H&S p115, + * char can be signed or unsigned *) + | (NONE, _) => (Ast.SIGNED, Ast.SIGNASSUMED) + | (SOME sign, _) => (sign, Ast.SIGNDECLARED) + val sat = case !sat + of NONE => Ast.NONSATURATE + | SOME sat => sat + in + Ast.Numeric (sat,frac,sign,numKind,decl) + end + fun noMore [] _ = () + | noMore _ err = error (err ^ " cannot be combined with a specifier.") + in case specifiers + (* singleton cases: these should appear solo *) + of PT.Void :: l => (noMore l "Void"; Ast.Void) + | PT.Ellipses :: l => (noMore l "Ellipse"; Ast.Ellipses) + | (PT.Array (expr, ty)) :: l => + let val _ = noMore l "Array" + val opt = case expr of + PT.EmptyExpr => NONE + | _ => (case evalExpr expr of (* cannot be EmptyExpr *) + (SOME i, _, expr', _) => + (if i=0 then warn "Array has zero size." else (); + SOME(i, expr')) + | (NONE, _, expr', _) => (error "Array size must be constant expression."; + SOME(0, expr'))) + val ty' = cnvCtype (false, ty) + in Ast.Array (opt, ty') + end + | (PT.Pointer ty) :: l => + let val _ = noMore l "Pointer" + val ty' = cnvCtype (false, ty) + in Ast.Pointer ty' + end + | (PT.Function {retType,params}) :: l => + let val _ = noMore l "Function" + val retTy = cnvCtype (false, retType) + fun process (dt, decl) = + let + (*dpo: ignore storage class in translating type *) + val (dty, argIdOpt, loc) = + processDeclarator (dt, decl) + val (ty, sc) = cnvType (false, dty) + fun mkId n = { name = Sym.object n, + uid = Pid.new (), + location = loc, + ctype = ty, + stClass = sc, + status = Ast.DECLARED, + kind = Ast.NONFUN, + global = false } + + in + (ty, Option.map mkId argIdOpt) + end + val argTys = List.map process params + in mkFunctionCt (retTy, argTys) + end + + (* ------------- Enumerated Types ---------------- + * If enum tag is explicitly mentioned: + * if partially defined then use that named type + * identifier; + * otherwise, if it has never been mentioned or if + * it has been mentioned for a completely defined + * type (so that this definition is new for an + * inner scope) then create a new named type id + * and store a reference to it in the current + * symbol table. + * Otherwise, this is an `anonynmous' enum type: create a + * new named type id and store a reference to it in the + * current symbol table. + *) + + | (PT.Enum {tagOpt,enumerators,trailingComma}) :: l => + let + val _ = noMore l "Enum" + (* check for trailing comma warning/error *) + val _ = + if trailingComma then + if #error Config.ParseControl.trailingCommaInEnum + then error "trailing comma in enum declaration" + else if #warning Config.ParseControl.trailingCommaInEnum + then warn "trailing comma in enum declaration" + else () + else () + val (tid, alreadyDefined) = + (* alreadyDefined for multi-file analysis mode *) + (case tagOpt + of SOME tagname => + let + val sym = Sym.tag tagname + val tidFlagOpt = + (case lookLocalScope sym + of SOME(TAG{ctype=ty,location=loc',...}) => + (case ty + of Ast.EnumRef tid => + if isPartial tid + then SOME{tid=tid, alreadyDefined=false} + else if repeated_declarations_ok + then SOME{tid=tid, alreadyDefined=true} + else + (error + ("Redeclaration of enum tag `" ^ + tagname ^ + "'; previous declaration at " ^ + SM.locToString loc'); + NONE) + | _ => + (error + ("Redeclaration of enum tag `" ^ + tagname ^ + "'; previous declaration was not an " ^ + "enum tag and appeared at " ^ + SM.locToString loc'); + NONE)) + | NONE => NONE + | _ => (error (tagname^ " is not an enum tag"); + NONE)) + in case tidFlagOpt + of SOME{tid, alreadyDefined} => + (tid, alreadyDefined) + | NONE => + let val tid = Tid.new () + val ty = Ast.EnumRef tid + in bindSym(sym, TAG{name=sym,uid=Pid.new(), + location=getLoc(), ctype=ty}); + bindTid (tid, {name=tagOpt, ntype=NONE, + global=topLevel(), location=getLoc()}); + (tid, false) + end + end + | NONE => + let val (tid,alreadyDefined) = + if !multi_file_mode andalso (topLevel ()) then + (* in multi_file_mode, give identical top-level + * enums the same tid *) + case AnonymousStructs.findAnonStructEnum ty + of SOME tid => (tid,true) + | NONE => + let val tid = Tid.new () + in AnonymousStructs.addAnonTid(ty, tid); + (tid,false) + end + else + let val tid = Tid.new () + (* in standard mode, allocate new tid *) + in (tid, false) + end + in if alreadyDefined then () + else bindTid (tid, {name=tagOpt, ntype=NONE, + global=topLevel(), location=getLoc()}); + (tid, alreadyDefined) + end) + + (* add each enum value into symbol table (and evaluate it); + prevVal passes the enum value from one enum entry to the next + so that + enum {e1,e2,e3=4,e4}; + gives + enum {e1=0,e2=1,e3=4,e4=5}; + *) + fun process prevVal nil = nil + | process prevVal ((name,e) :: l) = + let val constValOpt = + case e of + PT.EmptyExpr => NONE + | _ => (case evalExpr e of + (SOME i, _, _, sizeofFl) => + (if sizeofFl andalso not(!reduce_sizeof) + then warn("sizeof in enum value " ^ + "not preserved in source-to-source mode.") + else (); + SOME i) + | (NONE, _, _, _) => + (error "Enum value must be constant expression."; + NONE)) + val constVal = + case constValOpt + of SOME n => n + | NONE => prevVal + 1 + val sym = Sym.enumConst name + val ty = Ast.EnumRef tid + val _ = checkNonIdRebinding(sym, ty, "enum constant ") + + val member = {name = sym, uid = Pid.new(), + location = getLoc(), ctype=ty, + kind = Ast.ENUMmem constVal} + val binding = B.MEMBER member + + val _ = bindSym (sym, binding) + in + (member, constVal) :: (process constVal l) + end + in if alreadyDefined then () + else + let val idIntList = process (LargeInt.fromInt ~1) enumerators + val namedTy = B.Enum (tid,idIntList) + in bindTid (tid, {name=tagOpt, ntype=SOME namedTy, + global=topLevel(), location=getLoc()}); + pushTids tid + end; + Ast.EnumRef tid + end + + + (* ------------- Structs and Unions ---------------- + * Very similar to rules for converting enums. *) + | (PT.Struct {isStruct, tagOpt, members}) :: l => + let val _ = noMore l "Struct" + val (tid, alreadyDefined) = + (case tagOpt + of SOME tagname => + let + val sym = Sym.tag tagname + val tidFlagOpt = + (case lookLocalScope sym + of SOME(TAG{ctype=ty,location=loc',...}) => + (case ty + of (Ast.UnionRef tid | Ast.StructRef tid) => + if isPartial tid + then SOME{tid=tid, alreadyDefined=false} + else if repeated_declarations_ok + then SOME{tid=tid, alreadyDefined=true} + else (error("Redeclaration of type tag `" + ^ tagname + ^ "'; previous declaration at " + ^ SM.locToString loc'); + NONE) + | _ => + (error("Redeclaration of type tag `" + ^ tagname ^ + "'; previous declaration was not a " + ^ "type tag and appeared at " + ^ SM.locToString loc'); + NONE)) + | NONE => NONE + | _ => (bug "cnvExpression: tag symbol 2"; NONE)) + in case tidFlagOpt + of SOME{tid, alreadyDefined} => + (tid, alreadyDefined) + | NONE => (* create a partial tid *) + let val tid = Tid.new () + val ty = if isStruct then Ast.StructRef tid + else Ast.UnionRef tid + in bindSym(sym, TAG{name=sym,uid=Pid.new(), + location=getLoc(), + ctype=ty}); + bindTid(tid, {name=NONE, ntype=NONE, + global=topLevel(), location=getLoc()}); + (tid, false) + end + end + | NONE => + let + val (tid,alreadyDefined) = + if !multi_file_mode andalso (topLevel ()) then + (* in multi_file_mode, give identical top-level + * structs the same tid + *) + case AnonymousStructs.findAnonStructEnum ty + of SOME tid => (tid, true) + | NONE => + let val tid = Tid.new () + in AnonymousStructs.addAnonTid(ty, tid); + (tid, false) + end + else + let val tid = Tid.new () + in (tid,false) + end + in if alreadyDefined then () + else bindTid (tid, {name=NONE, ntype=NONE, + global=topLevel(), location=getLoc()}); + (tid, alreadyDefined) + end) + + (* add members to symbol table, evaluate bit fields + * when present *) + fun process1 (ct, declExprs) = + let + val ty = cnvCtype (false, ct) + fun process2 (decr,expr) + : Ast.ctype * Ast.member option * LargeInt.int option = + let + val (ty', memNameOpt, loc) = mungeTyDecr (ty, decr) + val sizeOpt = + case expr of + PT.EmptyExpr => NONE + (* nch: fix: check bitfield types -- see checks in sizeof *) + | _ => (case evalExpr expr of + (SOME i, _, _, false) => SOME i + | (SOME i, _, _, true) => + (if !reduce_sizeof then () + else warn ("sizeof in bitfield specification " ^ + "not preserved in source-to-source mode"); + SOME i) + | (NONE, _, _, _) => + (error "Bitfield size must be constant expression"; + NONE)) + val memberOpt : Ast.member option = + (case memNameOpt + of SOME id' => + let val sym = Sym.member (tid,id') + val _ = + checkNonIdRebinding(sym, ty', + "struct/union member "); + val _ = + if isPartialTy ty' + then error("Member `" ^ id' + ^ "' has incomplete type.") + else (); + val _ = + if isNonPointerFunction ty' + then error("Member `" ^ id' + ^ "' has function type.") + else (); + val member = {name = sym, + uid = Pid.new(), + location = loc, + ctype = ty', + kind = if isStruct + then Ast.STRUCTmem + else Ast.UNIONmem} + in bindSym(sym,MEMBER member); + SOME member + (* DBM: FIELDs? *) + end + | NONE => NONE) + in (ty', memberOpt, sizeOpt) + end (* fun process2 *) + in map process2 declExprs + end (* fun process1 *) + + (* union members are more restricted than struct members *) + fun checkUnionMember (ty: Ast.ctype, NONE: Ast.member option, + _ : LargeInt.int option) = + (error "union member has no name"; + (ty,bogusMember(Sym.member(tid,"")))) + | checkUnionMember (ty,SOME m,SOME _) = + (error "union member has size spec"; + (ty,m)) + | checkUnionMember (ty,SOME m,NONE) = (ty,m) + + in if alreadyDefined then () + else + let val members = List.map process1 members + val members = List.concat members + val namedTy = + if isStruct then B.Struct(tid, members) + else B.Union(tid, map checkUnionMember members) + val binding : B.tidBinding = + {name = tagOpt, ntype = SOME namedTy, + global = topLevel(), location = getLoc()} + in bindTid (tid, binding); + pushTids tid + end; + (if isStruct then Ast.StructRef else Ast.UnionRef) tid + end + + | (PT.TypedefName s) :: l => + (* type symbol is added at the point of declaration: see + * cnvExternalDecl (case ExternalDecl(TypeDecl) and cnvStatement (case + * Decl(TypeDecl) *) + (noMore l "Typedef"; + case lookSym (Sym.typedef s) + of SOME(TYPEDEF{ctype,...}) => ctype + | _ => (error("typedef " ^ s ^ " has not been defined."); + Ast.Error)) + + | (PT.StructTag {isStruct,name=s}) :: l => + let val _ = noMore l "Struct" + val sym = Sym.tag s + val tyOpt = + case lookSym sym + of SOME(TAG{ctype,...}) => SOME ctype + | NONE => NONE + | _ => (bug "cnvExpression: bad tag 3"; NONE) + in if not (isSome tyOpt) orelse + (isShadow andalso not (isLocalScope sym)) then + let val tid = Tid.new () + val ty = (if isStruct then Ast.StructRef else Ast.UnionRef) tid + in bindSym(sym, TAG{name=sym,uid=Pid.new(), + location=getLoc(), ctype=ty}); + bindTid (tid, {name=SOME s, ntype=NONE, + global=topLevel(), location=getLoc()}); + ty + end + else valOf tyOpt (* guaranteed to be SOME *) + end + + | (PT.EnumTag s) :: l => (* nearly idenitical to struct tag case *) + let val _ = noMore l "Enum" + val sym = Sym.tag s + val tyOpt = + case lookSym sym + of SOME(TAG{ctype,...}) => SOME ctype + | NONE => (if TypeCheckControl.partial_enum_error + then error("incomplete enum " ^ s) + else (); + NONE) + | _ => (bug "cnvExpression: bad tag 3"; NONE) + in if not (isSome tyOpt) orelse + (isShadow andalso not (isLocalScope sym)) then + (* if this is explicitly a shadow or a enum tag not seen + * before then create a new named type identifier and + * record that this type is partially (incompletely) + * defined *) + let val tid = Tid.new () + val ty = Ast.EnumRef tid + in bindSym(sym, TAG{name=sym,uid=Pid.new(), + location=getLoc(), ctype=ty}); + bindTid (tid, {name=SOME s, ntype=NONE, + global=topLevel(), location=getLoc()}); + ty + end + (* otherwise return the type already established in + * environment *) + else valOf tyOpt + end + + | (PT.SpecExt xspec) :: rest => + CNVSpecifier {isShadow=isShadow, rest=rest} xspec + | l => cnvSpecList l + end + + val {qualifiers, specifiers} = ty + in cnvQualifiers (cnvSpecifier specifiers) qualifiers + end + + and cnvType (isShadow: bool, {storage,qualifiers,specifiers}: PT.decltype) + : Ast.ctype * Ast.storageClass = + let val sc = cnvStorage storage + val ct = cnvCtype (isShadow,{qualifiers=qualifiers,specifiers=specifiers}) + in (ct,sc) + end + + and cnvQualifiers ty [] = ty + | cnvQualifiers ty [PT.CONST] = Ast.Qual (Ast.CONST, ty) + | cnvQualifiers ty [PT.VOLATILE] = Ast.Qual (Ast.VOLATILE, ty) + | cnvQualifiers ty (PT.VOLATILE :: PT.VOLATILE :: _) = + (error "Duplicate `volatile'."; ty) + | cnvQualifiers ty (PT.CONST :: PT.CONST :: _) = + (error "Duplicate 'const'."; ty) + | cnvQualifiers ty (_ :: _ :: _ :: _) = + (error "too many 'const/volatile' qualifiers."; ty) + (* See: ISO-C Standard, p. 64 for meaning of const volatile. *) + | cnvQualifiers ty (_ :: _ :: nil) = ty + + + + (* -------------------------------------------------------------------- + * cnvStorage : PT.storage list -> Ast.storageClass option + * + * Converts a parse-tree storage class into an ast storage class. The + * only subtlety is the case where no parse-tree storage class has been + * given in which case the default (supplied by second argument) ast + * storage class is used. + * + * For rules for storage classes, see K&R A8.1 + * -------------------------------------------------------------------- *) + + and cnvStorage [] = Ast.DEFAULT + | cnvStorage [PT.STATIC] = Ast.STATIC + | cnvStorage [PT.EXTERN] = Ast.EXTERN + | cnvStorage [PT.REGISTER] = Ast.REGISTER + | cnvStorage [PT.AUTO] = Ast.AUTO + | cnvStorage [PT.TYPEDEF] = + (error "illegal use of TYPEDEF"; + Ast.DEFAULT) + | cnvStorage _ = + (error "Declarations can contain at most one storage class\ + \ (static, extern, register, auto)."; + Ast.DEFAULT) + + (* -------------------------------------------------------------------- + * evalExpr : ParseTree expr -> int option + * + * Converts parse-tree expressions to integer constants where possible; + * NONE used for cases where no constant can be computed or when no + * expression is given. A new environment is returned because it is + * possible to embed definitions of struct/union/enum types within + * sizeofs and casts. + * -------------------------------------------------------------------- *) + + and evalExpr e = (* evalExpr should not be called with PT.EmptyExpr *) + let + val encounteredSizeof = ref false + val (eTy, e') = cnvExpression (e) + fun evalAstExpr (Ast.EXPR (coreExpr,adorn, _)) = + case coreExpr + of Ast.IntConst i => SOME i + | Ast.Unop (unop, e) => evalUnaryOp (unop, e) + | Ast.Binop (binop, e, e') => evalBinaryOp (binop, e, e') + | Ast.QuestionColon (e0,e1,e2) => + (case evalAstExpr e0 + of SOME 0 => evalAstExpr e2 + | SOME _ => evalAstExpr e1 + | NONE => NONE) + | Ast.Cast (ct,e) => + let val eTy = lookAid adorn + in if compatible (ct, eTy) then () + else warn "evalExpr: cast not handled yet"; + evalAstExpr e + end + | Ast.EnumId (_, i) => SOME i + | Ast.SizeOf ct => (encounteredSizeof := true; + SOME(sizeof ct)) + | _ => NONE + + and evalBinaryOp (binop, e, e') = + let val opt = evalAstExpr e + val opt' = evalAstExpr e' + in + if isSome opt andalso isSome opt' then + let val i = valOf opt + val i' = valOf opt' + in case binop + of Ast.Plus => SOME (i + i') + | Ast.Minus => SOME (i - i') + | Ast.Times => SOME (i * i') + | Ast.Divide => SOME (LargeInt.quot (i,i')) + | Ast.Mod => SOME (LargeInt.rem (i,i')) + | Ast.Gt => SOME (if i > i' then 1 else 0) + | Ast.Lt => SOME (if i < i' then 1 else 0) + | Ast.Gte => SOME (if i >= i' then 1 else 0) + | Ast.Lte => SOME (if i <= i' then 1 else 0) + | Ast.Eq => SOME (if i = i' then 1 else 0) + | Ast.Neq => SOME (if i <> i' then 1 else 0) + | Ast.And => SOME (if i<>0 andalso i'<>0 then 1 else 0) + | Ast.Or => SOME (if i<>0 orelse i'<>0 then 1 else 0) + | Ast.BitOr => + SOME (W.toLargeInt (W.orb (W.fromLargeInt i, W.fromLargeInt i'))) + | Ast.BitXor => + SOME (W.toLargeInt (W.xorb (W.fromLargeInt i, W.fromLargeInt i'))) + | Ast.BitAnd => + SOME (W.toLargeInt (W.andb (W.fromLargeInt i, W.fromLargeInt i'))) + | Ast.Lshift => + SOME (W.toLargeInt (W.<< (W.fromLargeInt i, W.fromLargeInt i'))) + | Ast.Rshift => + SOME (W.toLargeInt (W.>> (W.fromLargeInt i, W.fromLargeInt i'))) + | _ => NONE + end + else + NONE + end + + and evalUnaryOp (unop, e) = + let + val opt = evalAstExpr e + in + if isSome opt then + let + val i = valOf opt + in case unop + of Ast.Negate => SOME (~i) + | Ast.Not => SOME (if i = 0 then 1 else 0) + | Ast.Uplus => SOME i + | Ast.BitNot => SOME (W.toLargeInt (W.notb (W.fromLargeInt i))) + | _ => NONE + end + else NONE + end + in (evalAstExpr e', eTy, e', !encounteredSizeof) + end + + (* -------------------------------------------------------------------- + * makeAst' : ParseTree.external_decl list * Error.errorState -> Ast.ast + * + * Converts a parse tree into an ast, by recursively converting + * each delcaration in the list. + * -------------------------------------------------------------------- *) + + (* initializing extension conversion functions *) + + val _ = + let val coreFuns = {stateFuns=stateFuns, + mungeTyDecr=(fn (ty, decr) => + let val (ctype, name, _) = + mungeTyDecr(ty,decr) + in (ctype, name) end), + (* since we added location in the output of mungeTyDecr and + * we don't want to change the extension interface *) + cnvType=cnvType, + cnvExpression=cnvExpression, + cnvStatement=cnvStatement, + cnvExternalDecl=cnvExternalDecl, + wrapEXPR=wrapEXPR, + wrapSTMT=wrapSTMT, + wrapDECL=wrapDECL} + val {CNVExp, CNVStat, CNVBinop, CNVUnop, CNVExternalDecl, + CNVSpecifier, CNVDeclarator, CNVDeclaration} = CnvExt.makeExtensionFuns coreFuns + in + refCNVExp := CNVExp; + refCNVStat := CNVStat; + refCNVBinop := CNVBinop; + refCNVUnop := CNVUnop; + refCNVExternalDecl := CNVExternalDecl; + refCNVSpecifier := CNVSpecifier; + refCNVDeclarator := CNVDeclarator; + refCNVDeclaration := CNVDeclaration + end + + fun makeAst' extDecls = + let val _ = if !multi_file_mode then print "Warning: multi_file_mode on\n" + else () + val _ = Sizeof.reset() + (* this is the top-level call for this structure; + * must reset sizeof memo table *) + val astExtDecls = + let fun process x = + let val astExtDecl = cnvExternalDecl x + val newtids = resetTids () + in (List.map (fn x => wrapDECL(Ast.ExternalDecl(Ast.TypeDecl{shadow=NONE, tid=x}))) + newtids) + @ astExtDecl + end + in List.map process extDecls + end + val astExtDecls = List.concat astExtDecls + val errorCount = Error.errorCount errorState + val warningCount = Error.warningCount errorState + in + {ast=astExtDecls, tidtab=ttab, errorCount=errorCount, warningCount=warningCount, + auxiliaryInfo = {aidtab=atab, implicits=implicits, env=getGlobalEnv()}} + (* DBM: will we want to reuse errorState? *) + end (* fun makeAst' *) + + in + makeAst' +end (* fun makeAst *) + +end (* local open Bindings *) + +end (* structure BuildAst *) diff --git a/ckit/src/ast/cnv-ext-sig.sml b/ckit/src/ast/cnv-ext-sig.sml new file mode 100644 index 0000000..c38c37c --- /dev/null +++ b/ckit/src/ast/cnv-ext-sig.sml @@ -0,0 +1,60 @@ +signature CNVEXT = +sig + + type coreConversionFuns = + { + stateFuns : State.stateFuns, + mungeTyDecr: (Ast.ctype*ParseTree.declarator ->Ast.ctype * string option), + + cnvType : bool*ParseTree.decltype -> Ast.ctype*Ast.storageClass, + cnvExpression: ParseTree.expression -> Ast.ctype * Ast.expression, + cnvStatement : ParseTree.statement -> Ast.statement, + cnvExternalDecl: ParseTree.externalDecl -> Ast.externalDecl list, + + wrapEXPR: (Ast.ctype*Ast.coreExpression -> Ast.ctype*Ast.expression), + wrapSTMT: Ast.coreStatement -> Ast.statement, + wrapDECL: Ast.coreExternalDecl -> Ast.externalDecl + } + + type expressionExt = (ParseTree.specifier, ParseTree.declarator, ParseTree.ctype, ParseTree.decltype, + ParseTree.operator, ParseTree.expression, ParseTree.statement) + ParseTreeExt.expressionExt + + type statementExt = (ParseTree.specifier, ParseTree.declarator, ParseTree.ctype, ParseTree.decltype, + ParseTree.operator, ParseTree.expression, ParseTree.statement) + ParseTreeExt.statementExt + + type externalDeclExt = (ParseTree.specifier, ParseTree.declarator, ParseTree.ctype, ParseTree.decltype, + ParseTree.operator, ParseTree.expression, ParseTree.statement) + ParseTreeExt.externalDeclExt + + type specifierExt = (ParseTree.specifier, ParseTree.declarator, ParseTree.ctype, ParseTree.decltype, + ParseTree.operator, ParseTree.expression, ParseTree.statement) + ParseTreeExt.specifierExt + + type declaratorExt = (ParseTree.specifier, ParseTree.declarator, ParseTree.ctype, ParseTree.decltype, + ParseTree.operator, ParseTree.expression, ParseTree.statement) + ParseTreeExt.declaratorExt + + type declarationExt = (ParseTree.specifier, ParseTree.declarator, ParseTree.ctype, ParseTree.decltype, + ParseTree.operator, ParseTree.expression, ParseTree.statement) + ParseTreeExt.declarationExt + + type extensionFuns = + {CNVExp: expressionExt -> Ast.ctype * Ast.expression, + CNVStat: statementExt -> Ast.statement, + CNVBinop: {binop: ParseTreeExt.operatorExt, arg1Expr: ParseTree.expression, arg2Expr: ParseTree.expression} + -> Ast.ctype * Ast.expression, + CNVUnop: {unop: ParseTreeExt.operatorExt, argExpr: ParseTree.expression} + -> Ast.ctype * Ast.expression, + CNVExternalDecl: externalDeclExt -> Ast.externalDecl list, + CNVSpecifier: {isShadow: bool, rest : ParseTree.specifier list} + -> specifierExt + -> Ast.ctype, + CNVDeclarator: Ast.ctype * declaratorExt + -> Ast.ctype * string option, + CNVDeclaration: declarationExt -> Ast.declaration list} + + val makeExtensionFuns: coreConversionFuns -> extensionFuns + +end (* signature CNVEXTENSION *) diff --git a/ckit/src/ast/ctype-eq.sml b/ckit/src/ast/ctype-eq.sml new file mode 100644 index 0000000..52e1ded --- /dev/null +++ b/ckit/src/ast/ctype-eq.sml @@ -0,0 +1,59 @@ +(* Copyright (c) 1998 by Lucent Technologies *) + +(* equality for ctype datatype (defined in ast.sml) *) +structure CTypeEq = +struct + open Ast + fun eqStorageClass(AUTO, AUTO) = true + | eqStorageClass(EXTERN, EXTERN) = true + | eqStorageClass(REGISTER, REGISTER) = true + | eqStorageClass(STATIC, STATIC) = true + | eqStorageClass(DEFAULT, DEFAULT) = true + | eqStorageClass _ = false + + fun eqQualifier(CONST, CONST) = true + | eqQualifier(VOLATILE, VOLATILE) = true + | eqQualifier _ = false + + fun eqSignedness(SIGNED, SIGNED) = true + | eqSignedness(UNSIGNED, UNSIGNED) = true + | eqSignedness _ = false + + fun eqIntKind(CHAR, CHAR) = true + | eqIntKind(SHORT, SHORT) = true + | eqIntKind(INT, INT) = true + | eqIntKind(LONG, LONG) = true + | eqIntKind(LONGLONG, LONGLONG) = true + | eqIntKind(FLOAT, FLOAT) = true + | eqIntKind(DOUBLE, DOUBLE) = true + | eqIntKind(LONGDOUBLE, LONGDOUBLE) = true + | eqIntKind _ = false + + fun eqFractionality(FRACTIONAL, FRACTIONAL) = true + | eqFractionality(WHOLENUM, WHOLENUM) = true + | eqFractionality _ = false + + fun eqSaturatedness(SATURATE, SATURATE) = true + | eqSaturatedness(NONSATURATE, NONSATURATE) = true + | eqSaturatedness _ = false + + fun eqCType(Void, Void) = true + | eqCType(Ellipses, Ellipses) = true + | eqCType(Qual(q1, ct1), Qual(q2, ct2)) = eqQualifier(q1, q2) andalso eqCType(ct1, ct2) + | eqCType(Numeric x1, Numeric x2) = (x1 = x2) + | eqCType(Array(SOME(i1, _), ct1), Array(SOME(i2, _), ct2)) = (i1=i2) andalso eqCType(ct1, ct2) + | eqCType(Pointer ct1, Pointer ct2) = eqCType(ct1, ct2) + | eqCType(Function(ct1, ctl1), Function(ct2, ctl2)) = eqCType(ct1, ct2) andalso eqCTypeList(ctl1, ctl2) + | eqCType(StructRef tid1, StructRef tid2) = (tid1 = tid2) + | eqCType(UnionRef tid1, UnionRef tid2) = (tid1 = tid2) + | eqCType(EnumRef tid1, EnumRef tid2) = (tid1 = tid2) + | eqCType(TypeRef tid1, TypeRef tid2) = (tid1 = tid2) + | eqCType(Error, Error) = true + | eqCType _ = false + + and eqCTypeList((ct1, _) :: ctl1, (ct2, _) :: ctl2) = + eqCType(ct1, ct2) andalso eqCTypeList(ctl1, ctl2) + | eqCTypeList(nil, nil) = true + | eqCTypeList _ = false + +end \ No newline at end of file diff --git a/ckit/src/ast/extensions/c/.cm/GUID/ast-ext.sml b/ckit/src/ast/extensions/c/.cm/GUID/ast-ext.sml new file mode 100644 index 0000000..dc351e5 --- /dev/null +++ b/ckit/src/ast/extensions/c/.cm/GUID/ast-ext.sml @@ -0,0 +1 @@ +guid-$/(ckit-lib.cm):ast/(group.cm):extensions/c/ast-ext.sml-1714016103.734 diff --git a/ckit/src/ast/extensions/c/.cm/GUID/cnv-ext.sml b/ckit/src/ast/extensions/c/.cm/GUID/cnv-ext.sml new file mode 100644 index 0000000..6bc1f6c --- /dev/null +++ b/ckit/src/ast/extensions/c/.cm/GUID/cnv-ext.sml @@ -0,0 +1 @@ +guid-$/(ckit-lib.cm):ast/(group.cm):extensions/c/cnv-ext.sml-1714016107.846 diff --git a/ckit/src/ast/extensions/c/.cm/GUID/pp-ast-ext-fn.sml b/ckit/src/ast/extensions/c/.cm/GUID/pp-ast-ext-fn.sml new file mode 100644 index 0000000..4c088d9 --- /dev/null +++ b/ckit/src/ast/extensions/c/.cm/GUID/pp-ast-ext-fn.sml @@ -0,0 +1 @@ +guid-$/(ckit-lib.cm):ast/(group.cm):extensions/c/pp-ast-ext-fn.sml-1714016103.927 diff --git a/ckit/src/ast/extensions/c/.cm/SKEL/ast-ext-sig.sml b/ckit/src/ast/extensions/c/.cm/SKEL/ast-ext-sig.sml new file mode 100644 index 0000000..5097770 --- /dev/null +++ b/ckit/src/ast/extensions/c/.cm/SKEL/ast-ext-sig.sml @@ -0,0 +1,2 @@ +Skeleton 5 +ac"ASTEXT"h0 \ No newline at end of file diff --git a/ckit/src/ast/extensions/c/.cm/SKEL/ast-ext.sml b/ckit/src/ast/extensions/c/.cm/SKEL/ast-ext.sml new file mode 100644 index 0000000..be2b66e --- /dev/null +++ b/ckit/src/ast/extensions/c/.cm/SKEL/ast-ext.sml @@ -0,0 +1,2 @@ +Skeleton 5 +ad"AstExt"h0 \ No newline at end of file diff --git a/ckit/src/ast/extensions/c/.cm/SKEL/cnv-ext.sml b/ckit/src/ast/extensions/c/.cm/SKEL/cnv-ext.sml new file mode 100644 index 0000000..e50097b --- /dev/null +++ b/ckit/src/ast/extensions/c/.cm/SKEL/cnv-ext.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f4d"ParseTree"d"State"d"ParseTreeExt"d"Ast"ad"CnvExt"jh0gp1c"CNVEXT" \ No newline at end of file diff --git a/ckit/src/ast/extensions/c/.cm/SKEL/pp-ast-ext-fn.sml b/ckit/src/ast/extensions/c/.cm/SKEL/pp-ast-ext-fn.sml new file mode 100644 index 0000000..0235b50 --- /dev/null +++ b/ckit/src/ast/extensions/c/.cm/SKEL/pp-ast-ext-fn.sml @@ -0,0 +1,2 @@ +Skeleton 5 +ae"PPAstExtFn"jh0gp1c"PPASTEXT" \ No newline at end of file diff --git a/ckit/src/ast/extensions/c/.cm/amd64-unix/ast-ext.sml b/ckit/src/ast/extensions/c/.cm/amd64-unix/ast-ext.sml new file mode 100644 index 0000000..eb4a265 Binary files /dev/null and b/ckit/src/ast/extensions/c/.cm/amd64-unix/ast-ext.sml differ diff --git a/ckit/src/ast/extensions/c/.cm/amd64-unix/cnv-ext.sml b/ckit/src/ast/extensions/c/.cm/amd64-unix/cnv-ext.sml new file mode 100644 index 0000000..c17d9a0 Binary files /dev/null and b/ckit/src/ast/extensions/c/.cm/amd64-unix/cnv-ext.sml differ diff --git a/ckit/src/ast/extensions/c/.cm/amd64-unix/pp-ast-ext-fn.sml b/ckit/src/ast/extensions/c/.cm/amd64-unix/pp-ast-ext-fn.sml new file mode 100644 index 0000000..893f885 Binary files /dev/null and b/ckit/src/ast/extensions/c/.cm/amd64-unix/pp-ast-ext-fn.sml differ diff --git a/ckit/src/ast/extensions/c/ast-ext-sig.sml b/ckit/src/ast/extensions/c/ast-ext-sig.sml new file mode 100644 index 0000000..d4980c8 --- /dev/null +++ b/ckit/src/ast/extensions/c/ast-ext-sig.sml @@ -0,0 +1,11 @@ +(* Copyright (c) 1998 by Lucent Technologies *) + +signature ASTEXT = sig + + type binopExt = unit + type unopExt = unit + type ('expression, 'statement, 'binop, 'unop) expressionExt = unit + type ('expression, 'statement, 'binop, 'unop) statementExt = unit + type ('expression, 'statement, 'binop, 'unop) externalDeclExt = unit + +end diff --git a/ckit/src/ast/extensions/c/ast-ext.sml b/ckit/src/ast/extensions/c/ast-ext.sml new file mode 100644 index 0000000..3290166 --- /dev/null +++ b/ckit/src/ast/extensions/c/ast-ext.sml @@ -0,0 +1,11 @@ +(* Copyright (c) 1998 by Lucent Technologies *) + +structure AstExt = struct + + type binopExt = unit + type unopExt = unit + type ('expression, 'statement, 'binop, 'unop) expressionExt = unit + type ('expression, 'statement, 'binop, 'unop) statementExt = unit + type ('expression, 'statement, 'binop, 'unop) externalDeclExt = unit + +end diff --git a/ckit/src/ast/extensions/c/cnv-ext.sml b/ckit/src/ast/extensions/c/cnv-ext.sml new file mode 100644 index 0000000..bcb15e5 --- /dev/null +++ b/ckit/src/ast/extensions/c/cnv-ext.sml @@ -0,0 +1,133 @@ +structure CnvExt : CNVEXT = struct + + type coreConversionFuns = + { + stateFuns : State.stateFuns, + mungeTyDecr: (Ast.ctype*ParseTree.declarator ->Ast.ctype * string option), + + cnvType : bool*ParseTree.decltype -> Ast.ctype*Ast.storageClass, + cnvExpression: ParseTree.expression -> Ast.ctype * Ast.expression, + cnvStatement : ParseTree.statement -> Ast.statement, + cnvExternalDecl: ParseTree.externalDecl -> Ast.externalDecl list, + + wrapEXPR: (Ast.ctype*Ast.coreExpression -> Ast.ctype*Ast.expression), + wrapSTMT: Ast.coreStatement -> Ast.statement, + wrapDECL: Ast.coreExternalDecl -> Ast.externalDecl + } + + type expressionExt = (ParseTree.specifier, ParseTree.declarator, ParseTree.ctype, ParseTree.decltype, + ParseTree.operator, ParseTree.expression, ParseTree.statement) + ParseTreeExt.expressionExt + + type statementExt = (ParseTree.specifier, ParseTree.declarator, ParseTree.ctype, ParseTree.decltype, + ParseTree.operator, ParseTree.expression, ParseTree.statement) + ParseTreeExt.statementExt + + type externalDeclExt = (ParseTree.specifier, ParseTree.declarator, ParseTree.ctype, ParseTree.decltype, + ParseTree.operator, ParseTree.expression, ParseTree.statement) + ParseTreeExt.externalDeclExt + + type specifierExt = (ParseTree.specifier, ParseTree.declarator, ParseTree.ctype, ParseTree.decltype, + ParseTree.operator, ParseTree.expression, ParseTree.statement) + ParseTreeExt.specifierExt + + type declaratorExt = (ParseTree.specifier, ParseTree.declarator, ParseTree.ctype, ParseTree.decltype, + ParseTree.operator, ParseTree.expression, ParseTree.statement) + ParseTreeExt.declaratorExt + + type declarationExt = (ParseTree.specifier, ParseTree.declarator, ParseTree.ctype, ParseTree.decltype, + ParseTree.operator, ParseTree.expression, ParseTree.statement) + ParseTreeExt.declarationExt + + type extensionFuns = + {CNVExp: expressionExt -> Ast.ctype * Ast.expression, + CNVStat: statementExt -> Ast.statement, + CNVBinop: {binop: ParseTreeExt.operatorExt, arg1Expr: ParseTree.expression, arg2Expr: ParseTree.expression} + -> Ast.ctype * Ast.expression, + CNVUnop: {unop: ParseTreeExt.operatorExt, argExpr: ParseTree.expression} + -> Ast.ctype * Ast.expression, + CNVExternalDecl: externalDeclExt -> Ast.externalDecl list, + CNVSpecifier: {isShadow: bool, rest : ParseTree.specifier list} + -> specifierExt + -> Ast.ctype, + CNVDeclarator: Ast.ctype * declaratorExt + -> Ast.ctype * string option, + CNVDeclaration: declarationExt -> Ast.declaration list} + + exception CnvExt of string + + fun CNVExp _ = raise (CnvExt "No proper extensions to expressions") + + fun CNVStat _ = raise (CnvExt "No proper extensions to statements") + + fun CNVBinop _ = raise (CnvExt "No proper extensions to binops") + + fun CNVUnop _ = raise (CnvExt "No proper extensions to unnops") + + fun CNVExternalDecl _ = raise (CnvExt "No proper extensions to external decls") + + fun CNVSpecifier _ _ = raise (CnvExt "No proper extensions to specifiers") + + fun CNVDeclarator _ = raise (CnvExt "No proper extensions to declarators") + + fun CNVDeclaration _ = raise (CnvExt "No proper extensions to declarations") + + fun makeExtensionFuns _ = {CNVExp = CNVExp, + CNVStat = CNVStat, + CNVBinop = CNVBinop, + CNVUnop = CNVUnop, + CNVExternalDecl = CNVExternalDecl, + CNVSpecifier = CNVSpecifier, + CNVDeclarator = CNVDeclarator, + CNVDeclaration = CNVDeclaration} + + (* prototypical use of makeExtensionsFuns for non-trival extensions: + + fun makeExtensions {stateFuns, mungeTyDecr, cnvType, cnvExpression, cnvStatement, cnvExternalDecl, + wrapEXPR, wrapSTMT, wrapDECL} = + let + fun raiseError ... (* local helper function *) + val .... (* local helper function *) + ... etc ... (* more local helper functions *) + + fun CNVExp args = .. + fun CNVStat args = .. + ... etc ... + in {CNVExp = CNVExp, CNVStat = CNVStat, ....} + end + *) + + +end + +(**************** + +structure CnvExt : CNVEXT = struct + + type cnv = {stateFuns: State.stateFuns, + cnvType: bool * ParseTree.decltype -> Ast.ctype * Ast.storageClass, + mungeTyDecr: Ast.ctype * ParseTree.declarator -> Ast.ctype * string option, + cnvExpression: ParseTree.expression -> Ast.ctype * Ast.expression, + cnvStatement: ParseTree.statement -> Ast.statement} + + exception CnvExt of string + + val intTy = Ast.Numeric (Ast.NONSATURATE,Ast.WHOLENUM,Ast.SIGNED,Ast.INT) + + fun CNVExp _ _ = raise (CnvExt "No proper extensions to expressions") + + fun CNVStat _ _ = raise (CnvExt "No proper extensions to statements") + + fun CNVBinop _ _ = raise (CnvExt "No proper extensions to binops") + + fun CNVUnop _ _ = raise (CnvExt "No proper extensions to unnops") + + fun CNVExternalDecl _ _ = raise (CnvExt "No proper extensions to external decls") + + fun CNVSpecifier _ _ = raise (CnvExt "No proper extensions to specifiers") + + fun CNVDeclarator _ _ = raise (CnvExt "No proper extensions to declarators") + + fun CNVDeclaration _ _ = raise (CnvExt "No proper extensions to declarations") +end + ***************) \ No newline at end of file diff --git a/ckit/src/ast/extensions/c/eq-ast-ext.sml b/ckit/src/ast/extensions/c/eq-ast-ext.sml new file mode 100644 index 0000000..21cd1c0 --- /dev/null +++ b/ckit/src/ast/extensions/c/eq-ast-ext.sml @@ -0,0 +1,6 @@ +structure EqAstExt = struct + type expExt = (Ast.ctype,Ast.expression,Ast.statement,Ast.binop) AstExt.expressionExt + type stmtExt = (Ast.ctype,Ast.expression,Ast.statement,Ast.binop) AstExt.statementExt + fun eqExpressionExt pair maps (ee1,ee2) = true + fun eqStatementExt pair maps (se1,se2) = maps +end diff --git a/ckit/src/ast/extensions/c/pp-ast-ext-fn.sml b/ckit/src/ast/extensions/c/pp-ast-ext-fn.sml new file mode 100644 index 0000000..bdaa7eb --- /dev/null +++ b/ckit/src/ast/extensions/c/pp-ast-ext-fn.sml @@ -0,0 +1,11 @@ +(* Copyright (c) 1998 by Lucent Technologies *) + +functor PPAstExtFn (type aidinfo):PPASTEXT = struct + type aidinfo = aidinfo + fun ppUnopExt aidinfo pair pps unopExt = () + fun ppBinopExt aidinfo pair pps binopExt = () + fun ppExpressionExt quad aidinfo pair pps expExt = () + fun ppStatementExt quad aidinfo pair pps stmtExt = () + fun ppExternalDeclExt quad aidinfo pair pps extDecExt = () +end + diff --git a/ckit/src/ast/group.cm b/ckit/src/ast/group.cm new file mode 100644 index 0000000..b33efa8 --- /dev/null +++ b/ckit/src/ast/group.cm @@ -0,0 +1,112 @@ +Group +is + +#if defined(NEW_CM) +#if (SMLNJ_VERSION * 100 + SMLNJ_MINOR_VERSION >= 11029) + + (* standard basis *) + $/basis.cm + + (* standard library *) + $/smlnj-lib.cm +#else + (* standard basis *) + basis.cm + + (* standard library *) + smlnj-lib.cm +#endif +#else + smlnj-lib.cm +#endif + + (* parser *) + ../parser/group.cm + + (* configuration *) + ../variants/group.cm + + (* C symbols *) + symbol-sig.sml + symbol.sml + + (* unique identifiers *) + uid-sig.sml + uid-fn.sml + + (* abstract syntax *) + aid.sml (* was adornment.sml *) + pid.sml + tid.sml + ast-sig.sml + ast.sml + ctype-eq.sml + + (* language extension support *) + cnv-ext-sig.sml + + (* tables for unique identifiers *) + uidtabimp-sig.sml + uidtabimp-fn.sml + aidtab.sml + pidtab.sml + tidtab.sml + tables.sml (* type abbreviations for pidtab, tidtab, aidtab *) + + (* pretty-printers *) + pp/pp-lib.sml + pp/pp-ast-sig.sml + pp/pp-ast.sml + pp/pp-ast-adornment-sig.sml + pp/pp-ast-ext-sig.sml + pp/pp-ast-fn.sml + +(* Not currently needed: current uses of ast-equality (in simplify-ast) + just use polymorphic equality. But this code may be useful in the future + if the modifications to the ast types violate rules for eqtypes. + (* equality modulo alpha renaming *) + eq-binary-maps.sml + eq-ast-ext-sig.sml + eq-ctype.sml + eq-ast.sml +*) + (* translation from parse tree *) + sizes-sig.sml + sizes.sml + sizeof-sig.sml + sizeof.sml + + type-util-sig.sml + type-util.sml + + bindings.sml + + state-sig.sml + state.sml + + simplify-assign-ops.sml + build-ast-sig.sml + build-ast.sml + + anonymous-structs.sml + + initializer-normalizer-sig.sml + initializer-normalizer.sml + + (* top level *) + parse-to-ast-sig.sml + parse-to-ast.sml + + (* extensions *) +#if (defined(d)) + extensions/d/ast-ext-sig.sml + extensions/d/ast-ext.sml + extensions/d/cnv-ext.sml + extensions/d/eq-ast-ext.sml + extensions/d/pp-ast-ext-fn.sml +#else + extensions/c/ast-ext-sig.sml + extensions/c/ast-ext.sml + extensions/c/cnv-ext.sml + extensions/c/pp-ast-ext-fn.sml +#endif diff --git a/ckit/src/ast/initializer-normalizer-sig.sml b/ckit/src/ast/initializer-normalizer-sig.sml new file mode 100644 index 0000000..c8c0589 --- /dev/null +++ b/ckit/src/ast/initializer-normalizer-sig.sml @@ -0,0 +1,12 @@ +(* initializer-normalizer-sig.sml *) + +signature INITIALIZER_NORMALIZER = +sig + + val normalize : {lookTid : Tid.uid -> Bindings.tidBinding option, + bindAid : Ast.ctype -> Aid.uid, + initType : Ast.ctype, + initExpr : Ast.initExpression} + -> Ast.initExpression + +end diff --git a/ckit/src/ast/initializer-normalizer.sml b/ckit/src/ast/initializer-normalizer.sml new file mode 100644 index 0000000..5ea8dbc --- /dev/null +++ b/ckit/src/ast/initializer-normalizer.sml @@ -0,0 +1,178 @@ +(* Copyright (c) 1999 by Lucent Technologies *) + +(* initializer-normalizer-fn.sml + * + * AUTHORS: Dino Oliva (oliva@research.bell-labs.com) + * + *) + +structure InitializerNormalizer : INITIALIZER_NORMALIZER = +struct + + structure Ast = Ast + structure B = Bindings + open Ast + + exception NormalizeExn + + fun fail msg = (print msg; raise NormalizeExn) + (* does this signal an internal "compiler bug"? + * only acts as a warning, since normalize acts as an identity + * on the expression if this is called. *) + + fun warn msg = (print msg; ()) + + val intCt = Ast.Numeric (Ast.NONSATURATE,Ast.WHOLENUM,Ast.SIGNED,Ast.INT,Ast.SIGNASSUMED) + + val charCt = Ast.Numeric (Ast.NONSATURATE,Ast.WHOLENUM,Ast.UNSIGNED,Ast.CHAR,Ast.SIGNASSUMED) + +(* DBM: the bindAid function introduces new aid mappings in the atab state + * component *) + +(* this takes the type of a declaration and the initializer and + * massages the initializer so that it exactly matches the type of + * declaration. It is called in BuildAst. *) +fun normalize {lookTid: Tid.uid -> Bindings.tidBinding option, + bindAid: Ast.ctype -> Aid.uid, + initType: Ast.ctype, + initExpr: Ast.initExpression} + : Ast.initExpression = +let + fun coreExp2exp (ctype, coreExp) = + let val aid = bindAid ctype + in EXPR (coreExp,aid,SourceMap.UNKNOWN) + end + + fun mkChrInit c = + Simple(coreExp2exp (charCt, (IntConst (LargeInt.fromInt (ord c))))) + + fun mkIntInit i = + Simple(coreExp2exp (intCt, (IntConst (i:LargeInt.int)))) + + fun mkChrs (NONE, []) = [] + | mkChrs (SOME c, []) = [mkChrInit c] + | mkChrs (cOpt, c::cs) = mkChrInit c :: mkChrs (cOpt, cs) + + (* padding out with zero (via scalarNorm) when too few initializers. + * as per [ISO-C, p.72-73] *) + fun arrNorm (arrType, Ast.Qual (_,ctype), maxOp) origInits = (* strip qual *) + arrNorm (arrType, ctype, maxOp) origInits + | arrNorm (arrType, Ast.TypeRef tid, maxOp) origInits = (* dereference type ref *) + (case lookTid tid + of SOME{ntype = SOME(B.Typedef (tid,ctype)),...} => + arrNorm (arrType, ctype, maxOp) origInits + | _ => fail "Inconsistent table for type ref") + | arrNorm (arrType, Ast.Numeric(_,_,_,Ast.CHAR,_), maxOp) + (Simple(EXPR(StringConst s,aid,loc))::rest) = + (* special case for character arrays initialized w/strings *) + let val len = (String.size s) + 1 (* size of c string *) + val max = case maxOp of SOME l => LargeInt.toInt l | _ => len + val nullOpt = if len = max + 1 then NONE else SOME #"\000" + val charInits = mkChrs (nullOpt, explode s) + in norm(arrType, (Aggregate charInits)::rest) + end + | arrNorm (arrType, baseType, maxOp) origInits = + let val max = case maxOp of + SOME l => LargeInt.toInt l + | _ => length origInits + fun loop(i, inits) = + if (i=max) then ([], inits) + else let val (elemInit,remainder) = norm(baseType, inits) + val (elemInits,remainder') = loop (i+1,remainder) + in (elemInit::elemInits, remainder') + end + val (arrayInits,remainder) = loop(0,origInits) + in (Aggregate arrayInits, remainder) + end + + and structNorm (structType, fields) origInits = + let fun loop [] inits = ([],inits) + | loop ((fieldType,NONE,liOpt)::fields) inits = + (* according to the standard, unnamed fields don't + * have initializers. + *) + loop fields inits + | loop ((fieldType,pidOpt,liOpt)::fields) inits = + let val (fieldInit,remainder) = norm(fieldType, inits) + val (fieldInits,remainder') = loop fields remainder + in (fieldInit::fieldInits, remainder') + end + val (structInits,remainder) = loop fields origInits + in (Aggregate structInits, remainder) + end + + and unionNorm (unionType, fields) origInits = + case fields + of [] => (warn "Empty union type, initializing to {}"; + (Aggregate [], origInits)) + | (fieldCtype,member)::_ => + let val (fieldInit,remainder) = norm(fieldCtype, origInits) + in (Aggregate [fieldInit], remainder) + end + + (* fill in with zeros if you run out of initializers *) + and scalarNorm ctype origInits = + case origInits + of (scalarInit::remainder) => (scalarInit, remainder) + | [] => let val scalarInit = mkIntInit 0 + in (scalarInit, []) + end + + (* feed supplies its argument initfn with the inits from the first aggregate, + * if there is one. The initfn should consume all the inits from the aggregate. *) + and feed (initfn, (Aggregate elemInits)::inits) = + let val (newinit,remainder) = initfn elemInits + in case remainder + of [] => (newinit, inits) + | _ => + (warn "Too many initializers for expression, ignoring extras"; + (newinit, inits)) + end + | feed (initfn, inits) = initfn inits + + and norm (ctype, inits) = + case ctype + of Ast.Qual (_,ctype) => norm(ctype, inits) (* strip qual *) + | Ast.TypeRef tid => (* dereference type ref *) + (case lookTid tid + of SOME{ntype = SOME(B.Typedef (tid,ctype)),...} => + norm(ctype, inits) + | _ => fail "Inconsistent table for type ref") + | Ast.Array (opt,baseType) => + let + val lenOp = case opt of SOME(i, _) => SOME i | NONE => NONE + in + feed (arrNorm(ctype, baseType, lenOp), inits) + end + | Ast.StructRef tid => + (case lookTid tid + of SOME {ntype = SOME(B.Struct(tid,fields)),...} => + feed (structNorm (ctype, fields), inits) + | SOME _ => fail "Incomplete type for struct ref" + | NONE => fail "Inconsistent table for struct ref") + | Ast.UnionRef tid => + (case lookTid tid + of SOME {ntype = SOME(B.Union(tid,fields)),...} => + feed (unionNorm (ctype, fields), inits) + | SOME _ => fail "Incomplete type for union ref" + | NONE => fail "Inconsistent table for union ref") + | (Ast.Numeric _ | Ast.Pointer _ | Ast.Function _ | Ast.EnumRef _) => + feed (scalarNorm ctype, inits) + | Ast.Void => fail "Incomplete type: void" + | Ast.Ellipses => fail "Cannot initialize ellipses" + | Ast.Error => fail "Cannot initialize error type" + +in + + let val (newinit,remainder) = norm(initType, [initExpr]) + in case remainder + of [] => newinit (* used them all *) + | _ => + (warn "Too many initializers for expression, ignoring extras"; + newinit) + end + handle NormalizeExn => initExpr + +end (* END normalize *) + +end (* structure InitializerNormalizer *) diff --git a/ckit/src/ast/parse-to-ast-sig.sml b/ckit/src/ast/parse-to-ast-sig.sml new file mode 100644 index 0000000..0dd4a5e --- /dev/null +++ b/ckit/src/ast/parse-to-ast-sig.sml @@ -0,0 +1,43 @@ +(* ast/parse-to-ast-sig.sml *) + +(* This is the top-level interface to the C front-end. It is + * implemented by the structures Ansic, FiveESSC, and D *) + +signature PARSE_TO_AST = +sig + + (* astBundle: the collection of information returned as the result of elaboration *) + type astBundle = + {ast: Ast.ast, (* the abstract syntax representation of a trans. unit *) + tidtab: Bindings.tidBinding Tidtab.uidtab, (* table of type identifiers *) + errorCount: int, (* count of errors occuring during parsing and elaboration *) + warningCount: int,(* count of warnings occuring during parsing and elaboration *) + auxiliaryInfo: (* annotations and symbol table info *) + {aidtab: Tables.aidtab, (* type annotation table *) + implicits: Tables.aidtab, + (* types associated with implicit argument conversions. + * See, e.g. "usual unary" and "usual binary" conversions + * in Harbison & Steele *) + env: State.symtab}} (* symbol table generated during elaboration *) + + val progToState : astBundle -> State.stateInfo + (* extracts stateInfo from astBundle for cascading processing of multiple + * translation units *) + + val fileToAst' : + TextIO.outstream (* error stream *) + -> (Sizes.sizes * State.stateInfo) (* sizes info and initial state *) + -> string (* source file *) + -> astBundle + (* processs a source file given the state resulting from processing + * previous files *) + + val fileToAst : + string (* source file *) + -> astBundle + (* process a file in isolation *) + + val fileToC : string -> unit + (* process a file and pretty print the resulting ast *) + +end (* signature PARSE_TO_AST *) diff --git a/ckit/src/ast/parse-to-ast.sml b/ckit/src/ast/parse-to-ast.sml new file mode 100644 index 0000000..8269a35 --- /dev/null +++ b/ckit/src/ast/parse-to-ast.sml @@ -0,0 +1,43 @@ +(* parse-to-ast.sml *) + +structure ParseToAst : PARSE_TO_AST = +struct + + type astBundle = + {ast: Ast.ast, + tidtab: Bindings.tidBinding Tidtab.uidtab, + errorCount: int, + warningCount: int, + auxiliaryInfo: {aidtab: Tables.aidtab, + implicits: Tables.aidtab, + env: State.symtab}} + + fun progToState ({tidtab, auxiliaryInfo={aidtab, implicits, env}, ...} : astBundle) = + State.STATE({ttab=tidtab,atab=aidtab,implicits=implicits},env) + + fun fileToAst' errStrm (sizes: Sizes.sizes, stateInfo: State.stateInfo) inFile + : astBundle = + let + (* suppress underscores to make error message more readable *) + val suppressPidUnderscores = !PPLib.suppressPidUnderscores + val suppressTidUnderscores = !PPLib.suppressTidUnderscores + val _ = (PPLib.suppressPidUnderscores := true; + PPLib.suppressTidUnderscores := true) + val errState = Error.mkErrState errStrm + val p = Parser.parseFile errState inFile + val result = BuildAst.makeAst (sizes,stateInfo,errState) p + in + PPLib.suppressPidUnderscores := suppressPidUnderscores; + PPLib.suppressTidUnderscores := suppressTidUnderscores; + result + end + + fun fileToAst inFile = + fileToAst' TextIO.stdErr (Sizes.defaultSizes, State.INITIAL) inFile + + fun fileToC x = + let val {ast, tidtab, ...} = fileToAst x + in PPLib.ppToStrm (PPAst.ppAst () tidtab) TextIO.stdOut ast + end + +end (* structure ParseToAst *) diff --git a/ckit/src/ast/pid.sml b/ckit/src/ast/pid.sml new file mode 100644 index 0000000..8616705 --- /dev/null +++ b/ckit/src/ast/pid.sml @@ -0,0 +1,5 @@ +(* Copyright (c) 1998 by Lucent Technologies *) + +structure Pid : UID = UidFn (val initial = 0; val prefix = "p"); + + diff --git a/ckit/src/ast/pidtab.sml b/ckit/src/ast/pidtab.sml new file mode 100644 index 0000000..d788b19 --- /dev/null +++ b/ckit/src/ast/pidtab.sml @@ -0,0 +1,5 @@ +(* Copyright (c) 1998 by Lucent Technologies *) + +structure Pidtab = UidtabImpFn (structure Uid=Pid) + + diff --git a/ckit/src/ast/pp/.cm/GUID/pp-ast-adornment-sig.sml b/ckit/src/ast/pp/.cm/GUID/pp-ast-adornment-sig.sml new file mode 100644 index 0000000..356cd74 --- /dev/null +++ b/ckit/src/ast/pp/.cm/GUID/pp-ast-adornment-sig.sml @@ -0,0 +1 @@ +guid-$/(ckit-lib.cm):ast/(group.cm):pp/pp-ast-adornment-sig.sml-1714016103.917 diff --git a/ckit/src/ast/pp/.cm/GUID/pp-ast-ext-sig.sml b/ckit/src/ast/pp/.cm/GUID/pp-ast-ext-sig.sml new file mode 100644 index 0000000..07d9a42 --- /dev/null +++ b/ckit/src/ast/pp/.cm/GUID/pp-ast-ext-sig.sml @@ -0,0 +1 @@ +guid-$/(ckit-lib.cm):ast/(group.cm):pp/pp-ast-ext-sig.sml-1714016103.922 diff --git a/ckit/src/ast/pp/.cm/GUID/pp-ast-fn.sml b/ckit/src/ast/pp/.cm/GUID/pp-ast-fn.sml new file mode 100644 index 0000000..a471afa --- /dev/null +++ b/ckit/src/ast/pp/.cm/GUID/pp-ast-fn.sml @@ -0,0 +1 @@ +guid-$/(ckit-lib.cm):ast/(group.cm):pp/pp-ast-fn.sml-1714016104.027 diff --git a/ckit/src/ast/pp/.cm/GUID/pp-ast-sig.sml b/ckit/src/ast/pp/.cm/GUID/pp-ast-sig.sml new file mode 100644 index 0000000..1ba18b6 --- /dev/null +++ b/ckit/src/ast/pp/.cm/GUID/pp-ast-sig.sml @@ -0,0 +1 @@ +guid-$/(ckit-lib.cm):ast/(group.cm):pp/pp-ast-sig.sml-1714016104.020 diff --git a/ckit/src/ast/pp/.cm/GUID/pp-ast.sml b/ckit/src/ast/pp/.cm/GUID/pp-ast.sml new file mode 100644 index 0000000..fdaee0b --- /dev/null +++ b/ckit/src/ast/pp/.cm/GUID/pp-ast.sml @@ -0,0 +1 @@ +guid-$/(ckit-lib.cm):ast/(group.cm):pp/pp-ast.sml-1714016104.395 diff --git a/ckit/src/ast/pp/.cm/GUID/pp-lib.sml b/ckit/src/ast/pp/.cm/GUID/pp-lib.sml new file mode 100644 index 0000000..d7c05df --- /dev/null +++ b/ckit/src/ast/pp/.cm/GUID/pp-lib.sml @@ -0,0 +1 @@ +guid-$/(ckit-lib.cm):ast/(group.cm):pp/pp-lib.sml-1714016103.941 diff --git a/ckit/src/ast/pp/.cm/SKEL/pp-ast-adornment-sig.sml b/ckit/src/ast/pp/.cm/SKEL/pp-ast-adornment-sig.sml new file mode 100644 index 0000000..b7270d3 --- /dev/null +++ b/ckit/src/ast/pp/.cm/SKEL/pp-ast-adornment-sig.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f3d"OldPrettyPrint"d"Tables"d"Ast"ac"PPASTADORNMENT"h0 \ No newline at end of file diff --git a/ckit/src/ast/pp/.cm/SKEL/pp-ast-ext-sig.sml b/ckit/src/ast/pp/.cm/SKEL/pp-ast-ext-sig.sml new file mode 100644 index 0000000..3f32f92 --- /dev/null +++ b/ckit/src/ast/pp/.cm/SKEL/pp-ast-ext-sig.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f4d"OldPrettyPrint"d"AstExt"d"Tables"d"Ast"ac"PPASTEXT"h0 \ No newline at end of file diff --git a/ckit/src/ast/pp/.cm/SKEL/pp-ast-fn.sml b/ckit/src/ast/pp/.cm/SKEL/pp-ast-fn.sml new file mode 100644 index 0000000..6ed8c7b --- /dev/null +++ b/ckit/src/ast/pp/.cm/SKEL/pp-ast-fn.sml @@ -0,0 +1,2 @@ +Skeleton 5 +ae"PPAstFn"i2aPPAstAdornment"gp1c"PPASTADORNMENT"fjh5CaTid"gp1 OldPrettyPrint.ppstream -> 'a -> unit + + type ('aidinfo,'a,'b) adornment_pp = ('aidinfo -> 'a) -> 'aidinfo -> 'b +in +signature PPASTADORNMENT = sig + type aidinfo + val ppExpressionAdornment: (aidinfo,Ast.coreExpression pp,Ast.expression pp) adornment_pp + val ppStatementAdornment : (aidinfo,Ast.coreStatement pp,Ast.statement pp) adornment_pp + val ppExternalDeclAdornment: (aidinfo,Ast.coreExternalDecl pp,Ast.externalDecl pp) adornment_pp +end +end diff --git a/ckit/src/ast/pp/pp-ast-ext-sig.sml b/ckit/src/ast/pp/pp-ast-ext-sig.sml new file mode 100644 index 0000000..4169fc7 --- /dev/null +++ b/ckit/src/ast/pp/pp-ast-ext-sig.sml @@ -0,0 +1,27 @@ +(* Copyright (c) 1998 by Lucent Technologies *) + +local + type 'a pp = Tables.tidtab -> OldPrettyPrint.ppstream -> 'a -> unit + type ('a, 'aidinfo) ppExt = + (('aidinfo -> Ast.expression pp) * ('aidinfo -> Ast.statement pp) * + ('aidinfo -> Ast.binop pp) * ('aidinfo -> Ast.unop pp)) + -> 'aidinfo + -> Tables.tidtab -> OldPrettyPrint.ppstream -> 'a -> unit +in + +signature PPASTEXT = sig + type aidinfo + val ppUnopExt : aidinfo -> AstExt.unopExt pp + val ppBinopExt : aidinfo -> AstExt.binopExt pp + val ppExpressionExt : + ((Ast.expression, Ast.statement, Ast.binop, Ast.unop) AstExt.expressionExt, + aidinfo) ppExt + val ppStatementExt : + ((Ast.expression, Ast.statement, Ast.binop, Ast.unop) AstExt.statementExt, + aidinfo) ppExt + val ppExternalDeclExt : + ((Ast.expression, Ast.statement, Ast.binop, Ast.unop) AstExt.externalDeclExt, + aidinfo) ppExt +end + +end diff --git a/ckit/src/ast/pp/pp-ast-fn.sml b/ckit/src/ast/pp/pp-ast-fn.sml new file mode 100644 index 0000000..6e364e2 --- /dev/null +++ b/ckit/src/ast/pp/pp-ast-fn.sml @@ -0,0 +1,686 @@ +(* Copyright (c) 1998 by Lucent Technologies *) + +functor PPAstFn (structure PPAstAdornment : PPASTADORNMENT) : PP_AST = struct + + structure Tid = Tid + structure Pid = Pid + structure PP = OldPrettyPrint + structure B = Bindings + structure PPAA = PPAstAdornment + structure PPAE = PPAstExtFn (type aidinfo = PPAstAdornment.aidinfo); + structure PPL = PPLib + + open PPLib + open Ast + + type aidinfo = PPAE.aidinfo + + val printLocation = false (* internal flag - pretty print locations as comments *) + + fun ppLoc pps (SourceMap.LOC {srcFile, beginLine, beginCol, endLine, endCol}) = + if printLocation then ( PPL.addStr pps " /*[" + ; PPL.addStr pps (srcFile) + ; PPL.addStr pps ":" + ; PPL.addStr pps (Int.toString beginLine) + ; PPL.addStr pps "]*/ " + ) + else () + | ppLoc pps _ = () + + val warning = PPL.warning + + val ppLParen = PPL.ppGuarded "(" + + val ppRParen = PPL.ppGuarded ")" + + fun getCtype ({stClass,ctype,...}: Ast.id) = (stClass,ctype) + + fun isPostFix PostInc = true + | isPostFix PostDec = true + | isPostFix _ = false + + fun ppBinop aidinfo tidtab pps binop = + case binop + of Plus => PPL.addStr pps "+" + | Minus => PPL.addStr pps "-" + | Times => PPL.addStr pps "*" + | Divide => PPL.addStr pps "/" + | Mod => PPL.addStr pps "%" + | Gt => PPL.addStr pps ">" + | Lt => PPL.addStr pps "<" + | Gte => PPL.addStr pps ">=" + | Lte => PPL.addStr pps "<=" + | Eq => PPL.addStr pps "==" + | Neq => PPL.addStr pps "!=" + | And => PPL.addStr pps "&&" + | Or => PPL.addStr pps "||" + | BitOr => PPL.addStr pps "|" + | BitAnd => PPL.addStr pps "&" + | BitXor => PPL.addStr pps "^" + | Lshift => PPL.addStr pps "<<" + | Rshift => PPL.addStr pps ">>" + | PlusAssign => PPL.addStr pps "+=" + | MinusAssign => PPL.addStr pps "-=" + | TimesAssign => PPL.addStr pps "*=" + | DivAssign => PPL.addStr pps "/=" + | ModAssign => PPL.addStr pps "%=" + | XorAssign => PPL.addStr pps "^=" + | OrAssign => PPL.addStr pps "|=" + | AndAssign => PPL.addStr pps "&=" + | LshiftAssign => PPL.addStr pps "<<=" + | RshiftAssign => PPL.addStr pps ">>=" + | BinopExt be => PPAE.ppBinopExt aidinfo tidtab pps be + + fun ppUnop aidinfo tidtab pps unop = + case unop + of Uplus => PPL.addStr pps "+" + | Not => PPL.addStr pps "!" + | Negate => PPL.addStr pps "-" + | BitNot => PPL.addStr pps "~" + | PreInc => PPL.addStr pps "++" + | PostInc => PPL.addStr pps "++" + | PreDec => PPL.addStr pps "--" + | PostDec => PPL.addStr pps "--" + | UnopExt ue => PPAE.ppUnopExt aidinfo tidtab pps ue + + datatype Identifier + = ID of Ast.id + | MEMBER of Ast.member + | TID of Tid.uid + + datatype params = + EMPTY + | ANSI of Ast.id list + | KNR of Ast.id list + + datatype ctStkItem + = Arr of (LargeInt.int * Ast.expression) option + | Qua of Ast.qualifier + | Fun of Ast.ctype list * params + | Ptr + + + val printConst = ref false + + fun ppIdentifier tidtab pps = + fn (ID id) => ppId pps id + | (MEMBER member) => ppMember pps member + | (TID tid) => ppTid tidtab pps tid + + fun ppQualifier pps qf = + let val s = case qf + of CONST => if !printConst then "const " else "" + | VOLATILE => "volatile " + in addStr pps s end + + fun ppStorageClass pps sc = + let val s = case sc + of STATIC => "static " + | EXTERN => "extern " + | REGISTER => "register " + | AUTO => "" + | DEFAULT => "" + in addStr pps s end + + + fun ppSignedness pps sign = + let val s = case sign + of SIGNED => "" + | UNSIGNED => "unsigned " + in addStr pps s end + + fun ppFractionality pps frac = + let val s = case frac + of FRACTIONAL => "fractional " + | WHOLENUM => "" + in addStr pps s end + + fun ppSaturatedness pps sat = + let val s = case sat + of SATURATE => "saturate " + | NONSATURATE => "" + in addStr pps s end + + fun ppIntKind pps ik = + let val s = case ik + of CHAR => "char" + | SHORT => "short" + | INT => "int" + | LONG => "long" + | LONGLONG => "long long" + | FLOAT => "float" + | DOUBLE => "double" + | LONGDOUBLE => "long double" + in addStr pps s end + + fun ppStk aidinfo tidtab pps (idOpt,stk) = + let fun loop (prev,[]) = ppOpt (ppIdentifier tidtab) pps idOpt + | loop (prev,(Qua qf)::l) = + (ppQualifier pps qf + ;loop (prev,l) + ) + | loop (prev,(a as Arr opt)::l) = + (loop (a,l) + ;addStr pps "[" + ;(case opt of + SOME(i, expr) => ppExpr {nested=false} aidinfo tidtab pps expr + | NONE => ()) + ;addStr pps "]" + ) + | loop (prev, ((f as Fun (cts,idsOpt))::l)) = + (loop (f,l) + ;space pps + ;case idsOpt + of EMPTY => + ppList {pp=ppCtype aidinfo tidtab + ,sep="," + ,lDelim="(" + ,rDelim=")" + } pps cts + | ANSI ids => + ppList {pp=ppIdDecl aidinfo tidtab + ,sep="," + ,lDelim="(" + ,rDelim=")" + } pps ids + | KNR ids => + ppList {pp=ppId + ,sep="," + ,lDelim="(" + ,rDelim=")" + } pps ids + ) + | loop (Ptr,p::l) = + (addStr pps "*" + ;loop (Ptr,l) + ) + | loop (_,Ptr::l) = + (addStr pps "(" + ;addStr pps "*" + ;loop (Ptr,l) + ;addStr pps ")" + ) + in loop (Ptr,stk) end + + and ppSpStk aidinfo tidtab pps (pair as (NONE,[])) = ppStk aidinfo tidtab pps pair + | ppSpStk aidinfo tidtab pps (pair as (_,stk)) = (space pps; ppStk aidinfo tidtab pps pair) + + and ppDecl0 aidinfo tidtab pps (idOpt,idsOpt,ctype) = + let fun loop (idsOpt,ctype,stk) = + case ctype + of Void => + (addStr pps "void" + ;ppSpStk aidinfo tidtab pps (idOpt,stk) + ) + | Ellipses => + (case stk + of [] => addStr pps "..." + | _ => (warning + "ppDecl" + "ill-formed ellipses type" + ;addStr pps "..." + )) + | Qual (qf,ct) => + loop (idsOpt,ct,(Qua qf)::stk) + | Numeric (NONSATURATE,WHOLENUM, _, CHAR, SIGNASSUMED) => + (addStr pps "char" + ;ppSpStk aidinfo tidtab pps (idOpt,stk) + ) + | Numeric (NONSATURATE,WHOLENUM, SIGNED, CHAR, SIGNDECLARED) => + (addStr pps "signed char" + ;ppSpStk aidinfo tidtab pps (idOpt,stk) + ) + | Numeric (NONSATURATE,WHOLENUM, UNSIGNED, CHAR, SIGNDECLARED) => + (addStr pps "unsigned char" + ;ppSpStk aidinfo tidtab pps (idOpt,stk) + ) + | Numeric (sat, frac, sign, ik, _) => + (ppSaturatedness pps sat + ;ppFractionality pps frac + ;ppSignedness pps sign + ;ppIntKind pps ik + ;ppSpStk aidinfo tidtab pps (idOpt,stk) + ) + | Array (opt,ct) => loop (idsOpt,ct,Arr opt::stk) + | Pointer ct => loop (idsOpt,ct,Ptr::stk) + | Function (ct,cts) => let + val optids = map #2 cts(* optional ids *) + val cts = map #1 cts(* types *) + fun useaux () = loop (EMPTY,ct,Fun (cts,idsOpt)::stk) + in + case idsOpt of + EMPTY => let + in + if List.exists (not o isSome) optids then useaux () + else loop (EMPTY, ct, + Fun(cts,ANSI(map valOf optids))::stk) + end + | _ => useaux () + end + | EnumRef tid => + (case Tidtab.find (tidtab,tid) + of SOME {ntype=SOME (B.Enum _),...} => + (addStr pps "enum " + ;ppTid tidtab pps tid + ) + | _ => (* print out partially defined enums *) + (addStr pps "enum " + ;ppTid tidtab pps tid + ) + (* addStr pps ("EnumRef(" ^ (Tid.toString tid) ^ ")") *) + ;ppSpStk aidinfo tidtab pps (idOpt,stk) + ) + | StructRef tid => + (addStr pps "struct " + ;ppTid tidtab pps tid + ;ppSpStk aidinfo tidtab pps (idOpt,stk)) + | UnionRef tid => + (addStr pps "union " + ;ppTid tidtab pps tid + ;ppSpStk aidinfo tidtab pps (idOpt,stk)) + | TypeRef tid => + (case Tidtab.find (tidtab,tid) + of SOME {ntype=SOME (B.Typedef _),...} => ppTid tidtab pps tid + | _ => addStr pps ("TypeRef(" ^ (Tid.toString tid) ^ ")") + ;ppSpStk aidinfo tidtab pps (idOpt,stk) + ) + | Error => (addStr pps "/* ErrorType */ " + ;ppSpStk aidinfo tidtab pps (idOpt,stk)) + in loop (idsOpt,ctype,[]) end + + and ppCtype aidinfo tidtab pps ctype = ppDecl0 aidinfo tidtab pps (NONE,EMPTY,ctype) + + (* Note: id is only used for printing purposes. + All information needed to interpret a type is obtained via tid *) + and ppNamedCtype aidinfo tidtab pps nct = + let fun ppOptList ppElt sep [] = () + | ppOptList ppElt sep l = + (addStr pps "{" + ;blockify 2 (separate (ppElt,fn pps => (addStr pps sep; newline pps))) pps l + ;newline pps + ;addStr pps "}" + ) + in case nct + of B.Struct (tid,members) => + let fun ppLI' pps li = (addStr pps ":"; ppLI pps li) + + fun ppMember pps (ct, memberOpt, LIOpt) = + (ppDecl0 aidinfo tidtab pps (Option.map MEMBER memberOpt,EMPTY,ct) + ;ppOpt ppLI' pps LIOpt + ;addStr pps ";" + ) + + in (addStr pps "struct " + ;ppTid tidtab pps tid + ;space pps + ;ppOptList ppMember "" members) + end + | B.Union (tid,members) => + let + fun ppMember pps (ct, member) = + (ppDecl0 aidinfo tidtab pps (SOME(MEMBER member),EMPTY,ct) + ;addStr pps ";" + ) + in addStr pps "union " + ;ppTid tidtab pps tid + ;space pps + ;ppOptList ppMember "" members + end + | B.Enum (tid,members) => + let fun ppMemberInt pps (member,li) = + (ppMember pps member + ;addStr pps "=" + ;ppLI pps li + ) + in (addStr pps "enum " + ;ppTid tidtab pps tid + ;space pps + ;ppOptList ppMemberInt "," members + ) + end + | B.Typedef (tid,ctype) => + (addStr pps "typedef " + ;ppDecl0 aidinfo tidtab pps (SOME (TID tid),EMPTY,ctype) + ) + end + + + and ppDecl aidinfo tidtab pps (id,ct) = ppDecl0 aidinfo tidtab pps (SOME (ID id),EMPTY,ct) + + and ppDeclaration aidinfo tidtab pps (TypeDecl{shadow=NONE, tid}) = + (case Tidtab.find (tidtab,tid) + of SOME {ntype=SOME nct,location,...} => + (ppLoc pps location; ppNamedCtype aidinfo tidtab pps nct) + | _ => (warning + "ppCoreStmt" + ("No type associated with tid:"^(Tid.toString tid)); + PPL.addStr pps "..."); + PPL.addStr pps ";") + | ppDeclaration aidinfo tidtab pps (TypeDecl{shadow=SOME{strct=true}, tid}) = + (PPLib.addStr pps "struct " + ;PPLib.ppTid tidtab pps tid + ;PPL.addStr pps ";") + | ppDeclaration aidinfo tidtab pps (TypeDecl{shadow=SOME{strct=false}, tid}) = + (PPLib.addStr pps "union " + ;PPLib.ppTid tidtab pps tid + ;PPL.addStr pps ";") + | ppDeclaration aidinfo tidtab pps (VarDecl (id as {location,...}, initOpt)) = + (ppLoc pps location + ;ppIdDecl aidinfo tidtab pps id + ;case initOpt + of SOME initExpr => + (PPL.addStr pps "="; + ppInitExpression aidinfo tidtab pps initExpr) + | NONE => () + ;PPL.addStr pps ";") + + and ppIdDecl aidinfo tidtab pps (id: Ast.id) = + let val (stClass,ctype) = getCtype id + in (ppStorageClass pps stClass + ;ppDecl aidinfo tidtab pps (id,ctype) + ) + end + + and blockStmt aidinfo tidtab pps stmt = PPL.blockify 2 (ppStmt aidinfo tidtab) pps stmt + + and ppStmt aidinfo tidtab pps (stmt as (STMT (_,_,loc))) = + ( ppLoc pps loc + ; PPAA.ppStatementAdornment ppCoreStmt aidinfo tidtab pps stmt + ) + + and ppCoreStmt aidinfo tidtab pps coreStmt = + case coreStmt + of Expr expOpt => + ( PPL.ppOpt (ppExpr {nested=false} aidinfo tidtab) pps expOpt + ; PPL.addStr pps ";" + ) + | Compound (decls,stmts) => + ( PPL.addStr pps "{" + ; (case decls of + nil => () + | _ => PPL.blockify 2 (PPL.separate (ppDeclaration aidinfo tidtab, PPL.newline)) pps decls) + ; (case stmts of + nil => () + | _ => PPL.blockify 2 (PPL.separate (ppStmt aidinfo tidtab, PPL.newline)) pps stmts) + ; PPL.newline pps + ; PPL.addStr pps "}" + ) + | While (exp,stmt) => + ( PPL.addStr pps "while (" + ; ppExpr {nested=false} aidinfo tidtab pps exp + ; PPL.addStr pps ")" + ; blockStmt aidinfo tidtab pps stmt + ) + | Do (exp,stmt) => + ( PPL.addStr pps "do" + ; blockStmt aidinfo tidtab pps stmt + ; PPL.newline pps + ; PPL.addStr pps "while (" + ; ppExpr {nested=false} aidinfo tidtab pps exp + ; PPL.addStr pps ");" + ) + | For (expOpt0,expOpt1,expOpt2,stmt) => + ( PPL.addStr pps "for (" + ; PPL.ppOpt (ppExpr {nested=false} aidinfo tidtab) pps expOpt0 + ; PPL.addStr pps "; " + ; PPL.ppOpt (ppExpr {nested=false} aidinfo tidtab) pps expOpt1 + ; PPL.addStr pps "; " + ; PPL.ppOpt (ppExpr {nested=false} aidinfo tidtab) pps expOpt2 + ; PPL.addStr pps ")" + ; blockStmt aidinfo tidtab pps stmt + ) + | Labeled (label,stmt) => + ( PPL.bBlock pps PP.INCONSISTENT ~2 + ; PPL.newline pps + ; PPL.ppLabel pps label + ; PPL.addStr pps ": " + ; PPL.eBlock pps + ; PPL.newline pps + ; ppStmt aidinfo tidtab pps stmt + ) + | CaseLabel (li,stmt) => + ( PPL.bBlock pps PP.INCONSISTENT ~2 + ; PPL.newline pps + ; PPL.addStr pps "case " + ; PPL.ppLI pps li + ; PPL.addStr pps ": " + ; PPL.eBlock pps + ; PPL.newline pps + ; ppStmt aidinfo tidtab pps stmt + ) + | DefaultLabel stmt => + ( PPL.bBlock pps PP.INCONSISTENT ~2 + ; PPL.newline pps + ; PPL.addStr pps "default: " + ; PPL.eBlock pps + ; PPL.newline pps + ; ppStmt aidinfo tidtab pps stmt + ) + | Goto label => + ( PPL.addStr pps "goto " + ; PPL.ppLabel pps label + ; PPL.addStr pps ";" + ) + | Break => PPL.addStr pps "break;" + | Continue => PPL.addStr pps "continue;" + | Return expOpt => + ( PPL.addStr pps "return " + ; PPL.ppOpt (ppExpr {nested=false} aidinfo tidtab) pps expOpt + ; PPL.addStr pps ";" + ) + | IfThen (exp,stmt) => + ( PPL.addStr pps "if (" + ; ppExpr {nested=false} aidinfo tidtab pps exp + ; PPL.addStr pps ") " + ; blockStmt aidinfo tidtab pps stmt + ) + | IfThenElse (exp,stmt0,stmt1) => + ( PPL.addStr pps "if (" + ; ppExpr {nested=false} aidinfo tidtab pps exp + ; PPL.addStr pps ") " + ; blockStmt aidinfo tidtab pps stmt0 + ; PPL.newline pps + ; PPL.addStr pps "else" + ; blockStmt aidinfo tidtab pps stmt1 + ) + | Switch (exp,stmt) => + ( PPL.addStr pps "switch (" + ; ppExpr {nested=false} aidinfo tidtab pps exp + ; PPL.addStr pps ")" + ; blockStmt aidinfo tidtab pps stmt + ) + | ErrorStmt => + ( PPL.addStr pps "/* ErrorStmt */" + ) + | StatExt se => PPAE.ppStatementExt (ppExpr {nested=false},ppStmt,ppBinop,ppUnop) aidinfo tidtab pps se + + and ppExpr nested aidinfo tidtab pps expr = + PPAA.ppExpressionAdornment (ppCoreExpr nested) aidinfo tidtab pps expr + + and ppCoreExpr {nested} aidinfo tidtab pps coreExpr = + case coreExpr + of IntConst li => PPL.ppLI pps li + | RealConst r => PPL.ppReal pps r + | StringConst s => PPL.ppString pps s + | Call (exp,exps) => + ( ppExpr {nested=true} aidinfo tidtab pps exp + ; PPL.space pps + ; PPL.ppList { pp=ppExpr {nested=false} aidinfo tidtab + , sep="," + , lDelim="(" + , rDelim=")" + } pps exps + ) + | QuestionColon (e0,e1,e2) => + ( ppLParen nested pps + ; ppExpr {nested=true} aidinfo tidtab pps e0 + ; PPL.addStr pps " ? " + ; ppExpr {nested=false} aidinfo tidtab pps e1 + ; PPL.addStr pps " : " + ; ppExpr {nested=false} aidinfo tidtab pps e2 + ; ppRParen nested pps + ) + | Assign (e0,e1) => + ( ppLParen nested pps + ; ppExpr {nested=false} aidinfo tidtab pps e0 + ; PPL.addStr pps " = " + ; ppExpr {nested=true} aidinfo tidtab pps e1 + ; ppRParen nested pps + ) + | Comma (e0,e1) => + ( PPL.addStr pps "(" + ; ppExpr {nested=false} aidinfo tidtab pps e0 + ; PPL.addStr pps "," + ; ppExpr {nested=false} aidinfo tidtab pps e1 + ; PPL.addStr pps ")" + ) + | Sub (e0,e1) => + ( ppExpr {nested=nested} aidinfo tidtab pps e0 + ; PPL.addStr pps "[" + ; ppExpr {nested=false} aidinfo tidtab pps e1 + ; PPL.addStr pps "]" + ) + | Member (exp,member) => + ( ppLParen nested pps + ; ppExpr {nested=true} aidinfo tidtab pps exp + ; PPL.addStr pps "." + ; PPL.ppMember pps member + ; ppRParen nested pps + ) + | Arrow (exp,member) => + ( ppLParen nested pps + ; ppExpr {nested=true} aidinfo tidtab pps exp + ; PPL.addStr pps "->" + ; PPL.ppMember pps member + ; ppRParen nested pps + ) + | Deref exp => + ( ppLParen nested pps + ; PPL.addStr pps "*" + ; ppExpr {nested=true} aidinfo tidtab pps exp + ; ppRParen nested pps + ) + | AddrOf exp => + ( ppLParen nested pps + ; PPL.addStr pps "&" + ; ppExpr {nested=true} aidinfo tidtab pps exp + ; ppRParen nested pps + ) + | Binop (binop,exp0,exp1) => + ( ppLParen nested pps + ; ppExpr {nested=true} aidinfo tidtab pps exp0 + ; ppBinop aidinfo tidtab pps binop + ; ppExpr {nested=true} aidinfo tidtab pps exp1 + ; ppRParen nested pps + ) + | Unop (unop,exp) => + ( ppLParen nested pps + ; if (isPostFix unop) + then (ppExpr {nested=true} aidinfo tidtab pps exp; ppUnop aidinfo tidtab pps unop) + else (ppUnop aidinfo tidtab pps unop; ppExpr {nested=true} aidinfo tidtab pps exp) + ; ppRParen nested pps + ) + | Cast (ctype,exp) => + ( ppLParen nested pps + ; PPL.addStr pps "(" + ; ppCtype aidinfo tidtab pps ctype + ; PPL.addStr pps ") " + ; ppExpr {nested=true} aidinfo tidtab pps exp + ; ppRParen nested pps + ) + | Id id => PPL.ppId pps id + | EnumId (id,li) => PPL.ppMember pps id + | SizeOf ctype => + ( ppLParen nested pps + ; PPL.addStr pps "sizeof(" + ; ppCtype aidinfo tidtab pps ctype + ; PPL.addStr pps ")" + ; ppRParen nested pps + ) + + | ExprExt ee => PPAE.ppExpressionExt (ppExpr {nested=false},ppStmt,ppBinop,ppUnop) aidinfo tidtab pps ee + | ErrorExpr => ( warning "ppCoreExpression" "found an error expression" + ; PPL.addStr pps "/* error expression */ 0" + ) + + and ppInitExpression aidinfo tidtab pps initExpr = + case initExpr + of Simple expr => ppExpr {nested=false} aidinfo tidtab pps expr + | Aggregate initExprs => + PPL.ppList { pp=ppInitExpression aidinfo tidtab + , sep="," + , lDelim="{" + , rDelim="}" + } pps initExprs + + fun ppCoreExternalDecl aidinfo tidtab pps edecl = + case edecl + of ExternalDecl decl => + ppDeclaration aidinfo tidtab pps decl + | FunctionDef (id,ids,stmt) => + let val {location,...} = id + val (stClass,ctype) = getCtype id + val (ctype,kNr,params) = + case ctype + of Ast.Function (retTy,paramTys) => + if null paramTys andalso not (null ids) + then (ctype,true,KNR ids) + else (ctype,false,ANSI ids) + | _ => + (warning + "ppCoreExternalDecl" + ("No function type associated with id:" + ^(PPL.ppToString PPL.ppId id)) + ;(Ast.Function (Ast.Void,[]),false,ANSI []) + ) + fun kr pps [] = [] + | kr pps (id::ids) = + (ppIdDecl aidinfo tidtab pps id + ;PPL.addStr pps ";" + ;if null ids then () else newline pps + ;kr pps ids + ) + in ppLoc pps location + ;ppStorageClass pps stClass + ;ppDecl0 aidinfo tidtab pps (SOME (ID id),params,ctype) + ;PPL.newline pps + ;if kNr then (blockify 2 kr pps ids; newline pps) else () + ;ppStmt aidinfo tidtab pps stmt + end + | ExternalDeclExt ed => PPAE.ppExternalDeclExt (ppExpr {nested=false},ppStmt,ppBinop,ppUnop) aidinfo tidtab pps ed + + fun ppExternalDecl aidinfo tidtab pps edecl = + PPAA.ppExternalDeclAdornment ppCoreExternalDecl aidinfo tidtab pps edecl + + fun ppAst aidinfo tidtab pps edecls = + PPL.separate (ppExternalDecl aidinfo tidtab,PPL.newline) pps edecls + + (* The pretty-printer expects a block at top level, so all of the + * external interfaces are wrapped to give it one. + *) + fun wrap' pp aidinfo pps v = + ( PPL.bBlock pps PP.INCONSISTENT 0 + ; PPL.newline pps + ; pp aidinfo pps v + ; PPL.eBlock pps + ) + + fun wrap pp aidinfo tidtab pps v = + ( PPL.bBlock pps PP.INCONSISTENT 0 + ; PPL.newline pps + ; pp aidinfo tidtab pps v + ; PPL.newline pps + ; PPL.eBlock pps + ) + + val ppBinop = wrap ppBinop + val ppUnop = wrap ppUnop + val ppDeclaration = wrap ppDeclaration + val ppStatement = wrap ppStmt + val ppCoreStatement = wrap ppCoreStmt + val ppExpression = wrap (ppExpr {nested=false}) + val ppCoreExpression = wrap (ppCoreExpr {nested=false}) + val ppExternalDecl = wrap ppExternalDecl + val ppCoreExternalDecl = wrap ppCoreExternalDecl + val ppAst = wrap ppAst +end diff --git a/ckit/src/ast/pp/pp-ast-sig.sml b/ckit/src/ast/pp/pp-ast-sig.sml new file mode 100644 index 0000000..95d1901 --- /dev/null +++ b/ckit/src/ast/pp/pp-ast-sig.sml @@ -0,0 +1,35 @@ +(* Copyright (c) 1998 by Lucent Technologies *) + +signature PP_AST = +sig + + type aidinfo + type 'a pp = OldPrettyPrint.ppstream -> 'a -> unit + + val printConst : bool ref + val ppId : Ast.id pp + val ppTid : Tables.tidtab -> Tid.uid pp + val ppStorageClass : Ast.storageClass pp + val ppDecl : aidinfo -> Tables.tidtab -> (Ast.id * Ast.ctype) pp + val ppCtype : aidinfo -> Tables.tidtab -> Ast.ctype pp + val ppQualifier : Ast.qualifier pp + val ppSignedness : Ast.signedness pp + val ppFractionality : Ast.fractionality pp + val ppSaturatedness : Ast.saturatedness pp + val ppIntKind : Ast.intKind pp + val ppNamedCtype : aidinfo -> Tables.tidtab -> Bindings.namedCtype pp + + val ppBinop : aidinfo -> Tables.tidtab -> Ast.binop pp + val ppUnop : aidinfo -> Tables.tidtab -> Ast.unop pp + val ppDeclaration : aidinfo -> Tables.tidtab -> Ast.declaration pp + val ppStatement : aidinfo -> Tables.tidtab -> Ast.statement pp + val ppCoreStatement : aidinfo -> Tables.tidtab -> Ast.coreStatement pp + val ppExpression : aidinfo -> Tables.tidtab -> Ast.expression pp + val ppCoreExpression : aidinfo -> Tables.tidtab -> Ast.coreExpression pp + val ppInitExpression : aidinfo -> Tables.tidtab -> Ast.initExpression pp + val ppCoreExternalDecl : aidinfo -> Tables.tidtab -> Ast.coreExternalDecl pp + val ppExternalDecl : aidinfo -> Tables.tidtab -> Ast.externalDecl pp + val ppAst : aidinfo -> Tables.tidtab -> Ast.ast pp + +end + diff --git a/ckit/src/ast/pp/pp-ast.sml b/ckit/src/ast/pp/pp-ast.sml new file mode 100644 index 0000000..def7a05 --- /dev/null +++ b/ckit/src/ast/pp/pp-ast.sml @@ -0,0 +1,23 @@ +(* Copyright (c) 1998 by Lucent Technologies + * pretty-printer which simply ignores any aidinfo. + *) + +local + structure PPAstAdornment : PPASTADORNMENT = + struct + type aidinfo = unit + + fun ppExpressionAdornment ppCoreExpr aidinfo tidtab pps (Ast.EXPR (coreExpr,_,_)) = + ppCoreExpr aidinfo tidtab pps coreExpr + + fun ppStatementAdornment ppCoreStmt aidinfo tidtab pps (Ast.STMT (coreStmt,_,_)) = + ppCoreStmt aidinfo tidtab pps coreStmt + + fun ppExternalDeclAdornment ppCoreExternalDecl aidinfo tidtab pps + (Ast.DECL (coreExtDecl,_,_)) = + ppCoreExternalDecl aidinfo tidtab pps coreExtDecl + end + +in + structure PPAst = PPAstFn(structure PPAstAdornment=PPAstAdornment) +end diff --git a/ckit/src/ast/pp/pp-lib.sml b/ckit/src/ast/pp/pp-lib.sml new file mode 100644 index 0000000..f6386d3 --- /dev/null +++ b/ckit/src/ast/pp/pp-lib.sml @@ -0,0 +1,141 @@ +structure PPLib = struct + + structure PP = OldPrettyPrint + + type 'a pp = OldPrettyPrint.ppstream -> 'a -> unit + + exception ppExn of string + + val suppressPidUnderscores = ref true + val suppressPidGlobalUnderscores = ref true + (* usually want to do this to preserve linkability *) + val suppressTidUnderscores = ref true + (* These flags are set to true temporarily during parsing to make error messages + * more readable, and are then resored to their original values. See + * parse-to-ast.sml. + *) + + fun warning f msg = (print f; print ":"; print msg) + + fun ppToStrm pp strm v = + let val pps = PP.mk_ppstream {consumer = (fn s => TextIO.output (strm,s)), + flush = (fn () => TextIO.flushOut(strm)), + linewidth = 80} + in pp pps v; + PP.flush_ppstream pps + end + + fun ppToString pp v = PP.pp_to_string 80 pp v + + val addStr = PP.add_string + val newline = PP.add_newline + val bBlock = PP.begin_block + val eBlock = PP.end_block + + fun ppInt pps i = + if i >= 0 then addStr pps (Int.toString i) + else (addStr pps "-"; addStr pps (Int.toString (~i))) + + fun ppInt32 pps i = + if i >= 0 then addStr pps (Int32.toString i) + else (addStr pps "-"; addStr pps (Int32.toString (~i))) + + fun ppLI pps i = + if i >= 0 then addStr pps (LargeInt.toString i) + else (addStr pps "-"; addStr pps (LargeInt.toString (~i))) + + fun ppReal pps r = addStr pps (Real.toString r) + + fun ppString pps s = + (addStr pps "\""; + addStr pps (String.toCString s); + addStr pps "\"") + + fun separate (pp,sep) pps [] = () + | separate (pp,sep) pps [x] = pp pps x + | separate (pp,sep) pps (x::xs) = + (pp pps x; sep pps; separate (pp,sep) pps xs) + + fun ppList {pp, sep, lDelim, rDelim} pps items = + (addStr pps lDelim; + separate (pp,fn pps => addStr pps sep) pps items; + addStr pps rDelim) + + fun space pps = addStr pps " " + + fun spaces pps 0 = () + | spaces pps n = (space pps; spaces pps (n-1)) + + fun blockify n pp pps v = + ( newline pps + ; bBlock pps PP.INCONSISTENT n + ; spaces pps n + ; pp pps v + ; eBlock pps + ) + + fun ppOpt pp pps NONE = () + | ppOpt pp pps (SOME x) = pp pps x + + fun ppSp pp pps v = (space pps; pp pps v) + + fun ppSpOpt pp pps opt = ppOpt (ppSp pp) pps opt + + fun ppGuarded s bool pps = if bool then addStr pps s else () +(* + fun ppPid (pidtab: Tables.pidtab, _) pps pid = + let fun ppSymbolQuietly symbol = addStr pps (Symbol.name symbol) + fun ppSymbolVerbose symbol = ( addStr pps (Symbol.name symbol) + ; addStr pps "_" + ; addStr pps (Pid.toString pid) + ) + val ppSymbol = if !suppressPidUnderscores then ppSymbolQuietly + else ppSymbolVerbose + in case Pidtab.find (pidtab,pid) + of SOME {symbol,kind,...} => + (case kind + of (Info.FIELDp _ | + Info.VARIABLEp{stClass=SOME Ast.EXTERN,...} | + Info.VARIABLEp{global=true,...}) => + addStr pps (symbol2string symbol) + | Info.VARIABLEp{global=false,...} => ppSymbol symbol + | Info.LABEL => ppSymbol symbol + | Info.TYPEDEFp _ => ppSymbol symbol + | Info.TAGp _ => ppSymbol symbol) + | _ => addStr pps (Pid.toString pid) + end +*) + fun ppSymbol' pps symbol = addStr pps (Symbol.name symbol) + + fun ppSymbol pps (symbol: Symbol.symbol, uid: Pid.uid) = + (addStr pps (Symbol.name symbol); + if !suppressPidUnderscores then () + else (addStr pps "_"; + addStr pps (Pid.toString uid))) + + fun ppId pps ({name,uid,kind,stClass,global,...}: Ast.id) = + case (stClass,global) + of ((Ast.EXTERN,_) | (_, true)) => (* globals *) + if !suppressPidGlobalUnderscores then ppSymbol' pps name + else ppSymbol pps (name,uid) + | _ => ppSymbol pps (name,uid) + (* no uids printed for globals to preserve linkability *) + + fun ppLabel pps ({name,uid,...}: Ast.label) = + ppSymbol pps (name,uid) + + fun ppMember pps ({name,...}: Ast.member) = + ppSymbol' pps name + + fun ppTid (tidtab: Tables.tidtab) pps tid = + case Tidtab.find (tidtab,tid) + of SOME {name=NONE,...} => + addStr pps (Tid.toString tid) + | SOME {name=SOME id,...} => + if !suppressTidUnderscores then addStr pps id + else (addStr pps id; + addStr pps "_"; + addStr pps (Tid.toString tid)) + | NONE => addStr pps (Tid.toString tid) + +end diff --git a/ckit/src/ast/simplify-assign-ops.sml b/ckit/src/ast/simplify-assign-ops.sml new file mode 100755 index 0000000..7ef4322 --- /dev/null +++ b/ckit/src/ast/simplify-assign-ops.sml @@ -0,0 +1,160 @@ +(* Copyright (c) 1998 by Lucent Technologies *) + +(* simplify-assign-ops.sml + * + * Main Function: simplifyAssignOp {lookAid, getCoreType, wrapEXPR, getLoc, topLevel, bindSym} + (processBinop, opn, {preOp}, expr1, expr2) + * + * processBinop -- function to call for typechecking and building binop expressions + * opn -- an Ast binary operation + * {preOp} -- true if operation should be performed before result + * e.g. ++x becomes simplifyAssignOp(+, {preOp=true}, x, 1) + * e.g. x+=e becomes simplifyAssignOp(+, {preOp=true}, x, e) + * -- false if operation should be done after result + * e.g. x++ becomes simplifyAssignOp(+, {preOp=false}, x, 1) + * expr1, expr2 -- expressions + * function returns an equivalent simplified expr. + * + * Issues: + * 1. copying must maintain unique pid invariant. + * 2. copying of rvals (simpleDup) versus lvals (duplicateLval) versus (duplicateRval) + * 3. must be careful with types of new variables (bug #1) + * e.g. + * struct { int count[3]; } *p; + * .... + * p->count[i]++; + * generates + * int tmp1[3], tmp2; tmp1=p->count, tmp2=tmp1[i], tmp1[i]=tmp2, tmp2; + * + * AUTHORS: Nevin Heintze (nch@research.bell-labs.com) + * + * TBD: More testing... + *) + +structure SimplifyAssignOps = +struct + + (* Note: lvals are either + Ast.Id + Ast.Sub + Ast.Arrow + Ast.Deref + Ast.Dot where first arg is an lval + *) + fun simplifyAssignOps {lookAid, getCoreType, wrapEXPR, getLoc, topLevel, bindSym, pushTmpVars} = + let + fun wrapEXPR' x = let val (ty, expr) = wrapEXPR x in expr end + + fun combineExprs'(x1, x2 as Ast.EXPR(_, adorn, _)) = wrapEXPR'(getCoreType(lookAid adorn), Ast.Comma(x1, x2)) + + fun combineExprs(NONE, x) = x + | combineExprs(SOME x1, x2) = combineExprs'(x1, x2) + + fun combineExprsOpt(NONE, x) = x + | combineExprsOpt(x, NONE) = x + | combineExprsOpt(SOME x1, SOME x2) = SOME(combineExprs'(x1, x2)) + + fun getExprTy(Ast.EXPR(_, adorn, _)) = getCoreType(lookAid adorn) + + (* Can't just introduce id of type ty: may not be legal to do assignment (e.g. for arrays). + So, first convert arrays to pointers, functions to pointers, and eliminate qualifiers. + Potential problem: elimination of volatile qualifiers on temporary variables? + *) + fun niceTy ty = + (case getCoreType ty of + Ast.Array (_, arrayTp) => Ast.Pointer arrayTp + | Ast.Function x => Ast.Pointer ty + | _ => ty) + + fun simpleDup expr = (* given e, return: (tmp = e, tmp, tmp) *) + let + val ty = getExprTy expr + val sym = Symbol.object "tmp" + val id = {name=sym, uid = Pid.new(), location = getLoc(), + ctype = niceTy ty, stClass = Ast.DEFAULT, status = Ast.DECLARED, + kind = Ast.NONFUN, global = topLevel()} + val _ = pushTmpVars id + val _ = bindSym(sym, Bindings.ID id) + val exprNewVar = wrapEXPR'(ty, Ast.Id id) + in + {assigns=SOME(wrapEXPR'(ty, Ast.Assign(exprNewVar, expr))), + var1=wrapEXPR'(ty, Ast.Id id), + var2=wrapEXPR'(ty, Ast.Id id)} + end + + fun duplicateRval (expr as Ast.EXPR(Ast.Id _, _, _)) = {assigns=NONE, var1=expr, var2=expr} + | duplicateRval expr = simpleDup expr + + fun duplicateLval expr = (* copy lval, factoring out side-effecting expressions *) + let + fun dup(mkExp, expr) = + let + val {assigns, var1, var2} = duplicateRval expr + in + {assigns=assigns, + copy1= mkExp var1, + copy2= mkExp var2} + end + + fun dup2(mkExp, expr1, expr2) = + let + val {assigns=assigns1, var1=var1a, var2=var1b} = duplicateRval expr1 + val {assigns=assigns2, var1=var2a, var2=var2b} = duplicateRval expr2 + val assigns = combineExprsOpt(assigns1, assigns2) + in + {assigns=assigns, + copy1=mkExp(var1a, var2a), + copy2=mkExp(var1b, var2b)} + end + in + case expr of + Ast.EXPR(Ast.Id pid, _, _) => {assigns=NONE, + copy1=expr, + copy2=wrapEXPR'(getExprTy expr, Ast.Id pid)} + | Ast.EXPR(Ast.Arrow(expr1, member), adorn, loc) => + dup(fn e => wrapEXPR'(lookAid adorn, Ast.Arrow(e, member)), expr1) + | Ast.EXPR(Ast.Deref(expr1), adorn, loc) => + dup(fn e => wrapEXPR'(lookAid adorn, Ast.Deref e), expr1) + | Ast.EXPR(Ast.Sub(expr1, expr2), adorn, loc) => + dup2(fn e => wrapEXPR'(lookAid adorn, Ast.Sub e), expr1, expr2) + + | Ast.EXPR(Ast.Member(expr1, member), _, _) => + let + val ty = getExprTy expr + val {assigns, copy1, copy2} = duplicateLval(expr1) + in + {assigns=assigns, + copy1=wrapEXPR'(ty, Ast.Member(copy1, member)), + copy2=wrapEXPR'(ty, Ast.Member(copy2, member))} + end + | Ast.EXPR(_, adorn, loc) => + (* not an lval --> just use simple duplication (should never occur, unless error) *) + let val {assigns, var1, var2} = duplicateRval expr + in + {assigns=assigns, copy1=var1, copy2=var2} + end + end + + fun simplifyAss(processBinop, opn, {preOp=true}, expr1, expr2) = (* e.g. ++x; ++( *p ); x += 5; *p += 5; *) + let val {assigns, copy1, copy2} = duplicateLval expr1 + fun procBinop x = let val (ty, expr) = processBinop x in expr end + val newExpr = Ast.Assign(copy1, procBinop(getExprTy copy2, copy2, getExprTy expr2, expr2, opn)) + val newExpr = wrapEXPR'(getExprTy expr1, newExpr) + val finalExpr = combineExprs(assigns, newExpr) + in + (getExprTy finalExpr, finalExpr) + end + | simplifyAss(processBinop, opn, {preOp=false}, expr1, expr2) = (* e.g. x++; ( *p )++; *) + let val {assigns, copy1, copy2} = duplicateLval expr1 + val {assigns=assigns2, var1, var2} = simpleDup copy1 + fun procBinop x = let val (ty, expr) = processBinop x in expr end + val newExpr = Ast.Assign(copy2, procBinop(getExprTy var1, var1, getExprTy expr2, expr2, opn)) + val newExpr = wrapEXPR'(getExprTy expr1, newExpr) + val finalExpr = combineExprs(assigns, combineExprs(assigns2, combineExprs'(newExpr, var2))) + in + (getExprTy finalExpr, finalExpr) + end + + in simplifyAss + end +end \ No newline at end of file diff --git a/ckit/src/ast/sizeof-sig.sml b/ckit/src/ast/sizeof-sig.sml new file mode 100644 index 0000000..e3110c7 --- /dev/null +++ b/ckit/src/ast/sizeof-sig.sml @@ -0,0 +1,32 @@ +(* Copyright (c) 1998 by Lucent Technologies *) + +signature SIZEOF = sig + + val warningsOn : unit -> unit (* default *) + val warningsOff : unit -> unit + val byteSizeOf : {sizes: Sizes.sizes, err: string -> unit, + warn: string -> unit, bug: string->unit} + -> Tables.tidtab -> Ast.ctype -> {bytes:int, byteAlignment:int} + + val reset : unit -> unit + (* reset memoization table *) + +(* DBM: following not yet used? *) + + val bitSizeOf : {sizes: Sizes.sizes, err: string -> unit, + warn: string -> unit, bug: string->unit} + -> Tables.tidtab -> Ast.ctype + -> {bits:int, bitAlignment:int} + + val fieldOffsets: {sizes: Sizes.sizes, err: string -> unit, + warn: string -> unit, bug: string->unit} + -> Tables.tidtab -> Ast.ctype + -> ({memberOpt:Ast.member option, bitOffset:int} list) option + + (* looks up a field in the list returned by fieldOffsets *) + val getField: {sizes: Sizes.sizes, err: string -> unit, + warn: string -> unit, bug: string->unit} + -> Ast.member * {memberOpt: Ast.member option, bitOffset:int} list + -> {memberOpt: Ast.member option, bitOffset: int} + +end (* signature SIZEOF *) diff --git a/ckit/src/ast/sizeof.sml b/ckit/src/ast/sizeof.sml new file mode 100644 index 0000000..02705c2 --- /dev/null +++ b/ckit/src/ast/sizeof.sml @@ -0,0 +1,402 @@ +(* Copyright (c) 1998 by Lucent Technologies *) + +(* sizeof.sml + + * rules for bit-fields: + + - cannot be more than sizeof an int (word) + - can be zero (only if there is no id) : means fill to word + - need not have id + - can straddle boundary of words (very implementation + dependent); behavior specified by S.bitFieldAlignment. + + *) + +structure Sizeof : SIZEOF = +struct + + structure Tid = Tid + structure B = Bindings + structure S = Sizes + structure TU = TypeUtil + structure TypeCheckControl = Config.TypeCheckControl + + structure Map = BinaryMapFn (struct + type ord_key = Tid.uid + val compare = Tid.compare + end) + + val warningsRef = ref true + fun warningsOn () = warningsRef := true + fun warningsOff () = warningsRef := false + fun localWarning s = if !warningsRef then TextIO.print s else () + + (* ref used for memoization of sizeof values *) + val tidSizeAlignMapRef = + ref (Map.empty : {tabOpt: {memberOpt: Ast.member option, bitOffset:int} list option, + bits: int, align: int} Map.map) + + fun reset() = + tidSizeAlignMapRef := + (Map.empty : {tabOpt:{memberOpt: Ast.member option, bitOffset:int} list option, + bits: int, align: int} Map.map) + + fun padToBoundary{bits, boundary} = + let val q = Int.mod (bits,boundary) + in if q = 0 then bits else bits + (boundary - q) end + + (* used as a bogus return value *) + val defaultIntLayout = + let val {bits, align} = #int Sizes.defaultSizes + in {tabOpt=NONE:({memberOpt:Ast.member option, bitOffset:int} list) option, + bits=bits, align=align} + end + + fun fieldSizeStruct (sizesErrWarnBug as {sizes, err, warn, bug}) + tidtab (ctype, memberOpt, SOME li) = + let val errors = + case TU.getCoreType tidtab ctype + of Ast.Numeric(_, _, _, Ast.FLOAT, _) => + err "Can't mix bitfield and float." + | Ast.Numeric(_, _, _, Ast.DOUBLE, _) => + err "Can't mix bitfield and double." + | Ast.Numeric(_, _, _, Ast.LONGDOUBLE, _) => + err "Can't mix bitfield and longdouble." + | Ast.Numeric(_, _, _, Ast.CHAR, _) => + if TypeCheckControl.ISO_bitfield_restrictions + then err "Can't mix bitfield and char in ISO/ANSI C." + (* (ISO spec, section 6.5.2.1, p60) *) + else () + | Ast.Numeric(_, _, _, Ast.SHORT, _) => + if TypeCheckControl.ISO_bitfield_restrictions + then err "Can't mix bitfield and short in ISO/ANSI C." + (* (ISO spec, section 6.5.2.1, p60) *) + else () + | Ast.Numeric(_, _, _, Ast.LONG, _) => + if TypeCheckControl.ISO_bitfield_restrictions + then err "Can't mix bitfield and long in ISO/ANSI C." + (* (ISO spec, section 6.5.2.1, p60) *) + else () + | Ast.Numeric(_, _, _, Ast.LONGLONG, _) => + if TypeCheckControl.ISO_bitfield_restrictions + then err "Can't mix bitfield and long long in ISO/ANSI C." + (* (ISO spec, section 6.5.2.1, p60) *) + else () + | Ast.Numeric(_, _, _, Ast.INT, _) => () + | (Ast.EnumRef _) => + if TypeCheckControl.allow_enum_bitfields then () + else err "Enum not permitted in bitfield." + | _ => err "Bitfield must be numeric (char, short, int)" + val i = LargeInt.toInt li + val {bits, align,...} = process sizesErrWarnBug tidtab ctype + in if i > bits then err "Width of field exceeds its type" else (); + {memberOpt=memberOpt, bitfield=SOME i, size=bits, align=align} + end + | fieldSizeStruct sizesErrWarnBug tidtab (ctype, memberOpt, NONE) = + let val {bits, align,...} = process sizesErrWarnBug tidtab ctype + in {memberOpt=memberOpt, bitfield=NONE, size=bits, align=align} + end + + and fieldSizeUnion sizesErrWarnBug tidtab (ctype, member) = + let val {bits, align,...} = process sizesErrWarnBug tidtab ctype + in {bits=bits, align=align} + end + + + (* The basic idea is to process bit-fields in order from first to last, + inserting padding as necessary, accumulating alignment constraints, + and recording for each field the bit offset from the start of the struct. + The alignment constraints of the underlying types of bit fields are propagated + to the alignment constraints of the entire structure (with some exceptions; + see below). + + Although the standard only mandates bitfields with underlying type + int (signed or unsigned), most compilers allow for bitfields + of type char, short or long (possible signed or unsigned) as well. + The difference is reflected in the alignment constraints. + + The basic algorithm is as follows. There are two main variables + a) alignmentSoFar: alignment constraint so far encountered + b) nextBit: next bit to be allocated (starts with 0) + NB: corresponds to how many bits so far layed out in this struct + + + To process a bitfield with type t and size b bits, where layout(t) = {size, align} + + if b>0 then + 1. if b > size then indicate error. + 2. alignmentSoFar := max(alignmentSoFar, align) + 3. if (nextBit + b) div size <> nextBit div size + /* i.e. adding this field would cross a "size" boundary */ + pad nextBit to next "size" boundary + 4. struct[field] := nextBit + 5. nextBit += b + else /* b == 0 */ + 6. alignmentSoFar := max(alignmentSoFar, align) + 7. pad nextBit to next "size" boundary + + ASSUMPTIONS: alignments are powers of 2 + + COMPLICATIONS: + A. Only allow int (int, unsigned, signed) bitfields. + This is controlled by the flag TypeCheckControl.ISO_bitfield_restrictions + (default = false). + If set to true, then an error is raised + for bitfields with types other than int, unsigned, signed. + + B. Do unnamed bitfields contribute to alignment constraints? + Most compilers say no (except lcc). + This is controlled by the sizes.sml flag ignoreUnnamedBitFieldAlignment (default true). + If set, then the alignment of unnamed bitfields is ignored (i.e. only + their size counts). + e.g. + struct X {int :8; char x; char y;} sizeof(struct X) = 3 (true) or 4 (false) + + C. Are non bitfields packed with bitfields? + C1: Only pack bit fields (sizes.sml flag: onlyPackBitFields) + if flag is true, then start the current bitfield on a size boundary + unless previous field was a bitfield. + e.g. struct X {char x; int z: 5;} sizeof(struct X) = 4 (false) or 8 (true) + + C2: In theory there is a complementary variation involving non-bitfields after + bitfields, but it is not clear what this might mean (although + that's never stopped someone putting it into a c compiler), and + it isn't implemented in ckit. + + ---------------------------------------------------------------- + Old notes on unnamed length zero bit fields: + + Haberson and Steele p 138 says + "Specifying a (bit field) length of 0 for an unnamed bit field has a + special meaning - it indicates that the following component should + begin on the next boundary appropriate to its type. ("Appropriate" + is not specified further; in ISO C, it is the next int-size unit.)" + + We implement the following (which seems to be what SGI cc and gcc do): + Specifying a (bit field) length of 0 for an unnamed bit field indicates + that the following component should be aligned according to the + alignment constraints of the unnamed bit field. (Of course if the + next field has its own alignment constriants, e.g. is double, then + the next fields alignment constraints must also be satisfied.) + + Note: this interpretation differs from ISO (and also K&R p 150) if + char and short bit fields are involved e.g. + + struct s { char a : 4; + short : 0; + char b : 2; + }; + *) + + and computeFieldStruct {sizes: Sizes.sizes, err, warn, bug} + {nextBit, alignmentSoFar, lastFieldWasBitField, + field={memberOpt, bitfield=SOME bits, size, align}} = + if bits > 0 then + let + val nextBit = (* pad out if last field not bitfield and onlyPackBitFields *) + if #onlyPackBitFields sizes andalso not lastFieldWasBitField + then padToBoundary{bits=nextBit, boundary=size} + else nextBit + val alignmentSoFar = (* accumulate alignment constraints *) + (case memberOpt of + NONE => if #ignoreUnnamedBitFieldAlignment sizes then alignmentSoFar + else Int.max (alignmentSoFar,align) + | SOME _ => Int.max (alignmentSoFar,align)) + val fieldStartBit = (* pad out if we cross a "size" boundary *) + if (nextBit + bits) div size = nextBit div size + then nextBit + else padToBoundary{bits=nextBit, boundary=size} + in (* NB: checking for error case of (bits > size) is done in fieldSizeStruct *) + {field={memberOpt=memberOpt, bitOffset=nextBit}, + nextBit=nextBit + bits, + alignmentSoFar=alignmentSoFar, + lastFieldWasBitField=true} + end + else (* bits = 0 *) + let + val alignmentSoFar = if #ignoreUnnamedBitFieldAlignment sizes + then alignmentSoFar + else Int.max (alignmentSoFar,align) + val nextBit = padToBoundary{bits=nextBit, boundary=size} + val _ = (case memberOpt of + NONE => () + | _ => err "Named bit-field has zero width") + in + {field={memberOpt=memberOpt, bitOffset=nextBit}, + nextBit=nextBit, + alignmentSoFar=alignmentSoFar, + lastFieldWasBitField=true} + end + | computeFieldStruct {sizes, err, warn, bug} + {nextBit, alignmentSoFar, lastFieldWasBitField, + field={memberOpt, bitfield=NONE, size, align}} = + let + val thisBit = padToBoundary{bits=nextBit, boundary=align} + val alignmentSoFar = Int.max(alignmentSoFar, align) + in + {field={memberOpt=memberOpt, bitOffset=thisBit}, + nextBit=thisBit + size, + alignmentSoFar=alignmentSoFar, + lastFieldWasBitField=false} + end + + and computeFieldListStruct (sizesErrWarnBug as {sizes, err, warn, bug}) + tidtab fieldList = + let val l = List.map (fieldSizeStruct sizesErrWarnBug tidtab) fieldList + fun foldfn (field, {tab, nextBit, alignmentSoFar, lastFieldWasBitField}) = + let val {field, nextBit, alignmentSoFar, lastFieldWasBitField} = + computeFieldStruct sizesErrWarnBug {nextBit=nextBit, + alignmentSoFar=alignmentSoFar, + field=field, + lastFieldWasBitField=lastFieldWasBitField} + in + {tab=field :: tab, + nextBit=nextBit, + alignmentSoFar=alignmentSoFar, + lastFieldWasBitField=lastFieldWasBitField} + end + val {tab, nextBit, alignmentSoFar, lastFieldWasBitField} = + List.foldl foldfn {tab=nil, + nextBit=0, + alignmentSoFar=(#align(#min_struct sizes)), + lastFieldWasBitField=false} l + in + {tab=List.rev tab, nextBit=nextBit, align=alignmentSoFar} + end + + + and computeFieldListUnion (sizesErrWarnBug as {sizes, err, warn, bug}) + tidtab fieldList = + let + val l = List.map (fieldSizeUnion sizesErrWarnBug tidtab) + fieldList + fun foldfn ({bits=fieldBits,align=fieldAlign}, {size, align}) = + {size=Int.max(size, fieldBits), align=Int.max(align, fieldAlign)} + (* again, assume alignments are powers of 2 *) + in + foldr foldfn {size=0, align=(#align(#min_union sizes))} l + end + + + and processTid (sizesErrWarnBug as {sizes, err, warn, bug}) + (tidtab: Tables.tidtab) tid = + case Map.find (!tidSizeAlignMapRef, tid) + of SOME result => result + | NONE => + let val result = + case Tidtab.find (tidtab,tid) + of SOME({ntype=SOME(B.Struct (_,fields)),...}) => + let val {tab, nextBit, align, ...} = + computeFieldListStruct sizesErrWarnBug + tidtab fields + in {tabOpt=SOME tab, bits=padToBoundary{bits=nextBit, boundary=align}, + align=align} + end + | SOME({ntype=SOME(B.Union (_,fields)),...}) => + let val {size, align} = + computeFieldListUnion sizesErrWarnBug + tidtab fields + in {tabOpt=NONE, + bits=padToBoundary{bits=size, boundary=align}, + align=align} + end + | SOME({ntype=SOME(B.Typedef (_,ty)),...}) => process sizesErrWarnBug tidtab ty + | SOME({ntype=SOME(B.Enum _),...}) => + let val {bits, align} = #int sizes + in {tabOpt=NONE, bits=bits, align=align} + end + | SOME{ntype=NONE,...} => + (err + "sizeof applied to a partial type"; + defaultIntLayout) + | NONE => + (bug + "sizeof: missing type id in type-id map."; + defaultIntLayout) + in + tidSizeAlignMapRef := Map.insert (!tidSizeAlignMapRef, tid, result); + result + end + + and process (sizesErrWarnBug as {sizes, err, warn, bug}) tidtab ty = + case ty + of Ast.TypeRef tid => processTid sizesErrWarnBug tidtab tid + | (Ast.StructRef tid | Ast.UnionRef tid) => + processTid sizesErrWarnBug tidtab tid + | Ast.EnumRef _ => + let val {bits,align} = #int sizes + in {tabOpt=NONE,bits=bits,align=align} end + | Ast.Qual (_,ty) => process sizesErrWarnBug tidtab ty + | Ast.Array (SOME(n, _) ,ty) => + let val {tabOpt, bits=sz, align} = process sizesErrWarnBug tidtab ty + in {tabOpt=NONE, bits = (LargeInt.toInt n) * sz, align=align} end + | Ast.Array(NONE,ty) => + ( err "taking sizeof array whose size is unspecified: assuming unit size.\n" + ; let val {bits,align,...} = process sizesErrWarnBug tidtab ty + in {tabOpt = NONE, bits = bits, align = align} + end + ) + | Ast.Pointer _ => + let val {bits,align} = #pointer sizes + in {tabOpt=NONE,bits=bits,align=align} end + | Ast.Numeric (_, _, _,ik, _) => + let val {char,short,int,long,longlong,float,double,longdouble,...} + = sizes + val {bits,align} = case ik + of Ast.CHAR => char + | Ast.SHORT => short + | Ast.INT => int + | Ast.LONG => long + | Ast.LONGLONG => longlong + | Ast.FLOAT => float + | Ast.DOUBLE => double + | Ast.LONGDOUBLE => longdouble + in {tabOpt=NONE,bits=bits,align=align} end + | Ast.Function _ => + let val {bits,align} = #pointer sizes + in {tabOpt=NONE,bits=bits,align=align} end + | Ast.Error => + let val {bits,align} = #int sizes + in {tabOpt=NONE,bits=bits,align=align} + end + | _ => let val {bits,align} = #int sizes + in err "invalid type to be sized: assuming int size.\n"; + {tabOpt=NONE,bits=bits,align=align} + end + + fun toBytes bits = + if (bits mod 8) = 0 then bits div 8 + else ( localWarning "Warning: toBytes is rounding your bits." + ; bits div 8 + ) + + fun byteSizeOf sizesErrWarnBug tidtab ty = + let val {bits,align,...} = process sizesErrWarnBug tidtab ty + in {bytes=toBytes bits, + byteAlignment=toBytes align} + end + + + fun bitSizeOf sizesErrWarnBug tidtab ty = + let val {bits,align,...} = process sizesErrWarnBug tidtab ty + in {bits=bits, + bitAlignment=align} + end + + + fun fieldOffsets sizesErrWarnBug tidtab ty = + #tabOpt(process sizesErrWarnBug tidtab ty) + + fun equalMember({uid=uid1,...}: Ast.member, {uid=uid2,...}: Ast.member) = + Pid.equal(uid1,uid2) + + fun getField {sizes, err, warn, bug} (member,[]) = + (err "field not found"; + {memberOpt = NONE, bitOffset=0}) + | getField sizesErrWarnBug (member,{memberOpt=NONE,...}::fields) = + getField sizesErrWarnBug (member,fields) + | getField sizesErrWarnBug (member,(field as {memberOpt=SOME member',bitOffset})::fields) = + if equalMember (member,member') then field + else getField sizesErrWarnBug (member,fields) + +end (* structure Sizeof *) diff --git a/ckit/src/ast/sizes-sig.sml b/ckit/src/ast/sizes-sig.sml new file mode 100644 index 0000000..28a3797 --- /dev/null +++ b/ckit/src/ast/sizes-sig.sml @@ -0,0 +1,20 @@ +(* Copyright (c) 1998 by Lucent Technologies *) + +signature SIZES = + sig + type layout = {bits:int, align:int} + type sizes = {char: layout, + short: layout, + int: layout, + long: layout, + longlong: layout, + float: layout, + double: layout, + longdouble: layout, + pointer: layout, + min_struct: layout, + min_union: layout, + onlyPackBitFields: bool, + ignoreUnnamedBitFieldAlignment: bool} + val defaultSizes: sizes +end diff --git a/ckit/src/ast/sizes.sml b/ckit/src/ast/sizes.sml new file mode 100644 index 0000000..57ff9c1 --- /dev/null +++ b/ckit/src/ast/sizes.sml @@ -0,0 +1,39 @@ +(* sizes.sml *) + +(* sizes.sml contains a default version of sizes; + other versions will be available in a sizes database, + or can be automatically generated (using sizes.c) *) + +structure Sizes : SIZES = +struct + type layout = {bits:int, align:int} + type sizes = {char: layout, + short: layout, + int: layout, + long: layout, + longlong: layout, + float: layout, + double: layout, + longdouble: layout, + pointer: layout, + min_struct: layout, + min_union: layout, + onlyPackBitFields: bool, + ignoreUnnamedBitFieldAlignment: bool} + + val defaultSizes : sizes = + {char = {bits=8, align=8}, + short= {bits=16,align=16}, + int = {bits=32,align=32}, + long = {bits=32,align=32}, + longlong = {bits=64,align=64}, (* default guess -- highly architecture dependent *) + float = {bits=32,align=32}, + double = {bits=64,align=64}, + longdouble = {bits=64,align=64}, + pointer = {bits=32,align=32}, + min_struct = {bits = 8, align = 8}, + min_union = {bits = 8, align = 8}, + onlyPackBitFields = false, + ignoreUnnamedBitFieldAlignment = true} + +end (* structure Sizes *) diff --git a/ckit/src/ast/sources.cm b/ckit/src/ast/sources.cm new file mode 100644 index 0000000..9c4f496 --- /dev/null +++ b/ckit/src/ast/sources.cm @@ -0,0 +1,118 @@ +Group is + +#if defined(NEW_CM) +#if (SMLNJ_VERSION * 100 + SMLNJ_MINOR_VERSION >= 11029) + + (* standard basis *) + $/basis.cm + + (* Compiler *) + $smlnj/compiler.cm + + (* standard library *) + $/smlnj-lib.cm +#else + (* standard basis *) + basis.cm + + (* Compiler *) + host-compiler.cm + + (* standard library *) + smlnj-lib.cm +#endif +#else + smlnj-lib.cm +#endif + + (* parser *) + ../parser/sources.cm + + (* configuration *) + ../variants/sources.cm + + (* C symbols *) + symbol-sig.sml + symbol.sml + + (* unique identifiers *) + uid-sig.sml + uid-fn.sml + + (* abstract syntax *) + aid.sml (* was adornment.sml *) + pid.sml + tid.sml + ast-sig.sml + ast.sml + ctype-eq.sml + + (* language extension support *) + cnv-ext-sig.sml + + (* tables for unique identifiers *) + uidtabimp-sig.sml + uidtabimp-fn.sml + aidtab.sml + pidtab.sml + tidtab.sml + tables.sml (* type abbreviations for pidtab, tidtab, aidtab *) + + (* pretty-printers *) + pp/pp-lib.sml + pp/pp-ast-sig.sml + pp/pp-ast.sml + pp/pp-ast-adornment-sig.sml + pp/pp-ast-ext-sig.sml + pp/pp-ast-fn.sml + +(* Not currently needed: current uses of ast-equality (in simplify-ast) + just use polymorphic equality. But this code may be useful in the future + if the modifications to the ast types violate rules for eqtypes. + (* equality modulo alpha renaming *) + eq-binary-maps.sml + eq-ast-ext-sig.sml + eq-ctype.sml + eq-ast.sml +*) + (* translation from parse tree *) + sizes-sig.sml + sizes.sml + sizeof-sig.sml + sizeof.sml + + type-util-sig.sml + type-util.sml + + bindings.sml + + state-sig.sml + state.sml + + simplify-assign-ops.sml + build-ast-sig.sml + build-ast.sml + + anonymous-structs.sml + + initializer-normalizer-sig.sml + initializer-normalizer.sml + + (* top level *) + parse-to-ast-sig.sml + parse-to-ast.sml + + (* extensions *) +#if (defined(d)) + extensions/d/ast-ext-sig.sml + extensions/d/ast-ext.sml + extensions/d/cnv-ext.sml + extensions/d/eq-ast-ext.sml + extensions/d/pp-ast-ext-fn.sml +#else + extensions/c/ast-ext-sig.sml + extensions/c/ast-ext.sml + extensions/c/cnv-ext.sml + extensions/c/pp-ast-ext-fn.sml + +#endif diff --git a/ckit/src/ast/state-sig.sml b/ckit/src/ast/state-sig.sml new file mode 100644 index 0000000..21f090c --- /dev/null +++ b/ckit/src/ast/state-sig.sml @@ -0,0 +1,189 @@ +(* Copyright (c) 1998 by Lucent Technologies *) + +(* -------------------------------------------------------------------- + * State: a local structure for operating on context state during the build-ast + * (elaboration) phase. + * State includes: + * - a global symbol table (#globalEnv envContext) + * - a stack of local symbol tables (#localEnv envContext) + * - a stack of locations for error reporting (#locStack locContext) + * - a table of named type identifiers (uidTables.ttab) + * - a table of adornment types (uidTables.atab) giving the type for each expression + * - a table of adornment types (uidTables.implicits) + giving implicit coercions for each expression (if any) + * - a list of type identifiers (defined in the current "context") + * (tidsContext.newTids) + * - a stack of tables of switch statement labels (switchContext.switchLabels) + * -------------------------------------------------------------------- + *) + +signature STATE = +sig + +(* finite map structures *) + + structure ST : ORD_MAP where type Key.ord_key = Symbol.symbol + structure IT : ORD_MAP where type Key.ord_key = LargeInt.int + + +(* environments *) + + type symtab = Bindings.symBinding ST.map + type env = symtab list (* local environment stack *) + + +(* global context types *) + + type uidTables = + {ttab : Tables.tidtab, (* type name table *) + atab : Tables.aidtab, (* adornment table *) + implicits : Tables.aidtab} (* "optional" adornment table -- for special casts *) + + type envContext = + {globalEnv : symtab ref, (* the global symbol table *) + localEnv : env ref} (* the local environment stack *) + + +(* local context types : temporary information used during elaboration *) + + (* tidsContext: sequence of tids of types created while processing a phrase *) + type tidsContext = + {newTids : Tid.uid list ref} + + (* tmpVariables: sequence of (pid, ty) pairs created while processing a phrase *) + (* used when inserting explicit coercions in the case of ++, --, += *) + type tmpVariables = + {newVariables : Ast.id list ref} + + (* for use in D *) + type typeContext = + {typeCxts : Ast.ctype option list ref} + + (* funContext: information for the current function def *) + type funContext = + {labelTab : (Ast.label * bool) ST.map ref, + gotos : Symbol.symbol list ref, + returnTy : Ast.ctype option ref} + + (* table for collecting switch labels while processing switch statements *) + type switchContext = + {switchLabels : {switchTab : unit IT.map, default : bool} list ref} + + type locContext = (* location context *) + {locStack : SourceMap.location list ref} + + (* global state components *) + type globalState = + {uidTables : uidTables, + envContext : envContext, (* contains some local working state in localEnv *) + errorState : Error.errorState} + + (* local, "working", state components, holding temporary information *) + type localState = + {locContext: locContext, + tidsContext : tidsContext, + tmpVariables : tmpVariables, + funContext: funContext, + switchContext: switchContext, + typeContext: typeContext} + + (* initial information for calling makeAst *) + datatype stateInfo + = STATE of uidTables * symtab (* state carried over from previous translation unit *) + | INITIAL (* no previous state *) + + +(* packages of functions to manipulate state implicitly *) + + type stateFuns = + {globalState : globalState, + localState : localState, + (* the state records, included in case direct access to the + * state is required (probably shouldn't be) *) + + locFuns : + {pushLoc : SourceMap.location -> unit, + (* push location onto location stack *) + popLoc : unit -> unit, + (* pop location stack *) + getLoc : unit -> SourceMap.location, + (* get top location from location stack *) + error : string -> unit, + (* report an error and its location *) + warn : string -> unit}, + (* (if warnings are on) report a warning and its location *) + + tidsFuns : + {pushTids : Tid.uid -> unit, + (* records tids from new structs/unions/typdefs + * introduced in declarations, casts, etc. *) + resetTids : unit -> Tid.uid list}, + (* returns list of recently generated tids (since last resetTids call) *) + + tmpVarsFuns : + {pushTmpVars : Ast.id -> unit, + (* records pids for temporary introduced in decompilation of ++, --, +=, and their friends *) + resetTmpVars : unit -> Ast.id list}, + (* returns list of recently generated pids (since last resetTmpVars call) *) + + envFuns : + {topLevel : unit -> bool, + (* are we at top level? *) + pushLocalEnv : unit -> unit, + (* push a fresh symbol table onto the stack *) + popLocalEnv : unit -> unit, + (* pop symbol table stack *) + lookSym : Symbol.symbol -> Bindings.symBinding option, + (* lookup type of a symbol in symbol table stack *) + bindSym : Symbol.symbol * Bindings.symBinding -> unit, + (* insert (i.e. bind) a symbol in the top (most local) symbol table *) + lookSymGlobal : Symbol.symbol -> Bindings.symBinding option, + (* lookup type of a symbol in the global symbol table *) + bindSymGlobal : Symbol.symbol * Bindings.symBinding -> unit, + (* insert (i.e. bind) a symbol in the global symbol table *) + lookLocalScope : Symbol.symbol -> Bindings.symBinding option, + (* look for a binding in the most local symbol table *) + getGlobalEnv : unit -> symtab}, + (* return the global symbol table *) + + uidTabFuns : + {bindAid : Ast.ctype -> Aid.uid, + (* generate a new adornment identifier and bind it to the type *) + lookAid : Aid.uid -> Ast.ctype option, + (* lookup adornment identifier in state aidtab *) + bindTid : Tid.uid * Bindings.tidBinding -> unit, + (* insert a type identifier into the type symbol table *) + lookTid : Tid.uid -> Bindings.tidBinding option}, + (* lookup a type identifier in the type symbol table *) + + funFuns : (* manipulate current function context *) + {newFunction : Ast.ctype -> unit, + (* enter a new function context with the given return type *) + getReturnTy : unit -> Ast.ctype option, + (* get the return type of the current function context *) + checkLabels : unit -> (Symbol.symbol * SourceMap.location) option, + (* verify that all goto targets are defined as labels *) + addLabel : Symbol.symbol * SourceMap.location -> Ast.label, + (* define a label, returning an error flag if multiple defs *) + addGoto : Symbol.symbol * SourceMap.location -> Ast.label}, + (* record a label as a goto target *) + + switchFuns : (* manipulate current switch context *) + {pushSwitchLabels : unit -> unit, (* enter a switch statement *) + popSwitchLabels : unit -> unit, (* leave a switch statement *) + addSwitchLabel : LargeInt.int -> string option, + (* record a new switch label; returns SOME errormsg if duplicate + * or not within a switch *) + addDefaultLabel : unit -> string option}} + (* record a default label; returns SOME errormsg if multiple defaults, + * or not within a switch *) + + (* state initialization functions *) + val initGlobal : (stateInfo * Error.errorState) -> globalState + val initLocal : unit -> localState + + val stateFuns : globalState * localState -> stateFuns + (* returns a collection of state functions specialized to + * operate on the state passed as argument *) + +end (* sigature STATE *) diff --git a/ckit/src/ast/state.sml b/ckit/src/ast/state.sml new file mode 100644 index 0000000..c6a644b --- /dev/null +++ b/ckit/src/ast/state.sml @@ -0,0 +1,505 @@ +(* Copyright (c) 1998 by Lucent Technologies *) + +structure State : STATE = +struct + + structure Sym = Symbol + (* uid structures: program, type and adornment identifiers *) + structure Pid = Pid + structure Tid = Tid + structure Aid = Aid + (* imperative uid tables (hashtables) *) + structure TT = Tidtab + structure AT = Aidtab (* was TypeAddornmentTab *) + + (* symbol table binary maps *) + structure ST = BinaryMapFn (struct + type ord_key = Sym.symbol + val compare = Sym.compare + end) + + (* int binary maps *) + structure IT = BinaryMapFn (struct + type ord_key = LargeInt.int + val compare = LargeInt.compare + end) + + + (* environments *) + type symtab = Bindings.symBinding ST.map + type env = symtab list (* local environments *) + + + (* global context types *) + + type uidTables = + {ttab : Tables.tidtab, (* type name table *) + atab : Tables.aidtab, (* adornment table *) + implicits : Tables.aidtab} (* "optional" adornment table -- for special casts *) + + type envContext = + {globalEnv : symtab ref, (* the global symbol table *) + localEnv : env ref} (* the local environment stack *) + + + (* local ("working") context types *) + + (* tidsContext: sequence of tids of types created while processing a phrase *) + type tidsContext = + {newTids : Tid.uid list ref} + + (* tmpVariables: sequence of pids created while processing a phrase *) + (* used when inserting explicit coercions in the case of ++, --, += *) + type tmpVariables = + {newVariables : Ast.id list ref} + + (* for use in D *) + type typeContext = + {typeCxts : Ast.ctype option list ref} + + (* information for the current function def *) + type funContext = + {labelTab : (Ast.label * bool) ST.map ref, + gotos : Sym.symbol list ref, + returnTy : Ast.ctype option ref} + + (* table for collecting switch labels while processing switch statements *) + type switchContext = + {switchLabels : {switchTab : unit IT.map, default : bool} list ref} + + (* location context, mainly for error messages *) + type locContext = + {locStack : SourceMap.location list ref} + + + (* global state components *) + type globalState = + {uidTables : uidTables, + envContext : envContext, (* contains some local working state in localEnv *) + errorState : Error.errorState} + + (* local, "working", state components *) + type localState = + {locContext: locContext, + tidsContext : tidsContext, + tmpVariables : tmpVariables, + funContext: funContext, + switchContext: switchContext, + typeContext: typeContext} + + + (* initial state information for calling makeAst *) + datatype stateInfo + = STATE of uidTables * symtab (* previous state info *) + | INITIAL (* no previous state info *) + + type stateFuns = + {globalState : globalState, + localState : localState, + (* the state records, included for convenience *) + + locFuns : + {pushLoc : SourceMap.location -> unit, + popLoc : unit -> unit, + getLoc : unit -> SourceMap.location, + error : string -> unit, + warn : string -> unit}, + + tidsFuns : + {pushTids : Tid.uid -> unit, + resetTids : unit -> Tid.uid list}, + + tmpVarsFuns : + {pushTmpVars : Ast.id -> unit, + resetTmpVars : unit -> Ast.id list}, + + envFuns : + {topLevel : unit -> bool, + pushLocalEnv : unit -> unit, + popLocalEnv : unit -> unit, + lookSym : Sym.symbol -> Bindings.symBinding option, + bindSym : Sym.symbol * Bindings.symBinding -> unit, + lookSymGlobal : Sym.symbol -> Bindings.symBinding option, + bindSymGlobal : Sym.symbol * Bindings.symBinding -> unit, + lookLocalScope : Sym.symbol -> Bindings.symBinding option, + getGlobalEnv : unit -> symtab}, + + uidTabFuns : + {bindAid : Ast.ctype -> Aid.uid, + lookAid : Aid.uid -> Ast.ctype option, + bindTid : Tid.uid * Bindings.tidBinding -> unit, + lookTid : Tid.uid -> Bindings.tidBinding option}, + + funFuns : + {newFunction : Ast.ctype -> unit, + getReturnTy : unit -> Ast.ctype option, + checkLabels : unit -> (Symbol.symbol * SourceMap.location) option, + addLabel : Sym.symbol * SourceMap.location -> Ast.label, + addGoto : Sym.symbol * SourceMap.location -> Ast.label}, + + switchFuns : + {pushSwitchLabels : unit -> unit, + popSwitchLabels : unit -> unit, + addSwitchLabel : LargeInt.int -> string option, + (* returns error message option *) + addDefaultLabel : unit -> string option}} + (* returns error message option *) + + +(* state initialization *) +fun initLocal () : localState = + {tidsContext = + {newTids = ref []}, + tmpVariables = + {newVariables = ref []}, + typeContext = + {typeCxts = ref []}, + funContext = + {labelTab = ref ST.empty, + gotos = ref [], + returnTy = ref NONE}, + switchContext = + {switchLabels = ref []}, + locContext = + {locStack = ref [SourceMap.UNKNOWN]}} + +fun initGlobal(INITIAL, errorState: Error.errorState) : globalState = + {uidTables = + {ttab = TT.uidtab(), + atab = AT.uidtab(), + implicits = AT.uidtab()}, + envContext = + {globalEnv = ref ST.empty, + localEnv = ref []}, + errorState = errorState} + + | initGlobal(STATE({ttab,atab,implicits},globalEnv), errorState) = + {uidTables = + {ttab = ttab, + atab = atab, + implicits = implicits}, + envContext = + {globalEnv = ref(globalEnv), + localEnv = ref []}, + errorState = errorState} + + +(* provide packages of implicit state manipulation functions *) +fun stateFuns(globalState as {uidTables, envContext, errorState} : globalState, + localState as + {tidsContext, tmpVariables, funContext, switchContext, locContext,...}: localState) + : stateFuns = +let + +val bug = Error.bug errorState + +(* tidsContext functions ***********************************************) +local val {newTids} = tidsContext in + + fun pushTids tid = + newTids := tid :: !newTids + + fun resetTids () = + rev(!newTids) before (newTids := []) + (* ct's pushed onto newTids as encountered; need to reverse list to + * give original program order*) +end + + +(* newVariables functions ***********************************************) +local val {newVariables} = tmpVariables in + + fun pushTmpVars pidTy = + newVariables := pidTy :: !newVariables + + fun resetTmpVars () = + rev(!newVariables) before (newVariables := []) + (* pidTy pairs are pushed onto newVariables as encountered; need to reverse list to + * give original program order*) +end + + +(* location functions *************************************************) +local val {locStack} = locContext (* also uses errorState *) +in + + (* accesses: locStack *) + fun error (msg: string) = + case !locStack + of loc :: _ => Error.error(errorState,loc,msg) + | nil => bug "Empty location stack" + + (* accesses: locStack *) + fun warn (msg: string) = + case !locStack + of loc :: _ => Error.warning(errorState,loc,msg) + | nil => bug "Empty location stack" + + (* get "current" location *) + (* accesses: locStack *) + fun getLoc () = + case !locStack + of loc :: _ => loc + | nil => (bug "getLoc: empty location stack"; SourceMap.UNKNOWN) + + (* push the location stack, on entering a marked phrase *) + (* affects: locStack *) + fun pushLoc loc = + locStack := loc :: !locStack + + (* pop the location stack, on exiting a marked phrase *) + (* affects: locStack *) + fun popLoc () = + case !locStack + of _ :: rest => locStack := rest + | nil => bug "popLoc: empty location stack" + +end (* locContext *) + +(* switch label functions ***************************************************) +local val {switchLabels} = switchContext in + + (* effects: switchLabels *) + fun popSwitchLabels () = + (case !switchLabels + of _ :: swLabels => switchLabels := swLabels + | nil => bug "State: can't pop empty switchlabels") + + (* effects: switchLabels *) + fun pushSwitchLabels () = + switchLabels := {switchTab = IT.empty, default=false} :: !switchLabels + + (* effects: switchLabels *) + fun addSwitchLabel (i: LargeInt.int) : string option = + case !switchLabels + of {switchTab, default} :: rest => + (case IT.find(switchTab, i) + of NONE => + let val switchTab = IT.insert(switchTab, i, ()) + in switchLabels := {switchTab=switchTab, default=default}::rest; + NONE + end + | SOME _ => (* error return *) + SOME ("Duplicate case label " ^ (LargeInt.toString i) ^ + " in the same switch statement")) + | nil => (* error return *) + SOME ("Case label " ^ (LargeInt.toString i) ^ + " appears outside a switch statement") + + (* effects: switchLabels *) + fun addDefaultLabel () : string option = + case !switchLabels + of {switchTab, default} :: rest => + if default then (* error return *) + SOME "Duplicate default label in the same switch statement" + else (switchLabels := {switchTab=switchTab, default=true} :: rest; + NONE) + | nil => (* error return *) + SOME "Default label appears outside a switch statement" + +end (* switchContext *) + +(* identifier table functions *******************************************) +local val {ttab,atab,...} = uidTables in + + (* generate a new aid, bind it to ty in atab, and return it *) + fun bindAid ty = + let val aid = Aid.new () + in AT.insert(atab,aid,ty); + aid + end + + fun lookAid aid = AT.find (atab,aid) + + fun bindTid (tid, binding) = TT.insert(ttab,tid,binding) + + fun lookTid tid = TT.find (ttab,tid) + +end (* identifier functions *) + + +(* funContext functions *************************************************) +local val {labelTab,gotos,returnTy} = funContext +in + + (* effects: funContext *) + fun newFunction(returnty) = + (labelTab := ST.empty; + gotos := []; + returnTy := SOME returnty) + + (* accesses funContext *) + fun getReturnTy () = !returnTy + + (* accesses: ? + * effects: labelTab *) + (* DBM: labToPid called only with definition=false from addGoto, + * so errorFl will always be returned false in that case. On the + * other hand, in addLabel, the value of the error flag is discarded. *) + fun symbolToLabel (definition: bool, labSym : Symbol.symbol, loc: SourceMap.location) + : (Ast.label * bool) = + case ST.find(!labelTab,labSym) + of SOME(label, true) => (* previously defined *) + if definition then (label, true) (* error, multiple defitions *) + else (label, false) (* no error *) + | SOME(label, false) => (* label has been seen previously but not defined *) + (if definition + then (labelTab := ST.insert(!labelTab, labSym, (label, true))) + (* mark as defined, rebinding labSym in labelTab *) + else (); + (label, false)) (* no error *) + | NONE => (* new label *) + let val label = {name=labSym, uid = Pid.new (), location=loc} + in labelTab := ST.insert(!labelTab, labSym, (label, definition)); + (label, false) + end + + (* accesses: funContext + * effects: labelTab, gotos *) + fun addGoto (labSym, loc) = + let val (label, _) = symbolToLabel(false, labSym, loc) + (* discard error flag: no possibility of an error condition, + * since not a defining occurrence of the label *) + in gotos := labSym :: !gotos; + label + end + + (* global: funContext + * effects: labelTab *) + fun addLabel (labSym, loc) = + let val (label, errorFlag) = symbolToLabel(true, labSym, loc) + in if errorFlag then + error("Repeated definition of label " ^ (Sym.name labSym)) + else (); + label + end + + (* access: labelTab *) + fun checkLabels () = + let fun check(g :: gl) = + (case ST.find(!labelTab,g) + of SOME(pid, true) => check gl + | SOME({name,location,...}, false) => + SOME(name, location) + (* error in program -- label used but not defined *) + | NONE => (bug "State: checkLabels: goto label not in table"; NONE)) + | check nil = NONE (* ok -- all goto target labels defined *) + in check (!gotos) + end + +end (* funContext *) + + +(* environment functions *************************************************) +local val {localEnv, globalEnv} = envContext +in + + (* Are we in a top-level environment *) + (* accesses: localEnv *) + fun topLevel () = List.null(!localEnv) + + (* effects: localEnv *) + fun pushLocalEnv () = localEnv := ST.empty :: !localEnv + + (* effects: localEnv *) + fun popLocalEnv () = + case !localEnv + of st :: env => localEnv := env + | nil => bug "State: popping an empty local environment" + + (* lookSym: lookup a symbol in the full environment (localEnv over globalEnv) *) + (* accesses: globalEnv, localEnv *) + fun lookSym (sym: Sym.symbol) : Bindings.symBinding option = + let fun lookup [] = + ST.find(!globalEnv,sym) + | lookup (st::rest) = + (case ST.find (st, sym) + of SOME x => SOME x + | NONE => lookup rest) + in lookup (!localEnv) + end + + (* accesses: globalEnv *) + fun lookSymGlobal (sym: Sym.symbol): Bindings.symBinding option = + ST.find(!globalEnv,sym) + + (* bindSym : symbol * binding -> unit + * bind a new symbol. + * affects: environment *) + fun bindSym (sym, binding) = + case !localEnv + of st :: outer => + localEnv := ST.insert(st,sym,binding) :: outer + | nil => + globalEnv := ST.insert(!globalEnv,sym,binding) + + (* forces entry into the global env (used for patching up undeclared + * function calls). WARNING: new pid/uid generated *) + (* effects: globalEnv *) + fun bindSymGlobal (sym, binding) = + globalEnv := ST.insert(!globalEnv, sym, binding) + + (* is symbol bound in current innermost scope level *) + (* accesses: globalEnv *) + fun lookLocalScope sym = + case !localEnv + of nil => ST.find(!globalEnv,sym) + | st :: _ => ST.find(st,sym) + + (* return the current global environment (symtab) *) + fun getGlobalEnv (): symtab = + !globalEnv + +end (* environment *) + +in (* state function package *) + {globalState = globalState, + localState = localState, + locFuns = + {pushLoc = pushLoc, + popLoc = popLoc, + getLoc = getLoc, + error = error, + warn = warn}, + + tidsFuns = + {pushTids = pushTids, + resetTids = resetTids}, + + tmpVarsFuns = + {pushTmpVars = pushTmpVars, + resetTmpVars = resetTmpVars}, + + envFuns = + {topLevel = topLevel, + pushLocalEnv = pushLocalEnv, + popLocalEnv = popLocalEnv, + lookSym = lookSym, + bindSym = bindSym, + lookSymGlobal = lookSymGlobal, + bindSymGlobal = bindSymGlobal, + lookLocalScope = lookLocalScope, + getGlobalEnv = getGlobalEnv}, + + uidTabFuns = + {bindAid = bindAid, + lookAid = lookAid, + bindTid = bindTid, + lookTid = lookTid}, + + funFuns = + {newFunction = newFunction, + getReturnTy = getReturnTy, + checkLabels = checkLabels, + addLabel = addLabel, + addGoto = addGoto}, + + switchFuns = + {pushSwitchLabels = pushSwitchLabels, + popSwitchLabels = popSwitchLabels, + addSwitchLabel = addSwitchLabel, + addDefaultLabel = addDefaultLabel}} + +end (* fun stateFuns *) + +end (* structure State *) diff --git a/ckit/src/ast/symbol-sig.sml b/ckit/src/ast/symbol-sig.sml new file mode 100644 index 0000000..c1dd09a --- /dev/null +++ b/ckit/src/ast/symbol-sig.sml @@ -0,0 +1,38 @@ +(* Copyright (c) 1998 by Lucent Technologies *) + +(* K&R2, A11.1, C names fall into four spaces. Same name may be used for different + * purposes in the same scope if the uses are in different name spaces. These spaces + * are: 1. Objects, functions, typedef names, and enum constants. + * 2. Labels + * 3. Tags of structures, unions, and enumerations + * 4. Member of structures are unions individually. + * + * Member is (id * id), the first id is the id of the structure and the second is the + * real member. Whenever you look up a Member, you must know the struct_ty. If you + * know the struct_ty, you know the first id (see CTYPE), and then you can get a + * unique mapping in the lookup. + * + *) + +signature SYMBOL = sig + + datatype symbolKind = OBJECT | FUNCTION | TYPEDEF | ENUMCONST | LABEL | TAG | MEMBER of Tid.uid + + type symbol + + val symbol : {name:string, kind:symbolKind} -> symbol + val name : symbol -> string + val kind : symbol -> symbolKind + val equal : symbol * symbol -> bool + val compare : symbol * symbol -> order + + val label : string -> symbol + val object : string -> symbol + val func : string -> symbol + val typedef : string -> symbol + val enumConst : string -> symbol + val tag : string -> symbol + val member : Tid.uid * string -> symbol + +end + diff --git a/ckit/src/ast/symbol.sml b/ckit/src/ast/symbol.sml new file mode 100644 index 0000000..350d11e --- /dev/null +++ b/ckit/src/ast/symbol.sml @@ -0,0 +1,58 @@ +(* Copyright (c) 1998 by Lucent Technologies *) + +(* K&R2, A11.1, C names fall into four spaces. Same name may be used for + * different purposes in the same scope if the uses are in different name + * spaces. These spaces are: + * + * 0. Objects, functions, typedef names, and enum constants, + * 1. Labels, + * 2. Tags of structures, unions, and enumerations, + * 3. Members of structures and unions individually. + *) + + +structure Symbol :> SYMBOL = struct + + datatype symbolKind + = OBJECT | FUNCTION | TYPEDEF | ENUMCONST | LABEL | TAG | MEMBER of Tid.uid + + type symbol = {name:string, hash:Word.word, kind:symbolKind, namespace:int} + + val hash = HashString.hashString + + fun namespace OBJECT = 0 + | namespace FUNCTION = 0 + | namespace TYPEDEF = 0 + | namespace ENUMCONST = 0 + | namespace LABEL = 1 + | namespace TAG = 2 + | namespace (MEMBER tid) = 3 + tid (* DBM: beware negative tid *) + + fun symbol {name, kind} = + {name=name, hash=hash name, kind=kind, namespace=namespace kind} + + fun name ({name,...}:symbol) = name + + fun kind ({kind,...}:symbol) = kind + + fun equal (sym1:symbol,sym2:symbol) = + (#hash sym1) = (#hash sym2) andalso + (#namespace sym1) = (#namespace sym2) andalso + (#name sym1) = (#name sym2) + + fun compare (sym1:symbol,sym2:symbol) = + case Word.compare (#hash sym1,#hash sym2) + of EQUAL => (case Int.compare (#namespace sym1,#namespace sym2) + of EQUAL => String.compare (#name sym1,#name sym2) + | x => x) + | x => x + + fun object name = symbol {name=name, kind=OBJECT} + fun func name = symbol {name=name, kind=FUNCTION} + fun typedef name = symbol {name=name, kind=TYPEDEF} + fun enumConst name = symbol {name=name, kind=ENUMCONST} + fun label name = symbol {name=name, kind=LABEL} + fun tag name = symbol {name=name, kind=TAG} + fun member (tid,name) = symbol {name=name, kind=MEMBER tid} + +end (* structure Symbol *) diff --git a/ckit/src/ast/tables.sml b/ckit/src/ast/tables.sml new file mode 100644 index 0000000..aba2e18 --- /dev/null +++ b/ckit/src/ast/tables.sml @@ -0,0 +1,9 @@ +(* tables.sml *) + +structure Tables = +struct + + type aidtab = Ast.ctype Aidtab.uidtab + type tidtab = Bindings.tidBinding Tidtab.uidtab + +end diff --git a/ckit/src/ast/tid.sml b/ckit/src/ast/tid.sml new file mode 100644 index 0000000..5cc7fa6 --- /dev/null +++ b/ckit/src/ast/tid.sml @@ -0,0 +1,3 @@ +(* Copyright (c) 1998 by Lucent Technologies *) + +structure Tid : UID = UidFn (val initial = 0; val prefix = "t"); diff --git a/ckit/src/ast/tidtab.sml b/ckit/src/ast/tidtab.sml new file mode 100644 index 0000000..24012b2 --- /dev/null +++ b/ckit/src/ast/tidtab.sml @@ -0,0 +1,7 @@ +(* Copyright (c) 1998 by Lucent Technologies *) + +structure Tidtab = UidtabImpFn (structure Uid=Tid) + + + + diff --git a/ckit/src/ast/type-util-sig.sml b/ckit/src/ast/type-util-sig.sml new file mode 100644 index 0000000..b03260b --- /dev/null +++ b/ckit/src/ast/type-util-sig.sml @@ -0,0 +1,148 @@ +(* Copyright (c) 1998 by Lucent Technologies *) + +local + type 'a type_util = Tables.tidtab -> Ast.ctype -> 'a + type 'a type_mem_util = Tables.tidtab -> Ast.ctype * Ast.member -> 'a + type 'a type_type_util = Tables.tidtab -> Ast.ctype * Ast.ctype -> 'a +in + +signature TYPE_UTIL = +sig + + exception TypeError of Ast.ctype + + val hasKnownStorageSize : bool type_util + (* check if type has known storage size *) + +(* + val fixArrayType : Ast.tidtab -> {n:Int32.int, ty:Ast.ctype} -> {err:bool, ty:Ast.ctype} + (* fix up array type using initializer info e.g. int x[] = {1,2,3}; *) +*) + + val getCoreType : Ast.ctype type_util + + val isPointer : bool type_util + (* check if a type can be considered to be a pointer type *) + + val isConst : bool type_util + (* check if a type contains the const qualifier *) + + val isNumberOrPointer : bool type_util + (* check if a type can be considered to be a number or pointer type *) + + val isNumber : bool type_util + (* check if a type can be considered to be a number type *) + + val isArray : bool type_util + (* check if a type can be considered to be an array *) + + val isIntegral : bool type_util + (* check if a type can be considered to be an array *) + + val deref : (Ast.ctype option) type_util + (* if type can be considered a pointer then returns dereferenced type; + * and otherwise returns NONE. + *) + + val checkQuals : {redundantConst:bool, redundantVolatile:bool} type_util + (* check for redundant qualifiers *) + + val getQuals : {const:bool, volatile:bool, ty:Ast.ctype} type_util + (* check for redundant qualifiers *) + + val isFunction : bool type_util + (* check if a type can be considered to be a function type *) + + val isFunctionPrototype : bool type_util + (* check if a type is a function prototype *) + + val isNonPointerFunction : bool type_util + (* check if a type is a function (but not a function pointer) *) + + val getFunction : ((Ast.ctype * (Ast.ctype * Ast.id option) list) option) type_util + (* if type can be considered a function then returns return type and + * list of argument types; + * and otherwise returns NONE. + *) + + val isStructOrUnion : (Ast.tid option) type_util + (* if type is a struct or union then returns tid of that type, + * and otherwise returns NONE. + *) + + val isEnum : bool type_mem_util + (* true iff type can be considered an enumerated type and pid is a + * member of that enum + *) + + val lookupEnum : (LargeInt.int option) type_mem_util + (* if type can be considered an enumerated type and id is a member of + * that enum, return the value of that member; + * otherwise raise a type error + *) + + val isAssignable : Tables.tidtab + -> {lhs:Ast.ctype, rhs:Ast.ctype, rhsExpr0:bool} + -> bool + (* type checking: expr of type rhs can be assigned to lval of type lhs *) + + val isEquable : Tables.tidtab + -> {ty1:Ast.ctype, exp1Zero:bool, + ty2:Ast.ctype, exp2Zero:bool} + -> Ast.ctype option + + val conditionalExp : Tables.tidtab + -> {ty1:Ast.ctype, exp1Zero:bool, + ty2:Ast.ctype, exp2Zero:bool} + -> Ast.ctype option + + val isComparable: Tables.tidtab + -> {ty1:Ast.ctype, ty2:Ast.ctype} + -> Ast.ctype option + + val isAddable: Tables.tidtab + -> {ty1:Ast.ctype, ty2:Ast.ctype} + -> {ty1:Ast.ctype, ty2:Ast.ctype, resTy:Ast.ctype} option + + val isSubtractable: Tables.tidtab + -> {ty1:Ast.ctype, ty2:Ast.ctype} + -> {ty1:Ast.ctype, ty2:Ast.ctype, resTy:Ast.ctype} option + + val checkFn : Tables.tidtab + -> Ast.ctype * Ast.ctype list * bool list + -> Ast.ctype * string list (* for type error messages *) + * Ast.ctype list + (* type checking: function applied to args is well formed *) + + val equalType : bool type_type_util + (* type equality *) + + val compatible : bool type_type_util + (* type compatibility *) + + val functionArgConv : Ast.ctype type_util + + val composite : Tables.tidtab + -> Ast.ctype * Ast.ctype + -> Ast.ctype option * string list (* for type error messages *) + + val isScalar : bool type_util + (* is type numeric *) + + val usualBinaryCnv : (Ast.ctype option) type_type_util + (* combine binary operation types *) + + val usualUnaryCnv : Ast.ctype type_util + (* process unary operation type *) + + val preArgConv : Ast.ctype type_util + (* converts arrays and functions to pointers *) + + val cnvFunctionToPointer2Function : Ast.ctype type_util + (* converts functions to pointers *) + + val stdInt : Ast.ctype + +end (* signature TYPE_UTIL *) + +end (* local *) diff --git a/ckit/src/ast/type-util.sml b/ckit/src/ast/type-util.sml new file mode 100644 index 0000000..8dc30d3 --- /dev/null +++ b/ckit/src/ast/type-util.sml @@ -0,0 +1,788 @@ +(* Copyright (c) 1998 by Lucent Technologies *) + +structure TypeUtil : TYPE_UTIL = +struct + + structure S = Symbol + structure Pid = Pid + structure Tid = Tid + structure B = Bindings + structure TypeCheckControl = Config.TypeCheckControl + + exception TypeError of Ast.ctype + + (* some parameters used here, but passed in that should be lifted out of here *) + fun warning s = (print "warning "; print s; print "\n") + + fun internalError s = (print "internal error "; print s; print "\n") + + val don't_convert_SHORT_to_INT = TypeCheckControl.don't_convert_SHORT_to_INT + (* In ANSI C, usual unary converstion converts + SHORT to INT; for DSP code, we want to + keep SHORT as SHORT. + Default: true for ANSI C behavior *) + + val don't_convert_DOUBLE_in_usual_unary_cnv = TypeCheckControl.don't_convert_DOUBLE_in_usual_unary_cnv + (* In ANSI, FLOAT is not converted to DOUBLE during + usual unary converstion; in old style compilers + FLOAT *is* converted to DOUBLE. + Default: true for ANSI behavior *) + + val enumeration_incompatibility = TypeCheckControl.enumeration_incompatibility + (* ANSI says that different enumerations are incomptible + (although all are compatible with int); + older style compilers say that different enumerations + are compatible. + Default: true for ANSI behavior *) + + val pointer_compatibility_quals = TypeCheckControl.pointer_compatibility_quals + (* ANSI says that pointers to differently qualified types + are different; some compilers vary. + Default: true for ANSI behavior *) + + val stdInt = Ast.Numeric(Ast.NONSATURATE, Ast.WHOLENUM, Ast.SIGNED, Ast.INT, Ast.SIGNASSUMED) + + fun ctToString tidtab = + PPLib.ppToString (PPAst.ppCtype () tidtab) + (* pid table actually not needed to print out a ct, but it is + a parameter passed to ppCtype, so just fudge one to make types work. + This is ugly dpo? + *) + + fun reduceTypedef (tidtab: Tables.tidtab) ty = + case ty + of Ast.TypeRef tid => + (case Tidtab.find (tidtab,tid) + of SOME{ntype=SOME(B.Typedef (_,ty)),...} => reduceTypedef tidtab ty + | _ => ( internalError "poorly formed type table (unresolved type id),assuming Void" + ; Ast.Void + ) + +) + | ty => ty + + fun getCoreType tidtab ty = + (* derefs typedefs and and removes qualifiers *) + case ty + of Ast.TypeRef tid => getCoreType tidtab (reduceTypedef tidtab ty) + | Ast.Qual (_,ty) => getCoreType tidtab ty + | ty => ty + + fun checkQuals tidtab ty = + let fun check ty = + (case ty + of Ast.TypeRef tid => check (reduceTypedef tidtab ty) + | Ast.Qual (q,ty) => + let val {volatile, const, cerr, verr} = check ty + in + case q of + Ast.CONST => {volatile=volatile, const=true, verr=verr, cerr=const} + | Ast.VOLATILE => {volatile=true, const=const, cerr=cerr, verr=volatile} + end + | ty => {volatile=false, const=false, verr=false, cerr=false}) + val res = check ty + in + {redundantConst = #cerr res, + redundantVolatile = #verr res} + end + + fun getQuals tidtab ty = + (* collects qualifiers *) + case ty + of Ast.TypeRef tid => getQuals tidtab (reduceTypedef tidtab ty) + | Ast.Qual (q,ty) => + let val {volatile, const, ty} = getQuals tidtab ty + in + case q of + Ast.CONST => {volatile=volatile, const=true, ty=ty} + | Ast.VOLATILE => {volatile=true, const=const, ty=ty} + end + | ty => {volatile=false, const=false, ty=ty} + +(* + fun hasKnownStorageSize tidtab {ty, withInitializer} = + (* withInitializer=true: does ty have known storage size when an initializer is present (see array case) + withInitializer=false: does ty have known storage size, period. *) + case ty of + Ast.Void => false + | Ast.Qual(_, ty) => hasKnownStorageSize tidtab ty + | Ast.Numeric _ => true + | Ast.Array(SOME _, ty) => hasKnownStorageSize tidtab ty + | Ast.Array(NONE, _) => withInitializer + | Ast.Pointer _ => true + | Ast.Function _ => true + | Ast.EnumRef tid => true + | Ast.AggrRef tid => + (case Tidtab.find (tidtab,tid) + of SOME(_,SOME(Ast.Aggr (_,_,fields)),_) => + List.foldl + (fn ((ty, _, _), b) => b andalso (hasKnownStorageSize tidtab ty)) + true fields + | _ => false) + | Ast.TypeRef tid => hasKnownStorageSize tidtab (reduceTypedef tidtab ty) + | Ast.Ellipses => false +*) + + +(* nch fix: + hasKnownStorageSize should reuse some code from + sizeof -- same kinds of checks and memoization +*) + + + fun hasKnownStorageSize (tidtab: Tables.tidtab) ty = + case ty + of Ast.Void => false + | Ast.Qual(_, ty) => hasKnownStorageSize tidtab ty + | Ast.Numeric _ => true + | Ast.Array(SOME _, ty) => hasKnownStorageSize tidtab ty + | Ast.Array(NONE, _) => false + | Ast.Pointer _ => true + | Ast.Function _ => true + | Ast.EnumRef tid => + (case Tidtab.find (tidtab,tid) + of SOME{ntype=SOME _, ...} => true + | _ => + if TypeCheckControl.partial_enums_have_unknown_size then false + else true) + | Ast.StructRef tid => + (case Tidtab.find (tidtab,tid) + of SOME{ntype=SOME(B.Struct (_,fields)),...} => + List.all + (fn (ty, _, _) => (hasKnownStorageSize tidtab ty)) + fields + | _ => false) + | Ast.UnionRef tid => + (case Tidtab.find (tidtab,tid) + of SOME{ntype=SOME(B.Union (_,fields)),...} => + List.all + (fn (ty, _) => (hasKnownStorageSize tidtab ty)) + fields + | _ => false) + | Ast.TypeRef tid => hasKnownStorageSize tidtab (reduceTypedef tidtab ty) + | Ast.Ellipses => false + | Ast.Error => false + +(* + fun fixArrayType tidtab {ty, n} = + case ty of + Ast.Void => {err=(n<=1), ty} + | Ast.Qual(_, ty) => fixArrayType tidtab {ty=ty, n=n} + | Ast.Numeric _ => {err=(n<=1), ty} + | Ast.Array(SOME n', ty) => {err=(n<=n'), ty} + | Ast.Array(NONE, ty) => {err=true, Ast.Array(SOME n, ty}) + | Ast.Pointer _ => {err=(n<=1), ty} + | Ast.Function _ => {err=(n<=1), ty} + | Ast.EnumRef tid => {err=(n<=1), ty} + | Ast.AggrRef tid => {err=(n<=1), ty} + | Ast.TypeRef tid => fixArrayType tidtab {ty=reduceTypedef tidtab ty, n=n} + | Ast.Ellipses => {err=false, ty} +*) + + fun isConst tidtab ty = #const(getQuals tidtab ty) + + fun isPointer tidtab ty = + case ty + of Ast.Qual (_,ty) => isPointer tidtab ty + | Ast.Array _ => true + | Ast.Pointer _ => true + | Ast.Function _ => true + | Ast.TypeRef _ => isPointer tidtab (reduceTypedef tidtab ty) + | _ => false + + fun isIntegral tidtab ty = + case ty + of Ast.Qual (_,ty) => isIntegral tidtab ty + | Ast.Array _ => false + | Ast.Pointer _ => false + | Ast.Function _ => false + | Ast.Numeric(sat, frac, sign, Ast.CHAR, _) => true + | Ast.Numeric(sat, frac, sign, Ast.SHORT, _) => true + | Ast.Numeric(sat, frac, sign, Ast.INT, _) => true + | Ast.Numeric(sat, frac, sign, Ast.LONG, _) => true + | Ast.Numeric(sat, frac, sign, Ast.LONGLONG, _) => true + | Ast.Numeric(sat, frac, sign, Ast.FLOAT, _) => false + | Ast.Numeric(sat, frac, sign, Ast.DOUBLE, _) => false + | Ast.Numeric(sat, frac, sign, Ast.LONGDOUBLE, _) => false + | Ast.EnumRef _ => true + | Ast.TypeRef _ => isIntegral tidtab (reduceTypedef tidtab ty) + | _ => false + + fun isArray tidtab ty = + case ty + of Ast.Qual (_,ty) => isArray tidtab ty + | Ast.Array _ => true + | Ast.TypeRef _ => isArray tidtab (reduceTypedef tidtab ty) + | _ => false + + fun isNumberOrPointer tidtab ty = + case ty + of Ast.Qual (_,ty) => isNumberOrPointer tidtab ty + | Ast.Array _ => true + | Ast.Pointer _ => true + | Ast.Function _ => true + | Ast.Numeric _ => true + | Ast.EnumRef _ => true + | Ast.TypeRef _ => isNumberOrPointer tidtab (reduceTypedef tidtab ty) + | _ => false + + fun isNumber tidtab ty = + case ty + of Ast.Qual (_,ty) => isNumber tidtab ty + | Ast.Array _ => false + | Ast.Pointer _ => false + | Ast.Function _ => false + | Ast.Numeric _ => true + | Ast.EnumRef _ => true + | Ast.TypeRef _ => isNumber tidtab (reduceTypedef tidtab ty) + | _ => false + + fun deref tidtab ty = + case ty + of Ast.Qual (_,ty) => deref tidtab ty + | Ast.Array (_,ty) => SOME ty + | Ast.Pointer ty => SOME ty + | Ast.Function _ => SOME ty + | Ast.TypeRef _ => deref tidtab (reduceTypedef tidtab ty) + | _ => NONE + + fun getFunction tidtab ty = + let fun getF ty {deref} = + case ty + of Ast.Qual (_,ty) => getF ty {deref=deref} + | Ast.Pointer ty => if deref then NONE else getF ty {deref=true} + (* allow one level of dereferencing of function pointers + see H & S p 147: "an expression of type `pointer to function' can be used in a + function call without an explicit dereferencing" *) + | Ast.Function (retTy,argTys) => SOME(retTy,argTys) + | Ast.TypeRef _ => getF (reduceTypedef tidtab ty) {deref=deref} + | _ => NONE + in + getF ty {deref=false} + end + + fun isFunction tidtab ty = (* returns true of ty is a function; excludes fn pointer case *) + case reduceTypedef tidtab ty of (* might have prototype fn def using typedef?? *) + Ast.Function _ => true + | _ => false + + fun isFunctionPrototype tidtab ty = + case getFunction tidtab ty of + NONE => false + | SOME(_, nil) => false + | SOME(_, _ :: _) => true + + fun isNonPointerFunction tidtab ty = + case ty + of Ast.Qual (_,ty) => isNonPointerFunction tidtab ty + | Ast.TypeRef _ => isNonPointerFunction tidtab (reduceTypedef tidtab ty) + | Ast.Function _ => true + | _ => false + + fun isStructOrUnion tidtab ty = + case reduceTypedef tidtab ty + of Ast.Qual (_,ty) => isStructOrUnion tidtab ty + | (Ast.StructRef tid | Ast.UnionRef tid) => SOME tid + | _ => NONE + + fun isEnum tidtab (ty,member as {uid,kind=Ast.ENUMmem _,...}: Ast.member) = + (case reduceTypedef tidtab ty + of Ast.Qual (_,ty) => isEnum tidtab (ty,member) + | Ast.EnumRef tid => + (case Tidtab.find (tidtab,tid) + of SOME {ntype=SOME (B.Enum (_,memberIntList)),...} => + let fun pred ({uid=uid',...}: Ast.member,_) = + Pid.equal (uid',uid) + in List.exists pred memberIntList end + | SOME {ntype=NONE,...} => + (warning + "Enum type used but not declared, assuming member is not an EnumId"; + false) + | SOME {ntype=SOME _,...} => + (internalError + ("poorly formed type table: expected enumerated type for " + ^ (Tid.toString tid)); + false) + | NONE => + (internalError + ("poorly formed type table: expected enumerated type for " + ^ (Tid.toString tid)); + false)) + | _ => false) + | isEnum tidtab (ty,member) = + (internalError "isEnum applied to struct or union member"; + false) + + fun lookupEnum tidtab (ty,member as {uid,...}: Ast.member) = + case reduceTypedef tidtab ty + of Ast.Qual (_,ty) => lookupEnum tidtab (ty,member) + | Ast.EnumRef tid => + (case Tidtab.find (tidtab,tid) + of SOME{ntype=SOME(B.Enum(_,memberIntList)),...} => + let fun pred ({uid=uid',...}: Ast.member,_) = + Pid.equal(uid', uid) + in case List.find pred memberIntList + of SOME (_,i) => SOME i + | NONE => NONE + end + | _ => NONE) + | _ => NONE + + (* Haberson/Steele "C Reference Manual", 4th Ed, section 5.11.1 p152 *) + fun equalType tidtab (ty1,ty2) = + let open Ast + fun eq (ty1,ty2) = + case (ty1,ty2) + of (Void, Void) => true + | (Qual(q1, ct1), Qual(q2, ct2)) => + (q1 = q2) andalso eq (ct1, ct2) + | (Numeric(sat1, frac1, sign1, intKnd1, signednessTag1), + Numeric(sat2, frac2, sign2, intKnd2, signednessTag2)) => + sat1 = sat2 andalso frac1 = frac2 andalso + sign1 = sign2 andalso intKnd1 = intKnd2 + (* note: don't require signednessTags to be the same *) + | (Array(SOME(i1, _), ct1), Array(SOME(i2,_), ct2)) => (i1=i2) andalso eq (ct1, ct2) + | (Array(NONE, ct1), Array(NONE, ct2)) => eq (ct1, ct2) + | (Array _, Array _) => false + | (Pointer ct1, Pointer ct2) => eq (ct1, ct2) + | (Function(ct1, ctl1), Function(ct2, ctl2)) => + eq (ct1, ct2) andalso eql (ctl1, ctl2) + | (EnumRef tid1, EnumRef tid2) => Tid.equal (tid1, tid2) + | (UnionRef tid1, UnionRef tid2) => Tid.equal (tid1, tid2) + | (StructRef tid1, StructRef tid2) => Tid.equal (tid1, tid2) + | (TypeRef _, _) => eq (reduceTypedef tidtab ty1, ty2) + | (_, TypeRef _) => eq (ty1, reduceTypedef tidtab ty2) + | _ => false + and eql ([],[]) = true + | eql ((ty1,_)::tyl1,(ty2,_)::tyl2) = + eq (ty1,ty2) andalso eql (tyl1,tyl2) + | eql _ = false + in eq (ty1,ty2) end + +(* implements "ISO C conversion" column of table 6-4 in Haberson/Steele, p175 + "C Reference Manual", 4th Ed *) + + fun usualUnaryCnv tidtab tp = + let val tp = getCoreType tidtab tp + in case tp + of Ast.Numeric (sat, frac, _, Ast.CHAR, _) => + Ast.Numeric (sat, frac, Ast.SIGNED, if don't_convert_SHORT_to_INT then Ast.SHORT else Ast.INT, Ast.SIGNASSUMED) + | Ast.Numeric (sat, frac, _, Ast.SHORT,_) => + Ast.Numeric (sat, frac, Ast.SIGNED, if don't_convert_SHORT_to_INT then Ast.SHORT else Ast.INT, Ast.SIGNASSUMED) + (* for dsp work, want to keep short as short *) + | ty as (Ast.Numeric (sat, frac, sign, Ast.FLOAT, d)) => + if don't_convert_DOUBLE_in_usual_unary_cnv then ty else Ast.Numeric (sat, frac, sign, Ast.DOUBLE, d) + | Ast.Array (_, arrayTp) => if (Config.DFLAG) then tp else Ast.Pointer arrayTp + | Ast.Function x => Ast.Pointer tp (* this code is now not used: it is overridden by the stronger condition that + all expressions of Function type are converted to Pointer(Function), + (except for & and sizeof) *) + | Ast.EnumRef _ => stdInt + (* Not explicit in table 6-4, but seems to be implicitly assumed -- e.g. see compatiblity *) + | _ => tp + end + + (* implements section 6.3.5 of H&S, p177. *) + fun functionArgConv tidtab tp = + case getCoreType tidtab tp + of + (Ast.Numeric (sat, frac, sign, Ast.FLOAT, d)) => + Ast.Numeric (sat, frac, sign, Ast.DOUBLE, d) + | _ => usualUnaryCnv tidtab tp + + fun combineSat (Ast.SATURATE, Ast.SATURATE) = Ast.SATURATE + | combineSat _ = Ast.NONSATURATE + + fun combineFrac (Ast.FRACTIONAL, _) = Ast.FRACTIONAL + | combineFrac (_, Ast.FRACTIONAL) = Ast.FRACTIONAL + | combineFrac _ = Ast.WHOLENUM + +(* follows "ISO C conversion" column of table 6-5 in Haberson/Steele, p176 + "C Reference Manual", 4th Ed *) + fun usualBinaryCnv tidtab (tp1,tp2) = + case ( usualUnaryCnv tidtab (getCoreType tidtab tp1) + , usualUnaryCnv tidtab (getCoreType tidtab tp2) + ) + of ( Ast.Numeric(sat1, frac1, sign1, int1, d1) + , Ast.Numeric(sat2, frac2, sign2, int2, d2) + ) => + (* removes CHAR, and (maybe) SHORT *) + let val (sign', int') = + case ((sign1, int1), (sign2, int2)) + of ((_, Ast.LONGDOUBLE), _) => (Ast.SIGNED, Ast.LONGDOUBLE) + | (_, (_, Ast.LONGDOUBLE)) => (Ast.SIGNED, Ast.LONGDOUBLE) + | ((_, Ast.DOUBLE), _) => (Ast.SIGNED, Ast.DOUBLE) + | (_, (_, Ast.DOUBLE)) => (Ast.SIGNED, Ast.DOUBLE) + | ((_, Ast.FLOAT), _) => (Ast.SIGNED, Ast.FLOAT) + | (_, (_, Ast.FLOAT)) => (Ast.SIGNED, Ast.FLOAT) + + (* we've removed: LONGDOUBLE, DOUBLE, FLOAT, CHAR and (maybe) SHORT *) + (* this leaves: INT, LONG, LONGLONG and (possibly) SHORT *) + | (x1, x2) => + let + val int' = + case (int1, int2) + of (Ast.LONGLONG, _) => Ast.LONGLONG + | (_, Ast.LONGLONG) => Ast.LONGLONG + | (Ast.LONG, _) => Ast.LONG + | (_, Ast.LONG) => Ast.LONG + | (Ast.INT, _) => Ast.INT + | (_, Ast.INT) => Ast.INT + | (Ast.SHORT, _) => Ast.SHORT + | (_, Ast.SHORT) => Ast.SHORT + | _ => int1 (* should be nothing left *) + val sign' = + case (sign1, sign2) + of (Ast.UNSIGNED, _) => Ast.UNSIGNED + | (_, Ast.UNSIGNED) => Ast.UNSIGNED + | _ => Ast.SIGNED + in (sign', int') end + in + SOME ( Ast.Numeric(combineSat(sat1, sat2) + , combineFrac(frac1, frac2), sign', int', Ast.SIGNASSUMED) + ) + end + | (tp1', tp2') => + (print "Warning: unexpected call of usualBinaryCnv on non-Numeric types\n"; + if equalType tidtab (tp1',tp2') + then SOME tp1' + else NONE) + + (* Many compilers consider function args to be compatible when they + can be converted to pointers of the same type *) + fun preArgConv tidtab ty = + (case reduceTypedef tidtab ty of + Ast.Array (_, arrayTp) => Ast.Pointer arrayTp + | Ast.Function x => Ast.Pointer ty + | Ast.Qual(q, ty) => Ast.Qual(q, preArgConv tidtab ty) + | _ => ty) + + (* Used to convert function args of type Function(...) to Pointer(Function(...)) *) + fun cnvFunctionToPointer2Function tidtab ty = + (case getCoreType tidtab ty of + (coreType as (Ast.Function _)) => Ast.Pointer(coreType) + | _ => ty) + + (* section 5.11, pp151-155, in Haberson/Steele "C Reference Manual", 4th Ed *) + fun composite tidtab (ty1,ty2) = + let + open Ast + fun enumCompose (tid, ty) = + (case ty of + EnumRef tid2 => + if enumeration_incompatibility then + if Tid.equal(tid, tid2) then SOME ty else NONE + else + SOME ty (* old style: all enums are compatible *) + + | Numeric(NONSATURATE, WHOLENUM, SIGNED, INT, d) => SOME(Numeric(NONSATURATE, WHOLENUM, SIGNED, INT, d)) + (* enumeration types are always compatible with the underlying implementation type, + assume in this frontend to the int *) + | _ => NONE) + + fun composeid (NONE, x2) = x2 + | composeid (x1, NONE) = x1 + | composeid (x1 as SOME (i1: Ast.id), SOME (i2: Ast.id)) = + if Symbol.equal (#name i1, #name i2) then x1 else NONE + + fun compose (ty1,ty2) = + let val ty1 = if pointer_compatibility_quals then ty1 else getCoreType tidtab ty1 + val ty2 = if pointer_compatibility_quals then ty2 else getCoreType tidtab ty2 + fun em1() = ("Prototype " ^ (ctToString tidtab ty1) ^ + " and non-prototype " ^ (ctToString tidtab ty2) ^ + " are not compatible because parameter is not compatible with the" ^ + " type after applying default argument promotion.") + fun em2() = ("Prototype " ^ (ctToString tidtab ty2) ^ + " and non-prototype " ^ (ctToString tidtab ty1) ^ + " are not compatible because parameter is not compatible with the" ^ + " type after applying default argument promotion.") + in + case (ty1,ty2) + of + (Void, Void) => (SOME(Void), nil) + | (TypeRef _, _) => compose (reduceTypedef tidtab ty1, ty2) + | (_, TypeRef _) => compose (ty1, reduceTypedef tidtab ty2) + | (EnumRef tid1, _) => (enumCompose(tid1, ty2), nil) + | (_, EnumRef tid2) => (enumCompose(tid2, ty1), nil) + | (Array(io1, ct1), Array(io2, ct2)) => + (case (compose(ct1, ct2), io1, io2) of + ((SOME ct, eml), NONE, NONE) => (SOME(Array(NONE, ct)), eml) + | ((SOME ct, eml), SOME opt1, NONE) => (SOME(Array(SOME opt1, ct)), eml) + | ((SOME ct, eml), NONE, SOME opt2) => (SOME(Array(SOME opt2, ct)), eml) + | ((SOME ct, eml), SOME(i1, expr1), SOME(i2, _)) => + (* potential source-to-source problem: what if i1=i2, but expr1 and expr2 are diff? *) + if (i1 = i2) then (SOME(Array(SOME(i1, expr1), ct)), + eml) + else (NONE, "Arrays have different lengths." :: eml) + | ((NONE,eml),_, _) => (NONE,eml)) + | (Function(ct1, nil), Function(ct2, nil)) => (* both non-prototypes *) + (case compose (ct1, ct2) of + (NONE, eml) => (NONE, eml) + | (SOME ct, eml) => (SOME(Function(ct, nil)), eml)) + | (Function(ct1, [(Void, _)]), Function(ct2, nil)) => (* first is Void-arg-prototype *) + (case compose (ct1, ct2) of + (NONE, eml) => (NONE, eml) + | (SOME ct, eml) => (SOME(Function(ct, [(Void,NONE)])), eml)) + | (Function(ct1, nil), Function(ct2, [(Void,_)])) => (* second is Void-arg-prototype *) + (case compose (ct1, ct2) of + (NONE, eml) => (NONE, eml) + | (SOME ct, eml) => (SOME(Function(ct, [(Void,NONE)])), eml)) + | (Function(ct1, ctl1), Function(ct2, nil)) => (* first is prototype *) + (case (compose(ct1, ct2), checkArgs ctl1) of + ((SOME ct,eml), fl) => (SOME(Function(ct, ctl1)), if fl then eml else (em1()) :: eml) + | ((NONE, eml), fl) => (NONE, if fl then eml else (em1()) :: eml)) + | (Function(ct1, nil), Function(ct2, ctl2)) => (* second is prototype *) + (case (compose(ct1, ct2), checkArgs ctl2) of + ((SOME ct, eml), fl) => (SOME(Function(ct, ctl2)), if fl then eml else (em2()) :: eml) + | ((NONE, eml), fl) => (NONE, if fl then eml else (em2()) :: eml)) + | (Function(ct1, ctl1), Function(ct2, ctl2)) => (* both are prototypes *) + (case (compose (ct1, ct2), composel (ctl1, ctl2)) of (* composel: deals with ellipses *) + ((SOME ct, eml1), (SOME ctl, eml2)) => (SOME(Function(ct, ctl)), eml1 @ eml2) + | ((_, eml1), (_, eml2)) => (NONE, eml1 @ eml2)) + | (ct1 as Qual _, ct2 as Qual _) => + let val {volatile, const, ty=ct} = getQuals tidtab ct1 + val {volatile=volatile', const=const', ty=ct'} = getQuals tidtab ct2 + in case compose (ct, ct') of + (NONE, eml) => (NONE, eml) + | (SOME ct, eml) => let val ct = if volatile then Qual(VOLATILE, ct) else ct + val ct = if const then Qual(CONST, ct) else ct + in + (SOME ct, eml) + end + end + | (Numeric x, Numeric y) => if x = y then (SOME ty1, nil) else (NONE, nil) + | (Pointer ct1, Pointer ct2) => (case compose (ct1, ct2) of + (SOME ct, eml) => (SOME(Pointer ct), eml) + | (NONE, eml) => (NONE, eml)) + | ((StructRef tid1, StructRef tid2) | (UnionRef tid1, UnionRef tid2)) => + if Tid.equal (tid1, tid2) then (SOME ty1, nil) else (NONE, nil) + | _ => (NONE, nil) + end + and checkArgs ((Ellipses, _) :: _) = true + | checkArgs ((ct, _) :: ctl) = (case compose(ct, functionArgConv tidtab ct) of + (NONE, _) => false + | (SOME _, _) => checkArgs ctl + (* H & S, p 154, midpage: + each parameter type T must be compatible with the type + resulting from applying the usual unary conversions to T. + Correction: usual unary cnv except that float always + converted to unary (c.f. ISO conversion) + *) + ) + | checkArgs nil = true + and composel ([],[]) = (SOME nil, nil) + | composel ([(Ast.Ellipses, _)], [(Ast.Ellipses, _)]) = (SOME([(Ast.Ellipses,NONE)]), nil) + | composel ([(Ast.Ellipses, _)], _) = (NONE, ["Use of ellipses does not match."]) + | composel (_, [(Ast.Ellipses, _)]) = (NONE, ["Use of ellipses does not match."]) + | composel ((ty1, id1)::tyl1,(ty2, id2)::tyl2) = + (case (compose (ty1,ty2), composel (tyl1,tyl2)) of + ((SOME ty, eml1), (SOME tyl, eml2)) => + (SOME((ty, composeid (id1, id2)) :: tyl), eml1@eml2) + | ((_, eml1), (_, eml2)) => (NONE, eml1@eml2)) + | composel _ = (NONE, ["Function types have different numbers of arguments."]) + in compose (ty1,ty2) end + + fun compatible tidtab (ty1,ty2) = + (case composite tidtab (ty1,ty2) of + (SOME _, _) => true + | (NONE, _) => false) + + fun isAssignable tidtab {lhs, rhs, rhsExpr0} = + (* From H&S p 174, table 6-3 (but also see Table 7-7, p221) + Note 1: This function just checks that the implicit assignment conversion is allowable. + - it does not check that lhs is assignable. + Note 2: The usualUnaryCnv conversion on rhs is not explicit in H & S, + but seems implied? + (otherwise can't typecheck: int i[4], *j = i) + Note 3: The definition below structure to correspond to table 6-3, but because of the + redundancy in this definition, we have reorganized order of some lines + Note 4: The EnumRef case is not explicit in Table 6-3, + but seems implied by compatibility (and is needed). + *) + (case (getCoreType tidtab lhs, usualUnaryCnv tidtab rhs, rhsExpr0) of + (* Note: usualUnary eliminates: Array, Function and Enum *) + +(*1*) (Ast.Numeric _, Ast.Numeric _, _) => true + +(*2a*) | (ty1 as Ast.StructRef _, ty2 as Ast.StructRef _, _) => compatible tidtab (ty1, ty2) +(*2b*) | (ty1 as Ast.UnionRef _, ty2 as Ast.UnionRef _, _) => compatible tidtab (ty1, ty2) + +(*3a*) | (Ast.Pointer Ast.Void, _, true) => true +(*3c*) | (Ast.Pointer Ast.Void, Ast.Pointer Ast.Void, _) => true +(*3b*) | (Ast.Pointer Ast.Void, Ast.Pointer _, _) => true + + +(*5a*) | (Ast.Pointer (Ast.Function _), _, true) => true +(*5b*) | (Ast.Pointer (ty1 as Ast.Function _), Ast.Pointer (ty2 as Ast.Function _), _) + => compatible tidtab (ty1,ty2) + +(*4a*) | (Ast.Pointer ty1, _, true) => true +(*4c*) | (Ast.Pointer _, Ast.Pointer Ast.Void, _) => true +(*4b*) | (Ast.Pointer ty1, Ast.Pointer ty2, _) => + let + val ty1' = getCoreType tidtab ty1 + val ty2' = getCoreType tidtab ty2 + val {volatile=vol1, const=const1, ...} = getQuals tidtab ty1 + val {volatile=vol2, const=const2, ...} = getQuals tidtab ty2 + val qual1 = vol1 orelse not vol2 + val qual2 = const1 orelse not const2 + in + qual1 andalso qual2 andalso compatible tidtab (ty1',ty2') + end + | (Ast.EnumRef _, _, _) => isIntegral tidtab rhs + + | (ty1, ty2, fl) => (* this case is important when type checking function calls if + convert_function_args_to_pointers is set to false *) + (equalType tidtab (ty1,ty2)) orelse + (equalType tidtab (ty1,getCoreType tidtab rhs))) + + fun isEquable tidtab {ty1, exp1Zero, ty2, exp2Zero} = (* for Eq and Neq *) + (case (usualUnaryCnv tidtab ty1, exp1Zero, usualUnaryCnv tidtab ty2, exp2Zero) of + (Ast.Numeric _, _, Ast.Numeric _, _) => usualBinaryCnv tidtab (ty1, ty2) (* get common type *) + | (Ast.Pointer Ast.Void, _, Ast.Pointer _, _) => SOME ty1 + | (Ast.Pointer _, _, Ast.Pointer Ast.Void, _) => SOME ty2 + | (Ast.Pointer _, _, _, true) => SOME ty1 + | (_, true, Ast.Pointer _, _) => SOME ty2 + | (ty1' as Ast.Pointer _, _, ty2' as Ast.Pointer _, _) => + let val (x, _) = composite tidtab (ty1', ty2') (* composite *AFTER* usualUnaryCnv! *) + in x + end + | _ => NONE) + + fun conditionalExp tidtab {ty1, exp1Zero, ty2, exp2Zero} = (* for Eq and Neq *) + (case (usualUnaryCnv tidtab ty1, exp1Zero, usualUnaryCnv tidtab ty2, exp2Zero) of + (Ast.Numeric _, _, Ast.Numeric _, _) => usualBinaryCnv tidtab (ty1, ty2) (* get common type *) + | ((Ast.StructRef tid1, _, Ast.StructRef tid2, _) | + (Ast.UnionRef tid1, _, Ast.UnionRef tid2, _)) => + if Tid.equal (tid1, tid2) then SOME ty1 + else NONE + | (Ast.Void, _, Ast.Void, _) => SOME ty1 + + | (Ast.Pointer _, _, Ast.Pointer Ast.Void, _) => SOME ty2 + | (Ast.Pointer Ast.Void, _, Ast.Pointer _, _) => SOME ty1 + + | (ty1' as Ast.Pointer _, _, ty2' as Ast.Pointer _, _) => + let val (x, _) = composite tidtab (ty1', ty2') (* composite *AFTER* usualUnaryCnv! *) + in + x + end + + | (Ast.Pointer _, _, _, true) => SOME ty1 + | (_, true, Ast.Pointer _, _) => SOME ty2 + + | (ty1, _, ty2, _) => NONE) + + fun isAddable tidtab {ty1, ty2} = (* for Plus *) + (case (usualUnaryCnv tidtab ty1, usualUnaryCnv tidtab ty2) of + (Ast.Numeric _, Ast.Numeric _) => + (case usualBinaryCnv tidtab (ty1, ty2) (* get common type *) + of SOME ty => SOME{ty1=ty, ty2=ty, resTy=ty} + | NONE => NONE) + | (Ast.Pointer _, Ast.Numeric _) => + if isIntegral tidtab ty2 + then SOME{ty1=ty1, ty2=stdInt, resTy=ty1} + else NONE + | (Ast.Numeric _, Ast.Pointer _) => + if isIntegral tidtab ty1 + then SOME{ty1=stdInt, ty2=ty2, resTy=ty2} + else NONE + | _ => NONE) + + fun isSubtractable tidtab {ty1, ty2} = (* for Plus *) + (case (usualUnaryCnv tidtab ty1, usualUnaryCnv tidtab ty2) of + (Ast.Numeric _, Ast.Numeric _) => + (case usualBinaryCnv tidtab (ty1, ty2) (* get common type *) + of SOME ty => SOME{ty1=ty, ty2=ty, resTy=ty} + | NONE => NONE) + | (ty1' as Ast.Pointer _, ty2' as Ast.Pointer _) => + (case composite tidtab (ty1', ty2') of (* composite *AFTER* usualUnaryCnv *) + (SOME ty, _) => SOME{ty1=ty, ty2=ty, resTy=stdInt} + | (NONE, _) => NONE) + | (Ast.Pointer _, Ast.Numeric _) => + if isIntegral tidtab ty2 then SOME{ty1=ty1, ty2=stdInt, resTy=ty1} + else NONE + | _ => NONE) + + fun isComparable tidtab {ty1, ty2} = (* for Eq and Neq *) + (case (usualUnaryCnv tidtab ty1, usualUnaryCnv tidtab ty2) of + (Ast.Numeric _, Ast.Numeric _) => usualBinaryCnv tidtab (ty1, ty2) (* get common type *) + | (ty1' as Ast.Pointer _, ty2' as Ast.Pointer _) => + let val (x, _) = composite tidtab (ty1', ty2') (* composite *AFTER* usualUnaryCnv *) + in x + end + | _ => NONE) + + fun checkFn tidtab (funTy, argTys, isZeroExprs) = + (case getFunction tidtab funTy of + NONE => (Ast.Void, ["Called object is not a function."], argTys) + | SOME(retTy, paramTysIdOpts) => + let + val paramTys = map #1 paramTysIdOpts + val paramTys = case paramTys + of [Ast.Void] => nil (* a function with a single void argument is a function of no args *) + | _ => paramTys + fun isAssignableL n x = + case x + of (Ast.Ellipses :: _, argl, _) => (nil, List.map (functionArgConv tidtab) argl) + (* Ellipses = variable arg length function *) + | (param :: paraml, arg :: argl, isZeroExpr :: isZeroExprs) => + let val (strL, paraml) = isAssignableL (n+1) (paraml, argl, isZeroExprs) + val strL' = if isAssignable tidtab {lhs=param, rhs=arg, rhsExpr0=isZeroExpr} + then strL + else + let val msg = "Bad function call: arg " ^ Int.toString n ^ + " has type " ^ (ctToString tidtab arg) + ^ " but fn parameter has type " ^ (ctToString tidtab param) + in + msg :: strL + end + in + (strL', param :: paraml) + end + | (nil, nil, _) => (nil, nil) + (* bugfix 12/Jan/00: the previous bugfix of 15/jun/99 overdid it a little (recursion!). + the case of a function with a single void arg is + now handled above in val paramTys = ... + | ([Ast.Void], nil) => (nil, nil) (* bugfix 15/jun/99: a function with a single void argument + * is a function of no args *) + *) + | ((_, nil, _) | (_, _, nil)) => ( ["Type Warning: function call has too few args"] + , nil + ) + | (nil, argl, _) => (["Type Warning: function call has too many args"] + , List.map (functionArgConv tidtab) argl + ) + val (msgL, argTys') = isAssignableL 1 (paramTys,argTys, isZeroExprs) + in + (retTy, msgL, argTys') + end) + + (* The notion of "scalar" types is not defined in e.g. K&R or H&S although + it is refered to in H&S p218. + It is used to restrict the type of controlling expressions (e.g. while, do, for, ?:, etc.). + According to the ISO standard (p24), scalars consist of + a) arithmetic types (integral and floating types) + b) pointer types + This seems to exclude array and function types. + + However most compilers consider an array type to be scalar (i.e. just consider it a pointer). + + We shall assume that everthing is a scalar except: functions, unions and structs. + Lint agrees with this; gcc and SGI cc disagree with this on functions. + *) + + fun isScalar tidtab ty = + case ty + of Ast.Qual (_,ty) => isScalar tidtab ty + | Ast.Numeric _ => true + | Ast.Pointer _ => true + | Ast.Array _ => true + | Ast.EnumRef _ => true + | Ast.TypeRef _ => isScalar tidtab (reduceTypedef tidtab ty) + | Ast.Function _ => false (* although a function can be viewed as a pointer *) + | Ast.StructRef _ => false + | Ast.UnionRef _ => false + | Ast.Ellipses => false (* can't occur *) + | Ast.Void => false + | Ast.Error => false + +end (* functor TypeUtilFn *) diff --git a/ckit/src/ast/uid-fn.sml b/ckit/src/ast/uid-fn.sml new file mode 100644 index 0000000..373d0bb --- /dev/null +++ b/ckit/src/ast/uid-fn.sml @@ -0,0 +1,31 @@ +(* Copyright (c) 1998 by Lucent Technologies *) + +(* a functor for creating new categories of unique ids *) + +functor UidFn (val initial: int + val prefix: string) :> UID = +struct + + type uid = int + + val initial = initial + + val counter = ref initial + + fun new () = let val n = !counter + in counter := n + 1; + n + end + + fun reset n = counter := n + + fun toString x = prefix ^ (Int.toString x) + + val toWord = Word.fromInt + + fun equal (uid:uid, uid') = (uid = uid') + + val compare = Int.compare + +end + diff --git a/ckit/src/ast/uid-sig.sml b/ckit/src/ast/uid-sig.sml new file mode 100644 index 0000000..afe3e19 --- /dev/null +++ b/ckit/src/ast/uid-sig.sml @@ -0,0 +1,15 @@ +(* Copyright (c) 1998 by Lucent Technologies *) + +(* UID = "Unique IDentifiers" *) + +signature UID = sig + type uid = int + + val initial : uid + val new : unit -> uid + val reset : int -> unit + val equal: uid * uid -> bool + val compare: uid * uid -> order + val toWord: uid -> Word.word + val toString: uid -> string +end diff --git a/ckit/src/ast/uidtabimp-fn.sml b/ckit/src/ast/uidtabimp-fn.sml new file mode 100644 index 0000000..56dc4ba --- /dev/null +++ b/ckit/src/ast/uidtabimp-fn.sml @@ -0,0 +1,32 @@ +(* Copyright (c) 1998 by Lucent Technologies *) + +(* imperative uid tables based on hash table library *) +(* polymorphic table operations *) + +functor UidtabImpFn (structure Uid:UID) :> UIDTABIMP where type uid = Uid.uid += +struct + + structure M = HashTableFn + (struct (* must match HASH_KEY *) + type hash_key = Uid.uid + val hashVal = Uid.toWord + val sameKey = Uid.equal + end) + + exception NotFound + + type uid = Uid.uid + type 'a uidtab = 'a M.hash_table + + fun insert (uidtab,uid,v) : unit = M.insert uidtab (uid,v) + + fun find (uidtab,uid) = M.find uidtab uid + + fun listItems uidtab = M.listItems uidtab + + fun listItemsi uidtab = M.listItemsi uidtab + + fun uidtab () = M.mkTable(50, NotFound) + +end (* functor UidtabImperFn *) diff --git a/ckit/src/ast/uidtabimp-sig.sml b/ckit/src/ast/uidtabimp-sig.sml new file mode 100644 index 0000000..ab340fc --- /dev/null +++ b/ckit/src/ast/uidtabimp-sig.sml @@ -0,0 +1,21 @@ +(* Copyright (c) 1998 by Lucent Technologies *) + +(* imperative uid tables *) + +signature UIDTABIMP = +sig + + type uid + type 'a uidtab + + val insert : 'info uidtab * uid * 'info -> unit + val find : 'info uidtab * uid -> 'info option + val listItems : 'info uidtab -> 'info list + val listItemsi : 'info uidtab -> (uid * 'info) list + + val uidtab : unit -> 'info uidtab + +end + + + diff --git a/ckit/src/c-util/sizes.c b/ckit/src/c-util/sizes.c new file mode 100644 index 0000000..a51361d --- /dev/null +++ b/ckit/src/c-util/sizes.c @@ -0,0 +1,45 @@ +/* automatically generates Sizes.sml */ + +#include + +typedef struct { char c; } min_struct; +typedef union { char c; } min_union; +typedef long long longlong; +typedef long double longdouble; +typedef char* pointer; + +#define OFFSET(ty) \ + { \ + struct { \ + char a; \ + ty b; \ + } x; \ + printf(" "); \ + printf(#ty " = {bits = %d, align = %d},\n", \ + sizeof(ty)*8, \ + ((unsigned long)&(x.b) - (unsigned long)&(x.a))*8); \ + } + +main () +{ + printf("(* This file was automatically generated using size.c.\n"); + printf(" * It contains information about c data sizes and layout.\n\n"); + printf(" * Limitations:\n"); + printf(" * 1. write proper test for bitFieldAlignment.\n"); + printf(" * 2. include date and system information in this file?\n"); + printf(" *)\n\n"); + printf("val sizes = { (*** all sizes in bits ***)\n"); + OFFSET(char) + OFFSET(short) + OFFSET(int) + OFFSET(long) + OFFSET(longlong) + OFFSET(float) + OFFSET(double) + OFFSET(longdouble) + OFFSET(pointer) + OFFSET(min_struct) + OFFSET(min_union) + printf(" onlyPackBitFields = false,\n"); + printf(" ignoreUnnamedBitFieldAlignment = true\n}\n"); +} diff --git a/ckit/src/ckit-lib.cm b/ckit/src/ckit-lib.cm new file mode 100644 index 0000000..04b5067 --- /dev/null +++ b/ckit/src/ckit-lib.cm @@ -0,0 +1,33 @@ +Library + signature PARSE_TO_AST + structure ParseToAst + signature PARSETREE + structure ParseTree + signature PARSER + structure Parser + signature AST + structure Ast + signature PP_AST + structure PPAst + signature UIDTABIMP + structure Tidtab + structure Aidtab + signature UID + structure Tid + structure Pid + structure Tables + structure Bindings + structure State + structure Symbol + signature SOURCE_MAP + structure SourceMap + signature PARSECONTROL + signature TYPECHECKCONTROL + signature CONFIG + structure Config + + structure Sizes + structure TypeUtil + structure Sizeof +is + ast/group.cm diff --git a/ckit/src/ckit.cm b/ckit/src/ckit.cm new file mode 100644 index 0000000..c8a2cd0 --- /dev/null +++ b/ckit/src/ckit.cm @@ -0,0 +1,2 @@ +Group is + ast/sources.cm diff --git a/ckit/src/parser/.cm/GUID/parse-tree-sig.sml b/ckit/src/parser/.cm/GUID/parse-tree-sig.sml new file mode 100644 index 0000000..6a21dc2 --- /dev/null +++ b/ckit/src/parser/.cm/GUID/parse-tree-sig.sml @@ -0,0 +1 @@ +guid-$/(ckit-lib.cm):ast/(group.cm):../parser/(group.cm):parse-tree-sig.sml-1714016104.855 diff --git a/ckit/src/parser/.cm/GUID/parse-tree.sml b/ckit/src/parser/.cm/GUID/parse-tree.sml new file mode 100644 index 0000000..05657fd --- /dev/null +++ b/ckit/src/parser/.cm/GUID/parse-tree.sml @@ -0,0 +1 @@ +guid-$/(ckit-lib.cm):ast/(group.cm):../parser/(group.cm):parse-tree.sml-1714016104.874 diff --git a/ckit/src/parser/.cm/GUID/parser-sig.sml b/ckit/src/parser/.cm/GUID/parser-sig.sml new file mode 100644 index 0000000..f1b5551 --- /dev/null +++ b/ckit/src/parser/.cm/GUID/parser-sig.sml @@ -0,0 +1 @@ +guid-$/(ckit-lib.cm):ast/(group.cm):../parser/(group.cm):parser-sig.sml-1714016104.920 diff --git a/ckit/src/parser/.cm/GUID/parser.sml b/ckit/src/parser/.cm/GUID/parser.sml new file mode 100644 index 0000000..4815b8b --- /dev/null +++ b/ckit/src/parser/.cm/GUID/parser.sml @@ -0,0 +1 @@ +guid-$/(ckit-lib.cm):ast/(group.cm):../parser/(group.cm):parser.sml-1714016107.605 diff --git a/ckit/src/parser/.cm/SKEL/parse-tree-sig.sml b/ckit/src/parser/.cm/SKEL/parse-tree-sig.sml new file mode 100644 index 0000000..99f8d2f --- /dev/null +++ b/ckit/src/parser/.cm/SKEL/parse-tree-sig.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f3d"SourceMap"d"ParseTreeExt"d"LargeInt"ac"PARSETREE"h0 \ No newline at end of file diff --git a/ckit/src/parser/.cm/SKEL/parse-tree.sml b/ckit/src/parser/.cm/SKEL/parse-tree.sml new file mode 100644 index 0000000..391eb1c --- /dev/null +++ b/ckit/src/parser/.cm/SKEL/parse-tree.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f3d"SourceMap"d"ParseTreeExt"d"LargeInt"ad"ParseTree"jh0gp1c"PARSETREE" \ No newline at end of file diff --git a/ckit/src/parser/.cm/SKEL/parser-sig.sml b/ckit/src/parser/.cm/SKEL/parser-sig.sml new file mode 100644 index 0000000..028fd81 --- /dev/null +++ b/ckit/src/parser/.cm/SKEL/parser-sig.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f2d"ParseTree"d"Error"ac"PARSER"h0 \ No newline at end of file diff --git a/ckit/src/parser/.cm/SKEL/parser.sml b/ckit/src/parser/.cm/SKEL/parser.sml new file mode 100644 index 0000000..8603fe0 --- /dev/null +++ b/ckit/src/parser/.cm/SKEL/parser.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f5CLrParser"d"SourceMap"d"TypeDefs"d"Error"d"TextIO"Nad"Parser"jh4aLrVals"jh1aToken"gp2gp1e"LrValsFun"aTokTable"jh1Tokens"gp1e"TokenTable"aCLex"jh2a4gp14gp1e"CLexFun"ad"P"jh3aParserData"gp2ad"Lex"gp1&agp1gp1e"JoinWithArg"gp1c"PARSER" \ No newline at end of file diff --git a/ckit/src/parser/.cm/amd64-unix/parse-tree-sig.sml b/ckit/src/parser/.cm/amd64-unix/parse-tree-sig.sml new file mode 100644 index 0000000..92b4678 Binary files /dev/null and b/ckit/src/parser/.cm/amd64-unix/parse-tree-sig.sml differ diff --git a/ckit/src/parser/.cm/amd64-unix/parse-tree.sml b/ckit/src/parser/.cm/amd64-unix/parse-tree.sml new file mode 100644 index 0000000..4d6e10f Binary files /dev/null and b/ckit/src/parser/.cm/amd64-unix/parse-tree.sml differ diff --git a/ckit/src/parser/.cm/amd64-unix/parser-sig.sml b/ckit/src/parser/.cm/amd64-unix/parser-sig.sml new file mode 100644 index 0000000..d96cb96 Binary files /dev/null and b/ckit/src/parser/.cm/amd64-unix/parser-sig.sml differ diff --git a/ckit/src/parser/.cm/amd64-unix/parser.sml b/ckit/src/parser/.cm/amd64-unix/parser.sml new file mode 100644 index 0000000..eb5711d Binary files /dev/null and b/ckit/src/parser/.cm/amd64-unix/parser.sml differ diff --git a/ckit/src/parser/extensions/c/.cm/GUID/parse-tree-ext.sml b/ckit/src/parser/extensions/c/.cm/GUID/parse-tree-ext.sml new file mode 100644 index 0000000..2f34494 --- /dev/null +++ b/ckit/src/parser/extensions/c/.cm/GUID/parse-tree-ext.sml @@ -0,0 +1 @@ +guid-$/(ckit-lib.cm):ast/(group.cm):../parser/(group.cm):extensions/c/parse-tree-ext.sml-1714016104.852 diff --git a/ckit/src/parser/extensions/c/.cm/SKEL/parse-tree-ext-sig.sml b/ckit/src/parser/extensions/c/.cm/SKEL/parse-tree-ext-sig.sml new file mode 100644 index 0000000..001e1a4 --- /dev/null +++ b/ckit/src/parser/extensions/c/.cm/SKEL/parse-tree-ext-sig.sml @@ -0,0 +1,2 @@ +Skeleton 5 +ac"PARSETREEEXT"h0 \ No newline at end of file diff --git a/ckit/src/parser/extensions/c/.cm/SKEL/parse-tree-ext.sml b/ckit/src/parser/extensions/c/.cm/SKEL/parse-tree-ext.sml new file mode 100644 index 0000000..041b9df --- /dev/null +++ b/ckit/src/parser/extensions/c/.cm/SKEL/parse-tree-ext.sml @@ -0,0 +1,2 @@ +Skeleton 5 +ad"ParseTreeExt"h0 \ No newline at end of file diff --git a/ckit/src/parser/extensions/c/.cm/amd64-unix/parse-tree-ext.sml b/ckit/src/parser/extensions/c/.cm/amd64-unix/parse-tree-ext.sml new file mode 100644 index 0000000..9ca9360 Binary files /dev/null and b/ckit/src/parser/extensions/c/.cm/amd64-unix/parse-tree-ext.sml differ diff --git a/ckit/src/parser/extensions/c/parse-tree-ext-sig.sml b/ckit/src/parser/extensions/c/parse-tree-ext-sig.sml new file mode 100644 index 0000000..e1aca78 --- /dev/null +++ b/ckit/src/parser/extensions/c/parse-tree-ext-sig.sml @@ -0,0 +1,18 @@ +(* Copyright (c) 1998 by Lucent Technologies *) + +(* cmodel extension signature for empty extension *) + +(* for documentation only -- not currently used *) + +signature PARSETREEEXT = +sig + (* DBM: may need equality operations for some or all of these types *) + type operatorExt = unit + type ('specifier, 'declarator, 'ctype, 'decltype, 'operator, 'expression, 'statement) expressionExt + type ('specifier, 'declarator, 'ctype, 'decltype, 'operator, 'expression, 'statement) specifierExt + type ('specifier, 'declarator, 'ctype, 'decltype, 'operator, 'expression, 'statement) declaratorExt + type ('specifier, 'declarator, 'ctype, 'decltype, 'operator, 'expression, 'statement) statementExt + type ('specifier, 'declarator, 'ctype, 'decltype, 'operator, 'expression, 'statement) declarationExt + type ('specifier, 'declarator, 'ctype, 'decltype, 'operator, 'expression, 'statement) externalDeclExt +end + diff --git a/ckit/src/parser/extensions/c/parse-tree-ext.sml b/ckit/src/parser/extensions/c/parse-tree-ext.sml new file mode 100644 index 0000000..d74a366 --- /dev/null +++ b/ckit/src/parser/extensions/c/parse-tree-ext.sml @@ -0,0 +1,13 @@ +(* Copyright (c) 1998 by Lucent Technologies *) + +(* parse tree extension signature for empty extension *) + +structure ParseTreeExt = struct + type operatorExt = unit + type ('specifier, 'declarator, 'ctype, 'decltype, 'operator, 'expression, 'statement) expressionExt = unit + type ('specifier, 'declarator, 'ctype, 'decltype, 'operator, 'expression, 'statement) specifierExt = unit + type ('specifier, 'declarator, 'ctype, 'decltype, 'operator, 'expression, 'statement) declaratorExt = unit + type ('specifier, 'declarator, 'ctype, 'decltype, 'operator, 'expression, 'statement) statementExt = unit + type ('specifier, 'declarator, 'ctype, 'decltype, 'operator, 'expression, 'statement) declarationExt = unit + type ('specifier, 'declarator, 'ctype, 'decltype, 'operator, 'expression, 'statement) externalDeclExt = unit +end diff --git a/ckit/src/parser/grammar/.cm/GUID/c.grm.sig b/ckit/src/parser/grammar/.cm/GUID/c.grm.sig new file mode 100644 index 0000000..e2772e0 --- /dev/null +++ b/ckit/src/parser/grammar/.cm/GUID/c.grm.sig @@ -0,0 +1 @@ +guid-$/(ckit-lib.cm):ast/(group.cm):../parser/(group.cm):grammar/c.grm.sig-1714016104.943 diff --git a/ckit/src/parser/grammar/.cm/GUID/c.grm.sml b/ckit/src/parser/grammar/.cm/GUID/c.grm.sml new file mode 100644 index 0000000..7a482f0 --- /dev/null +++ b/ckit/src/parser/grammar/.cm/GUID/c.grm.sml @@ -0,0 +1 @@ +guid-$/(ckit-lib.cm):ast/(group.cm):../parser/(group.cm):grammar/c.grm.sml-1714016104.952 diff --git a/ckit/src/parser/grammar/.cm/GUID/c.lex.sml b/ckit/src/parser/grammar/.cm/GUID/c.lex.sml new file mode 100644 index 0000000..629efdc --- /dev/null +++ b/ckit/src/parser/grammar/.cm/GUID/c.lex.sml @@ -0,0 +1 @@ +guid-$/(ckit-lib.cm):ast/(group.cm):../parser/(group.cm):grammar/c.lex.sml-1714016106.932 diff --git a/ckit/src/parser/grammar/.cm/GUID/tdefs.sml b/ckit/src/parser/grammar/.cm/GUID/tdefs.sml new file mode 100644 index 0000000..9537933 --- /dev/null +++ b/ckit/src/parser/grammar/.cm/GUID/tdefs.sml @@ -0,0 +1 @@ +guid-$/(ckit-lib.cm):ast/(group.cm):../parser/(group.cm):grammar/tdefs.sml-1714016104.923 diff --git a/ckit/src/parser/grammar/.cm/GUID/tokentable.sml b/ckit/src/parser/grammar/.cm/GUID/tokentable.sml new file mode 100644 index 0000000..761bc35 --- /dev/null +++ b/ckit/src/parser/grammar/.cm/GUID/tokentable.sml @@ -0,0 +1 @@ +guid-$/(ckit-lib.cm):ast/(group.cm):../parser/(group.cm):grammar/tokentable.sml-1714016106.886 diff --git a/ckit/src/parser/grammar/.cm/SKEL/c.grm.sig b/ckit/src/parser/grammar/.cm/SKEL/c.grm.sig new file mode 100644 index 0000000..f57961b --- /dev/null +++ b/ckit/src/parser/grammar/.cm/SKEL/c.grm.sig @@ -0,0 +1,2 @@ +Skeleton 5 +d3f1d"LargeInt"aC_TOKENS"h0ac"C_LRVALS"h2ad"Tokens"gp1ad"ParserData"gp1c"PARSER_DATA" \ No newline at end of file diff --git a/ckit/src/parser/grammar/.cm/SKEL/c.grm.sml b/ckit/src/parser/grammar/.cm/SKEL/c.grm.sml new file mode 100644 index 0000000..1b25b39 --- /dev/null +++ b/ckit/src/parser/grammar/.cm/SKEL/c.grm.sml @@ -0,0 +1,2 @@ +Skeleton 5 +ae"LrValsFun"i2aToken"gp1c"TOKEN"fh2ad"ParserData"h7aHeader"h2egp1ParseTree"f3List"SourceMap"TypeDefs"aLrTable"gp26Cagp$b6f6d"Char"C6d"General"d"String"d"Array"Nb:d2f2d"LargeInt"aMlyValue"0ad"EC"h2bf2d"Error"ad"Actions"h2bf4 6*f1Nad"Tokens"j)gp1c"C_TOKENS" \ No newline at end of file diff --git a/ckit/src/parser/grammar/.cm/SKEL/c.lex.sml b/ckit/src/parser/grammar/.cm/SKEL/c.lex.sml new file mode 100644 index 0000000..8e67771 --- /dev/null +++ b/ckit/src/parser/grammar/.cm/SKEL/c.lex.sml @@ -0,0 +1,2 @@ +Skeleton 5 +ae"CLexFun"i3aTokens"gp1c"C_TOKENS"aTokTable"gp1c"TOKENTABLE"f7d"StringCvt"d"Char"Cd"CharVector"Int"SourceMap"d"String"d"LargeInt"Cd"Real"TextIO"Vector"IntInf"Nh3ayyInput"jh3aTIO"gp1ad"TSIO"gp2d"StreamIO"ad"TPIO"gp1d"TextPrimIO"0aUserDeclarations">bbd2egp1f5Cd"IO"3&d"List"C$*&Nf0f13 \ No newline at end of file diff --git a/ckit/src/parser/grammar/.cm/SKEL/tdefs.sml b/ckit/src/parser/grammar/.cm/SKEL/tdefs.sml new file mode 100644 index 0000000..c22b7dc --- /dev/null +++ b/ckit/src/parser/grammar/.cm/SKEL/tdefs.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d3f2d"AtomTable"d"Atom"aTYPEDEFS"h0ad"TypeDefs"jh1aParseControl"gp2d"Config"5gp1 \ No newline at end of file diff --git a/ckit/src/parser/grammar/.cm/SKEL/tokentable.sml b/ckit/src/parser/grammar/.cm/SKEL/tokentable.sml new file mode 100644 index 0000000..a72bf8a --- /dev/null +++ b/ckit/src/parser/grammar/.cm/SKEL/tokentable.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2aTOKENTABLE"h1Tokens"C_TOKENS"ae"TokenTable"i2f3d"TypeDefs"d"AtomTable"d"Atom"jh2agp1aParseControl"gp2d"Config"(gp1 \ No newline at end of file diff --git a/ckit/src/parser/grammar/.cm/amd64-unix/c.grm.sig b/ckit/src/parser/grammar/.cm/amd64-unix/c.grm.sig new file mode 100644 index 0000000..885f934 Binary files /dev/null and b/ckit/src/parser/grammar/.cm/amd64-unix/c.grm.sig differ diff --git a/ckit/src/parser/grammar/.cm/amd64-unix/c.grm.sml b/ckit/src/parser/grammar/.cm/amd64-unix/c.grm.sml new file mode 100644 index 0000000..f3f036a Binary files /dev/null and b/ckit/src/parser/grammar/.cm/amd64-unix/c.grm.sml differ diff --git a/ckit/src/parser/grammar/.cm/amd64-unix/c.lex.sml b/ckit/src/parser/grammar/.cm/amd64-unix/c.lex.sml new file mode 100644 index 0000000..0d5c7d0 Binary files /dev/null and b/ckit/src/parser/grammar/.cm/amd64-unix/c.lex.sml differ diff --git a/ckit/src/parser/grammar/.cm/amd64-unix/tdefs.sml b/ckit/src/parser/grammar/.cm/amd64-unix/tdefs.sml new file mode 100644 index 0000000..c9315b7 Binary files /dev/null and b/ckit/src/parser/grammar/.cm/amd64-unix/tdefs.sml differ diff --git a/ckit/src/parser/grammar/.cm/amd64-unix/tokentable.sml b/ckit/src/parser/grammar/.cm/amd64-unix/tokentable.sml new file mode 100644 index 0000000..db82efc Binary files /dev/null and b/ckit/src/parser/grammar/.cm/amd64-unix/tokentable.sml differ diff --git a/ckit/src/parser/grammar/Makefile b/ckit/src/parser/grammar/Makefile new file mode 100644 index 0000000..17779d2 --- /dev/null +++ b/ckit/src/parser/grammar/Makefile @@ -0,0 +1,8 @@ +CPP = /lib/cpp +#CPP = /usr/ccs/lib/cpp + +all: d.grm c.grm + +d.grm: cd.grm; $(CPP) -DDPARSE -P cd.grm d.grm + +c.grm: cd.grm; $(CPP) -P cd.grm c.grm diff --git a/ckit/src/parser/grammar/README b/ckit/src/parser/grammar/README new file mode 100644 index 0000000..c61089d --- /dev/null +++ b/ckit/src/parser/grammar/README @@ -0,0 +1,14 @@ +The principal file here is cd.grm. +c.grm and d.grm are derived from it via cpp and should not be edited. + +For C: + 1. comment out #define DPARSE + 2. /lib/cpp -P cd.grm > c.grm + +For D: + 1. uncomment #define DPARSE + 2. /lib/cpp -P cd.grm > d.grm + +Note: "-P" so that don't get control info (e.g. # 218 "cd.grm"). + + diff --git a/ckit/src/parser/grammar/c.grm b/ckit/src/parser/grammar/c.grm new file mode 100644 index 0000000..21beda1 --- /dev/null +++ b/ckit/src/parser/grammar/c.grm @@ -0,0 +1,909 @@ +(* DO NOT CHANGE THIS FILE -- this file was generated from cd.grm *) + +(* Copyright (c) 1998 by Lucent Technologies *) + +(* new comments from Satish Chandra, 6/21/99 *) +(* Overriding design approach: + * + * Accept all legal programs, but possibly some illegal ones at this stage. + * Do not attempt to make a really tight grammar. Our tools are supposed to + * work on "correct" C programs (i.e. those that cc -ansi would compile without + * warnings). Of course, a type checker on the parse tree can report some errors + * as syntax errors. + * + * Note on MARK: + * + * externalDecl, statement, and expression are the non-terms that are marked. + * Compound statements are not separately marked. + * declarations eventually become either a statement or a externalDecl + * if they are outside any function. They are marked accordingly. + * + * Note on function definitions: + * + * The order of the paramaters will always come from the FuncDecr. + * The types of the parameter may come from the second declaration list + * (in K&R style) + * + * Note on the structure of the grammar: + * + * It is difficult to write a LALR(1) grammar based on the grammar given at + * the back of the K&R book. The basic difficulty is that both TYPE_NAME and + * ID are tokens that are strings, but it depends on the context whether + * a given string is to be treated as an ID or a TYPE_NAME. + * We have borrowed the solution used in GCC's parser specification. In this + * scheme, the lexer always return the token TYPE_NAME if a name has been + * defined as a type name (via a typedef) in an applicable scope. The grammar + * productions are heavily rearranged (from K&R's grammar) to do the right + * thing. In this rearrangement, the basic idea is that a TYPE_NAME is + * allowed to appear in a declaration as a plain identifier only after a type + * specifier has previously appeared in the declaration. Also, a TYPE_NAME may + * appear only once in a declaration as a type specifier. + *) + +(* old comments below *) +(* Shortcomings *) +(* 1. No floating-point whatsoever *) + +(* Notes on MARK: + * externalDecl and statement are the non-terms that are marked. + * Compound statements are not separately marked. + * expressions are not marked at all. + * declarations eventually become either a statement or a externalDecl + * if they are outside any function. they are marked accordingly. + *) + +(* Overriding theme: accept all legal programs, but also some illegal ones at this + * stage. Do not attempt to make a really tight grammar. Our tools are supposed to + * work on "correct" C programs (i.e. those that cc -ansi would compile without + * warnings). Of course, a type checker on the parse tree can report some errors + * as syntax errors. + *) + +(* About function definitions: + * The order of the paramaters will always come from the FuncDecr thing + * The types of the parameter may come from the second declaration list (in K&R style) + *) + +open ParseTree (* PortingHelp *) + +fun markExternalDecl srcMap (d,left,right) = + MARKexternalDecl(SourceMap.location srcMap (left,right), d) + +fun markDeclaration srcMap (d,left,right) = + MARKdeclaration(SourceMap.location srcMap (left,right), d) + +fun markDeclarator srcMap (d,left,right) = + MARKdeclarator(SourceMap.location srcMap (left,right), d) + +fun markStatement srcMap (s,left,right) = + MARKstatement(SourceMap.location srcMap (left, right), s) + +fun markExpression srcMap (s,left,right) = + MARKexpression(SourceMap.location srcMap (left, right), s) + +val unknown = {storage=[],qualifiers=[],specifiers=[]}:decltype + +(* this code duplicated in BuildAst in function processDeclarator *) +fun ctypeDecrToTypeName (typ as {qualifiers, specifiers},decr) = + let fun mkTyp spc = {qualifiers=[], specifiers=[spc]} + fun addQual q = {qualifiers=q::qualifiers, specifiers=specifiers} + in case decr + of VarDecr x => (typ,SOME x) + | PointerDecr x => + ctypeDecrToTypeName (mkTyp (Pointer typ),x) + | ArrayDecr (x,sz) => + ctypeDecrToTypeName (mkTyp (Array (sz,typ)),x) + | FuncDecr (x,lst) => + ctypeDecrToTypeName (mkTyp (Function{retType=typ,params=lst}),x) + | QualDecr (q,decr) => + ctypeDecrToTypeName (addQual q, decr) + | EmptyDecr => (typ, NONE) + | EllipsesDecr => (mkTyp Ellipses, SOME("**ellipses**")) + | DecrExt _ => (typ, NONE) (* should call decr extension? *) + | MARKdeclarator(loc, decr) => ctypeDecrToTypeName(typ, decr) + end + +fun dclr2str dcl = + (case ctypeDecrToTypeName ({qualifiers=[],specifiers=[]}, dcl) + of (_,SOME s) => s + | (_,NONE) => "") + +fun combineDecltypes ( {qualifiers=q1,storage=st1,specifiers=sp1} + , {qualifiers=q2,storage=st2,specifiers=sp2} + ) = + {qualifiers=q1@q2,storage=st1@st2,specifiers=sp1@sp2} (* @ ok *) + +fun applyPointer (PointerDecr x,rest) = PointerDecr (applyPointer (x,rest)) + | applyPointer (QualDecr (q,x),rest) = QualDecr (q, applyPointer (x,rest)) + | applyPointer (EmptyDecr, rest) = rest + | applyPointer (_, rest) = rest + (* NCH/DBM[6/14/99]: this case can never occur *) + +fun addStorage(st, {qualifiers,storage,specifiers}) = + {qualifiers=qualifiers,storage=st::storage,specifiers=specifiers} + +fun addQualifiers(qs, {qualifiers,storage,specifiers}) = + {qualifiers=qs@qualifiers,storage=storage,specifiers=specifiers} (* @ ok *) + +fun addQualifier(q, {qualifiers,storage,specifiers}) = + {qualifiers=q::qualifiers,storage=storage,specifiers=specifiers} + +fun addSpecifier(sp, {qualifiers,storage,specifiers}) = + {qualifiers=qualifiers,storage=storage,specifiers=sp::specifiers} + +val addAll = combineDecltypes + +fun loopQd (q::rst, acc) = loopQd(rst, QualDecr(q, acc)) + | loopQd (nil, acc) = acc + +fun mkCtype typ = typ + +(* DBM: major kludge, using TYPEDEF as storage class *) +fun insertDeclNames ({storage,...}: decltype, idl) = + case storage + of [TYPEDEF] => List.app (fn x as (dcl,_) => TypeDefs.addTdef (dclr2str dcl)) idl + | _ => List.app (fn x as (dcl,_) => TypeDefs.addNoTdef (dclr2str dcl)) idl + +fun insertFuncName dcl = + let + val name = dclr2str dcl + in + TypeDefs.addNoTdef name + end + +fun insertFuncParams (FuncDecr (_,params)) : unit = + let + fun getName (ct, dclr) = dclr2str dclr + val names = map getName params + in + List.app TypeDefs.addNoTdef names + end + | insertFuncParams (ArrayDecr(dcl,_)) = insertFuncParams dcl + | insertFuncParams (PointerDecr dcl) = insertFuncParams dcl + | insertFuncParams _ = () (* this is actually an error, but it will be caught in + * BuildAst when processing a PT.FunctionDef *) + +abstype 'a seq = SEQ of 'a list +with val emptySeq = SEQ nil + fun singletonSeq x = SEQ[x] + fun addToSeq(x, SEQ yl) = SEQ(x :: yl) (* add to end of sequence! *) + (* fun addListToEnd(xl, yl) = SEQ((List.rev xl) @ yl) *) + fun addOptToEnd(NONE, yl) = yl + | addOptToEnd(SOME x, SEQ yl) = SEQ(x :: yl) + fun seqToList(SEQ yl) = List.rev yl +end + +%% + +%header (functor LrValsFun(structure Token : TOKEN + )) + +%term + EOF + | COLON | SEMICOLON | LPAREN | RPAREN | LCURLY | RCURLY + | LBRACE | RBRACE | DOT + | COMMA | QUESTION | PERCENT | AMP | BAR | TILDE | DIVIDE | PLUS + | MINUS | HAT | BANG | TIMES + | INC | DEC | ARROW + | ID of string + | EQUALS | PLUSEQUALS | MINUSEQUALS | XOREQUALS | MODEQUALS + | TIMESEQUALS | DIVEQUALS | OREQUALS | ANDEQUALS | LSHIFTEQUALS + | RSHIFTEQUALS + | LTE | GTE | LT | GT | EQ | NEQ | OR | AND | LSHIFT | RSHIFT + | DECNUM of LargeInt.int + | REALNUM of real + | STRING of string + | CCONST of LargeInt.int + | EXTERN | AUTO | STATIC | REGISTER | CONST | VOLATILE + | IF | THEN | ELSE + | FOR | DO | SWITCH | CASE | DEFAULT + | WHILE | RETURN + | BREAK | CONTINUE | GOTO + | CHAR | DOUBLE | ENUM | FLOAT | INT | LONG | SHORT + | FRACTIONAL | SATURATE (* D *) + | STRUCT | UNION | UNSIGNED | SIGNED + | VOID | SIZEOF | TYPEDEF | UNARY + | ELIPSIS + | TYPE_NAME of string + +%nonterm + translationUnit of externalDecl list + | tu of externalDecl seq + | statement of statement + | ostatementlist of statement list + + | statementlist of statement seq + | compoundStatement of statement + | expr of expression + | opExpr of expression + | exprWComma of expression + | unaryOperator of operator + | argumentExprList of expression seq + | trailingComma of bool + | enumeratorList of (string * expression) seq + | enumerator of (string * expression) + | abstractDeclarator of declarator + | directAbstractDeclarator of declarator + | initDeclarator of (declarator * expression) + | notypeInitDeclarator of (declarator * expression) + | initDeclaratorList of (declarator * expression) seq + | notypeInitDeclaratorList of (declarator * expression) seq + | pointer of declarator + | declarator of declarator + | aftertypeDeclarator of declarator + | notypeDeclarator of declarator + | parmDeclarator of declarator + | aftertypeDirectDeclarator of declarator + | notypeDirectDeclarator of declarator + | parmDirectDeclarator of declarator + | declarationSpecifiers of decltype + | declarationModifiers of decltype + | reservedDeclarationSpecifier of decltype + | specifierQualifierReserved of ctype + | reservedSpecifierQualifiers of ctype + | initializer of expression + | initializerList of expression seq + | storageClassSpecifier of storage + | typeName of ctype + | typeSpecifier of specifier + | typeSpecifierReserved of specifier + | typeQualifier of qualifier + | typeQualifierList of qualifier list + | specifierQualifierList of ctype + | enumSpecifier of specifier + | structOrUnionSpecifier of specifier + | fDefDeclaration of (decltype * declarator) + | declarationList of declaration seq + | identlist of (string * int * int) seq + | functionDefinition of externalDecl + | declaration of declaration + | declaration1 of declaration + | externalDeclaration of externalDecl option + | parameterList of (decltype * declarator) seq + | parameterTypeList of (decltype * declarator) list + | parameterDeclaration of (decltype * declarator) + | structOrUnion of bool + | structDeclarator of (declarator * expression) + | notypeStructDeclarator of (declarator * expression) + | structDeclaratorList of (declarator * expression) seq + | notypeStructDeclaratorList of (declarator * expression) seq + | structDeclarationList of (ctype * (declarator * expression) list) seq + | structDeclaration of (ctype * (declarator * expression) list) + | pushScope of unit + | popScope of unit + | strings of string + + + + + +%pos int +%verbose +%pure +%start translationUnit +%eop EOF +%noshift EOF +%keyword QUESTION IF THEN ELSE FOR DO SWITCH CASE DEFAULT WHILE RETURN BREAK CONTINUE GOTO +%subst TYPE_NAME for ID +%value TYPE_NAME(Error.hint "Likely cause: missing typedef declaration.\n"; "bogus") + +%arg (srcMap) : SourceMap.sourcemap + +%name C + +%left COMMA +%right EQUALS PLUSEQUALS MINUSEQUALS TIMESEQUALS DIVEQUALS MODEQUALS XOREQUALS OREQUALS ANDEQUALS LSHIFTEQUALS RSHIFTEQUALS +%right QUESTION +%left OR +%left AND +%left BAR +%left HAT +%left AMP +%left EQ NEQ +%left LT GT LTE GTE +%left LSHIFT RSHIFT +%left PLUS MINUS +%left TIMES DIVIDE PERCENT +%right UNARY +%right INC DEC SIZEOF +%left LBRACE LPAREN ARROW DOT + +%% + +translationUnit: + tu (seqToList tu) + +tu: + (emptySeq) + | tu externalDeclaration (addOptToEnd(externalDeclaration, tu)) + +externalDeclaration: + declaration (SOME(markExternalDecl srcMap (ExternalDecl declaration, + declarationleft, + declarationright))) + | SEMICOLON (NONE) + | functionDefinition (SOME(markExternalDecl srcMap (functionDefinition, + functionDefinitionleft, + functionDefinitionright))) + +statement: + FOR LPAREN opExpr SEMICOLON opExpr SEMICOLON opExpr RPAREN statement + (markStatement srcMap (For(opExpr1,opExpr2,opExpr3,statement), + FORleft, statementright)) + | WHILE LPAREN exprWComma RPAREN statement + (markStatement srcMap (While(exprWComma,statement), + WHILEleft, statementright)) + | SWITCH LPAREN exprWComma RPAREN statement + (markStatement srcMap (Switch(exprWComma,statement), + SWITCHleft, statementright)) + | DO statement WHILE LPAREN exprWComma RPAREN SEMICOLON + (markStatement srcMap (Do(exprWComma,statement), + DOleft, SEMICOLONright)) + | BREAK SEMICOLON (markStatement srcMap (Break, + BREAKleft, SEMICOLONright)) + | CONTINUE SEMICOLON (markStatement srcMap (Continue, + CONTINUEleft, SEMICOLONright)) + | RETURN opExpr SEMICOLON (markStatement srcMap (Return(opExpr), + RETURNleft, SEMICOLONright)) + | GOTO ID SEMICOLON (markStatement srcMap (Goto(ID), + GOTOleft, SEMICOLONright)) + | compoundStatement (compoundStatement) + | ID COLON statement (markStatement srcMap (Labeled(ID,statement), + IDleft,statementright)) + | DEFAULT COLON statement (markStatement srcMap (DefaultLabel(statement), + DEFAULTleft, statementright)) + | CASE exprWComma COLON statement + (markStatement srcMap (CaseLabel(exprWComma,statement), + CASEleft, statementright)) + | IF LPAREN exprWComma RPAREN statement + (markStatement srcMap (IfThen(exprWComma,statement), + IFleft, statementright)) + | IF LPAREN exprWComma RPAREN statement ELSE statement + (markStatement srcMap (IfThenElse(exprWComma,statement1,statement2), + IFleft, + statement2right)) + | exprWComma SEMICOLON (markStatement srcMap (Expr(exprWComma), + exprWCommaleft, SEMICOLONright)) + | SEMICOLON (markStatement srcMap (Expr(EmptyExpr), + SEMICOLONleft, SEMICOLONright)) + + + + + + +declaration: + declaration1 SEMICOLON (declaration1) + + +declaration1: + declarationSpecifiers + (insertDeclNames (declarationSpecifiers, []); + markDeclaration srcMap + (Declaration(declarationSpecifiers, []), + declarationSpecifiersleft, declarationSpecifiersright)) + + | declarationSpecifiers initDeclaratorList + (let val decl = (declarationSpecifiers, seqToList initDeclaratorList) + in insertDeclNames decl; + markDeclaration srcMap + (Declaration decl, declarationSpecifiersleft, initDeclaratorListright) + end) + + | declarationModifiers notypeInitDeclaratorList + (let val decl = (declarationModifiers, seqToList notypeInitDeclaratorList) + in insertDeclNames decl; + markDeclaration srcMap + (Declaration decl, declarationModifiersleft, notypeInitDeclaratorListright) + end) + +ostatementlist: + statementlist (seqToList statementlist) + | ([]) + +statementlist: + statement (singletonSeq statement) + | statementlist statement (addToSeq(statement, statementlist)) + + +(* original code: changed for "let" statements in D *) +compoundStatement: + LCURLY pushScope declarationList ostatementlist popScope RCURLY + (markStatement srcMap (Compound ((map Decl (seqToList declarationList)) @ ostatementlist), LCURLYleft, RCURLYright)) + | LCURLY ostatementlist RCURLY + (markStatement srcMap (Compound (ostatementlist), LCURLYleft, RCURLYright)) +(* *) + + + + + +unaryOperator: + AMP (AddrOf) + | TIMES (Star) + | PLUS (Uplus) + | MINUS (Negate) + | TILDE (BitNot) + | BANG (Not) + +expr: + expr QUESTION exprWComma COLON expr %prec QUESTION (markExpression srcMap (QuestionColon(expr1,exprWComma1,expr2),expr1left,expr2right)) + | expr PLUSEQUALS expr (markExpression srcMap (Binop(PlusAssign,expr1,expr2),expr1left,expr2right)) + | expr MINUSEQUALS expr (markExpression srcMap (Binop(MinusAssign,expr1,expr2),expr1left,expr2right)) + | expr TIMESEQUALS expr (markExpression srcMap (Binop(TimesAssign,expr1,expr2),expr1left,expr2right)) + | expr DIVEQUALS expr (markExpression srcMap (Binop(DivAssign,expr1,expr2),expr1left,expr2right)) + | expr MODEQUALS expr (markExpression srcMap (Binop(ModAssign,expr1,expr2),expr1left,expr2right)) + | expr XOREQUALS expr (markExpression srcMap (Binop(XorAssign,expr1,expr2),expr1left,expr2right)) + | expr OREQUALS expr (markExpression srcMap (Binop(OrAssign,expr1,expr2),expr1left,expr2right)) + | expr ANDEQUALS expr (markExpression srcMap (Binop(AndAssign,expr1,expr2),expr1left,expr2right)) + | expr LSHIFTEQUALS expr (markExpression srcMap (Binop(LshiftAssign,expr1,expr2),expr1left,expr2right)) + | expr RSHIFTEQUALS expr (markExpression srcMap (Binop(RshiftAssign,expr1,expr2),expr1left,expr2right)) + | expr EQUALS expr (markExpression srcMap (Binop(Assign,expr1,expr2),expr1left,expr2right)) + | expr OR expr (markExpression srcMap (Binop(Or,expr1,expr2),expr1left,expr2right)) + | expr AND expr (markExpression srcMap (Binop(And,expr1,expr2),expr1left,expr2right)) + | expr BAR expr (markExpression srcMap (Binop(BitOr,expr1,expr2),expr1left,expr2right)) + | expr HAT expr (markExpression srcMap (Binop(BitXor,expr1,expr2),expr1left,expr2right)) + | expr AMP expr (markExpression srcMap (Binop(BitAnd,expr1,expr2),expr1left,expr2right)) + | expr EQ expr (markExpression srcMap (Binop(Eq,expr1,expr2),expr1left,expr2right)) + | expr NEQ expr (markExpression srcMap (Binop(Neq,expr1,expr2),expr1left,expr2right)) + | expr LT expr (markExpression srcMap (Binop(Lt,expr1,expr2),expr1left,expr2right)) + | expr GT expr (markExpression srcMap (Binop(Gt,expr1,expr2),expr1left,expr2right)) + | expr LTE expr (markExpression srcMap (Binop(Lte,expr1,expr2),expr1left,expr2right)) + | expr GTE expr (markExpression srcMap (Binop(Gte,expr1,expr2),expr1left,expr2right)) + | expr LSHIFT expr (markExpression srcMap (Binop(Lshift,expr1,expr2),expr1left,expr2right)) + | expr RSHIFT expr (markExpression srcMap (Binop(Rshift,expr1,expr2),expr1left,expr2right)) + | expr PLUS expr (markExpression srcMap (Binop(Plus,expr1,expr2),expr1left,expr2right)) + | expr MINUS expr (markExpression srcMap (Binop(Minus,expr1,expr2),expr1left,expr2right)) + | expr TIMES expr (markExpression srcMap (Binop(Times,expr1,expr2),expr1left,expr2right)) + | expr DIVIDE expr (markExpression srcMap (Binop(Divide,expr1,expr2),expr1left,expr2right)) + | expr PERCENT expr (markExpression srcMap (Binop(Mod,expr1,expr2),expr1left,expr2right)) + | expr INC %prec INC (markExpression srcMap (Unop(PostInc,expr),exprleft,INCright)) + | expr DEC %prec INC (markExpression srcMap (Unop(PostDec,expr),exprleft,DECright)) + | INC expr %prec INC (markExpression srcMap (Unop(PreInc,expr),INCleft,exprright)) + | DEC expr %prec INC (markExpression srcMap (Unop(PreDec,expr),DECleft,exprright)) + | unaryOperator expr %prec UNARY (markExpression srcMap (Unop(unaryOperator,expr),unaryOperatorleft,exprright)) + | SIZEOF expr (markExpression srcMap (Unop(Sizeof,expr),SIZEOFleft,exprright)) + | LPAREN typeName RPAREN expr %prec INC + (markExpression srcMap (Cast (typeName,expr),LPARENleft,exprright)) + | SIZEOF LPAREN typeName RPAREN %prec SIZEOF + (markExpression srcMap (Unop(SizeofType typeName,EmptyExpr),SIZEOFleft,RPARENright)) + | expr LBRACE exprWComma RBRACE + (markExpression srcMap (Binop(Sub,expr,exprWComma),exprleft,RBRACEright)) + + + + | expr LPAREN RPAREN (markExpression srcMap (Call(expr,[]),exprleft,RPARENright)) + | expr LPAREN argumentExprList RPAREN + (markExpression srcMap (Call(expr, seqToList argumentExprList),exprleft,RPARENright)) + | expr DOT ID (markExpression srcMap (Binop(Dot,expr,Id(ID)),exprleft,IDright)) + | expr ARROW ID (markExpression srcMap (Binop(Arrow,expr,Id(ID)),exprleft,IDright)) + | expr DOT TYPE_NAME (markExpression srcMap (Binop(Dot,expr,Id(TYPE_NAME)),exprleft,TYPE_NAMEright)) + | expr ARROW TYPE_NAME (markExpression srcMap (Binop(Arrow,expr,Id(TYPE_NAME)),exprleft,TYPE_NAMEright)) + | LPAREN exprWComma RPAREN (markExpression srcMap (exprWComma,LPARENleft,RPARENright)) + | DECNUM (markExpression srcMap (IntConst DECNUM,DECNUMleft,DECNUMright)) + | REALNUM (markExpression srcMap (RealConst REALNUM, REALNUMleft,REALNUMright)) + | CCONST (markExpression srcMap (IntConst CCONST, CCONSTleft,CCONSTright)) + | ID (markExpression srcMap (Id(ID), IDleft, IDright)) + | strings (markExpression srcMap (String(strings),stringsleft,stringsright)) + + +strings: STRING (STRING) + | STRING strings (STRING ^ strings) + + + +exprWComma: + expr (expr) + | exprWComma COMMA expr (markExpression srcMap (Binop(Comma,exprWComma,expr),exprWCommaleft,exprright)) + +opExpr: (EmptyExpr) + | exprWComma (exprWComma) + + +argumentExprList: + expr (singletonSeq expr) + | argumentExprList COMMA expr (addToSeq(expr, argumentExprList)) + +typeName: + specifierQualifierList (specifierQualifierList) + | specifierQualifierList abstractDeclarator + (#1 (ctypeDecrToTypeName (specifierQualifierList, abstractDeclarator))) + +declarationSpecifiers: + typeSpecifier reservedDeclarationSpecifier + (addSpecifier (typeSpecifier, + reservedDeclarationSpecifier)) + | declarationModifiers typeSpecifier reservedDeclarationSpecifier + (addAll (declarationModifiers, + addSpecifier (typeSpecifier, + reservedDeclarationSpecifier))) + +reservedDeclarationSpecifier: + (unknown) + | reservedDeclarationSpecifier specifierQualifierReserved + (let val {qualifiers,specifiers} = specifierQualifierReserved + val decltype = {qualifiers=qualifiers,specifiers=specifiers,storage=[]} + in addAll (decltype, reservedDeclarationSpecifier) end ) + | reservedDeclarationSpecifier storageClassSpecifier + (addStorage (storageClassSpecifier, + reservedDeclarationSpecifier)) + +specifierQualifierReserved: + typeSpecifierReserved ({qualifiers=[],specifiers=[typeSpecifierReserved]}) + | typeQualifier ({qualifiers=[typeQualifier],specifiers=[]}) + | structOrUnionSpecifier ({qualifiers=[],specifiers=[structOrUnionSpecifier]}) + | enumSpecifier ({qualifiers=[],specifiers=[enumSpecifier]}) + +declarationModifiers: + storageClassSpecifier + ({storage = [storageClassSpecifier], + qualifiers = [], + specifiers = []}) + | declarationModifiers storageClassSpecifier + (addStorage(storageClassSpecifier,declarationModifiers)) + | typeQualifier + ({specifiers = [], + storage = [], + qualifiers = [typeQualifier]}) + | declarationModifiers typeQualifier + (addQualifier(typeQualifier, declarationModifiers)) + +specifierQualifierList: + typeSpecifier reservedSpecifierQualifiers + (let val {specifiers, qualifiers} = reservedSpecifierQualifiers + in {specifiers=typeSpecifier::specifiers,qualifiers=qualifiers} end) + + | typeQualifierList typeSpecifier reservedSpecifierQualifiers + (let val {specifiers, qualifiers} = reservedSpecifierQualifiers + in {specifiers=typeSpecifier::specifiers + ,qualifiers=typeQualifierList@qualifiers + } + end) + +reservedSpecifierQualifiers: + ({qualifiers=[],specifiers=[]}) + | reservedSpecifierQualifiers specifierQualifierReserved + (let val {specifiers=s1, qualifiers=q1} = reservedSpecifierQualifiers + val {specifiers=s2, qualifiers=q2} = specifierQualifierReserved + in {specifiers=s1@s2, qualifiers=q1@q2} end) + +typeQualifierList: + typeQualifier ([typeQualifier]) + + | typeQualifier typeQualifierList + (typeQualifier::typeQualifierList) + +typeSpecifier: + typeSpecifierReserved (typeSpecifierReserved) + | structOrUnionSpecifier (structOrUnionSpecifier) + | enumSpecifier (enumSpecifier) + | TYPE_NAME (TypedefName TYPE_NAME) + +typeSpecifierReserved: + VOID (Void) + | CHAR (Char) + | SHORT (Short) + | INT (Int) + | LONG (Long) + | FLOAT (Float) + | DOUBLE (Double) + | SIGNED (Signed) + | UNSIGNED (Unsigned) + + +structOrUnionSpecifier: + structOrUnion LCURLY structDeclarationList RCURLY + (Struct{isStruct=structOrUnion, tagOpt=NONE, members=seqToList structDeclarationList}) + + | structOrUnion ID LCURLY structDeclarationList RCURLY + (Struct{isStruct=structOrUnion, tagOpt=SOME ID, members=seqToList structDeclarationList}) + + | structOrUnion TYPE_NAME LCURLY structDeclarationList RCURLY + (Struct{isStruct=structOrUnion, tagOpt=SOME TYPE_NAME, members=seqToList structDeclarationList}) + + | structOrUnion ID (StructTag {isStruct=structOrUnion, name=ID}) + + | structOrUnion TYPE_NAME (StructTag {isStruct=structOrUnion, name=TYPE_NAME}) + + (* humor me: consider true for struct *) +structOrUnion: + STRUCT (true) + | UNION (false) + +structDeclarationList: + structDeclaration (singletonSeq structDeclaration) + | structDeclarationList structDeclaration + (addToSeq(structDeclaration, structDeclarationList)) + +structDeclaration: + specifierQualifierList structDeclaratorList SEMICOLON + ((specifierQualifierList, seqToList structDeclaratorList)) + | typeQualifierList notypeStructDeclaratorList SEMICOLON + (let + val ct = {qualifiers=typeQualifierList, specifiers=[]} + in + (ct, seqToList notypeStructDeclaratorList) + end) + +structDeclaratorList: + structDeclarator (singletonSeq structDeclarator) + | structDeclaratorList COMMA structDeclarator + (addToSeq(structDeclarator, structDeclaratorList)) + +notypeStructDeclaratorList: + notypeStructDeclarator (singletonSeq notypeStructDeclarator) + | notypeStructDeclaratorList COMMA structDeclarator + (addToSeq(structDeclarator, notypeStructDeclaratorList)) + +structDeclarator: + declarator (declarator, EmptyExpr) + | COLON expr (EmptyDecr, expr) + | declarator COLON expr (declarator, expr) + +notypeStructDeclarator: + notypeDeclarator (notypeDeclarator, EmptyExpr) + | COLON expr (EmptyDecr, expr) + | notypeDeclarator COLON expr (notypeDeclarator, expr) + +typeQualifier: + CONST (CONST) + | VOLATILE (VOLATILE) + +enumSpecifier: + ENUM LCURLY enumeratorList trailingComma RCURLY + (Enum{tagOpt=NONE, enumerators=seqToList enumeratorList, trailingComma=trailingComma}) + + | ENUM ID LCURLY enumeratorList trailingComma RCURLY + (Enum{tagOpt=SOME(ID), enumerators=seqToList enumeratorList, trailingComma=trailingComma}) + + | ENUM TYPE_NAME LCURLY enumeratorList trailingComma RCURLY + (Enum{tagOpt=SOME(TYPE_NAME), enumerators=seqToList enumeratorList, trailingComma=trailingComma}) + + | ENUM ID (EnumTag(ID)) + + | ENUM TYPE_NAME (EnumTag(TYPE_NAME)) + +enumeratorList: + enumeratorList COMMA enumerator + (addToSeq(enumerator, enumeratorList)) + | enumerator ((TypeDefs.addNoTdef(#1(enumerator))); + singletonSeq enumerator) + +enumerator: + ID ((ID,ParseTree.EmptyExpr)) + | ID EQUALS expr (ID,expr) + +storageClassSpecifier: + EXTERN (EXTERN) + | STATIC (STATIC) + | AUTO (AUTO) + | REGISTER (REGISTER) + | TYPEDEF (TYPEDEF) + +trailingComma: (false) + | COMMA (true) + +initDeclaratorList: + initDeclarator (singletonSeq initDeclarator) + | initDeclaratorList COMMA initDeclarator + (addToSeq(initDeclarator, initDeclaratorList)) + +initDeclarator: + declarator ((declarator,EmptyExpr)) + | declarator EQUALS initializer (declarator,initializer) + +notypeInitDeclaratorList: + notypeInitDeclarator (singletonSeq notypeInitDeclarator) + | notypeInitDeclaratorList COMMA initDeclarator + (addToSeq(initDeclarator, notypeInitDeclaratorList)) + +notypeInitDeclarator: + notypeDeclarator ((notypeDeclarator,EmptyExpr)) + | notypeDeclarator EQUALS initializer + (notypeDeclarator,initializer) + +declarator: + aftertypeDeclarator (aftertypeDeclarator) + | notypeDeclarator (notypeDeclarator) + +aftertypeDeclarator: + aftertypeDirectDeclarator (aftertypeDirectDeclarator) + | pointer aftertypeDirectDeclarator (applyPointer(pointer,aftertypeDirectDeclarator)) + +notypeDeclarator: + notypeDirectDeclarator (notypeDirectDeclarator) + | pointer notypeDirectDeclarator + (applyPointer(pointer, notypeDirectDeclarator)) + +parmDeclarator: + parmDirectDeclarator (parmDirectDeclarator) + | pointer parmDirectDeclarator + (applyPointer(pointer, parmDirectDeclarator)) + +pointer: + TIMES (PointerDecr(EmptyDecr)) + | TIMES typeQualifierList + (PointerDecr(loopQd(typeQualifierList,EmptyDecr))) + | TIMES pointer (PointerDecr(pointer)) + | TIMES typeQualifierList pointer + (PointerDecr(loopQd(typeQualifierList,pointer))) + +aftertypeDirectDeclarator: + TYPE_NAME (markDeclarator srcMap (VarDecr TYPE_NAME,TYPE_NAMEleft,TYPE_NAMEright)) + | LPAREN aftertypeDeclarator RPAREN + (aftertypeDeclarator) + | aftertypeDirectDeclarator LBRACE RBRACE %prec DOT + (ArrayDecr (aftertypeDirectDeclarator,EmptyExpr)) + | aftertypeDirectDeclarator LBRACE expr RBRACE %prec DOT + (ArrayDecr (aftertypeDirectDeclarator,expr)) + | aftertypeDirectDeclarator LPAREN RPAREN %prec DOT + (FuncDecr (aftertypeDirectDeclarator,nil)) + | aftertypeDirectDeclarator LPAREN parameterTypeList RPAREN %prec DOT + (FuncDecr (aftertypeDirectDeclarator,parameterTypeList)) + | aftertypeDirectDeclarator LPAREN identlist RPAREN %prec DOT + (FuncDecr (aftertypeDirectDeclarator, + map (fn (x,y,z) => (unknown,markDeclarator srcMap (VarDecr x,y,z))) (seqToList identlist))) + +notypeDirectDeclarator: + ID (markDeclarator srcMap (VarDecr ID,IDleft,IDright)) + | LPAREN notypeDeclarator RPAREN + (notypeDeclarator) + | notypeDirectDeclarator LBRACE RBRACE %prec DOT + (ArrayDecr (notypeDirectDeclarator,EmptyExpr)) + | notypeDirectDeclarator LBRACE expr RBRACE %prec DOT + (ArrayDecr (notypeDirectDeclarator,expr)) + | notypeDirectDeclarator LPAREN RPAREN %prec DOT + (FuncDecr (notypeDirectDeclarator,nil)) + | notypeDirectDeclarator LPAREN parameterTypeList RPAREN %prec DOT + (FuncDecr (notypeDirectDeclarator,parameterTypeList)) + | notypeDirectDeclarator LPAREN identlist RPAREN %prec DOT + (FuncDecr (notypeDirectDeclarator, + map (fn (x,y,z) => (unknown,markDeclarator srcMap (VarDecr x,y,z))) (seqToList identlist))) + +parmDirectDeclarator: + TYPE_NAME (markDeclarator srcMap (VarDecr TYPE_NAME,TYPE_NAMEleft,TYPE_NAMEright)) + | parmDirectDeclarator LBRACE RBRACE %prec DOT + (ArrayDecr (parmDirectDeclarator,EmptyExpr)) + | parmDirectDeclarator LBRACE expr RBRACE %prec DOT + (ArrayDecr (parmDirectDeclarator,expr)) + | parmDirectDeclarator LPAREN RPAREN %prec DOT + (FuncDecr (parmDirectDeclarator,nil)) + | parmDirectDeclarator LPAREN parameterTypeList RPAREN %prec DOT + (FuncDecr (parmDirectDeclarator,parameterTypeList)) + | parmDirectDeclarator LPAREN identlist RPAREN %prec DOT + (FuncDecr (parmDirectDeclarator, + map (fn (x,y,z) => (unknown,markDeclarator srcMap (VarDecr x,y,z))) (seqToList identlist))) + +initializer: + expr (expr) + | LCURLY initializerList trailingComma RCURLY + (markExpression srcMap (InitList(seqToList initializerList),LCURLYleft,RCURLYright)) + +initializerList: + initializer (singletonSeq initializer) + | initializerList COMMA initializer + (addToSeq(initializer, initializerList)) + +declarationList: + declaration (singletonSeq(markDeclaration srcMap (declaration, + declarationleft, + declarationright))) + | declarationList declaration (addToSeq(markDeclaration srcMap (declaration, + declarationleft, + declarationright), + declarationList)) + +identlist: + ID (singletonSeq (ID,IDleft,IDright)) + | identlist COMMA ID (addToSeq((ID,IDleft,IDright),identlist)) + +(* Put function name in the current scope and param names in a pushed scope. *) +fDefDeclaration: + notypeDeclarator + (insertFuncName(notypeDeclarator); + TypeDefs.pushScope(); + insertFuncParams(notypeDeclarator); + (unknown, notypeDeclarator)) + | declarationSpecifiers declarator + (insertFuncName(declarator); + TypeDefs.pushScope(); + insertFuncParams(declarator); + (declarationSpecifiers, declarator)) + + | declarationModifiers notypeDeclarator + (insertFuncName(notypeDeclarator); + TypeDefs.pushScope(); + insertFuncParams(notypeDeclarator); + (declarationModifiers, notypeDeclarator)) + +functionDefinition: + fDefDeclaration compoundStatement + (TypeDefs.popScope(); + FunctionDef + {retType = #1 fDefDeclaration, + funDecr = #2 fDefDeclaration, + krParams = [], + body = compoundStatement}) + + | fDefDeclaration declarationList compoundStatement + (TypeDefs.popScope(); + FunctionDef + {retType = #1 fDefDeclaration, + funDecr = #2 fDefDeclaration, + krParams = seqToList declarationList, + body = compoundStatement}) + +abstractDeclarator: + pointer (applyPointer (pointer, EmptyDecr)) + | directAbstractDeclarator (directAbstractDeclarator) + | pointer directAbstractDeclarator + (applyPointer(pointer, directAbstractDeclarator)) + +directAbstractDeclarator: + LPAREN abstractDeclarator RPAREN (abstractDeclarator) + | LBRACE RBRACE (ArrayDecr(EmptyDecr, EmptyExpr)) + | LBRACE expr RBRACE (ArrayDecr(EmptyDecr, expr)) + | directAbstractDeclarator LBRACE RBRACE + (ArrayDecr (directAbstractDeclarator,EmptyExpr)) + | directAbstractDeclarator LBRACE expr RBRACE + (ArrayDecr (directAbstractDeclarator,expr)) + | LPAREN RPAREN (FuncDecr (EmptyDecr ,nil)) + | LPAREN parameterTypeList RPAREN (FuncDecr (EmptyDecr, parameterTypeList)) + | directAbstractDeclarator LPAREN RPAREN + (FuncDecr (directAbstractDeclarator,nil)) + | directAbstractDeclarator LPAREN parameterTypeList RPAREN + (FuncDecr (directAbstractDeclarator, parameterTypeList)) + +parameterTypeList: + parameterList (seqToList parameterList) + | parameterList COMMA ELIPSIS + (let val decltype = {specifiers=[Ellipses],qualifiers=[],storage=[]} + in (seqToList parameterList) @ [(decltype, EllipsesDecr)] end) + +parameterList: + parameterDeclaration + (singletonSeq(#1 parameterDeclaration, + markDeclarator srcMap (#2 parameterDeclaration, + parameterDeclarationleft, + parameterDeclarationright))) + | parameterList COMMA parameterDeclaration + (addToSeq((#1 parameterDeclaration, + markDeclarator + srcMap + (#2 parameterDeclaration, + parameterDeclarationleft, + parameterDeclarationright)), + parameterList)) + +(* Decided not a push and pop a scope at the parameterDeclarations, because we + * are not going to directly plug these names in the tdef table. If this is just + * a function declaration, the names here do not matter to the tdef table. If + * this will be part of func definition, we put all these names in tdef table at + * proper scope correctly, later on. + * Note: We miss syntax errors like int f(int foo, foo bar);, if foo was a typename + *) +parameterDeclaration: + declarationSpecifiers notypeDeclarator + ((declarationSpecifiers, notypeDeclarator)) + | declarationSpecifiers parmDeclarator + ((declarationSpecifiers, parmDeclarator)) + + | declarationSpecifiers (* this case can arise for function prototypes *) + ((declarationSpecifiers, EmptyDecr)) + + | declarationSpecifiers abstractDeclarator + ((declarationSpecifiers, abstractDeclarator)) + + | declarationModifiers notypeDeclarator + ((declarationModifiers, notypeDeclarator)) + + | declarationModifiers abstractDeclarator + ((declarationModifiers, abstractDeclarator)) + +pushScope: + (TypeDefs.pushScope()) + +popScope: + (TypeDefs.popScope()) + + diff --git a/ckit/src/parser/grammar/c.grm.sig b/ckit/src/parser/grammar/c.grm.sig new file mode 100644 index 0000000..c0f805d --- /dev/null +++ b/ckit/src/parser/grammar/c.grm.sig @@ -0,0 +1,101 @@ +signature C_TOKENS = +sig +type ('a,'b) token +type svalue +val TYPE_NAME: (string) * 'a * 'a -> (svalue,'a) token +val ELIPSIS: 'a * 'a -> (svalue,'a) token +val UNARY: 'a * 'a -> (svalue,'a) token +val TYPEDEF: 'a * 'a -> (svalue,'a) token +val SIZEOF: 'a * 'a -> (svalue,'a) token +val VOID: 'a * 'a -> (svalue,'a) token +val SIGNED: 'a * 'a -> (svalue,'a) token +val UNSIGNED: 'a * 'a -> (svalue,'a) token +val UNION: 'a * 'a -> (svalue,'a) token +val STRUCT: 'a * 'a -> (svalue,'a) token +val SATURATE: 'a * 'a -> (svalue,'a) token +val FRACTIONAL: 'a * 'a -> (svalue,'a) token +val SHORT: 'a * 'a -> (svalue,'a) token +val LONG: 'a * 'a -> (svalue,'a) token +val INT: 'a * 'a -> (svalue,'a) token +val FLOAT: 'a * 'a -> (svalue,'a) token +val ENUM: 'a * 'a -> (svalue,'a) token +val DOUBLE: 'a * 'a -> (svalue,'a) token +val CHAR: 'a * 'a -> (svalue,'a) token +val GOTO: 'a * 'a -> (svalue,'a) token +val CONTINUE: 'a * 'a -> (svalue,'a) token +val BREAK: 'a * 'a -> (svalue,'a) token +val RETURN: 'a * 'a -> (svalue,'a) token +val WHILE: 'a * 'a -> (svalue,'a) token +val DEFAULT: 'a * 'a -> (svalue,'a) token +val CASE: 'a * 'a -> (svalue,'a) token +val SWITCH: 'a * 'a -> (svalue,'a) token +val DO: 'a * 'a -> (svalue,'a) token +val FOR: 'a * 'a -> (svalue,'a) token +val ELSE: 'a * 'a -> (svalue,'a) token +val THEN: 'a * 'a -> (svalue,'a) token +val IF: 'a * 'a -> (svalue,'a) token +val VOLATILE: 'a * 'a -> (svalue,'a) token +val CONST: 'a * 'a -> (svalue,'a) token +val REGISTER: 'a * 'a -> (svalue,'a) token +val STATIC: 'a * 'a -> (svalue,'a) token +val AUTO: 'a * 'a -> (svalue,'a) token +val EXTERN: 'a * 'a -> (svalue,'a) token +val CCONST: (LargeInt.int) * 'a * 'a -> (svalue,'a) token +val STRING: (string) * 'a * 'a -> (svalue,'a) token +val REALNUM: (real) * 'a * 'a -> (svalue,'a) token +val DECNUM: (LargeInt.int) * 'a * 'a -> (svalue,'a) token +val RSHIFT: 'a * 'a -> (svalue,'a) token +val LSHIFT: 'a * 'a -> (svalue,'a) token +val AND: 'a * 'a -> (svalue,'a) token +val OR: 'a * 'a -> (svalue,'a) token +val NEQ: 'a * 'a -> (svalue,'a) token +val EQ: 'a * 'a -> (svalue,'a) token +val GT: 'a * 'a -> (svalue,'a) token +val LT: 'a * 'a -> (svalue,'a) token +val GTE: 'a * 'a -> (svalue,'a) token +val LTE: 'a * 'a -> (svalue,'a) token +val RSHIFTEQUALS: 'a * 'a -> (svalue,'a) token +val LSHIFTEQUALS: 'a * 'a -> (svalue,'a) token +val ANDEQUALS: 'a * 'a -> (svalue,'a) token +val OREQUALS: 'a * 'a -> (svalue,'a) token +val DIVEQUALS: 'a * 'a -> (svalue,'a) token +val TIMESEQUALS: 'a * 'a -> (svalue,'a) token +val MODEQUALS: 'a * 'a -> (svalue,'a) token +val XOREQUALS: 'a * 'a -> (svalue,'a) token +val MINUSEQUALS: 'a * 'a -> (svalue,'a) token +val PLUSEQUALS: 'a * 'a -> (svalue,'a) token +val EQUALS: 'a * 'a -> (svalue,'a) token +val ID: (string) * 'a * 'a -> (svalue,'a) token +val ARROW: 'a * 'a -> (svalue,'a) token +val DEC: 'a * 'a -> (svalue,'a) token +val INC: 'a * 'a -> (svalue,'a) token +val TIMES: 'a * 'a -> (svalue,'a) token +val BANG: 'a * 'a -> (svalue,'a) token +val HAT: 'a * 'a -> (svalue,'a) token +val MINUS: 'a * 'a -> (svalue,'a) token +val PLUS: 'a * 'a -> (svalue,'a) token +val DIVIDE: 'a * 'a -> (svalue,'a) token +val TILDE: 'a * 'a -> (svalue,'a) token +val BAR: 'a * 'a -> (svalue,'a) token +val AMP: 'a * 'a -> (svalue,'a) token +val PERCENT: 'a * 'a -> (svalue,'a) token +val QUESTION: 'a * 'a -> (svalue,'a) token +val COMMA: 'a * 'a -> (svalue,'a) token +val DOT: 'a * 'a -> (svalue,'a) token +val RBRACE: 'a * 'a -> (svalue,'a) token +val LBRACE: 'a * 'a -> (svalue,'a) token +val RCURLY: 'a * 'a -> (svalue,'a) token +val LCURLY: 'a * 'a -> (svalue,'a) token +val RPAREN: 'a * 'a -> (svalue,'a) token +val LPAREN: 'a * 'a -> (svalue,'a) token +val SEMICOLON: 'a * 'a -> (svalue,'a) token +val COLON: 'a * 'a -> (svalue,'a) token +val EOF: 'a * 'a -> (svalue,'a) token +end +signature C_LRVALS= +sig +structure Tokens : C_TOKENS +structure ParserData:PARSER_DATA +sharing type ParserData.Token.token = Tokens.token +sharing type ParserData.svalue = Tokens.svalue +end diff --git a/ckit/src/parser/grammar/c.grm.sml b/ckit/src/parser/grammar/c.grm.sml new file mode 100644 index 0000000..1404fd2 --- /dev/null +++ b/ckit/src/parser/grammar/c.grm.sml @@ -0,0 +1,3865 @@ +functor LrValsFun(structure Token : TOKEN + ) = +struct +structure ParserData= +struct +structure Header = +struct +(* DO NOT CHANGE THIS FILE -- this file was generated from cd.grm *) + +(* Copyright (c) 1998 by Lucent Technologies *) + +(* new comments from Satish Chandra, 6/21/99 *) +(* Overriding design approach: + * + * Accept all legal programs, but possibly some illegal ones at this stage. + * Do not attempt to make a really tight grammar. Our tools are supposed to + * work on "correct" C programs (i.e. those that cc -ansi would compile without + * warnings). Of course, a type checker on the parse tree can report some errors + * as syntax errors. + * + * Note on MARK: + * + * externalDecl, statement, and expression are the non-terms that are marked. + * Compound statements are not separately marked. + * declarations eventually become either a statement or a externalDecl + * if they are outside any function. They are marked accordingly. + * + * Note on function definitions: + * + * The order of the paramaters will always come from the FuncDecr. + * The types of the parameter may come from the second declaration list + * (in K&R style) + * + * Note on the structure of the grammar: + * + * It is difficult to write a LALR(1) grammar based on the grammar given at + * the back of the K&R book. The basic difficulty is that both TYPE_NAME and + * ID are tokens that are strings, but it depends on the context whether + * a given string is to be treated as an ID or a TYPE_NAME. + * We have borrowed the solution used in GCC's parser specification. In this + * scheme, the lexer always return the token TYPE_NAME if a name has been + * defined as a type name (via a typedef) in an applicable scope. The grammar + * productions are heavily rearranged (from K&R's grammar) to do the right + * thing. In this rearrangement, the basic idea is that a TYPE_NAME is + * allowed to appear in a declaration as a plain identifier only after a type + * specifier has previously appeared in the declaration. Also, a TYPE_NAME may + * appear only once in a declaration as a type specifier. + *) + +(* old comments below *) +(* Shortcomings *) +(* 1. No floating-point whatsoever *) + +(* Notes on MARK: + * externalDecl and statement are the non-terms that are marked. + * Compound statements are not separately marked. + * expressions are not marked at all. + * declarations eventually become either a statement or a externalDecl + * if they are outside any function. they are marked accordingly. + *) + +(* Overriding theme: accept all legal programs, but also some illegal ones at this + * stage. Do not attempt to make a really tight grammar. Our tools are supposed to + * work on "correct" C programs (i.e. those that cc -ansi would compile without + * warnings). Of course, a type checker on the parse tree can report some errors + * as syntax errors. + *) + +(* About function definitions: + * The order of the paramaters will always come from the FuncDecr thing + * The types of the parameter may come from the second declaration list (in K&R style) + *) + +open ParseTree (* PortingHelp *) + +fun markExternalDecl srcMap (d,left,right) = + MARKexternalDecl(SourceMap.location srcMap (left,right), d) + +fun markDeclaration srcMap (d,left,right) = + MARKdeclaration(SourceMap.location srcMap (left,right), d) + +fun markDeclarator srcMap (d,left,right) = + MARKdeclarator(SourceMap.location srcMap (left,right), d) + +fun markStatement srcMap (s,left,right) = + MARKstatement(SourceMap.location srcMap (left, right), s) + +fun markExpression srcMap (s,left,right) = + MARKexpression(SourceMap.location srcMap (left, right), s) + +val unknown = {storage=[],qualifiers=[],specifiers=[]}:decltype + +(* this code duplicated in BuildAst in function processDeclarator *) +fun ctypeDecrToTypeName (typ as {qualifiers, specifiers},decr) = + let fun mkTyp spc = {qualifiers=[], specifiers=[spc]} + fun addQual q = {qualifiers=q::qualifiers, specifiers=specifiers} + in case decr + of VarDecr x => (typ,SOME x) + | PointerDecr x => + ctypeDecrToTypeName (mkTyp (Pointer typ),x) + | ArrayDecr (x,sz) => + ctypeDecrToTypeName (mkTyp (Array (sz,typ)),x) + | FuncDecr (x,lst) => + ctypeDecrToTypeName (mkTyp (Function{retType=typ,params=lst}),x) + | QualDecr (q,decr) => + ctypeDecrToTypeName (addQual q, decr) + | EmptyDecr => (typ, NONE) + | EllipsesDecr => (mkTyp Ellipses, SOME("**ellipses**")) + | DecrExt _ => (typ, NONE) (* should call decr extension? *) + | MARKdeclarator(loc, decr) => ctypeDecrToTypeName(typ, decr) + end + +fun dclr2str dcl = + (case ctypeDecrToTypeName ({qualifiers=[],specifiers=[]}, dcl) + of (_,SOME s) => s + | (_,NONE) => "") + +fun combineDecltypes ( {qualifiers=q1,storage=st1,specifiers=sp1} + , {qualifiers=q2,storage=st2,specifiers=sp2} + ) = + {qualifiers=q1@q2,storage=st1@st2,specifiers=sp1@sp2} (* @ ok *) + +fun applyPointer (PointerDecr x,rest) = PointerDecr (applyPointer (x,rest)) + | applyPointer (QualDecr (q,x),rest) = QualDecr (q, applyPointer (x,rest)) + | applyPointer (EmptyDecr, rest) = rest + | applyPointer (_, rest) = rest + (* NCH/DBM[6/14/99]: this case can never occur *) + +fun addStorage(st, {qualifiers,storage,specifiers}) = + {qualifiers=qualifiers,storage=st::storage,specifiers=specifiers} + +fun addQualifiers(qs, {qualifiers,storage,specifiers}) = + {qualifiers=qs@qualifiers,storage=storage,specifiers=specifiers} (* @ ok *) + +fun addQualifier(q, {qualifiers,storage,specifiers}) = + {qualifiers=q::qualifiers,storage=storage,specifiers=specifiers} + +fun addSpecifier(sp, {qualifiers,storage,specifiers}) = + {qualifiers=qualifiers,storage=storage,specifiers=sp::specifiers} + +val addAll = combineDecltypes + +fun loopQd (q::rst, acc) = loopQd(rst, QualDecr(q, acc)) + | loopQd (nil, acc) = acc + +fun mkCtype typ = typ + +(* DBM: major kludge, using TYPEDEF as storage class *) +fun insertDeclNames ({storage,...}: decltype, idl) = + case storage + of [TYPEDEF] => List.app (fn x as (dcl,_) => TypeDefs.addTdef (dclr2str dcl)) idl + | _ => List.app (fn x as (dcl,_) => TypeDefs.addNoTdef (dclr2str dcl)) idl + +fun insertFuncName dcl = + let + val name = dclr2str dcl + in + TypeDefs.addNoTdef name + end + +fun insertFuncParams (FuncDecr (_,params)) : unit = + let + fun getName (ct, dclr) = dclr2str dclr + val names = map getName params + in + List.app TypeDefs.addNoTdef names + end + | insertFuncParams (ArrayDecr(dcl,_)) = insertFuncParams dcl + | insertFuncParams (PointerDecr dcl) = insertFuncParams dcl + | insertFuncParams _ = () (* this is actually an error, but it will be caught in + * BuildAst when processing a PT.FunctionDef *) + +abstype 'a seq = SEQ of 'a list +with val emptySeq = SEQ nil + fun singletonSeq x = SEQ[x] + fun addToSeq(x, SEQ yl) = SEQ(x :: yl) (* add to end of sequence! *) + (* fun addListToEnd(xl, yl) = SEQ((List.rev xl) @ yl) *) + fun addOptToEnd(NONE, yl) = yl + | addOptToEnd(SOME x, SEQ yl) = SEQ(x :: yl) + fun seqToList(SEQ yl) = List.rev yl +end + + +end +structure LrTable = Token.LrTable +structure Token = Token +local open LrTable in +val table=let val actionRows = +"\ +\\001\000\001\000\000\000\000\000\ +\\001\000\002\000\169\000\004\000\070\000\022\000\042\000\026\000\041\000\ +\\089\000\069\000\000\000\ +\\001\000\002\000\174\000\004\000\043\000\022\000\042\000\026\000\041\000\ +\\071\000\034\000\072\000\033\000\073\000\032\000\074\000\031\000\ +\\075\000\030\000\076\000\029\000\077\000\028\000\080\000\027\000\ +\\081\000\026\000\082\000\025\000\083\000\024\000\084\000\023\000\ +\\089\000\021\000\000\000\ +\\001\000\002\000\226\000\000\000\ +\\001\000\002\000\075\001\011\000\178\000\000\000\ +\\001\000\002\000\115\001\011\000\178\000\000\000\ +\\001\000\003\000\083\002\006\000\130\002\011\000\083\002\027\000\139\000\ +\\052\000\130\002\053\000\130\002\054\000\130\002\055\000\130\002\ +\\056\000\130\002\057\000\130\002\071\000\130\002\072\000\130\002\ +\\073\000\130\002\074\000\130\002\075\000\130\002\076\000\130\002\ +\\077\000\130\002\080\000\130\002\081\000\130\002\082\000\130\002\ +\\083\000\130\002\084\000\130\002\086\000\130\002\089\000\130\002\000\000\ +\\001\000\003\000\087\002\006\000\131\002\011\000\087\002\027\000\135\000\ +\\052\000\131\002\053\000\131\002\054\000\131\002\055\000\131\002\ +\\056\000\131\002\057\000\131\002\071\000\131\002\072\000\131\002\ +\\073\000\131\002\074\000\131\002\075\000\131\002\076\000\131\002\ +\\077\000\131\002\080\000\131\002\081\000\131\002\082\000\131\002\ +\\083\000\131\002\084\000\131\002\086\000\131\002\089\000\131\002\000\000\ +\\001\000\003\000\048\000\000\000\ +\\001\000\003\000\127\000\004\000\126\000\006\000\054\000\007\000\194\001\ +\\014\000\125\000\016\000\124\000\018\000\123\000\019\000\122\000\ +\\021\000\121\000\022\000\120\000\023\000\119\000\024\000\118\000\ +\\026\000\117\000\048\000\116\000\049\000\115\000\050\000\114\000\ +\\051\000\113\000\052\000\156\002\053\000\156\002\054\000\156\002\ +\\055\000\156\002\056\000\156\002\057\000\156\002\058\000\112\000\ +\\061\000\111\000\062\000\110\000\063\000\109\000\064\000\108\000\ +\\065\000\107\000\066\000\106\000\067\000\105\000\068\000\104\000\ +\\069\000\103\000\070\000\102\000\071\000\156\002\072\000\156\002\ +\\073\000\156\002\074\000\156\002\075\000\156\002\076\000\156\002\ +\\077\000\156\002\080\000\156\002\081\000\156\002\082\000\156\002\ +\\083\000\156\002\084\000\156\002\085\000\101\000\086\000\156\002\ +\\089\000\156\002\000\000\ +\\001\000\003\000\127\000\004\000\126\000\006\000\054\000\014\000\125\000\ +\\016\000\124\000\018\000\123\000\019\000\122\000\021\000\121\000\ +\\022\000\120\000\023\000\119\000\024\000\118\000\026\000\117\000\ +\\048\000\116\000\049\000\115\000\050\000\114\000\051\000\113\000\ +\\058\000\112\000\061\000\111\000\062\000\110\000\063\000\109\000\ +\\064\000\108\000\065\000\107\000\066\000\106\000\067\000\105\000\ +\\068\000\104\000\069\000\103\000\070\000\102\000\085\000\101\000\000\000\ +\\001\000\003\000\179\000\011\000\178\000\000\000\ +\\001\000\003\000\221\000\000\000\ +\\001\000\003\000\222\000\000\000\ +\\001\000\003\000\021\001\011\000\020\001\000\000\ +\\001\000\003\000\025\001\011\000\024\001\000\000\ +\\001\000\003\000\071\001\000\000\ +\\001\000\003\000\072\001\000\000\ +\\001\000\003\000\124\001\000\000\ +\\001\000\003\000\159\001\000\000\ +\\001\000\003\000\161\001\000\000\ +\\001\000\004\000\043\000\022\000\042\000\026\000\041\000\000\000\ +\\001\000\004\000\043\000\022\000\042\000\026\000\041\000\052\000\040\000\ +\\053\000\039\000\054\000\038\000\055\000\037\000\056\000\036\000\ +\\057\000\035\000\071\000\034\000\072\000\033\000\073\000\032\000\ +\\074\000\031\000\075\000\030\000\076\000\029\000\077\000\028\000\ +\\080\000\027\000\081\000\026\000\082\000\025\000\083\000\024\000\ +\\084\000\023\000\086\000\022\000\089\000\021\000\000\000\ +\\001\000\004\000\043\000\026\000\041\000\000\000\ +\\001\000\004\000\070\000\022\000\042\000\026\000\041\000\089\000\069\000\000\000\ +\\001\000\004\000\070\000\026\000\041\000\089\000\069\000\000\000\ +\\001\000\004\000\126\000\005\000\069\001\014\000\125\000\016\000\124\000\ +\\018\000\123\000\019\000\122\000\021\000\121\000\022\000\120\000\ +\\023\000\119\000\024\000\118\000\026\000\144\000\048\000\116\000\ +\\049\000\115\000\050\000\114\000\051\000\113\000\085\000\101\000\000\000\ +\\001\000\004\000\126\000\006\000\242\000\014\000\125\000\016\000\124\000\ +\\018\000\123\000\019\000\122\000\021\000\121\000\022\000\120\000\ +\\023\000\119\000\024\000\118\000\026\000\144\000\048\000\116\000\ +\\049\000\115\000\050\000\114\000\051\000\113\000\085\000\101\000\000\000\ +\\001\000\004\000\126\000\009\000\145\000\014\000\125\000\016\000\124\000\ +\\018\000\123\000\019\000\122\000\021\000\121\000\022\000\120\000\ +\\023\000\119\000\024\000\118\000\026\000\144\000\048\000\116\000\ +\\049\000\115\000\050\000\114\000\051\000\113\000\085\000\101\000\000\000\ +\\001\000\004\000\126\000\009\000\245\000\014\000\125\000\016\000\124\000\ +\\018\000\123\000\019\000\122\000\021\000\121\000\022\000\120\000\ +\\023\000\119\000\024\000\118\000\026\000\144\000\048\000\116\000\ +\\049\000\115\000\050\000\114\000\051\000\113\000\085\000\101\000\000\000\ +\\001\000\004\000\126\000\009\000\098\001\014\000\125\000\016\000\124\000\ +\\018\000\123\000\019\000\122\000\021\000\121\000\022\000\120\000\ +\\023\000\119\000\024\000\118\000\026\000\144\000\048\000\116\000\ +\\049\000\115\000\050\000\114\000\051\000\113\000\085\000\101\000\000\000\ +\\001\000\004\000\126\000\009\000\130\001\014\000\125\000\016\000\124\000\ +\\018\000\123\000\019\000\122\000\021\000\121\000\022\000\120\000\ +\\023\000\119\000\024\000\118\000\026\000\144\000\048\000\116\000\ +\\049\000\115\000\050\000\114\000\051\000\113\000\085\000\101\000\000\000\ +\\001\000\004\000\126\000\009\000\137\001\014\000\125\000\016\000\124\000\ +\\018\000\123\000\019\000\122\000\021\000\121\000\022\000\120\000\ +\\023\000\119\000\024\000\118\000\026\000\144\000\048\000\116\000\ +\\049\000\115\000\050\000\114\000\051\000\113\000\085\000\101\000\000\000\ +\\001\000\004\000\126\000\014\000\125\000\016\000\124\000\018\000\123\000\ +\\019\000\122\000\021\000\121\000\022\000\120\000\023\000\119\000\ +\\024\000\118\000\026\000\144\000\048\000\116\000\049\000\115\000\ +\\050\000\114\000\051\000\113\000\056\000\036\000\057\000\035\000\ +\\071\000\034\000\072\000\033\000\073\000\032\000\074\000\031\000\ +\\075\000\030\000\076\000\029\000\077\000\028\000\080\000\027\000\ +\\081\000\026\000\082\000\025\000\083\000\024\000\084\000\023\000\ +\\085\000\101\000\089\000\021\000\000\000\ +\\001\000\004\000\126\000\014\000\125\000\016\000\124\000\018\000\123\000\ +\\019\000\122\000\021\000\121\000\022\000\120\000\023\000\119\000\ +\\024\000\118\000\026\000\144\000\048\000\116\000\049\000\115\000\ +\\050\000\114\000\051\000\113\000\085\000\101\000\000\000\ +\\001\000\004\000\215\000\008\000\214\000\009\000\252\000\010\000\213\000\ +\\012\000\212\000\013\000\211\000\014\000\210\000\015\000\209\000\ +\\017\000\208\000\018\000\207\000\019\000\206\000\020\000\205\000\ +\\022\000\204\000\023\000\203\000\024\000\202\000\025\000\201\000\ +\\027\000\200\000\028\000\199\000\029\000\198\000\030\000\197\000\ +\\031\000\196\000\032\000\195\000\033\000\194\000\034\000\193\000\ +\\035\000\192\000\036\000\191\000\037\000\190\000\038\000\189\000\ +\\039\000\188\000\040\000\187\000\041\000\186\000\042\000\185\000\ +\\043\000\184\000\044\000\183\000\045\000\182\000\046\000\181\000\ +\\047\000\180\000\000\000\ +\\001\000\004\000\215\000\008\000\214\000\009\000\088\001\010\000\213\000\ +\\012\000\212\000\013\000\211\000\014\000\210\000\015\000\209\000\ +\\017\000\208\000\018\000\207\000\019\000\206\000\020\000\205\000\ +\\022\000\204\000\023\000\203\000\024\000\202\000\025\000\201\000\ +\\027\000\200\000\028\000\199\000\029\000\198\000\030\000\197\000\ +\\031\000\196\000\032\000\195\000\033\000\194\000\034\000\193\000\ +\\035\000\192\000\036\000\191\000\037\000\190\000\038\000\189\000\ +\\039\000\188\000\040\000\187\000\041\000\186\000\042\000\185\000\ +\\043\000\184\000\044\000\183\000\045\000\182\000\046\000\181\000\ +\\047\000\180\000\000\000\ +\\001\000\004\000\215\000\008\000\214\000\009\000\133\001\010\000\213\000\ +\\012\000\212\000\013\000\211\000\014\000\210\000\015\000\209\000\ +\\017\000\208\000\018\000\207\000\019\000\206\000\020\000\205\000\ +\\022\000\204\000\023\000\203\000\024\000\202\000\025\000\201\000\ +\\027\000\200\000\028\000\199\000\029\000\198\000\030\000\197\000\ +\\031\000\196\000\032\000\195\000\033\000\194\000\034\000\193\000\ +\\035\000\192\000\036\000\191\000\037\000\190\000\038\000\189\000\ +\\039\000\188\000\040\000\187\000\041\000\186\000\042\000\185\000\ +\\043\000\184\000\044\000\183\000\045\000\182\000\046\000\181\000\ +\\047\000\180\000\000\000\ +\\001\000\004\000\215\000\008\000\214\000\009\000\153\001\010\000\213\000\ +\\012\000\212\000\013\000\211\000\014\000\210\000\015\000\209\000\ +\\017\000\208\000\018\000\207\000\019\000\206\000\020\000\205\000\ +\\022\000\204\000\023\000\203\000\024\000\202\000\025\000\201\000\ +\\027\000\200\000\028\000\199\000\029\000\198\000\030\000\197\000\ +\\031\000\196\000\032\000\195\000\033\000\194\000\034\000\193\000\ +\\035\000\192\000\036\000\191\000\037\000\190\000\038\000\189\000\ +\\039\000\188\000\040\000\187\000\041\000\186\000\042\000\185\000\ +\\043\000\184\000\044\000\183\000\045\000\182\000\046\000\181\000\ +\\047\000\180\000\000\000\ +\\001\000\004\000\215\000\008\000\214\000\009\000\155\001\010\000\213\000\ +\\012\000\212\000\013\000\211\000\014\000\210\000\015\000\209\000\ +\\017\000\208\000\018\000\207\000\019\000\206\000\020\000\205\000\ +\\022\000\204\000\023\000\203\000\024\000\202\000\025\000\201\000\ +\\027\000\200\000\028\000\199\000\029\000\198\000\030\000\197\000\ +\\031\000\196\000\032\000\195\000\033\000\194\000\034\000\193\000\ +\\035\000\192\000\036\000\191\000\037\000\190\000\038\000\189\000\ +\\039\000\188\000\040\000\187\000\041\000\186\000\042\000\185\000\ +\\043\000\184\000\044\000\183\000\045\000\182\000\046\000\181\000\ +\\047\000\180\000\000\000\ +\\001\000\004\000\219\000\014\000\125\000\016\000\124\000\018\000\123\000\ +\\019\000\122\000\021\000\121\000\022\000\120\000\023\000\119\000\ +\\024\000\118\000\026\000\144\000\048\000\116\000\049\000\115\000\ +\\050\000\114\000\051\000\113\000\085\000\101\000\000\000\ +\\001\000\004\000\225\000\000\000\ +\\001\000\004\000\228\000\000\000\ +\\001\000\004\000\230\000\000\000\ +\\001\000\004\000\231\000\000\000\ +\\001\000\004\000\006\001\005\000\101\001\008\000\005\001\022\000\042\000\ +\\026\000\041\000\052\000\040\000\053\000\039\000\054\000\038\000\ +\\055\000\037\000\056\000\036\000\057\000\035\000\071\000\034\000\ +\\072\000\033\000\073\000\032\000\074\000\031\000\075\000\030\000\ +\\076\000\029\000\077\000\028\000\080\000\027\000\081\000\026\000\ +\\082\000\025\000\083\000\024\000\084\000\023\000\086\000\022\000\ +\\089\000\021\000\000\000\ +\\001\000\004\000\006\001\008\000\005\001\022\000\042\000\026\000\041\000\ +\\052\000\040\000\053\000\039\000\054\000\038\000\055\000\037\000\ +\\056\000\036\000\057\000\035\000\071\000\034\000\072\000\033\000\ +\\073\000\032\000\074\000\031\000\075\000\030\000\076\000\029\000\ +\\077\000\028\000\080\000\027\000\081\000\026\000\082\000\025\000\ +\\083\000\024\000\084\000\023\000\086\000\022\000\089\000\021\000\000\000\ +\\001\000\004\000\083\001\005\000\101\001\008\000\005\001\022\000\042\000\ +\\052\000\040\000\053\000\039\000\054\000\038\000\055\000\037\000\ +\\056\000\036\000\057\000\035\000\071\000\034\000\072\000\033\000\ +\\073\000\032\000\074\000\031\000\075\000\030\000\076\000\029\000\ +\\077\000\028\000\080\000\027\000\081\000\026\000\082\000\025\000\ +\\083\000\024\000\084\000\023\000\086\000\022\000\089\000\021\000\000\000\ +\\001\000\004\000\123\001\000\000\ +\\001\000\005\000\153\000\026\000\152\000\052\000\040\000\053\000\039\000\ +\\054\000\038\000\055\000\037\000\056\000\036\000\057\000\035\000\ +\\071\000\034\000\072\000\033\000\073\000\032\000\074\000\031\000\ +\\075\000\030\000\076\000\029\000\077\000\028\000\080\000\027\000\ +\\081\000\026\000\082\000\025\000\083\000\024\000\084\000\023\000\ +\\086\000\022\000\089\000\021\000\000\000\ +\\001\000\005\000\161\000\000\000\ +\\001\000\005\000\248\000\026\000\152\000\052\000\040\000\053\000\039\000\ +\\054\000\038\000\055\000\037\000\056\000\036\000\057\000\035\000\ +\\071\000\034\000\072\000\033\000\073\000\032\000\074\000\031\000\ +\\075\000\030\000\076\000\029\000\077\000\028\000\080\000\027\000\ +\\081\000\026\000\082\000\025\000\083\000\024\000\084\000\023\000\ +\\086\000\022\000\089\000\021\000\000\000\ +\\001\000\005\000\251\000\000\000\ +\\001\000\005\000\253\000\000\000\ +\\001\000\005\000\000\001\011\000\255\000\000\000\ +\\001\000\005\000\084\001\000\000\ +\\001\000\005\000\085\001\011\000\178\000\000\000\ +\\001\000\005\000\089\001\000\000\ +\\001\000\005\000\090\001\011\000\255\000\000\000\ +\\001\000\005\000\118\001\011\000\117\001\000\000\ +\\001\000\005\000\119\001\000\000\ +\\001\000\005\000\120\001\011\000\178\000\000\000\ +\\001\000\005\000\122\001\011\000\178\000\000\000\ +\\001\000\005\000\125\001\011\000\178\000\000\000\ +\\001\000\005\000\132\001\052\000\040\000\053\000\039\000\054\000\038\000\ +\\055\000\037\000\056\000\036\000\057\000\035\000\071\000\034\000\ +\\072\000\033\000\073\000\032\000\074\000\031\000\075\000\030\000\ +\\076\000\029\000\077\000\028\000\080\000\027\000\081\000\026\000\ +\\082\000\025\000\083\000\024\000\084\000\023\000\086\000\022\000\ +\\089\000\021\000\000\000\ +\\001\000\005\000\134\001\000\000\ +\\001\000\005\000\135\001\000\000\ +\\001\000\005\000\140\001\026\000\152\000\052\000\040\000\053\000\039\000\ +\\054\000\038\000\055\000\037\000\056\000\036\000\057\000\035\000\ +\\071\000\034\000\072\000\033\000\073\000\032\000\074\000\031\000\ +\\075\000\030\000\076\000\029\000\077\000\028\000\080\000\027\000\ +\\081\000\026\000\082\000\025\000\083\000\024\000\084\000\023\000\ +\\086\000\022\000\089\000\021\000\000\000\ +\\001\000\005\000\154\001\000\000\ +\\001\000\005\000\156\001\000\000\ +\\001\000\005\000\157\001\011\000\255\000\000\000\ +\\001\000\005\000\158\001\011\000\178\000\000\000\ +\\001\000\005\000\164\001\000\000\ +\\001\000\006\000\047\000\026\000\046\000\089\000\045\000\000\000\ +\\001\000\006\000\054\000\052\000\040\000\053\000\039\000\054\000\038\000\ +\\055\000\037\000\056\000\036\000\057\000\035\000\071\000\034\000\ +\\072\000\033\000\073\000\032\000\074\000\031\000\075\000\030\000\ +\\076\000\029\000\077\000\028\000\080\000\027\000\081\000\026\000\ +\\082\000\025\000\083\000\024\000\084\000\023\000\086\000\022\000\ +\\089\000\021\000\000\000\ +\\001\000\006\000\076\000\026\000\075\000\089\000\074\000\000\000\ +\\001\000\007\000\165\000\056\000\036\000\057\000\035\000\071\000\034\000\ +\\072\000\033\000\073\000\032\000\074\000\031\000\075\000\030\000\ +\\076\000\029\000\077\000\028\000\080\000\027\000\081\000\026\000\ +\\082\000\025\000\083\000\024\000\084\000\023\000\089\000\021\000\000\000\ +\\001\000\007\000\217\000\000\000\ +\\001\000\007\000\018\001\056\000\036\000\057\000\035\000\071\000\034\000\ +\\072\000\033\000\073\000\032\000\074\000\031\000\075\000\030\000\ +\\076\000\029\000\077\000\028\000\080\000\027\000\081\000\026\000\ +\\082\000\025\000\083\000\024\000\084\000\023\000\089\000\021\000\000\000\ +\\001\000\007\000\019\001\056\000\036\000\057\000\035\000\071\000\034\000\ +\\072\000\033\000\073\000\032\000\074\000\031\000\075\000\030\000\ +\\076\000\029\000\077\000\028\000\080\000\027\000\081\000\026\000\ +\\082\000\025\000\083\000\024\000\084\000\023\000\089\000\021\000\000\000\ +\\001\000\007\000\107\001\000\000\ +\\001\000\007\000\141\001\000\000\ +\\001\000\007\000\142\001\000\000\ +\\001\000\007\000\143\001\000\000\ +\\001\000\007\000\151\001\000\000\ +\\001\000\009\000\116\001\011\000\178\000\000\000\ +\\001\000\026\000\158\000\000\000\ +\\001\000\026\000\220\000\000\000\ +\\001\000\026\000\054\001\089\000\053\001\000\000\ +\\001\000\026\000\065\001\089\000\064\001\000\000\ +\\001\000\026\000\093\001\000\000\ +\\001\000\052\000\040\000\053\000\039\000\054\000\038\000\055\000\037\000\ +\\056\000\036\000\057\000\035\000\071\000\034\000\072\000\033\000\ +\\073\000\032\000\074\000\031\000\075\000\030\000\076\000\029\000\ +\\077\000\028\000\080\000\027\000\081\000\026\000\082\000\025\000\ +\\083\000\024\000\084\000\023\000\086\000\022\000\088\000\092\001\ +\\089\000\021\000\000\000\ +\\001\000\052\000\040\000\053\000\039\000\054\000\038\000\055\000\037\000\ +\\056\000\036\000\057\000\035\000\071\000\034\000\072\000\033\000\ +\\073\000\032\000\074\000\031\000\075\000\030\000\076\000\029\000\ +\\077\000\028\000\080\000\027\000\081\000\026\000\082\000\025\000\ +\\083\000\024\000\084\000\023\000\086\000\022\000\089\000\021\000\000\000\ +\\001\000\056\000\036\000\057\000\035\000\071\000\034\000\072\000\033\000\ +\\073\000\032\000\074\000\031\000\075\000\030\000\076\000\029\000\ +\\077\000\028\000\080\000\027\000\081\000\026\000\082\000\025\000\ +\\083\000\024\000\084\000\023\000\089\000\021\000\000\000\ +\\001\000\066\000\077\001\000\000\ +\\001\000\071\000\034\000\072\000\033\000\073\000\032\000\074\000\031\000\ +\\075\000\030\000\076\000\029\000\077\000\028\000\080\000\027\000\ +\\081\000\026\000\082\000\025\000\083\000\024\000\084\000\023\000\ +\\089\000\021\000\000\000\ +\\167\001\003\000\044\000\004\000\043\000\022\000\042\000\026\000\041\000\ +\\052\000\040\000\053\000\039\000\054\000\038\000\055\000\037\000\ +\\056\000\036\000\057\000\035\000\071\000\034\000\072\000\033\000\ +\\073\000\032\000\074\000\031\000\075\000\030\000\076\000\029\000\ +\\077\000\028\000\080\000\027\000\081\000\026\000\082\000\025\000\ +\\083\000\024\000\084\000\023\000\086\000\022\000\089\000\021\000\000\000\ +\\168\001\000\000\ +\\169\001\000\000\ +\\170\001\000\000\ +\\171\001\000\000\ +\\172\001\000\000\ +\\173\001\000\000\ +\\174\001\000\000\ +\\175\001\000\000\ +\\176\001\000\000\ +\\177\001\000\000\ +\\178\001\000\000\ +\\179\001\000\000\ +\\180\001\000\000\ +\\181\001\000\000\ +\\182\001\000\000\ +\\183\001\000\000\ +\\184\001\000\000\ +\\185\001\060\000\160\001\000\000\ +\\186\001\000\000\ +\\187\001\000\000\ +\\188\001\000\000\ +\\189\001\000\000\ +\\190\001\004\000\070\000\022\000\042\000\026\000\041\000\089\000\069\000\000\000\ +\\191\001\011\000\141\000\000\000\ +\\192\001\011\000\136\000\000\000\ +\\193\001\003\000\127\000\004\000\126\000\006\000\054\000\014\000\125\000\ +\\016\000\124\000\018\000\123\000\019\000\122\000\021\000\121\000\ +\\022\000\120\000\023\000\119\000\024\000\118\000\026\000\117\000\ +\\048\000\116\000\049\000\115\000\050\000\114\000\051\000\113\000\ +\\058\000\112\000\061\000\111\000\062\000\110\000\063\000\109\000\ +\\064\000\108\000\065\000\107\000\066\000\106\000\067\000\105\000\ +\\068\000\104\000\069\000\103\000\070\000\102\000\085\000\101\000\000\000\ +\\194\001\003\000\127\000\004\000\126\000\006\000\054\000\014\000\125\000\ +\\016\000\124\000\018\000\123\000\019\000\122\000\021\000\121\000\ +\\022\000\120\000\023\000\119\000\024\000\118\000\026\000\117\000\ +\\048\000\116\000\049\000\115\000\050\000\114\000\051\000\113\000\ +\\052\000\040\000\053\000\039\000\054\000\038\000\055\000\037\000\ +\\056\000\036\000\057\000\035\000\058\000\112\000\061\000\111\000\ +\\062\000\110\000\063\000\109\000\064\000\108\000\065\000\107\000\ +\\066\000\106\000\067\000\105\000\068\000\104\000\069\000\103\000\ +\\070\000\102\000\071\000\034\000\072\000\033\000\073\000\032\000\ +\\074\000\031\000\075\000\030\000\076\000\029\000\077\000\028\000\ +\\080\000\027\000\081\000\026\000\082\000\025\000\083\000\024\000\ +\\084\000\023\000\085\000\101\000\086\000\022\000\089\000\021\000\000\000\ +\\195\001\000\000\ +\\196\001\000\000\ +\\197\001\000\000\ +\\198\001\000\000\ +\\199\001\000\000\ +\\200\001\000\000\ +\\201\001\000\000\ +\\202\001\000\000\ +\\203\001\000\000\ +\\204\001\000\000\ +\\205\001\004\000\215\000\008\000\214\000\010\000\213\000\012\000\212\000\ +\\013\000\211\000\014\000\210\000\015\000\209\000\017\000\208\000\ +\\018\000\207\000\019\000\206\000\020\000\205\000\022\000\204\000\ +\\023\000\203\000\024\000\202\000\025\000\201\000\038\000\189\000\ +\\039\000\188\000\040\000\187\000\041\000\186\000\042\000\185\000\ +\\043\000\184\000\044\000\183\000\045\000\182\000\046\000\181\000\ +\\047\000\180\000\000\000\ +\\206\001\004\000\215\000\008\000\214\000\010\000\213\000\012\000\212\000\ +\\013\000\211\000\014\000\210\000\015\000\209\000\017\000\208\000\ +\\018\000\207\000\019\000\206\000\020\000\205\000\022\000\204\000\ +\\023\000\203\000\024\000\202\000\025\000\201\000\027\000\200\000\ +\\028\000\199\000\029\000\198\000\030\000\197\000\031\000\196\000\ +\\032\000\195\000\033\000\194\000\034\000\193\000\035\000\192\000\ +\\036\000\191\000\037\000\190\000\038\000\189\000\039\000\188\000\ +\\040\000\187\000\041\000\186\000\042\000\185\000\043\000\184\000\ +\\044\000\183\000\045\000\182\000\046\000\181\000\047\000\180\000\000\000\ +\\207\001\004\000\215\000\008\000\214\000\010\000\213\000\012\000\212\000\ +\\013\000\211\000\014\000\210\000\015\000\209\000\017\000\208\000\ +\\018\000\207\000\019\000\206\000\020\000\205\000\022\000\204\000\ +\\023\000\203\000\024\000\202\000\025\000\201\000\027\000\200\000\ +\\028\000\199\000\029\000\198\000\030\000\197\000\031\000\196\000\ +\\032\000\195\000\033\000\194\000\034\000\193\000\035\000\192\000\ +\\036\000\191\000\037\000\190\000\038\000\189\000\039\000\188\000\ +\\040\000\187\000\041\000\186\000\042\000\185\000\043\000\184\000\ +\\044\000\183\000\045\000\182\000\046\000\181\000\047\000\180\000\000\000\ +\\208\001\004\000\215\000\008\000\214\000\010\000\213\000\012\000\212\000\ +\\013\000\211\000\014\000\210\000\015\000\209\000\017\000\208\000\ +\\018\000\207\000\019\000\206\000\020\000\205\000\022\000\204\000\ +\\023\000\203\000\024\000\202\000\025\000\201\000\027\000\200\000\ +\\028\000\199\000\029\000\198\000\030\000\197\000\031\000\196\000\ +\\032\000\195\000\033\000\194\000\034\000\193\000\035\000\192\000\ +\\036\000\191\000\037\000\190\000\038\000\189\000\039\000\188\000\ +\\040\000\187\000\041\000\186\000\042\000\185\000\043\000\184\000\ +\\044\000\183\000\045\000\182\000\046\000\181\000\047\000\180\000\000\000\ +\\209\001\004\000\215\000\008\000\214\000\010\000\213\000\012\000\212\000\ +\\013\000\211\000\014\000\210\000\015\000\209\000\017\000\208\000\ +\\018\000\207\000\019\000\206\000\020\000\205\000\022\000\204\000\ +\\023\000\203\000\024\000\202\000\025\000\201\000\027\000\200\000\ +\\028\000\199\000\029\000\198\000\030\000\197\000\031\000\196\000\ +\\032\000\195\000\033\000\194\000\034\000\193\000\035\000\192\000\ +\\036\000\191\000\037\000\190\000\038\000\189\000\039\000\188\000\ +\\040\000\187\000\041\000\186\000\042\000\185\000\043\000\184\000\ +\\044\000\183\000\045\000\182\000\046\000\181\000\047\000\180\000\000\000\ +\\210\001\004\000\215\000\008\000\214\000\010\000\213\000\012\000\212\000\ +\\013\000\211\000\014\000\210\000\015\000\209\000\017\000\208\000\ +\\018\000\207\000\019\000\206\000\020\000\205\000\022\000\204\000\ +\\023\000\203\000\024\000\202\000\025\000\201\000\027\000\200\000\ +\\028\000\199\000\029\000\198\000\030\000\197\000\031\000\196\000\ +\\032\000\195\000\033\000\194\000\034\000\193\000\035\000\192\000\ +\\036\000\191\000\037\000\190\000\038\000\189\000\039\000\188\000\ +\\040\000\187\000\041\000\186\000\042\000\185\000\043\000\184\000\ +\\044\000\183\000\045\000\182\000\046\000\181\000\047\000\180\000\000\000\ +\\211\001\004\000\215\000\008\000\214\000\010\000\213\000\012\000\212\000\ +\\013\000\211\000\014\000\210\000\015\000\209\000\017\000\208\000\ +\\018\000\207\000\019\000\206\000\020\000\205\000\022\000\204\000\ +\\023\000\203\000\024\000\202\000\025\000\201\000\027\000\200\000\ +\\028\000\199\000\029\000\198\000\030\000\197\000\031\000\196\000\ +\\032\000\195\000\033\000\194\000\034\000\193\000\035\000\192\000\ +\\036\000\191\000\037\000\190\000\038\000\189\000\039\000\188\000\ +\\040\000\187\000\041\000\186\000\042\000\185\000\043\000\184\000\ +\\044\000\183\000\045\000\182\000\046\000\181\000\047\000\180\000\000\000\ +\\212\001\004\000\215\000\008\000\214\000\010\000\213\000\012\000\212\000\ +\\013\000\211\000\014\000\210\000\015\000\209\000\017\000\208\000\ +\\018\000\207\000\019\000\206\000\020\000\205\000\022\000\204\000\ +\\023\000\203\000\024\000\202\000\025\000\201\000\027\000\200\000\ +\\028\000\199\000\029\000\198\000\030\000\197\000\031\000\196\000\ +\\032\000\195\000\033\000\194\000\034\000\193\000\035\000\192\000\ +\\036\000\191\000\037\000\190\000\038\000\189\000\039\000\188\000\ +\\040\000\187\000\041\000\186\000\042\000\185\000\043\000\184\000\ +\\044\000\183\000\045\000\182\000\046\000\181\000\047\000\180\000\000\000\ +\\213\001\004\000\215\000\008\000\214\000\010\000\213\000\012\000\212\000\ +\\013\000\211\000\014\000\210\000\015\000\209\000\017\000\208\000\ +\\018\000\207\000\019\000\206\000\020\000\205\000\022\000\204\000\ +\\023\000\203\000\024\000\202\000\025\000\201\000\027\000\200\000\ +\\028\000\199\000\029\000\198\000\030\000\197\000\031\000\196\000\ +\\032\000\195\000\033\000\194\000\034\000\193\000\035\000\192\000\ +\\036\000\191\000\037\000\190\000\038\000\189\000\039\000\188\000\ +\\040\000\187\000\041\000\186\000\042\000\185\000\043\000\184\000\ +\\044\000\183\000\045\000\182\000\046\000\181\000\047\000\180\000\000\000\ +\\214\001\004\000\215\000\008\000\214\000\010\000\213\000\012\000\212\000\ +\\013\000\211\000\014\000\210\000\015\000\209\000\017\000\208\000\ +\\018\000\207\000\019\000\206\000\020\000\205\000\022\000\204\000\ +\\023\000\203\000\024\000\202\000\025\000\201\000\027\000\200\000\ +\\028\000\199\000\029\000\198\000\030\000\197\000\031\000\196\000\ +\\032\000\195\000\033\000\194\000\034\000\193\000\035\000\192\000\ +\\036\000\191\000\037\000\190\000\038\000\189\000\039\000\188\000\ +\\040\000\187\000\041\000\186\000\042\000\185\000\043\000\184\000\ +\\044\000\183\000\045\000\182\000\046\000\181\000\047\000\180\000\000\000\ +\\215\001\004\000\215\000\008\000\214\000\010\000\213\000\012\000\212\000\ +\\013\000\211\000\014\000\210\000\015\000\209\000\017\000\208\000\ +\\018\000\207\000\019\000\206\000\020\000\205\000\022\000\204\000\ +\\023\000\203\000\024\000\202\000\025\000\201\000\027\000\200\000\ +\\028\000\199\000\029\000\198\000\030\000\197\000\031\000\196\000\ +\\032\000\195\000\033\000\194\000\034\000\193\000\035\000\192\000\ +\\036\000\191\000\037\000\190\000\038\000\189\000\039\000\188\000\ +\\040\000\187\000\041\000\186\000\042\000\185\000\043\000\184\000\ +\\044\000\183\000\045\000\182\000\046\000\181\000\047\000\180\000\000\000\ +\\216\001\004\000\215\000\008\000\214\000\010\000\213\000\012\000\212\000\ +\\013\000\211\000\014\000\210\000\015\000\209\000\017\000\208\000\ +\\018\000\207\000\019\000\206\000\020\000\205\000\022\000\204\000\ +\\023\000\203\000\024\000\202\000\025\000\201\000\027\000\200\000\ +\\028\000\199\000\029\000\198\000\030\000\197\000\031\000\196\000\ +\\032\000\195\000\033\000\194\000\034\000\193\000\035\000\192\000\ +\\036\000\191\000\037\000\190\000\038\000\189\000\039\000\188\000\ +\\040\000\187\000\041\000\186\000\042\000\185\000\043\000\184\000\ +\\044\000\183\000\045\000\182\000\046\000\181\000\047\000\180\000\000\000\ +\\217\001\004\000\215\000\008\000\214\000\010\000\213\000\013\000\211\000\ +\\014\000\210\000\015\000\209\000\017\000\208\000\018\000\207\000\ +\\019\000\206\000\020\000\205\000\022\000\204\000\023\000\203\000\ +\\024\000\202\000\025\000\201\000\038\000\189\000\039\000\188\000\ +\\040\000\187\000\041\000\186\000\042\000\185\000\043\000\184\000\ +\\045\000\182\000\046\000\181\000\047\000\180\000\000\000\ +\\218\001\004\000\215\000\008\000\214\000\010\000\213\000\013\000\211\000\ +\\014\000\210\000\015\000\209\000\017\000\208\000\018\000\207\000\ +\\019\000\206\000\020\000\205\000\022\000\204\000\023\000\203\000\ +\\024\000\202\000\025\000\201\000\038\000\189\000\039\000\188\000\ +\\040\000\187\000\041\000\186\000\042\000\185\000\043\000\184\000\ +\\046\000\181\000\047\000\180\000\000\000\ +\\219\001\004\000\215\000\008\000\214\000\010\000\213\000\013\000\211\000\ +\\014\000\210\000\017\000\208\000\018\000\207\000\019\000\206\000\ +\\020\000\205\000\022\000\204\000\023\000\203\000\024\000\202\000\ +\\025\000\201\000\038\000\189\000\039\000\188\000\040\000\187\000\ +\\041\000\186\000\042\000\185\000\043\000\184\000\046\000\181\000\ +\\047\000\180\000\000\000\ +\\220\001\004\000\215\000\008\000\214\000\010\000\213\000\013\000\211\000\ +\\014\000\210\000\017\000\208\000\018\000\207\000\019\000\206\000\ +\\022\000\204\000\023\000\203\000\024\000\202\000\025\000\201\000\ +\\038\000\189\000\039\000\188\000\040\000\187\000\041\000\186\000\ +\\042\000\185\000\043\000\184\000\046\000\181\000\047\000\180\000\000\000\ +\\221\001\004\000\215\000\008\000\214\000\010\000\213\000\013\000\211\000\ +\\017\000\208\000\018\000\207\000\019\000\206\000\022\000\204\000\ +\\023\000\203\000\024\000\202\000\025\000\201\000\038\000\189\000\ +\\039\000\188\000\040\000\187\000\041\000\186\000\042\000\185\000\ +\\043\000\184\000\046\000\181\000\047\000\180\000\000\000\ +\\222\001\004\000\215\000\008\000\214\000\010\000\213\000\013\000\211\000\ +\\017\000\208\000\018\000\207\000\019\000\206\000\022\000\204\000\ +\\023\000\203\000\024\000\202\000\025\000\201\000\038\000\189\000\ +\\039\000\188\000\040\000\187\000\041\000\186\000\046\000\181\000\ +\\047\000\180\000\000\000\ +\\223\001\004\000\215\000\008\000\214\000\010\000\213\000\013\000\211\000\ +\\017\000\208\000\018\000\207\000\019\000\206\000\022\000\204\000\ +\\023\000\203\000\024\000\202\000\025\000\201\000\038\000\189\000\ +\\039\000\188\000\040\000\187\000\041\000\186\000\046\000\181\000\ +\\047\000\180\000\000\000\ +\\224\001\004\000\215\000\008\000\214\000\010\000\213\000\013\000\211\000\ +\\017\000\208\000\018\000\207\000\019\000\206\000\022\000\204\000\ +\\023\000\203\000\024\000\202\000\025\000\201\000\046\000\181\000\ +\\047\000\180\000\000\000\ +\\225\001\004\000\215\000\008\000\214\000\010\000\213\000\013\000\211\000\ +\\017\000\208\000\018\000\207\000\019\000\206\000\022\000\204\000\ +\\023\000\203\000\024\000\202\000\025\000\201\000\046\000\181\000\ +\\047\000\180\000\000\000\ +\\226\001\004\000\215\000\008\000\214\000\010\000\213\000\013\000\211\000\ +\\017\000\208\000\018\000\207\000\019\000\206\000\022\000\204\000\ +\\023\000\203\000\024\000\202\000\025\000\201\000\046\000\181\000\ +\\047\000\180\000\000\000\ +\\227\001\004\000\215\000\008\000\214\000\010\000\213\000\013\000\211\000\ +\\017\000\208\000\018\000\207\000\019\000\206\000\022\000\204\000\ +\\023\000\203\000\024\000\202\000\025\000\201\000\046\000\181\000\ +\\047\000\180\000\000\000\ +\\228\001\004\000\215\000\008\000\214\000\010\000\213\000\013\000\211\000\ +\\017\000\208\000\018\000\207\000\019\000\206\000\022\000\204\000\ +\\023\000\203\000\024\000\202\000\025\000\201\000\000\000\ +\\229\001\004\000\215\000\008\000\214\000\010\000\213\000\013\000\211\000\ +\\017\000\208\000\018\000\207\000\019\000\206\000\022\000\204\000\ +\\023\000\203\000\024\000\202\000\025\000\201\000\000\000\ +\\230\001\004\000\215\000\008\000\214\000\010\000\213\000\013\000\211\000\ +\\017\000\208\000\022\000\204\000\023\000\203\000\024\000\202\000\ +\\025\000\201\000\000\000\ +\\231\001\004\000\215\000\008\000\214\000\010\000\213\000\013\000\211\000\ +\\017\000\208\000\022\000\204\000\023\000\203\000\024\000\202\000\ +\\025\000\201\000\000\000\ +\\232\001\004\000\215\000\008\000\214\000\010\000\213\000\023\000\203\000\ +\\024\000\202\000\025\000\201\000\000\000\ +\\233\001\004\000\215\000\008\000\214\000\010\000\213\000\023\000\203\000\ +\\024\000\202\000\025\000\201\000\000\000\ +\\234\001\004\000\215\000\008\000\214\000\010\000\213\000\023\000\203\000\ +\\024\000\202\000\025\000\201\000\000\000\ +\\235\001\000\000\ +\\236\001\000\000\ +\\237\001\004\000\215\000\008\000\214\000\010\000\213\000\023\000\203\000\ +\\024\000\202\000\025\000\201\000\000\000\ +\\238\001\004\000\215\000\008\000\214\000\010\000\213\000\023\000\203\000\ +\\024\000\202\000\025\000\201\000\000\000\ +\\239\001\004\000\215\000\008\000\214\000\010\000\213\000\023\000\203\000\ +\\024\000\202\000\025\000\201\000\000\000\ +\\240\001\004\000\215\000\008\000\214\000\010\000\213\000\023\000\203\000\ +\\024\000\202\000\025\000\201\000\000\000\ +\\241\001\004\000\215\000\008\000\214\000\010\000\213\000\023\000\203\000\ +\\024\000\202\000\025\000\201\000\000\000\ +\\242\001\004\000\126\000\016\000\124\000\021\000\121\000\023\000\119\000\ +\\024\000\118\000\026\000\144\000\048\000\116\000\049\000\115\000\ +\\050\000\114\000\051\000\113\000\085\000\101\000\000\000\ +\\243\001\000\000\ +\\244\001\000\000\ +\\245\001\000\000\ +\\246\001\000\000\ +\\247\001\000\000\ +\\248\001\000\000\ +\\249\001\000\000\ +\\250\001\000\000\ +\\251\001\000\000\ +\\252\001\000\000\ +\\253\001\000\000\ +\\254\001\000\000\ +\\254\001\002\000\233\000\000\000\ +\\255\001\000\000\ +\\000\002\050\000\114\000\000\000\ +\\001\002\000\000\ +\\002\002\004\000\215\000\008\000\214\000\010\000\213\000\012\000\212\000\ +\\013\000\211\000\014\000\210\000\015\000\209\000\017\000\208\000\ +\\018\000\207\000\019\000\206\000\020\000\205\000\022\000\204\000\ +\\023\000\203\000\024\000\202\000\025\000\201\000\027\000\200\000\ +\\028\000\199\000\029\000\198\000\030\000\197\000\031\000\196\000\ +\\032\000\195\000\033\000\194\000\034\000\193\000\035\000\192\000\ +\\036\000\191\000\037\000\190\000\038\000\189\000\039\000\188\000\ +\\040\000\187\000\041\000\186\000\042\000\185\000\043\000\184\000\ +\\044\000\183\000\045\000\182\000\046\000\181\000\047\000\180\000\000\000\ +\\003\002\004\000\215\000\008\000\214\000\010\000\213\000\012\000\212\000\ +\\013\000\211\000\014\000\210\000\015\000\209\000\017\000\208\000\ +\\018\000\207\000\019\000\206\000\020\000\205\000\022\000\204\000\ +\\023\000\203\000\024\000\202\000\025\000\201\000\027\000\200\000\ +\\028\000\199\000\029\000\198\000\030\000\197\000\031\000\196\000\ +\\032\000\195\000\033\000\194\000\034\000\193\000\035\000\192\000\ +\\036\000\191\000\037\000\190\000\038\000\189\000\039\000\188\000\ +\\040\000\187\000\041\000\186\000\042\000\185\000\043\000\184\000\ +\\044\000\183\000\045\000\182\000\046\000\181\000\047\000\180\000\000\000\ +\\004\002\004\000\126\000\014\000\125\000\016\000\124\000\018\000\123\000\ +\\019\000\122\000\021\000\121\000\022\000\120\000\023\000\119\000\ +\\024\000\118\000\026\000\144\000\048\000\116\000\049\000\115\000\ +\\050\000\114\000\051\000\113\000\085\000\101\000\000\000\ +\\005\002\011\000\178\000\000\000\ +\\006\002\004\000\215\000\008\000\214\000\010\000\213\000\012\000\212\000\ +\\013\000\211\000\014\000\210\000\015\000\209\000\017\000\208\000\ +\\018\000\207\000\019\000\206\000\020\000\205\000\022\000\204\000\ +\\023\000\203\000\024\000\202\000\025\000\201\000\027\000\200\000\ +\\028\000\199\000\029\000\198\000\030\000\197\000\031\000\196\000\ +\\032\000\195\000\033\000\194\000\034\000\193\000\035\000\192\000\ +\\036\000\191\000\037\000\190\000\038\000\189\000\039\000\188\000\ +\\040\000\187\000\041\000\186\000\042\000\185\000\043\000\184\000\ +\\044\000\183\000\045\000\182\000\046\000\181\000\047\000\180\000\000\000\ +\\007\002\004\000\215\000\008\000\214\000\010\000\213\000\012\000\212\000\ +\\013\000\211\000\014\000\210\000\015\000\209\000\017\000\208\000\ +\\018\000\207\000\019\000\206\000\020\000\205\000\022\000\204\000\ +\\023\000\203\000\024\000\202\000\025\000\201\000\027\000\200\000\ +\\028\000\199\000\029\000\198\000\030\000\197\000\031\000\196\000\ +\\032\000\195\000\033\000\194\000\034\000\193\000\035\000\192\000\ +\\036\000\191\000\037\000\190\000\038\000\189\000\039\000\188\000\ +\\040\000\187\000\041\000\186\000\042\000\185\000\043\000\184\000\ +\\044\000\183\000\045\000\182\000\046\000\181\000\047\000\180\000\000\000\ +\\008\002\004\000\083\001\008\000\005\001\022\000\042\000\000\000\ +\\009\002\000\000\ +\\010\002\052\000\040\000\053\000\039\000\054\000\038\000\055\000\037\000\ +\\056\000\036\000\057\000\035\000\071\000\034\000\072\000\033\000\ +\\073\000\032\000\074\000\031\000\075\000\030\000\076\000\029\000\ +\\077\000\028\000\080\000\027\000\081\000\026\000\082\000\025\000\ +\\083\000\024\000\084\000\023\000\086\000\022\000\000\000\ +\\011\002\052\000\040\000\053\000\039\000\054\000\038\000\055\000\037\000\ +\\056\000\036\000\057\000\035\000\071\000\034\000\072\000\033\000\ +\\073\000\032\000\074\000\031\000\075\000\030\000\076\000\029\000\ +\\077\000\028\000\080\000\027\000\081\000\026\000\082\000\025\000\ +\\083\000\024\000\084\000\023\000\086\000\022\000\000\000\ +\\012\002\000\000\ +\\013\002\000\000\ +\\014\002\000\000\ +\\015\002\000\000\ +\\016\002\000\000\ +\\017\002\000\000\ +\\018\002\000\000\ +\\019\002\000\000\ +\\020\002\000\000\ +\\021\002\000\000\ +\\022\002\000\000\ +\\023\002\056\000\036\000\057\000\035\000\071\000\034\000\072\000\033\000\ +\\073\000\032\000\074\000\031\000\075\000\030\000\076\000\029\000\ +\\077\000\028\000\080\000\027\000\081\000\026\000\082\000\025\000\ +\\083\000\024\000\084\000\023\000\000\000\ +\\024\002\056\000\036\000\057\000\035\000\071\000\034\000\072\000\033\000\ +\\073\000\032\000\074\000\031\000\075\000\030\000\076\000\029\000\ +\\077\000\028\000\080\000\027\000\081\000\026\000\082\000\025\000\ +\\083\000\024\000\084\000\023\000\000\000\ +\\025\002\000\000\ +\\026\002\000\000\ +\\027\002\056\000\036\000\057\000\035\000\000\000\ +\\028\002\000\000\ +\\029\002\000\000\ +\\030\002\000\000\ +\\031\002\000\000\ +\\032\002\000\000\ +\\033\002\000\000\ +\\034\002\000\000\ +\\035\002\000\000\ +\\036\002\000\000\ +\\037\002\000\000\ +\\038\002\000\000\ +\\039\002\000\000\ +\\040\002\000\000\ +\\041\002\000\000\ +\\042\002\000\000\ +\\043\002\000\000\ +\\044\002\000\000\ +\\045\002\006\000\082\000\000\000\ +\\046\002\006\000\081\000\000\000\ +\\047\002\000\000\ +\\048\002\000\000\ +\\049\002\000\000\ +\\050\002\000\000\ +\\051\002\000\000\ +\\052\002\000\000\ +\\053\002\000\000\ +\\054\002\000\000\ +\\055\002\000\000\ +\\056\002\000\000\ +\\057\002\002\000\022\001\000\000\ +\\058\002\004\000\215\000\008\000\214\000\010\000\213\000\012\000\212\000\ +\\013\000\211\000\014\000\210\000\015\000\209\000\017\000\208\000\ +\\018\000\207\000\019\000\206\000\020\000\205\000\022\000\204\000\ +\\023\000\203\000\024\000\202\000\025\000\201\000\027\000\200\000\ +\\028\000\199\000\029\000\198\000\030\000\197\000\031\000\196\000\ +\\032\000\195\000\033\000\194\000\034\000\193\000\035\000\192\000\ +\\036\000\191\000\037\000\190\000\038\000\189\000\039\000\188\000\ +\\040\000\187\000\041\000\186\000\042\000\185\000\043\000\184\000\ +\\044\000\183\000\045\000\182\000\046\000\181\000\047\000\180\000\000\000\ +\\059\002\004\000\215\000\008\000\214\000\010\000\213\000\012\000\212\000\ +\\013\000\211\000\014\000\210\000\015\000\209\000\017\000\208\000\ +\\018\000\207\000\019\000\206\000\020\000\205\000\022\000\204\000\ +\\023\000\203\000\024\000\202\000\025\000\201\000\027\000\200\000\ +\\028\000\199\000\029\000\198\000\030\000\197\000\031\000\196\000\ +\\032\000\195\000\033\000\194\000\034\000\193\000\035\000\192\000\ +\\036\000\191\000\037\000\190\000\038\000\189\000\039\000\188\000\ +\\040\000\187\000\041\000\186\000\042\000\185\000\043\000\184\000\ +\\044\000\183\000\045\000\182\000\046\000\181\000\047\000\180\000\000\000\ +\\060\002\002\000\027\001\000\000\ +\\061\002\004\000\215\000\008\000\214\000\010\000\213\000\012\000\212\000\ +\\013\000\211\000\014\000\210\000\015\000\209\000\017\000\208\000\ +\\018\000\207\000\019\000\206\000\020\000\205\000\022\000\204\000\ +\\023\000\203\000\024\000\202\000\025\000\201\000\027\000\200\000\ +\\028\000\199\000\029\000\198\000\030\000\197\000\031\000\196\000\ +\\032\000\195\000\033\000\194\000\034\000\193\000\035\000\192\000\ +\\036\000\191\000\037\000\190\000\038\000\189\000\039\000\188\000\ +\\040\000\187\000\041\000\186\000\042\000\185\000\043\000\184\000\ +\\044\000\183\000\045\000\182\000\046\000\181\000\047\000\180\000\000\000\ +\\062\002\004\000\215\000\008\000\214\000\010\000\213\000\012\000\212\000\ +\\013\000\211\000\014\000\210\000\015\000\209\000\017\000\208\000\ +\\018\000\207\000\019\000\206\000\020\000\205\000\022\000\204\000\ +\\023\000\203\000\024\000\202\000\025\000\201\000\027\000\200\000\ +\\028\000\199\000\029\000\198\000\030\000\197\000\031\000\196\000\ +\\032\000\195\000\033\000\194\000\034\000\193\000\035\000\192\000\ +\\036\000\191\000\037\000\190\000\038\000\189\000\039\000\188\000\ +\\040\000\187\000\041\000\186\000\042\000\185\000\043\000\184\000\ +\\044\000\183\000\045\000\182\000\046\000\181\000\047\000\180\000\000\000\ +\\063\002\000\000\ +\\064\002\000\000\ +\\065\002\000\000\ +\\066\002\000\000\ +\\067\002\000\000\ +\\068\002\006\000\155\000\000\000\ +\\069\002\006\000\154\000\000\000\ +\\070\002\000\000\ +\\071\002\000\000\ +\\072\002\027\000\017\001\000\000\ +\\073\002\004\000\215\000\008\000\214\000\010\000\213\000\012\000\212\000\ +\\013\000\211\000\014\000\210\000\015\000\209\000\017\000\208\000\ +\\018\000\207\000\019\000\206\000\020\000\205\000\022\000\204\000\ +\\023\000\203\000\024\000\202\000\025\000\201\000\027\000\200\000\ +\\028\000\199\000\029\000\198\000\030\000\197\000\031\000\196\000\ +\\032\000\195\000\033\000\194\000\034\000\193\000\035\000\192\000\ +\\036\000\191\000\037\000\190\000\038\000\189\000\039\000\188\000\ +\\040\000\187\000\041\000\186\000\042\000\185\000\043\000\184\000\ +\\044\000\183\000\045\000\182\000\046\000\181\000\047\000\180\000\000\000\ +\\074\002\000\000\ +\\075\002\000\000\ +\\076\002\000\000\ +\\077\002\000\000\ +\\078\002\000\000\ +\\079\002\011\000\016\001\000\000\ +\\079\002\011\000\128\001\000\000\ +\\080\002\004\000\126\000\006\000\242\000\014\000\125\000\016\000\124\000\ +\\018\000\123\000\019\000\122\000\021\000\121\000\022\000\120\000\ +\\023\000\119\000\024\000\118\000\026\000\144\000\048\000\116\000\ +\\049\000\115\000\050\000\114\000\051\000\113\000\085\000\101\000\000\000\ +\\080\002\026\000\158\000\000\000\ +\\081\002\000\000\ +\\082\002\000\000\ +\\083\002\027\000\139\000\000\000\ +\\084\002\000\000\ +\\085\002\000\000\ +\\086\002\000\000\ +\\087\002\027\000\135\000\000\000\ +\\088\002\000\000\ +\\089\002\000\000\ +\\090\002\000\000\ +\\091\002\004\000\138\000\008\000\137\000\000\000\ +\\092\002\004\000\138\000\008\000\137\000\000\000\ +\\093\002\004\000\072\000\008\000\071\000\000\000\ +\\094\002\004\000\072\000\008\000\071\000\000\000\ +\\095\002\004\000\103\001\008\000\102\001\000\000\ +\\096\002\004\000\103\001\008\000\102\001\000\000\ +\\097\002\022\000\042\000\056\000\036\000\057\000\035\000\000\000\ +\\098\002\022\000\042\000\000\000\ +\\099\002\000\000\ +\\100\002\000\000\ +\\101\002\000\000\ +\\102\002\000\000\ +\\103\002\000\000\ +\\104\002\000\000\ +\\105\002\000\000\ +\\106\002\000\000\ +\\107\002\000\000\ +\\108\002\000\000\ +\\109\002\000\000\ +\\110\002\000\000\ +\\111\002\000\000\ +\\112\002\000\000\ +\\113\002\000\000\ +\\114\002\000\000\ +\\115\002\000\000\ +\\116\002\000\000\ +\\117\002\000\000\ +\\118\002\000\000\ +\\119\002\000\000\ +\\120\002\000\000\ +\\121\002\004\000\215\000\008\000\214\000\010\000\213\000\012\000\212\000\ +\\013\000\211\000\014\000\210\000\015\000\209\000\017\000\208\000\ +\\018\000\207\000\019\000\206\000\020\000\205\000\022\000\204\000\ +\\023\000\203\000\024\000\202\000\025\000\201\000\027\000\200\000\ +\\028\000\199\000\029\000\198\000\030\000\197\000\031\000\196\000\ +\\032\000\195\000\033\000\194\000\034\000\193\000\035\000\192\000\ +\\036\000\191\000\037\000\190\000\038\000\189\000\039\000\188\000\ +\\040\000\187\000\041\000\186\000\042\000\185\000\043\000\184\000\ +\\044\000\183\000\045\000\182\000\046\000\181\000\047\000\180\000\000\000\ +\\122\002\000\000\ +\\123\002\000\000\ +\\124\002\000\000\ +\\125\002\000\000\ +\\126\002\000\000\ +\\127\002\000\000\ +\\128\002\000\000\ +\\129\002\000\000\ +\\132\002\000\000\ +\\133\002\000\000\ +\\134\002\004\000\006\001\008\000\005\001\026\000\041\000\000\000\ +\\134\002\004\000\006\001\008\000\005\001\026\000\041\000\089\000\012\001\000\000\ +\\134\002\004\000\083\001\008\000\005\001\000\000\ +\\135\002\004\000\096\001\008\000\095\001\000\000\ +\\136\002\004\000\096\001\008\000\095\001\000\000\ +\\137\002\000\000\ +\\138\002\000\000\ +\\139\002\000\000\ +\\140\002\000\000\ +\\141\002\000\000\ +\\142\002\000\000\ +\\143\002\000\000\ +\\144\002\000\000\ +\\145\002\000\000\ +\\146\002\011\000\254\000\000\000\ +\\147\002\000\000\ +\\148\002\000\000\ +\\149\002\000\000\ +\\150\002\000\000\ +\\151\002\000\000\ +\\152\002\004\000\006\001\008\000\005\001\022\000\042\000\026\000\041\000\ +\\089\000\012\001\000\000\ +\\153\002\000\000\ +\\154\002\000\000\ +\\155\002\000\000\ +\\157\002\000\000\ +\" +val actionRowNumbers = +"\097\000\096\000\073\000\098\000\ +\\008\000\099\000\101\000\074\000\ +\\216\000\217\000\207\000\215\000\ +\\198\000\205\000\022\000\119\000\ +\\025\001\061\001\023\000\218\000\ +\\008\001\219\000\226\000\227\000\ +\\234\000\233\000\221\000\223\000\ +\\222\000\224\000\075\000\225\000\ +\\220\000\250\000\249\000\007\001\ +\\005\001\006\001\004\001\040\001\ +\\029\001\021\000\100\000\232\000\ +\\231\000\093\000\118\000\057\001\ +\\074\000\022\000\119\000\062\001\ +\\009\000\196\000\208\000\198\000\ +\\206\000\007\000\121\000\017\001\ +\\023\001\022\001\021\001\006\000\ +\\025\000\120\000\013\001\033\001\ +\\024\000\028\000\049\000\026\001\ +\\255\000\254\000\086\000\030\001\ +\\213\000\031\001\050\000\093\000\ +\\093\000\235\000\076\000\001\000\ +\\002\000\211\000\058\001\063\001\ +\\019\001\015\001\185\000\092\000\ +\\034\000\011\000\188\000\110\000\ +\\122\000\077\000\124\000\040\000\ +\\087\000\012\000\013\000\190\000\ +\\041\000\003\000\034\000\042\000\ +\\010\000\043\000\044\000\182\000\ +\\186\000\181\000\180\000\184\000\ +\\034\000\034\000\129\000\133\000\ +\\131\000\130\000\132\000\128\000\ +\\033\000\117\000\203\000\204\000\ +\\202\000\201\000\200\000\199\000\ +\\197\000\027\000\024\000\029\000\ +\\051\000\027\000\024\001\024\000\ +\\052\000\035\000\183\000\042\001\ +\\080\001\053\000\078\001\054\000\ +\\046\000\084\001\059\001\044\001\ +\\086\000\086\000\001\001\009\001\ +\\002\001\032\001\214\000\041\001\ +\\078\000\079\000\236\000\228\000\ +\\014\000\239\000\243\000\034\000\ +\\015\000\241\000\211\000\246\000\ +\\034\000\209\000\123\000\168\000\ +\\034\000\116\000\034\000\034\000\ +\\034\000\034\000\034\000\034\000\ +\\034\000\034\000\034\000\034\000\ +\\034\000\034\000\034\000\034\000\ +\\034\000\034\000\034\000\034\000\ +\\034\000\034\000\034\000\088\000\ +\\165\000\164\000\034\000\034\000\ +\\034\000\034\000\034\000\034\000\ +\\034\000\034\000\034\000\089\000\ +\\034\000\026\000\125\000\127\000\ +\\169\000\033\000\016\000\107\000\ +\\106\000\191\000\017\000\034\000\ +\\010\000\004\000\034\000\094\000\ +\\190\000\034\000\187\000\010\000\ +\\167\000\166\000\194\000\095\000\ +\\055\000\056\000\020\001\053\001\ +\\027\000\018\001\036\000\035\001\ +\\057\000\058\000\037\001\016\001\ +\\014\001\034\001\043\001\045\001\ +\\091\000\090\000\046\001\086\001\ +\\064\001\067\001\087\001\030\000\ +\\045\000\027\001\083\001\082\001\ +\\065\001\085\001\047\001\009\001\ +\\009\001\080\000\012\001\034\000\ +\\230\000\229\000\001\000\237\000\ +\\034\000\244\000\001\000\238\000\ +\\210\000\034\000\247\000\212\000\ +\\088\001\189\000\158\000\157\000\ +\\147\000\146\000\152\000\151\000\ +\\154\000\153\000\156\000\155\000\ +\\144\000\143\000\142\000\141\000\ +\\138\000\137\000\139\000\140\000\ +\\136\000\135\000\145\000\178\000\ +\\176\000\161\000\149\000\160\000\ +\\159\000\162\000\148\000\150\000\ +\\163\000\005\000\177\000\175\000\ +\\085\000\059\000\192\000\173\000\ +\\060\000\109\000\108\000\061\000\ +\\112\000\010\000\062\000\048\000\ +\\018\000\063\000\111\000\066\001\ +\\195\000\047\000\034\000\179\000\ +\\010\001\055\001\036\001\038\001\ +\\039\001\081\001\079\001\060\001\ +\\068\001\031\000\064\000\037\000\ +\\070\001\065\000\066\000\074\001\ +\\032\000\067\000\028\001\081\000\ +\\082\000\251\000\000\001\003\001\ +\\240\000\245\000\242\000\248\000\ +\\083\000\034\000\172\000\034\000\ +\\174\000\171\000\010\000\113\000\ +\\010\000\034\000\190\000\010\000\ +\\170\000\084\000\011\001\038\000\ +\\072\001\068\000\076\001\071\001\ +\\075\001\069\001\039\000\048\001\ +\\069\000\070\000\050\001\253\000\ +\\252\000\126\000\134\000\193\000\ +\\103\000\104\000\071\000\019\000\ +\\114\000\054\001\056\001\073\001\ +\\077\001\049\001\051\001\052\001\ +\\020\000\190\000\010\000\105\000\ +\\072\000\115\000\010\000\102\000\ +\\000\000" +val gotoT = +"\ +\\001\000\164\001\002\000\001\000\000\000\ +\\021\000\018\000\024\000\017\000\027\000\016\000\029\000\015\000\ +\\030\000\014\000\036\000\013\000\038\000\012\000\039\000\011\000\ +\\040\000\010\000\043\000\009\000\044\000\008\000\045\000\007\000\ +\\048\000\006\000\049\000\005\000\050\000\004\000\051\000\003\000\ +\\055\000\002\000\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\006\000\051\000\029\000\050\000\030\000\049\000\036\000\013\000\ +\\038\000\012\000\039\000\011\000\040\000\010\000\043\000\009\000\ +\\044\000\008\000\046\000\048\000\049\000\047\000\050\000\004\000\ +\\055\000\002\000\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\031\000\053\000\000\000\ +\\000\000\ +\\018\000\059\000\020\000\058\000\021\000\018\000\024\000\057\000\ +\\027\000\016\000\036\000\056\000\038\000\055\000\039\000\011\000\ +\\040\000\054\000\043\000\009\000\044\000\008\000\055\000\002\000\000\000\ +\\017\000\066\000\019\000\065\000\021\000\064\000\022\000\063\000\ +\\023\000\062\000\024\000\061\000\026\000\060\000\027\000\016\000\000\000\ +\\000\000\ +\\000\000\ +\\027\000\071\000\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\021\000\077\000\040\000\076\000\041\000\075\000\000\000\ +\\021\000\018\000\024\000\078\000\027\000\016\000\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\038\000\085\000\039\000\011\000\040\000\076\000\041\000\084\000\ +\\042\000\083\000\043\000\009\000\044\000\008\000\055\000\002\000\ +\\060\000\082\000\061\000\081\000\000\000\ +\\000\000\ +\\000\000\ +\\006\000\087\000\029\000\050\000\030\000\049\000\036\000\013\000\ +\\038\000\012\000\039\000\011\000\040\000\010\000\043\000\009\000\ +\\044\000\008\000\049\000\086\000\050\000\004\000\055\000\002\000\000\000\ +\\018\000\059\000\020\000\058\000\021\000\018\000\024\000\088\000\ +\\027\000\016\000\036\000\056\000\038\000\055\000\039\000\011\000\ +\\040\000\054\000\043\000\009\000\044\000\008\000\055\000\002\000\000\000\ +\\017\000\066\000\019\000\065\000\021\000\064\000\022\000\089\000\ +\\023\000\062\000\024\000\061\000\026\000\060\000\027\000\016\000\000\000\ +\\000\000\ +\\003\000\098\000\004\000\097\000\005\000\096\000\006\000\095\000\ +\\007\000\094\000\009\000\093\000\010\000\092\000\062\000\091\000\ +\\064\000\090\000\000\000\ +\\032\000\131\000\036\000\130\000\039\000\129\000\040\000\128\000\ +\\043\000\127\000\044\000\126\000\055\000\002\000\000\000\ +\\000\000\ +\\031\000\132\000\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\026\000\138\000\027\000\071\000\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\021\000\064\000\023\000\140\000\024\000\078\000\026\000\060\000\ +\\027\000\016\000\000\000\ +\\007\000\141\000\010\000\092\000\064\000\090\000\000\000\ +\\029\000\149\000\030\000\148\000\036\000\013\000\038\000\012\000\ +\\039\000\011\000\040\000\010\000\043\000\009\000\044\000\008\000\ +\\047\000\147\000\052\000\146\000\053\000\145\000\054\000\144\000\ +\\055\000\002\000\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\013\000\155\000\014\000\154\000\000\000\ +\\021\000\157\000\000\000\ +\\040\000\076\000\041\000\158\000\000\000\ +\\000\000\ +\\000\000\ +\\038\000\085\000\039\000\011\000\040\000\076\000\041\000\084\000\ +\\042\000\083\000\043\000\009\000\044\000\008\000\055\000\002\000\ +\\060\000\160\000\061\000\081\000\000\000\ +\\038\000\085\000\039\000\011\000\040\000\076\000\041\000\084\000\ +\\042\000\083\000\043\000\009\000\044\000\008\000\055\000\002\000\ +\\060\000\161\000\061\000\081\000\000\000\ +\\000\000\ +\\038\000\085\000\039\000\011\000\040\000\076\000\041\000\084\000\ +\\042\000\083\000\043\000\009\000\044\000\008\000\055\000\002\000\ +\\061\000\162\000\000\000\ +\\021\000\064\000\022\000\166\000\023\000\062\000\024\000\061\000\ +\\026\000\060\000\027\000\016\000\056\000\165\000\058\000\164\000\000\000\ +\\021\000\018\000\024\000\171\000\027\000\016\000\038\000\170\000\ +\\039\000\011\000\043\000\009\000\044\000\008\000\055\000\002\000\ +\\057\000\169\000\059\000\168\000\000\000\ +\\033\000\173\000\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\029\000\050\000\030\000\049\000\036\000\013\000\038\000\012\000\ +\\039\000\011\000\040\000\010\000\043\000\009\000\044\000\008\000\ +\\046\000\174\000\049\000\047\000\050\000\004\000\055\000\002\000\000\000\ +\\007\000\175\000\010\000\092\000\064\000\090\000\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\003\000\214\000\006\000\095\000\007\000\094\000\009\000\093\000\ +\\010\000\092\000\064\000\090\000\000\000\ +\\000\000\ +\\000\000\ +\\007\000\216\000\010\000\092\000\064\000\090\000\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\007\000\094\000\008\000\222\000\009\000\221\000\010\000\092\000\ +\\064\000\090\000\000\000\ +\\000\000\ +\\000\000\ +\\007\000\094\000\009\000\225\000\010\000\092\000\064\000\090\000\000\000\ +\\000\000\ +\\003\000\227\000\006\000\095\000\007\000\094\000\009\000\093\000\ +\\010\000\092\000\064\000\090\000\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\064\000\230\000\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\007\000\232\000\010\000\092\000\064\000\090\000\000\000\ +\\007\000\233\000\010\000\092\000\064\000\090\000\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\007\000\094\000\009\000\237\000\010\000\092\000\037\000\236\000\ +\\038\000\085\000\039\000\011\000\040\000\076\000\041\000\235\000\ +\\042\000\234\000\043\000\009\000\044\000\008\000\055\000\002\000\ +\\064\000\090\000\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\032\000\131\000\036\000\130\000\039\000\129\000\040\000\128\000\ +\\043\000\127\000\044\000\126\000\055\000\002\000\000\000\ +\\007\000\239\000\010\000\092\000\034\000\238\000\064\000\090\000\000\000\ +\\017\000\241\000\021\000\064\000\022\000\089\000\023\000\062\000\ +\\024\000\061\000\026\000\060\000\027\000\016\000\000\000\ +\\007\000\242\000\010\000\092\000\064\000\090\000\000\000\ +\\029\000\149\000\030\000\148\000\036\000\013\000\038\000\012\000\ +\\039\000\011\000\040\000\010\000\043\000\009\000\044\000\008\000\ +\\047\000\245\000\052\000\146\000\053\000\244\000\054\000\144\000\ +\\055\000\002\000\000\000\ +\\007\000\239\000\010\000\092\000\034\000\247\000\064\000\090\000\000\000\ +\\000\000\ +\\017\000\248\000\021\000\064\000\022\000\089\000\023\000\062\000\ +\\024\000\061\000\026\000\060\000\027\000\016\000\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\015\000\002\001\016\000\001\001\021\000\000\001\024\000\255\000\ +\\027\000\016\000\036\000\056\000\038\000\055\000\039\000\011\000\ +\\040\000\054\000\043\000\009\000\044\000\008\000\055\000\002\000\000\000\ +\\015\000\009\001\016\000\001\001\021\000\008\001\024\000\007\001\ +\\025\000\006\001\027\000\016\000\028\000\005\001\000\000\ +\\000\000\ +\\000\000\ +\\013\000\011\001\014\000\154\000\000\000\ +\\013\000\012\001\014\000\154\000\000\000\ +\\000\000\ +\\012\000\013\001\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\038\000\085\000\039\000\011\000\040\000\076\000\041\000\084\000\ +\\042\000\083\000\043\000\009\000\044\000\008\000\055\000\002\000\ +\\061\000\162\000\000\000\ +\\038\000\085\000\039\000\011\000\040\000\076\000\041\000\084\000\ +\\042\000\083\000\043\000\009\000\044\000\008\000\055\000\002\000\ +\\061\000\162\000\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\007\000\021\001\010\000\092\000\064\000\090\000\000\000\ +\\000\000\ +\\000\000\ +\\033\000\024\001\000\000\ +\\000\000\ +\\007\000\026\001\010\000\092\000\064\000\090\000\000\000\ +\\032\000\027\001\039\000\129\000\040\000\128\000\043\000\127\000\ +\\044\000\126\000\055\000\002\000\000\000\ +\\003\000\098\000\004\000\028\001\005\000\096\000\006\000\095\000\ +\\007\000\094\000\009\000\093\000\010\000\092\000\029\000\050\000\ +\\030\000\049\000\036\000\013\000\038\000\012\000\039\000\011\000\ +\\040\000\010\000\043\000\009\000\044\000\008\000\049\000\086\000\ +\\050\000\004\000\055\000\002\000\064\000\090\000\000\000\ +\\000\000\ +\\007\000\029\001\010\000\092\000\064\000\090\000\000\000\ +\\000\000\ +\\007\000\030\001\010\000\092\000\064\000\090\000\000\000\ +\\007\000\031\001\010\000\092\000\064\000\090\000\000\000\ +\\007\000\032\001\010\000\092\000\064\000\090\000\000\000\ +\\007\000\033\001\010\000\092\000\064\000\090\000\000\000\ +\\007\000\034\001\010\000\092\000\064\000\090\000\000\000\ +\\007\000\035\001\010\000\092\000\064\000\090\000\000\000\ +\\007\000\036\001\010\000\092\000\064\000\090\000\000\000\ +\\007\000\037\001\010\000\092\000\064\000\090\000\000\000\ +\\007\000\038\001\010\000\092\000\064\000\090\000\000\000\ +\\007\000\039\001\010\000\092\000\064\000\090\000\000\000\ +\\007\000\040\001\010\000\092\000\064\000\090\000\000\000\ +\\007\000\041\001\010\000\092\000\064\000\090\000\000\000\ +\\007\000\042\001\010\000\092\000\064\000\090\000\000\000\ +\\007\000\043\001\010\000\092\000\064\000\090\000\000\000\ +\\007\000\044\001\010\000\092\000\064\000\090\000\000\000\ +\\007\000\045\001\010\000\092\000\064\000\090\000\000\000\ +\\007\000\046\001\010\000\092\000\064\000\090\000\000\000\ +\\007\000\047\001\010\000\092\000\064\000\090\000\000\000\ +\\007\000\048\001\010\000\092\000\064\000\090\000\000\000\ +\\007\000\049\001\010\000\092\000\064\000\090\000\000\000\ +\\007\000\050\001\010\000\092\000\064\000\090\000\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\007\000\053\001\010\000\092\000\064\000\090\000\000\000\ +\\007\000\054\001\010\000\092\000\064\000\090\000\000\000\ +\\007\000\055\001\010\000\092\000\064\000\090\000\000\000\ +\\007\000\056\001\010\000\092\000\064\000\090\000\000\000\ +\\007\000\057\001\010\000\092\000\064\000\090\000\000\000\ +\\007\000\058\001\010\000\092\000\064\000\090\000\000\000\ +\\007\000\059\001\010\000\092\000\064\000\090\000\000\000\ +\\007\000\060\001\010\000\092\000\064\000\090\000\000\000\ +\\007\000\094\000\009\000\061\001\010\000\092\000\064\000\090\000\000\000\ +\\000\000\ +\\007\000\094\000\009\000\064\001\010\000\092\000\064\000\090\000\000\000\ +\\007\000\066\001\010\000\092\000\011\000\065\001\064\000\090\000\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\007\000\094\000\009\000\237\000\010\000\092\000\037\000\068\001\ +\\038\000\085\000\039\000\011\000\040\000\076\000\041\000\235\000\ +\\042\000\234\000\043\000\009\000\044\000\008\000\055\000\002\000\ +\\064\000\090\000\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\007\000\094\000\009\000\071\001\010\000\092\000\064\000\090\000\000\000\ +\\003\000\072\001\006\000\095\000\007\000\094\000\009\000\093\000\ +\\010\000\092\000\064\000\090\000\000\000\ +\\000\000\ +\\007\000\094\000\009\000\074\001\010\000\092\000\064\000\090\000\000\000\ +\\000\000\ +\\007\000\094\000\008\000\076\001\009\000\221\000\010\000\092\000\ +\\064\000\090\000\000\000\ +\\007\000\094\000\009\000\077\001\010\000\092\000\064\000\090\000\000\000\ +\\000\000\ +\\003\000\078\001\006\000\095\000\007\000\094\000\009\000\093\000\ +\\010\000\092\000\064\000\090\000\000\000\ +\\000\000\ +\\000\000\ +\\015\000\080\001\016\000\001\001\021\000\079\001\000\000\ +\\038\000\170\000\039\000\011\000\043\000\009\000\044\000\008\000\ +\\055\000\002\000\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\007\000\239\000\010\000\092\000\034\000\085\001\035\000\084\001\ +\\064\000\090\000\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\029\000\149\000\030\000\148\000\036\000\013\000\038\000\012\000\ +\\039\000\011\000\040\000\010\000\043\000\009\000\044\000\008\000\ +\\054\000\089\001\055\000\002\000\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\016\000\092\001\027\000\071\000\000\000\ +\\000\000\ +\\000\000\ +\\007\000\095\001\010\000\092\000\064\000\090\000\000\000\ +\\015\000\098\001\016\000\001\001\021\000\000\001\024\000\078\000\ +\\027\000\016\000\029\000\149\000\030\000\148\000\036\000\013\000\ +\\038\000\012\000\039\000\011\000\040\000\010\000\043\000\009\000\ +\\044\000\008\000\052\000\146\000\053\000\097\001\054\000\144\000\ +\\055\000\002\000\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\016\000\092\001\027\000\071\000\028\000\102\001\000\000\ +\\000\000\ +\\000\000\ +\\012\000\103\001\000\000\ +\\012\000\104\001\000\000\ +\\000\000\ +\\014\000\106\001\000\000\ +\\007\000\107\001\010\000\092\000\064\000\090\000\000\000\ +\\000\000\ +\\000\000\ +\\021\000\064\000\022\000\166\000\023\000\062\000\024\000\061\000\ +\\026\000\060\000\027\000\016\000\056\000\108\001\000\000\ +\\000\000\ +\\007\000\109\001\010\000\092\000\064\000\090\000\000\000\ +\\000\000\ +\\021\000\064\000\022\000\166\000\023\000\062\000\024\000\061\000\ +\\026\000\060\000\027\000\016\000\056\000\110\001\000\000\ +\\000\000\ +\\032\000\027\001\039\000\129\000\040\000\128\000\043\000\127\000\ +\\044\000\126\000\055\000\002\000\000\000\ +\\007\000\111\001\010\000\092\000\064\000\090\000\000\000\ +\\000\000\ +\\000\000\ +\\063\000\112\001\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\003\000\119\001\006\000\095\000\007\000\094\000\009\000\093\000\ +\\010\000\092\000\064\000\090\000\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\016\000\092\001\000\000\ +\\000\000\ +\\015\000\098\001\016\000\001\001\021\000\079\001\029\000\149\000\ +\\030\000\148\000\036\000\013\000\038\000\012\000\039\000\011\000\ +\\040\000\010\000\043\000\009\000\044\000\008\000\052\000\146\000\ +\\053\000\097\001\054\000\144\000\055\000\002\000\000\000\ +\\007\000\124\001\010\000\092\000\064\000\090\000\000\000\ +\\000\000\ +\\012\000\125\001\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\007\000\127\001\010\000\092\000\064\000\090\000\000\000\ +\\029\000\149\000\030\000\148\000\036\000\013\000\038\000\012\000\ +\\039\000\011\000\040\000\010\000\043\000\009\000\044\000\008\000\ +\\052\000\146\000\053\000\129\001\054\000\144\000\055\000\002\000\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\007\000\134\001\010\000\092\000\064\000\090\000\000\000\ +\\029\000\149\000\030\000\148\000\036\000\013\000\038\000\012\000\ +\\039\000\011\000\040\000\010\000\043\000\009\000\044\000\008\000\ +\\047\000\137\001\052\000\146\000\053\000\136\001\054\000\144\000\ +\\055\000\002\000\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\007\000\142\001\010\000\092\000\064\000\090\000\000\000\ +\\000\000\ +\\007\000\143\001\010\000\092\000\064\000\090\000\000\000\ +\\000\000\ +\\007\000\124\001\010\000\092\000\064\000\090\000\000\000\ +\\003\000\144\001\006\000\095\000\007\000\094\000\009\000\093\000\ +\\010\000\092\000\064\000\090\000\000\000\ +\\000\000\ +\\003\000\145\001\006\000\095\000\007\000\094\000\009\000\093\000\ +\\010\000\092\000\064\000\090\000\000\000\ +\\007\000\094\000\009\000\146\001\010\000\092\000\064\000\090\000\000\000\ +\\007\000\094\000\008\000\147\001\009\000\221\000\010\000\092\000\ +\\064\000\090\000\000\000\ +\\003\000\148\001\006\000\095\000\007\000\094\000\009\000\093\000\ +\\010\000\092\000\064\000\090\000\000\000\ +\\000\000\ +\\000\000\ +\\007\000\239\000\010\000\092\000\034\000\150\001\064\000\090\000\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\007\000\094\000\008\000\160\001\009\000\221\000\010\000\092\000\ +\\064\000\090\000\000\000\ +\\003\000\161\001\006\000\095\000\007\000\094\000\009\000\093\000\ +\\010\000\092\000\064\000\090\000\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\003\000\163\001\006\000\095\000\007\000\094\000\009\000\093\000\ +\\010\000\092\000\064\000\090\000\000\000\ +\\000\000\ +\\000\000\ +\" +val numstates = 421 +val numrules = 247 +val s = ref "" and index = ref 0 +val string_to_int = fn () => +let val i = !index +in index := i+2; Char.ord(String.sub(!s,i)) + Char.ord(String.sub(!s,i+1)) * 256 +end +val string_to_list = fn s' => + let val len = String.size s' + fun f () = + if !index < len then string_to_int() :: f() + else nil + in index := 0; s := s'; f () + end +val string_to_pairlist = fn (conv_key,conv_entry) => + let fun f () = + case string_to_int() + of 0 => EMPTY + | n => PAIR(conv_key (n-1),conv_entry (string_to_int()),f()) + in f + end +val string_to_pairlist_default = fn (conv_key,conv_entry) => + let val conv_row = string_to_pairlist(conv_key,conv_entry) + in fn () => + let val default = conv_entry(string_to_int()) + val row = conv_row() + in (row,default) + end + end +val string_to_table = fn (convert_row,s') => + let val len = String.size s' + fun f ()= + if !index < len then convert_row() :: f() + else nil + in (s := s'; index := 0; f ()) + end +local + val memo = Array.array(numstates+numrules,ERROR) + val _ =let fun g i=(Array.update(memo,i,REDUCE(i-numstates)); g(i+1)) + fun f i = + if i=numstates then g i + else (Array.update(memo,i,SHIFT (STATE i)); f (i+1)) + in f 0 handle General.Subscript => () + end +in +val entry_to_action = fn 0 => ACCEPT | 1 => ERROR | j => Array.sub(memo,(j-2)) +end +val gotoT=Array.fromList(string_to_table(string_to_pairlist(NT,STATE),gotoT)) +val actionRows=string_to_table(string_to_pairlist_default(T,entry_to_action),actionRows) +val actionRowNumbers = string_to_list actionRowNumbers +val actionT = let val actionRowLookUp= +let val a=Array.fromList(actionRows) in fn i=>Array.sub(a,i) end +in Array.fromList(List.map actionRowLookUp actionRowNumbers) +end +in LrTable.mkLrTable {actions=actionT,gotos=gotoT,numRules=numrules, +numStates=numstates,initialState=STATE 0} +end +end +local open Header in +type pos = int +type arg = SourceMap.sourcemap +structure MlyValue = +struct +datatype svalue = VOID' | ntVOID of unit | TYPE_NAME of (string) + | CCONST of (LargeInt.int) | STRING of (string) + | REALNUM of (real) | DECNUM of (LargeInt.int) | ID of (string) + | strings of (string) | popScope of (unit) | pushScope of (unit) + | structDeclaration of ( ( ctype * (declarator * expression) list ) ) + | structDeclarationList of ( ( ctype * (declarator * expression) list ) seq) + | notypeStructDeclaratorList of ( ( declarator * expression ) seq) + | structDeclaratorList of ( ( declarator * expression ) seq) + | notypeStructDeclarator of ( ( declarator * expression ) ) + | structDeclarator of ( ( declarator * expression ) ) + | structOrUnion of (bool) + | parameterDeclaration of ( ( decltype * declarator ) ) + | parameterTypeList of ( ( decltype * declarator ) list) + | parameterList of ( ( decltype * declarator ) seq) + | externalDeclaration of (externalDecl option) + | declaration1 of (declaration) | declaration of (declaration) + | functionDefinition of (externalDecl) + | identlist of ( ( string * int * int ) seq) + | declarationList of (declaration seq) + | fDefDeclaration of ( ( decltype * declarator ) ) + | structOrUnionSpecifier of (specifier) + | enumSpecifier of (specifier) | specifierQualifierList of (ctype) + | typeQualifierList of (qualifier list) + | typeQualifier of (qualifier) + | typeSpecifierReserved of (specifier) + | typeSpecifier of (specifier) | typeName of (ctype) + | storageClassSpecifier of (storage) + | initializerList of (expression seq) | initializer of (expression) + | reservedSpecifierQualifiers of (ctype) + | specifierQualifierReserved of (ctype) + | reservedDeclarationSpecifier of (decltype) + | declarationModifiers of (decltype) + | declarationSpecifiers of (decltype) + | parmDirectDeclarator of (declarator) + | notypeDirectDeclarator of (declarator) + | aftertypeDirectDeclarator of (declarator) + | parmDeclarator of (declarator) | notypeDeclarator of (declarator) + | aftertypeDeclarator of (declarator) | declarator of (declarator) + | pointer of (declarator) + | notypeInitDeclaratorList of ( ( declarator * expression ) seq) + | initDeclaratorList of ( ( declarator * expression ) seq) + | notypeInitDeclarator of ( ( declarator * expression ) ) + | initDeclarator of ( ( declarator * expression ) ) + | directAbstractDeclarator of (declarator) + | abstractDeclarator of (declarator) + | enumerator of ( ( string * expression ) ) + | enumeratorList of ( ( string * expression ) seq) + | trailingComma of (bool) | argumentExprList of (expression seq) + | unaryOperator of (operator) | exprWComma of (expression) + | opExpr of (expression) | expr of (expression) + | compoundStatement of (statement) + | statementlist of (statement seq) + | ostatementlist of (statement list) | statement of (statement) + | tu of (externalDecl seq) | translationUnit of (externalDecl list) +end +type svalue = MlyValue.svalue +type result = externalDecl list +end +structure EC= +struct +open LrTable +infix 5 $$ +fun x $$ y = y::x +val is_keyword = +fn (T 11) => true | (T 57) => true | (T 58) => true | (T 59) => true + | (T 60) => true | (T 61) => true | (T 62) => true | (T 63) => true + | (T 64) => true | (T 65) => true | (T 66) => true | (T 67) => true + | (T 68) => true | (T 69) => true | _ => false +val preferred_change : (term list * term list) list = +(nil + $$ (T 25),nil + $$ (T 88)):: +nil +val noShift = +fn (T 0) => true | _ => false +val showTerminal = +fn (T 0) => "EOF" + | (T 1) => "COLON" + | (T 2) => "SEMICOLON" + | (T 3) => "LPAREN" + | (T 4) => "RPAREN" + | (T 5) => "LCURLY" + | (T 6) => "RCURLY" + | (T 7) => "LBRACE" + | (T 8) => "RBRACE" + | (T 9) => "DOT" + | (T 10) => "COMMA" + | (T 11) => "QUESTION" + | (T 12) => "PERCENT" + | (T 13) => "AMP" + | (T 14) => "BAR" + | (T 15) => "TILDE" + | (T 16) => "DIVIDE" + | (T 17) => "PLUS" + | (T 18) => "MINUS" + | (T 19) => "HAT" + | (T 20) => "BANG" + | (T 21) => "TIMES" + | (T 22) => "INC" + | (T 23) => "DEC" + | (T 24) => "ARROW" + | (T 25) => "ID" + | (T 26) => "EQUALS" + | (T 27) => "PLUSEQUALS" + | (T 28) => "MINUSEQUALS" + | (T 29) => "XOREQUALS" + | (T 30) => "MODEQUALS" + | (T 31) => "TIMESEQUALS" + | (T 32) => "DIVEQUALS" + | (T 33) => "OREQUALS" + | (T 34) => "ANDEQUALS" + | (T 35) => "LSHIFTEQUALS" + | (T 36) => "RSHIFTEQUALS" + | (T 37) => "LTE" + | (T 38) => "GTE" + | (T 39) => "LT" + | (T 40) => "GT" + | (T 41) => "EQ" + | (T 42) => "NEQ" + | (T 43) => "OR" + | (T 44) => "AND" + | (T 45) => "LSHIFT" + | (T 46) => "RSHIFT" + | (T 47) => "DECNUM" + | (T 48) => "REALNUM" + | (T 49) => "STRING" + | (T 50) => "CCONST" + | (T 51) => "EXTERN" + | (T 52) => "AUTO" + | (T 53) => "STATIC" + | (T 54) => "REGISTER" + | (T 55) => "CONST" + | (T 56) => "VOLATILE" + | (T 57) => "IF" + | (T 58) => "THEN" + | (T 59) => "ELSE" + | (T 60) => "FOR" + | (T 61) => "DO" + | (T 62) => "SWITCH" + | (T 63) => "CASE" + | (T 64) => "DEFAULT" + | (T 65) => "WHILE" + | (T 66) => "RETURN" + | (T 67) => "BREAK" + | (T 68) => "CONTINUE" + | (T 69) => "GOTO" + | (T 70) => "CHAR" + | (T 71) => "DOUBLE" + | (T 72) => "ENUM" + | (T 73) => "FLOAT" + | (T 74) => "INT" + | (T 75) => "LONG" + | (T 76) => "SHORT" + | (T 77) => "FRACTIONAL" + | (T 78) => "SATURATE" + | (T 79) => "STRUCT" + | (T 80) => "UNION" + | (T 81) => "UNSIGNED" + | (T 82) => "SIGNED" + | (T 83) => "VOID" + | (T 84) => "SIZEOF" + | (T 85) => "TYPEDEF" + | (T 86) => "UNARY" + | (T 87) => "ELIPSIS" + | (T 88) => "TYPE_NAME" + | _ => "bogus-term" +local open Header in +val errtermvalue= +fn (T 88) => MlyValue.TYPE_NAME(( +Error.hint "Likely cause: missing typedef declaration.\n"; "bogus")) | +_ => MlyValue.VOID' +end +val terms : term list = nil + $$ (T 87) $$ (T 86) $$ (T 85) $$ (T 84) $$ (T 83) $$ (T 82) $$ (T 81) + $$ (T 80) $$ (T 79) $$ (T 78) $$ (T 77) $$ (T 76) $$ (T 75) $$ (T 74) + $$ (T 73) $$ (T 72) $$ (T 71) $$ (T 70) $$ (T 69) $$ (T 68) $$ (T 67) + $$ (T 66) $$ (T 65) $$ (T 64) $$ (T 63) $$ (T 62) $$ (T 61) $$ (T 60) + $$ (T 59) $$ (T 58) $$ (T 57) $$ (T 56) $$ (T 55) $$ (T 54) $$ (T 53) + $$ (T 52) $$ (T 51) $$ (T 46) $$ (T 45) $$ (T 44) $$ (T 43) $$ (T 42) + $$ (T 41) $$ (T 40) $$ (T 39) $$ (T 38) $$ (T 37) $$ (T 36) $$ (T 35) + $$ (T 34) $$ (T 33) $$ (T 32) $$ (T 31) $$ (T 30) $$ (T 29) $$ (T 28) + $$ (T 27) $$ (T 26) $$ (T 24) $$ (T 23) $$ (T 22) $$ (T 21) $$ (T 20) + $$ (T 19) $$ (T 18) $$ (T 17) $$ (T 16) $$ (T 15) $$ (T 14) $$ (T 13) + $$ (T 12) $$ (T 11) $$ (T 10) $$ (T 9) $$ (T 8) $$ (T 7) $$ (T 6) $$ +(T 5) $$ (T 4) $$ (T 3) $$ (T 2) $$ (T 1) $$ (T 0)end +structure Actions = +struct +exception mlyAction of int +local open Header in +val actions = +fn (i392,defaultPos,stack, + (srcMap):arg) => +case (i392,stack) +of ( 0, ( ( _, ( MlyValue.tu tu, tu1left, tu1right)) :: rest671)) => + let val result = MlyValue.translationUnit (seqToList tu) + in ( LrTable.NT 0, ( result, tu1left, tu1right), rest671) +end +| ( 1, ( rest671)) => let val result = MlyValue.tu (emptySeq) + in ( LrTable.NT 1, ( result, defaultPos, defaultPos), rest671) +end +| ( 2, ( ( _, ( MlyValue.externalDeclaration externalDeclaration, _, +externalDeclaration1right)) :: ( _, ( MlyValue.tu tu, tu1left, _)) :: +rest671)) => let val result = MlyValue.tu ( +addOptToEnd(externalDeclaration, tu)) + in ( LrTable.NT 1, ( result, tu1left, externalDeclaration1right), +rest671) +end +| ( 3, ( ( _, ( MlyValue.declaration declaration, (declarationleft + as declaration1left), (declarationright as declaration1right))) :: +rest671)) => let val result = MlyValue.externalDeclaration ( +SOME(markExternalDecl srcMap (ExternalDecl declaration, + declarationleft, + declarationright)) +) + in ( LrTable.NT 50, ( result, declaration1left, declaration1right), +rest671) +end +| ( 4, ( ( _, ( _, SEMICOLON1left, SEMICOLON1right)) :: rest671)) => + let val result = MlyValue.externalDeclaration (NONE) + in ( LrTable.NT 50, ( result, SEMICOLON1left, SEMICOLON1right), +rest671) +end +| ( 5, ( ( _, ( MlyValue.functionDefinition functionDefinition, ( +functionDefinitionleft as functionDefinition1left), ( +functionDefinitionright as functionDefinition1right))) :: rest671)) => + let val result = MlyValue.externalDeclaration ( +SOME(markExternalDecl srcMap (functionDefinition, + functionDefinitionleft, + functionDefinitionright)) +) + in ( LrTable.NT 50, ( result, functionDefinition1left, +functionDefinition1right), rest671) +end +| ( 6, ( ( _, ( MlyValue.statement statement, _, (statementright as +statement1right))) :: _ :: ( _, ( MlyValue.opExpr opExpr3, _, _)) :: _ + :: ( _, ( MlyValue.opExpr opExpr2, _, _)) :: _ :: ( _, ( +MlyValue.opExpr opExpr1, _, _)) :: _ :: ( _, ( _, (FORleft as FOR1left +), _)) :: rest671)) => let val result = MlyValue.statement ( +markStatement srcMap (For(opExpr1,opExpr2,opExpr3,statement), + FORleft, statementright) +) + in ( LrTable.NT 2, ( result, FOR1left, statement1right), rest671) +end +| ( 7, ( ( _, ( MlyValue.statement statement, _, (statementright as +statement1right))) :: _ :: ( _, ( MlyValue.exprWComma exprWComma, _, _ +)) :: _ :: ( _, ( _, (WHILEleft as WHILE1left), _)) :: rest671)) => + let val result = MlyValue.statement ( +markStatement srcMap (While(exprWComma,statement), + WHILEleft, statementright) +) + in ( LrTable.NT 2, ( result, WHILE1left, statement1right), rest671) + +end +| ( 8, ( ( _, ( MlyValue.statement statement, _, (statementright as +statement1right))) :: _ :: ( _, ( MlyValue.exprWComma exprWComma, _, _ +)) :: _ :: ( _, ( _, (SWITCHleft as SWITCH1left), _)) :: rest671)) => + let val result = MlyValue.statement ( +markStatement srcMap (Switch(exprWComma,statement), + SWITCHleft, statementright) +) + in ( LrTable.NT 2, ( result, SWITCH1left, statement1right), rest671) + +end +| ( 9, ( ( _, ( _, _, (SEMICOLONright as SEMICOLON1right))) :: _ :: ( + _, ( MlyValue.exprWComma exprWComma, _, _)) :: _ :: _ :: ( _, ( +MlyValue.statement statement, _, _)) :: ( _, ( _, (DOleft as DO1left), + _)) :: rest671)) => let val result = MlyValue.statement ( +markStatement srcMap (Do(exprWComma,statement), + DOleft, SEMICOLONright) +) + in ( LrTable.NT 2, ( result, DO1left, SEMICOLON1right), rest671) +end +| ( 10, ( ( _, ( _, _, (SEMICOLONright as SEMICOLON1right))) :: ( _, +( _, (BREAKleft as BREAK1left), _)) :: rest671)) => let val result = +MlyValue.statement ( +markStatement srcMap (Break, + BREAKleft, SEMICOLONright)) + in ( LrTable.NT 2, ( result, BREAK1left, SEMICOLON1right), rest671) + +end +| ( 11, ( ( _, ( _, _, (SEMICOLONright as SEMICOLON1right))) :: ( _, +( _, (CONTINUEleft as CONTINUE1left), _)) :: rest671)) => let val +result = MlyValue.statement ( +markStatement srcMap (Continue, + CONTINUEleft, SEMICOLONright)) + in ( LrTable.NT 2, ( result, CONTINUE1left, SEMICOLON1right), rest671 +) +end +| ( 12, ( ( _, ( _, _, (SEMICOLONright as SEMICOLON1right))) :: ( _, +( MlyValue.opExpr opExpr, _, _)) :: ( _, ( _, (RETURNleft as +RETURN1left), _)) :: rest671)) => let val result = MlyValue.statement + ( +markStatement srcMap (Return(opExpr), + RETURNleft, SEMICOLONright) +) + in ( LrTable.NT 2, ( result, RETURN1left, SEMICOLON1right), rest671) + +end +| ( 13, ( ( _, ( _, _, (SEMICOLONright as SEMICOLON1right))) :: ( _, +( MlyValue.ID ID, _, _)) :: ( _, ( _, (GOTOleft as GOTO1left), _)) :: +rest671)) => let val result = MlyValue.statement ( +markStatement srcMap (Goto(ID), + GOTOleft, SEMICOLONright)) + in ( LrTable.NT 2, ( result, GOTO1left, SEMICOLON1right), rest671) + +end +| ( 14, ( ( _, ( MlyValue.compoundStatement compoundStatement, +compoundStatement1left, compoundStatement1right)) :: rest671)) => let + val result = MlyValue.statement (compoundStatement) + in ( LrTable.NT 2, ( result, compoundStatement1left, +compoundStatement1right), rest671) +end +| ( 15, ( ( _, ( MlyValue.statement statement, _, (statementright as +statement1right))) :: _ :: ( _, ( MlyValue.ID ID, (IDleft as ID1left), + _)) :: rest671)) => let val result = MlyValue.statement ( +markStatement srcMap (Labeled(ID,statement), + IDleft,statementright) +) + in ( LrTable.NT 2, ( result, ID1left, statement1right), rest671) +end +| ( 16, ( ( _, ( MlyValue.statement statement, _, (statementright as +statement1right))) :: _ :: ( _, ( _, (DEFAULTleft as DEFAULT1left), _) +) :: rest671)) => let val result = MlyValue.statement ( +markStatement srcMap (DefaultLabel(statement), + DEFAULTleft, statementright) +) + in ( LrTable.NT 2, ( result, DEFAULT1left, statement1right), rest671) + +end +| ( 17, ( ( _, ( MlyValue.statement statement, _, (statementright as +statement1right))) :: _ :: ( _, ( MlyValue.exprWComma exprWComma, _, _ +)) :: ( _, ( _, (CASEleft as CASE1left), _)) :: rest671)) => let val +result = MlyValue.statement ( +markStatement srcMap (CaseLabel(exprWComma,statement), + CASEleft, statementright) +) + in ( LrTable.NT 2, ( result, CASE1left, statement1right), rest671) + +end +| ( 18, ( ( _, ( MlyValue.statement statement, _, (statementright as +statement1right))) :: _ :: ( _, ( MlyValue.exprWComma exprWComma, _, _ +)) :: _ :: ( _, ( _, (IFleft as IF1left), _)) :: rest671)) => let val + result = MlyValue.statement ( +markStatement srcMap (IfThen(exprWComma,statement), + IFleft, statementright) +) + in ( LrTable.NT 2, ( result, IF1left, statement1right), rest671) +end +| ( 19, ( ( _, ( MlyValue.statement statement2, _, statement2right)) + :: _ :: ( _, ( MlyValue.statement statement1, _, _)) :: _ :: ( _, ( +MlyValue.exprWComma exprWComma, _, _)) :: _ :: ( _, ( _, (IFleft as +IF1left), _)) :: rest671)) => let val result = MlyValue.statement ( +markStatement srcMap (IfThenElse(exprWComma,statement1,statement2), + IFleft, + statement2right) +) + in ( LrTable.NT 2, ( result, IF1left, statement2right), rest671) +end +| ( 20, ( ( _, ( _, _, (SEMICOLONright as SEMICOLON1right))) :: ( _, +( MlyValue.exprWComma exprWComma, (exprWCommaleft as exprWComma1left), + _)) :: rest671)) => let val result = MlyValue.statement ( +markStatement srcMap (Expr(exprWComma), + exprWCommaleft, SEMICOLONright) +) + in ( LrTable.NT 2, ( result, exprWComma1left, SEMICOLON1right), +rest671) +end +| ( 21, ( ( _, ( _, (SEMICOLONleft as SEMICOLON1left), ( +SEMICOLONright as SEMICOLON1right))) :: rest671)) => let val result = + MlyValue.statement ( +markStatement srcMap (Expr(EmptyExpr), + SEMICOLONleft, SEMICOLONright) +) + in ( LrTable.NT 2, ( result, SEMICOLON1left, SEMICOLON1right), +rest671) +end +| ( 22, ( ( _, ( _, _, SEMICOLON1right)) :: ( _, ( +MlyValue.declaration1 declaration1, declaration11left, _)) :: rest671) +) => let val result = MlyValue.declaration (declaration1) + in ( LrTable.NT 48, ( result, declaration11left, SEMICOLON1right), +rest671) +end +| ( 23, ( ( _, ( MlyValue.declarationSpecifiers declarationSpecifiers +, (declarationSpecifiersleft as declarationSpecifiers1left), ( +declarationSpecifiersright as declarationSpecifiers1right))) :: +rest671)) => let val result = MlyValue.declaration1 ( +insertDeclNames (declarationSpecifiers, []); + markDeclaration srcMap + (Declaration(declarationSpecifiers, []), + declarationSpecifiersleft, declarationSpecifiersright) +) + in ( LrTable.NT 49, ( result, declarationSpecifiers1left, +declarationSpecifiers1right), rest671) +end +| ( 24, ( ( _, ( MlyValue.initDeclaratorList initDeclaratorList, _, ( +initDeclaratorListright as initDeclaratorList1right))) :: ( _, ( +MlyValue.declarationSpecifiers declarationSpecifiers, ( +declarationSpecifiersleft as declarationSpecifiers1left), _)) :: +rest671)) => let val result = MlyValue.declaration1 ( +let val decl = (declarationSpecifiers, seqToList initDeclaratorList) + in insertDeclNames decl; + markDeclaration srcMap + (Declaration decl, declarationSpecifiersleft, initDeclaratorListright) + end +) + in ( LrTable.NT 49, ( result, declarationSpecifiers1left, +initDeclaratorList1right), rest671) +end +| ( 25, ( ( _, ( MlyValue.notypeInitDeclaratorList +notypeInitDeclaratorList, _, (notypeInitDeclaratorListright as +notypeInitDeclaratorList1right))) :: ( _, ( +MlyValue.declarationModifiers declarationModifiers, ( +declarationModifiersleft as declarationModifiers1left), _)) :: rest671 +)) => let val result = MlyValue.declaration1 ( +let val decl = (declarationModifiers, seqToList notypeInitDeclaratorList) + in insertDeclNames decl; + markDeclaration srcMap + (Declaration decl, declarationModifiersleft, notypeInitDeclaratorListright) + end +) + in ( LrTable.NT 49, ( result, declarationModifiers1left, +notypeInitDeclaratorList1right), rest671) +end +| ( 26, ( ( _, ( MlyValue.statementlist statementlist, +statementlist1left, statementlist1right)) :: rest671)) => let val +result = MlyValue.ostatementlist (seqToList statementlist) + in ( LrTable.NT 3, ( result, statementlist1left, statementlist1right) +, rest671) +end +| ( 27, ( rest671)) => let val result = MlyValue.ostatementlist ([]) + in ( LrTable.NT 3, ( result, defaultPos, defaultPos), rest671) +end +| ( 28, ( ( _, ( MlyValue.statement statement, statement1left, +statement1right)) :: rest671)) => let val result = +MlyValue.statementlist (singletonSeq statement) + in ( LrTable.NT 4, ( result, statement1left, statement1right), +rest671) +end +| ( 29, ( ( _, ( MlyValue.statement statement, _, statement1right)) + :: ( _, ( MlyValue.statementlist statementlist, statementlist1left, _ +)) :: rest671)) => let val result = MlyValue.statementlist ( +addToSeq(statement, statementlist)) + in ( LrTable.NT 4, ( result, statementlist1left, statement1right), +rest671) +end +| ( 30, ( ( _, ( _, _, (RCURLYright as RCURLY1right))) :: _ :: ( _, ( + MlyValue.ostatementlist ostatementlist, _, _)) :: ( _, ( +MlyValue.declarationList declarationList, _, _)) :: _ :: ( _, ( _, ( +LCURLYleft as LCURLY1left), _)) :: rest671)) => let val result = +MlyValue.compoundStatement ( +markStatement srcMap (Compound ((map Decl (seqToList declarationList)) @ ostatementlist), LCURLYleft, RCURLYright) +) + in ( LrTable.NT 5, ( result, LCURLY1left, RCURLY1right), rest671) +end +| ( 31, ( ( _, ( _, _, (RCURLYright as RCURLY1right))) :: ( _, ( +MlyValue.ostatementlist ostatementlist, _, _)) :: ( _, ( _, ( +LCURLYleft as LCURLY1left), _)) :: rest671)) => let val result = +MlyValue.compoundStatement ( +markStatement srcMap (Compound (ostatementlist), LCURLYleft, RCURLYright) +) + in ( LrTable.NT 5, ( result, LCURLY1left, RCURLY1right), rest671) +end +| ( 32, ( ( _, ( _, AMP1left, AMP1right)) :: rest671)) => let val +result = MlyValue.unaryOperator (AddrOf) + in ( LrTable.NT 9, ( result, AMP1left, AMP1right), rest671) +end +| ( 33, ( ( _, ( _, TIMES1left, TIMES1right)) :: rest671)) => let + val result = MlyValue.unaryOperator (Star) + in ( LrTable.NT 9, ( result, TIMES1left, TIMES1right), rest671) +end +| ( 34, ( ( _, ( _, PLUS1left, PLUS1right)) :: rest671)) => let val +result = MlyValue.unaryOperator (Uplus) + in ( LrTable.NT 9, ( result, PLUS1left, PLUS1right), rest671) +end +| ( 35, ( ( _, ( _, MINUS1left, MINUS1right)) :: rest671)) => let + val result = MlyValue.unaryOperator (Negate) + in ( LrTable.NT 9, ( result, MINUS1left, MINUS1right), rest671) +end +| ( 36, ( ( _, ( _, TILDE1left, TILDE1right)) :: rest671)) => let + val result = MlyValue.unaryOperator (BitNot) + in ( LrTable.NT 9, ( result, TILDE1left, TILDE1right), rest671) +end +| ( 37, ( ( _, ( _, BANG1left, BANG1right)) :: rest671)) => let val +result = MlyValue.unaryOperator (Not) + in ( LrTable.NT 9, ( result, BANG1left, BANG1right), rest671) +end +| ( 38, ( ( _, ( MlyValue.expr expr2, _, expr2right)) :: _ :: ( _, ( +MlyValue.exprWComma exprWComma1, _, _)) :: _ :: ( _, ( MlyValue.expr +expr1, expr1left, _)) :: rest671)) => let val result = MlyValue.expr + ( +markExpression srcMap (QuestionColon(expr1,exprWComma1,expr2),expr1left,expr2right) +) + in ( LrTable.NT 6, ( result, expr1left, expr2right), rest671) +end +| ( 39, ( ( _, ( MlyValue.expr expr2, _, expr2right)) :: _ :: ( _, ( +MlyValue.expr expr1, expr1left, _)) :: rest671)) => let val result = +MlyValue.expr ( +markExpression srcMap (Binop(PlusAssign,expr1,expr2),expr1left,expr2right) +) + in ( LrTable.NT 6, ( result, expr1left, expr2right), rest671) +end +| ( 40, ( ( _, ( MlyValue.expr expr2, _, expr2right)) :: _ :: ( _, ( +MlyValue.expr expr1, expr1left, _)) :: rest671)) => let val result = +MlyValue.expr ( +markExpression srcMap (Binop(MinusAssign,expr1,expr2),expr1left,expr2right) +) + in ( LrTable.NT 6, ( result, expr1left, expr2right), rest671) +end +| ( 41, ( ( _, ( MlyValue.expr expr2, _, expr2right)) :: _ :: ( _, ( +MlyValue.expr expr1, expr1left, _)) :: rest671)) => let val result = +MlyValue.expr ( +markExpression srcMap (Binop(TimesAssign,expr1,expr2),expr1left,expr2right) +) + in ( LrTable.NT 6, ( result, expr1left, expr2right), rest671) +end +| ( 42, ( ( _, ( MlyValue.expr expr2, _, expr2right)) :: _ :: ( _, ( +MlyValue.expr expr1, expr1left, _)) :: rest671)) => let val result = +MlyValue.expr ( +markExpression srcMap (Binop(DivAssign,expr1,expr2),expr1left,expr2right) +) + in ( LrTable.NT 6, ( result, expr1left, expr2right), rest671) +end +| ( 43, ( ( _, ( MlyValue.expr expr2, _, expr2right)) :: _ :: ( _, ( +MlyValue.expr expr1, expr1left, _)) :: rest671)) => let val result = +MlyValue.expr ( +markExpression srcMap (Binop(ModAssign,expr1,expr2),expr1left,expr2right) +) + in ( LrTable.NT 6, ( result, expr1left, expr2right), rest671) +end +| ( 44, ( ( _, ( MlyValue.expr expr2, _, expr2right)) :: _ :: ( _, ( +MlyValue.expr expr1, expr1left, _)) :: rest671)) => let val result = +MlyValue.expr ( +markExpression srcMap (Binop(XorAssign,expr1,expr2),expr1left,expr2right) +) + in ( LrTable.NT 6, ( result, expr1left, expr2right), rest671) +end +| ( 45, ( ( _, ( MlyValue.expr expr2, _, expr2right)) :: _ :: ( _, ( +MlyValue.expr expr1, expr1left, _)) :: rest671)) => let val result = +MlyValue.expr ( +markExpression srcMap (Binop(OrAssign,expr1,expr2),expr1left,expr2right) +) + in ( LrTable.NT 6, ( result, expr1left, expr2right), rest671) +end +| ( 46, ( ( _, ( MlyValue.expr expr2, _, expr2right)) :: _ :: ( _, ( +MlyValue.expr expr1, expr1left, _)) :: rest671)) => let val result = +MlyValue.expr ( +markExpression srcMap (Binop(AndAssign,expr1,expr2),expr1left,expr2right) +) + in ( LrTable.NT 6, ( result, expr1left, expr2right), rest671) +end +| ( 47, ( ( _, ( MlyValue.expr expr2, _, expr2right)) :: _ :: ( _, ( +MlyValue.expr expr1, expr1left, _)) :: rest671)) => let val result = +MlyValue.expr ( +markExpression srcMap (Binop(LshiftAssign,expr1,expr2),expr1left,expr2right) +) + in ( LrTable.NT 6, ( result, expr1left, expr2right), rest671) +end +| ( 48, ( ( _, ( MlyValue.expr expr2, _, expr2right)) :: _ :: ( _, ( +MlyValue.expr expr1, expr1left, _)) :: rest671)) => let val result = +MlyValue.expr ( +markExpression srcMap (Binop(RshiftAssign,expr1,expr2),expr1left,expr2right) +) + in ( LrTable.NT 6, ( result, expr1left, expr2right), rest671) +end +| ( 49, ( ( _, ( MlyValue.expr expr2, _, expr2right)) :: _ :: ( _, ( +MlyValue.expr expr1, expr1left, _)) :: rest671)) => let val result = +MlyValue.expr ( +markExpression srcMap (Binop(Assign,expr1,expr2),expr1left,expr2right) +) + in ( LrTable.NT 6, ( result, expr1left, expr2right), rest671) +end +| ( 50, ( ( _, ( MlyValue.expr expr2, _, expr2right)) :: _ :: ( _, ( +MlyValue.expr expr1, expr1left, _)) :: rest671)) => let val result = +MlyValue.expr ( +markExpression srcMap (Binop(Or,expr1,expr2),expr1left,expr2right)) + in ( LrTable.NT 6, ( result, expr1left, expr2right), rest671) +end +| ( 51, ( ( _, ( MlyValue.expr expr2, _, expr2right)) :: _ :: ( _, ( +MlyValue.expr expr1, expr1left, _)) :: rest671)) => let val result = +MlyValue.expr ( +markExpression srcMap (Binop(And,expr1,expr2),expr1left,expr2right)) + in ( LrTable.NT 6, ( result, expr1left, expr2right), rest671) +end +| ( 52, ( ( _, ( MlyValue.expr expr2, _, expr2right)) :: _ :: ( _, ( +MlyValue.expr expr1, expr1left, _)) :: rest671)) => let val result = +MlyValue.expr ( +markExpression srcMap (Binop(BitOr,expr1,expr2),expr1left,expr2right)) + in ( LrTable.NT 6, ( result, expr1left, expr2right), rest671) +end +| ( 53, ( ( _, ( MlyValue.expr expr2, _, expr2right)) :: _ :: ( _, ( +MlyValue.expr expr1, expr1left, _)) :: rest671)) => let val result = +MlyValue.expr ( +markExpression srcMap (Binop(BitXor,expr1,expr2),expr1left,expr2right) +) + in ( LrTable.NT 6, ( result, expr1left, expr2right), rest671) +end +| ( 54, ( ( _, ( MlyValue.expr expr2, _, expr2right)) :: _ :: ( _, ( +MlyValue.expr expr1, expr1left, _)) :: rest671)) => let val result = +MlyValue.expr ( +markExpression srcMap (Binop(BitAnd,expr1,expr2),expr1left,expr2right) +) + in ( LrTable.NT 6, ( result, expr1left, expr2right), rest671) +end +| ( 55, ( ( _, ( MlyValue.expr expr2, _, expr2right)) :: _ :: ( _, ( +MlyValue.expr expr1, expr1left, _)) :: rest671)) => let val result = +MlyValue.expr ( +markExpression srcMap (Binop(Eq,expr1,expr2),expr1left,expr2right)) + in ( LrTable.NT 6, ( result, expr1left, expr2right), rest671) +end +| ( 56, ( ( _, ( MlyValue.expr expr2, _, expr2right)) :: _ :: ( _, ( +MlyValue.expr expr1, expr1left, _)) :: rest671)) => let val result = +MlyValue.expr ( +markExpression srcMap (Binop(Neq,expr1,expr2),expr1left,expr2right)) + in ( LrTable.NT 6, ( result, expr1left, expr2right), rest671) +end +| ( 57, ( ( _, ( MlyValue.expr expr2, _, expr2right)) :: _ :: ( _, ( +MlyValue.expr expr1, expr1left, _)) :: rest671)) => let val result = +MlyValue.expr ( +markExpression srcMap (Binop(Lt,expr1,expr2),expr1left,expr2right)) + in ( LrTable.NT 6, ( result, expr1left, expr2right), rest671) +end +| ( 58, ( ( _, ( MlyValue.expr expr2, _, expr2right)) :: _ :: ( _, ( +MlyValue.expr expr1, expr1left, _)) :: rest671)) => let val result = +MlyValue.expr ( +markExpression srcMap (Binop(Gt,expr1,expr2),expr1left,expr2right)) + in ( LrTable.NT 6, ( result, expr1left, expr2right), rest671) +end +| ( 59, ( ( _, ( MlyValue.expr expr2, _, expr2right)) :: _ :: ( _, ( +MlyValue.expr expr1, expr1left, _)) :: rest671)) => let val result = +MlyValue.expr ( +markExpression srcMap (Binop(Lte,expr1,expr2),expr1left,expr2right)) + in ( LrTable.NT 6, ( result, expr1left, expr2right), rest671) +end +| ( 60, ( ( _, ( MlyValue.expr expr2, _, expr2right)) :: _ :: ( _, ( +MlyValue.expr expr1, expr1left, _)) :: rest671)) => let val result = +MlyValue.expr ( +markExpression srcMap (Binop(Gte,expr1,expr2),expr1left,expr2right)) + in ( LrTable.NT 6, ( result, expr1left, expr2right), rest671) +end +| ( 61, ( ( _, ( MlyValue.expr expr2, _, expr2right)) :: _ :: ( _, ( +MlyValue.expr expr1, expr1left, _)) :: rest671)) => let val result = +MlyValue.expr ( +markExpression srcMap (Binop(Lshift,expr1,expr2),expr1left,expr2right) +) + in ( LrTable.NT 6, ( result, expr1left, expr2right), rest671) +end +| ( 62, ( ( _, ( MlyValue.expr expr2, _, expr2right)) :: _ :: ( _, ( +MlyValue.expr expr1, expr1left, _)) :: rest671)) => let val result = +MlyValue.expr ( +markExpression srcMap (Binop(Rshift,expr1,expr2),expr1left,expr2right) +) + in ( LrTable.NT 6, ( result, expr1left, expr2right), rest671) +end +| ( 63, ( ( _, ( MlyValue.expr expr2, _, expr2right)) :: _ :: ( _, ( +MlyValue.expr expr1, expr1left, _)) :: rest671)) => let val result = +MlyValue.expr ( +markExpression srcMap (Binop(Plus,expr1,expr2),expr1left,expr2right)) + in ( LrTable.NT 6, ( result, expr1left, expr2right), rest671) +end +| ( 64, ( ( _, ( MlyValue.expr expr2, _, expr2right)) :: _ :: ( _, ( +MlyValue.expr expr1, expr1left, _)) :: rest671)) => let val result = +MlyValue.expr ( +markExpression srcMap (Binop(Minus,expr1,expr2),expr1left,expr2right)) + in ( LrTable.NT 6, ( result, expr1left, expr2right), rest671) +end +| ( 65, ( ( _, ( MlyValue.expr expr2, _, expr2right)) :: _ :: ( _, ( +MlyValue.expr expr1, expr1left, _)) :: rest671)) => let val result = +MlyValue.expr ( +markExpression srcMap (Binop(Times,expr1,expr2),expr1left,expr2right)) + in ( LrTable.NT 6, ( result, expr1left, expr2right), rest671) +end +| ( 66, ( ( _, ( MlyValue.expr expr2, _, expr2right)) :: _ :: ( _, ( +MlyValue.expr expr1, expr1left, _)) :: rest671)) => let val result = +MlyValue.expr ( +markExpression srcMap (Binop(Divide,expr1,expr2),expr1left,expr2right) +) + in ( LrTable.NT 6, ( result, expr1left, expr2right), rest671) +end +| ( 67, ( ( _, ( MlyValue.expr expr2, _, expr2right)) :: _ :: ( _, ( +MlyValue.expr expr1, expr1left, _)) :: rest671)) => let val result = +MlyValue.expr ( +markExpression srcMap (Binop(Mod,expr1,expr2),expr1left,expr2right)) + in ( LrTable.NT 6, ( result, expr1left, expr2right), rest671) +end +| ( 68, ( ( _, ( _, _, (INCright as INC1right))) :: ( _, ( +MlyValue.expr expr, (exprleft as expr1left), _)) :: rest671)) => let + val result = MlyValue.expr ( +markExpression srcMap (Unop(PostInc,expr),exprleft,INCright)) + in ( LrTable.NT 6, ( result, expr1left, INC1right), rest671) +end +| ( 69, ( ( _, ( _, _, (DECright as DEC1right))) :: ( _, ( +MlyValue.expr expr, (exprleft as expr1left), _)) :: rest671)) => let + val result = MlyValue.expr ( +markExpression srcMap (Unop(PostDec,expr),exprleft,DECright)) + in ( LrTable.NT 6, ( result, expr1left, DEC1right), rest671) +end +| ( 70, ( ( _, ( MlyValue.expr expr, _, (exprright as expr1right))) + :: ( _, ( _, (INCleft as INC1left), _)) :: rest671)) => let val +result = MlyValue.expr ( +markExpression srcMap (Unop(PreInc,expr),INCleft,exprright)) + in ( LrTable.NT 6, ( result, INC1left, expr1right), rest671) +end +| ( 71, ( ( _, ( MlyValue.expr expr, _, (exprright as expr1right))) + :: ( _, ( _, (DECleft as DEC1left), _)) :: rest671)) => let val +result = MlyValue.expr ( +markExpression srcMap (Unop(PreDec,expr),DECleft,exprright)) + in ( LrTable.NT 6, ( result, DEC1left, expr1right), rest671) +end +| ( 72, ( ( _, ( MlyValue.expr expr, _, (exprright as expr1right))) + :: ( _, ( MlyValue.unaryOperator unaryOperator, (unaryOperatorleft + as unaryOperator1left), _)) :: rest671)) => let val result = +MlyValue.expr ( +markExpression srcMap (Unop(unaryOperator,expr),unaryOperatorleft,exprright) +) + in ( LrTable.NT 6, ( result, unaryOperator1left, expr1right), rest671 +) +end +| ( 73, ( ( _, ( MlyValue.expr expr, _, (exprright as expr1right))) + :: ( _, ( _, (SIZEOFleft as SIZEOF1left), _)) :: rest671)) => let + val result = MlyValue.expr ( +markExpression srcMap (Unop(Sizeof,expr),SIZEOFleft,exprright)) + in ( LrTable.NT 6, ( result, SIZEOF1left, expr1right), rest671) +end +| ( 74, ( ( _, ( MlyValue.expr expr, _, (exprright as expr1right))) + :: _ :: ( _, ( MlyValue.typeName typeName, _, _)) :: ( _, ( _, ( +LPARENleft as LPAREN1left), _)) :: rest671)) => let val result = +MlyValue.expr ( +markExpression srcMap (Cast (typeName,expr),LPARENleft,exprright)) + in ( LrTable.NT 6, ( result, LPAREN1left, expr1right), rest671) +end +| ( 75, ( ( _, ( _, _, (RPARENright as RPAREN1right))) :: ( _, ( +MlyValue.typeName typeName, _, _)) :: _ :: ( _, ( _, (SIZEOFleft as +SIZEOF1left), _)) :: rest671)) => let val result = MlyValue.expr ( +markExpression srcMap (Unop(SizeofType typeName,EmptyExpr),SIZEOFleft,RPARENright) +) + in ( LrTable.NT 6, ( result, SIZEOF1left, RPAREN1right), rest671) +end +| ( 76, ( ( _, ( _, _, (RBRACEright as RBRACE1right))) :: ( _, ( +MlyValue.exprWComma exprWComma, _, _)) :: _ :: ( _, ( MlyValue.expr +expr, (exprleft as expr1left), _)) :: rest671)) => let val result = +MlyValue.expr ( +markExpression srcMap (Binop(Sub,expr,exprWComma),exprleft,RBRACEright) +) + in ( LrTable.NT 6, ( result, expr1left, RBRACE1right), rest671) +end +| ( 77, ( ( _, ( _, _, (RPARENright as RPAREN1right))) :: _ :: ( _, ( + MlyValue.expr expr, (exprleft as expr1left), _)) :: rest671)) => let + val result = MlyValue.expr ( +markExpression srcMap (Call(expr,[]),exprleft,RPARENright)) + in ( LrTable.NT 6, ( result, expr1left, RPAREN1right), rest671) +end +| ( 78, ( ( _, ( _, _, (RPARENright as RPAREN1right))) :: ( _, ( +MlyValue.argumentExprList argumentExprList, _, _)) :: _ :: ( _, ( +MlyValue.expr expr, (exprleft as expr1left), _)) :: rest671)) => let + val result = MlyValue.expr ( +markExpression srcMap (Call(expr, seqToList argumentExprList),exprleft,RPARENright) +) + in ( LrTable.NT 6, ( result, expr1left, RPAREN1right), rest671) +end +| ( 79, ( ( _, ( MlyValue.ID ID, _, (IDright as ID1right))) :: _ :: ( + _, ( MlyValue.expr expr, (exprleft as expr1left), _)) :: rest671)) => + let val result = MlyValue.expr ( +markExpression srcMap (Binop(Dot,expr,Id(ID)),exprleft,IDright)) + in ( LrTable.NT 6, ( result, expr1left, ID1right), rest671) +end +| ( 80, ( ( _, ( MlyValue.ID ID, _, (IDright as ID1right))) :: _ :: ( + _, ( MlyValue.expr expr, (exprleft as expr1left), _)) :: rest671)) => + let val result = MlyValue.expr ( +markExpression srcMap (Binop(Arrow,expr,Id(ID)),exprleft,IDright)) + in ( LrTable.NT 6, ( result, expr1left, ID1right), rest671) +end +| ( 81, ( ( _, ( MlyValue.TYPE_NAME TYPE_NAME, _, (TYPE_NAMEright as +TYPE_NAME1right))) :: _ :: ( _, ( MlyValue.expr expr, (exprleft as +expr1left), _)) :: rest671)) => let val result = MlyValue.expr ( +markExpression srcMap (Binop(Dot,expr,Id(TYPE_NAME)),exprleft,TYPE_NAMEright) +) + in ( LrTable.NT 6, ( result, expr1left, TYPE_NAME1right), rest671) + +end +| ( 82, ( ( _, ( MlyValue.TYPE_NAME TYPE_NAME, _, (TYPE_NAMEright as +TYPE_NAME1right))) :: _ :: ( _, ( MlyValue.expr expr, (exprleft as +expr1left), _)) :: rest671)) => let val result = MlyValue.expr ( +markExpression srcMap (Binop(Arrow,expr,Id(TYPE_NAME)),exprleft,TYPE_NAMEright) +) + in ( LrTable.NT 6, ( result, expr1left, TYPE_NAME1right), rest671) + +end +| ( 83, ( ( _, ( _, _, (RPARENright as RPAREN1right))) :: ( _, ( +MlyValue.exprWComma exprWComma, _, _)) :: ( _, ( _, (LPARENleft as +LPAREN1left), _)) :: rest671)) => let val result = MlyValue.expr ( +markExpression srcMap (exprWComma,LPARENleft,RPARENright)) + in ( LrTable.NT 6, ( result, LPAREN1left, RPAREN1right), rest671) +end +| ( 84, ( ( _, ( MlyValue.DECNUM DECNUM, (DECNUMleft as DECNUM1left), + (DECNUMright as DECNUM1right))) :: rest671)) => let val result = +MlyValue.expr ( +markExpression srcMap (IntConst DECNUM,DECNUMleft,DECNUMright)) + in ( LrTable.NT 6, ( result, DECNUM1left, DECNUM1right), rest671) +end +| ( 85, ( ( _, ( MlyValue.REALNUM REALNUM, (REALNUMleft as +REALNUM1left), (REALNUMright as REALNUM1right))) :: rest671)) => let + val result = MlyValue.expr ( +markExpression srcMap (RealConst REALNUM, REALNUMleft,REALNUMright)) + in ( LrTable.NT 6, ( result, REALNUM1left, REALNUM1right), rest671) + +end +| ( 86, ( ( _, ( MlyValue.CCONST CCONST, (CCONSTleft as CCONST1left), + (CCONSTright as CCONST1right))) :: rest671)) => let val result = +MlyValue.expr ( +markExpression srcMap (IntConst CCONST, CCONSTleft,CCONSTright)) + in ( LrTable.NT 6, ( result, CCONST1left, CCONST1right), rest671) +end +| ( 87, ( ( _, ( MlyValue.ID ID, (IDleft as ID1left), (IDright as +ID1right))) :: rest671)) => let val result = MlyValue.expr ( +markExpression srcMap (Id(ID), IDleft, IDright)) + in ( LrTable.NT 6, ( result, ID1left, ID1right), rest671) +end +| ( 88, ( ( _, ( MlyValue.strings strings, (stringsleft as +strings1left), (stringsright as strings1right))) :: rest671)) => let + val result = MlyValue.expr ( +markExpression srcMap (String(strings),stringsleft,stringsright)) + in ( LrTable.NT 6, ( result, strings1left, strings1right), rest671) + +end +| ( 89, ( ( _, ( MlyValue.STRING STRING, STRING1left, STRING1right)) + :: rest671)) => let val result = MlyValue.strings (STRING) + in ( LrTable.NT 63, ( result, STRING1left, STRING1right), rest671) + +end +| ( 90, ( ( _, ( MlyValue.strings strings, _, strings1right)) :: ( _, + ( MlyValue.STRING STRING, STRING1left, _)) :: rest671)) => let val +result = MlyValue.strings (STRING ^ strings) + in ( LrTable.NT 63, ( result, STRING1left, strings1right), rest671) + +end +| ( 91, ( ( _, ( MlyValue.expr expr, expr1left, expr1right)) :: +rest671)) => let val result = MlyValue.exprWComma (expr) + in ( LrTable.NT 8, ( result, expr1left, expr1right), rest671) +end +| ( 92, ( ( _, ( MlyValue.expr expr, _, (exprright as expr1right))) + :: _ :: ( _, ( MlyValue.exprWComma exprWComma, (exprWCommaleft as +exprWComma1left), _)) :: rest671)) => let val result = +MlyValue.exprWComma ( +markExpression srcMap (Binop(Comma,exprWComma,expr),exprWCommaleft,exprright) +) + in ( LrTable.NT 8, ( result, exprWComma1left, expr1right), rest671) + +end +| ( 93, ( rest671)) => let val result = MlyValue.opExpr (EmptyExpr) + in ( LrTable.NT 7, ( result, defaultPos, defaultPos), rest671) +end +| ( 94, ( ( _, ( MlyValue.exprWComma exprWComma, exprWComma1left, +exprWComma1right)) :: rest671)) => let val result = MlyValue.opExpr ( +exprWComma) + in ( LrTable.NT 7, ( result, exprWComma1left, exprWComma1right), +rest671) +end +| ( 95, ( ( _, ( MlyValue.expr expr, expr1left, expr1right)) :: +rest671)) => let val result = MlyValue.argumentExprList ( +singletonSeq expr) + in ( LrTable.NT 10, ( result, expr1left, expr1right), rest671) +end +| ( 96, ( ( _, ( MlyValue.expr expr, _, expr1right)) :: _ :: ( _, ( +MlyValue.argumentExprList argumentExprList, argumentExprList1left, _)) + :: rest671)) => let val result = MlyValue.argumentExprList ( +addToSeq(expr, argumentExprList)) + in ( LrTable.NT 10, ( result, argumentExprList1left, expr1right), +rest671) +end +| ( 97, ( ( _, ( MlyValue.specifierQualifierList +specifierQualifierList, specifierQualifierList1left, +specifierQualifierList1right)) :: rest671)) => let val result = +MlyValue.typeName (specifierQualifierList) + in ( LrTable.NT 36, ( result, specifierQualifierList1left, +specifierQualifierList1right), rest671) +end +| ( 98, ( ( _, ( MlyValue.abstractDeclarator abstractDeclarator, _, +abstractDeclarator1right)) :: ( _, ( MlyValue.specifierQualifierList +specifierQualifierList, specifierQualifierList1left, _)) :: rest671)) + => let val result = MlyValue.typeName ( +#1 (ctypeDecrToTypeName (specifierQualifierList, abstractDeclarator))) + in ( LrTable.NT 36, ( result, specifierQualifierList1left, +abstractDeclarator1right), rest671) +end +| ( 99, ( ( _, ( MlyValue.reservedDeclarationSpecifier +reservedDeclarationSpecifier, _, reservedDeclarationSpecifier1right)) + :: ( _, ( MlyValue.typeSpecifier typeSpecifier, typeSpecifier1left, _ +)) :: rest671)) => let val result = MlyValue.declarationSpecifiers ( +addSpecifier (typeSpecifier, + reservedDeclarationSpecifier) +) + in ( LrTable.NT 28, ( result, typeSpecifier1left, +reservedDeclarationSpecifier1right), rest671) +end +| ( 100, ( ( _, ( MlyValue.reservedDeclarationSpecifier +reservedDeclarationSpecifier, _, reservedDeclarationSpecifier1right)) + :: ( _, ( MlyValue.typeSpecifier typeSpecifier, _, _)) :: ( _, ( +MlyValue.declarationModifiers declarationModifiers, +declarationModifiers1left, _)) :: rest671)) => let val result = +MlyValue.declarationSpecifiers ( +addAll (declarationModifiers, + addSpecifier (typeSpecifier, + reservedDeclarationSpecifier)) +) + in ( LrTable.NT 28, ( result, declarationModifiers1left, +reservedDeclarationSpecifier1right), rest671) +end +| ( 101, ( rest671)) => let val result = +MlyValue.reservedDeclarationSpecifier (unknown) + in ( LrTable.NT 30, ( result, defaultPos, defaultPos), rest671) +end +| ( 102, ( ( _, ( MlyValue.specifierQualifierReserved +specifierQualifierReserved, _, specifierQualifierReserved1right)) :: ( + _, ( MlyValue.reservedDeclarationSpecifier +reservedDeclarationSpecifier, reservedDeclarationSpecifier1left, _)) + :: rest671)) => let val result = +MlyValue.reservedDeclarationSpecifier ( +let val {qualifiers,specifiers} = specifierQualifierReserved + val decltype = {qualifiers=qualifiers,specifiers=specifiers,storage=[]} + in addAll (decltype, reservedDeclarationSpecifier) end +) + in ( LrTable.NT 30, ( result, reservedDeclarationSpecifier1left, +specifierQualifierReserved1right), rest671) +end +| ( 103, ( ( _, ( MlyValue.storageClassSpecifier +storageClassSpecifier, _, storageClassSpecifier1right)) :: ( _, ( +MlyValue.reservedDeclarationSpecifier reservedDeclarationSpecifier, +reservedDeclarationSpecifier1left, _)) :: rest671)) => let val result + = MlyValue.reservedDeclarationSpecifier ( +addStorage (storageClassSpecifier, + reservedDeclarationSpecifier) +) + in ( LrTable.NT 30, ( result, reservedDeclarationSpecifier1left, +storageClassSpecifier1right), rest671) +end +| ( 104, ( ( _, ( MlyValue.typeSpecifierReserved +typeSpecifierReserved, typeSpecifierReserved1left, +typeSpecifierReserved1right)) :: rest671)) => let val result = +MlyValue.specifierQualifierReserved ( +{qualifiers=[],specifiers=[typeSpecifierReserved]}) + in ( LrTable.NT 31, ( result, typeSpecifierReserved1left, +typeSpecifierReserved1right), rest671) +end +| ( 105, ( ( _, ( MlyValue.typeQualifier typeQualifier, +typeQualifier1left, typeQualifier1right)) :: rest671)) => let val +result = MlyValue.specifierQualifierReserved ( +{qualifiers=[typeQualifier],specifiers=[]}) + in ( LrTable.NT 31, ( result, typeQualifier1left, typeQualifier1right +), rest671) +end +| ( 106, ( ( _, ( MlyValue.structOrUnionSpecifier +structOrUnionSpecifier, structOrUnionSpecifier1left, +structOrUnionSpecifier1right)) :: rest671)) => let val result = +MlyValue.specifierQualifierReserved ( +{qualifiers=[],specifiers=[structOrUnionSpecifier]}) + in ( LrTable.NT 31, ( result, structOrUnionSpecifier1left, +structOrUnionSpecifier1right), rest671) +end +| ( 107, ( ( _, ( MlyValue.enumSpecifier enumSpecifier, +enumSpecifier1left, enumSpecifier1right)) :: rest671)) => let val +result = MlyValue.specifierQualifierReserved ( +{qualifiers=[],specifiers=[enumSpecifier]}) + in ( LrTable.NT 31, ( result, enumSpecifier1left, enumSpecifier1right +), rest671) +end +| ( 108, ( ( _, ( MlyValue.storageClassSpecifier +storageClassSpecifier, storageClassSpecifier1left, +storageClassSpecifier1right)) :: rest671)) => let val result = +MlyValue.declarationModifiers ( +{storage = [storageClassSpecifier], + qualifiers = [], + specifiers = []} +) + in ( LrTable.NT 29, ( result, storageClassSpecifier1left, +storageClassSpecifier1right), rest671) +end +| ( 109, ( ( _, ( MlyValue.storageClassSpecifier +storageClassSpecifier, _, storageClassSpecifier1right)) :: ( _, ( +MlyValue.declarationModifiers declarationModifiers, +declarationModifiers1left, _)) :: rest671)) => let val result = +MlyValue.declarationModifiers ( +addStorage(storageClassSpecifier,declarationModifiers)) + in ( LrTable.NT 29, ( result, declarationModifiers1left, +storageClassSpecifier1right), rest671) +end +| ( 110, ( ( _, ( MlyValue.typeQualifier typeQualifier, +typeQualifier1left, typeQualifier1right)) :: rest671)) => let val +result = MlyValue.declarationModifiers ( +{specifiers = [], + storage = [], + qualifiers = [typeQualifier]} +) + in ( LrTable.NT 29, ( result, typeQualifier1left, typeQualifier1right +), rest671) +end +| ( 111, ( ( _, ( MlyValue.typeQualifier typeQualifier, _, +typeQualifier1right)) :: ( _, ( MlyValue.declarationModifiers +declarationModifiers, declarationModifiers1left, _)) :: rest671)) => + let val result = MlyValue.declarationModifiers ( +addQualifier(typeQualifier, declarationModifiers)) + in ( LrTable.NT 29, ( result, declarationModifiers1left, +typeQualifier1right), rest671) +end +| ( 112, ( ( _, ( MlyValue.reservedSpecifierQualifiers +reservedSpecifierQualifiers, _, reservedSpecifierQualifiers1right)) :: + ( _, ( MlyValue.typeSpecifier typeSpecifier, typeSpecifier1left, _)) + :: rest671)) => let val result = MlyValue.specifierQualifierList ( +let val {specifiers, qualifiers} = reservedSpecifierQualifiers + in {specifiers=typeSpecifier::specifiers,qualifiers=qualifiers} end +) + in ( LrTable.NT 41, ( result, typeSpecifier1left, +reservedSpecifierQualifiers1right), rest671) +end +| ( 113, ( ( _, ( MlyValue.reservedSpecifierQualifiers +reservedSpecifierQualifiers, _, reservedSpecifierQualifiers1right)) :: + ( _, ( MlyValue.typeSpecifier typeSpecifier, _, _)) :: ( _, ( +MlyValue.typeQualifierList typeQualifierList, typeQualifierList1left, + _)) :: rest671)) => let val result = MlyValue.specifierQualifierList + ( +let val {specifiers, qualifiers} = reservedSpecifierQualifiers + in {specifiers=typeSpecifier::specifiers + ,qualifiers=typeQualifierList@qualifiers + } + end +) + in ( LrTable.NT 41, ( result, typeQualifierList1left, +reservedSpecifierQualifiers1right), rest671) +end +| ( 114, ( rest671)) => let val result = +MlyValue.reservedSpecifierQualifiers ({qualifiers=[],specifiers=[]}) + in ( LrTable.NT 32, ( result, defaultPos, defaultPos), rest671) +end +| ( 115, ( ( _, ( MlyValue.specifierQualifierReserved +specifierQualifierReserved, _, specifierQualifierReserved1right)) :: ( + _, ( MlyValue.reservedSpecifierQualifiers reservedSpecifierQualifiers +, reservedSpecifierQualifiers1left, _)) :: rest671)) => let val +result = MlyValue.reservedSpecifierQualifiers ( +let val {specifiers=s1, qualifiers=q1} = reservedSpecifierQualifiers + val {specifiers=s2, qualifiers=q2} = specifierQualifierReserved + in {specifiers=s1@s2, qualifiers=q1@q2} end +) + in ( LrTable.NT 32, ( result, reservedSpecifierQualifiers1left, +specifierQualifierReserved1right), rest671) +end +| ( 116, ( ( _, ( MlyValue.typeQualifier typeQualifier, +typeQualifier1left, typeQualifier1right)) :: rest671)) => let val +result = MlyValue.typeQualifierList ([typeQualifier]) + in ( LrTable.NT 40, ( result, typeQualifier1left, typeQualifier1right +), rest671) +end +| ( 117, ( ( _, ( MlyValue.typeQualifierList typeQualifierList, _, +typeQualifierList1right)) :: ( _, ( MlyValue.typeQualifier +typeQualifier, typeQualifier1left, _)) :: rest671)) => let val result + = MlyValue.typeQualifierList (typeQualifier::typeQualifierList) + in ( LrTable.NT 40, ( result, typeQualifier1left, +typeQualifierList1right), rest671) +end +| ( 118, ( ( _, ( MlyValue.typeSpecifierReserved +typeSpecifierReserved, typeSpecifierReserved1left, +typeSpecifierReserved1right)) :: rest671)) => let val result = +MlyValue.typeSpecifier (typeSpecifierReserved) + in ( LrTable.NT 37, ( result, typeSpecifierReserved1left, +typeSpecifierReserved1right), rest671) +end +| ( 119, ( ( _, ( MlyValue.structOrUnionSpecifier +structOrUnionSpecifier, structOrUnionSpecifier1left, +structOrUnionSpecifier1right)) :: rest671)) => let val result = +MlyValue.typeSpecifier (structOrUnionSpecifier) + in ( LrTable.NT 37, ( result, structOrUnionSpecifier1left, +structOrUnionSpecifier1right), rest671) +end +| ( 120, ( ( _, ( MlyValue.enumSpecifier enumSpecifier, +enumSpecifier1left, enumSpecifier1right)) :: rest671)) => let val +result = MlyValue.typeSpecifier (enumSpecifier) + in ( LrTable.NT 37, ( result, enumSpecifier1left, enumSpecifier1right +), rest671) +end +| ( 121, ( ( _, ( MlyValue.TYPE_NAME TYPE_NAME, TYPE_NAME1left, +TYPE_NAME1right)) :: rest671)) => let val result = +MlyValue.typeSpecifier (TypedefName TYPE_NAME) + in ( LrTable.NT 37, ( result, TYPE_NAME1left, TYPE_NAME1right), +rest671) +end +| ( 122, ( ( _, ( _, VOID1left, VOID1right)) :: rest671)) => let val + result = MlyValue.typeSpecifierReserved (Void) + in ( LrTable.NT 38, ( result, VOID1left, VOID1right), rest671) +end +| ( 123, ( ( _, ( _, CHAR1left, CHAR1right)) :: rest671)) => let val + result = MlyValue.typeSpecifierReserved (Char) + in ( LrTable.NT 38, ( result, CHAR1left, CHAR1right), rest671) +end +| ( 124, ( ( _, ( _, SHORT1left, SHORT1right)) :: rest671)) => let + val result = MlyValue.typeSpecifierReserved (Short) + in ( LrTable.NT 38, ( result, SHORT1left, SHORT1right), rest671) +end +| ( 125, ( ( _, ( _, INT1left, INT1right)) :: rest671)) => let val +result = MlyValue.typeSpecifierReserved (Int) + in ( LrTable.NT 38, ( result, INT1left, INT1right), rest671) +end +| ( 126, ( ( _, ( _, LONG1left, LONG1right)) :: rest671)) => let val + result = MlyValue.typeSpecifierReserved (Long) + in ( LrTable.NT 38, ( result, LONG1left, LONG1right), rest671) +end +| ( 127, ( ( _, ( _, FLOAT1left, FLOAT1right)) :: rest671)) => let + val result = MlyValue.typeSpecifierReserved (Float) + in ( LrTable.NT 38, ( result, FLOAT1left, FLOAT1right), rest671) +end +| ( 128, ( ( _, ( _, DOUBLE1left, DOUBLE1right)) :: rest671)) => let + val result = MlyValue.typeSpecifierReserved (Double) + in ( LrTable.NT 38, ( result, DOUBLE1left, DOUBLE1right), rest671) + +end +| ( 129, ( ( _, ( _, SIGNED1left, SIGNED1right)) :: rest671)) => let + val result = MlyValue.typeSpecifierReserved (Signed) + in ( LrTable.NT 38, ( result, SIGNED1left, SIGNED1right), rest671) + +end +| ( 130, ( ( _, ( _, UNSIGNED1left, UNSIGNED1right)) :: rest671)) => + let val result = MlyValue.typeSpecifierReserved (Unsigned) + in ( LrTable.NT 38, ( result, UNSIGNED1left, UNSIGNED1right), rest671 +) +end +| ( 131, ( ( _, ( _, _, RCURLY1right)) :: ( _, ( +MlyValue.structDeclarationList structDeclarationList, _, _)) :: _ :: ( + _, ( MlyValue.structOrUnion structOrUnion, structOrUnion1left, _)) :: + rest671)) => let val result = MlyValue.structOrUnionSpecifier ( +Struct{isStruct=structOrUnion, tagOpt=NONE, members=seqToList structDeclarationList} +) + in ( LrTable.NT 43, ( result, structOrUnion1left, RCURLY1right), +rest671) +end +| ( 132, ( ( _, ( _, _, RCURLY1right)) :: ( _, ( +MlyValue.structDeclarationList structDeclarationList, _, _)) :: _ :: ( + _, ( MlyValue.ID ID, _, _)) :: ( _, ( MlyValue.structOrUnion +structOrUnion, structOrUnion1left, _)) :: rest671)) => let val result + = MlyValue.structOrUnionSpecifier ( +Struct{isStruct=structOrUnion, tagOpt=SOME ID, members=seqToList structDeclarationList} +) + in ( LrTable.NT 43, ( result, structOrUnion1left, RCURLY1right), +rest671) +end +| ( 133, ( ( _, ( _, _, RCURLY1right)) :: ( _, ( +MlyValue.structDeclarationList structDeclarationList, _, _)) :: _ :: ( + _, ( MlyValue.TYPE_NAME TYPE_NAME, _, _)) :: ( _, ( +MlyValue.structOrUnion structOrUnion, structOrUnion1left, _)) :: +rest671)) => let val result = MlyValue.structOrUnionSpecifier ( +Struct{isStruct=structOrUnion, tagOpt=SOME TYPE_NAME, members=seqToList structDeclarationList} +) + in ( LrTable.NT 43, ( result, structOrUnion1left, RCURLY1right), +rest671) +end +| ( 134, ( ( _, ( MlyValue.ID ID, _, ID1right)) :: ( _, ( +MlyValue.structOrUnion structOrUnion, structOrUnion1left, _)) :: +rest671)) => let val result = MlyValue.structOrUnionSpecifier ( +StructTag {isStruct=structOrUnion, name=ID}) + in ( LrTable.NT 43, ( result, structOrUnion1left, ID1right), rest671) + +end +| ( 135, ( ( _, ( MlyValue.TYPE_NAME TYPE_NAME, _, TYPE_NAME1right)) + :: ( _, ( MlyValue.structOrUnion structOrUnion, structOrUnion1left, _ +)) :: rest671)) => let val result = MlyValue.structOrUnionSpecifier ( +StructTag {isStruct=structOrUnion, name=TYPE_NAME}) + in ( LrTable.NT 43, ( result, structOrUnion1left, TYPE_NAME1right), +rest671) +end +| ( 136, ( ( _, ( _, STRUCT1left, STRUCT1right)) :: rest671)) => let + val result = MlyValue.structOrUnion (true) + in ( LrTable.NT 54, ( result, STRUCT1left, STRUCT1right), rest671) + +end +| ( 137, ( ( _, ( _, UNION1left, UNION1right)) :: rest671)) => let + val result = MlyValue.structOrUnion (false) + in ( LrTable.NT 54, ( result, UNION1left, UNION1right), rest671) +end +| ( 138, ( ( _, ( MlyValue.structDeclaration structDeclaration, +structDeclaration1left, structDeclaration1right)) :: rest671)) => let + val result = MlyValue.structDeclarationList ( +singletonSeq structDeclaration) + in ( LrTable.NT 59, ( result, structDeclaration1left, +structDeclaration1right), rest671) +end +| ( 139, ( ( _, ( MlyValue.structDeclaration structDeclaration, _, +structDeclaration1right)) :: ( _, ( MlyValue.structDeclarationList +structDeclarationList, structDeclarationList1left, _)) :: rest671)) => + let val result = MlyValue.structDeclarationList ( +addToSeq(structDeclaration, structDeclarationList)) + in ( LrTable.NT 59, ( result, structDeclarationList1left, +structDeclaration1right), rest671) +end +| ( 140, ( ( _, ( _, _, SEMICOLON1right)) :: ( _, ( +MlyValue.structDeclaratorList structDeclaratorList, _, _)) :: ( _, ( +MlyValue.specifierQualifierList specifierQualifierList, +specifierQualifierList1left, _)) :: rest671)) => let val result = +MlyValue.structDeclaration ( +(specifierQualifierList, seqToList structDeclaratorList)) + in ( LrTable.NT 60, ( result, specifierQualifierList1left, +SEMICOLON1right), rest671) +end +| ( 141, ( ( _, ( _, _, SEMICOLON1right)) :: ( _, ( +MlyValue.notypeStructDeclaratorList notypeStructDeclaratorList, _, _)) + :: ( _, ( MlyValue.typeQualifierList typeQualifierList, +typeQualifierList1left, _)) :: rest671)) => let val result = +MlyValue.structDeclaration ( +let + val ct = {qualifiers=typeQualifierList, specifiers=[]} + in + (ct, seqToList notypeStructDeclaratorList) + end +) + in ( LrTable.NT 60, ( result, typeQualifierList1left, SEMICOLON1right +), rest671) +end +| ( 142, ( ( _, ( MlyValue.structDeclarator structDeclarator, +structDeclarator1left, structDeclarator1right)) :: rest671)) => let + val result = MlyValue.structDeclaratorList ( +singletonSeq structDeclarator) + in ( LrTable.NT 57, ( result, structDeclarator1left, +structDeclarator1right), rest671) +end +| ( 143, ( ( _, ( MlyValue.structDeclarator structDeclarator, _, +structDeclarator1right)) :: _ :: ( _, ( MlyValue.structDeclaratorList +structDeclaratorList, structDeclaratorList1left, _)) :: rest671)) => + let val result = MlyValue.structDeclaratorList ( +addToSeq(structDeclarator, structDeclaratorList)) + in ( LrTable.NT 57, ( result, structDeclaratorList1left, +structDeclarator1right), rest671) +end +| ( 144, ( ( _, ( MlyValue.notypeStructDeclarator +notypeStructDeclarator, notypeStructDeclarator1left, +notypeStructDeclarator1right)) :: rest671)) => let val result = +MlyValue.notypeStructDeclaratorList ( +singletonSeq notypeStructDeclarator) + in ( LrTable.NT 58, ( result, notypeStructDeclarator1left, +notypeStructDeclarator1right), rest671) +end +| ( 145, ( ( _, ( MlyValue.structDeclarator structDeclarator, _, +structDeclarator1right)) :: _ :: ( _, ( +MlyValue.notypeStructDeclaratorList notypeStructDeclaratorList, +notypeStructDeclaratorList1left, _)) :: rest671)) => let val result = + MlyValue.notypeStructDeclaratorList ( +addToSeq(structDeclarator, notypeStructDeclaratorList)) + in ( LrTable.NT 58, ( result, notypeStructDeclaratorList1left, +structDeclarator1right), rest671) +end +| ( 146, ( ( _, ( MlyValue.declarator declarator, declarator1left, +declarator1right)) :: rest671)) => let val result = +MlyValue.structDeclarator (declarator, EmptyExpr) + in ( LrTable.NT 55, ( result, declarator1left, declarator1right), +rest671) +end +| ( 147, ( ( _, ( MlyValue.expr expr, _, expr1right)) :: ( _, ( _, +COLON1left, _)) :: rest671)) => let val result = +MlyValue.structDeclarator (EmptyDecr, expr) + in ( LrTable.NT 55, ( result, COLON1left, expr1right), rest671) +end +| ( 148, ( ( _, ( MlyValue.expr expr, _, expr1right)) :: _ :: ( _, ( +MlyValue.declarator declarator, declarator1left, _)) :: rest671)) => + let val result = MlyValue.structDeclarator (declarator, expr) + in ( LrTable.NT 55, ( result, declarator1left, expr1right), rest671) + +end +| ( 149, ( ( _, ( MlyValue.notypeDeclarator notypeDeclarator, +notypeDeclarator1left, notypeDeclarator1right)) :: rest671)) => let + val result = MlyValue.notypeStructDeclarator ( +notypeDeclarator, EmptyExpr) + in ( LrTable.NT 56, ( result, notypeDeclarator1left, +notypeDeclarator1right), rest671) +end +| ( 150, ( ( _, ( MlyValue.expr expr, _, expr1right)) :: ( _, ( _, +COLON1left, _)) :: rest671)) => let val result = +MlyValue.notypeStructDeclarator (EmptyDecr, expr) + in ( LrTable.NT 56, ( result, COLON1left, expr1right), rest671) +end +| ( 151, ( ( _, ( MlyValue.expr expr, _, expr1right)) :: _ :: ( _, ( +MlyValue.notypeDeclarator notypeDeclarator, notypeDeclarator1left, _)) + :: rest671)) => let val result = MlyValue.notypeStructDeclarator ( +notypeDeclarator, expr) + in ( LrTable.NT 56, ( result, notypeDeclarator1left, expr1right), +rest671) +end +| ( 152, ( ( _, ( _, CONST1left, CONST1right)) :: rest671)) => let + val result = MlyValue.typeQualifier (CONST) + in ( LrTable.NT 39, ( result, CONST1left, CONST1right), rest671) +end +| ( 153, ( ( _, ( _, VOLATILE1left, VOLATILE1right)) :: rest671)) => + let val result = MlyValue.typeQualifier (VOLATILE) + in ( LrTable.NT 39, ( result, VOLATILE1left, VOLATILE1right), rest671 +) +end +| ( 154, ( ( _, ( _, _, RCURLY1right)) :: ( _, ( +MlyValue.trailingComma trailingComma, _, _)) :: ( _, ( +MlyValue.enumeratorList enumeratorList, _, _)) :: _ :: ( _, ( _, +ENUM1left, _)) :: rest671)) => let val result = +MlyValue.enumSpecifier ( +Enum{tagOpt=NONE, enumerators=seqToList enumeratorList, trailingComma=trailingComma} +) + in ( LrTable.NT 42, ( result, ENUM1left, RCURLY1right), rest671) +end +| ( 155, ( ( _, ( _, _, RCURLY1right)) :: ( _, ( +MlyValue.trailingComma trailingComma, _, _)) :: ( _, ( +MlyValue.enumeratorList enumeratorList, _, _)) :: _ :: ( _, ( +MlyValue.ID ID, _, _)) :: ( _, ( _, ENUM1left, _)) :: rest671)) => let + val result = MlyValue.enumSpecifier ( +Enum{tagOpt=SOME(ID), enumerators=seqToList enumeratorList, trailingComma=trailingComma} +) + in ( LrTable.NT 42, ( result, ENUM1left, RCURLY1right), rest671) +end +| ( 156, ( ( _, ( _, _, RCURLY1right)) :: ( _, ( +MlyValue.trailingComma trailingComma, _, _)) :: ( _, ( +MlyValue.enumeratorList enumeratorList, _, _)) :: _ :: ( _, ( +MlyValue.TYPE_NAME TYPE_NAME, _, _)) :: ( _, ( _, ENUM1left, _)) :: +rest671)) => let val result = MlyValue.enumSpecifier ( +Enum{tagOpt=SOME(TYPE_NAME), enumerators=seqToList enumeratorList, trailingComma=trailingComma} +) + in ( LrTable.NT 42, ( result, ENUM1left, RCURLY1right), rest671) +end +| ( 157, ( ( _, ( MlyValue.ID ID, _, ID1right)) :: ( _, ( _, +ENUM1left, _)) :: rest671)) => let val result = +MlyValue.enumSpecifier (EnumTag(ID)) + in ( LrTable.NT 42, ( result, ENUM1left, ID1right), rest671) +end +| ( 158, ( ( _, ( MlyValue.TYPE_NAME TYPE_NAME, _, TYPE_NAME1right)) + :: ( _, ( _, ENUM1left, _)) :: rest671)) => let val result = +MlyValue.enumSpecifier (EnumTag(TYPE_NAME)) + in ( LrTable.NT 42, ( result, ENUM1left, TYPE_NAME1right), rest671) + +end +| ( 159, ( ( _, ( MlyValue.enumerator enumerator, _, enumerator1right +)) :: _ :: ( _, ( MlyValue.enumeratorList enumeratorList, +enumeratorList1left, _)) :: rest671)) => let val result = +MlyValue.enumeratorList (addToSeq(enumerator, enumeratorList)) + in ( LrTable.NT 12, ( result, enumeratorList1left, enumerator1right), + rest671) +end +| ( 160, ( ( _, ( MlyValue.enumerator enumerator, enumerator1left, +enumerator1right)) :: rest671)) => let val result = +MlyValue.enumeratorList ( +(TypeDefs.addNoTdef(#1(enumerator))); + singletonSeq enumerator) + in ( LrTable.NT 12, ( result, enumerator1left, enumerator1right), +rest671) +end +| ( 161, ( ( _, ( MlyValue.ID ID, ID1left, ID1right)) :: rest671)) => + let val result = MlyValue.enumerator ((ID,ParseTree.EmptyExpr)) + in ( LrTable.NT 13, ( result, ID1left, ID1right), rest671) +end +| ( 162, ( ( _, ( MlyValue.expr expr, _, expr1right)) :: _ :: ( _, ( +MlyValue.ID ID, ID1left, _)) :: rest671)) => let val result = +MlyValue.enumerator (ID,expr) + in ( LrTable.NT 13, ( result, ID1left, expr1right), rest671) +end +| ( 163, ( ( _, ( _, EXTERN1left, EXTERN1right)) :: rest671)) => let + val result = MlyValue.storageClassSpecifier (EXTERN) + in ( LrTable.NT 35, ( result, EXTERN1left, EXTERN1right), rest671) + +end +| ( 164, ( ( _, ( _, STATIC1left, STATIC1right)) :: rest671)) => let + val result = MlyValue.storageClassSpecifier (STATIC) + in ( LrTable.NT 35, ( result, STATIC1left, STATIC1right), rest671) + +end +| ( 165, ( ( _, ( _, AUTO1left, AUTO1right)) :: rest671)) => let val + result = MlyValue.storageClassSpecifier (AUTO) + in ( LrTable.NT 35, ( result, AUTO1left, AUTO1right), rest671) +end +| ( 166, ( ( _, ( _, REGISTER1left, REGISTER1right)) :: rest671)) => + let val result = MlyValue.storageClassSpecifier (REGISTER) + in ( LrTable.NT 35, ( result, REGISTER1left, REGISTER1right), rest671 +) +end +| ( 167, ( ( _, ( _, TYPEDEF1left, TYPEDEF1right)) :: rest671)) => + let val result = MlyValue.storageClassSpecifier (TYPEDEF) + in ( LrTable.NT 35, ( result, TYPEDEF1left, TYPEDEF1right), rest671) + +end +| ( 168, ( rest671)) => let val result = MlyValue.trailingComma ( +false) + in ( LrTable.NT 11, ( result, defaultPos, defaultPos), rest671) +end +| ( 169, ( ( _, ( _, COMMA1left, COMMA1right)) :: rest671)) => let + val result = MlyValue.trailingComma (true) + in ( LrTable.NT 11, ( result, COMMA1left, COMMA1right), rest671) +end +| ( 170, ( ( _, ( MlyValue.initDeclarator initDeclarator, +initDeclarator1left, initDeclarator1right)) :: rest671)) => let val +result = MlyValue.initDeclaratorList (singletonSeq initDeclarator) + in ( LrTable.NT 18, ( result, initDeclarator1left, +initDeclarator1right), rest671) +end +| ( 171, ( ( _, ( MlyValue.initDeclarator initDeclarator, _, +initDeclarator1right)) :: _ :: ( _, ( MlyValue.initDeclaratorList +initDeclaratorList, initDeclaratorList1left, _)) :: rest671)) => let + val result = MlyValue.initDeclaratorList ( +addToSeq(initDeclarator, initDeclaratorList)) + in ( LrTable.NT 18, ( result, initDeclaratorList1left, +initDeclarator1right), rest671) +end +| ( 172, ( ( _, ( MlyValue.declarator declarator, declarator1left, +declarator1right)) :: rest671)) => let val result = +MlyValue.initDeclarator ((declarator,EmptyExpr)) + in ( LrTable.NT 16, ( result, declarator1left, declarator1right), +rest671) +end +| ( 173, ( ( _, ( MlyValue.initializer initializer, _, +initializer1right)) :: _ :: ( _, ( MlyValue.declarator declarator, +declarator1left, _)) :: rest671)) => let val result = +MlyValue.initDeclarator (declarator,initializer) + in ( LrTable.NT 16, ( result, declarator1left, initializer1right), +rest671) +end +| ( 174, ( ( _, ( MlyValue.notypeInitDeclarator notypeInitDeclarator, + notypeInitDeclarator1left, notypeInitDeclarator1right)) :: rest671)) + => let val result = MlyValue.notypeInitDeclaratorList ( +singletonSeq notypeInitDeclarator) + in ( LrTable.NT 19, ( result, notypeInitDeclarator1left, +notypeInitDeclarator1right), rest671) +end +| ( 175, ( ( _, ( MlyValue.initDeclarator initDeclarator, _, +initDeclarator1right)) :: _ :: ( _, ( +MlyValue.notypeInitDeclaratorList notypeInitDeclaratorList, +notypeInitDeclaratorList1left, _)) :: rest671)) => let val result = +MlyValue.notypeInitDeclaratorList ( +addToSeq(initDeclarator, notypeInitDeclaratorList)) + in ( LrTable.NT 19, ( result, notypeInitDeclaratorList1left, +initDeclarator1right), rest671) +end +| ( 176, ( ( _, ( MlyValue.notypeDeclarator notypeDeclarator, +notypeDeclarator1left, notypeDeclarator1right)) :: rest671)) => let + val result = MlyValue.notypeInitDeclarator ( +(notypeDeclarator,EmptyExpr)) + in ( LrTable.NT 17, ( result, notypeDeclarator1left, +notypeDeclarator1right), rest671) +end +| ( 177, ( ( _, ( MlyValue.initializer initializer, _, +initializer1right)) :: _ :: ( _, ( MlyValue.notypeDeclarator +notypeDeclarator, notypeDeclarator1left, _)) :: rest671)) => let val +result = MlyValue.notypeInitDeclarator (notypeDeclarator,initializer) + in ( LrTable.NT 17, ( result, notypeDeclarator1left, +initializer1right), rest671) +end +| ( 178, ( ( _, ( MlyValue.aftertypeDeclarator aftertypeDeclarator, +aftertypeDeclarator1left, aftertypeDeclarator1right)) :: rest671)) => + let val result = MlyValue.declarator (aftertypeDeclarator) + in ( LrTable.NT 21, ( result, aftertypeDeclarator1left, +aftertypeDeclarator1right), rest671) +end +| ( 179, ( ( _, ( MlyValue.notypeDeclarator notypeDeclarator, +notypeDeclarator1left, notypeDeclarator1right)) :: rest671)) => let + val result = MlyValue.declarator (notypeDeclarator) + in ( LrTable.NT 21, ( result, notypeDeclarator1left, +notypeDeclarator1right), rest671) +end +| ( 180, ( ( _, ( MlyValue.aftertypeDirectDeclarator +aftertypeDirectDeclarator, aftertypeDirectDeclarator1left, +aftertypeDirectDeclarator1right)) :: rest671)) => let val result = +MlyValue.aftertypeDeclarator (aftertypeDirectDeclarator) + in ( LrTable.NT 22, ( result, aftertypeDirectDeclarator1left, +aftertypeDirectDeclarator1right), rest671) +end +| ( 181, ( ( _, ( MlyValue.aftertypeDirectDeclarator +aftertypeDirectDeclarator, _, aftertypeDirectDeclarator1right)) :: ( _ +, ( MlyValue.pointer pointer, pointer1left, _)) :: rest671)) => let + val result = MlyValue.aftertypeDeclarator ( +applyPointer(pointer,aftertypeDirectDeclarator)) + in ( LrTable.NT 22, ( result, pointer1left, +aftertypeDirectDeclarator1right), rest671) +end +| ( 182, ( ( _, ( MlyValue.notypeDirectDeclarator +notypeDirectDeclarator, notypeDirectDeclarator1left, +notypeDirectDeclarator1right)) :: rest671)) => let val result = +MlyValue.notypeDeclarator (notypeDirectDeclarator) + in ( LrTable.NT 23, ( result, notypeDirectDeclarator1left, +notypeDirectDeclarator1right), rest671) +end +| ( 183, ( ( _, ( MlyValue.notypeDirectDeclarator +notypeDirectDeclarator, _, notypeDirectDeclarator1right)) :: ( _, ( +MlyValue.pointer pointer, pointer1left, _)) :: rest671)) => let val +result = MlyValue.notypeDeclarator ( +applyPointer(pointer, notypeDirectDeclarator)) + in ( LrTable.NT 23, ( result, pointer1left, +notypeDirectDeclarator1right), rest671) +end +| ( 184, ( ( _, ( MlyValue.parmDirectDeclarator parmDirectDeclarator, + parmDirectDeclarator1left, parmDirectDeclarator1right)) :: rest671)) + => let val result = MlyValue.parmDeclarator (parmDirectDeclarator) + in ( LrTable.NT 24, ( result, parmDirectDeclarator1left, +parmDirectDeclarator1right), rest671) +end +| ( 185, ( ( _, ( MlyValue.parmDirectDeclarator parmDirectDeclarator, + _, parmDirectDeclarator1right)) :: ( _, ( MlyValue.pointer pointer, +pointer1left, _)) :: rest671)) => let val result = +MlyValue.parmDeclarator (applyPointer(pointer, parmDirectDeclarator)) + in ( LrTable.NT 24, ( result, pointer1left, +parmDirectDeclarator1right), rest671) +end +| ( 186, ( ( _, ( _, TIMES1left, TIMES1right)) :: rest671)) => let + val result = MlyValue.pointer (PointerDecr(EmptyDecr)) + in ( LrTable.NT 20, ( result, TIMES1left, TIMES1right), rest671) +end +| ( 187, ( ( _, ( MlyValue.typeQualifierList typeQualifierList, _, +typeQualifierList1right)) :: ( _, ( _, TIMES1left, _)) :: rest671)) => + let val result = MlyValue.pointer ( +PointerDecr(loopQd(typeQualifierList,EmptyDecr))) + in ( LrTable.NT 20, ( result, TIMES1left, typeQualifierList1right), +rest671) +end +| ( 188, ( ( _, ( MlyValue.pointer pointer, _, pointer1right)) :: ( _ +, ( _, TIMES1left, _)) :: rest671)) => let val result = +MlyValue.pointer (PointerDecr(pointer)) + in ( LrTable.NT 20, ( result, TIMES1left, pointer1right), rest671) + +end +| ( 189, ( ( _, ( MlyValue.pointer pointer, _, pointer1right)) :: ( _ +, ( MlyValue.typeQualifierList typeQualifierList, _, _)) :: ( _, ( _, +TIMES1left, _)) :: rest671)) => let val result = MlyValue.pointer ( +PointerDecr(loopQd(typeQualifierList,pointer))) + in ( LrTable.NT 20, ( result, TIMES1left, pointer1right), rest671) + +end +| ( 190, ( ( _, ( MlyValue.TYPE_NAME TYPE_NAME, (TYPE_NAMEleft as +TYPE_NAME1left), (TYPE_NAMEright as TYPE_NAME1right))) :: rest671)) => + let val result = MlyValue.aftertypeDirectDeclarator ( +markDeclarator srcMap (VarDecr TYPE_NAME,TYPE_NAMEleft,TYPE_NAMEright) +) + in ( LrTable.NT 25, ( result, TYPE_NAME1left, TYPE_NAME1right), +rest671) +end +| ( 191, ( ( _, ( _, _, RPAREN1right)) :: ( _, ( +MlyValue.aftertypeDeclarator aftertypeDeclarator, _, _)) :: ( _, ( _, +LPAREN1left, _)) :: rest671)) => let val result = +MlyValue.aftertypeDirectDeclarator (aftertypeDeclarator) + in ( LrTable.NT 25, ( result, LPAREN1left, RPAREN1right), rest671) + +end +| ( 192, ( ( _, ( _, _, RBRACE1right)) :: _ :: ( _, ( +MlyValue.aftertypeDirectDeclarator aftertypeDirectDeclarator, +aftertypeDirectDeclarator1left, _)) :: rest671)) => let val result = +MlyValue.aftertypeDirectDeclarator ( +ArrayDecr (aftertypeDirectDeclarator,EmptyExpr)) + in ( LrTable.NT 25, ( result, aftertypeDirectDeclarator1left, +RBRACE1right), rest671) +end +| ( 193, ( ( _, ( _, _, RBRACE1right)) :: ( _, ( MlyValue.expr expr, + _, _)) :: _ :: ( _, ( MlyValue.aftertypeDirectDeclarator +aftertypeDirectDeclarator, aftertypeDirectDeclarator1left, _)) :: +rest671)) => let val result = MlyValue.aftertypeDirectDeclarator ( +ArrayDecr (aftertypeDirectDeclarator,expr)) + in ( LrTable.NT 25, ( result, aftertypeDirectDeclarator1left, +RBRACE1right), rest671) +end +| ( 194, ( ( _, ( _, _, RPAREN1right)) :: _ :: ( _, ( +MlyValue.aftertypeDirectDeclarator aftertypeDirectDeclarator, +aftertypeDirectDeclarator1left, _)) :: rest671)) => let val result = +MlyValue.aftertypeDirectDeclarator ( +FuncDecr (aftertypeDirectDeclarator,nil)) + in ( LrTable.NT 25, ( result, aftertypeDirectDeclarator1left, +RPAREN1right), rest671) +end +| ( 195, ( ( _, ( _, _, RPAREN1right)) :: ( _, ( +MlyValue.parameterTypeList parameterTypeList, _, _)) :: _ :: ( _, ( +MlyValue.aftertypeDirectDeclarator aftertypeDirectDeclarator, +aftertypeDirectDeclarator1left, _)) :: rest671)) => let val result = +MlyValue.aftertypeDirectDeclarator ( +FuncDecr (aftertypeDirectDeclarator,parameterTypeList)) + in ( LrTable.NT 25, ( result, aftertypeDirectDeclarator1left, +RPAREN1right), rest671) +end +| ( 196, ( ( _, ( _, _, RPAREN1right)) :: ( _, ( MlyValue.identlist +identlist, _, _)) :: _ :: ( _, ( MlyValue.aftertypeDirectDeclarator +aftertypeDirectDeclarator, aftertypeDirectDeclarator1left, _)) :: +rest671)) => let val result = MlyValue.aftertypeDirectDeclarator ( +FuncDecr (aftertypeDirectDeclarator, + map (fn (x,y,z) => (unknown,markDeclarator srcMap (VarDecr x,y,z))) (seqToList identlist)) +) + in ( LrTable.NT 25, ( result, aftertypeDirectDeclarator1left, +RPAREN1right), rest671) +end +| ( 197, ( ( _, ( MlyValue.ID ID, (IDleft as ID1left), (IDright as +ID1right))) :: rest671)) => let val result = +MlyValue.notypeDirectDeclarator ( +markDeclarator srcMap (VarDecr ID,IDleft,IDright)) + in ( LrTable.NT 26, ( result, ID1left, ID1right), rest671) +end +| ( 198, ( ( _, ( _, _, RPAREN1right)) :: ( _, ( +MlyValue.notypeDeclarator notypeDeclarator, _, _)) :: ( _, ( _, +LPAREN1left, _)) :: rest671)) => let val result = +MlyValue.notypeDirectDeclarator (notypeDeclarator) + in ( LrTable.NT 26, ( result, LPAREN1left, RPAREN1right), rest671) + +end +| ( 199, ( ( _, ( _, _, RBRACE1right)) :: _ :: ( _, ( +MlyValue.notypeDirectDeclarator notypeDirectDeclarator, +notypeDirectDeclarator1left, _)) :: rest671)) => let val result = +MlyValue.notypeDirectDeclarator ( +ArrayDecr (notypeDirectDeclarator,EmptyExpr)) + in ( LrTable.NT 26, ( result, notypeDirectDeclarator1left, +RBRACE1right), rest671) +end +| ( 200, ( ( _, ( _, _, RBRACE1right)) :: ( _, ( MlyValue.expr expr, + _, _)) :: _ :: ( _, ( MlyValue.notypeDirectDeclarator +notypeDirectDeclarator, notypeDirectDeclarator1left, _)) :: rest671)) + => let val result = MlyValue.notypeDirectDeclarator ( +ArrayDecr (notypeDirectDeclarator,expr)) + in ( LrTable.NT 26, ( result, notypeDirectDeclarator1left, +RBRACE1right), rest671) +end +| ( 201, ( ( _, ( _, _, RPAREN1right)) :: _ :: ( _, ( +MlyValue.notypeDirectDeclarator notypeDirectDeclarator, +notypeDirectDeclarator1left, _)) :: rest671)) => let val result = +MlyValue.notypeDirectDeclarator (FuncDecr (notypeDirectDeclarator,nil) +) + in ( LrTable.NT 26, ( result, notypeDirectDeclarator1left, +RPAREN1right), rest671) +end +| ( 202, ( ( _, ( _, _, RPAREN1right)) :: ( _, ( +MlyValue.parameterTypeList parameterTypeList, _, _)) :: _ :: ( _, ( +MlyValue.notypeDirectDeclarator notypeDirectDeclarator, +notypeDirectDeclarator1left, _)) :: rest671)) => let val result = +MlyValue.notypeDirectDeclarator ( +FuncDecr (notypeDirectDeclarator,parameterTypeList)) + in ( LrTable.NT 26, ( result, notypeDirectDeclarator1left, +RPAREN1right), rest671) +end +| ( 203, ( ( _, ( _, _, RPAREN1right)) :: ( _, ( MlyValue.identlist +identlist, _, _)) :: _ :: ( _, ( MlyValue.notypeDirectDeclarator +notypeDirectDeclarator, notypeDirectDeclarator1left, _)) :: rest671)) + => let val result = MlyValue.notypeDirectDeclarator ( +FuncDecr (notypeDirectDeclarator, + map (fn (x,y,z) => (unknown,markDeclarator srcMap (VarDecr x,y,z))) (seqToList identlist)) +) + in ( LrTable.NT 26, ( result, notypeDirectDeclarator1left, +RPAREN1right), rest671) +end +| ( 204, ( ( _, ( MlyValue.TYPE_NAME TYPE_NAME, (TYPE_NAMEleft as +TYPE_NAME1left), (TYPE_NAMEright as TYPE_NAME1right))) :: rest671)) => + let val result = MlyValue.parmDirectDeclarator ( +markDeclarator srcMap (VarDecr TYPE_NAME,TYPE_NAMEleft,TYPE_NAMEright) +) + in ( LrTable.NT 27, ( result, TYPE_NAME1left, TYPE_NAME1right), +rest671) +end +| ( 205, ( ( _, ( _, _, RBRACE1right)) :: _ :: ( _, ( +MlyValue.parmDirectDeclarator parmDirectDeclarator, +parmDirectDeclarator1left, _)) :: rest671)) => let val result = +MlyValue.parmDirectDeclarator ( +ArrayDecr (parmDirectDeclarator,EmptyExpr)) + in ( LrTable.NT 27, ( result, parmDirectDeclarator1left, RBRACE1right +), rest671) +end +| ( 206, ( ( _, ( _, _, RBRACE1right)) :: ( _, ( MlyValue.expr expr, + _, _)) :: _ :: ( _, ( MlyValue.parmDirectDeclarator +parmDirectDeclarator, parmDirectDeclarator1left, _)) :: rest671)) => + let val result = MlyValue.parmDirectDeclarator ( +ArrayDecr (parmDirectDeclarator,expr)) + in ( LrTable.NT 27, ( result, parmDirectDeclarator1left, RBRACE1right +), rest671) +end +| ( 207, ( ( _, ( _, _, RPAREN1right)) :: _ :: ( _, ( +MlyValue.parmDirectDeclarator parmDirectDeclarator, +parmDirectDeclarator1left, _)) :: rest671)) => let val result = +MlyValue.parmDirectDeclarator (FuncDecr (parmDirectDeclarator,nil)) + in ( LrTable.NT 27, ( result, parmDirectDeclarator1left, RPAREN1right +), rest671) +end +| ( 208, ( ( _, ( _, _, RPAREN1right)) :: ( _, ( +MlyValue.parameterTypeList parameterTypeList, _, _)) :: _ :: ( _, ( +MlyValue.parmDirectDeclarator parmDirectDeclarator, +parmDirectDeclarator1left, _)) :: rest671)) => let val result = +MlyValue.parmDirectDeclarator ( +FuncDecr (parmDirectDeclarator,parameterTypeList)) + in ( LrTable.NT 27, ( result, parmDirectDeclarator1left, RPAREN1right +), rest671) +end +| ( 209, ( ( _, ( _, _, RPAREN1right)) :: ( _, ( MlyValue.identlist +identlist, _, _)) :: _ :: ( _, ( MlyValue.parmDirectDeclarator +parmDirectDeclarator, parmDirectDeclarator1left, _)) :: rest671)) => + let val result = MlyValue.parmDirectDeclarator ( +FuncDecr (parmDirectDeclarator, + map (fn (x,y,z) => (unknown,markDeclarator srcMap (VarDecr x,y,z))) (seqToList identlist)) +) + in ( LrTable.NT 27, ( result, parmDirectDeclarator1left, RPAREN1right +), rest671) +end +| ( 210, ( ( _, ( MlyValue.expr expr, expr1left, expr1right)) :: +rest671)) => let val result = MlyValue.initializer (expr) + in ( LrTable.NT 33, ( result, expr1left, expr1right), rest671) +end +| ( 211, ( ( _, ( _, _, (RCURLYright as RCURLY1right))) :: _ :: ( _, +( MlyValue.initializerList initializerList, _, _)) :: ( _, ( _, ( +LCURLYleft as LCURLY1left), _)) :: rest671)) => let val result = +MlyValue.initializer ( +markExpression srcMap (InitList(seqToList initializerList),LCURLYleft,RCURLYright) +) + in ( LrTable.NT 33, ( result, LCURLY1left, RCURLY1right), rest671) + +end +| ( 212, ( ( _, ( MlyValue.initializer initializer, initializer1left, + initializer1right)) :: rest671)) => let val result = +MlyValue.initializerList (singletonSeq initializer) + in ( LrTable.NT 34, ( result, initializer1left, initializer1right), +rest671) +end +| ( 213, ( ( _, ( MlyValue.initializer initializer, _, +initializer1right)) :: _ :: ( _, ( MlyValue.initializerList +initializerList, initializerList1left, _)) :: rest671)) => let val +result = MlyValue.initializerList ( +addToSeq(initializer, initializerList)) + in ( LrTable.NT 34, ( result, initializerList1left, initializer1right +), rest671) +end +| ( 214, ( ( _, ( MlyValue.declaration declaration, (declarationleft + as declaration1left), (declarationright as declaration1right))) :: +rest671)) => let val result = MlyValue.declarationList ( +singletonSeq(markDeclaration srcMap (declaration, + declarationleft, + declarationright)) +) + in ( LrTable.NT 45, ( result, declaration1left, declaration1right), +rest671) +end +| ( 215, ( ( _, ( MlyValue.declaration declaration, declarationleft, + (declarationright as declaration1right))) :: ( _, ( +MlyValue.declarationList declarationList, declarationList1left, _)) :: + rest671)) => let val result = MlyValue.declarationList ( +addToSeq(markDeclaration srcMap (declaration, + declarationleft, + declarationright), + declarationList) +) + in ( LrTable.NT 45, ( result, declarationList1left, declaration1right +), rest671) +end +| ( 216, ( ( _, ( MlyValue.ID ID, (IDleft as ID1left), (IDright as +ID1right))) :: rest671)) => let val result = MlyValue.identlist ( +singletonSeq (ID,IDleft,IDright)) + in ( LrTable.NT 46, ( result, ID1left, ID1right), rest671) +end +| ( 217, ( ( _, ( MlyValue.ID ID, IDleft, (IDright as ID1right))) :: + _ :: ( _, ( MlyValue.identlist identlist, identlist1left, _)) :: +rest671)) => let val result = MlyValue.identlist ( +addToSeq((ID,IDleft,IDright),identlist)) + in ( LrTable.NT 46, ( result, identlist1left, ID1right), rest671) +end +| ( 218, ( ( _, ( MlyValue.notypeDeclarator notypeDeclarator, +notypeDeclarator1left, notypeDeclarator1right)) :: rest671)) => let + val result = MlyValue.fDefDeclaration ( +insertFuncName(notypeDeclarator); + TypeDefs.pushScope(); + insertFuncParams(notypeDeclarator); + (unknown, notypeDeclarator) +) + in ( LrTable.NT 44, ( result, notypeDeclarator1left, +notypeDeclarator1right), rest671) +end +| ( 219, ( ( _, ( MlyValue.declarator declarator, _, declarator1right +)) :: ( _, ( MlyValue.declarationSpecifiers declarationSpecifiers, +declarationSpecifiers1left, _)) :: rest671)) => let val result = +MlyValue.fDefDeclaration ( +insertFuncName(declarator); + TypeDefs.pushScope(); + insertFuncParams(declarator); + (declarationSpecifiers, declarator) +) + in ( LrTable.NT 44, ( result, declarationSpecifiers1left, +declarator1right), rest671) +end +| ( 220, ( ( _, ( MlyValue.notypeDeclarator notypeDeclarator, _, +notypeDeclarator1right)) :: ( _, ( MlyValue.declarationModifiers +declarationModifiers, declarationModifiers1left, _)) :: rest671)) => + let val result = MlyValue.fDefDeclaration ( +insertFuncName(notypeDeclarator); + TypeDefs.pushScope(); + insertFuncParams(notypeDeclarator); + (declarationModifiers, notypeDeclarator) +) + in ( LrTable.NT 44, ( result, declarationModifiers1left, +notypeDeclarator1right), rest671) +end +| ( 221, ( ( _, ( MlyValue.compoundStatement compoundStatement, _, +compoundStatement1right)) :: ( _, ( MlyValue.fDefDeclaration +fDefDeclaration, fDefDeclaration1left, _)) :: rest671)) => let val +result = MlyValue.functionDefinition ( +TypeDefs.popScope(); + FunctionDef + {retType = #1 fDefDeclaration, + funDecr = #2 fDefDeclaration, + krParams = [], + body = compoundStatement} +) + in ( LrTable.NT 47, ( result, fDefDeclaration1left, +compoundStatement1right), rest671) +end +| ( 222, ( ( _, ( MlyValue.compoundStatement compoundStatement, _, +compoundStatement1right)) :: ( _, ( MlyValue.declarationList +declarationList, _, _)) :: ( _, ( MlyValue.fDefDeclaration +fDefDeclaration, fDefDeclaration1left, _)) :: rest671)) => let val +result = MlyValue.functionDefinition ( +TypeDefs.popScope(); + FunctionDef + {retType = #1 fDefDeclaration, + funDecr = #2 fDefDeclaration, + krParams = seqToList declarationList, + body = compoundStatement} +) + in ( LrTable.NT 47, ( result, fDefDeclaration1left, +compoundStatement1right), rest671) +end +| ( 223, ( ( _, ( MlyValue.pointer pointer, pointer1left, +pointer1right)) :: rest671)) => let val result = +MlyValue.abstractDeclarator (applyPointer (pointer, EmptyDecr)) + in ( LrTable.NT 14, ( result, pointer1left, pointer1right), rest671) + +end +| ( 224, ( ( _, ( MlyValue.directAbstractDeclarator +directAbstractDeclarator, directAbstractDeclarator1left, +directAbstractDeclarator1right)) :: rest671)) => let val result = +MlyValue.abstractDeclarator (directAbstractDeclarator) + in ( LrTable.NT 14, ( result, directAbstractDeclarator1left, +directAbstractDeclarator1right), rest671) +end +| ( 225, ( ( _, ( MlyValue.directAbstractDeclarator +directAbstractDeclarator, _, directAbstractDeclarator1right)) :: ( _, +( MlyValue.pointer pointer, pointer1left, _)) :: rest671)) => let val + result = MlyValue.abstractDeclarator ( +applyPointer(pointer, directAbstractDeclarator)) + in ( LrTable.NT 14, ( result, pointer1left, +directAbstractDeclarator1right), rest671) +end +| ( 226, ( ( _, ( _, _, RPAREN1right)) :: ( _, ( +MlyValue.abstractDeclarator abstractDeclarator, _, _)) :: ( _, ( _, +LPAREN1left, _)) :: rest671)) => let val result = +MlyValue.directAbstractDeclarator (abstractDeclarator) + in ( LrTable.NT 15, ( result, LPAREN1left, RPAREN1right), rest671) + +end +| ( 227, ( ( _, ( _, _, RBRACE1right)) :: ( _, ( _, LBRACE1left, _)) + :: rest671)) => let val result = MlyValue.directAbstractDeclarator ( +ArrayDecr(EmptyDecr, EmptyExpr)) + in ( LrTable.NT 15, ( result, LBRACE1left, RBRACE1right), rest671) + +end +| ( 228, ( ( _, ( _, _, RBRACE1right)) :: ( _, ( MlyValue.expr expr, + _, _)) :: ( _, ( _, LBRACE1left, _)) :: rest671)) => let val result + = MlyValue.directAbstractDeclarator (ArrayDecr(EmptyDecr, expr)) + in ( LrTable.NT 15, ( result, LBRACE1left, RBRACE1right), rest671) + +end +| ( 229, ( ( _, ( _, _, RBRACE1right)) :: _ :: ( _, ( +MlyValue.directAbstractDeclarator directAbstractDeclarator, +directAbstractDeclarator1left, _)) :: rest671)) => let val result = +MlyValue.directAbstractDeclarator ( +ArrayDecr (directAbstractDeclarator,EmptyExpr)) + in ( LrTable.NT 15, ( result, directAbstractDeclarator1left, +RBRACE1right), rest671) +end +| ( 230, ( ( _, ( _, _, RBRACE1right)) :: ( _, ( MlyValue.expr expr, + _, _)) :: _ :: ( _, ( MlyValue.directAbstractDeclarator +directAbstractDeclarator, directAbstractDeclarator1left, _)) :: +rest671)) => let val result = MlyValue.directAbstractDeclarator ( +ArrayDecr (directAbstractDeclarator,expr)) + in ( LrTable.NT 15, ( result, directAbstractDeclarator1left, +RBRACE1right), rest671) +end +| ( 231, ( ( _, ( _, _, RPAREN1right)) :: ( _, ( _, LPAREN1left, _)) + :: rest671)) => let val result = MlyValue.directAbstractDeclarator ( +FuncDecr (EmptyDecr ,nil)) + in ( LrTable.NT 15, ( result, LPAREN1left, RPAREN1right), rest671) + +end +| ( 232, ( ( _, ( _, _, RPAREN1right)) :: ( _, ( +MlyValue.parameterTypeList parameterTypeList, _, _)) :: ( _, ( _, +LPAREN1left, _)) :: rest671)) => let val result = +MlyValue.directAbstractDeclarator ( +FuncDecr (EmptyDecr, parameterTypeList)) + in ( LrTable.NT 15, ( result, LPAREN1left, RPAREN1right), rest671) + +end +| ( 233, ( ( _, ( _, _, RPAREN1right)) :: _ :: ( _, ( +MlyValue.directAbstractDeclarator directAbstractDeclarator, +directAbstractDeclarator1left, _)) :: rest671)) => let val result = +MlyValue.directAbstractDeclarator ( +FuncDecr (directAbstractDeclarator,nil)) + in ( LrTable.NT 15, ( result, directAbstractDeclarator1left, +RPAREN1right), rest671) +end +| ( 234, ( ( _, ( _, _, RPAREN1right)) :: ( _, ( +MlyValue.parameterTypeList parameterTypeList, _, _)) :: _ :: ( _, ( +MlyValue.directAbstractDeclarator directAbstractDeclarator, +directAbstractDeclarator1left, _)) :: rest671)) => let val result = +MlyValue.directAbstractDeclarator ( +FuncDecr (directAbstractDeclarator, parameterTypeList)) + in ( LrTable.NT 15, ( result, directAbstractDeclarator1left, +RPAREN1right), rest671) +end +| ( 235, ( ( _, ( MlyValue.parameterList parameterList, +parameterList1left, parameterList1right)) :: rest671)) => let val +result = MlyValue.parameterTypeList (seqToList parameterList) + in ( LrTable.NT 52, ( result, parameterList1left, parameterList1right +), rest671) +end +| ( 236, ( ( _, ( _, _, ELIPSIS1right)) :: _ :: ( _, ( +MlyValue.parameterList parameterList, parameterList1left, _)) :: +rest671)) => let val result = MlyValue.parameterTypeList ( +let val decltype = {specifiers=[Ellipses],qualifiers=[],storage=[]} + in (seqToList parameterList) @ [(decltype, EllipsesDecr)] end +) + in ( LrTable.NT 52, ( result, parameterList1left, ELIPSIS1right), +rest671) +end +| ( 237, ( ( _, ( MlyValue.parameterDeclaration parameterDeclaration, + (parameterDeclarationleft as parameterDeclaration1left), ( +parameterDeclarationright as parameterDeclaration1right))) :: rest671) +) => let val result = MlyValue.parameterList ( +singletonSeq(#1 parameterDeclaration, + markDeclarator srcMap (#2 parameterDeclaration, + parameterDeclarationleft, + parameterDeclarationright)) +) + in ( LrTable.NT 51, ( result, parameterDeclaration1left, +parameterDeclaration1right), rest671) +end +| ( 238, ( ( _, ( MlyValue.parameterDeclaration parameterDeclaration, + parameterDeclarationleft, (parameterDeclarationright as +parameterDeclaration1right))) :: _ :: ( _, ( MlyValue.parameterList +parameterList, parameterList1left, _)) :: rest671)) => let val result + = MlyValue.parameterList ( +addToSeq((#1 parameterDeclaration, + markDeclarator + srcMap + (#2 parameterDeclaration, + parameterDeclarationleft, + parameterDeclarationright)), + parameterList) +) + in ( LrTable.NT 51, ( result, parameterList1left, +parameterDeclaration1right), rest671) +end +| ( 239, ( ( _, ( MlyValue.notypeDeclarator notypeDeclarator, _, +notypeDeclarator1right)) :: ( _, ( MlyValue.declarationSpecifiers +declarationSpecifiers, declarationSpecifiers1left, _)) :: rest671)) => + let val result = MlyValue.parameterDeclaration ( +(declarationSpecifiers, notypeDeclarator)) + in ( LrTable.NT 53, ( result, declarationSpecifiers1left, +notypeDeclarator1right), rest671) +end +| ( 240, ( ( _, ( MlyValue.parmDeclarator parmDeclarator, _, +parmDeclarator1right)) :: ( _, ( MlyValue.declarationSpecifiers +declarationSpecifiers, declarationSpecifiers1left, _)) :: rest671)) => + let val result = MlyValue.parameterDeclaration ( +(declarationSpecifiers, parmDeclarator)) + in ( LrTable.NT 53, ( result, declarationSpecifiers1left, +parmDeclarator1right), rest671) +end +| ( 241, ( ( _, ( MlyValue.declarationSpecifiers +declarationSpecifiers, declarationSpecifiers1left, +declarationSpecifiers1right)) :: rest671)) => let val result = +MlyValue.parameterDeclaration ((declarationSpecifiers, EmptyDecr)) + in ( LrTable.NT 53, ( result, declarationSpecifiers1left, +declarationSpecifiers1right), rest671) +end +| ( 242, ( ( _, ( MlyValue.abstractDeclarator abstractDeclarator, _, +abstractDeclarator1right)) :: ( _, ( MlyValue.declarationSpecifiers +declarationSpecifiers, declarationSpecifiers1left, _)) :: rest671)) => + let val result = MlyValue.parameterDeclaration ( +(declarationSpecifiers, abstractDeclarator)) + in ( LrTable.NT 53, ( result, declarationSpecifiers1left, +abstractDeclarator1right), rest671) +end +| ( 243, ( ( _, ( MlyValue.notypeDeclarator notypeDeclarator, _, +notypeDeclarator1right)) :: ( _, ( MlyValue.declarationModifiers +declarationModifiers, declarationModifiers1left, _)) :: rest671)) => + let val result = MlyValue.parameterDeclaration ( +(declarationModifiers, notypeDeclarator)) + in ( LrTable.NT 53, ( result, declarationModifiers1left, +notypeDeclarator1right), rest671) +end +| ( 244, ( ( _, ( MlyValue.abstractDeclarator abstractDeclarator, _, +abstractDeclarator1right)) :: ( _, ( MlyValue.declarationModifiers +declarationModifiers, declarationModifiers1left, _)) :: rest671)) => + let val result = MlyValue.parameterDeclaration ( +(declarationModifiers, abstractDeclarator)) + in ( LrTable.NT 53, ( result, declarationModifiers1left, +abstractDeclarator1right), rest671) +end +| ( 245, ( rest671)) => let val result = MlyValue.pushScope ( +TypeDefs.pushScope()) + in ( LrTable.NT 61, ( result, defaultPos, defaultPos), rest671) +end +| ( 246, ( rest671)) => let val result = MlyValue.popScope ( +TypeDefs.popScope()) + in ( LrTable.NT 62, ( result, defaultPos, defaultPos), rest671) +end +| _ => raise (mlyAction i392) +end +val void = MlyValue.VOID' +val extract = fn a => (fn MlyValue.translationUnit x => x +| _ => let exception ParseInternal + in raise ParseInternal end) a +end +end +structure Tokens : C_TOKENS = +struct +type svalue = ParserData.svalue +type ('a,'b) token = ('a,'b) Token.token +fun EOF (p1,p2) = Token.TOKEN (ParserData.LrTable.T 0,( +ParserData.MlyValue.VOID',p1,p2)) +fun COLON (p1,p2) = Token.TOKEN (ParserData.LrTable.T 1,( +ParserData.MlyValue.VOID',p1,p2)) +fun SEMICOLON (p1,p2) = Token.TOKEN (ParserData.LrTable.T 2,( +ParserData.MlyValue.VOID',p1,p2)) +fun LPAREN (p1,p2) = Token.TOKEN (ParserData.LrTable.T 3,( +ParserData.MlyValue.VOID',p1,p2)) +fun RPAREN (p1,p2) = Token.TOKEN (ParserData.LrTable.T 4,( +ParserData.MlyValue.VOID',p1,p2)) +fun LCURLY (p1,p2) = Token.TOKEN (ParserData.LrTable.T 5,( +ParserData.MlyValue.VOID',p1,p2)) +fun RCURLY (p1,p2) = Token.TOKEN (ParserData.LrTable.T 6,( +ParserData.MlyValue.VOID',p1,p2)) +fun LBRACE (p1,p2) = Token.TOKEN (ParserData.LrTable.T 7,( +ParserData.MlyValue.VOID',p1,p2)) +fun RBRACE (p1,p2) = Token.TOKEN (ParserData.LrTable.T 8,( +ParserData.MlyValue.VOID',p1,p2)) +fun DOT (p1,p2) = Token.TOKEN (ParserData.LrTable.T 9,( +ParserData.MlyValue.VOID',p1,p2)) +fun COMMA (p1,p2) = Token.TOKEN (ParserData.LrTable.T 10,( +ParserData.MlyValue.VOID',p1,p2)) +fun QUESTION (p1,p2) = Token.TOKEN (ParserData.LrTable.T 11,( +ParserData.MlyValue.VOID',p1,p2)) +fun PERCENT (p1,p2) = Token.TOKEN (ParserData.LrTable.T 12,( +ParserData.MlyValue.VOID',p1,p2)) +fun AMP (p1,p2) = Token.TOKEN (ParserData.LrTable.T 13,( +ParserData.MlyValue.VOID',p1,p2)) +fun BAR (p1,p2) = Token.TOKEN (ParserData.LrTable.T 14,( +ParserData.MlyValue.VOID',p1,p2)) +fun TILDE (p1,p2) = Token.TOKEN (ParserData.LrTable.T 15,( +ParserData.MlyValue.VOID',p1,p2)) +fun DIVIDE (p1,p2) = Token.TOKEN (ParserData.LrTable.T 16,( +ParserData.MlyValue.VOID',p1,p2)) +fun PLUS (p1,p2) = Token.TOKEN (ParserData.LrTable.T 17,( +ParserData.MlyValue.VOID',p1,p2)) +fun MINUS (p1,p2) = Token.TOKEN (ParserData.LrTable.T 18,( +ParserData.MlyValue.VOID',p1,p2)) +fun HAT (p1,p2) = Token.TOKEN (ParserData.LrTable.T 19,( +ParserData.MlyValue.VOID',p1,p2)) +fun BANG (p1,p2) = Token.TOKEN (ParserData.LrTable.T 20,( +ParserData.MlyValue.VOID',p1,p2)) +fun TIMES (p1,p2) = Token.TOKEN (ParserData.LrTable.T 21,( +ParserData.MlyValue.VOID',p1,p2)) +fun INC (p1,p2) = Token.TOKEN (ParserData.LrTable.T 22,( +ParserData.MlyValue.VOID',p1,p2)) +fun DEC (p1,p2) = Token.TOKEN (ParserData.LrTable.T 23,( +ParserData.MlyValue.VOID',p1,p2)) +fun ARROW (p1,p2) = Token.TOKEN (ParserData.LrTable.T 24,( +ParserData.MlyValue.VOID',p1,p2)) +fun ID (i,p1,p2) = Token.TOKEN (ParserData.LrTable.T 25,( +ParserData.MlyValue.ID i,p1,p2)) +fun EQUALS (p1,p2) = Token.TOKEN (ParserData.LrTable.T 26,( +ParserData.MlyValue.VOID',p1,p2)) +fun PLUSEQUALS (p1,p2) = Token.TOKEN (ParserData.LrTable.T 27,( +ParserData.MlyValue.VOID',p1,p2)) +fun MINUSEQUALS (p1,p2) = Token.TOKEN (ParserData.LrTable.T 28,( +ParserData.MlyValue.VOID',p1,p2)) +fun XOREQUALS (p1,p2) = Token.TOKEN (ParserData.LrTable.T 29,( +ParserData.MlyValue.VOID',p1,p2)) +fun MODEQUALS (p1,p2) = Token.TOKEN (ParserData.LrTable.T 30,( +ParserData.MlyValue.VOID',p1,p2)) +fun TIMESEQUALS (p1,p2) = Token.TOKEN (ParserData.LrTable.T 31,( +ParserData.MlyValue.VOID',p1,p2)) +fun DIVEQUALS (p1,p2) = Token.TOKEN (ParserData.LrTable.T 32,( +ParserData.MlyValue.VOID',p1,p2)) +fun OREQUALS (p1,p2) = Token.TOKEN (ParserData.LrTable.T 33,( +ParserData.MlyValue.VOID',p1,p2)) +fun ANDEQUALS (p1,p2) = Token.TOKEN (ParserData.LrTable.T 34,( +ParserData.MlyValue.VOID',p1,p2)) +fun LSHIFTEQUALS (p1,p2) = Token.TOKEN (ParserData.LrTable.T 35,( +ParserData.MlyValue.VOID',p1,p2)) +fun RSHIFTEQUALS (p1,p2) = Token.TOKEN (ParserData.LrTable.T 36,( +ParserData.MlyValue.VOID',p1,p2)) +fun LTE (p1,p2) = Token.TOKEN (ParserData.LrTable.T 37,( +ParserData.MlyValue.VOID',p1,p2)) +fun GTE (p1,p2) = Token.TOKEN (ParserData.LrTable.T 38,( +ParserData.MlyValue.VOID',p1,p2)) +fun LT (p1,p2) = Token.TOKEN (ParserData.LrTable.T 39,( +ParserData.MlyValue.VOID',p1,p2)) +fun GT (p1,p2) = Token.TOKEN (ParserData.LrTable.T 40,( +ParserData.MlyValue.VOID',p1,p2)) +fun EQ (p1,p2) = Token.TOKEN (ParserData.LrTable.T 41,( +ParserData.MlyValue.VOID',p1,p2)) +fun NEQ (p1,p2) = Token.TOKEN (ParserData.LrTable.T 42,( +ParserData.MlyValue.VOID',p1,p2)) +fun OR (p1,p2) = Token.TOKEN (ParserData.LrTable.T 43,( +ParserData.MlyValue.VOID',p1,p2)) +fun AND (p1,p2) = Token.TOKEN (ParserData.LrTable.T 44,( +ParserData.MlyValue.VOID',p1,p2)) +fun LSHIFT (p1,p2) = Token.TOKEN (ParserData.LrTable.T 45,( +ParserData.MlyValue.VOID',p1,p2)) +fun RSHIFT (p1,p2) = Token.TOKEN (ParserData.LrTable.T 46,( +ParserData.MlyValue.VOID',p1,p2)) +fun DECNUM (i,p1,p2) = Token.TOKEN (ParserData.LrTable.T 47,( +ParserData.MlyValue.DECNUM i,p1,p2)) +fun REALNUM (i,p1,p2) = Token.TOKEN (ParserData.LrTable.T 48,( +ParserData.MlyValue.REALNUM i,p1,p2)) +fun STRING (i,p1,p2) = Token.TOKEN (ParserData.LrTable.T 49,( +ParserData.MlyValue.STRING i,p1,p2)) +fun CCONST (i,p1,p2) = Token.TOKEN (ParserData.LrTable.T 50,( +ParserData.MlyValue.CCONST i,p1,p2)) +fun EXTERN (p1,p2) = Token.TOKEN (ParserData.LrTable.T 51,( +ParserData.MlyValue.VOID',p1,p2)) +fun AUTO (p1,p2) = Token.TOKEN (ParserData.LrTable.T 52,( +ParserData.MlyValue.VOID',p1,p2)) +fun STATIC (p1,p2) = Token.TOKEN (ParserData.LrTable.T 53,( +ParserData.MlyValue.VOID',p1,p2)) +fun REGISTER (p1,p2) = Token.TOKEN (ParserData.LrTable.T 54,( +ParserData.MlyValue.VOID',p1,p2)) +fun CONST (p1,p2) = Token.TOKEN (ParserData.LrTable.T 55,( +ParserData.MlyValue.VOID',p1,p2)) +fun VOLATILE (p1,p2) = Token.TOKEN (ParserData.LrTable.T 56,( +ParserData.MlyValue.VOID',p1,p2)) +fun IF (p1,p2) = Token.TOKEN (ParserData.LrTable.T 57,( +ParserData.MlyValue.VOID',p1,p2)) +fun THEN (p1,p2) = Token.TOKEN (ParserData.LrTable.T 58,( +ParserData.MlyValue.VOID',p1,p2)) +fun ELSE (p1,p2) = Token.TOKEN (ParserData.LrTable.T 59,( +ParserData.MlyValue.VOID',p1,p2)) +fun FOR (p1,p2) = Token.TOKEN (ParserData.LrTable.T 60,( +ParserData.MlyValue.VOID',p1,p2)) +fun DO (p1,p2) = Token.TOKEN (ParserData.LrTable.T 61,( +ParserData.MlyValue.VOID',p1,p2)) +fun SWITCH (p1,p2) = Token.TOKEN (ParserData.LrTable.T 62,( +ParserData.MlyValue.VOID',p1,p2)) +fun CASE (p1,p2) = Token.TOKEN (ParserData.LrTable.T 63,( +ParserData.MlyValue.VOID',p1,p2)) +fun DEFAULT (p1,p2) = Token.TOKEN (ParserData.LrTable.T 64,( +ParserData.MlyValue.VOID',p1,p2)) +fun WHILE (p1,p2) = Token.TOKEN (ParserData.LrTable.T 65,( +ParserData.MlyValue.VOID',p1,p2)) +fun RETURN (p1,p2) = Token.TOKEN (ParserData.LrTable.T 66,( +ParserData.MlyValue.VOID',p1,p2)) +fun BREAK (p1,p2) = Token.TOKEN (ParserData.LrTable.T 67,( +ParserData.MlyValue.VOID',p1,p2)) +fun CONTINUE (p1,p2) = Token.TOKEN (ParserData.LrTable.T 68,( +ParserData.MlyValue.VOID',p1,p2)) +fun GOTO (p1,p2) = Token.TOKEN (ParserData.LrTable.T 69,( +ParserData.MlyValue.VOID',p1,p2)) +fun CHAR (p1,p2) = Token.TOKEN (ParserData.LrTable.T 70,( +ParserData.MlyValue.VOID',p1,p2)) +fun DOUBLE (p1,p2) = Token.TOKEN (ParserData.LrTable.T 71,( +ParserData.MlyValue.VOID',p1,p2)) +fun ENUM (p1,p2) = Token.TOKEN (ParserData.LrTable.T 72,( +ParserData.MlyValue.VOID',p1,p2)) +fun FLOAT (p1,p2) = Token.TOKEN (ParserData.LrTable.T 73,( +ParserData.MlyValue.VOID',p1,p2)) +fun INT (p1,p2) = Token.TOKEN (ParserData.LrTable.T 74,( +ParserData.MlyValue.VOID',p1,p2)) +fun LONG (p1,p2) = Token.TOKEN (ParserData.LrTable.T 75,( +ParserData.MlyValue.VOID',p1,p2)) +fun SHORT (p1,p2) = Token.TOKEN (ParserData.LrTable.T 76,( +ParserData.MlyValue.VOID',p1,p2)) +fun FRACTIONAL (p1,p2) = Token.TOKEN (ParserData.LrTable.T 77,( +ParserData.MlyValue.VOID',p1,p2)) +fun SATURATE (p1,p2) = Token.TOKEN (ParserData.LrTable.T 78,( +ParserData.MlyValue.VOID',p1,p2)) +fun STRUCT (p1,p2) = Token.TOKEN (ParserData.LrTable.T 79,( +ParserData.MlyValue.VOID',p1,p2)) +fun UNION (p1,p2) = Token.TOKEN (ParserData.LrTable.T 80,( +ParserData.MlyValue.VOID',p1,p2)) +fun UNSIGNED (p1,p2) = Token.TOKEN (ParserData.LrTable.T 81,( +ParserData.MlyValue.VOID',p1,p2)) +fun SIGNED (p1,p2) = Token.TOKEN (ParserData.LrTable.T 82,( +ParserData.MlyValue.VOID',p1,p2)) +fun VOID (p1,p2) = Token.TOKEN (ParserData.LrTable.T 83,( +ParserData.MlyValue.VOID',p1,p2)) +fun SIZEOF (p1,p2) = Token.TOKEN (ParserData.LrTable.T 84,( +ParserData.MlyValue.VOID',p1,p2)) +fun TYPEDEF (p1,p2) = Token.TOKEN (ParserData.LrTable.T 85,( +ParserData.MlyValue.VOID',p1,p2)) +fun UNARY (p1,p2) = Token.TOKEN (ParserData.LrTable.T 86,( +ParserData.MlyValue.VOID',p1,p2)) +fun ELIPSIS (p1,p2) = Token.TOKEN (ParserData.LrTable.T 87,( +ParserData.MlyValue.VOID',p1,p2)) +fun TYPE_NAME (i,p1,p2) = Token.TOKEN (ParserData.LrTable.T 88,( +ParserData.MlyValue.TYPE_NAME i,p1,p2)) +end +end diff --git a/ckit/src/parser/grammar/c.lex b/ckit/src/parser/grammar/c.lex new file mode 100644 index 0000000..96d1128 --- /dev/null +++ b/ckit/src/parser/grammar/c.lex @@ -0,0 +1,239 @@ +(* Copyright (c) 1998 by Lucent Technologies *) + +(* + * The following replacement for c.lex should give correct (ANSI) + * In particular, we don't allow + * + * char *t = "abd + * lkj"; + * + * GCC accepts this, but SGI cc does not. This program is not ANSI + * compliant. + *) + +type svalue = Tokens.svalue +type pos = int +type lexresult = (svalue,pos) Tokens.token +type errWarn = {err: pos*pos*string->unit, warn: pos*pos*string->unit} +type lexarg = {comLevel : int ref, + sourceMap : SourceMap.sourcemap, + charlist : string list ref, + stringstart : int ref, (* start of current string or comment*) + errWarn: errWarn} + +type arg = lexarg +type ('a,'b) token = ('a,'b) Tokens.token + +fun ordof (s, i) = Char.ord (String.sub (s, i)) +fun dec (iRef : int ref) = iRef := (!iRef) - 1 +fun inc (iRef : int ref) = iRef := (!iRef) + 1 +fun chr i = String.str(Char.chr i) +fun ord s = Char.ord(String.sub(s, 0)) +fun explode s = CharVector.foldr (fn (c, l) => str c :: l) [] s +fun implode strList = String.concat strList +fun hd [] = (print "c.lex: hd of empty\n"; + raise Empty) + | hd (h :: l) = h + +val eof = fn ({comLevel,errWarn,sourceMap,stringstart,charlist}:lexarg) => + let val pos = Int.max(!stringstart+2, SourceMap.currPos sourceMap) + in if !comLevel>0 then (#err errWarn) (!stringstart,pos, "unclosed comment" ) + else (); + Tokens.EOF(pos,pos) + end +fun addString (charlist,s:string) = charlist := s :: (!charlist) +fun makeString charlist = (implode(rev(!charlist)) before charlist := nil) + +fun mkHexInt (s,a,b,errWarn:errWarn)=((case (StringCvt.scanString (LargeInt.scan StringCvt.HEX) s) of + SOME i => i + | _ => ((#err errWarn)(a,b,"trouble in parsing int");Int.toLarge(0))) + handle OverFlow => ((#err errWarn)(a,b,"large int const");Int.toLarge(0))) + +fun mkHexChar (args as (s, a, b, errWarn:errWarn)) : int (* returns a character sized integer *) = + let val i = mkHexInt args + in + if (i>255) then + ((#warn errWarn) (a,b,"overflow in hexadecimal escape sequence"); + IntInf.toInt(i mod 256)) + else + IntInf.toInt i + end + +fun mkOctInt (s,a,b,errWarn:errWarn) + = ((case (StringCvt.scanString (LargeInt.scan StringCvt.OCT) s) of + SOME i => i + | _ => ((#err errWarn)(a,b,"trouble in parsing int");Int.toLarge(0))) + handle OverFlow => ((#err errWarn)(a,b,"large int const");Int.toLarge(0))) + + +fun mkOctChar (args as (s, a, b, errWarn:errWarn)) (* returns a character sized integer *) = + let val i = mkOctInt args + in + if (i>255) then + ((#warn errWarn) (a,b,"overflow in octal escape sequence"); + IntInf.toInt(i mod 256)) + else + IntInf.toInt i + end + +fun mkInt (s,a,b,errWarn:errWarn) = ((case (StringCvt.scanString (LargeInt.scan StringCvt.DEC) s) of + SOME i => i + | _ => ((#err errWarn)(a,b,"trouble in parsing int");Int.toLarge(0))) + handle OverFlow => ((#err errWarn)(a,b,"large int const");Int.toLarge(0))) + +fun mkRealNum (s,a,b,errWarn:errWarn) = ((case (StringCvt.scanString Real.scan s) of + SOME r => r + | _ => ((#err errWarn)(a,b,"trouble in parsing real");0.0)) + handle OverFlow => ((#err errWarn)(a,b,"large real const"); 0.0)) + +val backslasha = 7 + +fun special_char(c,fst,last,errWarn:errWarn) = + (case c of + "\\a" => 7 + | "\\b" => 8 + | "\\f" => 12 + | "\\n" => 10 + | "\\r" => 13 + | "\\t" => 9 + | "\\v" => 11 + | _ => ordof(c,1) + (* strictly speaking, should only handle + \?, \\, \", \', but it is common + to simply ignore slash, and just use next char *) + ) + + +(* Notes on lexer states: + INITIAL -- predefined start state and the default token state + S -- inside a string (entered from INTITAL with ") + C -- inside a comment (entered from INITIAL with /* ) + *) + + +%% + +%header (functor CLexFun(structure Tokens : C_TOKENS + structure TokTable : TOKENTABLE + sharing TokTable.Tokens = Tokens)); + +%arg ({comLevel,errWarn,sourceMap,charlist,stringstart}); +%s C S; + + +id = [_A-Za-z][_A-Za-z0-9]*; +octdigit = [0-7]; +hexdigit = [0-9a-fA-F]; +integersuffix = ([uU][lL]?[lL]?|[lL][lL]?[uU]?); +hexnum = 0[xX]{hexdigit}+{integersuffix}?; +octnum = 0{octdigit}+{integersuffix}?; +decnum = (0|([1-9][0-9]*)){integersuffix}?; +floatingsuffix = [flFL]; +realnum = (([0-9]+(\.[0-9]+)?)|(\.[0-9]+))([eE][+-]?[0-9]+)?{floatingsuffix}?; +ws = ("\012"|[\t\ ])*; + +simplecharconst = '[^\n\\]'; +escapecharconst = '\\[^\n]'; + +directive = #(.)*\n; + +%% + +^{ws}{directive} => (SourceMap.parseDirective sourceMap + (yypos,yytext); continue()); +\n => (SourceMap.newline sourceMap yypos; continue()); +{ws} => (continue()); + + +"/*" => (YYBEGIN C; continue()); +"*/" => (YYBEGIN INITIAL; continue()); +. => (continue()); + + +\" => (charlist := [""]; stringstart := yypos; YYBEGIN S; continue()); +\" => (YYBEGIN INITIAL;Tokens.STRING(makeString charlist,!stringstart,yypos+1)); +\n => ((#err errWarn) (!stringstart,yypos,"unclosed string"); + SourceMap.newline sourceMap yypos; + YYBEGIN INITIAL; Tokens.STRING(makeString charlist,!stringstart,yypos)); +[^"\\\n]* => (addString(charlist,yytext); continue()); +\\\n => (SourceMap.newline sourceMap yypos; continue()); +\\0 => (addString(charlist,chr 0);continue()); +\\{octdigit}{3} => (addString(charlist, chr(mkOctChar(substring(yytext, 1, size(yytext)-1), yypos, yypos+size(yytext), errWarn))); continue()); +\\x{hexdigit}+ => (addString(charlist, chr(mkHexChar(substring(yytext, 2, size(yytext)-2), yypos, yypos+size(yytext), errWarn))); continue()); +\\\^[@-_] => (addString(charlist,chr(ordof(yytext,2)-ord("@"))); continue()); +\\. => (addString(charlist, chr(special_char(yytext, yypos, yypos+size(yytext), errWarn))); continue()); + +":" => (Tokens.COLON(yypos,yypos+1)); +";" => (Tokens.SEMICOLON(yypos,yypos+1)); +"(" => (Tokens.LPAREN(yypos,yypos+1)); +")" => (Tokens.RPAREN(yypos,yypos+1)); +"[" => (Tokens.LBRACE(yypos,yypos+1)); +"]" => (Tokens.RBRACE(yypos,yypos+1)); +"{" => (Tokens.LCURLY(yypos,yypos+1)); +"}" => (Tokens.RCURLY(yypos,yypos+1)); +"." => (Tokens.DOT(yypos,yypos+1)); +"..." => (Tokens.ELIPSIS(yypos,yypos+3)); +"," => (Tokens.COMMA(yypos,yypos+1)); +"*" => (Tokens.TIMES(yypos,yypos+1)); +"!" => (Tokens.BANG(yypos,yypos+1)); +"^" => (Tokens.HAT(yypos,yypos+1)); +"+" => (Tokens.PLUS(yypos,yypos+1)); +"-" => (Tokens.MINUS(yypos,yypos+1)); +"++" => (Tokens.INC(yypos,yypos+2)); +"--" => (Tokens.DEC(yypos,yypos+2)); +"->" => (Tokens.ARROW(yypos,yypos+1)); +"/" => (Tokens.DIVIDE(yypos,yypos+1)); +"~" => (Tokens.TILDE(yypos,yypos+1)); +"?" => (Tokens.QUESTION(yypos,yypos+1)); +"|" => (Tokens.BAR(yypos,yypos+1)); +"&" => (Tokens.AMP(yypos,yypos+1)); +"%" => (Tokens.PERCENT(yypos,yypos+1)); +"<=" => (Tokens.LTE(yypos,yypos+2)); +">=" => (Tokens.GTE(yypos,yypos+2)); +"==" => (Tokens.EQ(yypos,yypos+2)); +"=" => (Tokens.EQUALS(yypos,yypos+1)); +"+=" => (Tokens.PLUSEQUALS(yypos,yypos+2)); +"-=" => (Tokens.MINUSEQUALS(yypos,yypos+2)); +"^=" => (Tokens.XOREQUALS(yypos,yypos+2)); +"%=" => (Tokens.MODEQUALS(yypos,yypos+2)); +"*=" => (Tokens.TIMESEQUALS(yypos,yypos+2)); +"/=" => (Tokens.DIVEQUALS(yypos,yypos+2)); +"|=" => (Tokens.OREQUALS(yypos,yypos+2)); +"&=" => (Tokens.ANDEQUALS(yypos,yypos+2)); +"<<=" => (Tokens.LSHIFTEQUALS(yypos,yypos+3)); +">>=" => (Tokens.RSHIFTEQUALS(yypos,yypos+3)); +"<" => (Tokens.LT(yypos,yypos+1)); +">" => (Tokens.GT(yypos,yypos+1)); +"!=" => (Tokens.NEQ(yypos,yypos+2)); +"||" => (Tokens.OR(yypos,yypos+2)); +"&&" => (Tokens.AND(yypos,yypos+2)); +"<<" => (Tokens.LSHIFT(yypos,yypos+2)); +">>" => (Tokens.RSHIFT(yypos,yypos+2)); + +{octnum} => (Tokens.DECNUM(mkOctInt(yytext,yypos,yypos+size(yytext),errWarn),yypos, yypos+size(yytext))); +{hexnum} => (Tokens.DECNUM(mkHexInt(yytext,yypos,yypos+size(yytext),errWarn),yypos, yypos+size(yytext))); +{decnum} => (Tokens.DECNUM(mkInt (yytext,yypos,yypos+size(yytext),errWarn), yypos,yypos+size(yytext))); +{realnum} => +(Tokens.REALNUM(mkRealNum(yytext,yypos,yypos+size(yytext),errWarn), yypos, yypos ++ size(yytext))); + +"'\\"{octdigit}{1,3}"'" => (let val s = substring(yytext, 2, size(yytext)-3) + in Tokens.CCONST(IntInf.fromInt (mkOctChar(s,yypos,yypos+size(yytext),errWarn)), + yypos, + yypos+size(yytext)) + end); + +"'\\x"{hexdigit}+"'" => (let val s = substring(yytext, 3, size(yytext)-4) + in Tokens.CCONST(IntInf.fromInt (mkHexChar(s,yypos,yypos+size(yytext),errWarn)), + yypos, + yypos+size(yytext)) + end); + + +{simplecharconst} => (let val cval = ordof(yytext,1) + in Tokens.CCONST(Int.toLarge cval,yypos,yypos+size(yytext)) + end); +{escapecharconst} => (Tokens.CCONST(IntInf.fromInt(special_char(substring(yytext,1,size(yytext)-2),yypos,yypos+size(yytext),errWarn)), yypos, yypos+size(yytext))); +{id} => (TokTable.checkToken(yytext,yypos)); +. => (continue()); + diff --git a/ckit/src/parser/grammar/c.lex.sml b/ckit/src/parser/grammar/c.lex.sml new file mode 100644 index 0000000..041fb6c --- /dev/null +++ b/ckit/src/parser/grammar/c.lex.sml @@ -0,0 +1,2035 @@ +functor CLexFun(structure Tokens : C_TOKENS + structure TokTable : TOKENTABLE + sharing TokTable.Tokens = Tokens) = struct + + structure yyInput : sig + + type stream + val mkStream : (int -> string) -> stream + val fromStream : TextIO.StreamIO.instream -> stream + val getc : stream -> (Char.char * stream) option + val getpos : stream -> int + val getlineNo : stream -> int + val subtract : stream * stream -> string + val eof : stream -> bool + val lastWasNL : stream -> bool + + end = struct + + structure TIO = TextIO + structure TSIO = TIO.StreamIO + structure TPIO = TextPrimIO + + datatype stream = Stream of { + strm : TSIO.instream, + id : int, (* track which streams originated + * from the same stream *) + pos : int, + lineNo : int, + lastWasNL : bool + } + + local + val next = ref 0 + in + fun nextId() = !next before (next := !next + 1) + end + + val initPos = 2 (* ml-lex bug compatibility *) + + fun mkStream inputN = let + val strm = TSIO.mkInstream + (TPIO.RD { + name = "lexgen", + chunkSize = 4096, + readVec = SOME inputN, + readArr = NONE, + readVecNB = NONE, + readArrNB = NONE, + block = NONE, + canInput = NONE, + avail = (fn () => NONE), + getPos = NONE, + setPos = NONE, + endPos = NONE, + verifyPos = NONE, + close = (fn () => ()), + ioDesc = NONE + }, "") + in + Stream {strm = strm, id = nextId(), pos = initPos, lineNo = 1, + lastWasNL = true} + end + + fun fromStream strm = Stream { + strm = strm, id = nextId(), pos = initPos, lineNo = 1, lastWasNL = true + } + + fun getc (Stream {strm, pos, id, lineNo, ...}) = (case TSIO.input1 strm + of NONE => NONE + | SOME (c, strm') => + SOME (c, Stream { + strm = strm', + pos = pos+1, + id = id, + lineNo = lineNo + + (if c = #"\n" then 1 else 0), + lastWasNL = (c = #"\n") + }) + (* end case*)) + + fun getpos (Stream {pos, ...}) = pos + + fun getlineNo (Stream {lineNo, ...}) = lineNo + + fun subtract (new, old) = let + val Stream {strm = strm, pos = oldPos, id = oldId, ...} = old + val Stream {pos = newPos, id = newId, ...} = new + val (diff, _) = if newId = oldId andalso newPos >= oldPos + then TSIO.inputN (strm, newPos - oldPos) + else raise Fail + "BUG: yyInput: attempted to subtract incompatible streams" + in + diff + end + + fun eof s = not (isSome (getc s)) + + fun lastWasNL (Stream {lastWasNL, ...}) = lastWasNL + + end + + datatype yystart_state = +C | S | INITIAL + structure UserDeclarations = + struct + +(* Copyright (c) 1998 by Lucent Technologies *) + +(* + * The following replacement for c.lex should give correct (ANSI) + * In particular, we don't allow + * + * char *t = "abd + * lkj"; + * + * GCC accepts this, but SGI cc does not. This program is not ANSI + * compliant. + *) + +type svalue = Tokens.svalue +type pos = int +type lexresult = (svalue,pos) Tokens.token +type errWarn = {err: pos*pos*string->unit, warn: pos*pos*string->unit} +type lexarg = {comLevel : int ref, + sourceMap : SourceMap.sourcemap, + charlist : string list ref, + stringstart : int ref, (* start of current string or comment*) + errWarn: errWarn} + +type arg = lexarg +type ('a,'b) token = ('a,'b) Tokens.token + +fun ordof (s, i) = Char.ord (String.sub (s, i)) +fun dec (iRef : int ref) = iRef := (!iRef) - 1 +fun inc (iRef : int ref) = iRef := (!iRef) + 1 +fun chr i = String.str(Char.chr i) +fun ord s = Char.ord(String.sub(s, 0)) +fun explode s = CharVector.foldr (fn (c, l) => str c :: l) [] s +fun implode strList = String.concat strList +fun hd [] = (print "c.lex: hd of empty\n"; + raise Empty) + | hd (h :: l) = h + +val eof = fn ({comLevel,errWarn,sourceMap,stringstart,charlist}:lexarg) => + let val pos = Int.max(!stringstart+2, SourceMap.currPos sourceMap) + in if !comLevel>0 then (#err errWarn) (!stringstart,pos, "unclosed comment" ) + else (); + Tokens.EOF(pos,pos) + end +fun addString (charlist,s:string) = charlist := s :: (!charlist) +fun makeString charlist = (implode(rev(!charlist)) before charlist := nil) + +fun mkHexInt (s,a,b,errWarn:errWarn)=((case (StringCvt.scanString (LargeInt.scan StringCvt.HEX) s) of + SOME i => i + | _ => ((#err errWarn)(a,b,"trouble in parsing int");Int.toLarge(0))) + handle OverFlow => ((#err errWarn)(a,b,"large int const");Int.toLarge(0))) + +fun mkHexChar (args as (s, a, b, errWarn:errWarn)) : int (* returns a character sized integer *) = + let val i = mkHexInt args + in + if (i>255) then + ((#warn errWarn) (a,b,"overflow in hexadecimal escape sequence"); + IntInf.toInt(i mod 256)) + else + IntInf.toInt i + end + +fun mkOctInt (s,a,b,errWarn:errWarn) + = ((case (StringCvt.scanString (LargeInt.scan StringCvt.OCT) s) of + SOME i => i + | _ => ((#err errWarn)(a,b,"trouble in parsing int");Int.toLarge(0))) + handle OverFlow => ((#err errWarn)(a,b,"large int const");Int.toLarge(0))) + + +fun mkOctChar (args as (s, a, b, errWarn:errWarn)) (* returns a character sized integer *) = + let val i = mkOctInt args + in + if (i>255) then + ((#warn errWarn) (a,b,"overflow in octal escape sequence"); + IntInf.toInt(i mod 256)) + else + IntInf.toInt i + end + +fun mkInt (s,a,b,errWarn:errWarn) = ((case (StringCvt.scanString (LargeInt.scan StringCvt.DEC) s) of + SOME i => i + | _ => ((#err errWarn)(a,b,"trouble in parsing int");Int.toLarge(0))) + handle OverFlow => ((#err errWarn)(a,b,"large int const");Int.toLarge(0))) + +fun mkRealNum (s,a,b,errWarn:errWarn) = ((case (StringCvt.scanString Real.scan s) of + SOME r => r + | _ => ((#err errWarn)(a,b,"trouble in parsing real");0.0)) + handle OverFlow => ((#err errWarn)(a,b,"large real const"); 0.0)) + +val backslasha = 7 + +fun special_char(c,fst,last,errWarn:errWarn) = + (case c of + "\\a" => 7 + | "\\b" => 8 + | "\\f" => 12 + | "\\n" => 10 + | "\\r" => 13 + | "\\t" => 9 + | "\\v" => 11 + | _ => ordof(c,1) + (* strictly speaking, should only handle + \?, \\, \", \', but it is common + to simply ignore slash, and just use next char *) + ) + + +(* Notes on lexer states: + INITIAL -- predefined start state and the default token state + S -- inside a string (entered from INTITAL with ") + C -- inside a comment (entered from INITIAL with /* ) + *) + + + + + end + + datatype yymatch + = yyNO_MATCH + | yyMATCH of yyInput.stream * action * yymatch + withtype action = yyInput.stream * yymatch -> UserDeclarations.lexresult + + local + + val yytable = +Vector.fromList [] + fun mk yyins = let + (* current start state *) + val yyss = ref INITIAL + fun YYBEGIN ss = (yyss := ss) + (* current input stream *) + val yystrm = ref yyins + (* get one char of input *) + val yygetc = yyInput.getc + (* create yytext *) + fun yymktext(strm) = yyInput.subtract (strm, !yystrm) + open UserDeclarations + fun lex +(yyarg as ({comLevel,errWarn,sourceMap,charlist,stringstart})) () = let + fun continue() = let + val yylastwasn = yyInput.lastWasNL (!yystrm) + fun yystuck (yyNO_MATCH) = raise Fail "stuck state" + | yystuck (yyMATCH (strm, action, old)) = + action (strm, old) + val yypos = yyInput.getpos (!yystrm) + val yygetlineNo = yyInput.getlineNo + fun yyactsToMatches (strm, [], oldMatches) = oldMatches + | yyactsToMatches (strm, act::acts, oldMatches) = + yyMATCH (strm, act, yyactsToMatches (strm, acts, oldMatches)) + fun yygo actTable = + (fn (~1, _, oldMatches) => yystuck oldMatches + | (curState, strm, oldMatches) => let + val (transitions, finals') = Vector.sub (yytable, curState) + val finals = List.map (fn i => Vector.sub (actTable, i)) finals' + fun tryfinal() = + yystuck (yyactsToMatches (strm, finals, oldMatches)) + fun find (c, []) = NONE + | find (c, (c1, c2, s)::ts) = + if c1 <= c andalso c <= c2 then SOME s + else find (c, ts) + in case yygetc strm + of SOME(c, strm') => + (case find (c, transitions) + of NONE => tryfinal() + | SOME n => + yygo actTable + (n, strm', + yyactsToMatches (strm, finals, oldMatches))) + | NONE => tryfinal() + end) + in +let +fun yyAction0 (strm, lastMatch : yymatch) = let + val oldStrm = !(yystrm) + fun REJECT () = (yystrm := oldStrm; yystuck(lastMatch)) + val yytext = yymktext(strm) + in + yystrm := strm; + if not yylastwasn then REJECT() else ((SourceMap.parseDirective sourceMap + (yypos,yytext); continue())) + end +fun yyAction1 (strm, lastMatch : yymatch) = (yystrm := strm; + (SourceMap.newline sourceMap yypos; continue())) +fun yyAction2 (strm, lastMatch : yymatch) = (yystrm := strm; (continue())) +fun yyAction3 (strm, lastMatch : yymatch) = (yystrm := strm; + (YYBEGIN C; continue())) +fun yyAction4 (strm, lastMatch : yymatch) = (yystrm := strm; + (YYBEGIN INITIAL; continue())) +fun yyAction5 (strm, lastMatch : yymatch) = (yystrm := strm; (continue())) +fun yyAction6 (strm, lastMatch : yymatch) = (yystrm := strm; + (charlist := [""]; stringstart := yypos; YYBEGIN S; continue())) +fun yyAction7 (strm, lastMatch : yymatch) = (yystrm := strm; + (YYBEGIN INITIAL;Tokens.STRING(makeString charlist,!stringstart,yypos+1))) +fun yyAction8 (strm, lastMatch : yymatch) = (yystrm := strm; + ((#err errWarn) (!stringstart,yypos,"unclosed string"); + SourceMap.newline sourceMap yypos; + YYBEGIN INITIAL; Tokens.STRING(makeString charlist,!stringstart,yypos))) +fun yyAction9 (strm, lastMatch : yymatch) = let + val yytext = yymktext(strm) + in + yystrm := strm; (addString(charlist,yytext); continue()) + end +fun yyAction10 (strm, lastMatch : yymatch) = (yystrm := strm; + (SourceMap.newline sourceMap yypos; continue())) +fun yyAction11 (strm, lastMatch : yymatch) = (yystrm := strm; + (addString(charlist,chr 0);continue())) +fun yyAction12 (strm, lastMatch : yymatch) = let + val yytext = yymktext(strm) + in + yystrm := strm; + (addString(charlist, chr(mkOctChar(substring(yytext, 1, size(yytext)-1), yypos, yypos+size(yytext), errWarn))); continue()) + end +fun yyAction13 (strm, lastMatch : yymatch) = let + val yytext = yymktext(strm) + in + yystrm := strm; + (addString(charlist, chr(mkHexChar(substring(yytext, 2, size(yytext)-2), yypos, yypos+size(yytext), errWarn))); continue()) + end +fun yyAction14 (strm, lastMatch : yymatch) = let + val yytext = yymktext(strm) + in + yystrm := strm; + (addString(charlist,chr(ordof(yytext,2)-ord("@"))); continue()) + end +fun yyAction15 (strm, lastMatch : yymatch) = let + val yytext = yymktext(strm) + in + yystrm := strm; + (addString(charlist, chr(special_char(yytext, yypos, yypos+size(yytext), errWarn))); continue()) + end +fun yyAction16 (strm, lastMatch : yymatch) = (yystrm := strm; + (Tokens.COLON(yypos,yypos+1))) +fun yyAction17 (strm, lastMatch : yymatch) = (yystrm := strm; + (Tokens.SEMICOLON(yypos,yypos+1))) +fun yyAction18 (strm, lastMatch : yymatch) = (yystrm := strm; + (Tokens.LPAREN(yypos,yypos+1))) +fun yyAction19 (strm, lastMatch : yymatch) = (yystrm := strm; + (Tokens.RPAREN(yypos,yypos+1))) +fun yyAction20 (strm, lastMatch : yymatch) = (yystrm := strm; + (Tokens.LBRACE(yypos,yypos+1))) +fun yyAction21 (strm, lastMatch : yymatch) = (yystrm := strm; + (Tokens.RBRACE(yypos,yypos+1))) +fun yyAction22 (strm, lastMatch : yymatch) = (yystrm := strm; + (Tokens.LCURLY(yypos,yypos+1))) +fun yyAction23 (strm, lastMatch : yymatch) = (yystrm := strm; + (Tokens.RCURLY(yypos,yypos+1))) +fun yyAction24 (strm, lastMatch : yymatch) = (yystrm := strm; + (Tokens.DOT(yypos,yypos+1))) +fun yyAction25 (strm, lastMatch : yymatch) = (yystrm := strm; + (Tokens.ELIPSIS(yypos,yypos+3))) +fun yyAction26 (strm, lastMatch : yymatch) = (yystrm := strm; + (Tokens.COMMA(yypos,yypos+1))) +fun yyAction27 (strm, lastMatch : yymatch) = (yystrm := strm; + (Tokens.TIMES(yypos,yypos+1))) +fun yyAction28 (strm, lastMatch : yymatch) = (yystrm := strm; + (Tokens.BANG(yypos,yypos+1))) +fun yyAction29 (strm, lastMatch : yymatch) = (yystrm := strm; + (Tokens.HAT(yypos,yypos+1))) +fun yyAction30 (strm, lastMatch : yymatch) = (yystrm := strm; + (Tokens.PLUS(yypos,yypos+1))) +fun yyAction31 (strm, lastMatch : yymatch) = (yystrm := strm; + (Tokens.MINUS(yypos,yypos+1))) +fun yyAction32 (strm, lastMatch : yymatch) = (yystrm := strm; + (Tokens.INC(yypos,yypos+2))) +fun yyAction33 (strm, lastMatch : yymatch) = (yystrm := strm; + (Tokens.DEC(yypos,yypos+2))) +fun yyAction34 (strm, lastMatch : yymatch) = (yystrm := strm; + (Tokens.ARROW(yypos,yypos+1))) +fun yyAction35 (strm, lastMatch : yymatch) = (yystrm := strm; + (Tokens.DIVIDE(yypos,yypos+1))) +fun yyAction36 (strm, lastMatch : yymatch) = (yystrm := strm; + (Tokens.TILDE(yypos,yypos+1))) +fun yyAction37 (strm, lastMatch : yymatch) = (yystrm := strm; + (Tokens.QUESTION(yypos,yypos+1))) +fun yyAction38 (strm, lastMatch : yymatch) = (yystrm := strm; + (Tokens.BAR(yypos,yypos+1))) +fun yyAction39 (strm, lastMatch : yymatch) = (yystrm := strm; + (Tokens.AMP(yypos,yypos+1))) +fun yyAction40 (strm, lastMatch : yymatch) = (yystrm := strm; + (Tokens.PERCENT(yypos,yypos+1))) +fun yyAction41 (strm, lastMatch : yymatch) = (yystrm := strm; + (Tokens.LTE(yypos,yypos+2))) +fun yyAction42 (strm, lastMatch : yymatch) = (yystrm := strm; + (Tokens.GTE(yypos,yypos+2))) +fun yyAction43 (strm, lastMatch : yymatch) = (yystrm := strm; + (Tokens.EQ(yypos,yypos+2))) +fun yyAction44 (strm, lastMatch : yymatch) = (yystrm := strm; + (Tokens.EQUALS(yypos,yypos+1))) +fun yyAction45 (strm, lastMatch : yymatch) = (yystrm := strm; + (Tokens.PLUSEQUALS(yypos,yypos+2))) +fun yyAction46 (strm, lastMatch : yymatch) = (yystrm := strm; + (Tokens.MINUSEQUALS(yypos,yypos+2))) +fun yyAction47 (strm, lastMatch : yymatch) = (yystrm := strm; + (Tokens.XOREQUALS(yypos,yypos+2))) +fun yyAction48 (strm, lastMatch : yymatch) = (yystrm := strm; + (Tokens.MODEQUALS(yypos,yypos+2))) +fun yyAction49 (strm, lastMatch : yymatch) = (yystrm := strm; + (Tokens.TIMESEQUALS(yypos,yypos+2))) +fun yyAction50 (strm, lastMatch : yymatch) = (yystrm := strm; + (Tokens.DIVEQUALS(yypos,yypos+2))) +fun yyAction51 (strm, lastMatch : yymatch) = (yystrm := strm; + (Tokens.OREQUALS(yypos,yypos+2))) +fun yyAction52 (strm, lastMatch : yymatch) = (yystrm := strm; + (Tokens.ANDEQUALS(yypos,yypos+2))) +fun yyAction53 (strm, lastMatch : yymatch) = (yystrm := strm; + (Tokens.LSHIFTEQUALS(yypos,yypos+3))) +fun yyAction54 (strm, lastMatch : yymatch) = (yystrm := strm; + (Tokens.RSHIFTEQUALS(yypos,yypos+3))) +fun yyAction55 (strm, lastMatch : yymatch) = (yystrm := strm; + (Tokens.LT(yypos,yypos+1))) +fun yyAction56 (strm, lastMatch : yymatch) = (yystrm := strm; + (Tokens.GT(yypos,yypos+1))) +fun yyAction57 (strm, lastMatch : yymatch) = (yystrm := strm; + (Tokens.NEQ(yypos,yypos+2))) +fun yyAction58 (strm, lastMatch : yymatch) = (yystrm := strm; + (Tokens.OR(yypos,yypos+2))) +fun yyAction59 (strm, lastMatch : yymatch) = (yystrm := strm; + (Tokens.AND(yypos,yypos+2))) +fun yyAction60 (strm, lastMatch : yymatch) = (yystrm := strm; + (Tokens.LSHIFT(yypos,yypos+2))) +fun yyAction61 (strm, lastMatch : yymatch) = (yystrm := strm; + (Tokens.RSHIFT(yypos,yypos+2))) +fun yyAction62 (strm, lastMatch : yymatch) = let + val yytext = yymktext(strm) + in + yystrm := strm; + (Tokens.DECNUM(mkOctInt(yytext,yypos,yypos+size(yytext),errWarn),yypos, yypos+size(yytext))) + end +fun yyAction63 (strm, lastMatch : yymatch) = let + val yytext = yymktext(strm) + in + yystrm := strm; + (Tokens.DECNUM(mkHexInt(yytext,yypos,yypos+size(yytext),errWarn),yypos, yypos+size(yytext))) + end +fun yyAction64 (strm, lastMatch : yymatch) = let + val yytext = yymktext(strm) + in + yystrm := strm; + (Tokens.DECNUM(mkInt (yytext,yypos,yypos+size(yytext),errWarn), yypos,yypos+size(yytext))) + end +fun yyAction65 (strm, lastMatch : yymatch) = let + val yytext = yymktext(strm) + in + yystrm := strm; + (Tokens.REALNUM(mkRealNum(yytext,yypos,yypos+size(yytext),errWarn), yypos, yypos ++ size(yytext))) + end +fun yyAction66 (strm, lastMatch : yymatch) = let + val yytext = yymktext(strm) + in + yystrm := strm; + (let val s = substring(yytext, 2, size(yytext)-3) + in Tokens.CCONST(IntInf.fromInt (mkOctChar(s,yypos,yypos+size(yytext),errWarn)), + yypos, + yypos+size(yytext)) + end) + end +fun yyAction67 (strm, lastMatch : yymatch) = let + val yytext = yymktext(strm) + in + yystrm := strm; + (let val s = substring(yytext, 3, size(yytext)-4) + in Tokens.CCONST(IntInf.fromInt (mkHexChar(s,yypos,yypos+size(yytext),errWarn)), + yypos, + yypos+size(yytext)) + end) + end +fun yyAction68 (strm, lastMatch : yymatch) = let + val yytext = yymktext(strm) + in + yystrm := strm; + (let val cval = ordof(yytext,1) + in Tokens.CCONST(Int.toLarge cval,yypos,yypos+size(yytext)) + end) + end +fun yyAction69 (strm, lastMatch : yymatch) = let + val yytext = yymktext(strm) + in + yystrm := strm; + (Tokens.CCONST(IntInf.fromInt(special_char(substring(yytext,1,size(yytext)-2),yypos,yypos+size(yytext),errWarn)), yypos, yypos+size(yytext))) + end +fun yyAction70 (strm, lastMatch : yymatch) = let + val yytext = yymktext(strm) + in + yystrm := strm; (TokTable.checkToken(yytext,yypos)) + end +fun yyAction71 (strm, lastMatch : yymatch) = (yystrm := strm; (continue())) +fun yyQ57 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction36(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction36(strm, yyNO_MATCH) + (* end case *)) +fun yyQ56 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction23(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction23(strm, yyNO_MATCH) + (* end case *)) +fun yyQ59 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction58(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction58(strm, yyNO_MATCH) + (* end case *)) +fun yyQ58 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction51(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction51(strm, yyNO_MATCH) + (* end case *)) +fun yyQ55 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction38(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #">" + then yyAction38(strm, yyNO_MATCH) + else if inp < #">" + then if inp = #"=" + then yyQ58(strm', yyMATCH(strm, yyAction38, yyNO_MATCH)) + else yyAction38(strm, yyNO_MATCH) + else if inp = #"|" + then yyQ59(strm', yyMATCH(strm, yyAction38, yyNO_MATCH)) + else yyAction38(strm, yyNO_MATCH) + (* end case *)) +fun yyQ54 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction22(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction22(strm, yyNO_MATCH) + (* end case *)) +fun yyQ60 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction47(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction47(strm, yyNO_MATCH) + (* end case *)) +fun yyQ53 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction29(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"=" + then yyQ60(strm', yyMATCH(strm, yyAction29, yyNO_MATCH)) + else yyAction29(strm, yyNO_MATCH) + (* end case *)) +fun yyQ52 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction21(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction21(strm, yyNO_MATCH) + (* end case *)) +fun yyQ51 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction20(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction20(strm, yyNO_MATCH) + (* end case *)) +fun yyQ61 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction70(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"[" + then yyAction70(strm, yyNO_MATCH) + else if inp < #"[" + then if inp = #":" + then yyAction70(strm, yyNO_MATCH) + else if inp < #":" + then if inp <= #"/" + then yyAction70(strm, yyNO_MATCH) + else yyQ61(strm', yyMATCH(strm, yyAction70, yyNO_MATCH)) + else if inp <= #"@" + then yyAction70(strm, yyNO_MATCH) + else yyQ61(strm', yyMATCH(strm, yyAction70, yyNO_MATCH)) + else if inp = #"`" + then yyAction70(strm, yyNO_MATCH) + else if inp < #"`" + then if inp = #"_" + then yyQ61(strm', yyMATCH(strm, yyAction70, yyNO_MATCH)) + else yyAction70(strm, yyNO_MATCH) + else if inp <= #"z" + then yyQ61(strm', yyMATCH(strm, yyAction70, yyNO_MATCH)) + else yyAction70(strm, yyNO_MATCH) + (* end case *)) +fun yyQ50 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction70(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"[" + then yyAction70(strm, yyNO_MATCH) + else if inp < #"[" + then if inp = #":" + then yyAction70(strm, yyNO_MATCH) + else if inp < #":" + then if inp <= #"/" + then yyAction70(strm, yyNO_MATCH) + else yyQ61(strm', yyMATCH(strm, yyAction70, yyNO_MATCH)) + else if inp <= #"@" + then yyAction70(strm, yyNO_MATCH) + else yyQ61(strm', yyMATCH(strm, yyAction70, yyNO_MATCH)) + else if inp = #"`" + then yyAction70(strm, yyNO_MATCH) + else if inp < #"`" + then if inp = #"_" + then yyQ61(strm', yyMATCH(strm, yyAction70, yyNO_MATCH)) + else yyAction70(strm, yyNO_MATCH) + else if inp <= #"z" + then yyQ61(strm', yyMATCH(strm, yyAction70, yyNO_MATCH)) + else yyAction70(strm, yyNO_MATCH) + (* end case *)) +fun yyQ49 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction37(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction37(strm, yyNO_MATCH) + (* end case *)) +fun yyQ64 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction54(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction54(strm, yyNO_MATCH) + (* end case *)) +fun yyQ63 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction61(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"=" + then yyQ64(strm', yyMATCH(strm, yyAction61, yyNO_MATCH)) + else yyAction61(strm, yyNO_MATCH) + (* end case *)) +fun yyQ62 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction42(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction42(strm, yyNO_MATCH) + (* end case *)) +fun yyQ48 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction56(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #">" + then yyQ63(strm', yyMATCH(strm, yyAction56, yyNO_MATCH)) + else if inp < #">" + then if inp = #"=" + then yyQ62(strm', yyMATCH(strm, yyAction56, yyNO_MATCH)) + else yyAction56(strm, yyNO_MATCH) + else yyAction56(strm, yyNO_MATCH) + (* end case *)) +fun yyQ65 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction43(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction43(strm, yyNO_MATCH) + (* end case *)) +fun yyQ47 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction44(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"=" + then yyQ65(strm', yyMATCH(strm, yyAction44, yyNO_MATCH)) + else yyAction44(strm, yyNO_MATCH) + (* end case *)) +fun yyQ67 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction41(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction41(strm, yyNO_MATCH) + (* end case *)) +fun yyQ68 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction53(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction53(strm, yyNO_MATCH) + (* end case *)) +fun yyQ66 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction60(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"=" + then yyQ68(strm', yyMATCH(strm, yyAction60, yyNO_MATCH)) + else yyAction60(strm, yyNO_MATCH) + (* end case *)) +fun yyQ46 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction55(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"=" + then yyQ67(strm', yyMATCH(strm, yyAction55, yyNO_MATCH)) + else if inp < #"=" + then if inp = #"<" + then yyQ66(strm', yyMATCH(strm, yyAction55, yyNO_MATCH)) + else yyAction55(strm, yyNO_MATCH) + else yyAction55(strm, yyNO_MATCH) + (* end case *)) +fun yyQ45 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction17(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction17(strm, yyNO_MATCH) + (* end case *)) +fun yyQ44 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction16(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction16(strm, yyNO_MATCH) + (* end case *)) +fun yyQ76 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction64(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction64(strm, yyNO_MATCH) + (* end case *)) +fun yyQ75 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction64(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"M" + then yyAction64(strm, yyNO_MATCH) + else if inp < #"M" + then if inp = #"L" + then yyQ76(strm', yyMATCH(strm, yyAction64, yyNO_MATCH)) + else yyAction64(strm, yyNO_MATCH) + else if inp = #"l" + then yyQ76(strm', yyMATCH(strm, yyAction64, yyNO_MATCH)) + else yyAction64(strm, yyNO_MATCH) + (* end case *)) +fun yyQ74 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction64(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"M" + then yyAction64(strm, yyNO_MATCH) + else if inp < #"M" + then if inp = #"L" + then yyQ75(strm', yyMATCH(strm, yyAction64, yyNO_MATCH)) + else yyAction64(strm, yyNO_MATCH) + else if inp = #"l" + then yyQ75(strm', yyMATCH(strm, yyAction64, yyNO_MATCH)) + else yyAction64(strm, yyNO_MATCH) + (* end case *)) +fun yyQ77 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction64(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"V" + then yyAction64(strm, yyNO_MATCH) + else if inp < #"V" + then if inp = #"U" + then yyQ76(strm', yyMATCH(strm, yyAction64, yyNO_MATCH)) + else yyAction64(strm, yyNO_MATCH) + else if inp = #"u" + then yyQ76(strm', yyMATCH(strm, yyAction64, yyNO_MATCH)) + else yyAction64(strm, yyNO_MATCH) + (* end case *)) +fun yyQ73 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction64(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"V" + then yyAction64(strm, yyNO_MATCH) + else if inp < #"V" + then if inp = #"M" + then yyAction64(strm, yyNO_MATCH) + else if inp < #"M" + then if inp = #"L" + then yyQ77(strm', yyMATCH(strm, yyAction64, yyNO_MATCH)) + else yyAction64(strm, yyNO_MATCH) + else if inp = #"U" + then yyQ76(strm', yyMATCH(strm, yyAction64, yyNO_MATCH)) + else yyAction64(strm, yyNO_MATCH) + else if inp = #"m" + then yyAction64(strm, yyNO_MATCH) + else if inp < #"m" + then if inp = #"l" + then yyQ77(strm', yyMATCH(strm, yyAction64, yyNO_MATCH)) + else yyAction64(strm, yyNO_MATCH) + else if inp = #"u" + then yyQ76(strm', yyMATCH(strm, yyAction64, yyNO_MATCH)) + else yyAction64(strm, yyNO_MATCH) + (* end case *)) +fun yyQ72 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction65(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction65(strm, yyNO_MATCH) + (* end case *)) +fun yyQ79 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction65(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"L" + then yyQ72(strm', yyMATCH(strm, yyAction65, yyNO_MATCH)) + else if inp < #"L" + then if inp = #":" + then yyAction65(strm, yyNO_MATCH) + else if inp < #":" + then if inp <= #"/" + then yyAction65(strm, yyNO_MATCH) + else yyQ79(strm', yyMATCH(strm, yyAction65, yyNO_MATCH)) + else if inp = #"F" + then yyQ72(strm', yyMATCH(strm, yyAction65, yyNO_MATCH)) + else yyAction65(strm, yyNO_MATCH) + else if inp = #"g" + then yyAction65(strm, yyNO_MATCH) + else if inp < #"g" + then if inp = #"f" + then yyQ72(strm', yyMATCH(strm, yyAction65, yyNO_MATCH)) + else yyAction65(strm, yyNO_MATCH) + else if inp = #"l" + then yyQ72(strm', yyMATCH(strm, yyAction65, yyNO_MATCH)) + else yyAction65(strm, yyNO_MATCH) + (* end case *)) +fun yyQ78 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = #"0" + then yyQ79(strm', lastMatch) + else if inp < #"0" + then yystuck(lastMatch) + else if inp <= #"9" + then yyQ79(strm', lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ71 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = #"-" + then yyQ78(strm', lastMatch) + else if inp < #"-" + then if inp = #"+" + then yyQ78(strm', lastMatch) + else yystuck(lastMatch) + else if inp = #"0" + then yyQ79(strm', lastMatch) + else if inp < #"0" + then yystuck(lastMatch) + else if inp <= #"9" + then yyQ79(strm', lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ80 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction65(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"L" + then yyQ72(strm', yyMATCH(strm, yyAction65, yyNO_MATCH)) + else if inp < #"L" + then if inp = #"E" + then yyQ71(strm', yyMATCH(strm, yyAction65, yyNO_MATCH)) + else if inp < #"E" + then if inp = #"0" + then yyQ80(strm', yyMATCH(strm, yyAction65, yyNO_MATCH)) + else if inp < #"0" + then yyAction65(strm, yyNO_MATCH) + else if inp <= #"9" + then yyQ80(strm', yyMATCH(strm, yyAction65, yyNO_MATCH)) + else yyAction65(strm, yyNO_MATCH) + else if inp = #"F" + then yyQ72(strm', yyMATCH(strm, yyAction65, yyNO_MATCH)) + else yyAction65(strm, yyNO_MATCH) + else if inp = #"g" + then yyAction65(strm, yyNO_MATCH) + else if inp < #"g" + then if inp = #"e" + then yyQ71(strm', yyMATCH(strm, yyAction65, yyNO_MATCH)) + else if inp = #"f" + then yyQ72(strm', yyMATCH(strm, yyAction65, yyNO_MATCH)) + else yyAction65(strm, yyNO_MATCH) + else if inp = #"l" + then yyQ72(strm', yyMATCH(strm, yyAction65, yyNO_MATCH)) + else yyAction65(strm, yyNO_MATCH) + (* end case *)) +fun yyQ69 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = #"0" + then yyQ80(strm', lastMatch) + else if inp < #"0" + then yystuck(lastMatch) + else if inp <= #"9" + then yyQ80(strm', lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ70 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction64(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"M" + then yyAction64(strm, yyNO_MATCH) + else if inp < #"M" + then if inp = #":" + then yyAction64(strm, yyNO_MATCH) + else if inp < #":" + then if inp = #"/" + then yyAction64(strm, yyNO_MATCH) + else if inp < #"/" + then if inp = #"." + then yyQ69(strm', yyMATCH(strm, yyAction64, yyNO_MATCH)) + else yyAction64(strm, yyNO_MATCH) + else yyQ70(strm', yyMATCH(strm, yyAction64, yyNO_MATCH)) + else if inp = #"F" + then yyQ72(strm', yyMATCH(strm, yyAction64, yyNO_MATCH)) + else if inp < #"F" + then if inp = #"E" + then yyQ71(strm', yyMATCH(strm, yyAction64, yyNO_MATCH)) + else yyAction64(strm, yyNO_MATCH) + else if inp = #"L" + then yyQ73(strm', yyMATCH(strm, yyAction64, yyNO_MATCH)) + else yyAction64(strm, yyNO_MATCH) + else if inp = #"g" + then yyAction64(strm, yyNO_MATCH) + else if inp < #"g" + then if inp = #"V" + then yyAction64(strm, yyNO_MATCH) + else if inp < #"V" + then if inp = #"U" + then yyQ74(strm', yyMATCH(strm, yyAction64, yyNO_MATCH)) + else yyAction64(strm, yyNO_MATCH) + else if inp = #"e" + then yyQ71(strm', yyMATCH(strm, yyAction64, yyNO_MATCH)) + else if inp = #"f" + then yyQ72(strm', yyMATCH(strm, yyAction64, yyNO_MATCH)) + else yyAction64(strm, yyNO_MATCH) + else if inp = #"m" + then yyAction64(strm, yyNO_MATCH) + else if inp < #"m" + then if inp = #"l" + then yyQ73(strm', yyMATCH(strm, yyAction64, yyNO_MATCH)) + else yyAction64(strm, yyNO_MATCH) + else if inp = #"u" + then yyQ74(strm', yyMATCH(strm, yyAction64, yyNO_MATCH)) + else yyAction64(strm, yyNO_MATCH) + (* end case *)) +fun yyQ43 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction64(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"M" + then yyAction64(strm, yyNO_MATCH) + else if inp < #"M" + then if inp = #":" + then yyAction64(strm, yyNO_MATCH) + else if inp < #":" + then if inp = #"/" + then yyAction64(strm, yyNO_MATCH) + else if inp < #"/" + then if inp = #"." + then yyQ69(strm', yyMATCH(strm, yyAction64, yyNO_MATCH)) + else yyAction64(strm, yyNO_MATCH) + else yyQ70(strm', yyMATCH(strm, yyAction64, yyNO_MATCH)) + else if inp = #"F" + then yyQ72(strm', yyMATCH(strm, yyAction64, yyNO_MATCH)) + else if inp < #"F" + then if inp = #"E" + then yyQ71(strm', yyMATCH(strm, yyAction64, yyNO_MATCH)) + else yyAction64(strm, yyNO_MATCH) + else if inp = #"L" + then yyQ73(strm', yyMATCH(strm, yyAction64, yyNO_MATCH)) + else yyAction64(strm, yyNO_MATCH) + else if inp = #"g" + then yyAction64(strm, yyNO_MATCH) + else if inp < #"g" + then if inp = #"V" + then yyAction64(strm, yyNO_MATCH) + else if inp < #"V" + then if inp = #"U" + then yyQ74(strm', yyMATCH(strm, yyAction64, yyNO_MATCH)) + else yyAction64(strm, yyNO_MATCH) + else if inp = #"e" + then yyQ71(strm', yyMATCH(strm, yyAction64, yyNO_MATCH)) + else if inp = #"f" + then yyQ72(strm', yyMATCH(strm, yyAction64, yyNO_MATCH)) + else yyAction64(strm, yyNO_MATCH) + else if inp = #"m" + then yyAction64(strm, yyNO_MATCH) + else if inp < #"m" + then if inp = #"l" + then yyQ73(strm', yyMATCH(strm, yyAction64, yyNO_MATCH)) + else yyAction64(strm, yyNO_MATCH) + else if inp = #"u" + then yyQ74(strm', yyMATCH(strm, yyAction64, yyNO_MATCH)) + else yyAction64(strm, yyNO_MATCH) + (* end case *)) +fun yyQ88 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction63(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction63(strm, yyNO_MATCH) + (* end case *)) +fun yyQ87 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction63(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"M" + then yyAction63(strm, yyNO_MATCH) + else if inp < #"M" + then if inp = #"L" + then yyQ88(strm', yyMATCH(strm, yyAction63, yyNO_MATCH)) + else yyAction63(strm, yyNO_MATCH) + else if inp = #"l" + then yyQ88(strm', yyMATCH(strm, yyAction63, yyNO_MATCH)) + else yyAction63(strm, yyNO_MATCH) + (* end case *)) +fun yyQ86 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction63(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"M" + then yyAction63(strm, yyNO_MATCH) + else if inp < #"M" + then if inp = #"L" + then yyQ87(strm', yyMATCH(strm, yyAction63, yyNO_MATCH)) + else yyAction63(strm, yyNO_MATCH) + else if inp = #"l" + then yyQ87(strm', yyMATCH(strm, yyAction63, yyNO_MATCH)) + else yyAction63(strm, yyNO_MATCH) + (* end case *)) +fun yyQ89 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction63(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"V" + then yyAction63(strm, yyNO_MATCH) + else if inp < #"V" + then if inp = #"U" + then yyQ88(strm', yyMATCH(strm, yyAction63, yyNO_MATCH)) + else yyAction63(strm, yyNO_MATCH) + else if inp = #"u" + then yyQ88(strm', yyMATCH(strm, yyAction63, yyNO_MATCH)) + else yyAction63(strm, yyNO_MATCH) + (* end case *)) +fun yyQ85 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction63(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"V" + then yyAction63(strm, yyNO_MATCH) + else if inp < #"V" + then if inp = #"M" + then yyAction63(strm, yyNO_MATCH) + else if inp < #"M" + then if inp = #"L" + then yyQ89(strm', yyMATCH(strm, yyAction63, yyNO_MATCH)) + else yyAction63(strm, yyNO_MATCH) + else if inp = #"U" + then yyQ88(strm', yyMATCH(strm, yyAction63, yyNO_MATCH)) + else yyAction63(strm, yyNO_MATCH) + else if inp = #"m" + then yyAction63(strm, yyNO_MATCH) + else if inp < #"m" + then if inp = #"l" + then yyQ89(strm', yyMATCH(strm, yyAction63, yyNO_MATCH)) + else yyAction63(strm, yyNO_MATCH) + else if inp = #"u" + then yyQ88(strm', yyMATCH(strm, yyAction63, yyNO_MATCH)) + else yyAction63(strm, yyNO_MATCH) + (* end case *)) +fun yyQ84 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction63(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"U" + then yyQ86(strm', yyMATCH(strm, yyAction63, yyNO_MATCH)) + else if inp < #"U" + then if inp = #"A" + then yyQ84(strm', yyMATCH(strm, yyAction63, yyNO_MATCH)) + else if inp < #"A" + then if inp = #"0" + then yyQ84(strm', yyMATCH(strm, yyAction63, yyNO_MATCH)) + else if inp < #"0" + then yyAction63(strm, yyNO_MATCH) + else if inp <= #"9" + then yyQ84(strm', yyMATCH(strm, yyAction63, yyNO_MATCH)) + else yyAction63(strm, yyNO_MATCH) + else if inp = #"L" + then yyQ85(strm', yyMATCH(strm, yyAction63, yyNO_MATCH)) + else if inp < #"L" + then if inp <= #"F" + then yyQ84(strm', yyMATCH(strm, yyAction63, yyNO_MATCH)) + else yyAction63(strm, yyNO_MATCH) + else yyAction63(strm, yyNO_MATCH) + else if inp = #"l" + then yyQ85(strm', yyMATCH(strm, yyAction63, yyNO_MATCH)) + else if inp < #"l" + then if inp = #"a" + then yyQ84(strm', yyMATCH(strm, yyAction63, yyNO_MATCH)) + else if inp < #"a" + then yyAction63(strm, yyNO_MATCH) + else if inp <= #"f" + then yyQ84(strm', yyMATCH(strm, yyAction63, yyNO_MATCH)) + else yyAction63(strm, yyNO_MATCH) + else if inp = #"u" + then yyQ86(strm', yyMATCH(strm, yyAction63, yyNO_MATCH)) + else yyAction63(strm, yyNO_MATCH) + (* end case *)) +fun yyQ83 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = #"A" + then yyQ84(strm', lastMatch) + else if inp < #"A" + then if inp = #"0" + then yyQ84(strm', lastMatch) + else if inp < #"0" + then yystuck(lastMatch) + else if inp <= #"9" + then yyQ84(strm', lastMatch) + else yystuck(lastMatch) + else if inp = #"a" + then yyQ84(strm', lastMatch) + else if inp < #"a" + then if inp <= #"F" + then yyQ84(strm', lastMatch) + else yystuck(lastMatch) + else if inp <= #"f" + then yyQ84(strm', lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ82 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction65(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"G" + then yyAction65(strm, yyNO_MATCH) + else if inp < #"G" + then if inp = #"0" + then yyQ82(strm', yyMATCH(strm, yyAction65, yyNO_MATCH)) + else if inp < #"0" + then if inp = #"." + then yyQ69(strm', yyMATCH(strm, yyAction65, yyNO_MATCH)) + else yyAction65(strm, yyNO_MATCH) + else if inp = #"E" + then yyQ71(strm', yyMATCH(strm, yyAction65, yyNO_MATCH)) + else if inp < #"E" + then if inp <= #"9" + then yyQ82(strm', yyMATCH(strm, yyAction65, yyNO_MATCH)) + else yyAction65(strm, yyNO_MATCH) + else yyQ72(strm', yyMATCH(strm, yyAction65, yyNO_MATCH)) + else if inp = #"f" + then yyQ72(strm', yyMATCH(strm, yyAction65, yyNO_MATCH)) + else if inp < #"f" + then if inp = #"M" + then yyAction65(strm, yyNO_MATCH) + else if inp < #"M" + then if inp = #"L" + then yyQ72(strm', yyMATCH(strm, yyAction65, yyNO_MATCH)) + else yyAction65(strm, yyNO_MATCH) + else if inp = #"e" + then yyQ71(strm', yyMATCH(strm, yyAction65, yyNO_MATCH)) + else yyAction65(strm, yyNO_MATCH) + else if inp = #"l" + then yyQ72(strm', yyMATCH(strm, yyAction65, yyNO_MATCH)) + else yyAction65(strm, yyNO_MATCH) + (* end case *)) +fun yyQ93 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction62(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction62(strm, yyNO_MATCH) + (* end case *)) +fun yyQ92 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction62(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"M" + then yyAction62(strm, yyNO_MATCH) + else if inp < #"M" + then if inp = #"L" + then yyQ93(strm', yyMATCH(strm, yyAction62, yyNO_MATCH)) + else yyAction62(strm, yyNO_MATCH) + else if inp = #"l" + then yyQ93(strm', yyMATCH(strm, yyAction62, yyNO_MATCH)) + else yyAction62(strm, yyNO_MATCH) + (* end case *)) +fun yyQ91 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction62(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"M" + then yyAction62(strm, yyNO_MATCH) + else if inp < #"M" + then if inp = #"L" + then yyQ92(strm', yyMATCH(strm, yyAction62, yyNO_MATCH)) + else yyAction62(strm, yyNO_MATCH) + else if inp = #"l" + then yyQ92(strm', yyMATCH(strm, yyAction62, yyNO_MATCH)) + else yyAction62(strm, yyNO_MATCH) + (* end case *)) +fun yyQ94 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction62(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"V" + then yyAction62(strm, yyNO_MATCH) + else if inp < #"V" + then if inp = #"U" + then yyQ93(strm', yyMATCH(strm, yyAction62, yyNO_MATCH)) + else yyAction62(strm, yyNO_MATCH) + else if inp = #"u" + then yyQ93(strm', yyMATCH(strm, yyAction62, yyNO_MATCH)) + else yyAction62(strm, yyNO_MATCH) + (* end case *)) +fun yyQ90 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction62(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"V" + then yyAction62(strm, yyNO_MATCH) + else if inp < #"V" + then if inp = #"M" + then yyAction62(strm, yyNO_MATCH) + else if inp < #"M" + then if inp = #"L" + then yyQ94(strm', yyMATCH(strm, yyAction62, yyNO_MATCH)) + else yyAction62(strm, yyNO_MATCH) + else if inp = #"U" + then yyQ93(strm', yyMATCH(strm, yyAction62, yyNO_MATCH)) + else yyAction62(strm, yyNO_MATCH) + else if inp = #"m" + then yyAction62(strm, yyNO_MATCH) + else if inp < #"m" + then if inp = #"l" + then yyQ94(strm', yyMATCH(strm, yyAction62, yyNO_MATCH)) + else yyAction62(strm, yyNO_MATCH) + else if inp = #"u" + then yyQ93(strm', yyMATCH(strm, yyAction62, yyNO_MATCH)) + else yyAction62(strm, yyNO_MATCH) + (* end case *)) +fun yyQ81 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction62(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"M" + then yyAction62(strm, yyNO_MATCH) + else if inp < #"M" + then if inp = #":" + then yyAction62(strm, yyNO_MATCH) + else if inp < #":" + then if inp = #"/" + then yyAction62(strm, yyNO_MATCH) + else if inp < #"/" + then if inp = #"." + then yyQ69(strm', yyMATCH(strm, yyAction62, yyNO_MATCH)) + else yyAction62(strm, yyNO_MATCH) + else if inp <= #"7" + then yyQ81(strm', yyMATCH(strm, yyAction62, yyNO_MATCH)) + else yyQ82(strm', yyMATCH(strm, yyAction62, yyNO_MATCH)) + else if inp = #"F" + then yyQ72(strm', yyMATCH(strm, yyAction62, yyNO_MATCH)) + else if inp < #"F" + then if inp = #"E" + then yyQ71(strm', yyMATCH(strm, yyAction62, yyNO_MATCH)) + else yyAction62(strm, yyNO_MATCH) + else if inp = #"L" + then yyQ90(strm', yyMATCH(strm, yyAction62, yyNO_MATCH)) + else yyAction62(strm, yyNO_MATCH) + else if inp = #"g" + then yyAction62(strm, yyNO_MATCH) + else if inp < #"g" + then if inp = #"V" + then yyAction62(strm, yyNO_MATCH) + else if inp < #"V" + then if inp = #"U" + then yyQ91(strm', yyMATCH(strm, yyAction62, yyNO_MATCH)) + else yyAction62(strm, yyNO_MATCH) + else if inp = #"e" + then yyQ71(strm', yyMATCH(strm, yyAction62, yyNO_MATCH)) + else if inp = #"f" + then yyQ72(strm', yyMATCH(strm, yyAction62, yyNO_MATCH)) + else yyAction62(strm, yyNO_MATCH) + else if inp = #"m" + then yyAction62(strm, yyNO_MATCH) + else if inp < #"m" + then if inp = #"l" + then yyQ90(strm', yyMATCH(strm, yyAction62, yyNO_MATCH)) + else yyAction62(strm, yyNO_MATCH) + else if inp = #"u" + then yyQ91(strm', yyMATCH(strm, yyAction62, yyNO_MATCH)) + else yyAction62(strm, yyNO_MATCH) + (* end case *)) +fun yyQ42 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction64(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"V" + then yyAction64(strm, yyNO_MATCH) + else if inp < #"V" + then if inp = #"E" + then yyQ71(strm', yyMATCH(strm, yyAction64, yyNO_MATCH)) + else if inp < #"E" + then if inp = #"0" + then yyQ81(strm', yyMATCH(strm, yyAction64, yyNO_MATCH)) + else if inp < #"0" + then if inp = #"." + then yyQ69(strm', yyMATCH(strm, yyAction64, yyNO_MATCH)) + else yyAction64(strm, yyNO_MATCH) + else if inp = #"8" + then yyQ82(strm', yyMATCH(strm, yyAction64, yyNO_MATCH)) + else if inp < #"8" + then yyQ81(strm', yyMATCH(strm, yyAction64, yyNO_MATCH)) + else if inp <= #"9" + then yyQ82(strm', yyMATCH(strm, yyAction64, yyNO_MATCH)) + else yyAction64(strm, yyNO_MATCH) + else if inp = #"L" + then yyQ73(strm', yyMATCH(strm, yyAction64, yyNO_MATCH)) + else if inp < #"L" + then if inp = #"F" + then yyQ72(strm', yyMATCH(strm, yyAction64, yyNO_MATCH)) + else yyAction64(strm, yyNO_MATCH) + else if inp = #"U" + then yyQ74(strm', yyMATCH(strm, yyAction64, yyNO_MATCH)) + else yyAction64(strm, yyNO_MATCH) + else if inp = #"l" + then yyQ73(strm', yyMATCH(strm, yyAction64, yyNO_MATCH)) + else if inp < #"l" + then if inp = #"e" + then yyQ71(strm', yyMATCH(strm, yyAction64, yyNO_MATCH)) + else if inp < #"e" + then if inp = #"X" + then yyQ83(strm', yyMATCH(strm, yyAction64, yyNO_MATCH)) + else yyAction64(strm, yyNO_MATCH) + else if inp = #"f" + then yyQ72(strm', yyMATCH(strm, yyAction64, yyNO_MATCH)) + else yyAction64(strm, yyNO_MATCH) + else if inp = #"v" + then yyAction64(strm, yyNO_MATCH) + else if inp < #"v" + then if inp = #"u" + then yyQ74(strm', yyMATCH(strm, yyAction64, yyNO_MATCH)) + else yyAction64(strm, yyNO_MATCH) + else if inp = #"x" + then yyQ83(strm', yyMATCH(strm, yyAction64, yyNO_MATCH)) + else yyAction64(strm, yyNO_MATCH) + (* end case *)) +fun yyQ96 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction50(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction50(strm, yyNO_MATCH) + (* end case *)) +fun yyQ95 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction3(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction3(strm, yyNO_MATCH) + (* end case *)) +fun yyQ41 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction35(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"+" + then yyAction35(strm, yyNO_MATCH) + else if inp < #"+" + then if inp = #"*" + then yyQ95(strm', yyMATCH(strm, yyAction35, yyNO_MATCH)) + else yyAction35(strm, yyNO_MATCH) + else if inp = #"=" + then yyQ96(strm', yyMATCH(strm, yyAction35, yyNO_MATCH)) + else yyAction35(strm, yyNO_MATCH) + (* end case *)) +fun yyQ98 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction25(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction25(strm, yyNO_MATCH) + (* end case *)) +fun yyQ97 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = #"." + then yyQ98(strm', lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ40 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction24(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"/" + then yyAction24(strm, yyNO_MATCH) + else if inp < #"/" + then if inp = #"." + then yyQ97(strm', yyMATCH(strm, yyAction24, yyNO_MATCH)) + else yyAction24(strm, yyNO_MATCH) + else if inp <= #"9" + then yyQ80(strm', yyMATCH(strm, yyAction24, yyNO_MATCH)) + else yyAction24(strm, yyNO_MATCH) + (* end case *)) +fun yyQ101 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction34(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction34(strm, yyNO_MATCH) + (* end case *)) +fun yyQ100 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction46(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction46(strm, yyNO_MATCH) + (* end case *)) +fun yyQ99 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction33(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction33(strm, yyNO_MATCH) + (* end case *)) +fun yyQ39 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction31(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"=" + then yyQ100(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp < #"=" + then if inp = #"-" + then yyQ99(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else yyAction31(strm, yyNO_MATCH) + else if inp = #">" + then yyQ101(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else yyAction31(strm, yyNO_MATCH) + (* end case *)) +fun yyQ38 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction26(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction26(strm, yyNO_MATCH) + (* end case *)) +fun yyQ103 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction45(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction45(strm, yyNO_MATCH) + (* end case *)) +fun yyQ102 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction32(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction32(strm, yyNO_MATCH) + (* end case *)) +fun yyQ37 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction30(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"," + then yyAction30(strm, yyNO_MATCH) + else if inp < #"," + then if inp = #"+" + then yyQ102(strm', yyMATCH(strm, yyAction30, yyNO_MATCH)) + else yyAction30(strm, yyNO_MATCH) + else if inp = #"=" + then yyQ103(strm', yyMATCH(strm, yyAction30, yyNO_MATCH)) + else yyAction30(strm, yyNO_MATCH) + (* end case *)) +fun yyQ104 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction49(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction49(strm, yyNO_MATCH) + (* end case *)) +fun yyQ36 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction27(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"=" + then yyQ104(strm', yyMATCH(strm, yyAction27, yyNO_MATCH)) + else yyAction27(strm, yyNO_MATCH) + (* end case *)) +fun yyQ35 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction19(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction19(strm, yyNO_MATCH) + (* end case *)) +fun yyQ34 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction18(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction18(strm, yyNO_MATCH) + (* end case *)) +fun yyQ112 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction67(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction67(strm, yyNO_MATCH) + (* end case *)) +fun yyQ111 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = #":" + then yystuck(lastMatch) + else if inp < #":" + then if inp = #"(" + then yystuck(lastMatch) + else if inp < #"(" + then if inp = #"'" + then yyQ112(strm', lastMatch) + else yystuck(lastMatch) + else if inp <= #"/" + then yystuck(lastMatch) + else yyQ111(strm', lastMatch) + else if inp = #"G" + then yystuck(lastMatch) + else if inp < #"G" + then if inp <= #"@" + then yystuck(lastMatch) + else yyQ111(strm', lastMatch) + else if inp = #"a" + then yyQ111(strm', lastMatch) + else if inp < #"a" + then yystuck(lastMatch) + else if inp <= #"f" + then yyQ111(strm', lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ110 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction69(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction69(strm, yyNO_MATCH) + (* end case *)) +fun yyQ109 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = #":" + then yystuck(lastMatch) + else if inp < #":" + then if inp = #"(" + then yystuck(lastMatch) + else if inp < #"(" + then if inp = #"'" + then yyQ110(strm', lastMatch) + else yystuck(lastMatch) + else if inp <= #"/" + then yystuck(lastMatch) + else yyQ111(strm', lastMatch) + else if inp = #"G" + then yystuck(lastMatch) + else if inp < #"G" + then if inp <= #"@" + then yystuck(lastMatch) + else yyQ111(strm', lastMatch) + else if inp = #"a" + then yyQ111(strm', lastMatch) + else if inp < #"a" + then yystuck(lastMatch) + else if inp <= #"f" + then yyQ111(strm', lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ115 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction66(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction66(strm, yyNO_MATCH) + (* end case *)) +fun yyQ116 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = #"'" + then yyQ115(strm', lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ114 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = #"(" + then yystuck(lastMatch) + else if inp < #"(" + then if inp = #"'" + then yyQ115(strm', lastMatch) + else yystuck(lastMatch) + else if inp = #"0" + then yyQ116(strm', lastMatch) + else if inp < #"0" + then yystuck(lastMatch) + else if inp <= #"7" + then yyQ116(strm', lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ113 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction66(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction66(strm, yyNO_MATCH) + (* end case *)) +fun yyQ108 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = #"(" + then yystuck(lastMatch) + else if inp < #"(" + then if inp = #"'" + then yyQ113(strm', lastMatch) + else yystuck(lastMatch) + else if inp = #"0" + then yyQ114(strm', lastMatch) + else if inp < #"0" + then yystuck(lastMatch) + else if inp <= #"7" + then yyQ114(strm', lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ107 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = #"'" + then yyQ110(strm', lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ106 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = #"0" + then yyQ108(strm', lastMatch) + else if inp < #"0" + then if inp = #"\n" + then yystuck(lastMatch) + else yyQ107(strm', lastMatch) + else if inp = #"x" + then yyQ109(strm', lastMatch) + else if inp < #"x" + then if inp <= #"7" + then yyQ108(strm', lastMatch) + else yyQ107(strm', lastMatch) + else yyQ107(strm', lastMatch) + (* end case *)) +fun yyQ117 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction68(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction68(strm, yyNO_MATCH) + (* end case *)) +fun yyQ105 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = #"'" + then yyQ117(strm', lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ33 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction71(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"\v" + then yyQ105(strm', yyMATCH(strm, yyAction71, yyNO_MATCH)) + else if inp < #"\v" + then if inp = #"\n" + then yyAction71(strm, yyNO_MATCH) + else yyQ105(strm', yyMATCH(strm, yyAction71, yyNO_MATCH)) + else if inp = #"\\" + then yyQ106(strm', yyMATCH(strm, yyAction71, yyNO_MATCH)) + else yyQ105(strm', yyMATCH(strm, yyAction71, yyNO_MATCH)) + (* end case *)) +fun yyQ119 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction52(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction52(strm, yyNO_MATCH) + (* end case *)) +fun yyQ118 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction59(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction59(strm, yyNO_MATCH) + (* end case *)) +fun yyQ32 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction39(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"'" + then yyAction39(strm, yyNO_MATCH) + else if inp < #"'" + then if inp = #"&" + then yyQ118(strm', yyMATCH(strm, yyAction39, yyNO_MATCH)) + else yyAction39(strm, yyNO_MATCH) + else if inp = #"=" + then yyQ119(strm', yyMATCH(strm, yyAction39, yyNO_MATCH)) + else yyAction39(strm, yyNO_MATCH) + (* end case *)) +fun yyQ120 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction48(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction48(strm, yyNO_MATCH) + (* end case *)) +fun yyQ31 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction40(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"=" + then yyQ120(strm', yyMATCH(strm, yyAction40, yyNO_MATCH)) + else yyAction40(strm, yyNO_MATCH) + (* end case *)) +fun yyQ10 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction0(strm, lastMatch) + | SOME(inp, strm') => yyAction0(strm, lastMatch) + (* end case *)) +fun yyQ9 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = #"\n" + then yyQ10(strm', lastMatch) + else yyQ9(strm', lastMatch) + (* end case *)) +fun yyQ30 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction71(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"\n" + then yyQ10(strm', yyMATCH(strm, yyAction71, yyNO_MATCH)) + else yyQ9(strm', yyMATCH(strm, yyAction71, yyNO_MATCH)) + (* end case *)) +fun yyQ29 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction6(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction6(strm, yyNO_MATCH) + (* end case *)) +fun yyQ121 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction57(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction57(strm, yyNO_MATCH) + (* end case *)) +fun yyQ28 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction28(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"=" + then yyQ121(strm', yyMATCH(strm, yyAction28, yyNO_MATCH)) + else yyAction28(strm, yyNO_MATCH) + (* end case *)) +fun yyQ5 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction1(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction1(strm, yyNO_MATCH) + (* end case *)) +fun yyQ11 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction2(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"\r" + then yyAction2(strm, yyNO_MATCH) + else if inp < #"\r" + then if inp = #"\n" + then yyAction2(strm, yyNO_MATCH) + else if inp < #"\n" + then if inp = #"\t" + then yyQ11(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else yyAction2(strm, yyNO_MATCH) + else if inp = #"\f" + then yyQ11(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else yyAction2(strm, yyNO_MATCH) + else if inp = #"!" + then yyAction2(strm, yyNO_MATCH) + else if inp < #"!" + then if inp = #" " + then yyQ11(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else yyAction2(strm, yyNO_MATCH) + else if inp = #"#" + then yyQ9(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else yyAction2(strm, yyNO_MATCH) + (* end case *)) +fun yyQ27 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction2(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"\r" + then yyAction2(strm, yyNO_MATCH) + else if inp < #"\r" + then if inp = #"\n" + then yyAction2(strm, yyNO_MATCH) + else if inp < #"\n" + then if inp = #"\t" + then yyQ11(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else yyAction2(strm, yyNO_MATCH) + else if inp = #"\f" + then yyQ11(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else yyAction2(strm, yyNO_MATCH) + else if inp = #"!" + then yyAction2(strm, yyNO_MATCH) + else if inp < #"!" + then if inp = #" " + then yyQ11(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else yyAction2(strm, yyNO_MATCH) + else if inp = #"#" + then yyQ9(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else yyAction2(strm, yyNO_MATCH) + (* end case *)) +fun yyQ26 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction71(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction71(strm, yyNO_MATCH) + (* end case *)) +fun yyQ2 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => + if yyInput.eof(!(yystrm)) + then UserDeclarations.eof(yyarg) + else yyAction2(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"0" + then yyQ42(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp < #"0" + then if inp = #"%" + then yyQ31(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp < #"%" + then if inp = #"\r" + then yyQ26(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp < #"\r" + then if inp = #"\n" + then yyQ5(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp < #"\n" + then if inp = #"\t" + then yyQ27(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else yyQ26(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp = #"\v" + then yyQ26(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else yyQ27(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp = #"\"" + then yyQ29(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp < #"\"" + then if inp = #" " + then yyQ27(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp = #"!" + then yyQ28(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else yyQ26(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp = #"#" + then yyQ30(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else yyQ26(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp = #"+" + then yyQ37(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp < #"+" + then if inp = #"(" + then yyQ34(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp < #"(" + then if inp = #"&" + then yyQ32(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else yyQ33(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp = #")" + then yyQ35(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else yyQ36(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp = #"." + then yyQ40(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp < #"." + then if inp = #"," + then yyQ38(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else yyQ39(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else yyQ41(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp = #"\\" + then yyQ26(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp < #"\\" + then if inp = #">" + then yyQ48(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp < #">" + then if inp = #";" + then yyQ45(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp < #";" + then if inp = #":" + then yyQ44(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else yyQ43(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp = #"<" + then yyQ46(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else yyQ47(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp = #"A" + then yyQ50(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp < #"A" + then if inp = #"?" + then yyQ49(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else yyQ26(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp = #"[" + then yyQ51(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else yyQ50(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp = #"{" + then yyQ54(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp < #"{" + then if inp = #"_" + then yyQ50(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp < #"_" + then if inp = #"]" + then yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else yyQ53(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp = #"`" + then yyQ26(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else yyQ50(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp = #"~" + then yyQ57(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp < #"~" + then if inp = #"|" + then yyQ55(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else yyQ56(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else yyQ26(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + (* end case *)) +fun yyQ22 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction13(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"A" + then yyQ22(strm', yyMATCH(strm, yyAction13, yyNO_MATCH)) + else if inp < #"A" + then if inp = #"0" + then yyQ22(strm', yyMATCH(strm, yyAction13, yyNO_MATCH)) + else if inp < #"0" + then yyAction13(strm, yyNO_MATCH) + else if inp <= #"9" + then yyQ22(strm', yyMATCH(strm, yyAction13, yyNO_MATCH)) + else yyAction13(strm, yyNO_MATCH) + else if inp = #"a" + then yyQ22(strm', yyMATCH(strm, yyAction13, yyNO_MATCH)) + else if inp < #"a" + then if inp <= #"F" + then yyQ22(strm', yyMATCH(strm, yyAction13, yyNO_MATCH)) + else yyAction13(strm, yyNO_MATCH) + else if inp <= #"f" + then yyQ22(strm', yyMATCH(strm, yyAction13, yyNO_MATCH)) + else yyAction13(strm, yyNO_MATCH) + (* end case *)) +fun yyQ21 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction15(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"A" + then yyQ22(strm', yyMATCH(strm, yyAction15, yyNO_MATCH)) + else if inp < #"A" + then if inp = #"0" + then yyQ22(strm', yyMATCH(strm, yyAction15, yyNO_MATCH)) + else if inp < #"0" + then yyAction15(strm, yyNO_MATCH) + else if inp <= #"9" + then yyQ22(strm', yyMATCH(strm, yyAction15, yyNO_MATCH)) + else yyAction15(strm, yyNO_MATCH) + else if inp = #"a" + then yyQ22(strm', yyMATCH(strm, yyAction15, yyNO_MATCH)) + else if inp < #"a" + then if inp <= #"F" + then yyQ22(strm', yyMATCH(strm, yyAction15, yyNO_MATCH)) + else yyAction15(strm, yyNO_MATCH) + else if inp <= #"f" + then yyQ22(strm', yyMATCH(strm, yyAction15, yyNO_MATCH)) + else yyAction15(strm, yyNO_MATCH) + (* end case *)) +fun yyQ23 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction14(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction14(strm, yyNO_MATCH) + (* end case *)) +fun yyQ20 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction15(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"@" + then yyQ23(strm', yyMATCH(strm, yyAction15, yyNO_MATCH)) + else if inp < #"@" + then yyAction15(strm, yyNO_MATCH) + else if inp <= #"_" + then yyQ23(strm', yyMATCH(strm, yyAction15, yyNO_MATCH)) + else yyAction15(strm, yyNO_MATCH) + (* end case *)) +fun yyQ25 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction12(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction12(strm, yyNO_MATCH) + (* end case *)) +fun yyQ24 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = #"0" + then yyQ25(strm', lastMatch) + else if inp < #"0" + then yystuck(lastMatch) + else if inp <= #"7" + then yyQ25(strm', lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ19 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction15(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"0" + then yyQ24(strm', yyMATCH(strm, yyAction15, yyNO_MATCH)) + else if inp < #"0" + then yyAction15(strm, yyNO_MATCH) + else if inp <= #"7" + then yyQ24(strm', yyMATCH(strm, yyAction15, yyNO_MATCH)) + else yyAction15(strm, yyNO_MATCH) + (* end case *)) +fun yyQ18 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction11(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"0" + then yyQ24(strm', yyMATCH(strm, yyAction11, yyNO_MATCH)) + else if inp < #"0" + then yyAction11(strm, yyNO_MATCH) + else if inp <= #"7" + then yyQ24(strm', yyMATCH(strm, yyAction11, yyNO_MATCH)) + else yyAction11(strm, yyNO_MATCH) + (* end case *)) +fun yyQ17 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction10(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction10(strm, yyNO_MATCH) + (* end case *)) +fun yyQ16 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction15(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction15(strm, yyNO_MATCH) + (* end case *)) +fun yyQ15 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = #"8" + then yyQ16(strm', lastMatch) + else if inp < #"8" + then if inp = #"\v" + then yyQ16(strm', lastMatch) + else if inp < #"\v" + then if inp = #"\n" + then yyQ17(strm', lastMatch) + else yyQ16(strm', lastMatch) + else if inp = #"0" + then yyQ18(strm', lastMatch) + else if inp <= #"/" + then yyQ16(strm', lastMatch) + else yyQ19(strm', lastMatch) + else if inp = #"_" + then yyQ16(strm', lastMatch) + else if inp < #"_" + then if inp = #"^" + then yyQ20(strm', lastMatch) + else yyQ16(strm', lastMatch) + else if inp = #"x" + then yyQ21(strm', lastMatch) + else yyQ16(strm', lastMatch) + (* end case *)) +fun yyQ14 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction7(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction7(strm, yyNO_MATCH) + (* end case *)) +fun yyQ13 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction8(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction8(strm, yyNO_MATCH) + (* end case *)) +fun yyQ12 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction9(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"\"" + then yyAction9(strm, yyNO_MATCH) + else if inp < #"\"" + then if inp = #"\n" + then yyAction9(strm, yyNO_MATCH) + else yyQ12(strm', yyMATCH(strm, yyAction9, yyNO_MATCH)) + else if inp = #"\\" + then yyAction9(strm, yyNO_MATCH) + else yyQ12(strm', yyMATCH(strm, yyAction9, yyNO_MATCH)) + (* end case *)) +fun yyQ1 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => + if yyInput.eof(!(yystrm)) + then UserDeclarations.eof(yyarg) + else yyAction9(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"\"" + then yyQ14(strm', yyMATCH(strm, yyAction9, yyNO_MATCH)) + else if inp < #"\"" + then if inp = #"\n" + then yyQ13(strm', yyMATCH(strm, yyAction9, yyNO_MATCH)) + else yyQ12(strm', yyMATCH(strm, yyAction9, yyNO_MATCH)) + else if inp = #"\\" + then yyQ15(strm', yyMATCH(strm, yyAction9, yyNO_MATCH)) + else yyQ12(strm', yyMATCH(strm, yyAction9, yyNO_MATCH)) + (* end case *)) +fun yyQ8 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction4(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction4(strm, yyNO_MATCH) + (* end case *)) +fun yyQ7 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction5(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"/" + then yyQ8(strm', yyMATCH(strm, yyAction5, yyNO_MATCH)) + else yyAction5(strm, yyNO_MATCH) + (* end case *)) +fun yyQ6 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction5(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"\n" + then yyQ10(strm', yyMATCH(strm, yyAction5, yyNO_MATCH)) + else yyQ9(strm', yyMATCH(strm, yyAction5, yyNO_MATCH)) + (* end case *)) +fun yyQ4 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction2(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"\r" + then yyAction2(strm, yyNO_MATCH) + else if inp < #"\r" + then if inp = #"\n" + then yyAction2(strm, yyNO_MATCH) + else if inp < #"\n" + then if inp = #"\t" + then yyQ11(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else yyAction2(strm, yyNO_MATCH) + else if inp = #"\f" + then yyQ11(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else yyAction2(strm, yyNO_MATCH) + else if inp = #"!" + then yyAction2(strm, yyNO_MATCH) + else if inp < #"!" + then if inp = #" " + then yyQ11(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else yyAction2(strm, yyNO_MATCH) + else if inp = #"#" + then yyQ9(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else yyAction2(strm, yyNO_MATCH) + (* end case *)) +fun yyQ3 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction5(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction5(strm, yyNO_MATCH) + (* end case *)) +fun yyQ0 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => + if yyInput.eof(!(yystrm)) + then UserDeclarations.eof(yyarg) + else yyAction2(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #" " + then yyQ4(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp < #" " + then if inp = #"\v" + then yyQ3(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp < #"\v" + then if inp = #"\t" + then yyQ4(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp = #"\n" + then yyQ5(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else yyQ3(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp = #"\f" + then yyQ4(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else yyQ3(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp = #"$" + then yyQ3(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp < #"$" + then if inp = #"#" + then yyQ6(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else yyQ3(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp = #"*" + then yyQ7(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else yyQ3(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + (* end case *)) +in + (case (!(yyss)) + of C => yyQ0(!(yystrm), yyNO_MATCH) + | S => yyQ1(!(yystrm), yyNO_MATCH) + | INITIAL => yyQ2(!(yystrm), yyNO_MATCH) + (* end case *)) +end + end + in + continue() + handle IO.Io{cause, ...} => raise cause + end + in + lex + end + in + fun makeLexer yyinputN = mk (yyInput.mkStream yyinputN) + end + + end diff --git a/ckit/src/parser/grammar/tdefs.sml b/ckit/src/parser/grammar/tdefs.sml new file mode 100644 index 0000000..6aa142c --- /dev/null +++ b/ckit/src/parser/grammar/tdefs.sml @@ -0,0 +1,110 @@ +(* Copyright (c) 1998 by Lucent Technologies *) + +signature TYPEDEFS = + sig + val addTdef: string -> unit (* a string as a typename in current scope *) + val addNoTdef: string -> unit (* a string is not a typename, + * may hide typenames of outer scopes *) + val checkTdef: string -> bool (* is this string a typename in current context ? *) + val reset: unit -> unit (* clear all tables, needed if you are doing many files *) + val truncTo: int ref (* limited-width names ? *) + val pushScope: unit -> unit (* entering a new scope in C *) + val popScope: unit -> unit (* exiting the last scope *) + end + +(* We need a stack of tables to properly handle the scoping in typenames + * Remember, there are four type of things competing in the namespace of + * typenames: + * typenames, variables, functions and enum constants + * Once you enter a new scope, reuse of these names can hide previous + * uses. + * Also note that struct field names do not redefine names within their scope + * So, the following is legal: + * typedef int bar; + * struct h { + * bar bar; + * bar baz; + * }; + *) + +structure TypeDefs : TYPEDEFS = +struct + structure ParseControl = Config.ParseControl + + exception NotTdef + + val truncTo: int ref = ref ParseControl.symbolLength; + + type item = bool (* true says typename, false says else *) + + val sayTdefs : bool ref = ref true; + + val tdefTable: (item AtomTable.hash_table list) ref = ref ([AtomTable.mkTable(1024, NotTdef)]) + + fun checkTdef (str) = + let + val s = substring(str,0,(!truncTo)) handle Substring => str + val name = Atom.atom s + fun lookup (n, nil) = NONE + | lookup (n, fst::rst) = + (case (AtomTable.find (fst) n) of + (SOME x) => SOME(x) + | _ => (lookup(n,rst))) + in + case lookup(name, (!tdefTable)) of + (SOME true) => (if (!sayTdefs) then true else false) + | (SOME false) => false + | NONE => false + end; + + + fun pushScope () = (tdefTable := (AtomTable.mkTable(1024, NotTdef))::(!tdefTable); ()) + + fun popScope () = (* was just tl(!tdefTable), but caused problems with ml-yacc error correction *) + (case (!tdefTable) + of [x] => () (* don't change *) + | (_ :: l) => (tdefTable := l) + | nil => ()) + (* don't change; but we are in trouble here! *) + + val errorCount = ref 0 + + fun reset() = (tdefTable := [AtomTable.mkTable(1024, NotTdef)]; + errorCount := 0) + + (* TBD: In the next two functions, it is an option to raise a syntax error, + * if there is a redefinition in the same scope, i.e., the topmost table + *) + + fun addTdef(str) = + let + val s = substring(str,0,(!truncTo)) handle Substring => str + val name = Atom.atom s + in + (* insert name in the top of tdefTable as a typename *) + case !tdefTable of + x :: _ => AtomTable.insert x (name, true) + | nil => (if !errorCount = 0 + then print "Error: empty type def table (lexer), probably caused by syntax error" + (* should be Error.error, but don't have an error stream handy. *) + else (); + errorCount := !errorCount + 1) + end + + fun addNoTdef(str) = + let + val s = substring(str,0,(!truncTo)) handle Substring => str + val name = Atom.atom s + in + (* insert name in the top of tdefTable as not a typename *) + case !tdefTable of + x :: _ => AtomTable.insert x (name, false) + | nil => (if !errorCount = 0 + then print "Error: empty type def table (lexer), probably caused by syntax error" + (* should be Error.error, but don't have an error stream handy. *) + else (); + errorCount := !errorCount + 1) + end + +end + diff --git a/ckit/src/parser/grammar/tokentable.sml b/ckit/src/parser/grammar/tokentable.sml new file mode 100644 index 0000000..96eeb4f --- /dev/null +++ b/ckit/src/parser/grammar/tokentable.sml @@ -0,0 +1,119 @@ +(* Copyright (c) 1998 by Lucent Technologies *) + +(*************************************************************************** + + TOKEN.SML: hash table for token recognition + + ***************************************************************************) + + +signature TOKENTABLE = +sig + structure Tokens : C_TOKENS + val checkToken : (string * int) -> (Tokens.svalue,int)Tokens.token +end + +functor TokenTable(structure Tokens : C_TOKENS): TOKENTABLE = +struct + + structure Tokens = Tokens + structure ParseControl = Config.ParseControl + type item = (int * int) -> (Tokens.svalue, int) Tokens.token + exception Keyword + exception LexError + val keywords : item AtomTable.hash_table = AtomTable.mkTable(64, Keyword) + + local + val insert = AtomTable.insert keywords + fun ins (s, item) = insert (Atom.atom s, item) + + fun idTok (s, pos, endPos) = + if TypeDefs.checkTdef(s) = true then + Tokens.TYPE_NAME(s,pos,endPos) + else Tokens.ID(s,pos,endPos) + + (* to enter GCC-style 'underscore'-versions of certain keywords *) + fun insaug (s, item) = let + fun item' (p as (pos, endPos)) = + case ParseControl.underscoreKeywords of + NONE => idTok (s, pos, endPos) + | SOME true => item p + | SOME false => + (ParseControl.violation + (concat ["gcc-style keywords '__", s, "' or '__", + s, "__' are not allowed"]); + raise LexError) + in + ins ("__" ^ s, item'); + ins ("__" ^ s ^ "__", item') + end + + val normaltokens = + [("auto", Tokens.AUTO), + ("extern", Tokens.EXTERN), + ("register", Tokens.REGISTER), + ("static", Tokens.STATIC), + ("unsigned", Tokens.UNSIGNED), + ("break", Tokens.BREAK), + ("case", Tokens.CASE), + ("continue", Tokens.CONTINUE), + ("default", Tokens.DEFAULT), + ("do", Tokens.DO), + ("else", Tokens.ELSE), + ("for", Tokens.FOR), + ("goto", Tokens.GOTO), + ("if", Tokens.IF), + ("enum", Tokens.ENUM), + ("float", Tokens.FLOAT), + ("double", Tokens.DOUBLE), + ("char", Tokens.CHAR), + ("int", Tokens.INT), + ("long", Tokens.LONG), + ("short", Tokens.SHORT), + ("struct", Tokens.STRUCT), + ("union", Tokens.UNION), + ("void", Tokens.VOID), + ("sizeof", Tokens.SIZEOF), + ("typedef", Tokens.TYPEDEF), + ("return", Tokens.RETURN), + ("switch", Tokens.SWITCH), + ("while", Tokens.WHILE)] + + (* tokens for which gcc has __* and __*__ versions *) + val augmentabletokens = + [("signed", Tokens.SIGNED), + ("const", fn p => if ParseControl.constAllowed + then (Tokens.CONST p) + else (ParseControl.violation + "the keyword 'const' not allowed"; + raise LexError)), + ("volatile", fn p => if ParseControl.volatileAllowed + then (Tokens.VOLATILE p) + else (ParseControl.violation + "the keyword 'volatile' not allowed"; + raise LexError))] + + (* tokens for D *) + val dtokens = + [ + ] + + val _ = + (app ins normaltokens; + app ins augmentabletokens; + app insaug augmentabletokens; + (* enter D keywords only when allowed... + * (I think the ParseControl test is done at the wrong time here. + * - Blume) *) + if ParseControl.Dkeywords then app ins dtokens else ()) + in + fun checkToken (s, pos) = let + val endPos = pos + size s + val name = Atom.atom s + in + case (AtomTable.find keywords name) of + SOME tokFn => tokFn(pos, endPos) + | NONE => idTok (s, pos, endPos) + end + end (* local *) +end diff --git a/ckit/src/parser/group.cm b/ckit/src/parser/group.cm new file mode 100644 index 0000000..04881b0 --- /dev/null +++ b/ckit/src/parser/group.cm @@ -0,0 +1,66 @@ +Group +is + +#if defined(NEW_CM) +#if (SMLNJ_VERSION * 100 + SMLNJ_MINOR_VERSION >= 11029) + (* standard basis *) + $/basis.cm + + (* standard library *) + $/smlnj-lib.cm + + (* ml-yacc *) + $/ml-yacc-lib.cm + + (* pretty printer library *) + $/pp-lib.cm +#else + (* standard basis *) + basis.cm + + (* standard library *) + smlnj-lib.cm + + (* ml-yacc *) + ml-yacc-lib.cm + + (* pretty printer library *) + pp-lib.cm +#endif +#else + smlnj-lib.cm + ml-yacc-lib.cm + pp-lib.cm +#endif + + (* configuration *) + ../variants/group.cm + + (* utilitities *) + util/old-pp.sml + util/ascii.sml + util/sourcemap-sig.sml + util/sourcemap.sml + util/error-sig.sml + util/error.sml + + (* lexer and parser *) + grammar/tdefs.sml + grammar/tokentable.sml + grammar/c.lex + + parser-sig.sml + parser.sml + parse-tree-sig.sml + parse-tree.sml + + (* extensions *) +#if (defined(d)) + extensions/d/parse-tree-ext-sig.sml + extensions/d/parse-tree-ext.sml + grammar/d.grm +#else + extensions/c/parse-tree-ext-sig.sml + extensions/c/parse-tree-ext.sml + grammar/c.grm +#endif diff --git a/ckit/src/parser/parse-tree-sig.sml b/ckit/src/parser/parse-tree-sig.sml new file mode 100644 index 0000000..4ac8151 --- /dev/null +++ b/ckit/src/parser/parse-tree-sig.sml @@ -0,0 +1,187 @@ +(* Copyright (c) 1998 by Lucent Technologies *) + +(* C parse trees, produced by the parser *) + +signature PARSETREE = +sig + + datatype qualifier = CONST | VOLATILE + + (* storage class attributes *) + datatype storage + = TYPEDEF + | STATIC + | EXTERN + | REGISTER + | AUTO + + (* built in unary and binary operators *) + datatype operator + = Plus | Minus | Times | Divide | Mod + | Gt | Lt | Gte | Lte | Eq | Neq | And | Or + | BitOr | BitAnd | BitXor | Lshift | Rshift + | Star | AddrOf | Dot | Arrow | Sub | Sizeof + | PreInc | PostInc | PreDec | PostDec | Comma + | Not | Negate | BitNot | Assign + | PlusAssign | MinusAssign | TimesAssign | DivAssign + | ModAssign | XorAssign | OrAssign | AndAssign + | LshiftAssign | RshiftAssign + | Uplus + | SizeofType of ctype + | OperatorExt of operatorExt + + and expression + = EmptyExpr + | IntConst of LargeInt.int + | RealConst of real + | String of string + | Id of string + | Unop of operator * expression + | Binop of operator * expression * expression + | QuestionColon of expression * expression * expression + | Call of expression * expression list + | Cast of ctype * expression + | InitList of expression list + | MARKexpression of (SourceMap.location * expression) + | ExprExt of expressionExt + + and specifier + = Void + | Ellipses + | Signed + | Unsigned + | Char + | Short + | Int + | Long + | Float + | Double + | Fractional + | Wholenum + | Saturate + | Nonsaturate + | Array of expression * ctype + | Pointer of ctype + | Function of + {retType : ctype, + params : (decltype * declarator) list} + | Enum of + {tagOpt : string option, + enumerators : (string * expression) list, + trailingComma : bool} (* true if there was there a trailing comma in the declaration *) + | Struct of + {isStruct : bool, (* struct or union; true => struct *) + tagOpt : string option, (* optional tag *) + members: (ctype * (declarator * expression) list) list} (* member specs *) + | TypedefName of string + | StructTag of + {isStruct : bool, (* ??? *) + name : string} + | EnumTag of string + | SpecExt of specifierExt + + and declarator (* constructor suffix: "Decr" *) + = EmptyDecr + | EllipsesDecr + | VarDecr of string + | ArrayDecr of declarator * expression + | PointerDecr of declarator + | QualDecr of qualifier * declarator + | FuncDecr of declarator * (decltype * declarator) list + | MARKdeclarator of (SourceMap.location * declarator) + | DecrExt of declaratorExt + + (* supports extensions of C in which expressions contain statements *) + and statement + = Decl of declaration + | Expr of expression + | Compound of statement list + | While of expression * statement + | Do of expression * statement + | For of expression * expression * expression * statement + | Labeled of string * statement + | CaseLabel of expression * statement + | DefaultLabel of statement + | Goto of string + | Break + | Continue + | Return of expression + | IfThen of expression * statement + | IfThenElse of expression * statement * statement + | Switch of expression * statement + | MARKstatement of (SourceMap.location * statement) + | StatExt of statementExt + + and declaration + = Declaration of decltype * (declarator * expression) list + | MARKdeclaration of (SourceMap.location * declaration) + | DeclarationExt of declarationExt + + (* the top-level constructs in a translation unit (i.e. source file) *) + and externalDecl + = ExternalDecl of declaration + | FunctionDef of + {retType : decltype, (* return type *) + funDecr : declarator, (* function name declarator *) + krParams : declaration list, (* K&R-style parameter declarations *) + body : statement} (* function body *) + | MARKexternalDecl of (SourceMap.location * externalDecl) + | ExternalDeclExt of externalDeclExt + + withtype ctype = + {qualifiers : qualifier list, + specifiers : specifier list} + and decltype = + {qualifiers : qualifier list, + specifiers : specifier list, + storage : storage list} + + (* extension types for basic constructs *) + and externalDeclExt = + (specifier, declarator, ctype, decltype, operator, expression, statement) + ParseTreeExt.externalDeclExt + and declarationExt = + (specifier, declarator, ctype, decltype, operator, expression, statement) + ParseTreeExt.declarationExt + and statementExt = + (specifier, declarator, ctype, decltype, operator, expression, statement) + ParseTreeExt.statementExt + and declaratorExt = + (specifier, declarator, ctype, decltype, operator, expression, statement) + ParseTreeExt.declaratorExt + and specifierExt = + (specifier, declarator, ctype, decltype, operator, expression, statement) + ParseTreeExt.specifierExt + and expressionExt = + (specifier, declarator, ctype, decltype, operator, expression, statement) + ParseTreeExt.expressionExt + and operatorExt = ParseTreeExt.operatorExt + +end (* signature PARSETREE *) + +(* Note: in structure declarations, the bool is IsStruct/IsUnion, and the expression + * after the declarator is the bit field. + *) + +(* Location Marking: + * The expression, declarator, statement, declaration, and externalDecl + * types have a MARK variant for annotating the corresponding constructs + * with source file locations. + *) + +(* Syntax Extensions: + * The operator, expression, specification, declarator, statement, declaration, + * and externalDecl types have an ...Ext variant for supporting syntax + * extensions. The types of these variants, operatorExt, expressionExt, etc. + * are defined by instantiating corresponding type operators defined in + * the ParseTreeExt structure (see src/parser/extensions/c/parse-tree-ext*.sml + * for the dummy definitions for ansi C). In general, extensions for + * a construct may need to build on other constructs, which is why + * the ParseTreeExt type constructors are parameterized by the collection + * of syntax tree types. + * + * A user-defined extension (call it x) would need it's own version of + * ParseTreeExt defined in files parse-tree-ext-sig.sml and parse-tree-ext.sml + * in a new directory src/parser/extensions/x/. + * + *) diff --git a/ckit/src/parser/parse-tree.sml b/ckit/src/parser/parse-tree.sml new file mode 100644 index 0000000..160eb5e --- /dev/null +++ b/ckit/src/parser/parse-tree.sml @@ -0,0 +1,154 @@ +(* Copyright (c) 1998 by Lucent Technologies *) + +structure ParseTree : PARSETREE = +struct + + datatype qualifier = CONST | VOLATILE + + datatype storage + = TYPEDEF + | STATIC + | EXTERN + | REGISTER + | AUTO + + datatype operator + = Plus | Minus | Times | Divide | Mod + | Gt | Lt | Gte | Lte | Eq | Neq | And | Or + | BitOr | BitAnd | BitXor | Lshift | Rshift + | Star | AddrOf | Dot | Arrow | Sub | Sizeof + | PreInc | PostInc | PreDec | PostDec | Comma + | Not | Negate | BitNot | Assign + | PlusAssign | MinusAssign | TimesAssign | DivAssign + | ModAssign | XorAssign | OrAssign | AndAssign + | LshiftAssign | RshiftAssign + | Uplus + | SizeofType of ctype + | OperatorExt of operatorExt + + and expression + = EmptyExpr + | IntConst of LargeInt.int + | RealConst of real + | String of string + | Id of string + | Unop of operator * expression + | Binop of operator * expression * expression + | QuestionColon of expression * expression * expression + | Call of expression * expression list + | Cast of ctype * expression + | InitList of expression list + | MARKexpression of (SourceMap.location * expression) + | ExprExt of expressionExt + + and specifier + = Void + | Ellipses + | Signed + | Unsigned + | Char + | Short + | Int + | Long + | Float + | Double + | Fractional + | Wholenum + | Saturate + | Nonsaturate + | Array of expression * ctype + | Pointer of ctype + | Function of + {retType : ctype, + params : (decltype * declarator) list} + | Enum of + {tagOpt : string option, + enumerators : (string * expression) list, + trailingComma : bool} (* true if there was there a trailing comma in the declaration *) + | Struct of + {isStruct : bool, (* struct or union; true => struct *) + tagOpt : string option, (* optional tag *) + members: (ctype * (declarator * expression) list) list} (* member specs *) + | TypedefName of string + | StructTag of + {isStruct : bool, (* ??? *) + name : string} + | EnumTag of string + | SpecExt of specifierExt + + and declarator (* constructor suffix: "Decr" *) + = EmptyDecr + | EllipsesDecr + | VarDecr of string + | ArrayDecr of declarator * expression + | PointerDecr of declarator + | QualDecr of qualifier * declarator + | FuncDecr of declarator * (decltype * declarator) list + | MARKdeclarator of (SourceMap.location * declarator) + | DecrExt of declaratorExt + + (* supports extensions of C in which expressions contain statements *) + and statement + = Decl of declaration + | Expr of expression + | Compound of statement list + | While of expression * statement + | Do of expression * statement + | For of expression * expression * expression * statement + | Labeled of string * statement + | CaseLabel of expression * statement + | DefaultLabel of statement + | Goto of string + | Break + | Continue + | Return of expression + | IfThen of expression * statement + | IfThenElse of expression * statement * statement + | Switch of expression * statement + | MARKstatement of (SourceMap.location * statement) + | StatExt of statementExt + + and declaration + = Declaration of decltype * (declarator * expression) list + | MARKdeclaration of (SourceMap.location * declaration) + | DeclarationExt of declarationExt + + and externalDecl + = ExternalDecl of declaration + | FunctionDef of (* record? *) + {retType : decltype, (* return type *) + funDecr : declarator, (* function name declarator *) + krParams : declaration list, (* K&R-style parameter declarations *) + body : statement} (* function body *) + | MARKexternalDecl of (SourceMap.location * externalDecl) + | ExternalDeclExt of externalDeclExt + + withtype ctype = + {qualifiers : qualifier list, + specifiers : specifier list} + and decltype = + {qualifiers : qualifier list, + specifiers : specifier list, + storage : storage list} + + and externalDeclExt = + (specifier, declarator, ctype, decltype, operator, expression, statement) + ParseTreeExt.externalDeclExt + and declarationExt = + (specifier, declarator, ctype, decltype, operator, expression, statement) + ParseTreeExt.declarationExt + and statementExt = + (specifier, declarator, ctype, decltype, operator, expression, statement) + ParseTreeExt.statementExt + and declaratorExt = + (specifier, declarator, ctype, decltype, operator, expression, statement) + ParseTreeExt.declaratorExt + and specifierExt = + (specifier, declarator, ctype, decltype, operator, expression, statement) + ParseTreeExt.specifierExt + and expressionExt = + (specifier, declarator, ctype, decltype, operator, expression, statement) + ParseTreeExt.expressionExt + and operatorExt = ParseTreeExt.operatorExt + +end (* structure ParseTree *) diff --git a/ckit/src/parser/parser-sig.sml b/ckit/src/parser/parser-sig.sml new file mode 100644 index 0000000..bd2323c --- /dev/null +++ b/ckit/src/parser/parser-sig.sml @@ -0,0 +1,15 @@ +(* Copyright (c) 1998 by Lucent Technologies *) + +signature PARSER = +sig + + val parseFile : Error.errorState -> string -> ParseTree.externalDecl list + (* parseFile takes an errorState and the name of a (preprocessed) + * C source file and returns a list of external declaration parse + * trees corresponding to the top-level declarations in the source file. + * See ckit/src/parser/util/error-sig.sml for documentation on + * Error.errorState. + *) + +end + diff --git a/ckit/src/parser/parser.sml b/ckit/src/parser/parser.sml new file mode 100644 index 0000000..1c2ad15 --- /dev/null +++ b/ckit/src/parser/parser.sml @@ -0,0 +1,50 @@ +(* Copyright (c) 1998 by Lucent Technologies *) + +structure Parser : PARSER = +struct + + (* instantiate parser structures *) + structure LrVals = LrValsFun(structure Token = LrParser.Token) + + structure TokTable = TokenTable(structure Tokens = LrVals.Tokens) + + structure CLex = CLexFun(structure Tokens = LrVals.Tokens + structure TokTable = TokTable) + + structure P = JoinWithArg(structure ParserData = LrVals.ParserData + structure Lex = CLex + structure LrParser = LrParser) + + fun parseFile errState f = + let val _ = TypeDefs.reset() + + val sourceMap = SourceMap.newmap{srcFile=f} + + fun lexErr (p1, p2, msg) = + Error.error (errState, SourceMap.location sourceMap (p1, p2), msg) + fun lexWarn (p1, p2, msg) = + Error.warning (errState, SourceMap.location sourceMap (p1, p2), msg) + fun parseErr (msg, p1, p2) = + Error.error (errState, SourceMap.location sourceMap (p1, p2), msg) + + fun inputc instrm i = TextIO.inputN(instrm,i) + + val lexArg = {comLevel = ref 0, + sourceMap = sourceMap, + charlist = ref ([] : string list), + stringstart = ref 0, + errWarn = {err=lexErr, warn = lexWarn} + } + val instrm = TextIO.openIn f + val lookahead = 15 + + val lexer = LrParser.Stream.streamify (CLex.makeLexer (inputc instrm) lexArg) + val (res,_) = P.parse(lookahead, lexer, parseErr, sourceMap) + val _ = TextIO.closeIn instrm + in res + end + handle P.ParseError => + (TextIO.output(Error.errStream errState,"ParseError raised\n"); + []) + +end (* structure Parser *) diff --git a/ckit/src/parser/sources.cm b/ckit/src/parser/sources.cm new file mode 100644 index 0000000..37330d9 --- /dev/null +++ b/ckit/src/parser/sources.cm @@ -0,0 +1,76 @@ +Group is + +#if defined(NEW_CM) +#if (SMLNJ_VERSION * 100 + SMLNJ_MINOR_VERSION >= 11029) + (* standard basis *) + $/basis.cm + + (* Compiler *) + $smlnj/compiler.cm + + (* standard library *) + $/smlnj-lib.cm + + (* ml-yacc *) + $/ml-yacc-lib.cm + + (* pretty printer library *) + $/pp-lib.cm +#else + (* standard basis *) + basis.cm + + (* Compiler *) + host-compiler.cm + + (* standard library *) + smlnj-lib.cm + + (* ml-yacc *) + ml-yacc-lib.cm + + (* pretty printer library *) + pp-lib.cm +#endif +#else + (* standard library *) + smlnj-lib.cm + + (* ml-yacc *) + ml-yacc-lib.cm + + (* pretty printer library *) + pp-lib.cm +#endif + + (* configuration *) + ../variants/sources.cm + + (* utilitities *) + util/old-pp.sml + util/ascii.sml + util/sourcemap-sig.sml + util/sourcemap.sml + util/error-sig.sml + util/error.sml + + (* lexer and parser *) + grammar/tdefs.sml + grammar/tokentable.sml + grammar/c.lex + + parser-sig.sml + parser.sml + parse-tree-sig.sml + parse-tree.sml + + (* extensions *) +#if (defined(d)) + extensions/d/parse-tree-ext-sig.sml + extensions/d/parse-tree-ext.sml + grammar/d.grm +#else + extensions/c/parse-tree-ext-sig.sml + extensions/c/parse-tree-ext.sml + grammar/c.grm +#endif diff --git a/ckit/src/parser/util/.cm/GUID/error-sig.sml b/ckit/src/parser/util/.cm/GUID/error-sig.sml new file mode 100644 index 0000000..c92a74b --- /dev/null +++ b/ckit/src/parser/util/.cm/GUID/error-sig.sml @@ -0,0 +1 @@ +guid-$/(ckit-lib.cm):ast/(group.cm):../parser/(group.cm):util/error-sig.sml-1714016104.882 diff --git a/ckit/src/parser/util/.cm/GUID/error.sml b/ckit/src/parser/util/.cm/GUID/error.sml new file mode 100644 index 0000000..ed17dba --- /dev/null +++ b/ckit/src/parser/util/.cm/GUID/error.sml @@ -0,0 +1 @@ +guid-$/(ckit-lib.cm):ast/(group.cm):../parser/(group.cm):util/error.sml-1714016104.886 diff --git a/ckit/src/parser/util/.cm/GUID/old-pp.sml b/ckit/src/parser/util/.cm/GUID/old-pp.sml new file mode 100644 index 0000000..fd1d110 --- /dev/null +++ b/ckit/src/parser/util/.cm/GUID/old-pp.sml @@ -0,0 +1 @@ +guid-$/(ckit-lib.cm):ast/(group.cm):../parser/(group.cm):util/old-pp.sml-1714016103.881 diff --git a/ckit/src/parser/util/.cm/GUID/sourcemap-sig.sml b/ckit/src/parser/util/.cm/GUID/sourcemap-sig.sml new file mode 100644 index 0000000..4774cbf --- /dev/null +++ b/ckit/src/parser/util/.cm/GUID/sourcemap-sig.sml @@ -0,0 +1 @@ +guid-$/(ckit-lib.cm):ast/(group.cm):../parser/(group.cm):util/sourcemap-sig.sml-1714016103.763 diff --git a/ckit/src/parser/util/.cm/GUID/sourcemap.sml b/ckit/src/parser/util/.cm/GUID/sourcemap.sml new file mode 100644 index 0000000..013fe16 --- /dev/null +++ b/ckit/src/parser/util/.cm/GUID/sourcemap.sml @@ -0,0 +1 @@ +guid-$/(ckit-lib.cm):ast/(group.cm):../parser/(group.cm):util/sourcemap.sml-1714016103.783 diff --git a/ckit/src/parser/util/.cm/SKEL/ascii.sml b/ckit/src/parser/util/.cm/SKEL/ascii.sml new file mode 100644 index 0000000..adbb6ef --- /dev/null +++ b/ckit/src/parser/util/.cm/SKEL/ascii.sml @@ -0,0 +1,2 @@ +Skeleton 5 +ad"Ascii"h0 \ No newline at end of file diff --git a/ckit/src/parser/util/.cm/SKEL/error-sig.sml b/ckit/src/parser/util/.cm/SKEL/error-sig.sml new file mode 100644 index 0000000..9f6aeb3 --- /dev/null +++ b/ckit/src/parser/util/.cm/SKEL/error-sig.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f4d"OldPrettyPrint"d"Format"d"SourceMap"d"TextIO"ac"ERROR"h0 \ No newline at end of file diff --git a/ckit/src/parser/util/.cm/SKEL/error.sml b/ckit/src/parser/util/.cm/SKEL/error.sml new file mode 100644 index 0000000..b477999 --- /dev/null +++ b/ckit/src/parser/util/.cm/SKEL/error.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f1d"TextIO"ad"Error"jh3ad"F"gp1d"Format"ad"PP"gp1d"OldPrettyPrint"ad"SM"gp1d"SourceMap"gp1c"ERROR" \ No newline at end of file diff --git a/ckit/src/parser/util/.cm/SKEL/old-pp.sml b/ckit/src/parser/util/.cm/SKEL/old-pp.sml new file mode 100644 index 0000000..7c762ab --- /dev/null +++ b/ckit/src/parser/util/.cm/SKEL/old-pp.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d3f3d"StringCvt"d"List"d"String"aOLD_PRETTYPRINT"0ad"OldPrettyPrint"jh2aDev"3ad"PP"jh2ad"Token"gp1d"StringToken"ad"Device"gp1 gp1e"PPStreamFn"gp1! \ No newline at end of file diff --git a/ckit/src/parser/util/.cm/SKEL/sourcemap-sig.sml b/ckit/src/parser/util/.cm/SKEL/sourcemap-sig.sml new file mode 100644 index 0000000..b4de260 --- /dev/null +++ b/ckit/src/parser/util/.cm/SKEL/sourcemap-sig.sml @@ -0,0 +1,2 @@ +Skeleton 5 +ac"SOURCE_MAP"h0 \ No newline at end of file diff --git a/ckit/src/parser/util/.cm/SKEL/sourcemap.sml b/ckit/src/parser/util/.cm/SKEL/sourcemap.sml new file mode 100644 index 0000000..038c769 --- /dev/null +++ b/ckit/src/parser/util/.cm/SKEL/sourcemap.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f3d"Int"d"String"d"Config"ad"SourceMap"jh1ad"F"gp1d"Format"gp1c"SOURCE_MAP" \ No newline at end of file diff --git a/ckit/src/parser/util/.cm/amd64-unix/error-sig.sml b/ckit/src/parser/util/.cm/amd64-unix/error-sig.sml new file mode 100644 index 0000000..29c0392 Binary files /dev/null and b/ckit/src/parser/util/.cm/amd64-unix/error-sig.sml differ diff --git a/ckit/src/parser/util/.cm/amd64-unix/error.sml b/ckit/src/parser/util/.cm/amd64-unix/error.sml new file mode 100644 index 0000000..183256b Binary files /dev/null and b/ckit/src/parser/util/.cm/amd64-unix/error.sml differ diff --git a/ckit/src/parser/util/.cm/amd64-unix/old-pp.sml b/ckit/src/parser/util/.cm/amd64-unix/old-pp.sml new file mode 100644 index 0000000..5056646 Binary files /dev/null and b/ckit/src/parser/util/.cm/amd64-unix/old-pp.sml differ diff --git a/ckit/src/parser/util/.cm/amd64-unix/sourcemap-sig.sml b/ckit/src/parser/util/.cm/amd64-unix/sourcemap-sig.sml new file mode 100644 index 0000000..8373345 Binary files /dev/null and b/ckit/src/parser/util/.cm/amd64-unix/sourcemap-sig.sml differ diff --git a/ckit/src/parser/util/.cm/amd64-unix/sourcemap.sml b/ckit/src/parser/util/.cm/amd64-unix/sourcemap.sml new file mode 100644 index 0000000..86b3197 Binary files /dev/null and b/ckit/src/parser/util/.cm/amd64-unix/sourcemap.sml differ diff --git a/ckit/src/parser/util/ascii.sml b/ckit/src/parser/util/ascii.sml new file mode 100644 index 0000000..f68de38 --- /dev/null +++ b/ckit/src/parser/util/ascii.sml @@ -0,0 +1,50 @@ +(* Copyright (c) 1998 by Lucent Technologies *) + +(* Copyright 1989 by AT&T Bell Laboratories *) +(* ascii.sml *) + +structure Ascii = struct + val caret = 94 + and colon = 58 + and comma = 44 + and del = 127 + and dollar = 36 + and dot = 46 + and dquote = 34 + and equal = 61 + and formfeed = 12 + and greaterthan = 62 + and lbrace = 123 + and lbracket = 91 + and lc_a = 97 + and lc_n = 110 + and lc_t = 116 + and lc_z = 122 + and lessthan = 60 + and lparen = 40 + and minus = 45 + and newline = 10 + and nine = 57 + and percent = 37 + and plus = 43 + and query = 63 + and rbrace = 125 + and rbracket = 93 + and return = 13 + and rparen = 41 + and SEMIcolon = 59 + and sharp = 35 + and slash = 47 + and space = 32 + and squote = 39 + and star = 42 + and tab = 9 + and tilde = 126 + and uc_a = 65 + and uc_z = 90 + and underscore = 95 + and zero = 48 + + fun isDigit (char) = char >= zero andalso char <= nine; + +end (* structure Ascii *) diff --git a/ckit/src/parser/util/error-sig.sml b/ckit/src/parser/util/error-sig.sml new file mode 100644 index 0000000..6e3c74a --- /dev/null +++ b/ckit/src/parser/util/error-sig.sml @@ -0,0 +1,115 @@ +(* + * Copyright (c) 1996 by Satish Chandra, Brad Richards, Mark D. Hill, + * James R. Larus, and David A. Wood. + * + * Teapot is distributed under the following conditions: + * + * You may make copies of Teapot for your own use and modify those copies. + * + * All copies of Teapot must retain our names and copyright notice. + * + * You may not sell Teapot or distributed Teapot in conjunction with a + * commercial product or service without the expressed written consent of + * the copyright holders. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. + * + *) + +(* error-sig.sml + * + * CS703 --- Project --- Spring '94 + * + * COPYRIGHT (c) 1992 AT&T Bell Laboratories + *) + +signature ERROR = +sig + + type errorState + (* the information related to error reporting, including counters + * for errors and warnings and upper bounds thereon.*) + +(* global limit variables *) + + val errorsLimit : int ref + (* caps number of errors reported on an error state (see mkErrState) *) + + val warningsLimit : int ref + (* caps number of warnings reported on an error state (see mkErrState) *) + + +(* creating errorStates *) + + val mkErrState : TextIO.outstream -> errorState + (* mkErrorState(os): make an error state with destination outstream os. + * Uses the current values of errorsLimit and warningsLimit as upper bounds + * on numbers of errors and warnings reported via the resulting errorState. + *) + + +(* reporting internal bugs *) + + val bug : errorState -> string -> unit + (* for reporting internal bugs *) + + +(* generating warning messages *) + + val warning : (errorState * SourceMap.location * string) -> unit + (* warning(es,loc,message): the message and location loc will be printed + * to the destination outstream component of es *) + + val warningf : (errorState * SourceMap.location * string * Format.fmt_item list) + -> unit + (* warning(es,loc,message,items): the message and location loc and + * formated representation of items will be printed to the destination + * outstream component of es *) + + val noMoreWarnings : errorState -> unit + (* turns off printing of warning messages for the given errorState *) + + +(* generating error messages *) + + val hint: string -> unit + (* MAGIC (i.e. really gross hack) that allows you to insert hints + * that will be utilized by the next call to error. This was introduced + * to support better parser error messages. The next call to error will + * consume the hint, so it only applies to the next error. Typically + * it is a hint as to why the error occurred. *) + + val error : (errorState * SourceMap.location * string) -> unit + (* warning(es,loc,message): the message and location loc will be printed + * to the destination outstream component of es *) + + val errorf : (errorState * SourceMap.location * string * Format.fmt_item list) + (* warning(es,loc,message,items): the message and location loc and + * formated representation of items will be printed to the destination + * outstream component of es *) + -> unit + val noMoreErrors : errorState -> unit + (* turns off printing of warning messages for the given errorState *) + + val ppError : + (errorState * SourceMap.location * (OldPrettyPrint.ppstream -> unit)) + -> unit + (* pretty-print an error message on the error stream *) + + val errStream : errorState -> TextIO.outstream + (* returns the destination outstream of the errorState *) + + val errorCount : errorState -> int + (* returns n, if there have been n>0 errors reported on the state since + * it was initialized or last reset *) + + val warningCount : errorState -> int + (* returns n, if there have been n>0 warnings reported on the state since + * it was initialized or last reset *) + + val reset : errorState -> unit + (* clears the error and warnings counts *) + +end (* signature ERROR *) diff --git a/ckit/src/parser/util/error.sml b/ckit/src/parser/util/error.sml new file mode 100644 index 0000000..89cb733 --- /dev/null +++ b/ckit/src/parser/util/error.sml @@ -0,0 +1,176 @@ +(* + * Copyright (c) 1996 by Satish Chandra, Brad Richards, Mark D. Hill, + * James R. Larus, and David A. Wood. + * + * Teapot is distributed under the following conditions: + * + * You may make copies of Teapot for your own use and modify those copies. + * + * All copies of Teapot must retain our names and copyright notice. + * + * You may not sell Teapot or distributed Teapot in conjunction with a + * commercial product or service without the expressed written consent of + * the copyright holders. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. + * + *) + +(* error.sml + * + * CS703 --- Project --- Spring '94 + * + * COPYRIGHT (c) 1992 AT&T Bell Laboratories + *) + +structure Error : ERROR = +struct + + structure F = Format + structure PP = OldPrettyPrint + structure SM = SourceMap + + datatype errorState + = ES of + {outStrm : TextIO.outstream, + numErrors : int ref, + numWarnings : int ref, + warningsEnabled : bool ref, + errorsEnabled : bool ref, + errorsLimit : int, + warningsLimit : int} + + (* global error and warning count limits *) + val errorsLimit = ref 10 (* flag for suppressing error messages *) + val warningsLimit = ref 10 (* flag for suppressing warning messages *) + +(* make an error state. src is the source file name, dst is the + * output state to report errors on, lnum and lpos are references + * used to keep track of the current line number and starting + * character positions of the scanned lines. + *) + fun mkErrState (dst: TextIO.outstream) = + ES {outStrm = dst, + numErrors = ref 0, + numWarnings = ref 0, + warningsEnabled = ref true, + errorsEnabled = ref true, + errorsLimit = !errorsLimit, + warningsLimit = !warningsLimit + } + + fun inc (i: int ref) = (i := !i + 1; ()) + fun dec (i: int ref) = (i := !i - 1; ()) + +(* curried version of TextIO.output *) + fun outputc outstrm strng = TextIO.output(outstrm, strng) + +(* for reporting internal bugs *) + fun bug (ES{outStrm,...}) (msg: string) : unit= + TextIO.output(outStrm,("Compiler bug: " ^ msg ^ "\n")) + +(* output a warning/error message with location info *) + fun sayError (es as ES{outStrm, ...}, loc, kind, msg) = + F.formatf "%s: %s%s\n" (outputc outStrm) [ + F.STR(SM.locToString loc), F.STR kind, F.STR msg + ] + +(* output a formatted warning/error message with location info *) + fun fmtError (es as ES{outStrm, ...}, loc, kind, fmt, items) = + F.formatf ("%s: %s" ^ fmt ^ "\n") (outputc outStrm) + ((F.STR(SM.locToString loc))::(F.STR kind)::items) + +(* generate warning messages to the error stream *) + fun warning (es as ES{numWarnings,warningsLimit,warningsEnabled,...}, loc, msg) = + if !warningsEnabled then + (sayError(es, loc, "warning: ", msg); + inc numWarnings; + if !numWarnings > warningsLimit then + (warningsEnabled := false; + sayError(es, loc, "warning: ", "additional warnings suppressed")) + else ()) + else () + + fun warningf (es as ES{numWarnings,warningsLimit,warningsEnabled,...}, + loc, fmt, items) = + if !warningsEnabled then + (fmtError(es, loc, "warning: ", fmt, items); + inc numWarnings; + if !numWarnings > warningsLimit then + (warningsEnabled := false; + sayError(es, loc, "warning: ", "additional warnings suppressed")) + else ()) + else () + + fun noMoreWarnings (es as ES{warningsEnabled,...}) = + (warningsEnabled := false; + sayError(es, SM.UNKNOWN, "warning: ", "additional warnings suppressed.")) + +(* hints - heuristic help for error messages; + Note: must be called before error call is generated. *) + val lastHint = ref (NONE : string option) + fun hint s = (lastHint := SOME s) + +(* generate error messages to the error stream *) + fun error (es as ES{numErrors, errorsLimit, errorsEnabled,...}, loc, msg) = + if !errorsEnabled then + (case !lastHint of + SOME s => (sayError(es, loc, "error: ", msg ^ "\n" ^ s); + lastHint := NONE) + | NONE => sayError(es, loc, "error: ", msg); + inc numErrors; + if !numErrors > errorsLimit then + (errorsEnabled := false; + sayError(es, loc, "warning: ", "additional errors suppressed.")) + else ()) + else () + + fun errorf (es as ES{numErrors,errorsLimit,errorsEnabled,...}, loc, fmt, items) = + if !errorsEnabled then + (fmtError(es, loc, "error: ", fmt, items); + inc numErrors; + if !numErrors > errorsLimit then + (errorsEnabled := false; + sayError(es, loc, "warning: ", "additional errors suppressed.")) + else ()) + else () + + fun noMoreErrors(es as ES{errorsEnabled,...}) = + (errorsEnabled := false; + sayError(es, SM.UNKNOWN, "warning: ", "additional errors suppressed.")) + +(* pretty-print an error message on the error stream *) + fun ppError (es as ES{outStrm, numErrors, ...}, loc, pp) = let + val ppStrm = PP.mk_ppstream { + consumer = outputc outStrm, + flush = fn () => TextIO.flushOut outStrm, + linewidth = 80 + } + in + inc numErrors; + PP.begin_block ppStrm PP.INCONSISTENT 0; + PP.add_string ppStrm + (F.format "Error %s: " [F.STR(SM.locToString loc)]); + pp ppStrm; + PP.add_newline ppStrm; + PP.end_block ppStrm; + PP.flush_ppstream ppStrm + end + + fun errStream (ES{outStrm, ...}) = outStrm + +(* returns count of errors reported on the state (since last reset) *) + fun errorCount (ES{numErrors, ...}) = + !numErrors +(* returns count of warnings reported on the state (since last reset) *) + fun warningCount (ES{numWarnings, ...}) = + !numWarnings + +(* clears the error and warning counts, so that errorCount and + * warningCount will return 0. *) + fun reset (ES{numErrors, numWarnings,...}) = + (numErrors := 0; numWarnings := 0) + +end (* Error *) diff --git a/ckit/src/parser/util/old-pp.sml b/ckit/src/parser/util/old-pp.sml new file mode 100644 index 0000000..497b40a --- /dev/null +++ b/ckit/src/parser/util/old-pp.sml @@ -0,0 +1,119 @@ +(* old-pp.sml + * + * COPYRIGHT (c) 2020 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * An implementation of the SML/NJ's old PP interface on top of the PP library. + *) + +signature OLD_PRETTYPRINT = + sig + type ppstream + type ppconsumer = { + consumer : string -> unit, + linewidth : int, + flush : unit -> unit + } + + datatype break_style = CONSISTENT | INCONSISTENT + + exception PP_FAIL of string + + val mk_ppstream : ppconsumer -> ppstream + val dest_ppstream : ppstream -> ppconsumer + val add_break : ppstream -> int * int -> unit + val add_newline : ppstream -> unit + val add_string : ppstream -> string -> unit + val begin_block : ppstream -> break_style -> int -> unit + val end_block : ppstream -> unit + val clear_ppstream : ppstream -> unit + val flush_ppstream : ppstream -> unit + val with_pp : ppconsumer -> (ppstream -> unit) -> unit + val pp_to_string : int -> (ppstream -> 'a -> unit) -> 'a -> string + + end; + +structure OldPrettyPrint :> OLD_PRETTYPRINT = + struct + + type ppconsumer = { + consumer : string -> unit, + linewidth : int, + flush : unit -> unit + } + + structure Dev = + struct + type device = ppconsumer + type style = unit + fun sameStyle _ = true + fun pushStyle _ = () + fun popStyle _ = () + fun defaultStyle _ = () + fun maxDepth _ = NONE +val depth = maxDepth (* DEPRECATED *) + fun setMaxDepth _ = () + fun ellipses _ = ("", 0) + fun setEllipses _ = () + fun setEllipsesWithSz _ = () + fun lineWidth {consumer, linewidth, flush} = SOME linewidth + fun setLineWidth _ = () + fun maxIndent _ = NONE + fun setMaxIndent _ = () + fun textWidth _ = NONE + fun setTextWidth _ = () + fun space ({consumer, linewidth, flush}, n) = + consumer (StringCvt.padLeft #" " n "") + val indent = space + fun newline {consumer, linewidth, flush} = consumer "\n" + fun string ({consumer, linewidth, flush}, s) = consumer s + fun char ({consumer, linewidth, flush}, c) = consumer(str c) + fun flush {consumer, linewidth, flush} = flush() + end + + structure PP = PPStreamFn(structure Token = StringToken structure Device = Dev) + + datatype ppstream = STRM of { + consumer : ppconsumer, + strm : PP.stream + } + + datatype break_style = CONSISTENT | INCONSISTENT + + exception PP_FAIL of string + + fun mk_ppstream ppc = STRM{ + consumer = ppc, + strm = PP.openStream ppc + } + fun dest_ppstream (STRM{consumer, ...}) = consumer + fun add_break (STRM{strm, ...}) (nsp, offset) = + PP.break strm {nsp=nsp, offset=offset} + fun add_newline (STRM{strm, ...}) = PP.newline strm + fun add_string (STRM{strm, ...}) s = PP.string strm s + fun begin_block (STRM{strm, ...}) CONSISTENT indent = + PP.openHVBox strm (PP.Rel indent) + | begin_block (STRM{strm, ...}) INCONSISTENT indent = + PP.openHOVBox strm (PP.Rel indent) + fun end_block (STRM{strm, ...}) = PP.closeBox strm + fun clear_ppstream(STRM{strm, ...}) = + raise Fail "clear_ppstream not implemented" + fun flush_ppstream (STRM{strm, ...}) = PP.flushStream strm + fun with_pp ppc f = let + val (ppStrm as (STRM{strm, ...})) = mk_ppstream ppc + in + f ppStrm; + PP.closeStream strm + end + fun pp_to_string wid ppFn obj = let + val l = ref ([] : string list) + fun attach s = l := s :: !l + in + with_pp { + consumer = attach, linewidth = wid, flush = fn()=>() + } (fn ppStrm => ppFn ppStrm obj); + String.concat(List.rev(!l)) + end + + end; + diff --git a/ckit/src/parser/util/sourcemap-sig.sml b/ckit/src/parser/util/sourcemap-sig.sml new file mode 100644 index 0000000..6b44de7 --- /dev/null +++ b/ckit/src/parser/util/sourcemap-sig.sml @@ -0,0 +1,54 @@ +signature SOURCE_MAP = +sig + + type charpos = int + (* char position in a file *) + + type region = charpos * charpos + (* region between two character positions, where it is assumed that + * the first charpos is less than the second *) + + datatype location + = LOC of + {srcFile : string, + beginLine : int, + beginCol : int, + endLine : int, + endCol : int} + | UNKNOWN + (* encodes the information used to record locations in input sources. + * a location designates a region within a (single) source file *) + + type sourcemap + (* a data structure maintaining a mapping between character positions + * in an input source and locations. + * This handles multiple source files, which can happen if the input + * has been passed through the C preprocessor. + *) + + val newmap : {srcFile : string} -> sourcemap + (* creates a new sourcemap with an initial source file name srcFile *) + + val newline : sourcemap -> charpos -> unit + (* records a line break in the input source *) + + val resynch : sourcemap -> {pos:charpos, srcFile:string option, line:int} -> unit + (* switch source file names in response to a directive created by + * an include *) + + val parseDirective : sourcemap -> charpos * string -> unit + (* parse a C preprocessor directive to reset src file name and line number *) + + val location : sourcemap -> region -> location + (* maps a region to a location *) + + val currPos : sourcemap -> charpos + (* returns the current character position in the source represented + * by the sourcemap *) + + val locToString : location -> string + (* format a location as a string *) + +end + + diff --git a/ckit/src/parser/util/sourcemap.sml b/ckit/src/parser/util/sourcemap.sml new file mode 100644 index 0000000..5787622 --- /dev/null +++ b/ckit/src/parser/util/sourcemap.sml @@ -0,0 +1,129 @@ +structure SourceMap : SOURCE_MAP = +struct + structure F = Format + + type charpos = int + + type region = charpos * charpos + + datatype location + = LOC of + {srcFile : string, + beginLine : int, + beginCol : int, + endLine : int, + endCol : int} + | UNKNOWN + + datatype sourcemap + = SOURCEMAP of + { linePos : charpos list ref, + filePos : {linePos : charpos list, + line : int, + srcFile : string} list ref, + lineNum : int ref} + + (* DBM: the filePos is a stack of records, but it doesn't get popped, so + * it looks like filePos could just be a ref of the record *) + + fun newmap{srcFile} = SOURCEMAP + { linePos = ref [1], (* this compensates for lex bug : yypos off by 2 *) + filePos = ref [{linePos=[],line=1,srcFile=srcFile}], + lineNum = ref 1 + } + + fun newline (SOURCEMAP{linePos,lineNum,...}) pos = + (linePos := pos :: !linePos; lineNum := 1 + !lineNum) + + fun resynch (SOURCEMAP{linePos,filePos,lineNum,...}) {pos,srcFile,line} = + (filePos := {linePos= !linePos, + line= !lineNum, + srcFile= + (case srcFile of + SOME srcFile => srcFile + | NONE => + let val fpl = !filePos + in case fpl of + nil => "" + | x :: _ => #srcFile x + end) + } :: !filePos; + linePos := [pos]; + lineNum := line + ) + + fun parseDirective sourceMap (pos,directive) = + let fun sep #" " = true + | sep #"\"" = true + | sep #"#" = true + | sep #"\n" = true + | sep _ = false + fun proc{line, srcOpt} = + (case Int.fromString line + of SOME line => + resynch sourceMap {pos=pos,srcFile=srcOpt,line=line} + | _ => newline sourceMap pos) + in if Config.ParseControl.parseDirective then + case String.tokens sep directive + of ("line" :: line :: srcFile :: _) => + proc{line=line, srcOpt=SOME srcFile} + | line::srcFile::_ => proc{line=line, srcOpt=SOME srcFile} + | line :: _ => proc{line=line, srcOpt=NONE} + | _ => newline sourceMap pos + else newline sourceMap pos + end + + fun currPos(SOURCEMAP{linePos,...}) = hd (!linePos) + + + fun location(SOURCEMAP{linePos,filePos,lineNum,...}) (x,y) = + let fun findPos(p,currPos,currFile,pos::rest,filePos,line) = + if p > pos then + {srcFile=currFile,line=line,column=p - pos} + else findPos(p,pos,currFile,rest,filePos,line-1) + | findPos(p,currPos,currFile,[],{linePos,line,srcFile}::filePos,_) = + findPos(p,currPos,#srcFile(hd filePos),linePos,filePos,line) + (* NOTE: very confusing... + filePos stack contains previous line info and srcFile of current file *) + | findPos(p,currPos,currFile,[],[],line) = + {srcFile=currFile,line=line,column=0} + + val {srcFile=currFile,...} = hd(!filePos) + val {srcFile,line=l1,column=c1} = + findPos(x,x,currFile,!linePos,!filePos,!lineNum) + val {srcFile,line=l2,column=c2} = + findPos(y,y,currFile,!linePos,!filePos,!lineNum) + in LOC{srcFile = srcFile, + beginLine = l1, + beginCol = c1, + endLine = l2, + endCol = c2 + } + end + + (* return a string representing a location *) + fun locToString UNKNOWN = "\"???\"" + | locToString (LOC{srcFile,beginLine,beginCol,endLine,endCol}) = let + val srcFile = srcFile + val p1line = beginLine + val p1pos = beginCol + val p2line = endLine + val p2pos = endCol + in + if (beginLine = endLine) + then if (p1pos < p2pos) + then F.format "\"%s\":%d.%d-%d" [ + F.STR srcFile, F.INT p1line, F.INT p1pos, F.INT p2pos + ] + else F.format "\"%s\":%d.%d" [ + F.STR srcFile, F.INT p1line, F.INT p1pos + ] + else F.format "\"%s\":%d.%d-%d.%d" [ + F.STR srcFile, F.INT p1line, F.INT p1pos, + F.INT p2line, F.INT p2pos + ] + end (* locToString *) + +end + + diff --git a/ckit/src/sources.cm b/ckit/src/sources.cm new file mode 100644 index 0000000..c8a2cd0 --- /dev/null +++ b/ckit/src/sources.cm @@ -0,0 +1,2 @@ +Group is + ast/sources.cm diff --git a/ckit/src/test.c b/ckit/src/test.c new file mode 100644 index 0000000..c706f6a --- /dev/null +++ b/ckit/src/test.c @@ -0,0 +1,5 @@ +void printf(); + +main () { + printf("Hello world\n"); +} diff --git a/ckit/src/variants/.cm/GUID/config-sig.sml b/ckit/src/variants/.cm/GUID/config-sig.sml new file mode 100644 index 0000000..91c25d9 --- /dev/null +++ b/ckit/src/variants/.cm/GUID/config-sig.sml @@ -0,0 +1 @@ +guid-$/(ckit-lib.cm):ast/(group.cm):../parser/(group.cm):../variants/(group.cm):config-sig.sml-1714016103.774 diff --git a/ckit/src/variants/.cm/GUID/parse-control-sig.sml b/ckit/src/variants/.cm/GUID/parse-control-sig.sml new file mode 100644 index 0000000..66b4e30 --- /dev/null +++ b/ckit/src/variants/.cm/GUID/parse-control-sig.sml @@ -0,0 +1 @@ +guid-$/(ckit-lib.cm):ast/(group.cm):../parser/(group.cm):../variants/(group.cm):parse-control-sig.sml-1714016103.767 diff --git a/ckit/src/variants/.cm/GUID/type-check-control-sig.sml b/ckit/src/variants/.cm/GUID/type-check-control-sig.sml new file mode 100644 index 0000000..161f6ea --- /dev/null +++ b/ckit/src/variants/.cm/GUID/type-check-control-sig.sml @@ -0,0 +1 @@ +guid-$/(ckit-lib.cm):ast/(group.cm):../parser/(group.cm):../variants/(group.cm):type-check-control-sig.sml-1714016103.771 diff --git a/ckit/src/variants/.cm/SKEL/config-sig.sml b/ckit/src/variants/.cm/SKEL/config-sig.sml new file mode 100644 index 0000000..f07a324 --- /dev/null +++ b/ckit/src/variants/.cm/SKEL/config-sig.sml @@ -0,0 +1,2 @@ +Skeleton 5 +ac"CONFIG"h2ad"ParseControl"gp1c"PARSECONTROL"ad"TypeCheckControl"gp1c"TYPECHECKCONTROL" \ No newline at end of file diff --git a/ckit/src/variants/.cm/SKEL/parse-control-sig.sml b/ckit/src/variants/.cm/SKEL/parse-control-sig.sml new file mode 100644 index 0000000..7e1a9ab --- /dev/null +++ b/ckit/src/variants/.cm/SKEL/parse-control-sig.sml @@ -0,0 +1,2 @@ +Skeleton 5 +ac"PARSECONTROL"h0 \ No newline at end of file diff --git a/ckit/src/variants/.cm/SKEL/type-check-control-sig.sml b/ckit/src/variants/.cm/SKEL/type-check-control-sig.sml new file mode 100644 index 0000000..3355cc7 --- /dev/null +++ b/ckit/src/variants/.cm/SKEL/type-check-control-sig.sml @@ -0,0 +1,2 @@ +Skeleton 5 +ac"TYPECHECKCONTROL"h0 \ No newline at end of file diff --git a/ckit/src/variants/.cm/amd64-unix/config-sig.sml b/ckit/src/variants/.cm/amd64-unix/config-sig.sml new file mode 100644 index 0000000..9bcf2a1 Binary files /dev/null and b/ckit/src/variants/.cm/amd64-unix/config-sig.sml differ diff --git a/ckit/src/variants/.cm/amd64-unix/parse-control-sig.sml b/ckit/src/variants/.cm/amd64-unix/parse-control-sig.sml new file mode 100644 index 0000000..d19d32d Binary files /dev/null and b/ckit/src/variants/.cm/amd64-unix/parse-control-sig.sml differ diff --git a/ckit/src/variants/.cm/amd64-unix/type-check-control-sig.sml b/ckit/src/variants/.cm/amd64-unix/type-check-control-sig.sml new file mode 100644 index 0000000..7f9b944 Binary files /dev/null and b/ckit/src/variants/.cm/amd64-unix/type-check-control-sig.sml differ diff --git a/ckit/src/variants/ansic/.cm/GUID/config.sml b/ckit/src/variants/ansic/.cm/GUID/config.sml new file mode 100644 index 0000000..852d94f --- /dev/null +++ b/ckit/src/variants/ansic/.cm/GUID/config.sml @@ -0,0 +1 @@ +guid-$/(ckit-lib.cm):ast/(group.cm):../parser/(group.cm):../variants/(group.cm):ansic/config.sml-1714016103.777 diff --git a/ckit/src/variants/ansic/.cm/SKEL/config.sml b/ckit/src/variants/ansic/.cm/SKEL/config.sml new file mode 100644 index 0000000..a6741c4 --- /dev/null +++ b/ckit/src/variants/ansic/.cm/SKEL/config.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f1d"TextIO"ad"Config"jh2ad"ParseControl"j0gp1c"PARSECONTROL"ad"TypeCheckControl"j+gp1c"TYPECHECKCONTROL"gp1c"CONFIG" \ No newline at end of file diff --git a/ckit/src/variants/ansic/.cm/amd64-unix/config.sml b/ckit/src/variants/ansic/.cm/amd64-unix/config.sml new file mode 100644 index 0000000..d8c8d5f Binary files /dev/null and b/ckit/src/variants/ansic/.cm/amd64-unix/config.sml differ diff --git a/ckit/src/variants/ansic/config.sml b/ckit/src/variants/ansic/config.sml new file mode 100644 index 0000000..ca496a7 --- /dev/null +++ b/ckit/src/variants/ansic/config.sml @@ -0,0 +1,47 @@ +(* Copyright (c) 1998 by Lucent Technologies *) + +(* Configuration for ANSIC *) + +structure Config : CONFIG = +struct + + val DFLAG = false + + structure ParseControl : PARSECONTROL = + struct + val symbolLength = 256 + val typedefsScoped = true + val prototypesAllowed = true + val templatesAllowed = false + val trailingCommaInEnum = {error=false,warning=true} + val newFundefsAllowed = true + val voidAllowed = true + val voidStarAllowed = true + val constAllowed = true + val volatileAllowed = true + fun violation str = + TextIO.output(TextIO.stdOut,"\nERROR: in ANSI C " ^ str ^ "\n") + val Dkeywords = false + val parseDirective = true (* Chandra, 6/21/99 *) + val underscoreKeywords = SOME true (* Blume *) + end + + (* see type-check-control-sig.sml for description of these flags *) + structure TypeCheckControl : TYPECHECKCONTROL = + struct + val don't_convert_SHORT_to_INT = false (* not doing dsp *) + val don't_convert_DOUBLE_in_usual_unary_cnv = true (* ansic *) + val enumeration_incompatibility = true (* ansic *) + val pointer_compatibility_quals = true (* ansic *) + val undeclared_id_error = true (* ansic *) + val undeclared_fun_error = true (* ansic *) + val convert_function_args_to_pointers = true (* ansic *) + val storage_size_check = true (* ansic *) + val perform_type_checking = true (* do type checking *) + val ISO_bitfield_restrictions = false (* allow char, short, long in bitfields *) + val allow_enum_bitfields = true (* allow enums in bitfields *) + val allow_non_constant_local_initializer_lists = false (* ansic *) + val partial_enum_error = false (* permissive *) + val partial_enums_have_unknown_size = false (* permissive *) + end +end (* structure Config *) diff --git a/ckit/src/variants/config-sig.sml b/ckit/src/variants/config-sig.sml new file mode 100644 index 0000000..bb5d153 --- /dev/null +++ b/ckit/src/variants/config-sig.sml @@ -0,0 +1,7 @@ +signature CONFIG = +sig + val DFLAG : bool + + structure ParseControl : PARSECONTROL + structure TypeCheckControl : TYPECHECKCONTROL +end \ No newline at end of file diff --git a/ckit/src/variants/group.cm b/ckit/src/variants/group.cm new file mode 100644 index 0000000..40d4714 --- /dev/null +++ b/ckit/src/variants/group.cm @@ -0,0 +1,25 @@ +Group +is + +#if defined(NEW_CM) +#if (SMLNJ_VERSION * 100 + SMLNJ_MINOR_VERSION >= 11029) + + (* standard basis *) + $/basis.cm +#else + (* standard basis *) + basis.cm +#endif +#endif + + parse-control-sig.sml + type-check-control-sig.sml + config-sig.sml + +#if (defined(d)) + d/config.sml +#elif (defined(fiveessc)) + 5essc/config.sml +#else + ansic/config.sml +#endif diff --git a/ckit/src/variants/parse-control-sig.sml b/ckit/src/variants/parse-control-sig.sml new file mode 100644 index 0000000..907a8fb --- /dev/null +++ b/ckit/src/variants/parse-control-sig.sml @@ -0,0 +1,23 @@ +(* Copyright (c) 1998 by Lucent Technologies *) + +signature PARSECONTROL = + sig + val symbolLength: int + val typedefsScoped: bool + val prototypesAllowed: bool + val templatesAllowed: bool + val trailingCommaInEnum : {error:bool,warning:bool} + val newFundefsAllowed: bool + val voidAllowed: bool + val voidStarAllowed: bool + val constAllowed: bool + val volatileAllowed: bool + val violation : string -> unit + val Dkeywords : bool + val parseDirective : bool + val underscoreKeywords : bool option + (* NONE -> accept as normal identifiers; + * SOME true -> accept as keywords; + * SOME false -> reject as error *) + end + diff --git a/ckit/src/variants/sources.cm b/ckit/src/variants/sources.cm new file mode 100644 index 0000000..fb01ab8 --- /dev/null +++ b/ckit/src/variants/sources.cm @@ -0,0 +1,24 @@ +Group is + +#if defined(NEW_CM) +#if (SMLNJ_VERSION * 100 + SMLNJ_MINOR_VERSION >= 11029) + + (* standard basis *) + $/basis.cm +#else + (* standard basis *) + basis.cm +#endif +#endif + + parse-control-sig.sml + type-check-control-sig.sml + config-sig.sml + +#if (defined(d)) + d/config.sml +#elif (defined(fiveessc)) + 5essc/config.sml +#else + ansic/config.sml +#endif diff --git a/ckit/src/variants/type-check-control-sig.sml b/ckit/src/variants/type-check-control-sig.sml new file mode 100644 index 0000000..ed3f4cc --- /dev/null +++ b/ckit/src/variants/type-check-control-sig.sml @@ -0,0 +1,93 @@ +signature TYPECHECKCONTROL = +sig + + (* these flags are used in type-util.sml *) + val don't_convert_SHORT_to_INT: bool + (* In ANSI C, usual unary converstion converts + SHORT to INT; for DSP code, we want to + keep SHORT as SHORT. + Default: true for ANSI C behavior *) + + val don't_convert_DOUBLE_in_usual_unary_cnv: bool + (* In ANSI, FLOAT is not converted to DOUBLE during + usual unary converstion; in old style compilers + FLOAT *is* converted to DOUBLE. + Default: true for ANSI behavior *) + + val enumeration_incompatibility: bool + (* ANSI says that different enumerations are incomptible + (although all are compatible with int); + older style compilers say that different enumerations + are compatible. + Default: true for ANSI behavior *) + + val pointer_compatibility_quals: bool + (* ANSI says that pointers to differently qualified types + are different; some compilers vary. + Default: true for ANSI behavior *) + + (* used in build-ast.sml *) + val undeclared_id_error:bool + (* In ANSI C, an undeclared id is an error; + in older versions of C, undeclared ids are assumed integer. + Default value: true (for ANSI behavior) *) + + val undeclared_fun_error:bool + (* In ANSI C, an undeclared fun is an error; + in older versions of C, undeclared funs are assumed to return integer. + Default value: true (for ANSI behavior) *) + + val convert_function_args_to_pointers:bool + (* In ANSI C, arguments of functions goverened by prototype + definitions that have type function or array are not + promoted to pointer type; however many compilers do this + promotion. + Default value: true (to get standard behavior) *) + + val storage_size_check:bool + (* Declarations and structure fields must have known storage + size; maybe you want to turn this check off? + Default value: true (to get ANSI behavior). *) + + val allow_non_constant_local_initializer_lists: bool + (* Allow non constant local inializers for aggregates and unions. + e.g. int x, y, z; + int a[] = {x, y, z}; + This is allowed gcc *) + val perform_type_checking:bool + (* true = do type checking; false = disable type checking; + Note: with type checking off, there is still some + rudimentary type processing, but no + usual unary conversions, usual binary conversions, etc. *) + + (* used by sizeof *) + val ISO_bitfield_restrictions: bool + (* In ANSI/ISO, types of bitfields must be qualified or unqualified version of + int, unsigned int or signed int (ISO spec, section 6.5.2.1, p60); + however most compilers allow chars, shorts and longs as well. + Default value: false (to get std permissive behavior) *) + + val allow_enum_bitfields: bool + (* Allow bitfields involving enum + e.g. enum x y : 8; + Default value: true (permissive behavior e.g. gcc) *) + + val partial_enum_error: bool + (* Prohibit partial enums. + i.e. enum x *y; + enum x {a, b, c}; + Default value: false. + (set to true to get strict behaviour) + *) + + val partial_enums_have_unknown_size: bool + (* Treat partial enums as having unknown size. + e.g. + enum x y; + enum x {a, b, c}; + Default value: false. + *) +end + + + diff --git a/cml.tgz b/cml.tgz new file mode 100644 index 0000000..ea0a833 Binary files /dev/null and b/cml.tgz differ diff --git a/cml/CHANGES b/cml/CHANGES new file mode 100644 index 0000000..c350442 --- /dev/null +++ b/cml/CHANGES @@ -0,0 +1,448 @@ +This is a record of changes made to CML and the CML Library. +------------------------------------------------------------ + +[2020-04-08] + Switch to using the "module subtraction" mechanism of CM so that + the CML version of the SML/NJ Library tracks the sequential + version automatically. While this change breaks compatibility + with the "Old CM", the "New CM" was introduced almost 20 years + ago and there are many other sources of incompatibilty. + +[2015-10-04] + Tracking changes to the SML/NJ implementation of the Basis Library. + +[2013-05-05] + Added Word64 to CML Basis. Note that, as with SML/NJ, this is not + the LargeWord structure (because of efficiency issues). Also added + some missing SML Basis Library signatures to the CML version of + the Basis. + +[2012-09-24] + Track changes to SML/NJ Library + +[2011-05-10] + Changed the paths used to specify the CML versions of libraries + in a CM file. The new scheme is + $cml/basis.cm -- the CML version of $/basis.cm + $cml/cml.cm -- core CML features + $cml/cml-lib.cm -- CML library code + $cml/trace-cml.cm -- TraceCML library for debugging + $cml/smlnj-lib.cm -- CML version of the $/smlnj-lib.cm library + $cml/inet-lib.cm -- CML version of the $/inet-lib.cm library + $cml/unix-lib.cm -- CML version of the $/unix-lib.cm library + +[2011-02-18] + Added barriers. These are inspired by the similar mechanism in CHP, + although we do not include any event-based operations on them as of yet. + +[2010-09-16] + Fixed the Win32 socket and polling implementation to work correctly + with CML. Signature of poll was wrong and didn't handle sockets at all. + (Thanks to Lars Bergstrom) + +[2008-07-16] + Fixed interface to OS.Process so that it agrees with the SML Basis Library + (added isSuccess and sleep; status is nolonger an eqtype). + +[2007-09-06] + Exported Int64 and IntInf from basis.cm. + +[2007-02-15] + Updates to the cml-lib to track removals from the SML/NJ library. + +[2007-01-28] + Updates to the cml-lib to track additions to the SML/NJ library. + +[2006-12-07] + Carried over the fix to the SOCKET signature. + +[2006-05-04] + Fixed bug with iGetPoll, mTakePoll, and mGetPoll. In some cases, the + atomic region was not being closed properly. The iGetPoll and mGetPoll + operations just read the value field now, since there is reads are + atomic. + +[2006-02-27] + Fixed bug with the combination of withNack and never, where the + negative acknowledgement is never generated. Thanks to Heath + Putnam for the bug report and fix. + +[2005-02-28] + Fixed serious bug in structure Atom. (Must use mvar, not mailbox!) + +[2005-02-24] + Reuse signature ATOM from $/smlnj-lib.cm, thus tracking all changes. + Implement structure Atom in terms of structure Atom in $/smlnj-lib.cm + by protecting access to the global hashtable using an mbox lock. + Atoms can now be created (sequentially) prior to calling RunCML.doit. + +[2004-11-24] + Made the IO implementation agnostic of size of Position.int. + (Now compiles with either Position = Int31 or Position = Int64.) + +[2003-09-23] + Accounted for changes to socket API in Basis. (Non-blocking + behavior is no longer a stateful property of a socket. Instead, + there are non-blocking versions of most functions in the + interface. This simplifies the CML code since it no longer + has to do OS-specific handling of "wouldblock" etc. + On the other hand, (trivial) CML implementations of those + non-blocking operations had to be added.) + +[2003-09-12] + Accounted for changed type of inputLine. + +[2003-09-09] + Made CML compile under Win32 again. + +[2003-09-03] + Added *_SLICE signatures and *Slice structures (copied from + "normal" Basis into CML Basis). + Fixed code broken due to API changes (slices). + +[2003-05-09] + Added the missing implementation of StreamIO event constructors + (e.g., TextIO.StreamIO.inputEvt). Note that if you use these + operations, then the system may not shutdown when all user threads + are blocked. + +[2003-04-21] + The CleanUp.logMailbox function was not protected against + interrupts (thanks to David Benson for catching this bug). + +[2003-04-09] + Changed the name of the directory cml-lib/cm to cml-lib/cm-descr + in order to avoid a name conflict with cml-lib/CM which gets + created by the compilation manager. (On case-insensitive + filesystems such as the one used by MacOS X the two names clash.) + +[2003-03-10] + Documentation cleanup. + +[2002-10-01] + Fixed a bad space leak in the stream I/O implementations. The + cleanup hooks were holding onto the initial buffer, which + meant that the entire buffer chain would remain live as long + as the stream was live. + +[2001-06-20] + Ported to SML/NJ 110.33 (in particular, the new CM). + Under the new CM, the CML library is known as $cml/cml.cm + and there is a CML-specific replacement for $/basis.cm + known as $cml/basis.cm. Two additional libraries + ($cml/cml-internal.cm and $cml/core-cml.cm) are used + internally but should not be referred to directly by client code. + A CML-specific replacement for $/smlnj-lib.cm is + known as $cml-lib/smlnj-lib.cm. The trace module is + $cml-lib/trace-cml.cm. (The aforementioned $cml/core-cml.cm + exists mainly to make it possible for $cml-lib/trace-cml.cm + to refer to it.) + +[2001-03-8] + Fixed a problem in the ordering of clean-up actions. IO streams + were being cleaned before servers, which meant that servers could + not use I/O in initialization or shutdown. + +[2001-01-14] + Fixed a bug in the timeout manager. Cleanup actions were being + executed twice, which breaks withNack. + +[2000-12-12] + Fixed a bug in the I/O manager (core-cml/io-manager.sml), where + if there were two threads blocked on the same descriptor (one + reading and one writing), the result might be matched to the + wrong thread. + +[2000-09-28] + Version 1.0.13 (SML/NJ Version 110.0.7) + +[2000-09-27] + Updated the CML Library to track additions to the SML/NJ Library. + +[1999-12-08] + Fixed a collection of bugs in event.sml related to the handling of + negative acknowledgements. + +[1999-12-01] + Modified RunCML.doit to install a dummy print hook. This prevents + the bug of SML/NJ's print function being called during CML execution + (the CML print function is installed by code in TextIO, but that + code isn't loaded by CML when the application does not specifically + mention TextIO). + +Version 1.0.12 (SML/NJ 110.0.6 and 110.8) +----------------------------------------- + +[1999-09-29] + Updated the CML Library to track additions to the SML/NJ Library. + +[1999-07-05] + Added support for Win32 sockets (thanks to Riccardo Pucella). + +[1998-08-04] + Added preliminary Win32 implementation (thanks to Riccardo Pucella). + +[1998-06-01] + Fixed type of inputLineEvt in CML_TEXT_STREAM_IO (thanks to A. Appel). + +[1998-03-12] + Fixed bug in ChanIOFn (failure to spawn threads). + + +Version 1.0.11 (SML/NJ 109.33 and 110) +-------------------------------------- + +[1997-11-21] + Added TextIO.scanStream function. + +[1997-11-21] + Added import of PathUtil:PATH_UTIL to CML library. + + +Version 1.0.10 (SML/NJ 109.32) +------------------------------ + +[1997-09-18] + Fixed a scheduler bug that occured when the atomicState was SignalPending + and atomicSwitchTo was used to exit the atomic region. + +[1997-09-15] + Fixed a bug in the RunCML.doit code that prevented multiple runs (this bug + was exposed by the previous bug fix). + +[1997-09-15] + Fixed a serious bug with the internal condition variables (Event.atomicCVarSet + was not changing the state of the variable). + +[1997-09-15] + Introduced datatype rebinding to eliminate some structure opening. + +[1997-09-14] + Moved definition of tidToString to RepTypes, so that it can be used in + debugging the CML internals. + +[1997-09-12] + Server initialization/shutdown is now handled by a dedicated cleanup + routine. Eventually, each server should define its own cleanup routine, + and we'll get rid of logServer. + +[1997-09-12] + Fixed bugs in the initialization and shutdown protocols for the standard + text streams. + + +Version 1.0.9 (SML/NJ 109.31) +----------------------------- + +[1997-09-02] + Added cleanup code for the I/O stacks (including the standard I/O streams). + +[1997-08-29] + Implemented a clean-up mechansim that is similar to that provided by + SML/NJ. The logging of global servers for initialization/shutdown + is now done using cleaners, which provide somewhat finer control. + Channel and mailbox logging is still supported, but the initialization + is done by a dedicated cleaner. This change is part of the overhaul + of RunCML. + +[1997-08-28] + Qualified the use of the "cont" type constructor, which is no longer + available at top level. + +[1997-08-28] + Replaced a few holdover uses of "abstraction" with opaque signature matching. + +[1997-08-15] + RunCML.doit now returns an OS.Process.status value. If a CML program + uses OS.Process.{exit,terminate} to shutdown, then the status argument + is returned as the result. If the system shuts down because of deadlock, + the OS.Process.failure is returned. + +[1997-08-15] + Many changes to ensure that exportFn images do not include the entire + top-level environment. These include: using SMLofNJ.isolate to create + top-level continuations, and reimplementing RunCML.exportFn to do better + housekeeping. To get this to work also required a number of changes + to the SML/NJ sources. + +[1997-08-15] + Fixed bug in scheduler, where a timer interrupt in an atomic region + was not getting marked by SignalPending. + +[1997-07-24] + The TextIO.output* functions did not implement line buffering; this + has now been added. + + +Version 1.0.8 (SML/NJ 109.30) +----------------------------- + +[1997-07-11] + Changed uses of System.Unsafe to Unsafe. + +[1997-07-11] + Eliminated redefinition of Scheduler structure to avoid loss of inlining, + since the compiler handles this properly now. + +[1997-07-11] + Added Atom.atom' (tracking SML/NJ library changes) + +[1997-06-30] + Removed Array2:ARRAY2 from library, as this is now gone from the + SML/NJ library. + + +Version 1.0.7 (SML/NJ 109.29) +----------------------------- + +[1997-06-11] + Changed OS.IO.{pollErr,isErr} to OS.IO.{pollPri,isPri} (this tracks fixes + in the SML/NJ implementation of the SML Basis Library. + +[1997-06-11] + Changes to src/Unix/posix-bin-prim-io.sml to track basis fixes. + + +Version 1.0.6 (SML/NJ 109.28) +----------------------------- + +[1997-05-21] + Added a temporary fix so that input operations do not cause the whole + CML system to block waiting for input. Eventually, there should be a + CML version of the complete Posix API, which is the right way to avoid + this problem. + +[1997-05-21] + Added RunCML.exportFn. + NOTE: there are some problems with the size of exported images that + need to be tracked down and fixed. + + +Version 1.0.5 (SML/NJ 109.26) +----------------------------- + +[1997-03-18] + Added SyncVar.{mSwap,mSwapEvt} operations, and changed the implementation + of multicast channels to use it. + +[1997-03-17] + Modified SyncVar.{mGet,mGetEvt} to resume other blocked threads (just + as iGet does). + +[1997-03-16] + Fixed a bug in SyncVar.{mTake,mTakeEvt}, where the cell was not getting + emptied in some execution paths. + +[1997-03-15] + Added an "error thread," which is enqueued when CML is not running. + This thread will get dispatched if someone tries to execute a CML + operation without using RunCML.doit (not including channel allocation, + etc.). I also added a top-level call to Scheduler.reset, which will + ensure that the current thread ID is initialized. + +[1997-03-13] + Fixed a serious bug in SyncVar.{iPut,mPut}, where the cell was not + getting filled in some cases. + + +Version 1.0.4 (SML/NJ 109.25.2) +------------------------------- + +[1997-02-28] + Added some additional modules to the CML Library: IOUtil:IO_UTIL and + KeywordsFn. These were recent additions to the SML/NJ Library. I also + reorganized the directory structure of the CML library to make it easier + to track changes in the SML/NJ Library. + +[1997-02-28] + Tracked changes in location of continuation operations changed in SML/NJ. + + +Version 1.0.3 (SML/NJ 109.25.1) +------------------------------- + +[1997-02-25] + Fixed bugs in the implementation of the pauseHook and schedulerHook in + RunCMLFn (missing atomicBegin). + +[1997-02-25] + Fixed bug in implementation of TraceCML when TraceToFile was set as the + destination. + +[1997-02-18] + Fixed bug in random access support in BinIOFn and TextIOFn (bug also in + SML/NJ implementation). + +[1997-02-17] + Added logging/unlogging for global mailboxes. + +[1997-02-17] + Fixed bug in the TraceCML.unwatchThread. Also changed the implementation + to avoid potential races between unwatching a thread and its termination. + +[1997-02-15] + Various changes to the Scheduler module to improve robustness. Added + an exception handler around the function in enqueueTmpThread. Replaced + enqueueCurThread with enqueueAndSwitchCurThread, which keeps the thread + ID bookkeeping in the same place. And fixed a bug(?) where atomicSwitchTo + was failing to set the current thread ID properly. + +[1997-02-04] + Added sendPoll operation on channels to CML structure. + + +Version 1.0.2 (SML/NJ 109.25) +----------------------------- + +[1997-01-30] + Added documentation for the Multicast and TraceCML library modules. + +[1997-01-30] + Made minor improvements to the implementation of TraceCML. + +[1997-01-29] + Changes to the SML/NJ system (available in 109.25) now make it possible + to use the top-level print function in CML programs (it gets dynamically + rebound to CML's version of TextIO.print when a CML program starts running). + +[1997-01-27] + Renamed CML.atEvt to CML.atTimeEvt. + +[1997-01-26] + Added SimpleRPC:SIMPLE_RPC to the CML library. + +[1997-01-26] + Fixed a bug in the I/O manager for the case when two I/O events became + enabled for the same thread at the same time. + + +Version 1.0.1 (SML/NJ 109.22) +----------------------------- + +[1996-11-09] + Added UnixEnv:UNIX_ENV to the structures reexported from the CML library. + +[1996-11-07] + Fixed bug in {TextIO,BinIO}.StreamIO.endOfStream, where more m-variable + was getting multiple puts. + + +Version 1.0 (SML/NJ 109.21.1) +----------------------------- + +[1996-11-01] + Added version and banner to CML structure. + +[1996-11-01] + Added CML version of Unix structure. + +[1996-11-01] + Changed the implementation of the internal condition variables to make + them work correctly. + +[1996-11-01] + Added implementation of OS.IO.poll and OS.IO.pollEvt. + +[1996-11-01] + Fixed implementation of Socket.connect. + diff --git a/cml/README b/cml/README new file mode 100644 index 0000000..6a3009f --- /dev/null +++ b/cml/README @@ -0,0 +1,12 @@ +This is an "beta" release of a completely new implementation of CML. +It has been in use since the Fall'96, and seems to be robust. +The doc/HTML directory has partial HTML documentation for this version. + +In keeping with the migration of SML/NJ to the SML'97 standard, this version +of CML has a number of changes in the API. See the file doc/HTML/porting.html +for more information. + + - John Reppy + Bell Labs, Lucent Technologies + jhr@research.bell-labs.com + diff --git a/cml/TODO b/cml/TODO new file mode 100644 index 0000000..8fbccc7 --- /dev/null +++ b/cml/TODO @@ -0,0 +1,20 @@ +Short-term +---------- + +Finish CML reference manual + +Alert mechanism for interrupting threads. + +Create a CML version of SML basis Posix library + + +Long-term +--------- + +Interactive top-level loop for CML. + +Multi-threading run-time system. + +Multi-processor implementation. + + diff --git a/cml/cml-lib/.cm/GUID/multicast-sig.sml b/cml/cml-lib/.cm/GUID/multicast-sig.sml new file mode 100644 index 0000000..1fb7d11 --- /dev/null +++ b/cml/cml-lib/.cm/GUID/multicast-sig.sml @@ -0,0 +1 @@ +guid-$cml-lib/(smlnj-lib.cm):../(sources.cm):multicast-sig.sml-1714016097.690 diff --git a/cml/cml-lib/.cm/GUID/multicast.sml b/cml/cml-lib/.cm/GUID/multicast.sml new file mode 100644 index 0000000..4940943 --- /dev/null +++ b/cml/cml-lib/.cm/GUID/multicast.sml @@ -0,0 +1 @@ +guid-$cml-lib/(smlnj-lib.cm):../(sources.cm):multicast.sml-1714016097.694 diff --git a/cml/cml-lib/.cm/GUID/simple-rpc-sig.sml b/cml/cml-lib/.cm/GUID/simple-rpc-sig.sml new file mode 100644 index 0000000..9475427 --- /dev/null +++ b/cml/cml-lib/.cm/GUID/simple-rpc-sig.sml @@ -0,0 +1 @@ +guid-$cml-lib/(smlnj-lib.cm):../(sources.cm):simple-rpc-sig.sml-1714016097.466 diff --git a/cml/cml-lib/.cm/GUID/simple-rpc.sml b/cml/cml-lib/.cm/GUID/simple-rpc.sml new file mode 100644 index 0000000..a4c4717 --- /dev/null +++ b/cml/cml-lib/.cm/GUID/simple-rpc.sml @@ -0,0 +1 @@ +guid-$cml-lib/(smlnj-lib.cm):../(sources.cm):simple-rpc.sml-1714016097.523 diff --git a/cml/cml-lib/.cm/GUID/trace-cml-sig.sml b/cml/cml-lib/.cm/GUID/trace-cml-sig.sml new file mode 100644 index 0000000..ddf283d --- /dev/null +++ b/cml/cml-lib/.cm/GUID/trace-cml-sig.sml @@ -0,0 +1 @@ +guid-$cml-lib/(trace-cml.cm):../(trace-cml.cm):trace-cml-sig.sml-1714016097.265 diff --git a/cml/cml-lib/.cm/GUID/trace-cml.sml b/cml/cml-lib/.cm/GUID/trace-cml.sml new file mode 100644 index 0000000..885841f --- /dev/null +++ b/cml/cml-lib/.cm/GUID/trace-cml.sml @@ -0,0 +1 @@ +guid-$cml-lib/(trace-cml.cm):../(trace-cml.cm):trace-cml.sml-1714016097.327 diff --git a/cml/cml-lib/.cm/SKEL/multicast-sig.sml b/cml/cml-lib/.cm/SKEL/multicast-sig.sml new file mode 100644 index 0000000..1d281be --- /dev/null +++ b/cml/cml-lib/.cm/SKEL/multicast-sig.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f1d"CML"ac"MULTICAST"h0 \ No newline at end of file diff --git a/cml/cml-lib/.cm/SKEL/multicast.sml b/cml/cml-lib/.cm/SKEL/multicast.sml new file mode 100644 index 0000000..cb371b0 --- /dev/null +++ b/cml/cml-lib/.cm/SKEL/multicast.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f1d"CML"ad"Multicast"jh1ad"V"gp1d"SyncVar"gp1c"MULTICAST" \ No newline at end of file diff --git a/cml/cml-lib/.cm/SKEL/simple-rpc-sig.sml b/cml/cml-lib/.cm/SKEL/simple-rpc-sig.sml new file mode 100644 index 0000000..f9f417e --- /dev/null +++ b/cml/cml-lib/.cm/SKEL/simple-rpc-sig.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f1d"CML"ac"SIMPLE_RPC"h0 \ No newline at end of file diff --git a/cml/cml-lib/.cm/SKEL/simple-rpc.sml b/cml/cml-lib/.cm/SKEL/simple-rpc.sml new file mode 100644 index 0000000..b890147 --- /dev/null +++ b/cml/cml-lib/.cm/SKEL/simple-rpc.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f3d"CML"d"Mailbox"d"SyncVar"ad"SimpleRPC"jh0gp1c"SIMPLE_RPC" \ No newline at end of file diff --git a/cml/cml-lib/.cm/SKEL/trace-cml-sig.sml b/cml/cml-lib/.cm/SKEL/trace-cml-sig.sml new file mode 100644 index 0000000..0e134dd --- /dev/null +++ b/cml/cml-lib/.cm/SKEL/trace-cml-sig.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f2d"CML"d"TextIO"ac"TRACE_CML"h0 \ No newline at end of file diff --git a/cml/cml-lib/.cm/SKEL/trace-cml.sml b/cml/cml-lib/.cm/SKEL/trace-cml.sml new file mode 100644 index 0000000..83dae39 --- /dev/null +++ b/cml/cml-lib/.cm/SKEL/trace-cml.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f8d"CML"d"Debug"d"Thread"Cd"SMLofNJ"d"List"d"Mailbox"d"RunCML"d"TextIO"Nad"TraceCML"jh3ad"SV"gp1d"SyncVar"ad"SS"gp1d"Substring"ad"TidTbl"jh0gp1e"HashTableFn"gp1c"TRACE_CML" \ No newline at end of file diff --git a/cml/cml-lib/.cm/amd64-unix/multicast-sig.sml b/cml/cml-lib/.cm/amd64-unix/multicast-sig.sml new file mode 100644 index 0000000..a2e0b70 Binary files /dev/null and b/cml/cml-lib/.cm/amd64-unix/multicast-sig.sml differ diff --git a/cml/cml-lib/.cm/amd64-unix/multicast.sml b/cml/cml-lib/.cm/amd64-unix/multicast.sml new file mode 100644 index 0000000..85a9abf Binary files /dev/null and b/cml/cml-lib/.cm/amd64-unix/multicast.sml differ diff --git a/cml/cml-lib/.cm/amd64-unix/simple-rpc-sig.sml b/cml/cml-lib/.cm/amd64-unix/simple-rpc-sig.sml new file mode 100644 index 0000000..8ed60d3 Binary files /dev/null and b/cml/cml-lib/.cm/amd64-unix/simple-rpc-sig.sml differ diff --git a/cml/cml-lib/.cm/amd64-unix/simple-rpc.sml b/cml/cml-lib/.cm/amd64-unix/simple-rpc.sml new file mode 100644 index 0000000..9035d7a Binary files /dev/null and b/cml/cml-lib/.cm/amd64-unix/simple-rpc.sml differ diff --git a/cml/cml-lib/.cm/amd64-unix/trace-cml-sig.sml b/cml/cml-lib/.cm/amd64-unix/trace-cml-sig.sml new file mode 100644 index 0000000..691416a Binary files /dev/null and b/cml/cml-lib/.cm/amd64-unix/trace-cml-sig.sml differ diff --git a/cml/cml-lib/.cm/amd64-unix/trace-cml.sml b/cml/cml-lib/.cm/amd64-unix/trace-cml.sml new file mode 100644 index 0000000..7314c2e Binary files /dev/null and b/cml/cml-lib/.cm/amd64-unix/trace-cml.sml differ diff --git a/cml/cml-lib/README b/cml/cml-lib/README new file mode 100644 index 0000000..77fbe6d --- /dev/null +++ b/cml/cml-lib/README @@ -0,0 +1,3 @@ +This directory contains a small library of useful CML utilities. +Some of these are versions of the SML/NJ library modules implemented +using global servers to protect global state. diff --git a/cml/cml-lib/SMLNJ-INet/.cm/GUID/sock-util-sig.sml b/cml/cml-lib/SMLNJ-INet/.cm/GUID/sock-util-sig.sml new file mode 100644 index 0000000..d3fe05f --- /dev/null +++ b/cml/cml-lib/SMLNJ-INet/.cm/GUID/sock-util-sig.sml @@ -0,0 +1 @@ +guid-$cml-lib/(smlnj-lib.cm):../(sources.cm):SMLNJ-INet/sock-util-sig.sml-1714016097.552 diff --git a/cml/cml-lib/SMLNJ-INet/.cm/GUID/sock-util.sml b/cml/cml-lib/SMLNJ-INet/.cm/GUID/sock-util.sml new file mode 100644 index 0000000..fbb29d2 --- /dev/null +++ b/cml/cml-lib/SMLNJ-INet/.cm/GUID/sock-util.sml @@ -0,0 +1 @@ +guid-$cml-lib/(smlnj-lib.cm):../(sources.cm):SMLNJ-INet/sock-util.sml-1714016097.619 diff --git a/cml/cml-lib/SMLNJ-INet/.cm/GUID/unix-sock-util.sml b/cml/cml-lib/SMLNJ-INet/.cm/GUID/unix-sock-util.sml new file mode 100644 index 0000000..9c34273 --- /dev/null +++ b/cml/cml-lib/SMLNJ-INet/.cm/GUID/unix-sock-util.sml @@ -0,0 +1 @@ +guid-$cml-lib/(smlnj-lib.cm):../(sources.cm):SMLNJ-INet/unix-sock-util.sml-1714016097.668 diff --git a/cml/cml-lib/SMLNJ-INet/.cm/SKEL/sock-util-sig.sml b/cml/cml-lib/SMLNJ-INet/.cm/SKEL/sock-util-sig.sml new file mode 100644 index 0000000..69cb72c --- /dev/null +++ b/cml/cml-lib/SMLNJ-INet/.cm/SKEL/sock-util-sig.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f6d"StringCvt"Cd"NetHostDB"d"INetSock"d"Word8Vector"d"Word8Array"d"Socket"Nac"SOCK_UTIL"h0 \ No newline at end of file diff --git a/cml/cml-lib/SMLNJ-INet/.cm/SKEL/sock-util.sml b/cml/cml-lib/SMLNJ-INet/.cm/SKEL/sock-util.sml new file mode 100644 index 0000000..6406c3a --- /dev/null +++ b/cml/cml-lib/SMLNJ-INet/.cm/SKEL/sock-util.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f7d"OS"d"StringCvt"Cd"Byte"d"Word8VectorSlice"d"Word8ArraySlice"d"NetHostDB"d"Int"Cd"NetServDB"d"INetSock"d"Word8Vector"d"Word8Array"d"Socket"Nad"SockUtil"jh2ad"C"gp1d"Char"ad"PC"gp1d"ParserComb"gp1c"SOCK_UTIL" \ No newline at end of file diff --git a/cml/cml-lib/SMLNJ-INet/.cm/SKEL/unix-sock-util.sml b/cml/cml-lib/SMLNJ-INet/.cm/SKEL/unix-sock-util.sml new file mode 100644 index 0000000..7f832c3 --- /dev/null +++ b/cml/cml-lib/SMLNJ-INet/.cm/SKEL/unix-sock-util.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2aUNIX_SOCK_UTIL"h2egp1c"SOCK_UTIL"f1UnixSock"ad"UnixSockUtil"jh2egp1d"SockUtil"f2(d"Socket"gp1 \ No newline at end of file diff --git a/cml/cml-lib/SMLNJ-INet/.cm/amd64-unix/sock-util-sig.sml b/cml/cml-lib/SMLNJ-INet/.cm/amd64-unix/sock-util-sig.sml new file mode 100644 index 0000000..857312f Binary files /dev/null and b/cml/cml-lib/SMLNJ-INet/.cm/amd64-unix/sock-util-sig.sml differ diff --git a/cml/cml-lib/SMLNJ-INet/.cm/amd64-unix/sock-util.sml b/cml/cml-lib/SMLNJ-INet/.cm/amd64-unix/sock-util.sml new file mode 100644 index 0000000..dfd6f8d Binary files /dev/null and b/cml/cml-lib/SMLNJ-INet/.cm/amd64-unix/sock-util.sml differ diff --git a/cml/cml-lib/SMLNJ-INet/.cm/amd64-unix/unix-sock-util.sml b/cml/cml-lib/SMLNJ-INet/.cm/amd64-unix/unix-sock-util.sml new file mode 100644 index 0000000..82774fa Binary files /dev/null and b/cml/cml-lib/SMLNJ-INet/.cm/amd64-unix/unix-sock-util.sml differ diff --git a/cml/cml-lib/SMLNJ-INet/README b/cml/cml-lib/SMLNJ-INet/README new file mode 100644 index 0000000..8c9610b --- /dev/null +++ b/cml/cml-lib/SMLNJ-INet/README @@ -0,0 +1,3 @@ +These are library modules from the SML/NJ INet library, +which are copied/reimplemented in the CML library. + diff --git a/cml/cml-lib/SMLNJ-INet/sock-util-sig.sml b/cml/cml-lib/SMLNJ-INet/sock-util-sig.sml new file mode 100644 index 0000000..b6f7f5f --- /dev/null +++ b/cml/cml-lib/SMLNJ-INet/sock-util-sig.sml @@ -0,0 +1,48 @@ +(* sock-util-sig.sml + * + * COPYRIGHT (c) 1999 Bell Labs, Lucent Technologies. + * + * Various utility functions for programming with sockets. + *) + +signature SOCK_UTIL = + sig + + datatype port = PortNumber of int | ServName of string + (* a port can be identified by number, or by the name of a service *) + + datatype hostname = HostName of string | HostAddr of NetHostDB.in_addr + + val scanAddr : (char, 'a) StringCvt.reader + -> ({host : hostname, port : port option}, 'a) StringCvt.reader + (* scan an address, which has the form + * addr [ ":" port ] + * where the addr may either be numeric or symbolic host name and the + * port is either a service name or a decimal number. Legal host names + * must begin with a letter, and may contain any alphanumeric character, + * the minus sign (-) and period (.), where the period is used as a + * domain separator. + *) + + exception BadAddr of string + + val resolveAddr : {host : hostname, port : port option} + -> {host : string, addr : NetHostDB.in_addr, port : int option} + (* Given a hostname and optional port, resolve them in the host + * and service database. If either the host or service name is not + * found, then BadAddr is raised. + *) + + type 'a stream_sock = ('a, Socket.active Socket.stream) Socket.sock + + val connectINetStrm : {addr : NetHostDB.in_addr, port : int} + -> INetSock.inet stream_sock + (* establish a client-side connection to a INET domain stream socket *) + + val recvVec : ('a stream_sock * int) -> Word8Vector.vector + val recvStr : ('a stream_sock * int) -> string + val sendVec : ('a stream_sock * Word8Vector.vector) -> unit + val sendStr : ('a stream_sock * string) -> unit + val sendArr : ('a stream_sock * Word8Array.array) -> unit + + end; diff --git a/cml/cml-lib/SMLNJ-INet/sock-util.sml b/cml/cml-lib/SMLNJ-INet/sock-util.sml new file mode 100644 index 0000000..40fccd7 --- /dev/null +++ b/cml/cml-lib/SMLNJ-INet/sock-util.sml @@ -0,0 +1,135 @@ +(* sock-util.sml + * + * COPYRIGHT (c) 1999 Bell Labs, Lucent Technologies. + * + * Various utility functions for programming with sockets. + *) + +structure SockUtil : SOCK_UTIL = + struct + + structure C = Char + structure PC = ParserComb + + datatype port = PortNumber of int | ServName of string + (* a port can be identified by number, or by the name of a service *) + + datatype hostname = HostName of string | HostAddr of NetHostDB.in_addr + +(** This belongs in an Option structure **) + fun filterPartial pred NONE = NONE + | filterPartial pred (SOME x) = if (pred x) then SOME x else NONE + + fun scanName getc strm = let + fun isNameChr (#".", _) = true + | isNameChr (#"-", _) = true + | isNameChr (c, _) = C.isAlphaNum c + fun getName (strm, cl) = (case filterPartial isNameChr (getc strm) + of SOME(c, strm') => getName(strm', c::cl) + | NONE => SOME(implode(rev cl), strm) + (* end case *)) + in + case (filterPartial (C.isAlpha o #1) (getc strm)) + of SOME(c, strm) => getName(strm, [c]) + | NONE => NONE + (* end case *) + end + + (* scan an address, which has the form + * addr [ ":" port ] + * where the addr may either be numeric or symbolic host name and the + * port is either a service name or a decimal number. Legal host names + * must begin with a letter, and may contain any alphanumeric character, + * the minus sign (-) and period (.), where the period is used as a + * domain separator. + *) + fun scanAddr getc strm = + PC.seqWith (fn (host, port) => {host=host, port=port}) ( + PC.or ( + PC.wrap (scanName, HostName), + PC.wrap (NetHostDB.scan, HostAddr)), + PC.option ( + PC.seqWith #2 ( + PC.eatChar (fn c => (c = #":")), + PC.or ( + PC.wrap (scanName, ServName), + PC.wrap (Int.scan StringCvt.DEC, PortNumber))))) getc strm + + exception BadAddr of string + + fun resolveAddr {host, port} = let + fun err (a, b) = raise BadAddr(concat[a, " \"", b, "\" not found"]) + val (name, addr) = (case host + of HostName s => (case NetHostDB.getByName s + of NONE => err ("hostname", s) + | (SOME entry) => (s, NetHostDB.addr entry) + (* end case *)) + | HostAddr addr => (case NetHostDB.getByAddr addr + of NONE => err ("host address", NetHostDB.toString addr) + | (SOME entry) => (NetHostDB.name entry, addr) + (* end case *)) + (* end case *)) + val port = (case port + of (SOME(PortNumber n)) => SOME n + | (SOME(ServName s)) => (case NetServDB.getByName(s, NONE) + of (SOME entry) => SOME(NetServDB.port entry) + | NONE => err("service", s) + (* end case *)) + | NONE => NONE + (* end case *)) + in + {host = name, addr = addr, port = port} + end + + type 'a stream_sock = ('a, Socket.active Socket.stream) Socket.sock + + (* establish a client-side connection to a INET domain stream socket *) + fun connectINetStrm {addr, port} = let + val sock = INetSock.TCP.socket () + in + Socket.connect (sock, INetSock.toAddr(addr, port)); + sock + end + +(** If the server closes the connection, do we get 0 bytes or an error??? **) + (* read exactly n bytes from a stream socket *) + fun recvVec (sock, n) = let + fun get (0, data) = Word8Vector.concat(rev data) + | get (n, data) = let + val v = Socket.recvVec (sock, n) + in + if (Word8Vector.length v = 0) + then raise OS.SysErr("closed socket", NONE) + else get (n - Word8Vector.length v, v::data) + end + in + if (n < 0) then raise Size else get (n, []) + end + + fun recvStr arg = Byte.bytesToString (recvVec arg) + + (* send the complete contents of a vector *) + fun sendVec (sock, vec) = let + val len = Word8Vector.length vec + fun send i = Socket.sendVec (sock, Word8VectorSlice.slice (vec, i, NONE)) + fun put i = if (i < len) + then put(i + send i) + else () + in + put 0 + end + + fun sendStr (sock, str) = sendVec (sock, Byte.stringToBytes str) + + (* send the complete contents of an array *) + fun sendArr (sock, arr) = let + val len = Word8Array.length arr + fun send i = Socket.sendArr (sock, Word8ArraySlice.slice (arr, i, NONE)) + fun put i = if (i < len) + then put(i + send i) + else () + in + put 0 + end + + end; diff --git a/cml/cml-lib/SMLNJ-INet/unix-sock-util.sml b/cml/cml-lib/SMLNJ-INet/unix-sock-util.sml new file mode 100644 index 0000000..c067513 --- /dev/null +++ b/cml/cml-lib/SMLNJ-INet/unix-sock-util.sml @@ -0,0 +1,33 @@ +(* unix-sock-util.sml + * + * COPYRIGHT (c) 1999 Bell Labs, Lucent Technologies. + * + * Bind SockUtil structure on Unix systems + *) + +signature UNIX_SOCK_UTIL = + sig + + include SOCK_UTIL + + val connectUnixStrm : string -> UnixSock.unix stream_sock + (* establish a client-side connection to a Unix-domain stream socket *) + + end + + +structure UnixSockUtil : UNIX_SOCK_UTIL = + struct + + open SockUtil + + (* establish a client-side connection to a Unix-domain stream socket *) + fun connectUnixStrm path = let + val sock = UnixSock.Strm.socket () + in + Socket.connect (sock, UnixSock.toAddr path); + sock + end + + end + diff --git a/cml/cml-lib/SMLNJ-Unix/README b/cml/cml-lib/SMLNJ-Unix/README new file mode 100644 index 0000000..dd7c9db --- /dev/null +++ b/cml/cml-lib/SMLNJ-Unix/README @@ -0,0 +1,3 @@ +These are library modules from the SML/NJ Unix library, +which are copied/reimplemented in the CML library. + diff --git a/cml/cml-lib/SMLNJ-Util/.cm/GUID/atom-binary-map.sml b/cml/cml-lib/SMLNJ-Util/.cm/GUID/atom-binary-map.sml new file mode 100644 index 0000000..d36c3a4 --- /dev/null +++ b/cml/cml-lib/SMLNJ-Util/.cm/GUID/atom-binary-map.sml @@ -0,0 +1 @@ +guid-$cml-lib/(smlnj-lib.cm):../(sources.cm):SMLNJ-Util/atom-binary-map.sml-1714016097.677 diff --git a/cml/cml-lib/SMLNJ-Util/.cm/GUID/atom-binary-set.sml b/cml/cml-lib/SMLNJ-Util/.cm/GUID/atom-binary-set.sml new file mode 100644 index 0000000..f430145 --- /dev/null +++ b/cml/cml-lib/SMLNJ-Util/.cm/GUID/atom-binary-set.sml @@ -0,0 +1 @@ +guid-$cml-lib/(smlnj-lib.cm):../(sources.cm):SMLNJ-Util/atom-binary-set.sml-1714016097.559 diff --git a/cml/cml-lib/SMLNJ-Util/.cm/GUID/atom-map.sml b/cml/cml-lib/SMLNJ-Util/.cm/GUID/atom-map.sml new file mode 100644 index 0000000..a06970f --- /dev/null +++ b/cml/cml-lib/SMLNJ-Util/.cm/GUID/atom-map.sml @@ -0,0 +1 @@ +guid-$cml-lib/(smlnj-lib.cm):../(sources.cm):SMLNJ-Util/atom-map.sml-1714016097.724 diff --git a/cml/cml-lib/SMLNJ-Util/.cm/GUID/atom-redblack-map.sml b/cml/cml-lib/SMLNJ-Util/.cm/GUID/atom-redblack-map.sml new file mode 100644 index 0000000..13eaffa --- /dev/null +++ b/cml/cml-lib/SMLNJ-Util/.cm/GUID/atom-redblack-map.sml @@ -0,0 +1 @@ +guid-$cml-lib/(smlnj-lib.cm):../(sources.cm):SMLNJ-Util/atom-redblack-map.sml-1714016097.684 diff --git a/cml/cml-lib/SMLNJ-Util/.cm/GUID/atom-redblack-set.sml b/cml/cml-lib/SMLNJ-Util/.cm/GUID/atom-redblack-set.sml new file mode 100644 index 0000000..3ac261a --- /dev/null +++ b/cml/cml-lib/SMLNJ-Util/.cm/GUID/atom-redblack-set.sml @@ -0,0 +1 @@ +guid-$cml-lib/(smlnj-lib.cm):../(sources.cm):SMLNJ-Util/atom-redblack-set.sml-1714016097.513 diff --git a/cml/cml-lib/SMLNJ-Util/.cm/GUID/atom-set.sml b/cml/cml-lib/SMLNJ-Util/.cm/GUID/atom-set.sml new file mode 100644 index 0000000..f607b18 --- /dev/null +++ b/cml/cml-lib/SMLNJ-Util/.cm/GUID/atom-set.sml @@ -0,0 +1 @@ +guid-$cml-lib/(smlnj-lib.cm):../(sources.cm):SMLNJ-Util/atom-set.sml-1714016097.519 diff --git a/cml/cml-lib/SMLNJ-Util/.cm/GUID/atom-table.sml b/cml/cml-lib/SMLNJ-Util/.cm/GUID/atom-table.sml new file mode 100644 index 0000000..c6459bf --- /dev/null +++ b/cml/cml-lib/SMLNJ-Util/.cm/GUID/atom-table.sml @@ -0,0 +1 @@ +guid-$cml-lib/(smlnj-lib.cm):../(sources.cm):SMLNJ-Util/atom-table.sml-1714016097.508 diff --git a/cml/cml-lib/SMLNJ-Util/.cm/GUID/cml-atom-new.sml b/cml/cml-lib/SMLNJ-Util/.cm/GUID/cml-atom-new.sml new file mode 100644 index 0000000..e8d8f82 --- /dev/null +++ b/cml/cml-lib/SMLNJ-Util/.cm/GUID/cml-atom-new.sml @@ -0,0 +1 @@ +guid-$cml-lib/(smlnj-lib.cm):../(sources.cm):SMLNJ-Util/cml-atom-new.sml-1714016097.501 diff --git a/cml/cml-lib/SMLNJ-Util/.cm/GUID/io-util-sig.sml b/cml/cml-lib/SMLNJ-Util/.cm/GUID/io-util-sig.sml new file mode 100644 index 0000000..1c01ece --- /dev/null +++ b/cml/cml-lib/SMLNJ-Util/.cm/GUID/io-util-sig.sml @@ -0,0 +1 @@ +guid-$cml-lib/(smlnj-lib.cm):../(sources.cm):SMLNJ-Util/io-util-sig.sml-1714016097.472 diff --git a/cml/cml-lib/SMLNJ-Util/.cm/GUID/io-util.sml b/cml/cml-lib/SMLNJ-Util/.cm/GUID/io-util.sml new file mode 100644 index 0000000..d1e3608 --- /dev/null +++ b/cml/cml-lib/SMLNJ-Util/.cm/GUID/io-util.sml @@ -0,0 +1 @@ +guid-$cml-lib/(smlnj-lib.cm):../(sources.cm):SMLNJ-Util/io-util.sml-1714016097.475 diff --git a/cml/cml-lib/SMLNJ-Util/.cm/SKEL/atom-binary-map.sml b/cml/cml-lib/SMLNJ-Util/.cm/SKEL/atom-binary-map.sml new file mode 100644 index 0000000..2f6207d --- /dev/null +++ b/cml/cml-lib/SMLNJ-Util/.cm/SKEL/atom-binary-map.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f1d"Atom"ad"AtomBinaryMap"jh0gp1e"BinaryMapFn" \ No newline at end of file diff --git a/cml/cml-lib/SMLNJ-Util/.cm/SKEL/atom-binary-set.sml b/cml/cml-lib/SMLNJ-Util/.cm/SKEL/atom-binary-set.sml new file mode 100644 index 0000000..8d67b01 --- /dev/null +++ b/cml/cml-lib/SMLNJ-Util/.cm/SKEL/atom-binary-set.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f1d"Atom"ad"AtomBinarySet"jh0gp1e"BinarySetFn" \ No newline at end of file diff --git a/cml/cml-lib/SMLNJ-Util/.cm/SKEL/atom-map.sml b/cml/cml-lib/SMLNJ-Util/.cm/SKEL/atom-map.sml new file mode 100644 index 0000000..03f4087 --- /dev/null +++ b/cml/cml-lib/SMLNJ-Util/.cm/SKEL/atom-map.sml @@ -0,0 +1,2 @@ +Skeleton 5 +ad"AtomMap"gp1d"AtomRedBlackMap" \ No newline at end of file diff --git a/cml/cml-lib/SMLNJ-Util/.cm/SKEL/atom-redblack-map.sml b/cml/cml-lib/SMLNJ-Util/.cm/SKEL/atom-redblack-map.sml new file mode 100644 index 0000000..1dc259a --- /dev/null +++ b/cml/cml-lib/SMLNJ-Util/.cm/SKEL/atom-redblack-map.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f1d"Atom"ad"AtomRedBlackMap"jh0gp1e"RedBlackMapFn" \ No newline at end of file diff --git a/cml/cml-lib/SMLNJ-Util/.cm/SKEL/atom-redblack-set.sml b/cml/cml-lib/SMLNJ-Util/.cm/SKEL/atom-redblack-set.sml new file mode 100644 index 0000000..0c48ff0 --- /dev/null +++ b/cml/cml-lib/SMLNJ-Util/.cm/SKEL/atom-redblack-set.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f1d"Atom"ad"AtomRedBlackSet"jh0gp1e"RedBlackSetFn" \ No newline at end of file diff --git a/cml/cml-lib/SMLNJ-Util/.cm/SKEL/atom-set.sml b/cml/cml-lib/SMLNJ-Util/.cm/SKEL/atom-set.sml new file mode 100644 index 0000000..11f6bea --- /dev/null +++ b/cml/cml-lib/SMLNJ-Util/.cm/SKEL/atom-set.sml @@ -0,0 +1,2 @@ +Skeleton 5 +ad"AtomSet"gp1d"AtomRedBlackSet" \ No newline at end of file diff --git a/cml/cml-lib/SMLNJ-Util/.cm/SKEL/atom-table.sml b/cml/cml-lib/SMLNJ-Util/.cm/SKEL/atom-table.sml new file mode 100644 index 0000000..d227d54 --- /dev/null +++ b/cml/cml-lib/SMLNJ-Util/.cm/SKEL/atom-table.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f1d"Atom"ad"AtomTable"jh0gp1e"HashTableFn" \ No newline at end of file diff --git a/cml/cml-lib/SMLNJ-Util/.cm/SKEL/cml-atom-new.sml b/cml/cml-lib/SMLNJ-Util/.cm/SKEL/cml-atom-new.sml new file mode 100644 index 0000000..b58cef5 --- /dev/null +++ b/cml/cml-lib/SMLNJ-Util/.cm/SKEL/cml-atom-new.sml @@ -0,0 +1,2 @@ +Skeleton 5 +aAtom"jh2egp1f1d"SyncVar"gp1c"ATOM" \ No newline at end of file diff --git a/cml/cml-lib/SMLNJ-Util/.cm/SKEL/io-util-sig.sml b/cml/cml-lib/SMLNJ-Util/.cm/SKEL/io-util-sig.sml new file mode 100644 index 0000000..aaa4d6f --- /dev/null +++ b/cml/cml-lib/SMLNJ-Util/.cm/SKEL/io-util-sig.sml @@ -0,0 +1,2 @@ +Skeleton 5 +ac"IO_UTIL"h0 \ No newline at end of file diff --git a/cml/cml-lib/SMLNJ-Util/.cm/SKEL/io-util.sml b/cml/cml-lib/SMLNJ-Util/.cm/SKEL/io-util.sml new file mode 100644 index 0000000..d82e0fb --- /dev/null +++ b/cml/cml-lib/SMLNJ-Util/.cm/SKEL/io-util.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f1d"TextIO"ad"IOUtil"jh0gp1c"IO_UTIL" \ No newline at end of file diff --git a/cml/cml-lib/SMLNJ-Util/.cm/amd64-unix/atom-binary-map.sml b/cml/cml-lib/SMLNJ-Util/.cm/amd64-unix/atom-binary-map.sml new file mode 100644 index 0000000..66b2e37 Binary files /dev/null and b/cml/cml-lib/SMLNJ-Util/.cm/amd64-unix/atom-binary-map.sml differ diff --git a/cml/cml-lib/SMLNJ-Util/.cm/amd64-unix/atom-binary-set.sml b/cml/cml-lib/SMLNJ-Util/.cm/amd64-unix/atom-binary-set.sml new file mode 100644 index 0000000..0285b08 Binary files /dev/null and b/cml/cml-lib/SMLNJ-Util/.cm/amd64-unix/atom-binary-set.sml differ diff --git a/cml/cml-lib/SMLNJ-Util/.cm/amd64-unix/atom-map.sml b/cml/cml-lib/SMLNJ-Util/.cm/amd64-unix/atom-map.sml new file mode 100644 index 0000000..522d5a8 Binary files /dev/null and b/cml/cml-lib/SMLNJ-Util/.cm/amd64-unix/atom-map.sml differ diff --git a/cml/cml-lib/SMLNJ-Util/.cm/amd64-unix/atom-redblack-map.sml b/cml/cml-lib/SMLNJ-Util/.cm/amd64-unix/atom-redblack-map.sml new file mode 100644 index 0000000..af97ff5 Binary files /dev/null and b/cml/cml-lib/SMLNJ-Util/.cm/amd64-unix/atom-redblack-map.sml differ diff --git a/cml/cml-lib/SMLNJ-Util/.cm/amd64-unix/atom-redblack-set.sml b/cml/cml-lib/SMLNJ-Util/.cm/amd64-unix/atom-redblack-set.sml new file mode 100644 index 0000000..3f83087 Binary files /dev/null and b/cml/cml-lib/SMLNJ-Util/.cm/amd64-unix/atom-redblack-set.sml differ diff --git a/cml/cml-lib/SMLNJ-Util/.cm/amd64-unix/atom-set.sml b/cml/cml-lib/SMLNJ-Util/.cm/amd64-unix/atom-set.sml new file mode 100644 index 0000000..8ab246d Binary files /dev/null and b/cml/cml-lib/SMLNJ-Util/.cm/amd64-unix/atom-set.sml differ diff --git a/cml/cml-lib/SMLNJ-Util/.cm/amd64-unix/atom-table.sml b/cml/cml-lib/SMLNJ-Util/.cm/amd64-unix/atom-table.sml new file mode 100644 index 0000000..d373e66 Binary files /dev/null and b/cml/cml-lib/SMLNJ-Util/.cm/amd64-unix/atom-table.sml differ diff --git a/cml/cml-lib/SMLNJ-Util/.cm/amd64-unix/cml-atom-new.sml b/cml/cml-lib/SMLNJ-Util/.cm/amd64-unix/cml-atom-new.sml new file mode 100644 index 0000000..fbd7605 Binary files /dev/null and b/cml/cml-lib/SMLNJ-Util/.cm/amd64-unix/cml-atom-new.sml differ diff --git a/cml/cml-lib/SMLNJ-Util/.cm/amd64-unix/io-util-sig.sml b/cml/cml-lib/SMLNJ-Util/.cm/amd64-unix/io-util-sig.sml new file mode 100644 index 0000000..7b96aae Binary files /dev/null and b/cml/cml-lib/SMLNJ-Util/.cm/amd64-unix/io-util-sig.sml differ diff --git a/cml/cml-lib/SMLNJ-Util/.cm/amd64-unix/io-util.sml b/cml/cml-lib/SMLNJ-Util/.cm/amd64-unix/io-util.sml new file mode 100644 index 0000000..da0c22d Binary files /dev/null and b/cml/cml-lib/SMLNJ-Util/.cm/amd64-unix/io-util.sml differ diff --git a/cml/cml-lib/SMLNJ-Util/README b/cml/cml-lib/SMLNJ-Util/README new file mode 100644 index 0000000..ba9b430 --- /dev/null +++ b/cml/cml-lib/SMLNJ-Util/README @@ -0,0 +1,3 @@ +These are library modules from the SML/NJ Util library, +which are copied/reimplemented in the CML library. + diff --git a/cml/cml-lib/SMLNJ-Util/atom-binary-map.sml b/cml/cml-lib/SMLNJ-Util/atom-binary-map.sml new file mode 100644 index 0000000..9c84655 --- /dev/null +++ b/cml/cml-lib/SMLNJ-Util/atom-binary-map.sml @@ -0,0 +1,13 @@ +(* atom-binary-map.sml + * + * COPYRIGHT (c) 1997 Bell Labs, Lucent Technologies. + * + * Functional finite maps with atom keys. + *) + +structure AtomBinaryMap = + BinaryMapFn ( + struct + type ord_key = Atom.atom + val compare = Atom.compare + end) diff --git a/cml/cml-lib/SMLNJ-Util/atom-binary-set.sml b/cml/cml-lib/SMLNJ-Util/atom-binary-set.sml new file mode 100644 index 0000000..f5619ff --- /dev/null +++ b/cml/cml-lib/SMLNJ-Util/atom-binary-set.sml @@ -0,0 +1,13 @@ +(* atom-binary-map.sml + * + * COPYRIGHT (c) 1997 Bell Labs, Lucent Technologies. + * + * Functional sets of atoms. + *) + +structure AtomBinarySet = + BinarySetFn ( + struct + type ord_key = Atom.atom + val compare = Atom.compare + end) diff --git a/cml/cml-lib/SMLNJ-Util/atom-map.sml b/cml/cml-lib/SMLNJ-Util/atom-map.sml new file mode 100644 index 0000000..0e9916f --- /dev/null +++ b/cml/cml-lib/SMLNJ-Util/atom-map.sml @@ -0,0 +1,8 @@ +(* atom-map.sml + * + * COPYRIGHT (c) 1999 Bell Labs, Lucent Technologies. + * + * Functional finite maps with atom keys. + *) + +structure AtomMap = AtomRedBlackMap diff --git a/cml/cml-lib/SMLNJ-Util/atom-redblack-map.sml b/cml/cml-lib/SMLNJ-Util/atom-redblack-map.sml new file mode 100644 index 0000000..83bed33 --- /dev/null +++ b/cml/cml-lib/SMLNJ-Util/atom-redblack-map.sml @@ -0,0 +1,13 @@ +(* atom-redblack-map.sml + * + * COPYRIGHT (c) 1999 Bell Labs, Lucent Technologies. + * + * Functional finite maps with atom keys. + *) + +structure AtomRedBlackMap = + RedBlackMapFn ( + struct + type ord_key = Atom.atom + val compare = Atom.compare + end) diff --git a/cml/cml-lib/SMLNJ-Util/atom-redblack-set.sml b/cml/cml-lib/SMLNJ-Util/atom-redblack-set.sml new file mode 100644 index 0000000..879145f --- /dev/null +++ b/cml/cml-lib/SMLNJ-Util/atom-redblack-set.sml @@ -0,0 +1,13 @@ +(* atom-redblack-map.sml + * + * COPYRIGHT (c) 1999 Bell Labs, Lucent Technologies. + * + * Functional sets of atoms. + *) + +structure AtomRedBlackSet = + RedBlackSetFn ( + struct + type ord_key = Atom.atom + val compare = Atom.compare + end) diff --git a/cml/cml-lib/SMLNJ-Util/atom-set.sml b/cml/cml-lib/SMLNJ-Util/atom-set.sml new file mode 100644 index 0000000..5063f90 --- /dev/null +++ b/cml/cml-lib/SMLNJ-Util/atom-set.sml @@ -0,0 +1,8 @@ +(* atom-map.sml + * + * COPYRIGHT (c) 1999 Bell Labs, Lucent Technologies. + * + * Functional sets of atoms. + *) + +structure AtomSet = AtomRedBlackSet diff --git a/cml/cml-lib/SMLNJ-Util/atom-sig.sml b/cml/cml-lib/SMLNJ-Util/atom-sig.sml new file mode 100644 index 0000000..102c7ed --- /dev/null +++ b/cml/cml-lib/SMLNJ-Util/atom-sig.sml @@ -0,0 +1,33 @@ +(* atom-sig.sml + * + * COPYRIGHT (c) 1996 by AT&T Research + * + * AUTHOR: John Reppy + * AT&T Bell Laboratories + * Murray Hill, NJ 07974 + * jhr@research.att.com + * + * TODO: add a gensym operation? + *) + +signature ATOM = + sig + + type atom + (* Atoms are hashed strings that support fast equality testing. *) + + val atom : string -> atom + val atom' : substring -> atom + (* Map a string/substring to the corresponding unique atom. *) + val toString : atom -> string + (* return the string representation of the atom *) + val sameAtom : (atom * atom) -> bool + (* return true if the atoms are the same *) + val compare : (atom * atom) -> order + (* compare two atoms for their relative order; note that this is + * not lexical order! + *) + val hash : atom -> word + (* return a hash key for the atom *) + + end (* signature ATOM *) diff --git a/cml/cml-lib/SMLNJ-Util/atom-table.sml b/cml/cml-lib/SMLNJ-Util/atom-table.sml new file mode 100644 index 0000000..dd5c716 --- /dev/null +++ b/cml/cml-lib/SMLNJ-Util/atom-table.sml @@ -0,0 +1,13 @@ +(* atom-table.sml + * + * COPYRIGHT (c) 1996 AT&T Research. + * + * Hash tables of atoms. + *) + +structure AtomTable = HashTableFn (struct + type hash_key = Atom.atom + val hashVal = Atom.hash + val sameKey = Atom.sameAtom + end); + diff --git a/cml/cml-lib/SMLNJ-Util/cml-atom-new.sml b/cml/cml-lib/SMLNJ-Util/cml-atom-new.sml new file mode 100644 index 0000000..cff697f --- /dev/null +++ b/cml/cml-lib/SMLNJ-Util/cml-atom-new.sml @@ -0,0 +1,21 @@ +(* cml-atom-new.sml + * + * Thread-safe version of Atom (protecting the global hashtable + * with a lock). + * + * Copyright (c) 2005 by The Fellowship of SML/NJ + * + * Author: Matthias Blume (blume@tti-c.org) + *) +structure Atom : ATOM = struct + + open Atom (* from $/smlnj-lib.cm *) + + local val l = SyncVar.mVarInit () + in + fun atomically f a = (SyncVar.mTake l; f a before SyncVar.mPut (l, ())) + end + + val atom = atomically atom + val atom' = atomically atom' +end diff --git a/cml/cml-lib/SMLNJ-Util/cml-atom.sml b/cml/cml-lib/SMLNJ-Util/cml-atom.sml new file mode 100644 index 0000000..564d33c --- /dev/null +++ b/cml/cml-lib/SMLNJ-Util/cml-atom.sml @@ -0,0 +1,139 @@ +(* cml-atom.sml + * + * COPYRIGHT (c) 1992 by AT&T Bell Laboratories + * COPYRIGHT (c) 1996 by AT&T Research + * + * This is a CML version of the Atom module from the SML/NJ library. + * It protects the global hash table in a server thread. + * + * AUTHOR: John Reppy + * AT&T Bell Laboratories + * Murray Hill, NJ 07974 + * jhr@research.att.com + *) + +structure Atom : ATOM = + struct + + structure V = SyncVar + + (* local definition of app *) + fun app f = let + fun appF [] = () + | appF (x::r) = (f x; appF r) + in + appF + end + + (* unique names *) + datatype atom = ATOM of { + hash : word, + id : string + } + + fun toString (ATOM{id, ...}) = id + + fun hash (ATOM{hash=h, ...}) = h + + fun sameAtom (ATOM{hash=h1, id=id1}, ATOM{hash=h2, id=id2}) = + (h1 = h2) andalso (id1 = id2) + + (* compare two names for their relative order; note that this is + * not lexical order! + *) + fun compare (ATOM{hash=h1, id=id1}, ATOM{hash=h2, id=id2}) = + if (h1 = h2) + then if (id1 = id2) + then EQUAL + else if (id1 < id2) + then LESS + else GREATER + else if (h1 < h2) + then LESS + else GREATER + + + (** the unique name hash table; this is protected in a server thread. **) + val tableSz = 64 (* initial table size *) + + (* a request to the server *) + type req = {key : word, str : string, reply : atom V.ivar} + + (* the server's request channel *) + val reqCh : req CML.chan = CML.channel() + + (* the name server *) + fun nameServer () = let + fun server (tblSize, tbl, numItems) = let + val {key, str, reply} = CML.recv reqCh + fun isName (ATOM{hash, id}) = (hash = key) andalso (id = str) + fun insert (tblSz, tbl, numItems) = + if (numItems > tblSz) + then grow (tblSz, tbl, numItems) + else let + val indx = Word.toIntX(Word.andb(key, Word.fromInt tblSz - 0w1)) + fun look [] = let + val newName = ATOM{hash = key, id = str} + in + Array.update ( + tbl, indx, newName :: Array.sub(tbl, indx)); + V.iPut(reply, newName); + (tblSz, tbl, numItems+1) + end + | look (name::r) = ( + if (isName name) + then ( + V.iPut(reply, name); + (tblSz, tbl, numItems)) + else look r) + in + look (Array.sub(tbl, indx)) + end + (* double the table size *) + and grow (tblSz, tbl, numItems) = let + val newSz = tblSz+tblSz + val newMask = Word.fromInt newSz - 0w1 + val newTbl = Array.array(newSz, []) + fun ins (item as ATOM{hash, ...}) = let + val indx = Word.toIntX(Word.andb(hash, newMask)) + in + Array.update (newTbl, indx, + item :: Array.sub(newTbl, indx)) + end + val appins = app ins + fun copy i = (appins (Array.sub(tbl, i)); copy(i+1)) + in + (copy 0) handle _ => (); + insert (newSz, newTbl, numItems) + end + in + server (insert (tblSize, tbl, numItems)) + end (* server *) + in + server (tableSz, Array.array(tableSz, [] : atom list), 0) + end + + (* make an atom from a string; this operation is split into a client + * part (compute the hash key), and a server part (map to unique + * representation). + *) + fun atom s = let + val replyV = V.iVar() + in + CML.send (reqCh, {key=HashString.hashString s, str=s, reply=replyV}); + V.iGet replyV + end + + (* eventually, we should hash the substring and check for prior definition + * before creating the string. + *) + fun atom' ss = atom(Substring.string ss) + + (** Initialization code **) + fun startup () = (CML.spawn nameServer; ()) + fun shutdown () = () + + val _ = RunCML.logServer("Name", startup, shutdown) + val _ = RunCML.logChannel("Name:reqCh", reqCh) + + end (* Atom *) diff --git a/cml/cml-lib/SMLNJ-Util/io-util-sig.sml b/cml/cml-lib/SMLNJ-Util/io-util-sig.sml new file mode 100644 index 0000000..1b28d5a --- /dev/null +++ b/cml/cml-lib/SMLNJ-Util/io-util-sig.sml @@ -0,0 +1,15 @@ +(* io-util-sig.sml + * + * COPYRIGHT (c) 1997 AT&T Labs Research. + *) + +signature IO_UTIL = + sig + type instream + type outstream + + val withInputFile : string * ('a -> 'b) -> 'a -> 'b + val withInstream : instream * ('a -> 'b) -> 'a -> 'b + val withOutputFile : string * ('a -> 'b) -> 'a -> 'b + val withOutstream : outstream * ('a -> 'b) -> 'a -> 'b + end diff --git a/cml/cml-lib/SMLNJ-Util/io-util.sml b/cml/cml-lib/SMLNJ-Util/io-util.sml new file mode 100644 index 0000000..dd66d96 --- /dev/null +++ b/cml/cml-lib/SMLNJ-Util/io-util.sml @@ -0,0 +1,58 @@ +(* io-util.sml + * + * COPYRIGHT (c) 1997 AT&T Labs Research. + *) + +structure IOUtil : IO_UTIL = + struct + + type instream = TextIO.instream + type outstream = TextIO.outstream + + fun swapInstrm (s, s') = + TextIO.getInstream s before TextIO.setInstream(s, s') + + fun withInputFile (s, f) x = let + val oldStrm = swapInstrm(TextIO.stdIn, TextIO.getInstream(TextIO.openIn s)) + fun cleanUp () = + TextIO.StreamIO.closeIn(swapInstrm(TextIO.stdIn, oldStrm)) + val res = (f x) handle ex => (cleanUp(); raise ex) + in + cleanUp(); + res + end + + fun withInstream (strm, f) x = let + val oldStrm = swapInstrm(TextIO.stdIn, TextIO.getInstream strm) + fun cleanUp () = + TextIO.setInstream(strm, swapInstrm(TextIO.stdIn, oldStrm)) + val res = (f x) handle ex => (cleanUp(); raise ex) + in + cleanUp(); + res + end + + fun swapOutstrm (s, s') = + TextIO.getOutstream s before TextIO.setOutstream(s, s') + + fun withOutputFile (s, f) x = let + val oldStrm = swapOutstrm(TextIO.stdOut, TextIO.getOutstream(TextIO.openOut s)) + fun cleanUp () = + TextIO.StreamIO.closeOut(swapOutstrm(TextIO.stdOut, oldStrm)) + val res = (f x) handle ex => (cleanUp(); raise ex) + in + cleanUp(); + res + end + + fun withOutstream (strm, f) x = let + val oldStrm = swapOutstrm(TextIO.stdOut, TextIO.getOutstream strm) + fun cleanUp () = + TextIO.setOutstream(strm, swapOutstrm(TextIO.stdOut, oldStrm)) + val res = (f x) handle ex => (cleanUp(); raise ex) + in + cleanUp(); + res + end + + end (* IOUtil *) diff --git a/cml/cml-lib/cm-descr/smlnj-lib.cm b/cml/cml-lib/cm-descr/smlnj-lib.cm new file mode 100644 index 0000000..03860f2 --- /dev/null +++ b/cml/cml-lib/cm-descr/smlnj-lib.cm @@ -0,0 +1,29 @@ +(* smlnj-lib.cm + * + * COPYRIGHT (c) 2001 Bell Labs, Lucent Technologies. + * + * The public interface to CML's own version of smlnj-lib.cm. + * (Actually, it also includes unix-lib.cm and inet-lib.cm. This should + * perhaps later be cleaned up.) + * + * This file is used with the new CM only. Clients compiled using + * the old CM refer to ../sources.cm directly. + *) +Library + +(** CML specific modules **) + signature MULTICAST + signature SIMPLE_RPC + signature TRACE_CML + + structure Multicast + structure SimpleRPC + structure TraceCML + + library($/smlnj-lib.cm) - structure TimeLimit +#if defined(OPSYS_UNIX) + library($/unix-lib.cm) +#endif + library($/inet-lib.cm) +is + ../sources.cm diff --git a/cml/cml-lib/cm-descr/trace-cml.cm b/cml/cml-lib/cm-descr/trace-cml.cm new file mode 100644 index 0000000..f474f1a --- /dev/null +++ b/cml/cml-lib/cm-descr/trace-cml.cm @@ -0,0 +1,13 @@ +(* trace-cml.cm + * + * COPYRIGHT (c) 1996 AT&T Research. + * + * The TraceCML library module needs access to CML internals, so we package + * it up into a sub-group. + *) + +Library + signature TRACE_CML + structure TraceCML +is + ../trace-cml.cm diff --git a/cml/cml-lib/multicast-sig.sml b/cml/cml-lib/multicast-sig.sml new file mode 100644 index 0000000..01b075f --- /dev/null +++ b/cml/cml-lib/multicast-sig.sml @@ -0,0 +1,32 @@ +(* multicast-sig.sml + * + * COPYRIGHT (c) 1990 by John H. Reppy. See COPYRIGHT file for details. + * + * Asynchronous multicast (one-to-many) channels. + *) + +signature MULTICAST = + sig + + type 'a mchan + type 'a port + type 'a event = 'a CML.event + + val mChannel : unit -> 'a mchan + (* create a new multicast channel *) + val port : 'a mchan -> 'a port + (* create a new output port on a channel *) + val copy : 'a port -> 'a port + (* create a new output port on a channel that has the same state as the + * given port. I.e., the stream of messages seen on the two ports will + * be the same. + * NOTE: if two (or more) independent threads are reading from the + * same port, then the copy operation may not be accurate. + *) + val recv : 'a port -> 'a + val recvEvt : 'a port -> 'a event + (* receive a message from a port *) + val multicast : ('a mchan * 'a) -> unit + (* send a message to all of the ports of a channel *) + + end (* MULTICAST *) diff --git a/cml/cml-lib/multicast.sml b/cml/cml-lib/multicast.sml new file mode 100644 index 0000000..b3f675c --- /dev/null +++ b/cml/cml-lib/multicast.sml @@ -0,0 +1,74 @@ +(* multicast.sml + * + * COPYRIGHT (c) 1994 AT&T Bell Laboratories. + * + * Asynchronous multicast (one-to-many) channels. This implementation + * is based on a condition variable implementation of multicast channels. + * See Chapter 5 of "Concurrent Programming in ML" for details. + *) + +structure Multicast : MULTICAST = + struct + + structure V = SyncVar + + type 'a event = 'a CML.event + + datatype 'a mchan = MChan of ('a request CML.chan * 'a port CML.chan) + + and 'a port + = Port of (('a * 'a mc_state V.ivar) CML.chan * 'a mc_state V.ivar V.mvar) + + and 'a request + = Message of 'a + | NewPort + + and 'a mc_state = MCState of ('a * 'a mc_state V.ivar) + + fun mkPort cv = let + val outCh = CML.channel() + val stateVar = V.mVarInit cv + fun tee cv = let + val (MCState(v, nextCV)) = V.iGet cv + in + CML.send (outCh, (v, nextCV)); + tee nextCV + end + in + CML.spawn (fn () => tee cv); + Port(outCh, stateVar) + end + + fun mChannel () = let + val reqCh = CML.channel() and replyCh = CML.channel() + fun server cv = (case (CML.recv reqCh) + of NewPort => ( + CML.send (replyCh, mkPort cv); + server cv) + | (Message m) => let + val nextCV = V.iVar() + in + V.iPut (cv, MCState(m, nextCV)); + server nextCV + end + (* end case *)) + in + CML.spawn (fn () => server (V.iVar())); + MChan(reqCh, replyCh) + end + + fun multicast (MChan(ch, _), m) = CML.send (ch, Message m) + + fun port (MChan(reqCh, replyCh)) = ( + CML.send (reqCh, NewPort); + CML.recv replyCh) + + fun copy (Port(_, stateV)) = mkPort(V.mGet stateV) + + fun recvMsg stateV (v, nextCV) = (V.mSwap (stateV, nextCV); v) + + fun recv (Port(ch, stateV)) = recvMsg stateV (CML.recv ch) + fun recvEvt (Port(ch, stateV)) = CML.wrap(CML.recvEvt ch, recvMsg stateV) + + end (* Multicast *) + diff --git a/cml/cml-lib/old-cml-sig.sml b/cml/cml-lib/old-cml-sig.sml new file mode 100644 index 0000000..a062ad6 --- /dev/null +++ b/cml/cml-lib/old-cml-sig.sml @@ -0,0 +1,83 @@ +(* old-cml-sig.sml + * + * COPYRIGHT (c) 1990 by John H. Reppy. See COPYRIGHT file for details. + * + * This is essentially the 0.9.8 version of the core CML interface. The only + * thing missing is poll and the low-level I/O synchronization. Also, there + * is an additional substructure (NewCML), to allow access to the new features. + *) + +signature OLD_CML = + sig + + structure NewCML : CML + + val version : {major : int, minor : int, rev : int, date : string} + val versionName : string + + (** events **) + type 'a event + + val sync : 'a event -> 'a + val select : 'a event list -> 'a + + val choose : 'a event list -> 'a event + + val guard : (unit -> 'a event) -> 'a event + + val wrap : ('a event * ('a -> 'b)) -> 'b event + val wrapHandler : ('a event * (exn -> 'a)) -> 'a event + val wrapAbort : ('a event * (unit -> unit)) -> 'a event + + val always : 'a -> 'a event + val ALWAYS : unit event (** for backward compatibility **) + + (** threads **) + type thread_id + + val spawn : (unit -> unit) -> thread_id + + val yield : unit -> unit + val exit : unit -> 'a + + val getTid : unit -> thread_id + val sameThread : (thread_id * thread_id) -> bool + val tidLessThan : (thread_id * thread_id) -> bool + val tidToString : thread_id -> string + + val threadWait : thread_id -> unit event + + (** condition variables **) + type 'a cond_var + + val condVar : unit -> '1a cond_var + + val writeVar : ('a cond_var * 'a) -> unit + exception WriteTwice + + val readVar : 'a cond_var -> 'a + val readVarEvt : 'a cond_var -> 'a event + + (** channels **) + type 'a chan + +(* +DEBUG ** +val dumpCh : 'a chan -> string +** -DEBUG *) + val channel : unit -> '1a chan + + val send : ('a chan * 'a) -> unit + val sendc : 'a chan -> 'a -> unit + val accept : 'a chan -> 'a + + val sameChannel : ('a chan * 'a chan) -> bool + + val transmit : ('a chan * 'a) -> unit event + val transmitc : 'a chan -> 'a -> unit event + val receive : 'a chan -> 'a event + + (** real-time synchronization **) + val waitUntil : Time.time -> unit event + val timeout : Time.time -> unit event + + end (* signature CONCUR_ML *) diff --git a/cml/cml-lib/old-cml.sml b/cml/cml-lib/old-cml.sml new file mode 100644 index 0000000..032caa4 --- /dev/null +++ b/cml/cml-lib/old-cml.sml @@ -0,0 +1,95 @@ +(* old-cml.sml + * + * COPYRIGHT (c) 1990 by John H. Reppy. See COPYRIGHT file for details. + * + * This is essentially the 0.9.8 version of the core CML interface. The only + * thing missing is poll and the low-level I/O synchronization. + *) + +structure OldCML : OLD_CML = + struct + + structure NewCML = CML + + val version = let + val (major, minor, rev) = (case (#version_id CML.version) + of (a::b::c::_) => (a, b, c) + | [a, b] => (a, b, 0) + | [a] => (a, 0, 0) + (* end case *)) + in + {major = major, minor = minor, rev = rev, date = #date CML.version} + end + val versionName = CML.banner + + (** events **) + type 'a event = 'a CML.event + + val sync = CML.sync + val select = CML.select + + val choose = CML.choose + + val guard = CML.guard + + val wrap = CML.wrap + val wrapHandler = CML.wrapHandler + + fun wrapAbort (evt, abortAct) = CML.withNack (fn abortEvt => let + fun abortAct' () = (sync abortEvt; abortAct()) + in + CML.spawn abortAct'; evt + end + + val always = CML.always + val ALWAYS = always() + + (** threads **) + type thread_id = CML.thread_id + + val spawn = CML.spawn + + val yield = CML.yield + val exit = CML.exit + + val getTid = CML.getTid + val sameThread = CML.sameTid + val tidLessThan (tid1, tid2) = (case CML.compareTid(tid1, tid2) + of LESS => true + | _ => false + (* end case *)) + val tidToString = CML.tidToString + + val threadWait = CML.joinEvt + + (** condition variables **) + type 'a cond_var = 'a SyncVar.ivar + + val condVar = SyncVar.iVar + + val writeVar = SyncVar.iPut + exception WriteTwice = SyncVar.Put + + val readVar = SyncVar.iGet + val readVarEvt = SyncVar.iGetEvt + + (** channels **) + type 'a chan = 'a CML.chan + + val channel = CML.channel + + val send = CML.send + fun sendc ch msg = CML.send(ch, msg) + val accept = CML.recv + + val sameChannel = CML.sameChannel + + val transmit = CML.sendEvt + fun transmitc ch msg = CML.sendEvt(ch, msg) + val receive = CML.recvEvt + + (** real-time synchronization **) + val waitUntil = CML.atTimeEvt + val timeout = CML.timeOutEvt + + end (* structure OldCML *) diff --git a/cml/cml-lib/simple-rpc-sig.sml b/cml/cml-lib/simple-rpc-sig.sml new file mode 100644 index 0000000..4bb27c6 --- /dev/null +++ b/cml/cml-lib/simple-rpc-sig.sml @@ -0,0 +1,32 @@ +(* simple-rpc-sig.sml + * + * COPYRIGHT (c) 1997 AT&T Labs Research. + * + * Generators for simple RPC protocols. + *) + +signature SIMPLE_RPC = sig + + type 'a event = 'a CML.event + + val mkRPC : ('a -> 'b) -> { + call : 'a -> 'b, + entryEvt : unit event + } + + val mkRPC_In : (('a * 'c) -> 'b) -> { + call : 'a -> 'b, + entryEvt : 'c -> unit event + } + + val mkRPC_Out : ('a -> ('b * 'c)) -> { + call : 'a -> 'b, + entryEvt : 'c event + } + + val mkRPC_InOut : (('a * 'c) -> ('b * 'd)) -> { + call : 'a -> 'b, + entryEvt : 'c -> 'd event + } + + end diff --git a/cml/cml-lib/simple-rpc.sml b/cml/cml-lib/simple-rpc.sml new file mode 100644 index 0000000..2f1bcf8 --- /dev/null +++ b/cml/cml-lib/simple-rpc.sml @@ -0,0 +1,65 @@ +(* simple-rpc.sml + * + * COPYRIGHT (c) 1997 AT&T Labs Research. + * + * Generators for simple RPC protocols. + *) + +structure SimpleRPC : SIMPLE_RPC = + struct + + type 'a event = 'a CML.event + + fun call reqMB arg = let + val replV = SyncVar.iVar() + in + Mailbox.send(reqMB, (arg, replV)); + SyncVar.iGet replV + end + + fun mkRPC f = let + val reqMB = Mailbox.mailbox() + val entryEvt = CML.wrap ( + Mailbox.recvEvt reqMB, + fn (arg, replV) => SyncVar.iPut(replV, f arg)) + in + { call = call reqMB, entryEvt = entryEvt } + end + + fun mkRPC_In f = let + val reqMB = Mailbox.mailbox() + val reqEvt = Mailbox.recvEvt reqMB + fun entryEvt state = CML.wrap ( + reqEvt, + fn (arg, replV) => SyncVar.iPut(replV, f(arg, state))) + in + { call = call reqMB, entryEvt = entryEvt } + end + + fun mkRPC_Out f = let + val reqMB = Mailbox.mailbox() + val reqEvt = Mailbox.recvEvt reqMB + val entryEvt = CML.wrap ( + reqEvt, + fn (arg, replV) => let val (res, state') = f arg + in + SyncVar.iPut(replV, res); state' + end) + in + { call = call reqMB, entryEvt = entryEvt } + end + + fun mkRPC_InOut f = let + val reqMB = Mailbox.mailbox() + val reqEvt = Mailbox.recvEvt reqMB + fun entryEvt state = CML.wrap ( + reqEvt, + fn (arg, replV) => let val (res, state') = f(arg, state) + in + SyncVar.iPut(replV, res); state' + end) + in + { call = call reqMB, entryEvt = entryEvt } + end + + end diff --git a/cml/cml-lib/sources.cm b/cml/cml-lib/sources.cm new file mode 100644 index 0000000..a1f982c --- /dev/null +++ b/cml/cml-lib/sources.cm @@ -0,0 +1,64 @@ +(* sources.cm + * + * COPYRIGHT (c) 2020 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * Sources file for the CML version of smlnj-lib.cm. + *) + +Group + +(** CML specific modules **) + signature MULTICAST + signature SIMPLE_RPC + signature TRACE_CML + + structure Multicast + structure SimpleRPC + structure TraceCML + +(** Modules inherited from the SML/NJ Util Library **) + library($/smlnj-lib.cm) - structure TimeLimit + +(** Modules from the SML/NJ Unix Library *) +#if defined(OPSYS_UNIX) + library($/unix-lib.cm) +#endif + +(** Modules from the SML/NJ INet Library *) + signature SOCK_UTIL (* copied to avoid typing problems *) + structure SockUtil (* copied to avoid typing problems *) +#if defined(OPSYS_UNIX) + signature UNIX_SOCK_UTIL + structure UnixSockUtil +#endif +is + + $cml/basis.cm + $/smlnj-lib.cm +# if defined(OPSYS_UNIX) + $/unix-lib.cm +# endif + $cml/cml.cm + $cml-lib/trace-cml.cm (* for trace-cml-sig.sml and trace-cml.sml *) + + SMLNJ-Util/cml-atom-new.sml + SMLNJ-Util/atom-binary-map.sml + SMLNJ-Util/atom-binary-set.sml + SMLNJ-Util/atom-map.sml + SMLNJ-Util/atom-redblack-map.sml + SMLNJ-Util/atom-redblack-set.sml + SMLNJ-Util/atom-set.sml + SMLNJ-Util/atom-table.sml + SMLNJ-Util/io-util.sml + SMLNJ-Util/io-util-sig.sml + SMLNJ-INet/sock-util-sig.sml + SMLNJ-INet/sock-util.sml +#if defined(OPSYS_UNIX) + SMLNJ-INet/unix-sock-util.sml +#endif + + multicast-sig.sml + multicast.sml + simple-rpc-sig.sml + simple-rpc.sml diff --git a/cml/cml-lib/trace-cml-sig.sml b/cml/cml-lib/trace-cml-sig.sml new file mode 100644 index 0000000..64570ea --- /dev/null +++ b/cml/cml-lib/trace-cml-sig.sml @@ -0,0 +1,94 @@ +(* trace-cml-sig.sml + * + * COPYRIGHT (c) 1992 AT&T Bell Laboratories + * + * This module provides rudimentary debugging support in the form of mechanisms + * to control debugging output, and to monitor thread termination. This + * version of this module is adapted from Cliff Krumvieda's utility for tracing + * CML programs. It provides three facilities: trace modules, for controlling + * debugging output; thread watching, for detecting thread termination; and + * a mechanism for reporting uncaught exceptions on a per thread basis. + *) + +signature TRACE_CML = + sig + + (** Trace modules ** + * + * The basic idea is that one defines a heirarchy of ``trace + * modules,'' which provide valves for debugging output. + *) + + type trace_module + + (* where to direct trace output to *) + datatype trace_to + = TraceToOut + | TraceToErr + | TraceToNull + | TraceToFile of string + | TraceToStream of TextIO.outstream + + val setTraceFile : trace_to -> unit + (* Direct the destination of trace output. Note: TraceToStream + * can only be specified as a destination if CML is running. + *) + + val traceRoot : trace_module + (* the root module of the trace hierarchy *) + + exception NoSuchModule + + val traceModule : (trace_module * string) -> trace_module + val nameOf : trace_module -> string + (* return the name of the module *) + val moduleOf : string -> trace_module + (* return the module specified by the given string, or raise + * NoSuchModule if none exists. + *) + + val traceOn : trace_module -> unit + (* turn tracing on for a module and its descendents *) + val traceOff : trace_module -> unit + (* turn tracing off for a module and its descendents *) + val traceOnly : trace_module -> unit + (* turn tracing on for a module (but not for its descendents) *) + val amTracing : trace_module -> bool + (* return true if this module is being traced *) + + val status : trace_module -> (trace_module * bool) list + (* return a list of the registered modules dominated by the given + * module, and their status. + *) + + val trace : (trace_module * (unit -> string list)) -> unit + (* conditionally generate tracing output *) + + + (** Thread watching **) + + val watcher : trace_module + (* controls printing of thread watching messages; the module's name + * is "/ThreadWatcher/" + *) + val watch : (string * CML.thread_id) -> unit + (* watch the given thread for unexpected termination *) + val unwatch : CML.thread_id -> unit + (* stop watching the named thread *) + + (** Uncaught exception handling **) + + val setUncaughtFn : ((CML.thread_id * exn) -> unit) -> unit + (* this sets the default uncaught exception action. *) + val setHandleFn : ((CML.thread_id * exn) -> bool) -> unit + (* add an additional uncaught exception action. If the action returns + * true, then no further action is taken. This can be used to handle + * application specific exceptions. + *) + val resetUncaughtFn : unit -> unit + (* this resets the default uncaught exception action to the system default, + * and removes any layered actions. + *) + + end; (* TRACE_CML *) + diff --git a/cml/cml-lib/trace-cml.cm b/cml/cml-lib/trace-cml.cm new file mode 100644 index 0000000..2be4f14 --- /dev/null +++ b/cml/cml-lib/trace-cml.cm @@ -0,0 +1,32 @@ +(* trace-cml.cm + * + * COPYRIGHT (c) 1996 AT&T Research. + * + * The TraceCML library module needs access to CML internals, so we package + * it up into a sub-group. + *) +Group + (* Notice that the "owner" specification above gets ignored by the old + * CM. Under the old CM clients use _this_ file to refer to the + * trace library, under the new CM clients use the description file + * in cm-descr/trace-cml.cm. + * This is done to avoid a file-naming conflict for smlnj-lib.cm. + * The conflict is caused by the old CM's path-search mechanism and + * does not occur under the new CM. *) + signature TRACE_CML + structure TraceCML +is +#if defined (NEW_CM) + $/smlnj-lib.cm + $cml/basis.cm + $cml/core-cml.cm + $cml/cml.cm +#else + smlnj-lib.cm + ../src/basis.cm + ../src/core-cml.cm + ../src/cml.cm +#endif + + trace-cml-sig.sml + trace-cml.sml diff --git a/cml/cml-lib/trace-cml.sml b/cml/cml-lib/trace-cml.sml new file mode 100644 index 0000000..f6813db --- /dev/null +++ b/cml/cml-lib/trace-cml.sml @@ -0,0 +1,348 @@ +(* trace-cml.sml + * + * COPYRIGHT (c) 1992 AT&T Bell Laboratories + * + * This module provides rudimentary debugging support in the form of mechanisms + * to control debugging output, and to monitor thread termination. This + * version of this module is adapted from Cliff Krumvieda's utility for tracing + * CML programs. It provides three facilities: trace modules, for controlling + * debugging output; thread watching, for detecting thread termination; and + * a mechanism for reporting uncaught exceptions on a per thread basis. + *) + +structure TraceCML : TRACE_CML = + struct + + structure SV = SyncVar + + (* where to direct trace output to *) + datatype trace_to + = TraceToOut + | TraceToErr + | TraceToNull + | TraceToFile of string + | TraceToStream of TextIO.outstream + + exception NoSuchModule + + (** Trace Modules **) + datatype trace_module = TM of { + full_name : string, + label : string, + tracing : bool ref, + children : trace_module list ref + } + + val traceRoot = TM{ + full_name = "/", + label = "", + tracing = ref false, + children = ref [] + } + + fun forAll f = let + fun for (tm as TM{children, ...}) = (f tm; forChildren(!children)) + and forChildren [] = () + | forChildren (tm::r) = (for tm; forChildren r) + in + for + end + + structure SS = Substring + + fun findTraceModule name = let + fun eq ss (TM{label, ...}) = (SS.compare(SS.full label, ss) = EQUAL) + fun find ([], tm) = SOME tm + | find (arc::rest, tm as TM{label, children, ...}) = let + val eqArc = eq arc + fun findChild [] = NONE + | findChild (c::r) = + if (eqArc c) then find(rest, c) else findChild r + in + findChild (!children) + end + in + find ( + SS.tokens (fn #"/" => true | _ => false) (SS.full name), + traceRoot) + end + + fun traceModule' (TM parent, name) = let + fun checkChildren [] = let + val tm = TM{ + full_name = (#full_name parent ^ name), + label = name, + tracing = ref(!(#tracing parent)), + children = ref [] + } + in + (#children parent) := tm :: !(#children parent); + tm + end + | checkChildren((tm as TM{label, ...})::r) = + if (label = name) then tm else checkChildren r + in + checkChildren (! (#children parent)) + end + + (* return the name of the module *) + fun nameOf (TM{full_name, ...}) = full_name + + (* return the module specified by the given string *) + fun moduleOf' name = (case findTraceModule name + of NONE => raise NoSuchModule + | (SOME tm) => tm + (* end case *)) + + (* turn tracing on for a module and its descendents *) + val traceOn' = forAll (fn (TM{tracing, ...}) => tracing := true) + + (* turn tracing off for a module and its descendents *) + val traceOff' = forAll (fn (TM{tracing, ...}) => tracing := false) + + (* turn tracing on for a module (but not for its descendents) *) + fun traceOnly' (TM{tracing, ...}) = tracing := true + + (* return true if this module is being traced *) + fun amTracing (TM{tracing, ...}) = !tracing + + (* return a list of the registered modules dominated by the given + * module, and their status. + *) + fun status' root = let + fun list (tm as TM{tracing, children, ...}, l) = + listChildren (!children, (tm, !tracing)::l) + and listChildren ([], l) = l + | listChildren (c::r, l) = listChildren(r, list(c, l)) + in + rev (list (root, [])) + end + + (** Trace printing **) + val traceDst = ref TraceToOut + val traceCleanup = ref (fn () => ()) + + fun setTraceFile' t = traceDst := t + +(** NOTE: there are bookkeeping bugs, when changing the trace destination + ** from TraceToStream to something else (where the original destination + ** was TraceToFile). + **) + fun tracePrint s = let + fun output strm = (TextIO.output(strm, s); TextIO.flushOut strm) + in + case !traceDst + of TraceToOut => output TextIO.stdOut + | TraceToErr => output TextIO.stdErr + | TraceToNull => () + | (TraceToFile fname) => let + val dst = let + val strm = TextIO.openOut fname + in + traceCleanup := (fn () => TextIO.closeOut strm); + TraceToStream strm + end handle _ => ( + Debug.sayDebug(concat[ + "TraceCML: unable to open \"", fname, + "\", redirecting to stdout" + ]); + TraceToOut) + in + setTraceFile' dst; + tracePrint s + end + | (TraceToStream strm) => output strm + (* end case *) + end + + (** Trace server **) + val traceCh : (unit -> string list) CML.chan = CML.channel() + val traceUpdateCh : (unit -> unit) CML.chan = CML.channel() + + fun traceServer () = let + val evt = [ + CML.wrap(CML.recvEvt traceCh, fn f => tracePrint(concat(f()))), + CML.wrap(CML.recvEvt traceUpdateCh, fn f => f()) + ] + fun loop () = (CML.select evt; loop()) + in + loop() + end (* traceServer *) + + fun tracerStart () = (CML.spawn traceServer; ()) + fun tracerStop () = ((!traceCleanup)(); traceCleanup := (fn () => ())) + + val _ = ( + RunCML.logChannel ("TraceCML:trace", traceCh); + RunCML.logChannel ("TraceCML:trace-update", traceUpdateCh); + RunCML.logServer ("TraceCML:trace-server", tracerStart, tracerStop)) + + local + fun carefully f = if RunCML.isRunning() + then CML.send(traceUpdateCh, f) + else f() + fun carefully' f = if RunCML.isRunning() + then let + val reply = SV.iVar() + in + CML.send (traceUpdateCh, fn () => (SV.iPut(reply, f()))); + SV.iGet reply + end + else f() + in + fun traceModule arg = carefully' (fn () => traceModule' arg) + fun moduleOf name = carefully' (fn () => moduleOf' name) + fun traceOn tm = carefully (fn () => traceOn' tm) + fun traceOff tm = carefully (fn () => traceOff' tm) + fun traceOnly tm = carefully (fn () => traceOnly' tm) + fun setTraceFile f = carefully (fn () => setTraceFile' f) + fun status root = carefully' (fn () => status' root) + end (* local *) + + fun trace (TM{tracing, ...}, prFn) = + if (RunCML.isRunning() andalso (!tracing)) + then CML.send(traceCh, prFn) + else () + + + (** Thread watching **) + + (* controls printing of thread watching messages *) + val watcher = traceModule (traceRoot, "ThreadWatcher") + val _ = traceOn watcher + + datatype watcher_msg + = WATCH of (CML.thread_id * unit CML.chan) + | UNWATCH of (CML.thread_id * unit SV.ivar) + + val watcherMb : watcher_msg Mailbox.mbox = Mailbox.mailbox () + + (* stop watching the named thread *) + fun unwatch tid = let + val ackV = SV.iVar() + in + Mailbox.send(watcherMb, UNWATCH(tid, ackV)); + SV.iGet ackV + end + + (* watch the given thread for unexpected termination *) + fun watch (name, tid) = let + val unwatchCh = CML.channel() + fun handleTermination () = ( + trace (watcher, fn () => [ + "WARNING! Watched thread ", name, CML.tidToString tid, + " has died.\n" + ]); + unwatch tid) + fun watcherThread () = ( + Mailbox.send (watcherMb, WATCH(tid, unwatchCh)); + CML.select [ + CML.recvEvt unwatchCh, + CML.wrap (CML.joinEvt tid, handleTermination) + ]) + in + CML.spawn (watcherThread); () + end + + structure TidTbl = HashTableFn ( + struct + type hash_key = CML.thread_id + val hashVal = CML.hashTid + val sameKey = CML.sameTid + end) + + (* the watcher server *) + fun startWatcher () = let + val tbl = TidTbl.mkTable (32, Fail "startWatcher") + fun loop () = (case (Mailbox.recv watcherMb) + of (WATCH arg) => TidTbl.insert tbl arg + | (UNWATCH(tid, ack)) => ( + (* notify the watcher that the thread is no longer being + * watched, and then acknowledge the unwatch command. + *) + CML.send(TidTbl.remove tbl tid, ()) + handle _ => (); + (* acknowledge that the thread has been removed *) + SV.iPut(ack, ())) + (* end case *); + loop ()) + in + CML.spawn loop; () + end + + val _ = ( + RunCML.logMailbox ("TraceCML:watcherMb", watcherMb); + RunCML.logServer ("TraceCML:watcher-server", startWatcher, fn () => ())) + + + (** Uncaught exception handling **) + + fun defaultHandlerFn (tid, ex) = let + val raisedAt = (case (SMLofNJ.exnHistory ex) + of [] => ["\n"] + | l => [" raised at ", List.last l, "\n"] + (* end case *)) + in + Debug.sayDebug (concat ([ + CML.tidToString tid, " uncaught exception ", + exnName ex, " [", exnMessage ex, "]" + ] @ raisedAt)) + end + + val defaultHandler = ref defaultHandlerFn + val handlers = ref ([] : ((CML.thread_id * exn) -> bool) list) + + (* this sets the default uncaught exception action. *) + fun setUncaughtFn' action = defaultHandler := action + + (* add an additional uncaught exception action. If the action returns + * true, then no further action is taken. This can be used to handle + * handle application specific exceptions. + *) + fun setHandleFn' action = handlers := action :: !handlers + + (* this resets the default uncaught exception action to the system default, + * and removes any layered actions. + *) + fun resetUncaughtFn' () = (defaultHandler := defaultHandlerFn; handlers := []) + + val exnUpdateCh : (unit -> unit) CML.chan = CML.channel() + + fun exnServerStartup () = let + val errCh = Mailbox.mailbox() + (* this function is installed as the default handler for threads; + * it sends the thread ID and uncaught exception to the ExnServer. + *) + fun threadHandler exn = Mailbox.send(errCh, (CML.getTid(), exn)) + (* invoke the hsndler actions on the uncaught exception *) + fun handleExn arg = let + val hdlrList = !handlers and dfltHndlr = !defaultHandler + fun loop [] = dfltHndlr arg + | loop (hdlr::r) = if (hdlr arg) then () else loop r + in + CML.spawn (fn () => ((loop hdlrList) handle _ => (dfltHndlr arg))); + () + end + val event = [ + CML.wrap (CML.recvEvt exnUpdateCh, fn f => f()), + CML.wrap (Mailbox.recvEvt errCh, handleExn) + ] + fun server () = (CML.select event; server()) + in + Thread.defaultExnHandler := threadHandler; + CML.spawn server; () + end + + val _ = ( + RunCML.logChannel ("TraceCML:exnUpdateCh", exnUpdateCh); + RunCML.logServer ("TraceCML", exnServerStartup, fn () => ())) + + local + fun carefully f = if RunCML.isRunning() then CML.send(exnUpdateCh, f) else f() + in + fun setUncaughtFn arg = carefully (fn () => setUncaughtFn' arg) + fun setHandleFn arg = carefully (fn () => setHandleFn' arg) + fun resetUncaughtFn arg = carefully (fn () => resetUncaughtFn' arg) + end (* local *) + + end; (* TraceCML *) diff --git a/cml/doc/CATALOG b/cml/doc/CATALOG new file mode 100644 index 0000000..5ea8cc9 --- /dev/null +++ b/cml/doc/CATALOG @@ -0,0 +1,4 @@ +-- a temporary catalog for ML-Doc -- +ENTITY %document-entities "Entities.sgml" +CATALOG "/Users/jhr/Work/Tools/ml-doc/lib/catalog" + diff --git a/cml/doc/Config.cfg b/cml/doc/Config.cfg new file mode 100644 index 0000000..a7323be --- /dev/null +++ b/cml/doc/Config.cfg @@ -0,0 +1,42 @@ +# Config.cfg +# +# base configuration file for the CML Reference Manual. + +Catalog "CATALOG" +MasterInfoFile "Info/Master.info" + +# +# references to external documents +# +SML-Basis-Doc { + InfoFile "/Users/jhr/Work/sml-basis/Basis/Info/HTML.info" + BaseURL "SML.sourceforge.net/Basis" + RootURL "SML.sourceforge.net/Basis/index.html" +} + +# +# tool specific options +# +HTML { + BaseURL "CML" + RelativeLinks TRUE + PreWid 70 +} +HTML-Gen { + Template "page.template" +} +HTML-Index { + Template "index.template" +} +HTML-TOC { + Template "toc.template" +} + +Proof-LaTeX { + TopLevelSection "Chapter" +} + +#Tools { +# SGMLS "/opt/default/bin/nsgmls" +#} + diff --git a/cml/doc/Entities.sgml b/cml/doc/Entities.sgml new file mode 100644 index 0000000..5b2c18f --- /dev/null +++ b/cml/doc/Entities.sgml @@ -0,0 +1,28 @@ + + + + + + + + + + + + + + + + + + + + + diff --git a/cml/doc/ML-Doc/barrier.mldoc b/cml/doc/ML-Doc/barrier.mldoc new file mode 100644 index 0000000..0d66fb3 --- /dev/null +++ b/cml/doc/ML-Doc/barrier.mldoc @@ -0,0 +1,142 @@ + + + + + + + +The Barrier structure + + +The + + + + +The + + + + 'abarrier + + + This is the type constructor for a barrier. + + + + + 'aenrollment + + + This type constructor represents an enrollment on a barrier. + Enrollments are used to synchronize on barriers. + + + + + barrier('a -> 'a) -> 'a -> 'a barrier + + + barrier update init + + creates a new barrier with the update function update + and the initial state init. + The state is updated each time the barrier is synchronized on by the enrolled + threads. + + + + + enroll'a barrier -> 'a enrollment + + + enroll bar + + enrolls on the barrier, returning a new enrollment. + The (unenforced) convention is that each enrolled thread belongs to a thread + and that a thread owns at most one enrollment on a given barrier. + + + + + wait'a enrollment -> 'a + Fail + + + wait ebar + + waits on the barrier until all of the enrolled threads are waiting, at which point + the state is updated and the resulting state value is returned to the waiting + threads. + If another thread is already waiting on this enrollment or if the enrollment + has been resigned, then the exception Fail is raised. + + Note that if the update function for the barrier raises an exception, then this + exception is raised for each waiting thread by wait. + + + + + + resign'a enrollment -> unit + Fail + + + resign ebar + + resigns from the enrollment ebar. + Resigning from an already resigned enrollment is ignored, but + if another thread is waiting on this enrollment, then the + exception Fail is raised. + + + + + value'a enrollment -> 'a + Fail + + + value ebar + + gets the current value of the barrier's state. + + Note that if the convention of one-thread per enrollment is followed, then this + operation is free of races, since the state is stable between barrier + synchronizations. + + + + + + +Barriers can be used to implement clock and phased synchronization. +For example, consider the following code: + +let +val clock = Barrier.barrier (fn x => x+1) 0 +fun spawnChild () = let + val bar = Barrier.enroll clock + fun loop () = if (Barrier.wait bar = 5) then () else loop() + in + CML.spawn loop + end +in + Barrier.enroll clock; + spawnChild (); + spawnChild (); + Barrier.resign clock +end + +In this example, two threads are spawned, which then synchronize on the barrier +five times. +To avoid a race condition between enrollment and the first synchronization, the parent +thread enrolls on the barrier prior to spawning its children and then resigns after +the children have been enrolled by the spawnChild function. + + + diff --git a/cml/doc/ML-Doc/basics.mldoc b/cml/doc/ML-Doc/basics.mldoc new file mode 100644 index 0000000..6709779 --- /dev/null +++ b/cml/doc/ML-Doc/basics.mldoc @@ -0,0 +1,21 @@ + + + + + + + + +Basics + +
    +Basics + + + +
    diff --git a/cml/doc/ML-Doc/cml.mldoc b/cml/doc/ML-Doc/cml.mldoc new file mode 100644 index 0000000..20a129b --- /dev/null +++ b/cml/doc/ML-Doc/cml.mldoc @@ -0,0 +1,331 @@ + + + + + + + + + + +The CML structure + + +The + + + + + + + + + + thread_id + + + Thread IDs are the unique IDs associated with &CML; threads. + These IDs are in an unspecified total order that can be used to + break cyclic depedencies (see + 'achan + + + This is the type constructor for synchronous channels. + + 'aevent + + + Event values are abstract representations of synchronous operations + (so called first-class sychronous operations). + + version{system : string, version_id : int list, date : string} + bannerstring + + + These specify the version of &CML; in the same format as &SMLNJ;. + + spawnc('a -> unit) -> 'a -> thread_id + spawn(unit -> unit) -> thread_id + + + spawnc + spawn + creates a new thread of control to evaluate the body of + yieldunit -> unit + + + This function can be used to implement an explicit context switch. + Since CML is preemptively scheduled, it should never be necessary for + user programs to call this function. + It is mainly used for performance measurements. + + exitunit -> 'a + + + exit () + + causes the calling thread to terminate. + + + getTidunit -> thread_id + + + getTid () + + returns the thread ID of the calling thread. + + sameTid(thread_id * thread_id) -> bool + + + sameTid ( + returns + compareTid(thread_id * thread_id) -> order + + + compareTid ( + compares the two thread IDs and returns their order in the total + ordering of thread IDs. + The precise semantics of this ordering is left unspecified, other + than to say it is a total order. + + hashTidthread_id -> word + + + hashTid + returns a hashing of the thread ID + tidToStringthread_id -> string + + + tidToString + returns a string representation of the thread ID + joinEvtthread_id -> unit event + + + joinEvt + creates an event value for synchronizing on the termination of + the thread with the ID + channelunit -> 'a chan + + + channel () + + creates a new synchronous channel. + + sameChannel('a chan * 'a chan) -> bool + + + sameChannel ( + returns + send('a chan * 'a) -> unit + + + send ( + sends the message + recv'a chan -> 'a + + + recv + receives a message from the channel + sendEvt('a chan * 'a) -> unit event + recvEvt'a chan -> 'a event + + + These functions create event values to represent the + sendPoll('a chan * 'a) -> bool + + + sendPoll ( + attempts to send the message + recvPoll'a chan -> 'a option + + + recvPoll + attempts to receive a message from the channel + wrap('a event * ('a -> 'b)) -> 'b event + + + wrap ( + wraps the post-synchronization action + wrapHandler('a event * (exn -> 'a event)) -> 'a event + + + wrapHandler ( + wraps the exception handler function + guard(unit -> 'a event) -> 'a event + + + guard + creates a delayed event value from the function + withNack(unit event -> 'a event) -> 'a event + + + withNack + creates a delayed event value from the function negative + acknowledgement event as an argument. + This negative acknowledgement event is enabled in the case where + some other event involved in the synchronization is chosen instead + of the one produced by + choose'a event list -> 'a event + + + choose + constructs an event value that represents the non-deterministic + choice of the events in the list + sync'a event -> 'a + + + sync + synchronizes the calling thread on the event + select'a event list -> 'a + + + select + synchronizes on the non-deterministic choice of the events in the + list + + but is more efficient. + + never'a event + + + never + + is an event value that is never enabled for synchronization. + It is semantically equivalant to the expression: + + + + alwaysEvt'a -> 'a event + + + alwaysEvt + creates an event value that is always enabled, and that returns + the value + timeOutEvtTime.time -> unit event + + + timeOutEvt + creates an event value that becomes enabled at the time + interval + + will delay the calling thread for one second. + Note that the specified time interval is actually a minimum + waiting time, and the delay may be longer. + + atTimeEvtTime.time -> unit event + + + atTimeEvt + creates an event value that becomes enabled at the specified time + + + blocks the calling thread until the beginning of the year 2000. + + + diff --git a/cml/doc/ML-Doc/core-cml.mldoc b/cml/doc/ML-Doc/core-cml.mldoc new file mode 100644 index 0000000..ecac097 --- /dev/null +++ b/cml/doc/ML-Doc/core-cml.mldoc @@ -0,0 +1,22 @@ + + + + + + + + + +Core CML Reference + +
    +Core CML reference + + + + + + + + +
    diff --git a/cml/doc/ML-Doc/lib/README b/cml/doc/ML-Doc/lib/README new file mode 100644 index 0000000..56bd4ed --- /dev/null +++ b/cml/doc/ML-Doc/lib/README @@ -0,0 +1,3 @@ +Since the ML-Doc tools can't handle paths of depth greater than one +right now; we must put links in the parent directory to these files. + diff --git a/cml/doc/ML-Doc/lib/cml-lib.mldoc b/cml/doc/ML-Doc/lib/cml-lib.mldoc new file mode 100644 index 0000000..eb145b0 --- /dev/null +++ b/cml/doc/ML-Doc/lib/cml-lib.mldoc @@ -0,0 +1,19 @@ + + + + + + + + + + +CML Library Reference + +
    +CML Library Reference + + + +
    + diff --git a/cml/doc/ML-Doc/lib/multicast.mldoc b/cml/doc/ML-Doc/lib/multicast.mldoc new file mode 100644 index 0000000..13bcbf0 --- /dev/null +++ b/cml/doc/ML-Doc/lib/multicast.mldoc @@ -0,0 +1,93 @@ + + + + + + +The Multicast structure + + +The + + +Multicast channels provide a mechanism for broadcasting a stream of +messages to a collection of threads. +Threads receive multicast messages via an + + + 'aevent'a CML.event + + 'amchan + + + This is the type constructor for asynchronous multicast channels. + + 'aport + + + This is the type constructor for output ports on an + asynchronous multicast channels. + + mChannelunit -> 'a mchan + + + mChannel + creates a new multicast channel. + + port'a mchan -> 'a port + + + port + creates a new output port on the channel + copy'a port -> 'a port + + + copy + creates a new output port on a channel that has the same state as the + port + recv'a port -> 'a + + + recv + gets the next message from the port + recvEvt'a port -> 'a event + + + recvEvt + creates an event value that represents the + multicast('a mchan * 'a) -> unit + + + multicast ( + multicasts the value + + diff --git a/cml/doc/ML-Doc/lib/trace-cml.mldoc b/cml/doc/ML-Doc/lib/trace-cml.mldoc new file mode 100644 index 0000000..008dac8 --- /dev/null +++ b/cml/doc/ML-Doc/lib/trace-cml.mldoc @@ -0,0 +1,195 @@ + + + + + + + + + +The TraceCML structure + + +The + + +The +Trace modules provide a hierarchical name space, which is used to control +the granularity of debugging output. +The trace module operations have been implemented in such a way that they +can be invoked independent of whether &CML; is currently running. +This allows the trace hierarchy to be setup statically. + +The + + + trace_module + + + A + trace_to + TraceToOut + TraceToErr + TraceToNull + TraceToFilestring + TraceToStreamTextIO.outstream + + + + The various destinations of trace output. + + setTraceFiletrace_to -> unit + + + setTraceFile + Direct the destination of trace output. + Note: + traceRoottrace_module + + + traceRoot + + is the root module of the trace hierarchy + + NoSuchModule + + traceModule(trace_module * string) -> trace_module + + + traceModule ( + creates a new trace module that is a child of + nameOftrace_module -> string + + + nameOf + returns the full name of the module + moduleOfstring -> trace_module + + + moduleOf + returns the trace module named by "/" as a separator). + + traceOntrace_module -> unit + + + traceOn + turns tracing on for moduel + traceOfftrace_module -> unit + + + traceOff + turns tracing off for moduel + traceOnlytrace_module -> unit + + + traceOnly + turn tracing on for module + amTracingtrace_module -> bool + + + amTracing + returns + statustrace_module -> (trace_module * bool) list + + + status + returns a pre-order list of the modules rooted at + trace(trace_module * (unit -> string list)) -> unit + + + trace ( + explain the use and semantics of trace HERE. + + watchertrace_module + + + watcher + + is a trace module that is used to control the printing of thread + termination messages. + Its name is "/ThreadWatcher/", and by default it is enabled. + + watch(string * CML.thread_id) -> unit + + + watch ( + watch the thread named by + unwatchCML.thread_id -> unit + + + unwatch + stop watching the thread named by + setUncaughtFn((CML.thread_id * exn) -> unit) -> unit + + + setUncaughtFn + sets the default uncaught exception action to + setHandleFn((CML.thread_id * exn) -> bool) -> unit + + + setHandleFn + adds the function + resetUncaughtFnunit -> unit + + + resetUncaughtFn + resets the default uncaught exception action to the system default, + and removes any layered actions. + + + diff --git a/cml/doc/ML-Doc/mailbox.mldoc b/cml/doc/ML-Doc/mailbox.mldoc new file mode 100644 index 0000000..a43c4e4 --- /dev/null +++ b/cml/doc/ML-Doc/mailbox.mldoc @@ -0,0 +1,89 @@ + + + + + + + + + +The Mailbox structure + + +The + + + + +The + + + 'ambox + + + This is the type constructor for a mailbox. + A mailbox is an unbounded, buffered communication channel. + + mailboxunit -> 'a mbox + + + mailbox () + + creates a new mailbox. + + sameMailbox('a mbox * 'a mbox) -> bool + + + sameMailbox ( + returns + send('a mbox * 'a) -> unit + + + send ( + sends the message + recv'a mbox -> 'a + + + recv + receive the next message from the mailbox + recvEvt'a mbox -> 'a event + + + recvEvt + returns the event value that represents the + recvPoll'a mbox -> 'a option + + + This is the non-blocking version of + +Note that mailbox buffers are unbounded, which means that there is no flow +control to prevent a producer from greatly outstriping a consumer, and thus +exhausting memory. +In situations where there is no natural limit to the rate of + diff --git a/cml/doc/ML-Doc/os-io.mldoc b/cml/doc/ML-Doc/os-io.mldoc new file mode 100644 index 0000000..52200d1 --- /dev/null +++ b/cml/doc/ML-Doc/os-io.mldoc @@ -0,0 +1,46 @@ + + + + + + + + + + +The OS.IO structure + + +The + + + + +The + + + + pollEvtpoll_desc list -> poll_info list event + + + pollEvt + polls a list of poll descriptors + + diff --git a/cml/doc/ML-Doc/os-process.mldoc b/cml/doc/ML-Doc/os-process.mldoc new file mode 100644 index 0000000..fa0ce83 --- /dev/null +++ b/cml/doc/ML-Doc/os-process.mldoc @@ -0,0 +1,49 @@ + + + + + + + + + + +The OS.Process structure + + +The + + + + +The + + + + systemEvtstring -> status event + + + systemEvt + asks the operating system to execute the command + Note that, although this function is independent of the operating + system, the interpretation of the string + + diff --git a/cml/doc/ML-Doc/os.mldoc b/cml/doc/ML-Doc/os.mldoc new file mode 100644 index 0000000..3dbb458 --- /dev/null +++ b/cml/doc/ML-Doc/os.mldoc @@ -0,0 +1,40 @@ + + + + + + + + + + +The OS structure + + +The + + + + +The + + + + Process + + IO + + diff --git a/cml/doc/ML-Doc/porting.mldoc b/cml/doc/ML-Doc/porting.mldoc new file mode 100644 index 0000000..c2d34c5 --- /dev/null +++ b/cml/doc/ML-Doc/porting.mldoc @@ -0,0 +1,72 @@ + + + + + + + + + +Porting old programs + +
    +Porting old CML programs + + +There have been substantial changes from Version 0.9.8 of &CML;. +Most of these changes are cosmetic name changes meant to track +changes in the &SML; interfaces, and to rationalize naming conventions. +In this section, we detail the changes to the &CML; interfaces, and +describe correspondence between the old and new interfaces. + +
    +Backwards compatibility modules + +To ease the transition from version 0.9.8 of &CML; to the new +interfaces, we provide two backwards compatibility modules in the +&CML; Library. +
    + +
    +Name changes + + + + " + | tokToString (STARTTT payload) = + "" + | tokToString ENDTT = "" + | tokToString (STARTUL payload) = + "" + | tokToString ENDUL = "" + | tokToString (STARTVAR payload) = + "" + | tokToString ENDVAR = "" + | tokToString (STARTAPPLET payload) = + "" + | tokToString ENDAPPLET = "" + | tokToString (STARTBASEFONT payload) = + "" + | tokToString (STARTCENTER payload) = + "" + | tokToString ENDCENTER = "" + | tokToString (STARTDIR payload) = + "" + | tokToString ENDDIR = "" + | tokToString (STARTFONT payload) = + "" + | tokToString ENDFONT = "" + | tokToString (STARTIFRAME payload) = + "" + | tokToString ENDIFRAME = "" + | tokToString (STARTISINDEX payload) = + "" + | tokToString (STARTMENU payload) = + "" + | tokToString ENDMENU = "" + | tokToString (STARTS payload) = + "" + | tokToString ENDS = "" + | tokToString (STARTSTRIKE payload) = + "" + | tokToString ENDSTRIKE = "" + | tokToString (STARTU payload) = + "" + | tokToString ENDU = "" + | tokToString (STARTFRAME payload) = + "" + | tokToString (STARTFRAMESET payload) = + "" + | tokToString ENDFRAMESET = "" + | tokToString (STARTNOFRAMES payload) = + "" + | tokToString ENDNOFRAMES = "" + +(* ______________________________________________________________________ *) + +fun tokGetAttrs (STARTA payload) = SOME (H4U.getAttrs payload) + | tokGetAttrs (STARTABBR payload) = SOME (H4U.getAttrs payload) + | tokGetAttrs (STARTACRONYM payload) = SOME (H4U.getAttrs payload) + | tokGetAttrs (STARTADDRESS payload) = SOME (H4U.getAttrs payload) + | tokGetAttrs (STARTAREA payload) = SOME (H4U.getAttrs payload) + | tokGetAttrs (STARTB payload) = SOME (H4U.getAttrs payload) + | tokGetAttrs (STARTBASE payload) = SOME (H4U.getAttrs payload) + | tokGetAttrs (STARTBDO payload) = SOME (H4U.getAttrs payload) + | tokGetAttrs (STARTBIG payload) = SOME (H4U.getAttrs payload) + | tokGetAttrs (STARTBLOCKQUOTE payload) = SOME (H4U.getAttrs payload) + | tokGetAttrs (STARTBODY payload) = SOME (H4U.getAttrs payload) + | tokGetAttrs (STARTBR payload) = SOME (H4U.getAttrs payload) + | tokGetAttrs (STARTBUTTON payload) = SOME (H4U.getAttrs payload) + | tokGetAttrs (STARTCAPTION payload) = SOME (H4U.getAttrs payload) + | tokGetAttrs (STARTCITE payload) = SOME (H4U.getAttrs payload) + | tokGetAttrs (STARTCODE payload) = SOME (H4U.getAttrs payload) + | tokGetAttrs (STARTCOL payload) = SOME (H4U.getAttrs payload) + | tokGetAttrs (STARTCOLGROUP payload) = SOME (H4U.getAttrs payload) + | tokGetAttrs (STARTDD payload) = SOME (H4U.getAttrs payload) + | tokGetAttrs (STARTDEL payload) = SOME (H4U.getAttrs payload) + | tokGetAttrs (STARTDFN payload) = SOME (H4U.getAttrs payload) + | tokGetAttrs (STARTDIV payload) = SOME (H4U.getAttrs payload) + | tokGetAttrs (STARTDL payload) = SOME (H4U.getAttrs payload) + | tokGetAttrs (STARTDT payload) = SOME (H4U.getAttrs payload) + | tokGetAttrs (STARTEM payload) = SOME (H4U.getAttrs payload) + | tokGetAttrs (STARTFIELDSET payload) = SOME (H4U.getAttrs payload) + | tokGetAttrs (STARTFORM payload) = SOME (H4U.getAttrs payload) + | tokGetAttrs (STARTH1 payload) = SOME (H4U.getAttrs payload) + | tokGetAttrs (STARTH2 payload) = SOME (H4U.getAttrs payload) + | tokGetAttrs (STARTH3 payload) = SOME (H4U.getAttrs payload) + | tokGetAttrs (STARTH4 payload) = SOME (H4U.getAttrs payload) + | tokGetAttrs (STARTH5 payload) = SOME (H4U.getAttrs payload) + | tokGetAttrs (STARTH6 payload) = SOME (H4U.getAttrs payload) + | tokGetAttrs (STARTHEAD payload) = SOME (H4U.getAttrs payload) + | tokGetAttrs (STARTHR payload) = SOME (H4U.getAttrs payload) + | tokGetAttrs (STARTHTML payload) = SOME (H4U.getAttrs payload) + | tokGetAttrs (STARTI payload) = SOME (H4U.getAttrs payload) + | tokGetAttrs (STARTIMG payload) = SOME (H4U.getAttrs payload) + | tokGetAttrs (STARTINPUT payload) = SOME (H4U.getAttrs payload) + | tokGetAttrs (STARTINS payload) = SOME (H4U.getAttrs payload) + | tokGetAttrs (STARTKBD payload) = SOME (H4U.getAttrs payload) + | tokGetAttrs (STARTLABEL payload) = SOME (H4U.getAttrs payload) + | tokGetAttrs (STARTLEGEND payload) = SOME (H4U.getAttrs payload) + | tokGetAttrs (STARTLI payload) = SOME (H4U.getAttrs payload) + | tokGetAttrs (STARTLINK payload) = SOME (H4U.getAttrs payload) + | tokGetAttrs (STARTMAP payload) = SOME (H4U.getAttrs payload) + | tokGetAttrs (STARTMETA payload) = SOME (H4U.getAttrs payload) + | tokGetAttrs (STARTNOSCRIPT payload) = SOME (H4U.getAttrs payload) + | tokGetAttrs (STARTOBJECT payload) = SOME (H4U.getAttrs payload) + | tokGetAttrs (STARTOL payload) = SOME (H4U.getAttrs payload) + | tokGetAttrs (STARTOPTGROUP payload) = SOME (H4U.getAttrs payload) + | tokGetAttrs (STARTOPTION payload) = SOME (H4U.getAttrs payload) + | tokGetAttrs (STARTP payload) = SOME (H4U.getAttrs payload) + | tokGetAttrs (STARTPARAM payload) = SOME (H4U.getAttrs payload) + | tokGetAttrs (STARTPRE payload) = SOME (H4U.getAttrs payload) + | tokGetAttrs (STARTQ payload) = SOME (H4U.getAttrs payload) + | tokGetAttrs (STARTSAMP payload) = SOME (H4U.getAttrs payload) + | tokGetAttrs (STARTSCRIPT payload) = SOME (H4U.getAttrs payload) + | tokGetAttrs (STARTSELECT payload) = SOME (H4U.getAttrs payload) + | tokGetAttrs (STARTSMALL payload) = SOME (H4U.getAttrs payload) + | tokGetAttrs (STARTSPAN payload) = SOME (H4U.getAttrs payload) + | tokGetAttrs (STARTSTRONG payload) = SOME (H4U.getAttrs payload) + | tokGetAttrs (STARTSTYLE payload) = SOME (H4U.getAttrs payload) + | tokGetAttrs (STARTSUB payload) = SOME (H4U.getAttrs payload) + | tokGetAttrs (STARTSUP payload) = SOME (H4U.getAttrs payload) + | tokGetAttrs (STARTTABLE payload) = SOME (H4U.getAttrs payload) + | tokGetAttrs (STARTTBODY payload) = SOME (H4U.getAttrs payload) + | tokGetAttrs (STARTTD payload) = SOME (H4U.getAttrs payload) + | tokGetAttrs (STARTTEXTAREA payload) = SOME (H4U.getAttrs payload) + | tokGetAttrs (STARTTFOOT payload) = SOME (H4U.getAttrs payload) + | tokGetAttrs (STARTTH payload) = SOME (H4U.getAttrs payload) + | tokGetAttrs (STARTTHEAD payload) = SOME (H4U.getAttrs payload) + | tokGetAttrs (STARTTITLE payload) = SOME (H4U.getAttrs payload) + | tokGetAttrs (STARTTR payload) = SOME (H4U.getAttrs payload) + | tokGetAttrs (STARTTT payload) = SOME (H4U.getAttrs payload) + | tokGetAttrs (STARTUL payload) = SOME (H4U.getAttrs payload) + | tokGetAttrs (STARTVAR payload) = SOME (H4U.getAttrs payload) + | tokGetAttrs (STARTAPPLET payload) = SOME (H4U.getAttrs payload) + | tokGetAttrs (STARTBASEFONT payload) = SOME (H4U.getAttrs payload) + | tokGetAttrs (STARTCENTER payload) = SOME (H4U.getAttrs payload) + | tokGetAttrs (STARTDIR payload) = SOME (H4U.getAttrs payload) + | tokGetAttrs (STARTFONT payload) = SOME (H4U.getAttrs payload) + | tokGetAttrs (STARTIFRAME payload) = SOME (H4U.getAttrs payload) + | tokGetAttrs (STARTISINDEX payload) = SOME (H4U.getAttrs payload) + | tokGetAttrs (STARTMENU payload) = SOME (H4U.getAttrs payload) + | tokGetAttrs (STARTS payload) = SOME (H4U.getAttrs payload) + | tokGetAttrs (STARTSTRIKE payload) = SOME (H4U.getAttrs payload) + | tokGetAttrs (STARTU payload) = SOME (H4U.getAttrs payload) + | tokGetAttrs (STARTFRAME payload) = SOME (H4U.getAttrs payload) + | tokGetAttrs (STARTFRAMESET payload) = SOME (H4U.getAttrs payload) + | tokGetAttrs (STARTNOFRAMES payload) = SOME (H4U.getAttrs payload) + | tokGetAttrs _ = NONE + +end (* HTML4TokenUtils *) + +(* ______________________________________________________________________ + End of html4-token-utils.sml + ______________________________________________________________________ *) diff --git a/smlnj-lib/HTML4/html4-utils.sml b/smlnj-lib/HTML4/html4-utils.sml new file mode 100644 index 0000000..bf8186e --- /dev/null +++ b/smlnj-lib/HTML4/html4-utils.sml @@ -0,0 +1,197 @@ +(* html4-utils.sml + * + * COPYRIGHT (c) 2014 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * Defines a set of utility data types and functions for the HTML 4 parser. + *) + +structure HTML4Utils = struct + +(* ____________________________________________________________ + Parse trees + *) + +datatype 'a parsetree = Nd of Atom.atom * 'a parsetree list + | Lf of 'a + +(* ____________________________________________________________ + Data structure and utilities for element tokens + *) + +type tag_payload = string * (Atom.atom * string option) list + +fun attrToStr (name, NONE) = Atom.toString name + | attrToStr (name, SOME a_val) = String.concat [Atom.toString name, "=", + a_val] + +fun attrsToStr attrs = String.concatWith " " (map attrToStr attrs) + +fun payloadToStr (payload, []) = payload + | payloadToStr (_, attrs as (attr :: _)) = attrsToStr attrs + +val getAttrs : tag_payload -> (Atom.atom * string option) list = #2 + +(* ____________________________________________________________ + Streams + *) + +datatype 'a stream = StreamCons of 'a * (unit -> 'a stream) + | StreamNil + +exception EmptyStream + +fun stream_hd (StreamCons (v, _)) = v + | stream_hd StreamNil = raise EmptyStream + +fun stream_tl (StreamCons (_, rst)) = rst() + | stream_tl StreamNil = raise EmptyStream + +fun stream_nth (stream, 0) = stream_hd stream + | stream_nth (stream, idx) = stream_nth(stream_tl stream, idx - 1) + +fun stream_empty StreamNil = true + | stream_empty (StreamCons _) = false + +fun stream_concat (StreamNil, stream2) = stream2 + | stream_concat (StreamCons(hdval, tl_thunk), stream2) = + StreamCons(hdval, fn () => (stream_concat (tl_thunk(), stream2))) + +fun stream_concatl [] = StreamNil + | stream_concatl (StreamNil :: streams) = stream_concatl streams + | stream_concatl ((StreamCons(hdval, tl_thunk)) :: streams) = + StreamCons(hdval, fn () => (stream_concatl ((tl_thunk())::streams))) + +(* stream_concatt() - Special concat that allows a stream thunk to be +appended to the tail of a stream. *) + +fun stream_concatt (StreamNil, tl_thunk2) = tl_thunk2() + | stream_concatt (StreamCons(hdval, tl_thunk1), tl_thunk2) = + StreamCons(hdval, fn () => stream_concatt (tl_thunk1(), tl_thunk2)) + +fun stream_map mapfn StreamNil = StreamNil + | stream_map mapfn (StreamCons (hdval, tl_thunk)) = + StreamCons(mapfn hdval, fn () => (stream_map mapfn (tl_thunk()))) + +(* stream_maps() - Full blown transduction from one kind of stream to +another, where the mapper returns a stream. This allows one to zero +and one to many mappings, as opposed to stream_map() which only allows +one to one maps. *) + +fun stream_maps mapsfn instrm = + (case instrm of + StreamNil => StreamNil + | StreamCons(crnt_hd, tl_thunk) => + let val outstrm_front = mapsfn crnt_hd + fun tl_thunk' () = stream_maps mapsfn (tl_thunk ()) + in stream_concatt(outstrm_front, tl_thunk') end) + +fun stream_app appfn StreamNil = () + | stream_app appfn (StreamCons (hdval, tl_thunk)) = + (appfn hdval; stream_app appfn (tl_thunk())); + +fun stream_filter pred StreamNil = StreamNil + | stream_filter pred (StreamCons (hdval, tl_thunk)) = + if pred hdval then StreamCons(hdval, + fn () => (stream_filter pred (tl_thunk()))) + else stream_filter pred (tl_thunk()) + +fun stream_foldl foldlfn acc StreamNil = acc + | stream_foldl foldlfn acc (StreamCons(hd_val, tl_thunk)) = + stream_foldl foldlfn (foldlfn(hd_val, acc)) (tl_thunk()) + +fun stream_singleton soleval = StreamCons(soleval, fn () => StreamNil) + +fun stream_inf infval = StreamCons(infval, fn () => (stream_inf infval)) + +fun stream_fromList [] = StreamNil + | stream_fromList (elem::elems) = + StreamCons(elem, fn () => stream_fromList elems) + +(* ____________________________________________________________ + Parse tree streams + *) + +datatype 'a parsevisitation = EnterNT of Atom.atom + | ExitNT of Atom.atom + | VisitT of 'a + +fun visitationToString _ (EnterNT ntAtom) = + concat["entry of ", Atom.toString ntAtom, " nonterminal"] + | visitationToString _ (ExitNT ntAtom) = + concat["exit of ", Atom.toString ntAtom, " nonterminal"] + | visitationToString termToString (VisitT terminal) = + concat["vistation of ", termToString terminal, " terminal"] + +fun visitationSame _ (EnterNT ntAtom, EnterNT ntAtom') = Atom.same(ntAtom, ntAtom') + | visitationSame _ (ExitNT ntAtom, ExitNT ntAtom') = Atom.same(ntAtom, ntAtom') + | visitationSame termSame (VisitT term, VisitT term') = termSame(term, term') + | visitationSame _ _ = false + +fun parsetreeToVisitationStream (node as (Nd (ntAtom, children))) = + let fun tl_thunk () = + let val children' = map parsetreeToVisitationStream children + in + stream_concat(stream_concatl children', + stream_singleton (ExitNT ntAtom)) + end + in + StreamCons(EnterNT ntAtom, tl_thunk) + end + | parsetreeToVisitationStream (node as (Lf payload)) = + StreamCons(VisitT payload, fn () => StreamNil) + +fun visitationStreamToParsetree strm = + let fun handleVisit (EnterNT _, (spine, peers)) = + (peers :: spine, []) + | handleVisit (ExitNT ntAtom, ((peers :: spine), children')) = + (spine, (Nd (ntAtom, rev children')) :: peers) + | handleVisit (VisitT term, (spine, peers)) = + (spine, (Lf term) :: peers) + val (_, result :: _) = stream_foldl handleVisit ([], []) strm + in result end + +fun parsetreeStreamMapT maptfn = + let fun transduce StreamNil = StreamNil + | transduce (StreamCons(crnt_hd, tl_thunk)) = + let val hd' = case crnt_hd of + VisitT term => VisitT (maptfn term) + | _ => crnt_hd + fun tl_thunk' () = transduce (tl_thunk ()) + in StreamCons(hd', tl_thunk') end + in transduce end + +(* parsetreeStreamMapTStream(): given a function that maps from +terminals to a parse tree visitation stream, do a map over an existing +visitation stream. This should be useful for mapping some placeholder +token into a synthetic nonterminal or list of terminals. *) + +fun parsetreeStreamMapTStream (guardfn, maptsfn) = + let fun transduce StreamNil = StreamNil + | transduce (StreamCons(crnt_hd, tl_thunk)) = + let fun tl_thunk' () = transduce (tl_thunk ()) + in case crnt_hd of + VisitT term => if (guardfn term) + then stream_concatt(maptsfn term, + tl_thunk') + else StreamCons(crnt_hd, tl_thunk') + | _ => StreamCons(crnt_hd, tl_thunk') + end + in transduce end + +fun mkParsetreeStreamToString termToString strm = + let fun handleVisit (EnterNT ntAtom, (indent, outs)) = + (String.concat [indent, " "], + (String.concat [indent, Atom.toString ntAtom, "\n"] :: outs)) + | handleVisit (ExitNT ntAtom, (indent, outs)) = + (String.extract(indent, 1, NONE), outs) + | handleVisit (VisitT term, (indent, outs)) = + (indent, String.concat [indent, termToString term, "\n"] :: outs) + val (_, outs) = stream_foldl handleVisit ("", []) strm + in String.concat(rev outs) end + +end + +(* ______________________________________________________________________ + End of html4-utils.sml + ______________________________________________________________________ *) diff --git a/smlnj-lib/HTML4/html4.g b/smlnj-lib/HTML4/html4.g new file mode 100644 index 0000000..8e8c8ec --- /dev/null +++ b/smlnj-lib/HTML4/html4.g @@ -0,0 +1,929 @@ +(* html4.g + * + * COPYRIGHT (c) 2014 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + *) + +%name HTML4; + +%defs( + +open HTML4Utils + +fun optToList NONE = [] + | optToList (SOME thing) = [thing] + +fun optListToList NONE = [] + | optListToList (SOME thing) = thing + +); + +%tokens : OPENTAG of Atom.atom * HTML4Utils.tag_payload + | CLOSETAG of Atom.atom + | COMMENT of string + | PCDATA of string + | DOCTYPE of string + | CHAR_REF of IntInf.int + | ENTITY_REF of Atom.atom + | XML_PROCESSING of string + (* HTML 4 element tokens. *) + | STARTA of HTML4Utils.tag_payload + | ENDA + | STARTABBR of HTML4Utils.tag_payload + | ENDABBR + | STARTACRONYM of HTML4Utils.tag_payload + | ENDACRONYM + | STARTADDRESS of HTML4Utils.tag_payload + | ENDADDRESS + | STARTAPPLET of HTML4Utils.tag_payload + | ENDAPPLET + | STARTAREA of HTML4Utils.tag_payload + (* No END tag for AREA element. *) + | STARTB of HTML4Utils.tag_payload + | ENDB + | STARTBASE of HTML4Utils.tag_payload + (* No END tag for BASE element. *) + | STARTBASEFONT of HTML4Utils.tag_payload + (* No END tag for BASEFONT element. *) + | STARTBDO of HTML4Utils.tag_payload + | ENDBDO + | STARTBIG of HTML4Utils.tag_payload + | ENDBIG + | STARTBLOCKQUOTE of HTML4Utils.tag_payload + | ENDBLOCKQUOTE + | STARTBODY of HTML4Utils.tag_payload + | ENDBODY + | STARTBR of HTML4Utils.tag_payload + (* No END tag for BR element. *) + | STARTBUTTON of HTML4Utils.tag_payload + | ENDBUTTON + | STARTCAPTION of HTML4Utils.tag_payload + | ENDCAPTION + | STARTCENTER of HTML4Utils.tag_payload + | ENDCENTER + | STARTCITE of HTML4Utils.tag_payload + | ENDCITE + | STARTCODE of HTML4Utils.tag_payload + | ENDCODE + | STARTCOL of HTML4Utils.tag_payload + (* No END tag for COL element. *) + | STARTCOLGROUP of HTML4Utils.tag_payload + | ENDCOLGROUP + | STARTDD of HTML4Utils.tag_payload + | ENDDD + | STARTDEL of HTML4Utils.tag_payload + | ENDDEL + | STARTDFN of HTML4Utils.tag_payload + | ENDDFN + | STARTDIR of HTML4Utils.tag_payload + | ENDDIR + | STARTDIV of HTML4Utils.tag_payload + | ENDDIV + | STARTDL of HTML4Utils.tag_payload + | ENDDL + | STARTDT of HTML4Utils.tag_payload + | ENDDT + | STARTEM of HTML4Utils.tag_payload + | ENDEM + | STARTFIELDSET of HTML4Utils.tag_payload + | ENDFIELDSET + | STARTFONT of HTML4Utils.tag_payload + | ENDFONT + | STARTFORM of HTML4Utils.tag_payload + | ENDFORM + | STARTFRAME of HTML4Utils.tag_payload + (* No END tag for FRAME element. *) + | STARTFRAMESET of HTML4Utils.tag_payload + | ENDFRAMESET + | STARTH1 of HTML4Utils.tag_payload + | ENDH1 + | STARTH2 of HTML4Utils.tag_payload + | ENDH2 + | STARTH3 of HTML4Utils.tag_payload + | ENDH3 + | STARTH4 of HTML4Utils.tag_payload + | ENDH4 + | STARTH5 of HTML4Utils.tag_payload + | ENDH5 + | STARTH6 of HTML4Utils.tag_payload + | ENDH6 + | STARTHEAD of HTML4Utils.tag_payload + | ENDHEAD + | STARTHR of HTML4Utils.tag_payload + (* No END tag for HR element. *) + | STARTHTML of HTML4Utils.tag_payload + | ENDHTML + | STARTI of HTML4Utils.tag_payload + | ENDI + | STARTIFRAME of HTML4Utils.tag_payload + | ENDIFRAME + | STARTIMG of HTML4Utils.tag_payload + (* No END tag for IMG element. *) + | STARTINPUT of HTML4Utils.tag_payload + (* No END tag for INPUT element. *) + | STARTINS of HTML4Utils.tag_payload + | ENDINS + | STARTISINDEX of HTML4Utils.tag_payload + (* No END tag for ISINDEX element. *) + | STARTKBD of HTML4Utils.tag_payload + | ENDKBD + | STARTLABEL of HTML4Utils.tag_payload + | ENDLABEL + | STARTLEGEND of HTML4Utils.tag_payload + | ENDLEGEND + | STARTLI of HTML4Utils.tag_payload + | ENDLI + | STARTLINK of HTML4Utils.tag_payload + (* No END tag for LINK element. *) + | STARTMAP of HTML4Utils.tag_payload + | ENDMAP + | STARTMENU of HTML4Utils.tag_payload + | ENDMENU + | STARTMETA of HTML4Utils.tag_payload + (* No END tag for META element. *) + | STARTNOFRAMES of HTML4Utils.tag_payload + | ENDNOFRAMES + | STARTNOSCRIPT of HTML4Utils.tag_payload + | ENDNOSCRIPT + | STARTOBJECT of HTML4Utils.tag_payload + | ENDOBJECT + | STARTOL of HTML4Utils.tag_payload + | ENDOL + | STARTOPTGROUP of HTML4Utils.tag_payload + | ENDOPTGROUP + | STARTOPTION of HTML4Utils.tag_payload + | ENDOPTION + | STARTP of HTML4Utils.tag_payload + | ENDP + | STARTPARAM of HTML4Utils.tag_payload + (* No END tag for PARAM element. *) + | STARTPRE of HTML4Utils.tag_payload + | ENDPRE + | STARTQ of HTML4Utils.tag_payload + | ENDQ + | STARTS of HTML4Utils.tag_payload + | ENDS + | STARTSAMP of HTML4Utils.tag_payload + | ENDSAMP + | STARTSCRIPT of HTML4Utils.tag_payload + | ENDSCRIPT + | STARTSELECT of HTML4Utils.tag_payload + | ENDSELECT + | STARTSMALL of HTML4Utils.tag_payload + | ENDSMALL + | STARTSPAN of HTML4Utils.tag_payload + | ENDSPAN + | STARTSTRIKE of HTML4Utils.tag_payload + | ENDSTRIKE + | STARTSTRONG of HTML4Utils.tag_payload + | ENDSTRONG + | STARTSTYLE of HTML4Utils.tag_payload + | ENDSTYLE + | STARTSUB of HTML4Utils.tag_payload + | ENDSUB + | STARTSUP of HTML4Utils.tag_payload + | ENDSUP + | STARTTABLE of HTML4Utils.tag_payload + | ENDTABLE + | STARTTBODY of HTML4Utils.tag_payload + | ENDTBODY + | STARTTD of HTML4Utils.tag_payload + | ENDTD + | STARTTEXTAREA of HTML4Utils.tag_payload + | ENDTEXTAREA + | STARTTFOOT of HTML4Utils.tag_payload + | ENDTFOOT + | STARTTH of HTML4Utils.tag_payload + | ENDTH + | STARTTHEAD of HTML4Utils.tag_payload + | ENDTHEAD + | STARTTITLE of HTML4Utils.tag_payload + | ENDTITLE + | STARTTR of HTML4Utils.tag_payload + | ENDTR + | STARTTT of HTML4Utils.tag_payload + | ENDTT + | STARTU of HTML4Utils.tag_payload + | ENDU + | STARTUL of HTML4Utils.tag_payload + | ENDUL + | STARTVAR of HTML4Utils.tag_payload + | ENDVAR +; + +%start document; + +%entry body, flow, block, inline, cdata_opt; + +document : cdata_opt + (DOCTYPE cdata_opt => ((Lf (Tok.DOCTYPE DOCTYPE)) :: cdata_opt))? + (STARTHTML cdata_opt + => ((Lf (Tok.STARTHTML STARTHTML)) :: cdata_opt))? + head + (body | frameset) + (ENDHTML cdata_opt => ((Lf (Tok.ENDHTML)) :: cdata_opt))? + => (Nd (Atom.atom "DOCUMENT", + cdata_opt @ (optListToList SR1) @ (optListToList SR2) @ + (head :: SR3 :: (optListToList SR4)))) +; + +(* ______________________________________________________________________ + HEAD and related elements + ______________________________________________________________________ *) + +head : (STARTHEAD cdata_opt => ((Lf (Tok.STARTHEAD STARTHEAD)) :: cdata_opt))? + (head_content cdata_opt => (head_content :: cdata_opt))* + (ENDHEAD cdata_opt => ((Lf (Tok.ENDHEAD)) :: cdata_opt))? + => (Nd (Atom.atom "HEAD", + (optListToList SR1) @ (foldr op@ [] SR2) @ (optListToList SR3))) +; + +head_content : title | base | script | style | meta | link | object +; + +title : STARTTITLE cdata_opt ENDTITLE + => (Nd (Atom.atom "TITLE", + (Lf (Tok.STARTTITLE STARTTITLE)) :: + (cdata_opt @ [Lf (Tok.ENDTITLE)]))) +; + +base : STARTBASE + => (Nd (Atom.atom "BASE", [Lf (Tok.STARTBASE STARTBASE)])) +; + +script : STARTSCRIPT cdata_opt ENDSCRIPT + => (Nd (Atom.atom "SCRIPT", + (Lf (Tok.STARTSCRIPT STARTSCRIPT)) :: + (cdata_opt @ [Lf (Tok.ENDSCRIPT)]))) +; + +style : STARTSTYLE cdata_opt ENDSTYLE + => (Nd (Atom.atom "STYLE", + (Lf (Tok.STARTSTYLE STARTSTYLE)) :: + (cdata_opt @ [Lf (Tok.ENDSTYLE)]))) +; + +meta : STARTMETA + => (Nd (Atom.atom "META", [Lf (Tok.STARTMETA STARTMETA)])) +; + +link : STARTLINK + => (Nd (Atom.atom "LINK", [Lf (Tok.STARTLINK STARTLINK)])) +; + +object : STARTOBJECT (param | flow)* ENDOBJECT + => (Nd (Atom.atom "OBJECT", + (Lf (Tok.STARTOBJECT STARTOBJECT)) :: + (SR @ [Lf (Tok.ENDOBJECT)]))) +; + +param : STARTPARAM + => (Nd (Atom.atom "PARAM", [(Lf (Tok.STARTPARAM STARTPARAM))])) +; + +(* ______________________________________________________________________ + BODY and related elements + ______________________________________________________________________ *) + +body : STARTBODY body_rest + => (Nd (Atom.atom "BODY", + (Lf (Tok.STARTBODY STARTBODY)) :: body_rest)) + | (block | ins | del) body_rest + => (Nd (Atom.atom "BODY", SR :: body_rest)) +; + +body_rest : (block | script | ins | del | cdata)* + (ENDBODY cdata_opt => ((Lf (Tok.ENDBODY)) :: cdata_opt))? + => (SR1 @ (optListToList SR2)) +; + +flow : block + | inline +; + +block : p + | heading + | list + | preformatted + | dl + | div + | noscript + | blockquote + | form + | hr + | table + | fieldset + | address + | block_loose +; + +block_loose : center + | isindex +; + +heading : h1 + | h2 + | h3 + | h4 + | h5 + | h6 +; + +list : ul + | ol + | list_loose +; + +list_loose : dir + | menu +; + +preformatted : pre +; + +inline : fontstyle + | phrase + | special + | formctrl + | cdata +; + +fontstyle : tt + | i + | b + | big + | small + | fontstyle_loose +; + +fontstyle_loose : u + | s + | strike +; + +phrase : em + | strong + | dfn + | code + | samp + | kbd + | var + | cite + | abbr + | acronym +; + +special : a + | img + | object + | br + | script + | map + | q + | sub + | sup + | span + | bdo + | special_loose +; + +special_loose : applet + | basefont + | font + | iframe +; + +formctrl : input + | select + | textarea + | label + | button +; + +(* Actual elements *) + +a : STARTA inline* ENDA + => (Nd (Atom.atom "A", + (Lf (Tok.STARTA STARTA)) :: (inline @ [Lf (Tok.ENDA)]))) +; + +abbr : STARTABBR inline* ENDABBR + => (Nd (Atom.atom "ABBR", + (Lf (Tok.STARTABBR STARTABBR)) :: + (inline @ [Lf (Tok.ENDABBR)]))) +; + +acronym : STARTACRONYM inline* ENDACRONYM + => (Nd (Atom.atom "ACRONYM", + (Lf (Tok.STARTACRONYM STARTACRONYM)) :: + (inline @ [Lf (Tok.ENDACRONYM)]))) +; + +address : STARTADDRESS inline* ENDADDRESS + => (Nd (Atom.atom "ADDRESS", + (Lf (Tok.STARTADDRESS STARTADDRESS)) :: + (inline @ [Lf (Tok.ENDADDRESS)]))) +; + +applet : STARTAPPLET (param | flow)* ENDAPPLET + => (Nd (Atom.atom "APPLET", + (Lf (Tok.STARTAPPLET STARTAPPLET)) :: + (SR @ [Lf (Tok.ENDAPPLET)]))) +; + +area : STARTAREA + => (Nd (Atom.atom "AREA", [Lf (Tok.STARTAREA STARTAREA)])) +; + +b : STARTB inline* ENDB + => (Nd (Atom.atom "B", + (Lf (Tok.STARTB STARTB)) :: (inline @ [Lf (Tok.ENDB)]))) +; + +basefont : STARTBASEFONT + => (Nd (Atom.atom "BASEFONT", [Lf (Tok.STARTBASEFONT STARTBASEFONT)])) +; + +bdo : STARTBDO inline* ENDBDO + => (Nd (Atom.atom "BDO", + (Lf (Tok.STARTBDO STARTBDO)) :: + (inline @ [Lf (Tok.ENDBDO)]))) +; + +big : STARTBIG inline* ENDBIG + => (Nd (Atom.atom "BIG", + (Lf (Tok.STARTBIG STARTBIG)) :: + (inline @ [Lf (Tok.ENDBIG)]))) +; + +blockquote : STARTBLOCKQUOTE (block | script | cdata)+ ENDBLOCKQUOTE + => (Nd (Atom.atom "BLOCKQUOTE", + (Lf (Tok.STARTBLOCKQUOTE STARTBLOCKQUOTE)) :: + (SR @ [Lf (Tok.ENDBLOCKQUOTE)]))) +; + +br : STARTBR + => (Nd (Atom.atom "BR", [Lf (Tok.STARTBR STARTBR)])) +; + +button : STARTBUTTON flow* ENDBUTTON + => (Nd (Atom.atom "BUTTON", + (Lf (Tok.STARTBUTTON STARTBUTTON)) :: + (flow @ [Lf (Tok.ENDBUTTON)]))) +; + +caption : STARTCAPTION inline* ENDCAPTION + => (Nd (Atom.atom "CAPTION", + (Lf (Tok.STARTCAPTION STARTCAPTION)) :: + (inline @ [Lf (Tok.ENDCAPTION)]))) +; + +center : STARTCENTER flow* ENDCENTER + => (Nd (Atom.atom "CENTER", + (Lf (Tok.STARTCENTER STARTCENTER)) :: + (flow @ [Lf (Tok.ENDCENTER)]))) +; + +cite : STARTCITE inline* ENDCITE + => (Nd (Atom.atom "CITE", + (Lf (Tok.STARTCITE STARTCITE)) :: + (inline @ [Lf (Tok.ENDCITE)]))) +; + +code : STARTCODE inline* ENDCODE + => (Nd (Atom.atom "CODE", + (Lf (Tok.STARTCODE STARTCODE)) :: + (inline @ [Lf (Tok.ENDCODE)]))) +; + +col : STARTCOL + => (Nd (Atom.atom "COL", [Lf (Tok.STARTCOL STARTCOL)])) +; + +colgroup : STARTCOLGROUP cdata_opt + (col cdata_opt => (col :: cdata_opt))* + (ENDCOLGROUP => (Lf (Tok.ENDCOLGROUP)))? + => (Nd (Atom.atom "COLGROUP", + (Lf (Tok.STARTCOLGROUP STARTCOLGROUP)) :: + (cdata_opt @ (foldr op@ [] SR1) @ (optToList SR2)))) +; + +dd : STARTDD flow* (ENDDD => (Lf (Tok.ENDDD)))? + => (Nd (Atom.atom "DD", + (Lf (Tok.STARTDD STARTDD)) :: (flow @ (optToList SR)))) +; + +del : STARTDEL flow* ENDDEL + => (Nd (Atom.atom "DEL", + (Lf (Tok.STARTDEL STARTDEL)) :: + (flow @ [Lf (Tok.ENDDEL)]))) +; + +dfn : STARTDFN inline* ENDDFN + => (Nd (Atom.atom "DFN", + (Lf (Tok.STARTDFN STARTDFN)) :: + (inline @ [Lf (Tok.ENDDFN)]))) +; + +dir : STARTDIR cdata_opt li+ ENDDIR + => (Nd (Atom.atom "DIR", + (Lf (Tok.STARTDIR STARTDIR)) :: + (cdata_opt @ li @ [Lf (Tok.ENDDIR)]))) +; + +div : STARTDIV flow* ENDDIV + => (Nd (Atom.atom "DIV", + (Lf (Tok.STARTDIV STARTDIV)) :: + (flow @ [Lf (Tok.ENDDIV)]))) +; + +dl : STARTDL cdata_opt (dt | dd)+ ENDDL + => (Nd (Atom.atom "DL", + (Lf (Tok.STARTDL STARTDL)) :: + (cdata_opt @ SR @ [Lf (Tok.ENDDL)]))) +; + +dt : STARTDT inline* (ENDDT => (Lf (Tok.ENDDT)))? + => (Nd (Atom.atom "DT", + (Lf (Tok.STARTDT STARTDT)) :: (inline @ (optToList SR)))) +; + +em : STARTEM inline* ENDEM + => (Nd (Atom.atom "EM", + (Lf (Tok.STARTEM STARTEM)) :: (inline @ [Lf (Tok.ENDEM)]))) +; + +fieldset : STARTFIELDSET cdata_opt legend flow* ENDFIELDSET + => (Nd (Atom.atom "FIELDSET", + (Lf (Tok.STARTFIELDSET STARTFIELDSET)) :: + (cdata_opt @ [legend] @ flow @ + [Lf (Tok.ENDFIELDSET)]))) +; + +font : STARTFONT inline* ENDFONT + => (Nd (Atom.atom "FONT", + (Lf (Tok.STARTFONT STARTFONT)) :: + (inline @ [Lf (Tok.ENDFONT)]))) +; + +form : STARTFORM (cdata | block | script)+ ENDFORM + => (Nd (Atom.atom "FORM", + (Lf (Tok.STARTFORM STARTFORM)) :: + (SR @ [Lf (Tok.ENDFORM)]))) +; + +frame : STARTFRAME + => (Nd (Atom.atom "FRAME", [Lf (Tok.STARTFRAME STARTFRAME)])) +; + +frameset : STARTFRAMESET (frameset | frame | cdata)+ + (noframes cdata_opt => (noframes::cdata_opt))? ENDFRAMESET + => (Nd (Atom.atom "FRAMESET", + (Lf (Tok.STARTFRAMESET STARTFRAMESET)) :: + (SR1 @ (optListToList SR2) @ [Lf (Tok.ENDFRAMESET)]))) +; + +h1 : STARTH1 inline* ENDH1 + => (Nd (Atom.atom "H1", + (Lf (Tok.STARTH1 STARTH1)) :: (inline @ [Lf (Tok.ENDH1)]))) +; + +h2 : STARTH2 inline* ENDH2 + => (Nd (Atom.atom "H2", + (Lf (Tok.STARTH2 STARTH2)) :: (inline @ [Lf (Tok.ENDH2)]))) +; + +h3 : STARTH3 inline* ENDH3 + => (Nd (Atom.atom "H3", + (Lf (Tok.STARTH3 STARTH3)) :: (inline @ [Lf (Tok.ENDH3)]))) +; + +h4 : STARTH4 inline* ENDH4 + => (Nd (Atom.atom "H4", + (Lf (Tok.STARTH4 STARTH4)) :: (inline @ [Lf (Tok.ENDH4)]))) +; + +h5 : STARTH5 inline* ENDH5 + => (Nd (Atom.atom "H5", + (Lf (Tok.STARTH5 STARTH5)) :: (inline @ [Lf (Tok.ENDH5)]))) +; + +h6 : STARTH6 inline* ENDH6 + => (Nd (Atom.atom "H6", + (Lf (Tok.STARTH6 STARTH6)) :: (inline @ [Lf (Tok.ENDH6)]))) +; + +hr : STARTHR + => (Nd (Atom.atom "HR", [Lf (Tok.STARTHR STARTHR)])) +; + +i : STARTI inline* ENDI + => (Nd (Atom.atom "I", + (Lf (Tok.STARTI STARTI)) :: + (inline @ [Lf (Tok.ENDI)]))) +; + +iframe : STARTIFRAME flow* ENDIFRAME + => (Nd (Atom.atom "IFRAME", + (Lf (Tok.STARTIFRAME STARTIFRAME)) :: + (flow @ [Lf (Tok.ENDIFRAME)]))) +; + +img : STARTIMG + => (Nd (Atom.atom "IMG", [Lf (Tok.STARTIMG STARTIMG)])) +; + +input : STARTINPUT + => (Nd (Atom.atom "INPUT", [Lf (Tok.STARTINPUT STARTINPUT)])) +; + +ins : STARTINS flow* ENDINS + => (Nd (Atom.atom "INS", + (Lf (Tok.STARTINS STARTINS)) :: + (flow @ [Lf (Tok.ENDINS)]))) +; + +isindex : STARTISINDEX + => (Nd (Atom.atom "ISINDEX", [Lf (Tok.STARTISINDEX STARTISINDEX)])) +; + +kbd : STARTKBD inline* ENDKBD + => (Nd (Atom.atom "KBD", + (Lf (Tok.STARTKBD STARTKBD)) :: + (inline @ [Lf (Tok.ENDKBD)]))) +; + +label : STARTLABEL inline* ENDLABEL + => (Nd (Atom.atom "LABEL", + (Lf (Tok.STARTLABEL STARTLABEL)) :: + (inline @ [Lf (Tok.ENDLABEL)]))) +; + +legend : STARTLEGEND inline* ENDLEGEND + => (Nd (Atom.atom "LEGEND", + (Lf (Tok.STARTLEGEND STARTLEGEND)) :: + (inline @ [Lf (Tok.ENDLEGEND)]))) +; + +li : STARTLI flow* (ENDLI => (Lf (Tok.ENDLI)))? + => (Nd (Atom.atom "LI", + (Lf (Tok.STARTLI STARTLI)) :: (flow @ (optToList SR)))) +; + +map : STARTMAP (cdata | block | area)+ ENDMAP + => (Nd (Atom.atom "MAP", + (Lf (Tok.STARTMAP STARTMAP)) :: (SR @ [Lf (Tok.ENDMAP)]))) +; + +menu : STARTMENU cdata_opt li+ ENDMENU + => (Nd (Atom.atom "MENU", + (Lf (Tok.STARTMENU STARTMENU)) :: + (cdata_opt @ li @ [Lf (Tok.ENDMENU)]))) +; + +noframes : STARTNOFRAMES body ENDNOFRAMES + => (Nd (Atom.atom "NOFRAMES", + [Lf (Tok.STARTNOFRAMES STARTNOFRAMES), body, + Lf (Tok.ENDNOFRAMES)])) +; + +noscript : STARTNOSCRIPT (cdata | block)+ ENDNOSCRIPT + => (Nd (Atom.atom "NOSCRIPT", + (Lf (Tok.STARTNOSCRIPT STARTNOSCRIPT)) :: + (SR @ [Lf (Tok.ENDNOSCRIPT)]))) +; + +ol : STARTOL cdata_opt li+ ENDOL + => (Nd (Atom.atom "OL", + (Lf (Tok.STARTOL STARTOL)) :: + (cdata_opt @ li @ [Lf (Tok.ENDOL)]))) +; + +optgroup : STARTOPTGROUP cdata_opt option+ ENDOPTGROUP cdata_opt + => (Nd (Atom.atom "OPTGROUP", + (Lf (Tok.STARTOPTGROUP STARTOPTGROUP)) :: + (cdata_opt1 @ option @ ((Lf (Tok.ENDOPTGROUP)) + :: cdata_opt2)))) +; + +option : STARTOPTION cdata_opt + (ENDOPTION cdata_opt => ((Lf (Tok.ENDOPTION)) :: + cdata_opt))? + => (Nd (Atom.atom "OPTION", + (Lf (Tok.STARTOPTION STARTOPTION)) :: + (cdata_opt @ (optListToList SR)))) +; + +(* TODO: Making the ENDP optional, which is valid, causes +left-recursion for the inline* part. This can be fixed by having a +two state flow nonterminal, which the older HTML library does. *) + +p : STARTP inline* ENDP + => (Nd (Atom.atom "P", + (Lf (Tok.STARTP STARTP)) :: (inline @ [Lf (Tok.ENDP)]))) +; + +pre : STARTPRE inline* ENDPRE + => (Nd (Atom.atom "PRE", + (Lf (Tok.STARTPRE STARTPRE)) :: + (inline @ [Lf (Tok.ENDPRE)]))) +; + +q : STARTQ inline* ENDQ + => (Nd (Atom.atom "Q", + (Lf (Tok.STARTQ STARTQ)) :: (inline @ [Lf (Tok.ENDQ)]))) +; + +s : STARTS inline* ENDS + => (Nd (Atom.atom "S", + (Lf (Tok.STARTS STARTS)) :: (inline @ [Lf (Tok.ENDS)]))) +; + +samp : STARTSAMP inline* ENDSAMP + => (Nd (Atom.atom "SAMP", + (Lf (Tok.STARTSAMP STARTSAMP)) :: + (inline @ [Lf (Tok.ENDSAMP)]))) +; + +select : STARTSELECT cdata_opt (optgroup | option)+ ENDSELECT + => (Nd (Atom.atom "SELECT", + (Lf (Tok.STARTSELECT STARTSELECT)) :: + (cdata_opt @ SR @ [Lf (Tok.ENDSELECT)]))) +; + +small : STARTSMALL inline* ENDSMALL + => (Nd (Atom.atom "SMALL", + (Lf (Tok.STARTSMALL STARTSMALL)) :: + (inline @ [Lf (Tok.ENDSMALL)]))) +; + +span : STARTSPAN inline* ENDSPAN + => (Nd (Atom.atom "SPAN", + (Lf (Tok.STARTSPAN STARTSPAN)) :: + (inline @ [Lf (Tok.ENDSPAN)]))) +; + +strike : STARTSTRIKE inline* ENDSTRIKE + => (Nd (Atom.atom "STRIKE", + (Lf (Tok.STARTSTRIKE STARTSTRIKE)) :: + (inline @ [Lf (Tok.ENDSTRIKE)]))) +; + +strong : STARTSTRONG inline* ENDSTRONG + => (Nd (Atom.atom "STRONG", + (Lf (Tok.STARTSTRONG STARTSTRONG)) :: + (inline @ [Lf (Tok.ENDSTRONG)]))) +; + +sub : STARTSUB inline* ENDSUB + => (Nd (Atom.atom "SUB", + (Lf (Tok.STARTSUB STARTSUB)) :: + (inline @ [Lf (Tok.ENDSUB)]))) +; + +sup : STARTSUP inline* ENDSUP + => (Nd (Atom.atom "SUP", + (Lf (Tok.STARTSUP STARTSUP)) :: + (inline @ [Lf (Tok.ENDSUP)]))) +; + +(* My reading of the HTML DTD indicates the following order of +elements is enforceable: *) + +table : STARTTABLE cdata_opt + (caption cdata_opt => (caption :: cdata_opt))? + col_or_colgroups table_content ENDTABLE + => (Nd (Atom.atom "TABLE", + (Lf (Tok.STARTTABLE STARTTABLE)) :: + (cdata_opt @ (optListToList SR) @ col_or_colgroups @ + table_content @ [Lf (Tok.ENDTABLE)]))) +; + +(* The whole tr+ thing makes the original table production ambiguous: + STARTTABLE ... thead? tfoot? tbody+ ENDTABLE *) + +table_content + : thead tfoot? tbodies + => (thead :: ((optToList tfoot)) @ tbodies) + | tfoot tbodies + => (tfoot :: tbodies) + | tbodies_nostart +; + +col_or_colgroups : (* empty *) + => ([]) + | (col cdata_opt => (col :: cdata_opt))+ + => (foldr op@ [] SR) + | colgroup+ +; + + +tbodies_nostart : (STARTTBODY cdata_opt => + ((Lf (Tok.STARTTBODY STARTTBODY)) :: cdata_opt))? + tr+ tbodies_rest? + => (let val (tbody_rest, tbody_peers) = + case tbodies_rest of + NONE => ([], []) + | SOME tbodies_tup => tbodies_tup + in (Nd (Atom.atom "TBODIES_NOSTART", + (optListToList SR) @ tr @ tbody_rest)) :: + tbody_peers end) +; + +tbodies : STARTTBODY cdata_opt tr+ tbodies_rest + => (let val (tbody_rest, tbody_peers) = tbodies_rest + in (Nd (Atom.atom "TBODIES", + (Lf (Tok.STARTTBODY STARTTBODY)) :: + (cdata_opt @ tr @ tbody_rest))) :: + tbody_peers end) +; + +tbodies_rest : ENDTBODY cdata_opt tbodies? + => ((Lf (Tok.ENDTBODY)) :: cdata_opt, + optListToList tbodies) + | STARTTBODY cdata_opt tr+ tbodies_rest? + => (let val (tbody_rest, tbody_peers) = + case tbodies_rest of NONE => ([], []) + | SOME tbodies_tup => tbodies_tup + in ([], (Nd (Atom.atom "TBODIES_REST", + (Lf (Tok.STARTTBODY STARTTBODY)) :: + (cdata_opt @ tr @ tbody_rest))) :: tbody_peers) + end) +; + +td : STARTTD flow* (ENDTD cdata_opt => ((Lf (Tok.ENDTD)) :: cdata_opt))? + => (Nd (Atom.atom "TD", + (Lf (Tok.STARTTD STARTTD)) :: (flow @ (optListToList SR)))) +; + +textarea : STARTTEXTAREA cdata_opt ENDTEXTAREA + => (Nd (Atom.atom "TEXTAREA", + (Lf (Tok.STARTTEXTAREA STARTTEXTAREA)) :: + (cdata_opt @ [Lf (Tok.ENDTEXTAREA)]))) +; + +tfoot : STARTTFOOT cdata_opt tr+ + (ENDTFOOT cdata_opt => ((Lf (Tok.ENDTFOOT)) :: cdata_opt))? + => (Nd (Atom.atom "TFOOT", + (Lf (Tok.STARTTFOOT STARTTFOOT)) :: (cdata_opt @ tr @ + (optListToList SR)))) +; + +th : STARTTH flow* (ENDTH cdata_opt => ((Lf (Tok.ENDTH)) :: cdata_opt))? + => (Nd (Atom.atom "TH", + (Lf (Tok.STARTTH STARTTH)) :: (flow @ (optListToList SR)))) +; + +thead : STARTTHEAD cdata_opt tr+ + (ENDTHEAD cdata_opt => ((Lf (Tok.ENDTHEAD)) :: cdata_opt))? + => (Nd (Atom.atom "THEAD", + (Lf (Tok.STARTTHEAD STARTTHEAD)) :: (cdata_opt @ tr @ + (optListToList SR)))) +; + +tr : STARTTR cdata_opt (th | td)+ + (ENDTR cdata_opt => ((Lf (Tok.ENDTR)) :: cdata_opt))? + => (Nd (Atom.atom "TR", + (Lf (Tok.STARTTR STARTTR)) :: (cdata_opt @ SR1 @ + (optListToList SR2)))) +; + +tt : STARTTT inline* ENDTT + => (Nd (Atom.atom "TT", + (Lf (Tok.STARTTT STARTTT)) :: (inline @ [Lf (Tok.ENDTT)]))) +; + +u : STARTU inline* ENDU + => (Nd (Atom.atom "U", + (Lf (Tok.STARTU STARTU)) :: (inline @ [Lf (Tok.ENDU)]))) +; + +ul : STARTUL cdata_opt li+ ENDUL + => (Nd (Atom.atom "UL", + ((Lf (Tok.STARTUL STARTUL)) :: (cdata_opt @ li @ + [Lf (Tok.ENDUL)])))) +; + +var : STARTVAR inline* ENDVAR + => (Nd (Atom.atom "VAR", + (Lf (Tok.STARTVAR STARTVAR)) :: (inline @ + [Lf (Tok.ENDVAR)]))) +; + +(* ______________________________________________________________________ + Miscellaneous data nonterminals + ______________________________________________________________________ *) + +cdata : (PCDATA => (Tok.PCDATA PCDATA) + | CHAR_REF => (Tok.CHAR_REF CHAR_REF) + | ENTITY_REF => (Tok.ENTITY_REF ENTITY_REF) + | COMMENT => (Tok.COMMENT COMMENT)) + => ((Lf SR) : HTML4Tokens.token parsetree) +; + +cdata_opt : cdata* => (cdata : HTML4Tokens.token parsetree list) +; + +(* ______________________________________________________________________ + End of html4.g + ______________________________________________________________________ *) diff --git a/smlnj-lib/HTML4/html4.g.sml b/smlnj-lib/HTML4/html4.g.sml new file mode 100644 index 0000000..354b6b6 --- /dev/null +++ b/smlnj-lib/HTML4/html4.g.sml @@ -0,0 +1,8034 @@ +structure HTML4Tokens = + struct + datatype token + = OPENTAG of Atom.atom * HTML4Utils.tag_payload + | CLOSETAG of Atom.atom + | COMMENT of string + | PCDATA of string + | DOCTYPE of string + | CHAR_REF of IntInf.int + | ENTITY_REF of Atom.atom + | XML_PROCESSING of string + | STARTA of HTML4Utils.tag_payload + | ENDA + | STARTABBR of HTML4Utils.tag_payload + | ENDABBR + | STARTACRONYM of HTML4Utils.tag_payload + | ENDACRONYM + | STARTADDRESS of HTML4Utils.tag_payload + | ENDADDRESS + | STARTAPPLET of HTML4Utils.tag_payload + | ENDAPPLET + | STARTAREA of HTML4Utils.tag_payload + | STARTB of HTML4Utils.tag_payload + | ENDB + | STARTBASE of HTML4Utils.tag_payload + | STARTBASEFONT of HTML4Utils.tag_payload + | STARTBDO of HTML4Utils.tag_payload + | ENDBDO + | STARTBIG of HTML4Utils.tag_payload + | ENDBIG + | STARTBLOCKQUOTE of HTML4Utils.tag_payload + | ENDBLOCKQUOTE + | STARTBODY of HTML4Utils.tag_payload + | ENDBODY + | STARTBR of HTML4Utils.tag_payload + | STARTBUTTON of HTML4Utils.tag_payload + | ENDBUTTON + | STARTCAPTION of HTML4Utils.tag_payload + | ENDCAPTION + | STARTCENTER of HTML4Utils.tag_payload + | ENDCENTER + | STARTCITE of HTML4Utils.tag_payload + | ENDCITE + | STARTCODE of HTML4Utils.tag_payload + | ENDCODE + | STARTCOL of HTML4Utils.tag_payload + | STARTCOLGROUP of HTML4Utils.tag_payload + | ENDCOLGROUP + | STARTDD of HTML4Utils.tag_payload + | ENDDD + | STARTDEL of HTML4Utils.tag_payload + | ENDDEL + | STARTDFN of HTML4Utils.tag_payload + | ENDDFN + | STARTDIR of HTML4Utils.tag_payload + | ENDDIR + | STARTDIV of HTML4Utils.tag_payload + | ENDDIV + | STARTDL of HTML4Utils.tag_payload + | ENDDL + | STARTDT of HTML4Utils.tag_payload + | ENDDT + | STARTEM of HTML4Utils.tag_payload + | ENDEM + | STARTFIELDSET of HTML4Utils.tag_payload + | ENDFIELDSET + | STARTFONT of HTML4Utils.tag_payload + | ENDFONT + | STARTFORM of HTML4Utils.tag_payload + | ENDFORM + | STARTFRAME of HTML4Utils.tag_payload + | STARTFRAMESET of HTML4Utils.tag_payload + | ENDFRAMESET + | STARTH1 of HTML4Utils.tag_payload + | ENDH1 + | STARTH2 of HTML4Utils.tag_payload + | ENDH2 + | STARTH3 of HTML4Utils.tag_payload + | ENDH3 + | STARTH4 of HTML4Utils.tag_payload + | ENDH4 + | STARTH5 of HTML4Utils.tag_payload + | ENDH5 + | STARTH6 of HTML4Utils.tag_payload + | ENDH6 + | STARTHEAD of HTML4Utils.tag_payload + | ENDHEAD + | STARTHR of HTML4Utils.tag_payload + | STARTHTML of HTML4Utils.tag_payload + | ENDHTML + | STARTI of HTML4Utils.tag_payload + | ENDI + | STARTIFRAME of HTML4Utils.tag_payload + | ENDIFRAME + | STARTIMG of HTML4Utils.tag_payload + | STARTINPUT of HTML4Utils.tag_payload + | STARTINS of HTML4Utils.tag_payload + | ENDINS + | STARTISINDEX of HTML4Utils.tag_payload + | STARTKBD of HTML4Utils.tag_payload + | ENDKBD + | STARTLABEL of HTML4Utils.tag_payload + | ENDLABEL + | STARTLEGEND of HTML4Utils.tag_payload + | ENDLEGEND + | STARTLI of HTML4Utils.tag_payload + | ENDLI + | STARTLINK of HTML4Utils.tag_payload + | STARTMAP of HTML4Utils.tag_payload + | ENDMAP + | STARTMENU of HTML4Utils.tag_payload + | ENDMENU + | STARTMETA of HTML4Utils.tag_payload + | STARTNOFRAMES of HTML4Utils.tag_payload + | ENDNOFRAMES + | STARTNOSCRIPT of HTML4Utils.tag_payload + | ENDNOSCRIPT + | STARTOBJECT of HTML4Utils.tag_payload + | ENDOBJECT + | STARTOL of HTML4Utils.tag_payload + | ENDOL + | STARTOPTGROUP of HTML4Utils.tag_payload + | ENDOPTGROUP + | STARTOPTION of HTML4Utils.tag_payload + | ENDOPTION + | STARTP of HTML4Utils.tag_payload + | ENDP + | STARTPARAM of HTML4Utils.tag_payload + | STARTPRE of HTML4Utils.tag_payload + | ENDPRE + | STARTQ of HTML4Utils.tag_payload + | ENDQ + | STARTS of HTML4Utils.tag_payload + | ENDS + | STARTSAMP of HTML4Utils.tag_payload + | ENDSAMP + | STARTSCRIPT of HTML4Utils.tag_payload + | ENDSCRIPT + | STARTSELECT of HTML4Utils.tag_payload + | ENDSELECT + | STARTSMALL of HTML4Utils.tag_payload + | ENDSMALL + | STARTSPAN of HTML4Utils.tag_payload + | ENDSPAN + | STARTSTRIKE of HTML4Utils.tag_payload + | ENDSTRIKE + | STARTSTRONG of HTML4Utils.tag_payload + | ENDSTRONG + | STARTSTYLE of HTML4Utils.tag_payload + | ENDSTYLE + | STARTSUB of HTML4Utils.tag_payload + | ENDSUB + | STARTSUP of HTML4Utils.tag_payload + | ENDSUP + | STARTTABLE of HTML4Utils.tag_payload + | ENDTABLE + | STARTTBODY of HTML4Utils.tag_payload + | ENDTBODY + | STARTTD of HTML4Utils.tag_payload + | ENDTD + | STARTTEXTAREA of HTML4Utils.tag_payload + | ENDTEXTAREA + | STARTTFOOT of HTML4Utils.tag_payload + | ENDTFOOT + | STARTTH of HTML4Utils.tag_payload + | ENDTH + | STARTTHEAD of HTML4Utils.tag_payload + | ENDTHEAD + | STARTTITLE of HTML4Utils.tag_payload + | ENDTITLE + | STARTTR of HTML4Utils.tag_payload + | ENDTR + | STARTTT of HTML4Utils.tag_payload + | ENDTT + | STARTU of HTML4Utils.tag_payload + | ENDU + | STARTUL of HTML4Utils.tag_payload + | ENDUL + | STARTVAR of HTML4Utils.tag_payload + | ENDVAR + | EOF + val allToks = [ + ENDA, ENDABBR, ENDACRONYM, ENDADDRESS, ENDAPPLET, ENDB, ENDBDO, ENDBIG, ENDBLOCKQUOTE, ENDBODY, ENDBUTTON, ENDCAPTION, ENDCENTER, ENDCITE, ENDCODE, ENDCOLGROUP, ENDDD, ENDDEL, ENDDFN, ENDDIR, ENDDIV, ENDDL, ENDDT, ENDEM, ENDFIELDSET, ENDFONT, ENDFORM, ENDFRAMESET, ENDH1, ENDH2, ENDH3, ENDH4, ENDH5, ENDH6, ENDHEAD, ENDHTML, ENDI, ENDIFRAME, ENDINS, ENDKBD, ENDLABEL, ENDLEGEND, ENDLI, ENDMAP, ENDMENU, ENDNOFRAMES, ENDNOSCRIPT, ENDOBJECT, ENDOL, ENDOPTGROUP, ENDOPTION, ENDP, ENDPRE, ENDQ, ENDS, ENDSAMP, ENDSCRIPT, ENDSELECT, ENDSMALL, ENDSPAN, ENDSTRIKE, ENDSTRONG, ENDSTYLE, ENDSUB, ENDSUP, ENDTABLE, ENDTBODY, ENDTD, ENDTEXTAREA, ENDTFOOT, ENDTH, ENDTHEAD, ENDTITLE, ENDTR, ENDTT, ENDU, ENDUL, ENDVAR, EOF + ] + fun toString tok = +(case (tok) + of (OPENTAG(_)) => "OPENTAG" + | (CLOSETAG(_)) => "CLOSETAG" + | (COMMENT(_)) => "COMMENT" + | (PCDATA(_)) => "PCDATA" + | (DOCTYPE(_)) => "DOCTYPE" + | (CHAR_REF(_)) => "CHAR_REF" + | (ENTITY_REF(_)) => "ENTITY_REF" + | (XML_PROCESSING(_)) => "XML_PROCESSING" + | (STARTA(_)) => "STARTA" + | (ENDA) => "ENDA" + | (STARTABBR(_)) => "STARTABBR" + | (ENDABBR) => "ENDABBR" + | (STARTACRONYM(_)) => "STARTACRONYM" + | (ENDACRONYM) => "ENDACRONYM" + | (STARTADDRESS(_)) => "STARTADDRESS" + | (ENDADDRESS) => "ENDADDRESS" + | (STARTAPPLET(_)) => "STARTAPPLET" + | (ENDAPPLET) => "ENDAPPLET" + | (STARTAREA(_)) => "STARTAREA" + | (STARTB(_)) => "STARTB" + | (ENDB) => "ENDB" + | (STARTBASE(_)) => "STARTBASE" + | (STARTBASEFONT(_)) => "STARTBASEFONT" + | (STARTBDO(_)) => "STARTBDO" + | (ENDBDO) => "ENDBDO" + | (STARTBIG(_)) => "STARTBIG" + | (ENDBIG) => "ENDBIG" + | (STARTBLOCKQUOTE(_)) => "STARTBLOCKQUOTE" + | (ENDBLOCKQUOTE) => "ENDBLOCKQUOTE" + | (STARTBODY(_)) => "STARTBODY" + | (ENDBODY) => "ENDBODY" + | (STARTBR(_)) => "STARTBR" + | (STARTBUTTON(_)) => "STARTBUTTON" + | (ENDBUTTON) => "ENDBUTTON" + | (STARTCAPTION(_)) => "STARTCAPTION" + | (ENDCAPTION) => "ENDCAPTION" + | (STARTCENTER(_)) => "STARTCENTER" + | (ENDCENTER) => "ENDCENTER" + | (STARTCITE(_)) => "STARTCITE" + | (ENDCITE) => "ENDCITE" + | (STARTCODE(_)) => "STARTCODE" + | (ENDCODE) => "ENDCODE" + | (STARTCOL(_)) => "STARTCOL" + | (STARTCOLGROUP(_)) => "STARTCOLGROUP" + | (ENDCOLGROUP) => "ENDCOLGROUP" + | (STARTDD(_)) => "STARTDD" + | (ENDDD) => "ENDDD" + | (STARTDEL(_)) => "STARTDEL" + | (ENDDEL) => "ENDDEL" + | (STARTDFN(_)) => "STARTDFN" + | (ENDDFN) => "ENDDFN" + | (STARTDIR(_)) => "STARTDIR" + | (ENDDIR) => "ENDDIR" + | (STARTDIV(_)) => "STARTDIV" + | (ENDDIV) => "ENDDIV" + | (STARTDL(_)) => "STARTDL" + | (ENDDL) => "ENDDL" + | (STARTDT(_)) => "STARTDT" + | (ENDDT) => "ENDDT" + | (STARTEM(_)) => "STARTEM" + | (ENDEM) => "ENDEM" + | (STARTFIELDSET(_)) => "STARTFIELDSET" + | (ENDFIELDSET) => "ENDFIELDSET" + | (STARTFONT(_)) => "STARTFONT" + | (ENDFONT) => "ENDFONT" + | (STARTFORM(_)) => "STARTFORM" + | (ENDFORM) => "ENDFORM" + | (STARTFRAME(_)) => "STARTFRAME" + | (STARTFRAMESET(_)) => "STARTFRAMESET" + | (ENDFRAMESET) => "ENDFRAMESET" + | (STARTH1(_)) => "STARTH1" + | (ENDH1) => "ENDH1" + | (STARTH2(_)) => "STARTH2" + | (ENDH2) => "ENDH2" + | (STARTH3(_)) => "STARTH3" + | (ENDH3) => "ENDH3" + | (STARTH4(_)) => "STARTH4" + | (ENDH4) => "ENDH4" + | (STARTH5(_)) => "STARTH5" + | (ENDH5) => "ENDH5" + | (STARTH6(_)) => "STARTH6" + | (ENDH6) => "ENDH6" + | (STARTHEAD(_)) => "STARTHEAD" + | (ENDHEAD) => "ENDHEAD" + | (STARTHR(_)) => "STARTHR" + | (STARTHTML(_)) => "STARTHTML" + | (ENDHTML) => "ENDHTML" + | (STARTI(_)) => "STARTI" + | (ENDI) => "ENDI" + | (STARTIFRAME(_)) => "STARTIFRAME" + | (ENDIFRAME) => "ENDIFRAME" + | (STARTIMG(_)) => "STARTIMG" + | (STARTINPUT(_)) => "STARTINPUT" + | (STARTINS(_)) => "STARTINS" + | (ENDINS) => "ENDINS" + | (STARTISINDEX(_)) => "STARTISINDEX" + | (STARTKBD(_)) => "STARTKBD" + | (ENDKBD) => "ENDKBD" + | (STARTLABEL(_)) => "STARTLABEL" + | (ENDLABEL) => "ENDLABEL" + | (STARTLEGEND(_)) => "STARTLEGEND" + | (ENDLEGEND) => "ENDLEGEND" + | (STARTLI(_)) => "STARTLI" + | (ENDLI) => "ENDLI" + | (STARTLINK(_)) => "STARTLINK" + | (STARTMAP(_)) => "STARTMAP" + | (ENDMAP) => "ENDMAP" + | (STARTMENU(_)) => "STARTMENU" + | (ENDMENU) => "ENDMENU" + | (STARTMETA(_)) => "STARTMETA" + | (STARTNOFRAMES(_)) => "STARTNOFRAMES" + | (ENDNOFRAMES) => "ENDNOFRAMES" + | (STARTNOSCRIPT(_)) => "STARTNOSCRIPT" + | (ENDNOSCRIPT) => "ENDNOSCRIPT" + | (STARTOBJECT(_)) => "STARTOBJECT" + | (ENDOBJECT) => "ENDOBJECT" + | (STARTOL(_)) => "STARTOL" + | (ENDOL) => "ENDOL" + | (STARTOPTGROUP(_)) => "STARTOPTGROUP" + | (ENDOPTGROUP) => "ENDOPTGROUP" + | (STARTOPTION(_)) => "STARTOPTION" + | (ENDOPTION) => "ENDOPTION" + | (STARTP(_)) => "STARTP" + | (ENDP) => "ENDP" + | (STARTPARAM(_)) => "STARTPARAM" + | (STARTPRE(_)) => "STARTPRE" + | (ENDPRE) => "ENDPRE" + | (STARTQ(_)) => "STARTQ" + | (ENDQ) => "ENDQ" + | (STARTS(_)) => "STARTS" + | (ENDS) => "ENDS" + | (STARTSAMP(_)) => "STARTSAMP" + | (ENDSAMP) => "ENDSAMP" + | (STARTSCRIPT(_)) => "STARTSCRIPT" + | (ENDSCRIPT) => "ENDSCRIPT" + | (STARTSELECT(_)) => "STARTSELECT" + | (ENDSELECT) => "ENDSELECT" + | (STARTSMALL(_)) => "STARTSMALL" + | (ENDSMALL) => "ENDSMALL" + | (STARTSPAN(_)) => "STARTSPAN" + | (ENDSPAN) => "ENDSPAN" + | (STARTSTRIKE(_)) => "STARTSTRIKE" + | (ENDSTRIKE) => "ENDSTRIKE" + | (STARTSTRONG(_)) => "STARTSTRONG" + | (ENDSTRONG) => "ENDSTRONG" + | (STARTSTYLE(_)) => "STARTSTYLE" + | (ENDSTYLE) => "ENDSTYLE" + | (STARTSUB(_)) => "STARTSUB" + | (ENDSUB) => "ENDSUB" + | (STARTSUP(_)) => "STARTSUP" + | (ENDSUP) => "ENDSUP" + | (STARTTABLE(_)) => "STARTTABLE" + | (ENDTABLE) => "ENDTABLE" + | (STARTTBODY(_)) => "STARTTBODY" + | (ENDTBODY) => "ENDTBODY" + | (STARTTD(_)) => "STARTTD" + | (ENDTD) => "ENDTD" + | (STARTTEXTAREA(_)) => "STARTTEXTAREA" + | (ENDTEXTAREA) => "ENDTEXTAREA" + | (STARTTFOOT(_)) => "STARTTFOOT" + | (ENDTFOOT) => "ENDTFOOT" + | (STARTTH(_)) => "STARTTH" + | (ENDTH) => "ENDTH" + | (STARTTHEAD(_)) => "STARTTHEAD" + | (ENDTHEAD) => "ENDTHEAD" + | (STARTTITLE(_)) => "STARTTITLE" + | (ENDTITLE) => "ENDTITLE" + | (STARTTR(_)) => "STARTTR" + | (ENDTR) => "ENDTR" + | (STARTTT(_)) => "STARTTT" + | (ENDTT) => "ENDTT" + | (STARTU(_)) => "STARTU" + | (ENDU) => "ENDU" + | (STARTUL(_)) => "STARTUL" + | (ENDUL) => "ENDUL" + | (STARTVAR(_)) => "STARTVAR" + | (ENDVAR) => "ENDVAR" + | (EOF) => "EOF" +(* end case *)) + fun isKW tok = +(case (tok) + of (OPENTAG(_)) => false + | (CLOSETAG(_)) => false + | (COMMENT(_)) => false + | (PCDATA(_)) => false + | (DOCTYPE(_)) => false + | (CHAR_REF(_)) => false + | (ENTITY_REF(_)) => false + | (XML_PROCESSING(_)) => false + | (STARTA(_)) => false + | (ENDA) => false + | (STARTABBR(_)) => false + | (ENDABBR) => false + | (STARTACRONYM(_)) => false + | (ENDACRONYM) => false + | (STARTADDRESS(_)) => false + | (ENDADDRESS) => false + | (STARTAPPLET(_)) => false + | (ENDAPPLET) => false + | (STARTAREA(_)) => false + | (STARTB(_)) => false + | (ENDB) => false + | (STARTBASE(_)) => false + | (STARTBASEFONT(_)) => false + | (STARTBDO(_)) => false + | (ENDBDO) => false + | (STARTBIG(_)) => false + | (ENDBIG) => false + | (STARTBLOCKQUOTE(_)) => false + | (ENDBLOCKQUOTE) => false + | (STARTBODY(_)) => false + | (ENDBODY) => false + | (STARTBR(_)) => false + | (STARTBUTTON(_)) => false + | (ENDBUTTON) => false + | (STARTCAPTION(_)) => false + | (ENDCAPTION) => false + | (STARTCENTER(_)) => false + | (ENDCENTER) => false + | (STARTCITE(_)) => false + | (ENDCITE) => false + | (STARTCODE(_)) => false + | (ENDCODE) => false + | (STARTCOL(_)) => false + | (STARTCOLGROUP(_)) => false + | (ENDCOLGROUP) => false + | (STARTDD(_)) => false + | (ENDDD) => false + | (STARTDEL(_)) => false + | (ENDDEL) => false + | (STARTDFN(_)) => false + | (ENDDFN) => false + | (STARTDIR(_)) => false + | (ENDDIR) => false + | (STARTDIV(_)) => false + | (ENDDIV) => false + | (STARTDL(_)) => false + | (ENDDL) => false + | (STARTDT(_)) => false + | (ENDDT) => false + | (STARTEM(_)) => false + | (ENDEM) => false + | (STARTFIELDSET(_)) => false + | (ENDFIELDSET) => false + | (STARTFONT(_)) => false + | (ENDFONT) => false + | (STARTFORM(_)) => false + | (ENDFORM) => false + | (STARTFRAME(_)) => false + | (STARTFRAMESET(_)) => false + | (ENDFRAMESET) => false + | (STARTH1(_)) => false + | (ENDH1) => false + | (STARTH2(_)) => false + | (ENDH2) => false + | (STARTH3(_)) => false + | (ENDH3) => false + | (STARTH4(_)) => false + | (ENDH4) => false + | (STARTH5(_)) => false + | (ENDH5) => false + | (STARTH6(_)) => false + | (ENDH6) => false + | (STARTHEAD(_)) => false + | (ENDHEAD) => false + | (STARTHR(_)) => false + | (STARTHTML(_)) => false + | (ENDHTML) => false + | (STARTI(_)) => false + | (ENDI) => false + | (STARTIFRAME(_)) => false + | (ENDIFRAME) => false + | (STARTIMG(_)) => false + | (STARTINPUT(_)) => false + | (STARTINS(_)) => false + | (ENDINS) => false + | (STARTISINDEX(_)) => false + | (STARTKBD(_)) => false + | (ENDKBD) => false + | (STARTLABEL(_)) => false + | (ENDLABEL) => false + | (STARTLEGEND(_)) => false + | (ENDLEGEND) => false + | (STARTLI(_)) => false + | (ENDLI) => false + | (STARTLINK(_)) => false + | (STARTMAP(_)) => false + | (ENDMAP) => false + | (STARTMENU(_)) => false + | (ENDMENU) => false + | (STARTMETA(_)) => false + | (STARTNOFRAMES(_)) => false + | (ENDNOFRAMES) => false + | (STARTNOSCRIPT(_)) => false + | (ENDNOSCRIPT) => false + | (STARTOBJECT(_)) => false + | (ENDOBJECT) => false + | (STARTOL(_)) => false + | (ENDOL) => false + | (STARTOPTGROUP(_)) => false + | (ENDOPTGROUP) => false + | (STARTOPTION(_)) => false + | (ENDOPTION) => false + | (STARTP(_)) => false + | (ENDP) => false + | (STARTPARAM(_)) => false + | (STARTPRE(_)) => false + | (ENDPRE) => false + | (STARTQ(_)) => false + | (ENDQ) => false + | (STARTS(_)) => false + | (ENDS) => false + | (STARTSAMP(_)) => false + | (ENDSAMP) => false + | (STARTSCRIPT(_)) => false + | (ENDSCRIPT) => false + | (STARTSELECT(_)) => false + | (ENDSELECT) => false + | (STARTSMALL(_)) => false + | (ENDSMALL) => false + | (STARTSPAN(_)) => false + | (ENDSPAN) => false + | (STARTSTRIKE(_)) => false + | (ENDSTRIKE) => false + | (STARTSTRONG(_)) => false + | (ENDSTRONG) => false + | (STARTSTYLE(_)) => false + | (ENDSTYLE) => false + | (STARTSUB(_)) => false + | (ENDSUB) => false + | (STARTSUP(_)) => false + | (ENDSUP) => false + | (STARTTABLE(_)) => false + | (ENDTABLE) => false + | (STARTTBODY(_)) => false + | (ENDTBODY) => false + | (STARTTD(_)) => false + | (ENDTD) => false + | (STARTTEXTAREA(_)) => false + | (ENDTEXTAREA) => false + | (STARTTFOOT(_)) => false + | (ENDTFOOT) => false + | (STARTTH(_)) => false + | (ENDTH) => false + | (STARTTHEAD(_)) => false + | (ENDTHEAD) => false + | (STARTTITLE(_)) => false + | (ENDTITLE) => false + | (STARTTR(_)) => false + | (ENDTR) => false + | (STARTTT(_)) => false + | (ENDTT) => false + | (STARTU(_)) => false + | (ENDU) => false + | (STARTUL(_)) => false + | (ENDUL) => false + | (STARTVAR(_)) => false + | (ENDVAR) => false + | (EOF) => false +(* end case *)) + fun isEOF EOF = true + | isEOF _ = false + end (* HTML4Tokens *) + +functor HTML4ParseFn (Lex : ANTLR_LEXER) = struct + + local + structure Tok = +HTML4Tokens + structure UserCode = + struct + + +open HTML4Utils + +fun optToList NONE = [] + | optToList (SOME thing) = [thing] + +fun optListToList NONE = [] + | optListToList (SOME thing) = thing + + +fun document_PROD_1_SUBRULE_1_PROD_1_ACT (DOCTYPE, cdata_opt, DOCTYPE_SPAN : (Lex.pos * Lex.pos), cdata_opt_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)) = + ((Lf (Tok.DOCTYPE DOCTYPE)) :: cdata_opt) +fun document_PROD_1_SUBRULE_2_PROD_1_ACT (SR1, cdata_opt, STARTHTML, SR1_SPAN : (Lex.pos * Lex.pos), cdata_opt_SPAN : (Lex.pos * Lex.pos), STARTHTML_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)) = + ((Lf (Tok.STARTHTML STARTHTML)) :: cdata_opt) +fun document_PROD_1_SUBRULE_4_PROD_1_ACT (head, SR1, SR2, SR3, cdata_opt, ENDHTML, head_SPAN : (Lex.pos * Lex.pos), SR1_SPAN : (Lex.pos * Lex.pos), SR2_SPAN : (Lex.pos * Lex.pos), SR3_SPAN : (Lex.pos * Lex.pos), cdata_opt_SPAN : (Lex.pos * Lex.pos), ENDHTML_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)) = + ((Lf (Tok.ENDHTML)) :: cdata_opt) +fun document_PROD_1_ACT (head, SR1, SR2, SR3, SR4, cdata_opt, head_SPAN : (Lex.pos * Lex.pos), SR1_SPAN : (Lex.pos * Lex.pos), SR2_SPAN : (Lex.pos * Lex.pos), SR3_SPAN : (Lex.pos * Lex.pos), SR4_SPAN : (Lex.pos * Lex.pos), cdata_opt_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)) = + (Nd (Atom.atom "DOCUMENT", + cdata_opt @ (optListToList SR1) @ (optListToList SR2) @ + (head :: SR3 :: (optListToList SR4)))) +fun head_PROD_1_SUBRULE_1_PROD_1_ACT (cdata_opt, STARTHEAD, cdata_opt_SPAN : (Lex.pos * Lex.pos), STARTHEAD_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)) = + ((Lf (Tok.STARTHEAD STARTHEAD)) :: cdata_opt) +fun head_PROD_1_SUBRULE_2_PROD_1_ACT (SR1, cdata_opt, head_content, SR1_SPAN : (Lex.pos * Lex.pos), cdata_opt_SPAN : (Lex.pos * Lex.pos), head_content_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)) = + (head_content :: cdata_opt) +fun head_PROD_1_SUBRULE_3_PROD_1_ACT (SR1, SR2, cdata_opt, ENDHEAD, SR1_SPAN : (Lex.pos * Lex.pos), SR2_SPAN : (Lex.pos * Lex.pos), cdata_opt_SPAN : (Lex.pos * Lex.pos), ENDHEAD_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)) = + ((Lf (Tok.ENDHEAD)) :: cdata_opt) +fun head_PROD_1_ACT (SR1, SR2, SR3, SR1_SPAN : (Lex.pos * Lex.pos), SR2_SPAN : (Lex.pos * Lex.pos), SR3_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)) = + (Nd (Atom.atom "HEAD", + (optListToList SR1) @ (foldr op@ [] SR2) @ (optListToList SR3))) +fun title_PROD_1_ACT (STARTTITLE, cdata_opt, ENDTITLE, STARTTITLE_SPAN : (Lex.pos * Lex.pos), cdata_opt_SPAN : (Lex.pos * Lex.pos), ENDTITLE_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)) = + (Nd (Atom.atom "TITLE", + (Lf (Tok.STARTTITLE STARTTITLE)) :: + (cdata_opt @ [Lf (Tok.ENDTITLE)]))) +fun base_PROD_1_ACT (STARTBASE, STARTBASE_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)) = + (Nd (Atom.atom "BASE", [Lf (Tok.STARTBASE STARTBASE)])) +fun script_PROD_1_ACT (cdata_opt, STARTSCRIPT, ENDSCRIPT, cdata_opt_SPAN : (Lex.pos * Lex.pos), STARTSCRIPT_SPAN : (Lex.pos * Lex.pos), ENDSCRIPT_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)) = + (Nd (Atom.atom "SCRIPT", + (Lf (Tok.STARTSCRIPT STARTSCRIPT)) :: + (cdata_opt @ [Lf (Tok.ENDSCRIPT)]))) +fun style_PROD_1_ACT (cdata_opt, STARTSTYLE, ENDSTYLE, cdata_opt_SPAN : (Lex.pos * Lex.pos), STARTSTYLE_SPAN : (Lex.pos * Lex.pos), ENDSTYLE_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)) = + (Nd (Atom.atom "STYLE", + (Lf (Tok.STARTSTYLE STARTSTYLE)) :: + (cdata_opt @ [Lf (Tok.ENDSTYLE)]))) +fun meta_PROD_1_ACT (STARTMETA, STARTMETA_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)) = + (Nd (Atom.atom "META", [Lf (Tok.STARTMETA STARTMETA)])) +fun link_PROD_1_ACT (STARTLINK, STARTLINK_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)) = + (Nd (Atom.atom "LINK", [Lf (Tok.STARTLINK STARTLINK)])) +fun object_PROD_1_ACT (SR, STARTOBJECT, ENDOBJECT, SR_SPAN : (Lex.pos * Lex.pos), STARTOBJECT_SPAN : (Lex.pos * Lex.pos), ENDOBJECT_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)) = + (Nd (Atom.atom "OBJECT", + (Lf (Tok.STARTOBJECT STARTOBJECT)) :: + (SR @ [Lf (Tok.ENDOBJECT)]))) +fun param_PROD_1_ACT (STARTPARAM, STARTPARAM_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)) = + (Nd (Atom.atom "PARAM", [(Lf (Tok.STARTPARAM STARTPARAM))])) +fun body_PROD_1_ACT (STARTBODY, body_rest, STARTBODY_SPAN : (Lex.pos * Lex.pos), body_rest_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)) = + (Nd (Atom.atom "BODY", + (Lf (Tok.STARTBODY STARTBODY)) :: body_rest)) +fun body_PROD_2_ACT (SR, body_rest, SR_SPAN : (Lex.pos * Lex.pos), body_rest_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)) = + (Nd (Atom.atom "BODY", SR :: body_rest)) +fun body_rest_PROD_1_SUBRULE_2_PROD_1_ACT (ENDBODY, SR1, cdata_opt, ENDBODY_SPAN : (Lex.pos * Lex.pos), SR1_SPAN : (Lex.pos * Lex.pos), cdata_opt_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)) = + ((Lf (Tok.ENDBODY)) :: cdata_opt) +fun body_rest_PROD_1_ACT (SR1, SR2, SR1_SPAN : (Lex.pos * Lex.pos), SR2_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)) = + (SR1 @ (optListToList SR2)) +fun a_PROD_1_ACT (inline, STARTA, ENDA, inline_SPAN : (Lex.pos * Lex.pos), STARTA_SPAN : (Lex.pos * Lex.pos), ENDA_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)) = + (Nd (Atom.atom "A", + (Lf (Tok.STARTA STARTA)) :: (inline @ [Lf (Tok.ENDA)]))) +fun abbr_PROD_1_ACT (inline, ENDABBR, STARTABBR, inline_SPAN : (Lex.pos * Lex.pos), ENDABBR_SPAN : (Lex.pos * Lex.pos), STARTABBR_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)) = + (Nd (Atom.atom "ABBR", + (Lf (Tok.STARTABBR STARTABBR)) :: + (inline @ [Lf (Tok.ENDABBR)]))) +fun acronym_PROD_1_ACT (inline, STARTACRONYM, ENDACRONYM, inline_SPAN : (Lex.pos * Lex.pos), STARTACRONYM_SPAN : (Lex.pos * Lex.pos), ENDACRONYM_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)) = + (Nd (Atom.atom "ACRONYM", + (Lf (Tok.STARTACRONYM STARTACRONYM)) :: + (inline @ [Lf (Tok.ENDACRONYM)]))) +fun address_PROD_1_ACT (inline, STARTADDRESS, ENDADDRESS, inline_SPAN : (Lex.pos * Lex.pos), STARTADDRESS_SPAN : (Lex.pos * Lex.pos), ENDADDRESS_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)) = + (Nd (Atom.atom "ADDRESS", + (Lf (Tok.STARTADDRESS STARTADDRESS)) :: + (inline @ [Lf (Tok.ENDADDRESS)]))) +fun applet_PROD_1_ACT (SR, ENDAPPLET, STARTAPPLET, SR_SPAN : (Lex.pos * Lex.pos), ENDAPPLET_SPAN : (Lex.pos * Lex.pos), STARTAPPLET_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)) = + (Nd (Atom.atom "APPLET", + (Lf (Tok.STARTAPPLET STARTAPPLET)) :: + (SR @ [Lf (Tok.ENDAPPLET)]))) +fun area_PROD_1_ACT (STARTAREA, STARTAREA_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)) = + (Nd (Atom.atom "AREA", [Lf (Tok.STARTAREA STARTAREA)])) +fun b_PROD_1_ACT (inline, STARTB, ENDB, inline_SPAN : (Lex.pos * Lex.pos), STARTB_SPAN : (Lex.pos * Lex.pos), ENDB_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)) = + (Nd (Atom.atom "B", + (Lf (Tok.STARTB STARTB)) :: (inline @ [Lf (Tok.ENDB)]))) +fun basefont_PROD_1_ACT (STARTBASEFONT, STARTBASEFONT_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)) = + (Nd (Atom.atom "BASEFONT", [Lf (Tok.STARTBASEFONT STARTBASEFONT)])) +fun bdo_PROD_1_ACT (inline, ENDBDO, STARTBDO, inline_SPAN : (Lex.pos * Lex.pos), ENDBDO_SPAN : (Lex.pos * Lex.pos), STARTBDO_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)) = + (Nd (Atom.atom "BDO", + (Lf (Tok.STARTBDO STARTBDO)) :: + (inline @ [Lf (Tok.ENDBDO)]))) +fun big_PROD_1_ACT (inline, ENDBIG, STARTBIG, inline_SPAN : (Lex.pos * Lex.pos), ENDBIG_SPAN : (Lex.pos * Lex.pos), STARTBIG_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)) = + (Nd (Atom.atom "BIG", + (Lf (Tok.STARTBIG STARTBIG)) :: + (inline @ [Lf (Tok.ENDBIG)]))) +fun blockquote_PROD_1_ACT (SR, STARTBLOCKQUOTE, ENDBLOCKQUOTE, SR_SPAN : (Lex.pos * Lex.pos), STARTBLOCKQUOTE_SPAN : (Lex.pos * Lex.pos), ENDBLOCKQUOTE_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)) = + (Nd (Atom.atom "BLOCKQUOTE", + (Lf (Tok.STARTBLOCKQUOTE STARTBLOCKQUOTE)) :: + (SR @ [Lf (Tok.ENDBLOCKQUOTE)]))) +fun br_PROD_1_ACT (STARTBR, STARTBR_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)) = + (Nd (Atom.atom "BR", [Lf (Tok.STARTBR STARTBR)])) +fun button_PROD_1_ACT (STARTBUTTON, flow, ENDBUTTON, STARTBUTTON_SPAN : (Lex.pos * Lex.pos), flow_SPAN : (Lex.pos * Lex.pos), ENDBUTTON_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)) = + (Nd (Atom.atom "BUTTON", + (Lf (Tok.STARTBUTTON STARTBUTTON)) :: + (flow @ [Lf (Tok.ENDBUTTON)]))) +fun caption_PROD_1_ACT (inline, STARTCAPTION, ENDCAPTION, inline_SPAN : (Lex.pos * Lex.pos), STARTCAPTION_SPAN : (Lex.pos * Lex.pos), ENDCAPTION_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)) = + (Nd (Atom.atom "CAPTION", + (Lf (Tok.STARTCAPTION STARTCAPTION)) :: + (inline @ [Lf (Tok.ENDCAPTION)]))) +fun center_PROD_1_ACT (ENDCENTER, flow, STARTCENTER, ENDCENTER_SPAN : (Lex.pos * Lex.pos), flow_SPAN : (Lex.pos * Lex.pos), STARTCENTER_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)) = + (Nd (Atom.atom "CENTER", + (Lf (Tok.STARTCENTER STARTCENTER)) :: + (flow @ [Lf (Tok.ENDCENTER)]))) +fun cite_PROD_1_ACT (inline, ENDCITE, STARTCITE, inline_SPAN : (Lex.pos * Lex.pos), ENDCITE_SPAN : (Lex.pos * Lex.pos), STARTCITE_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)) = + (Nd (Atom.atom "CITE", + (Lf (Tok.STARTCITE STARTCITE)) :: + (inline @ [Lf (Tok.ENDCITE)]))) +fun code_PROD_1_ACT (inline, ENDCODE, STARTCODE, inline_SPAN : (Lex.pos * Lex.pos), ENDCODE_SPAN : (Lex.pos * Lex.pos), STARTCODE_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)) = + (Nd (Atom.atom "CODE", + (Lf (Tok.STARTCODE STARTCODE)) :: + (inline @ [Lf (Tok.ENDCODE)]))) +fun col_PROD_1_ACT (STARTCOL, STARTCOL_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)) = + (Nd (Atom.atom "COL", [Lf (Tok.STARTCOL STARTCOL)])) +fun colgroup_PROD_1_SUBRULE_1_PROD_1_ACT (STARTCOLGROUP, cdata_opt, col, STARTCOLGROUP_SPAN : (Lex.pos * Lex.pos), cdata_opt_SPAN : (Lex.pos * Lex.pos), col_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)) = + (col :: cdata_opt) +fun colgroup_PROD_1_SUBRULE_2_PROD_1_ACT (STARTCOLGROUP, SR1, cdata_opt, ENDCOLGROUP, STARTCOLGROUP_SPAN : (Lex.pos * Lex.pos), SR1_SPAN : (Lex.pos * Lex.pos), cdata_opt_SPAN : (Lex.pos * Lex.pos), ENDCOLGROUP_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)) = + (Lf (Tok.ENDCOLGROUP)) +fun colgroup_PROD_1_ACT (STARTCOLGROUP, SR1, SR2, cdata_opt, STARTCOLGROUP_SPAN : (Lex.pos * Lex.pos), SR1_SPAN : (Lex.pos * Lex.pos), SR2_SPAN : (Lex.pos * Lex.pos), cdata_opt_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)) = + (Nd (Atom.atom "COLGROUP", + (Lf (Tok.STARTCOLGROUP STARTCOLGROUP)) :: + (cdata_opt @ (foldr op@ [] SR1) @ (optToList SR2)))) +fun dd_PROD_1_SUBRULE_2_PROD_1_ACT (ENDDD, STARTDD, flow, ENDDD_SPAN : (Lex.pos * Lex.pos), STARTDD_SPAN : (Lex.pos * Lex.pos), flow_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)) = + (Lf (Tok.ENDDD)) +fun dd_PROD_1_ACT (SR, STARTDD, flow, SR_SPAN : (Lex.pos * Lex.pos), STARTDD_SPAN : (Lex.pos * Lex.pos), flow_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)) = + (Nd (Atom.atom "DD", + (Lf (Tok.STARTDD STARTDD)) :: (flow @ (optToList SR)))) +fun del_PROD_1_ACT (flow, ENDDEL, STARTDEL, flow_SPAN : (Lex.pos * Lex.pos), ENDDEL_SPAN : (Lex.pos * Lex.pos), STARTDEL_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)) = + (Nd (Atom.atom "DEL", + (Lf (Tok.STARTDEL STARTDEL)) :: + (flow @ [Lf (Tok.ENDDEL)]))) +fun dfn_PROD_1_ACT (inline, ENDDFN, STARTDFN, inline_SPAN : (Lex.pos * Lex.pos), ENDDFN_SPAN : (Lex.pos * Lex.pos), STARTDFN_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)) = + (Nd (Atom.atom "DFN", + (Lf (Tok.STARTDFN STARTDFN)) :: + (inline @ [Lf (Tok.ENDDFN)]))) +fun dir_PROD_1_ACT (li, cdata_opt, ENDDIR, STARTDIR, li_SPAN : (Lex.pos * Lex.pos), cdata_opt_SPAN : (Lex.pos * Lex.pos), ENDDIR_SPAN : (Lex.pos * Lex.pos), STARTDIR_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)) = + (Nd (Atom.atom "DIR", + (Lf (Tok.STARTDIR STARTDIR)) :: + (cdata_opt @ li @ [Lf (Tok.ENDDIR)]))) +fun div_PROD_1_ACT (flow, ENDDIV, STARTDIV, flow_SPAN : (Lex.pos * Lex.pos), ENDDIV_SPAN : (Lex.pos * Lex.pos), STARTDIV_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)) = + (Nd (Atom.atom "DIV", + (Lf (Tok.STARTDIV STARTDIV)) :: + (flow @ [Lf (Tok.ENDDIV)]))) +fun dl_PROD_1_ACT (ENDDL, SR, cdata_opt, STARTDL, ENDDL_SPAN : (Lex.pos * Lex.pos), SR_SPAN : (Lex.pos * Lex.pos), cdata_opt_SPAN : (Lex.pos * Lex.pos), STARTDL_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)) = + (Nd (Atom.atom "DL", + (Lf (Tok.STARTDL STARTDL)) :: + (cdata_opt @ SR @ [Lf (Tok.ENDDL)]))) +fun dt_PROD_1_SUBRULE_2_PROD_1_ACT (ENDDT, inline, STARTDT, ENDDT_SPAN : (Lex.pos * Lex.pos), inline_SPAN : (Lex.pos * Lex.pos), STARTDT_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)) = + (Lf (Tok.ENDDT)) +fun dt_PROD_1_ACT (SR, inline, STARTDT, SR_SPAN : (Lex.pos * Lex.pos), inline_SPAN : (Lex.pos * Lex.pos), STARTDT_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)) = + (Nd (Atom.atom "DT", + (Lf (Tok.STARTDT STARTDT)) :: (inline @ (optToList SR)))) +fun em_PROD_1_ACT (ENDEM, inline, STARTEM, ENDEM_SPAN : (Lex.pos * Lex.pos), inline_SPAN : (Lex.pos * Lex.pos), STARTEM_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)) = + (Nd (Atom.atom "EM", + (Lf (Tok.STARTEM STARTEM)) :: (inline @ [Lf (Tok.ENDEM)]))) +fun fieldset_PROD_1_ACT (cdata_opt, legend, flow, ENDFIELDSET, STARTFIELDSET, cdata_opt_SPAN : (Lex.pos * Lex.pos), legend_SPAN : (Lex.pos * Lex.pos), flow_SPAN : (Lex.pos * Lex.pos), ENDFIELDSET_SPAN : (Lex.pos * Lex.pos), STARTFIELDSET_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)) = + (Nd (Atom.atom "FIELDSET", + (Lf (Tok.STARTFIELDSET STARTFIELDSET)) :: + (cdata_opt @ [legend] @ flow @ + [Lf (Tok.ENDFIELDSET)]))) +fun font_PROD_1_ACT (inline, ENDFONT, STARTFONT, inline_SPAN : (Lex.pos * Lex.pos), ENDFONT_SPAN : (Lex.pos * Lex.pos), STARTFONT_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)) = + (Nd (Atom.atom "FONT", + (Lf (Tok.STARTFONT STARTFONT)) :: + (inline @ [Lf (Tok.ENDFONT)]))) +fun form_PROD_1_ACT (SR, ENDFORM, STARTFORM, SR_SPAN : (Lex.pos * Lex.pos), ENDFORM_SPAN : (Lex.pos * Lex.pos), STARTFORM_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)) = + (Nd (Atom.atom "FORM", + (Lf (Tok.STARTFORM STARTFORM)) :: + (SR @ [Lf (Tok.ENDFORM)]))) +fun frame_PROD_1_ACT (STARTFRAME, STARTFRAME_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)) = + (Nd (Atom.atom "FRAME", [Lf (Tok.STARTFRAME STARTFRAME)])) +fun frameset_PROD_1_SUBRULE_2_PROD_1_ACT (SR1, cdata_opt, STARTFRAMESET, noframes, SR1_SPAN : (Lex.pos * Lex.pos), cdata_opt_SPAN : (Lex.pos * Lex.pos), STARTFRAMESET_SPAN : (Lex.pos * Lex.pos), noframes_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)) = + (noframes::cdata_opt) +fun frameset_PROD_1_ACT (SR1, SR2, STARTFRAMESET, ENDFRAMESET, SR1_SPAN : (Lex.pos * Lex.pos), SR2_SPAN : (Lex.pos * Lex.pos), STARTFRAMESET_SPAN : (Lex.pos * Lex.pos), ENDFRAMESET_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)) = + (Nd (Atom.atom "FRAMESET", + (Lf (Tok.STARTFRAMESET STARTFRAMESET)) :: + (SR1 @ (optListToList SR2) @ [Lf (Tok.ENDFRAMESET)]))) +fun h1_PROD_1_ACT (ENDH1, inline, STARTH1, ENDH1_SPAN : (Lex.pos * Lex.pos), inline_SPAN : (Lex.pos * Lex.pos), STARTH1_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)) = + (Nd (Atom.atom "H1", + (Lf (Tok.STARTH1 STARTH1)) :: (inline @ [Lf (Tok.ENDH1)]))) +fun h2_PROD_1_ACT (ENDH2, inline, STARTH2, ENDH2_SPAN : (Lex.pos * Lex.pos), inline_SPAN : (Lex.pos * Lex.pos), STARTH2_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)) = + (Nd (Atom.atom "H2", + (Lf (Tok.STARTH2 STARTH2)) :: (inline @ [Lf (Tok.ENDH2)]))) +fun h3_PROD_1_ACT (ENDH3, inline, STARTH3, ENDH3_SPAN : (Lex.pos * Lex.pos), inline_SPAN : (Lex.pos * Lex.pos), STARTH3_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)) = + (Nd (Atom.atom "H3", + (Lf (Tok.STARTH3 STARTH3)) :: (inline @ [Lf (Tok.ENDH3)]))) +fun h4_PROD_1_ACT (ENDH4, inline, STARTH4, ENDH4_SPAN : (Lex.pos * Lex.pos), inline_SPAN : (Lex.pos * Lex.pos), STARTH4_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)) = + (Nd (Atom.atom "H4", + (Lf (Tok.STARTH4 STARTH4)) :: (inline @ [Lf (Tok.ENDH4)]))) +fun h5_PROD_1_ACT (ENDH5, inline, STARTH5, ENDH5_SPAN : (Lex.pos * Lex.pos), inline_SPAN : (Lex.pos * Lex.pos), STARTH5_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)) = + (Nd (Atom.atom "H5", + (Lf (Tok.STARTH5 STARTH5)) :: (inline @ [Lf (Tok.ENDH5)]))) +fun h6_PROD_1_ACT (ENDH6, inline, STARTH6, ENDH6_SPAN : (Lex.pos * Lex.pos), inline_SPAN : (Lex.pos * Lex.pos), STARTH6_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)) = + (Nd (Atom.atom "H6", + (Lf (Tok.STARTH6 STARTH6)) :: (inline @ [Lf (Tok.ENDH6)]))) +fun hr_PROD_1_ACT (STARTHR, STARTHR_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)) = + (Nd (Atom.atom "HR", [Lf (Tok.STARTHR STARTHR)])) +fun i_PROD_1_ACT (inline, STARTI, ENDI, inline_SPAN : (Lex.pos * Lex.pos), STARTI_SPAN : (Lex.pos * Lex.pos), ENDI_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)) = + (Nd (Atom.atom "I", + (Lf (Tok.STARTI STARTI)) :: + (inline @ [Lf (Tok.ENDI)]))) +fun iframe_PROD_1_ACT (ENDIFRAME, STARTIFRAME, flow, ENDIFRAME_SPAN : (Lex.pos * Lex.pos), STARTIFRAME_SPAN : (Lex.pos * Lex.pos), flow_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)) = + (Nd (Atom.atom "IFRAME", + (Lf (Tok.STARTIFRAME STARTIFRAME)) :: + (flow @ [Lf (Tok.ENDIFRAME)]))) +fun img_PROD_1_ACT (STARTIMG, STARTIMG_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)) = + (Nd (Atom.atom "IMG", [Lf (Tok.STARTIMG STARTIMG)])) +fun input_PROD_1_ACT (STARTINPUT, STARTINPUT_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)) = + (Nd (Atom.atom "INPUT", [Lf (Tok.STARTINPUT STARTINPUT)])) +fun ins_PROD_1_ACT (STARTINS, flow, ENDINS, STARTINS_SPAN : (Lex.pos * Lex.pos), flow_SPAN : (Lex.pos * Lex.pos), ENDINS_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)) = + (Nd (Atom.atom "INS", + (Lf (Tok.STARTINS STARTINS)) :: + (flow @ [Lf (Tok.ENDINS)]))) +fun isindex_PROD_1_ACT (STARTISINDEX, STARTISINDEX_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)) = + (Nd (Atom.atom "ISINDEX", [Lf (Tok.STARTISINDEX STARTISINDEX)])) +fun kbd_PROD_1_ACT (inline, STARTKBD, ENDKBD, inline_SPAN : (Lex.pos * Lex.pos), STARTKBD_SPAN : (Lex.pos * Lex.pos), ENDKBD_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)) = + (Nd (Atom.atom "KBD", + (Lf (Tok.STARTKBD STARTKBD)) :: + (inline @ [Lf (Tok.ENDKBD)]))) +fun label_PROD_1_ACT (inline, ENDLABEL, STARTLABEL, inline_SPAN : (Lex.pos * Lex.pos), ENDLABEL_SPAN : (Lex.pos * Lex.pos), STARTLABEL_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)) = + (Nd (Atom.atom "LABEL", + (Lf (Tok.STARTLABEL STARTLABEL)) :: + (inline @ [Lf (Tok.ENDLABEL)]))) +fun legend_PROD_1_ACT (inline, ENDLEGEND, STARTLEGEND, inline_SPAN : (Lex.pos * Lex.pos), ENDLEGEND_SPAN : (Lex.pos * Lex.pos), STARTLEGEND_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)) = + (Nd (Atom.atom "LEGEND", + (Lf (Tok.STARTLEGEND STARTLEGEND)) :: + (inline @ [Lf (Tok.ENDLEGEND)]))) +fun li_PROD_1_SUBRULE_2_PROD_1_ACT (ENDLI, STARTLI, flow, ENDLI_SPAN : (Lex.pos * Lex.pos), STARTLI_SPAN : (Lex.pos * Lex.pos), flow_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)) = + (Lf (Tok.ENDLI)) +fun li_PROD_1_ACT (SR, STARTLI, flow, SR_SPAN : (Lex.pos * Lex.pos), STARTLI_SPAN : (Lex.pos * Lex.pos), flow_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)) = + (Nd (Atom.atom "LI", + (Lf (Tok.STARTLI STARTLI)) :: (flow @ (optToList SR)))) +fun map_PROD_1_ACT (SR, STARTMAP, ENDMAP, SR_SPAN : (Lex.pos * Lex.pos), STARTMAP_SPAN : (Lex.pos * Lex.pos), ENDMAP_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)) = + (Nd (Atom.atom "MAP", + (Lf (Tok.STARTMAP STARTMAP)) :: (SR @ [Lf (Tok.ENDMAP)]))) +fun menu_PROD_1_ACT (li, cdata_opt, STARTMENU, ENDMENU, li_SPAN : (Lex.pos * Lex.pos), cdata_opt_SPAN : (Lex.pos * Lex.pos), STARTMENU_SPAN : (Lex.pos * Lex.pos), ENDMENU_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)) = + (Nd (Atom.atom "MENU", + (Lf (Tok.STARTMENU STARTMENU)) :: + (cdata_opt @ li @ [Lf (Tok.ENDMENU)]))) +fun noframes_PROD_1_ACT (STARTNOFRAMES, ENDNOFRAMES, body, STARTNOFRAMES_SPAN : (Lex.pos * Lex.pos), ENDNOFRAMES_SPAN : (Lex.pos * Lex.pos), body_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)) = + (Nd (Atom.atom "NOFRAMES", + [Lf (Tok.STARTNOFRAMES STARTNOFRAMES), body, + Lf (Tok.ENDNOFRAMES)])) +fun noscript_PROD_1_ACT (SR, ENDNOSCRIPT, STARTNOSCRIPT, SR_SPAN : (Lex.pos * Lex.pos), ENDNOSCRIPT_SPAN : (Lex.pos * Lex.pos), STARTNOSCRIPT_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)) = + (Nd (Atom.atom "NOSCRIPT", + (Lf (Tok.STARTNOSCRIPT STARTNOSCRIPT)) :: + (SR @ [Lf (Tok.ENDNOSCRIPT)]))) +fun ol_PROD_1_ACT (ENDOL, li, cdata_opt, STARTOL, ENDOL_SPAN : (Lex.pos * Lex.pos), li_SPAN : (Lex.pos * Lex.pos), cdata_opt_SPAN : (Lex.pos * Lex.pos), STARTOL_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)) = + (Nd (Atom.atom "OL", + (Lf (Tok.STARTOL STARTOL)) :: + (cdata_opt @ li @ [Lf (Tok.ENDOL)]))) +fun optgroup_PROD_1_ACT (option, STARTOPTGROUP, ENDOPTGROUP, cdata_opt1, cdata_opt2, option_SPAN : (Lex.pos * Lex.pos), STARTOPTGROUP_SPAN : (Lex.pos * Lex.pos), ENDOPTGROUP_SPAN : (Lex.pos * Lex.pos), cdata_opt1_SPAN : (Lex.pos * Lex.pos), cdata_opt2_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)) = + (Nd (Atom.atom "OPTGROUP", + (Lf (Tok.STARTOPTGROUP STARTOPTGROUP)) :: + (cdata_opt1 @ option @ ((Lf (Tok.ENDOPTGROUP)) + :: cdata_opt2)))) +fun option_PROD_1_SUBRULE_1_PROD_1_ACT (STARTOPTION, cdata_opt, ENDOPTION, STARTOPTION_SPAN : (Lex.pos * Lex.pos), cdata_opt_SPAN : (Lex.pos * Lex.pos), ENDOPTION_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)) = + ((Lf (Tok.ENDOPTION)) :: + cdata_opt) +fun option_PROD_1_ACT (SR, STARTOPTION, cdata_opt, SR_SPAN : (Lex.pos * Lex.pos), STARTOPTION_SPAN : (Lex.pos * Lex.pos), cdata_opt_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)) = + (Nd (Atom.atom "OPTION", + (Lf (Tok.STARTOPTION STARTOPTION)) :: + (cdata_opt @ (optListToList SR)))) +fun p_PROD_1_ACT (inline, STARTP, ENDP, inline_SPAN : (Lex.pos * Lex.pos), STARTP_SPAN : (Lex.pos * Lex.pos), ENDP_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)) = + (Nd (Atom.atom "P", + (Lf (Tok.STARTP STARTP)) :: (inline @ [Lf (Tok.ENDP)]))) +fun pre_PROD_1_ACT (inline, ENDPRE, STARTPRE, inline_SPAN : (Lex.pos * Lex.pos), ENDPRE_SPAN : (Lex.pos * Lex.pos), STARTPRE_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)) = + (Nd (Atom.atom "PRE", + (Lf (Tok.STARTPRE STARTPRE)) :: + (inline @ [Lf (Tok.ENDPRE)]))) +fun q_PROD_1_ACT (inline, STARTQ, ENDQ, inline_SPAN : (Lex.pos * Lex.pos), STARTQ_SPAN : (Lex.pos * Lex.pos), ENDQ_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)) = + (Nd (Atom.atom "Q", + (Lf (Tok.STARTQ STARTQ)) :: (inline @ [Lf (Tok.ENDQ)]))) +fun s_PROD_1_ACT (inline, STARTS, ENDS, inline_SPAN : (Lex.pos * Lex.pos), STARTS_SPAN : (Lex.pos * Lex.pos), ENDS_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)) = + (Nd (Atom.atom "S", + (Lf (Tok.STARTS STARTS)) :: (inline @ [Lf (Tok.ENDS)]))) +fun samp_PROD_1_ACT (inline, STARTSAMP, ENDSAMP, inline_SPAN : (Lex.pos * Lex.pos), STARTSAMP_SPAN : (Lex.pos * Lex.pos), ENDSAMP_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)) = + (Nd (Atom.atom "SAMP", + (Lf (Tok.STARTSAMP STARTSAMP)) :: + (inline @ [Lf (Tok.ENDSAMP)]))) +fun select_PROD_1_ACT (STARTSELECT, SR, cdata_opt, ENDSELECT, STARTSELECT_SPAN : (Lex.pos * Lex.pos), SR_SPAN : (Lex.pos * Lex.pos), cdata_opt_SPAN : (Lex.pos * Lex.pos), ENDSELECT_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)) = + (Nd (Atom.atom "SELECT", + (Lf (Tok.STARTSELECT STARTSELECT)) :: + (cdata_opt @ SR @ [Lf (Tok.ENDSELECT)]))) +fun small_PROD_1_ACT (inline, ENDSMALL, STARTSMALL, inline_SPAN : (Lex.pos * Lex.pos), ENDSMALL_SPAN : (Lex.pos * Lex.pos), STARTSMALL_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)) = + (Nd (Atom.atom "SMALL", + (Lf (Tok.STARTSMALL STARTSMALL)) :: + (inline @ [Lf (Tok.ENDSMALL)]))) +fun span_PROD_1_ACT (inline, STARTSPAN, ENDSPAN, inline_SPAN : (Lex.pos * Lex.pos), STARTSPAN_SPAN : (Lex.pos * Lex.pos), ENDSPAN_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)) = + (Nd (Atom.atom "SPAN", + (Lf (Tok.STARTSPAN STARTSPAN)) :: + (inline @ [Lf (Tok.ENDSPAN)]))) +fun strike_PROD_1_ACT (inline, STARTSTRIKE, ENDSTRIKE, inline_SPAN : (Lex.pos * Lex.pos), STARTSTRIKE_SPAN : (Lex.pos * Lex.pos), ENDSTRIKE_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)) = + (Nd (Atom.atom "STRIKE", + (Lf (Tok.STARTSTRIKE STARTSTRIKE)) :: + (inline @ [Lf (Tok.ENDSTRIKE)]))) +fun strong_PROD_1_ACT (inline, STARTSTRONG, ENDSTRONG, inline_SPAN : (Lex.pos * Lex.pos), STARTSTRONG_SPAN : (Lex.pos * Lex.pos), ENDSTRONG_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)) = + (Nd (Atom.atom "STRONG", + (Lf (Tok.STARTSTRONG STARTSTRONG)) :: + (inline @ [Lf (Tok.ENDSTRONG)]))) +fun sub_PROD_1_ACT (inline, ENDSUB, STARTSUB, inline_SPAN : (Lex.pos * Lex.pos), ENDSUB_SPAN : (Lex.pos * Lex.pos), STARTSUB_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)) = + (Nd (Atom.atom "SUB", + (Lf (Tok.STARTSUB STARTSUB)) :: + (inline @ [Lf (Tok.ENDSUB)]))) +fun sup_PROD_1_ACT (inline, ENDSUP, STARTSUP, inline_SPAN : (Lex.pos * Lex.pos), ENDSUP_SPAN : (Lex.pos * Lex.pos), STARTSUP_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)) = + (Nd (Atom.atom "SUP", + (Lf (Tok.STARTSUP STARTSUP)) :: + (inline @ [Lf (Tok.ENDSUP)]))) +fun table_PROD_1_SUBRULE_1_PROD_1_ACT (STARTTABLE, cdata_opt, caption, STARTTABLE_SPAN : (Lex.pos * Lex.pos), cdata_opt_SPAN : (Lex.pos * Lex.pos), caption_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)) = + (caption :: cdata_opt) +fun table_PROD_1_ACT (SR, STARTTABLE, cdata_opt, table_content, ENDTABLE, col_or_colgroups, SR_SPAN : (Lex.pos * Lex.pos), STARTTABLE_SPAN : (Lex.pos * Lex.pos), cdata_opt_SPAN : (Lex.pos * Lex.pos), table_content_SPAN : (Lex.pos * Lex.pos), ENDTABLE_SPAN : (Lex.pos * Lex.pos), col_or_colgroups_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)) = + (Nd (Atom.atom "TABLE", + (Lf (Tok.STARTTABLE STARTTABLE)) :: + (cdata_opt @ (optListToList SR) @ col_or_colgroups @ + table_content @ [Lf (Tok.ENDTABLE)]))) +fun table_content_PROD_1_ACT (tbodies, tfoot, thead, tbodies_SPAN : (Lex.pos * Lex.pos), tfoot_SPAN : (Lex.pos * Lex.pos), thead_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)) = + (thead :: ((optToList tfoot)) @ tbodies) +fun table_content_PROD_2_ACT (tbodies, tfoot, tbodies_SPAN : (Lex.pos * Lex.pos), tfoot_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)) = + (tfoot :: tbodies) +fun col_or_colgroups_PROD_1_ACT (FULL_SPAN : (Lex.pos * Lex.pos)) = + ([]) +fun col_or_colgroups_PROD_2_SUBRULE_1_PROD_1_ACT (cdata_opt, col, cdata_opt_SPAN : (Lex.pos * Lex.pos), col_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)) = + (col :: cdata_opt) +fun col_or_colgroups_PROD_2_ACT (SR, SR_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)) = + (foldr op@ [] SR) +fun tbodies_nostart_PROD_1_SUBRULE_1_PROD_1_ACT (cdata_opt, STARTTBODY, cdata_opt_SPAN : (Lex.pos * Lex.pos), STARTTBODY_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)) = + ((Lf (Tok.STARTTBODY STARTTBODY)) :: cdata_opt) +fun tbodies_nostart_PROD_1_ACT (tr, SR, tbodies_rest, tr_SPAN : (Lex.pos * Lex.pos), SR_SPAN : (Lex.pos * Lex.pos), tbodies_rest_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)) = + (let val (tbody_rest, tbody_peers) = + case tbodies_rest of + NONE => ([], []) + | SOME tbodies_tup => tbodies_tup + in (Nd (Atom.atom "TBODIES_NOSTART", + (optListToList SR) @ tr @ tbody_rest)) :: + tbody_peers end) +fun tbodies_PROD_1_ACT (tr, cdata_opt, STARTTBODY, tbodies_rest, tr_SPAN : (Lex.pos * Lex.pos), cdata_opt_SPAN : (Lex.pos * Lex.pos), STARTTBODY_SPAN : (Lex.pos * Lex.pos), tbodies_rest_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)) = + (let val (tbody_rest, tbody_peers) = tbodies_rest + in (Nd (Atom.atom "TBODIES", + (Lf (Tok.STARTTBODY STARTTBODY)) :: + (cdata_opt @ tr @ tbody_rest))) :: + tbody_peers end) +fun tbodies_rest_PROD_1_ACT (cdata_opt, tbodies, ENDTBODY, cdata_opt_SPAN : (Lex.pos * Lex.pos), tbodies_SPAN : (Lex.pos * Lex.pos), ENDTBODY_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)) = + ((Lf (Tok.ENDTBODY)) :: cdata_opt, + optListToList tbodies) +fun tbodies_rest_PROD_2_ACT (tr, cdata_opt, STARTTBODY, tbodies_rest, tr_SPAN : (Lex.pos * Lex.pos), cdata_opt_SPAN : (Lex.pos * Lex.pos), STARTTBODY_SPAN : (Lex.pos * Lex.pos), tbodies_rest_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)) = + (let val (tbody_rest, tbody_peers) = + case tbodies_rest of NONE => ([], []) + | SOME tbodies_tup => tbodies_tup + in ([], (Nd (Atom.atom "TBODIES_REST", + (Lf (Tok.STARTTBODY STARTTBODY)) :: + (cdata_opt @ tr @ tbody_rest))) :: tbody_peers) + end) +fun td_PROD_1_SUBRULE_2_PROD_1_ACT (ENDTD, cdata_opt, STARTTD, flow, ENDTD_SPAN : (Lex.pos * Lex.pos), cdata_opt_SPAN : (Lex.pos * Lex.pos), STARTTD_SPAN : (Lex.pos * Lex.pos), flow_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)) = + ((Lf (Tok.ENDTD)) :: cdata_opt) +fun td_PROD_1_ACT (SR, STARTTD, flow, SR_SPAN : (Lex.pos * Lex.pos), STARTTD_SPAN : (Lex.pos * Lex.pos), flow_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)) = + (Nd (Atom.atom "TD", + (Lf (Tok.STARTTD STARTTD)) :: (flow @ (optListToList SR)))) +fun textarea_PROD_1_ACT (cdata_opt, STARTTEXTAREA, ENDTEXTAREA, cdata_opt_SPAN : (Lex.pos * Lex.pos), STARTTEXTAREA_SPAN : (Lex.pos * Lex.pos), ENDTEXTAREA_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)) = + (Nd (Atom.atom "TEXTAREA", + (Lf (Tok.STARTTEXTAREA STARTTEXTAREA)) :: + (cdata_opt @ [Lf (Tok.ENDTEXTAREA)]))) +fun tfoot_PROD_1_SUBRULE_2_PROD_1_ACT (tr, STARTTFOOT, cdata_opt, ENDTFOOT, tr_SPAN : (Lex.pos * Lex.pos), STARTTFOOT_SPAN : (Lex.pos * Lex.pos), cdata_opt_SPAN : (Lex.pos * Lex.pos), ENDTFOOT_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)) = + ((Lf (Tok.ENDTFOOT)) :: cdata_opt) +fun tfoot_PROD_1_ACT (tr, SR, STARTTFOOT, cdata_opt, tr_SPAN : (Lex.pos * Lex.pos), SR_SPAN : (Lex.pos * Lex.pos), STARTTFOOT_SPAN : (Lex.pos * Lex.pos), cdata_opt_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)) = + (Nd (Atom.atom "TFOOT", + (Lf (Tok.STARTTFOOT STARTTFOOT)) :: (cdata_opt @ tr @ + (optListToList SR)))) +fun th_PROD_1_SUBRULE_2_PROD_1_ACT (ENDTH, cdata_opt, STARTTH, flow, ENDTH_SPAN : (Lex.pos * Lex.pos), cdata_opt_SPAN : (Lex.pos * Lex.pos), STARTTH_SPAN : (Lex.pos * Lex.pos), flow_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)) = + ((Lf (Tok.ENDTH)) :: cdata_opt) +fun th_PROD_1_ACT (SR, STARTTH, flow, SR_SPAN : (Lex.pos * Lex.pos), STARTTH_SPAN : (Lex.pos * Lex.pos), flow_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)) = + (Nd (Atom.atom "TH", + (Lf (Tok.STARTTH STARTTH)) :: (flow @ (optListToList SR)))) +fun thead_PROD_1_SUBRULE_2_PROD_1_ACT (tr, STARTTHEAD, ENDTHEAD, cdata_opt, tr_SPAN : (Lex.pos * Lex.pos), STARTTHEAD_SPAN : (Lex.pos * Lex.pos), ENDTHEAD_SPAN : (Lex.pos * Lex.pos), cdata_opt_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)) = + ((Lf (Tok.ENDTHEAD)) :: cdata_opt) +fun thead_PROD_1_ACT (tr, SR, STARTTHEAD, cdata_opt, tr_SPAN : (Lex.pos * Lex.pos), SR_SPAN : (Lex.pos * Lex.pos), STARTTHEAD_SPAN : (Lex.pos * Lex.pos), cdata_opt_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)) = + (Nd (Atom.atom "THEAD", + (Lf (Tok.STARTTHEAD STARTTHEAD)) :: (cdata_opt @ tr @ + (optListToList SR)))) +fun tr_PROD_1_SUBRULE_2_PROD_1_ACT (ENDTR, SR1, cdata_opt, STARTTR, ENDTR_SPAN : (Lex.pos * Lex.pos), SR1_SPAN : (Lex.pos * Lex.pos), cdata_opt_SPAN : (Lex.pos * Lex.pos), STARTTR_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)) = + ((Lf (Tok.ENDTR)) :: cdata_opt) +fun tr_PROD_1_ACT (SR1, SR2, cdata_opt, STARTTR, SR1_SPAN : (Lex.pos * Lex.pos), SR2_SPAN : (Lex.pos * Lex.pos), cdata_opt_SPAN : (Lex.pos * Lex.pos), STARTTR_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)) = + (Nd (Atom.atom "TR", + (Lf (Tok.STARTTR STARTTR)) :: (cdata_opt @ SR1 @ + (optListToList SR2)))) +fun tt_PROD_1_ACT (ENDTT, inline, STARTTT, ENDTT_SPAN : (Lex.pos * Lex.pos), inline_SPAN : (Lex.pos * Lex.pos), STARTTT_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)) = + (Nd (Atom.atom "TT", + (Lf (Tok.STARTTT STARTTT)) :: (inline @ [Lf (Tok.ENDTT)]))) +fun u_PROD_1_ACT (inline, STARTU, ENDU, inline_SPAN : (Lex.pos * Lex.pos), STARTU_SPAN : (Lex.pos * Lex.pos), ENDU_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)) = + (Nd (Atom.atom "U", + (Lf (Tok.STARTU STARTU)) :: (inline @ [Lf (Tok.ENDU)]))) +fun ul_PROD_1_ACT (li, ENDUL, cdata_opt, STARTUL, li_SPAN : (Lex.pos * Lex.pos), ENDUL_SPAN : (Lex.pos * Lex.pos), cdata_opt_SPAN : (Lex.pos * Lex.pos), STARTUL_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)) = + (Nd (Atom.atom "UL", + ((Lf (Tok.STARTUL STARTUL)) :: (cdata_opt @ li @ + [Lf (Tok.ENDUL)])))) +fun var_PROD_1_ACT (inline, STARTVAR, ENDVAR, inline_SPAN : (Lex.pos * Lex.pos), STARTVAR_SPAN : (Lex.pos * Lex.pos), ENDVAR_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)) = + (Nd (Atom.atom "VAR", + (Lf (Tok.STARTVAR STARTVAR)) :: (inline @ + [Lf (Tok.ENDVAR)]))) +fun cdata_PROD_1_SUBRULE_1_PROD_1_ACT (PCDATA, PCDATA_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)) = + (Tok.PCDATA PCDATA) +fun cdata_PROD_1_SUBRULE_1_PROD_2_ACT (CHAR_REF, CHAR_REF_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)) = + (Tok.CHAR_REF CHAR_REF) +fun cdata_PROD_1_SUBRULE_1_PROD_3_ACT (ENTITY_REF, ENTITY_REF_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)) = + (Tok.ENTITY_REF ENTITY_REF) +fun cdata_PROD_1_SUBRULE_1_PROD_4_ACT (COMMENT, COMMENT_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)) = + (Tok.COMMENT COMMENT) +fun cdata_PROD_1_ACT (SR, SR_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)) = + ((Lf SR) : HTML4Tokens.token parsetree) +fun cdata_opt_PROD_1_ACT (cdata, cdata_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)) = + (cdata : HTML4Tokens.token parsetree list) + end (* UserCode *) + + structure Err = AntlrErrHandler( + structure Tok = Tok + structure Lex = Lex) + +(* replace functor with inline structure for better optimization + structure EBNF = AntlrEBNF( + struct + type strm = Err.wstream + val getSpan = Err.getSpan + end) +*) + structure EBNF = + struct + fun optional (pred, parse, strm) = + if pred strm + then let + val (y, span, strm') = parse strm + in + (SOME y, span, strm') + end + else (NONE, Err.getSpan strm, strm) + + fun closure (pred, parse, strm) = let + fun iter (strm, (left, right), ys) = + if pred strm + then let + val (y, (_, right'), strm') = parse strm + in iter (strm', (left, right'), y::ys) + end + else (List.rev ys, (left, right), strm) + in + iter (strm, Err.getSpan strm, []) + end + + fun posclos (pred, parse, strm) = let + val (y, (left, _), strm') = parse strm + val (ys, (_, right), strm'') = closure (pred, parse, strm') + in + (y::ys, (left, right), strm'') + end + end + + fun mk lexFn = let +fun getS() = {} +fun putS{} = () +fun unwrap (ret, strm, repairs) = (ret, strm, repairs) + val (eh, lex) = Err.mkErrHandler {get = getS, put = putS} + fun fail() = Err.failure eh + fun tryProds (strm, prods) = let + fun try [] = fail() + | try (prod :: prods) = + (Err.whileDisabled eh (fn() => prod strm)) + handle Err.ParseError => try (prods) + in try prods end +fun matchOPENTAG strm = (case (lex(strm)) + of (Tok.OPENTAG(x), span, strm') => (x, span, strm') + | _ => fail() +(* end case *)) +fun matchCLOSETAG strm = (case (lex(strm)) + of (Tok.CLOSETAG(x), span, strm') => (x, span, strm') + | _ => fail() +(* end case *)) +fun matchCOMMENT strm = (case (lex(strm)) + of (Tok.COMMENT(x), span, strm') => (x, span, strm') + | _ => fail() +(* end case *)) +fun matchPCDATA strm = (case (lex(strm)) + of (Tok.PCDATA(x), span, strm') => (x, span, strm') + | _ => fail() +(* end case *)) +fun matchDOCTYPE strm = (case (lex(strm)) + of (Tok.DOCTYPE(x), span, strm') => (x, span, strm') + | _ => fail() +(* end case *)) +fun matchCHAR_REF strm = (case (lex(strm)) + of (Tok.CHAR_REF(x), span, strm') => (x, span, strm') + | _ => fail() +(* end case *)) +fun matchENTITY_REF strm = (case (lex(strm)) + of (Tok.ENTITY_REF(x), span, strm') => (x, span, strm') + | _ => fail() +(* end case *)) +fun matchXML_PROCESSING strm = (case (lex(strm)) + of (Tok.XML_PROCESSING(x), span, strm') => (x, span, strm') + | _ => fail() +(* end case *)) +fun matchSTARTA strm = (case (lex(strm)) + of (Tok.STARTA(x), span, strm') => (x, span, strm') + | _ => fail() +(* end case *)) +fun matchENDA strm = (case (lex(strm)) + of (Tok.ENDA, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchSTARTABBR strm = (case (lex(strm)) + of (Tok.STARTABBR(x), span, strm') => (x, span, strm') + | _ => fail() +(* end case *)) +fun matchENDABBR strm = (case (lex(strm)) + of (Tok.ENDABBR, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchSTARTACRONYM strm = (case (lex(strm)) + of (Tok.STARTACRONYM(x), span, strm') => (x, span, strm') + | _ => fail() +(* end case *)) +fun matchENDACRONYM strm = (case (lex(strm)) + of (Tok.ENDACRONYM, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchSTARTADDRESS strm = (case (lex(strm)) + of (Tok.STARTADDRESS(x), span, strm') => (x, span, strm') + | _ => fail() +(* end case *)) +fun matchENDADDRESS strm = (case (lex(strm)) + of (Tok.ENDADDRESS, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchSTARTAPPLET strm = (case (lex(strm)) + of (Tok.STARTAPPLET(x), span, strm') => (x, span, strm') + | _ => fail() +(* end case *)) +fun matchENDAPPLET strm = (case (lex(strm)) + of (Tok.ENDAPPLET, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchSTARTAREA strm = (case (lex(strm)) + of (Tok.STARTAREA(x), span, strm') => (x, span, strm') + | _ => fail() +(* end case *)) +fun matchSTARTB strm = (case (lex(strm)) + of (Tok.STARTB(x), span, strm') => (x, span, strm') + | _ => fail() +(* end case *)) +fun matchENDB strm = (case (lex(strm)) + of (Tok.ENDB, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchSTARTBASE strm = (case (lex(strm)) + of (Tok.STARTBASE(x), span, strm') => (x, span, strm') + | _ => fail() +(* end case *)) +fun matchSTARTBASEFONT strm = (case (lex(strm)) + of (Tok.STARTBASEFONT(x), span, strm') => (x, span, strm') + | _ => fail() +(* end case *)) +fun matchSTARTBDO strm = (case (lex(strm)) + of (Tok.STARTBDO(x), span, strm') => (x, span, strm') + | _ => fail() +(* end case *)) +fun matchENDBDO strm = (case (lex(strm)) + of (Tok.ENDBDO, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchSTARTBIG strm = (case (lex(strm)) + of (Tok.STARTBIG(x), span, strm') => (x, span, strm') + | _ => fail() +(* end case *)) +fun matchENDBIG strm = (case (lex(strm)) + of (Tok.ENDBIG, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchSTARTBLOCKQUOTE strm = (case (lex(strm)) + of (Tok.STARTBLOCKQUOTE(x), span, strm') => (x, span, strm') + | _ => fail() +(* end case *)) +fun matchENDBLOCKQUOTE strm = (case (lex(strm)) + of (Tok.ENDBLOCKQUOTE, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchSTARTBODY strm = (case (lex(strm)) + of (Tok.STARTBODY(x), span, strm') => (x, span, strm') + | _ => fail() +(* end case *)) +fun matchENDBODY strm = (case (lex(strm)) + of (Tok.ENDBODY, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchSTARTBR strm = (case (lex(strm)) + of (Tok.STARTBR(x), span, strm') => (x, span, strm') + | _ => fail() +(* end case *)) +fun matchSTARTBUTTON strm = (case (lex(strm)) + of (Tok.STARTBUTTON(x), span, strm') => (x, span, strm') + | _ => fail() +(* end case *)) +fun matchENDBUTTON strm = (case (lex(strm)) + of (Tok.ENDBUTTON, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchSTARTCAPTION strm = (case (lex(strm)) + of (Tok.STARTCAPTION(x), span, strm') => (x, span, strm') + | _ => fail() +(* end case *)) +fun matchENDCAPTION strm = (case (lex(strm)) + of (Tok.ENDCAPTION, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchSTARTCENTER strm = (case (lex(strm)) + of (Tok.STARTCENTER(x), span, strm') => (x, span, strm') + | _ => fail() +(* end case *)) +fun matchENDCENTER strm = (case (lex(strm)) + of (Tok.ENDCENTER, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchSTARTCITE strm = (case (lex(strm)) + of (Tok.STARTCITE(x), span, strm') => (x, span, strm') + | _ => fail() +(* end case *)) +fun matchENDCITE strm = (case (lex(strm)) + of (Tok.ENDCITE, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchSTARTCODE strm = (case (lex(strm)) + of (Tok.STARTCODE(x), span, strm') => (x, span, strm') + | _ => fail() +(* end case *)) +fun matchENDCODE strm = (case (lex(strm)) + of (Tok.ENDCODE, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchSTARTCOL strm = (case (lex(strm)) + of (Tok.STARTCOL(x), span, strm') => (x, span, strm') + | _ => fail() +(* end case *)) +fun matchSTARTCOLGROUP strm = (case (lex(strm)) + of (Tok.STARTCOLGROUP(x), span, strm') => (x, span, strm') + | _ => fail() +(* end case *)) +fun matchENDCOLGROUP strm = (case (lex(strm)) + of (Tok.ENDCOLGROUP, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchSTARTDD strm = (case (lex(strm)) + of (Tok.STARTDD(x), span, strm') => (x, span, strm') + | _ => fail() +(* end case *)) +fun matchENDDD strm = (case (lex(strm)) + of (Tok.ENDDD, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchSTARTDEL strm = (case (lex(strm)) + of (Tok.STARTDEL(x), span, strm') => (x, span, strm') + | _ => fail() +(* end case *)) +fun matchENDDEL strm = (case (lex(strm)) + of (Tok.ENDDEL, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchSTARTDFN strm = (case (lex(strm)) + of (Tok.STARTDFN(x), span, strm') => (x, span, strm') + | _ => fail() +(* end case *)) +fun matchENDDFN strm = (case (lex(strm)) + of (Tok.ENDDFN, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchSTARTDIR strm = (case (lex(strm)) + of (Tok.STARTDIR(x), span, strm') => (x, span, strm') + | _ => fail() +(* end case *)) +fun matchENDDIR strm = (case (lex(strm)) + of (Tok.ENDDIR, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchSTARTDIV strm = (case (lex(strm)) + of (Tok.STARTDIV(x), span, strm') => (x, span, strm') + | _ => fail() +(* end case *)) +fun matchENDDIV strm = (case (lex(strm)) + of (Tok.ENDDIV, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchSTARTDL strm = (case (lex(strm)) + of (Tok.STARTDL(x), span, strm') => (x, span, strm') + | _ => fail() +(* end case *)) +fun matchENDDL strm = (case (lex(strm)) + of (Tok.ENDDL, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchSTARTDT strm = (case (lex(strm)) + of (Tok.STARTDT(x), span, strm') => (x, span, strm') + | _ => fail() +(* end case *)) +fun matchENDDT strm = (case (lex(strm)) + of (Tok.ENDDT, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchSTARTEM strm = (case (lex(strm)) + of (Tok.STARTEM(x), span, strm') => (x, span, strm') + | _ => fail() +(* end case *)) +fun matchENDEM strm = (case (lex(strm)) + of (Tok.ENDEM, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchSTARTFIELDSET strm = (case (lex(strm)) + of (Tok.STARTFIELDSET(x), span, strm') => (x, span, strm') + | _ => fail() +(* end case *)) +fun matchENDFIELDSET strm = (case (lex(strm)) + of (Tok.ENDFIELDSET, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchSTARTFONT strm = (case (lex(strm)) + of (Tok.STARTFONT(x), span, strm') => (x, span, strm') + | _ => fail() +(* end case *)) +fun matchENDFONT strm = (case (lex(strm)) + of (Tok.ENDFONT, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchSTARTFORM strm = (case (lex(strm)) + of (Tok.STARTFORM(x), span, strm') => (x, span, strm') + | _ => fail() +(* end case *)) +fun matchENDFORM strm = (case (lex(strm)) + of (Tok.ENDFORM, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchSTARTFRAME strm = (case (lex(strm)) + of (Tok.STARTFRAME(x), span, strm') => (x, span, strm') + | _ => fail() +(* end case *)) +fun matchSTARTFRAMESET strm = (case (lex(strm)) + of (Tok.STARTFRAMESET(x), span, strm') => (x, span, strm') + | _ => fail() +(* end case *)) +fun matchENDFRAMESET strm = (case (lex(strm)) + of (Tok.ENDFRAMESET, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchSTARTH1 strm = (case (lex(strm)) + of (Tok.STARTH1(x), span, strm') => (x, span, strm') + | _ => fail() +(* end case *)) +fun matchENDH1 strm = (case (lex(strm)) + of (Tok.ENDH1, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchSTARTH2 strm = (case (lex(strm)) + of (Tok.STARTH2(x), span, strm') => (x, span, strm') + | _ => fail() +(* end case *)) +fun matchENDH2 strm = (case (lex(strm)) + of (Tok.ENDH2, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchSTARTH3 strm = (case (lex(strm)) + of (Tok.STARTH3(x), span, strm') => (x, span, strm') + | _ => fail() +(* end case *)) +fun matchENDH3 strm = (case (lex(strm)) + of (Tok.ENDH3, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchSTARTH4 strm = (case (lex(strm)) + of (Tok.STARTH4(x), span, strm') => (x, span, strm') + | _ => fail() +(* end case *)) +fun matchENDH4 strm = (case (lex(strm)) + of (Tok.ENDH4, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchSTARTH5 strm = (case (lex(strm)) + of (Tok.STARTH5(x), span, strm') => (x, span, strm') + | _ => fail() +(* end case *)) +fun matchENDH5 strm = (case (lex(strm)) + of (Tok.ENDH5, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchSTARTH6 strm = (case (lex(strm)) + of (Tok.STARTH6(x), span, strm') => (x, span, strm') + | _ => fail() +(* end case *)) +fun matchENDH6 strm = (case (lex(strm)) + of (Tok.ENDH6, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchSTARTHEAD strm = (case (lex(strm)) + of (Tok.STARTHEAD(x), span, strm') => (x, span, strm') + | _ => fail() +(* end case *)) +fun matchENDHEAD strm = (case (lex(strm)) + of (Tok.ENDHEAD, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchSTARTHR strm = (case (lex(strm)) + of (Tok.STARTHR(x), span, strm') => (x, span, strm') + | _ => fail() +(* end case *)) +fun matchSTARTHTML strm = (case (lex(strm)) + of (Tok.STARTHTML(x), span, strm') => (x, span, strm') + | _ => fail() +(* end case *)) +fun matchENDHTML strm = (case (lex(strm)) + of (Tok.ENDHTML, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchSTARTI strm = (case (lex(strm)) + of (Tok.STARTI(x), span, strm') => (x, span, strm') + | _ => fail() +(* end case *)) +fun matchENDI strm = (case (lex(strm)) + of (Tok.ENDI, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchSTARTIFRAME strm = (case (lex(strm)) + of (Tok.STARTIFRAME(x), span, strm') => (x, span, strm') + | _ => fail() +(* end case *)) +fun matchENDIFRAME strm = (case (lex(strm)) + of (Tok.ENDIFRAME, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchSTARTIMG strm = (case (lex(strm)) + of (Tok.STARTIMG(x), span, strm') => (x, span, strm') + | _ => fail() +(* end case *)) +fun matchSTARTINPUT strm = (case (lex(strm)) + of (Tok.STARTINPUT(x), span, strm') => (x, span, strm') + | _ => fail() +(* end case *)) +fun matchSTARTINS strm = (case (lex(strm)) + of (Tok.STARTINS(x), span, strm') => (x, span, strm') + | _ => fail() +(* end case *)) +fun matchENDINS strm = (case (lex(strm)) + of (Tok.ENDINS, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchSTARTISINDEX strm = (case (lex(strm)) + of (Tok.STARTISINDEX(x), span, strm') => (x, span, strm') + | _ => fail() +(* end case *)) +fun matchSTARTKBD strm = (case (lex(strm)) + of (Tok.STARTKBD(x), span, strm') => (x, span, strm') + | _ => fail() +(* end case *)) +fun matchENDKBD strm = (case (lex(strm)) + of (Tok.ENDKBD, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchSTARTLABEL strm = (case (lex(strm)) + of (Tok.STARTLABEL(x), span, strm') => (x, span, strm') + | _ => fail() +(* end case *)) +fun matchENDLABEL strm = (case (lex(strm)) + of (Tok.ENDLABEL, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchSTARTLEGEND strm = (case (lex(strm)) + of (Tok.STARTLEGEND(x), span, strm') => (x, span, strm') + | _ => fail() +(* end case *)) +fun matchENDLEGEND strm = (case (lex(strm)) + of (Tok.ENDLEGEND, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchSTARTLI strm = (case (lex(strm)) + of (Tok.STARTLI(x), span, strm') => (x, span, strm') + | _ => fail() +(* end case *)) +fun matchENDLI strm = (case (lex(strm)) + of (Tok.ENDLI, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchSTARTLINK strm = (case (lex(strm)) + of (Tok.STARTLINK(x), span, strm') => (x, span, strm') + | _ => fail() +(* end case *)) +fun matchSTARTMAP strm = (case (lex(strm)) + of (Tok.STARTMAP(x), span, strm') => (x, span, strm') + | _ => fail() +(* end case *)) +fun matchENDMAP strm = (case (lex(strm)) + of (Tok.ENDMAP, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchSTARTMENU strm = (case (lex(strm)) + of (Tok.STARTMENU(x), span, strm') => (x, span, strm') + | _ => fail() +(* end case *)) +fun matchENDMENU strm = (case (lex(strm)) + of (Tok.ENDMENU, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchSTARTMETA strm = (case (lex(strm)) + of (Tok.STARTMETA(x), span, strm') => (x, span, strm') + | _ => fail() +(* end case *)) +fun matchSTARTNOFRAMES strm = (case (lex(strm)) + of (Tok.STARTNOFRAMES(x), span, strm') => (x, span, strm') + | _ => fail() +(* end case *)) +fun matchENDNOFRAMES strm = (case (lex(strm)) + of (Tok.ENDNOFRAMES, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchSTARTNOSCRIPT strm = (case (lex(strm)) + of (Tok.STARTNOSCRIPT(x), span, strm') => (x, span, strm') + | _ => fail() +(* end case *)) +fun matchENDNOSCRIPT strm = (case (lex(strm)) + of (Tok.ENDNOSCRIPT, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchSTARTOBJECT strm = (case (lex(strm)) + of (Tok.STARTOBJECT(x), span, strm') => (x, span, strm') + | _ => fail() +(* end case *)) +fun matchENDOBJECT strm = (case (lex(strm)) + of (Tok.ENDOBJECT, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchSTARTOL strm = (case (lex(strm)) + of (Tok.STARTOL(x), span, strm') => (x, span, strm') + | _ => fail() +(* end case *)) +fun matchENDOL strm = (case (lex(strm)) + of (Tok.ENDOL, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchSTARTOPTGROUP strm = (case (lex(strm)) + of (Tok.STARTOPTGROUP(x), span, strm') => (x, span, strm') + | _ => fail() +(* end case *)) +fun matchENDOPTGROUP strm = (case (lex(strm)) + of (Tok.ENDOPTGROUP, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchSTARTOPTION strm = (case (lex(strm)) + of (Tok.STARTOPTION(x), span, strm') => (x, span, strm') + | _ => fail() +(* end case *)) +fun matchENDOPTION strm = (case (lex(strm)) + of (Tok.ENDOPTION, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchSTARTP strm = (case (lex(strm)) + of (Tok.STARTP(x), span, strm') => (x, span, strm') + | _ => fail() +(* end case *)) +fun matchENDP strm = (case (lex(strm)) + of (Tok.ENDP, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchSTARTPARAM strm = (case (lex(strm)) + of (Tok.STARTPARAM(x), span, strm') => (x, span, strm') + | _ => fail() +(* end case *)) +fun matchSTARTPRE strm = (case (lex(strm)) + of (Tok.STARTPRE(x), span, strm') => (x, span, strm') + | _ => fail() +(* end case *)) +fun matchENDPRE strm = (case (lex(strm)) + of (Tok.ENDPRE, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchSTARTQ strm = (case (lex(strm)) + of (Tok.STARTQ(x), span, strm') => (x, span, strm') + | _ => fail() +(* end case *)) +fun matchENDQ strm = (case (lex(strm)) + of (Tok.ENDQ, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchSTARTS strm = (case (lex(strm)) + of (Tok.STARTS(x), span, strm') => (x, span, strm') + | _ => fail() +(* end case *)) +fun matchENDS strm = (case (lex(strm)) + of (Tok.ENDS, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchSTARTSAMP strm = (case (lex(strm)) + of (Tok.STARTSAMP(x), span, strm') => (x, span, strm') + | _ => fail() +(* end case *)) +fun matchENDSAMP strm = (case (lex(strm)) + of (Tok.ENDSAMP, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchSTARTSCRIPT strm = (case (lex(strm)) + of (Tok.STARTSCRIPT(x), span, strm') => (x, span, strm') + | _ => fail() +(* end case *)) +fun matchENDSCRIPT strm = (case (lex(strm)) + of (Tok.ENDSCRIPT, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchSTARTSELECT strm = (case (lex(strm)) + of (Tok.STARTSELECT(x), span, strm') => (x, span, strm') + | _ => fail() +(* end case *)) +fun matchENDSELECT strm = (case (lex(strm)) + of (Tok.ENDSELECT, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchSTARTSMALL strm = (case (lex(strm)) + of (Tok.STARTSMALL(x), span, strm') => (x, span, strm') + | _ => fail() +(* end case *)) +fun matchENDSMALL strm = (case (lex(strm)) + of (Tok.ENDSMALL, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchSTARTSPAN strm = (case (lex(strm)) + of (Tok.STARTSPAN(x), span, strm') => (x, span, strm') + | _ => fail() +(* end case *)) +fun matchENDSPAN strm = (case (lex(strm)) + of (Tok.ENDSPAN, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchSTARTSTRIKE strm = (case (lex(strm)) + of (Tok.STARTSTRIKE(x), span, strm') => (x, span, strm') + | _ => fail() +(* end case *)) +fun matchENDSTRIKE strm = (case (lex(strm)) + of (Tok.ENDSTRIKE, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchSTARTSTRONG strm = (case (lex(strm)) + of (Tok.STARTSTRONG(x), span, strm') => (x, span, strm') + | _ => fail() +(* end case *)) +fun matchENDSTRONG strm = (case (lex(strm)) + of (Tok.ENDSTRONG, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchSTARTSTYLE strm = (case (lex(strm)) + of (Tok.STARTSTYLE(x), span, strm') => (x, span, strm') + | _ => fail() +(* end case *)) +fun matchENDSTYLE strm = (case (lex(strm)) + of (Tok.ENDSTYLE, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchSTARTSUB strm = (case (lex(strm)) + of (Tok.STARTSUB(x), span, strm') => (x, span, strm') + | _ => fail() +(* end case *)) +fun matchENDSUB strm = (case (lex(strm)) + of (Tok.ENDSUB, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchSTARTSUP strm = (case (lex(strm)) + of (Tok.STARTSUP(x), span, strm') => (x, span, strm') + | _ => fail() +(* end case *)) +fun matchENDSUP strm = (case (lex(strm)) + of (Tok.ENDSUP, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchSTARTTABLE strm = (case (lex(strm)) + of (Tok.STARTTABLE(x), span, strm') => (x, span, strm') + | _ => fail() +(* end case *)) +fun matchENDTABLE strm = (case (lex(strm)) + of (Tok.ENDTABLE, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchSTARTTBODY strm = (case (lex(strm)) + of (Tok.STARTTBODY(x), span, strm') => (x, span, strm') + | _ => fail() +(* end case *)) +fun matchENDTBODY strm = (case (lex(strm)) + of (Tok.ENDTBODY, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchSTARTTD strm = (case (lex(strm)) + of (Tok.STARTTD(x), span, strm') => (x, span, strm') + | _ => fail() +(* end case *)) +fun matchENDTD strm = (case (lex(strm)) + of (Tok.ENDTD, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchSTARTTEXTAREA strm = (case (lex(strm)) + of (Tok.STARTTEXTAREA(x), span, strm') => (x, span, strm') + | _ => fail() +(* end case *)) +fun matchENDTEXTAREA strm = (case (lex(strm)) + of (Tok.ENDTEXTAREA, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchSTARTTFOOT strm = (case (lex(strm)) + of (Tok.STARTTFOOT(x), span, strm') => (x, span, strm') + | _ => fail() +(* end case *)) +fun matchENDTFOOT strm = (case (lex(strm)) + of (Tok.ENDTFOOT, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchSTARTTH strm = (case (lex(strm)) + of (Tok.STARTTH(x), span, strm') => (x, span, strm') + | _ => fail() +(* end case *)) +fun matchENDTH strm = (case (lex(strm)) + of (Tok.ENDTH, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchSTARTTHEAD strm = (case (lex(strm)) + of (Tok.STARTTHEAD(x), span, strm') => (x, span, strm') + | _ => fail() +(* end case *)) +fun matchENDTHEAD strm = (case (lex(strm)) + of (Tok.ENDTHEAD, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchSTARTTITLE strm = (case (lex(strm)) + of (Tok.STARTTITLE(x), span, strm') => (x, span, strm') + | _ => fail() +(* end case *)) +fun matchENDTITLE strm = (case (lex(strm)) + of (Tok.ENDTITLE, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchSTARTTR strm = (case (lex(strm)) + of (Tok.STARTTR(x), span, strm') => (x, span, strm') + | _ => fail() +(* end case *)) +fun matchENDTR strm = (case (lex(strm)) + of (Tok.ENDTR, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchSTARTTT strm = (case (lex(strm)) + of (Tok.STARTTT(x), span, strm') => (x, span, strm') + | _ => fail() +(* end case *)) +fun matchENDTT strm = (case (lex(strm)) + of (Tok.ENDTT, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchSTARTU strm = (case (lex(strm)) + of (Tok.STARTU(x), span, strm') => (x, span, strm') + | _ => fail() +(* end case *)) +fun matchENDU strm = (case (lex(strm)) + of (Tok.ENDU, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchSTARTUL strm = (case (lex(strm)) + of (Tok.STARTUL(x), span, strm') => (x, span, strm') + | _ => fail() +(* end case *)) +fun matchENDUL strm = (case (lex(strm)) + of (Tok.ENDUL, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchSTARTVAR strm = (case (lex(strm)) + of (Tok.STARTVAR(x), span, strm') => (x, span, strm') + | _ => fail() +(* end case *)) +fun matchENDVAR strm = (case (lex(strm)) + of (Tok.ENDVAR, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchEOF strm = (case (lex(strm)) + of (Tok.EOF, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) + +val (document_NT, block_NT, cdata_opt_NT, inline_NT, body_NT, flow_NT) = +let +fun cdata_NT (strm) = let + val (SR_RES, SR_SPAN, strm') = let + fun cdata_PROD_1_SUBRULE_1_NT (strm) = let + fun cdata_PROD_1_SUBRULE_1_PROD_1 (strm) = let + val (PCDATA_RES, PCDATA_SPAN, strm') = matchPCDATA(strm) + val FULL_SPAN = (#1(PCDATA_SPAN), #2(PCDATA_SPAN)) + in + (UserCode.cdata_PROD_1_SUBRULE_1_PROD_1_ACT (PCDATA_RES, PCDATA_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)), + FULL_SPAN, strm') + end + fun cdata_PROD_1_SUBRULE_1_PROD_2 (strm) = let + val (CHAR_REF_RES, CHAR_REF_SPAN, strm') = matchCHAR_REF(strm) + val FULL_SPAN = (#1(CHAR_REF_SPAN), #2(CHAR_REF_SPAN)) + in + (UserCode.cdata_PROD_1_SUBRULE_1_PROD_2_ACT (CHAR_REF_RES, CHAR_REF_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)), + FULL_SPAN, strm') + end + fun cdata_PROD_1_SUBRULE_1_PROD_3 (strm) = let + val (ENTITY_REF_RES, ENTITY_REF_SPAN, strm') = matchENTITY_REF(strm) + val FULL_SPAN = (#1(ENTITY_REF_SPAN), #2(ENTITY_REF_SPAN)) + in + (UserCode.cdata_PROD_1_SUBRULE_1_PROD_3_ACT (ENTITY_REF_RES, ENTITY_REF_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)), + FULL_SPAN, strm') + end + fun cdata_PROD_1_SUBRULE_1_PROD_4 (strm) = let + val (COMMENT_RES, COMMENT_SPAN, strm') = matchCOMMENT(strm) + val FULL_SPAN = (#1(COMMENT_SPAN), #2(COMMENT_SPAN)) + in + (UserCode.cdata_PROD_1_SUBRULE_1_PROD_4_ACT (COMMENT_RES, COMMENT_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)), + FULL_SPAN, strm') + end + in + (case (lex(strm)) + of (Tok.COMMENT(_), _, strm') => + cdata_PROD_1_SUBRULE_1_PROD_4(strm) + | (Tok.CHAR_REF(_), _, strm') => + cdata_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.PCDATA(_), _, strm') => + cdata_PROD_1_SUBRULE_1_PROD_1(strm) + | (Tok.ENTITY_REF(_), _, strm') => + cdata_PROD_1_SUBRULE_1_PROD_3(strm) + | _ => fail() + (* end case *)) + end + in + cdata_PROD_1_SUBRULE_1_NT(strm) + end + val FULL_SPAN = (#1(SR_SPAN), #2(SR_SPAN)) + in + (UserCode.cdata_PROD_1_ACT (SR_RES, SR_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)), + FULL_SPAN, strm') + end +fun cdata_opt_NT (strm) = let + fun cdata_opt_PROD_1_SUBRULE_1_NT (strm) = let + val (cdata_RES, cdata_SPAN, strm') = cdata_NT(strm) + val FULL_SPAN = (#1(cdata_SPAN), #2(cdata_SPAN)) + in + ((cdata_RES), FULL_SPAN, strm') + end + fun cdata_opt_PROD_1_SUBRULE_1_PRED (strm) = (case (lex(strm)) + of (Tok.COMMENT(_), _, strm') => true + | (Tok.PCDATA(_), _, strm') => true + | (Tok.CHAR_REF(_), _, strm') => true + | (Tok.ENTITY_REF(_), _, strm') => true + | _ => false + (* end case *)) + val (cdata_RES, cdata_SPAN, strm') = EBNF.closure(cdata_opt_PROD_1_SUBRULE_1_PRED, cdata_opt_PROD_1_SUBRULE_1_NT, strm) + val FULL_SPAN = (#1(cdata_SPAN), #2(cdata_SPAN)) + in + (UserCode.cdata_opt_PROD_1_ACT (cdata_RES, cdata_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)), + FULL_SPAN, strm') + end +fun textarea_NT (strm) = let + val (STARTTEXTAREA_RES, STARTTEXTAREA_SPAN, strm') = matchSTARTTEXTAREA(strm) + val (cdata_opt_RES, cdata_opt_SPAN, strm') = cdata_opt_NT(strm') + val (ENDTEXTAREA_RES, ENDTEXTAREA_SPAN, strm') = matchENDTEXTAREA(strm') + val FULL_SPAN = (#1(STARTTEXTAREA_SPAN), #2(ENDTEXTAREA_SPAN)) + in + (UserCode.textarea_PROD_1_ACT (cdata_opt_RES, STARTTEXTAREA_RES, ENDTEXTAREA_RES, cdata_opt_SPAN : (Lex.pos * Lex.pos), STARTTEXTAREA_SPAN : (Lex.pos * Lex.pos), ENDTEXTAREA_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)), + FULL_SPAN, strm') + end +fun option_NT (strm) = let + val (STARTOPTION_RES, STARTOPTION_SPAN, strm') = matchSTARTOPTION(strm) + val (cdata_opt_RES, cdata_opt_SPAN, strm') = cdata_opt_NT(strm') + fun option_PROD_1_SUBRULE_1_NT (strm) = let + val (ENDOPTION_RES, ENDOPTION_SPAN, strm') = matchENDOPTION(strm) + val (cdata_opt_RES, cdata_opt_SPAN, strm') = cdata_opt_NT(strm') + val FULL_SPAN = (#1(ENDOPTION_SPAN), #2(cdata_opt_SPAN)) + in + (UserCode.option_PROD_1_SUBRULE_1_PROD_1_ACT (STARTOPTION_RES, cdata_opt_RES, ENDOPTION_RES, STARTOPTION_SPAN : (Lex.pos * Lex.pos), cdata_opt_SPAN : (Lex.pos * Lex.pos), ENDOPTION_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)), + FULL_SPAN, strm') + end + fun option_PROD_1_SUBRULE_1_PRED (strm) = (case (lex(strm)) + of (Tok.ENDOPTION, _, strm') => true + | _ => false + (* end case *)) + val (SR_RES, SR_SPAN, strm') = EBNF.optional(option_PROD_1_SUBRULE_1_PRED, option_PROD_1_SUBRULE_1_NT, strm') + val FULL_SPAN = (#1(STARTOPTION_SPAN), #2(SR_SPAN)) + in + (UserCode.option_PROD_1_ACT (SR_RES, STARTOPTION_RES, cdata_opt_RES, SR_SPAN : (Lex.pos * Lex.pos), STARTOPTION_SPAN : (Lex.pos * Lex.pos), cdata_opt_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)), + FULL_SPAN, strm') + end +fun optgroup_NT (strm) = let + val (STARTOPTGROUP_RES, STARTOPTGROUP_SPAN, strm') = matchSTARTOPTGROUP(strm) + val (cdata_opt1_RES, cdata_opt1_SPAN, strm') = cdata_opt_NT(strm') + fun optgroup_PROD_1_SUBRULE_1_NT (strm) = let + val (option_RES, option_SPAN, strm') = option_NT(strm) + val FULL_SPAN = (#1(option_SPAN), #2(option_SPAN)) + in + ((option_RES), FULL_SPAN, strm') + end + fun optgroup_PROD_1_SUBRULE_1_PRED (strm) = (case (lex(strm)) + of (Tok.STARTOPTION(_), _, strm') => true + | _ => false + (* end case *)) + val (option_RES, option_SPAN, strm') = EBNF.posclos(optgroup_PROD_1_SUBRULE_1_PRED, optgroup_PROD_1_SUBRULE_1_NT, strm') + val (ENDOPTGROUP_RES, ENDOPTGROUP_SPAN, strm') = matchENDOPTGROUP(strm') + val (cdata_opt2_RES, cdata_opt2_SPAN, strm') = cdata_opt_NT(strm') + val FULL_SPAN = (#1(STARTOPTGROUP_SPAN), #2(cdata_opt2_SPAN)) + in + (UserCode.optgroup_PROD_1_ACT (option_RES, STARTOPTGROUP_RES, ENDOPTGROUP_RES, cdata_opt1_RES, cdata_opt2_RES, option_SPAN : (Lex.pos * Lex.pos), STARTOPTGROUP_SPAN : (Lex.pos * Lex.pos), ENDOPTGROUP_SPAN : (Lex.pos * Lex.pos), cdata_opt1_SPAN : (Lex.pos * Lex.pos), cdata_opt2_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)), + FULL_SPAN, strm') + end +fun select_NT (strm) = let + val (STARTSELECT_RES, STARTSELECT_SPAN, strm') = matchSTARTSELECT(strm) + val (cdata_opt_RES, cdata_opt_SPAN, strm') = cdata_opt_NT(strm') + fun select_PROD_1_SUBRULE_1_NT (strm) = let + fun select_PROD_1_SUBRULE_1_PROD_1 (strm) = let + val (optgroup_RES, optgroup_SPAN, strm') = optgroup_NT(strm) + val FULL_SPAN = (#1(optgroup_SPAN), #2(optgroup_SPAN)) + in + ((optgroup_RES), FULL_SPAN, strm') + end + fun select_PROD_1_SUBRULE_1_PROD_2 (strm) = let + val (option_RES, option_SPAN, strm') = option_NT(strm) + val FULL_SPAN = (#1(option_SPAN), #2(option_SPAN)) + in + ((option_RES), FULL_SPAN, strm') + end + in + (case (lex(strm)) + of (Tok.STARTOPTION(_), _, strm') => + select_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTOPTGROUP(_), _, strm') => + select_PROD_1_SUBRULE_1_PROD_1(strm) + | _ => fail() + (* end case *)) + end + fun select_PROD_1_SUBRULE_1_PRED (strm) = (case (lex(strm)) + of (Tok.STARTOPTGROUP(_), _, strm') => true + | (Tok.STARTOPTION(_), _, strm') => true + | _ => false + (* end case *)) + val (SR_RES, SR_SPAN, strm') = EBNF.posclos(select_PROD_1_SUBRULE_1_PRED, select_PROD_1_SUBRULE_1_NT, strm') + val (ENDSELECT_RES, ENDSELECT_SPAN, strm') = matchENDSELECT(strm') + val FULL_SPAN = (#1(STARTSELECT_SPAN), #2(ENDSELECT_SPAN)) + in + (UserCode.select_PROD_1_ACT (STARTSELECT_RES, SR_RES, cdata_opt_RES, ENDSELECT_RES, STARTSELECT_SPAN : (Lex.pos * Lex.pos), SR_SPAN : (Lex.pos * Lex.pos), cdata_opt_SPAN : (Lex.pos * Lex.pos), ENDSELECT_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)), + FULL_SPAN, strm') + end +fun input_NT (strm) = let + val (STARTINPUT_RES, STARTINPUT_SPAN, strm') = matchSTARTINPUT(strm) + val FULL_SPAN = (#1(STARTINPUT_SPAN), #2(STARTINPUT_SPAN)) + in + (UserCode.input_PROD_1_ACT (STARTINPUT_RES, STARTINPUT_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)), + FULL_SPAN, strm') + end +fun basefont_NT (strm) = let + val (STARTBASEFONT_RES, STARTBASEFONT_SPAN, strm') = matchSTARTBASEFONT(strm) + val FULL_SPAN = (#1(STARTBASEFONT_SPAN), #2(STARTBASEFONT_SPAN)) + in + (UserCode.basefont_PROD_1_ACT (STARTBASEFONT_RES, STARTBASEFONT_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)), + FULL_SPAN, strm') + end +fun param_NT (strm) = let + val (STARTPARAM_RES, STARTPARAM_SPAN, strm') = matchSTARTPARAM(strm) + val FULL_SPAN = (#1(STARTPARAM_SPAN), #2(STARTPARAM_SPAN)) + in + (UserCode.param_PROD_1_ACT (STARTPARAM_RES, STARTPARAM_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)), + FULL_SPAN, strm') + end +fun area_NT (strm) = let + val (STARTAREA_RES, STARTAREA_SPAN, strm') = matchSTARTAREA(strm) + val FULL_SPAN = (#1(STARTAREA_SPAN), #2(STARTAREA_SPAN)) + in + (UserCode.area_PROD_1_ACT (STARTAREA_RES, STARTAREA_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)), + FULL_SPAN, strm') + end +fun isindex_NT (strm) = let + val (STARTISINDEX_RES, STARTISINDEX_SPAN, strm') = matchSTARTISINDEX(strm) + val FULL_SPAN = (#1(STARTISINDEX_SPAN), #2(STARTISINDEX_SPAN)) + in + (UserCode.isindex_PROD_1_ACT (STARTISINDEX_RES, STARTISINDEX_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)), + FULL_SPAN, strm') + end +fun col_NT (strm) = let + val (STARTCOL_RES, STARTCOL_SPAN, strm') = matchSTARTCOL(strm) + val FULL_SPAN = (#1(STARTCOL_SPAN), #2(STARTCOL_SPAN)) + in + (UserCode.col_PROD_1_ACT (STARTCOL_RES, STARTCOL_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)), + FULL_SPAN, strm') + end +fun colgroup_NT (strm) = let + val (STARTCOLGROUP_RES, STARTCOLGROUP_SPAN, strm') = matchSTARTCOLGROUP(strm) + val (cdata_opt_RES, cdata_opt_SPAN, strm') = cdata_opt_NT(strm') + fun colgroup_PROD_1_SUBRULE_1_NT (strm) = let + val (col_RES, col_SPAN, strm') = col_NT(strm) + val (cdata_opt_RES, cdata_opt_SPAN, strm') = cdata_opt_NT(strm') + val FULL_SPAN = (#1(col_SPAN), #2(cdata_opt_SPAN)) + in + (UserCode.colgroup_PROD_1_SUBRULE_1_PROD_1_ACT (STARTCOLGROUP_RES, cdata_opt_RES, col_RES, STARTCOLGROUP_SPAN : (Lex.pos * Lex.pos), cdata_opt_SPAN : (Lex.pos * Lex.pos), col_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)), + FULL_SPAN, strm') + end + fun colgroup_PROD_1_SUBRULE_1_PRED (strm) = (case (lex(strm)) + of (Tok.STARTCOL(_), _, strm') => true + | _ => false + (* end case *)) + val (SR1_RES, SR1_SPAN, strm') = EBNF.closure(colgroup_PROD_1_SUBRULE_1_PRED, colgroup_PROD_1_SUBRULE_1_NT, strm') + fun colgroup_PROD_1_SUBRULE_2_NT (strm) = let + val (ENDCOLGROUP_RES, ENDCOLGROUP_SPAN, strm') = matchENDCOLGROUP(strm) + val FULL_SPAN = (#1(ENDCOLGROUP_SPAN), #2(ENDCOLGROUP_SPAN)) + in + (UserCode.colgroup_PROD_1_SUBRULE_2_PROD_1_ACT (STARTCOLGROUP_RES, SR1_RES, cdata_opt_RES, ENDCOLGROUP_RES, STARTCOLGROUP_SPAN : (Lex.pos * Lex.pos), SR1_SPAN : (Lex.pos * Lex.pos), cdata_opt_SPAN : (Lex.pos * Lex.pos), ENDCOLGROUP_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)), + FULL_SPAN, strm') + end + fun colgroup_PROD_1_SUBRULE_2_PRED (strm) = (case (lex(strm)) + of (Tok.ENDCOLGROUP, _, strm') => true + | _ => false + (* end case *)) + val (SR2_RES, SR2_SPAN, strm') = EBNF.optional(colgroup_PROD_1_SUBRULE_2_PRED, colgroup_PROD_1_SUBRULE_2_NT, strm') + val FULL_SPAN = (#1(STARTCOLGROUP_SPAN), #2(SR2_SPAN)) + in + (UserCode.colgroup_PROD_1_ACT (STARTCOLGROUP_RES, SR1_RES, SR2_RES, cdata_opt_RES, STARTCOLGROUP_SPAN : (Lex.pos * Lex.pos), SR1_SPAN : (Lex.pos * Lex.pos), SR2_SPAN : (Lex.pos * Lex.pos), cdata_opt_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)), + FULL_SPAN, strm') + end +fun col_or_colgroups_NT (strm) = let + fun col_or_colgroups_PROD_1 (strm) = let + val FULL_SPAN = (Err.getPos(strm), Err.getPos(strm)) + in + (UserCode.col_or_colgroups_PROD_1_ACT (FULL_SPAN : (Lex.pos * Lex.pos)), + FULL_SPAN, strm) + end + fun col_or_colgroups_PROD_2 (strm) = let + fun col_or_colgroups_PROD_2_SUBRULE_1_NT (strm) = let + val (col_RES, col_SPAN, strm') = col_NT(strm) + val (cdata_opt_RES, cdata_opt_SPAN, strm') = cdata_opt_NT(strm') + val FULL_SPAN = (#1(col_SPAN), #2(cdata_opt_SPAN)) + in + (UserCode.col_or_colgroups_PROD_2_SUBRULE_1_PROD_1_ACT (cdata_opt_RES, col_RES, cdata_opt_SPAN : (Lex.pos * Lex.pos), col_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)), + FULL_SPAN, strm') + end + fun col_or_colgroups_PROD_2_SUBRULE_1_PRED (strm) = (case (lex(strm)) + of (Tok.STARTCOL(_), _, strm') => true + | _ => false + (* end case *)) + val (SR_RES, SR_SPAN, strm') = EBNF.posclos(col_or_colgroups_PROD_2_SUBRULE_1_PRED, col_or_colgroups_PROD_2_SUBRULE_1_NT, strm) + val FULL_SPAN = (#1(SR_SPAN), #2(SR_SPAN)) + in + (UserCode.col_or_colgroups_PROD_2_ACT (SR_RES, SR_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)), + FULL_SPAN, strm') + end + fun col_or_colgroups_PROD_3 (strm) = let + fun col_or_colgroups_PROD_3_SUBRULE_1_NT (strm) = let + val (colgroup_RES, colgroup_SPAN, strm') = colgroup_NT(strm) + val FULL_SPAN = (#1(colgroup_SPAN), #2(colgroup_SPAN)) + in + ((colgroup_RES), FULL_SPAN, strm') + end + fun col_or_colgroups_PROD_3_SUBRULE_1_PRED (strm) = (case (lex(strm)) + of (Tok.STARTCOLGROUP(_), _, strm') => true + | _ => false + (* end case *)) + val (colgroup_RES, colgroup_SPAN, strm') = EBNF.posclos(col_or_colgroups_PROD_3_SUBRULE_1_PRED, col_or_colgroups_PROD_3_SUBRULE_1_NT, strm) + val FULL_SPAN = (#1(colgroup_SPAN), #2(colgroup_SPAN)) + in + ((colgroup_RES), FULL_SPAN, strm') + end + in + (case (lex(strm)) + of (Tok.STARTCOLGROUP(_), _, strm') => col_or_colgroups_PROD_3(strm) + | (Tok.STARTTBODY(_), _, strm') => col_or_colgroups_PROD_1(strm) + | (Tok.STARTTFOOT(_), _, strm') => col_or_colgroups_PROD_1(strm) + | (Tok.STARTTHEAD(_), _, strm') => col_or_colgroups_PROD_1(strm) + | (Tok.STARTTR(_), _, strm') => col_or_colgroups_PROD_1(strm) + | (Tok.STARTCOL(_), _, strm') => col_or_colgroups_PROD_2(strm) + | _ => fail() + (* end case *)) + end +fun hr_NT (strm) = let + val (STARTHR_RES, STARTHR_SPAN, strm') = matchSTARTHR(strm) + val FULL_SPAN = (#1(STARTHR_SPAN), #2(STARTHR_SPAN)) + in + (UserCode.hr_PROD_1_ACT (STARTHR_RES, STARTHR_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)), + FULL_SPAN, strm') + end +fun script_NT (strm) = let + val (STARTSCRIPT_RES, STARTSCRIPT_SPAN, strm') = matchSTARTSCRIPT(strm) + val (cdata_opt_RES, cdata_opt_SPAN, strm') = cdata_opt_NT(strm') + val (ENDSCRIPT_RES, ENDSCRIPT_SPAN, strm') = matchENDSCRIPT(strm') + val FULL_SPAN = (#1(STARTSCRIPT_SPAN), #2(ENDSCRIPT_SPAN)) + in + (UserCode.script_PROD_1_ACT (cdata_opt_RES, STARTSCRIPT_RES, ENDSCRIPT_RES, cdata_opt_SPAN : (Lex.pos * Lex.pos), STARTSCRIPT_SPAN : (Lex.pos * Lex.pos), ENDSCRIPT_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)), + FULL_SPAN, strm') + end +fun br_NT (strm) = let + val (STARTBR_RES, STARTBR_SPAN, strm') = matchSTARTBR(strm) + val FULL_SPAN = (#1(STARTBR_SPAN), #2(STARTBR_SPAN)) + in + (UserCode.br_PROD_1_ACT (STARTBR_RES, STARTBR_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)), + FULL_SPAN, strm') + end +fun img_NT (strm) = let + val (STARTIMG_RES, STARTIMG_SPAN, strm') = matchSTARTIMG(strm) + val FULL_SPAN = (#1(STARTIMG_SPAN), #2(STARTIMG_SPAN)) + in + (UserCode.img_PROD_1_ACT (STARTIMG_RES, STARTIMG_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)), + FULL_SPAN, strm') + end +fun flow_NT (strm) = let + fun flow_PROD_1 (strm) = let + val (block_RES, block_SPAN, strm') = block_NT(strm) + val FULL_SPAN = (#1(block_SPAN), #2(block_SPAN)) + in + ((block_RES), FULL_SPAN, strm') + end + fun flow_PROD_2 (strm) = let + val (inline_RES, inline_SPAN, strm') = inline_NT(strm) + val FULL_SPAN = (#1(inline_SPAN), #2(inline_SPAN)) + in + ((inline_RES), FULL_SPAN, strm') + end + in + (case (lex(strm)) + of (Tok.COMMENT(_), _, strm') => flow_PROD_2(strm) + | (Tok.PCDATA(_), _, strm') => flow_PROD_2(strm) + | (Tok.CHAR_REF(_), _, strm') => flow_PROD_2(strm) + | (Tok.ENTITY_REF(_), _, strm') => flow_PROD_2(strm) + | (Tok.STARTA(_), _, strm') => flow_PROD_2(strm) + | (Tok.STARTABBR(_), _, strm') => flow_PROD_2(strm) + | (Tok.STARTACRONYM(_), _, strm') => flow_PROD_2(strm) + | (Tok.STARTAPPLET(_), _, strm') => flow_PROD_2(strm) + | (Tok.STARTB(_), _, strm') => flow_PROD_2(strm) + | (Tok.STARTBASEFONT(_), _, strm') => flow_PROD_2(strm) + | (Tok.STARTBDO(_), _, strm') => flow_PROD_2(strm) + | (Tok.STARTBIG(_), _, strm') => flow_PROD_2(strm) + | (Tok.STARTBR(_), _, strm') => flow_PROD_2(strm) + | (Tok.STARTBUTTON(_), _, strm') => flow_PROD_2(strm) + | (Tok.STARTCITE(_), _, strm') => flow_PROD_2(strm) + | (Tok.STARTCODE(_), _, strm') => flow_PROD_2(strm) + | (Tok.STARTDFN(_), _, strm') => flow_PROD_2(strm) + | (Tok.STARTEM(_), _, strm') => flow_PROD_2(strm) + | (Tok.STARTFONT(_), _, strm') => flow_PROD_2(strm) + | (Tok.STARTI(_), _, strm') => flow_PROD_2(strm) + | (Tok.STARTIFRAME(_), _, strm') => flow_PROD_2(strm) + | (Tok.STARTIMG(_), _, strm') => flow_PROD_2(strm) + | (Tok.STARTINPUT(_), _, strm') => flow_PROD_2(strm) + | (Tok.STARTKBD(_), _, strm') => flow_PROD_2(strm) + | (Tok.STARTLABEL(_), _, strm') => flow_PROD_2(strm) + | (Tok.STARTMAP(_), _, strm') => flow_PROD_2(strm) + | (Tok.STARTOBJECT(_), _, strm') => flow_PROD_2(strm) + | (Tok.STARTQ(_), _, strm') => flow_PROD_2(strm) + | (Tok.STARTS(_), _, strm') => flow_PROD_2(strm) + | (Tok.STARTSAMP(_), _, strm') => flow_PROD_2(strm) + | (Tok.STARTSCRIPT(_), _, strm') => flow_PROD_2(strm) + | (Tok.STARTSELECT(_), _, strm') => flow_PROD_2(strm) + | (Tok.STARTSMALL(_), _, strm') => flow_PROD_2(strm) + | (Tok.STARTSPAN(_), _, strm') => flow_PROD_2(strm) + | (Tok.STARTSTRIKE(_), _, strm') => flow_PROD_2(strm) + | (Tok.STARTSTRONG(_), _, strm') => flow_PROD_2(strm) + | (Tok.STARTSUB(_), _, strm') => flow_PROD_2(strm) + | (Tok.STARTSUP(_), _, strm') => flow_PROD_2(strm) + | (Tok.STARTTEXTAREA(_), _, strm') => flow_PROD_2(strm) + | (Tok.STARTTT(_), _, strm') => flow_PROD_2(strm) + | (Tok.STARTU(_), _, strm') => flow_PROD_2(strm) + | (Tok.STARTVAR(_), _, strm') => flow_PROD_2(strm) + | (Tok.STARTADDRESS(_), _, strm') => flow_PROD_1(strm) + | (Tok.STARTBLOCKQUOTE(_), _, strm') => flow_PROD_1(strm) + | (Tok.STARTCENTER(_), _, strm') => flow_PROD_1(strm) + | (Tok.STARTDIR(_), _, strm') => flow_PROD_1(strm) + | (Tok.STARTDIV(_), _, strm') => flow_PROD_1(strm) + | (Tok.STARTDL(_), _, strm') => flow_PROD_1(strm) + | (Tok.STARTFIELDSET(_), _, strm') => flow_PROD_1(strm) + | (Tok.STARTFORM(_), _, strm') => flow_PROD_1(strm) + | (Tok.STARTH1(_), _, strm') => flow_PROD_1(strm) + | (Tok.STARTH2(_), _, strm') => flow_PROD_1(strm) + | (Tok.STARTH3(_), _, strm') => flow_PROD_1(strm) + | (Tok.STARTH4(_), _, strm') => flow_PROD_1(strm) + | (Tok.STARTH5(_), _, strm') => flow_PROD_1(strm) + | (Tok.STARTH6(_), _, strm') => flow_PROD_1(strm) + | (Tok.STARTHR(_), _, strm') => flow_PROD_1(strm) + | (Tok.STARTISINDEX(_), _, strm') => flow_PROD_1(strm) + | (Tok.STARTMENU(_), _, strm') => flow_PROD_1(strm) + | (Tok.STARTNOSCRIPT(_), _, strm') => flow_PROD_1(strm) + | (Tok.STARTOL(_), _, strm') => flow_PROD_1(strm) + | (Tok.STARTP(_), _, strm') => flow_PROD_1(strm) + | (Tok.STARTPRE(_), _, strm') => flow_PROD_1(strm) + | (Tok.STARTTABLE(_), _, strm') => flow_PROD_1(strm) + | (Tok.STARTUL(_), _, strm') => flow_PROD_1(strm) + | _ => fail() + (* end case *)) + end +and inline_NT (strm) = let + fun inline_PROD_1 (strm) = let + val (fontstyle_RES, fontstyle_SPAN, strm') = fontstyle_NT(strm) + val FULL_SPAN = (#1(fontstyle_SPAN), #2(fontstyle_SPAN)) + in + ((fontstyle_RES), FULL_SPAN, strm') + end + fun inline_PROD_2 (strm) = let + val (phrase_RES, phrase_SPAN, strm') = phrase_NT(strm) + val FULL_SPAN = (#1(phrase_SPAN), #2(phrase_SPAN)) + in + ((phrase_RES), FULL_SPAN, strm') + end + fun inline_PROD_3 (strm) = let + val (special_RES, special_SPAN, strm') = special_NT(strm) + val FULL_SPAN = (#1(special_SPAN), #2(special_SPAN)) + in + ((special_RES), FULL_SPAN, strm') + end + fun inline_PROD_4 (strm) = let + val (formctrl_RES, formctrl_SPAN, strm') = formctrl_NT(strm) + val FULL_SPAN = (#1(formctrl_SPAN), #2(formctrl_SPAN)) + in + ((formctrl_RES), FULL_SPAN, strm') + end + fun inline_PROD_5 (strm) = let + val (cdata_RES, cdata_SPAN, strm') = cdata_NT(strm) + val FULL_SPAN = (#1(cdata_SPAN), #2(cdata_SPAN)) + in + ((cdata_RES), FULL_SPAN, strm') + end + in + (case (lex(strm)) + of (Tok.COMMENT(_), _, strm') => inline_PROD_5(strm) + | (Tok.PCDATA(_), _, strm') => inline_PROD_5(strm) + | (Tok.CHAR_REF(_), _, strm') => inline_PROD_5(strm) + | (Tok.ENTITY_REF(_), _, strm') => inline_PROD_5(strm) + | (Tok.STARTA(_), _, strm') => inline_PROD_3(strm) + | (Tok.STARTAPPLET(_), _, strm') => inline_PROD_3(strm) + | (Tok.STARTBASEFONT(_), _, strm') => inline_PROD_3(strm) + | (Tok.STARTBDO(_), _, strm') => inline_PROD_3(strm) + | (Tok.STARTBR(_), _, strm') => inline_PROD_3(strm) + | (Tok.STARTFONT(_), _, strm') => inline_PROD_3(strm) + | (Tok.STARTIFRAME(_), _, strm') => inline_PROD_3(strm) + | (Tok.STARTIMG(_), _, strm') => inline_PROD_3(strm) + | (Tok.STARTMAP(_), _, strm') => inline_PROD_3(strm) + | (Tok.STARTOBJECT(_), _, strm') => inline_PROD_3(strm) + | (Tok.STARTQ(_), _, strm') => inline_PROD_3(strm) + | (Tok.STARTSCRIPT(_), _, strm') => inline_PROD_3(strm) + | (Tok.STARTSPAN(_), _, strm') => inline_PROD_3(strm) + | (Tok.STARTSUB(_), _, strm') => inline_PROD_3(strm) + | (Tok.STARTSUP(_), _, strm') => inline_PROD_3(strm) + | (Tok.STARTB(_), _, strm') => inline_PROD_1(strm) + | (Tok.STARTBIG(_), _, strm') => inline_PROD_1(strm) + | (Tok.STARTI(_), _, strm') => inline_PROD_1(strm) + | (Tok.STARTS(_), _, strm') => inline_PROD_1(strm) + | (Tok.STARTSMALL(_), _, strm') => inline_PROD_1(strm) + | (Tok.STARTSTRIKE(_), _, strm') => inline_PROD_1(strm) + | (Tok.STARTTT(_), _, strm') => inline_PROD_1(strm) + | (Tok.STARTU(_), _, strm') => inline_PROD_1(strm) + | (Tok.STARTABBR(_), _, strm') => inline_PROD_2(strm) + | (Tok.STARTACRONYM(_), _, strm') => inline_PROD_2(strm) + | (Tok.STARTCITE(_), _, strm') => inline_PROD_2(strm) + | (Tok.STARTCODE(_), _, strm') => inline_PROD_2(strm) + | (Tok.STARTDFN(_), _, strm') => inline_PROD_2(strm) + | (Tok.STARTEM(_), _, strm') => inline_PROD_2(strm) + | (Tok.STARTKBD(_), _, strm') => inline_PROD_2(strm) + | (Tok.STARTSAMP(_), _, strm') => inline_PROD_2(strm) + | (Tok.STARTSTRONG(_), _, strm') => inline_PROD_2(strm) + | (Tok.STARTVAR(_), _, strm') => inline_PROD_2(strm) + | (Tok.STARTBUTTON(_), _, strm') => inline_PROD_4(strm) + | (Tok.STARTINPUT(_), _, strm') => inline_PROD_4(strm) + | (Tok.STARTLABEL(_), _, strm') => inline_PROD_4(strm) + | (Tok.STARTSELECT(_), _, strm') => inline_PROD_4(strm) + | (Tok.STARTTEXTAREA(_), _, strm') => inline_PROD_4(strm) + | _ => fail() + (* end case *)) + end +and formctrl_NT (strm) = let + fun formctrl_PROD_1 (strm) = let + val (input_RES, input_SPAN, strm') = input_NT(strm) + val FULL_SPAN = (#1(input_SPAN), #2(input_SPAN)) + in + ((input_RES), FULL_SPAN, strm') + end + fun formctrl_PROD_2 (strm) = let + val (select_RES, select_SPAN, strm') = select_NT(strm) + val FULL_SPAN = (#1(select_SPAN), #2(select_SPAN)) + in + ((select_RES), FULL_SPAN, strm') + end + fun formctrl_PROD_3 (strm) = let + val (textarea_RES, textarea_SPAN, strm') = textarea_NT(strm) + val FULL_SPAN = (#1(textarea_SPAN), #2(textarea_SPAN)) + in + ((textarea_RES), FULL_SPAN, strm') + end + fun formctrl_PROD_4 (strm) = let + val (label_RES, label_SPAN, strm') = label_NT(strm) + val FULL_SPAN = (#1(label_SPAN), #2(label_SPAN)) + in + ((label_RES), FULL_SPAN, strm') + end + fun formctrl_PROD_5 (strm) = let + val (button_RES, button_SPAN, strm') = button_NT(strm) + val FULL_SPAN = (#1(button_SPAN), #2(button_SPAN)) + in + ((button_RES), FULL_SPAN, strm') + end + in + (case (lex(strm)) + of (Tok.STARTBUTTON(_), _, strm') => formctrl_PROD_5(strm) + | (Tok.STARTTEXTAREA(_), _, strm') => formctrl_PROD_3(strm) + | (Tok.STARTINPUT(_), _, strm') => formctrl_PROD_1(strm) + | (Tok.STARTSELECT(_), _, strm') => formctrl_PROD_2(strm) + | (Tok.STARTLABEL(_), _, strm') => formctrl_PROD_4(strm) + | _ => fail() + (* end case *)) + end +and button_NT (strm) = let + val (STARTBUTTON_RES, STARTBUTTON_SPAN, strm') = matchSTARTBUTTON(strm) + fun button_PROD_1_SUBRULE_1_NT (strm) = let + val (flow_RES, flow_SPAN, strm') = flow_NT(strm) + val FULL_SPAN = (#1(flow_SPAN), #2(flow_SPAN)) + in + ((flow_RES), FULL_SPAN, strm') + end + fun button_PROD_1_SUBRULE_1_PRED (strm) = (case (lex(strm)) + of (Tok.COMMENT(_), _, strm') => true + | (Tok.PCDATA(_), _, strm') => true + | (Tok.CHAR_REF(_), _, strm') => true + | (Tok.ENTITY_REF(_), _, strm') => true + | (Tok.STARTA(_), _, strm') => true + | (Tok.STARTABBR(_), _, strm') => true + | (Tok.STARTACRONYM(_), _, strm') => true + | (Tok.STARTADDRESS(_), _, strm') => true + | (Tok.STARTAPPLET(_), _, strm') => true + | (Tok.STARTB(_), _, strm') => true + | (Tok.STARTBASEFONT(_), _, strm') => true + | (Tok.STARTBDO(_), _, strm') => true + | (Tok.STARTBIG(_), _, strm') => true + | (Tok.STARTBLOCKQUOTE(_), _, strm') => true + | (Tok.STARTBR(_), _, strm') => true + | (Tok.STARTBUTTON(_), _, strm') => true + | (Tok.STARTCENTER(_), _, strm') => true + | (Tok.STARTCITE(_), _, strm') => true + | (Tok.STARTCODE(_), _, strm') => true + | (Tok.STARTDFN(_), _, strm') => true + | (Tok.STARTDIR(_), _, strm') => true + | (Tok.STARTDIV(_), _, strm') => true + | (Tok.STARTDL(_), _, strm') => true + | (Tok.STARTEM(_), _, strm') => true + | (Tok.STARTFIELDSET(_), _, strm') => true + | (Tok.STARTFONT(_), _, strm') => true + | (Tok.STARTFORM(_), _, strm') => true + | (Tok.STARTH1(_), _, strm') => true + | (Tok.STARTH2(_), _, strm') => true + | (Tok.STARTH3(_), _, strm') => true + | (Tok.STARTH4(_), _, strm') => true + | (Tok.STARTH5(_), _, strm') => true + | (Tok.STARTH6(_), _, strm') => true + | (Tok.STARTHR(_), _, strm') => true + | (Tok.STARTI(_), _, strm') => true + | (Tok.STARTIFRAME(_), _, strm') => true + | (Tok.STARTIMG(_), _, strm') => true + | (Tok.STARTINPUT(_), _, strm') => true + | (Tok.STARTISINDEX(_), _, strm') => true + | (Tok.STARTKBD(_), _, strm') => true + | (Tok.STARTLABEL(_), _, strm') => true + | (Tok.STARTMAP(_), _, strm') => true + | (Tok.STARTMENU(_), _, strm') => true + | (Tok.STARTNOSCRIPT(_), _, strm') => true + | (Tok.STARTOBJECT(_), _, strm') => true + | (Tok.STARTOL(_), _, strm') => true + | (Tok.STARTP(_), _, strm') => true + | (Tok.STARTPRE(_), _, strm') => true + | (Tok.STARTQ(_), _, strm') => true + | (Tok.STARTS(_), _, strm') => true + | (Tok.STARTSAMP(_), _, strm') => true + | (Tok.STARTSCRIPT(_), _, strm') => true + | (Tok.STARTSELECT(_), _, strm') => true + | (Tok.STARTSMALL(_), _, strm') => true + | (Tok.STARTSPAN(_), _, strm') => true + | (Tok.STARTSTRIKE(_), _, strm') => true + | (Tok.STARTSTRONG(_), _, strm') => true + | (Tok.STARTSUB(_), _, strm') => true + | (Tok.STARTSUP(_), _, strm') => true + | (Tok.STARTTABLE(_), _, strm') => true + | (Tok.STARTTEXTAREA(_), _, strm') => true + | (Tok.STARTTT(_), _, strm') => true + | (Tok.STARTU(_), _, strm') => true + | (Tok.STARTUL(_), _, strm') => true + | (Tok.STARTVAR(_), _, strm') => true + | _ => false + (* end case *)) + val (flow_RES, flow_SPAN, strm') = EBNF.closure(button_PROD_1_SUBRULE_1_PRED, button_PROD_1_SUBRULE_1_NT, strm') + val (ENDBUTTON_RES, ENDBUTTON_SPAN, strm') = matchENDBUTTON(strm') + val FULL_SPAN = (#1(STARTBUTTON_SPAN), #2(ENDBUTTON_SPAN)) + in + (UserCode.button_PROD_1_ACT (STARTBUTTON_RES, flow_RES, ENDBUTTON_RES, STARTBUTTON_SPAN : (Lex.pos * Lex.pos), flow_SPAN : (Lex.pos * Lex.pos), ENDBUTTON_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)), + FULL_SPAN, strm') + end +and label_NT (strm) = let + val (STARTLABEL_RES, STARTLABEL_SPAN, strm') = matchSTARTLABEL(strm) + fun label_PROD_1_SUBRULE_1_NT (strm) = let + val (inline_RES, inline_SPAN, strm') = inline_NT(strm) + val FULL_SPAN = (#1(inline_SPAN), #2(inline_SPAN)) + in + ((inline_RES), FULL_SPAN, strm') + end + fun label_PROD_1_SUBRULE_1_PRED (strm) = (case (lex(strm)) + of (Tok.COMMENT(_), _, strm') => true + | (Tok.PCDATA(_), _, strm') => true + | (Tok.CHAR_REF(_), _, strm') => true + | (Tok.ENTITY_REF(_), _, strm') => true + | (Tok.STARTA(_), _, strm') => true + | (Tok.STARTABBR(_), _, strm') => true + | (Tok.STARTACRONYM(_), _, strm') => true + | (Tok.STARTAPPLET(_), _, strm') => true + | (Tok.STARTB(_), _, strm') => true + | (Tok.STARTBASEFONT(_), _, strm') => true + | (Tok.STARTBDO(_), _, strm') => true + | (Tok.STARTBIG(_), _, strm') => true + | (Tok.STARTBR(_), _, strm') => true + | (Tok.STARTBUTTON(_), _, strm') => true + | (Tok.STARTCITE(_), _, strm') => true + | (Tok.STARTCODE(_), _, strm') => true + | (Tok.STARTDFN(_), _, strm') => true + | (Tok.STARTEM(_), _, strm') => true + | (Tok.STARTFONT(_), _, strm') => true + | (Tok.STARTI(_), _, strm') => true + | (Tok.STARTIFRAME(_), _, strm') => true + | (Tok.STARTIMG(_), _, strm') => true + | (Tok.STARTINPUT(_), _, strm') => true + | (Tok.STARTKBD(_), _, strm') => true + | (Tok.STARTLABEL(_), _, strm') => true + | (Tok.STARTMAP(_), _, strm') => true + | (Tok.STARTOBJECT(_), _, strm') => true + | (Tok.STARTQ(_), _, strm') => true + | (Tok.STARTS(_), _, strm') => true + | (Tok.STARTSAMP(_), _, strm') => true + | (Tok.STARTSCRIPT(_), _, strm') => true + | (Tok.STARTSELECT(_), _, strm') => true + | (Tok.STARTSMALL(_), _, strm') => true + | (Tok.STARTSPAN(_), _, strm') => true + | (Tok.STARTSTRIKE(_), _, strm') => true + | (Tok.STARTSTRONG(_), _, strm') => true + | (Tok.STARTSUB(_), _, strm') => true + | (Tok.STARTSUP(_), _, strm') => true + | (Tok.STARTTEXTAREA(_), _, strm') => true + | (Tok.STARTTT(_), _, strm') => true + | (Tok.STARTU(_), _, strm') => true + | (Tok.STARTVAR(_), _, strm') => true + | _ => false + (* end case *)) + val (inline_RES, inline_SPAN, strm') = EBNF.closure(label_PROD_1_SUBRULE_1_PRED, label_PROD_1_SUBRULE_1_NT, strm') + val (ENDLABEL_RES, ENDLABEL_SPAN, strm') = matchENDLABEL(strm') + val FULL_SPAN = (#1(STARTLABEL_SPAN), #2(ENDLABEL_SPAN)) + in + (UserCode.label_PROD_1_ACT (inline_RES, ENDLABEL_RES, STARTLABEL_RES, inline_SPAN : (Lex.pos * Lex.pos), ENDLABEL_SPAN : (Lex.pos * Lex.pos), STARTLABEL_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)), + FULL_SPAN, strm') + end +and special_NT (strm) = let + fun special_PROD_1 (strm) = let + val (a_RES, a_SPAN, strm') = a_NT(strm) + val FULL_SPAN = (#1(a_SPAN), #2(a_SPAN)) + in + ((a_RES), FULL_SPAN, strm') + end + fun special_PROD_2 (strm) = let + val (img_RES, img_SPAN, strm') = img_NT(strm) + val FULL_SPAN = (#1(img_SPAN), #2(img_SPAN)) + in + ((img_RES), FULL_SPAN, strm') + end + fun special_PROD_3 (strm) = let + val (object_RES, object_SPAN, strm') = object_NT(strm) + val FULL_SPAN = (#1(object_SPAN), #2(object_SPAN)) + in + ((object_RES), FULL_SPAN, strm') + end + fun special_PROD_4 (strm) = let + val (br_RES, br_SPAN, strm') = br_NT(strm) + val FULL_SPAN = (#1(br_SPAN), #2(br_SPAN)) + in + ((br_RES), FULL_SPAN, strm') + end + fun special_PROD_5 (strm) = let + val (script_RES, script_SPAN, strm') = script_NT(strm) + val FULL_SPAN = (#1(script_SPAN), #2(script_SPAN)) + in + ((script_RES), FULL_SPAN, strm') + end + fun special_PROD_6 (strm) = let + val (map_RES, map_SPAN, strm') = map_NT(strm) + val FULL_SPAN = (#1(map_SPAN), #2(map_SPAN)) + in + ((map_RES), FULL_SPAN, strm') + end + fun special_PROD_7 (strm) = let + val (q_RES, q_SPAN, strm') = q_NT(strm) + val FULL_SPAN = (#1(q_SPAN), #2(q_SPAN)) + in + ((q_RES), FULL_SPAN, strm') + end + fun special_PROD_8 (strm) = let + val (sub_RES, sub_SPAN, strm') = sub_NT(strm) + val FULL_SPAN = (#1(sub_SPAN), #2(sub_SPAN)) + in + ((sub_RES), FULL_SPAN, strm') + end + fun special_PROD_9 (strm) = let + val (sup_RES, sup_SPAN, strm') = sup_NT(strm) + val FULL_SPAN = (#1(sup_SPAN), #2(sup_SPAN)) + in + ((sup_RES), FULL_SPAN, strm') + end + fun special_PROD_10 (strm) = let + val (span_RES, span_SPAN, strm') = span_NT(strm) + val FULL_SPAN = (#1(span_SPAN), #2(span_SPAN)) + in + ((span_RES), FULL_SPAN, strm') + end + fun special_PROD_11 (strm) = let + val (bdo_RES, bdo_SPAN, strm') = bdo_NT(strm) + val FULL_SPAN = (#1(bdo_SPAN), #2(bdo_SPAN)) + in + ((bdo_RES), FULL_SPAN, strm') + end + fun special_PROD_12 (strm) = let + val (special_loose_RES, special_loose_SPAN, strm') = special_loose_NT(strm) + val FULL_SPAN = (#1(special_loose_SPAN), #2(special_loose_SPAN)) + in + ((special_loose_RES), FULL_SPAN, strm') + end + in + (case (lex(strm)) + of (Tok.STARTAPPLET(_), _, strm') => special_PROD_12(strm) + | (Tok.STARTBASEFONT(_), _, strm') => special_PROD_12(strm) + | (Tok.STARTFONT(_), _, strm') => special_PROD_12(strm) + | (Tok.STARTIFRAME(_), _, strm') => special_PROD_12(strm) + | (Tok.STARTSPAN(_), _, strm') => special_PROD_10(strm) + | (Tok.STARTSUB(_), _, strm') => special_PROD_8(strm) + | (Tok.STARTMAP(_), _, strm') => special_PROD_6(strm) + | (Tok.STARTBR(_), _, strm') => special_PROD_4(strm) + | (Tok.STARTIMG(_), _, strm') => special_PROD_2(strm) + | (Tok.STARTA(_), _, strm') => special_PROD_1(strm) + | (Tok.STARTOBJECT(_), _, strm') => special_PROD_3(strm) + | (Tok.STARTSCRIPT(_), _, strm') => special_PROD_5(strm) + | (Tok.STARTQ(_), _, strm') => special_PROD_7(strm) + | (Tok.STARTSUP(_), _, strm') => special_PROD_9(strm) + | (Tok.STARTBDO(_), _, strm') => special_PROD_11(strm) + | _ => fail() + (* end case *)) + end +and special_loose_NT (strm) = let + fun special_loose_PROD_1 (strm) = let + val (applet_RES, applet_SPAN, strm') = applet_NT(strm) + val FULL_SPAN = (#1(applet_SPAN), #2(applet_SPAN)) + in + ((applet_RES), FULL_SPAN, strm') + end + fun special_loose_PROD_2 (strm) = let + val (basefont_RES, basefont_SPAN, strm') = basefont_NT(strm) + val FULL_SPAN = (#1(basefont_SPAN), #2(basefont_SPAN)) + in + ((basefont_RES), FULL_SPAN, strm') + end + fun special_loose_PROD_3 (strm) = let + val (font_RES, font_SPAN, strm') = font_NT(strm) + val FULL_SPAN = (#1(font_SPAN), #2(font_SPAN)) + in + ((font_RES), FULL_SPAN, strm') + end + fun special_loose_PROD_4 (strm) = let + val (iframe_RES, iframe_SPAN, strm') = iframe_NT(strm) + val FULL_SPAN = (#1(iframe_SPAN), #2(iframe_SPAN)) + in + ((iframe_RES), FULL_SPAN, strm') + end + in + (case (lex(strm)) + of (Tok.STARTIFRAME(_), _, strm') => special_loose_PROD_4(strm) + | (Tok.STARTBASEFONT(_), _, strm') => special_loose_PROD_2(strm) + | (Tok.STARTAPPLET(_), _, strm') => special_loose_PROD_1(strm) + | (Tok.STARTFONT(_), _, strm') => special_loose_PROD_3(strm) + | _ => fail() + (* end case *)) + end +and iframe_NT (strm) = let + val (STARTIFRAME_RES, STARTIFRAME_SPAN, strm') = matchSTARTIFRAME(strm) + fun iframe_PROD_1_SUBRULE_1_NT (strm) = let + val (flow_RES, flow_SPAN, strm') = flow_NT(strm) + val FULL_SPAN = (#1(flow_SPAN), #2(flow_SPAN)) + in + ((flow_RES), FULL_SPAN, strm') + end + fun iframe_PROD_1_SUBRULE_1_PRED (strm) = (case (lex(strm)) + of (Tok.COMMENT(_), _, strm') => true + | (Tok.PCDATA(_), _, strm') => true + | (Tok.CHAR_REF(_), _, strm') => true + | (Tok.ENTITY_REF(_), _, strm') => true + | (Tok.STARTA(_), _, strm') => true + | (Tok.STARTABBR(_), _, strm') => true + | (Tok.STARTACRONYM(_), _, strm') => true + | (Tok.STARTADDRESS(_), _, strm') => true + | (Tok.STARTAPPLET(_), _, strm') => true + | (Tok.STARTB(_), _, strm') => true + | (Tok.STARTBASEFONT(_), _, strm') => true + | (Tok.STARTBDO(_), _, strm') => true + | (Tok.STARTBIG(_), _, strm') => true + | (Tok.STARTBLOCKQUOTE(_), _, strm') => true + | (Tok.STARTBR(_), _, strm') => true + | (Tok.STARTBUTTON(_), _, strm') => true + | (Tok.STARTCENTER(_), _, strm') => true + | (Tok.STARTCITE(_), _, strm') => true + | (Tok.STARTCODE(_), _, strm') => true + | (Tok.STARTDFN(_), _, strm') => true + | (Tok.STARTDIR(_), _, strm') => true + | (Tok.STARTDIV(_), _, strm') => true + | (Tok.STARTDL(_), _, strm') => true + | (Tok.STARTEM(_), _, strm') => true + | (Tok.STARTFIELDSET(_), _, strm') => true + | (Tok.STARTFONT(_), _, strm') => true + | (Tok.STARTFORM(_), _, strm') => true + | (Tok.STARTH1(_), _, strm') => true + | (Tok.STARTH2(_), _, strm') => true + | (Tok.STARTH3(_), _, strm') => true + | (Tok.STARTH4(_), _, strm') => true + | (Tok.STARTH5(_), _, strm') => true + | (Tok.STARTH6(_), _, strm') => true + | (Tok.STARTHR(_), _, strm') => true + | (Tok.STARTI(_), _, strm') => true + | (Tok.STARTIFRAME(_), _, strm') => true + | (Tok.STARTIMG(_), _, strm') => true + | (Tok.STARTINPUT(_), _, strm') => true + | (Tok.STARTISINDEX(_), _, strm') => true + | (Tok.STARTKBD(_), _, strm') => true + | (Tok.STARTLABEL(_), _, strm') => true + | (Tok.STARTMAP(_), _, strm') => true + | (Tok.STARTMENU(_), _, strm') => true + | (Tok.STARTNOSCRIPT(_), _, strm') => true + | (Tok.STARTOBJECT(_), _, strm') => true + | (Tok.STARTOL(_), _, strm') => true + | (Tok.STARTP(_), _, strm') => true + | (Tok.STARTPRE(_), _, strm') => true + | (Tok.STARTQ(_), _, strm') => true + | (Tok.STARTS(_), _, strm') => true + | (Tok.STARTSAMP(_), _, strm') => true + | (Tok.STARTSCRIPT(_), _, strm') => true + | (Tok.STARTSELECT(_), _, strm') => true + | (Tok.STARTSMALL(_), _, strm') => true + | (Tok.STARTSPAN(_), _, strm') => true + | (Tok.STARTSTRIKE(_), _, strm') => true + | (Tok.STARTSTRONG(_), _, strm') => true + | (Tok.STARTSUB(_), _, strm') => true + | (Tok.STARTSUP(_), _, strm') => true + | (Tok.STARTTABLE(_), _, strm') => true + | (Tok.STARTTEXTAREA(_), _, strm') => true + | (Tok.STARTTT(_), _, strm') => true + | (Tok.STARTU(_), _, strm') => true + | (Tok.STARTUL(_), _, strm') => true + | (Tok.STARTVAR(_), _, strm') => true + | _ => false + (* end case *)) + val (flow_RES, flow_SPAN, strm') = EBNF.closure(iframe_PROD_1_SUBRULE_1_PRED, iframe_PROD_1_SUBRULE_1_NT, strm') + val (ENDIFRAME_RES, ENDIFRAME_SPAN, strm') = matchENDIFRAME(strm') + val FULL_SPAN = (#1(STARTIFRAME_SPAN), #2(ENDIFRAME_SPAN)) + in + (UserCode.iframe_PROD_1_ACT (ENDIFRAME_RES, STARTIFRAME_RES, flow_RES, ENDIFRAME_SPAN : (Lex.pos * Lex.pos), STARTIFRAME_SPAN : (Lex.pos * Lex.pos), flow_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)), + FULL_SPAN, strm') + end +and font_NT (strm) = let + val (STARTFONT_RES, STARTFONT_SPAN, strm') = matchSTARTFONT(strm) + fun font_PROD_1_SUBRULE_1_NT (strm) = let + val (inline_RES, inline_SPAN, strm') = inline_NT(strm) + val FULL_SPAN = (#1(inline_SPAN), #2(inline_SPAN)) + in + ((inline_RES), FULL_SPAN, strm') + end + fun font_PROD_1_SUBRULE_1_PRED (strm) = (case (lex(strm)) + of (Tok.COMMENT(_), _, strm') => true + | (Tok.PCDATA(_), _, strm') => true + | (Tok.CHAR_REF(_), _, strm') => true + | (Tok.ENTITY_REF(_), _, strm') => true + | (Tok.STARTA(_), _, strm') => true + | (Tok.STARTABBR(_), _, strm') => true + | (Tok.STARTACRONYM(_), _, strm') => true + | (Tok.STARTAPPLET(_), _, strm') => true + | (Tok.STARTB(_), _, strm') => true + | (Tok.STARTBASEFONT(_), _, strm') => true + | (Tok.STARTBDO(_), _, strm') => true + | (Tok.STARTBIG(_), _, strm') => true + | (Tok.STARTBR(_), _, strm') => true + | (Tok.STARTBUTTON(_), _, strm') => true + | (Tok.STARTCITE(_), _, strm') => true + | (Tok.STARTCODE(_), _, strm') => true + | (Tok.STARTDFN(_), _, strm') => true + | (Tok.STARTEM(_), _, strm') => true + | (Tok.STARTFONT(_), _, strm') => true + | (Tok.STARTI(_), _, strm') => true + | (Tok.STARTIFRAME(_), _, strm') => true + | (Tok.STARTIMG(_), _, strm') => true + | (Tok.STARTINPUT(_), _, strm') => true + | (Tok.STARTKBD(_), _, strm') => true + | (Tok.STARTLABEL(_), _, strm') => true + | (Tok.STARTMAP(_), _, strm') => true + | (Tok.STARTOBJECT(_), _, strm') => true + | (Tok.STARTQ(_), _, strm') => true + | (Tok.STARTS(_), _, strm') => true + | (Tok.STARTSAMP(_), _, strm') => true + | (Tok.STARTSCRIPT(_), _, strm') => true + | (Tok.STARTSELECT(_), _, strm') => true + | (Tok.STARTSMALL(_), _, strm') => true + | (Tok.STARTSPAN(_), _, strm') => true + | (Tok.STARTSTRIKE(_), _, strm') => true + | (Tok.STARTSTRONG(_), _, strm') => true + | (Tok.STARTSUB(_), _, strm') => true + | (Tok.STARTSUP(_), _, strm') => true + | (Tok.STARTTEXTAREA(_), _, strm') => true + | (Tok.STARTTT(_), _, strm') => true + | (Tok.STARTU(_), _, strm') => true + | (Tok.STARTVAR(_), _, strm') => true + | _ => false + (* end case *)) + val (inline_RES, inline_SPAN, strm') = EBNF.closure(font_PROD_1_SUBRULE_1_PRED, font_PROD_1_SUBRULE_1_NT, strm') + val (ENDFONT_RES, ENDFONT_SPAN, strm') = matchENDFONT(strm') + val FULL_SPAN = (#1(STARTFONT_SPAN), #2(ENDFONT_SPAN)) + in + (UserCode.font_PROD_1_ACT (inline_RES, ENDFONT_RES, STARTFONT_RES, inline_SPAN : (Lex.pos * Lex.pos), ENDFONT_SPAN : (Lex.pos * Lex.pos), STARTFONT_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)), + FULL_SPAN, strm') + end +and applet_NT (strm) = let + val (STARTAPPLET_RES, STARTAPPLET_SPAN, strm') = matchSTARTAPPLET(strm) + fun applet_PROD_1_SUBRULE_1_NT (strm) = let + fun applet_PROD_1_SUBRULE_1_PROD_1 (strm) = let + val (param_RES, param_SPAN, strm') = param_NT(strm) + val FULL_SPAN = (#1(param_SPAN), #2(param_SPAN)) + in + ((param_RES), FULL_SPAN, strm') + end + fun applet_PROD_1_SUBRULE_1_PROD_2 (strm) = let + val (flow_RES, flow_SPAN, strm') = flow_NT(strm) + val FULL_SPAN = (#1(flow_SPAN), #2(flow_SPAN)) + in + ((flow_RES), FULL_SPAN, strm') + end + in + (case (lex(strm)) + of (Tok.COMMENT(_), _, strm') => + applet_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.PCDATA(_), _, strm') => + applet_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.CHAR_REF(_), _, strm') => + applet_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.ENTITY_REF(_), _, strm') => + applet_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTA(_), _, strm') => + applet_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTABBR(_), _, strm') => + applet_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTACRONYM(_), _, strm') => + applet_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTADDRESS(_), _, strm') => + applet_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTAPPLET(_), _, strm') => + applet_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTB(_), _, strm') => + applet_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTBASEFONT(_), _, strm') => + applet_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTBDO(_), _, strm') => + applet_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTBIG(_), _, strm') => + applet_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTBLOCKQUOTE(_), _, strm') => + applet_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTBR(_), _, strm') => + applet_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTBUTTON(_), _, strm') => + applet_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTCENTER(_), _, strm') => + applet_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTCITE(_), _, strm') => + applet_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTCODE(_), _, strm') => + applet_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTDFN(_), _, strm') => + applet_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTDIR(_), _, strm') => + applet_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTDIV(_), _, strm') => + applet_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTDL(_), _, strm') => + applet_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTEM(_), _, strm') => + applet_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTFIELDSET(_), _, strm') => + applet_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTFONT(_), _, strm') => + applet_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTFORM(_), _, strm') => + applet_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTH1(_), _, strm') => + applet_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTH2(_), _, strm') => + applet_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTH3(_), _, strm') => + applet_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTH4(_), _, strm') => + applet_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTH5(_), _, strm') => + applet_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTH6(_), _, strm') => + applet_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTHR(_), _, strm') => + applet_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTI(_), _, strm') => + applet_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTIFRAME(_), _, strm') => + applet_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTIMG(_), _, strm') => + applet_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTINPUT(_), _, strm') => + applet_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTISINDEX(_), _, strm') => + applet_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTKBD(_), _, strm') => + applet_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTLABEL(_), _, strm') => + applet_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTMAP(_), _, strm') => + applet_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTMENU(_), _, strm') => + applet_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTNOSCRIPT(_), _, strm') => + applet_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTOBJECT(_), _, strm') => + applet_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTOL(_), _, strm') => + applet_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTP(_), _, strm') => + applet_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTPRE(_), _, strm') => + applet_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTQ(_), _, strm') => + applet_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTS(_), _, strm') => + applet_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTSAMP(_), _, strm') => + applet_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTSCRIPT(_), _, strm') => + applet_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTSELECT(_), _, strm') => + applet_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTSMALL(_), _, strm') => + applet_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTSPAN(_), _, strm') => + applet_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTSTRIKE(_), _, strm') => + applet_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTSTRONG(_), _, strm') => + applet_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTSUB(_), _, strm') => + applet_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTSUP(_), _, strm') => + applet_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTTABLE(_), _, strm') => + applet_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTTEXTAREA(_), _, strm') => + applet_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTTT(_), _, strm') => + applet_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTU(_), _, strm') => + applet_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTUL(_), _, strm') => + applet_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTVAR(_), _, strm') => + applet_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTPARAM(_), _, strm') => + applet_PROD_1_SUBRULE_1_PROD_1(strm) + | _ => fail() + (* end case *)) + end + fun applet_PROD_1_SUBRULE_1_PRED (strm) = (case (lex(strm)) + of (Tok.COMMENT(_), _, strm') => true + | (Tok.PCDATA(_), _, strm') => true + | (Tok.CHAR_REF(_), _, strm') => true + | (Tok.ENTITY_REF(_), _, strm') => true + | (Tok.STARTA(_), _, strm') => true + | (Tok.STARTABBR(_), _, strm') => true + | (Tok.STARTACRONYM(_), _, strm') => true + | (Tok.STARTADDRESS(_), _, strm') => true + | (Tok.STARTAPPLET(_), _, strm') => true + | (Tok.STARTB(_), _, strm') => true + | (Tok.STARTBASEFONT(_), _, strm') => true + | (Tok.STARTBDO(_), _, strm') => true + | (Tok.STARTBIG(_), _, strm') => true + | (Tok.STARTBLOCKQUOTE(_), _, strm') => true + | (Tok.STARTBR(_), _, strm') => true + | (Tok.STARTBUTTON(_), _, strm') => true + | (Tok.STARTCENTER(_), _, strm') => true + | (Tok.STARTCITE(_), _, strm') => true + | (Tok.STARTCODE(_), _, strm') => true + | (Tok.STARTDFN(_), _, strm') => true + | (Tok.STARTDIR(_), _, strm') => true + | (Tok.STARTDIV(_), _, strm') => true + | (Tok.STARTDL(_), _, strm') => true + | (Tok.STARTEM(_), _, strm') => true + | (Tok.STARTFIELDSET(_), _, strm') => true + | (Tok.STARTFONT(_), _, strm') => true + | (Tok.STARTFORM(_), _, strm') => true + | (Tok.STARTH1(_), _, strm') => true + | (Tok.STARTH2(_), _, strm') => true + | (Tok.STARTH3(_), _, strm') => true + | (Tok.STARTH4(_), _, strm') => true + | (Tok.STARTH5(_), _, strm') => true + | (Tok.STARTH6(_), _, strm') => true + | (Tok.STARTHR(_), _, strm') => true + | (Tok.STARTI(_), _, strm') => true + | (Tok.STARTIFRAME(_), _, strm') => true + | (Tok.STARTIMG(_), _, strm') => true + | (Tok.STARTINPUT(_), _, strm') => true + | (Tok.STARTISINDEX(_), _, strm') => true + | (Tok.STARTKBD(_), _, strm') => true + | (Tok.STARTLABEL(_), _, strm') => true + | (Tok.STARTMAP(_), _, strm') => true + | (Tok.STARTMENU(_), _, strm') => true + | (Tok.STARTNOSCRIPT(_), _, strm') => true + | (Tok.STARTOBJECT(_), _, strm') => true + | (Tok.STARTOL(_), _, strm') => true + | (Tok.STARTP(_), _, strm') => true + | (Tok.STARTPARAM(_), _, strm') => true + | (Tok.STARTPRE(_), _, strm') => true + | (Tok.STARTQ(_), _, strm') => true + | (Tok.STARTS(_), _, strm') => true + | (Tok.STARTSAMP(_), _, strm') => true + | (Tok.STARTSCRIPT(_), _, strm') => true + | (Tok.STARTSELECT(_), _, strm') => true + | (Tok.STARTSMALL(_), _, strm') => true + | (Tok.STARTSPAN(_), _, strm') => true + | (Tok.STARTSTRIKE(_), _, strm') => true + | (Tok.STARTSTRONG(_), _, strm') => true + | (Tok.STARTSUB(_), _, strm') => true + | (Tok.STARTSUP(_), _, strm') => true + | (Tok.STARTTABLE(_), _, strm') => true + | (Tok.STARTTEXTAREA(_), _, strm') => true + | (Tok.STARTTT(_), _, strm') => true + | (Tok.STARTU(_), _, strm') => true + | (Tok.STARTUL(_), _, strm') => true + | (Tok.STARTVAR(_), _, strm') => true + | _ => false + (* end case *)) + val (SR_RES, SR_SPAN, strm') = EBNF.closure(applet_PROD_1_SUBRULE_1_PRED, applet_PROD_1_SUBRULE_1_NT, strm') + val (ENDAPPLET_RES, ENDAPPLET_SPAN, strm') = matchENDAPPLET(strm') + val FULL_SPAN = (#1(STARTAPPLET_SPAN), #2(ENDAPPLET_SPAN)) + in + (UserCode.applet_PROD_1_ACT (SR_RES, ENDAPPLET_RES, STARTAPPLET_RES, SR_SPAN : (Lex.pos * Lex.pos), ENDAPPLET_SPAN : (Lex.pos * Lex.pos), STARTAPPLET_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)), + FULL_SPAN, strm') + end +and bdo_NT (strm) = let + val (STARTBDO_RES, STARTBDO_SPAN, strm') = matchSTARTBDO(strm) + fun bdo_PROD_1_SUBRULE_1_NT (strm) = let + val (inline_RES, inline_SPAN, strm') = inline_NT(strm) + val FULL_SPAN = (#1(inline_SPAN), #2(inline_SPAN)) + in + ((inline_RES), FULL_SPAN, strm') + end + fun bdo_PROD_1_SUBRULE_1_PRED (strm) = (case (lex(strm)) + of (Tok.COMMENT(_), _, strm') => true + | (Tok.PCDATA(_), _, strm') => true + | (Tok.CHAR_REF(_), _, strm') => true + | (Tok.ENTITY_REF(_), _, strm') => true + | (Tok.STARTA(_), _, strm') => true + | (Tok.STARTABBR(_), _, strm') => true + | (Tok.STARTACRONYM(_), _, strm') => true + | (Tok.STARTAPPLET(_), _, strm') => true + | (Tok.STARTB(_), _, strm') => true + | (Tok.STARTBASEFONT(_), _, strm') => true + | (Tok.STARTBDO(_), _, strm') => true + | (Tok.STARTBIG(_), _, strm') => true + | (Tok.STARTBR(_), _, strm') => true + | (Tok.STARTBUTTON(_), _, strm') => true + | (Tok.STARTCITE(_), _, strm') => true + | (Tok.STARTCODE(_), _, strm') => true + | (Tok.STARTDFN(_), _, strm') => true + | (Tok.STARTEM(_), _, strm') => true + | (Tok.STARTFONT(_), _, strm') => true + | (Tok.STARTI(_), _, strm') => true + | (Tok.STARTIFRAME(_), _, strm') => true + | (Tok.STARTIMG(_), _, strm') => true + | (Tok.STARTINPUT(_), _, strm') => true + | (Tok.STARTKBD(_), _, strm') => true + | (Tok.STARTLABEL(_), _, strm') => true + | (Tok.STARTMAP(_), _, strm') => true + | (Tok.STARTOBJECT(_), _, strm') => true + | (Tok.STARTQ(_), _, strm') => true + | (Tok.STARTS(_), _, strm') => true + | (Tok.STARTSAMP(_), _, strm') => true + | (Tok.STARTSCRIPT(_), _, strm') => true + | (Tok.STARTSELECT(_), _, strm') => true + | (Tok.STARTSMALL(_), _, strm') => true + | (Tok.STARTSPAN(_), _, strm') => true + | (Tok.STARTSTRIKE(_), _, strm') => true + | (Tok.STARTSTRONG(_), _, strm') => true + | (Tok.STARTSUB(_), _, strm') => true + | (Tok.STARTSUP(_), _, strm') => true + | (Tok.STARTTEXTAREA(_), _, strm') => true + | (Tok.STARTTT(_), _, strm') => true + | (Tok.STARTU(_), _, strm') => true + | (Tok.STARTVAR(_), _, strm') => true + | _ => false + (* end case *)) + val (inline_RES, inline_SPAN, strm') = EBNF.closure(bdo_PROD_1_SUBRULE_1_PRED, bdo_PROD_1_SUBRULE_1_NT, strm') + val (ENDBDO_RES, ENDBDO_SPAN, strm') = matchENDBDO(strm') + val FULL_SPAN = (#1(STARTBDO_SPAN), #2(ENDBDO_SPAN)) + in + (UserCode.bdo_PROD_1_ACT (inline_RES, ENDBDO_RES, STARTBDO_RES, inline_SPAN : (Lex.pos * Lex.pos), ENDBDO_SPAN : (Lex.pos * Lex.pos), STARTBDO_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)), + FULL_SPAN, strm') + end +and span_NT (strm) = let + val (STARTSPAN_RES, STARTSPAN_SPAN, strm') = matchSTARTSPAN(strm) + fun span_PROD_1_SUBRULE_1_NT (strm) = let + val (inline_RES, inline_SPAN, strm') = inline_NT(strm) + val FULL_SPAN = (#1(inline_SPAN), #2(inline_SPAN)) + in + ((inline_RES), FULL_SPAN, strm') + end + fun span_PROD_1_SUBRULE_1_PRED (strm) = (case (lex(strm)) + of (Tok.COMMENT(_), _, strm') => true + | (Tok.PCDATA(_), _, strm') => true + | (Tok.CHAR_REF(_), _, strm') => true + | (Tok.ENTITY_REF(_), _, strm') => true + | (Tok.STARTA(_), _, strm') => true + | (Tok.STARTABBR(_), _, strm') => true + | (Tok.STARTACRONYM(_), _, strm') => true + | (Tok.STARTAPPLET(_), _, strm') => true + | (Tok.STARTB(_), _, strm') => true + | (Tok.STARTBASEFONT(_), _, strm') => true + | (Tok.STARTBDO(_), _, strm') => true + | (Tok.STARTBIG(_), _, strm') => true + | (Tok.STARTBR(_), _, strm') => true + | (Tok.STARTBUTTON(_), _, strm') => true + | (Tok.STARTCITE(_), _, strm') => true + | (Tok.STARTCODE(_), _, strm') => true + | (Tok.STARTDFN(_), _, strm') => true + | (Tok.STARTEM(_), _, strm') => true + | (Tok.STARTFONT(_), _, strm') => true + | (Tok.STARTI(_), _, strm') => true + | (Tok.STARTIFRAME(_), _, strm') => true + | (Tok.STARTIMG(_), _, strm') => true + | (Tok.STARTINPUT(_), _, strm') => true + | (Tok.STARTKBD(_), _, strm') => true + | (Tok.STARTLABEL(_), _, strm') => true + | (Tok.STARTMAP(_), _, strm') => true + | (Tok.STARTOBJECT(_), _, strm') => true + | (Tok.STARTQ(_), _, strm') => true + | (Tok.STARTS(_), _, strm') => true + | (Tok.STARTSAMP(_), _, strm') => true + | (Tok.STARTSCRIPT(_), _, strm') => true + | (Tok.STARTSELECT(_), _, strm') => true + | (Tok.STARTSMALL(_), _, strm') => true + | (Tok.STARTSPAN(_), _, strm') => true + | (Tok.STARTSTRIKE(_), _, strm') => true + | (Tok.STARTSTRONG(_), _, strm') => true + | (Tok.STARTSUB(_), _, strm') => true + | (Tok.STARTSUP(_), _, strm') => true + | (Tok.STARTTEXTAREA(_), _, strm') => true + | (Tok.STARTTT(_), _, strm') => true + | (Tok.STARTU(_), _, strm') => true + | (Tok.STARTVAR(_), _, strm') => true + | _ => false + (* end case *)) + val (inline_RES, inline_SPAN, strm') = EBNF.closure(span_PROD_1_SUBRULE_1_PRED, span_PROD_1_SUBRULE_1_NT, strm') + val (ENDSPAN_RES, ENDSPAN_SPAN, strm') = matchENDSPAN(strm') + val FULL_SPAN = (#1(STARTSPAN_SPAN), #2(ENDSPAN_SPAN)) + in + (UserCode.span_PROD_1_ACT (inline_RES, STARTSPAN_RES, ENDSPAN_RES, inline_SPAN : (Lex.pos * Lex.pos), STARTSPAN_SPAN : (Lex.pos * Lex.pos), ENDSPAN_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)), + FULL_SPAN, strm') + end +and sup_NT (strm) = let + val (STARTSUP_RES, STARTSUP_SPAN, strm') = matchSTARTSUP(strm) + fun sup_PROD_1_SUBRULE_1_NT (strm) = let + val (inline_RES, inline_SPAN, strm') = inline_NT(strm) + val FULL_SPAN = (#1(inline_SPAN), #2(inline_SPAN)) + in + ((inline_RES), FULL_SPAN, strm') + end + fun sup_PROD_1_SUBRULE_1_PRED (strm) = (case (lex(strm)) + of (Tok.COMMENT(_), _, strm') => true + | (Tok.PCDATA(_), _, strm') => true + | (Tok.CHAR_REF(_), _, strm') => true + | (Tok.ENTITY_REF(_), _, strm') => true + | (Tok.STARTA(_), _, strm') => true + | (Tok.STARTABBR(_), _, strm') => true + | (Tok.STARTACRONYM(_), _, strm') => true + | (Tok.STARTAPPLET(_), _, strm') => true + | (Tok.STARTB(_), _, strm') => true + | (Tok.STARTBASEFONT(_), _, strm') => true + | (Tok.STARTBDO(_), _, strm') => true + | (Tok.STARTBIG(_), _, strm') => true + | (Tok.STARTBR(_), _, strm') => true + | (Tok.STARTBUTTON(_), _, strm') => true + | (Tok.STARTCITE(_), _, strm') => true + | (Tok.STARTCODE(_), _, strm') => true + | (Tok.STARTDFN(_), _, strm') => true + | (Tok.STARTEM(_), _, strm') => true + | (Tok.STARTFONT(_), _, strm') => true + | (Tok.STARTI(_), _, strm') => true + | (Tok.STARTIFRAME(_), _, strm') => true + | (Tok.STARTIMG(_), _, strm') => true + | (Tok.STARTINPUT(_), _, strm') => true + | (Tok.STARTKBD(_), _, strm') => true + | (Tok.STARTLABEL(_), _, strm') => true + | (Tok.STARTMAP(_), _, strm') => true + | (Tok.STARTOBJECT(_), _, strm') => true + | (Tok.STARTQ(_), _, strm') => true + | (Tok.STARTS(_), _, strm') => true + | (Tok.STARTSAMP(_), _, strm') => true + | (Tok.STARTSCRIPT(_), _, strm') => true + | (Tok.STARTSELECT(_), _, strm') => true + | (Tok.STARTSMALL(_), _, strm') => true + | (Tok.STARTSPAN(_), _, strm') => true + | (Tok.STARTSTRIKE(_), _, strm') => true + | (Tok.STARTSTRONG(_), _, strm') => true + | (Tok.STARTSUB(_), _, strm') => true + | (Tok.STARTSUP(_), _, strm') => true + | (Tok.STARTTEXTAREA(_), _, strm') => true + | (Tok.STARTTT(_), _, strm') => true + | (Tok.STARTU(_), _, strm') => true + | (Tok.STARTVAR(_), _, strm') => true + | _ => false + (* end case *)) + val (inline_RES, inline_SPAN, strm') = EBNF.closure(sup_PROD_1_SUBRULE_1_PRED, sup_PROD_1_SUBRULE_1_NT, strm') + val (ENDSUP_RES, ENDSUP_SPAN, strm') = matchENDSUP(strm') + val FULL_SPAN = (#1(STARTSUP_SPAN), #2(ENDSUP_SPAN)) + in + (UserCode.sup_PROD_1_ACT (inline_RES, ENDSUP_RES, STARTSUP_RES, inline_SPAN : (Lex.pos * Lex.pos), ENDSUP_SPAN : (Lex.pos * Lex.pos), STARTSUP_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)), + FULL_SPAN, strm') + end +and sub_NT (strm) = let + val (STARTSUB_RES, STARTSUB_SPAN, strm') = matchSTARTSUB(strm) + fun sub_PROD_1_SUBRULE_1_NT (strm) = let + val (inline_RES, inline_SPAN, strm') = inline_NT(strm) + val FULL_SPAN = (#1(inline_SPAN), #2(inline_SPAN)) + in + ((inline_RES), FULL_SPAN, strm') + end + fun sub_PROD_1_SUBRULE_1_PRED (strm) = (case (lex(strm)) + of (Tok.COMMENT(_), _, strm') => true + | (Tok.PCDATA(_), _, strm') => true + | (Tok.CHAR_REF(_), _, strm') => true + | (Tok.ENTITY_REF(_), _, strm') => true + | (Tok.STARTA(_), _, strm') => true + | (Tok.STARTABBR(_), _, strm') => true + | (Tok.STARTACRONYM(_), _, strm') => true + | (Tok.STARTAPPLET(_), _, strm') => true + | (Tok.STARTB(_), _, strm') => true + | (Tok.STARTBASEFONT(_), _, strm') => true + | (Tok.STARTBDO(_), _, strm') => true + | (Tok.STARTBIG(_), _, strm') => true + | (Tok.STARTBR(_), _, strm') => true + | (Tok.STARTBUTTON(_), _, strm') => true + | (Tok.STARTCITE(_), _, strm') => true + | (Tok.STARTCODE(_), _, strm') => true + | (Tok.STARTDFN(_), _, strm') => true + | (Tok.STARTEM(_), _, strm') => true + | (Tok.STARTFONT(_), _, strm') => true + | (Tok.STARTI(_), _, strm') => true + | (Tok.STARTIFRAME(_), _, strm') => true + | (Tok.STARTIMG(_), _, strm') => true + | (Tok.STARTINPUT(_), _, strm') => true + | (Tok.STARTKBD(_), _, strm') => true + | (Tok.STARTLABEL(_), _, strm') => true + | (Tok.STARTMAP(_), _, strm') => true + | (Tok.STARTOBJECT(_), _, strm') => true + | (Tok.STARTQ(_), _, strm') => true + | (Tok.STARTS(_), _, strm') => true + | (Tok.STARTSAMP(_), _, strm') => true + | (Tok.STARTSCRIPT(_), _, strm') => true + | (Tok.STARTSELECT(_), _, strm') => true + | (Tok.STARTSMALL(_), _, strm') => true + | (Tok.STARTSPAN(_), _, strm') => true + | (Tok.STARTSTRIKE(_), _, strm') => true + | (Tok.STARTSTRONG(_), _, strm') => true + | (Tok.STARTSUB(_), _, strm') => true + | (Tok.STARTSUP(_), _, strm') => true + | (Tok.STARTTEXTAREA(_), _, strm') => true + | (Tok.STARTTT(_), _, strm') => true + | (Tok.STARTU(_), _, strm') => true + | (Tok.STARTVAR(_), _, strm') => true + | _ => false + (* end case *)) + val (inline_RES, inline_SPAN, strm') = EBNF.closure(sub_PROD_1_SUBRULE_1_PRED, sub_PROD_1_SUBRULE_1_NT, strm') + val (ENDSUB_RES, ENDSUB_SPAN, strm') = matchENDSUB(strm') + val FULL_SPAN = (#1(STARTSUB_SPAN), #2(ENDSUB_SPAN)) + in + (UserCode.sub_PROD_1_ACT (inline_RES, ENDSUB_RES, STARTSUB_RES, inline_SPAN : (Lex.pos * Lex.pos), ENDSUB_SPAN : (Lex.pos * Lex.pos), STARTSUB_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)), + FULL_SPAN, strm') + end +and q_NT (strm) = let + val (STARTQ_RES, STARTQ_SPAN, strm') = matchSTARTQ(strm) + fun q_PROD_1_SUBRULE_1_NT (strm) = let + val (inline_RES, inline_SPAN, strm') = inline_NT(strm) + val FULL_SPAN = (#1(inline_SPAN), #2(inline_SPAN)) + in + ((inline_RES), FULL_SPAN, strm') + end + fun q_PROD_1_SUBRULE_1_PRED (strm) = (case (lex(strm)) + of (Tok.COMMENT(_), _, strm') => true + | (Tok.PCDATA(_), _, strm') => true + | (Tok.CHAR_REF(_), _, strm') => true + | (Tok.ENTITY_REF(_), _, strm') => true + | (Tok.STARTA(_), _, strm') => true + | (Tok.STARTABBR(_), _, strm') => true + | (Tok.STARTACRONYM(_), _, strm') => true + | (Tok.STARTAPPLET(_), _, strm') => true + | (Tok.STARTB(_), _, strm') => true + | (Tok.STARTBASEFONT(_), _, strm') => true + | (Tok.STARTBDO(_), _, strm') => true + | (Tok.STARTBIG(_), _, strm') => true + | (Tok.STARTBR(_), _, strm') => true + | (Tok.STARTBUTTON(_), _, strm') => true + | (Tok.STARTCITE(_), _, strm') => true + | (Tok.STARTCODE(_), _, strm') => true + | (Tok.STARTDFN(_), _, strm') => true + | (Tok.STARTEM(_), _, strm') => true + | (Tok.STARTFONT(_), _, strm') => true + | (Tok.STARTI(_), _, strm') => true + | (Tok.STARTIFRAME(_), _, strm') => true + | (Tok.STARTIMG(_), _, strm') => true + | (Tok.STARTINPUT(_), _, strm') => true + | (Tok.STARTKBD(_), _, strm') => true + | (Tok.STARTLABEL(_), _, strm') => true + | (Tok.STARTMAP(_), _, strm') => true + | (Tok.STARTOBJECT(_), _, strm') => true + | (Tok.STARTQ(_), _, strm') => true + | (Tok.STARTS(_), _, strm') => true + | (Tok.STARTSAMP(_), _, strm') => true + | (Tok.STARTSCRIPT(_), _, strm') => true + | (Tok.STARTSELECT(_), _, strm') => true + | (Tok.STARTSMALL(_), _, strm') => true + | (Tok.STARTSPAN(_), _, strm') => true + | (Tok.STARTSTRIKE(_), _, strm') => true + | (Tok.STARTSTRONG(_), _, strm') => true + | (Tok.STARTSUB(_), _, strm') => true + | (Tok.STARTSUP(_), _, strm') => true + | (Tok.STARTTEXTAREA(_), _, strm') => true + | (Tok.STARTTT(_), _, strm') => true + | (Tok.STARTU(_), _, strm') => true + | (Tok.STARTVAR(_), _, strm') => true + | _ => false + (* end case *)) + val (inline_RES, inline_SPAN, strm') = EBNF.closure(q_PROD_1_SUBRULE_1_PRED, q_PROD_1_SUBRULE_1_NT, strm') + val (ENDQ_RES, ENDQ_SPAN, strm') = matchENDQ(strm') + val FULL_SPAN = (#1(STARTQ_SPAN), #2(ENDQ_SPAN)) + in + (UserCode.q_PROD_1_ACT (inline_RES, STARTQ_RES, ENDQ_RES, inline_SPAN : (Lex.pos * Lex.pos), STARTQ_SPAN : (Lex.pos * Lex.pos), ENDQ_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)), + FULL_SPAN, strm') + end +and map_NT (strm) = let + val (STARTMAP_RES, STARTMAP_SPAN, strm') = matchSTARTMAP(strm) + fun map_PROD_1_SUBRULE_1_NT (strm) = let + fun map_PROD_1_SUBRULE_1_PROD_1 (strm) = let + val (cdata_RES, cdata_SPAN, strm') = cdata_NT(strm) + val FULL_SPAN = (#1(cdata_SPAN), #2(cdata_SPAN)) + in + ((cdata_RES), FULL_SPAN, strm') + end + fun map_PROD_1_SUBRULE_1_PROD_2 (strm) = let + val (block_RES, block_SPAN, strm') = block_NT(strm) + val FULL_SPAN = (#1(block_SPAN), #2(block_SPAN)) + in + ((block_RES), FULL_SPAN, strm') + end + fun map_PROD_1_SUBRULE_1_PROD_3 (strm) = let + val (area_RES, area_SPAN, strm') = area_NT(strm) + val FULL_SPAN = (#1(area_SPAN), #2(area_SPAN)) + in + ((area_RES), FULL_SPAN, strm') + end + in + (case (lex(strm)) + of (Tok.STARTAREA(_), _, strm') => + map_PROD_1_SUBRULE_1_PROD_3(strm) + | (Tok.COMMENT(_), _, strm') => + map_PROD_1_SUBRULE_1_PROD_1(strm) + | (Tok.PCDATA(_), _, strm') => + map_PROD_1_SUBRULE_1_PROD_1(strm) + | (Tok.CHAR_REF(_), _, strm') => + map_PROD_1_SUBRULE_1_PROD_1(strm) + | (Tok.ENTITY_REF(_), _, strm') => + map_PROD_1_SUBRULE_1_PROD_1(strm) + | (Tok.STARTADDRESS(_), _, strm') => + map_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTBLOCKQUOTE(_), _, strm') => + map_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTCENTER(_), _, strm') => + map_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTDIR(_), _, strm') => + map_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTDIV(_), _, strm') => + map_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTDL(_), _, strm') => + map_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTFIELDSET(_), _, strm') => + map_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTFORM(_), _, strm') => + map_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTH1(_), _, strm') => + map_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTH2(_), _, strm') => + map_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTH3(_), _, strm') => + map_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTH4(_), _, strm') => + map_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTH5(_), _, strm') => + map_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTH6(_), _, strm') => + map_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTHR(_), _, strm') => + map_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTISINDEX(_), _, strm') => + map_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTMENU(_), _, strm') => + map_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTNOSCRIPT(_), _, strm') => + map_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTOL(_), _, strm') => + map_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTP(_), _, strm') => + map_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTPRE(_), _, strm') => + map_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTTABLE(_), _, strm') => + map_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTUL(_), _, strm') => + map_PROD_1_SUBRULE_1_PROD_2(strm) + | _ => fail() + (* end case *)) + end + fun map_PROD_1_SUBRULE_1_PRED (strm) = (case (lex(strm)) + of (Tok.COMMENT(_), _, strm') => true + | (Tok.PCDATA(_), _, strm') => true + | (Tok.CHAR_REF(_), _, strm') => true + | (Tok.ENTITY_REF(_), _, strm') => true + | (Tok.STARTADDRESS(_), _, strm') => true + | (Tok.STARTAREA(_), _, strm') => true + | (Tok.STARTBLOCKQUOTE(_), _, strm') => true + | (Tok.STARTCENTER(_), _, strm') => true + | (Tok.STARTDIR(_), _, strm') => true + | (Tok.STARTDIV(_), _, strm') => true + | (Tok.STARTDL(_), _, strm') => true + | (Tok.STARTFIELDSET(_), _, strm') => true + | (Tok.STARTFORM(_), _, strm') => true + | (Tok.STARTH1(_), _, strm') => true + | (Tok.STARTH2(_), _, strm') => true + | (Tok.STARTH3(_), _, strm') => true + | (Tok.STARTH4(_), _, strm') => true + | (Tok.STARTH5(_), _, strm') => true + | (Tok.STARTH6(_), _, strm') => true + | (Tok.STARTHR(_), _, strm') => true + | (Tok.STARTISINDEX(_), _, strm') => true + | (Tok.STARTMENU(_), _, strm') => true + | (Tok.STARTNOSCRIPT(_), _, strm') => true + | (Tok.STARTOL(_), _, strm') => true + | (Tok.STARTP(_), _, strm') => true + | (Tok.STARTPRE(_), _, strm') => true + | (Tok.STARTTABLE(_), _, strm') => true + | (Tok.STARTUL(_), _, strm') => true + | _ => false + (* end case *)) + val (SR_RES, SR_SPAN, strm') = EBNF.posclos(map_PROD_1_SUBRULE_1_PRED, map_PROD_1_SUBRULE_1_NT, strm') + val (ENDMAP_RES, ENDMAP_SPAN, strm') = matchENDMAP(strm') + val FULL_SPAN = (#1(STARTMAP_SPAN), #2(ENDMAP_SPAN)) + in + (UserCode.map_PROD_1_ACT (SR_RES, STARTMAP_RES, ENDMAP_RES, SR_SPAN : (Lex.pos * Lex.pos), STARTMAP_SPAN : (Lex.pos * Lex.pos), ENDMAP_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)), + FULL_SPAN, strm') + end +and block_NT (strm) = let + fun block_PROD_1 (strm) = let + val (p_RES, p_SPAN, strm') = p_NT(strm) + val FULL_SPAN = (#1(p_SPAN), #2(p_SPAN)) + in + ((p_RES), FULL_SPAN, strm') + end + fun block_PROD_2 (strm) = let + val (heading_RES, heading_SPAN, strm') = heading_NT(strm) + val FULL_SPAN = (#1(heading_SPAN), #2(heading_SPAN)) + in + ((heading_RES), FULL_SPAN, strm') + end + fun block_PROD_3 (strm) = let + val (list_RES, list_SPAN, strm') = list_NT(strm) + val FULL_SPAN = (#1(list_SPAN), #2(list_SPAN)) + in + ((list_RES), FULL_SPAN, strm') + end + fun block_PROD_4 (strm) = let + val (preformatted_RES, preformatted_SPAN, strm') = preformatted_NT(strm) + val FULL_SPAN = (#1(preformatted_SPAN), #2(preformatted_SPAN)) + in + ((preformatted_RES), FULL_SPAN, strm') + end + fun block_PROD_5 (strm) = let + val (dl_RES, dl_SPAN, strm') = dl_NT(strm) + val FULL_SPAN = (#1(dl_SPAN), #2(dl_SPAN)) + in + ((dl_RES), FULL_SPAN, strm') + end + fun block_PROD_6 (strm) = let + val (div_RES, div_SPAN, strm') = div_NT(strm) + val FULL_SPAN = (#1(div_SPAN), #2(div_SPAN)) + in + ((div_RES), FULL_SPAN, strm') + end + fun block_PROD_7 (strm) = let + val (noscript_RES, noscript_SPAN, strm') = noscript_NT(strm) + val FULL_SPAN = (#1(noscript_SPAN), #2(noscript_SPAN)) + in + ((noscript_RES), FULL_SPAN, strm') + end + fun block_PROD_8 (strm) = let + val (blockquote_RES, blockquote_SPAN, strm') = blockquote_NT(strm) + val FULL_SPAN = (#1(blockquote_SPAN), #2(blockquote_SPAN)) + in + ((blockquote_RES), FULL_SPAN, strm') + end + fun block_PROD_9 (strm) = let + val (form_RES, form_SPAN, strm') = form_NT(strm) + val FULL_SPAN = (#1(form_SPAN), #2(form_SPAN)) + in + ((form_RES), FULL_SPAN, strm') + end + fun block_PROD_10 (strm) = let + val (hr_RES, hr_SPAN, strm') = hr_NT(strm) + val FULL_SPAN = (#1(hr_SPAN), #2(hr_SPAN)) + in + ((hr_RES), FULL_SPAN, strm') + end + fun block_PROD_11 (strm) = let + val (table_RES, table_SPAN, strm') = table_NT(strm) + val FULL_SPAN = (#1(table_SPAN), #2(table_SPAN)) + in + ((table_RES), FULL_SPAN, strm') + end + fun block_PROD_12 (strm) = let + val (fieldset_RES, fieldset_SPAN, strm') = fieldset_NT(strm) + val FULL_SPAN = (#1(fieldset_SPAN), #2(fieldset_SPAN)) + in + ((fieldset_RES), FULL_SPAN, strm') + end + fun block_PROD_13 (strm) = let + val (address_RES, address_SPAN, strm') = address_NT(strm) + val FULL_SPAN = (#1(address_SPAN), #2(address_SPAN)) + in + ((address_RES), FULL_SPAN, strm') + end + fun block_PROD_14 (strm) = let + val (block_loose_RES, block_loose_SPAN, strm') = block_loose_NT(strm) + val FULL_SPAN = (#1(block_loose_SPAN), #2(block_loose_SPAN)) + in + ((block_loose_RES), FULL_SPAN, strm') + end + in + (case (lex(strm)) + of (Tok.STARTCENTER(_), _, strm') => block_PROD_14(strm) + | (Tok.STARTISINDEX(_), _, strm') => block_PROD_14(strm) + | (Tok.STARTFIELDSET(_), _, strm') => block_PROD_12(strm) + | (Tok.STARTHR(_), _, strm') => block_PROD_10(strm) + | (Tok.STARTBLOCKQUOTE(_), _, strm') => block_PROD_8(strm) + | (Tok.STARTDIV(_), _, strm') => block_PROD_6(strm) + | (Tok.STARTPRE(_), _, strm') => block_PROD_4(strm) + | (Tok.STARTH1(_), _, strm') => block_PROD_2(strm) + | (Tok.STARTH2(_), _, strm') => block_PROD_2(strm) + | (Tok.STARTH3(_), _, strm') => block_PROD_2(strm) + | (Tok.STARTH4(_), _, strm') => block_PROD_2(strm) + | (Tok.STARTH5(_), _, strm') => block_PROD_2(strm) + | (Tok.STARTH6(_), _, strm') => block_PROD_2(strm) + | (Tok.STARTP(_), _, strm') => block_PROD_1(strm) + | (Tok.STARTDIR(_), _, strm') => block_PROD_3(strm) + | (Tok.STARTMENU(_), _, strm') => block_PROD_3(strm) + | (Tok.STARTOL(_), _, strm') => block_PROD_3(strm) + | (Tok.STARTUL(_), _, strm') => block_PROD_3(strm) + | (Tok.STARTDL(_), _, strm') => block_PROD_5(strm) + | (Tok.STARTNOSCRIPT(_), _, strm') => block_PROD_7(strm) + | (Tok.STARTFORM(_), _, strm') => block_PROD_9(strm) + | (Tok.STARTTABLE(_), _, strm') => block_PROD_11(strm) + | (Tok.STARTADDRESS(_), _, strm') => block_PROD_13(strm) + | _ => fail() + (* end case *)) + end +and block_loose_NT (strm) = let + fun block_loose_PROD_1 (strm) = let + val (center_RES, center_SPAN, strm') = center_NT(strm) + val FULL_SPAN = (#1(center_SPAN), #2(center_SPAN)) + in + ((center_RES), FULL_SPAN, strm') + end + fun block_loose_PROD_2 (strm) = let + val (isindex_RES, isindex_SPAN, strm') = isindex_NT(strm) + val FULL_SPAN = (#1(isindex_SPAN), #2(isindex_SPAN)) + in + ((isindex_RES), FULL_SPAN, strm') + end + in + (case (lex(strm)) + of (Tok.STARTISINDEX(_), _, strm') => block_loose_PROD_2(strm) + | (Tok.STARTCENTER(_), _, strm') => block_loose_PROD_1(strm) + | _ => fail() + (* end case *)) + end +and center_NT (strm) = let + val (STARTCENTER_RES, STARTCENTER_SPAN, strm') = matchSTARTCENTER(strm) + fun center_PROD_1_SUBRULE_1_NT (strm) = let + val (flow_RES, flow_SPAN, strm') = flow_NT(strm) + val FULL_SPAN = (#1(flow_SPAN), #2(flow_SPAN)) + in + ((flow_RES), FULL_SPAN, strm') + end + fun center_PROD_1_SUBRULE_1_PRED (strm) = (case (lex(strm)) + of (Tok.COMMENT(_), _, strm') => true + | (Tok.PCDATA(_), _, strm') => true + | (Tok.CHAR_REF(_), _, strm') => true + | (Tok.ENTITY_REF(_), _, strm') => true + | (Tok.STARTA(_), _, strm') => true + | (Tok.STARTABBR(_), _, strm') => true + | (Tok.STARTACRONYM(_), _, strm') => true + | (Tok.STARTADDRESS(_), _, strm') => true + | (Tok.STARTAPPLET(_), _, strm') => true + | (Tok.STARTB(_), _, strm') => true + | (Tok.STARTBASEFONT(_), _, strm') => true + | (Tok.STARTBDO(_), _, strm') => true + | (Tok.STARTBIG(_), _, strm') => true + | (Tok.STARTBLOCKQUOTE(_), _, strm') => true + | (Tok.STARTBR(_), _, strm') => true + | (Tok.STARTBUTTON(_), _, strm') => true + | (Tok.STARTCENTER(_), _, strm') => true + | (Tok.STARTCITE(_), _, strm') => true + | (Tok.STARTCODE(_), _, strm') => true + | (Tok.STARTDFN(_), _, strm') => true + | (Tok.STARTDIR(_), _, strm') => true + | (Tok.STARTDIV(_), _, strm') => true + | (Tok.STARTDL(_), _, strm') => true + | (Tok.STARTEM(_), _, strm') => true + | (Tok.STARTFIELDSET(_), _, strm') => true + | (Tok.STARTFONT(_), _, strm') => true + | (Tok.STARTFORM(_), _, strm') => true + | (Tok.STARTH1(_), _, strm') => true + | (Tok.STARTH2(_), _, strm') => true + | (Tok.STARTH3(_), _, strm') => true + | (Tok.STARTH4(_), _, strm') => true + | (Tok.STARTH5(_), _, strm') => true + | (Tok.STARTH6(_), _, strm') => true + | (Tok.STARTHR(_), _, strm') => true + | (Tok.STARTI(_), _, strm') => true + | (Tok.STARTIFRAME(_), _, strm') => true + | (Tok.STARTIMG(_), _, strm') => true + | (Tok.STARTINPUT(_), _, strm') => true + | (Tok.STARTISINDEX(_), _, strm') => true + | (Tok.STARTKBD(_), _, strm') => true + | (Tok.STARTLABEL(_), _, strm') => true + | (Tok.STARTMAP(_), _, strm') => true + | (Tok.STARTMENU(_), _, strm') => true + | (Tok.STARTNOSCRIPT(_), _, strm') => true + | (Tok.STARTOBJECT(_), _, strm') => true + | (Tok.STARTOL(_), _, strm') => true + | (Tok.STARTP(_), _, strm') => true + | (Tok.STARTPRE(_), _, strm') => true + | (Tok.STARTQ(_), _, strm') => true + | (Tok.STARTS(_), _, strm') => true + | (Tok.STARTSAMP(_), _, strm') => true + | (Tok.STARTSCRIPT(_), _, strm') => true + | (Tok.STARTSELECT(_), _, strm') => true + | (Tok.STARTSMALL(_), _, strm') => true + | (Tok.STARTSPAN(_), _, strm') => true + | (Tok.STARTSTRIKE(_), _, strm') => true + | (Tok.STARTSTRONG(_), _, strm') => true + | (Tok.STARTSUB(_), _, strm') => true + | (Tok.STARTSUP(_), _, strm') => true + | (Tok.STARTTABLE(_), _, strm') => true + | (Tok.STARTTEXTAREA(_), _, strm') => true + | (Tok.STARTTT(_), _, strm') => true + | (Tok.STARTU(_), _, strm') => true + | (Tok.STARTUL(_), _, strm') => true + | (Tok.STARTVAR(_), _, strm') => true + | _ => false + (* end case *)) + val (flow_RES, flow_SPAN, strm') = EBNF.closure(center_PROD_1_SUBRULE_1_PRED, center_PROD_1_SUBRULE_1_NT, strm') + val (ENDCENTER_RES, ENDCENTER_SPAN, strm') = matchENDCENTER(strm') + val FULL_SPAN = (#1(STARTCENTER_SPAN), #2(ENDCENTER_SPAN)) + in + (UserCode.center_PROD_1_ACT (ENDCENTER_RES, flow_RES, STARTCENTER_RES, ENDCENTER_SPAN : (Lex.pos * Lex.pos), flow_SPAN : (Lex.pos * Lex.pos), STARTCENTER_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)), + FULL_SPAN, strm') + end +and address_NT (strm) = let + val (STARTADDRESS_RES, STARTADDRESS_SPAN, strm') = matchSTARTADDRESS(strm) + fun address_PROD_1_SUBRULE_1_NT (strm) = let + val (inline_RES, inline_SPAN, strm') = inline_NT(strm) + val FULL_SPAN = (#1(inline_SPAN), #2(inline_SPAN)) + in + ((inline_RES), FULL_SPAN, strm') + end + fun address_PROD_1_SUBRULE_1_PRED (strm) = (case (lex(strm)) + of (Tok.COMMENT(_), _, strm') => true + | (Tok.PCDATA(_), _, strm') => true + | (Tok.CHAR_REF(_), _, strm') => true + | (Tok.ENTITY_REF(_), _, strm') => true + | (Tok.STARTA(_), _, strm') => true + | (Tok.STARTABBR(_), _, strm') => true + | (Tok.STARTACRONYM(_), _, strm') => true + | (Tok.STARTAPPLET(_), _, strm') => true + | (Tok.STARTB(_), _, strm') => true + | (Tok.STARTBASEFONT(_), _, strm') => true + | (Tok.STARTBDO(_), _, strm') => true + | (Tok.STARTBIG(_), _, strm') => true + | (Tok.STARTBR(_), _, strm') => true + | (Tok.STARTBUTTON(_), _, strm') => true + | (Tok.STARTCITE(_), _, strm') => true + | (Tok.STARTCODE(_), _, strm') => true + | (Tok.STARTDFN(_), _, strm') => true + | (Tok.STARTEM(_), _, strm') => true + | (Tok.STARTFONT(_), _, strm') => true + | (Tok.STARTI(_), _, strm') => true + | (Tok.STARTIFRAME(_), _, strm') => true + | (Tok.STARTIMG(_), _, strm') => true + | (Tok.STARTINPUT(_), _, strm') => true + | (Tok.STARTKBD(_), _, strm') => true + | (Tok.STARTLABEL(_), _, strm') => true + | (Tok.STARTMAP(_), _, strm') => true + | (Tok.STARTOBJECT(_), _, strm') => true + | (Tok.STARTQ(_), _, strm') => true + | (Tok.STARTS(_), _, strm') => true + | (Tok.STARTSAMP(_), _, strm') => true + | (Tok.STARTSCRIPT(_), _, strm') => true + | (Tok.STARTSELECT(_), _, strm') => true + | (Tok.STARTSMALL(_), _, strm') => true + | (Tok.STARTSPAN(_), _, strm') => true + | (Tok.STARTSTRIKE(_), _, strm') => true + | (Tok.STARTSTRONG(_), _, strm') => true + | (Tok.STARTSUB(_), _, strm') => true + | (Tok.STARTSUP(_), _, strm') => true + | (Tok.STARTTEXTAREA(_), _, strm') => true + | (Tok.STARTTT(_), _, strm') => true + | (Tok.STARTU(_), _, strm') => true + | (Tok.STARTVAR(_), _, strm') => true + | _ => false + (* end case *)) + val (inline_RES, inline_SPAN, strm') = EBNF.closure(address_PROD_1_SUBRULE_1_PRED, address_PROD_1_SUBRULE_1_NT, strm') + val (ENDADDRESS_RES, ENDADDRESS_SPAN, strm') = matchENDADDRESS(strm') + val FULL_SPAN = (#1(STARTADDRESS_SPAN), #2(ENDADDRESS_SPAN)) + in + (UserCode.address_PROD_1_ACT (inline_RES, STARTADDRESS_RES, ENDADDRESS_RES, inline_SPAN : (Lex.pos * Lex.pos), STARTADDRESS_SPAN : (Lex.pos * Lex.pos), ENDADDRESS_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)), + FULL_SPAN, strm') + end +and fieldset_NT (strm) = let + val (STARTFIELDSET_RES, STARTFIELDSET_SPAN, strm') = matchSTARTFIELDSET(strm) + val (cdata_opt_RES, cdata_opt_SPAN, strm') = cdata_opt_NT(strm') + val (legend_RES, legend_SPAN, strm') = legend_NT(strm') + fun fieldset_PROD_1_SUBRULE_1_NT (strm) = let + val (flow_RES, flow_SPAN, strm') = flow_NT(strm) + val FULL_SPAN = (#1(flow_SPAN), #2(flow_SPAN)) + in + ((flow_RES), FULL_SPAN, strm') + end + fun fieldset_PROD_1_SUBRULE_1_PRED (strm) = (case (lex(strm)) + of (Tok.COMMENT(_), _, strm') => true + | (Tok.PCDATA(_), _, strm') => true + | (Tok.CHAR_REF(_), _, strm') => true + | (Tok.ENTITY_REF(_), _, strm') => true + | (Tok.STARTA(_), _, strm') => true + | (Tok.STARTABBR(_), _, strm') => true + | (Tok.STARTACRONYM(_), _, strm') => true + | (Tok.STARTADDRESS(_), _, strm') => true + | (Tok.STARTAPPLET(_), _, strm') => true + | (Tok.STARTB(_), _, strm') => true + | (Tok.STARTBASEFONT(_), _, strm') => true + | (Tok.STARTBDO(_), _, strm') => true + | (Tok.STARTBIG(_), _, strm') => true + | (Tok.STARTBLOCKQUOTE(_), _, strm') => true + | (Tok.STARTBR(_), _, strm') => true + | (Tok.STARTBUTTON(_), _, strm') => true + | (Tok.STARTCENTER(_), _, strm') => true + | (Tok.STARTCITE(_), _, strm') => true + | (Tok.STARTCODE(_), _, strm') => true + | (Tok.STARTDFN(_), _, strm') => true + | (Tok.STARTDIR(_), _, strm') => true + | (Tok.STARTDIV(_), _, strm') => true + | (Tok.STARTDL(_), _, strm') => true + | (Tok.STARTEM(_), _, strm') => true + | (Tok.STARTFIELDSET(_), _, strm') => true + | (Tok.STARTFONT(_), _, strm') => true + | (Tok.STARTFORM(_), _, strm') => true + | (Tok.STARTH1(_), _, strm') => true + | (Tok.STARTH2(_), _, strm') => true + | (Tok.STARTH3(_), _, strm') => true + | (Tok.STARTH4(_), _, strm') => true + | (Tok.STARTH5(_), _, strm') => true + | (Tok.STARTH6(_), _, strm') => true + | (Tok.STARTHR(_), _, strm') => true + | (Tok.STARTI(_), _, strm') => true + | (Tok.STARTIFRAME(_), _, strm') => true + | (Tok.STARTIMG(_), _, strm') => true + | (Tok.STARTINPUT(_), _, strm') => true + | (Tok.STARTISINDEX(_), _, strm') => true + | (Tok.STARTKBD(_), _, strm') => true + | (Tok.STARTLABEL(_), _, strm') => true + | (Tok.STARTMAP(_), _, strm') => true + | (Tok.STARTMENU(_), _, strm') => true + | (Tok.STARTNOSCRIPT(_), _, strm') => true + | (Tok.STARTOBJECT(_), _, strm') => true + | (Tok.STARTOL(_), _, strm') => true + | (Tok.STARTP(_), _, strm') => true + | (Tok.STARTPRE(_), _, strm') => true + | (Tok.STARTQ(_), _, strm') => true + | (Tok.STARTS(_), _, strm') => true + | (Tok.STARTSAMP(_), _, strm') => true + | (Tok.STARTSCRIPT(_), _, strm') => true + | (Tok.STARTSELECT(_), _, strm') => true + | (Tok.STARTSMALL(_), _, strm') => true + | (Tok.STARTSPAN(_), _, strm') => true + | (Tok.STARTSTRIKE(_), _, strm') => true + | (Tok.STARTSTRONG(_), _, strm') => true + | (Tok.STARTSUB(_), _, strm') => true + | (Tok.STARTSUP(_), _, strm') => true + | (Tok.STARTTABLE(_), _, strm') => true + | (Tok.STARTTEXTAREA(_), _, strm') => true + | (Tok.STARTTT(_), _, strm') => true + | (Tok.STARTU(_), _, strm') => true + | (Tok.STARTUL(_), _, strm') => true + | (Tok.STARTVAR(_), _, strm') => true + | _ => false + (* end case *)) + val (flow_RES, flow_SPAN, strm') = EBNF.closure(fieldset_PROD_1_SUBRULE_1_PRED, fieldset_PROD_1_SUBRULE_1_NT, strm') + val (ENDFIELDSET_RES, ENDFIELDSET_SPAN, strm') = matchENDFIELDSET(strm') + val FULL_SPAN = (#1(STARTFIELDSET_SPAN), #2(ENDFIELDSET_SPAN)) + in + (UserCode.fieldset_PROD_1_ACT (cdata_opt_RES, legend_RES, flow_RES, ENDFIELDSET_RES, STARTFIELDSET_RES, cdata_opt_SPAN : (Lex.pos * Lex.pos), legend_SPAN : (Lex.pos * Lex.pos), flow_SPAN : (Lex.pos * Lex.pos), ENDFIELDSET_SPAN : (Lex.pos * Lex.pos), STARTFIELDSET_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)), + FULL_SPAN, strm') + end +and legend_NT (strm) = let + val (STARTLEGEND_RES, STARTLEGEND_SPAN, strm') = matchSTARTLEGEND(strm) + fun legend_PROD_1_SUBRULE_1_NT (strm) = let + val (inline_RES, inline_SPAN, strm') = inline_NT(strm) + val FULL_SPAN = (#1(inline_SPAN), #2(inline_SPAN)) + in + ((inline_RES), FULL_SPAN, strm') + end + fun legend_PROD_1_SUBRULE_1_PRED (strm) = (case (lex(strm)) + of (Tok.COMMENT(_), _, strm') => true + | (Tok.PCDATA(_), _, strm') => true + | (Tok.CHAR_REF(_), _, strm') => true + | (Tok.ENTITY_REF(_), _, strm') => true + | (Tok.STARTA(_), _, strm') => true + | (Tok.STARTABBR(_), _, strm') => true + | (Tok.STARTACRONYM(_), _, strm') => true + | (Tok.STARTAPPLET(_), _, strm') => true + | (Tok.STARTB(_), _, strm') => true + | (Tok.STARTBASEFONT(_), _, strm') => true + | (Tok.STARTBDO(_), _, strm') => true + | (Tok.STARTBIG(_), _, strm') => true + | (Tok.STARTBR(_), _, strm') => true + | (Tok.STARTBUTTON(_), _, strm') => true + | (Tok.STARTCITE(_), _, strm') => true + | (Tok.STARTCODE(_), _, strm') => true + | (Tok.STARTDFN(_), _, strm') => true + | (Tok.STARTEM(_), _, strm') => true + | (Tok.STARTFONT(_), _, strm') => true + | (Tok.STARTI(_), _, strm') => true + | (Tok.STARTIFRAME(_), _, strm') => true + | (Tok.STARTIMG(_), _, strm') => true + | (Tok.STARTINPUT(_), _, strm') => true + | (Tok.STARTKBD(_), _, strm') => true + | (Tok.STARTLABEL(_), _, strm') => true + | (Tok.STARTMAP(_), _, strm') => true + | (Tok.STARTOBJECT(_), _, strm') => true + | (Tok.STARTQ(_), _, strm') => true + | (Tok.STARTS(_), _, strm') => true + | (Tok.STARTSAMP(_), _, strm') => true + | (Tok.STARTSCRIPT(_), _, strm') => true + | (Tok.STARTSELECT(_), _, strm') => true + | (Tok.STARTSMALL(_), _, strm') => true + | (Tok.STARTSPAN(_), _, strm') => true + | (Tok.STARTSTRIKE(_), _, strm') => true + | (Tok.STARTSTRONG(_), _, strm') => true + | (Tok.STARTSUB(_), _, strm') => true + | (Tok.STARTSUP(_), _, strm') => true + | (Tok.STARTTEXTAREA(_), _, strm') => true + | (Tok.STARTTT(_), _, strm') => true + | (Tok.STARTU(_), _, strm') => true + | (Tok.STARTVAR(_), _, strm') => true + | _ => false + (* end case *)) + val (inline_RES, inline_SPAN, strm') = EBNF.closure(legend_PROD_1_SUBRULE_1_PRED, legend_PROD_1_SUBRULE_1_NT, strm') + val (ENDLEGEND_RES, ENDLEGEND_SPAN, strm') = matchENDLEGEND(strm') + val FULL_SPAN = (#1(STARTLEGEND_SPAN), #2(ENDLEGEND_SPAN)) + in + (UserCode.legend_PROD_1_ACT (inline_RES, ENDLEGEND_RES, STARTLEGEND_RES, inline_SPAN : (Lex.pos * Lex.pos), ENDLEGEND_SPAN : (Lex.pos * Lex.pos), STARTLEGEND_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)), + FULL_SPAN, strm') + end +and table_NT (strm) = let + val (STARTTABLE_RES, STARTTABLE_SPAN, strm') = matchSTARTTABLE(strm) + val (cdata_opt_RES, cdata_opt_SPAN, strm') = cdata_opt_NT(strm') + fun table_PROD_1_SUBRULE_1_NT (strm) = let + val (caption_RES, caption_SPAN, strm') = caption_NT(strm) + val (cdata_opt_RES, cdata_opt_SPAN, strm') = cdata_opt_NT(strm') + val FULL_SPAN = (#1(caption_SPAN), #2(cdata_opt_SPAN)) + in + (UserCode.table_PROD_1_SUBRULE_1_PROD_1_ACT (STARTTABLE_RES, cdata_opt_RES, caption_RES, STARTTABLE_SPAN : (Lex.pos * Lex.pos), cdata_opt_SPAN : (Lex.pos * Lex.pos), caption_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)), + FULL_SPAN, strm') + end + fun table_PROD_1_SUBRULE_1_PRED (strm) = (case (lex(strm)) + of (Tok.STARTCAPTION(_), _, strm') => true + | _ => false + (* end case *)) + val (SR_RES, SR_SPAN, strm') = EBNF.optional(table_PROD_1_SUBRULE_1_PRED, table_PROD_1_SUBRULE_1_NT, strm') + val (col_or_colgroups_RES, col_or_colgroups_SPAN, strm') = col_or_colgroups_NT(strm') + val (table_content_RES, table_content_SPAN, strm') = table_content_NT(strm') + val (ENDTABLE_RES, ENDTABLE_SPAN, strm') = matchENDTABLE(strm') + val FULL_SPAN = (#1(STARTTABLE_SPAN), #2(ENDTABLE_SPAN)) + in + (UserCode.table_PROD_1_ACT (SR_RES, STARTTABLE_RES, cdata_opt_RES, table_content_RES, ENDTABLE_RES, col_or_colgroups_RES, SR_SPAN : (Lex.pos * Lex.pos), STARTTABLE_SPAN : (Lex.pos * Lex.pos), cdata_opt_SPAN : (Lex.pos * Lex.pos), table_content_SPAN : (Lex.pos * Lex.pos), ENDTABLE_SPAN : (Lex.pos * Lex.pos), col_or_colgroups_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)), + FULL_SPAN, strm') + end +and table_content_NT (strm) = let + fun table_content_PROD_1 (strm) = let + val (thead_RES, thead_SPAN, strm') = thead_NT(strm) + fun table_content_PROD_1_SUBRULE_1_NT (strm) = let + val (tfoot_RES, tfoot_SPAN, strm') = tfoot_NT(strm) + val FULL_SPAN = (#1(tfoot_SPAN), #2(tfoot_SPAN)) + in + ((tfoot_RES), FULL_SPAN, strm') + end + fun table_content_PROD_1_SUBRULE_1_PRED (strm) = (case (lex(strm)) + of (Tok.STARTTFOOT(_), _, strm') => true + | _ => false + (* end case *)) + val (tfoot_RES, tfoot_SPAN, strm') = EBNF.optional(table_content_PROD_1_SUBRULE_1_PRED, table_content_PROD_1_SUBRULE_1_NT, strm') + val (tbodies_RES, tbodies_SPAN, strm') = tbodies_NT(strm') + val FULL_SPAN = (#1(thead_SPAN), #2(tbodies_SPAN)) + in + (UserCode.table_content_PROD_1_ACT (tbodies_RES, tfoot_RES, thead_RES, tbodies_SPAN : (Lex.pos * Lex.pos), tfoot_SPAN : (Lex.pos * Lex.pos), thead_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)), + FULL_SPAN, strm') + end + fun table_content_PROD_2 (strm) = let + val (tfoot_RES, tfoot_SPAN, strm') = tfoot_NT(strm) + val (tbodies_RES, tbodies_SPAN, strm') = tbodies_NT(strm') + val FULL_SPAN = (#1(tfoot_SPAN), #2(tbodies_SPAN)) + in + (UserCode.table_content_PROD_2_ACT (tbodies_RES, tfoot_RES, tbodies_SPAN : (Lex.pos * Lex.pos), tfoot_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)), + FULL_SPAN, strm') + end + fun table_content_PROD_3 (strm) = let + val (tbodies_nostart_RES, tbodies_nostart_SPAN, strm') = tbodies_nostart_NT(strm) + val FULL_SPAN = (#1(tbodies_nostart_SPAN), + #2(tbodies_nostart_SPAN)) + in + ((tbodies_nostart_RES), FULL_SPAN, strm') + end + in + (case (lex(strm)) + of (Tok.STARTTBODY(_), _, strm') => table_content_PROD_3(strm) + | (Tok.STARTTR(_), _, strm') => table_content_PROD_3(strm) + | (Tok.STARTTHEAD(_), _, strm') => table_content_PROD_1(strm) + | (Tok.STARTTFOOT(_), _, strm') => table_content_PROD_2(strm) + | _ => fail() + (* end case *)) + end +and tbodies_nostart_NT (strm) = let + fun tbodies_nostart_PROD_1_SUBRULE_1_NT (strm) = let + val (STARTTBODY_RES, STARTTBODY_SPAN, strm') = matchSTARTTBODY(strm) + val (cdata_opt_RES, cdata_opt_SPAN, strm') = cdata_opt_NT(strm') + val FULL_SPAN = (#1(STARTTBODY_SPAN), #2(cdata_opt_SPAN)) + in + (UserCode.tbodies_nostart_PROD_1_SUBRULE_1_PROD_1_ACT (cdata_opt_RES, STARTTBODY_RES, cdata_opt_SPAN : (Lex.pos * Lex.pos), STARTTBODY_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)), + FULL_SPAN, strm') + end + fun tbodies_nostart_PROD_1_SUBRULE_1_PRED (strm) = (case (lex(strm)) + of (Tok.STARTTBODY(_), _, strm') => true + | _ => false + (* end case *)) + val (SR_RES, SR_SPAN, strm') = EBNF.optional(tbodies_nostart_PROD_1_SUBRULE_1_PRED, tbodies_nostart_PROD_1_SUBRULE_1_NT, strm) + fun tbodies_nostart_PROD_1_SUBRULE_2_NT (strm) = let + val (tr_RES, tr_SPAN, strm') = tr_NT(strm) + val FULL_SPAN = (#1(tr_SPAN), #2(tr_SPAN)) + in + ((tr_RES), FULL_SPAN, strm') + end + fun tbodies_nostart_PROD_1_SUBRULE_2_PRED (strm) = (case (lex(strm)) + of (Tok.STARTTR(_), _, strm') => true + | _ => false + (* end case *)) + val (tr_RES, tr_SPAN, strm') = EBNF.posclos(tbodies_nostart_PROD_1_SUBRULE_2_PRED, tbodies_nostart_PROD_1_SUBRULE_2_NT, strm') + fun tbodies_nostart_PROD_1_SUBRULE_3_NT (strm) = let + val (tbodies_rest_RES, tbodies_rest_SPAN, strm') = tbodies_rest_NT(strm) + val FULL_SPAN = (#1(tbodies_rest_SPAN), #2(tbodies_rest_SPAN)) + in + ((tbodies_rest_RES), FULL_SPAN, strm') + end + fun tbodies_nostart_PROD_1_SUBRULE_3_PRED (strm) = (case (lex(strm)) + of (Tok.STARTTBODY(_), _, strm') => true + | (Tok.ENDTBODY, _, strm') => true + | _ => false + (* end case *)) + val (tbodies_rest_RES, tbodies_rest_SPAN, strm') = EBNF.optional(tbodies_nostart_PROD_1_SUBRULE_3_PRED, tbodies_nostart_PROD_1_SUBRULE_3_NT, strm') + val FULL_SPAN = (#1(SR_SPAN), #2(tbodies_rest_SPAN)) + in + (UserCode.tbodies_nostart_PROD_1_ACT (tr_RES, SR_RES, tbodies_rest_RES, tr_SPAN : (Lex.pos * Lex.pos), SR_SPAN : (Lex.pos * Lex.pos), tbodies_rest_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)), + FULL_SPAN, strm') + end +and tbodies_rest_NT (strm) = let + fun tbodies_rest_PROD_1 (strm) = let + val (ENDTBODY_RES, ENDTBODY_SPAN, strm') = matchENDTBODY(strm) + val (cdata_opt_RES, cdata_opt_SPAN, strm') = cdata_opt_NT(strm') + fun tbodies_rest_PROD_1_SUBRULE_1_NT (strm) = let + val (tbodies_RES, tbodies_SPAN, strm') = tbodies_NT(strm) + val FULL_SPAN = (#1(tbodies_SPAN), #2(tbodies_SPAN)) + in + ((tbodies_RES), FULL_SPAN, strm') + end + fun tbodies_rest_PROD_1_SUBRULE_1_PRED (strm) = (case (lex(strm)) + of (Tok.STARTTBODY(_), _, strm') => true + | _ => false + (* end case *)) + val (tbodies_RES, tbodies_SPAN, strm') = EBNF.optional(tbodies_rest_PROD_1_SUBRULE_1_PRED, tbodies_rest_PROD_1_SUBRULE_1_NT, strm') + val FULL_SPAN = (#1(ENDTBODY_SPAN), #2(tbodies_SPAN)) + in + (UserCode.tbodies_rest_PROD_1_ACT (cdata_opt_RES, tbodies_RES, ENDTBODY_RES, cdata_opt_SPAN : (Lex.pos * Lex.pos), tbodies_SPAN : (Lex.pos * Lex.pos), ENDTBODY_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)), + FULL_SPAN, strm') + end + fun tbodies_rest_PROD_2 (strm) = let + val (STARTTBODY_RES, STARTTBODY_SPAN, strm') = matchSTARTTBODY(strm) + val (cdata_opt_RES, cdata_opt_SPAN, strm') = cdata_opt_NT(strm') + fun tbodies_rest_PROD_2_SUBRULE_1_NT (strm) = let + val (tr_RES, tr_SPAN, strm') = tr_NT(strm) + val FULL_SPAN = (#1(tr_SPAN), #2(tr_SPAN)) + in + ((tr_RES), FULL_SPAN, strm') + end + fun tbodies_rest_PROD_2_SUBRULE_1_PRED (strm) = (case (lex(strm)) + of (Tok.STARTTR(_), _, strm') => true + | _ => false + (* end case *)) + val (tr_RES, tr_SPAN, strm') = EBNF.posclos(tbodies_rest_PROD_2_SUBRULE_1_PRED, tbodies_rest_PROD_2_SUBRULE_1_NT, strm') + fun tbodies_rest_PROD_2_SUBRULE_2_NT (strm) = let + val (tbodies_rest_RES, tbodies_rest_SPAN, strm') = tbodies_rest_NT(strm) + val FULL_SPAN = (#1(tbodies_rest_SPAN), + #2(tbodies_rest_SPAN)) + in + ((tbodies_rest_RES), FULL_SPAN, strm') + end + fun tbodies_rest_PROD_2_SUBRULE_2_PRED (strm) = (case (lex(strm)) + of (Tok.STARTTBODY(_), _, strm') => true + | (Tok.ENDTBODY, _, strm') => true + | _ => false + (* end case *)) + val (tbodies_rest_RES, tbodies_rest_SPAN, strm') = EBNF.optional(tbodies_rest_PROD_2_SUBRULE_2_PRED, tbodies_rest_PROD_2_SUBRULE_2_NT, strm') + val FULL_SPAN = (#1(STARTTBODY_SPAN), #2(tbodies_rest_SPAN)) + in + (UserCode.tbodies_rest_PROD_2_ACT (tr_RES, cdata_opt_RES, STARTTBODY_RES, tbodies_rest_RES, tr_SPAN : (Lex.pos * Lex.pos), cdata_opt_SPAN : (Lex.pos * Lex.pos), STARTTBODY_SPAN : (Lex.pos * Lex.pos), tbodies_rest_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)), + FULL_SPAN, strm') + end + in + (case (lex(strm)) + of (Tok.STARTTBODY(_), _, strm') => tbodies_rest_PROD_2(strm) + | (Tok.ENDTBODY, _, strm') => tbodies_rest_PROD_1(strm) + | _ => fail() + (* end case *)) + end +and tr_NT (strm) = let + val (STARTTR_RES, STARTTR_SPAN, strm') = matchSTARTTR(strm) + val (cdata_opt_RES, cdata_opt_SPAN, strm') = cdata_opt_NT(strm') + fun tr_PROD_1_SUBRULE_1_NT (strm) = let + fun tr_PROD_1_SUBRULE_1_PROD_1 (strm) = let + val (th_RES, th_SPAN, strm') = th_NT(strm) + val FULL_SPAN = (#1(th_SPAN), #2(th_SPAN)) + in + ((th_RES), FULL_SPAN, strm') + end + fun tr_PROD_1_SUBRULE_1_PROD_2 (strm) = let + val (td_RES, td_SPAN, strm') = td_NT(strm) + val FULL_SPAN = (#1(td_SPAN), #2(td_SPAN)) + in + ((td_RES), FULL_SPAN, strm') + end + in + (case (lex(strm)) + of (Tok.STARTTD(_), _, strm') => + tr_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTTH(_), _, strm') => + tr_PROD_1_SUBRULE_1_PROD_1(strm) + | _ => fail() + (* end case *)) + end + fun tr_PROD_1_SUBRULE_1_PRED (strm) = (case (lex(strm)) + of (Tok.STARTTD(_), _, strm') => true + | (Tok.STARTTH(_), _, strm') => true + | _ => false + (* end case *)) + val (SR1_RES, SR1_SPAN, strm') = EBNF.posclos(tr_PROD_1_SUBRULE_1_PRED, tr_PROD_1_SUBRULE_1_NT, strm') + fun tr_PROD_1_SUBRULE_2_NT (strm) = let + val (ENDTR_RES, ENDTR_SPAN, strm') = matchENDTR(strm) + val (cdata_opt_RES, cdata_opt_SPAN, strm') = cdata_opt_NT(strm') + val FULL_SPAN = (#1(ENDTR_SPAN), #2(cdata_opt_SPAN)) + in + (UserCode.tr_PROD_1_SUBRULE_2_PROD_1_ACT (ENDTR_RES, SR1_RES, cdata_opt_RES, STARTTR_RES, ENDTR_SPAN : (Lex.pos * Lex.pos), SR1_SPAN : (Lex.pos * Lex.pos), cdata_opt_SPAN : (Lex.pos * Lex.pos), STARTTR_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)), + FULL_SPAN, strm') + end + fun tr_PROD_1_SUBRULE_2_PRED (strm) = (case (lex(strm)) + of (Tok.ENDTR, _, strm') => true + | _ => false + (* end case *)) + val (SR2_RES, SR2_SPAN, strm') = EBNF.optional(tr_PROD_1_SUBRULE_2_PRED, tr_PROD_1_SUBRULE_2_NT, strm') + val FULL_SPAN = (#1(STARTTR_SPAN), #2(SR2_SPAN)) + in + (UserCode.tr_PROD_1_ACT (SR1_RES, SR2_RES, cdata_opt_RES, STARTTR_RES, SR1_SPAN : (Lex.pos * Lex.pos), SR2_SPAN : (Lex.pos * Lex.pos), cdata_opt_SPAN : (Lex.pos * Lex.pos), STARTTR_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)), + FULL_SPAN, strm') + end +and td_NT (strm) = let + val (STARTTD_RES, STARTTD_SPAN, strm') = matchSTARTTD(strm) + fun td_PROD_1_SUBRULE_1_NT (strm) = let + val (flow_RES, flow_SPAN, strm') = flow_NT(strm) + val FULL_SPAN = (#1(flow_SPAN), #2(flow_SPAN)) + in + ((flow_RES), FULL_SPAN, strm') + end + fun td_PROD_1_SUBRULE_1_PRED (strm) = (case (lex(strm)) + of (Tok.COMMENT(_), _, strm') => true + | (Tok.PCDATA(_), _, strm') => true + | (Tok.CHAR_REF(_), _, strm') => true + | (Tok.ENTITY_REF(_), _, strm') => true + | (Tok.STARTA(_), _, strm') => true + | (Tok.STARTABBR(_), _, strm') => true + | (Tok.STARTACRONYM(_), _, strm') => true + | (Tok.STARTADDRESS(_), _, strm') => true + | (Tok.STARTAPPLET(_), _, strm') => true + | (Tok.STARTB(_), _, strm') => true + | (Tok.STARTBASEFONT(_), _, strm') => true + | (Tok.STARTBDO(_), _, strm') => true + | (Tok.STARTBIG(_), _, strm') => true + | (Tok.STARTBLOCKQUOTE(_), _, strm') => true + | (Tok.STARTBR(_), _, strm') => true + | (Tok.STARTBUTTON(_), _, strm') => true + | (Tok.STARTCENTER(_), _, strm') => true + | (Tok.STARTCITE(_), _, strm') => true + | (Tok.STARTCODE(_), _, strm') => true + | (Tok.STARTDFN(_), _, strm') => true + | (Tok.STARTDIR(_), _, strm') => true + | (Tok.STARTDIV(_), _, strm') => true + | (Tok.STARTDL(_), _, strm') => true + | (Tok.STARTEM(_), _, strm') => true + | (Tok.STARTFIELDSET(_), _, strm') => true + | (Tok.STARTFONT(_), _, strm') => true + | (Tok.STARTFORM(_), _, strm') => true + | (Tok.STARTH1(_), _, strm') => true + | (Tok.STARTH2(_), _, strm') => true + | (Tok.STARTH3(_), _, strm') => true + | (Tok.STARTH4(_), _, strm') => true + | (Tok.STARTH5(_), _, strm') => true + | (Tok.STARTH6(_), _, strm') => true + | (Tok.STARTHR(_), _, strm') => true + | (Tok.STARTI(_), _, strm') => true + | (Tok.STARTIFRAME(_), _, strm') => true + | (Tok.STARTIMG(_), _, strm') => true + | (Tok.STARTINPUT(_), _, strm') => true + | (Tok.STARTISINDEX(_), _, strm') => true + | (Tok.STARTKBD(_), _, strm') => true + | (Tok.STARTLABEL(_), _, strm') => true + | (Tok.STARTMAP(_), _, strm') => true + | (Tok.STARTMENU(_), _, strm') => true + | (Tok.STARTNOSCRIPT(_), _, strm') => true + | (Tok.STARTOBJECT(_), _, strm') => true + | (Tok.STARTOL(_), _, strm') => true + | (Tok.STARTP(_), _, strm') => true + | (Tok.STARTPRE(_), _, strm') => true + | (Tok.STARTQ(_), _, strm') => true + | (Tok.STARTS(_), _, strm') => true + | (Tok.STARTSAMP(_), _, strm') => true + | (Tok.STARTSCRIPT(_), _, strm') => true + | (Tok.STARTSELECT(_), _, strm') => true + | (Tok.STARTSMALL(_), _, strm') => true + | (Tok.STARTSPAN(_), _, strm') => true + | (Tok.STARTSTRIKE(_), _, strm') => true + | (Tok.STARTSTRONG(_), _, strm') => true + | (Tok.STARTSUB(_), _, strm') => true + | (Tok.STARTSUP(_), _, strm') => true + | (Tok.STARTTABLE(_), _, strm') => true + | (Tok.STARTTEXTAREA(_), _, strm') => true + | (Tok.STARTTT(_), _, strm') => true + | (Tok.STARTU(_), _, strm') => true + | (Tok.STARTUL(_), _, strm') => true + | (Tok.STARTVAR(_), _, strm') => true + | _ => false + (* end case *)) + val (flow_RES, flow_SPAN, strm') = EBNF.closure(td_PROD_1_SUBRULE_1_PRED, td_PROD_1_SUBRULE_1_NT, strm') + fun td_PROD_1_SUBRULE_2_NT (strm) = let + val (ENDTD_RES, ENDTD_SPAN, strm') = matchENDTD(strm) + val (cdata_opt_RES, cdata_opt_SPAN, strm') = cdata_opt_NT(strm') + val FULL_SPAN = (#1(ENDTD_SPAN), #2(cdata_opt_SPAN)) + in + (UserCode.td_PROD_1_SUBRULE_2_PROD_1_ACT (ENDTD_RES, cdata_opt_RES, STARTTD_RES, flow_RES, ENDTD_SPAN : (Lex.pos * Lex.pos), cdata_opt_SPAN : (Lex.pos * Lex.pos), STARTTD_SPAN : (Lex.pos * Lex.pos), flow_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)), + FULL_SPAN, strm') + end + fun td_PROD_1_SUBRULE_2_PRED (strm) = (case (lex(strm)) + of (Tok.ENDTD, _, strm') => true + | _ => false + (* end case *)) + val (SR_RES, SR_SPAN, strm') = EBNF.optional(td_PROD_1_SUBRULE_2_PRED, td_PROD_1_SUBRULE_2_NT, strm') + val FULL_SPAN = (#1(STARTTD_SPAN), #2(SR_SPAN)) + in + (UserCode.td_PROD_1_ACT (SR_RES, STARTTD_RES, flow_RES, SR_SPAN : (Lex.pos * Lex.pos), STARTTD_SPAN : (Lex.pos * Lex.pos), flow_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)), + FULL_SPAN, strm') + end +and th_NT (strm) = let + val (STARTTH_RES, STARTTH_SPAN, strm') = matchSTARTTH(strm) + fun th_PROD_1_SUBRULE_1_NT (strm) = let + val (flow_RES, flow_SPAN, strm') = flow_NT(strm) + val FULL_SPAN = (#1(flow_SPAN), #2(flow_SPAN)) + in + ((flow_RES), FULL_SPAN, strm') + end + fun th_PROD_1_SUBRULE_1_PRED (strm) = (case (lex(strm)) + of (Tok.COMMENT(_), _, strm') => true + | (Tok.PCDATA(_), _, strm') => true + | (Tok.CHAR_REF(_), _, strm') => true + | (Tok.ENTITY_REF(_), _, strm') => true + | (Tok.STARTA(_), _, strm') => true + | (Tok.STARTABBR(_), _, strm') => true + | (Tok.STARTACRONYM(_), _, strm') => true + | (Tok.STARTADDRESS(_), _, strm') => true + | (Tok.STARTAPPLET(_), _, strm') => true + | (Tok.STARTB(_), _, strm') => true + | (Tok.STARTBASEFONT(_), _, strm') => true + | (Tok.STARTBDO(_), _, strm') => true + | (Tok.STARTBIG(_), _, strm') => true + | (Tok.STARTBLOCKQUOTE(_), _, strm') => true + | (Tok.STARTBR(_), _, strm') => true + | (Tok.STARTBUTTON(_), _, strm') => true + | (Tok.STARTCENTER(_), _, strm') => true + | (Tok.STARTCITE(_), _, strm') => true + | (Tok.STARTCODE(_), _, strm') => true + | (Tok.STARTDFN(_), _, strm') => true + | (Tok.STARTDIR(_), _, strm') => true + | (Tok.STARTDIV(_), _, strm') => true + | (Tok.STARTDL(_), _, strm') => true + | (Tok.STARTEM(_), _, strm') => true + | (Tok.STARTFIELDSET(_), _, strm') => true + | (Tok.STARTFONT(_), _, strm') => true + | (Tok.STARTFORM(_), _, strm') => true + | (Tok.STARTH1(_), _, strm') => true + | (Tok.STARTH2(_), _, strm') => true + | (Tok.STARTH3(_), _, strm') => true + | (Tok.STARTH4(_), _, strm') => true + | (Tok.STARTH5(_), _, strm') => true + | (Tok.STARTH6(_), _, strm') => true + | (Tok.STARTHR(_), _, strm') => true + | (Tok.STARTI(_), _, strm') => true + | (Tok.STARTIFRAME(_), _, strm') => true + | (Tok.STARTIMG(_), _, strm') => true + | (Tok.STARTINPUT(_), _, strm') => true + | (Tok.STARTISINDEX(_), _, strm') => true + | (Tok.STARTKBD(_), _, strm') => true + | (Tok.STARTLABEL(_), _, strm') => true + | (Tok.STARTMAP(_), _, strm') => true + | (Tok.STARTMENU(_), _, strm') => true + | (Tok.STARTNOSCRIPT(_), _, strm') => true + | (Tok.STARTOBJECT(_), _, strm') => true + | (Tok.STARTOL(_), _, strm') => true + | (Tok.STARTP(_), _, strm') => true + | (Tok.STARTPRE(_), _, strm') => true + | (Tok.STARTQ(_), _, strm') => true + | (Tok.STARTS(_), _, strm') => true + | (Tok.STARTSAMP(_), _, strm') => true + | (Tok.STARTSCRIPT(_), _, strm') => true + | (Tok.STARTSELECT(_), _, strm') => true + | (Tok.STARTSMALL(_), _, strm') => true + | (Tok.STARTSPAN(_), _, strm') => true + | (Tok.STARTSTRIKE(_), _, strm') => true + | (Tok.STARTSTRONG(_), _, strm') => true + | (Tok.STARTSUB(_), _, strm') => true + | (Tok.STARTSUP(_), _, strm') => true + | (Tok.STARTTABLE(_), _, strm') => true + | (Tok.STARTTEXTAREA(_), _, strm') => true + | (Tok.STARTTT(_), _, strm') => true + | (Tok.STARTU(_), _, strm') => true + | (Tok.STARTUL(_), _, strm') => true + | (Tok.STARTVAR(_), _, strm') => true + | _ => false + (* end case *)) + val (flow_RES, flow_SPAN, strm') = EBNF.closure(th_PROD_1_SUBRULE_1_PRED, th_PROD_1_SUBRULE_1_NT, strm') + fun th_PROD_1_SUBRULE_2_NT (strm) = let + val (ENDTH_RES, ENDTH_SPAN, strm') = matchENDTH(strm) + val (cdata_opt_RES, cdata_opt_SPAN, strm') = cdata_opt_NT(strm') + val FULL_SPAN = (#1(ENDTH_SPAN), #2(cdata_opt_SPAN)) + in + (UserCode.th_PROD_1_SUBRULE_2_PROD_1_ACT (ENDTH_RES, cdata_opt_RES, STARTTH_RES, flow_RES, ENDTH_SPAN : (Lex.pos * Lex.pos), cdata_opt_SPAN : (Lex.pos * Lex.pos), STARTTH_SPAN : (Lex.pos * Lex.pos), flow_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)), + FULL_SPAN, strm') + end + fun th_PROD_1_SUBRULE_2_PRED (strm) = (case (lex(strm)) + of (Tok.ENDTH, _, strm') => true + | _ => false + (* end case *)) + val (SR_RES, SR_SPAN, strm') = EBNF.optional(th_PROD_1_SUBRULE_2_PRED, th_PROD_1_SUBRULE_2_NT, strm') + val FULL_SPAN = (#1(STARTTH_SPAN), #2(SR_SPAN)) + in + (UserCode.th_PROD_1_ACT (SR_RES, STARTTH_RES, flow_RES, SR_SPAN : (Lex.pos * Lex.pos), STARTTH_SPAN : (Lex.pos * Lex.pos), flow_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)), + FULL_SPAN, strm') + end +and tbodies_NT (strm) = let + val (STARTTBODY_RES, STARTTBODY_SPAN, strm') = matchSTARTTBODY(strm) + val (cdata_opt_RES, cdata_opt_SPAN, strm') = cdata_opt_NT(strm') + fun tbodies_PROD_1_SUBRULE_1_NT (strm) = let + val (tr_RES, tr_SPAN, strm') = tr_NT(strm) + val FULL_SPAN = (#1(tr_SPAN), #2(tr_SPAN)) + in + ((tr_RES), FULL_SPAN, strm') + end + fun tbodies_PROD_1_SUBRULE_1_PRED (strm) = (case (lex(strm)) + of (Tok.STARTTR(_), _, strm') => true + | _ => false + (* end case *)) + val (tr_RES, tr_SPAN, strm') = EBNF.posclos(tbodies_PROD_1_SUBRULE_1_PRED, tbodies_PROD_1_SUBRULE_1_NT, strm') + val (tbodies_rest_RES, tbodies_rest_SPAN, strm') = tbodies_rest_NT(strm') + val FULL_SPAN = (#1(STARTTBODY_SPAN), #2(tbodies_rest_SPAN)) + in + (UserCode.tbodies_PROD_1_ACT (tr_RES, cdata_opt_RES, STARTTBODY_RES, tbodies_rest_RES, tr_SPAN : (Lex.pos * Lex.pos), cdata_opt_SPAN : (Lex.pos * Lex.pos), STARTTBODY_SPAN : (Lex.pos * Lex.pos), tbodies_rest_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)), + FULL_SPAN, strm') + end +and tfoot_NT (strm) = let + val (STARTTFOOT_RES, STARTTFOOT_SPAN, strm') = matchSTARTTFOOT(strm) + val (cdata_opt_RES, cdata_opt_SPAN, strm') = cdata_opt_NT(strm') + fun tfoot_PROD_1_SUBRULE_1_NT (strm) = let + val (tr_RES, tr_SPAN, strm') = tr_NT(strm) + val FULL_SPAN = (#1(tr_SPAN), #2(tr_SPAN)) + in + ((tr_RES), FULL_SPAN, strm') + end + fun tfoot_PROD_1_SUBRULE_1_PRED (strm) = (case (lex(strm)) + of (Tok.STARTTR(_), _, strm') => true + | _ => false + (* end case *)) + val (tr_RES, tr_SPAN, strm') = EBNF.posclos(tfoot_PROD_1_SUBRULE_1_PRED, tfoot_PROD_1_SUBRULE_1_NT, strm') + fun tfoot_PROD_1_SUBRULE_2_NT (strm) = let + val (ENDTFOOT_RES, ENDTFOOT_SPAN, strm') = matchENDTFOOT(strm) + val (cdata_opt_RES, cdata_opt_SPAN, strm') = cdata_opt_NT(strm') + val FULL_SPAN = (#1(ENDTFOOT_SPAN), #2(cdata_opt_SPAN)) + in + (UserCode.tfoot_PROD_1_SUBRULE_2_PROD_1_ACT (tr_RES, STARTTFOOT_RES, cdata_opt_RES, ENDTFOOT_RES, tr_SPAN : (Lex.pos * Lex.pos), STARTTFOOT_SPAN : (Lex.pos * Lex.pos), cdata_opt_SPAN : (Lex.pos * Lex.pos), ENDTFOOT_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)), + FULL_SPAN, strm') + end + fun tfoot_PROD_1_SUBRULE_2_PRED (strm) = (case (lex(strm)) + of (Tok.ENDTFOOT, _, strm') => true + | _ => false + (* end case *)) + val (SR_RES, SR_SPAN, strm') = EBNF.optional(tfoot_PROD_1_SUBRULE_2_PRED, tfoot_PROD_1_SUBRULE_2_NT, strm') + val FULL_SPAN = (#1(STARTTFOOT_SPAN), #2(SR_SPAN)) + in + (UserCode.tfoot_PROD_1_ACT (tr_RES, SR_RES, STARTTFOOT_RES, cdata_opt_RES, tr_SPAN : (Lex.pos * Lex.pos), SR_SPAN : (Lex.pos * Lex.pos), STARTTFOOT_SPAN : (Lex.pos * Lex.pos), cdata_opt_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)), + FULL_SPAN, strm') + end +and thead_NT (strm) = let + val (STARTTHEAD_RES, STARTTHEAD_SPAN, strm') = matchSTARTTHEAD(strm) + val (cdata_opt_RES, cdata_opt_SPAN, strm') = cdata_opt_NT(strm') + fun thead_PROD_1_SUBRULE_1_NT (strm) = let + val (tr_RES, tr_SPAN, strm') = tr_NT(strm) + val FULL_SPAN = (#1(tr_SPAN), #2(tr_SPAN)) + in + ((tr_RES), FULL_SPAN, strm') + end + fun thead_PROD_1_SUBRULE_1_PRED (strm) = (case (lex(strm)) + of (Tok.STARTTR(_), _, strm') => true + | _ => false + (* end case *)) + val (tr_RES, tr_SPAN, strm') = EBNF.posclos(thead_PROD_1_SUBRULE_1_PRED, thead_PROD_1_SUBRULE_1_NT, strm') + fun thead_PROD_1_SUBRULE_2_NT (strm) = let + val (ENDTHEAD_RES, ENDTHEAD_SPAN, strm') = matchENDTHEAD(strm) + val (cdata_opt_RES, cdata_opt_SPAN, strm') = cdata_opt_NT(strm') + val FULL_SPAN = (#1(ENDTHEAD_SPAN), #2(cdata_opt_SPAN)) + in + (UserCode.thead_PROD_1_SUBRULE_2_PROD_1_ACT (tr_RES, STARTTHEAD_RES, ENDTHEAD_RES, cdata_opt_RES, tr_SPAN : (Lex.pos * Lex.pos), STARTTHEAD_SPAN : (Lex.pos * Lex.pos), ENDTHEAD_SPAN : (Lex.pos * Lex.pos), cdata_opt_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)), + FULL_SPAN, strm') + end + fun thead_PROD_1_SUBRULE_2_PRED (strm) = (case (lex(strm)) + of (Tok.ENDTHEAD, _, strm') => true + | _ => false + (* end case *)) + val (SR_RES, SR_SPAN, strm') = EBNF.optional(thead_PROD_1_SUBRULE_2_PRED, thead_PROD_1_SUBRULE_2_NT, strm') + val FULL_SPAN = (#1(STARTTHEAD_SPAN), #2(SR_SPAN)) + in + (UserCode.thead_PROD_1_ACT (tr_RES, SR_RES, STARTTHEAD_RES, cdata_opt_RES, tr_SPAN : (Lex.pos * Lex.pos), SR_SPAN : (Lex.pos * Lex.pos), STARTTHEAD_SPAN : (Lex.pos * Lex.pos), cdata_opt_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)), + FULL_SPAN, strm') + end +and caption_NT (strm) = let + val (STARTCAPTION_RES, STARTCAPTION_SPAN, strm') = matchSTARTCAPTION(strm) + fun caption_PROD_1_SUBRULE_1_NT (strm) = let + val (inline_RES, inline_SPAN, strm') = inline_NT(strm) + val FULL_SPAN = (#1(inline_SPAN), #2(inline_SPAN)) + in + ((inline_RES), FULL_SPAN, strm') + end + fun caption_PROD_1_SUBRULE_1_PRED (strm) = (case (lex(strm)) + of (Tok.COMMENT(_), _, strm') => true + | (Tok.PCDATA(_), _, strm') => true + | (Tok.CHAR_REF(_), _, strm') => true + | (Tok.ENTITY_REF(_), _, strm') => true + | (Tok.STARTA(_), _, strm') => true + | (Tok.STARTABBR(_), _, strm') => true + | (Tok.STARTACRONYM(_), _, strm') => true + | (Tok.STARTAPPLET(_), _, strm') => true + | (Tok.STARTB(_), _, strm') => true + | (Tok.STARTBASEFONT(_), _, strm') => true + | (Tok.STARTBDO(_), _, strm') => true + | (Tok.STARTBIG(_), _, strm') => true + | (Tok.STARTBR(_), _, strm') => true + | (Tok.STARTBUTTON(_), _, strm') => true + | (Tok.STARTCITE(_), _, strm') => true + | (Tok.STARTCODE(_), _, strm') => true + | (Tok.STARTDFN(_), _, strm') => true + | (Tok.STARTEM(_), _, strm') => true + | (Tok.STARTFONT(_), _, strm') => true + | (Tok.STARTI(_), _, strm') => true + | (Tok.STARTIFRAME(_), _, strm') => true + | (Tok.STARTIMG(_), _, strm') => true + | (Tok.STARTINPUT(_), _, strm') => true + | (Tok.STARTKBD(_), _, strm') => true + | (Tok.STARTLABEL(_), _, strm') => true + | (Tok.STARTMAP(_), _, strm') => true + | (Tok.STARTOBJECT(_), _, strm') => true + | (Tok.STARTQ(_), _, strm') => true + | (Tok.STARTS(_), _, strm') => true + | (Tok.STARTSAMP(_), _, strm') => true + | (Tok.STARTSCRIPT(_), _, strm') => true + | (Tok.STARTSELECT(_), _, strm') => true + | (Tok.STARTSMALL(_), _, strm') => true + | (Tok.STARTSPAN(_), _, strm') => true + | (Tok.STARTSTRIKE(_), _, strm') => true + | (Tok.STARTSTRONG(_), _, strm') => true + | (Tok.STARTSUB(_), _, strm') => true + | (Tok.STARTSUP(_), _, strm') => true + | (Tok.STARTTEXTAREA(_), _, strm') => true + | (Tok.STARTTT(_), _, strm') => true + | (Tok.STARTU(_), _, strm') => true + | (Tok.STARTVAR(_), _, strm') => true + | _ => false + (* end case *)) + val (inline_RES, inline_SPAN, strm') = EBNF.closure(caption_PROD_1_SUBRULE_1_PRED, caption_PROD_1_SUBRULE_1_NT, strm') + val (ENDCAPTION_RES, ENDCAPTION_SPAN, strm') = matchENDCAPTION(strm') + val FULL_SPAN = (#1(STARTCAPTION_SPAN), #2(ENDCAPTION_SPAN)) + in + (UserCode.caption_PROD_1_ACT (inline_RES, STARTCAPTION_RES, ENDCAPTION_RES, inline_SPAN : (Lex.pos * Lex.pos), STARTCAPTION_SPAN : (Lex.pos * Lex.pos), ENDCAPTION_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)), + FULL_SPAN, strm') + end +and form_NT (strm) = let + val (STARTFORM_RES, STARTFORM_SPAN, strm') = matchSTARTFORM(strm) + fun form_PROD_1_SUBRULE_1_NT (strm) = let + fun form_PROD_1_SUBRULE_1_PROD_1 (strm) = let + val (cdata_RES, cdata_SPAN, strm') = cdata_NT(strm) + val FULL_SPAN = (#1(cdata_SPAN), #2(cdata_SPAN)) + in + ((cdata_RES), FULL_SPAN, strm') + end + fun form_PROD_1_SUBRULE_1_PROD_2 (strm) = let + val (block_RES, block_SPAN, strm') = block_NT(strm) + val FULL_SPAN = (#1(block_SPAN), #2(block_SPAN)) + in + ((block_RES), FULL_SPAN, strm') + end + fun form_PROD_1_SUBRULE_1_PROD_3 (strm) = let + val (script_RES, script_SPAN, strm') = script_NT(strm) + val FULL_SPAN = (#1(script_SPAN), #2(script_SPAN)) + in + ((script_RES), FULL_SPAN, strm') + end + in + (case (lex(strm)) + of (Tok.STARTSCRIPT(_), _, strm') => + form_PROD_1_SUBRULE_1_PROD_3(strm) + | (Tok.COMMENT(_), _, strm') => + form_PROD_1_SUBRULE_1_PROD_1(strm) + | (Tok.PCDATA(_), _, strm') => + form_PROD_1_SUBRULE_1_PROD_1(strm) + | (Tok.CHAR_REF(_), _, strm') => + form_PROD_1_SUBRULE_1_PROD_1(strm) + | (Tok.ENTITY_REF(_), _, strm') => + form_PROD_1_SUBRULE_1_PROD_1(strm) + | (Tok.STARTADDRESS(_), _, strm') => + form_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTBLOCKQUOTE(_), _, strm') => + form_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTCENTER(_), _, strm') => + form_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTDIR(_), _, strm') => + form_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTDIV(_), _, strm') => + form_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTDL(_), _, strm') => + form_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTFIELDSET(_), _, strm') => + form_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTFORM(_), _, strm') => + form_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTH1(_), _, strm') => + form_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTH2(_), _, strm') => + form_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTH3(_), _, strm') => + form_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTH4(_), _, strm') => + form_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTH5(_), _, strm') => + form_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTH6(_), _, strm') => + form_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTHR(_), _, strm') => + form_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTISINDEX(_), _, strm') => + form_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTMENU(_), _, strm') => + form_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTNOSCRIPT(_), _, strm') => + form_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTOL(_), _, strm') => + form_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTP(_), _, strm') => + form_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTPRE(_), _, strm') => + form_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTTABLE(_), _, strm') => + form_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTUL(_), _, strm') => + form_PROD_1_SUBRULE_1_PROD_2(strm) + | _ => fail() + (* end case *)) + end + fun form_PROD_1_SUBRULE_1_PRED (strm) = (case (lex(strm)) + of (Tok.COMMENT(_), _, strm') => true + | (Tok.PCDATA(_), _, strm') => true + | (Tok.CHAR_REF(_), _, strm') => true + | (Tok.ENTITY_REF(_), _, strm') => true + | (Tok.STARTADDRESS(_), _, strm') => true + | (Tok.STARTBLOCKQUOTE(_), _, strm') => true + | (Tok.STARTCENTER(_), _, strm') => true + | (Tok.STARTDIR(_), _, strm') => true + | (Tok.STARTDIV(_), _, strm') => true + | (Tok.STARTDL(_), _, strm') => true + | (Tok.STARTFIELDSET(_), _, strm') => true + | (Tok.STARTFORM(_), _, strm') => true + | (Tok.STARTH1(_), _, strm') => true + | (Tok.STARTH2(_), _, strm') => true + | (Tok.STARTH3(_), _, strm') => true + | (Tok.STARTH4(_), _, strm') => true + | (Tok.STARTH5(_), _, strm') => true + | (Tok.STARTH6(_), _, strm') => true + | (Tok.STARTHR(_), _, strm') => true + | (Tok.STARTISINDEX(_), _, strm') => true + | (Tok.STARTMENU(_), _, strm') => true + | (Tok.STARTNOSCRIPT(_), _, strm') => true + | (Tok.STARTOL(_), _, strm') => true + | (Tok.STARTP(_), _, strm') => true + | (Tok.STARTPRE(_), _, strm') => true + | (Tok.STARTSCRIPT(_), _, strm') => true + | (Tok.STARTTABLE(_), _, strm') => true + | (Tok.STARTUL(_), _, strm') => true + | _ => false + (* end case *)) + val (SR_RES, SR_SPAN, strm') = EBNF.posclos(form_PROD_1_SUBRULE_1_PRED, form_PROD_1_SUBRULE_1_NT, strm') + val (ENDFORM_RES, ENDFORM_SPAN, strm') = matchENDFORM(strm') + val FULL_SPAN = (#1(STARTFORM_SPAN), #2(ENDFORM_SPAN)) + in + (UserCode.form_PROD_1_ACT (SR_RES, ENDFORM_RES, STARTFORM_RES, SR_SPAN : (Lex.pos * Lex.pos), ENDFORM_SPAN : (Lex.pos * Lex.pos), STARTFORM_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)), + FULL_SPAN, strm') + end +and blockquote_NT (strm) = let + val (STARTBLOCKQUOTE_RES, STARTBLOCKQUOTE_SPAN, strm') = matchSTARTBLOCKQUOTE(strm) + fun blockquote_PROD_1_SUBRULE_1_NT (strm) = let + fun blockquote_PROD_1_SUBRULE_1_PROD_1 (strm) = let + val (block_RES, block_SPAN, strm') = block_NT(strm) + val FULL_SPAN = (#1(block_SPAN), #2(block_SPAN)) + in + ((block_RES), FULL_SPAN, strm') + end + fun blockquote_PROD_1_SUBRULE_1_PROD_2 (strm) = let + val (script_RES, script_SPAN, strm') = script_NT(strm) + val FULL_SPAN = (#1(script_SPAN), #2(script_SPAN)) + in + ((script_RES), FULL_SPAN, strm') + end + fun blockquote_PROD_1_SUBRULE_1_PROD_3 (strm) = let + val (cdata_RES, cdata_SPAN, strm') = cdata_NT(strm) + val FULL_SPAN = (#1(cdata_SPAN), #2(cdata_SPAN)) + in + ((cdata_RES), FULL_SPAN, strm') + end + in + (case (lex(strm)) + of (Tok.COMMENT(_), _, strm') => + blockquote_PROD_1_SUBRULE_1_PROD_3(strm) + | (Tok.PCDATA(_), _, strm') => + blockquote_PROD_1_SUBRULE_1_PROD_3(strm) + | (Tok.CHAR_REF(_), _, strm') => + blockquote_PROD_1_SUBRULE_1_PROD_3(strm) + | (Tok.ENTITY_REF(_), _, strm') => + blockquote_PROD_1_SUBRULE_1_PROD_3(strm) + | (Tok.STARTADDRESS(_), _, strm') => + blockquote_PROD_1_SUBRULE_1_PROD_1(strm) + | (Tok.STARTBLOCKQUOTE(_), _, strm') => + blockquote_PROD_1_SUBRULE_1_PROD_1(strm) + | (Tok.STARTCENTER(_), _, strm') => + blockquote_PROD_1_SUBRULE_1_PROD_1(strm) + | (Tok.STARTDIR(_), _, strm') => + blockquote_PROD_1_SUBRULE_1_PROD_1(strm) + | (Tok.STARTDIV(_), _, strm') => + blockquote_PROD_1_SUBRULE_1_PROD_1(strm) + | (Tok.STARTDL(_), _, strm') => + blockquote_PROD_1_SUBRULE_1_PROD_1(strm) + | (Tok.STARTFIELDSET(_), _, strm') => + blockquote_PROD_1_SUBRULE_1_PROD_1(strm) + | (Tok.STARTFORM(_), _, strm') => + blockquote_PROD_1_SUBRULE_1_PROD_1(strm) + | (Tok.STARTH1(_), _, strm') => + blockquote_PROD_1_SUBRULE_1_PROD_1(strm) + | (Tok.STARTH2(_), _, strm') => + blockquote_PROD_1_SUBRULE_1_PROD_1(strm) + | (Tok.STARTH3(_), _, strm') => + blockquote_PROD_1_SUBRULE_1_PROD_1(strm) + | (Tok.STARTH4(_), _, strm') => + blockquote_PROD_1_SUBRULE_1_PROD_1(strm) + | (Tok.STARTH5(_), _, strm') => + blockquote_PROD_1_SUBRULE_1_PROD_1(strm) + | (Tok.STARTH6(_), _, strm') => + blockquote_PROD_1_SUBRULE_1_PROD_1(strm) + | (Tok.STARTHR(_), _, strm') => + blockquote_PROD_1_SUBRULE_1_PROD_1(strm) + | (Tok.STARTISINDEX(_), _, strm') => + blockquote_PROD_1_SUBRULE_1_PROD_1(strm) + | (Tok.STARTMENU(_), _, strm') => + blockquote_PROD_1_SUBRULE_1_PROD_1(strm) + | (Tok.STARTNOSCRIPT(_), _, strm') => + blockquote_PROD_1_SUBRULE_1_PROD_1(strm) + | (Tok.STARTOL(_), _, strm') => + blockquote_PROD_1_SUBRULE_1_PROD_1(strm) + | (Tok.STARTP(_), _, strm') => + blockquote_PROD_1_SUBRULE_1_PROD_1(strm) + | (Tok.STARTPRE(_), _, strm') => + blockquote_PROD_1_SUBRULE_1_PROD_1(strm) + | (Tok.STARTTABLE(_), _, strm') => + blockquote_PROD_1_SUBRULE_1_PROD_1(strm) + | (Tok.STARTUL(_), _, strm') => + blockquote_PROD_1_SUBRULE_1_PROD_1(strm) + | (Tok.STARTSCRIPT(_), _, strm') => + blockquote_PROD_1_SUBRULE_1_PROD_2(strm) + | _ => fail() + (* end case *)) + end + fun blockquote_PROD_1_SUBRULE_1_PRED (strm) = (case (lex(strm)) + of (Tok.COMMENT(_), _, strm') => true + | (Tok.PCDATA(_), _, strm') => true + | (Tok.CHAR_REF(_), _, strm') => true + | (Tok.ENTITY_REF(_), _, strm') => true + | (Tok.STARTADDRESS(_), _, strm') => true + | (Tok.STARTBLOCKQUOTE(_), _, strm') => true + | (Tok.STARTCENTER(_), _, strm') => true + | (Tok.STARTDIR(_), _, strm') => true + | (Tok.STARTDIV(_), _, strm') => true + | (Tok.STARTDL(_), _, strm') => true + | (Tok.STARTFIELDSET(_), _, strm') => true + | (Tok.STARTFORM(_), _, strm') => true + | (Tok.STARTH1(_), _, strm') => true + | (Tok.STARTH2(_), _, strm') => true + | (Tok.STARTH3(_), _, strm') => true + | (Tok.STARTH4(_), _, strm') => true + | (Tok.STARTH5(_), _, strm') => true + | (Tok.STARTH6(_), _, strm') => true + | (Tok.STARTHR(_), _, strm') => true + | (Tok.STARTISINDEX(_), _, strm') => true + | (Tok.STARTMENU(_), _, strm') => true + | (Tok.STARTNOSCRIPT(_), _, strm') => true + | (Tok.STARTOL(_), _, strm') => true + | (Tok.STARTP(_), _, strm') => true + | (Tok.STARTPRE(_), _, strm') => true + | (Tok.STARTSCRIPT(_), _, strm') => true + | (Tok.STARTTABLE(_), _, strm') => true + | (Tok.STARTUL(_), _, strm') => true + | _ => false + (* end case *)) + val (SR_RES, SR_SPAN, strm') = EBNF.posclos(blockquote_PROD_1_SUBRULE_1_PRED, blockquote_PROD_1_SUBRULE_1_NT, strm') + val (ENDBLOCKQUOTE_RES, ENDBLOCKQUOTE_SPAN, strm') = matchENDBLOCKQUOTE(strm') + val FULL_SPAN = (#1(STARTBLOCKQUOTE_SPAN), #2(ENDBLOCKQUOTE_SPAN)) + in + (UserCode.blockquote_PROD_1_ACT (SR_RES, STARTBLOCKQUOTE_RES, ENDBLOCKQUOTE_RES, SR_SPAN : (Lex.pos * Lex.pos), STARTBLOCKQUOTE_SPAN : (Lex.pos * Lex.pos), ENDBLOCKQUOTE_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)), + FULL_SPAN, strm') + end +and noscript_NT (strm) = let + val (STARTNOSCRIPT_RES, STARTNOSCRIPT_SPAN, strm') = matchSTARTNOSCRIPT(strm) + fun noscript_PROD_1_SUBRULE_1_NT (strm) = let + fun noscript_PROD_1_SUBRULE_1_PROD_1 (strm) = let + val (cdata_RES, cdata_SPAN, strm') = cdata_NT(strm) + val FULL_SPAN = (#1(cdata_SPAN), #2(cdata_SPAN)) + in + ((cdata_RES), FULL_SPAN, strm') + end + fun noscript_PROD_1_SUBRULE_1_PROD_2 (strm) = let + val (block_RES, block_SPAN, strm') = block_NT(strm) + val FULL_SPAN = (#1(block_SPAN), #2(block_SPAN)) + in + ((block_RES), FULL_SPAN, strm') + end + in + (case (lex(strm)) + of (Tok.STARTADDRESS(_), _, strm') => + noscript_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTBLOCKQUOTE(_), _, strm') => + noscript_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTCENTER(_), _, strm') => + noscript_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTDIR(_), _, strm') => + noscript_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTDIV(_), _, strm') => + noscript_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTDL(_), _, strm') => + noscript_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTFIELDSET(_), _, strm') => + noscript_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTFORM(_), _, strm') => + noscript_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTH1(_), _, strm') => + noscript_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTH2(_), _, strm') => + noscript_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTH3(_), _, strm') => + noscript_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTH4(_), _, strm') => + noscript_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTH5(_), _, strm') => + noscript_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTH6(_), _, strm') => + noscript_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTHR(_), _, strm') => + noscript_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTISINDEX(_), _, strm') => + noscript_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTMENU(_), _, strm') => + noscript_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTNOSCRIPT(_), _, strm') => + noscript_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTOL(_), _, strm') => + noscript_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTP(_), _, strm') => + noscript_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTPRE(_), _, strm') => + noscript_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTTABLE(_), _, strm') => + noscript_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTUL(_), _, strm') => + noscript_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.COMMENT(_), _, strm') => + noscript_PROD_1_SUBRULE_1_PROD_1(strm) + | (Tok.PCDATA(_), _, strm') => + noscript_PROD_1_SUBRULE_1_PROD_1(strm) + | (Tok.CHAR_REF(_), _, strm') => + noscript_PROD_1_SUBRULE_1_PROD_1(strm) + | (Tok.ENTITY_REF(_), _, strm') => + noscript_PROD_1_SUBRULE_1_PROD_1(strm) + | _ => fail() + (* end case *)) + end + fun noscript_PROD_1_SUBRULE_1_PRED (strm) = (case (lex(strm)) + of (Tok.COMMENT(_), _, strm') => true + | (Tok.PCDATA(_), _, strm') => true + | (Tok.CHAR_REF(_), _, strm') => true + | (Tok.ENTITY_REF(_), _, strm') => true + | (Tok.STARTADDRESS(_), _, strm') => true + | (Tok.STARTBLOCKQUOTE(_), _, strm') => true + | (Tok.STARTCENTER(_), _, strm') => true + | (Tok.STARTDIR(_), _, strm') => true + | (Tok.STARTDIV(_), _, strm') => true + | (Tok.STARTDL(_), _, strm') => true + | (Tok.STARTFIELDSET(_), _, strm') => true + | (Tok.STARTFORM(_), _, strm') => true + | (Tok.STARTH1(_), _, strm') => true + | (Tok.STARTH2(_), _, strm') => true + | (Tok.STARTH3(_), _, strm') => true + | (Tok.STARTH4(_), _, strm') => true + | (Tok.STARTH5(_), _, strm') => true + | (Tok.STARTH6(_), _, strm') => true + | (Tok.STARTHR(_), _, strm') => true + | (Tok.STARTISINDEX(_), _, strm') => true + | (Tok.STARTMENU(_), _, strm') => true + | (Tok.STARTNOSCRIPT(_), _, strm') => true + | (Tok.STARTOL(_), _, strm') => true + | (Tok.STARTP(_), _, strm') => true + | (Tok.STARTPRE(_), _, strm') => true + | (Tok.STARTTABLE(_), _, strm') => true + | (Tok.STARTUL(_), _, strm') => true + | _ => false + (* end case *)) + val (SR_RES, SR_SPAN, strm') = EBNF.posclos(noscript_PROD_1_SUBRULE_1_PRED, noscript_PROD_1_SUBRULE_1_NT, strm') + val (ENDNOSCRIPT_RES, ENDNOSCRIPT_SPAN, strm') = matchENDNOSCRIPT(strm') + val FULL_SPAN = (#1(STARTNOSCRIPT_SPAN), #2(ENDNOSCRIPT_SPAN)) + in + (UserCode.noscript_PROD_1_ACT (SR_RES, ENDNOSCRIPT_RES, STARTNOSCRIPT_RES, SR_SPAN : (Lex.pos * Lex.pos), ENDNOSCRIPT_SPAN : (Lex.pos * Lex.pos), STARTNOSCRIPT_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)), + FULL_SPAN, strm') + end +and div_NT (strm) = let + val (STARTDIV_RES, STARTDIV_SPAN, strm') = matchSTARTDIV(strm) + fun div_PROD_1_SUBRULE_1_NT (strm) = let + val (flow_RES, flow_SPAN, strm') = flow_NT(strm) + val FULL_SPAN = (#1(flow_SPAN), #2(flow_SPAN)) + in + ((flow_RES), FULL_SPAN, strm') + end + fun div_PROD_1_SUBRULE_1_PRED (strm) = (case (lex(strm)) + of (Tok.COMMENT(_), _, strm') => true + | (Tok.PCDATA(_), _, strm') => true + | (Tok.CHAR_REF(_), _, strm') => true + | (Tok.ENTITY_REF(_), _, strm') => true + | (Tok.STARTA(_), _, strm') => true + | (Tok.STARTABBR(_), _, strm') => true + | (Tok.STARTACRONYM(_), _, strm') => true + | (Tok.STARTADDRESS(_), _, strm') => true + | (Tok.STARTAPPLET(_), _, strm') => true + | (Tok.STARTB(_), _, strm') => true + | (Tok.STARTBASEFONT(_), _, strm') => true + | (Tok.STARTBDO(_), _, strm') => true + | (Tok.STARTBIG(_), _, strm') => true + | (Tok.STARTBLOCKQUOTE(_), _, strm') => true + | (Tok.STARTBR(_), _, strm') => true + | (Tok.STARTBUTTON(_), _, strm') => true + | (Tok.STARTCENTER(_), _, strm') => true + | (Tok.STARTCITE(_), _, strm') => true + | (Tok.STARTCODE(_), _, strm') => true + | (Tok.STARTDFN(_), _, strm') => true + | (Tok.STARTDIR(_), _, strm') => true + | (Tok.STARTDIV(_), _, strm') => true + | (Tok.STARTDL(_), _, strm') => true + | (Tok.STARTEM(_), _, strm') => true + | (Tok.STARTFIELDSET(_), _, strm') => true + | (Tok.STARTFONT(_), _, strm') => true + | (Tok.STARTFORM(_), _, strm') => true + | (Tok.STARTH1(_), _, strm') => true + | (Tok.STARTH2(_), _, strm') => true + | (Tok.STARTH3(_), _, strm') => true + | (Tok.STARTH4(_), _, strm') => true + | (Tok.STARTH5(_), _, strm') => true + | (Tok.STARTH6(_), _, strm') => true + | (Tok.STARTHR(_), _, strm') => true + | (Tok.STARTI(_), _, strm') => true + | (Tok.STARTIFRAME(_), _, strm') => true + | (Tok.STARTIMG(_), _, strm') => true + | (Tok.STARTINPUT(_), _, strm') => true + | (Tok.STARTISINDEX(_), _, strm') => true + | (Tok.STARTKBD(_), _, strm') => true + | (Tok.STARTLABEL(_), _, strm') => true + | (Tok.STARTMAP(_), _, strm') => true + | (Tok.STARTMENU(_), _, strm') => true + | (Tok.STARTNOSCRIPT(_), _, strm') => true + | (Tok.STARTOBJECT(_), _, strm') => true + | (Tok.STARTOL(_), _, strm') => true + | (Tok.STARTP(_), _, strm') => true + | (Tok.STARTPRE(_), _, strm') => true + | (Tok.STARTQ(_), _, strm') => true + | (Tok.STARTS(_), _, strm') => true + | (Tok.STARTSAMP(_), _, strm') => true + | (Tok.STARTSCRIPT(_), _, strm') => true + | (Tok.STARTSELECT(_), _, strm') => true + | (Tok.STARTSMALL(_), _, strm') => true + | (Tok.STARTSPAN(_), _, strm') => true + | (Tok.STARTSTRIKE(_), _, strm') => true + | (Tok.STARTSTRONG(_), _, strm') => true + | (Tok.STARTSUB(_), _, strm') => true + | (Tok.STARTSUP(_), _, strm') => true + | (Tok.STARTTABLE(_), _, strm') => true + | (Tok.STARTTEXTAREA(_), _, strm') => true + | (Tok.STARTTT(_), _, strm') => true + | (Tok.STARTU(_), _, strm') => true + | (Tok.STARTUL(_), _, strm') => true + | (Tok.STARTVAR(_), _, strm') => true + | _ => false + (* end case *)) + val (flow_RES, flow_SPAN, strm') = EBNF.closure(div_PROD_1_SUBRULE_1_PRED, div_PROD_1_SUBRULE_1_NT, strm') + val (ENDDIV_RES, ENDDIV_SPAN, strm') = matchENDDIV(strm') + val FULL_SPAN = (#1(STARTDIV_SPAN), #2(ENDDIV_SPAN)) + in + (UserCode.div_PROD_1_ACT (flow_RES, ENDDIV_RES, STARTDIV_RES, flow_SPAN : (Lex.pos * Lex.pos), ENDDIV_SPAN : (Lex.pos * Lex.pos), STARTDIV_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)), + FULL_SPAN, strm') + end +and dl_NT (strm) = let + val (STARTDL_RES, STARTDL_SPAN, strm') = matchSTARTDL(strm) + val (cdata_opt_RES, cdata_opt_SPAN, strm') = cdata_opt_NT(strm') + fun dl_PROD_1_SUBRULE_1_NT (strm) = let + fun dl_PROD_1_SUBRULE_1_PROD_1 (strm) = let + val (dt_RES, dt_SPAN, strm') = dt_NT(strm) + val FULL_SPAN = (#1(dt_SPAN), #2(dt_SPAN)) + in + ((dt_RES), FULL_SPAN, strm') + end + fun dl_PROD_1_SUBRULE_1_PROD_2 (strm) = let + val (dd_RES, dd_SPAN, strm') = dd_NT(strm) + val FULL_SPAN = (#1(dd_SPAN), #2(dd_SPAN)) + in + ((dd_RES), FULL_SPAN, strm') + end + in + (case (lex(strm)) + of (Tok.STARTDD(_), _, strm') => + dl_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTDT(_), _, strm') => + dl_PROD_1_SUBRULE_1_PROD_1(strm) + | _ => fail() + (* end case *)) + end + fun dl_PROD_1_SUBRULE_1_PRED (strm) = (case (lex(strm)) + of (Tok.STARTDD(_), _, strm') => true + | (Tok.STARTDT(_), _, strm') => true + | _ => false + (* end case *)) + val (SR_RES, SR_SPAN, strm') = EBNF.posclos(dl_PROD_1_SUBRULE_1_PRED, dl_PROD_1_SUBRULE_1_NT, strm') + val (ENDDL_RES, ENDDL_SPAN, strm') = matchENDDL(strm') + val FULL_SPAN = (#1(STARTDL_SPAN), #2(ENDDL_SPAN)) + in + (UserCode.dl_PROD_1_ACT (ENDDL_RES, SR_RES, cdata_opt_RES, STARTDL_RES, ENDDL_SPAN : (Lex.pos * Lex.pos), SR_SPAN : (Lex.pos * Lex.pos), cdata_opt_SPAN : (Lex.pos * Lex.pos), STARTDL_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)), + FULL_SPAN, strm') + end +and dd_NT (strm) = let + val (STARTDD_RES, STARTDD_SPAN, strm') = matchSTARTDD(strm) + fun dd_PROD_1_SUBRULE_1_NT (strm) = let + val (flow_RES, flow_SPAN, strm') = flow_NT(strm) + val FULL_SPAN = (#1(flow_SPAN), #2(flow_SPAN)) + in + ((flow_RES), FULL_SPAN, strm') + end + fun dd_PROD_1_SUBRULE_1_PRED (strm) = (case (lex(strm)) + of (Tok.COMMENT(_), _, strm') => true + | (Tok.PCDATA(_), _, strm') => true + | (Tok.CHAR_REF(_), _, strm') => true + | (Tok.ENTITY_REF(_), _, strm') => true + | (Tok.STARTA(_), _, strm') => true + | (Tok.STARTABBR(_), _, strm') => true + | (Tok.STARTACRONYM(_), _, strm') => true + | (Tok.STARTADDRESS(_), _, strm') => true + | (Tok.STARTAPPLET(_), _, strm') => true + | (Tok.STARTB(_), _, strm') => true + | (Tok.STARTBASEFONT(_), _, strm') => true + | (Tok.STARTBDO(_), _, strm') => true + | (Tok.STARTBIG(_), _, strm') => true + | (Tok.STARTBLOCKQUOTE(_), _, strm') => true + | (Tok.STARTBR(_), _, strm') => true + | (Tok.STARTBUTTON(_), _, strm') => true + | (Tok.STARTCENTER(_), _, strm') => true + | (Tok.STARTCITE(_), _, strm') => true + | (Tok.STARTCODE(_), _, strm') => true + | (Tok.STARTDFN(_), _, strm') => true + | (Tok.STARTDIR(_), _, strm') => true + | (Tok.STARTDIV(_), _, strm') => true + | (Tok.STARTDL(_), _, strm') => true + | (Tok.STARTEM(_), _, strm') => true + | (Tok.STARTFIELDSET(_), _, strm') => true + | (Tok.STARTFONT(_), _, strm') => true + | (Tok.STARTFORM(_), _, strm') => true + | (Tok.STARTH1(_), _, strm') => true + | (Tok.STARTH2(_), _, strm') => true + | (Tok.STARTH3(_), _, strm') => true + | (Tok.STARTH4(_), _, strm') => true + | (Tok.STARTH5(_), _, strm') => true + | (Tok.STARTH6(_), _, strm') => true + | (Tok.STARTHR(_), _, strm') => true + | (Tok.STARTI(_), _, strm') => true + | (Tok.STARTIFRAME(_), _, strm') => true + | (Tok.STARTIMG(_), _, strm') => true + | (Tok.STARTINPUT(_), _, strm') => true + | (Tok.STARTISINDEX(_), _, strm') => true + | (Tok.STARTKBD(_), _, strm') => true + | (Tok.STARTLABEL(_), _, strm') => true + | (Tok.STARTMAP(_), _, strm') => true + | (Tok.STARTMENU(_), _, strm') => true + | (Tok.STARTNOSCRIPT(_), _, strm') => true + | (Tok.STARTOBJECT(_), _, strm') => true + | (Tok.STARTOL(_), _, strm') => true + | (Tok.STARTP(_), _, strm') => true + | (Tok.STARTPRE(_), _, strm') => true + | (Tok.STARTQ(_), _, strm') => true + | (Tok.STARTS(_), _, strm') => true + | (Tok.STARTSAMP(_), _, strm') => true + | (Tok.STARTSCRIPT(_), _, strm') => true + | (Tok.STARTSELECT(_), _, strm') => true + | (Tok.STARTSMALL(_), _, strm') => true + | (Tok.STARTSPAN(_), _, strm') => true + | (Tok.STARTSTRIKE(_), _, strm') => true + | (Tok.STARTSTRONG(_), _, strm') => true + | (Tok.STARTSUB(_), _, strm') => true + | (Tok.STARTSUP(_), _, strm') => true + | (Tok.STARTTABLE(_), _, strm') => true + | (Tok.STARTTEXTAREA(_), _, strm') => true + | (Tok.STARTTT(_), _, strm') => true + | (Tok.STARTU(_), _, strm') => true + | (Tok.STARTUL(_), _, strm') => true + | (Tok.STARTVAR(_), _, strm') => true + | _ => false + (* end case *)) + val (flow_RES, flow_SPAN, strm') = EBNF.closure(dd_PROD_1_SUBRULE_1_PRED, dd_PROD_1_SUBRULE_1_NT, strm') + fun dd_PROD_1_SUBRULE_2_NT (strm) = let + val (ENDDD_RES, ENDDD_SPAN, strm') = matchENDDD(strm) + val FULL_SPAN = (#1(ENDDD_SPAN), #2(ENDDD_SPAN)) + in + (UserCode.dd_PROD_1_SUBRULE_2_PROD_1_ACT (ENDDD_RES, STARTDD_RES, flow_RES, ENDDD_SPAN : (Lex.pos * Lex.pos), STARTDD_SPAN : (Lex.pos * Lex.pos), flow_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)), + FULL_SPAN, strm') + end + fun dd_PROD_1_SUBRULE_2_PRED (strm) = (case (lex(strm)) + of (Tok.ENDDD, _, strm') => true + | _ => false + (* end case *)) + val (SR_RES, SR_SPAN, strm') = EBNF.optional(dd_PROD_1_SUBRULE_2_PRED, dd_PROD_1_SUBRULE_2_NT, strm') + val FULL_SPAN = (#1(STARTDD_SPAN), #2(SR_SPAN)) + in + (UserCode.dd_PROD_1_ACT (SR_RES, STARTDD_RES, flow_RES, SR_SPAN : (Lex.pos * Lex.pos), STARTDD_SPAN : (Lex.pos * Lex.pos), flow_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)), + FULL_SPAN, strm') + end +and dt_NT (strm) = let + val (STARTDT_RES, STARTDT_SPAN, strm') = matchSTARTDT(strm) + fun dt_PROD_1_SUBRULE_1_NT (strm) = let + val (inline_RES, inline_SPAN, strm') = inline_NT(strm) + val FULL_SPAN = (#1(inline_SPAN), #2(inline_SPAN)) + in + ((inline_RES), FULL_SPAN, strm') + end + fun dt_PROD_1_SUBRULE_1_PRED (strm) = (case (lex(strm)) + of (Tok.COMMENT(_), _, strm') => true + | (Tok.PCDATA(_), _, strm') => true + | (Tok.CHAR_REF(_), _, strm') => true + | (Tok.ENTITY_REF(_), _, strm') => true + | (Tok.STARTA(_), _, strm') => true + | (Tok.STARTABBR(_), _, strm') => true + | (Tok.STARTACRONYM(_), _, strm') => true + | (Tok.STARTAPPLET(_), _, strm') => true + | (Tok.STARTB(_), _, strm') => true + | (Tok.STARTBASEFONT(_), _, strm') => true + | (Tok.STARTBDO(_), _, strm') => true + | (Tok.STARTBIG(_), _, strm') => true + | (Tok.STARTBR(_), _, strm') => true + | (Tok.STARTBUTTON(_), _, strm') => true + | (Tok.STARTCITE(_), _, strm') => true + | (Tok.STARTCODE(_), _, strm') => true + | (Tok.STARTDFN(_), _, strm') => true + | (Tok.STARTEM(_), _, strm') => true + | (Tok.STARTFONT(_), _, strm') => true + | (Tok.STARTI(_), _, strm') => true + | (Tok.STARTIFRAME(_), _, strm') => true + | (Tok.STARTIMG(_), _, strm') => true + | (Tok.STARTINPUT(_), _, strm') => true + | (Tok.STARTKBD(_), _, strm') => true + | (Tok.STARTLABEL(_), _, strm') => true + | (Tok.STARTMAP(_), _, strm') => true + | (Tok.STARTOBJECT(_), _, strm') => true + | (Tok.STARTQ(_), _, strm') => true + | (Tok.STARTS(_), _, strm') => true + | (Tok.STARTSAMP(_), _, strm') => true + | (Tok.STARTSCRIPT(_), _, strm') => true + | (Tok.STARTSELECT(_), _, strm') => true + | (Tok.STARTSMALL(_), _, strm') => true + | (Tok.STARTSPAN(_), _, strm') => true + | (Tok.STARTSTRIKE(_), _, strm') => true + | (Tok.STARTSTRONG(_), _, strm') => true + | (Tok.STARTSUB(_), _, strm') => true + | (Tok.STARTSUP(_), _, strm') => true + | (Tok.STARTTEXTAREA(_), _, strm') => true + | (Tok.STARTTT(_), _, strm') => true + | (Tok.STARTU(_), _, strm') => true + | (Tok.STARTVAR(_), _, strm') => true + | _ => false + (* end case *)) + val (inline_RES, inline_SPAN, strm') = EBNF.closure(dt_PROD_1_SUBRULE_1_PRED, dt_PROD_1_SUBRULE_1_NT, strm') + fun dt_PROD_1_SUBRULE_2_NT (strm) = let + val (ENDDT_RES, ENDDT_SPAN, strm') = matchENDDT(strm) + val FULL_SPAN = (#1(ENDDT_SPAN), #2(ENDDT_SPAN)) + in + (UserCode.dt_PROD_1_SUBRULE_2_PROD_1_ACT (ENDDT_RES, inline_RES, STARTDT_RES, ENDDT_SPAN : (Lex.pos * Lex.pos), inline_SPAN : (Lex.pos * Lex.pos), STARTDT_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)), + FULL_SPAN, strm') + end + fun dt_PROD_1_SUBRULE_2_PRED (strm) = (case (lex(strm)) + of (Tok.ENDDT, _, strm') => true + | _ => false + (* end case *)) + val (SR_RES, SR_SPAN, strm') = EBNF.optional(dt_PROD_1_SUBRULE_2_PRED, dt_PROD_1_SUBRULE_2_NT, strm') + val FULL_SPAN = (#1(STARTDT_SPAN), #2(SR_SPAN)) + in + (UserCode.dt_PROD_1_ACT (SR_RES, inline_RES, STARTDT_RES, SR_SPAN : (Lex.pos * Lex.pos), inline_SPAN : (Lex.pos * Lex.pos), STARTDT_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)), + FULL_SPAN, strm') + end +and preformatted_NT (strm) = let + val (pre_RES, pre_SPAN, strm') = pre_NT(strm) + val FULL_SPAN = (#1(pre_SPAN), #2(pre_SPAN)) + in + ((pre_RES), FULL_SPAN, strm') + end +and pre_NT (strm) = let + val (STARTPRE_RES, STARTPRE_SPAN, strm') = matchSTARTPRE(strm) + fun pre_PROD_1_SUBRULE_1_NT (strm) = let + val (inline_RES, inline_SPAN, strm') = inline_NT(strm) + val FULL_SPAN = (#1(inline_SPAN), #2(inline_SPAN)) + in + ((inline_RES), FULL_SPAN, strm') + end + fun pre_PROD_1_SUBRULE_1_PRED (strm) = (case (lex(strm)) + of (Tok.COMMENT(_), _, strm') => true + | (Tok.PCDATA(_), _, strm') => true + | (Tok.CHAR_REF(_), _, strm') => true + | (Tok.ENTITY_REF(_), _, strm') => true + | (Tok.STARTA(_), _, strm') => true + | (Tok.STARTABBR(_), _, strm') => true + | (Tok.STARTACRONYM(_), _, strm') => true + | (Tok.STARTAPPLET(_), _, strm') => true + | (Tok.STARTB(_), _, strm') => true + | (Tok.STARTBASEFONT(_), _, strm') => true + | (Tok.STARTBDO(_), _, strm') => true + | (Tok.STARTBIG(_), _, strm') => true + | (Tok.STARTBR(_), _, strm') => true + | (Tok.STARTBUTTON(_), _, strm') => true + | (Tok.STARTCITE(_), _, strm') => true + | (Tok.STARTCODE(_), _, strm') => true + | (Tok.STARTDFN(_), _, strm') => true + | (Tok.STARTEM(_), _, strm') => true + | (Tok.STARTFONT(_), _, strm') => true + | (Tok.STARTI(_), _, strm') => true + | (Tok.STARTIFRAME(_), _, strm') => true + | (Tok.STARTIMG(_), _, strm') => true + | (Tok.STARTINPUT(_), _, strm') => true + | (Tok.STARTKBD(_), _, strm') => true + | (Tok.STARTLABEL(_), _, strm') => true + | (Tok.STARTMAP(_), _, strm') => true + | (Tok.STARTOBJECT(_), _, strm') => true + | (Tok.STARTQ(_), _, strm') => true + | (Tok.STARTS(_), _, strm') => true + | (Tok.STARTSAMP(_), _, strm') => true + | (Tok.STARTSCRIPT(_), _, strm') => true + | (Tok.STARTSELECT(_), _, strm') => true + | (Tok.STARTSMALL(_), _, strm') => true + | (Tok.STARTSPAN(_), _, strm') => true + | (Tok.STARTSTRIKE(_), _, strm') => true + | (Tok.STARTSTRONG(_), _, strm') => true + | (Tok.STARTSUB(_), _, strm') => true + | (Tok.STARTSUP(_), _, strm') => true + | (Tok.STARTTEXTAREA(_), _, strm') => true + | (Tok.STARTTT(_), _, strm') => true + | (Tok.STARTU(_), _, strm') => true + | (Tok.STARTVAR(_), _, strm') => true + | _ => false + (* end case *)) + val (inline_RES, inline_SPAN, strm') = EBNF.closure(pre_PROD_1_SUBRULE_1_PRED, pre_PROD_1_SUBRULE_1_NT, strm') + val (ENDPRE_RES, ENDPRE_SPAN, strm') = matchENDPRE(strm') + val FULL_SPAN = (#1(STARTPRE_SPAN), #2(ENDPRE_SPAN)) + in + (UserCode.pre_PROD_1_ACT (inline_RES, ENDPRE_RES, STARTPRE_RES, inline_SPAN : (Lex.pos * Lex.pos), ENDPRE_SPAN : (Lex.pos * Lex.pos), STARTPRE_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)), + FULL_SPAN, strm') + end +and list_NT (strm) = let + fun list_PROD_1 (strm) = let + val (ul_RES, ul_SPAN, strm') = ul_NT(strm) + val FULL_SPAN = (#1(ul_SPAN), #2(ul_SPAN)) + in + ((ul_RES), FULL_SPAN, strm') + end + fun list_PROD_2 (strm) = let + val (ol_RES, ol_SPAN, strm') = ol_NT(strm) + val FULL_SPAN = (#1(ol_SPAN), #2(ol_SPAN)) + in + ((ol_RES), FULL_SPAN, strm') + end + fun list_PROD_3 (strm) = let + val (list_loose_RES, list_loose_SPAN, strm') = list_loose_NT(strm) + val FULL_SPAN = (#1(list_loose_SPAN), #2(list_loose_SPAN)) + in + ((list_loose_RES), FULL_SPAN, strm') + end + in + (case (lex(strm)) + of (Tok.STARTDIR(_), _, strm') => list_PROD_3(strm) + | (Tok.STARTMENU(_), _, strm') => list_PROD_3(strm) + | (Tok.STARTUL(_), _, strm') => list_PROD_1(strm) + | (Tok.STARTOL(_), _, strm') => list_PROD_2(strm) + | _ => fail() + (* end case *)) + end +and list_loose_NT (strm) = let + fun list_loose_PROD_1 (strm) = let + val (dir_RES, dir_SPAN, strm') = dir_NT(strm) + val FULL_SPAN = (#1(dir_SPAN), #2(dir_SPAN)) + in + ((dir_RES), FULL_SPAN, strm') + end + fun list_loose_PROD_2 (strm) = let + val (menu_RES, menu_SPAN, strm') = menu_NT(strm) + val FULL_SPAN = (#1(menu_SPAN), #2(menu_SPAN)) + in + ((menu_RES), FULL_SPAN, strm') + end + in + (case (lex(strm)) + of (Tok.STARTMENU(_), _, strm') => list_loose_PROD_2(strm) + | (Tok.STARTDIR(_), _, strm') => list_loose_PROD_1(strm) + | _ => fail() + (* end case *)) + end +and menu_NT (strm) = let + val (STARTMENU_RES, STARTMENU_SPAN, strm') = matchSTARTMENU(strm) + val (cdata_opt_RES, cdata_opt_SPAN, strm') = cdata_opt_NT(strm') + fun menu_PROD_1_SUBRULE_1_NT (strm) = let + val (li_RES, li_SPAN, strm') = li_NT(strm) + val FULL_SPAN = (#1(li_SPAN), #2(li_SPAN)) + in + ((li_RES), FULL_SPAN, strm') + end + fun menu_PROD_1_SUBRULE_1_PRED (strm) = (case (lex(strm)) + of (Tok.STARTLI(_), _, strm') => true + | _ => false + (* end case *)) + val (li_RES, li_SPAN, strm') = EBNF.posclos(menu_PROD_1_SUBRULE_1_PRED, menu_PROD_1_SUBRULE_1_NT, strm') + val (ENDMENU_RES, ENDMENU_SPAN, strm') = matchENDMENU(strm') + val FULL_SPAN = (#1(STARTMENU_SPAN), #2(ENDMENU_SPAN)) + in + (UserCode.menu_PROD_1_ACT (li_RES, cdata_opt_RES, STARTMENU_RES, ENDMENU_RES, li_SPAN : (Lex.pos * Lex.pos), cdata_opt_SPAN : (Lex.pos * Lex.pos), STARTMENU_SPAN : (Lex.pos * Lex.pos), ENDMENU_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)), + FULL_SPAN, strm') + end +and li_NT (strm) = let + val (STARTLI_RES, STARTLI_SPAN, strm') = matchSTARTLI(strm) + fun li_PROD_1_SUBRULE_1_NT (strm) = let + val (flow_RES, flow_SPAN, strm') = flow_NT(strm) + val FULL_SPAN = (#1(flow_SPAN), #2(flow_SPAN)) + in + ((flow_RES), FULL_SPAN, strm') + end + fun li_PROD_1_SUBRULE_1_PRED (strm) = (case (lex(strm)) + of (Tok.COMMENT(_), _, strm') => true + | (Tok.PCDATA(_), _, strm') => true + | (Tok.CHAR_REF(_), _, strm') => true + | (Tok.ENTITY_REF(_), _, strm') => true + | (Tok.STARTA(_), _, strm') => true + | (Tok.STARTABBR(_), _, strm') => true + | (Tok.STARTACRONYM(_), _, strm') => true + | (Tok.STARTADDRESS(_), _, strm') => true + | (Tok.STARTAPPLET(_), _, strm') => true + | (Tok.STARTB(_), _, strm') => true + | (Tok.STARTBASEFONT(_), _, strm') => true + | (Tok.STARTBDO(_), _, strm') => true + | (Tok.STARTBIG(_), _, strm') => true + | (Tok.STARTBLOCKQUOTE(_), _, strm') => true + | (Tok.STARTBR(_), _, strm') => true + | (Tok.STARTBUTTON(_), _, strm') => true + | (Tok.STARTCENTER(_), _, strm') => true + | (Tok.STARTCITE(_), _, strm') => true + | (Tok.STARTCODE(_), _, strm') => true + | (Tok.STARTDFN(_), _, strm') => true + | (Tok.STARTDIR(_), _, strm') => true + | (Tok.STARTDIV(_), _, strm') => true + | (Tok.STARTDL(_), _, strm') => true + | (Tok.STARTEM(_), _, strm') => true + | (Tok.STARTFIELDSET(_), _, strm') => true + | (Tok.STARTFONT(_), _, strm') => true + | (Tok.STARTFORM(_), _, strm') => true + | (Tok.STARTH1(_), _, strm') => true + | (Tok.STARTH2(_), _, strm') => true + | (Tok.STARTH3(_), _, strm') => true + | (Tok.STARTH4(_), _, strm') => true + | (Tok.STARTH5(_), _, strm') => true + | (Tok.STARTH6(_), _, strm') => true + | (Tok.STARTHR(_), _, strm') => true + | (Tok.STARTI(_), _, strm') => true + | (Tok.STARTIFRAME(_), _, strm') => true + | (Tok.STARTIMG(_), _, strm') => true + | (Tok.STARTINPUT(_), _, strm') => true + | (Tok.STARTISINDEX(_), _, strm') => true + | (Tok.STARTKBD(_), _, strm') => true + | (Tok.STARTLABEL(_), _, strm') => true + | (Tok.STARTMAP(_), _, strm') => true + | (Tok.STARTMENU(_), _, strm') => true + | (Tok.STARTNOSCRIPT(_), _, strm') => true + | (Tok.STARTOBJECT(_), _, strm') => true + | (Tok.STARTOL(_), _, strm') => true + | (Tok.STARTP(_), _, strm') => true + | (Tok.STARTPRE(_), _, strm') => true + | (Tok.STARTQ(_), _, strm') => true + | (Tok.STARTS(_), _, strm') => true + | (Tok.STARTSAMP(_), _, strm') => true + | (Tok.STARTSCRIPT(_), _, strm') => true + | (Tok.STARTSELECT(_), _, strm') => true + | (Tok.STARTSMALL(_), _, strm') => true + | (Tok.STARTSPAN(_), _, strm') => true + | (Tok.STARTSTRIKE(_), _, strm') => true + | (Tok.STARTSTRONG(_), _, strm') => true + | (Tok.STARTSUB(_), _, strm') => true + | (Tok.STARTSUP(_), _, strm') => true + | (Tok.STARTTABLE(_), _, strm') => true + | (Tok.STARTTEXTAREA(_), _, strm') => true + | (Tok.STARTTT(_), _, strm') => true + | (Tok.STARTU(_), _, strm') => true + | (Tok.STARTUL(_), _, strm') => true + | (Tok.STARTVAR(_), _, strm') => true + | _ => false + (* end case *)) + val (flow_RES, flow_SPAN, strm') = EBNF.closure(li_PROD_1_SUBRULE_1_PRED, li_PROD_1_SUBRULE_1_NT, strm') + fun li_PROD_1_SUBRULE_2_NT (strm) = let + val (ENDLI_RES, ENDLI_SPAN, strm') = matchENDLI(strm) + val FULL_SPAN = (#1(ENDLI_SPAN), #2(ENDLI_SPAN)) + in + (UserCode.li_PROD_1_SUBRULE_2_PROD_1_ACT (ENDLI_RES, STARTLI_RES, flow_RES, ENDLI_SPAN : (Lex.pos * Lex.pos), STARTLI_SPAN : (Lex.pos * Lex.pos), flow_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)), + FULL_SPAN, strm') + end + fun li_PROD_1_SUBRULE_2_PRED (strm) = (case (lex(strm)) + of (Tok.ENDLI, _, strm') => true + | _ => false + (* end case *)) + val (SR_RES, SR_SPAN, strm') = EBNF.optional(li_PROD_1_SUBRULE_2_PRED, li_PROD_1_SUBRULE_2_NT, strm') + val FULL_SPAN = (#1(STARTLI_SPAN), #2(SR_SPAN)) + in + (UserCode.li_PROD_1_ACT (SR_RES, STARTLI_RES, flow_RES, SR_SPAN : (Lex.pos * Lex.pos), STARTLI_SPAN : (Lex.pos * Lex.pos), flow_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)), + FULL_SPAN, strm') + end +and dir_NT (strm) = let + val (STARTDIR_RES, STARTDIR_SPAN, strm') = matchSTARTDIR(strm) + val (cdata_opt_RES, cdata_opt_SPAN, strm') = cdata_opt_NT(strm') + fun dir_PROD_1_SUBRULE_1_NT (strm) = let + val (li_RES, li_SPAN, strm') = li_NT(strm) + val FULL_SPAN = (#1(li_SPAN), #2(li_SPAN)) + in + ((li_RES), FULL_SPAN, strm') + end + fun dir_PROD_1_SUBRULE_1_PRED (strm) = (case (lex(strm)) + of (Tok.STARTLI(_), _, strm') => true + | _ => false + (* end case *)) + val (li_RES, li_SPAN, strm') = EBNF.posclos(dir_PROD_1_SUBRULE_1_PRED, dir_PROD_1_SUBRULE_1_NT, strm') + val (ENDDIR_RES, ENDDIR_SPAN, strm') = matchENDDIR(strm') + val FULL_SPAN = (#1(STARTDIR_SPAN), #2(ENDDIR_SPAN)) + in + (UserCode.dir_PROD_1_ACT (li_RES, cdata_opt_RES, ENDDIR_RES, STARTDIR_RES, li_SPAN : (Lex.pos * Lex.pos), cdata_opt_SPAN : (Lex.pos * Lex.pos), ENDDIR_SPAN : (Lex.pos * Lex.pos), STARTDIR_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)), + FULL_SPAN, strm') + end +and ol_NT (strm) = let + val (STARTOL_RES, STARTOL_SPAN, strm') = matchSTARTOL(strm) + val (cdata_opt_RES, cdata_opt_SPAN, strm') = cdata_opt_NT(strm') + fun ol_PROD_1_SUBRULE_1_NT (strm) = let + val (li_RES, li_SPAN, strm') = li_NT(strm) + val FULL_SPAN = (#1(li_SPAN), #2(li_SPAN)) + in + ((li_RES), FULL_SPAN, strm') + end + fun ol_PROD_1_SUBRULE_1_PRED (strm) = (case (lex(strm)) + of (Tok.STARTLI(_), _, strm') => true + | _ => false + (* end case *)) + val (li_RES, li_SPAN, strm') = EBNF.posclos(ol_PROD_1_SUBRULE_1_PRED, ol_PROD_1_SUBRULE_1_NT, strm') + val (ENDOL_RES, ENDOL_SPAN, strm') = matchENDOL(strm') + val FULL_SPAN = (#1(STARTOL_SPAN), #2(ENDOL_SPAN)) + in + (UserCode.ol_PROD_1_ACT (ENDOL_RES, li_RES, cdata_opt_RES, STARTOL_RES, ENDOL_SPAN : (Lex.pos * Lex.pos), li_SPAN : (Lex.pos * Lex.pos), cdata_opt_SPAN : (Lex.pos * Lex.pos), STARTOL_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)), + FULL_SPAN, strm') + end +and ul_NT (strm) = let + val (STARTUL_RES, STARTUL_SPAN, strm') = matchSTARTUL(strm) + val (cdata_opt_RES, cdata_opt_SPAN, strm') = cdata_opt_NT(strm') + fun ul_PROD_1_SUBRULE_1_NT (strm) = let + val (li_RES, li_SPAN, strm') = li_NT(strm) + val FULL_SPAN = (#1(li_SPAN), #2(li_SPAN)) + in + ((li_RES), FULL_SPAN, strm') + end + fun ul_PROD_1_SUBRULE_1_PRED (strm) = (case (lex(strm)) + of (Tok.STARTLI(_), _, strm') => true + | _ => false + (* end case *)) + val (li_RES, li_SPAN, strm') = EBNF.posclos(ul_PROD_1_SUBRULE_1_PRED, ul_PROD_1_SUBRULE_1_NT, strm') + val (ENDUL_RES, ENDUL_SPAN, strm') = matchENDUL(strm') + val FULL_SPAN = (#1(STARTUL_SPAN), #2(ENDUL_SPAN)) + in + (UserCode.ul_PROD_1_ACT (li_RES, ENDUL_RES, cdata_opt_RES, STARTUL_RES, li_SPAN : (Lex.pos * Lex.pos), ENDUL_SPAN : (Lex.pos * Lex.pos), cdata_opt_SPAN : (Lex.pos * Lex.pos), STARTUL_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)), + FULL_SPAN, strm') + end +and heading_NT (strm) = let + fun heading_PROD_1 (strm) = let + val (h1_RES, h1_SPAN, strm') = h1_NT(strm) + val FULL_SPAN = (#1(h1_SPAN), #2(h1_SPAN)) + in + ((h1_RES), FULL_SPAN, strm') + end + fun heading_PROD_2 (strm) = let + val (h2_RES, h2_SPAN, strm') = h2_NT(strm) + val FULL_SPAN = (#1(h2_SPAN), #2(h2_SPAN)) + in + ((h2_RES), FULL_SPAN, strm') + end + fun heading_PROD_3 (strm) = let + val (h3_RES, h3_SPAN, strm') = h3_NT(strm) + val FULL_SPAN = (#1(h3_SPAN), #2(h3_SPAN)) + in + ((h3_RES), FULL_SPAN, strm') + end + fun heading_PROD_4 (strm) = let + val (h4_RES, h4_SPAN, strm') = h4_NT(strm) + val FULL_SPAN = (#1(h4_SPAN), #2(h4_SPAN)) + in + ((h4_RES), FULL_SPAN, strm') + end + fun heading_PROD_5 (strm) = let + val (h5_RES, h5_SPAN, strm') = h5_NT(strm) + val FULL_SPAN = (#1(h5_SPAN), #2(h5_SPAN)) + in + ((h5_RES), FULL_SPAN, strm') + end + fun heading_PROD_6 (strm) = let + val (h6_RES, h6_SPAN, strm') = h6_NT(strm) + val FULL_SPAN = (#1(h6_SPAN), #2(h6_SPAN)) + in + ((h6_RES), FULL_SPAN, strm') + end + in + (case (lex(strm)) + of (Tok.STARTH6(_), _, strm') => heading_PROD_6(strm) + | (Tok.STARTH4(_), _, strm') => heading_PROD_4(strm) + | (Tok.STARTH2(_), _, strm') => heading_PROD_2(strm) + | (Tok.STARTH1(_), _, strm') => heading_PROD_1(strm) + | (Tok.STARTH3(_), _, strm') => heading_PROD_3(strm) + | (Tok.STARTH5(_), _, strm') => heading_PROD_5(strm) + | _ => fail() + (* end case *)) + end +and h6_NT (strm) = let + val (STARTH6_RES, STARTH6_SPAN, strm') = matchSTARTH6(strm) + fun h6_PROD_1_SUBRULE_1_NT (strm) = let + val (inline_RES, inline_SPAN, strm') = inline_NT(strm) + val FULL_SPAN = (#1(inline_SPAN), #2(inline_SPAN)) + in + ((inline_RES), FULL_SPAN, strm') + end + fun h6_PROD_1_SUBRULE_1_PRED (strm) = (case (lex(strm)) + of (Tok.COMMENT(_), _, strm') => true + | (Tok.PCDATA(_), _, strm') => true + | (Tok.CHAR_REF(_), _, strm') => true + | (Tok.ENTITY_REF(_), _, strm') => true + | (Tok.STARTA(_), _, strm') => true + | (Tok.STARTABBR(_), _, strm') => true + | (Tok.STARTACRONYM(_), _, strm') => true + | (Tok.STARTAPPLET(_), _, strm') => true + | (Tok.STARTB(_), _, strm') => true + | (Tok.STARTBASEFONT(_), _, strm') => true + | (Tok.STARTBDO(_), _, strm') => true + | (Tok.STARTBIG(_), _, strm') => true + | (Tok.STARTBR(_), _, strm') => true + | (Tok.STARTBUTTON(_), _, strm') => true + | (Tok.STARTCITE(_), _, strm') => true + | (Tok.STARTCODE(_), _, strm') => true + | (Tok.STARTDFN(_), _, strm') => true + | (Tok.STARTEM(_), _, strm') => true + | (Tok.STARTFONT(_), _, strm') => true + | (Tok.STARTI(_), _, strm') => true + | (Tok.STARTIFRAME(_), _, strm') => true + | (Tok.STARTIMG(_), _, strm') => true + | (Tok.STARTINPUT(_), _, strm') => true + | (Tok.STARTKBD(_), _, strm') => true + | (Tok.STARTLABEL(_), _, strm') => true + | (Tok.STARTMAP(_), _, strm') => true + | (Tok.STARTOBJECT(_), _, strm') => true + | (Tok.STARTQ(_), _, strm') => true + | (Tok.STARTS(_), _, strm') => true + | (Tok.STARTSAMP(_), _, strm') => true + | (Tok.STARTSCRIPT(_), _, strm') => true + | (Tok.STARTSELECT(_), _, strm') => true + | (Tok.STARTSMALL(_), _, strm') => true + | (Tok.STARTSPAN(_), _, strm') => true + | (Tok.STARTSTRIKE(_), _, strm') => true + | (Tok.STARTSTRONG(_), _, strm') => true + | (Tok.STARTSUB(_), _, strm') => true + | (Tok.STARTSUP(_), _, strm') => true + | (Tok.STARTTEXTAREA(_), _, strm') => true + | (Tok.STARTTT(_), _, strm') => true + | (Tok.STARTU(_), _, strm') => true + | (Tok.STARTVAR(_), _, strm') => true + | _ => false + (* end case *)) + val (inline_RES, inline_SPAN, strm') = EBNF.closure(h6_PROD_1_SUBRULE_1_PRED, h6_PROD_1_SUBRULE_1_NT, strm') + val (ENDH6_RES, ENDH6_SPAN, strm') = matchENDH6(strm') + val FULL_SPAN = (#1(STARTH6_SPAN), #2(ENDH6_SPAN)) + in + (UserCode.h6_PROD_1_ACT (ENDH6_RES, inline_RES, STARTH6_RES, ENDH6_SPAN : (Lex.pos * Lex.pos), inline_SPAN : (Lex.pos * Lex.pos), STARTH6_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)), + FULL_SPAN, strm') + end +and h5_NT (strm) = let + val (STARTH5_RES, STARTH5_SPAN, strm') = matchSTARTH5(strm) + fun h5_PROD_1_SUBRULE_1_NT (strm) = let + val (inline_RES, inline_SPAN, strm') = inline_NT(strm) + val FULL_SPAN = (#1(inline_SPAN), #2(inline_SPAN)) + in + ((inline_RES), FULL_SPAN, strm') + end + fun h5_PROD_1_SUBRULE_1_PRED (strm) = (case (lex(strm)) + of (Tok.COMMENT(_), _, strm') => true + | (Tok.PCDATA(_), _, strm') => true + | (Tok.CHAR_REF(_), _, strm') => true + | (Tok.ENTITY_REF(_), _, strm') => true + | (Tok.STARTA(_), _, strm') => true + | (Tok.STARTABBR(_), _, strm') => true + | (Tok.STARTACRONYM(_), _, strm') => true + | (Tok.STARTAPPLET(_), _, strm') => true + | (Tok.STARTB(_), _, strm') => true + | (Tok.STARTBASEFONT(_), _, strm') => true + | (Tok.STARTBDO(_), _, strm') => true + | (Tok.STARTBIG(_), _, strm') => true + | (Tok.STARTBR(_), _, strm') => true + | (Tok.STARTBUTTON(_), _, strm') => true + | (Tok.STARTCITE(_), _, strm') => true + | (Tok.STARTCODE(_), _, strm') => true + | (Tok.STARTDFN(_), _, strm') => true + | (Tok.STARTEM(_), _, strm') => true + | (Tok.STARTFONT(_), _, strm') => true + | (Tok.STARTI(_), _, strm') => true + | (Tok.STARTIFRAME(_), _, strm') => true + | (Tok.STARTIMG(_), _, strm') => true + | (Tok.STARTINPUT(_), _, strm') => true + | (Tok.STARTKBD(_), _, strm') => true + | (Tok.STARTLABEL(_), _, strm') => true + | (Tok.STARTMAP(_), _, strm') => true + | (Tok.STARTOBJECT(_), _, strm') => true + | (Tok.STARTQ(_), _, strm') => true + | (Tok.STARTS(_), _, strm') => true + | (Tok.STARTSAMP(_), _, strm') => true + | (Tok.STARTSCRIPT(_), _, strm') => true + | (Tok.STARTSELECT(_), _, strm') => true + | (Tok.STARTSMALL(_), _, strm') => true + | (Tok.STARTSPAN(_), _, strm') => true + | (Tok.STARTSTRIKE(_), _, strm') => true + | (Tok.STARTSTRONG(_), _, strm') => true + | (Tok.STARTSUB(_), _, strm') => true + | (Tok.STARTSUP(_), _, strm') => true + | (Tok.STARTTEXTAREA(_), _, strm') => true + | (Tok.STARTTT(_), _, strm') => true + | (Tok.STARTU(_), _, strm') => true + | (Tok.STARTVAR(_), _, strm') => true + | _ => false + (* end case *)) + val (inline_RES, inline_SPAN, strm') = EBNF.closure(h5_PROD_1_SUBRULE_1_PRED, h5_PROD_1_SUBRULE_1_NT, strm') + val (ENDH5_RES, ENDH5_SPAN, strm') = matchENDH5(strm') + val FULL_SPAN = (#1(STARTH5_SPAN), #2(ENDH5_SPAN)) + in + (UserCode.h5_PROD_1_ACT (ENDH5_RES, inline_RES, STARTH5_RES, ENDH5_SPAN : (Lex.pos * Lex.pos), inline_SPAN : (Lex.pos * Lex.pos), STARTH5_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)), + FULL_SPAN, strm') + end +and h4_NT (strm) = let + val (STARTH4_RES, STARTH4_SPAN, strm') = matchSTARTH4(strm) + fun h4_PROD_1_SUBRULE_1_NT (strm) = let + val (inline_RES, inline_SPAN, strm') = inline_NT(strm) + val FULL_SPAN = (#1(inline_SPAN), #2(inline_SPAN)) + in + ((inline_RES), FULL_SPAN, strm') + end + fun h4_PROD_1_SUBRULE_1_PRED (strm) = (case (lex(strm)) + of (Tok.COMMENT(_), _, strm') => true + | (Tok.PCDATA(_), _, strm') => true + | (Tok.CHAR_REF(_), _, strm') => true + | (Tok.ENTITY_REF(_), _, strm') => true + | (Tok.STARTA(_), _, strm') => true + | (Tok.STARTABBR(_), _, strm') => true + | (Tok.STARTACRONYM(_), _, strm') => true + | (Tok.STARTAPPLET(_), _, strm') => true + | (Tok.STARTB(_), _, strm') => true + | (Tok.STARTBASEFONT(_), _, strm') => true + | (Tok.STARTBDO(_), _, strm') => true + | (Tok.STARTBIG(_), _, strm') => true + | (Tok.STARTBR(_), _, strm') => true + | (Tok.STARTBUTTON(_), _, strm') => true + | (Tok.STARTCITE(_), _, strm') => true + | (Tok.STARTCODE(_), _, strm') => true + | (Tok.STARTDFN(_), _, strm') => true + | (Tok.STARTEM(_), _, strm') => true + | (Tok.STARTFONT(_), _, strm') => true + | (Tok.STARTI(_), _, strm') => true + | (Tok.STARTIFRAME(_), _, strm') => true + | (Tok.STARTIMG(_), _, strm') => true + | (Tok.STARTINPUT(_), _, strm') => true + | (Tok.STARTKBD(_), _, strm') => true + | (Tok.STARTLABEL(_), _, strm') => true + | (Tok.STARTMAP(_), _, strm') => true + | (Tok.STARTOBJECT(_), _, strm') => true + | (Tok.STARTQ(_), _, strm') => true + | (Tok.STARTS(_), _, strm') => true + | (Tok.STARTSAMP(_), _, strm') => true + | (Tok.STARTSCRIPT(_), _, strm') => true + | (Tok.STARTSELECT(_), _, strm') => true + | (Tok.STARTSMALL(_), _, strm') => true + | (Tok.STARTSPAN(_), _, strm') => true + | (Tok.STARTSTRIKE(_), _, strm') => true + | (Tok.STARTSTRONG(_), _, strm') => true + | (Tok.STARTSUB(_), _, strm') => true + | (Tok.STARTSUP(_), _, strm') => true + | (Tok.STARTTEXTAREA(_), _, strm') => true + | (Tok.STARTTT(_), _, strm') => true + | (Tok.STARTU(_), _, strm') => true + | (Tok.STARTVAR(_), _, strm') => true + | _ => false + (* end case *)) + val (inline_RES, inline_SPAN, strm') = EBNF.closure(h4_PROD_1_SUBRULE_1_PRED, h4_PROD_1_SUBRULE_1_NT, strm') + val (ENDH4_RES, ENDH4_SPAN, strm') = matchENDH4(strm') + val FULL_SPAN = (#1(STARTH4_SPAN), #2(ENDH4_SPAN)) + in + (UserCode.h4_PROD_1_ACT (ENDH4_RES, inline_RES, STARTH4_RES, ENDH4_SPAN : (Lex.pos * Lex.pos), inline_SPAN : (Lex.pos * Lex.pos), STARTH4_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)), + FULL_SPAN, strm') + end +and h3_NT (strm) = let + val (STARTH3_RES, STARTH3_SPAN, strm') = matchSTARTH3(strm) + fun h3_PROD_1_SUBRULE_1_NT (strm) = let + val (inline_RES, inline_SPAN, strm') = inline_NT(strm) + val FULL_SPAN = (#1(inline_SPAN), #2(inline_SPAN)) + in + ((inline_RES), FULL_SPAN, strm') + end + fun h3_PROD_1_SUBRULE_1_PRED (strm) = (case (lex(strm)) + of (Tok.COMMENT(_), _, strm') => true + | (Tok.PCDATA(_), _, strm') => true + | (Tok.CHAR_REF(_), _, strm') => true + | (Tok.ENTITY_REF(_), _, strm') => true + | (Tok.STARTA(_), _, strm') => true + | (Tok.STARTABBR(_), _, strm') => true + | (Tok.STARTACRONYM(_), _, strm') => true + | (Tok.STARTAPPLET(_), _, strm') => true + | (Tok.STARTB(_), _, strm') => true + | (Tok.STARTBASEFONT(_), _, strm') => true + | (Tok.STARTBDO(_), _, strm') => true + | (Tok.STARTBIG(_), _, strm') => true + | (Tok.STARTBR(_), _, strm') => true + | (Tok.STARTBUTTON(_), _, strm') => true + | (Tok.STARTCITE(_), _, strm') => true + | (Tok.STARTCODE(_), _, strm') => true + | (Tok.STARTDFN(_), _, strm') => true + | (Tok.STARTEM(_), _, strm') => true + | (Tok.STARTFONT(_), _, strm') => true + | (Tok.STARTI(_), _, strm') => true + | (Tok.STARTIFRAME(_), _, strm') => true + | (Tok.STARTIMG(_), _, strm') => true + | (Tok.STARTINPUT(_), _, strm') => true + | (Tok.STARTKBD(_), _, strm') => true + | (Tok.STARTLABEL(_), _, strm') => true + | (Tok.STARTMAP(_), _, strm') => true + | (Tok.STARTOBJECT(_), _, strm') => true + | (Tok.STARTQ(_), _, strm') => true + | (Tok.STARTS(_), _, strm') => true + | (Tok.STARTSAMP(_), _, strm') => true + | (Tok.STARTSCRIPT(_), _, strm') => true + | (Tok.STARTSELECT(_), _, strm') => true + | (Tok.STARTSMALL(_), _, strm') => true + | (Tok.STARTSPAN(_), _, strm') => true + | (Tok.STARTSTRIKE(_), _, strm') => true + | (Tok.STARTSTRONG(_), _, strm') => true + | (Tok.STARTSUB(_), _, strm') => true + | (Tok.STARTSUP(_), _, strm') => true + | (Tok.STARTTEXTAREA(_), _, strm') => true + | (Tok.STARTTT(_), _, strm') => true + | (Tok.STARTU(_), _, strm') => true + | (Tok.STARTVAR(_), _, strm') => true + | _ => false + (* end case *)) + val (inline_RES, inline_SPAN, strm') = EBNF.closure(h3_PROD_1_SUBRULE_1_PRED, h3_PROD_1_SUBRULE_1_NT, strm') + val (ENDH3_RES, ENDH3_SPAN, strm') = matchENDH3(strm') + val FULL_SPAN = (#1(STARTH3_SPAN), #2(ENDH3_SPAN)) + in + (UserCode.h3_PROD_1_ACT (ENDH3_RES, inline_RES, STARTH3_RES, ENDH3_SPAN : (Lex.pos * Lex.pos), inline_SPAN : (Lex.pos * Lex.pos), STARTH3_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)), + FULL_SPAN, strm') + end +and h2_NT (strm) = let + val (STARTH2_RES, STARTH2_SPAN, strm') = matchSTARTH2(strm) + fun h2_PROD_1_SUBRULE_1_NT (strm) = let + val (inline_RES, inline_SPAN, strm') = inline_NT(strm) + val FULL_SPAN = (#1(inline_SPAN), #2(inline_SPAN)) + in + ((inline_RES), FULL_SPAN, strm') + end + fun h2_PROD_1_SUBRULE_1_PRED (strm) = (case (lex(strm)) + of (Tok.COMMENT(_), _, strm') => true + | (Tok.PCDATA(_), _, strm') => true + | (Tok.CHAR_REF(_), _, strm') => true + | (Tok.ENTITY_REF(_), _, strm') => true + | (Tok.STARTA(_), _, strm') => true + | (Tok.STARTABBR(_), _, strm') => true + | (Tok.STARTACRONYM(_), _, strm') => true + | (Tok.STARTAPPLET(_), _, strm') => true + | (Tok.STARTB(_), _, strm') => true + | (Tok.STARTBASEFONT(_), _, strm') => true + | (Tok.STARTBDO(_), _, strm') => true + | (Tok.STARTBIG(_), _, strm') => true + | (Tok.STARTBR(_), _, strm') => true + | (Tok.STARTBUTTON(_), _, strm') => true + | (Tok.STARTCITE(_), _, strm') => true + | (Tok.STARTCODE(_), _, strm') => true + | (Tok.STARTDFN(_), _, strm') => true + | (Tok.STARTEM(_), _, strm') => true + | (Tok.STARTFONT(_), _, strm') => true + | (Tok.STARTI(_), _, strm') => true + | (Tok.STARTIFRAME(_), _, strm') => true + | (Tok.STARTIMG(_), _, strm') => true + | (Tok.STARTINPUT(_), _, strm') => true + | (Tok.STARTKBD(_), _, strm') => true + | (Tok.STARTLABEL(_), _, strm') => true + | (Tok.STARTMAP(_), _, strm') => true + | (Tok.STARTOBJECT(_), _, strm') => true + | (Tok.STARTQ(_), _, strm') => true + | (Tok.STARTS(_), _, strm') => true + | (Tok.STARTSAMP(_), _, strm') => true + | (Tok.STARTSCRIPT(_), _, strm') => true + | (Tok.STARTSELECT(_), _, strm') => true + | (Tok.STARTSMALL(_), _, strm') => true + | (Tok.STARTSPAN(_), _, strm') => true + | (Tok.STARTSTRIKE(_), _, strm') => true + | (Tok.STARTSTRONG(_), _, strm') => true + | (Tok.STARTSUB(_), _, strm') => true + | (Tok.STARTSUP(_), _, strm') => true + | (Tok.STARTTEXTAREA(_), _, strm') => true + | (Tok.STARTTT(_), _, strm') => true + | (Tok.STARTU(_), _, strm') => true + | (Tok.STARTVAR(_), _, strm') => true + | _ => false + (* end case *)) + val (inline_RES, inline_SPAN, strm') = EBNF.closure(h2_PROD_1_SUBRULE_1_PRED, h2_PROD_1_SUBRULE_1_NT, strm') + val (ENDH2_RES, ENDH2_SPAN, strm') = matchENDH2(strm') + val FULL_SPAN = (#1(STARTH2_SPAN), #2(ENDH2_SPAN)) + in + (UserCode.h2_PROD_1_ACT (ENDH2_RES, inline_RES, STARTH2_RES, ENDH2_SPAN : (Lex.pos * Lex.pos), inline_SPAN : (Lex.pos * Lex.pos), STARTH2_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)), + FULL_SPAN, strm') + end +and h1_NT (strm) = let + val (STARTH1_RES, STARTH1_SPAN, strm') = matchSTARTH1(strm) + fun h1_PROD_1_SUBRULE_1_NT (strm) = let + val (inline_RES, inline_SPAN, strm') = inline_NT(strm) + val FULL_SPAN = (#1(inline_SPAN), #2(inline_SPAN)) + in + ((inline_RES), FULL_SPAN, strm') + end + fun h1_PROD_1_SUBRULE_1_PRED (strm) = (case (lex(strm)) + of (Tok.COMMENT(_), _, strm') => true + | (Tok.PCDATA(_), _, strm') => true + | (Tok.CHAR_REF(_), _, strm') => true + | (Tok.ENTITY_REF(_), _, strm') => true + | (Tok.STARTA(_), _, strm') => true + | (Tok.STARTABBR(_), _, strm') => true + | (Tok.STARTACRONYM(_), _, strm') => true + | (Tok.STARTAPPLET(_), _, strm') => true + | (Tok.STARTB(_), _, strm') => true + | (Tok.STARTBASEFONT(_), _, strm') => true + | (Tok.STARTBDO(_), _, strm') => true + | (Tok.STARTBIG(_), _, strm') => true + | (Tok.STARTBR(_), _, strm') => true + | (Tok.STARTBUTTON(_), _, strm') => true + | (Tok.STARTCITE(_), _, strm') => true + | (Tok.STARTCODE(_), _, strm') => true + | (Tok.STARTDFN(_), _, strm') => true + | (Tok.STARTEM(_), _, strm') => true + | (Tok.STARTFONT(_), _, strm') => true + | (Tok.STARTI(_), _, strm') => true + | (Tok.STARTIFRAME(_), _, strm') => true + | (Tok.STARTIMG(_), _, strm') => true + | (Tok.STARTINPUT(_), _, strm') => true + | (Tok.STARTKBD(_), _, strm') => true + | (Tok.STARTLABEL(_), _, strm') => true + | (Tok.STARTMAP(_), _, strm') => true + | (Tok.STARTOBJECT(_), _, strm') => true + | (Tok.STARTQ(_), _, strm') => true + | (Tok.STARTS(_), _, strm') => true + | (Tok.STARTSAMP(_), _, strm') => true + | (Tok.STARTSCRIPT(_), _, strm') => true + | (Tok.STARTSELECT(_), _, strm') => true + | (Tok.STARTSMALL(_), _, strm') => true + | (Tok.STARTSPAN(_), _, strm') => true + | (Tok.STARTSTRIKE(_), _, strm') => true + | (Tok.STARTSTRONG(_), _, strm') => true + | (Tok.STARTSUB(_), _, strm') => true + | (Tok.STARTSUP(_), _, strm') => true + | (Tok.STARTTEXTAREA(_), _, strm') => true + | (Tok.STARTTT(_), _, strm') => true + | (Tok.STARTU(_), _, strm') => true + | (Tok.STARTVAR(_), _, strm') => true + | _ => false + (* end case *)) + val (inline_RES, inline_SPAN, strm') = EBNF.closure(h1_PROD_1_SUBRULE_1_PRED, h1_PROD_1_SUBRULE_1_NT, strm') + val (ENDH1_RES, ENDH1_SPAN, strm') = matchENDH1(strm') + val FULL_SPAN = (#1(STARTH1_SPAN), #2(ENDH1_SPAN)) + in + (UserCode.h1_PROD_1_ACT (ENDH1_RES, inline_RES, STARTH1_RES, ENDH1_SPAN : (Lex.pos * Lex.pos), inline_SPAN : (Lex.pos * Lex.pos), STARTH1_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)), + FULL_SPAN, strm') + end +and p_NT (strm) = let + val (STARTP_RES, STARTP_SPAN, strm') = matchSTARTP(strm) + fun p_PROD_1_SUBRULE_1_NT (strm) = let + val (inline_RES, inline_SPAN, strm') = inline_NT(strm) + val FULL_SPAN = (#1(inline_SPAN), #2(inline_SPAN)) + in + ((inline_RES), FULL_SPAN, strm') + end + fun p_PROD_1_SUBRULE_1_PRED (strm) = (case (lex(strm)) + of (Tok.COMMENT(_), _, strm') => true + | (Tok.PCDATA(_), _, strm') => true + | (Tok.CHAR_REF(_), _, strm') => true + | (Tok.ENTITY_REF(_), _, strm') => true + | (Tok.STARTA(_), _, strm') => true + | (Tok.STARTABBR(_), _, strm') => true + | (Tok.STARTACRONYM(_), _, strm') => true + | (Tok.STARTAPPLET(_), _, strm') => true + | (Tok.STARTB(_), _, strm') => true + | (Tok.STARTBASEFONT(_), _, strm') => true + | (Tok.STARTBDO(_), _, strm') => true + | (Tok.STARTBIG(_), _, strm') => true + | (Tok.STARTBR(_), _, strm') => true + | (Tok.STARTBUTTON(_), _, strm') => true + | (Tok.STARTCITE(_), _, strm') => true + | (Tok.STARTCODE(_), _, strm') => true + | (Tok.STARTDFN(_), _, strm') => true + | (Tok.STARTEM(_), _, strm') => true + | (Tok.STARTFONT(_), _, strm') => true + | (Tok.STARTI(_), _, strm') => true + | (Tok.STARTIFRAME(_), _, strm') => true + | (Tok.STARTIMG(_), _, strm') => true + | (Tok.STARTINPUT(_), _, strm') => true + | (Tok.STARTKBD(_), _, strm') => true + | (Tok.STARTLABEL(_), _, strm') => true + | (Tok.STARTMAP(_), _, strm') => true + | (Tok.STARTOBJECT(_), _, strm') => true + | (Tok.STARTQ(_), _, strm') => true + | (Tok.STARTS(_), _, strm') => true + | (Tok.STARTSAMP(_), _, strm') => true + | (Tok.STARTSCRIPT(_), _, strm') => true + | (Tok.STARTSELECT(_), _, strm') => true + | (Tok.STARTSMALL(_), _, strm') => true + | (Tok.STARTSPAN(_), _, strm') => true + | (Tok.STARTSTRIKE(_), _, strm') => true + | (Tok.STARTSTRONG(_), _, strm') => true + | (Tok.STARTSUB(_), _, strm') => true + | (Tok.STARTSUP(_), _, strm') => true + | (Tok.STARTTEXTAREA(_), _, strm') => true + | (Tok.STARTTT(_), _, strm') => true + | (Tok.STARTU(_), _, strm') => true + | (Tok.STARTVAR(_), _, strm') => true + | _ => false + (* end case *)) + val (inline_RES, inline_SPAN, strm') = EBNF.closure(p_PROD_1_SUBRULE_1_PRED, p_PROD_1_SUBRULE_1_NT, strm') + val (ENDP_RES, ENDP_SPAN, strm') = matchENDP(strm') + val FULL_SPAN = (#1(STARTP_SPAN), #2(ENDP_SPAN)) + in + (UserCode.p_PROD_1_ACT (inline_RES, STARTP_RES, ENDP_RES, inline_SPAN : (Lex.pos * Lex.pos), STARTP_SPAN : (Lex.pos * Lex.pos), ENDP_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)), + FULL_SPAN, strm') + end +and object_NT (strm) = let + val (STARTOBJECT_RES, STARTOBJECT_SPAN, strm') = matchSTARTOBJECT(strm) + fun object_PROD_1_SUBRULE_1_NT (strm) = let + fun object_PROD_1_SUBRULE_1_PROD_1 (strm) = let + val (param_RES, param_SPAN, strm') = param_NT(strm) + val FULL_SPAN = (#1(param_SPAN), #2(param_SPAN)) + in + ((param_RES), FULL_SPAN, strm') + end + fun object_PROD_1_SUBRULE_1_PROD_2 (strm) = let + val (flow_RES, flow_SPAN, strm') = flow_NT(strm) + val FULL_SPAN = (#1(flow_SPAN), #2(flow_SPAN)) + in + ((flow_RES), FULL_SPAN, strm') + end + in + (case (lex(strm)) + of (Tok.COMMENT(_), _, strm') => + object_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.PCDATA(_), _, strm') => + object_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.CHAR_REF(_), _, strm') => + object_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.ENTITY_REF(_), _, strm') => + object_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTA(_), _, strm') => + object_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTABBR(_), _, strm') => + object_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTACRONYM(_), _, strm') => + object_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTADDRESS(_), _, strm') => + object_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTAPPLET(_), _, strm') => + object_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTB(_), _, strm') => + object_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTBASEFONT(_), _, strm') => + object_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTBDO(_), _, strm') => + object_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTBIG(_), _, strm') => + object_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTBLOCKQUOTE(_), _, strm') => + object_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTBR(_), _, strm') => + object_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTBUTTON(_), _, strm') => + object_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTCENTER(_), _, strm') => + object_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTCITE(_), _, strm') => + object_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTCODE(_), _, strm') => + object_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTDFN(_), _, strm') => + object_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTDIR(_), _, strm') => + object_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTDIV(_), _, strm') => + object_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTDL(_), _, strm') => + object_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTEM(_), _, strm') => + object_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTFIELDSET(_), _, strm') => + object_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTFONT(_), _, strm') => + object_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTFORM(_), _, strm') => + object_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTH1(_), _, strm') => + object_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTH2(_), _, strm') => + object_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTH3(_), _, strm') => + object_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTH4(_), _, strm') => + object_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTH5(_), _, strm') => + object_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTH6(_), _, strm') => + object_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTHR(_), _, strm') => + object_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTI(_), _, strm') => + object_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTIFRAME(_), _, strm') => + object_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTIMG(_), _, strm') => + object_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTINPUT(_), _, strm') => + object_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTISINDEX(_), _, strm') => + object_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTKBD(_), _, strm') => + object_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTLABEL(_), _, strm') => + object_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTMAP(_), _, strm') => + object_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTMENU(_), _, strm') => + object_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTNOSCRIPT(_), _, strm') => + object_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTOBJECT(_), _, strm') => + object_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTOL(_), _, strm') => + object_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTP(_), _, strm') => + object_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTPRE(_), _, strm') => + object_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTQ(_), _, strm') => + object_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTS(_), _, strm') => + object_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTSAMP(_), _, strm') => + object_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTSCRIPT(_), _, strm') => + object_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTSELECT(_), _, strm') => + object_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTSMALL(_), _, strm') => + object_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTSPAN(_), _, strm') => + object_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTSTRIKE(_), _, strm') => + object_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTSTRONG(_), _, strm') => + object_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTSUB(_), _, strm') => + object_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTSUP(_), _, strm') => + object_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTTABLE(_), _, strm') => + object_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTTEXTAREA(_), _, strm') => + object_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTTT(_), _, strm') => + object_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTU(_), _, strm') => + object_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTUL(_), _, strm') => + object_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTVAR(_), _, strm') => + object_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTPARAM(_), _, strm') => + object_PROD_1_SUBRULE_1_PROD_1(strm) + | _ => fail() + (* end case *)) + end + fun object_PROD_1_SUBRULE_1_PRED (strm) = (case (lex(strm)) + of (Tok.COMMENT(_), _, strm') => true + | (Tok.PCDATA(_), _, strm') => true + | (Tok.CHAR_REF(_), _, strm') => true + | (Tok.ENTITY_REF(_), _, strm') => true + | (Tok.STARTA(_), _, strm') => true + | (Tok.STARTABBR(_), _, strm') => true + | (Tok.STARTACRONYM(_), _, strm') => true + | (Tok.STARTADDRESS(_), _, strm') => true + | (Tok.STARTAPPLET(_), _, strm') => true + | (Tok.STARTB(_), _, strm') => true + | (Tok.STARTBASEFONT(_), _, strm') => true + | (Tok.STARTBDO(_), _, strm') => true + | (Tok.STARTBIG(_), _, strm') => true + | (Tok.STARTBLOCKQUOTE(_), _, strm') => true + | (Tok.STARTBR(_), _, strm') => true + | (Tok.STARTBUTTON(_), _, strm') => true + | (Tok.STARTCENTER(_), _, strm') => true + | (Tok.STARTCITE(_), _, strm') => true + | (Tok.STARTCODE(_), _, strm') => true + | (Tok.STARTDFN(_), _, strm') => true + | (Tok.STARTDIR(_), _, strm') => true + | (Tok.STARTDIV(_), _, strm') => true + | (Tok.STARTDL(_), _, strm') => true + | (Tok.STARTEM(_), _, strm') => true + | (Tok.STARTFIELDSET(_), _, strm') => true + | (Tok.STARTFONT(_), _, strm') => true + | (Tok.STARTFORM(_), _, strm') => true + | (Tok.STARTH1(_), _, strm') => true + | (Tok.STARTH2(_), _, strm') => true + | (Tok.STARTH3(_), _, strm') => true + | (Tok.STARTH4(_), _, strm') => true + | (Tok.STARTH5(_), _, strm') => true + | (Tok.STARTH6(_), _, strm') => true + | (Tok.STARTHR(_), _, strm') => true + | (Tok.STARTI(_), _, strm') => true + | (Tok.STARTIFRAME(_), _, strm') => true + | (Tok.STARTIMG(_), _, strm') => true + | (Tok.STARTINPUT(_), _, strm') => true + | (Tok.STARTISINDEX(_), _, strm') => true + | (Tok.STARTKBD(_), _, strm') => true + | (Tok.STARTLABEL(_), _, strm') => true + | (Tok.STARTMAP(_), _, strm') => true + | (Tok.STARTMENU(_), _, strm') => true + | (Tok.STARTNOSCRIPT(_), _, strm') => true + | (Tok.STARTOBJECT(_), _, strm') => true + | (Tok.STARTOL(_), _, strm') => true + | (Tok.STARTP(_), _, strm') => true + | (Tok.STARTPARAM(_), _, strm') => true + | (Tok.STARTPRE(_), _, strm') => true + | (Tok.STARTQ(_), _, strm') => true + | (Tok.STARTS(_), _, strm') => true + | (Tok.STARTSAMP(_), _, strm') => true + | (Tok.STARTSCRIPT(_), _, strm') => true + | (Tok.STARTSELECT(_), _, strm') => true + | (Tok.STARTSMALL(_), _, strm') => true + | (Tok.STARTSPAN(_), _, strm') => true + | (Tok.STARTSTRIKE(_), _, strm') => true + | (Tok.STARTSTRONG(_), _, strm') => true + | (Tok.STARTSUB(_), _, strm') => true + | (Tok.STARTSUP(_), _, strm') => true + | (Tok.STARTTABLE(_), _, strm') => true + | (Tok.STARTTEXTAREA(_), _, strm') => true + | (Tok.STARTTT(_), _, strm') => true + | (Tok.STARTU(_), _, strm') => true + | (Tok.STARTUL(_), _, strm') => true + | (Tok.STARTVAR(_), _, strm') => true + | _ => false + (* end case *)) + val (SR_RES, SR_SPAN, strm') = EBNF.closure(object_PROD_1_SUBRULE_1_PRED, object_PROD_1_SUBRULE_1_NT, strm') + val (ENDOBJECT_RES, ENDOBJECT_SPAN, strm') = matchENDOBJECT(strm') + val FULL_SPAN = (#1(STARTOBJECT_SPAN), #2(ENDOBJECT_SPAN)) + in + (UserCode.object_PROD_1_ACT (SR_RES, STARTOBJECT_RES, ENDOBJECT_RES, SR_SPAN : (Lex.pos * Lex.pos), STARTOBJECT_SPAN : (Lex.pos * Lex.pos), ENDOBJECT_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)), + FULL_SPAN, strm') + end +and a_NT (strm) = let + val (STARTA_RES, STARTA_SPAN, strm') = matchSTARTA(strm) + fun a_PROD_1_SUBRULE_1_NT (strm) = let + val (inline_RES, inline_SPAN, strm') = inline_NT(strm) + val FULL_SPAN = (#1(inline_SPAN), #2(inline_SPAN)) + in + ((inline_RES), FULL_SPAN, strm') + end + fun a_PROD_1_SUBRULE_1_PRED (strm) = (case (lex(strm)) + of (Tok.COMMENT(_), _, strm') => true + | (Tok.PCDATA(_), _, strm') => true + | (Tok.CHAR_REF(_), _, strm') => true + | (Tok.ENTITY_REF(_), _, strm') => true + | (Tok.STARTA(_), _, strm') => true + | (Tok.STARTABBR(_), _, strm') => true + | (Tok.STARTACRONYM(_), _, strm') => true + | (Tok.STARTAPPLET(_), _, strm') => true + | (Tok.STARTB(_), _, strm') => true + | (Tok.STARTBASEFONT(_), _, strm') => true + | (Tok.STARTBDO(_), _, strm') => true + | (Tok.STARTBIG(_), _, strm') => true + | (Tok.STARTBR(_), _, strm') => true + | (Tok.STARTBUTTON(_), _, strm') => true + | (Tok.STARTCITE(_), _, strm') => true + | (Tok.STARTCODE(_), _, strm') => true + | (Tok.STARTDFN(_), _, strm') => true + | (Tok.STARTEM(_), _, strm') => true + | (Tok.STARTFONT(_), _, strm') => true + | (Tok.STARTI(_), _, strm') => true + | (Tok.STARTIFRAME(_), _, strm') => true + | (Tok.STARTIMG(_), _, strm') => true + | (Tok.STARTINPUT(_), _, strm') => true + | (Tok.STARTKBD(_), _, strm') => true + | (Tok.STARTLABEL(_), _, strm') => true + | (Tok.STARTMAP(_), _, strm') => true + | (Tok.STARTOBJECT(_), _, strm') => true + | (Tok.STARTQ(_), _, strm') => true + | (Tok.STARTS(_), _, strm') => true + | (Tok.STARTSAMP(_), _, strm') => true + | (Tok.STARTSCRIPT(_), _, strm') => true + | (Tok.STARTSELECT(_), _, strm') => true + | (Tok.STARTSMALL(_), _, strm') => true + | (Tok.STARTSPAN(_), _, strm') => true + | (Tok.STARTSTRIKE(_), _, strm') => true + | (Tok.STARTSTRONG(_), _, strm') => true + | (Tok.STARTSUB(_), _, strm') => true + | (Tok.STARTSUP(_), _, strm') => true + | (Tok.STARTTEXTAREA(_), _, strm') => true + | (Tok.STARTTT(_), _, strm') => true + | (Tok.STARTU(_), _, strm') => true + | (Tok.STARTVAR(_), _, strm') => true + | _ => false + (* end case *)) + val (inline_RES, inline_SPAN, strm') = EBNF.closure(a_PROD_1_SUBRULE_1_PRED, a_PROD_1_SUBRULE_1_NT, strm') + val (ENDA_RES, ENDA_SPAN, strm') = matchENDA(strm') + val FULL_SPAN = (#1(STARTA_SPAN), #2(ENDA_SPAN)) + in + (UserCode.a_PROD_1_ACT (inline_RES, STARTA_RES, ENDA_RES, inline_SPAN : (Lex.pos * Lex.pos), STARTA_SPAN : (Lex.pos * Lex.pos), ENDA_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)), + FULL_SPAN, strm') + end +and phrase_NT (strm) = let + fun phrase_PROD_1 (strm) = let + val (em_RES, em_SPAN, strm') = em_NT(strm) + val FULL_SPAN = (#1(em_SPAN), #2(em_SPAN)) + in + ((em_RES), FULL_SPAN, strm') + end + fun phrase_PROD_2 (strm) = let + val (strong_RES, strong_SPAN, strm') = strong_NT(strm) + val FULL_SPAN = (#1(strong_SPAN), #2(strong_SPAN)) + in + ((strong_RES), FULL_SPAN, strm') + end + fun phrase_PROD_3 (strm) = let + val (dfn_RES, dfn_SPAN, strm') = dfn_NT(strm) + val FULL_SPAN = (#1(dfn_SPAN), #2(dfn_SPAN)) + in + ((dfn_RES), FULL_SPAN, strm') + end + fun phrase_PROD_4 (strm) = let + val (code_RES, code_SPAN, strm') = code_NT(strm) + val FULL_SPAN = (#1(code_SPAN), #2(code_SPAN)) + in + ((code_RES), FULL_SPAN, strm') + end + fun phrase_PROD_5 (strm) = let + val (samp_RES, samp_SPAN, strm') = samp_NT(strm) + val FULL_SPAN = (#1(samp_SPAN), #2(samp_SPAN)) + in + ((samp_RES), FULL_SPAN, strm') + end + fun phrase_PROD_6 (strm) = let + val (kbd_RES, kbd_SPAN, strm') = kbd_NT(strm) + val FULL_SPAN = (#1(kbd_SPAN), #2(kbd_SPAN)) + in + ((kbd_RES), FULL_SPAN, strm') + end + fun phrase_PROD_7 (strm) = let + val (var_RES, var_SPAN, strm') = var_NT(strm) + val FULL_SPAN = (#1(var_SPAN), #2(var_SPAN)) + in + ((var_RES), FULL_SPAN, strm') + end + fun phrase_PROD_8 (strm) = let + val (cite_RES, cite_SPAN, strm') = cite_NT(strm) + val FULL_SPAN = (#1(cite_SPAN), #2(cite_SPAN)) + in + ((cite_RES), FULL_SPAN, strm') + end + fun phrase_PROD_9 (strm) = let + val (abbr_RES, abbr_SPAN, strm') = abbr_NT(strm) + val FULL_SPAN = (#1(abbr_SPAN), #2(abbr_SPAN)) + in + ((abbr_RES), FULL_SPAN, strm') + end + fun phrase_PROD_10 (strm) = let + val (acronym_RES, acronym_SPAN, strm') = acronym_NT(strm) + val FULL_SPAN = (#1(acronym_SPAN), #2(acronym_SPAN)) + in + ((acronym_RES), FULL_SPAN, strm') + end + in + (case (lex(strm)) + of (Tok.STARTACRONYM(_), _, strm') => phrase_PROD_10(strm) + | (Tok.STARTCITE(_), _, strm') => phrase_PROD_8(strm) + | (Tok.STARTKBD(_), _, strm') => phrase_PROD_6(strm) + | (Tok.STARTCODE(_), _, strm') => phrase_PROD_4(strm) + | (Tok.STARTSTRONG(_), _, strm') => phrase_PROD_2(strm) + | (Tok.STARTEM(_), _, strm') => phrase_PROD_1(strm) + | (Tok.STARTDFN(_), _, strm') => phrase_PROD_3(strm) + | (Tok.STARTSAMP(_), _, strm') => phrase_PROD_5(strm) + | (Tok.STARTVAR(_), _, strm') => phrase_PROD_7(strm) + | (Tok.STARTABBR(_), _, strm') => phrase_PROD_9(strm) + | _ => fail() + (* end case *)) + end +and acronym_NT (strm) = let + val (STARTACRONYM_RES, STARTACRONYM_SPAN, strm') = matchSTARTACRONYM(strm) + fun acronym_PROD_1_SUBRULE_1_NT (strm) = let + val (inline_RES, inline_SPAN, strm') = inline_NT(strm) + val FULL_SPAN = (#1(inline_SPAN), #2(inline_SPAN)) + in + ((inline_RES), FULL_SPAN, strm') + end + fun acronym_PROD_1_SUBRULE_1_PRED (strm) = (case (lex(strm)) + of (Tok.COMMENT(_), _, strm') => true + | (Tok.PCDATA(_), _, strm') => true + | (Tok.CHAR_REF(_), _, strm') => true + | (Tok.ENTITY_REF(_), _, strm') => true + | (Tok.STARTA(_), _, strm') => true + | (Tok.STARTABBR(_), _, strm') => true + | (Tok.STARTACRONYM(_), _, strm') => true + | (Tok.STARTAPPLET(_), _, strm') => true + | (Tok.STARTB(_), _, strm') => true + | (Tok.STARTBASEFONT(_), _, strm') => true + | (Tok.STARTBDO(_), _, strm') => true + | (Tok.STARTBIG(_), _, strm') => true + | (Tok.STARTBR(_), _, strm') => true + | (Tok.STARTBUTTON(_), _, strm') => true + | (Tok.STARTCITE(_), _, strm') => true + | (Tok.STARTCODE(_), _, strm') => true + | (Tok.STARTDFN(_), _, strm') => true + | (Tok.STARTEM(_), _, strm') => true + | (Tok.STARTFONT(_), _, strm') => true + | (Tok.STARTI(_), _, strm') => true + | (Tok.STARTIFRAME(_), _, strm') => true + | (Tok.STARTIMG(_), _, strm') => true + | (Tok.STARTINPUT(_), _, strm') => true + | (Tok.STARTKBD(_), _, strm') => true + | (Tok.STARTLABEL(_), _, strm') => true + | (Tok.STARTMAP(_), _, strm') => true + | (Tok.STARTOBJECT(_), _, strm') => true + | (Tok.STARTQ(_), _, strm') => true + | (Tok.STARTS(_), _, strm') => true + | (Tok.STARTSAMP(_), _, strm') => true + | (Tok.STARTSCRIPT(_), _, strm') => true + | (Tok.STARTSELECT(_), _, strm') => true + | (Tok.STARTSMALL(_), _, strm') => true + | (Tok.STARTSPAN(_), _, strm') => true + | (Tok.STARTSTRIKE(_), _, strm') => true + | (Tok.STARTSTRONG(_), _, strm') => true + | (Tok.STARTSUB(_), _, strm') => true + | (Tok.STARTSUP(_), _, strm') => true + | (Tok.STARTTEXTAREA(_), _, strm') => true + | (Tok.STARTTT(_), _, strm') => true + | (Tok.STARTU(_), _, strm') => true + | (Tok.STARTVAR(_), _, strm') => true + | _ => false + (* end case *)) + val (inline_RES, inline_SPAN, strm') = EBNF.closure(acronym_PROD_1_SUBRULE_1_PRED, acronym_PROD_1_SUBRULE_1_NT, strm') + val (ENDACRONYM_RES, ENDACRONYM_SPAN, strm') = matchENDACRONYM(strm') + val FULL_SPAN = (#1(STARTACRONYM_SPAN), #2(ENDACRONYM_SPAN)) + in + (UserCode.acronym_PROD_1_ACT (inline_RES, STARTACRONYM_RES, ENDACRONYM_RES, inline_SPAN : (Lex.pos * Lex.pos), STARTACRONYM_SPAN : (Lex.pos * Lex.pos), ENDACRONYM_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)), + FULL_SPAN, strm') + end +and abbr_NT (strm) = let + val (STARTABBR_RES, STARTABBR_SPAN, strm') = matchSTARTABBR(strm) + fun abbr_PROD_1_SUBRULE_1_NT (strm) = let + val (inline_RES, inline_SPAN, strm') = inline_NT(strm) + val FULL_SPAN = (#1(inline_SPAN), #2(inline_SPAN)) + in + ((inline_RES), FULL_SPAN, strm') + end + fun abbr_PROD_1_SUBRULE_1_PRED (strm) = (case (lex(strm)) + of (Tok.COMMENT(_), _, strm') => true + | (Tok.PCDATA(_), _, strm') => true + | (Tok.CHAR_REF(_), _, strm') => true + | (Tok.ENTITY_REF(_), _, strm') => true + | (Tok.STARTA(_), _, strm') => true + | (Tok.STARTABBR(_), _, strm') => true + | (Tok.STARTACRONYM(_), _, strm') => true + | (Tok.STARTAPPLET(_), _, strm') => true + | (Tok.STARTB(_), _, strm') => true + | (Tok.STARTBASEFONT(_), _, strm') => true + | (Tok.STARTBDO(_), _, strm') => true + | (Tok.STARTBIG(_), _, strm') => true + | (Tok.STARTBR(_), _, strm') => true + | (Tok.STARTBUTTON(_), _, strm') => true + | (Tok.STARTCITE(_), _, strm') => true + | (Tok.STARTCODE(_), _, strm') => true + | (Tok.STARTDFN(_), _, strm') => true + | (Tok.STARTEM(_), _, strm') => true + | (Tok.STARTFONT(_), _, strm') => true + | (Tok.STARTI(_), _, strm') => true + | (Tok.STARTIFRAME(_), _, strm') => true + | (Tok.STARTIMG(_), _, strm') => true + | (Tok.STARTINPUT(_), _, strm') => true + | (Tok.STARTKBD(_), _, strm') => true + | (Tok.STARTLABEL(_), _, strm') => true + | (Tok.STARTMAP(_), _, strm') => true + | (Tok.STARTOBJECT(_), _, strm') => true + | (Tok.STARTQ(_), _, strm') => true + | (Tok.STARTS(_), _, strm') => true + | (Tok.STARTSAMP(_), _, strm') => true + | (Tok.STARTSCRIPT(_), _, strm') => true + | (Tok.STARTSELECT(_), _, strm') => true + | (Tok.STARTSMALL(_), _, strm') => true + | (Tok.STARTSPAN(_), _, strm') => true + | (Tok.STARTSTRIKE(_), _, strm') => true + | (Tok.STARTSTRONG(_), _, strm') => true + | (Tok.STARTSUB(_), _, strm') => true + | (Tok.STARTSUP(_), _, strm') => true + | (Tok.STARTTEXTAREA(_), _, strm') => true + | (Tok.STARTTT(_), _, strm') => true + | (Tok.STARTU(_), _, strm') => true + | (Tok.STARTVAR(_), _, strm') => true + | _ => false + (* end case *)) + val (inline_RES, inline_SPAN, strm') = EBNF.closure(abbr_PROD_1_SUBRULE_1_PRED, abbr_PROD_1_SUBRULE_1_NT, strm') + val (ENDABBR_RES, ENDABBR_SPAN, strm') = matchENDABBR(strm') + val FULL_SPAN = (#1(STARTABBR_SPAN), #2(ENDABBR_SPAN)) + in + (UserCode.abbr_PROD_1_ACT (inline_RES, ENDABBR_RES, STARTABBR_RES, inline_SPAN : (Lex.pos * Lex.pos), ENDABBR_SPAN : (Lex.pos * Lex.pos), STARTABBR_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)), + FULL_SPAN, strm') + end +and cite_NT (strm) = let + val (STARTCITE_RES, STARTCITE_SPAN, strm') = matchSTARTCITE(strm) + fun cite_PROD_1_SUBRULE_1_NT (strm) = let + val (inline_RES, inline_SPAN, strm') = inline_NT(strm) + val FULL_SPAN = (#1(inline_SPAN), #2(inline_SPAN)) + in + ((inline_RES), FULL_SPAN, strm') + end + fun cite_PROD_1_SUBRULE_1_PRED (strm) = (case (lex(strm)) + of (Tok.COMMENT(_), _, strm') => true + | (Tok.PCDATA(_), _, strm') => true + | (Tok.CHAR_REF(_), _, strm') => true + | (Tok.ENTITY_REF(_), _, strm') => true + | (Tok.STARTA(_), _, strm') => true + | (Tok.STARTABBR(_), _, strm') => true + | (Tok.STARTACRONYM(_), _, strm') => true + | (Tok.STARTAPPLET(_), _, strm') => true + | (Tok.STARTB(_), _, strm') => true + | (Tok.STARTBASEFONT(_), _, strm') => true + | (Tok.STARTBDO(_), _, strm') => true + | (Tok.STARTBIG(_), _, strm') => true + | (Tok.STARTBR(_), _, strm') => true + | (Tok.STARTBUTTON(_), _, strm') => true + | (Tok.STARTCITE(_), _, strm') => true + | (Tok.STARTCODE(_), _, strm') => true + | (Tok.STARTDFN(_), _, strm') => true + | (Tok.STARTEM(_), _, strm') => true + | (Tok.STARTFONT(_), _, strm') => true + | (Tok.STARTI(_), _, strm') => true + | (Tok.STARTIFRAME(_), _, strm') => true + | (Tok.STARTIMG(_), _, strm') => true + | (Tok.STARTINPUT(_), _, strm') => true + | (Tok.STARTKBD(_), _, strm') => true + | (Tok.STARTLABEL(_), _, strm') => true + | (Tok.STARTMAP(_), _, strm') => true + | (Tok.STARTOBJECT(_), _, strm') => true + | (Tok.STARTQ(_), _, strm') => true + | (Tok.STARTS(_), _, strm') => true + | (Tok.STARTSAMP(_), _, strm') => true + | (Tok.STARTSCRIPT(_), _, strm') => true + | (Tok.STARTSELECT(_), _, strm') => true + | (Tok.STARTSMALL(_), _, strm') => true + | (Tok.STARTSPAN(_), _, strm') => true + | (Tok.STARTSTRIKE(_), _, strm') => true + | (Tok.STARTSTRONG(_), _, strm') => true + | (Tok.STARTSUB(_), _, strm') => true + | (Tok.STARTSUP(_), _, strm') => true + | (Tok.STARTTEXTAREA(_), _, strm') => true + | (Tok.STARTTT(_), _, strm') => true + | (Tok.STARTU(_), _, strm') => true + | (Tok.STARTVAR(_), _, strm') => true + | _ => false + (* end case *)) + val (inline_RES, inline_SPAN, strm') = EBNF.closure(cite_PROD_1_SUBRULE_1_PRED, cite_PROD_1_SUBRULE_1_NT, strm') + val (ENDCITE_RES, ENDCITE_SPAN, strm') = matchENDCITE(strm') + val FULL_SPAN = (#1(STARTCITE_SPAN), #2(ENDCITE_SPAN)) + in + (UserCode.cite_PROD_1_ACT (inline_RES, ENDCITE_RES, STARTCITE_RES, inline_SPAN : (Lex.pos * Lex.pos), ENDCITE_SPAN : (Lex.pos * Lex.pos), STARTCITE_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)), + FULL_SPAN, strm') + end +and var_NT (strm) = let + val (STARTVAR_RES, STARTVAR_SPAN, strm') = matchSTARTVAR(strm) + fun var_PROD_1_SUBRULE_1_NT (strm) = let + val (inline_RES, inline_SPAN, strm') = inline_NT(strm) + val FULL_SPAN = (#1(inline_SPAN), #2(inline_SPAN)) + in + ((inline_RES), FULL_SPAN, strm') + end + fun var_PROD_1_SUBRULE_1_PRED (strm) = (case (lex(strm)) + of (Tok.COMMENT(_), _, strm') => true + | (Tok.PCDATA(_), _, strm') => true + | (Tok.CHAR_REF(_), _, strm') => true + | (Tok.ENTITY_REF(_), _, strm') => true + | (Tok.STARTA(_), _, strm') => true + | (Tok.STARTABBR(_), _, strm') => true + | (Tok.STARTACRONYM(_), _, strm') => true + | (Tok.STARTAPPLET(_), _, strm') => true + | (Tok.STARTB(_), _, strm') => true + | (Tok.STARTBASEFONT(_), _, strm') => true + | (Tok.STARTBDO(_), _, strm') => true + | (Tok.STARTBIG(_), _, strm') => true + | (Tok.STARTBR(_), _, strm') => true + | (Tok.STARTBUTTON(_), _, strm') => true + | (Tok.STARTCITE(_), _, strm') => true + | (Tok.STARTCODE(_), _, strm') => true + | (Tok.STARTDFN(_), _, strm') => true + | (Tok.STARTEM(_), _, strm') => true + | (Tok.STARTFONT(_), _, strm') => true + | (Tok.STARTI(_), _, strm') => true + | (Tok.STARTIFRAME(_), _, strm') => true + | (Tok.STARTIMG(_), _, strm') => true + | (Tok.STARTINPUT(_), _, strm') => true + | (Tok.STARTKBD(_), _, strm') => true + | (Tok.STARTLABEL(_), _, strm') => true + | (Tok.STARTMAP(_), _, strm') => true + | (Tok.STARTOBJECT(_), _, strm') => true + | (Tok.STARTQ(_), _, strm') => true + | (Tok.STARTS(_), _, strm') => true + | (Tok.STARTSAMP(_), _, strm') => true + | (Tok.STARTSCRIPT(_), _, strm') => true + | (Tok.STARTSELECT(_), _, strm') => true + | (Tok.STARTSMALL(_), _, strm') => true + | (Tok.STARTSPAN(_), _, strm') => true + | (Tok.STARTSTRIKE(_), _, strm') => true + | (Tok.STARTSTRONG(_), _, strm') => true + | (Tok.STARTSUB(_), _, strm') => true + | (Tok.STARTSUP(_), _, strm') => true + | (Tok.STARTTEXTAREA(_), _, strm') => true + | (Tok.STARTTT(_), _, strm') => true + | (Tok.STARTU(_), _, strm') => true + | (Tok.STARTVAR(_), _, strm') => true + | _ => false + (* end case *)) + val (inline_RES, inline_SPAN, strm') = EBNF.closure(var_PROD_1_SUBRULE_1_PRED, var_PROD_1_SUBRULE_1_NT, strm') + val (ENDVAR_RES, ENDVAR_SPAN, strm') = matchENDVAR(strm') + val FULL_SPAN = (#1(STARTVAR_SPAN), #2(ENDVAR_SPAN)) + in + (UserCode.var_PROD_1_ACT (inline_RES, STARTVAR_RES, ENDVAR_RES, inline_SPAN : (Lex.pos * Lex.pos), STARTVAR_SPAN : (Lex.pos * Lex.pos), ENDVAR_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)), + FULL_SPAN, strm') + end +and kbd_NT (strm) = let + val (STARTKBD_RES, STARTKBD_SPAN, strm') = matchSTARTKBD(strm) + fun kbd_PROD_1_SUBRULE_1_NT (strm) = let + val (inline_RES, inline_SPAN, strm') = inline_NT(strm) + val FULL_SPAN = (#1(inline_SPAN), #2(inline_SPAN)) + in + ((inline_RES), FULL_SPAN, strm') + end + fun kbd_PROD_1_SUBRULE_1_PRED (strm) = (case (lex(strm)) + of (Tok.COMMENT(_), _, strm') => true + | (Tok.PCDATA(_), _, strm') => true + | (Tok.CHAR_REF(_), _, strm') => true + | (Tok.ENTITY_REF(_), _, strm') => true + | (Tok.STARTA(_), _, strm') => true + | (Tok.STARTABBR(_), _, strm') => true + | (Tok.STARTACRONYM(_), _, strm') => true + | (Tok.STARTAPPLET(_), _, strm') => true + | (Tok.STARTB(_), _, strm') => true + | (Tok.STARTBASEFONT(_), _, strm') => true + | (Tok.STARTBDO(_), _, strm') => true + | (Tok.STARTBIG(_), _, strm') => true + | (Tok.STARTBR(_), _, strm') => true + | (Tok.STARTBUTTON(_), _, strm') => true + | (Tok.STARTCITE(_), _, strm') => true + | (Tok.STARTCODE(_), _, strm') => true + | (Tok.STARTDFN(_), _, strm') => true + | (Tok.STARTEM(_), _, strm') => true + | (Tok.STARTFONT(_), _, strm') => true + | (Tok.STARTI(_), _, strm') => true + | (Tok.STARTIFRAME(_), _, strm') => true + | (Tok.STARTIMG(_), _, strm') => true + | (Tok.STARTINPUT(_), _, strm') => true + | (Tok.STARTKBD(_), _, strm') => true + | (Tok.STARTLABEL(_), _, strm') => true + | (Tok.STARTMAP(_), _, strm') => true + | (Tok.STARTOBJECT(_), _, strm') => true + | (Tok.STARTQ(_), _, strm') => true + | (Tok.STARTS(_), _, strm') => true + | (Tok.STARTSAMP(_), _, strm') => true + | (Tok.STARTSCRIPT(_), _, strm') => true + | (Tok.STARTSELECT(_), _, strm') => true + | (Tok.STARTSMALL(_), _, strm') => true + | (Tok.STARTSPAN(_), _, strm') => true + | (Tok.STARTSTRIKE(_), _, strm') => true + | (Tok.STARTSTRONG(_), _, strm') => true + | (Tok.STARTSUB(_), _, strm') => true + | (Tok.STARTSUP(_), _, strm') => true + | (Tok.STARTTEXTAREA(_), _, strm') => true + | (Tok.STARTTT(_), _, strm') => true + | (Tok.STARTU(_), _, strm') => true + | (Tok.STARTVAR(_), _, strm') => true + | _ => false + (* end case *)) + val (inline_RES, inline_SPAN, strm') = EBNF.closure(kbd_PROD_1_SUBRULE_1_PRED, kbd_PROD_1_SUBRULE_1_NT, strm') + val (ENDKBD_RES, ENDKBD_SPAN, strm') = matchENDKBD(strm') + val FULL_SPAN = (#1(STARTKBD_SPAN), #2(ENDKBD_SPAN)) + in + (UserCode.kbd_PROD_1_ACT (inline_RES, STARTKBD_RES, ENDKBD_RES, inline_SPAN : (Lex.pos * Lex.pos), STARTKBD_SPAN : (Lex.pos * Lex.pos), ENDKBD_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)), + FULL_SPAN, strm') + end +and samp_NT (strm) = let + val (STARTSAMP_RES, STARTSAMP_SPAN, strm') = matchSTARTSAMP(strm) + fun samp_PROD_1_SUBRULE_1_NT (strm) = let + val (inline_RES, inline_SPAN, strm') = inline_NT(strm) + val FULL_SPAN = (#1(inline_SPAN), #2(inline_SPAN)) + in + ((inline_RES), FULL_SPAN, strm') + end + fun samp_PROD_1_SUBRULE_1_PRED (strm) = (case (lex(strm)) + of (Tok.COMMENT(_), _, strm') => true + | (Tok.PCDATA(_), _, strm') => true + | (Tok.CHAR_REF(_), _, strm') => true + | (Tok.ENTITY_REF(_), _, strm') => true + | (Tok.STARTA(_), _, strm') => true + | (Tok.STARTABBR(_), _, strm') => true + | (Tok.STARTACRONYM(_), _, strm') => true + | (Tok.STARTAPPLET(_), _, strm') => true + | (Tok.STARTB(_), _, strm') => true + | (Tok.STARTBASEFONT(_), _, strm') => true + | (Tok.STARTBDO(_), _, strm') => true + | (Tok.STARTBIG(_), _, strm') => true + | (Tok.STARTBR(_), _, strm') => true + | (Tok.STARTBUTTON(_), _, strm') => true + | (Tok.STARTCITE(_), _, strm') => true + | (Tok.STARTCODE(_), _, strm') => true + | (Tok.STARTDFN(_), _, strm') => true + | (Tok.STARTEM(_), _, strm') => true + | (Tok.STARTFONT(_), _, strm') => true + | (Tok.STARTI(_), _, strm') => true + | (Tok.STARTIFRAME(_), _, strm') => true + | (Tok.STARTIMG(_), _, strm') => true + | (Tok.STARTINPUT(_), _, strm') => true + | (Tok.STARTKBD(_), _, strm') => true + | (Tok.STARTLABEL(_), _, strm') => true + | (Tok.STARTMAP(_), _, strm') => true + | (Tok.STARTOBJECT(_), _, strm') => true + | (Tok.STARTQ(_), _, strm') => true + | (Tok.STARTS(_), _, strm') => true + | (Tok.STARTSAMP(_), _, strm') => true + | (Tok.STARTSCRIPT(_), _, strm') => true + | (Tok.STARTSELECT(_), _, strm') => true + | (Tok.STARTSMALL(_), _, strm') => true + | (Tok.STARTSPAN(_), _, strm') => true + | (Tok.STARTSTRIKE(_), _, strm') => true + | (Tok.STARTSTRONG(_), _, strm') => true + | (Tok.STARTSUB(_), _, strm') => true + | (Tok.STARTSUP(_), _, strm') => true + | (Tok.STARTTEXTAREA(_), _, strm') => true + | (Tok.STARTTT(_), _, strm') => true + | (Tok.STARTU(_), _, strm') => true + | (Tok.STARTVAR(_), _, strm') => true + | _ => false + (* end case *)) + val (inline_RES, inline_SPAN, strm') = EBNF.closure(samp_PROD_1_SUBRULE_1_PRED, samp_PROD_1_SUBRULE_1_NT, strm') + val (ENDSAMP_RES, ENDSAMP_SPAN, strm') = matchENDSAMP(strm') + val FULL_SPAN = (#1(STARTSAMP_SPAN), #2(ENDSAMP_SPAN)) + in + (UserCode.samp_PROD_1_ACT (inline_RES, STARTSAMP_RES, ENDSAMP_RES, inline_SPAN : (Lex.pos * Lex.pos), STARTSAMP_SPAN : (Lex.pos * Lex.pos), ENDSAMP_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)), + FULL_SPAN, strm') + end +and code_NT (strm) = let + val (STARTCODE_RES, STARTCODE_SPAN, strm') = matchSTARTCODE(strm) + fun code_PROD_1_SUBRULE_1_NT (strm) = let + val (inline_RES, inline_SPAN, strm') = inline_NT(strm) + val FULL_SPAN = (#1(inline_SPAN), #2(inline_SPAN)) + in + ((inline_RES), FULL_SPAN, strm') + end + fun code_PROD_1_SUBRULE_1_PRED (strm) = (case (lex(strm)) + of (Tok.COMMENT(_), _, strm') => true + | (Tok.PCDATA(_), _, strm') => true + | (Tok.CHAR_REF(_), _, strm') => true + | (Tok.ENTITY_REF(_), _, strm') => true + | (Tok.STARTA(_), _, strm') => true + | (Tok.STARTABBR(_), _, strm') => true + | (Tok.STARTACRONYM(_), _, strm') => true + | (Tok.STARTAPPLET(_), _, strm') => true + | (Tok.STARTB(_), _, strm') => true + | (Tok.STARTBASEFONT(_), _, strm') => true + | (Tok.STARTBDO(_), _, strm') => true + | (Tok.STARTBIG(_), _, strm') => true + | (Tok.STARTBR(_), _, strm') => true + | (Tok.STARTBUTTON(_), _, strm') => true + | (Tok.STARTCITE(_), _, strm') => true + | (Tok.STARTCODE(_), _, strm') => true + | (Tok.STARTDFN(_), _, strm') => true + | (Tok.STARTEM(_), _, strm') => true + | (Tok.STARTFONT(_), _, strm') => true + | (Tok.STARTI(_), _, strm') => true + | (Tok.STARTIFRAME(_), _, strm') => true + | (Tok.STARTIMG(_), _, strm') => true + | (Tok.STARTINPUT(_), _, strm') => true + | (Tok.STARTKBD(_), _, strm') => true + | (Tok.STARTLABEL(_), _, strm') => true + | (Tok.STARTMAP(_), _, strm') => true + | (Tok.STARTOBJECT(_), _, strm') => true + | (Tok.STARTQ(_), _, strm') => true + | (Tok.STARTS(_), _, strm') => true + | (Tok.STARTSAMP(_), _, strm') => true + | (Tok.STARTSCRIPT(_), _, strm') => true + | (Tok.STARTSELECT(_), _, strm') => true + | (Tok.STARTSMALL(_), _, strm') => true + | (Tok.STARTSPAN(_), _, strm') => true + | (Tok.STARTSTRIKE(_), _, strm') => true + | (Tok.STARTSTRONG(_), _, strm') => true + | (Tok.STARTSUB(_), _, strm') => true + | (Tok.STARTSUP(_), _, strm') => true + | (Tok.STARTTEXTAREA(_), _, strm') => true + | (Tok.STARTTT(_), _, strm') => true + | (Tok.STARTU(_), _, strm') => true + | (Tok.STARTVAR(_), _, strm') => true + | _ => false + (* end case *)) + val (inline_RES, inline_SPAN, strm') = EBNF.closure(code_PROD_1_SUBRULE_1_PRED, code_PROD_1_SUBRULE_1_NT, strm') + val (ENDCODE_RES, ENDCODE_SPAN, strm') = matchENDCODE(strm') + val FULL_SPAN = (#1(STARTCODE_SPAN), #2(ENDCODE_SPAN)) + in + (UserCode.code_PROD_1_ACT (inline_RES, ENDCODE_RES, STARTCODE_RES, inline_SPAN : (Lex.pos * Lex.pos), ENDCODE_SPAN : (Lex.pos * Lex.pos), STARTCODE_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)), + FULL_SPAN, strm') + end +and dfn_NT (strm) = let + val (STARTDFN_RES, STARTDFN_SPAN, strm') = matchSTARTDFN(strm) + fun dfn_PROD_1_SUBRULE_1_NT (strm) = let + val (inline_RES, inline_SPAN, strm') = inline_NT(strm) + val FULL_SPAN = (#1(inline_SPAN), #2(inline_SPAN)) + in + ((inline_RES), FULL_SPAN, strm') + end + fun dfn_PROD_1_SUBRULE_1_PRED (strm) = (case (lex(strm)) + of (Tok.COMMENT(_), _, strm') => true + | (Tok.PCDATA(_), _, strm') => true + | (Tok.CHAR_REF(_), _, strm') => true + | (Tok.ENTITY_REF(_), _, strm') => true + | (Tok.STARTA(_), _, strm') => true + | (Tok.STARTABBR(_), _, strm') => true + | (Tok.STARTACRONYM(_), _, strm') => true + | (Tok.STARTAPPLET(_), _, strm') => true + | (Tok.STARTB(_), _, strm') => true + | (Tok.STARTBASEFONT(_), _, strm') => true + | (Tok.STARTBDO(_), _, strm') => true + | (Tok.STARTBIG(_), _, strm') => true + | (Tok.STARTBR(_), _, strm') => true + | (Tok.STARTBUTTON(_), _, strm') => true + | (Tok.STARTCITE(_), _, strm') => true + | (Tok.STARTCODE(_), _, strm') => true + | (Tok.STARTDFN(_), _, strm') => true + | (Tok.STARTEM(_), _, strm') => true + | (Tok.STARTFONT(_), _, strm') => true + | (Tok.STARTI(_), _, strm') => true + | (Tok.STARTIFRAME(_), _, strm') => true + | (Tok.STARTIMG(_), _, strm') => true + | (Tok.STARTINPUT(_), _, strm') => true + | (Tok.STARTKBD(_), _, strm') => true + | (Tok.STARTLABEL(_), _, strm') => true + | (Tok.STARTMAP(_), _, strm') => true + | (Tok.STARTOBJECT(_), _, strm') => true + | (Tok.STARTQ(_), _, strm') => true + | (Tok.STARTS(_), _, strm') => true + | (Tok.STARTSAMP(_), _, strm') => true + | (Tok.STARTSCRIPT(_), _, strm') => true + | (Tok.STARTSELECT(_), _, strm') => true + | (Tok.STARTSMALL(_), _, strm') => true + | (Tok.STARTSPAN(_), _, strm') => true + | (Tok.STARTSTRIKE(_), _, strm') => true + | (Tok.STARTSTRONG(_), _, strm') => true + | (Tok.STARTSUB(_), _, strm') => true + | (Tok.STARTSUP(_), _, strm') => true + | (Tok.STARTTEXTAREA(_), _, strm') => true + | (Tok.STARTTT(_), _, strm') => true + | (Tok.STARTU(_), _, strm') => true + | (Tok.STARTVAR(_), _, strm') => true + | _ => false + (* end case *)) + val (inline_RES, inline_SPAN, strm') = EBNF.closure(dfn_PROD_1_SUBRULE_1_PRED, dfn_PROD_1_SUBRULE_1_NT, strm') + val (ENDDFN_RES, ENDDFN_SPAN, strm') = matchENDDFN(strm') + val FULL_SPAN = (#1(STARTDFN_SPAN), #2(ENDDFN_SPAN)) + in + (UserCode.dfn_PROD_1_ACT (inline_RES, ENDDFN_RES, STARTDFN_RES, inline_SPAN : (Lex.pos * Lex.pos), ENDDFN_SPAN : (Lex.pos * Lex.pos), STARTDFN_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)), + FULL_SPAN, strm') + end +and strong_NT (strm) = let + val (STARTSTRONG_RES, STARTSTRONG_SPAN, strm') = matchSTARTSTRONG(strm) + fun strong_PROD_1_SUBRULE_1_NT (strm) = let + val (inline_RES, inline_SPAN, strm') = inline_NT(strm) + val FULL_SPAN = (#1(inline_SPAN), #2(inline_SPAN)) + in + ((inline_RES), FULL_SPAN, strm') + end + fun strong_PROD_1_SUBRULE_1_PRED (strm) = (case (lex(strm)) + of (Tok.COMMENT(_), _, strm') => true + | (Tok.PCDATA(_), _, strm') => true + | (Tok.CHAR_REF(_), _, strm') => true + | (Tok.ENTITY_REF(_), _, strm') => true + | (Tok.STARTA(_), _, strm') => true + | (Tok.STARTABBR(_), _, strm') => true + | (Tok.STARTACRONYM(_), _, strm') => true + | (Tok.STARTAPPLET(_), _, strm') => true + | (Tok.STARTB(_), _, strm') => true + | (Tok.STARTBASEFONT(_), _, strm') => true + | (Tok.STARTBDO(_), _, strm') => true + | (Tok.STARTBIG(_), _, strm') => true + | (Tok.STARTBR(_), _, strm') => true + | (Tok.STARTBUTTON(_), _, strm') => true + | (Tok.STARTCITE(_), _, strm') => true + | (Tok.STARTCODE(_), _, strm') => true + | (Tok.STARTDFN(_), _, strm') => true + | (Tok.STARTEM(_), _, strm') => true + | (Tok.STARTFONT(_), _, strm') => true + | (Tok.STARTI(_), _, strm') => true + | (Tok.STARTIFRAME(_), _, strm') => true + | (Tok.STARTIMG(_), _, strm') => true + | (Tok.STARTINPUT(_), _, strm') => true + | (Tok.STARTKBD(_), _, strm') => true + | (Tok.STARTLABEL(_), _, strm') => true + | (Tok.STARTMAP(_), _, strm') => true + | (Tok.STARTOBJECT(_), _, strm') => true + | (Tok.STARTQ(_), _, strm') => true + | (Tok.STARTS(_), _, strm') => true + | (Tok.STARTSAMP(_), _, strm') => true + | (Tok.STARTSCRIPT(_), _, strm') => true + | (Tok.STARTSELECT(_), _, strm') => true + | (Tok.STARTSMALL(_), _, strm') => true + | (Tok.STARTSPAN(_), _, strm') => true + | (Tok.STARTSTRIKE(_), _, strm') => true + | (Tok.STARTSTRONG(_), _, strm') => true + | (Tok.STARTSUB(_), _, strm') => true + | (Tok.STARTSUP(_), _, strm') => true + | (Tok.STARTTEXTAREA(_), _, strm') => true + | (Tok.STARTTT(_), _, strm') => true + | (Tok.STARTU(_), _, strm') => true + | (Tok.STARTVAR(_), _, strm') => true + | _ => false + (* end case *)) + val (inline_RES, inline_SPAN, strm') = EBNF.closure(strong_PROD_1_SUBRULE_1_PRED, strong_PROD_1_SUBRULE_1_NT, strm') + val (ENDSTRONG_RES, ENDSTRONG_SPAN, strm') = matchENDSTRONG(strm') + val FULL_SPAN = (#1(STARTSTRONG_SPAN), #2(ENDSTRONG_SPAN)) + in + (UserCode.strong_PROD_1_ACT (inline_RES, STARTSTRONG_RES, ENDSTRONG_RES, inline_SPAN : (Lex.pos * Lex.pos), STARTSTRONG_SPAN : (Lex.pos * Lex.pos), ENDSTRONG_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)), + FULL_SPAN, strm') + end +and em_NT (strm) = let + val (STARTEM_RES, STARTEM_SPAN, strm') = matchSTARTEM(strm) + fun em_PROD_1_SUBRULE_1_NT (strm) = let + val (inline_RES, inline_SPAN, strm') = inline_NT(strm) + val FULL_SPAN = (#1(inline_SPAN), #2(inline_SPAN)) + in + ((inline_RES), FULL_SPAN, strm') + end + fun em_PROD_1_SUBRULE_1_PRED (strm) = (case (lex(strm)) + of (Tok.COMMENT(_), _, strm') => true + | (Tok.PCDATA(_), _, strm') => true + | (Tok.CHAR_REF(_), _, strm') => true + | (Tok.ENTITY_REF(_), _, strm') => true + | (Tok.STARTA(_), _, strm') => true + | (Tok.STARTABBR(_), _, strm') => true + | (Tok.STARTACRONYM(_), _, strm') => true + | (Tok.STARTAPPLET(_), _, strm') => true + | (Tok.STARTB(_), _, strm') => true + | (Tok.STARTBASEFONT(_), _, strm') => true + | (Tok.STARTBDO(_), _, strm') => true + | (Tok.STARTBIG(_), _, strm') => true + | (Tok.STARTBR(_), _, strm') => true + | (Tok.STARTBUTTON(_), _, strm') => true + | (Tok.STARTCITE(_), _, strm') => true + | (Tok.STARTCODE(_), _, strm') => true + | (Tok.STARTDFN(_), _, strm') => true + | (Tok.STARTEM(_), _, strm') => true + | (Tok.STARTFONT(_), _, strm') => true + | (Tok.STARTI(_), _, strm') => true + | (Tok.STARTIFRAME(_), _, strm') => true + | (Tok.STARTIMG(_), _, strm') => true + | (Tok.STARTINPUT(_), _, strm') => true + | (Tok.STARTKBD(_), _, strm') => true + | (Tok.STARTLABEL(_), _, strm') => true + | (Tok.STARTMAP(_), _, strm') => true + | (Tok.STARTOBJECT(_), _, strm') => true + | (Tok.STARTQ(_), _, strm') => true + | (Tok.STARTS(_), _, strm') => true + | (Tok.STARTSAMP(_), _, strm') => true + | (Tok.STARTSCRIPT(_), _, strm') => true + | (Tok.STARTSELECT(_), _, strm') => true + | (Tok.STARTSMALL(_), _, strm') => true + | (Tok.STARTSPAN(_), _, strm') => true + | (Tok.STARTSTRIKE(_), _, strm') => true + | (Tok.STARTSTRONG(_), _, strm') => true + | (Tok.STARTSUB(_), _, strm') => true + | (Tok.STARTSUP(_), _, strm') => true + | (Tok.STARTTEXTAREA(_), _, strm') => true + | (Tok.STARTTT(_), _, strm') => true + | (Tok.STARTU(_), _, strm') => true + | (Tok.STARTVAR(_), _, strm') => true + | _ => false + (* end case *)) + val (inline_RES, inline_SPAN, strm') = EBNF.closure(em_PROD_1_SUBRULE_1_PRED, em_PROD_1_SUBRULE_1_NT, strm') + val (ENDEM_RES, ENDEM_SPAN, strm') = matchENDEM(strm') + val FULL_SPAN = (#1(STARTEM_SPAN), #2(ENDEM_SPAN)) + in + (UserCode.em_PROD_1_ACT (ENDEM_RES, inline_RES, STARTEM_RES, ENDEM_SPAN : (Lex.pos * Lex.pos), inline_SPAN : (Lex.pos * Lex.pos), STARTEM_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)), + FULL_SPAN, strm') + end +and fontstyle_NT (strm) = let + fun fontstyle_PROD_1 (strm) = let + val (tt_RES, tt_SPAN, strm') = tt_NT(strm) + val FULL_SPAN = (#1(tt_SPAN), #2(tt_SPAN)) + in + ((tt_RES), FULL_SPAN, strm') + end + fun fontstyle_PROD_2 (strm) = let + val (i_RES, i_SPAN, strm') = i_NT(strm) + val FULL_SPAN = (#1(i_SPAN), #2(i_SPAN)) + in + ((i_RES), FULL_SPAN, strm') + end + fun fontstyle_PROD_3 (strm) = let + val (b_RES, b_SPAN, strm') = b_NT(strm) + val FULL_SPAN = (#1(b_SPAN), #2(b_SPAN)) + in + ((b_RES), FULL_SPAN, strm') + end + fun fontstyle_PROD_4 (strm) = let + val (big_RES, big_SPAN, strm') = big_NT(strm) + val FULL_SPAN = (#1(big_SPAN), #2(big_SPAN)) + in + ((big_RES), FULL_SPAN, strm') + end + fun fontstyle_PROD_5 (strm) = let + val (small_RES, small_SPAN, strm') = small_NT(strm) + val FULL_SPAN = (#1(small_SPAN), #2(small_SPAN)) + in + ((small_RES), FULL_SPAN, strm') + end + fun fontstyle_PROD_6 (strm) = let + val (fontstyle_loose_RES, fontstyle_loose_SPAN, strm') = fontstyle_loose_NT(strm) + val FULL_SPAN = (#1(fontstyle_loose_SPAN), + #2(fontstyle_loose_SPAN)) + in + ((fontstyle_loose_RES), FULL_SPAN, strm') + end + in + (case (lex(strm)) + of (Tok.STARTS(_), _, strm') => fontstyle_PROD_6(strm) + | (Tok.STARTSTRIKE(_), _, strm') => fontstyle_PROD_6(strm) + | (Tok.STARTU(_), _, strm') => fontstyle_PROD_6(strm) + | (Tok.STARTBIG(_), _, strm') => fontstyle_PROD_4(strm) + | (Tok.STARTI(_), _, strm') => fontstyle_PROD_2(strm) + | (Tok.STARTTT(_), _, strm') => fontstyle_PROD_1(strm) + | (Tok.STARTB(_), _, strm') => fontstyle_PROD_3(strm) + | (Tok.STARTSMALL(_), _, strm') => fontstyle_PROD_5(strm) + | _ => fail() + (* end case *)) + end +and fontstyle_loose_NT (strm) = let + fun fontstyle_loose_PROD_1 (strm) = let + val (u_RES, u_SPAN, strm') = u_NT(strm) + val FULL_SPAN = (#1(u_SPAN), #2(u_SPAN)) + in + ((u_RES), FULL_SPAN, strm') + end + fun fontstyle_loose_PROD_2 (strm) = let + val (s_RES, s_SPAN, strm') = s_NT(strm) + val FULL_SPAN = (#1(s_SPAN), #2(s_SPAN)) + in + ((s_RES), FULL_SPAN, strm') + end + fun fontstyle_loose_PROD_3 (strm) = let + val (strike_RES, strike_SPAN, strm') = strike_NT(strm) + val FULL_SPAN = (#1(strike_SPAN), #2(strike_SPAN)) + in + ((strike_RES), FULL_SPAN, strm') + end + in + (case (lex(strm)) + of (Tok.STARTSTRIKE(_), _, strm') => fontstyle_loose_PROD_3(strm) + | (Tok.STARTU(_), _, strm') => fontstyle_loose_PROD_1(strm) + | (Tok.STARTS(_), _, strm') => fontstyle_loose_PROD_2(strm) + | _ => fail() + (* end case *)) + end +and strike_NT (strm) = let + val (STARTSTRIKE_RES, STARTSTRIKE_SPAN, strm') = matchSTARTSTRIKE(strm) + fun strike_PROD_1_SUBRULE_1_NT (strm) = let + val (inline_RES, inline_SPAN, strm') = inline_NT(strm) + val FULL_SPAN = (#1(inline_SPAN), #2(inline_SPAN)) + in + ((inline_RES), FULL_SPAN, strm') + end + fun strike_PROD_1_SUBRULE_1_PRED (strm) = (case (lex(strm)) + of (Tok.COMMENT(_), _, strm') => true + | (Tok.PCDATA(_), _, strm') => true + | (Tok.CHAR_REF(_), _, strm') => true + | (Tok.ENTITY_REF(_), _, strm') => true + | (Tok.STARTA(_), _, strm') => true + | (Tok.STARTABBR(_), _, strm') => true + | (Tok.STARTACRONYM(_), _, strm') => true + | (Tok.STARTAPPLET(_), _, strm') => true + | (Tok.STARTB(_), _, strm') => true + | (Tok.STARTBASEFONT(_), _, strm') => true + | (Tok.STARTBDO(_), _, strm') => true + | (Tok.STARTBIG(_), _, strm') => true + | (Tok.STARTBR(_), _, strm') => true + | (Tok.STARTBUTTON(_), _, strm') => true + | (Tok.STARTCITE(_), _, strm') => true + | (Tok.STARTCODE(_), _, strm') => true + | (Tok.STARTDFN(_), _, strm') => true + | (Tok.STARTEM(_), _, strm') => true + | (Tok.STARTFONT(_), _, strm') => true + | (Tok.STARTI(_), _, strm') => true + | (Tok.STARTIFRAME(_), _, strm') => true + | (Tok.STARTIMG(_), _, strm') => true + | (Tok.STARTINPUT(_), _, strm') => true + | (Tok.STARTKBD(_), _, strm') => true + | (Tok.STARTLABEL(_), _, strm') => true + | (Tok.STARTMAP(_), _, strm') => true + | (Tok.STARTOBJECT(_), _, strm') => true + | (Tok.STARTQ(_), _, strm') => true + | (Tok.STARTS(_), _, strm') => true + | (Tok.STARTSAMP(_), _, strm') => true + | (Tok.STARTSCRIPT(_), _, strm') => true + | (Tok.STARTSELECT(_), _, strm') => true + | (Tok.STARTSMALL(_), _, strm') => true + | (Tok.STARTSPAN(_), _, strm') => true + | (Tok.STARTSTRIKE(_), _, strm') => true + | (Tok.STARTSTRONG(_), _, strm') => true + | (Tok.STARTSUB(_), _, strm') => true + | (Tok.STARTSUP(_), _, strm') => true + | (Tok.STARTTEXTAREA(_), _, strm') => true + | (Tok.STARTTT(_), _, strm') => true + | (Tok.STARTU(_), _, strm') => true + | (Tok.STARTVAR(_), _, strm') => true + | _ => false + (* end case *)) + val (inline_RES, inline_SPAN, strm') = EBNF.closure(strike_PROD_1_SUBRULE_1_PRED, strike_PROD_1_SUBRULE_1_NT, strm') + val (ENDSTRIKE_RES, ENDSTRIKE_SPAN, strm') = matchENDSTRIKE(strm') + val FULL_SPAN = (#1(STARTSTRIKE_SPAN), #2(ENDSTRIKE_SPAN)) + in + (UserCode.strike_PROD_1_ACT (inline_RES, STARTSTRIKE_RES, ENDSTRIKE_RES, inline_SPAN : (Lex.pos * Lex.pos), STARTSTRIKE_SPAN : (Lex.pos * Lex.pos), ENDSTRIKE_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)), + FULL_SPAN, strm') + end +and s_NT (strm) = let + val (STARTS_RES, STARTS_SPAN, strm') = matchSTARTS(strm) + fun s_PROD_1_SUBRULE_1_NT (strm) = let + val (inline_RES, inline_SPAN, strm') = inline_NT(strm) + val FULL_SPAN = (#1(inline_SPAN), #2(inline_SPAN)) + in + ((inline_RES), FULL_SPAN, strm') + end + fun s_PROD_1_SUBRULE_1_PRED (strm) = (case (lex(strm)) + of (Tok.COMMENT(_), _, strm') => true + | (Tok.PCDATA(_), _, strm') => true + | (Tok.CHAR_REF(_), _, strm') => true + | (Tok.ENTITY_REF(_), _, strm') => true + | (Tok.STARTA(_), _, strm') => true + | (Tok.STARTABBR(_), _, strm') => true + | (Tok.STARTACRONYM(_), _, strm') => true + | (Tok.STARTAPPLET(_), _, strm') => true + | (Tok.STARTB(_), _, strm') => true + | (Tok.STARTBASEFONT(_), _, strm') => true + | (Tok.STARTBDO(_), _, strm') => true + | (Tok.STARTBIG(_), _, strm') => true + | (Tok.STARTBR(_), _, strm') => true + | (Tok.STARTBUTTON(_), _, strm') => true + | (Tok.STARTCITE(_), _, strm') => true + | (Tok.STARTCODE(_), _, strm') => true + | (Tok.STARTDFN(_), _, strm') => true + | (Tok.STARTEM(_), _, strm') => true + | (Tok.STARTFONT(_), _, strm') => true + | (Tok.STARTI(_), _, strm') => true + | (Tok.STARTIFRAME(_), _, strm') => true + | (Tok.STARTIMG(_), _, strm') => true + | (Tok.STARTINPUT(_), _, strm') => true + | (Tok.STARTKBD(_), _, strm') => true + | (Tok.STARTLABEL(_), _, strm') => true + | (Tok.STARTMAP(_), _, strm') => true + | (Tok.STARTOBJECT(_), _, strm') => true + | (Tok.STARTQ(_), _, strm') => true + | (Tok.STARTS(_), _, strm') => true + | (Tok.STARTSAMP(_), _, strm') => true + | (Tok.STARTSCRIPT(_), _, strm') => true + | (Tok.STARTSELECT(_), _, strm') => true + | (Tok.STARTSMALL(_), _, strm') => true + | (Tok.STARTSPAN(_), _, strm') => true + | (Tok.STARTSTRIKE(_), _, strm') => true + | (Tok.STARTSTRONG(_), _, strm') => true + | (Tok.STARTSUB(_), _, strm') => true + | (Tok.STARTSUP(_), _, strm') => true + | (Tok.STARTTEXTAREA(_), _, strm') => true + | (Tok.STARTTT(_), _, strm') => true + | (Tok.STARTU(_), _, strm') => true + | (Tok.STARTVAR(_), _, strm') => true + | _ => false + (* end case *)) + val (inline_RES, inline_SPAN, strm') = EBNF.closure(s_PROD_1_SUBRULE_1_PRED, s_PROD_1_SUBRULE_1_NT, strm') + val (ENDS_RES, ENDS_SPAN, strm') = matchENDS(strm') + val FULL_SPAN = (#1(STARTS_SPAN), #2(ENDS_SPAN)) + in + (UserCode.s_PROD_1_ACT (inline_RES, STARTS_RES, ENDS_RES, inline_SPAN : (Lex.pos * Lex.pos), STARTS_SPAN : (Lex.pos * Lex.pos), ENDS_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)), + FULL_SPAN, strm') + end +and u_NT (strm) = let + val (STARTU_RES, STARTU_SPAN, strm') = matchSTARTU(strm) + fun u_PROD_1_SUBRULE_1_NT (strm) = let + val (inline_RES, inline_SPAN, strm') = inline_NT(strm) + val FULL_SPAN = (#1(inline_SPAN), #2(inline_SPAN)) + in + ((inline_RES), FULL_SPAN, strm') + end + fun u_PROD_1_SUBRULE_1_PRED (strm) = (case (lex(strm)) + of (Tok.COMMENT(_), _, strm') => true + | (Tok.PCDATA(_), _, strm') => true + | (Tok.CHAR_REF(_), _, strm') => true + | (Tok.ENTITY_REF(_), _, strm') => true + | (Tok.STARTA(_), _, strm') => true + | (Tok.STARTABBR(_), _, strm') => true + | (Tok.STARTACRONYM(_), _, strm') => true + | (Tok.STARTAPPLET(_), _, strm') => true + | (Tok.STARTB(_), _, strm') => true + | (Tok.STARTBASEFONT(_), _, strm') => true + | (Tok.STARTBDO(_), _, strm') => true + | (Tok.STARTBIG(_), _, strm') => true + | (Tok.STARTBR(_), _, strm') => true + | (Tok.STARTBUTTON(_), _, strm') => true + | (Tok.STARTCITE(_), _, strm') => true + | (Tok.STARTCODE(_), _, strm') => true + | (Tok.STARTDFN(_), _, strm') => true + | (Tok.STARTEM(_), _, strm') => true + | (Tok.STARTFONT(_), _, strm') => true + | (Tok.STARTI(_), _, strm') => true + | (Tok.STARTIFRAME(_), _, strm') => true + | (Tok.STARTIMG(_), _, strm') => true + | (Tok.STARTINPUT(_), _, strm') => true + | (Tok.STARTKBD(_), _, strm') => true + | (Tok.STARTLABEL(_), _, strm') => true + | (Tok.STARTMAP(_), _, strm') => true + | (Tok.STARTOBJECT(_), _, strm') => true + | (Tok.STARTQ(_), _, strm') => true + | (Tok.STARTS(_), _, strm') => true + | (Tok.STARTSAMP(_), _, strm') => true + | (Tok.STARTSCRIPT(_), _, strm') => true + | (Tok.STARTSELECT(_), _, strm') => true + | (Tok.STARTSMALL(_), _, strm') => true + | (Tok.STARTSPAN(_), _, strm') => true + | (Tok.STARTSTRIKE(_), _, strm') => true + | (Tok.STARTSTRONG(_), _, strm') => true + | (Tok.STARTSUB(_), _, strm') => true + | (Tok.STARTSUP(_), _, strm') => true + | (Tok.STARTTEXTAREA(_), _, strm') => true + | (Tok.STARTTT(_), _, strm') => true + | (Tok.STARTU(_), _, strm') => true + | (Tok.STARTVAR(_), _, strm') => true + | _ => false + (* end case *)) + val (inline_RES, inline_SPAN, strm') = EBNF.closure(u_PROD_1_SUBRULE_1_PRED, u_PROD_1_SUBRULE_1_NT, strm') + val (ENDU_RES, ENDU_SPAN, strm') = matchENDU(strm') + val FULL_SPAN = (#1(STARTU_SPAN), #2(ENDU_SPAN)) + in + (UserCode.u_PROD_1_ACT (inline_RES, STARTU_RES, ENDU_RES, inline_SPAN : (Lex.pos * Lex.pos), STARTU_SPAN : (Lex.pos * Lex.pos), ENDU_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)), + FULL_SPAN, strm') + end +and small_NT (strm) = let + val (STARTSMALL_RES, STARTSMALL_SPAN, strm') = matchSTARTSMALL(strm) + fun small_PROD_1_SUBRULE_1_NT (strm) = let + val (inline_RES, inline_SPAN, strm') = inline_NT(strm) + val FULL_SPAN = (#1(inline_SPAN), #2(inline_SPAN)) + in + ((inline_RES), FULL_SPAN, strm') + end + fun small_PROD_1_SUBRULE_1_PRED (strm) = (case (lex(strm)) + of (Tok.COMMENT(_), _, strm') => true + | (Tok.PCDATA(_), _, strm') => true + | (Tok.CHAR_REF(_), _, strm') => true + | (Tok.ENTITY_REF(_), _, strm') => true + | (Tok.STARTA(_), _, strm') => true + | (Tok.STARTABBR(_), _, strm') => true + | (Tok.STARTACRONYM(_), _, strm') => true + | (Tok.STARTAPPLET(_), _, strm') => true + | (Tok.STARTB(_), _, strm') => true + | (Tok.STARTBASEFONT(_), _, strm') => true + | (Tok.STARTBDO(_), _, strm') => true + | (Tok.STARTBIG(_), _, strm') => true + | (Tok.STARTBR(_), _, strm') => true + | (Tok.STARTBUTTON(_), _, strm') => true + | (Tok.STARTCITE(_), _, strm') => true + | (Tok.STARTCODE(_), _, strm') => true + | (Tok.STARTDFN(_), _, strm') => true + | (Tok.STARTEM(_), _, strm') => true + | (Tok.STARTFONT(_), _, strm') => true + | (Tok.STARTI(_), _, strm') => true + | (Tok.STARTIFRAME(_), _, strm') => true + | (Tok.STARTIMG(_), _, strm') => true + | (Tok.STARTINPUT(_), _, strm') => true + | (Tok.STARTKBD(_), _, strm') => true + | (Tok.STARTLABEL(_), _, strm') => true + | (Tok.STARTMAP(_), _, strm') => true + | (Tok.STARTOBJECT(_), _, strm') => true + | (Tok.STARTQ(_), _, strm') => true + | (Tok.STARTS(_), _, strm') => true + | (Tok.STARTSAMP(_), _, strm') => true + | (Tok.STARTSCRIPT(_), _, strm') => true + | (Tok.STARTSELECT(_), _, strm') => true + | (Tok.STARTSMALL(_), _, strm') => true + | (Tok.STARTSPAN(_), _, strm') => true + | (Tok.STARTSTRIKE(_), _, strm') => true + | (Tok.STARTSTRONG(_), _, strm') => true + | (Tok.STARTSUB(_), _, strm') => true + | (Tok.STARTSUP(_), _, strm') => true + | (Tok.STARTTEXTAREA(_), _, strm') => true + | (Tok.STARTTT(_), _, strm') => true + | (Tok.STARTU(_), _, strm') => true + | (Tok.STARTVAR(_), _, strm') => true + | _ => false + (* end case *)) + val (inline_RES, inline_SPAN, strm') = EBNF.closure(small_PROD_1_SUBRULE_1_PRED, small_PROD_1_SUBRULE_1_NT, strm') + val (ENDSMALL_RES, ENDSMALL_SPAN, strm') = matchENDSMALL(strm') + val FULL_SPAN = (#1(STARTSMALL_SPAN), #2(ENDSMALL_SPAN)) + in + (UserCode.small_PROD_1_ACT (inline_RES, ENDSMALL_RES, STARTSMALL_RES, inline_SPAN : (Lex.pos * Lex.pos), ENDSMALL_SPAN : (Lex.pos * Lex.pos), STARTSMALL_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)), + FULL_SPAN, strm') + end +and big_NT (strm) = let + val (STARTBIG_RES, STARTBIG_SPAN, strm') = matchSTARTBIG(strm) + fun big_PROD_1_SUBRULE_1_NT (strm) = let + val (inline_RES, inline_SPAN, strm') = inline_NT(strm) + val FULL_SPAN = (#1(inline_SPAN), #2(inline_SPAN)) + in + ((inline_RES), FULL_SPAN, strm') + end + fun big_PROD_1_SUBRULE_1_PRED (strm) = (case (lex(strm)) + of (Tok.COMMENT(_), _, strm') => true + | (Tok.PCDATA(_), _, strm') => true + | (Tok.CHAR_REF(_), _, strm') => true + | (Tok.ENTITY_REF(_), _, strm') => true + | (Tok.STARTA(_), _, strm') => true + | (Tok.STARTABBR(_), _, strm') => true + | (Tok.STARTACRONYM(_), _, strm') => true + | (Tok.STARTAPPLET(_), _, strm') => true + | (Tok.STARTB(_), _, strm') => true + | (Tok.STARTBASEFONT(_), _, strm') => true + | (Tok.STARTBDO(_), _, strm') => true + | (Tok.STARTBIG(_), _, strm') => true + | (Tok.STARTBR(_), _, strm') => true + | (Tok.STARTBUTTON(_), _, strm') => true + | (Tok.STARTCITE(_), _, strm') => true + | (Tok.STARTCODE(_), _, strm') => true + | (Tok.STARTDFN(_), _, strm') => true + | (Tok.STARTEM(_), _, strm') => true + | (Tok.STARTFONT(_), _, strm') => true + | (Tok.STARTI(_), _, strm') => true + | (Tok.STARTIFRAME(_), _, strm') => true + | (Tok.STARTIMG(_), _, strm') => true + | (Tok.STARTINPUT(_), _, strm') => true + | (Tok.STARTKBD(_), _, strm') => true + | (Tok.STARTLABEL(_), _, strm') => true + | (Tok.STARTMAP(_), _, strm') => true + | (Tok.STARTOBJECT(_), _, strm') => true + | (Tok.STARTQ(_), _, strm') => true + | (Tok.STARTS(_), _, strm') => true + | (Tok.STARTSAMP(_), _, strm') => true + | (Tok.STARTSCRIPT(_), _, strm') => true + | (Tok.STARTSELECT(_), _, strm') => true + | (Tok.STARTSMALL(_), _, strm') => true + | (Tok.STARTSPAN(_), _, strm') => true + | (Tok.STARTSTRIKE(_), _, strm') => true + | (Tok.STARTSTRONG(_), _, strm') => true + | (Tok.STARTSUB(_), _, strm') => true + | (Tok.STARTSUP(_), _, strm') => true + | (Tok.STARTTEXTAREA(_), _, strm') => true + | (Tok.STARTTT(_), _, strm') => true + | (Tok.STARTU(_), _, strm') => true + | (Tok.STARTVAR(_), _, strm') => true + | _ => false + (* end case *)) + val (inline_RES, inline_SPAN, strm') = EBNF.closure(big_PROD_1_SUBRULE_1_PRED, big_PROD_1_SUBRULE_1_NT, strm') + val (ENDBIG_RES, ENDBIG_SPAN, strm') = matchENDBIG(strm') + val FULL_SPAN = (#1(STARTBIG_SPAN), #2(ENDBIG_SPAN)) + in + (UserCode.big_PROD_1_ACT (inline_RES, ENDBIG_RES, STARTBIG_RES, inline_SPAN : (Lex.pos * Lex.pos), ENDBIG_SPAN : (Lex.pos * Lex.pos), STARTBIG_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)), + FULL_SPAN, strm') + end +and b_NT (strm) = let + val (STARTB_RES, STARTB_SPAN, strm') = matchSTARTB(strm) + fun b_PROD_1_SUBRULE_1_NT (strm) = let + val (inline_RES, inline_SPAN, strm') = inline_NT(strm) + val FULL_SPAN = (#1(inline_SPAN), #2(inline_SPAN)) + in + ((inline_RES), FULL_SPAN, strm') + end + fun b_PROD_1_SUBRULE_1_PRED (strm) = (case (lex(strm)) + of (Tok.COMMENT(_), _, strm') => true + | (Tok.PCDATA(_), _, strm') => true + | (Tok.CHAR_REF(_), _, strm') => true + | (Tok.ENTITY_REF(_), _, strm') => true + | (Tok.STARTA(_), _, strm') => true + | (Tok.STARTABBR(_), _, strm') => true + | (Tok.STARTACRONYM(_), _, strm') => true + | (Tok.STARTAPPLET(_), _, strm') => true + | (Tok.STARTB(_), _, strm') => true + | (Tok.STARTBASEFONT(_), _, strm') => true + | (Tok.STARTBDO(_), _, strm') => true + | (Tok.STARTBIG(_), _, strm') => true + | (Tok.STARTBR(_), _, strm') => true + | (Tok.STARTBUTTON(_), _, strm') => true + | (Tok.STARTCITE(_), _, strm') => true + | (Tok.STARTCODE(_), _, strm') => true + | (Tok.STARTDFN(_), _, strm') => true + | (Tok.STARTEM(_), _, strm') => true + | (Tok.STARTFONT(_), _, strm') => true + | (Tok.STARTI(_), _, strm') => true + | (Tok.STARTIFRAME(_), _, strm') => true + | (Tok.STARTIMG(_), _, strm') => true + | (Tok.STARTINPUT(_), _, strm') => true + | (Tok.STARTKBD(_), _, strm') => true + | (Tok.STARTLABEL(_), _, strm') => true + | (Tok.STARTMAP(_), _, strm') => true + | (Tok.STARTOBJECT(_), _, strm') => true + | (Tok.STARTQ(_), _, strm') => true + | (Tok.STARTS(_), _, strm') => true + | (Tok.STARTSAMP(_), _, strm') => true + | (Tok.STARTSCRIPT(_), _, strm') => true + | (Tok.STARTSELECT(_), _, strm') => true + | (Tok.STARTSMALL(_), _, strm') => true + | (Tok.STARTSPAN(_), _, strm') => true + | (Tok.STARTSTRIKE(_), _, strm') => true + | (Tok.STARTSTRONG(_), _, strm') => true + | (Tok.STARTSUB(_), _, strm') => true + | (Tok.STARTSUP(_), _, strm') => true + | (Tok.STARTTEXTAREA(_), _, strm') => true + | (Tok.STARTTT(_), _, strm') => true + | (Tok.STARTU(_), _, strm') => true + | (Tok.STARTVAR(_), _, strm') => true + | _ => false + (* end case *)) + val (inline_RES, inline_SPAN, strm') = EBNF.closure(b_PROD_1_SUBRULE_1_PRED, b_PROD_1_SUBRULE_1_NT, strm') + val (ENDB_RES, ENDB_SPAN, strm') = matchENDB(strm') + val FULL_SPAN = (#1(STARTB_SPAN), #2(ENDB_SPAN)) + in + (UserCode.b_PROD_1_ACT (inline_RES, STARTB_RES, ENDB_RES, inline_SPAN : (Lex.pos * Lex.pos), STARTB_SPAN : (Lex.pos * Lex.pos), ENDB_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)), + FULL_SPAN, strm') + end +and i_NT (strm) = let + val (STARTI_RES, STARTI_SPAN, strm') = matchSTARTI(strm) + fun i_PROD_1_SUBRULE_1_NT (strm) = let + val (inline_RES, inline_SPAN, strm') = inline_NT(strm) + val FULL_SPAN = (#1(inline_SPAN), #2(inline_SPAN)) + in + ((inline_RES), FULL_SPAN, strm') + end + fun i_PROD_1_SUBRULE_1_PRED (strm) = (case (lex(strm)) + of (Tok.COMMENT(_), _, strm') => true + | (Tok.PCDATA(_), _, strm') => true + | (Tok.CHAR_REF(_), _, strm') => true + | (Tok.ENTITY_REF(_), _, strm') => true + | (Tok.STARTA(_), _, strm') => true + | (Tok.STARTABBR(_), _, strm') => true + | (Tok.STARTACRONYM(_), _, strm') => true + | (Tok.STARTAPPLET(_), _, strm') => true + | (Tok.STARTB(_), _, strm') => true + | (Tok.STARTBASEFONT(_), _, strm') => true + | (Tok.STARTBDO(_), _, strm') => true + | (Tok.STARTBIG(_), _, strm') => true + | (Tok.STARTBR(_), _, strm') => true + | (Tok.STARTBUTTON(_), _, strm') => true + | (Tok.STARTCITE(_), _, strm') => true + | (Tok.STARTCODE(_), _, strm') => true + | (Tok.STARTDFN(_), _, strm') => true + | (Tok.STARTEM(_), _, strm') => true + | (Tok.STARTFONT(_), _, strm') => true + | (Tok.STARTI(_), _, strm') => true + | (Tok.STARTIFRAME(_), _, strm') => true + | (Tok.STARTIMG(_), _, strm') => true + | (Tok.STARTINPUT(_), _, strm') => true + | (Tok.STARTKBD(_), _, strm') => true + | (Tok.STARTLABEL(_), _, strm') => true + | (Tok.STARTMAP(_), _, strm') => true + | (Tok.STARTOBJECT(_), _, strm') => true + | (Tok.STARTQ(_), _, strm') => true + | (Tok.STARTS(_), _, strm') => true + | (Tok.STARTSAMP(_), _, strm') => true + | (Tok.STARTSCRIPT(_), _, strm') => true + | (Tok.STARTSELECT(_), _, strm') => true + | (Tok.STARTSMALL(_), _, strm') => true + | (Tok.STARTSPAN(_), _, strm') => true + | (Tok.STARTSTRIKE(_), _, strm') => true + | (Tok.STARTSTRONG(_), _, strm') => true + | (Tok.STARTSUB(_), _, strm') => true + | (Tok.STARTSUP(_), _, strm') => true + | (Tok.STARTTEXTAREA(_), _, strm') => true + | (Tok.STARTTT(_), _, strm') => true + | (Tok.STARTU(_), _, strm') => true + | (Tok.STARTVAR(_), _, strm') => true + | _ => false + (* end case *)) + val (inline_RES, inline_SPAN, strm') = EBNF.closure(i_PROD_1_SUBRULE_1_PRED, i_PROD_1_SUBRULE_1_NT, strm') + val (ENDI_RES, ENDI_SPAN, strm') = matchENDI(strm') + val FULL_SPAN = (#1(STARTI_SPAN), #2(ENDI_SPAN)) + in + (UserCode.i_PROD_1_ACT (inline_RES, STARTI_RES, ENDI_RES, inline_SPAN : (Lex.pos * Lex.pos), STARTI_SPAN : (Lex.pos * Lex.pos), ENDI_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)), + FULL_SPAN, strm') + end +and tt_NT (strm) = let + val (STARTTT_RES, STARTTT_SPAN, strm') = matchSTARTTT(strm) + fun tt_PROD_1_SUBRULE_1_NT (strm) = let + val (inline_RES, inline_SPAN, strm') = inline_NT(strm) + val FULL_SPAN = (#1(inline_SPAN), #2(inline_SPAN)) + in + ((inline_RES), FULL_SPAN, strm') + end + fun tt_PROD_1_SUBRULE_1_PRED (strm) = (case (lex(strm)) + of (Tok.COMMENT(_), _, strm') => true + | (Tok.PCDATA(_), _, strm') => true + | (Tok.CHAR_REF(_), _, strm') => true + | (Tok.ENTITY_REF(_), _, strm') => true + | (Tok.STARTA(_), _, strm') => true + | (Tok.STARTABBR(_), _, strm') => true + | (Tok.STARTACRONYM(_), _, strm') => true + | (Tok.STARTAPPLET(_), _, strm') => true + | (Tok.STARTB(_), _, strm') => true + | (Tok.STARTBASEFONT(_), _, strm') => true + | (Tok.STARTBDO(_), _, strm') => true + | (Tok.STARTBIG(_), _, strm') => true + | (Tok.STARTBR(_), _, strm') => true + | (Tok.STARTBUTTON(_), _, strm') => true + | (Tok.STARTCITE(_), _, strm') => true + | (Tok.STARTCODE(_), _, strm') => true + | (Tok.STARTDFN(_), _, strm') => true + | (Tok.STARTEM(_), _, strm') => true + | (Tok.STARTFONT(_), _, strm') => true + | (Tok.STARTI(_), _, strm') => true + | (Tok.STARTIFRAME(_), _, strm') => true + | (Tok.STARTIMG(_), _, strm') => true + | (Tok.STARTINPUT(_), _, strm') => true + | (Tok.STARTKBD(_), _, strm') => true + | (Tok.STARTLABEL(_), _, strm') => true + | (Tok.STARTMAP(_), _, strm') => true + | (Tok.STARTOBJECT(_), _, strm') => true + | (Tok.STARTQ(_), _, strm') => true + | (Tok.STARTS(_), _, strm') => true + | (Tok.STARTSAMP(_), _, strm') => true + | (Tok.STARTSCRIPT(_), _, strm') => true + | (Tok.STARTSELECT(_), _, strm') => true + | (Tok.STARTSMALL(_), _, strm') => true + | (Tok.STARTSPAN(_), _, strm') => true + | (Tok.STARTSTRIKE(_), _, strm') => true + | (Tok.STARTSTRONG(_), _, strm') => true + | (Tok.STARTSUB(_), _, strm') => true + | (Tok.STARTSUP(_), _, strm') => true + | (Tok.STARTTEXTAREA(_), _, strm') => true + | (Tok.STARTTT(_), _, strm') => true + | (Tok.STARTU(_), _, strm') => true + | (Tok.STARTVAR(_), _, strm') => true + | _ => false + (* end case *)) + val (inline_RES, inline_SPAN, strm') = EBNF.closure(tt_PROD_1_SUBRULE_1_PRED, tt_PROD_1_SUBRULE_1_NT, strm') + val (ENDTT_RES, ENDTT_SPAN, strm') = matchENDTT(strm') + val FULL_SPAN = (#1(STARTTT_SPAN), #2(ENDTT_SPAN)) + in + (UserCode.tt_PROD_1_ACT (ENDTT_RES, inline_RES, STARTTT_RES, ENDTT_SPAN : (Lex.pos * Lex.pos), inline_SPAN : (Lex.pos * Lex.pos), STARTTT_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)), + FULL_SPAN, strm') + end +fun del_NT (strm) = let + val (STARTDEL_RES, STARTDEL_SPAN, strm') = matchSTARTDEL(strm) + fun del_PROD_1_SUBRULE_1_NT (strm) = let + val (flow_RES, flow_SPAN, strm') = flow_NT(strm) + val FULL_SPAN = (#1(flow_SPAN), #2(flow_SPAN)) + in + ((flow_RES), FULL_SPAN, strm') + end + fun del_PROD_1_SUBRULE_1_PRED (strm) = (case (lex(strm)) + of (Tok.COMMENT(_), _, strm') => true + | (Tok.PCDATA(_), _, strm') => true + | (Tok.CHAR_REF(_), _, strm') => true + | (Tok.ENTITY_REF(_), _, strm') => true + | (Tok.STARTA(_), _, strm') => true + | (Tok.STARTABBR(_), _, strm') => true + | (Tok.STARTACRONYM(_), _, strm') => true + | (Tok.STARTADDRESS(_), _, strm') => true + | (Tok.STARTAPPLET(_), _, strm') => true + | (Tok.STARTB(_), _, strm') => true + | (Tok.STARTBASEFONT(_), _, strm') => true + | (Tok.STARTBDO(_), _, strm') => true + | (Tok.STARTBIG(_), _, strm') => true + | (Tok.STARTBLOCKQUOTE(_), _, strm') => true + | (Tok.STARTBR(_), _, strm') => true + | (Tok.STARTBUTTON(_), _, strm') => true + | (Tok.STARTCENTER(_), _, strm') => true + | (Tok.STARTCITE(_), _, strm') => true + | (Tok.STARTCODE(_), _, strm') => true + | (Tok.STARTDFN(_), _, strm') => true + | (Tok.STARTDIR(_), _, strm') => true + | (Tok.STARTDIV(_), _, strm') => true + | (Tok.STARTDL(_), _, strm') => true + | (Tok.STARTEM(_), _, strm') => true + | (Tok.STARTFIELDSET(_), _, strm') => true + | (Tok.STARTFONT(_), _, strm') => true + | (Tok.STARTFORM(_), _, strm') => true + | (Tok.STARTH1(_), _, strm') => true + | (Tok.STARTH2(_), _, strm') => true + | (Tok.STARTH3(_), _, strm') => true + | (Tok.STARTH4(_), _, strm') => true + | (Tok.STARTH5(_), _, strm') => true + | (Tok.STARTH6(_), _, strm') => true + | (Tok.STARTHR(_), _, strm') => true + | (Tok.STARTI(_), _, strm') => true + | (Tok.STARTIFRAME(_), _, strm') => true + | (Tok.STARTIMG(_), _, strm') => true + | (Tok.STARTINPUT(_), _, strm') => true + | (Tok.STARTISINDEX(_), _, strm') => true + | (Tok.STARTKBD(_), _, strm') => true + | (Tok.STARTLABEL(_), _, strm') => true + | (Tok.STARTMAP(_), _, strm') => true + | (Tok.STARTMENU(_), _, strm') => true + | (Tok.STARTNOSCRIPT(_), _, strm') => true + | (Tok.STARTOBJECT(_), _, strm') => true + | (Tok.STARTOL(_), _, strm') => true + | (Tok.STARTP(_), _, strm') => true + | (Tok.STARTPRE(_), _, strm') => true + | (Tok.STARTQ(_), _, strm') => true + | (Tok.STARTS(_), _, strm') => true + | (Tok.STARTSAMP(_), _, strm') => true + | (Tok.STARTSCRIPT(_), _, strm') => true + | (Tok.STARTSELECT(_), _, strm') => true + | (Tok.STARTSMALL(_), _, strm') => true + | (Tok.STARTSPAN(_), _, strm') => true + | (Tok.STARTSTRIKE(_), _, strm') => true + | (Tok.STARTSTRONG(_), _, strm') => true + | (Tok.STARTSUB(_), _, strm') => true + | (Tok.STARTSUP(_), _, strm') => true + | (Tok.STARTTABLE(_), _, strm') => true + | (Tok.STARTTEXTAREA(_), _, strm') => true + | (Tok.STARTTT(_), _, strm') => true + | (Tok.STARTU(_), _, strm') => true + | (Tok.STARTUL(_), _, strm') => true + | (Tok.STARTVAR(_), _, strm') => true + | _ => false + (* end case *)) + val (flow_RES, flow_SPAN, strm') = EBNF.closure(del_PROD_1_SUBRULE_1_PRED, del_PROD_1_SUBRULE_1_NT, strm') + val (ENDDEL_RES, ENDDEL_SPAN, strm') = matchENDDEL(strm') + val FULL_SPAN = (#1(STARTDEL_SPAN), #2(ENDDEL_SPAN)) + in + (UserCode.del_PROD_1_ACT (flow_RES, ENDDEL_RES, STARTDEL_RES, flow_SPAN : (Lex.pos * Lex.pos), ENDDEL_SPAN : (Lex.pos * Lex.pos), STARTDEL_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)), + FULL_SPAN, strm') + end +fun ins_NT (strm) = let + val (STARTINS_RES, STARTINS_SPAN, strm') = matchSTARTINS(strm) + fun ins_PROD_1_SUBRULE_1_NT (strm) = let + val (flow_RES, flow_SPAN, strm') = flow_NT(strm) + val FULL_SPAN = (#1(flow_SPAN), #2(flow_SPAN)) + in + ((flow_RES), FULL_SPAN, strm') + end + fun ins_PROD_1_SUBRULE_1_PRED (strm) = (case (lex(strm)) + of (Tok.COMMENT(_), _, strm') => true + | (Tok.PCDATA(_), _, strm') => true + | (Tok.CHAR_REF(_), _, strm') => true + | (Tok.ENTITY_REF(_), _, strm') => true + | (Tok.STARTA(_), _, strm') => true + | (Tok.STARTABBR(_), _, strm') => true + | (Tok.STARTACRONYM(_), _, strm') => true + | (Tok.STARTADDRESS(_), _, strm') => true + | (Tok.STARTAPPLET(_), _, strm') => true + | (Tok.STARTB(_), _, strm') => true + | (Tok.STARTBASEFONT(_), _, strm') => true + | (Tok.STARTBDO(_), _, strm') => true + | (Tok.STARTBIG(_), _, strm') => true + | (Tok.STARTBLOCKQUOTE(_), _, strm') => true + | (Tok.STARTBR(_), _, strm') => true + | (Tok.STARTBUTTON(_), _, strm') => true + | (Tok.STARTCENTER(_), _, strm') => true + | (Tok.STARTCITE(_), _, strm') => true + | (Tok.STARTCODE(_), _, strm') => true + | (Tok.STARTDFN(_), _, strm') => true + | (Tok.STARTDIR(_), _, strm') => true + | (Tok.STARTDIV(_), _, strm') => true + | (Tok.STARTDL(_), _, strm') => true + | (Tok.STARTEM(_), _, strm') => true + | (Tok.STARTFIELDSET(_), _, strm') => true + | (Tok.STARTFONT(_), _, strm') => true + | (Tok.STARTFORM(_), _, strm') => true + | (Tok.STARTH1(_), _, strm') => true + | (Tok.STARTH2(_), _, strm') => true + | (Tok.STARTH3(_), _, strm') => true + | (Tok.STARTH4(_), _, strm') => true + | (Tok.STARTH5(_), _, strm') => true + | (Tok.STARTH6(_), _, strm') => true + | (Tok.STARTHR(_), _, strm') => true + | (Tok.STARTI(_), _, strm') => true + | (Tok.STARTIFRAME(_), _, strm') => true + | (Tok.STARTIMG(_), _, strm') => true + | (Tok.STARTINPUT(_), _, strm') => true + | (Tok.STARTISINDEX(_), _, strm') => true + | (Tok.STARTKBD(_), _, strm') => true + | (Tok.STARTLABEL(_), _, strm') => true + | (Tok.STARTMAP(_), _, strm') => true + | (Tok.STARTMENU(_), _, strm') => true + | (Tok.STARTNOSCRIPT(_), _, strm') => true + | (Tok.STARTOBJECT(_), _, strm') => true + | (Tok.STARTOL(_), _, strm') => true + | (Tok.STARTP(_), _, strm') => true + | (Tok.STARTPRE(_), _, strm') => true + | (Tok.STARTQ(_), _, strm') => true + | (Tok.STARTS(_), _, strm') => true + | (Tok.STARTSAMP(_), _, strm') => true + | (Tok.STARTSCRIPT(_), _, strm') => true + | (Tok.STARTSELECT(_), _, strm') => true + | (Tok.STARTSMALL(_), _, strm') => true + | (Tok.STARTSPAN(_), _, strm') => true + | (Tok.STARTSTRIKE(_), _, strm') => true + | (Tok.STARTSTRONG(_), _, strm') => true + | (Tok.STARTSUB(_), _, strm') => true + | (Tok.STARTSUP(_), _, strm') => true + | (Tok.STARTTABLE(_), _, strm') => true + | (Tok.STARTTEXTAREA(_), _, strm') => true + | (Tok.STARTTT(_), _, strm') => true + | (Tok.STARTU(_), _, strm') => true + | (Tok.STARTUL(_), _, strm') => true + | (Tok.STARTVAR(_), _, strm') => true + | _ => false + (* end case *)) + val (flow_RES, flow_SPAN, strm') = EBNF.closure(ins_PROD_1_SUBRULE_1_PRED, ins_PROD_1_SUBRULE_1_NT, strm') + val (ENDINS_RES, ENDINS_SPAN, strm') = matchENDINS(strm') + val FULL_SPAN = (#1(STARTINS_SPAN), #2(ENDINS_SPAN)) + in + (UserCode.ins_PROD_1_ACT (STARTINS_RES, flow_RES, ENDINS_RES, STARTINS_SPAN : (Lex.pos * Lex.pos), flow_SPAN : (Lex.pos * Lex.pos), ENDINS_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)), + FULL_SPAN, strm') + end +fun body_rest_NT (strm) = let + fun body_rest_PROD_1_SUBRULE_1_NT (strm) = let + fun body_rest_PROD_1_SUBRULE_1_PROD_1 (strm) = let + val (block_RES, block_SPAN, strm') = block_NT(strm) + val FULL_SPAN = (#1(block_SPAN), #2(block_SPAN)) + in + ((block_RES), FULL_SPAN, strm') + end + fun body_rest_PROD_1_SUBRULE_1_PROD_2 (strm) = let + val (script_RES, script_SPAN, strm') = script_NT(strm) + val FULL_SPAN = (#1(script_SPAN), #2(script_SPAN)) + in + ((script_RES), FULL_SPAN, strm') + end + fun body_rest_PROD_1_SUBRULE_1_PROD_3 (strm) = let + val (ins_RES, ins_SPAN, strm') = ins_NT(strm) + val FULL_SPAN = (#1(ins_SPAN), #2(ins_SPAN)) + in + ((ins_RES), FULL_SPAN, strm') + end + fun body_rest_PROD_1_SUBRULE_1_PROD_4 (strm) = let + val (del_RES, del_SPAN, strm') = del_NT(strm) + val FULL_SPAN = (#1(del_SPAN), #2(del_SPAN)) + in + ((del_RES), FULL_SPAN, strm') + end + fun body_rest_PROD_1_SUBRULE_1_PROD_5 (strm) = let + val (cdata_RES, cdata_SPAN, strm') = cdata_NT(strm) + val FULL_SPAN = (#1(cdata_SPAN), #2(cdata_SPAN)) + in + ((cdata_RES), FULL_SPAN, strm') + end + in + (case (lex(strm)) + of (Tok.COMMENT(_), _, strm') => + body_rest_PROD_1_SUBRULE_1_PROD_5(strm) + | (Tok.PCDATA(_), _, strm') => + body_rest_PROD_1_SUBRULE_1_PROD_5(strm) + | (Tok.CHAR_REF(_), _, strm') => + body_rest_PROD_1_SUBRULE_1_PROD_5(strm) + | (Tok.ENTITY_REF(_), _, strm') => + body_rest_PROD_1_SUBRULE_1_PROD_5(strm) + | (Tok.STARTINS(_), _, strm') => + body_rest_PROD_1_SUBRULE_1_PROD_3(strm) + | (Tok.STARTADDRESS(_), _, strm') => + body_rest_PROD_1_SUBRULE_1_PROD_1(strm) + | (Tok.STARTBLOCKQUOTE(_), _, strm') => + body_rest_PROD_1_SUBRULE_1_PROD_1(strm) + | (Tok.STARTCENTER(_), _, strm') => + body_rest_PROD_1_SUBRULE_1_PROD_1(strm) + | (Tok.STARTDIR(_), _, strm') => + body_rest_PROD_1_SUBRULE_1_PROD_1(strm) + | (Tok.STARTDIV(_), _, strm') => + body_rest_PROD_1_SUBRULE_1_PROD_1(strm) + | (Tok.STARTDL(_), _, strm') => + body_rest_PROD_1_SUBRULE_1_PROD_1(strm) + | (Tok.STARTFIELDSET(_), _, strm') => + body_rest_PROD_1_SUBRULE_1_PROD_1(strm) + | (Tok.STARTFORM(_), _, strm') => + body_rest_PROD_1_SUBRULE_1_PROD_1(strm) + | (Tok.STARTH1(_), _, strm') => + body_rest_PROD_1_SUBRULE_1_PROD_1(strm) + | (Tok.STARTH2(_), _, strm') => + body_rest_PROD_1_SUBRULE_1_PROD_1(strm) + | (Tok.STARTH3(_), _, strm') => + body_rest_PROD_1_SUBRULE_1_PROD_1(strm) + | (Tok.STARTH4(_), _, strm') => + body_rest_PROD_1_SUBRULE_1_PROD_1(strm) + | (Tok.STARTH5(_), _, strm') => + body_rest_PROD_1_SUBRULE_1_PROD_1(strm) + | (Tok.STARTH6(_), _, strm') => + body_rest_PROD_1_SUBRULE_1_PROD_1(strm) + | (Tok.STARTHR(_), _, strm') => + body_rest_PROD_1_SUBRULE_1_PROD_1(strm) + | (Tok.STARTISINDEX(_), _, strm') => + body_rest_PROD_1_SUBRULE_1_PROD_1(strm) + | (Tok.STARTMENU(_), _, strm') => + body_rest_PROD_1_SUBRULE_1_PROD_1(strm) + | (Tok.STARTNOSCRIPT(_), _, strm') => + body_rest_PROD_1_SUBRULE_1_PROD_1(strm) + | (Tok.STARTOL(_), _, strm') => + body_rest_PROD_1_SUBRULE_1_PROD_1(strm) + | (Tok.STARTP(_), _, strm') => + body_rest_PROD_1_SUBRULE_1_PROD_1(strm) + | (Tok.STARTPRE(_), _, strm') => + body_rest_PROD_1_SUBRULE_1_PROD_1(strm) + | (Tok.STARTTABLE(_), _, strm') => + body_rest_PROD_1_SUBRULE_1_PROD_1(strm) + | (Tok.STARTUL(_), _, strm') => + body_rest_PROD_1_SUBRULE_1_PROD_1(strm) + | (Tok.STARTSCRIPT(_), _, strm') => + body_rest_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STARTDEL(_), _, strm') => + body_rest_PROD_1_SUBRULE_1_PROD_4(strm) + | _ => fail() + (* end case *)) + end + fun body_rest_PROD_1_SUBRULE_1_PRED (strm) = (case (lex(strm)) + of (Tok.COMMENT(_), _, strm') => true + | (Tok.PCDATA(_), _, strm') => true + | (Tok.CHAR_REF(_), _, strm') => true + | (Tok.ENTITY_REF(_), _, strm') => true + | (Tok.STARTADDRESS(_), _, strm') => true + | (Tok.STARTBLOCKQUOTE(_), _, strm') => true + | (Tok.STARTCENTER(_), _, strm') => true + | (Tok.STARTDEL(_), _, strm') => true + | (Tok.STARTDIR(_), _, strm') => true + | (Tok.STARTDIV(_), _, strm') => true + | (Tok.STARTDL(_), _, strm') => true + | (Tok.STARTFIELDSET(_), _, strm') => true + | (Tok.STARTFORM(_), _, strm') => true + | (Tok.STARTH1(_), _, strm') => true + | (Tok.STARTH2(_), _, strm') => true + | (Tok.STARTH3(_), _, strm') => true + | (Tok.STARTH4(_), _, strm') => true + | (Tok.STARTH5(_), _, strm') => true + | (Tok.STARTH6(_), _, strm') => true + | (Tok.STARTHR(_), _, strm') => true + | (Tok.STARTINS(_), _, strm') => true + | (Tok.STARTISINDEX(_), _, strm') => true + | (Tok.STARTMENU(_), _, strm') => true + | (Tok.STARTNOSCRIPT(_), _, strm') => true + | (Tok.STARTOL(_), _, strm') => true + | (Tok.STARTP(_), _, strm') => true + | (Tok.STARTPRE(_), _, strm') => true + | (Tok.STARTSCRIPT(_), _, strm') => true + | (Tok.STARTTABLE(_), _, strm') => true + | (Tok.STARTUL(_), _, strm') => true + | _ => false + (* end case *)) + val (SR1_RES, SR1_SPAN, strm') = EBNF.closure(body_rest_PROD_1_SUBRULE_1_PRED, body_rest_PROD_1_SUBRULE_1_NT, strm) + fun body_rest_PROD_1_SUBRULE_2_NT (strm) = let + val (ENDBODY_RES, ENDBODY_SPAN, strm') = matchENDBODY(strm) + val (cdata_opt_RES, cdata_opt_SPAN, strm') = cdata_opt_NT(strm') + val FULL_SPAN = (#1(ENDBODY_SPAN), #2(cdata_opt_SPAN)) + in + (UserCode.body_rest_PROD_1_SUBRULE_2_PROD_1_ACT (ENDBODY_RES, SR1_RES, cdata_opt_RES, ENDBODY_SPAN : (Lex.pos * Lex.pos), SR1_SPAN : (Lex.pos * Lex.pos), cdata_opt_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)), + FULL_SPAN, strm') + end + fun body_rest_PROD_1_SUBRULE_2_PRED (strm) = (case (lex(strm)) + of (Tok.ENDBODY, _, strm') => true + | _ => false + (* end case *)) + val (SR2_RES, SR2_SPAN, strm') = EBNF.optional(body_rest_PROD_1_SUBRULE_2_PRED, body_rest_PROD_1_SUBRULE_2_NT, strm') + val FULL_SPAN = (#1(SR1_SPAN), #2(SR2_SPAN)) + in + (UserCode.body_rest_PROD_1_ACT (SR1_RES, SR2_RES, SR1_SPAN : (Lex.pos * Lex.pos), SR2_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)), + FULL_SPAN, strm') + end +fun body_NT (strm) = let + fun body_PROD_1 (strm) = let + val (STARTBODY_RES, STARTBODY_SPAN, strm') = matchSTARTBODY(strm) + val (body_rest_RES, body_rest_SPAN, strm') = body_rest_NT(strm') + val FULL_SPAN = (#1(STARTBODY_SPAN), #2(body_rest_SPAN)) + in + (UserCode.body_PROD_1_ACT (STARTBODY_RES, body_rest_RES, STARTBODY_SPAN : (Lex.pos * Lex.pos), body_rest_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)), + FULL_SPAN, strm') + end + fun body_PROD_2 (strm) = let + val (SR_RES, SR_SPAN, strm') = let + fun body_PROD_2_SUBRULE_1_NT (strm) = let + fun body_PROD_2_SUBRULE_1_PROD_1 (strm) = let + val (block_RES, block_SPAN, strm') = block_NT(strm) + val FULL_SPAN = (#1(block_SPAN), #2(block_SPAN)) + in + ((block_RES), FULL_SPAN, strm') + end + fun body_PROD_2_SUBRULE_1_PROD_2 (strm) = let + val (ins_RES, ins_SPAN, strm') = ins_NT(strm) + val FULL_SPAN = (#1(ins_SPAN), #2(ins_SPAN)) + in + ((ins_RES), FULL_SPAN, strm') + end + fun body_PROD_2_SUBRULE_1_PROD_3 (strm) = let + val (del_RES, del_SPAN, strm') = del_NT(strm) + val FULL_SPAN = (#1(del_SPAN), #2(del_SPAN)) + in + ((del_RES), FULL_SPAN, strm') + end + in + (case (lex(strm)) + of (Tok.STARTDEL(_), _, strm') => + body_PROD_2_SUBRULE_1_PROD_3(strm) + | (Tok.STARTADDRESS(_), _, strm') => + body_PROD_2_SUBRULE_1_PROD_1(strm) + | (Tok.STARTBLOCKQUOTE(_), _, strm') => + body_PROD_2_SUBRULE_1_PROD_1(strm) + | (Tok.STARTCENTER(_), _, strm') => + body_PROD_2_SUBRULE_1_PROD_1(strm) + | (Tok.STARTDIR(_), _, strm') => + body_PROD_2_SUBRULE_1_PROD_1(strm) + | (Tok.STARTDIV(_), _, strm') => + body_PROD_2_SUBRULE_1_PROD_1(strm) + | (Tok.STARTDL(_), _, strm') => + body_PROD_2_SUBRULE_1_PROD_1(strm) + | (Tok.STARTFIELDSET(_), _, strm') => + body_PROD_2_SUBRULE_1_PROD_1(strm) + | (Tok.STARTFORM(_), _, strm') => + body_PROD_2_SUBRULE_1_PROD_1(strm) + | (Tok.STARTH1(_), _, strm') => + body_PROD_2_SUBRULE_1_PROD_1(strm) + | (Tok.STARTH2(_), _, strm') => + body_PROD_2_SUBRULE_1_PROD_1(strm) + | (Tok.STARTH3(_), _, strm') => + body_PROD_2_SUBRULE_1_PROD_1(strm) + | (Tok.STARTH4(_), _, strm') => + body_PROD_2_SUBRULE_1_PROD_1(strm) + | (Tok.STARTH5(_), _, strm') => + body_PROD_2_SUBRULE_1_PROD_1(strm) + | (Tok.STARTH6(_), _, strm') => + body_PROD_2_SUBRULE_1_PROD_1(strm) + | (Tok.STARTHR(_), _, strm') => + body_PROD_2_SUBRULE_1_PROD_1(strm) + | (Tok.STARTISINDEX(_), _, strm') => + body_PROD_2_SUBRULE_1_PROD_1(strm) + | (Tok.STARTMENU(_), _, strm') => + body_PROD_2_SUBRULE_1_PROD_1(strm) + | (Tok.STARTNOSCRIPT(_), _, strm') => + body_PROD_2_SUBRULE_1_PROD_1(strm) + | (Tok.STARTOL(_), _, strm') => + body_PROD_2_SUBRULE_1_PROD_1(strm) + | (Tok.STARTP(_), _, strm') => + body_PROD_2_SUBRULE_1_PROD_1(strm) + | (Tok.STARTPRE(_), _, strm') => + body_PROD_2_SUBRULE_1_PROD_1(strm) + | (Tok.STARTTABLE(_), _, strm') => + body_PROD_2_SUBRULE_1_PROD_1(strm) + | (Tok.STARTUL(_), _, strm') => + body_PROD_2_SUBRULE_1_PROD_1(strm) + | (Tok.STARTINS(_), _, strm') => + body_PROD_2_SUBRULE_1_PROD_2(strm) + | _ => fail() + (* end case *)) + end + in + body_PROD_2_SUBRULE_1_NT(strm) + end + val (body_rest_RES, body_rest_SPAN, strm') = body_rest_NT(strm') + val FULL_SPAN = (#1(SR_SPAN), #2(body_rest_SPAN)) + in + (UserCode.body_PROD_2_ACT (SR_RES, body_rest_RES, SR_SPAN : (Lex.pos * Lex.pos), body_rest_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)), + FULL_SPAN, strm') + end + in + (case (lex(strm)) + of (Tok.STARTADDRESS(_), _, strm') => body_PROD_2(strm) + | (Tok.STARTBLOCKQUOTE(_), _, strm') => body_PROD_2(strm) + | (Tok.STARTCENTER(_), _, strm') => body_PROD_2(strm) + | (Tok.STARTDEL(_), _, strm') => body_PROD_2(strm) + | (Tok.STARTDIR(_), _, strm') => body_PROD_2(strm) + | (Tok.STARTDIV(_), _, strm') => body_PROD_2(strm) + | (Tok.STARTDL(_), _, strm') => body_PROD_2(strm) + | (Tok.STARTFIELDSET(_), _, strm') => body_PROD_2(strm) + | (Tok.STARTFORM(_), _, strm') => body_PROD_2(strm) + | (Tok.STARTH1(_), _, strm') => body_PROD_2(strm) + | (Tok.STARTH2(_), _, strm') => body_PROD_2(strm) + | (Tok.STARTH3(_), _, strm') => body_PROD_2(strm) + | (Tok.STARTH4(_), _, strm') => body_PROD_2(strm) + | (Tok.STARTH5(_), _, strm') => body_PROD_2(strm) + | (Tok.STARTH6(_), _, strm') => body_PROD_2(strm) + | (Tok.STARTHR(_), _, strm') => body_PROD_2(strm) + | (Tok.STARTINS(_), _, strm') => body_PROD_2(strm) + | (Tok.STARTISINDEX(_), _, strm') => body_PROD_2(strm) + | (Tok.STARTMENU(_), _, strm') => body_PROD_2(strm) + | (Tok.STARTNOSCRIPT(_), _, strm') => body_PROD_2(strm) + | (Tok.STARTOL(_), _, strm') => body_PROD_2(strm) + | (Tok.STARTP(_), _, strm') => body_PROD_2(strm) + | (Tok.STARTPRE(_), _, strm') => body_PROD_2(strm) + | (Tok.STARTTABLE(_), _, strm') => body_PROD_2(strm) + | (Tok.STARTUL(_), _, strm') => body_PROD_2(strm) + | (Tok.STARTBODY(_), _, strm') => body_PROD_1(strm) + | _ => fail() + (* end case *)) + end +fun noframes_NT (strm) = let + val (STARTNOFRAMES_RES, STARTNOFRAMES_SPAN, strm') = matchSTARTNOFRAMES(strm) + val (body_RES, body_SPAN, strm') = body_NT(strm') + val (ENDNOFRAMES_RES, ENDNOFRAMES_SPAN, strm') = matchENDNOFRAMES(strm') + val FULL_SPAN = (#1(STARTNOFRAMES_SPAN), #2(ENDNOFRAMES_SPAN)) + in + (UserCode.noframes_PROD_1_ACT (STARTNOFRAMES_RES, ENDNOFRAMES_RES, body_RES, STARTNOFRAMES_SPAN : (Lex.pos * Lex.pos), ENDNOFRAMES_SPAN : (Lex.pos * Lex.pos), body_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)), + FULL_SPAN, strm') + end +fun frame_NT (strm) = let + val (STARTFRAME_RES, STARTFRAME_SPAN, strm') = matchSTARTFRAME(strm) + val FULL_SPAN = (#1(STARTFRAME_SPAN), #2(STARTFRAME_SPAN)) + in + (UserCode.frame_PROD_1_ACT (STARTFRAME_RES, STARTFRAME_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)), + FULL_SPAN, strm') + end +fun frameset_NT (strm) = let + val (STARTFRAMESET_RES, STARTFRAMESET_SPAN, strm') = matchSTARTFRAMESET(strm) + fun frameset_PROD_1_SUBRULE_1_NT (strm) = let + fun frameset_PROD_1_SUBRULE_1_PROD_1 (strm) = let + val (frameset_RES, frameset_SPAN, strm') = frameset_NT(strm) + val FULL_SPAN = (#1(frameset_SPAN), #2(frameset_SPAN)) + in + ((frameset_RES), FULL_SPAN, strm') + end + fun frameset_PROD_1_SUBRULE_1_PROD_2 (strm) = let + val (frame_RES, frame_SPAN, strm') = frame_NT(strm) + val FULL_SPAN = (#1(frame_SPAN), #2(frame_SPAN)) + in + ((frame_RES), FULL_SPAN, strm') + end + fun frameset_PROD_1_SUBRULE_1_PROD_3 (strm) = let + val (cdata_RES, cdata_SPAN, strm') = cdata_NT(strm) + val FULL_SPAN = (#1(cdata_SPAN), #2(cdata_SPAN)) + in + ((cdata_RES), FULL_SPAN, strm') + end + in + (case (lex(strm)) + of (Tok.COMMENT(_), _, strm') => + frameset_PROD_1_SUBRULE_1_PROD_3(strm) + | (Tok.PCDATA(_), _, strm') => + frameset_PROD_1_SUBRULE_1_PROD_3(strm) + | (Tok.CHAR_REF(_), _, strm') => + frameset_PROD_1_SUBRULE_1_PROD_3(strm) + | (Tok.ENTITY_REF(_), _, strm') => + frameset_PROD_1_SUBRULE_1_PROD_3(strm) + | (Tok.STARTFRAMESET(_), _, strm') => + frameset_PROD_1_SUBRULE_1_PROD_1(strm) + | (Tok.STARTFRAME(_), _, strm') => + frameset_PROD_1_SUBRULE_1_PROD_2(strm) + | _ => fail() + (* end case *)) + end + fun frameset_PROD_1_SUBRULE_1_PRED (strm) = (case (lex(strm)) + of (Tok.COMMENT(_), _, strm') => true + | (Tok.PCDATA(_), _, strm') => true + | (Tok.CHAR_REF(_), _, strm') => true + | (Tok.ENTITY_REF(_), _, strm') => true + | (Tok.STARTFRAME(_), _, strm') => true + | (Tok.STARTFRAMESET(_), _, strm') => true + | _ => false + (* end case *)) + val (SR1_RES, SR1_SPAN, strm') = EBNF.posclos(frameset_PROD_1_SUBRULE_1_PRED, frameset_PROD_1_SUBRULE_1_NT, strm') + fun frameset_PROD_1_SUBRULE_2_NT (strm) = let + val (noframes_RES, noframes_SPAN, strm') = noframes_NT(strm) + val (cdata_opt_RES, cdata_opt_SPAN, strm') = cdata_opt_NT(strm') + val FULL_SPAN = (#1(noframes_SPAN), #2(cdata_opt_SPAN)) + in + (UserCode.frameset_PROD_1_SUBRULE_2_PROD_1_ACT (SR1_RES, cdata_opt_RES, STARTFRAMESET_RES, noframes_RES, SR1_SPAN : (Lex.pos * Lex.pos), cdata_opt_SPAN : (Lex.pos * Lex.pos), STARTFRAMESET_SPAN : (Lex.pos * Lex.pos), noframes_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)), + FULL_SPAN, strm') + end + fun frameset_PROD_1_SUBRULE_2_PRED (strm) = (case (lex(strm)) + of (Tok.STARTNOFRAMES(_), _, strm') => true + | _ => false + (* end case *)) + val (SR2_RES, SR2_SPAN, strm') = EBNF.optional(frameset_PROD_1_SUBRULE_2_PRED, frameset_PROD_1_SUBRULE_2_NT, strm') + val (ENDFRAMESET_RES, ENDFRAMESET_SPAN, strm') = matchENDFRAMESET(strm') + val FULL_SPAN = (#1(STARTFRAMESET_SPAN), #2(ENDFRAMESET_SPAN)) + in + (UserCode.frameset_PROD_1_ACT (SR1_RES, SR2_RES, STARTFRAMESET_RES, ENDFRAMESET_RES, SR1_SPAN : (Lex.pos * Lex.pos), SR2_SPAN : (Lex.pos * Lex.pos), STARTFRAMESET_SPAN : (Lex.pos * Lex.pos), ENDFRAMESET_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)), + FULL_SPAN, strm') + end +fun link_NT (strm) = let + val (STARTLINK_RES, STARTLINK_SPAN, strm') = matchSTARTLINK(strm) + val FULL_SPAN = (#1(STARTLINK_SPAN), #2(STARTLINK_SPAN)) + in + (UserCode.link_PROD_1_ACT (STARTLINK_RES, STARTLINK_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)), + FULL_SPAN, strm') + end +fun meta_NT (strm) = let + val (STARTMETA_RES, STARTMETA_SPAN, strm') = matchSTARTMETA(strm) + val FULL_SPAN = (#1(STARTMETA_SPAN), #2(STARTMETA_SPAN)) + in + (UserCode.meta_PROD_1_ACT (STARTMETA_RES, STARTMETA_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)), + FULL_SPAN, strm') + end +fun style_NT (strm) = let + val (STARTSTYLE_RES, STARTSTYLE_SPAN, strm') = matchSTARTSTYLE(strm) + val (cdata_opt_RES, cdata_opt_SPAN, strm') = cdata_opt_NT(strm') + val (ENDSTYLE_RES, ENDSTYLE_SPAN, strm') = matchENDSTYLE(strm') + val FULL_SPAN = (#1(STARTSTYLE_SPAN), #2(ENDSTYLE_SPAN)) + in + (UserCode.style_PROD_1_ACT (cdata_opt_RES, STARTSTYLE_RES, ENDSTYLE_RES, cdata_opt_SPAN : (Lex.pos * Lex.pos), STARTSTYLE_SPAN : (Lex.pos * Lex.pos), ENDSTYLE_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)), + FULL_SPAN, strm') + end +fun base_NT (strm) = let + val (STARTBASE_RES, STARTBASE_SPAN, strm') = matchSTARTBASE(strm) + val FULL_SPAN = (#1(STARTBASE_SPAN), #2(STARTBASE_SPAN)) + in + (UserCode.base_PROD_1_ACT (STARTBASE_RES, STARTBASE_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)), + FULL_SPAN, strm') + end +fun title_NT (strm) = let + val (STARTTITLE_RES, STARTTITLE_SPAN, strm') = matchSTARTTITLE(strm) + val (cdata_opt_RES, cdata_opt_SPAN, strm') = cdata_opt_NT(strm') + val (ENDTITLE_RES, ENDTITLE_SPAN, strm') = matchENDTITLE(strm') + val FULL_SPAN = (#1(STARTTITLE_SPAN), #2(ENDTITLE_SPAN)) + in + (UserCode.title_PROD_1_ACT (STARTTITLE_RES, cdata_opt_RES, ENDTITLE_RES, STARTTITLE_SPAN : (Lex.pos * Lex.pos), cdata_opt_SPAN : (Lex.pos * Lex.pos), ENDTITLE_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)), + FULL_SPAN, strm') + end +fun head_content_NT (strm) = let + fun head_content_PROD_1 (strm) = let + val (title_RES, title_SPAN, strm') = title_NT(strm) + val FULL_SPAN = (#1(title_SPAN), #2(title_SPAN)) + in + ((title_RES), FULL_SPAN, strm') + end + fun head_content_PROD_2 (strm) = let + val (base_RES, base_SPAN, strm') = base_NT(strm) + val FULL_SPAN = (#1(base_SPAN), #2(base_SPAN)) + in + ((base_RES), FULL_SPAN, strm') + end + fun head_content_PROD_3 (strm) = let + val (script_RES, script_SPAN, strm') = script_NT(strm) + val FULL_SPAN = (#1(script_SPAN), #2(script_SPAN)) + in + ((script_RES), FULL_SPAN, strm') + end + fun head_content_PROD_4 (strm) = let + val (style_RES, style_SPAN, strm') = style_NT(strm) + val FULL_SPAN = (#1(style_SPAN), #2(style_SPAN)) + in + ((style_RES), FULL_SPAN, strm') + end + fun head_content_PROD_5 (strm) = let + val (meta_RES, meta_SPAN, strm') = meta_NT(strm) + val FULL_SPAN = (#1(meta_SPAN), #2(meta_SPAN)) + in + ((meta_RES), FULL_SPAN, strm') + end + fun head_content_PROD_6 (strm) = let + val (link_RES, link_SPAN, strm') = link_NT(strm) + val FULL_SPAN = (#1(link_SPAN), #2(link_SPAN)) + in + ((link_RES), FULL_SPAN, strm') + end + fun head_content_PROD_7 (strm) = let + val (object_RES, object_SPAN, strm') = object_NT(strm) + val FULL_SPAN = (#1(object_SPAN), #2(object_SPAN)) + in + ((object_RES), FULL_SPAN, strm') + end + in + (case (lex(strm)) + of (Tok.STARTOBJECT(_), _, strm') => head_content_PROD_7(strm) + | (Tok.STARTMETA(_), _, strm') => head_content_PROD_5(strm) + | (Tok.STARTSCRIPT(_), _, strm') => head_content_PROD_3(strm) + | (Tok.STARTTITLE(_), _, strm') => head_content_PROD_1(strm) + | (Tok.STARTBASE(_), _, strm') => head_content_PROD_2(strm) + | (Tok.STARTSTYLE(_), _, strm') => head_content_PROD_4(strm) + | (Tok.STARTLINK(_), _, strm') => head_content_PROD_6(strm) + | _ => fail() + (* end case *)) + end +fun head_NT (strm) = let + fun head_PROD_1_SUBRULE_1_NT (strm) = let + val (STARTHEAD_RES, STARTHEAD_SPAN, strm') = matchSTARTHEAD(strm) + val (cdata_opt_RES, cdata_opt_SPAN, strm') = cdata_opt_NT(strm') + val FULL_SPAN = (#1(STARTHEAD_SPAN), #2(cdata_opt_SPAN)) + in + (UserCode.head_PROD_1_SUBRULE_1_PROD_1_ACT (cdata_opt_RES, STARTHEAD_RES, cdata_opt_SPAN : (Lex.pos * Lex.pos), STARTHEAD_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)), + FULL_SPAN, strm') + end + fun head_PROD_1_SUBRULE_1_PRED (strm) = (case (lex(strm)) + of (Tok.STARTHEAD(_), _, strm') => true + | _ => false + (* end case *)) + val (SR1_RES, SR1_SPAN, strm') = EBNF.optional(head_PROD_1_SUBRULE_1_PRED, head_PROD_1_SUBRULE_1_NT, strm) + fun head_PROD_1_SUBRULE_2_NT (strm) = let + val (head_content_RES, head_content_SPAN, strm') = head_content_NT(strm) + val (cdata_opt_RES, cdata_opt_SPAN, strm') = cdata_opt_NT(strm') + val FULL_SPAN = (#1(head_content_SPAN), #2(cdata_opt_SPAN)) + in + (UserCode.head_PROD_1_SUBRULE_2_PROD_1_ACT (SR1_RES, cdata_opt_RES, head_content_RES, SR1_SPAN : (Lex.pos * Lex.pos), cdata_opt_SPAN : (Lex.pos * Lex.pos), head_content_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)), + FULL_SPAN, strm') + end + fun head_PROD_1_SUBRULE_2_PRED (strm) = (case (lex(strm)) + of (Tok.STARTBASE(_), _, strm') => true + | (Tok.STARTLINK(_), _, strm') => true + | (Tok.STARTMETA(_), _, strm') => true + | (Tok.STARTOBJECT(_), _, strm') => true + | (Tok.STARTSCRIPT(_), _, strm') => true + | (Tok.STARTSTYLE(_), _, strm') => true + | (Tok.STARTTITLE(_), _, strm') => true + | _ => false + (* end case *)) + val (SR2_RES, SR2_SPAN, strm') = EBNF.closure(head_PROD_1_SUBRULE_2_PRED, head_PROD_1_SUBRULE_2_NT, strm') + fun head_PROD_1_SUBRULE_3_NT (strm) = let + val (ENDHEAD_RES, ENDHEAD_SPAN, strm') = matchENDHEAD(strm) + val (cdata_opt_RES, cdata_opt_SPAN, strm') = cdata_opt_NT(strm') + val FULL_SPAN = (#1(ENDHEAD_SPAN), #2(cdata_opt_SPAN)) + in + (UserCode.head_PROD_1_SUBRULE_3_PROD_1_ACT (SR1_RES, SR2_RES, cdata_opt_RES, ENDHEAD_RES, SR1_SPAN : (Lex.pos * Lex.pos), SR2_SPAN : (Lex.pos * Lex.pos), cdata_opt_SPAN : (Lex.pos * Lex.pos), ENDHEAD_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)), + FULL_SPAN, strm') + end + fun head_PROD_1_SUBRULE_3_PRED (strm) = (case (lex(strm)) + of (Tok.ENDHEAD, _, strm') => true + | _ => false + (* end case *)) + val (SR3_RES, SR3_SPAN, strm') = EBNF.optional(head_PROD_1_SUBRULE_3_PRED, head_PROD_1_SUBRULE_3_NT, strm') + val FULL_SPAN = (#1(SR1_SPAN), #2(SR3_SPAN)) + in + (UserCode.head_PROD_1_ACT (SR1_RES, SR2_RES, SR3_RES, SR1_SPAN : (Lex.pos * Lex.pos), SR2_SPAN : (Lex.pos * Lex.pos), SR3_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)), + FULL_SPAN, strm') + end +fun document_NT (strm) = let + val (cdata_opt_RES, cdata_opt_SPAN, strm') = cdata_opt_NT(strm) + fun document_PROD_1_SUBRULE_1_NT (strm) = let + val (DOCTYPE_RES, DOCTYPE_SPAN, strm') = matchDOCTYPE(strm) + val (cdata_opt_RES, cdata_opt_SPAN, strm') = cdata_opt_NT(strm') + val FULL_SPAN = (#1(DOCTYPE_SPAN), #2(cdata_opt_SPAN)) + in + (UserCode.document_PROD_1_SUBRULE_1_PROD_1_ACT (DOCTYPE_RES, cdata_opt_RES, DOCTYPE_SPAN : (Lex.pos * Lex.pos), cdata_opt_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)), + FULL_SPAN, strm') + end + fun document_PROD_1_SUBRULE_1_PRED (strm) = (case (lex(strm)) + of (Tok.DOCTYPE(_), _, strm') => true + | _ => false + (* end case *)) + val (SR1_RES, SR1_SPAN, strm') = EBNF.optional(document_PROD_1_SUBRULE_1_PRED, document_PROD_1_SUBRULE_1_NT, strm') + fun document_PROD_1_SUBRULE_2_NT (strm) = let + val (STARTHTML_RES, STARTHTML_SPAN, strm') = matchSTARTHTML(strm) + val (cdata_opt_RES, cdata_opt_SPAN, strm') = cdata_opt_NT(strm') + val FULL_SPAN = (#1(STARTHTML_SPAN), #2(cdata_opt_SPAN)) + in + (UserCode.document_PROD_1_SUBRULE_2_PROD_1_ACT (SR1_RES, cdata_opt_RES, STARTHTML_RES, SR1_SPAN : (Lex.pos * Lex.pos), cdata_opt_SPAN : (Lex.pos * Lex.pos), STARTHTML_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)), + FULL_SPAN, strm') + end + fun document_PROD_1_SUBRULE_2_PRED (strm) = (case (lex(strm)) + of (Tok.STARTHTML(_), _, strm') => true + | _ => false + (* end case *)) + val (SR2_RES, SR2_SPAN, strm') = EBNF.optional(document_PROD_1_SUBRULE_2_PRED, document_PROD_1_SUBRULE_2_NT, strm') + val (head_RES, head_SPAN, strm') = head_NT(strm') + val (SR3_RES, SR3_SPAN, strm') = let + fun document_PROD_1_SUBRULE_3_NT (strm) = let + fun document_PROD_1_SUBRULE_3_PROD_1 (strm) = let + val (body_RES, body_SPAN, strm') = body_NT(strm) + val FULL_SPAN = (#1(body_SPAN), #2(body_SPAN)) + in + ((body_RES), FULL_SPAN, strm') + end + fun document_PROD_1_SUBRULE_3_PROD_2 (strm) = let + val (frameset_RES, frameset_SPAN, strm') = frameset_NT(strm) + val FULL_SPAN = (#1(frameset_SPAN), #2(frameset_SPAN)) + in + ((frameset_RES), FULL_SPAN, strm') + end + in + (case (lex(strm)) + of (Tok.STARTFRAMESET(_), _, strm') => + document_PROD_1_SUBRULE_3_PROD_2(strm) + | (Tok.STARTADDRESS(_), _, strm') => + document_PROD_1_SUBRULE_3_PROD_1(strm) + | (Tok.STARTBLOCKQUOTE(_), _, strm') => + document_PROD_1_SUBRULE_3_PROD_1(strm) + | (Tok.STARTBODY(_), _, strm') => + document_PROD_1_SUBRULE_3_PROD_1(strm) + | (Tok.STARTCENTER(_), _, strm') => + document_PROD_1_SUBRULE_3_PROD_1(strm) + | (Tok.STARTDEL(_), _, strm') => + document_PROD_1_SUBRULE_3_PROD_1(strm) + | (Tok.STARTDIR(_), _, strm') => + document_PROD_1_SUBRULE_3_PROD_1(strm) + | (Tok.STARTDIV(_), _, strm') => + document_PROD_1_SUBRULE_3_PROD_1(strm) + | (Tok.STARTDL(_), _, strm') => + document_PROD_1_SUBRULE_3_PROD_1(strm) + | (Tok.STARTFIELDSET(_), _, strm') => + document_PROD_1_SUBRULE_3_PROD_1(strm) + | (Tok.STARTFORM(_), _, strm') => + document_PROD_1_SUBRULE_3_PROD_1(strm) + | (Tok.STARTH1(_), _, strm') => + document_PROD_1_SUBRULE_3_PROD_1(strm) + | (Tok.STARTH2(_), _, strm') => + document_PROD_1_SUBRULE_3_PROD_1(strm) + | (Tok.STARTH3(_), _, strm') => + document_PROD_1_SUBRULE_3_PROD_1(strm) + | (Tok.STARTH4(_), _, strm') => + document_PROD_1_SUBRULE_3_PROD_1(strm) + | (Tok.STARTH5(_), _, strm') => + document_PROD_1_SUBRULE_3_PROD_1(strm) + | (Tok.STARTH6(_), _, strm') => + document_PROD_1_SUBRULE_3_PROD_1(strm) + | (Tok.STARTHR(_), _, strm') => + document_PROD_1_SUBRULE_3_PROD_1(strm) + | (Tok.STARTINS(_), _, strm') => + document_PROD_1_SUBRULE_3_PROD_1(strm) + | (Tok.STARTISINDEX(_), _, strm') => + document_PROD_1_SUBRULE_3_PROD_1(strm) + | (Tok.STARTMENU(_), _, strm') => + document_PROD_1_SUBRULE_3_PROD_1(strm) + | (Tok.STARTNOSCRIPT(_), _, strm') => + document_PROD_1_SUBRULE_3_PROD_1(strm) + | (Tok.STARTOL(_), _, strm') => + document_PROD_1_SUBRULE_3_PROD_1(strm) + | (Tok.STARTP(_), _, strm') => + document_PROD_1_SUBRULE_3_PROD_1(strm) + | (Tok.STARTPRE(_), _, strm') => + document_PROD_1_SUBRULE_3_PROD_1(strm) + | (Tok.STARTTABLE(_), _, strm') => + document_PROD_1_SUBRULE_3_PROD_1(strm) + | (Tok.STARTUL(_), _, strm') => + document_PROD_1_SUBRULE_3_PROD_1(strm) + | _ => fail() + (* end case *)) + end + in + document_PROD_1_SUBRULE_3_NT(strm') + end + fun document_PROD_1_SUBRULE_4_NT (strm) = let + val (ENDHTML_RES, ENDHTML_SPAN, strm') = matchENDHTML(strm) + val (cdata_opt_RES, cdata_opt_SPAN, strm') = cdata_opt_NT(strm') + val FULL_SPAN = (#1(ENDHTML_SPAN), #2(cdata_opt_SPAN)) + in + (UserCode.document_PROD_1_SUBRULE_4_PROD_1_ACT (head_RES, SR1_RES, SR2_RES, SR3_RES, cdata_opt_RES, ENDHTML_RES, head_SPAN : (Lex.pos * Lex.pos), SR1_SPAN : (Lex.pos * Lex.pos), SR2_SPAN : (Lex.pos * Lex.pos), SR3_SPAN : (Lex.pos * Lex.pos), cdata_opt_SPAN : (Lex.pos * Lex.pos), ENDHTML_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)), + FULL_SPAN, strm') + end + fun document_PROD_1_SUBRULE_4_PRED (strm) = (case (lex(strm)) + of (Tok.ENDHTML, _, strm') => true + | _ => false + (* end case *)) + val (SR4_RES, SR4_SPAN, strm') = EBNF.optional(document_PROD_1_SUBRULE_4_PRED, document_PROD_1_SUBRULE_4_NT, strm') + val FULL_SPAN = (#1(cdata_opt_SPAN), #2(SR4_SPAN)) + in + (UserCode.document_PROD_1_ACT (head_RES, SR1_RES, SR2_RES, SR3_RES, SR4_RES, cdata_opt_RES, head_SPAN : (Lex.pos * Lex.pos), SR1_SPAN : (Lex.pos * Lex.pos), SR2_SPAN : (Lex.pos * Lex.pos), SR3_SPAN : (Lex.pos * Lex.pos), SR4_SPAN : (Lex.pos * Lex.pos), cdata_opt_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)), + FULL_SPAN, strm') + end +in + (document_NT, block_NT, cdata_opt_NT, inline_NT, body_NT, flow_NT) +end +val document_NT = fn s => unwrap (Err.launch (eh, lexFn, document_NT , true) s) +val block_NT = fn s => unwrap (Err.launch (eh, lexFn, block_NT , false) s) +val cdata_opt_NT = fn s => unwrap (Err.launch (eh, lexFn, cdata_opt_NT , false) s) +val inline_NT = fn s => unwrap (Err.launch (eh, lexFn, inline_NT , false) s) +val body_NT = fn s => unwrap (Err.launch (eh, lexFn, body_NT , false) s) +val flow_NT = fn s => unwrap (Err.launch (eh, lexFn, flow_NT , false) s) + +in (document_NT, block_NT, cdata_opt_NT, inline_NT, body_NT, flow_NT) end + in +fun parse lexFn s = let val (document_NT, block_NT, cdata_opt_NT, inline_NT, body_NT, flow_NT) = mk lexFn in document_NT s end + +fun parseblock lexFn s = let val (document_NT, block_NT, cdata_opt_NT, inline_NT, body_NT, flow_NT) = mk lexFn in block_NT s end + +fun parsecdata_opt lexFn s = let val (document_NT, block_NT, cdata_opt_NT, inline_NT, body_NT, flow_NT) = mk lexFn in cdata_opt_NT s end + +fun parseinline lexFn s = let val (document_NT, block_NT, cdata_opt_NT, inline_NT, body_NT, flow_NT) = mk lexFn in inline_NT s end + +fun parsebody lexFn s = let val (document_NT, block_NT, cdata_opt_NT, inline_NT, body_NT, flow_NT) = mk lexFn in body_NT s end + +fun parseflow lexFn s = let val (document_NT, block_NT, cdata_opt_NT, inline_NT, body_NT, flow_NT) = mk lexFn in flow_NT s end + + end + +end diff --git a/smlnj-lib/HTML4/html4.l b/smlnj-lib/HTML4/html4.l new file mode 100644 index 0000000..75047cd --- /dev/null +++ b/smlnj-lib/HTML4/html4.l @@ -0,0 +1,84 @@ +(* html4.l + * + * COPYRIGHT (c) 2014 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + *) + +%name HTML4Lexer; + +%defs ( +open HTML4TokenUtils + +fun eof() = EOF +type lex_result = token + +val buffer = ref ([] : string list) + +fun addStr s = (buffer := s :: !buffer) +fun getStr () = (String.concat(List.rev(!buffer)) before (buffer := [])) + +(* trim an optional ";" from a non-empty substring *) +fun trimSemi ss = if Substring.sub(ss, Substring.size ss - 1) = #";" + then Substring.trimr 1 ss + else ss +val scanInt = #1 o valOf o IntInf.scan StringCvt.DEC Substring.getc +val scanHex = #1 o valOf o IntInf.scan StringCvt.HEX Substring.getc +); + +%states INITIAL, INTAG, COM1, COM2; + +%let alpha=[A-Za-z]; +%let digit=[0-9]; +%let hexdigit=[0-9a-fA-F]; +%let namechar=[-.A-Za-z0-9.]; +%let tag={alpha}{namechar}*; +%let ws=[\ \t]; +%let nonct=[^>]+; + +(* Open tags *) + +"<"{tag} => (addStr yytext; YYBEGIN INTAG; continue()); +">" => (addStr yytext; YYBEGIN INITIAL; mkOpenTag(getStr())); +{nonct} => (addStr yytext; continue()); + +(* Close tags *) + +"" => (mkCloseTag yytext); + +(* Comments *) + +" + + +Index of the HTML 4 Elements + + + + + + + + + +

    Index of Elements 

    + +

    Legend: Optional, Forbidden, Empty, +Deprecated, Loose DTD, Frameset DTD

    + +
    Old name New name +
    + + +
    +Input/output + +The most significant changes to the &CML; interfaces have to do with +input/output operations. +In version 0.9.8, the structure + +
    +Condition variables +
    + +
    +Polling + +The only loss in function from version 0.9.8 is the elimination +of the +
    + diff --git a/cml/doc/ML-Doc/refman.mldoc b/cml/doc/ML-Doc/refman.mldoc new file mode 100644 index 0000000..92e33c8 --- /dev/null +++ b/cml/doc/ML-Doc/refman.mldoc @@ -0,0 +1,17 @@ + + + + + + + + + + +The Concurrent Reference Manual + + + + + + diff --git a/cml/doc/ML-Doc/run-cml.mldoc b/cml/doc/ML-Doc/run-cml.mldoc new file mode 100644 index 0000000..725e2a1 --- /dev/null +++ b/cml/doc/ML-Doc/run-cml.mldoc @@ -0,0 +1,75 @@ + + + + + + + +The RunCML structure + + +The RunCML structure + + CML + Time + + + + +The RunCML structure provides +support for running &CML; programs. + + + + + doit((unit -> unit) * Time.time option) -> OS.Process.status + + isRunningunit -> bool + + shutdownOS.Process.status -> 'a + + exportFn(string * (string * string list -> OS.Process.status) * Time.time option) -> unit + + + + diff --git a/cml/doc/ML-Doc/sync-var.mldoc b/cml/doc/ML-Doc/sync-var.mldoc new file mode 100644 index 0000000..0dcce4e --- /dev/null +++ b/cml/doc/ML-Doc/sync-var.mldoc @@ -0,0 +1,227 @@ + + + + + + + + + +The SyncVar structure + + +The + + + + +The + + + Put + + + This exception is raised when an attempt is made to put a + value into a value that is already full (see + 'aivar + + + This is the type constructor for I-structured variables. + I-structured variables are write-once variables that provide + synchronization on read operations. + They are especially useful for one-shot communications, such + as reply messages in client/server protocols, and can also be used + to implement shared incremental data structures. + + iVarunit -> 'a ivar + + + iVar () + + creates a new empty I-variable. + + iPut('a ivar * 'a) -> unit + + + iPut ( + fills the I-variable + iGet'a ivar -> 'a + + + iGet + returns the contents of the I-variable + iGetEvt'a ivar -> 'a event + + + iGetEvt + returns an event-value that represents the + iGetPoll'a ivar -> 'a option + + + This is a non-blocking version of + sameIVar('a ivar * 'a ivar) -> bool + + + sameIVar ( + returns + 'amvar + + + This is the type constructor for M-structured variables. + Unlike + mVarunit -> 'a mvar + + + mVar () + + creates a new empty M-variable. + + mVarInit'a -> 'a mvar + + + mVarInit + creates a new M-variable initialized to + mPut('a mvar * 'a) -> unit + + + mPut ( + fills the M-variable + mTake'a mvar -> 'a + + + mTake + removes and returns the contents of the M-variable + mTakeEvt'a mvar -> 'a event + + + mTakeEvt + returns an event-value that represents the + mGet'a mvar -> 'a + + + mGet + returns the contents of the M-variable +let val x = + + mGetEvt'a mvar -> 'a event + + + mGetEvt + returns an event-value that represents the + mTakePoll'a mvar -> 'a option + mGetPoll'a mvar -> 'a option + + + These are non-blocking versions of + mSwap('a mvar * 'a) -> 'a + + + mSwap ( + puts the value +let val x = + except that + mSwapEvt('a mvar * 'a) -> 'a event + + + mSwapEvt ( + returns an event-value that represents the + sameMVar('a mvar * 'a mvar) -> bool + + + sameMVar ( + returns + + +I-variables provide a useful mechanism for implementing the +reply communication in request/reply protocols (in cases where +the server does not care if the reply is accepted). +They may also be used to implement incremental data structures +and streams; for example, the Multicast structure +uses I-variables to implement its multicast channels. + +A disciplined use of M-variables can provide an atomic +read-modify-write operation. + diff --git a/cml/doc/configure.ac b/cml/doc/configure.ac new file mode 100644 index 0000000..0ea1602 --- /dev/null +++ b/cml/doc/configure.ac @@ -0,0 +1,77 @@ +dnl Process this file with autoconf (2.52+) to produce a configure script. +dnl +dnl COPYRIGHT (c) 2003 John Reppy (http://www.cs.uchicago.edu/~jhr) +dnl All rights reserved. +dnl + +AC_INIT(CMLManual,1.1) + +AC_PREREQ(2.52) +AC_COPYRIGHT([COPYRIGHT (c) 2002 John Reppy] (http://www.cs.uchicago.edu/~jhr)) +AC_CONFIG_SRCDIR(ML-Doc/cml.mldoc) +AC_CONFIG_AUX_DIR(config) + +BASIS_DIR=`pwd` +AC_SUBST(BASIS_DIR) + +dnl +dnl find location of ML-Doc tools +dnl +AC_ARG_WITH(mldoc, + [ --with-mldoc=PATH specify location of ML-Doc tools], + MLDOC_DIR=$with_mldoc) +if test x$MLDOC_DIR = xyes -o x$MLDOC_DIR = xno ; then + AC_MSG_ERROR([--with-mldoc option must specify directory argument]) +elif test x$MLDOC_DIR != x ; then +dnl +dnl verify that $MLDOC_DIR is an absolute path +dnl + case $MLDOC_DIR in + /*) ;; + *) AC_MSG_ERROR([--with-mldoc argument must be absolute]) ;; + esac +fi +AC_SUBST(MLDOC_DIR) + +dnl +dnl Get location of SML Basis Library Master.info +dnl file. +dnl +AC_ARG_WITH(basis-info, + [ --with-basis-info=PATH specify location of SML Basis master info file], + BASIS_INFO_PATH=$with_basis_info) +if test x$BASIS_INFO_PATH = xyes -o x$BASIS_INFO_PATH = xno ; then + AC_MSG_ERROR([--with-basis-info option must specify directory argument]) +elif test x$BASIS_INFO_PATH != x ; then +dnl +dnl verify the existance of the file +dnl + case $BASIS_INFO_PATH in + /*) ;; + *) AC_MSG_ERROR([--with-basis-info argument must be absolute]) ;; + esac + if test ! -r $BASIS_INFO_PATH ; then + AC_MSG_ERROR([info file "$BASIS_INFO_PATH" not found]) + fi +else + AC_MSG_ERROR([must specify location of SML Basis Library info file]) +fi +AC_SUBST(BASIS_INFO_PATH) + +dnl +dnl Generate makefiles +dnl +$MLDOC_DIR/bin/mk-mldoc-makefile < input + +AC_CONFIG_FILES( + CATALOG:config/CATALOG.in \ + Config.cfg:config/Config_cfg.in \ + Hardcopy/run-latex:config/run-latex.in \ +) + +AC_CONFIG_COMMANDS_PRE([if test ! -d Info ; then mkdir Info; fi]) +AC_CONFIG_COMMANDS_PRE([if test ! -d HTML ; then mkdir HTML; fi]) +AC_CONFIG_COMMANDS_PRE([if test ! -d Hardcopy ; then mkdir Hardcopy; fi]) +AC_CONFIG_COMMANDS_PRE([if test ! -d Proof ; then mkdir Proof; fi]) + +AC_OUTPUT diff --git a/cml/doc/index.template b/cml/doc/index.template new file mode 100644 index 0000000..e369998 --- /dev/null +++ b/cml/doc/index.template @@ -0,0 +1,26 @@ + + + +&title; + + +
    +

    The Concurrent ML Reference Manual

    +
    +&body; +
    +
    +[ Top + | Parent + | Contents + | Index + | Root + ] +
    +

    +Generated &today.date;
    +Comments to John Reppy.
    +Copyright © 1991-2003 John Reppy
    +


    + + diff --git a/cml/doc/input b/cml/doc/input new file mode 100644 index 0000000..70ed05c --- /dev/null +++ b/cml/doc/input @@ -0,0 +1,17 @@ +ML-Doc/refman.mldoc +ML-Doc/basics.mldoc +ML-Doc/porting.mldoc +ML-Doc/core-cml.mldoc +ML-Doc/cml.mldoc +ML-Doc/mailbox.mldoc +ML-Doc/sync-var.mldoc +ML-Doc/os.mldoc +ML-Doc/os-process.mldoc +ML-Doc/os-io.mldoc +#ML-Doc/lib/cml-lib.mldoc +#ML-Doc/lib/multicast.mldoc +#ML-Doc/lib/trace-cml.mldoc +ML-Doc/cml-lib.mldoc +ML-Doc/multicast.mldoc +ML-Doc/trace-cml.mldoc + diff --git a/cml/doc/manual/README b/cml/doc/manual/README new file mode 100644 index 0000000..4b046e4 --- /dev/null +++ b/cml/doc/manual/README @@ -0,0 +1 @@ +This directory contains the LaTeX source for the CML reference manual. diff --git a/cml/doc/manual/manual.tex b/cml/doc/manual/manual.tex new file mode 100644 index 0000000..36f5969 --- /dev/null +++ b/cml/doc/manual/manual.tex @@ -0,0 +1,5 @@ +\documentclass[twosided]{report} + +\begin{document} + +\end{document} diff --git a/cml/doc/page.template b/cml/doc/page.template new file mode 100644 index 0000000..afd1d8f --- /dev/null +++ b/cml/doc/page.template @@ -0,0 +1,26 @@ + + + +&title; + + +
    +

    The Concurrent ML Reference Manual

    +
    +&body; +
    +
    +[ Top + | Parent + | Contents + | Index + | Root + ] +
    +

    +Last Modified &date;
    +Comments to John Reppy.
    +Copyright © 1991-2003 John Reppy
    +


    + + diff --git a/cml/doc/toc.template b/cml/doc/toc.template new file mode 100644 index 0000000..3aa26e0 --- /dev/null +++ b/cml/doc/toc.template @@ -0,0 +1,26 @@ + + + +&title; + + +
    +

    The Concurrent ML Reference Manual

    +
    +&body; +
    +
    +[ Top + | Parent + | Contents + | Index + | Root + ] +
    +

    +Generated &today.date;
    +Comments to John Reppy.
    +Copyright © 1991-2003 John Reppy
    +


    + + diff --git a/cml/src/IO/.cm/GUID/bin-prim-io.sml b/cml/src/IO/.cm/GUID/bin-prim-io.sml new file mode 100644 index 0000000..8e2ef14 --- /dev/null +++ b/cml/src/IO/.cm/GUID/bin-prim-io.sml @@ -0,0 +1 @@ +guid-$cml/(cml-internal.cm):glue/(sources.cm):../IO/(sources.cm):bin-prim-io.sml-1714016095.869 diff --git a/cml/src/IO/.cm/GUID/chan-io-fn.sml b/cml/src/IO/.cm/GUID/chan-io-fn.sml new file mode 100644 index 0000000..b9fcfc3 --- /dev/null +++ b/cml/src/IO/.cm/GUID/chan-io-fn.sml @@ -0,0 +1 @@ +guid-$cml/(cml-internal.cm):glue/(sources.cm):../IO/(sources.cm):chan-io-fn.sml-1714016096.255 diff --git a/cml/src/IO/.cm/GUID/clean-io.sml b/cml/src/IO/.cm/GUID/clean-io.sml new file mode 100644 index 0000000..8446e94 --- /dev/null +++ b/cml/src/IO/.cm/GUID/clean-io.sml @@ -0,0 +1 @@ +guid-$cml/(cml-internal.cm):glue/(sources.cm):../IO/(sources.cm):clean-io.sml-1714016095.975 diff --git a/cml/src/IO/.cm/GUID/cml-bin-io-sig.sml b/cml/src/IO/.cm/GUID/cml-bin-io-sig.sml new file mode 100644 index 0000000..ecc802c --- /dev/null +++ b/cml/src/IO/.cm/GUID/cml-bin-io-sig.sml @@ -0,0 +1 @@ +guid-$cml/(cml-internal.cm):glue/(sources.cm):../IO/(sources.cm):cml-bin-io-sig.sml-1714016096.002 diff --git a/cml/src/IO/.cm/GUID/cml-stream-io-sig.sml b/cml/src/IO/.cm/GUID/cml-stream-io-sig.sml new file mode 100644 index 0000000..f729af4 --- /dev/null +++ b/cml/src/IO/.cm/GUID/cml-stream-io-sig.sml @@ -0,0 +1 @@ +guid-$cml/(cml-internal.cm):glue/(sources.cm):../IO/(sources.cm):cml-stream-io-sig.sml-1714016095.992 diff --git a/cml/src/IO/.cm/GUID/cml-text-stream-io-sig.sml b/cml/src/IO/.cm/GUID/cml-text-stream-io-sig.sml new file mode 100644 index 0000000..f1de8d1 --- /dev/null +++ b/cml/src/IO/.cm/GUID/cml-text-stream-io-sig.sml @@ -0,0 +1 @@ +guid-$cml/(cml-internal.cm):glue/(sources.cm):../IO/(sources.cm):cml-text-stream-io-sig.sml-1714016096.321 diff --git a/cml/src/IO/.cm/GUID/new-bin-io-fn.sml b/cml/src/IO/.cm/GUID/new-bin-io-fn.sml new file mode 100644 index 0000000..8221403 --- /dev/null +++ b/cml/src/IO/.cm/GUID/new-bin-io-fn.sml @@ -0,0 +1 @@ +guid-$cml/(cml-internal.cm):glue/(sources.cm):../IO/(sources.cm):new-bin-io-fn.sml-1714016096.010 diff --git a/cml/src/IO/.cm/GUID/new-cml-imperative-io-sig.sml b/cml/src/IO/.cm/GUID/new-cml-imperative-io-sig.sml new file mode 100644 index 0000000..57f4f44 --- /dev/null +++ b/cml/src/IO/.cm/GUID/new-cml-imperative-io-sig.sml @@ -0,0 +1 @@ +guid-$cml/(cml-internal.cm):glue/(sources.cm):../IO/(sources.cm):new-cml-imperative-io-sig.sml-1714016095.997 diff --git a/cml/src/IO/.cm/GUID/new-cml-text-io-sig.sml b/cml/src/IO/.cm/GUID/new-cml-text-io-sig.sml new file mode 100644 index 0000000..7fe4ef1 --- /dev/null +++ b/cml/src/IO/.cm/GUID/new-cml-text-io-sig.sml @@ -0,0 +1 @@ +guid-$cml/(cml-internal.cm):glue/(sources.cm):../IO/(sources.cm):new-cml-text-io-sig.sml-1714016096.327 diff --git a/cml/src/IO/.cm/GUID/new-text-io-fn.sml b/cml/src/IO/.cm/GUID/new-text-io-fn.sml new file mode 100644 index 0000000..e5eeeae --- /dev/null +++ b/cml/src/IO/.cm/GUID/new-text-io-fn.sml @@ -0,0 +1 @@ +guid-$cml/(cml-internal.cm):glue/(sources.cm):../IO/(sources.cm):new-text-io-fn.sml-1714016096.336 diff --git a/cml/src/IO/.cm/GUID/os-prim-io-sig.sml b/cml/src/IO/.cm/GUID/os-prim-io-sig.sml new file mode 100644 index 0000000..1bea377 --- /dev/null +++ b/cml/src/IO/.cm/GUID/os-prim-io-sig.sml @@ -0,0 +1 @@ +guid-$cml/(cml-internal.cm):glue/(sources.cm):../IO/(sources.cm):os-prim-io-sig.sml-1714016095.884 diff --git a/cml/src/IO/.cm/GUID/prim-io-fn.sml b/cml/src/IO/.cm/GUID/prim-io-fn.sml new file mode 100644 index 0000000..ad5bac1 --- /dev/null +++ b/cml/src/IO/.cm/GUID/prim-io-fn.sml @@ -0,0 +1 @@ +guid-$cml/(cml-internal.cm):glue/(sources.cm):../IO/(sources.cm):prim-io-fn.sml-1714016095.850 diff --git a/cml/src/IO/.cm/GUID/prim-io-sig.sml b/cml/src/IO/.cm/GUID/prim-io-sig.sml new file mode 100644 index 0000000..f1d6fba --- /dev/null +++ b/cml/src/IO/.cm/GUID/prim-io-sig.sml @@ -0,0 +1 @@ +guid-$cml/(cml-internal.cm):glue/(sources.cm):../IO/(sources.cm):prim-io-sig.sml-1714016095.844 diff --git a/cml/src/IO/.cm/GUID/text-prim-io.sml b/cml/src/IO/.cm/GUID/text-prim-io.sml new file mode 100644 index 0000000..2907f55 --- /dev/null +++ b/cml/src/IO/.cm/GUID/text-prim-io.sml @@ -0,0 +1 @@ +guid-$cml/(cml-internal.cm):glue/(sources.cm):../IO/(sources.cm):text-prim-io.sml-1714016096.204 diff --git a/cml/src/IO/.cm/SKEL/bin-prim-io.sml b/cml/src/IO/.cm/SKEL/bin-prim-io.sml new file mode 100644 index 0000000..8cd4f00 --- /dev/null +++ b/cml/src/IO/.cm/SKEL/bin-prim-io.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f2d"Word8"d"Position"ad"BinPrimIO"jh4ad"Vector"gp1d"Word8Vector"ad"Array"gp1d"Word8Array"ad"VectorSlice"gp1d"Word8VectorSlice"ad"ArraySlice"gp1d"Word8ArraySlice"gp1e"PrimIO" \ No newline at end of file diff --git a/cml/src/IO/.cm/SKEL/chan-io-fn.sml b/cml/src/IO/.cm/SKEL/chan-io-fn.sml new file mode 100644 index 0000000..1c34dc2 --- /dev/null +++ b/cml/src/IO/.cm/SKEL/chan-io-fn.sml @@ -0,0 +1,2 @@ +Skeleton 5 +ae"ChanIOFn"i6PrimIO"PRIM_IO"CaV"gp1c"MONO_VECTOR"aVS"gp1c"MONO_VECTOR_SLICE"aA"gp1c"MONO_ARRAY"aAS"gp1c"MONO_ARRAY_SLICE"f8/d"IO"=Cd"CML"'d"General"d"Mailbox"NNjh2ad"SV"gp1d"SyncVar"agp1h1 \ No newline at end of file diff --git a/cml/src/IO/.cm/SKEL/clean-io.sml b/cml/src/IO/.cm/SKEL/clean-io.sml new file mode 100644 index 0000000..190e84b --- /dev/null +++ b/cml/src/IO/.cm/SKEL/clean-io.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2fCleanUp"ad"CleanIO"jh2ad"SV"gp1d"SyncVar"ad"C"gph0 \ No newline at end of file diff --git a/cml/src/IO/.cm/SKEL/cml-bin-io-sig.sml b/cml/src/IO/.cm/SKEL/cml-bin-io-sig.sml new file mode 100644 index 0000000..a158721 --- /dev/null +++ b/cml/src/IO/.cm/SKEL/cml-bin-io-sig.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f3d"Word8"d"BinPrimIO"d"Word8Vector"ac"CML_BIN_IO"h1egp1c"CML_IMPERATIVE_IO" \ No newline at end of file diff --git a/cml/src/IO/.cm/SKEL/cml-stream-io-sig.sml b/cml/src/IO/.cm/SKEL/cml-stream-io-sig.sml new file mode 100644 index 0000000..79fb5f6 --- /dev/null +++ b/cml/src/IO/.cm/SKEL/cml-stream-io-sig.sml @@ -0,0 +1,2 @@ +Skeleton 5 +ac"CML_STREAM_IO"h2egp1c"STREAM_IO"f1d"CML" \ No newline at end of file diff --git a/cml/src/IO/.cm/SKEL/cml-text-stream-io-sig.sml b/cml/src/IO/.cm/SKEL/cml-text-stream-io-sig.sml new file mode 100644 index 0000000..e04ad67 --- /dev/null +++ b/cml/src/IO/.cm/SKEL/cml-text-stream-io-sig.sml @@ -0,0 +1,2 @@ +Skeleton 5 +ac"CML_TEXT_STREAM_IO"h2egp1c"TEXT_STREAM_IO"f1d"CML" \ No newline at end of file diff --git a/cml/src/IO/.cm/SKEL/new-bin-io-fn.sml b/cml/src/IO/.cm/SKEL/new-bin-io-fn.sml new file mode 100644 index 0000000..5b3e0c9 --- /dev/null +++ b/cml/src/IO/.cm/SKEL/new-bin-io-fn.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f1d"BinPrimIO"ae"BinIOFn"i2aOSPrimIO"gp1c"OS_PRIM_IO"f7d"IO"d"CML"Cd"Word8"Position"d"Int"d"CleanIO"Njh8ad"PIO"gp2d"PrimIO"ad"A"gp1d"Word8Array"ad"AS"gp1d"Word8ArraySlice"Cad"V"gp1d"Word8Vector"ad"VS"gp1d"Word8VectorSlice"ad"Pos"gp1ad"SV"gp1d"SyncVar"ad"StreamIO"h0Ngp1c"CML_BIN_IO" \ No newline at end of file diff --git a/cml/src/IO/.cm/SKEL/new-cml-imperative-io-sig.sml b/cml/src/IO/.cm/SKEL/new-cml-imperative-io-sig.sml new file mode 100644 index 0000000..6873b98 --- /dev/null +++ b/cml/src/IO/.cm/SKEL/new-cml-imperative-io-sig.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f1d"CML"ac"CML_IMPERATIVE_IO"h1ad"StreamIO"gp1c"CML_STREAM_IO" \ No newline at end of file diff --git a/cml/src/IO/.cm/SKEL/new-cml-text-io-sig.sml b/cml/src/IO/.cm/SKEL/new-cml-text-io-sig.sml new file mode 100644 index 0000000..05368d3 --- /dev/null +++ b/cml/src/IO/.cm/SKEL/new-cml-text-io-sig.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f3d"CML"d"StringCvt"d"TextPrimIO"ac"CML_TEXT_IO"h1ad"StreamIO"gp1c"CML_TEXT_STREAM_IO" \ No newline at end of file diff --git a/cml/src/IO/.cm/SKEL/new-text-io-fn.sml b/cml/src/IO/.cm/SKEL/new-text-io-fn.sml new file mode 100644 index 0000000..bbdb3ae Binary files /dev/null and b/cml/src/IO/.cm/SKEL/new-text-io-fn.sml differ diff --git a/cml/src/IO/.cm/SKEL/os-prim-io-sig.sml b/cml/src/IO/.cm/SKEL/os-prim-io-sig.sml new file mode 100644 index 0000000..0e6b1eb --- /dev/null +++ b/cml/src/IO/.cm/SKEL/os-prim-io-sig.sml @@ -0,0 +1,2 @@ +Skeleton 5 +ac"OS_PRIM_IO"h1ad"PrimIO"gp1c"PRIM_IO" \ No newline at end of file diff --git a/cml/src/IO/.cm/SKEL/prim-io-fn.sml b/cml/src/IO/.cm/SKEL/prim-io-fn.sml new file mode 100644 index 0000000..83435fd --- /dev/null +++ b/cml/src/IO/.cm/SKEL/prim-io-fn.sml @@ -0,0 +1,2 @@ +Skeleton 5 +ae"PrimIO"i5CaVector"gp1c"MONO_VECTOR"aArray"gp1c"MONO_ARRAY"aVectorSlice"gp1c"MONO_VECTOR_SLICE"aArraySlice"gp1c"MONO_ARRAY_SLICE"f5Cd"OS"d"CML"(d"Position"NNjh2ad"A"gp1)ad"V"gp1gp1c"PRIM_IO" \ No newline at end of file diff --git a/cml/src/IO/.cm/SKEL/prim-io-sig.sml b/cml/src/IO/.cm/SKEL/prim-io-sig.sml new file mode 100644 index 0000000..c6743cf --- /dev/null +++ b/cml/src/IO/.cm/SKEL/prim-io-sig.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f3d"OS"d"CML"d"Position"ac"PRIM_IO"h0 \ No newline at end of file diff --git a/cml/src/IO/.cm/SKEL/text-prim-io.sml b/cml/src/IO/.cm/SKEL/text-prim-io.sml new file mode 100644 index 0000000..f819e3f --- /dev/null +++ b/cml/src/IO/.cm/SKEL/text-prim-io.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f1d"Position"ad"TextPrimIO"jh4ad"Vector"gp1d"CharVector"ad"Array"gp1d"CharArray"ad"VectorSlice"gp1d"CharVectorSlice"ad"ArraySlice"gp1d"CharArraySlice"gp1e"PrimIO" \ No newline at end of file diff --git a/cml/src/IO/.cm/amd64-unix/bin-prim-io.sml b/cml/src/IO/.cm/amd64-unix/bin-prim-io.sml new file mode 100644 index 0000000..25f54cb Binary files /dev/null and b/cml/src/IO/.cm/amd64-unix/bin-prim-io.sml differ diff --git a/cml/src/IO/.cm/amd64-unix/chan-io-fn.sml b/cml/src/IO/.cm/amd64-unix/chan-io-fn.sml new file mode 100644 index 0000000..17507cd Binary files /dev/null and b/cml/src/IO/.cm/amd64-unix/chan-io-fn.sml differ diff --git a/cml/src/IO/.cm/amd64-unix/clean-io.sml b/cml/src/IO/.cm/amd64-unix/clean-io.sml new file mode 100644 index 0000000..dca4df3 Binary files /dev/null and b/cml/src/IO/.cm/amd64-unix/clean-io.sml differ diff --git a/cml/src/IO/.cm/amd64-unix/cml-bin-io-sig.sml b/cml/src/IO/.cm/amd64-unix/cml-bin-io-sig.sml new file mode 100644 index 0000000..912847a Binary files /dev/null and b/cml/src/IO/.cm/amd64-unix/cml-bin-io-sig.sml differ diff --git a/cml/src/IO/.cm/amd64-unix/cml-stream-io-sig.sml b/cml/src/IO/.cm/amd64-unix/cml-stream-io-sig.sml new file mode 100644 index 0000000..add2a51 Binary files /dev/null and b/cml/src/IO/.cm/amd64-unix/cml-stream-io-sig.sml differ diff --git a/cml/src/IO/.cm/amd64-unix/cml-text-stream-io-sig.sml b/cml/src/IO/.cm/amd64-unix/cml-text-stream-io-sig.sml new file mode 100644 index 0000000..79a473d Binary files /dev/null and b/cml/src/IO/.cm/amd64-unix/cml-text-stream-io-sig.sml differ diff --git a/cml/src/IO/.cm/amd64-unix/new-bin-io-fn.sml b/cml/src/IO/.cm/amd64-unix/new-bin-io-fn.sml new file mode 100644 index 0000000..42f6be0 Binary files /dev/null and b/cml/src/IO/.cm/amd64-unix/new-bin-io-fn.sml differ diff --git a/cml/src/IO/.cm/amd64-unix/new-cml-imperative-io-sig.sml b/cml/src/IO/.cm/amd64-unix/new-cml-imperative-io-sig.sml new file mode 100644 index 0000000..c7ac0f0 Binary files /dev/null and b/cml/src/IO/.cm/amd64-unix/new-cml-imperative-io-sig.sml differ diff --git a/cml/src/IO/.cm/amd64-unix/new-cml-text-io-sig.sml b/cml/src/IO/.cm/amd64-unix/new-cml-text-io-sig.sml new file mode 100644 index 0000000..72f93e1 Binary files /dev/null and b/cml/src/IO/.cm/amd64-unix/new-cml-text-io-sig.sml differ diff --git a/cml/src/IO/.cm/amd64-unix/new-text-io-fn.sml b/cml/src/IO/.cm/amd64-unix/new-text-io-fn.sml new file mode 100644 index 0000000..e5cb486 Binary files /dev/null and b/cml/src/IO/.cm/amd64-unix/new-text-io-fn.sml differ diff --git a/cml/src/IO/.cm/amd64-unix/os-prim-io-sig.sml b/cml/src/IO/.cm/amd64-unix/os-prim-io-sig.sml new file mode 100644 index 0000000..2970c6e Binary files /dev/null and b/cml/src/IO/.cm/amd64-unix/os-prim-io-sig.sml differ diff --git a/cml/src/IO/.cm/amd64-unix/prim-io-fn.sml b/cml/src/IO/.cm/amd64-unix/prim-io-fn.sml new file mode 100644 index 0000000..13149c5 Binary files /dev/null and b/cml/src/IO/.cm/amd64-unix/prim-io-fn.sml differ diff --git a/cml/src/IO/.cm/amd64-unix/prim-io-sig.sml b/cml/src/IO/.cm/amd64-unix/prim-io-sig.sml new file mode 100644 index 0000000..37b70db Binary files /dev/null and b/cml/src/IO/.cm/amd64-unix/prim-io-sig.sml differ diff --git a/cml/src/IO/.cm/amd64-unix/text-prim-io.sml b/cml/src/IO/.cm/amd64-unix/text-prim-io.sml new file mode 100644 index 0000000..365921f Binary files /dev/null and b/cml/src/IO/.cm/amd64-unix/text-prim-io.sml differ diff --git a/cml/src/IO/bin-io-fn.sml b/cml/src/IO/bin-io-fn.sml new file mode 100644 index 0000000..c21504f --- /dev/null +++ b/cml/src/IO/bin-io-fn.sml @@ -0,0 +1,717 @@ +(* bin-io-fn.sml + * + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * This is the CML version of the BinIO functor. + *) + +functor BinIOFn ( + + structure OSPrimIO : OS_PRIM_IO + where type PrimIO.array = BinPrimIO.array + where type PrimIO.vector = BinPrimIO.vector + where type PrimIO.elem = BinPrimIO.elem + where type PrimIO.pos = BinPrimIO.pos + where type PrimIO.reader = BinPrimIO.reader + where type PrimIO.writer = BinPrimIO.writer + + ) : CML_BIN_IO = struct + + structure PIO = OSPrimIO.PrimIO + structure A = Word8Array + structure V = Word8Vector + structure Pos = Position + + structure SV = SyncVar + + (* assign to an MVar *) + fun mUpdate (mv, x) = (SV.mTake mv; SV.mPut(mv, x)) + + (* an element for initializing buffers *) + val someElem = (0w0 : Word8.word) + + val vecExtract = V.extract + val vecSub = V.sub + val arrUpdate = A.update + val empty = V.fromList[] + + fun dummyCleaner () = () + + structure StreamIO = + struct + type vector = V.vector + type elem = V.elem + type reader = PIO.reader + type writer = PIO.writer + type pos = PIO.pos + + (*** Functional input streams ***) + datatype instream = ISTRM of (in_buffer * int) + and in_buffer = IBUF of { + basePos : pos option, + more : more SV.mvar, (* when this cell is empty, it means that *) + (* there is an outstanding request to the *) + (* server to extend the stream. *) + data : vector, + info : info + } + and more + = MORE of in_buffer (* forward link to additional data *) + | NOMORE (* placeholder for forward link *) + | TERMINATED (* termination of the stream *) + + and info = INFO of { + reader : reader, + readVec : int -> vector, + readVecEvt : int -> vector CML.event, + closed : bool ref, + getPos : unit -> pos option, + tail : more SV.mvar SV.mvar, + (* points to the more cell of the last buffer *) + cleanTag : CleanIO.tag + } + + fun infoOfIBuf (IBUF{info, ...}) = info + fun chunkSzOfIBuf buf = let + val INFO{reader=PIO.RD{chunkSize, ...}, ...} = infoOfIBuf buf + in + chunkSize + end + fun readVec (IBUF{info=INFO{readVec=f, ...}, ...}) = f + + fun inputExn (INFO{reader=PIO.RD{name, ...}, ...}, mlOp, exn) = + raise IO.Io{function=mlOp, name=name, cause=exn} + + datatype more_data = EOF | DATA of in_buffer + + (* extend the stream by a chunk. + * Invariant: the more m-variable is empty on entry and full on exit. + *) + fun extendStream (readFn, mlOp, buf as IBUF{more, info, ...}) = (let + val INFO{getPos, tail, ...} = info + val basePos = getPos() + val chunk = readFn (chunkSzOfIBuf buf) + in + if (V.length chunk = 0) + then (SV.mPut (more, NOMORE); EOF) + else let + val newMore = SV.mVar() + val buf' = IBUF{ + basePos = basePos, data = chunk, + more = newMore, info = info + } + in + (* note that we do not fill the newMore cell until + * after the tail has been updated. This ensures + * that someone attempting to access the tail will + * not acquire the lock until after we are done. + *) + mUpdate (tail, newMore); + SV.mPut (more, MORE buf'); (* releases lock!! *) + SV.mPut (newMore, NOMORE); + DATA buf' + end + end + handle ex => ( + SV.mPut (more, NOMORE); + inputExn(info, mlOp, ex))) + + (* get the next buffer in the stream, extending it if necessary. If + * the stream must be extended, we lock it by taking the value from the + * more cell; the extendStream function is responsible for filling in + * the cell. + *) + fun getBuffer (readFn, mlOp) (buf as IBUF{more, info, ...}) = let + fun get TERMINATED = EOF + | get (MORE buf') = DATA buf' + | get NOMORE = (case SV.mTake more + of NOMORE => extendStream (readFn, mlOp, buf) + | next => (SV.mPut(more, next); get next) + (* end case *)) + in + get (SV.mGet more) + end + + (* read a chunk that is at least the specified size *) + fun readChunk buf = let + val INFO{readVec, reader=PIO.RD{chunkSize, ...}, ...} = + infoOfIBuf buf + in + case (chunkSize - 1) + of 0 => (fn n => readVec n) + | k => (* round up to next multiple of chunkSize *) + (fn n => readVec(Int.quot(n+k, chunkSize) * chunkSize)) + (* end case *) + end + + fun generalizedInput getBuf = let + fun get (ISTRM(buf as IBUF{data, ...}, pos)) = let + val len = V.length data + in + if (pos < len) + then (vecExtract(data, pos, NONE), ISTRM(buf, len)) + else (case (getBuf buf) + of EOF => (empty, ISTRM(buf, len)) + | (DATA rest) => get (ISTRM(rest, 0)) + (* end case *)) + end + in + get + end + + (* terminate an input stream *) + fun terminate (info as INFO{tail, cleanTag, ...}) = let + val m = SV.mGet tail + in + case SV.mTake m + of (m' as MORE _) => (SV.mPut(m, m'); terminate info) + | TERMINATED => SV.mPut(m, TERMINATED) + | _ => ( + CleanIO.removeCleaner cleanTag; + SV.mPut(m, TERMINATED)) + (* end case *) + end + + (* find the end of the stream *) + fun findEOS (buf as IBUF{more, data, ...}) = (case (SV.mGet more) + of (MORE buf) => findEOS buf + | _ => ISTRM(buf, V.length data) + (* end case *)) + + fun input (strm as ISTRM(buf, _)) = + generalizedInput (getBuffer (readVec buf, "input")) strm + fun input1 (ISTRM(buf, pos)) = let + val IBUF{data, more, ...} = buf + in + if (pos < V.length data) + then SOME(vecSub(data, pos), ISTRM(buf, pos+1)) + else let + fun get (MORE buf) = input1 (ISTRM(buf, 0)) + | get TERMINATED = NONE + | get NOMORE = (case SV.mTake more + of NOMORE => ( + case extendStream (readVec buf, "input1", buf) + of EOF => NONE + | (DATA rest) => input1 (ISTRM(rest, 0)) + (* end case *)) + | next => (SV.mPut(more, next); get next) + (* end case *)) + in + get (SV.mGet more) + end + end + fun inputN (ISTRM(buf, pos), n) = let + fun join (item, (list, strm)) = (item::list, strm) + fun inputList (buf as IBUF{data, ...}, i, n) = let + val len = V.length data + val remain = len-i + in + if (remain >= n) + then ([vecExtract(data, i, SOME n)], ISTRM(buf, i+n)) + else join ( + vecExtract(data, i, NONE), + nextBuf(buf, n-remain)) + end + and nextBuf (buf as IBUF{more, data, ...}, n) = let + fun get (MORE buf) = inputList (buf, 0, n) + | get TERMINATED = ([], ISTRM(buf, V.length data)) + | get NOMORE = (case (SV.mTake more) + of NOMORE => (case extendStream (readVec buf, "inputN", buf) + of EOF => ([], ISTRM(buf, V.length data)) + | (DATA rest) => inputList (rest, 0, n) + (* end case *)) + | next => (SV.mPut(more, next); get next) + (* end case *)) + in + get (SV.mGet more) + end + val (data, strm) = inputList (buf, pos, n) + in + (V.concat data, strm) + end + + fun inputAll (strm as ISTRM(buf, _)) = let + val INFO{reader=PIO.RD{avail, ...}, ...} = infoOfIBuf buf + (* read a chunk that is as large as the available input. Note + * that for systems that use CR-LF for #"\n", the size will be + * too large, but this should be okay. + *) + fun bigChunk _ = let + val delta = (case avail() + of NONE => chunkSzOfIBuf buf + | (SOME n) => n + (* end case *)) + in + readChunk buf delta + end + val bigInput = + generalizedInput (getBuffer (bigChunk, "inputAll")) + fun loop (v, strm) = + if (V.length v = 0) then [] else v :: loop(bigInput strm) + val data = V.concat (loop (bigInput strm)) + in + (data, findEOS buf) + end + + fun input1Evt _ = raise Fail "input1Evt unimplemented" + fun inputEvt _ = raise Fail "inputEvt unimplemented" + fun inputNEvt _ = raise Fail "inputNEvt unimplemented" + fun inputAllEvt _ = raise Fail "inputAllEvt unimplemented" + + (* Return SOME k, if k <= amount characters can be read without blocking. *) + fun canInput (strm as ISTRM(buf, pos), amount) = let +(****** + val readVecNB = (case buf + of (IBUF{info as INFO{readVecNB=NONE, ...}, ...}) => + inputExn(info, "canInput", IO.NonblockingNotSupported) + | (IBUF{info=INFO{readVecNB=SOME f, ...}, ...}) => f + (* end case *)) +******) + fun tryInput (buf as IBUF{data, ...}, i, n) = let + val len = V.length data + val remain = len - i + in + if (remain >= n) + then SOME n + else nextBuf (buf, n - remain) + end + and nextBuf (IBUF{more, ...}, n) = let + fun get (MORE buf) = tryInput (buf, 0, n) + | get TERMINATED = SOME(amount - n) +(****** + | get NOMORE = (case SV.mTake more + of NOMORE => (( + case extendStream (readVecNB, "canInput", buf) + of EOF => SOME(amount - n) + | (DATA b) => tryInput (b, 0, n) + (* end case *)) + handle IO.Io{cause=WouldBlock, ...} => SOME(amount - n)) + | next => (SV.mPut(more, next); get next) + (* end case *)) +******) + | get NOMORE = SOME(amount - n) + in + get (SV.mGet more) + end + in + if (amount < 0) + then raise Size + else tryInput (buf, pos, amount) + end + (* close an input stream given its info structure; we need this function + * for the cleanup hook to avoid a space leak. + *) + fun closeInInfo (INFO{closed=ref true, ...}) = () + | closeInInfo (info as INFO{closed, reader=PIO.RD{close, ...}, ...}) = ( +(*** We need some kind of lock on the input stream to do this safely!!! ***) + terminate info; + closed := true; + close() handle ex => inputExn(info, "closeIn", ex)) + fun closeIn (ISTRM(buf, _)) = closeInInfo (infoOfIBuf buf) + fun endOfStream (ISTRM(buf as IBUF{more, ...}, pos)) = ( + case SV.mTake more + of (next as MORE _) => (SV.mPut(more, next); false) + | next => let + val IBUF{data, info=INFO{closed, ...}, ...} = buf + in + if (pos = V.length data) + then (case (next, !closed) + of (NOMORE, false) => ( + case extendStream (readVec buf, "endOfStream", buf) + of EOF => true + | _ => false + (* end case *)) + | _ => (SV.mPut(more, next); true) + (* end case *)) + else (SV.mPut(more, next); false) + end + (* end case *)) + fun mkInstream (reader, optData) = let + val PIO.RD{readVec, readVecEvt, getPos, setPos, ...} = reader + val getPos = (case (getPos, setPos) + of (SOME f, SOME _) => (fn () => SOME(f())) + | _ => (fn () => NONE) + (* end case *)) + val more = SV.mVarInit NOMORE + val tag = CleanIO.addCleaner dummyCleaner + val info = INFO{ + reader=reader, readVec=readVec, readVecEvt=readVecEvt, + closed = ref false, getPos = getPos, + tail = SV.mVarInit more, cleanTag = tag + } + val buf = (case optData + of NONE => IBUF{ + basePos = getPos(), data=empty, + info=info, more=more + } +(** What should we do about the position in this case ?? **) +(** Suggestion: When building a stream with supplied initial data, + ** nothing can be said about the positions inside that initial + ** data (who knows where that data even came from!). + **) + | (SOME v) => IBUF{ + basePos = NONE, data=v, + info=info, more=more} + (* end case *)) + val strm = ISTRM(buf, 0) + in + CleanIO.rebindCleaner (tag, fn () => closeInInfo info); + strm + end + fun getReader (ISTRM(buf, pos)) = let + val IBUF{data, info as INFO{reader, ...}, more, ...} = buf + fun getData more = (case SV.mGet more + of (MORE(IBUF{data, more=more', ...})) => data :: getData more' + | _ => [] + (* end case *)) + in + terminate info; + if (pos < V.length data) + then ( + reader, + V.concat(vecExtract(data, pos, NONE) :: getData more) + ) + else (reader, V.concat(getData more)) + end + + (** Position operations on instreams **) + datatype in_pos = INP of { + base : pos, + offset : int, + info : info + } + + fun getPosIn (ISTRM(buf, pos)) = (case buf + of IBUF{basePos=NONE, info, ...} => + inputExn (info, "getPosIn", IO.RandomAccessNotSupported) + | IBUF{basePos=SOME p, info, ...} => INP{ + base = p, offset = pos, info = info + } + (* end case *)) + fun filePosIn (INP{base, offset, ...}) = + Position.+(base, Position.fromInt offset) + fun setPosIn (pos as INP{info as INFO{reader, ...}, ...}) = let + val fpos = filePosIn pos + val (PIO.RD rd) = reader + in + terminate info; + valOf (#setPos rd) fpos; + mkInstream (PIO.RD rd, NONE) + end + + + (*** Output streams ***) + + (* an output stream is implemented as a monitor using an mvar to + * hold its data. + *) + + datatype ostrm_info = OSTRM of { + buf : A.array, + pos : int ref, + closed : bool ref, + bufferMode : IO.buffer_mode ref, + writer : writer, + writeArr : {buf : A.array, i : int, sz : int option} -> unit, + writeVec : {buf : V.vector, i : int, sz : int option} -> unit, + cleanTag : CleanIO.tag + } + + type outstream = ostrm_info SV.mvar + + fun outputExn (OSTRM{writer=PIO.WR{name, ...}, ...}, mlOp, exn) = + raise IO.Io{function=mlOp, name=name, cause=exn} + + (* lock access to the stream and make sure that it is not closed. *) + fun lockAndChkClosedOut (strmMV, mlOp) = (case SV.mTake strmMV + of (strm as OSTRM({closed=ref true, ...})) => ( + SV.mPut (strmMV, strm); + outputExn (strm, mlOp, IO.ClosedStream)) + | strm => strm + (* end case *)) + + fun flushBuffer (strmMV, strm as OSTRM{buf, pos, writeArr, ...}, mlOp) = ( + case !pos + of 0 => () + | n => (( + writeArr {buf=buf, i=0, sz=SOME n}; pos := 0) + handle ex => ( + SV.mPut(strmMV, strm); outputExn (strm, mlOp, ex))) + (* end case *)) + + fun output (strmMV, v) = let + val (strm as OSTRM os) = lockAndChkClosedOut (strmMV, "output") + fun release () = SV.mPut (strmMV, strm) + val {buf, pos, bufferMode, ...} = os + fun flush () = flushBuffer (strmMV, strm, "output") + fun flushAll () = (#writeArr os {buf=buf, i=0, sz=NONE} + handle ex => (release(); outputExn (strm, "output", ex))) + fun writeDirect () = ( + case !pos + of 0 => () + | n => (#writeArr os {buf=buf, i=0, sz=SOME n}; pos := 0) + (* end case *); + #writeVec os {buf=v, i=0, sz=NONE}) + handle ex => (release(); outputExn (strm, "output", ex)) + fun insert copyVec = let + val bufLen = A.length buf + val dataLen = V.length v + in + if (dataLen >= bufLen) + then writeDirect() + else let + val i = !pos + val avail = bufLen - i + in + if (avail < dataLen) + then ( + copyVec(v, 0, avail, buf, i); + flushAll(); + copyVec(v, avail, dataLen-avail, buf, 0); + pos := dataLen-avail) + else ( + copyVec(v, 0, dataLen, buf, i); + pos := i + dataLen; + if (avail = dataLen) then flush() else ()) + end + end + in + case !bufferMode + of IO.NO_BUF => writeDirect () + | _ => let + fun copyVec (src, srcI, srcLen, dst, dstI) = A.copyVec { + src = src, si = srcI, len = SOME srcLen, + dst = dst, di = dstI + } + in + insert copyVec + end + (* end case *); + release() + end + + fun output1 (strmMV, elem) = let + val (strm as OSTRM{buf, pos, bufferMode, writeArr, ...}) = + lockAndChkClosedOut (strmMV, "output1") + fun release () = SV.mPut (strmMV, strm) + in + case !bufferMode + of IO.NO_BUF => ( + arrUpdate (buf, 0, elem); + writeArr {buf=buf, i=0, sz=SOME 1} + handle ex => (release(); outputExn (strm, "output1", ex))) + | _ => let val i = !pos val i' = i+1 + in + arrUpdate (buf, i, elem); pos := i'; + if (i' = A.length buf) + then flushBuffer (strmMV, strm, "output1") + else () + end + (* end case *); + release() + end + + fun flushOut strmMV = let + val strm = lockAndChkClosedOut (strmMV, "flushOut") + in + flushBuffer (strmMV, strm, "flushOut"); + SV.mPut (strmMV, strm) + end + + fun closeOut strmMV = let + val (strm as OSTRM{writer=PIO.WR{close, ...}, closed, cleanTag, ...}) = + SV.mTake strmMV + in + if !closed + then () + else ( + flushBuffer (strmMV, strm, "closeOut"); + closed := true; + CleanIO.removeCleaner cleanTag; + close()); + SV.mPut (strmMV, strm) + end + + fun mkOutstream (wr as PIO.WR{chunkSize, writeArr, writeVec, ...}, mode) = + let + fun iterate f (buf, i, sz) = let + fun lp (_, 0) = () + | lp (i, n) = let val n' = f{buf=buf, i=i, sz=SOME n} + in lp (i+n', n-n') end + in + lp (i, sz) + end + fun writeArr' {buf, i, sz} = let + val len = (case sz + of NONE => A.length buf - i + | (SOME n) => n + (* end case *)) + in + iterate writeArr (buf, i, len) + end + fun writeVec' {buf, i, sz} = let + val len = (case sz + of NONE => V.length buf - i + | (SOME n) => n + (* end case *)) + in + iterate writeVec (buf, i, len) + end + (* install a dummy cleaner *) + val tag = CleanIO.addCleaner dummyCleaner + val strm = SV.mVarInit (OSTRM{ + buf = A.array(chunkSize, someElem), + pos = ref 0, + closed = ref false, + bufferMode = ref mode, + writer = wr, + writeArr = writeArr', + writeVec = writeVec', + cleanTag = tag + }) + in + CleanIO.rebindCleaner (tag, fn () => closeOut strm); + strm + end + + fun getWriter strmMV = let + val (strm as OSTRM{writer, bufferMode, ...}) = + lockAndChkClosedOut (strmMV, "getWriter") + in + (writer, !bufferMode) before SV.mPut(strmMV, strm) + end + + (** Position operations on outstreams **) + datatype out_pos = OUTP of { + pos : PIO.pos, + strm : outstream + } + + fun getPosOut strmMV = let + val (strm as OSTRM{writer, ...}) = + lockAndChkClosedOut (strmMV, "getWriter") + fun release () = SV.mPut(strmMV, strm) + in + flushBuffer (strmMV, strm, "getPosOut"); + case writer + of PIO.WR{getPos=SOME f, ...} => ( + OUTP{pos = f(), strm = strmMV} + handle ex => (release(); outputExn(strm, "getPosOut", ex))) + | _ => ( + release(); + outputExn(strm, "getPosOut", IO.RandomAccessNotSupported)) + (* end case *) + before release() + end + fun filePosOut (OUTP{pos, strm=strmMV}) = ( + SV.mPut (strmMV, lockAndChkClosedOut (strmMV, "filePosOut")); + pos) + fun setPosOut (OUTP{pos, strm=strmMV}) = let + val (strm as OSTRM{writer, ...}) = + lockAndChkClosedOut (strmMV, "setPosOut") + fun release () = SV.mPut(strmMV, strm) + in + case writer + of PIO.WR{setPos=SOME f, ...} => ( + (f pos) + handle ex => (release(); outputExn(strm, "setPosOut", ex))) + | _ => ( + release(); + outputExn(strm, "getPosOut", IO.RandomAccessNotSupported)) + (* end case *); + release() + end + + fun setBufferMode (strmMV, mode) = let + val (strm as OSTRM{bufferMode, ...}) = + lockAndChkClosedOut (strmMV, "setBufferMode") + in + if (mode = IO.NO_BUF) + then flushBuffer (strmMV, strm, "setBufferMode") + else (); + bufferMode := mode; + SV.mPut (strmMV, strm) + end + fun getBufferMode strmMV = let +(** should we be checking for closed streams here??? **) + val (strm as OSTRM{bufferMode, ...}) = + lockAndChkClosedOut (strmMV, "getBufferMode") + in + !bufferMode before SV.mPut (strmMV, strm) + end + + end (* StreamIO *) + + type vector = V.vector + type elem = V.elem + type instream = StreamIO.instream SV.mvar + type outstream = StreamIO.outstream SV.mvar + + (** Input operations **) + fun input strm = let val (v, strm') = StreamIO.input(SV.mTake strm) + in + SV.mPut (strm, strm'); v + end + fun input1 strm = (case StreamIO.input1(SV.mTake strm) + of NONE => NONE + | (SOME(elem, strm')) => (SV.mPut (strm, strm'); SOME elem) + (* end case *)) + fun inputN (strm, n) = let val (v, strm') = StreamIO.inputN (SV.mTake strm, n) + in + SV.mPut (strm, strm'); v + end + fun inputAll (strm : instream) = let + val (v, strm') = StreamIO.inputAll(SV.mTake strm) + in + SV.mPut (strm, strm'); v + end + fun input1Evt _ = raise Fail "input1Evt unimplemented" + fun inputEvt _ = raise Fail "inputEvt unimplemented" + fun inputNEvt _ = raise Fail "inputNEvt unimplemented" + fun inputAllEvt _ = raise Fail "inputAllEvt unimplemented" + fun canInput (strm, n) = StreamIO.canInput (SV.mGet strm, n) + fun lookahead (strm : instream) = (case StreamIO.input1(SV.mGet strm) + of NONE => NONE + | (SOME(elem, _)) => SOME elem + (* end case *)) + fun closeIn strm = let + val (s as StreamIO.ISTRM(buf as StreamIO.IBUF{data, ...}, _)) = + SV.mTake strm + in + StreamIO.closeIn s; + SV.mPut(strm, StreamIO.findEOS buf) + end + fun endOfStream strm = StreamIO.endOfStream(SV.mGet strm) + fun getPosIn strm = StreamIO.getPosIn(SV.mGet strm) + fun setPosIn (strm, p) = mUpdate(strm, StreamIO.setPosIn p) + + (** Output operations **) + fun output (strm, v) = StreamIO.output(SV.mGet strm, v) + fun output1 (strm, c) = StreamIO.output1(SV.mGet strm, c) + fun flushOut strm = StreamIO.flushOut(SV.mGet strm) + fun closeOut strm = StreamIO.closeOut(SV.mGet strm) + fun getPosOut strm = StreamIO.getPosOut(SV.mGet strm) + fun setPosOut (strm, p as StreamIO.OUTP{strm=strm', ...}) = ( + mUpdate(strm, strm'); StreamIO.setPosOut p) + + fun mkInstream (strm : StreamIO.instream) = SV.mVarInit strm + fun getInstream (strm : instream) = SV.mGet strm + fun setInstream (strm : instream, strm') = mUpdate(strm, strm') + + fun mkOutstream (strm : StreamIO.outstream) = SV.mVarInit strm + fun getOutstream (strm : outstream) = SV.mGet strm + fun setOutstream (strm : outstream, strm') = mUpdate(strm, strm') + + (** Open files **) + fun openIn fname = + mkInstream(StreamIO.mkInstream(OSPrimIO.openRd fname, NONE)) + handle ex => raise IO.Io{function="openIn", name=fname, cause=ex} + fun openOut fname = + mkOutstream(StreamIO.mkOutstream(OSPrimIO.openWr fname, IO.BLOCK_BUF)) + handle ex => raise IO.Io{function="openOut", name=fname, cause=ex} + fun openAppend fname = + mkOutstream(StreamIO.mkOutstream(OSPrimIO.openApp fname, IO.NO_BUF)) + handle ex => raise IO.Io{function="openAppend", name=fname, cause=ex} + + end (* BinIOFn *) diff --git a/cml/src/IO/bin-prim-io.sml b/cml/src/IO/bin-prim-io.sml new file mode 100644 index 0000000..a1e9bc4 --- /dev/null +++ b/cml/src/IO/bin-prim-io.sml @@ -0,0 +1,14 @@ +(* bin-prim-io.sml + * + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + *) + +structure BinPrimIO = PrimIO ( + structure Vector = Word8Vector + structure Array = Word8Array + structure VectorSlice = Word8VectorSlice + structure ArraySlice = Word8ArraySlice + val someElem = (0w0 : Word8.word) + type pos = Position.int + val compare = Position.compare); diff --git a/cml/src/IO/chan-io-fn.sml b/cml/src/IO/chan-io-fn.sml new file mode 100644 index 0000000..6373d71 --- /dev/null +++ b/cml/src/IO/chan-io-fn.sml @@ -0,0 +1,154 @@ +(* chan-io-fn.sml + * + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + *) + +functor ChanIOFn ( + structure PrimIO : PRIM_IO + structure V : MONO_VECTOR + structure VS : MONO_VECTOR_SLICE + structure A : MONO_ARRAY + structure AS : MONO_ARRAY_SLICE + sharing type A.array = AS.array = PrimIO.array + sharing type A.vector = V.vector = AS.vector = VS.vector = PrimIO.vector + sharing type VS.slice = AS.vector_slice = PrimIO.vector_slice + sharing type AS.slice = PrimIO.array_slice + ) : sig + + structure PrimIO : PRIM_IO + + val mkReader : PrimIO.vector CML.chan -> PrimIO.reader + val mkWriter : PrimIO.vector CML.chan -> PrimIO.writer + + end = struct + + structure SV = SyncVar + + structure PrimIO = PrimIO + + val vextract = VS.vector o VS.slice + + (* create a reader that is connected to the output port of a channel. *) + fun mkReader ch = let + val closedFlg = SV.iVar() + val isClosedEvt = + CML.wrap(SV.iGetEvt closedFlg, fn () => raise IO.ClosedStream) + datatype req + = RD of (int * unit CML.event * V.vector CML.chan) + | CLOSE + val reqCh = Mailbox.mailbox() + fun readVecEvt 0 = CML.alwaysEvt(V.fromList[]) + | readVecEvt n = if (n < 0) + then raise General.Subscript + else CML.withNack (fn nack => let + val replCh = CML.channel() + in + Mailbox.send (reqCh, RD(n, nack, replCh)); + CML.choose [ + CML.recvEvt replCh, + isClosedEvt + ] + end) + fun readArrEvt asl = let + val (buf, i, n) = AS.base asl + in + CML.wrap (readVecEvt n, fn v => ( + A.copyVec{dst=buf, di=i, src=v}; + V.length v)) + end + fun close () = Mailbox.send(reqCh, CLOSE) + fun getData NONE = let + val v = CML.recv ch + in + if (V.length v > 0) then v else getData NONE + end + | getData (SOME v) = v + fun server buf = (case (Mailbox.recv reqCh) + of RD(n, nack, replCh) => let + val v = getData buf + in + if (V.length v > n) + then let + val v' = vextract (v, 0, SOME n) + in + CML.select [ + CML.wrap (nack, fn () => server(SOME v)), + CML.wrap (CML.sendEvt(replCh, v), + fn () => + server(SOME(vextract(v, n, NONE)))) + ] + end + else CML.select [ + CML.wrap (nack, fn () => server(SOME v)), + CML.wrap (CML.sendEvt(replCh, v), fn () => server NONE) + ] + end + | CLOSE => (SV.iPut(closedFlg, ()); closedServer()) + (* end case *)) + and closedServer () = (ignore(Mailbox.recv reqCh); closedServer()) + in + ignore(CML.spawnc server NONE); + PrimIO.RD{ + name = "", + chunkSize = 1024, (* ?? *) + readVec = CML.sync o readVecEvt, + readArr = CML.sync o readArrEvt, + readVecEvt = readVecEvt, + readArrEvt = readArrEvt, + avail = fn () => NONE, (* ?? *) + getPos = NONE, + setPos = NONE, + endPos = NONE, + verifyPos = NONE, + close = close, + ioDesc = NONE + } + end + + (* create a writer that is connected to the input port of a channel. *) + fun mkWriter ch = let + val closedFlg = SV.iVar() + val closedEvt = + CML.wrap (SV.iGetEvt closedFlg, fn () => raise IO.ClosedStream) + val ch' = CML.channel() + fun buffer () = CML.select [ + CML.wrap (CML.recvEvt ch', fn v => ( + if (V.length v > 0) then CML.send(ch, v) else (); + buffer())), + closedEvt + ] + fun writeVecEvt arg = let val v = VS.vector arg + in + CML.choose [ + closedEvt, + CML.wrap (CML.sendEvt (ch', v), fn () => V.length v) + ] + end + fun writeArrEvt arg = let val v = AS.vector arg + in + CML.choose [ + closedEvt, + CML.wrap (CML.sendEvt (ch', v), fn () => V.length v) + ] + end + fun close () = SV.iPut(closedFlg, ()) + in + ignore(CML.spawn(fn () => ignore(buffer()))); + PrimIO.WR{ + name = "", + chunkSize = 1024, + writeVec = CML.sync o writeVecEvt, + writeArr = CML.sync o writeArrEvt, + writeVecEvt = writeVecEvt, + writeArrEvt = writeArrEvt, + getPos = NONE, + setPos = NONE, + endPos = NONE, + verifyPos = NONE, + close = close, + ioDesc = NONE + } + end + + end; diff --git a/cml/src/IO/clean-io.sml b/cml/src/IO/clean-io.sml new file mode 100644 index 0000000..98bb3f1 --- /dev/null +++ b/cml/src/IO/clean-io.sml @@ -0,0 +1,109 @@ +(* clean-io.sml + * + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * This module keeps track of open I/O streams, and handles the proper + * cleaning of them. It is a modified version of the SML/NJ module + * of the same name (in boot/IO/clean-io.sml). Unlike the SML/NJ version, + * we only do cleanup at shutdown/exit time (we do not try to support the + * persistence of CML streams across invocations of RunCML.doit), and only + * require a single clean-up function (this flushes the standard streams, + * and closes all others). These operations should only be called while + * CML is running, since they use synchronization primitives. + * + * NOTE: there is currently a problem with removing the cleaners for streams + * that get dropped by the application, but the system limit on open files + * will limit this. + * + *) + +structure CleanIO :> sig + + type tag + + val osInitHook : (unit -> unit) ref + (* this function gets invoked as the first action during the IO + * initialization. It is meant to support any OS specific initialization + * that might be necessary. + *) + + val stdStrmHook : (unit -> unit) ref + (* this function is defined in TextIOFn, and is called after the osHook. + * It is used to rebuild the standard streams. + *) + + val addCleaner : (unit -> unit) -> tag + + val rebindCleaner : (tag * (unit -> unit)) -> unit + + val removeCleaner : tag -> unit + + (* for linking the master IO cleaner function into the list of cleanup hooks *) + val ioCleaner : (string * CleanUp.when list * (CleanUp.when -> unit)) + + end = struct + + structure SV = SyncVar + + type tag = unit ref + + type cleaner = { + tag : tag, (* unique ID for this cleaner *) + close : unit -> unit (* called AtExit and AtShutdown *) + } + + val osInitHook = ref(fn () => ()) + val stdStrmHook = ref(fn () => ()) + + val cleaners = SV.mVarInit ([] : cleaner list) + + fun addCleaner close = let + val tag = ref() + val cleanerRec = {tag = tag, close = close} + in + SV.mPut (cleaners, cleanerRec :: SV.mTake cleaners); + tag + end + + fun getTag ({tag, ...} : cleaner) = tag + + fun rebindCleaner (t, close) = let + fun f [] = raise Fail "rebindCleaner: tag not found" + | f (x :: r) = let + val t' = getTag x + in + if (t' = t) + then {tag=t, close=close} :: r + else x :: f r + end + in + SV.mPut (cleaners, f (SV.mTake cleaners)) + end + + fun removeCleaner t = let + fun f [] = [] (* should we raise an exception here? *) + | f (x :: r) = if (getTag x = t) then r else x :: f r + in + SV.mPut (cleaners, f (SV.mTake cleaners)) + end + + fun doClean () = let + fun doit [] = () + | doit ({tag, close}::r) = ((close()) handle _ => (); doit r) + in + doit (SV.mGet cleaners) + end + + structure C = CleanUp + + fun cleanUp (C.AtShutdown | C.AtExit) = doClean () + | cleanUp (C.AtInit | C.AtInitFn) = ( + (!osInitHook)(); + (!stdStrmHook)()) + + (* for linking the master IO cleaner function into the list of cleanup hooks *) + val ioCleaner = ("IO", C.atAll, cleanUp) + + end (* CleanIO *) + diff --git a/cml/src/IO/cml-bin-io-sig.sml b/cml/src/IO/cml-bin-io-sig.sml new file mode 100644 index 0000000..8b9aec8 --- /dev/null +++ b/cml/src/IO/cml-bin-io-sig.sml @@ -0,0 +1,28 @@ +(* cml-bin-io-sig.sml + * + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * This extends the SMLBL BIN_IO interface with event-valued operations. + *) + +signature CML_BIN_IO = + sig + include CML_IMPERATIVE_IO +(* + where type StreamIO.vector = Word8Vector.vector + where type StreamIO.elem = Word8.word + where type StreamIO.reader = BinPrimIO.reader + where type StreamIO.writer = BinPrimIO.writer + where type StreamIO.pos = BinPrimIO.pos = Position.int +*) + + val openIn : string -> instream + val openOut : string -> outstream + val openAppend : string -> outstream + end + where type StreamIO.vector = Word8Vector.vector + where type StreamIO.elem = Word8.word + where type StreamIO.reader = BinPrimIO.reader + where type StreamIO.writer = BinPrimIO.writer + where type StreamIO.pos = BinPrimIO.pos diff --git a/cml/src/IO/cml-imperative-io-sig.sml b/cml/src/IO/cml-imperative-io-sig.sml new file mode 100644 index 0000000..9ec0ba9 --- /dev/null +++ b/cml/src/IO/cml-imperative-io-sig.sml @@ -0,0 +1,55 @@ +(* cml-imperative-io-sig.sml + * + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * This extends the SMLBL IMPERATIVE_IO interface with event-valued operations. + *) + +signature CML_IMPERATIVE_IO = + sig + + (* include IMPERATIVE_IO *) + + type vector + type elem + + type instream + type outstream + + val input : instream -> vector + val input1 : instream -> elem option + val inputN : (instream * int) -> vector + val inputAll : instream -> vector + val canInput : (instream * int) -> int option + val lookahead : instream -> elem option + val closeIn : instream -> unit + val endOfStream : instream -> bool + + val output : (outstream * vector) -> unit + val output1 : (outstream * elem) -> unit + val flushOut : outstream -> unit + val closeOut : outstream -> unit + + structure StreamIO : CML_STREAM_IO + sharing type vector = StreamIO.vector + sharing type elem = StreamIO.elem + + val getPosIn : instream -> StreamIO.in_pos + val setPosIn : (instream * StreamIO.in_pos) -> unit + val mkInstream : StreamIO.instream -> instream + val getInstream : instream -> StreamIO.instream + val setInstream : (instream * StreamIO.instream) -> unit + + val getPosOut : outstream -> StreamIO.out_pos + val setPosOut : (outstream * StreamIO.out_pos) -> unit + val mkOutstream : StreamIO.outstream -> outstream + val getOutstream : outstream -> StreamIO.outstream + val setOutstream : (outstream * StreamIO.outstream) -> unit + + val input1Evt : instream -> elem option CML.event + val inputNEvt : (instream * int) -> vector CML.event + val inputEvt : instream -> vector CML.event + val inputAllEvt : instream -> vector CML.event + + end; diff --git a/cml/src/IO/cml-stream-io-sig.sml b/cml/src/IO/cml-stream-io-sig.sml new file mode 100644 index 0000000..d08ab2f --- /dev/null +++ b/cml/src/IO/cml-stream-io-sig.sml @@ -0,0 +1,19 @@ +(* cml-stream-io-sig.sml + * + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * This extends the SMLBL STREAM_IO interface with event-valued operations. + *) + +signature CML_STREAM_IO = + sig + include STREAM_IO + + val input1Evt : instream -> (elem * instream) option CML.event + val inputNEvt : (instream * int) -> (vector * instream) CML.event + val inputEvt : instream -> (vector * instream) CML.event + val inputAllEvt : instream -> (vector * instream) CML.event + + end; + diff --git a/cml/src/IO/cml-text-io-sig.sml b/cml/src/IO/cml-text-io-sig.sml new file mode 100644 index 0000000..afb30b9 --- /dev/null +++ b/cml/src/IO/cml-text-io-sig.sml @@ -0,0 +1,78 @@ +(* cml-text-io-sig.sml + * + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * This extends the SMLBL TEXT_IO interface with event-valued operations. + *) + +signature CML_TEXT_IO = + sig + (* include TEXT_IO *) + type vector = string + type elem = char + + type instream + type outstream + + val input : instream -> vector + val input1 : instream -> elem option + val inputN : (instream * int) -> vector + val inputAll : instream -> vector + val canInput : (instream * int) -> int option + val lookahead : instream -> elem option + val closeIn : instream -> unit + val endOfStream : instream -> bool + + val output : (outstream * vector) -> unit + val output1 : (outstream * elem) -> unit + val flushOut : outstream -> unit + val closeOut : outstream -> unit + + structure StreamIO : CML_TEXT_STREAM_IO + where type reader = TextPrimIO.reader + where type writer = TextPrimIO.writer + where type pos = TextPrimIO.pos + where type vector = string + where type elem = char + + val getPosIn : instream -> StreamIO.in_pos + val setPosIn : (instream * StreamIO.in_pos) -> unit + val mkInstream : StreamIO.instream -> instream + val getInstream : instream -> StreamIO.instream + val setInstream : (instream * StreamIO.instream) -> unit + + val getPosOut : outstream -> StreamIO.out_pos + val setPosOut : (outstream * StreamIO.out_pos) -> unit + val mkOutstream : StreamIO.outstream -> outstream + val getOutstream : outstream -> StreamIO.outstream + val setOutstream : (outstream * StreamIO.outstream) -> unit + + val inputLine : instream -> string option + val outputSubstr : (outstream * substring) -> unit + + val openIn : string -> instream + val openString : string -> instream + val openOut : string -> outstream + val openAppend : string -> outstream + + val stdIn : instream + val stdOut : outstream + val stdErr : outstream + + val input1Evt : instream -> elem option CML.event + val inputNEvt : (instream * int) -> vector CML.event + val inputEvt : instream -> vector CML.event + val inputAllEvt : instream -> vector CML.event + + val openChanIn : string CML.chan -> instream + val openChanOut : string CML.chan -> outstream + + val print : string -> unit + + val scanStream : + ((elem, StreamIO.instream) StringCvt.reader + -> ('a, StreamIO.instream) StringCvt.reader + ) -> instream -> 'a option + + end; diff --git a/cml/src/IO/cml-text-stream-io-sig.sml b/cml/src/IO/cml-text-stream-io-sig.sml new file mode 100644 index 0000000..b090b14 --- /dev/null +++ b/cml/src/IO/cml-text-stream-io-sig.sml @@ -0,0 +1,19 @@ +(* cml-text-stream-io-sig.sml + * + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * This extends the SMLBL TEXT_STREAM_IO interface with event-valued operations. + *) + +signature CML_TEXT_STREAM_IO = + sig + include TEXT_STREAM_IO + + val input1Evt : instream -> (elem * instream) option CML.event + val inputNEvt : (instream * int) -> (vector * instream) CML.event + val inputEvt : instream -> (vector * instream) CML.event + val inputAllEvt : instream -> (vector * instream) CML.event + val inputLineEvt : instream -> (vector * instream) option CML.event + + end; diff --git a/cml/src/IO/new-bin-io-fn.sml b/cml/src/IO/new-bin-io-fn.sml new file mode 100644 index 0000000..b570df7 --- /dev/null +++ b/cml/src/IO/new-bin-io-fn.sml @@ -0,0 +1,757 @@ +(* new-bin-io-fn.sml + * + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * This is the CML version of the BinIO functor. + *) + +functor BinIOFn ( + + structure OSPrimIO : OS_PRIM_IO + where type PrimIO.array = BinPrimIO.array + where type PrimIO.vector = BinPrimIO.vector + where type PrimIO.array_slice = BinPrimIO.array_slice + where type PrimIO.vector_slice = BinPrimIO.vector_slice + where type PrimIO.elem = BinPrimIO.elem + where type PrimIO.pos = BinPrimIO.pos + where type PrimIO.reader = BinPrimIO.reader + where type PrimIO.writer = BinPrimIO.writer + + ) : CML_BIN_IO = struct + + structure PIO = OSPrimIO.PrimIO + structure A = Word8Array + structure AS = Word8ArraySlice + structure V = Word8Vector + structure VS = Word8VectorSlice + structure Pos = Position + + structure SV = SyncVar + + (* assign to an MVar *) + fun mUpdate (mv, x) = (SV.mTake mv; SV.mPut(mv, x)) + + (* an element for initializing buffers *) + val someElem = (0w0 : Word8.word) + + val vecExtract = VS.vector o VS.slice + val vecSub = V.sub + val arrUpdate = A.update + val empty = V.fromList[] + + fun dummyCleaner () = () + + structure StreamIO = + struct + type vector = V.vector + type elem = V.elem + type reader = PIO.reader + type writer = PIO.writer + type pos = PIO.pos + + (* maximum size of an input request *) + val maxInputSz = Position.fromInt V.maxLen + + (*** Functional input streams ***) + datatype instream = ISTRM of (in_buffer * int) + and in_buffer = IBUF of { + basePos : pos option, + more : more SV.mvar, (* when this cell is empty, it means that *) + (* there is an outstanding request to the *) + (* server to extend the stream. *) + data : vector, + info : info + } + and more + = MORE of in_buffer (* forward link to additional data *) + | NOMORE (* placeholder for forward link *) + | TERMINATED (* termination of the stream *) + + and info = INFO of { + reader : reader, + readVec : int -> vector, + readVecEvt : int -> vector CML.event, + closed : bool ref, + getPos : unit -> pos option, + tail : more SV.mvar SV.mvar, + (* points to the more cell of the last buffer *) + cleanTag : CleanIO.tag + } + + fun infoOfIBuf (IBUF{info, ...}) = info + fun chunkSzOfIBuf buf = let + val INFO{reader=PIO.RD{chunkSize, ...}, ...} = infoOfIBuf buf + in + chunkSize + end + fun readVec (IBUF{info=INFO{readVec=f, ...}, ...}) = f + + fun inputExn (INFO{reader=PIO.RD{name, ...}, ...}, mlOp, exn) = + raise IO.Io{function=mlOp, name=name, cause=exn} + + datatype more_data = EOF | DATA of in_buffer + + (* extend the stream by a chunk. + * Invariant: the more m-variable is empty on entry and full on exit. + *) + fun extendStream (readFn, mlOp, buf as IBUF{more, info, ...}) = (let + val INFO{getPos, tail, ...} = info + val basePos = getPos() + val chunk = readFn (chunkSzOfIBuf buf) + in + if (V.length chunk = 0) + then (SV.mPut (more, NOMORE); EOF) + else let + val newMore = SV.mVar() + val buf' = IBUF{ + basePos = basePos, data = chunk, + more = newMore, info = info + } + in + (* note that we do not fill the newMore cell until + * after the tail has been updated. This ensures + * that someone attempting to access the tail will + * not acquire the lock until after we are done. + *) + mUpdate (tail, newMore); + SV.mPut (more, MORE buf'); (* releases lock!! *) + SV.mPut (newMore, NOMORE); + DATA buf' + end + end + handle ex => ( + SV.mPut (more, NOMORE); + inputExn(info, mlOp, ex))) + + (* get the next buffer in the stream, extending it if necessary. If + * the stream must be extended, we lock it by taking the value from the + * more cell; the extendStream function is responsible for filling in + * the cell. + *) + fun getBuffer (readFn, mlOp) (buf as IBUF{more, info, ...}) = let + fun get TERMINATED = EOF + | get (MORE buf') = DATA buf' + | get NOMORE = (case SV.mTake more + of NOMORE => extendStream (readFn, mlOp, buf) + | next => (SV.mPut(more, next); get next) + (* end case *)) + in + get (SV.mGet more) + end + + (* read a chunk that is at least the specified size *) + fun readChunk buf = let + val INFO{readVec, reader=PIO.RD{chunkSize, ...}, ...} = + infoOfIBuf buf + in + case (chunkSize - 1) + of 0 => (fn n => readVec n) + | k => (* round up to next multiple of chunkSize *) + (fn n => readVec(Int.quot(n+k, chunkSize) * chunkSize)) + (* end case *) + end + + fun generalizedInput getBuf = let + fun get (ISTRM(buf as IBUF{data, ...}, pos)) = let + val len = V.length data + in + if (pos < len) + then (vecExtract(data, pos, NONE), ISTRM(buf, len)) + else (case (getBuf buf) + of EOF => (empty, ISTRM(buf, len)) + | (DATA rest) => get (ISTRM(rest, 0)) + (* end case *)) + end + in + get + end + + (* terminate an input stream *) + fun terminate (info as INFO{tail, cleanTag, ...}) = let + val m = SV.mGet tail + in + case SV.mTake m + of (m' as MORE _) => (SV.mPut(m, m'); terminate info) + | TERMINATED => SV.mPut(m, TERMINATED) + | _ => ( + CleanIO.removeCleaner cleanTag; + SV.mPut(m, TERMINATED)) + (* end case *) + end + + (* find the end of the stream *) + fun findEOS (buf as IBUF{more, data, ...}) = (case (SV.mGet more) + of (MORE buf) => findEOS buf + | _ => ISTRM(buf, V.length data) + (* end case *)) + + fun input (strm as ISTRM(buf, _)) = + generalizedInput (getBuffer (readVec buf, "input")) strm + fun input1 (ISTRM(buf, pos)) = let + val IBUF{data, more, ...} = buf + in + if (pos < V.length data) + then SOME(vecSub(data, pos), ISTRM(buf, pos+1)) + else let + fun get (MORE buf) = input1 (ISTRM(buf, 0)) + | get TERMINATED = NONE + | get NOMORE = (case SV.mTake more + of NOMORE => ( + case extendStream (readVec buf, "input1", buf) + of EOF => NONE + | (DATA rest) => input1 (ISTRM(rest, 0)) + (* end case *)) + | next => (SV.mPut(more, next); get next) + (* end case *)) + in + get (SV.mGet more) + end + end + fun inputN (ISTRM(buf, pos), n) = let + fun join (item, (list, strm)) = (item::list, strm) + fun inputList (buf as IBUF{data, ...}, i, n) = let + val len = V.length data + val remain = len-i + in + if (remain >= n) + then ([vecExtract(data, i, SOME n)], ISTRM(buf, i+n)) + else join ( + vecExtract(data, i, NONE), + nextBuf(buf, n-remain)) + end + and nextBuf (buf as IBUF{more, data, ...}, n) = let + fun get (MORE buf) = inputList (buf, 0, n) + | get TERMINATED = ([], ISTRM(buf, V.length data)) + | get NOMORE = (case (SV.mTake more) + of NOMORE => (case extendStream (readVec buf, "inputN", buf) + of EOF => ([], ISTRM(buf, V.length data)) + | (DATA rest) => inputList (rest, 0, n) + (* end case *)) + | next => (SV.mPut(more, next); get next) + (* end case *)) + in + get (SV.mGet more) + end + val (data, strm) = inputList (buf, pos, n) + in + (V.concat data, strm) + end + + fun inputAll (strm as ISTRM(buf, _)) = let + val INFO{reader=PIO.RD{avail, ...}, ...} = infoOfIBuf buf + (* read a chunk that is as large as the available input. Note + * that for systems that use CR-LF for #"\n", the size will be + * too large, but this should be okay. + *) + fun bigChunk _ = let + val delta = (case avail() + of NONE => chunkSzOfIBuf buf + | (SOME n) => if (n > maxInputSz) + then raise Size + else Position.toInt n + (* end case *)) + in + readChunk buf delta + end + val bigInput = + generalizedInput (getBuffer (bigChunk, "inputAll")) + fun loop (v, strm) = + if (V.length v = 0) then [] else v :: loop(bigInput strm) + val data = V.concat (loop (bigInput strm)) + in + (data, findEOS buf) + end + + fun input1Evt _ = raise Fail "input1Evt unimplemented" + fun inputEvt _ = raise Fail "inputEvt unimplemented" + fun inputNEvt _ = raise Fail "inputNEvt unimplemented" + fun inputAllEvt _ = raise Fail "inputAllEvt unimplemented" + + (* Return SOME k, if k <= amount characters can be read without blocking. *) + fun canInput (strm as ISTRM(buf, pos), amount) = let +(****** + val readVecNB = (case buf + of (IBUF{info as INFO{readVecNB=NONE, ...}, ...}) => + inputExn(info, "canInput", IO.NonblockingNotSupported) + | (IBUF{info=INFO{readVecNB=SOME f, ...}, ...}) => f + (* end case *)) +******) + fun tryInput (buf as IBUF{data, ...}, i, n) = let + val len = V.length data + val remain = len - i + in + if (remain >= n) + then SOME n + else nextBuf (buf, n - remain) + end + and nextBuf (IBUF{more, ...}, n) = let + fun get (MORE buf) = tryInput (buf, 0, n) + | get TERMINATED = SOME(amount - n) +(****** + | get NOMORE = (case SV.mTake more + of NOMORE => (( + case extendStream (readVecNB, "canInput", buf) + of EOF => SOME(amount - n) + | (DATA b) => tryInput (b, 0, n) + (* end case *)) + handle IO.Io{cause=WouldBlock, ...} => SOME(amount - n)) + | next => (SV.mPut(more, next); get next) + (* end case *)) +******) + | get NOMORE = SOME(amount - n) + in + get (SV.mGet more) + end + in + if (amount < 0) + then raise Size + else tryInput (buf, pos, amount) + end + (* close an input stream given its info structure; we need this function + * for the cleanup hook to avoid a space leak. + *) + fun closeInInfo (INFO{closed=ref true, ...}) = () + | closeInInfo (info as INFO{closed, reader=PIO.RD{close, ...}, ...}) = ( +(*** We need some kind of lock on the input stream to do this safely!!! ***) + terminate info; + closed := true; + close() handle ex => inputExn(info, "closeIn", ex)) + fun closeIn (ISTRM(buf, _)) = closeInInfo (infoOfIBuf buf) + fun endOfStream (ISTRM(buf as IBUF{more, ...}, pos)) = ( + case SV.mTake more + of (next as MORE _) => (SV.mPut(more, next); false) + | next => let + val IBUF{data, info=INFO{closed, ...}, ...} = buf + in + if (pos = V.length data) + then (case (next, !closed) + of (NOMORE, false) => ( + case extendStream (readVec buf, "endOfStream", buf) + of EOF => true + | _ => false + (* end case *)) + | _ => (SV.mPut(more, next); true) + (* end case *)) + else (SV.mPut(more, next); false) + end + (* end case *)) + fun mkInstream (reader, data) = let + val PIO.RD{readVec, readVecEvt, getPos, setPos, ...} = reader + val getPos = (case (getPos, setPos) + of (SOME f, SOME _) => (fn () => SOME(f())) + | _ => (fn () => NONE) + (* end case *)) + val more = SV.mVarInit NOMORE + val tag = CleanIO.addCleaner dummyCleaner + val info = INFO{ + reader=reader, readVec=readVec, readVecEvt=readVecEvt, + closed = ref false, getPos = getPos, + tail = SV.mVarInit more, cleanTag = tag + } +(** What should we do about the position in this case ?? **) +(** Suggestion: When building a stream with supplied initial data, + ** nothing can be said about the positions inside that initial + ** data (who knows where that data even came from!). + **) + val basePos = if (V.length data = 0) then getPos() else NONE + val buf = IBUF{ + basePos = basePos, data = data, + info = info, more = more + } + val strm = ISTRM(buf, 0) + in + CleanIO.rebindCleaner (tag, fn () => closeInInfo info); + strm + end + fun getReader (ISTRM(buf, pos)) = let + val IBUF{data, info as INFO{reader, ...}, more, ...} = buf + fun getData more = (case SV.mGet more + of (MORE(IBUF{data, more=more', ...})) => data :: getData more' + | _ => [] + (* end case *)) + in + terminate info; + if (pos < V.length data) + then ( + reader, + V.concat(vecExtract(data, pos, NONE) :: getData more) + ) + else (reader, V.concat(getData more)) + end + +(* + (** Position operations on instreams **) + datatype in_pos = INP of { + base : pos, + offset : int, + info : info + } +*) + +(* + fun getPosIn (ISTRM(buf, pos)) = (case buf + of IBUF{basePos=NONE, info, ...} => + inputExn (info, "getPosIn", IO.RandomAccessNotSupported) + | IBUF{basePos=SOME p, info, ...} => INP{ + base = p, offset = pos, info = info + } + (* end case *)) +*) + + +(* + fun filePosIn (INP{base, offset, ...}) = + Position.+(base, Position.fromInt offset) +*) + fun filePosIn (ISTRM(buf, pos)) = + case buf of + IBUF{basePos=NONE, info, ... } => + inputExn (info, "filePosIn", IO.RandomAccessNotSupported) + | IBUF{basePos=SOME b, ... } => + Position.+(b, Position.fromInt pos) +(* + fun setPosIn (pos as INP{info as INFO{reader, ...}, ...}) = let + val fpos = filePosIn pos + val (PIO.RD rd) = reader + in + terminate info; + valOf (#setPos rd) fpos; + mkInstream (PIO.RD rd, empty) + end +*) + + (* IO event constructors: + * We exploit the "functional" nature of stream IO to implement the event + * constructors. These constructors spawn a thread to do the operation + * and and write the result in an iVar that serves as the synchronization + * value. + * NOTE: this implementation has the weakness that it prevents shutdown when + * everything else is deadlocked, since the thread that is spawned to actually + * do the IO could proceed. + *) + local + datatype 'a result = RES of 'a | EXN of exn + fun doInput inputOp = let + fun input arg = RES(inputOp arg) handle ex => EXN ex + in + fn arg => CML.guard (fn () => let + val resV = SV.iVar() + in + CML.spawn (fn () => SV.iPut(resV, input arg)); + CML.wrap(SV.iGetEvt resV, + fn (RES x) => x | (EXN ex) => raise ex) + end) + end + in + val input1Evt = doInput input1 + val inputEvt = doInput input + val inputNEvt = doInput inputN + val inputAllEvt = doInput inputAll + end (* local *) + + + (*** Output streams ***) + + (* an output stream is implemented as a monitor using an mvar to + * hold its data. + *) + + datatype ostrm_info = OSTRM of { + buf : A.array, + pos : int ref, + closed : bool ref, + bufferMode : IO.buffer_mode ref, + writer : writer, + writeArr : AS.slice -> unit, + writeVec : VS.slice -> unit, + cleanTag : CleanIO.tag + } + + type outstream = ostrm_info SV.mvar + + fun outputExn (OSTRM{writer=PIO.WR{name, ...}, ...}, mlOp, exn) = + raise IO.Io{function=mlOp, name=name, cause=exn} + + (* lock access to the stream and make sure that it is not closed. *) + fun lockAndChkClosedOut (strmMV, mlOp) = (case SV.mTake strmMV + of (strm as OSTRM({closed=ref true, ...})) => ( + SV.mPut (strmMV, strm); + outputExn (strm, mlOp, IO.ClosedStream)) + | strm => strm + (* end case *)) + + fun flushBuffer (strmMV, strm as OSTRM{buf, pos, writeArr, ...}, mlOp) = ( + case !pos + of 0 => () + | n => (( + writeArr (AS.slice (buf, 0, SOME n)); pos := 0) + handle ex => ( + SV.mPut(strmMV, strm); outputExn (strm, mlOp, ex))) + (* end case *)) + + fun output (strmMV, v) = let + val (strm as OSTRM os) = lockAndChkClosedOut (strmMV, "output") + fun release () = SV.mPut (strmMV, strm) + val {buf, pos, bufferMode, ...} = os + fun flush () = flushBuffer (strmMV, strm, "output") + fun flushAll () = (#writeArr os (AS.full buf) + handle ex => (release(); outputExn (strm, "output", ex))) + fun writeDirect () = ( + case !pos + of 0 => () + | n => (#writeArr os (AS.slice (buf, 0, SOME n)); pos := 0) + (* end case *); + #writeVec os (VS.full v)) + handle ex => (release(); outputExn (strm, "output", ex)) + fun insert copyVec = let + val bufLen = A.length buf + val dataLen = V.length v + in + if (dataLen >= bufLen) + then writeDirect() + else let + val i = !pos + val avail = bufLen - i + in + if (avail < dataLen) + then ( + copyVec(v, 0, avail, buf, i); + flushAll(); + copyVec(v, avail, dataLen-avail, buf, 0); + pos := dataLen-avail) + else ( + copyVec(v, 0, dataLen, buf, i); + pos := i + dataLen; + if (avail = dataLen) then flush() else ()) + end + end + in + case !bufferMode + of IO.NO_BUF => writeDirect () + | _ => let + fun copyVec (src, srcI, srcLen, dst, dstI) = + AS.copyVec + { src = VS.slice (src, srcI, SOME srcLen), + dst = dst, di = dstI } + in + insert copyVec + end + (* end case *); + release() + end + + fun output1 (strmMV, elem) = let + val (strm as OSTRM{buf, pos, bufferMode, writeArr, ...}) = + lockAndChkClosedOut (strmMV, "output1") + fun release () = SV.mPut (strmMV, strm) + in + case !bufferMode + of IO.NO_BUF => ( + arrUpdate (buf, 0, elem); + writeArr (AS.slice (buf, 0, SOME 1)) + handle ex => (release(); outputExn (strm, "output1", ex))) + | _ => let val i = !pos val i' = i+1 + in + arrUpdate (buf, i, elem); pos := i'; + if (i' = A.length buf) + then flushBuffer (strmMV, strm, "output1") + else () + end + (* end case *); + release() + end + + fun flushOut strmMV = let + val strm = lockAndChkClosedOut (strmMV, "flushOut") + in + flushBuffer (strmMV, strm, "flushOut"); + SV.mPut (strmMV, strm) + end + + fun closeOut strmMV = let + val (strm as OSTRM{writer=PIO.WR{close, ...}, closed, cleanTag, ...}) = + SV.mTake strmMV + in + if !closed + then () + else ( + flushBuffer (strmMV, strm, "closeOut"); + closed := true; + CleanIO.removeCleaner cleanTag; + close()); + SV.mPut (strmMV, strm) + end + + fun mkOutstream (wr as PIO.WR{chunkSize, writeArr, writeVec, ...}, mode) = + let fun iterate (f, size, subslice) = let + fun lp sl = + if size sl = 0 then () + else let val n = f sl + in + lp (subslice (sl, n, NONE)) + end + in + lp + end + val writeArr' = iterate (writeArr, AS.length, AS.subslice) + val writeVec' = iterate (writeVec, VS.length, VS.subslice) + (* install a dummy cleaner *) + val tag = CleanIO.addCleaner dummyCleaner + val strm = SV.mVarInit (OSTRM{ + buf = A.array(chunkSize, someElem), + pos = ref 0, + closed = ref false, + bufferMode = ref mode, + writer = wr, + writeArr = writeArr', + writeVec = writeVec', + cleanTag = tag + }) + in + CleanIO.rebindCleaner (tag, fn () => closeOut strm); + strm + end + + fun getWriter strmMV = let + val (strm as OSTRM{writer, bufferMode, ...}) = + lockAndChkClosedOut (strmMV, "getWriter") + in + (writer, !bufferMode) before SV.mPut(strmMV, strm) + end + + (** Position operations on outstreams **) + datatype out_pos = OUTP of { + pos : PIO.pos, + strm : outstream + } + + fun getPosOut strmMV = let + val (strm as OSTRM{writer, ...}) = + lockAndChkClosedOut (strmMV, "getWriter") + fun release () = SV.mPut(strmMV, strm) + in + flushBuffer (strmMV, strm, "getPosOut"); + case writer + of PIO.WR{getPos=SOME f, ...} => ( + OUTP{pos = f(), strm = strmMV} + handle ex => (release(); outputExn(strm, "getPosOut", ex))) + | _ => ( + release(); + outputExn(strm, "getPosOut", IO.RandomAccessNotSupported)) + (* end case *) + before release() + end + fun filePosOut (OUTP{pos, strm=strmMV}) = ( + SV.mPut (strmMV, lockAndChkClosedOut (strmMV, "filePosOut")); + pos) + fun setPosOut (OUTP{pos, strm=strmMV}) = let + val (strm as OSTRM{writer, ...}) = + lockAndChkClosedOut (strmMV, "setPosOut") + fun release () = SV.mPut(strmMV, strm) + in + case writer + of PIO.WR{setPos=SOME f, ...} => ( + (f pos) + handle ex => (release(); outputExn(strm, "setPosOut", ex))) + | _ => ( + release(); + outputExn(strm, "getPosOut", IO.RandomAccessNotSupported)) + (* end case *); + release() + end + + fun setBufferMode (strmMV, mode) = let + val (strm as OSTRM{bufferMode, ...}) = + lockAndChkClosedOut (strmMV, "setBufferMode") + in + if (mode = IO.NO_BUF) + then flushBuffer (strmMV, strm, "setBufferMode") + else (); + bufferMode := mode; + SV.mPut (strmMV, strm) + end + fun getBufferMode strmMV = let +(** should we be checking for closed streams here??? **) + val (strm as OSTRM{bufferMode, ...}) = + lockAndChkClosedOut (strmMV, "getBufferMode") + in + !bufferMode before SV.mPut (strmMV, strm) + end + + end (* StreamIO *) + + type vector = V.vector + type elem = V.elem + type instream = StreamIO.instream SV.mvar + type outstream = StreamIO.outstream SV.mvar + + (** Input operations **) + fun input strm = let val (v, strm') = StreamIO.input(SV.mTake strm) + in + SV.mPut (strm, strm'); v + end + fun input1 strm = (case StreamIO.input1(SV.mTake strm) + of NONE => NONE + | (SOME(elem, strm')) => (SV.mPut (strm, strm'); SOME elem) + (* end case *)) + fun inputN (strm, n) = let val (v, strm') = StreamIO.inputN (SV.mTake strm, n) + in + SV.mPut (strm, strm'); v + end + fun inputAll (strm : instream) = let + val (v, strm') = StreamIO.inputAll(SV.mTake strm) + in + SV.mPut (strm, strm'); v + end + fun input1Evt _ = raise Fail "input1Evt unimplemented" + fun inputEvt _ = raise Fail "inputEvt unimplemented" + fun inputNEvt _ = raise Fail "inputNEvt unimplemented" + fun inputAllEvt _ = raise Fail "inputAllEvt unimplemented" + fun canInput (strm, n) = StreamIO.canInput (SV.mGet strm, n) + fun lookahead (strm : instream) = (case StreamIO.input1(SV.mGet strm) + of NONE => NONE + | (SOME(elem, _)) => SOME elem + (* end case *)) + fun closeIn strm = let + val (s as StreamIO.ISTRM(buf as StreamIO.IBUF{data, ...}, _)) = + SV.mTake strm + in + StreamIO.closeIn s; + SV.mPut(strm, StreamIO.findEOS buf) + end + fun endOfStream strm = StreamIO.endOfStream(SV.mGet strm) +(* + fun getPosIn strm = StreamIO.getPosIn(SV.mGet strm) + fun setPosIn (strm, p) = mUpdate(strm, StreamIO.setPosIn p) +*) + + (** Output operations **) + fun output (strm, v) = StreamIO.output(SV.mGet strm, v) + fun output1 (strm, c) = StreamIO.output1(SV.mGet strm, c) + fun flushOut strm = StreamIO.flushOut(SV.mGet strm) + fun closeOut strm = StreamIO.closeOut(SV.mGet strm) + fun getPosOut strm = StreamIO.getPosOut(SV.mGet strm) + fun setPosOut (strm, p as StreamIO.OUTP{strm=strm', ...}) = ( + mUpdate(strm, strm'); StreamIO.setPosOut p) + + fun mkInstream (strm : StreamIO.instream) = SV.mVarInit strm + fun getInstream (strm : instream) = SV.mGet strm + fun setInstream (strm : instream, strm') = mUpdate(strm, strm') + + fun mkOutstream (strm : StreamIO.outstream) = SV.mVarInit strm + fun getOutstream (strm : outstream) = SV.mGet strm + fun setOutstream (strm : outstream, strm') = mUpdate(strm, strm') + + (** Open files **) + fun openIn fname = + mkInstream(StreamIO.mkInstream(OSPrimIO.openRd fname, empty)) + handle ex => raise IO.Io{function="openIn", name=fname, cause=ex} + fun openOut fname = + mkOutstream(StreamIO.mkOutstream(OSPrimIO.openWr fname, IO.BLOCK_BUF)) + handle ex => raise IO.Io{function="openOut", name=fname, cause=ex} + fun openAppend fname = + mkOutstream(StreamIO.mkOutstream(OSPrimIO.openApp fname, IO.NO_BUF)) + handle ex => raise IO.Io{function="openAppend", name=fname, cause=ex} + + end (* BinIOFn *) diff --git a/cml/src/IO/new-cml-imperative-io-sig.sml b/cml/src/IO/new-cml-imperative-io-sig.sml new file mode 100644 index 0000000..700b894 --- /dev/null +++ b/cml/src/IO/new-cml-imperative-io-sig.sml @@ -0,0 +1,57 @@ +(* cml-imperative-io-sig.sml + * + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * This extends the SMLBL IMPERATIVE_IO interface with event-valued operations. + *) + +signature CML_IMPERATIVE_IO = + sig + + (* include IMPERATIVE_IO *) + + type vector + type elem + + type instream + type outstream + + val input : instream -> vector + val input1 : instream -> elem option + val inputN : (instream * int) -> vector + val inputAll : instream -> vector + val canInput : (instream * int) -> int option + val lookahead : instream -> elem option + val closeIn : instream -> unit + val endOfStream : instream -> bool + + val output : (outstream * vector) -> unit + val output1 : (outstream * elem) -> unit + val flushOut : outstream -> unit + val closeOut : outstream -> unit + + structure StreamIO : CML_STREAM_IO + sharing type vector = StreamIO.vector + sharing type elem = StreamIO.elem + +(* + val getPosIn : instream -> StreamIO.in_pos + val setPosIn : (instream * StreamIO.in_pos) -> unit +*) + val mkInstream : StreamIO.instream -> instream + val getInstream : instream -> StreamIO.instream + val setInstream : (instream * StreamIO.instream) -> unit + + val getPosOut : outstream -> StreamIO.out_pos + val setPosOut : (outstream * StreamIO.out_pos) -> unit + val mkOutstream : StreamIO.outstream -> outstream + val getOutstream : outstream -> StreamIO.outstream + val setOutstream : (outstream * StreamIO.outstream) -> unit + + val input1Evt : instream -> elem option CML.event + val inputNEvt : (instream * int) -> vector CML.event + val inputEvt : instream -> vector CML.event + val inputAllEvt : instream -> vector CML.event + + end; diff --git a/cml/src/IO/new-cml-text-io-sig.sml b/cml/src/IO/new-cml-text-io-sig.sml new file mode 100644 index 0000000..bd78898 --- /dev/null +++ b/cml/src/IO/new-cml-text-io-sig.sml @@ -0,0 +1,80 @@ +(* cml-text-io-sig.sml + * + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * This extends the SMLBL TEXT_IO interface with event-valued operations. + *) + +signature CML_TEXT_IO = + sig + (* include TEXT_IO *) + type vector = string + type elem = char + + type instream + type outstream + + val input : instream -> vector + val input1 : instream -> elem option + val inputN : (instream * int) -> vector + val inputAll : instream -> vector + val canInput : (instream * int) -> int option + val lookahead : instream -> elem option + val closeIn : instream -> unit + val endOfStream : instream -> bool + + val output : (outstream * vector) -> unit + val output1 : (outstream * elem) -> unit + val flushOut : outstream -> unit + val closeOut : outstream -> unit + + structure StreamIO : CML_TEXT_STREAM_IO + where type reader = TextPrimIO.reader + where type writer = TextPrimIO.writer + where type pos = TextPrimIO.pos + where type vector = string + where type elem = char + +(* + val getPosIn : instream -> StreamIO.in_pos + val setPosIn : (instream * StreamIO.in_pos) -> unit +*) + val mkInstream : StreamIO.instream -> instream + val getInstream : instream -> StreamIO.instream + val setInstream : (instream * StreamIO.instream) -> unit + + val getPosOut : outstream -> StreamIO.out_pos + val setPosOut : (outstream * StreamIO.out_pos) -> unit + val mkOutstream : StreamIO.outstream -> outstream + val getOutstream : outstream -> StreamIO.outstream + val setOutstream : (outstream * StreamIO.outstream) -> unit + + val inputLine : instream -> string option + val outputSubstr : (outstream * substring) -> unit + + val openIn : string -> instream + val openString : string -> instream + val openOut : string -> outstream + val openAppend : string -> outstream + + val stdIn : instream + val stdOut : outstream + val stdErr : outstream + + val input1Evt : instream -> elem option CML.event + val inputNEvt : (instream * int) -> vector CML.event + val inputEvt : instream -> vector CML.event + val inputAllEvt : instream -> vector CML.event + + val openChanIn : string CML.chan -> instream + val openChanOut : string CML.chan -> outstream + + val print : string -> unit + + val scanStream : + ((elem, StreamIO.instream) StringCvt.reader + -> ('a, StreamIO.instream) StringCvt.reader + ) -> instream -> 'a option + + end; diff --git a/cml/src/IO/new-text-io-fn.sml b/cml/src/IO/new-text-io-fn.sml new file mode 100644 index 0000000..a0c828e --- /dev/null +++ b/cml/src/IO/new-text-io-fn.sml @@ -0,0 +1,1063 @@ +(* text-io-fn.sml + * + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * This is the CML version of the TextIO functor. + *) + +functor TextIOFn ( + + structure OSPrimIO : sig + include OS_PRIM_IO + val stdIn : unit -> PrimIO.reader + val stdOut : unit -> PrimIO.writer + val stdErr : unit -> PrimIO.writer + val strReader : string -> PrimIO.reader + end + where type PrimIO.array = TextPrimIO.array + where type PrimIO.vector = TextPrimIO.vector + where type PrimIO.array_slice = TextPrimIO.array_slice + where type PrimIO.vector_slice = TextPrimIO.vector_slice + where type PrimIO.elem = TextPrimIO.elem + where type PrimIO.pos = TextPrimIO.pos + where type PrimIO.reader = TextPrimIO.reader + where type PrimIO.writer = TextPrimIO.writer + + ) : CML_TEXT_IO = struct + + structure PIO = OSPrimIO.PrimIO + structure A = CharArray + structure AS = CharArraySlice + structure V = CharVector + structure VS = CharVectorSlice + + structure SV = SyncVar + + (* assign to an MVar *) + fun mUpdate (mv, x) = ignore(SV.mSwap(mv, x)) + + (* an element for initializing buffers *) + val someElem = #"\000" + + val vecExtract = VS.vector o VS.slice + val vecSub = V.sub + val arrUpdate = A.update + val substringBase = Substring.base + val empty = "" + + fun dummyCleaner () = () + + structure StreamIO = + struct + type vector = V.vector + type elem = V.elem + type reader = PIO.reader + type writer = PIO.writer + type pos = PIO.pos + + (* maximum size of an input request *) + val maxInputSz = Position.fromInt V.maxLen + + (*** Functional input streams ***) + datatype instream = ISTRM of (in_buffer * int) + and in_buffer = IBUF of { + basePos : pos option, + more : more SV.mvar, (* when this cell is empty, it means that *) + (* there is an outstanding request to the *) + (* server to extend the stream. *) + data : vector, + info : info + } + and more + = MORE of in_buffer (* forward link to additional data *) + | NOMORE (* placeholder for forward link *) + | TERMINATED (* termination of the stream *) + + and info = INFO of { + reader : reader, + readVec : int -> vector, + readVecEvt : int -> vector CML.event, + closed : bool ref, + getPos : unit -> pos option, + tail : more SV.mvar SV.mvar, + (* points to the more cell of the last buffer *) + cleanTag : CleanIO.tag + } + + fun infoOfIBuf (IBUF{info, ...}) = info + fun chunkSzOfIBuf buf = let + val INFO{reader=PIO.RD{chunkSize, ...}, ...} = infoOfIBuf buf + in + chunkSize + end + fun readVec (IBUF{info=INFO{readVec=f, ...}, ...}) = f + + fun inputExn (INFO{reader=PIO.RD{name, ...}, ...}, mlOp, exn) = + raise IO.Io{function=mlOp, name=name, cause=exn} + + datatype more_data = EOF | DATA of in_buffer + + (* extend the stream by a chunk. + * Invariant: the more m-variable is empty on entry and full on exit. + *) + fun extendStream (readFn, mlOp, buf as IBUF{more, info, ...}) = (let + val INFO{getPos, tail, ...} = info + val basePos = getPos() + val chunk = readFn (chunkSzOfIBuf buf) + in + if (V.length chunk = 0) + then (SV.mPut (more, NOMORE); EOF) + else let + val newMore = SV.mVar() + val buf' = IBUF{ + basePos = basePos, data = chunk, + more = newMore, info = info + } + in + (* note that we do not fill the more cell until + * after the tail has been updated. This ensures + * that someone attempting to access the tail will + * not acquire the lock until after we are done. + *) + mUpdate (tail, newMore); + SV.mPut (more, MORE buf'); (* releases lock!! *) + SV.mPut (newMore, NOMORE); + DATA buf' + end + end + handle ex => ( + SV.mPut (more, NOMORE); + inputExn(info, mlOp, ex))) + + (* get the next buffer in the stream, extending it if necessary. If + * the stream must be extended, we lock it by taking the value from the + * more cell; the extendStream function is responsible for filling in + * the cell. + *) + fun getBuffer (readFn, mlOp) (buf as IBUF{more, info, ...}) = let + fun get TERMINATED = EOF + | get (MORE buf') = DATA buf' + | get NOMORE = (case SV.mTake more + of NOMORE => extendStream (readFn, mlOp, buf) + | next => (SV.mPut(more, next); get next) + (* end case *)) + in + get (SV.mGet more) + end + + (* read a chunk that is at least the specified size *) + fun readChunk buf = let + val INFO{readVec, reader=PIO.RD{chunkSize, ...}, ...} = + infoOfIBuf buf + in + case (chunkSize - 1) + of 0 => (fn n => readVec n) + | k => (* round up to next multiple of chunkSize *) + (fn n => readVec(Int.quot(n+k, chunkSize) * chunkSize)) + (* end case *) + end + + fun generalizedInput getBuf = let + fun get (ISTRM(buf as IBUF{data, ...}, pos)) = let + val len = V.length data + in + if (pos < len) + then (vecExtract(data, pos, NONE), ISTRM(buf, len)) + else (case (getBuf buf) + of EOF => (empty, ISTRM(buf, len)) + | (DATA rest) => get (ISTRM(rest, 0)) + (* end case *)) + end + in + get + end + + (* terminate an input stream *) + fun terminate (info as INFO{tail, cleanTag, ...}) = let + val m = SV.mGet tail + in + case SV.mTake m + of (m' as MORE _) => (SV.mPut(m, m'); terminate info) + | TERMINATED => SV.mPut(m, TERMINATED) + | _ => ( + CleanIO.removeCleaner cleanTag; + SV.mPut(m, TERMINATED)) + (* end case *) + end + + (* find the end of the stream *) + fun findEOS (buf as IBUF{more, data, ...}) = (case (SV.mGet more) + of (MORE buf) => findEOS buf + | _ => ISTRM(buf, V.length data) + (* end case *)) + + fun input (strm as ISTRM(buf, _)) = + generalizedInput (getBuffer (readVec buf, "input")) strm + fun input1 (ISTRM(buf, pos)) = let + val IBUF{data, more, ...} = buf + in + if (pos < V.length data) + then SOME(vecSub(data, pos), ISTRM(buf, pos+1)) + else let + fun get (MORE buf) = input1 (ISTRM(buf, 0)) + | get TERMINATED = NONE + | get NOMORE = (case SV.mTake more + of NOMORE => ( + case extendStream (readVec buf, "input1", buf) + of EOF => NONE + | (DATA rest) => input1 (ISTRM(rest, 0)) + (* end case *)) + | next => (SV.mPut(more, next); get next) + (* end case *)) + in + get (SV.mGet more) + end + end + fun inputN (ISTRM(buf, pos), n) = let + fun join (item, (list, strm)) = (item::list, strm) + fun inputList (buf as IBUF{data, ...}, i, n) = let + val len = V.length data + val remain = len-i + in + if (remain >= n) + then ([vecExtract(data, i, SOME n)], ISTRM(buf, i+n)) + else join ( + vecExtract(data, i, NONE), + nextBuf(buf, n-remain)) + end + and nextBuf (buf as IBUF{more, data, ...}, n) = let + fun get (MORE buf) = inputList (buf, 0, n) + | get TERMINATED = ([], ISTRM(buf, V.length data)) + | get NOMORE = (case (SV.mTake more) + of NOMORE => (case extendStream (readVec buf, "inputN", buf) + of EOF => ([], ISTRM(buf, V.length data)) + | (DATA rest) => inputList (rest, 0, n) + (* end case *)) + | next => (SV.mPut(more, next); get next) + (* end case *)) + in + get (SV.mGet more) + end + val (data, strm) = inputList (buf, pos, n) + in + (V.concat data, strm) + end + + fun inputAll (strm as ISTRM(buf, _)) = let + val INFO{reader=PIO.RD{avail, ...}, ...} = infoOfIBuf buf + (* read a chunk that is as large as the available input. Note + * that for systems that use CR-LF for #"\n", the size will be + * too large, but this should be okay. + *) + fun bigChunk _ = let + val delta = (case avail() + of NONE => chunkSzOfIBuf buf + | (SOME n) => if (n > maxInputSz) + then raise Size + else Position.toInt n + (* end case *)) + in + readChunk buf delta + end + val bigInput = + generalizedInput (getBuffer (bigChunk, "inputAll")) + fun loop (v, strm) = + if (V.length v = 0) then [] else v :: loop(bigInput strm) + val data = V.concat (loop (bigInput strm)) + in + (data, findEOS buf) + end + + (* Return SOME k, if k <= amount characters can be read without blocking. *) + fun canInput (strm as ISTRM(buf, pos), amount) = let +(****** + val readVecNB = (case buf + of (IBUF{info as INFO{readVecNB=NONE, ...}, ...}) => + inputExn(info, "canInput", IO.NonblockingNotSupported) + | (IBUF{info=INFO{readVecNB=SOME f, ...}, ...}) => f + (* end case *)) +******) + fun tryInput (buf as IBUF{data, ...}, i, n) = let + val len = V.length data + val remain = len - i + in + if (remain >= n) + then SOME n + else nextBuf (buf, n - remain) + end + and nextBuf (IBUF{more, ...}, n) = let + fun get (MORE buf) = tryInput (buf, 0, n) + | get TERMINATED = SOME(amount - n) +(****** + | get NOMORE = (case SV.mTake more + of NOMORE => (( + case extendStream (readVecNB, "canInput", buf) + of EOF => SOME(amount - n) + | (DATA b) => tryInput (b, 0, n) + (* end case *)) + handle IO.Io{cause=WouldBlock, ...} => SOME(amount - n)) + | next => (SV.mPut(more, next); get next) + (* end case *)) +******) + | get NOMORE = SOME(amount - n) + in + get (SV.mGet more) + end + in + if (amount < 0) + then raise Size + else tryInput (buf, pos, amount) + end + (* close an input stream given its info structure; we need this function + * for the cleanup hook to avoid a space leak. + *) + fun closeInInfo (INFO{closed=ref true, ...}) = () + | closeInInfo (info as INFO{closed, reader=PIO.RD{close, ...}, ...}) = ( +(*** We need some kind of lock on the input stream to do this safely!!! ***) + terminate info; + closed := true; + close() handle ex => inputExn(info, "closeIn", ex)) + fun closeIn (ISTRM(buf, _)) = closeInInfo (infoOfIBuf buf) + fun endOfStream (ISTRM(buf as IBUF{more, ...}, pos)) = ( + case SV.mTake more + of (next as MORE _) => (SV.mPut(more, next); false) + | next => let + val IBUF{data, info=INFO{closed, ...}, ...} = buf + in + if (pos = V.length data) + then (case (next, !closed) + of (NOMORE, false) => ( + case extendStream (readVec buf, "endOfStream", buf) + of EOF => true + | _ => false + (* end case *)) + | _ => (SV.mPut(more, next); true) + (* end case *)) + else (SV.mPut(more, next); false) + end + (* end case *)) + fun mkInstream' (reader, data) = let + val PIO.RD{readVec, readVecEvt, getPos, setPos, ...} = reader + val getPos = (case (getPos, setPos) + of (SOME f, SOME _) => (fn () => SOME(f())) + | _ => (fn () => NONE) + (* end case *)) + val more = SV.mVarInit NOMORE + val closedFlg = ref false + val tag = CleanIO.addCleaner dummyCleaner + val info = INFO{ + reader=reader, readVec=readVec, readVecEvt=readVecEvt, + closed = closedFlg, getPos = getPos, + tail = SV.mVarInit more, cleanTag = tag + } +(** What should we do about the position in this case ?? **) +(** Suggestion: When building a stream with supplied initial data, + ** nothing can be said about the positions inside that initial + ** data (who knows where that data even came from!). + **) + val basePos = if (V.length data = 0) then getPos () else NONE + val buf = IBUF { + basePos = basePos, data = data, + info = info, more = more + } + val strm = ISTRM(buf, 0) + in + (tag, strm) + end + fun mkInstream arg = let + val (tag, strm as ISTRM(IBUF{info, ...}, _)) = mkInstream' arg + in + CleanIO.rebindCleaner (tag, fn () => closeInInfo info); + strm + end + fun getReader (ISTRM(buf, pos)) = let + val IBUF{data, info as INFO{reader, ...}, more, ...} = buf + fun getData more = (case SV.mGet more + of (MORE(IBUF{data, more=more', ...})) => data :: getData more' + | _ => [] + (* end case *)) + in + terminate info; + if (pos < V.length data) + then ( + reader, + V.concat(vecExtract(data, pos, NONE) :: getData more) + ) + else (reader, V.concat(getData more)) + end + +(* + (** Position operations on instreams **) + datatype in_pos = INP of { + base : pos, + offset : int, + info : info + } +*) + +(* + fun getPosIn (ISTRM(buf, pos)) = (case buf + of IBUF{basePos=NONE, info, ...} => + inputExn (info, "getPosIn", IO.RandomAccessNotSupported) + | IBUF{basePos=SOME p, info, ...} => INP{ + base = p, offset = pos, info = info + } + (* end case *)) +*) +(* + fun filePosIn (INP{base, offset, ...}) = + Position.+(base, Position.fromInt offset) +*) + (* Get the underlying file position of a stream *) + fun filePosIn (ISTRM(buf, pos)) = (case buf + of IBUF{basePos=NONE, info, ...} => + inputExn (info, "filePosIn", IO.RandomAccessNotSupported) + | IBUF{basePos=SOME base, info, ...} => let + val INFO{reader=PIO.RD rd, readVec, ...} = info + in + case (#getPos rd, #setPos rd) + of (SOME getPos, SOME setPos) => let + val tmpPos = getPos() + fun readN 0 = () + | readN n = (case V.length(readVec n) + of 0 => inputExn ( + info, "filePosIn", Fail "bogus position") + | k => readN(n-k) + (* end case *)) + in + setPos base; + readN pos; + getPos () before setPos tmpPos + end + | _ => raise Fail "filePosIn: impossible" + (* end case *) + end + (* end case *)) +(* + fun setPosIn (pos as INP{info as INFO{reader, ...}, ...}) = let + val fpos = filePosIn pos + val (PIO.RD rd) = reader + in + terminate info; + valOf (#setPos rd) fpos; + mkInstream (PIO.RD rd, NONE) + end +*) + + (** Text stream specific operations **) + fun inputLine (ISTRM(buf as IBUF{data, ...}, pos)) = let + fun join (item, (list, strm)) = (item::list, strm) + fun nextBuf (isEmpty, buf as IBUF{more, data, ...}) = let + fun last () = + (if isEmpty then [] else ["\n"], ISTRM(buf, V.length data)) + fun get (MORE buf) = scanData (buf, 0) + | get NOMORE = (case (SV.mTake more) + of NOMORE => ( + case extendStream (readVec buf, "inputLine", buf) + of EOF => last () + | (DATA rest) => scanData (rest, 0) + (* end case *)) + | next => (SV.mPut(more, next); get next) + (* end case *)) + | get TERMINATED = last() + in + get (SV.mGet more) + end + and scanData (buf as IBUF{data, ...}, i) = let + val len = V.length data + fun scan j = if (j = len) + then join(vecExtract(data, i, NONE), nextBuf(false, buf)) + else if (vecSub(data, j) = #"\n") + then ([vecExtract(data, i, SOME(j+1-i))], ISTRM(buf, j+1)) + else scan (j+1) + in + scan i + end + val (data, strm) = if (V.length data = pos) + then nextBuf (true, buf) + else scanData (buf, pos) + val res_v = V.concat data + in + if V.length res_v = 0 then NONE else SOME (res_v, strm) + end + + (* IO event constructors: + * We exploit the "functional" nature of stream IO to implement the event + * constructors. These constructors spawn a thread to do the operation + * and and write the result in an iVar that serves as the synchronization + * value. + * NOTE: this implementation has the weakness that it prevents shutdown when + * everything else is deadlocked, since the thread that is spawned to actually + * do the IO could proceed. + *) + local + datatype 'a result = RES of 'a | EXN of exn + fun doInput inputOp = let + fun input arg = RES(inputOp arg) handle ex => EXN ex + in + fn arg => CML.guard (fn () => let + val resV = SV.iVar() + in + CML.spawn (fn () => SV.iPut(resV, input arg)); + CML.wrap(SV.iGetEvt resV, + fn (RES x) => x | (EXN ex) => raise ex) + end) + end + in + val input1Evt = doInput input1 + val inputEvt = doInput input + val inputNEvt = doInput inputN + val inputAllEvt = doInput inputAll + val inputLineEvt = doInput inputLine + end (* local *) + + + (*** Output streams ***) + + (* an output stream is implemented as a monitor using an mvar to + * hold its data. + *) + datatype ostrm_info = OSTRM of { + buf : A.array, + pos : int ref, + closed : bool ref, + bufferMode : IO.buffer_mode ref, + writer : writer, + writeArr : AS.slice -> unit, + writeVec : VS.slice -> unit, + cleanTag : CleanIO.tag + } + + type outstream = ostrm_info SV.mvar + + fun isNL #"\n" = true + | isNL _ = false + + fun isLineBreak (OSTRM{bufferMode, ...}) = + if (!bufferMode = IO.LINE_BUF) then isNL else (fn _ => false) + + fun outputExn (OSTRM{writer=PIO.WR{name, ...}, ...}, mlOp, exn) = + raise IO.Io{function=mlOp, name=name, cause=exn} + + (* lock access to the stream and make sure that it is not closed. *) + fun lockAndChkClosedOut (strmMV, mlOp) = (case SV.mTake strmMV + of (strm as OSTRM({closed=ref true, ...})) => ( + SV.mPut (strmMV, strm); + outputExn (strm, mlOp, IO.ClosedStream)) + | strm => strm + (* end case *)) + + fun flushBuffer (strmMV, strm as OSTRM{buf, pos, writeArr, ...}, mlOp) = ( + case !pos + of 0 => () + | n => (( + writeArr (AS.slice (buf, 0, SOME n)); pos := 0) + handle ex => ( + SV.mPut(strmMV, strm); outputExn (strm, mlOp, ex))) + (* end case *)) + + (* A version of copyVec that checks for newlines, while it is copying. + * This is used for LINE_BUF output of strings and substrings. + *) + fun lineBufCopyVec (src, srcI, srcLen, dst, dstI) = let + val stop = srcI+srcLen + fun cpy (srcI, dstI, lb) = + if (srcI < stop) + then let val c = vecSub(src, srcI) + in + arrUpdate (dst, dstI, c); + cpy (srcI+1, dstI+1, lb orelse isNL c) + end + else lb + in + cpy (srcI, dstI, false) + end + + (* a version of copyVec for BLOCK_BUF output of strings and substrings. *) + fun blockBufCopyVec (src, srcI, srcLen, dst, dstI) = ( + AS.copyVec { + src = VS.slice (src, srcI, SOME srcLen), + dst = dst, di = dstI }; + false) + + fun output (strmMV, v) = let + val (strm as OSTRM os) = lockAndChkClosedOut (strmMV, "output") + fun release () = SV.mPut (strmMV, strm) + val {buf, pos, bufferMode, ...} = os + fun flush () = flushBuffer (strmMV, strm, "output") + fun flushAll () = (#writeArr os (AS.full buf) + handle ex => (release(); outputExn (strm, "output", ex))) + fun writeDirect () = ( + case !pos + of 0 => () + | n => (#writeArr os (AS.slice (buf, 0, SOME n)); pos := 0) + (* end case *); + #writeVec os (VS.full v)) + handle ex => (release(); outputExn (strm, "output", ex)) + fun insert copyVec = let + val bufLen = A.length buf + val dataLen = V.length v + in + if (dataLen >= bufLen) + then writeDirect() + else let + val i = !pos + val avail = bufLen - i + in + if (avail < dataLen) + then let + val _ = + AS.copyVec + { src = VS.slice (v, 0, SOME avail), + dst = buf, di = i } + val _ = flushAll() + val needsFlush = copyVec(v, avail, dataLen-avail, buf, 0) + in + pos := dataLen-avail; + if needsFlush then flush () else () + end + else let + val needsFlush = copyVec(v, 0, dataLen, buf, i) + in + pos := i + dataLen; + if (needsFlush orelse (avail = dataLen)) + then flush() + else () + end + end + end + in + case !bufferMode + of IO.NO_BUF => writeDirect () + | IO.LINE_BUF => insert lineBufCopyVec + | IO.BLOCK_BUF => insert blockBufCopyVec + (* end case *); + release() + end + + fun output1 (strmMV, elem) = let + val (strm as OSTRM{buf, pos, bufferMode, writeArr, ...}) = + lockAndChkClosedOut (strmMV, "output1") + fun release () = SV.mPut (strmMV, strm) + in + case !bufferMode + of IO.NO_BUF => ( + arrUpdate (buf, 0, elem); + writeArr (AS.slice (buf, 0, SOME 1)) + handle ex => (release(); outputExn (strm, "output1", ex))) + | IO.LINE_BUF => let val i = !pos val i' = i+1 + in + arrUpdate (buf, i, elem); pos := i'; + if ((i' = A.length buf) orelse (isNL elem)) + then flushBuffer (strmMV, strm, "output1") + else () + end + | IO.BLOCK_BUF => let val i = !pos val i' = i+1 + in + arrUpdate (buf, i, elem); pos := i'; + if (i' = A.length buf) + then flushBuffer (strmMV, strm, "output1") + else () + end + (* end case *); + release() + end + + fun flushOut strmMV = let + val strm = lockAndChkClosedOut (strmMV, "flushOut") + in + flushBuffer (strmMV, strm, "flushOut"); + SV.mPut (strmMV, strm) + end + + fun closeOut strmMV = let + val (strm as OSTRM{writer=PIO.WR{close, ...}, closed, cleanTag, ...}) = + SV.mTake strmMV + in + if !closed + then () + else ( + flushBuffer (strmMV, strm, "closeOut"); + closed := true; + CleanIO.removeCleaner cleanTag; + close()); + SV.mPut (strmMV, strm) + end + + fun mkOutstream' (wr as PIO.WR{chunkSize, writeArr, writeVec, ...}, mode) = + let fun iterate (f, size, subslice) = let + fun lp sl = + if size sl = 0 then () + else let val n = f sl + in + lp (subslice (sl, n, NONE)) + end + in + lp + end + val writeArr' = iterate (writeArr, AS.length, AS.subslice) + val writeVec' = iterate (writeVec, VS. length, VS.subslice) + (* install a dummy cleaner *) + val tag = CleanIO.addCleaner dummyCleaner + val strm = SV.mVarInit (OSTRM{ + buf = A.array(chunkSize, someElem), + pos = ref 0, + closed = ref false, + bufferMode = ref mode, + writer = wr, + writeArr = writeArr', + writeVec = writeVec', + cleanTag = tag + }) + in + (tag, strm) + end + fun mkOutstream arg = let + val (tag, strm) = mkOutstream' arg + in + CleanIO.rebindCleaner (tag, fn () => closeOut strm); + strm + end + + fun getWriter strmMV = let + val (strm as OSTRM{writer, bufferMode, ...}) = + lockAndChkClosedOut (strmMV, "getWriter") + in + (writer, !bufferMode) before SV.mPut(strmMV, strm) + end + + (** Position operations on outstreams **) + datatype out_pos = OUTP of { + pos : PIO.pos, + strm : outstream + } + + fun getPosOut strmMV = let + val (strm as OSTRM{writer, ...}) = + lockAndChkClosedOut (strmMV, "getWriter") + fun release () = SV.mPut(strmMV, strm) + in + flushBuffer (strmMV, strm, "getPosOut"); + case writer + of PIO.WR{getPos=SOME f, ...} => ( + OUTP{pos = f(), strm = strmMV} + handle ex => (release(); outputExn(strm, "getPosOut", ex))) + | _ => ( + release(); + outputExn(strm, "getPosOut", IO.RandomAccessNotSupported)) + (* end case *) + before release() + end + fun filePosOut (OUTP{pos, strm=strmMV}) = ( + SV.mPut (strmMV, lockAndChkClosedOut (strmMV, "filePosOut")); + pos) + fun setPosOut (OUTP{pos, strm=strmMV}) = let + val (strm as OSTRM{writer, ...}) = + lockAndChkClosedOut (strmMV, "setPosOut") + fun release () = SV.mPut(strmMV, strm) + in + case writer + of PIO.WR{setPos=SOME f, ...} => ( + (f pos) + handle ex => (release(); outputExn(strm, "setPosOut", ex))) + | _ => ( + release(); + outputExn(strm, "getPosOut", IO.RandomAccessNotSupported)) + (* end case *); + release() + end + + fun setBufferMode (strmMV, mode) = let + val (strm as OSTRM{bufferMode, ...}) = + lockAndChkClosedOut (strmMV, "setBufferMode") + in + if (mode = IO.NO_BUF) + then flushBuffer (strmMV, strm, "setBufferMode") + else (); + bufferMode := mode; + SV.mPut (strmMV, strm) + end + fun getBufferMode strmMV = let +(** should we be checking for closed streams here??? **) + val (strm as OSTRM{bufferMode, ...}) = + lockAndChkClosedOut (strmMV, "getBufferMode") + in + !bufferMode before SV.mPut (strmMV, strm) + end + + (** Text stream specific operations **) + fun outputSubstr (strmMV, ss) = let + val (strm as OSTRM os) = lockAndChkClosedOut (strmMV, "outputSubstr") + fun release () = SV.mPut (strmMV, strm) + val (v, dataStart, dataLen) = substringBase ss + val {buf, pos, bufferMode, ...} = os + val bufLen = A.length buf + fun flush () = flushBuffer (strmMV, strm, "outputSubstr") + fun flushAll () = (#writeArr os (AS.full buf) + handle ex => (release(); outputExn (strm, "outputSubstr", ex))) + fun writeDirect () = ( + case !pos + of 0 => () + | n => (#writeArr os (AS.slice (buf, 0, SOME n)); pos := 0) + (* end case *); + #writeVec os (VS.slice (v, dataStart, SOME dataLen))) + handle ex => (release(); outputExn (strm, "outputSubstr", ex)) + fun insert copyVec = let + val bufLen = A.length buf + val dataLen = V.length v + in + if (dataLen >= bufLen) + then writeDirect() + else let + val i = !pos + val avail = bufLen - i + in + if (avail < dataLen) + then let + val _ = + AS.copyVec { src = VS.slice (v, dataStart, + SOME avail), + dst = buf, di = i } + val _ = flushAll() + val needsFlush = copyVec(v, avail, dataLen-avail, buf, 0) + in + pos := dataLen-avail; + if needsFlush then flush () else () + end + else let + val needsFlush = copyVec(v, dataStart, dataLen, buf, i) + in + pos := i + dataLen; + if (needsFlush orelse (avail = dataLen)) + then flush() + else () + end + end + end + in + case !bufferMode + of IO.NO_BUF => writeDirect () + | IO.LINE_BUF => insert lineBufCopyVec + | IO.BLOCK_BUF => insert blockBufCopyVec + (* end case *); + release() + end + + end (* StreamIO *) + + type vector = V.vector + type elem = V.elem + type instream = StreamIO.instream SV.mvar + type outstream = StreamIO.outstream SV.mvar + + (** Input operations **) + fun input strm = let val (v, strm') = StreamIO.input(SV.mTake strm) + in + SV.mPut (strm, strm'); v + end + fun input1 strm = (case StreamIO.input1(SV.mTake strm) + of NONE => NONE + | (SOME(elem, strm')) => (SV.mPut (strm, strm'); SOME elem) + (* end case *)) + fun inputN (strm, n) = let val (v, strm') = StreamIO.inputN (SV.mTake strm, n) + in + SV.mPut (strm, strm'); v + end + fun inputAll (strm : instream) = let + val (v, strm') = StreamIO.inputAll(SV.mTake strm) + in + SV.mPut (strm, strm'); v + end + + (* event-value constructors *) + local + datatype 'a result = RES of 'a | EXN of exn + fun sendEvt (ch, v) = CML.sendEvt(ch, RES v) + fun sendExnEvt (ch, exn) = CML.sendEvt(ch, EXN exn) + fun recvEvt ch = + CML.wrap(CML.recvEvt ch, fn (RES v) => v | (EXN exn) => raise exn) + fun doInput inputEvt (strm : instream) nack = let + val replyCh = CML.channel() + fun inputThread () = let + val strm' = SV.mTake strm + val nackEvt = CML.wrap(nack, fn _ => SV.mPut(strm, strm')) + fun handleInput (result, strm'') = CML.select [ + CML.wrap (sendEvt(replyCh, result), + fn _ => SV.mPut(strm, strm'')), + nackEvt + ] + in + (CML.select [ + CML.wrap (inputEvt strm', handleInput), + nackEvt + ]) handle exn => CML.select [ + CML.wrap (sendExnEvt(replyCh, exn), + fn _ => SV.mPut(strm, strm')), + nackEvt + ] + end + in + ignore (CML.spawn inputThread); + recvEvt replyCh + end + in + fun input1Evt (strm : instream) = let + fun inputEvt (strm : StreamIO.instream) = CML.wrap ( + StreamIO.input1Evt strm, + fn NONE => (NONE, strm) | SOME(s, strm') => (SOME s, strm')) + in + CML.withNack (doInput inputEvt strm) + end + fun inputEvt strm = CML.withNack (doInput StreamIO.inputEvt strm) + fun inputNEvt (strm, n) = + CML.withNack (doInput (fn strm' => StreamIO.inputNEvt(strm', n)) strm) + fun inputAllEvt Strm = CML.withNack (doInput StreamIO.inputAllEvt Strm) + end (* local *) + + fun canInput (strm, n) = StreamIO.canInput (SV.mGet strm, n) + fun lookahead (strm : instream) = (case StreamIO.input1(SV.mGet strm) + of NONE => NONE + | (SOME(elem, _)) => SOME elem + (* end case *)) + fun closeIn strm = let + val (s as StreamIO.ISTRM(buf as StreamIO.IBUF{data, ...}, _)) = + SV.mTake strm + in + StreamIO.closeIn s; + SV.mPut(strm, StreamIO.findEOS buf) + end + fun endOfStream strm = StreamIO.endOfStream(SV.mGet strm) +(* + fun getPosIn strm = StreamIO.getPosIn(SV.mGet strm) + fun setPosIn (strm, p) = mUpdate(strm, StreamIO.setPosIn p) +*) + + (** Output operations **) + fun output (strm, v) = StreamIO.output(SV.mGet strm, v) + fun output1 (strm, c) = StreamIO.output1(SV.mGet strm, c) + fun flushOut strm = StreamIO.flushOut(SV.mGet strm) + fun closeOut strm = StreamIO.closeOut(SV.mGet strm) + fun getPosOut strm = StreamIO.getPosOut(SV.mGet strm) + fun setPosOut (strm, p as StreamIO.OUTP{strm=strm', ...}) = ( + mUpdate(strm, strm'); StreamIO.setPosOut p) + + fun mkInstream (strm : StreamIO.instream) = SV.mVarInit strm + fun getInstream (strm : instream) = SV.mGet strm + fun setInstream (strm : instream, strm') = mUpdate(strm, strm') + + fun mkOutstream (strm : StreamIO.outstream) = SV.mVarInit strm + fun getOutstream (strm : outstream) = SV.mGet strm + fun setOutstream (strm : outstream, strm') = mUpdate(strm, strm') + + (* figure out the proper buffering mode for a given writer *) + fun bufferMode (PIO.WR{ioDesc=NONE, ...}) = IO.BLOCK_BUF + | bufferMode (PIO.WR{ioDesc=SOME iod, ...}) = + if (OS.IO.kind iod = OS.IO.Kind.tty) then IO.LINE_BUF else IO.BLOCK_BUF + + (** Open files **) + fun openIn fname = + mkInstream(StreamIO.mkInstream(OSPrimIO.openRd fname, empty)) + handle ex => raise IO.Io{function="openIn", name=fname, cause=ex} + fun openOut fname = let + val wr = OSPrimIO.openWr fname + in + mkOutstream(StreamIO.mkOutstream(wr, bufferMode wr)) + handle ex => raise IO.Io{function="openOut", name=fname, cause=ex} + end + fun openAppend fname = + mkOutstream(StreamIO.mkOutstream(OSPrimIO.openApp fname, IO.NO_BUF)) + handle ex => raise IO.Io{function="openAppend", name=fname, cause=ex} + + (** Text stream specific operations **) + fun inputLine strm = + Option.map (fn (s, strm') => (SV.mPut (strm, strm'); s)) + (StreamIO.inputLine (SV.mTake strm)) + fun outputSubstr (strm, ss) = StreamIO.outputSubstr (SV.mGet strm, ss) + fun openString src = + mkInstream(StreamIO.mkInstream(OSPrimIO.strReader src, empty)) + handle ex => raise IO.Io{function="openIn", name="", cause=ex} + + structure ChanIO = ChanIOFn( + structure PrimIO = PIO + structure V = CharVector + structure A = CharArray + structure VS = CharVectorSlice + structure AS = CharArraySlice) + + (* open an instream that is connected to the output port of a channel. *) + fun openChanIn ch = + mkInstream(StreamIO.mkInstream(ChanIO.mkReader ch, empty)) + + (* open an outstream that is connected to the input port of a channel. *) + fun openChanOut ch = + mkOutstream(StreamIO.mkOutstream(ChanIO.mkWriter ch, IO.NO_BUF)) + + (** Standard streams **) + local + structure SIO = StreamIO + fun mkStdIn rebind = let + val (tag, strm) = SIO.mkInstream'(OSPrimIO.stdIn(), empty) + in + if rebind + then CleanIO.rebindCleaner (tag, dummyCleaner) + else (); + strm + end + fun mkStdOut rebind = let + val wr = OSPrimIO.stdOut() + val (tag, strm) = SIO.mkOutstream'(wr, bufferMode wr) + in + if rebind + then CleanIO.rebindCleaner (tag, fn () => SIO.flushOut strm) + else (); + strm + end + fun mkStdErr rebind = let + val (tag, strm) = SIO.mkOutstream'(OSPrimIO.stdErr(), IO.NO_BUF) + in + if rebind + then CleanIO.rebindCleaner (tag, fn () => SIO.flushOut strm) + else (); + strm + end + in + (* build the standard streams. Since we are not currently running CML, we + * cannot do the cleaner rebinding here, but that is okay, since these are + * just place holders. + *) + val stdIn = mkInstream(mkStdIn false) + val stdOut = mkOutstream(mkStdOut false) + val stdErr = mkOutstream(mkStdErr false) + + fun print s = let val strm' = SV.mTake stdOut + in + StreamIO.output (strm', s); StreamIO.flushOut strm'; + SV.mPut(stdOut, strm') + end + + fun scanStream scanFn = let + val scan = scanFn StreamIO.input1 + fun doit strm = let + val instrm = getInstream strm + in + case scan instrm + of NONE => NONE + | SOME(item, instrm') => ( + setInstream(strm, instrm'); + SOME item) + (* end case *) + end + in + doit + end + + (* Establish a hook function to rebuild the I/O stack *) + val _ = CleanIO.stdStrmHook := (fn () => ( + setInstream (stdIn, mkStdIn true); + setOutstream (stdOut, mkStdOut true); + setOutstream (stdErr, mkStdErr true); + SMLofNJ.Internals.prHook := print)) + end (* local *) + + end (* TextIOFn *) diff --git a/cml/src/IO/os-prim-io-sig.sml b/cml/src/IO/os-prim-io-sig.sml new file mode 100644 index 0000000..6b91ce0 --- /dev/null +++ b/cml/src/IO/os-prim-io-sig.sml @@ -0,0 +1,35 @@ +(* os-prim-io-sig.sml + * + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * This is an interface to a PrimIO structure augmented with OS specific + * functions to create readers and writers. + * + * This file was copied from the SML/NJ sources. + *) + +signature OS_PRIM_IO = + sig + structure PrimIO : PRIM_IO + + type file_desc + + val openRd : string -> PrimIO.reader + val openWr : string -> PrimIO.writer + val openApp : string -> PrimIO.writer + + val mkReader : { + fd : file_desc, + name : string + } -> PrimIO.reader + + val mkWriter: { + fd : file_desc, + name : string, + appendMode : bool, + chunkSize : int + } -> PrimIO.writer + + end + diff --git a/cml/src/IO/prim-io-fn.sml b/cml/src/IO/prim-io-fn.sml new file mode 100644 index 0000000..128d88b --- /dev/null +++ b/cml/src/IO/prim-io-fn.sml @@ -0,0 +1,69 @@ +(* prim-io-fn.sml + * + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + *) + +functor PrimIO ( + + structure Vector : MONO_VECTOR + structure Array : MONO_ARRAY + structure VectorSlice : MONO_VECTOR_SLICE + structure ArraySlice : MONO_ARRAY_SLICE + sharing type Vector.vector = Array.vector = + VectorSlice.vector = ArraySlice.vector + sharing type Vector.elem = Array.elem = + VectorSlice.elem = ArraySlice.elem + sharing type ArraySlice.vector_slice = VectorSlice.slice + val someElem : Vector.elem + eqtype pos + val compare : (pos * pos) -> order + + ) : PRIM_IO = struct + + type 'a event = 'a CML.event + + structure A = Array + structure V = Vector + + type elem = A.elem + type vector = V.vector + type array = A.array + type array_slice = ArraySlice.slice + type vector_slice = VectorSlice.slice + type pos = pos + + val compare = compare + + datatype reader = RD of { + name : string, + chunkSize : int, + readVec : int -> vector, + readArr : array_slice -> int, + readVecEvt : int -> vector event, + readArrEvt : array_slice -> int event, + avail : unit -> Position.int option, + getPos : (unit -> pos) option, + setPos : (pos -> unit) option, + endPos : (unit -> pos) option, + verifyPos : (unit -> pos) option, + close : unit -> unit, + ioDesc : OS.IO.iodesc option + } + + datatype writer = WR of { + name : string, + chunkSize : int, + writeVec : vector_slice -> int, + writeArr : array_slice -> int, + writeVecEvt : vector_slice -> int event, + writeArrEvt : array_slice -> int event, + getPos : (unit -> pos) option, + setPos : (pos -> unit) option, + endPos : (unit -> pos) option, + verifyPos : (unit -> pos) option, + close : unit -> unit, + ioDesc : OS.IO.iodesc option + } + + end (* PrimIO *) diff --git a/cml/src/IO/prim-io-sig.sml b/cml/src/IO/prim-io-sig.sml new file mode 100644 index 0000000..eedfa7d --- /dev/null +++ b/cml/src/IO/prim-io-sig.sml @@ -0,0 +1,57 @@ +(* prim-io-sig.sml + * + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * This is the CML equivalent of the SMLBL's PRIM_IO signature. The + * differences are that we use event-valued interfaces instead of + * non-blocking operations, and that the operations are not optional. + *) + +signature PRIM_IO = + sig + + type 'a event = 'a CML.event + + type array + type vector + type elem + type vector_slice + type array_slice + eqtype pos + + val compare : (pos * pos) -> order + + datatype reader = RD of { + name : string, + chunkSize : int, + readVec : int -> vector, + readArr : array_slice -> int, + readVecEvt : int -> vector event, + readArrEvt : array_slice -> int event, + avail : unit -> Position.int option, + getPos : (unit -> pos) option, + setPos : (pos -> unit) option, + endPos : (unit -> pos) option, + verifyPos : (unit -> pos) option, + close : unit -> unit, + ioDesc : OS.IO.iodesc option + } + + datatype writer = WR of { + name : string, + chunkSize : int, + writeVec : vector_slice -> int, + writeArr : array_slice -> int, + writeVecEvt : vector_slice -> int event, + writeArrEvt : array_slice -> int event, + getPos : (unit -> pos) option, + setPos : (pos -> unit) option, + endPos : (unit -> pos) option, + verifyPos : (unit -> pos) option, + close : unit -> unit, + ioDesc : OS.IO.iodesc option + } + + end; + diff --git a/cml/src/IO/sources.cm b/cml/src/IO/sources.cm new file mode 100644 index 0000000..522ba14 --- /dev/null +++ b/cml/src/IO/sources.cm @@ -0,0 +1,53 @@ +(* sources.cm + * + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + *) + +Group + signature CML_BIN_IO + signature CML_IMPERATIVE_IO + signature CML_STREAM_IO + signature CML_TEXT_IO + signature CML_TEXT_STREAM_IO + signature OS_PRIM_IO + signature PRIM_IO + structure BinPrimIO + structure CleanIO + structure TextPrimIO + functor BinIOFn + functor ChanIOFn + functor PrimIO + functor TextIOFn +is +#if defined (NEW_CM) + $/basis.cm + $cml/core-cml.cm +#else + ../core-cml.cm +#endif + + prim-io-sig.sml + cml-stream-io-sig.sml + cml-text-stream-io-sig.sml +#if (SMLNJ_VERSION * 100 + SMLNJ_MINOR_VERSION >= 11033) + new-cml-imperative-io-sig.sml + new-bin-io-fn.sml + new-cml-text-io-sig.sml + new-text-io-fn.sml +#else + cml-imperative-io-sig.sml + bin-io-fn.sml + cml-text-io-sig.sml + text-io-fn.sml +#endif + + cml-bin-io-sig.sml + + clean-io.sml + prim-io-fn.sml + os-prim-io-sig.sml + bin-prim-io.sml + text-prim-io.sml + chan-io-fn.sml + diff --git a/cml/src/IO/text-io-fn.sml b/cml/src/IO/text-io-fn.sml new file mode 100644 index 0000000..977d9d5 --- /dev/null +++ b/cml/src/IO/text-io-fn.sml @@ -0,0 +1,1007 @@ +(* text-io-fn.sml + * + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * This is the CML version of the TextIO functor. + *) + +functor TextIOFn ( + + structure OSPrimIO : sig + include OS_PRIM_IO + val stdIn : unit -> PrimIO.reader + val stdOut : unit -> PrimIO.writer + val stdErr : unit -> PrimIO.writer + val strReader : string -> PrimIO.reader + end + where type PrimIO.array = TextPrimIO.array + where type PrimIO.vector = TextPrimIO.vector + where type PrimIO.elem = TextPrimIO.elem + where type PrimIO.pos = TextPrimIO.pos + where type PrimIO.reader = TextPrimIO.reader + where type PrimIO.writer = TextPrimIO.writer + + ) : CML_TEXT_IO = struct + + structure PIO = OSPrimIO.PrimIO + structure A = CharArray + structure V = CharVector + + structure SV = SyncVar + + (* assign to an MVar *) + fun mUpdate (mv, x) = ignore(SV.mSwap(mv, x)) + + (* an element for initializing buffers *) + val someElem = #"\000" + + val vecExtract = V.extract + val vecSub = V.sub + val arrUpdate = A.update + val substringBase = Substring.base + val empty = "" + + fun dummyCleaner () = () + + structure StreamIO = + struct + type vector = V.vector + type elem = V.elem + type reader = PIO.reader + type writer = PIO.writer + type pos = PIO.pos + + (*** Functional input streams ***) + datatype instream = ISTRM of (in_buffer * int) + and in_buffer = IBUF of { + basePos : pos option, + more : more SV.mvar, (* when this cell is empty, it means that *) + (* there is an outstanding request to the *) + (* server to extend the stream. *) + data : vector, + info : info + } + and more + = MORE of in_buffer (* forward link to additional data *) + | NOMORE (* placeholder for forward link *) + | TERMINATED (* termination of the stream *) + + and info = INFO of { + reader : reader, + readVec : int -> vector, + readVecEvt : int -> vector CML.event, + closed : bool ref, + getPos : unit -> pos option, + tail : more SV.mvar SV.mvar, + (* points to the more cell of the last buffer *) + cleanTag : CleanIO.tag + } + + fun infoOfIBuf (IBUF{info, ...}) = info + fun chunkSzOfIBuf buf = let + val INFO{reader=PIO.RD{chunkSize, ...}, ...} = infoOfIBuf buf + in + chunkSize + end + fun readVec (IBUF{info=INFO{readVec=f, ...}, ...}) = f + + fun inputExn (INFO{reader=PIO.RD{name, ...}, ...}, mlOp, exn) = + raise IO.Io{function=mlOp, name=name, cause=exn} + + datatype more_data = EOF | DATA of in_buffer + + (* extend the stream by a chunk. + * Invariant: the more m-variable is empty on entry and full on exit. + *) + fun extendStream (readFn, mlOp, buf as IBUF{more, info, ...}) = (let + val INFO{getPos, tail, ...} = info + val basePos = getPos() + val chunk = readFn (chunkSzOfIBuf buf) + in + if (V.length chunk = 0) + then (SV.mPut (more, NOMORE); EOF) + else let + val newMore = SV.mVar() + val buf' = IBUF{ + basePos = basePos, data = chunk, + more = newMore, info = info + } + in + (* note that we do not fill the more cell until + * after the tail has been updated. This ensures + * that someone attempting to access the tail will + * not acquire the lock until after we are done. + *) + mUpdate (tail, newMore); + SV.mPut (more, MORE buf'); (* releases lock!! *) + SV.mPut (newMore, NOMORE); + DATA buf' + end + end + handle ex => ( + SV.mPut (more, NOMORE); + inputExn(info, mlOp, ex))) + + (* get the next buffer in the stream, extending it if necessary. If + * the stream must be extended, we lock it by taking the value from the + * more cell; the extendStream function is responsible for filling in + * the cell. + *) + fun getBuffer (readFn, mlOp) (buf as IBUF{more, info, ...}) = let + fun get TERMINATED = EOF + | get (MORE buf') = DATA buf' + | get NOMORE = (case SV.mTake more + of NOMORE => extendStream (readFn, mlOp, buf) + | next => (SV.mPut(more, next); get next) + (* end case *)) + in + get (SV.mGet more) + end + + (* read a chunk that is at least the specified size *) + fun readChunk buf = let + val INFO{readVec, reader=PIO.RD{chunkSize, ...}, ...} = + infoOfIBuf buf + in + case (chunkSize - 1) + of 0 => (fn n => readVec n) + | k => (* round up to next multiple of chunkSize *) + (fn n => readVec(Int.quot(n+k, chunkSize) * chunkSize)) + (* end case *) + end + + fun generalizedInput getBuf = let + fun get (ISTRM(buf as IBUF{data, ...}, pos)) = let + val len = V.length data + in + if (pos < len) + then (vecExtract(data, pos, NONE), ISTRM(buf, len)) + else (case (getBuf buf) + of EOF => (empty, ISTRM(buf, len)) + | (DATA rest) => get (ISTRM(rest, 0)) + (* end case *)) + end + in + get + end + + (* terminate an input stream *) + fun terminate (info as INFO{tail, cleanTag, ...}) = let + val m = SV.mGet tail + in + case SV.mTake m + of (m' as MORE _) => (SV.mPut(m, m'); terminate info) + | TERMINATED => SV.mPut(m, TERMINATED) + | _ => ( + CleanIO.removeCleaner cleanTag; + SV.mPut(m, TERMINATED)) + (* end case *) + end + + (* find the end of the stream *) + fun findEOS (buf as IBUF{more, data, ...}) = (case (SV.mGet more) + of (MORE buf) => findEOS buf + | _ => ISTRM(buf, V.length data) + (* end case *)) + + fun input (strm as ISTRM(buf, _)) = + generalizedInput (getBuffer (readVec buf, "input")) strm + fun input1 (ISTRM(buf, pos)) = let + val IBUF{data, more, ...} = buf + in + if (pos < V.length data) + then SOME(vecSub(data, pos), ISTRM(buf, pos+1)) + else let + fun get (MORE buf) = input1 (ISTRM(buf, 0)) + | get TERMINATED = NONE + | get NOMORE = (case SV.mTake more + of NOMORE => ( + case extendStream (readVec buf, "input1", buf) + of EOF => NONE + | (DATA rest) => input1 (ISTRM(rest, 0)) + (* end case *)) + | next => (SV.mPut(more, next); get next) + (* end case *)) + in + get (SV.mGet more) + end + end + fun inputN (ISTRM(buf, pos), n) = let + fun join (item, (list, strm)) = (item::list, strm) + fun inputList (buf as IBUF{data, ...}, i, n) = let + val len = V.length data + val remain = len-i + in + if (remain >= n) + then ([vecExtract(data, i, SOME n)], ISTRM(buf, i+n)) + else join ( + vecExtract(data, i, NONE), + nextBuf(buf, n-remain)) + end + and nextBuf (buf as IBUF{more, data, ...}, n) = let + fun get (MORE buf) = inputList (buf, 0, n) + | get TERMINATED = ([], ISTRM(buf, V.length data)) + | get NOMORE = (case (SV.mTake more) + of NOMORE => (case extendStream (readVec buf, "inputN", buf) + of EOF => ([], ISTRM(buf, V.length data)) + | (DATA rest) => inputList (rest, 0, n) + (* end case *)) + | next => (SV.mPut(more, next); get next) + (* end case *)) + in + get (SV.mGet more) + end + val (data, strm) = inputList (buf, pos, n) + in + (V.concat data, strm) + end + + fun inputAll (strm as ISTRM(buf, _)) = let + val INFO{reader=PIO.RD{avail, ...}, ...} = infoOfIBuf buf + (* read a chunk that is as large as the available input. Note + * that for systems that use CR-LF for #"\n", the size will be + * too large, but this should be okay. + *) + fun bigChunk _ = let + val delta = (case avail() + of NONE => chunkSzOfIBuf buf + | (SOME n) => n + (* end case *)) + in + readChunk buf delta + end + val bigInput = + generalizedInput (getBuffer (bigChunk, "inputAll")) + fun loop (v, strm) = + if (V.length v = 0) then [] else v :: loop(bigInput strm) + val data = V.concat (loop (bigInput strm)) + in + (data, findEOS buf) + end + + fun input1Evt _ = raise Fail "input1Evt unimplemented" + fun inputEvt _ = raise Fail "inputEvt unimplemented" + fun inputNEvt _ = raise Fail "inputNEvt unimplemented" + fun inputAllEvt _ = raise Fail "inputAllEvt unimplemented" + fun inputLineEvt _ = raise Fail "inputLineEvt unimplemented" + + (* Return SOME k, if k <= amount characters can be read without blocking. *) + fun canInput (strm as ISTRM(buf, pos), amount) = let +(****** + val readVecNB = (case buf + of (IBUF{info as INFO{readVecNB=NONE, ...}, ...}) => + inputExn(info, "canInput", IO.NonblockingNotSupported) + | (IBUF{info=INFO{readVecNB=SOME f, ...}, ...}) => f + (* end case *)) +******) + fun tryInput (buf as IBUF{data, ...}, i, n) = let + val len = V.length data + val remain = len - i + in + if (remain >= n) + then SOME n + else nextBuf (buf, n - remain) + end + and nextBuf (IBUF{more, ...}, n) = let + fun get (MORE buf) = tryInput (buf, 0, n) + | get TERMINATED = SOME(amount - n) +(****** + | get NOMORE = (case SV.mTake more + of NOMORE => (( + case extendStream (readVecNB, "canInput", buf) + of EOF => SOME(amount - n) + | (DATA b) => tryInput (b, 0, n) + (* end case *)) + handle IO.Io{cause=WouldBlock, ...} => SOME(amount - n)) + | next => (SV.mPut(more, next); get next) + (* end case *)) +******) + | get NOMORE = SOME(amount - n) + in + get (SV.mGet more) + end + in + if (amount < 0) + then raise Size + else tryInput (buf, pos, amount) + end + (* close an input stream given its info structure; we need this function + * for the cleanup hook to avoid a space leak. + *) + fun closeInInfo (INFO{closed=ref true, ...}) = () + | closeInInfo (info as INFO{closed, reader=PIO.RD{close, ...}, ...}) = ( +(*** We need some kind of lock on the input stream to do this safely!!! ***) + terminate info; + closed := true; + close() handle ex => inputExn(info, "closeIn", ex)) + fun closeIn (ISTRM(buf, _)) = closeInInfo (infoOfIBuf buf) + fun endOfStream (ISTRM(buf as IBUF{more, ...}, pos)) = ( + case SV.mTake more + of (next as MORE _) => (SV.mPut(more, next); false) + | next => let + val IBUF{data, info=INFO{closed, ...}, ...} = buf + in + if (pos = V.length data) + then (case (next, !closed) + of (NOMORE, false) => ( + case extendStream (readVec buf, "endOfStream", buf) + of EOF => true + | _ => false + (* end case *)) + | _ => (SV.mPut(more, next); true) + (* end case *)) + else (SV.mPut(more, next); false) + end + (* end case *)) + fun mkInstream' (reader, optData) = let + val PIO.RD{readVec, readVecEvt, getPos, setPos, ...} = reader + val getPos = (case (getPos, setPos) + of (SOME f, SOME _) => (fn () => SOME(f())) + | _ => (fn () => NONE) + (* end case *)) + val more = SV.mVarInit NOMORE + val closedFlg = ref false + val tag = CleanIO.addCleaner dummyCleaner + val info = INFO{ + reader=reader, readVec=readVec, readVecEvt=readVecEvt, + closed = closedFlg, getPos = getPos, + tail = SV.mVarInit more, cleanTag = tag + } + val buf = (case optData + of NONE => IBUF{ + basePos = getPos(), data=empty, + info=info, more=more + } +(** What should we do about the position in this case ?? **) +(** Suggestion: When building a stream with supplied initial data, + ** nothing can be said about the positions inside that initial + ** data (who knows where that data even came from!). + **) + | (SOME v) => IBUF{ + basePos = NONE, data=v, + info=info, more=more} + (* end case *)) + val strm = ISTRM(buf, 0) + in + (tag, strm) + end + fun mkInstream arg = let + val (tag, strm as ISTRM(IBUF{info, ...}, _)) = mkInstream' arg + in + CleanIO.rebindCleaner (tag, fn () => closeInInfo info); + strm + end + fun getReader (ISTRM(buf, pos)) = let + val IBUF{data, info as INFO{reader, ...}, more, ...} = buf + fun getData more = (case SV.mGet more + of (MORE(IBUF{data, more=more', ...})) => data :: getData more' + | _ => [] + (* end case *)) + in + terminate info; + if (pos < V.length data) + then ( + reader, + V.concat(vecExtract(data, pos, NONE) :: getData more) + ) + else (reader, V.concat(getData more)) + end + + (** Position operations on instreams **) + datatype in_pos = INP of { + base : pos, + offset : int, + info : info + } + + fun getPosIn (ISTRM(buf, pos)) = (case buf + of IBUF{basePos=NONE, info, ...} => + inputExn (info, "getPosIn", IO.RandomAccessNotSupported) + | IBUF{basePos=SOME p, info, ...} => INP{ + base = p, offset = pos, info = info + } + (* end case *)) + fun filePosIn (INP{base, offset, ...}) = + Position.+(base, Position.fromInt offset) + fun setPosIn (pos as INP{info as INFO{reader, ...}, ...}) = let + val fpos = filePosIn pos + val (PIO.RD rd) = reader + in + terminate info; + valOf (#setPos rd) fpos; + mkInstream (PIO.RD rd, NONE) + end + + (** Text stream specific operations **) + fun inputLine (ISTRM(buf as IBUF{data, ...}, pos)) = let + fun join (item, (list, strm)) = (item::list, strm) + fun nextBuf (isEmpty, buf as IBUF{more, data, ...}) = let + fun last () = + (if isEmpty then [] else ["\n"], ISTRM(buf, V.length data)) + fun get (MORE buf) = scanData (buf, 0) + | get NOMORE = (case (SV.mTake more) + of NOMORE => ( + case extendStream (readVec buf, "inputLine", buf) + of EOF => last () + | (DATA rest) => scanData (rest, 0) + (* end case *)) + | next => (SV.mPut(more, next); get next) + (* end case *)) + | get TERMINATED = last() + in + get (SV.mGet more) + end + and scanData (buf as IBUF{data, ...}, i) = let + val len = V.length data + fun scan j = if (j = len) + then join(vecExtract(data, i, NONE), nextBuf(false, buf)) + else if (vecSub(data, j) = #"\n") + then ([vecExtract(data, i, SOME(j+1-i))], ISTRM(buf, j+1)) + else scan (j+1) + in + scan i + end + val (data, strm) = if (V.length data = pos) + then nextBuf (true, buf) + else scanData (buf, pos) + in + (V.concat data, strm) + end + + (*** Output streams ***) + + (* an output stream is implemented as a monitor using an mvar to + * hold its data. + *) + datatype ostrm_info = OSTRM of { + buf : A.array, + pos : int ref, + closed : bool ref, + bufferMode : IO.buffer_mode ref, + writer : writer, + writeArr : {buf : A.array, i : int, sz : int option} -> unit, + writeVec : {buf : V.vector, i : int, sz : int option} -> unit, + cleanTag : CleanIO.tag + } + + type outstream = ostrm_info SV.mvar + + fun isNL #"\n" = true + | isNL _ = false + + fun isLineBreak (OSTRM{bufferMode, ...}) = + if (!bufferMode = IO.LINE_BUF) then isNL else (fn _ => false) + + fun outputExn (OSTRM{writer=PIO.WR{name, ...}, ...}, mlOp, exn) = + raise IO.Io{function=mlOp, name=name, cause=exn} + + (* lock access to the stream and make sure that it is not closed. *) + fun lockAndChkClosedOut (strmMV, mlOp) = (case SV.mTake strmMV + of (strm as OSTRM({closed=ref true, ...})) => ( + SV.mPut (strmMV, strm); + outputExn (strm, mlOp, IO.ClosedStream)) + | strm => strm + (* end case *)) + + fun flushBuffer (strmMV, strm as OSTRM{buf, pos, writeArr, ...}, mlOp) = ( + case !pos + of 0 => () + | n => (( + writeArr {buf=buf, i=0, sz=SOME n}; pos := 0) + handle ex => ( + SV.mPut(strmMV, strm); outputExn (strm, mlOp, ex))) + (* end case *)) + + (* A version of copyVec that checks for newlines, while it is copying. + * This is used for LINE_BUF output of strings and substrings. + *) + fun lineBufCopyVec (src, srcI, srcLen, dst, dstI) = let + val stop = srcI+srcLen + fun cpy (srcI, dstI, lb) = + if (srcI < stop) + then let val c = vecSub(src, srcI) + in + arrUpdate (dst, dstI, c); + cpy (srcI+1, dstI+1, lb orelse isNL c) + end + else lb + in + cpy (srcI, dstI, false) + end + + (* a version of copyVec for BLOCK_BUF output of strings and substrings. *) + fun blockBufCopyVec (src, srcI, srcLen, dst, dstI) = ( + A.copyVec { + src = src, si = srcI, len = SOME srcLen, dst = dst, di = dstI + }; + false) + + fun output (strmMV, v) = let + val (strm as OSTRM os) = lockAndChkClosedOut (strmMV, "output") + fun release () = SV.mPut (strmMV, strm) + val {buf, pos, bufferMode, ...} = os + fun flush () = flushBuffer (strmMV, strm, "output") + fun flushAll () = (#writeArr os {buf=buf, i=0, sz=NONE} + handle ex => (release(); outputExn (strm, "output", ex))) + fun writeDirect () = ( + case !pos + of 0 => () + | n => (#writeArr os {buf=buf, i=0, sz=SOME n}; pos := 0) + (* end case *); + #writeVec os {buf=v, i=0, sz=NONE}) + handle ex => (release(); outputExn (strm, "output", ex)) + fun insert copyVec = let + val bufLen = A.length buf + val dataLen = V.length v + in + if (dataLen >= bufLen) + then writeDirect() + else let + val i = !pos + val avail = bufLen - i + in + if (avail < dataLen) + then let + val _ = A.copyVec{ + src=v, si=0, len=SOME avail, dst=buf, di=i + } + val _ = flushAll() + val needsFlush = copyVec(v, avail, dataLen-avail, buf, 0) + in + pos := dataLen-avail; + if needsFlush then flush () else () + end + else let + val needsFlush = copyVec(v, 0, dataLen, buf, i) + in + pos := i + dataLen; + if (needsFlush orelse (avail = dataLen)) + then flush() + else () + end + end + end + in + case !bufferMode + of IO.NO_BUF => writeDirect () + | IO.LINE_BUF => insert lineBufCopyVec + | IO.BLOCK_BUF => insert blockBufCopyVec + (* end case *); + release() + end + + fun output1 (strmMV, elem) = let + val (strm as OSTRM{buf, pos, bufferMode, writeArr, ...}) = + lockAndChkClosedOut (strmMV, "output1") + fun release () = SV.mPut (strmMV, strm) + in + case !bufferMode + of IO.NO_BUF => ( + arrUpdate (buf, 0, elem); + writeArr {buf=buf, i=0, sz=SOME 1} + handle ex => (release(); outputExn (strm, "output1", ex))) + | IO.LINE_BUF => let val i = !pos val i' = i+1 + in + arrUpdate (buf, i, elem); pos := i'; + if ((i' = A.length buf) orelse (isNL elem)) + then flushBuffer (strmMV, strm, "output1") + else () + end + | IO.BLOCK_BUF => let val i = !pos val i' = i+1 + in + arrUpdate (buf, i, elem); pos := i'; + if (i' = A.length buf) + then flushBuffer (strmMV, strm, "output1") + else () + end + (* end case *); + release() + end + + fun flushOut strmMV = let + val strm = lockAndChkClosedOut (strmMV, "flushOut") + in + flushBuffer (strmMV, strm, "flushOut"); + SV.mPut (strmMV, strm) + end + + fun closeOut strmMV = let + val (strm as OSTRM{writer=PIO.WR{close, ...}, closed, cleanTag, ...}) = + SV.mTake strmMV + in + if !closed + then () + else ( + flushBuffer (strmMV, strm, "closeOut"); + closed := true; + CleanIO.removeCleaner cleanTag; + close()); + SV.mPut (strmMV, strm) + end + + fun mkOutstream' (wr as PIO.WR{chunkSize, writeArr, writeVec, ...}, mode) = + let + fun iterate f (buf, i, sz) = let + fun lp (_, 0) = () + | lp (i, n) = let val n' = f{buf=buf, i=i, sz=SOME n} + in lp (i+n', n-n') end + in + lp (i, sz) + end + fun writeArr' {buf, i, sz} = let + val len = (case sz + of NONE => A.length buf - i + | (SOME n) => n + (* end case *)) + in + iterate writeArr (buf, i, len) + end + fun writeVec' {buf, i, sz} = let + val len = (case sz + of NONE => V.length buf - i + | (SOME n) => n + (* end case *)) + in + iterate writeVec (buf, i, len) + end + (* install a dummy cleaner *) + val tag = CleanIO.addCleaner dummyCleaner + val strm = SV.mVarInit (OSTRM{ + buf = A.array(chunkSize, someElem), + pos = ref 0, + closed = ref false, + bufferMode = ref mode, + writer = wr, + writeArr = writeArr', + writeVec = writeVec', + cleanTag = tag + }) + in + (tag, strm) + end + fun mkOutstream arg = let + val (tag, strm) = mkOutstream' arg + in + CleanIO.rebindCleaner (tag, fn () => closeOut strm); + strm + end + + fun getWriter strmMV = let + val (strm as OSTRM{writer, bufferMode, ...}) = + lockAndChkClosedOut (strmMV, "getWriter") + in + (writer, !bufferMode) before SV.mPut(strmMV, strm) + end + + (** Position operations on outstreams **) + datatype out_pos = OUTP of { + pos : PIO.pos, + strm : outstream + } + + fun getPosOut strmMV = let + val (strm as OSTRM{writer, ...}) = + lockAndChkClosedOut (strmMV, "getWriter") + fun release () = SV.mPut(strmMV, strm) + in + flushBuffer (strmMV, strm, "getPosOut"); + case writer + of PIO.WR{getPos=SOME f, ...} => ( + OUTP{pos = f(), strm = strmMV} + handle ex => (release(); outputExn(strm, "getPosOut", ex))) + | _ => ( + release(); + outputExn(strm, "getPosOut", IO.RandomAccessNotSupported)) + (* end case *) + before release() + end + fun filePosOut (OUTP{pos, strm=strmMV}) = ( + SV.mPut (strmMV, lockAndChkClosedOut (strmMV, "filePosOut")); + pos) + fun setPosOut (OUTP{pos, strm=strmMV}) = let + val (strm as OSTRM{writer, ...}) = + lockAndChkClosedOut (strmMV, "setPosOut") + fun release () = SV.mPut(strmMV, strm) + in + case writer + of PIO.WR{setPos=SOME f, ...} => ( + (f pos) + handle ex => (release(); outputExn(strm, "setPosOut", ex))) + | _ => ( + release(); + outputExn(strm, "getPosOut", IO.RandomAccessNotSupported)) + (* end case *); + release() + end + + fun setBufferMode (strmMV, mode) = let + val (strm as OSTRM{bufferMode, ...}) = + lockAndChkClosedOut (strmMV, "setBufferMode") + in + if (mode = IO.NO_BUF) + then flushBuffer (strmMV, strm, "setBufferMode") + else (); + bufferMode := mode; + SV.mPut (strmMV, strm) + end + fun getBufferMode strmMV = let +(** should we be checking for closed streams here??? **) + val (strm as OSTRM{bufferMode, ...}) = + lockAndChkClosedOut (strmMV, "getBufferMode") + in + !bufferMode before SV.mPut (strmMV, strm) + end + + (** Text stream specific operations **) + fun outputSubstr (strmMV, ss) = let + val (strm as OSTRM os) = lockAndChkClosedOut (strmMV, "outputSubstr") + fun release () = SV.mPut (strmMV, strm) + val (v, dataStart, dataLen) = substringBase ss + val {buf, pos, bufferMode, ...} = os + val bufLen = A.length buf + fun flush () = flushBuffer (strmMV, strm, "outputSubstr") + fun flushAll () = (#writeArr os {buf=buf, i=0, sz=NONE} + handle ex => (release(); outputExn (strm, "outputSubstr", ex))) + fun writeDirect () = ( + case !pos + of 0 => () + | n => (#writeArr os {buf=buf, i=0, sz=SOME n}; pos := 0) + (* end case *); + #writeVec os {buf=v, i=dataStart, sz=SOME dataLen}) + handle ex => (release(); outputExn (strm, "outputSubstr", ex)) + fun insert copyVec = let + val bufLen = A.length buf + val dataLen = V.length v + in + if (dataLen >= bufLen) + then writeDirect() + else let + val i = !pos + val avail = bufLen - i + in + if (avail < dataLen) + then let + val _ = A.copyVec{ + src=v, si=dataStart, len=SOME avail, + dst=buf, di=i + } + val _ = flushAll() + val needsFlush = copyVec(v, avail, dataLen-avail, buf, 0) + in + pos := dataLen-avail; + if needsFlush then flush () else () + end + else let + val needsFlush = copyVec(v, dataStart, dataLen, buf, i) + in + pos := i + dataLen; + if (needsFlush orelse (avail = dataLen)) + then flush() + else () + end + end + end + in + case !bufferMode + of IO.NO_BUF => writeDirect () + | IO.LINE_BUF => insert lineBufCopyVec + | IO.BLOCK_BUF => insert blockBufCopyVec + (* end case *); + release() + end + + end (* StreamIO *) + + type vector = V.vector + type elem = V.elem + type instream = StreamIO.instream SV.mvar + type outstream = StreamIO.outstream SV.mvar + + (** Input operations **) + fun input strm = let val (v, strm') = StreamIO.input(SV.mTake strm) + in + SV.mPut (strm, strm'); v + end + fun input1 strm = (case StreamIO.input1(SV.mTake strm) + of NONE => NONE + | (SOME(elem, strm')) => (SV.mPut (strm, strm'); SOME elem) + (* end case *)) + fun inputN (strm, n) = let val (v, strm') = StreamIO.inputN (SV.mTake strm, n) + in + SV.mPut (strm, strm'); v + end + fun inputAll (strm : instream) = let + val (v, strm') = StreamIO.inputAll(SV.mTake strm) + in + SV.mPut (strm, strm'); v + end + + (* event-value constructors *) + local + datatype 'a result = RES of 'a | EXN of exn + fun sendEvt (ch, v) = CML.sendEvt(ch, RES v) + fun sendExnEvt (ch, exn) = CML.sendEvt(ch, EXN exn) + fun recvEvt ch = + CML.wrap(CML.recvEvt ch, fn (RES v) => v | (EXN exn) => raise exn) + fun doInput inputEvt (strm : instream) nack = let + val replyCh = CML.channel() + fun inputThread () = let + val strm' = SV.mTake strm + val nackEvt = CML.wrap(nack, fn _ => SV.mPut(strm, strm')) + fun handleInput (result, strm'') = CML.select [ + CML.wrap (sendEvt(replyCh, result), + fn _ => SV.mPut(strm, strm'')), + nackEvt + ] + in + (CML.select [ + CML.wrap (inputEvt strm', handleInput), + nackEvt + ]) handle exn => CML.select [ + CML.wrap (sendExnEvt(replyCh, exn), + fn _ => SV.mPut(strm, strm')), + nackEvt + ] + end + in + ignore (CML.spawn inputThread); + recvEvt replyCh + end + in + fun input1Evt (strm : instream) = let + fun inputEvt (strm : StreamIO.instream) = CML.wrap ( + StreamIO.input1Evt strm, + fn NONE => (NONE, strm) | SOME(s, strm') => (SOME s, strm')) + in + CML.withNack (doInput inputEvt strm) + end + fun inputEvt strm = CML.withNack (doInput StreamIO.inputEvt strm) + fun inputNEvt (strm, n) = + CML.withNack (doInput (fn strm' => StreamIO.inputNEvt(strm', n)) strm) + fun inputAllEvt Strm = CML.withNack (doInput StreamIO.inputAllEvt Strm) + end (* local *) + + fun canInput (strm, n) = StreamIO.canInput (SV.mGet strm, n) + fun lookahead (strm : instream) = (case StreamIO.input1(SV.mGet strm) + of NONE => NONE + | (SOME(elem, _)) => SOME elem + (* end case *)) + fun closeIn strm = let + val (s as StreamIO.ISTRM(buf as StreamIO.IBUF{data, ...}, _)) = + SV.mTake strm + in + StreamIO.closeIn s; + SV.mPut(strm, StreamIO.findEOS buf) + end + fun endOfStream strm = StreamIO.endOfStream(SV.mGet strm) + fun getPosIn strm = StreamIO.getPosIn(SV.mGet strm) + fun setPosIn (strm, p) = mUpdate(strm, StreamIO.setPosIn p) + + (** Output operations **) + fun output (strm, v) = StreamIO.output(SV.mGet strm, v) + fun output1 (strm, c) = StreamIO.output1(SV.mGet strm, c) + fun flushOut strm = StreamIO.flushOut(SV.mGet strm) + fun closeOut strm = StreamIO.closeOut(SV.mGet strm) + fun getPosOut strm = StreamIO.getPosOut(SV.mGet strm) + fun setPosOut (strm, p as StreamIO.OUTP{strm=strm', ...}) = ( + mUpdate(strm, strm'); StreamIO.setPosOut p) + + fun mkInstream (strm : StreamIO.instream) = SV.mVarInit strm + fun getInstream (strm : instream) = SV.mGet strm + fun setInstream (strm : instream, strm') = mUpdate(strm, strm') + + fun mkOutstream (strm : StreamIO.outstream) = SV.mVarInit strm + fun getOutstream (strm : outstream) = SV.mGet strm + fun setOutstream (strm : outstream, strm') = mUpdate(strm, strm') + + (* figure out the proper buffering mode for a given writer *) + fun bufferMode (PIO.WR{ioDesc=NONE, ...}) = IO.BLOCK_BUF + | bufferMode (PIO.WR{ioDesc=SOME iod, ...}) = + if (OS.IO.kind iod = OS.IO.Kind.tty) then IO.LINE_BUF else IO.BLOCK_BUF + + (** Open files **) + fun openIn fname = + mkInstream(StreamIO.mkInstream(OSPrimIO.openRd fname, NONE)) + handle ex => raise IO.Io{function="openIn", name=fname, cause=ex} + fun openOut fname = let + val wr = OSPrimIO.openWr fname + in + mkOutstream(StreamIO.mkOutstream(wr, bufferMode wr)) + handle ex => raise IO.Io{function="openOut", name=fname, cause=ex} + end + fun openAppend fname = + mkOutstream(StreamIO.mkOutstream(OSPrimIO.openApp fname, IO.NO_BUF)) + handle ex => raise IO.Io{function="openAppend", name=fname, cause=ex} + + (** Text stream specific operations **) + fun inputLine strm = let val (s, strm') = StreamIO.inputLine (SV.mTake strm) + in + SV.mPut(strm, strm'); s + end + fun outputSubstr (strm, ss) = StreamIO.outputSubstr (SV.mGet strm, ss) + fun openString src = + mkInstream(StreamIO.mkInstream(OSPrimIO.strReader src, NONE)) + handle ex => raise IO.Io{function="openIn", name="", cause=ex} + + structure ChanIO = ChanIOFn( + structure PrimIO = PIO + structure V = CharVector + structure A = CharArray) + + (* open an instream that is connected to the output port of a channel. *) + fun openChanIn ch = + mkInstream(StreamIO.mkInstream(ChanIO.mkReader ch, NONE)) + + (* open an outstream that is connected to the input port of a channel. *) + fun openChanOut ch = + mkOutstream(StreamIO.mkOutstream(ChanIO.mkWriter ch, IO.NO_BUF)) + + (** Standard streams **) + local + structure SIO = StreamIO + fun mkStdIn rebind = let + val (tag, strm) = SIO.mkInstream'(OSPrimIO.stdIn(), NONE) + in + if rebind + then CleanIO.rebindCleaner (tag, dummyCleaner) + else (); + strm + end + fun mkStdOut rebind = let + val wr = OSPrimIO.stdOut() + val (tag, strm) = SIO.mkOutstream'(wr, bufferMode wr) + in + if rebind + then CleanIO.rebindCleaner (tag, fn () => SIO.flushOut strm) + else (); + strm + end + fun mkStdErr rebind = let + val (tag, strm) = SIO.mkOutstream'(OSPrimIO.stdErr(), IO.NO_BUF) + in + if rebind + then CleanIO.rebindCleaner (tag, fn () => SIO.flushOut strm) + else (); + strm + end + in + (* build the standard streams. Since we are not currently running CML, we + * cannot do the cleaner rebinding here, but that is okay, since these are + * just place holders. + *) + val stdIn = mkInstream(mkStdIn false) + val stdOut = mkOutstream(mkStdOut false) + val stdErr = mkOutstream(mkStdErr false) + + fun print s = let val strm' = SV.mTake stdOut + in + StreamIO.output (strm', s); StreamIO.flushOut strm'; + SV.mPut(stdOut, strm') + end + + fun scanStream scanFn = let + val scan = scanFn StreamIO.input1 + fun doit strm = let + val instrm = getInstream strm + in + case scan instrm + of NONE => NONE + | SOME(item, instrm') => ( + setInstream(strm, instrm'); + SOME item) + (* end case *) + end + in + doit + end + + (* Establish a hook function to rebuild the I/O stack *) + val _ = CleanIO.stdStrmHook := (fn () => ( + setInstream (stdIn, mkStdIn true); + setOutstream (stdOut, mkStdOut true); + setOutstream (stdErr, mkStdErr true); + SMLofNJ.Internals.prHook := print)) + end (* local *) + + end (* TextIOFn *) diff --git a/cml/src/IO/text-prim-io.sml b/cml/src/IO/text-prim-io.sml new file mode 100644 index 0000000..f9b8e84 --- /dev/null +++ b/cml/src/IO/text-prim-io.sml @@ -0,0 +1,15 @@ +(* text-prim-io.sml + * + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + *) + +structure TextPrimIO = PrimIO ( + structure Vector = CharVector + structure Array = CharArray + structure VectorSlice = CharVectorSlice + structure ArraySlice = CharArraySlice + val someElem = #"\000" + type pos = Position.int + val compare = Position.compare); + diff --git a/cml/src/OS/.cm/GUID/os-io-sig.sml b/cml/src/OS/.cm/GUID/os-io-sig.sml new file mode 100644 index 0000000..1561cf6 --- /dev/null +++ b/cml/src/OS/.cm/GUID/os-io-sig.sml @@ -0,0 +1 @@ +guid-$cml/(cml-internal.cm):OS/(sources.cm):os-io-sig.sml-1714016095.753 diff --git a/cml/src/OS/.cm/GUID/os-process-sig.sml b/cml/src/OS/.cm/GUID/os-process-sig.sml new file mode 100644 index 0000000..63e3087 --- /dev/null +++ b/cml/src/OS/.cm/GUID/os-process-sig.sml @@ -0,0 +1 @@ +guid-$cml/(cml-internal.cm):OS/(sources.cm):os-process-sig.sml-1714016095.758 diff --git a/cml/src/OS/.cm/GUID/os-sig.sml b/cml/src/OS/.cm/GUID/os-sig.sml new file mode 100644 index 0000000..194e8ea --- /dev/null +++ b/cml/src/OS/.cm/GUID/os-sig.sml @@ -0,0 +1 @@ +guid-$cml/(cml-internal.cm):OS/(sources.cm):os-sig.sml-1714016095.762 diff --git a/cml/src/OS/.cm/GUID/os-signatures.sml b/cml/src/OS/.cm/GUID/os-signatures.sml new file mode 100644 index 0000000..67a176f --- /dev/null +++ b/cml/src/OS/.cm/GUID/os-signatures.sml @@ -0,0 +1 @@ +guid-$cml/(cml-internal.cm):OS/(sources.cm):os-signatures.sml-1714016095.765 diff --git a/cml/src/OS/.cm/SKEL/os-io-sig.sml b/cml/src/OS/.cm/SKEL/os-io-sig.sml new file mode 100644 index 0000000..4245711 --- /dev/null +++ b/cml/src/OS/.cm/SKEL/os-io-sig.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f2d"Time"d"Event"ac"CML_OS_IO"h1ad"Kind"h0 \ No newline at end of file diff --git a/cml/src/OS/.cm/SKEL/os-process-sig.sml b/cml/src/OS/.cm/SKEL/os-process-sig.sml new file mode 100644 index 0000000..7d75226 --- /dev/null +++ b/cml/src/OS/.cm/SKEL/os-process-sig.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f2d"Time"d"Event"ac"CML_OS_PROCESS"h0 \ No newline at end of file diff --git a/cml/src/OS/.cm/SKEL/os-sig.sml b/cml/src/OS/.cm/SKEL/os-sig.sml new file mode 100644 index 0000000..7b1e70b --- /dev/null +++ b/cml/src/OS/.cm/SKEL/os-sig.sml @@ -0,0 +1,2 @@ +Skeleton 5 +ac"CML_OS"h4ad"FileSys"gp1c"OS_FILE_SYS"ad"Path"gp1c"OS_PATH"ad"Process"gp1c"CML_OS_PROCESS"ad"IO"gp1c"CML_OS_IO" \ No newline at end of file diff --git a/cml/src/OS/.cm/SKEL/os-signatures.sml b/cml/src/OS/.cm/SKEL/os-signatures.sml new file mode 100644 index 0000000..47b6b33 --- /dev/null +++ b/cml/src/OS/.cm/SKEL/os-signatures.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d3ac"OS_IO"gp1c"CML_OS_IO"ac"OS_PROCESS"gp1c"CML_OS_PROCESS"ac"OS"gp1c"CML_OS" \ No newline at end of file diff --git a/cml/src/OS/.cm/amd64-unix/os-io-sig.sml b/cml/src/OS/.cm/amd64-unix/os-io-sig.sml new file mode 100644 index 0000000..3f6b0a5 Binary files /dev/null and b/cml/src/OS/.cm/amd64-unix/os-io-sig.sml differ diff --git a/cml/src/OS/.cm/amd64-unix/os-process-sig.sml b/cml/src/OS/.cm/amd64-unix/os-process-sig.sml new file mode 100644 index 0000000..6e6f80f Binary files /dev/null and b/cml/src/OS/.cm/amd64-unix/os-process-sig.sml differ diff --git a/cml/src/OS/.cm/amd64-unix/os-sig.sml b/cml/src/OS/.cm/amd64-unix/os-sig.sml new file mode 100644 index 0000000..48da6ac Binary files /dev/null and b/cml/src/OS/.cm/amd64-unix/os-sig.sml differ diff --git a/cml/src/OS/.cm/amd64-unix/os-signatures.sml b/cml/src/OS/.cm/amd64-unix/os-signatures.sml new file mode 100644 index 0000000..33d9442 Binary files /dev/null and b/cml/src/OS/.cm/amd64-unix/os-signatures.sml differ diff --git a/cml/src/OS/os-io-sig.sml b/cml/src/OS/os-io-sig.sml new file mode 100644 index 0000000..32ad653 --- /dev/null +++ b/cml/src/OS/os-io-sig.sml @@ -0,0 +1,74 @@ +(* os-io-sig.sml + * + * COPYRIGHT (c) 1995 AT&T Bell Laboratories. + * + * The CML version of the generic low-level I/O interface. + *) + +signature CML_OS_IO = + sig + eqtype iodesc + (* an iodesc is an abstract descriptor for an OS object that + * supports I/O (e.g., file, tty device, socket, ...). + *) + eqtype iodesc_kind + + val hash : iodesc -> word + (* return a hash value for the I/O descriptor. *) + + val compare : (iodesc * iodesc) -> order + (* compare two I/O descriptors *) + + val kind : iodesc -> iodesc_kind + (* return the kind of I/O descriptor. *) + + structure Kind : sig + val file : iodesc_kind + val dir : iodesc_kind + val symlink : iodesc_kind + val tty : iodesc_kind + val pipe : iodesc_kind + val socket : iodesc_kind + val device : iodesc_kind + end + + type poll_desc + (* this is an abstract representation of a polling operation on + * an I/O descriptor. + *) + type poll_info + (* this is an abstract representation of the per-descriptor + * information returned by the poll operation. + *) + + val pollDesc : iodesc -> poll_desc option + (* create a polling operation on the given descriptor; note that + * not all I/O devices support polling. + *) + val pollToIODesc : poll_desc -> iodesc + (* return the I/O descriptor that is being polled *) + + exception Poll + + (* set polling events; if the polling operation is not appropriate + * for the underlying I/O device, then the Poll exception is raised. + *) + val pollIn : poll_desc -> poll_desc + val pollOut : poll_desc -> poll_desc + val pollPri : poll_desc -> poll_desc + + (* polling functions *) + val poll : (poll_desc list * Time.time option) -> poll_info list + (* a timeout of NONE means wait indefinitely; a timeout of + * (SOME Time.zeroTime) means do not block. + *) + val pollEvt : poll_desc list -> poll_info list Event.event + + (* check for conditions *) + val isIn : poll_info -> bool + val isOut : poll_info -> bool + val isPri : poll_info -> bool + val infoToPollDesc : poll_info -> poll_desc + + end (* OS_IO *) + diff --git a/cml/src/OS/os-process-sig.sml b/cml/src/OS/os-process-sig.sml new file mode 100644 index 0000000..f1b8432 --- /dev/null +++ b/cml/src/OS/os-process-sig.sml @@ -0,0 +1,29 @@ +(* os-process-sig.sml + * + * COPYRIGHT (c) 2008 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * The CML version of the generic process control interface. + *) + +signature CML_OS_PROCESS = + sig + + type status + + val success : status + val failure : status + val isSuccess : status -> bool + + val system : string -> status + val systemEvt : string -> status Event.event + + val atExit : (unit -> unit) -> unit + + val exit : status -> 'a + val terminate : status -> 'a + + val getEnv : string -> string option + val sleep : Time.time -> unit + + end diff --git a/cml/src/OS/os-sig.sml b/cml/src/OS/os-sig.sml new file mode 100644 index 0000000..087ef3b --- /dev/null +++ b/cml/src/OS/os-sig.sml @@ -0,0 +1,21 @@ +(* os-sig.sml + * + * COPYRIGHT (c) 1995 AT&T Bell Laboratories. + *) + +signature CML_OS = + sig + type syserror + + val errorName : syserror -> string + val errorMsg : syserror -> string + + exception SysErr of (string * syserror option) + + structure FileSys : OS_FILE_SYS + structure Path : OS_PATH + structure Process : CML_OS_PROCESS + structure IO : CML_OS_IO + + end; + diff --git a/cml/src/OS/os-signatures.sml b/cml/src/OS/os-signatures.sml new file mode 100644 index 0000000..59223d7 --- /dev/null +++ b/cml/src/OS/os-signatures.sml @@ -0,0 +1,11 @@ +(* os-signatures.sml + * + * COPYRIGHT (c) 1995 AT&T Bell Laboratories. + * + * Bind the CML versions of the OS signatures. + *) + +signature OS_IO = CML_OS_IO; +signature OS_PROCESS = CML_OS_PROCESS; +signature OS = CML_OS; + diff --git a/cml/src/OS/sources.cm b/cml/src/OS/sources.cm new file mode 100644 index 0000000..8c6034e --- /dev/null +++ b/cml/src/OS/sources.cm @@ -0,0 +1,22 @@ +(* sources.cm + * + * These are the generic OS interfaces that are modified for CML. + *) + +Group + signature OS_IO + signature OS_PROCESS + signature OS +is +#if defined (NEW_CM) + $/basis.cm + $cml/core-cml.cm +#else + ../core-cml.cm +#endif + + os-io-sig.sml + os-process-sig.sml + os-sig.sml + os-signatures.sml + diff --git a/cml/src/README b/cml/src/README new file mode 100644 index 0000000..3b753d3 --- /dev/null +++ b/cml/src/README @@ -0,0 +1,18 @@ +Here is a road map of the CML sources: + + src/core-cml this contains the implementation of the CML features + such as threads, channels, events, etc. + + src/glue this contains code to glue the various CML parts together. + + src/OS this contains a CML interface to the SML Basis OS API. + + src/IO this contains a CML interface to the SML Basis IO API. + + src/Posix this contains a CML interface to the Posix 1003.1a + binding defined in the SML Basis. [not yet implemented] + + src/Sockets this contains a CML interface to the SML Basis sockets API. + + src/Unix this contains a CML implementation of various Unix + specific modules --- both internal and external. diff --git a/cml/src/Sockets/.cm/GUID/cml-generic-sock.sml b/cml/src/Sockets/.cm/GUID/cml-generic-sock.sml new file mode 100644 index 0000000..9ce742f --- /dev/null +++ b/cml/src/Sockets/.cm/GUID/cml-generic-sock.sml @@ -0,0 +1 @@ +guid-$cml/(cml-internal.cm):Sockets/(sources.cm):(layer3.cm):cml-generic-sock.sml-1714016096.981 diff --git a/cml/src/Sockets/.cm/GUID/cml-inet-sock.sml b/cml/src/Sockets/.cm/GUID/cml-inet-sock.sml new file mode 100644 index 0000000..2d461be --- /dev/null +++ b/cml/src/Sockets/.cm/GUID/cml-inet-sock.sml @@ -0,0 +1 @@ +guid-$cml/(cml-internal.cm):Sockets/(sources.cm):(layer3.cm):cml-inet-sock.sml-1714016096.999 diff --git a/cml/src/Sockets/.cm/GUID/cml-socket-sig.sml b/cml/src/Sockets/.cm/GUID/cml-socket-sig.sml new file mode 100644 index 0000000..e8e6653 --- /dev/null +++ b/cml/src/Sockets/.cm/GUID/cml-socket-sig.sml @@ -0,0 +1 @@ +guid-$cml/(cml-internal.cm):Sockets/(sources.cm):(layer3.cm):(layer2.cm):(layer1.cm):cml-socket-sig.sml-1714016096.684 diff --git a/cml/src/Sockets/.cm/GUID/cml-socket.sml b/cml/src/Sockets/.cm/GUID/cml-socket.sml new file mode 100644 index 0000000..87ad71e --- /dev/null +++ b/cml/src/Sockets/.cm/GUID/cml-socket.sml @@ -0,0 +1 @@ +guid-$cml/(cml-internal.cm):Sockets/(sources.cm):(layer3.cm):(layer2.cm):(layer1.cm):cml-socket.sml-1714016096.693 diff --git a/cml/src/Sockets/.cm/GUID/cml-unix-sock.sml b/cml/src/Sockets/.cm/GUID/cml-unix-sock.sml new file mode 100644 index 0000000..9fc69d1 --- /dev/null +++ b/cml/src/Sockets/.cm/GUID/cml-unix-sock.sml @@ -0,0 +1 @@ +guid-$cml/(cml-internal.cm):Sockets/(sources.cm):(layer3.cm):cml-unix-sock.sml-1714016097.023 diff --git a/cml/src/Sockets/.cm/GUID/generic-sock-sig.sml b/cml/src/Sockets/.cm/GUID/generic-sock-sig.sml new file mode 100644 index 0000000..1f938c7 --- /dev/null +++ b/cml/src/Sockets/.cm/GUID/generic-sock-sig.sml @@ -0,0 +1 @@ +guid-$cml/(cml-internal.cm):Sockets/(sources.cm):(layer3.cm):(layer2.cm):generic-sock-sig.sml-1714016096.976 diff --git a/cml/src/Sockets/.cm/GUID/inet-sock-sig.sml b/cml/src/Sockets/.cm/GUID/inet-sock-sig.sml new file mode 100644 index 0000000..c321885 --- /dev/null +++ b/cml/src/Sockets/.cm/GUID/inet-sock-sig.sml @@ -0,0 +1 @@ +guid-$cml/(cml-internal.cm):Sockets/(sources.cm):(layer3.cm):(layer2.cm):inet-sock-sig.sml-1714016096.994 diff --git a/cml/src/Sockets/.cm/GUID/pre-sock.sml b/cml/src/Sockets/.cm/GUID/pre-sock.sml new file mode 100644 index 0000000..5b3076a --- /dev/null +++ b/cml/src/Sockets/.cm/GUID/pre-sock.sml @@ -0,0 +1 @@ +guid-$cml/(cml-internal.cm):Sockets/(sources.cm):(layer3.cm):(layer2.cm):(layer1.cm):pre-sock.sml-1714016096.669 diff --git a/cml/src/Sockets/.cm/GUID/rebind1.sml b/cml/src/Sockets/.cm/GUID/rebind1.sml new file mode 100644 index 0000000..b9875f6 --- /dev/null +++ b/cml/src/Sockets/.cm/GUID/rebind1.sml @@ -0,0 +1 @@ +guid-$cml/(cml-internal.cm):Sockets/(sources.cm):(layer3.cm):(layer2.cm):rebind1.sml-1714016096.897 diff --git a/cml/src/Sockets/.cm/GUID/rebind2-unix-sock.sml b/cml/src/Sockets/.cm/GUID/rebind2-unix-sock.sml new file mode 100644 index 0000000..c5d5ca0 --- /dev/null +++ b/cml/src/Sockets/.cm/GUID/rebind2-unix-sock.sml @@ -0,0 +1 @@ +guid-$cml/(cml-internal.cm):Sockets/(sources.cm):rebind2-unix-sock.sml-1714016097.034 diff --git a/cml/src/Sockets/.cm/GUID/rebind2.sml b/cml/src/Sockets/.cm/GUID/rebind2.sml new file mode 100644 index 0000000..750c9d2 --- /dev/null +++ b/cml/src/Sockets/.cm/GUID/rebind2.sml @@ -0,0 +1 @@ +guid-$cml/(cml-internal.cm):Sockets/(sources.cm):rebind2.sml-1714016097.011 diff --git a/cml/src/Sockets/.cm/GUID/unix-sock-sig.sml b/cml/src/Sockets/.cm/GUID/unix-sock-sig.sml new file mode 100644 index 0000000..5a6ff62 --- /dev/null +++ b/cml/src/Sockets/.cm/GUID/unix-sock-sig.sml @@ -0,0 +1 @@ +guid-$cml/(cml-internal.cm):Sockets/(sources.cm):(layer3.cm):(layer2.cm):unix-sock-sig.sml-1714016097.018 diff --git a/cml/src/Sockets/.cm/SKEL/cml-generic-sock.sml b/cml/src/Sockets/.cm/SKEL/cml-generic-sock.sml new file mode 100644 index 0000000..0a38505 --- /dev/null +++ b/cml/src/Sockets/.cm/SKEL/cml-generic-sock.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f2d"GenericSock"d"PreSock"ad"CML_GenericSock"jh0gp1c"GENERIC_SOCK" \ No newline at end of file diff --git a/cml/src/Sockets/.cm/SKEL/cml-inet-sock.sml b/cml/src/Sockets/.cm/SKEL/cml-inet-sock.sml new file mode 100644 index 0000000..c5f3579 --- /dev/null +++ b/cml/src/Sockets/.cm/SKEL/cml-inet-sock.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f4d"CML_GenericSock"d"PreSock"d"INetSock"d"Socket"ad"CML_INetSock"jh2ad"UDP"0ad"TCP"gp1c"INET_SOCK" \ No newline at end of file diff --git a/cml/src/Sockets/.cm/SKEL/cml-socket-sig.sml b/cml/src/Sockets/.cm/SKEL/cml-socket-sig.sml new file mode 100644 index 0000000..694d16c --- /dev/null +++ b/cml/src/Sockets/.cm/SKEL/cml-socket-sig.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f1CML"ac"CML_SOCKET"h2egp1c"SYNCHRONOUS_SOCKET"f3d"Word8ArraySlice"d"Word8Vector" \ No newline at end of file diff --git a/cml/src/Sockets/.cm/SKEL/cml-socket.sml b/cml/src/Sockets/.cm/SKEL/cml-socket.sml new file mode 100644 index 0000000..3b466c3 --- /dev/null +++ b/cml/src/Sockets/.cm/SKEL/cml-socket.sml @@ -0,0 +1,3 @@ +Skeleton 5 +d2f4CML"PreSock"d"SyncVar"Socket"ad"CML_Socket"jh7aPS"gp1 +aAF"gp2CaSOCK"gp2ad"Ctl"0aS'"jgp1)egp1,f39Ngp1c"CML_SOCKET" \ No newline at end of file diff --git a/cml/src/Sockets/.cm/SKEL/cml-unix-sock.sml b/cml/src/Sockets/.cm/SKEL/cml-unix-sock.sml new file mode 100644 index 0000000..9fa582b --- /dev/null +++ b/cml/src/Sockets/.cm/SKEL/cml-unix-sock.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f3d"CML_GenericSock"d"UnixSock"Socket"ad"CML_UnixSock"jh3aSOCK"gp2!>ad"Strm"0ad"DGrm"gp1c"UNIX_SOCK" \ No newline at end of file diff --git a/cml/src/Sockets/.cm/SKEL/generic-sock-sig.sml b/cml/src/Sockets/.cm/SKEL/generic-sock-sig.sml new file mode 100644 index 0000000..998d8c1 --- /dev/null +++ b/cml/src/Sockets/.cm/SKEL/generic-sock-sig.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f1d"Socket"ac"GENERIC_SOCK"h0 \ No newline at end of file diff --git a/cml/src/Sockets/.cm/SKEL/inet-sock-sig.sml b/cml/src/Sockets/.cm/SKEL/inet-sock-sig.sml new file mode 100644 index 0000000..6daf4f1 --- /dev/null +++ b/cml/src/Sockets/.cm/SKEL/inet-sock-sig.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f2d"NetHostDB"d"Socket"ac"INET_SOCK"h2ad"UDP"0ad"TCP"/ \ No newline at end of file diff --git a/cml/src/Sockets/.cm/SKEL/pre-sock.sml b/cml/src/Sockets/.cm/SKEL/pre-sock.sml new file mode 100644 index 0000000..bed0888 --- /dev/null +++ b/cml/src/Sockets/.cm/SKEL/pre-sock.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f5Cd"IOManager"d"OS"d"CML"d"SyncVar"d"Socket"Nad"PreSock"j0< \ No newline at end of file diff --git a/cml/src/Sockets/.cm/SKEL/rebind1.sml b/cml/src/Sockets/.cm/SKEL/rebind1.sml new file mode 100644 index 0000000..46095bd --- /dev/null +++ b/cml/src/Sockets/.cm/SKEL/rebind1.sml @@ -0,0 +1,2 @@ +Skeleton 5 +ad"Socket"h1egp1d"CML_Socket" \ No newline at end of file diff --git a/cml/src/Sockets/.cm/SKEL/rebind2-unix-sock.sml b/cml/src/Sockets/.cm/SKEL/rebind2-unix-sock.sml new file mode 100644 index 0000000..470f951 --- /dev/null +++ b/cml/src/Sockets/.cm/SKEL/rebind2-unix-sock.sml @@ -0,0 +1,2 @@ +Skeleton 5 +ad"UnixSock"h1egp1d"CML_UnixSock" \ No newline at end of file diff --git a/cml/src/Sockets/.cm/SKEL/rebind2.sml b/cml/src/Sockets/.cm/SKEL/rebind2.sml new file mode 100644 index 0000000..8c0252f --- /dev/null +++ b/cml/src/Sockets/.cm/SKEL/rebind2.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2ad"GenericSock"h1egp1d"CML_GenericSock"ad"INetSock"h1egp1d"CML_INetSock" \ No newline at end of file diff --git a/cml/src/Sockets/.cm/SKEL/unix-sock-sig.sml b/cml/src/Sockets/.cm/SKEL/unix-sock-sig.sml new file mode 100644 index 0000000..6ca7ffa --- /dev/null +++ b/cml/src/Sockets/.cm/SKEL/unix-sock-sig.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f1d"Socket"ac"UNIX_SOCK"h2ad"Strm"0ad"DGrm"$ \ No newline at end of file diff --git a/cml/src/Sockets/.cm/amd64-unix/cml-generic-sock.sml b/cml/src/Sockets/.cm/amd64-unix/cml-generic-sock.sml new file mode 100644 index 0000000..406d8f2 Binary files /dev/null and b/cml/src/Sockets/.cm/amd64-unix/cml-generic-sock.sml differ diff --git a/cml/src/Sockets/.cm/amd64-unix/cml-inet-sock.sml b/cml/src/Sockets/.cm/amd64-unix/cml-inet-sock.sml new file mode 100644 index 0000000..33e0693 Binary files /dev/null and b/cml/src/Sockets/.cm/amd64-unix/cml-inet-sock.sml differ diff --git a/cml/src/Sockets/.cm/amd64-unix/cml-socket-sig.sml b/cml/src/Sockets/.cm/amd64-unix/cml-socket-sig.sml new file mode 100644 index 0000000..4d77120 Binary files /dev/null and b/cml/src/Sockets/.cm/amd64-unix/cml-socket-sig.sml differ diff --git a/cml/src/Sockets/.cm/amd64-unix/cml-socket.sml b/cml/src/Sockets/.cm/amd64-unix/cml-socket.sml new file mode 100644 index 0000000..fcd62ef Binary files /dev/null and b/cml/src/Sockets/.cm/amd64-unix/cml-socket.sml differ diff --git a/cml/src/Sockets/.cm/amd64-unix/cml-unix-sock.sml b/cml/src/Sockets/.cm/amd64-unix/cml-unix-sock.sml new file mode 100644 index 0000000..2bc43ec Binary files /dev/null and b/cml/src/Sockets/.cm/amd64-unix/cml-unix-sock.sml differ diff --git a/cml/src/Sockets/.cm/amd64-unix/generic-sock-sig.sml b/cml/src/Sockets/.cm/amd64-unix/generic-sock-sig.sml new file mode 100644 index 0000000..102db5b Binary files /dev/null and b/cml/src/Sockets/.cm/amd64-unix/generic-sock-sig.sml differ diff --git a/cml/src/Sockets/.cm/amd64-unix/inet-sock-sig.sml b/cml/src/Sockets/.cm/amd64-unix/inet-sock-sig.sml new file mode 100644 index 0000000..db28850 Binary files /dev/null and b/cml/src/Sockets/.cm/amd64-unix/inet-sock-sig.sml differ diff --git a/cml/src/Sockets/.cm/amd64-unix/pre-sock.sml b/cml/src/Sockets/.cm/amd64-unix/pre-sock.sml new file mode 100644 index 0000000..e1ce67d Binary files /dev/null and b/cml/src/Sockets/.cm/amd64-unix/pre-sock.sml differ diff --git a/cml/src/Sockets/.cm/amd64-unix/rebind1.sml b/cml/src/Sockets/.cm/amd64-unix/rebind1.sml new file mode 100644 index 0000000..b8ecc57 Binary files /dev/null and b/cml/src/Sockets/.cm/amd64-unix/rebind1.sml differ diff --git a/cml/src/Sockets/.cm/amd64-unix/rebind2-unix-sock.sml b/cml/src/Sockets/.cm/amd64-unix/rebind2-unix-sock.sml new file mode 100644 index 0000000..e76e701 Binary files /dev/null and b/cml/src/Sockets/.cm/amd64-unix/rebind2-unix-sock.sml differ diff --git a/cml/src/Sockets/.cm/amd64-unix/rebind2.sml b/cml/src/Sockets/.cm/amd64-unix/rebind2.sml new file mode 100644 index 0000000..9005004 Binary files /dev/null and b/cml/src/Sockets/.cm/amd64-unix/rebind2.sml differ diff --git a/cml/src/Sockets/.cm/amd64-unix/unix-sock-sig.sml b/cml/src/Sockets/.cm/amd64-unix/unix-sock-sig.sml new file mode 100644 index 0000000..019c098 Binary files /dev/null and b/cml/src/Sockets/.cm/amd64-unix/unix-sock-sig.sml differ diff --git a/cml/src/Sockets/README b/cml/src/Sockets/README new file mode 100644 index 0000000..8585f6a --- /dev/null +++ b/cml/src/Sockets/README @@ -0,0 +1,11 @@ +This is the CML implementation of the Sockets API. The implementation +mostly follows that of the SML/NJ version, but with a couple of exceptions: + + 1) the systems calls ?? are executed with the timers disabled, since they + may take much longer than a time quantum and there is no non-blocking + form. + + 2) the systems calls ?? are restarted if they are interrupted. + + 3) additional event-valued operations are added. + diff --git a/cml/src/Sockets/cml-generic-sock.sml b/cml/src/Sockets/cml-generic-sock.sml new file mode 100644 index 0000000..0b5f8c1 --- /dev/null +++ b/cml/src/Sockets/cml-generic-sock.sml @@ -0,0 +1,37 @@ +(* cml-generic-sock.sml + * + * COPYRIGHT (c) 1996 AT&T Research. + *) + +structure CML_GenericSock : GENERIC_SOCK = + struct + +(* + (* returns a list of the supported address families; this should include + * at least: Socket.AF.inet. + *) + val addressFamilies = GenericSock.addressFamilies + + (* returns a list of the supported socket types; this should include at + * least: Socket.SOCK.stream and Socket.SOCK.dgram. + *) + val socketTypes = GenericSock.socketTypes +*) + + (* create sockets using default protocol *) + fun socket arg = PreSock.mkSock(GenericSock.socket arg) + fun socketPair arg = let + val (s1, s2) = GenericSock.socketPair arg + in + (PreSock.mkSock s1, PreSock.mkSock s2) + end + + (* create sockets using the specified protocol *) + fun socket' arg = PreSock.mkSock(GenericSock.socket' arg) + fun socketPair' arg = let + val (s1, s2) = GenericSock.socketPair' arg + in + (PreSock.mkSock s1, PreSock.mkSock s2) + end + + end diff --git a/cml/src/Sockets/cml-inet-sock.sml b/cml/src/Sockets/cml-inet-sock.sml new file mode 100644 index 0000000..1759ea5 --- /dev/null +++ b/cml/src/Sockets/cml-inet-sock.sml @@ -0,0 +1,43 @@ +(* cml-inet-sock.sml + * + * COPYRIGHT (c) 1996 AT&T Research. + *) + +structure CML_INetSock : INET_SOCK = + struct + type inet = INetSock.inet + + type 'a sock = (inet, 'a) Socket.sock + type 'a stream_sock = 'a Socket.stream sock + type dgram_sock = Socket.dgram sock + + type sock_addr = inet Socket.sock_addr + + val inetAF = INetSock.inetAF + + val toAddr = INetSock.toAddr + val fromAddr = INetSock.fromAddr + val any = INetSock.any + + structure UDP = + struct + fun socket () = + CML_GenericSock.socket (inetAF, Socket.SOCK.dgram) + fun socket' proto = + CML_GenericSock.socket' (inetAF, Socket.SOCK.dgram, proto) + end + + structure TCP = + struct + fun socket () = + CML_GenericSock.socket (inetAF, Socket.SOCK.stream) + fun socket' proto = + CML_GenericSock.socket' (inetAF, Socket.SOCK.stream, proto) + (* tcp control options *) + fun getNODELAY (PreSock.CMLSock{sock, ...}) = + INetSock.TCP.getNODELAY sock + fun setNODELAY (PreSock.CMLSock{sock, ...}, flg) = + INetSock.TCP.setNODELAY(sock, flg) + end + + end diff --git a/cml/src/Sockets/cml-socket-sig.sml b/cml/src/Sockets/cml-socket-sig.sml new file mode 100644 index 0000000..a5bb745 --- /dev/null +++ b/cml/src/Sockets/cml-socket-sig.sml @@ -0,0 +1,45 @@ +(* cml-socket-sig.sml + * + * COPYRIGHT (c) 1996 AT&T Research. + * + * This signature extends the SML Basis SOCKET signature with event + * constructors for the input operations and accept. + *) + +signature CML_SOCKET = + sig + type 'a event = 'a CML.event + + include SYNCHRONOUS_SOCKET (* don't drag in non-blocking ops *) + + val connectEvt : (('a, 'b) sock * 'a sock_addr) -> unit event + + val acceptEvt : + ('a, passive stream) sock + -> (('a, active stream) sock * 'a sock_addr) event + + (* Sock input event constructors *) + val recvVecEvt : + ('a, active stream) sock * int -> Word8Vector.vector CML.event + val recvArrEvt : + ('a, active stream) sock * Word8ArraySlice.slice -> int CML.event + val recvVecEvt' : + ('a, active stream) sock * int * in_flags + -> Word8Vector.vector CML.event + val recvArrEvt' : + ('a, active stream) sock * Word8ArraySlice.slice * in_flags + -> int CML.event + val recvVecFromEvt : + ('a, dgram) sock * int + -> (Word8Vector.vector * 'a sock_addr) CML.event + val recvArrFromEvt : + ('a, dgram) sock * Word8ArraySlice.slice + -> (int * 'a sock_addr) CML.event + val recvVecFromEvt' : + ('a, dgram) sock * int * in_flags + -> (Word8Vector.vector * 'a sock_addr) CML.event + val recvArrFromEvt' : + ('a, dgram) sock * Word8ArraySlice.slice * in_flags + -> (int * 'a sock_addr) CML.event + + end diff --git a/cml/src/Sockets/cml-socket.sml b/cml/src/Sockets/cml-socket.sml new file mode 100644 index 0000000..fdc9b89 --- /dev/null +++ b/cml/src/Sockets/cml-socket.sml @@ -0,0 +1,239 @@ +(* cml-socket.sml + * + * COPYRIGHT (c) 1996 AT&T Research. + *) + +structure CML_Socket : CML_SOCKET = + struct + structure PS = PreSock + + type 'a event = 'a CML.event + + (* sockets are polymorphic; the instantiation of the type variables + * provides a way to distinguish between different kinds of sockets. + *) + type ('af, 'sock) sock = ('af, 'sock) PS.sock + type 'af sock_addr = 'af Socket.sock_addr + + (* witness types for the socket parameter *) + type dgram = Socket.dgram + type 'a stream = 'a Socket.stream + type passive = Socket.passive + type active = Socket.active + + (* address families *) + structure AF = Socket.AF + + (* socket types *) + structure SOCK = Socket.SOCK + + (* socket control operations *) + structure Ctl = + struct + fun wrapSet f (PS.CMLSock{sock, ...}, v) = f(sock, v) + fun wrapGet f (PS.CMLSock{sock, ...}) = f sock + + (* get/set socket options *) + fun getDEBUG arg = wrapGet Socket.Ctl.getDEBUG arg + fun setDEBUG arg = wrapSet Socket.Ctl.setDEBUG arg + fun getREUSEADDR arg = wrapGet Socket.Ctl.getREUSEADDR arg + fun setREUSEADDR arg = wrapSet Socket.Ctl.setREUSEADDR arg + fun getKEEPALIVE arg = wrapGet Socket.Ctl.getKEEPALIVE arg + fun setKEEPALIVE arg = wrapSet Socket.Ctl.setKEEPALIVE arg + fun getDONTROUTE arg = wrapGet Socket.Ctl.getDONTROUTE arg + fun setDONTROUTE arg = wrapSet Socket.Ctl.setDONTROUTE arg + fun getLINGER arg = wrapGet Socket.Ctl.getLINGER arg + fun setLINGER arg = wrapSet Socket.Ctl.setLINGER arg + fun getBROADCAST arg = wrapGet Socket.Ctl.getBROADCAST arg + fun setBROADCAST arg = wrapSet Socket.Ctl.setBROADCAST arg + fun getOOBINLINE arg = wrapGet Socket.Ctl.getOOBINLINE arg + fun setOOBINLINE arg = wrapSet Socket.Ctl.setOOBINLINE arg + fun getSNDBUF arg = wrapGet Socket.Ctl.getSNDBUF arg + fun setSNDBUF arg = wrapSet Socket.Ctl.setSNDBUF arg + fun getRCVBUF arg = wrapGet Socket.Ctl.getRCVBUF arg + fun setRCVBUF arg = wrapSet Socket.Ctl.setRCVBUF arg + fun getTYPE arg = wrapGet Socket.Ctl.getTYPE arg + fun getERROR arg = wrapGet Socket.Ctl.getERROR arg + fun getPeerName arg = wrapGet Socket.Ctl.getPeerName arg + fun getSockName arg = wrapGet Socket.Ctl.getSockName arg + fun getNREAD arg = wrapGet Socket.Ctl.getNREAD arg + fun getATMARK arg = wrapGet Socket.Ctl.getATMARK arg + end (* Ctl *) + + (* socket address operations *) + val sameAddr = Socket.sameAddr + val familyOfAddr = Socket.familyOfAddr + + (* socket management *) + local + fun acceptNB' sock = + case Socket.acceptNB sock of + SOME (sock', addr) => + SOME (PreSock.mkSock sock', addr) + | NONE => NONE + fun accept' sock = let + val (sock', addr) = Socket.accept sock + in + (PreSock.mkSock sock', addr) + end + in + fun acceptEvt (s as PS.CMLSock{sock, ...}) = CML.guard (fn () => + case acceptNB' sock + of (SOME res) => CML.alwaysEvt res + | NONE => CML.wrap((PreSock.inEvt s), fn _ => accept' sock) + (* end case *)) + + fun accept (s as PS.CMLSock{sock, ...}) = ( + case acceptNB' sock + of (SOME res) => res + | NONE => (CML.sync(PreSock.inEvt s); accept' sock) + (* end case *)) + end (* local *) + + fun bind (PS.CMLSock{sock, ...}, addr) = Socket.bind(sock, addr) + + fun connectEvt (s as PS.CMLSock{sock, ...}, addr) = CML.guard (fn () => + if Socket.connectNB (sock, addr) then CML.alwaysEvt () + else PreSock.outEvt s) + + fun connect (s as PS.CMLSock{sock, ...}, addr) = + if Socket.connectNB (sock, addr) then () + else CML.sync (PreSock.outEvt s) + + fun listen (PS.CMLSock{sock, ...}, n) = Socket.listen(sock, n) + + fun close (PS.CMLSock{sock, state}) = ( + case (SyncVar.mTake state) + of PS.Closed => () + | _ => Socket.close sock + (* end case *); + SyncVar.mPut(state, PS.Closed)) + + structure S' : sig + datatype shutdown_mode = NO_RECVS | NO_SENDS | NO_RECVS_OR_SENDS + end = Socket + open S' + fun shutdown (PS.CMLSock{sock, ...}, how) = Socket.shutdown(sock, how) + + type sock_desc = Socket.sock_desc + fun ioDesc (PS.CMLSock{sock,...}) = Socket.ioDesc sock + fun sockDesc (PS.CMLSock{sock,...}) = Socket.sockDesc sock + val sameDesc = Socket.sameDesc + val select = Socket.select + + (* Sock I/O option types *) + type out_flags = {don't_route : bool, oob : bool} + type in_flags = {peek : bool, oob : bool} + + type 'a buf = {buf : 'a, i : int, sz : int option} + + (* Sock output operations *) + fun sendVec (s as PS.CMLSock{sock, ...}, buf) = + case Socket.sendVecNB (sock, buf) + of (SOME res) => res + | NONE => (CML.sync(PS.outEvt s); Socket.sendVec (sock, buf)) + fun sendArr (s as PS.CMLSock{sock, ...}, buf) = + case Socket.sendArrNB (sock, buf) + of (SOME res) => res + | NONE => (CML.sync(PS.outEvt s); Socket.sendArr (sock, buf)) + fun sendVec' (s as PS.CMLSock{sock, ...}, buf, flgs) = + case Socket.sendVecNB' (sock, buf, flgs) + of (SOME res) => res + | NONE => (CML.sync(PS.outEvt s); Socket.sendVec' (sock, buf, flgs)) + fun sendArr' (s as PS.CMLSock{sock, ...}, buf, flgs) = + case Socket.sendArrNB' (sock, buf, flgs) + of (SOME res) => res + | NONE => (CML.sync(PS.outEvt s); Socket.sendArr' (sock, buf, flgs)) + fun sendVecTo (s as PS.CMLSock{sock, ...}, addr, buf) = + if Socket.sendVecToNB (sock, addr, buf) then () + else (CML.sync(PS.outEvt s); Socket.sendVecTo (sock, addr, buf)) + fun sendArrTo (s as PS.CMLSock{sock, ...}, addr, buf) = + if Socket.sendArrToNB (sock, addr, buf) then () + else (CML.sync(PS.outEvt s); Socket.sendArrTo (sock, addr, buf)) + fun sendVecTo' (s as PS.CMLSock{sock, ...}, addr, buf, flgs) = + if Socket.sendVecToNB' (sock, addr, buf, flgs) then () + else (CML.sync(PS.outEvt s); Socket.sendVecTo' (sock, addr, buf, flgs)) + fun sendArrTo' (s as PS.CMLSock{sock, ...}, addr, buf, flgs) = + if Socket.sendArrToNB' (sock, addr, buf, flgs) then () + else (CML.sync(PS.outEvt s); Socket.sendArrTo' (sock, addr, buf, flgs)) + + (* Sock input operations *) + fun recvVec (s as PS.CMLSock{sock, ...}, n) = + case Socket.recvVecNB (sock, n) + of (SOME res) => res + | NONE => (CML.sync(PS.inEvt s); Socket.recvVec (sock, n)) + fun recvArr (s as PS.CMLSock{sock, ...}, buf) = + case Socket.recvArrNB (sock, buf) + of (SOME res) => res + | NONE => (CML.sync(PS.inEvt s); Socket.recvArr (sock, buf)) + fun recvVec' (s as PS.CMLSock{sock, ...}, n, flgs) = + case Socket.recvVecNB' (sock, n, flgs) + of (SOME res) => res + | NONE => (CML.sync(PS.inEvt s); Socket.recvVec' (sock, n, flgs)) + fun recvArr' (s as PS.CMLSock{sock, ...}, buf, flgs) = + case Socket.recvArrNB' (sock, buf, flgs) + of (SOME res) => res + | NONE => (CML.sync(PS.inEvt s); Socket.recvArr' (sock, buf, flgs)) + fun recvVecFrom (s as PS.CMLSock{sock, ...}, n) = + case Socket.recvVecFromNB (sock, n) + of (SOME res) => res + | NONE => (CML.sync(PS.inEvt s); Socket.recvVecFrom (sock, n)) + fun recvArrFrom (s as PS.CMLSock{sock, ...}, buf) = + case Socket.recvArrFromNB (sock, buf) + of (SOME res) => res + | NONE => (CML.sync(PS.inEvt s); Socket.recvArrFrom (sock, buf)) + fun recvVecFrom' (s as PS.CMLSock{sock, ...}, n, flgs) = + case Socket.recvVecFromNB' (sock, n, flgs) + of (SOME res) => res + | NONE => (CML.sync(PS.inEvt s); Socket.recvVecFrom' (sock, n, flgs)) + fun recvArrFrom' (s as PS.CMLSock{sock, ...}, buf, flgs) = + case Socket.recvArrFromNB' (sock, buf, flgs) + of (SOME res) => res + | NONE => (CML.sync(PS.inEvt s); + Socket.recvArrFrom' (sock, buf, flgs)) + + (* Sock input event constructors *) + fun recvVecEvt (s as PS.CMLSock{sock, ...}, n) = CML.guard (fn () => + case Socket.recvVecNB (sock, n) + of (SOME res) => CML.alwaysEvt res + | NONE => CML.wrap(PS.inEvt s, fn _ => Socket.recvVec (sock, n)) + (* end case *)) + fun recvArrEvt (s as PS.CMLSock{sock, ...}, buf) = CML.guard (fn () => + case Socket.recvArrNB (sock, buf) + of (SOME res) => CML.alwaysEvt res + | NONE => CML.wrap(PS.inEvt s, fn _ => Socket.recvArr (sock, buf)) + (* end case *)) + fun recvVecEvt' (s as PS.CMLSock{sock, ...}, n, flgs) = CML.guard (fn () => + case Socket.recvVecNB' (sock, n, flgs) + of (SOME res) => CML.alwaysEvt res + | NONE => CML.wrap(PS.inEvt s, fn _ => Socket.recvVec' (sock, n, flgs)) + (* end case *)) + fun recvArrEvt' (s as PS.CMLSock{sock, ...}, buf, flgs) = CML.guard (fn () => + case Socket.recvArrNB' (sock, buf, flgs) + of (SOME res) => CML.alwaysEvt res + | NONE => CML.wrap(PS.inEvt s, fn _ => Socket.recvArr' (sock, buf, flgs)) + (* end case *)) + fun recvVecFromEvt (s as PS.CMLSock{sock, ...}, n) = CML.guard (fn () => + case Socket.recvVecFromNB (sock, n) + of (SOME res) => CML.alwaysEvt res + | NONE => CML.wrap(PS.inEvt s, fn _ => Socket.recvVecFrom (sock, n)) + (* end case *)) + fun recvArrFromEvt (s as PS.CMLSock{sock, ...}, buf) = CML.guard (fn () => + case Socket.recvArrFromNB (sock, buf) + of (SOME res) => CML.alwaysEvt res + | NONE => CML.wrap(PS.inEvt s, fn _ => Socket.recvArrFrom (sock, buf)) + (* end case *)) + fun recvVecFromEvt' (s as PS.CMLSock{sock, ...}, n, flgs) = CML.guard (fn () => + case Socket.recvVecFromNB' (sock, n, flgs) + of (SOME res) => CML.alwaysEvt res + | NONE => + CML.wrap(PS.inEvt s, fn _ => Socket.recvVecFrom' (sock, n, flgs)) + (* end case *)) + fun recvArrFromEvt' (s as PS.CMLSock{sock, ...}, buf, flgs) = CML.guard (fn () => + case Socket.recvArrFromNB' (sock, buf, flgs) + of (SOME res) => CML.alwaysEvt res + | NONE => + CML.wrap(PS.inEvt s, fn _ => Socket.recvArrFrom' (sock, buf, flgs)) + (* end case *)) + + end diff --git a/cml/src/Sockets/cml-unix-sock.sml b/cml/src/Sockets/cml-unix-sock.sml new file mode 100644 index 0000000..a032f57 --- /dev/null +++ b/cml/src/Sockets/cml-unix-sock.sml @@ -0,0 +1,38 @@ +(* cml-unix-sock.sml + * + * COPYRIGHT (c) 1996 AT&T Research. + *) + +structure CML_UnixSock : UNIX_SOCK = + struct + structure SOCK = Socket.SOCK + + type unix = UnixSock.unix + + type 'a sock = (unix, 'a) Socket.sock + type 'a stream_sock = 'a Socket.stream sock + type dgram_sock = Socket.dgram sock + + type sock_addr = unix Socket.sock_addr + + val unixAF = UnixSock.unixAF + val toAddr = UnixSock.toAddr + val fromAddr = UnixSock.fromAddr + + structure Strm = + struct + fun socket () = CML_GenericSock.socket (unixAF, SOCK.stream) + fun socket' proto = CML_GenericSock.socket' (unixAF, SOCK.stream, proto) + fun socketPair () = CML_GenericSock.socketPair (unixAF, SOCK.stream) + fun socketPair' proto = CML_GenericSock.socketPair' (unixAF, SOCK.stream, proto) + end + + structure DGrm = + struct + fun socket () = CML_GenericSock.socket (unixAF, SOCK.dgram) + fun socket' proto = CML_GenericSock.socket' (unixAF, SOCK.dgram, proto) + fun socketPair () = CML_GenericSock.socketPair (unixAF, SOCK.dgram) + fun socketPair' proto = CML_GenericSock.socketPair' (unixAF, SOCK.dgram, proto) + end + + end diff --git a/cml/src/Sockets/generic-sock-sig.sml b/cml/src/Sockets/generic-sock-sig.sml new file mode 100644 index 0000000..5fa8bb6 --- /dev/null +++ b/cml/src/Sockets/generic-sock-sig.sml @@ -0,0 +1,54 @@ +(* generic-sock-sig.sml + * + * COPYRIGHT (c) 1995 AT&T Bell Laboratories. + * + * $Log$ + * Revision 1.4 2003/09/24 20:09:40 mblume + * sync of socket implementation with Basis spec; + * minor updates to Real64 code + * + * Revision 1.3.2.1 2003/09/23 20:37:17 mblume + * switched to spec-conforming implementation of Sockets; + * updated CML accordingly + * + * Revision 1.3 2001/06/20 20:39:14 blume + * CML compiles and works again + * + * Revision 1.2.4.1 2001/06/20 17:54:59 blume + * CML now compiles under old and new CM + * + * Revision 1.2 1996/06/03 21:11:42 jhr + * Sockets API cleanup. + * + * Revision 1.1.1.1 1996/01/31 16:02:36 george + * Version 109 + * + *) + +signature GENERIC_SOCK = + sig +(* + val addressFamilies : unit -> Socket.AF.addr_family list + (* returns a list of the supported address families; this should include + * at least: Socket.AF.inet. + *) + + val socketTypes : unit -> Socket.SOCK.sock_type + (* returns a list of the supported socket types; this should include at + * least: Socket.SOCK.stream and Socket.SOCK.dgram. + *) +*) + + (* create sockets using default protocol *) + val socket : (Socket.AF.addr_family * Socket.SOCK.sock_type) + -> ('a, 'b) Socket.sock + val socketPair : (Socket.AF.addr_family * Socket.SOCK.sock_type) + -> (('a, 'b) Socket.sock * ('a, 'b) Socket.sock) + + (* create sockets using the specified protocol *) + val socket' : (Socket.AF.addr_family * Socket.SOCK.sock_type * int) + -> ('a, 'b) Socket.sock + val socketPair' : (Socket.AF.addr_family * Socket.SOCK.sock_type * int) + -> (('a, 'b) Socket.sock * ('a, 'b) Socket.sock) + + end diff --git a/cml/src/Sockets/inet-sock-sig.sml b/cml/src/Sockets/inet-sock-sig.sml new file mode 100644 index 0000000..0f3f47c --- /dev/null +++ b/cml/src/Sockets/inet-sock-sig.sml @@ -0,0 +1,53 @@ +(* inet-sock-sig.sml + * + * COPYRIGHT (c) 1995 AT&T Bell Laboratories. + * + * $Log$ + * Revision 1.3 2001/06/20 20:39:14 blume + * CML compiles and works again + * + * Revision 1.2.4.1 2001/06/20 17:54:59 blume + * CML now compiles under old and new CM + * +# Revision 1.2 1996/09/06 00:35:42 george +# *** empty log message *** +# +# Revision 1.1 1996/06/03 21:11:43 jhr +# Sockets API cleanup. +# + * Revision 1.1.1.1 1996/01/31 16:02:37 george + * Version 109 + * + *) + +signature INET_SOCK = + sig + + type inet + + type 'a sock = (inet, 'a) Socket.sock + type 'a stream_sock = 'a Socket.stream sock + type dgram_sock = Socket.dgram sock + + type sock_addr = inet Socket.sock_addr + + val inetAF : Socket.AF.addr_family (* DARPA internet protocols *) + + val toAddr : (NetHostDB.in_addr * int) -> sock_addr + val fromAddr : sock_addr -> (NetHostDB.in_addr * int) + val any : int -> sock_addr + + structure UDP : sig + val socket : unit -> dgram_sock + val socket' : int -> dgram_sock + end + + structure TCP : sig + val socket : unit -> 'a stream_sock + val socket' : int -> 'a stream_sock + (* tcp control options *) + val getNODELAY : 'a stream_sock -> bool + val setNODELAY : ('a stream_sock * bool) -> unit + end + end + diff --git a/cml/src/Sockets/layer1.cm b/cml/src/Sockets/layer1.cm new file mode 100644 index 0000000..5370ed8 --- /dev/null +++ b/cml/src/Sockets/layer1.cm @@ -0,0 +1,32 @@ +(* layer1.cm + * + * COPYRIGHT (c) 1999 Bell Labs, Lucent Technologies. + * + *) + +Group + signature CML_SOCKET + structure PreSock + structure CML_Socket + +#if defined (NEW_CM) + (* re-exporting from the basis *) + structure GenericSock + structure NetHostDB + structure INetSock +#if defined (OPSYS_UNIX) + structure UnixSock +#endif +#endif +is + +#if defined (NEW_CM) + $/basis.cm + $cml/core-cml.cm +#else + ../core-cml.cm +#endif + +pre-sock.sml +cml-socket-sig.sml +cml-socket.sml diff --git a/cml/src/Sockets/layer2.cm b/cml/src/Sockets/layer2.cm new file mode 100644 index 0000000..b3c4f69 --- /dev/null +++ b/cml/src/Sockets/layer2.cm @@ -0,0 +1,39 @@ +(* cml-socket.cm + * + * COPYRIGHT (c) 1999 Bell Labs, Lucent Technologies. + * + *) + +Group + signature CML_SOCKET + signature GENERIC_SOCK + signature INET_SOCK +#if defined (OPSYS_UNIX) + signature UNIX_SOCK +#endif + structure PreSock + structure Socket + +#if defined (NEW_CM) + (* re-exporting from the basis *) + structure GenericSock + structure NetHostDB + structure INetSock +#if defined (OPSYS_UNIX) + structure UnixSock +#endif +#endif +is + +layer1.cm + +rebind1.sml + +(* recompile the SML signatures, so that the types are right *) +generic-sock-sig.sml +inet-sock-sig.sml + +#if defined (OPSYS_UNIX) +unix-sock-sig.sml +#endif + diff --git a/cml/src/Sockets/layer3.cm b/cml/src/Sockets/layer3.cm new file mode 100644 index 0000000..473be75 --- /dev/null +++ b/cml/src/Sockets/layer3.cm @@ -0,0 +1,27 @@ +(* layer3.sml + * + * COPYRIGHT (c) 1999 Bell Labs, Lucent Technologies. + * + *) + +Group + signature CML_SOCKET + signature GENERIC_SOCK + signature INET_SOCK + structure Socket + structure CML_GenericSock + structure CML_INetSock +#if defined(OPSYS_UNIX) + signature UNIX_SOCK + structure CML_UnixSock +#endif +is + +layer2.cm + +cml-generic-sock.sml +cml-inet-sock.sml + +#if defined(OPSYS_UNIX) +cml-unix-sock.sml +#endif diff --git a/cml/src/Sockets/pre-sock.sml b/cml/src/Sockets/pre-sock.sml new file mode 100644 index 0000000..079c58e --- /dev/null +++ b/cml/src/Sockets/pre-sock.sml @@ -0,0 +1,60 @@ +(* pre-sock.sml + * + * COPYRIGHT (c) 1996 AT&T Research. + * + * Provide some utility operations for CML sockets. + *) + +structure PreSock : sig + + datatype socket_state + = Unconnected (* initial state *) + | Connecting (* when waiting for a connect to complete *) + | Connected (* when connected *) + | Accepting (* when waiting for an accept to complete *) + | WaitingOnIO (* when waiting on an input and/or output operation *) + | Closed + + datatype ('a, 'b) sock = CMLSock of { + state : socket_state SyncVar.mvar, + sock : ('a, 'b) Socket.sock + } + + val mkSock : ('a, 'b) Socket.sock -> ('a, 'b) sock + + val inEvt : ('a, 'b) sock -> unit CML.event + val outEvt : ('a, 'b) sock -> unit CML.event + + end = struct + + datatype socket_state + = Unconnected (* initial state *) + | Connecting (* when waiting for a connect to complete *) + | Connected (* when connected *) + | Accepting (* when waiting for an accept to complete *) + | WaitingOnIO (* when waiting on an input and/or output operation *) + | Closed + + datatype ('a, 'b) sock = CMLSock of { + state : socket_state SyncVar.mvar, + sock : ('a, 'b) Socket.sock + } + + (* given an SML socket, return a CML socket *) + fun mkSock s = + CMLSock { state = SyncVar.mVarInit Unconnected, + sock = s } + + local + fun pollD sock = + case OS.IO.pollDesc (Socket.ioDesc sock) of + SOME pd => pd + | NONE => raise Fail "no polling on socket I/O descriptor" + in + fun inEvt (CMLSock{sock, ...}) = + CML.wrap(IOManager.ioEvt (OS.IO.pollIn (pollD sock)), ignore) + fun outEvt (CMLSock{sock, ...}) = + CML.wrap(IOManager.ioEvt(OS.IO.pollOut (pollD sock)), ignore) + + end + end; diff --git a/cml/src/Sockets/rebind-sockets.sml b/cml/src/Sockets/rebind-sockets.sml new file mode 100644 index 0000000..c9938c3 --- /dev/null +++ b/cml/src/Sockets/rebind-sockets.sml @@ -0,0 +1,12 @@ +(* rebind-socket.sml + * + * COPYRIGHT (c) 1996 AT&T Research. + * + * Rebind the Socket structures to the exported names. + *) + +structure Socket = CML_Socket +structure GenericSock : GENERIC_SOCK = CML_GenericSock +structure INetSock : INET_SOCK = CML_INetSock +structure UnixSock : UNIX_SOC = CML_UnixSock + diff --git a/cml/src/Sockets/rebind1.sml b/cml/src/Sockets/rebind1.sml new file mode 100644 index 0000000..51a94e8 --- /dev/null +++ b/cml/src/Sockets/rebind1.sml @@ -0,0 +1,9 @@ +(* rebind1.sml + * + * COPYRIGHT (c) 1996 AT&T Research. + * + * Rebind the Socket structure + *) + +structure Socket = (* CML_Socket *) struct open CML_Socket end + diff --git a/cml/src/Sockets/rebind2-unix-sock.sml b/cml/src/Sockets/rebind2-unix-sock.sml new file mode 100644 index 0000000..055dabd --- /dev/null +++ b/cml/src/Sockets/rebind2-unix-sock.sml @@ -0,0 +1,9 @@ +(* rebind2-unix-sock.sml + * + * COPYRIGHT (c) 1999 Bell Labs, Lucent Technologies + * + * Rebind the UnixSock structures to the exported names. + *) + + +structure UnixSock = (* CML_UnixSock *) struct open CML_UnixSock end diff --git a/cml/src/Sockets/rebind2.sml b/cml/src/Sockets/rebind2.sml new file mode 100644 index 0000000..d8fbcd8 --- /dev/null +++ b/cml/src/Sockets/rebind2.sml @@ -0,0 +1,10 @@ +(* rebind2.sml + * + * COPYRIGHT (c) 1996 AT&T Research. + * + * Rebind the Socket structures to the exported names. + *) + +structure GenericSock = (* CML_GenericSock *) struct open CML_GenericSock end +structure INetSock = (* CML_INetSock *) struct open CML_INetSock end + diff --git a/cml/src/Sockets/sources.cm b/cml/src/Sockets/sources.cm new file mode 100644 index 0000000..fe83e66 --- /dev/null +++ b/cml/src/Sockets/sources.cm @@ -0,0 +1,32 @@ +(* sources.cm + * + * COPYRIGHT (c) 1999 Bell Labs, Lucent Technologies. + *) + +Group + signature CML_SOCKET + signature GENERIC_SOCK + signature INET_SOCK + structure Socket + structure CML_GenericSock + structure CML_INetSock +#if defined(OPSYS_UNIX) + signature UNIX_SOCK + structure CML_UnixSock +#endif + structure GenericSock + structure INetSock +#if defined(OPSYS_UNIX) + structure UnixSock +#endif + +is + +layer3.cm + +rebind2.sml + +#if defined(OPSYS_UNIX) +rebind2-unix-sock.sml +#endif + diff --git a/cml/src/Sockets/unix-sock-sig.sml b/cml/src/Sockets/unix-sock-sig.sml new file mode 100644 index 0000000..39f971e --- /dev/null +++ b/cml/src/Sockets/unix-sock-sig.sml @@ -0,0 +1,46 @@ +(* unix-sock-sig.sml + * + * COPYRIGHT (c) 1995 AT&T Bell Laboratories. + * + * $Log$ + * Revision 1.3 2001/06/20 20:39:14 blume + * CML compiles and works again + * + * Revision 1.2.4.1 2001/06/20 17:54:59 blume + * CML now compiles under old and new CM + * + * Revision 1.3 1996/09/06 00:35:43 george + * *** empty log message *** + * + * Revision 1.2 1996/06/03 21:11:48 jhr + * Sockets API cleanup. + * + * Revision 1.1.1.1 1996/01/31 16:02:40 george + * Version 109 + * + *) + +signature UNIX_SOCK = + sig + type unix + + type 'a sock = (unix, 'a) Socket.sock + type 'a stream_sock = 'a Socket.stream sock + type dgram_sock = Socket.dgram sock + + type sock_addr = unix Socket.sock_addr + + val unixAF : Socket.AF.addr_family (* 4.3BSD internal protocols *) + + val toAddr : string -> sock_addr + val fromAddr : sock_addr -> string + + structure Strm : sig + val socket : unit -> 'a stream_sock + val socketPair : unit -> ('a stream_sock * 'a stream_sock) + end + structure DGrm : sig + val socket : unit -> dgram_sock + val socketPair : unit -> (dgram_sock * dgram_sock) + end + end; diff --git a/cml/src/TODO b/cml/src/TODO new file mode 100644 index 0000000..deba772 --- /dev/null +++ b/cml/src/TODO @@ -0,0 +1,11 @@ +Implement the event constructors for input operation (inputEvt, ...). + +Slow down timer interrupts when pausing for I/O, etc. When waiting for +just a single kind of thing (e.g., I/O), we can just wait, instead of +sigpause. Also, we should eventually use asynchronous mechanisms, such +as SIGCHLD and SIGIO. + +Add a CML interface to signals. + +Add clean-up code for sockets. + diff --git a/cml/src/Unix/.cm/GUID/new-unix.sml b/cml/src/Unix/.cm/GUID/new-unix.sml new file mode 100644 index 0000000..f2c72c8 --- /dev/null +++ b/cml/src/Unix/.cm/GUID/new-unix.sml @@ -0,0 +1 @@ +guid-$cml/(cml-internal.cm):Unix/(sources.cm):new-unix.sml-1714016097.041 diff --git a/cml/src/Unix/.cm/GUID/os-io.sml b/cml/src/Unix/.cm/GUID/os-io.sml new file mode 100644 index 0000000..3d8cd01 --- /dev/null +++ b/cml/src/Unix/.cm/GUID/os-io.sml @@ -0,0 +1 @@ +guid-$cml/(cml-internal.cm):Unix/(sources.cm):(os.cm):os-io.sml-1714016095.768 diff --git a/cml/src/Unix/.cm/GUID/os-process.sml b/cml/src/Unix/.cm/GUID/os-process.sml new file mode 100644 index 0000000..46e7533 --- /dev/null +++ b/cml/src/Unix/.cm/GUID/os-process.sml @@ -0,0 +1 @@ +guid-$cml/(cml-internal.cm):Unix/(sources.cm):(os.cm):os-process.sml-1714016095.812 diff --git a/cml/src/Unix/.cm/GUID/os.sml b/cml/src/Unix/.cm/GUID/os.sml new file mode 100644 index 0000000..24c0615 --- /dev/null +++ b/cml/src/Unix/.cm/GUID/os.sml @@ -0,0 +1 @@ +guid-$cml/(cml-internal.cm):Unix/(sources.cm):os.sml-1714016095.829 diff --git a/cml/src/Unix/.cm/GUID/posix-bin-io.sml b/cml/src/Unix/.cm/GUID/posix-bin-io.sml new file mode 100644 index 0000000..ba5a708 --- /dev/null +++ b/cml/src/Unix/.cm/GUID/posix-bin-io.sml @@ -0,0 +1 @@ +guid-$cml/(cml-internal.cm):Unix/(sources.cm):posix-bin-io.sml-1714016096.192 diff --git a/cml/src/Unix/.cm/GUID/posix-bin-prim-io.sml b/cml/src/Unix/.cm/GUID/posix-bin-prim-io.sml new file mode 100644 index 0000000..e024b1a --- /dev/null +++ b/cml/src/Unix/.cm/GUID/posix-bin-prim-io.sml @@ -0,0 +1 @@ +guid-$cml/(cml-internal.cm):Unix/(sources.cm):posix-bin-prim-io.sml-1714016095.887 diff --git a/cml/src/Unix/.cm/GUID/posix-text-io.sml b/cml/src/Unix/.cm/GUID/posix-text-io.sml new file mode 100644 index 0000000..084f43a --- /dev/null +++ b/cml/src/Unix/.cm/GUID/posix-text-io.sml @@ -0,0 +1 @@ +guid-$cml/(cml-internal.cm):Unix/(sources.cm):posix-text-io.sml-1714016096.650 diff --git a/cml/src/Unix/.cm/GUID/posix-text-prim-io.sml b/cml/src/Unix/.cm/GUID/posix-text-prim-io.sml new file mode 100644 index 0000000..6f99a35 --- /dev/null +++ b/cml/src/Unix/.cm/GUID/posix-text-prim-io.sml @@ -0,0 +1 @@ +guid-$cml/(cml-internal.cm):Unix/(sources.cm):posix-text-prim-io.sml-1714016096.218 diff --git a/cml/src/Unix/.cm/GUID/proc-manager.sml b/cml/src/Unix/.cm/GUID/proc-manager.sml new file mode 100644 index 0000000..cc946c9 --- /dev/null +++ b/cml/src/Unix/.cm/GUID/proc-manager.sml @@ -0,0 +1 @@ +guid-$cml/(cml-internal.cm):Unix/(sources.cm):(os.cm):proc-manager.sml-1714016095.795 diff --git a/cml/src/Unix/.cm/GUID/run-cml.sml b/cml/src/Unix/.cm/GUID/run-cml.sml new file mode 100644 index 0000000..a9b1a7b --- /dev/null +++ b/cml/src/Unix/.cm/GUID/run-cml.sml @@ -0,0 +1 @@ +guid-$cml/(cml-internal.cm):Unix/(sources.cm):run-cml.sml-1714016096.971 diff --git a/cml/src/Unix/.cm/GUID/syscall.sml b/cml/src/Unix/.cm/GUID/syscall.sml new file mode 100644 index 0000000..dee5c26 --- /dev/null +++ b/cml/src/Unix/.cm/GUID/syscall.sml @@ -0,0 +1 @@ +guid-$cml/(cml-internal.cm):Unix/(sources.cm):syscall.sml-1714016095.833 diff --git a/cml/src/Unix/.cm/GUID/unix-glue.sml b/cml/src/Unix/.cm/GUID/unix-glue.sml new file mode 100644 index 0000000..8b49d62 --- /dev/null +++ b/cml/src/Unix/.cm/GUID/unix-glue.sml @@ -0,0 +1 @@ +guid-$cml/(cml-internal.cm):Unix/(sources.cm):unix-glue.sml-1714016096.912 diff --git a/cml/src/Unix/.cm/GUID/unix-sig.sml b/cml/src/Unix/.cm/GUID/unix-sig.sml new file mode 100644 index 0000000..f6fc232 --- /dev/null +++ b/cml/src/Unix/.cm/GUID/unix-sig.sml @@ -0,0 +1 @@ +guid-$cml/(cml-internal.cm):Unix/(sources.cm):unix-sig.sml-1714016096.664 diff --git a/cml/src/Unix/.cm/SKEL/new-unix.sml b/cml/src/Unix/.cm/SKEL/new-unix.sml new file mode 100644 index 0000000..b92730a --- /dev/null +++ b/cml/src/Unix/.cm/SKEL/new-unix.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f6d"PosixTextPrimIO"CIO"d"Signals"d"Event"Posix"d"TextIO"Nad"Unix"jh7ad"S"gp1d"Scheduler"ad"PM"gp1d"ProcManager"Cad"PP"gp2.d"Process"ad"PE"gp2.d"ProcEnv"ad"PF"gp2.d"FileSys"ad"PIO"gp2.ad"SS"gp1d"Substring"Ngp1c"UNIX" \ No newline at end of file diff --git a/cml/src/Unix/.cm/SKEL/os-io.sml b/cml/src/Unix/.cm/SKEL/os-io.sml new file mode 100644 index 0000000..745b746 --- /dev/null +++ b/cml/src/Unix/.cm/SKEL/os-io.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f2OS"d"CML"adOS_IO"jh2ad"IOM"gp1d"IOManager"aKind"gp3d"IO"2gp1c \ No newline at end of file diff --git a/cml/src/Unix/.cm/SKEL/os-process.sml b/cml/src/Unix/.cm/SKEL/os-process.sml new file mode 100644 index 0000000..4f011e5 --- /dev/null +++ b/cml/src/Unix/.cm/SKEL/os-process.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f2d"Event"d"TimeOut"ad"OS_Process"jh5Cad"S"gp1d"Scheduler"ad"PM"gp1d"ProcManager"ad"CC"gp2d"SMLofNJ"d"Cont"ad"P"gp2d"OS"Process"ad"PP"gp2d"Posix":Ngp1c"OS_PROCESS" \ No newline at end of file diff --git a/cml/src/Unix/.cm/SKEL/os.sml b/cml/src/Unix/.cm/SKEL/os.sml new file mode 100644 index 0000000..84b5585 --- /dev/null +++ b/cml/src/Unix/.cm/SKEL/os.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f1OS"ajh4ad"IO"gp1d"OS_IO"aPath"gp2!ad"Process"gp1d"OS_Process"aFileSys"gp2 gp1c \ No newline at end of file diff --git a/cml/src/Unix/.cm/SKEL/posix-bin-io.sml b/cml/src/Unix/.cm/SKEL/posix-bin-io.sml new file mode 100644 index 0000000..5f80315 --- /dev/null +++ b/cml/src/Unix/.cm/SKEL/posix-bin-io.sml @@ -0,0 +1,2 @@ +Skeleton 5 +ad"BinIO"jh1ad"OSPrimIO"gp1d"PosixBinPrimIO"gp1e"BinIOFn" \ No newline at end of file diff --git a/cml/src/Unix/.cm/SKEL/posix-bin-prim-io.sml b/cml/src/Unix/.cm/SKEL/posix-bin-prim-io.sml new file mode 100644 index 0000000..7ada45a --- /dev/null +++ b/cml/src/Unix/.cm/SKEL/posix-bin-prim-io.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f8d"IOManager"d"OS"IO"Cd"CML"d"Position"d"Option"d"Syscall"BinPrimIO"Nad"PosixBinPrimIO"jh5Cad"SV"gp1d"SyncVar"ad"PrimIO"gp1?ad"Vec"gp1d"Word8Vector"ad"PF"gp2Posix"d"FileSys"ad"PIO"gp2%Ngp1c"OS_PRIM_IO" \ No newline at end of file diff --git a/cml/src/Unix/.cm/SKEL/posix-text-io.sml b/cml/src/Unix/.cm/SKEL/posix-text-io.sml new file mode 100644 index 0000000..335784a --- /dev/null +++ b/cml/src/Unix/.cm/SKEL/posix-text-io.sml @@ -0,0 +1,2 @@ +Skeleton 5 +ad"TextIO"jh1ad"OSPrimIO"gp1d"PosixTextPrimIO"gp1e"TextIOFn" \ No newline at end of file diff --git a/cml/src/Unix/.cm/SKEL/posix-text-prim-io.sml b/cml/src/Unix/.cm/SKEL/posix-text-prim-io.sml new file mode 100644 index 0000000..d2e466e --- /dev/null +++ b/cml/src/Unix/.cm/SKEL/posix-text-prim-io.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f8d"Unsafe"d"IO"d"CML"Cd"CharVectorSlice"d"Position"d"Int"d"String"d"CharArraySlice"Nad"PosixTextPrimIO"jh4ad"SV"gp1d"SyncVar"ad"PF"gp2d"Posix"d"FileSys"ad"BinPrimIO"gp1d"PosixBinPrimIO"aPrimIO"gp1d"TextPrimIO"h2egp1c"OS_PRIM_IO"f1= \ No newline at end of file diff --git a/cml/src/Unix/.cm/SKEL/proc-manager.sml b/cml/src/Unix/.cm/SKEL/proc-manager.sml new file mode 100644 index 0000000..4f4bf90 --- /dev/null +++ b/cml/src/Unix/.cm/SKEL/proc-manager.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f4Result"d"List"d"Event"Posix"ad"ProcManager"jh3ad"S"gp1d"Scheduler"ad"P_Proc"gp2d"Process"ad"R"gp1h0 \ No newline at end of file diff --git a/cml/src/Unix/.cm/SKEL/run-cml.sml b/cml/src/Unix/.cm/SKEL/run-cml.sml new file mode 100644 index 0000000..80c0aeb --- /dev/null +++ b/cml/src/Unix/.cm/SKEL/run-cml.sml @@ -0,0 +1,2 @@ +Skeleton 5 +ad"RunCML"jgp1d"UnixGlue"gp1e"RunCMLFn" \ No newline at end of file diff --git a/cml/src/Unix/.cm/SKEL/syscall.sml b/cml/src/Unix/.cm/SKEL/syscall.sml new file mode 100644 index 0000000..3eb120c --- /dev/null +++ b/cml/src/Unix/.cm/SKEL/syscall.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f2d"OS"d"Posix"ad"Syscall"jh1ad"S"gp1d"Scheduler"h0 \ No newline at end of file diff --git a/cml/src/Unix/.cm/SKEL/unix-glue.sml b/cml/src/Unix/.cm/SKEL/unix-glue.sml new file mode 100644 index 0000000..ae85703 --- /dev/null +++ b/cml/src/Unix/.cm/SKEL/unix-glue.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f4d"IOManager"d"ProcManager"d"Signals"d"TimeOut"ad"UnixGlue"jh0gp1c"OS_GLUE" \ No newline at end of file diff --git a/cml/src/Unix/.cm/SKEL/unix-sig.sml b/cml/src/Unix/.cm/SKEL/unix-sig.sml new file mode 100644 index 0000000..1915f6c --- /dev/null +++ b/cml/src/Unix/.cm/SKEL/unix-sig.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f3d"CML"d"Posix"d"TextIO"ac"UNIX"h0 \ No newline at end of file diff --git a/cml/src/Unix/.cm/amd64-unix/new-unix.sml b/cml/src/Unix/.cm/amd64-unix/new-unix.sml new file mode 100644 index 0000000..6eee4ab Binary files /dev/null and b/cml/src/Unix/.cm/amd64-unix/new-unix.sml differ diff --git a/cml/src/Unix/.cm/amd64-unix/os-io.sml b/cml/src/Unix/.cm/amd64-unix/os-io.sml new file mode 100644 index 0000000..a431a8b Binary files /dev/null and b/cml/src/Unix/.cm/amd64-unix/os-io.sml differ diff --git a/cml/src/Unix/.cm/amd64-unix/os-process.sml b/cml/src/Unix/.cm/amd64-unix/os-process.sml new file mode 100644 index 0000000..2280071 Binary files /dev/null and b/cml/src/Unix/.cm/amd64-unix/os-process.sml differ diff --git a/cml/src/Unix/.cm/amd64-unix/os.sml b/cml/src/Unix/.cm/amd64-unix/os.sml new file mode 100644 index 0000000..038a7f4 Binary files /dev/null and b/cml/src/Unix/.cm/amd64-unix/os.sml differ diff --git a/cml/src/Unix/.cm/amd64-unix/posix-bin-io.sml b/cml/src/Unix/.cm/amd64-unix/posix-bin-io.sml new file mode 100644 index 0000000..7c69816 Binary files /dev/null and b/cml/src/Unix/.cm/amd64-unix/posix-bin-io.sml differ diff --git a/cml/src/Unix/.cm/amd64-unix/posix-bin-prim-io.sml b/cml/src/Unix/.cm/amd64-unix/posix-bin-prim-io.sml new file mode 100644 index 0000000..115c12e Binary files /dev/null and b/cml/src/Unix/.cm/amd64-unix/posix-bin-prim-io.sml differ diff --git a/cml/src/Unix/.cm/amd64-unix/posix-text-io.sml b/cml/src/Unix/.cm/amd64-unix/posix-text-io.sml new file mode 100644 index 0000000..8d0edba Binary files /dev/null and b/cml/src/Unix/.cm/amd64-unix/posix-text-io.sml differ diff --git a/cml/src/Unix/.cm/amd64-unix/posix-text-prim-io.sml b/cml/src/Unix/.cm/amd64-unix/posix-text-prim-io.sml new file mode 100644 index 0000000..6b0673d Binary files /dev/null and b/cml/src/Unix/.cm/amd64-unix/posix-text-prim-io.sml differ diff --git a/cml/src/Unix/.cm/amd64-unix/proc-manager.sml b/cml/src/Unix/.cm/amd64-unix/proc-manager.sml new file mode 100644 index 0000000..dd78cc4 Binary files /dev/null and b/cml/src/Unix/.cm/amd64-unix/proc-manager.sml differ diff --git a/cml/src/Unix/.cm/amd64-unix/run-cml.sml b/cml/src/Unix/.cm/amd64-unix/run-cml.sml new file mode 100644 index 0000000..9b0d852 Binary files /dev/null and b/cml/src/Unix/.cm/amd64-unix/run-cml.sml differ diff --git a/cml/src/Unix/.cm/amd64-unix/syscall.sml b/cml/src/Unix/.cm/amd64-unix/syscall.sml new file mode 100644 index 0000000..1da435e Binary files /dev/null and b/cml/src/Unix/.cm/amd64-unix/syscall.sml differ diff --git a/cml/src/Unix/.cm/amd64-unix/unix-glue.sml b/cml/src/Unix/.cm/amd64-unix/unix-glue.sml new file mode 100644 index 0000000..03474a0 Binary files /dev/null and b/cml/src/Unix/.cm/amd64-unix/unix-glue.sml differ diff --git a/cml/src/Unix/.cm/amd64-unix/unix-sig.sml b/cml/src/Unix/.cm/amd64-unix/unix-sig.sml new file mode 100644 index 0000000..525acdd Binary files /dev/null and b/cml/src/Unix/.cm/amd64-unix/unix-sig.sml differ diff --git a/cml/src/Unix/new-unix.sml b/cml/src/Unix/new-unix.sml new file mode 100644 index 0000000..ac1cafd --- /dev/null +++ b/cml/src/Unix/new-unix.sml @@ -0,0 +1,122 @@ +(* unix.sml + * + * COPYRIGHT (c) 1995 AT&T Bell Laboratories. + * + * This is a CML version of the UNIX interface that is provided by SML/NJ. + *) + +structure Unix : UNIX = + struct + + structure S = Scheduler + structure PM = ProcManager + + structure PP = Posix.Process + structure PE = Posix.ProcEnv + structure PF = Posix.FileSys + structure PIO = Posix.IO + structure SS = Substring + + fun protect f x = let + val _ = Signals.maskSignals Signals.MASKALL + val y = (f x) handle ex => + (Signals.unmaskSignals Signals.MASKALL; raise ex) + in + Signals.unmaskSignals Signals.MASKALL; y + end + + fun fdReader (name : string, fd : PIO.file_desc) = + PosixTextPrimIO.mkReader { + name = name, + fd = fd + } + + fun fdWriter (name, fd) = + PosixTextPrimIO.mkWriter { + appendMode = false, + name = name, + chunkSize=4096, + fd = fd + } + + fun openOutFD (name, fd) = + TextIO.mkOutstream ( + TextIO.StreamIO.mkOutstream ( + fdWriter (name, fd), IO.BLOCK_BUF)) + + fun openInFD (name, fd) = + TextIO.mkInstream ( + TextIO.StreamIO.mkInstream ( + fdReader (name, fd), "")) + + datatype proc = PROC of { + pid : PP.pid, + ins : TextIO.instream, + outs : TextIO.outstream + } + + + fun executeInEnv (cmd, argv, env) = let + val p1 = PIO.pipe () + val p2 = PIO.pipe () + fun closep () = ( + PIO.close (#outfd p1); + PIO.close (#infd p1); + PIO.close (#outfd p2); + PIO.close (#infd p2) + ) + val base = SS.string(SS.taker (fn c => c <> #"/") (SS.full cmd)) + fun startChild () = (case protect PP.fork () + of SOME pid => pid (* parent *) + | NONE => let + val oldin = #infd p1 + val newin = Posix.FileSys.wordToFD 0w0 + val oldout = #outfd p2 + val newout = Posix.FileSys.wordToFD 0w1 + in + PIO.close (#outfd p1); + PIO.close (#infd p2); + if (oldin = newin) then () + else ( + PIO.dup2{old = oldin, new = newin}; + PIO.close oldin); + if (oldout = newout) then () + else ( + PIO.dup2{old = oldout, new = newout}; + PIO.close oldout); + PP.exece (cmd, base::argv, env) + handle ex => ( + (* the exec failed, so we need to shutdown the child *) + PP.exit 0w128) + end + (* end case *)) + val _ = TextIO.flushOut TextIO.stdOut + val pid = ( + S.stopTimer(); + startChild () before + S.restartTimer()) + handle ex => (S.restartTimer(); closep(); raise ex) + val ins = openInFD (base^"_exec_in", #infd p2) + val outs = openOutFD (base^"_exec_out", #outfd p1) + in + (* close the child-side fds *) + PIO.close (#outfd p2); + PIO.close (#infd p1); + (* set the fds close on exec *) + PIO.setfd (#infd p2, PIO.FD.flags [PIO.FD.cloexec]); + PIO.setfd (#outfd p1, PIO.FD.flags [PIO.FD.cloexec]); + PROC{pid = pid, ins = ins, outs = outs} + end + + fun execute (cmd, argv) = executeInEnv (cmd, argv, PE.environ()) + + fun streamsOf (PROC{ins, outs, ...}) = (ins, outs) + + fun kill (PROC{pid, ...}, signal) = PP.kill (PP.K_PROC pid, signal) + + fun reapEvt (PROC{pid, ins, outs}) = ( + S.atomicBegin(); PM.addPid pid before S.atomicEnd()) + + val reap = Event.sync o reapEvt + + end (* structure Unix *) diff --git a/cml/src/Unix/os-io.sml b/cml/src/Unix/os-io.sml new file mode 100644 index 0000000..18e2af0 --- /dev/null +++ b/cml/src/Unix/os-io.sml @@ -0,0 +1,53 @@ +(* os-io.sml + * + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + *) + +structure OS_IO : OS_IO = + struct + structure IOM = IOManager + + type iodesc = OS.IO.iodesc + type iodesc_kind = OS.IO.iodesc_kind + + val hash = OS.IO.hash + val compare = OS.IO.compare + val kind = OS.IO.kind + + structure Kind = OS.IO.Kind + + type poll_desc = OS.IO.poll_desc + type poll_info = OS.IO.poll_info + + val pollDesc = OS.IO.pollDesc + val pollToIODesc = OS.IO.pollToIODesc + + exception Poll = OS.IO.Poll + + (* set polling events; if the polling operation is not appropriate + * for the underlying I/O device, then the Poll exception is raised. + *) + val pollIn = OS.IO.pollIn + val pollOut = OS.IO.pollOut + val pollPri = OS.IO.pollPri + + (* polling functions *) + local + fun timeOut t = CML.wrap(CML.timeOutEvt t, fn () => []) + fun ioEvt pd = CML.wrap(IOM.ioEvt pd, fn info => [info]) + in + fun pollEvt [pd] = ioEvt pd + | pollEvt _ = raise Fail "IO.OS.pollEvt not fully implemented" + fun poll ([pd], NONE) = CML.sync(ioEvt pd) + | poll ([pd], SOME t) = CML.select[timeOut t, ioEvt pd] + | poll _ = raise Fail "IO.OS.poll not fully implemented" + end + + (* check for conditions *) + val isIn = OS.IO.isIn + val isOut = OS.IO.isOut + val isPri = OS.IO.isPri + val infoToPollDesc = OS.IO.infoToPollDesc + + end diff --git a/cml/src/Unix/os-process.sml b/cml/src/Unix/os-process.sml new file mode 100644 index 0000000..a0801ec --- /dev/null +++ b/cml/src/Unix/os-process.sml @@ -0,0 +1,55 @@ +(* os-process.sml + * + * COPYRIGHT (c) 2008 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * The generic process control interface --- Unix implementation. + *) + +structure OS_Process : OS_PROCESS = + struct + + structure S = Scheduler + structure PM = ProcManager + structure CC = SMLofNJ.Cont + + structure P = OS.Process + structure PP = Posix.Process + + type status = P.status + + val success = P.success + val failure = P.failure + val isSuccess = P.isSuccess + +(** NOTE: we probably need to disable timer signals here **) + fun system' cmd = ( + S.stopTimer(); + case PP.fork() + of NONE => ( + PP.exec ("/bin/sh", ["sh", "-c", cmd]) + PP.exit 0w127) + | (SOME pid) => (S.restartTimer(); pid) + (* end case *)) + + fun systemEvt cmd = let + val pid = system' cmd + val evt = (S.atomicBegin(); PM.addPid pid before S.atomicEnd()) + in + Event.wrap (evt, + fn PP.W_EXITED => P.success + | _ => P.failure) + end + + val system = Event.sync o systemEvt + + fun atExit _ = raise Fail "OS.Process.atExit unimplemented" + fun exit sts = (S.atomicBegin(); CC.throw (!S.shutdownHook) (true, sts)) + fun terminate sts = (S.atomicBegin(); CC.throw (!S.shutdownHook) (false, sts)) + + val getEnv = P.getEnv + + (* should sleep be per-thread or for the whole system? *) + val sleep = Event.sync o TimeOut.timeOutEvt + + end diff --git a/cml/src/Unix/os.cm b/cml/src/Unix/os.cm new file mode 100644 index 0000000..6c431ca --- /dev/null +++ b/cml/src/Unix/os.cm @@ -0,0 +1,29 @@ +(* os.cm + * + * This defines a subgroup of the CML Unix group. We need the subgroup + * to avoid cycles. + *) + +Group + signature OS_IO + signature OS_PROCESS + signature OS + + structure OS_IO + structure OS_Process + structure ProcManager +is +#if defined (NEW_CM) + ../cml-sub-basis.cm + $cml/core-cml.cm +#else + ../core-cml.cm +#endif + ../OS/sources.cm + ../util/sources.cm + + (* io-manager.sml *) + os-io.sml + proc-manager.sml + os-process.sml + diff --git a/cml/src/Unix/os.sml b/cml/src/Unix/os.sml new file mode 100644 index 0000000..73ea555 --- /dev/null +++ b/cml/src/Unix/os.sml @@ -0,0 +1,20 @@ +(* os.sml + * + * COPYRIGHT (c) 1995 AT&T Bell Laboratories. + *) + +structure OS : OS = + struct + structure IO = OS_IO + structure Path = OS.Path + structure Process = OS_Process + structure FileSys = OS.FileSys (** may need to protect system calls *) + + type syserror = OS.syserror + + exception SysErr = OS.SysErr + + val errorName = OS.errorName + val errorMsg = OS.errorMsg + + end diff --git a/cml/src/Unix/posix-bin-io.sml b/cml/src/Unix/posix-bin-io.sml new file mode 100644 index 0000000..dbbaa85 --- /dev/null +++ b/cml/src/Unix/posix-bin-io.sml @@ -0,0 +1,9 @@ +(* bin-io.sml + * + * COPYRIGHT (c) 1996 AT&T Research. + * + * The implementation of the BinIO stack on Posix systems. + * + *) + +structure BinIO = BinIOFn (structure OSPrimIO = PosixBinPrimIO); diff --git a/cml/src/Unix/posix-bin-prim-io.sml b/cml/src/Unix/posix-bin-prim-io.sml new file mode 100644 index 0000000..b5b0075 --- /dev/null +++ b/cml/src/Unix/posix-bin-prim-io.sml @@ -0,0 +1,212 @@ +(* posix-bin-prim-io.sml + * + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * This implements the UNIX version of the OS specific binary primitive + * IO structure. The Text IO version is implemented by a trivial translation + * of these operations (see posix-text-prim-io.sml). + *) + +structure PosixBinPrimIO : OS_PRIM_IO = + struct + + structure SV = SyncVar + + structure PrimIO = BinPrimIO + + structure Vec = Word8Vector + structure PF = Posix.FileSys + structure PIO = Posix.IO + + type file_desc = PF.file_desc + + val bufferSzB = 4096 + + fun isRegFile fd = PF.ST.isReg(PF.fstat fd) + + fun posFns (closed, fd) = if (isRegFile fd) + then let + val pos = ref(Position.fromInt 0) + fun getPos () = !pos + fun setPos p = ( + if !closed then raise IO.ClosedStream else (); + pos := PIO.lseek(fd,p,PIO.SEEK_SET)) + fun endPos () = ( + if !closed then raise IO.ClosedStream else (); + PF.ST.size(PF.fstat fd)) + fun verifyPos () = let + val curPos = PIO.lseek(fd, Position.fromInt 0, PIO.SEEK_CUR) + in + pos := curPos; curPos + end + in + ignore (verifyPos()); + { pos = pos, + getPos = SOME getPos, + setPos = SOME setPos, + endPos = SOME endPos, + verifyPos = SOME verifyPos + } + end + else { + pos = ref(Position.fromInt 0), + getPos = NONE, setPos = NONE, endPos = NONE, verifyPos = NONE + } + + fun mkReader {fd, name} = let + val iod = PF.fdToIOD fd + val lockMV = SV.mVarInit() + fun withLock f x = ( + SV.mTake lockMV; + (Syscall.doSyscall f x) before SV.mPut(lockMV, ())) + handle ex => (SV.mPut(lockMV, ()); raise ex) + fun withLock' NONE = NONE + | withLock' (SOME f) = SOME(withLock f) + val closed = ref false + val {pos, getPos, setPos, endPos, verifyPos} = posFns (closed, fd) + fun incPos k = pos := Position.+(!pos, Position.fromInt k) + fun blockWrap f x = ( + if !closed then raise IO.ClosedStream else (); + f x) + val readEvt = + IOManager.ioEvt(OS.IO.pollIn(Option.valOf(OS.IO.pollDesc iod))) + fun eventWrap f x = CML.withNack (fn nack => ( + if !closed then raise IO.ClosedStream else (); + case (SV.mTakePoll lockMV) + of NONE => let + val replV = SV.iVar() + in + CML.spawn(fn () => CML.select [ + CML.wrap (readEvt, fn _ => SV.iPut(replV, ())), + nack + ]); + CML.wrap(SV.iGetEvt replV, fn _ => f x) + end + | (SOME _) => CML.wrap (readEvt, + fn _ => (SV.mPut(lockMV, ()); f x)) + (* end case *))) + fun readVec n = let + val _ = CML.sync readEvt + val v = PIO.readVec(fd, n) + in + incPos (Vec.length v); v + end + fun readArr arg = let + val _ = CML.sync readEvt + val k = PIO.readArr(fd, arg) + in + incPos k; k + end + fun close () = if !closed + then () + else (closed:=true; PIO.close fd) + val isReg = isRegFile fd + fun avail () = if !closed + then SOME 0 + else if isReg + then SOME(PF.ST.size(PF.fstat fd) - !pos) + else NONE + in + BinPrimIO.RD{ + name = name, + chunkSize = bufferSzB, + readVec = withLock (blockWrap readVec), + readArr = withLock (blockWrap readArr), + readVecEvt = eventWrap readVec, + readArrEvt = eventWrap readArr, + avail = withLock avail, + getPos = withLock' getPos, + setPos = withLock' setPos, + endPos = withLock' endPos, + verifyPos = withLock' verifyPos, + close = withLock close, + ioDesc = SOME iod + } + end + + + fun openRd name = mkReader{ + fd = PF.openf(name, PIO.O_RDONLY, PF.O.flags[]), + name = name + } + + + fun mkWriter {fd, name, appendMode, chunkSize} = let + val iod = PF.fdToIOD fd + val lockMV = SV.mVarInit() + fun withLock f x = ( + SV.mTake lockMV; + (Syscall.doSyscall f x) before SV.mPut(lockMV, ())) + handle ex => (SV.mPut(lockMV, ()); raise ex) + fun withLock' NONE = NONE + | withLock' (SOME f) = SOME(withLock f) + val closed = ref false + val appendFS = PIO.O.flags(if appendMode then [PIO.O.append] else []) + fun updateStatus() = PIO.setfl(fd, appendFS) + fun ensureOpen () = if !closed then raise IO.ClosedStream else () + fun putV x = PIO.writeVec x + fun putA x = PIO.writeArr x + fun write put arg = (ensureOpen(); put(fd, arg)) + val writeEvt = + IOManager.ioEvt(OS.IO.pollOut(Option.valOf(OS.IO.pollDesc iod))) + fun eventWrap f x = CML.withNack (fn nack => ( + if !closed then raise IO.ClosedStream else (); + case (SV.mTakePoll lockMV) + of NONE => let + val replV = SV.iVar() + in + CML.spawn(fn () => CML.select [ + CML.wrap (writeEvt, fn _ => SV.iPut(replV, ())), + nack + ]); + CML.wrap(SV.iGetEvt replV, fn _ => f x) + end + | (SOME _) => CML.wrap (writeEvt, + fn _ => (SV.mPut(lockMV, ()); f x)) + (* end case *))) + fun close () = if !closed + then () + else (closed:=true; PIO.close fd) + val {pos, getPos, setPos, endPos, verifyPos} = posFns (closed, fd) + in + BinPrimIO.WR{ + name = name, + chunkSize = chunkSize, + writeVec = withLock (write putV), + writeArr = withLock (write putA), + writeVecEvt = eventWrap (write putV), + writeArrEvt = eventWrap (write putA), + getPos = withLock' getPos, + setPos = withLock' setPos, + endPos = withLock' endPos, + verifyPos = withLock' verifyPos, + close = withLock close, + ioDesc = SOME iod + } + end + + val standardMode = PF.S.flags[ (* mode 0666 *) + PF.S.irusr, PF.S.iwusr, + PF.S.irgrp, PF.S.iwgrp, + PF.S.iroth, PF.S.iwoth + ] + fun createFile (name, mode, flags) = + PF.createf(name, mode, flags, standardMode) + + fun openWr name = mkWriter{ + fd=createFile(name, PIO.O_WRONLY, PF.O.trunc), + name=name, + appendMode=false, + chunkSize=bufferSzB + } + + fun openApp name = mkWriter{ + fd = createFile(name, PIO.O_WRONLY, PF.O.append), + name = name, + appendMode = true, + chunkSize = bufferSzB + } + + end; (* PosixBinPrimIO *) + diff --git a/cml/src/Unix/posix-text-io.sml b/cml/src/Unix/posix-text-io.sml new file mode 100644 index 0000000..1bd80f1 --- /dev/null +++ b/cml/src/Unix/posix-text-io.sml @@ -0,0 +1,8 @@ +(* text-io.sml + * + * COPYRIGHT (c) 1996 AT&T Research. + * + * The implementation of the TextIO stack on Posix systems. + *) + +structure TextIO = TextIOFn (structure OSPrimIO = PosixTextPrimIO); diff --git a/cml/src/Unix/posix-text-prim-io.sml b/cml/src/Unix/posix-text-prim-io.sml new file mode 100644 index 0000000..c822bc4 --- /dev/null +++ b/cml/src/Unix/posix-text-prim-io.sml @@ -0,0 +1,121 @@ +(* posix-text-prim-io.sml + * + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * This implements the UNIX version of the OS specific text primitive + * IO structure. It is implemented by a trivial translation of the + * binary operations (see posix-bin-prim-io.sml). + *) + +structure PosixTextPrimIO : sig + + include OS_PRIM_IO + + val stdIn : unit -> PrimIO.reader + val stdOut : unit -> PrimIO.writer + val stdErr : unit -> PrimIO.writer + + val strReader : string -> PrimIO.reader + + end = struct + + structure SV = SyncVar + structure PF = Posix.FileSys + structure BinPrimIO = PosixBinPrimIO + structure PrimIO = TextPrimIO + + type file_desc = PF.file_desc + + val bufferSzB = 4096 + + (* If Char.char is really Word8.word, then very efficient versions of + * translateIn and translateOut are possible: + *) + val translateIn : BinPrimIO.PrimIO.reader -> PrimIO.reader = Unsafe.cast + val translateOut : BinPrimIO.PrimIO.writer -> PrimIO.writer = Unsafe.cast + + fun openRd fname = translateIn(BinPrimIO.openRd fname) + fun openWr fname = translateOut(BinPrimIO.openWr fname) + fun openApp fname = translateOut(BinPrimIO.openApp fname) + + fun mkReader args = translateIn(BinPrimIO.mkReader args) + fun mkWriter args = translateOut(BinPrimIO.mkWriter args) + + fun stdIn () = mkReader{ + fd = PF.stdin, + name = "" + } + + fun stdOut () = mkWriter{ + fd = PF.stdout, + name = "", + appendMode = false, (* Bug! Should check! *) + chunkSize = bufferSzB + } + + fun stdErr () = mkWriter{ + fd = PF.stderr, + name = "", + appendMode = false, (* Bug! Should check! *) + chunkSize = bufferSzB + } + + fun strReader src = let + val lockMV = SV.mVarInit() + fun withLock f x = ( + SV.mTake lockMV; + f x before SV.mPut(lockMV, ())) + handle ex => (SV.mPut(lockMV, ()); raise ex) + val pos = ref 0 + val closed = ref false + fun checkClosed () = if !closed then raise IO.ClosedStream else () + val len = String.size src + val plen = Position.fromInt len + fun avail () = Position.fromInt(len - !pos) + fun readV n = let + val p = !pos + val m = Int.min(n, len-p) + in + checkClosed (); + pos := p+m; +(** NOTE: could use unchecked operations here **) + String.substring (src, p, m) + end + fun readA asl = let + val p = !pos + val (buf, i, n) = CharArraySlice.base asl + val m = Int.min (n, len - p) + in + checkClosed (); + pos := p+m; + CharArraySlice.copyVec + { src = CharVectorSlice.slice (src, p, SOME m), + dst = buf, di = i }; + m + end + fun getPos () = (checkClosed(); Position.fromInt (!pos)) + fun setPos p = + (checkClosed (); + if p < 0 andalso p > plen then raise Subscript + else pos := Position.toInt p) + in + PrimIO.RD{ + name = "", + chunkSize = len, + readVec = withLock readV, + readArr = withLock readA, + readVecEvt = withLock(CML.alwaysEvt o readV), + readArrEvt = withLock(CML.alwaysEvt o readA), + avail = SOME o avail, + getPos = SOME(withLock getPos), + setPos = SOME(withLock setPos), + endPos = SOME(withLock(fn () => (checkClosed(); plen))), + verifyPos = SOME(withLock getPos), + close = withLock(fn () => closed := true), + ioDesc = NONE + } + end + + end; (* PosixTextPrimIO *) + diff --git a/cml/src/Unix/proc-manager.sml b/cml/src/Unix/proc-manager.sml new file mode 100644 index 0000000..f967a00 --- /dev/null +++ b/cml/src/Unix/proc-manager.sml @@ -0,0 +1,60 @@ +(* proc-manager.sml + * + * COPYRIGHT (c) 1995 AT&T Bell Laboratories. + * COPYRIGHT (c) 1989-1991 John H. Reppy + * + * Unix process management. + * + *) + +structure ProcManager : sig + + val addPid : Posix.Process.pid -> Posix.Process.exit_status Event.event + + val pollProcs : unit -> unit + + val anyWaiting : unit -> bool + + end = struct + + structure S = Scheduler + structure P_Proc = Posix.Process + structure R = Result + + datatype pid = PID of { + wait : P_Proc.exit_status R.result, + pid : P_Proc.pid + } + + val waiting = ref ([] : pid list) + + fun addPid pid = let + val rv = Result.result() + in + waiting := PID{wait = rv, pid = pid} :: !waiting; + Result.getEvt rv + end + + fun pollProcs () = let +(** NOTE: it would be more efficient to poll for any zombie process, + ** until there are no more. + **) + fun pollPid pid = P_Proc.waitpid_nh (P_Proc.W_CHILD pid, []) + fun pollItem (item as PID{wait, pid}) = ( + case (pollPid pid) + of SOME(_, sts) => ( + S.enqueueTmpThread (fn () => R.put(wait, sts)); + false) + | NONE => true + (* end case *)) + handle ex => ( + S.enqueueTmpThread (fn () => R.putExn (wait, ex)); + false) + in + waiting := List.filter pollItem (! waiting) + end + + fun anyWaiting () = (case !waiting of [] => false | _ => true) + + end + diff --git a/cml/src/Unix/run-cml.sml b/cml/src/Unix/run-cml.sml new file mode 100644 index 0000000..ca96ae7 --- /dev/null +++ b/cml/src/Unix/run-cml.sml @@ -0,0 +1,10 @@ +(* run-cml.sml + * + * COPYRIGHT (c) 1995 AT&T Bell Laboratories. + * COPYRIGHT (c) 1989-1991 John H. Reppy + * + * Build the UNIX version of RunCML. + *) + +structure RunCML = RunCMLFn (UnixGlue); + diff --git a/cml/src/Unix/sources.cm b/cml/src/Unix/sources.cm new file mode 100644 index 0000000..ca4cd28 --- /dev/null +++ b/cml/src/Unix/sources.cm @@ -0,0 +1,40 @@ +Group + signature OS_IO + signature OS_PROCESS + signature UNIX + + structure Syscall + structure OS + structure BinPrimIO + structure BinIO + structure TextPrimIO + structure TextIO + structure Unix + structure RunCML +is +#if defined (NEW_CM) + ../cml-sub-basis.cm + $cml/core-cml.cm +#else + ../core-cml.cm +#endif + + ../IO/sources.cm + ../glue/sources.cm + os.cm + + syscall.sml + os.sml + posix-bin-prim-io.sml + posix-bin-io.sml + posix-text-prim-io.sml + posix-text-io.sml + unix-sig.sml +#if (SMLNJ_VERSION * 100 + SMLNJ_MINOR_VERSION >= 11033) + new-unix.sml +#else + unix.sml +#endif + unix-glue.sml + run-cml.sml + diff --git a/cml/src/Unix/syscall.sml b/cml/src/Unix/syscall.sml new file mode 100644 index 0000000..612a9eb --- /dev/null +++ b/cml/src/Unix/syscall.sml @@ -0,0 +1,43 @@ +(* syscall.sml + * + * COPYRIGHT (c) 1996 AT&T Research. + * + * Some system calls may take a long time to complete and may + * be interrupted by timer signals before they complete. This + * module implements mechanisms to protect against this problem. + *) + +structure Syscall : sig + + val isIntr : OS.syserror -> bool + + val doSyscall : ('a -> 'b) -> 'a -> 'b + (* do a system call, and restart if it is interrupted *) + + val doAtomicSyscall : ('a -> 'b) -> 'a -> 'b + (* do a system call with timer signals masked *) + + end = struct + + structure S = Scheduler + + fun isIntr err = (err = Posix.Error.intr) + + fun doAtomicSyscall f x = let + val _ = S.stopTimer() + val y = (f x) handle ex => (S.restartTimer(); raise ex) + in + S.restartTimer(); y + end + + fun doSyscall f x = let + fun try 0 = doAtomicSyscall f x + | try n = ((f x) + handle (ex as OS.SysErr(_, SOME err)) => + if isIntr err then try(n-1) else raise ex) + in + try 3 + end + + end; + diff --git a/cml/src/Unix/unix-glue.sml b/cml/src/Unix/unix-glue.sml new file mode 100644 index 0000000..71df7fe --- /dev/null +++ b/cml/src/Unix/unix-glue.sml @@ -0,0 +1,30 @@ +(* unix-glue.sml + * + * COPYRIGHT (c) 1995 AT&T Bell Laboratories. + * COPYRIGHT (c) 1989-1991 John H. Reppy + * + * The Glue for the UNIX version of CML. + *) + +structure UnixGlue : OS_GLUE = + struct + + fun init () = TimeOut.reset() + + fun pollOS () = ( + TimeOut.pollTime(); + IOManager.pollIO(); + ProcManager.pollProcs()) + + fun pause () = (case TimeOut.anyWaiting() + of NONE => if (IOManager.anyWaiting() orelse ProcManager.anyWaiting()) + then (Signals.pause(); true) + else false +(** NOTE: eventually, we should just go to sleep for the specified time **) + | (SOME t) => (Signals.pause(); true) + (* end case *)) + + fun shutdown () = TimeOut.reset() + + end; + diff --git a/cml/src/Unix/unix-sig.sml b/cml/src/Unix/unix-sig.sml new file mode 100644 index 0000000..68490d6 --- /dev/null +++ b/cml/src/Unix/unix-sig.sml @@ -0,0 +1,60 @@ +(* unix-sig.sml + * + * COPYRIGHT (c) 1995 AT&T Bell Laboratories. + * + * This is a CML version of the UNIX interface that is provided by SML/NJ. + *) + +signature UNIX = + sig + type proc + + (* executeInEnv (path, args, env) + * forks/execs new process given by path + * The new process will have environment env, and + * arguments args prepended by the last arc in path + * (following the Unix convention that the first argument + * is the command name). + * Returns an abstract type proc, which represents + * the child process plus streams attached to the + * the child process stdin/stdout. + * + * Simple command searching can be obtained by using + * executeInEnv ("/bin/sh", "-c"::args, env) + *) + val executeInEnv : string * string list * string list -> proc + + (* execute (path, args) + * = executeInEnv (path, args, Posix.ProcEnv.environ()) + *) + val execute : string * string list -> proc + + (* streamsOf proc + * returns an instream and outstream used to read + * from and write to the stdout and stdin of the + * executed process. + * + * The underlying files are set to be close-on-exec. + *) + val streamsOf : proc -> TextIO.instream * TextIO.outstream + + (* reap proc + * This closes the associated streams and waits for the + * child process to finish, returns its exit status. + * + * Note that even if the child process has already exited, + * so that reap returns immediately, + * the parent process should eventually reap it. Otherwise, + * the process will remain a zombie and take a slot in the + * process table. + *) + val reapEvt : proc -> Posix.Process.exit_status CML.event + val reap : proc -> Posix.Process.exit_status + + (* kill (proc, signal) + * sends the Posix signal to the associated process. + *) + val kill : proc * Posix.Signal.signal -> unit + + end + diff --git a/cml/src/Unix/unix.sml b/cml/src/Unix/unix.sml new file mode 100644 index 0000000..d771974 --- /dev/null +++ b/cml/src/Unix/unix.sml @@ -0,0 +1,119 @@ +(* unix.sml + * + * COPYRIGHT (c) 1995 AT&T Bell Laboratories. + * + * This is a CML version of the UNIX interface that is provided by SML/NJ. + *) + +structure Unix : UNIX = + struct + + structure S = Scheduler + structure PM = ProcManager + + structure PP = Posix.Process + structure PE = Posix.ProcEnv + structure PF = Posix.FileSys + structure PIO = Posix.IO + structure SS = Substring + + fun protect f x = let + val _ = Signals.maskSignals Signals.MASKALL + val y = (f x) handle ex => + (Signals.unmaskSignals Signals.MASKALL; raise ex) + in + Signals.unmaskSignals Signals.MASKALL; y + end + + fun fdReader (name : string, fd : PIO.file_desc) = + PosixTextPrimIO.mkReader { + name = name, + fd = fd + } + + fun fdWriter (name, fd) = + PosixTextPrimIO.mkWriter { + appendMode = false, + name = name, + chunkSize=4096, + fd = fd + } + + fun openOutFD (name, fd) = + TextIO.mkOutstream ( + TextIO.StreamIO.mkOutstream ( + fdWriter (name, fd), IO.BLOCK_BUF)) + + fun openInFD (name, fd) = + TextIO.mkInstream ( + TextIO.StreamIO.mkInstream ( + fdReader (name, fd), NONE)) + + datatype proc = PROC of { + pid : PP.pid, + ins : TextIO.instream, + outs : TextIO.outstream + } + + + fun executeInEnv (cmd, argv, env) = let + val p1 = PIO.pipe () + val p2 = PIO.pipe () + fun closep () = ( + PIO.close (#outfd p1); + PIO.close (#infd p1); + PIO.close (#outfd p2); + PIO.close (#infd p2) + ) + val base = SS.string(SS.taker (fn c => c <> #"/") (SS.all cmd)) + fun startChild () = (case protect PP.fork () + of SOME pid => pid (* parent *) + | NONE => let + val oldin = #infd p1 + val newin = Posix.FileSys.wordToFD 0w0 + val oldout = #outfd p2 + val newout = Posix.FileSys.wordToFD 0w1 + in + PIO.close (#outfd p1); + PIO.close (#infd p2); + if (oldin = newin) then () + else ( + PIO.dup2{old = oldin, new = newin}; + PIO.close oldin); + if (oldout = newout) then () + else ( + PIO.dup2{old = oldout, new = newout}; + PIO.close oldout); + PP.exece (cmd, base::argv, env) + end + (* end case *)) + val _ = TextIO.flushOut TextIO.stdOut + val pid = ( + S.stopTimer(); + startChild () before + S.restartTimer()) + handle ex => (S.restartTimer(); closep(); raise ex) + val ins = openInFD (base^"_exec_in", #infd p2) + val outs = openOutFD (base^"_exec_out", #outfd p1) + in + (* close the child-side fds *) + PIO.close (#outfd p2); + PIO.close (#infd p1); + (* set the fds close on exec *) + PIO.setfd (#infd p2, PIO.FD.flags [PIO.FD.cloexec]); + PIO.setfd (#outfd p1, PIO.FD.flags [PIO.FD.cloexec]); + PROC{pid = pid, ins = ins, outs = outs} + end + + fun execute (cmd, argv) = executeInEnv (cmd, argv, PE.environ()) + + fun streamsOf (PROC{ins, outs, ...}) = (ins, outs) + + fun kill (PROC{pid, ...}, signal) = PP.kill (PP.K_PROC pid, signal) + + fun reapEvt (PROC{pid, ins, outs}) = ( + S.atomicBegin(); PM.addPid pid before S.atomicEnd()) + + val reap = Event.sync o reapEvt + + end (* structure Unix *) diff --git a/cml/src/Win32/os-io.sml b/cml/src/Win32/os-io.sml new file mode 100644 index 0000000..18e2af0 --- /dev/null +++ b/cml/src/Win32/os-io.sml @@ -0,0 +1,53 @@ +(* os-io.sml + * + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + *) + +structure OS_IO : OS_IO = + struct + structure IOM = IOManager + + type iodesc = OS.IO.iodesc + type iodesc_kind = OS.IO.iodesc_kind + + val hash = OS.IO.hash + val compare = OS.IO.compare + val kind = OS.IO.kind + + structure Kind = OS.IO.Kind + + type poll_desc = OS.IO.poll_desc + type poll_info = OS.IO.poll_info + + val pollDesc = OS.IO.pollDesc + val pollToIODesc = OS.IO.pollToIODesc + + exception Poll = OS.IO.Poll + + (* set polling events; if the polling operation is not appropriate + * for the underlying I/O device, then the Poll exception is raised. + *) + val pollIn = OS.IO.pollIn + val pollOut = OS.IO.pollOut + val pollPri = OS.IO.pollPri + + (* polling functions *) + local + fun timeOut t = CML.wrap(CML.timeOutEvt t, fn () => []) + fun ioEvt pd = CML.wrap(IOM.ioEvt pd, fn info => [info]) + in + fun pollEvt [pd] = ioEvt pd + | pollEvt _ = raise Fail "IO.OS.pollEvt not fully implemented" + fun poll ([pd], NONE) = CML.sync(ioEvt pd) + | poll ([pd], SOME t) = CML.select[timeOut t, ioEvt pd] + | poll _ = raise Fail "IO.OS.poll not fully implemented" + end + + (* check for conditions *) + val isIn = OS.IO.isIn + val isOut = OS.IO.isOut + val isPri = OS.IO.isPri + val infoToPollDesc = OS.IO.infoToPollDesc + + end diff --git a/cml/src/Win32/os-process.sml b/cml/src/Win32/os-process.sml new file mode 100644 index 0000000..ea9ee65 --- /dev/null +++ b/cml/src/Win32/os-process.sml @@ -0,0 +1,55 @@ +(* os-process.sml + * + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * The generic process control interface. + * Modified to work for Win32 (no reliance on Posix.Process) + *) + +structure OS_Process : OS_PROCESS = + struct + + structure S = Scheduler + structure PM = ProcManager + structure CC = SMLofNJ.Cont + + structure P = OS.Process + structure WP = Win32Process + + type status = P.status + + val success = P.success + val failure = P.failure + val isSuccess = P.isSuccess + +(** NOTE: we probably need to disable timer signals here **) + fun system' cmd = let + val _ = S.stopTimer () + val pid = WP.createProcess (cmd) + val _ = S.restartTimer () + in + pid + end + + fun systemEvt cmd = let + val pid = system' cmd + val evt = (S.atomicBegin(); PM.addPid pid before S.atomicEnd()) + in + Event.wrap (evt, + fn WP.SUCCESS => P.success + | _ => P.failure) + end + + val system = Event.sync o systemEvt + + fun atExit _ = raise Fail "OS.Process.atExit unimplemented" + fun exit sts = (S.atomicBegin(); CC.throw (!S.shutdownHook) (true, sts)) + fun terminate sts = (S.atomicBegin(); CC.throw (!S.shutdownHook) (false, sts)) + + val getEnv = P.getEnv + + (* should sleep be per-thread or for the whole system? *) + val sleep = Event.sync o TimeOut.timeOutEvt + + end diff --git a/cml/src/Win32/os.cm b/cml/src/Win32/os.cm new file mode 100644 index 0000000..44d867f --- /dev/null +++ b/cml/src/Win32/os.cm @@ -0,0 +1,32 @@ +(* os.cm + * + * This defines a subgroup of the CML Unix group. We need the subgroup + * to avoid cycles. + *) + +Group (../cml-internal.cm) + signature OS_IO + signature OS_PROCESS + signature OS + + structure OS_IO + structure OS_Process + + structure ProcManager +is +#if defined (NEW_CM) + ../cml-sub-basis.cm + $cml/core-cml.cm +#else + ../core-cml.cm +#endif + ../OS/sources.cm + ../util/sources.cm + + (* io-manager.sml *) + os-io.sml + proc-manager.sml + os-process.sml + + (* added file to simulate the Posix.Process structure *) + win32-process.sml diff --git a/cml/src/Win32/os.sml b/cml/src/Win32/os.sml new file mode 100644 index 0000000..f499dd7 --- /dev/null +++ b/cml/src/Win32/os.sml @@ -0,0 +1,21 @@ +(* os.sml + * + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + *) + +structure OS : OS = + struct + structure IO = OS_IO + structure Path = OS.Path + structure Process = OS_Process + structure FileSys = OS.FileSys (** may need to protect system calls *) + + type syserror = OS.syserror + + exception SysErr = OS.SysErr + + val errorName = OS.errorName + val errorMsg = OS.errorMsg + + end diff --git a/cml/src/Win32/proc-manager.sml b/cml/src/Win32/proc-manager.sml new file mode 100644 index 0000000..9fd9ff3 --- /dev/null +++ b/cml/src/Win32/proc-manager.sml @@ -0,0 +1,60 @@ +(* proc-manager.sml + * + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * Win32 process management (from its Unix counterpart) + * + *) + +structure ProcManager : sig + + val addPid : Win32Process.pid -> Win32Process.exit_status Event.event + + val pollProcs : unit -> unit + + val anyWaiting : unit -> bool + + end = struct + + structure S = Scheduler + structure WP = Win32Process + structure R = Result + + datatype pid = PID of { + wait : WP.exit_status R.result, + pid : WP.pid + } + + val waiting = ref ([] : pid list) + + fun addPid pid = let + val rv = Result.result() + in + waiting := PID{wait = rv, pid = pid} :: !waiting; + Result.getEvt rv + end + + fun pollProcs () = let +(** NOTE: it would be more efficient to poll for any zombie process, + ** until there are no more. + **) + fun pollPid pid = WP.waitForSingleObject pid + fun pollItem (item as PID{wait, pid}) = ( + case (pollPid pid) + of SOME(sts) => ( + S.enqueueTmpThread (fn () => R.put(wait, sts)); + false) + | NONE => true + (* end case *)) + handle ex => ( + S.enqueueTmpThread (fn () => R.putExn (wait, ex)); + false) + in + waiting := List.filter pollItem (! waiting) + end + + fun anyWaiting () = (case !waiting of [] => false | _ => true) + + end + diff --git a/cml/src/Win32/run-cml.sml b/cml/src/Win32/run-cml.sml new file mode 100644 index 0000000..043c36c --- /dev/null +++ b/cml/src/Win32/run-cml.sml @@ -0,0 +1,10 @@ +(* run-cml.sml + * + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * Build the Win32 version of RunCML. + *) + +structure RunCML = RunCMLFn (Win32Glue); + diff --git a/cml/src/Win32/sources.cm b/cml/src/Win32/sources.cm new file mode 100644 index 0000000..2103a57 --- /dev/null +++ b/cml/src/Win32/sources.cm @@ -0,0 +1,34 @@ +Group (../cml-internal.cm) + signature OS_IO + signature OS_PROCESS + + structure Syscall + structure OS + structure BinPrimIO + structure BinIO + structure TextPrimIO + structure TextIO + structure RunCML +is +#if defined (NEW_CM) + ../cml-sub-basis.cm + $cml/core-cml.cm +#else + ../core-cml.cm +#endif + + ../IO/sources.cm + ../glue/sources.cm + os.cm + + syscall.sml + os.sml + + win32-bin-prim-io.sml + win32-bin-io.sml + win32-text-prim-io.sml + win32-text-io.sml + + win32-glue.sml + + run-cml.sml diff --git a/cml/src/Win32/syscall.sml b/cml/src/Win32/syscall.sml new file mode 100644 index 0000000..a456591 --- /dev/null +++ b/cml/src/Win32/syscall.sml @@ -0,0 +1,44 @@ +(* syscall.sml + * + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * Some system calls may take a long time to complete and may + * be interrupted by timer signals before they complete. This + * module implements mechanisms to protect against this problem. + *) + +structure Syscall : sig + + val isIntr : OS.syserror -> bool + + val doSyscall : ('a -> 'b) -> 'a -> 'b + (* do a system call, and restart if it is interrupted *) + + val doAtomicSyscall : ('a -> 'b) -> 'a -> 'b + (* do a system call with timer signals masked *) + + end = struct + + structure S = Scheduler + + fun isIntr err = false (* TOFIX: this'll break... (err = Posix.Error.intr) *) + + fun doAtomicSyscall f x = let + val _ = S.stopTimer() + val y = (f x) handle ex => (S.restartTimer(); raise ex) + in + S.restartTimer(); y + end + + fun doSyscall f x = let + fun try 0 = doAtomicSyscall f x + | try n = ((f x) + handle (ex as OS.SysErr(_, SOME err)) => + if isIntr err then try(n-1) else raise ex) + in + try 3 + end + + end; + diff --git a/cml/src/Win32/win32-bin-io.sml b/cml/src/Win32/win32-bin-io.sml new file mode 100644 index 0000000..5920d8a --- /dev/null +++ b/cml/src/Win32/win32-bin-io.sml @@ -0,0 +1,7 @@ +(* win32-bin-io.sml + * + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + *) + +structure BinIO = BinIOFn (structure OSPrimIO = Win32BinPrimIO); diff --git a/cml/src/Win32/win32-bin-prim-io.sml b/cml/src/Win32/win32-bin-prim-io.sml new file mode 100644 index 0000000..d6598e4 --- /dev/null +++ b/cml/src/Win32/win32-bin-prim-io.sml @@ -0,0 +1,226 @@ +(* win32-bin-prim-io.sml + * + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * This implements the Win32 version of the OS specific binary primitive + * IO structure. The Text IO version is implemented by a trivial translation + * of these operations (see nt-text-prim-io.sml). + *) + +structure Win32BinPrimIO : OS_PRIM_IO = + struct + + structure SV = SyncVar + + structure PrimIO = BinPrimIO + + structure W32FS = Win32.FileSys + structure W32IO = Win32.IO + structure W32G = Win32.General + + structure V = Word8Vector + + type file_desc = W32G.hndl + + val bufferSzB = 4096 + + val seek = W32IO.setFilePointer' + + fun posFns iod = + if (OS.IO.kind iod = OS.IO.Kind.file) + then let + val pos : Position.int ref = ref 0 + fun getPos () : Position.int = !pos + fun setPos p = + pos := seek (W32FS.IODToHndl iod, p, W32IO.FILE_BEGIN) + fun endPos () : Position.int = W32FS.getFileSize (W32FS.IODToHndl iod) + fun verifyPos () = ( + pos := seek (W32FS.IODToHndl iod, 0, W32IO.FILE_CURRENT); + !pos) + in + ignore (verifyPos()); + { pos=pos, + getPos=SOME getPos, + setPos=SOME setPos, + endPos=SOME endPos, + verifyPos=SOME verifyPos + } + end + else { pos=ref 0, getPos=NONE, setPos=NONE, endPos=NONE, verifyPos=NONE } + + fun addCheck f (SOME g) = SOME (f g) + | addCheck _ NONE = NONE + + fun mkReader {fd, name} = let + val iod = W32FS.hndlToIOD fd + val lockMV = SV.mVarInit() + fun withLock f x = ( + SV.mTake lockMV; + (Syscall.doSyscall f x) before SV.mPut(lockMV, ())) + handle ex => (SV.mPut(lockMV, ()); raise ex) + fun withLock' NONE = NONE + | withLock' (SOME f) = SOME(withLock f) + val closed = ref false + val {pos, getPos, setPos, endPos, verifyPos} = posFns iod + fun incPos k = pos := Position.+(!pos, Position.fromInt k) + fun blockWrap f x = ( + if !closed then raise IO.ClosedStream else (); + f x) + val readEvt = + IOManager.ioEvt(OS.IO.pollIn(Option.valOf(OS.IO.pollDesc iod))) + fun eventWrap f x = CML.withNack (fn nack => ( + if !closed then raise IO.ClosedStream else (); + case (SV.mTakePoll lockMV) + of NONE => let + val replV = SV.iVar() + in + CML.spawn(fn () => CML.select [ + CML.wrap (readEvt, fn _ => SV.iPut(replV, ())), + nack + ]); + CML.wrap(SV.iGetEvt replV, fn _ => f x) + end + | (SOME _) => CML.wrap (readEvt, + fn _ => (SV.mPut(lockMV, ()); f x)) + (* end case *))) + fun readVec n = let + val _ = CML.sync readEvt + val v = W32IO.readVec (W32FS.IODToHndl iod,n) + in + incPos (V.length v); v + end + fun readArr arg = let + val _ = CML.sync readEvt + val k = W32IO.readArr(W32FS.IODToHndl iod,arg) + in + incPos k; k + end + fun close () = if !closed + then () + else (closed:=true; W32IO.close (W32FS.IODToHndl iod)) + fun avail () = if !closed + then SOME 0 + else SOME(Position.-(W32FS.getFileSize (W32FS.IODToHndl iod), !pos)) + in + PrimIO.RD{ + name = name, + chunkSize = bufferSzB, + readVec = withLock (blockWrap readVec), + readArr = withLock (blockWrap readArr), + readVecEvt = eventWrap readVec, + readArrEvt = eventWrap readArr, + avail = withLock avail, + getPos = withLock' getPos, + setPos = withLock' setPos, + endPos = withLock' endPos, + verifyPos = withLock' verifyPos, + close = withLock close, + ioDesc = SOME iod + } + end + + + val shareAll = W32G.Word.orb(W32IO.FILE_SHARE_READ, W32IO.FILE_SHARE_WRITE) + + fun checkHndl name h = if W32G.isValidHandle h + then h + else raise OS.SysErr("win32-bin-prim-io:checkHndl: "^name^": failed",NONE) + + fun openRd name = mkReader{ + fd = checkHndl "openRd" (W32IO.createFile { + name=name, + access=W32IO.GENERIC_READ, + share=shareAll, + mode=W32IO.OPEN_EXISTING, + attrs=0wx0 + }), + name = name + } + + fun mkWriter {fd, name, appendMode, chunkSize} = let + val iod = W32FS.hndlToIOD fd + val lockMV = SV.mVarInit() + fun withLock f x = ( + SV.mTake lockMV; + (Syscall.doSyscall f x) before SV.mPut(lockMV, ())) + handle ex => (SV.mPut(lockMV, ()); raise ex) + fun withLock' NONE = NONE + | withLock' (SOME f) = SOME(withLock f) + val closed = ref false + val {pos, getPos, setPos, endPos, verifyPos} = posFns iod + fun incPos k = pos := Position.+(!pos, Position.fromInt k) + fun ensureOpen () = if !closed then raise IO.ClosedStream else () + fun putV x = W32IO.writeVec x + fun putA x = W32IO.writeArr x + fun write put arg = let + val _ = ensureOpen() + val v = put(W32FS.IODToHndl iod, arg) + in + incPos v; v + end + val writeEvt = + IOManager.ioEvt(OS.IO.pollOut(Option.valOf(OS.IO.pollDesc iod))) + fun eventWrap f x = CML.withNack (fn nack => ( + if !closed then raise IO.ClosedStream else (); + case (SV.mTakePoll lockMV) + of NONE => let + val replV = SV.iVar() + in + CML.spawn(fn () => CML.select [ + CML.wrap (writeEvt, fn _ => SV.iPut(replV, ())), + nack + ]); + CML.wrap(SV.iGetEvt replV, fn _ => f x) + end + | (SOME _) => CML.wrap (writeEvt, + fn _ => (SV.mPut(lockMV, ()); f x)) + (* end case *))) + fun close () = if !closed + then () + else (closed:=true; W32IO.close (W32FS.IODToHndl iod)) + in + PrimIO.WR{ + name = name, + chunkSize = chunkSize, + writeVec = withLock (write putV), + writeArr = withLock (write putA), + writeVecEvt = eventWrap (write putV), + writeArrEvt = eventWrap (write putA), + getPos = withLock' getPos, + setPos = withLock' setPos, + endPos = withLock' endPos, + verifyPos = withLock' verifyPos, + close = withLock close, + ioDesc = SOME iod + } + end + + fun openWr name = mkWriter{ + fd = checkHndl "openWr" (W32IO.createFile{ + name=name, + access=W32IO.GENERIC_WRITE, + share=shareAll, + mode=W32IO.CREATE_ALWAYS, + attrs=W32FS.FILE_ATTRIBUTE_NORMAL + }), + name = name, + appendMode = false, + chunkSize = bufferSzB + } + + fun openApp name = let + val h = checkHndl "openApp" (W32IO.createFile { + name=name, + access=W32IO.GENERIC_WRITE, + share=shareAll, + mode=W32IO.OPEN_ALWAYS, + attrs=W32FS.FILE_ATTRIBUTE_NORMAL + }) + val _ = seek (h, 0, W32IO.FILE_END) + in + mkWriter{fd = h, name = name, appendMode = true, chunkSize = bufferSzB} + end + + end; (* Win32BinPrimIO *) + diff --git a/cml/src/Win32/win32-glue.sml b/cml/src/Win32/win32-glue.sml new file mode 100644 index 0000000..15f4ca4 --- /dev/null +++ b/cml/src/Win32/win32-glue.sml @@ -0,0 +1,30 @@ +(* win32-glue.sml + * + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * The glue for the Win32 version of CML. + *) + +structure Win32Glue : OS_GLUE = + struct + + fun init () = TimeOut.reset() + + fun pollOS () = ( + TimeOut.pollTime(); + IOManager.pollIO(); + ProcManager.pollProcs()) + + fun pause () = (case TimeOut.anyWaiting() + of NONE => if (IOManager.anyWaiting() orelse ProcManager.anyWaiting()) + then (Signals.pause(); true) + else false +(** NOTE: eventually, we should just go to sleep for the specified time **) + | (SOME t) => (Signals.pause(); true) + (* end case *)) + + fun shutdown () = TimeOut.reset() + + end; + diff --git a/cml/src/Win32/win32-os-io.cm b/cml/src/Win32/win32-os-io.cm new file mode 100644 index 0000000..9fd5a96 --- /dev/null +++ b/cml/src/Win32/win32-os-io.cm @@ -0,0 +1,6 @@ + +Group + structure Win32OSIO +is + + win32-os-io.sml diff --git a/cml/src/Win32/win32-os-io.sml b/cml/src/Win32/win32-os-io.sml new file mode 100644 index 0000000..e672975 --- /dev/null +++ b/cml/src/Win32/win32-os-io.sml @@ -0,0 +1,82 @@ +(* win32-os-io.sml + * + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * Replacement of OS.IO structure for Win32. + * It implements a simple type of polling for file objects. + *) + +structure Win32OSIO = + struct + structure W32G = Win32.General + structure W32FS = Win32.FileSys + type word32 = Word32.word + + exception SysErr = OS.SysErr + +(* should this be = IODesc of Handle.t ref | SockDesc of int *) + datatype iodesc = IODesc of W32G.hndl ref (* OS.IO.iodesc *) + + fun hash (IODesc(ref h)) = Handle.hash h + + fun compare (IODesc(ref ha), IODesc(ref hb)) = Handle.compare(ha, hb) + + datatype iodesc_kind = K of string + + structure Kind = + struct + val file = K "FILE" + val dir = K "DIR" + val symlink = K "LINK" + val tty = K "TTY" + val pipe = K "PIPE" + val socket = K "SOCK" + val device = K "DEV" + end + + fun kind (IODesc (ref h)) = (case W32FS.getFileAttributes' h + of NONE => K "UNKNOWN" + | SOME w => if W32FS.isRegularFile h + then Kind.file + else Kind.dir + (* end case *)) + + (* no win32 polling devices for now *) + val noPolling = "polling not implemented for win32 for this device/type" + + datatype poll_desc = PollDesc of iodesc + datatype poll_info = PollInfo of poll_desc + + fun pollDesc id = SOME (PollDesc id) (* NONE *) + fun pollToIODesc (PollDesc pd) = pd (* raise Fail("pollToIODesc: "^noPolling) *) + exception Poll + + fun pollIn pd = pd (* raise Fail("pollIn: "^noPolling) *) + fun pollOut pd = pd (* raise Fail("pollOut: "^noPolling) *) + fun pollPri pd = pd (* raise Fail("pollPri: "^noPolling) *) + + local + val poll' : (word32 list * (Int32.int * int) option -> word32 list) = + Unsafe.CInterface.c_function "WIN32-IO" "poll" + fun toPollInfo (w) = PollInfo (PollDesc (IODesc (ref w))) + fun fromPollDesc (PollDesc (IODesc (ref w))) = w + in + fun poll (pdl,t) = let + val timeout = (case t + of (SOME t) => + SOME(Time.toSeconds t, Int.fromLarge (Time.toMicroseconds t)) + | NONE => NONE) + val info = poll' (List.map fromPollDesc pdl,timeout) + in + List.map toPollInfo info + end + end (* end local *) + + fun isIn pd = raise Fail("isIn: "^noPolling) + fun isOut pd = raise Fail("isOut: "^noPolling) + fun isPri pd = raise Fail("isPri: "^noPolling) + + fun infoToPollDesc (PollInfo pd) = pd (* raise Fail("infoToPollDesc: "^noPolling) *) + + end diff --git a/cml/src/Win32/win32-process.sml b/cml/src/Win32/win32-process.sml new file mode 100644 index 0000000..a917233 --- /dev/null +++ b/cml/src/Win32/win32-process.sml @@ -0,0 +1,32 @@ +(* win32-process.sml + * + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * Simulate some of the Posix.Process structure on Win32 machines + *) + +structure Win32Process : + sig + + type pid + datatype exit_status = SUCCESS | FAIL + val createProcess : string -> pid + val waitForSingleObject : pid -> exit_status option + + end = struct + + type pid = Word32.word (* actually, a handle *) + datatype exit_status = SUCCESS | FAIL + + fun cfun x = Unsafe.CInterface.c_function "WIN32-PROCESS" x + + val createProcess : string -> pid = cfun "create_process" + + val wait_for_single_object : pid -> pid option = cfun "wait_for_single_object" + fun waitForSingleObject (p : pid) = (case (wait_for_single_object p) + of NONE => NONE + | SOME (v) => if v=0w0 then SOME FAIL else SOME SUCCESS + (* end of case *)) + + end diff --git a/cml/src/Win32/win32-text-io.sml b/cml/src/Win32/win32-text-io.sml new file mode 100644 index 0000000..3c6c2a2 --- /dev/null +++ b/cml/src/Win32/win32-text-io.sml @@ -0,0 +1,9 @@ +(* win32-text-io.sml + * + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * The implementation of the TextIO stack on Win32 systems. + *) + +structure TextIO = TextIOFn (structure OSPrimIO = Win32TextPrimIO); diff --git a/cml/src/Win32/win32-text-prim-io.sml b/cml/src/Win32/win32-text-prim-io.sml new file mode 100644 index 0000000..712d8b7 --- /dev/null +++ b/cml/src/Win32/win32-text-prim-io.sml @@ -0,0 +1,141 @@ +(* win32-text-prim-io.sml + * + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * This implements the Win32 version of the OS specific text primitive + * IO structure. It is implemented by a trivial translation of the + * binary operations (see win32-bin-prim-io.sml). + *) + +structure Win32TextPrimIO : sig + + include OS_PRIM_IO + + val stdIn : unit -> PrimIO.reader + val stdOut : unit -> PrimIO.writer + val stdErr : unit -> PrimIO.writer + + val strReader : string -> PrimIO.reader + + end = struct + + structure SV = SyncVar + structure BinPrimIO = Win32BinPrimIO + structure PrimIO = TextPrimIO + + structure W32FS = Win32.FileSys + structure W32IO = Win32.IO + structure W32G = Win32.General + + structure V = Word8Vector + + type file_desc = W32G.hndl + + val bufferSzB = 4096 + + (* If Char.char is really Word8.word, then very efficient versions of + * translateIn and translateOut are possible: + *) + val translateIn : BinPrimIO.PrimIO.reader -> PrimIO.reader = Unsafe.cast + val translateOut : BinPrimIO.PrimIO.writer -> PrimIO.writer = Unsafe.cast + + fun openRd fname = translateIn(BinPrimIO.openRd fname) + fun openWr fname = translateOut(BinPrimIO.openWr fname) + fun openApp fname = translateOut(BinPrimIO.openApp fname) + + fun mkReader args = translateIn(BinPrimIO.mkReader args) + fun mkWriter args = translateOut(BinPrimIO.mkWriter args) + + fun stdIn () = let + val h = W32IO.getStdHandle(W32IO.STD_INPUT_HANDLE) + in + if W32G.isValidHandle h + then mkReader{fd = h, name = ""} + else raise OS.SysErr("Win32TextPrimIO: can't get stdin",NONE) + end + + fun stdOut () = let + val h = W32IO.getStdHandle(W32IO.STD_OUTPUT_HANDLE) + in + if W32G.isValidHandle h + then mkWriter{ + fd = h, + name = "", + appendMode = true, + chunkSize = bufferSzB + } + else raise OS.SysErr("Win32TextPrimIO: can't get stdout",NONE) + end + + fun stdErr () = let + val h = W32IO.getStdHandle(W32IO.STD_ERROR_HANDLE) + in + if W32G.isValidHandle h + then mkWriter{ + fd = h, + name = "", + appendMode = true, + chunkSize = bufferSzB + } + else raise OS.SysErr("Win32TextPrimIO: can't get stderr",NONE) + end + + fun strReader src = let + val lockMV = SV.mVarInit() + fun withLock f x = ( + SV.mTake lockMV; + f x before SV.mPut(lockMV, ())) + handle ex => (SV.mPut(lockMV, ()); raise ex) + val pos = ref 0 + val closed = ref false + fun checkClosed () = if !closed then raise IO.ClosedStream else () + val len = String.size src + val plen = Position.fromInt len + fun avail () = SOME(Position.fromInt (len - !pos)) + fun readV n = let + val p = !pos + val m = Int.min(n, len-p) + in + checkClosed (); + pos := p+m; +(** NOTE: could use unchecked operations here **) + String.substring (src, p, m) + end + fun readA asl = let + val (buf, i, n) = CharArraySlice.base asl + val p = !pos + val m = Int.min(n, len-p) + in + checkClosed (); + pos := p+m; + CharArraySlice.copyVec { src = CharVectorSlice.slice + (src,p,SOME len), + dst = buf, di = i }; + m + end + fun getPos () = (checkClosed(); Position.fromInt (!pos)) + in + PrimIO.RD{ + name = "", + chunkSize = len, + readVec = withLock readV, + readArr = withLock readA, + readVecEvt = withLock(CML.alwaysEvt o readV), + readArrEvt = withLock(CML.alwaysEvt o readA), + avail = avail, + getPos = SOME(withLock getPos), + setPos = SOME(withLock(fn p => ( + checkClosed(); + if (p < 0) orelse (plen < p) + then raise Subscript + else (); + pos := Position.toInt p))), + endPos = SOME(withLock(fn () => (checkClosed(); plen))), + verifyPos = SOME(withLock getPos), + close = withLock(fn () => closed := true), + ioDesc = NONE + } + end + + end; (* Win32TextPrimIO *) diff --git a/cml/src/basis-sub-basis.cm b/cml/src/basis-sub-basis.cm new file mode 100644 index 0000000..ef09d54 --- /dev/null +++ b/cml/src/basis-sub-basis.cm @@ -0,0 +1,36 @@ +(* basis-sub-basis.cm + * + * This group provides a restricted view on $/basis.cm (much like + * cml-sub-basis.cm but even more restrictive). It is used to build + * CML's own basis.cm without name conflicts. + * + * This file is for use with the new CM. The old CM cannot process it. + *) +Group + + library($/basis.cm) + + - (signature OS + signature OS_IO + signature OS_PROCESS + signature PRIM_IO + + structure OS + structure BinIO + structure BinPrimIO + structure TextIO + structure TextPrimIO + + signature INET_SOCK + signature GENERIC_SOCK + + structure GenericSock + structure INetSock + structure Socket + + signature UNIX + structure Unix + signature UNIX_SOCK + structure UnixSock) +is + $/basis.cm diff --git a/cml/src/basis.cm b/cml/src/basis.cm new file mode 100644 index 0000000..403b19c --- /dev/null +++ b/cml/src/basis.cm @@ -0,0 +1,248 @@ +(* basis.cm + * + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * This is the CML-enhanced version of the Standard ML Basis Library. + * It consists of a "view" of cml-internal.cm and many definitions + * reexported from the original $/basis.cm without change. + *) + +Library + +(* The "old" CM did not have $/basis.cm but used a very large pervasive + * environment in its place. The pervasive environment is implicit and + * one cannot re-export from it. Therefore, re-exports from $/basis.cm + * are conditional on NEW_CM. + *) + (* Basis *) + signature ARRAY + signature ARRAY_SLICE + signature ARRAY2 + signature BIN_IO + signature BIT_FLAGS + signature BOOL + signature BYTE + signature CHAR + signature COMMAND_LINE + signature DATE + signature GENERAL + signature IEEE_REAL + signature IMPERATIVE_IO + signature INT_INF + signature INTEGER + signature IO + signature LIST + signature LIST_PAIR + signature MATH + signature MONO_ARRAY + signature MONO_ARRAY_SLICE + signature MONO_ARRAY2 + signature MONO_VECTOR + signature MONO_VECTOR_SLICE + signature OPTION + signature OS + signature OS_FILE_SYS + signature OS_IO + signature OS_PATH + signature OS_PROCESS + signature PACK_REAL + signature PACK_WORD + signature PRIM_IO + signature REAL + signature STREAM_IO + signature STRING + signature STRING_CVT + signature SUBSTRING + signature TEXT + signature TEXT_IO + signature TEXT_STREAM_IO + signature TIME + signature TIMER + signature VECTOR + signature VECTOR_SLICE + signature WORD + + signature EITHER (* proposal 2015-002 *) + signature FN (* proposal 2015-005 *) + signature REF (* proposal 2015-007 *) + + signature SML90 + + (* SML/NJ specific *) + signature SIGNALS + + signature CLEAN_UP + signature CONT + signature INTERVAL_TIMER + signature INTERNALS + signature GC + signature SYS_INFO + signature WEAK + signature SML_OF_NJ + + signature UNSAFE_OBJECT + signature POLL + signature UNSAFE_ARRAY + signature UNSAFE_VECTOR + signature UNSAFE_MONO_ARRAY + signature UNSAFE_MONO_VECTOR + signature UNSAFE + + (* Basis *) + structure Array + structure Array2 + structure ArraySlice + structure Bool + structure Byte + structure Char + structure CharArray + structure CharArraySlice + structure CharVector + structure CharVectorSlice + structure CommandLine + structure Date + structure General + structure IEEEReal + structure Int +#if defined(SIZE_64) + structure Int63 +#else (* SIZE_32 *) + structure Int31 +#endif + structure Int32 + structure Int64 + structure IntInf + structure IO + structure LargeInt + structure LargeReal + structure LargeWord + structure List + structure ListPair + structure Math + structure Option + structure PackReal64Big + structure PackReal64Little + structure PackWord16Big + structure PackWord16Little + structure PackWord32Big + structure PackWord32Little + structure PackWord64Big + structure PackWord64Little + structure Position + structure Real + structure Real64 + structure Real64Array + structure Real64ArraySlice + structure Real64Vector + structure Real64VectorSlice + structure RealArray + structure RealArraySlice + structure RealVector + structure RealVectorSlice + structure String + structure StringCvt + structure Substring + structure SysWord + structure Text + structure Time + structure Timer + structure Vector + structure VectorSlice + structure Word +#if defined(SIZE_64) + structure Int63 +#else (* SIZE_32 *) + structure Word31 +#endif + structure Word32 + structure Word64 + structure Word8 + structure Word8Array + structure Word8ArraySlice + structure Word8Vector + structure Word8VectorSlice + + structure Either (* proposal 2015-002 *) + structure Fn (* proposal 2015-005 *) + structure Ref (* proposal 2015-007 *) + + structure SML90 + + (* SML/NJ specific *) + structure Signals + structure Unsafe + structure SMLofNJ + structure Lazy + +#if defined(OPSYS_UNIX) orelse defined(OPSYS_WIN32) + (* Sockets (common part) *) + signature NET_HOST_DB + signature NET_PROT_DB + signature NET_SERV_DB + + signature SOCKET + + structure GenericSock + structure NetHostDB + structure NetProtDB + structure NetServDB +#endif + +#if defined(OPSYS_UNIX) + (* Posix *) + signature POSIX_ERROR + signature POSIX_SIGNAL + signature POSIX_PROCESS + signature POSIX_PROC_ENV + signature POSIX_FILE_SYS + signature POSIX_IO + signature POSIX_SYS_DB + signature POSIX_TTY + signature POSIX + structure Posix + + (* Unix *) + signature UNIX_SIGNALS + structure UnixSignals + + (* Sockets *) + signature NET_DB + + structure NetDB + +#elif defined (OPSYS_WIN32) + + signature WIN32_GENERAL + signature WIN32_PROCESS + signature WIN32_FILESYS + signature WIN32_IO + signature WIN32 + + structure Win32 +#endif + + (* Here are the things that have a CML-specific implementation: *) + signature OS_PROCESS + signature OS_IO + signature OS + structure OS + signature PRIM_IO + structure BinIO + structure BinPrimIO + structure TextIO + structure TextPrimIO + signature INET_SOCK + signature GENERIC_SOCK + structure Socket + structure INetSock +#if defined(OPSYS_UNIX) + signature UNIX_SOCK + structure UnixSock + signature UNIX + structure Unix +#endif +is + + basis-sub-basis.cm + $cml/cml-internal.cm diff --git a/cml/src/cml-internal.cm b/cml/src/cml-internal.cm new file mode 100644 index 0000000..12aedce --- /dev/null +++ b/cml/src/cml-internal.cm @@ -0,0 +1,77 @@ +(* cml-internal.cm + * + * COPYRIGHT (c) 2011 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * This is the actual implementation of CML. It defines the public + * interface of CML as well as some other things which belong intro + * basis.cm. + * This library has the status of an "internal" library. Public + * "views" for it are defined in cml.cm and basis.cm. + *) + +Library + signature CML + structure CML + + signature SYNC_VAR + structure SyncVar + signature MAILBOX + structure Mailbox + signature BARRIER + structure Barrier + + structure RunCML + + structure Debug + + signature OS_PROCESS + signature OS_IO + signature OS + structure OS + + signature PRIM_IO + signature CML_STREAM_IO + signature CML_TEXT_STREAM_IO + signature CML_IMPERATIVE_IO + signature CML_BIN_IO + signature CML_TEXT_IO + + structure BinPrimIO + structure BinIO + structure TextPrimIO + structure TextIO + + signature GENERIC_SOCK + signature INET_SOCK + + structure Socket + structure GenericSock + structure INetSock +#if defined(OPSYS_UNIX) + signature UNIX_SOCK + structure UnixSock +#endif + +#if defined(OPSYS_UNIX) + signature UNIX + structure Unix +#endif + +is +#if defined (NEW_CM) + $cml/core-cml.cm +#else + core-cml.cm +#endif + glue/sources.cm + OS/sources.cm + IO/sources.cm + + Sockets/sources.cm + +#if defined(OPSYS_UNIX) + Unix/sources.cm +#elif defined(OPSYS_WIN32) + Win32/sources.cm +#endif diff --git a/cml/src/cml-lib.cm b/cml/src/cml-lib.cm new file mode 100644 index 0000000..671ddd9 --- /dev/null +++ b/cml/src/cml-lib.cm @@ -0,0 +1,17 @@ +(* (C) 2011 The Fellowship of SML/NJ + * Author: matthias.blume@gmail.com (Matthias Blume) + * + * This wrapper library establishes $cml/cml-lib.cm + * an alias for the CML-specific additions in + * $cml-lib/smlnj-lib.cm. + *) +Library + signature MULTICAST + signature SIMPLE_RPC + signature TRACE_CML + + structure Multicast + structure SimpleRPC + structure TraceCML +is + $cml-lib/smlnj-lib.cm diff --git a/cml/src/cml-sub-basis.cm b/cml/src/cml-sub-basis.cm new file mode 100644 index 0000000..8a2282c --- /dev/null +++ b/cml/src/cml-sub-basis.cm @@ -0,0 +1,27 @@ +(* cml-sub-basis.cm + * + * COPYRIGHT (c) 2011 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * This group provides a restricted view on $/basis.cm to avoid conflicts + * between definitions imported from the original and CML's own Basis. + * + * This file must be used with the new CM. The old CM cannot process it. + *) +Group + + library($/basis.cm) + + - (signature OS + signature OS_IO + signature OS_PROCESS + signature PRIM_IO + + structure BinIO + structure BinPrimIO + structure TextIO + structure TextPrimIO + + functor PrimIO) +is + $/basis.cm diff --git a/cml/src/cml.cm b/cml/src/cml.cm new file mode 100644 index 0000000..531a8cf --- /dev/null +++ b/cml/src/cml.cm @@ -0,0 +1,36 @@ +(* cml.cm + * + * COPYRIGHT (c) 2011 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * This defines the public interface to CML. + * It does not include CML-specific Basis stuff and is nothing more + * than a "view" on cml-internal.cm. + *) + +Library + signature CML + structure CML + + signature SYNC_VAR + structure SyncVar + signature MAILBOX + structure Mailbox + signature BARRIER + structure Barrier + + structure RunCML + + structure Debug + + signature CML_STREAM_IO + signature CML_TEXT_STREAM_IO + signature CML_IMPERATIVE_IO + signature CML_BIN_IO + signature CML_TEXT_IO +is +#if defined (NEW_CM) + $cml/cml-internal.cm +#else + cml-internal.cm +#endif diff --git a/cml/src/core-cml.cm b/cml/src/core-cml.cm new file mode 100644 index 0000000..ba90330 --- /dev/null +++ b/cml/src/core-cml.cm @@ -0,0 +1,59 @@ +(* core-cml.cm + * + * COPYRIGHT (c) 2011 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * This is the "core" of CML. It is provided as a library so that + * ../cml-lib/trace-cml.cm can refer to it directly. + *) +Library + signature CML + signature SYNC_VAR + signature MAILBOX + signature BARRIER + signature CML_CLEANUP + structure CML + structure Event + structure Q + structure Thread + structure Scheduler + structure SyncVar + structure Mailbox + structure Barrier + structure TimeOut + structure IOManager + structure Running + structure CleanUp + structure Debug +is +#if defined (NEW_CM) + $/basis.cm +#endif + core-cml/version.sml + core-cml/rep-types.sml + core-cml/queue.sml + core-cml/scheduler.sml + core-cml/event-sig.sml + core-cml/event.sml + core-cml/thread-sig.sml + core-cml/thread.sml + core-cml/channel-sig.sml + core-cml/channel.sml + core-cml/timeout-sig.sml + core-cml/timeout.sml + core-cml/io-manager.sml + core-cml/cml-sig.sml + core-cml/cml.sml + + core-cml/sync-var-sig.sml + core-cml/sync-var.sml + core-cml/mailbox-sig.sml + core-cml/mailbox.sml + core-cml/barrier-sig.sml + core-cml/barrier.sml + + core-cml/running.sml + core-cml/cml-cleanup-sig.sml + core-cml/cleanup.sml + + core-cml/debug.sml diff --git a/cml/src/core-cml/.cm/GUID/barrier-sig.sml b/cml/src/core-cml/.cm/GUID/barrier-sig.sml new file mode 100644 index 0000000..19ba2b2 --- /dev/null +++ b/cml/src/core-cml/.cm/GUID/barrier-sig.sml @@ -0,0 +1 @@ +guid-$cml/(core-cml.cm):core-cml/barrier-sig.sml-1714016095.443 diff --git a/cml/src/core-cml/.cm/GUID/barrier.sml b/cml/src/core-cml/.cm/GUID/barrier.sml new file mode 100644 index 0000000..37c339e --- /dev/null +++ b/cml/src/core-cml/.cm/GUID/barrier.sml @@ -0,0 +1 @@ +guid-$cml/(core-cml.cm):core-cml/barrier.sml-1714016095.512 diff --git a/cml/src/core-cml/.cm/GUID/channel-sig.sml b/cml/src/core-cml/.cm/GUID/channel-sig.sml new file mode 100644 index 0000000..5e83807 --- /dev/null +++ b/cml/src/core-cml/.cm/GUID/channel-sig.sml @@ -0,0 +1 @@ +guid-$cml/(core-cml.cm):core-cml/channel-sig.sml-1714016095.227 diff --git a/cml/src/core-cml/.cm/GUID/channel.sml b/cml/src/core-cml/.cm/GUID/channel.sml new file mode 100644 index 0000000..27168b1 --- /dev/null +++ b/cml/src/core-cml/.cm/GUID/channel.sml @@ -0,0 +1 @@ +guid-$cml/(core-cml.cm):core-cml/channel.sml-1714016095.230 diff --git a/cml/src/core-cml/.cm/GUID/cleanup.sml b/cml/src/core-cml/.cm/GUID/cleanup.sml new file mode 100644 index 0000000..9f20836 --- /dev/null +++ b/cml/src/core-cml/.cm/GUID/cleanup.sml @@ -0,0 +1 @@ +guid-$cml/(core-cml.cm):core-cml/cleanup.sml-1714016095.453 diff --git a/cml/src/core-cml/.cm/GUID/cml-cleanup-sig.sml b/cml/src/core-cml/.cm/GUID/cml-cleanup-sig.sml new file mode 100644 index 0000000..a9dbf84 --- /dev/null +++ b/cml/src/core-cml/.cm/GUID/cml-cleanup-sig.sml @@ -0,0 +1 @@ +guid-$cml/(core-cml.cm):core-cml/cml-cleanup-sig.sml-1714016095.449 diff --git a/cml/src/core-cml/.cm/GUID/cml-sig.sml b/cml/src/core-cml/.cm/GUID/cml-sig.sml new file mode 100644 index 0000000..77bb076 --- /dev/null +++ b/cml/src/core-cml/.cm/GUID/cml-sig.sml @@ -0,0 +1 @@ +guid-$cml/(core-cml.cm):core-cml/cml-sig.sml-1714016095.299 diff --git a/cml/src/core-cml/.cm/GUID/cml.sml b/cml/src/core-cml/.cm/GUID/cml.sml new file mode 100644 index 0000000..8f4dd84 --- /dev/null +++ b/cml/src/core-cml/.cm/GUID/cml.sml @@ -0,0 +1 @@ +guid-$cml/(core-cml.cm):core-cml/cml.sml-1714016095.305 diff --git a/cml/src/core-cml/.cm/GUID/debug.sml b/cml/src/core-cml/.cm/GUID/debug.sml new file mode 100644 index 0000000..33a5b44 --- /dev/null +++ b/cml/src/core-cml/.cm/GUID/debug.sml @@ -0,0 +1 @@ +guid-$cml/(core-cml.cm):core-cml/debug.sml-1714016094.902 diff --git a/cml/src/core-cml/.cm/GUID/event-sig.sml b/cml/src/core-cml/.cm/GUID/event-sig.sml new file mode 100644 index 0000000..9146ddf --- /dev/null +++ b/cml/src/core-cml/.cm/GUID/event-sig.sml @@ -0,0 +1 @@ +guid-$cml/(core-cml.cm):core-cml/event-sig.sml-1714016094.980 diff --git a/cml/src/core-cml/.cm/GUID/event.sml b/cml/src/core-cml/.cm/GUID/event.sml new file mode 100644 index 0000000..2407dc5 --- /dev/null +++ b/cml/src/core-cml/.cm/GUID/event.sml @@ -0,0 +1 @@ +guid-$cml/(core-cml.cm):core-cml/event.sml-1714016094.983 diff --git a/cml/src/core-cml/.cm/GUID/io-manager.sml b/cml/src/core-cml/.cm/GUID/io-manager.sml new file mode 100644 index 0000000..56b68ab --- /dev/null +++ b/cml/src/core-cml/.cm/GUID/io-manager.sml @@ -0,0 +1 @@ +guid-$cml/(core-cml.cm):core-cml/io-manager.sml-1714016095.538 diff --git a/cml/src/core-cml/.cm/GUID/mailbox-sig.sml b/cml/src/core-cml/.cm/GUID/mailbox-sig.sml new file mode 100644 index 0000000..05445ba --- /dev/null +++ b/cml/src/core-cml/.cm/GUID/mailbox-sig.sml @@ -0,0 +1 @@ +guid-$cml/(core-cml.cm):core-cml/mailbox-sig.sml-1714016095.177 diff --git a/cml/src/core-cml/.cm/GUID/mailbox.sml b/cml/src/core-cml/.cm/GUID/mailbox.sml new file mode 100644 index 0000000..08b7f9c --- /dev/null +++ b/cml/src/core-cml/.cm/GUID/mailbox.sml @@ -0,0 +1 @@ +guid-$cml/(core-cml.cm):core-cml/mailbox.sml-1714016095.401 diff --git a/cml/src/core-cml/.cm/GUID/queue.sml b/cml/src/core-cml/.cm/GUID/queue.sml new file mode 100644 index 0000000..edcbb8e --- /dev/null +++ b/cml/src/core-cml/.cm/GUID/queue.sml @@ -0,0 +1 @@ +guid-$cml/(core-cml.cm):core-cml/queue.sml-1714016094.909 diff --git a/cml/src/core-cml/.cm/GUID/rep-types.sml b/cml/src/core-cml/.cm/GUID/rep-types.sml new file mode 100644 index 0000000..6495975 --- /dev/null +++ b/cml/src/core-cml/.cm/GUID/rep-types.sml @@ -0,0 +1 @@ +guid-$cml/(core-cml.cm):core-cml/rep-types.sml-1714016094.894 diff --git a/cml/src/core-cml/.cm/GUID/running.sml b/cml/src/core-cml/.cm/GUID/running.sml new file mode 100644 index 0000000..5e4b64f --- /dev/null +++ b/cml/src/core-cml/.cm/GUID/running.sml @@ -0,0 +1 @@ +guid-$cml/(core-cml.cm):core-cml/running.sml-1714016095.446 diff --git a/cml/src/core-cml/.cm/GUID/scheduler.sml b/cml/src/core-cml/.cm/GUID/scheduler.sml new file mode 100644 index 0000000..db3d7d7 --- /dev/null +++ b/cml/src/core-cml/.cm/GUID/scheduler.sml @@ -0,0 +1 @@ +guid-$cml/(core-cml.cm):core-cml/scheduler.sml-1714016094.932 diff --git a/cml/src/core-cml/.cm/GUID/sync-var-sig.sml b/cml/src/core-cml/.cm/GUID/sync-var-sig.sml new file mode 100644 index 0000000..a76d419 --- /dev/null +++ b/cml/src/core-cml/.cm/GUID/sync-var-sig.sml @@ -0,0 +1 @@ +guid-$cml/(core-cml.cm):core-cml/sync-var-sig.sml-1714016095.313 diff --git a/cml/src/core-cml/.cm/GUID/sync-var.sml b/cml/src/core-cml/.cm/GUID/sync-var.sml new file mode 100644 index 0000000..0479aa8 --- /dev/null +++ b/cml/src/core-cml/.cm/GUID/sync-var.sml @@ -0,0 +1 @@ +guid-$cml/(core-cml.cm):core-cml/sync-var.sml-1714016095.317 diff --git a/cml/src/core-cml/.cm/GUID/thread-sig.sml b/cml/src/core-cml/.cm/GUID/thread-sig.sml new file mode 100644 index 0000000..f43c3ad --- /dev/null +++ b/cml/src/core-cml/.cm/GUID/thread-sig.sml @@ -0,0 +1 @@ +guid-$cml/(core-cml.cm):core-cml/thread-sig.sml-1714016095.187 diff --git a/cml/src/core-cml/.cm/GUID/thread.sml b/cml/src/core-cml/.cm/GUID/thread.sml new file mode 100644 index 0000000..74aed24 --- /dev/null +++ b/cml/src/core-cml/.cm/GUID/thread.sml @@ -0,0 +1 @@ +guid-$cml/(core-cml.cm):core-cml/thread.sml-1714016095.191 diff --git a/cml/src/core-cml/.cm/GUID/timeout-sig.sml b/cml/src/core-cml/.cm/GUID/timeout-sig.sml new file mode 100644 index 0000000..879b662 --- /dev/null +++ b/cml/src/core-cml/.cm/GUID/timeout-sig.sml @@ -0,0 +1 @@ +guid-$cml/(core-cml.cm):core-cml/timeout-sig.sml-1714016095.149 diff --git a/cml/src/core-cml/.cm/GUID/timeout.sml b/cml/src/core-cml/.cm/GUID/timeout.sml new file mode 100644 index 0000000..24f823f --- /dev/null +++ b/cml/src/core-cml/.cm/GUID/timeout.sml @@ -0,0 +1 @@ +guid-$cml/(core-cml.cm):core-cml/timeout.sml-1714016095.153 diff --git a/cml/src/core-cml/.cm/GUID/version.sml b/cml/src/core-cml/.cm/GUID/version.sml new file mode 100644 index 0000000..8d8824e --- /dev/null +++ b/cml/src/core-cml/.cm/GUID/version.sml @@ -0,0 +1 @@ +guid-$cml/(core-cml.cm):core-cml/version.sml-1714016095.180 diff --git a/cml/src/core-cml/.cm/SKEL/barrier-sig.sml b/cml/src/core-cml/.cm/SKEL/barrier-sig.sml new file mode 100644 index 0000000..f82e520 --- /dev/null +++ b/cml/src/core-cml/.cm/SKEL/barrier-sig.sml @@ -0,0 +1,2 @@ +Skeleton 5 +ac"BARRIER"h0 \ No newline at end of file diff --git a/cml/src/core-cml/.cm/SKEL/barrier.sml b/cml/src/core-cml/.cm/SKEL/barrier.sml new file mode 100644 index 0000000..0d6ba3a --- /dev/null +++ b/cml/src/core-cml/.cm/SKEL/barrier.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f2d"SMLofNJ"d"List"ad"Barrier"jh1ad"S"gp1d"Scheduler"gp1c"BARRIER" \ No newline at end of file diff --git a/cml/src/core-cml/.cm/SKEL/channel-sig.sml b/cml/src/core-cml/.cm/SKEL/channel-sig.sml new file mode 100644 index 0000000..ddb9d2b --- /dev/null +++ b/cml/src/core-cml/.cm/SKEL/channel-sig.sml @@ -0,0 +1,2 @@ +Skeleton 5 +ac"CHANNEL"h0 \ No newline at end of file diff --git a/cml/src/core-cml/.cm/SKEL/channel.sml b/cml/src/core-cml/.cm/SKEL/channel.sml new file mode 100644 index 0000000..9fb25fd --- /dev/null +++ b/cml/src/core-cml/.cm/SKEL/channel.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f3d"SMLofNJ"d"Q"d"Event"ad"Channel"jh3ad"T"gp1d"Thread"ad"S"gp1d"Scheduler"ad"R"gp1d"RepTypes"h1egp1c"CHANNEL" \ No newline at end of file diff --git a/cml/src/core-cml/.cm/SKEL/cleanup.sml b/cml/src/core-cml/.cm/SKEL/cleanup.sml new file mode 100644 index 0000000..1b0009a --- /dev/null +++ b/cml/src/core-cml/.cm/SKEL/cleanup.sml @@ -0,0 +1,2 @@ +Skeleton 5 +ad"CleanUp"jh2baSV"gp1d"SyncVar"f2d"Running"f5Cd"CML"d"Channel"d"List"d"Time"d"Mailbox"Nh1egp1c"CML_CLEANUP" \ No newline at end of file diff --git a/cml/src/core-cml/.cm/SKEL/cml-cleanup-sig.sml b/cml/src/core-cml/.cm/SKEL/cml-cleanup-sig.sml new file mode 100644 index 0000000..4b67401 --- /dev/null +++ b/cml/src/core-cml/.cm/SKEL/cml-cleanup-sig.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f2d"CML"d"Mailbox"ac"CML_CLEANUP"h0 \ No newline at end of file diff --git a/cml/src/core-cml/.cm/SKEL/cml-sig.sml b/cml/src/core-cml/.cm/SKEL/cml-sig.sml new file mode 100644 index 0000000..cc796e1 --- /dev/null +++ b/cml/src/core-cml/.cm/SKEL/cml-sig.sml @@ -0,0 +1,2 @@ +Skeleton 5 +ac"CML"h4egp1c"THREAD"egp1c"CHANNEL"egp1c"EVENT"egp1c"TIME_OUT" \ No newline at end of file diff --git a/cml/src/core-cml/.cm/SKEL/cml.sml b/cml/src/core-cml/.cm/SKEL/cml.sml new file mode 100644 index 0000000..9d59c04 --- /dev/null +++ b/cml/src/core-cml/.cm/SKEL/cml.sml @@ -0,0 +1,2 @@ +Skeleton 5 +adCML"jh5Cegp1d"Version"egp1d"Thread"egp1d"Channel"egp1d"Event"egp1d"TimeOut"Ngp1c \ No newline at end of file diff --git a/cml/src/core-cml/.cm/SKEL/debug.sml b/cml/src/core-cml/.cm/SKEL/debug.sml new file mode 100644 index 0000000..46135a4 --- /dev/null +++ b/cml/src/core-cml/.cm/SKEL/debug.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f3d"Unsafe"d"RepTypes"d"Time"ad"Debug"j0) \ No newline at end of file diff --git a/cml/src/core-cml/.cm/SKEL/event-sig.sml b/cml/src/core-cml/.cm/SKEL/event-sig.sml new file mode 100644 index 0000000..bf1a355 --- /dev/null +++ b/cml/src/core-cml/.cm/SKEL/event-sig.sml @@ -0,0 +1,2 @@ +Skeleton 5 +ac"EVENT"h0 \ No newline at end of file diff --git a/cml/src/core-cml/.cm/SKEL/event.sml b/cml/src/core-cml/.cm/SKEL/event.sml new file mode 100644 index 0000000..207a245 --- /dev/null +++ b/cml/src/core-cml/.cm/SKEL/event.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f4d"SMLofNJ"RepTypes"d"List"d"Int"ad"Event"jh2ad"R"gpad"S"gp1d"Scheduler"h2egp1c"EVENT"f9 \ No newline at end of file diff --git a/cml/src/core-cml/.cm/SKEL/io-manager.sml b/cml/src/core-cml/.cm/SKEL/io-manager.sml new file mode 100644 index 0000000..d909bd0 --- /dev/null +++ b/cml/src/core-cml/.cm/SKEL/io-manager.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f5Cd"OS"d"SMLofNJ"d"List"d"Time"d"Event"Nad"IOManager"jh2ad"R"gp1d"RepTypes"ad"S"gp1d"Scheduler"h0 \ No newline at end of file diff --git a/cml/src/core-cml/.cm/SKEL/mailbox-sig.sml b/cml/src/core-cml/.cm/SKEL/mailbox-sig.sml new file mode 100644 index 0000000..ecd2ce7 --- /dev/null +++ b/cml/src/core-cml/.cm/SKEL/mailbox-sig.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f1d"Event"ac"MAILBOX"h0 \ No newline at end of file diff --git a/cml/src/core-cml/.cm/SKEL/mailbox.sml b/cml/src/core-cml/.cm/SKEL/mailbox.sml new file mode 100644 index 0000000..89d9e3d --- /dev/null +++ b/cml/src/core-cml/.cm/SKEL/mailbox.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f2d"SMLofNJ"d"List"ad"Mailbox"jh2ad"R"gp1d"RepTypes"ad"S"gp1d"Scheduler"h1egp1c"MAILBOX" \ No newline at end of file diff --git a/cml/src/core-cml/.cm/SKEL/queue.sml b/cml/src/core-cml/.cm/SKEL/queue.sml new file mode 100644 index 0000000..c1a1294 --- /dev/null +++ b/cml/src/core-cml/.cm/SKEL/queue.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f1d"RepTypes"ad"Q"j0 \ No newline at end of file diff --git a/cml/src/core-cml/.cm/SKEL/rep-types.sml b/cml/src/core-cml/.cm/SKEL/rep-types.sml new file mode 100644 index 0000000..da48a14 --- /dev/null +++ b/cml/src/core-cml/.cm/SKEL/rep-types.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f3d"StringCvt"d"SMLofNJ"d"Int"ad"RepTypes"h0 \ No newline at end of file diff --git a/cml/src/core-cml/.cm/SKEL/running.sml b/cml/src/core-cml/.cm/SKEL/running.sml new file mode 100644 index 0000000..1f2f348 --- /dev/null +++ b/cml/src/core-cml/.cm/SKEL/running.sml @@ -0,0 +1,2 @@ +Skeleton 5 +ad"Running"h0 \ No newline at end of file diff --git a/cml/src/core-cml/.cm/SKEL/scheduler.sml b/cml/src/core-cml/.cm/SKEL/scheduler.sml new file mode 100644 index 0000000..7561ed1 --- /dev/null +++ b/cml/src/core-cml/.cm/SKEL/scheduler.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f7d"Unsafe"d"OS"Cd"Debug"SMLofNJ"d"Q"RepTypes"d"Time"Nad"Scheduler"jh3ad"R"gp1)ad"Sig"gp1d"Signals"ad"IT"gp2d"IntervalTimer"h0 \ No newline at end of file diff --git a/cml/src/core-cml/.cm/SKEL/sync-var-sig.sml b/cml/src/core-cml/.cm/SKEL/sync-var-sig.sml new file mode 100644 index 0000000..b2219e2 --- /dev/null +++ b/cml/src/core-cml/.cm/SKEL/sync-var-sig.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f1d"CML"ac"SYNC_VAR"h0 \ No newline at end of file diff --git a/cml/src/core-cml/.cm/SKEL/sync-var.sml b/cml/src/core-cml/.cm/SKEL/sync-var.sml new file mode 100644 index 0000000..fef967c --- /dev/null +++ b/cml/src/core-cml/.cm/SKEL/sync-var.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f2d"SMLofNJ"d"Q"ad"SyncVar"jh2ad"R"gp1d"RepTypes"ad"S"gp1d"Scheduler"gp1c"SYNC_VAR" \ No newline at end of file diff --git a/cml/src/core-cml/.cm/SKEL/thread-sig.sml b/cml/src/core-cml/.cm/SKEL/thread-sig.sml new file mode 100644 index 0000000..9242f37 --- /dev/null +++ b/cml/src/core-cml/.cm/SKEL/thread-sig.sml @@ -0,0 +1,2 @@ +Skeleton 5 +ac"THREAD"h0 \ No newline at end of file diff --git a/cml/src/core-cml/.cm/SKEL/thread.sml b/cml/src/core-cml/.cm/SKEL/thread.sml new file mode 100644 index 0000000..f528698 --- /dev/null +++ b/cml/src/core-cml/.cm/SKEL/thread.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f5Cd"Word"d"SMLofNJ"d"List"d"Int"d"Event"Nad"Thread"jh2ad"R"gp1d"RepTypes"ad"S"gp1d"Scheduler"h1egp1c"THREAD" \ No newline at end of file diff --git a/cml/src/core-cml/.cm/SKEL/timeout-sig.sml b/cml/src/core-cml/.cm/SKEL/timeout-sig.sml new file mode 100644 index 0000000..373b15a --- /dev/null +++ b/cml/src/core-cml/.cm/SKEL/timeout-sig.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f1d"Time"ac"TIME_OUT"h0 \ No newline at end of file diff --git a/cml/src/core-cml/.cm/SKEL/timeout.sml b/cml/src/core-cml/.cm/SKEL/timeout.sml new file mode 100644 index 0000000..32d9031 --- /dev/null +++ b/cml/src/core-cml/.cm/SKEL/timeout.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f3d"SMLofNJ"Time"d"Event"ad"TimeOut"jh2ad"R"gp1d"RepTypes"ad"S"gp1d"Scheduler"h2egp1c"TIME_OUT"f1 \ No newline at end of file diff --git a/cml/src/core-cml/.cm/SKEL/version.sml b/cml/src/core-cml/.cm/SKEL/version.sml new file mode 100644 index 0000000..87ca246 --- /dev/null +++ b/cml/src/core-cml/.cm/SKEL/version.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f1d"Int"ad"Version"j0 \ No newline at end of file diff --git a/cml/src/core-cml/.cm/amd64-unix/barrier-sig.sml b/cml/src/core-cml/.cm/amd64-unix/barrier-sig.sml new file mode 100644 index 0000000..5b7794c Binary files /dev/null and b/cml/src/core-cml/.cm/amd64-unix/barrier-sig.sml differ diff --git a/cml/src/core-cml/.cm/amd64-unix/barrier.sml b/cml/src/core-cml/.cm/amd64-unix/barrier.sml new file mode 100644 index 0000000..785c40a Binary files /dev/null and b/cml/src/core-cml/.cm/amd64-unix/barrier.sml differ diff --git a/cml/src/core-cml/.cm/amd64-unix/channel-sig.sml b/cml/src/core-cml/.cm/amd64-unix/channel-sig.sml new file mode 100644 index 0000000..dd4af5c Binary files /dev/null and b/cml/src/core-cml/.cm/amd64-unix/channel-sig.sml differ diff --git a/cml/src/core-cml/.cm/amd64-unix/channel.sml b/cml/src/core-cml/.cm/amd64-unix/channel.sml new file mode 100644 index 0000000..70d1d29 Binary files /dev/null and b/cml/src/core-cml/.cm/amd64-unix/channel.sml differ diff --git a/cml/src/core-cml/.cm/amd64-unix/cleanup.sml b/cml/src/core-cml/.cm/amd64-unix/cleanup.sml new file mode 100644 index 0000000..a5fc46b Binary files /dev/null and b/cml/src/core-cml/.cm/amd64-unix/cleanup.sml differ diff --git a/cml/src/core-cml/.cm/amd64-unix/cml-cleanup-sig.sml b/cml/src/core-cml/.cm/amd64-unix/cml-cleanup-sig.sml new file mode 100644 index 0000000..ddcd497 Binary files /dev/null and b/cml/src/core-cml/.cm/amd64-unix/cml-cleanup-sig.sml differ diff --git a/cml/src/core-cml/.cm/amd64-unix/cml-sig.sml b/cml/src/core-cml/.cm/amd64-unix/cml-sig.sml new file mode 100644 index 0000000..03f7493 Binary files /dev/null and b/cml/src/core-cml/.cm/amd64-unix/cml-sig.sml differ diff --git a/cml/src/core-cml/.cm/amd64-unix/cml.sml b/cml/src/core-cml/.cm/amd64-unix/cml.sml new file mode 100644 index 0000000..6c270ff Binary files /dev/null and b/cml/src/core-cml/.cm/amd64-unix/cml.sml differ diff --git a/cml/src/core-cml/.cm/amd64-unix/debug.sml b/cml/src/core-cml/.cm/amd64-unix/debug.sml new file mode 100644 index 0000000..57b733c Binary files /dev/null and b/cml/src/core-cml/.cm/amd64-unix/debug.sml differ diff --git a/cml/src/core-cml/.cm/amd64-unix/event-sig.sml b/cml/src/core-cml/.cm/amd64-unix/event-sig.sml new file mode 100644 index 0000000..0672850 Binary files /dev/null and b/cml/src/core-cml/.cm/amd64-unix/event-sig.sml differ diff --git a/cml/src/core-cml/.cm/amd64-unix/event.sml b/cml/src/core-cml/.cm/amd64-unix/event.sml new file mode 100644 index 0000000..0e36a5f Binary files /dev/null and b/cml/src/core-cml/.cm/amd64-unix/event.sml differ diff --git a/cml/src/core-cml/.cm/amd64-unix/io-manager.sml b/cml/src/core-cml/.cm/amd64-unix/io-manager.sml new file mode 100644 index 0000000..07919c4 Binary files /dev/null and b/cml/src/core-cml/.cm/amd64-unix/io-manager.sml differ diff --git a/cml/src/core-cml/.cm/amd64-unix/mailbox-sig.sml b/cml/src/core-cml/.cm/amd64-unix/mailbox-sig.sml new file mode 100644 index 0000000..598008b Binary files /dev/null and b/cml/src/core-cml/.cm/amd64-unix/mailbox-sig.sml differ diff --git a/cml/src/core-cml/.cm/amd64-unix/mailbox.sml b/cml/src/core-cml/.cm/amd64-unix/mailbox.sml new file mode 100644 index 0000000..1125bc0 Binary files /dev/null and b/cml/src/core-cml/.cm/amd64-unix/mailbox.sml differ diff --git a/cml/src/core-cml/.cm/amd64-unix/queue.sml b/cml/src/core-cml/.cm/amd64-unix/queue.sml new file mode 100644 index 0000000..4dd86de Binary files /dev/null and b/cml/src/core-cml/.cm/amd64-unix/queue.sml differ diff --git a/cml/src/core-cml/.cm/amd64-unix/rep-types.sml b/cml/src/core-cml/.cm/amd64-unix/rep-types.sml new file mode 100644 index 0000000..a59ca36 Binary files /dev/null and b/cml/src/core-cml/.cm/amd64-unix/rep-types.sml differ diff --git a/cml/src/core-cml/.cm/amd64-unix/running.sml b/cml/src/core-cml/.cm/amd64-unix/running.sml new file mode 100644 index 0000000..22bda0e Binary files /dev/null and b/cml/src/core-cml/.cm/amd64-unix/running.sml differ diff --git a/cml/src/core-cml/.cm/amd64-unix/scheduler.sml b/cml/src/core-cml/.cm/amd64-unix/scheduler.sml new file mode 100644 index 0000000..9656e67 Binary files /dev/null and b/cml/src/core-cml/.cm/amd64-unix/scheduler.sml differ diff --git a/cml/src/core-cml/.cm/amd64-unix/sync-var-sig.sml b/cml/src/core-cml/.cm/amd64-unix/sync-var-sig.sml new file mode 100644 index 0000000..f618a18 Binary files /dev/null and b/cml/src/core-cml/.cm/amd64-unix/sync-var-sig.sml differ diff --git a/cml/src/core-cml/.cm/amd64-unix/sync-var.sml b/cml/src/core-cml/.cm/amd64-unix/sync-var.sml new file mode 100644 index 0000000..86f6ca6 Binary files /dev/null and b/cml/src/core-cml/.cm/amd64-unix/sync-var.sml differ diff --git a/cml/src/core-cml/.cm/amd64-unix/thread-sig.sml b/cml/src/core-cml/.cm/amd64-unix/thread-sig.sml new file mode 100644 index 0000000..5b3e5d8 Binary files /dev/null and b/cml/src/core-cml/.cm/amd64-unix/thread-sig.sml differ diff --git a/cml/src/core-cml/.cm/amd64-unix/thread.sml b/cml/src/core-cml/.cm/amd64-unix/thread.sml new file mode 100644 index 0000000..b27f3c6 Binary files /dev/null and b/cml/src/core-cml/.cm/amd64-unix/thread.sml differ diff --git a/cml/src/core-cml/.cm/amd64-unix/timeout-sig.sml b/cml/src/core-cml/.cm/amd64-unix/timeout-sig.sml new file mode 100644 index 0000000..2c79098 Binary files /dev/null and b/cml/src/core-cml/.cm/amd64-unix/timeout-sig.sml differ diff --git a/cml/src/core-cml/.cm/amd64-unix/timeout.sml b/cml/src/core-cml/.cm/amd64-unix/timeout.sml new file mode 100644 index 0000000..098ba80 Binary files /dev/null and b/cml/src/core-cml/.cm/amd64-unix/timeout.sml differ diff --git a/cml/src/core-cml/.cm/amd64-unix/version.sml b/cml/src/core-cml/.cm/amd64-unix/version.sml new file mode 100644 index 0000000..5556a4e Binary files /dev/null and b/cml/src/core-cml/.cm/amd64-unix/version.sml differ diff --git a/cml/src/core-cml/barrier-sig.sml b/cml/src/core-cml/barrier-sig.sml new file mode 100644 index 0000000..f2f18d2 --- /dev/null +++ b/cml/src/core-cml/barrier-sig.sml @@ -0,0 +1,35 @@ +(* barrier-sig.sml + * + * COPYRIGHT (c) 2011 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * Support for barrier synchronization with global state. This mechanism + * is inspired by the similar CHP mechanism (http://www.cs.kent.ac.uk/projects/ofa/chp/). + *) + +signature BARRIER = + sig + + type 'a barrier + type 'a enrollment + + (* create a new barrier. The first argument is the update function that + * is applied to the global state whenever a barrier synchronization occurs. + * The second argument is the initial global state. + *) + val barrier : ('a -> 'a) -> 'a -> 'a barrier + + (* enroll in a barrier *) + val enroll : 'a barrier -> 'a enrollment + + (* synchronize on a barrier *) + val wait : 'a enrollment -> 'a + + (* resign from an enrolled barrier *) + val resign : 'a enrollment -> unit + + (* get the current state of the barrier *) + val value : 'a enrollment -> 'a + + end + diff --git a/cml/src/core-cml/barrier.sml b/cml/src/core-cml/barrier.sml new file mode 100644 index 0000000..3c02d81 --- /dev/null +++ b/cml/src/core-cml/barrier.sml @@ -0,0 +1,101 @@ +(* barrier.sml + * + * COPYRIGHT (c) 2011 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + *) + +structure Barrier :> BARRIER = + struct + + structure S = Scheduler + + type 'a cont = 'a SMLofNJ.Cont.cont + val callcc = SMLofNJ.Cont.callcc + val throw = SMLofNJ.Cont.throw + + datatype 'a result = RAISE of exn | VALUE of 'a + + datatype 'a barrier = BAR of { + state : 'a ref, + update : 'a -> 'a, + nEnrolled : int ref, + nWaiting : int ref, + waiting : (S.thread_id * 'a result cont) list ref + } + + datatype status = ENROLLED | WAITING | RESIGNED + + datatype 'a enrollment = ENROLL of { + bar : 'a barrier, + sts : status ref (* current status of this enrollment *) + } + + (* create a new barrier. The first argument is the update function that + * is applied to the global state whenever a barrier synchronization occurs. + * The second argument is the initial global state. + *) + fun barrier update init = BAR{ + state = ref init, + update = update, + nEnrolled = ref 0, + nWaiting = ref 0, + waiting = ref [] + } + + (* enroll in a barrier *) + fun enroll (bar as BAR{nEnrolled, ...}) = ( + S.atomicBegin(); + nEnrolled := !nEnrolled + 1; + S.atomicEnd(); + ENROLL{bar = bar, sts = ref ENROLLED}) + + fun wakeupThd result (tid, resumeK) = + S.enqueueThread( + tid, callcc(fn k => (callcc(fn k' => throw k k'); throw resumeK result))) + + fun return (RAISE exn) = raise exn + | return (VALUE x) = x + + (* synchronize on a barrier *) + fun wait (ENROLL{bar=BAR{state, update, nEnrolled, nWaiting, waiting}, sts}) = ( + S.atomicBegin(); + case !sts + of ENROLLED => ( + sts := WAITING; + nWaiting := !nWaiting+1; + if (!nWaiting = !nEnrolled) + then let (* all threads are at the barrier, so we can proceed *) + val result = let + val x = update(!state) + in + state := x; + VALUE x + end handle exn => RAISE exn + in + List.app (wakeupThd result) (!waiting); + nWaiting := 0; + waiting := []; + S.atomicEnd (); + return result + end + else ( + sts := WAITING; + return (callcc (fn resumeK => ( + waiting := (S.getCurThread(), resumeK) :: !waiting; + S.atomicDispatch()))))) + | WAITING => (S.atomicEnd(); raise Fail "multiple barrier waits") + | RESIGNED => (S.atomicEnd(); raise Fail "barrier wait after resignation") + (* end case *)) + + (* resign from an enrolled barrier *) + fun resign (ENROLL{bar, sts}) = ( + S.atomicBegin(); + case !sts + of RESIGNED => () (* ignore multiple resignations *) + | WAITING => (S.atomicEnd(); raise Fail "resign while waiting") + | ENROLLED => (sts := RESIGNED; S.atomicEnd())) + + (* get the current state of the barrier *) + fun value (ENROLL{bar=BAR{state, ...}, ...}) = !state + + end diff --git a/cml/src/core-cml/channel-sig.sml b/cml/src/core-cml/channel-sig.sml new file mode 100644 index 0000000..cb664bc --- /dev/null +++ b/cml/src/core-cml/channel-sig.sml @@ -0,0 +1,29 @@ +(* channel-sig.sml + * + * COPYRIGHT (c) 1995 AT&T Bell Laboratories. + * COPYRIGHT (c) 1989-1991 John H. Reppy + * + * The representation of synchronous channels. + *) + +signature CHANNEL = + sig + + type 'a chan + type 'a event + + val channel : unit -> 'a chan + + val sameChannel : ('a chan * 'a chan) -> bool + + val send : ('a chan * 'a) -> unit + val recv : 'a chan -> 'a + + val sendEvt : ('a chan * 'a) -> unit event + val recvEvt : 'a chan -> 'a event + + val sendPoll : ('a chan * 'a) -> bool + val recvPoll : 'a chan -> 'a option + + end + diff --git a/cml/src/core-cml/channel.sml b/cml/src/core-cml/channel.sml new file mode 100644 index 0000000..2958f36 --- /dev/null +++ b/cml/src/core-cml/channel.sml @@ -0,0 +1,241 @@ +(* channel.sml + * + * COPYRIGHT (c) 1995 AT&T Bell Laboratories. + * COPYRIGHT (c) 1989-1991 John H. Reppy + * + * The representation of synchronous channels. + * + * To ensure that we always leave the atomic region exactly once, we + * require that the blocking operation be responsible for leaving the + * atomic region (in the event case, it must also execute the clean-up + * action). The doFn always transfers control to the blocked thread + * without leaving the atomic region. Note thet the send (and sendEvt) + * blockFns run using the receiver's thread ID. + *) + +structure Channel : sig + + type 'a event + + include CHANNEL + + val resetChan : 'a chan -> unit + + end = struct + + structure T = Thread + structure S = Scheduler + structure R = RepTypes + + type 'a event = 'a Event.event + + type 'a cont = 'a SMLofNJ.Cont.cont + val callcc = SMLofNJ.Cont.callcc + val throw = SMLofNJ.Cont.throw + + (* Some inline functions to improve performance *) + fun enqueue (R.Q{rear, ...}, x) = rear := x :: !rear + + datatype 'a chan = CHAN of { + priority : int ref, + inQ : (R.trans_id ref * 'a cont) R.queue, + outQ : (R.trans_id ref * (R.thread_id * 'a cont) cont) R.queue + } + + fun resetChan (CHAN{priority, inQ, outQ}) = ( + priority := 1; + Q.reset inQ; + Q.reset outQ) + + fun channel () = CHAN{priority=ref 1, inQ=Q.queue(), outQ=Q.queue()} + + (* sameChannel : ('a chan * 'a chan) -> bool *) + fun sameChannel (CHAN{inQ=in1, ...}, CHAN{inQ=in2, ...}) = + Q.sameQ(in1, in2) + + (* create a new transaction ID *) + fun mkId () = ref(R.TRANS(S.getCurThread())) + + (* given a transaction ID, get its thread ID and mark it cancelled. *) + fun getIdFromTrans (transId as ref(R.TRANS tid)) = ( + transId := R.CANCEL; + tid) + + (* given a transaction ID, set the current thread to its thread ID + * and mark it cancelled. + *) + fun setCurThread transId = S.setCurThread(getIdFromTrans transId) + + datatype 'a q_item + = NoItem + | Item of (R.trans_id ref * 'a cont) + + (* bump a priority value by one, returning the old value *) + fun bumpPriority (p as ref n) = (p := n+1; n) + + (* functions to clean channel input and output queues *) + local + fun clean [] = [] + | clean ((ref R.CANCEL, _)::r) = clean r + | clean l = l + fun cleanRev ([], l) = l + | cleanRev ((ref R.CANCEL, _)::r, l) = cleanRev (r, l) + | cleanRev (x::r, l) = cleanRev (r, x::l) + fun cleanAll l = let + fun rev ([], l) = l + | rev (x::r, l) = rev(r, x::l) + in + rev (cleanRev (l, []), []) + end + in + fun cleanAndChk (priority, R.Q{front, rear}) = let + fun cleanFront [] = cleanRear (! rear) + | cleanFront f = (case (clean f) + of [] => cleanRear (! rear) + | f' => (front := f'; bumpPriority priority) + (* end case *)) + and cleanRear [] = 0 + | cleanRear r = ( + rear := []; + case (cleanRev (r, [])) + of [] => 0 + | rr => (front := rr; bumpPriority priority) + (* end case *)) + in + cleanFront (! front) + end + fun cleanAndRemove (R.Q{front, rear, ...}) = let + fun cleanFront [] = cleanRear (! rear) + | cleanFront f = (case (clean f) + of [] => cleanRear (! rear) + | (item::rest) => (front := rest; Item item) + (* end case *)) + and cleanRear [] = NoItem + | cleanRear r = ( + rear := []; + case (cleanRev (r, [])) + of [] => NoItem + | (item::rest) => (front := rest; Item item) + (* end case *)) + in + cleanFront (! front) + end + fun cleanAndEnqueue (R.Q{front, rear, ...}, item) = (case cleanAll(!front) + of [] => (front := cleanRev(!rear, [item]); rear := []) + | f => (front := f; rear := item :: cleanAll(! rear)) + (* end case *)) + end (* local *) + + fun impossible () = raise Fail "Channel: impossible" + + fun send (CHAN{priority, inQ, outQ}, msg) = ( + S.atomicBegin(); + case (cleanAndRemove inQ) + of Item(rid, rkont) => callcc (fn sendK => ( + S.enqueueAndSwitchCurThread(sendK, getIdFromTrans rid); + priority := 1; + throw rkont msg)) + | NoItem => let + val (recvId, recvK) = callcc (fn sendK => ( + enqueue (outQ, (mkId(), sendK)); + S.atomicDispatch())) + in + S.atomicSwitchTo (recvId, recvK, msg) + end + (* end case *)) + + fun sendEvt (CHAN{priority, inQ, outQ}, msg) = let + fun doFn () = let + val (transId, rkont) = Q.dequeue inQ + in + callcc (fn sendK => ( + S.enqueueAndSwitchCurThread(sendK, getIdFromTrans transId); + priority := 1; + throw rkont msg)) + end + fun blockFn {transId, cleanUp, next} = let + val (recvId, recvK) = callcc (fn sendK => ( + cleanAndEnqueue (outQ, (transId, sendK)); + next(); + impossible ())) + in + cleanUp(); + S.atomicSwitchTo (recvId, recvK, msg) + end + fun pollFn () = (case (cleanAndChk (priority, inQ)) + of 0 => R.BLOCKED blockFn + | p => R.ENABLED{prio=p, doFn=doFn} + (* end case *)) + in + R.BEVT[pollFn] + end + + fun sendPoll (CHAN{priority, inQ, outQ}, msg) = callcc (fn sendK => ( + S.atomicBegin(); + case (cleanAndRemove inQ) + of Item(rid, rkont) => ( + callcc (fn sendK => ( + S.enqueueAndSwitchCurThread(sendK, getIdFromTrans rid); + priority := 1; + throw rkont msg)); + true) + | NoItem => (S.atomicEnd(); false) + (* end case *))) + + fun recv (CHAN{priority, inQ, outQ}) = callcc (fn recvK => ( + S.atomicBegin (); + case (cleanAndRemove outQ) + of Item(transId, sendK) => let + val myId = S.getCurThread() + in + setCurThread transId; + priority := 1; + throw sendK (myId, recvK) + end + | NoItem => ( + enqueue (inQ, (mkId(), recvK)); + S.atomicDispatch()) + (* end case *))) + + fun recvEvt (CHAN{priority, inQ, outQ}) = let + fun doFn () = let + val (transId, sendK) = Q.dequeue outQ + val myId = S.getCurThread() + in + setCurThread transId; + priority := 1; + callcc (fn recvK => throw sendK (myId, recvK)) + end + fun blockFn {transId, cleanUp, next} = let + val msg = callcc (fn recvK => ( + cleanAndEnqueue (inQ, (transId, recvK)); + next (); + impossible())) + in + cleanUp(); + S.atomicEnd(); + msg + end + fun pollFn () = (case (cleanAndChk (priority, outQ)) + of 0 => R.BLOCKED blockFn + | p => R.ENABLED{prio=p, doFn=doFn} + (* end case *)) + in + R.BEVT[pollFn] + end + + fun recvPoll (CHAN{priority, inQ, outQ}) = ( + S.atomicBegin (); + case (cleanAndRemove outQ) + of Item(transId, sendK) => SOME(callcc (fn recvK => + let + val myId = S.getCurThread() + in + setCurThread transId; + priority := 1; + throw sendK (myId, recvK) + end)) + | NoItem => (S.atomicEnd(); NONE) + (* end case *)) + + end diff --git a/cml/src/core-cml/cleanup.sml b/cml/src/core-cml/cleanup.sml new file mode 100644 index 0000000..fb27214 --- /dev/null +++ b/cml/src/core-cml/cleanup.sml @@ -0,0 +1,216 @@ +(* cleanup.sml + * + * COPYRIGHT (c) 1997 Bell Labs, Lucent Technologies. + * COPYRIGHT (c) 1996 AT&T Research. + *) + +structure CleanUp : sig + + include CML_CLEANUP + + val clean : when -> unit + + val exportFnCleanup : unit -> unit + + val chanCleaner : (string * when list * (when -> unit)) + val servCleaner : (string * when list * (when -> unit)) + + end = struct + + datatype when = AtInit | AtInitFn | AtShutdown | AtExit + (* The CML clean-up times are somewhat different than the SML/NJ + * times. + * + * AtInit initialization of a program that is being run + * under RunCML.doit. + * AtInitFn initialization of a stand-alone program that was + * generated by exportFn. + * AtShutdown normal program exit of a CML program running + * under RunCML.doit. + * AtExit normal program exit of a stand-alone CML program. + * + * Note that the clean-up routines run while CML is still active. It + * may also be useful for an application to register clean-up routines + * with SML/NJ (AtExportFn actions are the most useful). + *) + + (* at all times *) + val atAll = [AtExit, AtShutdown, AtInit, AtInitFn] + + val hooks = ref ([] : (string * when list * (when -> unit)) list) + + local + structure SV = SyncVar + val lockV = SV.mVarInit () + in + fun lock () = SV.mTake lockV + fun unlock () = SV.mPut(lockV, ()) + fun protect f x = if !Running.isRunning + then let + val _ = lock() + val res = (f x) handle ex => (unlock(); raise ex) + in + unlock (); res + end + else f x + end (* local *) + + (* return the list of hooks that apply at when. *) + fun filter when = let + fun f [] = [] + | f ((item as (_, whenLst, _))::r) = + if (List.exists when whenLst) then item :: (f r) else (f r) + in + f (!hooks) + end + + (* apply the clean-up function for the given time. In some cases, this + * causes the list of hooks to be redefined. + * NOTE: we reverse the order of application at initialization time. + *) + fun clean when = let + val _ = lock() + val cleanFns = (case when + of (AtInit | AtInitFn) => List.rev (filter (fn w => (w = when))) + | _ => filter (fn w => (w = when)) + (* end case *)) + fun initFnPred AtExit = true + | initFnPred _ = false + fun doCleaner (_, _, f) = CML.select [ + CML.joinEvt(CML.spawnc f when), + CML.timeOutEvt(Time.fromSeconds 1) + ] +(*DEBUG +fun doCleaner (tag, _, f) = ( +Debug.sayDebugTS(concat["do Cleaner \"", tag, "\"\n"]); +CML.select [ +CML.wrap(CML.joinEvt(CML.spawnc f when), fn _ => Debug.sayDebugTS " done\n"), +CML.wrap(CML.timeOutEvt(Time.fromSeconds 1), fn _ => Debug.sayDebugTS " timeout\n") +]) +DEBUG*) + in + (* remove uneccesary clean-up routines *) + case when + of AtInitFn => hooks := filter initFnPred + | _ => () + (* end case *); + unlock(); + (* now apply the clean-up routines *) + List.app doCleaner cleanFns + end + + (* find and remove the named hook from the hook list; return the hook + * and the new hook list; if the named hook doesn't exist, then return NONE. + *) + fun removeHook name = let + fun remove [] = NONE + | remove ((hook as (name', whenLst, cleanFn)) :: r) = + if (name = name') + then SOME((whenLst, cleanFn), r) + else (case (remove r) + of NONE => NONE + | SOME(hook', r') => SOME(hook', hook::r') + (* end case *)) + in + remove (! hooks) + end + + (* add the named cleaner. This returns the previous definition, or NONE. *) + fun addCleaner (arg as (name, _, _)) = (case (removeHook name) + of NONE => (hooks := arg :: !hooks; NONE) + | (SOME(oldHook, hookLst)) => ( + hooks := arg :: hookLst; SOME oldHook) + (* end case *)) + val addCleaner = protect addCleaner + + (* remove and return the named cleaner; return NONE if it is not found *) + fun removeCleaner name = (case (removeHook name) + of NONE => NONE + | (SOME(oldHook, hookLst)) => ( + hooks := hookLst; SOME oldHook) + (* end case *)) + val removeCleaner = protect removeCleaner + + exception Unlog + + datatype item = ITEM of { + key : string, + init : unit -> unit, + shut : unit -> unit + } + + val chanList = ref ([] : item list) + val mboxList = ref ([] : item list) + val serverList = ref ([] : item list) + + fun unlogItem l name = let + fun f [] = raise Unlog + | f ((x as ITEM{key, ...})::r) = if (name = key) then r else (x :: (f r)) + in + l := f(!l) + end + + fun appInit l = List.app (fn ITEM{init, ...} => init()) (List.rev (!l)) + + fun unlogAll () = (chanList := []; mboxList := []; serverList := []) + + val unlogChannel = protect (unlogItem chanList) + fun logChannel (name, ch) = let + fun f () = Channel.resetChan ch + in + (unlogChannel name) handle Unlog => (); + chanList := ITEM{key=name, init=f, shut=f} :: (!chanList) + end + val logChannel = fn x => protect logChannel x + + val unlogMailbox = protect (unlogItem mboxList) + fun logMailbox (name, mb) = let + fun f () = Mailbox.resetMbox mb + in + (unlogMailbox name) handle Unlog => (); + mboxList := ITEM{key=name, init=f, shut=f} :: (!mboxList) + end + val logMailbox = fn x => protect logMailbox x + + val unlogServer = protect (unlogItem serverList) + + fun logServer (name, f, g) = ( + (unlogServer name) handle Unlog => (); + serverList := ITEM{key=name, init=f, shut=g} :: (!serverList)) + val logServer = protect logServer + + fun startServers () = appInit serverList + + fun shutdownServers () = let + fun shut (ITEM{key, shut, ...}) = CML.select [ + CML.joinEvt(CML.spawn shut), + CML.timeOutEvt(Time.fromSeconds 2) + ] + in + app shut (!serverList) + end + + fun cleanServers (AtInit | AtInitFn) = startServers() + | cleanServers (AtShutdown | AtExit) = shutdownServers() + + (* clean the logged channels and mailboxes. *) + fun cleanChannels _ = (appInit chanList; appInit mboxList) + + (* The standard cleaners *) + val chanCleaner = ("Channels&Mailboxes", [AtInit,AtShutdown], cleanChannels) + val servCleaner = ("Servers", atAll, cleanServers) + + (* remove useless cleaners and clear the channel/mailbox logs + * prior to exporting a stand-alone CML program. + *) + fun exportFnCleanup () = let + fun exportFnPred (AtInitFn | AtExit) = true + | exportFnPred _ = false + in + cleanChannels (); + chanList := []; mboxList := []; + hooks := filter exportFnPred + end + + end (* CleanUp *) + diff --git a/cml/src/core-cml/cml-cleanup-sig.sml b/cml/src/core-cml/cml-cleanup-sig.sml new file mode 100644 index 0000000..112b7fb --- /dev/null +++ b/cml/src/core-cml/cml-cleanup-sig.sml @@ -0,0 +1,54 @@ +(* cml-cleanup-sig.sml + * + * COPYRIGHT (c) 1996 AT&T Research. + * + * Log/unlog channels and servers for initialization and termination. + *) + +signature CML_CLEANUP = + sig + + datatype when = AtInit | AtInitFn | AtShutdown | AtExit + (* The CML clean-up times are somewhat different than the SML/NJ + * times. + * + * AtInit initialization of a program that is being run + * under RunCML.doit. + * AtInitFn initialization of a stand-alone program that was + * generated by exportFn. + * AtShutdown normal program exit of a CML program running + * under RunCML.doit. + * AtExit normal program exit of a stand-alone CML program. + * + * Note that the clean-up routines run while CML is still active. It + * may also be useful for an application to register clean-up routines + * with SML/NJ (AtExportFn actions are the most useful). + *) + + val atAll : when list + (* at all times *) + + val addCleaner : (string * when list * (when -> unit)) + -> (when list * (when -> unit)) option + (* add the named cleaner. This returns the previous definition, or NONE. *) + + val removeCleaner : string -> (when list * (when -> unit)) option + (* remove and return the named cleaner; return NONE if it is not found *) + + exception Unlog + + val logMailbox : (string * 'a Mailbox.mbox) -> unit + val unlogMailbox : string -> unit + + val logChannel : (string * 'a CML.chan) -> unit + val unlogChannel : string -> unit + +(* + val logServer : (string * (when -> unit)) -> unit +*) + val logServer : (string * (unit -> unit) * (unit -> unit)) -> unit + val unlogServer : string -> unit + + val unlogAll : unit -> unit + + end diff --git a/cml/src/core-cml/cml-sig.sml b/cml/src/core-cml/cml-sig.sml new file mode 100644 index 0000000..4c7758a --- /dev/null +++ b/cml/src/core-cml/cml-sig.sml @@ -0,0 +1,19 @@ +(* cml-sig.sml + * + * COPYRIGHT (c) 1995 AT&T Bell Laboratories. + * COPYRIGHT (c) 1989-1991 John H. Reppy + * + * The interface to the core CML features. + *) + +signature CML = + sig + val version : {system : string, version_id : int list, date : string} + val banner : string + + include THREAD + include CHANNEL + include EVENT + include TIME_OUT + end + diff --git a/cml/src/core-cml/cml.sml b/cml/src/core-cml/cml.sml new file mode 100644 index 0000000..25b70f4 --- /dev/null +++ b/cml/src/core-cml/cml.sml @@ -0,0 +1,15 @@ +(* cml.sml + * + * COPYRIGHT (c) 1995 AT&T Bell Laboratories. + * COPYRIGHT (c) 1989-1991 John H. Reppy + *) + +structure CML : CML = + struct + open Version + open Thread + open Channel + open Event + open TimeOut + end + diff --git a/cml/src/core-cml/debug.sml b/cml/src/core-cml/debug.sml new file mode 100644 index 0000000..d310689 --- /dev/null +++ b/cml/src/core-cml/debug.sml @@ -0,0 +1,29 @@ +(* debug.sml + * + * COPYRIGHT (c) 1995 AT&T Bell Laboratories. + * COPYRIGHT (c) 1989-1991 John H. Reppy + * + * Debugging support for the CML core. + *) + +structure Debug : sig + + val sayDebug : string -> unit + val sayDebugTS : string -> unit + val sayDebugId : string -> unit + + end = struct + + val sayDebug : string -> unit = + Unsafe.CInterface.c_function "SMLNJ-RunT" "debug" + + fun sayDebugTS msg = sayDebug(concat["[", Time.fmt 3 (Time.now()), "] ", msg]) + + val getCurThread : unit -> RepTypes.thread_id = Unsafe.getVar + + fun sayDebugId msg = sayDebug(concat[ + RepTypes.tidToString(getCurThread()), " ", msg + ]) + + end + diff --git a/cml/src/core-cml/event-sig.sml b/cml/src/core-cml/event-sig.sml new file mode 100644 index 0000000..6b69d72 --- /dev/null +++ b/cml/src/core-cml/event-sig.sml @@ -0,0 +1,30 @@ +(* events-sig.sml + * + * COPYRIGHT (c) 1995 AT&T Bell Laboratories. + * COPYRIGHT (c) 1989-1991 John H. Reppy + * + * The representation of event values and the event combinators. + *) + +signature EVENT = + sig + + type 'a event + + val never : 'a event + val alwaysEvt : 'a -> 'a event + + val wrap : ('a event * ('a -> 'b)) -> 'b event + val wrapHandler : ('a event * (exn -> 'a)) -> 'a event + + val guard : (unit -> 'a event) -> 'a event + val withNack : (unit event -> 'a event) -> 'a event + + val choose : 'a event list -> 'a event + + val sync : 'a event -> 'a + + val select : 'a event list -> 'a + + end + diff --git a/cml/src/core-cml/event.sml b/cml/src/core-cml/event.sml new file mode 100644 index 0000000..7b67a07 --- /dev/null +++ b/cml/src/core-cml/event.sml @@ -0,0 +1,460 @@ +(* event.sml + * + * COPYRIGHT (c) 1995 AT&T Bell Laboratories. + * COPYRIGHT (c) 1989-1991 John H. Reppy + * + * The representation of event values and the event combinators. + * + * Some important requirements on the implementation of base event values: + * + * 1) The pollFn, doFn, and blockFn are always called from inside + * atomic regions. + * + * 2) The pollFn returns an integer priority: this is 0 when not enabled, + * ~1 for fixed priority, and a positive value for dynamic priority. + * The standard scheme is to associate a counter with the underlying + * synchronization object, and to increase it by one for each + * synchronization attempt. + * + * 3) The blockFn is responsible for exiting the atomic region; the doFns + * should NOT leave the atomic region. + * + * 4) The blockFn is responsible for executing the "cleanUp" action + * prior to leaving the atomic region. + *) + +structure Event : sig + + include EVENT + + val atomicCVarSet : RepTypes.cvar -> unit + val cvarGetEvt : RepTypes.cvar -> unit event + + end = struct + + structure R = RepTypes + structure S = Scheduler + + val capture = SMLofNJ.Cont.capture + val escape = SMLofNJ.Cont.escape + val callcc = SMLofNJ.Cont.callcc + val throw = SMLofNJ.Cont.throw + + (* Some inline functions to improve performance *) + fun map f = let + fun mapf [] = [] + | mapf [a] = [f a] + | mapf [a, b] = [f a, f b] + | mapf [a, b, c] = [f a, f b, f c] + | mapf (a::b::c::d::r) = (f a)::(f b)::(f c)::(f d)::(mapf r) + in + mapf + end + fun app f = let + fun appf [] = () + | appf (x::r) = (f x; appf r) + in + appf + end + fun foldl f init l = let + fun foldf ([], accum) = accum + | foldf (x::r, accum) = foldf(r, f(x, accum)) + in + foldf (l, init) + end + + fun error msg = raise Fail msg + + datatype event_status = datatype RepTypes.event_status + type 'a base_evt = 'a RepTypes.base_evt + datatype event = datatype RepTypes.event + + + (** Condition variables. Because these variables are set inside atomic + ** regions, we have to use different conventions for clean-up, etc. + ** Instead of requiring the blockFn continuation to call the cleanUp + ** action and to leave the atomic region, we call the cleanUp function + ** when setting the condition variable (in atomicCVarSet), and have the + ** invariant that the blockFn continuation is dispatched outside the + ** atomic region. + **) + + (* set a condition variable; we assume that this function is always + * executed in an atomic region. + *) + fun atomicCVarSet (R.CVAR state) = ( + case !state + of (R.CVAR_unset waiting) => let + val R.Q{rear, ...} = S.rdyQ1 + fun add [] = !rear + | add ({transId=ref R.CANCEL, ...}::r) = add r + | add ({transId as ref(R.TRANS tid), cleanUp, kont}::r) = ( + transId := R.CANCEL; + cleanUp(); + (tid, kont) :: (add r)) + in + state := R.CVAR_set 1; + rear := add waiting + end + | _ => error "cvar already set" + (* end case *)) + + (* the event constructor for waiting on a cvar *) + fun cvarGetEvt (R.CVAR(state)) = let + fun blockFn {transId, cleanUp, next} = callcc (fn k => let + val (R.CVAR_unset waiting) = !state + val item = {transId=transId, cleanUp=cleanUp, kont=k} + in + state := R.CVAR_unset(item :: waiting); + next () + end) + fun pollFn () = (case !state + of (R.CVAR_set n) => let + fun doFn () = (state := R.CVAR_set 1; S.atomicEnd()) + in + state := R.CVAR_set(n+1); + ENABLED{prio=n, doFn=doFn} + end + | _ => BLOCKED blockFn + (* end case *)) + in + BEVT[pollFn] + end + + + fun alwaysEvt v = BEVT[fn () => R.ENABLED{ + prio= ~1, doFn=fn () => (S.atomicEnd(); v) + }] + + val never = BEVT[] + + val guard = GUARD + + val withNack = W_NACK + + fun choose (el : 'a event list) = let + fun gatherBEvts ([], l) = BEVT l + | gatherBEvts (BEVT[] :: r, l) = gatherBEvts (r, l) + | gatherBEvts (BEVT[bev] :: r, bevs') = gatherBEvts (r, bev::bevs') + | gatherBEvts (BEVT bevs :: r, bevs') = gatherBEvts (r, bevs @ bevs') + | gatherBEvts (evts, []) = gather (evts, []) + | gatherBEvts (evts, l) = gather (evts, [BEVT l]) + and gather ([], [evt]) = evt + | gather ([], evts) = CHOOSE evts + | gather (CHOOSE evts :: r, evts') = gather (r, evts @ evts') + | gather (BEVT bevs :: r, BEVT bevs' :: r') + = gather (r, BEVT(bevs @ bevs') :: r') + | gather (evt :: r, evts') = gather (r, evt :: evts') + in + gatherBEvts (rev el, []) + end + + fun wrap (evt, wfn) = let + fun wrapBaseEvt pollFn () = (case pollFn() + of ENABLED{prio, doFn} => ENABLED{prio=prio, doFn = wfn o doFn} + | (BLOCKED blockFn) => BLOCKED(wfn o blockFn) + (* end case *)) + fun wrap' (BEVT bevs) = BEVT(map wrapBaseEvt bevs) + | wrap' (CHOOSE evts) = CHOOSE(map wrap' evts) + | wrap' (GUARD g) = GUARD(fn () => wrap(g(), wfn)) + | wrap' (W_NACK f) = W_NACK(fn evt => wrap(f evt, wfn)) + in + wrap' evt + end + + fun wrapHandler (evt, hfn) = let + fun wrap f x = ((f x) handle exn => hfn exn) + fun wrapBaseEvt pollFn () = (case pollFn() + of ENABLED{prio, doFn} => ENABLED{prio=prio, doFn = wrap doFn} + | (BLOCKED blockFn) => BLOCKED(wrap blockFn) + (* end case *)) + fun wrap' (BEVT bevs) = BEVT(map wrapBaseEvt bevs) + | wrap' (CHOOSE evts) = CHOOSE(map wrap' evts) + | wrap' (GUARD g) = GUARD(fn () => wrapHandler(g(), hfn)) + | wrap' (W_NACK f) = W_NACK(fn evt => wrapHandler(f evt, hfn)) + in + wrap' evt + end + + datatype 'a event_group + = BASE_GRP of 'a base_evt list + | GRP of 'a event_group list + | NACK_GRP of (R.cvar * 'a event_group) + +(*+DEBUG +fun sayGrp (msg, eg) = let + fun f (BASE_GRP l, sl) = "BASE_GRP("::Int.toString(List.length l)::")"::sl + | f (GRP l, sl) = "GRP(" :: g(l, ")"::sl) + | f (NACK_GRP l, sl) = "NACK_GRP(" :: f(#2 l, ")"::sl) + and g ([], sl) = sl + | g ([x], sl) = f(x, sl) + | g (x::r, sl) = f(x, "," :: g(r, sl)) + in + Debug.sayDebugId(String.concat(msg :: ": " :: f(eg, ["\n"]))) + end +-DEBUG*) + + (* force the evaluation of any guards in an event group. *) + fun force (BEVT l) = BASE_GRP l + | force evt = let + fun force' (GUARD g) = force' (g ()) + | force' (W_NACK f) = let + val cvar = R.CVAR(ref(R.CVAR_unset [])) + in + NACK_GRP(cvar, force' (f (cvarGetEvt cvar))) + end + | force' (BEVT grp) = BASE_GRP grp + | force' (CHOOSE evts) = let + fun forceBL ([], bevs) = BASE_GRP bevs + | forceBL (evt::r, bevs') = (case (force' evt) + of (BASE_GRP bevs) => forceBL (r, bevs @ bevs') + | (GRP grp) => forceL (r, grp @ [BASE_GRP bevs']) + | grp => forceL (r, [grp, BASE_GRP bevs']) + (* end case *)) + and forceL ([], [grp]) = grp + | forceL ([], l) = GRP l + | forceL (evt :: r, l) = ( + case (force' evt, l) + of (BASE_GRP bevs, BASE_GRP bevs' :: r') => + forceL (r, BASE_GRP(bevs @ bevs') :: r') + | (GRP grp, l) => forceL (r, grp @ l) + | (grp, l) => forceL (r, grp :: l) + (* end case *)) + in + forceBL (evts, []) + end + in + force' evt + end + + local + val cnt = ref 0 + fun random i = let val j = !cnt + in + if (j = 1000000) then cnt := 0 else cnt := j+1; + Int.rem(j, i) + end + in + fun selectDoFn ([(_, doFn)], _) = doFn + | selectDoFn (l, n) = let + fun priority ~1 = n + | priority p = p + fun max ((p, doFn)::r, maxP, k, doFns) = let + val p = priority p + in + if (p > maxP) then max(r, p, 1, [doFn]) + else if (p = maxP) then max(r, maxP, k+1, doFn::doFns) + else max(r, maxP, k, doFns) + end + | max ([], _, k, [doFn]) = doFn + | max ([], _, k, doFns) = List.nth(doFns, random k) + in + max (l, 0, 0, []) + end + end + + fun mkFlg () = let val flg = ref(R.TRANS(S.getCurThread())) + in + (flg, fn () => flg := R.CANCEL) + end + + fun syncOnOneEvt (pollFn : 'a base_evt) = ( + S.atomicBegin (); + case (pollFn()) + of ENABLED{doFn, ...} => doFn() + | (BLOCKED blockFn) => let val (flg, setFlg) = mkFlg() + in + blockFn{transId=flg, cleanUp=setFlg, next=S.atomicDispatch} + end + (* end case *)) + + (* this function handles the case of synchronizing on a list of + * base events (w/o any negative acknowledgements). It also handles + * the case of synchronizing on NEVER. + *) + fun syncOnBEvts [] = S.dispatch() + | syncOnBEvts [bev] = syncOnOneEvt bev + | syncOnBEvts bevs = let + fun ext ([], blockFns) = capture (fn k => let + val escape = escape k + val (transId, setFlg) = mkFlg() + fun log [] = S.atomicDispatch () + | log (blockFn :: r) = + escape (blockFn { + transId = transId, + cleanUp = setFlg, + next = fn () => log r + }) + in + log blockFns; error "[log]" + end) + | ext (pollFn :: r, blockFns) = (case pollFn() + of ENABLED{prio, doFn} => extRdy (r, [(prio, doFn)], 1) + | (BLOCKED blockFn) => ext (r, blockFn::blockFns) + (* end case *)) +(** NOTE: maybe we should just keep track of the max priority? + ** What about fairness to fixed priority events (e.g., always, timeout?) + **) + and extRdy ([], doFns, n) = selectDoFn (doFns, n) () + | extRdy (pollFn :: r, doFns, n) = (case pollFn() + of ENABLED{prio, doFn} => extRdy (r, (prio, doFn)::doFns, n+1) + | _ => extRdy (r, doFns, n) + (* end case *)) + in + S.atomicBegin(); + ext (bevs, []) + end + + (* walk the event group tree, collecting the base events (with associated + * ack flags), and a list of flag sets. A flag set is a (cvar * ack flag list) + * pair, where the flags are those associated with the events covered by the + * nack cvar. + *) + fun collect grp = let + val unWrappedFlg = ref false + fun gatherWrapped (grp, bl, flgSets) = let + fun gather (BASE_GRP bevs, bl, allFlgs, flgSets) = let + fun append ([], bl, allFlgs) = (bl, allFlgs) + | append (bev::r, bl, allFlgs) = let + val flg = ref false + in + append (r, (bev, flg)::bl, flg::allFlgs) + end + val (bl', allFlgs') = append (bevs, bl, allFlgs) + in + (bl', allFlgs', flgSets) + end + | gather (GRP grp, bl, allFlgs, flgSets) = let + fun f (grp', (bl', allFlgs', flgSets')) = + gather (grp', bl', allFlgs', flgSets') + in + foldl f (bl, allFlgs, flgSets) grp + end + | gather (NACK_GRP(cvar, grp), bl, allFlgs, flgSets) = let + val (bl', allFlgs', flgSets') = + gather (grp, bl, [], flgSets) + in + (bl', allFlgs' @ allFlgs, (cvar, allFlgs') :: flgSets') + end + val (bl, _, flgSets) = gather (grp, bl, [], flgSets) + in + (bl, flgSets) + end + in + case grp + of (GRP _) => let + val unWrappedFlg = ref false + fun append ([], bl) = bl + | append (bev::r, bl) = append(r, (bev, unWrappedFlg)::bl) + fun gather (BASE_GRP bevs, bl, flgSets) = + (append(bevs, bl), flgSets) + | gather (GRP grp, bl, flgSets) = let + fun f (grp', (bl', flgSets')) = + gather(grp', bl', flgSets') + in + foldl f (bl, flgSets) grp + end + | gather (grp as NACK_GRP _, bl, flgSets) = + gatherWrapped (grp, bl, flgSets) + in + gather (grp, [], []) + end + | grp => gatherWrapped (grp, [], []) + (* end case *) + end + + (* this function handles the more complicated case of synchronization + * on groups of events where negative acknowledgements are involved. + *) + fun syncOnGrp grp = let + val (bl, flgSets) = collect grp + fun chkCVars () = let + (* chkCVar checks the flags of a flag set. If they are all false + * then the corresponding cvar is set to signal the negative ack. + *) + fun chkCVar (cvar, flgs) = let + fun chkFlgs [] = atomicCVarSet cvar + | chkFlgs ((ref true)::_) = () + | chkFlgs (_::r) = chkFlgs r + in + chkFlgs flgs + end + in + app chkCVar flgSets + end + fun ext ([], blockFns) = capture (fn k => let + val escape = escape k + val transId = ref(R.TRANS(S.getCurThread())) + fun setFlg flg () = ( + transId := R.CANCEL; flg := true; chkCVars()) + fun log [] = S.atomicDispatch () + | log ((blockFn, flg) :: r) = + escape (blockFn { + transId = transId, + cleanUp = setFlg flg, + next = fn () => log r + }) + in + log blockFns; error "[log]" + end) + | ext ((pollFn, flg) :: r, blockFns) = (case pollFn() + of ENABLED{prio, doFn} => extRdy (r, [(prio, (doFn, flg))], 1) + | (BLOCKED blockFn) => ext (r, (blockFn, flg)::blockFns) + (* end case *)) +(** NOTE: maybe we should just keep track of the max priority? + ** What about fairness to fixed priority events (e.g., always, timeout?) + **) + and extRdy ([], doFns, n) = let + val (doFn, flg) = selectDoFn (doFns, n) + in + flg := true; + chkCVars (); + doFn() + end + | extRdy ((pollFn, flg) :: r, doFns, n) = (case pollFn() + of ENABLED{prio, doFn} => + extRdy (r, (prio, (doFn, flg))::doFns, n+1) + | _ => extRdy (r, doFns, n) + (* end case *)) + in + S.atomicBegin(); ext (bl, []) + end + + fun sync ev = (case (force ev) + of (BASE_GRP bevs) => syncOnBEvts bevs + | grp => syncOnGrp grp + (* end case *)) + + fun select evts = let + fun forceBL ([], bevs) = BASE_GRP bevs + | forceBL (evt::r, bevs') = (case (force' evt) + of (BASE_GRP bevs) => forceBL (r, bevs @ bevs') + | (GRP grp) => forceL (r, grp @ [BASE_GRP bevs']) + | grp => forceL (r, [grp, BASE_GRP bevs']) + (* end case *)) + and forceL ([], [grp]) = grp + | forceL ([], l) = GRP l + | forceL (evt :: r, l) = ( + case (force' evt, l) + of (BASE_GRP bevs, BASE_GRP bevs' :: r') => + forceL (r, BASE_GRP(bevs @ bevs') :: r') + | (GRP grp, l) => forceL (r, grp @ l) + | (grp, l) => forceL (r, grp :: l) + (* end case *)) + and force' (GUARD g) = force' (g ()) + | force' (W_NACK f) = let + val cvar = R.CVAR(ref(R.CVAR_unset [])) + in + NACK_GRP(cvar, force' (f (cvarGetEvt cvar))) + end + | force' (BEVT grp) = BASE_GRP grp + | force' (CHOOSE evts) = forceBL (evts, []) + in + case forceBL(evts, []) + of (BASE_GRP bevs) => syncOnBEvts bevs + | grp => syncOnGrp grp + (* end case *) + end + + end; + diff --git a/cml/src/core-cml/io-manager.sml b/cml/src/core-cml/io-manager.sml new file mode 100644 index 0000000..6082731 --- /dev/null +++ b/cml/src/core-cml/io-manager.sml @@ -0,0 +1,122 @@ +(* io-manager.sml + * + * COPYRIGHT (c) 1995 AT&T Bell Laboratories. + * COPYRIGHT (c) 1989-1991 John H. Reppy + * + * This is a generic I/O manager for CML. It uses the OS.IO polling + * mechanism. + * NOTE: it currently does not work if more than one thread blocks on the same + * descriptor. + *) + +structure IOManager : sig + + type iodesc + type poll_desc + type poll_info + + val ioEvt : poll_desc -> poll_info Event.event + + val pollIO : unit -> unit + + val anyWaiting : unit -> bool + + end = struct + + structure R = RepTypes + structure S = Scheduler + + type iodesc = OS.IO.iodesc + type poll_desc = OS.IO.poll_desc + type poll_info = OS.IO.poll_info + + type io_wait_item = { + pd : poll_desc, + tid : R.trans_id ref, + cleanUp : unit -> unit, + k : poll_info SMLofNJ.Cont.cont + } + + val waiting = ref ([] : io_wait_item list) + + (* In some OSs (e.g., Linux) this may raise an EINTR error, even though + * it is non-blocking. + *) + fun poll l = OS.IO.poll(l, SOME(Time.zeroTime)) handle _ => [] + + (* NOTE: as in the case of condition variables (see event.sml), we need to + * do the cleanUp routine when we enable the ioEvt (instead of in the blockFn + * continuation). + *) + fun ioEvt pd = let + fun blockFn {transId, cleanUp, next} = let + val pi = SMLofNJ.Cont.callcc (fn k => let + val item = {pd=pd, tid=transId, cleanUp=cleanUp, k=k} + in + waiting := item :: !waiting; + next(); + raise Fail "impossible: ioEvt" + end) + in + pi + end + fun pollFn () = (case (poll [pd]) + of [pi] => R.ENABLED{prio= ~1, doFn=(fn () => (S.atomicEnd(); pi))} + | _ => R.BLOCKED blockFn + (* end case *)) + in + R.BEVT[pollFn] + end + +(* NOTE: this code works because SML/NJ doesn't use opaque signature matching + * on the OS.IO interface. + *) + fun sameDesc (pi, pd) = (OS.IO.infoToPollDesc pi = pd) + + (* Take an I/O waiting queue and return the cleaned queue along with the list + * of poll descriptors in the remaining elements. + *) + fun clean wq = let + fun cl ([] : io_wait_item list, pds, q) = (pds, q) + | cl ({tid=ref R.CANCEL, ...} :: r, pds, wq) = cl (r, pds, wq) + | cl ((item as {pd, ...}) :: r, pds, wq) = cl (r, pd::pds, item::wq) + in + cl (wq, [], []) + end + + (* enqueue a thread that is polling on the ready queue. We have to do some + * continuation hacking to pass the poll info to the thread. We also must + * catch the case where the transaction has been canceled, since a single + * thread might be polling on multiple descriptors. + *) + fun enqueue ({tid as ref(R.TRANS id), cleanUp, k, pd}, pi) = let + val uk = SMLofNJ.Cont.callcc (fn kk => ( + SMLofNJ.Cont.callcc (fn uk => SMLofNJ.Cont.throw kk uk); + SMLofNJ.Cont.throw k pi)) + in + tid := R.CANCEL; + cleanUp(); + S.enqueueThread (id, uk) + end + | enqueue ({tid=ref R.CANCEL, ...}, _) = () + + fun pollIO () = (case clean(! waiting) + of ([], _) => waiting := [] + | (pds, wq) => (case (poll pds) + of [] => waiting := List.rev wq + | l => let + fun filter ([], r, wq) = + waiting := List.revAppend(r, wq) + | filter (pi::pis, (item : io_wait_item)::r, wq) = + if sameDesc(pi, #pd item) + then (enqueue (item, pi); filter (pis, r, wq)) + else filter (pi::pis, r, item::wq) + in + filter (l, wq, []) + end + (* end case *)) + (* end case *)) + + fun anyWaiting () = (case !waiting of [] => false | _ => true) + + end diff --git a/cml/src/core-cml/mailbox-sig.sml b/cml/src/core-cml/mailbox-sig.sml new file mode 100644 index 0000000..c9a41e1 --- /dev/null +++ b/cml/src/core-cml/mailbox-sig.sml @@ -0,0 +1,24 @@ +(* mailbox-sig.sml + * + * COPYRIGHT (c) 1995 AT&T Bell Laboratories + * COPYRIGHT (c) 1989-1991 John H. Reppy + * + * Asynchronous channels (called mailboxes). + *) + +signature MAILBOX = + sig + + type 'a mbox + + val mailbox : unit -> 'a mbox + + val sameMailbox : ('a mbox * 'a mbox) -> bool + + val send : ('a mbox * 'a) -> unit + val recv : 'a mbox -> 'a + val recvEvt : 'a mbox -> 'a Event.event + val recvPoll : 'a mbox -> 'a option + + end (* MAILBOX *) + diff --git a/cml/src/core-cml/mailbox.sml b/cml/src/core-cml/mailbox.sml new file mode 100644 index 0000000..7b74776 --- /dev/null +++ b/cml/src/core-cml/mailbox.sml @@ -0,0 +1,156 @@ +(* mailbox.sml + * + * COPYRIGHT (c) 1995 AT&T Bell Laboratories + * COPYRIGHT (c) 1989-1991 John H. Reppy + * + * Asynchronous channels (called mailboxes). + *) + +structure Mailbox : sig + + include MAILBOX + + val resetMbox : 'a mbox -> unit + + end = struct + + structure R = RepTypes + structure S = Scheduler + + type 'a cont = 'a SMLofNJ.Cont.cont + val callcc = SMLofNJ.Cont.callcc + val throw = SMLofNJ.Cont.throw + + type 'a queue = {front : 'a list, rear : 'a list} + + fun enqueue ({front, rear}, x) = {front=front, rear=x::rear} + + fun dequeue ({front=x::r, rear}) = ({front=r, rear=rear}, x) + | dequeue ({front=[], rear}) = dequeue{front=List.rev rear, rear=[]} + + (* the state of a mailbox. The queue of the NONEMPTY constructor should + * never be empty (use EMPTY instead). + *) + datatype 'a state + = EMPTY of (R.trans_id ref * 'a cont) queue + | NONEMPTY of (int * 'a queue) + + datatype 'a mbox = MB of 'a state ref + + fun resetMbox (MB state) = (state := EMPTY{front=[], rear=[]}) + + fun mailbox () = MB(ref(EMPTY{front=[], rear=[]})) + + fun sameMailbox (MB s1, MB s2) = (s1 = s2) + + (* create a new transaction ID *) + fun mkId () = ref(R.TRANS(S.getCurThread())) + + (* given a transaction ID, get its thread ID and mark it cancelled. *) + fun getIdFromTrans (transId as ref(R.TRANS tid)) = ( + transId := R.CANCEL; + tid) + + datatype 'a q_item + = NoItem + | Item of (R.trans_id ref * 'a cont * 'a state) + + local + fun clean [] = [] + | clean ((ref R.CANCEL, _)::r) = clean r + | clean l = l + fun cleanRev ([], l) = l + | cleanRev ((ref R.CANCEL, _)::r, l) = cleanRev (r, l) + | cleanRev (x::r, l) = cleanRev (r, x::l) + in + fun cleanAndRemove (q as {front, rear}) = let + fun cleanFront [] = cleanRear rear + | cleanFront f = (case (clean f) + of [] => cleanRear rear + | ((id, k)::rest) => Item(id, k, EMPTY{front=rest, rear=rear}) + (* end case *)) + and cleanRear [] = NoItem + | cleanRear r = (case (cleanRev (r, [])) + of [] => NoItem + | ((id, k)::rest) => Item(id, k, EMPTY{front=rest, rear=[]}) + (* end case *)) + in + cleanFront front + end + end + + fun send (MB state, x) = ( + S.atomicBegin(); + case !state + of (EMPTY q) => (case (cleanAndRemove q) + of NoItem => ( + state := NONEMPTY(1, {front=[x], rear=[]}); + S.atomicEnd()) + | (Item(transId, recvK, state')) => callcc (fn k => ( + state := state'; + S.enqueueAndSwitchCurThread(k, getIdFromTrans transId); + throw recvK x)) + (* end case *)) + | NONEMPTY(p, q) => + (* we force a context switch here to prevent a producer from + * outrunning a consumer. + *) + callcc (fn k => ( + state := NONEMPTY(p, enqueue(q, x)); + S.atomicYield k)) + (* end case *)) + + fun getMsg (state, q) = let + val (q', msg) = dequeue q + in + case q' + of {front=[], rear=[]} => state := EMPTY{front=[], rear=[]} + | _ => state := NONEMPTY(1, q') + (* end case *); + S.atomicEnd(); + msg + end + + fun recv (MB state) = ( + S.atomicBegin(); + case !state + of (EMPTY q) => let + val msg = callcc (fn recvK => ( + state := EMPTY(enqueue(q, (mkId(), recvK))); + S.atomicDispatch())) + in + S.atomicEnd(); msg + end + | (NONEMPTY(priority, q)) => getMsg (state, q) + (* end case *)) + + fun recvEvt (MB state) = let + fun blockFn {transId, cleanUp, next} = let + val (EMPTY q) = !state + val msg = callcc (fn recvK => ( + state := EMPTY(enqueue(q, (transId, recvK))); + next(); + raise Fail "Mailbox: impossible")) + in + cleanUp(); + S.atomicEnd(); + msg + end + fun pollFn () = (case !state + of (EMPTY _) => R.BLOCKED blockFn + | (NONEMPTY(priority, q)) => ( + state := NONEMPTY(priority+1, q); + R.ENABLED{prio=priority, doFn=(fn () => getMsg(state, q))}) + (* end case *)) + in + R.BEVT[pollFn] + end + + fun recvPoll (MB state) = ( + S.atomicBegin(); + case !state + of (EMPTY q) => (S.atomicEnd(); NONE) + | (NONEMPTY(priority, q)) => SOME(getMsg (state, q)) + (* end case *)) + + end (* Mailbox *) diff --git a/cml/src/core-cml/queue.sml b/cml/src/core-cml/queue.sml new file mode 100644 index 0000000..8abba6e --- /dev/null +++ b/cml/src/core-cml/queue.sml @@ -0,0 +1,99 @@ +(* queue.sml + * + * COPYRIGHT (c) 1995 AT&T Bell Laboratories. + * COPYRIGHT (c) 1989-1991 John H. Reppy + * + * These are the basic scheduling queues used throughout the CML + * implementation. We make the representation concrete, so that + * other modules can inline the operations [someday we can count + * on the compiler for this]. + *) + +structure Q : sig + + type 'a queue (* = 'a RepTypes.queue *) + + val queue : unit -> 'a queue + (* create a new queue *) + + val sameQ : ('a queue * 'a queue) -> bool + (* return true, if the two queues are the same *) + + val isEmpty : 'a queue -> bool + (* return true, if the queue is empty *) + + val enqueue : ('a queue * 'a) -> unit + (* enqueue an item in the queue *) + + exception EmptyQ + val dequeue : 'a queue -> 'a + (* dequeue an item; raise EmptyQ if the queue is empty *) + + val next : 'a queue -> 'a option + (* dequeue and return then next item in the queue; return NONE, if + * the queue is empty. + *) + + val reset : 'a queue -> unit + (* reset a queue to all empty *) + + val remove : ('a queue * ('a -> bool)) -> unit + (* find and remove the first item that satisfies the predicate *) + + end = struct + + datatype queue = datatype RepTypes.queue + + fun reverse (x, [], rl) = (x, rl) + | reverse (x, y :: rest, rl) = reverse (y, rest, x :: rl) + + fun revAppend ([], l) = l + | revAppend (x::r, l) = revAppend(r, x::l) + + fun queue () = Q{front = ref[], rear = ref[]} + + fun sameQ (Q{front=f1, ...}, Q{front=f2, ...}) = (f1 = f2) + + fun isEmpty (Q{front = ref [], rear = ref []}) = true + | isEmpty _ = false + + fun enqueue (Q{rear, ...}, item) = rear := item :: !rear + + exception EmptyQ + fun dequeue (Q{front, rear}) = (case !front + of (x::r) => (front := r; x) + | [] => (case !rear + of (x::r) => let val (y, rr) = reverse(x, r, []) + in + front := rr; rear := []; y + end + | [] => raise EmptyQ + (* end case *)) + (* end case *)) + + fun next (Q{front, rear}) = (case !front + of (x::r) => (front := r; SOME x) + | [] => (case !rear + of (x::r) => let val (y, rr) = reverse(x, r, []) + in + front := rr; rear := []; SOME y + end + | [] => NONE + (* end case *)) + (* end case *)) + + fun reset (Q{front, rear}) = (front := []; rear := []) + + exception Remove + fun remove (Q{front, rear}, pred) = let + fun lookF ([], l) = lookR(!rear, []) + | lookF (x::r, l) = + if (pred x) then front := revAppend(l, r) else lookF(r, x::l) + and lookR ([], _) = raise Remove + | lookR (x::r, l) = + if (pred x) then rear := revAppend(l, r) else lookR(r, x::l) + in + lookF(!front, []) + end + + end diff --git a/cml/src/core-cml/rep-types.sml b/cml/src/core-cml/rep-types.sml new file mode 100644 index 0000000..14bf097 --- /dev/null +++ b/cml/src/core-cml/rep-types.sml @@ -0,0 +1,74 @@ +(* rep-types.sml + * + * COPYRIGHT (c) 1995 AT&T Bell Laboratories. + * COPYRIGHT (c) 1989-1991 John H. Reppy + * + * These are the concrete representations of the various CML types. + * These types are abstract (or not even visible) outside this library. + *) + +structure RepTypes = + struct + + (* queues --- see queue.sml *) + datatype 'a queue = Q of { + front : 'a list ref, + rear : 'a list ref + } + + (** thread IDs --- see threads.sml **) + datatype thread_id = TID of { (* thread ids *) + id : int, (* an unique ID *) + alert : bool ref, (* true, if there is a pending alert on this *) + (* thread *) + done_comm : bool ref, (* set this whenever this thread does some *) + (* concurrency operation. *) + exnHandler : (exn -> unit) ref, (* root-level exception handler hook *) + props : exn list ref, (* holds thread-local properties *) + dead : cvar (* the cvar that becomes set when the thread *) + (* dies *) + } + + (* transaction IDs are used to mark blocked threads in the various waiting + * queues. They are "cancelled" when some other event is selected. + *) + and trans_id + = CANCEL + | TRANS of thread_id + + (* Condition variables --- see events.sml. + * These are essentially unit valued ivars, and are used for various + * internal synchronization conditions (e.g., nack events, I/O + * synchronization, and thread termination). + *) + and cvar = CVAR of cvar_state ref + and cvar_state + = CVAR_unset of { + transId : trans_id ref, + cleanUp : unit -> unit, + kont : unit SMLofNJ.Cont.cont + } list + | CVAR_set of int + + (** events --- see events.sml **) + datatype 'a event_status + = ENABLED of {prio : int, doFn : unit -> 'a} + | BLOCKED of { + transId : trans_id ref, cleanUp : unit -> unit, next : unit -> unit + } -> 'a + + type 'a base_evt = unit -> 'a event_status + + datatype 'a event + = BEVT of 'a base_evt list + | CHOOSE of 'a event list + | GUARD of unit -> 'a event + | W_NACK of unit event -> 'a event + + (* we put this function here, because it is useful when debugging the + * CML internals. + *) + fun tidToString (TID{id, ...}) = + concat["[", StringCvt.padLeft #"0" 6 (Int.toString id), "]"] + + end diff --git a/cml/src/core-cml/running.sml b/cml/src/core-cml/running.sml new file mode 100644 index 0000000..c55b185 --- /dev/null +++ b/cml/src/core-cml/running.sml @@ -0,0 +1,13 @@ +(* running.sml + * + * COPYRIGHT (c) 1997 Bell Labs, Lucent Technologies. + * + * A flag to tell us if CML is running. This gets set and cleared in the + * RunCMLFn functor, but other modules need to test it. + *) + +structure Running = + struct + val isRunning = ref false + end + diff --git a/cml/src/core-cml/scheduler.sml b/cml/src/core-cml/scheduler.sml new file mode 100644 index 0000000..9cd39fb --- /dev/null +++ b/cml/src/core-cml/scheduler.sml @@ -0,0 +1,359 @@ +(* scheduler.sml + * + * COPYRIGHT (c) 1995 AT&T Bell Laboratories. + * COPYRIGHT (c) 1989-1991 John H. Reppy + * + * This module implements the scheduling queues and preemption + * mechanisms. + *) + +structure Scheduler : sig + + type thread_id + type 'a cont = 'a SMLofNJ.Cont.cont + + val rdyQ1 : (thread_id * unit cont) RepTypes.queue + + val getCurThread : unit -> thread_id + val setCurThread : thread_id -> unit + + val enqueueThread : (thread_id * unit cont) -> unit + + val enqueueAndSwitchCurThread : (unit cont * thread_id) -> unit + (* enqueue the given continuation with the current thread ID, and make + * the given thread ID be the current one. + *) + + val enqueueTmpThread : (unit -> unit) -> unit + (* create a temporary thread (with dummy ID) to run the given + * function and then exit. The thread is placed on the front + * of the scheduling queue. + *) + + datatype atomic_state = NonAtomic | Atomic | SignalPending + + val atomicState : atomic_state ref + + val atomicBegin : unit -> unit + val atomicEnd : unit -> unit + (* enter/leave an atomic region; note that these do not nest *) + + val atomicDispatch : unit -> 'a + (* leave the atomic region and dispatch the next thread *) + + val dispatch : unit -> 'a + (* dispatch the next thread; this should NOT be called while in + * an atomic region. Use atomicDispatch() for that case. + *) + + val atomicSwitchTo : (thread_id * 'a cont * 'a) -> unit + (* switch to the given thread, while leaving the atomic region *) + + val atomicYield : unit cont -> 'a + (* Yield control to the next thread, while leaving the atomic + * region. + *) + + val schedulerHook : unit cont ref + (* this hook points to a continuation that gets dispatched when + * a preemption is received, or when a thread exits an atomic + * region and there is a signal pending. It is invoked after + * leaving the atomic region. + *) + + val pauseHook : unit cont ref + (* this hook points to a continuation that gets invoked when + * when the scheduler has nothing else to do. + *) + + val shutdownHook : (bool * OS.Process.status) cont ref + (* this hook points to a continuation that gets invoked when + * the system is otherwise deadlocked. It takes two arguments: + * the first is a boolean flag that says weather to do clean-up, + * and the second is the exit status. + *) + + val getTime : unit -> Time.time + (* returns an approximation of the current time of day (this is at + * least as accurate as the time quantum. + *) + + val reset : bool -> unit + + (* control over the preemptive timer *) + val startTimer : Time.time -> unit + val stopTimer : unit -> unit + val restartTimer : unit -> unit + + end = struct + + structure R = RepTypes + structure Sig = Signals + + type 'a cont = 'a SMLofNJ.Cont.cont + val callcc = SMLofNJ.Cont.callcc + val throw = SMLofNJ.Cont.throw + + (* some utility functions that should be inlined *) + fun reverse ([], rl) = rl + | reverse (x :: rest, rl) = reverse(rest, x :: rl) + + type thread_id = R.thread_id + + (* the current thread is represented using the "var" register *) + val getCurThread : unit -> thread_id = Unsafe.getVar + val setCurThread : thread_id -> unit = Unsafe.setVar + + (* The scheduler defines three continuation "hooks": + * schedulerHook -- this points to a continuation that gets dispatched + * when a thread attempts to exit an atomic region and + * there is a signal pending. It is invoked after + * leaving the atomic region. + * pauseHook -- this points to a continuation that gets invoked when + * there is nothing else to do. + * shutdownHook -- this points to a continuation that gets invoked when + * the system is deadlocked, or when RunCML.shutdown + * is called. It takes two arguments: the first is a + * boolean flag that says weather to do clean-up, and + * the second is the exit status. + *) + fun bogus _ = raise Fail "should never see this " + val bogusHook : unit cont = SMLofNJ.Cont.isolate bogus + val bogusShutdownHook : (bool * OS.Process.status) cont = + SMLofNJ.Cont.isolate bogus + val schedulerHook = ref bogusHook + val pauseHook = ref bogusHook + val shutdownHook = ref bogusShutdownHook + + (* the dummy thread Id; this is used when an ID is needed to get + * the types right + *) + val dummyTid = R.TID{ + id = ~1, alert = ref false, done_comm = ref false, + exnHandler = ref(fn _ => ()), + props = ref[], + dead = R.CVAR(ref(R.CVAR_unset[])) + } + (* the error thread. This thread is used to trap attempts to run CML + * without proper initialization (i.e., via RunCML). This thread is + * enqueued by reset. + *) + val errorTid = R.TID{ + id = ~2, alert = ref false, done_comm = ref false, + exnHandler = ref(fn _ => ()), + props = ref[], + dead = R.CVAR(ref(R.CVAR_unset[])) + } + val errorCont : unit cont = SMLofNJ.Cont.isolate (fn _ => ( + Debug.sayDebug "**** Use RunCML.doit to run CML ****\n"; + raise Fail "CML not initialized")) + + (* thread id marking *) + fun markTid (R.TID{done_comm, ...}) = done_comm := true + fun unmarkTid (R.TID{done_comm, ...}) = done_comm := false + fun isMarked (R.TID{done_comm, ...}) = !done_comm + + (* The thread ready queues: + * rdyQ1 is the primary queue and rdyQ2 is the secondary queue. + *) + val (rdyQ1 as R.Q{rear=rear1, ...}) : (R.thread_id * unit cont) Q.queue = + Q.queue() + val rdyQ2 : (R.thread_id * unit cont) Q.queue = Q.queue() + + (* enqueue a ready thread *) + fun enqueue p = (rear1 := p :: !rear1) + fun markAndEnqueue (p as (id, _)) = (markTid id; rear1 := p :: !rear1) + + val enqueueThread = markAndEnqueue + + (* enqueue the current thread, and make the given thread ID be the current + * one. + *) + fun enqueueAndSwitchCurThread (resume, tid) = ( + markAndEnqueue(getCurThread(), resume); + setCurThread tid) + + (* dequeue a thread from the primary queue *) + fun dequeue1 () = (case rdyQ1 + of (R.Q{front = ref [], rear = ref []}) => dequeue2() + | (R.Q{front as (ref []), rear as (ref l)}) => let + val (x::r) = reverse(l, []) + in + front := r; rear := []; x + end + | (R.Q{front as (ref(x::r)), ...}) => (front := r; x) + (* end case *)) + + (* remove a thread from the secondary queue (assuming that the + * primary queue is empty. + *) + and dequeue2 () = (case rdyQ2 + of (R.Q{front = ref [], rear = ref []}) => (dummyTid, !pauseHook) + | (R.Q{front as ref [], rear as ref l}) => ( + rear := []; front := reverse(l, []); dequeue2()) + | (R.Q{front as ref(item::r), ...}) => (front := r; item) + (* end case *)) + + (* promote a thread from the secondary queue to the primary queue *) + fun promote () = (case (Q.next rdyQ2) + of (SOME x) => enqueue x + | NONE => () + (* end case *)) + + (* global flag for implementing atomic operations *) + datatype atomic_state = NonAtomic | Atomic | SignalPending + val atomicState = ref NonAtomic + + (* Note, the first thing the scheduler hook does is a atomicBegin, so we don't + * need to clear the atomic state here. + *) + fun dispatchSchedulerHook () = throw (!schedulerHook) () + +(* + fun enqueueSchedulerHook () = let + val kont = callcc (fn k => ( + callcc (fn k' => throw k k'); + dispatchSchedulerHook ())) + val R.Q{front, ...} = rdyQ1 + in + front := (dummyTid, kont) :: !front + end +*) + + fun atomicBegin () = atomicState := Atomic + + (* leave an atomic region. + * NOTE: there is a race condition between the test of the atomicState + * flag and the setting of it to NonAtomic, but this is not a problem in + * practice, because there are no GC tests between these (and thus no + * preemption). + *) + fun atomicEnd () = (case !atomicState + of SignalPending => callcc (fn k => ( + enqueue(getCurThread(), k); + dispatchSchedulerHook())) + | _ => atomicState := NonAtomic + (* end case *)) + + fun atomicDispatch () = (case !atomicState + of SignalPending => dispatchSchedulerHook() + | _ => let + val (id, kont) = dequeue1() + in + setCurThread id; + atomicState := NonAtomic; + throw kont () + end + (* end case *)) + + fun dispatch () = (atomicBegin(); atomicDispatch ()) + + fun atomicSwitchTo (tid, k, x) = + callcc (fn curK => ( + case !atomicState + of SignalPending => + callcc (fn k' => ( + enqueue(tid, k'); + enqueue(getCurThread(), curK); + dispatchSchedulerHook())) + | _ => ( + enqueueAndSwitchCurThread (curK, tid); + atomicState := NonAtomic) + (* end case *); + throw k x)) + + (* Yield control to the next thread, while leaving the atomic region. *) + fun atomicYield k = ( + markAndEnqueue(getCurThread(), k); + atomicDispatch ()) + + (* create a temporary thread (with dummy ID) to run the given + * function and then exit. The thread is placed on the front + * of the scheduling queue. + *) + fun enqueueTmpThread f = let +(** this should be, but the overhead is too high right now. ** + val kont = SMLofNJ.Cont.isolate f +**) + val kont = callcc (fn k => ( + callcc (fn k' => throw k k'); + f () handle _ => (); + dispatch ())) + val R.Q{front, ...} = rdyQ1 + in + front := (dummyTid, kont) :: !front + end + + val defaultHook : unit cont = SMLofNJ.Cont.isolate dispatch + + (* this holds an approximation of the current time of day. It is + * cleared at each pre-emption, and initialized on demand (by getTime). + *) + val clock = ref(SOME Time.zeroTime) + + (* returns an approximation of the current time of day (this is at + * least as accurate as the time quantum). + *) + fun getTime () = (case !clock + of NONE => let val t = Time.now() + in + clock := SOME t; t + end + | (SOME t) => t + (* end case *)) + + (* preempt the current thread (with continuation k). *) + fun preempt k = let + val curTid = getCurThread() + val curP = (curTid, k) + in + if (isMarked curTid) + then ( + unmarkTid curTid; + promote (); + enqueue curP) + else Q.enqueue(rdyQ2, curP) + end + + (* the preemption handler *) + fun alrmHandler (_, _, k) = ( + clock := NONE; + case !atomicState + of NonAtomic => (preempt k; !schedulerHook) + | Atomic => (atomicState := SignalPending; k) + | _ => k + (* end case *)) + + val defaultTimeQ = Time.fromMilliseconds 20 + val timeQ = ref defaultTimeQ + + structure IT = SMLofNJ.IntervalTimer + + fun startTimer tq = let + val tq = if Time.<(Time.zeroTime, tq) then tq else defaultTimeQ + in + timeQ := tq; + ignore (Sig.setHandler (Sig.sigALRM, Sig.HANDLER alrmHandler)); + ignore (IT.setIntTimer (SOME tq)) + end + + fun stopTimer () = ( + ignore (IT.setIntTimer NONE); + ignore (Sig.setHandler (Sig.sigALRM, Sig.IGNORE))) + + fun restartTimer () = startTimer (!timeQ) + + (* reset various pieces of state *) + fun reset running = ( + setCurThread dummyTid; + pauseHook := bogusHook; + shutdownHook := bogusShutdownHook; + schedulerHook := defaultHook; + clock := NONE; + Q.reset rdyQ1; Q.reset rdyQ2; + if (not running) then enqueueThread(errorTid, errorCont) else ()) + + val _ = reset false + + end + diff --git a/cml/src/core-cml/sync-var-sig.sml b/cml/src/core-cml/sync-var-sig.sml new file mode 100644 index 0000000..ab2bb5e --- /dev/null +++ b/cml/src/core-cml/sync-var-sig.sml @@ -0,0 +1,38 @@ +(* sync-var-sig.sml + * + * COPYRIGHT (c) 1995 AT&T Bell Laboratories. + * COPYRIGHT (c) 1989-1991 John H. Reppy + * + * The implementation of Id-style synchronizing memory cells (I-structures + * and M-structures). + *) + +signature SYNC_VAR = + sig + + type 'a ivar (* I-structure variable *) + type 'a mvar (* M-structure variable *) + + exception Put (* raised on put operations to full cells *) + + val iVar : unit -> 'a ivar + val iPut : ('a ivar * 'a) -> unit + val iGet : 'a ivar -> 'a + val iGetEvt : 'a ivar -> 'a CML.event + val iGetPoll : 'a ivar -> 'a option + val sameIVar : ('a ivar * 'a ivar) -> bool + + val mVar : unit -> 'a mvar + val mVarInit : 'a -> 'a mvar + val mPut : ('a mvar * 'a) -> unit + val mTake : 'a mvar -> 'a + val mTakeEvt : 'a mvar -> 'a CML.event + val mTakePoll : 'a mvar -> 'a option + val mGet : 'a mvar -> 'a + val mGetEvt : 'a mvar -> 'a CML.event + val mGetPoll : 'a mvar -> 'a option + val mSwap : ('a mvar * 'a) -> 'a + val mSwapEvt : ('a mvar * 'a) -> 'a CML.event + val sameMVar : ('a mvar * 'a mvar) -> bool + + end; diff --git a/cml/src/core-cml/sync-var.sml b/cml/src/core-cml/sync-var.sml new file mode 100644 index 0000000..eed7960 --- /dev/null +++ b/cml/src/core-cml/sync-var.sml @@ -0,0 +1,338 @@ +(* sync-var.sml + * + * COPYRIGHT (c) 1995 AT&T Bell Laboratories. + * COPYRIGHT (c) 1989-1991 John H. Reppy + * + * The implementation of Id-style synchronizing memory cells. + *) + +structure SyncVar :> SYNC_VAR = + struct + + structure R = RepTypes + structure S = Scheduler + + type 'a cont = 'a SMLofNJ.Cont.cont + val callcc = SMLofNJ.Cont.callcc + val throw = SMLofNJ.Cont.throw + + (* the underlying representation of both ivars and mvars is the same. *) + datatype 'a cell = CELL of { + priority : int ref, + readQ : (R.trans_id ref * 'a cont) Q.queue, + value : 'a option ref + } + + type 'a ivar = 'a cell + type 'a mvar = 'a cell + + exception Put + + fun newCell () = CELL{priority = ref 0, readQ = Q.queue(), value=ref NONE} + fun sameCell (CELL{value=v1, ...}, CELL{value=v2, ...}) = (v1 = v2) + + (* create a new transaction ID *) + fun mkId () = ref(R.TRANS(S.getCurThread())) + + (* given a transaction ID, get its thread ID and mark it cancelled. *) + fun getIdFromTrans (transId as ref(R.TRANS tid)) = ( + transId := R.CANCEL; + tid) + + (* bump a priority value by one, returning the old value *) + fun bumpPriority (p as ref n) = (p := n+1; n) + + datatype 'a q_item + = NoItem + | Item of (R.trans_id ref * 'a cont) + + (* functions to clean channel input and output queues *) + local + fun clean [] = [] + | clean ((ref R.CANCEL, _)::r) = clean r + | clean l = l + fun cleanRev ([], l) = l + | cleanRev ((ref R.CANCEL, _)::r, l) = cleanRev (r, l) + | cleanRev (x::r, l) = cleanRev (r, x::l) + in + fun cleanAndChk (priority, R.Q{front, rear}) = let + fun cleanFront [] = cleanRear (! rear) + | cleanFront f = (case (clean f) + of [] => cleanRear (! rear) + | f' => (front := f'; bumpPriority priority) + (* end case *)) + and cleanRear [] = 0 + | cleanRear r = ( + rear := []; + case (cleanRev (r, [])) + of [] => 0 + | rr => (front := rr; bumpPriority priority) + (* end case *)) + in + cleanFront (! front) + end + fun cleanAndRemove (R.Q{front, rear, ...}) = let + fun cleanFront [] = cleanRear (! rear) + | cleanFront f = (case (clean f) + of [] => cleanRear (! rear) + | (item::rest) => (front := rest; Item item) + (* end case *)) + and cleanRear [] = NoItem + | cleanRear r = ( + rear := []; + case (cleanRev (r, [])) + of [] => NoItem + | (item::rest) => (front := rest; Item item) + (* end case *)) + in + cleanFront (! front) + end + fun cleanAndEnqueue (R.Q{front, rear, ...}, item) = let + fun cleanFront [] = cleanRear (! rear) + | cleanFront f = (case (clean f) + of [] => cleanRear (! rear) + | f' => (front := f'; rear := item :: (! rear)) + (* end case *)) + and cleanRear [] = (front := [item]) + | cleanRear r = (case (cleanRev (r, [])) + of [] => (front := [item]; rear := []) + | rr => (rear := [item]; front := rr) + (* end case *)) + in + cleanFront (! front) + end + end (* local *) + + (* When a thread is resumed after being blocked on an iGet or mGet operation, + * there may be other threads also blocked on the variable. This function + * is used to propagate the message to all of the threads that are blocked + * on the variable (or until one of them takes the value in the mvar case). + * It must be called from an atomic region; when the readQ is finally empty, + * we leave the atomic region. We must use "cleanAndRemove" to get items + * from the readQ in the unlikely event that a single thread executes a + * choice of multiple gets on the same variable. + *) + fun relayMsg (readQ, msg) = (case (cleanAndRemove readQ) + of NoItem => S.atomicEnd() + | (Item(transId, kont)) => callcc (fn myKont => ( + S.enqueueAndSwitchCurThread(myKont, getIdFromTrans transId); + throw kont msg)) + (* end case *)) + + fun impossible () = raise Fail "SyncVar: impossible" + + + (** I-variables **) + + val iVar = newCell + val sameIVar = sameCell + + fun iPut (CELL{priority, readQ, value}, x) = ( + S.atomicBegin(); + case !value + of NONE => ( + value := SOME x; + case (cleanAndRemove readQ) + of NoItem => S.atomicEnd() + | (Item(transId, kont)) => callcc (fn myKont => ( + S.enqueueAndSwitchCurThread(myKont, getIdFromTrans transId); + priority := 1; + throw kont x)) + (* end case *)) + | (SOME _) => (S.atomicEnd(); raise Put) + (* end case *)) + + fun iGet (CELL{priority, readQ, value}) = ( + S.atomicBegin(); + case !value + of NONE => let + val msg = callcc (fn k => ( + Q.enqueue (readQ, (mkId(), k)); + S.atomicDispatch ())) + in + relayMsg (readQ, msg); msg + end + | (SOME v) => (S.atomicEnd(); v) + (* end case *)) + + fun iGetEvt (CELL{priority, readQ, value}) = let + fun blockFn {transId, cleanUp, next} = let + val msg = callcc (fn k => ( + Q.enqueue (readQ, (transId, k)); + next (); + impossible())) + in + cleanUp(); + relayMsg (readQ, msg); msg + end + fun pollFn () = (case !value + of NONE => R.BLOCKED blockFn + | (SOME v) => R.ENABLED{ + prio=bumpPriority priority, + doFn=(fn () => (priority := 1; S.atomicEnd(); v)) + } + (* end case *)) + in + R.BEVT[pollFn] + end + + (* NOTE: we assume that reads are atomic, so this function does not + * need to run in an atomic region. + *) + fun iGetPoll (CELL{value, ...}) = !value + + + (** M-variables **) + + val mVar = newCell + fun mVarInit x = CELL{priority = ref 0, readQ = Q.queue(), value=ref(SOME x)} + val sameMVar = sameCell + + fun mPut (CELL{priority, readQ, value}, x) = ( + S.atomicBegin(); + case !value + of NONE => ( + value := SOME x; + case (cleanAndRemove readQ) + of NoItem => S.atomicEnd() + | (Item(transId, kont)) => callcc (fn myKont => ( + S.enqueueAndSwitchCurThread(myKont, getIdFromTrans transId); + priority := 1; + throw kont x)) + (* end case *)) + | (SOME _) => (S.atomicEnd(); raise Put) + (* end case *)) + + fun mTake (CELL{priority, readQ, value}) = ( + S.atomicBegin(); + case !value + of NONE => let + val v = callcc (fn k => ( + Q.enqueue (readQ, (mkId(), k)); + S.atomicDispatch ())) + in + value := NONE; + S.atomicEnd(); + v + end + | (SOME v) => (value := NONE; S.atomicEnd(); v) + (* end case *)) + + fun mTakeEvt (CELL{priority, readQ, value}) = let + fun blockFn {transId, cleanUp, next} = let + val v = callcc (fn k => ( + Q.enqueue (readQ, (transId, k)); + next (); + impossible())) + in + cleanUp(); + value := NONE; + S.atomicEnd(); + v + end + fun pollFn () = (case !value + of NONE => R.BLOCKED blockFn + | (SOME v) => R.ENABLED{ + prio=bumpPriority priority, + doFn=(fn () => (value := NONE; S.atomicEnd(); v)) + } + (* end case *)) + in + R.BEVT[pollFn] + end + + fun mTakePoll (CELL{priority, readQ, value}) = let + val res = ( + S.atomicBegin(); + case !value + of NONE => NONE + | (SOME v) => (value := NONE; SOME v) + (* end case *)) + in + S.atomicEnd(); res + end + + fun mGet (CELL{priority, readQ, value}) = ( + S.atomicBegin(); + case !value + of NONE => let + val v = callcc (fn k => ( + Q.enqueue (readQ, (mkId(), k)); + S.atomicDispatch ())) + in + relayMsg (readQ, v); v + end + | (SOME v) => (S.atomicEnd(); v) + (* end case *)) + + fun mGetEvt (CELL{priority, readQ, value}) = let + fun blockFn {transId, cleanUp, next} = let + val v = callcc (fn k => ( + Q.enqueue (readQ, (transId, k)); + next (); + impossible())) + in + cleanUp(); + relayMsg (readQ, v); + v + end + fun pollFn () = (case !value + of NONE => R.BLOCKED blockFn + | (SOME v) => R.ENABLED{ + prio=bumpPriority priority, + doFn=(fn () => (S.atomicEnd(); v)) + } + (* end case *)) + in + R.BEVT[pollFn] + end + + (* NOTE: we assume that reads are atomic, so this function does not + * need to run in an atomic region. + *) + fun mGetPoll (CELL{value, ...}) = !value + + (* Swap the current contents of the cell with a new value. This function + * has the effect of an mTake followed by an mPut, except that it is + * guaranteed to be atomic. It is also somewhat more efficient. + *) + fun mSwap (CELL{priority, readQ, value}, newV) = ( + S.atomicBegin(); + case !value + of NONE => let + val v = callcc (fn k => ( + Q.enqueue (readQ, (mkId(), k)); + S.atomicDispatch ())) + in + value := SOME newV; + (* relay the new value to any other blocked threads *) + relayMsg (readQ, newV); + v + end + | (SOME v) => (value := SOME newV; S.atomicEnd(); v) + (* end case *)) + + fun mSwapEvt (CELL{priority, readQ, value}, newV) = let + fun blockFn {transId, cleanUp, next} = let + val v = callcc (fn k => ( + Q.enqueue (readQ, (transId, k)); + next (); + impossible())) + in + cleanUp(); + value := SOME newV; + relayMsg (readQ, newV); + v + end + fun pollFn () = (case !value + of NONE => R.BLOCKED blockFn + | (SOME v) => R.ENABLED{ + prio=bumpPriority priority, + doFn=(fn () => (value := SOME newV; S.atomicEnd(); v)) + } + (* end case *)) + in + R.BEVT[pollFn] + end + + end; (* SyncVar *) diff --git a/cml/src/core-cml/thread-sig.sml b/cml/src/core-cml/thread-sig.sml new file mode 100644 index 0000000..d95d7a3 --- /dev/null +++ b/cml/src/core-cml/thread-sig.sml @@ -0,0 +1,44 @@ +(* threads-sig.sml + * + * COPYRIGHT (c) 1995 AT&T Bell Laboratories. + * COPYRIGHT (c) 1989-1991 John H. Reppy + *) + +signature THREAD = + sig + type 'a event + type thread_id + + val getTid : unit -> thread_id + + val sameTid : (thread_id * thread_id) -> bool + val compareTid : (thread_id * thread_id) -> order + val hashTid : thread_id -> word + + val tidToString : thread_id -> string + + val spawnc : ('a -> unit) -> 'a -> thread_id + val spawn : (unit -> unit) -> thread_id + + val exit : unit -> 'a + + val joinEvt : thread_id -> unit event + + val yield : unit -> unit (* mostly for benchmarking *) + + (* thread-local data *) + val newThreadProp : (unit -> 'a) -> { + clrFn : unit -> unit, (* clear's current thread's property *) + getFn : unit -> 'a, (* get current thread's property; if *) + (* the property is not defined, then *) + (* it sets it using the initialization *) + (* function. *) + peekFn : unit -> 'a option, (* return the property's value, if any *) + setFn : 'a -> unit (* set the property's value for the *) + (* current thread. *) + } + + val newThreadFlag : unit -> {getFn : unit -> bool, setFn : bool -> unit} + + end; + diff --git a/cml/src/core-cml/thread.sml b/cml/src/core-cml/thread.sml new file mode 100644 index 0000000..2c9525c --- /dev/null +++ b/cml/src/core-cml/thread.sml @@ -0,0 +1,176 @@ +(* thread.sml + * + * COPYRIGHT (c) 1995 AT&T Bell Laboratories. + * COPYRIGHT (c) 1989-1991 John H. Reppy + *) + +structure Thread : sig + include THREAD + val defaultExnHandler : (exn -> unit) ref + val reset : bool -> unit + end = struct + + structure R = RepTypes + structure S = Scheduler + + datatype thread_id = datatype R.thread_id + datatype cvar = datatype R.cvar + datatype cvar_state = datatype R.cvar_state + + type 'a event = 'a R.event + + local + val tidCount = ref 0 + fun cvar () = CVAR(ref(CVAR_unset [])) + in + + fun reset running = ( + tidCount := 0; + S.reset running) + + fun exnHandler (exn : exn) = () + + val defaultExnHandler = ref exnHandler + + fun newTId () = let val n = !tidCount + in + tidCount := n+1; + TID{ + id = n, + alert = ref false, + done_comm = ref false, + exnHandler = ref(! defaultExnHandler), + props = ref[], + dead = cvar() + } + end + end (* local *) + + fun sameTid (TID{id=a, ...}, TID{id=b, ...}) = (a = b) + + fun compareTid (TID{id=a, ...}, TID{id=b, ...}) = Int.compare(a, b) + + fun hashTid (TID{id, ...}) = Word.fromInt id + + val tidToString = R.tidToString + + fun notifyAndDispatch (TID{dead, ...}) = ( + S.atomicBegin(); Event.atomicCVarSet dead; S.atomicDispatch()) + + fun doHandler (TID{exnHandler, ...}, exn) = + ((!exnHandler) exn) handle _ => () + +(** Eventually, this should be: + fun spawnc f x = let + val _ = S.atomicBegin() + val id = newTId() + fun thread () = ( + (f x) handle ex => doHandler(id, ex); + notifyAndDispatch id) + in + SMLofNJ.Cont.callcc (fn parentK => ( + S.enqueueAndSwitchCurThread(parentK, id); + S.atomicEnd(); + SMLofNJ.Cont.throw (SMLofNJ.Cont.isolate thread) ())); + id + end + **) + fun spawnc f x = let + val _ = S.atomicBegin() + val id = newTId() + in + SMLofNJ.Cont.callcc (fn parentK => ( + S.enqueueAndSwitchCurThread(parentK, id); + S.atomicEnd(); + (f x) handle ex => doHandler(id, ex); + notifyAndDispatch id)); + id + end + + fun spawn f = spawnc f () + + fun joinEvt (TID{dead, ...}) = Event.cvarGetEvt dead + + val getTid = S.getCurThread + + fun exit () = let + val (tid as TID{props, ...}) = getTid() + in + props := []; + notifyAndDispatch tid + end + + fun yield () = SMLofNJ.Cont.callcc (fn k => ( + S.atomicBegin(); + S.atomicYield k)) + + (* thread-local data *) + local + fun mkProp () = let + exception E of 'a + fun cons (a, l) = E a :: l + fun peek [] = NONE + | peek (E a :: _) = SOME a + | peek (_ :: l) = peek l + fun delete [] = [] + | delete (E a :: r) = r + | delete (x :: r) = x :: delete r + in + { cons = cons, peek = peek, delete = delete } + end + fun mkFlag () = let + exception E + fun peek [] = false + | peek (E :: _) = true + | peek (_ :: l) = peek l + fun set (l, flg) = let + fun set ([], _) = if flg then E::l else l + | set (E::r, xs) = if flg then l else List.revAppend(xs, r) + | set (x::r, xs) = set (r, x::xs) + in + set (l, []) + end + in + { set = set, peek = peek } + end + fun getProps () = let val TID{props, ...} = getTid() in props end + in + fun newThreadProp (init : unit -> 'b) = let + val {peek, cons, delete} = mkProp() + fun peekFn () = peek(!(getProps())) + fun getF () = let + val h = getProps() + in + case peek(!h) + of NONE => let val b = init() in h := cons(b, !h); b end + | (SOME b) => b + (* end case *) + end + fun clrF () = let + val h = getProps() + in + h := delete(!h) + end + fun setFn x = let + val h = getProps() + in + h := cons(x, delete(!h)) + end + in + {peekFn = peekFn, getFn = getF, clrFn = clrF, setFn = setFn} + end + + fun newThreadFlag () = let + val {peek, set} = mkFlag() + fun getF ()= peek(!(getProps())) + fun setF flg = let + val h = getProps() + in + h := set(!h, flg) + end + in + {getFn = getF, setFn = setF} + end + end (* local *) + + end; diff --git a/cml/src/core-cml/timeout-sig.sml b/cml/src/core-cml/timeout-sig.sml new file mode 100644 index 0000000..06ce0ce --- /dev/null +++ b/cml/src/core-cml/timeout-sig.sml @@ -0,0 +1,18 @@ +(* timeout-sig.sml + * + * COPYRIGHT (c) 1995 AT&T Bell Laboratories. + * COPYRIGHT (c) 1989-1991 John H. Reppy + * + * Exported interface for timeout synchronization. + *) + +signature TIME_OUT = + sig + + type 'a event + + val timeOutEvt : Time.time -> unit event + val atTimeEvt : Time.time -> unit event + + end; + diff --git a/cml/src/core-cml/timeout.sml b/cml/src/core-cml/timeout.sml new file mode 100644 index 0000000..955ac1e --- /dev/null +++ b/cml/src/core-cml/timeout.sml @@ -0,0 +1,112 @@ +(* timeout.sml + * + * COPYRIGHT (c) 1995 AT&T Bell Laboratories. + * COPYRIGHT (c) 1989-1991 John H. Reppy + * + * Events for synchronizing on timeouts. + *) + +structure TimeOut : sig + + include TIME_OUT + + val reset : unit -> unit + val pollTime : unit -> unit + val anyWaiting : unit -> Time.time option + + end = struct + + structure R = RepTypes + structure S = Scheduler + + type 'a event = 'a Event.event + + (* The list of threads waiting for timouts. It is sorted in increasing order + * of time value. + * NOTE: we may want to use some sort of balanced search structure in the + * future. + *) + type item = (Time.time * (unit -> unit) * R.trans_id ref * unit S.cont) + val timeQ = ref ([] : item list) + + fun timeWait (t, f, id, k) = let + fun ins [] = [(t, f, id, k)] + | ins ((_, _, ref R.CANCEL, _) :: r) = ins r + | ins (l as ((item as (t', _, _, _)) :: r)) = if (Time.<(t', t)) + then item :: ins r + else (t, f, id, k) :: l + in + timeQ := ins (! timeQ) + end + + fun clean [] = [] + | clean ((_, _, ref R.CANCEL, _) :: r) = clean r + | clean (item :: r) = item :: clean r + + fun checkQ q = let + val now = S.getTime() + fun chk [] = [] + | chk ((_, _, ref R.CANCEL, _) :: r) = chk r + | chk (l as ((item as (t', f, transId as ref(R.TRANS tid), k)) :: r)) = + if (Time.<=(t', now)) + then ( + S.enqueueThread (tid, k); + f(); (* cleanup function *) + chk r) + else clean l + in + chk q + end + + fun anyWaiting () = (case clean(!timeQ) + of [] => NONE + | (q as ((t, _, _, _)::_)) => let + val now = S.getTime() + in + if (Time.<=(t, now)) + then SOME(Time.zeroTime) + else SOME(Time.-(t, now)) + end + (* end case *)) + + fun pollTime () = (case !timeQ + of [] => () + | q => timeQ := checkQ q + (* end case *)) + + fun reset () = timeQ := [] + + (** NOTE: unlike for most base events, the block functions of time-out + ** events do not have to exit the atomic region or execute the clean-up + ** operation. This is done when they are removed from the waiting queue. + **) + fun timeOutEvt t = let + fun blockFn {transId, cleanUp, next} = let + val t0 = S.getTime() + in + SMLofNJ.Cont.callcc (fn k => ( + timeWait (Time.+(t, t0), cleanUp, transId, k); + next())); + S.atomicEnd() + end + fun pollFn () = if (t = Time.zeroTime) + then R.ENABLED{prio= ~1, doFn=S.atomicEnd} + else R.BLOCKED blockFn + in + R.BEVT[pollFn] + end + + fun atTimeEvt t = let + fun blockFn {transId, cleanUp, next} = ( + SMLofNJ.Cont.callcc (fn k => ( + timeWait (t, cleanUp, transId, k); + next())); + S.atomicEnd()) + fun pollFn () = if Time.<=(t, S.getTime()) + then R.ENABLED{prio= ~1, doFn=S.atomicEnd} + else R.BLOCKED blockFn + in + R.BEVT[pollFn] + end + + end; diff --git a/cml/src/core-cml/version.sml b/cml/src/core-cml/version.sml new file mode 100644 index 0000000..f0dd217 --- /dev/null +++ b/cml/src/core-cml/version.sml @@ -0,0 +1,29 @@ +(* version.sml + * + * COPYRIGHT (c) 1996 AT&T Research. + *) + +structure Version : sig + + val version : {system : string, version_id : int list, date : string} + val banner : string + + end = struct + + + val version = { + system = "Concurrent ML", + version_id = [1, 0, 10], + date = "September 15, 1997" + } + + fun f ([], l) = l + | f ([x : int], l) = (Int.toString x)::l + | f (x::r, l) = (Int.toString x) :: "." :: f(r, l) + + val banner = concat ( + #system version :: ", Version " :: + f (#version_id version, [", ", #date version])) + + end; + diff --git a/cml/src/glue/.cm/GUID/export-fn-fn.sml b/cml/src/glue/.cm/GUID/export-fn-fn.sml new file mode 100644 index 0000000..32fb201 --- /dev/null +++ b/cml/src/glue/.cm/GUID/export-fn-fn.sml @@ -0,0 +1 @@ +guid-$cml/(cml-internal.cm):glue/(sources.cm):export-fn-fn.sml-1714016096.923 diff --git a/cml/src/glue/.cm/GUID/init-cleanup.sml b/cml/src/glue/.cm/GUID/init-cleanup.sml new file mode 100644 index 0000000..81d8102 --- /dev/null +++ b/cml/src/glue/.cm/GUID/init-cleanup.sml @@ -0,0 +1 @@ +guid-$cml/(cml-internal.cm):glue/(sources.cm):init-cleanup.sml-1714016096.920 diff --git a/cml/src/glue/.cm/GUID/new-run-cml-fn.sml b/cml/src/glue/.cm/GUID/new-run-cml-fn.sml new file mode 100644 index 0000000..915baf0 --- /dev/null +++ b/cml/src/glue/.cm/GUID/new-run-cml-fn.sml @@ -0,0 +1 @@ +guid-$cml/(cml-internal.cm):glue/(sources.cm):new-run-cml-fn.sml-1714016096.945 diff --git a/cml/src/glue/.cm/GUID/os-glue-sig.sml b/cml/src/glue/.cm/GUID/os-glue-sig.sml new file mode 100644 index 0000000..d3c9bcf --- /dev/null +++ b/cml/src/glue/.cm/GUID/os-glue-sig.sml @@ -0,0 +1 @@ +guid-$cml/(cml-internal.cm):glue/(sources.cm):os-glue-sig.sml-1714016096.909 diff --git a/cml/src/glue/.cm/SKEL/export-fn-fn.sml b/cml/src/glue/.cm/SKEL/export-fn-fn.sml new file mode 100644 index 0000000..46dfd14 --- /dev/null +++ b/cml/src/glue/.cm/SKEL/export-fn-fn.sml @@ -0,0 +1,2 @@ +Skeleton 5 +ae"ExportFnFn"i2aG"gp1c"OS_GLUE"f8d"Unsafe"d"OS"d"CML"Cd"Thread"SMLofNJ"d"Q"d"Time"Njh3ad"S"gp1d"Scheduler"ad"CU"gp1d"CleanUp"aCont"gp2h0 \ No newline at end of file diff --git a/cml/src/glue/.cm/SKEL/init-cleanup.sml b/cml/src/glue/.cm/SKEL/init-cleanup.sml new file mode 100644 index 0000000..20050e0 --- /dev/null +++ b/cml/src/glue/.cm/SKEL/init-cleanup.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f2d"CleanUp"d"CleanIO"ad"InitCleanup"j0( \ No newline at end of file diff --git a/cml/src/glue/.cm/SKEL/new-run-cml-fn.sml b/cml/src/glue/.cm/SKEL/new-run-cml-fn.sml new file mode 100644 index 0000000..4e9e772 Binary files /dev/null and b/cml/src/glue/.cm/SKEL/new-run-cml-fn.sml differ diff --git a/cml/src/glue/.cm/SKEL/os-glue-sig.sml b/cml/src/glue/.cm/SKEL/os-glue-sig.sml new file mode 100644 index 0000000..ce2ea9b --- /dev/null +++ b/cml/src/glue/.cm/SKEL/os-glue-sig.sml @@ -0,0 +1,2 @@ +Skeleton 5 +ac"OS_GLUE"h0 \ No newline at end of file diff --git a/cml/src/glue/.cm/amd64-unix/export-fn-fn.sml b/cml/src/glue/.cm/amd64-unix/export-fn-fn.sml new file mode 100644 index 0000000..fe222f4 Binary files /dev/null and b/cml/src/glue/.cm/amd64-unix/export-fn-fn.sml differ diff --git a/cml/src/glue/.cm/amd64-unix/init-cleanup.sml b/cml/src/glue/.cm/amd64-unix/init-cleanup.sml new file mode 100644 index 0000000..9d56113 Binary files /dev/null and b/cml/src/glue/.cm/amd64-unix/init-cleanup.sml differ diff --git a/cml/src/glue/.cm/amd64-unix/new-run-cml-fn.sml b/cml/src/glue/.cm/amd64-unix/new-run-cml-fn.sml new file mode 100644 index 0000000..2604586 Binary files /dev/null and b/cml/src/glue/.cm/amd64-unix/new-run-cml-fn.sml differ diff --git a/cml/src/glue/.cm/amd64-unix/os-glue-sig.sml b/cml/src/glue/.cm/amd64-unix/os-glue-sig.sml new file mode 100644 index 0000000..d8601a5 Binary files /dev/null and b/cml/src/glue/.cm/amd64-unix/os-glue-sig.sml differ diff --git a/cml/src/glue/export-fn-fn.sml b/cml/src/glue/export-fn-fn.sml new file mode 100644 index 0000000..4eb0633 --- /dev/null +++ b/cml/src/glue/export-fn-fn.sml @@ -0,0 +1,69 @@ +(* export-fn-fn.sml + * + * COPYRIGHT (c) 1997 Bell Labs, Lucent Technologies. + * COPYRIGHT (c) 1989-1991 John H. Reppy + *) + +functor ExportFnFn (G : OS_GLUE) : sig + + val pollK : unit SMLofNJ.Cont.cont + val pauseK : unit SMLofNJ.Cont.cont + + datatype ('a, 'b) pair = PAIR of ('a * 'b) + + val wrapForExport : + (((string * string list) -> OS.Process.status) * Time.time option) + -> (string, string list) pair + -> OS.Process.status + + end = struct + + structure S = Scheduler + structure CU = CleanUp + structure Cont = SMLofNJ.Cont + + val pollK : unit Cont.cont = Cont.isolate (fn _ => ( + S.atomicBegin(); + G.pollOS(); + S.atomicDispatch())) + + val pauseK : unit Cont.cont = Cont.isolate (fn _ => ( + S.atomicBegin(); + (* first, we poll the OS to schedule any ready threads *) + G.pollOS(); + (* check for ready threads orelse pause *) + if (not (Q.isEmpty S.rdyQ1) orelse G.pause()) + then S.atomicDispatch() + else ( + S.atomicEnd(); + Cont.throw (! S.shutdownHook) (true, OS.Process.failure)))) + + datatype ('a, 'b) pair = PAIR of ('a * 'b) + type cmdt = (string, string list) pair -> OS.Process.status + val exportFn' : (string * cmdt) -> unit = + Unsafe.CInterface.c_function "SMLNJ-RunT" "exportFn" + + fun wrapForExport (f, tq) (PAIR args) = let + val _ = ( + SMLofNJ.Internals.initSigTbl (); + Thread.reset true; + G.init(); + S.schedulerHook := pollK; + S.pauseHook := pauseK) + fun initialProc () = + OS.Process.exit(f args handle _ => OS.Process.failure) + val (cleanUp, sts) = Cont.callcc (fn doneK => ( + S.shutdownHook := doneK; + case tq of (SOME tq) => S.startTimer tq | _ => S.restartTimer(); + CU.clean CU.AtInitFn; + CML.spawn initialProc; + CML.exit ())) + in + CU.clean CU.AtExit; + G.shutdown(); + S.stopTimer(); + Thread.reset false; + sts + end + + end diff --git a/cml/src/glue/init-cleanup.sml b/cml/src/glue/init-cleanup.sml new file mode 100644 index 0000000..11cd66b --- /dev/null +++ b/cml/src/glue/init-cleanup.sml @@ -0,0 +1,18 @@ +(* init-cleanup.sml + * + * COPYRIGHT (c) 2001 Bell Labs, Lucent Technologies + *) + +structure InitCleanup : sig end = + struct + + (* Add the standard cleaners. The order here is important: I/O needs to be + * after Channels&Mailboxes, but before Servers (since server cleanup may + * depend on I/O). + *) + val _ = ( + CleanUp.addCleaner CleanUp.chanCleaner; + CleanUp.addCleaner CleanIO.ioCleaner; + CleanUp.addCleaner CleanUp.servCleaner) + + end diff --git a/cml/src/glue/new-run-cml-fn.sml b/cml/src/glue/new-run-cml-fn.sml new file mode 100644 index 0000000..fb7de37 --- /dev/null +++ b/cml/src/glue/new-run-cml-fn.sml @@ -0,0 +1,100 @@ +(* run-cml-fn.sml + * + * COPYRIGHT (c) 1996 AT&T Research. + * COPYRIGHT (c) 1989-1991 John H. Reppy + *) + +functor RunCMLFn (G : OS_GLUE) : sig + + val doit : ((unit -> unit) * Time.time option) -> OS.Process.status + + val isRunning : unit -> bool + + val shutdown : OS.Process.status -> 'a + + val exportFn : + (string * (string * string list -> OS.Process.status) * Time.time option) + -> unit + + include CML_CLEANUP + + end = struct + + structure S = Scheduler + structure Sig = Signals + structure CU = CleanUp + + open InitCleanup (* to force CM to link this module in *) + + structure E = ExportFnFn (G); + + open CU + + val runningFlg = Running.isRunning + + fun isRunning () = !runningFlg + + fun shutdown sts = if !runningFlg + then SMLofNJ.Cont.throw (! S.shutdownHook) (true, sts) + else raise Fail "CML is not running" + + (* a dummy print function, in case the user's program doesn't reference + * CML's TextIO structure directly. + *) + fun dummyPrint _ = raise Fail "print called without loading CML's TextIO" + + val interruptK : unit SMLofNJ.Cont.cont = + SMLofNJ.Cont.isolate (fn _ => shutdown OS.Process.failure) + + fun doit (initialProc, tq) = let + val saveIntHandler = Sig.inqHandler Sig.sigINT + val savePrintFn = !SMLofNJ.Internals.prHook + val _ = ( + if !runningFlg + then raise Fail "CML is already running" + else runningFlg := true; + Thread.reset true; + G.init(); + S.schedulerHook := E.pollK; + S.pauseHook := E.pauseK) + val (cleanUp, sts) = SMLofNJ.Cont.callcc (fn doneK => ( + ignore ( + Sig.setHandler (Sig.sigINT, Sig.HANDLER(fn _ => interruptK))); + S.shutdownHook := doneK; + SMLofNJ.Internals.prHook := dummyPrint; + case tq of (SOME tq) => S.startTimer tq | _ => S.restartTimer(); + CU.clean CU.AtInit; + CML.spawn initialProc; + S.dispatch())) + in + CU.clean CU.AtShutdown; + G.shutdown(); + S.stopTimer(); + Thread.reset false; + runningFlg := false; + SMLofNJ.Internals.prHook := savePrintFn; + ignore (Sig.setHandler (Sig.sigINT, saveIntHandler)); + sts + end + + type cmdt = (string, string list) E.pair -> OS.Process.status + val exportFn' : (string * cmdt) -> unit = + Unsafe.CInterface.c_function "SMLNJ-RunT" "exportFn" + + fun exportFn (fileName, main, timeQ) = ( + if !runningFlg + then raise Fail "Cannot exportFn while CML is running" + else runningFlg := true; + Signals.maskSignals Signals.MASKALL; + (* run the SML/NJ AtExportFn cleaners to eliminate space-leaks *) + SMLofNJ.Internals.CleanUp.clean SMLofNJ.Internals.CleanUp.AtExportFn; + (* strip out any unecessary stuff from the CML Cleanup state. *) + CU.exportFnCleanup (); + (* unlink the SML print function *) + SMLofNJ.Internals.prHook := (fn _ => ()); + (* unlink the perv structure *) + Unsafe.pStruct := Unsafe.NILrde; + (* now export the wrapped main function *) + exportFn' (fileName, E.wrapForExport (main, timeQ))) + + end diff --git a/cml/src/glue/os-glue-sig.sml b/cml/src/glue/os-glue-sig.sml new file mode 100644 index 0000000..5edd136 --- /dev/null +++ b/cml/src/glue/os-glue-sig.sml @@ -0,0 +1,28 @@ +(* os-glue-sig.sml + * + * COPYRIGHT (c) 1995 AT&T Bell Laboratories. + * COPYRIGHT (c) 1989-1991 John H. Reppy + * + * This is the interface to an OS specific module that glues the various + * OS-specific scheduling operations together (i.e., timeouts, I/O, signals, + * etc...). + *) + +signature OS_GLUE = + sig + + val init : unit -> unit + (* this function is called at start-up time *) + + val pollOS : unit -> unit + (* this function is called at pre-emption points *) + + val pause : unit -> bool + (* this function is called when there is nothing else to do. It returns + * false if there are no threads blocked on OS conditions. + *) + + val shutdown : unit -> unit + (* this function is called when the system is shuting down *) + + end diff --git a/cml/src/glue/run-cml-fn.sml b/cml/src/glue/run-cml-fn.sml new file mode 100644 index 0000000..fde76e1 --- /dev/null +++ b/cml/src/glue/run-cml-fn.sml @@ -0,0 +1,100 @@ +(* run-cml-fn.sml + * + * COPYRIGHT (c) 1996 AT&T Research. + * COPYRIGHT (c) 1989-1991 John H. Reppy + *) + +functor RunCMLFn (G : OS_GLUE) : sig + + val doit : ((unit -> unit) * Time.time option) -> OS.Process.status + + val isRunning : unit -> bool + + val shutdown : OS.Process.status -> 'a + + val exportFn : + (string * (string * string list -> OS.Process.status) * Time.time option) + -> unit + + include CML_CLEANUP + + end = struct + + structure S = Scheduler + structure Sig = Signals + structure CU = CleanUp + + open InitCleanup (* to force CM to link this module in *) + + structure E = ExportFnFn (G); + + open CU + + val runningFlg = Running.isRunning + + fun isRunning () = !runningFlg + + fun shutdown sts = if !runningFlg + then SMLofNJ.Cont.throw (! S.shutdownHook) (true, sts) + else raise Fail "CML is not running" + + (* a dummy print function, in case the user's program doesn't reference + * CML's TextIO structure directly. + *) + fun dummyPrint _ = raise Fail "print called without loading CML's TextIO" + + val interruptK : unit SMLofNJ.Cont.cont = + SMLofNJ.Cont.isolate (fn _ => shutdown OS.Process.failure) + + fun doit (initialProc, tq) = let + val saveIntHandler = Sig.inqHandler Sig.sigINT + val savePrintFn = !SMLofNJ.Internals.prHook + val _ = ( + if !runningFlg + then raise Fail "CML is already running" + else runningFlg := true; + Thread.reset true; + G.init(); + S.schedulerHook := E.pollK; + S.pauseHook := E.pauseK) + val (cleanUp, sts) = SMLofNJ.Cont.callcc (fn doneK => ( + ignore ( + Sig.setHandler (Sig.sigINT, Sig.HANDLER(fn _ => interruptK))); + S.shutdownHook := doneK; + SMLofNJ.Internals.prHook := dummyPrint; + case tq of (SOME tq) => S.startTimer tq | _ => S.restartTimer(); + CU.clean CU.AtInit; + CML.spawn initialProc; + S.dispatch())) + in + CU.clean CU.AtShutdown; + G.shutdown(); + S.stopTimer(); + Thread.reset false; + runningFlg := false; + SMLofNJ.Internals.prHook := savePrintFn; + ignore (Sig.setHandler (Sig.sigINT, saveIntHandler)); + sts + end + + type cmdt = (string, string list) E.pair -> OS.Process.status + val exportFn' : (string * cmdt) -> unit = + Unsafe.CInterface.c_function "SMLNJ-RunT" "exportFn" + + fun exportFn (fileName, main, timeQ) = ( + if !runningFlg + then raise Fail "Cannot exportFn while CML is running" + else runningFlg := true; + Signals.maskSignals Signals.MASKALL; + (* run the SML/NJ AtExportFn cleaners to eliminate space-leaks *) + SMLofNJ.Internals.CleanUp.clean SMLofNJ.Internals.CleanUp.AtExportFn; + (* strip out any unecessary stuff from the CML Cleanup state. *) + CU.exportFnCleanup (); + (* unlink the SML print function *) + SMLofNJ.Internals.prHook := (fn _ => ()); + (* unlink the perv structure *) + Unsafe.pStruct := Unsafe.Object.toObject (); + (* now export the wrapped main function *) + exportFn' (fileName, E.wrapForExport (main, timeQ))) + + end diff --git a/cml/src/glue/sources.cm b/cml/src/glue/sources.cm new file mode 100644 index 0000000..5f53678 --- /dev/null +++ b/cml/src/glue/sources.cm @@ -0,0 +1,25 @@ +(* sources.cm + * + * The sources file for the glue that holds CML together. + *) + +Group + signature OS_GLUE + functor RunCMLFn +is +#if defined(NEW_CM) + ../cml-sub-basis.cm + $cml/core-cml.cm +#else + ../core-cml.cm +#endif + ../IO/sources.cm + + init-cleanup.sml + export-fn-fn.sml + os-glue-sig.sml +#if (SMLNJ_VERSION * 100 + SMLNJ_MINOR_VERSION >= 11033) + new-run-cml-fn.sml +#else + run-cml-fn.sml +#endif diff --git a/cml/src/inet-lib.cm b/cml/src/inet-lib.cm new file mode 100644 index 0000000..c78131f --- /dev/null +++ b/cml/src/inet-lib.cm @@ -0,0 +1,11 @@ +(* (C) 2011 The Fellowship of SML/NJ + * Author: matthias.blume@gmail.com (Matthias Blume) + * + * This wrapper library establishes $cml/inet-lib.cm + * an alias for the part of $cml-lib/smlnj-lib.cm + * that corresponds to $/inet-lib.cm + *) +Library + library($/inet-lib.cm) +is + $cml-lib/smlnj-lib.cm diff --git a/cml/src/smlnj-lib.cm b/cml/src/smlnj-lib.cm new file mode 100644 index 0000000..bd8ffc0 --- /dev/null +++ b/cml/src/smlnj-lib.cm @@ -0,0 +1,11 @@ +(* (C) 2011 The Fellowship of SML/NJ + * Author: matthias.blume@gmail.com (Matthias Blume) + * + * This wrapper library establishes $cml/smlnj-lib.cm + * an alias for $cml-lib/smlnj-lib.cm, but without + * the CML-specific additions. + *) +Library + library($/smlnj-lib.cm) - structure TimeLimit +is + $cml-lib/smlnj-lib.cm diff --git a/cml/src/tests/sources.cm b/cml/src/tests/sources.cm new file mode 100644 index 0000000..4851bca --- /dev/null +++ b/cml/src/tests/sources.cm @@ -0,0 +1,10 @@ +Group is +#if defined(NEW_CM) + $cml/basis.cm + $cml/cml.cm +#else + ../src/basis.cm + ../src/cml.cm +#endif + + test.sml diff --git a/cml/src/tests/test.sml b/cml/src/tests/test.sml new file mode 100644 index 0000000..52779cb --- /dev/null +++ b/cml/src/tests/test.sml @@ -0,0 +1,24 @@ +structure Test = +struct + + val prog = "/bin/ls" + + fun doit () = let + val proc = Unix.execute(prog, []) + val (fin,fout) = Unix.streamsOf proc + fun echo () = (case TextIO.inputLine fin + of "" => () + | s => (TextIO.output(TextIO.stdOut, s); echo()) + (* end case *)) + in + TextIO.closeOut fout; + echo (); + TextIO.closeIn fin; + ignore(Unix.reap proc); + () + end + + fun run () = RunCML.doit(doit, SOME(Time.fromMilliseconds 100)) + +end + diff --git a/cml/src/trace-cml.cm b/cml/src/trace-cml.cm new file mode 100644 index 0000000..eb5709b --- /dev/null +++ b/cml/src/trace-cml.cm @@ -0,0 +1,10 @@ +(* (C) 2011 The Fellowship of SML/NJ + * Author: matthias.blume@gmail.com (Matthias Blume) + * + * This wrapper library establishes $cml/trace-cml.cm as + * an alias for $cml-lib/trace-cml.cm + *) +Library + library($cml-lib/trace-cml.cm) +is + $cml-lib/trace-cml.cm diff --git a/cml/src/unix-lib.cm b/cml/src/unix-lib.cm new file mode 100644 index 0000000..03258f2 --- /dev/null +++ b/cml/src/unix-lib.cm @@ -0,0 +1,11 @@ +(* (C) 2011 The Fellowship of SML/NJ + * Author: matthias.blume@gmail.com (Matthias Blume) + * + * This wrapper library establishes $cml/unix-lib.cm + * an alias for the part of $cml-lib/smlnj-lib.cm + * that corresponds to $/unix-lib.cm. + *) +Library + library($/unix-lib.cm) +is + $cml-lib/smlnj-lib.cm diff --git a/cml/src/util/.cm/GUID/result.sml b/cml/src/util/.cm/GUID/result.sml new file mode 100644 index 0000000..9cb5e75 --- /dev/null +++ b/cml/src/util/.cm/GUID/result.sml @@ -0,0 +1 @@ +guid-$cml/(cml-internal.cm):Unix/(sources.cm):(os.cm):../util/(sources.cm):result.sml-1714016095.781 diff --git a/cml/src/util/.cm/SKEL/result.sml b/cml/src/util/.cm/SKEL/result.sml new file mode 100644 index 0000000..51f24bc --- /dev/null +++ b/cml/src/util/.cm/SKEL/result.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f1d"CML"ad"Result"jh1ad"SV"gp1d"SyncVar"h0 \ No newline at end of file diff --git a/cml/src/util/.cm/amd64-unix/result.sml b/cml/src/util/.cm/amd64-unix/result.sml new file mode 100644 index 0000000..d92449d Binary files /dev/null and b/cml/src/util/.cm/amd64-unix/result.sml differ diff --git a/cml/src/util/result.sml b/cml/src/util/result.sml new file mode 100644 index 0000000..010205e --- /dev/null +++ b/cml/src/util/result.sml @@ -0,0 +1,34 @@ +(* result.sml + * + * COPYRIGHT (c) 1996 AT&T Research. + * + *) + +structure Result :> sig + + type 'a result + + val result : unit -> 'a result + val put : ('a result * 'a) -> unit + val putExn : ('a result * exn) -> unit + val get : 'a result -> 'a + val getEvt : 'a result -> 'a CML.event + + end = struct + + structure SV = SyncVar + + datatype 'a result_val = EXN of exn | RES of 'a + + type 'a result = 'a result_val SV.ivar + + fun result () = SV.iVar() + fun put (iv, v) = SV.iPut(iv, RES v) + fun putExn (iv, ex) = SV.iPut(iv, EXN ex) + fun wrap (RES v) = v + | wrap (EXN ex) = raise ex + fun get iv = wrap(SV.iGet iv) + fun getEvt iv = CML.wrap(SV.iGetEvt iv, wrap) + + end; + diff --git a/cml/src/util/sources.cm b/cml/src/util/sources.cm new file mode 100644 index 0000000..70fcafa --- /dev/null +++ b/cml/src/util/sources.cm @@ -0,0 +1,15 @@ +(* sources.cm + * + * COPYRIGHT (c) 1996 AT&T Research. + *) + +Group +is +#if defined (NEW_CM) + $cml/core-cml.cm +#else + ../core-cml.cm +#endif + + result.sml + diff --git a/config/MacResources/postinstall b/config/MacResources/postinstall new file mode 100755 index 0000000..4e09e4c --- /dev/null +++ b/config/MacResources/postinstall @@ -0,0 +1,4 @@ +#!/bin/sh + +cd $2 +config/install.sh nolib diff --git a/config/MacResources/postupgrade b/config/MacResources/postupgrade new file mode 100755 index 0000000..4e09e4c --- /dev/null +++ b/config/MacResources/postupgrade @@ -0,0 +1,4 @@ +#!/bin/sh + +cd $2 +config/install.sh nolib diff --git a/config/_arch-n-opsys b/config/_arch-n-opsys new file mode 100644 index 0000000..c251c69 --- /dev/null +++ b/config/_arch-n-opsys @@ -0,0 +1,204 @@ +#!@SHELL@ +# +# .arch-n-opsys [-32 | -64] -- get architecture and system info +# +# Running `eval .arch-n-opsys` will define the following shell variables: +# +# ARCH -- one of ppc, sparc, or x86 +# OPSYS -- one of aix, cygwin, darwin, freebsd, linux, mklinux, netbsd, +# sunos, solaris, win32 +# HEAP_SUFFIX -- usually $ARCH-$OPSYS, but in some cases the OPSYS is replaced +# by $HEAP_OPSYS +# + +export PATH +PATH="/bin:/usr/bin" + +# the default size; this is set by the config/install.sh script +# +SIZE=@SIZE@ + +# check for word-size override +# +case x"$1" in + x-32) SIZE=32 ;; + x-64) SIZE=64 ;; + *) ;; +esac + +# pick_arch arch32 arch64 +# +pick_arch() { + if [ $SIZE = 32 ] ; then + echo $1 + else + echo $2 + fi +} + +case `uname -s` in + SunOS) + case `uname -r` in + 4.*) + OPSYS=sunos + case `/usr/bin/arch` in + sun4) ARCH=sparc;; + *) exit 1;; + esac + ;; + 5.*) + OPSYS=solaris + case `uname -p` in + sparc) ARCH=sparc;; + *86) ARCH=x86;; + *) exit 1;; + esac + ;; + *) exit 1;; + esac + ;; + AIX) + OPSYS=aix + ARCH=$(pick_arch ppc ppc64) + ;; + Darwin) + case `uname -p` in + powerpc) + ARCH=$(pick_arch ppc ppc64) + case `uname -r` in + 8*) OPSYS=darwin; HEAP_OPSYS=darwin ;; # MacOS X 10.4 Tiger + 9*) OPSYS=darwin; HEAP_OPSYS=darwin ;; # MacOS X 10.5 Leopard + 10*) OPSYS=darwin; HEAP_OPSYS=darwin ;; # MacOS X 10.6 Snow Leopard + *) exit 1;; + esac;; + i386) + ARCH=$(pick_arch x86 amd64) + REQUIRE_64BIT=no + OPSYS=darwin + HEAP_OPSYS=darwin + case `uname -r` in + 9*) HEAP_OPSYS=darwinz ;; # MacOS X 10.5 Leopard + 10*) ;; # MacOS X 10.6 Snow Leopard + 11*) ;; # MacOS X 10.7 Lion + 12*) ;; # MacOS X 10.8 Mountain Lion + 13*) ;; # MacOS X 10.9 Mavericks + 14*) ;; # MacOS X 10.10 Yosemite + 15*) ;; # MacOS X 10.11 El Capitan + 16*) ;; # macOS 10.12 Sierra + 17*) ;; # macOS 10.13 High Sierra + 18*) ;; # macOS 10.14 Mojave + 19*) REQUIRE_64BIT=yes ;; # macOS 10.15 Catalina + 20*) REQUIRE_64BIT=yes ;; # macOS 11 Big Sur + 21*) REQUIRE_64BIT=yes ;; # macOS 12 Monterey + 22*) REQUIRE_64BIT=yes ;; # macOS 13 Ventura + 23*) REQUIRE_64BIT=yes ;; # macOS 14 Sonoma + *) exit 1 ;; + esac + if [ x"$REQUIRE_64BIT" = xyes -a $SIZE = 32 ] ; then + # only 64-bit executables are supported in recent macOS versions + exit 1; + fi + ;; + arm) + # we use Rosetta since we do not have native arm64 support in + # the legacy version of SML/NJ + ARCH="amd64" + OPSYS=darwin; + HEAP_OPSYS=darwin + ;; + esac + ;; + Linux) + OPSYS=linux + case `uname -m` in + *86) + ARCH=x86 + # version 4.9 is the oldest "supported" version of Linux, so we + # only go back to the 3.x versions + case `uname -r` in + 3.*) ;; # 2011 -- 2015 + 4.*) ;; # 2015 -- 2018 + 5.*) ;; # 2019 -- 2022 + 6.*) ;; # 2022 -- + *) exit 1 ;; + esac + ;; + x86_64) + ARCH=$(pick_arch x86 amd64) + ;; + ppc) + ARCH=$(pick_arch ppc ppc64) + case `uname -r` in + *osfmach*) OPSYS=mklinux ;; + *) ;; + esac + ;; + *) exit 1;; + esac + ;; + FreeBSD) + OPSYS=freebsd + HEAP_OPSYS=bsd + case `uname -m` in + *86) ARCH=x86 ;; + x86_64) ARCH=$(pick_arch x86 amd64) ;; + amd64) ARCH=$(pick_arch x86 amd64) ;; + *) exit 1 ;; + esac + ;; + NetBSD) + case `uname -r` in + 1.*) exit 1 ;; + 2.*) exit 1 ;; + *) OPSYS=netbsd ;; + esac + HEAP_OPSYS=bsd + case `uname -p` in + *86) ARCH=x86;; + x86_64) ARCH=$(pick_arch x86 amd64) ;; + powerpc) ARCH=ppc;; + sparc) ARCH=sparc;; + *) exit 1;; + esac + ;; + OpenBSD) + OPSYS=openbsd + HEAP_OPSYS=bsd + case `uname -p` in + *86) ARCH=x86;; + x86_64) ARCH=$(pick_arch x86 amd64) ;; + powerpc) ARCH=ppc;; + *) exit 1;; + esac + ;; + Windows_NT) + OPSYS=win32 + case `uname -m` in + *86) ARCH=x86;; + *) exit 1;; + esac + ;; + CYGWIN_NT*) + # If the environment variable SMLNJ_WINDOWS_RUNTIME is defined, + # then we use Win32 as the runtime environment. + if [ "$SMLNJ_WINDOWS_RUNTIME" != "" ]; then + OPSYS=win32 + else + OPSYS=cygwin + fi + case `uname -m` in + *86) ARCH=x86;; + x86_64) ARCH=$(pick_arch x86 amd64) ;; + *) exit 1;; + esac + ;; + *) exit 1;; +esac + +if [ "$HEAP_OPSYS" = "" ]; then + HEAP_SUFFIX="$ARCH-$OPSYS" +else + HEAP_SUFFIX="$ARCH-$HEAP_OPSYS" +fi + +echo "ARCH=$ARCH; OPSYS=$OPSYS; HEAP_SUFFIX=$HEAP_SUFFIX" diff --git a/config/_heap2exec b/config/_heap2exec new file mode 100644 index 0000000..93f1fed --- /dev/null +++ b/config/_heap2exec @@ -0,0 +1,185 @@ +#!@SHELL@ +# +# COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) +# All rights reserved. +# +# usage: heap2exec [ ] heapfile execfile +# +# where the options are +# +# -static +# -linkwith-a -- use static linking on Linux/FreeBSD systems +# +# -dynamic +# -linkwith-so -- use dynamic linking on Linux/FreeBSF systems +# + +CMD=`basename "$0"` + +usage() { + echo "usage: $CMD [ -32 | -64 ] [ ] " + echo " where is one of" + echo " -static, --linkwith-a -- use static linking (Linux/FreeBSD)" + echo " -dynamic, --linkwith-so -- use dynamic linking (Linux/FreeBSD)" + exit 1 +} + +die () { + echo "${CMD}: $1" + exit 1 +} + +# the default size; this is set by the config/install.sh script +# +SIZE=@SIZE@ +if [ x"$1" = x-32 ] ; then + SIZE=32 ; shift +elif [ x"$1" = x-64 ] ; then + SIZE=64 ; shift +fi +SIZE_OPT="-"$SIZE + +if [ x${SMLNJ_HOME} = x ] ; then + BIN_DIR="@BINDIR@" +else + BIN_DIR=${SMLNJ_HOME}/bin +fi + +ARCH_N_OPSYS=`"$BIN_DIR/.arch-n-opsys" $SIZE_OPT` +if [ "$?" != "0" ]; then + die "unable to determine architecture/operating system" +fi +eval $ARCH_N_OPSYS + +RUNX=${BIN_DIR}/.run/runx.${ARCH}-${OPSYS} +RUN_SO=${BIN_DIR}/.run/run.${ARCH}-${OPSYS}.so +RUN_A=${BIN_DIR}/.run/run.${ARCH}-${OPSYS}.a +H2A=${BIN_DIR}/heap2asm + +FORMAT= +if [ $# != 2 ] ; then + if [ $# = 3 ] ; then + case $1 in + --linkwith-exec) + FORMAT=exec + ;; + -dynamic|--linkwith-so) + FORMAT=so + ;; + -static|--linkwith-a) + FORMAT=a + ;; + *) + usage + ;; + esac + shift + else + usage + fi +else + case ${OPSYS} in + darwin) + FORMAT=exec + ;; + freebsd|linux) + FORMAT=a + ;; + *) + die "no default runtime link format known for ${OPSYS}" + ;; + esac +fi +if [ -z "$FORMAT" ] ; then + die "no runtime object format specified" +fi + +heapfile=$1 +execfile=$2 + +CC=cc +LD=ld + +EXEC_PROG= +EXEC_FLAGS= +EXEC_LIBS= +SO_PROG= +SO_FLAGS= +SO_LIBS= +A_PROG= +A_FLAGS= +A_LIBS= + +case ${OPSYS} in + darwin) + EXEC_PROG=${LD} + EXEC_LIBS=-lc + ;; + freebsd) + SO_PROG=${CC} + SO_FLAGS=-Wl,--export-dynamic + SO_LIBS=-lm + A_PROG=${CC} + A_FLAGS=-Wl,--export-dynamic + A_LIBS=-lm + ;; + linux) + SO_PROG=${CC} + SO_FLAGS=-Wl,--export-dynamic + A_PROG=${CC} + A_FLAGS=-Wl,--export-dynamic + A_LIBS="-lm -ldl" + ;; + *) + ;; +esac + +if [ ! -f $H2A ]; then + echo "${CMD}: heap2asm is not installed" + exit 2 +fi + +# +# TODO: it would be better to check the linking command etc before +# running heap2asm, since that can take a long time +# + +RESULT=0 +if ${H2A} $SIZE_OPT "$heapfile" "$execfile".s ; then + if [ -f "$execfile".s ] ; then + if ${CC} -c -o "$execfile".o "$execfile".s ; then + rm "$execfile".s + else + rm "$execfile".s + die "${execfile}.o creation failed" + fi + else + die "${execfile}.s creation failed" + fi + if [ "$FORMAT" = exec -a -f "${RUNX}" ] ; then + [ -z "${EXEC_PROG}" ] && die "no linker specified for runtime format $FORMAT" + ${EXEC_PROG} ${EXEC_FLAGS} -o "$execfile" ${RUNX} "$execfile".o ${EXEC_LIBS} + RESULT=$? + elif [ "$FORMAT" = so -a -f "${RUN_SO}" ] ; then + [ -z "${SO_PROG}" ] && die "no linker specified for runtime format $FORMAT" + ${SO_PROG} ${SO_FLAGS} -o "$execfile" ${RUN_SO} "$execfile".o ${SO_LIBS} + RESULT=$? + elif [ "$FORMAT" = a -a -f "${RUN_A}" ] ; then + [ -z "${A_PROG}" ] && die "no linker specified for runtime format $FORMAT" + ${A_PROG} ${A_FLAGS} -o "$execfile" ${RUN_A} "$execfile".o ${A_LIBS} + RESULT=$? + else + echo "${CMD}: linkable runtime system object ($FORMAT) not available" + rm "$execfile".o + exit 2 + fi + rm "$execfile".o +else + die "heap2asm failed" +fi + +if [ $RESULT != 0 ] ; then + die "linking failed with return code $RESULT" +fi + +exit 0 diff --git a/config/_link-sml b/config/_link-sml new file mode 100644 index 0000000..ae6a558 --- /dev/null +++ b/config/_link-sml @@ -0,0 +1,117 @@ +#!@SHELL@ +# +# Copyright 2019 The Fellowship of SML/NJ (http://www.smlnj.org) +# All rights reserved. +# +# The standard "link" script for SML/NJ +# + +# +# for /bin/ksh, disable reading user's environment file +# +unset ENV + +# the default size; this is set by the config/install.sh script +# +SIZE_OPT="-"@SIZE@ + +RUN="" +BOOT="" +HEAP="sml" + +# +# Process command line arguments +# +while [ "$#" != "0" ]; do + arg=$1 + case "$arg" in + -32) SIZE_OPT=$arg ; shift ;; + -64) SIZE_OPT=$arg ; shift ;; + @SMLrun=*) + RUN=`echo "$arg" | sed 's/@SMLrun=//'` + shift + ;; + @SMLboot=*) + BOOT=`echo "$arg" | sed 's/@SMLboot=//'` + shift + ;; + @SMLheap=*) + HEAP=`echo "$arg" | sed 's/@SMLheap=//'` + shift + ;; + @SMLversion) + echo "$CMD @VERSION@" + exit 0 + ;; + *) + break + ;; + esac +done + +############################################################################# +# +# BEGIN SITE SPECIFIC STUFF +# +############################################################################# + +# +# SITE SPECIFIC CONFIGURATION INFO +# + +# the path of the directory in which executables (like this file) are kept. +if [ x"$SMLNJ_HOME" = x ] ; then + BIN_DIR="@BINDIR@" + if [ ! -d "$BIN_DIR" ]; then + cmddir=`dirname $0` + case "$cmddir" in + /* ) BIN_DIR="$cmddir";; + * ) BIN_DIR=`cd $cmddir; pwd` ;; + esac + fi +else + if [ x"$CM_PATHCONFIG" = x ] ; then + CM_PATHCONFIG="$SMLNJ_HOME"/lib/pathconfig + export CM_PATHCONFIG + fi + BIN_DIR="$SMLNJ_HOME"/bin +fi + +# the path of the directory in which the runtime system executables are kept. +RUN_DIR=$BIN_DIR/.run + +# +# the following could be replaced with some site specific code +# +ARCH_N_OPSYS=`"$BIN_DIR/.arch-n-opsys" $SIZE_OPT` +if [ "$?" != "0" ]; then + echo "$CMD: unable to determine architecture/operating system" + exit 1 +fi +eval $ARCH_N_OPSYS + +############################################################################# +# +# END SITE SPECIFIC STUFF +# +############################################################################# + +# if the runtime was not specified, use the default +if [ x"$RUN" = x ]; then + RUN="$RUN_DIR/run.$ARCH-$OPSYS" +fi + +if [ ! -x "$RUN" ]; then + echo "$CMD: cannot find runtime system $RUN" + exit 1 +fi + +if [ "$BOOT" = "" ]; then + echo "@SMLboot= must be specified." + exit 1 +fi + +# +# run the sucker! +# +exec "$RUN" @SMLboot="$BOOT" @SMLheap="$HEAP" "$@" diff --git a/config/_ml-build b/config/_ml-build new file mode 100644 index 0000000..7146bcb --- /dev/null +++ b/config/_ml-build @@ -0,0 +1,117 @@ +#!@SHELL@ +# +# COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) +# All rights reserved. +# +# usage: ml-build [-S setup] root-group [main-function [heapfile]] +# + +if [ x${SMLNJ_HOME} = x ] ; then + BIN_DIR="@BINDIR@" + LIB_DIR="@LIBDIR@" +else + BIN_DIR=${SMLNJ_HOME}/bin + LIB_DIR=${SMLNJ_HOME}/lib +fi + +if [ x"$CM_PATHCONFIG" = x ] ; then + CM_PATHCONFIG=${LIB_DIR}/pathconfig + export CM_PATHCONFIG +fi + +SML=$BIN_DIR/sml +LINK=$BIN_DIR/.link-sml + +thisscript=$0 + +xx=$$ +smlfile=$xx-export.sml +cmfile=$xx-export.cm +listfile=$xx-BOOTLIST +linkargsfile=$xx-LINKARGS + +dulist='' + +trap 'rm -rf $smlfile $cmfile $listfile $linkargsfile @CMDIRARC@/*/$smlfile' 0 1 2 3 15 + +usage() { + echo ${thisscript}: $* + echo Usage: $thisscript '[-32 | -64] [-S setup] root-group [main-function [heapfile]]' + exit 1 +} + +# the default size; this is set by the config/install.sh script +# +SIZE_OPT="-"@SIZE@ +setup= + +while [ $# != 0 ] ; do + case $1 in + -32) SIZE_OPT="-32"; shift ;; + -64) SIZE_OPT="-64"; shift ;; + -D*|-U*|-C*) + dulist="$dulist $1" + shift + ;; + -S) + shift + if [ $# = 0 ] ; then + usage missing argument for -S + fi + setup=$1 + shift + ;; + *) + break + ;; + esac +done + +if [ $# = 4 ] ; then + # assume that first argument is setup; e.g., "$smlnj-tdp/back-trace.cm" + if [ x"$setup" != x ] ; then + usage setup was already specified + fi + setup=$1 + root=$2 + main=$3 + heap=$4 +elif [ $# = 3 ] ; then + root=$1 + main=$2 + heap=$3 +elif [ $# = 2 ] ; then + root=$1 + main=$2 + heap=`basename "$root" .cm` +elif [ $# = 1 ] ; then + root=$1 + # quick hack for now: + main=Test.main + heap=`basename "$root" .cm` +else + usage no CM description file specified +fi + +rare=XYZ_XXX_0123 + +cat >$smlfile <$cmfile <$mlscript < ignore (OS.Process.exit OS.Process.failure) + | SOME l => let + val s = TextIO.openOut "$depends" + fun pr { derived = true, file, class } = () + | pr { file, ... } = TextIO.output (s, " \\\\\\n " ^ file) + in + TextIO.output (s, "${delims}\\n${target}:"); + app pr l; + TextIO.output (s, "\\n${delime}\\n"); + TextIO.closeOut s; + ignore (OS.Process.exit OS.Process.success) + end +end +stop + +if $SML $SIZE_OPT '$smlnj/cm.cm' $dulist $mlscript ; then + # + # remove previous result of ml-makedepend + # (other cmfile/target combinations are unaffected) + # + awk "BEGIN { c = 1; s = \"${delims}\"; e = \"${delime}\"; } + (\$0 == s) { c = 0; next; } + (\$0 == e) { c = 1; next; } + (c == 1) { print }" <$mf >$tmpmf + cat $tmpmf $depends >$mf + rm $tmpmf $depends +else + echo $thisscript: CM dependency analysis failed + exit 1 +fi + +exit 0 diff --git a/config/_run-sml b/config/_run-sml new file mode 100644 index 0000000..8c5ddfa --- /dev/null +++ b/config/_run-sml @@ -0,0 +1,265 @@ +#!@SHELL@ +# +# Copyright 2019 The Fellowship of SML/NJ (http://www.smlnj.org) +# All rights reserved. +# +# The standard driver for SML/NJ under the new runtime system +# + +CMD=`basename "$0"` + +# +# for /bin/ksh, disable reading user's environment file +# +unset ENV + +############################################################################# +# +# BEGIN SITE SPECIFIC STUFF +# +############################################################################# + +# +# SITE SPECIFIC CONFIGURATION INFO +# + +# On cygwin, make sure SMLNJ_HOME is given a POSIX-style pathname. +if [ x"$SMLNJ_HOME" != x ] ; then + if [ x"$SMLNJ_CYGWIN_RUNTIME" != x ] ; then + if [ "`uname -o`" = "Cygwin" ] ; then + # Convert to Unix style, absolute pathname. + SMLNJ_HOME=`cygpath -u -a "$SMLNJ_HOME"` + fi + fi +fi + +# the path of the directory in which executables (like this file) are kept. +if [ x"$SMLNJ_HOME" = x ] ; then + BIN_DIR="@BINDIR@" + if [ ! -d "$BIN_DIR" ]; then + cmddir=`dirname "$0"` + case "$cmddir" in + /* ) BIN_DIR="$cmddir";; + * ) BIN_DIR=`cd $cmddir; pwd` ;; + esac + fi +else + if [ x"$CM_PATHCONFIG" = x ] ; then + CM_PATHCONFIG=${SMLNJ_HOME}/lib/pathconfig + export CM_PATHCONFIG + fi + BIN_DIR=${SMLNJ_HOME}/bin +fi + +# the path of the directory in which the runtime system executables are kept. +RUN_DIR=$BIN_DIR/.run + +# the path of the directory in which the heap images are kept. +HEAP_DIR=$BIN_DIR/.heap + +############################################################################# +# +# END SITE SPECIFIC STUFF +# +############################################################################# + +# special shortcut for frequent use (and for Linux' binfmt) +if [ `basename "$0"` = sml ] ; then + case "$1" in + /* | ./* | ../* ) + case "$1" in + *.cm | *.sml | *.sig | *.fun ) + ;; + * ) + HEAP="@SMLload=$1" + shift + ;; + esac + ;; + esac +fi + +ALLOC="" + +# the default size; this is set by the config/install.sh script +# +SIZE_OPT="-"@SIZE@ + +# if "yes" then we report the heap suffix and quit +# +REPORT_SUFFIX=no + +# +# Process command line arguments +# +while [ "$#" != "0" ]; do + arg=$1 + case "$arg" in + -32) shift; SIZE_OPT=$arg ;; + -64) shift; SIZE_OPT=$arg ;; + @SMLrun=*) + shift + RUN=`echo $arg | sed 's/@SMLrun=//'` + ;; + @SMLload=*) + shift + HEAP=$arg + ;; + @SMLappl) + shift + if [ "$#" = "0" ]; then + echo "$CMD: missing argument for @SMLappl option" + exit 1 + fi + APPL=$1 + shift + ;; + @SMLversion) + echo "$CMD @VERSION@" + exit 0 + ;; + @SMLwordsize) + case $SIZE_OPT in + -32) echo "32" ;; + -64) echo "64" ;; + esac + exit 0 + ;; + @SMLsuffix) + shift + REPORT_SUFFIX=yes + ;; + @SMLalloc=*) + shift + ALLOC=$arg + ;; + *) + break + ;; + esac +done + +# +# the following could be replaced with some site specific code +# +ARCH_N_OPSYS=`"$BIN_DIR/.arch-n-opsys" $SIZE_OPT` +if [ "$?" != "0" ]; then + echo "$CMD: unable to determine architecture/operating system" + exit 1 +fi +eval $ARCH_N_OPSYS + +# if the `@SMLsuffix` option was specified, then report the suffix and quit +# +if [ $REPORT_SUFFIX = yes ] ; then + echo $HEAP_SUFFIX + exit 0 +fi + +# +# Try to figure out the CPU's cache size and set the allocation area +# size accordingly. This is majorly important for Celeron systems +# which suffer badly when the allocation area is too big. +# +if [ "$ALLOC" = "" ] ; then + if [ -f /proc/cpuinfo ] ; then + # "head" is called to make sure we consider only one matching line. + # (On linux SMP systems there is more than one such line.) + cache=`grep -F 'cache size' lib +# lib +# ulib +# ulib +# anchor

    +# libanchor

    +# prog ... +# dprog ... +# config +# src +# +# Short explanation: +# +# The tag defines the scope of the command. It should be a name +# used in the config/targets file. +# +# "lib" (library) and "ulib" (library for Unix-like systems only): +# : "anchor" -- the anchor name currently used by the library +# to be registered for compilation +# : "alternative anchor" -- optional alternative anchor name which is +# to be used once the library is in its final location +# (this must be used if "anchor" is already bound +# and used for other libraries which come from the +# bootfile bundle), +# : "relative name" -- path to library's .cm file relative to anchor +# (standard syntax) +# : "dir" -- directory name that anchor should be bound to, +# name is relative to smlnjroot and in standard syntax +# +# "anchor" (assign path name to anchor) and +# "libanchor" (assign path name relative to lib dir to anchor) +# : "anchor" -- the anchor name to be defined +#

    : "path" -- the path name that is the value +# +# "prog" (install "standalone" program, e.g., ml-yacc, etc.) and +# "dprog" (install standalone program, but defer this action until all +# libraries are installed) +# These have 0, 1, 2, or 3 arguments: +# {prog|dprog} +# : "target" -- name of program; this is the same as the basename +# of the heap image to be generated as well as the +# final arc of the source tree's directory name +# if is not given, it is taken to be identical to the module name +# : "optional heap directory" -- optional subdirectory where the +# build command drops the heap image; default is "-", which means +# no subdirectory is given (i.e., the heap image winds up +# in the toplevel directory of the respective program's source tree) +# : "dir" -- directory relative to installation root where source +# tree for this program resides; by default, this is taken to be +# identical to the module name +# +# "config" (configure a module) has one argument, which is the directory +# relative to the installation root where the config.sh (resp. config.bat) +# script can be found. +# +# "src" (register a source package that needs to be unpacked but requires +# no further action) +# + +# modules that don't require installer actions other than +# unpacking their respective source trees: +runtime src +compiler src +smlnj-c src +cm src +system src +doc src + +# Backwards compatible views of the SML Basis Library +# +old-basis lib basis-2004.cm basis-2004.cm base/old-basis/2004 + +# Addional components of the SML/NJ library that are not used by the compiler: +# (unix-lib.cm is installed only on Unix-like systems) +smlnj-lib ulib unix-lib.cm unix-lib.cm smlnj-lib/Unix +smlnj-lib lib hash-cons-lib.cm hash-cons-lib.cm smlnj-lib/HashCons +smlnj-lib lib html-lib.cm html-lib.cm smlnj-lib/HTML +smlnj-lib lib html4-lib.cm html4-lib.cm smlnj-lib/HTML4 +smlnj-lib lib inet-lib.cm inet-lib.cm smlnj-lib/INet +smlnj-lib lib json-lib.cm json-lib.cm smlnj-lib/JSON +smlnj-lib lib pp-extras-lib.cm pp-extras-lib.cm smlnj-lib/PP +smlnj-lib lib regexp-lib.cm regexp-lib.cm smlnj-lib/RegExp +smlnj-lib lib reactive-lib.cm reactive-lib.cm smlnj-lib/Reactive +smlnj-lib lib sexp-lib.cm sexp-lib.cm smlnj-lib/SExp +smlnj-lib lib uuid-lib.cm uuid-lib.cm smlnj-lib/UUID +smlnj-lib lib xml-lib.cm xml-lib.cm smlnj-lib/XML + +# Concurrent ML: +cml lib cml core-cml.cm cml/src +cml lib cml cml-internal.cm cml/src +cml lib cml cml.cm cml/src +cml lib cml basis.cm cml/src + +# Concurrent ML support libraries: +cml-lib lib cml-lib trace-cml.cm cml/cml-lib/cm-descr +cml-lib lib cml-lib smlnj-lib.cm cml/cml-lib/cm-descr +cml-lib lib cml trace-cml.cm cml/src +cml-lib lib cml smlnj-lib.cm cml/src +cml-lib lib cml inet-lib.cm cml/src +cml-lib ulib cml unix-lib.cm cml/src +cml-lib lib cml cml-lib.cm cml/src + +# eXene toolkit: +eXene lib eXene.cm eXene.cm eXene + +# C Kit: +ckit lib ckit-lib.cm ckit-lib.cm ckit/src + +# NLFFI foreign function interface library +ml-nlffi-lib lib c memory/memory.cm nlffi/lib +ml-nlffi-lib lib c internals/c-int.cm nlffi/lib +ml-nlffi-lib lib c c.cm nlffi/lib + +# portable dependency graph library: +pgraph-util lib pgraph-util.cm pgraph-util.cm pgraph + +# tracing/debugging/profiling: +tdp-util lib smlnj-tdp plugins.cm trace-debug-profile +tdp-util lib smlnj-tdp back-trace.cm trace-debug-profile +tdp-util lib smlnj-tdp coverage.cm trace-debug-profile + +# MLRISC libraries (those that are not already included in the compiler): +mlrisc libanchor Control.cm SMLNJ-MLRISC +mlrisc libanchor Lib.cm SMLNJ-MLRISC +mlrisc libanchor Visual.cm SMLNJ-MLRISC +mlrisc libanchor MLRISC.cm SMLNJ-MLRISC +mlrisc libanchor MLTREE.cm SMLNJ-MLRISC +mlrisc libanchor Graphs.cm SMLNJ-MLRISC +mlrisc libanchor IA32.cm SMLNJ-MLRISC +mlrisc libanchor AMD64.cm SMLNJ-MLRISC +mlrisc libanchor SPARC.cm SMLNJ-MLRISC + +#mlrisc libanchor StagedAlloc.cm SMLNJ-MLRISC +#mlrisc libanchor CCall.cm SMLNJ-MLRISC +#mlrisc libanchor CCall-x86-64.cm SMLNJ-MLRISC +#mlrisc libanchor CCall-x86.cm SMLNJ-MLRISC +#mlrisc libanchor CCall-sparc.cm SMLNJ-MLRISC +#mlrisc libanchor CCall-Vararg.cm SMLNJ-MLRISC +#mlrisc libanchor CCall-VarargCall.cm SMLNJ-MLRISC +#mlrisc libanchor CCall-VarargInterp.cm SMLNJ-MLRISC + +mlrisc anchor RA.cm MLRISC/cm +#mlrisc anchor SPARC.cm MLRISC/cm +mlrisc anchor Peephole.cm MLRISC/cm +mlrisc anchor StagedAlloc.cm MLRISC/cm +#mlrisc anchor IA32.cm MLRISC/cm +mlrisc anchor AMD64.cm MLRISC/cm +mlrisc anchor CCall.cm MLRISC/cm +mlrisc anchor CCall-x86-64.cm MLRISC/cm +mlrisc anchor CCall-x86.cm MLRISC/cm +mlrisc anchor CCall-sparc.cm MLRISC/cm +mlrisc anchor CCall-Vararg.cm MLRISC/cm +mlrisc anchor CCall-VarargCall.cm MLRISC/cm +mlrisc anchor CCall-VarargInterp.cm MLRISC/cm + +mlrisc lib OTHER-MLRISC RA.cm MLRISC/cm SMLNJ-MLRISC +mlrisc lib OTHER-MLRISC Peephole.cm MLRISC/cm SMLNJ-MLRISC +#mlrisc lib OTHER-MLRISC IA32.cm MLRISC/cm SMLNJ-MLRISC +mlrisc lib OTHER-MLRISC IA32-Peephole.cm MLRISC/cm SMLNJ-MLRISC +mlrisc lib OTHER-MLRISC AMD64.cm MLRISC/cm SMLNJ-MLRISC +mlrisc lib OTHER-MLRISC AMD64-Peephole.cm MLRISC/cm SMLNJ-MLRISC +#mlrisc lib OTHER-MLRISC SPARC.cm MLRISC/cm SMLNJ-MLRISC +mlrisc lib OTHER-MLRISC StagedAlloc.cm MLRISC/cm SMLNJ-MLRISC +mlrisc lib OTHER-MLRISC CCall.cm MLRISC/cm SMLNJ-MLRISC +mlrisc lib OTHER-MLRISC CCall-x86-64.cm MLRISC/cm SMLNJ-MLRISC +mlrisc lib OTHER-MLRISC CCall-x86.cm MLRISC/cm SMLNJ-MLRISC +mlrisc lib OTHER-MLRISC CCall-sparc.cm MLRISC/cm SMLNJ-MLRISC +mlrisc lib OTHER-MLRISC CCall-Vararg.cm MLRISC/cm SMLNJ-MLRISC + +# libraries supporting MLRISC tools: +mlrisc-tools lib mlrisc-tools pp.cm MLRISC/Tools +mlrisc-tools lib mlrisc-tools source-map.cm MLRISC/Tools +mlrisc-tools lib mlrisc-tools sml-ast.cm MLRISC/Tools +mlrisc-tools lib mlrisc-tools prec-parser.cm MLRISC/Tools +mlrisc-tools lib mlrisc-tools parser.cm MLRISC/Tools +mlrisc-tools lib mlrisc-tools match-compiler.cm MLRISC/Tools + +# old lexer generator: +ml-lex prog +ml-lex-mllex-tool lib mllex-tool.cm mllex-tool.cm ml-lex/tool +ml-lex-lex-ext lib lex-ext.cm lex-ext.cm ml-lex/tool + +# LALR(1) parser generator: +ml-yacc prog ml-yacc src +ml-yacc lib mlyacc-tool.cm mlyacc-tool.cm ml-yacc/tool +ml-yacc-grm-ext lib grm-ext.cm grm-ext.cm ml-yacc/tool + +# new (unicode-capable) lexer generator: +ml-ulex dprog ml-ulex - ml-lpt/ml-ulex +ml-ulex lib ml-ulex-tool.cm ml-ulex-tool.cm ml-lpt/ml-ulex/tool +ml-ulex-mllex-tool lib mllex-tool.cm mllex-tool.cm ml-lpt/ml-ulex/tool +ml-ulex-lex-ext lib lex-ext.cm lex-ext.cm ml-lpt/ml-ulex/tool + +# LL(k) parser generator: +ml-antlr dprog ml-antlr - ml-lpt/ml-antlr +ml-antlr lib ml-antlr-tool.cm ml-antlr-tool.cm ml-lpt/ml-antlr/tool +ml-antlr-grm-ext lib grm-ext.cm grm-ext.cm ml-lpt/ml-antlr/tool + +# support library for ml-ulex and ml-antlr: +ml-lpt-lib lib ml-lpt-lib.cm ml-lpt-lib.cm ml-lpt/lib + +# ASDL generator and support library +asdl config asdl +asdl dprog asdlgen - asdl/src/asdlgen +asdl lib asdlgen-tool.cm asdlgen-tool.cm asdl/tool +asdl lib asdl-ext.cm asdl-ext.cm asdl/tool +asdl lib asdl-lib.cm asdl-lib.cm asdl/src/lib/sml + +# bottom-up rewrite code generator generator: +ml-burg prog +ml-burg lib mlburg-tool.cm mlburg-tool.cm ml-burg/tool +ml-burg lib burg-ext.cm burg-ext.cm ml-burg/tool + +# utility program for use by heap2exec: +heap2asm prog + +# NLFFI foreign function interface generator: +ml-nlffigen dprog ml-nlffigen - nlffi/gen +nowhere dprog nowhere - MLRISC/Tools/nowhere diff --git a/config/chk-global-names.sh b/config/chk-global-names.sh new file mode 100755 index 0000000..616589a --- /dev/null +++ b/config/chk-global-names.sh @@ -0,0 +1,32 @@ +#!/bin/sh +# +# COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) +# All rights reserved. +# +# Check to see if "_" is prepended to global names in the symbol table. +# + +CC=${CC:-cc} + +TMP_FILE=/tmp/smlConfig-$$ +TMP_FILE_C=$TMP_FILE.c + +WITNESS="w3E_4Ew3E_4Rrr_56TtT" + +cat > $TMP_FILE_C < +# +# the architecture for the boot files +# +# the operating system (win32 or unix) +# + +here=`pwd` +path_to_me=`dirname $0` + +CONFIGDIR=$here/config + +complain() { + echo "$@" + exit 1 +} + +if [ $# -ne 2 ] ; then + complain "usage: config/download.sh " +fi +ARCH=$1 +OS=$2 +BOOT_ARCHIVE=boot.$ARCH-$OS + +export VERSION=`cat "$CONFIGDIR/version"` + +# extract list of targets +# +TARGETS=$(grep ^request $CONFIGDIR/targets | sed -e 's/request \(.*\)/\1/') +if [ x"$TARGETS" = x ] ; then + complain "empty target list" +fi + +# determine list of files to download from targets +# +DOWNLOADS="$BOOT_ARCHIVE" +for tgt in $TARGETS ; do + case $tgt in + src-smlnj) DOWNLOADS="$DOWNLOADS compiler.tgz" ;; + ml-ulex) DOWNLOADS="$DOWNLOADS ml-lpt" ;; + ml-ulex-mllex-tool) DOWNLOADS="$DOWNLOADS ml-lpt" ;; + ml-lex-mllex-tool) DOWNLOADS="$DOWNLOADS ml-lex" ;; + ml-lex-lex-ext) DOWNLOADS="$DOWNLOADS ml-lex" ;; + ml-yacc-grm-ext) DOWNLOADS="$DOWNLOADS ml-yacc" ;; + ml-antlr) DOWNLOADS="$DOWNLOADS ml-lpt" ;; + ml-antlr-grm-ext) DOWNLOADS="$DOWNLOADS ml-lpt" ;; + ml-lpt-lib) DOWNLOADS="$DOWNLOADS ml-lpt" ;; + pgraph-util) DOWNLOADS="$DOWNLOADS pgraph" ;; + tdp-util) DOWNLOADS="$DOWNLOADS trace-debug-profile" ;; + cml-lib) DOWNLOADS="$DOWNLOADS cml" ;; + mlrisc) DOWNLOADS="$DOWNLOADS MLRISC" ;; + mlrisc-tools) DOWNLOADS="$DOWNLOADS MLRISC" ;; + ml-nlffi-lib) DOWNLOADS="$DOWNLOADS nlffi" ;; + ml-nlffigen) DOWNLOADS="$DOWNLOADS nlffi" ;; + nowhere) DOWNLOADS="$DOWNLOADS MLRISC" ;; + *) DOWNLOADS="$DOWNLOADS $tgt" ;; + esac +done + +# +# the URL for the (usually remote) source archive +# +. "$CONFIGDIR"/srcarchiveurl + +echo Downloading version $VERSION from $SRCARCHIVEURL + +# +# download the files +# +for d in $DOWNLOADS ; do + if [ ! -d $d ] ; then + echo "get $d" + tarfile=$d.tgz + curl -O $SRCARCHIVEURL/$tarfile + if [ $? -ne 0 ] ; then + complain "unable to download $SRCARCHIVEURL/$tarfile" + fi + tar -xzf $tarfile + rm $tarfile + fi +done diff --git a/config/extrapathconfig b/config/extrapathconfig new file mode 100644 index 0000000..e14c599 --- /dev/null +++ b/config/extrapathconfig @@ -0,0 +1,5 @@ +basis.cm smlnj/basis +smlnj-lib.cm smlnj/smlnj-lib +pp-lib.cm smlnj/smlnj-lib +controls-lib.cm smlnj/smlnj-lib +ml-yacc-lib.cm smlnj/ml-yacc diff --git a/config/install.bat b/config/install.bat new file mode 100644 index 0000000..725fc00 --- /dev/null +++ b/config/install.bat @@ -0,0 +1,45 @@ +REM Win32 installer for SML/NJ. +REM +REM (C) 2003 The Fellowship of SML/NJ. +REM +REM Author: Matthias Blume (blume@tti-c.org) + +if "%SMLNJ_HOME%"=="" (echo Please set the SMLNJ_HOME environment variable && goto :EOF) +if NOT EXIST %SMLNJ_HOME%\sml.boot.x86-win32 (echo Please expand the boot.x86-win32.tgz file to the root of your SMLNJ source tree && goto :EOF) + +REM begin by creating rudimentary directory hierarchy +if EXIST bin (rmdir /s /q bin) +mkdir bin +mkdir bin\.run +mkdir bin\.heap +mkdir lib + +REM compile runtime system and move executable to bin\.run +cd base\runtime\objs +nmake -f mk.x86-win32 +copy /y run.x86-win32.exe ..\..\..\bin\.run\run.x86-win32.exe +cd ..\..\.. + +REM put helper .bat scripts into bin +copy config\link-sml.bat bin +copy config\ml-build.bat bin +copy config\sml.bat bin + +REM copy config\ml-lex.bat bin +REM copy config\lexgen.bat bin +REM copy config\ml-yacc.bat bin + +REM create heap image and lib hierarchy ("boot") +copy config\preloads preloads.standard +cd sml.boot.x86-win32 +..\bin\.run\run.x86-win32.exe @SMLboot=BOOTLIST @SMLheap=sml @SMLalloc=1M @SMLverbose +cd .. +move sml.x86-win32 bin\.heap +del preloads.standard +cd sml.boot.x86-win32 +for /D %%a in (*.*) do echo %%a %%a >>..\lib\pathconfig +for /D %%a in (*.*) do xcopy /e /y /h /i %%a ..\lib\%%a +cd .. + +REM Do all the rest using the precompiled installer. +%COMSPEC% /C "bin\sml.bat -m $smlnj/installer.cm" diff --git a/config/install.sh b/config/install.sh new file mode 100755 index 0000000..3c072ba --- /dev/null +++ b/config/install.sh @@ -0,0 +1,536 @@ +#!/bin/sh +# +# Copyright (c) 1994 AT&T Bell Laboratories. +# Copyright (c) 2014-2020 The Fellowship of SML/NJ +# +# Installation script for SML/NJ and related tools. +# +# Significant changes to take advantage of a new portable installer +# script for everything after booting the interactive system. +# +# Author: Matthias Blume and John Reppy +# + +complain() { + echo "$@" + exit 1 +} + +this=$0 + +# set the default size for the install. Currently, the default is 64 for +# systems that report "x86_64" for `uname -m`. These include macOS and +# Linux systems. We set the default size to 32 for all other systems. +# +DEFAULT_SIZE=32 +case `uname -m` in + x86_64) DEFAULT_SIZE=64 ;; +esac + +# process options +SIZE_OPT= +nolib=false +while [ "$#" != "0" ] ; do + arg=$1; shift + case $arg in + -32) SIZE_OPT=$arg ;; + -64) SIZE_OPT=$arg ;; + -default) + case x"$1" in + x32) DEFAULT_SIZE="32"; shift ;; + x64) DEFAULT_SIZE="64"; shift ;; + x) complain "missing size argument for '-default'" ;; + *) complain "invalid size argument for '-default'; should be 32 or 64" ;; + esac ;; + -nolib) nolib=true ;; + *) complain "usage: $this [-32 | -64] [-default ] [-nolib]" + ;; + esac +done + +if [ x"$SIZE_OPT" = x ] ; then + SIZE_OPT="-"$DEFAULT_SIZE +fi + +if [ x${INSTALL_QUIETLY} = xtrue ] ; then + export CM_VERBOSE + CM_VERBOSE=false +fi + +vsay() { + if [ x${INSTALL_DEBUG} = xtrue ] ; then + echo "$@" + elif [ x${INSTALL_QUIETLY} = xtrue ] ; then + : + else + echo "$@" + fi +} + + +# +# create the preloads.standard file +# +if [ ! -r config/preloads ]; then + complain "$this: !!! File config/preloads is missing." +fi +cp config/preloads preloads.standard + +# +# Some OSs have make in strange places, but most of the time it is +# simply on the PATH: +# +MAKE=make + +SHELL=/bin/sh +vsay $this: Using shell $SHELL. + +# +# set the SML root directory +# +REAL_PWD=`pwd` +ROOT=${PWD:-$REAL_PWD} +vsay $this: SML root is $ROOT. + +cd "${INSTALLDIR:=$ROOT}" +INSTALLDIR=`pwd` +cd "$ROOT" +vsay $this: Installation directory is ${INSTALLDIR}. + +# +# set the various directory and file pathname variables +# +BINDIR=$INSTALLDIR/bin # main dir for binary stuff +CONFIGDIR=$ROOT/config +HEAPDIR=$BINDIR/.heap # where heap images live +RUNDIR=$BINDIR/.run # where executables (i.e., the RTS) live +BASEDIR=$ROOT/base # where the base source tree is rooted +LIBDIR=$INSTALLDIR/lib # where libraries live + +# +# files to be deleted after we are done... +# +tmpfiles="" +tmpfiles="$tmpfiles preloads.standard" +# +# make sure we always clean up after ourselves... +# +trap 'cd "$ROOT"; rm -f $tmpfiles' 0 1 2 3 15 + + +# +# set the CM configuration variables (these are environment variables +# that will be queried by the bootstrap code) +# Especially important is CM_PATHCONFIG. +# +export CM_PATHCONFIG +CM_PATHCONFIG=$LIBDIR/pathconfig +# +# the release version that we are installing +# +VERSION=`cat "$CONFIGDIR/version"` +vsay $this: Installing version $VERSION. + +# +# the URL for the (usually remote) source archive +# +. "$CONFIGDIR"/srcarchiveurl +vsay $this: URL of source archive is $SRCARCHIVEURL. + +# +# Function to make a directory including its ancestors. +# +makedir() { + if [ x"$1" = x ] ; then + : + elif [ -d "$1" ] ; then + : + else + makedirtmp=`dirname "$1"` + makedir "$makedirtmp" + if [ x${INSTALL_VERBOSE} = xtrue ] ; then + vsay "$this: Making directory $1" + fi + if mkdir "$1" ; then + : + else + complain "$this: !!! Unable to make directory $1!" + fi + fi +} + +# +# Fish out the CM metadata directory name from library files +# and store it in ORIG_CM_DIR_ARC. +# The single argument is the name of the directory containing +# a single subdirectory which is a CM metadata directory: +# +fish() { + cd "$1" + ORIG_CM_DIR_ARC=unknown + for i in * .[a-zA-Z0-9]* ; do + if [ -d $i ] ; then + ORIG_CM_DIR_ARC=$i + break + fi + done + if [ $ORIG_CM_DIR_ARC = unknown ] ; then + complain "$this: could not determine CM metadata directory name" + else + echo "$this: CM metadata directory name is \"${ORIG_CM_DIR_ARC}\"" + fi +} + + +# A function to move all stable library files to a parallel directory +# hierarchy. +# The first argument must be a simple path (no / inside), and +# the second argument must be an absolute path. +move() { + if [ -L "$1" ] ; then + rm -f "$1" # remove symbolic link made by diracs (see below) + elif [ -d "$1" ] ; then + if [ ! -d "$2" ] ; then + if [ -f "$2" ] ; then + complain $this: $2 exists as a non-directory. + fi + mkdir "$2" + fi + cd "$1" + for i in * .[a-zA-Z0-9]* ; do + move "$i" "$2"/"$i" + done + cd .. + elif [ -f "$1" ] ; then + rm -f "$2" + mv "$1" "$2" + fi +} + +# +# Traverse the directory tree rooted at $3 (must be single arc!). +# Find all directories named $1, rename them into $2 and make +# and establish $1 as a symbolic link to $2: +# +dirarcs() { + if [ -d "$3" ] ; then + if [ "$3" = "$1" ] ; then + mv "$1" "$2" + ln -s "$2" "$1" + else + cd "$3" + for d in * .[a-zA-Z0-9]* ; do + dirarcs "$1" "$2" "$d" + done + cd .. + fi + fi +} + + +###################################################################### + +# +# create the various sub directories +# +for dir in "$BINDIR" "$HEAPDIR" "$RUNDIR" "$LIBDIR" "$BASEDIR" ; do + makedir "$dir" +done + +# +# Function to install a "driver" script... +# This takes care of patching the source of the script with the SHELL, +# BINDIR, and VERSION variables to use. +# +installdriver() { + echo "$this: installing $BINDIR/$2" + dsrc=$1 + ddst=$2 +# We install the driver unconditionally. (It would be better to test +# for an outdated driver script, but not all "test" commands understand +# the -nt comparison operator....) +# if [ -x $BINDIR/$ddst ]; then +# echo $this: Script $BINDIR/$ddst already exists. +# else + rm -f "$BINDIR"/"$ddst" + cat "$CONFIGDIR"/"$dsrc" | \ + sed -e "s,@SHELL@,$SHELL,g" \ + -e "s,@BINDIR@,$BINDIR," \ + -e "s,@LIBDIR@,$LIBDIR," \ + -e "s,@VERSION@,$VERSION," \ + -e "s,@CMDIRARC@,${CM_DIR_ARC:-dummy}," \ + -e "s,@SIZE@,$DEFAULT_SIZE," \ + > "$BINDIR"/"$ddst" + chmod 555 "$BINDIR"/"$ddst" + if [ ! -x "$BINDIR"/"$ddst" ]; then + complain "$this: !!! Installation of $BINDIR/${ddst} failed." + fi +# fi +} + +# +# install the script that tests architecture and os... +# +installdriver _arch-n-opsys .arch-n-opsys + +# +# run it to figure out what architecture and os we are using, define +# corresponding variables... +# +ARCH_N_OPSYS=`"$BINDIR"/.arch-n-opsys $SIZE_OPT` +if [ "$?" != "0" ]; then + echo "$this: !!! Script $BINDIR/.arch-n-opsys fails on this machine." + echo "$this: !!! You must patch $BINDIR/.arch-n-opsys by hand and repeat the installation." + exit 2 +else + vsay $this: Script $BINDIR/.arch-n-opsys reports $ARCH_N_OPSYS. +fi +eval $ARCH_N_OPSYS + +# +# now install most of the other driver scripts +# (except ml-build, since we don't know $CM_DIR_ARC yet) +# +installdriver _run-sml .run-sml +installdriver _link-sml .link-sml +installdriver _ml-makedepend ml-makedepend + +# +# we optimistically install heap2exec, but will remove it if heap2asm +# is not installed +# +installdriver _heap2exec heap2exec + +# +# set allocation size; for the x86, this gets reset in .run-sml +# +ALLOC=512k + +# OS-specific things for building the runtime system +# +RT_MAKEFILE=mk.$ARCH-$OPSYS +case $OPSYS in + darwin) + SDK=none + if [ "$ARCH" = "x86" ] ; then + # the /usr/bin/as command does _not_ accept the -mmacosx-version-min + # command-line option prior to MacOS X 10.10 (Yosemite) + case `sw_vers -productVersion` in + 10.6*) AS_ACCEPTS_SDK=no ;; + 10.7*) AS_ACCEPTS_SDK=no ;; + 10.8*) AS_ACCEPTS_SDK=no ;; + 10.9*) AS_ACCEPTS_SDK=no ;; + 10.14*) + AS_ACCEPTS_SDK=yes + # Mojave needs a special makefile for the x86, but we need to be careful + # about when we are running the postinstall script, so we check + # for the nolib argument + if [ x"$nolib" = xfalse ] ; then + RT_MAKEFILE=mk.x86-darwin18 + # location of Xcode SDKs + if [ ! -x /usr/bin/xcode-select ] ; then + echo "$this: !!! /usr/bin/xcode-select is missing; please install Xcode" + exit 1 + fi + XCODE_DEV_PATH=`xcode-select -p` + if [ x"$XCODE_DEV_PATH" = x/Library/Developer/CommandLineTools ] ; then + XCODE_SDK_PATH="$XCODE_DEV_PATH/SDKs" + else + XCODE_SDK_PATH="$XCODE_DEV_PATH/Platforms/MacOSX.platform/Developer/SDKs" + fi + # look for an SDK that supports 32-bit builds (starting with 10.13 High Sierra + # and going back to 10.10 Yosemite) + # + for SDK_VERS in 13 12 11 10 ; do + if [ -d "$XCODE_SDK_PATH/MacOSX10.$SDK_VERS.sdk" ] ; then + SDK="$XCODE_SDK_PATH/MacOSX10.$SDK_VERS.sdk" + break + fi + done + if [ x"$SDK" = xnone ] ; then + echo "$this: !!! SML/NJ requires support for 32-bit executables." + echo " Please see http://www.smlnj.org/dist/working/$VERSION/MACOSXINSTALL for more details." + exit 1 + fi + fi + ;; + *) AS_ACCEPTS_SDK=yes ;; + esac + if [ x"$SDK" = xnone ] ; then + EXTRA_DEFS="AS_ACCEPTS_SDK=$AS_ACCEPTS_SDK" + else + EXTRA_DEFS="AS_ACCEPTS_SDK=$AS_ACCEPTS_SDK SDK=$SDK" + fi + elif [ "$ARCH" = AMD64 ] ; then + EXTRA_DEFS="AS_ACCEPTS_SDK=yes" + fi + ;; + linux) + EXTRA_DEFS=`"$CONFIGDIR/chk-global-names.sh"` + if [ "$?" != "0" ]; then + complain "$this: !!! Problems checking for underscores in asm names." + fi + EXTRA_DEFS="XDEFS=$EXTRA_DEFS" + if [ "$ARCH" = "x86" ] ; then + # + # on 64-bit linux systems, we need to check to see if the 32-bit emulation + # support is installed + # + case `uname -m` in + x86_64) + tmpFile=smlnj-test$$ + echo "int main () { return 0; }" >> /tmp/$tmpFile.c + gcc -m32 -o /tmp/$tmpFile /tmp/$tmpFile.c 2> /dev/null 1>> /dev/null + if [ "$?" != "0" ] ; then + rm -f /tmp/$tmpFile /tmp/$tmpFile.c + echo "$this: !!! SML/NJ requires support for 32-bit executables." + echo "$this: !!! Please see http://www.smlnj.org/dist/working/$VERSION/install.html for more details." + exit 1 + else + rm -f /tmp/$tmpFile /tmp/$tmpFile.c + fi + ;; + *) ;; + esac + fi + ;; + solaris) + MAKE=/usr/ccs/bin/make + ;; +esac + +# +# the name of the bin files directory +# +BOOT_ARCHIVE=boot.$ARCH-unix +BOOT_FILES=sml.$BOOT_ARCHIVE + +# +# build the run-time system +# +if [ -x "$RUNDIR"/run.$ARCH-$OPSYS ]; then + vsay $this: Run-time system already exists. +else + "$CONFIGDIR"/unpack "$ROOT" runtime + cd "$BASEDIR"/runtime/objs + echo $this: Compiling the run-time system. + $MAKE -f $RT_MAKEFILE $EXTRA_DEFS + if [ -x run.$ARCH-$OPSYS ]; then + mv run.$ARCH-$OPSYS "$RUNDIR" + if [ -f runx.$ARCH-$OPSYS ]; then + mv runx.$ARCH-$OPSYS "$RUNDIR" + fi + if [ -f run.$ARCH-$OPSYS.so ]; then + mv run.$ARCH-$OPSYS.so "$RUNDIR" + fi + if [ -f run.$ARCH-$OPSYS.a ]; then + mv run.$ARCH-$OPSYS.a "$RUNDIR" + fi + $MAKE MAKE=$MAKE clean + else + complain "$this: !!! Run-time system build failed for some reason." + fi +fi +cd "$BASEDIR" + +# +# boot the base SML system +# +if [ -r "$HEAPDIR"/sml.$HEAP_SUFFIX ]; then + vsay $this: Heap image $HEAPDIR/sml.$HEAP_SUFFIX already exists. + fish "$LIBDIR"/smlnj/basis + # ignore requested arc name since we have to live with what is there: + export CM_DIR_ARC + CM_DIR_ARC=$ORIG_CM_DIR_ARC + # now re-dump the heap image: + vsay "$this: Re-creating a (customized) heap image..." + "$BINDIR"/sml $SIZE_OPT @CMredump "$ROOT"/sml + cd "$ROOT" + if [ -r sml.$HEAP_SUFFIX ]; then + mv sml.$HEAP_SUFFIX "$HEAPDIR" + else + complain "$this !!! Unable to re-create heap image (sml.$HEAP_SUFFIX)." + fi +else + "$CONFIGDIR"/unpack "$ROOT" "$BOOT_ARCHIVE" + + fish "$ROOT"/"$BOOT_FILES"/smlnj/basis + + cd "$ROOT" + + # Target arc: + export CM_DIR_ARC + CM_DIR_ARC=${CM_DIR_ARC:-".cm"} + + if [ $CM_DIR_ARC = $ORIG_CM_DIR_ARC ] ; then + : we are happy + else + # now we have to make a symbolic link for each occurrence of + # $ORIG_CM_DIR_ARC to $CM_DIR_ARC + dirarcs "$ORIG_CM_DIR_ARC" "$CM_DIR_ARC" "$BOOT_FILES" + fi + + cd "$ROOT"/"$BOOT_FILES" + + # now link (boot) the system and let it initialize itself... + if "$BINDIR"/.link-sml $SIZE_OPT @SMLheap="$ROOT"/sml @SMLboot=BOOTLIST @SMLalloc=$ALLOC + then + cd "$ROOT" + if [ -r sml.$HEAP_SUFFIX ]; then + mv sml.$HEAP_SUFFIX "$HEAPDIR" + cd "$BINDIR" + ln -s .run-sml sml + # + # Now move all stable libraries to #LIBDIR and generate + # the pathconfig file. + # + cd "$ROOT"/"$BOOT_FILES" + for anchor in * ; do + if [ -d $anchor ] ; then + echo $anchor $anchor >>$CM_PATHCONFIG + move $anchor "$LIBDIR"/$anchor + fi + done + cd "$ROOT" + # $BOOT_FILES is now only an empty skeleton, let's get rid of it. + rm -rf "$BOOT_FILES" + + else + complain "$this !!! No heap image generated (sml.$HEAP_SUFFIX)." + fi + else + complain "$this !!! Boot code failed, no heap image (sml.$HEAP_SUFFIX)." + fi +fi + +# +# now that we know CM_DIR_ARC we can install the ml-build driver... +# +installdriver _ml-build ml-build + +cd "$ROOT" + +# +# Now do all the rest using the precompiled installer +# (see base/system/smlnj/installer for details) +# +if [ $nolib = false ] ; then + echo $this: Installing other libraries and programs: + export ROOT INSTALLDIR CONFIGDIR BINDIR + CM_TOLERATE_TOOL_FAILURES=true + export CM_TOLERATE_TOOL_FAILURES + if "$BINDIR"/sml $SIZE_OPT -m \$smlnj/installer.cm + then + # because we create heap2exec without knowing if heap2asm is going + # to be installed, we need this hack to remove heap2exec when heap2asm + # is not available + if [ ! -x "$BINDIR"/heap2asm ] ; then + rm -f "$BINDIR"/heap2exec + fi + vsay $this: Installation complete. + else + complain "$this: !!! Installation of libraries and programs failed." + fi +fi + +exit 0 diff --git a/config/lexgen.bat b/config/lexgen.bat new file mode 100644 index 0000000..e48032e --- /dev/null +++ b/config/lexgen.bat @@ -0,0 +1,2 @@ +@echo OFF +"%SMLNJ_HOME%\bin\.run\run.x86-win32.exe" "@SMLload=%SMLNJ_HOME%\bin\.heap\lexgen" %1 %2 %3 %4 %5 %6 %7 %8 %9 diff --git a/config/link-sml.bat b/config/link-sml.bat new file mode 100644 index 0000000..c0e8841 --- /dev/null +++ b/config/link-sml.bat @@ -0,0 +1,3 @@ +set CM_PATHCONFIG=%SMLNJ_HOME%\lib\pathconfig +REM We rely on the first argument being @SMLboot=whatever. +"%SMLNJ_HOME%\bin\.run\run.x86-win32.exe" %1 %2 %3 %4 %5 %6 %7 %8 %9 diff --git a/config/ml-antlr.bat b/config/ml-antlr.bat new file mode 100644 index 0000000..e9713f5 --- /dev/null +++ b/config/ml-antlr.bat @@ -0,0 +1,2 @@ +@echo OFF +"%SMLNJ_HOME%\bin\.run\run.x86-win32.exe" "@SMLload=%SMLNJ_HOME%\bin\.heap\ml-antlr" %1 %2 %3 %4 %5 %6 %7 %8 %9 diff --git a/config/ml-build.bat b/config/ml-build.bat new file mode 100644 index 0000000..b2eff86 --- /dev/null +++ b/config/ml-build.bat @@ -0,0 +1,62 @@ +@echo off + +set flags= +set setup= + +:DOFLAGS + +if %1 == -D goto FOUNDFLAG +if %1 == -U goto FOUNDFLAG +if %1 == -C goto FOUNDFLAG +if %1 == -S goto FOUNDSETUP +goto DONEFLAGS + +:FOUNDFLAG + +set flags=%flags% %1%2 +shift +shift +goto DOFLAGS + +:FOUNDSETUP + +set setup=%2 +shift +shift +goto DOFLAGS + +:DONEFLAGS + +set root=%1 +set main=%2 +set heap=%3 + +set smlfile=XYZ_XXX_smlfile.sml +set cmfile=XYZ_XXX_cmfile.cm +set listfile=XYZ_XXX_BOOTLIST +set linkargsfile=XYZ_XXX_LINKARGS + +set rare=XYZ_XXX_0123 + +echo structure %rare% = struct val _ = SMLofNJ.exportFn ("%heap%", %main%) end >"%smlfile%" + +echo Group structure %rare% is $/basis.cm "%root%" %smlfile% >%cmfile% + +%COMSPEC% /C ""%SMLNJ_HOME%\bin\sml.bat" %flags% %setup% @CMbuild %root% %cmfile% %heap% %listfile% %linkargsfile%" +if ERRORLEVEL 1 goto ERR +if NOT EXIST %linkargsfile% goto END +"%SMLNJ_HOME%\bin\.run\run.x86-win32.exe" @SMLboot=%listfile% +del %linkargsfile% +goto END + +:ERR +echo Compilation failed with error. + +:END +rem more cleaning up +del %smlfile% +del %cmfile% +del %listfile% +del .cm\GUID\%smlfile% +del .cm\SKEL\%smlfile% +del .cm\x86-win32\%smlfile% diff --git a/config/ml-burg.bat b/config/ml-burg.bat new file mode 100755 index 0000000..fcad72d --- /dev/null +++ b/config/ml-burg.bat @@ -0,0 +1,2 @@ +@echo OFF +"%SMLNJ_HOME%\bin\.run\run.x86-win32.exe" "@SMLload=%SMLNJ_HOME%\bin\.heap\ml-burg" %1 %2 %3 %4 %5 %6 %7 %8 %9 diff --git a/config/ml-lex.bat b/config/ml-lex.bat new file mode 100644 index 0000000..4ed14ba --- /dev/null +++ b/config/ml-lex.bat @@ -0,0 +1,2 @@ +@echo OFF +"%SMLNJ_HOME%\bin\.run\run.x86-win32.exe" "@SMLload=%SMLNJ_HOME%\bin\.heap\ml-lex" %1 %2 %3 %4 %5 %6 %7 %8 %9 diff --git a/config/ml-nlffigen.bat b/config/ml-nlffigen.bat new file mode 100755 index 0000000..13c821e --- /dev/null +++ b/config/ml-nlffigen.bat @@ -0,0 +1,2 @@ +@echo OFF +"%SMLNJ_HOME%\bin\.run\run.x86-win32.exe" "@SMLload=%SMLNJ_HOME%\bin\.heap\ml-nlffigen" %1 %2 %3 %4 %5 %6 %7 %8 %9 diff --git a/config/ml-ulex.bat b/config/ml-ulex.bat new file mode 100644 index 0000000..ef3cb2b --- /dev/null +++ b/config/ml-ulex.bat @@ -0,0 +1,2 @@ +@echo OFF +"%SMLNJ_HOME%\bin\.run\run.x86-win32.exe" "@SMLload=%SMLNJ_HOME%\bin\.heap\ml-ulex" %1 %2 %3 %4 %5 %6 %7 %8 %9 diff --git a/config/ml-yacc.bat b/config/ml-yacc.bat new file mode 100644 index 0000000..80c23f7 --- /dev/null +++ b/config/ml-yacc.bat @@ -0,0 +1,2 @@ +@echo OFF +"%SMLNJ_HOME%\bin\.run\run.x86-win32.exe" "@SMLload=%SMLNJ_HOME%\bin\.heap\ml-yacc" %1 %2 %3 %4 %5 %6 %7 %8 %9 diff --git a/config/nowhere.bat b/config/nowhere.bat new file mode 100755 index 0000000..deb6f59 --- /dev/null +++ b/config/nowhere.bat @@ -0,0 +1,2 @@ +@echo OFF +"%SMLNJ_HOME%\bin\.run\run.x86-win32.exe" "@SMLload=%SMLNJ_HOME%\bin\.heap\nowhere" %1 %2 %3 %4 %5 %6 %7 %8 %9 diff --git a/config/preloads b/config/preloads new file mode 100644 index 0000000..798b32a --- /dev/null +++ b/config/preloads @@ -0,0 +1,62 @@ +# +# Modules to be pre-loaded during bootstrap. +# Each line should have the form "command libname" where "command" is +# either "autoload" or "make" and where "libname" is suitable as an +# argument to CM.autoload or CM.make. +# +# Make sure that at least $smlnj/cm.cm is being registered here. +# + +########################### BASICS #################################### + +# The SML Basis library: +autoload $smlnj/basis/basis.cm + +# The Compilation Manager (structure CM) +autoload $smlnj/cm.cm + +# The "Util" portion of the SML/NJ Library +autoload $smlnj/smlnj-lib/smlnj-lib.cm + + +##################### ADDITIONAL LIBRARIES ############################ + +# The "Pretty-Printing" portion of the SML/NJ library +#autoload $smlnj/smlnj-lib/pp-lib.cm + +# The "Configurable Controls" portion of the SML/NJ library +#autoload $smlnj/smlnj-lib/controls-lib.cm + +# The "HTML3" portion of the SML/NJ library +#autoload $smlnj/smlnj-lib/html-lib.cm + + +################## FOR SML/NJ COMPILER HACKERS ######################## + +# The Visible Compiler (collection of structures) +autoload $smlnj/compiler.cm + +# The Visible Compiler (old style: structure Compiler) +#autoload $smlnj/compiler/compiler.cm + +# If you don't autoload the old-style visible compiler above, then +# you should make a minimal version of it available so that +# Compiler.version as well as Compiler.achitecture are available: +autoload $smlnj/compiler/minimal.cm + +# The Bootstrap Compilation Manager (structure CMB) +#autoload $smlnj/cmb.cm + +# Cross-compiler version of CMB for alpha, hppa, ppc, sparc, x86 +# on unix, macos, and win32 (where applicable). +#autoload $smlnj/cmb/alpha32-unix.cm +#autoload $smlnj/cmb/hppa-unix.cm +#autoload $smlnj/cmb/ppc-macos.cm +#autoload $smlnj/cmb/ppc-unix.cm +#autoload $smlnj/cmb/sparc-unix.cm +#autoload $smlnj/cmb/x86-unix.cm +#autoload $smlnj/cmb/x86-win32.cm + +# All cross-compiler versions of CMB at once (together with +# their corresponding architecture-specific compiler structures): +#autoload $smlnj/compiler/all.cm diff --git a/config/prepare-win-install.sh b/config/prepare-win-install.sh new file mode 100755 index 0000000..2764dbc --- /dev/null +++ b/config/prepare-win-install.sh @@ -0,0 +1,105 @@ +#!/bin/sh +# +# Copyright (c) 2019 The Fellowship of SML/NJ +# +# Pre-installation script for SML/NJ. The purpose of this script +# is to download and unpackage files in preparation of building on +# Windows. +# + +complain() { + echo "$@" + exit 1 +} + +vsay() { + if [ x${INSTALL_DEBUG} = xtrue ] ; then + echo "$@" + elif [ x${INSTALL_QUIETLY} = xtrue ] ; then + : + else + echo "$@" + fi +} + +this=$0 + +ROOT=`pwd` + +# +# set the various directory and file pathname variables +# +CONFIGDIR=$ROOT/config +BASEDIR=$ROOT/base # where the base source tree is rooted +BOOT_ARCHIVE=boot.x86-win32 + +# +# Function to make a directory including its ancestors. +# +makedir() { + if [ x"$1" = x ] ; then + : + elif [ -d "$1" ] ; then + : + else + makedirtmp=`dirname "$1"` + makedir "$makedirtmp" + if [ x${INSTALL_VERBOSE} = xtrue ] ; then + vsay "$this: Making directory $1" + fi + if mkdir "$1" ; then + : + else + complain "$this: !!! Unable to make directory $1!" + fi + fi +} + +# +# the release version that we are installing +# +VERSION=`cat "$CONFIGDIR/version"` +vsay $this: Preparing version $VERSION for Windows installation. + +# +# create the base source subdirectory +# +makedir "$BASEDIR" + +"$CONFIGDIR"/unpack "$ROOT" runtime +"$CONFIGDIR"/unpack "$ROOT" "$BOOT_ARCHIVE" +"$CONFIGDIR"/unpack "$ROOT" smlnj-lib +"$CONFIGDIR"/unpack "$ROOT" system + +# source code for the various targets that are part of the +# standard Windows installation. +# +EXTRA_TARGETS="\ + ckit \ + cml \ + doc \ + MLRISC \ + ml-burg \ + ml-lex + ml-lpt \ + ml-yacc \ + nlffi \ + old-basis \ + pgraph \ + trace-debug-profile \ +" + +for file in $EXTRA_TARGETS ; do + "$CONFIGDIR"/unpack "$ROOT" $file +done + +# +# need to make sure that the generated files have a newer timestamp +# so that the config\install.bat script works +# +touch smlnj-lib/HTML4/*.l.sml smlnj-lib/HTML4/*.g.sml + +# +# remove tar files +# +rm -rf *tgz diff --git a/config/releasedate b/config/releasedate new file mode 100644 index 0000000..7623ea7 --- /dev/null +++ b/config/releasedate @@ -0,0 +1 @@ +March 14, 2024 diff --git a/config/sml.bat b/config/sml.bat new file mode 100644 index 0000000..9f049e2 --- /dev/null +++ b/config/sml.bat @@ -0,0 +1,18 @@ +@echo OFF + +REM sml.bat +REM +REM Copyright 2022 The Fellowship of SML/NJ (http://www.smlnj.org) +REM All rights reserved. +REM +REM The standard driver for SML/NJ under the new runtime system +REM + +title Standard ML of New Jersey +setlocal + +if "%SMLNJ_HOME%"=="" set SMLNJ_HOME=%~dp0\.. +if NOT EXIST "%SMLNJ_HOME%\bin\.run\run.x86-win32.exe" set SMLNJ_HOME=%~dp0\.. + +set CM_PATHCONFIG=%SMLNJ_HOME%\lib\pathconfig +"%SMLNJ_HOME%\bin\.run\run.x86-win32.exe" "@SMLload=%SMLNJ_HOME%\bin\.heap\sml" %* diff --git a/config/srcarchiveurl b/config/srcarchiveurl new file mode 100644 index 0000000..fc619aa --- /dev/null +++ b/config/srcarchiveurl @@ -0,0 +1,4 @@ +# +# It is ok to use $VERSION as part of the URL specification... +# +SRCARCHIVEURL=http://smlnj.cs.uchicago.edu/dist/working/${VERSION}/ diff --git a/config/targets b/config/targets new file mode 100644 index 0000000..e33ac3a --- /dev/null +++ b/config/targets @@ -0,0 +1,221 @@ +# targets +# +# the following is a list of the targets (in addition to sml) that +# this shell script can install. Comment out those that you do not +# want. Note that cml-lib requires cml. +# +# The syntax of requests is as follows: +# +# ::= * +# ::= 'request' NL +# | 'if' SYMBOL NL * * 'endif' NL +# ::= 'elif' SYMBOL NL * +# +# where NL represents end-of-line and SYMBOL is one of +# +# SIZE_32 -- true for 32-bit systems +# SIZE_64 -- true for 64-bit systems +# WINDOWS -- true for Microsoft Windows +# UNIX -- true for Unix systems (including macOS and Linux) + +# +# The install script will move all libraries out of the source tree +# and into the $LIBDIR directory. Thus, you can later delete the +# entire source tree without losing functionality. +# (Notice that the old "dont_move_libraries" directive broke a while ago +# and has been removed.) +# + +# (The base system consisting of runtime, compiler, and basic libraries +# will always be built.) + +# unpack the source code for everything (including for the SML/NJ compiler +# itself); this is not required, unless you are doing compiler hacking, +# but it may be interesting to look at. +# +#request src-smlnj + +# include backward-compatible versions of the Basis Library. +# +request old-basis + +# build new (Unicode-capable) ML-ULex +# +request ml-ulex +# +# Arrange for the .lex suffix to be recognized by CM as (new-style) +# ML-ULex input. +# (Notice that you should NOT select this option even if you want to have +# ml-ulex process legacy ml-lex input using the --ml-lex-mode option.) +# Since most existing projects have legacy .lex files, this should be +# kept off by default. +# WARNING: This is incompatible with ml-lex-lex-ext!! +#request ml-ulex-lex-ext +# +# Register the "mllex" CM tool class in such a way that "legacy" ml-lex +# input is processed by "ml-ulex --ml-lex-mode". +# (If you want the .lex extension to be mapped to the "mllex" class +# and have that be processed by ml-ulex, you should request +# ml-ulex-mllex-tool and ml-lex-lex-ext, but NOT ml-ulex-lex-ext.) +# WARNING: This is incompatible with ml-lex-mllex-tool!! +request ml-ulex-mllex-tool + +# build legacy ML-Lex +# +request ml-lex +# +# Register the "mllex" CM tool class in such a way that "legacy" ml-lex +# input is processed by the legacy ml-lex program. +# WARNING: This is incompatible with ml-ulex-mllex-tool!! +#request ml-lex-mllex-tool +# +# Arrange for the .lex suffix to be recognized by CM as legacy ml-lex input. +# This requires that either ml-lex-mllex-tool or ml-ulex-mllex-tool +# (but not both at the same time!) is installed. +# Since most existing projects have legacy .lex files, this should be +# kept on by default. +# WARNING: This is incompatible with ml-ulex-lex-ext!! +request ml-lex-lex-ext + +# build ML-Yacc (must come after ml-lex) +# +request ml-yacc +# +# Arrange for the .grm suffix to be recognized by CM as ML-Yacc input. +# Since .grm files in most existing project are ML-Yacc input, this should be +# kept on by default. +# WARNING: This is incompatible with ml-antlr-grm-ext!! +request ml-yacc-grm-ext + +# Always: install the pre-compiled ML-Yacc Library; this is necessary +# to use parsers produced by ML-Yacc, but also to bootstrap the system +# in the first place. + +# build ML-Antlr +# +request ml-antlr +# +# Arrange for the .grm suffix to be recognized by CM as ML-Antlr input. +# Since .grm files in most existing project are ML-Yacc input, this should be +# kept off by default. +# WARNING: This is incompatible with ml-yacc-grm-ext!! +#request ml-antlr-grm-ext + +# utility library for ml-antlr and ml-ulex +# +request ml-lpt-lib + +# Always: install the pre-compiled SML/NJ Library (necessary to bootstrap). + +# pre-compile and install the remaining components of the SML/NJ library +# (everything except smlnj-lib.cm, aka Util, itself) +# +request smlnj-lib + +# build asdlgen and install ASDL libraries +# +# NOTE: asdlgen requires that a C++ compiler be installed and it +# currently does not build on Windows +# +#if UNIX +# request asdl +#endif + +# pre-compile and install CM "portable graph" utility library +# +#request pgraph-util + +# pre-compile and install "Trace-Debug-Profile" utility library +# (provides simple back-trace- and coverage facilities) +# +request tdp-util + +# pre-compile and install Concurrent ML, which is a library for message-passing +# concurrency. +# +request cml + +# pre-compile and install the CML Library, which provides some useful CML +# modules. +# +request cml-lib + +# pre-compile and install eXene, which is a library for X-Windows programming. +# EXene requires CML. +# +#request eXene + +# pre-compile (certain) parts of MLRISC that are not already part of the SML/NJ +# compiler itself +# +request mlrisc + +# pre-compile and install the C-Kit, which is a library for parsing and +# type-checking C programs +# +request ckit + +# pre-compile and install the ML-NLFFI Library, which is the core of +# a new foreign function interface (where "foreign" functions are +# "no longer foreign") +# This library is necessary to compile and/or run programs that use +# the new FFI. +# This preview release currently works under X86/Linux, Sparc/Solaris, +# PPC/MacOSX, and X86/Win32. +# +# +# NOTE: currently NLFFI is not supported on 64-bit machines +# +if SIZE_32 + request ml-nlffi-lib +endif + +# build ML-NLFFI-Gen, a program generator tool used in conjunction with +# the new "NLFFI" foreign function interface. The tool generates ML +# glue code from C source code. +# (Requires ckit!) +# This preview release currently works under X86/Linux, Sparc/Solaris, +# PPC/MacOSX, and X86/Win32. +# +# +# NOTE: currently NLFFI is not supported on 64-bit machines +# +if SIZE_32 + request ml-nlffigen +endif + +# pre-compile and install the MLRISC Tools library. +# This library is for parsing and pretty printing SML code. +# It's used by various MLRISC code generator generator. +# +#request mlrisc-tools + +# build ML-Burg +# +request ml-burg + +# Build and install the 'nowhere' program translator. +# This tool translates programs with conditional patterns (where clauses) +# into legal SML code. See MLRISC/Tools/Doc/nowhere.tex for its +# (bad) documentation. +# (Requires the mlrisc-tools library!) +# +#request nowhere + +# Build and install 'heap2asm', which is used by 'heap2exec' +# for producing self-contained (aka stand-alone) executables. +# This facility is currently only supported on Linux and macOS +# +if UNIX +# request heap2asm +endif + +# Download the documentation directory. +# +request doc + +# Note: autoloading is always enabled. +# In order to customize what is being pre-registered for autoloading +# edit file "preloads". You should make sure that it contains at least +# "$smlnj/cm.cm". +# Also, it is a good idea to have "$/basis.cm" pre-loaded. diff --git a/config/unpack b/config/unpack new file mode 100755 index 0000000..8768b50 --- /dev/null +++ b/config/unpack @@ -0,0 +1,302 @@ +#!/bin/sh +# +# This script was extracted from install.sh and deals with the fetching +# and unpacking of source/bootfile trees. The first argument must be +# the installation root directory. Subsequent arguments are the names +# of modules whose source trees are required. It is invoked by both the +# config/install.sh script and by base/system/smlnj/installer/generic-install.sml. +# +# (C) 2003 The Fellowship of SML/NJ +# +# Author: Matthias Blume (blume@tti-c.org) +# + +this="$0" +ROOT="$1" +shift + +CONFIGDIR="$ROOT/config" + +VERSION=`cat "$CONFIGDIR"/version` +. "$CONFIGDIR"/srcarchiveurl + +vsay() { + if [ x${INSTALL_DEBUG} = xtrue ] ; then + echo "$@" + elif [ x${INSTALL_QUIETLY} = xtrue ] ; then + : + else + echo "$@" + fi +} + +# +# Function for asking user to fetch source archive. +# $1 - descriptive name +# $2 - base name without extension, without version, and without dir +# $3 - remote directory +# +askurl() { + echo "$this: Please, fetch $1 archive" + echo ' ('$2.'*' or $VERSION-$2.'*)' + echo " from $3" + echo " and then re-run this script!" + exit 1 +} + +# +# Function for fetching source archives automatically using wget or lynx. +# $1 - command to actually get the stuff +# $2 - descriptive name +# $3 - base name without extension and without dir +# $4 - remote directory +# +fetchurl() { + getter=$1 ; shift + vsay $this: Fetching $1 from $3. Please stand by... + fetched=no + for base in "$2" "$VERSION-$2" ; do + for ext in tgz tar.gz tar.Z tz tar tar.bz2 ; do + try=$base.$ext + vsay $this: Trying $try ... + if "$getter" "$3"/"$try" "$ROOT"/"$try" ; then + fetched=yes + vsay $this: Fetching $try was a success. + break 2 # get out of both for-loops + else + rm -f "$ROOT"/"$try" + fi + done + done + if [ $fetched = no ] ; then + echo $this: Fetching $try was no success. + echo ' ' You should try to do it manually now. + askurl "$1" "$2" "$3" + fi +} + +# wrapper for wget +usewget() { + wget -nv -O "$2" "$1" +} + +# wrapper for lynx +uselynx() { + lynx -source "$1" >"$2" +} + +# wrapper for curl +usecurl() { + curl -s "$1" >"$2" +} + +testurlgetter() { + (exec >/dev/null 2>&1 ; exec $*) +} + +# +# Function to check whether wget or lynx is available. +# Set URLGETTER accordingly. URLGETTER can be set externally +# to either 'wget' or 'curl' or 'lynx' -- in which case the +# corresponding command will be used (properly wrapped). Any +# other external setting will be passed directly to fetchurl (without +# wrapping -- meaning it must take precisely two argumets: source and +# destination, in that order). +# +urlgetter() { + case ${URLGETTER:-unknown} in + fetchurl*) + ;; + unknown) + # automatically figure out which wrapper to use + if testurlgetter wget --help ; then + URLGETTER="fetchurl usewget" + elif testurlgetter curl -s file:///dev/null -o /dev/null ; then + URLGETTER="fetchurl usecurl" + elif testurlgetter lynx -help ; then + URLGETTER="fetchurl uselynx" + else + URLGETTER="askurl" + fi + ;; + wget|curl|lynx) + # special getters we know how to wrap + URLGETTER="fetchurl use${URLGETTER}" + ;; + *) + # other -- must be able to work without wrapper + URLGETTER="fetchurl ${URLGETTER}" + ;; + esac +} + +# wrapper for tar +un_tar() { + vsay "$this: Un-TAR-ing $1 archive." + tar -xf "$2" +} + +# wrapper for zcat followed by tar +un_tar_Z() { + vsay "$this: Un-COMPRESS-ing and un-TAR-ing $1 archive." + zcat "$2" | tar -xf - +} + +# wrapper for gunzip followed by tar +un_tar_gz() { + vsay "$this: Un-GZIP-ing and un-TAR-ing $1 archive." + gunzip -c "$2" | tar -xf - +} + +# wrapper for bunzip2 followed by tar +un_tar_bz2() { + vsay "$this: Un-BZIP2-ing and un-TAR-ing $1 archive." + bunzip2 -c "$2" | tar -xf - +} + +# unarchive archive without and with version number attached +unarchive() { + # $1: descriptive string, $2: archive, $3: unpacker + if [ -r "$ROOT"/"$2" ] ; then + "$3" "$1" "$ROOT"/"$2" + elif [ -r "$ROOT"/"$VERSION"-"$2" ]; then + $3 "$1" "$ROOT"/"$VERSION"-"$2" + else + return 1 + fi +} + +# +# Function to unpack a source archive. +# +# $1: descriptive name of the sources to be unpacked +# $2: the directory into which to unpack the sources +# $3: the sub-directory of $2 that is going to be created by unpacking +# $4: the basename of the source archive (the script will check several +# different suffixes to determine what kind of de-compression is to +# be used) +# +# fetch_n_unpack is the helper function that does the real work. If +# no archive is found locally, it invokes $URLGETTER and tries again. +# The variable $tryfetch is used to make sure this happens only once. +fetch_n_unpack() { + cd "$2" + if unarchive "$1" "$4".tgz un_tar_gz || + unarchive "$1" "$4".tar.gz un_tar_gz || + unarchive "$1" "$4".tar.Z un_tar_Z || + unarchive "$1" "$4".tar un_tar || + unarchive "$1" "$4".tar.bz2 un_tar_bz2 || + unarchive "$1" "$4".tz un_tar_Z + then + : we are done + elif [ $tryfetch = yes ] ; then + urlgetter + $URLGETTER "$1" "$4" "$SRCARCHIVEURL" + tryfetch=no + fetch_n_unpack "$1" "$2" "$3" "$4" + fi +} + +# +# The main "unpack" driver function that invokes the above helper. +# +unpack() { + tryfetch=yes + if [ -d "$2"/"$3" ]; then + vsay "$this: The $1 tree already exists." + else + fetch_n_unpack "$1" "$2" "$3" "$4" + fi + if [ ! -d "$2"/"$3" ]; then + echo "$this: !!! Unable to unpack $1 archive." + exit 1 + fi +} + +# +# Now do it: +# + +for i +do + case "$i" in + runtime) + unpack run-time "$ROOT"/base runtime runtime + ;; + boot.*) + unpack bootfiles "$ROOT" sml.$i $i + ;; + compiler) + unpack compiler "$ROOT"/base compiler compiler + ;; + cm) + unpack compiler "$ROOT"/base cm cm + ;; + old-basis) + unpack "Old Basis Libraries" "$ROOT"/base old-basis old-basis + ;; + system) + unpack compiler "$ROOT"/base system system + ;; + ml-yacc|ml-yacc-grm-ext) + unpack ML-Yacc "$ROOT" ml-yacc ml-yacc + ;; + ml-lex|ml-lex-mllex-tool|ml-lex-lex-ext) + unpack ML-Lex "$ROOT" ml-lex ml-lex + ;; + ml-burg) + unpack ML-Burg "$ROOT" ml-burg ml-burg + ;; + smlnj-lib) + unpack "SML/NJ Library" "$ROOT" smlnj-lib smlnj-lib + ;; + cml|cml-lib) + unpack CML "$ROOT" cml cml + ;; + eXene) + unpack EXene "$ROOT" eXene eXene + ;; + ckit) + unpack "C-Kit" "$ROOT" ckit ckit + ;; + ml-nlffi-lib|ml-nlffigen) + unpack "NLFFI" "$ROOT" nlffi nlffi + ;; + pgraph-util) + unpack "CM source code" "$ROOT" pgraph pgraph + ;; + tdp-util) + unpack "Trace/Debug/Profile code" "$ROOT" trace-debug-profile trace-debug-profile + ;; + mlrisc|mlrisc-tools|nowhere) + unpack "MLRISC Library" "$ROOT" MLRISC MLRISC + ;; + smlnj-c) + unpack "SML/NJ-C FFI" "$ROOT" smlnj-c smlnj-c + ;; + heap2asm) + unpack "Heap->ASM tool" "$ROOT" heap2asm heap2asm + ;; + ml-ulex|ml-ulex-mllex-tool|ml-ulex-lex-ext) + unpack "new unicode lexer generator" "$ROOT" ml-lpt ml-lpt + ;; + ml-antlr|ml-antlr-grm-ext) + unpack "new LL(k) parser generator" "$ROOT" ml-lpt ml-lpt + ;; + ml-lpt-lib) + unpack "utilities for language processing tools" "$ROOT" ml-lpt ml-lpt + ;; + doc) + unpack "documentation" "$ROOT" doc doc + # cd $ROOT/doc + # build $ROOT + ;; + *) + echo Unknown package: ${i}. + echo Trying default method... + unpack ${i} "$ROOT" ${i} ${i} + ;; + esac +done + +exit 0 diff --git a/config/version b/config/version new file mode 100644 index 0000000..42467aa --- /dev/null +++ b/config/version @@ -0,0 +1 @@ +110.99.5 diff --git a/doc.tgz b/doc.tgz new file mode 100644 index 0000000..1ee676c Binary files /dev/null and b/doc.tgz differ diff --git a/doc/README b/doc/README new file mode 100644 index 0000000..075cee4 --- /dev/null +++ b/doc/README @@ -0,0 +1,4 @@ +This directory contains documentation for the SML/NJ system. Currently it just +consists of manual pages for the various command-line tools provided by the +system. + diff --git a/doc/html/HISTORY.html b/doc/html/HISTORY.html new file mode 100644 index 0000000..e1803b9 --- /dev/null +++ b/doc/html/HISTORY.html @@ -0,0 +1,22636 @@ + + + + + + + + +Standard ML of New Jersey Change Log + + + + + +

    +
    +
    +
    +
    +

    This file documents changes to the Standard ML of New Jersey system since +March of 2000 (around Version 110.26). The change log primarily covers +the compiler, the compilation manager (CM), the MLRISC library, and +the runtime system. There are occasional entries about other components +(e.g., the SML/NJ Library and ML-LPT), but these components have +their own change logs that should be consulted.

    +
    +
    +
    +
    +

    Version 110.99.5; 2024/03/14

    +
    + + +
    +
    +
    [2024/03/05]
    +
    +
    +

    Added ALIGN_STACK_16 preprocessor flag to x86 assembly code +(mach-dep/X86.prim.asm) in runtime system. This flag controls +whether the SML stack frame is a multiple of 16 bytes (which allows +foreign function calls to ensure 16-byte stack alignment).

    +
    + +
    +
    +
    +
    +
    +
    [2024/03/03]
    +
    +
    +

    Expose the machine property functions as Unsafe.isBigEndian and +Unsafe.wordSize. These functions are inlined by the compiler, which +allows one to expect tests of the form if Unsafe.isBigEndian() to +be evaluated at compile time. These primops are also available to +user code via the Unsafe structure.

    +
    + +
    +
    +
    +
    +
    +
    [2024/03/01]
    +
    +
    +

    Improved the Unsafe.Real64.fromBits implementation to use a cast +and a reference (instead of pack/unpack from a byte array).

    +
    + +
    +
    +
    +
    +
    +
    [2024/03/01]
    +
    +
    +

    We now use Word_t to represent lengths in the various allocation +routines in gc/ml-objects.c. This change fixes +Issue #283 (TextIO.inputAll segfaults when reading 980M file).

    +
    + +
    +
    +
    +
    +
    +
    [2024/03/01]
    +
    +
    +

    Added implementation of Real64.nextAfter.

    +
    + +
    +
    +
    +
    +
    +
    [2024/02/29]
    +
    +
    +

    Rewrote the Real64.split function to use the bit-representation of +reals in its implementation. This new version fixes +Issue #269 (Real.realMod and Real.split produce incorrect result +for values close to zero).

    +
    + +
    +
    +
    +
    +
    +
    [2024/02/26]
    +
    +
    +

    Did a complete rewrite of the mechanisms used to implement conversions +between strings and reals. As part of this rewrite, I implemented the +support for the StringCvt.EXACT formatting mode and implemented the +missing Real.toDecimal/fromDecimal functions. These changes also +fix Issue #194 (Incorrect formatting of real number).

    +
    +
    +

    The new implementation is based on the Ryũ library and related PLDI +2018 paper by Ulf Adams.

    +
    + +
    +
    +
    + +
    +
    +
    [2024/02/22]
    +
    +
    +

    Added Unsafe.Real64 structure to hold conversions between Real64.real +and Word64.word values. The fromBits function can be further improved, +but it needs the new host_big_endian primop to select the best code path +based on endianess.

    +
    + +
    +
    +
    +
    +
    +
    [2024/02/06]
    +
    +
    +

    Refactoring the Basis implementation directory structure to move code related to +real numbers into its own directory. Modules that support sequences of reals +are not included in this refactoring. This is the first step to a reimplementation +of the support for printing reals.

    +
    + +
    +
    +
    +
    +
    +
    [2024/02/05]
    +
    +
    +

    Added host_word_size and host_big_endian primops. These will expand +into constant functions (e.g., fn () ⇒ 64 for host_word_size on a +64-bit system) that will be visible to the optimizer. We can then use +code like:

    +
    +
    +

    [[source,sml]

    +
    +
    +
    +
    +
    +
    +
    if InlineT.host_big_endian() then ... else ...
    +
    +
    +
    +

    + +where the if can be evaluated at compile time.

    +
    +
    +

    + +John Reppy

    +
    +
    +
    +
    [2024/02/05]
    +
    +
    +

    The implementations of PackWord64Little.update and PackWord64Big.update +were swapped on 64-bit platforms (resulting in byte-order reversal). These +have been fixed. Note that the 32-bit versions are correct.

    +
    + +
    +
    +
    +
    +
    +
    [2024/01/30]
    +
    +
    +

    Modified the implementation of OS.Path to allow for systems (like +Windows) that support more than one valid arc separator character. +This change addresses Issue #280 (Support forward-slash +("/") as a separator in the Windows implementation of OS.Path).

    +
    + +
    +
    +
    +
    +
    +
    [2024/01/29]
    +
    +
    +

    Modified the CPS contraction phase to address Issue #292 +(Word8.toLargeInt is broken in 32-bit version). The change is to +disable fusing of conversions to IntInf.int that use 64-bit numbers +as the intermediate type on 32-bit platforms.

    +
    + +
    +
    +
    +
    +
    +
    [2023/12/09]
    +
    +
    +

    There was an inconsistency in the way that datatypes were printed +in the REPL depending on if they were a top-level definition or +defined in a structure. It was also the case that the printing of +top-level declarations (TopLevel/print/ppdec.sml) was using relative +indentation, while the printing of declarations in modules +(ElabData/modules/ppmod.sml) was using absolute indentation. +I changed the code in TopLevel/print/ppdec.sml to use absolute +indentation.

    +
    + +
    +
    +
    +
    +
    +
    [2023/11/21]
    +
    +
    +

    Fix bug in new-literals.sml, where the integer value 231 was being +stored as a 32-bit signed integer in the literal table, instead of as +a 64-bit signed integer. This change fixes Issue #287 +(The word literal 0wx80000000 is incorrectly converted to +0wx7FFFFFFF80000000).

    +
    + +
    +
    +
    +
    +
    +
    [2023/10/13]
    +
    +
    +

    Add contraction rule for when two calls to SETHDLR appear in +sequence. This situation arises when the body of a handler is +optimized away.

    +
    + +
    +
    +
    +
    +
    +
    +

    Version 110.99.4; 2023/08/01

    +
    + +
    +
    +
    [2023/07/25]
    +
    +
    +

    Modified the way that MLRISC handles zero-extension (ZX) when the +source bit width is smaller than the target. Previously, the code assumed +that the high bits would be zero, but as demonstrated by +issue #272, this is not guaranteed, so we now mask +out the high bits (see MLRISC/mltree/mltree-gen.sml).

    +
    + +
    +
    +
    +
    +
    +
    [2023/07/25]
    +
    +
    +

    Changed code generation for the RCC (Raw C Call) operation to +sign/zero-extend the result when it is smaller than 32 bits. +Note that the RCC operation is not supported on 64-bit platforms.

    +
    + +
    +
    +
    +
    +
    +
    [2023/07/24]
    +
    +
    +

    Added macOS 14 (Sonoma) to the systems recognized by the .arch-n-opsys +script. Have verified that the system builds and runs on macOS 14 Beta 4.

    +
    + +
    +
    +
    + +
    +
    +
    [2023/06/09]
    +
    +
    +

    The ULexBuffer.getu function was raising the Incomplete exception +on valid 4-byte UTF-8 sequences. Fixed this problem by rewriting the +code for handling multi-byte sequences.

    +
    + +
    +
    +
    +
    +
    +
    [2022/12/19]
    +
    +
    +

    Merge fixes for Slice all combinators (see +pull request 264).

    +
    + +
    +
    +
    +
    +
    +
    +

    Version 110.99.3; 2022/07/27

    +
    + +
    +
    +
    [2022/07/25]
    +
    +
    +

    Implemented Basis Library proposal +2021-001 +(Add getWindowSz function to Posix.TTY structure).

    +
    + +
    +
    +
    +
    +
    +
    [2022/07/25]
    +
    +
    +

    Implemented Basis Library proposal +2022-001 +(Add tau to MATH signature).

    +
    + +
    +
    +
    +
    +
    +
    [2022/07/24]
    +
    + + +
    +
    +
    + +
    +
    +
    [2022/07/22]
    +
    + + +
    +
    +
    + +
    +
    +
    [2022/07/22]
    +
    + + +
    +
    +
    +
    +
    +
    [2022/07/06]
    +
    +
    +

    Fixed some interfaces that did not agree with the SML Basis Library +specification. Bugs #318 (IEEEReal.decimal_approx does not +match the Basis Library) and #319 (Type of Real.fromDecimal +does not match the Basis Library).

    +
    + +
    +
    +
    +
    +
    +
    [2022/07/05]
    +
    +
    +

    Fixed bug #316 (Real.fromManExp does not return expected +value if man = 0.0).

    +
    + +
    +
    +
    +
    +
    +
    [2022/07/05]
    +
    +
    +

    Fixed bug #317 (Conversion from string to real does not +accept non-finite values).

    +
    + +
    +
    +
    +
    +
    +
    [2022/06/29]
    +
    +
    +

    Fixed bug #314 (IEEEReal.float_class does not match the +Basis Library). For some reason, the NAN constructor took an argument +in our implementation. This code was probably an early design of the +API that was changed in the Basis Library specification, but not in our +code.

    +
    + +
    +
    +
    +
    +
    +
    [2022/06/27]
    +
    +
    +

    Fixed bug #313 (Real.fromLargeInt crashes on large integer +input). Changed the assembly code for the AMD64 to return infinity +when the scaled exponent exceeds the maximum allowed (instead of +generating an overflow).

    +
    + +
    +
    +
    +
    +
    +
    [2022/06/27]
    +
    +
    +

    Adding support for generating a SIG_GC signal when there is a garbage +collection. I have also added a function

    +
    +
    +
    +
      val signalThreshold : int -> unit
    +
    +
    +
    +

    to the SMLofNJ.Internals.GC structure that allows one to specify the +threshold for generating a signal. The default is 1, which means +that for any major collection a signal is generated. Setting the +threshold to 0 means that minor collections also generate signals, +while setting the value to something greater than 1 will filter out +collections of younger generations. Collections that happen while a +sigGC handler is running are ignored, which should not be an issue +for thresholds of 1 or greater.

    +
    +
    +

    These changes fix bugs #65 (Garbage collection does not trigger +sigGC) and #291 (Signals are not delivered for corresponding +events).

    +
    + +
    +
    +
    +
    +
    +
    [2022/06/05]
    +
    +
    +

    Fixed bug #314 (IEEEReal.setRoundingMode is a no-op on Linux).

    +
    + +
    +
    +
    +
    +
    +
    [2022/06/05]
    +
    +
    +

    Fixed bug #312 (CM.make is unable to handle filenames that +contain a backslash). We have changed the semantics of paths given +to the functions in the CM structure to be interpreted using the +native pathname syntax (instead of CM's generic syntax).

    +
    + +
    +
    +
    +
    +
    +
    [2022/06/01]
    +
    +
    +

    Fixed bug #284 (Compiler bug: Contract: UsageMap on 132). +The problem was the the CPSTrans.cpstrans function was generating +code for loading spilled parameters in reverse order. In addition +to fixing the bug, added some detailed documentation of the code.

    +
    + +
    +
    +
    +
    +
    +
    [2022/05/20]
    +
    +
    +

    Fixed bug #310 (Error when REPL tries to print value of type +Posix.FileSys.ST.stat).

    +
    + +
    +
    +
    +
    +
    +
    [2022/05/10]
    +
    +
    +

    Fixed bug #306 (Word8VectorSlice: mapping a subslice produces +wrong result or crashes SML/NJ).

    +
    + +
    +
    +
    +
    +
    +
    [2022/04/03]
    +
    +
    +

    Fix a module compilation performance bug by removing packStr and +packFct from Elaborator/modules/sigmatch.sml (reducing the size of +that file by about 25%), and removing the call of packStr +(in function constrStr) in Elaborator/elaborate/elabmod.sml, replacing +it with a call to Instantiate.instAbstr. Goodbye at last to packStr!

    +
    + +
    +
    +
    +
    +
    +
    +

    Version 110.99.2; 2021/09/23

    +
    +
    +
    +
    [2021/08/18]
    +
    +
    +

    Fix a benign bug where the size of a floating-point spill record was twice +as large as necessary on 64-bit systems.

    +
    + +
    +
    +
    +
    +
    +
    [2021/08/10]
    +
    +
    +

    Split out the Real.toLargeInt implementation into target-word-size +versions (the Real64ToIntInf module). For 64-bit targets, the new +version uses the bit representation of the real number to compute the +result. The 32-bit version is the old code that uses floating-point +operations. This change fixes bug #279 (Real.toLargeInt returns +zero for anything in range [-512,512]).

    +
    + +
    +
    +
    +
    +
    +
    [2021/04/29]
    +
    +
    +

    In the translation from Absyn to PLambda, there was a function (inlops) +that was used to build the primop and type data structures for numeric +types. This function was being called for every primitive operator, +even though its results only depended on the numeric type. I added +a hash table to cache the results indexed numeric kind. This change +speeds up the compiler by about 3% (e.g., compiling the compiler +went from 58s to 55s on a MacBookPro with a 2.4GHz Intel i9 processor).

    +
    + +
    +
    +
    +
    +
    +
    +

    Version 110.99.1; 2021/04/12

    +
    +
    +
    +
    [2021/04/10]
    +
    +
    +

    Changed the AMD64 frame layout to include a word to hold the +Overflow exception. This value is used by the LLVM backend +to generate the exception for checked arithmetic operations.

    +
    + +
    +
    +
    +
    +
    +
    [2021/04/10]
    +
    +
    +

    Added support for running SML/NJ on M1 Macs via the Rosetta2 +emulator. The change is to identify the arm processor as amd64 +in the config/_arch-n-opsys script. Note that while the system +basically seems to work okay under rosetta, trying to run the makeml +command after having compiled the complier caused a crash.

    +
    + +
    +
    +
    +
    +
    +
    [2021/04/10]
    +
    +
    +

    Some minor restructuring of the logic in the generic installer.

    +
    + +
    +
    +
    +
    +
    +
    [2021/03/25]
    +
    +
    +

    Fixed a bug with how FLINT numeric types were being translated to CPS +types. Specifically, types that were smaller than the default integer +size (e.g., word8) should have been marked as having a tagged +representation.

    +
    + +
    +
    +
    +
    +
    +
    [2021/02/12]
    +
    +
    +

    Fix for bug #280 (110.99 config/install.sh -64 fails on macOS 10.15.7). +I was unable to reproduce this problem, but after some investigation, it +appears that the problem was inconsistent build tools being picked up from +the user’s path. To protect against this issue, I made the paths to the +ar and ranlib tools absolute.

    +
    + +
    +
    +
    +
    +
    +
    [2021/01/12]
    +
    +
    +

    Fixed a serious performance bug in the implementation of the CharBuffer +and MonoBuffer structures. Essentially, if one did not reserve sufficient +space for the contents, it could take quadratic time to fill the buffer. +We now grow the buffer by a factor of 1.5 of its current size, with an +upper bound on the extra growth of 256K.

    +
    + +
    +
    +
    +
    +
    +
    +

    Version 110.99; 2020/12/24

    +
    +
    +
    +
    [2020/12/23]
    +
    +
    +

    Changed the layout of the SML stack frame on the AMD64 +architecture to make it compatible with the way that LLVM +spills registers. Essentially, this just involved swapping +the order of the swap area and the ML stuff. We took this opportunity, +however, to localize up the representation of this information in +the compiler.

    +
    + +
    +
    +
    +
    +
    +
    [2020/12/23]
    +
    +
    +

    Changed the format of the "magic string" in the header of binfiles. +The new format is "arch-version", where the architecture +name is limited to at most seven bytes and the version is limited to +at most eight bytes. The string is padded with spaces to a total +length of 16 bytes.

    +
    + +
    +
    +
    +
    +
    +
    [2020/12/22]
    +
    +
    +

    Various pretty-printer bug fixes:

    +
    +
    +
    +
    +
      +
    • +

      bug #274 (Minor pretty printing glitch when printing +structure specs)

      +
    • +
    • +

      bug #276 (Missing option to control extra newlines in REPL)

      +
    • +
    • +

      bug #277 (Excess white space when pretty printing a module +signature)

      +
    • +
    +
    +
    +
    + +
    +
    +
    +
    +
    +
    [2020/12/19]
    +
    +
    +

    Fixed bug #254 (Real.fromLargeInt produces negative results). +The problem was because the digit size for IntInf.int is only one bit +smaller than the default int and the scaling factor rbase was being +computed using the InlineT.Real64.from_int function (so rbase +ends up being negative). Thus it would return incorrect results +whenever the IntInf representation involved more than one digit. +This is a bug on both 32-bit and 64-bit systems. The fix was to +switch to using InlineT.Real64.from_int{32,64} to convert rbase +and digits to real values.

    +
    + +
    +
    +
    +
    +
    +
    [2020/12/19]
    +
    +
    +

    Fixed bug #267 (Returns an incorrect result for a calculation +on Position.int for 32-bit mode). The problem was that on 32-bit +machines, 64-bit division is implemented by the IntInf module with +the result then being converted to 64-bits. The conversion used did +not test for overflow in the result.

    +
    + +
    +
    +
    +
    +
    +
    [2020/10/13]
    +
    +
    +

    Changed the semantics of the spans returned by ml-ulex so that the +second component of a span is the position of the rightmost character +in the token (instead of the character following the token). +Specifically, the span \((p_1, p_2)\) specifies the +\(p_2 - p_1 + 1\) characters that start with the character at +position \(p_1\) and run to \(p_2\) (inclusive). +This change avoids a potential problem when the span of a token ends +at the last character in a file (when the input is spread across +multiple files).

    +
    + +
    +
    +
    +
    +
    +
    [2020/09/10]
    +
    +
    +

    Simplified the binfile representation by removing the option of +having multiple code objects. Many years ago, we would split the +code for a compilation unit into multiple independent functions +so that the garbage collector could reclaim code that was only +executed once (or was not referenced). The actual splitting of +the code in the CpsSplitFun functor (CPS/clos/cps-split.sml) +was replaced by a dummy implementation at some point, so we have +not been generating multiple code objects for some time. Therefore, +we have simplified the code generator to assume only one code +object and have changed the binfile import/export code to only +support a single code object per binfile.

    +
    +
    +

    Also made this change to the bootstrap loader (kernel/boot.c).

    +
    + +
    +
    +
    +
    +
    +
    +

    Version 110.98.1; 2020/08/25

    +
    +
    +
    +
    [2020/08/25]
    +
    +
    +

    Reverted some of the pretty printing changes that were made in 110.98 +to the 110.97 version (the renaming of PrettyPrintNew to PrettyPrint +and the directory reorganizations were unchanged). These changes +fix bugs #266 (Pretty printing regression in SML/NJ 110.98), +#268 (Polymorphic Type Pretty Printing Regression), and +#271 (pretty printer regression for structure binding).

    +
    + +
    +
    +
    +
    +
    +
    [2020/08/02]
    +
    +
    +

    Fixed bug #269 (Word64.fromString causes an Overflow for +greater than 232-1). This bug was the result of constants from the +32-bit version of the code not getting updated for the 64-bit version. +Scanning of both hexadecimal and octal representations of both integers +and words were affected.

    +
    + +
    +
    +
    +
    +
    +
    [2020/07/22]
    +
    +
    +

    Added an additional lowering pass for the STREQL primop. This +primop is generated to implement pattern matching against string +literals. Previously it was unrolled in the MLRISC code generator, +but we now do the unrolling in CPS. The reason for this change +is that implementing the unrolling in the LLVM code generator +would be complicated because of the need to introduce phi nodes +in one of the branches.

    +
    +
    +

    The unrolling in CPS is somewhat different from before in that +we now bake the literal string being tested into the equality tests.

    +
    +
    +

    Also changed the representation of the primop to include the string +being tested against and removed the STRNEQ primop.

    +
    + +
    +
    +
    +
    +
    +
    +

    Version 110.98; 2020/07/16

    +
    + +
    +
    +
    [2020/07/10]
    +
    +
    +

    Changed the config/install.sh script so that the default size +is 64 bits for any machine that reports its machine as "x86_64."

    +
    + +
    +
    +
    +
    +
    +
    [2020/07/10]
    +
    +
    +

    Fixed bug #260 (Perform divide on Position.int crashes +with FPE on Linux). The fix required adding SIGFPE as a second +source of Overflow exceptions on Linux/amd64 machines.

    +
    + +
    +
    +
    +
    +
    +
    [2020/07/08]
    +
    +
    +

    Fixes for structure and signature pretty printing problems that were +introduced in the extensive pretty printer/pretty printing +modifications around revision 6291.

    +
    +
    +

    Files affected include ElabData/modules/ppmod.sml, +ElabData/types/pptype.sml, TopLevel/print/ppdec.sml, +and Basics/print/pputil.{sig,sml}.

    +
    +
    +

    The pretty printing for modules still seems quite fragile, so there +are likely to be more pretty printing problems to be fixed later. In +particular, pretty printing of functor and functor signature +declarations haven’t been tested.

    +
    + +
    +
    +
    +
    +
    +
    [2020/07/07]
    +
    +
    +

    Added a new lowering pass following CPS optimization, but before +closure conversion. This pass includes the previous passes for +64-bit operations on 32-bit matchines and for conversions involving +IntInf.int. It also adds lowering for div and mod to native +machine division (i.e., quot and rem) and for trapping conversions.

    +
    +
    +

    The purpose of this change is to simplify code generation in preparation +for migrating to a LLVM-based backend.

    +
    + +
    +
    +
    +
    +
    +
    [2020/07/06]
    +
    +
    +

    Fix for bug #261 (Weird "calc_strictness" message being printed). +Rewrote the function ElabUtil.calc_strictness and moved it to +TypesUtil.calcStrictness.

    +
    + +
    +
    +
    +
    +
    +
    [2020/07/02]
    +
    +
    +

    The HTMLDev structure in the pretty-printing library has been +moved into its own library (pp-extras-lib.cm) and renamed as +HTML3Dev. This change removes the dependency of pp-lib.cm +on html-lib.cm, which allowed us to remove all mention of +html-lib.cm from the compiler CM files.

    +
    + +
    +
    +
    +
    +
    +
    [2020/07/02]
    +
    +
    +

    Completed the removal of the trigonometry operators from the primops. +This change also allowed the removal of extension support from the +MLRISC code generator for the x86.

    +
    + +
    +
    +
    +
    +
    +
    [2020/07/01]
    +
    +
    +

    Disabled the use of the hardware instructions for the basic trig +functions on the x86. Doing so simplifies cross compilation +from non x86 hosts and also paves the way to removing the operations +from the compiler’s primitive operators.

    +
    + +
    +
    +
    +
    +
    +
    [2020/06/30]
    +
    +
    +

    Improved the CPS contraction phase by adding strength-reduction +optimizations to ContractPrim. These include recognizing when +multiplications and divisions by powers of 2 can be replaced by +shifts. Previously, these sorts of optimizations were provided +by MLRISC, but we plan to simplify the CPS IR prior to code +generation by replacing div and mod operations with native +machine arithmetic, which would prevent MLRISC from making the +optimizations.

    +
    + +
    +
    +
    +
    +
    +
    [2020/05/16]
    +
    +
    +

    The MLRISC instruction selector for the x86 and amd64 targets +erroneously assumed that the idiv instruction sets the OF (overflow) +condition code when dividing the largest negative number by ~1. +In fact, such a division operation traps, which is okay, because the +runtime system maps the trap to the Overflow exception. Since the +check for overflow is unnecessary, it has been removed from the files +MLRISC/amd64/mltree/amd64-gen.sml and MLRISC/x86/mltree/x86.sml.

    +
    +
    +

    Note that MLRISC's non-trapping signed division operations can +actually trap on overflow, but this was true before this change.

    +
    + +
    +
    +
    +
    +
    +
    [2020/05/16]
    +
    +
    +

    Changed the semantics of the --debug command-line option for ml-antlr. +Previously this option replaced the actions with a print expression, but that +limited its usefulness because of type errors in the generated code. The new +behavior is to preserve the existing actions and just add the printing code.

    +
    + +
    +
    +
    +
    +
    +
    [2020/04/27]
    +
    +
    +

    Added a pass to the elaborator that check for variables that are +bound, but never referenced. This check can be controlled by +the Control.Elab.unusedWarn flag. Unused top-level variables +are not reported (unless they are bound in the local part of +a local declaration).

    +
    +
    +

    The check is currently disabled because of false positives +caused by a transformation in the type checker. For example, +the following function declaration:

    +
    +
    +
    +
    fun foo n = let
    +      fun f x = g x - 1
    +      and g x = f x + 1
    +      in
    +        f n
    +      end;
    +
    +
    +
    +

    gets represented by the following Absyn:

    +
    +
    +
    +
    val foo = let
    +      val foo = (fn n => let
    +              local
    +                val tmp = let
    +                      val rec f = (fn x => Int.- (g x,1))
    +                          and g = (fn x => Int.+ (f x,1))
    +                      in (f,g)
    +                      end
    +              in
    +                val f = #1 tmp
    +                val g = #2 tmp
    +              end
    +            in f n
    +            end)
    +      in foo
    +      end
    +
    +
    +
    +

    where the instance of g bound to #2 tmp is unused. This transformation +is done by the wrapRECdec function. +The unused-variable implementation was influenced by +Jacob Van +Buren’s patch for Version 110.82.

    +
    + +
    +
    +
    +
    +
    +
    [2020/04/24]
    +
    +
    +

    The LambdaVar.lvar type is represented as an integer; since the earliest +days of the compiler this representation has been concrete, which meant +that the type system was not able to provide any guarantees that int`s and +`lvar`s were not being mixed up. As of this change, `LambdaVar.lvar is +now an abstract equality type (internally, it is still and int). +Some comments about the changes:

    +
    +
    +
    +
    +
      +
    • +

      The LambdaVar structure now includes substructures that implement +hash tables (LambdaVar.Tbl), finite maps (LambdaVar.Map), and +finite sets (LambdaVar.Set).

      +
    • +
    • +

      most of the changes involved replacing IntHashTable, IntRedBlackSet, +IntRedBlackMap with the equivalent substructures that were added + to the LambdaVar structure (e.g., IntHashTable =⇒ LambdaVar.Tbl).

      +
    • +
    • +

      there were a few place where debugging code assumed that lvars +could be printed as integers.

      +
    • +
    • +

      the pickling code requires a mechanism to convert between integers and +lvars; this is the one place where the abstraction is broken.

      +
    • +
    • +

      the worst abuse of the fact that the lvar type was int was in the +code generator, where arithmetic was used to generate a unique negative +number that could be used as a hash key, so that a given lvar could +be mapped to two different labels. I fixed this by using two tables.

      +
    • +
    +
    +
    +
    + +
    +
    [2020/04/24]
    +
    +
    +

    Fixed bug #256 (Ref.exchange incorrect). The original +source of the bug was the Basis Library sample code, which +has also been fixed.

    +
    + +
    +
    +
    +
    +
    +
    +

    Version 110.97; 2020/04/21

    +
    +
    +
    +
    [2020/04/21]
    +
    +
    +

    Changed the printing of tyvars; e.g., an OVLDV tyvar introduced +by an occurrence of the overloaded operator "*" that also +acquires the equality attribute will be printed as ''Z[OL(*)].

    +
    + +
    +
    +
    +
    +
    +
    [2020/04/20]
    +
    +
    +

    Eliminated AbsDec and ABSdec constructors

    +
    +
    +
    +
    +
      +
    • +

      Eliminated AbsDec constructor in Parse/ast/ast.{sig,sml} +and ABSdec constructor in ElabData/syntax/absyn.{sig,sml} +The "abstraction" declaration that these constructors +implemented was in SML/NJ 0.93, but was eliminated in favor +of opaque "sealing" signature ascription after SML '97.

      +
    • +
    • +

      Eliminated all other occurrences of these constructors +throughout the compiler (Front End, FLINT, and cm).

      +
    • +
    +
    +
    +
    + +
    +
    +
    +
    +
    +
    [2020/04/20]
    +
    +
    +

    File structure reoganization: the top-level compiler/Semant +directory was eliminated. Remaining relevant subdirectories +were Semant/pickle, which moved to ElabData, and Semant/prim, +whose two files, prim-env.sml and primop-bindings.sml moved to +the existing ElabData/prim directory. CM files compiler/core.cm +and ElabData/elabdata.cm changed accordingly.

    +
    + +
    +
    +
    +
    +
    +
    [2020/04/20]
    +
    +
    +

    Fixed bug #220

    +
    +
    +
    +
    +
      +
    • +

      Major redesign of the overloading resolution mechanisms. +Changed syntax of overload declaration (partly deferred to +110.98). Changed OVLDvar in VarCon.var, added files +overloadclasses.sml and overloadvar.sml to Elaborator/types. +Changed Types.tvKind in ElabData/types/types.{sig,sml}, +splitting OVLD tvKind into OVLDV (overloaded +variables/operators), OVLDI (overloaded int literals) and +OVLDW (overloaded word literals). Modified treatment of the +overload metavariables in Unify (Elaborator/types/unify.sml).

      +
    • +
    • +

      Changed printing of overload type metavariables (unification +type variables (Types.tyvar)).

      +
    • +
    • +

      Files: +ElabData/types/types.{sig,sml} +ElabData/types/overloadclasses.sml (new) +ElabData/types/overloadvar.sml (new) +ElabData/types/overload.sml +Elaborator/elaborate/elabcore.sml

      +
    • +
    +
    +
    +
    + +
    +
    +
    +
    +
    +
    [2020/04/09]
    +
    +
    +

    Fixed bug #214

    +
    +
    +
    +
    +
      +
    • +

      Changed printed message when a VALvar binding is shadowed to +print “<hidden>” (function ppVar in MiscUtil/print/ppdec.sml).

      +
    • +
    • +

      Minor cleanup of dontPickle function in Semant/pickle/pickmod.sml.

      +
    • +
    +
    +
    +
    + +
    +
    +
    +
    +
    +
    [2020/04/08]
    +
    +
    +

    Fixed bug #209

    +
    +
    +
    +
    +
      +
    • +

      Added function checkForbiddenCons to ElabUtil +(Elaborator/elaborate/elabutil.{sig,sml}) that checks if +a symbol is in the forbidden constructor set (it, true, +false, nil, ::, and ref).

      +
    • +
    • +

      Modified elabEXCEPTIONdec to check for forbidden exception +constructor names. Rewrote function elabEb to simplify.

      +
    • +
    • +

      Added a test for forbidden constructor names in function +elabConstr within elabDB.

      +
    • +
    • +

      Changed specs for types list and bool to datatype replication +specs in system/Basis/Implementation/{list,bool}.sig +to avoid an error caused by the occurrence of "forbidden" +constructors.

      +
    • +
    +
    +
    +
    + +
    +
    +
    +
    +
    +
    [2020/04/07]
    +
    +
    +

    Some clean up in the ml-lex/lexgen.sml code. Replaced the one-off +implementation of finite maps with the RedBlackMapFn functor from +the SML/NJ Library. Also got rid of the uses of polymorphic equality +by changing token equality tests to pattern matching.

    +
    + +
    +
    +
    +
    +
    +
    [2020/04/05]
    +
    +
    +

    Turned several functors (ElabModFn, ElabTopFn, SigMatchFn, etc. into +structures and removed the redundant functor application files (and +the directories) in Semant/elaborate and Semant/modules.

    +
    + +
    +
    +
    +
    +
    +
    [2020/04/05]
    +
    +
    +

    Fixed bugs #195 and #196

    +
    +
    +
    +
    +
      +
    • +

      #195: added missing DOdec case in function getDeclOrder in +ElabMod (Elaborator/elaborate/elabmod.sml l. ~563)

      +
    • +
    • +

      #196: modified elabDOdec in Elaborator/elaborate/elabcore.sml to +return the empty Static environment (SE.empty)

      +
    • +
    +
    +
    +
    + +
    +
    +
    +
    +
    +
    [2020/04/02]
    +
    +
    +

    Various minor changes related to the heap2exec and heap2asm +programs.

    +
    +
    +
    +
    +
      +
    • +

      Modified the config/install.sh script to remove bin/heap2exec +when the required helper bin/heap2asm is not installed.

      +
    • +
    • +

      Added -static and -dynamic as options to heap2exec +(these are the same as --linkwith-a and --linkwith-so)

      +
    • +
    • +

      Rewrote heap2asm be a bit more future-proof.

      +
    • +
    +
    +
    +
    + +
    +
    +
    +
    +
    +
    [2020/04/02]
    +
    +
    +

    Addressed bug #247 (@SMLVersion should report 64/32 bit) by +adding a new command-line option (@SMLwordsize) to the .run-sml +command script. Specifying this option will cause the the wordsize +to be printed (either 32 or 64) and then the program will exit.

    +
    + +
    +
    +
    +
    +
    +
    [2020/03/19]
    +
    +
    +

    Fix for bug #252 (Boyer Benchmark Compile Failure). This crash +was caused by a typo in the CPS/main/build-literals.sml code that +caused an incorrect opcode to be generated for SAVE/LOAD +instructions when the offset was >= 256.

    +
    + +
    +
    +
    +
    +
    +
    [2020/03/04]
    +
    +
    +

    Fixed the calculation of the maximum array/vector length for +64-bit targets. We had been using the calculation for 32-bit +targets.

    +
    + +
    +
    +
    +
    +
    +
    [2020/01/14]
    +
    +
    +

    Fix for bug #245 (Lazy data types result in Compiler Bug error). +The problem was that a number of symbols (e.g., deref) had been +dropped from the _Core structure, but were required to support +the lazy (and profiling) features in the compiler. The symbols have +been reinstated and a comment has been added to explain why they +are being included.

    +
    + +
    +
    +
    +
    +
    +
    [2019/12/23]
    +
    +
    +

    Fix for bug #244 (Compiler bug: PPObj: ppFields in ppval.sml). +The code generator was using the wrong length tag for raw64 records +on 64-bit machines (twice the length).

    +
    + +
    +
    +
    +
    +
    +
    [2019/12/21]
    +
    +
    +

    Clean up various issues in the configuration/build machinery for asdl. +This includes a fix for bug #240 (Non-default 64-bit installation +build failure)

    +
    + +
    +
    +
    +
    +
    +
    [2019/12/18]
    +
    +
    +

    Fix bug #239 (Date.toTime is incorrect (by a factor of 10E9)). +Thanks to Johannes 5 Joemann for both the report and fix.

    +
    + +
    +
    +
    +
    +
    +
    +

    Version 110.96; 2019/12/13

    +
    +
    +
    +
    [2019/12/12]
    +
    +
    +

    Bug fix for a problem where ^C (and other signals might be ignored). +The fix is to use word-sized fields in the VProc state vector so that +the word-sized move operations in the assembly code do not clobber +adjacent fields.

    +
    + +
    +
    +
    +
    +
    +
    [2019/12/12]
    +
    +
    +

    Fix for bug #234 (Converting NaN to a string causes an infinite +loop on 64-bit machines). The problem was in MLRISC/amd64/mltree/amd64-gen.sml, +which was not generating comparisons that work correctly when the +arguments are unordered.

    +
    + +
    +
    +
    +
    +
    +
    [2019/12/12]
    +
    +
    +

    Removed assertion checking from the amd64 runtime makefiles. It +has not turned up any errors since 110.94 was released, so we will +assume that things are working the way that they should.

    +
    + +
    +
    +
    +
    +
    +
    [2019/12/01]
    +
    +
    +

    Bugfix for bug #237 (heap2exec script fails on 110.95). +The fix was provided by Kirill Boltaev.

    +
    + +
    +
    +
    +
    +
    +
    [2019/11/23]
    +
    +
    +

    Changed the default installation size to 64 bits on macOS 10.14 +Mojave and later.

    +
    + +
    +
    +
    +
    +
    +
    [2019/11/14]
    +
    +
    +

    Fixed some code rot in the eXene sources (bug #233). With +the LargeWord module changing from Word32 to Word64, there were +a few places were things broke.

    +
    + +
    +
    +
    +
    +
    +
    [2019/11/14]
    +
    +
    +

    Added support for 64-bit executables on FreeBSD. As part of this +effort, we fixed a couple of regressions (makefile issues) for the +32-bit version on FreeBSD that were introduced when the X86.prim.asm +file was rewritten. We also switch from BSD signal handling to +POSIX signal handling, since that is what we use for most other systems.

    +
    + +
    +
    +
    +
    +
    +
    [2019/11/12]
    +
    +
    +

    Fixed config/install.sh script, which was not passing the size +option to the .link-sml, which caused confusion for the "-64" +flag. This problem was later reported as bugs #235 and +#236.

    +
    + +
    +
    +
    +
    +
    +
    [2019/11/10]
    +
    +
    +

    Many years ago, SML/NJ had a bytecode interpreter, but it was mostly +removed from the system a long time ago. There were, however, some +remnants of it in the runtime system. These have now been removed.

    +
    +
    +

    Having made this change, the distinction between the "target" and +"host" architectures is no longer necessary. Therefore, these have +been merged into a single architecture property. The effects of this +merge are as follows:

    +
    +
    +
    +
    +
      +
    • +

      the TARGET_xxx and HOST_xxx C-preprocessor symbols have been +replaced with a single ARCH_xxx symbol in the runtime system.

      +
    • +
    • +

      The SMLofNJ.SysInfo structure now provides getArchName and +getArchSize functions.

      +
    • +
    • +

      The following SMLofNJ.SysInfo functions are deprecated and will +be removed in 110.97: getHostSize, getHostArch, and +getTargetArch.

      +
    • +
    +
    +
    +
    + +
    +
    +
    +
    +
    +
    +

    Version 110.95; 2019/11/09

    +
    +
    +
    +
    [2019/11/09]
    +
    +
    +

    Fix for bug #230 (New literals-lifting code does not handle +pair of reals).

    +
    + +
    +
    +
    +
    +
    +
    [2019/11/09]
    +
    +
    +

    Simplified the runtime-system build rules for Cygwin.

    +
    + +
    +
    +
    +
    +
    +
    [2019/11/08]
    +
    +
    +

    Created the script config/prepare-win-install.sh, which implements +the fetching and unbundling of source and bin files in preparation +for a Windows installation.

    +
    + +
    +
    +
    +
    +
    +
    [2019/11/07]
    +
    +
    +

    Fix for bug #229 (Real.fromString errors). This bug was +actually two unrelated issues. The problem that Real.toString +returns Real.posInf for 0.0e123213213123213123123 has been +fixed in the RealScan module (system/basis/Implementation/real-scan.sml). +The second bug was a regression introduced in 110.93, where the +SIGFPE signal was specified as the result of the into instruction, +whereas Linux actually signals SIGSEGV for into. Note that +there may be a related issue of BSD systems, where SIGBUS might +be the signal, but we need access to a test machine to verify.

    +
    + +
    +
    +
    +
    +
    +
    [2019/11/07]
    +
    +
    +

    Fix for bug #230 (segmentation fault when compiling MLton sources +with SML/NJ 64-bit). The problem was that when a large vector was +being created, the assembly code did not correctly restore the stack +state before trying to call the runtime system to do the allocation.

    +
    + +
    +
    +
    +
    +
    +
    [2019/11/04]
    +
    +
    +

    The runtime now uses MAP_ANON for allocating memory on 64-bit Linux. +This change fixes a problem with versions of Linux that do not allow +access to /dev/zero (such as on ChromeBooks).

    +
    + +
    +
    +
    +
    +
    +
    +

    Version 110.94; 2019/10/31

    +
    +
    +
    +
    [2019/10/23]
    +
    +
    +

    Modified the generic installed (base/system/smlnj/installer/generic-install.sml) +to support conditional targets. You can now write tests like

    +
    +
    +
    +
    if UNIX
    +  unix-utils
    +endif
    +
    +
    +
    +

    The symbols that can currently be tested for are SIZE_32, SIZE_64, +UNIX, and WINDOWS. See the config/targets file for more details.

    +
    + +
    +
    +
    +
    +
    +
    [2019/10/23]
    +
    +
    +

    Fixed bug #227 (CPS contraction is taking an excessive amount +of time on word8 basis test).

    +
    + +
    +
    +
    +
    +
    +
    [2019/10/22]
    +
    +
    +

    Modified the CPS contraction phase to optimize the case where a +numeric conversion is applied to a constant value.

    +
    + +
    +
    +
    +
    +
    +
    [2019/10/21]
    +
    +
    +

    Modified the Unix installer script (base/system/smlnj/installer/nix-install.sml) +to pass a size argument to the configuration script. This argument is used +by the ASDL configuration.

    +
    + +
    +
    +
    +
    +
    +
    [2019/10/21]
    +
    +
    +

    Overhauled the installation script (config/install.sh) and various +script templates (e.g., config/_run-sml) to allow setting the +default size. The config/install.sh script now supports the following +arguments:

    +
    +
    +
    +
    +
    +
    -default size
    +
    +

    specify the default size for the sml and other commands, where +size is either 32 or 64.

    +
    +
    -32
    +
    +

    install the 32-bit version of the system.

    +
    +
    -64
    +
    +

    install the 64-bit version of the system.

    +
    +
    +
    +
    +
    +
    +

    It is possible to install both versions in the same location by running +the install.sh script twice. For example, the commands

    +
    +
    +
    +
    % config/install.sh -32
    +% config/install.sh -default 64
    +
    +
    +
    +

    will install both versions with the 64-bit version as default. One +would then use the command sml -32 to run the 32-bit version of +the system. Note that the default version must be installed second.

    +
    + +
    +
    +
    +
    +
    +
    [2019/10/21]
    +
    +
    +

    Added support for the -64 flag to the fixpt script in base/system.

    +
    + +
    +
    +
    +
    +
    +
    [2019/10/17]
    +
    +
    +

    Added support for the -64 flag to the cmb-make script in base/system.

    +
    + +
    +
    +
    +
    +
    +
    [2019/10/17]
    +
    +
    +

    Renamed the REAL representation constructor to Raw64, which matches +what is going on in the runtime system. Also renamed the toReal function +to toReal64.

    +
    + +
    +
    +
    +
    +
    +
    [2019/10/15]
    +
    +
    +

    Updated the SMLofNJ.SysInfo structure by removing constructors from the +oskind datatype that correspond to obsolete systems. Also added a +getHostSize function that returns the host architecture’s native word +size in bits (e.g., 32 or 64).

    +
    + +
    +
    +
    +
    +
    +
    [2019/10/13]
    +
    +
    +

    Added the -64 flag to the testml script in base/system and +to the .run-sml script. Thus, one will be able to specify the +32-bit version of SML/NJ using the command sml -32 and the +64-bit version using the command sml -64. Currently, 32-bits +is the default, since the 64-bit system is unstable.

    +
    + +
    +
    +
    +
    +
    +
    [2019/10/13]
    +
    +
    +

    Removed obsolete operating systems from the SMLofNJ.SysInfo.os_kind +datatype. This change reduces the type to two constructors: UNIX +and WIN32. Also added a function getHostSize to the SysInfo +structure, which returns the host word size in bits (i.e., either +32 or 64). The word size is now reported in the compiler’s +banner message at startup.

    +
    + +
    +
    +
    +
    +
    +
    [2019/10/12]
    +
    +
    +

    Fixed bug #130 (failure to raise Bind exception). Added +a function refutable to ElabData/types/typesutil.{sig,sml} and +used it to limit type generalization of val bindings in +Elaborator/types/typecheck.sml. +The fix does not deal properly with refutability of OR patterns, but +OR patterns in val bindings is a dubious feature.

    +
    +
    +

    This change also fixes bug #188 (Missing warning for +nonexhaustive valbind patterns), bug #190 (Unexpected exception +in SML/NJ with invalid list pattern match), and #199 (Compiler +bug in pretty printing of result).

    +
    + +
    +
    +
    +
    +
    +
    [2019/10/04]
    +
    +
    +

    Modified the cmb-make script to support passing compiler control +flags to the build command. The flags should be specified after the +path to sml command (if it is given).

    +
    + +
    +
    +
    +
    +
    +
    [2019/10/04]
    +
    +
    +

    Finished the implementation of the new literal bytecode engine. +There is a control flag (Control.CG.newLiterals that allows +switching between the old and new bytecodes).

    +
    + +
    +
    +
    +
    +
    +
    [2019/10/04]
    +
    +
    +

    Fix for bug #225 (Math.ln giving erroneous answers on Windows). +The problem was an inconsistency in the way the Unix and Microsoft +assemblers interpreted the addressing mode for the FLD instruction.

    +
    + +
    +
    +
    +
    +
    +
    [2019/10/03]
    +
    +
    +

    Clean up in the Basis Posix library code (both SML and runtime) to +be consistent about when the SysWord.word type is being used to +communicate information between SML code and the runtime system.

    +
    + +
    +
    +
    +
    +
    +
    +

    Version 110.93; 2019/09/05

    +
    +
    +
    +
    [2019/09/04]
    +
    +
    +

    Add support for specifying a 32 or 64-bit target as command-line option +to the .arch-n-opsys and .link-sml scripts. The default size is +currently 32-bits, but that will change once 64-bit support is solid.

    +
    + +
    +
    +
    +
    +
    +
    [2019/09/03]
    +
    +
    +

    Generalize code generation for conversions involving tagged integers/words, +where the size is not the default integer size. This situation only +occurred for Word8.word values on 32-bit targets, but also occurs for +32-bit values on 64-bit targets.

    +
    + +
    +
    +
    +
    +
    +
    [2019/09/02]
    +
    +
    +

    Rewrote the expansion of the INLLSHIFT, INLRSHIFTL, and INLRSHIFT +primops (compiler/FLINT/trans/transprim.sml). The expansion process +now correctly handles shift operations on types that are smaller than +the default tagged-integer size. This change also allows the Word8 +shift operations to be inlined.

    +
    + +
    +
    +
    +
    +
    +
    [2019/09/02]
    +
    +
    +

    Fixed a bug in the constant folding of arithmetic-right-shift operations. +The sign was not getting extended for words when the most-significant-bit +was set.

    +
    + +
    +
    +
    +
    +
    +
    [2019/08/25]
    +
    +
    +

    Fixed a bug in the Real.toManExp function (the exponent was off +by one, which meant that the mantissa was two times its expected +value). This fix also fixes a problem in Real.toLargeInt, where +the function would go into an infinite loop in some cases.

    +
    +
    + + + + + +
    +
    UPDATE
    +
    +
    2022/06/29
    +
    +

    This change probably also fixed bug #208 (Real.toManExp +produces incorrect results in some cases).

    +
    +
    +
    + +
    +
    +
    +
    +
    +
    [2019/08/24]
    +
    +
    +

    Fixed bug #173 (OS.Process.sleep only works with whole numbers). +For systems that have finer-grain sleep function, such as the +nanosleep(2) system call, the OS.Process.sleep and Posix.Process.sleep +functions now support sub-second granularity.

    +
    + +
    +
    +
    +
    +
    +
    [2019/08/18]
    +
    +
    +

    Restructured the CPS contraction phase to make the fusion of +integer/word conversions more uniform. Also fixed a bug +where Int32.fromLarge(Word32.toLargeInt 0wxffffffff) would +return ~1 instead of raising Overflow. The problem was +that TEST(m,n) o COPY(n,p) was getting fused to COPY(m,p) +when m = p, instead of TRUNC(m,p).

    +
    + +
    +
    +
    +
    +
    +
    [2019/08/14]
    +
    +
    +

    Int64 comparisons were not always correct, which lead to some positive +values being printed as negative numbers (basically when the sign +bit of the lower word was set).

    +
    + +
    +
    +
    +
    +
    +
    [2019/08/13]
    +
    +
    +

    Added Unsafe.IntInf structure, which provides access to the +internal representation of the IntInf.int type. Note that +this representation may change in the future.

    +
    + +
    +
    +
    +
    +
    +
    [2019/08/12]
    +
    +
    +

    Fixed bug #223 (Incremental Build fails on Windows). There was +a missing CloseHandle() when getting a file’s timestamp.

    +
    + +
    +
    +
    +
    +
    +
    +

    Version 110.92; 2019/08/10

    +
    +
    +
    +
    [2019/08/08]
    +
    +
    +

    Changed base/system/allcross script to use cmb-cross script. +Also modified the cmb-cross script to build compressed tar +files, when given the -z option, and to clean up intermediate +files.

    +
    + +
    +
    +
    +
    +
    +
    [2019/07/16]
    +
    +
    +

    Restructured the amd64 machine-code generation implementation and +filled in many of the missing encodings. It should be complete for +SML/NJ code generation, but needs more work to support the full set +of operations described in the amd64.mdl file.

    +
    + +
    +
    +
    +
    +
    +
    [2019/07/16]
    +
    +
    +

    Some cleanup in the x86 MLRISC backend. Removed the MULB, +MULW, and MULL unsigned-multiplication instructions, since they +are not binary operations. The MULL instruction is covered by +the MULL1 constructor in the multDivOp datatype. The same change +was applied to the amd64 backend.

    +
    + +
    +
    +
    +
    +
    +
    [2019/07/15]
    +
    +
    +

    Many changes to the amd64 machine description:

    +
    +
    +
    +
    +
      +
    • +

      Removed the PUSHB, PUSHW, and PUSHL instructions, since the matching +POP operations are not supported.

      +
    • +
    • +

      Removed the CALLQ operation, since it is the same as CALL.

      +
    • +
    • +

      Removed the CLTD and CQTO operations, since those names are just +synonyms for CDQ and CDO.

      +
    • +
    • +

      Replaced the INTO operation (which is not valid in 64-bit mode) with +INT of byte.

      +
    • +
    +
    +
    +
    + +
    +
    +
    +
    +
    +
    [2019/07/13]
    +
    +
    +

    New script for cross compiling to other architectures; the script +is still called cmb-make, but now supports target-specific +dependencies in the front-end (i.e., representation of numeric +types and endianess). The cross compilation scheme was developed +by Matthias Blume and then encoded in a script.

    +
    + +
    +
    +
    +
    +
    +
    [2019/07/11]
    +
    +
    +

    The runtime system now builds for the amd64 architecture. Most of +the changes relate to the difference between the flat BIBOP on 32-bit +platforms and the two-level BIBOP on 64-bit platforms.

    +
    + +
    +
    +
    +
    +
    +
    [2019/07/09]
    +
    +
    +

    Fix bug #224 (Word64.fromLargeInt fails). The problem was +an incorrect record kind in CPS/opt/infcnv.sml (it was RK_RECORD +instead of RK_RAWBLOCK).

    +
    + +
    +
    +
    +
    +
    +
    [2019/07/08]
    +
    +
    +

    Changed the rep datatype constructor Word32 to Raw (which +covers both 32 and 64-bit numbers on 32-bit platforms). We now +check the length of the raw object when converting to an +concrete numeric type.

    +
    + +
    +
    +
    +
    +
    +
    [2019/06/28]
    +
    +
    +

    Removed the use of runtime-type passing for polymorphic arrays. +The effect of this change is that code that uses the Array.array +type will be faster when the element type is not real (e.g., +sorting an Int32.int array was 1.2 times faster), but +slower when the type is real. Use the monomorphic type +RealArray.array for best performance on arrays of reals.

    +
    + +
    +
    +
    +
    +
    +
    +

    Version 110.91; 2019/06/20

    +
    +
    +
    +
    [2019/06/20]
    +
    +
    +

    We added a new primop, REAL_TO_BITS that casts a floating-point value to the +same-size word value. This primop allows the Assembly.logb function to be +implemented in SML.

    +
    +
    +

    We have also refactored the implementation of the Math structure to share +common code across the versions that are specialized for different levels of +hardware support.

    +
    + +
    +
    +
    +
    +
    +
    [2019/06/19]
    +
    +
    +

    Rewrote the assembly code for the x86 and AMD64 targets. Previously, there +were separate source files for Unix and Windows; these have been replaced +by a single common file (one for each architecture). The assyntax.h file +has also been replaced by x86-syntax.h, which covers both the x86 +and AMD64 on both UNIX and Windows.

    +
    +
    +

    The AMD64.prim.asm file now compiles, although there are a few minor +issues that will have to be fixed once we have a working code generator. +We have also fixed a number of issues in the garbage collector related +to the use of the 2-level BIBOP on 64-bit targets.

    +
    + +
    +
    +
    +
    +
    +
    [2019/06/18]
    +
    +
    +

    Some cleanup in the interval-timer code. In keeping with the other +time-specific functions, I have switched the runtime-system API to +use unsigned 64-bit nanoseconds to specify time values. I have also +added an implementation for c-libs/smlnj-runtime/itick.c, which +was missing. Lastly, moved the Windows-specific file win32-timers.c +from runtime/kernel to runtime/mach-dep.

    +
    + +
    +
    +
    +
    +
    +
    [2019/06/16]
    +
    +
    +

    Added 64-bit implementations of the target-specific Basis Library +modules in directory Basis/Implementation/Target64Bit.

    +
    + +
    +
    +
    +
    +
    +
    [2019/06/16]
    +
    +
    +

    Added PackWord64Big and PackWord64Little structures to Basis Library. +Note that the implementation of these is target-specific.

    +
    + +
    +
    +
    +
    +
    +
    [2019/06/16]
    +
    +
    +

    Added bigEndian flag to the TARGET signature.

    +
    + +
    +
    +
    +
    +
    +
    +

    Version 110.90; 2019/06/12

    +
    +
    +
    +
    [2019/06/12]
    +
    +
    +

    Fixed the Concurrent ML library to use 64-bit positions (both Unix +and Windows) versions.

    +
    + +
    +
    +
    +
    +
    +
    [2019/06/11]
    +
    +
    +

    Moved the year offset from SML to the runtime system. This change is +necessary because Windows uses 1601 as year 0, whereas UNIX uses 1900. +We have also switched to using unsigned 64-bit times in nanoseconds as +the interface between the Basis code and runtime system. This change +is consistent with the other places where time values are communicated +between the runtime and SML code.

    +
    + +
    +
    +
    +
    +
    +
    [2019/06/07]
    +
    +
    +

    Fixed a problem with CM’s symbol filtering (see bug #222).

    +
    +
    +

    The problem could manifest itself when a library l2.cm imported two +symbols A and B from l1.cm and then exported the same A but a +different B (which could have been defined in terms of the imported +B). Moreover, for the problem to occur both A and B within +l1.cm must have come from the same SML source file.

    +
    +
    +

    With the above setup, when running

    +
    +
    +
    +
    +
    +
    +
    CM.make "l2.cm";
    +
    +
    +
    +

    it was possible that instead of seeing the new A defined within +l2.cm one would still see the original version that came from +l1.cm.

    +
    + +
    +
    +
    [2019/06/04]
    +
    +
    +

    Various 64-bit porting changes to the Windows implementation of +the Basis Library and runtime system:

    +
    +
    +
    +
    +
    +
    +
    +
      +
    • +

      Add a target-specific Handle structure to support the HANDLE type, +which is a pointer-sized word value.

      +
    • +
    • +

      Changes to support the use of 64-bit file positions.

      +
    • +
    • +

      Replaced pairs of argumnents representing time values (seconds and +microseconds) with a single 64-bit count of microseconds.

      +
    • +
    +
    +
    +
    +
    +

    + +John Reppy

    +
    +
    +
    +
    [2019/06/04]
    +
    +
    +

    Implemented +Basis Library proposal 2019-001 (Correction to the PRIM_IO signature). +This proposal changes the return type of the avail function in a reader to +be Position.int option, which is necessary to support large files.

    +
    + +
    +
    +
    +
    +
    +
    [2019/06/03]
    +
    +
    +

    Added primop support (PTR_TO_WORD and WORD_TO_PTR) for the +c_pointer type that was added in 110.89. These primops are +exposed in the new InlineT.Pointer structure. We define a +PointerImp structure that is used inside the Basis implementation +and a Unsafe.Pointer structure that is visible to users.

    +
    + +
    +
    +
    +
    +
    +
    +

    Version 110.89; 2019/06/01

    +
    +
    +
    +
    [2019/06/01]
    +
    +
    +

    Switched the Position structure to be bound to Int64 and updated +the runtime system to use 64-bit integers for file offsets and +time values (in nanoseconds). This change fixes bugs #33 +(Overflow exception with inputLine function) and #36 (Can’t +open very large file).

    +
    + +
    +
    +
    +
    +
    +
    [2019/06/01]
    +
    +
    +

    Added abstract c_pointer type to the primitive types. This type +will be used to represent runtime-system pointers (e.g., the +HANDLE values in the Windows implementation).

    +
    + +
    +
    +
    +
    +
    +
    [2019/05/31]
    +
    +
    +

    Removed makefiles and code for architectures and operating systems +that are no longer supported (e.g., the DEC Alpha and HPPA +architectures).

    +
    + +
    +
    +
    +
    +
    +
    [2019/05/31]
    +
    +
    +

    Switched the FixedInt and LargeWord structure aliases to be 64-bits +(i.e., FixedInt is now bound to Int64 and LargeWord is bound +to Word64).

    +
    + +
    +
    +
    +
    +
    +
    [2019/05/30]
    +
    +
    +

    We are now assuming that we have at least C99 support (for +practical purposes, this assumption is even true on Windows). +With this assumption, the allocation of small objects in the +runtime has been switched from macros to inline functions +(see runtime/include/ml-objects.h). This change allows a +graceful handling of 32-bit integers, which are heap allocated +on 32-bit machines, but tagged on 64-bit machines.

    +
    + +
    +
    +
    +
    +
    +
    [2019/05/29]
    +
    +
    +

    Fixed various bugs in the implementation of the Word64 operations. +The addition and subtraction operators were using arithmetic right +shifts, instead of logical right shifts. Also, the translation +of 64-bit shift operations was incorrect because of a typo in +the variable names.

    +
    + +
    +
    +
    +
    +
    +
    [2019/05/27]
    +
    +
    +

    Created a simplified version of the MLRiscGen functor. This version +of the functor, which is in the file CodeGen/main/mlrisc-gen-fn.sml +does not include the memory disambiguation and GC types code. Since +the old version (CodeGen/main/mlriscGen.sml) did not use these +features by default, there should be no difference in the quality of +the generated code.

    +
    +
    +

    The purpose of this change is to remove unused code that has 32-bit +dependencies.

    +
    + +
    +
    +
    +
    +
    +
    [2019/05/25]
    +
    +
    +

    Added contraction for unsigned REM and NEG operations in +CPS/opt/contract-prim.sml.

    +
    + +
    +
    +
    +
    +
    +
    +

    Version 110.88; 2019/05/15

    +
    +
    +
    +
    [2019/05/15]
    +
    +
    +

    Moved the compiler/DEVNOTES directory to the dev-notes tree +and renamed it old-compiler-notes.

    +
    + +
    +
    +
    +
    +
    +
    [2019/05/15]
    +
    +
    +

    Added 64-bit versions of NumFormat and NumScan. We use the 32-bit +version for numbers of 32-bits or less and the 64-bit versions for +numbers with up to 64 bits. Thus, on 32-bit machines, the default +int and word types use NumFormat32 and NumScan32, while on +64-bit machines they use NumFormat64 and NumScan64. This change +also required splitting out some common code into a ScanUtil +structure and also splitting out the scanning of real numbers into +the ScanReal structure (formatting of reals was already in its own +structure).

    +
    + +
    +
    +
    +
    +
    +
    [2019/05/15]
    +
    +
    +

    Reimplemented the 64-bit int and word types to put them on a +(mostly) equal footing with the other precisions. In this new implementation, +the basic types int64 and word64 are now PRIMITIVE (instead +of being ABSTRACT type represented by pairs of boxed 32-bit words). +Arithmetic and comparison operations on these types are represented as +primops and are preserved as such up to just before closure conversion. +At that point, the new Num64Cnv structure (compiler/CPS/opt/numcnv.sml) +is used to expand 64-bit operations and constants into 32-bit operations. +Most of the 64-bit primops are inline expanded, but multiplication and +division operations are converted to calls to library code from the +CoreInt64 and CoreWord64 modules (system/smlnj/init).

    +
    +
    +

    Because the type are primitive, we were able to change the runtime +representation to use packed records (RK_RAWBLOCK) to represent +them, which saves space and should also help with performance.

    +
    +
    +

    See the dev-notes/num64.md file for more details about the +implementation.

    +
    + +
    +
    +
    +
    +
    +
    [2019/05/09]
    +
    +
    +

    Reorganized the Basis Library source files (system/Basis) to isolate +dependences on target word size.

    +
    +
    +

    In the Basis/Implementation directory, I created subdirectories +(e.g., Target32Bit) to hold implementations that are specific +to the target. These directories include a bind-structs.sml file +that replaces the many bind-*.sml files in Basis/Implementation.

    +
    +
    +

    In the Basis/Exports directory, I replaced the many individual +files (each with a single module renaming) with bind-common.sml +(for target-independent bindings) and a target-specific file +(either bind-target-32-bit.sml or bind-target-64-bit.sml).

    +
    + +
    +
    +
    +
    +
    +
    [2019/05/05]
    +
    +
    +

    Some of the CPS optimization modules (Expand and EtaSplit were +written as functors over the machine spec, when, in fact, they never +reference their functor argument. Therefore, they have been converted +to structures.

    +
    + +
    +
    +
    +
    +
    +
    [2019/05/04]
    +
    +
    +

    We now use the InlineT.identity primop for Fn.id, so the compiler +can optimize it.

    +
    + +
    +
    +
    +
    +
    +
    [2019/05/03]
    +
    +
    +

    Fixed pretty-printing regression in 110.87; value of char type +were missing their enclosing quotes.

    +
    + +
    +
    +
    +
    +
    +
    +

    Version 110.87; 2019/05/03

    +
    +
    +
    +
    [2019/05/03]
    +
    +
    +

    Made the Char.chr operator inline (a primop was added to support +this change in 110.86).

    +
    + +
    +
    +
    +
    +
    +
    [2019/05/03]
    +
    +
    +

    Major renaming of the primitive operators in the Inline structure +(as described in dev-notes/primop-list.md). Also cleaned up the +Basis Library implementation to remove most (but not all) +32-bit dependencies.

    +
    + +
    +
    +
    +
    +
    +
    [2019/05/03]
    +
    +
    +

    Added cases to the top-level pretty printer to handle the new basic +types that were added in 110.86 (e.g., word8vector and chararray). +Also changed the way that primitive types are handled to use a +table keyed by tycons, instead of a sequence of nested conditionals.

    +
    + +
    +
    +
    +
    +
    +
    +

    Version 110.86; 2019/05/02

    +
    +
    +
    +
    [2019/05/01]
    +
    +
    +

    Added word8vector and chararray to the primitive types that +the compiler knows about. These will be used in the rewriting +of the InlineT structure.

    +
    + +
    +
    +
    +
    +
    +
    [2019/05/01]
    +
    +
    +

    Replaced the Primop.primop constructors NUMSUBSCRIPT and NUMUPDATE +with

    +
    +
    +

    `sml +| NUMSUBSCRIPT of numkind +| NUMSUBSCRIPTV of numkind +| NUMUPDATE of numkind +| INLNUMSUBSCRIPT of numkind +| INLNUMSUBSCRIPTV of numkind +| INLNUMUPDATE of numkind +`

    +
    +
    +

    This design matches the naming conventions for polymorphic subscripting +and updating.

    +
    + +
    +
    +
    +
    +
    +
    [2019/05/01]
    +
    +
    +

    Added Primop.INLCHR to implement Char.chr as an inline function. +This change also required moving the definition of the Chr +exception to the Core module so that it is accessible to the +translate phase. The inline version of Char.chr will be enabled +in the 110.87 release (we need the internal primop before we can +use it).

    +
    + +
    +
    +
    +
    +
    +
    [2019/05/01]
    +
    +
    +

    Major overhaul of the representation of primitive operators (both in +the Primop and CPS.P structures). The primitive arithmetic and +comparison operations are now defined in the ArithOps structure +(ElabData/prim/arithops.sml). There are three datatypes defined +in this module

    +
    +
    +
    +
    +
      +
    • +

      arithop — integer arithmetic operations that may raise overflow

      +
    • +
    • +

      pureop — arithmetic operations that are pure

      +
    • +
    • +

      cmpop — comparison operations

      +
    • +
    +
    +
    +
    +
    +

    These types are used in both the Primop and CPS.P modules, +which makes the translation between representations more direct.

    +
    +
    +

    Some details:

    +
    +
    +
    +
    +
      +
    • +

      inline division and modulo operations were added to the +Primop.primop datatype; the expansion of these in +the TransPrim module (FLINT/trans/transprim.sml) +adds explicit checks for division by zero.

      +
    • +
    • +

      the FSGN operator was added to the Primop.primop datatype, +since the new cmpop datatype does not include it (the +CPS IR already had FSGN as a separate branch constructor).

      +
    • +
    • +

      unsigned comparison operations are now represented by using +the UINT numkind, which is consistent with how they +are represented in CPS.

      +
    • +
    • +

      Renamed the primop ROUND to REAL_TO_ROUND.

      +
    • +
    • +

      the encodings for operators were revised in the pickler, resulting +in a more compact use of the numeric codes.

      +
    • +
    +
    +
    +
    + +
    +
    +
    +
    +
    +
    [2019/04/28]
    +
    +
    +

    Removed unused record kind constructors (RK_SPILL, RK_EXN, and +RK_BLOCK) from CPS.record_kind datatype. Also renamed RK_I32BLOCK +to RK_RAWBLOCK and RK_FBLOCK to RK_RAW64BLOCK. Various other +renamings to remove 32-bit assumptions.

    +
    + +
    +
    +
    +
    +
    +
    [2019/04/28]
    +
    +
    +

    Renamed DTAG_raw32 to DTAG_raw, since the semantics on 64-bit systems +will be to require word-size aligned raw data. Also renamed ML_AllocRaw32 +to ML_AllocRaw and ML_ShrinkRaw32 to ML_ShrinkRaw for similar reasons.

    +
    + +
    +
    +
    +
    +
    +
    [2019/04/28]
    +
    +
    +

    Removed unused flags from the Control structure; most of these came +from Control.CG, where roughly 20 out of 60 flags were no longer used.

    +
    + +
    +
    +
    +
    +
    +
    [2019/04/27]
    +
    +
    +

    Split the contraction of primitive operators out of the Contract +structure into its own ContractPrim structure.

    +
    + +
    +
    +
    +
    +
    +
    [2019/04/27]
    +
    +
    +

    Split the translation of primops to PLambda out into its own file +(compiler/FLINT/trans/transprim.sml).

    +
    + +
    +
    +
    +
    +
    +
    [2019/04/27]
    +
    +
    +

    Fixed regression: Word32.toInt 0wx8002DE32; would return 187954 instead +of raising Overflow. The problem was a mistake in the way that the overflow +trap was being generated in MLRiscGen.

    +
    + +
    +
    +
    +
    +
    +
    [2019/04/26]
    +
    +
    +

    Some minor primop cleanup.

    +
    +
    +
    +
    +
      +
    • +

      Changed the types of Primop.ROUND and Primop.REAL +to take bitwidths, instead of numkinds, since the kinds are always the same. +Also, the fields are now called from and to (instead of fromkind and +tokind) to be consistent with other conversion primops.

      +
    • +
    • +

      Renamed ABS to FABS, since it is only used on floating-point numbers.

      +
    • +
    • +

      Renamed the CPS primitive operator ROUND to REAL_TO_INT and the operator +REAL to INT_TO_REAL.

      +
    • +
    • +

      Renamed the Primop.REAL to Primop.INT_TO_REAL so that it is not confused +with the other constructors named REAL.

      +
    • +
    +
    +
    +
    + +
    +
    +
    +
    +
    +
    [2019/04/23]
    +
    +
    +

    Improvements to the core 64-bit int and word modules in system/smlnj/init. +Replaced Int64.`, `Int64.-`, `Word64., and Word64. with versions +from *Hacker’s Delight that use fewer conditional branches. Also +replaced the relational operators (<, , etc) with more direct +implementations.

    +
    + +
    +
    +
    +
    +
    +
    [2019/04/21]
    +
    +
    +

    Fix for bug #213 (Int32.div raises Div instead of Overflow +when dividing minInt by ~1). Since the compiler generates an +explicit test for division by zero, we know that the only arithmetic +traps must be caused by other operations. Therefore, we can just +map any arithmetic trap to Overflow.

    +
    +
    +

    Also removed the old SPARC assembly code for multiplication and +division. The code generator always uses the native hardware +instructions, so the assembly code is not needed.

    +
    + +
    +
    +
    +
    +
    +
    [2019/04/21]
    +
    +
    +

    Yet another attempt to get the implementation of use in the REPL +working in a sensible way.

    +
    +
    +

    With these changes, use should behave as follows. +If an invocation of use encounters +a compilation error (either in the initial file or in a nested +invocation of use), then the compiler error message will be +printed and the call to use will immediately return (). +If an invocation of use raises an exception during execution +of the compiled code (either in the initial file or in a nested +invocation of use), then the exception will be reported at +the top-level. Any change to the global state or environment +that occurs before an error is encountered, will not be rolled +back.

    +
    +
    +

    Files specified as command-line arguments to the sml command +will be treated as if use was invoked on them. If there is an +error, then the error will be reported and the sml command +will terminate with a non-zero exit status (at least on Unix).

    +
    +
    +

    This change fixes bugs #193, #217, and #219. +There is a connection between this change and #183, which +was fixed in Version 110.82.

    +
    + +
    +
    +
    +
    +
    +
    [2019/04/21]
    +
    +
    +

    Change to the CPS primops: moved the F_SGN operator (which is unary) +from the fcmp datatype to the branch datatype (and renamed it FSGN).

    +
    + +
    +
    +
    +
    +
    +
    [2019/04/21]
    +
    +
    +

    Finished conversion of the CPS IR to a form that is compatible with +ASDL. Basically, this involved converting the datatype constructor +names to upper-case identifiers.

    +
    +
    +

    These changes are a step in the plan to eventually switch to an LLVM-based +code generator that will be given pickled CPS code as its input.

    +
    + +
    +
    +
    +
    +
    +
    [2019/04/08]
    +
    +
    +

    Starting to migrate the CPS IR toward the ASDL version. Changed the +names of the CPS.P.arith and CPS.P.cmpop constructors to be upper-case +alpha IDs (many of them were symbolic identifiers). Also split out the +various utility functions into the new CPSUtil module (CPS/cps/cps-util.sml). +Lastly, moved the literals.sml file from FLINT/main to CPS/main (where +it belongs).

    +
    +
    +

    Note that the CPS.P.arithop datatype is now identical to the Primop.arithop +datatype

    +
    + +
    +
    +
    +
    +
    +
    [2019/04/07]
    +
    +
    +

    Reorganized the backend of the compiler by moving the CPS-related +code into its own directory tree (Compiler/CPS) and replacing the +FLINTComp functor with the FLINTOpt structure and the CPSCompFn +functor. The conversion from FLINT to CPS is part of the CPSCompFn +functor, which takes the program representation all the way from +FLINT to machine code segments.

    +
    + +
    +
    +
    +
    +
    +
    +

    Version 110.85; 2018/12/21

    +
    +
    +
    +
    [2018/12/21]
    +
    +
    +

    Modified config/install.sh to look for a pre-Mojave SDK when trying +to install on macOS 10.14 Mojave.

    +
    + +
    +
    +
    +
    +
    +
    [2018/12/21]
    +
    +
    +

    Updated runtime/objs/cygwin.def so that the runtime system will build +on 32-bit Cygwin. Also updated installation script to suggest using +the 32-bit version of Cygwin when a user tries to install it on Cygwin64.

    +
    + +
    +
    +
    +
    +
    +
    [2018/11/10]
    +
    +
    +

    Xcode 10.1, which is Apple’s development environment for macOS 10.14 +Mojave, does not include the libraries needed to build 32-bit executables, +such as the SML/NJ runtime, although 32-bit programs will still run.

    +
    +
    +

    To support building on Mojave, I added a new Makefile (mk.x86-darwin18) +for the runtime system and modified the config/install.sh +script to use this makefile when necessary. This new makefile expects +that the MacOSX10.13.sdk directory from Xcode 9 has been +copied into the Xcode 10 SDKs directory. Note that updating Xcode +from the AppStore will likely remove the 10.13 SDK, so you should keep +a copy in a safe place.

    +
    +
    +

    The Xcode SDKs live in Platforms/MacOSX.platform/Developer/SDKs +under the Developer directory. One can determine the path to the +current developer directory using the command

    +
    +
    +
    +
    % xcode-select -p
    +
    +
    + +
    +
    +
    +
    +
    +
    [2018/10/10]
    +
    +
    +

    Removed several unsupported primitive operators from the compiler. +In the CPS IR, these were free, acclink, setpseudo, setmark, and +getpseudo. The pseudo-register operations were not supported in the +code generator, while the others were no-ops. The corresponding +operators GETPSEUDO, SETPSEUDO, SETMARK, and DISPOSE were removed +from ElabData/prim/primop.sml and their bindings were removed from +Semant/prim/primop-bindings.sml and the InlineT and Unsafe +structures.

    +
    +
    +

    The AllocProf module in the compiler was also disabled, since it +relied on the pseudo registers for recording profile information at +runtime. Furthermore, uses of the acclink primitive operation in +FLINT/cps/closure.sml when static profiling is enabled were removed.

    +
    +
    +

    These changes were committed as revision 4886.

    +
    + +
    +
    +
    +
    +
    +
    [2018/10/08]
    +
    +
    +

    Fix for bug #216 (run-time system fatal error with large top-level +value). The problem was in the code for building literals.

    +
    + +
    +
    +
    +
    +
    +
    [2018/09/23]
    +
    +
    +

    Change CPS operators for wrapping/unwrapping integer and float values to +be word-size flexible. We now use a single wrap (and unwrap) operator +that is parameterized by a numkind value. We also changed the wrap/unwrap +operators to box/unbox. The mapping from old operators to new ones +is as follows:

    +
    + +++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    wrap

    box

    unwrap

    unbox

    iwrap

    wrap(INT defaultIntSz)

    iunwrap

    unwrap(INT defaultIntSz)

    i32wrap

    wrap(INT 32)

    i32unwrap

    unwrap(INT 32)

    fwrap

    wrap(FLOAT 64)

    funwrap

    unwrap(FLOAT 64)

    + +
    +
    +
    +
    +
    +
    [2018/09/13]
    +
    +
    +

    Further cleanup for 64BIT in function atomeq in PEqual. +(base/compiler/FLINT/trans/pequal.sml). +Added numKind, intEqTy, and uintEqTy functions. +The numKind function should be extended once int64 and word64 are treated as primitive +types in the compiler.

    +
    + +
    +
    +
    +
    +
    +
    [2018/09/12]
    +
    +
    +

    Fixed 64BIT issue in module MatchComp +(base/compiler/FLINT/trans/matchcomp.sml). +Added int64Ty and word64Ty cases to function numCon.

    +
    + +
    +
    +
    +
    +
    +
    [2018/09/12]
    +
    +
    +

    Fixed 64BIT issue in module Equal +(base/compiler/FLINT/reps/equal.sml). +Exports just one function: equal_branch, which is called once in +reps/wrapping.sml to type-specialize branches on calls to POLYEQUAL.

    +
    + +
    +
    +
    +
    +
    +
    [2018/09/12]
    +
    +
    +

    The CPS optimizer had a mechanism for checking the CPS against the FLINT +types, which required maintaining a mapping from lvars to their FLINT +types. This code has long since bit-rotted and cannot even handle a +simple expression like 1+2. Therefore, I’ve removed the mapping (a hash +table) from the CPS optimizer and the vestigial code that modified it +in the various CPS optimization passes.

    +
    + +
    +
    +
    +
    +
    +
    [2018/09/12]
    +
    +
    +

    Modified the InfCnv (now named IntInfCnv) structure to remove +32-bit dependencies.

    +
    + +
    +
    +
    +
    +
    +
    [2018/09/11]
    +
    +
    +

    Modified Pequal (in base/compiler/FLINT/trans/pequal.sml) and +Translate (in base/compiler/FLINT/trans/translate.sml) to remove +32-bit dependencies. though further changes will be required to +properly handle int64 and word64 types when defaultIntSz = 64.

    +
    + +
    +
    +
    +
    +
    +
    [2018/09/11]
    +
    +
    +

    Reimplemented the Switch module (int base/compiler/FLINT/cps). The +new implementation follows the same basic design as before, but the code +is better organized and documented, and it now uses the concrete CPS +representations, instead of being parameterized over an abstraction of +them. It also now uses binary search for boxed (e.g., Int32.int) +switches.

    +
    + +
    +
    +
    +
    +
    +
    +

    Version 110.84; 2018/09/03

    +
    +
    +
    +
    [2018/09/03]
    +
    +
    +

    Reimplemented the array/vector-slice modules to use a (base, start, length) +representation (as does Substring in system/smlnj/init/substring.sml). Also +fixed a bug in the slice findi functions, where the index being passed to the +predicate function was not adjusted to be slice-relative.

    +
    + +
    +
    +
    + +
    +
    +
    [2018/09/02]
    +
    +
    +

    Improved implementation of CharVectorSlice.map and CharVectorSlice.mapi +to not build intermediate list of results.

    +
    + +
    +
    +
    +
    +
    +
    [2018/08/28]
    +
    +
    +

    A beta-release of ASDL library and asdlgen tool have been added to +the system. This version of the tool implements SML support, but the +C++ support is not complete. There is a CM tool for ASDL, which +recognizes the .asdl file suffix.

    +
    + +
    +
    +
    +
    +
    +
    [2018/08/28]
    +
    +
    +

    Two changes to the installer (base/base/system/smlnj/installer):

    +
    +
    +
    +
    +
      +
    1. +

      The build scripts for programs are now named build.sh (instead of +build) on Unix systems.

      +
    2. +
    3. +

      The config action has been added to support module configuration.

      +
    4. +
    +
    +
    +
    + +
    +
    +
    +
    +
    +
    [2018/08/27]
    +
    +
    +

    Added RENAME extension style to CM tool support. This extension +style allows arbitrary file names to be generated from the base name.

    +
    + +
    +
    +
    +
    +
    +
    [2018/08/19]
    +
    +
    +

    Fixed a bug in the implementation of monomorphic buffers: the functions +CharBuffer.add1 and Word8Buffer.add1 had an incorrect length +test.

    +
    + +
    +
    +
    +
    +
    +
    [2018/06/15]
    +
    +
    +

    Fixed a compiler bug (arg ty lists wrong length) in unifyTy that +could occur when one of the type constructors is the ERRORtyc. +This bug occurs because the ERRORtyc is equal to any other type +constructor, which (incorrectly) implies that the number of type +arguments should be equal.

    +
    + +
    +
    +
    +
    +
    +
    +

    Version 110.83; 2018/06/01

    +
    +
    +
    +
    [2018/05/29]
    +
    +
    +

    Fixed #206 (Parsing of explicit type variables and val rec +is broken). This bug was also bug number 1261 in the old bugs list.

    +
    + +
    +
    +
    +
    +
    +
    [2018/05/29]
    +
    +
    +

    Fixed minor bug in Date.toString (missing leading "0" for day of month). +This issue was bug number 1444 in the old bugs list.

    +
    + +
    +
    +
    +
    +
    +
    [2018/05/29]
    +
    +
    +

    Cleaned up match compiler code (FLINT/trans/matchcomp.sml) and added +typing and function comments. Added debugging and printing +infrastructure, including new FLINT/trans/mcprint.sml file, and new +Control.MC.debugging flag.

    +
    + +
    +
    +
    +
    +
    +
    [2018/05/29]
    +
    +
    +

    Fixed parser to allow parentheses around val rec patterns.

    +
    + +
    +
    +
    +
    +
    +
    [2018/05/28]
    +
    +
    +

    Fixed the scanner to produce the correct error message for bad escape +sequences in string literals.

    +
    + +
    +
    +
    +
    +
    +
    [2018/05/26]
    +
    +
    +

    Fixed old bug number 1383: Char.toCString #"\000" returned "\\0", +instead of "\\000", which caused String.toCString to produce invalid +results.

    +
    + +
    +
    +
    +
    +
    +
    [2018/05/19]
    +
    +
    +

    Fix for bug #201 (The AMD64.cm library is missing).

    +
    + +
    +
    +
    +
    +
    +
    [2018/05/18]
    +
    +
    +

    Added MONO_BUFFER signature, with instances CharBuffer and Word8Buffer, +to Basis implementation +( +Basis Library Proposal 2018-001).

    +
    + +
    +
    +
    +
    +
    +
    [2018/05/16]
    +
    +
    +

    Fix a bug where “0w” was being accepted as a prefix for a hexidecimal +word value in Word.fromString/scan (ignoring case, only “0x” +and “0wx” are valid prefixes). This change fixes bug number 1375 +from the old bugs list.

    +
    + +
    +
    +
    +
    +
    +
    [2018/05/13]
    +
    +
    +

    Fixed a bug in the parsing of bindings involving the op keyword. +The parser was more restrictive than the definition. This change +fixes bug number 1370 from the old bugs list.

    +
    + +
    +
    +
    +
    +
    +
    [2018/05/12]
    +
    +
    +

    The lexer gave an unmatched close comment error on "*)", when it +should have scanned it as the tokens "*" ")". This change +fixes bug number 330 in the old bugs list.

    +
    +
    +

    Note: there is some ambiguity as to what the correct behavior +should be here. The Definition of Standard ML (1997) only says that +unmatched open comments should be signalled as errors, but the +Commentary on the Definition of Standard ML (1991) says otherwise in +Appendix D. SML/NJ started signalling an error in version 0.71, +but we choose to revert to accepting this sequence, to match +the 1997 Definition (and the behavior of other systems).

    +
    + +
    +
    +
    +
    +
    +
    [2018/05/07]
    +
    +
    +

    The sameSign function returned incorrect results in the Int31 +and Int32 modules.

    +
    + +
    +
    +
    +
    +
    +
    [2018/05/07]
    +
    +
    +

    Fixed various minor parsing and scanning issues:

    +
    +
    +
    +
    +
      +
    • +

      correct syntax for type variables

      +
    • +
    • +

      signature/structure/functor IDs should always be alpha IDs

      +
    • +
    • +

      the equality ID (=) cannot appear in a binding context. Note that +we still allow the syntax val op = = …​ because it is needed to +parse the file system/smlnj/init/built-in.sml.

      +
    • +
    +
    +
    +
    + +
    +
    +
    +
    +
    +
    [2018/05/05]
    +
    +
    +

    Completed overhaul of the way that int/word literals are handled in +the compiler. We now use IntInf.int to represent the values in +all IRs. This change also results in better CPS contraction, since +we now perform constant folding for both signed and unsigned values +at all sizes. We were also able to get rid of the tricky code that +worries about large tagged integer values that might cause overflow +during code generation.

    +
    + +
    +
    +
    +
    +
    +
    [2018/04/21]
    +
    +
    +

    Improved the reporting of errors involving literal values. We now +use the original source text when describing the value in the error +message.

    +
    + +
    +
    +
    +
    +
    +
    [2018/04/20]
    +
    +
    +

    Fix for bug #191 (Compiler crash when handling large reals). +We now issue a warning for real literals that will round to zero and +an error for real literals that are too large to represent. There +still needs to be done some work to support sub-normal literal values +(these are currently rounded to zero).

    +
    + +
    +
    +
    +
    +
    +
    [2018/04/14]
    +
    +
    +

    Changed the representation of real literals from strings to RealLit.t.

    +
    + +
    +
    +
    +
    +
    +
    [2018/04/13]
    +
    +
    +

    Removed real patterns from Absyn and FLINT, since they are not allowed +by SML'93 and were not present in the AST representation.

    +
    + +
    +
    +
    +
    +
    +
    [2018/04/12]
    +
    +
    +

    Fix for bug #194 (Real.fromString overflows or hangs). There +were two issues here. First, the Overflow exception was being raised +when scanning large exponents, but it was not being handled by the +scanning code. The second issue was that the scaling loop for large +exponents did not immediately terminate once infinity (or zero) was +reached, so it could take a long time.

    +
    + +
    +
    +
    +
    +
    +
    [2017/10/22]
    +
    +
    +

    Moved the Version-1 literal building code into gc/old-literals.c. +This file can be removed once the compiler generates the Version-2 +literal bytecode.

    +
    + +
    +
    +
    +
    +
    +
    [2017/10/16]
    +
    +
    +

    Moved the check for whether a int or word literal is in range for +its type from the absyn→plambda translation to the overload +resolver (compiler/Elaborator/types/overload.sml).

    +
    + +
    +
    +
    +
    +
    +
    [2017/10/14]
    +
    +
    +

    Part 1 of an overhaul of the way that the compiler treats int/word +literals. The end goal is to use IntInf.int to represent literals +throughout all phases of the compiler. In this step, we changed the +representation of literals in the Absyn representation (earlier +representations already used IntInf.int).

    +
    + +
    +
    +
    +
    +
    +
    +

    Version 110.82; 2017/10/16

    +
    +
    +
    +
    [2017/10/01]
    +
    +
    +

    Fixed unnumbered bug in IntInf.mod and IntInf.rem functions, +where the Div exception was not getting raised when both +arguments are 0.

    +
    + +
    +
    +
    +
    +
    +
    [2017/09/20]
    +
    +
    +

    Various bits of cleanup in the handling of primitive operations, such +as removing the ptnum mechanism for translating from Absyn to FLINT.

    +
    + +
    +
    +
    +
    +
    +
    [2017/09/20]
    +
    +
    +

    Added Target module, which specifies the properties of the target +(e.g., the size in bits of the default int type). Reworked the +generation of the InlineT structure to be target specific.

    +
    + +
    +
    +
    +
    +
    +
    [2017/09/18]
    +
    +
    +

    Removed FLINT primops (and their CPS counterparts) that are not +in the InlineT structure and, thus, are never used by the compiler.

    +
    + +
    +
    +
    +
    +
    +
    [2017/09/18]
    +
    +
    +

    Fixed bug #123 (missing nonexhaustive bind warning). The mkVBs +function in FLINT/trans/translate.sml was adding a redundant default +rule by calling ElabUtil.completeMatch after a default rule had +already been explicitly added to the match for let bindings.

    +
    + +
    +
    +
    +
    +
    +
    [2017/09/18]
    +
    +
    +

    Fixed bug #183 (status code returned by sml REPL). This fix +restores the version 110.79 behavior of having sml foo.sml exit with +a non-zero status when there is a type-checking error in foo.sml. +It also cleans up the error messages associated with use when there +is a syntax error.

    +
    + +
    +
    +
    +
    +
    +
    [2017/08/28]
    +
    +
    +

    Fixed bug #185 (Bring command line help text into parity with man page). +Added missing options (@SMLversion and @SMLsuffix) to the +help message that is printed for the command “sml -h”. Also +adjusted the order of options in the help message, and in the man +page, so that the orders match.

    +
    + +
    +
    +
    +
    +
    +
    [2017/08/12]
    +
    +
    +

    Changed the way that we test for allocation-space addresses in minor GCs. +Instead of using the BIBOP, we now do a pointer range test. On 32-bit +systems, this change results in a small (~0.13%) performance boost, but +we expect a bigger impact on 64-bit hardware, where the cost of BIBOP +probes will be higher and there are more registers available to hold +the nursery bounds.

    +
    + +
    +
    +
    +
    +
    +
    [2017/08/12]
    +
    +
    +

    Fixed some issues in build-literals.c. These were mostly false +positives in the assertions, but there was also a bug in the way +that the available space was tracked that could conceivably +result in a crash (but was very unlikely).

    +
    + +
    +
    +
    +
    +
    +
    [2017/06/07]
    +
    +
    +

    Updated _arch-n-os script to recognize macOS 10.13 (High Sierra) +as a valid target.

    +
    + +
    +
    +
    +
    +
    +
    [2017/05/17]
    +
    +
    +

    Fixed a bug in the way that JSON string values were being printed. +The code previously assumed that C-style escaping will work, but +that is not true for "\'" (as well as for control and non-ASCII +characters). The new implementation assumes that the string value +is UTF-8 and uses the "\\u" escape sequences for characters outside +the JSON escapes and printable ASCII characters.

    +
    + +
    +
    +
    +
    +
    +
    +

    Version 110.81; 2017/05/01

    +
    +
    +
    +
    [2017/04/28]
    +
    +
    +

    Fixed bug #129 (Symbolic identifiers are allowed as strids).

    +
    + +
    +
    +
    +
    +
    +
    [2017/04/07]
    +
    +
    +

    Fixed bug #179 (ml-ulex writing debug messages to stdOut). +Both ml-ulex and ml-antlr now direct their debug and status messages +to stdErr (instead of stdOut).

    +
    + +
    +
    +
    +
    +
    +
    [2017/02/09]
    +
    +
    +

    Linux distributions are starting to require that the stack be marked +as non-executable in applications. Because the runtime system includes +assembly code, this marking was not happening. We’ve added .section +directives to the PPC.prim.asm and X86.prim.asm files as +per https://wiki.gentoo.org/wiki/Hardened/GNU_stack_quickstart#Patching. +Thanks to Daniel Moerner for reporting this issue and for providing +a pointer to the fix.

    +
    + +
    +
    +
    +
    +
    +
    [2016/10/15]
    +
    +
    +

    Added --debug command-line option to ml-antlr. +This flag causes <b>ml-antlr</b> to generate debug actions that print +the left-hand-side non-terminal of the production.

    +
    + +
    +
    +
    +
    +
    +
    [2016/09/15]
    +
    +
    +

    Working on 64-bit support. Changes include making code generation +dependent on the target word size and abstracting over the BIBOP +representation in the runtime system.

    +
    + +
    +
    +
    +
    +
    +
    [2016/09/15]
    +
    +
    +

    Further cleanup of the separation of FLINT from the front-end. +Eliminated all references to ModulePropLists (module-plists.sml) in +the front end and in pickling, and moved module-plists from +Semant/modules to FLINT/trans. ModulePropLists is now only used +in FLINT/trans/translate.sml.

    +
    +
    +

    Revision: 4314

    +
    +
    +

    Files changed:

    +
    +
    +
    +
    +
      +
    • +

      compiler/ElabData/modules/modules.sml (cleaned up)

      +
    • +
    • +

      compiler/Elaborator/print/ppmod.sml (cleaned up)

      +
    • +
    • +

      compiler/FLINT/trans/module-plists.sml (moved from Semant/modules)

      +
    • +
    • +

      compiler/Semant/modules/instantiate-param.sml (deleted)

      +
    • +
    • +

      compiler/Semant/pickle/pickmod.sml (no longer mentions property lists)

      +
    • +
    • +

      compiler/Semant/pickle/unpickmod.sml (ditto)

      +
    • +
    • +

      compiler/Semant/statenv/prim.sml

      +
    • +
    • +

      compiler/Semant/types/tp-var-info.sml (deleted)

      +
    • +
    • +

      compiler/core.cm (modified for move of module-plists.sml)

      +
    • +
    +
    +
    +
    + +
    +
    +
    +
    +
    +
    [2016/09/14]
    +
    +
    +

    Eliminated dependency of PlambdaType from the front end by adding a +type TKind.tkind which is a simplified standin for PlambdaType.tkind +for use during elaboration. TKind.tkind values are translated on +demand to PlambdaType.tkind in trans/transtypes.sml. Types still has +a tycpath type but it is defined using TKind.tkind now. The new structure +SigPropList replaces ModulePropLists +(Semant/modules/module-plists.sml) for use in instantiate.sml. +Instantiate is now defined directly as a structure so the functor +application in Semant/modules/instantiate.sml no longer exists.

    +
    +
    +

    Files changed:

    +
    +
    +
    +
    +
      +
    • +

      ElabData/basics/debindex.sig (moved here from Elaborator/basics)

      +
    • +
    • +

      ElabData/basics/debindex.sml (ditto)

      +
    • +
    • +

      ElabData/basics/sig-plist.sml (new)

      +
    • +
    • +

      ElabData/basics/tkind.sml (new)

      +
    • +
    • +

      ElabData/types/types.sig

      +
    • +
    • +

      ElabData/types/types.sml

      +
    • +
    • +

      Elaborator/modules/instantiate.sml

      +
    • +
    • +

      Elaborator/print/ppmod.sml

      +
    • +
    • +

      FLINT/trans/transtkind.sml (new)

      +
    • +
    • +

      FLINT/trans/transtypes.sml

      +
    • +
    • +

      TopLevel/interact/evalloop.sml

      +
    • +
    • +

      ElabData/elabdata.cm

      +
    • +
    • +

      Elaborator/elaborate.cm

      +
    • +
    • +

      core.cm

      +
    • +
    +
    +
    +
    + +
    +
    +
    +
    +
    +
    [2016/09/14]
    +
    +
    +

    Added support for Successor ML record-expression-punning syntax. For +example, one can now define a function f as

    +
    +
    +
    +
    fun f x = {x}
    +
    +
    +
    +

    which is equivalent to the definition

    +
    +
    +
    +
    fun f x = {x = x}
    +
    +
    + +
    +
    +
    +
    +
    +
    [2016/09/14]
    +
    +
    +

    Fixed a bug in the parser. Asterix (*) was not allowed as a record label +when using the record-pattern-punning syntax.

    +
    + +
    +
    +
    +
    +
    +
    [2016/09/14]
    +
    +
    +

    Added support for do exp Successor ML syntax.

    +
    + +
    +
    +
    +
    +
    +
    [2016/09/12]
    +
    +
    +

    Fixed bug #153 (Enabling Successor ML features is delayed). +We now use a function Control.setSuccML to switch to/from Successor ML +mode in the REPL. The function resets the parser, so the next input will +be correctly parsed. The Control.succML flag is no longer visibile +in the REPL.

    +
    + +
    +
    +
    +
    +
    +
    [2016/09/12]
    +
    +
    +

    Fixed bug #149 (Datatype replication exposes hidden constructors). +Added boolean field stripped to DATATYPE variant of tyckind in +compiler/ElabData/types/types.sml with default value false. +stripped is set to true when a datatype is matched with a simple +type spec in signature matching, and datatypes with stripped set +to true are disallowed in datatype replications.

    +
    +
    +

    Files changed:

    +
    +
    +
    +
    +
      +
    • +

      compiler/ElabData/types/types.sig

      +
    • +
    • +

      compiler/ElabData/types/types.sml

      +
    • +
    • +

      compiler/ElabData/types/typesutil.sml

      +
    • +
    • +

      compiler/ElabData/types/core-basictypes.sml

      +
    • +
    • +

      compiler/Elaborator/types/basictypes.sml

      +
    • +
    • +

      compiler/Elaborator/types/eqtypes.sml

      +
    • +
    • +

      compiler/Elaborator/modules/evalent.sml

      +
    • +
    • +

      compiler/Elaborator/modules/sigmatch.sml

      +
    • +
    • +

      compiler/Elaborator/modules/instantiate.sml

      +
    • +
    • +

      compiler/Elaborator/print/ppabsyn.sml

      +
    • +
    • +

      compiler/Elaborator/print/pptype.sml

      +
    • +
    • +

      compiler/Elaborator/elaborate/elabcore.sml

      +
    • +
    • +

      compiler/Elaborator/elaborate/elabmod.sml

      +
    • +
    • +

      compiler/Elaborator/elaborate/elabtype.sml

      +
    • +
    • +

      compiler/Elaborator/elaborate/elabsig.sml

      +
    • +
    • +

      compiler/Semant/pickle/pickmod.sml

      +
    • +
    • +

      compiler/Semant/pickle/unpickmod.sml

      +
    • +
    • +

      compiler/MiscUtil/print/ppobj.sml

      +
    • +
    • +

      compiler/FLINT/trans/transtypes.sml

      +
    • +
    • +

      compiler/FLINT/trans/pequal.sml

      +
    • +
    +
    +
    +
    + +
    +
    +
    +
    +
    +
    [2016/08/31]
    +
    +
    +

    Added %tokentype directive to ml-antlr; this directive allows users +to specify the token datatype externally, which is necessary in order +to share a lexer with two different ml-antlr parsers.

    +
    + +
    +
    +
    +
    +
    +
    [2016/08/20]
    +
    +
    +

    Change the interface to AMD64Gen in MLRISC; the signBit and +negateSignBit callbacks now return an MLTree.rexp (instead of a +label).

    +
    + +
    +
    +
    +
    +
    +
    +

    Version 110.80; 2016/08/19

    +
    +
    +
    +
    [2016/08/16]
    +
    +
    +

    Fixed #151 (Error installing from source on Mac OS X). The fix +involves both changes to the config/install.sh script and the +mk.x86-darwin makefile. With this fix, we include the SDK argument +to the /usr/bin/as only when the OS version is 10.10 (Yosemite) or +later.

    +
    + +
    +
    +
    +
    +
    +
    [2016/08/10]
    +
    +
    +

    Added the proposed unzipMap, unzipMapi, find, and findi functions +to the ListPair module.

    +
    + +
    +
    +
    +
    +
    +
    [2016/08/10]
    +
    +
    +

    Added the proposed mapLeft, mapRight, appLeft, and appRight +functions to the Either module.

    +
    + +
    +
    +
    +
    +
    +
    [2016/08/09]
    +
    +
    +

    Fixed bug #145 (Internal exception occurs on bogus annotation +instead of typechecking diagnostic). Added missing OVLD_UB case in +function failMessage in compiler/Elaborator/types/unify.sml.

    +
    + +
    +
    +
    +
    +
    +
    [2016/08/04]
    +
    +
    +

    Fixed bug #166 (Can’t install SML/NJ in directories containing +spaces). Thanks to Eugene Sharygin for the patch.

    +
    + +
    +
    +
    +
    +
    +
    [2016/06/21]
    +
    +
    +

    Fixed incorrect dividend sign extension before 32-bit divide in amd64 +code generator in MLRISC

    +
    + +
    +
    +
    +
    +
    +
    [2016/06/16]
    +
    +
    +

    Fixed bug #150 (Add title to batch script).

    +
    + +
    +
    +
    +
    +
    +
    [2016/05/11]
    +
    +
    +

    Implemented the changes for + +Basis Library Proposal 2016-001. This proposal added the popCount +function to the WORD signature.

    +
    + +
    +
    +
    +
    +
    +
    [2016/05/03]
    +
    +
    +

    Fixed bug #156 (sml resumes after SIGSTOP with bogus exception +report). The fix is a bit of a hack: I modified the non_bt_hdl +function in evalloop.sml to match an IO.Io exception with +the appropriate shape for this situation.

    +
    + +
    +
    +
    +
    +
    +
    [2016/04/07]
    +
    +
    +

    Fixed bug #154 (Return code for ml-ulex when there is an error).

    +
    + +
    +
    +
    +
    +
    +
    [2016/04/07]
    +
    +
    +

    Fixed bug #155 (Misleading printing of word literals in +error messages).

    +
    + +
    +
    +
    +
    +
    +
    [2016/04/02]
    +
    +
    +

    Fixed a bug in the implementation of the --ml-lex-mode flag for +ml-ulex. The \h escape sequence is supposed to map to the +character range [\128-\255], but did not.

    +
    + +
    +
    +
    +
    +
    +
    [2015/11/09]
    +
    +
    +

    Fixed bug #147 (Hexadecimal escapes in strings are not supported). +We previously did not support Unicode escapes in string literals. We now +do so, with non-ascii codepoints being mapped to the +UTF-8 encoding with escape values in the range 0..255 being mapped to the +corresponding 8-bit character. Values outside that range are flagged +as an error.

    +
    +
    +

    Revised August 4, 2016

    +
    + +
    +
    +
    +
    +
    +
    [2015/10/28]
    +
    +
    +

    Partial fix for the noisy exception-stack traces on the Error exception. +The cases that are handled by this change are applying use to a non-existent +file and when there are compilation errors in a program being built by +CM.make. What remains to be handled is the situation where CM.make is +applied to a non-existent file.

    +
    + +
    +
    +
    +
    +
    +
    +

    Version 110.79; 2015/10/04

    +
    +
    +
    +
    [2015/10/04]
    +
    +
    +

    Patched base/compiler/FLINT/clos/closure.sml so that Twelf will +build again. Fixes bug #140 (Lookup failure in closure.sml +when compiling Twelf).

    +
    + +
    +
    +
    +
    +
    +
    [2015/09/28]
    +
    +
    +

    Added support for a Successor ML tool to CM. This tool allows one +to specify that a source file fool.sml is Successor ML source code in the +following ways:

    +
    +
    +
    +
    foo.sml : succ-ml
    +foo.sml : sml (succ-ml)
    +foo.sml (succ-ml)
    +
    +
    + +
    +
    +
    +
    +
    +
    [2015/09/28]
    +
    +
    +

    Added the directory base/old-basis to support backward-compatible +views of the Basis Library. You can use these by replacing the +line

    +
    +
    +
    +
    $/basis.cm
    +
    +
    +
    +

    with

    +
    +
    +
    +
    $/basis-2004.cm
    +
    +
    +
    +

    in your CM files.

    +
    +
    +
    +
    +
    +
    +
    [2015/09/28]
    +
    +
    +

    New implementation of Date structure in the Basis, which fixes bugs +#138 (Incorrect behavior for Date.fromTimeLocal) and +#139 (Date.date is broken). Note that some more thought should +be given to the correct semantics of Date.date when dealing with +offsets. For example, should an offset of +23 hours produce the same +date as an offset of -1 hours? Currently our implementation produces +different results (by a day) for these two situations.

    +
    + +
    +
    +
    +
    +
    +
    [2015/09/25]
    +
    +
    +

    Implemented the changes for + +Basis Library Proposal 2015-003. This proposal added operations to +the following signatures:

    +
    +
    +
    +
    signature ARRAY
    +signature LIST
    +signature LIST_PAIR
    +signature MONO_ARRAY
    +signature MONO_VECTOR
    +signature OPTION
    +signature STRING
    +signature TEXT
    +signature VECTOR
    +
    +
    +
    +

    and the following structures:

    +
    +
    +
    +
    structure Array : ARRAY
    +structure CharArray : MONO_ARRAY
    +structure CharVector : MONO_VECTOR
    +structure List : LIST
    +structure ListPair : LIST_PAIR
    +structure Option : OPTION
    +structure Real64Array : MONO_ARRAY
    +structure Real64Vector : MONO_VECTOR
    +structure String : STRING
    +structure Text : TEXT
    +structure Vector : VECTOR
    +structure Word8Array : MONO_ARRAY
    +structure Word8Vector : MONO_VECTOR
    +
    +
    +
    +

    While it is very unlikely that these changes will break existing code, there are +a a couple scenarios in which the code might break. Namely, when use of open +introduces conflicts and when user code implements one of the affected Basis Library +signatures. Both of these examples occurred in the SML/NJ source code; the former +in the ml-yacc sources and the latter in the MLRISC sources.

    +
    + +
    +
    +
    +
    +
    +
    [2015/09/25]
    +
    +
    +

    Added the optional implementations of PackReal64Big and PackReal64Little. +This addition addresses feature request #82 +(Implementations of PACK_REAL missing). The implementation +uses the approach suggested by +Michael Sullivan.

    +
    + +
    +
    +
    +
    +
    +
    [2015/09/24]
    +
    +
    +

    Fixed bug #45 (Compiler bug in specialize phase). This bug +was in compiler/FLINT/opt/fcontract.sml and was the result of a bad +interaction between eta contraction and inlining. As part of the fix, +I cleaned up the code in this part of FLINT a bit.

    +
    + +
    +
    +
    +
    +
    +
    [2015/09/21]
    +
    +

    Improvements to the error messages produced by the ml-ulex lexer generator.

    + +
    +
    +
    +
    +
    +
    [2015/09/21]
    +
    +

    Added Ref structure and REF signature to Basis implementation +( +Basis Library Proposal 2015-007).

    + +
    +
    +
    +
    +
    +
    [2015/09/21]
    +
    +

    Added Fn structure and FN signature to Basis implementation +( +Basis Library Proposal 2015-005).

    + +
    +
    +
    +
    +
    +
    [2015/08/22]
    +
    +
    +

    Fixed bug #136 (Incorrect raising of exceptions in Real.fmt +and Time.fmt).

    +
    + +
    +
    +
    +
    +
    +
    [2015/08/14]
    +
    +
    +

    Added Either structure and EITHER signature to Basis implementation +( +Basis Library Proposal 2015-002).

    +
    + +
    +
    +
    +
    +
    +
    [2015/07/23]
    +
    +
    +

    Fixed bug #135 (Fails to build on Linux PowerPC).

    +
    + +
    +
    +
    +
    +
    +
    [2015/07/08]
    +
    +
    +

    Added Linux 4.* kernels to the list of operating systems recognized +by the .arch-n-opsys script (fixes bug #134).

    +
    + +
    +
    +
    +
    +
    +
    [2015/06/11]
    +
    +
    +

    Added Mac OS X 10.11 (El Capitan) to the list of operating systems +recognized by the .arch-n-opsys script.

    +
    + +
    +
    +
    +
    +
    +
    [2015/05/27]
    +
    +
    +

    Added support for Successor ML lexical extensions. These can be enabled +using the command-line option -Cparser.succ-ml=true or by the assignment

    +
    +
    +
    +
    Control.succML := true;
    +
    +
    +
    +

    at the REPL. The extensions are as follows:

    +
    +
    +
    +
    +
      +
    • +

      Underscore (“_”) as a separator in numeric literals; e.g., 123_456, +0wxff_ff_ff_f3, 123_456.1, …​

      +
    • +
    • +

      end-of-line comments, which are denoted using (*). End-of-line comments +properly nest into conventional block comments. For example, the following +block comment is well formed:

      +
      +
      +
      (*
      +fun f x = x (*) my identity function *)
      +*)
      +
      +
      +
    • +
    • +

      binary literals for both integers and words; e.g., 0b0101_1110, or +0wb1101.

      +
    • +
    +
    +
    +
    +
    +

    This change is the beginning of a program to add Successor ML feature to SML/NJ; +See https://github.com/SMLFamily/Proposed-Definition-of-Successor-ML for more +details.

    +
    + +
    +
    +
    +
    +
    +
    +

    Version 110.78; 2014/12/24

    +
    +
    +
    +
    [2014/12/19]
    +
    +
    +

    Major revision of the machinery for overloading resolution for both +operators (vars) and literals, now using a common mechanism. This fixes +bug #52 by improving the error message when an overloaded +operator is inconsistent with its context. Updated 23 files, including +major changes in overload.sml, types.sml, unify.sml, elabcore.sml, +typesutil.sml. The overload declaration is still used in +pervasives.sml, where the order of the specified instances of an +ordering determines the default interpretation (i.e., the first one).

    +
    +
    +

    The SCHEME and LITERAL forms of tyvars are replaced by a new +OVLD form that tracks potential instantiations of the type of the +overloaded vars or literals.

    +
    + +
    +
    +
    +
    +
    +
    [2014/12/18]
    +
    +
    +

    Moved base/NOTES/HISTORY file to doc/src/changelog/HISTORY.txt +and converted it to ASCIIDOC format. Have also +moved the README files from base/READMES to doc/src/release-notes. +These changes are part of a general effort to rationalize and improve +the documentation of the SML/NJ system.

    +
    + +
    +
    +
    +
    +
    +
    [2014/12/13]
    +
    +
    +

    Preliminary cleanups before changes to overloading

    +
    +
    +

    Minor cleanup in Elaborator/elaborate/elabcore (function elabOVERLOADdec) +and in ElabData/types/typesutil.sml (function matchScheme). Preparing for +a new method of handling type checking of overloaded operators. [Note +that there is no reason for the options field of OVLDvar to be a +reference — it is never updated. Changing this requires corresponding +change in pickling.]

    +
    +
    +

    Also added an etopdebugging flag (ElabControl) for debugging in +elabtop.sml. Modified elabcontrol.{sml,sig} and +elabtop.sml. Also rearranged ast and absyn printing in evalloop.sml.

    +
    + +
    +
    +
    +
    +
    +
    [2014/10/23]
    +
    +
    +

    Improved error messages in ml-ulex for unclosed strings. Also made +documentation improvements.

    +
    + +
    +
    +
    +
    +
    +
    [2014/10/11]
    +
    +
    +

    Added -D_FILE_OFFSET_BITS=64 flag to x86-linux makefile. This flag +is necessary to avoid spurious EOVERFLOW errors on some versions of +Linux. The problem appears to be limited to large file systems that +have more than 232 inodes.

    +
    + +
    +
    +
    +
    +
    +
    [2014/09/13]
    +
    +
    +

    Added %value directive to ml-antlr; this addition improves the +error repair choices by allowing non-nullary tokens to be inserted +when making repairs.

    +
    + +
    +
    +
    +
    +
    +
    +

    Version 110.77; 2014/08/22

    +
    +
    +
    +
    [2014/08/21]
    +
    +
    +

    Created new doc tree in SML/NJ repository. Currently this +tree just holds the sources for UNIX-style manual pages for +the command-line tools (fixing bug #35). The documentation +is written using the ASCIIDOC format. Use +the following svn command to checkout a copy of the documentation tree:

    +
    +
    +
    +
    svn co https://smlnj-gforge.cs.uchicago.edu/svn/smlnj/doc/trunk doc
    +
    +
    + +
    +
    +
    +
    +
    +
    [2014/08/19]
    +
    +
    +

    Compiling the runtime system on cygwin was failing because the file +exceptions.h was missing. It appears to have been part of previous +versions, so a version has been incorporated verbatim in the +file runtime/mach-base/cygwin-fault.c file (fixes bug #125).

    +
    + +
    +
    +
    +
    +
    +
    [2014/08/19]
    +
    +
    +

    Added the actionToString' and repairToString' functions +to the AntlrRepair structure. These functions allow one to +specialize the printing of tokens based on whether they are +being added or deleted.

    +
    + +
    +
    +
    +
    +
    +
    [2014/08/17]
    +
    +
    +

    Added patches to support OpenBSD on PowerPC. The patches were +contributed by Jasper Lievisse Adriaanse (fixes bug #124).

    +
    + +
    +
    +
    +
    +
    +
    [2014/08/17]
    +
    +
    +

    Use mkstemp to implement OS.FileSys.tmpName() on systems that support +it (should be all modern versions of Unix). This change fixes bug #128. +(Thanks to Johannes 5 Joemann).

    +
    + +
    +
    +
    +
    +
    +
    [2014/08/17]
    +
    +
    +

    Fixed a bug in IntInf.~>>, which did not handle negative arguments +correctly (bug #110).

    +
    + +
    +
    +
    +
    +
    +
    [2014/08/14]
    +
    +
    +

    Fixed a problem in the CPS contraction phase. An optimization that +eliminates construction of a record that already exists was not +checking that the existing record was the same record kind (bug #119).

    +
    + +
    +
    +
    +
    +
    +
    [2014/07/28]
    +
    +
    +

    Switch to using MAP_ANONYMOUS to allocate memory on Linux systems. +This change avoids problems when "/dev" does not support execute +permission (as seems to be the case with some versions of Linux +running on ChromeBooks; bug #120).

    +
    + +
    +
    +
    +
    +
    +
    [2014/06/28]
    +
    +
    +

    Fix for bug #127 (Crash on Windows with OS.Process.system).

    +
    + +
    +
    +
    +
    +
    +
    [2014/06/07]
    +
    +
    +

    Fixed a long-standing bug in Socket.recvVec, which prevented the result +from being used in a string pattern match (thanks to Vesa Norrman +for the fix).

    +
    + +
    +
    +
    +
    +
    +
    [2014/05/01]
    +
    +
    +

    Fixed minor issue in an error message; type variable name should be +printed with leading '.

    +
    + +
    +
    +
    +
    +
    +
    [2013/11/25]
    +
    +
    +

    Added PackWord{16,32}{Big,Little} structures +to the Unsafe module. This change makes the UNSAFE signature +closer to the MLton version, although we +still need to add the PackReal structures.

    +
    + +
    +
    +
    +
    +
    +
    +

    Version 110.76; 2013/07/01

    +
    +
    +
    +
    [2013/06/04]
    +
    +
    +

    Fix bug #115 (BinPrimIO writer method getPos does not +work under CML). Just needed to port the position update from mkReader +code to the mkWriter code.

    +
    + +
    +
    +
    +
    +
    +
    [2013/06/04]
    +
    +
    +

    Fix bug #111 (Socket.acceptNB returns somewhat broken sockets). +The problem was that under Win32, sockets returned from accept inherit +their parents' non-blocking status, whereas on UNIX they are always +blocking.

    +
    + +
    +
    +
    +
    +
    +
    [2013/05/20]
    +
    +
    +

    Fix bug #117 (BinIO.openAppend raises IO on non-existent file). +We were opening the file for append if it existed but not creating it +if it did not exist.

    +
    + +
    +
    +
    +
    +
    +
    [2013/05/02]
    +
    +
    +

    Fix bug #116 (Socket.sameDesc raises Match exception). +The problem is that on Windows the iodesc datatype (defined in +Basis/Implementation/Win32/pre-os.sml) has both an IODesc constructor +and a SockDesc constructor. Updated the code in Win32/os-io.sml to +handle the SockDesc constructor.

    +
    + +
    +
    +
    +
    +
    +
    [2013/04/19]
    +
    +
    +

    Fix bug #113 (Socket.select waits exactly twice the indicated timeout)

    +
    + +
    +
    +
    +
    +
    +
    [2013/01/19]
    +
    +
    +

    Fix AMD64 code generator to properly sign-extend arguments to IDIVQ.

    +
    + +
    +
    +
    +
    +
    +
    [2012/10/20]
    +
    +
    +

    Fix bug #108 (off-by-one error in Util/dynamic-array.sml; +iterators crash)

    +
    + +
    +
    +
    +
    +
    +
    [2012/10/20]
    +
    +
    +

    Fix bug #107 (Bogus Int64 comparison operators)

    +
    + +
    +
    +
    +
    +
    +
    +

    Version 110.75; 2012/10/01

    +
    +
    +
    +
    [2012/09/28]
    +
    +
    +

    Fixed bug #92. IntInf.scan now handles the “0x” prefix +correctly. Also made minor improvements to the NumScan module.

    +
    + +
    +
    +
    +
    +
    +
    [2012/09/24]
    +
    +
    +

    Added Base64 module to SML/NJ Library to support encoding and decoding +Word8 vectors as base64 strings.

    +
    + +
    +
    +
    +
    +
    +
    [2012/09/23]
    +
    +
    +

    Additions to the SML/NJ Library. Added exists, existsi, all, +and alli functions to ORD_MAP signature and implementations, +and added all function to ORD_SET signature and implementations.

    +
    + +
    +
    +
    +
    +
    +
    [2012/09/21]
    +
    +
    +

    Bug fix in ml-antlr to ensure that the generated toString function +for tokens is strictly legal SML code (i.e., non-printing characters +and UTF8 multibyte sequences are properly escaped).

    +
    + +
    +
    +
    +
    +
    +
    [2012/09/11]
    +
    +
    +

    Added getu function to ULexBuffer as a way to improve ml-ulex +performance. This addition allows a fastpath for processing ASCII +characters, which improved lexer performance by 3-4%.

    +
    + +
    +
    +
    +
    +
    +
    [2012/08/02]
    +
    +
    +

    Fixed bugs #89 and #96: Build Failure with Xcode 4.3 +Also removed build support for MacOS X pre-10.5 (Leopard) on PPC and +pre-10.6 (Snow Leopard) on Intel.

    +
    + +
    +
    +
    +
    +
    +
    [2012/02/05]
    +
    +
    +

    Fixed bug #88. The check for valid arcs on Unix systems now allows +any character other than slash or nul.

    +
    + +
    +
    +
    +
    +
    +
    +

    Version 110.74; 2012/01/20

    +
    +
    +
    +
    [2012/01/20]
    +
    +
    +

    Fixed implementation of Real.signBit on little-endian machines.

    +
    + +
    +
    +
    +
    +
    +
    [2012/01/19]
    +
    +
    +

    1) Fix for bug #60 + recalculate strictness for DEFtyc`s in functor bodies when functor + is applied (`Elaborator/modules/evalent.sml)

    +
    +
    +

    2) Fix for bug #77 + separate ast representations for datatypes and datatype replications + in decs and specs (multiple files)

    +
    +
    +

    3) set version to 110.74

    +
    +
    +

    Details in NOTES/changes/dbm_2012_1.

    +
    + +
    +
    +
    +
    +
    +
    [2012/01/12]
    +
    +
    +
      +
    1. +

      Change of SourceMap interface. +related to fix of off-by-one error in lexer (committed earlier?), +and cleanup of noweb code added by Norman Ramsey many years +ago (but little used today).

      +
    2. +
    3. +

      Slight cleanup of match compiler, eliminating compiler/FLINT/tempexpn.sml +file that was part of unused implementation of pattern templates (pattern macros).

      +
    4. +
    5. +

      Modification of type checker to add "culprit tracking" for +improved type error messages (printing of additional culprit +information is controlled by ElabControl.showTypeErrorCulprits +flag, default false, added in revision 3652). The culprit tracking needs to be debugged +and improved, and the presentation of the culprits needs to be done better.

      +
      +

      Details in NOTES/changes/dbm_2012_1.

      +
      + +
    6. +
    +
    +
    +
    +
    +
    +
    +
    [2011/11/25]
    +
    +
    +

    Bug fixes for Unsafe.blastRead (bug #76): + . proper error handling when reading from memory and there are + not enough bytes. + . pass correct data pointer and length to BlastIn (code was using + old macros).

    +
    +
    +

    + +John Reppy

    +
    +
    +
    +
    +
    +
    +
    [2011/11/25]
    +
    +
    +

    Added hash-table-based implementation of sets to SML/NJ Library.

    +
    + +
    +
    +
    +
    +
    +
    [2011/10/25]
    +
    +
    +

    Better error reporting under 32-bit linux for the missing dpkg +support (bug #70). +Enable 3.x kernels to build (bugs #80, #81, #83).

    +
    + +
    +
    +
    +
    +
    +
    [2011/05/23]
    +
    +
    +

    Added new S-expression library to SML/NJ Library +(contributed by Damon Wang)

    +
    + +
    +
    +
    +
    +
    +
    [2011/05/17]
    +
    +
    +

    Fixed bug in JSON scanner (SML/NJ Library). It didn’t handle escaped +backslash or double quote correctly.

    +
    + +
    +
    +
    +
    +
    +
    +

    Version 110.73; 2011/05/13

    +
    +
    +
    +
    [2011/05/10]
    +
    +
    +

    Added boolean literals (true and false) to the conditional-expression +syntax in CM. Thus, you can write

    +
    +
    +
    +
    #if true
    +  structure Foo
    +#endif
    +
    +
    +
    +

    in a CM file. This change is meant to make it easier to use autoconf +to configure the build process of an SML application.

    +
    + +
    +
    +
    +
    +
    +
    [2011/05/09]
    +
    +
    +

    Added missing String.scan function (bug #69). This also +fixes the handling of certain corner cases by String.fromString.

    +
    + +
    +
    +
    +
    +
    +
    [2011/05/03]
    +
    +
    +

    Added the RTDSC and RTDSCP instructions to the amd64 code generator.

    +
    + +
    +
    +
    +
    +
    +
    [2011/04/08]
    +
    +
    +

    Added fix for comments in code bug (bug #63). +Thanks to Michael Norrish.

    +
    + +
    +
    +
    +
    +
    +
    [2011/04/08]
    +
    +
    +

    Fixed bug in Socket.acceptNB (bug #59)

    +
    + +
    +
    +
    +
    +
    +
    [2011/03/31]
    +
    +
    +

    Fixed syntax error in ml-lex compatibility mode (bug #49)

    +
    + +
    +
    +
    +
    +
    +
    [2011/03/22]
    +
    +
    +

    Update _arch-n-opsys script for Mac OS X Lion (10.7).

    +
    + +
    +
    +
    +
    +
    +
    [2011/02/18]
    +
    +
    +

    Added Barriers module to CML.

    +
    + +
    +
    +
    +
    +
    +
    [2011/02/10]
    +
    +
    +

    Fixed ml-yacc examples to respect the changed signatures with respect +to TextIO.inputLine.

    +
    + +
    +
    +
    +
    +
    +
    [2010/09/16]
    +
    +
    +

    Changed the Win32 implementation of validArc to support directories +with extended characters (umlauts, etc.).

    +
    + +
    +
    +
    +
    +
    +
    [2010/09/16]
    +
    +
    +

    Fixed the Win32 socket and polling implementation to work correctly +with CML. Signature of poll was wrong and didn’t handle sockets at all.

    +
    + +
    +
    +
    +
    +
    +
    [2010/06/16]
    +
    +
    +

    Fixed Real.toString and Real.fmt to include sign for negative zero.

    +
    + +
    +
    +
    +
    +
    +
    [2010/03/23]
    +
    +
    +

    Fixed the bug with Win32 calls to OS.Process.system not quoting +the string.

    +
    + +
    +
    +
    +
    +
    +
    [2010/02/11]
    +
    +
    +

    Applied patch for building on more recent versions of NetBSD +(bug #39).

    +
    + +
    +
    +
    +
    +
    +
    +

    Version 110.72; 2010/02/02

    +
    +
    +
    +
    [2009/12/20]
    +
    +
    +

    Fixed performance bugs in List module by making @ and foldr be +tail recursive (bug #51).

    +
    + +
    +
    +
    +
    +
    +
    [2009/12/11]
    +
    +
    +

    Fixed the Win32 unable to print long strings bug (bug #37).

    +
    + +
    +
    +
    +
    +
    +
    [2009/12/10]
    +
    +
    +

    Fixed an overrun during major GC. If the string arena was nearly +full, it was possible for alignment padding added during copy to +the to-space to overrun the allocated size.

    +
    + +
    +
    +
    +
    +
    +
    [2009/11/18]
    +
    +
    +

    The ml-antlr and ml-ulex programs have been ported to build under +mlton.

    +
    + +
    +
    +
    +
    +
    +
    [2009/11/17]
    +
    +
    +

    Added %header directive to the ml-ulex scanner generator. Also +updated the documentation.

    +
    + +
    +
    +
    +
    +
    +
    [2009/11/17]
    +
    +
    +

    Added @SMLsuffix flag to sml command. This flag can be used to get +the suffix for heap files.

    +
    + +
    +
    +
    +
    +
    +
    [2009/11/17]
    +
    +
    +

    Added --strict-sml flag to ml-ulex for MLton compatibility.

    +
    + +
    +
    +
    +
    +
    +
    [2009/11/10]
    +
    +
    +

    Added %header directive to the ml-antlr parser generator. Also +updated the documentation.

    +
    + +
    +
    +
    +
    +
    +
    +

    Version 110.71; 2009/09/16

    +
    +
    +
    +
    [2009/09/13]
    +
    +
    +

    Changes to support compiling the runtime system on Mac OS X 10.6 +(aka Snow Leopard).

    +
    + +
    +
    +
    +
    +
    +
    [2009/08/19]
    +
    +
    +

    Fixed a bug in the register-spill generator that is part of the MLRISC +register allocator. The problem was that the code in RASpillWithRenaming +functor assumed incorrectly that dedicated registers would appear in +def/use information generated by ClusterRA. Thanks to Allen Leung for +helping with this bug.

    +
    + +
    +
    +
    +
    +
    +
    [2009/07/09]
    +
    +
    +

    Removed redundant implementations of various top-level operations by +consolidating them in base/system/smlnj/init/pervasive.sml. This +change also fixes a bug in that the top-level version of round was +incorrect.

    +
    + +
    +
    +
    +
    +
    +
    +

    Version 110.70; 2009/06/15

    +
    +
    +
    +
    [2009/06/12]
    +
    +
    +
    +
    +
      +
    1. +

      Corrected problem in config/actions that led to the so-called +"unpickling bug" which appeared in version 110.68.

      +
    2. +
    3. +

      Provided fix for the 64-bit pattern match bug.

      +
    4. +
    +
    +
    +
    + +
    +
    +
    +
    +
    +
    [2009/03/21]
    +
    +
    +

    Fixed bug in Int32.fmt when the argument was the minimum int and the +radix was something other than DEC.

    +
    + +
    +
    +
    +
    +
    +
    [2009/02/21]
    +
    +
    +

    Fixed bugs in how ml-antlr parsed ML types in %tokens specifications.

    +
    + +
    +
    +
    +
    +
    +
    [2009/01/13]
    +
    +
    +

    Picking up some additional fixes for 110.69, including a fix for +spaces in CM file paths.

    +
    + +
    +
    +
    +
    +
    +
    +

    Version 110.69; 2008/12/22

    +
    +
    +
    +
    [2008/12/06]
    +
    +
    +

    Re-enabled some CPS optimizations (first_contract and eta). The most +important effect of this change is to make uses of SMLofNJ.Cont.capture +be properly tail recursive.

    +
    + +
    +
    +
    +
    +
    +
    [2008/12/03]
    +
    +
    +

    New concurrency-related instructions for x86 and amd64 code +generators.

    +
    +
    +
    +
    +
    +
    +
    +
      +
    • +

      PAUSE: Notify the CPU that the program is spin waiting.

      +
    • +
    • +

      MFENCE: memory fence for reads and writes.

      +
    • +
    • +

      SFENCE: memory fence for writes.

      +
    • +
    • +

      LFENCE: memory fence for reads.

      +
    • +
    +
    +
    +
    + +
    +
    +
    [2008/12/02]
    +
    +
    +

    Added makefile and other support for building runtime on OpenBSD.

    +
    + +
    +
    +
    +
    +
    +
    +

    Version 110.68; 2008/08/13

    +
    +
    +
    +
    [2008/08/11]
    +
    +
    +

    Minor fix to ml-ulex backend for regexps that match any character +and perform a single action. Previously the emitted code would +not allow the regexps to match any character at all.

    +
    + +
    +
    +
    +
    +
    +
    [2008/08/05]
    +
    +
    +

    Added build support for OpenBSD (thanks to Brian O’Hanlon).

    +
    + +
    +
    +
    +
    +
    +
    [2008/07/12]
    +
    +
    +

    Fixed Int64.fromString to use base-10.

    +
    + +
    +
    +
    +
    +
    +
    [2008/04/12]
    +
    +
    +

    Various updates to the ml-lpt tree. The documentation has been updated; +bugs in the parsing of negation and character classes in ml-ulex have +been fixed; and changes have been made to make the ml-lpt tools more +compatible with MLton (and other SML implementations). Thanks to +Matthew Fluet and Aaron Turon for their patches.

    +
    + +
    +
    +
    +
    +
    +
    [2008/20/04]
    +
    +
    +

    Implemented timer-based profiling on Windows, with behavior as close +to the *nix ITIMER-based profiling as possible.

    +
    + +
    +
    +
    +
    +
    +
    [2008/07/04]
    +
    +
    +

    Finished off the Windows subset of the basis library. Added process +support and various configuration and system identification utils.

    +
    + +
    +
    +
    +
    +
    +
    [2008/03/18]
    +
    +
    +

    Major changes to the RegExp library: see smlnj-lib/CHANGES for details.

    +
    + +
    +
    +
    +
    +
    +
    [2008/02/14]
    +
    +
    +

    Added the Windows Status structure

    +
    + +
    +
    +
    +
    +
    +
    [2008/02/14]
    +
    +
    +

    Added the Windows DDE structure

    +
    + +
    +
    +
    +
    +
    +
    [2008/02/05]
    +
    +
    +

    Added the Windows Config structure

    +
    + +
    +
    +
    +
    +
    +
    [2008/01/31]
    +
    +
    +

    Added outline of the Windows basis library and the basic registry +functionality.

    +
    + +
    +
    +
    +
    +
    +
    [2008/01/23]
    +
    +
    +

    Fixed the amd64 code generator to compile with the current MLRISC.

    +
    + +
    +
    +
    +
    +
    +
    [2007/11/26]
    +
    +
    +

    nlffi was updated to work on Windows. It needed to pass in the correct +value for the name of the kernel32 DLL to obtain 'base' bindings. +Additionally updated the README for the most basic nlffi sample with +what you need to do on MacOSX and on Windows to make it work.

    +
    + +
    +
    +
    +
    +
    +
    [2007/11/21]
    +
    +
    +

    Overwrite the SMLNJ_HOME environment variable on installation +Properly change the package code so that subsequent version +installations prompt for uninstall (instead of 'repair/remove')

    +
    + +
    +
    +
    +
    +
    +
    [2007/11/14]
    +
    +
    +

    Cleaned up WININSTALL file for new MSI-based setup.

    +
    + +
    +
    +
    +
    +
    +
    +

    Version 110.67; 2007/11/13

    +
    +
    +
    +
    [2007/11/12]
    +
    +
    +

    Ensure that the size of the allocation space is at least 128K.

    +
    + +
    +
    +
    +
    +
    +
    [2007/11/05]
    +
    +
    +

    Fixed type error in ml-lpt library that occurs when compiling +against a basis that was compiled with the USE_64_BIT_POSITIONS +symbol set. +(Thanks to Johannes Joemann)

    +
    + +
    +
    +
    +
    +
    +
    [2007/11/03]
    +
    +
    +

    Fixed the amd64 code generator to compile with the current MLRISC.

    +
    + +
    +
    +
    +
    +
    +
    [2007/11/02]
    +
    +
    +

    Made sml.bat more resilient to either not having run the installer +and having no SMLNJ_HOME set or having just shuffled the directory +around.

    +
    + +
    +
    +
    +
    +
    +
    [2007/11/01]
    +
    +
    +

    A collection of bug fixes for machine.sml in the Reactive library. +(Thanks to Timothy Bourke)

    +
    + +
    +
    +
    +
    +
    +
    [2007/10/28]
    +
    +
    +

    Patches for Mac OS X 10.5 (Leopard).

    +
    + +
    +
    +
    +
    +
    +
    [2007/10/28]
    +
    +
    +

    Fixed some bugs in the AMD64 floating-point spilling code.

    +
    + +
    +
    +
    +
    +
    +
    [2007/10/25]
    +
    +
    +

    Added support for the atomic XCHG instruction.

    +
    + +
    +
    +
    +
    +
    +
    [2007/10/22]
    +
    +
    +

    Added AMD64 support for floating-point negation.

    +
    + +
    +
    +
    +
    +
    +
    [2007/10/22]
    +
    +
    +

    Fixed ^C handling in Windows +Added a Windows installer +Made it possible to build for Windows on a mapped drive from Parallels

    +
    + +
    +
    +
    +
    +
    +
    [2007/10/22]
    +
    +
    +

    Added AMD64 support for the atomic fetch and add instruction.

    +
    + +
    +
    +
    +
    +
    +
    [2007/10/18]
    +
    +
    +

    The GAS output now favors p2align over align, since the +former is guaranteed to be consistent over multiple architectures +and the latter is not.

    +
    + +
    +
    +
    +
    +
    +
    [2007/10/18]
    +
    +
    +

    Fixed a bug in register spilling.

    +
    + +
    +
    +
    +
    +
    +
    [2007/10/2]
    +
    +
    +

    Added demo support for AMD64 for quick testing and fixed support for +64-bit label constants.

    +
    + +
    +
    +
    +
    +
    +
    [2007/09/20]
    +
    +
    +

    Added support for the MLRISC COND instruction and the x86-64 CMOVCC +instruction.

    +
    + +
    +
    +
    +
    +
    +
    [2007/09/17]
    +
    +
    +

    Fixed an instruction-selection bug when loading 64-bit labels.

    +
    + +
    +
    +
    +
    +
    +
    [2007/09/14]
    +
    +
    +

    Fixed Elaborator and Translate performance bugs

    +
    + +
    +
    +
    +
    +
    +
    [2007/09/12]
    +
    +
    +

    Fixed xorl memory argument bug.

    +
    + +
    +
    +
    +
    +
    +
    [2007/07/27]
    +
    +
    +

    Added f64sgn (for Real64.signBit) as a primop defined +in MLRiscGen. signBit(~0.0) not handled correctly.

    +
    + +
    +
    +
    +
    +
    +
    [2007/06/21]
    +
    +
    +

    Fixed bug in {TextIO,BinIO}`.StreamIO.endOfStream` that +would incorrectly signal end of stream.

    +
    + +
    +
    +
    +
    +
    +
    [2007/06/12]
    +
    +
    +

    Eliminated config/allsources. The information is now drawn directly +from config/actions.

    +
    + +
    +
    +
    +
    +
    +
    +

    Version 110.65; 2007/06/07

    +
    +
    +
    +
    [2007/06/06]
    +
    +
    +
    +
    +
      +
    • +

      Aaron: fixed a number of bugs in ml-ulex.

      +
    • +
    • +

      Matthias:

      +
      +
        +
      • +

        added CM control cm.force-tools; this is false by default; +when set to true, then tools like ml-yacc, ml-lex, ml-ulex, etc. +will be forced to run regardless of whether or not their targets +are up-to-date

        +
      • +
      • +

        changed installer code so that config/install.sh will re-build +heap images for all tools even if those heap images already existed

        +
      • +
      +
      +
    • +
    +
    +
    +
    + +
    +
    +
    +
    +
    +
    [2007/06/04]
    +
    +
    +

    After Aaron Turon’s bug fix for ml-ulex (handling the ^ character +in legacy mode), re-ran the lexer generator on all lex input files +and committed the results.

    +
    +
    +

    This should fix the problem with ckit and nlffi that was reported +by Vesa A. Norrman.

    +
    + +
    +
    +
    +
    +
    +
    +

    Version 110.64; 2007/05/31

    +
    +
    +
    +
    [2007/05/31]
    +
    +
    +

    3rd merge of base from primop-branch-3 into the trunk. +Additional bug fixes included:

    +
    +
    +
      +
    • +

      Timer.cpu_timer, etc. type printing corrected (by making +Timer have opaque sig constraint in basis/Implementation/timer.sml)

      +
    • +
    • +

      Infinite loop in FLINT (tests/typing/tests/25.sml) (fix by +Stefan Monnier)

      + +
    • +
    +
    +
    +
    +
    +
    +
    +
    [2007/05/31]
    +
    +
    +

    Fixed some bugs in new Div code in FLINT/trans/translate.sml.

    +
    +
    +

    Also, changed the handling of "no core access": When translate.sml +needs access to a core exception at a time when the core has not +been set up yet (this only happens when compiling system/smlnj/init/*), +then don’t bother generating the corresponding tests.

    +
    +
    +

    The old scheme was to generate a bogus value to be used in place of +the exception. Unfortunately, that confuses the plambda type checker. +Moreover, it does not do any good, because at runtime we don’t expect +such an exception to be ever raised. (The code in system/smlnj/init/* +has to be written very carefully with this in mind!)

    +
    + +
    +
    +
    +
    +
    +
    [2007/05/29]
    +
    +
    +

    Added FSQRT instructions for the AMD64 code generator.

    +
    + +
    +
    +
    +
    +
    +
    [2007/05/29]
    +
    +
    +

    FLINT/trans/translate.sml now wraps all DIV/MOD/QUOT/REM +operations with an explicit test for zero division. This should +fixe several regressions and makes it possible for downstream optimization +phases to treat these operations as "pure" when they are applied to unsigned +operands.

    +
    + +
    +
    +
    +
    +
    +
    [2007/05/29]
    +
    +
    +

    Added the new MLRISC code generator for the AMD64. This version, +in contrast to the previous one, uses SSE registers and instructions +for all floating-point computations.

    +
    + +
    +
    +
    +
    +
    +
    [2007/05/29]
    +
    +
    +

    A number of fixes related to the formatting of dates. These include fixes +for bugs #1415 and #1416. We also now correctly handle format characters that +lie outside the specified set.

    +
    + +
    +
    +
    +
    +
    +
    [2007/05/23]
    +
    +
    +

    CMB (and CM) now automatically defines the CM "preprocessor" symbol +NO_PLUGINS during "makeml -rebuild" or when CM operates in "slave" mode.

    +
    +
    +
    +
    (In addition, CMB_REBUILD_MODE is defined for `makeml -rebuild`,
    + CM_SLAVE_MODE is defined in attached slaves while running CM.make
    + or CM.rebuild, and CMB_SLAVE_MODE is defined in attached slaves
    + while running `CMB.make`.  The point is that the single symbol
    + NO_PLUGINS is definde in all three cases.)
    +
    +
    +
    +

    I changed the three locations within the sources that get compiled +during CMB.make where ml-yacc or ml-lex input is processed: When +NO_PLUGINS defined, then the use of the mlyacc and mllex tools +is bypassed.

    +
    +
    +

    When bootstrapping new versions of the compiler, there can be +situations where the plugin tools for ml-yacc and ml-lex (or ml-ulex) +are not available or otherwise not operational. In this case one can +manually define the NO_PLUGINS symbol prior to running CMB.make(). To +do so, the following command should be issued at the interactive +prompt:

    +
    +
    +
    +
       #set (CMB.symval "NO_PLUGINS") (SOME 1);
    +
    +
    +
    +

    A CMB.make with NO_PLUGINS defined relies on the existence of the +files that normally would be generated by ml-yacc and ml-lex. +(Copies of these files are in the repository.)

    +
    + +
    +
    +
    +
    +
    +
    [2007/5/23]
    +
    +
    +

    Second merge of base from primop-branch-3 into the trunk. +Additional bug fixes included:

    +
    +
    +
    +
    +
    +
    +
    +
      +
    • +

      Date.scan and Date.fromString fixed;

      +
    • +
    • +

      Overloading resolution fixed and some type printing problems corrected.

      +
    • +
    +
    +
    +
    + +
    +
    +
    [2007/05/23]
    +
    +
    +

    Changed the installation mechanism for CM tool plugins. These are +just libaries and now get installed like ordinary libaries.

    +
    +
    +

    There are now a number of new installation targets that give some +fine-grain control over what classes and suffixes are known, and what +they will map to. +See config/targets for details.

    +
    +
    +

    The code that caused plugin installation as part of running a +tool’s "build" script has been removed. (The build script is for +building, not for installing.)

    +
    + +
    +
    +
    +
    +
    +
    [2007/05/22]
    +
    +
    +

    Added a boolean control named cm.tolerate-tool-failures (env. variable +name: CM_TOLERATE_TOOL_FAILURES). The default is false and makes +CM fail if a shell tool reports a non-success exit status. If +the control is set to true, then CM will press on after tool failures +in the event that the target files exist (even though they are +considered outdated). Turning the control to true can be useful for +bootstrapping.

    +
    + +
    +
    +
    +
    +
    +
    [2007/05/19]
    +
    +
    +

    Merge of ml-lpt revisions for 110.64.

    +
    +
    +

    The name of several ml-lpt-lib modules has changed:

    +
    + +++++ + + + + + + + + + + + + + + + + + + + + + + +

    Repair

    AntlrRepair

    StreamPos

    AntlrStreamPos

    ErrHandlerFn

    AntlrErrHandler

    EBNF

    AntlrEBNF

    +
    +

    The ml-antlr specification format has changed: declarations such as +%tokens and nonterminal definitions can occur multiple times in the +same specification. The semantics are such that each new declaration +extends the previous ones. This does not apply to %start or %name, +of course.

    +
    +
    +

    Importing a grammar via %import now includes all declarations in from +the imported grammar, except for %name, %entry, and %start. Tokens +and nonterminals can be dropped using the new %dropping clause of the +%import directive; the separate %drop and %extend have been removed.

    +
    +
    +

    We now allow optional type annotations on nonterminals, using the %nonterms +directive as in ml-yacc.

    +
    +
    +

    The refcell construct is now implemented using SML’s regular reference +cells, so the :== and !! notation has been deprecated.

    +
    +
    +

    The ml-antlr tool now does much more checking of specifications, and +its error messages have been greatly improved. Error repair for +generated parsers has been completely rewritten, and is now both +much faster and more accurate.

    +
    +
    +

    ml-ulex is now more lenient with escape codes (non-SML-standard +escape codes are now interpreted literally, so e.g., \| denotes “|”). +Also, character classes may now include a “-” character at the beginning +as is standard in most other regexp tools.

    +
    +
    +

    All of these changes are documented in the user guide, which has +been updated and improved with this merge.

    +
    + +
    +
    +
    +
    +
    +
    [2007/05/03]
    +
    +
    +

    Merge of the primop3 branch (base) into the trunk to create 110.63.1. +Significant changes in FLINT and the front end, mostly having to do +with a reorganized system for handling primops. Various bug fixes +and improvements in printing signatures.

    +
    + +
    +
    +
    +
    +
    +
    [2007/05/02]
    +
    +
    +

    Preliminary commit of large ml-lpt revisions (more to come for 110.64). +The ml-antlr specification format has changed: declarations such as +%tokens and nonterminal definitions can occur multiple times in the +same specification. The semantics are such that each new declaration +extends the previous ones. Grammar extension constructs have also +changed. We now allow type annotations on nonterminals. Finally, +the refcell construct is now implemented using SML’s regular reference +cells, so the :== and !! notation has been deprecated. All of these +changes will appear in the 110.64 user guide for ml-lpt.

    +
    +
    +

    The ml-antlr tool now does much more checking of specifications, and +its error messages have been greatly improved. There has also been +some work on the error repair process for generated parsers, but this +will be further improved in 110.64.

    +
    + +
    +
    +
    +
    +
    +
    [2007/04/24]
    +
    +
    +

    More Basis fixes: The Char.fromString (etc.) functions did not handle +the “\uxxxx” escape sequence. There is still an outstanding bug with +String.fromString the tail is a format escape. I added a comment to +this effect in Basis/Implementation/string.sml. +Thanks to Andreas Rossberg.

    +
    + +
    +
    +
    +
    +
    +
    [2007/04/24]
    +
    +
    +

    Added next function to Fifo and Queue modules in the SML/NJ Library.

    +
    + +
    +
    +
    +
    +
    +
    [2007/04/23]
    +
    +
    +

    More Basis fixes: Time.fmt dropped the leading “~” for negative time +values that had no whole part.

    +
    + +
    +
    +
    +
    +
    +
    [2007/04/23]
    +
    +
    +

    More Basis fixes: the OS.Path module did not include the InvalidArc +exception and did not do sufficient argument checking. +Thanks to Stephen Weeks and Adam Chilpala.

    +
    + +
    +
    +
    +
    +
    +
    [2007/04/16]
    +
    +
    +

    Fixed an unbound functor bug in the AMD64 CM file.

    +
    + +
    +
    +
    +
    +
    +
    [2007/04/12]
    +
    +
    +

    Substantially changed the signature and implementation of AMD64 +SVID. It now looks similar to the ia32 SVID, yet uses staged +allocation.

    +
    + +
    +
    +
    +
    +
    +
    [2007/04/07]
    +
    +
    +

    More Basis fixes: the WORD signature was missing {to,from}`Large.` +Thanks to Andreas Rossberg.

    +
    + +
    +
    +
    +
    +
    +
    +

    Version 110.63; 2007/03/22

    +
    +
    +
    +
    [2007/03/19]
    +
    +
    +

    Fixed bogus operand sizes in AMD64 instruction spilling.

    +
    + +
    +
    +
    +
    +
    +
    [2007/03/19]
    +
    +
    +

    Fixed a number of inconsistencies between the Posix.TTY structure +and the Basis specification. Thanks to Adam Chilpala.

    +
    + +
    +
    +
    +
    +
    +
    [2007/02/26]
    +
    +
    +

    Added preliminary support in MLRISC for Staged Allocation, a technique +for specifying calling conventions (see +http://www.eecs.harvard.edu/~nr/pubs/staged-abstract.html). +Initially, we plan to use this code to generate C calls for the AMD64.

    +
    +
    +

    The staged allocation code base resides in MLRISC/staged-allocation, +and specialized calling conventions go in MLRISC/ARCH/staged-allocation.

    +
    + +
    +
    +
    +
    +
    +
    [2007/02/20]
    +
    +
    +

    Bug fix: when SaveCState was called with two values to save, a subsequent +GC could cause the RestoreCState to fail because the saved state had been +promoted to tagless pair.

    +
    + +
    +
    +
    +
    +
    +
    [2007/02/20]
    +
    +
    +

    Fixed bug triggered by:

    +
    +
    +
    +
        val a1 = Word8Array.array(a1, 0w0);
    +    val _  = Word8Array.update(a1, 0, 0w128);
    +
    +
    +
    +

    The x86MCEmitter crashed when the immediate operand to MOVB was outside +of the range -128 …​ 127. I’ve changed the code so that the range +check is disabled. Only the low order 8 bits of the immediate operand +are now significant.

    +
    + +
    +
    +
    +
    +
    +
    [2007/02/15]
    +
    +
    +

    Eliminated any mention of lexgen, which was an early precursor +to ml-ulex.

    +
    +
    +

    You should update your admin directory, so the shell scripts for +maintaining your local copy of the repository reflect this change.

    +
    + +
    +
    +
    +
    +
    +
    [2007/02/14]
    +
    +
    +

    Fixed bug in CM’s parallel make facility that failed to have +the master re-link modules after letting slaves compile them.

    +
    + +
    +
    +
    +
    +
    +
    [2007/02/12]
    +
    +
    +

    Fixed typo in ml-build script that prevented library anchors +from being registered.

    +
    + +
    +
    +
    +
    +
    +
    +

    Version 110.62; 2007/02/02

    +
    +
    +
    +
    [2007/01/31]
    +
    +
    +

    Brought ml-lpt manual up-to-date with the code. Many minor +improvements to the tools, and a few minor bugfixes. Prepared +ml-lpt-lib for integration with new UTF8 structure. Changed +the interface for creating streams in ml-ulex (we now support +stream creation from several kinds of sources). Overall, the +tools are now quite stable, and their interfaces are unlikely to +change in a way that would break compatibility. More work, +however, is needed in the documentation.

    +
    + +
    +
    +
    +
    +
    +
    [2007/02/01]
    +
    +
    +

    Implemented library installer. Moved CM plugin code for +ml-burg, ml-lex, and ml-yacc out of CM source tree and into +their respective trees. Implemented CM plugin for ml-ulex +and ml-antlr. Used library installer for ml-burg, ml-ulex, +and ml-antlr.

    +
    +
    +

    For ml-yacc and ml-lex we continue to have permanently "plugged-in" +CM tools. (It turns out to be too messy to do otherwise because +there is too much code that during installation relies on the presence +of these tools — resulting in a tricky ordering problem.)

    +
    + +
    +
    +
    +
    +
    +
    [2007/01/30]
    +
    +
    +

    Added SMLofNJ.shiftArgs which is like a shell’s "shift" command. +Modified CM’s startup code to use shiftArgs as it processes +command line arguments. This way, the init code in each .sml-file +or library that is mentioned at the top level will see only +those arguments that have not yet been processed at this point. +In other words, the init code can "seize control" and process the +remaining command line.

    +
    + +
    +
    +
    +
    +
    +
    [2007/01/30]
    +
    +
    +

    Added fromList function to the ORD_SET interface and lookup to the +ORD_MAP interface. See the SML/NJ Library CHANGES file for details.

    +
    + +
    +
    +
    +
    +
    +
    [2007/01/28]
    +
    +
    +

    Added the UTF8 structure and signature from the Moby compiler to +the SML/NJ library (and the CML library). These modules will replace +the version in the ml-lpt-lib.

    +
    + +
    +
    +
    +
    +
    +
    [2007/01/26]
    +
    +
    +

    Added entries to handle ml-lpt-lib.cm in installer.

    +
    + +
    +
    +
    +
    +
    +
    +

    Version 110.61.1; 2006/12/15

    +
    +
    +
    +
    [2006/12/15]
    +
    +
    +

    Fixed brown-paper-bag bug with CM’s pathname handling, which made +installation fail under Win32.

    +
    +
    +

    This supersedes the pre-brown-paper-bug release (see below).

    +
    + +
    +
    +
    +
    +
    +
    +

    Version 110.61; 2006/12/14

    +
    +
    +
    +
    [2006/14/06]
    +
    +
    +

    Fixed the code in runtime/c-libs/posix-tty/{tcgetattr,tcsetattr}`.c` to +get the c_cc termios data copied correctly. Also moved the allocation +of the string to avoid problems if it caused a GC.

    +
    +
    +

    Thanks to Timothy Bourke for the bug report and fix.

    +
    + +
    +
    +
    +
    +
    +
    [2006/12/08]
    +
    +
    +

    Added code to CM’s "standard shell tool" implementation which +causes it to tolerate (with a warning) the situation where the shell +command fails (e.g., due to the shell command’s non-existence) +as long as all target files exist.

    +
    +
    +

    This makes it possible to, e.g., build ml-yacc from sources even if +svn checkout messed up the time stamps on files in such a way that +yacc.grm is younger than yacc.grm.sml or yacc.grm.sig. (Ml-yacc +would be needed to re-process yacc.grm, but obviously it might not yet be +available at that time.)

    +
    + +
    +
    +
    +
    +
    +
    [2006/12/06]
    +
    +
    +

    Fixed the types of recvVecFrom, recvVecFrom', recvVecFromNB, and +recvVecFromNB' in the SOCKET signature. This error is actually +in the SML Basis specification too.

    +
    + +
    +
    +
    +
    +
    +
    [2006/12/05]
    +
    +
    +

    CM now reports undefined anchors as errors and aborts execution +rather than silently pressing on using bogus values.

    +
    + +
    +
    +
    +
    +
    +
    [2006/11/29]
    +
    +
    +

    Use Say.vsay for printing the “[autoloading]” message, so +#set CM.Control.verbose false (or -Ccm.verbose=false) can be used +to suppress them.

    +
    + +
    +
    +
    +
    +
    +
    [2006/11/10]
    +
    +
    +

    Fixed bug in CM where “with:” specifications that affect compilation +(as opposed to parsing) were ignored.

    +
    + +
    +
    +
    +
    +
    +
    +

    Version 110.60; 2006/11/09

    +
    +
    +
    +
    [2006/11/09]
    +
    +
    +

    This is the first subversion-hosted release. There are also changes +to the directory layout. Here is a table:

    +
    + ++++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    config

    config

    src/cm

    base/cm

    src/compiler

    base/compiler

    src/runtime

    base/runtime

    src/system

    base/system

    src/cm/pgraph

    pgraph

    src/READMES

    base/READMES

    `src/$`note

    `base/NOTES/$`note

    for note in BOOT CVSNOTES CYGWININSTALL + HISTORY INSTALL MACOSXINSTALL WININSTALL

    src/smlnj-lib

    smlnj-lib

    src/MLRISC

    MLRISC

    ckit

    ckit

    src/cml

    cml

    src/eXene

    eXene

    src/heap2asm

    heap2asm

    src/lexgen

    lexgen

    src/ml-burg

    ml-burg

    src/ml-lex

    ml-lex

    src/ml-yacc

    ml-yacc

    src/ml-nlffi-lib

    nlffi/lib

    src/ml-nlffigen

    nlffi/gen

    src/smlnj-c

    smlnj-c

    src/tools/TraceDebugProf

    trace-debug-profile

    +
    +

    (All pathnames are relative to the SML/NJ "root" directory.)

    +
    +
    +

    In addition, there is also a new

    +
    +
    +
    +
      ml-lpt
    +
    +
    +
    +

    directory containing two new program generator tools: ml-ulex and +ml-antlr (a lexer generator that handles unicode and an ANTRL-inspired +LL(k) parser generator). These tools are currently "beta-quality"

    +
    +
    +

    The latest versios of the sources can now be obtained anonymously +via subversion. For this, it is useful to first check out

    +
    +
    +
    +
      svn://smlnj-gforge.cs.uchicago.edu/smlnj/admin
    +
    +
    +
    +

    and put the resulting directory on your shell’s PATH. This provides +access to three shell scripts: checkout-all.sh, stat-all.sh, +and refresh-all.sh.

    +
    +
    +

    To create a freshly checked-out copy of the sources, do

    +
    +
    +
    +
      checkout-all.sh [dir]
    +
    +
    +
    +

    where dir is the optional SML/NJ root directory (default is ".").

    +
    +
    +

    This creates the above directory layout. Each subdirectory of +the root is under individual subversion control. The stat-all.sh +and refresh-al.sh scripts apply "svn stat" or "svn update" to +each of these subtrees.

    +
    + +
    +
    +
    +
    +
    +
    [2006/11/02]
    +
    +
    +

    Reorganized directory layout.

    +
    +
    +

    This is a temporary solution, more reorganization is to come.

    +
    +
    +

    The basic idea is to have a number of toplevel trees, each corresponding +to a well-defined part of the overall system. Each part can be +maintained individually, even in separate source repositories, +although currently we still serve everything out of the main +smlnj-gforge tree.

    +
    +
    +

    Installer and and scripts have been updated to reflect the new +layout. The installer (base/src/system/smlnj/installer) is now +"scriptable" to avoid burning too much knowledge about the +layout into SML source code. The main script used by the installer +is in config/actions.

    +
    +
    +

    The main change is that many of the subdirectories of what used to +be known as "src" have moved to the toplevel. The "src" directory +itself has moved down into a subtree called "base". (We may eventually +get rid of the extra level of indirection represented by "src".)

    +
    +
    +

    The layout is now as follows:

    +
    + ++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    toplevel tree namedefault repository + (using svn://smlnj-gforge.cs.uchicago.edu/smlnj for $gf) ++

    config

    $gf/config/trunk ++

    base

    $gf/sml/trunk ++

    smlnj-lib

    $gf/smlnj-lib/trunk

    MLRISC

    $gf/MLRISC/trunk ++

    ml-yacc

    $gf/ml-yacc/trunk

    ml-lex

    $gf/ml-lex/trunk

    ml-burg

    $gf/ml-burg/trunk

    lexgen

    $gf/lexgen/trunk

    heap2asm

    $gf/heap2asm/trunk ++

    cml

    $gf/cml/trunk

    eXene

    $gf/eXene/trunk

    ckit

    $gf/ckit/trunk

    nlffi

    $gf/nlffi/trunk ++

    smlnj-c

    $gf/smlnj-c/trunk

    +
    +

    In $gf/admin there are a few useful shell scripts for checking out +and maintaining the entire collection of trees:

    +
    +
    +
    +
    +
    +
    admin/checkout-all.sh [dir]
    +
    +
    +

    optionally creates dir and checks out all trees from their +default repositories; if dir is missing, checkout into the +current working directory.

    +
    +
    +
    admin/refresh-all.sh [dir]
    +
    +

    looks at all trees (from the above list) in dir (default: .) +and runs “svn update” if the tree exists and is under +subversion control; +non-existing or non-subversion trees are skipped

    +
    +
    admin/stat-all.sh [dir]
    +
    +

    like refresh-all.sh, but runs “svn stat” instead

    +
    +
    +
    +
    +
    + +
    +
    +
    +
    +
    +
    [2006/10/05]
    +
    +
    +

    Merged code for AMD64 backend (Mike Rainey’s work). +Everything is hooked up but untested.

    +
    + +
    +
    +
    +
    +
    +
    +

    Version 110.59; 2006/05/17

    +
    +
    +
    +
    [2006/05/17]
    +
    +
    +

    I am freezing 110.59. Changes other than the version-number increase:

    +
    +
    +
    +
    +
    +
    eXene
    +
    +
    +

    committed changes to eXene from Alley Stoughton: +"fixed bugs in X authorization and resource handling, as well + as in the pile and viewport widgets"

    +
    +
    +
    Runtime
    +
    +
    +

    fixed linking problem with NetBSD 3.x.

    +
    +
    +
    Lexgen
    +
    +
    +
      +
    • +

      lexgen tool handles non-ascii characters in 7-bit mode the same +way that ml-lex does

      +
    • +
    • +

      lexgen propagates exceptions the same way that ml-lex does

      +
    • +
    +
    +
    +
    CML
    +
    +
    +

    Fixed a bug in the SyncVar polling functions (iGetPoll, +mTakePoll, and mGetPoll) that could lead to livelock.

    +
    +
    +
    +
    +
    +
    + +
    +
    +
    +
    +
    +
    [2006/05/12]
    +
    +
    +

    Implemented ml-makedepend (i.e., CM.sources) in a better (more robust) +way. This should hopefully fix the ml-makedepend problem permanently.

    +
    + +
    +
    +
    +
    +
    +
    [2006/05/12]
    +
    +
    +

    Fixed long-standing bug with ml-makedepend where it would output a +spurious dependency to a non-existing file. (This is a simple fix. +It might need further looking into.)

    +
    + +
    +
    +
    +
    +
    +
    [2006/04/20]
    +
    +
    +

    Committed patches received from Johannes 5 Joemann (joemann@befree.free.de) +that enable heap2exec under Linux and FreeBSD.

    +
    + +
    +
    +
    +
    +
    +
    [2006/04/14]
    +
    +
    +

    MLRISC changes:

    +
    +
    +
    +
    +
      +
    1. +

      renamed GAS_PSEUDO_OPS to AS_PSEUDO_OPS and put it in its own file.

      +
    2. +
    3. +

      added support for NOTB and XORB operators in pseudo-op expressions

      +
    4. +
    5. +

      added DarwinPseudoOp functor that supports Darwin’s assembler syntax.

      +
    6. +
    7. +

      added support for 64-bit integer literals

      +
    8. +
    +
    +
    +
    + +
    +
    +
    +
    +
    +
    +

    Version 110.58; 2006/03/03

    +
    +
    +
    +
    [2006/03/01]
    +
    +
    +

    Incorporated several bugfixes to lexgen. Compiler now compiles to +fixpoint when using lexgen instead of ml-lex.

    +
    + +
    +
    +
    +
    +
    +
    [2006/02/28]
    +
    +
    +

    Removed ml-flex and added lexgen instead, using Aaron Turon’s newly +provided tarball. The generated lexers still have problems.

    +
    + +
    +
    +
    +
    +
    +
    [2006/02/26]
    +
    +
    +

    Removed ml-flex's dependency on regexp-lib.cm. Turned installation +of ml-flex on by default.

    +
    + +
    +
    +
    +
    +
    +
    [2006/02/24]
    +
    +
    +

    Added ml-flex sources. Partially integrated, but should not be +turned on yet! (Read: leave it commented-out in config/targets!)

    +
    + +
    +
    +
    +
    +
    +
    [2006/02/23]
    +
    +
    +

    Changes to support building on x86-64 systems (using the 32-bit mode). +Also cleaned up signal handling on Linux. Support for pre-2.2 Linux +kernels dropped.

    +
    + +
    +
    +
    +
    +
    +
    [2006/02/22]
    +
    +
    +

    CM has changed. Updated the script for rebuilding the MLRISC generated +files.

    +
    + +
    +
    +
    +
    +
    +
    [2006/02/14]
    +
    +
    +

    Hooked code for Darwin-specific Intel ABI into main compiler. +(This is a temporary solution which relies on the fact that the +compiler itself does not use NLFFI. Eventually we need to divorce +intel mac from generic x86 unix code and make separate sets of binaries.)

    +
    + +
    +
    +
    +
    +
    +
    [2006/02/13]
    +
    +
    +

    Changed MLRISC x86 CCalls for partial support of Mac OS X ABI.

    +
    + +
    +
    +
    +
    +
    +
    [2006/02/06]
    +
    +
    +

    Changes to support Mac OS X on Intel hardware. The C-calls support in +MLRISC must also be updated to support the Mac OS X ABI.

    +
    + +
    +
    +
    +
    +
    +
    [2005/12/16]
    +
    +
    +

    Improved error reporting and handling in CM.

    +
    + +
    +
    +
    +
    +
    +
    [2005/11/21]
    +
    +
    +

    Enabling $/html-lib.cm in config/preloads did not work. This is fixed +now. (Since the anchor mapping for html-lib.cm is not yet in effect +at the time when config/preloads is processed, the library has to +be referred to by another name. In 110.57 this would be +$SMLNJ-LIB/HTML/html-lib.cm. I arranged for $smlnj/smlnj-lib/html-lib.cm +to be valid as well.)

    +
    +
    +

    Thanks to Todd Wilson (Fresno) for alerting me to this issue.

    +
    + +
    +
    +
    +
    +
    +
    +

    Version 110.57; 2005/11/19

    +
    +
    +
    +
    [2005/11/19]
    +
    +
    +

    Fixed a problem in config/install.sh where it tries to "fish" the +name of the CM metadata directory from the wrong place (because +the physical location of basis.cm has changed). Also, corrected the +path anchor for $/html-lib.cm. (Thanks to M. Fluet for pointing out +these problems.)

    +
    + +
    +
    +
    +
    +
    +
    +

    Version 110.57; 2005/11/16

    +
    +
    +
    +
    [2005/11/16]
    +
    +
    +

    Fixed problem with bogus exception message when using back-trace +facility.

    +
    + +
    +
    +
    +
    +
    +
    [2005/11/15]
    +
    +
    +

    Added simple implementation of Array2.copy. (Warning: mostly untested.)

    +
    + +
    +
    +
    +
    +
    +
    [2005/11/15]
    +
    +
    +

    Reversed change to src/system/smlnj/internals/versiontool.cm. This +file gets loaded as a tool — by the equivalent of CM.make during the +run of CMB.make. Thus, CMB’s path configuration is meaningless for +it. Instead, it has the status of "user code", so it should use +$/basis.cm to refer to the Basis library. (At least that’s true for +the purpose of bootstrapping the previous change. In the future it +might make sense to have versiontool.cm refer to +$smlnj/basis/basis.cm, i.e., the version of the Basis that the +compiler itself uses.)

    +
    +
    +

    Also patched src/system/testml to have it activate those extra anchor +bindings in config/extrapathconfig.

    +
    + +
    +
    +
    +
    +
    +
    [2005/11/15]
    +
    +
    +

    This change affects the way the following libraries are tied into +the system:

    +
    +
    +
    +
       $/basis.cm
    +   $/smlnj-lib.cm
    +   $/pp-lib.cm
    +   $/controls-lib.cm
    +   $/html-lib.cm
    +   $/ml-yacc-lib.cm
    +
    +
    +
    +

    These libraries are now internally (as seen from the source code +of the implementation itself) known by the following names:

    +
    +
    +
    +
       $smlnj/basis/basis.cm
    +   $smlnj/smlnj-lib/smlnj-lib.cm
    +   $smlnj/smlnj-lib/pp-lib.cm
    +   $smlnj/smlnj-lib/controls-lib.cm
    +   $smlnj/smlnj-lib/html-lib.cm
    +   $smlnj/ml-yacc/ml-yacc-lib.cm
    +
    +
    +
    +

    This makes it possible to work with code that requires different +versions of these libraries, and which refers to these libraries using +their "default" names (i.e., the first set of names above). In other +words, one can un-define or re-define those default names without +compromising the proper functioning of the compiler itself.

    +
    +
    +

    A similar procedure had already been performed for several of the +MLRISC libraries that are linked into the compiler. I did some +cleanup on this code.

    +
    +
    +

    A new file in the config directory (named extrapathconfig) is +responsible for setting up path anchors that the compiler itself does +not need, but that are typically required by user code.

    +
    + +
    +
    +
    +
    +
    +
    [2005/11/07]
    +
    +
    +

    Fixed erroneous out-of-bounds test in the “update” function of +various *ArraySlice modules. (Thanks to Vesa A. Norrman for pointing +out the problem.)

    +
    +
    +

    Pushed some Basis changes through ML-Lex, CML, and eXene.

    +
    + +
    +
    +
    +
    +
    +
    [2005/11/07]
    +
    +
    +

    Fixed a Basis incompatibility: The depreciated function Substring.all +was removed (use Substring.full instead).

    +
    + +
    +
    +
    +
    +
    +
    [2005/11/05]
    +
    +
    +

    Tweaked interval set API in SML/NJ library; see the CHANGES file for details.

    +
    + +
    +
    +
    +
    +
    +
    [2005/11/02]
    +
    +
    +
    +
    +
      +
    • +

      Runtime system bootstrap code now accepts hex digits in BOOTLIST +in either upper case or lower case format.

      +
    • +
    • +

      Pushed changes to names of Pack<N>{Big,Little} +structures through +CML and eXene.

      +
    • +
    +
    +
    +
    + +
    +
    +
    +
    +
    +
    [2005/11/02]
    +
    +
    +

    Fixed a Basis incompatibility: Pack<N>{Big,Little} +structures should be named PackWord<N>{Big,Little}.

    +
    + +
    +
    +
    +
    +
    +
    [2005/10/28]
    +
    +
    +

    Fixed a minor Basis incompatibility: hex digits should be upper case.

    +
    + +
    +
    +
    +
    +
    +
    +

    Version 110.56; 2005/10/25

    +
    +
    +
    +
    [2005/10/25]
    +
    +
    +

    Added interval sets to utility library (signatures INTERVAL_DOMAIN +and INTERVAL_SET, and functor IntervalSetFn).

    +
    + +
    +
    +
    +
    +
    +
    [2005/10/14]
    +
    +
    +

    Add Zhong Shao’s fix for datatype equality functions.

    +
    + +
    +
    +
    +
    +
    +
    [2005/10/14]
    +
    +
    +

    Bug fix a bug found by Carl Hauser. +There was a typo in the reload code for FCMP in x86SpillInstr.sml.

    +
    + +
    +
    +
    +
    +
    +
    [2005/10/14]
    +
    +
    +

    Removed some debugging code in file x86Asm.sml. +The function emit_operand was printing out debugging output.

    +
    + +
    +
    +
    +
    +
    +
    [2005/07/27]
    +
    +
    +

    Fixed ml-lex to recognize “\r” as representing carriage return.

    +
    + +
    +
    +
    +
    +
    +
    [2005/07/27]
    +
    +
    +

    Fixed ml-yacc to work on files with non-native end-of-line +encodings (e.g., Windows text file on a Unix system).

    +
    + +
    +
    +
    +
    +
    +
    [2005/07/20]
    +
    +
    +

    Added changes from Dominic Evans (oldmanuk (at) gmail (dot) com) +to support HPUX 11.

    +
    + +
    +
    +
    +
    +
    +
    [2005/07/06]
    +
    +
    +

    Changes to the SML/NJ Library. See smlnj-lib/CHANGES for details.

    +
    + +
    +
    +
    +
    +
    +
    [2005/07/06]
    +
    +
    +

    Fixed reversed logic for deciding whether to "copy up" or "copy down" +in *-array-slice.sml.

    +
    + +
    +
    +
    +
    +
    +
    [2005/05/31]
    +
    +
    +

    A typo in the Cygwin code fixed.

    +
    + +
    +
    +
    +
    +
    +
    [2005/05/31]
    +
    +
    +

    Updated Cygwin’s fault/signal handling to match the Windows version. +Updated the export list.

    +
    + +
    +
    +
    +
    +
    +
    +

    Version 110.54; 2005/05/18

    +
    +
    +
    +
    [2005/05/18]
    +
    +
    +

    Added support scripts for Mac OS X PackageMaker and modified +config/install.sh so that it supports re-dumping a heap image +after customization.

    +
    + +
    +
    +
    +
    +
    +
    [2005/05/18]
    +
    +
    +

    Un-overloaded / to work around bug in overloading resolution code.

    +
    + +
    +
    +
    +
    +
    +
    [2005/05/16]
    +
    +
    +

    Added mechanism for re-creating a heap file for the interactive system +after configuration variables have been changed.

    +
    +
    +
    +
       CM.redump_heap : string -> unit
    +
    +
    +
    +

    This is much like SMLofNJ.exportML, but starting from the resulting +heap does not return to the caller of CM.redump_heap but +restarts the interactive system from scratch. The original call of +CM.redump_heap does not return but ends the interactive session. +Thus, CM.redump_heap is a lot like SMLofNJ.exportFn.

    +
    +
    +

    Internally, redump_heap winds the dynamic execution context back to +the point where the original heap image was created and re-executes +the heap image generation code in the boot code.

    +
    + +
    +
    +
    +
    +
    +
    [2005/05/09]
    +
    +
    +

    Added a hack to the existing hack known as Word64 to make fromString +behave correctly. I am still not sure whether Word64.scan will work +as specified with respect to the interaction of radix and prefix.

    +
    + +
    +
    +
    +
    +
    +
    [2005/05/04]
    +
    +
    +

    Added a gc protocol checking phase. This phase is enabled with +the flag "check-gc". "debug-check-gc" turns on the verbose mode.

    +
    + +
    +
    +
    +
    +
    +
    [2005/05/04]
    +
    +
    +

    Fixed a bug in the implementation of div and mod for IntInf. +Thanks to Neophytos Michael for reporting the problem.

    +
    + +
    +
    +
    +
    +
    +
    [2005/05/04]
    +
    +
    +

    Added the join combinator to the ParserComb module in the +SML/NJ Library.

    +
    + +
    +
    +
    +
    +
    +
    [2005/02/28]
    +
    +
    +

    Fixed serious bug (brown paper bag variety) in new implementation of +structure Atom in CML. (I had accidentally used a mailbox instead of +an mvar, leaving the door open for races.)

    +
    + +
    +
    +
    +
    +
    +
    +

    Version 110.53; 2005/02/25

    +
    +
    +
    +
    [2005/02/25]
    +
    +
    +

    Brought back SMLofNJ.Susp. The underlying suspension type is the one +implemented in Core, which means that it is the same as the one used +by the lazy extension.

    +
    + +
    +
    +
    +
    +
    +
    [2005/02/24]
    +
    +
    +

    Simpler and at the same time more general implementation of structure +Atom in CML.

    +
    + +
    +
    +
    +
    +
    +
    [2005/02/15]
    +
    +
    +

    Created new “tools” directory under “src” and moved “TraceDebugProf” +there.

    +
    + +
    +
    +
    +
    +
    +
    [2005/02/10]
    +
    +
    +

    Implemented “long long” arguments and results for NLFFI. (Only the +PPC/MacOS implementation is complete, the other backends still need to +be updated.)

    +
    + +
    +
    +
    +
    +
    +
    [2005/01/24]
    +
    +
    +

    Minor cleanup in ML-Yacc rule printing mechanism. This should fix a +problem with certain "as" patterns which previously got rendered +using incorrect syntax.

    +
    + +
    +
    +
    +
    +
    +
    [2005/01/18]
    +
    +
    +

    Made time profiling code (interrupt handler) in runtime system aware +of new array representation.

    +
    + +
    +
    +
    +
    +
    +
    [2005/01/14]
    +
    +
    +

    Implemented new (but still experimental) heap2exec facility. This is +tested under Mac OS X and should work under Linux (will test shortly). +It will probably also work on the Sparc (will test some time later). +Also removed old “HACKED_STANDALONE” hack from runtime

    +
    +
    +

    To be able to test heap2exec, uncomment the request for “heap2asm” in +config/targets prior to installation. (Notice that this is different +from "heap2exec" mentioned below. Not a typo.)

    +
    +
    +

    To perform an actual test, run the command

    +
    +
    +
    +
    $ bin/heap2exec heapfile execfile
    +
    +
    +
    +

    (You can put heap2exec on your shell’s path.)

    +
    +
    +

    For example, run

    +
    +
    +
    +
    $ bin/heap2exec bin/.heap/ml-yacc.ppc-darwin mly
    +
    +
    +
    +

    This will create a standalone executable called “mly” that you can +then invoke directly as a command.

    +
    + +
    +
    +
    +
    +
    +
    [2005/01/07]
    +
    +
    +

    fixed off-by-one error in ML_STRING macro (globals.c)

    +
    + +
    +
    +
    +
    +
    +
    [2004/12/23]
    +
    +
    +

    Made ml-build script "smarter" (but only very little).

    +
    + +
    +
    +
    +
    +
    +
    [2004/12/21]
    +
    +
    +
      +
    • +

      Implemented access to signed and unsigned long long data in NLFFI. +(The parameter-passing part of the picture has not complete. But +data structure access seems to work.)

      +
    • +
    • +

      Fixed CM's incorrect assumption that the PPC is little-endian. +(On the Mac, it is big-endian. And that’s currently our only +PPC platform.)

      + +
    • +
    +
    +
    +
    +
    +
    +
    +
    [2004/12/21]
    +
    +
    +

    Some cleanup in the $c/memory.cm library: separated some concerns by +moving allocation code and memory access code each into their own +files.

    +
    + +
    +
    +
    +
    +
    +
    [2004/12/17]
    +
    +
    +

    The Unix I/O library of SML/NJ on Cygwin does not understand +Windows style pathname, so problems arise when SMLNJ_HOME is set +to a Windows style pathname. The _run-sml script now +converts SMLNJ_HOME to a POSIX pathname on Cygwin.

    +
    + +
    +
    +
    +
    +
    +
    [2004/12/16]
    +
    +
    +

    Last-minute changes incorporated into 110.52. Release tag moved.

    +
    +
    +

    The changes: + - HashString.hashString'HashString.hashSubstring + - bug fix in UnivariateStats

    +
    + +
    +
    +
    +
    +
    +
    [2004/12/15]
    +
    +
    +
      +
    • +

      +HashString.hashString' → HashString.hashSubstring

      +
    • +
    • +

      corresponding changes in atom.sml

      +
    • +
    • +

      "de-compressed" (aka. un-obfuscated) code for UnivariateStats and +added some comments

      + +
    • +
    +
    +
    +
    +
    +
    +
    +
    +

    Version 110.52; 2004/12/15

    +
    +
    +
    +
    [2004/12/15]
    +
    +
    +

    More on the space problem (this time for Win32).

    +
    + +
    +
    +
    +
    +
    +
    [2004/12/14]
    +
    +
    +

    Hacked some of the scripts (in particular: the installer) to cope with +spaces in filenames a bit better. But beware: the current "solution" +is likely still full of bugs and inherently incomplete. (We need to +do away with those shell scripts for a comprehensive solution.)

    +
    + +
    +
    +
    +
    +
    +
    [2004/12/13]
    +
    +
    +

    Fixed bug in code for ml-makedepend.

    +
    + +
    +
    +
    +
    +
    +
    [2004/12/09]
    +
    +
    +

    Added two simple but potentially useful statistics modules to SML/NJ Library. +(See CHANGES file there.)

    +
    + +
    +
    +
    +
    +
    +
    [2004/12/01]
    +
    +
    +

    Updates to SML/NJ Library

    +
    +
    +
    +
    +
      +
    • +

      Added function HashString.hashString' for substrings.

      +
    • +
    • +

      Hand-inlined CharVector.foldl into HashString (for speed).

      +
    • +
    • +

      Modified implementation of structure Atom to avoid extracting +strings from substrings unless necessary.

      +
    • +
    +
    +
    +
    +
    +

    (Also see CHANGES file for smlnj-lib.)

    +
    + +
    +
    +
    +
    +
    +
    [2004/11/24]
    +
    +
    +

    Made sure CML compiles when the Position structure is Int64.

    +
    + +
    +
    +
    +
    +
    +
    [2004/11/24]
    +
    +
    +

    The compiler can now be compiled in a mode that makes structure Position +equal to Int64. The default, however, is unchanged (Position is Int31) +for the time being.

    +
    +
    +

    To enable 64-bit positions, use the following procedure:

    +
    +
    +
    +
    +
      +
    1. +

      Start sml

      +
    2. +
    3. +

      Autoload $smlnj/cmb.cm (if not already autoloaded)

      +
    4. +
    5. +

      Type

      +
      +
      +
      #set (CMB.symval "USE_64_BIT_POSITIONS") (SOME 1);
      +
      +
      +
    6. +
    7. +

      Run CMB.make() as usual.

      +
    8. +
    +
    +
    +
    +
    +

    This is barely tested. The only test so far was a little SML program +counting the number of characters in an 8-gigabyte file by +reading it character-by-character. That test was successful.

    +
    +
    +

    In support of 64-bit positions, a number of new functions have been +added to the runtime system.

    +
    + +
    +
    +
    +
    +
    +
    [2004/11/23]
    +
    +
    +

    Fixed a problem with unhelpful error messages related to problems with +.cm or .sml files that appear as part of the sml command line.

    +
    + +
    +
    +
    +
    +
    +
    +

    Version 110.51; 2004/11/18

    +
    +
    +
    +
    [2004/11/18]
    +
    +
    +

    Enabled dlopen and friends for FreeBSD (as recommended by Johannes 5 +Joemann).

    +
    + +
    +
    +
    +
    +
    +
    [2004/11/17]
    +
    +
    +

    Added support for MLTree constructs LIVE and KILL +to all the architectures.

    +
    + +
    +
    +
    +
    +
    +
    [2004/11/13]
    +
    +
    +
      +
    • +

      Stripped down the versiontool: It now only handles the version number. +The date string is generated at bootstrap time (during makeml).

      +
    • +
    • +

      In a previous commit, fixed a minor issue with how polyequal is being +translated. In particular, the code now "looks through" abstractions. +This results in slightly fewer polyEqual warnings and hopefully slightly +more efficient code. Important examples for where this matters are +the new int64 and word64 types.

      + +
    • +
    +
    +
    +
    +
    +
    +
    +
    [2004/11/12]
    +
    +
    +

    Structure Int64 fully hooked in. (The implementation is not very +efficient, though.)

    +
    + +
    +
    +
    +
    +
    +
    [2004/11/11]
    +
    +
    +

    All the pieces of Word64 are now there, with the exception of the +conversions from and to LargeWord. (Eventually these need to be identities, +but for the time being they don’t even make sense because LargeWord is +32-bit wide.)

    +
    +
    +

    Also started to add similar support for Int64, but major pieces of that +are still missing.

    +
    + +
    +
    +
    +
    +
    +
    [2004/11/11]
    +
    +
    +

    Structure Word64 is now (almost) complete, word literals and patterns +seem to work. There are a few odd pieces missing. In particular, +I didn’t do the {from,to}`LargeWord` parts because LargeWord +is still Word32 at the moment.

    +
    +
    +

    Making Word64 official would mean that LargeWord becomes Word64. But +this requires extreme care because most word-word conversions have to +go through LargeWord, so making a mistake means loss of efficiency or +worse. Eventually there will be a solution similar to (but actually +simpler than) what I did with IntInf.

    +
    + +
    +
    +
    +
    +
    +
    [2004/11/10]
    +
    +
    +

    More 64-bit hacking (but still not even half-way there yet). +Also, some assorted improvements to the handling of 8-bit words.

    +
    + +
    +
    +
    +
    +
    +
    [2004/11/09]
    +
    +
    +

    Started adding some infrastructure for supporting 64-bit int- and +word-types. (Still in its very early stages.)

    +
    + +
    +
    +
    +
    +
    +
    +

    Version 110.50; 2004/10/28

    +
    +
    +
    +
    [2004/10/28]
    +
    +
    +
      +
    • +

      Changend config/srcarchiveurl from a file just +containing the URL string into a file containing +shell script code. The code has access to the $VERSION variable.

      +
    • +
    • +

      Made corresponding changes to config/install.sh and config/unpack.

      +
    • +
    • +

      Default contents of config/srcarchiveurl uses $VERSION and +normally does not have to be edited to reflect a version change.

      +
      +

      (As a result, a version change can be done by just editing +config/version, the rest is now automatic.)

      +
      + +
    • +
    +
    +
    +
    +
    +
    +
    +
    [2004/10/27]
    +
    +
    +

    BackTrace.monitor now also reports the source of the exception that +triggered the trace.

    +
    + +
    +
    +
    +
    +
    +
    [2004/10/27]
    +
    +
    +

    This is the HISTORY entry for two earlier commits, both concerning +the x86 c-calls code in MLRISC:

    +
    +
    +
      +
    • +

      added a missing LOAD in the code that deals with struct arguments

      +
    • +
    • +

      made sure the caller does not add the wrong number of bytes to the +stack pointer after a call of a function returning a struct +(the callee already pops the implicit argument which points to + the space reserved for the result)

      + +
    • +
    +
    +
    +
    +
    +
    +
    +
    [2004/10/24]
    +
    +
    +

    John discovered a bug in the syntax of fucomip. +The opcodes FU?COMIP? have been changed to

    +
    +
    +
    +
    fu?comip? %st(i), %st
    +
    +
    + +
    +
    +
    +
    +
    +
    [2004/10/20]
    +
    +
    +

    Added a mechanism for getting back-trace information from standalone +programs. Here is how it works:

    +
    +
    +
    +
    +
      +
    1. +

      The part of the program from which you want to get backtrace +information (usually the whole program) should be wrapped with +BackTrace.monitor. This is a (unit→'a)→'a function, and your +main program could be modified from something like

      +
      +
      +
      fun main (pgm, args) = ...
      +
      +
      +
      +

      to

      +
      +
      +
      +
      fun main (pgm, args) = BackTrace.monitor (fn () => ...)
      +
      +
      +
    2. +
    3. +

      To be able to access BackTrace.monitor, you have to add the +library $smlnj-tdp/plugins.cm to the .cm file that contains your +main function.

      +
    4. +
    5. +

      Remove all compiled code (i.e., all the .cm/ subdirectories that +CM might have created in the past for your project).

      +
    6. +
    7. +

      Build the system using this command line:

      +
      +
      +
      ml-build -Ctdp.instrument=true \$smlnj-tdp/back-trace.cm myprog.cm MyProg.main myprog
      +
      +
      +
      +

      instead of the usual

      +
      +
      +
      +
      ml-build myprog.cm MyProg.main myprog
      +
      +
      +
    8. +
    +
    +
    +
    +
    +

    I changed the library name $/trace-debug-profile.cm to $smlnj-tdp/plugins.cm, +and added the following new libraries:

    +
    +
    +
    +
    +
    +
    $smlnj-tdp/back-trace.cm
    +
    +
    +

    when loaded causes the back-trace plugin to be installed

    +
    +
    +
    $smlnj-tdp/coverage.cm
    +
    +
    +

    when loaded causes the coverage plugin to be installed

    +
    +
    +
    +
    +
    +
    + +
    +
    +
    +
    +
    +
    [2004/10/18]
    +
    +
    +

    Added an "obsolete" warning for the "group owner" syntax to CM's parser.

    +
    +
    +

    Eliminated group owner specs from .cm files throughout the source tree.

    +
    + +
    +
    +
    +
    +
    +
    [2004/10/15]
    +
    +
    +
    +
    +
      +
    • +

      Test coverage tool added!

      +
    • +
    • +

      Further reorganization of tracing-, debugging-, and profiling support:

      +
      +
        +
      • +

        moved original BTImp — now called BackTrace — into a separate +library called $/trace-debug-profile.cm

        +
      • +
      • +

        eliminated all mentions of BTrace from SMLofNJ.Internals

        +
      • +
      • +

        only the instrumentation mechanism is now left in the compiler proper

        +
      • +
      • +

        BackTrace module is a plugin which is NOT plugged in by default

        +
      • +
      • +

        Coverage module is another such plugin

        +
      • +
      +
      +
    • +
    +
    +
    +
    +
    +

    To get the benefits of any of these plugin modules, the code in +question must be compiled with TDP instrumentation turned on. This +can be done by setting SMLofNJ.Internals.TDP.mode to true. +(The ref cell is also controlled via the -Ctdp.instrument=…​ switch.)

    +
    +
    +

    Plugins are selected at link time. (Pre-compiled instrumented code +can be re-loaded with different plugins in effect.) When an +instrumented module is linked, whatever plugins are at that time +enabled will come into effect for that module.

    +
    +
    +

    To enable the back-trace plugin, load library $/trace-debug-profile.cm +and invoke BackTrace.install() (e.g., from the interactive prompt). +To enable the coverage plugin, load the same library and invoke +Coverage.install().

    +
    +
    +

    Back-traces are generated automatically on uncaught exceptions and +when the code in question explicitly invokes BackTrace.trigger().

    +
    +
    +

    Coverage (and execution frequency-) information must be queried +explicitly by calling Coverage.not_covered and Coverage.hot_spots.

    +
    + +
    +
    +
    +
    +
    +
    [2004/10/14]
    +
    +
    +

    Snapshot of a significant overhaul of how the trace/debug/profile support +is hooked into the system (specifically: Core and SMLofNJ.Internals).

    +
    + +
    +
    +
    +
    +
    +
    [2004/10/13]
    +
    +
    +

    Some rationalization of names:

    +
    +
    +
    +
    structure BTrace -> structure TDPInstrument
    +etc.
    +
    +
    +
    +

    This is is preparation of using the original back-trace +instrumentation for other purposes. "TDP" stands for +Trace/Debug/Profile.

    +
    +
    +

    The control flag controlling whether instrumentation is on or off is now +registered under a different name, so instead of running sml as

    +
    +
    +
    +
    sml -Cinstrument.btrace-mode=true
    +
    +
    +
    +

    one has to say

    +
    +
    +
    +
    sml -Ctdp.instrument=true
    +
    +
    + +
    +
    +
    +
    +
    +
    [2004/10/11]
    +
    +
    +

    Made some minor modifications to elabcore.sml to have source regions +be propagated more tightly — resulting in better (i.e., smaller) +regions being reported in error- and debug messages.

    +
    + +
    +
    +
    +
    +
    +
    [2004/10/08]
    +
    +
    +

    Fixed handling of keywords in .cm files: After seeing "is" the lexer +treats subsequent occurrences of "group", "library", "source", "is", +"*", and "-" as ordinary identifiers rather than keywords.

    +
    +
    +

    Most seriously, this fixes a problem with CM’s "shell" tool. The tool +is supposed to accept a tool argument called "source", but this did +not work because of the clash with the keyword.

    +
    + +
    +
    +
    +
    +
    +
    [2004/10/07]
    +
    +
    +

    Assorted cleanup work:

    +
    +
    +
      +
    • +

      got rid of intstrmap in favor of using the library’s +hash table implementation

      +
    • +
    • +

      threw out most of the pathnames stuff, as it was not used anyway

      +
    • +
    • +

      simplified tokentable implementation

      +
    • +
    • +

      fixed some minor spelling errors

      + +
    • +
    +
    +
    +
    +
    +
    +
    +
    [2004/10/06]
    +
    +
    +

    Cleaned up the absyn to reflect the invariant that HANDLE always +carries a FNexp as part of the type definition. This eliminates some +superfluous sanity checks at runtime down the road.

    +
    +
    +

    Some minor cleanup of the btrace code.

    +
    + +
    +
    +
    +
    +
    +
    [2004/10/01]
    +
    +
    +

    Added hack to make slave mode work in the presence of the version +tool. (Still, since the master does two passes over the code for +CMB.make, the release number gets bumped twice when slaves are +attached. I don’t know if this is worth fixing…​)

    +
    + +
    +
    +
    +
    +
    +
    [2004/09/30]
    +
    +
    +
      +
    • +

      Moved the "version" magic into its own little library under +src/system/smlnj/internal. This avoids expensive reconstruction of +a stable src/compiler/core.cm.

      +
    • +
    • +

      At the same time, structure CompilerVersion is now known as +structure SMLNJVersion.

      +
    • +
    • +

      Arranged for the version tool to NOT kick in when rebuilding the system +(makeml -rebuild, fixpt). Otherwise one would never reach a fixpoint. +Also, loading the versiontool does not work when rebuilding the system +because CM is not properly initialized at that time.

      + +
    • +
    +
    +
    +
    +
    +
    +
    +
    [2004/09/29]
    +
    +
    +

    Implemented some CM magic to have + file src/compiler/TopLevel/main/version.sml +generated automagically. +The version is taken from two files: config/version and config/release. +The first is expected to contain a two-part version number such as 110.49. +The second should contain a single number, but it may be missing.

    +
    +
    +

    If the environment variable VERSIONTOOL_BUMP_RELEASE is defined at the +time the version tool is loaded (which is the first time you say CMB.make), +then the tool will increment the value stored in config/release every +time CMB.make is invoked.

    +
    +
    +

    The binfile format is now insensitive to anything beyond the first +two components of a version number, so bumping the release does not render +binfiles incompatible. Auto-bumping can be used to keep track of versions +during development without invalidating existing binfiles.

    +
    +
    +

    In any case, every CMB.make updates the date information in version.sml. +(This is the date that is printed in the banner.)

    +
    + +
    +
    +
    +
    +
    +
    [2004/09/28]
    +
    +
    +

    Some cleanup of the controls code.

    +
    + +
    +
    +
    +
    +
    +
    [2004/09/27]
    +
    +
    +

    Added two pieces of functionality to the Controls interface:

    +
    +
    +
    +
    val save'restore: 'a control -> unit -> unit
    +
    +
    +
    +

    grabs the current value of the control in stage 1 and restores it +in stage 2.

    +
    +
    +
    +
    val set' : 'a control * 'a -> unit -> unit
    +
    +
    +
    +

    stores the given value into the control in stage 2 (i.e., delayed) +but does all error checking in stage 1. +(This is for string controls that need to do parse their argument — something that might fail. In some cases, notably in CM, one +already knows the intended argument but wants to delay the actual +assignment until a time when error recovery would be more difficult.)

    +
    +
    +

    Also changed the handling of controls in tool arguments to classes “sml” and +“lazysml”:

    +
    +
    +
    +
    +
      +
    • +

      use Controls.save’restore as a more robust way of restoring the +old value (in particular: without having to re-parse the string)

      +
    • +
    • +

      use controls to handle the “overload” keyword in the init group +(I believe this change actually fixes a long-standing obscure bug.)

      +
    • +
    +
    +
    +
    + +
    +
    +
    +
    +
    +
    [2004/09/27]
    +
    +
    +

    Added a new tool class called “lazysml” to CM’s tool chest. The only +difference to “sml” is that compilation is done with Control.lazysml +set to true. A source of class “lazysml” is automatically recognized +by a file name suffix of “.lml”.

    +
    +
    +

    In addition to the above feature, the original class “sml” now also +supports a tool argument “lazy” which has the same effect. As a +result, the following three lines are equivalent:

    +
    +
    +
    +
        foo.sml : lazysml
    +    foo.sml : sml (lazy)
    +    foo.sml (lazy)
    +
    +
    +
    +

    The setting goes into effect both during parsing and during +compilation. The original setting is restored right after parsing and +after compilation, respectively.

    +
    +
    +

    In addition to all the above, there is also a general mechanism to set +ANY of the "controls" that are available at the command line via +“-C…​” on a per-sml-file basis. The same rules that apply for “lazy” +apply as well. (In fact, “lazy” is implemented as a special case of +the general mechanism.)

    +
    +
    +

    The .cm file syntax uses a new keyword tool argument called “with”. +There are several ways of indicating the desired settings:

    +
    +
    +
    +
        foo.sml (with:parser.quotations=true)
    +    foo.sml (with:(name:parser.quotations value:true))
    +    foo.sml (with:(name:name1 value:value1 name:name2 value:value2 ...))
    +    foo.sml (with:(name1=value1 name2=value2 ...))
    +    foo.sml (with:(name1=value1 name:name2 value:value2 name3=value3 ...))
    +
    +
    +
    +

    Another possible abbreviation is to leave out the =v or value:v part +if the name refers to a boolean control (in which case the value is +taken to be true). Thus, one could get lazy sml also by saying:

    +
    +
    +
    +
        foo.sml (with:parser.lazy-keyword=true)
    +    foo.sml (with:parser.lazy-keyword)
    +    foo.sml (with:(name:parser.lazy-keyword value:true))
    +    foo.sml (with:(name:parser.lazy-keyword))
    +
    +
    + +
    +
    +
    +
    +
    +
    [2004/09/24]
    +
    +
    +

    Turned message about "emiting long form of branch" off by default. +Added a control flag to turn it back on when desired.

    +
    + +
    +
    +
    +
    +
    +
    [2004/09/24]
    +
    +
    +

    Applied patch for setting rounding modes under Mac OS X. Thanks to +Melissa O’Neill for providing the code!

    +
    + +
    +
    +
    +
    +
    +
    [2004/09/23]
    +
    +
    +
    +
    +
      +
    1. +

      Changed definition of type ControlRegistry.registry_tree to +include control_info (i.e., the name of the controlling environment +variable).

      +
    2. +
    3. +

      Added command-line flags -e and -E to print the names of +environment variables that can be used to control internal settings. +(This uses the new API mentioned in 1.)

      +
    4. +
    +
    +
    +
    + +
    +
    +
    +
    +
    +
    +

    Version 110.49; 2004/09/13

    +
    +
    +
    +
    [2004/09/13]
    +
    +
    +

    Put target “mlrisc” back into the default list. +(There is no harm in having it, and some users have expressed their +wish to have “mlrisc” included by default.)

    +
    + +
    +
    +
    +
    +
    +
    [2004/09/13]
    +
    +
    +

    Fixed the signal masking code to properly nest mask/unmask operations +on a per-signal basis.

    +
    + +
    +
    +
    +
    +
    +
    [2004/09/08]
    +
    +
    +

    Bumped the heap magic number to 0x09082004 to account for the changed +layout of the ML frame under Mac OS X.

    +
    + +
    +
    +
    +
    +
    +
    [2004/09/03]
    +
    +
    +

    Added a patch to _arch-n-opsys to enable the Cygwin runtime. +The Cygwin runtime is turned on by setting the environment +variable SMLNJ_CYGWIN_RUNTIME to 1.

    +
    + +
    +
    +
    +
    +
    +
    [2004/08/31]
    +
    +
    +

    Added some exports to src/compiler/core.cm upon request by J. Joemann.

    +
    + +
    +
    +
    +
    +
    +
    [2004/08/30]
    +
    +
    +

    Upon request by Johannes Joemann:

    +
    +
    +
    +
    +
      +
    • +

      improved ML code of installer to fall back to coping when renaming +fails (i.e., when source and target are on different file systems); +the code compiles but has yet to be tested in anger

      +
    • +
    • +

      removed mlrisc from list of default targets (config/targets)

      +
    • +
    +
    +
    +
    + +
    +
    +
    +
    +
    +
    [2004/08/27]
    +
    +
    +

    Added ptreql primop to structure InlineT (upon request from Larry +Paulson).

    +
    + +
    +
    +
    +
    +
    +
    [2004/08/15]
    +
    +
    +

    Another bug fix from Carl Hauser:

    +
    +
    +
    +
    diff /net/niflab/smlnj48/src/MLRISC/graphs/udgraph.sml udgraph.sml
    +> 48c48
    +> <              | rmv((e as (k,_))::es,L) = rmv(es,if k = i then es else
    +> e::L)
    +> ---
    +> >              | rmv((e as (k,_))::es,L) = rmv(es,if k = i then L else e::L)
    +
    +
    +
    +

    Without this, any deletion of an edge in an undirected graph does severe +violence to the graph.

    +
    + +
    +
    +
    +
    +
    +
    [2004/08/10]
    +
    +
    +

    The IBM/MacOS syntax switch on PPC was incorrectly swapped.

    +
    + +
    +
    +
    +
    +
    +
    +

    Version 110.48; 2004/08/10

    +
    +
    +
    +
    [2004/08/09]
    +
    +
    +

    Bug fix from Carl Hauser:

    +
    +
    +

    single_source_shortest_paths in dijkstra.sml was observed to get wrong +answers (by comparing to single_source_shortest_paths in bellman-ford.sml).

    +
    +
    +

    The problem is that following the expression A.update(dist,s,Num.zero) +it is necessary to update the priority queue using Q.decreaseWeight(Q,s).

    +
    + +
    +
    +
    +
    +
    +
    [2004/08/06]
    +
    +
    +

    Fiddled with handling of command-line options:

    +
    +
    +
    +
    +
      +
    • +

      sml now quits after processing the command line +if -H, -S, -h<n>, or -s<n> appears as the last +command-line argument

      +
    • +
    • +

      a new option -q terminates the session when encountered on +the command line; subsequent arguments will be ignored

      +
    • +
    • +

      bug fixes: short (erroneous) arguments are no longer ignored +completely

      +
    • +
    +
    +
    +
    + +
    +
    +
    +
    +
    +
    [2004/08/04]
    +
    +
    +
    +
    +
      +
    • +

      Added minimal IBM assembly syntax support for PowerPC.

      +
    • +
    • +

      Cygwin: manually changed the file cygwin.def. Some exported symbols have +been altered in the runtime. We need an automatic way to keep the file +in sync.

      +
    • +
    +
    +
    +
    + +
    +
    +
    +
    +
    +
    +

    Version 110.47; 2004/08/04

    +
    +
    +
    +
    [2004/08/03]
    +
    +
    +

    Added low-level support for choosing C calling conventions by +twiddling the type of rawccall. (See +src/compiler/Semant/types/cproto.sml for details.)

    +
    + +
    +
    +
    +
    +
    +
    [2004/08/02]
    +
    +
    +

    Backed out of change to win32-filesys.c. The earlier patch to +get_file_time caused CM to produce files with the wrong time stamp.

    +
    + +
    +
    +
    +
    +
    +
    [2004/08/02]
    +
    +
    +

    Added NLFFI support for Win32, adapted from a patch provided by David +Hansel. This is currently completely untested. Also, the issue +concerning stdcall vs. ccall is still unresolved.

    +
    + +
    +
    +
    +
    +
    +
    [2004/07/30]
    +
    +
    +

    Gearing up towards 110.47…​

    +
    +
    +
    +
    +
      +
    • +

      various minor bugfixes to ml-nlffigen

      +
    • +
    • +

      a beginning of a manual for nlffi

      +
    • +
    • +

      eliminated 'export name=value' in config/install.sh as this does +not work with certain versions of /bin/sh +(Thanks to David King at Motorola for catching this.)

      +
    • +
    • +

      several bugfixes provided or suggested by David Hansel at Reactive Systems:

      +
      +
        +
      • +

        added a test for tm==NULL to gmtime.c and localtime.c

        +
      • +
      • +

        applied patch for incorrect GetFileTime under win32

        +
      • +
      • +

        toSecondstoMilliseconds in Win32/win32-process.sml

        +
      • +
      +
      +
    • +
    +
    +
    +
    + +
    +
    +
    +
    +
    +
    [2004/07/21]
    +
    +
    +
    +
    +
      +
    • +

      Fixed minor issue in ml-nlffigen: Now generate structure T_foo +for a typedef to an incomplete type, but leave out the “typ” member. +(This is just for consistency.)

      +
    • +
    • +

      Started to produce what is supposed to become better (i.e., comprehensive) +documentation of what ml-nlffigen does and produces.

      +
    • +
    +
    +
    +
    + +
    +
    +
    +
    +
    +
    [2004/07/14]
    +
    +
    +

    Added C_UNION to c-calls/c-types.sml and updated the machinery +(ml-nlffigen, cproto.sml) that conveys C function interface +information to the code generator.

    +
    +
    +

    However, the actual architecture-specific implementation of function +arguments and results that are C unions is still not implemented.

    +
    + +
    +
    +
    +
    +
    +
    [2004/07/14]
    +
    +
    +

    Added these instructions to the PowerPC architecture: + LBZU(X), LHZU(X), LWZU(X), + STWU(X), STFDU, STFSU, +etc…​

    +
    +
    +

    Note: I haven’t added their instruction encoding into the description.

    +
    + +
    +
    +
    +
    +
    +
    [2004/07/13]
    +
    +
    +

    Added the two instructions LWARX and STWCX to the PowerPC +instruction set.

    +
    +
    +

    A (untested) rewrite of loop-structure.sml. The old version +is completely broken.

    +
    + +
    +
    +
    +
    +
    +
    [2004/07/13]
    +
    +
    +
    +
    +
      +
    • +

      use paramAlloc to report c-calls with too many arguments +(for PPC version where parameter area is pre-allocated)

      +
    • +
    • +

      added ccall_maxargspace to machspec (to implement the above)

      +
    • +
    • +

      made "make" commend in CM’s "make" tool configurable

      +
    • +
    • +

      added option (default: on) for passing the name of the SML/NJ’s "bin" +directory to "make"; the call looks like this:

      +
      +
      +
      make <options> SMLNJ_BINDIR=<dir> <target>
      +
      +
      +
      +

      This can be used by the Makefile to, e.g., pick the "right" version +of ml-nlffigen.

      +
      +
    • +
    • +

      minor code tweaks

      +
    • +
    +
    +
    +
    + +
    +
    +
    +
    +
    +
    +

    Version 110.46.1; 2004/07/12

    +
    +
    +
    +
    [2004/07/12]
    +
    +
    +

    NLFFI under Mac OS X now working (sort of). This is largely untested, +though.

    +
    +
    +
    Note:
    +
    +
    +
      +
    1. +

      You have to make a new, clean build of the runtime system.

      +
    2. +
    3. +

      There are new BOOTFILES, you have to use them! +(Doing the bootstrap process yourself would be very painful! + If you absolutely have to do it, build the system under + a different architecture and then cross-compile.)

      +
    4. +
    +
    +
    +
    +
    +

    Version bumped to 110.46.1 to account for runtime data format changes.

    +
    + +
    +
    +
    +
    +
    +
    [2004/06/18]
    +
    +
    +

    Changed the implementation of structure Unix so that the same stream +is returned every time one of the {text,bin}{In,Out}`streamOf` +functions is invoked on the same proc. This is not what the spec currently +says — although IMO it arguably should. (See discussion below.)

    +
    + +
    +
    +
    +
    +
    +
    +

    Version 110.46; 2004/06/17

    +
    +
    +
    +
    [2004/06/17]
    +
    +
    +

    Changed the interface of structures Timer and Unix to match the most +recent Basis spec.

    +
    +
    +

    In the case of Unix there still seems to be an open/weird issue:

    +
    +
    +
    +
    The {text,bin}{In,Out}streamOf functions are supposed to create
    +fresh streams whenever they are called -- as opposed to have them
    +return the same stream every time.  This design is supposed to
    +prevent space leaks caused by proc values hanging on to streams.
    +
    +
    +
    +
    +
    The reap function, on the other hand, is supposed to close the
    +streams.  This cannot be done without having a handle on the
    +stream in proc after all...
    +
    +
    +
    +

    I took the liberty to implement the following stopgap solution:

    +
    +
    +
    +
    The proc value hangs on to the most recently created stream(s).
    +Reap closes those.  If either or both of the two streams hadn't
    +been created at all yet, then reap will close the corresponding
    +file descriptors directly.
    +
    +
    +
    +

    PS: I don’t understand the original space leak argument anymore. If +a proc hangs on to the imperative stream, then I/O operations on those +will advance the state of the cached stream and avoid the space leak.

    +
    + +
    +
    +
    +
    +
    +
    [2004/05/28]
    +
    +
    +

    Added signature PACK_REAL and exported functor PrimIO.

    +
    + +
    +
    +
    +
    +
    +
    [2004/05/25]
    +
    +
    +

    CM now ignores (but still accepts) the "owner" information in group +descriptions. The owner of a group is its next enclosing +library. Each group must have a unique owner. (There is a virtual +"toplevel" library that owns groups which are not nested within a real +library.) Previously, each group had to explicitly declare its owner, +and CM would check that such a declaration is correct. The new scheme +is to have CM check that for each group there is precisely one owning +library.

    +
    +
    +

    The advantage of the new scheme is that the programmer no longer needs +to maintain the somewhat annoying owner information. The downside is +that CM cannot enforce the ownership rule across multiple runs of +CM.make. Fortunately, enclosing the same group in two different +libraries A and B which are not part of the same program does not +cause real problems.

    +
    + +
    +
    +
    +
    +
    +
    [2004/05/20]
    +
    +
    +

    Made the win32 version work again. (Strangely, a misplaced comma had +slipped into win32-process.c which prevented the runtime from being +compiled correctly.)

    +
    +
    +

    Also, included a minor addition to ml-build.bat analogous to what was +done in blume-20040519-ml-build.

    +
    + +
    +
    +
    +
    +
    +
    [2004/05/19]
    +
    +
    +

    Arranged for ml-build to clean up after itself a little bit better. +The script generates a temporary SML source file and compiles it using +CM, so CM generates metadata (GUID, SKEL, objectfile) for it. It now +gets rid of those at the end, so they don’t accumulate under .cm.

    +
    +
    +

    This required a minor change to install.sh because the name of the +metadata directory (default: .cm) is actually configurable at +installation time.

    +
    + +
    +
    +
    +
    +
    +
    [2004/05/18]
    +
    +
    +

    Added Posix.IO.mk{Bin,Text}{Reader,Writer} by lifting their respective +implementations from internal modules PosixBinPrimIO and PosixTextPrimIO.

    +
    + +
    +
    +
    +
    +
    +
    [2004/05/11]
    +
    +
    +

    Added previously missing support for many socket-related functions +under win32. Thanks to David Hansel <hansel@reactive-systems.com> +for the voluminous patch!

    +
    +
    +

    (I have not tested this patch under win32 yet.)

    +
    +
    +

    Here is David’s e-mail:

    +
    +
    +

    Hi,

    +
    +
    +

    Attached to this email you find a diff against sml/nj 110.45 +that will enable socket support under Windows.

    +
    +
    +

    To apply the patch (using unix or cygwin) +1) gunzip runtime.diff.gz +2) "cd" into "src/runtime" in the source tree of a fresh + 110.45 installation. +3) patch -p 1 < [your/path/to]runtime.diff

    +
    +
    +

    The code compiles fine but has NOT yet been extensively tested. +I only ran a few tests for basic socket client functionality +(which worked fine). Especially the functions that use ioctl +are not tested at all and might not work (see below).

    +
    +
    +

    I implemented this since we want to move to a newer version of sml/nj +but need socket support in order to use it. This is the first time I +even had a look at the sml/nj source, so please review my changes +before making this part of the distribution! Here are a few issues +that I think might be better for someone to solve who is more +familiar with the sml/nj source (and socket programming):

    +
    +
    +
      +
    • +

      getnetbyaddr.c and getnetbyname.c will raise a "not implemented" +exception since I could not figure out what the windows equivalent +of these functions is

      +
    • +
    • +

      In sockets-osdep.h there are a some #include statements that are +only used in a few files that include sockets-osdep.h

      +
    • +
    • +

      In smlnj-sock-lib.c, function init_fn() calls WSAStartup() but +does not process its return value since I don’t know how to +report an error upwards.

      +
    • +
    • +

      It would probably be good to have a call to WSACleanup() when +the library is unloaded (if there is such a possibility). +Otherwise I think Windows will take care of this automatically +when the process finishes.

      +
    • +
    • +

      I used ioctlsocket() as a replacement for ioctl() but I have +no idea if that is actually the proper replacement on Windows.

      +
    • +
    • +

      All these issues are marked in the code by "FIXME" comments.

      +
      +

      We use sml/nj extensively in our products and are quite happy +with it. I hope this contribution will help you.

      +
      +
      +

      Keep up the good work!

      +
      +
      +

      David

      +
      + +
    • +
    +
    +
    +
    +
    +
    +
    +
    [2004/05/11]
    +
    +
    +

    Fixed two bugs in installml script. (Thanks to Vesa A. Norrman for +the patch.)

    +
    + +
    +
    +
    +
    +
    +
    [2004/05/11]
    +
    +
    +

    Added support for nlffi under netbsd. (Thanks to Vesa A. Norrman for +the patch.)

    +
    + +
    +
    +
    +
    +
    +
    [2004/05/11]
    +
    +
    +

    As per request by Adam Chlipala <adam@hcoop.net>, extended various +export lists in compiler-related .cm-files.

    +
    + +
    +
    +
    +
    +
    +
    [2004/05/11]
    +
    +
    +

    The installer now honors the "src-smlnj" target again, although its meaning +has changed from "all sources required for the compiler" to "all sources +the installer knows about". In other words, if you enable "src-smlnj" +in the "targets" file, then the installer will pull in sources for +everything. (Notice that this refers to source code only. Compiled +code is still only installed for modules that were requested explicitly +or which are required for other modules that were requested explicitly.)

    +
    + +
    +
    +
    +
    +
    +
    [2004/04/23]
    +
    +
    +

    Fixed IEEEReal.scan (and .fromString) so that if there is an overflow +in the exponent calculation we get INF or ZERO (depending on the mantissa +and the sign of the exponent).

    +
    + +
    +
    +
    +
    +
    +
    [2004/04/23]
    +
    +
    +

    The ml-build script now terminates with a non-0 status when something +goes wrong.

    +
    + +
    +
    +
    +
    +
    +
    [2004/04/22]
    +
    +
    +

    Made exception Option to be the same as exception Option.Option +(as it should be).

    +
    + +
    +
    +
    +
    +
    +
    [2004/03/19]
    +
    +
    +

    Fixed the runtime so that ml-nlffi-lib runs on the cygwin version +of SML/NJ. The problem is that

    +
    +
    +
    +
    lib = dlopen(NULL, ...)
    +f   = dlsym(lib, "malloc");
    +
    +
    +
    +

    does not work on Windows unless we explicitly export symbols +such as 'malloc' during linking. We fixed this by explicitly +exporting the required symbols with the magic gcc incantation:

    +
    +
    +
    +
    -Wl,--export-all cygwin.def
    +
    +
    +
    +

    where cygwin.def is a file containing all the symbols that +we wish to export.

    +
    +
    +

    I suspect this is a Windows problem and we’ll have to +do the same (somehow with windows compilers) when +we build the native win32 version with the system +calls LoadLibrary/GetProcAddress.

    +
    + +
    +
    +
    +
    +
    +
    [2004/03/04]
    +
    +
    +

    Fixed problem with IntInf.fmt (sign would show up on the right instead +of on the left for BIN, OCT, and HEX).

    +
    + +
    +
    +
    +
    +
    +
    [2004/03/04]
    +
    +
    +

    Fixed problem with installer script (unix only) where bin/ml-yacc and +friends pointed (via symlinks) to absolute locations instead of just +.run-sml. This was reported by Vesa A Norrman.

    +
    + +
    +
    +
    +
    +
    +
    +

    Version 110.45; 2004/02/13

    +
    +
    +
    +
    [2004/01/26]
    +
    +
    +

    Improved handling of exceptions at the interactive toplevel.

    +
    + +
    +
    +
    +
    +
    +
    [2004/01/26]
    +
    +
    +

    Type of top-level "app" corrected. +Added code for setting vp_limitPtrMask to Win32-specific runtime.

    +
    + +
    +
    +
    +
    +
    +
    [2003/11/18]
    +
    +
    +
      +
    • +

      changed Timer interface to what might become the spec

      +
    • +
    • +

      POSIX_FLAGS → BIT_FLAGS according to spec

      +
    • +
    • +

      some other minor discrepancies wrt. spec eliminated

      + +
    • +
    +
    +
    +
    +
    +
    +
    +
    +

    Version 110.44; 2003/11/06

    +
    +
    +
    +
    [2003/11/04]
    +
    +
    +

    Eliminated the "dont_move_libraries" directive in config/targets. +(The mechanism was broken and could not be fixed easily. Moreover, +there does not seem to be any reason not to move all libraries into +lib during installation. I originally implemented this directive as a +backward-compatibility feature when I first introduced the new CM. +Now that things have been stable for a long time and going back to the +old CM is not an option, there is no reason to keep it around.)

    +
    + +
    +
    +
    +
    +
    +
    [2003/11/03]
    +
    +
    +

    Made installer honor INSTALLDIR variable again. (Thanks to Chris +Richards for pointing out the problem and providing the solution.)

    +
    + +
    +
    +
    +
    +
    +
    [2003/10/01]
    +
    +
    +

    MLRISC bug fix from Lal.

    +
    + +
    +
    +
    +
    +
    +
    [2003/09/30]
    +
    +
    +
      +
    1. +

      Added openVector, nullRd, and nullWr to PRIM_IO.

      +
    2. +
    3. +

      Improved .bat files (for Win32 port) to make things work under Win95. +(thanks to Aaron S. Hawley for this one)

      + +
    4. +
    +
    +
    +
    +
    +
    +
    +
    [2003/09/26]
    +
    +
    +

    Added missing wrapper for privilege "primitive" in $smlnj/viscomp/core.cm.

    +
    + +
    +
    +
    +
    +
    +
    +

    Version 110.43.3; 2003/09/26

    +
    +
    +
    +
    [2003/09/26]
    +
    +
    +

    I modified the read-eval-print loop so that the autoloader gets +invoked whenever the prettyprinter tries to look up a symbol that +is not currently defined in the toplevel environment but which +appears in CM’s autoload registry. As a result, we see far fewer of +those ?.Foo.Bar.xxx names in the prettyprinter’s output.

    +
    +
    +

    In addition to this I tried to clean up some pieces of the Basis +implementation (e.g., Socket, Word8Array) in order to prevent other +instances of these ?.Foo.Bar.xxx names from being printed.

    +
    +
    +

    The mechanism that picks names for types still needs some work, though. +(Right now it seems that if there is a type A.t which is defined to +be B.u, but B is unavailable at toplevel, then A.t gets printed as +"?.B.u" although the perhaps more sensible solution would be to use +"A.t" in this case. In other words, the prettyprinter should follow +a chain of DEFtycs not farther than there are corresponding toplevel +names in the current environment.)

    +
    + +
    +
    +
    +
    +
    +
    [2003/09/24]
    +
    +
    +

    Another installer tweak: All the ML code for the installer is now +compiled during CMB.make and put into a little library called +$smlnj/installer.cm. The installation then simply invokes

    +
    +
    +
    +
       sml -m $smlnj/installer.cm
    +
    +
    +
    +

    and everything happens automagically.

    +
    +
    +

    Win32: ML code senses value of environment variable SMLNJ_HOME. +Unix: ML code senses values of environment variables ROOT, CONFIGDIR, + and BINDIR.

    +
    +
    +

    The new scheme guarantees that the ML code responsible for the installation +is in sync with the APIs of the main system. Also, the installer is +somewhat faster because the installer script is precompiled.

    +
    + +
    +
    +
    +
    +
    +
    [2003/09/24]
    +
    +
    +

    Added a signature SYNCHRONOUS_SOCKET to basis.cm. This is like SOCKET +but excludes all non-blocking operations. Defined SOCKET (in Basis) +and CML_SOCKET in terms of SYNCHRONOUS_SOCKET. Removed superfluous +implementations of non-blocking operations from CML’s Socket +structure.

    +
    + +
    +
    +
    +
    +
    +
    [2003/09/24]
    +
    +
    +
      +
    1. +

      Fixed SOCKET API and implementation to match Basis spec. +This required changing the internal representation of sockets to one +that remembers (for each socket file descriptor) whether it is currently +blocking or non-blocking. This state is maintained lazily (i.e., a system +call is made only if the state actually needs to change).

      +
    2. +
    3. +

      OS-specific details of sockets were moved into separate files, thus +making it possible to unify the bulk of the socket implementations +between Unix and Win32.

      +
    4. +
    5. +

      CML’s socket API changed accordingly. +(Note that we need to remove non-blocking functions from this API +since they are redundant in the case of CML!)

      +
    6. +
    7. +

      CML’s socket implementation now makes use of non-blocking functions +provided by Basis, thus removing all OS-dependent code from this part +of CML.

      +
    8. +
    9. +

      Changed Real64.precision from 52 to 53. Minor cleanup in Real64 code.

      + +
    10. +
    +
    +
    +
    +
    +
    +
    +
    +

    Version 110.43.2; 2003/09/22

    +
    +
    +
    +
    [2003/09/22]
    +
    +
    +

    Made a new interim version and bootfiles for developer’s bootstrapping +convenience.

    +
    + +
    +
    +
    +
    +
    +
    [2003/09/19]
    +
    +
    +
      +
    1. +

      new-install.sh → install.sh

      +
    2. +
    3. +

      changed default CM "metadata" directory name to ".cm" (instead of "CM")

      +
    4. +
    5. +

      tweaked installer so that another name instead of .cm can be chosen +at install time (by setting the CM_DIR_ARC environment variable +during installation); once installation is complete, the name is +fixed

      + +
    6. +
    +
    +
    +
    +
    +
    +
    +
    +

    Version 110.43.1; 2003/09/18

    +
    +
    +
    +
    [2003/09/18]
    +
    +
    +

    Made a new interim version and bootfiles for developer’s bootstrapping +convenience.

    +
    + +
    +
    +
    +
    +
    +
    [2003/09/18]
    +
    +
    +
      +
    1. +

      Exported fractionsPerSecond etc. from TimeImp (but not from Time as +this seems to be controversial at the moment) and used those in +Posix.ProcEnv.times.

      +
    2. +
    3. +

      Added Time.{from,to}Nanoseconds to Time.

      +
    4. +
    5. +

      Improved Real.{from,to}LargeInt by avoiding needless calculations. +For example, fromLargeInt never needs to look at more than 3 "big +digits" to get its 53 bits of precision.

      + +
    6. +
    +
    +
    +
    +
    +
    +
    +
    [2003/09/17]
    +
    +
    +

    Added an entry to the primitive environment +(compiler/Semant/statenv/prim.sml) for int32→real64 conversion and +added code to compiler/CodeGen/main/mlriscGen.sml to implement it.

    +
    +
    +

    Removed some of the "magic" constants in real64.sml and replaced them +with code that generates these values from their corresponding +integer counterparts.

    +
    +
    +

    Made all(?) the slice-related changes to the Basis and made everything +compile again…​

    +
    + +
    +
    +
    +
    +
    +
    [2003/09/15]
    +
    +
    +

    Fixed bug in Real.fromLargeInt.

    +
    + +
    +
    +
    +
    +
    +
    [2003/09/13]
    +
    +
    +

    Minor bugfix in config/libinstall (set anchor with path to +standalone tool after installing it, otherwise libraries that +need ml-lex or ml-yacc won’t compile the first time the installer +runs).

    +
    + +
    +
    +
    +
    +
    +
    [2003/09/12]
    +
    +
    +
      +
    • +

      fixed bug in Real.toLargeInt

      +
    • +
    • +

      fixed bug in Posix.ProcEnv.times

      +
    • +
    • +

      changed inputLine functions to return an option

      +
    • +
    • +

      minor installer improvements / bugfixes

      +
    • +
    • +

      changed default @SMLalloc parameter for x86/celeron to 64k

      + +
    • +
    +
    +
    +
    +
    +
    +
    +
    +

    Version 110.43; 2003/09/09

    +
    +
    +
    +
    [2003/09/09]
    +
    +
    +

    Rewrote large parts of config/install.sh in SML (config/libinstall.sml). +Modified config/install.bat to take advantage of it. Also modified +config/install.sh (and called it config/new-install.sh) to take advantage +of it on Unix systems. (The SML code is (supposed to be) platform- +independent.)

    +
    +
    +

    The installer can now install everything under Win32 +as well as under *nix as long as it compiles.

    +
    +
    +

    Other changes:

    +
    +
    +
      +
    • +

      made CML compile again under Win32

      +
    • +
    • +

      made eXene compile under Win32 (by providing a fake structure UnixSock +and by using OS.Process.getEnv instead of Posix.ProcEnv.getenv)

      +
    • +
    • +

      fixed a bug in nowhere: it assumed that type OS.Process.status is the +same as type int; under Win32 it isn’t

      +
    • +
    • +

      fixed some slice-related problems in the win32-specific parts of CML

      +
    • +
    • +

      added a functor argument "sameVol" to os-path-fn.sml in the Basis +(under Win32, the volume name is case-insensitive, and the +OS.Path code compares volume names for equality)

      + +
    • +
    +
    +
    +
    +
    +
    +
    +
    [2003/09/08]
    +
    +
    +

    Made Win32 version of OS.FileSys.fullPath return current directory +when given an empty string. This is what the spec says, and incidentally, +CM depends on it. (CM otherwise goes into an infinite loop in certain +cases when presented with the name of a non-existing .cm file.)

    +
    + +
    +
    +
    +
    +
    +
    [2003/09/04]
    +
    +
    +
      +
    1. +

      Changed interface to vectors and arrays in Basis to match +(draft) Basis spec.

      +
    2. +
    3. +

      Added signatures and implementations of slices according to +Basis spec.

      +
    4. +
    5. +

      Edited source code throughout the system to make it compile again +under 1. and 2. (In some cases code had to be added to have it +match the new signatures.)

      +
    6. +
    7. +

      MLRISC should be backward-compatible: the copies of the originals +of files that needed to change under 3. were retained, the .cm files +check the compiler version number and use old versions when +appropriate.

      +
    8. +
    9. +

      Changed type of OS.FileSys.readDir and Posix.FileSys.readdir to +dirstream → string option (in accordance with Basis spec).

      +
    10. +
    11. +

      When generating code that counts lines, ml-lex used function +CharVector.foldli, taking advantage of its old interface. +This has been replaced with the corresponding code from +CharVectorSlice. (html-lex must be re-lexed!)

      +
    12. +
    13. +

      BitArray in smlnj-lib/Util has been extended/modified to match the +new MONO_ARRAY signature. (Do we need BitArraySlice?)

      +
    14. +
    15. +

      Removed temporary additions (fromInternal, toInternal) from the +(now obsolete) IntInf in smlnj-lib/Util.

      +
    16. +
    17. +

      Cleaned up structure Byte.

      +
    18. +
    19. +

      Added localOffset, scan, and fromString to Date (according to spec). +Cleaned/corrected implementation of Date. +(Still need to check for correctness; implement better canonicalizeDate.)

      +
    20. +
    21. +

      Added "scan" to signature IEEE_REAL.

      +
    22. +
    23. +

      Some improvements to IntInf [in particular: efficiency-hack for +mod and rem when second operand is 2 (for parity checks).]

      +
    24. +
    25. +

      Changed representation of type Time.time, using a single IntInf.int +value counting microseconds. This considerably simplified the +implementation of structure Time. We now support negative time +values; scan and fromString handle signs.

      +
    26. +
    27. +

      Functor PrimIO now takes two additional arguments (VectorSlice and +ArraySlice).

      + +
    28. +
    +
    +
    +
    +
    +
    +
    +
    [2003/08/28]
    +
    +
    +

    This is a major update which comes with a version number bump +(110.42.99 — yes, we are really close to 110.43 :-), NEW BOOTFILES, +and an implementation of IntInf in the Basis.

    +
    +
    +
    +
    +
    +

    There are a fairly large number of related changes and updates throughout +the system:

    +
    +
    +

    + +Basis: + - Implemented IntInf. + - Made LargeInt a projection of IntInf (by filtering through INTEGER). + - Added some missing Real64 operations, most notably Real.toLargeInt. + - Added FixedInt as a synonym for Int32.

    +
    +
    +

    + +compiler: + * Added support for a built-in intinf type. +  —  - literals + - pattern matching + - conversion shortcuts (Int32.fromLarge o Int.toLarge etc.) + - overloading on literals and operations +  —  This required adding a primitive type intinf, some additional + primops, and implementations for several non-trivial intinf + operations in Core. (The intinf type is completely abstract + to the compiler; all operations get delegated back to the Core.)

    +
    +
    +

    + + * Intinf equality is handled by polyequal. However, the compiler + does not print its usual warning in this case (since polyequal + is the right thing to do there).

    +
    +
    +

    + + * Improved the organization of structure InlineT.

    +
    +
    +

    + + * A word about conversion primops: + If conversions involving intinf do not cancel out during + CPS contract, then the compiler must insert calls to Core functions. + Since all core access must be resolved already during the FLINT + translate phase, it would be too late a the time of CPS contract + to add new Core calls. For this reason, conversion primops + for intinf carry two arguments: 1. the numeric argument that + they are supposed to convert, and 2. the Core function that + can help with this conversion if necessary. If CPS contract + eliminates a primop, then the associated Core function becomes + dead and goes away. Intinf conversion primops that do not get + eliminated by CPS contract get rewritten into calls of their + core functions by a separate, new phase.

    +
    +
    +

    + +interactive system: + - Control.Print.intinfDepth controls max length of intinf constants + being printed. (Analogous to Control.Print.stringDepth.) + - Cleanup in printutil and pputil: got rid of unused stuff and + duplicates; replaced some of the code with code that makes better + use of library functionality.

    +
    +
    +

    + +CM: + Bugfix: parse-errors in init group (system/smlnj/init/init.cmi) + are no longer silent.

    +
    +
    +

    + +CKIT: + Fixed mismatched uses of Int32 and LargeInt. I always decided + in favor of LargeInt — which is now the same as IntInf. + CKIT-knowledgable people should check whether this is what’s + intended and otherwise change things back to using Int32 or + FixedInt.

    +
    +
    +

    + +Throughout the code: + Started using IntInf.int literals and built-in operations + (e.g., comparison with 0) where this seems appropriate.

    +
    +
    +

    +

    +
    + +
    +
    +
    [2003/08/13]
    +
    +
    +

    Merging changes from the mcz-branch development branch into trunk. +These changes involve replacement of the emulated old prettyprinter +interface with direct use of the SML/NJ Lib PP library, and fixing +of a couple of bugs (895, 1186) relating to error messages. A new +prettyprinter for ast datatypes (Elaborator/print/ppast.{sig,sml}) +has been added.

    +
    + +
    +
    +
    +
    +
    +
    +

    Version 110.42.9; 2003/08/11

    +
    +
    +
    +
    [2003/08/11]
    +
    +
    +

    This patch restores SML/NJ’s ability to run under win32. There are a +number of changes, including fixes for several bugs that had gone +unnoticed until now:

    +
    +
    +
    +
    +
      +
    • +

      uname "CYGWIN_NT*" is recognized as win32 (This is relevant only when +trying to run the win32 version from within cygwin.)

      +
    • +
    • +

      There are a number of simple .bat scripts that substitute for their +corresponding Unix shell-scripts. (See below.)

      +
    • +
    • +

      The internals of ml-build have been modified slightly. The main +difference is that instead of calling ".link-sml" (or link-sml.bat) +using OS.Process.system, the ML process delegates this task back +to the script. Otherwise problems arise in mixed environments such +as Cygwin where scripts look and work like Unix scripts, but +where OS.Process.system cannot run them.

      +
    • +
    • +

      In CM, the srcpath pickler used native pathname syntax — which +is incorrect in the case of cross-compilation. The new pickle format +is independent of platform-specific naming conventions.

      +
    • +
    • +

      Path configuration files (such as lib/pathconfig) can now choose +between native and standard syntax. Placing a line of the form

      +
      +
      +
      standard!
      +
      +
      +
      +

      into the file causes all subsequent paths to be interpreted using +CM standard pathname syntax (= Unix conventions); a line

      +
      +
      +
      +
      native!
      +
      +
      +
      +

      switches back to native style. This was needed so that +path config files can be written portably, see src/system/pathconfig.

      +
      +
    • +
    • +

      Runtime system:

      +
      +
        +
      • +

        win32-filesys.c: get_file_time and set_file_time now +access modification time, not creation time.

        +
      • +
      • +

        I/O code made aware of new array representation.

        +
      • +
      • +

        Bug fixes in X86.prim.masm.

        +
      • +
      • +

        src/system/makeml made aware of win32. (For use under cygwin +and other Unix-environments for windows.)

        +
      • +
      • +

        In Basis, fixed off-by-one error in win32-io.sml (function vecF) +which caused BinIO.inputAll to fail consistently.

        +
      • +
      +
      +
    • +
    • +

      .bat scripts:

      +
      +
      +
      Windows .bat scripts assume that `SMLNJ_HOME` is defined.
      +
      +
      +
      +
        +
      • +

        sml.bat, ml-yacc.bat, ml-lex.bat: Driver scripts for standalone +applications (sml, ml-yacc, ml-lex).

        +
      • +
      • +

        ml-build.bat: analogous to ml-build.

        +
      • +
      • +

        config\install.bat: Analogous to config/install.sh. This requires +that SMLNJ_HOME is set and that Microsoft Visual C is ready to use. +(nmake etc. must be on the path, and vcvars32 must have been run.) +Moreover, sources for ml-lex and ml-yacc need to exist under src, +and the bootfile hierarchy must have been unpacked under +sml.boot.x86-win32. +The script is very primitive and does a poor job at error checking. +It only installs the base system, ml-lex, and ml-yacc. No other +libraries are being installed (i.e., you get only those that +are part of the compiler.)

        +
      • +
      • +

        link-sml.bat: analogous to .link-sml, but not currently used

        +
      • +
      +
      +
    • +
    • +

      Unrelated bug fixes:

      +
      +
        +
      • +

        ml-nlffigen now exports structures ST_* corresponding to incomplete +types.

        +
      • +
      • +

        Added getDevice to PP/src/pp-debug-fn.sml. (Would not compile +otherwise.)

        +
      • +
      +
      +
    • +
    +
    +
    +
    + +
    +
    +
    +
    +
    +
    [2003/06/17]
    +
    +
    +

    Modified compiler/Elaborator/print/pptype.sml to fix bug 895. +Tag will be used for new development branch (mcz-branch) for +use by MacQueen, (Lucasz) Zairek, and (George) Cao at uchicago.

    +
    + +
    +
    +
    +
    +
    +
    [2003/05/27]
    +
    +
    +

    Tried to eliminated most cases of polymorphic equality.

    +
    + +
    +
    +
    +
    +
    +
    [2003/05/21]
    +
    +
    +

    Two changes:

    +
    +
    +
      +
    1. +

      Added a flag for controlling whether non-exhaustive bindings will +be treated as errors (default is false).

      +
    2. +
    3. +

      Cleaned up the entire source tree so that CMB.make goes through +without a single non-exhaustive match- or bind warning.

      + +
    4. +
    +
    +
    +
    +
    +
    +
    +
    [2003/05/17]
    +
    +
    +
      +
    1. +

      Added cases for IF, WHILE, ANDALSO, and ORELSE to Absyn.

      +
      +

      This mainly affects the quality of error messages. However, some +of the code is now more straightforward than before. (Treatment of +the above four constructs in translate.sml is much simpler than +the "macro-expansion" that was going on before. Plus, the mach- +compiler no longer gets invoked just to be able to compile an +if-expression.)

      +
      +
    2. +
    3. +

      The ErrorMsg.Error exception is now caught and absorbed by the +interactive loop.

      + +
    4. +
    +
    +
    +
    +
    +
    +
    +
    [2003/05/16]
    +
    +
    +

    Ported the runtime system to cygwin, which uses the unix +x86-unix bin files. Missing/buggy features:

    +
    +
    +
    +
    +
    +
    +
    +
      +
    • +

      getnetbyname, getnetbyaddr: these functions seem to be missing in +the Cygwin library.

      +
    • +
    • +

      Ctrl-C handling may be flaky.

      +
    • +
    • +

      Windows system calls and Windows I/O are not supported.

      +
    • +
    +
    +
    +
    +
    +

    A new set of binfiles is located at:

    +
    + +
    +

    + +This is only needed for bootstrapping the cygwin version of smlnj. +Other x86 versions can use the existing binfiles.

    +
    + +
    +
    +
    [2003/04/08]
    +
    +
    +
    +
    +
      +
    1. +

      Added a target 'mlrisc' to installer.

      +
    2. +
    3. +

      Added missing elements to structure ListPair.

      +
    4. +
    +
    +
    +
    + +
    +
    +
    +
    +
    +
    [2003/01/07]
    +
    +
    +

    Fixed a bug in Int.rem(x,y) where y is a power of 2 on x86. +The arguments to the SUBL instruction were swapped.

    +
    + +
    +
    +
    +
    +
    +
    [2002/12/12]
    +
    +
    +

    Fixed a serious bug in the rewrite code for FP spilling/reloading that +sent the RA into an infinite loop when floating point registers get +spilled. (Because of this bug, e.g., nucleic stopped compiling between +110.37 and 110.38.) +There was another set of potential problems related to the handling of +MLRISC annotations (but those did not yet cause real problems, apparently).

    +
    + +
    +
    +
    +
    +
    +
    [2002/12/06]
    +
    +
    +

    Added a call of SrcPath.sync at the beginning of Parse.parse (in CM). +This fixes the problem of CM getting confused by files that suddenly +change their identity (e.g., by getting unlinked and recreated by some +text editor such as vi). There might be a better/cheaper/cleaner way +of doing this, but for now this will have to do.

    +
    + +
    +
    +
    +
    +
    +
    [2002/10/28]
    +
    +
    +

    Exported structure Typecheck from $smlnj/viscomp/core.cm.

    +
    + +
    +
    +
    +
    +
    +
    +

    Version 110.42.1; 2002/10/17

    +
    +
    +
    +
    [2002/10/17]
    +
    +
    +

    In good old tradition, there has been a slight hiccup so that we have +to patch 110.42 after the fact. The old release tag has been replaced +(see below).

    +
    +
    +

    The change solves a problem with two competing approaches the +configuration problem regarding MacOS 10.1 vs. MacOS 10.2 which got in +each other’s way.

    +
    +
    +

    This change only affects the runtime system code and the installer script. +(No new bootfiles.)

    +
    + +
    +
    +
    +
    +
    +
    +

    Version 110.42; 2002/10/16

    +
    +
    +
    +
    [2002/10/10]
    +
    +
    +

    The mltree operator DIVS must be implemented with an overflow check on +the PPC because the hardware indicates divide-by-zero using "overflow" as +well.

    +
    + +
    +
    +
    +
    +
    +
    [2002/07/23]
    +
    +
    +

    Sml now senses the SMLNJ_HOME environment variable. If this is set, +then the bin dir is assumed to be in $SMLNJ_HOME/bin and (unless +CM_PATHCONFIG is also set), the path configuration file is assumed +to be in $SMLNJ_HOME/lib/pathconfig. This way one can easily move +the entire tree to some other place and everything will "just work".

    +
    +
    +

    (Companion commands such as ml-build and ml-makedepend also sense this +variable.)

    +
    + +
    +
    +
    +
    +
    +
    [2002/07/12]
    +
    +
    +

    Exported two useful "step" functions from liveness module (MLRISC).

    +
    + +
    +
    +
    +
    +
    +
    +

    Version 110.41; 2002/07/05

    +
    +
    +
    +
    [2002/07/05]
    +
    +
    +

    Exported structure BTImp from $smlnj/viscomp/debugprof.cm so that +other clients can set up backtracing support.

    +
    + +
    +
    +
    +
    +
    +
    [2002/06/25]
    +
    +
    +

    Fixed a bug in translation of INLMAX (and INLMIN) for the floating-point +case. (The sense of the isNaN test was reversed — which made min and +max always return their first argument.)

    +
    + +
    +
    +
    +
    +
    +
    [2002/06/11]
    +
    +
    +

    Back-ported OS.Path.{from,to}UnixPath from idlbasis-devel branch.

    +
    + +
    +
    +
    +
    +
    +
    [2002/06/10]
    +
    +
    +

    I back-ported my implementation of IEEEReal.fromString from the +idlbasis-devel branch so that we can test it.

    +
    +
    +

    Another small change is that ppDec tries to give more information +than just "<sig>" in the case of functors. However, this code is +broken in some mysterious way if the functor’s body’s signature +has not been declared by ascription but gets inferred from the +implementation. This needs fixing…​

    +
    + +
    +
    +
    +
    +
    +
    [2002/05/31]
    +
    +
    +

    Resurrected SMLofNJ.Internals.BTrace.mode. (It accidentally fell by +the wayside when I switched over to using Controls everywhere.)

    +
    + +
    +
    +
    +
    +
    +
    [2002/05/23]
    +
    +
    +

    Labels are now displayed in the graphical output to make +the fall-through and target blocks obvious.

    +
    + +
    +
    +
    +
    +
    +
    [2002/05/22]
    +
    +
    +

    John tweaked yesterday’s fix for 1131 to handle an out-of-memory +situation that comes up when allocating huge arrays.

    +
    + +
    +
    +
    +
    +
    +
    +

    Version 110.40; 2002/05/21

    +
    +
    +
    +
    [2002/05/21]
    +
    +
    +

    John Reppy fixed GC bug 1131.

    +
    + +
    +
    +
    +
    +
    +
    [2002/05/21]
    +
    +
    +

    CM documentation update.

    +
    + +
    +
    +
    +
    +
    +
    [2002/05/21]
    +
    +
    +
      +
    • +

      John tweaked runtime to be silent on heap export (except when +GC messages are on).

      +
    • +
    • +

      I added a few more things (cross-compiling versions of CMB) to +config/preloads (as suggestions).

      + +
    • +
    +
    +
    +
    +
    +
    +
    +
    [2002/05/20]
    +
    +
    +
      +
    • +

      Added ControlUtil structure to control-lib.cm.

      +
    • +
    • +

      Use it throughout.

      +
    • +
    • +

      Used Controls facility to define MLRISC controls (as opposed to +registering MLRISC control ref cells with Controls after the fact)

      +
    • +
    • +

      Fixed messed-up controls priorities.

      +
      +
        +
      • +

        Removed again all the stuff from config/preloads that one wouldn’t +be able to preload at the time the initial heap image is built. +(Many libraries, e.g., CML, do not exist yet at this time. The + only libraries that can be preloaded via config/preloads are those + that come bundled with the bootfiles.)

        + +
      • +
      +
      +
    • +
    +
    +
    +
    +
    +
    +
    +
    [2002/05/20]
    +
    +
    +

    Added a lot of commented-out suggestions for things to be included +in config/preloads.

    +
    + +
    +
    +
    +
    +
    +
    [2002/05/18]
    +
    +
    +
      +
    • +

      Made the mdl tool stuff compile and run again.

      +
    • +
    • +

      I’ve disabled all the stuff that depends on RTL specifications; they +are all badly broken anyway.

      + +
    • +
    +
    +
    +
    +
    +
    +
    +
    [2002/05/17]
    +
    +
    +
      +
    1. +

      John Reppy made several modifications to the SML/NJ Library. +In particular, there is a shiny new controls-lib.cm.

      +
    2. +
    3. +

      Pushed new controls interface through compiler so that everything +compiles again.

      +
    4. +
    5. +

      Added FormatComb and FORMAT_COMB to the CML version of the +SML/NJ Library (so that CML compiles again).

      +
    6. +
    7. +

      Modified init scripts because XXX_DEFAULT environment variables +are no longer with us. (Boot-time initialization is now done +using the same environment variables that are also used for +startup-time initialization of controls.)

      + +
    8. +
    +
    +
    +
    +
    +
    +
    +
    [2002/05/15]
    +
    +
    +

    All pseudo-ops emitted before the first segment declaration +such as TEXT, DATA, and BSS directives are assumed to be global +declarations and are emitted first in the assembly file. This is +useful in a number of situations where one has pseudo-ops that are not +specific to any segment, and also works around the constraint that one +cannot have client pseudo-ops in the TEXT segment.

    +
    +
    +

    Because no segment is associated with these declarations it is +an error to allocate any space or objects before the first segment +directive and an exception will be raised. However, we cannot make +this check for client pseudo-ops.

    +
    +
    +

    These top level declarations are a field in the CFG graph_info. +In theory you can continue to add to this field after the CFG has been +built — provided you know what you are doing;-)

    +
    + +
    +
    +
    +
    +
    +
    [2002/05/13]
    +
    +
    +

    A few minor bugfixes:

    +
    +
    +
      +
    • +

      Stopgap measure for bug recently reported by Elsa Gunter (ppDec). +(Bogus printouts for redefined bindings still occur. Compiler +bug should no longer occur now. We need to redo the prettyprinter +from scratch.)

      +
    • +
    • +

      CM pathname printer now also adds escape sequences for ( and )

      +
    • +
    • +

      commend and docu fixes for ml-nlffi

      + +
    • +
    +
    +
    +
    +
    +
    +
    +
    [2002/05/10]
    +
    +
    +

    Applied the following bugfix provided by Emden Gansner:

    +
    +
    +
    +
    Output is corrupted when outputSubstr is used rather than output.
    +
    +
    +
    +
    +
    The problem occurs when a substring
    +
    +
    +
    +
    +
    ss = (s, dataStart, dataLen)
    +
    +
    +
    +
    +
    where dataStart > 0, fills a stream buffer with avail bytes left.
    +avail bytes of s, starting at index dataStart, are copied into the
    +buffer, the buffer is flushed, and then the remaining dataLen-avail
    +bytes of ss are copied into the beginning of the buffer. Instead of
    +starting this copy at index dataStart+avail in s, the current code
    +starts the copy at index avail.
    +
    +
    +
    +
    +
      Fix:
    +  In text-io-fn.sml, change line 695 from
    +val needsFlush = copyVec(v, avail, dataLen-avail, buf, 0)
    +  to
    +val needsFlush = copyVec(v, dataStart+avail, dataLen-avail, buf, 0)
    +
    +
    + +
    +
    +
    +
    +
    +
    [2002/04/12]
    +
    +
    +
      +
    1. +

      Grabbed newer assyntax.h from the XFree86 project.

      +
    2. +
    3. +

      Fiddled with how to compile X86.prim.asm without warnings.

      +
    4. +
    5. +

      (Very) Minor cleanup in CM.

      + +
    6. +
    +
    +
    +
    +
    +
    +
    +
    [2002/04/01]
    +
    +
    +

    Added full support for div/mod/rem/quot on the x86, using the machine +instruction’s two results (without clumsily recomputing the remainder) +directly where appropriate.

    +
    +
    +

    Some more extensive power-of-two support was added to the x86 instruction +selector (avoiding expensive divs, mods, and muls where they can be +replaced with cheaper shifts and masks). However, this sort of thing +ought to be done earlier, e.g., within the CPS optimizer so that +all architectures benefit from it.

    +
    +
    +

    The compiler compiles to a fixed point, but changes might be somewhat +fragile nevertheless. Please, report any strange things that you might +see wrt. div/mod/quot/rem…​

    +
    + +
    +
    +
    +
    +
    +
    [2002/03/29]
    +
    +
    +

    Fixed my broken div/mod logic. Unfortunately, this means that the +inline code for div/mod now has one more comparison than before. +Fast paths (quotient > 0 or remainder = 0) are not affected, though. +The problem was with quotient = 0, because that alone does not tell +us which way the rounding went. One then has to look at whether +remainder and divisor have the same sign…​ :(

    +
    +
    +

    Anyway, I replaced the bootfiles with fresh ones…​

    +
    + +
    +
    +
    +
    +
    +
    +

    Version 110.39.3; 2002/03/29

    +
    +
    +
    +
    [2002/03/29]
    +
    +
    +

    Primops have changed. This means that the bin/boot-file formats have +changed as well.

    +
    +
    +

    To make sure that there is no confusion, I made a new version.

    +
    +
    +

    + +CHANGES:

    +
    +
    +

    +

    +
    +
    +
      +
    • +

      removed REMT from mltree (remainder should never overflow).

      +
    • +
    • +

      added primops to deal with divisions of all flavors to the frontend

      +
    • +
    • +

      handled these primops all the way through so they map to their respective +MLRISC support

      +
    • +
    • +

      used these primops in the implementation of Int, Int32, Word, Word32

      +
    • +
    • +

      removed INLDIV, INLMOD, and INLREM as they are no longer necessary

      +
    • +
    • +

      parameterized INLMIN, INLMAX, and INLABS by a numkind

      +
    • +
    • +

      translate.sml now deals with all flavors of INL{MIN,MAX,ABS}, including +floating point

      +
    • +
    • +

      used INL{MIN,MAX,ABS} in the implementation of Int, Int32, Word, Word32, +and Real (but Real.abs maps to a separate floating-point-only primop)

      +
      +

      TODO items:

      +
      +
    • +
    • +

      Hacked Alpha32 instruction selection, disabling the selection of REMx +instructions because the machine instruction encoder cannot handle +them. (Hppa, PPC, and Sparc instruction selection did not handle +REM in the first place, and REM is supported by the x86 machine coder.)

      +
    • +
    • +

      Handle DIV and MOD with DIV_TO_NEGINF directly in the x86 instruction +selection phase. (The two can be streamlined because the hardware +delivers both quotient and remainder at the same time anyway.)

      +
    • +
    • +

      Think about what to do with "valOf(Int32.minInt) div ~1" and friends. +(Currently the behavior is inconsistent both across architectures and +wrt. the draft Basis spec.)

      +
    • +
    • +

      Word8 should eventually be handled natively, too.

      +
    • +
    • +

      There seems to be one serious bug in mltree-gen.sml. It appears, though, +as if there currently is no execution path that could trigger it in +SML/NJ. (The assumptions underlying functions arith and promotable do not +hold for things like multiplication and division.)

      + +
    • +
    +
    +
    +
    +
    +
    +
    +
    [2002/03/27]
    +
    +
    +

    Added support for all four division operations (ML’s div, mod, quot, +and rem) to MLRISC. In the course of doing so, I also rationalized +the naming (no more annoying switch-around of DIV and QUOT), by +parameterizing the operation by div_rounding_mode (which can be either +DIV_TO_ZERO or DIV_TO_NEGINF).

    +
    +
    +

    The generic MLTreeGen functor takes care of compiling all four +operations down to only round-to-zero div.

    +
    +
    +

    Missing pieces:

    +
    +
    +
      +
    • +

      Doing something smarter than relying on MLTreeGen on architectures +like, e.g., the x86 where hardware division delivers both quotient and +remainder at the same time. With this, the implementation of the +round-to-neginf operations could be further streamlined.

      +
    • +
    • +

      Remove inlining support for div/mod/rem from the frontend and replace it +with primops that get carried through to the backend. Do this for all +int and word types.

      + +
    • +
    +
    +
    +
    +
    +
    +
    +
    [2002/03/25]
    +
    +
    +

    I improved (hopefully without breaking them) the implementation of Int.div, +Int.mod, and Int.rem. For this, the code in translate.sml now takes +advantage of the following observations:

    +
    +
    +
    +
    Let  q = x quot y      r = x rem y
    +     d = x div  y      m = x mod y
    +
    +
    +
    +

    where "quot" is the round-to-zero version of integer division that +hardware usually provides. Then we have:

    +
    +
    +
    +
    r = x - q * y        where neither the * nor the - will overflow
    +d = if q >= 0 orelse x = q * y then q else q - 1
    +                     where neither the * nor the - will overflow
    +m = if q >= 0 orelse r = 0 then r else r + y
    +                     where the + will not overflow
    +
    +
    +
    +

    This results in substantial simplification of the generated code. +The following table shows the number of CFG nodes and edges generated +for + fun f (x, y) = x OPER y + (* with OPER \in div, mod, quot, rem *)

    +
    +
    +

    + + OPER | nodes(old) | edges(old) | nodes(new) | edges(new) + -------------------------------------------------------- + div | 24 | 39 | 12 | 16 + mod | 41 | 71 | 12 | 16 + quot | 8 | 10 | 8 | 10 + rem | 10 | 14 | 8 | 10

    +
    + +
    +
    +
    +
    +
    +
    [2002/03/25]
    +
    +
    +

    Fixed a bug in cproto (c prototype decoder).

    +
    + +
    +
    +
    +
    +
    +
    [2002/03/25]
    +
    +
    +

    I did some cleanup to Allen’s new primop code and +replaced yesterday’s bootfiles with new ones. +(But they are stored in the same place.)

    +
    + +
    +
    +
    +
    +
    +
    [2002/03/24]
    +
    +
    +

    Made the bootfiles that Allen asked for.

    +
    + +
    +
    +
    +
    +
    +
    [2002/03/23]
    +
    +
    +
    +
    +
      +
    1. +

      Changes to FLINT primops:

      +
      +
      +
          (* make a call to a C-function;
      +     * The primop carries C function prototype information and specifies
      +     * which of its (ML-) arguments are floating point. C prototype
      +     * information is for use by the backend, ML information is for
      +     * use by the CPS converter. *)
      +  | RAW_CCALL of { c_proto: CTypes.c_proto,
      +                   ml_args: ccall_type list,
      +                   ml_res_opt: ccall_type option,
      +                   reentrant : bool
      +                 } option
      +   (* Allocate uninitialized storage on the heap.
      +    * The record is meant to hold short-lived C objects, _i.e._, they
      +    * are not ML pointers.  With the tag, the representation is
      +    * the same as RECORD with tag tag_raw32 (sz=4), or tag_fblock (sz=8)
      +    *)
      +  | RAW_RECORD of {tag:bool,sz:int}
      +  and ccall_type = CCALL_INT32 | CCALL_REAL64 | CCALL_ML_PTR
      +
      +
      +
    2. +
    3. +

      These CPS primops are now overloaded:

      +
      +
      +
             rawload of {kind:numkind}
      +       rawstore of {kind:numkind}
      +
      +
      +
      +

      The one argument form is:

      +
      +
      +
      +
               rawload {kind} address
      +
      +
      +
      +

      The two argument form is:

      +
      +
      +
      +
               rawload {kind} [ml object, byte-offset]
      +
      +
      +
    4. +
    5. +

      RAW_CCALL/RCC now takes two extra arguments:

      +
      +
        +
      1. +

        The first is whether the C call is reentrant, i.e., whether +ML state should be saved and restored.

        +
      2. +
      3. +

        The second argument is a string argument specifying the name of +library and the C function.

        +
        +

        These things are currently not handled in the code generator, yet.

        +
        +
      4. +
      +
      +
    6. +
    7. +

      In CProto,

      +
      +

      An encoding type of "bool" means "ml object" and is mapped into +C prototype of PTR. Note that "bool" is different than "string", +even though "string" is also mapped into PTR, because "bool" +is assigned an CPS type of BOGt, while "string" is assigned INT32t.

      +
      +
    8. +
    9. +

      Pickler/unpicker

      +
      +

      Changed to handle RAW_RECORD and newest RAW_CCALL

      +
      +
    10. +
    11. +

      MLRiscGen,

      +
      +
        +
      1. +

        Changed to handle the new rawload/rawstore/rawrecord operators.

        +
      2. +
      3. +

        Code for handling C Calls has been moved to a new module CPSCCalls, +in the file CodeGen/cpscompile/cps-c-calls.sml

        +
      4. +
      +
      +
    12. +
    13. +

      Added the conditional move operator

      +
      +
      +
               condmove of branch
      +
      +
      +
      +

      to cps. Generation of this is still buggy so it is currently +disabled.

      +
      +
    14. +
    +
    +
    +
    + +
    +
    +
    +
    +
    +
    [2002/03/22]
    +
    +
    +

    Implemented the Ball-Larus branch prediction-heuristics, and +incorporated graphical viewers for control flow graphs.

    +
    +
    +

    Ball-Larus Heuristics:

    +
    +
    +

    See the file compiler/CodeGen/cpscompile/cpsBranchProb.sml.

    +
    +
    +

    By design it uses the Dempster-Shafer theory for combining +probabilities. For example, in the function:

    +
    +
    +
    +
    fun f(n,acc) = if n = 0 then acc else f(n-1, n*acc)
    +
    +
    +
    +

    the ball-larus heuristics predicts that the n=0 is unlikely +(OH-heuristic), and the 'then' branch is unlikely because of the +RH-heuristic — giving the 'then' branch an even lower combined +probability using the Dempster-Shafer theory.

    +
    +
    +

    Finally, John Reppy’s loop analysis in MLRISC, further lowers the +probability of the 'then' branch because of the loop in the else +branch.

    +
    +
    +

    + +Graphical Viewing:

    +
    +
    +

    + +I merely plugged in Allen’s graphical viewers into the compiler. The +additional code is not much. At the top level, saying:

    +
    +
    +

    + + Control.MLRISC.getFlag "cfg-graphical-view" := true;

    +
    +
    +

    + +will display the graphical view of the control flow graph just before +back-patching. daVinci must be in your path for this to work. If +daVinci is not available, then the default viewer can be changed +using:

    +
    +
    +

    + + Control.MLRISC.getString "viewer"

    +
    +
    +

    + +which can be set to "dot" or "vcg" for the corresponding viewers. Of +course, these viewers must be in your path.

    +
    +
    +

    + +The above will display the compilation unit at the level of clusters, +many of which are small, boring, and un-interesting. Also setting:

    +
    +
    +

    + + Control.MLRISC.getInt "cfg-graphical-view_size"

    +
    +
    +

    + +will display clusters that are larger than the value set by the above.

    +
    +
    +

    + +Lal George

    +
    +
    +
    +
    +
    +
    +
    [2002/03/21]
    +
    +
    +

    Changed the interface to the KMP routine in PreString and fixed +a minor bug in one place where it was used.

    +
    + +
    +
    +
    +
    +
    +
    [2002/03/21]
    +
    +
    +

    Fixed a potential problem in cfg edge splitting.

    +
    + +
    +
    +
    +
    +
    +
    [2002/03/21]
    +
    +
    +
    +
    +
      +
    1. +

      Recoded the buggy parts of x86-fp.

      +
      +
        +
      1. +

        All the block reordering code has been removed. +We now depend on the block placement phases to do this work.

        +
      2. +
      3. +

        Critical edge splitting code has been simplified and moved into the +CFG modules, as where they belong.

        +
        +
        +
        Both of these were quite buggy and complex.  The code is now much, much
        +simpler.
        +
        +
        +
      4. +
      +
      +
    2. +
    3. +

      X86 backend.

      +
      +
        +
      1. +

        Added instructions for 64-bit support. Instruction selection for +64-bit has not been committed, however, since that +requires changes to MLTREE which haven’t been approved by +Lal and John.

        +
      2. +
      3. +

        Added support for FUCOMI and FUCOMIP when generating code for +PentiumPro and above. We only generate these instructions in +the fast-fp mode.

        +
      4. +
      5. +

        Added cases for JP and JNP in X86FreqProps.

        +
      6. +
      +
      +
    4. +
    5. +

      CFG

      +
      +
      +
      CFG now has a bunch of methods for edge splitting and merging.
      +
      +
      +
    6. +
    7. +

      Machine description.

      +
      +
      +
      John's simplification of MLTREE_BASIS.fcond broke a few machine
      +description things:
      +
      +
      +
      +
      +
      rtl-build.{sig,sml} and hppa.mdl fixed.
      +
      +
      +
      +
      +
      NOTE: the machine description stuff in the repository is still broken.
      +      Again, I can't put my fixes in because that involves
      +      changes to MLTREE.
      +
      +
      +
    8. +
    +
    +
    +
    + +
    +
    +
    +
    +
    +
    [2002/03/20]
    +
    +
    +

    Implemented Knuth-Morris-Pratt string matching in PreString and used +it for String.isSubstring, Substring.isSubstring, and +Substring.position.

    +
    +
    +

    (Might need some stress-testing. Simple examples worked fine.)

    +
    + +
    +
    +
    +
    +
    +
    [2002/03/19]
    +
    +
    +

    Added a structure C.W and functions convert/Ptr.convert to ml-nlffi-lib.

    +
    +
    +

    This implements a generic mechanism for changing constness qualifiers +anywhere within big C types without resorting to outright "casts". +(So far, functions such as C.rw/C.ro or C.Ptr.rw/C.Ptr.ro only let you +modify the constness at the outermost level.) +The implementation of "convert" is based on the idea of "witness" +values — values that are not used by the operation but whose types +"testify" to their applicability. On the implementation side, "convert" +is simply a projection (returning its second curried argument). With +cross-module inlining, it should not result in any machine code being +generated.

    +
    + +
    +
    +
    +
    +
    +
    [2002/03/15]
    +
    +
    +

    Provided (preliminary?) implementations for

    +
    +
    +
    +
    {String,Substring}.{concatWith,isSuffix,isSubstring}
    +
    +
    +
    +

    and

    +
    +
    +
    +
    Substring.full
    +
    +
    +
    +

    Those are in the Basis spec but they were missing in SML/NJ.

    +
    + +
    +
    +
    +
    +
    +
    [2002/03/14]
    +
    +
    +

    Controls:

    +
    +
    +
      +
    1. +

      Factored out the recently-added Controls : CONTROLS stuff and put +it into its own library $/controls-lib.cm. The source tree for +this is under src/smlnj-lib/Controls.

      +
    2. +
    3. +

      Changed the names of types and functions in this interface, so they +make a bit more "sense":

      +
      +
      +
      module -> registry
      +'a registry -> 'a group
      +
      +
      +
    4. +
    5. +

      The interface now deals in ref cells only. The getter/setter interface +is (mostly) gone.

      +
    6. +
    7. +

      Added a function that lets one register an already-existing ref cell.

      +
    8. +
    9. +

      Made the corresponding modifications to the rest of the code so that +everything compiles again.

      +
    10. +
    11. +

      Changed the implementation of Controls.MLRISC back to something closer +to the original. In particular, this module (and therefore MLRISC) +does not depend on Controls. There now is some link-time code in +int-sys.sml that registers the MLRISC controls with the Controls +module.

      +
      +

      CM:

      +
      +
      +
        +
      • +

        One can now specify the lambda-split aggressiveness in init.cmi.

        + +
      • +
      +
      +
    12. +
    +
    +
    +
    +
    +
    +
    +
    [2002/03/13]
    +
    +
    +

    Bug fix for:

    +
    +
    +
    +
    > leunga@weaselbane:~/Yale/tmp/sml-dist{21} bin/sml
    +> Standard ML of New Jersey v110.39.1 [FLINT v1.5], March 08, 2002
    +> - fun f(x,(y,z)) = Real.~ y;
    +> [autoloading]
    +> [autoloading done]
    +>       fchsl   (%eax), 184(%esp)
    +> Error: MLRisc bug: X86MCEmitter.emitInstr
    +>
    +> uncaught exception Error
    +>   raised at: ../MLRISC/control/mlriscErrormsg.sml:16.14-16.19
    +
    +
    +
    +

    The problem was that the code generator did not generate any fp registers +in this case, and the ra didn’t know that it needed to run the X86FP phase to +translate the pseudo fp instruction. This only happened with unary fp +operators in certain situations.

    +
    + +
    +
    +
    +
    +
    +
    [2002/03/13]
    +
    +
    +
      +
    1. +

      Added _overload as a synonym for overload for backward compatibility. +(Control.overloadKW must be true for either version to be accepted.)

      +
    2. +
    3. +

      Fixed bug in install script that caused more things to be installed +than what was requested in config/targets.

      +
    4. +
    5. +

      Made CM aware of the (_)overload construct so that autoloading +works.

      + +
    6. +
    +
    +
    +
    +
    +
    +
    +
    [2002/03/12]
    +
    +
    +

    Forgot to update BOOT and srcarchiveurl.

    +
    + +
    +
    +
    +
    +
    +
    +

    Version 110.39.2; 2002/03/12

    +
    +
    +
    +
    [2002/03/12]
    +
    +
    +

    Yet another version number bump (because of small changes to the +binfile format). Version number is now 110.39.2. NEW BOOTFILES!

    +
    +
    +

    Changes:

    +
    +
    +
    +
    The new pid generation scheme described a few weeks ago was overly
    +complicated.  I implemented a new mechanism that is simpler and
    +provides a bit more "stability":  Once CM has seen a compilation
    +unit, it keeps its identity constant (as long as you do not delete
    +those crucial CM/GUID/* files).  This means that when you change
    +an interface, compile, then go back to the old interface, and
    +compile again, you arrive at the original pid.
    +
    +
    +
    +
    +
    There now also is a mechanism that instructs CM to use the plain
    +environment hash as a module's pid (effectively making its GUID
    +the empty string).  For this, "noguid" must be specified as an
    +option to the .sml file in question within its .cm file.
    +This is most useful for code that is being generated by tools such
    +as ml-nlffigen (because during development programmers tend to
    +erase the tool's entire output directory tree including CM's cached
    +GUIDs).  "noguid" is somewhat dangerous (since it can be used to locally
    +revert to the old, broken behavior of SML/NJ, but in specific cases
    +where there is no danger of interface confusion, its use is ok
    +(I think).
    +
    +
    +
    +
    +
    ml-nlffigen by default generates "noguid" annotations.  They can be
    +turned off by specifying -guid in its command line.
    +
    +
    + +
    +
    +
    +
    +
    +
    [2002/03/12]
    +
    +
    +

    Integrated jump chaining and static block frequency into the +compiler. More details and numbers later.

    +
    + +
    +
    +
    +
    +
    +
    [2002/03/11]
    +
    +
    +

    Tested the jump chain elimination on all architectures (except the +hppa). This is on by default right now and is profitable for the +alpha and x86, however, it may not be profitable for the sparc and ppc +when compiling the compiler.

    +
    +
    +

    The gc test will typically jump to a label at the end of the cluster, +where there is another jump to an external cluster containing the actual +code to invoke gc. This is to allow factoring of common gc invocation +sequences. That is to say, we generate:

    +
    +
    +
    +
    f:
    +   testgc
    +   ja	L1	% jump if above to L1
    +
    +
    +
    +
    +
    L1:
    +   jmp L2
    +
    +
    +
    +

    + +After jump chain elimination the 'ja L1' instructions is converted to +'ja L2'. On the sparc and ppc, many of the 'ja L2' instructions may end +up being implemented in their long form (if L2 is far away) using:

    +
    +
    +

    + + jbe L3 % jump if below or equal to L3 + jmp L2 + L3: + …​

    +
    +
    +

    + +For large compilation units L2 may be far away.

    +
    +
    +

    + +Lal George

    +
    +
    +
    +
    +
    +
    +
    [2002/03/11]
    +
    +
    +

    A functor parameter was missing.

    +
    + +
    +
    +
    +
    +
    +
    [2002/03/11]
    +
    +
    +
    +
       The representation of the empty string now points to a
    +legal null terminated C string instead of unit.  It is now possible
    +to convert an ML string into C string with InlineT.CharVector.getData.
    +This compiles into one single machine instruction.
    +
    +
    + +
    +
    +
    +
    +
    +
    [2002/03/10]
    +
    +
    +
    +
    Added machine generation for CALL instruction (relative displacement mode)
    +
    +
    + +
    +
    +
    +
    +
    +
    +

    Version 110.39.1; 2002/03/08

    +
    +
    +
    +
    [2002/03/08]
    +
    +
    +

    Entrypoints: non-zero offset into a code object where execution should begin.

    +
    +
    +
      +
    • +

      Added the notion of an entrypoint to CodeObj.

      +
    • +
    • +

      Added reading/writing of entrypoint info to Binfile.

      +
    • +
    • +

      Made runtime system bootloader aware of entrypoints.

      +
    • +
    • +

      Use the address of the label of the first function given to mlriscGen +as the entrypoint. This address is currently always 0, but it will +not be 0 once we turn on block placement.

      +
    • +
    • +

      Removed the linkage cluster code (which was The Other Way(tm) of dealing +with entry points) from mlriscGen.

      + +
    • +
    +
    +
    +
    +
    +
    +
    +
    [2002/03/07]
    +
    +
    +
    +
    Bug fixes for CMOVcc on x86.
    +
    +
    +
    +
      +
    1. +

      Added machine code generation for CMOVcc

      +
    2. +
    3. +

      CMOVcc is now generated in preference over SETcc on PentiumPro or above.

      +
    4. +
    5. +

      CMOVcc cannot have an immediate operand as argument.

      + +
    6. +
    +
    +
    +
    +
    +
    +
    +
    [2002/03/07]
    +
    +
    +

    This is a very large but mostly boring patch which makes (almost) +every tuneable compiler knob (i.e., pretty much everything under +Control.* plus a few other things) configurable via both the command +line and environment variables in the style CM did its configuration +until now.

    +
    +
    +

    Try starting sml with '-h' (or, if you are brave, '-H')

    +
    +
    +

    To this end, I added a structure Controls : CONTROLS to smlnj-lib.cm which +implements the underlying generic mechanism.

    +
    +
    +

    The interface to some of the existing such facilities has changed somewhat. +For example, the MLRiscControl module now provides mkFoo instead of getFoo. +(The getFoo interface is still there for backward-compatibility, but its +use is deprecated.)

    +
    +
    +

    The ml-build script passes -Cxxx=yyy command-line arguments through so +that one can now twiddle the compiler settings when using this "batch" +compiler.

    +
    +
    +

    TODO items:

    +
    +
    +

    We should go through and throw out all controls that are no longer +connected to anything. Moreover, we should go through and provide +meaningful (and correct!) documentation strings for those controls +that still are connected.

    +
    +
    +

    Currently, multiple calls to Controls.new are accepted (only the first +has any effect). Eventually we should make sure that every control +is being made (via Controls.new) exactly once. Future access can then +be done using Controls.acc.

    +
    +
    +

    Finally, it would probably be a good idea to use the getter-setter +interface to controls rather than ref cells. For the time being, both +styles are provided by the Controls module, but getter-setter pairs are +better if thread-safety is of any concern because they can be wrapped.

    +
    +
    +
    +
    +

    + +One bug fix: The function blockPlacement in three of the MLRISC +backpatch files used to be hard-wired to one of two possibilities at +link time (according to the value of the placementFlag). But (I +think) it should rather sense the flag every time.

    +
    +
    +

    +

    +
    +
    +
    +
    +

    Other assorted changes (by other people who did not supply a HISTORY entry):

    +
    +
    +
      +
    1. +

      the cross-module inliner now works much better (Monnier)

      +
    2. +
    3. +

      representation of weights, frequencies, and probabilities in MLRISC +changed in preparation of using those for weighted block placement +(Reppy, George)

      + +
    4. +
    +
    +
    +
    +
    +
    +
    +
    [2002/03/07]
    +
    +
    +

    Tested the weighted block placement optimization on all architectures +(except the hppa) using AMPL to generate the block and edge frequencies. +Changes were required in the machine properties to correctly +categorize trap instructions. There is an MLRISC flag +"weighted-block-placement" that can be used to enable weighted block +placement, but this will be ineffective without block/edge +frequencies (coming soon).

    +
    +
    +

    + +Lal George

    +
    +
    +
    +
    +
    +
    +
    [2002/03/05]
    +
    +
    +

    In order to support the block placement optimization, a new cluster +is generated as the very first cluster (called the linkage cluster). +It contains a single jump to the 'real' entry point for the compilation +unit. Block placement has no effect on the linkage cluster itself, but +all the other clusters have full freedom in the manner in which they +reorder blocks or functions.

    +
    +
    +

    On the x86 the typical linkage code that is generated is: + ---------------------- + .align 2 + L0: + addl $L1-L0, 72(%esp) + jmp L1

    +
    +
    +

    + + .align 2 + L1: + ----------------------

    +
    +
    +

    + +72(%esp) is the memory location for the stdlink register. This +must contain the address of the CPS function being called. In the +above example, it contains the address of L0; before +calling L1 (the real entry point for the compilation unit), it +must contain the address for L1, and hence

    +
    +
    +

    + + addl $L1-L0, 72(%esp)

    +
    +
    +

    + +I have tested this on all architectures except the hppa.The increase +in code size is of course negligible

    +
    +
    +

    + +Lal George

    +
    +
    +
    +
    +
    +
    +
    [2002/03/03]
    +
    +
    +

    Added #[ …​ ] expressions to mlrisc tools

    +
    + +
    +
    +
    +
    +
    +
    [2002/02/27]
    +
    +
    +
      +
    • +

      made types in structure C and C_Debug to be equal

      +
    • +
    • +

      got rid of code duplication (c-int.sml vs. c-int-debug.sml)

      +
    • +
    • +

      there no longer is a C_Int_Debug (C_Debug is directly derived from C)

      + +
    • +
    +
    +
    +
    +
    +
    +
    +
    [2002/02/26]
    +
    +
    +
      +
    1. +

      Fixed a minor bug in CM’s "noweb" tool: +If numbering is turned off, then truly don’t number (i.e., do not +supply the -L option to noweb). The previous behavior was to supply +-L'' — which caused noweb to use the "default" line numbering scheme. +Thanks to Chris Richards for pointing this out (and supplying the fix).

      +
    2. +
    3. +

      Once again, I reworked some aspects of the FFI:

      +
      +
        +
      1. +

        The incomplete/complete type business:

        +
        +
          +
        • +

          Signatures POINTER_TO_INCOMPLETE_TYPE and accompanying functors are +gone!

          +
        • +
        • +

          ML types representing an incomplete type are now equal to +ML types representing their corresponding complete types (just like +in C). This is still safe because ml-nlffigen will not generate +RTTI for incomplete types, nor will it generate functions that +require access to such RTTI. But when ML code generated from both +incomplete and complete versions of the C type meet, the ML types +are trivially interoperable.

          +
          +
          +
          NOTE:  These changes restore the full generality of the translation
          +(which was previously lost when I eliminated functorization)!
          +
          +
          +
        • +
        +
        +
      2. +
      3. +

        Enum types:

        +
        +
          +
        • +

          Structure C now has a type constructor "enum" that is similar to +how the "su" constructor works. However, "enum" is not a phantom +type because each "T enum" has values (and is isomorphic to +MLRep.Signed.int).

          +
        • +
        • +

          There are generic access operations for enum objects (using +MLRep.Signed.int).

          +
        • +
        • +

          ml-nlffigen will generate a structure E_foo for each "enum foo".

          +
          +
            +
          • +

            The structure contains the definition of type "mlrep" (the ML-side +representation type of the enum). Normally, mlrep is the same +as "MLRep.Signed.int", but if ml-nlffigen was invoked with "-ec", +then mlrep will be defined as a datatype — thus facilitating +pattern matching on mlrep values. +("-ec" will be suppressed if there are duplicate values in an + enumeration.)

            +
          • +
          • +

            Constructors ("-ec") or values (no "-ec") e_xxx of type mlrep +will be generated for each C enum constant xxx.

            +
          • +
          • +

            Conversion functions m2i and i2m convert between mlrep and +MLRep.Signed.int. (Without "-ec", these functions are identities.)

            +
          • +
          • +

            Coversion functions c and ml convert between mlrep and "tag enum".

            +
          • +
          • +

            Access functions (get/set) fetch and store mlrep values.

            +
          • +
          +
          +
        • +
        • +

          By default (unless ml-nlffigen was invoked with "-nocollect"), unnamed +enumerations are merged into one single enumeration represented by +structure E_'.

          + +
        • +
        +
        +
      4. +
      +
      +
    4. +
    +
    +
    +
    +
    +
    +
    +
    [2002/02/25]
    +
    +
    +

    This is a new implementation of the CPS spill phase. +The new phase is in the new file compiler/CodeGen/cpscompile/spill-new.sml +In case of problems, replace it with the old file spill.sml

    +
    +
    +

    The current compiler runs into some serious performance problems when +constructing a large record. This can happen when we try to compile a +structure with many items. Even a very simple structure like the following +makes the compiler slow down.

    +
    +
    +
    +
        structure Foo = struct
    +       val x_1 = 0w1 : Word32.int
    +       val x_2 = 0w2 : Word32.int
    +       val x_3 = 0w3 : Word32.int
    +       ...
    +       val x_N = 0wN : Word32.int
    +    end
    +
    +
    +
    +

    The following table shows the compile time, from N=1000 to N=4000, +with the old compiler:

    +
    +
    +

    N +1000 CPS 100 spill 0.04u 0.00s 0.00g + MLRISC ra 0.06u 0.00s 0.05g + (spills = 0 reloads = 0) + TOTAL 0.63u 0.07s 0.21g

    +
    +
    +

    1100 CPS 100 spill 8.25u 0.32s 0.64g + MLRISC ra 5.68u 0.59s 3.93g + (spills = 0 reloads = 0) + TOTAL 14.71u 0.99s 4.81g

    +
    +
    +

    1500 CPS 100 spill 58.55u 2.34s 1.74g + MLRISC ra 5.54u 0.65s 3.91g + (spills = 543 reloads = 1082) + TOTAL 65.40u 3.13s 6.00g

    +
    +
    +

    2000 CPS 100 spill 126.69u 4.84s 3.08g + MLRISC ra 0.80u 0.10s 0.55g + (spills = 42 reloads = 84) + TOTAL 129.42u 5.10s 4.13g

    +
    +
    +

    3000 CPS 100 spill 675.59u 19.03s 11.64g + MLRISC ra 2.69u 0.27s 1.38g + (spills = 62 reloads = 124) + TOTAL 682.48u 19.61s 13.99g

    +
    +
    +

    4000 CPS 100 spill 2362.82u 56.28s 43.60g + MLRISC ra 4.96u 0.27s 2.72g + (spills = 85 reloads = 170) + TOTAL 2375.26u 57.21s 48.00g

    +
    +
    +

    As you can see the old cps spill module suffers from some serious +performance problem. But since I cannot decipher the old code fully, +instead of patching the problems up, I’m reimplementing it +with a different algorithm. The new code is more modular, +smaller when compiled, and substantially faster +(O(n log n) time and O(n) space). Timing of the new spill module:

    +
    +
    +

    4000 CPS 100 spill 0.02u 0.00s 0.00g + MLRISC ra 0.25u 0.02s 0.15g + (spills=1 reloads=3) + TOTAL 7.74u 0.34s 1.62g

    +
    +
    +

    Implementation details:

    +
    +
    +

    As far as I can tell, the purpose of the CPS spill module is to make sure the +number of live variables at any program point (the bandwidth) +does not exceed a certain limit, which is determined by the +size of the spill area.

    +
    +
    +

    When the bandwidth is too large, we decrease the register pressure by +packing live variables into spill records. How we achieve this is +completely different than what we did in the old code.

    +
    +
    +

    First, there is something about the MLRiscGen code generator +that we should be aware of:

    +
    +
    +

    MLRiscGen performs code motion!

    +
    +
    +
    +
    In particular, it will move floating point computations and
    +address computations involving only the heap pointer to
    +their use sites (if there is only a single use).
    +What this means is that if we have a CPS record construction
    +statement
    +
    +
    +
    +
    +
           RECORD(k,vl,w,e)
    +
    +
    +
    +
    +
    we should never count the new record address w as live if w
    +has only one use (which is often the case).
    +
    +
    +
    +
    +
    We should do something similar to floating point, but the transformation
    +there is much more complex, so I won't deal with that.
    +
    +
    +
    +

    Secondly, there are now two new cps primops at our disposal:

    +
    +
    +
    +
    +
      +
    1. +

      rawrecord of record_kind option +This pure operator allocates some uninitialized storage from the heap. +There are two forms:

      +
      +
      +
      rawrecord NONE [INT n]  allocates a tagless record of length n
      +rawrecord (SOME rk) [INT n] allocates a tagged record of length n
      +                            and initializes the tag.
      +
      +
      +
    2. +
    3. +

      rawupdate of cty +rawupdate cty (v,i,x) +Assigns to x to the ith component of record v. +The storelist is not updated.

      +
    4. +
    +
    +
    +
    +
    +

    We use these new primops for both spilling and increment record construction.

    +
    +
    +
    +
    +
      +
    1. +

      Spilling.

      +
      +
      +
      This is implemented with a linear scan algorithm (but generalized
      +to trees).  The algorithm will create a single spill record at the
      +beginning of the cps function and use rawupdate to spill to it,
      +and SELECT or SELp to reload from it.  So both spills and reloads
      +are fine-grain operations.  In contrast, in the old algorithm
      +"spills" have to be bundled together in records.
      +
      +
      +
      +
      +
      Ideally, we should sink the spill record construction to where
      +it is needed.  We can even split the spill record into multiple ones
      +at the places where they are needed.  But CPS is not a good
      +representation for global code motion, so I'll keep it simple and
      +am not attempting this.
      +
      +
      +
    2. +
    3. +

      Incremental record construction (aka record splitting).

      +
      +
      +
      Long records with many component values which are simulatenously live
      +(recall that single use record addresses are not considered to
      + be live) are constructed with rawrecord and rawupdate.
      +We allocate space on the heap with rawrecord first, then gradually
      +fill it in with rawupdate.  This is the technique suggested to me
      +by Matthias.
      +
      +
      +
      +
      +
      Some restrictions on when this is applicable:
      +a. It is not a VECTOR record.  The code generator currently does not handle
      +   this case. VECTOR record uses double indirection like arrays.
      +b. All the record component values are defined in the same "basic block"
      +   as the record constructor.  This is to prevent speculative
      +   record construction.
      +
      +
      +
    4. +
    +
    +
    +
    + +
    +
    +
    +
    +
    +
    [2002/02/22]
    +
    +
    +

    Minor bug fixes in the parser and rewriter

    +
    + +
    +
    +
    +
    +
    +
    [2002/02/21]
    +
    +
    +

    Regenerated the peephole files. Some contained typos in the specification +and some didn’t compile because of pretty printing bugs in the old version +of 'nowhere'.

    +
    + +
    +
    +
    +
    +
    +
    [2002/02/19]
    +
    +
    +
    +
    Minor bug fixes to the mlrisc-tools library:
    +
    +
    +
    +
      +
    1. +

      Fixed up parsing colon suffixed keywords

      +
    2. +
    3. +

      Added the ability to shut the error messages up

      +
    4. +
    5. +

      Reimplemented the pretty printer and fixed up/improved +the pretty printing of handle and → types.

      +
    6. +
    7. +

      Fixed up generation of literal symbols in the nowhere tool.

      +
    8. +
    9. +

      Added some SML keywords to to sml.sty

      + +
    10. +
    +
    +
    +
    +
    +
    +
    +
    [2002/02/19]
    +
    +
    +

    A wild mix of changes, some minor, some major:

    +
    +
    +
    +
    +
      +
    • +

      All C FFI-related libraries are now anchored under $c: +$/c.cm -→ $c/c.cm +$/c-int.cm -→ $c/internals/c-int.cm +$/memory.cm -→ $c/memory/memory.cm

      +
    • +
    • +

      "make" tool (in CM) now treats its argument pathname slightly +differently:

      +
      +
        +
      1. +

        If the native expansion is an absolute name, then before invoking +the "make" command on it, CM will apply OS.Path.mkRelative +(with relativeTo = OS.FileSys.getDir()) to it.

        +
      2. +
      3. +

        The argument will be passed through to subsequent phases of CM +processing without "going native". In particular, if the argument +was an anchored path, then "make" will not lose track of that anchor.

        +
      4. +
      +
      +
    • +
    • +

      Compiler backends now "know" their respective C calling conventions +instead of having to be told about it by ml-nlffigen. This relieves +ml-nlffigen from one of its burdens.

      +
    • +
    • +

      The X86Backend has been split into X86CCallBackend and X86StdCallBackend.

      +
    • +
    • +

      Export C_DEBUG and C_Debug from $c/c.cm.

      +
    • +
    • +

      C type encoding in ml-nlffi-lib has been improved to model the conceptual +subtyping relationship between incomplete pointers and their complete +counterparts. For this, ('t, 'c) ptr has been changed to 'o ptr — with the convention of instantiating 'o with ('t, 'c) obj whenever +the pointer target type is complete. In the incomplete case, 'o +will be instantiated with some "'c iobj" — a type obtained by +using one of the functors PointerToIncompleteType or PointerToCompleteType.

      +
      +
      +
      Operations that work on both incomplete and complete pointer types are
      +typed as taking an 'o ptr while operations that require the target to
      +be known are typed as taking some ('t, 'c) obj ptr.
      +
      +
      +
      +
      +
      voidptr is now a bit "more concrete", namely "type voidptr = void ptr'"
      +where void is an eqtype without any values.  This makes it possible
      +to work on voidptr values using functions meant to operate on light
      +incomplete pointers.
      +
      +
      +
    • +
    • +

      As a result of the above, signature POINTER_TO_INCOMPLETE_TYPE has +been vastly simplified.

      +
    • +
    +
    +
    +
    + +
    +
    +
    +
    +
    +
    [2002/02/19]
    +
    +
    +

    Applied Chris Okasaki’s bug fix for priority queues.

    +
    + +
    +
    +
    +
    +
    +
    +

    Version 110.39; 2002/02/15

    +
    +
    +
    +
    [2002/02/15]
    +
    +
    +

    Added EnvRef.listBoundSymbols and CM.State.showBindings. Especially +the latter can be useful for exploring what bindings are available at +the interactive prompt. (The first function returns only the list +of symbols that are really bound, the second prints those but also the +ones that CM’s autoloading mechanism knows about.)

    +
    + +
    +
    +
    +
    +
    +
    [2002/02/15]
    +
    +
    +

    Two improvements to ml-nlffigen:

    +
    +
    +
      +
    1. +

      Write files only if they do not exist or if their current contents +do not coincide with what’s being written. (That is, avoid messing +with the time stamps unless absolutely necessary.)

      +
    2. +
    3. +

      Implement a "repository" mechanism for generated files related +to "incomplete pointer types". See the README file for details.

      + +
    4. +
    +
    +
    +
    +
    +
    +
    +
    [2002/02/14]
    +
    +
    +

    Added a type 't t_' to tag.sml (in ml-nlffi-lib.cm). This is required +because of the new and improved tag generation scheme. (Thanks to Allen +Leung for pointing it out.)

    +
    + +
    +
    +
    +
    +
    +
    [2002/02/14]
    +
    +
    +
    +
    +

    Fixed the MLRISC bug sent by Markus Wenzel regarding the compilation +of Isabelle on the x86.

    +
    +
    +

    + +From Allen:

    +
    +
    +

    + +* I’ve found the problem:

    +
    +
    +

    + + in ra-core.sml, I use the counter "blocked" to keep track of the + true number of elements in the freeze queue. When the counter goes + to zero, I skip examining the queue. But I’ve messed up the + bookkeeping in combine():

    +
    +
    +

    +

    +
    +
    +
    +
             else ();
    +         case !ucol of
    +           PSEUDO => (if !cntv > 0 then
    +                 (if !cntu > 0 then blocked := !blocked - 1 else ();
    +                                    ^^^^^^^^^^^^^^^^^^^^^^^
    +                  moveu := mergeMoveList(!movev, !moveu)
    +                 )
    +              else ();
    +
    +
    +
    +

    + + combine() is called to coalesce two nodes u and v. + I think I was thinking that if the move counts of u and v are both + greater than zero then after they are coalesced then one node is + removed from the freeze queue. Apparently I was thinking that + both u and v are of low degree, but that’s clearly not necessarily true.

    +
    +
    +

    +

    +
    +
    +

    + +* 02/12/2002:

    +
    +
    +

    + + Here’s the patch. HOL now compiles.

    +
    +
    +

    + + I don’t know how this impact on performance (compile + time or runtime). This bug caused the RA (especially on the x86) + to go thru the potential spill phase when there are still nodes on the + freeze queue.

    +
    +
    +
    + +
    +
    +
    +
    +
    +
    [2002/02/13]
    +
    +
    +

    Fixed a bug in ml-nlffigen that was introduced with one of the previous +updates.

    +
    + +
    +
    +
    +
    +
    +
    [2002/02/13]
    +
    +
    +

    Added new priority queue export symbols (which have just been added to +smlnj-lib.cm) to CML’s version of smlnj-lib.cm. (Otherwise CML would +not compile and the installer would choke.)

    +
    + +
    +
    +
    +
    +
    +
    [2002/02/13]
    +
    +
    +
    +
    +
      +
    1. +

      More tweaks to ml-nlffigen:

      +
      +
        +
      • +

        better internal datastructures (resulting in slight speedup)

        +
      • +
      • +

        "-match" option requires exact match

        +
      • +
      • +

        "localized" gensym counters (untagged structs/unions nested within +other structs/unions or within typedefs get a fresh counter; their +tag will be prefixed by a concatenation of their parents' tags)

        +
      • +
      • +

        bug fixes (related to calculation of transitive closure of types +to be included in the output)

        +
      • +
      +
      +
    2. +
    3. +

      Minor Basis updates:

      +
      +
        +
      • +

        added implementations for List.collate and Option.app

        +
      • +
      +
      +
    4. +
    +
    +
    +
    + +
    +
    +
    +
    +
    +
    [2002/02/11]
    +
    +
    +

    Added a "-gensym" option to command line of ml-nlffigen. This can be +used to specify a "stem" — a string that is inserted in all "gensym’d" +names (ML structure names that correspond to unnamed C structs, unions, +and enums), so that separate runs of ml-nlffigen do not clash.

    +
    + +
    +
    +
    +
    +
    +
    [2002/02/11]
    +
    +
    +

    A quick fix for a problem with GenSML (in the pgraph-util library): +Make generation of toplevel "local" optional. (Strictly speaking, +signature definitions within "local" are not legal SML.)

    +
    +
    +

    Other than that: updates to INSTALL and cm/TODO.

    +
    + +
    +
    +
    +
    +
    +
    +

    Version 110.38.1; 2002/02/08

    +
    +
    +
    +
    [2002/02/08]
    +
    +
    +
    +
    +
      +
    1. +

      The installer (config/install.sh) has gotten smarter:

      +
    2. +
    +
    +
    +
    +
    +
      +
    • +

      Configuration options are a bit easier to specify now +(in config/targets).

      +
    • +
    • +

      Bug in recognizing .tar.bz2 files fixed.

      +
    • +
    • +

      Installer automatically resolves dependencies between +configuration options (e.g., if you ask for eXene, you will +also get cml — regardless whether you asked for it or not).

      +
    • +
    • +

      Installer can run in "quieter mode" by setting the environment +variable INSTALL_QUIETLY to "true". "Quieter" does not mean +"completely silent", though.

      +
    • +
    • +

      Build HashCons library as part of smlnj-lib.

      +
    • +
    +
    +
    +
    +
    +
    +
    +
    +

    + +. A new scheme for assigning persistent identifiers to compilation + units (and, by extension, to types etc.) has been put into place. + This fixes a long-standing bug where types and even dynamic values + can get internally confused, thereby compromising type safety + (abstraction) and dynamic correctness. See + http://cm.bell-labs.com/cm/cs/who/blume/pid-confusion.tgz + for an example of how things could go wrong until now.

    +
    +
    +

    + + The downside of the new scheme is that pids are not quite as + persistent as they used to be: CM will generate a fresh pid + for every compilation unit that it thinks it sees for the first + time. That means that if you compile starting from a clean, fresh + source tree at two different times, you end up with different + binaries.

    +
    +
    +

    + + Cutoff recompilation, however, has not been compromised because + CM keeps pid information in special caches between runs.

    +
    +
    +
    + +
    +
    +
    [2002/02/07]
    +
    +
    +

    Compilers that generate assembly code may produce global labels +whose value is resolved at link time. The various peephole optimization +modules did not take this in account.

    +
    +
    +

    TODO. The Labels.addrOf function should really return an option +type so that clients are forced to deal with this issue, rather +than an exception being raised.

    +
    + +
    +
    +
    +
    +
    +
    [2002/02/06]
    +
    +
    +
    +
    +
      +
    1. +

      A bug fix from Allen: +A typo causes extra fstp %st(0) instructions to be generated at compensation +edges, which might cause stack underflow traps at runtime. This +occurs in fft where there are extraneous fstps right before the into +trap instruction (in this case they are harmless since none of the +integers overflow.)

      +
    2. +
    3. +

      Pulled out various utility modules that were embedded in the modules +of the register allocator. I need these modules for other purposes, but +they are not complete enough to put into a library (just yet).

      +
    4. +
    +
    +
    +
    + +
    +
    +
    +
    +
    +
    [2002/01/31]
    +
    +
    +
    +
    +
      +
    1. +

      Fixed a bug where C-calls on SPARC needlessly allocated a huge chunk (96 bytes) +of extra stack space by mistake.

      +
    2. +
    3. +

      Bug in logic of handling of command-line options in ml-nlffigen fixed.

      +
    4. +
    +
    +
    +
    + +
    +
    +
    +
    +
    +
    [2002/01/30]
    +
    +
    +

    MLRISC bug fixes:

    +
    +
    +
    +
    +
    +
    +
    +
      +
    1. +

      Fixed a bindings computation bug in the 'nowhere' program generator tool.

      +
    2. +
    3. +

      MachineInt.fromString was negating its value.

      +
    4. +
    +
    +
    +
    + +
    +
    +
    [2002/01/29]
    +
    +
    +
      +
    • +

      Added somewhat detailed installation instructions (file INSTALL).

      +
    • +
    • +

      Fixed curl-detection bug in config/install.sh.

      +
    • +
    • +

      It is now possible to select the URL getter using the URLGETTER +environment variable:

      +
      +
      +
      not set / "unknown"      --> automatic detection (script tries wget,
      +                             curl, and lynx)
      +"wget" / "curl" / "lynx" --> use the specified program (script "knows"
      +                             how to properly invoke them)
      +other                    --> use $URLGETTER directly, it must take
      +                             precisely two command-line arguments
      +                             (source URL and destination file name)
      +
      +
      + +
    • +
    +
    +
    +
    +
    +
    +
    +
    [2002/01/28]
    +
    +
    +
      +
    • +

      Fixed problem with calculation of "used" registers in sparc-c-calls.

      +
    • +
    • +

      Make use of the allocParam argument in sparc-c-calls.

      + +
    • +
    +
    +
    +
    +
    +
    +
    +
    [2002/01/28]
    +
    +
    +

    John Reppy: Changes c-calls API to accept client-callback for +allocating extra stack space. +me: Corresponding changes to mlriscGen (using a dummy argument that + does not change the current behavior).

    +
    + +
    +
    +
    +
    +
    +
    +

    Version 110.38; 2002/01/28

    +
    +
    +
    +
    [2002/01/28]
    +
    +
    +
    +
    +
      +
    1. +

      Retracted earlier 110.38. (The Release_110_38 tag has been replaced +with blume-Release_110_38-retracted.)

      +
    2. +
    3. +

      Fixed a problem with incorrect rounding modes in real64.sml. +(Thanks to Andrew Mccreight <andrew.mccreight@yale.edu>.)

      +
    4. +
    5. +

      A bug in ml-nlffigen related to the handling of unnamed structs, unions, +and enums fixed. The naming of corresponding ML identifiers should +now be consistent again.

      +
    6. +
    +
    +
    +
    + +
    +
    +
    +
    +
    +
    [2002/01/27]
    +
    +
    +
    +
    Added a target called nowhere in the configuration scripts.
    +Enabling this will build the MLRISC 'nowhere' tool (for translating
    +programs with where-clauses into legal SML code) during installation.
    +
    +
    + +
    +
    +
    +
    +
    +
    [2002/01/25]
    +
    +
    +

    Call it a (working) release! Version is 110.38. Bootfiles are ready.

    +
    +
    +

    README will be added later.

    +
    +
    +

    !!! NOTE: Re-tagged as blume-Release_110_38-retracted. Original tag +(Release_110_38) removed. Reason: Last-minute bug fixes.

    +
    + +
    +
    +
    +
    +
    +
    [2002/01/25]
    +
    +
    +

    A large number of tweaks and improvements to ml-nlffi-lib and +ml-nlffigen:

    +
    +
    +
      +
    • +

      ML represenation types have been streamlined

      +
    • +
    • +

      getter and setter functions work with concrete values, not abstract +ones where possible

      +
    • +
    • +

      ml-nlffigen command line more flexible (see README file there)

      +
    • +
    • +

      some bugs have been fixed (hopefully)

      + +
    • +
    +
    +
    +
    +
    +
    +
    +
    [2002/01/24]
    +
    +
    +
    +
    There is a dramatic simplification in the interface to the
    +register allocator for RISC architectures as a result of making
    +parallel copy instructions explicit.
    +
    +
    + +
    +
    +
    +
    +
    +
    [2002/01/22]
    +
    +
    +

    Bug fix for c-calls on x86 (having to do with how char- and +short-arguments are being handled).

    +
    + +
    +
    +
    +
    +
    +
    [2002/01/21]
    +
    +
    +

    Another day of fiddling with the FFI…​

    +
    +
    +
      +
    1. +

      Bug fix/workaround: CKIT does not complain about negative array +dimensions, so ml-nlffigen has to guard itself against this possibility. +(Otherwise a negative dimension would send it into an infinite loop.)

      +
    2. +
    3. +

      Some of the abstract types (light objects, light pointers, most "base" +types) in structure C are now eqtypes.

      +
    4. +
    5. +

      Added constructors and test functions for NULL function pointers.

      + +
    6. +
    +
    +
    +
    +
    +
    +
    +
    [2002/01/18]
    +
    +
    +

    Made config/srcarchiveurl point to a new place. (Will provide boot +files shortly.)

    +
    +
    +

    Maybe we christen this to be 110.38?

    +
    + +
    +
    +
    +
    +
    +
    [2002/01/18]
    +
    +
    +

    Today’s FFI fiddling:

    +
    +
    +
      +
    • +

      Provided a structure CGetSet with "convenient" versions of C.Get.* and +C.Set.* that use concrete (MLRep.*) arguments and results instead +of abstract ones.

      +
    • +
    • +

      Provided word-style bit operations etc. for "int" representation +types in MLRep.S<Foo>Bitops where <Foo> ranges over Char, Int, Short, +and Long.

      + +
    • +
    +
    +
    +
    +
    +
    +
    +
    [2002/01/18]
    +
    +
    +

    Now that x86-fast-fp seems to be working, I turned it back on again +by default. (Seems to work fine now, even with the FFI.)

    +
    +
    +

    Other than that, I added some documentation about the FFI to +src/ml-nlffigen/README and updated the FFI test examples in +src/ml-nlffi-lib/Tests/*.

    +
    + +
    +
    +
    +
    +
    +
    [2002/01/17]
    +
    +
    +
      +
    1. +

      Fixed a problem with handling return fp values when x86’s fast fp +mode is turned on.

      +
    2. +
    3. +

      Minor pretty printing fix for cellset. Print %st(0) as %st(0) instead +of %f32.

      +
    4. +
    5. +

      Added a constructor INT32lit to the ast of MLRISC tools.

      + +
    6. +
    +
    +
    +
    +
    +
    +
    +
    [2002/01/16]
    +
    +
    +

    More fiddling with the FFI interface:

    +
    +
    +
      +
    • +

      Make constness 'c instead of rw wherever possible. This eliminates +the need for certain explicit coercions. (However, due to ML’s +value polymorphism, there will still be many cases where explicit +coercions are necessary. Phantom types are not the whole answer +to modeling a subtyping relationship in ML.)

      +
    • +
    • +

      ro/rw coersions for pointers added. (Avoids the detour through */&.)

      +
    • +
    • +

      "printf" test example added to src/ml-nlffi-lib/Tests. (Demonstrates +clumsy workaround for varargs problem.)

      + +
    • +
    +
    +
    +
    +
    +
    +
    +
    [2002/01/15]
    +
    +
    +
      +
    1. +

      Since COPY instructions are no longer native to the architecture, +a generic functor can be used to implement the expandCopies function.

      +
    2. +
    3. +

      Allowed EXPORT and IMPORT pseudo-op declarations to appear inside a +TEXT segment.

      + +
    4. +
    +
    +
    +
    +
    +
    +
    +
    [2002/01/15]
    +
    +
    +
      +
    1. +

      Fix for bug resulting in single-precision float values being returned +incorrectly from FFI calls.

      +
    2. +
    3. +

      Small modifications to C FFI API:

      +
      +
        +
      • +

        memory-allocation routines return straight objects (no options) +and raise an exception in out-of-memory situations

        +
      • +
      • +

        unsafe extensions to cast between function pointers and pointers +from/to ints

        +
      • +
      • +

        added structure C_Debug as an alternative to structure C where +pointer-dereferencing (|| and |!) always check for null-pointers

        +
      • +
      • +

        added open_lib' to DynLinkage; open_lib' works like open_lib +but also takes a (possibly empty) list of existing library handles +that the current library depends on

        + +
      • +
      +
      +
    4. +
    +
    +
    +
    +
    +
    +
    +
    [2002/01/10]
    +
    +
    +
      +
    1. +

      Updates to portable graph code.

      +
    2. +
    3. +

      Major update to ml-nlffigen and ml-nlffi-lib. Things are much +more scalable now so that even huge interfaces such as the one +for GTK compile in finite time and space. :-) +See src/ml-nlffigen/README for details on what’s new.

      + +
    4. +
    +
    +
    +
    +
    +
    +
    +
    [2001/01/09]
    +
    +
    +
    +
    Removed the native COPY and FCOPY instructions
    +from all the architectures and replaced it with the
    +explicit COPY instruction from the previous commit.
    +
    +
    +
    +
    +
    It is now possible to simplify many of the optimizations
    +modules that manipulate copies. This has not been
    +done in this change.
    +
    +
    + +
    +
    +
    +
    +
    +
    [2001/12/06]
    +
    +
    +

    Changed the representation of instructions from being fully abstract +to being partially concrete. That is to say:

    +
    +
    +
    +
     from
    +type instruction
    +
    +
    +
    +
    +
     to
    +type instr				(* machine instruction *)
    +
    +
    +
    +
    +
    datatype instruction =
    +    LIVE of {regs: C.cellset, spilled: C.cellset}
    +         | KILL of {regs: C.cellset, spilled: C.cellset}
    +         | COPYXXX of {k: CB.cellkind, dst: CB.cell list, src: CB.cell list}
    +         | ANNOTATION of {i: instruction, a: Annotations.annotation}
    +         | INSTR of instr
    +
    +
    +
    +

    This makes the handling of certain special instructions that appear on +all architectures easier and uniform.

    +
    +
    +

    LIVE and KILL say that a list of registers are live or killed at the +program point where they appear. No spill code is generated when an +element of the 'regs' field is spilled, but the register is moved to +the 'spilled' (which is present, more for debugging than anything else).

    +
    +
    +

    LIVE replaces the (now deprecated) DEFFREG instruction on the alpha. +We used to generate:

    +
    +
    +
    +
    DEFFREG f1
    +f1 := f2 + f3
    +       trapb
    +
    +
    +
    +

    but now generate:

    +
    +
    +
    +
    f1 := f2 + f3
    +trapb
    +LIVE {regs=[f1,f2,f3], spilled=[]}
    +
    +
    +
    +

    Furthermore, the DEFFREG (hack) required that all floating point instruction +use all registers mentioned in the instruction. Therefore f1 := f2 + f3, +defines f1 and uses [f1,f2,f3]! This hack is no longer required resulting +in a cleaner alpha implementation. (Hopefully, intel will not get rid of +this architecture).

    +
    +
    +

    COPYXXX is intended to replace the parallel COPY and FCOPY available on +all the architectures. This will result in further simplification of the +register allocator that must be aware of them for coalescing purposes, and +will also simplify certain aspects of the machine description that provides +callbacks related to parallel copies.

    +
    +
    +

    ANNOTATION should be obvious, and now INSTR represents the honest to God +machine instruction set!

    +
    +
    +

    The <arch>/instructions/<arch>Instr.sml files define certain utility +functions for making porting easier — essentially converting upper case +to lower case. All machine instructions (of type instr) are in upper case, +and the lower case form generates an MLRISC instruction. For example on +the alpha we have:

    +
    +
    +
    +
    datatype instr =
    +   LDA of {r:cell, b:cell, d:operand}
    + | ...
    +
    +
    +
    +
    +
    val lda : {r:cell, b:cell, d:operand} -> instruction
    +  ...
    +
    +
    +
    +

    where lda is just (INSTR o LDA), etc.

    +
    + +
    +
    +
    +
    +
    +
    +

    Version 110.37; 2001/11/22

    +
    +
    +
    +
    [2001/11/21]
    +
    +
    +

    Removed the "Release_110_37" tag because of a serious bug. +This will be re-tagged once the bug is fixed.

    +
    + +
    +
    +
    +
    +
    +
    [2001/11/21]
    +
    +
    +

    Forgot to add a file. (Just a .tex-file — part of +the CM manual source.)

    +
    + +
    +
    +
    +
    +
    +
    [2001/11/21]
    +
    +
    +

    Note: I removed the original tag "Release_110_37" from this commit + because we found a serious bug in all non-x86 backends. + - Matthias

    +
    +
    +

    +

    +
    +
    +
      +
    1. +

      Modifications to the SML/NJ code generator and to the runtime system +so that code object name strings are directly inserted into code +objects at code generation time. The only business the runtime system +has with this is now to read the name strings on occasions. +(The encoding of the name string has also changed somewhat.)

      +
    2. +
    3. +

      CM now implements a simple "set calculus" for specifying export lists. +In particular, it is now possible to refer to the export lists of +other libraries/groups/sources and form unions as well as differences. +See the latest CM manual for details.

      +
    4. +
    5. +

      An separate notion of "proxy" libraries has again be eliminated from +CM’s model. (Proxy libraries are now simply a special case of using +the export list calculus.)

      +
    6. +
    7. +

      Some of the existing libraries now take advantage of the new set +calculus. +(Notice that not all libraries have been converted because some +of the existing .cm-files are supposed to be backward compatible +with 110.0.x.)

      +
    8. +
    9. +

      Some cleanup in stand-alone programs. (Don’t use "exnMessage" — use +"General.exnMessage"! The former relies on a certain hook to be +initialized, and that often does not happen in the stand-alone case.)

      + +
    10. +
    +
    +
    +
    +
    +
    +
    +
    [2001/11/21]
    +
    +
    +
    +
    Implemented a complete redesign of MLRISC pseudo-ops. Now there
    +ought to never be any question of incompatabilities with
    +pseudo-op syntax expected by host assemblers.
    +
    +
    +
    +
    +
    For now, only modules supporting GAS syntax are implemented
    +but more should follow, such as MASM, and vendor assembler
    +syntax, _e.g._ IBM as, Sun as, etc.
    +
    +
    + +
    +
    +
    +
    +
    +
    [2001/11/14]
    +
    +
    +
      +
    1. +

      Routed the name of the current source file to mlriscgen where it +should be directly emitted into the code object. (This last part +is yet to be done.)

      +
    2. +
    3. +

      Some cleanup of the pgraph code to make it match the proposal that +I put out the other day. (The proposal notwithstanding, things are +still in flux here.)

      + +
    4. +
    +
    +
    +
    +
    +
    +
    +
    [2001/11/14]
    +
    +
    +
    +
    Fix for a backpatching bug reported by Allen.
    +
    +
    +
    +
    +
    Because the boundary between short and long span-dependent
    +instructions is +/- 128, there are an astounding number of
    +span-dependent instructions whose size is over estimated.
    +
    +
    +
    +
    +
    Allen came up with the idea of letting the size of span
    +dependent instructions be non-monotonic, for a maxIter
    +number of times, after which the size must be monotonically
    +increasing.
    +
    +
    +
    +
    +
    This table shows the number of span-dependent instructions
    +whose size was over-estimated as a function of maxIter, for the
    +file Parse/parse/ml.grm.sml:
    +
    +
    +
    +
    +
       maxIter		# of instructions:
    +10			687
    +20			438
    +30			198
    +      40			  0
    +
    +
    +
    +
    +
    In compiling the compiler, there is no significant difference in
    +compilation speed between maxIter=10 and maxIter=40. Actually,
    +my measurements showed that maxIter=40 was a tad faster than
    +maxIter=10! Also 96% of the  files in the compiler reach a fix
    +point within 13 iterations, so fixing maxIter at 40, while high,
    +is okay.
    +
    +
    + +
    +
    +
    +
    +
    +
    [2001/10/31]
    +
    +
    +

    CKIT: +* Changed the "Function" constructor of type Ast.ctype to carry optional + argument identifiers. +* Changed the return type of TypeUtil.getFunction accordingly. +* Type equality ignores the argument names. +* TypeUtil.composite tries to preserve argument names but gives up quickly + if there is a mismatch.

    +
    +
    +

    + +installation script:

    +
    +
    +
      +
    • +

      attempts to use "curl" if available (unless "wget" is available as well)

      +
      +

      CM:

      +
      +
    • +
    • +

      has an experimental implementation of "portable graphs" which I will +soon propose as an implementation-independent library format

      +
    • +
    • +

      there are also new libraries $/pgraph.cm and $/pgraph-util.cm

      +
      +

      NLFFI-LIB:

      +
      +
    • +
    • +

      some cleanup (all cosmetic)

      +
      +

      NLFFIGEN:

      +
      +
    • +
    • +

      temporarily disabled the mechanism that suppresses ML output for +C definitions whose identifiers start with an underscore character

      +
    • +
    • +

      generate val bindings for enum constants

      +
    • +
    • +

      user can request that only one style (light or heavy) is being used; +default is to use both (command-line arguments: -heavy and -light)

      +
    • +
    • +

      fixed bug in handling of function types involving incomplete pointers

      +
    • +
    • +

      generate ML entry points that take record arguments (i.e., using +named arguments) for C functions that have a prototype with named +arguments +(see changes to CKIT)

      + +
    • +
    +
    +
    +
    +
    +
    +
    +
    [2001/10/27]
    +
    +
    +
    +
    Fixed the bug described in blume-20010920-slowfp.
    +
    +
    +
    +
    +
    The fix involves
    +   1. generating FCOPYs in FSTP in ia32-svid
    +   2. marking a CALL with the appropriate annotation
    +
    +
    + +
    +
    +
    +
    +
    +
    [2001/10/16]
    +
    +
    +

    Underscore patch from Chris Richards (fixing problem with compiling +runtime system under recent NetBSD).

    +
    + +
    +
    +
    +
    +
    +
    [2001/10/12]
    +
    +
    +

    X86RA now uses a valid (instead of dummy) PrintFlowgraph module.

    +
    + +
    +
    +
    +
    +
    +
    [2001/10/11]
    +
    +
    +

    The representation of a program point never expected to see more +than 65536 instructions in a basic block!

    +
    + +
    +
    +
    +
    +
    +
    [2001/10/09]
    +
    +
    +

    Changed the machine description files to support printing of +local and global labels in assembly code, based on host assembler +conventions.

    +
    + +
    +
    +
    +
    +
    +
    [2001/09/25]
    +
    +
    +

    I provided a non-hook implementation of exnName (at the toplevel) and +made the "dummy" implementation of exnMessage (at the toplevel) more +useful: if nothing gets "hooked in", then at least you are going to +see the exception name and a message indicating why you don’t see more.

    +
    +
    +

    [For the time being, programs that need exnMessage and want to use +ml-build should either use General.exnMessage (strongly recommended) or +refer to structure General at some other point so that CM sees a +static dependency.]

    +
    +
    +

    [Similar remarks go for "print" and "use": If you want to use their +functionality in stand-alone programs generated by ml-build, then use +TextIO.output and Backend.Interact.useFile (from $smlnj/compiler.cm).]

    +
    + +
    +
    +
    +
    +
    +
    [2001/09/20]
    +
    +
    +

    Allen says that x86-fast-fp is not safe yet, so I turned it off again…​

    +
    + +
    +
    +
    +
    +
    +
    [2001/09/20]
    +
    +
    +
      +
    1. +

      Updated the BOOT file (something that I forgot to do earlier).

      +
    2. +
    3. +

      Small internal change to CM so that it avoids "/../" in filenames +as much as possible (but only where it is safe).

      +
    4. +
    5. +

      Changed config/_run-sml (resulting in a changed bin/.run-sml) so +that arguments that contain delimiters are passed through correctly. +This change also means that all "special" arguments of the form +@SMLxxx…​ must come first.

      +
    6. +
    7. +

      Changed install script to put relative anchor names for tool commands +into pathconfig.

      + +
    8. +
    +
    +
    +
    +
    +
    +
    +
    +

    Version 110.36; 2001/09/18

    +
    +
    +
    +
    [2001/09/14]
    +
    +
    +

    John committed some changes that Allen made, in particular a (hopefully) +correctly working version of the x86-fp module.

    +
    +
    +

    I changed the default setting of the Control.MLRISC.getFlag "x86-fast-fp" +flag to "true". Everything seems to compile to a fixpoint ok, and +"mandelbrot" speeds up by about 15%.

    +
    + +
    +
    +
    +
    +
    +
    [2001/09/13]
    +
    +
    +
      +
    1. +

      Stefan Monnier’s patch to fix a miscompilation problem that +was brought to light by John Reppy’s work on Moby.

      +
    2. +
    3. +

      Implemented a minimal "structure Compiler" that contains just +"version" and "architecture". The minimal version will be +available when the full version is not. This is for backward- +compatibility with code that wants to test Compiler.version.

      + +
    4. +
    +
    +
    +
    +
    +
    +
    +
    [2001/08/28]
    +
    +
    +

    Fix for bug 1581, received from Neophytos Michael.

    +
    + +
    +
    +
    +
    +
    +
    +

    Version 110.35; 2001/08/24

    +
    +
    +
    +
    [2001/08/24]
    +
    +
    +
    +
    removed clusters from MLRISC completely and replaced with graphs.
    +
    +
    + +
    +
    +
    +
    +
    +
    [2001/08/23]
    +
    +
    +
      +
    • +

      some reorganization of the code that implements various kinds of +environments in the compiler (static, dynamic, symbolic, combined)

      +
    • +
    • +

      re-implemented the EnvRef module so that evalStream works properly +(if the stream contains references to "use", "CM.make", etc.)

      +
    • +
    • +

      cleaned up evalloop.sml and interact.sml (but they need more cleaning)

      + +
    • +
    +
    +
    +
    +
    +
    +
    +
    [2001/08/20]
    +
    +
    +

    I forgot to commit a few files. Here they are…​

    +
    + +
    +
    +
    +
    +
    +
    [2001/08/20]
    +
    +
    +

    !!!! NEW BOOTFILES !!!!

    +
    +
    +

    This is another round of reorganizing the compiler sources. This +time the main goal was to factor out all the "instrumentation" +passes (for profiling and backtracing) into their own library. +The difficulty was to do it in such a way that it does not depend +on elaborate.cm but only on elabdata.cm.

    +
    +
    +

    Therefore there have been further changes to both elaborate.cm and +elabdata.cm — more "generic" things have been moved from the former +to the latter. As a result, I was forced to split the assignment +of numbers indicating "primtyc"s into two portions: SML-generic and +SML/NJ-specific. Since it would have been awkward to maintain, +I bit the bullet and actually changed the mapping between these +numbers and primtycs. The bottom line of this is that you need +a new set of bin- and bootfiles.

    +
    +
    +

    I have built new bootfiles for all architectures, so doing a fresh +checkout and config/install.sh should be all you need.

    +
    +
    +

    The newly created library’s name is

    +
    +
    +
    +
    $smlnj/viscomp/debugprof.cm
    +
    +
    +
    +

    and its sources live under

    +
    +
    +
    +
    src/compiler/DebugProf
    +
    +
    + +
    +
    +
    +
    +
    +
    [2001/08/15]
    +
    +
    +

    This is a first cut at reorganizing the CM libraries that make up the +core of the compiler. The idea is to separate out pieces that could +be used independently by tools, e.g., the parser, the typechecker, etc.

    +
    +
    +

    The current status is a step in this direction, but it is not quite +satisfactory yet. Expect more changes in the future.

    +
    +
    +

    Here is the current (new) organization…​

    +
    +
    +
    +
    What used to be $smlnj/viscomp/core.cm is now divided into
    +six CM libraries:
    +
    +
    +
    +
    +
    $smlnj/viscomp/basics.cm
    +              /parser.cm
    +              /elabdata.cm
    +              /elaborate.cm
    +              /execute.cm
    +              /core.cm
    +
    +
    +
    +
    +
    The CM files for these libraries live under src/system/smlnj/viscomp.
    +All these libraries are proxy libraries that contain precisely
    +one CM library component.  Here are the locations of the components
    +(all within the src/compiler tree):
    +
    +
    +
    +
    +
    Basics/basics.cm
    +Parse/parser.cm
    +ElabData/elabdata.cm
    +Elaborator/elaborate.cm
    +Execution/execute.cm
    +core.cm
    +
    +
    +
    +
    +
    [This organization is the same that has been used already
    +for a while for the architecture-specific parts of the visible
    +compiler and for the old version of core.cm.]
    +
    +
    +
    +
    +
    As you will notice, many source files have been moved from their
    +respective original locations to a new home in one of the above
    +subtrees.
    +
    +
    +
    +
    +
    The division of labor between the new libraries is the following:
    +
    +
    +
    +
    +
    basics.cm:
    +   - Simple, basic definitions that pertain to many (or all) of
    +     the other libraries.
    +parser.cm:
    +   - The SML parser, producing output of type Ast.dec.
    +   - The type family for Ast is also defined and exported here.
    +elabdata.cm:
    +   - The datatypes that describe input and output of the elaborator.
    +     This includes types, absyn, and static environments.
    +elaborator.cm:
    +   - The SML/NJ type checker and elaborator.
    +     This maps an Ast.dec (with a given static environment) to
    +     an Absyn.dec (with a new static environment).
    +   - This libraries implements certain modules that used to be
    +     structures as functors (to remove dependencies on FLINT).
    +execute.cm:
    +   - Everything having to do with executing binary code objects.
    +   - Dynamic environments.
    +core.cm:
    +   - SML/NJ-specific instantiations of the elaborator and MLRISC.
    +   - Top-level modules.
    +   - FLINT (this should eventually become its own library)
    +
    +
    +
    +

    Notes:

    +
    +
    +

    I am not 100% happy with the way I separated the elaborator (and its +data structures) from FLINT. Two instances of the same problem:

    +
    +
    +
      +
    1. +

      Data structures contain certain fields that carry FLINT-specific +information. I hacked around this using exn and the property list +module from smlnj-lib. But the fact that there are middle-end +specific fields around at all is a bit annoying.

      +
    2. +
    3. +

      The elaborator calculates certain FLINT-related information. I tried +to make this as abstract as I could using functorization, but, again, +the fact that the elaborator has to perform calculations on behalf +of the middle-end at all is not nice.

      +
    4. +
    5. +

      Having to used exn and property lists is unfortunate because it +weakens type checking. The other alternative (parameterizing +nearly everything) is not appealing, though.

      +
      +

      I removed the "rebinding =" warning hack because due to the new organization +it was awkward to maintain it. As a result, the compiler now issues some of +these warnings when compiling init.cmi during bootstrap compilation. On +the plus side, you also get a warning when you do, for example: + val op = = Int32.+ +which was not the case up to now.

      +
      +
      +

      I placed "assign" and "deref" into the _Core structure so that the +code that deals with the "lazy" keyword can find them there. This +removes the need for having access to the primitive environment +during elaboration.

      +
      + +
    6. +
    +
    +
    +
    +
    +
    +
    +
    [2001/08/13]
    +
    +
    +

    This fix was sent to us by Zhong Shao. It is supposed to improve the +performance of certain loops by avoiding needless closure allocation.

    +
    + +
    +
    +
    +
    +
    +
    [2001/07/31]
    +
    +
    +

    There was a bug where call instructions would mysteriously +vanish. The call instruction had to be one that returned +a floating point value.

    +
    + +
    +
    +
    +
    +
    +
    [2001/07/19]
    +
    +
    +

    I have dramatically simplified the interface for CELLS in MLRISC.

    +
    +
    +

    In summary, the cells interface is broken up into three parts:

    +
    +
    +
    +
    +
    +
    +
    +
      +
    1. +

      CellsBasis : CELLS_BASIS

      +
      +

      CellsBasis is a top level structure and common for all +architectures. it contains the definitions of basic datatypes +and utility functions over these types.

      +
      +
    2. +
    3. +

      functor Cells() : CELLS

      +
      +

      Cells generates an interface for CELLS that incorporates the +specific resources on the target architecture, such as the +presence of special register classes, their number and size, +and various useful substructures.

      +
      +
    4. +
    5. +

      <ARCH>CELLS

      +
      +

      e.g., SparcCells: SPARCCELLS

      +
      +
      +

      <ARCH>CELLS usually contains additional bindings for special +registers on the architecture, such as:

      +
      +
      +
      +
              val r0 : cell          (* register zero *)
      +        val y : cell           (* Y register *)
      +        val psr : cell         (* processor status register *)
      +        ...
      +
      +
      +
      +

      The structure returned by applying the Cells functor is opened +in this interface.

      +
      +
      +

      The main implication of all this is that the datatypes for cells is +split between CellsBasis and CELLS — a fairly simple change for user +code.

      +
      +
      +

      In the old scheme the CELLS interface had a definitional binding of +the form:

      +
      +
      +
      +
              signature CELLS =
      +          sig
      +            structure CellsBasis = CellsBasis
      +            ...
      +          end
      +
      +
      +
      +

      With all the sharing constraints that goes on in MLRISC, this old +design quickly leads to errors such as:

      +
      +
      +
      +
              structure definition spec inside of sharing ...
      +
      +
      +
      +

      and appears to require an unacceptable amount of sharing and where +constraint hackery.

      +
      +
      +

      I think this error message (the interaction of definitional specs and +sharing) requires more explanation on our web page.

      +
      +
    6. +
    +
    +
    +
    +
    +

    + +Lal George

    +
    +
    +
    +
    [2001/07/19]
    +
    +
    +

    This update puts together a fairly extensive but straightforward change +to the way the libraries that implement the interactive system are +organized:

    +
    +
    +

    The biggest change is the elimination of structure Compiler. As a +replacement for this structure, there is now a CM library +(known as $smlnj/compiler.cm or $smlnj/compiler/current.cm) +that exports all the substructures of the original Compiler structure +directly. So instead of saying Compiler.Foo.bar one now simply +says Foo.bar. (The CM libraries actually export a collection of +structures that is richer than the collection of substructures of +structure Compiler.)

    +
    +
    +

    To make the transition smooth, there is a separate library called +$smlnj/compiler/compiler.cm that puts together and exports the +original structure Compiler (or at least something very close to it).

    +
    +
    +

    There are five members of the original structure Compiler +that are not exported directly but which instead became members +of a new structure Backend (described by signature BACKEND). +These are:

    +
    +
    +
    +
            structure Profile  : PROFILE
    +        structure Compile  : COMPILE
    +        structure Interact : INTERACT
    +        structure Machine  : MACHINE
    +
    +        val architecture : string
    +
    +
    +
    +

    Structure Compiler.Version has become structure CompilerVersion.

    +
    +
    +

    Cross-compilers for alpha32, hppa, ppc, sparc, and x86 are provided +by $smlnj/compiler/<arch>.cm where <arch> is alpha32, hppa, ppc, sparc, +or x86, respectively. +Each of these exports the same frontend structures that +$smlnj/compiler.cm exports. But they do not have a structure Backend +and instead export some structure <Arch>Backend where <Arch> is Alpha32, +Hppa, PPC, Sparc, or X86, respectively.

    +
    +
    +

    Library $smlnj/compiler/all.cm exports the union of the exports of +$smlnj/compiler/<arch>.cm

    +
    +
    +

    There are no structures <Arch>Compiler anymore, use +$smlnj/compiler/<arch>.cm instead.

    +
    +
    +

    Library host-compiler-0.cm is gone. Instead, the internal library +that instantiates CM is now called cm0.cm. Selection of the host +compiler (backend) is no longer done here but. (Responsibility for it +now lies with $smlnj/compiler/current.cm. This seems to be more +logical.)

    +
    +
    +

    Many individual files have been moved or renamed. Some files have +been split into multiple files, and some "dead" files have been deleted.

    +
    +
    +

    Aside from these changes to library organization, there are also changes +to the way the code itself is organized:

    +
    +
    +

    Structure Binfile has been re-implemented in such a way that it no +longer needs any knowledge of the compiler. It exclusively deals +with the details of binfile layout. It no longer invokes the +compiler (for the purpose of creating new prospective binfile +content), and it no longer has any knowledge of how to interpret +pickles.

    +
    +
    +

    Structure Compile has been stripped down to the bare +essentials of compilation. It no longer deals with linking/execution. +The interface has been cleaned up considerably.

    +
    +
    +

    Utility routines for dealing with linking and execution have been +moved into their own substructures.

    +
    +
    +

    (The ultimate goal of these changes is to provide a light-weight +binfile loader/linker (at least for, e.g., stable libraries) that +does not require CM or the compiler to be present.)

    +
    +
    +

    CM documentation has been updated to reflect the changes to library +organization.

    +
    + +
    +
    +
    +
    +
    +
    [2001/07/10]
    +
    +
    +

    Minor tweak to 110.34 (re-tagged):

    +
    +
    +
      +
    • +

      README.html file added to CVS repository

      +
    • +
    • +

      runtime compiles properly under FreeBSD 3.X and 4.X

      + +
    • +
    +
    +
    +
    +
    +
    +
    +
    +

    Version 110.34; 2001/07/10

    +
    +
    +
    +
    [2001/07/09]
    +
    +
    +

    I changed the handling of varargs in ml-nlffigen again: +The ellipsis …​ will now simply be ignored (with an accompanying warning).

    +
    +
    +

    The immediate effect is that you can actually call a varargs function +from ML — but you can’t actually supply any arguments beyond the ones +specified explicitly. (For example, you can call printf with its format +string, but you cannot pass additional arguments.)

    +
    +
    +

    This behavior is only marginally more useful than the one before, but +it has the advantage that a function or, more importantly, a function +type never gets dropped on the floor, thus avoiding follow-up problems with +other types that refer to the offending one.

    +
    + +
    +
    +
    +
    +
    +
    [2001/07/09]
    +
    +
    +
      +
    1. +

      ckit-lib.cm now exports structure Error

      +
    2. +
    3. +

      ml-nlffigen reports occurences of "…​" (i.e., varargs function types) +with a warning accompanied by a source location. Moreover, it +merely skips the offending function or type and proceeds with the +rest of its work.u As a result, one can safely feed C code containing +"…​" to ml-nlffigen.

      +
    4. +
    5. +

      There are some internal improvements to CM, providing slightly +more general string substitutions in the tools subsystem.

      + +
    6. +
    +
    +
    +
    +
    +
    +
    +
    [2001/06/27]
    +
    +
    +

    Fixed a small bug in CM’s handling of parallel compilation. +(You could observe the bug by Control-C-interrupting an ordinary +CMB.make or CM.stabilize and then attaching some compile servers. +The result was that all of a sudden the previously interrupted +compilation would continue on its own. This was because of +an over-optimization: CM did not bother to clean out certain queues +when no servers were attached "anyway", resulting in the contents +of these queues to grab control when new servers did get attached.)

    +
    +
    +

    There is also another minor update to the CM manual.

    +
    + +
    +
    +
    +
    +
    +
    [2001/06/26]
    +
    +
    +

    Minor typo fixed in CM manual (syntax diagram for libraries).

    +
    + +
    +
    +
    +
    +
    +
    [2001/06/25]
    +
    +
    +

    Fixed a nasty bug in the X86 assembly code that caused signal +handlers to fail (crash) randomly.

    +
    + +
    +
    +
    +
    +
    +
    [2001/06/25]
    +
    +
    +

    This update fixes a number of minor bugs in ml-nlffigen as reported by +Nick Carter <nbc@andrew.cmu.edu>.

    +
    +
    +
      +
    1. +

      Silly but ok typedefs of the form "typedef void myvoid;" are now accepted.

      +
    2. +
    3. +

      Default names for generated files are now derived from the name of +the C file without its directory. In particular, this causes generated +files to be placed locally even if the C file is in some system directory.

      +
    4. +
    5. +

      Default names for generated signatures and structures are also derived +from the C file name without its directory. This avoids silly things +like "structure GL/GL". +(Other silly names are still possible because ml-nlffigen does not do + a thorough check of whether generated names are legal ML identifiers. + When in doubt, use command line arguments to force particular names.)

      + +
    6. +
    +
    +
    +
    +
    +
    +
    +
    [2001/06/21]
    +
    +
    +

    eXene now compiles and (sort of) works again.

    +
    +
    +

    The library name (for version > 110.33) is $/eXene.cm.

    +
    +
    +

    I also added an new example in src/eXene/examples/nbody. See the +README file there for details.

    +
    + +
    +
    +
    +
    +
    +
    [2001/06/20]
    +
    +
    +

    CML now compiles and works again.

    +
    +
    +

    Libraries (for version > 110.33):

    +
    +
    +
    +
    $cml/cml.cm            Main CML library.
    +$cml/basis.cm          CML's version of $/basis.cm.
    +$cml/cml-internal.cm   Internal helper library.
    +$cml/core-cml.cm       Internal helper library.
    +$cml-lib/trace-cml.cm  Tracing facility.
    +$cml-lib/smlnj-lib.cm  CML's version of $/smlnj-lib.cm
    +
    +
    +
    +

    The installer (config/install.sh) has been taught how to properly +install this stuff.

    +
    + +
    +
    +
    +
    +
    +
    [2001/06/19]
    +
    +
    +

    This un-breaks the fix for bug 1432. +(The bug was originally fixed in 110.9 but I broke it again some +time after that.)

    +
    + +
    +
    +
    +
    +
    +
    [2001/06/19]
    +
    +
    +

    This should (hopefully) fix the long-standing signal handling bug. +(The runtime system was constructing a continuation record with an +incorrect descriptor which would cause the GC to drop data on the floor…​)

    +
    + +
    +
    +
    +
    +
    +
    [2001/06/15]
    +
    +
    +

    Here is a short late-hour update related to Sparc c-calls:

    +
    +
    +
    +
    -- made handling of double-word arguments a bit smarter
    +
    +
    +
    +
    +
    -- instruction selection phase tries to collapse certain clumsily
    +   constructed ML-Trees; typical example:
    +
    +
    +
    +
    +
    ADD(ty,ADD(_,e,LI d1),LI d2)  ->  ADD(ty,e,LI(d1+d2))
    +
    +
    +
    +
    +
    This currently has no further impact on SML/NJ since mlriscGen does
    +not seem to generate such patterns in the first place, and c-calls
    +(which did generate them in the beginning) has meanwhile been fixed
    +so as to avoid them as well.
    +
    +
    + +
    +
    +
    +
    +
    +
    [2001/06/15]
    +
    +
    +

    The purpose of this update is to provide an implementation of NLFFI +on Sparc machines.

    +
    +
    +

    Here are the changes in detail:

    +
    +
    +
      +
    • +

      src/MLRISC/sparc/c-calls/sparc-c-calls.sml is a new file containing +the Sparc implementation of the c-calls API.

      +
    • +
    • +

      The Sparc backend of SML/NJ has been modified to uniformely use %fp +for accessing the ML frame. Thus, we have a real frame pointer and +can freely modify %sp without need for an omit-frame-ptr phase. +The vfp logic in src/compiler/CodeGen/* has been changed to accomodate +this case.

      +
    • +
    • +

      ml-nlffigen has been taught to produce code for different architectures +and calling conventions.

      +
    • +
    • +

      In a way similar to what was done in the x86 case, the Sparc +backend uses its own specific extension to mltree. (For example, +it needs to be able to generate UNIMP instructions which are part +of the calling convention.)

      +
    • +
    • +

      ml-nlffi-lib was reorganized to make it more modular (in particular, +to make it easier to plug in new machine- and os-dependent parts).

      +
      +

      There are some other fairly unrelated bug fixes and cleanups as well:

      +
      +
    • +
    • +

      I further hacked the .cm files for MLRISC tools (like MDLGen) so +that they properly share their libraries with existing SML/NJ libraries.

      +
    • +
    • +

      I fixed a minor cosmetic bug in CM, supressing certain spurious +follow-up error messages.

      +
    • +
    • +

      Updates to CM/CMB documentation.

      +
      +

      TODO items:

      +
      +
    • +
    • +

      MLRISC should use a different register as its asmTemp on the Sparc. +(The current %o2 is a really bad choice because it is part of the +calling conventions, so things might interfere in unexpected ways.)

      + +
    • +
    +
    +
    +
    +
    +
    +
    +
    [2001/06/07]
    +
    +
    +

    A number of internal changes related to C calls and calling conventions:

    +
    +
    +
      +
    1. +

      ML-Tree CALL statements now carry a "pops" field. It indicates the +number of bytes popped implicitly (by the callee). In most cases +this field is 0 but on x86/win32 it is some non-zero value. This +is information provided for the benefit of the "omit-frameptr" pass.

      +
    2. +
    3. +

      The CALL instruction on the x86 carries a similar "pops" field. +The instruction selection phase copies its value from the ML-Tree +CALL statement.

      +
    4. +
    5. +

      On all other architectures, the instruction selection phase checks +whether "pops=0" and complains if not.

      +
    6. +
    7. +

      The c-calls implementation for x86 now accepts two calling conventions: +"ccall" and "stdcall". When "ccall" is selected, the caller cleans +up after the call and pops is set to 0. For "stdcall", the caller +does nothing, leaving the cleanup to the callee; pops is set to +the number of bytes that were pushed onto the stack.

      +
    8. +
    9. +

      The cproto decoder (compiler/Semant/types/cproto.sml) now can +distinguish between "ccall" and "stdcall".

      +
    10. +
    11. +

      The UNIMP instruction has been added to the supported Sparc instruction +set. (This is needed for implementing the official C calling convention +on this architecture.)

      +
    12. +
    13. +

      I fixed some of the .cm files under src/MLRISC/Tools to make them +work with the latest CM.

      + +
    14. +
    +
    +
    +
    +
    +
    +
    +
    [2001/06/05]
    +
    +
    +
      +
    1. +

      The "lambdasplit" parameter for class "sml" in CM has been documented.

      +
    2. +
    3. +

      CM can now generate "index files". These are human-readable files +that list on a per-.cm-file basis each toplevel symbol defined or +imported. The location of the index file for +<p>/<d>.cm is <p>/CM/INDEX/<d>.cm. +To enable index-file generation, set CM.Control.generate_index to true +or export an environment-symbol: export CM_GENERATE_INDEX=true.

      +
      +
      +
      The CM manual has been updated accordingly.
      +
      +
      +
    4. +
    5. +

      I made some slight modifications to the c-calls API in MLRISC.

      +
      +
      +
      a) There is now a callback to support saving/restoring of
      +   dedicated but caller-save registers around the actual call
      +   instruction.
      +b) One can optionally specify a comment-annotation for the
      +   call instruction.
      +
      +
      +
    6. +
    7. +

      SML/NJ (mlriscGen.sml) uses this new API for the rawccall primop. +(For example, the comment annotation shows the C prototype of + the function being called.)

      + +
    8. +
    +
    +
    +
    +
    +
    +
    +
    [2001/06/01]
    +
    +
    +

    This is mostly a cleanup of MLFFI stuff:

    +
    +
    +
      +
    • +

      some signature files have been put into a more exposed place

      +
    • +
    • +

      the ugly 'f type parameter is gone (simplifies types tremendously!)

      +
    • +
    • +

      ml-nlffigen changed accordingly

      +
    • +
    • +

      tutorial updated

      +
      +

      Other changes:

      +
      +
    • +
    • +

      author’s affiliation in CM manual(s) updated

      +
    • +
    • +

      some more recognized keywords added to Allen’s sml.sty

      + +
    • +
    +
    +
    +
    +
    +
    +
    +
    [2001/05/25]
    +
    +
    +
      +
    • +

      put the official 110.33-README (as it appears on the ftp server) under +CVS

      +
    • +
    • +

      fixed a small bug related to incomplete pointer types in +ml-nlffigen

      +
    • +
    • +

      small cosmetic change to the ml-nlffi-lib’s "arr" type constructor +(it does not need the 'f type parameter)

      + +
    • +
    +
    +
    +
    +
    +
    +
    +
    +

    Version 110.33; 2001/05/23

    +
    +
    +
    +
    [2001/05/22]
    +
    +
    +

    Made install.sh use file config/targets.customized if it exists, falling +back to config/targets if it doesn’t. This way one can have a customized +version of the targets file without touching the "real thing", thus +eliminating the constant fear of accidentally checking something bogus +back into the CVS repository…​ (File config/targets.customized must +not be added to the repository!)

    +
    + +
    +
    +
    +
    +
    +
    [2001/05/22]
    +
    +
    +
      +
    1. +

      Bug fix in ml-nlffigen; now (hopefully) correctly handling +struct returns.

      +
    2. +
    3. +

      Added src/ml-nlffi-lib/Doc/mini-tutorial.txt. This is some very +incomplete, preliminary documentation for NLFFI.

      + +
    4. +
    +
    +
    +
    +
    +
    +
    +
    [2001/05/14]
    +
    +
    +

    Some bugs in install script fixed.

    +
    +
    +

    In addition to that I also made a slight change to the NLFFI API: +Functors generated by ml-nlffigen now take the dynamic library as a +straight functor argument, not as a suspended one. (The original +functor code used to force the suspension right away anyway, so there +was nothing gained by this complication of the interface.)

    +
    + +
    +
    +
    +
    +
    +
    [2001/05/11]
    +
    +
    +

    I finally took the plunge and added my new FFI code to the main +repository. For x86-linux it is now ready for prime-time.

    +
    +
    +

    There are two new subdirectories of "src":

    +
    +
    +
      +
    • +

      ml-nlffi-lib: +The utility library for programs using the FFI interface. +Here is the implementation of $/c.cm and its associated low-level +partners $/c-int.cm and $/memory.cm.

      +
    • +
    • +

      ml-nlffigen: +A stand-alone program for generating ML glue code from C source +code.

      +
      +

      Building ml-nlffigen requires $/ckit-lib.cm.

      +
      +
      +

      The config/install.sh script has been updates to do the Right Thing +(hopefully).

      +
      +
      +

      Notice that the source tree for the C-Kit will not be put under "src" +but directly under the installation root directory. (This is the +structure that currently exists on the CVS server when you check out +module "sml".) Fortunately, config/install.sh knows about this oddity.

      +
      +
      +

      Bugs: No documentation yet.

      +
      + +
    • +
    +
    +
    +
    +
    +
    +
    +
    [2001/05/09]
    +
    +
    +

    Fixed a bug in the accounting code in cpsopt/contract.sml. (The +wrapper/unwrapper elimination did not decrement usage counts and some +dead variables got overlooked by the dead-up logic.)

    +
    + +
    +
    +
    +
    +
    +
    [2001/05/08]
    +
    +
    +

    Changes to implement the omit-frame-pointer optimization to support +raw C calls. For now, there is only support on the Intel x86, but +other architectures will follow as more experience is gained with this.

    +
    +
    +

    + +Lal George

    +
    +
    +
    +
    +
    +
    +
    [2001/05/07]
    +
    +
    +

    I made into "proxy libraries" all libraries that qualify for such a +change. (A qualifying library is a library that has another library or +groups as its sole member and repeats that member’s export list +verbatim. A proxy library avoids this repetition by omitting its export +list, effectively inheriting the list that its (only) member exports. +See the CM manual for more explanation.) +The main effect is that explicit export lists for these libraries +do not have to be kepts in sync, making maintenance a bit easier.

    +
    +
    +

    I also added copyright notices to many .cm-files.

    +
    +
    +

    Last but not least, I made a new set of bootfiles.

    +
    + +
    +
    +
    +
    +
    +
    [2001/05/04]
    +
    +
    +
      +
    1. +

      John merged pending changes to $/smlnj-lib.cm

      +
    2. +
    3. +

      Allen’s previous change accidentally backed out of one of Lal’s +earlier changes. I undid this mistake (re-introducing Lal’s change).

      +
    4. +
    5. +

      I used the new topOrder' function from graph-scc.sml (from $/smlnj-lib.cm) +within the compiler where applicable. There is some code simplification +because of that.

      +
    6. +
    7. +

      The "split" phase (in FLINT) is now part of the default list of phases. +Compiler.Control.LambdaSplitting.* can be used to globally control the +lambda-splitting (cross-module-inlining) engine. In addition to that, +it can now also be controlled on a per-source basis: CM has been taught +a new tool parameter applicable to ML source files.

      +
      +
        +
      • +

        To turn lambda-splitting off completely: +local open Compiler.Control.LambdaSplitting in + val _ = set Off +end

        +
      • +
      • +

        To make "no lambda-splitting" the global default (but allow per-source +overriding); this is the initial setting: + local open Compiler.Control.LambdaSplitting in + val _ = set (Default NONE) + end

        +
      • +
      • +

        To make "lambda-splitting with aggressiveness a" the global default +(and allow per-source overriding): + local open Compiler.Control.LambdaSplitting in + val _ = set (Default (SOME a)) + end

        +
      • +
      • +

        To turn lambda-splitting off for a given ML souce file (say: a.sml) +write (in the respective .cm-file): + a.sml (lambdasplitting:off)

        +
      • +
      • +

        To turn lambda-splitting for a.sml on with minimal aggressiveness: +a.sml (lambdasplitting:on)

        +
      • +
      • +

        To turn lambda-splitting for a.sml on with aggressiveness <a> (where +<a> is a decimal non-negative integer): + a.sml (lambdasplitting:<a>)

        +
      • +
      • +

        To turn lambda-splitting for a.sml on with maximal aggressiveness: +a.sml (lambdasplitting:infinity)

        +
      • +
      • +

        To use the global default for a.sml: + a.sml (lambdasplitting:default) +or simply + a.sml

        + +
      • +
      +
      +
    8. +
    +
    +
    +
    +
    +
    +
    +
    [2001/05/04]
    +
    +
    +
    +
    MLRISC features.
    +
    +
    +
    +
      +
    1. +

      Fix to CMPXCHG instructions.

      +
    2. +
    3. +

      Changed RA interface to allow annotations in callbacks.

      +
    4. +
    5. +

      Added a new method to the stream interface to allow annotations updates.

      + +
    6. +
    +
    +
    +
    +
    +
    +
    +
    [2001/05/01]
    +
    +
    +

    Changed install.sh to use the current working directory instead of +/usr/tmp for a temporary file (pcedittmp). The previous choice +of /usr/tmp caused trouble with MacOS X because of file premission +problems.

    +
    + +
    +
    +
    +
    +
    +
    [2001/04/20]
    +
    +
    +
      +
    • +

      added vp_limitPtrMask to vproc-state.h +(for use by the raw-C-calls mechanism to implement proper interrupt + handling)

      +
    • +
    • +

      made the ML compiler aware of various data-structure offsets so it +can generate code for accessing the vp_inML flag and vp_limitPtrMask

      +
    • +
    • +

      tweaked mlriscGen.sml to have it emit interrupt-handling code for +raw C-calls

      + +
    • +
    +
    +
    +
    +
    +
    +
    +
    [2001/04/20]
    +
    +
    +
      +
    • +

      Changes to port to Mac OS X; Darwin.

      +
    • +
    • +

      In the process I found that sqrt was broken on the PPC, because the +fsqrt instruction is not implemented.

      + +
    • +
    +
    +
    +
    +
    +
    +
    +
    [2001/04/18]
    +
    +
    +
      +
    • +

      fixed two off-by-4 errors in the x86-specific c-calls implementation +(this bug prevented structure arguments containing pointers from being + passed correctly)

      +
    • +
    • +

      changed the raw-C-call code in mlriscGen.sml in such a way that +structure arguments are represented as a pointer to the beginning +of the structure (instead of having a series of synthesized arguments, +one for each structure member)

      +
    • +
    • +

      made makeml script’s verbosity level configurable via environment +variable (MAKEML_VERBOSITY)

      +
    • +
    • +

      eliminated placeholder implementations for f32l, w16s, i16s, and f32s +in rawmem-x86.sml; we are now using the real thing

      + +
    • +
    +
    +
    +
    +
    +
    +
    +
    [2001/03/22]
    +
    +
    +

    Created a new set of bootfiles (for your automatic installation convenience).

    +
    + +
    +
    +
    +
    +
    +
    [2001/03/22]
    +
    +
    +
      +
    1. +

      All "raw memory access" primitives for the new FFI are implemented now +(at least on the x86).

      +
    2. +
    3. +

      Some further cleanup of CM’s parallel make mechanism.

      + +
    4. +
    +
    +
    +
    +
    +
    +
    +
    [2001/03/19]
    +
    +
    +

    Parallel make (using compile servers) now works again.

    +
    +
    +

    To this end, CM.stabilize and CMB.make have been modified to work in +two passes when compile servers are attached: + 1. Compile everything, do not perform stabilization; this pass + uses compile servers + 2. Stabilize everything; this pass does not use compile servers +If there are no compile servers, the two passes are combined into one +(as before). Splitting the passes increases the inherent parallelism +in the dependency graph because the entire graph including all +libraries is available at the same time. This, in turn, improves +server utilization. The downside is that the master process will +have to do some extra work after compilation is done (because for +technical reasons it must re-read all the binfiles during stabilization).

    +
    + +
    +
    +
    +
    +
    +
    [2001/03/16]
    +
    +
    +

    Created a new set of bootfiles (for your automatic installation convenience).

    +
    + +
    +
    +
    +
    +
    +
    [2001/03/16]
    +
    +
    +

    This is a minor fixup for an (untagged) earlier commit by Allen. +(A file was missing).

    +
    + +
    +
    +
    +
    +
    +
    [2001/03/05]
    +
    +
    +
      +
    1. +

      New support for alternative control-flow in MLTREE. +Currently we support

      +
      +
      +
      FLOW_TO(CALL ...., [k1,...,kn])
      +
      +
      +
      +
      +
      This is needed for 'cuts to' in C-- and try/handle-like constructs
      +in Moby
      +
      +
      +
      +
      +
      New assembler flag "asm-show-cutsto" to turn on control-flow debugging.
      +
      +
      +
    2. +
    3. +

      Register Allocator

      +
      +
      +
      Changes in interface [from Fermin, John]
      +
      +
      +
    4. +
    5. +

      Alpha 8-bit SLL support [Fermin]

      +
    6. +
    7. +

      All architectures

      +
      +
      +
      A new module (ClusterExpandCopies) for expanding parallel copies.
      +
      +
      + +
    8. +
    +
    +
    +
    +
    +
    +
    +
    [2001/02/27]
    +
    +
    +
      +
    1. +

      Alpha bug fix for CMOVNE

      +
    2. +
    3. +

      Handle mltree COND(..,FCMP …​,…​)

      +
    4. +
    5. +

      Bug fix in simplifier

      + +
    6. +
    +
    +
    +
    +
    +
    +
    +
    [2001/01/30]
    +
    +
    +

    This is just a minor update to sync my devel branch with the main brach. +The only visible change is the addition of some README files.

    +
    + +
    +
    +
    +
    +
    +
    [2001/01/12]
    +
    +
    +

    Made a new set of bootfiles that goes with the current state of the +repository.

    +
    + +
    +
    +
    +
    +
    +
    [2001/01/12]
    +
    +
    +

    I am just flushing out some minor changes that had accumulated in +my private branch in order to sync with the main tree. (This is +mainly because I had CVS trouble when trying to merge into my +private branch.)

    +
    +
    +

    Most people should be completely unaffected by this.

    +
    + +
    +
    +
    +
    +
    +
    [2001/01/22]
    +
    +
    +
      +
    1. +

      Removed the type LabelExp and replace it by MLTree.

      +
    2. +
    3. +

      Rewritten mltree-simplify with the pattern matcher tool.

      +
    4. +
    5. +

      There were some bugs in alpha code generator which would break +64-bit code generation.

      +
    6. +
    7. +

      Redo the tools to generate code with the

      +
    8. +
    9. +

      The CM files in MLRISC (and in src/system/smlnj/MLRISC) +are now generated by perl scripts.

      + +
    10. +
    +
    +
    +
    +
    +
    +
    +
    [2001/01/10]
    +
    +
    +

    The RCC stuff now seems to work (but only on the x86). +This required hacking of the c-calls interface (and -implementation) in +MLRISC.

    +
    +
    +

    Normal compiler users should be unaffected.

    +
    + +
    +
    +
    +
    +
    +
    [2001/01/09]
    +
    +
    +

    This is a fairly big patch, flushing out a large number of pending +changes that I made to my development copy over the last couple of days.

    +
    +
    +

    Of practical relevance at this moment is a workaround for a pickling +bug that Allen ran into the other day. The cause of the bug itself is +still unknown and it might be hard to fix it properly, but the +workaround has some merits of its own (namely somewhat reducing pickling +overhead for certain libraries). Therefore, I think this solution should +be satisfactory at this time.

    +
    +
    +

    The rest of the changes (i.e., the vast majority) has to do with my +ongoing efforts of providing direct support for C function calls from +ML. At the moment there is a new primop "RAW_CCALL", typing magic +in types/cproto.sml (invoked from FLINT/trans/translate.sml), a new +case in the FLINT CPS datatype (RCC), changes to cps/convert.sml to +translate uses of RAW_CCALL into RCC, and changes to mlriscGen.sml to +handle RCC.

    +
    +
    +

    The last part (the changes to mlriscGen.sml) are still known to be +wrong on the x86 and not implemented on all other architectures. But +the infrastructure is in place. I had to change a few functor +signatures in the backend to be able to route the CCalls interface +from MLRISC there, and I had to specialize the mltree type (on the +x86) to include the necessary extensions. (The extensions themselves +were already there and redy to go in MLRISC/x86).

    +
    +
    +

    Everything should be very happy as soon as someone helps me with +mlriscGen.sml…​

    +
    +
    +

    In any case, nothing of this should matter to anyone as long as the +new primop is not being used (which is going to be the case unless you +find it where I hid it :). The rest of the compiler is completely +unaffected.

    +
    + +
    +
    +
    +
    +
    +
    [2001/01/05]
    +
    +
    +

    Added some experimental support for work that I am doing right now. +These changes mostly concern added primops, but there is also a new +experimental C library in the runtime system (but currently not enabled +anywhere except on Linux/X86).

    +
    +
    +

    In the course of adding primops (and playing with them), I discovered that +Zhong’s INL_PRIM hack (no type info for certain primops) was, in fact, badly +broken. (Zhong was very right he labeled this stuff as "major gross hack".) +To recover, I made type information in INL_PRIM mandatory and changed +prim.sml as well as built-in.sml accordingly. The InLine structure now +has complete, correct type information (i.e., no bottom types).

    +
    +
    +

    Since all these changes mean that we need new binfiles, I also bumped the +version number to 110.32.1.

    +
    + +
    +
    +
    +
    +
    +
    [2000/12/30]
    +
    +
    +

    Added proxy libraries for MLRISC and let MLRISC libraries refer +to each other using path anchors. (See CM manual for explanation.)

    +
    +
    +

    Updated CM documentation.

    +
    +
    +

    Fixed some bugs in CM.

    +
    +
    +

    Implemented "proxy" libraries (= syntactic sugar for CM).

    +
    +
    +

    Added "-quiet" option to makeml and changed runtime system accordingly.

    +
    +
    +

    Added cleanup handler for exportML to reset timers and compiler stats.

    +
    + +
    +
    +
    +
    +
    +
    [2000/12/22]
    +
    +
    +
    +
    Infinite precision used throughout MLRISC.
    +see MLRISC/mltree/machine-int.sig
    +
    +
    + +
    +
    +
    +
    +
    +
    [2000/12/22]
    +
    +
    +

    Corrected wording and formatting of some CM warning message which I +broke in my previous patch.

    +
    + +
    +
    +
    +
    +
    +
    [2000/12/22]
    +
    +
    +

    Fixed CM’s handling of anchor environments in connection with CMB.make.

    +
    + +
    +
    +
    +
    +
    +
    [2000/12/22]
    +
    +
    +

    Removed src/cm/ffi which does not (and did not) belong here.

    +
    + +
    +
    +
    +
    +
    +
    [2000/12/21]
    +
    +
    +

    Probably most important: CM no longer silently swallows all exceptions +in the compiler. +Plus: some other minor CM changes. For example, CM now reports some +sizes for generated binfiles (code, data, envpickle, lambdapickle).

    +
    + +
    +
    +
    +
    +
    +
    [2000/12/15]
    +
    +
    +
      +
    • +

      "dir" tool added.

      +
    • +
    • +

      improvements and cleanup to Tools structure

      +
    • +
    • +

      documentation updates

      + +
    • +
    +
    +
    +
    +
    +
    +
    +
    [2000/12/14]
    +
    +
    +
    +
       In IntInf, added these standard functions, which are missing from our
    +implementation:
    +
    +
    +
    +
    +
    andb : int * int -> int
    +xorb : int * int -> int
    +orb  : int * int -> int
    +notb : int -> int
    + <<   : int * word -> int
    +~>>  : int * word -> int
    +
    +
    +
    +
    +
    Not tested, I hope they are correct.
    +
    +
    + +
    +
    +
    +
    +
    +
    [2000/12/08]
    +
    +
    +
    +
      Slight improvements to the 'nowhere' tool to handle OR-patterns,
    +to generate better error messages etc.  Plus a brief manual.
    +
    +
    + +
    +
    +
    +
    +
    +
    [2000/12/08]
    +
    +
    + +
    +
    +
    +
    +
    +
    +
    [2000/12/07]
    +
    +
    +

    Major MLRISC internal changes. Affect all clients. +Summary:

    +
    +
    +
      +
    1. +

      Type CELLS.cell = int is now replaced by a datatype. +As a result, the old regmap is now gone. Almost all interfaces +in MLRISC change as a consequence.

      +
    2. +
    3. +

      A new brand version of machine description tool (v3.0) that generates +modules expecting the new interface. The old version is removed.

      +
    4. +
    5. +

      The RA interface has been further abstracted into two new functors. +RISC_RA and X86RA. These functors have much simpler interfaces. +[See also directory MLRISC/demo.]

      +
    6. +
    7. +

      Some other new source→source code generation tools are available:

      +
      +
        +
      1. +

        MLRISC/Tools/RewriteGen — generate rewriters from rules.

        +
      2. +
      3. +

        MLRISC/Tools/WhereGen — expands conditional pattern matching rules. +I use this tool to generate the peephole optimizers---with the new +cell type changes, peephole rules are becoming difficult to write +without conditional pattern matching.

        +
      4. +
      +
      +
    8. +
    9. +

      More Intmap → IntHashTable change. Previous changes by Matthias didn’t +cover the entire MLRISC source tree so many things broke.

      +
    10. +
    11. +

      CM files have been moved to the subdirectory MLRISC/cm. +They are moved because there are a lot of them and they clutter up the +root dir.

      +
    12. +
    13. +

      More detailed documentation to come…​

      +
      +
      +
      NOTE: To rebuild from 110.30 (ftp distribution), you'll have to do
      +a makeml -rebuild first.  This is because of other other
      +changes that Matthias has made (see below).
      +
      +
      + +
    14. +
    +
    +
    +
    +
    +
    +
    +
    [2000/11/30]
    +
    +
    +

    Some manual updates and some file reorganizations in CM.

    +
    + +
    +
    +
    +
    +
    +
    [2000/11/24]
    +
    +
    +

    Drastically improved link traversal code for the case that the dynamic +value was already loaded at bootstrap time. As a result, CM and CMB +now both load blazingly fast — even on a very slow machine. Also, +memory consumption has been further reduced by this.

    +
    +
    +

    Warning: The format of the PIDMAP file has changed. THerefore, to +bootstrap you have to do this:

    +
    +
    +
      +
    1. +

      Run CMB.make

      +
    2. +
    3. +

      Make a symbolic link for the boot directory: +ln -s sml.boot.ARCH-OS xxx

      +
    4. +
    5. +

      "Rebuild" the boot directory: +./makeml -boot xxx -rebuild sml ; rm xxx

      +
    6. +
    7. +

      Boot normally: +./makeml

      + +
    8. +
    +
    +
    +
    +
    +
    +
    +
    [2000/11/21]
    +
    +
    +

    Continued hacking on autoloading problem — with success this time. +Also changed tool-plugin mechanism. See new CM manual.

    +
    + +
    +
    +
    +
    +
    +
    [2000/11/19]
    +
    +
    +

    Some hacking to make autoloading faster. Success for CMB, no success +so far for CM. There is a reduced structure CM' that autoloads faster. +(This is a temporary, non-documented hack to be eliminated again when +the general problem is solved.)

    +
    + +
    +
    +
    +
    +
    +
    [2000/11/17]
    +
    +
    +
      +
    1. +

      Eliminated comp-lib.cm

      +
    2. +
    3. +

      Made pickle-lib.cm

      +
    4. +
    5. +

      Eliminated all uses of intset.sml (from comp-lib.cm)

      +
    6. +
    7. +

      Replaced all uses of intmap.{sig,sml} (from comp-lib.cm) with +equivalent constructs from smlnj-lib.cm (INtHashTable).

      +
    8. +
    9. +

      Point 4. also goes for those uses of intmap.* in MLRISC. +Duplicated intmap modules thrown out.

      +
    10. +
    11. +

      Hunted down all duplicated SCC code and replaced it with +equivalent stuff (GraphSCCFn from smlnj-lib.cm).

      +
    12. +
    13. +

      Rewrote Feedback module.

      +
    14. +
    15. +

      Moved sortedlist.sml into viscomp-lib.cm. Eventually it +should be thrown out and equivalent modules from smlnj-lib.cm +should be used (IntRedBlackSet, IntListSet, …​).

      +
      +

      Confirmed that compiler compiles to fixpoint.

      +
      + +
    16. +
    +
    +
    +
    +
    +
    +
    +
    [2000/11/10]
    +
    +
    +

    A new x86 floating point code generator has been added. +By default this is turned off. To turn this on, do:

    +
    +
    +
    +
    CM.autoload "$smlnj/compiler.cm";
    +Compiler.Control.MLRISC.getFlag "x86-fast-fp" := true;
    +
    +
    +
    +

    Changes:

    +
    +
    +
      +
    1. +

      Changed FTAN to FPTAN so that the assembly output is correct.

      +
    2. +
    3. +

      Changed the extension callback for FTANGENT to generate:

      +
      +
      +
            fptan
      +      fstp  %st(0)
      +instead of
      +      fptan
      +      fstpl ftempmem
      +
      +
      +
    4. +
    5. +

      Numerous assembly fixes for x86.

      +
    6. +
    7. +

      Cleaned up the machine code output module x86/x86MC.sml and added +support for a whole bunch of instructions and addressing modes:

      +
      +
      +
      fadd/fsub/fsubr/fmul/fdiv/fdivr  %st, %st(n)
      +faddp/fsubp/fsubrp/fmulp/fdivp/fdivrp  %st, %st(n)
      +fadd/fsub/fsubr/fmul/fdiv/fdivr  %st(n), %st
      +fiadd/fisub/fisubr/fimul/fidiv/fidivr mem
      +fxch %st(n)
      +fld %st(n)
      +fst %st(n)
      +fst mem
      +fstp %st(n)
      +fucom %st(n)
      +fucomp %st(n)
      +
      +
      +
      +
      +
      All these are now generated when the fast fp mode is turned on.
      +
      +
      +
    8. +
    9. +

      Removed the dedicated registers %st(0), …​, %st(7) from X86CpsRegs

      + +
    10. +
    +
    +
    +
    +
    +
    +
    +
    [2000/11/09]
    +
    +
    +

    Eliminated some code duplication:

    +
    +
    +
      +
    1. +

      Added "where" clause to GraphSCCFn in SML/NJ Library. +(Otherwise the functor is useless.)

      +
    2. +
    3. +

      Used GraphSCCFn where SCCUtilFun was used previously.

      +
    4. +
    5. +

      Got rid of SCCUtilFun (in comp-lib.cm).

      + +
    6. +
    +
    +
    +
    +
    +
    +
    +
    +

    Version 110.30; 2000/11/06

    +
    +
    +
    +
    [2000/11/04]
    +
    +
    +
      +
    • +

      Made ml-build faster on startup.

      +
    • +
    • +

      Documentation fixes.

      + +
    • +
    +
    +
    +
    +
    +
    +
    +
    [2000/11/02]
    +
    +
    +
      +
    • +

      Small tweaks to pickler — new BOOTFILES!

      +
    • +
    • +

      Version bumped to 110.29.2.

      +
    • +
    • +

      Added conditional compilation facility to init.cmi (see comment there).

      + +
    • +
    +
    +
    +
    +
    +
    +
    +
    [2000/10/23]
    +
    +
    +
      +
    1. +

      Minor RA changes that improves spilling on x86 (affects Moby and C-- only)

      +
    2. +
    3. +

      Test programs for the graph library updated

      +
    4. +
    5. +

      Some new MLRISC demo programs added

      + +
    6. +
    +
    +
    +
    +
    +
    +
    +
    [2000/08/31]
    +
    +
    +

    More error message grief: Where there used to be no messages, there +now were some that had bogus error regions. Fixed.

    +
    + +
    +
    +
    +
    +
    +
    +

    Version 110.29.1; 2000/08/31

    +
    +
    +
    +
    [2000/08/31]
    +
    +
    +

    I made a version 110.29.1 with new bootfiles.

    +
    +
    +

    Changes: Modified pickler/unpickler for faster and leaner unpickling. + CM documentation changes and a small bugfix in CM’s error reporting.

    +
    + +
    +
    +
    +
    +
    +
    [2000/09/27]
    +
    +
    +

    Changed the type of the nodestatus, so that:

    +
    +
    +
    +
    SPILLED(~1)		is now SPILLED
    +SPILLED(m) where m>=0   is now MEMREG(m)
    +SPILLED(s) where s<~1   is now SPILL_LOC(~s)
    +
    +
    + +
    +
    +
    +
    +
    +
    [2000/09/07]
    +
    +
    +

    Small tweak to CM to avoid getting ML syntax error messages twice.

    +
    + +
    +
    +
    +
    +
    +
    [2000/08/31]
    +
    +
    +

    New URL for boot files (because the 110.29 files on the BL server do +now work correctly with my updated install scripts for yacc and lex).

    +
    + +
    +
    +
    +
    +
    +
    [2000/08/08]
    +
    +
    +

    Tiny update to CM manual.

    +
    + +
    +
    +
    +
    +
    +
    [2000/08/7]
    +
    +
    +
    +
      Moby, C--, SSA, x86, machine descriptions etc.  Should only affect C--
    +and Mobdy.
    +
    +
    +
    +
      +
    1. +

      x86

      +
      +
        +
      1. +

        Fixes to peephole module by John and Dan.

        +
      2. +
      3. +

        Assembly fix to SETcc by Allen.

        +
      4. +
      5. +

        Fix to c-call by John.

        +
      6. +
      7. +

        Fix to spilling by John. (This one deals with the missing FSTPT case)

        +
      8. +
      9. +

        Instruction selection optimization to SETcc as suggested by John.

        +
        +
        +
        For example,
        +
        +
        +
        +
        +
        MV(32, x, COND(32, CMP(32, LT, a, b), LI 1, LI 0))
        +
        +
        +
        +
        +
        should generate:
        +
        +
        +
        +
        +
        MOVL a, x
        +SUBL b, x
        +SHRL 31, x
        +
        +
        +
      10. +
      +
      +
    2. +
    3. +

      IR stuff

      +
      +
      +
      A bunch of new DJ-graph related algorithms added.  These
      +speed up SSA construction.
      +
      +
      +
    4. +
    5. +

      SSA + Scheduling

      +
      +
      +
      Added code for SSA and scheduling to the repository
      +
      +
      + +
    6. +
    +
    +
    +
    +
    +
    +
    +
    [2000/07/27]
    +
    +
    +

    + + Made changes to support Linux PPC. + p.s. I have confirmation that the 110.29 boot files work fine.

    +
    +
    +

    + +Lal George

    +
    +
    +
    +
    +
    +
    +
    [2000/07/27]
    +
    +
    +

    !!!! WARNING !!!! +You must recompile the runtime system! +!!!! WARNING !!!!

    +
    +
    +

    This is basically another round of script-enhancements:

    +
    +
    +
      +
    1. +

      sml, ml-build, and ml-makedepend accept options -D and -U to define +and undefine CM preprocessor symbols.

      +
    2. +
    3. +

      ml-build avoids generating a new heap image if it finds that the +existing one is still ok. (The condition is that no ML file had to +be recompiled and all ML files are found to be older that the heap +file.)

      +
      +
      +
      To make this work smoothly, I also hacked the runtime system as
      +well as SMLofNJ.SysInfo to get access to the heap image suffix
      +(.sparc-solaris, ...) that is currently being used.
      +
      +
      +
      +
      +
      Moreover, the signature of CM.mk_standalone has changed.  See the
      +CM manual.
      +
      +
      +
    4. +
    5. +

      ml-makedepend accepts additional options -n, -a, and -o. (See the +CM manual for details.)

      +
    6. +
    7. +

      More CM manual updates:

      +
      +
        +
      • +

        all of the above has been documented.

        +
      • +
      • +

        there is now a section describing the (CM-related) command line +arguments that are accepted by the "sml" command

        + +
      • +
      +
      +
    8. +
    +
    +
    +
    +
    +
    +
    +
    [2000/07/25]
    +
    +
    +

    Added a script called ml-makedepend. This can be used in makefiles +for Unix' make in a way very similar to the "makedepend" command for +C.

    +
    +
    +

    The script internally uses function CM.sources.

    +
    +
    +

    Synopsis:

    +
    +
    +
    +
    *ml-makedepend* [-f makefile] cmfile targetname
    +
    +
    +
    +

    The default for the makefile is "makefile" (or "Makefile" should +"makefile" not exist).

    +
    +
    +

    ml-makedepend adds a cmfile/targetname-specific section to this +makefile (after removing the previous version of this section). The +section contains a single dependency specification with targetname on +the LHS (targetname is an arbitrary name), and a list of files derived +from the cmfile on the RHS. Some of the files on the RHS are +ARCH/OPSYS-specific. Therefore, ml-makedepend inserts references to +"make" variables $(ARCH) and $(OPSYS) in place of the corresponding +path names. The makefile writer is responsible for making sure that +these variables have correct at the time "make" is invoked.

    +
    + +
    +
    +
    +
    +
    +
    [2000/07/22]
    +
    +
    +

    Changed BOOT and config/srcarchiveurl to point to BL server:

    +
    +
    +
    +
    ftp://ftp.research.bell-labs.com/dist/smlnj/working/110.29/
    +
    +
    + +
    +
    +
    +
    +
    +
    +

    Version 110.29; 2000/07/18

    +
    +
    +
    +
    [2000/07/18]
    +
    +
    +
      +
    1. +

      Updated src/compiler/TopLevel/main/version.sml to version 110.29

      +
    2. +
    3. +

      Updated config/version to 110.29

      +
    4. +
    5. +

      Updated config/srcarchiveurl

      +
    6. +
    7. +

      New boot files! +ftp://ftp.cs.princeton.edu/pub/people/blume/sml/110.29-autofetch

      + +
    8. +
    +
    +
    +
    +
    +
    +
    +
    [2000/07/11]
    +
    +
    +

    Fixed a few typos in CM manual.

    +
    + +
    +
    +
    +
    +
    +
    [2000/06/15]
    +
    +
    +
      +
    1. +

      x86 peephole improvement sp += k; sp -= k ⇒ nop [from John]

      +
    2. +
    3. +

      fix to x86 RET bug [found by Dan Grossman]

      +
    4. +
    5. +

      sparc assembly bug fix for ticc instructions [found by Fermin]

      +
      +
      +
      Affects c-- and moby only
      +
      +
      + +
    6. +
    +
    +
    +
    +
    +
    +
    +
    [2000/07/04]
    +
    +
    +
      +
    1. +

      Improvements to CM manual.

      +
    2. +
    3. +

      SMLofNJ.Internals.BTrace.trigger reinstated as an alternative way +of getting a back-trace. The function, when called, raises an +internal exception which explicitly carries the full back-trace history, +so it is unaffected by any intervening handle-raise pairs ("trivial" +or not). The interactive loop will print that history once it arrives +at top level. +Short of having all exceptions implicitly carry the full history, the +recommended way of using this facility is:

      +
      +
        +
      • +

        compile your program with instrumentation "on"

        +
      • +
      • +

        run it, when it raises an exception, look at the history

        +
      • +
      • +

        if the history is "cut off" because of some handler, go and modify +your program so that it explicitly calls BTrace.trigger

        +
      • +
      • +

        recompile (still instrumented), and rerun; look at the full history

        + +
      • +
      +
      +
    4. +
    +
    +
    +
    +
    +
    +
    +
    [2000/07/03]
    +
    +
    +

    Small corrections and updates to CM manual.

    +
    + +
    +
    +
    +
    +
    +
    [2000/06/29]
    +
    +
    +

    Changes:

    +
    +
    +
      +
    1. +

      Class "mlyacc" now takes separate arguments to pass options to +generated .sml- and .sig-files independently.

      +
    2. +
    3. +

      Corresponding CM manual updates.

      +
    4. +
    5. +

      BTrace module now also reports call sites. (However, for loop clusters +it only shows from where the cluster was entered.) There are associated +modifications to core.sml, internals.{sig,sml}, btrace.sml, and btimp.sml.

      + +
    6. +
    +
    +
    +
    +
    +
    +
    +
    [2000/06/27]
    +
    +
    +

    Changes:

    +
    +
    +
      +
    1. +

      Implemented "subdir" and "witness" options for noweb tool. +This caused some slight internal changes in CM’s tool implementation.

      +
    2. +
    3. +

      Fixed bug in "tool plugin" mechanism. This is essentially cleaning +some remaining issues from earlier path anchor changes.

      +
    4. +
    5. +

      Updated CM manual accordingly.

      +
    6. +
    7. +

      Changed implementation of back-tracing so that I now consider it +ready for prime-time.

      +
      +
      +
      In particular, you don't have to explicitly trigger the back-trace
      +anymore.  Instead, if you are running BTrace-instrumented code and
      +there is an uncaught exception (regardless of whether or not it was
      +raised in instrumented code), the top-level evalloop will print
      +the back-trace.
      +
      +
      +
      +
      +
      Features:
      +
      +
      +
      +
        +
      • +

        Instrumented and uninstrumented code work together seemlessly. +(Of course, uninstrumented code is never mentioned in actual + back-traces.)

        +
      • +
      • +

        Asymptotic time- and space-complexity of instrumented code is +equal to that of uninstrumented code. (This means that +tail-recursion is preserved by the instrumentation phase.)

        +
      • +
      • +

        Modules whose code has been instrumented in different sessions +work together without problem.

        +
      • +
      • +

        There is no penalty whatsoever on uninstrumented code.

        +
      • +
      • +

        There is no penalty on "raise" expressions, even in +instrumented code.

        +
        +
        +
        A potential bug (or perhaps it is a feature, too):
        +
        +
        +
        +
        +
        A back-trace reaches no further than the outermost instrumented
        +non-trivial "raise".  Here, a "trivial" raise is one that is the
        +sole RHS of a "handle" rule.  Thus, back-traces reach trough
        +
        +
        +
        +
        +
        <exp> handle e => raise e
        +
        +
        +
        +
        +
        and even
        +
        +
        +
        +
        +
        <exp> handle Foo => raise Bar
        +
        +
        +
        +
        +
        and, of course, through
        +
        +
        +
        +
        +
        <exp> handle Foo => ...
        +
        +
        +
        +
        +
        if the exception was not Foo.
        +
        +
        +
        +
        +
        Back-traces always reach right through any un-instrumented code
        +including any of its "handle" expressions, trivial or not.
        +
        +
        +
        +
        +
        To try this out, do the following:
        +
        +
        +
      • +
      • +

        Erase all existing binfiles for your program. +(You may keep binfiles for those modules where you think you + definitely don’t need back-tracing.)

        +
      • +
      • +

        Turn on back-trace instrumentation: +SMLofNJ.Internals.BTrace.mode (SOME true);

        +
      • +
      • +

        Recompile your program. (I.e., run "CM.make" or "use".)

        +
      • +
      • +

        You may now turn instrumentation off again (if you want): +SMLofNJ.Internals.BTrace.mode (SOME false);

        +
      • +
      • +

        Run your program as usual. If it raises an exception that +reaches the interactive toplevel, then a back-trace will +automatically be printed. After that, the toplevel loop +will print the exception history as usual.

        + +
      • +
      +
      +
    8. +
    +
    +
    +
    +
    +
    +
    +
    [2000/06/26]
    +
    +
    +

    CM: - setup-parameter to "sml" added; this can be used to run arbitrary + ML code before and after compiling a file (e.g., to set compiler + flags)

    +
    +
    +

    Compiler: - improved btrace API (in core.sml, internals.{sig,sml}) + - associated changes to btrace.sml (BTrace instrumentation pass) + - cleaner implementation of btimp.sml (BTrace tracing and report + module)

    +
    +
    +

    + +CM manual: * new path encoding documented

    +
    +
    +
      +
    • +

      description of setup-parameter to "sml" added

      +
      +

      The biggest user-visible change to back-tracing is that it is no +longer necessary to compile all traced modules within the same +session. (This was a real limitation.)

      +
      + +
    • +
    +
    +
    +
    +
    +
    +
    +
    [2000/06/24]
    +
    +
    +

    Fixes startup slowdown problem. (I was calling SrcPath.sync a tad +bit too often — to put it mildly. :)

    +
    + +
    +
    +
    +
    +
    +
    [2000/06/23]
    +
    +
    +

    This updates adds a backtrace facility to aid programmers in debugging +their programs. This involves the following changes:

    +
    +
    +
      +
    1. +

      Module system/smlnj/init/core.sml (structure _Core) now has hooks for +keeping track of the current call stack. When programs are compiled +in a special mode, the compiler will insert calls to these hooks +into the user program. +"Hook" means that it is possible for different implementations of +back-tracing to register themselves (at different times).

      +
    2. +
    3. +

      compiler/MiscUtil/profile/btrace.sml implements the annotation phase +as an Absyn.dec→Absyn.dec rewrite. Normally this phase is turned off. +It can be turned on using this call: + SMLofNJ.Internals.BTrace.mode (SOME true); +Turning it off again: + SMLofNJ.Internals.BTrace.mode (SOME false); +Querying the current status: + SMLofNJ.Internals.BTrace.mode NONE; +Annotated programs are about twice as big as normal ones, and they +run a factor of 2 to 4 slower with a dummy back-trace plugin (one +where all hooks do nothing). The slowdown with a plugin that is +actually useful (such as the one supplied by default) is even greater, +but in the case of the default plugin it is still only an constant +factor (amortized).

      +
    4. +
    5. +

      system/Basis/Implementation/NJ/internals.{sig,sml} have been augmented +with a sub-structure BTrace for controlling back-tracing. In particular, +the above-mentioned function "mode" controls whether the annotation +phase is invoked by the compiler. Another important function is +"trigger": when called it aborts the current execution and causes +the top-level loop to print a full back-trace.

      +
    6. +
    7. +

      compiler/MiscUtil/profile/btimp.sml is the current default plugin +for back-tracing. It keeps track of the dynamic call stack and in +addition to that it keeps a partial history at each "level" of that +stack. For example, if a tail-calls b, b tail-calls c, and c tail-calls +d and b (at separate times, dynamically), then the report will show:

      +
      +
      +
         GOTO   d
      +         /c
      +   GOTO  \b
      +   CALL   a
      +
      +
      +
      +
      +
      This shows that there was an initial non-tail call of a, then a
      +tail-call to b or c, looping behavior in a cluster of functions that
      +consist of b and c, and then a goto from that cluster (_i.e._, either from
      +b or from c) to d.
      +
      +
      +
      +
      +
      Note that (depending on the user program) the amount of information
      +that the back-trace module has to keep track of at each level is bounded
      +by a constant.  Thus, the whole implementation has the same asymptotical
      +complexity as the original program (both in space and in time).
      +
      +
      +
    8. +
    9. +

      compiler/TopLevel/interact/evalloop.sml has been modified to +handle the special exception SMLofNJ.Internals.BTrace.BTrace +which is raised by the "trigger" function mentioned above.

      +
      +

      Notes on usage:

      +
      +
      +
        +
      • +

        Annotated code works well together with unannotated code: +Unannotated calls simply do not show up at all in the backtrace.

        +
      • +
      • +

        It is not a good idea to let modules that were annotated during +different sessions run at the same time. This is because the compiler +chooses small integers to identify individual functions, and there +will be clashes if different modules were compiled in separate sessions. +(Nothing will crash, and you will even be told about the clashes, but +back-trace information will in general not be useful.)

        +
      • +
      • +

        Back-tracing can be confused by callcc and capture.

        +
      • +
      • +

        The only way of getting a back-trace right now is to explicitly +invoke the "trigger" function from your user program. Eventually, we +should make every exception carry back-trace information (if +available). But since this creates more overhead at "raise"-time +(similar to the current exnHistory overhead), I have not yet +implemented this. (The implementation will be rather easy.) With +exceptions carrying back-trace information, this facility will be even +more useful because users don’t need to modify their programs…​

        +
      • +
      • +

        While it is possible to compile the compiler with back-trace +annotations turned on (I did it to get some confidence in +correctness), you must make absolutely sure that core.sml and +btimp.sml are compiled WITHOUT annotation! (core.sml cannot actually +be compiled with annotation because there is no core access yet, but +if you compile btimp.sml with annotation, then the system will go into +an infinite recursion and crash.) +Since CM currently does not know about BTrace, the only way to turn +annotations on and off for different modules of the compiler is to +interrupt CMB.make, change the settings, and re-invoke it. Of course, +this is awkward and clumsy.

        +
        +

        Sample sessions:

        +
        +
        +
        +
        Standard ML of New Jersey v110.28.1 [FLINT v1.5], June 5, 2000
        +- SMLofNJ.Internals.BTrace.mode (SOME true);
        +[autoloading]
        +[autoloading done]
        +val it = false : bool
        +- structure X = struct
        +-     fun main n = let
        +-         fun a (x, 0) = d x
        +-           | a (x, n) = b (x, n - 1)
        +-         and b (x, n) = c (x, n)
        +-         and c (x, n) = a (x, n)
        +-         and d x = e (x, 3)
        +-         and e (x, 0) = f x
        +-           | e (x, n) = e (x, n - 1)
        +-         and f 0 = SMLofNJ.Internals.BTrace.trigger ()
        +-           | f n = n * g (n - 1)
        +-         and g n = a (n, 3)
        +-     in
        +-         f n
        +-     end
        +- end;
        +structure X : sig val main : int -> int end
        +- X.main 3;
        +*** BACK-TRACE ***
        +GOTO   stdIn:4.2-13.20: X.main[2].f
        +GOTO-( stdIn:4.2-13.20: X.main[2].e
        +GOTO   stdIn:4.2-13.20: X.main[2].d
        +     / stdIn:4.2-13.20: X.main[2].a
        +     | stdIn:4.2-13.20: X.main[2].b
        +GOTO-\ stdIn:4.2-13.20: X.main[2].c
        +CALL   stdIn:4.2-13.20: X.main[2].g
        +GOTO   stdIn:4.2-13.20: X.main[2].f
        +GOTO-( stdIn:4.2-13.20: X.main[2].e
        +GOTO   stdIn:4.2-13.20: X.main[2].d
        +     / stdIn:4.2-13.20: X.main[2].a
        +     | stdIn:4.2-13.20: X.main[2].b
        +GOTO-\ stdIn:4.2-13.20: X.main[2].c
        +CALL   stdIn:4.2-13.20: X.main[2].g
        +GOTO   stdIn:4.2-13.20: X.main[2].f
        +GOTO-( stdIn:4.2-13.20: X.main[2].e
        +GOTO   stdIn:4.2-13.20: X.main[2].d
        +     / stdIn:4.2-13.20: X.main[2].a
        +     | stdIn:4.2-13.20: X.main[2].b
        +GOTO-\ stdIn:4.2-13.20: X.main[2].c
        +CALL   stdIn:4.2-13.20: X.main[2].g
        +GOTO   stdIn:4.2-13.20: X.main[2].f
        +CALL   stdIn:2.15-17.4: X.main[2]
        +-
        +
        +
        +
        +

        (Note that because of a FLINt bug the above code currently does not +compile without BTrace turned on.)

        +
        +
        +

        Here is another example, using my modified Tiger compiler:

        +
        +
        +
        +
        Standard ML of New Jersey v110.28.1 [FLINT v1.5], June 5, 2000
        +- SMLofNJ.Internals.BTrace.mode (SOME true);
        +[autoloading]
        +[autoloading done]
        +val it = false : bool
        +- CM.make "sources.cm";
        +[autoloading]
        +...
        +[autoloading done]
        +[scanning sources.cm]
        +[parsing (sources.cm):parse.sml]
        +[creating directory CM/SKEL ...]
        +[parsing (sources.cm):tiger.lex.sml]
        +...
        +[wrote CM/sparc-unix/semant.sml]
        +[compiling (sources.cm):main.sml]
        +[wrote CM/sparc-unix/main.sml]
        +[New bindings added.]
        +val it = true : bool
        +- Main.compile ("../testcases/merge.tig", "foo.out");
        +*** BACK-TRACE ***
        +CALL   lib/semant.sml:99.2-396.21: SemantFun[2].transExp.trvar
        +CALL   lib/semant.sml:99.2-396.21: SemantFun[2].transExp.trexp
        +CALL   lib/semant.sml:289.3-295.22: SemantFun[2].transExp.trexp.check[2]
        +GOTO   lib/semant.sml:289.3-295.22: SemantFun[2].transExp.trexp.check[2]
        +CALL   lib/semant.sml:99.2-396.21: SemantFun[2].transExp.trexp
        +CALL   lib/semant.sml:99.2-396.21: SemantFun[2].transExp.trexp
        +CALL   lib/semant.sml:488.3-505.6: SemantFun[2].transDec.trdec[2].transBody[2]
        +     / lib/semant.sml:411.65-543.8: SemantFun[2].transDec
        +CALL-\ lib/semant.sml:413.2-540.9: SemantFun[2].transDec.trdec[2]
        +CALL   lib/semant.sml:99.2-396.21: SemantFun[2].transExp.trexp
        +CALL   lib/semant.sml:8.52-558.4: SemantFun[2].transProg[2]
        +CALL   main.sml:1.18-118.4: Main.compile[2]
        +-
        +
        +
        + +
      • +
      +
      +
    10. +
    +
    +
    +
    +
    +
    +
    +
    [2000/06/21]
    +
    +
    +

    CM manual update: Path environments documented.

    +
    + +
    +
    +
    +
    +
    +
    [2000/06/19]
    +
    +
    +

    CM manual and system/README update. This only covers the fact that +there are no more implicit anchors. (Path environments and the "bind" +option to "cm" have yet to be documented.)

    +
    + +
    +
    +
    +
    +
    +
    [2000/06/19]
    +
    +
    +

    Fixed a bug in new SrcPath module that sometimes led to a bad chDir call.

    +
    + +
    +
    +
    +
    +
    +
    [2000/06/18]
    +
    +
    +

    I updates the previous HISTORY entry where I forgot to mention that +implicit anchors are no longer with us.

    +
    +
    +

    The current update also gets rid of the (now useless) controller +CM.Control.implicit_anchors.

    +
    + +
    +
    +
    +
    +
    +
    [2000/06/16]
    +
    +
    +

    This patch implements the long anticipated (just kidding :) "anchor +environment" mechanism. In the course of doing this, I also +re-implemented CM’s internal "SrcPath" module from scratch. The new +one should be more robust in certain boundary cases. In any case, it +is a lot cleaner than its predecessor (IMHO).

    +
    +
    +

    This time, although there is yet another boot file format change, I +kept the unpickler backward-compatible. As a result, no new bootfiles +are necessary and bootstrapping is straightforward. (You cannot read +new bootfiles into an old system, but the other way around is no +problem.)

    +
    +
    +

    Visible changes:

    +
    +
    +
    +
    +
      +
    1. +

      Implicit path anchors (without the leading $-symbol) are no +longer recognized at all. This means that such path names are not +illegal either. For example, the name basis.cm simply refers to a +local file called "basis.cm" (i.e, the name is an ordinary path +relative to .cm-files directory). Or, to put it differently, only +names that start with $ are anchored paths.

      +
    2. +
    3. +

      The $<singlearc> abbreviation for $/<singlearc> has finally vanished.

      +
      +
      +
      John (Reppy) had critizised this as soon as I originally proposed and
      +implemented it, but at that time I did not really deeply believe
      +him. :) Now I came full-circle because I need the $<singlearc> syntax
      +in another place where it cannot be seen as an abbreviation for
      +$/<singlearc>.  To avoid the confusion, $<singlearc> now means what it
      +seems to mean (_i.e._, it "expands" into the corresponding anchor
      +value).
      +
      +
      +
      +
      +
      However, when paths are used as members in CM description files, it
      +continues to be true that there must be at least another arc after the
      +anchor.  This is now enforced separately during semantic analysis
      +(_i.e._, from a lexical/syntactical point of view, the notation is ok.)
      +
      +
      +
    4. +
    5. +

      The "cm" class now accepts an option "bind". The option’s value +is a sub-option list of precisely two items — one labeled "anchor" +and the other one labeled "value". As you might expect, "anchor" is +used to specify an anchor name to be bound, and "value" specifies what +the anchor is being bound to.

      +
      +
      +
      The value must be a directory name and can be given in either standard
      +syntax (including the possibility that it is itself an anchored path)
      +or native syntax.
      +
      +
      +
      +
      +
      Examples:
      +
      +
      +
      +
      +
      foo.cm (bind:(anchor:bar value:$mystuff/bar))
      +lib.cm (bind:(anchor:a value:"H:\\x\\y\\z"))  (* only works under windows *)
      +
      +
      +
      +
      +
      and so on.
      +
      +
      +
      +
      +
      The meaning of this is that the .cm-file will be processed with an
      +augmented anchor environment where the given anchor(s) is/are bound to
      +the given values(s).
      +
      +
      +
      +
      +
      The rationale for having this feature is this: Suppose you are trying
      +to use two different (already stable) libraries a.cm and b.cm (that
      +you perhaps didn't write yourself).  Further, suppose each of these
      +two libraries internally uses its own auxiliary library $aux/lib.cm.
      +Normally you would now have a problem because the anchor "lib" can not
      +be bound to more than one value globally.  Therefore, the project that
      +uses both a.cm and b.cm must locally redirect the anchor to some other
      +place:
      +
      +
      +
      +
      +
      a.cm (bind:(anchor:lib value:/usr/lib/smlnj/a-stuff))
      +b.cm (bind:(anchor:lib value:/usr/lib/smlnj/b-stuff))
      +
      +
      +
      +
      +
      This hard-wires $lib/aux.cm to /usr/lib/smlnj/a-stuff/aux.cm or
      +/usr/lib/smlnj/b-stuff/aux.cm, respectively.
      +
      +
      +
      +
      +
      Hard-wiring path names is a bit inflexible (and CM will verbosely warn
      +you when you do so at the time of CM.stabilize).  Therefore, you can
      +also use an anchored path as the value:
      +
      +
      +
      +
      +
      a.cm (bind:(anchor:lib value:$a-lib))
      +b.cm (bind:(anchor:lib value:$b-lib))
      +
      +
      +
      +
      +
      Now you can globally configure (using the usual CM.Anchor.anchor or
      +pathconfig machinery) bindings for "a-lib" and "b-lib".  Since "lib"
      +itself is always locally bound, setting it globally is no longer
      +meaningful or necessary (but it does not hurt either).  In fact, "lib"
      +can still be used as a global anchor for separate purposes.  As a
      +matter of fact, one can locally define "lib" in terms of a global
      +"lib":
      +
      +
      +
      +
      +
      a.cm (bind:(anchor:lib value:$lib/a))
      +b.cm (bind:(anchor:lib value:$lib/b))
      +
      +
      +
      +

      4: The encoding of path names has changed. This affects the way + path names are shown in CM’s progress report and also the internal + protocol encoding used for parallel make.

      +
      +
      +
      +
      The encoding now uses one or more ':'-separated segments.  Each
      +segments corresponds to a file that has been specified relative to the
      +file given by its preceding segment.  The first segment is either
      +relative to the CWD, absolute, or anchored.  Each segment itself is
      +basically a Unix pathname; all segments but the first are relative.
      +
      +
      +
      +
      +
      Example:
      +
      +
      +
      +
      +
      $foo/bar/baz.cm:a/b/c.sml
      +
      +
      +
      +
      +
      This path denotes the file bar/a/b/c.sml relative to the directory
      +denoted by anchor "foo".  Notice that the encoding also includes
      +baz.cm which is the .cm-file that listed a/b/c.sml.  As usual, such
      +paths are resolved relative to the .cm-files directory, so baz.cm must
      +be ignored to get the "real" pathname.
      +
      +
      +
      +
      +
      To make this fact more obvious, CM puts the names of such "virtual
      +arcs" into parentheses when they appear in progress reports. (No
      +parentheses will appear in the internal protocol encoding.)  Thus,
      +what you really see is:
      +
      +
      +
      +
      +
      $foo/bar/(baz.cm):a/b/c.sml
      +
      +
      +
      +
      +
      I find this notation to be much more informative than before.
      +
      +
      +
      +
      +
      Another new feature of the encoding is that special characters
      +including parentheses, colons, (back)slashes, and white space are
      +written as \ddd (where ddd is the decimal encoding of the character).
      +
      +
      +
    6. +
    +
    +
    +
    +
    +

    NOTE: The CM manual still needs to be updated.

    +
    + +
    +
    +
    +
    +
    +
    [2000/06/15]
    +
    +
    +

    x86 Peephole fix by Fermin. Affects c-- and moby only.

    +
    + +
    +
    +
    +
    +
    +
    [2000/06/12]
    +
    +
    +

    More cleanup after changing the file naming scheme: This time I +repaired the parallel make mechanism for CMB.make which I broke earlier.

    +
    + +
    +
    +
    +
    +
    +
    [2000/06/09]
    +
    +
    +

    None of these things should affect normal SML/NJ operations

    +
    +
    +
      +
    1. +

      Peephole improvements provided by Fermin (c--)

      +
    2. +
    3. +

      New annotation DEFUSE for adding extra dependence (moby)

      +
    4. +
    5. +

      New X86 LOCK instructions (moby)

      +
    6. +
    7. +

      New machine description language for reservation tables (scheduling)

      +
    8. +
    9. +

      Fixes to various optimization/analysis modules (branch chaining, dominator +trees etc.)

      +
    10. +
    11. +

      I’ve changed the CM files so that they can work with versions +110.0.6, 110.25 and 110.28

      + +
    12. +
    +
    +
    +
    +
    +
    +
    +
    [2000/06/09]
    +
    +
    +
      +
    • +

      Removed all(?) remaining RCS Log entries from sources.

      +
    • +
    • +

      Fixed bug in ml-yacc and ml-lex sources (use explicit anchors for +anchored paths).

      + +
    • +
    +
    +
    +
    +
    +
    +
    +
    [2000/06/07]
    +
    +
    +
      +
    1. +

      This update changes the default setting for +CM.Control.implicit_anchors from true to false. This means that +implicit anchors are no longer permitted by default. I also tried to +make sure that nothing else still relies on implicit anchors. +(This is the next step on the schedule towards a CM that does not even +have the notion of implicit anchors anymore.)

      +
    2. +
    3. +

      More CM manual updates.

      +
    4. +
    5. +

      I managed to track down and fix the pickling bug I mentioned last +time. Because of the previously existing workaround, this entails no +immediate practical changes.

      + +
    6. +
    +
    +
    +
    +
    +
    +
    +
    +

    Version 110.28.1; 2000/06/06

    +
    +
    +
    +
    [2000/06/06]
    +
    +
    +
      +
    • +

      The main purpose of this update is to make library pickles lazier in +order to reduce the initial space penalty for autoloading a library. +As a result, it is now possible to have $smlnj/compiler.cm +pre-registered. This should take care of the many complaints or +inquiries about missing structure Compiler. This required changes to +CM’s internal data structures and small tweaks to some algorithms.

      +
      +

      As a neat additional effect, it is no longer necessary (for the sake +of lean heap image files) to distinguish between a "minimal" CM and a +"full" CM. Now, there is only one CM (i.e., the "full" version: +$smlnj/cm.cm aka $smlnj/cm/full.cm), and it is always available at the +interactive top level. ($smlnj/cm/minimal.cm is gone.)

      +
      +
      +

      To make the life of compiler-hackers easier, "makeml" now also +pre-registers $smlnj/cmb.cm (aka $smlnj/cmb/current.cm). In other +words, after you bootstrap a new sml for the first time, you will not +have to autoload $smlnj/cmb.cm again afterwards. (The first time +around you will still have to do it, though.)

      +
      +
    • +
    • +

      A second change consists of major updates to the CM manual. There +are now several appendices with summary information and also a full +specification of the CM description file syntax.

      +
    • +
    • +

      In directory src/system I added the script "allcross". This script +invokes sml and cross-compiles the compiler for all supported +architectures. (Useful when providing a new set of boot files.)

      +
    • +
    • +

      There seems to be a latent bug in my "lazy pickles" mechanism. I +added a small tweak to pickle-util.sml to work around this problem, +but it is not a proper fix yet. I will investigate further. (The +effect of the bug was an inflation of library pickle size.)

      +
    • +
    • +

      Version number increased to 110.28.1 (to avoid compatibility problems).

      + +
    • +
    +
    +
    +
    +
    +
    +
    +
    [2000/05/25]
    +
    +
    +
    +
    Fixed a bug in freezing phase of the register allocator.
    +
    +
    + +
    +
    +
    +
    +
    +
    [2000/05/15]
    +
    +
    +
      +
    1. +

      Alpha

      +
      +
      +
      Slight cleanup.  Removed the instruction SGNXL
      +
      +
      +
    2. +
    3. +

      X86

      +
      +
      +
      Added the following instructions to the instruction set:
      +
      +
      +
      +
      +
      ROLx, RORx,
      +BTx, BTSx, BTLx, BTRx,
      +XCHGx, and variants with the LOCK prefix
      +
      +
      +
    4. +
    5. +

      Register Allocation

      +
      +
      +
      The module ra-rewrite-with-renaming has been improved.
      +
      +
      +
      +
      +
      These have no effect on SML/NJ.
      +
      +
      + +
    6. +
    +
    +
    +
    +
    +
    +
    +
    [2000/05/15]
    +
    +
    +
      +
    1. +

      I added an alternative to "-rebuild" to "makeml". The difference is +that prior to calling CMB.make' the CM-variable "LIGHT" will be +defined. In effect, the command will not build any cross-compiler +backends and therefore finish more quickly.

      +
      +
      +
      The "fixpt" script also takes a "-light" switch to be able to use
      +this new facility while compiling for a fixpoint.
      +
      +
      +
    2. +
    3. +

      I replaced all mentions of anchored paths in group owner specifications +with simple relative paths (usually starting with ".."). +The rationale is that a library’s internal workings should not be +compromised by the lack of some anchor. (An anchor is necessary +for someone who wants to refer to the library by an anchored path, +but it should not be necessary to build the same library in the first +place.)

      +
    4. +
    5. +

      I changed the way CM’s tool mechanism determines the shell command +string used for things like ml-yacc etc. so that it does not break +when CM.Control.implicit_anchors is turned off.

      + +
    6. +
    +
    +
    +
    +
    +
    +
    +
    [2000/05/12]
    +
    +
    +

    Fixed a bug in config/_ml-build that prevented ml-yacc and ml-lex from +getting installed properly (by config/install.sh).

    +
    + +
    +
    +
    +
    +
    +
    [2000/05/12]
    +
    +
    +

    !!! NEW BOOT FILES !!!

    +
    +
    +

    This change is in preparation of fading out support for "implicitly +anchored path names". I went through all sources and used the +explicit (and relatively new) $-notation. See system/README and the +CM manual for more info on this.

    +
    +
    +

    I also modified the anchoring scheme for some things such as "smlnj", +"MLRISC", "cm", etc. to take advantage of the fact that explicit +anchors are more expressive: anchor name and first arc do not have to +coincide. This entails the following user-visible change:

    +
    +
    +

    You have to write $smlnj/foo/bar instead of smlnj/foo/bar. In +particular, when you fire up sml with a command-line argument, say, +e.g.:

    +
    +
    +
    +
    sml '$smlnj/cmb.cm'
    +
    +
    +
    +

    At the ML toplevel prompt:

    +
    +
    +
    +
    CM.autoload "$smlnj/cmb.cm";
    +
    +
    +
    +

    There is also a new controller in CM.Control that can be used to turn +off all remaining support for implicit anchors by saying:

    +
    +
    +
    +
    CM.autoload "$smlnj/
    +#set CM.Control.implicit_anchors false;
    +
    +
    +
    +

    This causes CM to reject implicitly anchored paths. This is (for the +time being) less permissive than the "final" version where there will +be no more such implicit anchors and relative paths will be just that: +relative.

    +
    +
    +

    The next step (version after next version?) will be to make the +default for CM.Control.implicit_anchors false. After the dust has +settled, I can then produce the "final" version of this…​

    +
    +
    +

    Note: Since bootstrapping is a bit tricky, I provided new boot files.

    +
    + +
    +
    +
    +
    +
    +
    [2000/05/11]
    +
    +
    +

    The main change is that I added function CM.sources as a generalized +version of the earlier CM.makedepend. This entails the following +additional changes:

    +
    +
    +
      +
    • +

      CM.makedepend has been dropped.

      +
    • +
    • +

      CM manual has been updated.

      +
    • +
    • +

      TOOLS signature and API have been changed.

      + +
    • +
    +
    +
    +
    +
    +
    +
    +
    [2000/05/10]
    +
    +
    +
    +
      Various bug fixes and new features for C--, Moby and MLRISC optimizations.
    +None of these affect SML/NJ.
    +
    +
    +
    +
      +
    1. +

      Register Allocation

      +
      +
        +
      1. +

        A new ra spilling module (ra/ra-spill-with-renaming) is implemented. +This module tries to remove local (i.e. basic block level) redundancies +during spilling.

        +
      2. +
      3. +

        A new framework for performing region based register allocation. +Not yet entirely functional.

        +
      4. +
      +
      +
    2. +
    3. +

      X86

      +
      +
        +
      1. +

        DefUse for POP was missing the stack pointer [found by Lal]

        +
      2. +
      3. +

        Reload for CALL was incorrect in X86Spill [found by John]

        +
      4. +
      5. +

        Various fixes in X86Spill so that it can be used correctly for +the new spilling module.

        +
      6. +
      +
      +
    4. +
    5. +

      SSA/IR

      +
      +
        +
      1. +

        New module ir/dj-dataflow.sml implements elimination based +data flow analysis.

        +
      2. +
      +
      +
    6. +
    7. +

      MLRiscGen

      +
      +
        +
      1. +

        Fix for gc type annotation

        +
      2. +
      +
      +
    8. +
    9. +

      MDGen

      +
      +
      +
      Various fixes for machine description -> ml code translation.  For ssa
      +only.
      +
      +
      + +
    10. +
    +
    +
    +
    +
    +
    +
    +
    [2000/05/08]
    +
    +
    +
    +
    Fermin has found a few assembly problems with constant expressions
    +generated in LabelExp.  Mostly, the problems involve extra parentheses,
    +which choke on dumb assemblers.  This is his fix.
    +
    +
    + +
    +
    +
    +
    +
    +
    +

    Version 110.28; 2000/04/09

    +
    +
    +
    +
    [2000/04/09]
    +
    +
    +
      +
    1. +

      Updated src/compiler/TopLevel/main/version.sml to version 110.28

      +
    2. +
    3. +

      Updated config/version to 110.28

      +
    4. +
    5. +

      Updated config/srcarchiveurl

      +
    6. +
    7. +

      New boot files! +ftp://ftp.research.bell-labs.com/dist/smlnj/working/110.28/

      + +
    8. +
    +
    +
    +
    +
    +
    +
    +
    [2000/05/01]
    +
    +
    +

    A new noweb tool has been added. The existing system is entirely +unaffected by this, but some CM users have asked for renewed noweb +support. Everything is documented in the CM manual.

    +
    +
    +

    New (plugin) libraries:

    +
    +
    +
    +
    noweb-tool.cm
    +nw-ext.cm
    +
    +
    + +
    +
    +
    +
    +
    +
    [2000/04/30]
    +
    +
    +
      +
    1. +

      Fix for bug 1498 +smlnj/src/system/Basis/Implementation/Unsafe/object.sig +smlnj/src/system/Basis/Implementation/Unsafe/object.sml + added toRealArray function +smlnj/src/compiler/MiscUtil/print/ppobj.sml + added check for tag Obj.RealArray to array printing case in ppObj

      +
    2. +
    3. +

      Fix for bug 1510 +smlnj/src/compiler/Semant/types/typesutil.sml + fixed definition of dummyargs (used by equalTycon) so that + dummy args are distinct types

      + +
    4. +
    +
    +
    +
    +
    +
    +
    +
    [2000/04/30]
    +
    +
    +
      +
    1. +

      CM version numbering added. This is an implementation of Lal’s +proposal for adding version numbers and version checking to .cm +files. Lal said that his proposal was just that — a proposal. +For the time being I went ahead and implemented it so that people +can comment on it. Everything is completely backward-compatible +(except for the stable library format, i.e., new bootfiles!).

      +
      +
      +
      As usual, see the CM manual for details.
      +
      +
      +
    2. +
    3. +

      An alternative syntax for anchored paths has been implemented. +Dave has recently voiced the same concerns that I had when I did +this, so there should be some support. My take is that eventually +I will let support for the current syntax (where anchors are +"implicit") fade out in favor of the new, explicit syntax. +In order to be backward-compatible, both old and new syntax are +currently supported.

      +
      +
      +
      Again, see the CM manual for details.
      +
      +
      +
    4. +
    5. +

      Parallel make is trying to be slightly smarter: When the master +process finds a "bottleneck", i.e., when there is only one +compilation unit that can be compiled and everybody else is +waiting on it, then it will simply compile it directly instead +of clumsily telling one of the slaves to do it.

      +
    6. +
    7. +

      Support for "unsharing" added. This is necessary in order to be +able to have two different versions of the same library running +at the same time (e.g., for trying out a new MLRISC while still +having the old MLRISC linked into the current compiler, etc.) +See the CM manual.

      +
    8. +
    9. +

      Simple "makedepend" functionality added for generating Makefile +dependency information. (This is rather crude at the moment. +Expect some changes here in the future.)

      +
    10. +
    11. +

      ".fun" added as a recognized suffix for ML files. Also documented +explicitly in the manual that the fallback behavior (unknown suffix +→ ML file) is not an official feature!

      +
    12. +
    13. +

      Small changes to the pickler for stable libraries.

      +
    14. +
    15. +

      Several internal changes to CM (for cleanup/improvement).

      +
      +

      + +!!!! NEW BINFILES !!!!

      +
      + +
    16. +
    +
    +
    +
    +
    +
    +
    +
    [2000/04/28]
    +
    +
    +
      +
    1. +

      I changed config/install.sh to remove duplicate entries from the +lib/pathconfig file at the end. Moreover, the final version of +lib/pathconfig is sorted alphabetically. The same (sorting) is done +in src/system/installml.

      +
    2. +
    3. +

      The config/install.sh script now consistently uses relative +pathnames in lib/pathconfig whenever the anchor is in the lib +directory. (So far this was true for the libraries that come +pre-compiled and bundled as part of the bootfiles but not for +libraries that are compiled by the script itself.)

      + +
    4. +
    +
    +
    +
    +
    +
    +
    +
    [2000/04/26]
    +
    +
    +

    Added ".fun" as a recognized file name suffix (for ML code).

    +
    + +
    +
    +
    +
    +
    +
    [2000/04/25]
    +
    +
    +
      +
    1. +

      Alpha

      +
      +
      +
          PSEUDOARITH was missing in AlphaRewrite.  This causes an endless loop
      +in C--.
      +
      +
      +
    2. +
    3. +

      RA

      +
      +
      +
      Added a flag "ra-dump-size" to print out the size of the flowgraph
      +and the interference graph.
      +
      +
      + +
    4. +
    +
    +
    +
    +
    +
    +
    +
    [2000/04/25]
    +
    +
    +
    +
    Updated mlyacc.tex sections 5 and 7 for SML '97 and CM.
    +Updated all three examples in src/ml-yacc/examples to run
    +under 110.* using CM.make.
    +
    +
    + +
    +
    +
    +
    +
    +
    [2000/04/20]
    +
    +
    +
    +
      This update synchronizes my repository with Yale's.  Most of these
    +changes, however, do not affect SML/NJ at all (the RA is an exception).
    +
    +
    +
    +
      +
    1. +

      Register Allocator

      +
      +
        +
      1. +

        An improvement in the interference graph construction: +Given a copy

        +
        +
        +
        s <- t
        +
        +
        +
        +
        +
        no interference edge between s and t is added for this definition of s.
        +
        +
        +
      2. +
      3. +

        I’ve added two new spill heuristic modules that Fermin and I developed +(in the new library RA.cm). These are unused in SML/NJ but maybe +useful for others (Moby?)

        +
      4. +
      +
      +
    2. +
    3. +

      X86

      +
      +
        +
      1. +

        Various fixes in the backend provided by Fermin [C--] and Lal.

        +
      2. +
      +
      +
    4. +
    5. +

      Alpha

      +
      +
        +
      1. +

        Added the BSR instruction and code generation that goes with it [C--]

        +
      2. +
      3. +

        Other fixes too numerous to recount provided by Fermin [C--]

        +
      4. +
      +
      +
    6. +
    7. +

      Regmaps

      +
      +
        +
      1. +

        The regmaps are not initialized with the identity physical bindings +at creation time. This is unneeded.

        +
      2. +
      +
      +
    8. +
    9. +

      MLRISC Optimizations

      +
      +
        +
      1. +

        The DJ-Graph module can now compute the iterated dominance frontiers +intersects with liveness incrementally in linear time! Woohoo! +This is now used in my new SSA construction algorithm.

        +
      2. +
      3. +

        THe branch reorganization module is now smarter about linear chains of +basic blocks.

        + +
      4. +
      +
      +
    10. +
    +
    +
    +
    +
    +
    +
    +
    [2000/04/12]
    +
    +
    +

    Changed install.sh script to handle archive files without version number +and to use "boot.<arch>-<os>" instead of "sml.boot.<arch>-<os>" for the +name of the boot file archive.

    +
    + +
    +
    +
    +
    +
    +
    +

    Version 110.27; 2000/04/09

    +
    +
    +
    +
    [2000/04/09]
    +
    +
    +
      +
    1. +

      Updated src/compiler/TopLevel/main/version.sml to version 110.27

      +
    2. +
    3. +

      Updated src/config/version to 110.27

      +
    4. +
    5. +

      New boot files!

      + +
    6. +
    +
    +
    +
    +
    +
    +
    +
    [2000/04/09]
    +
    +
    +
      +
    1. +

      Yet another fix for x86 assembly for idivl, imull, mull and friends.

      +
    2. +
    3. +

      Miscellaneous improvements to MLRISC (unused in sml/nj)

      + +
    4. +
    +
    +
    +
    +
    +
    +
    +
    [2000/04/07]
    +
    +
    +

    Improved handling of branches (mostly those generated from +polymorphic equality), removed switchoff and changed the +default optimization settings (more cpsopt and less flintopt).

    +
    +
    +

    Stefan

    +
    +
    +
    +
    +
    +
    +
    [2000/04/06]
    +
    +
    +
    +
    Forgot a few files.
    +
    +
    + +
    +
    +
    +
    +
    +
    [2000/04/06]
    +
    +
    +
      +
    1. +

      New Peephole code

      +
    2. +
    3. +

      Minor improvement to X86 instruction selection

      +
    4. +
    5. +

      Various fixes to SSA and machine description → code translator

      + +
    6. +
    +
    +
    +
    +
    +
    +
    +
    [2000/04/05]
    +
    +
    +

    This update just merges three minor cosmetic updates to CM’s sources +to get ready for the 110.27 code freeze on Friday. No functionality +has changed.

    +
    + +
    +
    +
    +
    +
    +
    [2000/04/04]
    +
    +
    +
      +
    1. +

      Fixed a problem in X86 assembly.

      +
      +
      +
      Things like
      +
      +
      +
      +
      +
      jmp %eax
      +jmp (%eax)
      +
      +
      +
      +
      +
      should be output as
      +
      +
      +
      +
      +
      jmp *%eax
      +jmp *(%eax)
      +
      +
      +
    2. +
    3. +

      Assembly output

      +
      +
      +
      Added a new flag
      +
      +
      +
      +
      +
      "asm-indent-copies" (default to false)
      +
      +
      +
      +
      +
      When this flag is on, parallel copies will be indented an extra level.
      +
      +
      + +
    4. +
    +
    +
    +
    +
    +
    +
    +
    [2000/04/04]
    +
    +
    +
    +
    All of these fixes are related to C--, Moby, and my own optimization
    +stuff; so they shouldn't affect SML/NJ.
    +
    +
    +
    +
      +
    1. +

      X86

      +
      +
      +
      Various fixes related floating point, and extensions.
      +
      +
      +
    2. +
    3. +

      Alpha

      +
      +
      +
      Some extra patterns related to loads with signed/zero extension
      +provided by Fermin.
      +
      +
      +
    4. +
    5. +

      Assembly

      +
      +
      +
      When generating assembly, resolve the value of client defined constants,
      +instead of generating symbolic values.  This is controlled by the
      +new flag "asm-resolve-constants", which is default to true.
      +
      +
      +
    6. +
    7. +

      Machine Descriptions

      +
      +
        +
      1. +

        The precedence parser was slightly broken when parsing infixr symbols.

        +
      2. +
      3. +

        The type generalizing code had the bound variables reversed, resulting +in a problem during arity raising.

        +
      4. +
      5. +

        Various fixes in machine descriptions.

        + +
      6. +
      +
      +
    8. +
    +
    +
    +
    +
    +
    +
    +
    [2000/04/03]
    +
    +
    +

    I eliminated coreEnv from compInfo. Access to the "Core" structure is +now done via the ordinary static environment that is context to each +compilation unit.

    +
    +
    +

    To this end, I arranged that instead of "structure Core" as "structure +_Core" is bound in the pervasive environment. Core access is done via +_Core (which can never be accidentally rebound because _Core is not a +legal surface-syntax symbol).

    +
    +
    +

    The current solution is much cleaner because the core environment is +now simply part of the pervasive environment which is part of every +compilation unit’s context anyway. In particular, this eliminates all +special-case handling that was necessary until now in order to deal +with dynamic and symbolic parts of the core environment.

    +
    +
    +

    Remaining hackery (to bind the "magic" symbol _Core) is localized in the +compilation manager’s bootstrap compiler (actually: in the "init group" +handling). See the comments in src/system/smlnj/init/init.cmi for +more details.

    +
    +
    +

    I also tried to track down all mentions of "Core" (as string argument +to Symbol.strSymbol) in the compiler and replaced them with a +reference to the new CoreSym.coreSym. Seems cleaner since the actual +name appears in one place only.

    +
    +
    +

    Binfile and bootfile format have not changed, but the switchover from +the old "init.cmi" to the new one is a bit tricky, so I supplied new +bootfiles anyway.

    +
    + +
    +
    +
    +
    +
    +
    [2000/04/02]
    +
    +
    +
      +
    1. +

      Renamed the constructor CALL in MLTREE by popular demand.

      +
    2. +
    3. +

      Added a bunch of files from my repository. These are currently +used by other non-SMLNJ backends.

      + +
    4. +
    +
    +
    +
    +
    +
    +
    +
    [2000/03/31]
    +
    +
    +

    This update contains a rewritten (and hopefully more correct) module +for extracting aliasing information from CPS.

    +
    +
    +
    +
    To turn on this feature:
    +
    +
    +
    +
    +
    Compiler.Control.CG.memDisambiguate := true
    +
    +
    +
    +
    +
    To pretty print the region information with assembly
    +
    +
    +
    +
    +
    Compiler.Control.MLRISC.getFlag "asm-show-region" := true;
    +
    +
    +
    +
    +
    To control how many levels of aliasing information are printed, use:
    +
    +
    +
    +
    +
    Compiler.Control.MLRISC.getInt "points-to-show-level" := n
    +
    +
    +
    +
    +
    The default of n is 3.
    +
    +
    + +
    +
    +
    +
    +
    +
    [2000/03/31]
    +
    +
    +

    This update contains:

    +
    +
    +
      +
    1. +

      runtime/c-lib/c-libraries.c +includes added in revision 1.2 caused compilation errors on hppa-hpux

      +
    2. +
    3. +

      fix for bug 1556 +system/Basis/Implementation/NJ/internal-signals.sml

      + +
    4. +
    +
    +
    +
    +
    +
    +
    +
    [2000/03/31]
    +
    +
    +

    This update contains:

    +
    +
    +
      +
    1. +

      A small change to CM’s handling of stable libraries: +CM now maintains one "global" modmap that is used for all stable +libraries. The use of such a global modmap maximizes sharing and +minimizes the need for re-traversing parts of environments during +modmap construction. (However, this has minor impact since modmap +construction seems to account for just one percent or less of total +compile time.)

      +
    2. +
    3. +

      I added a "genmap" phase to the statistics. This is where I got the +"one percent" number (see above).

      +
    4. +
    5. +

      CM’s new tool parameter mechanism just became even better. :)

      +
      +
        +
      • +

        The parser understands named parameters and recursive options.

        +
      • +
      • +

        The "make" and "shell" tools use these new features. +(This makes it a lot easier to cascade these tools.)

        +
      • +
      • +

        There is a small syntax change: named parameters use a

        +
        +
        +
        <name> : ( <option> ... )            or
        +<name> : <string>
        +
        +
        +
        +
        +
        syntax.  Previously, named parameters were implemented in an
        +ad-hoc fashion by each tool individually (by parsing strings)
        +and had the form
        +
        +
        +
        +
        +
        <name>=<string>
        +
        +
        +
        +
        +
        See the CM manual for a full description of these issues.
        +
        +
        + +
      • +
      +
      +
    6. +
    +
    +
    +
    +
    +
    +
    +
    +

    Version 110.26.2; 2000/03/30

    +
    +
    +
    +
    [2000/03/30]
    +
    +
    +

    !!!!! WARNING !!!!!! +!! New binfiles !! +!!!!!!!!!!!!!!!!!!!!

    +
    +
    +

    This update contains:

    +
    +
    +
      +
    1. +

      Moderate changes to CM:

      +
      +
        +
      • +

        Changes to CM’s tools mechanism. In particular, it is now possible +to have tools that accept additional "command line" parameters +(specified in the .cm file at each instance where the tool’s class is +used).

        +
        +
        +
        This was done to accommodate the new "make" and "shell" tools which
        +facilitate fairly seamless hookup to portions of code managed using
        +Makefiles or Shell scripts.
        +
        +
        +
        +
        +
        There are no classes "shared" or "private" anymore.  Instead, the
        +sharing annotation is now a parameter to the "sml" class.
        +
        +
        +
        +
        +
        There is a bit of generic machinery for implementing one's own
        +tools that accept command-line parameters.  However, I am not yet fully
        +satisfied with that part, so expect changes here in the future.
        +
        +
        +
        +
        +
        All existing tools are described in the CM manual.
        +
        +
        +
      • +
      • +

        Slightly better error handling. (CM now suppresses many followup +error messages that tended to be more annoying than helpful.)

        +
      • +
      +
      +
    2. +
    3. +

      Major changes to the compiler’s static environment data structures.

      +
      +
        +
      • +

        no CMStaticEnv anymore.

        +
      • +
      • +

        no CMEnv, no "BareEnvironment" (actually, only BareEnvironment, +but it is called Environment), no conversions between different +kinds of static environments

        +
      • +
      • +

        There is still a notion of a "modmap", but such modmaps are generated +on demand at the time when they are needed. This sounds slow, but I +sped up the code that generates modmaps enough for this not to lead to +a slowdown of the compiler (at least I didn’t detect any).

        +
      • +
      • +

        To facilitate rapid modmap generation, static environments now +contain an (optional) "modtree" structure. Modtree annotations are +constructed by the unpickler during unpickling. (This means that +the elaborator does not have to worry about modtrees at all.) +Modtrees have the advantage that they are compositional in the same +way as the environment data structure itself is compositional. +As a result, modtrees never hang on to parts of an environment that +has already been rendered "stale" by filtering or rebinding.

        +
      • +
      • +

        I went through many, many trials and errors before arriving at the +current solution. (The initial idea of "linkpaths" did not work.) +But the result of all this is that I have touched a lot of files that +depend on the "modules" and "types" data structures (most of the +elaborator). There were a lot of changes during my "linkpath" trials +that could have been reverted to their original state but weren’t. +Please, don’t be too harsh on me for messing with this code a bit more +than what was strictly necessary…​ (I did resist the temptation +of doing any "global reformatting" to avoid an untimely death at +Dave’s hands. :)

        +
      • +
      • +

        One positive aspect of the previous point: At least I made sure that +all files that I touched now compile without warnings (other than +"polyEqual").

        +
      • +
      • +

        compiler now tends to run "leaner" (i.e., ties up less memory in +redundant modmaps)

        + +
      • +
      +
      +
    4. +
    +
    +
    +
    +
    +
    +
    +
    [2000/03/29]
    +
    + +
    +
    +
       This update contains *MAJOR* changes to the way code is generated from CPS
    +in the module mlriscGen, and in various backend modules.
    +
    +
    +
    +

    CHANGES

    +
    +
    +
      +
    1. +

      MLRiscGen: forward propagation fix.

      +
      +
      +
      There was a bug in forward propagation introduced at about the same time
      +as the MLRISC x86 backend, which prohibits coalescing to be
      +performed effectively in loops.
      +
      +
      +
      +
      +
      Effect: speed up of loops in RISC architectures.
      +        By itself, this actually slowed down certain benchmarks on the x86.
      +
      +
      +
    2. +
    3. +

      MLRiscGen: forward propagating addresses from consing.

      +
      +
      +
      I've changed the way consing code is generated.  Basically I separated
      +out the initialization part:
      +
      +
      +
      +
      +
      store tag,   offset(allocptr)
      +store elem1, offset+4(allocptr)
      +store elem2, offset+8(allocptr)
      +...
      +store elemn, offset+4n(allocptr)
      +
      +
      +
      +
      +
      and the address computation part:
      +
      +
      +
      +
      +
      celladdr <- offset+4+alloctpr
      +
      +
      +
      +
      +
      and move the address computation part
      +
      +
      +
      +
      +
      Effect:  register pressure is generally lower as a result.  This
      +         makes compilation of certain expressions much faster, such as
      +         long lists with non-trivial elements.
      +
      +
      +
      +
      +
      [(0,0), (0,0), .... (0,0)]
      +
      +
      +
    4. +
    5. +

      MLRiscGen: base pointer elimination.

      +
      +
      +
      As part of the linkage mechanism, we generate the sequence:
      +
      +
      +
      +
      +
      L:  ...  <- start of the code fragment
      +
      +
      +
      +
      +
      L1:
      +    base pointer <- linkreg - L1 + L
      +
      +
      +
      +
      +
        The base pointer was then used for computing relocatable addresses
      +in the code fragment.  Frequently (such as in lots of continuations)
      +this is not needed.  We now eliminate this sequence whenever possible.
      +
      +
      +
      +
      +
        For compile time efficiency, I'm using a very stupid local heuristic.
      +But in general, this should be done as a control flow analysis.
      +
      +
      +
      +
      +
      Effect:  Smaller code size.  Speed up of most programs.
      +
      +
      +
    6. +
    7. +

      Hppa back end

      +
      +
      +
         Long jumps in span dependence resolution used to depend on the existence
      +of the base pointer.
      +
      +
      +
      +
      +
      A jump to a long label L was expanded into the following sequence:
      +
      +
      +
      +
      +
      LDIL %hi(L-8192), %r29
      +LDO  %lo(L-8192)(%r29), %r29
      +ADD  %r29, baseptr, %r29
      +BV,n %r0(%r29)
      +
      +
      +
      +
      +
        In the presence of change (3) above, this will not work.  I've changed
      +it so that the following sequence of instructions are generated, which
      +doesn't mention the base pointer at all:
      +
      +
      +
      +
      +
           BL,n  L', %r29           /* branch and link, L' + 4 -> %r29 */
      +L':  ADDIL L-(L'+4), %r29     /* Compute address of L */
      +     BV,n  %r0(%r29)          /* Jump */
      +
      +
      +
    8. +
    9. +

      Alpha back end

      +
      +
      +
         New alpha instructions LDB/LDW have been added, as per Fermin's
      +suggestions.   This is unrelated to all other changes.
      +
      +
      +
    10. +
    11. +

      X86 back end

      +
      +
      +
      I've changed andl to testl in the floating point test sequence
      +whenever appropriate.  The Intel optimization guide states that
      +testl is preferable to andl.
      +
      +
      +
    12. +
    13. +

      RA (x86 only)

      +
      +
      +
        I've improved the spill propagation algorithm, using an approximation
      +of maximal weighted independent sets.   This seems to be necessary to
      +alleviate the negative effect in light of the slow down in (1).
      +
      +
      +
      +
      +
      I'll write down the algorithm one of these days.
      +
      +
      +
    14. +
    15. +

      MLRiscGen: frequencies

      +
      +
      +
        I've added an annotation that states that all call gc blocks have zero
      +execution frequencies.  This improves register allocation on the x86.
      +
      +
      +
      +

      BENCHMARKS

      +
      +
      +
      +
      I've only perform the comparison on 110.25.
      +
      +
      +
      +
      +
      The platforms are:
      +
      +
      +
      +
      +
      HPPA  A four processor HP machine (E9000) with 5G of memory.
      +X86   A 300Hhz Pentium II with 128M of memory, and
      +SPARC An Ultra sparc 2 with 512M of memory.
      +
      +
      +
      +
      +
      I used the following parameters for the SML benchmarks:
      +
      +
      +
      +
      +
              @SMLalloc
      +HPPA    256k
      +SPARC   512k
      +X86     256k
      +
      +
      +
      +

      COMPILATION TIME

      +
      +
      +
      +
      Here are the numbers comparing the compilation times of the compilers.
      +I've only compared 110.25 compiling the new sources versus
      +a fixpoint version of the new compiler compiling the same.
      +
      +
      +
      +
      +
                  110.25                                  New
      +      Total  Time in RA  Spill+Reload   Total  Time In RA Spill+Reload
      +HPPA   627s    116s        2684+3584     599s    95s       1003+1879
      +SPARC  892s    173s        2891+3870     708s    116s      1004+1880
      +X86    999s    315s       94006+130691   987s    296s    108877+141957
      +
      +
      +
      +
      +
                110.25         New
      +       Code Size      Code Size
      +HPPA   8596736         8561421
      +SPARC  8974299         8785143
      +X86    9029180         8716783
      +
      +
      +
      +
      +
      So in summary, things are at least as good as before.   Dramatic
      +reduction in compilation is obtained on the Sparc; I can't explain it,
      +but it is reproducible.  Perhaps someone should try to reproduce this
      +on their own machines.
      +
      +
      +
      +

      SML BENCHMARKS

      +
      +
      +
      +
      On the average, all benchmarks perform at least as well as before.
      +
      +
      +
      +
      +
      HPPA         Compilation Time     Spill+Reload      Run Time
      +           110.25  New            110.25    New   110.25  New
      +
      +
      +
      +
      +
          barnesHut  3.158  3.015  4.75%    1+1       0+0   2.980  2.922   2.00%
      +        boyer  6.152  5.708  7.77%    0+0       0+0   0.218  0.213   2.34%
      + count-graphs  1.168  1.120  4.32%    0+0       0+0  22.705 23.073  -1.60%
      +          fft  0.877  0.792 10.74%    1+3       1+3   0.602  0.587   2.56%
      +  knuthBendix  3.180  2.857 11.32%    0+0       0+0   0.675  0.662   2.02%
      +       lexgen  6.190  5.290 17.01%    0+0       0+0   0.913  0.788  15.86%
      +         life  0.803  0.703 14.22%   25+25      0+0   0.153  0.140   9.52%
      +        logic  2.048  2.007  2.08%    6+6       1+1   4.133  4.008   3.12%
      +   mandelbrot  0.077  0.080 -4.17%    0+0       0+0   0.765  0.712   7.49%
      +       mlyacc 22.932 20.937  9.53%  154+181    32+57  0.468  0.430   8.91%
      +      nucleic  5.183  5.060  2.44%    2+2       0+0   0.125  0.120   4.17%
      +ratio-regions  3.357  3.142  6.84%    0+0       0+0  116.225 113.173 2.70%
      +          ray  1.283  1.290 -0.52%    0+0       0+0   2.887  2.855   1.11%
      +       simple  6.307  6.032  4.56%   28+30      5+7   3.705  3.658   1.28%
      +          tsp  0.888  0.862  3.09%    0+0       0+0   7.040  6.893   2.13%
      +         vliw 24.378 23.455  3.94%  106+127    25+45  2.758  2.707   1.91%
      +--------------------------------------------------------------------------
      + Average                     6.12%                                   4.09%
      +
      +
      +
      +
      +
      SPARC        Compilation Time     Spill+Reload      Run Time
      +           110.25  New            110.25    New   110.25  New
      +
      +
      +
      +
      +
          barnesHut  3.778  3.592  5.20%    2+2       0+0   3.648  3.453    5.65%
      +        boyer  6.632  6.110  8.54%    0+0       0+0   0.258  0.242    6.90%
      + count-graphs  1.435  1.325  8.30%    0+0       0+0  33.672 34.737   -3.07%
      +          fft  0.980  0.940  4.26%    3+9       2+6   0.838  0.827    1.41%
      +  knuthBendix  3.590  3.138 14.39%    0+0       0+0   0.962  0.967   -0.52%
      +       lexgen  6.593  6.072  8.59%    1+1       0+0   1.077  1.078   -0.15%
      +         life  0.972  0.868 11.90%   26+26      0+0   0.143  0.140    2.38%
      +        logic  2.525  2.387  5.80%    7+7       1+1   5.625  5.158    9.05%
      +   mandelbrot  0.090  0.093 -3.57%    0+0       0+0   0.855  0.728   17.39%
      +       mlyacc 26.732 23.827 12.19%  162+189    32+57  0.550  0.560   -1.79%
      +      nucleic  6.233  6.197  0.59%    3+3       0+0   0.163  0.173   -5.77%
      +ratio-regions  3.780  3.507  7.79%    0+0       0+0 133.993 131.035   2.26%
      +          ray  1.595  1.550  2.90%    1+1       0+0   3.440  3.418    0.63%
      +       simple  6.972  6.487  7.48%   29+32      5+7   3.523  3.525   -0.05%
      +          tsp  1.115  1.063  4.86%    0+0       0+0   7.393  7.265    1.77%
      +         vliw 27.765 24.818 11.87%  110+135    25+45  2.265  2.135    6.09%
      +----------------------------------------------------------------------------
      + Average                     6.94%                                    2.64%
      +
      +
      +
      +
      +
      X86          Compilation Time     Spill+Reload      Run Time
      +           110.25  New            110.25    New   110.25  New
      +
      +
      +
      +
      +
          barnesHut  5.530  5.420  2.03%  593+893   597+915   3.532  3.440   2.66%
      +        boyer  8.768  7.747 13.19%  493+199   301+289   0.327  0.297  10.11%
      + count-graphs  2.040  2.010  1.49%  298+394   315+457  26.578 28.660  -7.26%
      +          fft  1.327  1.302  1.92%  112+209   115+210   1.055  0.962   9.71%
      +  knuthBendix  5.218  5.475 -4.69%  451+598   510+650   0.928  0.932  -0.36%
      +       lexgen  9.970  9.623  3.60% 1014+841  1157+885   0.947  0.928   1.97%
      +         life  1.183  1.183  0.00%  162+182   145+148   0.127  0.103  22.58%
      +        logic  3.285  3.512 -6.45%  514+684   591+836   5.682  5.577   1.88%
      +   mandelbrot  0.147  0.143  2.33%   38+41     33+54    0.703  0.690   1.93%
      +       mlyacc 35.457 32.763  8.22% 3496+4564 3611+4860  0.552  0.550   0.30%
      +      nucleic  7.100  6.888  3.07%  239+168   201+158   0.175  0.173   0.96%
      +ratio-regions  6.388  6.843 -6.65% 1182+257   981+300  120.142 120.345 -0.17%
      +          ray  2.332  2.338 -0.29%  346+398   402+494   3.593  3.540   1.51%
      +       simple  9.912  9.903  0.08% 1475+941  1579+1168  3.057  3.178  -3.83%
      +          tsp  1.623  1.532  5.98%  266+200   250+211   8.045  7.878   2.12%
      +         vliw 33.947 35.470 -4.29% 2629+2774 2877+3171  2.072  1.890   9.61%
      +----------------------------------------------------------------------------
      + Average                     1.22%                                     3.36%
      +
      +
      + +
    16. +
    +
    +
    +
    +
    +
    +
    +
    [2000/03/23]
    +
    +
    +
      +
    1. +

      X86 fixes/changes

      +
      +
        +
      1. +

        The old code generated for SETcc was completely wrong. +The Intel optimization guide is VERY misleading.

        +
      2. +
      +
      +
    2. +
    3. +

      ALPHA fixes/changes

      +
      +
        +
      1. +

        Added the instructions LDBU, LDWU, STB, STW as per Fermin’s suggestion.

        +
      2. +
      3. +

        Added a new mode byteWordLoadStores to the functor parameter to Alpha()

        +
      4. +
      5. +

        Added reassociation code for address computation.

        + +
      6. +
      +
      +
    4. +
    +
    +
    +
    +
    +
    +
    +
    [2000/03/22]
    +
    +
    +
      +
    1. +

      X86 fixes/changes

      +
      +
        +
      1. +

        x86Rewrite bug with MUL3 (found by Lal)

        +
      2. +
      3. +

        Added the instructions FSTS, FSTL

        +
      4. +
      +
      +
    2. +
    3. +

      PA-RISC fixes/changes

      +
      +
        +
      1. +

        B label should not be a delay slot candidate! Why did this work?

        +
      2. +
      3. +

        ADDT(32, REG(32, r), LI n) now generates one instruction instead of two, +as it should be.

        +
      4. +
      5. +

        The assembly syntax for fstds and fstdd was wrong.

        +
      6. +
      7. +

        Added the composite instruction COMICLR/LDO, which is the immediate +operand variant of COMCLR/LDO.

        +
      8. +
      +
      +
    4. +
    5. +

      Generic MLRISC

      +
      +
        +
      1. +

        shuffle.sml rewritten to be slightly more efficient

        +
      2. +
      3. +

        DIV bug in mltree-simplify fixed (found by Fermin)

        +
      4. +
      +
      +
    6. +
    7. +

      Register Allocator

      +
      +
        +
      1. +

        I now release the interference graph earlier during spilling. +May improve memory usage.

        + +
      2. +
      +
      +
    8. +
    +
    +
    +
    +
    +
    +
    +
    +

    Version 110.26.1; 2000/03/14

    +
    +
    +
    +
    [2000/03/14]
    +
    +
    +
      +
    1. +

      Tools.registerStdShellCmdTool (from smlnj/cm/tool.cm) takes an +additional argument called "template" which is an optional string that +specifies the layout of the tool command line. See the CM manual for +explanation.

      +
    2. +
    3. +

      A special-purpose tool can be "registered" by simply dropping the +corresponding <…​>-tool.cm (and/or <…​>-ext.cm) into the same +directory where the .cm file lives that uses this tool. (The +behavior/misfeature until now was to look for the tool description +files in the current working directory.) As before, tool description +files could also be anchored — in which case they can live anywhere +they like. Following the recent e-mail discussion, this change should +make it easier to have special-purpose tools that are shipped together +with the sources of the program that uses them.

      + +
    4. +
    +
    +
    +
    +
    +
    +
    +
    [2000/03/10]
    +
    +
    +

    I added a re-written version of Dave’s fixpt script to src/system. +Changes relative to the original version: + - sh-ified (not everybody has ksh) + - automatically figures out which architecture it runs on + - uses ./makeml a bit more cleverly + - never invokes ./installml (and, thus, does not clobber your + good and working installation of sml in case something goes wrong) + - accepts max iteration count using option "-iter <n>" + - accepts a "base" name using option "-base <base>"

    +
    +
    +

    + +It does not build any extraneous heap images but directly rebuilds +bin- and boot-hierarchies using makeml’s "-rebuild" switch. Finally, +it can incorporate existing bin- and boot- hierarchies. For example, +suppose the base is set to "sml" (which is the default). Then it +successively builds

    +
    +
    +

    + + sml.bin.<arch>-unix and sml.boot.<arch>-unix +then sml1.bin.<arch>-unix and sml1.boot.<arch>-unix +then sml2.bin.<arch>-unix and sml2.boot.<arch>-unix +…​ +then sml<n>.bin.<arch>-unix and sml<n>.boot.<arch>-unix

    +
    +
    +

    + +and so on. If any of these already exist, it will just use what’s +there. In particular, many people will have the initial set of bin +and boot files around, so this saves time for at least one full +rebuild. Having sets of the form <base><k>.{bin,boot}.<arch>-unix for +<k>=1,2,…​ is normally not a good idea when invoking fixpt. However, +they might be the result of an earlier partial run of fixpt (which +perhaps got accidentally killed). In this case, fixpt will quickly +move through what exists before continuing where it left off earlier, +and, thus, saves a lot of time.

    +
    + +
    +
    +
    +
    +
    +
    [2000/03/10]
    +
    +
    +

    More assembly output problems involving the indexed addressing mode +on the x86 have been found and corrected. Thanks to Fermin Reig for the +fix.

    +
    +
    +

    The interface and implementation of the register allocator have been changed +slightly to accommodate the possibility to skip the register allocation +phases completely and go directly to memory allocation. This is needed +for C-- use.

    +
    + +
    +
    +
    +
    +
    +
    [2000/03/09]
    +
    +
    +
    +
    +
      +
    • +

      Complete re-organization of library names. Many libraries have been +consolidated so that they share the same path anchor. For example, +all MLRISC-related libraries are anchored at MLRISC, most libraries that +are SML/NJ-specific are under "smlnj". Notice that names like +host-cmb.cm or host-compiler.cm no longer exist. See system/README +for a complete description of the new naming scheme. Quick reference:

      +
      +
      +
      host-cmb.cm        -> smlnj/cmb.cm
      +host-compiler.cm   -> smlnj/compiler.cm
      +full-cm.cm         -> smlnj/cm.cm
      +<arch>-<os>.cm     -> smlnj/cmb/<arch>-<os>.cm
      +<arch>-compiler.cm -> smlnj/compiler/<arch>.cm
      +
      +
      +
    • +
    • +

      Bug fixes in CM.

      +
      +
        +
      • +

        exceptions in user code are being passed through (i.e., reach top level)

        +
      • +
      • +

        more bugs in paranoia mode fixed

        +
      • +
      • +

        bug related to checking group owners fixed

        +
      • +
      +
      +
    • +
    • +

      New install.sh script that automagically fetches archive files: +The new file config/srcarchiveurl must contain the URL of the +(remote) directory that contains bin files (or other source archives). +If install.sh does not find the archive locally, it tries to get +it from that remote directory. +This should simplify installation further: For machines that have +access to the internet, just fetch <version>-config.tgz, unpack it, +edit config/targets, and go (run config/install.sh). The script will +fetch everything else that it might need all by itself.

      +
      +

      For CVS users, this mechanism is not relevant for source archives, but +it is convenient for getting new sets of binfiles.

      +
      +
      +

      Archives should be tar files compressed with either gzip, compress, or +bzip2. The script recognizes .tgz, .tar, tar.gz, tz, .tar.Z, and .tar.bz2.

      +
      +
    • +
    +
    +
    +
    + +
    +
    +
    +
    +
    +
    [2000/03/07]
    +
    +
    +
      +
    • +

      size info in BOOTLIST

      +
      +
        +
      • +

        no fixed upper limits for number of bootfiles or length of +bootfile names in runtime

        +
      • +
      • +

        falling back to old behavior if no BOOTLIST size info found

        +
      • +
      +
      +
    • +
    • +

      allocation size heuristics in .run-sml

      +
      +
        +
      • +

        tries to read cache size from /proc/cpuinfo (this is important for +small-cache Celeron systems!)

        +
      • +
      +
      +
    • +
    • +

      install.sh robustified

      +
    • +
    • +

      CM manual updates

      +
    • +
    • +

      paranoid mode

      +
      +
        +
      • +

        no more CMB.deliver() (i.e., all done by CMB.make())

        +
      • +
      • +

        can re-use existing sml.boot.* files

        +
      • +
      • +

        init.cmi now treated as library

        +
      • +
      • +

        library stamps for consistency checks

        +
      • +
      +
      +
    • +
    • +

      sml.boot.<arch>-<os>/PIDMAP file

      +
      +
        +
      • +

        This file is read by the CM startup code. This is used to minimize +the amount of dynamic state that needs to be stowed away for the +purpose of sharing between interactive system and user code.

        +
      • +
      +
      +
    • +
    • +

      CM.Anchor.anchor instead of CM.Anchor.{set,cancel}

      +
      +
        +
      • +

        Upon request by Elsa. Anchors now controlled by get-set-pair +like most other CM state variables.

        +
      • +
      +
      +
    • +
    • +

      Compiler.CMSA eliminated

      +
      +
        +
      • +

        No longer supported by CM anyway.

        +
      • +
      +
      +
    • +
    • +

      fixed bugs in pickler that kept biting Stefan

      +
      +
        +
      • +

        past refs to past refs (was caused by the possibility that +ad-hoc sharing is more discriminating than hash-cons sharing)

        +
      • +
      • +

        integer overflow on LargeInt.minInt

        +
      • +
      +
      +
    • +
    • +

      ml-{lex,yacc} build scripts now use new mechanism +for building standalone programs

      +
    • +
    • +

      fixed several gcc -Wall warnings that were caused by missing header +files, missing initializations, etc., in runtime (not all warnings +eliminated, though)

      + +
    • +
    +
    +
    +
    +
    +
    +
    +
    + + + + + \ No newline at end of file diff --git a/doc/html/extensions.html b/doc/html/extensions.html new file mode 100644 index 0000000..6cbf8a0 --- /dev/null +++ b/doc/html/extensions.html @@ -0,0 +1,640 @@ + + + + + + + + +Standard ML of New Jersey Extensions + + + + + +
    +
    +
    +
    +

    The Standard ML of New Jersey (SML/NJ) system implements a number of extensions +to the language as defined by the Definition.

    +
    +
    +
    +
    +

    Module language extensions

    +
    +
    +

    Higher-order functors

    + +
    +
    +

    Structure constraints for signatures

    +
    +
    +
    structure ASTRUCT :> ASIG where B = BSTRUCT
    +
    +
    +
    +
    +
    +
    +

    Core language extensions

    +
    +
    +

    Vector expressions and patterns

    +
    +

    SML/NJ extends the expression and pattern matching syntax to include support +for vectors of some known size. The syntax is similar to that of list patterns, +except that the opening bracket has a preceeding # character. +For example,

    +
    +
    +
    +
      fun scaleV (s : real, #[x, y, z]) = #[s*x, s*y, s*z]
    +
    +
    +
    +

    This extension is always enabled in SML/NJ.

    +
    +
    +
    +

    Or patterns

    +
    +

    TO BE WRITTEN

    +
    +
    +
    +

    Lazy datatypes and functions

    +
    +

    SML/NJ provides support for lazy data structures with an extension to the datatype +and function declaration forms (val rec). There are a number of ways to enable +this feature, depending on the ??

    +
    +
    +
      +
    • +

      From the command-line, you can use the -Clazysml=true flag when launching +SML/NJ.

      +
    • +
    • +

      From the REPL, you can enable the feature using the SML command:

      +
      +
      +
        Control.lazysml := true;
      +
      +
      +
    • +
    • +

      In a CM, file you can specify that a source file uses the lazy keyword in a number +of different ways. The following three lines all have the effect of enabling the +lazy keyword during compilation of the file.

      +
    • +
    +
    +
    +
    +
      foo.sml : lazysml
    +  foo.sml : sml (lazy)
    +  foo.sml (with:parser.lazy-keyword=true)
    +
    +
    +
    +

    TO BE WRITTEN

    +
    +
    +
    +
      datatype lazy 'a stream = Nil | Cons of 'a * 'a stream;
    +
    +  fun lazy map f Nil =  Nil
    +         | map f (Cons(x,xs))  =  Cons(f x, map f xs);
    +
    +
    +
    +
    +

    Quotation/antiquotation

    +
    +

    TO BE WRITTEN

    +
    +
    +
    +
    +
    +

    Successor ML extensions

    +
    +
    +

    Successor ML (sML) is an effort to continue to grow and improve the SML language. +Andreas Rossberg has defined and implemented an some of the proposed features in +a version of HaMLet. We are beginning to +implement these features in SML/NJ in coordination with the MLton implementation.

    +
    +
    +

    Lexical extensions

    +
    +

    As of version 110.79, SML/NJ supports the sML lexical extensions. These can be +enabled using the command-line option -Cparser.succ-ml=true or the assignment

    +
    +
    +
    +
      Control.succML := true;
    +
    +
    +
    +

    in the REPL.

    +
    +
    +

    Numeric literals (SML/NJ 110.79)

    +
    +

    The syntax of numeric literals is extended in two ways. First, the underscore +character ("_") is now allowed as a separator between digits in a numeric +literal. For example,

    +
    +
    +
    +
      123_456
    +  0wxff_ff_ff_f3
    +  123_456.1
    +
    +
    +
    +

    are valid numeric literals under this extension, but the following are not:

    +
    +
    +
    +
      123._456		(* underscore not proceeded by digit *)
    +  0wx_ff_ff_ff_f3	(* leading underscore *)
    +
    +
    +
    +

    The second extension is binary literals for both integers and words. Similar to +hexidecimal literals, the syntax uses a leading "0b" to signal a binary +literal. Examples include

    +
    +
    +
    +
      0b0101_1110	(* the same value as 0x56 or 86 *)
    +  0wb1101	(* the same value as 0wD or 13 *)
    +
    +
    +
    +
    +

    Line comments (SML/NJ 110.79)

    +
    +

    This extension adds line comments (i.e., comments that are terminated by +the end of the line) to SML. These comments are denoted using the character +sequence "(*)". Line comments properly nest into conventional block +comments. For example, the following block comment is well formed:

    +
    +
    +
    +
      (*
    +  fun f x = x (*) my identity function *)
    +  *)
    +
    +
    +
    +
    +
    +
    +
    + + + \ No newline at end of file diff --git a/doc/html/install-notes/cygwin.html b/doc/html/install-notes/cygwin.html new file mode 100644 index 0000000..6678858 --- /dev/null +++ b/doc/html/install-notes/cygwin.html @@ -0,0 +1,584 @@ + + + + + + + +Installing SML/NJ under Cygwin + + + + + + +
    +
    +

    Introduction

    +
    +
    +

    These instructions are for installing Standard ML of New Jersey +(SML/NJ) in the Cygwin 32-bit environment.

    +
    +
    +

    Building SML/NJ as a Unix application

    +
    +

    It is recommended that you install SML/NJ in a volume that has +been mounted in binmode.

    +
    +
    +

    Otherwise, just following the standard instructions +for installing on Unix/Linux systems.

    +
    +
    +
    +

    Building SML/NJ as a Windows application

    +
    +

    It is possible to build SML/NJ using the Windows version of +the runtime system under Cygwin. To do so, you must set the +SMLNJ_WINDOWS_RUNTIME shell variable:

    +
    +
    +
    +
    % export SMLNJ_WINDOWS_RUNTIME
    +% SMLNJ_WINDOWS_RUNTIME=1
    +% config/install.sh
    +
    +
    +
    +
    +
    +
    + + + + + \ No newline at end of file diff --git a/doc/html/install-notes/install.html b/doc/html/install-notes/install.html new file mode 100644 index 0000000..f71cacf --- /dev/null +++ b/doc/html/install-notes/install.html @@ -0,0 +1,783 @@ + + + + + + + +Installing SML/NJ on Unix + + + + + + +
    +
    +

    Introduction

    +
    +
    +

    These instructions are for installing Standard ML of New Jersey +(SML/NJ) on Unix and Unix-like operating systems (including +Cygwin, Linux, and macOS) from source and the pre-compiled +bin files.

    +
    +
    +

    To install SML/NJ on macOS using the installer package, see +the macOS installation instructions.

    +
    +
    +

    To install SML/NJ on Windows using the installer, the +Windows installation instructions.

    +
    +
    +
    +
    +

    Basic installation

    +
    +
    +

    The standard Unix installation of SML/NJ is mostly automated via +a installation shell script and installer written in SML. +The only manual steps are downloading the installer, unbundling it, and +running the script.

    +
    +
    +
    +
    +
      +
    1. +

      Create a directory to do the installation and cd to that directory.

      +
    2. +
    3. +

      Then use either curl or wget to down load the +config.tgz file. +For example,

      +
      +
      +
      % wget https://smlnj.org/dist/working/110.99.5/config.tgz
      +
      +
      +
    4. +
    5. +

      Unbundle the config.tgz file.

      +
      +
      +
      % tar -xzf config.tgz
      +
      +
      +
    6. +
    7. +

      Optionally edit the config/targets file to add or remove installation +targets.

      +
    8. +
    9. +

      Run the install script. For some x86 Unix systems, there is a +choice of 32 vs. 64-bit installations, with the default being 64 bits. +For other systems, the installation is always 32-bit.

      +
      +
      +
      % config/install.sh options
      +
      +
      +
      +

      For systems that support 64-bit installations, the installer options are

      +
      +
      +
      +
      -default size
      +
      +

      specify the default size for the sml and other commands, where +size is either 32 or 64.

      +
      +
      -32
      +
      +

      install the 32-bit version of the system (see below for details).

      +
      +
      -64
      +
      +

      install the 64-bit version of the system (see below for details).

      +
      +
      +
      +
    10. +
    +
    +
    +
    +
    +
    +
    +

    Installing 64-bit SML/NJ

    +
    +
    +

    As of version 110.94, SML/NJ supports 64-bit installations on most +x86-based Unix systems. It is possible to have both the 32 and 64-bit +versions installed in the same place (see below +for details). The default installation for these systems is 64-bits +(as of version 110.98); thus, one can just run the installer command +without options:

    +
    +
    +
    +
    % config/install.sh
    +
    +
    +
    +
    +
    +

    Installing 32-bit SML/NJ

    +
    +
    +

    For non-x86 systems, the only installation option is 32-bits, which +is what you get by running the install command without options:

    +
    +
    +
    +
    % config/install.sh
    +
    +
    +
    +

    As noted above, the default on x86 systems that report their hardware +as “`x86_64`” is 64-bits. If you want to install a 32-bit version +instead, you can use the following command:

    +
    +
    +
    +
    % config/install.sh -default 32
    +
    +
    +
    +

    It is also possible to have both the 32 and 64-bit versions installed +in the same place (see below for details).

    +
    +
    +

    Troubleshooting the 32-bit install

    +
    +

    To install the 32-bit version on a 64-bit architecture, you must have +support for compiling and running 32-bit binaries. On macOS (prior +to Mojave), this support is standard, but most 64-bit Linux systems +are not configured with 32-bit support. +In such a case, you will get the message

    +
    +
    +
    +
    !!! SML/NJ requires support for 32-bit executables
    +
    +
    +
    +

    during the install process. +To fix this problem, you will need to acquire the 32-bit emulation +libraries for your particular Linux distribution.

    +
    +
    +

    For Debian (7.0 Wheezy and later) and recent version of Ubuntu, you +will need to enable multiarch support. +See https://wiki.ubuntu.com/MultiarchSpec for details, or try the +following commands:

    +
    +
    +
    +
    % dpkg --add-architecture i386
    +% apt-get update
    +% apt-get install libc6:i386
    +
    +
    +
    +

    You may also need to install the two following packages:

    +
    +
    +
    +
    % apt-get install gcc-multilib g++-multilib
    +
    +
    +
    +

    For RedHat Fedora (at least Fedora 16), you will need to install +glibc-devel.i686:

    +
    +
    +
    +
    % yum install glibc-devel.i686
    +
    +
    +
    +

    For Red Hat Enterprise Linux (or CentOS), you may also have to install +the rpm package libgcc-multilib, e.g.:

    +
    +
    +
    +
    % yum groupinstall "Development tools"
    +% yum install libgcc.i686
    +% yum install glibc-devel.i686
    +
    +
    +
    +

    Older instructions for RHEL and CentOS: download the libgcc-multilib +package from rpmseek.com and then run

    +
    +
    +
    +
    rpm -ivh libgcc-multilib-xxx.x86_64.rpm
    +
    +
    +
    +

    where the libgcc-multilib package is the one you downloaded. +For openSUSE use the YaST administration tool to install the +gcc-32bit package.

    +
    +
    +
    +
    +
    +

    Installing both 32-bit and 64-bit versions of SML/NJ

    +
    +
    +

    It is possible to install both versions in the same location by running +the install.sh script twice. For example, the commands

    +
    +
    +
    +
    % config/install.sh -32
    +% config/install.sh -default 64
    +
    +
    +
    +

    will install both versions with the 64-bit version as default. One +can then use the command sml -32 to run the 32-bit version of +the system. Note that the default version must be installed second.

    +
    +
    +
    +
    + + + + + \ No newline at end of file diff --git a/doc/html/install-notes/macos.html b/doc/html/install-notes/macos.html new file mode 100644 index 0000000..085befa --- /dev/null +++ b/doc/html/install-notes/macos.html @@ -0,0 +1,698 @@ + + + + + + + +Installing SML/NJ under macOS + + + + + + +
    +
    +

    Introduction

    +
    +
    +

    These instructions are for installing Standard ML of New Jersey +(SML/NJ) on macOS.

    +
    +
    +
    +
    +

    Using the installer package

    +
    +
    +

    The most direct way to install SML/NJ is to use one of the +provided installer packages:

    +
    +
    +
    +
    + +
    +
    +
    +
    +

    These packages put the installation in /usr/local/smlnj. If there is an +existing installation, then it will be updated. We recommend that you +add /usr/local/smlnj/bin to your shell’s PATH variable.

    +
    +
    +

    There is also a Homebrew cask that wraps the 64-bit installation +package. If you have homebrew installed, then you can run the following command +to install SML/NJ:

    +
    +
    +
    +
    % brew cask install smlnj
    +
    +
    +
    +

    If you have previously installed SML/NJ using Homebrew, then you will +need to use the command

    +
    +
    +
    +
    % brew cask upgrade smlnj
    +
    +
    +
    +
    +
    +

    Installing from source

    +
    +
    +

    It is also possible to install SML/NJ from source code and the pre-compiled +bin files. To do so, you will need a copy of Apple’s Xcode development +environment. Furthermore, you will need to install the command-line tools, +which you can do by running the command

    +
    +
    +
    +
    % xcode-select --install
    +
    +
    +
    +

    With the command-line tools installed, you can follow the standard +instructions for installing on Unix/Linux +systems.

    +
    +
    + + + + + +
    +
    Note
    +
    +
    +

    For macOS 10.13 High Sierra and earlier, the default install is 32-bits. +Starting with macOS 10.14 Mojave, the default install is 64 bits. +It is possible to install SML/NJ on Mojave, but it requires some +extra steps (see the instructions below).

    +
    +
    +
    +
    +
    +
    +

    Installing 32-bit SML/NJ on macOS 10.14 (Mojave)

    +
    +
    +

    Xcode 10.1 (and later) does not include the libraries needed to +build 32-bit executables, such as the SML/NJ runtime.

    +
    +
    +

    To support building on Mojave, there is a special makefile (mk.x86-darwin18) +for the runtime system and the config/install.sh uses this makefile +when necessary. This makefile expects that the MacOSX10.13.sdk directory +from Xcode 9 has been +copied into the Xcode 10 SDKs directory. Note that updating Xcode +from the AppStore will remove the 10.13 SDK, so you should keep +a copy in a safe place.

    +
    +
    +

    The Xcode SDKs live in Platforms/MacOSX.platform/Developer/SDKs +under the Developer directory. One can determine the path to the +current developer directory using the command

    +
    +
    +
    +
    % xcode-select -p
    +
    +
    +
    +
    +
    +

    Quarantine issues

    +
    +
    +

    Another issue that you may encounter when building from source +on macOs 10.14 Mojave is an error message for a shell +script of the form

    +
    +
    +
    +
      /bin/sh: bad interpreter: Operation not permitted
    +
    +
    +
    +

    This error arises because the shell script has the com.apple.quarantine +attribute set. To fix the problem, remove the attribute using the command

    +
    +
    +
    +
    % xattr -d com.apple.quarantine shell-script
    +
    +
    +
    +

    and resume the build.

    +
    +
    +
    +
    + + + + + \ No newline at end of file diff --git a/doc/html/install-notes/windows.html b/doc/html/install-notes/windows.html new file mode 100644 index 0000000..8437622 --- /dev/null +++ b/doc/html/install-notes/windows.html @@ -0,0 +1,656 @@ + + + + + + + +Installing SML/NJ under Windows + + + + + + +
    +
    +

    Introduction

    +
    +
    +

    These instructions are for installing Standard ML of New Jersey +(SML/NJ) as an application on Microsoft Windows. To use SML/NJ under the +cygwin environment, see the installation +instructions.

    +
    +
    +
    +
    +

    Using the installer package

    +
    +
    +

    We provide a Microsoft Installer package for SML/NJ +(https://smlnj.org/dist/working/110.99.5/smlnj-110.99.5.msi). +The installer contains a full installation including +nearly all of the optional components (it does not +include "asdl", "mlrisc-tools", or "nowhere").

    +
    +
    +

    You can choose the installation directory. The default +is C:\Program Files\SMLNJ. The bin directory containing +the sml command is added to the default PATH, although +you will need to re-open a command window to see +the binding. The SMLNJ_HOME environment variable is also +set to point at the location where SML/NJ was installed.

    +
    +
    +
    +
    +

    Using SML/NJ on Windows

    +
    +
    +

    Once you have SML/NJ installed, you can launch the interactive +top-level loop by selecting the application in the Start Menu. +Alternatively, you can open a shell window and running the sml +command.

    +
    +
    +
    +
    +

    Building from sources

    +
    +
    +

    To build SML/NJ from the runtime sources and precompiled "bin" +files requires a Unix shell (we use Cygwin) +to fetch and unbundle the files. In addition, you will need to +have a version of Microsoft’s +Visual Studio +available.

    +
    +
    +

    To build the Windows installation, use the following steps. +We have marked those steps that must be done in a Unix shell +with a "[UNIX]" prefix and those that must be done in the +Visual Studio shell with "[VS]".

    +
    +
    +
    +
    +
      +
    1. +

      Create an installation directory for SML/NJ.

      +
    2. +
    3. +

      [UNIX] Use curl or wget to download the +config.tgz file.

      +
      +
      +
      % curl -O {dist-dir}/config.tgz
      +
      +
      +
    4. +
    5. +

      [UNIX] Unbundle the config.tgz file.

      +
      +
      +
      % tar -xzf config.tgz
      +
      +
      +
    6. +
    7. +

      [UNIX] Run the prepare-win-install.sh script to download and unbundle the +necessary source files. This script those files used in the standard +installation on Windows.

      +
      +
      +
      % config/prepare-win-install.sh
      +
      +
      +
    8. +
    9. +

      [VS] Run the SML/NJ installation script for Windows

      +
      +
      +
      config\install.bat
      +
      +
      +
    10. +
    +
    +
    +
    +
    +
    +
    + + + + + \ No newline at end of file diff --git a/doc/html/man/man1/asdlgen.1.html b/doc/html/man/man1/asdlgen.1.html new file mode 100644 index 0000000..b51de24 --- /dev/null +++ b/doc/html/man/man1/asdlgen.1.html @@ -0,0 +1,605 @@ + + + + + + + +ASDLGEN(1) + + + + + +
    +
    +

    SYNOPSIS

    +
    +
    +

    asdlgen COMMAND [ OPTIONS ] FILE …​

    +
    +
    +

    where COMMAND is one of the following:

    +
    +
    +
    +
    help
    +
    +

    prints information about the available options to the standard output.

    +
    +
    version
    +
    +

    prints the version of asdlgen to the standard output.

    +
    +
    c++ or cxx
    +
    +

    generate C++ code.

    +
    +
    sml
    +
    +

    generate SML code.

    +
    +
    check
    +
    +

    check the correctness of the input specifications, but do not generate +output.

    +
    +
    +
    +
    +
    +
    +

    DESCRIPTION

    +
    +
    +

    Asdlgen reads the specified of files, which should +contain ASDL module and view declarations, and produces pickling +and unpickling code.

    +
    +
    +
    +
    +

    OPTIONS

    +
    +
    +

    COMMON OPTIONS

    +
    +
    +
    -n
    +
    +

    Do not write any output files. Instead write the list of files that +would have been written to the standard output.

    +
    +
    -d DIR, --output-directory=DIR
    +
    +

    Specify the output directory to place the generated files. +By default the output will be placed in the same directory +as the input file from which it was produced.

    +
    +
    --gen=NAMES
    +
    +

    Specifies the components to generate, where NAMES is a comma-separated +list of names taken from the following:

    +
    +
      +
    • +

      types — generate the type definitions from the ASDL specification.

      +
    • +
    • +

      memory — generate the memory pickler

      +
    • +
    • +

      file — generate the file pickler

      +
    • +
    • +

      sexp — generate the S-Expression pickler (SML only).

      +
    • +
    +
    +
    +
    +
    +
    +
    +

    C++ OPTIONS

    +
    +
    +
    --base-include=FILE
    +
    +

    Specify the name of the C++ header file that defines the primitive ASDL types and functions. The default value is asdl/asdl.hxx.

    +
    +
    +
    +
    +
    +

    SML OPTIONS

    +
    +
    +
    --cm=FILE
    +
    +

    Generate a CM file for the pickler; this will define a CM library. +Note that if the ASDL specification includes primitive modules, these +will be included in the list of exported structures, but the supporting +source files will have to be added to the CM file by hand.

    +
    +
    --mlb=FILE
    +
    +

    Generate an MLB file for the pickler. Note that if the ASDL specification +includes primitive modules, these will be included in the list of +exported structures, but the supporting source files will have to be +added to the MLB file by hand.

    +
    +
    +
    +
    +
    +
    +
    +

    AUTHOR

    +
    +
    +

    The original version of asdlgen was written by Dan Wang as part of +the National Compiler Infrastructure Project at Princeton University. +This version of the tool was implemented by John Reppy.

    +
    +
    +
    +
    +

    SEE-ALSO

    +
    +
    +

    ASDL Reference Manual (included in the SML/NJ documentation).

    +
    +
    +
    +
    +

    COPYING

    +
    +
    +

    Copyright © 2020 The Fellowship of SML/NJ

    +
    +
    +

    This is free software; see the source for copying conditions. There is NO +warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.

    +
    +
    +
    +
    + + + \ No newline at end of file diff --git a/doc/html/man/man1/heap2exec.1.html b/doc/html/man/man1/heap2exec.1.html new file mode 100644 index 0000000..6181208 --- /dev/null +++ b/doc/html/man/man1/heap2exec.1.html @@ -0,0 +1,544 @@ + + + + + + + +HEAP2EXEC(1) + + + + + +
    +
    +

    SYNOPSIS

    +
    +
    +

    heap2exec [ -32 | -64 ] [ LINKMODE ] file

    +
    +
    +
    +
    +

    DESCRIPTION

    +
    +
    +

    Under normal usage, SML/NJ represents a program as a heap-image file +with a name of the form foo.arch-opsys +Heap2exec generates a standalone executable from a heap image. +You have the option to specify a preferred linking mode.

    +
    +
    +

    The default is to link statically on FreeBSD and Linux.

    +
    +
    +

    Note that heap2exec is an optional component of the SML/NJ +installation process that is not included by default. Edit the +config/targets file before installation to include it.

    +
    +
    +
    +
    +

    OPTIONS

    +
    +
    +
    +
    -32
    +
    +

    run the 32-bit version of the program (currently the default).

    +
    +
    -64
    +
    +

    run the 64-bit version of the program. This option only applies to +the x86-64 (aka amd64) architecture.

    +
    +
    -static, -linkwith-a
    +
    +

    statically link the program with the runtime system. Static linking +is the default on Linux and FreeBSD.

    +
    +
    -dynamic, -linkwith-so
    +
    +

    dynamically link the program with the runtime system.

    +
    +
    +
    +
    +
    +
    +

    AUTHOR

    +
    +
    +

    Heap2exec was written by Matthias Blume.

    +
    +
    +
    +
    +

    SEE-ALSO

    +
    +
    +

    sml(1)

    +
    +
    +
    +
    +

    BUGS

    +
    +
    +

    Heap2exec is limited to the x86 and amd64 architectures and to the +macOS, Linux, and FreeBSD operating systems.

    +
    +
    +
    +
    +

    COPYING

    +
    +
    +

    Copyright © 2020 The Fellowship of SML/NJ

    +
    +
    +

    This is free software; see the source for copying conditions. There is NO +warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.

    +
    +
    +
    +
    + + + \ No newline at end of file diff --git a/doc/html/man/man1/ml-antlr.1.html b/doc/html/man/man1/ml-antlr.1.html new file mode 100644 index 0000000..8e69e57 --- /dev/null +++ b/doc/html/man/man1/ml-antlr.1.html @@ -0,0 +1,529 @@ + + + + + + + +ML-ANTLR(1) + + + + + +
    +
    +

    SYNOPSIS

    +
    +
    +

    ml-antlr [OPTIONS] file

    +
    +
    +
    +
    +

    DESCRIPTION

    +
    +
    +

    ML-Antlr is an LL(k) parser generator for Standard ML that is loosely modeled +on Terence Parr’s ANTLR parser generator.

    +
    +
    +
    +
    +

    OPTIONS

    +
    +
    +
    +
    -32
    +
    +

    run the 32-bit version of the program (currently the default).

    +
    +
    -64
    +
    +

    run the 64-bit version of the program. This option only applies to +the x86-64 (aka amd64) architecture.

    +
    +
    --dot
    +
    +

    generate DOT output (http://www.graphviz.org). The generated file will be named file.dot, +where file is the input file.

    +
    +
    --latex
    +
    +

    generate a simple LaTeX version of the grammar, named file.tex.

    +
    +
    --unit-actions
    +
    +

    ignore the action code in the grammar, and instead return unit for every production.

    +
    +
    +
    +
    +
    +
    +

    AUTHOR

    +
    +
    +

    ML-Antlr was written by Aaron Turon.

    +
    +
    +
    +
    +

    SEE-ALSO

    +
    +
    +

    ml-ulex(1), ml-yacc(1)

    +
    +
    +
    +
    +

    COPYING

    +
    +
    +

    Copyright © 2020 The Fellowship of SML/NJ

    +
    +
    +

    This is free software; see the source for copying conditions. There is NO +warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.

    +
    +
    +
    +
    + + + \ No newline at end of file diff --git a/doc/html/man/man1/ml-build.1.html b/doc/html/man/man1/ml-build.1.html new file mode 100644 index 0000000..9f87923 --- /dev/null +++ b/doc/html/man/man1/ml-build.1.html @@ -0,0 +1,549 @@ + + + + + + + +ML-BUILD(1) + + + + + +
    +
    +

    SYNOPSIS

    +
    +
    +

    ml-build [OPTIONS] group.cm [main [heap-image]]

    +
    +
    +
    +
    +

    DESCRIPTION

    +
    +
    +

    ML-Build is a command-line tool for building applications from SML source +files using the Compilation Manager and SML/NJ compiler.

    +
    +
    +
    +
    +

    OPTIONS

    +
    +
    +
    +
    -Cctl=value
    +
    +

    set the control ctl to value.

    +
    +
    -Dname=value
    +
    +

    define the CM variable name to have the given value.

    +
    +
    -D'name
    +
    +

    define the CM variable name to have the value 1.

    +
    +
    -Uname
    +
    +

    remove any definition of the CM variable name.

    +
    +
    -S setup.cm
    +
    +

    load and execute the code specified by the CM file setup.cm prior to the main +build process. This option allows one to customize the compiler via side-effect.

    +
    +
    +
    +
    +
    +
    +

    EXAMPLES

    +
    +
    +
    +
    ml-build foo.cm
    +
    +

    builds the program foo assuming a main function named Test.main.

    +
    +
    ml-build foo.cm Main.main
    +
    +

    builds the program foo with main function named Main.main.

    +
    +
    ml-build sources.cm Main.main prog
    +
    +

    builds the program prog with main function named Main.main.

    +
    +
    +
    +
    +
    +
    +

    AUTHOR

    +
    +
    +

    ML-Build was written by Matthias Blume.

    +
    +
    +
    +
    +

    SEE-ALSO

    +
    +
    +

    ml-makedepend(1), sml(1), smlnj(7)

    +
    +
    +
    +
    +

    COPYING

    +
    +
    +

    Copyright © 2020 The Fellowship of SML/NJ

    +
    +
    +

    This is free software; see the source for copying conditions. There is NO +warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.

    +
    +
    +
    +
    + + + \ No newline at end of file diff --git a/doc/html/man/man1/ml-burg.1.html b/doc/html/man/man1/ml-burg.1.html new file mode 100644 index 0000000..6e90c10 --- /dev/null +++ b/doc/html/man/man1/ml-burg.1.html @@ -0,0 +1,508 @@ + + + + + + + +ML-BURG(1) + + + + + +
    +
    +

    SYNOPSIS

    +
    +
    +

    ml-burg file

    +
    +
    +
    +
    +

    DESCRIPTION

    +
    +
    +

    The ml-burg program generates a Standard ML program to perform +bottom-up rewriting of an input tree. Cost information associated +with each rewrite rule is used to derive the minimum rewrite cost +for the entire tree. A successful reduction corresponds to rewriting +the input tree to a special non-terminal symbol called the +start non-terminal. Upon successful reduction, facilities are +provided to walk the tree emitting semantic actions corresponding to +the rules that matched.

    +
    +
    +
    +
    +

    AUTHOR

    +
    +
    +

    ML-Burg was written by Lal George and Florent Guillaume.

    +
    +
    +
    +
    +

    SEE-ALSO

    +
    +
    +

    sml(1)

    +
    +
    +

    ML-Burg — Documentation by Florent Guillaume and Lal George (included +in the SML/NJ documentation).

    +
    +
    +
    +
    +

    COPYING

    +
    +
    +

    Copyright © 2020 The Fellowship of SML/NJ

    +
    +
    +

    This is free software; see the source for copying conditions. There is NO +warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.

    +
    +
    +
    +
    + + + \ No newline at end of file diff --git a/doc/html/man/man1/ml-lex.1.html b/doc/html/man/man1/ml-lex.1.html new file mode 100644 index 0000000..c5bd090 --- /dev/null +++ b/doc/html/man/man1/ml-lex.1.html @@ -0,0 +1,505 @@ + + + + + + + +ML-LEX(1) + + + + + +
    +
    +

    SYNOPSIS

    +
    +
    +

    ml-lex file

    +
    +
    +
    +
    +

    DESCRIPTION

    +
    +
    +

    ML-Lex is a scanner generator for Standard ML.

    +
    +
    +

    This software is deprecated; we recommend using the ml-ulex(1) tool +in ml-lex-compatibility mode instead.

    +
    +
    +
    +
    +

    AUTHOR

    +
    +
    +

    ml-lex(1) was written by James S. Mattson and David Tarditi.

    +
    +
    +
    +
    +

    SEE-ALSO

    +
    +
    +

    ml-ulex(1)

    +
    +
    +

    ML-Lex User’s Manual +(also included in the SML/NJ documentation).

    +
    +
    +
    +
    +

    COPYING

    +
    +
    +

    Copyright © 2020 The Fellowship of SML/NJ

    +
    +
    +

    This is free software; see the source for copying conditions. There is NO +warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.

    +
    +
    +
    +
    + + + \ No newline at end of file diff --git a/doc/html/man/man1/ml-makedepend.1.html b/doc/html/man/man1/ml-makedepend.1.html new file mode 100644 index 0000000..196cadc --- /dev/null +++ b/doc/html/man/man1/ml-makedepend.1.html @@ -0,0 +1,545 @@ + + + + + + + +ML-MAKEDEPEND(1) + + + + + +
    +
    +

    SYNOPSIS

    +
    +
    +

    ml-makedepend [OPTIONS] project.cm target

    +
    +
    +
    +
    +

    DESCRIPTION

    +
    +
    +

    ML-Makedepend is a tool for generating dependency information to allow the +Unix make(1) program to be used to build SML/NJ programs.

    +
    +
    +
    +
    +

    OPTIONS

    +
    +
    +
    +
    -f makefile
    +
    +

    Specify the name of the makefile to which the dependency information is appended. If this +option is not given, then the output is appended to the end of the file makefile +(or Makefile). It is an error if neither file exists and the -f option is not specified.

    +
    +
    -a arch
    +
    +

    specifies the name a make variable for architecture-specific filenames and paths +(the default is ARCH).

    +
    +
    -o os
    +
    +

    specifies the name a make variable for operating-system-specific filenames and paths +(the default is OPSYS).

    +
    +
    -n
    +
    +

    generates full filenames and paths with any architecture or operating-system-specific +parts expanded out to their definition for the host system. If this option is specified, +then any -a and -o options are ignored.

    +
    +
    -Cctl=value
    +
    +

    set the control ctl to value.

    +
    +
    -Dname=value
    +
    +

    define the CM variable name to have the given value.

    +
    +
    -D'name
    +
    +

    define the CM variable name to have the value 1.

    +
    +
    -Uname
    +
    +

    remove any definition of the CM variable name.

    +
    +
    +
    +
    +
    +
    +

    AUTHOR

    +
    +
    +

    ML-Makedepend was written by Matthias Blume.

    +
    +
    +
    +
    +

    SEE-ALSO

    +
    +
    +

    ml-build(1), sml(1), smlnj(7)

    +
    +
    +
    +
    +

    COPYING

    +
    +
    +

    Copyright © 2020 The Fellowship of SML/NJ

    +
    +
    +

    This is free software; see the source for copying conditions. There is NO +warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.

    +
    +
    +
    +
    + + + \ No newline at end of file diff --git a/doc/html/man/man1/ml-nlffigen.1.html b/doc/html/man/man1/ml-nlffigen.1.html new file mode 100644 index 0000000..f9fd29b --- /dev/null +++ b/doc/html/man/man1/ml-nlffigen.1.html @@ -0,0 +1,671 @@ + + + + + + + +ML-NLFFIGEN(1) + + + + + +
    +
    +

    SYNOPSIS

    +
    +
    +

    ml-nlffigen [OPTIONS] file

    +
    +
    +
    +
    +

    DESCRIPTION

    +
    +
    +

    ML-Nlffigen is a tool for generating glue code from C language +header files. The generator reads C source code and emits SML +code along with a description file for the compilation manager (CM).

    +
    +
    +
    +
    +

    OPTIONS

    +
    +
    +
    +
    -d, -dir DIR
    +
    +

    Specify the output directory where all generated files are +placed (default NLFFI-Generated).

    +
    +
    -allSU
    +
    +

    instructs ml-nlffigen to include all structs and unions, +even those that are defined in included files, as opposed +to files explicitly listed as arguments.

    +
    +
    -width WID
    +
    +

    set output line width (just a guess) to WID (default 75).

    +
    +
    -smloption OPT
    +
    +

    instructs ml-nlffigen to add OPT to the list +of options to annotate .sml entries in the generated .cm +file with. By default, the list consists just of "noguid."

    +
    +
    -guid
    +
    +

    Removes the default "noguid" from the list of sml options. +This option re-enables strict handling of type- and object-identity +but can have negative impact on CM cutoff recompilation +performance if the programmer routinely removes the entire +tree of ml-nlffigen-generated files during development.

    +
    +
    -t, -target TGT
    +
    +

    Sets the target to TGT (which must be one of "sparc-unix", +"x86-unix", or "x86-win32"). The default is the host architecture.

    +
    +
    -l, -light
    +
    +

    suppress "heavy" versions of function wrappers and +field accessors; also cancels any earlier -heavy option.

    +
    +
    -h, -heavy
    +
    +

    suppress "light" versions of function wrappers and +field accessors; also cancels any earlier -light option.

    +
    +
    -na, -namedargs
    +
    +

    instructs ml-nlffigen to generated function wrappers that +use named arguments (SML records) instead of tuples if +there is enough information for this in the C source.

    +
    +
    -nocollect
    +
    +

    Do not collect enum constants from truly unnamed enumerations +(those without tags that occur at toplevel or in an unnamed +context; i.e., not in a typedef or another named struct +or union) into a single artificial enumeration tagged by ' +(single apostrohe). The corresponding SML-side representative +will be a structure named E_'.

    +
    +
    -ec, -enum-constructors
    +
    +

    When possible (i.e., if all values of a given enumeration +are different from each other), make the ML representation +type of the enumeration a datatype. The default (and +fallback) is to make that type the same as MLRep.Signed.int.

    +
    +
    -lh -libhandle H
    +
    +

    Use the variable H to refer to the handle to the +shared library object. Given the constraints of CM, the +argument H must have the form of a long SML identifier; +e.g., MyLibrary.libhandle (default Library.libh).

    +
    +
    -add, -include file
    +
    +

    Include file in the generated .cm file. This option +is necessary at least once for providing the library handle. +It can be used arbitrarily many times, resulting in more +than one such programmer-supplied file to be mentioned. +If file is a relative path, then it must be relative to +the directory specified in the -dir option.

    +
    +
    -cm -cmfile file
    +
    +

    Specifies the name of the generated .cm file, relative to the directory +specified by the -dir option (default nlffi-generated.cm).

    +
    +
    -cppopt opt
    +
    +

    The string opt gets added to the list of options to be +passed to the C preprocessor. The list of options +gets substituted for %o in the cpp command line template.

    +
    +
    -Ux
    +
    +

    The option (i.e., -Ux) is added to the list of cpp options.

    +
    +
    -Dx
    +
    +

    The option (i.e., -Dx) is added to the list of cpp options.

    +
    +
    -Ix
    +
    +

    The option (i.e., -Ix) is added to the list of cpp options.

    +
    +
    -version
    +
    +

    Print the version number of ml-nlffigen to standard output and then quit.

    +
    +
    -m, -match RE
    +
    +

    Normally ml-nlffigen will include ML definitions for a C +declaration if the C declaration textually appears in +one of the files specified at the command line. Definitions +in included files will normally not appear (unless +their absence would lead to inconsistencies). +By specifying -match RE, ml-nlffigen will also include +definitions that occur in recursively included files +for which the AWK-style regular expression RE matches +their names.

    +
    +
    -p, -prefix P
    +
    +

    Generated SML structure names will all have the prefix P +(in addition to the usual "S_" or "U_" or "F_").

    +
    +
    -g, -gensym G
    +
    +

    Names generated by ml-nlffigen (for anonymous struct/union/enums) +will get _G as an additional suffix. This option should +be used if output from several indepdendent runs of +ml-nlffigen are to coexist in the same ML program.

    +
    +
    --
    +
    +

    Terminate processing of options, remaining arguments are +taken to be C sources.

    +
    +
    +
    +
    +
    +
    +

    ENVIRONMENT

    +
    +
    +

    ML-Nlffigen looks at the environment variable FFIGEN_CPP to obtain +the template string for the C-Preprocessor command line. If FFIGEN_CPP is not +set, the template defaults to "gcc -E -U__GNUC__ %o %s > %t." +The actual command line is obtained by substituting occurences of +%s with the name of the source, and %t with the name of a temporary +file holding the pre-processed code.

    +
    +
    +
    +
    +

    AUTHOR

    +
    +
    +

    ML-Nlffigen was written by Matthias Blume.

    +
    +
    +
    +
    +

    SEE-ALSO

    +
    +
    +

    sml(1)

    +
    +
    +

    NLFFI — A new SML/NJ Foreign-Function Interface (User Manual) (included in +the SML/NJ documentation).

    +
    +
    +
    +
    +

    BUGS

    +
    +
    +

    ML-Nlffigen does not yet support 64-bit platforms.

    +
    +
    +
    +
    +

    COPYING

    +
    +
    +

    Copyright (C) 2020 The Fellowship of SML/NJ

    +
    +
    +

    This is free software; see the source for copying conditions. There is NO +warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.

    +
    +
    +
    +
    + + + \ No newline at end of file diff --git a/doc/html/man/man1/ml-ulex.1.html b/doc/html/man/man1/ml-ulex.1.html new file mode 100644 index 0000000..e078177 --- /dev/null +++ b/doc/html/man/man1/ml-ulex.1.html @@ -0,0 +1,555 @@ + + + + + + + +ML-ULEX(1) + + + + + +
    +
    +

    SYNOPSIS

    +
    +
    +

    ml-ulex [OPTIONS] file

    +
    +
    +
    +
    +

    DESCRIPTION

    +
    +
    +

    ML-Ulex is a lexer generator that supports Unicode characters (in UTF-8 representation) and +an extended form of regular expressions.

    +
    +
    +
    +
    +

    OPTIONS

    +
    +
    +
    +
    -32
    +
    +

    run the 32-bit version of the program (currently the default).

    +
    +
    -64
    +
    +

    run the 64-bit version of the program. This option only applies to +the x86-64 (aka amd64) architecture.

    +
    +
    --dot
    +
    +

    generate DOT output (http://www.graphviz.org). The generated file will be named file.dot, +where file is the input file.

    +
    +
    --match
    +
    +

    enter interactive matching mode. This will allow interactive testing of the machine; presently, +only the INITIAL start state is available for testing +(see the User Guide for details on start states).

    +
    +
    --ml-lex-mode
    +
    +

    operate in ml-lex compatibility mode.

    +
    +
    --table-based
    +
    +

    generate a table-based lexer.

    +
    +
    --fn-based
    +
    +

    generate a lexer that represents states as functions and transitions as tail calls.

    +
    +
    --minimize
    +
    +

    generate a minimal machine. Note that this is slow, and is almost never necessary.

    +
    +
    --strict-sml
    +
    +

    generate strict SML (i.e., do not use SML/NJ extensions). This flag +is useful if you want to use the output with a different SML system.

    +
    +
    +
    +
    +
    +
    +

    AUTHOR

    +
    +
    +

    ML-Ulex was written by Aaron Turon.

    +
    +
    +
    +
    +

    SEE-ALSO

    +
    +
    +

    ml-antlr(1), ml-yacc(1)

    +
    + +
    +

    Regular-expression derivatives reexamined by Scott Owens, John Reppy, and Aaron Turon. +Journal of Functional Programming, 19(2):173-190, 2009.

    +
    +
    +
    +
    +

    COPYING

    +
    +
    +

    Copyright © 2020 The Fellowship of SML/NJ

    +
    +
    +

    This is free software; see the source for copying conditions. There is NO +warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.

    +
    +
    +
    +
    + + + \ No newline at end of file diff --git a/doc/html/man/man1/ml-yacc.1.html b/doc/html/man/man1/ml-yacc.1.html new file mode 100644 index 0000000..32bde73 --- /dev/null +++ b/doc/html/man/man1/ml-yacc.1.html @@ -0,0 +1,502 @@ + + + + + + + +ML-YACC(1) + + + + + +
    +
    +

    SYNOPSIS

    +
    +
    +

    ml-yacc file

    +
    +
    +
    +
    +

    DESCRIPTION

    +
    +
    +

    ML-Yacc is an LALR(k) parser generator for Standard ML that is modeled +on the YACC parser generator for the C language.

    +
    +
    +
    +
    +

    AUTHOR

    +
    +
    +

    ml-yacc(1) was written by David Tarditi.

    +
    +
    +
    +
    +

    SEE-ALSO

    +
    +
    +

    ml-antlr(1), ml-lex(1), ml-ulex(1)

    +
    +
    +

    ML-Yacc User’s Manual +(also included in the SML/NJ documentation).

    +
    +
    +
    +
    +

    COPYING

    +
    +
    +

    Copyright © 2020 The Fellowship of SML/NJ

    +
    +
    +

    This is free software; see the source for copying conditions. There is NO +warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.

    +
    +
    +
    +
    + + + \ No newline at end of file diff --git a/doc/html/man/man1/sml.1.html b/doc/html/man/man1/sml.1.html new file mode 100644 index 0000000..f3f1c51 --- /dev/null +++ b/doc/html/man/man1/sml.1.html @@ -0,0 +1,642 @@ + + + + + + + +SML(1) + + + + + +
    +
    +

    SYNOPSIS

    +
    +
    +

    sml [SML-OPTIONS] [OPTIONS] [FILES]

    +
    +
    +
    +
    +

    DESCRIPTION

    +
    +
    +

    Standard ML of New Jersey is an incremental, interactive compiler that +accepts SML declarations and expressions to be evaluated from standard input.

    +
    +
    +
    +
    +

    OPTIONS

    +
    +
    +
    +
    -32
    +
    +

    run the 32-bit version of the SML/NJ system (currently the default).

    +
    +
    -64
    +
    +

    run the 64-bit version of the SML/NJ system. This option only applies to +the x86-64 (aka amd64) architecture.

    +
    +
    @SMLwordsize
    +
    +

    echo the wordsize (i.e., either 32 or 64) of the system to standard +output and then exit. (This option was added in version 110.97).

    +
    +
    @SMLversion
    +
    +

    echo the command name and SML/NJ version (e.g., "sml 110.99.5") +to standard output and then exit.

    +
    +
    @SMLload=IMAGE
    +
    +

    specifies the name of the heap-image file to load. The heap suffix can be omitted +as long as there there is not a file of the same name.

    +
    +
    @SMLcmdname=NAME
    +
    +

    set the command name; this is the value returned by CommandLine.name().

    +
    +
    @SMLsuffix
    +
    +

    echo the heap suffix for the system to standard output and then exit

    +
    +
    @SMLalloc=SIZE
    +
    +

    Specify the prefered size of the allocation area

    +
    +
    @SMLrun=RUNTIME
    +
    +

    specifies runtime system

    +
    +
    @SMLquiet
    +
    +

    load heap image silently (default)

    +
    +
    @SMLverbose
    +
    +

    show heap image load progress

    +
    +
    @SMLobjects
    +
    +

    show list of executable objects

    +
    +
    @SMLdebug=FILE
    +
    +

    write debugging info to file

    +
    +
    -Cctl=value
    +
    +

    set the control ctl to value.

    +
    +
    -Dname=value
    +
    +

    define the CM variable name to have the given value.

    +
    +
    -D'name
    +
    +

    define the CM variable name to have the value 1.

    +
    +
    -Uname
    +
    +

    remove any definition of the CM variable name.

    +
    +
    -H
    +
    +

    produce complete help listing

    +
    +
    -h
    +
    +

    produce minimal help listing

    +
    +
    -hlevel
    +
    +

    help with obscurity limit

    +
    +
    -S
    +
    +

    list all the controls along with their default value

    +
    +
    -slevel
    +
    +

    print a limited list of settings. +The number of levels of the control heirarchy is controled by level, with 0 printing +just the root of the heirarchy.

    +
    +
    -E
    +
    +

    list all the controls along with their corresponding environment variables

    +
    +
    -elevel
    +
    +

    print a limited list of environment variables; +The number of levels of the control heirarchy is controled by level, with 0 printing +just the root of the heirarchy.

    +
    +
    -m
    +
    +

    switch to CM.make

    +
    +
    -a
    +
    +

    switch to CM.autoload; default

    +
    +
    file.cm
    +
    +

    run CM.make or CM.autoload on file.cm and then return to the SML prompt. +Which command is run is determined by the -m and -a flags.

    +
    +
    file.sig
    +
    +

    execute use file.sig before entering the SML top-level loop.

    +
    +
    file.sml
    +
    +

    (use) +execute use file.sml before entering the SML top-level loop.

    +
    +
    file.fun
    +
    +

    (use) +execute use file.fun before entering the SML top-level loop.

    +
    +
    +
    +
    +
    +
    +

    AUTHOR

    +
    +
    +

    sml(1) was originally created by Andrew Appel and David MacQueen in 1987. +The system is currently maintained by Matthias Blume, David MacQueen, and John Reppy.

    +
    +
    +
    +
    +

    SEE-ALSO

    +
    +
    +

    smlnj(7)

    +
    + +
    +
    +
    +

    COPYING

    +
    +
    +

    Copyright © 2020 The Fellowship of SML/NJ

    +
    +
    +

    This is free software; see the source for copying conditions. There is NO +warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.

    +
    +
    +
    +
    + + + \ No newline at end of file diff --git a/doc/html/man/man7/smlnj.7.html b/doc/html/man/man7/smlnj.7.html new file mode 100644 index 0000000..86f1067 --- /dev/null +++ b/doc/html/man/man7/smlnj.7.html @@ -0,0 +1,535 @@ + + + + + + + +SMLNJ(7) + + + + + +
    +
    +

    DESCRIPTION

    +
    +
    +

    Compiled programs in the SML/NJ system are represented using a pair of the +runtime system executable and a heap image file. Heap images have a filename +suffix that is based on the architecture and operating system.

    +
    +
    +
    +
    +

    AUTHOR

    +
    +
    +

    The Standard ML of New Jersey system was originally created by Andrew Appel +and David MacQueen in 1987, and is currently supported by Matthias Blume, +David MacQueen, and John Reppy.

    +
    +
    +

    Many people have contributed to SML/NJ over the 30+ years since the +project was started. These include (but are not limited to) +William Aitken +Lars Bergstrom, +Matthias Blume, +Pierre Cregut, +Adam T. Dingle, +Damien Doligez, +Scott Draves, +Bruce F. Duba +Emden Gansner, +Lal George, +Georges Gonthier, +Yngvi Guttesen, +Lorenz Huelsbergen, +Trevor Jim, +George Kuan, +Christopher League, +Mark Leone, +Allen Leung, +Stefan Monnier, +Greg Morrisett, +Riccardo Pucella, +Mike Rainey, +Norman Ramsey, +Jon Riehl, +John Reppy, +Gene Rollins, +Nick Rothwell, +Bratin Saha, +Zhong Shao, +Konrad Slind, +David Tarditti, +Andrew Tolmach, +Valery Trifonov, +Aaron Turon, +and Peter Weinberger.

    +
    +
    +
    +
    +

    SEE-ALSO

    + +
    +
    +

    COPYING

    +
    +
    +

    Copyright © 2020 The Fellowship of SML/NJ

    +
    +
    +

    This is free software; see the source for copying conditions. There is NO +warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.

    +
    +
    +
    +
    + + + \ No newline at end of file diff --git a/doc/html/readme/110.27-README.html b/doc/html/readme/110.27-README.html new file mode 100644 index 0000000..9b33e8a --- /dev/null +++ b/doc/html/readme/110.27-README.html @@ -0,0 +1,696 @@ + + + + SML/NJ Version 110.25 NEWS + + + +

    Standard ML of New Jersey
    + Version 110.27, April 10, 2000

    +
    + +
    + http://cm.bell-labs.com/cm/cs/what/smlnj/index.html +
    + +
    +
    +

    Warning

    +
    +
    +
    + + This version is intended for compiler hackers. + We are in the midst of substantial structural changes, + and this is a snapshot. + +
    +
    + +
    + +

    Summary:

    +
      +
    • This version has some minor tweeks to FLINT (after the major merge + in 110.26). Work continues on tuning FLINT and the various optimizations + it implements. +

      +

    • CM has been revised extensively, and the modmap environment mechanism + supporting stubbified pickles has been reworked completely. The pathconfig + file has been simplified. Installation scripts have been further + modified. See src/system/README and the latest version of the + CM manual at +
      + + +
      + for further information about these changes. +

      +

    • MRISC, and particularly the x86 back end have been modified extensively. +

      +

    • There are a few updates to the SML/NJ Library +

      +

    • Reported bug fixes:
      + 1556. (jhr) signal race condition
      + Some CM bugs (not recorded) +

      +

    • Distribution file names have been simplified. They no longer start + with the version number (e.g. "110.27-config.tar.gz" is now + simply "config.tar.gz"). The boot directory tarballs are now + "boot.alpha32-unix.tar.gz", etc. (i.e. no version number and the + "sml." prefix is dropped). The new install script will restore + the usual name (e.g. "sml.boot.alpha32-unix" when the tarball is + unpacked. [We dropped the initial "sml." for the boot tarballs to + get the file names under 28 characters because of a limitation of + the Bell Labs ftp server.] + The version README file is still named 110.27-README, however. +
      +
      +     110.27-README
      +     HISTORY
      +     MLRISC.tar.gz
      +     boot.alpha32-unix.tar.gz
      +     boot.hppa-unix.tar.gz
      +     boot.ppc-unix.tar.gz
      +     boot.sparc-unix.tar.gz
      +     boot.x86-unix.tar.gz
      +     cm.tar.gz
      +     compiler.tar.gz
      +     config.tar.gz
      +     ml-burg.tar.gz
      +     ml-lex.tar.gz
      +     ml-yacc.tar.gz
      +     runtime.tar.gz
      +     smlnj-lib.tar.gz
      +     system.tar.gz
      +
      +
      + +
      +

      Change Details

      +

      FLINT

      +Improved handling of branches (mostly those generated from +polymorphic equality), removed switchoff and changed the +default optimization settings (more cpsopt and less flintopt). + +
      +

      MLRISC

      +
        +
      1. Register Allocator +
          +
        1. The interface and implementation of the register allocator have been +changed slightly to accommodate the possibility of skipping +the register allocation phases completely and go directly to +memory allocation. This is needed for C-- use. +
        2. I've improved the spill propagation algorithm, using an approximation +of maximal weighted independent sets. This affects only the x86 +platform. +
        +

        +

      2. MLTREE +
          +
        1. Renamed the constructor CALL in MLTREE by popular demand. +
        +

        +

      3. X86 +
          +
        1. More assembly output problems involving the indexed addressing mode +on the x86 have been found and corrected. Thanks to Fermin Reig for the +fix. +
        2. x86Rewrite bug with MUL3 (found by Lal) +
        3. Added the instructions FSTS, FSTL +
        4. The old code generated for SETcc was completely wrong. +The Intel optimization guide is VERY misleading. +
        5. Various fixes related floating point, and extensions. +
        6. Things like +
          +       jmp %eax
          +       jmp (%eax)
          +
          +are now output as +
          +       jmp *%eax
          +       jmp *(%eax)
          +
          +
        7. Yet another fix for x86 assembly for idivl, imull, mull and friends. +
        8. I've changed andl to testl in the floating point test sequence +whenever appropriate. The Intel optimization guide states that +testl is perferable to andl. +
        +

        +

      4. Alpha +
          +
        1. Some extra patterns related to loads with signed/zero extension +provided by Fermin. +
        2. Added the instructions LDBU, LDWU, STB, STW as per Fermin's suggestion. +
        3. Added a new mode byteWordLoadStores to the functor parameter to Alpha() +
        4. Added reassociation code for address computation. +
        +

        +

      5. PA-RISC +
          +
        1. B label should not be a delay slot candidate! Why did this work? +
        2. ADDT(32, REG(32, r), LI n) now generates one +instruction instead of two, as it should be. +
        3. The assembly syntax for fstds and fstdd was wrong. +
        4. Added the composite instruction COMICLR/LDO, which is the immediate +operand variant of COMCLR/LDO. +
        5. Long jumps in span dependence resolution used to depend on the existence +of the base pointer in the SML/NJ runtime. +

          +A jump to a long label L was expanded into the following sequence: +

               
          +      LDIL %hi(L-8192), %r29
          +      LDO  %lo(L-8192)(%r29), %r29
          +      ADD  %r29, baseptr, %r29
          +      BV,n %r0(%r29)
          +
          +I've changed it so that the following sequence of instructions +are generated, which doesn't mention the base pointer at all: +
          +         BL,n  L', %r29           /* branch and link, L' + 4 -> %r29 */
          +    L':  ADDIL L-(L'+4), %r29     /* Compute address of L */ 
          +         BV,n  %r0(%r29)          /* Jump */ 
          +
          +
        +

        +

      6. Generic MLRISC +
          +
        1. shuffle.sml rewritten to be slightly more efficient +
        2. DIV bug in mltree-simplify fixed (found by Fermin) +
        +

        +

      7. Assembly Output +
          +
        1. When generating assemby, resolve the value of client defined constants, +instead of generating symbolic values. This is controlled by the +new flag "asm-resolve-constants", which is default to true. +
        2. Added a new flag +
          +asm-indent-copies (default to false) +
          +When this flag is on, parallel copies will be indented an extra level. +
        3. Machine Descriptions/Generation +
            +
          1. The precedence parser was slightly broken when parsing infixr symbols. +
          2. The type generalizing code had the bound variables reversed, resulting +in a problem during arity raising. +
          3. Various fixes in machine descriptions. +
          +
        +
      +

      CPS->MLRISC Code Generation

      +

      +This release contains *MAJOR* changes to the way code is generated from CPS +in the module mlriscGen, and in various backend modules. +

      +

        +
      1. Forward propagation fix. +

        +There was a bug in forward propagation introduced at about the same time +as the MLRISC x86 backend, which prohibits coalescing to be +performed effectively in loops. +

        +Effect: speed up of loops in RISC architectures. +By itself, this actually slowed down certain benchmarks on the x86. +

        +

      2. Forward propagating addresses from consing. +

        +I've changed the way consing code is generated. Basically I separated +out the initialization part: +

        +        store tag,   offset(allocptr)
        +        store elem1, offset+4(allocptr)
        +        store elem2, offset+8(allocptr)
        +        ...
        +        store elemn, offset+4n(allocptr)
        +
        +and the address computation part: +
        +        celladdr <- offset+4+alloctpr
        +
        +and move the address computation part +

        +Effect: register pressure is generally lower as a result. This +makes compilation of certain expressions much faster, such as +long lists with non-trivial elements. +

        +       [(0,0), (0,0), .... (0,0)]
        +
        +

        +

      3. Base pointer elimination. +

        +As part of the linkage mechanism, we generate the sequence: +

        +     L:  ...  <- start of the code fragment
        + 
        +     L1:
        +         base pointer <- linkreg - L1 + L
        +
        +The base pointer was then used for computing relocatable addresses +in the code fragment. Frequently (such as in lots of continuations) +this is not needed. We now eliminate this sequence whenever possible. +

        +For compile time efficiency, I'm using a very stupid local heuristic. +But in general, this should be done as a control flow analysis. +

        +Effect: Smaller code size. Speed up of most programs. +

        +

      4. Frequency annotations. +

        +I've added an annotation that states that all call gc blocks have zero +execution frequencies. This improves register allocation on the x86. +

      +

      +

      Aliasing

      +

      +This update contains a rewritten (and hopefully more correct) module +for extracting aliasing information from CPS. +

      +To turn on this feature: +

      +    Compiler.Control.CG.memDisambiguate := true
      +
      +To pretty print the region information with assembly +
      +    Compiler.Control.MLRISC.getFlag "asm-show-region" := true;
      +
      +To control how many levels of aliasing information are printed, use: +
      +    Compiler.Control.MLRISC.getInt "points-to-show-level" := n
      +
      +The default value of n is 3. + +

      Benchmarks

      +

      +I've only performed the comparison with 110.25. +

      + The platforms are: +

        
      +    HPPA  A four processor HP machine (E9000) with 5G of memory.
      +    X86   A 300Hhz Pentium II with 128M of memory, and 
      +    SPARC An Ultra sparc 2 with 512M of memory.
      +
      +I used the following parameters for the SML benchmarks: +
      +             @SMLalloc
      +     HPPA    256k
      +     SPARC   512k
      +     X86     256k
      +
      +

      +

      COMPILATION TIME

      +

      + Here are the numbers comparing the compilation times of the compilers. + I've only compared 110.25 compiling the new sources versus + a fixpoint version of the new compiler compiling the same. +

      +                 110.25                                  New
      +           Total  Time in RA  Spill+Reload   Total  Time In RA Spill+Reload
      +     HPPA   627s    116s        2684+3584     599s    95s       1003+1879
      +     SPARC  892s    173s        2891+3870     708s    116s      1004+1880
      +     X86    999s    315s       94006+130691   987s    296s    108877+141957
      +
      +               110.25         New
      +            Code Size      Code Size
      +     HPPA   8596736         8561421
      +     SPARC  8974299         8785143
      +     X86    9029180         8716783
      +
      + So in summary, things are at least as good as before. Dramatic + reduction in compilation is obtained on the Sparc; I can't explain it, + but it is reproducible. Perhaps someone should try to reproduce this + on their own machines. + +

      SML BENCHMARKS

      +

      + On the average, all benchmarks perform at least as well as before. +

      +      HPPA         Compilation Time     Spill+Reload      Run Time
      +                 110.25  New            110.25    New   110.25  New    
      +
      +      barnesHut  3.158  3.015  4.75%    1+1       0+0   2.980  2.922   2.00%
      +          boyer  6.152  5.708  7.77%    0+0       0+0   0.218  0.213   2.34%
      +   count-graphs  1.168  1.120  4.32%    0+0       0+0  22.705 23.073  -1.60%
      +            fft  0.877  0.792 10.74%    1+3       1+3   0.602  0.587   2.56%
      +    knuthBendix  3.180  2.857 11.32%    0+0       0+0   0.675  0.662   2.02%
      +         lexgen  6.190  5.290 17.01%    0+0       0+0   0.913  0.788  15.86%
      +           life  0.803  0.703 14.22%   25+25      0+0   0.153  0.140   9.52%
      +          logic  2.048  2.007  2.08%    6+6       1+1   4.133  4.008   3.12%
      +     mandelbrot  0.077  0.080 -4.17%    0+0       0+0   0.765  0.712   7.49%
      +         mlyacc 22.932 20.937  9.53%  154+181    32+57  0.468  0.430   8.91%
      +        nucleic  5.183  5.060  2.44%    2+2       0+0   0.125  0.120   4.17%
      +  ratio-regions  3.357  3.142  6.84%    0+0       0+0  116.225 113.173 2.70%
      +            ray  1.283  1.290 -0.52%    0+0       0+0   2.887  2.855   1.11%
      +         simple  6.307  6.032  4.56%   28+30      5+7   3.705  3.658   1.28%
      +            tsp  0.888  0.862  3.09%    0+0       0+0   7.040  6.893   2.13%
      +           vliw 24.378 23.455  3.94%  106+127    25+45  2.758  2.707   1.91%
      +  --------------------------------------------------------------------------
      +   Average                     6.12%                                   4.09%
      +
      +      SPARC        Compilation Time     Spill+Reload      Run Time
      +                 110.25  New            110.25    New   110.25  New    
      +
      +      barnesHut  3.778  3.592  5.20%    2+2       0+0   3.648  3.453    5.65%
      +          boyer  6.632  6.110  8.54%    0+0       0+0   0.258  0.242    6.90%
      +   count-graphs  1.435  1.325  8.30%    0+0       0+0  33.672 34.737   -3.07%
      +            fft  0.980  0.940  4.26%    3+9       2+6   0.838  0.827    1.41%
      +    knuthBendix  3.590  3.138 14.39%    0+0       0+0   0.962  0.967   -0.52%
      +         lexgen  6.593  6.072  8.59%    1+1       0+0   1.077  1.078   -0.15%
      +           life  0.972  0.868 11.90%   26+26      0+0   0.143  0.140    2.38%
      +          logic  2.525  2.387  5.80%    7+7       1+1   5.625  5.158    9.05%
      +     mandelbrot  0.090  0.093 -3.57%    0+0       0+0   0.855  0.728   17.39%
      +         mlyacc 26.732 23.827 12.19%  162+189    32+57  0.550  0.560   -1.79%
      +        nucleic  6.233  6.197  0.59%    3+3       0+0   0.163  0.173   -5.77%
      +  ratio-regions  3.780  3.507  7.79%    0+0       0+0 133.993 131.035   2.26%
      +            ray  1.595  1.550  2.90%    1+1       0+0   3.440  3.418    0.63%
      +         simple  6.972  6.487  7.48%   29+32      5+7   3.523  3.525   -0.05%
      +            tsp  1.115  1.063  4.86%    0+0       0+0   7.393  7.265    1.77%
      +           vliw 27.765 24.818 11.87%  110+135    25+45  2.265  2.135    6.09%
      +  ----------------------------------------------------------------------------
      +   Average                     6.94%                                    2.64%
      +
      +      X86          Compilation Time     Spill+Reload      Run Time
      +                 110.25  New            110.25    New   110.25  New    
      +
      +      barnesHut  5.530  5.420  2.03%  593+893   597+915   3.532  3.440   2.66%
      +          boyer  8.768  7.747 13.19%  493+199   301+289   0.327  0.297  10.11%
      +   count-graphs  2.040  2.010  1.49%  298+394   315+457  26.578 28.660  -7.26%
      +            fft  1.327  1.302  1.92%  112+209   115+210   1.055  0.962   9.71%
      +    knuthBendix  5.218  5.475 -4.69%  451+598   510+650   0.928  0.932  -0.36%
      +         lexgen  9.970  9.623  3.60% 1014+841  1157+885   0.947  0.928   1.97%
      +           life  1.183  1.183  0.00%  162+182   145+148   0.127  0.103  22.58%
      +          logic  3.285  3.512 -6.45%  514+684   591+836   5.682  5.577   1.88%
      +     mandelbrot  0.147  0.143  2.33%   38+41     33+54    0.703  0.690   1.93%
      +         mlyacc 35.457 32.763  8.22% 3496+4564 3611+4860  0.552  0.550   0.30%
      +        nucleic  7.100  6.888  3.07%  239+168   201+158   0.175  0.173   0.96%
      +  ratio-regions  6.388  6.843 -6.65% 1182+257   981+300  120.142 120.345 -0.17%
      +            ray  2.332  2.338 -0.29%  346+398   402+494   3.593  3.540   1.51%
      +         simple  9.912  9.903  0.08% 1475+941  1579+1168  3.057  3.178  -3.83%
      +            tsp  1.623  1.532  5.98%  266+200   250+211   8.045  7.878   2.12%
      +           vliw 33.947 35.470 -4.29% 2629+2774 2877+3171  2.072  1.890   9.61%
      +  ----------------------------------------------------------------------------
      +   Average                     1.22%                                     3.36%
      +
      + + +
      +

      Boot code and glue scripts

      + +

      Size info in BOOTLIST

      +

      + The BOOTLIST file now has an optional first line that specifies an + upper bound on the number of boot files and an upper bound on the + length of each individual name. With this, there are no longer + hard-wired restrictions on these values in the runtime system. + (If the specification is missing in BOOTLIST, the runtime system + falls back to its old behavior, i.e., hard-wired defaults.) +

      +

      Allocation-size heuristics in .run-sml

      +

      + The .run-sml scripts tries to read processor cache size from + /proc/cpuinfo. This works on Linux and is important for small-cache + Celeron systems that suffer badly when allocation size is set too + high. +

      +

      Install script

      +

      +

        +
      • Written in a more modular fashion (using shell functions). +
      • Made more robust. +
      • Automagically fetches archive files over the network if they do not + exist locally. Thus, you only need to fetch config.tar.gz yourself. + Unpack it and go! + (Requires "wget" or "lynx" to be installed on the system and a + live connection to the internet. Moreover, the contents of + config/srcarchiveurl must be set properly.) + For CVS users, this may be convenient when fetching new sets of binfiles. +
      • Handles archive files with or without version number and compressed + with one of "gzip", "compress", or "bzip2". Recognized suffixes are + ".tar.gz", ".tgz", ".tar", ".tar.Z", and ".tar.bz2". +
      +

      +

      PIDMAP file

      +

      +There is a file called PIDMAP in the bootfile directory. +It is used to minimize the amount of dynamic state that needs to be +stowed away for the purpose of sharing between interactive system +and user code. +

      +

      Building standalone programs

      +

      +The command ml-build can be used to build standalone programs. +ml-build takes three arguments: +

        +
      1. the name of the CM library that implements and exports the "main" +function of your program +
      2. the name of the "main" function of your program as exported by 1. +(The function must have a type that makes it suitable as an argument +to SMLofNJ.exportFn.) +
      3. the name of the heapfile to be generated +
      +

      Other build scripts

      +

      +ml-{lex,yacc} build scripts now make use of the new mechanism for +building standalone programs. +

      +

      Fixpoint script

      +

      +I added a re-written version of Dave's fixpt script to src/system. +Changes relative to the original version: +

      +

        +
      • sh-ified (not everybody has ksh) +
      • automatically figures out which architecture it runs on +
      • uses ./makeml a bit more cleverly +
      • never invokes ./installml (and, thus, does not clobber your +good and working installation of sml in case something goes wrong) +
      • accepts max iteration count using option "-iter " +
      • accepts a "base" name using option "-base " +
      +

      +It does not build any extraneous heap images but directly rebuilds +bin- and boot-hierarchies using makeml's "-rebuild" switch. Finally, +it can incorporate existing bin- and boot- hierarchies. For example, +suppose the base is set to "sml" (which is the default). Then it +successively builds +

      +          sml.bin.<arch>-unix and sml.boot.<arch>-unix
      +  then    sml1.bin.<arch>-unix and sml1.boot.<arch>-unix
      +  then    sml2.bin.<arch>-unix and sml2.boot.<arch>-unix
      +  ...
      +  then    sml<n>.bin.<arch>-unix and sml<n>.boot.<arch>-unix
      +
      +and so on. If any of these already exist, it will just use what's +there. In particular, many people will have the initial set of bin +and boot files around, so this saves time for at least one full +rebuild. Having sets of the form <base><k>.{bin,boot}.<arch>-unix for +<k>=1,2,... is normally not a good idea when invoking fixpt. However, +they might be the result of an earlier partial run of fixpt (which +perhaps got accidentially killed). In this case, fixpt will quickly +move through what exists before continuing where it left off earlier, +and, thus, saves a lot of time. +

      +

      Runtime system code

      +

      +

        +
      • fixed several gcc -Wall warnings that were caused by missing header +files, missing initializations, etc., in runtime (not all warnings +eliminated, though) +
      • had to "un-fix" some of them later because they broke the HPPA compile +
      +

      +


      +

      CM

      +

      +

      Several manual updates

      +

      +I always try to keep the manual in sync with CM's latest features. +

      +

      Bootstrap compilation

      +

      +No more "CMB.deliver" +

      +

        +
      • All work is done by CMB.make (as it used to be in the old CM). +
      • CMB.make can be used even with existing bootfiles, i.e., bootfiles do +not have to be removed beforehand. +
      • In "paranoid mode" CM checks a stable libraries CRC checksum to +verify that it is "valid". (In "normal mode", such checks do not +occur.) Paranoid mode is used for bootstrap compilation. This is +what makes it possible to re-use existing bootfiles. +
      +

      +

      Initial glue code (init.cmi)

      +
        +
      • treated as a genuine library now +
      • there are no more "built-in" modules +
      +

      +

      CM API

      +

      +

        +
      1. CM.Anchor.anchor instead of CM.Anchor.{set,cancel} +
          +
        • Upon request by Elsa. Anchors now controlled by get-set-pair +like most other CM state variables. +
        +

        +

      2. CM tools: +
          +
        • It is now possible to have tools that accept additional +"command line" parameters (specified in the .cm file at each +instance where the tool's class is used). +
        • The parser understands named parameters and recursive options. +
        • new "make" and "shell" tools added to facilitate fairly seemless +hookup to portions of code managed using Makefiles or Shell scripts. +
        • There are no classes "shared" or "private" anymore. Instead, +the sharing annotation is now a parameter to the "sml" class. +
        • Tools.registerStdShellCmdTool (from smlnj/cm/tool.cm) takes an +additional argument called "template" which is an optional +string that specifiel the layout of the tool command line. See +the CM manual for explanation. +
        • A special-purpose tool can be "registered" by simply dropping +the corresponding <...>-tool.cm (and/or <...>-ext.cm) into the +same directory where the .cm file lives that uses this tool. +(The behavior/misfeature until now was to look for the tool +description files in the current working directory.) As +before, tool description files could also be anchored -- in +which case they can live anywhere they like. Following the +recent e-mail discussion, this change should make it easier to +have special-purpose tools that are shipped together with the +sources of the program that uses them. +Bug: such a tool does not get un-registered after being done. +
        +
      +

      +

      Library names

      +

      +Library names have been completely re-organized. +Many libraries have been consolidated so that they share the same +path anchor. For example, all MLRISC-related libraries are +anchored at MLRISC, most libraries that are SML/NJ-specific are +under "smlnj". Notice that names like host-cmb.cm or +host-compiler.cm no longer exist. See system/README for a +complete description of the new naming scheme. Quick reference: +

      +    host-cmb.cm        -> smlnj/cmb.cm
      +    host-compiler.cm   -> smlnj/compiler.cm
      +    full-cm.cm         -> smlnj/cm.cm
      +    <arch>-<os>.cm     -> smlnj/cmb/<arch>-<os>.cm
      +    <arch>-compiler.cm -> smlnj/compiler/<arch>.cm
      +
      +

      +

      CM bug fixes

      +
        +
      • exceptions in user code are being passed through (i.e., reach top level) +
      • more bugs in paranoia mode fixed +
      • bug related to checking group owners fixed +
      • better error handling (suppresses many followup-messages) +
      +

      +

      Internals

      +

      +

      +
      "Global" modmap: +
      CM now maintains one "global" modmap that is used for all stable +libraries. The use of such a global modmap maximizes sharing and +minimizes the need for re-traversing parts of environments during +modmap construction. (However, this has minor impact since modmap +construction seems to account for just one percent or less of total +compile time.) +
      +

      + +


      +

      Compiler Internals

      +

      Environment data structures: No more CMStaticEnv

      +
        +
      • no CMEnv, no BareEnvironment (actually, +only BareEnvironment, +but it is called Environment), no conversions between different +kinds of static environments. +

        +

      • There is still a notion of a "modmap", but such modmaps are generated +on demand at the time when they are needed. This sounds slow, but I +sped up the code that generates modmaps enough for this not to lead to +a slowdown of the compiler (at least I didn't detect any). +

        +

      • To facilitate rapid modmap generation, static environments now +contain an (optional) "modtree" structure. Modtree annotations are +constructed by the unpickler during unpickling. (This means that +the elaborator does not have to worry about modtrees at all.) +Modtrees have the advantage that they are compositional in the same +way as the environment data structure itself is compositional. +As a result, modtrees never hang on to parts of an environment that +has already been rendered "stale" by filtering or rebinding. +

        +

      • all files that I touched now compile without warnings (other than +"polyEqual" warnings). +

        +

      • compiler now tends to run "leaner" (i.e., ties up less memory in +redundant modmaps) +
      +

      +

      Stats phase "genmap" added

      +
        +
      • It measures time spent during on-the-fly modmap generation. +
      +

      +

      Changes on behalf of CM

      +

      +

        +
      • Compiler.CMSA eliminated +

        +No longer supported by CM anyway. +

        +

      • Fixed bugs in pickler that kept biting Stefan +
          +
        • past refs to past refs (was caused by the possibility that +ad-hoc sharing is more discriminating than hash-cons sharing) +
        • integer overflow on LargeInt.minInt +
        +

        +

      • Handling of "core" environment +

        +I eliminated coreEnv from compInfo. Access to the Core +structure is now done via the ordinary static environment that is +context to each compilation unit. +

        +To this end, I arranged that instead of "structure Core" a +"structure _Core" is bound in the pervasive environment. Core +access is done via _Core (which can never be accidentially rebound +because _Core is not a legal surface-syntax symbol). +

        +The current solution is much cleaner because the core environment +is now simply part of the pervasive environment which is part of +every compilation unit's context anyway. In particular, this +eliminates all special-case handling that was necessary until now +in order to deal with dynamic and symbolic parts of the core +environment. +

        +Remaining hackery (to bind the "magic" symbol _Core) is localized +in the compilation mananger's bootstrap compiler (actually: in the +"init group" handling). See the comments in +src/system/smlnj/init/init.cmi for more details. +

        +I also tried to track down all mentions of "Core" (as string +argument to Symbol.strSymbol) in the compiler and replaced them +with a reference to the new CoreSym.coreSym. Seems cleaner since +the actual name appears in one place only. +

      +

      + +


      + + +
      + Lal George
      + + +Last modified: Wed Apr 19 16:24:39 EDT 2000 + +
      +
    + + diff --git a/doc/html/readme/110.28-README.html b/doc/html/readme/110.28-README.html new file mode 100644 index 0000000..25c8a5d --- /dev/null +++ b/doc/html/readme/110.28-README.html @@ -0,0 +1,333 @@ + + + + SML/NJ Version 110.28 NEWS + + + +

    Standard ML of New Jersey
    + Version 110.28, May 1, 2000

    +
    + +
    + http://cm.bell-labs.com/cm/cs/what/smlnj/index.html +
    + +
    +
    +

    Warning

    +
    +
    +
    + + This version is intended for compiler hackers. + We are in the midst of substantial structural changes, + and this is a snapshot. + +
    +
    + +
    + +

    Summary:

    +
      +
    • This version has some tuning of phase ordering for FLINT, which +mostly solves the excessive space problem during compilation. With +this tuning, 110.28 is slightly better than 110.25 in performance, +but not quite as good as 110.25+latest MLRISC. See the benchmarks +below. Futher work on the register allocator in MLRISC should +improve space performance further. +

      +

    • CM: +Installation scripts have been tweaked. A new noweb tool has been +added. ".fun" suffix has been added to those that imply sml compilation. +New version numbering scheme added. Alternative syntax for anchored +paths added. Parallel make made smarter. Support for "unsharing". +Simple "makedepend" functionality added. Further details below. +

      +See src/system/README and the latest version of the CM manual at +

      +<http://www.kurims.kyoto-u.ac.jp/~blume/SMLNJ-DEV/manual/index.html> +<http://www.kurims.kyoto-u.ac.jp/~blume/SMLNJ-DEV/manual.ps> +
      +for further information about visible changes. +

      +

    • A few MLRISC changes, most not SML/NJ related. +

      +

    • ML-Yacc: updated manual and examples to be SML '97, SML/NJ 110+ compliant. +Examples now compiled with CM. +

      +

    • Reported bug fixes: +
      +   1498. Specialized real arrays not pretty-printed correctly
      +   1510. Signature matching bug makes "casts" possible
      +   1562. CM complains about unrecognized file extension ".fun"
      +   1563. redundant pathconfig contents with multiple builds
      +
      +

      +


      +

      Change Details

      +

      FLINT

      +Tuned ordering of phases to improve time/space performance. See +src/compiler/FLINT/opt/flintopt.txt for description of some of the +issues. +

      +


      +

      MLRISC

      +This update synchronizes my repository with Yale's. Most of these +changes, however, do not affect SML/NJ at all (the RA is an exception). +

      +

        +
      1. Register Allocator +
          +
        1. An improvement in the interference graph construction: +Given a copy +
          +            s <- t
          +
          +no interference edge between s and t is added for this definition of s. +

          +

        2. I've added two new spill heuristic modules that Fermin and I developed +(in the new library RA.cm). These are unused in SML/NJ but maybe +useful for others (Moby?) +

          +

        3. Added a flag "ra-dump-size" to print out the size of the flowgraph +and the interference graph. +
        +
      2. X86 +
          +
        1. Various fixes in the backend provided by Fermin [C--] and Lal. +
        +

        +

      3. Alpha +
          +
        1. Added the BSR instruction and code generation that goes with it [C--] +

          +

        2. Other fixes too numerous to recount provided by Fermin [C--] +

          +

        3. PSEUDOARITH was missing in AlphaRewrite. This causes an endless +loop in C--. +
        +

        +

      4. Regmaps +
          +
        1. The regmaps are not initialized with the identity physical bindings +at creation time. This is unneeded. +
        +

        +

      5. MLRISC Optimizations +
          +
        1. The DJ-Graph module can now compute the iterated dominance frontiers +intersects with liveness incrementally in linear time! Woohoo! +This is now used in my new SSA construction algorithm. +

          +

        2. THe branch reorganization module is now smarter about linear chains of +basic blocks. +
        +
      +

      +


      +

      CM

      +
        +
      • Several manual updates +
        +I always try to keep the manual in sync with CM's latest features. +
        +

        +

      • New noweb tool +
        +The existing system is entirely unaffected by this, but some CM users +have asked for renewed noweb support. Everything is documented in the +CM manual. +

        +New (plugin) libraries: +

        +   noweb-tool.cm
        +   nw-ext.cm
        +
        +
        +

        +

      • CM version numbering added +
        +This is an implementation of Lal's proposal for adding version +numbers and version checking to .cm files. Lal said that his +proposal was just that -- a proposal. For the time being I went +ahead and implemented it so that people can comment on it. +Everything is completely backward-compatible (except for the stable +library format, i.e., new bootfiles!). +

        +As usual, see the CM manual for details. +

        +

        +

      • Alternative syntax for anchored paths +
        +Dave has recently voiced the same concerns that I had when I did +this, so there should be some support. My take is that eventually +I will let support for the current syntax (where anchors are +"implicit") fade out in favor of the new, explicit syntax. +In order to be backward-compatible, both old and new syntax are +currently supported. +

        +Again, see the CM manual for details. +

        +

        +

      • Parallel make is trying to be slightly smarter +
        +When the master process finds a "bottleneck", i.e., when there is +only one compilation unit that can be compiled and everybody else +is waiting on it, then it will simply compile it directly instead +of clumsily telling one of the slaves to do it. +
        +

        +

      • Support for "unsharing" added +
        +This is necessary in order to be able to have two different +versions of the same library running at the same time (e.g., for +trying out a new MLRISC while still having the old MLRISC linked +into the current compiler, etc.) See the CM manual. +
        +

        +

      • makedepend +
        +Simple "makedepend" functionality added for generating Makefile +dependency information. (This is rather crude at the moment. +Expect some changes here in the future.) +
        +

        +

      • New sml class prefix +
        +".fun" added as a recognized suffix for ML files. Also documented +explicitly in the manual that the fallback behavior (unknown suffix +-> ML file) is not an official feature! +
        +

        +

      • Pickler +
        +Small changes to the pickler for stable libraries. +
        +

        +

      • Internal cleanup +
        +Several internal changes to CM (for cleanup/improvement). +
        +

        +

      • install.sh changes +
        +I changed config/install.sh to remove duplicate entries from the +lib/pathconfig file at the end. Moreover, the final version of +lib/pathconfig is sorted alphabetically. The same (sorting) is done +in src/system/installml. +

        +The config/install.sh script now consistently uses relative +pathnames in lib/pathconfig whenever the anchor is in the lib +directory. (So far this was true for the libraries that come +pre-compiled and bundled as part of the bootfiles but not for +libraries that are compiled by the script itself.) +

        +Changed install.sh script to handle archive files without version number +and to use "boot.-" instead of "sml.boot.-" for the +name of the boot file archive. +

        +
      +

      +


      +

      BENCHMARKS

      +Allen Leung, 28 April 2000: +

      +I've rerun the benchmarks to see if anything has slowed down in +MLRISC recently, but found nothing. I compared 110.25, 110.25+latest +MLRISC, and 110.27+ (which also has the latest MLRISC). 110.25 uses +cpsopt, and 110.27+ uses flintopt. [110.27+ is repository state +at the end of April, which is essentially 110.28 - dbm]. +

      +The results are: +

      +  110.25 versus 110.25+latest MLRISC
      +  ----------------------------------
      +           Name   Compilation             Runtime    Speedup
      +      barnesHut  5.560  5.368  3.57%    3.690  3.302  11.76%
      +          boyer  8.678  7.672 13.12%    0.313  0.312   0.53%
      +   count-graphs  2.048  1.915  6.96%   28.577 27.128   5.34%
      +            fft  1.325  1.233  7.43%    1.060  0.982   7.98%
      +    knuthBendix  5.337  4.680 14.03%    0.962  0.918   4.72%
      +         lexgen 10.310  9.950  3.62%    0.917  0.902   1.66%
      +           life  1.230  1.127  9.17%    0.128  0.100  28.33%
      +          logic  3.533  3.258  8.44%    5.750  5.413   6.22%
      +     mandelbrot  0.145  0.148 -2.25%    0.700  0.685   2.19%
      +         mlyacc 34.557 33.342  3.64%    0.553  0.533   3.75%
      +        nucleic  6.675  6.507  2.59%    0.173  0.167   4.00%
      +  ratio-regions  6.358  6.218  2.25%  119.753 120.772 -0.84%
      +            ray  2.280  2.247  1.48%    3.563  3.517   1.33%
      +         simple  9.798  9.650  1.54%    2.987  3.083  -3.14%
      +            tsp  1.763  1.528 15.38%    8.657  7.718  12.16%
      +           vliw 33.938 32.570  4.20%    1.982  2.000  -0.92%
      +Average speedup:  5.32%
      +Average compile time speedup:  5.95%
      +
      +As you can see, MLRISC on x86 has improved slightly since the +110.25. Now comparing 110.25 with 110.27+: +
      +   110.25 versus 110.27+
      +   ---------------------
      +           Name   Compilation             Runtime    Speedup
      +      barnesHut  5.560  5.568  -0.15%   3.690  3.347  10.26%
      +          boyer  8.678 10.058 -13.72%   0.313  0.322  -2.59%
      +   count-graphs  2.048  2.125  -3.61%  28.577 31.533  -9.38%
      +            fft  1.325  1.297   2.19%   1.060  1.052   0.79%
      +    knuthBendix  5.337  3.737  42.82%   0.962  1.175 -18.16%
      +         lexgen 10.310  9.985   3.25%   0.917  0.992  -7.56%
      +           life  1.230  1.105  11.31%   0.128  0.125   2.67%
      +          logic  3.533  3.302   7.02%   5.750  5.357   7.34%
      +     mandelbrot  0.145  0.162 -10.31%   0.700  0.753  -7.08%
      +         mlyacc 34.557 37.292  -7.33%   0.553  0.493  12.16%
      +        nucleic  6.675  7.717 -13.50%   0.173  0.165   5.05%
      +  ratio-regions  6.358  3.915  62.41% 119.753 126.153 -5.07%
      +            ray  2.280  2.142   6.46%   3.563  2.827  26.06%
      +         simple  9.798 10.067  -2.67%   2.987  2.812   6.22%
      +            tsp  1.763  1.605   9.87%   8.657  8.870  -2.41%
      +           vliw 33.938 46.907 -27.65%   1.982  1.802   9.99%
      +Average speedup:  1.77%
      +Average compile time speedup:  4.15%
      +
      +  110.25+latest MLRISC versus 110.27+
      +  -----------------------------------
      +           Name   Compilation              Runtime    Speedup
      +      barnesHut  5.368  5.568  -3.59%    3.302  3.347  -1.34%
      +          boyer  7.672 10.058 -23.73%    0.312  0.322  -3.11%
      +   count-graphs  1.915  2.125  -9.88%   27.128 31.533 -13.97%
      +            fft  1.233  1.297  -4.88%    0.982  1.052  -6.66%
      +    knuthBendix  4.680  3.737  25.25%    0.918  1.175 -21.84%
      +         lexgen  9.950  9.985  -0.35%    0.902  0.992  -9.08%
      +           life  1.127  1.105   1.96%    0.100  0.125 -20.00%
      +          logic  3.258  3.302  -1.31%    5.413  5.357   1.06%
      +     mandelbrot  0.148  0.162  -8.25%    0.685  0.753  -9.07%
      +         mlyacc 33.342 37.292 -10.59%    0.533  0.493   8.11%
      +        nucleic  6.507  7.717 -15.68%    0.167  0.165   1.01%
      +  ratio-regions  6.218  3.915  58.83%  120.772 126.153 -4.27%
      +            ray  2.247  2.142   4.90%    3.517  2.827  24.41%
      +         simple  9.650 10.067  -4.14%    3.083  2.812   9.66%
      +            tsp  1.528  1.605  -4.78%    7.718  8.870 -12.98%
      +           vliw 32.570 46.907 -30.56%    2.000  1.802  11.01%
      +Average speedup: -2.94%
      +Average compile time speedup: -1.68%
      +
      +Overall, I'd say 110.27+ is pretty competitive with 110.25. There are +some big improvements (ray, vliw, mlyacc) which can only be attributed to FLINT +changes. But something has slowed down (tsp, knuth-bendix, life, mandelbrot, +count-graphs). [We know the reasons with knuth-bendix.] +Also, compilation time in 110.27+ is generally slower (which is offset +by huge improvements in knuth-bendix and ratio-regions). +

      +


      + + +
      + Lal George
      + + +Last modified: Tue May 2 15:34:00 EDT 2000 + +
      +
    + + diff --git a/doc/html/readme/110.30-README.html b/doc/html/readme/110.30-README.html new file mode 100644 index 0000000..bb6c58f --- /dev/null +++ b/doc/html/readme/110.30-README.html @@ -0,0 +1,120 @@ + + + + SML/NJ Version 110.30 NEWS + + + +

    Standard ML of New Jersey
    + Version 110.30, November 4, 2000

    +
    + +
    + http://cm.bell-labs.com/cm/cs/what/smlnj/index.html +
    + +
    + +
    + +

    Summary:

    +
      +
    • Port to ppc Linux (thanks to Carl Pulley). +

      +

    • support for fsqrt, fptan, fcos, fsin. +

      +

    • Several CM and CMB changes. +

      +

    • Some critical bug fixes +

      +

    +
    + +

    Bug fixes

    +
      +
    • 1514. sockets c-library broken +

      +

    • 1582. SysErr exception connecting to socket +
    + +
    +

    Back ends

    +

    x86

    + All back ends now directly generate the sqrt instruction, and the Intel + x86 also generate the trig instructions to implement sine, cosine, and + tangent. +

    + The ICFP00, PCLubIN entry shows a significant improvement as a result + (most of it coming from fsqrt). +

    +

    +			     110.29	110.30	     Speedup%
    +			 ------------------------------------
    +    tests/chess.gml:	      21.63      18.38	      17.67	
    +    tests/cone-fractal.gml:    6.24       4.71	      32.46	
    +    tests/dice.gml:	       8.81       6.14	      43.53	
    +    tests/fractal.gml:	      46.25      35.57	      30.03     
    +    tests/golf.gml:	       3.24       2.62	      23.73     
    +    tests/holes.gml:	       4.26       3.15	      35.41     
    +    tests/intercyl.gml:	       4.43       2.52	      75.87     
    +    tests/large.gml:	       7.27       6.68	       8.83     
    +    tests/pipe.gml:	       8.71       5.23	      66.42     
    +    tests/snowgoon.gml:	       5.41       3.99	      35.83     
    +    ---------------------------------------------------------
    +    Geometric Mean	       8.06	  5.94	      31.64%    
    +
    +
    + +
    +

    CM

    +

    Changes to scripts

    +
      +
    • "ml-build" for generating standalone programs +
    • "ml-makedepend" (new!) for generating Makefile dependencies + (not generally required but some project might want them) +
    • "sml" - the main driver script for running SML/NJ. +
    +

    +

    + All these scripts now accept more CM-related command-line options. +

    + ml-build is faster and cleverer in avoiding unnecessary work. +

    + +

    Other Changes

    + Type of CM.mk_standalone has changed. +

    + All user-visible changes to CM (including the above-mentioned scripts) + are documented in the CM manual. Some errors and omissions have been + corrected as well. +

    + Bug fixes to CM's internals (e.g., missing or repeated error messages). +

    + Pickling/Unpickling improvements result in faster autoloading and + reduce resident memory size. + +


    +

    CMB

    + Conditional compilation within the "init library". + (One can now use "ifdef" and "ifndef" guards in init.cmi. + This is documented in the long comment at the beginning of + that file (src/system/smlnj/init/init.cmi).) +
    +

    Basis Library

    + + Added SMLofNJ.SysInfo.getHeapSuffix (for use by "ml-build"). + + +
    + + +
    + Lal George
    + + +Last modified: Tue Nov 7 15:36:53 EST 2000 + +
    +
    + + diff --git a/doc/html/readme/110.31-README.html b/doc/html/readme/110.31-README.html new file mode 100644 index 0000000..7b1f702 --- /dev/null +++ b/doc/html/readme/110.31-README.html @@ -0,0 +1,207 @@ + + + + SML/NJ Version 110.31 NEWS + + + +

    Standard ML of New Jersey
    + Version 110.31, December 8, 2000

    +
    + +
    + + Home page +
    + +
    +
    +

    Warning

    +
    +
    +
    + + This version is intended for compiler hackers. The + version ought to be stable, however we have not run + our full regression testing. + +
    +
    + + +
    + +

    Summary:

    +
      +
    • Socket related bug fixes. +

      +

    • Improvements to CM autoloading. +

      +

    • General cleanup in the use of CM libraries in the compiler. +

      +

    • A new x86 fp compilation strategy. +

      +

    • Removal of regmaps from MLRISC. +

      +

    +
    + +

    Bug fixes

    +
      +
    • 1514. sockets c-library broken +

      +

    • 1582. SysErr exception connecting to socket +

      +

    • 1585. getpeername in sockets +

      +

    + +
    +

    CM

    + Drastically improved link traversal code, resulting is faster load + times for CM and CMB. +

    + Changed CM tool-plugin mechanism. See new manual +

    + Made pickle-lib.cm and eliminated use of comp-lib.cm +

    + +


    + +

    SML/NJ Library

    + Fixed "where" clause to GraphSCCFn. +
    +

    MLRISC

    +

    Intel x86 floating point

    + As of 110.31, there is an alternative floating point code generator + and register allocator for the x86. Since this is still experimental, + by default this is turned off. To turn this on, do: + +
    +     CM.autoload "$smlnj/compiler.cm";
    +     Compiler.Control.MLRISC.getFlag "x86-fast-fp" := true;
    +
    + + The new floating point code generator treats the x86 fp stack as + 7 registers, plus one temporary, and directly allocates floating point + values into these registers. + Currently, fp parameter passing is still + done through memory, so the new code generator only benefits floating + point heavy loops. However, code compiled under the old and new + code generator can coexist. The algorithm is described in + + this paper. +

    + We compared Version 110.30 compiling the PCLubIN entry in the + ICFP'00 programming context. + +

    +			   110.30    new fp   Speedup
    +     (ICFP00, PCLubIN)
    +     chess.gml             22.16     20.98       5.63%
    +     cone-fractal.gml       5.70      5.45       4.51%
    +     cylinder.gml           1.61      1.58       2.28%
    +     dice.gml               7.33      6.88       6.57%
    +     ellipsoid.gml          1.35      1.30       4.16%
    +     fov.gml                2.63      2.51       4.70%
    +     Fractal.gml           42.08     41.03       2.56%
    +     golf.gml               3.09      2.95       4.75%
    +     holes.gml              3.72      3.50       6.40%
    +     house.gml              1.41      1.33       5.71%
    +     intercyl.gml           3.02      2.78       8.41%
    +     large.gml              8.01      7.81       2.64%
    +     pipe.gml               6.35      5.78      10.01%
    +     snowgoon.gml           4.70      4.31       8.95%
    +     spheres.gml            1.26      1.17       6.98%
    +     spotlight.gml          0.71      0.68       4.69% 
    +
    + + By inlining Array2 in the same benchmark we get the following results: + +
    +			       110.30   new fp Speedup
    +     chess.gml                 21.85s  21.46s  1.83%
    +     cone-fractal.gml           5.82s   5.47s  6.28%
    +     cylinder.gml               1.57s   1.61s -2.85%
    +     dice.gml                   7.57s   6.85s 10.50%
    +     ellipsoid.gml              1.33s   1.25s  6.74%
    +     fov.gml                    2.75s   2.57s  7.01%
    +     fractal.gml               22.64s  21.52s  5.20%
    +     golf.gml                   3.04s   2.92s  4.25%
    +     holes.gml                  3.66s   3.48s  5.11%
    +     house.gml                  1.39s   1.29s  7.74%
    +     intercyl.gml               3.00s   2.78s  7.91%
    +     large.gml                  7.91s   7.82s  1.13%
    +     pipe.gml                   6.44s   5.65s 13.98%
    +     snowgoon.gml               4.75s   4.29s 10.53%
    +     spheres.gml                1.22s   1.12s  8.36%
    +     spotlight.gml              0.71s   0.68s  5.62%
    +
    + + Results from other benchmarks: +
    +     barnes-hut            1.714     1.696        1.0%   
    +     fft                   0.954     0.906        5.2%
    +     mandelbrot            19.91     14.99       32.8%
    +     matrix-multiply(a)    47.77     45.81        4.3%
    +     matrix-multiply(b)    17.04     15.42       10.5%
    +     simple                 3.02      2.69       12.3%
    +     tsp                    1.75      1.656       5.6%
    +
    + NOTE: Matrix multiply(b) has all bounds checking removed. + + Each test is run 10 times and I take the average. + + Overall, the numbers do not improve as much as I was hoping, except for + mandelbrot. The following benchmarks compare smlnj with mlton and C: + +
    +		     mandelbrot fft    barnes-hut 
    +     sml/nj 110.30   19.91      0.96   1.71       
    +     sml/nj new fp   14.99      0.90   1.71       
    +     gcc -O          14.83      
    +     gcc -O6         14.01      0.68
    +     mlton -O6       17.46      1.04   1.62       
    +     (version 200000906) 
    +
    + +

    Internal regmap Changes

    +
      +

    1. Changed interface to CELLS and the type of cell, cellkind, cellset etc. + +

    2. No more regmaps!! The attributes of cells, including its current color, + are accessible from CELLS interface. Cells can now take arbitrary + annotations. [They will also have a width attribute in the next + go around.] + +

    3. Interface of STREAM etc have changed (again, no more regmap). + +

    4. Some MLTREE constructors, like IF, BCC, JMP, CALL, etc have + been simplified. CVTI2I has been renamed into SX and ZX respectively, + following the lambda rtl convention. + +

    5. The old RA interface was getting too complicated. There are now + two functors, RISC_RA (in ra/risc_ra.sml) and X86RA (in x86/ra/x86RA.sml) + which abstract out from all the ugly business. The first is for + RISC machines, and the second is for x86. Please let us know if you + use these functors. + +

    6. The cell change broke the peephole phases, because they used to + pattern match on specific cell number. I (Allen) hacked up a simple + tool to translate fake SML with where clauses into real ML. This makes + it much easier to write the rules. Seems to work. (See Tools/WhereGen) +
    + +
    + + +
    + Lal George
    + + +Last modified: Fri Dec 8 15:15:08 EST 2000 + +
    +
    + + diff --git a/doc/html/readme/110.32-README.html b/doc/html/readme/110.32-README.html new file mode 100644 index 0000000..b77dca8 --- /dev/null +++ b/doc/html/readme/110.32-README.html @@ -0,0 +1,140 @@ + + + + SML/NJ Version 110.32 NEWS + + + +

    Standard ML of New Jersey
    + Version 110.32, December 22, 2000

    +
    + +
    + + Home page +
    + +
    +
    +

    Warning

    +
    +
    +
    + + This version is intended for compiler hackers. The + version ought to be quite stable, however we have not run + our full regression testing. + +
    +
    + + +
    + +

    Summary:

    + This version is being used to check point a large number of changes + to MLRISC, which now uses infinite precision integers on all + architectures. We have run the Basis regressions tests so the + changes ought to be fairly robust. +

    + There are a number of other CM, Basis, and Tool related changes as + well. + +


    +

    MLRISC

    +

    Fixes

    + This change fixes two problems: +

    + +

      + +

    1. There was a proliferation of constructors in MLRISC to + handle integers of various sizes, e.g. +
      +    LI of int   | LI32 of Int32.int  | LI64 of Int64.int
      +    ...
      +
      + which are all replaced with +
      +    LI of IntInf.int
      +
      + +

    2. There is the ever nagging question of cross compiling to + 64bits. +
    + +

    Compilation Speed

    + The x86 compiling the compiler is 9% slower than 110.31, however, + a lot of this, and more, can be recovered. The compiler presently + uses infinite precision in the front end (AST creation), which is converted to 31 + and 32 bit integers in the middle end (FLINT and CPS), and then + converted back to infinite precision in MLRISC (with the current + changes). +

    + Infinite precision in the middle end will allow a uniform + treatment of optimizations across words and integers of different + widths/sizes. Currently, most optimizations are biased towards + integers and words represented in 31 bits only. +

    + +

    What's next

    + Not in chronological order: + +
      +
    1. Use infinite precision in the middle end to allow a + cleaner and more uniform optimization module for arithmetic. + +

    2. Support for the generation of infinite precision values + in the back end. + +

    3. Changes to support the single toplevel declaration: +
      +   structure LargeInt = IntInf
      +
      +
    + +
    + +

    Basis

    + Added the missing bit level functions to the Basis IntInf +structure. + +
    +

    Tools

    + Improvements to the 'nowhere' tool to handle OR-patterns. + The nowhere tool is a syntactic extension that allows for + guarded patterns. See MLRISC/Tools/Doc/nowhere.tex. + +
    +

    CM

    +
      +
    • Added a new "dir" tool to CM's toolset; this can simplify writing + CM descriptions (see the manual) + +

    • Made some modifications to CM's "Tools" API and documented everything + +

    • CM now leaves exceptions (from the compiler) alone if they are not + expected; this causes such exceptions to travel up to the top-level + where they are reported as usual + +

    • Eliminated the "[wrote ...]" progress message in favor of reporting + some size information for the generated binfile instead + +

    • Removed source code for some project-in-progress that is not ready + for prime-time (yet) + +

    • Made anchor environments ("bind:"-option for class "cm") inter-operate + in a more useful way with CMB.make + +
      + +
      + Lal George
      + + +Last modified: Sat Dec 23 11:23:17 EST 2000 + +
      +
    + + diff --git a/doc/html/readme/110.33-README.html b/doc/html/readme/110.33-README.html new file mode 100644 index 0000000..964ddb2 --- /dev/null +++ b/doc/html/readme/110.33-README.html @@ -0,0 +1,186 @@ + + + + SML/NJ Version 110.33 NEWS + + + +

    Standard ML of New Jersey
    + Version 110.33, May 23, 2001

    +
    + +
    + + Home page +
    + +
    +
    +

    Warning

    +
    +
    +
    + + This version is intended for compiler hackers. The + version ought to be quite stable, however we have not run + our full regression testing. + +
    +
    + + +
    + +

    Summary:

    + This version should build under Mac OS X. +

    + This version is being used to introduce the new experimental C FFI + (currently only available for x86/Linux) and to check point a number + of changes to MLRISC. +

    + The CKIT is now part of the release because it is needed for ml-nlffigen. +

    + In support of the C FFI, the new $/c.cm library (from src/ml-nlffi-lib) + implements an encoding of the C type system in ML. It receives help + from the ml-nlffigen glue code generator (from src/ml-nlffigen) which + transforms C source input into corresponding ML glue code. +

    + The compiler has been modified to support the C FFI. +

    + There are various bugfixes and some organizational changes. + + +


    +

    Mac OS X

    +
      +
    1. This version should build under the new Apple Mac OS X + operating system. +
    + +
    +

    MLRISC

    +
      +

    1. infinite precision throughout +

    2. support for virtual frame pointers ("omit-frame-pointer") + (This was needed for a correct implementation of raw C calls, + but it should also come in handy for clients of MLRISC other + than SML/NJ. Currently x86 only.) +

    3. see the + omit-frame-pointer notes +

    4. support for alternative control-flow in MLTREE +

    5. other API changes +

    6. bug fixes +
    + + + + + +
    +

    CM

    + +
      +

    1. "lambdasplit" parameter for class "sml" to control status and + aggressiveness of cross-module inlining +

    2. parallel make now works again +

    3. "proxy libraries" +

    4. other bug fixes +
    + + + + +
    +

    CKIT

    +
      +
    1. The ckit source tree is now part of the SML/NJ distribution. +
    + + + +
    +

    compiler

    + +
      +

    1. bug fixes (e.g., dead-up logic in cpsopt/contract.sml) +

    2. lambda-split phase now in the optimization pipeline by default + control via Compiler.Control.LambdaSplitting.* (and/or via CM) +

    3. compiler now aware of certain offsets in runtime-system data + structures to be able to generate code for raw C calls +

    4. added raw memory access primops and a primop for raw C calls + and handled them throughout the compiler +

    5. prim.sml now has correct types for all primops +
    + + + + +
    +

    ML-NLFFI

    + +
      +

    1. two source trees in support of the new NLFFI ("no-longer foreign + function interface") added: +
      +    src/ml-nlffi-lib     --- support library
      +    src/ml-nlffigen      --- glue code generator
      +       
      +

    2. some preliminary documentation under src/ml-nlffi-lib/Doc + (The tar file for this will be retrieved only if you enable + ml-nlffi-lib in config/targets. Be sure to do this only on + x86/Linux platforms, though. If you use a different platform + and want to look at the source or the documentation, fetch + the tar file by hand.) +
    + + +
    +

    smlnj-lib

    +
      +
    1. Some API cleanup and bug fixes +
    + + +
    +

    runtime

    + +
      +

    1. dlopen and friends added (currently x86/Linux only) +

    2. changes to vproc-state.h to support efficient signal handling + by new raw C call mechanism +

    3. support for Mac OS X; Darwin +
    + + + +
    +

    overall organization

    +
      +
    1. Use CM "proxy libraries" where possible +
    + + + +
    +

    scripts

    +
      +

    1. various bug fixes to config/install.sh +

    2. config/install.sh now knows about ckit, ml-nlffi-lib, and ml-nlffigen +

    3. config/install.sh will use config/targets.customized if it exist + (falls back to config/targets if not) +

    4. makeml verbosity level controllable via MAKEML_VERBOSITY + or command line switch +
    + +
    + +
    + Lal George
    + + +Last modified: Thu May 24 11:27:19 EDT 2001 + +
    +
    + + diff --git a/doc/html/readme/110.34-README.html b/doc/html/readme/110.34-README.html new file mode 100644 index 0000000..77344c8 --- /dev/null +++ b/doc/html/readme/110.34-README.html @@ -0,0 +1,114 @@ + + + + + +
    +

    S M L / N J 1 1 0 . 3 4 N E W +S

    +
    +
    +
    June 10, 2001
    +
    +
    +
    WARNING This version is intended for compiler +hackers. The version ought to be stable, however we have not run our full +regression testing. http://cm.bell-labs.com/cm/cs/what/smlnj/index.html
    +
    +
    +
    +

    Summary:

    +
      +
    • A long-standing signal-handling bug has been fixed.
    • +
    • As a result, CML finally works again, and so does eXene.
    • +
    • NLFFI has been further revised and improved. It now supports the +Sparc architecture in addition to x86/Linux.
    • +
    • There are various bugfixes and some organizational changes. 
    • +
    +
    +
    +

    NLFFI:

    +
      +
    • The types in the public interface to the "C" module have been simplified: +the annoying 'f type parameter has been dropped in most places. This +requires some cheating under the hood, but fortunately the cheat is a +provably safe cheat.
    • +
    • Various bugs in ml-nlffigen have been fixed so that it should be + more robust now.
    • +
    • C functions and function types that use variable-length argument + lists are no longer rejected. Instead, ml-nlffigen will produce ML +code that lets you call the function with just its mandatory arguments. + This means that ml-nlffigen will not bail out just because there is +one odd occurence of a varargs function somewhere in one big .h-file.
    • +
    • Default names for files are now derived from the C files's basename + so that ml-nlffigen will always place them in the current directory.
    • +
    • Default names for generated ML identifiers are also derived from + the C file's basename.
    • +
    • Sparc architecture supported now.
    • +
    • Support for x86/win32 mostly in place. (All missing pieces are + in the runtime system.)
    • +
    +

    CM:

    +
      +
    • CM can now generate "index files" which are human-readable files +that list on a per-.cm-file basis each toplevel symbol defined or imported.
    • +
    • some internal improvements in the tools subsystem
    • +
    • "lambdasplit" parameter for class "sml" to control the cross-module + inliner documented
    • +
    • other documentation updates
    • +
    • bug fixes
    • +
    +

    runtime:

    +
      +
    • A serious signal handling problem that affected CML has been fixed.
    • +
    • Support for dlopen/dlsym on Sparc/Solaris added.
    • +
    +

    CML:

    +
      +
    • Compiles under the new CM.
    • +
    • Libraries and anchors for version 110.34 and later are organized + as follows:
    • +
        +
      • $cml/cml.cm : Main CML library.
      • +
      • $cml/basis.cm : CML's version of $/basis.cm.
      • +
      • $cml/cml-internal.cm : Internal helper library.
      • +
      • $cml/core-cml.cm : Internal helper library.
      • +
      • $cml-lib/trace-cml.cm : Tracing facility.
      • +
      • $cml-lib/smlnj-lib.cm : CML's version of $/smlnj-lib.cm
      • +
      +
    +

    eXene:

    +
      +
    • Compiles under the new CM.
    • +
    • The library's name for version 110.34 is $/eXene.cm.
    • +
    +

    scripts:

    +
      +
    • The config/install.sh script has been updated to support the installation + of CML and eXene under the New World Order (aka new CM).
    • +
    +

    compiler:

    +
      +
    • bug fixes
    • +
    • cross-module inliner now in the compilation pipeline by default + (By default it is still turned off. But it can be enabled either + globally or selectively by using CM's new "lambdasplit" parameter.) +
    • +
    +

    MLRISC:

    +
      +
    • Sparc implementation of c-calls API. (This is used by NLFFI.) + (Caveat: Register assignments in the Sparc backend of SML/NJ +still need to be redone. In particular, the ASM temp register must +not be %o2. It is unlikely but not impossible that the current situation +leads to certain subtle code-generation bugs.)
    • +
    • Support for "stdcall" calling convention in x86 version of c-calls. +
    • +
    +

    CKIT:

    +
      +
    • structure Error exported from ckit-lib.cm
    • +
    +
    + + diff --git a/doc/html/readme/110.36-README.html b/doc/html/readme/110.36-README.html new file mode 100644 index 0000000..97eab10 --- /dev/null +++ b/doc/html/readme/110.36-README.html @@ -0,0 +1,79 @@ + + + + SML/NJ 110.36 NEWS + + + + + +
    +

    S M L / N J   1 1 0 . 36   + N E W S

    +
    +
    + +
    September 18, 2001
    +
    +
    + +
    WARNING This version is intended for compiler hackers. +The version ought to be stable, however we have not run our full regression +testing. +http://cm.bell-labs.com/cm/cs/what/smlnj/index.html +
    +
    +
    +
    + +
    +

    Summary:

    + +
      +
    • This is a bugfix release, mainly with the purpose of solving a problem +with the FLINT optimizer that was erroneously throwing away code in some +cases.
    • +
    • Other changes include a minor bugfix for ml-lex, the implementation +of a "minimal" structure Compiler, and switching over to an improved code +generator on the x86 (affecting floating-point computations).
      +
    • + +
    + +
    +
    +

    FLINT:

    + +
      +
    • There was a problem with FLINT occasionally using the wrong continuation +for certain functions because of a faulty internal analysis.  Fixed.
      +
    • +
    +

    MLRISC (x86):

    + +
      +
    • An updated version of the x86-fp module has been enabled by default. + This results in significant speedups for FP-intensive code.
    • + +
    + +

    Interactive system / libraries:

    + +
      +
    • A minimal version of structure Compiler is that only contains "version" +and "architecture" is available at toplevel.  Thus, legacy code that +wants to test the current version number can do so without having to load +the rather heavy (and obsolete) full structure Compiler from $smlnj/compiler/compiler.cm.
      +
    • +
    +

    ML-Lex:

    + +
      +
    • Bug 1581 (concerning open parentheses in constant strings) has been +fixed.
    • +
    +

    +
    + + + diff --git a/doc/html/readme/110.37-README.html b/doc/html/readme/110.37-README.html new file mode 100644 index 0000000..4228a86 --- /dev/null +++ b/doc/html/readme/110.37-README.html @@ -0,0 +1,207 @@ + + + + SML/NJ 110.37 NEWS + + + + + + + +
    +

    S M L / N J   1 1 0 . 37   + N E W S

    +
    +
    + +
    November 23, 2001
    +
    +
    + +
    WARNING This version is intended for compiler +hackers. The version ought to be stable, however we have not run our full +regression testing. + http://cm.bell-labs.com/cm/cs/what/smlnj/index.html
    +
    +
    + +
    +

    Summary:

    + +
      +
    • Improvements to the MLRISC backend.
    • +
    • Enhanced export-list syntax for .cm-files.
    • +
    • Various assorted bug fixes and improvements to other components and +subsystems.
      +
    • +
    + +
    +
    +

    Details:

    +
    MLRISC:
    +
      +
    • Implemented a complete redesign of MLRISC pseudo-ops. Now there ought +to never be any question of incompatabilities with pseudo-op syntax expected +by host assemblers. For now, only modules supporting GAS syntax are implemented +but more should follow, such as MASM, and vendor assembler syntax, e.g. IBM +as, Sun as, etc. The new pseudo-ops design is used to add a string to the +end of the code stream that represents the file name.
    • +

    • +
    • +
    • Fix for a backpatching bug reported by Allen. Because the boundary +between short and long span-dependent instructions is +/- 128, there are +an astounding number of span-dependent instructions whose size is over estimated. +Allen came up with the idea of letting the size of span dependent instructions +be non-monotonic, for a maxIter number of times, after which the size must +be monotonically increasing. This table shows the number of span-dependent +instructions whose size was over-estimated as a function of maxIter, for +the file Parse/parse/ml.grm.sml:  + + + + + + + + + + + + + + + + + + + + + + + +
      maxIter
      +
      # of instructions
      +
      10
      +
      687
      +
      20
      +
      438
      +
      30
      +
      198
      +
      40
      +
      0
      +
      +
    • +
    • In compiling the compiler, there is no significant difference in compilation +speed between maxIter=10 and maxIter=40. Actually, my measurements showed +that maxIter=40 was a tad faster than maxIter=10! Also 96% of the  files +in the compiler reach a fix point within 13 iterations, so fixing maxIter +at 40, while high, is okay.
    • +
    • Fixed the bug described in blume-20010920-slowfp. The fix involves 
    • +
        +
      1. generating FCOPYs in FSTP in ia32-svid
      2. +
      3. marking a CALL with the appropriate annotation
      4. +
      +
    • (x86-fast-fp is still off by default.)
    • +
    • X86RA now uses a valid (instead of dummy) PrintFlowgraph module.
    • +
    • The representation of a program point never expected to see more than +65536 instructions in a basic block!  Fixed.
    • +
    +
    CM
    +
      +
    • avoid "/../" in filenames as much as possible (but only where it is +safe)
    • +
    • experimental implementation of "portable graphs" (See the proposal +in +http://cm.bell-labs.com/cm/cs/who/blume/pgraph/proposal.pdf for details +on portable library graphs.)
    • +
    • there are also new libraries $/pgraph.cm and $/pgraph-util.cm
    • +
    • CM now implements a simple "set calculus" for specifying export lists. +In particular, it is now possible to refer to the export lists of other libraries/groups/sources +and form unions as well as differences. See the latest +CM manual for details.
    • +
    • A separate notion of "proxy" libraries has been eliminated from CM's +model.  (Proxy libraries are now simply a special case of using the +export list calculus.)
    • +
    +
    compiler
    +
      +
    • routed the name of the current source file to mlriscgen where it gets +directly emitted into the code object (This uses the new pseudo-op infrastructure +of MLRISC.)
    • +
    +
    runtime:
    +
      +
    • Underscore patch from Chris Richards (fixing problem with compiling +runtime system under recent NetBSD).
    • +
    • Removed handling of code object name strings from runtime code. (The +only remaining code-name-string-related code in the runtime is for extracting/reading +of same.)
    • +
    +
    CKIT:
    +
      +
    • Changed the "Function" constructor of type Ast.ctype to carry optional +argument identifiers.
    • +
    • Changed the return type of TypeUtil.getFunction accordingly.
    • +
    • Type equality ignores the argument names.
    • +
    • TypeUtil.composite tries to preserve argument names but gives up quickly +if there is a mismatch.
    • +
    +
    installation script:
    +
      +
    • attempts to use "curl" if available (unless "wget" is available as +well)
    • +
    • put relative anchor names for tool commands into pathconfig
    • +
    +
    run script:
    +
      +
    • Changed config/_run-sml (resulting in a changed bin/.run-sml) so that +arguments that contain delimiters are passed through correctly. This change +also means that all "special" arguments of the form @SMLxxx... must come +first.
    • +
    +
    NLFFI-LIB:
    +
      +
    • some cleanup (all cosmetic)
    • +
    +
    NLFFIGEN:
    +
      +
    • temporarily disabled the mechanism that suppresses ML output for C +definitions whose identifiers start with an underscore character
    • +
    • generate val bindings for enum constants
    • +
    • user can request that only one style (light or heavy) is being used; +default is to use both (command-line arguments: -heavy and -light)
    • +
    • fixed bug in handling of function types involving incomplete pointers
    • +
    • generate ML entry points that take record arguments (i.e., using named +arguments) for C functions that have a prototype with named arguments (see +changes to CKIT)
    • +
    +
    Basis implementation:
    +
      +
    • provided a non-hook implementation of exnName (at the toplevel) and +made the "dummy" implementation of exnMessage (at the toplevel) more useful: +if nothing gets "hooked in", then at least you are going to see the exception +name and a message indicating why you don't see more.
    • +
        +
      • [For the time being, programs that need exnMessage and want to use +ml-build should either use General.exnMessage (strongly recommended) or refer +to structure General at some other point so that CM sees a static dependency.]
      • +
      +
        +
      • [Similar remarks go for "print" and "use":  If you want to use +their functionality in stand-alone programs generated by ml-build, then use +TextIO.output and Backend.Interact.useFile (from $smlnj/compiler.cm)
      • +
      +
    +
    ml-yacc/ml-lex/ml-burg:
    +
      +
    • Don't use "exnMessage" -- use "General.exnMessage"! (see "Basis implementation")
    • +
    +

    + +

    +
    + + + diff --git a/doc/html/readme/110.38-README.html b/doc/html/readme/110.38-README.html new file mode 100644 index 0000000..788a809 --- /dev/null +++ b/doc/html/readme/110.38-README.html @@ -0,0 +1,108 @@ + + + + SML/NJ 110.38 NEWS + + + + + + + +
    +

    S M L / N J   1 1 0 . 38   + N E W S

    +
    +
    + +
    January 28, 2002
    +
    +
    + +
    WARNING This version is intended for compiler hackers. +The version ought to be stable, however we have not run our full regression +testing. + http://cm.bell-labs.com/cm/cs/what/smlnj/index.html
    +
    +
    + +
    +

    Summary:

    + +
      +
    • Major internal improvements to MLRISC.
    • +
    • A re-worked FFI.
    • +
    • Bug fixes.
      +
    • + +
    + +
    +
    +

    Details:

    +
      +
    • +

      FFI:

      +
    • +
    +
    +
      +
    • A complete overhaul of the library interfaces and the glue-code generator +(ml-nlffigen).  The implementation now scales much better, making it +possible to target huge C libraries such as gtk+ etc. (See the README file +under src/ml-nlffigen.)
    • +
    • ML represenation types have been streamlined.
    • +
    • Getter and setter functions work with concrete values, not abstract + ones where possible.
    • +
    • WORD-style bit-operations on INTEGER representations added.
    • +
    • Ml-nlffigen command line more flexible (see README file).
    • +
    • C_Debug added as an alternative to structure C.  The difference +is that it always checks for NULL on pointer-dereference operations.
    • +
    • DynLinkage.open_lib' added.  This function takes an additional +list of library handles.  This is used for expressing inter-library +(symbol-resolution-)dependencies.
    • +
    • bug fixes
    • +
    +
    +
      +
    • +

      MLRISC:

      +
    • +
        +
      • There is a dramatic simplification in the interface to the register +allocator for RISC architectures as a result of making parallel copy instructions +explicit. 
      • +
      • Bugs in x86-fast-fp have been fixed.  It is now turned on +by default in SML/NJ.
      • +
      • Changed the representation of instructions from being fully abstract + to being partially concrete.  See HISTORY file for more details.
      • +
      • Removed the native COPY and FCOPY instructions from all the architectures +and replaced it with the explicit COPY instruction from the previous commit.
      • +
      • Since COPY instructions are no longer native to the architecture, +a generic functor can be used to implement the expandCopies function.
      • +
      • Allowed EXPORT and IMPORT pseudo-op declarations to appear inside +a TEXT segment.
      • +
      • bug fixes
        +
      • +
      +
    +
      +
    • +

      misc:

      +
    • +
        +
      • Allen Leung's "nowhere" tool added.
      • +
      • Minor modifications to pgraph.
      • +
      • rounding mode-related bug fixed in Basis library
      • +
      +
    + +
    + +

    + +

    +
    + + + diff --git a/doc/html/readme/110.39-README.html b/doc/html/readme/110.39-README.html new file mode 100644 index 0000000..8cb43cc --- /dev/null +++ b/doc/html/readme/110.39-README.html @@ -0,0 +1,118 @@ + + +SML/NJ 110.39 NEWS + + +
    +			S  M  L   /   N  J
    +
    +                  1  1  0  .  3  9      N  E  W  S
    +			
    +  		         February 15, 2002
    +
    +			      WARNING
    +
    +  	This version is intended for compiler hackers. The 
    +	version ought to be stable, however we have not run
    +	our full regression testing.
    +
    +        http://cm.bell-labs.com/cm/cs/what/smlnj/index.html
    +
    + +
    +
    Summary: +
    Important bug fixes, improvements to libraries, FFI generator, + and installer.
    +
    + +
    + +

    Details:

    + +

    MLRISC:

    + +
      +
    • Important bug fixes. (Isabelle now compiles again on the x86.) +
    • Compilers that generate assembly code may produce global labels + whose value is resolved at link time. The various peephole optimization + modules did not take this in account. +
      + TODO: The Labels.addrOf function should really return an option + type so that clients are forced to deal with this issue, rather + than an exception being raised.
    • +
    • Some internal reorganization.
    • +
    • c-calls API changed: accept a client-callback for allocating + extra stack space. TODO: x86 version currently ignores it.
    • + +
    • Fixed bug in sparc-ccalls.
    • +
    + +

    Basis:

    + +
      +
    • Added missing implementations for List.collate and Option.app.
    • +
    + +

    SML/NJ library:

    + +
      +
    • Added priority queue implementation to smlnj-lib.cm.
    • +
    + +

    FFI:

    + +Various improvements to ml-nlffigen: + +
      +
    • improved name encoding scheme that avoids most "global" gensym +counters
    • +
    • don't write over files unless their contents would change
    • +
    • implemented a "repository" mechanism for dealing with "incomplete + pointers" that are to be shared between different runs of + ml-nlffigen
    • +
    • "-match" option requires an exact match now
    • +
    • improved internal datastructures, resulting in slight speedup
    • +
    • bug fixes
    • +
    + +

    Compiler/CM:

    + +
      +
    • A new PID (persistent ID) generation scheme is place. This + fixes the long-standing (but little-known) problem that it was + possible to defeat ML's type abstractions because types in + different compilation units could be mistaken for one another. +
      + See the HISTORY file and click here + for more information.
    • +
    + +

    Installer:

    + +
      +
    • Installation instructions added (file INSTALL).
    • +
    • Syntax in config/targets changed.
    • +
    • Installer automatically satisfies inter-target dependencies.
    • +
    • Build hash-cons-lib.cm if smlnj-lib was selected.
    • +
    • Installer can run in "quiet" mode.
    • +
    • Bug fixes.
    • +
    + +

    REPL:

    + +
      +
    • Added functions CM.State.showBindings and EnvRef.listBoundSymbols +for exploring available toplevel bindings.
    • +
    + +

    Misc:

    + +
      +
    • GenSML.gen now makes generation of topl-level "local" optional + (because it is not Standard ML).
    • +
    • bug fixes in "nowhere" tool
    • +
    + + + diff --git a/doc/html/readme/110.40-README.html b/doc/html/readme/110.40-README.html new file mode 100644 index 0000000..a64fb61 --- /dev/null +++ b/doc/html/readme/110.40-README.html @@ -0,0 +1,172 @@ + + +SML/NJ 110.40 NEWS + + +
    +			S  M  L   /   N  J
    +
    +                  1  1  0  .  4  0      N  E  W  S
    +			
    +  		           May 21, 2002
    +
    +			      WARNING
    +
    +  	This version is intended for compiler hackers. The 
    +	version ought to be stable, however we have not run
    +	our full regression testing.
    +
    +        http://cm.bell-labs.com/cm/cs/what/smlnj/index.html
    +
    + +
    +
    Summary: +
    Significant work in the MLRISC backend. Many improvements + and bugfixes in various parts of the implementation.
    +
    + +
    + +

    Details:

    + +

    Libraries:

    + +
      +
    • added $/controls-lib.cm, a library of configurable "controls" +
      (In essence, controls are managed ref cells that can be + grouped in sets and put in "registries" to facilitate + configuration and customization via, e.g., environment variables + and command-line arguments.) +
      $/controls-lib.cm is a distillation of what used to be a CM-only + mechanism. It is now used throughout the compiler. +
    • bug fixed in IntInf (parsing of hex literals) +
    • == and != added to INT_INF interface (just + placeholders for now) +
    • added mergeWith function to ORD_MAP interface +
    • added an implementation of Danvy-style format combinators to $/smlnj-lib.cm +
    • priority queues added to Util library (using Okasaki's leftist-tree + implementation) +
    • updates to $/hash-cons-lib.cm (consR1 ... consR5 added) +
    • fixed bug in outputSubstr (in TextIO) +
    • new scrollbar implementation (eXene) courtesy of Allen Stoughton +
    + +

    Basis:

    + +
      +
    • implemented Knuth-Morris-Pratt string matching and used it + for String.isSubstring, Substring.isSubstring, and Substring.position. +
    • added {String,Substring}.{concatWith,isSuffix,isSubstring} and + Substring.full +
    + +

    MLRISC:

    + +
      +
    • jump chaining +
    • static block frequency calculation +
    • Ball-Larus branch prediction heuristics implemented +
    • graphical viewer for control flow graphs added +
    • treatment of pseudo-ops changed (get emitted first now) +
    • REMT removed from mltree; DIV and MOD now take a rounding + mode (DIV_TO_ZERO and DIV_TO_NEGINF) +
    • made generic implementations of DIV_TO_NEGINF more efficient +
    • CFG edge splitting added +
    • buggy parts of x86-fp recoded +
    • added instructions for 64-bit support on x86 (not enabled yet) +
    + +

    CM:

    + +
      +
    • a new PID generation scheme is in place; it guarantees freedom + from accidental clashes that could subvert the type system but + is "stable" enough to maintain cutoff recompilation properties +
    • documentation updates +
    • minor bug fixes +
    • lambda-split aggressiveness can now be specified on a per-file + basis in init.cmi as well +
    • CM is now aware of (_)overload, so it calculates dependecies for + it correctly +
    • support for non-0 entrypoint offsets added +
    • command-line argument handling for showing/setting controls added +
    • bug in command-line handling of noweb (in noweb tool) fixed +
    • improved handling of pathnames in make tool +
    + +

    Runtime:

    + +
      +
    • fix for bug 1131 (excessive heap usage) +
    • heap export now silent unless GC messages are on +
    • fixed assembly code so that X86.prim.asm now compiles without + warnings +
    • support for non-0 entrypoint offsets added +
    + +

    Compiler:

    + +
      +
    • cross-module inlining works much better now +
    • stopgap fix for problem with ppDec in the presence of multiple + declarations of the same type name within one compilation unit +
    • full support for div/mod/rem/quot added to x86 code generator +
    • primops added for divisions of all flavors; track those operations + to the backend instead of clumsily macro-expanding them out + at "translate" time +
    • INLMIN, INLMAX, INLABS parameterized by numkind; use this + to implement all flavors of min, max, and abs + (Real.abs still maps to a separate FP primop.) +
    • preliminary support for "raw record" allocation and reentrant + C function calls added +
    • conditional move added to cps (still buggy, therefore disabled) +
    • _overload now accepted as synonym for overload (when overloading + is enabled); this is for backward-compatibility +
    • support for non-0 entrypoint offsets added; this (or something + similar) is needed to be able to deal with block reordering +
    • new, better scaling CPS spill phase +
    • compiler "knows" its native C calling convention (no longer needs + to be told by ml-nlffigen); X86Backend now split into X86CCallBackend + and X86StdCallBackend +
    + +

    FFI:

    + +
      +
    • all C FFI libraries now anchored under $c +
    • tweaks to type encoding (use polymorphism to emulate subtyping) +
    • encoding of incomplete type is now equal to that of the + corresponding complete type; no more POINTER_TO_INCOMPLETE_TYPE + etc. + (This restores the full generality of the translation -- something + that was lost when we removed functors.) +
    • support for (optional) mapping enum types to datatypes added +
    • types in structures C and C_Debug are now equal +
    • added "witness value" mechanism to ml-nlffi-lib.cm to + implement efficient arbitrary-depth "casts" that strip away + or add "const" modifiers +
    • ML representation of "" changed so that conversion to C string + is very efficient +
    + +

    Installer/scripts:

    + +
      +
    • suggestions for more preloads added to config/preloads +
    • don't use ?_DEFAULT environment variables. (They no longer + exist since we switched to $/controls-lib.cm.) +
    • buggy transitive-closure algorithm fixed; it caused more things + than necessary to be compiled +
    + +

    Miscellanea:

    + +
      +
    • minor bug fixes and improvements to mlrisc tools +
    • mdl tool compiles again, but without anything that depends on + RTL specs (since this is currently broken) +
    • added #[...] expressions to mlrisc tools +
    + + + diff --git a/doc/html/readme/110.41-README.html b/doc/html/readme/110.41-README.html new file mode 100644 index 0000000..f6add04 --- /dev/null +++ b/doc/html/readme/110.41-README.html @@ -0,0 +1,47 @@ + + +SML/NJ 110.41 NEWS + + +
    +			S  M  L   /   N  J
    +
    +                  1  1  0  .  4  1      N  E  W  S
    +			
    +  		           July 5, 2002
    +
    +			      WARNING
    +
    +  	This working version is believed to be stable, but
    +	we have not run our full regression-test suite.
    +
    +        http://cm.bell-labs.com/cm/cs/what/smlnj/index.html
    +
    + +
    +
    Summary: +
    A number of small, but important bug fixes.
    +
    + +
    + +

    Details:

    + +
      +
    • Fixed a bug in MatchTree.nth (regexp library). +
    • Fixed a long-standing memory leak in the GC where a program + with fixed-sized live data could suffer from unbounded VM + growth (bugs 1119 and 1131). Note that we thought that this + bug was fixed in 110.40, but we were wrong. +
    • Fixed inlined versions of Real.min and Real.max. +
    • Resurrected SMLofNJ.Internals.BTrace.mode, which had gone missing. +
    • Added missing implementation of IEEEReal.fromString. +
    • Added missing implementations of + OS.Path.{from,to}UnixPath. +
    • Exported structure BTImp from + $smlnj/viscomp/debugprof.cm so that other clients can + set up backtracing support. +
    + + + diff --git a/doc/html/readme/110.42-README.html b/doc/html/readme/110.42-README.html new file mode 100644 index 0000000..6e6da82 --- /dev/null +++ b/doc/html/readme/110.42-README.html @@ -0,0 +1,46 @@ + + +SML/NJ 110.42 NEWS + + +
    +			S  M  L   /   N  J
    +
    +                 1  1  0  .  4  2      N  E  W  S
    +			
    +                          October 16, 2002
    +
    +  	This working version is believed to be stable, but
    +	we have not run our full regression-test suite.
    +
    +        http://cm.bell-labs.com/cm/cs/what/smlnj/index.html
    +        ftp://ftp.research.bell-labs.com/dist/smlnj/working/110.42/
    +
    + +
    +
    Summary: +
    A number of small, but important bug fixes.
    +
    + +
    + +

    Details:

    + +
      +
    • Fixed a space leak in the CML implementation of TextIO and BinIO.
    • +
    • Fixed the handling of %g in the Format library module.
    • +
    • Ported to MacOS X 10.2 (Jaguar). The MacOS X 10.1 and 10.2 runtime +systems are different. The former is called run.ppc-darwin5 +and the latter is run.ppc-darwin.
    • +
    • Fixed a bug in division by zero on the PowerPC.
    • +
    • Sml now senses the SMLNJ_HOME environment variable. +If this is set, +then the bin dir is assumed to be in $SMLNJ_HOME/bin and (unless +CM_PATHCONFIG is also set), the path configuration file is +assumed to be in $SMLNJ_HOME/lib/pathconfig. This way one can +easily move the entire tree to some other place and everything will +``just work''. Companion commands such as ml-build and +ml-makedepend also sense this variable.
    • +
    + + diff --git a/doc/html/readme/110.43-README.html b/doc/html/readme/110.43-README.html new file mode 100644 index 0000000..80a46b7 --- /dev/null +++ b/doc/html/readme/110.43-README.html @@ -0,0 +1,136 @@ + + +SML/NJ 110.43 NEWS + + +
    +			S  M  L   /   N  J
    +
    +                  1  1  0  .  4  3      N  E  W  S
    +			
    +  		         September 9, 2003
    +
    +			      WARNING
    +
    +  	This working version is believed to be stable, but
    +	we have not run our full regression-test suite.
    +
    +             HOME:  http://www.smlnj.org/index.html
    +             FILES: http://smlnj.cs.uchicago.edu/dist/working/110.43/ 
    +
    + +

    Summary:

    + +In addition to a number of important bugfixes, this working version +brings us a big step closer to a new full release: the win32 port works +again (but requires extensive testing!), IntInf.int has been put into +the Basis (with compiler support), and many Basis interfaces have +been updated or added in accordance with the Basis spec. + +
    + +

    Details:

    + +
    +
    Basis: +
    +
      +
    • IntInf implemented; LargeInt = IntInf; FixedInt = Int32
    • +
    • added some missing Real64 operations (in particular, Real.toLargeInt)
    • +
    • a lot of work went into bringing the Basis closer to the spec: + missing functions added, types adjusted, *Slice modules + written and added
    • +
    • new implementation of structure Time based on IntInf
    • +
    • major overhaul of code in structure Date; added missing functionality
    • +
    • various bug fixes
    • +
    • a lot of cleanup work
    • +
    + +
    installer: +
    +
      +
    • large parts of installer script written in ML for portability
    • +
    • new installer script for win32 (uses above ML code)
    • +
    • new installer script for *nix that uses the above ML code
    • +
    • new target "mlrisc" (MLRISC modules that don't get compiled + into the SML/NJ compiler but are useful for other projects + such as Moby)
    • +
    + +
    runtime: +
    +
      +
    • compiles and works under Win32 again
    • +
    + +
    CM: +
    +
      +
    • no longer gets confused by files that "change their identity" + (e.g., by getting unlinked and recreated by a text editor)
    • +
    • parse-errors in init group (bootstrap compiler) no longer get + swallowed
    • +
    + +
    Compiler: +
    +
      +
    • compiler support for IntInf.int
    • +
    • exported structure Typecheck from $smlnj/viscomp/core.cm
    • +
    • added casse for IF, WHILE, ANDALSO, ORELSE to Absyn; improves + error messages and simplifies translation
    • +
    • flag added for treating non-exhaustive bindings as errors
    • +
    • improved organization of structure InlineT
    • +
    • important bug fixes
    • +
    + +
    interactive system: +
    +
      +
    • Control.Print.intinfDepth control max length of intinf constants
    • +
    + +
    global: +
    +
      +
    • source tree "cleaned" so that CMB.make goes through without any + warning other than polyequal warnings; (not quite true for + win32 version which still needs some more cleaning)
    • +
    • many unnecessary calls of polyequal removed
    • +
    • started using IntInf.int literals where appropriate
    • +
    + +
    ml-nlffigen: +
    +
      +
    • exports structures ST_* corresponding to incomplete types
    • +
    + +
    CML: +
    +
      +
    • compiles under Win32 again
    • +
    + +
    eXene: +
    +
      +
    • now also compiles under Win32 -- but is untested; + (it is still an X toolkit, so you need to be able to + connect to an X server to test it)
    • +
    + +
    nowhere: +
    +
      +
    • compiles under Win32
    • +
    + +
    ckit: +
    +
      +
    • mismatched uses of LargeInt vs. Int32 fixed
    • +
    +
    + + diff --git a/doc/html/readme/110.44-README.html b/doc/html/readme/110.44-README.html new file mode 100644 index 0000000..2bf324e --- /dev/null +++ b/doc/html/readme/110.44-README.html @@ -0,0 +1,132 @@ + + + + SML/NJ 110.44 NEWS + + +
    			S  M  L   /   N  J

    1 1 0 . 4 4 N E W S

    November 6, 2003

    WARNING

    This working version is believed to be stable, but
    we have not run our full regression-test suite.

    HOME: http://www.smlnj.org/index.html
    FILES: http://smlnj.cs.uchicago.edu/dist/working/110.44/
    +

    Summary:

    +Again, there are a number of important bug fixes and updates, mostly to +the Basis library and the installer.
    +
    +
    +

    Details:

    +
    +
    Basis:
    +
    +
      +
    • fixed bugs and improved code for Real.{from,to}LargeInt
    • +
    • fixed bug in Posix.ProcEnv.times
    • +
    • changed inputLine +functions to return an option
    • +
    • implemented native + int32->real64 +conversion
    • +
    • slices implemented according to spec
    • +
    • Time.{from,to}NanoSeconds +added to Time
    • +
    • made SOCKET API +match the spec
    • +
    • OS-specific details of socket implementation moved into +separate files (this makes it possible to share the bulk of the code +between Unix and Win32)
    • +
    • added signature SYNCHRONOUS_SOCKET +to Basis
    • +
    • precision of real64 is 53 (not 52)
    • +
    • added openVector, + nullRd, and nullWr to PRIM_IO
    • +
    +
    +
    installer:
    +
    +
      +
    • new installer (which is mostly written in SML) is no longer +optional (old installer has been removed)
    • +
    • the ML code of the installer is now in its own library and +gets compiled by CMB.make; +the installer script simply invokes sml -m +$smlnj/installer.cm
      +(the ML code does not have to be compiled as +part +of the installation process)
    • +
    • minor improvements and bugfixes
    • +
    • sense environment variable CM_DIR_ARC that lets one +override +the default name for CM meta-data (default is .cm -- see CM section)
    • +
    • sense value of INSTALLDIR +environment variable and behave +accordingly
    • +
    • dont_move_libraries +directive eliminated from + config/targets +(This was a legacy mechanism which turned out to be +broken anyway.)
    • +
    +
    +
    Scripts:
    +
    +
      +
    • made .bat-files +Win95-compatible
    • +
    • changed default @SMLalloc +parameter for x86/celeron to 64k +
    • +
    +
    +
    CM:
    +
    +
      +
    • default name of meta-data directory changed from CM to .cm
    • +
    • a different name can be chosen at installation time
    • +
    • added missing wrapper for privilege primitive in + $smlnj/viscomp/core.cm
    • +
    +
    +
    CML:
    +
    +
      +
    • tracked Basis changes (non-blocking Socket functons are left +out since they are redundant: CML_SOCKET +is defined in terms of + SYNCHRONOUS_SOCKET)
    • +
    • make use of non-blocking socket functions from Basis; CML +socket implementation is now OS-independent
    • +
    +
    +
    Interactive Loop:
    +
    +
      +
    • added hook to prettyprinter so it can invoke the autoloader +if a qid to be printed is not currently bound in the toplevel +environment; this eliminates many instances where the prettyprinter +used to print ? in front +of a name
    • +
    +
    +
    runtime
    +
    +
      +
    • works with Mac OS X 10.3 (Panther)
    • +
    +
    +
    MLRISC
    +
    +
      +
    • minor bug fixes
    • +
    +
    +
    + + diff --git a/doc/html/readme/110.45-README.html b/doc/html/readme/110.45-README.html new file mode 100644 index 0000000..ecae456 --- /dev/null +++ b/doc/html/readme/110.45-README.html @@ -0,0 +1,54 @@ + + + + SML/NJ 110.45 NEWS + + +
    			S  M  L   /   N  J

    1 1 0 . 4 5 N E W S

    February 13, 2004

    WARNING

    This working version is believed to be stable, but
    we have not run our full regression-test suite.

    HOME: http://www.smlnj.org/index.html
    FILES: http://smlnj.cs.uchicago.edu/dist/working/110.45/
    +

    Summary:

    +This is a maintenance release which fixes some minor problems, most +notably an incompatibility with GCC 3.3.
    +
    +

    Details:

    +Basis:
    +
      +
    • Timer interface +changed in anticipation of a change to the spec
    • +
    • POSIX_FLAGS replaced +with BIT_FLAGS
    • +
    • type of top-level app +corrected
    • +
    • other minor discrepancies with spec fixed
    • +
    +runtime:
    +
      +
    • added vp_limitPtrMask +to win32-specific code
    • +
    • added win32 Sleep +function to runtime
    • +
    • dropped "-ansi" +flag from gcc invocation when used on assembly code (fixes problem +related to gcc 3.3)
    • +
    +interactive loop:
    +
      +
    •   - exception reporting improved (don't show part of +exception history trace that reaches into the implementation of the +toplevel loop itself)
    • +
    +MLRISC:
    +
      +
    • preliminary support for c-calls +interface on PPC architecture added (not yet used for NLFFI)
    • +
    • bug in getreg fixed
    • +
    +compiler:
    +
      +
    • code generation for int32 +comparisons added
      +
    • +
    + + diff --git a/doc/html/readme/110.46-README.html b/doc/html/readme/110.46-README.html new file mode 100644 index 0000000..0fc4876 --- /dev/null +++ b/doc/html/readme/110.46-README.html @@ -0,0 +1,128 @@ + + + +SML/NJ 110.46 NEWS + + + +
    +                         S  M  L   /   N  J
    +
    +                 1  1  0  .  4  6      N  E  W  S
    +			
    +                            June 17, 2004
    +
    +                               WARNING
    +
    +        This working version is believed to be stable, but
    +        we have not run our full regression-test suite.
    +
    + +
    +
    HOME:
    +
    http://www.smlnj.org/index.html
    +
    FILES:
    +
    http://smlnj.cs.uchicago.edu/dist/working/110.46/
    +
    + +

    Summary:

    + +This release fixes some bugs, moves the Basis implementation closer +to conformance with the specification (i.e., the upcoming Basis Library +book by Gansner and Reppy), and enhances some of the functionality by +incorporating contributions from our users. + +
    + +

    Details:

    + +Basis: +
    +
      +
    • interface to structure Timer now matches spec
    • +
    • interface to structure Unix now matches spec
      + (Caveat: There probably still is a problem with the spec - + concerning the behavior of reap as well as + {text,bin}{In,Out}streamOf + when they get called multiple times. However, fixing this will + in all likelihood not change any types.)
    • +
    • signature PACK_REAL added
    • +
    • functor PrimIO exported
    • +
    • added Posix.IO.mk{Bin,Text}{Reader,Writer} + by lifting their respective implementations from internal modules + PosixBinPrimIO and PosixTextPrimIO
    • +
    • exceptions Option and Option.Option are now + identical (as they should be)
    • +
    • bug in IntInf.fmt fixed
    • +
    + +
    +Compilation Manager: +
    + +
      +
    • CM now ignores (but still syntactically accepts) the "owner" + information in group descriptions. It continues to enforce the + "single owner rule" for groups within each run of CM.make, but + can no longer do so across multiple runs. (Fortunately, there is + no fundamental problem with this.) The advantage of the new + scheme is that the programmer no longer needs to provide this + awkward piece of information.
    • +
    • fixed IEEEReal.scan (and .fromString) so that if there is an + overflow in the exponent calculation we get INF or ZERO + (depending on the mantissa and the sign of the exponent)
    • +
    + +
    +Windows port: +
    + +
      +
    • incorporated a voluminous patch kindly provided by David Hansel + from Reactive Systems, implementing previously missing support + for many socket-related functions
    • +
    + +
    +Command-line tools: +
    + +
      +
    • arranged for ml-build to clean up after itself a little bit better + (The script generates a temporary SML source file and + compiles it using CM, so CM generates metadata (GUID, SKEL, + objectfile) for it. It now gets rid of those at the end, so they + don't accumulate under .cm.)
    • +
    • ml-build now terminates with a non-0 status when something goes wrong
    • +
    + +
    +Installer: +
    + +
      +
    • bugs fixed
    • +
    • src-smlnj now recognized as a valid target (in config/targets) again + (The meaning of this has changed from "all sources required for the + compiler" to "all sources the installer knows about".)
    • +
    + +
    +NLFFI: +
    + +
      +
    • support for NetBSD added (thanks to Vesa A. Norrman)
    • +
    • ml-nlffi-lib made to run on cygwin
    • +
    + +
    +Compiler internals: +
    + +
      +
    • as per request by Adam Chlipala, extended + various export lists in compiler-related .cm-files
    • +
    + + diff --git a/doc/html/readme/110.47-README.html b/doc/html/readme/110.47-README.html new file mode 100644 index 0000000..ad19ae6 --- /dev/null +++ b/doc/html/readme/110.47-README.html @@ -0,0 +1,129 @@ + + +SML/NJ 110.47 NEWS + + + +
    +                         S  M  L   /   N  J
    +
    +                   1  1  0  .  4  7      N  E  W  S
    +			
    +                             August 4, 2004
    +
    +                                WARNING
    +
    +        This working version is believed to be stable, but
    +        we have not run our full regression-test suite.
    +
    + +
    +
    HOME:
    +
    http://www.smlnj.org/index.html
    +
    FILES:
    +
    http://smlnj.cs.uchicago.edu/dist/working/110.47/
    +
    + +

    Summary:

    + +The most important and noticable changes in 110.47 concern preliminary +implementations of the NLFFI foreign-function interface under Mac OS X +(PowerPC) and Win32 (x86). There are also a number of important bug +fixes. + +
    + +

    Details:

    + +
    +
    NLFFI:
    +
    +
      +
    • preliminary support for Mac OS X added
    • +
    • preliminary support for Win32 added + (untested; + currently can only interface with C functions using the stdcall + calling conventions)
    • +
    • function arguments that are C unions are now handled
    • +
    • various minor bug fixes to ml-nlffigen
    • +
    • preliminary documentation for nlffi and ml-nlffigen + (currently only describes the output of ml-nlffigen; + see src/ml-nlffi-lib/Doc/manual/nlffi.tex)
    • +
    +
    + +
    Compilation Manager:
    +
    +
      +
    • made make commend in CM's make tool + configurable
    • + +
    • added option (default: on) for passing the name of + the SML/NJ's bin directory to make; + This can be used by the Makefile to, e.g., pick the "right" version + of ml-nlffigen.
    • +
    +
    + +
    Basis:
    +
    +
      +
    • Changed the implementation of structure Unix so that the + same stream is returned every time one of the + {text,bin}{In,Out}streamOf functions is invoked on the + same proc. (NOTE: This is not what the spec says, so it will + probably change again. Don't rely on it!)
    • +
    +
    + +
    Windows port:
    +
    +
      +
    • bug fixes: +
        +
      • NULL test in gmtime and localtime
      • +
      • toSeconds -> toMilliseconds in + win32-process.sml (Basis implementation)
      • +
      +
    +
    + +
    Installer:
    +
    +
      +
    • eliminated some shell syntax not understood by + /bin/sh under Solaris
    • +
    +
    + +
    Compiler internals:
    +
    +
      +
    • low-level support for choosing C calling conventions by + twiddling the type of of the rawccall primop (see + src/compiler/Semant/types/cproto.sml for details)
    • +
    • use paramAlloc to report c-calls with too many + arguments (for PPC version where parameter area is + pre-allocated)
    • +
    • added ccall_maxargspace to machspec (to implement + the above)
    • +
    +
    + +
    MLRISC:
    +
    +
      +
    • redesigned c-calls interface
    • +
    • c-calls implementation for ppc-macosx added
    • +
    • Added these instructions to the PowerPC architecture: + LWARX, STWCX, LBZU(X), + LHZU(X), LWZU(X), STWU(X), + STFDU, STFSU, etc. + (Instruction encodings not added yet, though.)
    • +
    • loop-structure.sml has been rewritten
    • +
    +
    +
    + + + diff --git a/doc/html/readme/110.48-README.html b/doc/html/readme/110.48-README.html new file mode 100644 index 0000000..f55f6b0 --- /dev/null +++ b/doc/html/readme/110.48-README.html @@ -0,0 +1,72 @@ + + +SML/NJ 110.48 NEWS + + + +
    +                         S  M  L   /   N  J
    +
    +                   1  1  0  .  4  8      N  E  W  S
    +			
    +                             August 10, 2004
    +
    +                                WARNING
    +
    +        This working version is believed to be stable, but
    +        we have not run our full regression-test suite.
    +
    + +
    +
    HOME:
    +
    http://www.smlnj.org/index.html
    +
    FILES:
    +
    http://smlnj.cs.uchicago.edu/dist/working/110.48/
    +
    + +

    Summary:

    + +This is a bugfix release. Its main purpose is to solve a showstopper +problem introduced into the x86 c-calls implementation which is used +by NLFFI. + +
    + +

    Details:

    + +
    +
    Interactive System:
    +
    +
      +
    • Command-line arguments are handled slightly differently: +
        +
      • system automatically quits after processing -S, + -H, -sn or -hn if one + of these appears as the last command-line argument
      • +
      • new option -q which forces a quit
      • +
      +
    • +
    +
    + +
    Basis:
    +
    +
      +
    • Race condition in signal handling code fixed.
    • +
    +
    + +
    MLRISC:
    +
    +
      +
    • fixed problem with c-calls implementation on x86, preventing the + compilation of calls of any C function returning any form of int
    • +
    • bug in dijkstra.sml fixed
    • +
    • bug in udgraph.sml fixed
    • +
    • some support for IBM assempler syntax (PPC) added
    • +
    +
    +
    + + + diff --git a/doc/html/readme/110.49-README.html b/doc/html/readme/110.49-README.html new file mode 100644 index 0000000..4570244 --- /dev/null +++ b/doc/html/readme/110.49-README.html @@ -0,0 +1,89 @@ + + +SML/NJ 110.49 NEWS + + + +
    +                         S  M  L   /   N  J
    +
    +                   1  1  0  .  4  9      N  E  W  S
    +			
    +                          September 13, 2004
    +
    +                                WARNING
    +
    +        This working version is believed to be stable, but
    +        we have not run our full regression-test suite.
    +
    + +
    +
    HOME:
    +
    http://www.smlnj.org/index.html
    +
    FILES:
    +
    http://smlnj.cs.uchicago.edu/dist/working/110.49/
    +
    + +

    Summary:

    + + This is a bugfix release. Its main purpose is to solve a another + showstopper problem introduced into the x86 c-calls + implementation which is used by NLFFI. It also fixes a problem + with the signal masking/unmasking code. + +
    + +

    Details:

    + +
    +
    Basis:
    +
    +
      +
    • Fixed the signal masking code to properly nest mask/unmask + operations on a per-signal basis. +
    • +
    +
    + +
    MLRISC:
    +
    +
      +
    • fixed problem in x86/c-calls that caused NLFFI to generate + bogus code
    • +
    • bug fix in udgraph.sml
    • +
    • The IBM/MacOS syntax switch on PPC was incorrectly + swapped. Fixed.
    • +
    +
    + +
    Runtime:
    +
    +
      +
    • heap image magic number updated + (causes a graceful exit rather than a crash when an outdated + heap image is being loaded)
    • +
    • Cygwin support
    • +
    +
    + +
    Libraries:
    +
    +
      +
    • added some exports to src/compiler/core.cm
    • +
    • added a ptreql functon to structure InlineT + (exported from $smlnj/init/init.cmi)
    • +
    +
    + +
    Installer:
    +
    +
      +
    • falls back to copying when renaming of files fails + (usually this happens when source and destination are in different + file systems)
    • +
    +
    +
    + + + diff --git a/doc/html/readme/110.50-README.html b/doc/html/readme/110.50-README.html new file mode 100644 index 0000000..d5f0303 --- /dev/null +++ b/doc/html/readme/110.50-README.html @@ -0,0 +1,138 @@ + + + +SML/NJ 110.50 NEWS + + + +
    +                         S  M  L   /   N  J
    +
    +                   1  1  0  .  5  0      N  E  W  S
    +			
    +                           October 28, 2004
    +
    +                                WARNING
    +
    +        This working version is believed to be stable, but
    +        we have not run our full regression-test suite.
    +
    + +
    +
    HOME:
    +
    http://www.smlnj.org/index.html
    +
    FILES:
    +
    http://smlnj.cs.uchicago.edu/dist/working/110.50/
    +
    + +

    Summary:

    + + This release fixes important bugs and provides some additional + functionality. + +
    + +

    Details:

    + +
    +
    Basis:
    +
    +
      +
    • Added specialized implementation of Int31.fromString.
    • +
    +
    + +
    MLRISC:
    +
    +
      +
    • Fixed bugs in x86/c-calls related to passing + structs as arguments or results.
    • + +
    • Fixed bug in x86 assembly syntax (FU?COMIP?).
    • +
    +
    + +
    Libraries:
    +
    +
      +
    • Minor enhancements to interfaces exported from the + controls library.
    • + +
    • Added libraries $smlnj-tdp/plugins.cm, + $smlnj-tdp/back-trace.cm, and + $smlnj-tdp/coverage.cm. This moves back-trace support + out of the main compiler and into a library of + trace/debug/profile plugins.
    • + +
    • The test-coverage plugin is new.
    • +
    +
    + +
    Command-line:
    +
    +
      +
    • Added flags -e and -E which print the + names of environment variables that can be used to control + internal settings.
    • + +
    • Added support for tracing, debugging, and profiling (using + stuff from $smlnj-tdp/plugins.cm) for stand-alone + programs. (Stand-alone programs are those constructed using + ml-build.)
    • +
    +
    + + +
    Runtime:
    +
    +
      +
    • Applied patch for setting rounding modes under Mac OS X.
    • +
    +
    + +
    Compiler:
    +
    +
      +
    • PPC backend now silent (by default) even when it uses the "long + form of branch"
    • + +
    • Made the former backtrace-instrumentation pass more generic.
    • + +
    • Some internal cleanup (consolidation of duplicated + functionality).
    • +
    +
    + +
    Compilation Manager:
    +
    +
      +
    • Direct support for lazy sml (keyword lazy) via new + toolclass lazysml and filename suffix + .lml.
    • + +
    • Support for selectively setting arbitrary control flags on + a per-sourcefile basis.
    • + +
    • Fixed handling of CM keywords, thereby eliminating a bug + with the shell tool.
    • + +
    • CM now issues an obsolete feature warning when group files + use the old owner syntax.
    • +
    +
    + +
    Bootstrap compiler:
    +
    +
      +
    • Adde a version tool and some CM magic to have the + file version.sml be auto-generated during + CMB.make.
    • + +
    • Moved version.sml (and above-mentioned magic) into its own + library.
    • +
    +
    +
    + + + diff --git a/doc/html/readme/110.51-README.html b/doc/html/readme/110.51-README.html new file mode 100644 index 0000000..d8da6b5 --- /dev/null +++ b/doc/html/readme/110.51-README.html @@ -0,0 +1,70 @@ + + + +SML/NJ 110.51 NEWS + + + +
    +                         S  M  L   /   N  J
    +
    +                   1  1  0  .  5  1      N  E  W  S
    +			
    +                          November 18, 2004
    +
    +                                WARNING
    +
    +        This working version is believed to be stable, but
    +        we have not run our full regression-test suite.
    +
    + +
    +
    HOME:
    +
    http://www.smlnj.org/index.html
    +
    FILES:
    +
    http://smlnj.cs.uchicago.edu/dist/working/110.51/
    +
    + +

    Summary:

    + + This version adds implementations of structures Int64 and + (inofficially) Word64. + +
    + +

    Details:

    + +
    +
    Basis:
    +
    +
      +
    • Added structure Int64, an implementation of + non-native 64-bit integer arithmetic.
    • + +
    • Added an inofficial structure Word64, an + implementation of non-native 64-bit word arithmetic. + The structure is inofficial in the sense that: +
        +
      • LargeWord is still the same as Word32
      • +
      • the implementations of toLargeWord, + toLargeWordX, and fromLargeWord + are dummy placeholders that raise an exception when called.
      • +
    • +
    + Neither Int64 nor Word64 are very efficient at the moment as they + do not rely on native machine support for 64-bit arithmetic + (and not even on such things as 32-bit multiplications with 64-bit + results). +
    + +
    Compiler
    +
    +
      +
    • Added a few primops in support of the implementation of + 64-bit arithmetic mentioned above.
    • +
    +
    +
    + + + diff --git a/doc/html/readme/110.52-README.html b/doc/html/readme/110.52-README.html new file mode 100644 index 0000000..0e3909d --- /dev/null +++ b/doc/html/readme/110.52-README.html @@ -0,0 +1,84 @@ + + + +SML/NJ 110.52 NEWS + + + +
    +                         S  M  L   /   N  J
    +
    +                   1  1  0  .  5  2      N  E  W  S
    +			
    +                          December 15, 2004
    +
    +                                WARNING
    +
    +        This working version is believed to be stable, but
    +        we have not run our full regression-test suite.
    +
    + +
    +
    HOME:
    +
    http://www.smlnj.org/index.html
    +
    FILES:
    +
    http://smlnj.cs.uchicago.edu/dist/working/110.52/
    +
    + +

    Summary:

    + + Minor additions and improvements to library code; bug fixes. + +
    + +

    Details:

    + +
    +
    Interactive system:
    +
    +
      +
    • improved error messages related to .cm- and + .sml-files that appear as part of the command line
    • +
    +
    + +
    Basis:
    +
    +
      +
    • The system can now (optionally) be compiled with 64-bit position + values (structure Position = Int64). See the + HISTORY file for more information.
    • +
    +
    + +
    Libraries:
    +
    +
      +
    • HashString.hashSubstring added.
    • +
    • Atom.atom' now extracts a string from the given + substring only if it is not already in the table.
    • +
    • Two simple but potentially useful statistics modules added to + SML/NJ library.
    • +
    • Minor performance improvements.
    • +
    +
    + +
    Compiler/MLRISC:
    +
    +
      +
    • bug fixes in PPC backend (does not affect SML/NJ directly)
    • +
    +
    + +
    Scripts:
    +
    +
      +
    • fixed bug in ml-makedepend
    • +
    • tried to make scripts more robust against filenames that contain + whitespace (common under Win32 and Mac OS X)
    • +
    +
    +
    + + + diff --git a/doc/html/readme/110.53-README.html b/doc/html/readme/110.53-README.html new file mode 100644 index 0000000..604ce1c --- /dev/null +++ b/doc/html/readme/110.53-README.html @@ -0,0 +1,123 @@ + + + +SML/NJ 110.53 NEWS + + + +
    +                         S  M  L   /   N  J
    +
    +                   1  1  0  .  5  3      N  E  W  S
    +			
    +                          Februar 25, 2005
    +
    +                                WARNING
    +
    +        This working version is believed to be stable, but
    +        we have not run our full regression-test suite.
    +
    + +
    +
    HOME:
    +
    http://www.smlnj.org/index.html
    +
    FILES:
    +
    http://smlnj.cs.uchicago.edu/dist/working/110.53/
    +
    + +

    Summary:

    + + Bugfixes and minor feature improvements. + +
    + +

    Details:

    + +
    +
    Basis:
    +
    +
      +
    • SMLofNJ.Susp is back.
    • +
    +
    + +
    CML
    +
    +
      +
    • Thread-safe version of structure Atom matches + original signature and re-uses the original implementation. + Moreover, since an mvar is used as a lock (instead of a separate + server thread) to protect the global hashtable, it is now + possible to create atoms even before RunCML.doit has started + executing.
    • +
    +
    + +
    Library
    +
    +
      +
    • Moved source tree from smlnj-lib/TraceDebugProf to a new + tools/TraceDebugProf.
    • +
    +
    + +
    NLFFI
    +
    +
      +
    • Handles access to signed and unsigned long long data.
    • +
    • Handles long long function arguments and results (but so far + only on the PowerPC).
    • +
    • Some organizational cleanup of the library code.
    • +
    +
    + +
    ML-Yacc
    +
    +
      +
    • Fixed formatting bug that sometimes caused syntactically incorrect + code from being formed.
    • +
    +
    + +
    Runtime
    +
    +
      +
    • Fixed bug in PC-sampling profiler.
    • +
    • Fixed off-by-1 bug in ML_STRING macro (which caused the names + of certain exceptions to be garbled).
    • +
    +
    + +
    Scripts
    +
    +
      +
    • Added an experimental heap2exec script (with a helper + heap2asm script in the background). This is experimental + and so far only works under Mac OS X.
    • +
    • Made ml-build script slightly smarter. (One can now omit + the 3rd argument -- which in this case is taken to be the same as + the 1st argument without the extension.
    • +
    • _run-sml now converts SMLNJ_HOME to a POSIX pathname on + cygwin.
    • +
    +
    + +
    CM
    +
    +
      + Fixed its incorrect assumption about the PowerPC being + little-endian. (PPC couldn be little-endian, but on the only + supported platform -- Mac OS X -- it is big-endian.) +
    +
    + +
    MLRISC
    +
    +
      +
    • Bugfixes in c-calls implementation.
    • +
    +
    +
    + + + diff --git a/doc/html/readme/110.54-README.html b/doc/html/readme/110.54-README.html new file mode 100644 index 0000000..0b7094e --- /dev/null +++ b/doc/html/readme/110.54-README.html @@ -0,0 +1,108 @@ + + + +SML/NJ 110.54 NEWS + + + +
    +                         S  M  L   /   N  J
    +
    +                   1  1  0  .  5  4      N  E  W  S
    +			
    +                             May 18, 2005
    +
    +                                WARNING
    +
    +        This working version is believed to be stable, but
    +        we have not run our full regression-test suite.
    +
    + +
    +
    HOME:
    +
    http://www.smlnj.org/index.html
    +
    FILES:
    +
    http://smlnj.cs.uchicago.edu/dist/working/110.54/
    +
    + +

    Summary:

    + + Important bugfixes and minor feature enhancements. + +
    + +

    Details:

    + +
    +
    Runtime system:
    +
    +
      +
    • Fixed a GC bug that occasionally caused data corruption.
    • +
    • Improved the heap sizing policy.
    • +
    • Added a runtime flag that lets the user turn of aggressive + heap trimming. This can improve performance of programs with + very large heaps at the expense of causing a (very slow) space + leak.
    • +
    • Added support for Mac OS X 10.4 ("Tiger").
    • +
    +
    +
    Compiler:
    +
    +
      +
    • Added workaround for overloading bug in type checker.
    • +
    +
    +
    Basis:
    +
    +
      +
    • Word64.fromString should now behave correctly. + (Notice that Word64 is not an official feature. It's + current implementation is extremely + inefficient. We provided it just for the benefit of NLFFI so that + C functions that pass long long values can be used.)
    • +
    • Fixed a bug in the implementation of div and + mod for IntInf. This also fixes a + corresponding problem with Word64 and Int64. +
    +
    +
    Libraries:
    +
    +
      +
    • Added a join combinator to ParserComb in + smlnj-lib.cm. +
    • Fixed serious bug in new implementation of structure + Atom in CML.
    • +
    • Added singleton function to MONO_PRIORITYQ + interface.
    • +
    • Fixed bug in GetOpt.getOpt.
    • +
    • Added Atom.same and Atom.lexCompare. +
    +
    +
    Interactive system:
    +
    +
      +
    • Added mechanism for re-creating a heap file for the + interactive system after configuration variables have been + changed.
    • +
    +
    +
    Installer:
    +
    +
      +
    • Added support scripts for Mac OS X PackageMaker and modified + config/install.sh so that it supports re-dumping a heap image + after customization.
    • +
    +
    +
    Code generator:
    +
    +
      +
    • Added a gc protocol checking phase. This phase is enabled + with the flag check-gc. Flag debug-check-gc + turns on the verbose mode.
    • +
    +
    +
    + + + diff --git a/doc/html/readme/110.55-README.html b/doc/html/readme/110.55-README.html new file mode 100644 index 0000000..7a1d0fe --- /dev/null +++ b/doc/html/readme/110.55-README.html @@ -0,0 +1,66 @@ + + + +SML/NJ 110.55 NEWS + + + +
    +                         S  M  L   /   N  J
    +
    +                   1  1  0  .  5  5      N  E  W  S
    +			
    +                             May 18, 2005
    +
    +                                WARNING
    +
    +        This working version is believed to be stable, but
    +        we have not run our full regression-test suite.
    +
    + +
    +
    HOME:
    +
    http://www.smlnj.org/index.html
    +
    FILES:
    +
    http://smlnj.cs.uchicago.edu/dist/working/110.55/
    +
    + +

    Summary:

    + + Bugfixes and minor feature enhancements. + +
    + +

    Details:

    + +
    +
    Libraries:
    +
    +
      +
    • Fixed bug regarding the copy function in + *array-slice.sml modules.
    • +
    • Refactored the pretty printing library. The declarative way + to construct pretty-printing descriptions now sits on top of the + PP_STREAM interface.
    • +
    • Removed onNewline function, which was not + implemented.
    • +
    • Added ANSITerm structure, which provides support + for ANSI terminal display attributes (e.g., red text).
    • +
    • Added ANSITermDev device that uses ANSI terminal + display attributes and added a ANSITermPP for pretty + printing to such a device.
    • +
    +
    +
    Runtime system:
    +
    +
      +
    • Support for HPUX 11 added. (There is still a problem, + though.)
    • +
    • Updated Cygwin's fault/signal handling.
    • +
    +
    +
    + + + diff --git a/doc/html/readme/110.56-README.html b/doc/html/readme/110.56-README.html new file mode 100644 index 0000000..19e9b2e --- /dev/null +++ b/doc/html/readme/110.56-README.html @@ -0,0 +1,70 @@ + + + +SML/NJ 110.56 NEWS + + + +
    +                         S  M  L   /   N  J
    +
    +                   1  1  0  .  5  6      N  E  W  S
    +			
    +                            October 25, 2005
    +
    +                                WARNING
    +
    +        This working version is believed to be stable, but
    +        we have not run our full regression-test suite.
    +
    + +
    +
    HOME:
    +
    http://www.smlnj.org/index.html
    +
    FILES:
    +
    http://smlnj.cs.uchicago.edu/dist/working/110.56/
    +
    + +

    Summary:

    + + Bugfixes and minor feature enhancements. + +
    + +

    Details:

    + +
    +
    Libraries:
    +
    +
      +
    • Added interval sets to utility library (signatures + INTERVAL_DOMAIN and INTERVAL_SET, and functor + IntervalSetFn).
    • +
    +
    +
    Compiler:
    +
    +
      +
    • Improved code generation for polymorphic equality on datatypes.
    • +
    +
    +
    MLRISC:
    +
    +
      +
    • Fixed bug in x86 register allocator.
    • +
    • Some code cleanup.
    • +
    +
    +
    Tools:
    +
    +
      +
    • ML-Lex now correctly recognizes \r as representing + carriage return.
    • +
    • ML-Yacc now works with files that use non-native end-of-line + encodings (e.g., Windows text files on a Unix system).
    • +
    +
    +
    + + + diff --git a/doc/html/readme/110.57-README.html b/doc/html/readme/110.57-README.html new file mode 100644 index 0000000..23af62a --- /dev/null +++ b/doc/html/readme/110.57-README.html @@ -0,0 +1,79 @@ + + + +SML/NJ 110.57 NEWS + + + +
    +                         S  M  L   /   N  J
    +
    +                   1  1  0  .  5  7      N  E  W  S
    +			
    +                            November 16, 2005
    +
    +                                WARNING
    +
    +        This working version is believed to be stable, but
    +        we have not run our full regression-test suite.
    +
    + +
    +
    HOME:
    +
    http://www.smlnj.org/index.html
    +
    FILES:
    +
    http://smlnj.cs.uchicago.edu/dist/working/110.57/
    +
    + +

    Summary:

    + + Bugfixes and minor feature enhancements. + +
    + +

    Details:

    + +
    +
    Libraries:
    +
    +
      +
    • Replaced stub for Array2.copy with an actual + implementation.
    • +
    • Fixed erroneous out-of-bounds test in + *ArraySlice.update.
    • +
    • Removed deprecated function Substring.all.
    • +
    • Tweaked the new interval set API (in SML/NJ library) + somewhat.
    • +
    • Renamed + Pack{Big,Little} + to their correct names + PackWord{Big,Little}.
    • +
    • Changed hexadecimal digits to upper-case as required by the + Basis specification.
    • +
    • EXene is now maintained by a group of people at K-State: + http://www.cis.ksu.edu/~stough/eXene/index.html
    • +
    +
    +
    Tools:
    +
    +
      +
    • Fixed problem in back-trace facility where a spurious + message about exception ExnDuringExecution would be printed.
    • +
    +
    +
    System:
    +
    +
      +
    • Re-organized the way the compiler refers to libraries such as + the Basis, various components of the SML/NJ library, and the + ML-Yacc library.
      + (It is now possible to redefine the pathname anchors that normally + are used to refer to these libraries (for the purpose of pointing + user code to alternative implementations etc.) without disturbing + the proper functioning of the interactive system and the compiler.)
    • +
    +
    +
    + + + diff --git a/doc/html/readme/110.58-README.html b/doc/html/readme/110.58-README.html new file mode 100644 index 0000000..e13d54f --- /dev/null +++ b/doc/html/readme/110.58-README.html @@ -0,0 +1,117 @@ + + + +SML/NJ 110.58 NEWS + + + +
    +                         S  M  L   /   N  J
    +
    +                   1  1  0  .  5  8      N  E  W  S
    +			
    +                             March 3, 2006
    +
    +                                WARNING
    +
    +        This working version is believed to be stable, but
    +        we have not run our full regression-test suite.
    +
    + +
    +
    HOME:
    +
    http://www.smlnj.org/index.html
    +
    FILES:
    +
    http://smlnj.cs.uchicago.edu/dist/working/110.58/
    +
    + +

    Summary:

    + + Support for Intel Macs and x86-64 under Linux, a brand-new + replacement for ml-lex, and other minor improvements as well + as bugfixes. + +
    + +

    Details:

    + +
    +
    Lexer generator:
    +
    +
      +
    • A new, much improved lexer generator tool has been developed by + Aaron Turon and John Reppy. The new tool, called lexgen, can be + used as a drop-in replacement for ml-lex.
    • + +
    • The compilation manager is able to have lex specification + files processed by either lexgen or the original + ml-lex. For the time being, the latter is the default.
    • + +
    • To change this aspect of CM's behavior, invoke sml + (and related commands such as ml-build) using the + -Ccm.use-legacy-lex=false command line switch. + Alternatively, one can define an environment variable + CM_USE_LEGACY_LEX and set it to false.
    • + +
    • Binaries (bootfiles) shipped with this version have been + compiled using lexgen. +
    +
    +
    Newly supported platforms:
    +
    +
      +
    • SML/NJ builds and runs on Intel-based Macs running Mac OS X.
    • +
    • SML/NJ builds and runs on x86-64 systems using 32-bit mode.
    • +
    • SML/NJ should build and run on NetBSD 3.x (untested).
    • +
    +
    +
    Platforms no longer supported:
    +
    +
      +
    • Support for pre-2.2 Linux kernels has been dropped.
    • +
    +
    +
    Runtime system:
    +
    +
      +
    • Signal handling on x86/Linux has been cleaned up.
    • +
    +
    +
    MLRISC:
    +
    +
      +
    • Scripts for rebuilding MLRISC-generated files have been updated + to reflect changes to CM.
    • +
    +
    +
    Backend:
    +
    +
      +
    • Darwin-specific Intel ABI (for foreign-function interface) + implemented. The solution is temporary since it currently relies + on the fact that the compiler itself does not use the FFI.
    • +
    +
    +
    Compilation manager:
    +
    +
      +
    • Improved CM-specific error reporting and handling.
    • +
    +
    +
    Installation:
    +
    +
      +
    • Enabling $/html-lib.cm in config/preloads now works.
    • +
    +
    +
    CML:
    +
    +
      +
    • withNack wrapped around a never event will + now properly signal the negative acknowledgement.
    • +
    +
    +
    + + + diff --git a/doc/html/readme/110.59-README.html b/doc/html/readme/110.59-README.html new file mode 100644 index 0000000..f854987 --- /dev/null +++ b/doc/html/readme/110.59-README.html @@ -0,0 +1,93 @@ + + +<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"> +<html> +<head> +<title>SML/NJ 110.59 NEWS + + + +
    +                         S  M  L   /   N  J
    +
    +                   1  1  0  .  5  9      N  E  W  S
    +			
    +                             June 5, 2006
    +
    +                                WARNING
    +
    +        This working version is believed to be stable, but
    +        we have not run our full regression-test suite.
    +
    + +
    +
    HOME:
    +
    http://www.smlnj.org/index.html
    +
    FILES:
    +
    http://smlnj.cs.uchicago.edu/dist/working/110.59/
    +
    + +

    Summary:

    + + Bugfixes and minor improvements in various areas. + +
    + +

    Details:

    + +
    +
    eXene:
    +
    +
      +
    • committed changes to eXene from Alley Stoughton: + "fixed bugs in X authorization and resource handling, as well + as in the pile and viewport widgets"
    • +
    +
    +
    Runtime:
    +
    +
      +
    • fixed linking problem with NetBSD 3.x.
    • +
    +
    +
    Lexgen:
    +
    +
      +
    • lexgen tool handles non-ascii characters in 7-bit mode + the same way that ml-lex does
    • +
    • lexgen propagates exceptions the same way that + ml-lex does
    • +
    +
    +
    CML:
    +
    +
      +
    • Fixed a bug in the SyncVar polling functions (iGetPoll, + mTakePoll, and mGetPoll) that could lead to + livelock.
    • +
    +
    +
    Scripts:
    +
    +
      +
    • bug in ml-makedepend fixed
    • +
    • patches to heap2exec for Linux and FreeBSD (received + from Johannes 5 Joemann) applied
    • +
    +
    +
    MLRISC:
    +
    +
      +
    • renamed GAS_PSEUDO_OPS to AS_PSEUDO_OPS and + put it in its own file.
    • +
    • added support for NOTB and XORB operators in + pseudo-op expressions
    • +
    • added DarwinPseudoOp functor that supports Darwin's + assembler syntax.
    • +
    • added support for 64-bit integer literals
    • +
    +
    +
    + + + diff --git a/doc/html/readme/110.60-README.html b/doc/html/readme/110.60-README.html new file mode 100644 index 0000000..4fea584 --- /dev/null +++ b/doc/html/readme/110.60-README.html @@ -0,0 +1,103 @@ + + +<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"> +<html> +<head> +<title>SML/NJ 110.60 NEWS + + + +
    +                         S  M  L   /   N  J
    +
    +                   1  1  0  .  6  0      N  E  W  S
    +			
    +                           November 9, 2006
    +
    +                                WARNING
    +
    +        This working version is believed to be stable, but
    +        we have not run our full regression-test suite.
    +
    + +
    +
    HOME:
    +
    http://www.smlnj.org/index.html
    +
    FILES:
    +
    http://smlnj.cs.uchicago.edu/dist/working/110.60/
    +
    + +

    Summary:

    + + Support for Mac OS X on Intel machines, new directory layout, + subversion-based source repository, preliminary amd64 support + in MLRISC and backend, new code generator tools + (ml-ulex, ml-antlr). + + +
    + +

    Details:

    + +
    +
    MLRISC:
    +
    +
      +
    • support for amd64 added (not yet thoroughly tested)
    • +
    +
    + +
    compiler:
    +
    +
      +
    • preliminary hook-up of amd64 backend
    • +
    +
    + +
    runtime:
    +
    +
      +
    • worked around problem with signal delivery on Intel-based Macs
    • +
    +
    + +
    ml-lpt (language processing tools):
    +
    +
      +
    • two new tools: ml-ulex, ml-antlr
    • +
    • ml-ulex: lexer generator with unicode support
    • +
    • ml-antlr: ANTLR-inspired LL(k) parser generator
    • +
    • first public release; still "beta" quality
    • +
    +
    + +
    source repository:
    +
    +
      +
    • the SML/NJ sources are now hosted at smlnj-gforge.cs.uchicago.edu
    • +
    • the revision control system is now subversion
    • +
    • the project name is "smlnj"
    • +
    • source code available via anonymous subversion from + svn://smlnj-gforge.cs.uchicago.edu/smlnj
    • +
    • source code (directory-) tree has new layout
    • +
    +
    + +
    scripts:
    +
    +
      +
    • scripts adjusted to deal with new directory layout
    • +
    • three scripts for simplifying svn access added:
      + check out svn://smlnj-gforge.cs.uchicago.edu/smlnj/admin + and put it on your shell's PATH
    • +
    • use checkout-all.sh to get a fresh copy of the sources
    • +
    • refresh-all.sh: perform svn update on each source (sub-)tree
    • +
    • stat-all.sh: perform svn stat on each source (sub-)tree
    • +
    • see HISTORY file for details + on new layout
    • +
    +
    +
    + + + diff --git a/doc/html/readme/110.61-README.html b/doc/html/readme/110.61-README.html new file mode 100644 index 0000000..30f54aa --- /dev/null +++ b/doc/html/readme/110.61-README.html @@ -0,0 +1,94 @@ + + +<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"> +<html> +<head> +<title>SML/NJ 110.61 NEWS + + + +
    +                         S  M  L   /   N  J
    +
    +                   1  1  0  .  6  1      N  E  W  S
    +			
    +                           December 14, 2006
    +
    +                                WARNING
    +
    +        This working version is believed to be stable, but
    +        we have not run our full regression-test suite.
    +
    + +
    +
    HOME:
    +
    http://www.smlnj.org/index.html
    +
    FILES:
    +
    http://smlnj.cs.uchicago.edu/dist/working/110.61/
    +
    + +

    Summary:

    + + This is mostly a bugfixing release. + +
    + +

    Details:

    + +
    +
    runtime:
    +
    +
      +
    • Fixed the code in + runtime/c-libs/posix-tty/{tcgetattr,tcsetattr}.c to get the + c_cc termios data copied correctly. Also moved the + allocation of the string to avoid problems if it caused a GC. + Thanks to Timothy Bourke for the bug report and fix.
    • +
    +
    + +
    CM:
    +
    +
      +
    • CM's standard shell tools (e.g., mlyacc, + mllex, etc.) that are implemented in terms of + Tools.registerStdShellCmdTool now tolerate (with a + warning) the situation where target files exist and appear + outdated, but the shell command in question fails (e.g., because + the program in question has not been installed yet).
    • + +
    • CM now reports undefined anchors as errors and aborts + execution rather than silently pressing on using bogus + values.
    • + +
    • "[autoloading]" messages can now be suppressed + using the CM.Control.verbose control (or the + -Ccm.verbose=false command line option).
    • + +
    • Fixed bug in CM where "fwith:" specifications that + affect compilation (as opposed to parsing) were ignored.
    • +
    +
    + +
    Basis:
    +
    +
      +
    • Fixed the types of recvVecFrom, + recvVecFrom', recvVecFromNB, and + recvVecFromNB' in the SOCKET signature. This + error is actually in the SML Basis specification too.
    • +
    +
    + +
    Language processing tools (ml-lpt):
    +
    +
      +
    • The implementations of our new language processing tools + (fml-ulex, ml-antlr) have been improved, but + documenation is not yet up-to-date.
    • +
    +
    +
    + + + diff --git a/doc/html/readme/110.62-README.html b/doc/html/readme/110.62-README.html new file mode 100644 index 0000000..16b4af2 --- /dev/null +++ b/doc/html/readme/110.62-README.html @@ -0,0 +1,101 @@ + + +<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"> +<html> +<head> +<title>SML/NJ 110.62 NEWS + + + +
    +                         S  M  L   /   N  J
    +
    +                   1  1  0  .  6  2      N  E  W  S
    +			
    +                           February 2, 2007
    +
    +                                WARNING
    +
    +        This working version is believed to be stable, but
    +        we have not run our full regression-test suite.
    +
    + +
    +
    HOME:
    +
    http://www.smlnj.org/index.html
    +
    FILES:
    +
    http://smlnj.cs.uchicago.edu/dist/working/110.62/
    +
    + +

    Summary:

    + + Improvements to language processing tools and installation procedure. + Other minor bugfixes and enhancements. + +
    + +

    Details:

    + +
    +
    installer:
    +
    +
      +
    • Installer now handles the new ml-lpt-lib.cm.
    • +
    • There is now a new library installer that can be + used by shell scripts.
    • +
    +
    + +
    SML/NJ library:
    +
    +
      +
    • UTF8 structure and signature (from Moby) added.
    • +
    +
    + +
    CML:
    +
    +
      +
    • UTF8 structure and signature exported.
    • +
    +
    + +
    runtime:
    +
    +
      +
    • Added functionality to "shift" command line arguments + (like /bin/sh's shift command).
    • +
    +
    + +
    CM:
    +
    +
      +
    • During startup, while processing command-line arguments, + CM uses the new shift functionality (see above) to trim + down the argument list.
      + This way one can easily write ML code that (when executed via + the command line) takes over control and processes all + remaining arguments.
      + This functionality is utilized by the new library installer.
    • + +
    • CM plugins for tools (e.g., ml-burg, + ml-ulex, ml-antlr) now live + within their respective source trees and are installed + by the library installer.
    • +
    +
    + +
    language processing tools:
    +
    +
      +
    • manual is up-to-date with the code
    • +
    • many minor improvements
    • +
    • a few minor bugfixes
    • +
    • added support for stream created from several kinds of sources
    • +
    +
    +
    + + + diff --git a/doc/html/readme/110.63-README.html b/doc/html/readme/110.63-README.html new file mode 100644 index 0000000..91e456f --- /dev/null +++ b/doc/html/readme/110.63-README.html @@ -0,0 +1,118 @@ + + +<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"> +<html> +<head> +<title>SML/NJ 110.63 NEWS + + + +
    + + Standard ML of New Jersey +
    + Version 110.62 NEWS +
    + March 22, 2007 +
    +
    + WARNING +

    + This working version is believed to be stable, but + we have not run our full regression-test suite. +
    +
    + +
    + +
    +
    SML/NJ HOME:
    +
    http://www.smlnj.org/index.html
    +
    FILES:
    +
    http://smlnj.cs.uchicago.edu/dist/working/110.63/
    +
    + +
    + +

    Summary:

    + + This release is primarily a bugfix release. + +

    Details:

    + +
    +
    installer:
    +
    +
      +
    • Eliminated any mention of lexgen. (Lexgen was an early precursor + to ml-ulex.)
    • +
    +
    + +
    command-line tools
    +
    +
      +
    • + Fixed typo in ml-build script that prevented library anchors + from being registered. +
    • +
    +
    + +
    SML Basis library:
    +
    +
      +
    • + Fixed a number of inconsistencies between the Posix.TTY structure + and the Basis specification. Thanks to Adam Chilpala. +
    • +
    +
    + +
    CM:
    +
    +
      +
    • + Fixed bug in CM's parallel make facility that failed to have + the master re-link modules after letting slaves compile them. +
    • +
    +
    + +
    MLRISC:
    +
    +
      +
    • + x86MCEmitter crashed when the immediate operand to MOVB was outside + of the range -128 ... 127. Only the low order 8 bits of the + immediate operand are now significant. +

    • +
    • + Added preliminary support in MLRISC for Staged Allocation, a technique + for specifying calling conventions. See +
      + + http://www.eecs.harvard.edu/~nr/pubs/staged-abstract.html +
      + Initially, this mechanism is being used to generate C calls for the + AMD64. +

    • +
    • + Fixed bogus operand sizes in AMD64 instruction spilling. +
    • +
    +
    +
    + +
    runtime:
    +
    +
      +
    • Fixed a problem with files that have very large numbers of literals. + When SaveCState was called with two values to save, a subsequent + GC could cause the RestoreCState to fail because the saved state had been + promoted to tagless pair.
    • +
    +
    + + + diff --git a/doc/html/readme/110.64-README.html b/doc/html/readme/110.64-README.html new file mode 100644 index 0000000..fdc6fc2 --- /dev/null +++ b/doc/html/readme/110.64-README.html @@ -0,0 +1,247 @@ + + + +SML/NJ 110.64 NEWS + + + +
    + + Standard ML of New Jersey +
    + Version 110.64 NEWS +
    + May 31, 2007 +
    +
    +
    + +
    +
    +

    +


    + +
    +
    SML/NJ HOME:
    +
    http://www.smlnj.org/index.html
    +
    FILES:
    +
    +http://smlnj.cs.uchicago.edu/dist/working/110.64/
    +
    + +
    + +

    Summary:

    + +Many of the changes affect FLINT and its interaction with the +front end, and particularly the system for supporting primops +(primitive operations). Additional bugs have been fixed, and +ml-lpt as well as AMD64 support have been updated. + +

    Details:

    + +
    + +
    Front End and FLINT:
    +
    +
      +
    • + Implemented a new scheme for defining primops and tracking + and analyzing their types in the type checker and in FLINT. + Improved the way that the type checker captures type + information used by the translate phase to calculate + FLINT types. +
    • +
    • + Improved signature printing by making it faithfully reflect + the original order of specifications or (for inferred + signatures) definitions. +
    • +
    • + First phase of streamlining the type system of the plambda and flint + intermediate languages. +
    • +
    • + Fixed some bugs in the type checker and module system. +
    • +
    • + Infinite loop in FLINT (tests/typing/tests/25.sml) + (fix by Stefan Monnier). +
    • +
    • + Implemented integer and word division operations (div, mod, + rem, quot) with an explicit test for zero division. This + fixes several previously open bugs related to the treatment of + divide-by-zero errors. +
    • +
    +
    + +
    SML Basis library:
    +
    +
      +
    • + Fixed a bug in the implementation of order operations + and min and max for Int32. Fixed several other bugs in the Basis + that were revealed by running the regression tests. +
    • +
    • + A number of fixes related to the formatting of dates. These + include fixes for bugs #1415 and #1416. We also now correctly + handle format characters that lie outside the specified set. +
    • +
    • + Timer.cpu_timer, etc. type printing corrected (by making Timer + have opaque sig constraint in basis/Implementation/timer.sml) +
    • +
    • + Added missing toLarge/fromLarge operations to WordN modules. +
    • +
    • + Added missing InvalidArc to OS.Path. +
    • +
    • + Fixed bug in printing negative time values. +
    • +
    • + Char.fromString now handles the \uxxxx escape sequence. +
    • +
    +
    + +
    CM:
    +
    +
      +
    • + Added a boolean control named cm.tolerate-tool-failures + (env. variable name: CM_TOLERATE_TOOL_FAILURES). The default + is false and makes CM fail if a shell tool reports a + non-success exit status. If the control is set to true, then + CM will press on after tool failures in the event that all + target files exist (even when some of them are considered + outdated). Turning the control to true can be useful for + bootstrapping. +
    • +
    • + Changed the installation mechanism for CM tool plugins. These + are just libaries and now get installed like ordinary + libaries.
      + + There are now a number of new installation targets that give + some fine-grain control over what classes and suffixes are + known, and what they will map to. See config/targets for + details.
      + + The code that caused plugin installation as part of running a + tool's "build" script has been removed. (The build script is for + building, not for installing.) +
    • +
    +
    + +
    SML/NJ Library
    +
    +
      +
    • + Added next function to Fifo and Queue modules. +
    • +
    +
    + +
    ml-lpt:
    +
    +
      +
    • + The name of several ml-lpt-lib modules has changed: +
      +	  Repair	=> AntlrRepair
      +	  StreamPos	=> AntlrStreamPos
      +	  ErrHandlerFn	=> AntlrErrHandler
      +	  EBNF		=> AntlrEBNF
      +        
      +
    • +
    • + The ml-antlr specification format has changed: declarations such as + %tokens and nonterminal definitions can occur *multiple* times in the + same specification. The semantics are such that each new + declaration extends the previous ones. This does not apply to + %start or %name, of course. +
    • + +
    • + Importing a grammar via %import now includes all + declarations in from the imported grammar, except for %name, + %entry, and %start. Tokens and nonterminals can be dropped + using the new %dropping clause of the + %import directive; the separate %drop and %extend have been + removed. +
    • + +
    • + We now allow optional type annotations on nonterminals, using + the %nonterms directive as in ml-yacc. +
    • + +
    • + The refcell construct is now implemented using SML's regular + reference cells, so the :== and !! notation has been + deprecated. +
    • + +
    • + The ml-antlr tool now does much more checking of + specifications, and its error messages have been greatly + improved. Error repair for generated parsers has been + completely rewritten, and is now both much faster and more + accurate. +
    • +
    • + ml-ulex is now more lenient with escape codes + (non-SML-standard escape codes are now interpreted literally, + so e.g. \| denotes "|"). Also, character classes may now + include a "-" character at the beginning as is standard in + most other regexp tools. +
    • +
    • + All of these changes are documented in the user guide, which + has been updated and improved. +
    • +
    +
    + +
    AMD64 support:
    +
    +
      +
    • + Added the new MLRISC code generator for the AMD64. This + version, in contrast to the previous one, uses SSE registers + and instructions for all floating-point computations. +
    • +
    • + Added FSQRT instructions for the AMD64 code generator. +
    • +
    +
    + +
    Regression Tests:
    +
    +
      +
    • + Revised the regression test suite and brought the tests and + reference outputs up to date. Added a few new tests. Cleaned + up the test scripts. +
    • +
    +
    + +
    + + + diff --git a/doc/html/readme/110.65-README.html b/doc/html/readme/110.65-README.html new file mode 100644 index 0000000..dcd285a --- /dev/null +++ b/doc/html/readme/110.65-README.html @@ -0,0 +1,42 @@ + + + +SML/NJ 110.65 NEWS + + + +
    + + Standard ML of New Jersey +
    + Version 110.65 NEWS +
    + June 7, 2007 +
    +
    +
    +
    +
    +

    +


    + +
    +
    SML/NJ HOME:
    +
    http://www.smlnj.org/index.html
    +
    FILES:
    +
    +http://smlnj.cs.uchicago.edu/dist/working/110.65/
    +
    + +
    + +

    Summary:

    + +This is a pure bugfix release. In 110.64 we completely switched over +from using ml-lex to using the new ml-ulex. This +step exposed a number of bugs which, among others, +affected ckit and nlffi. 110.65 is provided to fix +these immediate problems. + + + diff --git a/doc/html/readme/110.67-README.html b/doc/html/readme/110.67-README.html new file mode 100644 index 0000000..14dcf94 --- /dev/null +++ b/doc/html/readme/110.67-README.html @@ -0,0 +1,70 @@ + + + +SML/NJ 110.67 NEWS + + + +
    + + Standard ML of New Jersey +
    + Version 110.67 NEWS +
    + November 14, 2007 +
    +
    +
    +
    +
    +

    +


    + +
    +
    SML/NJ HOME:
    +
    http://www.smlnj.org/index.html
    +
    FILES:
    +
    +http://smlnj.cs.uchicago.edu/dist/working/110.67/
    +
    + +
    + +

    Summary:

    + +This release includes a revamped installer for Windows, support for Mac OS X 10.5 +(Leopard), a few enhancements and many bug fixes. +The detailed description below includes changes that were part of the 110.66 +release, since it was not widely announced. + +

    Details:

    + +
    + +
    Windows:
    +
    +
      +
    • The installer now uses the MSI mechanism and should be much more robust.
    • +
    • We have dropped support for Windows 95 and Windows 98.
    • +
    • The long-standing problem with handling ^C on Windows has been fixed.
    • +
    +
    + +
    Bug fixes:
    +
    +
      +
    • Fixed a type error in the ml-lpt library that occurs when compiling + against a basis that was compiled with the USE_64_BIT_POSITIONS + symbol set. +
    • A collection of bug fixes for the Reactive library.
    • +
    • Patches to support Mac OS X 10.5 (Leopard) on both Intel and PPC Macs.
    • +
    • Many fixes to the AMD64 code generator in MLRISC.
    • +
    • Fixed performance bugs when dealing with very large structures and record values.
    • +
    • Fixed bug in {TextIO,BinIO}.StreamIO.endOfStream that would incorrectly + signal end of stream. (110.66)
    • +
    • Real.signBit(~0.0) now returns true. (110.66)
    • +
    +
    + + + diff --git a/doc/html/readme/110.68-README.html b/doc/html/readme/110.68-README.html new file mode 100644 index 0000000..ad3a09b --- /dev/null +++ b/doc/html/readme/110.68-README.html @@ -0,0 +1,88 @@ + + + +SML/NJ 110.68 NEWS + + + +
    + + Standard ML of New Jersey +
    + Version 110.68 NEWS +
    + August 13, 2008 +
    +
    +
    +
    +
    +

    +


    + +
    +
    SML/NJ HOME:
    +
    http://www.smlnj.org/index.html
    +
    FILES:
    +
    +http://smlnj.cs.uchicago.edu/dist/working/110.68/
    +
    + +
    + +

    Summary:

    + +

    Details:

    + +
    + +
    General:
    +
    +
      +
    • first batch of changes moving FLINT transforms and + computations out of front end to translate phase; improves type + checking and type error messages for val declarations. +
    • +
    • added build support for OpenBSD (not tested)
    • +
    • fixed Int64.fromString to use base-10
    • +
    +

    + +

    SML/NJ Library:
    +
    +
      +
    • New library to support the reading and writing of JSON files.
    • +
    • Committed a major overhaul of the RegExp library. There is now a new + engine that implements Ken Thompson's RE matching algorithm. The result + type of the RE matches has also been simplified by removing an unecessary + option type. +
      + NOTE: the new RE engine is not complete in that it does not yet support + end marks. +
    • +
    • Added additional operations to the FormatComb module
    • +
    +

    + +

    ml-lpt changes:
    +
    +
      +
    • ml-ulex bug fix for corner case
    • +
    • documentation updates
    • +
    • bugs in the parsing of specifications were fixed
    • +
    • changes to be more compatable with MLton
    • +
    +

    + +

    Windows:
    +
    +
      +
    • implemented timer-based profiling
    • +
    • completed implementation of SML Basis Library Windows structure.
    • +
    • added nlffi support (see README for info)
    • +
    • various fixes to installation process
    • +
    +
    + + + diff --git a/doc/html/readme/110.69-README.html b/doc/html/readme/110.69-README.html new file mode 100644 index 0000000..3c51fab --- /dev/null +++ b/doc/html/readme/110.69-README.html @@ -0,0 +1,67 @@ + + + +SML/NJ 110.69 NEWS + + + +
    + + Standard ML of New Jersey +
    + Version 110.69 NEWS +
    + January 13, 2009 +
    +
    +
    +
    +
    +

    +


    + +
    +
    SML/NJ HOME:
    +
    http://www.smlnj.org/index.html
    +
    FILES:
    +
    +http://smlnj.cs.uchicago.edu/dist/working/110.69/
    +
    + +
    + +

    Summary:

    + +SML/NJ 110.69 fixes several minor bugs, and adds new +concurrency-related instructions to MLRISC. + +

    Details:

    + +
    + +
    General:
    +
    +
      +
    • Fixes to build support for OpenBSD.
    • +
    • Fixed problem with CM tools when paths contain spaces.
    • +
    • Minor bug fixes and updates.
    • +
    +

    + +

    SML/NJ Library:
    +
    +
      +
    • Minor fixes to the JSON library. +
    • +
    +

    + +

    MLRISC:
    +
    +
      +
    • Added concurrency instructions for x86 and AMD64.
    • +
    +
    + + + diff --git a/doc/html/readme/110.70-README.html b/doc/html/readme/110.70-README.html new file mode 100644 index 0000000..5c2ce95 --- /dev/null +++ b/doc/html/readme/110.70-README.html @@ -0,0 +1,60 @@ + + + +SML/NJ 110.70 NEWS + + + +
    + + Standard ML of New Jersey +
    + Version 110.70 NEWS +
    + June 17, 2009 +
    +
    +
    +
    +
    +

    +


    + +
    +
    SML/NJ HOME:
    +
    http://www.smlnj.org/index.html
    +
    FILES:
    +
    +http://smlnj.cs.uchicago.edu/dist/working/110.70/
    +
    + +
    + +

    Summary:

    + +SML/NJ 110.70 fixes several bugs, most importantly the "unpickling bug". + +

    Details:

    + +
    + +
    General:
    +
    +
      +
    • Corrected problem in config/actions that led to the + so-called "unpickling bug" which appeared in version 110.68.
    • +
    • Provided fix for the 64-bit pattern match bug.
    • +
    • Fixed bug in Int32.fmt when the argument was the minimum int and the + radix was something other than DEC.
    • +
    +

    + +

    ML-ANTLR:
    +
    +
      +
    • Fixed bugs in how ml-antlr parsed ML types in %tokens specifications.
    • +
    +
    + + + diff --git a/doc/html/readme/110.71-README.html b/doc/html/readme/110.71-README.html new file mode 100644 index 0000000..2fe02f0 --- /dev/null +++ b/doc/html/readme/110.71-README.html @@ -0,0 +1,64 @@ + + + +SML/NJ 110.71 NEWS + + + +
    + + Standard ML of New Jersey +
    + Version 110.71 NEWS +
    + September 16, 2009 +
    +
    +
    +
    +
    +

    +


    + +
    +
    SML/NJ HOME:
    +
    http://www.smlnj.org/index.html
    +
    FILES:
    +
    +http://smlnj.cs.uchicago.edu/dist/working/110.71/
    +
    + +
    + +

    Summary:

    + +SML/NJ 110.71 fixes several bugs and adds support for OS X 10.6 (Snow Leopard). + +

    Details:

    + +
    + +
    General:
    +
    +
      +
    • Changed the runtime system to support compilation on Mac OS X 10.6.
    • +
    • Removed redundant implementations of various top-level operations.
    • +
    +

    + +

    ML-Ulex:
    +
    +
      +
    • Fixed scanning of actions when the terminating ")" and ";" have whitespace between them.
    • +
    +

    + +

    MLRISC:
    +
    +
      +
    • Fixed a bug in the register-spill generator that is part of the MLRISC register allocator.
    • +
    +
    + + + diff --git a/doc/html/readme/110.72-README.html b/doc/html/readme/110.72-README.html new file mode 100644 index 0000000..0807331 --- /dev/null +++ b/doc/html/readme/110.72-README.html @@ -0,0 +1,89 @@ + + + +SML/NJ 110.72 NEWS + + + +
    + + Standard ML of New Jersey +
    + Version 110.72 NEWS +
    + February 2, 2010 +
    +
    +
    +
    +
    +

    +


    + +
    +
    SML/NJ HOME:
    +
    http://www.smlnj.org/index.html
    +
    FILES:
    +
    +http://smlnj.cs.uchicago.edu/dist/working/110.72/
    +
    + +
    + +

    Summary:

    + +SML/NJ 110.72 adds a new flag for determining the heap suffix, new +directives and MLton compatibility to ML-Ulex and ML-Antlr, and fixes +several bugs. + +

    Details:

    + +
    + +
    General:
    +
    +
      +
    • Added "@SMLsuffix" flag to sml command. This can be used to + get the suffix for heap files.
    • + +
    • Fixed an overrun during major GC. If the string arena was + nearly full, it was possible for alignment padding added during + copy to the to-space to overrun the allocated size.
    • + +
    • Fixed the Win32 unable to print long strings bug.
    • + +
    • Fixed performance bugs in List module by making @ and foldr + be tail recursive.
    • + + +
    +

    + +

    ML-Ulex:
    +
    +
      +
    • Added "--strict-sml" flag to ml-ulex for MLton + compatibility.
    • + +
    • Added %header directive to the ml-ulex scanner generator. + Also updated the documentation.
    • + +
    • The ml-ulex program has been ported to build under + mlton.
    • + +
    +

    + +

    ML-Antlr:
    +
    +
      +
    • Added %header directive to the ml-antlr parser generator. + Also updated the documentation.
    • + +
    • The ml-antlr program has been ported to build under + mlton.
    • +
    +

    + + + diff --git a/doc/html/readme/110.73-README.html b/doc/html/readme/110.73-README.html new file mode 100644 index 0000000..1681dfd --- /dev/null +++ b/doc/html/readme/110.73-README.html @@ -0,0 +1,193 @@ + + + +SML/NJ 110.73 NEWS + + + + +

    + + Standard ML of New Jersey +
    + Version 110.73 NEWS +
    + May 16, 2011 +
    +
    +
    +
    +
    +

    +


    + +
    +
    SML/NJ HOME:
    +
    http://www.smlnj.org/index.html
    +
    FILES:
    +
    +http://smlnj.cs.uchicago.edu/dist/working/110.73/
    +
    + +
    + +

    Summary:

    +

    +SML/NJ 110.73 provides a number of new library features, including a +new library for working with HTML 4, as well as many bug fixes. +

    + +

    Details:

    + +
    + +
    CM:
    +
    +
      +
    • + Added boolean literals (true and false) to the conditional-expression + syntax in CM. Thus, you can write
      +
      +#if true + structure Foo +#endif +
      + in a CM file. This change is meant to make it easier to use autoconf + to configure the build process of an SML application. +
    • +
    +
    + +
    ML-Yacc:
    +
    +
      +
    • + Fixed ml-yacc examples to respect the changed signatures with respect + to TextIO.inputLine. +
    • +
    +

    + +

    SML/NJ Library:
    +
    +
      +
    • + Added findExe function to PathUtil module. +
    • +
    • + Modified the implementation of GetOpt.usageInfo so that if the help + string has embedded newlines, then the extra lines are properly + indented. +
    • +
    • + Changed the interface of JSONStreamParser to support both parsing files + and TextIO.instreams. +
    • +
    • + Added HTML4 library. +
    • +
    • + Fixed bug in hashed cons library (bug #55). +
    • +
    • + Added array iterators to DynamicArray module. +
    • +
    +
    + +
    Concurrent ML:
    +
    +
      +
    • + The paths used to specify the CML versions of libraries in a CM file have been + rationalized (bug #68) +
      +$cml/basis.cm -- the CML version of $/basis.cm +$cml/cml.cm -- core CML features +$cml/cml-lib.cm -- CML library code +$cml/trace-cml.cm -- TraceCML library for debugging +$cml/smlnj-lib.cm -- CML version of the $/smlnj-lib.cm library +$cml/inet-lib.cm -- CML version of the $/inet-lib.cm library +$cml/unix-lib.cm -- CML version of the $/unix-lib.cm library +
      + Note that the old naming scheme is still supported, but may be removed + in some future version. +
    • +
    • + Added Barriers module to CML. +
    • +
    • + Fixed the Win32 socket and polling implementation to work correctly + with CML. Signature of poll was wrong and didn't handle sockets at all. +
    • +
    +
    +
    MLRISC:
    +
    +
      +
    • + Added support for the RTDSC and RTDSCP instructions to the amd64 code generator. +
    • +
    +

    + +

    Bugs:
    +
    +

    Here is a list of tracked bugs fixed by this release, please see the + bug tracker + for more details. +

    + + + + + + + + + + + + + + +
    38CM bug in calling ML-Lex/Yacc in Windows
    39SML/NJ doesn't build on NetBSD out of the box
    49Slight syntactic difference between ml-lex and ml-ulex in backward compatibility mode
    50CM can't invoke ml-yacc or ml-lex on Windows
    55HashCons equality test doesn't work because tags aren't incremented
    56CM still can't invoke ml-yacc on Windows
    59Non-blocking socket functions broken
    61Problem with quoting, CM, and Windows
    63Comments not lexed correctly in mlyacc grammar files
    67Installation fails on mac OSX (10.7, Lion)
    68CM anchors for CML need rationalization
    69String.scan function missing from basis library
    70confusing error message when trying to install on 64-bit Linux without 32-bit support
    +

    The following unnumbered bugs were also fixed: +

    +
      +
    • + Changed the Win32 implementation of validArc to support directories + with extended characters (umlauts, etc.). +
    • +
    • + Fixed Real.toString and Real.fmt to include sign for negative zero. +
    • +
    • + Fixed the bug with Win32 calls to OS.Process.system not quoting the string. +
    • +
    +
    + + + diff --git a/doc/html/readme/110.74-README.html b/doc/html/readme/110.74-README.html new file mode 100644 index 0000000..9935048 --- /dev/null +++ b/doc/html/readme/110.74-README.html @@ -0,0 +1,123 @@ + + + +SML/NJ 110.74 NEWS + + + + +
    + + Standard ML of New Jersey +
    + Version 110.74 NEWS +
    + January 20, 2012 +
    +
    +
    +
    +
    +

    +


    + +
    +
    SML/NJ HOME:
    +
    http://www.smlnj.org/index.html
    +
    FILES:
    +
    +http://smlnj.cs.uchicago.edu/dist/working/110.74/
    +
    + +
    + +

    Summary:

    +
    +

    +SML/NJ 110.74 provides a number of new library features, including a +new library for working with S-expressions, as well as many bug fixes. +

    +

    +Changes to the type checker support a preliminary version of enhanced +type error messages including "culprits" indicating the origins of +conflicting type constructors. This functionality is not fully +debugged printing of enhanced error messages needs further refinement, +so the additional information is not printed by default. +

    +
    + +

    Details:

    + +
    + +
    Compiler:
    +
    +
      +
    • + Added cuprit tracking (as described in "A simple and effective + method for assigning blame for type errors" at the 2010 ML + Workshop) to the type checker. Further debugging and more refined + presentation of the cuprit info are needed, so this functionality + is currently off by default. It is controled by the new flag + Control.Elab.showTypeErrorCulprits (default false). +
    • +
    +
    + +
    SML/NJ Library:
    +
    +
      +
    • + Added hash-table-based implementation of sets to the utility library. + The new modules are signature HASH_SET and functor HashSetFn. +
    • +
    • + Added new S-expression library for I/O of semi-structured data (contributed + by Damon Wang). +
    • +
    • + Fixed bug in JSON scanner. +
    • +
    +
    + +
    Bugs:
    +
    +

    Here is a list of tracked bugs fixed by this release, please see the + bug tracker + for more details. +

    + + + + + + + + +
    60Incorrect type inference with functor taking/producing polymorphic type
    74Errors in ml-lpt manual
    76blastWrite crashes
    77strange error message for syntax error
    80Installation fails on Linux 3.0
    81sml/nj does not work with Linux 3.0
    83No support for Kernel 3.x
    +
    + + + diff --git a/doc/html/readme/110.75-README.html b/doc/html/readme/110.75-README.html new file mode 100644 index 0000000..4ead536 --- /dev/null +++ b/doc/html/readme/110.75-README.html @@ -0,0 +1,127 @@ + + + +SML/NJ 110.75 NEWS + + + + +
    + + Standard ML of New Jersey +
    + Version 110.75 NEWS +
    + October 1, 2012 +
    +
    +
    +
    +
    +

    +


    + +
    +
    SML/NJ HOME:
    +
    http://www.smlnj.org/index.html
    +
    FILES:
    +
    + http://smlnj.cs.uchicago.edu/dist/working/110.75/ +
    +
    + +
    + +

    Summary:

    +
    +

    +The primary purpose of this release is to support the latest version of +Mac OS X (10.8/Mountain Lion). +There are also a number of other bug fixes and additions to the SML/NJ Library. +

    +
    + +

    Details:

    + +
    + +
    SML/NJ Library:
    +
    +
      +
    • + Added subtract, subtract', and subtractList + functions to the ORD_SET interface and implementations. +
    • +
    • + Added all function to the ORD_SET interface and implementations. +
    • +
    • + Added exists, existsi, all, and alli functions + to the ORD_MAP interface and implementations. +
    • +
    • + Added Base64 module that supports encoding/decoding between Word8 vectors and + base64 strings. +
    • +
    • + Modified PathUtil module to handle the case where the filename is an + absolute path. +
    • +
    +
    + +
    Language Processing Tools (ML-LPT)
    +
    +
      +
    • Performance improvements and eliminated the direct dependency of ml-ulex + lexers on the SML/NJ Library. +
    • +
    • + Bug fix in ml-antlr to ensure that the generated toString function + for tokens is strictly legal SML code (i.e., non-printing characters + and UTF8 multibyte sequences are properly escaped). +
    • +
    +
    + +
    Bugs:
    +
    +

    Here is a list of tracked bugs fixed (or closed) with this release, please see the + bug tracker + for more details. +

    + + + + + + + + + +
    87Buildling SML/NJ on Windows fails if there is a number in the path
    88Support for UTF8 path names
    89Building on OS X Lion
    92IntInf.scan does not work on Hex numbers
    94Running on OS X Mountain Lion
    96Build Failure with Xcode 4.3
    101errono = 12
    104ckit number parsing
    +
    + + + diff --git a/doc/html/readme/110.76-README.html b/doc/html/readme/110.76-README.html new file mode 100644 index 0000000..71f8de4 --- /dev/null +++ b/doc/html/readme/110.76-README.html @@ -0,0 +1,95 @@ + + + +SML/NJ 110.76 NEWS + + + + +
    + Standard ML of New Jersey +
    + Version 110.76 NEWS +
    + July 1, 2013 +
    +
    +
    +

    +


    + +
    +
    SML/NJ HOME:
    +
    http://www.smlnj.org/index.html
    +
    FILES:
    +
    + http://smlnj.cs.uchicago.edu/dist/working/110.76/ +
    +
    + +
    + +

    Summary:

    +
    +

    +The primary purpose of this release are bug fixes (mostly on Windows) and to add support for the latest version of +Mac OS X (10.9/Mavericks). +

    +
    + +

    Details:

    + +
    +
    CML:
    +
    + Added Word64 to the CML Basis. Note that, as with SML/NJ, this module is not + the LargeWord structure (because of efficiency issues). Also added + some missing SML Basis Library signatures to the CML version of + the Basis Library. +
    +
    Bugs:
    +
    +

    Here is a list of tracked bugs fixed (or closed) with this release, please see the + bug tracker + for more details. +

    + + + + + + + + + +
    107Bogus of Int64 comparison
    108off-by-one error in Util/dynamic-array.sml; iterators crash
    111Socket.acceptNB returns somewhat broken sockets
    113Socket.select waits exactly twice the indicated timeout
    115BinPrimIO writer method getPos doesn't work under CML
    116Socket.sameDesc raises Match exception
    117BinIO.openAppend raises IO on non-existent file
    118Incorrect comparisons of Int64 on amd64 (same as #107)
    +
    + + + diff --git a/doc/html/readme/110.77-README.html b/doc/html/readme/110.77-README.html new file mode 100644 index 0000000..422120d --- /dev/null +++ b/doc/html/readme/110.77-README.html @@ -0,0 +1,216 @@ + + + +SML/NJ 110.77 NEWS + + + + +
    + Standard ML of New Jersey +
    + Version 110.77 NEWS +
    + August 22, 2014 +
    +
    +
    +

    +


    + +
    +
    SML/NJ HOME:
    +
    http://www.smlnj.org/index.html
    +
    FILES:
    +
    + http://smlnj.cs.uchicago.edu/dist/working/110.77/ +
    +
    + +
    + +

    Summary:

    +

    +The primary purpose of this release is bug fixes, but it does include some new features +and adds support for Mac OS X 10.10 (Yosemite). +

    + +

    Details:

    + +
    +
    SML/NJ compiler:
    +
    +

    + We added the PackWord{16,32}{Big,Little} + structures to the Unsafe module. + This change makes SML/NJ's UNSAFE signature closer to the MLton version. +

    +
    +
    ml-lpt
    +
    +

    + Fixed an inconsistency in the way that ml-antlr and ml-ulex handled the contents of + a %defs declaration. Ml-ulex made these definitions visible in the UserDeclarations + substructure, whereas ml-antlr hid them. We have changed the behavior of ml-ulex to match + that of ml-antlr (i.e., hide the user definitions). We chose to hide the user definitions + in ml-ulex because they are usually not useful outside the lexer, hiding them reduces The + size of the generated code, and definitions that are needed outside the lexer can be + defined in an external module. Note that the previous behavior remains when ml-ulex is + run in ml-lex compatibility mode. (This change addresses bug #79). +

    +

    + Added the actionToString' and repairToString' functions + to the AntlrRepair structure. These functions allow one to + specialize the printing of tokens based on whether they are + being added or deleted. +

    +

    + Removed the toksToString function from the tokens structure that ml-antlr + generates. It was originally for use by the AntlrRepair structure, but that + structure does not use it. +

    +

    + Fixed a minor bug where a syntax error in the grammar specification could go undetected and + result in a syntatically incorrect output file. +

    +

    + Improvements to the documentation. +

    +
    +
    SML/NJ Library:
    +
    +

    + Added new library for parsing XML files. The library does not support validation + or the full XML syntax. It is designed to for applications that need to process + XML-format data files. To include the library in a project, add the following + line in your CM file: +

    +
    +$/xml-lib.cm +
    +

    + Reimplementation of the delete/remove operations in the red-black-tree + versions of sets and maps. The previous implementation had a bug + that could result in violations of the data-structure invariants, + which was leading to unbalanced trees and loss of performance. +

    +

    + Fixes and improvements to the S-expression parsing library. The supported syntax + is now a proper subset of that found in Lisp-like languages. +

    +
    +
    Build scripts
    +
    +

    + Changed how the system handles running on Cygwin. Previously, the default behavior + was to run the Win32 version of the runtime. Now the default behavior is to run + the Cygwin (i.e., Unix) version of the runtime. Setting the shell variable + SMLNJ_WINDOWS_RUNTIME to any non-empty value will cause the Win32 runtime + to be used. The script config/build-windows.sh has been added to make + using Cygwin to build the windows version more convenient. +

    +
    +
    Documentation
    +
    +

    + This release includes Unix-style manual pages for the command-line tools. On Mac OS X + (x86 only), the installer will copy these to /usr/local/share/man (if it exists). +

    +
    + + +

    Bugs:

    +

    + Here is a list of tracked bugs fixed (or closed) with this release, please see the + bug tracker + for more details. +

    + + + + + + + + + + + + +
    35Please ship manpages for binaries
    79ml-lpt: inconsistent treatment of %defs directive between ml-antlr and ml-ulex
    85top-level identfiers in generated code clash with legacy code
    110IntInf.~>> returns the wrong answer on negative input
    119Conversion from vector of tuple to vector of vector causes a crash
    120Install fails w/ map failure on Chromebook
    124Add support for OpenBSD/powerpc
    125build script is broken on Cygwin-x86
    127Crash on windows with OS.Process.system
    128Basis spec violation, race condition in OS.Filesys.tmpName()
    +

    The following unnumbered bugs were also fixed: +

    +
      +
    • + Fixed a long-standing bug in Socket.recvVec, which prevented the result + from being used in a string pattern match (thanks to Vesa Norrman + for the fix). +
    • +
    • + Fixed minor issue in an error message; type variable name should be + printed with leading '. +
    • +
    + +

    Supported systems:

    +
    +

    +We have verified that the system builds and runs on the following systems. +

    +
    + + + + + + + + + + + + +
    Architecture Operating System
    PowerPC Mac OS X 10.5 (Leopard)
       
    x86 Mac OS X 10.6 (Snow Leopard)
    Mac OS X 10.7 (Lion)
    Mac OS X 10.8 (Mountain Lion)
    Mac OS X 10.9 (Mavericks)
    Mac OS X 10.10 (Yosemite)
    Ubuntu 12.04.4 (GNU/Linux 3.2.0)
    Windows 7
    Cygwin (hosted on Windows 7)
    +
    +

    +We believe that it runs on any recent Linux distribute that has support for 32-bit +executables, as well as on BSD variants, SPARC/Solaris, and PPC/AIX, but we have not +tested these systems. +

    +
    + + + diff --git a/doc/html/readme/110.78-README.html b/doc/html/readme/110.78-README.html new file mode 100644 index 0000000..20020bc --- /dev/null +++ b/doc/html/readme/110.78-README.html @@ -0,0 +1,188 @@ + + + +SML/NJ 110.78 NEWS + + + + +
    + Standard ML of New Jersey +
    + Version 110.78 NEWS +
    + December 24, 2014 +
    +
    +
    +

    +


    + +
    +
    SML/NJ HOME:
    +
    http://www.smlnj.org/index.html
    +
    FILES:
    +
    + http://smlnj.cs.uchicago.edu/dist/working/110.78/ +
    +
    + +
    + +

    Summary:

    +

    +The primary purpose of this release is improvements to libraries and tools, with some +important bug fixes too. +

    + +

    Details:

    + +
    +
    SML/NJ compiler:
    +
    +

    + Major revision of the machinery for overloading resolution for both operators (vars) + and literals, now using a common mechanism. These changes fixes bug #52 + by improving the error message when an overloaded operator is inconsistent with its context. + Updated 23 files, including major changes in overload.sml, types.sml, + unify.sml, elabcore.sml, typesutil.sml. + The overload declaration is still used in pervasives.sml, where the order + of the specified instances of an ordering determines the default interpretation (i.e., + the first one).

    +

    +

    The SCHEME and LITERAL forms of tyvars are replaced by a new + OVLD form that tracks potential instantiations of the type of the + overloaded vars or literals. +

    +
    +
    SML/NJ Library:
    +
    +

    + A number of improvements and changes to the HTML4 library. + Added the HTML4Attrs module, which provides help for constructing attribute-value + pairs, and the HTML4Entities modules, which provides names for the standard + HTML4 entities. +

    +

    + New implementation of ListMergeSort. This implementation is + better than the previous implementation on sorted data (both ascending and descending) and + faster on smaller lists. It is slower than the previous + implementation on very large lists (e.g., 106 elements + or more). +

    +
    SML/NJ runtime:
    +
    +

    + Added -D_FILE_OFFSET_BITS=64 flag to x86-linux makefile. This flag + is necessary to avoid spurious EOVERFLOW errors on some versions of + Linux. The problem appears to be limited to large file systems that + have more than 232 inodes. +

    +
    +
    ml-lpt
    +
    +

    + Improved error message for when the lookahead computation fails in ml-antlr. +

    +

    + Improved error messages in ml-ulex for unclosed strings. Also made + documentation improvements. +

    +

    + Added %value directive to ml-antlr; this addition improves the + error repair choices by allowing non-nullary tokens to be inserted + when making repairs. +

    +
    +
    Documentation:
    +
    +

    + Moved base/NOTES/HISTORY file to doc/src/changelog/HISTORY.txt + and converted it to ASCIIDOC format. Also + moved the README files from base/READMES to doc/src/release-notes. + These changes are part of a general effort to rationalize and improve + the documentation of the SML/NJ system.

    +

    +
    + + +

    Bugs:

    +

    + Here is a list of tracked bugs fixed (or closed) with this release, please see the + bug tracker + for more details. +

    + + + +
    52nonsensical type error message
    +

    The following unnumbered bugs were also fixed: +

    +
      +
    • + Fixed bug with failures on very large Linux file systems (described above). +
    • +
    + +

    Supported systems:

    +
    +

    +We have verified that the system builds and runs on the following systems. +

    +
    + + + + + + + + + + + + +
    Architecture Operating System
    PowerPC Mac OS X 10.5 (Leopard)
       
    x86 Mac OS X 10.6 (Snow Leopard)
    Mac OS X 10.7 (Lion)
    Mac OS X 10.8 (Mountain Lion)
    Mac OS X 10.9 (Mavericks)
    Mac OS X 10.10 (Yosemite)
    Ubuntu 12.04.4 (GNU/Linux 3.2.0)
    Windows 7
    Cygwin (hosted on Windows 7)
    +
    +

    +We believe that it runs on any recent Linux distribute that has support for 32-bit +executables, as well as on BSD variants, SPARC/Solaris, and PPC/AIX, but we have not +tested these systems. +

    +
    + + + diff --git a/doc/html/readme/110.79-README.html b/doc/html/readme/110.79-README.html new file mode 100644 index 0000000..56575e2 --- /dev/null +++ b/doc/html/readme/110.79-README.html @@ -0,0 +1,345 @@ + + + +SML/NJ 110.79 Release Notes + + + + +
    + Standard ML of New Jersey +
    + Version 110.79 NEWS +
    + October 4, 2015 +
    + +
    +
    +
    SML/NJ HOME:
    +
    http://www.smlnj.org/index.html
    +
    FILES:
    +
    + http://smlnj.cs.uchicago.edu/dist/working/110.79/ +
    +
    +
    + +

    Summary:

    +

    +This release is fairly substantial and incudes new +Basis Library modules and the start of support for +Successor ML, +which is an evolution of the Standard ML language. +It also has support for the latest version of Mac OS X (El Capitan) and Linux 4.x kernels, +and many bug fixes. +

    + +

    Details:

    + +
    +
    Compiler:
    +
    +

    + The compiler is somewhat faster (about 7% when compiling itself). +

    +
    +
    Basis Library:
    +
    +

    + This version implements a number of proposed improvements to the Basis Library. + Note that these improvements are proposals + that have not yet been ratified and are subject to change! + Specifically, the following proposals are supported by this version of SML/NJ: +

    +
    +
    [2015-001] Correction to ListPair
    +
    SML/NJ already implemented the semantics described in this proposal. +
    +
    [2015-002] Addition of Either module
    +
    This change adds the Either structure to the Basis Library. +
    +
    [2015-003] Additional operations on sequences
    +
    This change adds operations to the Array, + List, ListPair, Option, + and Vector structures, and to the MONO_ARRAY + and MONO_VECTOR signatures. +
    +
    [2015-005] Addition of Fn module
    +
    This change adds the Fn structure to the Basis Library. +
    +
    [2015-007] Addition of Ref module
    +
    This change adds the Ref structure to the Basis Library. +
    +
    [2015-009] Policy for exceptions and partial applications
    +
    This change standardizes the generation of exceptions when partially applying curried basis functions. + The change fixed bug 136. +
    +
    +

    + It is very unlikely that these changes will break existing code, but there are + a couple of scenarios in which the code might break. The first is conflicts created + by the use of open. For example, the implementation + of ml-yacc had declarations of the form +

    +
    + open Array List +
    +

    + which created a type error because List.sub shadowed + Array.sub. +

    +

    + The second class of problem is when a module implements one of the affected Basis + signatures. For example, the MLRISC library had two modules that implemented + extensions of the ARRAY signature and which required + modification to compile. +

    +

    + In such cases, we recommend updating the source code, but it is also possible to compile + against the old version of the Basis Library by changing the line +

    +
    + $/basis.cm +
    +

    + to +

    +
    + $/basis-2004.cm +
    +

    + in your CM files. Note that backward-compatible version of the Basis Library is only + available if the old-basis target in config/targets + was requested when SML/NJ was built (this target is requested + by default). +

    +

    + We have also added implementations of the following optional Basis Library + structures: +

    +
    + structure PackReal64Big : PACK_REAL + structure PackReal64Little : PACK_REAL + structure PackRealBig : PACK_REAL + structure PackRealLittle : PACK_REAL +
    +
    +
    + +
    +
    SML/NJ Library:
    +
    +

    + Added additional array operations toList, + fromVector, and toVector to + BitArray module so that it matches the new + proposed Basis Library specification. +

    +
    +
    + +
    +
    Concurrent ML:
    +
    +

    + Added the additional Basis Library modules described above + to the CML version of the library. +

    +
    +
    + +
    +
    Successor ML:
    +
    +

    + Successor ML + is collection of proposed enhancements to the Standard ML language. + In collaboration with the MLton implementors, we are + starting to add support for these features the SML/NJ. + In this release, we have added support for the lexical extensions to the language: +

    +
      +
    • + Allow underscore ("_") as a separator in numeric literals; + e.g., 123_456, + 0wxff_ff_ff_f3, 123_456.1, ... +
    • +
    • + end-of-line comments, which are denoted using (*). + End-of-line comments properly nest into conventional block comments. For example, + the following block comment is well formed: +
      + (* + fun f x = x (*) my identity function *) + *) +
      +
    • +
    • + binary literals for both integers and words; + e.g., 0b0101_1110, or + 0wb1101. +
    • +
    +

    + These features can be enabled using the command-line option -Cparser.succ-ml=true + or by using the assignment +

    +
    + Control.succML := true; +
    +

    + at the REPL. It is also possible to mark individual source files as + being "Successor ML" sources in a CM file. There are + several ways to do so; the easiest is to specify that they should be + handled by the "succ-ml" tool: +

    +
    + foo.sml : succ-ml +
    +

    + Alternatively, one can give the "succ-ml" argument to the + "sml" tool: +

    +
    + foo.sml : sml (succ-ml) + foo.sml (succ-ml) +
    +

    + It is also possible to enable them by setting the control flag + using the with keyword in a CM file as + illustrated by the following equivalent forms: +

    +
    + foo.sml (with: parser.succ-ml) + foo.sml (with: parser.succ-ml=true) + foo.sml (with:(name:parser.succ-ml)) + foo.sml (with:(name:parser.succ-ml value:true)) +
    +
    +
    + +

    Bugs:

    +

    + Here is a list of tracked bugs fixed (or closed) with this release, please see the + bug tracker + for more details. +

    + + + + + + + + + + + + + +
    45Compiler bug in specialize phase
    53psfig.sty is deprecated, use epsfig.sty wrapper or graphicx.sty
    82Implementations of PACK_REAL missing
    131ml-ulex does not allow comments in %states directives
    134Fails to build with Linux kernel 4.x
    135Fails to build on Linux PowerPC
    136Incorrect raising of exceptions in Real.fmt and Time.fmt
    137Adapt Linux PPC build to use mkstemp instead of tmpnam
    138Incorrect behavior for Date.fromTimeLocal
    139Date.date is broken
    140Lookup failure in closure.sml when compiling Twelf
    + + +

    Supported systems:

    +
    +

    +We have verified that the system builds and runs on the following systems. +

    +
    + + + + + + + + + + + + + +
    Architecture Operating System
    PowerPC Mac OS X 10.5 (Leopard)
       
    x86 Mac OS X 10.6 (Snow Leopard)
    Mac OS X 10.7 (Lion)
    Mac OS X 10.8 (Mountain Lion)
    Mac OS X 10.9 (Mavericks)
    Mac OS X 10.10 (Yosemite)
    Mac OS X 10.11 (El Capitan)
    Ubuntu 12.04.4 (GNU/Linux 3.2.0)
    Windows 7
    Cygwin (hosted on Windows 7)
    +
    +

    +We believe that it runs on any recent Linux distribution that has support for 32-bit +executables, as well as on BSD variants, SPARC/Solaris, and PPC/AIX, but we have not +tested these systems. +

    +
    + + + diff --git a/doc/html/readme/110.80-README.html b/doc/html/readme/110.80-README.html new file mode 100644 index 0000000..b0f274d --- /dev/null +++ b/doc/html/readme/110.80-README.html @@ -0,0 +1,371 @@ + + + +SML/NJ 110.80 Release Notes + + + + +
    + Standard ML of New Jersey +
    + Version 110.80 NEWS +
    + August 19, 2016 +
    + +
    +
    +
    SML/NJ HOME:
    +
    http://www.smlnj.org/index.html
    +
    FILES:
    +
    + http://smlnj.cs.uchicago.edu/dist/working/110.80/ +
    +
    +
    + +

    Summary:

    +

    +This release incudes further additions to the +Basis Library +and many bug fixes. +As did 110.79, it includes some initial support for +Successor ML, +which is an evolution of the Standard ML language. +It also has support for the upcoming version of macOS Sierra. +

    + +

    Details:

    + +
    +
    Basis Library:
    +
    +

    + This version continues to track proposed Basis Library improvements. + Note that these improvements are proposals + that have not yet been ratified and are subject to change! + Specifically, the following proposals are supported by this version of SML/NJ: +

    +
    +
    + [2015-001] Correction to ListPair
    +
    SML/NJ already implemented the semantics described in this proposal. +
    +
    + [2015-002] Addition of Either module
    +
    This change adds the Either structure to the Basis Library.
    + In version 110.80, we have added the functions mapLeft, + mapRight, appLeft, and + appRight functions to the Either + module. +
    +
    + [2015-003] Additional operations on sequences
    +
    + This change adds operations to the Array, + List, ListPair, Option, + and Vector structures, and to the MONO_ARRAY + and MONO_VECTOR signatures.
    + In version 110.80, we have added the functions unzipMap, + unzipMapi, find, and + findi functions to the ListPair + module. +
    +
    + [2015-005] Addition of Fn module
    +
    This change adds the Fn structure to the Basis Library. +
    +
    + [2015-007] Addition of Ref module
    +
    This change adds the Ref structure to the Basis Library. +
    +
    + [2015-009] Policy for exceptions and partial applications
    +
    This change standardizes the generation of exceptions when partially applying curried basis functions. + The change fixed bug 136. +
    +
    + [2016-001] Add popCount to WORD signature
    +
    This change adds the popCount function to the WORD + signature. +
    +
    +

    + It is very unlikely that these changes will break existing code, but there are + a couple of scenarios in which the code might break. The first is conflicts created + by the use of open. For example, the implementation + of ml-yacc had declarations of the form +

    +
    + open Array List +
    +

    + which created a type error because List.sub shadowed + Array.sub. +

    +

    + The second class of problem is when a module implements one of the affected Basis + signatures. For example, the MLRISC library had two modules that implemented + extensions of the ARRAY signature and which required + modification to compile. +

    +

    + In such cases, we recommend updating the source code, but it is also possible to compile + against the old version of the Basis Library by changing the line +

    +
    + $/basis.cm +
    +

    + to +

    +
    + $/basis-2004.cm +
    +

    + in your CM files. Note that backward-compatible version of the Basis Library is only + available if the old-basis target in config/targets + was requested when SML/NJ was built (this target is requested + by default). +

    +
    +
    + +
    +
    Successor ML:
    +
    +

    + Successor ML + is collection of proposed enhancements to the Standard ML language. + In collaboration with the MLton implementors, we are + starting to add support for these features the SML/NJ. + This release does not include any new features over what was supported in version 110.79 + (i.e., lexical extensions). + We plan to start adding syntactic extensions to the core language in the next release. +

    +

    + The Successor ML features can be enabled using the command-line + option -Cparser.succ-ml=true or by using the assignment +

    +
    + Control.succML := true; +
    +

    + at the REPL. + There is a known bug (#153) that you man encounter when enabling Successor ML + features from the REPL; namely, there is some latency from when you set the + Control.succML flag and when the lexer switches mode. +

    +

    + It is also possible to mark individual source files as + being "Successor ML" sources in a CM file. There are + several ways to do so; the easiest is to specify that they should be + handled by the "succ-ml" tool: +

    +
    + foo.sml : succ-ml +
    +

    + Alternatively, one can give the "succ-ml" argument to the + "sml" tool: +

    +
    + foo.sml : sml (succ-ml) + foo.sml (succ-ml) +
    +

    + It is also possible to enable them by setting the control flag + using the with keyword in a CM file as + illustrated by the following equivalent forms: +

    +
    + foo.sml (with: parser.succ-ml) + foo.sml (with: parser.succ-ml=true) + foo.sml (with:(name:parser.succ-ml)) + foo.sml (with:(name:parser.succ-ml value:true)) +
    +
    +
    + +
    +
    SML/NJ Library:
    +
    +

    + A number of changes to the ORD_SET signature. + The following functions were added: +

    +
    + val minItem : set -> item + val maxItem : set -> item + val toList : set -> item list +
    +

    + In addition, the function listItems is now considered deprecated. +

    +

    + Added the function Controls.help for getting the help + string from a control. +

    +

    + Added the functions mkOption, mkOptionReqArg, + and mkOptionFlag to the Controls module. + These functions make it easier to package controls as command-line options. + They provide an alternative to the usual approach of using a + "--C<ctl>=<value>" + form for all of the controls. +

    +

    + Fixed a bug (#144) in the implementation of the all function in the + splay-tree implementation of sets + (functor SplaySetFn). +

    +

    + Fixed a bug (#167) in the way that the GetOpt module handles long + arguments. + Previously, it was not possible to have one long argument that was a prefix of another + (e.g., "--foo" and "--foobar"). + The new behavior is to allow long-option prefixes to overlap with other long options, but to + favor an exact match over prefix matches. + For example, if the long options are "--foo," + "--foobar," and "--foobaz," then + "--foo" will match the first, but "--foob" + will be flagged as ambiguous. +

    +
    +
    + +

    Bugs:

    +

    + Here is a list of tracked bugs fixed (or closed) with this release, please see the + bug tracker + for more details. +

    + + + + + + + + + + + + + + + +
    144Splay sets are broken
    145Internal exception occurs on bogus annotation instead of typechecking diagnostic
    146Problems building on recent versions of 64-bit Ubuntu
    147Hexadecimal escapes in strings are not supported
    150Add title to batch script
    151Error installing from source on Mac OS X
    154Return code for ml-ulex when there is an error
    155Misleading printing of word literals in error messages
    156SML resumes after SIGSTOP with bogus exception report
    161MLRISC incorrect dividend sign extension before 32-bit divide in x86_64
    164Inaccurate install instructions, ia32-libs have been deprecated
    166Can't install SML/NJ in directories containing spaces
    167Bug in handling of long options in GetOpt
    +

    The following unnumbered bugs were also fixed: +

    +
      +
    • + Fixed the handling of the Error exception + in the interactive loop so that we no longer get an exception traceback + from syntax and type errors in user code. +
    • +
    • + Fixed a bug in the implementation of ml-lex-mode in ml-ulex. + The '\h' escape was not supported (it is supposed to match + the character range [\128-\255]). +
    • +
    + +

    Supported systems:

    +
    +

    +We have verified that the system builds and runs on the following systems. +

    +
    + + + + + + + + + + + + + + +
    Architecture Operating System
    PowerPC Mac OS X 10.5 (Leopard)
       
    x86 Mac OS X 10.6 (Snow Leopard)
    Mac OS X 10.7 (Lion)
    Mac OS X 10.8 (Mountain Lion)
    Mac OS X 10.9 (Mavericks)
    Mac OS X 10.10 (Yosemite)
    Mac OS X 10.11 (El Capitan)
    macOS Sierra
    Ubuntu 14.04.3 (GNU/Linux 3.13.0)
    Windows 7
    Cygwin (hosted on Windows 7)
    +
    +

    +We believe that it runs on any recent Linux distribution that has support for 32-bit +executables, as well as on BSD variants, SPARC/Solaris, and PPC/AIX, but we have not +tested these systems. +

    +
    + + + diff --git a/doc/html/readme/110.81-README.html b/doc/html/readme/110.81-README.html new file mode 100644 index 0000000..e045853 --- /dev/null +++ b/doc/html/readme/110.81-README.html @@ -0,0 +1,230 @@ + + + +SML/NJ 110.81 Release Notes + + + + +
    + Standard ML of New Jersey +
    + Version 110.81 NEWS +
    + May 1, 2017 +
    + +
    +
    +
    SML/NJ HOME:
    +
    http://www.smlnj.org/index.html
    +
    FILES:
    +
    + http://smlnj.cs.uchicago.edu/dist/working/110.81/ +
    +
    +
    + +

    Summary:

    +

    +This release has some fairly substantial under-the-hood changes to the compiler +to clean up the interface between the front-end and FLINT, and changes to the +compiler and runtime system to prepare the ground for 64-bit support. +The release also contains some bug fixes and adds +additional support for +Successor ML, +which is an evolution of the Standard ML language. +

    + +

    Details:

    + +
    +
    Successor ML:
    +
    +

    + The 110.81 release adds additional support for Successor ML syntactic extensions. + It is now possible to use record-punning syntax in expressions (as well as in patterns); + e.g., you can write the following function definition +

    +
    +fun f x = {x} +
    +

    + which is equivalent to +

    +
    +fun f x = {x = x} +
    +

    + And we have added support for do syntax in + let bindings. +

    +

    + We have also changed the way that Successor ML features are enabled from the REPL. + Instead of assigning true to the + Control.succML variable, one should use the + function Control.setSuccML is used to enable or disable + Successor ML (i.e., pass the argument + true to enable Successor ML features). +

    +
    + +
    Compiler
    +
    +

    + This version eliminates dependencies on FLINT types in the front + end. In particular, uses of PlambdaType have been eliminated. +

    +
    + +
    SML/NJ Library:
    +
    +

    + Added the JSONUtil module to the JSON library. + This module provides helper functions for deconstructing and editing the + tree (aka DOM) representation of a JSON file. +

    +
    + +
    ml-lpt
    +
    +

    + Added --debug command-line option to ml-antlr. + This flag causes ml-antlr to generate debug actions that print + the left-hand-side non-terminal of the production. +

    +

    + Added %tokentype directive to ml-antlr, which allows + user-defined datatypes to be used to represent tokens. +

    +
    + +
    MLRISC:
    +
    +

    + Changed the interface to AMD64Gen; the + signBit and negateSignBit + callback functions now return an MLTree.rexp (instead of a label). +

    +
    + +
    + +

    Bugs:

    +

    + Here is a list of tracked bugs fixed (or closed) with this release, please see the + bug tracker + for more details. +

    + + + + + + + +
    129Symbolic identifiers are allowed as strids
    149Datatype replication exposes hidden constructors
    153Enabling Successor ML features is delayed
    175Executable stack on Linux
    179ml-ulex writing debug messages to stdOut
    +

    The following unnumbered bugs were also fixed: +

    +
      +
    • Parser bugfix: we now allow `*` as a field label in record-punning patterns.
    • +
    + +

    Supported systems:

    +
    +

    +We have verified that the system builds and runs on the following systems. +

    +
    + + + + + + + + + + + + + + + +
    Architecture Operating System
    PowerPC Mac OS X 10.5 (Leopard)
       
    x86 Mac OS X 10.6 (Snow Leopard)
    Mac OS X 10.7 (Lion)
    Mac OS X 10.8 (Mountain Lion)
    Mac OS X 10.9 (Mavericks)
    Mac OS X 10.10 (Yosemite)
    Mac OS X 10.11 (El Capitan)
    macOS 10.12 (Sierra)
    Ubuntu 14.04.3 (GNU/Linux 3.13.0)
    Windows 7
    Windows 10
    Cygwin (hosted on Windows 7 or 10)
    +
    +

    +We believe that it runs on any recent Linux distribution that has support for 32-bit +executables, as well as on BSD variants, SPARC/Solaris, and PPC/AIX, but we have not +tested these systems. +

    +
    + + + diff --git a/doc/html/readme/110.82-README.html b/doc/html/readme/110.82-README.html new file mode 100644 index 0000000..7b32abe --- /dev/null +++ b/doc/html/readme/110.82-README.html @@ -0,0 +1,229 @@ + + + +SML/NJ 110.82 Release Notes + + + + +
    + Standard ML of New Jersey +
    + Version 110.82 NEWS +
    + October 16, 2017 +
    + +
    +
    +
    SML/NJ HOME:
    +
    http://www.smlnj.org/index.html
    +
    FILES:
    +
    + http://smlnj.cs.uchicago.edu/dist/working/110.82/ +
    +
    +
    + +

    Summary:

    +

    +This release continues the march toward 64-bit support (it looks to be a long march). +There are many under-the-hood changes to the compiler to make supporting both 32-bit and +64-bit targets possible, as well as a number of bug fixes. +It also adds support for macOS 10.13 (High Sierra). +

    + +

    Details:

    + +
    +
    Compiler
    +
    +

    + Rationalized the treatment of primitive operators in Absyn and FLINT, and removed + unused primitive operators. + These changes affected the pickle format, which means that the + --rebuild option must be used when building + the compiler from sources. +

    +

    + Added Target structure to Basics; this module + specifies properties of the target, such as the size of ML values and + the size of the default int type. +

    +

    + Changed the way that the InlineT structure is + defined to be target sensitive. For example, on 32-bit targets, there + will be an InlineT.i31add function for adding + the default int type, whereas + on 64-bit targets the corresponding function will be + InlineT.i63add. +

    +

    + Restored the pre-110.80 behavior of having the shell command sml foo.sml + return a non-zero exit status when there is an error in the compilation of + foo.sml (bug #183). +

    +
    + +
    Run-time system
    +
    +

    + Changed the way that we test for allocation-space addresses in minor GCs. + Instead of using the BIBOP, we now do a pointer range test; this change + produces a small speedup on 32-bit machines, but we expect a bigger benefit + on 64-bit machines, which use the 2-level BIBOP. +

    +

    + Various changes to prepare for 64-bit targets, including adding a + 2-level BIBOP implementation, and splitting assyntax.h + into 32-bit (assyntax.h) and 64-bit + (assyntax64.h) versions. +

    +
    +
    + +

    Bugs:

    +

    + Here is a list of tracked bugs fixed (or closed) with this release, please see the + bug tracker + for more details. +

    + + + + + +
    123missing warning for non-exaustive binding at top level.
    183status code returned by sml REPL.
    185Bring command line help text into parity with man page.
    +

    The following unnumbered bugs were also fixed: +

    +
      +
    • JSON Library: printing of strings was fixed to match JSON specification.
    • +
    • Eliminated false positives in assertion checking in + build-literals.c. + Also fixed a bookkeeping bug in the tracking of available space in + build-literals.c. +
    • +
    • Fixed a bug in the IntInf.mod and + IntInf.rem operators, + where the Div exception was not + getting raised when both arguments are zero. +
    • +
    • + Fixed a runtime-system build issue on macOS for the situation where Xcode + in installed, but /usr/include does not exist. +
    • +
    + +

    Supported systems:

    +
    +

    +We believe that SML/NJ will build and run on the following systems, but have only +tested some of them: +

    +
    + + + + + + + + + + + + + + + + + + + + + + +
    Architecture Operating System Status
    Power PC Mac OS X 10.5 (Leopard)
    AIX
       
    Sparc Solaris
    Linux
       
    x86 Mac OS X 10.6 (Snow Leopard)
    Mac OS X 10.7 (Lion)
    Mac OS X 10.8 (Mountain Lion)
    Mac OS X 10.9 (Mavericks)
    Mac OS X 10.10 (Yosemite)
    Mac OS X 10.11 (El Capitan) Tested
    macOS 10.12 (Sierra) Tested
    macOS 10.13 (High Sierra) Tested
    Ubuntu 14.04.3 (GNU/Linux 3.13.0)  Tested
    Other Linux variants
    BSD variants
    Windows 7
    Windows 10 Tested
    Cygwin (hosted on Windows 7 or 10)
    +
    +
    + + + diff --git a/doc/html/readme/110.83-README.html b/doc/html/readme/110.83-README.html new file mode 100644 index 0000000..047a7e9 --- /dev/null +++ b/doc/html/readme/110.83-README.html @@ -0,0 +1,265 @@ + + + +SML/NJ 110.83 Release Notes + + + + +
    + Standard ML of New Jersey +
    + Version 110.83 NEWS +
    + June 1, 2018 +
    + +
    +
    +
    SML/NJ HOME:
    +
    http://www.smlnj.org/index.html
    +
    FILES:
    +
    + http://smlnj.cs.uchicago.edu/dist/working/110.83/ +
    +
    +
    + +

    Summary:

    +

    +This release continues the march toward 64-bit support (it looks to be a long march). +There are many under-the-hood changes to the compiler to make supporting both 32-bit and +64-bit targets possible, as well as a number of bug fixes. +

    + +

    Details:

    + +
    +
    Basis Library:
    +
    +

    + This version continues to track proposed Basis Library improvements. + Note that these improvements are proposals + that have not yet been ratified and are subject to change! + Specifically, the following proposals are supported by this version of SML/NJ: +

    +
    +
    + [2018-001] Addition of monomorphic buffers
    +
    This change adds the MONO_BUFFER signature + to the Basis Library, with instances CharBuffer + and Word8Buffer. +
    +
    +

    + We also fixed a number of cases where our implementation of Basis functions + did not agree with the specification. See the list of bug fixes for details. +

    +
    +
    Compiler
    +
    +

    + Changed the internal representation of real literals from strings to a more + structured representation (see compiler/Basics/reals/ + for the code). +

    +

    + Changed the Absyn, PLambda, + FLINT, and CPS IRs to + represent integer and word literals as IntInf.int + values. This change removes unnecessary complexity in code generation, improves + constant folding, and lays the groundwork for 64-bit support. +

    +

    + Improved the reporting of errors involving literal values. We now + use the original source text when describing the value in the error + message. We also report an error in the front-end when real literals + are too large to be represented (instead of letting the code generator fail). +

    +

    + Fixed a number of bugs where the compiler did not handle the language syntax + as specified in the Definition. See the list of bug fixes for details. +

    +
    +
    + +

    Bugs:

    +

    + Here is a list of tracked bugs fixed (or closed) with this release, please see the + bug tracker + for more details. +

    + + + + + + +
    191Compiler crash when handling large reals
    194Real.fromString overflows or hangs
    201The AMD64.cm library is missing
    206Parsing of explicit type variables and val rec is broken
    +

    The following unnumbered bugs were also fixed: +

    +
      +
    • + Fixed the scanning of type variables to agree with the Definition + (e.g., '1 and '_ + are now accepted as type variables). +
    • +
    • + Modified the parser to enforce the restriction that module-language + identifiers must be alphanumeric (not symbolic). +
    • +
    • + Modified the parser to disallow the rebinding of =. + Note that we still allow the syntax + val op = = ... + because it is needed + when bootstrapping the initial Basis environment. +
    • +
    • + Fixed the behavior of Int.sameSign and + Int32.sameSign to correctly handle the situation + where one argument was zero and the other was greater than zero. +
    • +
    • + Fixed the scanner to allow unmatched close comment sequences + (e.g., (op *)(2,3)). +
    • +
    • + Fixed the parser's handling of op combined with + long identifiers. +
    • +
    • + Fixed scanning of prefixes of word values in strings to match Basis + Library specification ("0w" is not + a valid prefix for hexadecimal values). +
    • +
    • + Fixed Char.toCString to produce "\000" + (instead of "\0") for the nul character. +
    • +
    • + Fixed the scanner to produce the correct error message for bad escape sequences in string + literals. +
    • +
    • + Fixed parser to allow parentheses around val rec patterns. +
    • +
    • + Fixed minor bug in Date.toString (missing leading "0" + for day of month). +
    • +
    + +

    Supported systems:

    +
    +

    +We believe that SML/NJ will build and run on the following systems, but have only +tested some of them: +

    +
    + + + + + + + + + + + + + + + + + + + + + + +
    Architecture Operating System Status
    Power PC Mac OS X 10.5 (Leopard)
    AIX
       
    Sparc Solaris
    Linux
       
    x86 Mac OS X 10.6 (Snow Leopard)
    Mac OS X 10.7 (Lion)
    Mac OS X 10.8 (Mountain Lion)
    Mac OS X 10.9 (Mavericks)
    Mac OS X 10.10 (Yosemite)
    Mac OS X 10.11 (El Capitan)
    macOS 10.12 (Sierra) Tested
    macOS 10.13 (High Sierra) Tested
    Ubuntu 16.04.3 LTS Tested
    Other Linux variants
    BSD variants
    Windows 7
    Windows 10 Tested
    Cygwin (hosted on Windows 7 or 10) 
    +
    +
    + + + diff --git a/doc/html/readme/110.84-README.html b/doc/html/readme/110.84-README.html new file mode 100644 index 0000000..a9d070e --- /dev/null +++ b/doc/html/readme/110.84-README.html @@ -0,0 +1,274 @@ + + + +SML/NJ 110.84 Release Notes + + + + +
    + Standard ML of New Jersey +
    + Version 110.84 NEWS +
    + September 3, 2018 +
    + +
    +
    +
    SML/NJ HOME:
    +
    https://smlnj.org
    +
    FILES:
    +
    + https://smlnj.org/dist/working/110.84/ +
    +
    +
    + +

    Summary:

    +

    + This is a minor release whose primary purpose is to support future changes to the + compiler's implementation. +

    +

    + This release introduces a new implementation of the + Abstract Syntax Description Library (ASDL). + It also adds support for Mac OS X 10.14 (Mojave) and includes a few other + enhancements and bug fixes. +

    + +

    Details:

    + +
    +
    ASDL
    +
    +

    + This release of SML/NJ includes a beta-version of a new implementation of the + Abstract Syntax Description Library (ASDL), which was originally designed + and implemented by Daniel Wang as part of the Zephyr project. ASDL allows one + to declare a collection of inductive types and then use the asdlgen tool + to generate an implementation of the types with pickler/unpickler operations. + We plan to replace the compiler's current environment pickler with ASDL and + eventually also use it in a new LLVM-based backend. +

    +

    + While the previous versions of ASDL supported many different target languages, + the new version currently only supports SML and C++ (note that C++ support is + currently incomplete). +

    +

    + See the ASDL Manual (Version 3.0) for details about the specification + language and the asdlgen tool. +

    +

    + Also note that ASDL is not currently part of the Windows installation. +

    +
    +
    Basis Library:
    +
    +

    + This version adds support for the Basis Library proposal: +

    +
    +
    + [2018-002] Additional slice operations
    +
    + This proposal adds four functions to the various slice structures: + triml, + trimr, + splitAt, and + getVec. +
    + +
    +

    + Note that this change is a proposal + that have not yet been ratified and is subject to change! +

    + +
    SML/NJ Library
    +
    +

    + Added pretty printing (structure SExpPP) + to the S-Expression library. +

    +
    +
    Installer
    +
    +

    + A couple of changes were made to the installer + (base/base/system/smlnj/installer). +

    +
      +
    • + The name of build scripts used for the prog and dprog actions + was changed from "build" to + "build.sh" on Unix systems (it is still + "build.bat" on Windows). +
    • +
    • + A new config action was added that causes a configuration script + to be run. The script should be named config.sh (or config.bat + on Windows). +
    • +
    +
    +
    + +

    Bugs:

    +

    + There are no numbered bug fixes in this release, but the following + unnumbered bugs were fixed: +

    +
      +
    • + Fixed a compiler bug ("arg ty lists have wrong length") in + unifyTy that could occur when one of the type constructors + is the ERRORtyc (because a type error had been previously + detected). +
    • +
    • + Fixed a bug in the implementation of monomorphic buffers: + the functions CharBuffer.add1 and + Word8Buffer.add1 had an incorrect length + test. +
    • +
    • + Fixed a bug in the SExp.same function, where + lists of unequal length would compare as equal if the shorter list was + a prefix of the longer list. +
    • +
    • + Fixed a bug where the findi function in the various + slice structures (e.g., Word8ArraySlice) + passed the wrong index value to the predicate function (the index is supposed + to be relative to the start of the slice, not the underlying vector or array). +
    • +
    + +

    Supported systems:

    +
    +

    +We believe that SML/NJ will build and run on the following systems, but have only +tested some of them: +

    +
    + + + + + + + + + + + + + + + + + + + + + + + +
    Architecture Operating System Status
    Power PC Mac OS X 10.5 (Leopard)
    AIX
       
    Sparc Solaris
    Linux
       
    x86 Mac OS X 10.6 (Snow Leopard)
    Mac OS X 10.7 (Lion)
    Mac OS X 10.8 (Mountain Lion)
    Mac OS X 10.9 (Mavericks)
    Mac OS X 10.10 (Yosemite)
    Mac OS X 10.11 (El Capitan)
    macOS 10.12 (Sierra) Tested
    macOS 10.13 (High Sierra) Tested
    macOS 10.14 (Mojave) Tested (see note below)
    Ubuntu 16.04.3 LTS Tested
    Other Linux variants
    BSD variants
    Windows 7
    Windows 10 Tested
    Cygwin (hosted on Windows 7 or 10) 
    +
    +
    +

    +Apple is in the process of deprecating 32-bit support in macOS. With macOS 10.14 +(Mojave), it is no longer possible to compile 32-bit applications, such as the +SML/NJ runtime system. Thus, while it is possible to install SML/NJ using the + +installer, installing using the config/install.sh script will not +work without extra effort. +

    +

    +We have been working on 64-bit support since 110.82 we expect to have it in place +before the release of macOS 10.15. +

    + + + diff --git a/doc/html/readme/110.85-README.html b/doc/html/readme/110.85-README.html new file mode 100644 index 0000000..c23a3ae --- /dev/null +++ b/doc/html/readme/110.85-README.html @@ -0,0 +1,213 @@ + + + +SML/NJ 110.85 Release Notes + + + + +
    + Standard ML of New Jersey +
    + Version 110.85 NEWS +
    + December 21, 2018 +
    + +
    +
    +
    SML/NJ HOME:
    +
    https://smlnj.org
    +
    FILES:
    +
    + https://smlnj.org/dist/working/110.85/ +
    +
    +
    + +

    Summary:

    +

    + This release fixes a critical bug in the runtime system, as well has including + some additional changes to the compiler. +

    + +

    Details:

    + +
    +
    Installation
    +
    +

    + While the installer + for 110.85 works on macOS 10.14 Mojave, building from source requires some extra steps. +

    +

    + We have added a new Makefile (mk.x86-darwin18) + for the runtime system and modified the config/install.sh + script to use this makefile when necessary. This new makefile expects + that the MacOSX10.13.sdk directory from Xcode 9 has been + copied into the Xcode 10 SDKs directory. See the + change log for more + details. +

    +
    +
    SML/NJ Library
    +
    +

    + Added CharBufferPP structure to the pretty-printing + library. This module can be used to generate strings from a pretty-printer. +

    +
    +
    Compiler
    +
    +

    + A number of changes to internal representations as part of the preparation for + supporting 64-bit targets. See the + change log for details. +

    +

    + The getPseudo and setPseudo + functions were removed from the Unsafe structure, since + they were no longer supported by the code generator. + We also removed the getpseudo, setpseudo, + setmark, and dispose functions + from the InlineT structure (these are not visible to user code). +

    +
    +
    + +

    Bugs:

    +

    +Here is a list of tracked bugs fixed (or closed) with this release, please see the +bug tracker +for more details. +

    + + + +
    216run-time system fatal error with large top-level value
    + +

    Supported systems:

    +
    +

    +We believe that SML/NJ will build and run on the following systems, but have only +tested some of them: +

    +
    + + + + + + + + + + + + + + + + + + + + + + + +
    Architecture Operating System Status
    Power PC Mac OS X 10.5 (Leopard)
    AIX
       
    Sparc Solaris
    Linux
       
    x86 Mac OS X 10.6 (Snow Leopard)
    Mac OS X 10.7 (Lion)
    Mac OS X 10.8 (Mountain Lion)
    Mac OS X 10.9 (Mavericks)
    Mac OS X 10.10 (Yosemite)
    Mac OS X 10.11 (El Capitan)
    macOS 10.12 (Sierra) Tested
    macOS 10.13 (High Sierra) Tested
    macOS 10.14 (Mojave) Tested (see note below)
    Ubuntu 16.04.3 LTS Tested
    Other Linux variants
    BSD variants
    Windows 7
    Windows 10 Tested
    Cygwin (32-bit) 
    +
    +
    +

    +Apple is in the process of deprecating 32-bit support in macOS. +With macOS 10.14 Mojave, compiling 32-bit applications, such as the SML/NJ +runtime system, requires using an older SDK. +The SML/NJ +installer, however, works without issue on macOS 10.14 Mojave. +

    +

    +We have been working on 64-bit support since 110.82 we expect to have it in place +before the release of macOS 10.15. +

    + + + diff --git a/doc/html/readme/110.86-README.html b/doc/html/readme/110.86-README.html new file mode 100644 index 0000000..ba02c01 --- /dev/null +++ b/doc/html/readme/110.86-README.html @@ -0,0 +1,512 @@ + + + + + + + +Standard ML of New Jersey Release Notes + + + + +
    +
    +
    +
    + Standard ML of New Jersey
    Release Notes
    +

    + Version 110.86
    May 2, 2019
    +
    +
    +
    +
    +
    +
    +
    SML/NJ HOME
    +
    +

    https://smlnj.org

    +
    +
    FILES
    +
    +

    https://smlnj.org/dist/working/110.86/

    +
    +
    +
    +
    +
    +
    +
    +
    +
    +

    Summary

    +
    +
    +

    The main purpose of this release is to provide a stable checkpoint +for a series of internal compiler changes, which are part of the +effort to support 64-bit systems. In addition, however, it also +contains a number of bug fixes and other improvements.

    +
    +
    +
    +
    +

    Details

    +
    +
    +

    Installation

    +
    +

    While the installer +for 110.86 works on macOS 10.14 Mojave, building from source +requires some extra steps.

    +
    +
    +
    +

    SML/NJ Library

    +
    +

    Renamed the function CharBufferPP.openOut to openBuf, and added the +TextPP structure that supports pretty printing to either an output +stream (like TextIOPP) or a character buffer (like CharBufferPP).

    +
    +
    +
    +

    ASDL

    +
    +

    Further improvements to the new implementation of ASDL.

    +
    +
    +
    +
    +
      +
    • +

      The interface to file and memory picklers was unified for the SML view.

      +
    • +
    • +

      Added support for S-Expression pickling for the SML view (unpickling +has not yet been implemented).

      +
    • +
    • +

      Significant editing of the documentation to make it more accurately +reflect the implementation.

      +
    • +
    +
    +
    +
    +
    +
    +

    Compiler

    +
    +

    We have made a number of changes to internal representations and +implementations as part of the preparation for supporting 64-bit targets. +We summarize these below (see the change log for details).

    +
    +
    +
    +
    +
      +
    • +

      Moved CPS related code out of the FLINT directory and into its own +compiler/CPS directory tree.

      +
    • +
    • +

      Split out the utility code from the CPS structure into a new +CPSUtil structure.

      +
    • +
    • +

      Renamed the various CPS primitive constructors so as to be compatible +with ASDL (i.e., changed symbolic and lower-case constructor names to +upper-case identifiers).

      +
    • +
    • +

      Moved the F_SGN constructor from the fcmp datatype to the branch +datatype (and renamed it FSGN).

      +
    • +
    • +

      Many changes to the internal representation of primitive operators.

      +
    • +
    +
    +
    +
    +
    +

    We have also made improvements to the core 64-bit arithmetic operations.

    +
    +
    +

    Culled unused flags from the Control structure (mostly from Control.CG).

    +
    +
    +
    +

    Interactive System

    +
    +

    The implementation of the use function in the REPL has been rewritten to +fix bugs 193, 217, and 219. The semantics of +use are as follows:

    +
    +
    +
    +
    +
      +
    • +

      If an invocation of use encounters a compilation error (either in the +initial file or in a nested invocation of use), then the compiler error +message will be printed and the call to use will immediately return ().

      +
    • +
    • +

      If an invocation of use raises an exception during execution +of the compiled code (either in the initial file or in a nested +invocation of use), then the exception will be reported at +the top-level.

      +
    • +
    • +

      Otherwise, if no errors or uncaught exceptions are encountered, then +() will be returned once the code in the used file has executed.

      +
    • +
    +
    +
    +
    +
    +

    Note that any change to the global state or environment +that occurs before an error is encountered, will not be rolled +back. Also, wrapping calls to use in exception handlers or using +callcc in combination with use may break it.

    +
    +
    +

    Files specified as command-line arguments to the sml command +are treated as if use was invoked on them. If there is an +error, then the error is reported and the sml command +will terminate with a non-zero exit status (at least on Unix).

    +
    +
    +
    +
    +
    +

    Bugs

    +
    +
    +

    Here is a list of tracked bugs fixed (or closed) with this release, please see the +bug tracker +for more details.

    +
    + ++++ + + + + + + + + + + + + + + + + + + +

    193

    SML/NJ does not print unhandled exceptions in loaded files

    213

    Int32.div raises Div instead of Overflow when dividing minInt by ~1

    217

    Unhandled exceptions no longer print anything when inside used file

    219

    "use" fails silently when a function inside a structure raises an exception

    +
    +

    The following unnumbered bugs were also fixed:

    +
    +
    +
    +
    +
      +
    • +

      The Word32.toInt function did not raise Overflow when given an +argument with its sign bit set in some cases (e.g., +Word32.toInt 0wx8002DE32; would return 187954).

      +
    • +
    +
    +
    +
    +
    +
    +
    +

    Supported systems

    +
    +
    +

    We believe that SML/NJ will build and run on the following systems, but have only +tested some of them:

    +
    + +++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    ArchitectureOperating SystemStatus

    Power PC

    Mac OS X 10.5 (Leopard)

    AIX

     

    Sparc

    Solaris

    Linux

     

    x86 (32-bit)

    Mac OS X 10.6 (Snow Leopard)

    Mac OS X 10.7 (Lion)

    Mac OS X 10.8 (Mountain Lion)

    Mac OS X 10.9 (Mavericks)

    Mac OS X 10.10 (Yosemite)

    Mac OS X 10.11 (El Capitan)

    macOS 10.12 (Sierra)

    Tested

    macOS 10.13 (High Sierra)

    Tested

    macOS 10.14 (Mojave)

    Tested

    Ubuntu 16.04.3 LTS

    Tested

    Other Linux variants

    BSD variants

    Windows 7

    Windows 10

    Cygwin (32-bit)

    +
    +

    A note about 64-bit support

    +
    +

    Apple is in the process of deprecating 32-bit support in macOS. +With macOS 10.14 Mojave, compiling 32-bit applications, such as the SML/NJ +runtime system, requires using an older SDK. +The SML/NJ installer, +however, works without issue on macOS 10.14 Mojave.

    +
    +
    +

    We have been working on 64-bit support since 110.82 we expect to have it in place +before the release of macOS 10.15.

    +
    +
    +
    +
    +
    + + + + + \ No newline at end of file diff --git a/doc/html/readme/110.87-README.html b/doc/html/readme/110.87-README.html new file mode 100644 index 0000000..dabba89 --- /dev/null +++ b/doc/html/readme/110.87-README.html @@ -0,0 +1,528 @@ + + + + + + + +Standard ML of New Jersey Release Notes + + + + +
    +
    +
    +
    + Standard ML of New Jersey
    Release Notes
    +

    + Version 110.87
    May 3, 2019
    +
    +
    +
    +
    +
    +
    +
    SML/NJ HOME
    +
    +

    https://smlnj.org

    +
    +
    FILES
    +
    +

    https://smlnj.org/dist/working/110.87/

    +
    +
    +
    +
    +
    +
    +
    +
    +
    +

    Summary

    +
    +
    +

    This release completes the overhaul of primitive operators in the +compiler that was started in 110.86. Most of the changes listed +here were part of the 110.86 release, but we include them here +since 110.86 was not officially announced.

    +
    +
    +
    +
    +

    Details

    +
    +
    +

    Installation

    +
    +

    While the installer +for 110.87 works on macOS 10.14 Mojave, building from source +requires some extra steps.

    +
    +
    +
    +

    SML/NJ Library

    +
    +

    Renamed the function CharBufferPP.openOut to openBuf, and added the +TextPP structure that supports pretty printing to either an output +stream (like TextIOPP) or a character buffer (like CharBufferPP).

    +
    +
    +
    +

    ASDL

    +
    +

    Further improvements to the new implementation of ASDL.

    +
    +
    +
    +
    +
      +
    • +

      The interface to file and memory picklers was unified for the SML view.

      +
    • +
    • +

      Added support for S-Expression pickling for the SML view (unpickling +has not yet been implemented).

      +
    • +
    • +

      Significant editing of the documentation to make it more accurately +reflect the implementation.

      +
    • +
    +
    +
    +
    +
    +
    +

    Compiler

    +
    +

    110.86 changes

    +
    +

    We have made a number of changes to internal representations and +implementations as part of the preparation for supporting 64-bit targets. +We summarize these below (see the change log for details).

    +
    +
    +
    +
    +
      +
    • +

      Moved CPS related code out of the FLINT directory and into its own +compiler/CPS directory tree.

      +
    • +
    • +

      Split out the utility code from the CPS structure into a new +CPSUtil structure.

      +
    • +
    • +

      Renamed the various CPS primitive constructors so as to be compatible +with ASDL (i.e., changed symbolic and lower-case constructor names to +upper-case identifiers).

      +
    • +
    • +

      Moved the F_SGN constructor from the fcmp datatype to the branch +datatype (and renamed it FSGN).

      +
    • +
    • +

      Many changes to the internal representation of primitive operators.

      +
    • +
    +
    +
    +
    +
    +

    We have also made improvements to the core 64-bit arithmetic operations.

    +
    +
    +

    Culled unused flags from the Control structure (mostly from Control.CG).

    +
    +
    +
    +

    110.87 changes

    +
    +

    In 110.87, we continue the overhaul of the compiler’s handling of primitive +operations by introducing a new, more consistent, naming scheme for the +operations that are exposed by the compiler in the InLine structure. +In particular, we have abstracted away from the size of the default integer +and word types (both in the names and in the semantics of numeric conversions).

    +
    +
    +

    The compiler now inlines the Char.chr operator.

    +
    +
    +
    +
    +

    Interactive System

    +
    +

    The implementation of the use function in the REPL has been rewritten to +fix bugs 193, 217, and 219. The semantics of +use are as follows:

    +
    +
    +
    +
    +
      +
    • +

      If an invocation of use encounters a compilation error (either in the +initial file or in a nested invocation of use), then the compiler error +message will be printed and the call to use will immediately return ().

      +
    • +
    • +

      If an invocation of use raises an exception during execution +of the compiled code (either in the initial file or in a nested +invocation of use), then the exception will be reported at +the top-level.

      +
    • +
    • +

      Otherwise, if no errors or uncaught exceptions are encountered, then +() will be returned once the code in the used file has executed.

      +
    • +
    +
    +
    +
    +
    +

    Note that any change to the global state or environment +that occurs before an error is encountered, will not be rolled +back. Also, wrapping calls to use in exception handlers or using +callcc in combination with use may break it.

    +
    +
    +

    Files specified as command-line arguments to the sml command +are treated as if use was invoked on them. If there is an +error, then the error is reported and the sml command +will terminate with a non-zero exit status (at least on Unix).

    +
    +
    +
    +
    +
    +

    Bugs

    +
    +
    +

    Here is a list of tracked bugs fixed (or closed) with this release, please see the +bug tracker +for more details.

    +
    + ++++ + + + + + + + + + + + + + + + + + + +

    193

    SML/NJ does not print unhandled exceptions in loaded files

    213

    Int32.div raises Div instead of Overflow when dividing minInt by ~1

    217

    Unhandled exceptions no longer print anything when inside used file

    219

    "use" fails silently when a function inside a structure raises an exception

    +
    +

    The following unnumbered bugs were also fixed:

    +
    +
    +
    +
    +
      +
    • +

      The Word32.toInt function did not raise Overflow when given an +argument with its sign bit set in some cases (e.g., +Word32.toInt 0wx8002DE32; would return 187954).

      +
    • +
    +
    +
    +
    +
    +
    +
    +

    Supported systems

    +
    +
    +

    We believe that SML/NJ will build and run on the following systems, but have only +tested some of them:

    +
    + +++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    ArchitectureOperating SystemStatus

    Power PC

    Mac OS X 10.5 (Leopard)

    AIX

     

    Sparc

    Solaris

    Linux

     

    x86 (32-bit)

    Mac OS X 10.6 (Snow Leopard)

    Mac OS X 10.7 (Lion)

    Mac OS X 10.8 (Mountain Lion)

    Mac OS X 10.9 (Mavericks)

    Mac OS X 10.10 (Yosemite)

    Mac OS X 10.11 (El Capitan)

    macOS 10.12 (Sierra)

    Tested

    macOS 10.13 (High Sierra)

    Tested

    macOS 10.14 (Mojave)

    Tested

    Ubuntu 16.04.3 LTS

    Tested

    Other Linux variants

    BSD variants

    Windows 7

    Windows 10

    Cygwin (32-bit)

    +
    +

    A note about 64-bit support

    +
    +

    Apple is in the process of deprecating 32-bit support in macOS. +With macOS 10.14 Mojave, compiling 32-bit applications, such as the SML/NJ +runtime system, requires using an older SDK. +The SML/NJ installer, +however, works without issue on macOS 10.14 Mojave.

    +
    +
    +

    We have been working on 64-bit support since 110.82 we expect to have it in place +before the release of macOS 10.15.

    +
    +
    +
    +
    +
    + + + + + \ No newline at end of file diff --git a/doc/html/readme/110.88-README.html b/doc/html/readme/110.88-README.html new file mode 100644 index 0000000..d3aeee1 --- /dev/null +++ b/doc/html/readme/110.88-README.html @@ -0,0 +1,366 @@ + + + + + + + +Standard ML of New Jersey Release Notes + + + + +
    +
    +
    +
    + Standard ML of New Jersey
    Release Notes
    +

    + Version 110.88
    May 15, 2019
    +
    +
    +
    +
    +
    +
    +
    SML/NJ HOME
    +
    +

    https://smlnj.org

    +
    +
    FILES
    +
    +

    https://smlnj.org/dist/working/110.88/

    +
    +
    +
    +
    +
    +
    +
    +
    +
    +

    Summary

    +
    +
    +

    This release does not have any user-visible changes. It consists of +a reimplementation of the Int64.int and Word64.word types. +With this new implementation in place, we expect to rebind the +LargeWord, FixedInt, and Position structures to 64-bit +implementations in the next release.

    +
    +
    +
    +
    +

    Details

    +
    +
    +

    Installation

    +
    +

    While the installer +for 110.88 works on macOS 10.14 Mojave, building from source +requires some extra steps.

    +
    +
    +
    +

    Compiler

    +
    +

    Previously, 64-bit numbers were represented as pairs of 32-bit words +and the Int64 and Word64 Basis Library structures were built on this +implementation. While the compiler supported 64-bit literals, the +compiler translated these to pairs of 32-bit words when converting +to FLINT.

    +
    +
    +

    In order to support 64-bit targets, it is desirable to make 64-bit +numbers a primitive type (just like Int32.int). In this version, +we have replaced the old implementation of the Int64 and Word64 +modules with compiler primops. A new pass, Num64Cnv, that runs +before closure conversion is used to expand 64-bit operations into +32-bit operations on 32-bit targets. While the main purpose of +this change is to make porting to 64-bit targets easier, the new +implementation of Int64.int and Word64.word is faster and +more space efficient than before.

    +
    +
    +
    +
    +
    +

    Supported systems

    +
    +
    +

    We believe that SML/NJ will build and run on the following systems, but have only +tested some of them:

    +
    + +++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    ArchitectureOperating SystemStatus

    Power PC

    Mac OS X 10.5 (Leopard)

    AIX

     

    Sparc

    Solaris

    Linux

     

    x86 (32-bit)

    Mac OS X 10.6 (Snow Leopard)

    Mac OS X 10.7 (Lion)

    Mac OS X 10.8 (Mountain Lion)

    Mac OS X 10.9 (Mavericks)

    Mac OS X 10.10 (Yosemite)

    Mac OS X 10.11 (El Capitan)

    macOS 10.12 (Sierra)

    Tested

    macOS 10.13 (High Sierra)

    Tested

    macOS 10.14 (Mojave)

    Tested

    Ubuntu 16.04.3 LTS

    Tested

    Other Linux variants

    BSD variants

    Windows 7

    Windows 10

    Cygwin (32-bit)

    +
    +

    A note about 64-bit support

    +
    +

    Apple is in the process of deprecating 32-bit support in macOS. +With macOS 10.14 Mojave, compiling 32-bit applications, such as the SML/NJ +runtime system, requires using an older SDK. +The SML/NJ installer, +however, works without issue on macOS 10.14 Mojave.

    +
    +
    +

    We have been working on 64-bit support since 110.82 we expect to have it in place +before the release of macOS 10.15.

    +
    +
    +
    +
    +
    + + + + + \ No newline at end of file diff --git a/doc/html/readme/110.89-README.html b/doc/html/readme/110.89-README.html new file mode 100644 index 0000000..bbb00cb --- /dev/null +++ b/doc/html/readme/110.89-README.html @@ -0,0 +1,443 @@ + + + + + + + +Standard ML of New Jersey Release Notes + + + + +
    +
    +
    +
    + Standard ML of New Jersey
    Release Notes
    +

    + Version 110.89
    June 1, 2019
    +
    +
    +
    +
    +
    +
    +
    SML/NJ HOME
    +
    +

    https://smlnj.org

    +
    +
    FILES
    +
    +

    https://smlnj.org/dist/working/110.89/

    +
    +
    +
    +
    +
    +
    +
    +
    +
    +

    Summary

    +
    +
    +

    This release continues the march toward 64-bit support. The main +change is that the bindings of the FixedInt, LargeWord, and +Position structure aliases are now 64-bit. This change will +break code that assumes that these structures are the same as Int32, +Word32, and Int32 (respectively).

    +
    +
    +

    This version may be unstable, so we recommend sticking with +Version 110.87 +for production work. Also, we have not finished porting +changes to the Windows version of the runtime system, so we are +not releasing a Windows MSI file for this version.

    +
    +
    +
    +
    +

    Details

    +
    +
    +

    Installation

    +
    +

    While the installer +for 110.89 works on macOS 10.14 Mojave, building from source +requires some extra steps.

    +
    +
    +
    +

    Compiler

    +
    +
      +
    • +

      Fixed some bugs in the implementation of the Word64 arithmetic +operations.

      +
    • +
    • +

      Simplified the code generator by removing disabled optimization +features (memory disambiguation and GC types).

      +
    • +
    • +

      Added missing contractions for QUOT and REM primops.

      +
    • +
    +
    +
    +
    +

    Basis Library

    +
    +
      +
    • +

      The FixedInt, LargeWord, and Position structures are now all +64-bit. Thus the Int64 and Word64 structures properly conform +to the Basis Specification.
      +Another effect of this change is that files that are over 230-1 +bytes in size are now supported (bugs 33 and 36).

      +
    • +
    +
    +
    +
    +

    MLRISC and NLFFI

    +
    +
      +
    • +

      Fixed code that assumed that LargeWord and Word32 are the same.

      +
    • +
    +
    +
    +
    +

    Runtime System

    +
    +
      +
    • +

      The runtime system was written assuming ANSI X3.159-1989 Standard C; in +this releaser, we have added the use of inline functions, which +where added in ISO C99. While Microsoft’s Visual Studio does not +fully support the standard, it is generally compatible with the ISO C99.

      +
    • +
    • +

      Allocation of small objects was previously implemented using macros; +we now use inline functions. Inline functions are more robust and type safe, and +also enable cleaner handling of 32-bit integers, which are boxed on +32-bit targets, but unboxed on 64-bit targets.

      +
    • +
    • +

      Removed makefiles and code for arichtectures and operating systems that +are no longer supported.

      +
    • +
    +
    +
    +
    +
    +
    +

    Bugs

    +
    +
    +

    Here is a list of tracked bugs fixed (or closed) with this release, please see the +bug tracker +for more details.

    +
    + ++++ + + + + + + + + + + +

    33

    Overflow exception with inputLine function

    36

    Can’t open very large file

    +
    +
    +
    +

    Supported systems

    +
    +
    +

    We believe that SML/NJ will build and run on the following systems, but have only +tested some of them:

    +
    + +++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    ArchitectureOperating SystemStatus

    Power PC

    Mac OS X 10.5 (Leopard)

    AIX

     

    Sparc

    Solaris

    Linux

     

    x86 (32-bit)

    Mac OS X 10.6 (Snow Leopard)

    Mac OS X 10.7 (Lion)

    Mac OS X 10.8 (Mountain Lion)

    Mac OS X 10.9 (Mavericks)

    Mac OS X 10.10 (Yosemite)

    Mac OS X 10.11 (El Capitan)

    macOS 10.12 (Sierra)

    Tested

    macOS 10.13 (High Sierra)

    Tested

    macOS 10.14 (Mojave)

    Tested

    Ubuntu 16.04.3 LTS

    Tested

    Other Linux variants

    BSD variants

    Windows 7

    Windows 10

    Cygwin (32-bit)

    +
    +

    A note about 64-bit support

    +
    +

    Apple is in the process of deprecating 32-bit support in macOS. +With macOS 10.14 Mojave, compiling 32-bit applications, such as the SML/NJ +runtime system, requires using an older SDK. +The SML/NJ installer, +however, works without issue on macOS 10.14 Mojave.

    +
    +
    +

    We have been working on 64-bit support since 110.82 we expect to have it in place +before the release of macOS 10.15.

    +
    +
    +
    +
    +
    + + + + + \ No newline at end of file diff --git a/doc/html/readme/110.90-README.html b/doc/html/readme/110.90-README.html new file mode 100644 index 0000000..d759c4d --- /dev/null +++ b/doc/html/readme/110.90-README.html @@ -0,0 +1,513 @@ + + + + + + + +Standard ML of New Jersey Release Notes + + + + + +
    +
    +
    +
    + Standard ML of New Jersey
    Release Notes
    +

    + Version 110.90
    June 12, 2019
    +
    +
    +
    +
    +
    +
    +
    SML/NJ HOME
    +
    +

    https://smlnj.org

    +
    +
    FILES
    +
    +

    https://smlnj.org/dist/working/110.90/

    +
    +
    +
    +
    +
    +
    +
    +
    +
    +

    Summary

    +
    +
    +

    The primary purpose of this release is to bring the Windows port of +SML/NJ up to parity with the Unix versions w.r.t. the 64-bit cleanup. +It also provides a fix for a long dormant bug in CM that was exposed +by the Basis reorganization in 110.88.

    +
    +
    +
    +
    +

    Details

    +
    +
    +

    Installation

    +
    +

    While the installer +for 110.90 works on macOS 10.14 Mojave, building from source +requires some extra steps. Another issue that you may encounter +when building on macOS 10.14 Mojave is an error message for a shell +script of the form

    +
    +
    +
    +
      /bin/sh: bad interpreter: Operation not permitted
    +
    +
    +
    +

    This error arises because the com.apple.quarantine attribute is set on the +shell script. To fix the problem, remove the attribute using the command

    +
    +
    +
    +
      xattr -d com.apple.quarantine shell-script
    +
    +
    +
    +

    and resume the build.

    +
    +
    +
    +

    Compiler

    +
    +

    This compiler now supports a primitive type to represent pointers to runtime-system +data structures. User-level access to this type is available via the Unsafe.Pointer +structure. Note that these values (other than the null pointer) cannot persist +across heap exports. In fact, exporting a heap that contains a pointer will +result in an error message.

    +
    +
    +
    +

    Basis Library

    +
    +

    This version implements the following Basis Library proposal:

    +
    +
    +
    +
    +
    +
    [2019-001]
    +
    +

    Correction to the PRIM_IO signature — This proposal changes the return type of the avail function for readers to +return Position.int option, instead of int option type.

    +
    +
    +
    +
    +
    +
    +
    +

    Runtime System

    +
    +
    +
    +
      +
    • +

      The interface between the SML/NJ Basis code and the runtime system now uses unsigned 64-bit +values to communicate time information (e.g., for the current time or file-modification +timestamps). The Windows code that works with time and date values now uses the native +Windows API, instead of the C-Library API.

      +
    • +
    • +

      The handling of 32-bit values in the runtime system has been modified to allow for +both boxed representations (on 32-bit machines) and tagged representations (on 64-bit +machines).

      +
    • +
    • +

      The runtime and Basis representation of the Microsoft HANDLE type has been abstracted +over the target-machine word size. While this type is an alias for void *, the +values are not actually pointers; therefore we represent them as boxed words of the native +machine size.

      +
    • +
    +
    +
    +
    +
    +
    +
    +
    +

    Bugs

    +
    +
    +

    Here is a list of tracked bugs fixed (or closed) with this release, please see the +bug tracker +for more details.

    +
    + ++++ + + + + + + +

    222

    CM exports from imported library rather than from defined library

    +
    +
    +
    +

    Supported systems

    +
    +
    +

    We believe that SML/NJ will build and run on the following systems, but have only +tested some of them:

    +
    + +++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    ArchitectureOperating SystemStatus

    Power PC

    Mac OS X 10.5 (Leopard)

    AIX

     

    Sparc

    Solaris

    Linux

     

    x86 (32-bit)

    Mac OS X 10.6 (Snow Leopard)

    Mac OS X 10.7 (Lion)

    Mac OS X 10.8 (Mountain Lion)

    Mac OS X 10.9 (Mavericks)

    Mac OS X 10.10 (Yosemite)

    Mac OS X 10.11 (El Capitan)

    macOS 10.12 (Sierra)

    Tested

    macOS 10.13 (High Sierra)

    Tested

    macOS 10.14 (Mojave)

    Tested

    Ubuntu 16.04.3 LTS

    Tested

    Other Linux variants

    BSD variants

    Windows 7

    Windows 10

    Cygwin (32-bit)

    +
    +

    A note about 64-bit support

    +
    +

    Apple is in the process of deprecating 32-bit support in macOS. +With macOS 10.14 Mojave, compiling 32-bit applications, such as the SML/NJ +runtime system, requires using an older SDK. +The SML/NJ installer, +however, works without issue on macOS 10.14 Mojave.

    +
    +
    +

    We have been working on 64-bit support since 110.82 we expect to have it in place +before the release of macOS 10.15.

    +
    +
    +
    +
    +
    + + + + + \ No newline at end of file diff --git a/doc/html/readme/110.91-README.html b/doc/html/readme/110.91-README.html new file mode 100644 index 0000000..70eab9c --- /dev/null +++ b/doc/html/readme/110.91-README.html @@ -0,0 +1,494 @@ + + + + + + + +Standard ML of New Jersey Release Notes + + + + + +
    +
    +
    +
    + Standard ML of New Jersey
    Release Notes
    +

    + Version 110.91
    June 20, 2019
    +
    +
    +
    +
    +
    +
    +
    SML/NJ HOME
    +
    +

    https://smlnj.org

    +
    +
    FILES
    +
    +

    https://smlnj.org/dist/working/110.91/

    +
    +
    +
    +
    +
    +
    +
    +
    +
    +

    Summary

    +
    +
    +

    This release is another checkpoint on the march to 64-bits. Most of the changes +are related to porting the runtime system to build on the AMD64, but we have also +made a few improvements elsewhere in the system.

    +
    +
    +
    +
    +

    Details

    +
    +
    +

    Installation

    +
    +

    While the installer +for 110.91 works on macOS 10.14 Mojave, building from source +requires some extra steps. Another issue that you may encounter +when building on macOS 10.14 Mojave is an error message for a shell +script of the form

    +
    +
    +
    +
      /bin/sh: bad interpreter: Operation not permitted
    +
    +
    +
    +

    This error arises because the com.apple.quarantine attribute is set on the +shell script. To fix the problem, remove the attribute using the command

    +
    +
    +
    +
      xattr -d com.apple.quarantine shell-script
    +
    +
    +
    +

    and resume the build.

    +
    +
    +
    +

    Compiler

    +
    +

    We added a new primop, REAL_TO_BITS that casts a floating-point value to the +same-size word value. This primop allows the Assembly.logb function to be +implemented in SML.

    +
    +
    +

    The Target structure defined in Basis/mlcomp now includes endianess information. +This information was neede to implement the REAL_TO_BITS primop on 32-bit +targets.

    +
    +
    +
    +

    Basis Library

    +
    +

    We have added implementations of the optional Basis Library modules +PackWord64Big and PackWord64Little.

    +
    +
    +

    We have also refactored the implementation of the Math structure to share +common code across the versions that are specialized for different levels of +hardware support.

    +
    +
    +
    +

    Runtime System

    +
    +
    +
    +
      +
    • +

      The mach-dep/assyntax.h file, which provides a set of macros to allow the assembly +code to work with both AT&T and GNU assembler syntax, has been replaced by +mach-dep/x86-syntax.h. This new file has been stripped down to only cover the +instructions actually used in the assembly code. Furthermore, it covers both +the Unix and Windows assemblers, and both the x86 and amd64 architectures.

      +
    • +
    • +

      Because we now assume C99 support, we can use the C Library functions +fegetround and fesetround to control rounding modes. Therefore, we have +removed these from the assembly code.

      +
    • +
    • +

      With the compiler support for bitcasting a real value to Word64.word, we can +implement the logb function in SML. Therefore, we have removed logb +from the assembly code.

      +
    • +
    +
    +
    +
    +
    +
    +
    +
    +

    Bugs

    +
    +
    +

    This release does not fix any known bugs.

    +
    +
    +
    +
    +

    Supported systems

    +
    +
    +

    We believe that SML/NJ will build and run on the following systems, but have only +tested some of them:

    +
    + +++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    ArchitectureOperating SystemStatus

    Power PC

    Mac OS X 10.5 (Leopard)

    AIX

     

    Sparc

    Solaris

    Linux

     

    x86 (32-bit)

    Mac OS X 10.6 (Snow Leopard)

    Mac OS X 10.7 (Lion)

    Mac OS X 10.8 (Mountain Lion)

    Mac OS X 10.9 (Mavericks)

    Mac OS X 10.10 (Yosemite)

    Mac OS X 10.11 (El Capitan)

    macOS 10.12 (Sierra)

    Tested

    macOS 10.13 (High Sierra)

    Tested

    macOS 10.14 (Mojave)

    Tested

    Ubuntu 16.04.3 LTS

    Tested

    Other Linux variants

    BSD variants

    Windows 7

    Windows 10

    Cygwin (32-bit)

    +
    +

    A note about 64-bit support

    +
    +

    Apple is in the process of deprecating 32-bit support in macOS. +With macOS 10.14 Mojave, compiling 32-bit applications, such as the SML/NJ +runtime system, requires using an older SDK. +The SML/NJ installer, +however, works without issue on macOS 10.14 Mojave.

    +
    +
    +

    We have been working on 64-bit support since 110.82 we expect to have it in place +before the release of macOS 10.15.

    +
    +
    +
    +
    +
    + + + + + \ No newline at end of file diff --git a/doc/html/readme/110.92-README.html b/doc/html/readme/110.92-README.html new file mode 100644 index 0000000..48c3b69 --- /dev/null +++ b/doc/html/readme/110.92-README.html @@ -0,0 +1,535 @@ + + + + + + + +Standard ML of New Jersey Release Notes + + + + + +
    +
    +
    +
    + Standard ML of New Jersey
    Release Notes
    +

    + Version 110.92
    August 10, 2019
    +
    +
    +
    +
    +
    +
    +
    SML/NJ HOME
    +
    +

    https://smlnj.org

    +
    +
    FILES
    +
    +

    https://smlnj.org/dist/working/110.92/

    +
    +
    +
    +
    +
    +
    +
    +
    +
    +

    Summary

    +
    +
    +

    This release has a major improvement in the compiler that makes the +use of polymorphic arrays more efficient; namely, we have removed +the dictionary passing for polymorphic array operations from FLINT. +Besides that change, this release is primarily a checkpoint of work +being done to support 64-bit targets, including new infrastructure +for cross compilation.

    +
    +
    +

    We have had reports of a problem with using CM on Windows +(bug #223). We have not been able to reproduce this problem, but +we are leaving the bug open for now. If you encounter problems +with this release on Windows, please submit a bug report using +the bug form.

    +
    +
    +
    +
    +

    Details

    +
    +
    +

    Installation

    +
    +

    While the installer +for 110.92 works on macOS 10.14 Mojave, building from source +requires some extra steps. Another issue that you may encounter +when building on macOS 10.14 Mojave is an error message for a shell +script of the form

    +
    +
    +
    +
      /bin/sh: bad interpreter: Operation not permitted
    +
    +
    +
    +

    This error arises because the com.apple.quarantine attribute is set on the +shell script. To fix the problem, remove the attribute using the command

    +
    +
    +
    +
      xattr -d com.apple.quarantine shell-script
    +
    +
    +
    +

    and resume the build.

    +
    +
    +
    +

    Compiler

    +
    +

    We have removed the runtime-type passing from the FLINT IR. This mechanism was +solely used to allow the real array type to be implemented using a packed +representation (i.e., like RealArray.array), but it incurred an overhead +on all polymorphic array code. By removing this mechanism, we have spead-up +the compiler by almost 10% and we see significant improvements in most uses +of polymorphic arrays. The downside is that code that uses the real array +type heavily will be somewhat slower; in that case, we recommend switching +to the monomorphic RealArray.array type.

    +
    +
    +

    We have a new version of the cross-compilation script (system/cmb-cross) +thatsupports target-specific dependencies in the front-end (i.e., +representation of numeric types and endianess). We use this new script +in the allcross script too.

    +
    +
    +
    +

    MLRISC

    +
    +
    +
    +
      +
    • +

      Some cleanup in the x86 backend. Removed the MULB, MULW, and MULL +unsigned-multiplication instructions, since they are not binary operations. +The MULL instruction is covered by the MULL1 constructor in the multDivOp +datatype. The same change was applied to the amd64 backend.

      +
    • +
    • +

      Many changes to the amd64 machine description:

      +
      +
        +
      • +

        Removed the PUSHB, PUSHW, and PUSHL instructions, since the matching +POP operations are not supported.

        +
      • +
      • +

        Removed the CALLQ operation, since it is the same as CALL.

        +
      • +
      • +

        Removed the CLTD and CQTO operations, since those names are just +synonyms for CDQ and CDO.

        +
      • +
      • +

        Replaced the INTO operation (which is not valid in 64-bit mode) with +INT of byte.

        +
      • +
      +
      +
    • +
    • +

      Restructured the amd64 machine-code generation implementation and +filled in many of the missing encodings. It should be complete for +SML/NJ code generation, but needs more work to support the full set +of operations described in the amd64.mdl file.

      +
    • +
    +
    +
    +
    +
    +
    +

    Runtime System

    +
    +

    Made a bunch of changes to fix compilation issues on 64-bit targets.

    +
    +
    +
    +
    +
    +

    Bugs

    +
    +
    +

    Here is a list of tracked bugs fixed (or closed) with this release, please see the +bug tracker +for more details.

    +
    + ++++ + + + + + + +

    224

    Word64.fromLargeInt fails

    +
    +
    +
    +

    Supported systems

    +
    +
    +

    We believe that SML/NJ will build and run on the following systems, but have only +tested some of them:

    +
    + +++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    ArchitectureOperating SystemStatus

    Power PC

    Mac OS X 10.5 (Leopard)

    AIX

     

    Sparc

    Solaris

    Linux

     

    x86 (32-bit)

    Mac OS X 10.6 (Snow Leopard)

    Mac OS X 10.7 (Lion)

    Mac OS X 10.8 (Mountain Lion)

    Mac OS X 10.9 (Mavericks)

    Mac OS X 10.10 (Yosemite)

    Mac OS X 10.11 (El Capitan)

    macOS 10.12 (Sierra)

    Tested

    macOS 10.13 (High Sierra)

    Tested

    macOS 10.14 (Mojave)

    Tested

    Ubuntu 16.04.3 LTS

    Tested

    Other Linux variants

    BSD variants

    Windows 7

    Windows 10

    Cygwin (32-bit)

    +
    +

    A note about 64-bit support

    +
    +

    Apple is in the process of deprecating 32-bit support in macOS. +With macOS 10.14 Mojave, compiling 32-bit applications, such as the SML/NJ +runtime system, requires using an older SDK. +The SML/NJ installer, +however, works without issue on macOS 10.14 Mojave.

    +
    +
    +

    We have been working on 64-bit support since 110.82 we expect to have it in place +before the release of macOS 10.15.

    +
    +
    +
    +
    +
    + + + + + \ No newline at end of file diff --git a/doc/html/readme/110.93-README.html b/doc/html/readme/110.93-README.html new file mode 100644 index 0000000..b936fc8 --- /dev/null +++ b/doc/html/readme/110.93-README.html @@ -0,0 +1,521 @@ + + + + + + + +Standard ML of New Jersey Release Notes + + + + + +
    +
    +
    +
    + Standard ML of New Jersey
    Release Notes
    +

    + Version 110.93
    September 5, 2019
    +
    +
    +
    +
    +
    +
    +
    SML/NJ HOME
    +
    +

    https://smlnj.org

    +
    +
    FILES
    +
    +

    https://smlnj.org/dist/working/110.93/

    +
    +
    +
    +
    +
    +
    +
    +
    +
    +

    Summary

    +
    +
    +

    This release fixes a critical bug in the Windows implementation, +as well as several other serious bugs. In addition, it contains +a number of changes that are part of the 64-bit porting effort, +but these should not affect 32-bit behavior.

    +
    +
    +
    +
    +

    Details

    +
    +
    +

    Installation

    +
    +

    While the installer +for 110.93 works on macOS 10.14 Mojave, building from source +requires some extra steps. Another issue that you may encounter +when building on macOS 10.14 Mojave is an error message for a shell +script of the form

    +
    +
    +
    +
      /bin/sh: bad interpreter: Operation not permitted
    +
    +
    +
    +

    This error arises because the com.apple.quarantine attribute is set on the +shell script. To fix the problem, remove the attribute using the command

    +
    +
    +
    +
      xattr -d com.apple.quarantine shell-script
    +
    +
    +
    +

    and resume the build.

    +
    +
    +
    +

    Compiler

    +
    +
    +
    +
      +
    • +

      Restructured the CPS contraction phase to make the fusion of +integer/word conversions more uniform.

      +
    • +
    • +

      Rewrote the expansion of the INLLSHIFT, INLRSHIFTL, and INLRSHIFT +primops to correcly handle shift operations on types that are smaller +than the default integer size. This change also allows the Word8 +shift operations to be inlined.

      +
    • +
    • +

      Generalized code generation for conversions involving tagged integers/words, +where the size is not the default integer size.

      +
    • +
    +
    +
    +
    +
    +
    +

    Runtime System

    +
    +

    Made a bunch of changes to fix issues on 64-bit targets.

    +
    +
    +
    +
    +
    +

    Bugs

    +
    +
    +

    Here is a list of tracked bugs fixed (or closed) with this release, please see the +bug tracker +for more details.

    +
    + ++++ + + + + + + + + + + + + + + +

    173

    OS.Process.sleep only works with whole numbers

    208

    Real.toManExp produces incorrect results in some cases

    223

    Incremental Build fails on Windows

    +
    +

    The following unnumbered bugs were also fixed:

    +
    +
    +
    +
    +
      +
    • +

      Fixed a bug where Int32.fromLarge(Word32.toLargeInt 0wxffffffff) would +return ~1 instead of raising Overflow.

      +
    • +
    • +

      Int64 comparisons were incorrect for the case where bit 31 of the low +word was set.

      +
    • +
    • +

      Real.toManExp computed an exponent that was off by one. This bug also +broke the Real.toLargeInt function.

      +
    • +
    • +

      Fixed a bug in the constant folding of arithmetic-right-shift operations.

      +
    • +
    +
    +
    +
    +
    +
    +
    +

    Supported systems

    +
    +
    +

    We believe that SML/NJ will build and run on the following systems, but have only +tested some of them:

    +
    + +++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    ArchitectureOperating SystemStatus

    Power PC

    Mac OS X 10.5 (Leopard)

    AIX

     

    Sparc

    Solaris

    Linux

     

    x86 (32-bit)

    Mac OS X 10.6 (Snow Leopard)

    Mac OS X 10.7 (Lion)

    Mac OS X 10.8 (Mountain Lion)

    Mac OS X 10.9 (Mavericks)

    Mac OS X 10.10 (Yosemite)

    Mac OS X 10.11 (El Capitan)

    macOS 10.12 (Sierra)

    Tested

    macOS 10.13 (High Sierra)

    Tested

    macOS 10.14 (Mojave)

    Tested

    Ubuntu 16.04.3 LTS

    Tested

    Other Linux variants

    BSD variants

    Windows 7

    Windows 10

    Cygwin (32-bit)

    +
    +

    A note about 64-bit support

    +
    +

    Apple is in the process of deprecating 32-bit support in macOS. +With macOS 10.14 Mojave, compiling 32-bit applications, such as the SML/NJ +runtime system, requires using an older SDK. +The SML/NJ installer, +however, works without issue on macOS 10.14 Mojave.

    +
    +
    +

    We have been working on 64-bit support since 110.81 we expect to have it in place +before the release of macOS 10.15.

    +
    +
    +
    +
    +
    + + + + + \ No newline at end of file diff --git a/doc/html/readme/110.94-README.html b/doc/html/readme/110.94-README.html new file mode 100644 index 0000000..8f3223c --- /dev/null +++ b/doc/html/readme/110.94-README.html @@ -0,0 +1,608 @@ + + + + + + + +Standard ML of New Jersey Release Notes + + + + + +
    +
    +
    +
    + Standard ML of New Jersey
    Release Notes
    +

    + Version 110.94
    October 31, 2019
    +
    +
    +
    +
    +
    +
    +
    SML/NJ HOME
    +
    +

    https://smlnj.org

    +
    +
    FILES
    +
    +

    https://smlnj.org/dist/working/110.94/

    +
    +
    +
    +
    +
    +
    +
    +
    +
    +

    Summary

    +
    +
    +

    This release is the first to provide support for 64-bit executables on +Linux and macOS. Support for other operating systems (BSD variants and +64-bit windows) will be included in future releases.

    +
    +
    +

    This release also fixes a critical bug in the Windows implementation of +some of the Math functions.

    +
    +
    +
    +
    +

    Details

    +
    +
    +

    Installation

    +
    +

    64-bit support

    +
    +

    The various installation and configuration scripts have been updated +to accept a command-line argument to override the +default machine size when running on the x86/amd64 architecture. +For the config/install.sh script, you can specify the machine-word +size of the target architecture as follows:

    +
    +
    +
    +
    +
    +
    -default size
    +
    +

    specify the default size for the sml and other commands, where +size is either 32 or 64.

    +
    +
    -32
    +
    +

    install the 32-bit version of the system.

    +
    +
    -64
    +
    +

    install the 64-bit version of the system.

    +
    +
    +
    +
    +
    +
    +

    It is possible to install both versions in the same location by running +the install.sh script twice. For example, the commands

    +
    +
    +
    +
      % config/install.sh -32
    +  % config/install.sh -default 64
    +
    +
    +
    +

    will install both versions with the 64-bit version as default. One +would then use the command sml -32 to run the 32-bit version of +the system.

    +
    +
    +

    If both versions are installed, then use the -32 or -64 flag +to override the default version.

    +
    +
    +
    +

    32-bit macOS issues

    +
    +

    While the x86 installer +for 110.94 works on macOs 10.14 Mojave, building from source +requires some extra steps because the version of Xcode +distributed for Mojave does not include a 32-bit SDK.

    +
    +
    +

    Another issue that you may encounter +when building on macOs 10.14 Mojave is an error message for a shell +script of the form

    +
    +
    +
    +
      /bin/sh: bad interpreter: Operation not permitted
    +
    +
    +
    +

    This error arises because the com.apple.quarantine attribute is set on the +shell script. To fix the problem, remove the attribute using the command

    +
    +
    +
    +
      xattr -d com.apple.quarantine shell-script
    +
    +
    +
    +

    and resume the build.

    +
    +
    +
    +
    +

    Compiler

    +
    +

    Various changes were made to support 64-bit targets.

    +
    +
    +

    Improved the CPS optimizer to do constant folding on numeric conversions when +the argument is a constant.

    +
    +
    +

    The fix for bug 130 required a change in the semantics of the type checker. +The definition of SML specifies that when typechecking a binding like +the following:

    +
    +
    +
    +
      val (x, NONE) = ([], SOME 1)
    +
    +
    +
    +

    the type of x should be polymorphic since the right-hand-side of the +binding is a syntactic value. As of version 110.94, we now have a more +restrictive rule for when the left-hand-side variables are made polymorphic. +Specifically, the right-hand-side expression must be a syntactic value and +the left-hand-side pattern must be irrefutable (which is not the case +in this example).

    +
    +
    +
    +

    MLRISC

    +
    +

    Fixed a number of bugs in the instruction selection and encoding for the +amd64 architecture. Also made a slight improvement to the x86 +instruction selector.

    +
    +
    +
    +

    Runtime System

    +
    +

    The runtime system has a two-level data structure for the garbage-collector’s +BIBOP on 64-bit machines.

    +
    +
    +

    Made the handling of the SysInt.int and SysWord.word types consistent between +the runtime system and SML code.

    +
    +
    +
    +

    ML-LPT

    +
    +

    Switched the type of source-file positions from Position.int to Int.int. +This change is because the Position.int type was changed to Int64.int in +version 110.89, +which is excessive for source-file parsing and which introduces space +overhead (64-bit integers are boxed).

    +
    +
    +
    +
    +
    +

    Bugs

    +
    +
    +

    Here is a list of tracked bugs fixed (or closed) with this release, please see the +bug tracker +for more details.

    +
    + ++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    130

    failure to raise Bind exception

    180

    Missing warning for nonexhaustive valbind patterns

    190

    Unexpected exception in SML/NJ with invalid list pattern match

    199

    Compiler bug in pretty printing of result

    225

    Math.ln giving erroneous answers on Windows

    226

    SML/NJ does not support 64-bit systems

    227

    CPS contraction is taking an excessive amount of time on word8 basis test

    +
    +
    +
    +

    Supported systems

    +
    +
    +

    We believe that SML/NJ will build and run on the following systems, but have only +tested some of them:

    +
    + +++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    ArchitectureOperating SystemStatus

    AMD64

    macOS 10.14 (Mojave)

    Tested

    macOS 10.15 (Catalina)

    Tested

    Ubuntu 16.04.3 LTS

    Tested

     

    Power PC

    Mac OS X 10.5 (Leopard)

    AIX

     

    Sparc

    Solaris

    Linux

     

    x86 (32-bit)

    Mac OS X 10.6 (Snow Leopard)

    Mac OS X 10.7 (Lion)

    Mac OS X 10.8 (Mountain Lion)

    Mac OS X 10.9 (Mavericks)

    Mac OS X 10.10 (Yosemite)

    Mac OS X 10.11 (El Capitan)

    macOS 10.12 (Sierra)

    Tested

    macOS 10.13 (High Sierra)

    Tested

    macOS 10.14 (Mojave)

    Tested

    Ubuntu 16.04.3 LTS

    Tested

    Other Linux variants

    BSD variants

    Windows 7

    Windows 10

    Cygwin (32-bit)

     

    +
    +
    +
    + + + + + \ No newline at end of file diff --git a/doc/html/readme/110.95-README.html b/doc/html/readme/110.95-README.html new file mode 100644 index 0000000..62aa57c --- /dev/null +++ b/doc/html/readme/110.95-README.html @@ -0,0 +1,562 @@ + + + + + + + +Standard ML of New Jersey Release Notes + + + + + +
    +
    +
    +
    + Standard ML of New Jersey
    Release Notes
    +

    + Version 110.95
    November 9, 2019
    +
    +
    +
    +
    +
    +
    +
    SML/NJ HOME
    +
    +

    https://smlnj.org

    +
    +
    FILES
    +
    +

    https://smlnj.org/dist/working/110.95/

    +
    +
    +
    +
    +
    +
    +
    +
    +
    +

    Summary

    +
    +
    +

    This release fixes a few critical problems with 110.94, as well as a +couple of other bugs. There are no new features or other major changes.

    +
    +
    +
    +
    +

    Details

    +
    +
    +

    Installation

    +
    +

    64-bit support

    +
    +

    The various installation and configuration scripts have been updated +to accept a command-line argument to override the +default machine size when running on the x86/amd64 architecture. +For the config/install.sh script, you can specify the machine-word +size of the target architecture as follows:

    +
    +
    +
    +
    +
    +
    -default size
    +
    +

    specify the default size for the sml and other commands, where +size is either 32 or 64.

    +
    +
    -32
    +
    +

    install the 32-bit version of the system.

    +
    +
    -64
    +
    +

    install the 64-bit version of the system.

    +
    +
    +
    +
    +
    +
    +

    It is possible to install both versions in the same location by running +the install.sh script twice. For example, the commands

    +
    +
    +
    +
      % config/install.sh -32
    +  % config/install.sh -default 64
    +
    +
    +
    +

    will install both versions with the 64-bit version as default. One +would then use the command sml -32 to run the 32-bit version of +the system.

    +
    +
    +

    If both versions are installed, then use the -32 or -64 flag +to override the default version.

    +
    +
    +
    +

    32-bit macOS issues

    +
    +

    While the x86 installer +for 110.95 works on macOs 10.14 Mojave, building from source +requires some extra steps because the version of Xcode +distributed for Mojave does not include a 32-bit SDK.

    +
    +
    +

    Another issue that you may encounter +when building on macOs 10.14 Mojave is an error message for a shell +script of the form

    +
    +
    +
    +
      /bin/sh: bad interpreter: Operation not permitted
    +
    +
    +
    +

    This error arises because the com.apple.quarantine attribute is set on the +shell script. To fix the problem, remove the attribute using the command

    +
    +
    +
    +
      xattr -d com.apple.quarantine shell-script
    +
    +
    +
    +

    and resume the build.

    +
    +
    +
    +
    +

    Runtime System

    +
    +

    The runtime now uses MAP_ANON for allocating memory on 64-bit Linux. +This change fixes a problem with versions of Linux that do not allow +access to /dev/zero.

    +
    +
    +
    +
    +
    +

    Bugs

    +
    +
    +

    Here is a list of tracked bugs fixed (or closed) with this release, please see the +bug tracker +for more details.

    +
    + ++++ + + + + + + + + + + + + + + +

    229

    Real.fromString errors

    230

    segmentation fault when compiling MLton sources with SML/NJ 64-bit

    231

    New literals-lifting code does not handle pair of reals

    +
    +

    The following unnumbered bugs were also fixed:

    +
    +
    +
    +
    +
      +
    • +

      Fixed the AST pretty printer so that it handles datatype replication +declarations.

      +
    • +
    • +

      Fixed problem with 64-bit runtime on some versions of Linux (e.g.. +Linux on ChromeBooks).

      +
    • +
    +
    +
    +
    +
    +
    +
    +

    Supported systems

    +
    +
    +

    We believe that SML/NJ will build and run on the following systems, but have only +tested some of them:

    +
    + +++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    ArchitectureOperating SystemStatus

    AMD64

    macOS 10.14 (Mojave)

    Tested

    macOS 10.15 (Catalina)

    Tested

    Ubuntu 16.04.3 LTS

    Tested

    Ubuntu 18.04.3 LTS

    Tested

     

    Power PC

    Mac OS X 10.5 (Leopard)

    AIX

     

    Sparc

    Solaris

    Linux

     

    x86 (32-bit)

    Mac OS X 10.6 (Snow Leopard)

    Mac OS X 10.7 (Lion)

    Mac OS X 10.8 (Mountain Lion)

    Mac OS X 10.9 (Mavericks)

    Mac OS X 10.10 (Yosemite)

    Mac OS X 10.11 (El Capitan)

    macOS 10.12 (Sierra)

    Tested

    macOS 10.13 (High Sierra)

    Tested

    macOS 10.14 (Mojave)

    Tested

    Ubuntu 16.04.3 LTS

    Tested

    Other Linux variants

    BSD variants

    Windows 7

    Windows 10

    Cygwin (32-bit)

     

    +
    +
    +
    + + + + + \ No newline at end of file diff --git a/doc/html/readme/110.96-README.html b/doc/html/readme/110.96-README.html new file mode 100644 index 0000000..d50f99e --- /dev/null +++ b/doc/html/readme/110.96-README.html @@ -0,0 +1,598 @@ + + + + + + + +Standard ML of New Jersey Release Notes + + + + + +
    +
    +
    +
    + Standard ML of New Jersey
    Release Notes
    +

    + Version 110.96
    December 13, 2019
    +
    +
    +
    +
    +
    +
    +
    SML/NJ HOME
    +
    +

    https://smlnj.org

    +
    +
    FILES
    +
    +

    https://smlnj.org/dist/working/110.96/

    +
    +
    +
    +
    +
    +
    +
    +
    +
    +

    Summary

    +
    +
    +

    This release fixes a couple of scripting bugs related to supporting both +32 and 64-bit versions of the system, as well as some other 64-bit-related +issues. It also adds 64-bit support for FreeBSD.

    +
    +
    +
    +
    +

    Details

    +
    +
    +

    Installation

    +
    +

    64-bit support

    +
    +

    The various installation and configuration scripts have been updated +to accept a command-line argument to override the +default machine size when running on the x86/amd64 architecture. +For the config/install.sh script, you can specify the machine-word +size of the target architecture as follows:

    +
    +
    +
    +
    +
    +
    -default size
    +
    +

    specify the default size for the sml and other commands, where +size is either 32 or 64.

    +
    +
    -32
    +
    +

    install the 32-bit version of the system.

    +
    +
    -64
    +
    +

    install the 64-bit version of the system.

    +
    +
    +
    +
    +
    +
    +

    It is possible to install both versions in the same location by running +the install.sh script twice. For example, the commands

    +
    +
    +
    +
      % config/install.sh -32
    +  % config/install.sh -default 64
    +
    +
    +
    +

    will install both versions with the 64-bit version as default. One +would then use the command sml -32 to run the 32-bit version of +the system.

    +
    +
    +

    If both versions are installed, then use the -32 or -64 flag +to override the default version.

    +
    +
    +
    +

    32-bit macOS issues

    +
    +

    While the x86 installer +for 110.96 works on macOs 10.14 Mojave, building from source +requires some extra steps because the version of Xcode +distributed for Mojave does not include a 32-bit SDK.

    +
    +
    +

    Another issue that you may encounter +when building on macOs 10.14 Mojave is an error message for a shell +script of the form

    +
    +
    +
    +
      /bin/sh: bad interpreter: Operation not permitted
    +
    +
    +
    +

    This error arises because the com.apple.quarantine attribute is set on the +shell script. To fix the problem, remove the attribute using the command

    +
    +
    +
    +
      xattr -d com.apple.quarantine shell-script
    +
    +
    +
    +

    and resume the build.

    +
    +
    +
    +
    +

    MLRISC

    +
    +

    Fixed a bug in the way that floating-point comparisons were being generated +for the AMD64 target. This was the root cause of bug 234, but could affect +other code.

    +
    +
    +
    +

    Runtime System

    +
    +

    We now use POSIX signal handling on FreeBSD (instead of the "Traditional BSD" +signal handling).

    +
    +
    +
    +

    ML-LPT

    +
    +

    The AntlrStreamPos structure now has a FilePos substructure that has the +INTEGER signature. This substructure defines the representation type for +positions and can be used to write code that will be portable against any +future changes in that type (e.g., the switch from Position.int to int +in 110.94).

    +
    +
    +
    +
    +
    +

    Bugs

    +
    +
    +

    Here is a list of tracked bugs fixed (or closed) with this release, please see the +bug tracker +for more details.

    +
    + ++++ + + + + + + + + + + + + + + + + + + + + + + + + + + +

    168

    heap2exec broken on 64-bit machines with default toolchains

    233

    eXene build fails for 64bit install

    234

    Converting NaN to a string causes an infinite loop on 64-bit machines

    235

    Mac OS Catalina Compilation Failure

    236

    Unable to package smlnj for 64-bit x86, for Arch Linux

    237

    heap2exec script fails on 110.95

    +
    +

    The following unnumbered bug was also fixed:

    +
    +
    +
    +
    +
      +
    • +

      Fixed a problem where a control-C (SIGINT), or other signal, might +be ignored.

      +
    • +
    +
    +
    +
    +
    +
    +
    +

    Supported systems

    +
    +
    +

    We believe that SML/NJ will build and run on the following systems, but have only +tested some of them:

    +
    + +++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    ArchitectureOperating SystemStatus

    AMD64

    FreeBSD 12.0

    Tested

    macOS 10.14 (Mojave)

    Tested

    macOS 10.15 (Catalina)

    Tested

    Ubuntu 16.04.3 LTS

    Tested

    Ubuntu 18.04.3 LTS

    Tested

     

    Power PC

    Mac OS X 10.5 (Leopard)

    AIX

     

    Sparc

    Solaris

    Linux

     

    x86 (32-bit)

    Mac OS X 10.6 (Snow Leopard)

    Mac OS X 10.7 (Lion)

    Mac OS X 10.8 (Mountain Lion)

    Mac OS X 10.9 (Mavericks)

    Mac OS X 10.10 (Yosemite)

    Mac OS X 10.11 (El Capitan)

    macOS 10.12 (Sierra)

    Tested

    macOS 10.13 (High Sierra)

    Tested

    macOS 10.14 (Mojave)

    Tested

    Ubuntu 16.04.3 LTS

    Tested

    Other Linux variants

    FreeBSD 12.0

    Tested

    Other BSD variants

    Windows 7

    Windows 10

    Cygwin (32-bit)

     

    +
    +
    +
    + + + + + \ No newline at end of file diff --git a/doc/html/readme/110.97-README.html b/doc/html/readme/110.97-README.html new file mode 100644 index 0000000..afe00af --- /dev/null +++ b/doc/html/readme/110.97-README.html @@ -0,0 +1,677 @@ + + + + + + + +Standard ML of New Jersey Release Notes + + + + + +
    +
    +
    +
    + Standard ML of New Jersey
    Release Notes
    +

    + Version 110.97
    April 21, 2020
    +
    +
    +
    +
    +
    +
    +
    SML/NJ HOME
    +
    +

    https://smlnj.org

    +
    +
    FILES
    +
    +

    https://smlnj.org/dist/working/110.97/

    +
    +
    +
    +
    +
    +
    +
    +
    +
    +

    Summary

    +
    +
    +

    This release is fairly substantial, with a bunch of bug fixes, additional +documentation, and a large number of changes and additions to the +SML/NJ Library.

    +
    +
    +
    +
    +

    Details

    +
    +
    +

    Compiler

    +
    +

    As part of the fix for bug 220, there has been a change to the way +that "meta" (or "unification") type variables are printed in error +messages. Such meta variables will be represented by type variables +with upper-case letters from the end of the alphabet (e.g., 'X, +and ''Z). Overloading constraints are added as a suffix enclosed +in square brackets. For example, the variable ''Z[OL(*)] has +an equality constraint (denoted by the double quotes and an overload +constraint introduced by an occurrence of the overloaded multiplication +operator. Overload constraints for literals are denoted by WORD +and INT.

    +
    +
    +
    +

    SML/NJ Library

    +
    +

    We are in the process of documenting the SML/NJ Library +using asciidoctor. As a result of +this documentation effort, we have been tweaking the interfaces +and implementations of many of the SML/NJ modules, which has +resulted in a large number of fairly small changes. The biggest +change is the addition of the new UUID Library, but here is the +complete list of changes.

    +
    +
    +
      +
    • +

      Added the disjoint function to the ORD_SET signature.

      +
    • +
    • +

      Added more modes to the ANSITerm command set. Specifically, we added +the Default color specifier and the styles DIM, NORMAL, UL_OFF, +BLINK_OFF, REV_OFF, INVIS_OFF, and RESET.

      +
    • +
    • +

      Added the structure FNVHash to the Util library. This +structure implements the +Fowler-Noll-Vo +hashing algorithm.

      +
    • +
    • +

      Added a new library for generating and manipulating "Universally +Unique Identifiers" (UUID/uuid-lib.cm).

      +
    • +
    • +

      Made some changes to the stream processing modules in the JSON +library. The return type for the error call-back function in +the JSONStreamParser.callbacks type is changed to unit (note +that the function is not expected to return). The JSONStreamPrinter +implementation now raises the Fail exception when printing is +attempted on a closed printer.

      +
    • +
    • +

      Reimplemented the find functions in UnixPath to use the +PathUtil implementation, rather than reimplementing it. +Also changed the result types of findFile and findFileOfType +to return string option, instead of raising an exception.

      +
    • +
    • +

      Added a QUOTE constructor to the SExp.value datatype and +cleaned up the details of the syntax of identifiers. Also +added a compare function for the SExp.value datatype.

      +
    • +
    • +

      Added operations to the HashConsSet and HashConsMap structures +(and corresponding signatures) to bring them inline with the +ORD_SET and ORD_MAP interfaces. Also reimplemented these

      +
    • +
    • +

      Added insertWith and insertWithi functions to the ORD_MAP +signature.

      +
    • +
    • +

      Replaced the "directional" fold functions (e.g., foldl, foldri) +with non-directional functions (_e.g., fold, foldi) in the +HashConsSet and HashConsMap structures. The reason for this +change is that the order of objects is pretty arbitrary, so there +is not any usefulness to processing elements in increasing or +decreasing order. +For backward compatibility, the old names will continue to work, +but they are deprecated amd will be removed in some future release.

      +
    • +
    • +

      Made the ControlUtil.Cvt.bool converter case insensitive. Also, +it now allows "yes"/"no" as values.

      +
    • +
    • +

      Updated the BitArray:BIT_ARRAY interface to follow standard patterns +(this interfaces was originally designed before the SML Basis Library, +so it did not follow the conventions). +The changes are to have the fromString function return NONE, +instead of raising an exception on bad input, and to use deprecate +the lshift and rshift operations in favor of >> and << (which +use word for the shift amount).

      +
    • +
    • +

      Added the fromVector, toList, and toVector functions to +the DynamicArray:DYNAMIC_ARRAY interface.

      +
    • +
    • +

      Renamed the HASH_SET signature to MONO_HASH_SET, which is +the correct name given the naming conventions.

      +
    • +
    • +

      Added implode, map, app, all, and exists functions to +the UTF8 signature. Also implemented the handling of 4-byte +encodings, which were previously not supported.

      +
    • +
    • +

      Removed the version and banner components from the LibBase +structure, since the library has been tracking SML/NJ release +versions for a very long time.

      +
    • +
    +
    +
    +
    +

    Documentation

    +
    +

    Additional manual pages have been added for the commands that are part +of the standard installation.

    +
    +
    +
    +

    Installation

    +
    +

    We have updated our infrastructure for building MSI files for Windows +to use the latest version of the WiX Toolset. +This change should not have any impact on the user-side of the +installation process.

    +
    +
    +
    +

    32-bit macOS issues

    +
    +

    While the x86 installer +for 110.97 works on macOs 10.14 Mojave, building from source +requires some extra steps because the version of Xcode +distributed for Mojave does not include a 32-bit SDK.

    +
    +
    +

    Another issue that you may encounter +when building on macOs 10.14 Mojave is an error message for a shell +script of the form

    +
    +
    +
    +
      /bin/sh: bad interpreter: Operation not permitted
    +
    +
    +
    +

    This error arises because the com.apple.quarantine attribute is set on the +shell script. To fix the problem, remove the attribute using the command

    +
    +
    +
    +
      xattr -d com.apple.quarantine shell-script
    +
    +
    +
    +

    and resume the build.

    +
    +
    +
    +
    +
    +

    Bugs

    +
    +
    +

    Here is a list of tracked bugs fixed (or closed) with this release, please see the +bug tracker +for more details.

    +
    + ++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    176

    Manpages

    195

    SuccessorML do expression in functor arguments.

    196

    dontPickle error when trying to use SuccessorML

    209

    Special identifiers can be rebound in datatype and exception declarations

    214

    exception raised by pretty printer in REPL

    220

    Incorrect error message for equality-type failure?

    239

    Date.toTime is incorrect (by a factor of 10E9)

    240

    Non-default 64-bit installation build failure

    244

    Compiler bug: PPObj: ppFields in ppval.sml (also bugs 248 and 255)

    245

    Lazy data types result in Compiler Bug error (also 249)

    247

    @SMLVersion should report 64/32 bit

    252

    Boyer Benchmark Compile Failure

    +
    +

    The following unnumbered bug was also fixed:

    +
    +
    +
    +
    +
      +
    • +

      Fixed the calculation of the maximum array/vector length for 64-bit targets.

      +
    • +
    +
    +
    +
    +
    +
    +
    +

    Supported systems

    +
    +
    +

    We believe that SML/NJ will build and run on the following systems, but have only +tested some of them:

    +
    + +++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    ArchitectureOperating SystemStatus

    AMD64

    FreeBSD 12.0

    Tested

    macOS 10.14 (Mojave)

    Tested

    macOS 10.15 (Catalina)

    Tested

    Ubuntu 16.04.3 LTS

    Tested

    Ubuntu 18.04.3 LTS

    Tested

     

    Power PC

    Mac OS X 10.5 (Leopard)

    AIX

     

    Sparc

    Solaris

    Linux

     

    x86 (32-bit)

    Mac OS X 10.6 (Snow Leopard)

    Mac OS X 10.7 (Lion)

    Mac OS X 10.8 (Mountain Lion)

    Mac OS X 10.9 (Mavericks)

    Mac OS X 10.10 (Yosemite)

    Mac OS X 10.11 (El Capitan)

    macOS 10.12 (Sierra)

    Tested

    macOS 10.13 (High Sierra)

    Tested

    macOS 10.14 (Mojave)

    Tested

    Ubuntu 16.04.3 LTS

    Tested

    Other Linux variants

    FreeBSD 12.0

    Tested

    Other BSD variants

    Windows 7

    Windows 10

    Cygwin (32-bit)

     

    +
    +
    +
    + + + + + \ No newline at end of file diff --git a/doc/html/readme/110.98-README.html b/doc/html/readme/110.98-README.html new file mode 100644 index 0000000..4c148b4 --- /dev/null +++ b/doc/html/readme/110.98-README.html @@ -0,0 +1,649 @@ + + + + + + + +Standard ML of New Jersey Release Notes + + + + + +
    +
    +
    +
    + Standard ML of New Jersey
    Release Notes
    +

    + Version 110.98
    July 16, 2020
    +
    +
    +
    +
    +
    +
    +
    SML/NJ HOME
    +
    +

    https://smlnj.org

    +
    +
    FILES
    +
    +

    https://smlnj.org/dist/working/110.98/

    +
    +
    +
    +
    +
    +
    +
    +
    +
    +

    Summary

    +
    +
    +

    This release includes substantial improvements to the documentation +(the SML/NJ Library is now mostly documented) as well as many small +improvements to the SML/NJ Library, various +improvements in other parts of the system, and bug fixes.

    +
    +
    +
    +
    +

    Future Plans

    +
    +
    +

    We are reworking the back-end of the compiler in preparation for +migrating from our current MLRISC code generator to one based +on LLVM. With that migration, we expect to drop support for +all processors other than the x86-64 (aka amd64), but we plan +to add support for the arm64 (aka AArch64).

    +
    +
    +

    If you are a user of SML/NJ on the Sparc, PowerPC, or 32-bit +x86 and would like to see support for that system continued, +please contact us.

    +
    +
    +
    +
    +

    Details

    +
    +
    +

    Compiler

    +
    +

    Made the LambdaVar.lvar type abstract. This is an internal change +that should not affect compiler behavior.

    +
    +
    +

    We have started a project to migrate the backend of SML/NJ to use +the LLVM infrastructure for code generation. +The prepare the ground for this migration, we have made a number +of changes to the compiler internals:

    +
    +
    +
    +
    +
      +
    • +

      Support for mapping the trigonometry functions sin, cos, and tan +to hardware instructions on the x86 architecture has been removed. +This change was made to simplify the code generator as we work on +migrating to a new LLVM backend.

      +
    • +
    • +

      The SML operators div and mod have a floor rounding semantics +instead of the truncation semantics supported by hardware. Previously, +we relied on MLRISC to handle the implementation of these operators, +but we now do that in a new lowering pass. We also added optimizations +for when the second argument to div or mod is a power of two to +the CPS contraction phase.

      +
    • +
    • +

      The lowering pass also lowers trapping conversions (CPS primops +TEST and TESTU) such that they only involve conversions that +can be checked using trapping add operations.

      +
    • +
    +
    +
    +
    +
    +
    +

    MLRISC

    +
    +

    The MLRISC instruction selector for the x86 and amd64 targets +erroneously assumed that the idiv instruction sets the OF (overflow) +condition code when dividing the largest negative number by ~1. +In fact, such a division operation traps, which is okay, because the +runtime system maps the trap to the Overflow exception. Since the +check for overflow is unnecessary, it has been removed from the files +MLRISC/amd64/mltree/amd64-gen.sml and MLRISC/x86/mltree/x86.sml.

    +
    +
    +
    +

    Basis Library

    +
    +

    This version implements the following Basis Library proposal:

    +
    +
    +
    +
    +
    +
    [2020-001]
    +
    +

    +Basis Library proposal 2020-001 (Addition of Universal module) — This proposal adds the Universal structure found in Poly/ML to the +Basis Library.

    +
    +
    +
    +
    +
    +
    +
    +

    SML/NJ Library

    +
    +

    The SML/NJ Library is now mostly documented; see doc/html/smlnj-lib/index.html +in the distribution or the online +documentation.

    +
    +
    +

    The HTMLDev structure in the pretty-printing library ($/pp-lib.cm) has +been renamed as HTML3Dev and moved into its own library (pp-extras-lib.cm). +The renaming is in anticipation of renaming the HTML Library to "HTML3" +and the moving it to its own library removes a dependency from the compiler +on $/html-lib.cm.

    +
    +
    +

    There were many small improvements (and a couple of bug fixes) to various +parts of the SML/NJ Library; see the smlnj-lib/CHANGES file for details.

    +
    +
    +
    +

    ML-LPT

    +
    +

    Changed the semantics of the --debug command-line option for ml-antlr. +Previously this option replaced the actions with a print expression, but that +limited its usefulness because of type errors in the generated code. The new +behavior is to preserve the existing actions and just add the printing code.

    +
    +
    +
    +

    Documentation

    +
    +

    This release contains a substantial amount of new documentation for the +SML/NJ Library (see doc/html/smlnj-lib). While the documentation is +not complete, it does cover the most commonly used components (with the +exception of the pretty-printing library).

    +
    +
    +
    +

    Installation

    +
    +

    The default installation for machines that report “`x86_64`” as their +hardware is now 64 bits. See the installation instructions for more +details.

    +
    +
    +
    +

    32-bit macOS issues

    +
    +

    While the x86 installer +for 110.98 works on macOs 10.14 Mojave, building from source +requires some extra steps because the version of Xcode +distributed for Mojave does not include a 32-bit SDK.

    +
    +
    +

    Another issue that you may encounter +when building on macOs 10.14 Mojave is an error message for a shell +script of the form

    +
    +
    +
    +
      /bin/sh: bad interpreter: Operation not permitted
    +
    +
    +
    +

    This error arises because the com.apple.quarantine attribute is set on the +shell script. To fix the problem, remove the attribute using the command

    +
    +
    +
    +
      xattr -d com.apple.quarantine shell-script
    +
    +
    +
    +

    and resume the build.

    +
    +
    +
    +
    +
    +

    Bugs

    +
    +
    +

    Here is a list of tracked bugs fixed (or closed) with this release, please see the +bug tracker +for more details.

    +
    + ++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    238

    Cross compilation from amd64 to x86 does not work

    256

    Ref.exchange incorrect

    257

    HASH_TABLE listItems error in reference manual

    260

    Perform divide on Position.int crashes with FPE on Linux

    261

    Weird "calc_strictness" message being printed

    262

    JSON parser fails on empty object

    263

    JSON parser ignores suffixes

    +
    +
    +
    +

    Supported systems

    +
    +
    +

    We believe that SML/NJ will build and run on the following systems, but have only +tested some of them:

    +
    + +++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    ArchitectureOperating SystemStatus

    AMD64

    FreeBSD 12.0

    Tested

    macOS 10.14 (Mojave)

    Tested

    macOS 10.15 (Catalina)

    Tested

    macOS 11.0 (Big Sur)

    Tested

    Ubuntu 16.04.3 LTS

    Tested

    Ubuntu 18.04.3 LTS

    Tested

     

    Power PC

    Mac OS X 10.5 (Leopard)

    AIX

     

    Sparc

    Solaris

    Linux

     

    x86 (32-bit)

    Mac OS X 10.6 (Snow Leopard)

    Mac OS X 10.7 (Lion)

    Mac OS X 10.8 (Mountain Lion)

    Mac OS X 10.9 (Mavericks)

    Mac OS X 10.10 (Yosemite)

    Mac OS X 10.11 (El Capitan)

    macOS 10.12 (Sierra)

    Tested

    macOS 10.13 (High Sierra)

    Tested

    macOS 10.14 (Mojave)

    Tested

    Ubuntu 16.04.3 LTS

    Tested

    Other Linux variants

    FreeBSD 12.0

    Tested

    Other BSD variants

    Windows 7

    Windows 10

    Cygwin (32-bit)

     

    +
    +
    +
    + + + + + \ No newline at end of file diff --git a/doc/html/readme/110.98.1-README.html b/doc/html/readme/110.98.1-README.html new file mode 100644 index 0000000..ed066a9 --- /dev/null +++ b/doc/html/readme/110.98.1-README.html @@ -0,0 +1,390 @@ + + + + + + + +Standard ML of New Jersey Release Notes + + + + +
    +
    +
    +
    + Standard ML of New Jersey
    Release Notes
    +

    + Version 110.98.1
    August 25, 2020
    +
    +
    +
    +
    +
    +
    +
    SML/NJ HOME
    +
    +

    https://smlnj.org

    +
    +
    FILES
    +
    +

    https://smlnj.org/dist/working/110.98.1/

    +
    +
    +
    +
    +
    +
    +
    +
    +
    +

    Summary

    +
    +
    +

    This patch release fixes a number of regressions in the 110.98 pretty +printer. It also has some internal changes that are part of the effort +to migrate to a LLVM backend.

    +
    +
    +
    +
    +

    Bugs

    +
    +
    +

    Here is a list of tracked bugs fixed (or closed) with this release, please see the +bug tracker +for more details.

    +
    + ++++ + + + + + + + + + + + + + + + + + + +

    266

    Pretty printing regression in SML/NJ 110.98

    268

    Polymorphic Type Pretty Printing Regression

    269

    Word64.fromString causes an Overflow for greater than 232-1

    271

    pretty printer regression for structure binding

    +
    +
    +
    +

    Supported systems

    +
    +
    +

    We believe that SML/NJ will build and run on the following systems, but have only +tested some of them:

    +
    + +++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    ArchitectureOperating SystemStatus

    AMD64

    FreeBSD 12.0

    Tested

    macOS 10.14 (Mojave)

    Tested

    macOS 10.15 (Catalina)

    Tested

    Ubuntu 16.04.3 LTS

    Tested

    Ubuntu 18.04.3 LTS

    Tested

     

    Power PC

    Mac OS X 10.5 (Leopard)

    AIX

     

    Sparc

    Solaris

    Linux

     

    x86 (32-bit)

    Mac OS X 10.6 (Snow Leopard)

    Mac OS X 10.7 (Lion)

    Mac OS X 10.8 (Mountain Lion)

    Mac OS X 10.9 (Mavericks)

    Mac OS X 10.10 (Yosemite)

    Mac OS X 10.11 (El Capitan)

    macOS 10.12 (Sierra)

    Tested

    macOS 10.13 (High Sierra)

    Tested

    macOS 10.14 (Mojave)

    Tested

    Ubuntu 16.04.3 LTS

    Tested

    Other Linux variants

    FreeBSD 12.0

    Tested

    Other BSD variants

    Windows 7

    Windows 10

    Cygwin (32-bit)

     

    +
    +
    +
    + + + + + \ No newline at end of file diff --git a/doc/html/readme/110.99-README.html b/doc/html/readme/110.99-README.html new file mode 100644 index 0000000..05faa5a --- /dev/null +++ b/doc/html/readme/110.99-README.html @@ -0,0 +1,579 @@ + + + + + + + +Standard ML of New Jersey Release Notes + + + + + +
    +
    +
    +
    + Standard ML of New Jersey
    Release Notes
    +

    + Version 110.99
    December 24, 2020
    +
    +
    +
    +
    +
    +
    +
    SML/NJ HOME
    +
    +

    https://smlnj.org

    +
    +
    FILES
    +
    +

    https://smlnj.org/dist/working/110.99/

    +
    +
    +
    +
    +
    +
    +
    +
    +
    +

    Summary

    +
    +
    +

    This release is primarily a bug fix release with some additional runtime-system +changes to help prepare for the planned switch to using an LLVM backend for +the AMD64 target.

    +
    +
    +

    Version 110.98.1 +was a patch release that addressed some of the pretty printing regressions +introduced in 110.98. The release notes for those fixes are included +here.

    +
    +
    +

    Also note that this release is the last 110 series release. We are switching +our release numbering scheme to the format <year>.<relnum>, so the next +release will be 2021.1.

    +
    +
    +
    +
    +

    Details

    +
    +
    +

    Compiler

    +
    +

    The binary-file format used to store compiled code has been simplified +by eliminating support for multiple code objects in a file (a feature that +has not been used in many years). We have also changed the format of +the "magic string" that is used to identify binfiles.

    +
    +
    +
    +

    Runtime System

    +
    +

    The boot loader was updated to work with the new binary-file format for +code.

    +
    +
    +

    The layout of the stack frame used by SML code was changed to be +compatible with the register spilling conventions used by LLVM. +Specifically, the spill area is now at the top of the frame (i.e., +at offset zero from the stack pointer and the various bits of +stack-allocated state are at the bottom of the frame.

    +
    +
    +
    +

    SML/NJ Library

    +
    +

    Made a number of improvements to the pretty-printing library. +The PP_DEVICE signature was extended with a number of additional +properties (max indentation, max depth, ellipses, …​), as well +as functions for setting properties on a device. The pretty-printer +engine was updated to to use the max depth and max indentation +properties when rendering.

    +
    +
    +

    A new device module (CharBufferDev) was added. This is a device +for pretty printing to a character buffer and is the device structure +that underlies the CharBufferPP structure.

    +
    +
    +
    +

    ML-LPT

    +
    +

    Changed the semantics of the spans returned by ml-ulex so that the +second component of a span is the position of the rightmost character +in the token (instead of the character following the token). +Specifically, the span \((p_1, p_2)\) specifies the +\(p_2 - p_1 + 1\) characters that start with the character at +position \(p_1\) and run to \(p_2\) (inclusive).

    +
    +
    +
    +

    32-bit macOS issues

    +
    +

    While the x86 installer +for 110.99 works on macOs 10.14 Mojave, building from source +requires some extra steps because the version of Xcode +distributed for Mojave does not include a 32-bit SDK.

    +
    +
    +

    Another issue that you may encounter +when building on macOs 10.14 Mojave is an error message for a shell +script of the form

    +
    +
    +
    +
      /bin/sh: bad interpreter: Operation not permitted
    +
    +
    +
    +

    This error arises because the com.apple.quarantine attribute is set on the +shell script. To fix the problem, remove the attribute using the command

    +
    +
    +
    +
      xattr -d com.apple.quarantine shell-script
    +
    +
    +
    +

    and resume the build.

    +
    +
    +
    +
    +
    +

    Bugs

    +
    +
    +

    Here is a list of tracked bugs fixed (or closed) since 110.98 (including +those patched in 110.98.1). Please see the +bug tracker +for more details.

    +
    + ++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    254

    Real.fromLargeInt produces negative results

    266

    Pretty printing regression in SML/NJ 110.98

    267

    Returns an incorrect result for a calculation on Position.int for 32-bit mode.

    268

    Polymorphic Type Pretty Printing Regression

    269

    Word64.fromString causes an Overflow for greater than 232-1

    271

    Pretty printer regression for structure binding

    274

    Minor pretty printing glitch when printing structure specs

    276

    Missing option to control extra newlines in REPL

    277

    Excess white space when pretty printing a module signature

    +
    +
    +
    +

    Supported systems

    +
    +
    +

    We believe that SML/NJ will build and run on the following systems, but have only +tested some of them:

    +
    + +++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    ArchitectureOperating SystemStatus

    AMD64

    FreeBSD 12.0

    Tested

    macOS 10.14 (Mojave)

    Tested

    macOS 10.15 (Catalina)

    Tested

    Ubuntu 16.04.3 LTS

    Tested

    Ubuntu 18.04.3 LTS

    Tested

     

    Power PC

    Mac OS X 10.5 (Leopard)

    AIX

     

    Sparc

    Solaris

    Linux

     

    x86 (32-bit)

    Mac OS X 10.6 (Snow Leopard)

    Mac OS X 10.7 (Lion)

    Mac OS X 10.8 (Mountain Lion)

    Mac OS X 10.9 (Mavericks)

    Mac OS X 10.10 (Yosemite)

    Mac OS X 10.11 (El Capitan)

    macOS 10.12 (Sierra)

    Tested

    macOS 10.13 (High Sierra)

    Tested

    macOS 10.14 (Mojave)

    Tested

    Ubuntu 16.04.3 LTS

    Tested

    Other Linux variants

    FreeBSD 12.0

    Tested

    Other BSD variants

    Windows 7

    Windows 10

    Cygwin (32-bit)

     

    +
    +
    +
    + + + + + \ No newline at end of file diff --git a/doc/html/readme/110.99.1-README.html b/doc/html/readme/110.99.1-README.html new file mode 100644 index 0000000..1624350 --- /dev/null +++ b/doc/html/readme/110.99.1-README.html @@ -0,0 +1,541 @@ + + + + + + + +Standard ML of New Jersey Release Notes + + + + + +
    +
    +
    +
    + Standard ML of New Jersey
    Release Notes
    +

    + Version 110.99.1
    April 12, 2021
    +
    +
    +
    +
    +
    +
    +
    SML/NJ HOME
    +
    +

    https://smlnj.org

    +
    +
    FILES
    +
    +

    https://smlnj.org/dist/working/110.99.1/

    +
    +
    +
    +
    +
    +
    +
    +
    +
    +

    Summary

    +
    +
    +

    This is a patch release that includes a few bug fixes and which enables +running SML/NJ on M1 (aka Arm) Macs under +Rosetta2. +Because the system is running under emulation, it may not be as reliable +as when running as a native application on a AMD64 processor. We are +working on a native Arm64 port that we hope to release this summer.

    +
    +
    +
    +
    +

    Details

    +
    +
    +

    Runtime System

    +
    +

    Modified the stack frame layout on AMD64 to better support the LLVM +backend (currently under development).

    +
    +
    +
    +

    SML/NJ Library

    +
    +

    The ListMergeSort.sort function is now stable (as claimed by the documantation).

    +
    +
    +

    Added some additional mechanism to the JSONUtil structure to make +writing robust queries easier.

    +
    +
    +
    +

    Installation

    +
    +

    It is possible to install the amd64 version of the system on M1 Macs. +Because these machines run Big Sur, you will need to control-click on the +installer package and open it with the installer application.

    +
    +
    +
    +

    32-bit macOS issues

    +
    +

    While the x86 installer +for 110.99.1 works on macOs 10.14 Mojave, building from source +requires some extra steps because the version of Xcode +distributed for Mojave does not include a 32-bit SDK.

    +
    +
    +

    Another issue that you may encounter +when building on macOs 10.14 Mojave is an error message for a shell +script of the form

    +
    +
    +
    +
      /bin/sh: bad interpreter: Operation not permitted
    +
    +
    +
    +

    This error arises because the com.apple.quarantine attribute is set on the +shell script. To fix the problem, remove the attribute using the command

    +
    +
    +
    +
      xattr -d com.apple.quarantine shell-script
    +
    +
    +
    +

    and resume the build.

    +
    +
    +
    +
    +
    +

    Bugs

    +
    +
    +

    Here is a list of tracked bugs fixed (or closed) with this release, please see the +bug tracker +for more details.

    +
    + ++++ + + + + + + + + + + +

    278

    ListMergeSort is documented as stable, but is not (esp. since 110.78 rewrite!)

    280

    110.99 config/install.sh -64 fails on macOS 10.15.7

    +
    +

    The following unnumbered bug was also fixed:

    +
    +
    +
    +
    +
      +
    • +

      Fixed a performance bug in the implementation of the CharBuffer and MonoBuffer structures.

      +
    • +
    +
    +
    +
    +
    +
    +
    +

    Supported systems

    +
    +
    +

    We believe that SML/NJ will build and run on the following systems, but have only +tested some of them:

    +
    + +++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    ArchitectureOperating SystemStatus

    AMD64

    FreeBSD 12.0

    Tested

    macOS 10.14 (Mojave)

    Tested

    macOS 10.15 (Catalina)

    Tested

    Ubuntu 16.04.3 LTS

    Tested

    Ubuntu 18.04.3 LTS

    Tested

     

    Arm64

    macOS 11 (Big Sur)

    Tested under Rosetta2

     

    Power PC

    Mac OS X 10.5 (Leopard)

    AIX

     

    Sparc

    Solaris

    Linux

     

    x86 (32-bit)

    Mac OS X 10.6 (Snow Leopard)

    Mac OS X 10.7 (Lion)

    Mac OS X 10.8 (Mountain Lion)

    Mac OS X 10.9 (Mavericks)

    Mac OS X 10.10 (Yosemite)

    Mac OS X 10.11 (El Capitan)

    macOS 10.12 (Sierra)

    Tested

    macOS 10.13 (High Sierra)

    Tested

    macOS 10.14 (Mojave)

    Tested

    Ubuntu 16.04.3 LTS

    Tested

    Other Linux variants

    FreeBSD 12.0

    Tested

    Other BSD variants

    Windows 7

    Windows 10

    Cygwin (32-bit)

     

    +
    +
    +
    + + + + + \ No newline at end of file diff --git a/doc/html/readme/110.99.2-README.html b/doc/html/readme/110.99.2-README.html new file mode 100644 index 0000000..8082649 --- /dev/null +++ b/doc/html/readme/110.99.2-README.html @@ -0,0 +1,547 @@ + + + + + + + +Standard ML of New Jersey Release Notes + + + + + +
    +
    +
    +
    + Standard ML of New Jersey
    Release Notes
    +

    + Version 110.99.2
    September 23, 2021
    +
    +
    +
    +
    +
    +
    +
    SML/NJ HOME
    +
    +

    https://smlnj.org

    +
    +
    FILES
    +
    +

    https://smlnj.org/dist/working/110.99.2/

    +
    +
    +
    +
    +
    +
    +
    +
    +
    +

    Summary

    +
    +
    +

    This is a patch release that includes a few bug fixes and +adds support for macOS 12 (Monterey).

    +
    +
    +
    +
    +

    Details

    +
    +
    +

    SML/NJ Library

    +
    +

    Several improvements to the JSON library:

    +
    +
    +
    +
    +
      +
    • +

      Added convenience function int to the JSON_STREAM_OUTPUT interface.

      +
    • +
    • +

      Refactored the output modules in the JSON library. These changes should +not affect current clients of the library, but allow the addition of +printing JSON to a CharBuffer.buf.

      +
    • +
    • +

      Bug fix to JSONUtil module; the FIND path arc was not getting +handled for the update functions (i.e., replace, insert, and append).

      +
    • +
    +
    +
    +
    +
    +
    +

    32-bit macOS issues

    +
    +

    While the x86 installer +for 110.99.2 works on macOs 10.14 Mojave, building from source +requires some extra steps because the version of Xcode +distributed for Mojave does not include a 32-bit SDK.

    +
    +
    +

    Another issue that you may encounter +when building on macOs 10.14 Mojave is an error message for a shell +script of the form

    +
    +
    +
    +
      /bin/sh: bad interpreter: Operation not permitted
    +
    +
    +
    +

    This error arises because the com.apple.quarantine attribute is set on the +shell script. To fix the problem, remove the attribute using the command

    +
    +
    +
    +
      xattr -d com.apple.quarantine shell-script
    +
    +
    +
    +

    and resume the build.

    +
    +
    +
    +
    +
    +

    Bugs

    +
    +
    +

    Here is a list of tracked bugs fixed (or closed) with this release, please see the +bug tracker +for more details.

    +
    + ++++ + + + + + + + + + + + + + + + + + + +

    279

    Real.toLargeInt returns zero for anything in range [-512,512]

    286

    Installation of SML/NJ v110.99.1 fails in fresh Ubuntu 20.04/macOS 10.15

    287

    Install errors with request asdl

    293

    SMLNJ Incompatibility with macOS 12 Beta

    +
    +

    The following unnumbered bug was also fixed:

    +
    +
    +
    +
    +
      +
    • +

      Floating-point spill records were twice as large as necessary on 64-bit +systems.

      +
    • +
    +
    +
    +
    +
    +
    +
    +

    Supported systems

    +
    +
    +

    We believe that SML/NJ will build and run on the following systems, but have only +tested some of them:

    +
    + +++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    ArchitectureOperating SystemStatus

    AMD64

    FreeBSD 12.0

    Tested

    macOS 10.14 (Mojave)

    Tested

    macOS 10.15 (Catalina)

    Tested

    macOS 11 (Big Sur)

    Tested

    macOS 12 (Monterey)

    Tested

    Ubuntu 16.04.3 LTS

    Tested

    Ubuntu 18.04.3 LTS

    Tested

     

    Power PC

    Mac OS X 10.5 (Leopard)

    AIX

     

    Sparc

    Solaris

    Linux

     

    x86 (32-bit)

    Mac OS X 10.6 (Snow Leopard)

    Mac OS X 10.7 (Lion)

    Mac OS X 10.8 (Mountain Lion)

    Mac OS X 10.9 (Mavericks)

    Mac OS X 10.10 (Yosemite)

    Mac OS X 10.11 (El Capitan)

    macOS 10.12 (Sierra)

    Tested

    macOS 10.13 (High Sierra)

    Tested

    macOS 10.14 (Mojave)

    Tested

    Ubuntu 16.04.3 LTS

    Tested

    Other Linux variants

    FreeBSD 12.0

    Tested

    Other BSD variants

    Windows 7

    Windows 10

    Cygwin (32-bit)

     

    +
    +
    +
    + + + + + \ No newline at end of file diff --git a/doc/html/readme/110.99.3-README.html b/doc/html/readme/110.99.3-README.html new file mode 100644 index 0000000..8a152ea --- /dev/null +++ b/doc/html/readme/110.99.3-README.html @@ -0,0 +1,716 @@ + + + + + + + +Standard ML of New Jersey Release Notes + + + + + +
    +
    +
    +
    + Standard ML of New Jersey
    Release Notes
    +

    + Version 110.99.3
    July 27, 2022
    +
    +
    +
    +
    +
    +
    +
    SML/NJ HOME
    +
    +

    https://smlnj.org

    +
    +
    FILES
    +
    +

    https://smlnj.org/dist/working/110.99.3/

    +
    +
    +
    +
    +
    +
    +
    +
    +
    +

    Summary

    +
    +
    +

    This patch release fixes a large number of bugs in the "legacy" version +of SML/NJ and adds some new functions to the SML/NJ Library.

    +
    +
    +
    +
    +

    GitHub Migration

    +
    +
    +

    With this release, we have migrated the legacy branch from our own svn server +to a GitHub repository. This migration +is the first step in migrating everything to GitHub.

    +
    +
    +

    In addition, we have retired the gforge bug-tracking infrastructure in favor of using +GitHub issues. We have prepopulated the issues for the legacy repository with +the entries from the gforge bug database.

    +
    +
    +

    Since issues are associated with a repository, we now have separate trackers for the +legacy and main development versions. While most bugs that are present in one version +will also be present in the other version, it is not necessary to create two issues +when reporting a bug.

    +
    +
    +
    +
    +

    Details

    +
    +
    +

    Basis Library

    +
    +

    Added support for the following proposed Basis +Library extensions:

    +
    + +
    +
    +

    SML/NJ Library

    +
    +
    +
    +
      +
    • +

      Added the findAndRemove function to ORD_MAP interface.

      +
    • +
    • +

      Generalized comparisons on ordered maps; added equiv and +extends functions to ORD_MAP interface and made the type of +collate more polymorphic.

      +
    • +
    • +

      The SExp library now uses Scheme syntax for string values.

      +
    • +
    • +

      The SExp parser accepts empty files as valid input (producing +an empty list of S-Expressions).

      +
    • +
    +
    +
    +
    +
    +
    +

    Windows

    +
    +

    The MSI installer now correctly sets the version of SML/NJ (see +issue 59 for details). +We also add a shortcut for uninstalling SML/NJ to the Program +Menu.

    +
    +
    +
    +

    32-bit macOS issues

    +
    +

    While the x86 installer +for 110.99.3 works on macOs 10.14 Mojave, building from source +requires some extra steps because the version of Xcode +distributed for Mojave does not include a 32-bit SDK.

    +
    +
    +

    Another issue that you may encounter +when building on macOs 10.14 Mojave is an error message for a shell +script of the form

    +
    +
    +
    +
      /bin/sh: bad interpreter: Operation not permitted
    +
    +
    +
    +

    This error arises because the com.apple.quarantine attribute is set on the +shell script. To fix the problem, remove the attribute using the command

    +
    +
    +
    +
      xattr -d com.apple.quarantine shell-script
    +
    +
    +
    +

    and resume the build.

    +
    +
    +
    +
    +
    +

    Closed Issues

    +
    +
    +

    Here is a list of the issues that are fixed (or closed) with this release. +We include the original bug numbers for bugs that were reported using the +gforge bug tracker.

    +
    + +++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    Issue No.DescriptionGforge Bug

    43

    smlnj installation: error in first cm file memory.cm from nlffi

    283

    44

    Compiler bug: Contract: UsageMap on 132

    284

    48

    Signals are not delivered for corresponding events

    291

    58

    ml-build fails on Windows with error

    301

    59

    MSI installer for SMLNJ 110.99.2 reports as version 0.0.0.0

    302

    60

    Use raises wrong exception

    303

    63

    Word8VectorSlice: mapping a subslice produces wrong result or crashes SML/NJ

    306

    64

    SExpParser.parseFile should return empty list on empty file

    307

    65

    Add function for converting HTML4.html to string

    308

    66

    SExp: parsing the output of the printer produces a different SExp

    309

    67

    Error when REPL tries to print value of type Posix.FileSys.ST.stat

    310

    68

    Unable to interrupt execution by using SIGINT

    311

    69

    Real.fromLargeInt crashes on large integer input

    313

    70

    IEEEReal.setRoundingMode is a no-op on Linux

    314

    71

    IEEEReal.float_class does not match the Basis Library

    315

    72

    Real.fromManExp does not return expected value if man = 0.0

    316

    73

    Conversion from string to real does not accept non-finite values

    317

    74

    IEEEReal.decimal_approx does not match the Basis Library

    318

    75

    Type of Real.fromDecimal does not match the Basis Library

    319

    81

    Garbage collection does not trigger sigGC

    65

    103

    makeml and installml scripts do not like locations with spaces

    90

    177

    Pretty printing of Absyn in error message does not respect fixity

    204

    241

    Last branch of case expression omitted from PPAst

    270

    253

    CM.make is unable to handle filenames that contain a backslash

    312

    +
    +

    The following unnumbered bug was also fixed:

    +
    +
    +
    +
    +
      +
    • +

      Performance bug in the compilation of deeply-nested modules.

      +
    • +
    +
    +
    +
    +
    +
    +
    +

    Supported systems

    +
    +
    +

    We believe that SML/NJ will build and run on the following systems, but have only +tested some of them:

    +
    + +++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    ArchitectureOperating SystemStatus

    AMD64

    FreeBSD 12.0

    macOS 10.14 (Mojave)

    Tested

    macOS 10.15 (Catalina)

    Tested

    macOS 11 (Big Sur)

    Tested

    macOS 12 (Monterey)

    Tested

    macOS 13 (Ventura)

    Ubuntu 16.04.3 LTS

    Ubuntu 18.04.3 LTS

    Tested

     

    Power PC

    Mac OS X 10.5 (Leopard)

    AIX

     

    Sparc

    Solaris

    Linux

     

    x86 (32-bit)

    Mac OS X 10.6 (Snow Leopard)

    Mac OS X 10.7 (Lion)

    Mac OS X 10.8 (Mountain Lion)

    Mac OS X 10.9 (Mavericks)

    Mac OS X 10.10 (Yosemite)

    Mac OS X 10.11 (El Capitan)

    macOS 10.12 (Sierra)

    macOS 10.13 (High Sierra)

    macOS 10.14 (Mojave)

    Ubuntu 16.04.3 LTS

    Other Linux variants

    FreeBSD 12.0

    Other BSD variants

    Windows 7

    Windows 10

    Cygwin (32-bit)

     

    +
    +
    +
    + + + + + \ No newline at end of file diff --git a/doc/html/readme/110.99.4-README.html b/doc/html/readme/110.99.4-README.html new file mode 100644 index 0000000..7c0feb7 --- /dev/null +++ b/doc/html/readme/110.99.4-README.html @@ -0,0 +1,526 @@ + + + + + + + +Standard ML of New Jersey Release Notes + + + + +
    +
    +
    +
    + Standard ML of New Jersey
    Release Notes
    +

    + Version 110.99.4
    August 1, 2023
    +
    +
    +
    +
    +
    +
    +
    SML/NJ HOME
    +
    +

    https://smlnj.org

    +
    +
    FILES
    +
    +

    https://smlnj.org/dist/working/110.99.4/

    +
    +
    +
    +
    +
    +
    +
    +
    +
    +

    Summary

    +
    +
    +

    This patch release fixes a number of bugs in the "legacy" version +of SML/NJ.

    +
    +
    +
    +
    +

    Details

    +
    +
    +

    MLRISC

    +
    +

    Modified the way that zero-extension (ZX) is handled when the source bit width is +smaller than the target. Previously, the code assumed that the high bits would be +zero, but as demonstrated by issue #272, this is not guaranteed, +so we now mask out the high bits.

    +
    +
    +
    +

    SML/NJ Library

    +
    +
    +
    +
      +
    • +

      Added the NativeInt and NativeWord structure aliases to provide +a portable way to refer to the native numeric types in signatures.

      +
    • +
    • +

      Reimplementation of the Random structure to use the Mersenne Twister +algorithm. There are both 32-bit and 64-bit versions of the generator +(which one is included depends on the target platform).

      +
    • +
    • +

      Reworked the code for determining the maximum hash-table size (used +in the HashSetFn and HashTableRep modules) so that it does not +depend on Int.int and Word.word being the same size (an issue +for MLton). Also split that code out into the internal MaxHashTableSize +module.

      +
    • +
    • +

      Added support for the end-of-line assertion ("$") and full support for +intervals to the Thompson engine in the RegExp library.

      +
    • +
    +
    +
    +
    +
    +
    +

    Installation

    +
    +

    Added macOS 14 (Sonoma) as a recognized system.

    +
    +
    +
    +

    32-bit macOS issues

    +
    +

    While the x86 installer +for 110.99.4 works on macOs 10.14 Mojave, building from source +requires some extra steps because the version of Xcode +distributed for Mojave does not include a 32-bit SDK.

    +
    +
    +
    +
    +
    +

    Bugs

    +
    +
    +

    Here is a list of the issues and pull requests that are fixed (or closed) with +this release. +We include the original bug numbers for bugs that were reported using the +gforge bug tracker.

    +
    + +++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    IssueDescriptionGforge Bug

    87

    ml-makedepend trashes dependency file on error

    71

    256

    Initial numbers generated by Util/random.sml have unexpected regularities

    n.a.

    258

    Thompson NFA regex engine doesn’t advance stream position when matching

    n.a.

    260

    64-bit word literals are parsed incorrectly on 32-bit systems

    n.a.

    264

    Fix VectorSlice.all and ArraySlice.all

    n.a.

    266

    config: restore PPC for 10.4 and 10.6

    n.a.

    267

    Update makefile

    n.a.

    272

    Calling C function via NLFFI binding may give result outside range of its C return type

    n.a.

    278

    CharBuffer.addSlice does not properly handle starting offsets

    n.a.

    279

    HashSetFn’s calculation of `maxSize can cause Overflow during functor instantiation

    n.a.

    +
    +

    The following unnumbered bugs were also fixed:

    +
    +
    +
    +
    +
      +
    • +

      The ULexBuffer.getu function did not handle 4-byte UTF-8 sequences in some +situations.

      +
    • +
    • +

      The ULexBuffer.getu function did not reject surrogate halves or too-large +codepoints.

      +
    • +
    +
    +
    +
    +
    +
    +
    +

    Supported systems

    +
    +
    +

    We believe that SML/NJ will build and run on the following systems, but have only +tested some of them:

    +
    + +++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    ArchitectureOperating SystemStatus

    AMD64

    FreeBSD 12.0

    macOS 10.14 (Mojave)

    Tested

    macOS 10.15 (Catalina)

    Tested

    macOS 11 (Big Sur)

    Tested

    macOS 12 (Monterey)

    Tested

    macOS 13 (Ventura)

    Tested

    macOS 14 (Sonoma beta)

    Tested

    Ubuntu 20.04.3 LTS

    Tested

     

    Power PC

    Mac OS X 10.5 (Leopard)

    AIX

     

    Sparc

    Solaris

    Linux

     

    x86 (32-bit)

    Mac OS X 10.6 (Snow Leopard)

    Mac OS X 10.7 (Lion)

    Mac OS X 10.8 (Mountain Lion)

    Mac OS X 10.9 (Mavericks)

    Mac OS X 10.10 (Yosemite)

    Mac OS X 10.11 (El Capitan)

    macOS 10.12 (Sierra)

    macOS 10.13 (High Sierra)

    macOS 10.14 (Mojave)

    Ubuntu 16.04.3 LTS

    Other Linux variants

    FreeBSD 12.0

    Other BSD variants

    Windows 7

    Windows 10

    Cygwin (32-bit)

     

    +
    +
    +
    + + + + + \ No newline at end of file diff --git a/doc/html/readme/110.99.5-README.html b/doc/html/readme/110.99.5-README.html new file mode 100644 index 0000000..fb00b9f --- /dev/null +++ b/doc/html/readme/110.99.5-README.html @@ -0,0 +1,717 @@ + + + + + + + +Standard ML of New Jersey Release Notes + + + + + +
    +
    +
    +
    + Standard ML of New Jersey
    Release Notes
    +

    + Version 110.99.5
    March 14, 2024
    +
    +
    +
    +
    +
    +
    +
    SML/NJ HOME
    +
    +

    https://smlnj.org

    +
    +
    FILES
    +
    +

    https://smlnj.org/dist/working/110.99.5/

    +
    +
    +
    +
    +
    +
    +
    +
    +
    +

    Summary

    +
    +
    +

    This patch release fixes a large number of bugs in the "legacy" version +of SML/NJ; implements some missing features from the +Standard ML Basis Library, and +includes some improvements to the SML/NJ Library.

    +
    +
    +
    +
    +

    Details

    +
    +
    +

    Compiler

    +
    +
      +
    • +

      Some minor improvements in the REPL’s pretty printing.

      +
    • +
    +
    +
    +
    +

    CM

    +
    +
      +
    • +

      The system/cmb-make script now runs with the CM_VERBOSE environment variable +set to false by default (use the -verbose option for the old behavior). It +also now sets the PATH environment variable to include the directory where the +sml command lives when it is given as an argument to cmb-make.

      +
    • +
    +
    +
    +
    +

    Basis Library

    +
    +
      +
    • +

      Complete rewrite of the mechanisms used to implement conversions +between strings and reals. As part of this rewrite, we implemented the +support for the StringCvt.EXACT formatting mode and implemented the +missing Real.toDecimal/fromDecimal functions. The real-string +conversions is based on the Ryu library +and associated PLDI paper +by Ulf Adams.

      +
    • +
    • +

      Added missing implementation of Real64.nextAfter function.

      +
    • +
    • +

      Added Unsafe.Real64 structure that implements bit casts between +double-precision reals and 64-bit words.

      +
    • +
    +
    +
    +
    +

    SML/NJ Library

    +
    +

    In addition to the bug fixes noted below, the following improvements +made to the SML/NJ Library:

    +
    +
    +
      +
    • +

      Rewrote the JSON parsers to work directly on the input source (instead of +using a ML-ulex lexer. This change fixes https://github.com/smlnj/legacy/issues/284[Issue #284 (ML-ULex’s +memoization causes massive performance penalties for JSON parsing). For +the data.json file mentioned in the issue, parsing is about eight times +faster, while the speedup is even greater for the huge.json file.

      +
    • +
    • +

      Added the JSONDecode structure to the JSON library. This module implements +a set of combinators (inspired by the Elm JSON.Decode module) +for decoding JSON values.

      +
    • +
    • +

      Added insertWith, insertWithi, and findAndRemove operations to the +HASH_TABLE and MONO_HASH_TABLE interfaces (and corresponding +implementations).

      +
    • +
    • +

      Fixed a bug in Random.randReal on 32-bit systems.

      +
    • +
    • +

      Fixed the error checking and documentation for the subArray function +in the DynamicArray structure and DynamicArrayFn functor.

      +
    • +
    • +

      Fixes to the subArray and truncate functions in the dynamic array +implementation (both structure DynamicArray and functor DynamicArrayFn).

      +
    • +
    • +

      Add EditDistance module to utility library.

      +
    • +
    • +

      Reworked the UTF8 structure to impose stricter validation of the +encodings. Added the Invalid exception for when an invalid encoding +is encountered and replaced uses of the Domain exception with Invalid. +Also added the size' function for getting the number of UTF-8 +characters in a substring.

      +
    • +
    • +

      Added modules for the representation of booleans, integers, and words +as hash-consed values to the HashCons library.

      +
    • +
    +
    +
    +
    +
    +
    +

    Bugs

    +
    +
    +

    Here is a list of the issues that are fixed (or closed) with this release. +We include the original bug numbers for bugs that were reported using the +gforge bug tracker.

    +
    + +++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    IssueDescriptionGforge Bug

    194

    Incorrect formatting of real number

    221

    269

    Real.realMod and Real.split produce incorrect result for values close to zero

    n.a.

    280

    Support forward-slash ("/") as a separator in the Windows implementation of OS.Path

    n.a.

    283

    TextIO.inputAll segfaults when reading 980M file

    n.a.

    284

    ML-ULex’s memozation causes massive performance penalties for JSON parsing

    n.a.

    285

    Unmatched specification in opaque signature ascription causes uncaught exception

    n.a.

    286

    Get "fgrep is obsolescent" when run sml on Fedora

    n.a.

    287

    The word literal 0wx80000000 is incorrectly converted to 0wx7FFFFFFF80000000

    n.a.

    288

    DynamicArray.subArray creates array with length bound+1

    n.a.

    289

    Uncaught Bind exception in compiler with opaque signature matching

    n.a.

    290

    Random.realRand returns far smaller value than unity

    n.a.

    292

    Word8.toLargeInt is broken in 32-bit version

    n.a.

    294

    Starting SML/NJ on Windows produces unsightly REM lines

    n.a.

    295

    CM cannot find tools when using cmb-make to compile the compiler

    n.a.

    296

    The fromDecimal and toDecimal functions in the Real structure are not implemented

    n.a.

    297

    Additional operations for the MONO_HASH_TABLE interface

    n.a.

    298

    Incorrect printing of source code in error message

    n.a.

    300

    PackReal64{Big,Little}.update is not implemented

    n.a.

    302

    SML/NJ implementation of Date.fromTimeLocal differs from SML Basis Library Documentation

    n.a.

    +
    +

    We also fixed the following bugs that did not have issues +associated with them (or were bug fixes back-ported from the +development repository):

    +
    +
    +
    +
    + +
    +
    +
    +
    +
    +
    +

    Supported systems

    +
    +
    +

    We believe that SML/NJ will build and run on the following systems, but have only +tested some of them:

    +
    + +++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    ArchitectureOperating SystemStatus

    AMD64

    FreeBSD 12.0

    macOS 10.14 (Mojave)

    Tested

    macOS 10.15 (Catalina)

    Tested

    macOS 11 (Big Sur)

    Tested

    macOS 12 (Monterey)

    Tested

    Ubuntu 16.04.3 LTS

    Ubuntu 18.04.3 LTS

    Tested

     

    Power PC

    Mac OS X 10.5 (Leopard)

    AIX

     

    Sparc

    Solaris

    Linux

     

    x86 (32-bit)

    Mac OS X 10.6 (Snow Leopard)

    Mac OS X 10.7 (Lion)

    Mac OS X 10.8 (Mountain Lion)

    Mac OS X 10.9 (Mavericks)

    Mac OS X 10.10 (Yosemite)

    Mac OS X 10.11 (El Capitan)

    macOS 10.12 (Sierra)

    macOS 10.13 (High Sierra)

    macOS 10.14 (Mojave)

    Ubuntu 16.04.3 LTS

    Other Linux variants

    FreeBSD 12.0

    Other BSD variants

    Windows 7

    Windows 10

    Cygwin (32-bit)

     

    +
    +

    32-bit macOS issues

    +
    +

    While the x86 installer +for 110.99.5 works on macOs 10.14 Mojave, building from source +requires some extra steps because the version of Xcode +distributed for Mojave does not include a 32-bit SDK.

    +
    +
    +

    Another issue that you may encounter +when building on macOs 10.14 Mojave is an error message for a shell +script of the form

    +
    +
    +
    +
      /bin/sh: bad interpreter: Operation not permitted
    +
    +
    +
    +

    This error arises because the com.apple.quarantine attribute is set on the +shell script. To fix the problem, remove the attribute using the command

    +
    +
    +
    +
      xattr -d com.apple.quarantine shell-script
    +
    +
    +
    +

    and resume the build.

    +
    +
    +
    +
    +
    + + + + + \ No newline at end of file diff --git a/doc/html/readme/2021.1-README.html b/doc/html/readme/2021.1-README.html new file mode 100644 index 0000000..96318b9 --- /dev/null +++ b/doc/html/readme/2021.1-README.html @@ -0,0 +1,566 @@ + + + + + + + +Standard ML of New Jersey Release Notes + + + + + +
    +
    +
    +
    + Standard ML of New Jersey
    Release Notes
    +

    + Version @VERSION@
    @DATE@
    +
    +
    +
    +
    + +
    +
    +
    +
    +
    +
    +

    Summary

    +
    + +
    +
    +
    +

    Details

    +
    +
    +

    Compiler

    + +
    +
    +

    CM

    + +
    +
    +

    MLRISC

    + +
    +
    +

    Runtime System

    + +
    +
    +

    Basis Library

    + +
    +
    +

    SML/NJ Library

    + +
    +
    +

    Successor ML

    + +
    +
    +

    ML-LPT

    + +
    +
    +

    Documentation

    + +
    +
    +

    Installation

    + +
    +
    +

    32-bit macOS issues

    +
    +

    While the x86 installer +for @VERSION@ works on macOs 10.14 Mojave, building from source +requires some extra steps because the version of Xcode +distributed for Mojave does not include a 32-bit SDK.

    +
    +
    +

    Another issue that you may encounter +when building on macOs 10.14 Mojave is an error message for a shell +script of the form

    +
    +
    +
    +
      /bin/sh: bad interpreter: Operation not permitted
    +
    +
    +
    +

    This error arises because the com.apple.quarantine attribute is set on the +shell script. To fix the problem, remove the attribute using the command

    +
    +
    +
    +
      xattr -d com.apple.quarantine shell-script
    +
    +
    +
    +

    and resume the build.

    +
    +
    +
    +
    +
    +

    Bugs

    +
    +
    +

    Here is a list of tracked bugs fixed (or closed) with this release, please see the +bug tracker +for more details.

    +
    + ++++ + + + + + + + + + + + + + + + + + + + + + + + + + + +

    278

    ListMergeSort is documented as stable, but is not (esp. since 110.78 rewrite!)

    279

    Real.toLargeInt returns zero for anything in range [-512,512]

    280

    110.99 config/install.sh -64 fails on macOS 10.15.7

    286

    Installation of SML/NJ v110.99.1 fails in fresh Ubuntu 20.04/macOS 10.15

    287

    Install errors with request asdl

    293

    SMLNJ Incompatibility with macOS 12 Beta

    +
    +

    The following unnumbered bug was also fixed:

    +
    +
    +
    +
    +
      +
    • +

      Floating-point spill records were twice as large as necessary on 64-bit +systems.

      +
    • +
    +
    +
    +
    +
    +
    +
    +

    Supported systems

    +
    +
    +

    We believe that SML/NJ will build and run on the following systems, but have only +tested some of them:

    +
    + +++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    ArchitectureOperating SystemStatus

    AMD64

    FreeBSD 12.0

    Tested

    macOS 10.14 (Mojave)

    Tested

    macOS 10.15 (Catalina)

    Tested

    macOS 11 (Big Sur)

    Tested

    macOS 12 (Monterey)

    Tested

    Ubuntu 16.04.3 LTS

    Tested

    Ubuntu 18.04.3 LTS

    Tested

     

    Power PC

    Mac OS X 10.5 (Leopard)

    AIX

     

    Sparc

    Solaris

    Linux

     

    x86 (32-bit)

    Mac OS X 10.6 (Snow Leopard)

    Mac OS X 10.7 (Lion)

    Mac OS X 10.8 (Mountain Lion)

    Mac OS X 10.9 (Mavericks)

    Mac OS X 10.10 (Yosemite)

    Mac OS X 10.11 (El Capitan)

    macOS 10.12 (Sierra)

    Tested

    macOS 10.13 (High Sierra)

    Tested

    macOS 10.14 (Mojave)

    Tested

    Ubuntu 16.04.3 LTS

    Tested

    Other Linux variants

    FreeBSD 12.0

    Tested

    Other BSD variants

    Windows 7

    Windows 10

    Cygwin (32-bit)

     

    +
    +
    +
    + + + + + \ No newline at end of file diff --git a/doc/html/smlnj-lib/Controls/controls-lib.html b/doc/html/smlnj-lib/Controls/controls-lib.html new file mode 100644 index 0000000..b2750a9 --- /dev/null +++ b/doc/html/smlnj-lib/Controls/controls-lib.html @@ -0,0 +1,148 @@ + + + + + + + + + + The Controls Library + + + + + + + + +
    +
    +
    +
    + +
    + +
    The Controls Library
    +
    +
    +
    + +
    +
    +
    +

    Overview

    +
    +
    +

    The Controls Library provides support for managing application +controls via command-line options, environment variables, and +code.

    +
    +
    +

    The original design and implementation of the Controls Library +was by Matthias Blume.

    +
    +
    +
    +
    +

    Contents

    +
    +
    +
    +
    structure Controls
    +
    +

    The main structure of the Controls Library, which defines the +representation and basic operations for controls.

    +
    +
    structure ControlSet
    +
    +

    Provides facilities for managing sets of controls, with +associated information, by name.

    +
    +
    structure ControlRegistry
    +
    +

    Provides infrastructure for defining a hierarchical registry of controls.

    +
    +
    structure ControlUtil
    +
    +

    Provides utility functions for defining controls.

    +
    +
    +
    +
    +
    +
    +

    Usage

    +
    +
    +

    For SML/NJ, include $/controls-lib.cm in your +CM file.

    +
    +
    +

    For use in MLton, include +$(SML_LIB)/smlnj-lib/Controls/controls-lib.mlb in your MLB file.

    +
    +
    +
    +
    +
    + +
    + + diff --git a/doc/html/smlnj-lib/Controls/str-ControlRegistry.html b/doc/html/smlnj-lib/Controls/str-ControlRegistry.html new file mode 100644 index 0000000..3b4e360 --- /dev/null +++ b/doc/html/smlnj-lib/Controls/str-ControlRegistry.html @@ -0,0 +1,287 @@ + + + + + + + + + + The ControlRegistry structure + + + + + + + + +
    +
    +
    +
    + +
    + +
    The ControlRegistry structure
    +
    +
    +
    + +
    +
    +
    +
    +
    +

    The ControlRegistry structure provides infrastructure for defining +a hierarchical registry of controls.

    +
    +
    +
    +
    +

    Synopsis

    +
    +
    +
    +
    signature CONTROL_REGISTRY
    +structure ControlRegistry : CONTROL_REGISTRY
    +
    +
    +
    +
    +
    +

    Interface

    +
    +
    +
    +
    type registry
    +
    +type control_info = { envName : string option }
    +
    +val new : {help : string} -> registry
    +
    +val register : registry -> {
    +        ctl : string Controls.control,
    +        envName : string option
    +      } -> unit
    +
    +val registerSet : registry -> {
    +        ctls : (string, 'a) ControlSet.control_set,
    +        mkEnvName : string -> string option
    +      } -> unit
    +
    +val nest : registry -> {
    +        prefix : string option,
    +        pri : Controls.priority,
    +        obscurity : int,
    +        reg : registry
    +      } -> unit
    +
    +val control : registry -> string list -> string Controls.control option
    +
    +val init : registry -> unit
    +
    +datatype registry_tree = RTree of {
    +    path : string list,
    +    help : string,
    +    ctls : { ctl : string Controls.control, info : control_info } list,
    +    subregs : registry_tree list
    +  }
    +
    +val controls : (registry * int option) -> registry_tree
    +
    +
    +
    +
    +
    +

    Description

    +
    +
    +
    +
    type registry
    +
    +

    the type of a control registry hierarchy.

    +
    +
    type control_info = { envName : string option }
    +
    +

    a record of information about a control. Currently, this record type +only contains an optional environment-variable name for the control.

    +
    +
    val new : {help : string} -> registry
    +
    +

    new {help} creates a new registry, where the help string +describes the registry.

    +
    +
    val register : registry -> {ctl, envName} -> unit
    +
    +

    register {ctl, envName} adds the control ctl to the registry reg. +The optional string envName specifies the name of the environment +variable that can be used to specify the value of the control.

    +
    +
    val registerSet : registry -> {ctls, mkEnvName} -> unit
    +
    +

    registerSet {ctls, mkEnvName} registers the controls in the +control set ctls. +The function mkEnvName is applied to the names of the controls +to generate the optional environment-variable names.

    +
    +
    val nest : registry -> {prefix, pri, obscurity, reg} -> unit
    +
    +

    nest parent {prefix, pri, obscurity, reg} adds the registry reg as +a child of the registry parent. The fields of the second argument +have the following meaning:

    +
    +
    +
    +
    +
    prefix : string option
    +
    +

    The prefix (or name) that qualifies the child registry +(see the control function).

    +
    +
    pri : Controls.priority
    +
    +

    The registry’s priority; used when ordering the elements in a +registry.

    +
    +
    obscurity : int
    +
    +

    The obscurity level of the registrion (higher means more obscure).

    +
    +
    reg : registry
    +
    +

    The child registry being added to parent.

    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    val control : registry -> string list -> string Controls.control option
    +
    +

    control reg path searches the registry for a control with the given path.

    +
    +
    val init : registry -> unit
    +
    +

    init reg uses the host process’s environment (as accessed by the +OS.Process.getEnv +function) to initialize those controls that have associated environment-variables.

    +
    +
    datatype registry_tree = RTree of { …​ }
    +
    +

    The registry_tree datatype provides a concrete representation of the +registry hierarchy.

    +
    +
    +
    +
    +
    path : string list
    +
    +

    is the full path to the node in the tree.

    +
    +
    help : string
    +
    +

    is the description of the node in the tree.

    +
    +
    ctls : { ctl : string Controls.control, info : control_info } list
    +
    +

    is a priority-ordered list of the controls at the node +in the tree.

    +
    +
    subregs : registry_tree list
    +
    +

    is a priority-ordered list of the sub-registries at the node +in the tree.

    +
    +
    +
    +
    +
    +
    +
    val controls : (registry * int option) -> registry_tree
    +
    +

    controls (reg, optLevel) returns the registry_tree representation +of the registry reg. If optLevel is SOME n, then sub-registries +that have an obscurity level greater or equal to n are omitted +from the result.

    +
    +
    +
    +
    +
    +
    +

    See Also

    + +
    +
    +
    + +
    + + diff --git a/doc/html/smlnj-lib/Controls/str-ControlSet.html b/doc/html/smlnj-lib/Controls/str-ControlSet.html new file mode 100644 index 0000000..a6b1505 --- /dev/null +++ b/doc/html/smlnj-lib/Controls/str-ControlSet.html @@ -0,0 +1,219 @@ + + + + + + + + + + The ControlSet structure + + + + + + + + +
    +
    +
    +
    + +
    + +
    The ControlSet structure
    +
    +
    +
    + +
    +
    +
    +
    +
    +

    The ControlSet structure provides facilities for managing sets +of controls, with associated information, by name.

    +
    +
    +
    +
    +

    Synopsis

    +
    +
    +
    +
    signature CONTROL_SET
    +structure ControlSet : CONTROL_SET
    +
    +
    +
    +
    +
    +

    Interface

    +
    +
    +
    +
    type 'a control = 'a Controls.control
    +type ('a, 'b) control_set
    +
    +val new : unit -> ('a, 'b) control_set
    +
    +val member : (('a, 'b) control_set * Atom.atom) -> bool
    +val find   : (('a, 'b) control_set * Atom.atom)
    +      -> {ctl : 'a control, info : 'b} option
    +val insert : (('a, 'b) control_set * 'a control * 'b) -> unit
    +val remove : (('a, 'b) control_set * Atom.atom) -> unit
    +val infoOf : ('a, 'b) control_set -> 'a control -> 'b option
    +
    +val listControls : ('a, 'b) control_set -> {ctl : 'a control, info : 'b} list
    +val listControls' : (('a, 'b) control_set * int) -> {ctl : 'a control, info : 'b} list
    +
    +val app : ({ctl : 'a control, info : 'b} -> unit) -> ('a, 'b) control_set -> unit
    +
    +val stringControls : 'a Controls.value_cvt -> ('a, 'b) control_set
    +      -> (string, 'b) control_set
    +
    +
    +
    +
    +
    +

    Description

    +
    +
    +
    +
    type ('a, 'b) control_set
    +
    +

    The abstract type of control set, where the first type parameter +is the value-type of the controls in the set and the second is the +type of the information associated with each control.

    +
    +
    val new : unit -> ('a, 'b) control_set
    +
    +

    new () creates a new, empty, set of controls.

    +
    +
    val member : (('a, 'b) control_set * Atom.atom) -> bool
    +
    +

    member (ctlSet, name) returns true if there is a control with +the given name in the set.

    +
    +
    val find : (('a, 'b) control_set * Atom.atom) -> {ctl : 'a control, info : 'b} option
    +
    +

    find (ctsSet, name) returns SOME{ctl, info} when the control ctl, which +has the name name is in the set and info is its associated information. +Otherwise, NONE is returned.

    +
    +
    val insert : (('a, 'b) control_set * 'a control * 'b) -> unit
    +
    +

    insert (ctsSet, ctl, info) inserts the control ctl with associated +information into into the control set.

    +
    +
    val remove : (('a, 'b) control_set * Atom.atom) -> unit
    +
    +

    remove (ctlSet, name) removes the named control from the set (if +it is present).

    +
    +
    val infoOf : ('a, 'b) control_set -> 'a control -> 'b option
    +
    +

    infoOf ctlSet ctl returns SOME info, when ctl is in the set +with associated information info. If ctl is not in the set, +then NONE is returned.

    +
    +
    val listControls : ('a, 'b) control_set -> {ctl : 'a control, info : 'b} list
    +
    +

    listControls ctlSet returns a list of the controls in the set ordered by +priority.

    +
    +
    val listControls' : (('a, 'b) control_set * int) -> {ctl : 'a control, info : 'b} list
    +
    +

    listControls (ctlSet, level) returns a list of the controls in the set ordered by +priority, but omits any controls with an obscurity level greater or equal to +level.

    +
    +
    val app : ({ctl : 'a control, info : 'b} -> unit) -> ('a, 'b) control_set -> unit
    +
    +

    app f ctlSet applies the function f to the controls (and their associated +information). The order in which f is applied is unspecified.

    +
    +
    val stringControls : 'a Controls.value_cvt -> ('a, 'b) control_set -> (string, 'b) control_set
    +
    +

    stringControls cvt ctlSet returns a set of string controls, where the controls +in the new set are created by applying the value-converter cvt to the controls +in ctlSet. The associated information is preserved.

    +
    +
    +
    +
    +
    + +
    +
    + +
    + + diff --git a/doc/html/smlnj-lib/Controls/str-ControlUtil.html b/doc/html/smlnj-lib/Controls/str-ControlUtil.html new file mode 100644 index 0000000..7c697d5 --- /dev/null +++ b/doc/html/smlnj-lib/Controls/str-ControlUtil.html @@ -0,0 +1,197 @@ + + + + + + + + + + The ControlUtil structure + + + + + + + + +
    +
    +
    +
    + +
    + +
    The ControlUtil structure
    +
    +
    +
    + +
    +
    +
    +
    +
    +

    The ControlUtil structure provides some utility functions +for defining controls.

    +
    +
    +
    +
    +

    Synopsis

    +
    +
    +
    +
    signature CONTROL_UTIL
    +structure ControlUtil : CONTROL_UTIL
    +
    +
    +
    +
    +
    +

    Interface

    +
    +
    +
    +
    structure Cvt : sig
    +    val int : int Controls.value_cvt
    +    val bool : bool Controls.value_cvt
    +    val real : real Controls.value_cvt
    +
    +    val stringList : string list Controls.value_cvt
    +
    +    val string : string Controls.value_cvt
    +  end
    +
    +structure EnvName : sig
    +    val toUpper : string -> string -> string
    +  end
    +
    +
    +
    +
    +
    +

    Description

    +
    +
    +

    structure Cvt

    +
    +

    The ControlUtil.Cvt structure provides some common value-conversion +functions.

    +
    +
    +
    +
    val int : int Controls.value_cvt
    +
    +

    A value converter for the int type.

    +
    +
    val bool : bool Controls.value_cvt
    +
    +

    A value converter for the bool type. +This converter is case-insensitive, and accepts "yes" for true and +"no" for false.

    +
    +
    val real : real Controls.value_cvt
    +
    +

    A value converter for the real type.

    +
    +
    val stringList : string list Controls.value_cvt
    +
    +

    A value converter for the +comma-separated lists of strings.

    +
    +
    val string : string Controls.value_cvt
    +
    +

    A value converter for the string type. +This converter is just the identity.

    +
    +
    +
    +
    +
    +

    structure EnvName

    +
    +
    +
    val toUpper : string -> string -> string
    +
    +

    toUpper prefix s returns the string prefix ^ s', where s' is +the string s with lower-case letters converted to upper-case and any +occurrences of the minus character converted to the underscore character.

    +
    +
    +
    +
    +
    +
    +
    +

    See Also

    + +
    +
    +
    + +
    + + diff --git a/doc/html/smlnj-lib/Controls/str-Controls.html b/doc/html/smlnj-lib/Controls/str-Controls.html new file mode 100644 index 0000000..0540449 --- /dev/null +++ b/doc/html/smlnj-lib/Controls/str-Controls.html @@ -0,0 +1,469 @@ + + + + + + + + + + The Controls structure + + + + + + + + +
    +
    +
    +
    + +
    + +
    The Controls structure
    +
    +
    +
    + +
    +
    +
    +
    +
    +

    The Controls structure defines the basic types and operations +for the Controls Library.

    +
    +
    +
    +
    +

    Synopsis

    +
    +
    +
    +
    signature CONTROLS
    +structure Controls : CONTROLS
    +
    +
    +
    +
    +
    +

    Interface

    +
    +
    +
    +
    type priority = int list
    +type 'a control
    +
    +type 'a value_cvt = {
    +    tyName : string,
    +    fromString : string -> 'a option,
    +    toString : 'a -> string
    +  }
    +
    +val control : {
    +        name : string,
    +        pri : priority,
    +        obscurity : int,
    +        help : string,
    +        ctl : 'a ref
    +      } -> 'a control
    +
    +val genControl : {
    +        name : string,
    +        pri : priority,
    +        obscurity : int,
    +        help : string,
    +        default : 'a
    +      } -> 'a control
    +
    +exception ValueSyntax of {tyName : string, ctlName : string, value : string}
    +
    +val stringControl : 'a value_cvt -> 'a control -> string control
    +
    +val name : 'a control -> string
    +val get : 'a control -> 'a
    +val set : 'a control * 'a -> unit
    +val set' : 'a control * 'a -> unit -> unit
    +val help : 'a control -> string
    +val info : 'a control -> {priority : priority, obscurity : int, help : string}
    +
    +val mkOptionFlag : {
    +        ctl : bool control,
    +        short : string,
    +        long : string option
    +      } -> unit GetOpt.opt_descr
    +
    +val mkOptionReqArg : {
    +        ctl : string control,
    +        arg : string,
    +        short : string,
    +        long : string option
    +      } -> unit GetOpt.opt_descr
    +
    +val mkOption : {
    +        ctl : string control,
    +        arg : string,
    +        default : string,
    +        short : string,
    +        long : string option
    +      } -> unit GetOpt.opt_descr
    +
    +val save'restore : 'a control -> unit -> unit
    +
    +val compare : ('a control * 'a control) -> order
    +
    +
    +
    +
    +
    +

    Description

    +
    +
    +
    +
    type priority = int list
    +
    +

    something

    +
    +
    type 'a control
    +
    +

    something

    +
    +
    +
    +
    +
    +
    type 'a value_cvt = { …​ }
    +
    +

    A value converter is used to convert between strings and another +type. The fields have the following meaning:

    +
    +
    +
    +
    +
    tyName : string
    +
    +

    The name of the type being converted,

    +
    +
    fromString : string -> 'a option
    +
    +

    The function for converting from strings to the type.

    +
    +
    toString : 'a -> string'
    +
    +

    The function for converting from the type to strings.

    +
    +
    +
    +
    +
    +
    +
    val control : {name, pri, obscurity, help, ctl} -> 'a control
    +
    +

    control {name, pri, obscurity, help, ctl} creates a new control, +where the arguments are

    +
    +
    +
    +
    +
    name : string
    +
    +

    the name of the control.

    +
    +
    pri : priority
    +
    +

    the control’s priority.

    +
    +
    obscurity : int
    +
    +

    the control’s obscurity level (higher means more obscure).

    +
    +
    help : string
    +
    +

    the control’s description.

    +
    +
    ctl : 'a ref
    +
    +

    the reference cell that holds the control’s state.

    +
    +
    +
    +
    +
    +
    +
    val genControl : {name, pri, obscurity, help, ctl, default} -> 'a control
    +
    +

    genControl {name, pri, obscurity, help, default} creates a new control, +where the arguments are

    +
    +
    +
    +
    +
    name : string
    +
    +

    the name of the control.

    +
    +
    pri : priority
    +
    +

    the control’s priority.

    +
    +
    obscurity : int
    +
    +

    the control’s obscurity level (higher means more obscure).

    +
    +
    help : string
    +
    +

    the control’s description.

    +
    +
    default : 'a
    +
    +

    the initial, or default, value of the control.

    +
    +
    +
    +
    +
    +
    +
    exception ValueSyntax of {tyName : string, ctlName : string, value : string}
    +
    +

    This exception is raised to communicate that there is a syntax error +in a string representation of a control value.

    +
    +
    +
    +
    +
    +
    val stringControl : 'a value_cvt -> 'a control -> string control
    +
    +

    stringControl cvt ctl creates a string-valued interface to the control ctl +using the given value converter.

    +
    +
    val name : 'a control -> string
    +
    +

    name ctl returns the name of the control ctl.

    +
    +
    val get : 'a control -> 'a
    +
    +

    get ctl returns the value of the control ctl.

    +
    +
    val set : 'a control * 'a -> unit
    +
    +

    set (ctl, v) sets the value of the control ctl to v.

    +
    +
    val set' : 'a control * 'a -> unit -> unit (* delayed; error checking in 1st stage *)
    +
    +

    set (ctl, v) returns a unit -> unit function that will set +the value of the control ctl to v. This staged evaluation is useful +when the control does some error checking (i.e., because it is the +result of stringControl) on the value v. +In that case, the value is checked for syntactic correctness and +converted when set' is applied.

    +
    +
    val help : 'a control -> string
    +
    +

    help ctl returns the description of the control ctl.

    +
    +
    val info : 'a control -> {priority : priority, obscurity : int, help : string}
    +
    +

    info ctl returns a record {priority, obscurity, help}, +where the fields of the result are

    +
    +
    +
    +
    +
    priority : priority
    +
    +

    the control’s priority.

    +
    +
    obscurity : int
    +
    +

    the control’s obscurity level (higher means more obscure).

    +
    +
    help : string
    +
    +

    the control’s description.

    +
    +
    +
    +
    +
    +
    +
    val mkOptionFlag : {ctl, short, long} -> unit GetOpt.opt_descr
    +
    +

    mkOptionFlag {ctl, short, long} returns a command-line-option +GetOpt.NoArg descriptor +for a boolean control. The arguments are

    +
    +
    +
    +
    +
    ctl : bool control
    +
    +

    the control that will be set by the command-line option.

    +
    +
    short : string
    +
    +

    the short name for the command-line option; either zero or one chars.

    +
    +
    long : string option
    +
    +

    an optional long-name for the command-line option.

    +
    +
    +
    +
    +
    +
    +
    val mkOptionReqArg : {ctl, arg, short, long} -> unit GetOpt.opt_descr
    +
    +

    mkOptionReqArg {ctl, arg, short, long} returns a command-line-option +GetOpt.ReqArg descriptor +for a string control, where an argument for the command-line option +is required. The arguments to the call are

    +
    +
    +
    +
    +
    ctl : string control
    +
    +

    the control that will be set by the command-line option.

    +
    +
    arg : string
    +
    +

    the name for the argument, which is used in the usage message.

    +
    +
    short : string
    +
    +

    the short name for the option; either zero or one chars.

    +
    +
    long : string option
    +
    +

    an optional long-name for the option.

    +
    +
    +
    +
    +
    +
    +
    val mkOption : {ctl, arg, default, short, long} -> unit GetOpt.opt_descr
    +
    +

    mkOptionReqArg {ctl, arg, short, long} returns a command-line-option +GetOpt.OptArg descriptor +for a string control, where an argument for the command-line option +is optional. The arguments to the call are

    +
    +
    +
    +
    +
    ctl : string control
    +
    +

    the control that will be set by the command-line option.

    +
    +
    arg : string
    +
    +

    the name for the argument, which is used in the usage message.

    +
    +
    default : string
    +
    +

    the default value for when no argument is given.

    +
    +
    short : string
    +
    +

    the short name for the command-line option; either zero or one chars.

    +
    +
    long : string option
    +
    +

    an optional long-name for the command-line option.

    +
    +
    +
    +
    +
    +
    +
    val save’restore : 'a control -> unit -> unit
    +
    +

    save’restore ctl saves the current value of the control and +returns a unit -> unit function that will restore the value.

    +
    +
    val compare : ('a control * 'a control) -> order
    +
    +

    compare (ctl1, ctl2) returns the priority order of the two controls.

    +
    +
    +
    +
    +
    +
    +

    See Also

    + +
    +
    +
    + +
    + + diff --git a/doc/html/smlnj-lib/HashCons/fun-HashConsGroundFn.html b/doc/html/smlnj-lib/HashCons/fun-HashConsGroundFn.html new file mode 100644 index 0000000..ae41a55 --- /dev/null +++ b/doc/html/smlnj-lib/HashCons/fun-HashConsGroundFn.html @@ -0,0 +1,198 @@ + + + + + + + + + + The HashConsGroundFn functor + + + + + + + + +
    +
    +
    +
    + +
    + +
    The HashConsGroundFn functor
    +
    +
    +
    + +
    +
    +
    +
    +
    +

    The HashConsGroundFn functor provides a mechanism for defining +a hash-consed representations for "leaf" (or "ground") types. +These are types that might be atomic (e.g., +the HashConsAtom structure) or +datatypes, but they are treated as atomic values by the HashCons Library +and are the leaves of the hash-consed data structures.

    +
    +
    +
    +
    +

    Synopsis

    +
    +
    +
    +
    functor HashConsGroundFn (T : HASH_KEY)
    +
    +
    +
    +
    +
    +

    Arguments

    +
    +
    +
      +
    • +

      T : HASH_KEY:: +The argument structure T defines the type, equality function, and +hashing function a the "leaf" type.

      +
    • +
    +
    +
    +
    +
    +

    Interface

    +
    +
    +
    +
    type hash_key = T.hash_key
    +type obj = hash_key HashCons.obj
    +
    +val mk : hash_key -> obj
    +
    +
    +
    +
    +
    +

    Description

    +
    +
    +
    +
    type hash_key = T.hash_key
    +
    +

    the ground type.

    +
    +
    type obj = hash_key HashCons.obj
    +
    +

    the hash-consed ground type.

    +
    +
    val mk : hash_key -> obj
    +
    +

    map a ground type value to a hash-consed value.

    +
    +
    +
    +
    +
    +
    +

    Example

    +
    +
    +

    Suppose that we wish to have pairs of integers as a ground type +for a hash-consed data structure. We might implement this using +the following functor application:

    +
    +
    +
    +
    structure HCPairs = HashConsGroundFn (
    +    struct
    +      type hash_key = int * int
    +      fun sameKey (a : hash_key, b) = (a = b)
    +      fun hashVal (a, b) = Word.xorb(Word.fromInt a, Word.fromInt b)
    +    end)
    +
    +
    +
    +
    +
    +

    See Also

    + +
    +
    +
    + +
    + + diff --git a/doc/html/smlnj-lib/HashCons/hash-cons-lib.html b/doc/html/smlnj-lib/HashCons/hash-cons-lib.html new file mode 100644 index 0000000..28ba2ee --- /dev/null +++ b/doc/html/smlnj-lib/HashCons/hash-cons-lib.html @@ -0,0 +1,234 @@ + + + + + + + + + + The HashCons Library + + + + + + + + +
    +
    +
    +
    + +
    + +
    The HashCons Library
    +
    +
    +
    + +
    +
    +
    +

    Overview

    +
    +
    +

    The HashCons Library supports the implementation of +hash-consed representations of data structures. Such representations +are useful to reduce space usage by sharing common substructures +and to provide constant-time equality testing for large structures.

    +
    +
    +

    To use this library, you need to use a two-level definition of your +data structures. For example, we might define a hash-cons representation +of lambda terms as follows:

    +
    +
    +
    +
    structure HC = HashCons
    +
    +type var = HashConsString.obj
    +
    +datatype term_node
    +  = VAR of var
    +  | LAM of (var * term)
    +  | APP of (term * term)
    +withtype term = term_node HC.obj
    +
    +
    +
    +

    And you need to define an equality function on your terms (this function +can use the hash-cons identity on subterms). For example, here is the +equality function for our lambda terms:

    +
    +
    +
    +
    fun eq (APP(t11, t12), APP(t21, t22)) = HC.same(t11, t21) andalso HC.same(t12, t22)
    +  | eq (LAM(x, t1), LAM(y, t2)) = HC.same(x, y) andalso HC.same(t1, t2)
    +  | eq (VAR x, VAR y) = HC.same(x, y)
    +  | eq _ = false
    +
    +
    +
    +

    With the equality function defined, we can then create a hash-cons table:

    +
    +
    +
    +
    val tbl = HC.new {eq = eq}
    +
    +
    +
    +

    And define constructor functions:

    +
    +
    +
    +
    val mkAPP = HC.cons2 tbl (0wx1, APP)
    +val mkLAM = HC.cons2 tbl (0wx3, LAM)
    +val mkVAR = HC.cons1 tbl (0wx7, VAR)
    +val var = HW.mk
    +
    +
    +
    +

    Note that we pick successive prime numbers for the constructor hash codes. +Using these constructors, we can construct the representation of the +identity function "\(\lambda{} x . x\)" as follows:

    +
    +
    +
    +
    mkLAM(var "x", mkVAR(var "x"))
    +
    +
    +
    +

    In addition to term construction, this library also supports finite sets +and maps using the unique hash-cons codes as keys.

    +
    +
    +
    +
    +

    Contents

    +
    +
    +
    +
    structure HashCons
    +
    +

    The main module in the library, which defines the basic types +and various utility functions.

    +
    +
    structure HashConsAtom
    +
    +

    Code to package the Atom.atom type +as a hash-consed object.

    +
    +
    structure HashConsBool
    +
    +

    Code to package the bool type as a hash-consed object.

    +
    +
    structure HashConsInt
    +
    +

    Code to package the int type as a hash-consed object.

    +
    +
    structure HashConsMap
    +
    +

    Implements finite maps keyed by hash-consed objects.

    +
    +
    structure HashConsString
    +
    +

    Code to package the string type as a hash-consed object.

    +
    +
    structure HashConsSet
    +
    +

    Implements finite sets of hash-consed objects.

    +
    +
    structure HashConsWord
    +
    +

    Code to package the word type as a hash-consed object.

    +
    +
    functor HashConsGroundFn
    +
    +

    A functor for implementing new leaf types as hash-consed objects.

    +
    +
    +
    +
    +
    +
    +

    Usage

    +
    +
    +

    For SML/NJ, include $/hash-cons-lib.cm in your +CM file.

    +
    +
    +

    For use in MLton, include +$(SML_LIB)/smlnj-lib/HashCons/hash-cons-lib.mlb in your MLB file.

    +
    +
    +
    +
    +
    + +
    + + diff --git a/doc/html/smlnj-lib/HashCons/str-HashCons.html b/doc/html/smlnj-lib/HashCons/str-HashCons.html new file mode 100644 index 0000000..41cfc0e --- /dev/null +++ b/doc/html/smlnj-lib/HashCons/str-HashCons.html @@ -0,0 +1,341 @@ + + + + + + + + + + The HashCons structure + + + + + + + + +
    +
    +
    +
    + +
    + +
    The HashCons structure
    +
    +
    +
    + +
    +
    +
    +
    +
    +

    The HashCons structure is the main module for the HashCons Library.

    +
    +
    +
    +
    +

    Synopsis

    +
    +
    +
    +
    signature HASH_CONS
    +structure HashCons : HASH_CONS
    +
    +
    +
    +
    +
    +

    Interface

    +
    +
    +
    +
    type 'a tbl
    +
    +val new : {eq : 'a * 'a -> bool} -> 'a tbl
    +
    +val clear : 'a tbl -> unit
    +
    +type 'a obj = { nd : 'a, tag : word, hash : word }
    +
    +val node : 'a obj -> 'a
    +val tag  : 'a obj -> word
    +
    +val same : ('a obj * 'a obj) -> bool
    +val compare : ('a obj * 'a obj) -> order
    +
    +val cons0 : 'a tbl -> (word * 'a) -> 'a obj
    +val cons1 : 'a tbl -> (word * ('b obj -> 'a))
    +      -> 'b obj -> 'a obj
    +val cons2 : 'a tbl -> (word * ('b obj * 'c obj -> 'a))
    +      -> 'b obj * 'c obj -> 'a obj
    +val cons3 : 'a tbl -> (word * ('b obj * 'c obj * 'd obj -> 'a))
    +      -> 'b obj * 'c obj * 'd obj -> 'a obj
    +val cons4 : 'a tbl -> (word * ('b obj * 'c obj * 'd obj * 'e obj -> 'a))
    +      -> 'b obj * 'c obj * 'd obj * 'e obj -> 'a obj
    +val cons5 : 'a tbl -> (word * ('b obj * 'c obj * 'd obj * 'e obj * 'f obj -> 'a))
    +      -> 'b obj * 'c obj * 'd obj * 'e obj * 'f obj -> 'a obj
    +
    +val consList : 'a tbl -> (word * ('b obj list -> 'a)) -> 'b obj list -> 'a obj
    +
    +val consR1 : 'a tbl -> (word * ('b obj -> 'a) * ('r -> 'b obj))
    +      -> 'r -> 'a obj
    +val consR2 : 'a tbl
    +      -> (word * ('b obj * 'c obj -> 'a) * ('r -> 'b obj * 'c obj))
    +        -> 'r -> 'a obj
    +val consR3 : 'a tbl
    +      -> (word * ('b obj * 'c obj * 'd obj -> 'a)
    +        * ('r -> 'b obj * 'c obj * 'd obj))
    +        -> 'r -> 'a obj
    +val consR4 : 'a tbl
    +      -> (word * ('b obj * 'c obj * 'd obj * 'e obj -> 'a)
    +        * ('r -> 'b obj * 'c obj * 'd obj * 'e obj))
    +        -> 'r -> 'a obj
    +val consR5 : 'a tbl
    +      -> (word * ('b obj * 'c obj * 'd obj * 'e obj * 'f obj -> 'a)
    +        * ('r -> 'b obj * 'c obj * 'd obj * 'e obj * 'f obj))
    +        -> 'r -> 'a obj
    +
    +
    +
    +
    +
    +

    Description

    +
    +
    +
    +
    type 'a tbl
    +
    +

    The type of a table for hash-consing objects of type 'a. Typically, only +one table per distinct type should be defined.

    +
    +
    val new : {eq : 'a * 'a -> bool} -> 'a tbl
    +
    +

    new {eq} creates a new hash-cons table using the equality function eq.

    +
    +
    val clear : 'a tbl -> unit
    +
    +

    clear tbl clears the table of all elements.

    +
    +
    type 'a obj = { …​ }
    +
    +

    The representation of a hash-consed object. The fields are

    +
    +
    +
    +
    +
    nd
    +
    +

    the underlying representation of the object.

    +
    +
    tag
    +
    +

    a tag that is unique for the object (for the object’s table)

    +
    +
    hash
    +
    +

    a hash of the object (used to index into the table)

    +
    +
    +
    +
    +
    +
    +
    val node : 'a obj -> 'a
    +
    +

    node obj projects out the node from obj.

    +
    +
    val tag : 'a obj -> word
    +
    +

    tag obj projects out the unique tag from obj.

    +
    +
    val same : ('a obj * 'a obj) -> bool
    +
    +

    same (obj1, obj2) returns true if the objects are the same; this test +is constant time (it compares the object tags).

    +
    +
    val compare : ('a obj * 'a obj) -> order
    +
    +

    compare (obj1, obj2) returns the order of the two objects; this test +is constant time (it compares the object tags).

    +
    +
    val cons0 : 'a tbl -> (word * 'a) -> 'a obj
    +
    +

    cons0 tbl (h, x) creates a unique (w.r.t. tbl) representation +for the value x, where h is the hash of x.

    +
    +
    val cons1 : 'a tbl -> (word * ('b obj -> 'a)) -> 'b obj -> 'a obj
    +
    +

    cons1 tbl (h, mk) obj1 creates a unique (w.r.t. tbl) representation +for mk obj1, where h is a hash code for the term constructor mk.

    +
    +
    val cons2 : 'a tbl -> (word * ('b obj * 'c obj -> 'a)) -> …​
    +
    +

    cons2 tbl (h, mk) (obj1, obj2) creates a unique (w.r.t. tbl) representation +for mk (obj1, obj2), where h is a hash code for the term constructor mk.

    +
    +
    val cons3 : 'a tbl -> (word * ('b obj * 'c obj * 'd obj -> 'a)) -> …​
    +
    +

    cons3 tbl (h, mk) (obj1, obj2, obj3) creates a unique (w.r.t. tbl) representation +for mk (obj1, obj2, obj3), where h is a hash code for the term constructor mk.

    +
    +
    val cons4 : 'a tbl -> (word * ('b obj * 'c obj * 'd obj * 'e obj -> 'a)) -> …​
    +
    +

    cons4 tbl (h, mk) (obj1, obj2, obj3, obj4) creates a unique (w.r.t. tbl) +representation for mk (obj1, obj2, obj3, obj4), where h is a hash code +for the term constructor mk.

    +
    +
    val cons5 : 'a tbl -> (word * ('b obj * 'c obj * 'd obj * 'e obj * 'f obj -> 'a)) -> …​
    +
    +

    cons5 tbl (h, mk) (obj1, obj2, obj3, obj4, obj5) creates a unique (w.r.t. tbl) +representation for mk (obj1, obj2, obj3, obj4, obj5), where h is a hash code +for the term constructor mk.

    +
    +
    val consList : 'a tbl -> (word * ('b obj list -> 'a)) -> 'b obj list -> 'a obj
    +
    +

    consList tbl (h, mk) objs creates a unique (w.r.t. tbl) representation +for mk objs, where h is a hash code for the term constructor mk.

    +
    +
    val consR1 : 'a tbl -> (word * ('b obj -> 'a) * ('r -> 'b obj)) -> 'r -> 'a obj
    +
    +

    consR1 (h, mk, proj) r creates a unique (w.r.t. tbl) representation +for mk (proj r), where h is a hash code for the term constructor mk +and proj projects the sub-component of r as an object.

    +
    +
    val consR2 : 'a tbl -> (word * ('b obj * 'c obj -> 'a) * ('r -> 'b obj * 'c obj)) -> 'r -> 'a obj
    +
    +

    consR2 (h, mk, proj) r creates a unique (w.r.t. tbl) representation +for mk (proj r), where h is a hash code for the term constructor mk +and proj projects the sub-components of r as a tuple of objects.

    +
    +
    val consR3 : 'a tbl -> (word * ('b obj * 'c obj * 'd obj -> 'a) -> …​
    +
    +

    consR3 (h, mk, proj) r creates a unique (w.r.t. tbl) representation +for mk (proj r), where h is a hash code for the term constructor mk +and proj projects the sub-components of r as a tuple of objects.

    +
    +
    val consR4 : 'a tbl -> (word * ('b obj * 'c obj * 'd obj * 'e obj -> 'a) -> …​
    +
    +

    consR4 (h, mk, proj) r creates a unique (w.r.t. tbl) representation +for mk (proj r), where h is a hash code for the term constructor mk +and proj projects the sub-components of r as a tuple of objects.

    +
    +
    val consR5 : 'a tbl -> (word * ('b obj * 'c obj * 'd obj * 'e obj * 'f obj -> 'a) -> …​
    +
    +

    consR5 (h, mk, proj) r creates a unique (w.r.t. tbl) representation +for mk (proj r), where h is a hash code for the term constructor mk +and proj projects the sub-components of r as a tuple of objects.

    +
    +
    +
    +
    +
    +
    +

    Discussion

    +
    +
    +

    The functions cons1, cons2, etc., provide an easy way to convert a data +constructor of the given arity to a hash-cons constructor. For example, if +we have

    +
    +
    +
    +
    datatype t = ... | Foo of (x obj * y obj * z obj) | ...
    +
    +
    +
    +

    as a constructor in our two-level hash-consed datatype, then we can +define a hash-cons constructor for Foo has

    +
    +
    +
    +
    val mkFoo : x obj * y obj * z obj -> t obj = cons3 (0w17, Foo)
    +
    +
    +
    +

    where 0w17 is the hash code we selected for the Foo constructor.

    +
    +
    +

    Likewise, the cons1R, cons2R, etc., functions can be used when +record types are involved.

    +
    +
    +
    +
    +

    See Also

    + +
    +
    +
    + +
    + + diff --git a/doc/html/smlnj-lib/HashCons/str-HashConsAtom.html b/doc/html/smlnj-lib/HashCons/str-HashConsAtom.html new file mode 100644 index 0000000..21cf246 --- /dev/null +++ b/doc/html/smlnj-lib/HashCons/str-HashConsAtom.html @@ -0,0 +1,163 @@ + + + + + + + + + + The HashConsAtom structure + + + + + + + + +
    +
    +
    +
    + +
    + +
    The HashConsAtom structure
    +
    +
    +
    + +
    +
    +
    +
    +
    +

    The HashConsAtom structure embeds the Atom.atom +type as a hash-consed object. +It is implemented using the HashConsGroundFn +functor.

    +
    +
    +
    +
    +

    Synopsis

    +
    +
    +
    +
    structure HashConsAtom
    +
    +
    +
    +
    +
    +

    Interface

    +
    +
    +
    +
    type hash_key = Atom.atom
    +type obj = hash_key HashCons.obj
    +
    +val mk : hash_key -> obj
    +
    +
    +
    +
    +
    +

    Description

    +
    +
    +
    +
    type hash_key = Atom.atom
    +
    +

    The ground type being hashed.

    +
    +
    type obj = hash_key HashCons.obj
    +
    +

    The type of hash-consed atoms.

    +
    +
    val mk : hash_key -> obj
    +
    +

    mk a converts the atom a to a hash-consed object.

    +
    +
    +
    +
    +
    +
    +

    See Also

    + +
    +
    +
    + +
    + + diff --git a/doc/html/smlnj-lib/HashCons/str-HashConsBool.html b/doc/html/smlnj-lib/HashCons/str-HashConsBool.html new file mode 100644 index 0000000..6545daf --- /dev/null +++ b/doc/html/smlnj-lib/HashCons/str-HashConsBool.html @@ -0,0 +1,170 @@ + + + + + + + + + + The HashConsBool structure + + + + + + + + +
    +
    +
    +
    + +
    + +
    The HashConsBool structure
    +
    +
    +
    + +
    +
    +
    +
    +
    +

    The HashConsBool structure embeds the bool type as a hash-consed object.

    +
    +
    +
    +
    +

    Synopsis

    +
    +
    +
    +
    structure HashConsBool
    +
    +
    +
    +
    +
    +

    Interface

    +
    +
    +
    +
    type hash_key = bool
    +type obj = hash_key HashCons.obj
    +
    +val mk : hash_key -> obj
    +
    +val hcFalse : obj
    +val hcTrue : obj
    +
    +
    +
    +
    +
    +

    Description

    +
    +
    +
    +
    type hash_key = bool
    +
    +

    The ground type being hashed.

    +
    +
    type obj = hash_key HashCons.obj
    +
    +

    The type of hash-consed booleans.

    +
    +
    val mk : hash_key -> obj
    +
    +

    mk b converts the boolean b to a hash-consed object.

    +
    +
    val hcFalse : obj
    +
    +

    the hash-consed representation of false.

    +
    +
    val hcTrue : obj
    +
    +

    the hash-consed representation of true.

    +
    +
    +
    +
    +
    +
    +

    See Also

    + +
    +
    +
    + +
    + + diff --git a/doc/html/smlnj-lib/HashCons/str-HashConsInt.html b/doc/html/smlnj-lib/HashCons/str-HashConsInt.html new file mode 100644 index 0000000..892d1f2 --- /dev/null +++ b/doc/html/smlnj-lib/HashCons/str-HashConsInt.html @@ -0,0 +1,161 @@ + + + + + + + + + + The HashConsInt structure + + + + + + + + +
    +
    +
    +
    + +
    + +
    The HashConsInt structure
    +
    +
    +
    + +
    +
    +
    +
    +
    +

    The HashConsInt structure embeds the int type as a hash-consed object. +It is implemented by directly using the value as the tag and hash key +(i.e., no hash table).

    +
    +
    +
    +
    +

    Synopsis

    +
    +
    +
    +
    structure HashConsInt
    +
    +
    +
    +
    +
    +

    Interface

    +
    +
    +
    +
    type hash_key = int
    +type obj = hash_key HashCons.obj
    +
    +val mk : hash_key -> obj
    +
    +
    +
    +
    +
    +

    Description

    +
    +
    +
    +
    type hash_key = int
    +
    +

    The ground type being hashed.

    +
    +
    type obj = hash_key HashCons.obj
    +
    +

    The type of hash-consed integers.

    +
    +
    val mk : hash_key -> obj
    +
    +

    mk n converts the integer n to a hash-consed object.

    +
    +
    +
    +
    +
    +
    +

    See Also

    + +
    +
    +
    + +
    + + diff --git a/doc/html/smlnj-lib/HashCons/str-HashConsMap.html b/doc/html/smlnj-lib/HashCons/str-HashConsMap.html new file mode 100644 index 0000000..94c4c78 --- /dev/null +++ b/doc/html/smlnj-lib/HashCons/str-HashConsMap.html @@ -0,0 +1,506 @@ + + + + + + + + + + The HashConsMap structure + + + + + + + + +
    +
    +
    +
    + +
    + +
    The HashConsMap structure
    +
    +
    +
    + +
    +
    +
    +
    +
    +

    The HashConsMap structure implements functional, finite maps keyed +by hash-consed objects. A balanced tree structure is used in the +representation.

    +
    +
    +
    +
    +

    Synopsis

    +
    +
    +
    +
    signature HASH_CONS_MAP
    +structure HashConsMap : HASH_CONS_MAP
    +
    +
    +
    +
    +
    +

    Interface

    +
    +
    +
    +
    type 'a obj = 'a HashCons.obj
    +
    +type ('a, 'b) map
    +
    +val isEmpty : ('a, 'b) map -> bool
    +
    +val singleton : ('a obj * 'b) -> ('a, 'b) map
    +
    +val insert  : ('a, 'b) map * 'a obj * 'b -> ('a, 'b) map
    +val insert' : (('a obj * 'b) * ('a, 'b) map) -> ('a, 'b) map
    +
    +val insertWith  : (('b * 'b) -> 'b)
    +      -> ('a, 'b) map * 'a obj * 'b -> ('a, 'b) map
    +val insertWithi :  (('a obj * 'b * 'b) -> 'b)
    +      -> ('a, 'b) map * 'a obj * 'b -> ('a, 'b) map
    +
    +val find : ('a, 'b) map * 'a obj -> 'b option
    +
    +val lookup : ('a, 'b) map * 'a obj -> 'b
    +
    +val inDomain : (('a, 'b) map * 'a obj) -> bool
    +
    +val remove : ('a, 'b) map * 'a obj -> ('a, 'b) map * 'b
    +
    +val empty : ('a, 'b) map
    +
    +val numItems : ('a, 'b) map ->  int
    +
    +val listItems  : ('a, 'b) map -> 'b list
    +val listItemsi : ('a, 'b) map -> ('a obj * 'b) list
    +
    +val listKeys : ('a, 'b) map -> 'a obj list
    +
    +val collate : ('b * 'b -> order) -> (('a, 'b) map * ('a, 'b) map) -> order
    +
    +val unionWith  : ('b * 'b -> 'b) -> (('a, 'b) map * ('a, 'b) map)
    +      -> ('a, 'b) map
    +val unionWithi : ('a obj * 'b * 'b -> 'b) -> (('a, 'b) map * ('a, 'b) map)
    +      -> ('a, 'b) map
    +
    +val intersectWith  : ('b * 'c -> 'd) -> (('a, 'b) map * ('a, 'c) map)
    +      -> ('a, 'd) map
    +val intersectWithi : ('a obj * 'b * 'c -> 'd) -> (('a, 'b) map * ('a, 'c) map)
    +      -> ('a, 'd) map
    +
    +val mergeWith : ('b option * 'c option -> 'd option)
    +      -> (('a, 'b) map * ('a, 'c) map) -> ('a, 'd) map
    +val mergeWithi : ('a obj * 'b option * 'c option -> 'd option)
    +      -> (('a, 'b) map * ('a, 'c) map) -> ('a, 'd) map
    +
    +val app  : ('b -> unit) -> ('a, 'b) map -> unit
    +val appi : (('a obj * 'b) -> unit) -> ('a, 'b) map -> unit
    +
    +val map  : ('b -> 'c) -> ('a, 'b) map -> ('a, 'c) map
    +val mapi : ('a obj * 'b -> 'c) -> ('a, 'b) map -> ('a, 'c) map
    +
    +val fold  : ('b * 'c -> 'c) -> 'c -> ('a, 'b) map -> 'c
    +val foldi : ('a obj * 'b * 'c -> 'c) -> 'c -> ('a, 'b) map -> 'c
    +
    +val filter  : ('b -> bool) -> ('a, 'b) map -> ('a, 'b) map
    +val filteri : ('a obj * 'b -> bool) -> ('a, 'b) map -> ('a, 'b) map
    +
    +val mapPartial  : ('b -> 'c option) -> ('a, 'b) map -> ('a, 'c) map
    +val mapPartiali : ('a obj * 'b -> 'c option) -> ('a, 'b) map -> ('a, 'c) map
    +
    +val exists : ('b -> bool) -> ('a, 'b) map -> bool
    +val existsi : ('a obj * 'b -> bool) -> ('a, 'b) map -> bool
    +
    +val all : ('b -> bool) -> ('a, 'b) map -> bool
    +val alli : ('a obj * 'b -> bool) -> ('a, 'b) map -> bool
    +
    +
    +
    +
    +
    +

    Description

    +
    +
    +

    In the description of operations below, we write \(\mathbf{dom}(m)\) +for the domain of the map \(m\) (i.e, the set of keys for which +\(m\) is defined), and \(\mathbf{rng}(m)\) for its range +(i.e., the set \(\{ m(k)\;|\;k \in \mathbf{dom}(m) \}\)). It is also +useful to view a map as the set of key-value pairs +\(\{ (k, m(k))\;|\;k \in \mathbf{dom}(m) \}\), which we call the items +of \(m\).

    +
    +
    +
    +
    type 'a obj = 'a HashCons.obj
    +
    +

    Hash-consed objects are the search keys for the finite maps.

    +
    +
    type ('a, 'b) map
    +
    +

    A finite map from 'a obj values to 'b values.

    +
    +
    val empty : ('a, 'b) map
    +
    +

    The empty map.

    +
    +
    val singleton : ('a obj * 'b) -> ('a, 'b) map
    +
    +

    singleton (obj, v) creates the singleton map that maps obj to v.

    +
    +
    val insert : ('a, 'b) map * 'a obj * 'b -> ('a, 'b) map
    +
    +

    insert (m, obj, v) adds the mapping from obj to v to m. +This mapping overrides any previous mapping from obj.

    +
    +
    val insert' : (('a obj * 'b) * ('a, 'b) map) -> ('a, 'b) map
    +
    +

    insert' ((obj, v), map) adds the mapping from obj to v to m. +This mapping overrides any previous mapping from obj.

    +
    +
    val insertWith : (('b * 'b) -> 'b) -> ('a, 'b) map * 'a obj * 'b -> ('a, 'b) map
    +
    +

    insertWith comb (m, obj, v) adds the mapping from obj to value to m, +where value = comb(v', v), if m already contained a mapping from obj +to v'; otherwise, value = v.

    +
    +
    val insertWithi : (('a obj * 'b * 'b) -> 'b) -> ('a, 'b) map * 'a obj * 'b -> ('a, 'b) map
    +
    +

    insertWithi comb (m, obj, v) adds the mapping from obj to value to m, +where value = comb(obj, v', v), if m already contained a mapping from obj +to v'; otherwise, value = v.

    +
    +
    val find : ('a, 'b) map * 'a obj -> 'b option
    +
    +

    find (m, obj) returns SOME v, if m maps obj to v and NONE otherwise.

    +
    +
    val lookup : ('a, 'b) map * 'a obj -> 'b
    +
    +

    lookup (m, obj) returns v, if m maps obj to v; otherwise it +raises the exception NotFound.

    +
    +
    val inDomain : (('a, 'b) map * 'a obj) -> bool
    +
    +

    inDomain (m, obj) returns true if obj is in the domain of m.

    +
    +
    val remove : ('a, 'b) map * 'a obj -> ('a, 'b) map * 'b
    +
    +

    remove (m, obj) returns the pair (m', v), if m maps obj to v +and where m' is m with obj removed from its domain. If obj +is not in the domain of m, then it raises the exception +NotFound.

    +
    +
    val isEmpty : ('a, 'b) map -> bool
    +
    +

    isEmpty m returns true if, and only if, m is empty.

    +
    +
    val numItems : ('a, 'b) map -> int
    +
    +

    numItems m returns the size of m's domain.

    +
    +
    val listItems : ('a, 'b) map -> 'b list
    +
    +

    listItems m returns a list of the values in the range of m. +Note that this list will contain duplicates when multiple keys in +m's domain map to the same value.

    +
    +
    val listKeys : ('a, 'b) map -> 'a obj list
    +
    +

    listKeys m returns a list of the objects in the domain of m.

    +
    +
    val listItemsi : ('a, 'b) map -> ('a obj * 'b) list
    +
    +

    listItemsi m returns a list of (obj, v) pairs, where m maps +obj to v.

    +
    +
    val collate : ('b * 'b -> order) -> (('a, 'b) map * ('a, 'b) map) -> order
    +
    +

    collate cmpV (m1, m2) returns the order of the two maps, where cmpV is +used to compare the values in the domain.

    +
    +
    val unionWith : ('b * 'b -> 'b) -> (('a, 'b) map * ('a, 'b) map) -> ('a, 'b) map
    +
    +

    unionWith comb (m1, m2) returns the union of the two maps, using the function comb +to combine values when there is a collision of keys. More formally, this expression +returns the map

    +
    +
    +\[ \begin{array}{l} + \{ (k, \mathtt{m1}(k)) + \;|\;k \in \mathbf{dom}(\mathtt{m1}) \setminus \mathbf{dom}(\mathtt{m2}) \} + \cup \\ + \{ (k, \mathtt{m2}(k)) + \;|\;k \in \mathbf{dom}(\mathtt{m2}) \setminus \mathbf{dom}(\mathtt{m1}) \} + \cup \\ + \{ (k, \mathtt{comb}(\mathtt{m1}(k), \mathtt{m2}(k)) + \;|\;k \in \mathbf{dom}(\mathtt{m1}) \cap \mathbf{dom}(\mathtt{m2}) \} + \end{array}\] +
    +
    +
    +

    For example, we could implement a multiset of objects by mapping objects to their +multiplicity. Then, the union of two multisets could be defined by

    +
    +
    +
    +
    fun union (ms1, ms2) = unionWith Int.+ (ms1, ms2)
    +
    +
    +
    +
    val unionWithi : ('a obj * 'b * 'b -> 'b) -> (('a, 'b) map * ('a, 'b) map) -> ('a, 'b) map
    +
    +

    unionWithi comb (m1, m2) returns the union of the two maps, using the function comb +to combine values when there is a collision of keys. More formally, this expression +returns the map

    +
    +
    +\[ \begin{array}{l} + \{ (k, \mathtt{m1}(k)) + \;|\;k \in \mathbf{dom}(\mathtt{m1}) \setminus \mathbf{dom}(\mathtt{m2}) \} + \cup \\ + \{ (k, \mathtt{m2}(k)) + \;|\;k \in \mathbf{dom}(\mathtt{m2}) \setminus \mathbf{dom}(\mathtt{m1}) \} + \cup \\ + \{ (k, \mathtt{comb}(k, \mathtt{m1}(k), \mathtt{m2}(k)) + \;|\;k \in \mathbf{dom}(\mathtt{m1}) \cap \mathbf{dom}(\mathtt{m2}) \} + \end{array}\] +
    +
    +
    +
    val intersectWith : ('b * 'c -> 'd) -> (('a, 'b) map * ('a, 'c) map) -> ('a, 'd) map
    +
    +

    intersectWith comb (m1, m2) returns the intersection of the two maps, +where the values in the range are a computed by applying the function +comb to the values from the two maps. More formally, this expression +returns the map

    +
    +
    +\[ \{ (k, \mathtt{comb}(\mathtt{m1}(k), \mathtt{m2}(k)) + \;|\;k \in \mathbf{dom}(\mathtt{m1}) \cap \mathbf{dom}(\mathtt{m2}) \}\] +
    +
    +
    +
    val intersectWithi : ('a obj * 'b * 'c -> 'd) -> (('a, 'b) map * ('a, 'c) map) -> ('a, 'd) map
    +
    +

    intersectWithi comb (m1, m2) returns the intersection of the two maps, +where the values in the range are a computed by applying the function +comb to the kay and the values from the two maps. More formally, this +expression returns the map

    +
    +
    +\[ \{ (k, \mathtt{comb}(k, \mathtt{m1}(k), \mathtt{m2}(k)) + \;|\;k \in \mathbf{dom}(\mathtt{m1}) \cap \mathbf{dom}(\mathtt{m2}) \}\] +
    +
    +
    +
    val mergeWith : ('b option * 'c option -> 'd option) -> (('a, 'b) map * ('a, 'c) map) -> ('a, 'd) map
    +
    +

    mergeWith comb (m1, m2) merges the two maps using the function comb +as a decision procedure for adding elements to the new map. For each object +\(\mathtt{obj} \in \mathbf{dom}(\mathtt{m1}) \cup \mathbf{dom}(\mathtt{m2})\), +we evaluate comb(optV1, optV2), where optV1 is SOME v if +\((\mathtt{obj}, \mathtt{v}) \in \mathtt{m1}\) and is NONE if +latexmath:[\mathtt{obj} \not\in \mathbf{dom}(\mathtt{m1}); likewise for optV2. +If comb(optV1, optV2) returns SOME v', then we add (obj, v') +to the result.

    +
    +

    The mergeWith function is a generalization of the unionWith and +intersectionWith functions.

    +
    +
    +
    val mergeWithi : ('a obj * 'b option * 'c option -> 'd option) -> (('a, 'b) map * ('a, 'c) map) -> ('a, 'd) map
    +
    +

    mergeWithi comb (m1, m2) merges the two maps using the function comb +as a decision procedure for adding elements to the new map. The difference +between this function and mergeWith is that the comb function takes the +key value in addition to the optional values from the range.

    +
    +
    val app : ('b -> unit) -> ('a, 'b) map -> unit
    +
    +

    app f m applies the function f to the values in the range of m.

    +
    +
    val appi : (('a obj * 'b) -> unit) -> ('a, 'b) map -> unit
    +
    +

    appi f map applies the function f to the key-value pairs that +define m.

    +
    +
    val map : ('b -> 'c) -> ('a, 'b) map -> ('a, 'c) map
    +
    +

    map f m creates a new finite map m' by applying the function f to the +values in the range of m. Thus, if +\((\mathtt{obj}, \mathtt{v}) \in \mathtt{m}\), then +(obj, f v) will be in m'.

    +
    +
    val mapi : ('a obj * 'b -> 'c) -> ('a, 'b) map -> ('a, 'c) map
    +
    +

    mapi f m creates a new finite map m' by applying the function f to the +key-value pairs of m. Thus, if +\((\mathtt{obj}, \mathtt{v}) \in \mathtt{m}\), then +(obj, f(obj, v)) will be in m'.

    +
    +
    val fold : ('b * 'c -> 'c) -> 'c -> ('a, 'b) map -> 'c
    +
    +

    fold f init m folds the function f over the range of +m using init as the initial value.

    +
    +
    val foldi : ('a obj * 'b * 'c -> 'c) -> 'c -> ('a, 'b) map -> 'c
    +
    +

    foldi f init m folds the function f over the key-value pairs in +m using init as the initial value.

    +
    +
    val filter : ('b -> bool) -> ('a, 'b) map -> ('a, 'b) map
    +
    +

    filter pred m filters out those items (obj, v) from m, such that +pred v returns false. More formally, this expression returns the map +\(\{ (\mathtt{obj}, \mathtt{v})\;|\;\mathtt{obj} \in \mathbf{dom}(\mathtt{m}) +\wedge \mathtt{pred}(\mathtt{v}) \}\).

    +
    +
    val filteri : ('a obj * 'b -> bool) -> ('a, 'b) map -> ('a, 'b) map
    +
    +

    filteri pred m filters out those items (obj, v) from m, such that +pred(obj, v) returns false. More formally, this expression returns the map +\(\{ (\mathtt{obj}, \mathtt{v})\;|\;\mathtt{obj} \in \mathbf{dom}(\mathtt{m}) +\wedge \mathtt{pred}(\mathtt{obj}, \mathtt{v}) \}\).

    +
    +
    val mapPartial : ('b -> 'c option) -> ('a, 'b) map -> ('a, 'c) map
    +
    +

    mapPartial f m maps the partial function f over the items of m. +More formally, this expression returns the map

    +
    +
    +
    +
    +
    +\[ \{ (k, v') \;|\; (k, v) \in \mathtt{m} \wedge \mathtt{f}(v) = \mathtt{SOME}(v') \}\] +
    +
    +
    +
    +
    val mapPartiali : ('a obj * 'b -> 'c option) -> ('a, 'b) map -> ('a, 'c) map
    +
    +

    mapPartiali f m maps the partial function f over the items of m. +More formally, this expression returns the map

    +
    +
    +
    +
    +
    +\[ \{ (k, v') \;|\; (k, v) \in \mathtt{m} \wedge \mathtt{f}(k, v) = \mathtt{SOME}(v') \}\] +
    +
    +
    +
    +
    val exists : ('b -> bool) -> ('a, 'b) map -> bool
    +
    +

    exists pred m returns true if, and only if, there exists an item +\((\mathtt{obj}, \mathtt{v}) \in \mathtt{m}\), +such that pred v returns true.

    +
    +
    val existsi : ('a obj * 'b -> bool) -> ('a, 'b) map -> bool
    +
    +

    exists pred m returns true if, and only if, there exists an item +\((\mathtt{obj}, \mathtt{v}) \in \mathtt{m}\), such that +pred(obj, v) returns true.

    +
    +
    val all : ('b -> bool) -> ('a, 'b) map -> bool
    +
    +

    all pred m returns true if, and only if, pred v returns true +for all items \((\mathtt{obj}, \mathtt{v}) \in \mathtt{m}\).

    +
    +
    val alli : ('a obj * 'b -> bool) -> ('a, 'b) map -> bool
    +
    +

    all pred m returns true if, and only if, pred(obj, v) returns true +for all items \((\mathtt{obj}, \mathtt{v}) \in \mathtt{m}\).

    +
    +
    +
    +
    +
    +
    +

    See Also

    + +
    +
    +
    + +
    + + diff --git a/doc/html/smlnj-lib/HashCons/str-HashConsSet.html b/doc/html/smlnj-lib/HashCons/str-HashConsSet.html new file mode 100644 index 0000000..8832049 --- /dev/null +++ b/doc/html/smlnj-lib/HashCons/str-HashConsSet.html @@ -0,0 +1,416 @@ + + + + + + + + + + The HashConsSet structure + + + + + + + + +
    +
    +
    +
    + +
    + +
    The HashConsSet structure
    +
    +
    +
    + +
    +
    +
    +
    +
    +

    The HashConsSet structure implements finite sets of hash-consed objects.

    +
    +
    +
    +
    +

    Synopsis

    +
    +
    +
    +
    signature HASH_CONS_SET
    +structure HashConsSet : HASH_CONS_SET
    +
    +
    +
    +
    +
    +

    Interface

    +
    +
    +
    +
    type 'a obj = 'a HashCons.obj
    +
    +type 'a set
    +
    +val empty : 'a set
    +
    +val singleton : 'a obj -> 'a set
    +
    +val fromList : 'a obj list -> 'a set
    +
    +val add  : 'a set * 'a obj -> 'a set
    +val add' : ('a obj * 'a set) -> 'a set
    +
    +val addList : 'a set * 'a obj list -> 'a set
    +
    +val subtract  : 'a set * 'a obj -> 'a set
    +val subtract' : ('a obj * 'a set) -> 'a set
    +
    +val subtractList : 'a set * 'a obj list -> 'a set
    +
    +val delete : 'a set * 'a obj -> 'a set
    +
    +val member : 'a set * 'a obj -> bool
    +
    +val isEmpty : 'a set -> bool
    +
    +val equal : ('a set * 'a set) -> bool
    +
    +val compare : ('a set * 'a set) -> order
    +
    +val isSubset : ('a set * 'a set) -> bool
    +
    +val disjoint : 'a set * 'a set -> bool
    +
    +val numItems : 'a set ->  int
    +
    +val toList : 'a set -> 'a obj list
    +val listItems : 'a set -> 'a obj list
    +
    +val union : 'a set * 'a set -> 'a set
    +val intersection : 'a set * 'a set -> 'a set
    +val difference : 'a set * 'a set -> 'a set
    +
    +val map : ('a obj -> 'b obj) -> 'a set -> 'b set
    +
    +val mapPartial : ('a obj -> 'b obj option) -> 'a set -> 'b set
    +
    +val app : ('a obj -> unit) -> 'a set -> unit
    +
    +val foldl : ('a obj * 'b -> 'b) -> 'b -> 'a set -> 'b
    +val foldr : ('a obj * 'b -> 'b) -> 'b -> 'a set -> 'b
    +
    +val partition : ('a obj -> bool) -> 'a set -> ('a set * 'a set)
    +
    +val filter : ('a obj -> bool) -> 'a set -> 'a set
    +
    +val all : ('a obj -> bool) -> 'a set -> bool
    +val exists : ('a obj -> bool) -> 'a set -> bool
    +
    +val find : ('a obj -> bool) -> 'a set -> 'a obj option
    +
    +
    +
    +
    +
    +

    Description

    +
    +
    +
    +
    type 'a obj = 'a HashCons.obj
    +
    +

    The elements in the set are hash-cons objects.

    +
    +
    type 'a set
    +
    +

    A finite set of 'a obj values.

    +
    +
    val empty : 'a set
    +
    +

    The empty set.

    +
    +
    val singleton : 'a obj -> 'a set
    +
    +

    singleton obj creates a singleton set containing obj.

    +
    +
    val fromList : 'a obj list -> 'a set
    +
    +

    fromList objs creates a set from the list of objects.

    +
    +
    val add : 'a set * 'a obj -> 'a set
    +
    +

    add (set, obj) adds the object to the set.

    +
    +
    val add' : ('a obj * 'a set) -> 'a set
    +
    +

    add' (obj, set) adds the object to the set.

    +
    +
    val addList : 'a set * 'a obj list -> 'a set
    +
    +

    addList (set, objs) adds the list of objects to the set.

    +
    +
    val subtract : 'a set * 'a obj -> 'a set
    +
    +

    subtract (set, obj) removes the object obj from set. +Acts as the identity if obj is not in the set.

    +
    +
    val subtract' : ('a obj * 'a set) -> 'a set
    +
    +

    subtract (obj, set) removes the object obj from set. +Acts as the identity if obj is not in the set.

    +
    +
    val delete : 'a set * 'a obj -> 'a set
    +
    +

    delete (set, obj) removes the object obj from set. +Unlike subtract, this function raises the +NotFound +exception if obj is not in the set.

    +
    +
    val member : 'a set * 'a obj -> bool
    +
    +

    member (obj, set) returns true if, and only if, obj +is an element of set.

    +
    +
    val isEmpty : 'a set -> bool
    +
    +

    isEmpty set returns true if, and only if, set is empty.

    +
    +
    val equal : ('a set * 'a set) -> bool
    +
    +

    equal (set1, set2) returns true if, and only if, the two +sets are equal (i.e., they contain the same elements).

    +
    +
    val compare : ('a set * 'a set) -> order
    +
    +

    compare (set1, set2) returns the lexical order of +the two sets.

    +
    +
    val isSubset : ('a set * 'a set) -> bool
    +
    +

    isSubset (set1, set2) returns true if, and only if, set1 +is a subset of set2 (i.e., any element of set1 is an +element of set2).

    +
    +
    val disjoint : 'a set * 'a set -> bool
    +
    +

    equal (set1, set2) returns true if, and only if, the two +sets are disjoint (i.e., their intersection is empty).

    +
    +
    val numItems : 'a set -> int
    +
    +

    numItems set returns the number of items in the set.

    +
    +
    +
    +
    +
    +
    val toList : 'a set -> 'a obj list
    +
    +

    toList set returns a list of the objects in set.

    +
    +
    val union : 'a set * 'a set -> 'a set
    +
    +

    union (set1, set2) returns the union of the two sets.

    +
    +
    val intersection : 'a set * 'a set -> 'a set
    +
    +

    intersection (set1, set2) returns the intersection of the two sets.

    +
    +
    val difference : 'a set * 'a set -> 'a set
    +
    +

    difference (set1, set2) returns the difference of the two sets; +i.e., the set of objects that are in set1, but not in +set2.

    +
    +
    val map : ('a obj -> 'b obj) -> 'a set -> 'b set
    +
    +

    map f set constructs a new set from the result of applying the +function f to the elements of set. This expression is +equivalent to

    +
    +
    +
    fromList (List.map f (toList set))
    +
    +
    +
    +
    val mapPartial : ('a obj -> 'b obj option) -> 'a set -> 'b set
    +
    +

    mapPartial f set constructs a new set from the result of applying the +function f to the elements of set. This expression is +equivalent to

    +
    +
    +
    fromList (List.mapPartial f (toList set))
    +
    +
    +
    +
    val app : ('a obj -> unit) -> 'a set -> unit
    +
    +

    app f set applies the function f to the objects in set. +This expression is equivalent to

    +
    +
    +
    List.app f (toList set)
    +
    +
    +
    +
    +
    +
    +
    +
    val fold : ('a obj * 'b -> 'b) -> 'b -> 'a set -> 'b
    +
    +

    fold f init set folds the function f over the objects in +set using init as the initial value. +This expression is equivalent to

    +
    +
    +
    List.foldl f init (toList set)
    +
    +
    +
    +

    Although the order in which the elements are processed is unspecified.

    +
    +
    +
    val partition : ('a obj -> bool) -> 'a set -> ('a set * 'a set)
    +
    +

    partition pred set returns a pair of disjoint sets (tSet, fSet), where + the predicate pred returns true for every element of tSet, +false for every element of fSet, and set is the union of tSet + and fSet.

    +
    +
    val filter : ('a obj -> bool) -> 'a set -> 'a set
    +
    +

    filter pred set filters out any elements of set for which the +predicate pred returns false. +This expression is equivalent to

    +
    +
    +
    #1 (partition pred set)
    +
    +
    +
    +
    val all : ('a obj -> bool) -> 'a set -> bool
    +
    +

    all pred set returns true if, and only if, pred obj returns +true for all elements obj in set.

    +
    +
    val exists : ('a obj -> bool) -> 'a set -> bool
    +
    +

    exists pred set returns true if, and only if, there exists an +element obj in set such that pred obj returns true.

    +
    +
    val find : ('a obj -> bool) -> 'a set -> 'a obj option
    +
    +

    find pred set returns SOME obj if there exists an object obj +in the set for which pred obj returns true; otherwise NONE is returned.

    +
    +
    +
    +
    +

    Deprecated functions

    +
    +

    The following functions are part of the interface, but have been +deprecated.

    +
    +
    +
    +
    val listItems : 'a set -> 'a obj list
    +
    +

    Use toList instead.

    +
    +
    val foldl : ('a obj * 'b -> 'b) -> 'b -> 'a set -> 'b
    +
    +

    Use fold instead.

    +
    +
    val foldr : ('a obj * 'b -> 'b) -> 'b -> 'a set -> 'b
    +
    +

    Use fold instead.

    +
    +
    +
    +
    +
    +
    +
    +

    See Also

    + +
    +
    +
    + +
    + + diff --git a/doc/html/smlnj-lib/HashCons/str-HashConsString.html b/doc/html/smlnj-lib/HashCons/str-HashConsString.html new file mode 100644 index 0000000..64db773 --- /dev/null +++ b/doc/html/smlnj-lib/HashCons/str-HashConsString.html @@ -0,0 +1,162 @@ + + + + + + + + + + The HashConsString structure + + + + + + + + +
    +
    +
    +
    + +
    + +
    The HashConsString structure
    +
    +
    +
    + +
    +
    +
    +
    +
    +

    The HashConsString structure embeds the string +type as a hash-consed object. +It is implemented using the HashConsGroundFn +functor.

    +
    +
    +
    +
    +

    Synopsis

    +
    +
    +
    +
    structure HashConsString
    +
    +
    +
    +
    +
    +

    Interface

    +
    +
    +
    +
    type hash_key = string
    +type obj = hash_key HashCons.obj
    +
    +val mk : hash_key -> obj
    +
    +
    +
    +
    +
    +

    Description

    +
    +
    +
    +
    type hash_key = string
    +
    +

    The ground type being hashed.

    +
    +
    type obj = hash_key HashCons.obj
    +
    +

    The type of hash-consed strings.

    +
    +
    val mk : hash_key -> obj
    +
    +

    mk s converts the string s to a hash-consed object.

    +
    +
    +
    +
    +
    +
    +

    See Also

    + +
    +
    +
    + +
    + + diff --git a/doc/html/smlnj-lib/HashCons/str-HashConsWord.html b/doc/html/smlnj-lib/HashCons/str-HashConsWord.html new file mode 100644 index 0000000..d741b40 --- /dev/null +++ b/doc/html/smlnj-lib/HashCons/str-HashConsWord.html @@ -0,0 +1,161 @@ + + + + + + + + + + The HashConsWord structure + + + + + + + + +
    +
    +
    +
    + +
    + +
    The HashConsWord structure
    +
    +
    +
    + +
    +
    +
    +
    +
    +

    The HashConsWord structure embeds the word type as a hash-consed object. +It is implemented by directly using the value as the tag and hash key +(i.e., no hash table).

    +
    +
    +
    +
    +

    Synopsis

    +
    +
    +
    +
    structure HashConsWord
    +
    +
    +
    +
    +
    +

    Interface

    +
    +
    +
    +
    type hash_key = word
    +type obj = hash_key HashCons.obj
    +
    +val mk : hash_key -> obj
    +
    +
    +
    +
    +
    +

    Description

    +
    +
    +
    +
    type hash_key = word
    +
    +

    The ground type being hashed.

    +
    +
    type obj = hash_key HashCons.obj
    +
    +

    The type of hash-consed words.

    +
    +
    val mk : hash_key -> obj
    +
    +

    mk w converts the word w to a hash-consed object.

    +
    +
    +
    +
    +
    +
    +

    See Also

    + +
    +
    +
    + +
    + + diff --git a/doc/html/smlnj-lib/INet/inet-lib.html b/doc/html/smlnj-lib/INet/inet-lib.html new file mode 100644 index 0000000..85e45a2 --- /dev/null +++ b/doc/html/smlnj-lib/INet/inet-lib.html @@ -0,0 +1,130 @@ + + + + + + + + + + The INet Library + + + + + + + + +
    +
    +
    +
    + +
    + +
    The INet Library
    +
    +
    +
    + +
    +
    +
    +

    Overview

    +
    +
    +

    The INet Library provides utilities for network programming with sockets.

    +
    +
    +
    +
    +

    Contents

    +
    +
    +
    +
    structure SockUtil
    +
    +

    Various utility functions for programming with sockets.

    +
    +
    structure UnixSockUtil
    +
    +

    Various utility functions for programming with Unix-domain sockets.

    +
    +
    +
    +
    +
    +
    +

    Usage

    +
    +
    +

    For SML/NJ, include $/inet-lib.cm in your +CM file.

    +
    +
    +

    For use in MLton, include +$(SML_LIB)/smlnj-lib/INet/inet-lib.mlb in your MLB file.

    +
    +
    +
    +
    +
    + +
    + + diff --git a/doc/html/smlnj-lib/INet/str-SockUtil.html b/doc/html/smlnj-lib/INet/str-SockUtil.html new file mode 100644 index 0000000..5aff487 --- /dev/null +++ b/doc/html/smlnj-lib/INet/str-SockUtil.html @@ -0,0 +1,244 @@ + + + + + + + + + + The SockUtil structure + + + + + + + + +
    +
    +
    +
    + +
    + +
    The SockUtil structure
    +
    +
    +
    + +
    +
    +
    +
    +
    +

    The SockUtil structure provides a collection of utility functions for +programming with the Basis Library +Socket structure

    +
    +
    +
    +
    +

    Synopsis

    +
    +
    +
    +
    signature SOCK_UTIL
    +structure SockUtil : SOCK_UTIL
    +
    +
    +
    +
    +
    +

    Interface

    +
    +
    +
    +
    datatype port = PortNumber of int | ServName of string
    +
    +datatype hostname = HostName of string | HostAddr of NetHostDB.in_addr
    +
    +val scanAddr : (char, 'a) StringCvt.reader
    +      -> ({host : hostname, port : port option}, 'a) StringCvt.reader
    +val addrFromString : string -> {host : hostname, port : port option} option
    +
    +exception BadAddr of string
    +
    +val resolveAddr : {host : hostname, port : port option}
    +      -> {host : string, addr : NetHostDB.in_addr, port : int option}
    +
    +type 'a stream_sock = ('a, Socket.active Socket.stream) Socket.sock
    +
    +val connectINetStrm : {addr : NetHostDB.in_addr, port : int}
    +      -> INetSock.inet stream_sock
    +
    +val recvVec : ('a stream_sock * int) -> Word8Vector.vector
    +val recvStr : ('a stream_sock * int) -> string
    +
    +val sendVec : ('a stream_sock * Word8Vector.vector) -> unit
    +val sendStr : ('a stream_sock * string) -> unit
    +val sendArr : ('a stream_sock * Word8Array.array) -> unit
    +
    +
    +
    +
    +
    +

    Description

    +
    +
    +
    +
    datatype port = PortNumber of int | ServName of string
    +
    +

    specifies a port identifier, which either be a number (Port) +or the name of a service (ServName).

    +
    +
    datatype hostname = HostName of string | HostAddr of NetHostDB.in_addr
    +
    +

    something

    +
    +
    +
    +
    +
    +
    val scanAddr : (char, 'a) StringCvt.reader -> ({host : hostname, port : port option}, 'a) StringCvt.reader
    +
    +

    scanAddr getc returns an address reader. An address is a string of +the form \(\mathit{addr}\,[\,\mathtt{:}\,\mathit{port}\,\)], +where \(\mathit{addr}\) may either be a numeric or symbolic host +name and the optional port is either a decimal port number or alphanumeric service +name. Legal host names must begin with a letter, and may contain any alphanumeric +character, the minus sign (-) and period (.), where the period is used as a +domain separator.

    +
    +
    val addrFromString : string -> {host : hostname, port : port option} option
    +
    +

    addrFromString addr converts the string addr to a host-port address specifier. +The syntax of addresses is as described for scanAddr.

    +
    +
    +
    +
    +
    +
    exception BadAddr of string
    +
    +

    This exception is raised by resolveAddr.

    +
    +
    +
    +
    +
    +
    val resolveAddr : {host : hostname, port : port option} -> {host : string, addr : NetHostDB.in_addr, port : int option}
    +
    +

    resolveAddr {host, port} resolves the hostname and optional port in the +host and service databases. If either the host or service name is not +found, then the BadAddr exception is raised.

    +
    +
    type 'a stream_sock = ('a, Socket.active Socket.stream) Socket.sock
    +
    +

    A type abbreviation for active stream sockets.

    +
    +
    val connectINetStrm : {addr : NetHostDB.in_addr, port : int} -> INetSock.inet stream_sock
    +
    +

    connectINetStrm {addr, port} establishs a client-side connection to an +INET domain stream socket.

    +
    +
    val recvVec : ('a stream_sock * int) -> Word8Vector.vector
    +
    +

    recvVec (sock, n) reads n bytes from the stream socket sock; fewer than +n bytes is returned when the stream is closed at the other end of the connection. +It raises the Size exception +when n is negative.

    +
    +
    val recvStr : ('a stream_sock * int) -> string
    +
    +

    recvStr (sock, n) reads n characters from the stream socket sock; fewer than +n characters is returned when the stream is closed at the other end of the connection. +It raises the Size exception +when n is negative.

    +
    +
    val sendVec : ('a stream_sock * Word8Vector.vector) -> unit
    +
    +

    sendVec (sock, vec) sends the vector vec on the stream socket sock.

    +
    +
    val sendStr : ('a stream_sock * string) -> unit
    +
    +

    sendStr (sock, s) sends the string s on the stream socket sock.

    +
    +
    val sendArr : ('a stream_sock * Word8Array.array) -> unit
    +
    +

    sendArr (sock, arr) sends the array arr on the stream socket sock.

    +
    +
    +
    +
    +
    +
    +

    See Also

    + +
    +
    +
    + +
    + + diff --git a/doc/html/smlnj-lib/INet/str-UnixSockUtil.html b/doc/html/smlnj-lib/INet/str-UnixSockUtil.html new file mode 100644 index 0000000..a15427d --- /dev/null +++ b/doc/html/smlnj-lib/INet/str-UnixSockUtil.html @@ -0,0 +1,152 @@ + + + + + + + + + + The UnixSockUtil structure + + + + + + + + +
    +
    +
    +
    + +
    + +
    The UnixSockUtil structure
    +
    +
    +
    + +
    +
    +
    +
    +
    +

    The UnixSockUtil structure extends the SockUtil +structure with an additional operation for making Unix-domain connections.

    +
    +
    +
    +
    +

    Synopsis

    +
    +
    +
    +
    signature UNIX_SOCK_UTIL
    +structure UnixSockUtil : UNIX_SOCK_UTIL
    +
    +
    +
    +
    +
    +

    Interface

    +
    +
    +
    +
    include SOCK_UTIL
    +
    +val connectUnixStrm : string -> UnixSock.unix stream_sock
    +
    +
    +
    +
    +
    +

    Description

    +
    +
    +
    +
    include SOCK_UTIL
    +
    +

    The UnixSockUtil structure includes the operations from the +SockUtil structure.

    +
    +
    val connectUnixStrm : string -> UnixSock.unix stream_sock
    +
    +

    connectUnixStrm path establishs a client-side connection to the + Unix-domain stream socket specified by path.

    +
    +
    +
    +
    +
    + +
    +
    + +
    + + diff --git a/doc/html/smlnj-lib/JSON/json-lib.html b/doc/html/smlnj-lib/JSON/json-lib.html new file mode 100644 index 0000000..72280ec --- /dev/null +++ b/doc/html/smlnj-lib/JSON/json-lib.html @@ -0,0 +1,179 @@ + + + + + + + + + + The JSON Library + + + + + + + + +
    +
    +
    +
    + +
    + +
    The JSON Library
    +
    +
    +
    + +
    +
    +
    +

    Overview

    +
    +
    +

    The JSON Library provides a representation of the +JavaScript Object Notation (JSON) +with support for parsing, printing, and manipulating JSON +values as trees. There is also support for processing JSON data as +streams using a "SAX-style" API.

    +
    +
    +
    +
    +

    Contents

    +
    +
    +

    Tree-based API

    +
    +
    +
    structure JSON
    +
    +

    Defines the representation of JSON values as an SML datatype.

    +
    +
    structure JSONParser
    +
    +

    A parser for JSON input.

    +
    +
    structure JSONPrinter
    +
    +

    A printer for JSON output.

    +
    +
    structure JSONUtil
    +
    +

    A collection of utility functions for working with JSON values.

    +
    +
    structure JSONDecode
    +
    +

    A collection of combinators for decoding JSON values.

    +
    +
    +
    +
    +
    +

    Stream-based API

    +
    +
    +
    structure JSONStreamParser
    +
    +

    A stream (or event) based parser for JSON input.

    +
    +
    signature JSON_STREAM_OUTPUT
    +
    +

    The interface to a stream-based printer for JSON output.

    +
    +
    structure JSONBufferPrinter
    +
    +

    A stream-based printer for JSON output to character buffers.

    +
    +
    structure JSONStreamPrinter
    +
    +

    A stream-based printer for JSON output to text files.

    +
    +
    +
    +
    +
    +
    +
    +

    Usage

    +
    +
    +

    For SML/NJ, include $/json-lib.cm in your +CM file.

    +
    +
    +

    For use in MLton, include +$(SML_LIB)/smlnj-lib/JSON/json-lib.mlb in your MLB file.

    +
    +
    +
    +
    +
    + +
    + + diff --git a/doc/html/smlnj-lib/JSON/sig-JSON_STREAM_OUTPUT.html b/doc/html/smlnj-lib/JSON/sig-JSON_STREAM_OUTPUT.html new file mode 100644 index 0000000..b1232c9 --- /dev/null +++ b/doc/html/smlnj-lib/JSON/sig-JSON_STREAM_OUTPUT.html @@ -0,0 +1,292 @@ + + + + + + + + + + The JSON_STREAM_OUTPUT signature + + + + + + + + +
    +
    +
    +
    + +
    + +
    The JSON_STREAM_OUTPUT signature
    +
    +
    +
    + +
    +
    +
    +
    +
    +

    The JSON_STREAM_OUTPUT signature defines an interface for +stream-output of JSON values.

    +
    +
    +
    +
    +

    Synopsis

    +
    +
    +
    +
    signature JSON_STREAM_OUTPUT
    +
    +structure JSONBufferPrinter : JSON_STREAM_OUTPUT
    +    where type outstream = CharBuffer.buf
    +structure JSONStreamPrinter : JSON_STREAM_OUTPUT
    +    where type outstream = outstream
    +
    +
    +
    +
    +
    +

    Interface

    +
    +
    +
    +
    type outstream
    +
    +type printer
    +
    +val new : outstream -> printer
    +val new' : {strm : outstream, pretty : bool} -> printer
    +
    +val close : printer -> unit
    +
    +val null : printer -> unit
    +val boolean : printer * bool -> unit
    +val integer : printer * IntInf.int -> unit
    +val int : printer * int -> unit
    +val float : printer * real -> unit
    +val string : printer * string -> unit
    +val beginObject : printer -> unit
    +val objectKey : printer * string -> unit
    +val endObject : printer -> unit
    +val beginArray : printer -> unit
    +val endArray : printer -> unit
    +
    +val value : printer * JSON.value -> unit
    +
    +
    +
    +
    +
    +

    Description

    +
    +
    +
    +
    type outstream
    +
    +

    The type of the text consumer that the printer is layered on top of.

    +
    +
    type printer
    +
    +

    The printer type tracks the current state of the output so that it +can correctly add punctuation and white space (when pretty printing).

    +
    +
    val new : outstream -> printer
    +
    +

    new outS creates a new printer from the output stream outS. +The printer produces a condensed format without newlines or +indentation; use the new' function to create a pretty-printer +for JSON output.

    +
    +
    val new' : {strm : outstream, pretty : bool} -> printer
    +
    +

    new' {strm, pretty} creates a new pretty-printing stream from the output +stream strm, where the value of the pretty field controls whether +the output is condensed (when pretty is false) or printed with +new lines and indentation to improve readability (when pretty is true).

    +
    +
    val close : printer -> unit
    +
    +

    close pr closes the printer, but not the underlying output stream. +Closing the printer while there is an open object or array results in +the Fail +exception being raised. Also, calling any of the below printing +functions on a closed printer will result in the +Fail +exception being raised.

    +
    +
    val null : printer -> unit
    +
    +

    null pr prints the JSON null value. Raises the +Fail +exception if the printer is closed.

    +
    +
    val boolean : printer * bool -> unit
    +
    +

    boolean (pr, b) prints the JSON boolean value b. Raises the +Fail +exception if the printer is closed.

    +
    +
    val integer : printer * IntInf.int -> unit
    +
    +

    integer (pr, n) prints the JSON number n. Raises the +Fail +exception if the printer is closed.

    +
    +
    val int : printer * int -> unit
    +
    +

    int (pr, n) prints the JSON number n. Raises the +Fail +exception if the printer is closed. This function is a convenience +for when one wants to print a default-int-type value without +converting it to a IntInf.int first.

    +
    +
    val float : printer * real -> unit
    +
    +

    float (pr, r) prints the JSON floating-point number r. +Raises the Fail +exception if the printer is closed.

    +
    +
    val string : printer * string -> unit
    +
    +

    string (pr, s) prints the JSON string s. Raises the +Fail +exception if the printer is closed.

    +
    +
    val beginObject : printer -> unit
    +
    +

    beginArray pr prints the opening “{” for a JSON object. +Note that each call to beginObject should be matched by a call +to endObject. Raises the +Fail +exception if the printer is closed.

    +
    +
    val objectKey : printer * string -> unit
    +
    +

    objectKey (pr, key) prints the JSON key-value key followed +by a “:”. This function should be inside matched +beginObject/endObject calls and should be followed by +the printing of a JSON value. Raises the +Fail +exception if the printer is closed.

    +
    +
    val endObject : printer -> unit
    +
    +

    endObject pr prints the closing } for the currently open object. +The Fail +exception is raised if the current context is not an open object, +if a key has been printed without an associated value, or +if the printer is closed.

    +
    +
    val beginArray : printer -> unit
    +
    +

    beginArray pr prints the opening “[” for a JSON array. +Note that each call to beginArray should be matched by a call +to endArray. Raises the +Fail +exception if the printer is closed.

    +
    +
    val endArray : printer -> unit
    +
    +

    endArray pr prints the closing ] for the currently open array. +The Fail +exception is raised if the current context is not an open array +or if the printer is closed.

    +
    +
    val value : printer * JSON.value -> unit
    +
    +

    value (pr, v) embeds the JSON value v in the output. It is +equivalent to recursively traversing the JSON value while calling +the appropriate output functions from above.

    +
    +
    +
    +
    +
    + +
    +
    + +
    + + diff --git a/doc/html/smlnj-lib/JSON/str-JSON.html b/doc/html/smlnj-lib/JSON/str-JSON.html new file mode 100644 index 0000000..d16190b --- /dev/null +++ b/doc/html/smlnj-lib/JSON/str-JSON.html @@ -0,0 +1,220 @@ + + + + + + + + + + The JSON structure + + + + + + + + +
    +
    +
    +
    + +
    + +
    The JSON structure
    +
    +
    +
    + +
    +
    +
    +

    Synopsis

    +
    +
    +
    +
    structure JSON
    +
    +
    +
    +
    +
    +

    Interface

    +
    +
    +
    +
    datatype value
    +  = OBJECT of (string * value) list
    +  | ARRAY of value list
    +  | NULL
    +  | BOOL of bool
    +  | INT of IntInf.int
    +  | INTLIT of string
    +  | FLOAT of real
    +  | STRING of string
    +
    +
    +
    +
    +
    +

    Description

    +
    +
    +
    +
    datatype value = …​
    +
    +

    +This datatype represents JSON values as trees. The constructors +are

    +
    +
    +
    +
    +
    OBJECT of (string * value) list
    +
    +

    represents a JSON object value; i.e., a list of key-value pairs. +Note that the keys should be unique.

    +
    +
    ARRAY of value list
    +
    +

    represents a JSON array value.

    +
    +
    NULL
    +
    +

    represents the JSON value "null".

    +
    +
    BOOL of bool
    +
    +

    represents the JSON values "true" and "false".

    +
    +
    INT of IntInf.int
    +
    +

    represents JSON integer numbers.

    +
    +
    INTLIT of string
    +
    +

    represents JSON integer numbers that have a large number of digits.

    +
    +
    FLOAT of real
    +
    +

    represents JSON floating-point numbers.

    +
    +
    STRING of string
    +
    +

    represents JSON strings, which are assumed to be UTF-8 encoded.

    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +

    Example

    +
    +
    +

    The JSON value

    +
    +
    +
    +
    { "a" : 23,
    +  "b" : [ false, true ],
    +  "c" : "hello world"
    +}
    +
    +
    +
    +

    has the following representation using the value datatype:

    +
    +
    +
    +
    OBJECT[
    +  ("a", INT 23),
    +  ("b", ARRAY[BOOL false, BOOL true]),
    +  ("c", STRING "hello world")
    +]
    +
    +
    +
    +
    +
    +

    See Also

    + +
    +
    +
    + +
    + + diff --git a/doc/html/smlnj-lib/JSON/str-JSONBufferPrinter.html b/doc/html/smlnj-lib/JSON/str-JSONBufferPrinter.html new file mode 100644 index 0000000..5ecfc13 --- /dev/null +++ b/doc/html/smlnj-lib/JSON/str-JSONBufferPrinter.html @@ -0,0 +1,131 @@ + + + + + + + + + + The JSONBufferPrinter structure + + + + + + + + +
    +
    +
    +
    + +
    + +
    The JSONBufferPrinter structure
    +
    +
    +
    + +
    +
    +
    +
    +
    +

    The JSONBufferPrinter structure provides an imperative +printer for producing JSON output to a + +character buffer.

    +
    +
    +
    +
    +

    Synopsis

    +
    +
    +
    +
    structure JSONBufferPrinter : JSON_STREAM_OUTPUT
    +    where type outstream = CharBuffer.buf
    +
    +
    +
    +
    + +
    +
    + +
    + + diff --git a/doc/html/smlnj-lib/JSON/str-JSONDecode.html b/doc/html/smlnj-lib/JSON/str-JSONDecode.html new file mode 100644 index 0000000..5f6d252 --- /dev/null +++ b/doc/html/smlnj-lib/JSON/str-JSONDecode.html @@ -0,0 +1,502 @@ + + + + + + + + + + The JSONDecode structure + + + + + + + + +
    +
    +
    +
    + +
    + +
    The JSONDecode structure
    +
    +
    +
    + +
    +
    +
    +
    +
    +

    The JSONDecode structure implements combinators for decoding JSON values. +The design is based on +Elm's JSON.Decode module.

    +
    +
    +
    +
    +

    Synopsis

    +
    +
    +
    +
    signature JSON_DECODE
    +structure JSONDecode : JSON_DECODE
    +
    +
    +
    +
    +
    +

    Interface

    +
    +
    +
    +
    exception Failure of string * JSON.value
    +
    +exception NotBool of JSON.value
    +exception NotInt of JSON.value
    +exception NotNumber of JSON.value
    +exception NotString of JSON.value
    +
    +exception NotObject of JSON.value
    +exception FieldNotFound of JSON.value * string
    +
    +exception NotArray of JSON.value
    +exception ArrayBounds of JSON.value * int
    +
    +val exnMessage : exn -> string
    +
    +type 'a decoder
    +
    +val decode : 'a decoder -> JSON.value -> 'a
    +val decodeString : 'a decoder -> string -> 'a
    +val decodeFile : 'a decoder -> string -> 'a
    +
    +val bool : bool decoder
    +val int : int decoder
    +val intInf : IntInf.int decoder
    +val number : Real64.real decoder
    +val string : string decoder
    +val null : 'a -> 'a decoder
    +
    +val raw : JSON.value decoder
    +
    +val nullable : 'a decoder -> 'a option decoder
    +
    +val try : 'a decoder -> 'a option decoder
    +
    +val seq : 'a decoder -> ('a -> 'b) decoder -> 'b decoder
    +
    +val field : string -> 'a decoder -> 'a decoder
    +
    +val reqField : string -> 'a decoder -> ('a -> 'b) decoder -> 'b decoder
    +
    +val optField : string -> 'a decoder -> ('a option -> 'b) decoder -> 'b decoder
    +
    +val dfltField : string -> 'a decoder -> 'a -> ('a -> 'b) decoder -> 'b decoder
    +
    +val array : 'a decoder -> 'a list decoder
    +val sub : int -> 'a decoder -> 'a decoder
    +
    +val at : JSONUtil.path -> 'a decoder -> 'a decoder
    +
    +val succeed : 'a -> 'a decoder
    +
    +val fail : string -> 'a decoder
    +
    +val andThen : ('a -> 'b decoder) -> 'a decoder -> 'b decoder
    +
    +val orElse : 'a decoder * 'a decoder -> 'a decoder
    +
    +val choose : 'a decoder list -> 'a decoder
    +
    +val map : ('a -> 'b) -> 'a decoder -> 'b decoder
    +val map2 : ('a * 'b -> 'res)
    +  -> ('a decoder * 'b decoder)
    +  -> 'res decoder
    +val map3 : ('a * 'b * 'c -> 'res)
    +  -> ('a decoder * 'b decoder * 'c decoder)
    +  -> 'res decoder
    +val map4 : ('a * 'b * 'c * 'd -> 'res)
    +  -> ('a decoder * 'b decoder * 'c decoder * 'd decoder)
    +  -> 'res decoder
    +
    +val tuple2 : ('a decoder * 'b decoder) -> ('a * 'b) decoder
    +val tuple3 : ('a decoder * 'b decoder * 'c decoder) -> ('a * 'b * 'c) decoder
    +val tuple4 : ('a decoder * 'b decoder * 'c decoder * 'd decoder)
    +  -> ('a * 'b * 'c * 'd) decoder
    +
    +val delay : (unit -> 'a decoder) -> 'a decoder
    +
    +
    +
    +
    +
    +

    Description

    +
    +
    +
    +
    exception Failure of string * JSON.value
    +
    +

    +raised by the fail decoder.

    +
    +
    exception NotNull of JSON.value
    +
    +

    +raised by the null decoder when the argument +is not the JSON null value.

    +
    +
    exception NotBool of JSON.value
    +
    +

    +raised by the bool decode when the argument +is not a JSON boolean. +This exception is the same as +JSONUtil.NotBool.

    +
    +
    exception NotInt of JSON.value
    +
    +

    +raised by the int and intInf +decoders when the argument is not a JSON integer number. +This exception is the same as +JSONUtil.NotInt.

    +
    +
    exception NotNumber of JSON.value
    +
    +

    +raised by the number decoder when the argument +is not a JSON number. +This exception is the same as +JSONUtil.NotNumber.

    +
    +
    exception NotString of JSON.value
    +
    +

    +raised by the string decoder when the argument +is not a JSON string. +This exception is the same as +JSONUtil.NotString.

    +
    +
    exception NotObject of JSON.value
    +
    +

    +raised by the field decoder when the +argument is not a JSON object. +This exception is the same as +JSONUtil.NotObject.

    +
    +
    exception FieldNotFound of JSON.value * string
    +
    +

    +This exception is raised by the field decoder +when the given field is not found in an object. +This exception is the same as +JSONUtil.FieldNotFound.

    +
    +
    exception NotArray of JSON.value
    +
    +

    +This exception is raised by the array decoder +when the argument is not a JSON array. +This exception is the same as +JSONUtil.NotArray.

    +
    +
    exception ArrayBounds of JSON.value * int
    +
    +

    +This exception is raised when access to an array value is out of bounds. +This exception is the same as +JSONUtil.ArrayBounds.

    +
    +
    val exnMessage : exn -> string
    +
    +

    exnMessage exn returns an error-message string for the exception value +exn. This function produces specialized messages for the exceptions defined +in the JSONDecode structure and falls back to the +General.exnMessage +function for other exceptions.

    +
    +
    type 'a decoder'
    +
    +

    the type of a decoder that decodes a JSON value to a value of type 'a.

    +
    +
    val decode : 'a decoder -> JSON.value -> 'a
    +
    +

    decode d jv decodes the JSON value jv using the decoder d. +Failure to decode will be signaled by raising an exception that depends on the +decoder and value.

    +
    +
    val decodeString : 'a decoder -> string -> 'a
    +
    +

    decode d s decodes the JSON value that results from parsing the string s.

    +
    +
    val decodeFile : 'a decoder -> string -> 'a
    +
    +

    decode d f decodes the JSON value that results from parsing the file f.

    +
    +
    val bool : bool decoder
    +
    +

    decodes a JSON Boolean value. This decoder raises the +NotBool exception if the value is not a JSON Boolean.

    +
    +
    val int : int decoder
    +
    +

    decodes a JSON integer value. This decoder raises the +NotInt exception if the value is not a JSON integer +and the Overflow +exception if the integer is too large to be represented as an Int.int.

    +
    +
    val intInf : IntInf.int decoder
    +
    +

    decodes a JSON integer value. This decoder raises the +NotInt exception if the value is not a JSON integer.

    +
    +
    val number : Real64.real decoder
    +
    +

    decodes a JSON number value. This decoder raises the +NotNumber exception if the value is not a JSON number.

    +
    +
    val string : string decoder
    +
    +

    decodes a JSON string value. This decoder raises the +NotString exception if the value is not a JSON string.

    +
    +
    val null : 'a -> 'a decoder
    +
    +

    null v returns a decoder for the JSON null value. When used to decode +a null value, it will return its argument v; otherwise it will raise the +NotNull exception.

    +
    +
    val raw : JSON.value decoder
    +
    +

    this decoder returns the raw JSON value that it is applied to (i.e., it +is the identity decoder).

    +
    +
    val nullable : 'a decoder -> 'a option decoder
    +
    +

    nullable d returns a decoder that maps null to NONE and otherwise applies +SOME to the result of decoding the value using the decoder d.

    +
    +
    val try : 'a decoder -> 'a option decoder
    +
    +

    try d returns a decoder that attempts to decode its argument using the decoder +d. If it fails, then NONE is returned. Otherwise, SOME is applied to +the result od decoding the value.

    +
    +
    val seq : 'a decoder → ('a -> 'b) decoder -> 'b decoder
    +
    +

    seq d k sequences decoding operations in a continuation-passing style.

    +
    +
    val field : string -> 'a decoder -> 'a decoder
    +
    +

    field lab d returns a decoder that decodes the object field with the +label lab using the decoder d. It will raise the NotObject +exception when the argument is not a JSON object and the +FieldNotFound exception when the given object does +not have a field with the specified label.

    +
    +
    val reqField : string -> 'a decoder -> ('a -> 'b) decoder -> 'b decoder
    +
    +

    reqField lab d k returns a decoder for a required object field that can be +sequenced in a continuation-passing style (it is equivalent to seq (field lab d) k). +It will raise the NotObject exception when the argument +is not a JSON object and the FieldNotFound exception +when the given object does not have a field with the specified label.

    +
    +
    val optField : string -> 'a decoder -> ('a option -> 'b) decoder -> 'b decoder
    +
    +

    optField lab d k returns a decoder for an optional object field that can be +sequenced in a continuation-passing style. If the field is not present in the +object, then NONE is passed to k.

    +
    +
    val dfltField : string -> 'a decoder -> 'a -> ('a -> 'b) decoder -> 'b decoder
    +
    +

    dfltField lab d dflt k returns a decoder for an optional object field that can be +sequenced in a continuation-passing style. If the field is not present in the +object, then dflt is passed to k.

    +
    +
    val array : 'a decoder -> 'a list decoder
    +
    +

    array d returns a decoder that when applied to a JSON array, will decode +the elements of the array using the decoder d and return the result as a +list. It raises the NotArray exception if the +argument is not a JSON array.

    +
    +
    val sub : int -> 'a decoder -> 'a decoder
    +
    +

    sub i d returns a decoder that when given a JSON array, decodes the i'th +element of the array using the decoder d. This decoder will raise the +NotArray exception if the argument is not a JSON array, +and the ArrayBounds exception if the index is out of +bounds for the array.

    +
    +
    val at : JSONUtil.path -> 'a decoder -> 'a decoder
    +
    +

    at path d returns a decoder that uses the path to select a value +from its argument (see JSONUtil.get) +and then decodes that value using the decoder d.

    +
    +
    val succeed : 'a -> 'a decoder
    +
    +

    succeed v returns a decoder that always yields v for any argument.

    +
    +
    val fail : string -> 'a decoder
    +
    +

    fail msg returns a decoder that raises Failure(msg, jv) for +any JSON input jv.

    +
    +
    val andThen : ('a -> 'b decoder) -> 'a decoder -> 'b decoder
    +
    +

    andThen f d returns a decoder that first uses d to decode a value v +from its argument and then returns the result of applying f to v.

    +
    +
    val orElse : 'a decoder * 'a decoder -> 'a decoder
    +
    +

    orElse (d1, d2) returns a decoder that tries to decode its argument using +the decoder d1` and, if that fails, tries to decode the argument using d2.

    +
    +
    val choose : 'a decoder list -> 'a decoder
    +
    +

    choose ds returns a decoder that tries to decode its argument +using each of the decoders in the list ds, returning the first successful result. +If all of the decoders fail, the the Failure exception is +raised. +The expression choose [d1, …​, dn] is equivalent to

    +
    +
    +
    orElse(d1, orElse(d2, ..., orElse(dn, fail "no choice") ... ))
    +
    +
    +
    +
    val map : ('a -> 'b) -> 'a decoder -> 'b decoder
    +
    +

    map f d returns a decoder that applies the function f to the result +of decoding a JSON value using the decoder d.

    +
    +
    val map2 : ('a * 'b -> 'res) -> …​ -> 'res decoder
    +
    val map3 : ('a * 'b * 'c -> 'res) -> …​ -> 'res decoder
    +
    val map4 : ('a * 'b * 'c * 'd -> 'res) -> …​ -> 'res decoder
    +
    val tuple2 : ('a decoder * 'b decoder) -> ('a * 'b) decoder
    +
    +

    tuple2 (d1, d2) is equivalent to map2 Fn.id (d1, d2).

    +
    +
    val tuple3 : ('a decoder * 'b decoder * 'c decoder) -> ('a * 'b * 'c) decoder
    +
    +

    tuple3 (d1, d2, d3) is equivalent to map2 Fn.id (d1, d2, d3).

    +
    +
    val tuple4 : ('a decoder * 'b decoder * 'c decoder * 'd decoder) -> ('a * 'b * 'c * 'd) decoder
    +
    +

    tuple4 (d1, d2, d3, d4) is equivalent to map4 Fn.id (d1, d2, d3, d4).

    +
    +
    val delay : (unit -> 'a decoder) -> 'a decoder
    +
    +

    delay f returns a decoder that delays the application of f to produce the +decoder and can be used to define recursive decoders. +The expression delay f is equivalent to andThen f (succeed ()).

    +
    +
    +
    +
    +
    +
    +

    Discussion

    +
    +
    +

    A number of these combinators work best when composed using a infix pipe operator. +For example:

    +
    +
    +
    +
    fun |> (x, f) = f x
    +infix |>
    +
    +val d = succeed (fn (n : string) => fn (a : int) => {name=n, age=a})
    +        |> reqField "name" string
    +        |> reqField "age" int
    +
    +
    +
    +
    +
    +

    See Also

    + +
    +
    +
    + +
    + + diff --git a/doc/html/smlnj-lib/JSON/str-JSONParser.html b/doc/html/smlnj-lib/JSON/str-JSONParser.html new file mode 100644 index 0000000..d4e0cec --- /dev/null +++ b/doc/html/smlnj-lib/JSON/str-JSONParser.html @@ -0,0 +1,222 @@ + + + + + + + + + + The JSONParser structure + + + + + + + + +
    +
    +
    +
    + +
    + +
    The JSONParser structure
    +
    +
    +
    + +
    +
    +
    +
    +
    +

    The JSONParser structure implements a parser for the +JSON.value type.

    +
    +
    +
    +
    +

    Synopsis

    +
    +
    +
    +
    structure JSONParser
    +
    +
    +
    +
    +
    +

    Interface

    +
    +
    +
    +
    type source
    +
    +val openStream : TextIO.instream -> source
    +val openFile : string -> source
    +val openString : string -> source
    +
    +val close : source -> unit
    +
    +val parse : source -> JSON.value
    +
    +val parseFile : string -> JSON.value
    +
    +
    +
    +
    +
    +

    Description

    +
    +
    +
    +
    type source
    +
    +

    The abstract type of JSON input sources. Note that this type is the +same as JSONStreamParser.source.

    +
    +
    +
    +
    +
    +
    val openStream : TextIO.instream → source
    +
    +

    openStream inS returns a input source for the given input stream.

    +
    +
    +
    +
    +
    +
    val openFile : string → source
    +
    +

    openStream file returns a input source for the given file. This function +opens an input stream for reading from the file, so one should make sure to +call close on the source once all of the JSON values +have been read from the file.

    +
    +
    val openString : string → source
    +
    +

    openStream s returns a input source for the given string.

    +
    +
    +
    +
    +
    +
    val close : source → unit
    +
    +

    close src closes the input source, which has the effect of marking the source +as closed. Furthermore, if src was created by a call to +openFile, then the underlying input stream that +was created for the file is closed. This function does not close the +input stream for sources created by openStream

    +
    +
    +
    +
    +
    +
    val parse : source -> JSON.value
    +
    +

    parse src parses a JSON value from the input source src. +If src is closed or if there is a syntax error, then the +Fail +exception is raised.

    +
    +
    val parseFile : string -> JSON.value
    +
    +

    parse f parses a JSON value from the text file f. +If there is a syntax error, then the +Fail +exception is raised. +This function can also raise the +Io exception if +there is an error opening f. +Note that this function will only parse a single +JSON value from the file; to parse multiple values, one should used +the parse function with a source created by +openFile.

    +
    +
    +
    +
    +
    +
    +

    See Also

    + +
    +
    +
    + +
    + + diff --git a/doc/html/smlnj-lib/JSON/str-JSONPrinter.html b/doc/html/smlnj-lib/JSON/str-JSONPrinter.html new file mode 100644 index 0000000..7965352 --- /dev/null +++ b/doc/html/smlnj-lib/JSON/str-JSONPrinter.html @@ -0,0 +1,159 @@ + + + + + + + + + + The JSONPrinter structure + + + + + + + + +
    +
    +
    +
    + +
    + +
    The JSONPrinter structure
    +
    +
    +
    + +
    +
    +
    +
    +
    +

    The JSONPrinter structure implements a printer for the +JSON.value type.

    +
    +
    +
    +
    +

    Synopsis

    +
    +
    +
    +
    structure JSONPrinter
    +
    +
    +
    +
    +
    +

    Interface

    +
    +
    +
    +
    val print : TextIO.outstream * JSON.value -> unit
    +val print' : {strm : TextIO.outstream, pretty : bool} -> JSON.value -> unit
    +
    +
    +
    +
    +
    +

    Description

    +
    +
    +
    +
    val print : TextIO.outstream * JSON.value -> unit
    +
    +

    print (outS, jv) prints the JSON value jv to the outstream outS +in condensed format (i.e., without newlines or indentation).

    +
    +
    val print' : {strm : TextIO.outstream, pretty : bool} -> JSON.value -> unit
    +
    +

    print' {outS, pretty} jv prints the JSON value jv to the +outstream outS, where the value of the pretty field controls whether +the output is condensed (when pretty is false) or printed with +new lines and indentation to improve readability (when pretty is true).

    +
    +
    +
    +
    +
    +
    +

    See Also

    + +
    +
    +
    + +
    + + diff --git a/doc/html/smlnj-lib/JSON/str-JSONStreamParser.html b/doc/html/smlnj-lib/JSON/str-JSONStreamParser.html new file mode 100644 index 0000000..b466fae --- /dev/null +++ b/doc/html/smlnj-lib/JSON/str-JSONStreamParser.html @@ -0,0 +1,377 @@ + + + + + + + + + + The JSONStreamParser structure + + + + + + + + +
    +
    +
    +
    + +
    + +
    The JSONStreamParser structure
    +
    +
    +
    + +
    +
    +
    +
    +
    +

    The JSONStreamParser structure provides an event-based +(or stream) parsing model for JSON files. It is suitable +for scanning large files for particular items without having +to first build an in-memory data structure. It can also +be useful to directly translate from JSON to a specific SML +datatype without having to go through the intermediate +JSON.value representation.

    +
    +
    +
    +
    +

    Synopsis

    +
    +
    +
    +
    structure JSONStreamParser
    +
    +
    +
    +
    +
    +

    Interface

    +
    +
    +
    +
    type source
    +
    +type 'ctx callbacks = {
    +    null : 'ctx -> 'ctx,
    +    boolean : 'ctx * bool -> 'ctx,
    +    integer : 'ctx * IntInf.int -> 'ctx,
    +    float : 'ctx * real -> 'ctx,
    +    string : 'ctx * string -> 'ctx,
    +    startObject : 'ctx -> 'ctx,
    +    objectKey : 'ctx * string -> 'ctx,
    +    endObject : 'ctx -> 'ctx,
    +    startArray : 'ctx -> 'ctx,
    +    endArray : 'ctx -> 'ctx,
    +    error : 'ctx * string -> unit
    +  }
    +
    +val openStream : TextIO.instream -> source
    +val openFile : string -> source
    +val openString : string -> source
    +
    +val close : source -> unit
    +
    +val parse : 'ctx callbacks -> (source * 'ctx) -> 'ctx
    +
    +val parseFile : 'ctx callbacks -> (string * 'ctx) -> 'ctx
    +
    +
    +
    +
    +
    +

    Description

    +
    +
    +
    +
    type source
    +
    +

    The abstract type of JSON input sources. Note that this type is the +same as JSONParser.source.

    +
    +
    type 'ctx callbacks = { …​ }
    +
    +

    This type is a record of the parsing-event call-back functions, where +the type parameter 'cxt is instantiated to the context (or state) +needed to preserve information between events. The call-back functions +in this record type are invoked as follows:

    +
    +
    +
    +
    +
    null : 'ctx -> 'ctx
    +
    +

    called when the JSON null value is encountered.

    +
    +
    boolean : 'ctx * bool -> 'ctx
    +
    +

    called when the JSON true or false values are encountered.

    +
    +
    integer : 'ctx * IntInf.int -> 'ctx
    +
    +

    called when a JSON integral-number value encountered.

    +
    +
    float : 'ctx * real -> 'ctx
    +
    +

    called when a JSON floating-point-number value encountered.

    +
    +
    string : 'ctx * string -> 'ctx
    +
    +

    called when a JSON string value encountered.

    +
    +
    startObject : 'ctx -> 'ctx
    +
    +

    called at the beginning of a JSON object definition (i.e., +when a “{” is encountered).

    +
    +
    objectKey : 'ctx * string -> 'ctx
    +
    +

    called when a JSON object key is encountered (including the +"`:"). The next call-back will specify the value +associated with the key.

    +
    +
    endObject : 'ctx -> 'ctx
    +
    +

    called at the end of a JSON object definition (i.e., +when a “}” is encountered).

    +
    +
    startArray : 'ctx -> 'ctx
    +
    +

    called at the beginning of a JSON array definition (i.e., +when a “[” is encountered).

    +
    +
    endArray : 'ctx -> 'ctx
    +
    +

    called at the end of a JSON array definition (i.e., +when a “]” is encountered).

    +
    +
    error : 'ctx * string -> unit
    +
    +

    called when a syntax error is encountered in the input. The + second argument is an error message describing the error. +It is expected that this call-back does not return (i.e., it +either raises an exception or terminates the program). +If it does return, then the parser will raise the +Fail +exception.

    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    val openStream : TextIO.instream → source
    +
    +

    openStream inS returns a input source for the given input stream.

    +
    +
    +
    +
    +
    +
    val openFile : string → source
    +
    +

    openStream file returns a input source for the given file. This function +opens an input stream for reading from the file, so one should make sure to +call close on the source once all of the JSON values +have been read from the file.

    +
    +
    val openString : string → source
    +
    +

    openStream s returns a input source for the given string.

    +
    +
    +
    +
    +
    +
    val close : source → unit
    +
    +

    close src closes the input source, which has the effect of marking the source +as closed. Furthermore, if src was created by a call to +openFile, then the underlying input stream that +was created for the file is closed. This function does not close the +input stream for sources created by openStream

    +
    +
    +
    +
    +
    +
    val parse : 'ctx callbacks -> (source * 'ctx) -> 'ctx
    +
    +

    parse cbs (src, cxt) will parse the JSON input from the input source +src, using the record of call-back functions cbs and the initial +context cxt.

    +
    +
    val parseFile : 'ctx callbacks -> (string * 'ctx) -> 'ctx
    +
    +

    parse cbs (f, cxt) will parse the JSON input from the file +f, using the record of call-back functions cbs and the initial +context cxt. Note that this function will only parse a single +JSON value from the file; to parse multiple values, one should used +the parse function with a source created by +openFile.

    +
    +
    +
    +
    +
    +
    +

    Exampless

    +
    +
    +

    Consider the following JSON input:

    +
    +
    +
    +
    { "a" : 23,
    +  "b" : [ false, true ],
    +  "c" : "hello world"
    +}
    +
    +
    +
    +

    Parsing this value has the same result as evaluating the following +function:

    +
    +
    +
    +
    fun f cxt = let
    +      val cxt = startObject cxt
    +      val cxt = objectKey (cxt, "a")
    +      val cxt = integer (cxt, 23)
    +      val cxt = objectKey (cxt, "b")
    +      val cxt = startArray cxt
    +      val cxt = boolean (cxt, false)
    +      val cxt = boolean (cxt, true)
    +      val cxt = endArray cxt
    +      val cxt = objectKey (cxt, "c")
    +      val cxt = objectString (cxt, "hello world")
    +      val cxt = endObject cxt
    +      in
    +        cxt
    +      end
    +
    +
    +
    +

    The following function returns a list of all of the string-valued +fields labeled as "name" in the input file.

    +
    +
    +
    +
    fun getNames file = let
    +      fun objectKey ({names, ...}, "name") = {names = names, isName = true}
    +        | objectKey (cxt, _) = cxt
    +      fun string (cxt as {names, isName}, s) = if isName
    +            then {names = s :: names, isName = false}
    +            else cxt
    +      fun default ({names, isName}, _) = {names = names, isName = false}
    +      val cbs = {
    +              null = Fn.id,
    +              boolean = default,
    +              integer = default,
    +              float = default,
    +              string = string,
    +              startObject = Fn.id,
    +              objectKey = objectKey,
    +              endObject = Fn.id,
    +              startArray = Fn.id,
    +              endArray = Fn.id,
    +              error = fn (_, msg) => raise Fail msg
    +            }
    +      val {names, ...} =
    +            JSONStreamParser.parseFile cbs (file, {names = [], isName = false})
    +      in
    +        List.rev names
    +      end
    +
    +
    +
    +
    + +
    +
    + +
    + + diff --git a/doc/html/smlnj-lib/JSON/str-JSONStreamPrinter.html b/doc/html/smlnj-lib/JSON/str-JSONStreamPrinter.html new file mode 100644 index 0000000..c6b5219 --- /dev/null +++ b/doc/html/smlnj-lib/JSON/str-JSONStreamPrinter.html @@ -0,0 +1,130 @@ + + + + + + + + + + The JSONStreamPrinter structure + + + + + + + + +
    +
    +
    +
    + +
    + +
    The JSONStreamPrinter structure
    +
    +
    +
    + +
    +
    +
    +
    +
    +

    The JSONStreamPrinter structure provides an imperative +printer for producing JSON output to a TextIO output +stream.

    +
    +
    +
    +
    +

    Synopsis

    +
    +
    +
    +
    structure JSONStreamPrinter : JSON_STREAM_OUTPUT
    +    where type outstream = outstream
    +
    +
    +
    +
    + +
    +
    + +
    + + diff --git a/doc/html/smlnj-lib/JSON/str-JSONUtil.html b/doc/html/smlnj-lib/JSON/str-JSONUtil.html new file mode 100644 index 0000000..8b1b964 --- /dev/null +++ b/doc/html/smlnj-lib/JSON/str-JSONUtil.html @@ -0,0 +1,375 @@ + + + + + + + + + + The JSONUtil structure + + + + + + + + +
    +
    +
    +
    + +
    + +
    The JSONUtil structure
    +
    +
    +
    + +
    +
    +
    +
    +
    +

    The JSONUtil module defines a collection of utility functions for +working with JSON values. These include operations for testing +if a value is of a given type and navigating the structure of a +JSON value.

    +
    +
    +
    +
    +

    Synopsis

    +
    +
    +
    +
    structure JSONUtil
    +
    +
    +
    +
    +
    +

    Interface

    +
    +
    +
    +
    exception NotBool of JSON.value
    +exception NotInt of JSON.value
    +exception NotNumber of JSON.value
    +exception NotString of JSON.value
    +
    +exception NotObject of JSON.value
    +
    +exception FieldNotFound of JSON.value * string
    +
    +exception NotArray of JSON.value
    +
    +exception ArrayBounds of JSON.value * int
    +
    +exception ElemNotFound of JSON.value
    +
    +val exnMessage : exn -> string
    +
    +val asBool : JSON.value -> bool
    +val asInt : JSON.value -> Int.int
    +val asIntInf : JSON.value -> IntInf.int
    +val asNumber : JSON.value -> Real.real
    +val asString : JSON.value -> string
    +
    +val findField : JSON.value -> string -> JSON.value option
    +val lookupField : JSON.value -> string -> JSON.value
    +val hasField : string -> JSON.value -> bool
    +val testField : string -> (JSON.value -> bool) -> JSON.value -> bool
    +
    +val asArray : JSON.value -> JSON.value vector
    +
    +val arrayMap : (JSON.value -> 'a) -> JSON.value -> 'a list
    +
    +datatype edge
    +  = SEL of string
    +  | SUB of int
    +  | FIND of JSON.value -> bool
    +
    +type path = edge list
    +
    +val get : JSON.value * path -> JSON.value
    +
    +val replace : JSON.value * path * JSON.value -> JSON.value
    +
    +val insert : JSON.value * path * string * JSON.value -> JSON.value
    +
    +val append : JSON.value * path * JSON.value list -> JSON.value
    +
    +
    +
    +
    +
    +

    Description

    +
    +
    +
    +
    exception NotBool of JSON.value
    +
    +

    +raised by the asBool function when the argument +is not a JSON boolean.

    +
    +
    exception NotInt of JSON.value
    +
    +

    +raised by the asInt and asIntInf +functions when the argument is not a JSON integer number.

    +
    +
    exception NotNumber of JSON.value
    +
    +

    +raised by the asNumber function when the argument +is not a JSON number.

    +
    +
    exception NotString of JSON.value
    +
    +

    +raised by the asString function when the argument +is not a JSON string.

    +
    +
    exception NotObject of JSON.value
    +
    +

    +raised by the findField and +lookupField functions when the +argument is not a JSON object.

    +
    +
    exception FieldNotFound of JSON.value * string
    +
    +

    +This exception is raised when the given field is not found in an object.

    +
    +
    exception NotArray of JSON.value
    +
    +

    +This exception is raised when trying to process a non-array value as an array.

    +
    +
    exception ArrayBounds of JSON.value * int
    +
    +

    +This exception is raised when access to an array value is out of bounds.

    +
    +
    exception ElemNotFound of JSON.value
    +
    +

    +This exception is raised when there is no element of an array that satisfies +the predicate of a FIND edge in a path. The argument will be the array +in question.

    +
    +
    val exnMessage : exn -> string
    +
    +

    exnMessage exn returns an error-message string for the exception value +exn. This function produces specialized messages for the exceptions defined +in the JSONUtil structure and falls back to the +General.exnMessage +function for other exceptions.

    +
    +
    val asBool : JSON.value -> bool
    +
    +

    +asBool (JSON.BOOL b) returns the value b. This function raises +the NotBool exception if the value is not a +JSON Boolean value.

    +
    +
    val asInt : JSON.value -> int
    +
    +

    +asInt (JSON.INT n) returns the value n converted to int. +This function raises the NotInt exception if the +value is not a JSON integer value. It may also raise the +Overflow +exception if n is too large for the default int type.

    +
    +
    val asIntInf : JSON.value -> IntInf.int
    +
    +

    +asIntInf (JSON.INT n) returns the value n. +This function raises the NotInt exception if the +value is not a JSON integer value.

    +
    +
    val asNumber : JSON.value -> Real.real
    +
    +

    +asNumber jv converts the JSON number jv to an SML real value. +The jv argument can either have the form JSON.INT n, in which case +n is converted to the real type and returned, or JSON.FLOAT f, +in which case f is returned; otherwise, the +NotNumber exception is raised.

    +
    +
    val asString : JSON.value -> string
    +
    +

    +asBool (JSON.STRING s) returns the value s. This function raises +the NotString exception if the value is not a +JSON string value.

    +
    +
    val findField : JSON.value -> string -> JSON.value option
    +
    +

    +findField (JSON.OBJECT flds) key returns SOME jv when the +list of fields flds contains (key, jv) and NONE otherwise. +If findField is called on a value that is not a JSON object, +then it raises the NotObject exception.

    +
    +
    val lookupField : JSON.value -> string -> JSON.value
    +
    +

    +lookupField (JSON.OBJECT flds) key returns jv when the +list of fields flds contains (key, jv) and raises the +FieldNotFound exception otherwise. +If lookupField is called on a value that is not a JSON object, +then it raises the NotObject exception.

    +
    +
    val hasField : string -> JSON.value -> bool
    +
    +

    +hasField key v returns true when the value v is a JSON object that +has a field with key as its label and false otherwise.

    +
    +
    val testField : string -> (JSON.value -> bool) -> JSON.value -> bool
    +
    +

    +testField key pred v returns the result of pred jv when + the value v is a JSON object that contains (key, jv). + It returns false otherwise.

    +
    +
    val asArray : JSON.value -> JSON.value vector
    +
    +

    +asArray jv converts the JSON array value jv to an SML +vector value. It raises the NotArray exception +when jv is not a JSON array.

    +
    +
    val arrayMap : (JSON.value -> 'a) -> JSON.value -> 'a list
    +
    +

    +map a conversion function over a JSON array to produce a list; this function +raises the NotArray exception if the second argument +is not an array.

    +
    +
    datatype edge = …​
    +
    +

    specifies an edge of a path into a JSON value. +The constructors have the following meaning:

    +
    +
    +
    +
    +
    SEL of string
    +
    +

    SEL key specifies the value labeled by key in a JSON object.

    +
    +
    SUB of int
    +
    +

    SUB i specifies the ith element of a JSON array.

    +
    +
    FIND of JSON.value -> bool
    +
    +

    FIND pred specifies the first element of a JSON array that satisfies +the given predicate.

    +
    +
    +
    +
    +
    +
    +
    type path = edge list
    +
    +

    specifies a path into a JSON value.

    +
    +
    val get : JSON.value * path -> JSON.value
    +
    +

    +get (jv, path) returns the component of jv named by path. It raises +one of the NotObject, NotArray, +FieldNotFound, or ElemNotFound +exceptions if there is an inconsistency between the path and the structure of jv.

    +
    +
    +
    +
    +
    +
    +

    See Also

    + +
    +
    +
    + +
    + + diff --git a/doc/html/smlnj-lib/Reactive/reactive-lib.html b/doc/html/smlnj-lib/Reactive/reactive-lib.html new file mode 100644 index 0000000..86ff479 --- /dev/null +++ b/doc/html/smlnj-lib/Reactive/reactive-lib.html @@ -0,0 +1,131 @@ + + + + + + + + + + The Reactive Library + + + + + + + + +
    +
    +
    +
    + +
    + +
    The Reactive Library
    +
    +
    +
    + +
    +
    +
    +

    Overview

    +
    +
    +

    The Reactive Library is an implementation of a low-level reactive +engine (or toolkit). This implementation is a port of the Java +SugarCubes +Library. The main difference (aside from the implementation language) +is that we support preemption of actions (as in Berry’s +Communicating Reactive Processes +model).

    +
    +
    +
    +
    +

    Contents

    +
    +
    +
    +
    structure Reactive
    +
    +

    something

    +
    +
    +
    +
    +
    +
    +

    Usage

    +
    +
    +

    For SML/NJ, include $/reactive-lib.cm in your +CM file.

    +
    +
    +

    For use in MLton, include +$(SML_LIB)/smlnj-lib/Reactive/reactive-lib.mlb in your MLB file.

    +
    +
    +
    +
    +
    + +
    + + diff --git a/doc/html/smlnj-lib/Reactive/str-Reactive.html b/doc/html/smlnj-lib/Reactive/str-Reactive.html new file mode 100644 index 0000000..283df13 --- /dev/null +++ b/doc/html/smlnj-lib/Reactive/str-Reactive.html @@ -0,0 +1,427 @@ + + + + + + + + + + The Reactive structure + + + + + + + + +
    +
    +
    +
    + +
    + +
    The Reactive structure
    +
    +
    +
    + +
    +
    +
    +
    +
    +

    The Reactive structure provides types and operations to build and +run reactive systems. The inputs and outputs of a reactive system +are sets of signals, which can either be present (i.e., true) +or absent (i.e., false). A reactive system runs in discrete +timesteps.

    +
    +
    +
    +
    +

    Synopsis

    +
    +
    +
    +
    signature REACTIVE
    +structure Reactive : REACTIVE
    +
    +
    +
    +
    +
    +

    Interface

    +
    +
    +
    +
    type machine
    +type instruction
    +type signal = Atom.atom
    +type config
    +type in_signal
    +type out_signal
    +
    +val machine : {
    +	inputs : signal list,
    +	outputs : signal list,
    +	body : instruction
    +      } -> machine
    +
    +val run : machine -> bool
    +val reset : machine -> unit
    +
    +val inputsOf : machine -> in_signal list
    +val outputsOf : machine -> out_signal list
    +
    +val inputSignal : in_signal -> signal
    +val outputSignal : out_signal -> signal
    +
    +val setInSignal  : (in_signal * bool) -> unit
    +val getInSignal  : in_signal -> bool
    +val getOutSignal : out_signal -> bool
    +
    +val || : (instruction * instruction) -> instruction
    +val &  : (instruction * instruction) -> instruction
    +
    +val nothing : instruction
    +val stop : instruction
    +val suspend : instruction
    +
    +val action : (machine -> unit) -> instruction
    +val exec   : (machine -> {stop : unit -> unit, done : unit -> bool})
    +	      -> instruction
    +
    +val ifThenElse : ((machine -> bool) * instruction * instruction) -> instruction
    +val repeat     : (int * instruction) -> instruction
    +val loop       : instruction -> instruction
    +val close      : instruction -> instruction
    +
    +val signal   : (signal * instruction) -> instruction
    +val rebind   : (signal * signal * instruction) -> instruction
    +val when     : (config * instruction * instruction) -> instruction
    +val trap     : (config * instruction) -> instruction
    +val trapWith : (config * instruction * instruction) -> instruction
    +val emit     : signal -> instruction
    +val await    : config -> instruction
    +
    +val posConfig : signal -> config
    +val negConfig : signal -> config
    +val orConfig  : (config * config) -> config
    +val andConfig : (config * config) -> config
    +
    +
    +
    +
    +
    +

    Description

    +
    +
    +

    The description of the interface is organized into sections.

    +
    +
    +

    Types

    +
    +
    +
    type machine
    +
    +

    The type of a reactive system.

    +
    +
    type instruction
    +
    +

    The abstract representation of a reactive program.

    +
    +
    type signal = Atom.atom
    +
    +

    The name of a signal.

    +
    +
    type config
    +
    +

    A signal configuration is a logical combination of signals.

    +
    +
    type in_signal
    +
    +

    An input signal for a reactive system.

    +
    +
    type out_signal
    +
    +

    An output signal for a reactive system.

    +
    +
    +
    +
    +
    +

    Machines

    +
    +
    +
    val machine : { …​ } -> machine
    +
    +

    machine {inputs, outputs, body} creates a new reactive system (or machine) +from a list of input signal names, a list of output signal names, and a reactive +program.

    +
    +
    val run : machine -> bool
    +
    +

    run m will run the reactive system m one instant (or activation). +It returns true if, and only if, the machine ends in a terminal state +(_i.e., by executing the stop instruction).

    +
    +
    val reset : machine -> unit
    +
    +

    reset m resets the state of m to its initial state.

    +
    +
    val inputsOf : machine -> in_signal list
    +
    +

    inputsOf m returns a list of the input signals in the machine.

    +
    +
    val outputsOf : machine -> out_signal list
    +
    +

    outputsOf m returns a list of the output signals in the machine.

    +
    +
    +
    +
    +
    +
    val nameOfInput : in_signal -> signal
    +
    +

    inputSignal inSig returns the name of the input signal.

    +
    +
    +
    +
    +
    +
    val nameOfOutput : out_signal -> signal
    +
    +

    inputSignal outSig returns the name of the output signal.

    +
    +
    val setInSignal : (in_signal * bool) -> unit
    +
    +

    setInSignal (inSig, b) sets the value of the input signal to b.

    +
    +
    val getInSignal : in_signal -> bool
    +
    +

    getInSignal inSig gets the current value of the input signal.

    +
    +
    val getOutSignal : out_signal -> bool
    +
    +

    getOutSignal inSig gets the current value of the output signal.

    +
    +
    +
    +
    +
    +

    Instructions

    +
    +
    +
    val || : (instruction * instruction) -> instruction
    +
    +

    || (i1, i2) forms the parallel composition of the two programs. +Activation of the resulting program will interleave the two +programs until either one of them suspends (see the +suspend instruction) or both programs terminate.

    +
    +
    val & : (instruction * instruction) -> instruction
    +
    +

    & (i1, i2) forms the sequential composition of the two programs.

    +
    +
    val nothing : instruction
    +
    +

    The program that does nothing.

    +
    +
    +
    +
    +
    +
    val stop : instruction
    +
    +

    The program that stops; i.e., reaches the terminal state for +the current and all future activations.

    +
    +
    +
    +
    +
    +
    val suspend : instruction
    +
    +

    The program that suspends the current activation.

    +
    +
    val action : (machine -> unit) -> instruction
    +
    +

    something

    +
    +
    val exec : (machine -> {stop : unit -> unit, done : unit -> bool}) -> instruction
    +
    +

    exec f returns a program that encapsulates the SML computation defined by +the function f.

    +
    +
    val ifThenElse : ((machine -> bool) * instruction * instruction) -> instruction
    +
    +

    something

    +
    +
    val repeat : (int * instruction) -> instruction
    +
    +

    something

    +
    +
    val loop : instruction -> instruction
    +
    +

    something

    +
    +
    val close : instruction -> instruction
    +
    +

    something

    +
    +
    val signal : (signal * instruction) -> instruction
    +
    +

    something

    +
    +
    val rebind : (signal * signal * instruction) -> instruction
    +
    +

    something

    +
    +
    val when : (config * instruction * instruction) -> instruction
    +
    +

    something

    +
    +
    val trapWith : (config * instruction * instruction) -> instruction
    +
    +

    trapWith (cfg, i1, i2) returns the program that …​

    +
    +
    val trap : (config * instruction) -> instruction
    +
    +

    trap (cfg, i) +This expression is equivalent to

    +
    +
    +
    trapWith (cfg, i, nothing)
    +
    +
    +
    +
    val emit : signal -> instruction
    +
    +

    emit sigId returns the program that emits the signal with the given +name (i.e., the signal is present).

    +
    +
    val await : config -> instruction
    +
    +

    await cfg returns the program that waits for the configuration to hold.

    +
    +
    +
    +
    +
    +

    Signal configurations

    +
    +
    +
    val posConfig : signal -> config
    +
    +

    posConfig sigId defines a configuration that holds if, and only if, +the signal named sigId is present.

    +
    +
    val negConfig : signal -> config
    +
    +

    negConfig sigId defines a configuration that holds if, and only if, +the signal named sigId is not present.

    +
    +
    val orConfig : (config * config) -> config
    +
    +

    orConfig (cfg1, cfg2) defines a configuration that holds if either cfg1 +or cfg2 (inclusive) holds.

    +
    +
    val andConfig : (config * config) -> config
    +
    +

    andConfig (cfg1, cfg2) defines a configuration that holds if both cfg1 +and cfg2 hold.

    +
    +
    +
    +
    +
    +

    Deprecated functions

    +
    +

    The following functions are part of the interface, but have been +deprecated.

    +
    +
    +
    +
    val inputSignal : in_signal → signal
    +
    +

    use nameOfInput instead.

    +
    +
    val outputSignal : out_signal → signal
    +
    +

    use nameOfOutput instead.

    +
    +
    +
    +
    +
    +
    +
    +

    See Also

    + +
    +
    +
    + +
    + + diff --git a/doc/html/smlnj-lib/RegExp/fun-RegExpFn.html b/doc/html/smlnj-lib/RegExp/fun-RegExpFn.html new file mode 100644 index 0000000..2de7b57 --- /dev/null +++ b/doc/html/smlnj-lib/RegExp/fun-RegExpFn.html @@ -0,0 +1,280 @@ + + + + + + + + + + The RegExpFn functor + + + + + + + + +
    +
    +
    +
    + +
    + +
    The RegExpFn functor
    +
    +
    +
    + +
    +
    +
    +
    +
    +

    The RegExpFn functor glues together a front-end regular-expression +parser with a back-end regular-expression engine.

    +
    +
    +
    +
    +

    Synopsis

    +
    +
    +
    +
    signature REGEXP
    +functor RegExpFn (
    +    structure P : REGEXP_PARSER
    +    structure E : REGEXP_ENGINE
    +  ) :> REGEXP where type regexp = E.regexp
    +
    +
    +
    +
    +
    +

    Functor Argument Interface

    +
    +
    +
    +
    structure P : REGEXP_PARSER
    +structure E : REGEXP_ENGINE
    +
    +
    +
    +
    +
    +

    Functor Argument Description

    +
    +
    +
    +
    structure P : REGEXP_PARSER
    +
    +

    The front-end parser for the regular-expression syntax.

    +
    +
    structure E : REGEXP_ENGINE
    +
    +

    The back-end engine.

    +
    +
    +
    +
    +
    +
    +

    Interface

    +
    +
    +
    +
    type regexp
    +
    +type 'a match = {pos : 'a, len : int} MatchTree.match_tree
    +
    +exception CannotParse
    +
    +val compile : (char,'a) StringCvt.reader -> (regexp, 'a) StringCvt.reader
    +val compileString : string -> regexp
    +
    +val find : regexp -> (char,'a) StringCvt.reader -> ('a match, 'a) StringCvt.reader
    +
    +val prefix : regexp -> (char,'a) StringCvt.reader -> ('a match, 'a) StringCvt.reader
    +
    +val match : (string * ('a match -> 'b)) list
    +      -> (char,'a) StringCvt.reader -> ('b, 'a) StringCvt.reader
    +
    +
    +
    +
    +
    +

    Interface Description

    +
    +
    +
    +
    type regexp
    +
    +

    The type of a compiled regular expression.

    +
    +
    +
    (* a match specifies the position (as a stream) and the length of the match *)
    +
    +
    +
    +
    type 'a match = {pos : 'a, len : int} MatchTree.match_tree
    +
    +

    A match tree specifying the starting +position and size of matches. For a general +character reader getc, +we can extract the string for a match using the following function:

    +
    +
    +
    fun getMatchString {pos, len} = let
    +      fun get (_, 0, chrs) = String.implodeRev chrs
    +        | get (strm, n, chrs) = let
    +            val SOME(c, rest) = getc strm
    +            in
    +              get (rest, n-1, c::chrs)
    +            end
    +      in
    +        get (pos, len, [])
    +      end
    +
    +
    +
    +

    More direct means are possible for specific input sources (e.g., strings, +substrings, or text input).

    +
    +
    +
    +
    +
    +
    +
    exception CannotParse
    +
    +

    This exception is raised by the functions compileString +match when the front-end encounters a syntax error.

    +
    +
    val compile : (char,'a) StringCvt.reader -> (regexp, 'a) StringCvt.reader
    +
    +

    compile getc strm parses and compiles a regular expression from the input +stream strm using the +character reader getc. +If successful, it returns SOME(re, strm'), where re is the compiled +regular expression and strm' is the residual input stream. It returns +NONE if there is a syntax error in the input. If the source regular +expression contains features that are not supported by the back-end engine, +then the CannotCompile +exception is raised.

    +
    +
    +
    +
    +
    +
    val compileString : string -> regexp
    +
    +

    compileString s returns the compiled regular expression defined +by the string s. The CannotParse exception +is raised if there was a syntax error when parsing s and the +CannotCompile exception +is raised if the source regular expression contains features that are +not supported by the back-end engine.

    +
    +
    val find : regexp -> (char,'a) StringCvt.reader -> ('a match, 'a) StringCvt.reader
    +
    +

    find re getc strm returns SOME mt where mt describes the first +match of re in the input stream; otherwise it returns NONE if there +is no match.

    +
    +
    val prefix : regexp -> (char,'a) StringCvt.reader -> ('a match, 'a) StringCvt.reader
    +
    +

    prefix re getc strm returns SOME mt where mt describes the +matching of re at the beginning of the input stream; otherwise it +returns NONE if re does not match a prefix of the input.

    +
    +
    +
    +
    +
    +
    val match : (string * ('a match -> 'b)) list -> (char,'a) StringCvt.reader -> ('b, 'a) StringCvt.reader
    +
    +

    match rules getc strm attempts to match one of the rules starting at the +current stream position. Each rule is a pair of a regular expression and +an action. The rules are tested in order; if a rule (re, act) matches with +the result mt, then the result of match will be SOME(act mt). +If no rule matches, then NONE is the result.

    +
    +
    +
    +
    +
    +
    +

    See Also

    + +
    +
    +
    + +
    + + diff --git a/doc/html/smlnj-lib/RegExp/regexp-lib.html b/doc/html/smlnj-lib/RegExp/regexp-lib.html new file mode 100644 index 0000000..1b67468 --- /dev/null +++ b/doc/html/smlnj-lib/RegExp/regexp-lib.html @@ -0,0 +1,172 @@ + + + + + + + + + + The RegExp Library + + + + + + + + +
    +
    +
    +
    + +
    + +
    The RegExp Library
    +
    +
    +
    + +
    +
    +
    +

    Overview

    +
    +
    +

    The RegExp Library …​

    +
    +
    +
    +
    +

    Contents

    +
    +
    +
    +
    Using the RegExp Library
    +
    +

    A tutorial guide to using this library.

    +
    +
    signature REGEXP_PARSER
    +
    +

    Defines the interface to a parser for a concrete +regular-expression syntax.

    +
    +
    signature REGEXP_ENGINE
    +
    +

    Defines the interface to a regular-expression search engine.

    +
    +
    structure RegExpSyntax
    +
    +

    Defines the abstract syntax of regular expressions.

    +
    +
    structure MatchTree
    +
    +

    Provides a tree-structured representation +of the result of a successful regular expression match.

    +
    +
    structure AwkSyntax
    +
    +

    A parser for the regular-expression syntax defined by the AWK language.

    +
    +
    structure BackTrackEngine
    +
    +

    A regular-expression search engine implemented as a back-tracking +interpreter.

    +
    +
    structure DfaEngine
    +
    +

    A regular-expression search engine implemented as a deterministic +finite automata.

    +
    +
    structure ThompsonEngine
    +
    +

    A regular-expression search engine based on Ken Thompson’s design.

    +
    +
    functor RegExpFn
    +
    +

    Provides a functor for combining a regular-expression parser with an engine to +form a complete implementation of an regular-expression search module.

    +
    +
    +
    +
    +
    +
    +

    Usage

    +
    +
    +

    For SML/NJ, include $/regexp-lib.cm in your +CM file.

    +
    +
    +

    For use in MLton, include +$(SML_LIB)/smlnj-lib/RegExp/regexp-lib.mlb in your MLB file.

    +
    +
    +
    +
    +
    + +
    + + diff --git a/doc/html/smlnj-lib/RegExp/sig-REGEXP_ENGINE.html b/doc/html/smlnj-lib/RegExp/sig-REGEXP_ENGINE.html new file mode 100644 index 0000000..61cd13c --- /dev/null +++ b/doc/html/smlnj-lib/RegExp/sig-REGEXP_ENGINE.html @@ -0,0 +1,265 @@ + + + + + + + + + + The REGEXP_ENGINE signature + + + + + + + + +
    +
    +
    +
    + +
    + +
    The REGEXP_ENGINE signature
    +
    +
    +
    + +
    +
    +
    +
    +
    +

    The REGEXP_ENGINE signature …​

    +
    +
    +
    +
    +

    Synopsis

    +
    +
    +
    +
    signature REGEXP_ENGINE
    +
    +structure BackTrackEngine : REGEXP_ENGINE
    +structure DfaEngine : REGEXP_ENGINE
    +structure ThompsonEngine : REGEXP_ENGINE
    +
    +
    +
    +
    +
    +

    Interface

    +
    +
    +
    +
    type regexp
    +
    +type 'a match = {pos : 'a, len : int} MatchTree.match_tree
    +
    +val compile : RegExpSyntax.syntax -> regexp
    +
    +val find : regexp -> (char,'a) StringCvt.reader -> ('a match, 'a) StringCvt.reader
    +
    +val prefix : regexp ->(char,'a) StringCvt.reader -> ('a match, 'a) StringCvt.reader
    +
    +val match : (RegExpSyntax.syntax * ('a match -> 'b)) list
    +	      -> (char,'a) StringCvt.reader -> ('b, 'a) StringCvt.reader
    +
    +
    +
    +
    +
    +

    Description

    +
    +
    +
    +
    type regexp
    +
    +

    something +(* the type of a compiled regular expression + *)

    +
    +
    +
    (* a match specifies the position (as a stream) and the length of the match *)
    +
    +
    +
    +
    type 'a match = {pos : 'a, len : int} MatchTree.match_tree
    +
    +

    something

    +
    +
    val compile : RegExpSyntax.syntax -> regexp
    +
    +

    something +(* compile a regular expression from the abstract syntax + *)

    +
    +
    val find : regexp -> (char,'a) StringCvt.reader -> ('a match, 'a) StringCvt.reader
    +
    +

    something +(* scan the stream for the first occurence of the regular expression. The call + *

    +
    +
      +
    • +

      find re getc strm +*

      +
    • +
    • +

      returns NONE if the end of stream is reached without a match. Otherwise it

      +
    • +
    • +

      returns SOME(match, strm'), where match is the match-tree for the match and

      +
    • +
    • +

      strm' is the stream following the match. +*)

      +
    • +
    +
    +
    +
    val prefix : regexp ->(char,'a) StringCvt.reader -> ('a match, 'a) StringCvt.reader
    +
    +

    something +(* attempt to match the stream at the current position with the

    +
    +
      +
    • +

      regular expression +*)

      +
    • +
    +
    +
    +
    val match : (RegExpSyntax.syntax * ('a match -> 'b)) list -> (char,'a) StringCvt.reader -> ('b, 'a) StringCvt.reader
    +
    +

    something +(* attempt to the match the stream at the current position with one of

    +
    +
      +
    • +

      the abstract syntax representations of regular expressions and trigger

      +
    • +
    • +

      the corresponding action +*)

      +
    • +
    +
    +
    +
    +
    +
    +
    +
    +

    Instances

    +
    +
    +
    +
    structure BackTrackEngine
    +
    +

    A backtracking interpreter for the regular-expression syntax. This implementation +requires no extra compilation time, but backtracking can result in slow searches +for some regular expressions.

    +
    +
    +
    +
    +
    +
    structure DfaEngine
    +
    +

    This implementation compiles the regular expression to a nondeterministic +finite-state machine and then coverts that to a deterministic machine. +The resulting machine is fast, but can be exponential in size for some +regular expressions.

    +
    +
    +
    +
    +
    +
    structure ThompsonEngine
    +
    +

    An implementation of Ken Thompson’s famous +Regular Expression Search Algorithm.

    +
    +
    +
    +
    +
    +
    +

    See Also

    + +
    +
    +
    + +
    + + diff --git a/doc/html/smlnj-lib/RegExp/sig-REGEXP_PARSER.html b/doc/html/smlnj-lib/RegExp/sig-REGEXP_PARSER.html new file mode 100644 index 0000000..cf5e7ec --- /dev/null +++ b/doc/html/smlnj-lib/RegExp/sig-REGEXP_PARSER.html @@ -0,0 +1,168 @@ + + + + + + + + + + The REGEXP_PARSER signature + + + + + + + + +
    +
    +
    +
    + +
    + +
    The REGEXP_PARSER signature
    +
    +
    +
    + +
    +
    +
    +
    +
    +

    The REGEXP_PARSER signature defines the interface to a parser for a +concrete regular-expression syntax.

    +
    +
    +
    +
    +

    Synopsis

    +
    +
    +
    +
    signature REGEXP_PARSER
    +
    +structure AwkSyntax : REGEXP_PARSER
    +
    +
    +
    +
    +
    +

    Interface

    +
    +
    +
    +
    val scan : (char, 'a) StringCvt.reader -> (RegExpSyntax.syntax, 'a) StringCvt.reader
    +
    +
    +
    +
    +
    +

    Description

    +
    +
    +
    +
    val scan : (char, 'a) StringCvt.reader -> (RegExpSyntax.syntax, 'a) StringCvt.reader
    +
    +

    scan getc strm parses a regular expression from the input stream strm using +the character reader getc. +If successful, it returns SOME(re, strm'), where re is the abstract syntax +of the regular expression and strm' is the residual input stream. It returns +NONE if there is a syntax error in the input.

    +
    +
    +
    +
    +
    +
    +

    Instances

    +
    +
    +
    +
    structure AwkSyntax
    +
    +

    A parser for the regular-expression syntax defined by the AWK language.

    +
    +
    +
    +
    +
    + +
    +
    + +
    + + diff --git a/doc/html/smlnj-lib/RegExp/str-AwkSyntax.html b/doc/html/smlnj-lib/RegExp/str-AwkSyntax.html new file mode 100644 index 0000000..c2a7a8c --- /dev/null +++ b/doc/html/smlnj-lib/RegExp/str-AwkSyntax.html @@ -0,0 +1,178 @@ + + + + + + + + + + The AwkSyntax structure + + + + + + + + +
    +
    +
    +
    + +
    + +
    The AwkSyntax structure
    +
    +
    +
    + +
    +
    +
    +
    +
    +

    The AwkSyntax structure implements the AWK syntax for regular expressions. +The syntax is defined on pp. 28-30 of The AWK Programming Language, +by Aho, Kernighan and Weinberger. The syntax has been extended with interval +syntax, which was added as part of the POSIX standard.

    +
    +
    +
    +
    +

    Synopsis

    +
    +
    +
    +
    structure AwkSyntax : REGEXP_PARSER
    +
    +
    +
    +
    +
    +

    Description

    +
    +
    +

    The meta characters are: + "\" "^" "$" "." "[" "]" "|" "(" ")" "*" "+" "?"

    +
    +
    +
    +
    Atomic REs:
    +  c	matches the character c (for non-metacharacters c)
    +  "^"	matches the empty string at the beginning of a line
    +    "$"	matches the empty string at the end of a line
    +  "."	matches any single character (except \000 and \n)
    +
    +
    +
    +
    +
    Escape sequences:
    +    "\b"	matches backspace
    +    "\f"	matches formfeed
    +    "\n"	matches newline (linefeed)
    +    "\r"	matches carriage return
    +    "\t"	matches tab
    +    "\"ddd	matches the character with octal code ddd.
    +    "\"c	matches the character c (e.g., \\ for \, \" for ")
    +  "\x"dd  matches the character with hex code dd.
    +
    +
    +
    +
    +
    Character classes:
    +    [...]	matches any character in "..."
    +    [^...]	a complemented character list, which matches any character not
    +     in the list "..."
    +
    +
    +
    +
    +
    Compound regular expressions, where A and B are REs:
    +    A|B	matches A or B
    +    AB	matches A followed by B
    +    A?	matches zero or one As
    +    A*	matches zero or more As
    +    A+	matches one or more As
    +    A{n}	matches n copies of A
    +    A{n,}	matches n copies of A
    +    A{n,m}	matches n copies of A
    +    (A)	matches A
    +
    +
    +
    +
    +
    +

    See Also

    + +
    +
    +
    + +
    + + diff --git a/doc/html/smlnj-lib/RegExp/str-MatchTree.html b/doc/html/smlnj-lib/RegExp/str-MatchTree.html new file mode 100644 index 0000000..0d7723a --- /dev/null +++ b/doc/html/smlnj-lib/RegExp/str-MatchTree.html @@ -0,0 +1,208 @@ + + + + + + + + + + The MatchTree structure + + + + + + + + +
    +
    +
    +
    + +
    + +
    The MatchTree structure
    +
    +
    +
    + +
    +
    +
    +
    +
    +

    The MatchTree structure provides a tree-structured representation +of the result of a successful regular expression match. The tree structure +corresponds to the nesting of groups +in the regular expression.

    +
    +
    +
    +
    +

    Synopsis

    +
    +
    +
    +
    signature MATCH_TREE
    +structure MatchTree : MATCH_TREE
    +
    +
    +
    +
    +
    +

    Interface

    +
    +
    +
    +
    datatype 'a match_tree = Match of 'a * 'a match_tree list
    +
    +val root : 'a match_tree -> 'a
    +val nth : ('a match_tree * int) -> 'a (* raises Subscript *)
    +val map : ('a -> 'b) -> 'a match_tree -> 'b match_tree
    +val app : ('a -> unit) -> 'a match_tree -> unit
    +val foldl : ('a * 'b -> 'b) -> 'b -> 'a match_tree -> 'b
    +val foldr : ('a * 'b -> 'b) -> 'b -> 'a match_tree -> 'b
    +val find : ('a -> bool) -> 'a match_tree -> 'a option
    +val num : 'a match_tree -> int
    +
    +
    +
    +
    +
    +

    Description

    +
    +
    +
    +
    datatype 'a match_tree = Match of 'a * 'a match_tree list
    +
    +

    The representation of the results of a nested grouping of regular expressions. +The type variable 'a is typically instantiated to information about the +particular range of the source that the node covers. For example, +it might be the pair of the start position and length of the match.

    +
    +
    val root : 'a match_tree -> 'a
    +
    +

    root mt returns the information about the root (outermost) match in the tree.

    +
    +
    val nth : ('a match_tree * int) -> 'a (* raises Subscript *)
    +
    +

    nth (mt, i) returns the information about the i'th match in the tree, +where matches are labeled in pre-order starting with 0 for the root. +This function raises the +Subscript exception +if i < 0 or there are fewer than i-1 nodes in the tree.

    +
    +
    val map : ('a -> 'b) -> 'a match_tree -> 'b match_tree
    +
    +

    map f mt returns the result of mapping the function f over mt. +For example, this function can be used to convert a match-tree of +position information to a tree of strings. The function is applied +to the tree in pre-order.

    +
    +
    val app : ('a -> unit) -> 'a match_tree -> unit
    +
    +

    app f mt applies the given function to the nodes in the tree +in pre-order.

    +
    +
    val foldl : ('a * 'b -> 'b) -> 'b -> 'a match_tree -> 'b
    +
    +

    foldl f init mt folds the function f over mt in left-to-right pre-order +using init as the initial value.

    +
    +
    val foldr : ('a * 'b -> 'b) -> 'b -> 'a match_tree -> 'b
    +
    +

    foldr f init mt folds the function f over mt in right-to-left post-order +using init as the initial value.

    +
    +
    val find : ('a -> bool) -> 'a match_tree -> 'a option
    +
    +

    find pred mt returns SOME info where info is the first +information that satisfies pred in a pre-order traversal of +the tree. It returns NONE if there is no match information +that satisfies pred.

    +
    +
    val num : 'a match_tree -> int
    +
    +

    num mt returns the number of sub-matches in the tree; i.e., the number +of nodes not counting the root.

    +
    +
    +
    +
    +
    +
    +

    See Also

    + +
    +
    +
    + +
    + + diff --git a/doc/html/smlnj-lib/RegExp/str-RegExpSyntax.html b/doc/html/smlnj-lib/RegExp/str-RegExpSyntax.html new file mode 100644 index 0000000..0eb3c4a --- /dev/null +++ b/doc/html/smlnj-lib/RegExp/str-RegExpSyntax.html @@ -0,0 +1,352 @@ + + + + + + + + + + The RegExpSyntax structure + + + + + + + + +
    +
    +
    +
    + +
    + +
    The RegExpSyntax structure
    +
    +
    +
    + +
    +
    +
    +
    +
    +

    The RegExpSyntax structure provides an abstract-syntax-tree +representation of regular expressions. Its main purpose is to +provide communication between different front-ends (implementing +different RE specification languages), and different back-ends +(implementing different compilation/searching algorithms). +It is also possible, however, to use it as a way to directly +specify a regular expression for a back-end engine.

    +
    +
    +
    +
    +

    Synopsis

    +
    +
    +
    +
    signature REGEXP_SYNTAX
    +structure RegExpSyntax : REGEXP_SYNTAX
    +
    +
    +
    +
    +
    +

    Interface

    +
    +
    +
    +
    exception CannotCompile
    +
    +structure CharSet : ORD_SET where type Key.ord_key = char
    +
    +datatype syntax
    +  = Group of syntax
    +  | Alt of syntax list
    +  | Concat of syntax list
    +  | Interval of (syntax * int * int option)
    +  | MatchSet of CharSet.set
    +  | NonmatchSet of CharSet.set
    +  | Char of char
    +  | Begin
    +  | End
    +
    +val optional : syntax -> syntax
    +val closure : syntax -> syntax
    +val posClosure : syntax -> syntax
    +
    +val fromRange : char * char -> CharSet.set
    +val addRange : CharSet.set * char * char -> CharSet.set
    +
    +val allChars : CharSet.set
    +
    +val alnum : CharSet.set
    +val alpha : CharSet.set
    +val ascii : CharSet.set
    +val blank : CharSet.set
    +val cntl : CharSet.set
    +val digit : CharSet.set
    +val graph : CharSet.set
    +val lower : CharSet.set
    +val print : CharSet.set
    +val punct : CharSet.set
    +val space : CharSet.set
    +val upper : CharSet.set
    +val word : CharSet.set
    +val xdigit : CharSet.se
    +
    +
    +
    +
    +
    +

    Description

    +
    +
    +
    +
    exception CannotCompile
    +
    +

    This exception is meant to be raised by back-ends when they encounter +a feature that they cannot handle.

    +
    +
    structure CharSet : ORD_SET where type Key.ord_key = char
    +
    +

    This substructure implements sets of 8-bit characters. Currently it +is implemented using sorted lists (i.e., using the +ListSetFn functor), but that may +be changed in the future.

    +
    +
    datatype syntax
    +
    +

    This datatype defines the abstract syntax of regular expressions that + is supported by the library. The constructors are defined as follows:

    +
    +
    +
    +
      +
    • +

      Group re:: +defines a match group (i.e., that produce a corresponding +match-tree node for the input matched by re.

      +
    • +
    • +

      Alt[re1, re2, …​, ren]:: +matches any of re1, re2, …​, ren. If the list is empty, then it +matches nothing.

      +
    • +
    • +

      Concat[re1, re2, …​, ren]:: +matches the concatenation of re1, re2, …​, ren. If the list +is empty, then it matches the empty string.

      +
    • +
    • +

      Interval(re, n, NONE):: +matches re repeated at least n times.

      +
    • +
    • +

      Interval(re, n, SOME m):: +matches re repeated from n to m times.

      +
    • +
    • +

      MatchSet cs:: +matches a single character that is in the set cs.

      +
    • +
    • +

      NonmatchSet cs:: +matches a single character that is not in the set cs.

      +
    • +
    • +

      Char c:: +matches the single character c.

      +
    • +
    • +

      Begin:: +matches beginning of the input stream.

      +
    • +
    • +

      End:: +matches end of the input stream.

      +
    • +
    +
    +
    +
    +
    +
    val optional : syntax → syntax
    +
    +

    optional re is equivalent to Interval(re, 0, SOME 1).

    +
    +
    val closure : syntax → syntax
    +
    +

    closure re is equivalent to Interval(re, 0, NONE).

    +
    +
    val posClosure : syntax → syntax
    +
    +

    posClosure re is equivalent to Interval(re, 1, NONE).

    +
    +
    val fromRange : char * char -> CharSet.set
    +
    +

    fromRange (c1, c2) returns the set containing the characters +in the range from c1 to c2 (inclusive). This expression +raises the Size +exception if c2 < c1.

    +
    +
    val addRange : CharSet.set * char * char -> CharSet.set
    +
    +

    addRange (cs, c1, c2) adds the set of characters in the range +from c1 to c2 (inclusive) to cs. This expression raises +the Size +exception if c2 < c1.

    +
    +
    val allChars : CharSet.set
    +
    +

    is the set of all 8-bit characters.

    +
    +
    +
    +
    +

    POSIX Character Classes

    +
    +

    The RegExpSyntax structure pre-defines the following character sets, +which are part of the POSIX regular-expression standard (plus a couple +of extras):

    +
    +
    +
    +
    val alnum : CharSet.set
    +
    +

    is the set of letters and digits.

    +
    +
    val alpha : CharSet.set
    +
    +

    is the set of letters.

    +
    +
    val ascii : CharSet.set
    +
    +

    is the set of characters c such that 0 <= ord c <= 127.

    +
    +
    val blank : CharSet.set
    +
    +

    is the set of #"\t" and space.

    +
    +
    val cntl : CharSet.set
    +
    +

    is the set of non-printable characters.

    +
    +
    val digit : CharSet.set
    +
    +

    is the set of decimal digits.

    +
    +
    val graph : CharSet.set
    +
    +

    is the set of visible characters (does not include space).

    +
    +
    val lower : CharSet.set
    +
    +

    is the set of lower-case letters.

    +
    +
    val print : CharSet.set
    +
    +

    is the set of printable characters (includes space).

    +
    +
    val punct : CharSet.set
    +
    +

    is the set of visible characters other than letters and digits.

    +
    +
    val space : CharSet.set
    +
    +

    is the set of #"\t", #"\r", #"\n", #"\v", #"\f", and space.

    +
    +
    val upper : CharSet.set
    +
    +

    is the set of upper-case letters.

    +
    +
    val word : CharSet.set
    +
    +

    is the set of letters, digit, and #"_".

    +
    +
    val xdigit : CharSet.set
    +
    +

    is the set of hexadecimal digits.

    +
    +
    +
    +
    +
    +
    +
    +

    See Also

    + +
    +
    +
    + +
    + + diff --git a/doc/html/smlnj-lib/RegExp/tutorial.html b/doc/html/smlnj-lib/RegExp/tutorial.html new file mode 100644 index 0000000..789b032 --- /dev/null +++ b/doc/html/smlnj-lib/RegExp/tutorial.html @@ -0,0 +1,229 @@ + + + + + + + + + + Using the RegExp Library + + + + + + + + +
    +
    +
    +
    + +
    + +
    Using the RegExp Library
    +
    +
    +
    + +
    +
    +
    +

    Introduction

    +
    +
    +

    The RegExp Library is designed for flexibility; it allows mixing and +matching of different front-end syntax with back-end engines, as well +as supporting arbitrary input sources. +This flexibility, however, comes at the cost of making some of the +simple applications a bit less obvious. This tutorial shows how the +RegExp Library can be used in a variety of common applications.

    +
    +
    +
    +
    +

    Assembling an Regular Expression Matcher

    +
    +
    +

    Before we can do anything else, we must assemble a regular-expression +matcher. For the purposes of this tutorial, we use a combination +of the AwkSyntax front-end and the BackTrackEngine back-end.

    +
    +
    +
    +
    structure RE = RegExpFn(
    +    structure P = AwkSyntax
    +    structure E = BackTrackEngine)
    +
    +
    +
    +
    +
    +

    Match trees

    +
    +
    +

    Regular expressions may contain grouping +operators. When a pattern matches a string, these groups induce a nested +tree structure on the matched string. +The MatchTree structure defines +a polymorphic representation of this structure, along with a +number of utility functions for extracting information from +a match.

    +
    +
    +
    +
    structure MT = MatchTree
    +
    +
    +
    +
    +
    +

    Example: scanning tokens

    +
    +
    +

    The match function in the REGEXP signature allows one to distinguish +between a set of possible regular expression matches. One application of +this mechanism is a simple scanner. Let us define a datatype for tokens, +which can be white space, numbers, or identifiers.

    +
    +
    +
    +
    datatype tok
    +  = WS | NUM of IntInf.int | ID of string
    +
    +
    +
    +

    We can then define the scanner as follows:

    +
    +
    +
    +
    fun scanner getc gets = let
    +      fun getMatch cons (MT.Match({pos, len}, _)) = cons (gets (pos, len))
    +      in
    +        RE.match [
    +            ("[ \t\n]+", getMatch (fn _ => WS)),
    +            ("[0-9]+", getMatch (fn s => NUM(valOf(IntInf.fromString s)))),
    +            ("[a-zA-Z][a-zA-Z0-9]*", getMatch ID)
    +          ] getc
    +      end
    +
    +
    +
    +

    Here the getc parameter is the standard character reader; we have also included +the gets parameter, which is a function of type

    +
    +
    +
    +
    'strm * int -> string
    +
    +
    +
    +

    for getting a string from a stream. For many input sources, the gets function +has an efficient and direct implementation, but it can also be implemented in +terms of the getc function as follows:

    +
    +
    +
    +
    fun gets getc (strm, n) = let
    +      fun getChrs (0, _, chrs) = String.implodeRev chrs
    +        | getChrs (n, strm, chrs) = (case getc strm
    +             of NONE => raise Fail "empty stream"
    +              | SOME(c, strm) => getChrs (n-1, strm, c::chrs)
    +            (* end case *))
    +      in
    +        getChrs (n, strm, [])
    +      end;
    +
    +
    +
    +

    Because this function is only called after the scanner function has matched +a sequence of n characters from strm, the "empty stream" case will not +occur for well behaving input streams.

    +
    +
    +

    Here is an example of using the scanner to tokenize strings, where we use the +Basis Library substring type to implement the stream type:

    +
    +
    +
    +
    fun tokens s = let
    +      fun gets (ss, n) = Substring.string(Substring.slice (ss, 0, SOME n))
    +      val scan = scanner Substring.getc gets
    +      fun lp (ss, toks) = (case scan ss
    +             of SOME(tok, ss') => lp (ss', tok::toks)
    +              | NONE => List.rev toks
    +            (* end case *))
    +      in
    +        lp (Substring.full s, [])
    +      end;
    +
    +
    +
    +
    +
    +
    + +
    + + diff --git a/doc/html/smlnj-lib/SExp/sexp-lib.html b/doc/html/smlnj-lib/SExp/sexp-lib.html new file mode 100644 index 0000000..3b83043 --- /dev/null +++ b/doc/html/smlnj-lib/SExp/sexp-lib.html @@ -0,0 +1,213 @@ + + + + + + + + + + The SExp Library + + + + + + + + +
    +
    +
    +
    + +
    + +
    The SExp Library
    +
    +
    +
    + +
    +
    +
    +

    Overview

    +
    +
    +

    The SExp Library supports the reading and writing of structured data using +the S-expression syntax. It is a work in progress, and does not fully conform with +any formal S-exp specification.

    +
    +
    +
      +
    • +

      End-of-line comments begin with a semicolon (;)

      +
    • +
    • +

      An S-Expression is either an atomic token (boolean, number, symbol, or +string), a quoted expression, or a list of S-Expressions enclosed in brackets.

      +
    • +
    • +

      Quoted expressions are formed by the single-quote character (') followed +by an expression.

      +
    • +
    • +

      Lists are delimited by matched pairs of () [] or {}, nested freely.

      +
    • +
    • +

      List items are separated with whitespace (space, tab, newlines, or carriage +returns).

      +
    • +
    • +

      Symbols (or identifiers) begin with an initial character followed by +zero or more subsequent characters, where an initial character is +either a letter or one of the characters -+.@!$%&*/:<⇒?^_~ and +a subsequent character is either an initial character, a decimal digit, +or the character #.

      +
    • +
    • +

      Booleans are represented by the literals #f (false) and #t (true).

      +
    • +
    • +

      Numbers are either signed integers or floating-point numbers; the +sign (if present) is one of "'+'," “-," or "~”.

      +
    • +
    • +

      Integers may be specified in decimal without any prefix, or in hexadecimal +with the prefix "0x". In hex, the value is assumed to be unsigned, so -255 +should be written "-0xff" rather than "0x-ff".

      +
    • +
    • +

      The format of a floating point number is described by the following +regular expression:

      +
      +
      +\[ \mathit{sign}^{?}\,\mathit{digit}^{+}\,\mathtt{.}\;\mathit{digit}^{+}\, + ([\mathtt{eE}]\,\mathit{sign}^{?}\,\mathit{digit}^{+})^{?}\] +
      +
      +
      +

      Notably, “1.” and “.1” are invalid and “1” is parsed as an +integer — floats must have a dot with digits +on both sides.

      +
      +
    • +
    • +

      Strings are sequences of ASCII characters enclosed in double quotes ("). +We follow the syntax of Scheme strings as described in +https://www.scheme.com/tspl4/grammar.html#./grammar:strings

      +
      +

      The difference between symbols and strings is that symbols are represented as +Atom.atom types, so equality comparisons are more efficient.

      +
      +
    • +
    +
    +
    +

    The original version of the library was written by Damon Wang +at the University of Chicago. It has since been modified and +maintained by John Reppy.

    +
    +
    +
    +
    +

    Contents

    +
    +
    +
    +
    structure SExp
    +
    +

    Defines the tree representation of S-expression data.

    +
    +
    structure SExpParser
    +
    +

    Implements an S-Expression parser.

    +
    +
    structure SExpPP
    +
    +

    Implements an S-Expression pretty-printer.

    +
    +
    structure SExpPrinter
    +
    +

    Implements an S-Expression printer that produces condensed +output without indentation or line breaks.

    +
    +
    +
    +
    +
    +
    +

    Usage

    +
    +
    +

    For SML/NJ, include $/sexp-lib.cm in your +CM file.

    +
    +
    +

    For use in MLton, include +$(SML_LIB)/smlnj-lib/SExp/sexp-lib.mlb in your MLB file.

    +
    +
    +
    +
    +
    + +
    + + diff --git a/doc/html/smlnj-lib/SExp/str-SExp.html b/doc/html/smlnj-lib/SExp/str-SExp.html new file mode 100644 index 0000000..f39a1e4 --- /dev/null +++ b/doc/html/smlnj-lib/SExp/str-SExp.html @@ -0,0 +1,202 @@ + + + + + + + + + + The SExp structure + + + + + + + + +
    +
    +
    +
    + +
    + +
    The SExp structure
    +
    +
    +
    + +
    +
    +
    +
    +
    +

    The SExp structure defines the tree representation of S-expression data.

    +
    +
    +
    +
    +

    Synopsis

    +
    +
    +
    +
    structure SExp
    +
    +
    +
    +
    +
    +

    Interface

    +
    +
    +
    +
    datatype value
    +  = SYMBOL of Atom.atom
    +  | BOOL of bool
    +  | INT of IntInf.int
    +  | FLOAT of real
    +  | STRING of string
    +  | QUOTE of value
    +  | LIST of value list
    +
    +val same : value * value -> bool
    +val compare : value * value -> order
    +
    +
    +
    +
    +
    +

    Description

    +
    +
    +
    +
    datatype value = …​
    +
    +

    This datatype represents S-expression trees. The constuctors are +defined as follows:

    +
    +
    +
    +
    +
    SYMBOL of Atom.atom
    +
    +

    represents an identifier. We use the atom +type to represent these for fast equality testing.

    +
    +
    BOOL of bool
    +
    +

    represents a boolean literal.

    +
    +
    INT of IntInf.int
    +
    +

    represents an integer number.

    +
    +
    FLOAT of real
    +
    +

    represents a floating-point number.

    +
    +
    STRING of string
    +
    +

    represents a string value.

    +
    +
    QUOTE of value list
    +
    +

    represents a quoted value.

    +
    +
    LIST of value list
    +
    +

    represents a list of values.

    +
    +
    +
    +
    +
    +
    +
    val same : value * value -> bool
    +
    +

    same (se1, se2) compares se1 and se2 for equality and returns +true if, and only if, they are equal.

    +
    +
    val compares : value * value -> order
    +
    +

    same (se1, se2) compares se1 and se2 for their order.

    +
    +
    +
    +
    +
    +
    +

    See Also

    + +
    +
    +
    + +
    + + diff --git a/doc/html/smlnj-lib/SExp/str-SExpPP.html b/doc/html/smlnj-lib/SExp/str-SExpPP.html new file mode 100644 index 0000000..6b1bb6f --- /dev/null +++ b/doc/html/smlnj-lib/SExp/str-SExpPP.html @@ -0,0 +1,145 @@ + + + + + + + + + + The SExpPP structure + + + + + + + + +
    +
    +
    +
    + +
    + +
    The SExpPP structure
    +
    +
    +
    + +
    +
    +
    +
    +
    +

    The SExpPP structure implements a pretty printer for S-Expressions.

    +
    +
    +
    +
    +

    Synopsis

    +
    +
    +
    +
    structure SExpPP
    +
    +
    +
    +
    +
    +

    Interface

    +
    +
    +
    +
    val output : TextIOPP.stream * SExp.value -> unit
    +
    +
    +
    +
    +
    +

    Description

    +
    +
    +
    +
    val output : TextIOPP.stream * SExp.value -> unit
    +
    +

    output (ppS, v) pretty prints the value using the specified +pretty-printing stream.

    +
    +
    +
    +
    +
    +
    +

    See Also

    + +
    +
    +
    + +
    + + diff --git a/doc/html/smlnj-lib/SExp/str-SExpParser.html b/doc/html/smlnj-lib/SExp/str-SExpParser.html new file mode 100644 index 0000000..12a6003 --- /dev/null +++ b/doc/html/smlnj-lib/SExp/str-SExpParser.html @@ -0,0 +1,153 @@ + + + + + + + + + + The SExpParser structure + + + + + + + + +
    +
    +
    +
    + +
    + +
    The SExpParser structure
    +
    +
    +
    + +
    +
    +
    +
    +
    +

    The SExpParser structure implements a parser for S-Expressions.

    +
    +
    +
    +
    +

    Synopsis

    +
    +
    +
    +
    structure SExpParser
    +
    +
    +
    +
    +
    +

    Interface

    +
    +
    +
    +
    val parse : TextIO.instream -> SExp.value list
    +
    +val parseFile : string -> SExp.value list
    +
    +
    +
    +
    +
    +

    Description

    +
    +
    +
    +
    val parse : TextIO.instream -> SExp.value list
    +
    +

    parse inS parses a sequence of S-Expressions from the input +stream inS. The Fail +exception is raised if a syntax error is encountered.

    +
    +
    val parseFile : string -> SExp.value list
    +
    +

    parse f parses a sequence of S-Expressions from the file f. +The Fail +exception is raised if a syntax error is encountered.

    +
    +
    +
    +
    +
    +
    +

    See Also

    + +
    +
    +
    + +
    + + diff --git a/doc/html/smlnj-lib/SExp/str-SExpPrinter.html b/doc/html/smlnj-lib/SExp/str-SExpPrinter.html new file mode 100644 index 0000000..0b0093e --- /dev/null +++ b/doc/html/smlnj-lib/SExp/str-SExpPrinter.html @@ -0,0 +1,147 @@ + + + + + + + + + + The SExpPrinter structure + + + + + + + + +
    +
    +
    +
    + +
    + +
    The SExpPrinter structure
    +
    +
    +
    + +
    +
    +
    +
    +
    +

    The SExpPrinter structure implements a printer for S-Expressions. +This printer does not introduce any line breaks. For output that +is more readable, use the SExpPP module.

    +
    +
    +
    +
    +

    Synopsis

    +
    +
    +
    +
    structure SExpPrinter
    +
    +
    +
    +
    +
    +

    Interface

    +
    +
    +
    +
    val print : TextIO.outstream * SExp.value -> unit
    +
    +
    +
    +
    +
    +

    Description

    +
    +
    +
    +
    val print : TextIO.outstream * SExp.value -> unit
    +
    +

    print (outS, v) prints the S-Expression value v to the +output stream outS.

    +
    +
    +
    +
    +
    +
    +

    See Also

    + +
    +
    +
    + +
    + + diff --git a/doc/html/smlnj-lib/UUID/str-GenUUID.html b/doc/html/smlnj-lib/UUID/str-GenUUID.html new file mode 100644 index 0000000..ac689ba --- /dev/null +++ b/doc/html/smlnj-lib/UUID/str-GenUUID.html @@ -0,0 +1,143 @@ + + + + + + + + + + The GenUUID structure + + + + + + + + +
    +
    +
    +
    + +
    + +
    The GenUUID structure
    +
    +
    +
    + +
    +
    +
    +
    +
    +

    The GenUUID structure implements a generator for random +(Variant 1, Type 4) UUIDs. The random number generator +is seeded by the current time.

    +
    +
    +
    +
    +

    Synopsis

    +
    +
    +
    +
    structure GenUUID
    +
    +
    +
    +
    +
    +

    Interface

    +
    +
    +
    +
    val new : unit -> UUID.t
    +
    +
    +
    +
    +
    +

    Description

    +
    +
    +
    +
    val new : unit -> UUID.t
    +
    +

    new () generates a new Variant 1, Type 4 UUID.

    +
    +
    +
    +
    +
    +
    +

    See Also

    + +
    +
    +
    + +
    + + diff --git a/doc/html/smlnj-lib/UUID/str-UUID.html b/doc/html/smlnj-lib/UUID/str-UUID.html new file mode 100644 index 0000000..72d13d6 --- /dev/null +++ b/doc/html/smlnj-lib/UUID/str-UUID.html @@ -0,0 +1,222 @@ + + + + + + + + + + The UUID structure + + + + + + + + +
    +
    +
    +
    + +
    + +
    The UUID structure
    +
    +
    +
    + +
    +
    +
    +
    +
    +

    The UUID structure provides an implementation of + +UUIDs (Universally Unique IDentifiers). +UUIDs, which are also known as GUIDs (Globally Unique IDentifiers), +are sequences of 16-bytes.

    +
    +
    +
    +
    +

    Synopsis

    +
    +
    +
    +
    structure UUID
    +
    +
    +
    +
    +
    +

    Interface

    +
    +
    +
    +
    type t
    +
    +val null : t
    +
    +val compare : t * t -> order
    +
    +val same : t * t -> bool
    +
    +val hash : t -> word
    +
    +val toString : t -> string
    +
    +val fromString : string -> t option
    +
    +val toBytes : t -> Word8Vector.vector
    +
    +val fromBytes : Word8Vector.vector -> t
    +
    +
    +
    +
    +
    +

    Description

    +
    +
    +
    +
    type t
    +
    +

    the abstract type of UUIDs.

    +
    +
    val null : t
    +
    +

    null is the all-zeros UUID

    +
    +
    val compare : t * t -> order
    +
    +

    compare (uuid1, uuid2) does a byte-wise comparison of the two +UUIDs and returns their order.

    +
    +
    val same : t * t -> bool
    +
    +

    same (uuid1, uuid2) does a byte-wise comparison of the two +UUIDs and returns true is they are equal and false otherwise.

    +
    +
    val hash : t -> word
    +
    +

    hash uuid returns a hash of the UUID.

    +
    +
    +
    +
    +
    +
    val toString : t -> string
    +
    +

    toString uuid formats uuid as a string of the form

    +
    +
    +
    +
    +
    +
    "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx"
    +
    +
    +
    +
    +
    where each "`x`" is a lower-case hexadecimal digit.  The first two digits
    +in the string correspond to the first byte, and so on.
    +
    +
    +
    +
    +
    val fromString : string -> t option
    +
    +

    fromString s converts the string s, which should be of the form returned +by toString to SOME uuid, where uuid is the UUID +denoted by the string. Leading whitespace is ignored. If the string does +not have the correct format, then NONE is returned.

    +
    +
    val toBytes : t -> Word8Vector.vector
    +
    +

    toBytes uuid returns the 16-element Word8Vector.vector value +that represents uuid.

    +
    +
    val fromBytes : Word8Vector.vector -> t
    +
    +

    fromBytes bytes takes a 16-element vector of bytes and converts it to +a UUID. The Size +exception is raised if the length of the vector is not exactly 16. +Otherwise, there is no validity chechking of the UUID (i.e., the variant +and type are not checked).

    +
    +
    +
    +
    +
    +
    +

    See Also

    + +
    +
    +
    + +
    + + diff --git a/doc/html/smlnj-lib/UUID/uuid-lib.html b/doc/html/smlnj-lib/UUID/uuid-lib.html new file mode 100644 index 0000000..18cb7a2 --- /dev/null +++ b/doc/html/smlnj-lib/UUID/uuid-lib.html @@ -0,0 +1,139 @@ + + + + + + + + + + The UUID Library + + + + + + + + +
    +
    +
    +
    + +
    + +
    The UUID Library
    +
    +
    +
    + +
    +
    +
    +

    Overview

    +
    +
    +

    The UUID Library is a library for generating + +UUIDs (Universally Unique IDentifiers), which +are also known as GUIDs (Globally Unique IDentifiers). +It currently only supports generating Variant 1, Type 4 UUIDs, which +are random bit strings (these are the UUIDs generated by the NewGuid +function on Microsoft Windows).

    +
    +
    +

    The UUID Library was first included in Version 110.97 of SML/NJ.

    +
    +
    +
    +
    +

    Contents

    +
    +
    +
    +
    structure UUID
    +
    +

    This structure implements an abstract type for representing UUIDs.

    +
    +
    structure GenUUID
    +
    +

    This structure implements a generator for UUIDs.

    +
    +
    +
    +
    +
    +
    +

    Usage

    +
    +
    +

    For SML/NJ, include $/uuid-lib.cm in your +CM file.

    +
    +
    +

    For use in MLton, include +$(SML_LIB)/smlnj-lib/UUID/uuid-lib.mlb in your MLB file.

    +
    +
    +
    +
    +
    + +
    + + diff --git a/doc/html/smlnj-lib/Unix/str-UnixEnv.html b/doc/html/smlnj-lib/Unix/str-UnixEnv.html new file mode 100644 index 0000000..3f604fe --- /dev/null +++ b/doc/html/smlnj-lib/Unix/str-UnixEnv.html @@ -0,0 +1,200 @@ + + + + + + + + + + The UnixEnv structure + + + + + + + + +
    +
    +
    +
    + +
    + +
    The UnixEnv structure
    +
    +
    +
    + +
    +
    +
    +
    +
    +

    The UnixEnv structure supports operations on the host process’s environment, +which is essentially a list of strings of the form “`name=value`”, where +the “=” character does not appear in name. We assume that environments +are "well formed;" i.e., that an environment variable is only defined once.

    +
    +
    + + + + + +
    +
    Warning
    +
    +
    +

    Binding the user’s environment as an SML value and then exporting the +SML heap image can result in incorrect behavior, since the environment bound +in the heap image may differ from the user’s environment when the exported +heap image is loaded.

    +
    +
    +
    +
    +
    +
    +

    Synopsis

    +
    +
    +
    +
    signature UNIX_ENV
    +structure UnixEnv : UNIX_ENV
    +
    +
    +
    +
    +
    +

    Interface

    +
    +
    +
    +
    val getFromEnv : (string * string list) -> string option
    +
    +val getValue : {name : string, default : string, env : string list} -> string
    +
    +val removeFromEnv : (string * string list) -> string list
    +
    +val addToEnv : (string * string list) -> string list
    +
    +val environ : unit -> string list
    +
    +val getEnv : string -> string option
    +
    +
    +
    +
    +
    +

    Description

    +
    +
    +
    +
    val getFromEnv : (string * string list) -> string option
    +
    +

    getEnv (name, env) returns SOME v if (name, v) is in the environment +env. Otherwise, it returns NONE if name is not bound in env.

    +
    +
    val getValue : {name : string, default : string, env : string list} -> string
    +
    +

    getEnv {name, default, env} returns v if (name, v) is in the +environment env. Otherwise, it returns default if name is not +bound in env.

    +
    +
    val removeFromEnv : (string * string list) -> string list
    +
    +

    removeFromEnv (name, env) removes any binding of name from the +environment. Note that if env has multiple bindings of name +(i.e., env is not well formed), then only the first binding +is removed.

    +
    +
    val addToEnv : (string * string list) -> string list
    +
    +

    addToEnv (bind, env) adds the binding bind, which should be of the +form “`name=value`”, to the environment. If there was an +existing binding of name in env, then it will be replaced.

    +
    +
    val environ : unit -> string list
    +
    +

    env () returns the user’s (host process) environment.

    +
    +
    val getEnv : string -> string option
    +
    +

    getEnv name returns the binding of the environment variable name +in the user’s (host process) environment.

    +
    +
    +
    +
    +
    +
    +

    See Also

    + +
    +
    +
    + +
    + + diff --git a/doc/html/smlnj-lib/Unix/str-UnixPath.html b/doc/html/smlnj-lib/Unix/str-UnixPath.html new file mode 100644 index 0000000..d36b3b4 --- /dev/null +++ b/doc/html/smlnj-lib/Unix/str-UnixPath.html @@ -0,0 +1,203 @@ + + + + + + + + + + The UnixPath structure + + + + + + + + +
    +
    +
    +
    + +
    + +
    The UnixPath structure
    +
    +
    +
    + +
    +
    +
    +
    +
    +

    The UnixPath structure provides support for searching for files +in the Unix file system using a list of possible locations.

    +
    +
    +

    Note that this module is largely superseded by the +PathUtil module +in the Util Library.

    +
    +
    +
    +
    +

    Synopsis

    +
    +
    +
    +
    signature UNIX_PATH
    +structure UnixPath : UNIX_PATH
    +
    +
    +
    +
    +
    +

    Interface

    +
    +
    +
    +
    type path_list = string list
    +
    +val getPath : unit -> path_list
    +
    +datatype access_mode = datatype OS.FileSys.access_mode
    +datatype file_type = F_REGULAR | F_DIR | F_SYMLINK | F_SOCK | F_CHR | F_BLK
    +
    +val findFile : (path_list * access_mode list) -> string -> string option
    +
    +val findFiles : (path_list * access_mode list) -> string -> string list
    +
    +val findFileOfType : (path_list * file_type * access_mode list) -> string -> string option
    +
    +val findFilesOfType : (path_list * file_type * access_mode list) -> string -> string list
    +
    +
    +
    +
    +
    +

    Description

    +
    +
    +
    +
    type path_list = string list
    +
    +

    A list of file-system paths that is used to search for files.

    +
    +
    val getPath : unit -> path_list
    +
    +

    getPath () return’s the value of the user’s PATH shell variable +as a path_list.

    +
    +
    datatype access_mode = datatype OS.FileSys.access_mode
    +
    +

    Rebind the file-system access-mode constructors.

    +
    +
    datatype file_type = F_REGULAR | F_DIR | F_SYMLINK | F_SOCK | F_CHR | F_BLK
    +
    +

    The different types of file-system objects in Unix.

    +
    +
    val findFile : (path_list * access_mode list) -> string -> string option
    +
    +

    findFile (paths, mode) name returns SOME path, where path is a string of + the form "p/name" and p is the first string in paths + such that path has the given access modes (the empty list of access modes + is used to test for existence). If no such file exists, then NONE is returned.

    +
    +
    val findFiles : (path_list * access_mode list) -> string -> string list
    +
    +

    findFiles (paths, mode) name returns a list of strings, such that each string +s in the result has the form "p/name" with p in paths +and the file named by path has the specified access modes.

    +
    +
    val findFileOfType : (path_list * file_type * access_mode list) -> string -> string option
    +
    +

    findFileOfType (paths, ftype, mode) name returns the SOME path, where path is + a string of the form "p/name" and p is the first string + in paths such that path has the given access modes (the empty list of + access modes is used to test for existence) and is of the specified file type. + If no such file exists, then NONE is returned.

    +
    +
    val findFilesOfType : (path_list * file_type * access_mode list) -> string -> string list
    +
    +

    findFilesOfType (paths, mode) name returns a list of strings, +such that each string s in the result has the form "p/name" +with p in paths and the file named by path has the specified +access modes and is of the specified file type.

    +
    +
    +
    +
    +
    +
    +

    See Also

    + +
    +
    +
    + +
    + + diff --git a/doc/html/smlnj-lib/Unix/unix-lib.html b/doc/html/smlnj-lib/Unix/unix-lib.html new file mode 100644 index 0000000..eed4cbd --- /dev/null +++ b/doc/html/smlnj-lib/Unix/unix-lib.html @@ -0,0 +1,130 @@ + + + + + + + + + + The Unix Library + + + + + + + + +
    +
    +
    +
    + +
    + +
    The Unix Library
    +
    +
    +
    + +
    +
    +
    +

    Overview

    +
    +
    +

    The Unix Library provides some Unix-specific utilities.

    +
    +
    +
    +
    +

    Contents

    +
    +
    +
    +
    structure UnixEnv
    +
    +

    This structure provides support for querying the user’s environment.

    +
    +
    structure UnixPath
    +
    +

    This structure supports Unix-specific file-system searches.

    +
    +
    +
    +
    +
    +
    +

    Usage

    +
    +
    +

    For SML/NJ, include $/unix-lib.cm in your +CM file.

    +
    +
    +

    For use in MLton, include +$(SML_LIB)/smlnj-lib/Unix/unix-lib.mlb in your MLB file.

    +
    +
    +
    +
    +
    + +
    + + diff --git a/doc/html/smlnj-lib/Util/fun-ArrayQSortFn.html b/doc/html/smlnj-lib/Util/fun-ArrayQSortFn.html new file mode 100644 index 0000000..952df15 --- /dev/null +++ b/doc/html/smlnj-lib/Util/fun-ArrayQSortFn.html @@ -0,0 +1,241 @@ + + + + + + + + + + The ArrayQSortFn functor + + + + + + + + +
    +
    +
    +
    + +
    + +
    The ArrayQSortFn functor
    +
    +
    +
    +
    +
    + +
    +
    +
    +
    +
    +
    +
    +

    The ArrayQSortFn functor provides in situ sorting of monomorphic arrays +using the quicksort algorithm.

    +
    +
    +
    +
    +

    Synopsis

    +
    +
    +
    +
    signature MONO_ARRAY_SORT
    +functor ArrayQSortFn (A : MONO_ARRAY) : MONO_ARRAY_SORT
    +
    +
    +
    +
    +
    +

    Functor Argument Interface

    +
    +
    +
    +
    A : MONO_ARRAY
    +
    +
    +
    +
    +
    +

    Functor Argument Description

    +
    +
    +
    +
    A : MONO_ARRAY
    +
    +

    A structure that implements the +MONO_ARRAY signature from +the SML Basis Library.

    +
    +
    +
    +
    +
    +
    +

    Interface

    +
    +
    +
    +
    structure A : MONO_ARRAY
    +
    +val sort : (A.elem * A.elem -> order) -> A.array -> unit
    +
    +val sorted : (A.elem * A.elem -> order) -> A.array -> bool
    +
    +
    +
    +
    +
    +

    Interface Description

    +
    +
    +
    +
    structure A : MONO_ARRAY
    +
    +

    The argument structure.

    +
    +
    val sort : (A.elem * A.elem -> order) -> A.array -> unit
    +
    +

    sort cmp arr sorts the array arr into ascending order +according to the comparison function cmp.

    +
    +
    val sorted : (A.elem * A.elem -> order) -> A.array -> bool
    +
    +

    sorted cmp arr returns true if, and only if, the array arr= is +sorted in ascending order.

    +
    +
    +
    +
    +
    + +
    +
    + +
    + + diff --git a/doc/html/smlnj-lib/Util/fun-BSearchFn.html b/doc/html/smlnj-lib/Util/fun-BSearchFn.html new file mode 100644 index 0000000..a40aed4 --- /dev/null +++ b/doc/html/smlnj-lib/Util/fun-BSearchFn.html @@ -0,0 +1,238 @@ + + + + + + + + + + The BSearchFn functor + + + + + + + + +
    +
    +
    +
    + +
    + +
    The BSearchFn functor
    +
    +
    +
    +
    +
    + +
    +
    +
    +
    +
    +
    +
    +

    The BSearchFn functor provides binary search on sorted monomorphic +arrays.

    +
    +
    +
    +
    +

    Synopsis

    +
    +
    +
    +
    functor BSearchFn (A : MONO_ARRAY)
    +
    +
    +
    +
    +
    +

    Functor Argument Interface

    +
    +
    +
    +
    A : MONO_ARRAY
    +
    +
    +
    +
    +
    +

    Functor Argument Description

    +
    +
    +
    +
    A : MONO_ARRAY
    +
    +

    A structure that implements the +MONO_ARRAY signature from +the SML Basis Library.

    +
    +
    +
    +
    +
    +
    +

    Interface

    +
    +
    +
    +
    structure A : MONO_ARRAY
    +
    +val bsearch : (('a * A.elem) -> order) -> ('a * A.array) -> (int * A.elem) option
    +
    +
    +
    +
    +
    +

    Description

    +
    +
    +
    +
    structure A : MONO_ARRAY
    +
    +

    The array structure that defines the element and array types.

    +
    +
    val bsearch : (('a * A.elem) -> order) -> ('a * A.array) -> (int * A.elem) option
    +
    +

    bsearch cmp (key, arr) returns SOME(ix, elem) where A.sub(arr, ix) is +elem and cmp(key, elem) returns EQUAL; if no such element is present, then +NONE is returned. This function uses a binary search over the array, + which requires that the elements be arranged in increasing order by the cmp +function. Usually, the type of the search key will be A.elem, but the +interface allows some computation on the elements, as long as the ordering +is respected.

    +
    +
    +
    +
    +
    + +
    +
    + +
    + + diff --git a/doc/html/smlnj-lib/Util/fun-BinaryMapFn.html b/doc/html/smlnj-lib/Util/fun-BinaryMapFn.html new file mode 100644 index 0000000..2a89398 --- /dev/null +++ b/doc/html/smlnj-lib/Util/fun-BinaryMapFn.html @@ -0,0 +1,188 @@ + + + + + + + + + + The BinaryMapFn functor + + + + + + + + +
    +
    +
    +
    + +
    + +
    The BinaryMapFn functor
    +
    +
    +
    +
    +
    + +
    +
    +
    +
    +
    +
    +
    +

    The BinaryMapFn functor provides a balanced-binary-tree implementation of the +ORD_MAP signature parameterized over the key type.

    +
    +
    +

    The original implementation was written by Stephen Adams and was based +on the paper Binary Search Trees of Bounded Balance +by Nievergelt and Reingold (SIAM Journal of Computing; March 1973).

    +
    +
    +

    It is recommended, however, that one use the RedBlackMapFn +functor instead of BinaryMapFn, since experimentation has shown it to be +faster across the board.

    +
    +
    +
    +
    +

    Synopsis

    +
    +
    +
    +
    functor BinaryMapFn (K : ORD_KEY) :> ORD_MAP where type Key.ord_key = K.ord_key
    +
    +
    +
    +
    +
    +

    See Also

    + +
    +
    +
    + +
    + + diff --git a/doc/html/smlnj-lib/Util/fun-BinarySetFn.html b/doc/html/smlnj-lib/Util/fun-BinarySetFn.html new file mode 100644 index 0000000..dad8da2 --- /dev/null +++ b/doc/html/smlnj-lib/Util/fun-BinarySetFn.html @@ -0,0 +1,188 @@ + + + + + + + + + + The BinarySetFn functor + + + + + + + + +
    +
    +
    +
    + +
    + +
    The BinarySetFn functor
    +
    +
    +
    +
    +
    + +
    +
    +
    +
    +
    +
    +
    +

    The BinarySetFn functor provides a balanced-binary-tree implementation of the +ORD_SET signature parameterized over the element type.

    +
    +
    +

    The original implementation was written by Stephen Adams and was based +on the paper Binary Search Trees of Bounded Balance +by Nievergelt and Reingold (SIAM Journal of Computing; March 1973).

    +
    +
    +

    It is recommended, however, that one use the RedBlackSetFn +functor instead of BinarySetFn, since experimentation has shown it to be +faster across the board.

    +
    +
    +
    +
    +

    Synopsis

    +
    +
    +
    +
    functor BinarySetFn (K : ORD_KEY) :> ORD_SET where type Key.ord_key = K.ord_key
    +
    +
    +
    +
    +
    +

    See Also

    + +
    +
    +
    + +
    + + diff --git a/doc/html/smlnj-lib/Util/fun-DynamicArrayFn.html b/doc/html/smlnj-lib/Util/fun-DynamicArrayFn.html new file mode 100644 index 0000000..dac7779 --- /dev/null +++ b/doc/html/smlnj-lib/Util/fun-DynamicArrayFn.html @@ -0,0 +1,323 @@ + + + + + + + + + + The DynamicArrayFn functor + + + + + + + + +
    +
    +
    +
    + +
    + +
    The DynamicArrayFn functor
    +
    +
    +
    +
    +
    + +
    +
    +
    +
    +
    +
    +
    +

    The DynamicArrayFn functor provides dynamically sized monomorphic +arrays. Each array has an associated default value, which is +covers those elements that have not been explicitly initialized +(conceptually, one can view an array as having an infinite size). +Thus, reads from indices above the bound will return the default value. +The bound of an array is the highest index of an initialized +element (or ~1 if there are no initialized elements). The +defined range of the array are the elements in the positions +indexed from zero to the bound.

    +
    +
    +
    +
    +

    Synopsis

    +
    +
    +
    +
    signature MONO_DYNAMIC_ARRAY
    +functor DynamicArrayFn (A : MONO_ARRAY) : MONO_DYNAMIC_ARRAY
    +
    +
    +
    +
    +
    +

    Functor Argument Interface

    +
    +
    +
    +
    A : MONO_ARRAY
    +
    +
    +
    +
    +
    +

    Functor Argument Description

    +
    +
    +
    +
    A : MONO_ARRAY
    +
    +

    A structure that implements the +MONO_ARRAY signature from +the SML Basis Library.

    +
    +
    +
    +
    +
    +
    +

    Interface

    +
    +
    +
    +
    type elem
    +type array
    +
    +val array : (int * elem) -> array
    +
    +val subArray : array * int * int -> array
    +
    +val fromList : elem list * elem -> array
    +val toList : array -> elem list
    +
    +val tabulate: int * (int -> elem) * elem -> array
    +
    +val default : array -> elem
    +
    +val sub : array * int -> elem
    +
    +val update : array * int * elem -> unit
    +
    +val bound : array -> int
    +
    +val truncate : array * int -> unit
    +
    +
    +
    +
    +
    +

    Interface Description

    +
    +
    +
    +
    type elem
    +
    +

    The type of array elements.

    +
    +
    type array
    +
    +

    The type of dynamic arrays.

    +
    +
    val array : (int * elem) -> array
    +
    +

    array (sz, dflt) returns a new array with bound ~1 and default +value dflt. The sz argument, which must be non-negative, is used +as a hint of the potential range of indices. This function raises +the Size +exception if sz < 0.

    +
    +
    val subArray : array * int * int -> array
    +
    +

    subArray (arr, lo, hi) returns a new array with the same default +as arr, and whose values in the range [0, hi-lo] are equal to +the values in arr in the range [lo, hi]. This function raises +the Size +exception if lo < 0 or hi < lo-1.

    +
    +
    val fromList : elem list * elem -> array
    +
    +

    fromList (lst, dflt) returns a new array created from the elements +of lst and with default value dflt. The bound of the array will be +length lst - 1.

    +
    +
    val toList : 'a array -> 'a list
    +
    +

    toList arr returns a list of the array’s contents. The resulting +list will have the array’s bound plus one elements.

    +
    +
    val tabulate: int * (int -> elem) * elem -> array
    +
    +

    tabulate (sz, init, dflt) returns a new array with the first +sz elements initialized using the function init and the +default value dflt. This function raises the +Size +exception if sz < 0.

    +
    +
    val default : array -> elem
    +
    +

    default arr returns the default value for the array.

    +
    +
    val sub : array * int -> elem
    +
    +

    sub (arr, ix) returns the value of the array at index ix. +If that value has not been explicitly set, then it returns the array’s +default value. This function raises the +Subscript +exception if ix < 0.

    +
    +
    val update : array * int * elem -> unit
    +
    +

    update (arr, ix, v) sets the value at index ix of the array to v. +If ix is greater than the current bound of the array, then the bound +is set to ix. This function raises the +Subscript +exception if ix < 0.

    +
    +
    val bound : array -> int
    +
    +

    bound arr returns the current bound of the array, which is the highest +index that has been explicitly set (e.g., by update).

    +
    +
    val truncate : array * int -> unit
    +
    +

    truncate (arr, sz) sets every entry with index greater or equal to +sz to the array’s default value.

    +
    +
    +
    +
    +
    + +
    +
    + +
    + + diff --git a/doc/html/smlnj-lib/Util/fun-GraphSCCFn.html b/doc/html/smlnj-lib/Util/fun-GraphSCCFn.html new file mode 100644 index 0000000..aac1450 --- /dev/null +++ b/doc/html/smlnj-lib/Util/fun-GraphSCCFn.html @@ -0,0 +1,260 @@ + + + + + + + + + + The GraphSCCFn functor + + + + + + + + +
    +
    +
    +
    + +
    + +
    The GraphSCCFn functor
    +
    +
    +
    +
    +
    + +
    +
    +
    +
    +
    +
    +
    +

    The GraphSCCFn functor implements an algorithm for +calculating the strongly-connected components of a directed graph. +The resulting components are topologically-sorted; i.e., if a component +A comes before a component B in the result, then there is no +path from B to A (but there might be a path from A to B).

    +
    +
    +
    +
    +

    Synopsis

    +
    +
    +
    +
    functor GraphSCCFn (Nd: ORD_KEY) :> GRAPH_SCC where Nd = Nd
    +
    +
    +
    +
    +
    +

    Arguments

    +
    +
    +
    +
    Nd: ORD_KEY
    +
    +
    +
    +
      +
    • +

      Nd : ORD_KEY:: +The argument structure Nd defines the type of graph nodes paired with +a comparison function that is used by the algorithm to implement finite +maps keyed by nodes.

      +
    • +
    +
    +
    +
    +
    +

    Interface

    +
    +
    +
    +
    structure Nd : ORD_KEY
    +
    +type node = Nd.ord_key
    +
    +datatype component
    +  = SIMPLE of node
    +  | RECURSIVE of node list
    +
    +val topOrder' : { roots: node list, follow: node \-> node list } -> component list
    +
    +val topOrder : { root: node, follow: node \-> node list } -> component list
    +
    +
    +
    +
    +
    +

    Description

    +
    +
    +
    +
    structure Nd : ORD_KEY
    +
    +

    The argument structure.

    +
    +
    type node = Nd.ord_key
    +
    +

    The type of a node in the graph.

    +
    +
    datatype component
    +
    +

    The type of a component in the result. Components are either +SIMPLE, consisting of a single node, or RECURSIVE, consisting +of a list of nodes that are all connected by cyclic paths. +A single node with a self loop is represented by the RECURSIVE +constructor.

    +
    +
    val topOrder': { roots: node list, follow: node -> node list } -> component list
    +
    +

    topOrder {roots, follow}` returns a topologically-sorted list of +strongly-connected components for a directed graph. The graph is specified +by a list of root nodes and a follow (or successor) function that returns +the list of successors for a node. The first component in the result will +contain the first node in the roots list.

    +
    +
    val topOrder : { root: node, follow: node -> node list } -> component list
    +
    +

    topOrder {root, follow} is equivalent to the expression

    +
    +
    +
    +
    +
    +
    topOrder' {roots = [root], follow = follow}
    +
    +
    +
    +
    +
    +

    See Also

    + +
    +
    +
    + +
    + + diff --git a/doc/html/smlnj-lib/Util/fun-Hash2TableFn.html b/doc/html/smlnj-lib/Util/fun-Hash2TableFn.html new file mode 100644 index 0000000..52a5a0f --- /dev/null +++ b/doc/html/smlnj-lib/Util/fun-Hash2TableFn.html @@ -0,0 +1,439 @@ + + + + + + + + + + The Hash2TableFn functor + + + + + + + + +
    +
    +
    +
    + +
    + +
    The Hash2TableFn functor
    +
    +
    +
    +
    +
    + +
    +
    +
    +
    +
    +
    +
    +

    The Hash2TableFn functor provides hash tables that are keyed by two +different key types. Items are inserted with two keys, either of +which may be used to lookup the item. Essentially, it is a pair of +hash tables that are kept synchronized.

    +
    +
    +

    The tables are implemented as an array of buckets, which are +lists of key-value pairs. The number of buckets grows with the number +of table entries.

    +
    +
    +
    +
    +

    Synopsis

    +
    +
    +
    +
    functor Hash2TableFn (
    +    structure Key1 : HASH_KEY
    +    structure Key2 : HASH_KEY
    +  ) : MONO_HASH2_TABLE
    +
    +
    +
    +
    +
    +

    Functor Argument Interface

    +
    +
    +
    +
    structure Key1 : HASH_KEY
    +structure Key2 : HASH_KEY
    +
    +
    +
    +
    +
    +

    Functor Argument Description

    +
    +
    +
    +
    structure Key1 : HASH_KEY
    +
    +

    The argument structure that specifies the first key type +with its hashing and equality functions.

    +
    +
    structure Key2 : HASH_KEY
    +
    +

    The substructure that specifies the second key type +with its hashing and equality functions.

    +
    +
    +
    +
    +
    +
    +

    Interface

    +
    +
    +
    +
    structure Key1 : HASH_KEY
    +structure Key2 : HASH_KEY
    +
    +type 'a hash_table
    +
    +val mkTable : (int * exn) -> 'a hash_table
    +
    +val clear : 'a hash_table -> unit
    +
    +val insert : 'a hash_table -> (Key1.hash_key * Key2.hash_key * 'a) -> unit
    +
    +val inDomain1 : 'a hash_table -> Key1.hash_key -> bool
    +val inDomain2 : 'a hash_table -> Key2.hash_key -> bool
    +
    +val lookup1 : 'a hash_table -> Key1.hash_key -> 'a
    +val lookup2 : 'a hash_table -> Key2.hash_key -> 'a
    +
    +val find1 : 'a hash_table -> Key1.hash_key -> 'a option
    +val find2 : 'a hash_table -> Key2.hash_key -> 'a option
    +
    +val remove1 : 'a hash_table -> Key1.hash_key -> 'a
    +val remove2 : 'a hash_table -> Key2.hash_key -> 'a
    +
    +val numItems : 'a hash_table ->  int
    +
    +val listItems  : 'a hash_table -> 'a list
    +val listItemsi : 'a hash_table -> (Key1.hash_key * Key2.hash_key * 'a) list
    +
    +val app  : ('a -> unit) -> 'a hash_table -> unit
    +val appi : ((Key1.hash_key * Key2.hash_key * 'a) -> unit) -> 'a hash_table
    +	    -> unit
    +
    +val map  : ('a -> 'b) -> 'a hash_table -> 'b hash_table
    +val mapi : ((Key1.hash_key * Key2.hash_key * 'a) -> 'b) -> 'a hash_table
    +	    -> 'b hash_table
    +
    +val fold  : (('a * 'b) -> 'b) -> 'b -> 'a hash_table -> 'b
    +val foldi : ((Key1.hash_key * Key2.hash_key * 'a * 'b) -> 'b) -> 'b
    +
    +val filter  : ('a -> bool) -> 'a hash_table -> unit
    +val filteri : ((Key1.hash_key * Key2.hash_key * 'a) -> bool) -> 'a hash_table
    +	    -> unit
    +
    +val copy : 'a hash_table -> 'a hash_table
    +
    +val bucketSizes : 'a hash_table -> (int list * int list)
    +
    +
    +
    +
    +
    +

    Interface Description

    +
    +
    +
    +
    structure Key1 : HASH_KEY
    +
    +

    The substructure that specifies the first key type.

    +
    +
    structure Key2 : HASH_KEY
    +
    +

    The substructure that specifies the second key type.

    +
    +
    type 'a hash_table
    +
    +

    The type of imperative hash tables indexed by the key types.

    +
    +
    val mkTable : (int * exn) -> 'a hash_table
    +
    +

    mkTable (n, ex) creates a new hash table; the table will be initially +sized to hold at least n items. The exception ex is raised by the +lookup and remove functions (described below) +when the search key is not in the domain.

    +
    +
    val clear : 'a hash_table -> unit
    +
    +

    clear tbl removes all of the entries in the table.

    +
    +
    val insert : 'a hash_table -> (Key1.hash_key * Key2.hash_key * 'a) -> unit
    +
    +

    insert tbl (key1, key2, item) inserts a mappings from key1 and key2 +to item into tbl. Any existing mapping of the keys is discarded.

    +
    +
    val inDomain1 : 'a hash_table -> Key1.hash_key -> bool
    +
    +

    inDomain1 tbl key returns true if, and only if, key is in the +first domain of the table

    +
    +
    val inDomain2 : 'a hash_table -> Key2.hash_key -> bool
    +
    +

    inDomain2 tbl key returns true if, and only if, key is in the +second domain of the table

    +
    +
    +
    +
    +
    +
    val lookup1 : 'a hash_table -> Key1.hash_key -> 'a
    +
    +

    lookup1 tbl key returns the item that key maps to if key is in +the first mapping of tbl. Otherwise, the table’s exception is raised.

    +
    +
    +
    +
    +
    +
    val lookup2 : 'a hash_table -> Key2.hash_key -> 'a
    +
    +

    lookup2 tbl key returns the item that key maps to if key is in +the second mapping of tbl. Otherwise, the table’s exception is raised.

    +
    +
    val find1 : 'a hash_table -> Key1.hash_key -> 'a option
    +
    +

    find1 tbl key returns the SOME v if key is in the first domain +of tbl and is mapped to v. Otherwise, it returns NONE.

    +
    +
    val find2 : 'a hash_table -> Key2.hash_key -> 'a option
    +
    +

    find2 tbl key returns the SOME v if key is in the second domain +of tbl and is mapped to v. Otherwise, it returns NONE.

    +
    +
    +
    +
    +
    +
    val remove1 : 'a hash_table -> Key1.hash_key -> 'a
    +
    +

    remove1 tbl key1 returns the item that key1 maps to if key1 is in +the first mapping of tbl. Furthermore, if the item was inserted with +keys key1 and key2, then key1 is removed from the first mapping +and key2 is removed from the second mapping. If key1 is not in the +first domain of the table, then the table’s exception is raised.

    +
    +
    +
    +
    +
    +
    val remove2 : 'a hash_table -> Key2.hash_key -> 'a
    +
    +

    remove2 tbl key2 returns the item that key2 maps to if key2 is in +the second mapping of tbl. Furthermore, if the item was inserted with +keys key1 and key2, then key1 is removed from the first mapping +and key2 is removed from the second mapping. If key2 is not in the +second domain of the table, then the table’s exception is raised.

    +
    +
    val numItems : 'a hash_table -> int
    +
    +

    numItems tbl returns the number of entries in the table.

    +
    +
    val listItems : 'a hash_table -> 'a list
    +
    +

    listItems tbl returns a list of the items in the range of tbl.

    +
    +
    val listItemsi : 'a hash_table -> (Key1.hash_key * Key2.hash_key * 'a) list
    +
    +

    listItemsi tbl returns a list of the (key1, key2, item) triples that +are in tbl.

    +
    +
    val app : ('a -> unit) -> 'a hash_table -> unit
    +
    +

    app f tbl applies the function f to each item in tbl.

    +
    +
    val appi : ((Key1.hash_key * Key2.hash_key * 'a) -> unit) -> 'a hash_table
    +
    +

    appi f tbl applies the function f to each (key1, key2, item) triple in tbl.

    +
    +
    val map : ('a -> 'b) -> 'a hash_table -> 'b hash_table
    +
    +

    map f tbl creates a new table with an entry (key1, key2, f item) +in the new table for every (key1, key2, item) triple in tbl. +The exception for the new table is copied from tbl.

    +
    +
    val mapi : ((Key1.hash_key * Key2.hash_key * 'a) -> 'b) -> 'a hash_table -> 'b hash_table
    +
    +

    mapi f tbl creates a new table with an entry (key1, key2, f(key1, key2, item)) +in the new table for every (key1, key2, item) triple in tbl. +The exception for the new table is copied from tbl.

    +
    +
    val fold : (('a * 'b) -> 'b) -> 'b -> 'a hash_table -> 'b
    +
    +

    fold f init tbl folds the function f over the items in the range of tbl +using init as an initial value.

    +
    +
    val foldi : ((Key1.hash_key * Key2.hash_key * 'a * 'b) -> 'b) -> 'b -> 'a hash_table -> 'b
    +
    +

    foldi f init tbl folds the function f over the (key1, key2, item) +triples in tbl using init as an initial value.

    +
    +
    val filter : ('a -> bool) -> 'a hash_table -> unit
    +
    +

    filter pred tbl removes any entry (key1, key2, item) from tbl for which +pred item returns false.

    +
    +
    val filteri : ((Key1.hash_key * Key2.hash_key * 'a) -> bool) -> 'a hash_table -> unit
    +
    +

    filteri pred tbl removes any entry (key1, key2, item) from tbl for which +pred(key1, key2, item) returns false.

    +
    +
    val copy : 'a hash_table -> 'a hash_table
    +
    +

    copy tbl creates a copy of tbl. This expression is equivalent to

    +
    +
    +
    map (fn x => x) tbl
    +
    +
    +
    +
    val bucketSizes : 'a hash_table -> (int list * int list)
    +
    +

    bucketSizes tbl returns a list of the current number of items per +bucket for each of the tables. This function allows users to gauge +the quality of their hashing functions.

    +
    +
    +
    +
    +
    +
    +

    See Also

    + +
    +
    +
    + +
    + + diff --git a/doc/html/smlnj-lib/Util/fun-HashSetFn.html b/doc/html/smlnj-lib/Util/fun-HashSetFn.html new file mode 100644 index 0000000..d7fac60 --- /dev/null +++ b/doc/html/smlnj-lib/Util/fun-HashSetFn.html @@ -0,0 +1,441 @@ + + + + + + + + + + The HashSetFn functor + + + + + + + + +
    +
    +
    +
    + +
    + +
    The HashSetFn functor
    +
    +
    +
    +
    +
    + +
    +
    +
    +
    +
    +
    +
    +

    The HashSetFn functor provides a hash-table-based implementation +of imperative sets parameterized over a Key structure.

    +
    +
    +
    +
    +

    Synopsis

    +
    +
    +
    +
    signature MONO_HASH_SET
    +functor HashSetFn (Key : HASH_KEY) : MONO_HASH_SET
    +
    +
    +
    +
    +
    +

    Functor Argument Interface

    +
    +
    +
    +
    Key : HASH_KEY
    +
    +
    +
    +
    +
    +

    Functor Argument Description

    +
    +
    +
    +
    Key : HASH_KEY
    +
    +

    A structure that implements the HASH_KEY` +signature, where Key.hash_key +will be the type of the elements in the hash set.

    +
    +
    +
    +
    +
    +
    +

    Interface

    +
    +
    +
    +
    structure Key : HASH_KEY
    +
    +type item = Key.hash_key
    +type set
    +
    +val mkEmpty : int -> set
    +
    +val mkSingleton : item -> set
    +
    +val mkFromList : item list -> set
    +
    +val toList : set -> item list
    +
    +val add  : set * item -> unit
    +val addc : set -> item -> unit
    +
    +val addList : set * item list -> unit
    +
    +val subtract  : set * item -> unit
    +val subtractc : set -> item -> unit
    +
    +val subtractList : set * item list -> unit
    +
    +val delete : set * item -> bool
    +
    +val member : set * item -> bool
    +
    +val isEmpty : set -> bool
    +
    +val isSubset : (set * set) -> bool
    +
    +val numItems : set ->  int
    +
    +val map : (item -> item) -> set -> set
    +val mapPartial : (item -> item option) -> set -> set
    +val app : (item -> unit) -> set -> unit
    +val fold : (item * 'b -> 'b) -> 'b -> set -> 'b
    +
    +val partition : (item -> bool) -> set -> (set * set)
    +
    +val filter : (item -> bool) -> set -> unit
    +
    +val exists : (item -> bool) -> set -> bool
    +val all : (item -> bool) -> set -> bool
    +
    +val find : (item -> bool) -> set -> item option
    +
    +val listItems : set -> item list
    +val without : set * item -> unit
    +
    +
    +
    +
    +
    +

    Interface Description

    +
    +
    +
    +
    structure Key : HASH_KEY
    +
    +

    This substructure is the argument structure, which defines the type +of set elements, and hash and equality functions on the key type.

    +
    +
    type item = Key.hash_key
    +
    +

    The type of items in the sets.

    +
    +
    type set
    +
    +

    The type of imperative sets of items.

    +
    +
    val mkEmpty : int -> set
    +
    +

    mkEmpty n creates an empty set that has initial space to store +at least n items.

    +
    +
    val mkSingleton : item -> set
    +
    +

    mkSingleton item creates a set with item as its only initial element.

    +
    +
    val mkFromList : item list -> set
    +
    +

    mkFromList items creates a set with items as its initial elements.

    +
    +
    +
    +
    +
    +
    val toList : set -> item list
    +
    +

    toList set returns a list of the items in set.

    +
    +
    val add : set * item -> unit
    +
    +

    add (set, item) destructively adds the item to the set.

    +
    +
    val addc : set -> item -> unit
    +
    +

    addc set item destructively adds the item to the set.

    +
    +
    val addList : set * item list -> unit
    +
    +

    addList (set, items) destructively adds the list of items to the set.

    +
    +
    +
    +
    +
    +
    val subtract : set * item -> unit
    +
    +

    subtract (set, item) removes the object item from set; it has no +effect if item is not in set.

    +
    +
    val subtractc : set -> item -> unit
    +
    +

    subtractc set item removes the object item from set; it has no +effect if item is not in set.

    +
    +
    val subtractList : set -> item list -> unit
    +
    +

    subtractList set items removes the items from set. This expression +is equivalent to

    +
    +
    +
    List.app (subtractc set) items
    +
    +
    +
    +
    val delete : set * item -> bool
    +
    +

    subtract (set, item) removes the object item from set (if present) +and returns true if the item was removed and false if it was not +present.

    +
    +
    val member : set * item -> bool
    +
    +

    member (item, set) returns true if, and only if, item +is an element of set.

    +
    +
    val isEmpty : set -> bool
    +
    +

    isEmpty set returns true if, and only if, set is empty.

    +
    +
    val isSubset : (set * set) -> bool
    +
    +

    isSubset (set1, set2) returns true if, and only if, set1 +is a subset of set2 (i.e., any element of set1 is an +element of set2).

    +
    +
    val numItems : set -> int
    +
    +

    numItems set returns the number of items in the set.

    +
    +
    val map : (item -> item) -> set -> set
    +
    +

    map f set creates a new set from the result of applying the +function f to the elements of set. This expression is +equivalent to

    +
    +
    +
    mkFromList (List.map f (toList set))
    +
    +
    +
    +
    val mapPartial : (item -> item option) -> set -> set
    +
    +

    mapPartial f set creates a new set from the result of applying the +partial function f to the elements of set. This expression is +equivalent to

    +
    +
    +
    mkFromList (List.mapPartial f (toList set))
    +
    +
    +
    +
    val app : (item -> unit) -> set -> unit
    +
    +

    app f set applies the function f to the items in set.

    +
    +
    val fold : (item * 'b -> 'b) -> 'b -> set -> 'b
    +
    +

    foldl f init set folds the function f over the items in +set using init as the initial value.

    +
    +
    val partition : (item -> bool) -> set -> (set * set)
    +
    +

    partition pred set returns a pair of disjoint sets (tSet, fSet), + where the predicate pred returns true for every element of tSet, +false for every element of fSet, and set is the union of tSet + and fSet.

    +
    +
    val filter : (item -> bool) -> set -> unit
    +
    +

    filter pred set removes any elements of set for which the +predicate pred returns false.

    +
    +
    val exists : (item -> bool) -> set -> bool
    +
    +

    all pred set returns true if, and only if, pred item returns +true for all elements item in set. Elements are checked in +an undefined order.

    +
    +
    val all : (item -> bool) -> set -> bool
    +
    +

    exists pred set returns true if, and only if, there exists an +element item in set such that pred item returns true. +Elements are checked in an undefined order.

    +
    +
    val find : (item -> bool) -> set -> item option
    +
    +

    find pred set returns SOME item if there exists an object item +in the set for which pred item returns true; otherwise NONE is +returned. Items are tested in an undefined order.

    +
    +
    +
    +
    +

    Deprecated functions

    +
    +
    +
    val without : set * item -> unit
    +
    +

    Use subtract instead.

    +
    +
    val listItems : set -> item list
    +
    +

    Use toList instead.

    +
    +
    +
    +
    +
    +
    +
    +

    See Also

    + +
    +
    +
    + +
    + + diff --git a/doc/html/smlnj-lib/Util/fun-HashTableFn.html b/doc/html/smlnj-lib/Util/fun-HashTableFn.html new file mode 100644 index 0000000..b3c8b7b --- /dev/null +++ b/doc/html/smlnj-lib/Util/fun-HashTableFn.html @@ -0,0 +1,184 @@ + + + + + + + + + + The HashTableFn functor + + + + + + + + +
    +
    +
    +
    + +
    + +
    The HashTableFn functor
    +
    +
    +
    +
    +
    + +
    +
    +
    +
    +
    +
    +
    +

    The HashTableFn functor provides an implementation of the +MONO_HASH_TABLE signature parameterized +over the key type.

    +
    +
    +

    The tables are implemented as an array of buckets, which are +lists of key-value pairs. The number of buckets grows with the number +of table entries.

    +
    +
    +
    +
    +

    Synopsis

    +
    +
    +
    +
    functor HashTableFn (Key : HASH_KEY) : MONO_HASH_TABLE
    +
    +
    +
    +
    + +
    +
    + +
    + + diff --git a/doc/html/smlnj-lib/Util/fun-IntervalSetFn.html b/doc/html/smlnj-lib/Util/fun-IntervalSetFn.html new file mode 100644 index 0000000..5405bb9 --- /dev/null +++ b/doc/html/smlnj-lib/Util/fun-IntervalSetFn.html @@ -0,0 +1,504 @@ + + + + + + + + + + The IntervalSetFn functor + + + + + + + + +
    +
    +
    +
    + +
    + +
    The IntervalSetFn functor
    +
    +
    +
    +
    +
    + +
    +
    +
    +
    +
    +
    +
    +

    The IntervalSetFn functor provides sets over a discrete ordered domain, +where the sets are represented by intervals. It is meant for representing +dense sets (e.g., unicode character classes).

    +
    +
    +
    +
    +

    Synopsis

    +
    +
    +
    +
    signature INTERVAL_SET
    +functor IntervalSetFn (D : INTERVAL_DOMAIN) : INTERVAL_SET
    +
    +
    +
    +
    +
    +

    Functor Argument Description

    +
    +
    +
    +
    D : INTERVAL_DOMAIN
    +
    +

    The argument defines the type of points in the domain and +their order structure.

    +
    +
    +
    +
    +
    +
    +

    Interface

    +
    +
    +
    +
    structure D : INTERVAL_DOMAIN
    +
    +type item = D.point
    +type interval = (item * item)
    +type set
    +
    +val empty : set
    +val universe : set
    +
    +val singleton : item -> set
    +
    +val interval : item * item -> set
    +
    +val isEmpty : set -> bool
    +val isUniverse : set -> bool
    +
    +val member : set * item -> bool
    +
    +val items : set -> item list
    +
    +val intervals : set -> interval list
    +
    +val add : set * item -> set
    +val add' : item * set -> set
    +
    +val addInt : set * interval -> set
    +val addInt' : interval * set -> set
    +
    +val complement : set -> set
    +val union : (set * set) -> set
    +val intersect : (set * set) -> set
    +val difference : (set * set) -> set
    +
    +val app    : (item -> unit) -> set -> unit
    +val foldl  : (item * 'a -> 'a) -> 'a -> set -> 'a
    +val foldr  : (item * 'a -> 'a) -> 'a -> set -> 'a
    +val filter : (item -> bool) -> set -> set
    +val exists : (item -> bool) -> set -> bool
    +val all    : (item -> bool) -> set -> bool
    +
    +val appInt    : (interval -> unit) -> set -> unit
    +val foldlInt  : (interval * 'a -> 'a) -> 'a -> set -> 'a
    +val foldrInt  : (interval * 'a -> 'a) -> 'a -> set -> 'a
    +val filterInt : (interval -> bool) -> set -> set
    +val existsInt : (interval -> bool) -> set -> bool
    +val allInt    : (interval -> bool) -> set -> bool
    +
    +val compare : set * set -> order
    +val isSubset : set * set -> bool
    +
    +
    +
    +
    +
    +

    Interface Description

    +
    +
    +
    +
    structure D : INTERVAL_DOMAIN
    +
    +

    The argument structure.

    +
    +
    type item = D.point
    +
    +

    The type of items in the set.

    +
    +
    type interval = (item * item)
    +
    +

    A collection of items defined by an interval.

    +
    +
    type set
    +
    +

    The type of a set of items.

    +
    +
    val empty : set
    +
    +

    The empty set.

    +
    +
    val universe : set
    +
    +

    The set of all elements in the domain, which is specified as the +interval (D.minPt, D.maxPt).

    +
    +
    val singleton : item -> set
    +
    +

    singleton item returns the singleton set containing item.

    +
    +
    val fromList : item list -> set
    +
    +

    fromList items returns the set containing the list of items.

    +
    +
    val interval : item * item -> set
    +
    +

    singleton (pt1, pt2) returns a set containing the items between +the items pt1 and pt2 (as ordered by +D.compare). +This expression raises the +Domain exception +if D.compare(pt1, pt2) = GREATER.

    +
    +
    val isEmpty : set -> bool
    +
    +

    isEmpty set returns true if, and only if, set is empty.

    +
    +
    val isUniverse : set -> bool
    +
    +

    isUniverse set returns true if, and only if, set contains all of +the elements of the domain.

    +
    +
    val member : set * item -> bool
    +
    +

    isEmpty (set, item) returns true if, and only if, item is contained +in set.

    +
    +
    +
    +
    +
    +
    val toList : set -> item list
    +
    +

    toList set returns a list of the items in set. The items will be +sorted in increasing order.

    +
    +
    val intervals : set -> interval list
    +
    +

    intervals set returns a list of disjoint intervals that represents +the set. The intervals will be sorted in increasing order.

    +
    +
    val add : set * item -> set
    +
    +

    add (set, item) adds item to set and returns the resulting set.

    +
    +
    val add' : item * set -> set
    +
    +

    add' (item, set) adds item to set and returns the resulting set.

    +
    +
    +
    (* add an interval to the set *)
    +
    +
    +
    +
    val addInt : set * interval -> set
    +
    +

    addInt (set, (pt1, pt2)) adds the items between the items pt1 and pt2 +(as ordered by D.compare) +to set. This expression raises the +Domain exception +if D.compare(pt1, pt2) = GREATER.

    +
    +
    val addInt' : interval * set -> set
    +
    +

    addInt' ((pt1, pt2), set) adds the items between the items pt1 and pt2 +(as ordered by D.compare) +to set. This expression raises the +Domain exception +if D.compare(pt1, pt2) = GREATER.

    +
    +
    val complement : set -> set
    +
    +

    complement set returns the complement of set (i.e., the set of +items from the universe that are not in set).

    +
    +
    val union : (set * set) -> set
    +
    +

    union (set1, set2) returns the union of set1 and set2; +(i.e., the set of items that are in set1 or in set2).

    +
    +
    val intersect : (set * set) -> set
    +
    +

    intersect (set1, set2) returns the intersection of set1 and set2; +(i.e., the set of items that are in both set1 and`set2`).

    +
    +
    val difference : (set * set) -> set
    +
    +

    difference (set1, set2) returns the set difference of set1 and set2; +(i.e., the set of items that are in set1, but not in set2).

    +
    +
    val app : (item -> unit) -> set -> unit
    +
    +

    app f set applies the function f to the items in set. +This expression is equivalent to

    +
    +
    +
    List.app f (toList set)
    +
    +
    +
    +
    val foldl : (item * 'a -> 'a) -> 'a -> set -> 'a
    +
    +

    foldl f init set folds the function f over the items in +set in increasing order using init as the initial value. +This expression is equivalent to

    +
    +
    +
    List.foldl f init (toList set)
    +
    +
    +
    +
    val foldr : (item * 'a -> 'a) -> 'a -> set -> 'a
    +
    +

    foldr f init set folds the function f over the items in +set in decreasing order using init as the initial value. +This expression is equivalent to

    +
    +
    +
    List.foldr f init (toList set)
    +
    +
    +
    +
    val filter : (item -> bool) -> set -> set
    +
    +

    filter pred set filters out any items of set for which the +predicate pred returns false.

    +
    +
    val exists : (item -> bool) -> set -> bool
    +
    +

    exists pred set returns true if, and only if, there is an item +in the set for which pred returns true. This function +short-circuits evaluation once an item is encountered for which +pred returns true.

    +
    +
    val all : (item -> bool) -> set -> bool
    +
    +

    all pred set returns true if, and only if, pred returns true +for all items in the set. This function short-circuits evaluation +once an item is encountered for which pred returns false.

    +
    +
    val appInt : (interval -> unit) -> set -> unit
    +
    +

    appInt f set applies the function f to the intervals in set. +This expression is equivalent to

    +
    +
    +
    List.app f (intervals set)
    +
    +
    +
    +
    val foldlInt : (interval * 'a -> 'a) -> 'a -> set -> 'a
    +
    +

    foldlInt f init set folds the function f over the intervals in +set in increasing order using init as the initial value. +This expression is equivalent to

    +
    +
    +
    List.foldl f init (intervals set)
    +
    +
    +
    +
    val foldrInt : (interval * 'a -> 'a) -> 'a -> set -> 'a
    +
    +

    foldrInt f init set folds the function f over the intervals in +set in decreasing order using init as the initial value. +This expression is equivalent to

    +
    +
    +
    List.foldr f init (intervals set)
    +
    +
    +
    +
    val filterInt : (interval -> bool) -> set -> set
    +
    +

    filterInt pred set filters out any intervals of set for which the +predicate pred returns false.

    +
    +
    val existsInt : (interval -> bool) -> set -> bool
    +
    +

    existsInt pred set returns true if, and only if, there is an interval +in the set for which pred returns true. This function +short-circuits evaluation once an interval is encountered for which +pred returns true.

    +
    +
    val allInt : (interval -> bool) -> set -> bool
    +
    +

    allInt pred set returns true if, and only if, pred returns true +for all of the intervals in the set. This function short-circuits evaluation +once an interval is encountered for which pred returns false.

    +
    +
    val compare : set * set -> order
    +
    +

    compare (set1, set2) returns the lexical order of +the two sets.

    +
    +
    val isSubset : set * set -> bool
    +
    +

    isSubset (set1, set2) returns true if, and only if, set1 +is a subset of set2 (i.e., any element of set1 is an +element of set2).

    +
    +
    +
    +
    +

    Deprecated functions

    +
    +

    The following functions are part of the interface, but have been +deprecated.

    +
    +
    +
    +
    val items : set -> item list`
    +
    +

    Use toList instead.

    +
    +
    +
    +
    +
    +
    +
    +

    See Also

    + +
    +
    +
    + +
    + + diff --git a/doc/html/smlnj-lib/Util/fun-KeywordFn.html b/doc/html/smlnj-lib/Util/fun-KeywordFn.html new file mode 100644 index 0000000..8516225 --- /dev/null +++ b/doc/html/smlnj-lib/Util/fun-KeywordFn.html @@ -0,0 +1,274 @@ + + + + + + + + + + The KeywordFn functor + + + + + + + + +
    +
    +
    +
    + +
    + +
    The KeywordFn functor
    +
    +
    +
    +
    +
    + +
    +
    +
    +
    +
    +
    +
    +

    The KeywordFn functor provides a simple way to support a table of keyword +(or reserved) identifiers in a scanner.

    +
    +
    +
    +
    +

    Synopsis

    +
    +
    +
    +
    functor KeywordFn ()
    +
    +
    +
    +
    +
    +

    Functor Argument Interface

    +
    +
    +
    +
    type token
    +type pos
    +val ident : (Atom.atom * pos * pos) -> token
    +val keywords : (string * ((pos * pos) -> token)) list
    +
    +
    +
    +
    +
    +

    Functor Argument Description

    +
    +
    +
    +
    type token
    +
    +

    The type of tokens in the scanner.

    +
    +
    type pos
    +
    +

    The type of source file positions used by the scanner (e.g., character +positions in the source file).

    +
    +
    +
    +
    +
    +
    val ident : (Atom.atom * pos * pos) -> token
    +
    +

    ident (id, pos, pos) is used to create an identifier token (i.e., non-keyword) + for the given string, and start and end file positions.

    +
    +
    +
    +
    +
    +
    val keywords : (string * pos * pos) -> token list
    +
    +

    A list of string-function pairs, where the strings are the keywords and +the functions are used to create the corresponding scanner tokens from +start and end file positions.

    +
    +
    +
    +
    +
    +
    +

    Interface

    +
    +
    +
    +
        type token
    +    type pos
    +    val keyword : (string * pos * pos) -> token
    +
    +
    +
    +
    +
    +

    Interface Description

    +
    +
    +
    +
    type token
    +
    +

    The type of tokens in the scanner.

    +
    +
    type pos
    +
    +

    The type of source file positions used by the scanner.

    +
    +
    val keyword : (string * pos * pos) → token
    +
    +

    keyword (id, p1, p2) returns the token for the identifier id, +which is either one of the keyword tokens from the +keywords list or otherwise is an identifier +token created using the ident function.

    +
    +
    +
    +
    +
    +
    +

    Discussion

    +
    +
    +

    This functor was designed for the ml-yacc +scanner interface, where tokens contain their file position. It is not clear that +it adds much utility over just using the atom type, but +is maintained for backward compatibility.

    +
    +
    +
    +
    +

    See Also

    + +
    +
    +
    + +
    + + diff --git a/doc/html/smlnj-lib/Util/fun-LeftPriorityQFn.html b/doc/html/smlnj-lib/Util/fun-LeftPriorityQFn.html new file mode 100644 index 0000000..936df58 --- /dev/null +++ b/doc/html/smlnj-lib/Util/fun-LeftPriorityQFn.html @@ -0,0 +1,320 @@ + + + + + + + + + + The MONO_PRIORITYQ signature + + + + + + + + +
    +
    +
    +
    + +
    + +
    The MONO_PRIORITYQ signature
    +
    +
    +
    +
    +
    + +
    +
    +
    +
    +
    +
    +
    +

    The LeftPriorityQFn functor provides a functional implementation of priority +queues using leaftist heaps.

    +
    +
    +
    +
    +

    Synopsis

    +
    +
    +
    +
    signature PRIORITY
    +signature MONO_PRIORITYQ
    +functor LeftPriorityQFn (P : PRIORITY) : MONO_PRIORITYQ
    +
    +
    +
    +
    +
    +

    Functor Argument Interface

    +
    +
    +
    +
    type priority
    +val compare : (priority * priority) -> order
    +
    +type item
    +val priority : item -> priority
    +
    +
    +
    +
    +
    +

    Functor Argument Description

    +
    +
    +
    +
    type priority
    +
    +

    The abstract type of priority values.

    +
    +
    val compare : (priority * priority) -> order
    +
    +

    compare (pri1, pri2) returns the order of the two priority values.

    +
    +
    type item
    +
    +

    The type of items in the priority queue.

    +
    +
    val priority : item -> priority
    +
    +

    priority item returns the priority value for item.

    +
    +
    +
    +
    +
    +
    +

    Interface

    +
    +
    +
    +
    type item
    +type queue
    +
    +val empty : queue
    +
    +val singleton : item -> queue
    +
    +val fromList : item list -> queue
    +
    +val insert : (item * queue) -> queue
    +
    +val remove : queue -> (item * queue)
    +
    +val next : queue -> (item * queue) option
    +
    +val findAndRemove : queue * (item -> bool) -> (item * queue) option
    +
    +val delete : queue * (item -> bool) -> queue
    +
    +val merge : (queue * queue) -> queue
    +
    +val numItems : queue -> int
    +
    +val isEmpty : queue -> bool
    +
    +
    +
    +
    +
    +

    Interface Description

    +
    +
    +
    +
    type item
    +
    +

    The type of items in the priority queue.

    +
    +
    type queue
    +
    +

    The priority queue type.

    +
    +
    val empty : queue
    +
    +

    The empty priority queue.

    +
    +
    val singleton : item -> queue
    +
    +

    singleton item returns a queue containing just item.

    +
    +
    val fromList : item list -> queue
    +
    +

    fromList items returns a queue containing the items.

    +
    +
    val insert : (item * queue) -> queue
    +
    +

    insert (pq, item) returns the queue that is pq with item added.

    +
    +
    val remove : queue -> (item * queue)
    +
    +

    remove pq returns (item, pq'), where item is the highest priority item +in pq and pq' is the result of removing item from pq. This function +raises the Empty exception +when pq is empty.

    +
    +
    val next : queue -> (item * queue) option
    +
    +

    remove pq returns SOME(item, pq'), where item is the highest +priority item in pq and pq' is the result of removing item +from pq. If pq is empty, then NONE is returned.

    +
    +
    val findAndRemove : queue * (item → bool) → (item * queue) option
    +
    +

    findAndRemove (pq, pred) returns SOME(item, pq'), where item is the +highest priority item in pq such that pred item returns true, and +and pq' is the result of removing item from pq. If no such item +exists, then NONE is returned.

    +
    +
    val delete : queue * (item → bool) → queue
    +
    +

    delete (pq, pred) deletes any item in pq that satisfies the predicate +and returns the resulting queue.

    +
    +
    val merge : (queue * queue) -> queue
    +
    +

    merge (pq1, pq2) returns the priority queue formed by merging the items in +the two queues.

    +
    +
    val numItems : queue -> int
    +
    +

    numItems pq returns the number of items in pq.

    +
    +
    val isEmpty : queue -> bool
    +
    +

    isEmpty pq returns true if, and only if, pq is empty.

    +
    +
    +
    +
    +
    +
    +

    See Also

    + +
    +
    +
    + +
    + + diff --git a/doc/html/smlnj-lib/Util/fun-ListMapFn.html b/doc/html/smlnj-lib/Util/fun-ListMapFn.html new file mode 100644 index 0000000..19b0da1 --- /dev/null +++ b/doc/html/smlnj-lib/Util/fun-ListMapFn.html @@ -0,0 +1,181 @@ + + + + + + + + + + The ListMapFn functor + + + + + + + + +
    +
    +
    +
    + +
    + +
    The ListMapFn functor
    +
    +
    +
    +
    +
    + +
    +
    +
    +
    +
    +
    +
    +

    The ListMapFn functor provides a sorted-list implementation of the +ORD_MAP signature parameterized over the key type. +This implementation is light weight and fast for small domains, but +for larger applications, it is recommended that one use the +RedBlackMapFn functor instead.

    +
    +
    +
    +
    +

    Synopsis

    +
    +
    +
    +
    functor ListMapFn (K : ORD_KEY) :> ORD_MAP where type Key.ord_key = K.ord_key
    +
    +
    +
    +
    +
    +

    See Also

    + +
    +
    +
    + +
    + + diff --git a/doc/html/smlnj-lib/Util/fun-ListSetFn.html b/doc/html/smlnj-lib/Util/fun-ListSetFn.html new file mode 100644 index 0000000..48c5ec6 --- /dev/null +++ b/doc/html/smlnj-lib/Util/fun-ListSetFn.html @@ -0,0 +1,180 @@ + + + + + + + + + + The ListSetFn functor + + + + + + + + +
    +
    +
    +
    + +
    + +
    The ListSetFn functor
    +
    +
    +
    +
    +
    + +
    +
    +
    +
    +
    +
    +
    +

    The ListSetFn functor provides a sorted-list implementation of the +ORD_SET signature parameterized over the element type. +This implementation is light weight and fast for small sets, but +for larger applications, it is recommended that one use the +RedBlackSetFn functor instead.

    +
    +
    +
    +
    +

    Synopsis

    +
    +
    +
    +
    functor ListSetFn (K : ORD_KEY) :> ORD_SET where type Key.ord_key = K.ord_key
    +
    +
    +
    +
    +
    +

    See Also

    + +
    +
    +
    + +
    + + diff --git a/doc/html/smlnj-lib/Util/fun-MonoArrayFn.html b/doc/html/smlnj-lib/Util/fun-MonoArrayFn.html new file mode 100644 index 0000000..1a579ad --- /dev/null +++ b/doc/html/smlnj-lib/Util/fun-MonoArrayFn.html @@ -0,0 +1,192 @@ + + + + + + + + + + The MonoArrayFn functor + + + + + + + + +
    +
    +
    +
    + +
    + +
    The MonoArrayFn functor
    +
    +
    +
    +
    +
    + +
    +
    +
    +
    +
    +
    +
    +

    The MonoArrayFn functor allows easy construction of new monomorphic array +structures.

    +
    +
    +
    +
    +

    Synopsis

    +
    +
    +
    +
    functor MonoArrayFn (type elem) :> MONO_ARRAY where type elem = elem
    +
    +
    +
    +
    +
    +

    Description

    +
    +
    +

    This functor takes an element type and defines a structure that +matches the +MONO_ARRAY +signature from the SML Basis Library, +which can then be supplied to other functors that require such +an argument.

    +
    +
    +
    + +
    +
    + +
    + + diff --git a/doc/html/smlnj-lib/Util/fun-RedBlackMapFn.html b/doc/html/smlnj-lib/Util/fun-RedBlackMapFn.html new file mode 100644 index 0000000..d44d5f0 --- /dev/null +++ b/doc/html/smlnj-lib/Util/fun-RedBlackMapFn.html @@ -0,0 +1,181 @@ + + + + + + + + + + The RedBlackMapFn functor + + + + + + + + +
    +
    +
    +
    + +
    + +
    The RedBlackMapFn functor
    +
    +
    +
    +
    +
    + +
    +
    +
    +
    +
    +
    +
    +

    The RedBlackMapFn functor provides a red-black-tree implementation of the +ORD_MAP signature parameterized over the key type.

    +
    +
    +

    This functor is the preferred implementation of finite maps over ordered +keys as it is faster than the other implementations.

    +
    +
    +
    +
    +

    Synopsis

    +
    +
    +
    +
    functor RedBlackMapFn (K : ORD_KEY) :> ORD_MAP where type Key.ord_key = K.ord_key
    +
    +
    +
    +
    +
    +

    See Also

    + +
    +
    +
    + +
    + + diff --git a/doc/html/smlnj-lib/Util/fun-RedBlackSetFn.html b/doc/html/smlnj-lib/Util/fun-RedBlackSetFn.html new file mode 100644 index 0000000..7357d80 --- /dev/null +++ b/doc/html/smlnj-lib/Util/fun-RedBlackSetFn.html @@ -0,0 +1,181 @@ + + + + + + + + + + The RedBlackSetFn functor + + + + + + + + +
    +
    +
    +
    + +
    + +
    The RedBlackSetFn functor
    +
    +
    +
    +
    +
    + +
    +
    +
    +
    +
    +
    +
    +

    The RedBlackSetFn functor provides a red-black-tree implementation of the +ORD_SET signature parameterized over the element type.

    +
    +
    +

    This functor is the preferred implementation of finite sets of ordered +elements as it is faster than the other implementations.

    +
    +
    +
    +
    +

    Synopsis

    +
    +
    +
    +
    functor RedBlackSetFn (K : ORD_KEY) :> ORD_SET where type Key.ord_key = K.ord_key
    +
    +
    +
    +
    +
    +

    See Also

    + +
    +
    +
    + +
    + + diff --git a/doc/html/smlnj-lib/Util/fun-SplayMapFn.html b/doc/html/smlnj-lib/Util/fun-SplayMapFn.html new file mode 100644 index 0000000..1cdac18 --- /dev/null +++ b/doc/html/smlnj-lib/Util/fun-SplayMapFn.html @@ -0,0 +1,183 @@ + + + + + + + + + + The SplayMapFn functor + + + + + + + + +
    +
    +
    +
    + +
    + +
    The SplayMapFn functor
    +
    +
    +
    +
    +
    + +
    +
    +
    +
    +
    +
    +
    +

    The SplayMapFn functor provides a splay-tree implementation of the +ORD_MAP signature parameterized over the key type.

    +
    +
    +

    It is recommended, however, that one use the RedBlackMapFn +functor instead of SplayMapFn, since experimentation has shown it to be +faster across the board.

    +
    +
    +
    +
    +

    Synopsis

    +
    +
    +
    +
    functor SplayMapFn (K : ORD_KEY) :> ORD_MAP where type Key.ord_key = K.ord_key
    +
    +
    +
    +
    +
    +

    See Also

    + +
    +
    +
    + +
    + + diff --git a/doc/html/smlnj-lib/Util/fun-SplaySetFn.html b/doc/html/smlnj-lib/Util/fun-SplaySetFn.html new file mode 100644 index 0000000..cc8978c --- /dev/null +++ b/doc/html/smlnj-lib/Util/fun-SplaySetFn.html @@ -0,0 +1,183 @@ + + + + + + + + + + The SplaySetFn functor + + + + + + + + +
    +
    +
    +
    + +
    + +
    The SplaySetFn functor
    +
    +
    +
    +
    +
    + +
    +
    +
    +
    +
    +
    +
    +

    The SplaySetFn functor provides a splay-tree implementation of the +ORD_SET signature parameterized over the element type.

    +
    +
    +

    It is recommended, however, that one use the RedBlackSetFn +functor instead of SplaySetFn, since experimentation has shown it to be +faster across the board.

    +
    +
    +
    +
    +

    Synopsis

    +
    +
    +
    +
    functor SplaySetFn (K : ORD_KEY) :> ORD_SET where type Key.ord_key = K.ord_key
    +
    +
    +
    +
    +
    +

    See Also

    + +
    +
    +
    + +
    + + diff --git a/doc/html/smlnj-lib/Util/sig-HASH_KEY.html b/doc/html/smlnj-lib/Util/sig-HASH_KEY.html new file mode 100644 index 0000000..18a0bbc --- /dev/null +++ b/doc/html/smlnj-lib/Util/sig-HASH_KEY.html @@ -0,0 +1,219 @@ + + + + + + + + + + The HASH_KEY signature + + + + + + + + +
    +
    +
    +
    + +
    + +
    The HASH_KEY signature
    +
    +
    +
    +
    +
    + +
    +
    +
    +
    +
    +
    +
    +

    The HASH_KEY signature describes a monomorphic type with an equality +test and hashing function. It is used as the argument signature for +the HashSetFn and HashTableFn +functors, and as a sub-structure signature in the +MONO_HASH_SET and +MONO_HASH_TABLE signatures.

    +
    +
    +
    +
    +

    Synopsis

    +
    +
    +
    +
    signature HASH_KEY
    +
    +
    +
    +
    +
    +

    Interface

    +
    +
    +
    +
    type hash_key
    +
    +val hashVal : hash_key -> word
    +
    +val sameKey : (hash_key * hash_key) -> bool
    +
    +
    +
    +
    +
    +

    Description

    +
    +
    +
    +
    type hash_key
    +
    +

    The type of key values.

    +
    +
    val hashVal : hash_key -> word
    +
    +

    hashVal key returns a hash value for the key.

    +
    +
    val sameKey : (hash_key * hash_key) -> bool
    +
    +

    sameKey (key1, key2) returns true of two hash keys are equal. +Implementations of this signature should ensure that if +sameKey (key1, key2), then hashVal key1 = hashVal key2.

    +
    +
    +
    +
    +
    + +
    +
    + +
    + + diff --git a/doc/html/smlnj-lib/Util/sig-INTERVAL_DOMAIN.html b/doc/html/smlnj-lib/Util/sig-INTERVAL_DOMAIN.html new file mode 100644 index 0000000..11a7ad9 --- /dev/null +++ b/doc/html/smlnj-lib/Util/sig-INTERVAL_DOMAIN.html @@ -0,0 +1,279 @@ + + + + + + + + + + The INTERVAL_DOMAIN signature + + + + + + + + +
    +
    +
    +
    + +
    + +
    The INTERVAL_DOMAIN signature
    +
    +
    +
    +
    +
    + +
    +
    +
    +
    +
    +
    +
    +

    The INTERVAL_DOMAIN signature defines a representation of an +abstract ordered domain. It is required that the domain +consist of discrete values that are totally orders and that there +be a minimum and maximum value. This signature is used as the +argument signature for the IntervalSetFn +functor.

    +
    +
    +
    +
    +

    Synopsis

    +
    +
    +
    +
    signature INTERVAL_DOMAIN
    +
    +
    +
    +
    +
    +

    Interface

    +
    +
    +
    +
    type point
    +
    +val compare : (point * point) -> order
    +
    +val succ : point -> point
    +val pred : point -> point
    +
    +val isSucc : (point * point) -> bool
    +
    +val minPt : point
    +val maxPt : point
    +
    +
    +
    +
    +
    +

    Description

    +
    +
    +
    +
    type point
    +
    +

    The abstract type of elements in the ordered domain.

    +
    +
    +
    +
    +
    +
    val compare : (point * point) -> order
    +
    +

    compare (pt1, pt2) returns the relation between two points +in the domain.

    +
    +
    val succ : point -> point
    +
    +

    succ item returns the successor to item. If item is +the maximum element (maxPt), then maxPt is +returned.

    +
    +
    val pred : point -> point
    +
    +

    succ item returns the successor to item. If item is +the minimum element (minPt), then minPt is +returned.

    +
    +
    val isSucc : (point * point) -> bool
    +
    +

    isSucc (pt1, pt2) returns true if pt1 is the predecessor +of pt2 and pt2 is the successor of `pt1.

    +
    +
    +
    +
    +
    +
    val minPt : point
    +
    +

    The minimum point in the domain.

    +
    +
    +
    +
    +
    +
    val maxPt : point
    +
    +

    The maximum point in the domain.

    +
    +
    +
    +
    +
    +
    +

    Example

    +
    +
    +

    Here is an example of the 8-bit character type as an interval domain.

    +
    +
    +
    +
    structure CharDom : INTERVAL_DOMAIN =
    +  struct
    +    type point = char
    +    val compare = Char.compare
    +    fun succ #"\255" = #"\255"
    +      | succ c = chr(ord c + 1)
    +    fun pred #"\000" = #"\000"
    +      | pred c = chr(ord c - 1)
    +    fun isSucc (c1, c2) = (ord c1 + 1 = ord c2)
    +    val minPt = #"\000"
    +    val maxPt = #"\255"
    +  end
    +
    +
    +
    +
    +
    +

    See Also

    + +
    +
    +
    + +
    + + diff --git a/doc/html/smlnj-lib/Util/sig-MONO_HASH_TABLE.html b/doc/html/smlnj-lib/Util/sig-MONO_HASH_TABLE.html new file mode 100644 index 0000000..e9a4c8c --- /dev/null +++ b/doc/html/smlnj-lib/Util/sig-MONO_HASH_TABLE.html @@ -0,0 +1,435 @@ + + + + + + + + + + The MONO_HASH_TABLE signature + + + + + + + + +
    +
    +
    +
    + +
    + +
    The MONO_HASH_TABLE signature
    +
    +
    +
    +
    +
    + +
    +
    +
    +
    +
    +
    +
    +

    The MONO_HASH_TABLE signature defines an interface to imperative hash +tables with monomorphic keys. The SML/NJ Library provides two +specialized implementations of this signature, as well as a functor for +constructing additional implementations.

    +
    +
    +

    The tables are implemented as an array of buckets, which are +lists of key-value pairs. The number of buckets grows with the number +of table entries.

    +
    +
    +
    +
    +

    Synopsis

    +
    +
    +
    +
    signature MONO_HASH_TABLE
    +
    +structure AtomTable :> MONO_HASH_TABLE where type Key.hash_key = Atom.atom
    +structure IntHashTable :> MONO_HASH_TABLE where type Key.hash_key = int
    +structure WordHashTable :> MONO_HASH_TABLE where type Key.hash_key = word
    +
    +
    +
    +
    +
    +

    Interface

    +
    +
    +
    +
    structure Key : HASH_KEY
    +
    +type 'a hash_table
    +
    +val mkTable : (int * exn) -> 'a hash_table
    +
    +val clear : 'a hash_table -> unit
    +
    +val insert : 'a hash_table -> (Key.hash_key * 'a) -> unit
    +
    +val insertWith  : ('a * 'a -> 'a) -> 'a hash_table -> Key.hash_key * 'a -> unit
    +val insertWithi : (Key.hash_key * 'a * 'a -> 'a)
    +      -> 'a hash_table
    +      -> Key.hash_key * 'a
    +      -> unit
    +
    +val inDomain : 'a hash_table -> Key.hash_key -> bool
    +
    +val lookup : 'a hash_table -> Key.hash_key -> 'a
    +val find : 'a hash_table -> Key.hash_key -> 'a option
    +
    +val findAndRemove : 'a hash_table -> Key.hash_key -> 'a option
    +
    +val remove : 'a hash_table -> Key.hash_key -> 'a
    +
    +val numItems : 'a hash_table ->  int
    +
    +val listItems  : 'a hash_table -> 'a list
    +val listItemsi : 'a hash_table -> (Key.hash_key * 'a) list
    +
    +val app  : ('a -> unit) -> 'a hash_table -> unit
    +val appi : ((Key.hash_key * 'a) -> unit) -> 'a hash_table -> unit
    +
    +val map  : ('a -> 'b) -> 'a hash_table -> 'b hash_table
    +val mapi : ((Key.hash_key * 'a) -> 'b) -> 'a hash_table -> 'b hash_table
    +
    +val fold  : (('a * 'b) -> 'b) -> 'b -> 'a hash_table -> 'b
    +val foldi : ((Key.hash_key * 'a * 'b) -> 'b) -> 'b -> 'a hash_table -> 'b
    +
    +val modify  : ('a -> 'a) -> 'a hash_table -> unit
    +val modifyi : ((Key.hash_key * 'a) -> 'a) -> 'a hash_table -> unit
    +
    +val filter  : ('a -> bool) -> 'a hash_table -> unit
    +val filteri : ((Key.hash_key * 'a) -> bool) -> 'a hash_table -> unit
    +
    +val copy : 'a hash_table -> 'a hash_table
    +
    +val bucketSizes : 'a hash_table -> int list
    +
    +
    +
    +
    +
    +

    Description

    +
    +
    +
    +
    structure Key : HASH_KEY
    +
    +

    This substructure defines the type of keys used to index the tables and +hash and equality functions on the key type.

    +
    +
    type 'a hash_table
    +
    +

    The type of imperative hash tables indexed by Key.hash_key values

    +
    +
    val mkTable : (int * exn) -> 'a hash_table
    +
    +

    mkTable (n, ex) creates a new hash table; the table will be initially +sized to hold at least n items. The exception ex is raised by the +lookup and remove functions +when the search key is not in the domain.

    +
    +
    val clear : 'a hash_table -> unit
    +
    +

    clear tbl removes all of the entries in the table.

    +
    +
    val insert : 'a hash_table -> (Key.hash_key * 'a) -> unit
    +
    +

    insert tbl (key, item) inserts a mapping from key to item into tbl. +Any existing mapping of key is discarded.

    +
    +
    val insertWith : ('a * 'a → 'a) -> 'a hash_table -> Key.hash_key * 'a -> unit
    +
    +

    insertWith comb (tbl, key, v) adds the mapping from key to value to tbl, +where value = comb(v', v), if tbl already contained a mapping from key +to v'; otherwise, value = v.

    +
    +
    val insertWithi : (Key.hash_key * 'a * 'a → 'a) -> 'a hash_table -> Key.hash_key * 'a -> unit
    +
    +

    insertWithi comb (tbl, key, v) adds the mapping from key to value to tbl, +where value = comb(key, v', v), if m already contained a mapping from key +to v'; otherwise, value = v.

    +
    +
    val inDomain : 'a hash_table -> Key.hash_key -> bool
    +
    +

    inDomain tbl key returns true if, and only if, key is in the +domain of the table

    +
    +
    +
    +
    +
    +
    val lookup : 'a hash_table -> Key.hash_key -> 'a
    +
    +

    lookup tbl key returns the item that key maps to if key is in +the domain of tbl. Otherwise, the table’s exception is raised.

    +
    +
    val find : 'a hash_table -> Key.hash_key -> 'a option
    +
    +

    find tbl key returns the SOME v if key is mapped to v in tbl. +Otherwise, it returns NONE.

    +
    +
    val findAndRemove : 'a hash_table → Key.hash_key → 'a option
    +
    +

    findAndRemove (tbl, key) returns SOME v and removes key from the +table if tbl maps key to v. If key is not in the domain of tbl, +then NONE is returned and tbl is unchanged.

    +
    +
    +
    +
    +
    +
    val remove : 'a hash_table -> Key.hash_key -> 'a
    +
    +

    remove tbl key returns the item that key maps to if key is in +the domain of tbl and removes it from the table. Otherwise, the +table’s exception is raised.

    +
    +
    val numItems : 'a hash_table -> int
    +
    +

    numItems tbl returns the number of entries in the table.

    +
    +
    val listItems : 'a hash_table -> 'a list
    +
    +

    listItems tbl returns a list of the items in the range of tbl.

    +
    +
    val listItemsi : 'a hash_table -> (Key.hash_key * 'a) list
    +
    +

    listItemsi tbl returns a list of the key-value entries in tbl.

    +
    +
    val app : ('a -> unit) -> 'a hash_table -> unit
    +
    +

    app f tbl applies the function f to each item in the range of tbl.

    +
    +
    val appi : ((Key.hash_key * 'a) -> unit) -> 'a hash_table -> unit
    +
    +

    appi f tbl applies the function f to each item in the +key-value entries in tbl.

    +
    +
    val map : ('a -> 'b) -> 'a hash_table -> 'b hash_table
    +
    +

    map f tbl creates a new table with an entry (key, f(lookup tbl key)) +in the new table for every key in tbl. The exception for the new +table is copied from tbl.

    +
    +
    val mapi : ((Key.hash_key * 'a) -> 'b) -> 'a hash_table -> 'b hash_table
    +
    +

    mapi f tbl creates a new table with an entry (key, f(key, lookup tbl key)) +in the new table for every key in tbl. The exception for the new +table is copied from tbl.

    +
    +
    val fold : (('a * 'b) -> 'b) -> 'b -> 'a hash_table -> 'b
    +
    +

    fold f init tbl folds the function f over the items in the range of tbl +using init as an initial value.

    +
    +
    val foldi : ((Key.hash_key * 'a * 'b) -> 'b) -> 'b -> 'a hash_table -> 'b
    +
    +

    foldi f init tbl folds the function f over the key-value entries in tbl +using init as an initial value.

    +
    +
    val modify : ('a -> 'a) -> 'a hash_table -> unit
    +
    +

    modify f tbl applies the function f for effect to the items in the +range of tbl, replacing the old items with the result of applying f.

    +
    +
    val modifyi : ((Key.hash_key * 'a) -> 'a) -> 'a hash_table -> unit
    +
    +

    modifyi f tbl applies the function f for effect to the key-value +entries in tbl, replacing the old items with the result of applying f.

    +
    +
    val filter : ('a -> bool) -> 'a hash_table -> unit
    +
    +

    filter pred tbl removes any entry (key, item) from tbl for which +pred item returns false.

    +
    +
    val filteri : ((Key.hash_key * 'a) -> bool) -> 'a hash_table -> unit
    +
    +

    filteri pred tbl removes any entry (key, item) from tbl for which +pred(key, item) returns false.

    +
    +
    val copy : 'a hash_table -> 'a hash_table
    +
    +

    copy tbl creates a copy of tbl. This expression is equivalent to

    +
    +
    +
    map (fn x => x) tbl
    +
    +
    +
    +
    val bucketSizes : 'a hash_table -> int list
    +
    +

    bucketSizes tbl returns a list of the current number of items per +bucket. This function allows users to gauge the quality of their +hashing function.

    +
    +
    +
    +
    +
    +
    +

    Instances

    +
    +
    +
    +
    structure AtomTable
    +
    +

    This structure implements hash tables keyed by the +Atom.atom type.

    +
    +
    +
    +
    +
    +
    structure IntHashTable
    +
    +

    This structure implements hash tables keyed by the default int type.

    +
    +
    +
    +
    +
    +
    structure WordHashTable
    +
    +

    This structure implements hash tables keyed by the default word type.

    +
    +
    +
    +
    +
    +
    +

    See Also

    + +
    +
    +
    + +
    + + diff --git a/doc/html/smlnj-lib/Util/sig-ORD_KEY.html b/doc/html/smlnj-lib/Util/sig-ORD_KEY.html new file mode 100644 index 0000000..1f0687c --- /dev/null +++ b/doc/html/smlnj-lib/Util/sig-ORD_KEY.html @@ -0,0 +1,209 @@ + + + + + + + + + + The ORD_KEY signature + + + + + + + + +
    +
    +
    +
    + +
    + +
    The ORD_KEY signature
    +
    +
    +
    +
    +
    + +
    +
    +
    +
    +
    +
    +
    +

    The ORD_KEY signature defines an interface to an abstract type +of keys with a comparison function. This signature is used as +the argument to various implementations of finite maps and sets.

    +
    +
    +
    +
    +

    Synopsis

    +
    +
    +
    +
    signature ORD_KEY
    +
    +
    +
    +
    +
    +

    Interface

    +
    +
    +
    +
    type ord_key
    +
    +val compare : ord_key * ord_key -> order
    +
    +
    +
    +
    +
    +

    Description

    +
    +
    +
    +
    type ord_key
    +
    +

    The abstract key type.

    +
    +
    val compare : ord_key * ord_key -> order
    +
    +

    compare (k1, k2) returns the relation between the two keys. This +function should define a total order on the ord_key type.

    +
    +
    +
    +
    +
    +
    +

    See Also

    + +
    +
    +
    + +
    + + diff --git a/doc/html/smlnj-lib/Util/sig-ORD_MAP.html b/doc/html/smlnj-lib/Util/sig-ORD_MAP.html new file mode 100644 index 0000000..994c9e9 --- /dev/null +++ b/doc/html/smlnj-lib/Util/sig-ORD_MAP.html @@ -0,0 +1,670 @@ + + + + + + + + + + The ORD_MAP signature + + + + + + + + +
    +
    +
    +
    + +
    + +
    The ORD_MAP signature
    +
    +
    +
    +
    +
    + +
    +
    +
    +
    +
    +
    +
    +

    The ORD_MAP signature defines an interface to finite maps +over ordered keys. The SML/NJ Library provides a number of +different implementations of this interface. Functors are +provided for constructing maps for user-defined key types; +in addition, a number of instances for specific types +are also provided.

    +
    +
    +
    +
    +

    Synopsis

    +
    +
    +
    +
    signature ORD_MAP
    +
    +structure AtomMap : ORD_MAP where type Key.ord_key = Atom.atom
    +structure AtomBinaryMap : ORD_MAP where type Key.ord_key = Atom.atom
    +structure AtomRedBlackMap : ORD_MAP where type Key.ord_key = Atom.atom
    +structure IntBinaryMap : ORD_MAP where type Key.ord_key = int
    +structure IntListMap : ORD_MAP where type Key.ord_key = int
    +structure IntRedBlackMap : ORD_MAP where type Key.ord_key = int
    +structure WordRedBlackMap : ORD_MAP where type Key.ord_key = word
    +
    +
    +
    +
    +
    +

    Interface

    +
    +
    +
    +
    structure Key : ORD_KEY
    +
    +type 'a map
    +
    +val empty : 'a map
    +
    +val isEmpty : 'a map -> bool
    +
    +val singleton : (Key.ord_key * 'a) -> 'a map
    +
    +val insert  : 'a map * Key.ord_key * 'a -> 'a map
    +val insert' : ((Key.ord_key * 'a) * 'a map) -> 'a map
    +
    +val insertWith  : ('a * 'a -> 'a) -> 'a map * Key.ord_key * 'a -> 'a map
    +val insertWithi : (Key.ord_key * 'a * 'a -> 'a) -> 'a map * Key.ord_key * 'a -> 'a map
    +
    +val find : 'a map * Key.ord_key -> 'a option
    +
    +val lookup : 'a map * Key.ord_key -> 'a
    +
    +val inDomain : ('a map * Key.ord_key) -> bool
    +
    +val remove : 'a map * Key.ord_key -> 'a map * 'a
    +
    +val findAndRemove : 'a map * Key.ord_key -> ('a map * 'a) option
    +
    +val first : 'a map -> 'a option
    +val firsti : 'a map -> (Key.ord_key * 'a) option
    +
    +val numItems : 'a map ->  int
    +
    +val listItems  : 'a map -> 'a list
    +val listItemsi : 'a map -> (Key.ord_key * 'a) list
    +
    +val listKeys : 'a map -> Key.ord_key list
    +
    +val collate : ('a * 'a -> order) -> ('a map * 'a map) -> order
    +
    +val unionWith  : ('a * 'a -> 'a) -> ('a map * 'a map) -> 'a map
    +val unionWithi : (Key.ord_key * 'a * 'a -> 'a) -> ('a map * 'a map) -> 'a map
    +
    +val intersectWith  : ('a * 'b -> 'c) -> ('a map * 'b map) -> 'c map
    +val intersectWithi : (Key.ord_key * 'a * 'b -> 'c) -> ('a map * 'b map) -> 'c map
    +
    +val mergeWith : ('a option * 'b option -> 'c option)
    +      -> ('a map * 'b map) -> 'c map
    +val mergeWithi : (Key.ord_key * 'a option * 'b option -> 'c option)
    +      -> ('a map * 'b map) -> 'c map
    +
    +val app  : ('a -> unit) -> 'a map -> unit
    +val appi : ((Key.ord_key * 'a) -> unit) -> 'a map -> unit
    +
    +val map  : ('a -> 'b) -> 'a map -> 'b map
    +val mapi : (Key.ord_key * 'a -> 'b) -> 'a map -> 'b map
    +
    +val foldl  : ('a * 'b -> 'b) -> 'b -> 'a map -> 'b
    +val foldli : (Key.ord_key * 'a * 'b -> 'b) -> 'b -> 'a map -> 'b
    +val foldr  : ('a * 'b -> 'b) -> 'b -> 'a map -> 'b
    +val foldri : (Key.ord_key * 'a * 'b -> 'b) -> 'b -> 'a map -> 'b
    +
    +val filter  : ('a -> bool) -> 'a map -> 'a map
    +val filteri : (Key.ord_key * 'a -> bool) -> 'a map -> 'a map
    +
    +val mapPartial  : ('a -> 'b option) -> 'a map -> 'b map
    +val mapPartiali : (Key.ord_key * 'a -> 'b option) -> 'a map -> 'b map
    +
    +val exists : ('a -> bool) -> 'a map -> bool
    +val existsi : (Key.ord_key * 'a -> bool) -> 'a map -> bool
    +val all : ('a -> bool) -> 'a map -> bool
    +val alli : (Key.ord_key * 'a -> bool) -> 'a map -> bool
    +
    +
    +
    +
    +
    +

    Description

    +
    +
    +
    +
    structure Key : ORD_KEY
    +
    +

    This substructure defines the type of keys used to index the maps and +the comparison function used to order them.

    +
    +
    type 'a map
    +
    +

    A finite map from Key.ord_key values to 'b values.

    +
    +
    val empty : 'a map
    +
    +

    The empty map.

    +
    +
    val isEmpty : 'a map -> bool
    +
    +

    isEmpty m returns true if, and only if, m is empty.

    +
    +
    val singleton : (Key.ord_key * 'a) -> 'a map
    +
    +

    singleton (key, v) creates the singleton map that maps key to v.

    +
    +
    val insert : 'a map * Key.ord_key * 'a -> 'a map
    +
    +

    insert (m, key, v) adds the mapping from key to v to m. +This mapping overrides any previous mapping from key.

    +
    +
    val insert' : ((Key.ord_key * 'a) * 'a map) -> 'a map
    +
    +

    insert' ((key, v), map) adds the mapping from key to v to m. +This mapping overrides any previous mapping from key.

    +
    +
    val insertWith : ('a * 'a -> 'a) -> 'a map * Key.ord_key * 'a -> 'a map
    +
    +

    insertWith comb (m, key, v) adds the mapping from key to value to m, +where value = comb(v', v), if m already contained a mapping from key +to v'; otherwise, value = v.

    +
    +
    val insertWithi : (Key.ord_key * 'a * 'a -> 'a) -> 'a map * Key.ord_key * 'a -> 'a map
    +
    +

    insertWithi comb (m, key, v) adds the mapping from key to value to m, +where value = comb(key, v', v), if m already contained a mapping from key +to v'; otherwise, value = v.

    +
    +
    val find : 'a map * Key.ord_key -> 'a option
    +
    +

    find (m, key) returns SOME v, if m maps key to v and NONE otherwise.

    +
    +
    val lookup : 'a map * Key.ord_key -> 'a
    +
    +

    lookup (m, key) returns v, if m maps key to v; otherwise it +raises the exception NotFound.

    +
    +
    val inDomain : ('a map * Key.ord_key) -> bool
    +
    +

    inDomain (m, key) returns true if key is in the domain of m.

    +
    +
    val remove : 'a map * Key.ord_key -> 'a map * 'a
    +
    +

    remove (m, key) returns the pair (m', v), if m maps key to v +and where m' is m with key removed from its domain. If key +is not in the domain of m, then it raises the exception +NotFound.

    +
    +
    val findAndRemove : 'a map * Key.ord_key -> ('a map * 'a) option
    +
    +

    findAndRemove (m, key) returns SOME(m', v), if m maps key to v +and where m' is m with key removed from its domain. If key +is not in the domain of m, then it returns NONE.

    +
    +
    val first : 'a map -> 'a option
    +
    +

    first m returns SOME item when item is the value associated with +the first (or smallest) key in the domain of the map m. It returns +NONE when the map is empty.

    +
    +
    val firsti : 'a map -> (Key.ord_key * 'a) option
    +
    +

    first m returns SOME(key, item) when key is the first (or smallest) +key in the domain of the map m and key maps to item. It returns +NONE when the map is empty.

    +
    +
    val numItems : 'a map -> int
    +
    +

    numItems m returns the size of m's domain.

    +
    +
    val listItems : 'a map -> 'a list
    +
    +

    listItems m returns a list of the values in the range of m. +Note that this list will contain duplicates when multiple keys in +m's domain map to the same value.

    +
    +
    val listItemsi : 'a map -> (Key.ord_key * 'a) list
    +
    +

    listItemsi m returns a list of the key-value pairs in m.

    +
    +
    val listKeys : 'a map -> Key.ord_key list
    +
    +

    listKeys m returns a list of the keys in the domain of m.

    +
    +
    val equiv : ('a * 'b -> order) -> ('a map * 'b map) -> bool
    +
    +

    equiv eqV (m1, m2) returns true if the two maps have the same domains +and if, for all x in the domain of the maps, eqV(lookup(m1, x), lookup(m2, x)) +evaluates to true.

    +
    +
    val collate : ('a * 'b -> order) -> ('a map * 'b map) -> order
    +
    +

    collate cmpV (m1, m2) returns the order of the two maps, where cmpV is +used to compare the values in the range of the maps.

    +
    +
    val extends : ('a * 'b -> order) -> ('a map * 'b map) -> order
    +
    +

    extends exV (m1, m2) returns true if the domain of m2 is a subset of the +domain of m1 and if, for all x in the domain of m2, +exV(lookup(m1, x), lookup(m2, x)) evaluates to true.

    +
    +
    val unionWith : ('a * 'a -> 'a) -> ('a map * 'a map) -> 'a map
    +
    +

    unionWith comb (m1, m2) returns the union of the two maps, using the function comb +to combine values when there is a collision of keys. More formally, this expression +returns the map

    +
    +
    +\[ \begin{array}{l} + \{ (k, \mathtt{m1}(k)) + \;|\;k \in \mathbf{dom}(\mathtt{m1}) \setminus \mathbf{dom}(\mathtt{m2}) \} + \cup \\ + \{ (k, \mathtt{m2}(k)) + \;|\;k \in \mathbf{dom}(\mathtt{m2}) \setminus \mathbf{dom}(\mathtt{m1}) \} + \cup \\ + \{ (k, \mathtt{comb}(\mathtt{m1}(k), \mathtt{m2}(k)) + \;|\;k \in \mathbf{dom}(\mathtt{m1}) \cap \mathbf{dom}(\mathtt{m2}) \} + \end{array}\] +
    +
    +
    +

    For example, we could implement a multiset of keys by mapping keys to their +multiplicity. Then, the union of two multisets could be defined by

    +
    +
    +
    +
    fun union (ms1, ms2) = unionWith Int.+ (ms1, ms2)
    +
    +
    +
    +
    val unionWithi : (Key.ord_key * 'a * 'a -> 'a) -> ('a map * 'a map) -> 'a map
    +
    +

    unionWithi comb (m1, m2) returns the union of the two maps, using the function comb +to combine values when there is a collision of keys. More formally, this expression +returns the map

    +
    +
    +\[ \begin{array}{l} + \{ (k, \mathtt{m1}(k)) + \;|\;k \in \mathbf{dom}(\mathtt{m1}) \setminus \mathbf{dom}(\mathtt{m2}) \} + \cup \\ + \{ (k, \mathtt{m2}(k)) + \;|\;k \in \mathbf{dom}(\mathtt{m2}) \setminus \mathbf{dom}(\mathtt{m1}) \} + \cup \\ + \{ (k, \mathtt{comb}(k, \mathtt{m1}(k), \mathtt{m2}(k)) + \;|\;k \in \mathbf{dom}(\mathtt{m1}) \cap \mathbf{dom}(\mathtt{m2}) \} + \end{array}\] +
    +
    +
    +
    val intersectWith : ('a * 'b -> 'c) -> ('a map * 'b map) -> 'c map
    +
    +

    intersectWith comb (m1, m2) returns the intersection of the two maps, +where the values in the range are a computed by applying the function +comb to the values from the two maps. More formally, this expression +returns the map

    +
    +
    +\[ \{ (k, \mathtt{comb}(\mathtt{m1}(k), \mathtt{m2}(k)) + \;|\;k \in \mathbf{dom}(\mathtt{m1}) \cap \mathbf{dom}(\mathtt{m2}) \}\] +
    +
    +
    +
    val intersectWithi : (Key.ord_key * 'a * 'b -> 'c) -> ('a map * 'b map) -> 'c map
    +
    +

    intersectWithi comb (m1, m2) returns the intersection of the two maps, +where the values in the range are a computed by applying the function +comb to the kay and the values from the two maps. More formally, this +expression returns the map

    +
    +
    +\[ \{ (k, \mathtt{comb}(k, \mathtt{m1}(k), \mathtt{m2}(k)) + \;|\;k \in \mathbf{dom}(\mathtt{m1}) \cap \mathbf{dom}(\mathtt{m2}) \}\] +
    +
    +
    +
    val mergeWith : ('a option * 'b option -> 'c option) -> ('a map * 'b map) -> 'c map
    +
    +

    mergeWith comb (m1, m2) merges the two maps using the function comb +as a decision procedure for adding elements to the new map. For each key +\(\mathtt{key} \in \mathbf{dom}(\mathtt{m1}) \cup \mathbf{dom}(\mathtt{m2})\), +we evaluate comb(optV1, optV2), where optV1 is SOME v if +\((\mathtt{key}, \mathtt{v}) \in \mathtt{m1}\) and is NONE if +latexmath:[\mathtt{key} \not\in \mathbf{dom}(\mathtt{m1}); likewise for optV2. +If comb(optV1, optV2) returns SOME v', then we add (key, v') +to the result.

    +
    +

    The mergeWith function is a generalization of the unionWith and +intersectionWith functions.

    +
    +
    +
    val mergeWithi : (Key.ord_key * 'a option * 'b option -> 'c option) -> ('a map * 'b map) -> 'c map
    +
    +

    mergeWithi comb (m1, m2) merges the two maps using the function comb +as a decision procedure for adding elements to the new map. The difference +between this function and mergeWith is that the comb function takes the +key value in addition to the optional values from the range.

    +
    +
    val app : ('a -> unit) -> 'a map -> unit
    +
    +

    app f m applies the function f to the values in the range of m.

    +
    +
    val appi : ((Key.ord_key * 'a) -> unit) -> 'a map -> unit
    +
    +

    appi f map applies the function f to the key-value pairs that +define m.

    +
    +
    val map : ('a -> 'b) -> 'a map -> 'b map
    +
    +

    map f m creates a new finite map m' by applying the function f to the +values in the range of m. Thus, if +\((\mathtt{key}, \mathtt{v}) \in \mathtt{m}\), then +(key, f v) will be in m'.

    +
    +
    val mapi : (Key.ord_key * 'a -> 'b) -> 'a map -> 'b map
    +
    +

    mapi f m creates a new finite map m' by applying the function f to the +key-value pairs of m. Thus, if +\((\mathtt{key}, \mathtt{v}) \in \mathtt{m}\), then +(key, f(key, v)) will be in m'.

    +
    +
    val foldl : ('a * 'b -> 'b) -> 'b -> 'a map -> 'b
    +
    +

    foldl fl init m folds the function f over the range of +m using init as the initial value. Items are processed in +increasing order of their key values.

    +
    +
    val foldli : (Key.ord_key * 'a * 'b -> 'b) -> 'b -> 'a map -> 'b
    +
    +

    foldli f init m folds the function f over the key-value pairs in +m using init as the initial value. Items are processed in +increasing order of their key values.

    +
    +
    val foldr : ('a * 'b -> 'b) -> 'b -> 'a map -> 'b
    +
    +

    foldr fl init m folds the function f over the range of +m using init as the initial value. Items are processed in +decreasing order of their key values.

    +
    +
    val foldri : (Key.ord_key * 'a * 'b -> 'b) -> 'b -> 'a map -> 'b
    +
    +

    foldri f init m folds the function f over the key-value pairs in +m using init as the initial value. Items are processed in +decreasing order of their key values.

    +
    +
    val filter : ('a -> bool) -> 'a map -> 'a map
    +
    +

    filter pred m filters out those items (key, v) from m, such that +pred v returns false. More formally, this expression returns the map +\(\{ (\mathtt{key}, \mathtt{v})\;|\;\mathtt{key} \in \mathbf{dom}(\mathtt{m}) +\wedge \mathtt{pred}(\mathtt{v}) \}\).

    +
    +
    val filteri : (Key.ord_key * 'a -> bool) -> 'a map -> 'a map
    +
    +

    filteri pred m filters out those items (key, v) from m, such that +pred(key, v) returns false. More formally, this expression returns the map +\(\{ (\mathtt{key}, \mathtt{v})\;|\;\mathtt{key} \in \mathbf{dom}(\mathtt{m}) +\wedge \mathtt{pred}(\mathtt{key}, \mathtt{v}) \}\).

    +
    +
    val mapPartial : ('a -> 'b option) -> 'a map -> 'b map
    +
    +

    mapPartial f m maps the partial function f over the items of m. +More formally, this expression returns the map

    +
    +
    +
    +
    +
    +\[ \{ (k, v') \;|\; (k, v) \in \mathtt{m} \wedge \mathtt{f}(v) = \mathtt{SOME}(v') \}\] +
    +
    +
    +
    +
    val mapPartiali : (Key.ord_key * 'a -> 'b option) -> 'a map -> 'b map
    +
    +

    mapPartiali f m maps the partial function f over the items of m. +More formally, this expression returns the map

    +
    +
    +
    +
    +
    +\[ \{ (k, v') \;|\; (k, v) \in \mathtt{m} \wedge \mathtt{f}(k, v) = \mathtt{SOME}(v') \}\] +
    +
    +
    +
    +
    val exists : ('a -> bool) -> 'a map -> bool
    +
    +

    exists pred m returns true if, and only if, there exists an item +\((\mathtt{key}, \mathtt{v}) \in \mathtt{m}\), +such that pred v returns true.

    +
    +
    val existsi : (Key.ord_key * 'a -> bool) -> 'a map -> bool
    +
    +

    exists pred m returns true if, and only if, there exists an item +\((\mathtt{key}, \mathtt{v}) \in \mathtt{m}\), such that +pred(key, v) returns true.

    +
    +
    val all : ('a -> bool) -> 'a map -> bool
    +
    +

    all pred m returns true if, and only if, pred v returns true +for all items \((\mathtt{key}, \mathtt{v}) \in \mathtt{m}\).

    +
    +
    val alli : (Key.ord_key * 'a -> bool) -> 'a map -> bool
    +
    +

    all pred m returns true if, and only if, pred(key, v) returns true +for all items \((\mathtt{key}, \mathtt{v}) \in \mathtt{m}\).

    +
    +
    +
    +
    +
    +
    +

    Instances

    +
    +
    +
    +
    structure AtomMap
    +
    +

    This structure is an alias for AtomRedBlackMap.

    +
    +
    +
    +
    +
    +
    structure AtomBinaryMap
    +
    +

    Maps over atoms implemented using balanced binary trees. +Note that it is recommended that one use the AtomMap structure +as it provides better performance.

    +
    +
    +
    +
    +
    +
    structure AtomRedBlackMap
    +
    +

    Maps over atoms implemented using red-black trees.

    +
    +
    +
    +
    +
    +
    structure IntBinaryMap
    +
    +

    Maps over ints implemented using balanced binary trees. +Note that it is recommended that one use the IntRedBlackMap structure +as it provides better performance.

    +
    +
    +
    +
    +
    +
    structure IntListMap
    +
    +

    Maps over words implemented using sorted lists. This implementation +is fast for small sets, but does not scale well to large sizes.

    +
    +
    +
    +
    +
    +
    structure IntRedBlackMap
    +
    +

    Maps over ints implemented using red-black binary trees.

    +
    +
    +
    +
    +
    +
    structure WordRedBlackMap
    +
    +

    Maps over words implemented using red-black binary trees.

    +
    +
    +
    +
    +
    + +
    +
    + +
    + + diff --git a/doc/html/smlnj-lib/Util/sig-ORD_SET.html b/doc/html/smlnj-lib/Util/sig-ORD_SET.html new file mode 100644 index 0000000..b9bbe02 --- /dev/null +++ b/doc/html/smlnj-lib/Util/sig-ORD_SET.html @@ -0,0 +1,569 @@ + + + + + + + + + + The ORD_SET signature + + + + + + + + +
    +
    +
    +
    + +
    + +
    The ORD_SET signature
    +
    +
    +
    +
    +
    + +
    +
    +
    +
    +
    +
    +
    +

    The ORD_SET signature defines an interface to finite sets of +ordered elements. The SML/NJ Library provides a number of +different implementations of this interface. Functors are +provided for constructing sets for user-defined item types; +in addition, a number of instances for specific types +are also provided.

    +
    +
    +
    +
    +

    Synopsis

    +
    +
    +
    +
    signature ORD_SET
    +
    +structure AtomSet : ORD_SET where type Key.ord_key = Atom.atom
    +structure AtomBinarySet : ORD_SET where type Key.ord_key = Atom.atom
    +structure AtomRedBlackSet : ORD_SET where type Key.ord_key = Atom.atom
    +structure IntBinarySet : ORD_SET where type Key.ord_key = int
    +structure IntListSet : ORD_SET where type Key.ord_key = int
    +structure IntRedBlackSet : ORD_SET where type Key.ord_key = int
    +structure WordRedBlackSet : ORD_SET where type Key.ord_key = word
    +
    +
    +
    +
    +
    +

    Interface

    +
    +
    +
    +
    structure Key : ORD_KEY
    +
    +type item = Key.ord_key
    +type set
    +
    +val empty : set
    +
    +val singleton : item -> set
    +
    +val fromList : item list -> set
    +
    +val toList : set -> item list
    +
    +val add  : set * item -> set
    +val add' : (item * set) -> set
    +
    +val addList : set * item list -> set
    +
    +val subtract  : set * item -> set
    +val subtract' : (item * set) -> set
    +
    +val subtractList : set * item list -> set
    +
    +val delete : set * item -> set
    +
    +val member : set * item -> bool
    +
    +val isEmpty : set -> bool
    +
    +val minItem : set -> item
    +val maxItem : set -> item
    +
    +val equal : (set * set) -> bool
    +
    +val compare : (set * set) -> order
    +
    +val isSubset : (set * set) -> bool
    +
    +val disjoint : set * set -> bool
    +
    +val numItems : set ->  int
    +
    +val listItems : set -> item list
    +
    +val union : set * set -> set
    +val intersection : set * set -> set
    +val difference : set * set -> set
    +
    +val map : (item -> item) -> set -> set
    +val mapPartial : (item -> item option) -> set -> set
    +val app : (item -> unit) -> set -> unit
    +val foldl : (item * 'b -> 'b) -> 'b -> set -> 'b
    +val foldr : (item * 'b -> 'b) -> 'b -> set -> 'b
    +
    +val partition : (item -> bool) -> set -> (set * set)
    +val filter : (item -> bool) -> set -> set
    +
    +val exists : (item -> bool) -> set -> bool
    +val all : (item -> bool) -> set -> bool
    +
    +val find : (item -> bool) -> set -> item option
    +
    +
    +
    +
    +
    +

    Description

    +
    +
    +
    +
    `structure Key : ORD_KEY
    +
    +

    This substructure defines the type of elements in the set and +the comparison function used to order them.

    +
    +
    type item = Key.ord_key
    +
    +

    The type of elements in the set.

    +
    +
    type set
    +
    +

    A finite set of item values.

    +
    +
    val empty : set
    +
    +

    The empty set.

    +
    +
    val singleton : item -> set
    +
    +

    singleton item returns a singleton set containing item.

    +
    +
    val fromList : item list -> set
    +
    +

    fromList items returns the set containing the list of items.

    +
    +
    +
    +
    +
    +
    val toList : set -> item list
    +
    +

    toList set returns a list of the items in set. The items will be +sorted in increasing order.

    +
    +
    val add : set * item -> set
    +
    +

    add (set, item) adds the item to the set.

    +
    +
    val add' : (item * set) -> set
    +
    +

    add' (item, set) adds the item to the set.

    +
    +
    val addList : set * item list -> set
    +
    +

    addList (set, items) adds the list of items to the set.

    +
    +
    val subtract : set * item -> set
    +
    +

    subtract (set, item) removes the object item from set. +Acts as the identity if item is not in the set.

    +
    +
    val subtract' : (item * set) -> set
    +
    +

    subtract' (item, set) removes the object item from set. +Acts as the identity if item is not in the set.

    +
    +
    val subtractList : set * item list -> set
    +
    +

    subtractList (set, items) removes the items from the set.

    +
    +
    val delete : set * item -> set
    +
    +

    delete (set, item) removes the object item from set. +Unlike subtract, the delete function raises the +NotFound +exception if item is not in the set.

    +
    +
    val member : set * item -> bool
    +
    +

    member (item, set) returns true if, and only if, item +is an element of set.

    +
    +
    val isEmpty : set -> bool
    +
    +

    isEmpty set returns true if, and only if, set is empty.

    +
    +
    val minItem : set -> item
    +
    +

    minItem set returns the smallest element of the set. This function +raises the Empty +exception if the set is empty.

    +
    +
    val maxItem : set -> item
    +
    +

    minItem set returns the largest element of the set. This function +raises the Empty +exception if the set is empty.

    +
    +
    val equal : (set * set) -> bool
    +
    +

    equal (set1, set2) returns true if, and only if, the two +sets are equal (i.e., they contain the same elements).

    +
    +
    val compare : (set * set) -> order
    +
    +

    compare (set1, set2) returns the lexical order of +the two sets.

    +
    +
    val isSubset : (set * set) -> bool
    +
    +

    isSubset (set1, set2) returns true if, and only if, set1 +is a subset of set2 (i.e., any element of set1 is an +element of set2).

    +
    +
    val disjoint : set * set -> bool
    +
    +

    equal (set1, set2) returns true if, and only if, the two +sets are disjoint (i.e., their intersection is empty).

    +
    +
    val numItems : set -> int
    +
    +

    numItems set returns the number of items in the set.

    +
    +
    val union : set * set -> set
    +
    +

    union (set1, set2) returns the union of the two sets.

    +
    +
    val intersection : set * set -> set
    +
    +

    intersection (set1, set2) returns the intersection of the two sets.

    +
    +
    val difference : set * set -> set
    +
    +

    difference (set1, set2) returns the difference of the two sets; +i.e., the set of items that are in set1, but not in +set2.

    +
    +
    val map : (item -> item) -> set -> set
    +
    +

    map f set constructs a new set from the result of applying the +function f to the elements of set. This expression is +equivalent to

    +
    +
    +
    fromList (List.map f (toList set))
    +
    +
    +
    +
    val mapPartial : (item -> item option) -> set -> set`
    +
    +

    mapPartial f set constructs a new set from the result of applying the +function f to the elements of set. This expression is +equivalent to

    +
    +
    +
    fromList (List.mapPartial f (toList set))
    +
    +
    +
    +
    val app : (item -> unit) -> set -> unit
    +
    +

    app f set applies the function f to the items in set. +This expression is equivalent to

    +
    +
    +
    List.app f (toList set)
    +
    +
    +
    +
    val foldl : (item * 'b -> 'b) -> 'b -> set -> 'b
    +
    +

    foldl f init set folds the function f over the items in +set in increasing order using init as the initial value. +This expression is equivalent to

    +
    +
    +
    List.foldl f init (toList set)
    +
    +
    +
    +
    val foldr : (item * 'b -> 'b) -> 'b -> set -> 'b
    +
    +

    foldr f init set folds the function f over the items in +set in decreasing order using init as the initial value. +This expression is equivalent to

    +
    +
    +
    List.foldr f init (toList set)
    +
    +
    +
    +
    val partition : (item -> bool) -> set -> (set * set)
    +
    +

    partition pred set returns a pair of disjoint sets (tSet, fSet), where + the predicate pred returns true for every element of tSet, +false for every element of fSet, and set is the union of tSet + and fSet.

    +
    +
    val filter : (item -> bool) -> set -> set
    +
    +

    filter pred set filters out any elements of set for which the +predicate pred returns false. +This expression is equivalent to

    +
    +
    +
    #1 (partition pred set)
    +
    +
    +
    +
    val exists : (item -> bool) -> set -> bool
    +
    +

    all pred set returns true if, and only if, pred item returns +true for all elements item in set. Elements are checked in increasing +order.

    +
    +
    val all : (item -> bool) -> set -> bool
    +
    +

    exists pred set returns true if, and only if, there exists an +element item in set such that pred item returns true. +Elements are checked in increasing order.

    +
    +
    val find : (item -> bool) -> set -> item option
    +
    +

    find pred set returns SOME item if there exists an object item +in the set for which pred item returns true; otherwise NONE is +returned. Items are tested in increasing order.

    +
    +
    +
    +
    +

    Deprecated functions

    +
    +

    The following functions are part of the interface, but have been +deprecated.

    +
    +
    +
    +
    val listItems : set -> item list`
    +
    +

    Use toList instead.

    +
    +
    +
    +
    +
    +
    +
    +

    Instances

    +
    +
    +
    +
    structure AtomSet
    +
    +

    This structure is an alias for AtomRedBlackSet.

    +
    +
    +
    +
    +
    +
    structure AtomBinarySet
    +
    +

    Sets of atoms implemented using balanced binary trees. +Note that it is recommended that one use the AtomSet structure +as it provides better performance.

    +
    +
    +
    +
    +
    +
    structure AtomRedBlackSet
    +
    +

    Sets of atoms implemented using red-black trees.

    +
    +
    +
    +
    +
    +
    structure IntBinarySet
    +
    +

    Sets of ints implemented using balanced binary trees. +Note that it is recommended that one use the IntRedBlackSet structure +as it provides better performance.

    +
    +
    +
    +
    +
    +
    structure IntListSet
    +
    +

    Sets of words implemented using sorted lists. This implementation +is fast for small sets, but does not scale well to large sizes.

    +
    +
    +
    +
    +
    +
    structure IntRedBlackSet
    +
    +

    Sets of ints implemented using red-black binary trees.

    +
    +
    +
    +
    +
    +
    structure WordRedBlackSet
    +
    +

    Sets of words implemented using red-black binary trees.

    +
    +
    +
    +
    +
    + +
    +
    + +
    + + diff --git a/doc/html/smlnj-lib/Util/smlnj-lib.html b/doc/html/smlnj-lib/Util/smlnj-lib.html new file mode 100644 index 0000000..f061404 --- /dev/null +++ b/doc/html/smlnj-lib/Util/smlnj-lib.html @@ -0,0 +1,477 @@ + + + + + + + + + + The Util Library + + + + + + + + +
    +
    +
    +
    + +
    + +
    The Util Library
    +
    +
    +
    +
    +
    + +
    +
    +
    +
    +
    +

    Overview

    +
    +
    +

    The Util Library provides a large collection of utility data structures +and algorithms. It is the core library in the SML/NJ Library suite +and dates back to the early 1990’s, where it was originally developed +by Emden Gansner and John Reppy as part of the eXene X11 toolkit. +Modules from this library are autoloaded into the SML/NJ interactive +environment by default.

    +
    +
    +
    +
    +

    Contents

    +
    +
    +
    +
    structure ANSITerm
    +
    +

    provides support for displaying stylized text using the +ANSI escape codes.

    +
    +
    structure ArrayQSort
    +
    +

    Provides in situ sorting of polymorphic arrays +using the quicksort algorithm.

    +
    +
    functor ArrayQSortFn
    +
    +

    Provides in situ sorting of monomorphic arrays +using the quicksort algorithm.

    +
    +
    structure Atom
    +
    +

    provides hashed strings that have fast equality testing.

    +
    +
    structure Base64
    +
    +

    Provides support for Base-64 encoding/decoding as specified +by RFC 4648.

    +
    +
    functor BinaryMapFn
    +
    +

    Provides a balanced-binary-tree implementation of the +ORD_MAP signature parameterized over the key type.

    +
    +
    functor BinarySetFn
    +
    +

    Provides a balanced-binary-tree implementation of the +ORD_SET signature parameterized over the element type.

    +
    +
    structure BitArray
    +
    +

    Provides mutable arrays of booleans represented by one bit per element.

    +
    +
    functor BSearchFn
    +
    +

    Provides binary search on sorted monomorphic arrays.

    +
    +
    structure CharMap
    +
    +

    Provides fast, read-only, maps from 8-bit characters to values.

    +
    +
    +
    +
    +
    +
    structure DynamicArray
    +
    +

    Provides dynamically sized polymorphic arrays.

    +
    +
    functor DynamicArrayFn
    +
    +

    Provides dynamically sized monomorphic arrays.

    +
    +
    structure EditDistance
    +
    +

    Provides a function for computing Levenshtein between distance between strings.

    +
    +
    structure Fifo
    +
    +

    Provides a functional queue data structure.

    +
    +
    structure FNVHash
    +
    +

    Provides an implementation of the +Fowler-Noll-Vo +hashing algorithm.

    +
    +
    structure Format
    +
    +

    Provides printf-style string formatting.

    +
    +
    structure FormatComb
    +
    +

    To be written

    +
    +
    structure GetOpt
    +
    +

    Provides command-line argument processing.

    +
    +
    functor GraphSCCFn
    +
    +

    Provides an algorithm for computing the strongly-connected +components of a directed graph.

    +
    +
    functor Hash2TableFn
    +
    +

    Provides hash tables that are keyed by two different key types.

    +
    +
    +
    +
    +
    +
    signature HASH_KEY
    +
    +

    Defines an interface to a monomorphic type with an equality +test and hashing function.

    +
    +
    functor HashSetFn
    +
    +

    Provides a hash-table-based implementation of imperative sets +parameterized over a Key structure.

    +
    +
    structure HashString
    +
    +

    Provides hashing functions for strings and substrings.

    +
    +
    structure HashTable
    +
    +

    Provides an implementation of polymorphic hash tables.

    +
    +
    functor HashTableFn
    +
    +

    Provides a functor that implements the +MONO_HASH_TABLE signature.

    +
    +
    signature INTERVAL_DOMAIN
    +
    +

    Defines a representation of an abstract ordered domain that is +used as an argument to the IntervalSetFn +functor.

    +
    +
    functor IntervalSetFn
    +
    +

    Provides sets over a discrete ordered domain, where the +sets are represented by intervals.

    +
    +
    structure IOUtil
    +
    +

    Provides support for redirecting the standard input and output streams.

    +
    +
    functor KeywordFn
    +
    +

    Provides a simple way to support a table of keyword +(or reserved) identifiers in a scanner.

    +
    +
    functor LeftPriorityQFn
    +
    +

    Provides a functional implementation of priority +queues using leaftist heaps.

    +
    +
    +
    +
    +
    +
    structure LibBase
    +
    +

    Provides some common definitions that are +shared across the SML/NJ Lbrary.

    +
    +
    structure ListFormat
    +
    +

    Provides some utility functions for converting +lists into strings (and back).

    +
    +
    functor ListMapFn
    +
    +

    Provides a sorted-list implementation of the +ORD_MAP signature parameterized over the key type.

    +
    +
    structure ListMergeSort
    +
    +

    Provides a merge-sort algorithm for lists.

    +
    +
    functor ListSetFn
    +
    +

    Provides a sorted-list implementation of the +ORD_SET signature parameterized over the element type.

    +
    +
    structure ListXProd
    +
    +

    Provides list combinators for computing +over the "Cartesian product" of two lists.

    +
    +
    functor MonoArrayFn
    +
    +

    Provides easy construction of new monomorphic array structures.

    +
    +
    signature MONO_HASH_TABLE
    +
    +

    Defines an interface to imperative hash tables with monomorphic keys.

    +
    +
    structure NativeInt
    +
    +

    An alias to the native-sized integer structure (e.g., Int64).

    +
    +
    structure NativeWord
    +
    +

    An alias to the native-sized word structure (e.g., Word64).

    +
    +
    +
    +
    +
    +
    signature ORD_KEY
    +
    +

    Defines an interface to an abstract type +of keys with a comparison function.

    +
    +
    signature ORD_MAP
    +
    +

    Defines an interface to finite maps over ordered keys.

    +
    +
    signature ORD_SET
    +
    +

    Defines an interface to finite sets of ordered elements.

    +
    +
    structure ParserComb
    +
    +

    Provides parser combinators over character readers.

    +
    +
    structure PathUtil
    +
    +

    Provides support for searching for files +in the file system using a list of possible locations.

    +
    +
    structure PropList
    +
    +

    Provides a extensible, but type safe, implementation +of property lists.

    +
    +
    structure Queue
    +
    +

    Provides an imperative queue data structure.

    +
    +
    structure Rand
    +
    +

    Provides a simple random number generator.

    +
    +
    structure Random
    +
    +

    Pseudo-random-number generation using the +Mersenne Twister +algorithm.

    +
    +
    structure RealOrderStats
    +
    +

    To be written

    +
    +
    +
    +
    +
    +
    functor RedBlackMapFn
    +
    +

    Provides a red-black-tree implementation of the +ORD_MAP signature parameterized over the key type.

    +
    +
    functor RedBlackSetFn
    +
    +

    Provides a red-black-tree implementation of the +ORD_SET signature parameterized over the element type.

    +
    +
    structure Scan
    +
    +

    To be written

    +
    +
    functor SplayMapFn
    +
    +

    Provides a splay-tree implementation of the +ORD_MAP signature parameterized over the key type.

    +
    +
    functor SplaySetFn
    +
    +

    Provides a splay-tree implementation of the +ORD_SET signature parameterized over the element type.

    +
    +
    structure TimeLimit
    +
    +

    Provides a mechanism for limiting the execution +time of a computation.

    +
    +
    structure UnivariateStats
    +
    +

    To be written

    +
    +
    structure URef
    +
    +

    Provides mutable references with Union-Find semantics.

    +
    +
    structure UTF8
    +
    +

    Provides support for working with UTF-8 +encoded strings.

    +
    +
    +
    +
    +
    +
    +

    Usage

    +
    +
    +

    By default, the Util Library is autoloaded by CM, which means that it is +immediately available to interactive use in the SML/NJ REPL.

    +
    +
    +

    For SML/NJ, include $/smlnj-lib.cm in your +CM file.

    +
    +
    +

    For use in MLton, include +$(SML_LIB)/smlnj-lib/Util/smlnj-lib.mlb in your MLB file.

    +
    +
    +
    +
    +
    + +
    + + diff --git a/doc/html/smlnj-lib/Util/str-ANSITerm.html b/doc/html/smlnj-lib/Util/str-ANSITerm.html new file mode 100644 index 0000000..f9c3534 --- /dev/null +++ b/doc/html/smlnj-lib/Util/str-ANSITerm.html @@ -0,0 +1,308 @@ + + + + + + + + + + The ANSITerm structure + + + + + + + + +
    +
    +
    +
    + +
    + +
    The ANSITerm structure
    +
    +
    +
    +
    +
    + +
    +
    +
    +
    +
    +
    +
    +

    The ANSITerm structure provides support for displaying stylized +text using the ANSI escape codes.

    +
    +
    +

    Note that currently this module only supports the limited palette of +eight fixed colors.

    +
    +
    +
    +
    +

    Synopsis

    +
    +
    +
    +
    structure ANSITerm
    +
    +
    +
    +
    +
    +

    Interface

    +
    +
    +
    +
    datatype color
    +  = Black | Red | Green | Yellow | Blue | Magenta | Cyan | White | Default
    +
    +datatype style
    +  = FG of color
    +  | BG of color
    +  | BF
    +  | DIM
    +  | NORMAL
    +  | UL
    +  | UL_OFF
    +  | BLINK
    +  | BLINK_OFF
    +  | REV
    +  | REV_OFF
    +  | INVIS
    +  | INVIS_OFF
    +  | RESET
    +
    +val toString : style list -> string
    +
    +val setStyle : TextIO.outstream * style list -> unit
    +
    +
    +
    +
    +
    +

    Description

    +
    +
    +
    +
    datatype color = …​
    +
    +

    This datatype can be used to specify the eight standard colors, plus the default +color.

    +
    +
    datatype style = …​
    +
    +

    This datatype specifies the different styles that can be used when +displaying text. The styles are

    +
    +
    +
    +
    +
    FG of color
    +
    +

    specifies the foreground color of the text.

    +
    +
    BG of color
    +
    +

    specifies the background color of the text.

    +
    +
    BF
    +
    +

    specifies bold or bright text (note that this does not cancel the effect +of DIM).

    +
    +
    DIM
    +
    +

    specifies dim text (note that this does not cancel the effect +of BF).

    +
    +
    NORMAL
    +
    +

    specifies normal color and intensity (cancels the effect of +BF and DIM).

    +
    +
    UL
    +
    +

    enables underlining of the text.

    +
    +
    UL_OFF
    +
    +

    cancels underlining.

    +
    +
    BLINK
    +
    +

    enables blinking text.

    +
    +
    BLINK_OFF
    +
    +

    cancels blinking mode.

    +
    +
    REV
    +
    +

    reverses the foreground and background colors.

    +
    +
    REV_OFF
    +
    +

    cancels reverse mode.

    +
    +
    INVIS
    +
    +

    makes the text invisible.

    +
    +
    INVIS_OFF
    +
    +

    cancels invisible mode.

    +
    +
    RESET
    +
    +

    resets the style to the default mode.

    +
    +
    +
    +
    +
    +
    +
    val toString : style list -> string
    +
    +

    toString styles returns a command string that will cause the terminal +to switch to the specified styles. Specifying the empty list is +equivalent to [RESET].

    +
    +
    val setStyle : TextIO.outstream * style list -> unit
    +
    +

    setStyle (outS, styles) sets the styles for the terminal connected +to the output stream outS. Specifying the empty list is +equivalent to [RESET].

    +
    +
    +
    +
    +
    +
    +

    See Also

    + +
    +
    +
    + +
    + + diff --git a/doc/html/smlnj-lib/Util/str-ArrayQSort.html b/doc/html/smlnj-lib/Util/str-ArrayQSort.html new file mode 100644 index 0000000..b20f3bf --- /dev/null +++ b/doc/html/smlnj-lib/Util/str-ArrayQSort.html @@ -0,0 +1,207 @@ + + + + + + + + + + The ArrayQSort structure + + + + + + + + +
    +
    +
    +
    + +
    + +
    The ArrayQSort structure
    +
    +
    +
    +
    +
    + +
    +
    +
    +
    +
    +
    +
    +

    The ArrayQSort structure provides in situ sorting of polymorphic arrays +using the quicksort algorithm.

    +
    +
    +
    +
    +

    Synopsis

    +
    +
    +
    +
    signature ARRAY_SORT
    +structure ArrayQSort : ARRAY_SORT
    +
    +
    +
    +
    +
    +

    Interface

    +
    +
    +
    +
    val sort   : ('a * 'a -> order) -> 'a array -> unit
    +val sorted : ('a * 'a -> order) -> 'a array -> bool
    +
    +
    +
    +
    +
    +

    Description

    +
    +
    +
    +
    val sort : ('a * 'a -> order) -> 'a array -> unit
    +
    +

    sort cmp arr sorts the array arr into ascending order +according to the comparison function cmp.

    +
    +
    val sorted : ('a * 'a -> order) -> 'a array -> bool
    +
    +

    sorted cmp arr returns true if, and only if, the array arr= is +sorted in ascending order.

    +
    +
    +
    +
    +
    +
    +

    See Also

    + +
    +
    +
    + +
    + + diff --git a/doc/html/smlnj-lib/Util/str-Atom.html b/doc/html/smlnj-lib/Util/str-Atom.html new file mode 100644 index 0000000..2e08a53 --- /dev/null +++ b/doc/html/smlnj-lib/Util/str-Atom.html @@ -0,0 +1,271 @@ + + + + + + + + + + The Atom structure + + + + + + + + +
    +
    +
    +
    + +
    + +
    The Atom structure
    +
    +
    +
    +
    +
    + +
    +
    +
    +
    +
    +
    +
    +

    The Atom structure provides hashed strings that have fast +equality testing. The Util Library provides predefined +finite maps and sets of atoms, as well as hash tables keyed by atoms.

    +
    +
    +
    +
    +

    Synopsis

    +
    +
    +
    +
    signature ATOM
    +structure Atom : ATOM
    +
    +
    +
    +
    +
    +

    Interface

    +
    +
    +
    +
    type atom
    +
    +val atom : string -> atom
    +val atom' : substring -> atom
    +
    +val toString : atom -> string
    +
    +val same : (atom * atom) -> bool
    +val sameAtom : (atom * atom) -> bool
    +
    +val compare : (atom * atom) -> order
    +val lexCompare : (atom * atom) -> order
    +
    +val hash : atom -> word
    +
    +
    +
    +
    +
    +

    Description

    +
    +
    +
    +
    type atom
    +
    +

    The abstract type of hashed strings that support fast equality testing.

    +
    +
    val atom : string -> atom
    +
    +

    atom s returns the unique hashed representation of the string s as an atom.

    +
    +
    val atom' : substring -> atom
    +
    +

    atom ss returns the unique hashed representation of the substring ss + as an atom.

    +
    +
    val toString : atom -> string
    +
    +

    toString atm returns the string representation of the atom atm.

    +
    +
    +
    +
    +
    +
    val same : (atom * atom) -> bool
    +
    +

    same (atm1, atm2) returns true if the two atoms are the same (i.e., their +string representations are equal).

    +
    +
    val compare : (atom * atom) -> order
    +
    +

    compare (atm1, atm2) returns the relation of the two atoms in some +unspecified total order. Use lexCompare to +compare atoms lexicographically.

    +
    +
    +
    +
    +
    +
    val lexCompare : (atom * atom) -> order
    +
    +

    compare (atm1, atm2) returns the relation of the two atoms in lexical order.

    +
    +
    val hash : atom -> word
    +
    +

    hash atm returns a hash key for the atom.

    +
    +
    +
    +
    +

    Deprecated functions

    +
    +

    The following functions are part of the interface, but have been +deprecated.

    +
    +
    +
    +
    val sameAtom : (atom * atom) -> bool
    +
    +

    Use same instead.

    +
    +
    +
    +
    +
    +
    +
    +

    See Also

    + +
    +
    +
    + +
    + + diff --git a/doc/html/smlnj-lib/Util/str-Base64.html b/doc/html/smlnj-lib/Util/str-Base64.html new file mode 100644 index 0000000..c263c56 --- /dev/null +++ b/doc/html/smlnj-lib/Util/str-Base64.html @@ -0,0 +1,275 @@ + + + + + + + + + + The Base64 structure + + + + + + + + +
    +
    +
    +
    + +
    + +
    The Base64 structure
    +
    +
    +
    +
    +
    + +
    +
    +
    +
    +
    +
    +
    +

    The Base64 structure provides support for Base-64 encoding/decoding +as specified by RFC 4648.

    +
    +
    +
    +
    +

    Synopsis

    +
    +
    +
    +
    signature BASE64
    +structure Base64 : BASE64
    +
    +
    +
    +
    +
    +

    Interface

    +
    +
    +
    +
    val isBase64 : char -> bool
    +
    +val encode : Word8Vector.vector -> string
    +val encodeSlice : Word8VectorSlice.slice -> string
    +
    +exception Incomplete
    +
    +exception Invalid of (int * char)
    +
    +val decode : string -> Word8Vector.vector
    +val decodeSlice : substring -> Word8Vector.vector
    +
    +val decodeStrict : string -> Word8Vector.vector
    +val decodeSliceStrict : substring -> Word8Vector.vector
    +
    +
    +
    +
    +
    +

    Description

    +
    +
    +
    +
    val isBase64 : char -> bool
    +
    +

    isBase64 c returns true if the character c is in the Base-64 +alphabet (i.e., a letter, digit, or a slash or plus character). +Note that the padding character (the equals sign) is not considered +to be in the alphabet.

    +
    +
    val encode : Word8Vector.vector -> string
    +
    +

    encode bv returns a string that is the Base-64 encoding of +the byte vector bv.

    +
    +
    val encodeSlice : Word8VectorSlice.slice -> string
    +
    +

    encode bvs returns a string that is the Base-64 encoding of +the byte-vector slice bvs.

    +
    +
    +
    +
    +
    +
    exception Incomplete
    +
    +

    This exception is raised by the decoding functions if a Base-64 string +does not end in a complete encoding quantum (i.e., four characters including +padding characters).

    +
    +
    +
    +
    +
    +
    exception Invalid of (int * char)
    +
    +

    This exception is raised by the decoding functions if an invalid Base-64 +character is encountered. The int is the position of the character and +the char is the invalid character.

    +
    +
    val decode : string -> Word8Vector.vector
    +
    +

    decode s returns the result of decoding the Base-64 string s. +This function ignores whitespace (e.g., line breaks), but +will raise the Incomplete exception if the +last quantum is incomplete.

    +
    +
    val decodeSlice : substring -> Word8Vector.vector
    +
    +

    decode ss returns the result of decoding the Base-64 substring ss. +This function ignores whitespace (e.g., line breaks), but +will raise the Incomplete exception if the +last quantum is incomplete.

    +
    +
    val decodeStrict : string -> Word8Vector.vector
    +
    +

    decodeStrict s returns the result of decoding the Base-64 string s. +The string s maust only contain valid Base-64 characters, otherwise +the Invalid exception is raised. This function +will also raise the Incomplete exception if the +last quantum is incomplete.

    +
    +
    val decodeSliceStrict : substring -> Word8Vector.vector
    +
    +

    decode ss returns the result of decoding the Base-64 substring ss. +The string s maust only contain valid Base-64 characters, otherwise +the Invalid exception is raised. This function +will also raise the Incomplete exception if the +last quantum is incomplete.

    +
    +
    +
    +
    +
    +
    +

    See Also

    + +
    +
    +
    + +
    + + diff --git a/doc/html/smlnj-lib/Util/str-BitArray.html b/doc/html/smlnj-lib/Util/str-BitArray.html new file mode 100644 index 0000000..062b653 --- /dev/null +++ b/doc/html/smlnj-lib/Util/str-BitArray.html @@ -0,0 +1,410 @@ + + + + + + + + + + The BitArray structure + + + + + + + + +
    +
    +
    +
    + +
    + +
    The BitArray structure
    +
    +
    +
    +
    +
    + +
    +
    +
    +
    +
    +
    +
    +

    The BitArray structure provides a implementation of +monomorphic arrays of booleans implemented one bit per +element. The BitArray structure extends the +MONO_ARRAY signature +with bit-level operations.

    +
    +
    +
    +
    +

    Synopsis

    +
    +
    +
    +
    signature BIT_ARRAY
    +structure BitArray :> BIT_ARRAY
    +
    +
    +
    +
    +
    +

    Interface

    +
    +
    +
    +
    include MONO_ARRAY
    +
    +val fromString : string -> array option
    +
    +val bits : (int * int list) -> array
    +
    +val getBits : array -> int list
    +
    +val toString : array -> string
    +
    +val isZero  : array -> bool
    +
    +val extend0 : (array * int) -> array
    +val extend1 : (array * int) -> array
    +
    +val eqBits : (array * array) -> bool
    +val equal : (array * array) -> bool
    +
    +val andb : (array * array * int) -> array
    +val orb  : (array * array * int) -> array
    +val xorb : (array * array * int) -> array
    +
    +val notb  : array -> array
    +
    +val <<  : (array * word) -> array
    +val >>  : (array * word) -> array
    +
    +val setBit : (array * int) -> unit
    +val clrBit : (array * int) -> unit
    +
    +val union : array -> array -> unit
    +val intersection : array -> array -> unit
    +
    +val complement : array -> unit
    +
    +val lshift  : (array * int) -> array
    +val rshift  : (array * int) -> array
    +
    +
    +
    +
    +
    +

    Description

    +
    +
    +
    +
    include MONO_ARRAY
    +
    +

    The BIT_ARRAY signature extends the +MONO_ARRAY signature from +the Standard ML Basis Library. Note that while the +MONO_ARRAY signature includes a corresponding monomorphic +vector type, there is currently no implementation of a +corresponding BitVector structure.

    +
    +
    val fromString : string -> array option
    +
    +

    fromString s returns SOME ba when the string s is +a sequence of hexadecimal digits. The length of ba will +be 4*(length s). Returns NONE of a non-hexadecimal +character is encountered.

    +
    +
    val bits : (int * int list) -> array
    +
    +

    bits (n, ixs) returns a new array ba of length n, where +ba[ix] is true for each ix in the list of indices ixs. +This function raises the +Size +exception if n < 0 and the +Subscript +exception if any index is out of bounds.

    +
    +
    val getBits : array -> int list
    +
    +

    getBits ba returns a list of indices ix for which ba[ix] is true +in increasing order.

    +
    +
    val toString : array -> string
    +
    +

    toString ba returns a string representation of the array as a sequence +of hexadecimal digits in little-endian order (i.e., ba[0] is represented +as the high-order bit in the first digit).

    +
    +
    val isZero : array -> bool
    +
    +

    isZero ba returns true if, and only if, no elements are true in ba.

    +
    +
    val extend0 : (array * int) -> array
    +
    +

    extend0 (ba, n) returns a new array ba' that is max(n, length ba) +elements long, where the length ba elements of ba' are copied from +ba and the remaining elements are false. +This function raises the +Size +exception if n < 0.

    +
    +
    val extend1 : (array * int) -> array
    +
    +

    extend1 (ba, n) returns a new array ba' that is max(n, length ba) +elements long, where the length ba elements of ba' are copied from +ba and the remaining elements are true. +This function raises the +Size +exception if n < 0.

    +
    +
    val eqBits : (array * array) -> bool
    +
    +

    eqBits (ba1, ba2) returns true if the two arrays have +the same true entries. In other words, the following identity +holds

    +
    +
    +
    eqBits(ba1, ba2) = (getBits ba1 = getBits ba2)
    +
    +
    +
    +
    val equal : (array * array) -> bool
    +
    +

    equal (ba1, ba2) returns true if the two arrays are the same length +and have the same elements.

    +
    +
    val andb : (array * array * int) -> array
    +
    +

    andb (ba1, ba2, n) returns a new array ba of length n, +where the element ba[ix] is the logical AND of ba1[ix] +and ba2[ix], where the inputs are extended with false as +necessary. +This function raises the +Size +exception if n < 0.

    +
    +
    val orb : (array * array * int) -> array
    +
    +

    orb (ba1, ba2, n) returns a new array ba of length n, +where the element ba[ix] is the logical OR of ba1[ix] +and ba2[ix], where the inputs are extended with false as +necessary. +This function raises the +Size +exception if n < 0.

    +
    +
    val xorb : (array * array * int) -> array
    +
    +

    xorb (ba1, ba2, n) returns a new array ba of length n, +where the element ba[ix] is the logical XOR of ba1[ix] +and ba2[ix], where the inputs are extended with false as +necessary. +This function raises the +Size +exception if n < 0.

    +
    +
    val notb : array -> array
    +
    +

    notb ba returns a new array of the same length as ba with +the elements negated.

    +
    +
    +
    +
    +
    +
    val << : (array * word) -> array
    +
    +

    << (ba, n)`returns a new array by appending `n false +elements on the end of ba. The new array will have +length equal to n + length ba.

    +
    +
    +
    +
    +
    +
    val >> : (array * word) -> array
    +
    +

    >> (ba, n)`returns a new array by trimming `n elements +from the "right" of ba. The new array will have +max(0, length ba - n) elements.

    +
    +
    val setBit : (array * int) -> unit
    +
    +

    setBit (ba, ix) sets the element of ba at index ix to true. +This function raises the +Subscript +exception if ix is out of bounds.

    +
    +
    val clrBit : (array * int) -> unit
    +
    +

    setBit (ba, ix) sets the element of ba at index ix to false. +This function raises the +Subscript +exception if ix is out of bounds.

    +
    +
    val union : array -> array -> unit
    +
    +

    union ba1 ba2 updates ba1 by setting each element ba1[ix] to +the logical OR of ba1[ix] and ba2[ix], where ba2[ix] is +extended with false elements as necessary to match the length of ba1.

    +
    +
    val intersection : array -> array -> unit
    +
    +

    intersection ba1 ba2 updates ba1 by setting each element ba1[ix] to +the logical AND of ba1[ix] and ba2[ix], where ba2[ix] is +extended with false elements as necessary to match the length of ba1.

    +
    +
    val complement : array -> unit
    +
    +

    complement ba logically negates all of the elements of ba.

    +
    +
    +
    +
    +

    Deprecated Functions

    +
    +
    +
    val lshift : (array * int) -> array
    +
    +

    Use << instead.

    +
    +
    val rshift : (array * int) -> array
    +
    +

    Use >> instead.

    +
    +
    +
    +
    +
    +
    +
    +

    See Also

    + +
    +
    +
    + +
    + + diff --git a/doc/html/smlnj-lib/Util/str-CharMap.html b/doc/html/smlnj-lib/Util/str-CharMap.html new file mode 100644 index 0000000..ea15a4b --- /dev/null +++ b/doc/html/smlnj-lib/Util/str-CharMap.html @@ -0,0 +1,237 @@ + + + + + + + + + + The CharMap structure + + + + + + + + +
    +
    +
    +
    + +
    + +
    The CharMap structure
    +
    +
    +
    +
    +
    + +
    +
    +
    +
    +
    +
    +
    +

    The CharMap structure provides fast, read-only, maps from 8-bit characters +to values.

    +
    +
    +
    +
    +

    Synopsis

    +
    +
    +
    +
    signature CHAR_MAP
    +structure CharMap :> CHAR_MAP
    +
    +
    +
    +
    +
    +

    Interface

    +
    +
    +
    +
    type 'a char_map
    +
    +val mkCharMap : {default : 'a, bindings : (string * 'a) list} -> 'a char_map
    +
    +val mapChr : 'a char_map -> char -> 'a
    +val mapStrChr : 'a char_map -> (string * int) -> 'a
    +
    +
    +
    +
    +
    +

    Description

    +
    +
    +
    +
    type 'a char_map
    +
    +

    The type of a mapping from 8-bit characters to 'a.

    +
    +
    val mkCharMap : {default : 'a, bindings : (string * 'a) list} -> 'a char_map
    +
    +

    mkCharMap {default, bindings} creates a new character map. For each +item (s, v) in the bindings list, the characters in s are mapped to +the value v. If a character is bound multiple times, then the rightmost +binding is used. Characters not covered by a binding are mapped to the +default value. For example, the following code creates a mapping that classifies +characters into lower and upper case letter, digits, and other characters:

    +
    +
    +
    datatype char_kind = LOWER | UPPER | DIGIT | OTHER
    +
    +val cmap = mkCharMap {
    +        default = OTHER,
    +        bindings = [
    +            ("abcdefghijklmnopqrstuvwxyz", LOWER),
    +            ("ABCDEFGHIJKLMNOPQRSTUVWXYZ", UPPER),
    +            ("0123456789", DIGIT)
    +          ]
    +      }
    +
    +
    +
    +
    val mapChr : 'a char_map -> char -> 'a
    +
    +

    mapChr cmap c applies the map to the character.

    +
    +
    val mapStrChr : 'a char_map -> (string * int) -> 'a
    +
    +

    mapStrChr cmap (s, i) applies the map to the i`th character in `s. +The Subscript +exception is raised if i is out of bounds.

    +
    +
    +
    +
    +
    +
    +

    See Also

    + +
    +
    +
    + +
    + + diff --git a/doc/html/smlnj-lib/Util/str-DynamicArray.html b/doc/html/smlnj-lib/Util/str-DynamicArray.html new file mode 100644 index 0000000..7a708b1 --- /dev/null +++ b/doc/html/smlnj-lib/Util/str-DynamicArray.html @@ -0,0 +1,416 @@ + + + + + + + + + + The DynamicArray structure + + + + + + + + +
    +
    +
    +
    + +
    + +
    The DynamicArray structure
    +
    +
    +
    +
    +
    + +
    +
    +
    +
    +
    +
    +
    +

    The DynamicArray structure provides dynamically sized polymorphic +arrays. Each array has an associated default value, which is +covers those elements that have not been explicitly initialized +(conceptually, one can view an array as having an infinite size). +Thus, reads from indices above the bound will return the default value. +The bound of an array is the highest index of an initialized +element (or ~1 if there are no initialized elements). The +defined range of the array are the elements in the positions +indexed from zero to the bound.

    +
    +
    +
    +
    +

    Synopsis

    +
    +
    +
    +
    structure DynamicArray
    +
    +
    +
    +
    +
    +

    Interface

    +
    +
    +
    +
    type 'a array
    +
    +val array : (int * 'a) -> 'a array
    +
    +val subArray : ('a array * int * int) -> 'a array
    +
    +val fromList : 'a list * 'a -> 'a array
    +val fromVector : 'a vector * 'a -> 'a array
    +
    +val toList : 'a array -> 'a list
    +val toVector : 'a array -> 'a vector
    +
    +val tabulate: (int * (int -> 'a) * 'a) -> 'a array
    +
    +val default : 'a array -> 'a
    +
    +val sub : ('a array * int) -> 'a
    +
    +val update : ('a array * int * 'a) -> unit
    +
    +val bound : 'a array -> int
    +
    +val truncate : ('a array * int) -> unit
    +
    +val appi : (int * 'a -> unit) -> 'a array -> unit
    +val app : ('a -> unit) -> 'a array -> unit
    +val modifyi : (int * 'a -> 'a) -> 'a array -> unit
    +val modify : ('a -> 'a) -> 'a array -> unit
    +val foldli : (int * 'a * 'b -> 'b) -> 'b -> 'a array -> 'b
    +val foldri : (int * 'a * 'b -> 'b) -> 'b -> 'a array -> 'b
    +val foldl : ('a * 'b -> 'b) -> 'b -> 'a array -> 'b
    +val foldr : ('a * 'b -> 'b) -> 'b -> 'a array -> 'b
    +val findi : (int * 'a -> bool) -> 'a array -> (int * 'a) option
    +val find : ('a -> bool) -> 'a array -> 'a option
    +val exists : ('a -> bool) -> 'a array -> bool
    +val all : ('a -> bool) -> 'a array -> bool
    +val collate : ('a * 'a -> order) -> 'a array * 'a array -> order
    +
    +val vector : 'a array -> 'a vector
    +
    +
    +
    +
    +
    +

    Description

    +
    +
    +
    +
    type 'a array
    +
    +

    The polymorphic type of dynamic arrays. Each array has a default value +and a bound, where the bound is the largest index for which an explicit +value has been set. Reads from indices above the bound will return the +default value.

    +
    +
    val array : (int * 'a) -> 'a array
    +
    +

    array (sz, dflt) returns a new array with bound ~1 and default +value dflt. The sz argument, which must be non-negative, is used +as a hint of the potential range of indices. This function raises +the Size +exception if sz < 0.

    +
    +
    val subArray : ('a array * int * int) -> 'a array
    +
    +

    subArray (arr, lo, hi) returns a new array with the same default +as arr, and whose values in the range [0, hi-lo] are equal to +the values in arr in the range [lo, hi]. This function raises +the Size +exception if lo < 0 or hi < lo-1.

    +
    +
    val fromList : 'a list * 'a -> 'a array
    +
    +

    fromList (lst, dflt) returns a new array created from the elements +of lst and with default value dflt. The bound of the array will be +length lst - 1.

    +
    +
    val fromVector : 'a Vector.vector * 'a -> 'a array
    +
    +

    fromVector (vec, dflt) returns a new array created from the elements +of vec and with default value dflt. The bound of the array will be +length vec - 1.

    +
    +
    val toList : 'a array -> 'a list
    +
    +

    toList arr returns a list of the array’s contents. The resulting +list will have the array’s bound plus one elements.

    +
    +
    +
    +
    +
    +
    val toVector : 'a array -> 'a vector
    +
    +

    toVector arr returns a list of the array’s contents. The resulting +vector will have the array’s bound plus one elements.

    +
    +
    val tabulate: (int * (int -> 'a) * 'a) -> 'a array
    +
    +

    tabulate (sz, init, dflt) returns a new array with the first +sz elements initialized using the function init and the +default value dflt. This function raises the +Size +exception if sz < 0.

    +
    +
    val default : 'a array -> 'a
    +
    +

    default arr returns the default value for the array.

    +
    +
    val sub : ('a array * int) -> 'a
    +
    +

    sub (arr, ix) returns the value of the array at index ix. +If that value has not been explicitly set, then it returns the array’s +default value. This function raises the +Subscript +exception if ix < 0.

    +
    +
    val update : ('a array * int * 'a) -> unit
    +
    +

    update (arr, ix, v) sets the value at index ix of the array to v. +If ix is greater than the current bound of the array, then the bound +is set to ix. This function raises the +Subscript +exception if ix < 0.

    +
    +
    val bound : 'a array -> int
    +
    +

    bound arr returns the current bound of the array, which is the highest +index that has been explicitly set (e.g., by update).

    +
    +
    val truncate : ('a array * int) -> unit
    +
    +

    truncate (arr, sz) sets every entry with index greater or equal to +sz to the array’s default value.

    +
    +
    val appi : (int * 'a -> unit) -> 'a array -> unit
    +
    +

    appi f arr behaves like the +Array.appi +function on the defined range of arr.

    +
    +
    val app : ('a -> unit) -> 'a array -> unit
    +
    +

    app f arr behaves like the +Array.app +function on the defined range of arr.

    +
    +
    val modifyi : (int * 'a -> 'a) -> 'a array -> unit
    +
    +

    modifyi f arr behaves like the +Array.modifyi +function on the defined range of arr.

    +
    +
    val modify : ('a -> 'a) -> 'a array -> unit
    +
    +

    modify f arr behaves like the +Array.modify +function on the defined range of arr.

    +
    +
    val foldli : (int * 'a * 'b -> 'b) -> 'b -> 'a array -> 'b
    +
    +

    foldli f init arr behaves like the +Array.foldli +function on the defined range of arr.

    +
    +
    val foldri : (int * 'a * 'b -> 'b) -> 'b -> 'a array -> 'b
    +
    +

    foldri f init arr behaves like the +Array.foldri +function on the defined range of arr.

    +
    +
    val foldl : ('a * 'b -> 'b) -> 'b -> 'a array -> 'b
    +
    +

    foldl f init arr behaves like the +Array.foldl +function on the defined range of arr.

    +
    +
    val foldr : ('a * 'b -> 'b) -> 'b -> 'a array -> 'b
    +
    +

    foldr f init arr behaves like the +Array.foldr +function on the defined range of arr.

    +
    +
    val findi : (int * 'a -> bool) -> 'a array -> (int * 'a) option
    +
    +

    findi f arr behaves like the +Array.findi +function on the defined range of arr.

    +
    +
    val find : ('a -> bool) -> 'a array -> 'a option
    +
    +

    find f arr behaves like the +Array.find +function on the defined range of arr.

    +
    +
    val exists : ('a -> bool) -> 'a array -> bool
    +
    +

    exists f arr behaves like the +Array.exists +function on the defined range of arr.

    +
    +
    val all : ('a -> bool) -> 'a array -> bool
    +
    +

    all f arr behaves like the +Array.all +function on the defined range of arr.

    +
    +
    val collate : ('a * 'a -> order) -> 'a array * 'a array -> order
    +
    +

    collate cmp (arr1, arr2) return the lexicographic order of the defined +ranges of the two arrays using the given comparison cmp on elements.

    +
    +
    +
    +
    +

    Deprecated functions

    +
    +
    +
    val vector : 'a array -> 'a vector
    +
    +

    Use toVector instead.

    +
    +
    +
    +
    +
    +
    +
    +

    See Also

    + +
    +
    +
    + +
    + + diff --git a/doc/html/smlnj-lib/Util/str-EditDistance.html b/doc/html/smlnj-lib/Util/str-EditDistance.html new file mode 100644 index 0000000..46d9a51 --- /dev/null +++ b/doc/html/smlnj-lib/Util/str-EditDistance.html @@ -0,0 +1,203 @@ + + + + + + + + + + The EditDistance structure + + + + + + + + +
    +
    +
    +
    + +
    + +
    The EditDistance structure
    +
    +
    +
    +
    +
    + +
    +
    +
    +
    +
    +
    +
    +

    The EditDistance structure computes the "optimal string alignment" +(or Levenshtein) distance between two strings.

    +
    +
    +
    +
    +

    Synopsis

    +
    +
    +
    +
    structure EditDistance
    +
    +
    +
    +
    +
    +

    Interface

    +
    +
    +
    +
    val distance : string * string -> int
    +
    +
    +
    +
    +
    +

    Description

    +
    +
    +
    +
    val distance : string * string -> int
    +
    +

    distance (s1, s2) returns the number of edit operations required to make the +two strings equal. Edit operations include deleting a character, inserting a +character, replacing a character with another, and swapping two adjacent +characters.

    +
    +
    +
    +
    +
    + +
    +
    + +
    + + diff --git a/doc/html/smlnj-lib/Util/str-FNVHash.html b/doc/html/smlnj-lib/Util/str-FNVHash.html new file mode 100644 index 0000000..b89f853 --- /dev/null +++ b/doc/html/smlnj-lib/Util/str-FNVHash.html @@ -0,0 +1,237 @@ + + + + + + + + + + The FNVHash structure + + + + + + + + +
    +
    +
    +
    + +
    + +
    The FNVHash structure
    +
    +
    +
    +
    +
    + +
    +
    +
    +
    +
    +
    +
    +

    The FNVHash structure provides an implementation of the +Fowler-Noll-Vo +(FNV) hashing algorithm (specifically the 64-bit FNV-1a algorithm).

    +
    +
    +
    +
    +

    Synopsis

    +
    +
    +
    +
    structure FNVHash
    +
    +
    +
    +
    +
    +

    Interface

    +
    +
    +
    +
    val offsetBasis : Word64.word
    +
    +val hashByte : Word8.word * Word64.word -> Word64.word
    +val hashChar : char * Word64.word -> Word64.word
    +
    +val hashString : string -> word
    +val hashSubstring : substring -> word
    +
    +
    +
    +
    +
    +

    Description

    +
    +
    +
    +
    val offsetBasis : Word64.word
    +
    +

    The offsetBasis should be used as the initial value when +using hashByte and/or hashChar to incrementally hash a +data value.

    +
    +
    val hashByte : Word8.word * Word64.word -> Word64.word
    +
    +

    hashByte (b, h) computes one step of the FNV hashing algorithm +for byte b and initial hash value h.

    +
    +
    val hashChar : char * Word64.word -> Word64.word
    +
    +

    hashByte (c, h) computes one step of the FNV hashing algorithm +for character c and initial hash value h.

    +
    +
    val hashString : string -> word
    +
    +

    hashString s returns the hash of the given string. It is equivalent to +the expression

    +
    +
    +
    CharVector.foldl hashChar offsetBasis s
    +
    +
    +
    +
    val hashSubstring : substring -> word
    +
    +

    hashSubstring ss returns the hash of the given substring. It is equivalent to +the expression

    +
    +
    +
    Substring.foldl hashChar offsetBasis ss
    +
    +
    +
    +
    +
    +
    +
    +
    +

    See Also

    + +
    +
    +
    + +
    + + diff --git a/doc/html/smlnj-lib/Util/str-Fifo.html b/doc/html/smlnj-lib/Util/str-Fifo.html new file mode 100644 index 0000000..1aead53 --- /dev/null +++ b/doc/html/smlnj-lib/Util/str-Fifo.html @@ -0,0 +1,311 @@ + + + + + + + + + + The Fifo structure + + + + + + + + +
    +
    +
    +
    + +
    + +
    The Fifo structure
    +
    +
    +
    +
    +
    + +
    +
    +
    +
    +
    +
    +
    +

    The Fifo structure provides a functional queue data structure, +which are implemented as a pair of stacks (lists) representing the +front and rear of the queue. Single-threaded enqueuing and +dequeuing operations will have amortized constant time.

    +
    +
    +
    +
    +

    Synopsis

    +
    +
    +
    +
    signature FIFO
    +structure Fifo :> FIFO
    +
    +
    +
    +
    +
    +

    Interface

    +
    +
    +
    +
    type 'a fifo
    +
    +exception Dequeue
    +
    +val empty : 'a fifo
    +val isEmpty : 'a fifo -> bool
    +val enqueue : 'a fifo * 'a -> 'a fifo
    +val dequeue : 'a fifo -> 'a fifo * 'a
    +val next : 'a fifo -> ('a * 'a fifo) option
    +val delete : ('a fifo * ('a -> bool)) -> 'a fifo
    +val head : 'a fifo -> 'a
    +val peek : 'a fifo -> 'a option
    +val length : 'a fifo -> int
    +val contents : 'a fifo -> 'a list
    +val app : ('a -> unit) -> 'a fifo -> unit
    +val map : ('a -> 'b) -> 'a fifo -> 'b fifo
    +val foldl : ('a * 'b -> 'b) -> 'b -> 'a fifo -> 'b
    +val foldr : ('a * 'b -> 'b) -> 'b -> 'a fifo -> 'b
    +
    +
    +
    +
    +
    +

    Description

    +
    +
    +
    +
    type 'a fifo
    +
    +

    The type constructor for functional queues.

    +
    +
    +
    +
    +
    +
    exception Dequeue
    +
    +

    This exceptions is raised when the dequeue function is +applied to an empty queue.

    +
    +
    val empty : 'a fifo
    +
    +

    The empty queue.

    +
    +
    val isEmpty : 'a fifo -> bool
    +
    +

    ifEmpty q returns true if the queue is empty.

    +
    +
    val enqueue : 'a fifo * 'a -> 'a fifo
    +
    +

    enqueue (q, x) returns a queue with x added to the end.

    +
    +
    val dequeue : 'a fifo -> 'a fifo * 'a
    +
    +

    dequeue q returns a pair (q', x), where x was the first +element in q and q' is the queue with x removed. +This function raises the Dequeue exception +if it is called on an empty queue.

    +
    +
    val next : 'a fifo -> ('a * 'a fifo) option
    +
    +

    next q returns SOME(q', x), where x was the first +element in q and q' is the queue with x removed, or +NONE if q is empty.

    +
    +
    val delete : ('a fifo * ('a -> bool)) -> 'a fifo
    +
    +

    delete (q, pred) removes those items from q for which the +function pred returns true and returns the resulting queue.

    +
    +
    val head : 'a fifo -> 'a
    +
    +

    head q returns the first element of q or raises the exception +Dequeue if q is empty.

    +
    +
    val peek : 'a fifo -> 'a option
    +
    +

    peek q returns SOME x, where x is the first element of q, +or NONE if q is empty.

    +
    +
    val length : 'a fifo -> int
    +
    +

    length q returns the number of elements in the queue.

    +
    +
    val contents : 'a fifo -> 'a list
    +
    +

    contents q returns the contents of q as a list.

    +
    +
    val app : ('a -> unit) -> 'a fifo -> unit
    +
    +

    app f q applies the function f to the elements of q. +This expression is equivalent to

    +
    +
    +
    List.app f (contents q)
    +
    +
    +
    +
    val map : ('a -> 'b) -> 'a fifo -> 'b fifo
    +
    +

    map f q returns the queue that results from mapping +the function f across the elements of the queue.

    +
    +
    val foldl : ('a * 'b -> 'b) -> 'b -> 'a fifo -> 'b
    +
    +

    foldl f init q folds the function f over the elements of q from +front to back. This expression is equivalent to

    +
    +
    +
    List.foldl f init (contents q)
    +
    +
    +
    +
    val foldr : ('a * 'b -> 'b) -> 'b -> 'a fifo -> 'b
    +
    +

    foldr f init q folds the function f over the elements of q from +back to front. This expression is equivalent to

    +
    +
    +
    List.foldr f init (contents q)
    +
    +
    +
    +
    +
    +
    +
    +
    +

    See Also

    + +
    +
    +
    + +
    + + diff --git a/doc/html/smlnj-lib/Util/str-Format.html b/doc/html/smlnj-lib/Util/str-Format.html new file mode 100644 index 0000000..4c02f38 --- /dev/null +++ b/doc/html/smlnj-lib/Util/str-Format.html @@ -0,0 +1,459 @@ + + + + + + + + + + The Format structure + + + + + + + + +
    +
    +
    +
    + +
    + +
    The Format structure
    +
    +
    +
    +
    +
    + +
    +
    +
    +
    +
    +
    +
    +

    The Format structure provides printf-style string formatting. +The syntax of format strings is a subset of that +supported by the C printf function.

    +
    +
    +
    +
    +

    Synopsis

    +
    +
    +
    +
    structure Format
    +
    +
    +
    +
    +
    +

    Interface

    +
    +
    +
    +
    datatype fmt_item
    +  = ATOM of Atom.atom
    +  | LINT of LargeInt.int
    +  | INT of Int.int
    +  | LWORD of LargeWord.word
    +  | WORD of Word.word
    +  | WORD8 of Word8.word
    +  | BOOL of bool
    +  | CHR of char
    +  | STR of string
    +  | REAL of Real.real
    +  | LREAL of LargeReal.real
    +  | LEFT of (int * fmt_item)
    +  | RIGHT of (int * fmt_item)
    +
    +exception BadFormat
    +exception BadFmtList
    +
    +val format  : string -> fmt_item list -> string
    +val formatf : string -> (string -> unit) -> fmt_item list -> unit
    +
    +
    +
    +
    +
    +

    Description

    +
    +
    +
    +
    datatype fmt_item = …​
    +
    +

    The fmt_item datatype is a union of the types that the format function +supports. The constructors are interpreted as follows:

    +
    +
    +
    +
    +
    ATOM atm
    +
    +

    specifies an atom atm to convert (the conversion +specifier must be “s”).

    +
    +
    LINT n
    +
    +

    specifies a large integer value n to convert +(the conversion specifier must be one of “d”, “o”, “x”, or “X”).

    +
    +
    INT n
    +
    +

    specifies a default integer value n to convert +(the conversion specifier must be one of “d”, “o”, “x”, or “X”).

    +
    +
    LWORD w
    +
    +

    specifies a large word value w to convert +(the conversion specifier must be one of “d”, “o”, “x”, or “X”).

    +
    +
    WORD w
    +
    +

    specifies a default word value w to convert +(the conversion specifier must be one of “d”, “o”, “x”, or “X”).

    +
    +
    WORD8 w
    +
    +

    specifies an 8-bit word value w to convert +(the conversion specifier must be one of “d”, “o”, “x”, or “X”).

    +
    +
    BOOL b
    +
    +

    specifies a Boolean value b to convert +(the conversion specifier must be “b”).

    +
    +
    CHR c
    +
    +

    specifies a character value +(the conversion specifier must be “c”).

    +
    +
    STR s
    +
    +

    specifies a string value s to convert +(the conversion specifier must be “s”). The conversion is the +identity; e.g., STR "\n" will produce a newline in the result +string.

    +
    +
    REAL r
    +
    +

    specifies a default real value r to convert +(the conversion specifier must be one of “e”, “E”, “f”, +“F”, “g”, or “G”).

    +
    +
    LREAL r
    +
    +

    specifies a large real value r to convert +(the conversion specifier must be one of “e”, “E”, “f”, +“F”, “g”, or “G”).

    +
    +
    LEFT(wid, item)
    +
    +

    specifies a left-padded (right-justified) +conversion, where the result of formatting item +is padded on the left with spaces to the width specified +by wid. Note that the padding occurs after item +formatted, so it can be combined with width specifiers and +zero padding.

    +
    +
    RIGHT(wid, item)
    +
    +

    specifies a right-padded (left-justified) +conversion, where the result of formatting item +is padded on the right with spaces to the width specified +by wid. Note that the padding occurs after item +formatted, so it can be combined with width specifiers and +zero padding.

    +
    +
    +
    +
    +
    +
    +
    exception BadFormat
    +
    +

    This exception is raised when either format or formatf is applied +to an ill-formed format string.

    +
    +
    exception BadFmtList
    +
    +

    This exception is raised when there is a mismatch in either +number or type between the format string and the list of items.

    +
    +
    val format : string -> fmt_item list -> string
    +
    +

    format fmt returns a function for formating a list of format items as +a string by converting the list of items according to the format +string fmt. If the format string is ill formed, the BadFormat +exception will be raised. Likewise, if there is a mismatch between +the conversion specifiers in the format string and the list of items, +then the BadFmtList exception is raised.

    +
    +
    val formatf : string -> (string -> unit) -> fmt_item list -> unit
    +
    +

    format fmt consumer items is equivalent to the expression

    +
    +
    +
    consumer (format fmt items)
    +
    +
    +
    +
    +
    +
    +
    +
    +

    Format Strings

    +
    +
    +

    The format and `formatf functions take a format string and a list of +format items as arguments. The format string is composed of zero or more +directives, which are either ordinary characters (excluding %), which are +copied to the result, or conversion specifiers, which are used to convert +the corresponding format items to strings that are then added to the result.

    +
    +
    +

    Conversion specifiers begin with the percent (%) character followed by +the following in sequence:

    +
    +
    +
      +
    • +

      Zero or more of the following single-character flags. Note that these +only apply to the numeric conversion specifiers.

      +
      +
        +
      • +

        A " " (space), which means that a space character is used as the +sign for positive numbers. This flag is incompatible with the +“+” flag.

        +
      • +
      • +

        A "``", which means that a `` character is used as the sign for positive +numbers. This flag is incompatible with the "` `" flag.

        +
      • +
      • +

        A “~”, which means that the tilde character is used as the sign for negative +numbers (i.e., SML syntax).

        +
      • +
      • +

        A “0”, which means that the zero character should be used to pad the number +(on the left) to the requested width.

        +
      • +
      • +

        A “-”, which means that the minus character is used as the sign for negative +numbers, which is the default behavior. Note that this interpretation +of the “-” flag differs from the C printf function, where it is +used to specify left justification; use the LEFT constructor for that +purpose.

        +
      • +
      • +

        A “#”, which means that a base specifier should be prepended to +the representation of the number.

        +
      • +
      +
      +
    • +
    • +

      an optional decimal number specifying a minimum field width. If the +converted value has fewer characters than the field width, it will be padded +on the left with spaces (or zeros, when zero-padding has been specified).

      +
    • +
    • +

      An optional precision, in the form of a period “.” followed by an optional +decimal number. If the number is omitted, the precision is taken as zero. +The precision specifies the the number of digits to appear after the +decimal-point for “a”, “A”, “e”, “E”, “f”, and “F” conversions, +the maximum number of significant digits for “g” and “G” conversions, and +the maximum number of characters for the “s” conversion.

      +
    • +
    • +

      The conversion-specifier character, which must match the corresponding +format item. The conversion character is one of the following:

      +
      +
        +
      • +

        A “d”, which specifies the conversion of an integer (INT or LINT) +or word (WORD, LWORD, or WORD8) item to its decimal representation.

        +
      • +
      • +

        An “o”, which specifies the conversion of an integer (INT or LINT) +or word (WORD, LWORD, or WORD8) item to its octal representation. If the +“#” flag was specifies, then a leading "0" is prepended to the result.

        +
      • +
      • +

        An “x” or “X”, which specifies the conversion of an integer +(INT or LINT) or word (WORD, LWORD, or WORD8) item to its hexadecimal +representation. The digits are lower-case for “x” and upper-case +for “X”. If the “#” flag was specifies, then a leading “0x” (or “0X”) +is prepended to the result.

        +
      • +
      • +

        An “e” or “E”, which specifies the conversion of a real (REAL or +LREAL) item to the format s d . ddd e s dd, +where there is one digit before the decimal-point character and the +number of digits after the decimal-point is equal to the precision. +(The “`e” is replaced by “E`” for the “E” conversion specifier.) +If the precision is missing, it defaults to six and if the precision is +zero, no decimal-point character appears. The signs (s) +of the number and exponent are displayed as specified by the flags.

        +
      • +
      • +

        A “f” or “F”, which specifies the conversion of a real (REAL or +LREAL) item to the format s ddd . ddd, where the +number of digits after the decimal-point is equal to the precision +specification (or six if not specified).

        +
      • +
      • +

        A “g” or “G”, which specifies the conversion of a real (REAL or +LREAL) item to either the format specified by “e” or “f” +(or “E” or “F” in the case of “G”).

        +
      • +
      • +

        A “b”, which specifies the conversion of a boolean (BOOL) item, +which will be displayed as either “true” or "`false`."

        +
      • +
      • +

        A “c”, which specifies the identity conversion of a character (CHAR) item.

        +
      • +
      • +

        A “s”, which specifies the identity conversion of a string (STR) or +atom (ATOM) item.

        +
      • +
      +
      +
    • +
    +
    +
    +
    +
    +

    See Also

    + +
    +
    +
    + +
    + + diff --git a/doc/html/smlnj-lib/Util/str-FormatComb.html b/doc/html/smlnj-lib/Util/str-FormatComb.html new file mode 100644 index 0000000..c85b59c --- /dev/null +++ b/doc/html/smlnj-lib/Util/str-FormatComb.html @@ -0,0 +1,562 @@ + + + + + + + + + + The FormatComb structure + + + + + + + + +
    +
    +
    +
    + +
    + +
    The FormatComb structure
    +
    +
    +
    +
    +
    + +
    +
    +
    +
    +
    +
    +
    +

    The FormatComb structure provides well-typed formating (or unparsing) +combinators in the style of Olivier Danvy’s +Functional Unparsing work.

    +
    +
    +

    The idea is to use combinators for constructing something akin to +the format string of C's printf function. The difference is, however, +that our formats are not strings. Instead, format fragments have +meaningful types, and passing them to the format function results +in a curried function whose arguments have precisely the types that +correspond to argument-consuming parts of the format. (Such +argument-consuming parts are similar to the conversion-specifications +the Format structure.)

    +
    +
    +

    There is an underlying notion of "abstract formats" of type 'a format, +but the user operates at the level of "format fragments," which +have type ('a, 'b) fragment and are typically polymorphic +in 'a (where 'b is instantiated to some type containing 'a). +Fragments are functions from formats to formats and can be composed +freely using the infix function-composition operator (o). This +form of format composition translates to a corresponding concatenation +of the resulting output.

    +
    +
    +

    Fragments are composed from two kids of primitve fragments called +elements and glue, respectively. An element is a fragment that +consumes some argument (which thanks to the typing magic appears as a +curried argument when the format gets executed). Glue are fragments +that do not consume arguments but merely insert fixed text (fixed +at format construction time) into the output.

    +
    +
    +

    There are also adjustment operations that pad, trim, or fit the output +of entire fragments (primitive or not) to a given size.

    +
    +
    +

    Matthias Blume wrote the code for this module.

    +
    +
    +
    +
    +

    Synopsis

    +
    +
    +
    +
    signature FORMAT_COMB
    +structure FormatComb : FORMAT_COMB
    +
    +
    +
    +
    +
    +

    Interface

    +
    +
    +
    +
    type 'a format
    +type ('a, 'b) fragment = 'a format -> 'b format
    +
    +type 'a glue = ('a, 'a) fragment
    +type ('a, 't) element = ('a, 't -> 'a) fragment
    +type 'a gg
    +
    +val format  : (string, 'a) fragment -> 'a
    +
    +val format' : (string list -> 'b) -> ('b, 'a) fragment -> 'a
    +
    +val using : ('t -> string) -> ('a, 't) element
    +
    +val int     : ('a, int) element
    +val real    : ('a, real) element
    +val bool    : ('a, bool) element
    +val string  : ('a, string) element
    +val string' : ('a, string) element
    +val char    : ('a, char) element
    +val char'   : ('a, char) element
    +
    +val int'  : StringCvt.radix   -> ('a, int) element
    +val real' : StringCvt.realfmt -> ('a, real) element
    +
    +val list   : ('a, 'x) element -> ('a, 'x list) element
    +val option : ('a, 'x) element -> ('a, 'x option) element
    +val seq    : (('x * 'a gg -> 'a gg) -> 'a gg -> 's -> 'a gg)
    +	     -> 'a glue
    +	     -> ('a, 'x) element
    +	     -> ('a, 's) element
    +
    +val glue : ('a, 't) element -> 't -> 'a glue
    +
    +val elem : ('t -> 'a glue) -> ('a, 't) element
    +
    +val nothing :           'a glue
    +val text    : string -> 'a glue
    +val sp      : int ->    'a glue
    +val nl      :           'a glue
    +val tab     :           'a glue
    +
    +val listg   : ('t -> 'a glue) -> ('t list -> 'a glue)
    +val optiong : ('t -> 'a glue) -> ('t option -> 'a glue)
    +
    +val seqg   : (('x * 'a gg -> 'a gg) -> 'a gg -> 's -> 'a gg)
    +	     -> 'a glue
    +	     -> ('x -> 'a glue)
    +	     -> 's -> 'a glue
    +
    +type place
    +val left   : place
    +val center : place
    +val right  : place
    +
    +val pad  : place -> int -> ('a, 't) fragment -> ('a, 't) fragment
    +val trim : place -> int -> ('a, 't) fragment -> ('a, 't) fragment
    +val fit  : place -> int -> ('a, 't) fragment -> ('a, 't) fragment
    +
    +val padl : int -> ('a, 't) fragment -> ('a, 't) fragment
    +val padr : int -> ('a, 't) fragment -> ('a, 't) fragment
    +
    +
    +
    +
    +
    +

    Description

    +
    +
    +
    +
    type 'a format
    +
    +

    An abstract type; values of this type are internal to the +implementation.

    +
    +
    type ('a, 'b) fragment = 'a format -> 'b format
    +
    +

    A fragment is a function from formats to formats.

    +
    +
    type 'a glue = ('a, 'a) fragment
    +
    +

    A glue fragment insertes text into the output without consuming +and argument.

    +
    +
    type ('a, 't) element = ('a, 't -> 'a) fragment
    +
    +

    An element fragment consumes an argument of type 't, converts it to a string, +and inserts the result in the output.

    +
    +
    type 'a gg
    +
    +

    An abstract helper type that is internal to the implementation.

    +
    +
    +
    +
    +
    +
    val format : (string, 'a) fragment -> 'a
    +
    +

    format fmt returns a format function as defined by the expression fmt +that will produce a string result when applied to the necessary arguments.

    +
    +
    val format' : (string list -> 'b) -> ('b, 'a) fragment -> 'a
    +
    +

    format' consumer fmt returns a format function as defined by the +expression fmt that will invoke the consumer on the list of strings +produced by formating when applied to the necessary arguments. +(The argument to the consumer is a string list to avoid +premature string concatenation in the implementation). +Note that the format function can be defined in +terms of format' as follows:

    +
    +
    +
    +
    +
    +
    val format = format' String.concat
    +
    +
    +
    +
    +
      (* Make a type-specific element given a toString function for this type *)
    +`[.kw]#val# using : ('t \-> string) \-> ('a, 't) element`::
    +  `using tos` returns an element fragment that represents the given
    +  "value-to-string" conversion.
    +
    +
    +
    +
    +
    val int : ('a, int) element
    +
    +

    an element fragment for formating integers; it is equivalent to the +expression using Int.toString.

    +
    +
    val int' : StringCvt.radix -> ('a, int) element
    +
    +

    int' radix returns an element fragment for formating integers in the specified +radix. It is equivalent to the expression +using (Int.fmt radix).

    +
    +
    val real : ('a, real) element
    +
    +

    an element fragment for formating reals; it is equivalent to the +expression using Real.toString.

    +
    +
    val real' : StringCvt.realfmt -> ('a, real) element
    +
    +

    real' rf returns an element fragment for formating reals with the specified +format. It is equivalent to the expression +using (Real.fmt rf).

    +
    +
    val bool : ('a, bool) element
    +
    +

    an element fragment for formating booleans; it is equivalent to the +expression using Bool.toString.

    +
    +
    val string : ('a, string) element
    +
    +

    an element fragment for formating raw strings; it is equivalent to the +expression using (fn x ⇒ x).

    +
    +
    val string' : ('a, string) element
    +
    +

    an element fragment for formating strings with escapes; it is equivalent to the +expression using String.toString.

    +
    +
    val char : ('a, char) element (* using String.str *)
    +
    +

    an element fragment for formating raw characters; it is equivalent to the +expression using String.str.

    +
    +
    val char' : ('a, char) element (* using Char.toString *)
    +
    +

    an element fragment for formating characters with escapes; it is equivalent to the +expression using Char.toString.

    +
    +
    +
    (* "polymorphic" elements *)
    +
    +
    +
    +
    val list : ('a, 'x) element -> ('a, 'x list) element (* "[", ", ", "]" *)
    +
    +

    list elemFmt returns an element fragment that formats lists of items +using the elemFmt element fragment to format items. The list will be enclosed +in brackets (“`[” “]`”) with elements separated by commas.

    +
    +
    val option : ('a, 'x) element -> ('a, 'x option) element
    +
    +

    option elemFmt returns an element fragment that formats optional items +using the elemFmt element fragment to format the item value. For +an argument of NONE, the string "NONE" is returned, while for an argument +of SOME v, the string "SOME(s)" is returned, where s is the result +of formatting v using elemFmt.

    +
    +
    val seq : (('x * 'a gg -> 'a gg) -> 'a gg -> 's -> 'a gg) -> 'a glue -> ('a, 'x) element -> ('a, 's) element
    +
    +

    something

    +
    +
    +
    +
    +
    +
    val glue : ('a, 't) element -> 't -> 'a glue
    +
    +

    glue fmt arg returns a glue element that renders as the string +that results from using fmt to convert arg to a string.

    +
    +
    val elem : ('t -> 'a glue) -> ('a, 't) element
    +
    +

    elem glueGen returns an element for rendering arguments to the +glueGen function. This function is the inverse of glue +and is useful for extending the set of combinators.

    +
    +
    val nothing : 'a glue
    +
    +

    A glue fragment that renders as the empty string.

    +
    +
    val text : string -> 'a glue
    +
    +

    text s returns a glue fragment that renders as the text s.

    +
    +
    val sp : int -> 'a glue
    +
    +

    sp n returns a glue fragment that renders as n space characters.

    +
    +
    val nl : 'a glue
    +
    +

    A glue fragment that renders as a newline character.

    +
    +
    val tab : 'a glue
    +
    +

    A glue fragment that renders as a tab character.

    +
    +
    val listg : ('t -> 'a glue) -> ('t list -> 'a glue)
    +
    +

    something

    +
    +
    val optiong : ('t -> 'a glue) -> ('t option -> 'a glue)
    +
    +

    something

    +
    +
    val seqg : (('x * 'a gg -> 'a gg) -> 'a gg -> 's -> 'a gg) -> 'a glue -> ('x -> 'a glue) -> 's -> 'a glue
    +
    +

    something

    +
    +
    type place
    +
    +

    An abstract type that represents how to pad or trim of string.

    +
    +
    val left : place
    +
    +

    Pad or trim the left side of a string.

    +
    +
    val center : place
    +
    +

    Pad or trim both sides of a string.

    +
    +
    val right : place
    +
    +

    Pad or trim the left side of a string.

    +
    +
    +
    +
    +
    +
    val pad : place -> int -> ('a, 't) fragment -> ('a, 't) fragment
    +
    +

    pad place n frag wraps the fragment frag with padding to bring the total +with to no fewer than n characters. The place specifies where padding +spaces will be added. Padding never reduces the size of the result.

    +
    +
    +
    +
    +
    +
    val trim : place -> int -> ('a, 't) fragment -> ('a, 't) fragment
    +
    +

    trim place n frag wraps the fragment frag with a trimming operation +to bring the total with to no more than n characters. The place specifies +where trimming occurs. Trimming never increases the size of the result.

    +
    +
    +
    +
    +
    +
    val fit : place -> int -> ('a, 't) fragment -> ('a, 't) fragment
    +
    +

    fit place n frag wraps the fragment frag with an operation that +guarantees the result will be exactly n characters by either padding or +trimming as necessary.

    +
    +
    val padl : int -> ('a, 't) fragment -> ('a, 't) fragment
    +
    +

    padl n frag is equivalent to pad left n frag.

    +
    +
    val padr : int -> ('a, 't) fragment -> ('a, 't) fragment
    +
    +

    padr n frag is equivalent to pad right n frag.

    +
    +
    +
    +
    +
    +
    +

    Examples

    +
    +
    +

    Here are examples on how to use this facility.

    +
    +
    +
    +
    format nothing          (* ==> "" *)
    +format int 1234         (* ==> "1234" *)
    +
    +format (text "The square of " o int o text " is " o int o text ".") 2 4
    +                        (* ==> "The square of 2 is 4." *)
    +
    +format (int o bool o char) 1 true #"x"
    +                        (* ==> "1truex" *)
    +
    +format (glue string "glue vs. " o string o glue int 42 o sp 5 o int)
    +       "ordinary text " 17
    +                        (* ==> "glue vs. ordinary text 42     17" *)
    +
    +
    +
    +

    and here are examples of how the +pad/trim/fit functions work.

    +
    +
    +
    +
    format (pad left 6 int) 1234        (* ==> "  1234" *)
    +format (pad center 6 int) 1234      (* ==> " 1234 " *)
    +format (pad right 6 int) 1234       (* ==> "1234  " *)
    +format (trim left 2 int) 1234       (* ==> "34"     *)
    +format (trim center 2 int) 1234     (* ==> "23"     *)
    +format (trim right 2 int) 1234      (* ==> "12"     *)
    +format (fit left 3 int) 12          (* ==> " 12"    *)
    +format (fit left 3 int) 123         (* ==> "123"    *)
    +format (fit left 3 int) 1234        (* ==> "234"    *)
    +
    +
    +
    +
    +
    +

    See Also

    + +
    +
    +
    + +
    + + diff --git a/doc/html/smlnj-lib/Util/str-GetOpt.html b/doc/html/smlnj-lib/Util/str-GetOpt.html new file mode 100644 index 0000000..1313eaf --- /dev/null +++ b/doc/html/smlnj-lib/Util/str-GetOpt.html @@ -0,0 +1,508 @@ + + + + + + + + + + The GetOpt structure + + + + + + + + +
    +
    +
    +
    + +
    + +
    The GetOpt structure
    +
    +
    +
    +
    +
    + +
    +
    +
    +
    +
    +
    +
    +

    The GetOpt structure provides command-line argument processing similar +to the GNU getopt library. It supports both short options (a single +character preceded by a single minus character) and long options (multi-character +names preceded by two minus characters). Options may require an argument; +for short options, the argument is the next command-line argument, while for +long options, the argument follows an equal character (e.g., +"--foo=bar"). If the command-line arguments contains the string +"--", then all subsequent arguments are passed through +as non-options.

    +
    +
    +

    This implementation was ported from Sven Panne’s Haskell implementation +by Riccardo Pucella and has then been updated in various ways.

    +
    +
    +
    +
    +

    Synopsis

    +
    +
    +
    +
    structure GetOpt
    +
    +
    +
    +
    +
    +

    Interface

    +
    +
    +
    +
    datatype 'a arg_order
    +  = RequireOrder
    +  | Permute
    +  | ReturnInOrder of string -> 'a
    +
    +datatype 'a arg_descr
    +  = NoArg of unit -> 'a
    +  | ReqArg of (string -> 'a) * string
    +  | OptArg of (string option -> 'a) * string
    +
    +type 'a opt_descr = {
    +    short : string,
    +    long : string list,
    +    desc : 'a arg_descr,
    +    help : string
    +  }
    +
    +val usageInfo : {
    +        header : string,
    +        options : 'a opt_descr list
    +      } -> string
    +
    +val getOpt : {
    +        argOrder : 'a arg_order,
    +        options : 'a opt_descr list,
    +        errFn : string -> unit
    +      } -> string list -> ('a list * string list)
    +
    +
    +
    +
    +
    +

    Description

    +
    +
    +
    +
    datatype 'a arg_order = …​
    +
    +

    This datatype is used to specify the ordering policy for command-line +arguments. The constructors are interpreted as follows:

    +
    +
    +
    +
    +
    RequireOrder
    +
    +

    No options are processed after the first non-option argument is encountered.

    +
    +
    Permute
    +
    +

    Options and non-options may be freely mixed.

    +
    +
    ReturnInOrder of string -> 'a
    +
    +

    Non-options are converted to options using the supplied function.

    +
    +
    +
    +
    +
    +
    +
    datatype 'a arg_descr = …​
    +
    +

    This datatype is used to describe the optional argument of an option. +Each of the constructors has a function as an argument that is used +to generate the representation of the processed option. +The constructors are interpreted as follows:

    +
    +
    +
    +
    +
    NoArg of unit -> 'a
    +
    +

    The option does not have an argument, the supplied function is applied to +unit when processing the option.

    +
    +
    +
    +
    +
    +
    ReqArg of (string -> 'a) * string
    +
    +

    The option requires an argument, which is handled by the given function. +The string is the name of the argument used when printing a usage message.

    +
    +
    +
    +
    +
    +
    OptArg of (string option -> 'a) * string
    +
    +

    The argument is optional and +The string is the name of the argument used when printing a usage message.

    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    type 'a opt_descr = { …​ }
    +
    +

    This record type describes the properties of a command-line option. +Its fields have the following meaning:

    +
    +
    +
    +
    +
    short : string
    +
    +

    A string containing the allowed short flags for the option.

    +
    +
    long : string list
    +
    +

    A list of the allowed long flags for the option.

    +
    +
    desc : 'a arg_descr
    +
    +

    The description of how to process the option’s argument.

    +
    +
    help : string
    +
    +

    A descriptive message that is used to construct the usage message +(see the usageInfo function).

    +
    +
    +
    +
    +
    +
    +
    val usageInfo : {header, options} -> string
    +
    +

    usageInfo {header, options} returns a usage string suitable for a help +message. The header argument is prepended to the message (with a newline +between it and the rest of the message). Each option is described on its +own line.

    +
    +
    val getOpt : {…​} -> string list -> ('a list * string list)
    +
    +

    getOpt {argOrder, options, errFn} returns a function for processing +command-line options, which will return a list of results from processing +the options and a list of the residual command-line arguments. The +arguments to the call are

    +
    +
    +
    +
    +
    argOrder : 'a arg_order
    +
    +

    Specifies the ordering policy for processing command-line arguments.

    +
    +
    options : 'a opt_descr list
    +
    +

    The descriptors for the command-line options.

    +
    +
    errFn : string -> unit
    +
    +

    An error callback function that is used to report errors during +argument processing.

    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +

    Examples

    +
    +
    +

    There are two common approaches to using the GetOpt structure. The first +is to define a type that classifies the command-line options. For example,

    +
    +
    +
    +
    datatype opt = AFlg | B of string | C of int | Other of string | Bad
    +
    +val opts = [
    +        { short = "aA", long = [],
    +          desc = NoArg(fn () => AFlg),
    +          help = "Set A flag"
    +        },
    +        { short = "b", long = ["set-b"],
    +          desc = ReqArg(B, "<name>"), help = "Set B name"
    +        },
    +        { short = "", long = ["cval"],
    +          desc = OptArg (
    +            fn (SOME s) => (case Int.fromString s
    +                   of SOME n => C n
    +                    | NONE => Bad)
    +             | NONE => C 0,
    +            "<n>"),
    +          help = "Set C value (default 0)"
    +        }
    +      ]
    +
    +fun usage () = print (usageInfo{header = "usage:", options = opts})
    +
    +val doOpts = getOpt {
    +        argOrder = ReturnInOrder (fn s => Other s),
    +        options = opts,
    +        errFn = fn msg => raise Fail msg
    +      }
    +
    +
    +
    +

    The usage function will print the following text:

    +
    +
    +
    +
    usage:
    +  -a, -A                     Set A flag
    +  -b <name>  --set-b=<name>  Set B name
    +             --cval[=<n>]    Set C value (default 0)
    +
    +
    +
    +

    Applying the doOpts function with the following arguments

    +
    +
    +
    +
    doOpts ["-A", "foo", "--", "-c", "baz"];
    +
    +
    +
    +

    results in

    +
    +
    +
    +
    ([AFlg, Other "foo", Other "--", Other "-c", Other "baz"], [])
    +
    +
    +
    +

    Note that the second component of the result will always be the empty list +because the non-options were wrapped with Other. The “-c” argument was +treated as a non-option because it came after the "--."

    +
    +
    +

    The other approach to using the GetOpt structure is to define references +for the various options and then update them in the argument-descriptor +functions. For example:

    +
    +
    +
    +
    val aFlg : bool ref = ref false
    +val bOpt : string option ref = ref NONE
    +val cVal : int option ref = ref NONE
    +val errorFlg : bool ref = ref false
    +
    +val opts = [
    +        { short = "aA", long = [],
    +          desc = NoArg(fn () => aFlg := true),
    +          help = "Set A flag"
    +        },
    +        { short = "b", long = ["set-b"],
    +          desc = ReqArg(fn s => bOpt := SOME s, "<name>"),
    +          help = "Set B name"
    +        },
    +        { short = "", long = ["cval"],
    +          desc = OptArg (
    +            fn (SOME s) => (case Int.fromString s
    +                   of NONE => errorFlg := true
    +                    | someN => cVal := someN)
    +             | NONE => cVal := SOME 0,
    +            "<n>"),
    +          help = "Set C value (default 0)"
    +        }
    +      ]
    +
    +val doOpts = getOpt {
    +        argOrder = Permute,
    +        options = opts,
    +        errFn = fn msg => raise Fail msg
    +      }
    +
    +
    +
    +

    With this version, applying the doOpts function with the following arguments

    +
    +
    +
    +
    doOpts ["-A", "foo", "--", "-c", "baz"];
    +
    +
    +
    +

    results in

    +
    +
    +
    +
    ([()], ["foo", "--", "-c", "baz"])
    +
    +
    +
    +

    with the aFlg set to true and the other flags unchanged. One reason +for using this imperative approach is that it is supported by the +Controls Library.

    +
    +
    +
    +
    +

    Bugs

    +
    +
    +

    The function arguments to ReqArg and OptArg should really have +an option return type so that the case where the argument is badly formed +can be identified in the GetOpt implementation.

    +
    +
    +
    + +
    +
    + +
    + + diff --git a/doc/html/smlnj-lib/Util/str-HashString.html b/doc/html/smlnj-lib/Util/str-HashString.html new file mode 100644 index 0000000..205a387 --- /dev/null +++ b/doc/html/smlnj-lib/Util/str-HashString.html @@ -0,0 +1,206 @@ + + + + + + + + + + The HashString structure + + + + + + + + +
    +
    +
    +
    + +
    + +
    The HashString structure
    +
    +
    +
    +
    +
    + +
    +
    +
    +
    +
    +
    +
    +

    The HashString structure provides hashing functions for strings +and substrings. It is currently implemented as an alias for the +same functions from the FNVHash structure.

    +
    +
    +
    +
    +

    Synopsis

    +
    +
    +
    +
    structure HashString
    +
    +
    +
    +
    +
    +

    Interface

    +
    +
    +
    +
    val hashString  : string -> word
    +
    +val hashSubstring : substring -> word
    +
    +
    +
    +
    +
    +

    Description

    +
    +
    +
    +
    val hashString : string -> word
    +
    +

    hashString s returns a hash code for the string s.

    +
    +
    val hashSubstring : substring -> word
    +
    +

    hashSubstring s returns a hash code for the substring ss.

    +
    +
    +
    +
    +
    +
    +

    See Also

    + +
    +
    +
    + +
    + + diff --git a/doc/html/smlnj-lib/Util/str-HashTable.html b/doc/html/smlnj-lib/Util/str-HashTable.html new file mode 100644 index 0000000..f944a45 --- /dev/null +++ b/doc/html/smlnj-lib/Util/str-HashTable.html @@ -0,0 +1,389 @@ + + + + + + + + + + The HashTable structure + + + + + + + + +
    +
    +
    +
    + +
    + +
    The HashTable structure
    +
    +
    +
    +
    +
    + +
    +
    +
    +
    +
    +
    +
    +

    The HashTable structure implements hash tables that are polymorphic +in the key type.

    +
    +
    +

    The tables are implemented as an array of buckets, which are +lists of key-value pairs. The number of buckets grows with the number +of table entries.

    +
    +
    +
    +
    +

    Synopsis

    +
    +
    +
    +
    structure HashTable
    +
    +
    +
    +
    +
    +

    Interface

    +
    +
    +
    +
    type ('a, 'b) hash_table
    +
    +val mkTable : (('a -> word) * (('a * 'a) -> bool)) -> (int * exn)
    +      -> ('a,'b) hash_table
    +
    +val clear : ('a, 'b) hash_table -> unit
    +
    +val insert : ('a, 'b) hash_table -> ('a * 'b) -> unit
    +
    +val insertWith  : ('b * 'b -> 'b) -> ('a, 'b) hash_table -> 'a * 'b -> unit
    +val insertWithi : ('a * 'b * 'b -> 'b) -> ('a, 'b) hash_table -> 'a * 'b -> unit
    +
    +val inDomain : ('a, 'b) hash_table -> 'a -> bool
    +
    +val lookup : ('a, 'b) hash_table -> 'a -> 'b
    +val find : ('a, 'b) hash_table -> 'a -> 'b option
    +
    +val findAndRemove : ('a, 'b) hash_table -> 'a -> 'b option
    +
    +val remove : ('a, 'b) hash_table -> 'a -> 'b
    +
    +val numItems : ('a, 'b) hash_table ->  int
    +
    +val listItems  : ('a, 'b) hash_table -> 'b list
    +val listItemsi : ('a, 'b) hash_table -> ('a * 'b) list
    +
    +val app  : ('b -> unit) -> ('a, 'b) hash_table -> unit
    +val appi : (('a * 'b) -> unit) -> ('a, 'b) hash_table -> unit
    +
    +val map  : ('b -> 'c) -> ('a, 'b) hash_table -> ('a, 'c) hash_table
    +val mapi : (('a * 'b) -> 'c) -> ('a, 'b) hash_table -> ('a, 'c) hash_table
    +
    +val fold  : (('b *'c) -> 'c) -> 'c -> ('a, 'b) hash_table -> 'c
    +val foldi : (('a * 'b * 'c) -> 'c) -> 'c -> ('a, 'b) hash_table -> 'c
    +
    +val modify  : ('b -> 'b) -> ('a, 'b) hash_table -> unit
    +val modifyi : (('a * 'b) -> 'b) -> ('a, 'b) hash_table -> unit
    +
    +val filter  : ('b -> bool) -> ('a, 'b) hash_table -> unit
    +val filteri : (('a * 'b) -> bool) -> ('a, 'b) hash_table -> unit
    +
    +val copy : ('a, 'b) hash_table -> ('a, 'b) hash_table
    +
    +val bucketSizes : ('a, 'b) hash_table -> int list
    +
    +
    +
    +
    +
    +

    Description

    +
    +
    +
    +
    type ('a, 'b) hash_table
    +
    +

    The type of imperative hash tables indexed by 'a values

    +
    +
    val mkTable : 'a -> word) * (('a * 'a) -> bool -> (int * exn) -> ('a,'b) hash_table
    +
    +

    mkTable (hash, same) (n, ex) creates a new hash table that uses the hash +function to compute hash values for keys and the same function to test +key equality. The table will be initially sized to hold at least n items. +The exception ex is raised by the lookup and +remove functions when the search key is not in the domain.

    +
    +
    val clear : ('a, 'b) hash_table -> unit
    +
    +

    clear tbl removes all of the entries in the table.

    +
    +
    val insert : ('a, 'b) hash_table -> ('a * 'b) -> unit
    +
    +

    insert tbl (key, item) inserts a mapping from key to item into tbl. +Any existing mapping of key is discarded.

    +
    +
    val insertWith : ('b * 'b → 'b) -> ('a, 'b) hash_table -> 'a * 'b -> unit
    +
    +

    insertWith comb (tbl, key, v) adds the mapping from key to value to tbl, +where value = comb(v', v), if tbl already contained a mapping from key +to v'; otherwise, value = v.

    +
    +
    val insertWithi : ('a * 'b * 'b → 'b) -> ('a, 'b) hash_table -> 'a * 'b -> unit`
    +
    +

    insertWithi comb (tbl, key, v) adds the mapping from key to value to tbl, +where value = comb(key, v', v), if m already contained a mapping from key +to v'; otherwise, value = v.

    +
    +
    val inDomain : ('a, 'b) hash_table -> 'a -> bool
    +
    +

    inDomain tbl key returns true if, and only if, key is in the +domain of the table

    +
    +
    +
    +
    +
    +
    val lookup : ('a, 'b) hash_table -> 'a -> 'b
    +
    +

    lookup tbl key returns the item that key maps to if key is in +the domain of tbl. Otherwise, the table’s exception is raised.

    +
    +
    val find : ('a, 'b) hash_table -> 'a -> 'b option
    +
    +

    find tbl key returns the SOME v if key is mapped to v in tbl. +Otherwise, it returns NONE.

    +
    +
    val findAndRemove : ('a, 'b) hash_table → 'a → 'b option
    +
    +

    findAndRemove (tbl, key) returns SOME v and removes key from the +table if tbl maps key to v. If key is not in the domain of tbl, +then NONE is returned and tbl is unchanged.

    +
    +
    +
    +
    +
    +
    val remove : ('a, 'b) hash_table -> 'a -> 'b
    +
    +

    remove tbl key returns the item that key maps to if key is in +the domain of tbl and removes it from the table. Otherwise, the +table’s exception is raised.

    +
    +
    val numItems : ('a, 'b) hash_table -> int
    +
    +

    numItems tbl returns the number of entries in the table.

    +
    +
    val listItems : ('a, 'b) hash_table -> 'b list
    +
    +

    listItems tbl returns a list of the items in the range of tbl.

    +
    +
    val listItemsi : ('a, 'b) hash_table -> ('a * 'b) list
    +
    +

    listItemsi tbl returns a list of the key-value entries in tbl.

    +
    +
    val app : ('b -> unit) -> ('a, 'b) hash_table -> unit
    +
    +

    app f tbl applies the function f to each item in the range of tbl.

    +
    +
    val appi : (('a * 'b) -> unit) -> ('a, 'b) hash_table -> unit
    +
    +

    appi f tbl applies the function f to each item in the +key-value entries in tbl.

    +
    +
    val map : ('b -> 'c) -> ('a, 'b) hash_table -> ('a, 'c) hash_table
    +
    +

    map f tbl creates a new table with an entry (key, f(lookup tbl key)) +in the new table for every key in tbl. The new table inherits its +hash and key-equality functions, and exception from tbl.

    +
    +
    val mapi : (('a * 'b) -> 'c) -> ('a, 'b) hash_table -> ('a, 'c) hash_table
    +
    +

    mapi f tbl creates a new table with an entry (key, f(key, lookup tbl key)) +in the new table for every key in tbl. The new table inherits its +hash and key-equality functions, and exception from tbl.

    +
    +
    val fold : (('b *'c) -> 'c) -> 'c -> ('a, 'b) hash_table -> 'c
    +
    +

    fold f init tbl folds the function f over the items in the range of tbl +using init as an initial value.

    +
    +
    val foldi : (('a * 'b * 'c) -> 'c) -> 'c -> ('a, 'b) hash_table -> 'c
    +
    +

    foldi f init tbl folds the function f over the key-velu entries in tbl +using init as an initial value.

    +
    +
    val modify : ('b -> 'b) -> ('a, 'b) hash_table -> unit
    +
    +

    modify f tbl applies the function f for effect to the items in the +range of tbl, replacing the old items with the result of applying f.

    +
    +
    val modifyi : (('a * 'b) -> 'b) -> ('a, 'b) hash_table -> unit
    +
    +

    modifyi f tbl applies the function f for effect to the key-value +entries in tbl, replacing the old items with the result of applying f.

    +
    +
    val filter : ('b -> bool) -> ('a, 'b) hash_table -> unit
    +
    +

    filter pred tbl removes any entry (key, item) from tbl for which +pred item returns false.

    +
    +
    val filteri : (('a * 'b) -> bool) -> ('a, 'b) hash_table -> unit
    +
    +

    filteri pred tbl removes any entry (key, item) from tbl for which +pred(key, item) returns false.

    +
    +
    val copy : ('a, 'b) hash_table -> ('a, 'b) hash_table
    +
    +

    copy tbl creates a copy of tbl. This expression is equivalent to

    +
    +
    +
    map (fn x => x) tbl
    +
    +
    +
    +
    val bucketSizes : ('a, 'b) hash_table -> int list
    +
    +

    bucketSizes tbl returns a list of the current number of items per +bucket. This function allows users to gauge the quality of their +hashing function.

    +
    +
    +
    +
    +
    +
    +

    See Also

    + +
    +
    +
    + +
    + + diff --git a/doc/html/smlnj-lib/Util/str-IOUtil.html b/doc/html/smlnj-lib/Util/str-IOUtil.html new file mode 100644 index 0000000..3dbd0db --- /dev/null +++ b/doc/html/smlnj-lib/Util/str-IOUtil.html @@ -0,0 +1,238 @@ + + + + + + + + + + The IOUtil structure + + + + + + + + +
    +
    +
    +
    + +
    + +
    The IOUtil structure
    +
    +
    +
    +
    +
    + +
    +
    +
    +
    +
    +
    +
    +

    The IOUtil structure provides support for redirecting the standard input +and output streams.

    +
    +
    +
    +
    +

    Synopsis

    +
    +
    +
    +
    signature IO_UTIL
    +structure IOUtil : IO_UTIL
    +
    +
    +
    +
    +
    +

    Interface

    +
    +
    +
    +
    val withInputFile : string * ('a -> 'b) -> 'a -> 'b
    +val withInstream : TextIO.instream * ('a -> 'b) -> 'a -> 'b
    +
    +val withOutputFile : string * ('a -> 'b) -> 'a -> 'b
    +val withOutstream : TextIO.outstream * ('a -> 'b) -> 'a -> 'b
    +
    +
    +
    +
    +
    +

    Description

    +
    +
    +
    +
    val withInputFile : string * ('a -> 'b) -> 'a -> 'b
    +
    +

    withInputFile (file, f) x evaluates the expression f x with +standard input bound to file. The file is closed and the +TextIO.stdIn input stream is restored to its original binding +once evaluation terminates.

    +
    +
    val withInstream : TextIO.instream * ('a -> 'b) -> 'a -> 'b
    +
    +

    withInstream (inS, f) x evaluates the expression f x with +standard output redirected to inS. The TextIO.stdIn input stream +is restored to its original binding once evaluation terminates.

    +
    +
    val withOutputFile : string * ('a -> 'b) -> 'a -> 'b
    +
    +

    withOutputFile (file, f) x evaluates the expression f x with +standard output redirected to file. The file is closed and the +TextIO.stdOut output stream is restored to its original destination +once evaluation terminates.

    +
    +
    val withOutstream : TextIO.outstream * ('a -> 'b) -> 'a -> 'b
    +
    +

    withOutstream (outS, f) x evaluates the expression f x with +standard output redirected to outS. The TextIO.stdOut output stream +is restored to its original destination once evaluation terminates.

    +
    +
    +
    +
    +
    +
    +

    Example

    +
    +
    +

    The following expression will put its output in the file "hello.txt":

    +
    +
    +
    +
    withOutputFile ("hello.txt", fn () => print "hello world\n") ()
    +
    +
    +
    +
    +
    +

    See Also

    + +
    +
    +
    + +
    + + diff --git a/doc/html/smlnj-lib/Util/str-LibBase.html b/doc/html/smlnj-lib/Util/str-LibBase.html new file mode 100644 index 0000000..24ceb1b --- /dev/null +++ b/doc/html/smlnj-lib/Util/str-LibBase.html @@ -0,0 +1,224 @@ + + + + + + + + + + The LibBase structure + + + + + + + + +
    +
    +
    +
    + +
    + +
    The LibBase structure
    +
    +
    +
    +
    +
    + +
    +
    +
    +
    +
    +
    +
    +

    The LibBase structure provides some common definitions that are +shared across the SML/NJ Lbrary.

    +
    +
    +
    +
    +

    Synopsis

    +
    +
    +
    +
    signature LIB_BASE
    +structure LibBase : LIB_BASE
    +
    +
    +
    +
    +
    +

    Interface

    +
    +
    +
    +
    exception Unimplemented of string
    +exception Impossible of string
    +
    +exception NotFound
    +
    +val failure : {module : string, func : string, msg : string} -> 'a
    +
    +
    +
    +
    +
    +

    Description

    +
    +
    +
    +
    exception Unimplemented of string
    +
    +

    This exception is raised to report unimplemented features.

    +
    +
    exception Impossible of string
    +
    +

    This exception is raised to report internal errors.

    +
    +
    +
    +
    +
    +
    exception NotFound
    +
    +

    This exception is raised by searching operations when something being +searched for is missing.

    +
    +
    val failure : {module : string, func : string, msg : string} -> 'a
    +
    +

    failure {module, func, msg} raises the +Fail exception +with a message in a standard format. It is used internally to report +errors.

    +
    +
    +
    +
    +
    +
    +

    See Also

    + +
    +
    +
    + +
    + + diff --git a/doc/html/smlnj-lib/Util/str-ListFormat.html b/doc/html/smlnj-lib/Util/str-ListFormat.html new file mode 100644 index 0000000..592c3a6 --- /dev/null +++ b/doc/html/smlnj-lib/Util/str-ListFormat.html @@ -0,0 +1,243 @@ + + + + + + + + + + The ListFormat structure + + + + + + + + +
    +
    +
    +
    + +
    + +
    The ListFormat structure
    +
    +
    +
    +
    +
    + +
    +
    +
    +
    +
    +
    +
    +

    The ListFormat structure provides some utility functions for converting +lists into strings (and back).

    +
    +
    +
    +
    +

    Synopsis

    +
    +
    +
    +
    signature LIST_FORMAT
    +structure ListFormat : LIST_FORMAT
    +
    +
    +
    +
    +
    +

    Interface

    +
    +
    +
    +
    val fmt : {
    +	init : string,
    +	sep : string,
    +	final : string,
    +	fmt : 'a -> string
    +      } -> 'a list -> string
    +
    +val listToString : ('a -> string) -> 'a list -> string
    +
    +val scan : {
    +	init : string,
    +	sep : string,
    +	final : string,
    +	scan : (char, 'b) StringCvt.reader -> ('a, 'b) StringCvt.reader
    +      } -> (char, 'b) StringCvt.reader -> ('a list, 'b) StringCvt.reader
    +
    +
    +
    +
    +
    +

    Description

    +
    +
    +
    +
    val fmt : { …​ } -> 'a list -> string
    +
    +

    fmt {init, sep, final, fmt} lst converts the list lst to a string, +where init is an initial string, sep is the separator, final is +the final string, and fmt is a function for converting the list +elements to strings. For the list value [a, b, …​, c], the resulting +string will be formatted as

    +
    +
    +
    init ^ (fmt a) ^ sep ^ (fmt b) ^ sep ^ ... ^ sep ^ (fmt c) ^ final
    +
    +
    +
    +
    val listToString : ('a -> string) -> 'a list -> string
    +
    +

    listToString fmt lst returns a string representing lst using SML's +list notation. In other words, the above expression is equivalent to

    +
    +
    +
    fmt {init="[", sep=",", final="]", fmt=fmt} lst
    +
    +
    +
    +
    val scan : { …​ } -> (char, 'b) StringCvt.reader -> ('a list, 'b) StringCvt.reader
    +
    +

    scan {init, sep, final, scan} getc returns a +reader +for scanning lists of items from a character stream. The resulting +reader expects the list to begin with the init string, use sep as +a separator, and end with the final string. The reader uses the scan +argument function to scan individual list elements.
    +The reader will skip extra whitespace, so to scan a list of items separated +by spaces, use the empty string ("") as the separator.

    +
    +
    +
    +
    +
    +
    +

    See Also

    + +
    +
    +
    + +
    + + diff --git a/doc/html/smlnj-lib/Util/str-ListMergeSort.html b/doc/html/smlnj-lib/Util/str-ListMergeSort.html new file mode 100644 index 0000000..f4c9300 --- /dev/null +++ b/doc/html/smlnj-lib/Util/str-ListMergeSort.html @@ -0,0 +1,218 @@ + + + + + + + + + + The ListMergeSort structure + + + + + + + + +
    +
    +
    +
    + +
    + +
    The ListMergeSort structure
    +
    +
    +
    +
    +
    + +
    +
    +
    +
    +
    +
    +
    +

    The ListMergeSort structure implements the merge-sort +algorithm for lists.

    +
    +
    +
    +
    +

    Synopsis

    +
    +
    +
    +
    signature LIST_SORT
    +structure ListMergeSort : LIST_SORT
    +
    +
    +
    +
    +
    +

    Interface

    +
    +
    +
    +
     val sort : ('a * 'a -> bool) -> 'a list -> 'a list
    +
    + val uniqueSort : ('a * 'a -> order) -> 'a list -> 'a list
    +
    + val sorted : ('a * 'a -> bool) -> 'a list -> bool
    +
    +
    +
    +
    +
    +

    Description

    +
    +
    +
    +
    val sort : ('a * 'a -> bool) -> 'a list -> 'a list
    +
    +

    sort gt l sorts the list l in ascending order using the +"greater-than" relationship defined by gt. This sort is stable +and detects initial increasing and decreasing runs and thus is linear +time on ordered inputs.

    +
    +
    val uniqueSort : ('a * 'a -> order) -> 'a list -> 'a list
    +
    +

    uniquesort cmp l sorts the list l in ascending order using the +comparison function cmp, while removing duplicate elements.

    +
    +
    val sorted : ('a * 'a -> bool) -> 'a list -> bool
    +
    +

    sorted gt l returns true if the list is sorted in ascending +order under the greater-than relation gt.

    +
    +
    +
    +
    +
    +
    +

    See Also

    +
    +
    +

    sig-MONO_ARRAY_SORT.adoc[MONO_ARRAY_SORT], +sig-ARRAY_SORT.adoc[ARRAY_SORT], +The Util Library

    +
    +
    +
    +
    +
    + +
    + + diff --git a/doc/html/smlnj-lib/Util/str-ListXProd.html b/doc/html/smlnj-lib/Util/str-ListXProd.html new file mode 100644 index 0000000..6712bb8 --- /dev/null +++ b/doc/html/smlnj-lib/Util/str-ListXProd.html @@ -0,0 +1,258 @@ + + + + + + + + + + The ListXProd structure + + + + + + + + +
    +
    +
    +
    + +
    + +
    The ListXProd structure
    +
    +
    +
    +
    +
    + +
    +
    +
    +
    +
    +
    +
    +

    The ListXProd structure provides list combinators for computing +over the "Cartesian product" of two lists. For lists [a, b, c] +and [x, y, z], the elements are processed in the order

    +
    +
    +
    +
    [ (a, x), (a, y), (a, z),
    +  (b, x), (b, y), (b, z),
    +  (c, x), (c, y), (c, z)
    +]
    +
    +
    +
    +
    +
    +

    Synopsis

    +
    +
    +
    +
    signature LIST_XPROD
    +structure ListXProd : LIST_XPROD
    +
    +
    +
    +
    +
    +

    Interface

    +
    +
    +
    +
    val app : (('a * 'b) -> unit) -> ('a list * 'b list) -> unit
    +val map : (('a * 'b) -> 'c) -> ('a list * 'b list) -> 'c list
    +val fold : (('a * 'b * 'c) -> 'c) -> 'c -> ('a list * 'b list) -> 'c
    +
    +val appX : (('a * 'b) -> unit) -> ('a list * 'b list) -> unit
    +val mapX : (('a * 'b) -> 'c) -> ('a list * 'b list) -> 'c list
    +val foldX : (('a * 'b * 'c) -> 'c) -> ('a list * 'b list) -> 'c -> 'c
    +
    +
    +
    +
    +
    +

    Description

    +
    +
    +
    +
    val app : (('a * 'b) -> unit) -> ('a list * 'b list) -> unit
    +
    +

    appX f (l1, l2) applies the function f to the Cartesian product of the +to lists l1 and l2.

    +
    +
    +
    +
    +
    +
    val map : (('a * 'b) -> 'c) -> ('a list * 'b list) -> 'c list
    +
    +

    mapX f (l1, l2) maps the function f over the Cartesian product of the +to lists l1 and l2 to produce a new list.

    +
    +
    +
    +
    +
    +
    val fold : (('a * 'b * 'c) -> 'c) -> 'c ->('a list * 'b list) -> 'c
    +
    +

    foldX f init (l1, l2) folds the function f over the Cartesian product of the + to lists l1 and l2, using init as the initial value.

    +
    +
    +
    +
    +

    Deprecated functions

    +
    +

    The following functions are part of the interface, but have been +deprecated.

    +
    +
    +
    +
    val appX : (('a * 'b) -> 'c) -> ('a list * 'b list) -> unit
    +
    +

    Use app instead. Note that app expects +that its first argument will have a unit return type.

    +
    +
    val mapX : (('a * 'b) -> 'c) -> ('a list * 'b list) -> 'c list
    +
    +

    Use map instead.

    +
    +
    val foldX : (('a * 'b * 'c) -> 'c) -> 'c ->('a list * 'b list) -> 'c
    +
    +

    Use fold instead. Note that the second and third +arguments of fold are swapped with respect to foldX.

    +
    +
    +
    +
    +
    +
    +
    +

    See Also

    + +
    +
    +
    + +
    + + diff --git a/doc/html/smlnj-lib/Util/str-Native.html b/doc/html/smlnj-lib/Util/str-Native.html new file mode 100644 index 0000000..3d28d6a --- /dev/null +++ b/doc/html/smlnj-lib/Util/str-Native.html @@ -0,0 +1,178 @@ + + + + + + + + + + Native structure aliases + + + + + + + + +
    +
    +
    +
    + +
    + +
    Native structure aliases
    +
    +
    +
    +
    +
    + +
    +
    +
    +
    +
    +
    +
    +

    These structure aliases provide portable names for the host machine’s +native numeric types.

    +
    +
    +
    +
    +

    Synopsis

    +
    +
    +
    +
    structure NativeInt : INTEGER
    +structure NativeWord : WORD
    +
    +
    +
    +
    +
    +

    Description

    +
    +
    +

    See the INTEGER +and WORD signatures for +details about the structures.

    +
    +
    +
    +
    +
    + +
    + + diff --git a/doc/html/smlnj-lib/Util/str-ParserComb.html b/doc/html/smlnj-lib/Util/str-ParserComb.html new file mode 100644 index 0000000..b76fd56 --- /dev/null +++ b/doc/html/smlnj-lib/Util/str-ParserComb.html @@ -0,0 +1,450 @@ + + + + + + + + + + The ParserComb structure + + + + + + + + +
    +
    +
    +
    + +
    + +
    The ParserComb structure
    +
    +
    +
    +
    +
    + +
    +
    +
    +
    +
    +
    +
    +

    The ParserComb structure provides parser combinators over +character readers. This structure is modeled after the Haskell +combinators of Hutton and Meijer. The main difference is that +they return a single result, instead of a list of results. +This fact means that the or combinator is a committed choice; +once one branch succeeds, the others will not be enabled. While +this property is somewhat limiting, for many applications it +will not be a problem.

    +
    +
    +
    +
    +

    Synopsis

    +
    +
    +
    +
    signature PARSER_COMB
    +structure ParserComb : PARSER_COMB
    +
    +
    +
    +
    +
    +

    Interface

    +
    +
    +
    +
    type ('a, 'strm) parser =
    +      (char, 'strm) StringCvt.reader -> ('a, 'strm) StringCvt.reader
    +
    +val result : 'a -> ('a, 'strm) parser
    +
    +val failure : ('a, 'strm) parser
    +
    +val wrap : (('a, 'strm) parser * ('a -> 'b)) -> ('b, 'strm) parser
    +
    +val seq : (('a, 'strm) parser * ('b, 'strm) parser) -> (('a * 'b), 'strm) parser
    +val seqWith : (('a * 'b) -> 'c)
    +      -> (('a, 'strm) parser * ('b, 'strm) parser)
    +	-> ('c, 'strm) parser
    +
    +val bind : (('a, 'strm) parser * ('a -> ('b, 'strm) parser))
    +      -> ('b, 'strm) parser
    +
    +val eatChar : (char -> bool) -> (char, 'strm) parser
    +
    +val char   : char -> (char, 'strm) parser
    +val string : string -> (string, 'strm) parser
    +
    +val skipBefore : (char -> bool) -> ('a, 'strm) parser -> ('a, 'strm) parser
    +
    +val or : (('a, 'strm) parser * ('a, 'strm) parser) -> ('a, 'strm) parser
    +val or' : ('a, 'strm) parser list -> ('a, 'strm) parser
    +
    +val zeroOrMore : ('a, 'strm) parser -> ('a list, 'strm) parser
    +val oneOrMore  : ('a, 'strm) parser -> ('a list, 'strm) parser
    +
    +val option : ('a, 'strm) parser -> ('a option, 'strm) parser
    +val join   : ('a option, 'strm) parser -> ('a, 'strm) parser
    +
    +val token : (char -> bool) -> (string, 'strm) parser
    +
    +
    +
    +
    +
    +

    Description

    +
    +
    +
    +
    type ('a, 'strm) parser = (char, 'strm) StringCvt.reader -> ('a, 'strm) StringCvt.reader
    +
    +

    A parser is a function that takes a +character reader +and returns reader for the type of values the parser accepts.

    +
    +
    val result : 'a -> ('a, 'strm) parser
    +
    +

    result v getc strm returns SOME(v, strm); i.e., result v +lifts the value v to a parser that returns v without consuming +any input.

    +
    +
    val failure : ('a, 'strm) parser
    +
    +

    failure getc strm returns NONE; i.e. it is the parser that +does not accept any input.

    +
    +
    val wrap : 'a, 'strm) parser * ('a -> 'b -> ('b, 'strm) parser
    +
    +

    wrap parser f composes the function f with parser

    +
    +
    val seq : (('a, 'strm) parser * ('b, 'strm) parser) -> (('a * 'b), 'strm) parser
    +
    +

    seq (parser1, parser2) is the sequential combination of the +two parsers; i.e., a parser that will first parse a value v1 +from the input using parser1 and then parse a value v2 using parser2 +yielding the pair (v1, v2).

    +
    +
    val seqWith : (('a * 'b) -> 'c) -> (('a, 'strm) parser * ('b, 'strm) parser) -> ('c, 'strm) parser
    +
    +

    seqWith f (parser1, parser2) is the sequential combination of the two parsers composed +with the function f; i.e., a parser that will first parse a value v1 +from the input using parser1 and then parse a value v2 using parser2 +yielding the result of f(v1, v2). This expression is equivalent to

    +
    +
    +
    wrap (seq (parser1, parser2), f)
    +
    +
    +
    +
    val bind : 'a, 'strm) parser * ('a -> ('b, 'strm) parser -> ('b, 'strm) parser
    +
    +

    bind parser f returns a parser that first uses parser to parse a value v +from the input and then continues using the parser that results from f v.

    +
    +
    val eatChar : (char -> bool) -> (char, 'strm) parser
    +
    +

    eatChar pred returns a parser that parses one character c for which +pred c returns true.

    +
    +
    val char : char -> (char, 'strm) parser
    +
    +

    char c returns a parser that parses the character c.

    +
    +
    val string : string -> (string, 'strm) parser
    +
    +

    string s`returns a parser that parses the string `s.

    +
    +
    val skipBefore : (char -> bool) -> ('a, 'strm) parser -> ('a, 'strm) parser
    +
    +

    skipBefore pred parser returns a parser that first skips any prefix of characters +that satisfy the predicate pred and then applies parser to the input.

    +
    +
    val or : (('a, 'strm) parser * ('a, 'strm) parser) -> ('a, 'strm) parser
    +
    +

    or (parser1, parser2) returns the ordered choice of the two parsers; i.e., +it returns a parser that first attempts to parse the input using parser1; if +parser1 fails on the input, then it uses parser2.

    +
    +
    val or' : ('a, 'strm) parser list -> ('a, 'strm) parser
    +
    +

    or' parsers returns the ordered choice of a list of parsers. This +expression is equivalent to

    +
    +
    +
    List.foldr or failure parsers
    +
    +
    +
    +
    val zeroOrMore : ('a, 'strm) parser -> ('a list, 'strm) parser
    +
    +

    zeroOrMore parser returns a parser that parses a list of zero or more +items using parser.

    +
    +
    val oneOrMore : ('a, 'strm) parser -> ('a list, 'strm) parser
    +
    +

    oneOrMore parser returns a parser that parses a list of one or more +items using parser.

    +
    +
    val option : ('a, 'strm) parser -> ('a option, 'strm) parser
    +
    +

    option parser returns a parser that parses an optional item +(i.e., zero or one occurrences) using parser.

    +
    +
    val join : ('a option, 'strm) parser -> ('a, 'strm) parser
    +
    +

    join parser returns a parser that requires the optional item parsed +by parser to be present.

    +
    +
    val token : (char -> bool) -> (string, 'strm) parser
    +
    +

    token pred returns a parser for a string of characters, where every +character satisfies the predicate function pred.

    +
    +
    +
    +
    +
    +
    +

    Examples

    +
    +
    +

    As noted above, the parser type and combinators are +designed around the +StringCvt.reader +representation of input streams. +Thus, the scan functions defined in the {basis-lib-url}/index.html[Basis Library] +are compatible with the parser type defined here. For example,

    +
    +
    +
    +
    val boolParser : (bool, 'strm) parser = Bool.scan
    +val intParser  : (int, 'strm) parser = Int.scan StringCvt.DEC
    +
    +
    +
    +

    Let us define the abstract syntax of a small expression language with +addition, numbers, and let-bound variables.

    +
    +
    +
    +
    datatype exp
    +  = VAR of string
    +  | NUM of int
    +  | ADD of exp * exp
    +  | LET of string * exp * exp
    +
    +
    +
    +

    We can use parser combinators to implement a simple parser +for this language as follows.

    +
    +
    +

    We start by defining a few utility definitions:

    +
    +
    +
    +
    structure P = ParserComb
    +
    +val +> = P.seq
    +infixr 3 +>
    +
    +fun skipWS getc = P.skipBefore Char.isSpace getc
    +
    +
    +
    +

    We can then define parsers for the atomic expressions +(numbers and variables):

    +
    +
    +
    +
    fun numParser getc = P.wrap (Int.scan StringCvt.DEC, NUM) getc
    +fun idParser getc = P.seqWith
    +      (fn (a, SOME b) => a ^ b | (a, NONE) => a)
    +      (P.wrap (P.eatChar Char.isAlpha, str),
    +       P.option (P.token Char.isAlphaNum))
    +      getc
    +fun varParser getc = P.wrap(idParser, VAR) getc
    +
    +
    +
    +

    We need the separate idParser to parse let-bound identifiers.

    +
    +
    +

    We then define three, mutually-recursive, functions to parse +expressions.

    +
    +
    +
    +
    fun letParser getc = P.wrap (
    +      P.string "let" +> skipWS(idParser) +> skipWS(P.char #"=") +> expParser
    +      +> skipWS(P.string "in") +> expParser,
    +      fn (_, (x, (_, (e1, (_, e2))))) => LET(x, e1, e2)) getc
    +and expParser getc = P.wrap (
    +      skipWS (P.seq (
    +	P.or' [letParser, numParser, varParser],
    +	addParser)),
    +      fn (e, es) => List.foldl (fn (a, b) => ADD(b, a)) e es) getc
    +and addParser getc =
    +      P.zeroOrMore (skipWS (P.wrap (P.char #"+" +> expParser, #2))) getc
    +
    +
    +
    +

    Note that the letParser must appear before the varParser in the +list of parsers combined by or' to avoid treating the string "let" +as a variable. Another detail is that we use +List.foldl with a +function that swaps the order of its arguments in order +that addition is left associative.

    +
    +
    +

    If we evaluate the expression

    +
    +
    +
    +
    StringCvt.scanString expParser " let x = 1+2 in x + x ";
    +
    +
    +
    +

    we get the expected result

    +
    +
    +
    +
    SOME (LET ("x", ADD (NUM 1, NUM 2), ADD (VAR "x", VAR "x")))
    +
    +
    +
    +
    + +
    +
    + +
    + + diff --git a/doc/html/smlnj-lib/Util/str-PathUtil.html b/doc/html/smlnj-lib/Util/str-PathUtil.html new file mode 100644 index 0000000..0420a95 --- /dev/null +++ b/doc/html/smlnj-lib/Util/str-PathUtil.html @@ -0,0 +1,248 @@ + + + + + + + + + + The PathUtil structure + + + + + + + + +
    +
    +
    +
    + +
    + +
    The PathUtil structure
    +
    +
    +
    +
    +
    + +
    +
    +
    +
    +
    +
    +
    +

    The PathUtil structure provides support for searching for files +in the file system using a list of possible locations. It is implemented +using the SML Basis Library portable +file-system mechanisms and, thus, it itself portable across different +operating systems.

    +
    +
    +
    +
    +

    Synopsis

    +
    +
    +
    +
    signature PATH_UTIL
    +structure PathUtil : PATH_UTIL
    +
    +
    +
    +
    +
    +

    Interface

    +
    +
    +
    +
    val findFile : string list -> string -> string option
    +
    +val findFiles : string list -> string -> string list
    +
    +val existsFile : (string -> bool) -> string list -> string -> string option
    +
    +val allFiles : (string -> bool) -> string list -> string -> string list
    +
    +val findExe : string list -> string -> string option
    +
    +
    +
    +
    +
    +

    Description

    +
    +
    +
    +
    val findFile : string list -> string -> string option
    +
    +

    findFile paths name returns SOME path, where path is a string of + the form "p/name" and p is the first string in paths + such that path exists. If no such file exists, then NONE is returned. + If name is an absolute path, then SOME name is returned + if it exists.

    +
    +
    val findFiles : string list -> string -> string list
    +
    +

    findFiles (paths, mode) name returns a list of strings, such that each string +s in the result has the form "p/name" with p in paths +and the file named by path existing in the file system.

    +
    +
    val existsFile : (string -> bool) -> string list -> string -> string option
    +
    +

    existsFile pred paths name returns SOME path, where path is a string of + the form "p/name" and p is the first string in paths + such that path exists and that pred path returns true. If no such file exists, + then NONE is returned. If name is an absolute path, then + SOME name is returned if it exists and satisfies the predicate.

    +
    +
    val allFiles : (string -> bool) -> string list -> string -> string list
    +
    +

    allFiles pred paths name returns a list of strings, such that each string +s in the result has the form "p/name" with p in paths, +the file named by path existing in the file system, and pred path returns +true. The order of the path list is preserved in the result. If name +is an absolute path, then the list [name] is returned if name exists +and satisfies the predicate.

    +
    +
    val findExe : string list -> string -> string option
    +
    +

    findExe paths name searches paths for an executable file with the given +name. This expression is equivalent to

    +
    +
    +
    existsFile (fn p => OS.FileSys.access(p, [OS.FileSys.A_EXEC])) paths name
    +
    +
    +
    +
    +
    +
    +
    +
    +

    See Also

    + +
    +
    +
    + +
    + + diff --git a/doc/html/smlnj-lib/Util/str-PropList.html b/doc/html/smlnj-lib/Util/str-PropList.html new file mode 100644 index 0000000..1ee0727 --- /dev/null +++ b/doc/html/smlnj-lib/Util/str-PropList.html @@ -0,0 +1,337 @@ + + + + + + + + + + The PropList structure + + + + + + + + +
    +
    +
    +
    + +
    + +
    The PropList structure
    +
    +
    +
    +
    +
    + +
    +
    +
    +
    +
    +
    +
    +

    The PropList structure provides a extensible, but type safe, implementation +of property lists.

    +
    +
    +
    +
    +

    Synopsis

    +
    +
    +
    +
    structure PropList
    +
    +
    +
    +
    +
    +

    Interface

    +
    +
    +
    +
    type holder
    +
    +val newHolder : unit -> holder
    +
    +val hasProps : holder -> bool
    +
    +val clearHolder : holder -> unit
    +
    +val sameHolder : (holder * holder) -> bool
    +
    +val newProp : (('a -> holder) * ('a -> 'b)) -> {
    +        peekFn : 'a -> 'b option,
    +        getFn  : 'a -> 'b,
    +        setFn  : ('a * 'b) -> unit,
    +        clrFn  : 'a -> unit
    +      }
    +
    +val newFlag : ('a -> holder) -> {
    +        getFn : 'a -> bool,
    +        setFn : ('a * bool) -> unit
    +      }
    +
    +
    +
    +
    +
    +

    Description

    +
    +
    +
    +
    type holder
    +
    +

    The type of a property-list container.

    +
    +
    val newHolder : unit -> holder
    +
    +

    newHolder () creates a new property-list holder.

    +
    +
    +
    +
    +
    +
    val hasProps : holder -> bool
    +
    +

    hasProps holder return true if, and only if, the holder contains +properties (including set flags).

    +
    +
    val clearHolder : holder -> unit
    +
    +

    clearHolder holder removes all properties and flags from the holder.

    +
    +
    val sameHolder : (holder * holder) -> bool
    +
    +

    sameHolder (holder1, holder2) returns true if, and only if, +the two holders are the same.

    +
    +
    val newProp : 'a -> holder) * ('a -> 'b -> { …​ }
    +
    +

    newProp (getHolder, init) creates a new property of type 'b +associated with values of type 'a, where getHolder is a function +for getting the holder from a value and init is a function for +defining the initial value of the property for a value. The property +is represented by a record of operations, which are as follows:

    +
    +
    +
    +
    +
    peekFn : 'a -> 'b option
    +
    +

    peekFn obj returns SOME v, where v is the value of the +property for obj. If the property has not been set for obj, +then NONE is returned.

    +
    +
    getFn : 'a -> 'b
    +
    +

    getFn obj returns the value of the property for obj. If the +property has not been set for obj, then the init function is +used to set the initial value of the property.

    +
    +
    setFn: ('a * 'b) -> unit
    +
    +

    setFn (obj, v) sets the value of the property to v for obj.

    +
    +
    clrFn : 'a -> unit
    +
    +

    clrFn obj removes the property from obj.

    +
    +
    +
    +
    +
    +
    +
    val newFlag : ('a -> holder) -> { …​ }
    +
    +

    newFlag getHolder creates a new boolean property for values of type 'a. +The property is represented by a record of two functions:

    +
    +
    +
    +
    +
    getFn : 'a -> bool
    +
    +

    getFn obj returns the value of the flag for obj.

    +
    +
    setFn : ('a * bool) -> unit
    +
    +

    setFn (obj, b) sets the value of the flag to b for obj.

    +
    +
    +
    +
    +
    +
    +

    Flags represent boolean properties in a way that is more space efficient +than using newProp. Basically, a true value is represented by the +presence of the property in the holder, while false is represented by +its absence. This representation affects the behavior of +hasProps as flags that are false are not counted.

    +
    +
    +
    +
    +
    +
    +
    +

    Examples

    +
    +
    +

    A common use of property lists is to provide a mechanism for attaching +attributes to existing types. For example, we might define a representation +of variables in a compiler as:

    +
    +
    +
    +
    datatype var = V of {
    +    name : string,
    +    props : PropList.holder
    +  }
    +
    +
    +
    +

    We might define a use count property as follows:

    +
    +
    +
    +
    local
    +  val {getFn, setFn, ...} = PropList.newProp (
    +        fn (V{props, ...}) => props,
    +        fn _ => 0)
    +in
    +fun use x = setFn(x, getFn x + 1)
    +fun countOf x = getFn x
    +end
    +
    +
    +
    +
    +
    +

    See Also

    + +
    +
    +
    + +
    + + diff --git a/doc/html/smlnj-lib/Util/str-Queue.html b/doc/html/smlnj-lib/Util/str-Queue.html new file mode 100644 index 0000000..14be661 --- /dev/null +++ b/doc/html/smlnj-lib/Util/str-Queue.html @@ -0,0 +1,314 @@ + + + + + + + + + + The Queue structure + + + + + + + + +
    +
    +
    +
    + +
    + +
    The Queue structure
    +
    +
    +
    +
    +
    + +
    +
    +
    +
    +
    +
    +
    +

    The Queue structure provides an imperative queue data structure, +which are implemented as a pair of stacks (lists) representing the +front and rear of the queue. Single-threaded enqueuing and +dequeuing operations will have amortized constant time.

    +
    +
    +
    +
    +

    Synopsis

    +
    +
    +
    +
    signature QUEUE
    +structure Queue :> QUEUE
    +
    +
    +
    +
    +
    +

    Interface

    +
    +
    +
    +
    type 'a queue
    +
    +exception Dequeue
    +
    +val mkQueue : unit -> 'a queue
    +val clear : 'a queue -> unit
    +val isEmpty : 'a queue -> bool
    +val enqueue : 'a queue * 'a -> unit
    +val dequeue : 'a queue -> 'a
    +val next : 'a queue -> 'a option
    +val delete : ('a queue * ('a -> bool)) -> unit
    +val head : 'a queue -> 'a
    +val peek : 'a queue -> 'a option
    +val length : 'a queue -> int
    +val contents : 'a queue -> 'a list
    +val app : ('a -> unit) -> 'a queue -> unit
    +val map : ('a -> 'b) -> 'a queue -> 'b queue
    +val foldl : ('a * 'b -> 'b) -> 'b -> 'a queue -> 'b
    +val foldr : ('a * 'b -> 'b) -> 'b -> 'a queue -> 'b
    +
    +
    +
    +
    +
    +

    Description

    +
    +
    +
    +
    type 'a queue
    +
    +

    The type constructor for queues.

    +
    +
    +
    +
    +
    +
    exception Dequeue
    +
    +

    This exceptions is raised when the dequeue function is +applied to an empty queue.

    +
    +
    val mkQueue : unit -> 'a queue
    +
    +

    mkQueue () returns a new empty queue.

    +
    +
    val clear : 'a queue -> unit
    +
    +

    clear q removes any elements from q leaving it empty.

    +
    +
    val isEmpty : 'a queue -> bool
    +
    +

    ifEmpty q returns true if the queue is empty.

    +
    +
    val enqueue : 'a queue * 'a -> unit
    +
    +

    enqueue (q, x) adds x to the end of q.

    +
    +
    val dequeue : 'a queue -> 'a
    +
    +

    dequeue q removes and returns the first element in q. +This function raises the Dequeue exception +if it is called on an empty queue.

    +
    +
    val next : 'a queue -> 'a option
    +
    +

    next q returns SOME x and removes x from q, where x was the first +element in q, or NONE if q is empty.

    +
    +
    val delete : ('a queue * ('a -> bool)) -> unit
    +
    +

    delete (q, pred) removes those items from q for which the +function pred returns true.

    +
    +
    val head : 'a queue -> 'a
    +
    +

    head q returns the first element of q or raises the exception +Dequeue if q is empty. The queue is unchanged.

    +
    +
    val peek : 'a queue -> 'a option
    +
    +

    peek q returns SOME x, where x is the first element of q, +or NONE if q is empty. The queue is unchanged.

    +
    +
    val length : 'a queue -> int
    +
    +

    length q returns the number of elements in the queue.

    +
    +
    val contents : 'a queue -> 'a list
    +
    +

    contents q returns the contents of q as a list.

    +
    +
    val app : ('a -> unit) -> 'a queue -> unit
    +
    +

    app f q applies the function f to the elements of q. +This expression is equivalent to

    +
    +
    +
    List.app f (contents q)
    +
    +
    +
    +
    val map : ('a -> 'b) -> 'a queue -> 'b queue
    +
    +

    map f q returns a new queue that results from mapping +the function f across the elements of the queue.

    +
    +
    val foldl : ('a * 'b -> 'b) -> 'b -> 'a queue -> 'b
    +
    +

    foldl f init q folds the function f over the elements of q from +front to back. This expression is equivalent to

    +
    +
    +
    List.foldl f init (contents q)
    +
    +
    +
    +
    val foldr : ('a * 'b -> 'b) -> 'b -> 'a queue -> 'b
    +
    +

    foldr f init q folds the function f over the elements of q from +back to front. This expression is equivalent to

    +
    +
    +
    List.foldr f init (contents q)
    +
    +
    +
    +
    +
    +
    +
    +
    +

    See Also

    + +
    +
    +
    + +
    + + diff --git a/doc/html/smlnj-lib/Util/str-Rand.html b/doc/html/smlnj-lib/Util/str-Rand.html new file mode 100644 index 0000000..2662d5e --- /dev/null +++ b/doc/html/smlnj-lib/Util/str-Rand.html @@ -0,0 +1,261 @@ + + + + + + + + + + The Rand structure + + + + + + + + +
    +
    +
    +
    + +
    + +
    The Rand structure
    +
    +
    +
    +
    +
    + +
    +
    +
    +
    +
    +
    +
    +

    The Rand structure provides a simple random number generator as described +in Larry Paulson in ML for the Working Programmer (pp. 170-171). +The original algorithm was recommended by Park and Miller in +Random number generators: good ones +are hard to find, CACM 1988 (pp 1192-1201) with modifications described +in CACM 1993 (pp. 105-110).

    +
    +
    +

    Note: it is recommended that one use the Random +structure when the quality of the generated numbers is at all important. +The main advantages of this implementation is that it is functional +(the generators provided by the Random structure are imperative) and +it is fast.

    +
    +
    +
    +
    +

    Synopsis

    +
    +
    +
    +
    structure Rand
    +
    +
    +
    +
    +
    +

    Interface

    +
    +
    +
    +
    type rand = Word.word
    +
    +val randMin : rand
    +val randMax : rand
    +
    +val random : rand -> rand
    +
    +val mkRandom : rand -> unit -> rand
    +
    +val norm : rand -> real
    +
    +val range : (int * int) -> rand -> int
    +
    +
    +
    +
    +
    +

    Description

    +
    +
    +
    +
    type rand = Word.word
    +
    +

    The "state" of the generator, which is just a single word.

    +
    +
    val randMin : rand
    +
    +

    The minimum allowed value for the state.

    +
    +
    val randMax : rand
    +
    +

    The maximum allowed value for the state.

    +
    +
    val random : rand -> rand
    +
    +

    random seed returns a pseudo-random value in the range +[randMin .. randMax]. Iteratively using the value returned by +random as the next seed will produce a sequence of pseudo-random +numbers.

    +
    +
    val mkRandom : rand -> unit -> rand
    +
    +

    mkRandom seed returns a function that generates a fresh random number +in the range [randMin .. randMax].

    +
    +
    val norm : rand -> real
    +
    +

    norm rand maps the random number in the range [randMin .. randMax] +to the real interval (0..1).

    +
    +
    val range : (int * int) -> rand -> int
    +
    +

    range (lo, hi) rand maps the random number in the range [randMin .. randMax] +to the interval [lo..hi]. This function will raise the +Fail exception +if hi < lo.

    +
    +
    +
    +
    +
    +
    +

    Bugs

    +
    +
    +

    This implementation needs to be updated for 64-bit systems.

    +
    +
    +
    +
    +

    See Also

    + +
    +
    +
    + +
    + + diff --git a/doc/html/smlnj-lib/Util/str-Random.html b/doc/html/smlnj-lib/Util/str-Random.html new file mode 100644 index 0000000..822148a --- /dev/null +++ b/doc/html/smlnj-lib/Util/str-Random.html @@ -0,0 +1,307 @@ + + + + + + + + + + The Random structure + + + + + + + + +
    +
    +
    +
    + +
    + +
    The Random structure
    +
    +
    +
    +
    +
    + +
    +
    +
    +
    +
    +
    +
    +

    The Random structure implements a random number generator +using the +Mersenne Twister algorithm. The implementation is specialized +to the native word size.

    +
    +
    +

    Note that prior to 2023.1 (and 110.99.4), this structure was implemented +using a subtract-with-borrow algorithm.

    +
    +
    +
    +
    +

    Synopsis

    +
    +
    +
    +
    structure Random :> RANDOM
    +
    +
    +
    +
    +
    +

    Interface

    +
    +
    +
    +
    type rand
    +
    +val rand : (int * int) -> rand
    +
    +val fromList : NativeWord.word list -> rand
    +
    +val toBytes : rand -> Word8Vector.vector
    +val fromBytes : Word8Vector.vector -> rand
    +
    +val toString : rand -> string
    +val fromString : string -> rand
    +
    +val randNativeInt : rand -> NativeInt.int
    +
    +val randNativeWord : rand -> NativeWord.word
    +
    +val randInt : rand -> int
    +
    +val randWord : rand -> int
    +
    +val randNat : rand -> int
    +
    +val randReal : rand -> real
    +
    +val randRange : (int * int) -> rand -> int
    +
    +
    +
    +
    +
    +

    Description

    +
    +
    +
    +
    type rand
    +
    +

    Represents the internal state of a random number generator.

    +
    +
    val rand : (int * int) -> rand
    +
    +

    rand (n1, n2) creates a random number generator from the +initial seed specified by the pair (n1, n2). +This function is kept for backward compatibility with the old +implementation, but it is recommended that one use the fromList +function in new code.

    +
    +
    val fromList : NativeWord.word list -> rand
    +
    +

    creates a random number generator from the given list of initial seeds.

    +
    +
    val toBytes : rand -> Word8Vector.vector
    +
    +

    toBytes rand returns a byte vector representing the current state of +the generator.

    +
    +
    val fromBytes : Word8Vector.vector -> rand
    +
    +

    fromBytes bv creates a generator with the initial state that was encoded +in the byte vector bv. This expression will raise +Fail exception +if the byte vector is invalid.

    +
    +
    val toString : rand -> string
    +
    +

    toString rand returns a string representing the random-number-generator +state rand. This string is a Base64 encoding +of the result of toBytes rand.

    +
    +
    val fromString : string -> rand
    +
    +

    fromString s returns the random-number-generator encoded as the string s +(presumably generated by toString). This expression will raise +Fail exception +if the string s does not have the proper form.

    +
    +
    val randNativeWord : rand -> NativeWord.word
    +
    +

    randNativeWord rand generates a uniform random number in the range +\([0 .. 2^k-1\)], where \(k\) is the host platform’s +native word size (e.g., 32 or 64).

    +
    +
    val randNativeInt : rand -> NativeInt.int
    +
    +

    randNativeInt rand generates a random word a uniform distribution in +the range \([0 .. 2^{k-1}-1\)], where \(k\) is the host +platform’s native word size (e.g., 32 or 64).

    +
    +
    val randInt : rand -> int
    +
    +

    randInt rand generates a random integer with a uniform distribution in +the range \([-2^{k-1} .. 2^{k-1}-1\)], where \(k\) +is the host platform’s default word size (e.g., 31 or 63).

    +
    +
    val randWord : rand -> word
    +
    +

    randWord rand generates a random word with a uniform distribution in +the range \([0 .. 2^k-1\)], where \(k\) +is the host platform’s default word size (e.g., 31 or 63).

    +
    +
    val randNat : rand -> int
    +
    +

    randNat rand generates a random integer with a uniform distribution in +the range \([0 .. 2^{k-1}-1\)], where \(k\) +is the host platform’s default word size (e.g., 31 or 63).

    +
    +
    val randReal : rand -> real
    +
    +

    randReal rand generates a random real number in the range [0..1).

    +
    +
    val randRange : (int * int) -> rand -> int
    +
    +

    randRange (lo, hi) rand generates a random number in the +[lo..hi]. This function will raise the +Fail exception +if hi < lo.

    +
    +
    +
    +
    +
    +
    +

    See Also

    + +
    +
    +
    + +
    + + diff --git a/doc/html/smlnj-lib/Util/str-RealOrderStats.html b/doc/html/smlnj-lib/Util/str-RealOrderStats.html new file mode 100644 index 0000000..786dad6 --- /dev/null +++ b/doc/html/smlnj-lib/Util/str-RealOrderStats.html @@ -0,0 +1,201 @@ + + + + + + + + + + The RealOrderStats structure + + + + + + + + +
    +
    +
    +
    + +
    + +
    The RealOrderStats structure
    +
    +
    +
    +
    +
    + +
    +
    +
    +
    +
    +
    +
    +

    The RealOrderStats structure …​

    +
    +
    +
    +
    +

    Synopsis

    +
    +
    +
    +
    structure RealOrderStats
    +
    +
    +
    +
    +
    +

    Interface

    +
    +
    +
    +
    +
    +
    +
    +
    +
    +

    Description

    +
    +
    +
    +
    type foo
    +
    +

    something

    +
    +
    val bar : foo -> foo
    +
    +

    something

    +
    +
    +
    +
    +
    +
    +

    See Also

    + +
    +
    +
    + +
    + + diff --git a/doc/html/smlnj-lib/Util/str-Scan.html b/doc/html/smlnj-lib/Util/str-Scan.html new file mode 100644 index 0000000..1d9c158 --- /dev/null +++ b/doc/html/smlnj-lib/Util/str-Scan.html @@ -0,0 +1,344 @@ + + + + + + + + + + The Scan structure + + + + + + + + +
    +
    +
    +
    + +
    + +
    The Scan structure
    +
    +
    +
    +
    +
    + +
    +
    +
    +
    +
    +
    +
    +

    The Scan structure provides C-style conversions from string +representations.

    +
    +
    +
    +
    +

    Synopsis

    +
    +
    +
    +
    signature SCAN
    +structure Scan : SCAN
    +
    +
    +
    +
    +
    +

    Interface

    +
    +
    +
    +
    datatype fmt_item
    +  = ATOM of Atom.atom
    +  | LINT of LargeInt.int
    +  | INT of Int.int
    +  | LWORD of LargeWord.word
    +  | WORD of Word.word
    +  | WORD8 of Word8.word
    +  | BOOL of bool
    +  | CHR of char
    +  | STR of string
    +  | REAL of Real.real
    +  | LREAL of LargeReal.real
    +  | LEFT of (int * fmt_item)
    +  | RIGHT of (int * fmt_item)
    +
    +exception BadFormat
    +
    +val sscanf : string -> string -> fmt_item list option
    +val scanf  : string -> (char, 'a) StringCvt.reader
    +      -> (fmt_item list, 'a) StringCvt.reader
    +
    +
    +
    +
    +
    +

    Description

    +
    +
    +
    +
    datatype fmt_item
    +
    +

    This datatype, which is the same type as +Format.fmt_item, is used as a union +type to represent the results of scanning input.

    +
    +
    +
    +
    +
    LINT n
    +
    +

    wraps a large integer value n to convert +(the conversion specifier must be one of “d”, “o”, “x”, or “X”).

    +
    +
    INT n
    +
    +

    wraps a default integer value n to convert +(the conversion specifier must be one of “d”, “o”, “x”, or “X”).

    +
    +
    BOOL b
    +
    +

    wraps a Boolean value b to convert +(the conversion specifier must be “b”).

    +
    +
    CHR c
    +
    +

    wraps a character value +(the conversion specifier must be “c”).

    +
    +
    STR s
    +
    +

    wraps a string value s to convert +(the conversion specifier must be “s”). The conversion is the +identity; e.g., STR "\n" will produce a newline in the result +string.

    +
    +
    REAL r
    +
    +

    wraps a default real value r to convert +(the conversion specifier must be one of “e”, “E”, “f”, +“F”, “g”, or “G”).

    +
    +
    ATOM atm
    +
    +

    this constructor will never be returned by scanf or sscanf.

    +
    +
    LWORD w
    +
    +

    this constructor will never be returned by scanf or sscanf.

    +
    +
    WORD w
    +
    +

    this constructor will never be returned by scanf or sscanf.

    +
    +
    WORD8 w
    +
    +

    this constructor will never be returned by scanf or sscanf.

    +
    +
    LREAL r
    +
    +

    this constructor will never be returned by scanf or sscanf.

    +
    +
    LEFT _
    +
    +

    this constructor will never be returned by scanf or sscanf.

    +
    +
    RIGHT _
    +
    +

    this constructor will never be returned by scanf or sscanf.

    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    exception BadFormat
    +
    +

    This exception is raised when either sscanf or scanf is applied +to an ill-formed format string.

    +
    +
    +
    +
    +
    +
    val sscanf : string → string → fmt_item list option
    +
    +

    sscanf fmt s scans the string s using the format specifier fmt. If +successful, SOME items is returned, where each item in the items list +corresponds to a specified item in fmt. If the input cannot be scanned +according to fmt, then NONE is returned. +If the format string is ill formed, then the BadFormat exception +will be raised when sscanf fmt is evaluated.

    +
    +
    +
    +
    +
    +
    val scanf : string → (char, 'a) StringCvt.reader → (fmt_item list, 'a) StringCvt.reader
    +
    +

    sscanf fmt getc returns a reader that scan a character stream using + the format specifier fmt. If the format string is ill formed, then the +BadFormat exception will be raised when scanf fmt is evaluated.

    +
    +
    +
    +
    +
    +
    +

    Format Strings

    +
    +
    +

    The sscanf and `scanf functions take a format string as their first +argument. The format string is composed of zero or more +directives, which are either ordinary characters (excluding %) +or conversion specifiers. The result of applying one of the scan functions +to an input will be a list of fmt_item corresponding +to the conversion specifiers in the format string. All conversions are +introduced by the % character. The format string may also contain +other characters. White space (such as blanks, tabs, or newlines) in the format +string match any amount of white space, including none, in the input. Everything +else matches only itself. Scanning stops when an input character does not match +such a format character. Scanning also stops when an input conversion cannot be +made (see below).

    +
    +
    +

    To be written

    +
    +
    +
    +
    +

    See Also

    + +
    +
    +
    + +
    + + diff --git a/doc/html/smlnj-lib/Util/str-TimeLimit.html b/doc/html/smlnj-lib/Util/str-TimeLimit.html new file mode 100644 index 0000000..79ea146 --- /dev/null +++ b/doc/html/smlnj-lib/Util/str-TimeLimit.html @@ -0,0 +1,207 @@ + + + + + + + + + + The TimeLimit structure + + + + + + + + +
    +
    +
    +
    + +
    + +
    The TimeLimit structure
    +
    +
    +
    +
    +
    + +
    +
    +
    +
    +
    +
    +
    +

    The TimeLimit structure provides a mechanism for limiting the execution +time of a computation. The mechanism is implemented using the runtime +system’s interval timer and the SML/NJ signal mechanism.

    +
    +
    +
    +
    +

    Synopsis

    +
    +
    +
    +
    structure TimeLimit
    +
    +
    +
    +
    +
    +

    Interface

    +
    +
    +
    +
    exception TimeOut
    +
    +val timeLimit : Time.time -> ('a -> 'b) -> 'a -> 'b
    +
    +
    +
    +
    +
    +

    Description

    +
    +
    +
    +
    exception TimeOut
    +
    +

    The exception that is raised if the time limit expires.

    +
    +
    val timeLimit : Time.time -> ('a -> 'b) -> 'a -> 'b
    +
    +

    timeLimit t f x computes the expression f x. If the computation +takes longer than the time limit t, then the +TimeOut exception is raised.

    +
    +
    +
    +
    +
    +
    +

    See Also

    + +
    +
    +
    + +
    + + diff --git a/doc/html/smlnj-lib/Util/str-URef.html b/doc/html/smlnj-lib/Util/str-URef.html new file mode 100644 index 0000000..d0b5ca7 --- /dev/null +++ b/doc/html/smlnj-lib/Util/str-URef.html @@ -0,0 +1,280 @@ + + + + + + + + + + The URef structure + + + + + + + + +
    +
    +
    +
    + +
    + +
    The URef structure
    +
    +
    +
    +
    +
    + +
    +
    +
    +
    +
    +
    +
    +

    The URef structure provides mutable references with Union-Find +semantics. The interface is similar to that of references, but +adds operations to union two references together. When two uref +values are joined by one of the union operations, they become +equal (and, thus, their contents will be equal too).

    +
    +
    +

    The original design and implementation of this module was by Fritz Henglein.

    +
    +
    +
    +
    +

    Synopsis

    +
    +
    +
    +
    signature UREF
    +structure URef : UREF
    +
    +
    +
    +
    +
    +

    Interface

    +
    +
    +
    +
    type 'a uref
    +
    +val uRef: 'a -> 'a uref
    +
    +val equal: 'a uref * 'a uref -> bool
    +
    +val !! : 'a uref -> 'a
    +
    +val update : 'a uref * 'a -> unit
    +
    +val unify : ('a * 'a -> 'a) -> 'a uref * 'a uref -> bool
    +
    +val union : 'a uref * 'a uref -> bool
    +
    +val link : 'a uref * 'a uref -> bool
    +
    +
    +
    +
    +
    +

    Description

    +
    +
    +
    +
    type 'a uref
    +
    +

    The type constructor for union-find references.

    +
    +
    +
    +
    +
    +
    val uRef: 'a -> 'a uref
    +
    +

    uRef v creates a new reference with contents v.

    +
    +
    val equal: 'a uref * 'a uref -> bool
    +
    +

    equal (ur1, ur2) returns true if, and only if, ur1 and ur2 were +created by the same call to uRef or if they have been +unioned by a link, union, or +unify operation.

    +
    +
    val !! : 'a uref -> 'a
    +
    +

    !! ur returns the contents of ur.

    +
    +
    val update : 'a uref * 'a -> unit
    +
    +

    update (ur, v) updates the contents of ur to be v.

    +
    +
    +
    +
    +
    +
    val unify : ('a * 'a -> 'a) -> 'a uref * 'a uref -> bool
    +
    +

    unify f (ur1, ur2) unions ur1 and ur2 (i.e., after this +call, the expression equal(r1, ur2) will return true) and +returns true if they were not equal prior to the call to unify. +The contents of the unioned reference is set to f (v1, v2), where v1 +(resp. v2) was the contents of ur1 (resp. ur2) prior to the +call to unify.

    +
    +
    +
    +
    +
    +
    val union : 'a uref * 'a uref -> bool
    +
    +

    union (ur1, ur2) unions ur1 and ur2 (i.e., after this +call, the expression equal(r1, ur2) will return true) and +returns true if they were not equal prior to the call to union. +The contents of the unioned reference is set to one of v1 or v2, +where v1 (resp. v2) was the contents of ur1 (resp. ur2) +prior to the call to union.

    +
    +
    +
    + +
    +
    +
    +

    See Also

    + +
    +
    +
    + +
    + + diff --git a/doc/html/smlnj-lib/Util/str-UTF8.html b/doc/html/smlnj-lib/Util/str-UTF8.html new file mode 100644 index 0000000..4ff76b4 --- /dev/null +++ b/doc/html/smlnj-lib/Util/str-UTF8.html @@ -0,0 +1,503 @@ + + + + + + + + + + The UTF8 structure + + + + + + + + +
    +
    +
    +
    + +
    + +
    The UTF8 structure
    +
    +
    +
    +
    +
    + +
    +
    +
    +
    +
    +
    +
    +

    The UTF8 structure provides support for working +with UTF-8 +encoded strings. UTF-8 is a way to represent Unicode +code points in an 8-bit character type while being backward +compatible with the ASCII encoding for 7-bit characters. +The encoding scheme uses one to four bytes as follows:

    +
    + +++++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    Wide Character BitsByte 0Byte 1Byte 2Byte 3
    +

    00000 00000000 0xxxxxxx

    +
    +

    0xxxxxxx

    +
    +

    00000 00000yyy yyxxxxxx

    +
    +

    110yyyyy

    +
    +

    10xxxxxx

    +
    +

    00000 zzzzyyyy yyxxxxxx

    +
    +

    1110zzzz

    +
    +

    10yyyyyy

    +
    +

    10xxxxxx

    +
    +

    wwwzz zzzzyyyy yyxxxxxx

    +
    +

    11110www

    +
    +

    10zzzzzz

    +
    +

    10yyyyyy

    +
    +

    10xxxxxx

    +
    +
    +

    There are three additional well-formedness restrictions on UTF-8 encodings +that were introduced in the Unicode 3.1 and 3.2 standards.

    +
    +
    +
    +
    +
      +
    • +

      Characters cannot be larger than 0x10FFFF (the maximum code point).

      +
    • +
    • +

      Characters must be in the shortest encoding for the codepoint (e.g., +using two bytes to encode an ASCII character is invalid).

      +
    • +
    • +

      Surogate pairs should be encoded as a single three-byte character instead of +as two three-byte sequences.

      +
    • +
    +
    +
    +
    +
    +
    +
    +

    Synopsis

    +
    +
    +
    +
    signature UTF8
    +structure UTF8 :> UTF8
    +
    +
    +
    +
    +
    +

    Interface

    +
    +
    +
    +
    type wchar = word
    +
    +val maxCodePoint : wchar
    +
    +exception Incomplete
    +exception Invalid
    +
    +val getu : (char, 'strm) StringCvt.reader -> (wchar, 'strm) StringCvt.reader
    +
    +val encode : wchar -> string
    +
    +val isAscii : wchar -> bool
    +val toAscii : wchar -> char
    +val fromAscii : char -> wchar
    +
    +val toString : wchar -> string
    +
    +val size : string -> int
    +
    +val size' : substring -> int
    +
    +val explode : string -> wchar list
    +val implode : wchar list -> string
    +
    +val map : (wchar -> wchar) -> string -> string
    +val app : (wchar -> unit) -> string -> unit
    +val fold : ((wchar * 'a) -> 'a) -> 'a -> string -> 'a
    +val all : (wchar -> bool) -> string -> bool
    +val exists : (wchar -> bool) -> string -> bool
    +
    +
    +
    +
    +
    +

    Description

    +
    +
    +
    +
    type wchar = word
    +
    +

    The type of a Unicode code point.
    +Note that we use the word type for this because SML/NJ does not currently +have a wide-character type. If such a type is introduced, then this type +definition will likely change.

    +
    +
    val maxCodePoint : wchar
    +
    +

    The maximum code point in the Unicode character set (0wx10FFFF).

    +
    +
    +
    +
    +
    +
    exception Incomplete
    +
    +

    This exception is raised when certain operations are applied to incomplete +strings (i.e., strings that end in the middle of multi-byte UTF-8 character +encoding).

    +
    +
    +
    +
    +
    +
    exception Invalid
    +
    +

    This exception is raised when invalid UTF-8 encodings, such as +non-shortest-length encodings, are encountered.

    +
    +
    val getu : (char, 'strm) StringCvt.reader -> (wchar, 'strm) StringCvt.reader
    +
    +

    getu getc returns a wide-character reader for the character reader getc. +The resulting reader raises the Incomplete exception +if it encounters an incomplete UTF-8 character and it raises the +Invalid exception if it encounters an invalid encoding.

    +
    +
    val encode : wchar -> string
    +
    +

    encode wc returns the UTF-8 encoding of the wide character wc. +This expression raises the +Invalid exception if wc is greater than the +maximum Unicode code point.

    +
    +
    val isAscii : wchar -> bool
    +
    +

    isAscii wc returns true if, and only if, wc is an ASCII character.

    +
    +
    val toAscii : wchar -> char (* truncates to 7-bits *)
    +
    +

    toAscii wc converts wc to an 8-bit character by truncating wc +to its low seven bits.

    +
    +
    val fromAscii : char -> wchar (* truncates to 7-bits *)
    +
    +

    toAscii c converts the 8-bit character c to a wide character in +the ASCII range (the high bit of c is ignored).

    +
    +
    val toString : wchar -> string
    +
    +

    toString wc returns a printable string representation of a wide character +as a Unicode escape sequence.

    +
    +
    val size : string -> int
    +
    +

    size s returns the number of UTF-8 encoded Unicode characters +in the string s. This expression raises the +Incomplete exception if an incomplete +character is encountered.

    +
    +
    val size : string -> int
    +
    +

    size s returns the number of UTF-8 encoded Unicode characters +in the string s. This expression raises the +Incomplete exception +if it encounters an incomplete UTF-8 character and it raises the +Invalid exception if it encounters an invalid encoding.

    +
    +
    val size' : substring -> int
    +
    +

    size' ss returns the number of UTF-8 encoded Unicode characters +in the substring ss. This expression raises the +Incomplete exception +if it encounters an incomplete UTF-8 character and it raises the +Invalid exception if it encounters an invalid encoding.

    +
    +
    val explode : string -> wchar list
    +
    +

    explode s returns the list of UTF-8 encoded Unicode characters that +comprise the string s.

    +
    +
    val implode : wchar list -> string
    +
    +

    implode wcs returns the UTF-8 encoded string that represents +the list wcs of Unicode code points. +This expression raises the +Invalid exception if it encounters an invalid encoding.

    +
    +
    val map : (wchar -> wchar) -> string -> string
    +
    +

    map f s maps the function f over the UTF-8 encoded characters +in the string s to produce a new UTF-8 string. This expression raises +the Incomplete exception +if it encounters an incomplete UTF-8 character and it raises the +Invalid exception if it encounters an invalid encoding. +It is equivalent to the expression

    +
    +
    +
    implode (List.map f (explode s))
    +
    +
    +
    +
    val app : (wchar -> unit) -> string -> unit
    +
    +

    app f s applies the function f to the UTF-8 encoded characters +in the string s. This expression raises the +Incomplete exception +if it encounters an incomplete UTF-8 character and it raises the +Invalid exception if it encounters an invalid encoding. +It is equivalent to the expression

    +
    +
    +
    List.app f (explode s)
    +
    +
    +
    +
    val fold : ((wchar * 'a) -> 'a) -> 'a -> string -> 'a
    +
    +

    fold f init s folds a function from left-to-right over the +UTF-8 encoded characters in the string. Incomplete exception +if it encounters an incomplete UTF-8 character and it raises the +Invalid exception if it encounters an invalid encoding. +It is equivalent to the expression

    +
    +
    +
    List.foldl f init (explode s)
    +
    +
    +
    +
    val all : (wchar -> bool) -> string -> bool
    +
    +

    all pred s returns true if, and only if, the function pred +returns true for all of the UTF-8 encoded characters in the +string. It short-circuits evaluation as soon as a character +is encountered for which pred returns false. This expression +raises the Incomplete exception +if it encounters an incomplete UTF-8 character and it raises the +Invalid exception if it encounters an invalid encoding. +It is equivalent to the expression

    +
    +
    +
    List.all pred (explode s)
    +
    +
    +
    +

    when s only contains complete characters.

    +
    +
    +
    val exists : (wchar -> bool) -> string -> bool
    +
    +

    exists pred s returns true if, and only if, the function pred +returns true for at least one UTF-8 encoded character in +the string s. It short-circuits evaluation as soon as a character +is encountered for which pred returns true. This expression raises +the Incomplete exception +if it encounters an incomplete UTF-8 character and it raises the +Invalid exception if it encounters an invalid encoding. +It is equivalent to the expression

    +
    +
    +
    List.exists pred (explode s)
    +
    +
    +
    +

    when s only contains complete characters.

    +
    +
    +
    +
    +
    +
    +
    +

    See Also

    + +
    +
    +
    + +
    + + diff --git a/doc/html/smlnj-lib/Util/str-UnivariateStats.html b/doc/html/smlnj-lib/Util/str-UnivariateStats.html new file mode 100644 index 0000000..36cee18 --- /dev/null +++ b/doc/html/smlnj-lib/Util/str-UnivariateStats.html @@ -0,0 +1,201 @@ + + + + + + + + + + The UnivariateStats structure + + + + + + + + +
    +
    +
    +
    + +
    + +
    The UnivariateStats structure
    +
    +
    +
    +
    +
    + +
    +
    +
    +
    +
    +
    +
    +

    The UnivariateStats structure …​

    +
    +
    +
    +
    +

    Synopsis

    +
    +
    +
    +
    structure UnivariateStats
    +
    +
    +
    +
    +
    +

    Interface

    +
    +
    +
    +
    +
    +
    +
    +
    +
    +

    Description

    +
    +
    +
    +
    type foo
    +
    +

    something

    +
    +
    val bar : foo -> foo
    +
    +

    something

    +
    +
    +
    +
    +
    +
    +

    See Also

    + +
    +
    +
    + +
    + + diff --git a/doc/html/smlnj-lib/XML/fun-XMLParserFn.html b/doc/html/smlnj-lib/XML/fun-XMLParserFn.html new file mode 100644 index 0000000..61f9ed0 --- /dev/null +++ b/doc/html/smlnj-lib/XML/fun-XMLParserFn.html @@ -0,0 +1,192 @@ + + + + + + + + + + The XMLParserFn functor + + + + + + + + +
    +
    +
    +
    + +
    + +
    The XMLParserFn functor
    +
    +
    +
    + +
    +
    +
    +
    +
    +

    The XMLParserFn functor generates a parser for the given SML +tree representation. The treatment of whitespace and comments +when parsing is determined by the Schema substructure of the +functor argument.

    +
    +
    +
    +
    +

    Synopsis

    +
    +
    +
    +
    signature XML_PARSER
    +functor XMLParserFn (XT : XML_TREE) : XML_PARSER
    +
    +
    +
    +
    +
    +

    Functor Argument Interface

    +
    +
    +
    +
    XT : XML_TREE
    +
    +
    +
    +
    +
    +

    Functor Argument Description

    +
    +
    +
    +
    XT : XML_TREE
    +
    +

    Defines the tree representation used for the result of the parser, as well +as the policy for handling whitespace and comments.

    +
    +
    +
    +
    +
    +
    +

    Interface

    +
    +
    +
    +
    structure XMLTree : XML_TREE
    +
    +val parseFile : string -> XMLTree.tree
    +
    +exception ParseError of string
    +
    +
    +
    +
    +
    +

    Interface Description

    +
    +
    +
    +
    structure XMLTree : XML_TREE
    +
    +

    The argument structure.

    +
    +
    val parseFile : string -> XMLTree.tree
    +
    +

    parseFile file returns the tree representation of the named text file. +The ParseError exception is raised if a syntax +error is encountered during parsing.

    +
    +
    +
    +
    +
    +
    exception ParseError of string
    +
    +

    This exception is raise with a useful error message as its argument when +a syntax error is encountered by the parser.

    +
    +
    +
    +
    +
    +
    +

    See Also

    + +
    +
    +
    + +
    + + diff --git a/doc/html/smlnj-lib/XML/fun-XMLTreeFn.html b/doc/html/smlnj-lib/XML/fun-XMLTreeFn.html new file mode 100644 index 0000000..4347051 --- /dev/null +++ b/doc/html/smlnj-lib/XML/fun-XMLTreeFn.html @@ -0,0 +1,123 @@ + + + + + + + + + + The XMLTreeFn functor + + + + + + + + +
    +
    +
    +
    + +
    + +
    The XMLTreeFn functor
    +
    +
    +
    + +
    +
    +
    +
    +
    +

    The XMLTreeFn functor generates a tree representation of XML +files from a schema structure argument.

    +
    +
    +
    +
    +

    Synopsis

    +
    +
    +
    +
    functor XMLTreeFn (Schema : XML_SCHEMA) : XML_TREE
    +
    +
    +
    +
    +
    +

    See Also

    + +
    +
    +
    + +
    + + diff --git a/doc/html/smlnj-lib/XML/sig-XML_SCHEMA.html b/doc/html/smlnj-lib/XML/sig-XML_SCHEMA.html new file mode 100644 index 0000000..1934c7e --- /dev/null +++ b/doc/html/smlnj-lib/XML/sig-XML_SCHEMA.html @@ -0,0 +1,197 @@ + + + + + + + + + + The XML_SCHEMA signature + + + + + + + + +
    +
    +
    +
    + +
    + +
    The XML_SCHEMA signature
    +
    +
    +
    + +
    +
    +
    +
    +
    +

    The XML_SCHEMA signature describes the elements and attributes of +an XML schema that are required to parse files of that schema.

    +
    +
    +
    +
    +

    Synopsis

    +
    +
    +
    +
    signature XML_SCHEMA
    +
    +
    +
    +
    +
    +

    Interface

    +
    +
    +
    +
    type element
    +type attribute
    +
    +val element : string -> element option
    +
    +val preserveWS : element -> bool
    +
    +val preserveComment : element -> bool
    +
    +val same : element * element -> bool
    +
    +val toString : element -> string
    +
    +val attribute : (string * string) -> attribute
    +
    +
    +
    +
    +
    +

    Description

    +
    +
    +
    +
    type element
    +
    +

    the representation type for XML elements. Note that this type is not +the type of a tree node in an XML tree, but rather the label or tag +associated with the tree.

    +
    +
    type attribute
    +
    +

    the representation type for XML attributes.

    +
    +
    val element : string -> element option
    +
    +

    element tag returns SOME elem where elem is the representation of +the element with the given tag. It returns NONE when tag is not the +name of any element in the schema.

    +
    +
    val preserveWS : element -> bool
    +
    +

    preserveWS elem returns true if whitespace should be preserved in +the element’s content and false if whitespace can be normalized in +the element’s content. Note that if true, this property is inherited by any +nested elements.

    +
    +
    val preserveComment : element -> bool
    +
    +

    preserveComment elem returns true if comments should be preserved in +the element’s content and false if comments if they should be deleted.

    +
    +
    val same : element * element -> bool
    +
    +

    same (elem1, elem2) returns true if, and only if, the two elements +are the same.

    +
    +
    val toString : element -> string
    +
    +

    toString elem returns the string representation of the element +(without the “<” and “>” brackets).

    +
    +
    val attribute : (string * string) -> attribute
    +
    +

    attribute (name, value) returns an attribute value for the given +name-value pair.

    +
    +
    +
    +
    +
    +
    +

    See Also

    + +
    +
    +
    + +
    + + diff --git a/doc/html/smlnj-lib/XML/sig-XML_TREE.html b/doc/html/smlnj-lib/XML/sig-XML_TREE.html new file mode 100644 index 0000000..d2995ab --- /dev/null +++ b/doc/html/smlnj-lib/XML/sig-XML_TREE.html @@ -0,0 +1,252 @@ + + + + + + + + + + The XML_TREE signature + + + + + + + + +
    +
    +
    +
    + +
    + +
    The XML_TREE signature
    +
    +
    +
    + +
    +
    +
    +
    +
    +

    The XML_TREE signature defines a tree representation of XML files.

    +
    +
    +
    +
    +

    Synopsis

    +
    +
    +
    +
    signature XML_TREE
    +
    +
    +
    +
    +
    +

    Interface

    +
    +
    +
    +
    structure Schema : XML_SCHEMA
    +
    +datatype doctype = DOCTYPE of string * external_id option
    +
    +and external_id
    +  = SYSTEM of string
    +  | PUBLIC of string * string
    +
    +datatype content
    +  = TEXT of string
    +  | CDATA of string
    +  | ELEMENT of {
    +	name : Schema.element,
    +	attrs : Schema.attribute list,
    +	content : content list
    +      }
    +
    +type tree = {
    +    xmlDecl : Schema.attribute list option,
    +    doctype : doctype option,
    +    content : content
    +  }
    +
    +
    +
    +
    +
    +

    Description

    +
    +
    +
    +
    structure Schema : XML_SCHEMA
    +
    +

    This substructure defines the representation of elements and attributes.

    +
    +
    datatype doctype = DOCTYPE of string * external_id option
    +
    +

    This datatype represents the contents of the optional DOCTYPE +element found the beginning of the file (following the optional +XML declaration). We currently only support external DTDs.

    +
    +
    datatype external_id
    +
    +

    This datatype represents an external DTD specification; its constructors +are defined as follows:

    +
    +
    +
    +
    +
    SYSTEM of url
    +
    +

    specifies a "private" external DTD, where the string url specifies +the DTD’s location.

    +
    +
    PUBLIC(name, url)
    +
    +

    specifies a "public" external DTD, where the string name is the name +of the DTD and url specifies the DTD’s location.

    +
    +
    +
    +
    +
    +
    +
    datatype content
    +
    +

    This datatype is used to represent the content of an XML file as a tree. +The constructors have the following meanings:

    +
    +
    +
    +
    +
    TEXT s
    +
    +

    represents the text described by the string s. When parsing, entities +in the source (e.g., <) are replaced by their definition and, +when printing, special characters (e.g., <) are replaced by their +entities.

    +
    +
    CDATA s
    +
    +

    represents the literal text described by the string s.

    +
    +
    ELEMENT{name, attrs, content}
    +
    +

    represents a subtree enclosed by "`<element>` …​ </element>" tags, +where `name is the name of the element, attrs is a list of attributes +in the start tag, and content is the stuff between the tags.

    +
    +
    +
    +
    +
    +
    +
    type tree
    +
    +

    An XML tree, which is a record type with the following fields:

    +
    +
    +
    +
    +
    xmlDecl : Schema.attribute list option
    +
    +

    This field represents the optional XML declaration at the beginning of +a file, where a value of SOME attrs means that there was an +XML declaration present with the list of attributes attrs.

    +
    +
    doctype : doctype option
    +
    +

    This field represents the optional DOCTYPE element that follows the +XML declaration.

    +
    +
    content : content
    +
    +

    This field is the root of the content and will always be an ELEMENT.

    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +

    See Also

    + +
    +
    +
    + +
    + + diff --git a/doc/html/smlnj-lib/XML/str-GenericXMLTree.html b/doc/html/smlnj-lib/XML/str-GenericXMLTree.html new file mode 100644 index 0000000..6303f60 --- /dev/null +++ b/doc/html/smlnj-lib/XML/str-GenericXMLTree.html @@ -0,0 +1,126 @@ + + + + + + + + + + The GenericXMLTree structure + + + + + + + + +
    +
    +
    +
    + +
    + +
    The GenericXMLTree structure
    +
    +
    +
    + +
    +
    +
    +
    +
    +

    The GenericXMLTree structure provides a generic representation of +XML trees where elements and attribute names are represented as +Atom.atom values.

    +
    +
    +
    +
    +

    Synopsis

    +
    +
    +
    +
    structure GenericXMLTree : XML_TREE
    +    where type Schema.element = Atom.atom
    +    where type Scheme.attribute = Atom.atom * string
    +
    +
    +
    +
    +
    +

    See Also

    + +
    +
    +
    + +
    + + diff --git a/doc/html/smlnj-lib/XML/xml-lib.html b/doc/html/smlnj-lib/XML/xml-lib.html new file mode 100644 index 0000000..165f0a9 --- /dev/null +++ b/doc/html/smlnj-lib/XML/xml-lib.html @@ -0,0 +1,173 @@ + + + + + + + + + + The XML Library + + + + + + + + +
    +
    +
    +
    + +
    + +
    The XML Library
    +
    +
    +
    + +
    +
    +
    +

    Overview

    +
    +
    +

    The XML Library is a small library for parsing +XML files. It does +not support validation (e.g., against a DTD or Schema). The +basic idea is that the user supplies a "schema" module that describes +the elements and attribute representation of an XML document. From this, +one builds an XML tree representation

    +
    +
    +
    +
    structure MyXMLTree = XMLTreeFn (MyXMLSchema)
    +
    +
    +
    +

    and an XML parser

    +
    +
    +
    +
    structure MyXMLParser = XMLParserFn (MyXMLTree)
    +
    +
    +
    +

    The library also provides a generic XML tree definition +(i.e., one that allows any element name).

    +
    +
    +

    For a more complete library for XML processing, use the +fxp library.

    +
    +
    +
    +
    +

    Contents

    +
    +
    +
    +
    signature XML_SCHEMA
    +
    +

    describes the elements and attributes of an XML schema +that are required to parse files of that schema.

    +
    +
    signature XML_TREE
    +
    +

    Defines a tree representation of XML files.

    +
    +
    functor XMLTreeFn
    +
    +

    Generates a tree representation of XML +files from a schema structure argument.

    +
    +
    functor XMLParserFn
    +
    +

    generates a parser for the given SML tree representation.

    +
    +
    structure GenericXMLTree
    +
    +

    Provides a generic representation of XML trees.

    +
    +
    +
    +
    +
    +
    +

    Usage

    +
    +
    +

    For SML/NJ, include $/xml-lib.cm in your +CM file.

    +
    +
    +

    For use in MLton, include +$(SML_LIB)/smlnj-lib/XML/xml-lib.mlb in your MLB file.

    +
    +
    +
    +
    +
    + +
    + + diff --git a/doc/html/smlnj-lib/images/smlnj-logo.png b/doc/html/smlnj-lib/images/smlnj-logo.png new file mode 100644 index 0000000..3dfd7ff Binary files /dev/null and b/doc/html/smlnj-lib/images/smlnj-logo.png differ diff --git a/doc/html/smlnj-lib/index.html b/doc/html/smlnj-lib/index.html new file mode 100644 index 0000000..65a7e3e --- /dev/null +++ b/doc/html/smlnj-lib/index.html @@ -0,0 +1,102 @@ + + + + + + + + + + SML/NJ Library Overview + + + + + + + + +
    +
    +
    +
    + +
    + +
    Overview
    +
    +
    +
    + +
    +
    +
    +

    Introduction

    +
    +
    +

    The Standard ML of New Jersey Library (SML/NJ Library) +is a collection of over 200 modules grouped into 14 libraries. +It was designed to build on the Standard ML Basis Library +by providing higher-level and application-specific services +that were out of scope for the Basis Library. +The SML/NJ Library is distributed as part of both the +SML/NJ and +MLton SML Compiler systems.

    +
    +
    +
    +
    +
    + +
    + + diff --git a/doc/html/smlnj-lib/styles/smlnj-lib-base.css b/doc/html/smlnj-lib/styles/smlnj-lib-base.css new file mode 100644 index 0000000..555cc0c --- /dev/null +++ b/doc/html/smlnj-lib/styles/smlnj-lib-base.css @@ -0,0 +1,456 @@ +/* Modified version of default asciidoctor.css */ + +/* Asciidoctor default stylesheet | MIT License | https://asciidoctor.org */ +/* Uncomment @import statement when using as custom stylesheet */ +/*@import "https://fonts.googleapis.com/css?family=Open+Sans:300,300italic,400,400italic,600,600italic%7CNoto+Serif:400,400italic,700,700italic%7CDroid+Sans+Mono:400,700";*/ +article,aside,details,figcaption,figure,footer,header,hgroup,main,nav,section{display:block} +audio,canvas,video{display:inline-block} +audio:not([controls]){display:none;height:0} +script{display:none!important} +html{font-family:sans-serif;-ms-text-size-adjust:100%;-webkit-text-size-adjust:100%} +a{background:none} +a:focus{outline:thin dotted} +a:active,a:hover{outline:0} +h1{font-size:2em;margin:.67em 0} +abbr[title]{border-bottom:1px dotted} +b,strong{font-weight:bold} +dfn{font-style:italic} +hr{-moz-box-sizing:content-box;box-sizing:content-box;height:0} +mark{background:#ff0;color:#000} +code,kbd,pre,samp{font-family:monospace;font-size:1em} +pre{white-space:pre-wrap} +q{quotes:"\201C" "\201D" "\2018" "\2019"} +small{font-size:80%} +sub,sup{font-size:75%;line-height:0;position:relative;vertical-align:baseline} +sup{top:-.5em} +sub{bottom:-.25em} +img{border:0} +svg:not(:root){overflow:hidden} +figure{margin:0} +fieldset{border:1px solid silver;margin:0 2px;padding:.35em .625em .75em} +legend{border:0;padding:0} +button,input,select,textarea{font-family:inherit;font-size:100%;margin:0} +button,input{line-height:normal} +button,select{text-transform:none} +button,html input[type="button"],input[type="reset"],input[type="submit"]{-webkit-appearance:button;cursor:pointer} +button[disabled],html input[disabled]{cursor:default} +input[type="checkbox"],input[type="radio"]{box-sizing:border-box;padding:0} +button::-moz-focus-inner,input::-moz-focus-inner{border:0;padding:0} +textarea{overflow:auto;vertical-align:top} +table{border-collapse:collapse;border-spacing:0} +*,*::before,*::after{-moz-box-sizing:border-box;-webkit-box-sizing:border-box;box-sizing:border-box} +html,body{font-size:100%} +body{background:#fff;color:rgba(0,0,0,.8);padding:0;margin:0;font-family:"Noto Serif","DejaVu Serif",serif;font-weight:400;font-style:normal;line-height:1;position:relative;cursor:auto;tab-size:4;-moz-osx-font-smoothing:grayscale;-webkit-font-smoothing:antialiased} +a:hover{cursor:pointer} +img,object,embed{max-width:100%;height:auto} +object,embed{height:100%} +img{-ms-interpolation-mode:bicubic} +.left{float:left!important} +.right{float:right!important} +.text-left{text-align:left!important} +.text-right{text-align:right!important} +.text-center{text-align:center!important} +.text-justify{text-align:justify!important} +.hide{display:none} +img,object,svg{display:inline-block;vertical-align:middle} +textarea{height:auto;min-height:50px} +select{width:100%} +.center{margin-left:auto;margin-right:auto} +.stretch{width:100%} +.subheader,.admonitionblock td.content>.title,.audioblock>.title,.exampleblock>.title,.imageblock>.title,.listingblock>.title,.literalblock>.title,.stemblock>.title,.openblock>.title,.paragraph>.title,.quoteblock>.title,table.tableblock>.title,.verseblock>.title,.videoblock>.title,.dlist>.title,.olist>.title,.ulist>.title,.qlist>.title,.hdlist>.title{line-height:1.45;color:#7a2518;font-weight:400;margin-top:0;margin-bottom:.25em} +div,dl,dt,dd,ul,ol,li,h1,h2,h3,#toctitle,.sidebarblock>.content>.title,h4,h5,h6,pre,form,p,blockquote,th,td{margin:0;padding:0;direction:ltr} +a{color:#2156a5;text-decoration:underline;line-height:inherit} +a:hover,a:focus{color:#1d4b8f} +a img{border:0} +p{font-family:inherit;font-weight:400;font-size:1em;line-height:1.6;margin-bottom:1.25em;text-rendering:optimizeLegibility} +p aside{font-size:.875em;line-height:1.35;font-style:italic} +h1,h2,h3,#toctitle,.sidebarblock>.content>.title,h4,h5,h6{font-family:"Open Sans","DejaVu Sans",sans-serif;font-weight:300;font-style:normal;color:#ba3925;text-rendering:optimizeLegibility;margin-top:1em;margin-bottom:.5em;line-height:1.0125em} +h1 small,h2 small,h3 small,#toctitle small,.sidebarblock>.content>.title small,h4 small,h5 small,h6 small{font-size:60%;color:#e99b8f;line-height:0} +h1{font-size:2.125em} +h2{font-size:1.6875em} +h3,#toctitle,.sidebarblock>.content>.title{font-size:1.375em} +h4,h5{font-size:1.125em} +h6{font-size:1em} +hr{border:solid #dddddf;border-width:1px 0 0;clear:both;margin:1.25em 0 1.1875em;height:0} +em,i{font-style:italic;line-height:inherit} +strong,b{font-weight:bold;line-height:inherit} +small{font-size:60%;line-height:inherit} +code{font-family:"Droid Sans Mono","DejaVu Sans Mono",monospace;font-weight:400;color:rgba(0,0,0,.9)} +ul,ol,dl{font-size:1em;line-height:1.6;margin-bottom:1.25em;list-style-position:outside;font-family:inherit} +ul,ol{margin-left:1.5em} +ul li ul,ul li ol{margin-left:1.25em;margin-bottom:0;font-size:1em} +ul.square li ul,ul.circle li ul,ul.disc li ul{list-style:inherit} +ul.square{list-style-type:square} +ul.circle{list-style-type:circle} +ul.disc{list-style-type:disc} +ol li ul,ol li ol{margin-left:1.25em;margin-bottom:0} +dl dt{margin-bottom:.3125em;font-weight:bold} +dl dd{margin-bottom:1.25em} +abbr,acronym{text-transform:uppercase;font-size:90%;color:rgba(0,0,0,.8);border-bottom:1px dotted #ddd;cursor:help} +abbr{text-transform:none} +blockquote{margin:0 0 1.25em;padding:.5625em 1.25em 0 1.1875em;border-left:1px solid #ddd} +blockquote cite{display:block;font-size:.9375em;color:rgba(0,0,0,.6)} +blockquote cite::before{content:"\2014 \0020"} +blockquote cite a,blockquote cite a:visited{color:rgba(0,0,0,.6)} +blockquote,blockquote p{line-height:1.6;color:rgba(0,0,0,.85)} +@media screen and (min-width:768px){h1,h2,h3,#toctitle,.sidebarblock>.content>.title,h4,h5,h6{line-height:1.2} +h1{font-size:2.75em} +h2{font-size:2.3125em} +h3,#toctitle,.sidebarblock>.content>.title{font-size:1.6875em} +h4{font-size:1.4375em}} +table{background:#fff;margin-bottom:1.25em;border:solid 1px #dedede} +table thead,table tfoot{background:#f7f8f7} +table thead tr th,table thead tr td,table tfoot tr th,table tfoot tr td{padding:.5em .625em .625em;font-size:inherit;color:rgba(0,0,0,.8);text-align:left} +table tr th,table tr td{padding:.5625em .625em;font-size:inherit;color:rgba(0,0,0,.8)} +table tr.even,table tr.alt{background:#f8f8f7} +table thead tr th,table tfoot tr th,table tbody tr td,table tr td,table tfoot tr td{display:table-cell;line-height:1.6} +h1,h2,h3,#toctitle,.sidebarblock>.content>.title,h4,h5,h6{line-height:1.2;word-spacing:-.05em} +h1 strong,h2 strong,h3 strong,#toctitle strong,.sidebarblock>.content>.title strong,h4 strong,h5 strong,h6 strong{font-weight:400} +.clearfix::before,.clearfix::after,.float-group::before,.float-group::after{content:" ";display:table} +.clearfix::after,.float-group::after{clear:both} +:not(pre):not([class^=L])>code{font-size:.9375em;font-style:normal!important;letter-spacing:0;padding:.1em .5ex;word-spacing:-.15em;background:#f7f7f8;-webkit-border-radius:4px;border-radius:4px;line-height:1.45;text-rendering:optimizeSpeed;word-wrap:break-word} +:not(pre)>code.nobreak{word-wrap:normal} +:not(pre)>code.nowrap{white-space:nowrap} +pre{color:rgba(0,0,0,.9);font-family:"Droid Sans Mono","DejaVu Sans Mono",monospace;line-height:1.45;text-rendering:optimizeSpeed} +pre code,pre pre{color:inherit;font-size:inherit;line-height:inherit} +pre>code{display:block} +pre.nowrap,pre.nowrap pre{white-space:pre;word-wrap:normal} +em em{font-style:normal} +strong strong{font-weight:400} +.keyseq{color:rgba(51,51,51,.8)} +kbd{font-family:"Droid Sans Mono","DejaVu Sans Mono",monospace;display:inline-block;color:rgba(0,0,0,.8);font-size:.65em;line-height:1.45;background:#f7f7f7;border:1px solid #ccc;-webkit-border-radius:3px;border-radius:3px;-webkit-box-shadow:0 1px 0 rgba(0,0,0,.2),0 0 0 .1em white inset;box-shadow:0 1px 0 rgba(0,0,0,.2),0 0 0 .1em #fff inset;margin:0 .15em;padding:.2em .5em;vertical-align:middle;position:relative;top:-.1em;white-space:nowrap} +.keyseq kbd:first-child{margin-left:0} +.keyseq kbd:last-child{margin-right:0} +.menuseq,.menuref{color:#000} +.menuseq b:not(.caret),.menuref{font-weight:inherit} +.menuseq{word-spacing:-.02em} +.menuseq b.caret{font-size:1.25em;line-height:.8} +.menuseq i.caret{font-weight:bold;text-align:center;width:.45em} +b.button::before,b.button::after{position:relative;top:-1px;font-weight:400} +b.button::before{content:"[";padding:0 3px 0 2px} +b.button::after{content:"]";padding:0 2px 0 3px} +p a>code:hover{color:rgba(0,0,0,.9)} +#header,#content,#footnotes,#footer{width:100%;margin-left:auto;margin-right:auto;margin-top:0;margin-bottom:0;max-width:62.5em;*zoom:1;position:relative;padding-left:.9375em;padding-right:.9375em} +#header::before,#header::after,#content::before,#content::after,#footnotes::before,#footnotes::after,#footer::before,#footer::after{content:" ";display:table} +#header::after,#content::after,#footnotes::after,#footer::after{clear:both} +#content{margin-top:1.25em} +#content::before{content:none} +#header>h1:first-child{color:rgba(0,0,0,.85);margin-top:2.25rem;margin-bottom:0} +#header>h1:first-child+#toc{margin-top:8px;border-top:1px solid #dddddf} +#header>h1:only-child,body.toc2 #header>h1:nth-last-child(2){border-bottom:1px solid #dddddf;padding-bottom:8px} +#header .details{border-bottom:1px solid #dddddf;line-height:1.45;padding-top:.25em;padding-bottom:.25em;padding-left:.25em;color:rgba(0,0,0,.6);display:-ms-flexbox;display:-webkit-flex;display:flex;-ms-flex-flow:row wrap;-webkit-flex-flow:row wrap;flex-flow:row wrap} +#header .details span:first-child{margin-left:-.125em} +#header .details span.email a{color:rgba(0,0,0,.85)} +#header .details br{display:none} +#header .details br+span::before{content:"\00a0\2013\00a0"} +#header .details br+span.author::before{content:"\00a0\22c5\00a0";color:rgba(0,0,0,.85)} +#header .details br+span#revremark::before{content:"\00a0|\00a0"} +#header #revnumber{text-transform:capitalize} +#header #revnumber::after{content:"\00a0"} +#content>h1:first-child:not([class]){color:rgba(0,0,0,.85);border-bottom:1px solid #dddddf;padding-bottom:8px;margin-top:0;padding-top:1rem;margin-bottom:1.25rem} +#toc{border-bottom:1px solid #e7e7e9;padding-bottom:.5em} +#toc>ul{margin-left:.125em} +#toc ul.sectlevel0>li>a{font-style:italic} +#toc ul.sectlevel0 ul.sectlevel1{margin:.5em 0} +#toc ul{font-family:"Open Sans","DejaVu Sans",sans-serif;list-style-type:none} +#toc li{line-height:1.3334;margin-top:.3334em} +#toc a{text-decoration:none} +#toc a:active{text-decoration:underline} +#toctitle{color:#7a2518;font-size:1.2em} +@media screen and (min-width:768px){#toctitle{font-size:1.375em} +body.toc2{padding-left:15em;padding-right:0} +#toc.toc2{ + margin-top:0!important; + background:#f8f8f7; + position:fixed; + width:15em; + left:0; top:0; + border-right:1px solid #e7e7e9; + border-top-width:0!important; + border-bottom-width:0!important; + z-index:1000; + padding:1.25em 1em; + height:100%; + overflow:auto +} +#toc.toc2 #toctitle{margin-top:0;margin-bottom:.8rem;font-size:1.2em} +#toc.toc2>ul{font-size:.9em;margin-bottom:0} +#toc.toc2 ul ul{margin-left:0;padding-left:1em} +#toc.toc2 ul.sectlevel0 ul.sectlevel1{padding-left:0;margin-top:.5em;margin-bottom:.5em} +body.toc2.toc-right{padding-left:0;padding-right:15em} +body.toc2.toc-right #toc.toc2{border-right-width:0;border-left:1px solid #e7e7e9;left:auto;right:0}} +@media screen and (min-width:1280px){body.toc2{padding-left:20em;padding-right:0} +#toc.toc2{width:20em} +#toc.toc2 #toctitle{font-size:1.375em} +#toc.toc2>ul{font-size:.95em} +#toc.toc2 ul ul{padding-left:1.25em} +body.toc2.toc-right{padding-left:0;padding-right:20em}} +#content #toc{ + border-style:solid; + border-width:1px; + border-color:#e0e0dc; + margin-bottom:1.25em; + padding:1.25em; + background:#f8f8f7; + -webkit-border-radius:4px; + border-radius:4px +} +#content #toc>:first-child{margin-top:0} +#content #toc>:last-child{margin-bottom:0} +#footer{max-width:100%;background:rgba(0,0,0,.8);padding:1.25em} +#footer-text{color:rgba(255,255,255,.8);line-height:1.44} +#content{margin-bottom:.625em} +.sect1{padding-bottom:.625em} +@media screen and (min-width:768px){#content{margin-bottom:1.25em} +.sect1{padding-bottom:1.25em}} +.sect1:last-child{padding-bottom:0} +.sect1+.sect1{border-top:1px solid #e7e7e9} +#content h1>a.anchor,h2>a.anchor,h3>a.anchor,#toctitle>a.anchor,.sidebarblock>.content>.title>a.anchor,h4>a.anchor,h5>a.anchor,h6>a.anchor{position:absolute;z-index:1001;width:1.5ex;margin-left:-1.5ex;display:block;text-decoration:none!important;visibility:hidden;text-align:center;font-weight:400} +#content h1>a.anchor::before,h2>a.anchor::before,h3>a.anchor::before,#toctitle>a.anchor::before,.sidebarblock>.content>.title>a.anchor::before,h4>a.anchor::before,h5>a.anchor::before,h6>a.anchor::before{content:"\00A7";font-size:.85em;display:block;padding-top:.1em} +#content h1:hover>a.anchor,#content h1>a.anchor:hover,h2:hover>a.anchor,h2>a.anchor:hover,h3:hover>a.anchor,#toctitle:hover>a.anchor,.sidebarblock>.content>.title:hover>a.anchor,h3>a.anchor:hover,#toctitle>a.anchor:hover,.sidebarblock>.content>.title>a.anchor:hover,h4:hover>a.anchor,h4>a.anchor:hover,h5:hover>a.anchor,h5>a.anchor:hover,h6:hover>a.anchor,h6>a.anchor:hover{visibility:visible} +#content h1>a.link,h2>a.link,h3>a.link,#toctitle>a.link,.sidebarblock>.content>.title>a.link,h4>a.link,h5>a.link,h6>a.link{color:#ba3925;text-decoration:none} +#content h1>a.link:hover,h2>a.link:hover,h3>a.link:hover,#toctitle>a.link:hover,.sidebarblock>.content>.title>a.link:hover,h4>a.link:hover,h5>a.link:hover,h6>a.link:hover{color:#a53221} +details,.audioblock,.imageblock,.literalblock,.listingblock,.stemblock,.videoblock{margin-bottom:1.25em} +details>summary:first-of-type{cursor:pointer;display:list-item;outline:none;margin-bottom:.75em} +.admonitionblock td.content>.title,.audioblock>.title,.exampleblock>.title,.imageblock>.title,.listingblock>.title,.literalblock>.title,.stemblock>.title,.openblock>.title,.paragraph>.title,.quoteblock>.title,table.tableblock>.title,.verseblock>.title,.videoblock>.title,.dlist>.title,.olist>.title,.ulist>.title,.qlist>.title,.hdlist>.title{text-rendering:optimizeLegibility;text-align:left;font-family:"Noto Serif","DejaVu Serif",serif;font-size:1rem;font-style:italic} +table.tableblock.fit-content>caption.title{white-space:nowrap;width:0} +.paragraph.lead>p,#preamble>.sectionbody>[class="paragraph"]:first-of-type p{font-size:1.21875em;line-height:1.6;color:rgba(0,0,0,.85)} +table.tableblock #preamble>.sectionbody>[class="paragraph"]:first-of-type p{font-size:inherit} +.admonitionblock>table{border-collapse:separate;border:0;background:none;width:100%} +.admonitionblock>table td.icon{text-align:center;width:80px} +.admonitionblock>table td.icon img{max-width:none} +.admonitionblock>table td.icon .title{font-weight:bold;font-family:"Open Sans","DejaVu Sans",sans-serif;text-transform:uppercase} +.admonitionblock>table td.content{padding-left:1.125em;padding-right:1.25em;border-left:1px solid #dddddf;color:rgba(0,0,0,.6)} +.admonitionblock>table td.content>:last-child>:last-child{margin-bottom:0} +.exampleblock>.content{border-style:solid;border-width:1px;border-color:#e6e6e6;margin-bottom:1.25em;padding:1.25em;background:#fff;-webkit-border-radius:4px;border-radius:4px} +.exampleblock>.content>:first-child{margin-top:0} +.exampleblock>.content>:last-child{margin-bottom:0} +.sidebarblock{border-style:solid;border-width:1px;border-color:#dbdbd6;margin-bottom:1.25em;padding:1.25em;background:#f3f3f2;-webkit-border-radius:4px;border-radius:4px} +.sidebarblock>:first-child{margin-top:0} +.sidebarblock>:last-child{margin-bottom:0} +.sidebarblock>.content>.title{color:#7a2518;margin-top:0;text-align:center} +.exampleblock>.content>:last-child>:last-child,.exampleblock>.content .olist>ol>li:last-child>:last-child,.exampleblock>.content .ulist>ul>li:last-child>:last-child,.exampleblock>.content .qlist>ol>li:last-child>:last-child,.sidebarblock>.content>:last-child>:last-child,.sidebarblock>.content .olist>ol>li:last-child>:last-child,.sidebarblock>.content .ulist>ul>li:last-child>:last-child,.sidebarblock>.content .qlist>ol>li:last-child>:last-child{margin-bottom:0} +.literalblock pre,.listingblock>.content>pre{-webkit-border-radius:4px;border-radius:4px;word-wrap:break-word;overflow-x:auto;padding:1em;font-size:.8125em} +@media screen and (min-width:768px){.literalblock pre,.listingblock>.content>pre{font-size:.90625em}} +@media screen and (min-width:1280px){.literalblock pre,.listingblock>.content>pre{font-size:1em}} +.literalblock.output pre{color:#f7f7f8;background:rgba(0,0,0,.9)} +.listingblock>.content>pre:not(.highlight),.listingblock>.content>pre[class="highlight"],.listingblock>.content>pre[class^="highlight "]{background:#f7f7f8} +.listingblock>.content{position:relative} +.listingblock code[data-lang]::before{display:none;content:attr(data-lang);position:absolute;font-size:.75em;top:.425rem;right:.5rem;line-height:1;text-transform:uppercase;color:inherit;opacity:.5} +.listingblock:hover code[data-lang]::before{display:block} +.listingblock.terminal pre .command::before{content:attr(data-prompt);padding-right:.5em;color:inherit;opacity:.5} +.listingblock.terminal pre .command:not([data-prompt])::before{content:"$"} +.listingblock pre.highlightjs{padding:0} +.listingblock pre.highlightjs>code{padding:1em;-webkit-border-radius:4px;border-radius:4px} +.listingblock pre.prettyprint{border-width:0} +.prettyprint{background:#f7f7f8} +pre.prettyprint .linenums{line-height:1.45;margin-left:2em} +pre.prettyprint li{background:none;list-style-type:inherit;padding-left:0} +pre.prettyprint li code[data-lang]::before{opacity:1} +pre.prettyprint li:not(:first-child) code[data-lang]::before{display:none} +table.linenotable{border-collapse:separate;border:0;margin-bottom:0;background:none} +table.linenotable td[class]{color:inherit;vertical-align:top;padding:0;line-height:inherit;white-space:normal} +table.linenotable td.code{padding-left:.75em} +table.linenotable td.linenos{border-right:1px solid currentColor;opacity:.35;padding-right:.5em} +pre.pygments .lineno{border-right:1px solid currentColor;opacity:.35;display:inline-block;margin-right:.75em} +pre.pygments .lineno::before{content:"";margin-right:-.125em} +.quoteblock{margin:0 1em 1.25em 1.5em;display:table} +.quoteblock>.title{margin-left:-1.5em;margin-bottom:.75em} +.quoteblock blockquote,.quoteblock p{color:rgba(0,0,0,.85);font-size:1.15rem;line-height:1.75;word-spacing:.1em;letter-spacing:0;font-style:italic;text-align:justify} +.quoteblock blockquote{margin:0;padding:0;border:0} +.quoteblock blockquote::before{content:"\201c";float:left;font-size:2.75em;font-weight:bold;line-height:.6em;margin-left:-.6em;color:#7a2518;text-shadow:0 1px 2px rgba(0,0,0,.1)} +.quoteblock blockquote>.paragraph:last-child p{margin-bottom:0} +.quoteblock .attribution{margin-top:.75em;margin-right:.5ex;text-align:right} +.verseblock{margin:0 1em 1.25em} +.verseblock pre{font-family:"Open Sans","DejaVu Sans",sans;font-size:1.15rem;color:rgba(0,0,0,.85);font-weight:300;text-rendering:optimizeLegibility} +.verseblock pre strong{font-weight:400} +.verseblock .attribution{margin-top:1.25rem;margin-left:.5ex} +.quoteblock .attribution,.verseblock .attribution{font-size:.9375em;line-height:1.45;font-style:italic} +.quoteblock .attribution br,.verseblock .attribution br{display:none} +.quoteblock .attribution cite,.verseblock .attribution cite{display:block;letter-spacing:-.025em;color:rgba(0,0,0,.6)} +.quoteblock.abstract blockquote::before,.quoteblock.excerpt blockquote::before,.quoteblock .quoteblock blockquote::before{display:none} +.quoteblock.abstract blockquote,.quoteblock.abstract p,.quoteblock.excerpt blockquote,.quoteblock.excerpt p,.quoteblock .quoteblock blockquote,.quoteblock .quoteblock p{line-height:1.6;word-spacing:0} +.quoteblock.abstract{margin:0 1em 1.25em;display:block} +.quoteblock.abstract>.title{margin:0 0 .375em;font-size:1.15em;text-align:center} +.quoteblock.excerpt,.quoteblock .quoteblock{margin:0 0 1.25em;padding:0 0 .25em 1em;border-left:.25em solid #dddddf} +.quoteblock.excerpt blockquote,.quoteblock.excerpt p,.quoteblock .quoteblock blockquote,.quoteblock .quoteblock p{color:inherit;font-size:1.0625rem} +.quoteblock.excerpt .attribution,.quoteblock .quoteblock .attribution{color:inherit;text-align:left;margin-right:0} +table.tableblock{max-width:100%;border-collapse:separate} +p.tableblock:last-child{margin-bottom:0} +td.tableblock>.content{margin-bottom:-1.25em} +table.tableblock,th.tableblock,td.tableblock{border:0 solid #dedede} +table.grid-all>thead>tr>.tableblock,table.grid-all>tbody>tr>.tableblock{border-width:0 1px 1px 0} +table.grid-all>tfoot>tr>.tableblock{border-width:1px 1px 0 0} +table.grid-cols>*>tr>.tableblock{border-width:0 1px 0 0} +table.grid-rows>thead>tr>.tableblock,table.grid-rows>tbody>tr>.tableblock{border-width:0 0 1px} +table.grid-rows>tfoot>tr>.tableblock{border-width:1px 0 0} +table.grid-all>*>tr>.tableblock:last-child,table.grid-cols>*>tr>.tableblock:last-child{border-right-width:0} +table.grid-all>tbody>tr:last-child>.tableblock,table.grid-all>thead:last-child>tr>.tableblock,table.grid-rows>tbody>tr:last-child>.tableblock,table.grid-rows>thead:last-child>tr>.tableblock{border-bottom-width:0} +table.frame-all{border-width:1px} +table.frame-sides{border-width:0 1px} +table.frame-topbot,table.frame-ends{border-width:1px 0} +table.stripes-all tr,table.stripes-odd tr:nth-of-type(odd),table.stripes-even tr:nth-of-type(even),table.stripes-hover tr:hover{background:#f8f8f7} +th.halign-left,td.halign-left{text-align:left} +th.halign-right,td.halign-right{text-align:right} +th.halign-center,td.halign-center{text-align:center} +th.valign-top,td.valign-top{vertical-align:top} +th.valign-bottom,td.valign-bottom{vertical-align:bottom} +th.valign-middle,td.valign-middle{vertical-align:middle} +table thead th,table tfoot th{font-weight:bold} +tbody tr th{display:table-cell;line-height:1.6;background:#f7f8f7} +tbody tr th,tbody tr th p,tfoot tr th,tfoot tr th p{color:rgba(0,0,0,.8);font-weight:bold} +p.tableblock>code:only-child{background:none;padding:0} +p.tableblock{font-size:1em} +ol{margin-left:1.75em} +ul li ol{margin-left:1.5em} +dl dd{margin-left:1.125em} +dl dd:last-child,dl dd:last-child>:last-child{margin-bottom:0} +ol>li p,ul>li p,ul dd,ol dd,.olist .olist,.ulist .ulist,.ulist .olist,.olist .ulist{margin-bottom:.625em} +ul.checklist,ul.none,ol.none,ul.no-bullet,ol.no-bullet,ol.unnumbered,ul.unstyled,ol.unstyled{list-style-type:none} +ul.no-bullet,ol.no-bullet,ol.unnumbered{margin-left:.625em} +ul.unstyled,ol.unstyled{margin-left:0} +ul.checklist{margin-left:.625em} +ul.checklist li>p:first-child>.fa-square-o:first-child,ul.checklist li>p:first-child>.fa-check-square-o:first-child{width:1.25em;font-size:.8em;position:relative;bottom:.125em} +ul.checklist li>p:first-child>input[type="checkbox"]:first-child{margin-right:.25em} +ul.inline{display:-ms-flexbox;display:-webkit-box;display:flex;-ms-flex-flow:row wrap;-webkit-flex-flow:row wrap;flex-flow:row wrap;list-style:none;margin:0 0 .625em -1.25em} +ul.inline>li{margin-left:1.25em} +.unstyled dl dt{font-weight:400;font-style:normal} +ol.arabic{list-style-type:decimal} +ol.decimal{list-style-type:decimal-leading-zero} +ol.loweralpha{list-style-type:lower-alpha} +ol.upperalpha{list-style-type:upper-alpha} +ol.lowerroman{list-style-type:lower-roman} +ol.upperroman{list-style-type:upper-roman} +ol.lowergreek{list-style-type:lower-greek} +.hdlist>table,.colist>table{border:0;background:none} +.hdlist>table>tbody>tr,.colist>table>tbody>tr{background:none} +td.hdlist1,td.hdlist2{vertical-align:top;padding:0 .625em} +td.hdlist1{font-weight:bold;padding-bottom:1.25em} +.literalblock+.colist,.listingblock+.colist{margin-top:-.5em} +.colist td:not([class]):first-child{padding:.4em .75em 0;line-height:1;vertical-align:top} +.colist td:not([class]):first-child img{max-width:none} +.colist td:not([class]):last-child{padding:.25em 0} +.thumb,.th{line-height:0;display:inline-block;border:solid 4px #fff;-webkit-box-shadow:0 0 0 1px #ddd;box-shadow:0 0 0 1px #ddd} +.imageblock.left{margin:.25em .625em 1.25em 0} +.imageblock.right{margin:.25em 0 1.25em .625em} +.imageblock>.title{margin-bottom:0} +.imageblock.thumb,.imageblock.th{border-width:6px} +.imageblock.thumb>.title,.imageblock.th>.title{padding:0 .125em} +.image.left,.image.right{margin-top:.25em;margin-bottom:.25em;display:inline-block;line-height:0} +.image.left{margin-right:.625em} +.image.right{margin-left:.625em} +a.image{text-decoration:none;display:inline-block} +a.image object{pointer-events:none} +sup.footnote,sup.footnoteref{font-size:.875em;position:static;vertical-align:super} +sup.footnote a,sup.footnoteref a{text-decoration:none} +sup.footnote a:active,sup.footnoteref a:active{text-decoration:underline} +#footnotes{padding-top:.75em;padding-bottom:.75em;margin-bottom:.625em} +#footnotes hr{width:20%;min-width:6.25em;margin:-.25em 0 .75em;border-width:1px 0 0} +#footnotes .footnote{padding:0 .375em 0 .225em;line-height:1.3334;font-size:.875em;margin-left:1.2em;margin-bottom:.2em} +#footnotes .footnote a:first-of-type{font-weight:bold;text-decoration:none;margin-left:-1.05em} +#footnotes .footnote:last-of-type{margin-bottom:0} +#content #footnotes{margin-top:-.625em;margin-bottom:0;padding:.75em 0} +.gist .file-data>table{border:0;background:#fff;width:100%;margin-bottom:0} +.gist .file-data>table td.line-data{width:99%} +div.unbreakable{page-break-inside:avoid} +.big{font-size:larger} +.small{font-size:smaller} +.underline{text-decoration:underline} +.overline{text-decoration:overline} +.line-through{text-decoration:line-through} +.aqua{color:#00bfbf} +.aqua-background{background:#00fafa} +.black{color:#000} +.black-background{background:#000} +.blue{color:#0000bf} +.blue-background{background:#0000fa} +.fuchsia{color:#bf00bf} +.fuchsia-background{background:#fa00fa} +.gray{color:#606060} +.gray-background{background:#7d7d7d} +.green{color:#006000} +.green-background{background:#007d00} +.lime{color:#00bf00} +.lime-background{background:#00fa00} +.maroon{color:#600000} +.maroon-background{background:#7d0000} +.navy{color:#000060} +.navy-background{background:#00007d} +.olive{color:#606000} +.olive-background{background:#7d7d00} +.purple{color:#600060} +.purple-background{background:#7d007d} +.red{color:#bf0000} +.red-background{background:#fa0000} +.silver{color:#909090} +.silver-background{background:#bcbcbc} +.teal{color:#006060} +.teal-background{background:#007d7d} +.white{color:#bfbfbf} +.white-background{background:#fafafa} +.yellow{color:#bfbf00} +.yellow-background{background:#fafa00} +span.icon>.fa{cursor:default} +a span.icon>.fa{cursor:inherit} +.admonitionblock td.icon [class^="fa icon-"]{font-size:2.5em;text-shadow:1px 1px 2px rgba(0,0,0,.5);cursor:default} +.admonitionblock td.icon .icon-note::before{content:"\f05a";color:#19407c} +.admonitionblock td.icon .icon-tip::before{content:"\f0eb";text-shadow:1px 1px 2px rgba(155,155,0,.8);color:#111} +.admonitionblock td.icon .icon-warning::before{content:"\f071";color:#bf6900} +.admonitionblock td.icon .icon-caution::before{content:"\f06d";color:#bf3400} +.admonitionblock td.icon .icon-important::before{content:"\f06a";color:#bf0000} +.conum[data-value]{display:inline-block;color:#fff!important;background:rgba(0,0,0,.8);-webkit-border-radius:100px;border-radius:100px;text-align:center;font-size:.75em;width:1.67em;height:1.67em;line-height:1.67em;font-family:"Open Sans","DejaVu Sans",sans-serif;font-style:normal;font-weight:bold} +.conum[data-value] *{color:#fff!important} +.conum[data-value]+b{display:none} +.conum[data-value]::after{content:attr(data-value)} +pre .conum[data-value]{position:relative;top:-.125em} +b.conum *{color:inherit!important} +.conum:not([data-value]):empty{display:none} +dt,th.tableblock,td.content,div.footnote{text-rendering:optimizeLegibility} +h1,h2,p,td.content,span.alt{letter-spacing:-.01em} +p strong,td.content strong,div.footnote strong{letter-spacing:-.005em} +p,blockquote,dt,td.content,span.alt{font-size:1.0625rem} +p{margin-bottom:1.25rem} +.sidebarblock p,.sidebarblock dt,.sidebarblock td.content,p.tableblock{font-size:1em} +.exampleblock>.content{background:#fffef7;border-color:#e0e0dc;-webkit-box-shadow:0 1px 4px #e0e0dc;box-shadow:0 1px 4px #e0e0dc} +.print-only{display:none!important} +@page{margin:1.25cm .75cm} +@media print{*{-webkit-box-shadow:none!important;box-shadow:none!important;text-shadow:none!important} +html{font-size:80%} +a{color:inherit!important;text-decoration:underline!important} +a.bare,a[href^="#"],a[href^="mailto:"]{text-decoration:none!important} +a[href^="http:"]:not(.bare)::after,a[href^="https:"]:not(.bare)::after{content:"(" attr(href) ")";display:inline-block;font-size:.875em;padding-left:.25em} +abbr[title]::after{content:" (" attr(title) ")"} +pre,blockquote,tr,img,object,svg{page-break-inside:avoid} +thead{display:table-header-group} +svg{max-width:100%} +p,blockquote,dt,td.content{font-size:1em;orphans:3;widows:3} +h2,h3,#toctitle,.sidebarblock>.content>.title{page-break-after:avoid} +#toc,.sidebarblock,.exampleblock>.content{background:none!important} +#toc{border-bottom:1px solid #dddddf!important;padding-bottom:0!important} +body.book #header{text-align:center} +body.book #header>h1:first-child{border:0!important;margin:2.5em 0 1em} +body.book #header .details{border:0!important;display:block;padding:0!important} +body.book #header .details span:first-child{margin-left:0!important} +body.book #header .details br{display:block} +body.book #header .details br+span::before{content:none!important} +body.book #toc{border:0!important;text-align:left!important;padding:0!important;margin:0!important} +body.book #toc,body.book #preamble,body.book h1.sect0,body.book .sect1>h2{page-break-before:always} +.listingblock code[data-lang]::before{display:block} +#footer{padding:0 .9375em} +.hide-on-print{display:none!important} +.print-only{display:block!important} +.hide-for-print{display:none!important} +.show-for-print{display:inherit!important}} +@media print,amzn-kf8{#header>h1:first-child{margin-top:1.25rem} +.sect1{padding:0!important} +.sect1+.sect1{border:0} +#footer{background:none} +#footer-text{color:rgba(0,0,0,.6);font-size:.9em}} +@media amzn-kf8{#header,#content,#footnotes,#footer{padding:0}} + +/* additional code styling support */ +dt > code { color: #19177C } /* tok-nv in pygments */ +code > span.kw { color: #008000; font-weight: bold } /* tok-k in pygments */ +code > span.tv { color: #AA22FF } /* tok-nd in pygments */ +code > span.ty { color: #B00040 } /* tok-kt in pygments */ +code > span.con { color: #0000FF; font-weight: bold } /* tok-nc in pygments */ diff --git a/doc/html/smlnj-lib/styles/smlnj-lib-pygments.css b/doc/html/smlnj-lib/styles/smlnj-lib-pygments.css new file mode 100644 index 0000000..6c7208f --- /dev/null +++ b/doc/html/smlnj-lib/styles/smlnj-lib-pygments.css @@ -0,0 +1,25 @@ +/* + pygmentize filter for SML code +*/ +pre.pygments .hll { background-color: #ffffcc } +pre.pygments { font-size: 0.9em; background: #f8f8f8; } /* code background */ +pre.pygments .tok-k { color: #268bd2; font-weight: bold } /* Keyword */ +pre.pygments .tok-kr { color: #268bd2; font-weight: bold; } /* keywords */ +pre.pygments .tok-kt { color: #b00040; } /* type */ +pre.pygments .tok-o { color: #268bd2; } /* Operator */ +pre.pygments .tok-c { color: #dc322f; font-style: italic; } /* comments */ +pre.pygments .tok-cm { color: #dc322f; font-style: italic; } /* comments */ +pre.pygments .tok-s2 { color: #b58900; } /* strings */ +pre.pygments .tok-mb { color: #b58900; } /* Literal.Number.Bin */ +pre.pygments .tok-mf { color: #b58900; } /* Literal.Number.Float */ +pre.pygments .tok-mh { color: #b58900; } /* Literal.Number.Hex */ +pre.pygments .tok-mi { color: #b58900; } /* Literal.Number.Integer */ +pre.pygments .tok-mo { color: #b58900; } /* Literal.Number.Oct */ +pre.pygments .tok-kt { color: #6c71c4; } /* lhs type names */ +pre.pygments .tok-n { color: #6c71c4; } /* type names on rhs; parameters */ +pre.pygments .tok-p { color: #6c71c4; } /* punctuation */ +pre.pygments .tok-nn { color: #6c71c4; font-weight: bold; } /* Module names */ +pre.pygments .tok-nv { color: #6c71c4; font-weight: bold; } /* variable names in val bindings */ +pre.pygments .tok-nf { color: #6c71c4; font-weight: bold; } /* function names in fun bindings */ +pre.pygments .tok-nc { color: #6c71c4; font-weight: bold; } /* data constructor name in rhs of datatype */ +pre.pygments .tok-nd { color: #aa22ff; } /* type variable */ diff --git a/doc/html/smlnj-lib/styles/smlnj-lib.css b/doc/html/smlnj-lib/styles/smlnj-lib.css new file mode 100644 index 0000000..d3fc421 --- /dev/null +++ b/doc/html/smlnj-lib/styles/smlnj-lib.css @@ -0,0 +1,184 @@ +/* CSS for SML/NJ Library documentation + * This file overrides the default asciidoctor definitions + * in smlnj-lib-base.css. + */ + +/* page layout */ + +#layout-top-mask { /* masks out scrolling material */ + position: fixed; + top: 0px; + left: 0px; + height: 2em; + width: 100%; + background-color: #fff; + z-index: 5; +} + +#layout-main { + margin: 0.5em 0.5em 0.5em 0.5em; +} + +#layout-banner-spacer { + position: relative; + width: 100%; + height: 10em; + clear: both; +} + +#layout-banner { + margin: 0; + padding: 1.25em; + background-color: #ececec; + font-family: Arial, Helvetica, Geneva, sans-serif; + text-align: left; + -webkit-border-top-left-radius: 1em; + -webkit-border-top-right-radius: 1em; + -webkit-border-bottom-right-radius: 1em; + -moz-border-radius-topright: 1em; + -moz-border-radius-bottomright: 1em; + border-top-right-radius: 1em; + border-bottom-right-radius: 1em; + position: fixed; + top: 0.5em; /* should match layout-main margin */ + left: 0.5em; + right: 0.5em; + z-index: 10; +} + +#layout-title { + height: 7.5em; + color: #000099; +} + +#layout-title-main { + font-size: 4.0em; + margin: 0 0 5px; + color: inherit; +} + +#layout-title-main a { + color: inherit; + text-decoration: none; +} + +#layout-title-description { + font-size: 2.4em; + margin: 0; + color: inherit; +} + +#layout-title-description code { + color: inherit; + background-color: inherit; +} + +#layout-logo { + height: 7.5em; + float: left; + width: 14em; +} + +#layout-logo img { + height: 7.5em; + max-width: none; /* override smlnj-lib-base.css */ +} + +#layout-toc { + position: fixed; + left: 0.5em; + width: 15em; + top: 10em; + bottom: 0; + z-index: 2; + background-color: #ececec; + color: #000099; + overflow: auto; +} + +#layout-content-box { + margin-left: 15em; + z-index: 0; +} + +#layout-footer-box { + margin-left: 15em; + margin-top: 4.0em; + padding-left: 1.0em; + padding-right: 0.4em; + padding-top: 0.5em; +} + +#layout-footer { + border-top: 2px solid #527bbd; +} + +#layout-footer-text { + float: left; + font-size: 0.8em; + padding-top: 0.2em; + padding-bottom: 0.5em; + border-bottom: 2px solid #527bbd; + margin-bottom: 1em; +} + +/* Navigation (aka TOC) */ +#toc { + margin-top: 2em; + margin-left: 0; + margin-bottom: 2em; +} +#toc>:first-child{margin-top:0} +#toc>:last-child{margin-bottom:0} + +#toc a{ + color: inherit; + background-color: inherit; + text-decoration: none; +} + +#toc a:hover{ + background-color: #99ccff; +} + +#toc code{ + color: inherit; + background-color: inherit; +} + +ul.toc-lib-list{ + font-size: 1.1em; + list-style-type:none; + padding: 0; + margin-left: 0; +} + +li.toc-lib{ + margin-left: 0; + margin-bottom: 0.2em; + padding: 2px; +} + +ul.toc-page-list{ + font-size: 0.9em; + list-style-type: none; + padding: 0; + margin-left: 1em; +} + +li.toc-page{ + margin-left: 0; + margin-bottom: 0.2em; + padding: 2px; +} + +#toc-current { + color: blue; +} + +/* additional code styling support */ +dt > code { color: #19177C } /* tok-nv in pygments */ +code > span.kw { color: #268bd2; font-weight: bold } /* tok-k in pygments */ +code > span.tv { color: #AA22FF } /* tok-nd in pygments */ +code > span.ty { color: #B00040 } /* tok-kt in pygments */ +code > span.con { color: #6c71c4; font-weight: bold } /* tok-nc in pygments */ diff --git a/doc/man/man1/asdlgen.1 b/doc/man/man1/asdlgen.1 new file mode 100644 index 0000000..21c3021 --- /dev/null +++ b/doc/man/man1/asdlgen.1 @@ -0,0 +1,169 @@ +'\" t +.\" Title: asdlgen +.\" Author: [see the "AUTHOR(S)" section] +.\" Generator: Asciidoctor 2.0.21 +.\" Date: 2024-03-14 +.\" Manual: \ \& +.\" Source: SML/NJ +.\" Language: English +.\" +.TH "ASDLGEN" "1" "2024-03-14" "SML/NJ" "\ \&" +.ie \n(.g .ds Aq \(aq +.el .ds Aq ' +.ss \n[.ss] 0 +.nh +.ad l +.de URL +\fI\\$2\fP <\\$1>\\$3 +.. +.als MTO URL +.if \n[.g] \{\ +. mso www.tmac +. am URL +. ad l +. . +. am MTO +. ad l +. . +. LINKSTYLE blue R < > +.\} +.SH "NAME" +asdlgen \- compiler for the Abstract Syntax Description Language (ASDL). +.SH "SYNOPSIS" +.sp +\fBasdlgen\fP \fICOMMAND\fP [ \fIOPTIONS\fP ] \fIFILE\fP ... +.sp +where \fICOMMAND\fP is one of the following: +.sp +\fBhelp\fP +.RS 4 +prints information about the available options to the standard output. +.RE +.sp +\fBversion\fP +.RS 4 +prints the version of \fBasdlgen\fP to the standard output. +.RE +.sp +\fBc++\fP or \fBcxx\fP +.RS 4 +generate \fBC++\fP code. +.RE +.sp +\fBsml\fP +.RS 4 +generate \fBSML\fP code. +.RE +.sp +\fBcheck\fP +.RS 4 +check the correctness of the input specifications, but do not generate +output. +.RE +.SH "DESCRIPTION" +.sp +\fBAsdlgen\fP reads the specified of files, which should +contain ASDL module and view declarations, and produces pickling +and unpickling code. +.SH "OPTIONS" +.SS "COMMON OPTIONS" +.sp +\fB\-n\fP +.RS 4 +Do not write any output files. Instead write the list of files that +would have been written to the standard output. +.RE +.sp +\fB\-d\fP \fIDIR\fP, \fB\-\-output\-directory=\fP\fIDIR\fP +.RS 4 +Specify the output directory to place the generated files. +By default the output will be placed in the same directory +as the input file from which it was produced. +.RE +.sp +\fB\-\-gen=\fP\fINAMES\fP +.RS 4 +Specifies the components to generate, where \fINAMES\fP is a comma\-separated +list of names taken from the following: +.sp +.RS 4 +.ie n \{\ +\h'-04'\(bu\h'+03'\c +.\} +.el \{\ +. sp -1 +. IP \(bu 2.3 +.\} +\fBtypes\fP \(em generate the type definitions from the ASDL specification. +.RE +.sp +.RS 4 +.ie n \{\ +\h'-04'\(bu\h'+03'\c +.\} +.el \{\ +. sp -1 +. IP \(bu 2.3 +.\} +\fBmemory\fP \(em generate the memory pickler +.RE +.sp +.RS 4 +.ie n \{\ +\h'-04'\(bu\h'+03'\c +.\} +.el \{\ +. sp -1 +. IP \(bu 2.3 +.\} +\fBfile\fP \(em generate the file pickler +.RE +.sp +.RS 4 +.ie n \{\ +\h'-04'\(bu\h'+03'\c +.\} +.el \{\ +. sp -1 +. IP \(bu 2.3 +.\} +\fBsexp\fP \(em generate the S\-Expression pickler (SML only). +.RE +.RE +.SS "C++ OPTIONS" +.sp +\fB\-\-base\-include=\fP\fIFILE\fP +.RS 4 +Specify the name of the \fBC++\fP header file that defines the primitive ASDL types and functions. The default value is asdl/asdl.hxx. +.RE +.SS "SML OPTIONS" +.sp +\fB\-\-cm=\fP\fIFILE\fP +.RS 4 +Generate a CM file for the pickler; this will define a CM library. +Note that if the ASDL specification includes primitive modules, these +will be included in the list of exported structures, but the supporting +source files will have to be added to the CM file by hand. +.RE +.sp +\fB\-\-mlb=\fP\fIFILE\fP +.RS 4 +Generate an MLB file for the pickler. Note that if the ASDL specification +includes primitive modules, these will be included in the list of +exported structures, but the supporting source files will have to be +added to the MLB file by hand. +.RE +.SH "AUTHOR" +.sp +The original version of \fBasdlgen\fP was written by Dan Wang as part of +the National Compiler Infrastructure Project at Princeton University. +This version of the tool was implemented by John Reppy. +.SH "SEE\-ALSO" +.sp +\fIASDL Reference Manual\fP (included in the \fBSML/NJ\fP documentation). +.SH "COPYING" +.sp +Copyright \(co 2020 The Fellowship of SML/NJ +.sp +This is free software; see the source for copying conditions. There is NO +warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. \ No newline at end of file diff --git a/doc/man/man1/heap2exec.1 b/doc/man/man1/heap2exec.1 new file mode 100644 index 0000000..d9b85a8 --- /dev/null +++ b/doc/man/man1/heap2exec.1 @@ -0,0 +1,85 @@ +'\" t +.\" Title: heap2exec +.\" Author: [see the "AUTHOR(S)" section] +.\" Generator: Asciidoctor 2.0.21 +.\" Date: 2024-03-14 +.\" Manual: \ \& +.\" Source: SML/NJ +.\" Language: English +.\" +.TH "HEAP2EXEC" "1" "2024-03-14" "SML/NJ" "\ \&" +.ie \n(.g .ds Aq \(aq +.el .ds Aq ' +.ss \n[.ss] 0 +.nh +.ad l +.de URL +\fI\\$2\fP <\\$1>\\$3 +.. +.als MTO URL +.if \n[.g] \{\ +. mso www.tmac +. am URL +. ad l +. . +. am MTO +. ad l +. . +. LINKSTYLE blue R < > +.\} +.SH "NAME" +heap2exec \- generate standalone executable from a *SML/NJ* heap image. +.SH "SYNOPSIS" +.sp +\fBheap2exec\fP [ \fB\-32\fP | \fB\-64\fP ] [ \fILINKMODE\fP ] \fIfile\fP +.SH "DESCRIPTION" +.sp +Under normal usage, \fBSML/NJ\fP represents a program as a heap\-image file +with a name of the form \f(CRfoo.arch\-opsys\fP +\fBHeap2exec\fP generates a standalone executable from a heap image. +You have the option to specify a preferred linking mode. +.sp +The default is to link statically on \fBFreeBSD\fP and \fBLinux\fP. +.sp +Note that \fBheap2exec\fP is an \fIoptional\fP component of the SML/NJ +installation process that is not included by default. Edit the +\f(CRconfig/targets\fP file before installation to include it. +.SH "OPTIONS" +.sp +\fB\-32\fP +.RS 4 +run the 32\-bit version of the program (currently the default). +.RE +.sp +\fB\-64\fP +.RS 4 +run the 64\-bit version of the program. This option only applies to +the \fBx86\-64\fP (aka \fBamd64\fP) architecture. +.RE +.sp +\fB\-static, \-linkwith\-a\fP +.RS 4 +statically link the program with the runtime system. Static linking +is the default on \fBLinux\fP and \fBFreeBSD\fP. +.RE +.sp +\fB\-dynamic, \-linkwith\-so\fP +.RS 4 +dynamically link the program with the runtime system. +.RE +.SH "AUTHOR" +.sp +\fBHeap2exec\fP was written by Matthias Blume. +.SH "SEE\-ALSO" +.sp +sml(1) +.SH "BUGS" +.sp +\fBHeap2exec\fP is limited to the \fBx86\fP and \fBamd64\fP architectures and to the +\fBmacOS\fP, \fBLinux\fP, and \fBFreeBSD\fP operating systems. +.SH "COPYING" +.sp +Copyright \(co 2020 The Fellowship of SML/NJ +.sp +This is free software; see the source for copying conditions. There is NO +warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. \ No newline at end of file diff --git a/doc/man/man1/ml-antlr.1 b/doc/man/man1/ml-antlr.1 new file mode 100644 index 0000000..bcffd3b --- /dev/null +++ b/doc/man/man1/ml-antlr.1 @@ -0,0 +1,80 @@ +'\" t +.\" Title: ml-antlr +.\" Author: [see the "AUTHOR(S)" section] +.\" Generator: Asciidoctor 2.0.21 +.\" Date: 2024-03-14 +.\" Manual: \ \& +.\" Source: SML/NJ +.\" Language: English +.\" +.TH "ML\-ANTLR" "1" "2024-03-14" "SML/NJ" "\ \&" +.ie \n(.g .ds Aq \(aq +.el .ds Aq ' +.ss \n[.ss] 0 +.nh +.ad l +.de URL +\fI\\$2\fP <\\$1>\\$3 +.. +.als MTO URL +.if \n[.g] \{\ +. mso www.tmac +. am URL +. ad l +. . +. am MTO +. ad l +. . +. LINKSTYLE blue R < > +.\} +.SH "NAME" +ml-antlr \- an LL(k) parser generator for Standard ML +.SH "SYNOPSIS" +.sp +\fBml\-antlr\fP [\fIOPTIONS\fP] \fIfile\fP +.SH "DESCRIPTION" +.sp +\fBML\-Antlr\fP is an LL(k) parser generator for Standard ML that is loosely modeled +on Terence Parr\(cqs \fBANTLR\fP parser generator. +.SH "OPTIONS" +.sp +\fB\-32\fP +.RS 4 +run the 32\-bit version of the program (currently the default). +.RE +.sp +\fB\-64\fP +.RS 4 +run the 64\-bit version of the program. This option only applies to +the \fBx86\-64\fP (aka \fBamd64\fP) architecture. +.RE +.sp +\fB\-\-dot\fP +.RS 4 +generate DOT output (\c +.URL "http://www.graphviz.org" "" ")." +The generated file will be named \f(CRfile.dot\fP, +where \f(CRfile\fP is the input file. +.RE +.sp +\fB\-\-latex\fP +.RS 4 +generate a simple LaTeX version of the grammar, named \f(CRfile.tex\fP. +.RE +.sp +\fB\-\-unit\-actions\fP +.RS 4 +ignore the action code in the grammar, and instead return \f(CRunit\fP for every production. +.RE +.SH "AUTHOR" +.sp +\fBML\-Antlr\fP was written by Aaron Turon. +.SH "SEE\-ALSO" +.sp +ml\-ulex(1), ml\-yacc(1) +.SH "COPYING" +.sp +Copyright \(co 2020 The Fellowship of SML/NJ +.sp +This is free software; see the source for copying conditions. There is NO +warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. \ No newline at end of file diff --git a/doc/man/man1/ml-build.1 b/doc/man/man1/ml-build.1 new file mode 100644 index 0000000..0fcecfc --- /dev/null +++ b/doc/man/man1/ml-build.1 @@ -0,0 +1,93 @@ +'\" t +.\" Title: ml-build +.\" Author: [see the "AUTHOR(S)" section] +.\" Generator: Asciidoctor 2.0.21 +.\" Date: 2024-03-14 +.\" Manual: \ \& +.\" Source: SML/NJ +.\" Language: English +.\" +.TH "ML\-BUILD" "1" "2024-03-14" "SML/NJ" "\ \&" +.ie \n(.g .ds Aq \(aq +.el .ds Aq ' +.ss \n[.ss] 0 +.nh +.ad l +.de URL +\fI\\$2\fP <\\$1>\\$3 +.. +.als MTO URL +.if \n[.g] \{\ +. mso www.tmac +. am URL +. ad l +. . +. am MTO +. ad l +. . +. LINKSTYLE blue R < > +.\} +.SH "NAME" +ml-build \- command\-line build tool for SML/NJ programs +.SH "SYNOPSIS" +.sp +\fBml\-build\fP [\fIOPTIONS\fP] \fIgroup.cm\fP [\fImain\fP [\fIheap\-image\fP]] +.SH "DESCRIPTION" +.sp +\fBML\-Build\fP is a command\-line tool for building applications from SML source +files using the Compilation Manager and SML/NJ compiler. +.SH "OPTIONS" +.sp +\fB\-C\fP\fIctl\fP=\fIvalue\fP +.RS 4 +set the control \fIctl\fP to \fIvalue\fP. +.RE +.sp +\fB\-D\fP\fIname\fP=\fIvalue\fP +.RS 4 +define the CM variable \fIname\fP to have the given value. +.RE +.sp +\fB\-D\fP\*(Aqname +.RS 4 +define the CM variable \fIname\fP to have the value 1. +.RE +.sp +\fB\-U\fP\fIname\fP +.RS 4 +remove any definition of the CM variable \fIname\fP. +.RE +.sp +\fB\-S\fP \fIsetup.cm\fP +.RS 4 +load and execute the code specified by the CM file \fIsetup.cm\fP prior to the main +build process. This option allows one to customize the compiler via side\-effect. +.RE +.SH "EXAMPLES" +.sp +\f(CRml\-build foo.cm\fP +.RS 4 +builds the program \f(CRfoo\fP assuming a main function named \f(CRTest.main\fP. +.RE +.sp +\f(CRml\-build foo.cm Main.main\fP +.RS 4 +builds the program \f(CRfoo\fP with main function named \f(CRMain.main\fP. +.RE +.sp +\f(CRml\-build sources.cm Main.main prog\fP +.RS 4 +builds the program \f(CRprog\fP with main function named \f(CRMain.main\fP. +.RE +.SH "AUTHOR" +.sp +\fBML\-Build\fP was written by Matthias Blume. +.SH "SEE\-ALSO" +.sp +\fBml\-makedepend\fP(1), \fBsml\fP(1), \fBsmlnj\fP(7) +.SH "COPYING" +.sp +Copyright \(co 2020 The Fellowship of SML/NJ +.sp +This is free software; see the source for copying conditions. There is NO +warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. \ No newline at end of file diff --git a/doc/man/man1/ml-burg.1 b/doc/man/man1/ml-burg.1 new file mode 100644 index 0000000..1d17c61 --- /dev/null +++ b/doc/man/man1/ml-burg.1 @@ -0,0 +1,59 @@ +'\" t +.\" Title: ml-burg +.\" Author: [see the "AUTHOR(S)" section] +.\" Generator: Asciidoctor 2.0.21 +.\" Date: 2024-03-14 +.\" Manual: \ \& +.\" Source: SML/NJ +.\" Language: English +.\" +.TH "ML\-BURG" "1" "2024-03-14" "SML/NJ" "\ \&" +.ie \n(.g .ds Aq \(aq +.el .ds Aq ' +.ss \n[.ss] 0 +.nh +.ad l +.de URL +\fI\\$2\fP <\\$1>\\$3 +.. +.als MTO URL +.if \n[.g] \{\ +. mso www.tmac +. am URL +. ad l +. . +. am MTO +. ad l +. . +. LINKSTYLE blue R < > +.\} +.SH "NAME" +ml-burg \- a tree parser generator for Standard ML +.SH "SYNOPSIS" +.sp +\fBml\-burg\fP \fIfile\fP +.SH "DESCRIPTION" +.sp +The \fBml\-burg\fP program generates a \fBStandard ML\fP program to perform +bottom\-up rewriting of an input tree. Cost information associated +with each rewrite rule is used to derive the minimum rewrite cost +for the entire tree. A successful reduction corresponds to rewriting +the input tree to a special non\-terminal symbol called the +\fIstart non\-terminal\fP. Upon successful reduction, facilities are +provided to walk the tree emitting semantic actions corresponding to +the rules that matched. +.SH "AUTHOR" +.sp +\fBML\-Burg\fP was written by Lal George and Florent Guillaume. +.SH "SEE\-ALSO" +.sp +sml(1) +.sp +\fIML\-Burg \(em Documentation\fP by Florent Guillaume and Lal George (included +in the \fBSML/NJ\fP documentation). +.SH "COPYING" +.sp +Copyright \(co 2020 The Fellowship of SML/NJ +.sp +This is free software; see the source for copying conditions. There is NO +warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. \ No newline at end of file diff --git a/doc/man/man1/ml-lex.1 b/doc/man/man1/ml-lex.1 new file mode 100644 index 0000000..85974ba --- /dev/null +++ b/doc/man/man1/ml-lex.1 @@ -0,0 +1,56 @@ +'\" t +.\" Title: ml-lex +.\" Author: [see the "AUTHOR(S)" section] +.\" Generator: Asciidoctor 2.0.21 +.\" Date: 2024-03-14 +.\" Manual: \ \& +.\" Source: SML/NJ +.\" Language: English +.\" +.TH "ML\-LEX" "1" "2024-03-14" "SML/NJ" "\ \&" +.ie \n(.g .ds Aq \(aq +.el .ds Aq ' +.ss \n[.ss] 0 +.nh +.ad l +.de URL +\fI\\$2\fP <\\$1>\\$3 +.. +.als MTO URL +.if \n[.g] \{\ +. mso www.tmac +. am URL +. ad l +. . +. am MTO +. ad l +. . +. LINKSTYLE blue R < > +.\} +.SH "NAME" +ml-lex \- a scanner generator for *Standard ML*. +.SH "SYNOPSIS" +.sp +\fBml\-lex\fP \fIfile\fP +.SH "DESCRIPTION" +.sp +\fBML\-Lex\fP is a scanner generator for \fBStandard ML\fP. +.sp +This software is deprecated; we recommend using the \fBml\-ulex\fP(1) tool +in \fBml\-lex\fP\-compatibility mode instead. +.SH "AUTHOR" +.sp +ml\-lex(1) was written by James S. Mattson and David Tarditi. +.SH "SEE\-ALSO" +.sp +ml\-ulex(1) +.sp +\c +.URL "https://smlnj.cs.uchicago.edu/doc/ML\-Lex/manual.html" "ML\-Lex User\(cqs Manual" +(also included in the \fBSML/NJ\fP documentation). +.SH "COPYING" +.sp +Copyright \(co 2020 The Fellowship of SML/NJ +.sp +This is free software; see the source for copying conditions. There is NO +warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. \ No newline at end of file diff --git a/doc/man/man1/ml-makedepend.1 b/doc/man/man1/ml-makedepend.1 new file mode 100644 index 0000000..71929a6 --- /dev/null +++ b/doc/man/man1/ml-makedepend.1 @@ -0,0 +1,97 @@ +'\" t +.\" Title: ml-makedepend +.\" Author: [see the "AUTHOR(S)" section] +.\" Generator: Asciidoctor 2.0.21 +.\" Date: 2024-03-14 +.\" Manual: \ \& +.\" Source: SML/NJ +.\" Language: English +.\" +.TH "ML\-MAKEDEPEND" "1" "2024-03-14" "SML/NJ" "\ \&" +.ie \n(.g .ds Aq \(aq +.el .ds Aq ' +.ss \n[.ss] 0 +.nh +.ad l +.de URL +\fI\\$2\fP <\\$1>\\$3 +.. +.als MTO URL +.if \n[.g] \{\ +. mso www.tmac +. am URL +. ad l +. . +. am MTO +. ad l +. . +. LINKSTYLE blue R < > +.\} +.SH "NAME" +ml-makedepend \- makefile dependency generator for SML/NJ programs +.SH "SYNOPSIS" +.sp +\fBml\-makedepend\fP [\fIOPTIONS\fP] \fIproject.cm\fP \fItarget\fP +.SH "DESCRIPTION" +.sp +\fBML\-Makedepend\fP is a tool for generating dependency information to allow the +Unix \fBmake\fP(1) program to be used to build SML/NJ programs. +.SH "OPTIONS" +.sp +\fB\-f\fP \fImakefile\fP +.RS 4 +Specify the name of the makefile to which the dependency information is appended. If this +option is \fInot\fP given, then the output is appended to the end of the file \f(CRmakefile\fP +(or \f(CRMakefile\fP). It is an error if neither file exists and the \fB\-f\fP option is not specified. +.RE +.sp +\fB\-a\fP \fIarch\fP +.RS 4 +specifies the name a make variable for architecture\-specific filenames and paths +(the default is \f(CRARCH\fP). +.RE +.sp +\fB\-o\fP \fIos\fP +.RS 4 +specifies the name a make variable for operating\-system\-specific filenames and paths +(the default is \f(CROPSYS\fP). +.RE +.sp +\fB\-n\fP +.RS 4 +generates full filenames and paths with any architecture or operating\-system\-specific +parts expanded out to their definition for the host system. If this option is specified, +then any \fB\-a\fP and \fB\-o\fP options are ignored. +.RE +.sp +\fB\-C\fP\fIctl\fP=\fIvalue\fP +.RS 4 +set the control \fIctl\fP to \fIvalue\fP. +.RE +.sp +\fB\-D\fP\fIname\fP=\fIvalue\fP +.RS 4 +define the CM variable \fIname\fP to have the given value. +.RE +.sp +\fB\-D\fP\*(Aqname +.RS 4 +define the CM variable \fIname\fP to have the value 1. +.RE +.sp +\fB\-U\fP\fIname\fP +.RS 4 +remove any definition of the CM variable \fIname\fP. +.RE +.SH "AUTHOR" +.sp +\fBML\-Makedepend\fP was written by Matthias Blume. +.SH "SEE\-ALSO" +.sp +\fBml\-build\fP(1), \fBsml\fP(1), \fBsmlnj\fP(7) +.SH "COPYING" +.sp +Copyright \(co 2020 The Fellowship of SML/NJ +.sp +This is free software; see the source for copying conditions. There is NO +warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. \ No newline at end of file diff --git a/doc/man/man1/ml-nlffigen.1 b/doc/man/man1/ml-nlffigen.1 new file mode 100644 index 0000000..d583221 --- /dev/null +++ b/doc/man/man1/ml-nlffigen.1 @@ -0,0 +1,227 @@ +'\" t +.\" Title: ml-nlffigen +.\" Author: [see the "AUTHOR(S)" section] +.\" Generator: Asciidoctor 2.0.21 +.\" Date: 2024-03-14 +.\" Manual: \ \& +.\" Source: SML/NJ +.\" Language: English +.\" +.TH "ML\-NLFFIGEN" "1" "2024-03-14" "SML/NJ" "\ \&" +.ie \n(.g .ds Aq \(aq +.el .ds Aq ' +.ss \n[.ss] 0 +.nh +.ad l +.de URL +\fI\\$2\fP <\\$1>\\$3 +.. +.als MTO URL +.if \n[.g] \{\ +. mso www.tmac +. am URL +. ad l +. . +. am MTO +. ad l +. . +. LINKSTYLE blue R < > +.\} +.SH "NAME" +ml-nlffigen \- foreign function glue\-code generator for Standard ML +.SH "SYNOPSIS" +.sp +\fBml\-nlffigen\fP [\fIOPTIONS\fP] \fIfile\fP +.SH "DESCRIPTION" +.sp +\fBML\-Nlffigen\fP is a tool for generating glue code from \fBC\fP language +header files. The generator reads \fBC\fP source code and emits \fBSML\fP +code along with a description file for the compilation manager (CM). +.SH "OPTIONS" +.sp +\fB\-d, \-dir\fP \fIDIR\fP +.RS 4 +Specify the output directory where all generated files are +placed (default \f(CRNLFFI\-Generated\fP). +.RE +.sp +\fB\-allSU\fP +.RS 4 +instructs \fBml\-nlffigen\fP to include all structs and unions, +even those that are defined in included files, as opposed +to files explicitly listed as arguments. +.RE +.sp +\fB\-width\fP \fIWID\fP +.RS 4 +set output line width (just a guess) to \fIWID\fP (default \f(CR75\fP). +.RE +.sp +\fB\-smloption\fP \fIOPT\fP +.RS 4 +instructs \fBml\-nlffigen\fP to add \fIOPT\fP to the list +of options to annotate \f(CR.sml\fP entries in the generated \f(CR.cm\fP +file with. By default, the list consists just of "\f(CRnoguid\fP." +.RE +.sp +\fB\-guid\fP +.RS 4 +Removes the default "noguid" from the list of sml options. +This option re\-enables strict handling of type\- and object\-identity +but can have negative impact on CM cutoff recompilation +performance if the programmer routinely removes the entire +tree of ml\-nlffigen\-generated files during development. +.RE +.sp +\fB\-t, \-target\fP \fITGT\fP +.RS 4 +Sets the target to \fITGT\fP (which must be one of "sparc\-unix", +"x86\-unix", or "x86\-win32"). The default is the host architecture. +.RE +.sp +\fB\-l, \-light\fP +.RS 4 +suppress "heavy" versions of function wrappers and +field accessors; also cancels any earlier \fB\-heavy\fP option. +.RE +.sp +\fB\-h, \-heavy\fP +.RS 4 +suppress "light" versions of function wrappers and +field accessors; also cancels any earlier \fB\-light\fP option. +.RE +.sp +\fB\-na, \-namedargs\fP +.RS 4 +instructs \fBml\-nlffigen\fP to generated function wrappers that +use named arguments (\fBSML\fP records) instead of tuples if +there is enough information for this in the \fBC\fP source. +.RE +.sp +\fB\-nocollect\fP +.RS 4 +Do not collect enum constants from truly unnamed enumerations +(those without tags that occur at toplevel or in an unnamed +context; \fIi.e.\fP, not in a typedef or another named struct +or union) into a single artificial enumeration tagged by \f(CR\*(Aq\fP +(single apostrohe). The corresponding SML\-side representative +will be a structure named \f(CRE_\*(Aq\fP. +.RE +.sp +\fB\-ec, \-enum\-constructors\fP +.RS 4 +When possible (i.e., if all values of a given enumeration +are different from each other), make the ML representation +type of the enumeration a datatype. The default (and +fallback) is to make that type the same as MLRep.Signed.int. +.RE +.sp +\fB\-lh \-libhandle\fP \fIH\fP +.RS 4 +Use the variable \fIH\fP to refer to the handle to the +shared library object. Given the constraints of CM, the +argument \fIH\fP must have the form of a long SML identifier; +\fIe.g.\fP, \f(CRMyLibrary.libhandle\fP (default \f(CRLibrary.libh\fP). +.RE +.sp +\fB\-add, \-include\fP \fIfile\fP +.RS 4 +Include \fIfile\fP in the generated \f(CR.cm\fP file. This option +is necessary at least once for providing the library handle. +It can be used arbitrarily many times, resulting in more +than one such programmer\-supplied file to be mentioned. +If \fIfile\fP is a relative path, then it must be relative to +the directory specified in the \fB\-dir\fP option. +.RE +.sp +\fB\-cm \-cmfile\fP \fIfile\fP +.RS 4 +Specifies the name of the generated \f(CR.cm\fP file, relative to the directory +specified by the \fB\-dir\fP option (default \f(CRnlffi\-generated.cm\fP). +.RE +.sp +\fB\-cppopt\fP \fIopt\fP +.RS 4 +The string \fIopt\fP gets added to the list of options to be +passed to the \fBC\fP preprocessor. The list of options +gets substituted for \f(CR%o\fP in the cpp command line template. +.RE +.sp +\fB\-U\fP\fIx\fP +.RS 4 +The option (\fIi.e.\fP, \fB\-U\fP\fIx\fP) is added to the list of cpp options. +.RE +.sp +\fB\-D\fP\fIx\fP +.RS 4 +The option (\fIi.e.\fP, \fB\-D\fP\fIx\fP) is added to the list of cpp options. +.RE +.sp +\fB\-I\fP\fIx\fP +.RS 4 +The option (\fIi.e.\fP, \fB\-I\fP\fIx\fP) is added to the list of cpp options. +.RE +.sp +\fB\-version\fP +.RS 4 +Print the version number of \fBml\-nlffigen\fP to standard output and then quit. +.RE +.sp +\fB\-m, \-match\fP \fIRE\fP +.RS 4 +Normally \fBml\-nlffigen\fP will include ML definitions for a \fBC\fP +declaration if the \fBC\fP declaration textually appears in +one of the files specified at the command line. Definitions +in included files will normally not appear (unless +their absence would lead to inconsistencies). +By specifying \fB\-match\fP \fIRE\fP, \fBml\-nlffigen\fP will also include +definitions that occur in recursively included files +for which the AWK\-style regular expression \fIRE\fP matches +their names. +.RE +.sp +\fB\-p, \-prefix\fP \fIP\fP +.RS 4 +Generated \fBSML\fP structure names will all have the prefix \fIP\fP +(in addition to the usual "\f(CRS_\fP" or "\f(CRU_\fP" or "\f(CRF_\fP"). +.RE +.sp +\fB\-g, \-gensym\fP \fIG\fP +.RS 4 +Names generated by \fBml\-nlffigen\fP (for anonymous struct/union/enums) +will get \f(CR_\fP\fIG\fP as an additional suffix. This option should +be used if output from several indepdendent runs of +\fBml\-nlffigen\fP are to coexist in the same ML program. +.RE +.sp +\fB\-\-\fP +.RS 4 +Terminate processing of options, remaining arguments are +taken to be \fBC\fP sources. +.RE +.SH "ENVIRONMENT" +.sp +\fBML\-Nlffigen\fP looks at the environment variable \f(CRFFIGEN_CPP\fP to obtain +the template string for the \fBC\fP\-Preprocessor command line. If \f(CRFFIGEN_CPP\fP is not +set, the template defaults to "\f(CRgcc \-E \-U__GNUC__ %o %s > %t\fP." +The actual command line is obtained by substituting occurences of +\f(CR%s\fP with the name of the source, and \f(CR%t\fP with the name of a temporary +file holding the pre\-processed code. +.SH "AUTHOR" +.sp +\fBML\-Nlffigen\fP was written by Matthias Blume. +.SH "SEE\-ALSO" +.sp +sml(1) +.sp +\fINLFFI \(em A new SML/NJ Foreign\-Function Interface (User Manual)\fP (included in +the \fBSML/NJ\fP documentation). +.SH "BUGS" +.sp +\fBML\-Nlffigen\fP does not yet support 64\-bit platforms. +.SH "COPYING" +.sp +Copyright (\fBC\fP) 2020 The Fellowship of SML/NJ +.sp +This is free software; see the source for copying conditions. There is NO +warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. \ No newline at end of file diff --git a/doc/man/man1/ml-ulex.1 b/doc/man/man1/ml-ulex.1 new file mode 100644 index 0000000..f8cd725 --- /dev/null +++ b/doc/man/man1/ml-ulex.1 @@ -0,0 +1,111 @@ +'\" t +.\" Title: ml-ulex +.\" Author: [see the "AUTHOR(S)" section] +.\" Generator: Asciidoctor 2.0.21 +.\" Date: 2024-03-14 +.\" Manual: \ \& +.\" Source: SML/NJ +.\" Language: English +.\" +.TH "ML\-ULEX" "1" "2024-03-14" "SML/NJ" "\ \&" +.ie \n(.g .ds Aq \(aq +.el .ds Aq ' +.ss \n[.ss] 0 +.nh +.ad l +.de URL +\fI\\$2\fP <\\$1>\\$3 +.. +.als MTO URL +.if \n[.g] \{\ +. mso www.tmac +. am URL +. ad l +. . +. am MTO +. ad l +. . +. LINKSTYLE blue R < > +.\} +.SH "NAME" +ml-ulex \- a Unicode\-compatible lexical analyser generator for Standard ML +.SH "SYNOPSIS" +.sp +\fBml\-ulex\fP [\fIOPTIONS\fP] \fIfile\fP +.SH "DESCRIPTION" +.sp +\fBML\-Ulex\fP is a lexer generator that supports Unicode characters (in UTF\-8 representation) and +an extended form of regular expressions. +.SH "OPTIONS" +.sp +\fB\-32\fP +.RS 4 +run the 32\-bit version of the program (currently the default). +.RE +.sp +\fB\-64\fP +.RS 4 +run the 64\-bit version of the program. This option only applies to +the \fBx86\-64\fP (aka \fBamd64\fP) architecture. +.RE +.sp +\fB\-\-dot\fP +.RS 4 +generate DOT output (\c +.URL "http://www.graphviz.org" "" ")." +The generated file will be named \f(CRfile.dot\fP, +where \f(CRfile\fP is the input file. +.RE +.sp +\fB\-\-match\fP +.RS 4 +enter interactive matching mode. This will allow interactive testing of the machine; presently, +only the \f(CRINITIAL\fP start state is available for testing +(see the \c +.URL "http://www.smlnj.org/doc/ml\-lpt/manual.pdf" "User Guide" "" +for details on start states). +.RE +.sp +\fB\-\-ml\-lex\-mode\fP +.RS 4 +operate in \fBml\-lex\fP compatibility mode. +.RE +.sp +\fB\-\-table\-based\fP +.RS 4 +generate a table\-based lexer. +.RE +.sp +\fB\-\-fn\-based\fP +.RS 4 +generate a lexer that represents states as functions and transitions as tail calls. +.RE +.sp +\fB\-\-minimize\fP +.RS 4 +generate a minimal machine. Note that this is slow, and is almost never necessary. +.RE +.sp +\fB\-\-strict\-sml\fP +.RS 4 +generate strict SML (\fIi.e.\fP, do not use SML/NJ extensions). This flag +is useful if you want to use the output with a different SML system. +.RE +.SH "AUTHOR" +.sp +\fBML\-Ulex\fP was written by Aaron Turon. +.SH "SEE\-ALSO" +.sp +ml\-antlr(1), ml\-yacc(1) +.sp +The \c +.URL "https://www.smlnj.org/doc/ml\-lpt/manual.pdf" "\fISML/NJ Language Processing Tools: User Guide\fP" "." +.sp +\fBRegular\-expression derivatives reexamined\fP by Scott Owens, John Reppy, and Aaron Turon. +\fIJournal of Functional Programming\fP, 19(2):173\-190, 2009. +.SH "COPYING" +.sp +Copyright \(co 2020 The Fellowship of SML/NJ +.sp +This is free software; see the source for copying conditions. There is NO +warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. \ No newline at end of file diff --git a/doc/man/man1/ml-yacc.1 b/doc/man/man1/ml-yacc.1 new file mode 100644 index 0000000..ecdd92a --- /dev/null +++ b/doc/man/man1/ml-yacc.1 @@ -0,0 +1,54 @@ +'\" t +.\" Title: ml-yacc +.\" Author: [see the "AUTHOR(S)" section] +.\" Generator: Asciidoctor 2.0.21 +.\" Date: 2024-03-14 +.\" Manual: \ \& +.\" Source: SML/NJ +.\" Language: English +.\" +.TH "ML\-YACC" "1" "2024-03-14" "SML/NJ" "\ \&" +.ie \n(.g .ds Aq \(aq +.el .ds Aq ' +.ss \n[.ss] 0 +.nh +.ad l +.de URL +\fI\\$2\fP <\\$1>\\$3 +.. +.als MTO URL +.if \n[.g] \{\ +. mso www.tmac +. am URL +. ad l +. . +. am MTO +. ad l +. . +. LINKSTYLE blue R < > +.\} +.SH "NAME" +ml-yacc \- an LALR(1) parser generator +.SH "SYNOPSIS" +.sp +\fBml\-yacc\fP \fIfile\fP +.SH "DESCRIPTION" +.sp +ML\-Yacc is an LALR(k) parser generator for \fBStandard ML\fP that is modeled +on the YACC parser generator for the \fBC\fP language. +.SH "AUTHOR" +.sp +ml\-yacc(1) was written by David Tarditi. +.SH "SEE\-ALSO" +.sp +ml\-antlr(1), ml\-lex(1), ml\-ulex(1) +.sp +\c +.URL "https://smlnj.cs.uchicago.edu/doc/ML\-Yacc/index.html" "ML\-Yacc User\(cqs Manual" +(also included in the \fBSML/NJ\fP documentation). +.SH "COPYING" +.sp +Copyright \(co 2020 The Fellowship of SML/NJ +.sp +This is free software; see the source for copying conditions. There is NO +warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. \ No newline at end of file diff --git a/doc/man/man1/sml.1 b/doc/man/man1/sml.1 new file mode 100644 index 0000000..cb61e2c --- /dev/null +++ b/doc/man/man1/sml.1 @@ -0,0 +1,215 @@ +'\" t +.\" Title: sml +.\" Author: [see the "AUTHOR(S)" section] +.\" Generator: Asciidoctor 2.0.21 +.\" Date: 2024-03-14 +.\" Manual: \ \& +.\" Source: SML/NJ +.\" Language: English +.\" +.TH "SML" "1" "2024-03-14" "SML/NJ" "\ \&" +.ie \n(.g .ds Aq \(aq +.el .ds Aq ' +.ss \n[.ss] 0 +.nh +.ad l +.de URL +\fI\\$2\fP <\\$1>\\$3 +.. +.als MTO URL +.if \n[.g] \{\ +. mso www.tmac +. am URL +. ad l +. . +. am MTO +. ad l +. . +. LINKSTYLE blue R < > +.\} +.SH "NAME" +sml \- the interactive Standard ML of New Jersey system +.SH "SYNOPSIS" +.sp +\fBsml\fP [\fISML\-OPTIONS\fP] [\fIOPTIONS\fP] [\fIFILES\fP] +.SH "DESCRIPTION" +.sp +\fBStandard ML of New Jersey\fP is an incremental, interactive compiler that +accepts \fBSML\fP declarations and expressions to be evaluated from standard input. +.SH "OPTIONS" +.sp +\fB\-32\fP +.RS 4 +run the 32\-bit version of the SML/NJ system (currently the default). +.RE +.sp +\fB\-64\fP +.RS 4 +run the 64\-bit version of the SML/NJ system. This option only applies to +the \fBx86\-64\fP (aka \fBamd64\fP) architecture. +.RE +.sp +\fB@SMLwordsize\fP +.RS 4 +echo the wordsize (\fIi.e.\fP, either \f(CR32\fP or \f(CR64\fP) of the system to standard +output and then exit. (This option was added in version 110.97). +.RE +.sp +\fB@SMLversion\fP +.RS 4 +echo the command name and SML/NJ version (\fIe.g.\fP, "\f(CRsml 110.99.5\fP") +to standard output and then exit. +.RE +.sp +\fB@SMLload\fP=\fIIMAGE\fP +.RS 4 +specifies the name of the heap\-image file to load. The heap suffix can be omitted +as long as there there is not a file of the same name. +.RE +.sp +\fB@SMLcmdname\fP=\fINAME\fP +.RS 4 +set the command name; this is the value returned by \f(CRCommandLine.name()\fP. +.RE +.sp +\fB@SMLsuffix\fP +.RS 4 +echo the heap suffix for the system to standard output and then exit +.RE +.sp +\fB@SMLalloc\fP=\fISIZE\fP +.RS 4 +Specify the prefered size of the allocation area +.RE +.sp +\fB@SMLrun\fP=\fIRUNTIME\fP +.RS 4 +specifies runtime system +.RE +.sp +\fB@SMLquiet\fP +.RS 4 +load heap image silently (default) +.RE +.sp +\fB@SMLverbose\fP +.RS 4 +show heap image load progress +.RE +.sp +\fB@SMLobjects\fP +.RS 4 +show list of executable objects +.RE +.sp +\fB@SMLdebug\fP=\fIFILE\fP +.RS 4 +write debugging info to file +.RE +.sp +\fB\-C\fP\fIctl\fP=\fIvalue\fP +.RS 4 +set the control \fIctl\fP to \fIvalue\fP. +.RE +.sp +\fB\-D\fP\fIname\fP=\fIvalue\fP +.RS 4 +define the CM variable \fIname\fP to have the given value. +.RE +.sp +\fB\-D\fP\*(Aqname +.RS 4 +define the CM variable \fIname\fP to have the value 1. +.RE +.sp +\fB\-U\fP\fIname\fP +.RS 4 +remove any definition of the CM variable \fIname\fP. +.RE +.sp +\fB\-H\fP +.RS 4 +produce complete help listing +.RE +.sp +\fB\-h\fP +.RS 4 +produce minimal help listing +.RE +.sp +\fB\-h\fP\fIlevel\fP +.RS 4 +help with obscurity limit +.RE +.sp +\fB\-S\fP +.RS 4 +list all the controls along with their default value +.RE +.sp +\fB\-s\fP\fIlevel\fP +.RS 4 +print a limited list of settings. +The number of levels of the control heirarchy is controled by \fIlevel\fP, with 0 printing +just the root of the heirarchy. +.RE +.sp +\fB\-E\fP +.RS 4 +list all the controls along with their corresponding environment variables +.RE +.sp +\fB\-e\fP\fIlevel\fP +.RS 4 +print a limited list of environment variables; +The number of levels of the control heirarchy is controled by \fIlevel\fP, with 0 printing +just the root of the heirarchy. +.RE +.sp +\fB\-m\fP +.RS 4 +switch to CM.make +.RE +.sp +\fB\-a\fP +.RS 4 +switch to CM.autoload; default +.RE +.sp +\fIfile.cm\fP +.RS 4 +run \f(CRCM.make\fP or \f(CRCM.autoload\fP on \fIfile.cm\fP and then return to the SML prompt. +Which command is run is determined by the \fB\-m\fP and \fB\-a\fP flags. +.RE +.sp +\fIfile.sig\fP +.RS 4 +execute \f(CRuse\fP \fIfile.sig\fP before entering the SML top\-level loop. +.RE +.sp +\fIfile.sml\fP +.RS 4 +(use) +execute \f(CRuse\fP \fIfile.sml\fP before entering the SML top\-level loop. +.RE +.sp +\fIfile.fun\fP +.RS 4 +(use) +execute \f(CRuse\fP \fIfile.fun\fP before entering the SML top\-level loop. +.RE +.SH "AUTHOR" +.sp +sml(1) was originally created by Andrew Appel and David MacQueen in 1987. +The system is currently maintained by Matthias Blume, David MacQueen, and John Reppy. +.SH "SEE\-ALSO" +.sp +smlnj(7) +.sp +.URL "https://smlnj.org" "\fIStandard ML of New Jersey website\fP" "" +.SH "COPYING" +.sp +Copyright \(co 2020 The Fellowship of SML/NJ +.sp +This is free software; see the source for copying conditions. There is NO +warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. \ No newline at end of file diff --git a/doc/man/man7/smlnj.7 b/doc/man/man7/smlnj.7 new file mode 100644 index 0000000..d41d9ef --- /dev/null +++ b/doc/man/man7/smlnj.7 @@ -0,0 +1,90 @@ +'\" t +.\" Title: smlnj +.\" Author: [see the "AUTHOR(S)" section] +.\" Generator: Asciidoctor 2.0.21 +.\" Date: 2024-03-14 +.\" Manual: \ \& +.\" Source: SML/NJ +.\" Language: English +.\" +.TH "SMLNJ" "7" "2024-03-14" "SML/NJ" "\ \&" +.ie \n(.g .ds Aq \(aq +.el .ds Aq ' +.ss \n[.ss] 0 +.nh +.ad l +.de URL +\fI\\$2\fP <\\$1>\\$3 +.. +.als MTO URL +.if \n[.g] \{\ +. mso www.tmac +. am URL +. ad l +. . +. am MTO +. ad l +. . +. LINKSTYLE blue R < > +.\} +.SH "NAME" +smlnj \- an implementation of the Standard ML language with supporting tools and libraries. +.SH "DESCRIPTION" +.sp +Compiled programs in the SML/NJ system are represented using a pair of the +runtime system executable and a \fIheap image\fP file. Heap images have a filename +suffix that is based on the architecture and operating system. +.SH "AUTHOR" +.sp +The Standard ML of New Jersey system was originally created by Andrew Appel +and David MacQueen in 1987, and is currently supported by Matthias Blume, +David MacQueen, and John Reppy. +.sp +Many people have contributed to SML/NJ over the 30+ years since the +project was started. These include (but are not limited to) +William Aitken +Lars Bergstrom, +Matthias Blume, +Pierre Cregut, +Adam T. Dingle, +Damien Doligez, +Scott Draves, +Bruce F. Duba +Emden Gansner, +Lal George, +Georges Gonthier, +Yngvi Guttesen, +Lorenz Huelsbergen, +Trevor Jim, +George Kuan, +Christopher League, +Mark Leone, +Allen Leung, +Stefan Monnier, +Greg Morrisett, +Riccardo Pucella, +Mike Rainey, +Norman Ramsey, +Jon Riehl, +John Reppy, +Gene Rollins, +Nick Rothwell, +Bratin Saha, +Zhong Shao, +Konrad Slind, +David Tarditti, +Andrew Tolmach, +Valery Trifonov, +Aaron Turon, +and Peter Weinberger. +.SH "SEE\-ALSO" +.sp +sml(1) +.sp +.URL "https://smlnj.org" "\fIStandard ML of New Jersey website\fP" "" +.SH "COPYING" +.sp +Copyright \(co 2020 The Fellowship of SML/NJ +.sp +This is free software; see the source for copying conditions. There is NO +warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. \ No newline at end of file diff --git a/doc/pdf/asdl-manual.pdf b/doc/pdf/asdl-manual.pdf new file mode 100644 index 0000000..9d54767 Binary files /dev/null and b/doc/pdf/asdl-manual.pdf differ diff --git a/doc/pdf/btcomp-manual.pdf b/doc/pdf/btcomp-manual.pdf new file mode 100644 index 0000000..a734951 Binary files /dev/null and b/doc/pdf/btcomp-manual.pdf differ diff --git a/doc/pdf/cm-manual.pdf b/doc/pdf/cm-manual.pdf new file mode 100644 index 0000000..3f6e1b3 Binary files /dev/null and b/doc/pdf/cm-manual.pdf differ diff --git a/doc/pdf/ml-burg-manual.pdf b/doc/pdf/ml-burg-manual.pdf new file mode 100644 index 0000000..0e13666 Binary files /dev/null and b/doc/pdf/ml-burg-manual.pdf differ diff --git a/doc/pdf/ml-lex-manual.pdf b/doc/pdf/ml-lex-manual.pdf new file mode 100644 index 0000000..59373b7 Binary files /dev/null and b/doc/pdf/ml-lex-manual.pdf differ diff --git a/doc/pdf/ml-lpt-manual.pdf b/doc/pdf/ml-lpt-manual.pdf new file mode 100644 index 0000000..8fefa34 Binary files /dev/null and b/doc/pdf/ml-lpt-manual.pdf differ diff --git a/doc/pdf/ml-yacc-manual.pdf b/doc/pdf/ml-yacc-manual.pdf new file mode 100644 index 0000000..a13d140 Binary files /dev/null and b/doc/pdf/ml-yacc-manual.pdf differ diff --git a/doc/pdf/nlffi-manual.pdf b/doc/pdf/nlffi-manual.pdf new file mode 100644 index 0000000..7c94edb Binary files /dev/null and b/doc/pdf/nlffi-manual.pdf differ diff --git a/lib/SMLNJ-BASIS/.cm/amd64-unix/basis-common.cm b/lib/SMLNJ-BASIS/.cm/amd64-unix/basis-common.cm new file mode 100644 index 0000000..e865ade Binary files /dev/null and b/lib/SMLNJ-BASIS/.cm/amd64-unix/basis-common.cm differ diff --git a/lib/SMLNJ-BASIS/.cm/amd64-unix/basis.cm b/lib/SMLNJ-BASIS/.cm/amd64-unix/basis.cm new file mode 100644 index 0000000..60777b6 Binary files /dev/null and b/lib/SMLNJ-BASIS/.cm/amd64-unix/basis.cm differ diff --git a/lib/SMLNJ-LIB/Controls/.cm/amd64-unix/controls-lib.cm b/lib/SMLNJ-LIB/Controls/.cm/amd64-unix/controls-lib.cm new file mode 100644 index 0000000..fce9b4c Binary files /dev/null and b/lib/SMLNJ-LIB/Controls/.cm/amd64-unix/controls-lib.cm differ diff --git a/lib/SMLNJ-LIB/PP/.cm/amd64-unix/pp-lib.cm b/lib/SMLNJ-LIB/PP/.cm/amd64-unix/pp-lib.cm new file mode 100644 index 0000000..1e01b8f Binary files /dev/null and b/lib/SMLNJ-LIB/PP/.cm/amd64-unix/pp-lib.cm differ diff --git a/lib/SMLNJ-LIB/Util/.cm/amd64-unix/smlnj-lib.cm b/lib/SMLNJ-LIB/Util/.cm/amd64-unix/smlnj-lib.cm new file mode 100644 index 0000000..82c974e Binary files /dev/null and b/lib/SMLNJ-LIB/Util/.cm/amd64-unix/smlnj-lib.cm differ diff --git a/lib/SMLNJ-ML-YACC-LIB/.cm/amd64-unix/ml-yacc-lib.cm b/lib/SMLNJ-ML-YACC-LIB/.cm/amd64-unix/ml-yacc-lib.cm new file mode 100644 index 0000000..3d8f35c Binary files /dev/null and b/lib/SMLNJ-ML-YACC-LIB/.cm/amd64-unix/ml-yacc-lib.cm differ diff --git a/lib/SMLNJ-MLRISC/.cm/amd64-unix/AMD64-Peephole.cm b/lib/SMLNJ-MLRISC/.cm/amd64-unix/AMD64-Peephole.cm new file mode 100644 index 0000000..f1d9e98 Binary files /dev/null and b/lib/SMLNJ-MLRISC/.cm/amd64-unix/AMD64-Peephole.cm differ diff --git a/lib/SMLNJ-MLRISC/.cm/amd64-unix/AMD64.cm b/lib/SMLNJ-MLRISC/.cm/amd64-unix/AMD64.cm new file mode 100644 index 0000000..9c22a91 Binary files /dev/null and b/lib/SMLNJ-MLRISC/.cm/amd64-unix/AMD64.cm differ diff --git a/lib/SMLNJ-MLRISC/.cm/amd64-unix/CCall-Vararg.cm b/lib/SMLNJ-MLRISC/.cm/amd64-unix/CCall-Vararg.cm new file mode 100644 index 0000000..ad0361c Binary files /dev/null and b/lib/SMLNJ-MLRISC/.cm/amd64-unix/CCall-Vararg.cm differ diff --git a/lib/SMLNJ-MLRISC/.cm/amd64-unix/CCall-sparc.cm b/lib/SMLNJ-MLRISC/.cm/amd64-unix/CCall-sparc.cm new file mode 100644 index 0000000..ddc2b78 Binary files /dev/null and b/lib/SMLNJ-MLRISC/.cm/amd64-unix/CCall-sparc.cm differ diff --git a/lib/SMLNJ-MLRISC/.cm/amd64-unix/CCall-x86-64.cm b/lib/SMLNJ-MLRISC/.cm/amd64-unix/CCall-x86-64.cm new file mode 100644 index 0000000..3aa0cd4 Binary files /dev/null and b/lib/SMLNJ-MLRISC/.cm/amd64-unix/CCall-x86-64.cm differ diff --git a/lib/SMLNJ-MLRISC/.cm/amd64-unix/CCall-x86.cm b/lib/SMLNJ-MLRISC/.cm/amd64-unix/CCall-x86.cm new file mode 100644 index 0000000..730d4a8 Binary files /dev/null and b/lib/SMLNJ-MLRISC/.cm/amd64-unix/CCall-x86.cm differ diff --git a/lib/SMLNJ-MLRISC/.cm/amd64-unix/CCall.cm b/lib/SMLNJ-MLRISC/.cm/amd64-unix/CCall.cm new file mode 100644 index 0000000..4f577ce Binary files /dev/null and b/lib/SMLNJ-MLRISC/.cm/amd64-unix/CCall.cm differ diff --git a/lib/SMLNJ-MLRISC/.cm/amd64-unix/Control.cm b/lib/SMLNJ-MLRISC/.cm/amd64-unix/Control.cm new file mode 100644 index 0000000..a1cac62 Binary files /dev/null and b/lib/SMLNJ-MLRISC/.cm/amd64-unix/Control.cm differ diff --git a/lib/SMLNJ-MLRISC/.cm/amd64-unix/Graphs.cm b/lib/SMLNJ-MLRISC/.cm/amd64-unix/Graphs.cm new file mode 100644 index 0000000..1760728 Binary files /dev/null and b/lib/SMLNJ-MLRISC/.cm/amd64-unix/Graphs.cm differ diff --git a/lib/SMLNJ-MLRISC/.cm/amd64-unix/IA32-Peephole.cm b/lib/SMLNJ-MLRISC/.cm/amd64-unix/IA32-Peephole.cm new file mode 100644 index 0000000..1fcf8a1 Binary files /dev/null and b/lib/SMLNJ-MLRISC/.cm/amd64-unix/IA32-Peephole.cm differ diff --git a/lib/SMLNJ-MLRISC/.cm/amd64-unix/IA32.cm b/lib/SMLNJ-MLRISC/.cm/amd64-unix/IA32.cm new file mode 100644 index 0000000..03cd210 Binary files /dev/null and b/lib/SMLNJ-MLRISC/.cm/amd64-unix/IA32.cm differ diff --git a/lib/SMLNJ-MLRISC/.cm/amd64-unix/Lib.cm b/lib/SMLNJ-MLRISC/.cm/amd64-unix/Lib.cm new file mode 100644 index 0000000..1752c42 Binary files /dev/null and b/lib/SMLNJ-MLRISC/.cm/amd64-unix/Lib.cm differ diff --git a/lib/SMLNJ-MLRISC/.cm/amd64-unix/MLRISC.cm b/lib/SMLNJ-MLRISC/.cm/amd64-unix/MLRISC.cm new file mode 100644 index 0000000..b4dcaed Binary files /dev/null and b/lib/SMLNJ-MLRISC/.cm/amd64-unix/MLRISC.cm differ diff --git a/lib/SMLNJ-MLRISC/.cm/amd64-unix/MLTREE.cm b/lib/SMLNJ-MLRISC/.cm/amd64-unix/MLTREE.cm new file mode 100644 index 0000000..a2a4699 Binary files /dev/null and b/lib/SMLNJ-MLRISC/.cm/amd64-unix/MLTREE.cm differ diff --git a/lib/SMLNJ-MLRISC/.cm/amd64-unix/PPC.cm b/lib/SMLNJ-MLRISC/.cm/amd64-unix/PPC.cm new file mode 100644 index 0000000..50f8a11 Binary files /dev/null and b/lib/SMLNJ-MLRISC/.cm/amd64-unix/PPC.cm differ diff --git a/lib/SMLNJ-MLRISC/.cm/amd64-unix/Peephole.cm b/lib/SMLNJ-MLRISC/.cm/amd64-unix/Peephole.cm new file mode 100644 index 0000000..d66e40b Binary files /dev/null and b/lib/SMLNJ-MLRISC/.cm/amd64-unix/Peephole.cm differ diff --git a/lib/SMLNJ-MLRISC/.cm/amd64-unix/RA.cm b/lib/SMLNJ-MLRISC/.cm/amd64-unix/RA.cm new file mode 100644 index 0000000..e0deb67 Binary files /dev/null and b/lib/SMLNJ-MLRISC/.cm/amd64-unix/RA.cm differ diff --git a/lib/SMLNJ-MLRISC/.cm/amd64-unix/SPARC.cm b/lib/SMLNJ-MLRISC/.cm/amd64-unix/SPARC.cm new file mode 100644 index 0000000..49097d8 Binary files /dev/null and b/lib/SMLNJ-MLRISC/.cm/amd64-unix/SPARC.cm differ diff --git a/lib/SMLNJ-MLRISC/.cm/amd64-unix/StagedAlloc.cm b/lib/SMLNJ-MLRISC/.cm/amd64-unix/StagedAlloc.cm new file mode 100644 index 0000000..748b2c3 Binary files /dev/null and b/lib/SMLNJ-MLRISC/.cm/amd64-unix/StagedAlloc.cm differ diff --git a/lib/SMLNJ-MLRISC/.cm/amd64-unix/Visual.cm b/lib/SMLNJ-MLRISC/.cm/amd64-unix/Visual.cm new file mode 100644 index 0000000..395cbc0 Binary files /dev/null and b/lib/SMLNJ-MLRISC/.cm/amd64-unix/Visual.cm differ diff --git a/lib/basis-2004.cm/.cm/amd64-unix/basis-2004.cm b/lib/basis-2004.cm/.cm/amd64-unix/basis-2004.cm new file mode 100644 index 0000000..322d405 Binary files /dev/null and b/lib/basis-2004.cm/.cm/amd64-unix/basis-2004.cm differ diff --git a/lib/burg-ext.cm/.cm/amd64-unix/burg-ext.cm b/lib/burg-ext.cm/.cm/amd64-unix/burg-ext.cm new file mode 100644 index 0000000..c7b09f0 Binary files /dev/null and b/lib/burg-ext.cm/.cm/amd64-unix/burg-ext.cm differ diff --git a/lib/ckit-lib.cm/.cm/amd64-unix/ckit-lib.cm b/lib/ckit-lib.cm/.cm/amd64-unix/ckit-lib.cm new file mode 100644 index 0000000..4604e79 Binary files /dev/null and b/lib/ckit-lib.cm/.cm/amd64-unix/ckit-lib.cm differ diff --git a/lib/cml-lib/.cm/amd64-unix/smlnj-lib.cm b/lib/cml-lib/.cm/amd64-unix/smlnj-lib.cm new file mode 100644 index 0000000..37a78f4 Binary files /dev/null and b/lib/cml-lib/.cm/amd64-unix/smlnj-lib.cm differ diff --git a/lib/cml-lib/.cm/amd64-unix/trace-cml.cm b/lib/cml-lib/.cm/amd64-unix/trace-cml.cm new file mode 100644 index 0000000..54d9a16 Binary files /dev/null and b/lib/cml-lib/.cm/amd64-unix/trace-cml.cm differ diff --git a/lib/cml/.cm/amd64-unix/basis.cm b/lib/cml/.cm/amd64-unix/basis.cm new file mode 100644 index 0000000..18b868a Binary files /dev/null and b/lib/cml/.cm/amd64-unix/basis.cm differ diff --git a/lib/cml/.cm/amd64-unix/cml-internal.cm b/lib/cml/.cm/amd64-unix/cml-internal.cm new file mode 100644 index 0000000..41016e0 Binary files /dev/null and b/lib/cml/.cm/amd64-unix/cml-internal.cm differ diff --git a/lib/cml/.cm/amd64-unix/cml-lib.cm b/lib/cml/.cm/amd64-unix/cml-lib.cm new file mode 100644 index 0000000..2dc850c Binary files /dev/null and b/lib/cml/.cm/amd64-unix/cml-lib.cm differ diff --git a/lib/cml/.cm/amd64-unix/cml.cm b/lib/cml/.cm/amd64-unix/cml.cm new file mode 100644 index 0000000..8e06a18 Binary files /dev/null and b/lib/cml/.cm/amd64-unix/cml.cm differ diff --git a/lib/cml/.cm/amd64-unix/core-cml.cm b/lib/cml/.cm/amd64-unix/core-cml.cm new file mode 100644 index 0000000..7613ddc Binary files /dev/null and b/lib/cml/.cm/amd64-unix/core-cml.cm differ diff --git a/lib/cml/.cm/amd64-unix/inet-lib.cm b/lib/cml/.cm/amd64-unix/inet-lib.cm new file mode 100644 index 0000000..e81011d Binary files /dev/null and b/lib/cml/.cm/amd64-unix/inet-lib.cm differ diff --git a/lib/cml/.cm/amd64-unix/smlnj-lib.cm b/lib/cml/.cm/amd64-unix/smlnj-lib.cm new file mode 100644 index 0000000..21279aa Binary files /dev/null and b/lib/cml/.cm/amd64-unix/smlnj-lib.cm differ diff --git a/lib/cml/.cm/amd64-unix/trace-cml.cm b/lib/cml/.cm/amd64-unix/trace-cml.cm new file mode 100644 index 0000000..6151399 Binary files /dev/null and b/lib/cml/.cm/amd64-unix/trace-cml.cm differ diff --git a/lib/cml/.cm/amd64-unix/unix-lib.cm b/lib/cml/.cm/amd64-unix/unix-lib.cm new file mode 100644 index 0000000..a184c98 Binary files /dev/null and b/lib/cml/.cm/amd64-unix/unix-lib.cm differ diff --git a/lib/compiler/Library/const-arith/.cm/amd64-unix/sources.cm b/lib/compiler/Library/const-arith/.cm/amd64-unix/sources.cm new file mode 100644 index 0000000..f82e8e4 Binary files /dev/null and b/lib/compiler/Library/const-arith/.cm/amd64-unix/sources.cm differ diff --git a/lib/dir-tool.cm/.cm/amd64-unix/dir-tool.cm b/lib/dir-tool.cm/.cm/amd64-unix/dir-tool.cm new file mode 100644 index 0000000..9245496 Binary files /dev/null and b/lib/dir-tool.cm/.cm/amd64-unix/dir-tool.cm differ diff --git a/lib/grm-ext.cm/.cm/amd64-unix/grm-ext.cm b/lib/grm-ext.cm/.cm/amd64-unix/grm-ext.cm new file mode 100644 index 0000000..f674b23 Binary files /dev/null and b/lib/grm-ext.cm/.cm/amd64-unix/grm-ext.cm differ diff --git a/lib/hash-cons-lib.cm/.cm/amd64-unix/hash-cons-lib.cm b/lib/hash-cons-lib.cm/.cm/amd64-unix/hash-cons-lib.cm new file mode 100644 index 0000000..0f7606b Binary files /dev/null and b/lib/hash-cons-lib.cm/.cm/amd64-unix/hash-cons-lib.cm differ diff --git a/lib/html-lib.cm/.cm/amd64-unix/html-lib.cm b/lib/html-lib.cm/.cm/amd64-unix/html-lib.cm new file mode 100644 index 0000000..d699a66 Binary files /dev/null and b/lib/html-lib.cm/.cm/amd64-unix/html-lib.cm differ diff --git a/lib/html4-lib.cm/.cm/amd64-unix/html4-lib.cm b/lib/html4-lib.cm/.cm/amd64-unix/html4-lib.cm new file mode 100644 index 0000000..69d23ca Binary files /dev/null and b/lib/html4-lib.cm/.cm/amd64-unix/html4-lib.cm differ diff --git a/lib/inet-lib.cm/.cm/amd64-unix/inet-lib.cm b/lib/inet-lib.cm/.cm/amd64-unix/inet-lib.cm new file mode 100644 index 0000000..8c03c95 Binary files /dev/null and b/lib/inet-lib.cm/.cm/amd64-unix/inet-lib.cm differ diff --git a/lib/json-lib.cm/.cm/amd64-unix/json-lib.cm b/lib/json-lib.cm/.cm/amd64-unix/json-lib.cm new file mode 100644 index 0000000..9de4bf4 Binary files /dev/null and b/lib/json-lib.cm/.cm/amd64-unix/json-lib.cm differ diff --git a/lib/lex-ext.cm/.cm/amd64-unix/lex-ext.cm b/lib/lex-ext.cm/.cm/amd64-unix/lex-ext.cm new file mode 100644 index 0000000..3ca10fb Binary files /dev/null and b/lib/lex-ext.cm/.cm/amd64-unix/lex-ext.cm differ diff --git a/lib/make-tool.cm/.cm/amd64-unix/make-tool.cm b/lib/make-tool.cm/.cm/amd64-unix/make-tool.cm new file mode 100644 index 0000000..851c780 Binary files /dev/null and b/lib/make-tool.cm/.cm/amd64-unix/make-tool.cm differ diff --git a/lib/ml-antlr-tool.cm/.cm/amd64-unix/ml-antlr-tool.cm b/lib/ml-antlr-tool.cm/.cm/amd64-unix/ml-antlr-tool.cm new file mode 100644 index 0000000..90c7a93 Binary files /dev/null and b/lib/ml-antlr-tool.cm/.cm/amd64-unix/ml-antlr-tool.cm differ diff --git a/lib/ml-lpt-lib.cm/.cm/amd64-unix/ml-lpt-lib.cm b/lib/ml-lpt-lib.cm/.cm/amd64-unix/ml-lpt-lib.cm new file mode 100644 index 0000000..d5f0145 Binary files /dev/null and b/lib/ml-lpt-lib.cm/.cm/amd64-unix/ml-lpt-lib.cm differ diff --git a/lib/ml-ulex-tool.cm/.cm/amd64-unix/ml-ulex-tool.cm b/lib/ml-ulex-tool.cm/.cm/amd64-unix/ml-ulex-tool.cm new file mode 100644 index 0000000..447cc3d Binary files /dev/null and b/lib/ml-ulex-tool.cm/.cm/amd64-unix/ml-ulex-tool.cm differ diff --git a/lib/mlburg-tool.cm/.cm/amd64-unix/mlburg-tool.cm b/lib/mlburg-tool.cm/.cm/amd64-unix/mlburg-tool.cm new file mode 100644 index 0000000..7690a01 Binary files /dev/null and b/lib/mlburg-tool.cm/.cm/amd64-unix/mlburg-tool.cm differ diff --git a/lib/mllex-tool.cm/.cm/amd64-unix/mllex-tool.cm b/lib/mllex-tool.cm/.cm/amd64-unix/mllex-tool.cm new file mode 100644 index 0000000..ae5bb02 Binary files /dev/null and b/lib/mllex-tool.cm/.cm/amd64-unix/mllex-tool.cm differ diff --git a/lib/mlyacc-tool.cm/.cm/amd64-unix/mlyacc-tool.cm b/lib/mlyacc-tool.cm/.cm/amd64-unix/mlyacc-tool.cm new file mode 100644 index 0000000..3e2b5bb Binary files /dev/null and b/lib/mlyacc-tool.cm/.cm/amd64-unix/mlyacc-tool.cm differ diff --git a/lib/noweb-tool.cm/.cm/amd64-unix/noweb-tool.cm b/lib/noweb-tool.cm/.cm/amd64-unix/noweb-tool.cm new file mode 100644 index 0000000..3cfbc04 Binary files /dev/null and b/lib/noweb-tool.cm/.cm/amd64-unix/noweb-tool.cm differ diff --git a/lib/nw-ext.cm/.cm/amd64-unix/nw-ext.cm b/lib/nw-ext.cm/.cm/amd64-unix/nw-ext.cm new file mode 100644 index 0000000..f9d5a55 Binary files /dev/null and b/lib/nw-ext.cm/.cm/amd64-unix/nw-ext.cm differ diff --git a/lib/pathconfig b/lib/pathconfig new file mode 100644 index 0000000..555f08f --- /dev/null +++ b/lib/pathconfig @@ -0,0 +1,45 @@ +SMLNJ-BASIS SMLNJ-BASIS +SMLNJ-LIB SMLNJ-LIB +SMLNJ-ML-YACC-LIB SMLNJ-ML-YACC-LIB +SMLNJ-MLRISC SMLNJ-MLRISC +basis-2004.cm basis-2004.cm +basis.cm smlnj/basis +bindir ../bin +burg-ext.cm burg-ext.cm +ckit-lib.cm ckit-lib.cm +cml cml +cml-lib cml-lib +compiler compiler +controls-lib.cm smlnj/smlnj-lib +dir-tool.cm dir-tool.cm +grm-ext.cm grm-ext.cm +hash-cons-lib.cm hash-cons-lib.cm +html-lib.cm html-lib.cm +html4-lib.cm html4-lib.cm +inet-lib.cm inet-lib.cm +json-lib.cm json-lib.cm +lex-ext.cm lex-ext.cm +make-tool.cm make-tool.cm +ml-antlr-tool.cm ml-antlr-tool.cm +ml-lpt-lib.cm ml-lpt-lib.cm +ml-ulex-tool.cm ml-ulex-tool.cm +ml-yacc-lib.cm smlnj/ml-yacc +mlburg-tool.cm mlburg-tool.cm +mllex-tool.cm mllex-tool.cm +mlyacc-tool.cm mlyacc-tool.cm +noweb-tool.cm noweb-tool.cm +nw-ext.cm nw-ext.cm +pgraph.cm pgraph.cm +pickle-lib.cm pickle-lib.cm +pp-extras-lib.cm pp-extras-lib.cm +pp-lib.cm smlnj/smlnj-lib +reactive-lib.cm reactive-lib.cm +regexp-lib.cm regexp-lib.cm +sexp-lib.cm sexp-lib.cm +shell-tool.cm shell-tool.cm +smlnj smlnj +smlnj-lib.cm smlnj/smlnj-lib +smlnj-tdp smlnj-tdp +unix-lib.cm unix-lib.cm +uuid-lib.cm uuid-lib.cm +xml-lib.cm xml-lib.cm diff --git a/lib/pgraph.cm/.cm/amd64-unix/pgraph.cm b/lib/pgraph.cm/.cm/amd64-unix/pgraph.cm new file mode 100644 index 0000000..9f96ab0 Binary files /dev/null and b/lib/pgraph.cm/.cm/amd64-unix/pgraph.cm differ diff --git a/lib/pickle-lib.cm/.cm/amd64-unix/pickle-lib.cm b/lib/pickle-lib.cm/.cm/amd64-unix/pickle-lib.cm new file mode 100644 index 0000000..e3d29ec Binary files /dev/null and b/lib/pickle-lib.cm/.cm/amd64-unix/pickle-lib.cm differ diff --git a/lib/pp-extras-lib.cm/.cm/amd64-unix/pp-extras-lib.cm b/lib/pp-extras-lib.cm/.cm/amd64-unix/pp-extras-lib.cm new file mode 100644 index 0000000..fdf79bf Binary files /dev/null and b/lib/pp-extras-lib.cm/.cm/amd64-unix/pp-extras-lib.cm differ diff --git a/lib/reactive-lib.cm/.cm/amd64-unix/reactive-lib.cm b/lib/reactive-lib.cm/.cm/amd64-unix/reactive-lib.cm new file mode 100644 index 0000000..2dd1448 Binary files /dev/null and b/lib/reactive-lib.cm/.cm/amd64-unix/reactive-lib.cm differ diff --git a/lib/regexp-lib.cm/.cm/amd64-unix/regexp-lib.cm b/lib/regexp-lib.cm/.cm/amd64-unix/regexp-lib.cm new file mode 100644 index 0000000..9159fc4 Binary files /dev/null and b/lib/regexp-lib.cm/.cm/amd64-unix/regexp-lib.cm differ diff --git a/lib/sexp-lib.cm/.cm/amd64-unix/sexp-lib.cm b/lib/sexp-lib.cm/.cm/amd64-unix/sexp-lib.cm new file mode 100644 index 0000000..541b8a8 Binary files /dev/null and b/lib/sexp-lib.cm/.cm/amd64-unix/sexp-lib.cm differ diff --git a/lib/shell-tool.cm/.cm/amd64-unix/shell-tool.cm b/lib/shell-tool.cm/.cm/amd64-unix/shell-tool.cm new file mode 100644 index 0000000..e7447bd Binary files /dev/null and b/lib/shell-tool.cm/.cm/amd64-unix/shell-tool.cm differ diff --git a/lib/smlnj-tdp/.cm/amd64-unix/back-trace.cm b/lib/smlnj-tdp/.cm/amd64-unix/back-trace.cm new file mode 100644 index 0000000..4af3ed3 Binary files /dev/null and b/lib/smlnj-tdp/.cm/amd64-unix/back-trace.cm differ diff --git a/lib/smlnj-tdp/.cm/amd64-unix/coverage.cm b/lib/smlnj-tdp/.cm/amd64-unix/coverage.cm new file mode 100644 index 0000000..ddc2fa4 Binary files /dev/null and b/lib/smlnj-tdp/.cm/amd64-unix/coverage.cm differ diff --git a/lib/smlnj-tdp/.cm/amd64-unix/plugins.cm b/lib/smlnj-tdp/.cm/amd64-unix/plugins.cm new file mode 100644 index 0000000..0981b49 Binary files /dev/null and b/lib/smlnj-tdp/.cm/amd64-unix/plugins.cm differ diff --git a/lib/smlnj/.cm/amd64-unix/cm.cm b/lib/smlnj/.cm/amd64-unix/cm.cm new file mode 100644 index 0000000..91a8072 Binary files /dev/null and b/lib/smlnj/.cm/amd64-unix/cm.cm differ diff --git a/lib/smlnj/.cm/amd64-unix/cmb.cm b/lib/smlnj/.cm/amd64-unix/cmb.cm new file mode 100644 index 0000000..94e55a2 Binary files /dev/null and b/lib/smlnj/.cm/amd64-unix/cmb.cm differ diff --git a/lib/smlnj/.cm/amd64-unix/compiler.cm b/lib/smlnj/.cm/amd64-unix/compiler.cm new file mode 100644 index 0000000..e34c088 Binary files /dev/null and b/lib/smlnj/.cm/amd64-unix/compiler.cm differ diff --git a/lib/smlnj/.cm/amd64-unix/installer.cm b/lib/smlnj/.cm/amd64-unix/installer.cm new file mode 100644 index 0000000..e36737b Binary files /dev/null and b/lib/smlnj/.cm/amd64-unix/installer.cm differ diff --git a/lib/smlnj/.cm/amd64-unix/library-install.cm b/lib/smlnj/.cm/amd64-unix/library-install.cm new file mode 100644 index 0000000..f54b89a Binary files /dev/null and b/lib/smlnj/.cm/amd64-unix/library-install.cm differ diff --git a/lib/smlnj/MLRISC/.cm/amd64-unix/AMD64.cm b/lib/smlnj/MLRISC/.cm/amd64-unix/AMD64.cm new file mode 100644 index 0000000..3bd3a6f Binary files /dev/null and b/lib/smlnj/MLRISC/.cm/amd64-unix/AMD64.cm differ diff --git a/lib/smlnj/MLRISC/.cm/amd64-unix/Control.cm b/lib/smlnj/MLRISC/.cm/amd64-unix/Control.cm new file mode 100644 index 0000000..ed8746e Binary files /dev/null and b/lib/smlnj/MLRISC/.cm/amd64-unix/Control.cm differ diff --git a/lib/smlnj/MLRISC/.cm/amd64-unix/Graphs.cm b/lib/smlnj/MLRISC/.cm/amd64-unix/Graphs.cm new file mode 100644 index 0000000..1133b01 Binary files /dev/null and b/lib/smlnj/MLRISC/.cm/amd64-unix/Graphs.cm differ diff --git a/lib/smlnj/MLRISC/.cm/amd64-unix/IA32.cm b/lib/smlnj/MLRISC/.cm/amd64-unix/IA32.cm new file mode 100644 index 0000000..bd34e7f Binary files /dev/null and b/lib/smlnj/MLRISC/.cm/amd64-unix/IA32.cm differ diff --git a/lib/smlnj/MLRISC/.cm/amd64-unix/Lib.cm b/lib/smlnj/MLRISC/.cm/amd64-unix/Lib.cm new file mode 100644 index 0000000..355cdf7 Binary files /dev/null and b/lib/smlnj/MLRISC/.cm/amd64-unix/Lib.cm differ diff --git a/lib/smlnj/MLRISC/.cm/amd64-unix/MLRISC.cm b/lib/smlnj/MLRISC/.cm/amd64-unix/MLRISC.cm new file mode 100644 index 0000000..cbbdeaf Binary files /dev/null and b/lib/smlnj/MLRISC/.cm/amd64-unix/MLRISC.cm differ diff --git a/lib/smlnj/MLRISC/.cm/amd64-unix/MLTREE.cm b/lib/smlnj/MLRISC/.cm/amd64-unix/MLTREE.cm new file mode 100644 index 0000000..0713aa2 Binary files /dev/null and b/lib/smlnj/MLRISC/.cm/amd64-unix/MLTREE.cm differ diff --git a/lib/smlnj/MLRISC/.cm/amd64-unix/PPC.cm b/lib/smlnj/MLRISC/.cm/amd64-unix/PPC.cm new file mode 100644 index 0000000..990fd02 Binary files /dev/null and b/lib/smlnj/MLRISC/.cm/amd64-unix/PPC.cm differ diff --git a/lib/smlnj/MLRISC/.cm/amd64-unix/SPARC.cm b/lib/smlnj/MLRISC/.cm/amd64-unix/SPARC.cm new file mode 100644 index 0000000..9f725f0 Binary files /dev/null and b/lib/smlnj/MLRISC/.cm/amd64-unix/SPARC.cm differ diff --git a/lib/smlnj/MLRISC/.cm/amd64-unix/Visual.cm b/lib/smlnj/MLRISC/.cm/amd64-unix/Visual.cm new file mode 100644 index 0000000..2cdd95e Binary files /dev/null and b/lib/smlnj/MLRISC/.cm/amd64-unix/Visual.cm differ diff --git a/lib/smlnj/basis/.cm/amd64-unix/basis.cm b/lib/smlnj/basis/.cm/amd64-unix/basis.cm new file mode 100644 index 0000000..411d5b4 Binary files /dev/null and b/lib/smlnj/basis/.cm/amd64-unix/basis.cm differ diff --git a/lib/smlnj/cm/.cm/amd64-unix/cm.cm b/lib/smlnj/cm/.cm/amd64-unix/cm.cm new file mode 100644 index 0000000..fcaf064 Binary files /dev/null and b/lib/smlnj/cm/.cm/amd64-unix/cm.cm differ diff --git a/lib/smlnj/cm/.cm/amd64-unix/tools.cm b/lib/smlnj/cm/.cm/amd64-unix/tools.cm new file mode 100644 index 0000000..79aff69 Binary files /dev/null and b/lib/smlnj/cm/.cm/amd64-unix/tools.cm differ diff --git a/lib/smlnj/cmb/.cm/amd64-unix/amd64-unix.cm b/lib/smlnj/cmb/.cm/amd64-unix/amd64-unix.cm new file mode 100644 index 0000000..6895ec3 Binary files /dev/null and b/lib/smlnj/cmb/.cm/amd64-unix/amd64-unix.cm differ diff --git a/lib/smlnj/cmb/.cm/amd64-unix/current.cm b/lib/smlnj/cmb/.cm/amd64-unix/current.cm new file mode 100644 index 0000000..c1a8e50 Binary files /dev/null and b/lib/smlnj/cmb/.cm/amd64-unix/current.cm differ diff --git a/lib/smlnj/cmb/.cm/amd64-unix/ppc-unix.cm b/lib/smlnj/cmb/.cm/amd64-unix/ppc-unix.cm new file mode 100644 index 0000000..fd25216 Binary files /dev/null and b/lib/smlnj/cmb/.cm/amd64-unix/ppc-unix.cm differ diff --git a/lib/smlnj/cmb/.cm/amd64-unix/sparc-unix.cm b/lib/smlnj/cmb/.cm/amd64-unix/sparc-unix.cm new file mode 100644 index 0000000..7c1bd66 Binary files /dev/null and b/lib/smlnj/cmb/.cm/amd64-unix/sparc-unix.cm differ diff --git a/lib/smlnj/cmb/.cm/amd64-unix/x86-unix.cm b/lib/smlnj/cmb/.cm/amd64-unix/x86-unix.cm new file mode 100644 index 0000000..4a82313 Binary files /dev/null and b/lib/smlnj/cmb/.cm/amd64-unix/x86-unix.cm differ diff --git a/lib/smlnj/cmb/.cm/amd64-unix/x86-win32.cm b/lib/smlnj/cmb/.cm/amd64-unix/x86-win32.cm new file mode 100644 index 0000000..105503e Binary files /dev/null and b/lib/smlnj/cmb/.cm/amd64-unix/x86-win32.cm differ diff --git a/lib/smlnj/compiler/.cm/amd64-unix/all.cm b/lib/smlnj/compiler/.cm/amd64-unix/all.cm new file mode 100644 index 0000000..ccb08e7 Binary files /dev/null and b/lib/smlnj/compiler/.cm/amd64-unix/all.cm differ diff --git a/lib/smlnj/compiler/.cm/amd64-unix/amd64.cm b/lib/smlnj/compiler/.cm/amd64-unix/amd64.cm new file mode 100644 index 0000000..375b591 Binary files /dev/null and b/lib/smlnj/compiler/.cm/amd64-unix/amd64.cm differ diff --git a/lib/smlnj/compiler/.cm/amd64-unix/compiler.cm b/lib/smlnj/compiler/.cm/amd64-unix/compiler.cm new file mode 100644 index 0000000..525b9eb Binary files /dev/null and b/lib/smlnj/compiler/.cm/amd64-unix/compiler.cm differ diff --git a/lib/smlnj/compiler/.cm/amd64-unix/current.cm b/lib/smlnj/compiler/.cm/amd64-unix/current.cm new file mode 100644 index 0000000..5977919 Binary files /dev/null and b/lib/smlnj/compiler/.cm/amd64-unix/current.cm differ diff --git a/lib/smlnj/compiler/.cm/amd64-unix/minimal-only.cm b/lib/smlnj/compiler/.cm/amd64-unix/minimal-only.cm new file mode 100644 index 0000000..9b5ccc5 Binary files /dev/null and b/lib/smlnj/compiler/.cm/amd64-unix/minimal-only.cm differ diff --git a/lib/smlnj/compiler/.cm/amd64-unix/minimal.cm b/lib/smlnj/compiler/.cm/amd64-unix/minimal.cm new file mode 100644 index 0000000..783ba5e Binary files /dev/null and b/lib/smlnj/compiler/.cm/amd64-unix/minimal.cm differ diff --git a/lib/smlnj/compiler/.cm/amd64-unix/ppc.cm b/lib/smlnj/compiler/.cm/amd64-unix/ppc.cm new file mode 100644 index 0000000..1aca8d0 Binary files /dev/null and b/lib/smlnj/compiler/.cm/amd64-unix/ppc.cm differ diff --git a/lib/smlnj/compiler/.cm/amd64-unix/sparc.cm b/lib/smlnj/compiler/.cm/amd64-unix/sparc.cm new file mode 100644 index 0000000..c97c838 Binary files /dev/null and b/lib/smlnj/compiler/.cm/amd64-unix/sparc.cm differ diff --git a/lib/smlnj/compiler/.cm/amd64-unix/x86.cm b/lib/smlnj/compiler/.cm/amd64-unix/x86.cm new file mode 100644 index 0000000..c94b401 Binary files /dev/null and b/lib/smlnj/compiler/.cm/amd64-unix/x86.cm differ diff --git a/lib/smlnj/init/.cm/amd64-unix/init.cmi b/lib/smlnj/init/.cm/amd64-unix/init.cmi new file mode 100644 index 0000000..c92c01a Binary files /dev/null and b/lib/smlnj/init/.cm/amd64-unix/init.cmi differ diff --git a/lib/smlnj/installer/.cm/amd64-unix/util.cm b/lib/smlnj/installer/.cm/amd64-unix/util.cm new file mode 100644 index 0000000..8309afc Binary files /dev/null and b/lib/smlnj/installer/.cm/amd64-unix/util.cm differ diff --git a/lib/smlnj/internal/.cm/amd64-unix/cm-lib.cm b/lib/smlnj/internal/.cm/amd64-unix/cm-lib.cm new file mode 100644 index 0000000..648167f Binary files /dev/null and b/lib/smlnj/internal/.cm/amd64-unix/cm-lib.cm differ diff --git a/lib/smlnj/internal/.cm/amd64-unix/cm-sig-lib.cm b/lib/smlnj/internal/.cm/amd64-unix/cm-sig-lib.cm new file mode 100644 index 0000000..b5b4706 Binary files /dev/null and b/lib/smlnj/internal/.cm/amd64-unix/cm-sig-lib.cm differ diff --git a/lib/smlnj/internal/.cm/amd64-unix/cm0.cm b/lib/smlnj/internal/.cm/amd64-unix/cm0.cm new file mode 100644 index 0000000..6299a7d Binary files /dev/null and b/lib/smlnj/internal/.cm/amd64-unix/cm0.cm differ diff --git a/lib/smlnj/internal/.cm/amd64-unix/intsys.cm b/lib/smlnj/internal/.cm/amd64-unix/intsys.cm new file mode 100644 index 0000000..df576ed Binary files /dev/null and b/lib/smlnj/internal/.cm/amd64-unix/intsys.cm differ diff --git a/lib/smlnj/internal/.cm/amd64-unix/smlnj-version.cm b/lib/smlnj/internal/.cm/amd64-unix/smlnj-version.cm new file mode 100644 index 0000000..00823c2 Binary files /dev/null and b/lib/smlnj/internal/.cm/amd64-unix/smlnj-version.cm differ diff --git a/lib/smlnj/internal/.cm/amd64-unix/srcpath-lib.cm b/lib/smlnj/internal/.cm/amd64-unix/srcpath-lib.cm new file mode 100644 index 0000000..a02038b Binary files /dev/null and b/lib/smlnj/internal/.cm/amd64-unix/srcpath-lib.cm differ diff --git a/lib/smlnj/ml-yacc/.cm/amd64-unix/ml-yacc-lib.cm b/lib/smlnj/ml-yacc/.cm/amd64-unix/ml-yacc-lib.cm new file mode 100644 index 0000000..78ecb03 Binary files /dev/null and b/lib/smlnj/ml-yacc/.cm/amd64-unix/ml-yacc-lib.cm differ diff --git a/lib/smlnj/smlnj-lib/.cm/amd64-unix/controls-lib.cm b/lib/smlnj/smlnj-lib/.cm/amd64-unix/controls-lib.cm new file mode 100644 index 0000000..19b3cf6 Binary files /dev/null and b/lib/smlnj/smlnj-lib/.cm/amd64-unix/controls-lib.cm differ diff --git a/lib/smlnj/smlnj-lib/.cm/amd64-unix/pp-lib.cm b/lib/smlnj/smlnj-lib/.cm/amd64-unix/pp-lib.cm new file mode 100644 index 0000000..1cf42f3 Binary files /dev/null and b/lib/smlnj/smlnj-lib/.cm/amd64-unix/pp-lib.cm differ diff --git a/lib/smlnj/smlnj-lib/.cm/amd64-unix/smlnj-lib.cm b/lib/smlnj/smlnj-lib/.cm/amd64-unix/smlnj-lib.cm new file mode 100644 index 0000000..5f90493 Binary files /dev/null and b/lib/smlnj/smlnj-lib/.cm/amd64-unix/smlnj-lib.cm differ diff --git a/lib/smlnj/viscomp/.cm/amd64-unix/amd64.cm b/lib/smlnj/viscomp/.cm/amd64-unix/amd64.cm new file mode 100644 index 0000000..5583256 Binary files /dev/null and b/lib/smlnj/viscomp/.cm/amd64-unix/amd64.cm differ diff --git a/lib/smlnj/viscomp/.cm/amd64-unix/basics.cm b/lib/smlnj/viscomp/.cm/amd64-unix/basics.cm new file mode 100644 index 0000000..0a7f3ec Binary files /dev/null and b/lib/smlnj/viscomp/.cm/amd64-unix/basics.cm differ diff --git a/lib/smlnj/viscomp/.cm/amd64-unix/core.cm b/lib/smlnj/viscomp/.cm/amd64-unix/core.cm new file mode 100644 index 0000000..fdf7520 Binary files /dev/null and b/lib/smlnj/viscomp/.cm/amd64-unix/core.cm differ diff --git a/lib/smlnj/viscomp/.cm/amd64-unix/debugprof.cm b/lib/smlnj/viscomp/.cm/amd64-unix/debugprof.cm new file mode 100644 index 0000000..f05dd20 Binary files /dev/null and b/lib/smlnj/viscomp/.cm/amd64-unix/debugprof.cm differ diff --git a/lib/smlnj/viscomp/.cm/amd64-unix/elabdata.cm b/lib/smlnj/viscomp/.cm/amd64-unix/elabdata.cm new file mode 100644 index 0000000..3f9e1b5 Binary files /dev/null and b/lib/smlnj/viscomp/.cm/amd64-unix/elabdata.cm differ diff --git a/lib/smlnj/viscomp/.cm/amd64-unix/elaborate.cm b/lib/smlnj/viscomp/.cm/amd64-unix/elaborate.cm new file mode 100644 index 0000000..c7f48a3 Binary files /dev/null and b/lib/smlnj/viscomp/.cm/amd64-unix/elaborate.cm differ diff --git a/lib/smlnj/viscomp/.cm/amd64-unix/execute.cm b/lib/smlnj/viscomp/.cm/amd64-unix/execute.cm new file mode 100644 index 0000000..7ac556e Binary files /dev/null and b/lib/smlnj/viscomp/.cm/amd64-unix/execute.cm differ diff --git a/lib/smlnj/viscomp/.cm/amd64-unix/parser.cm b/lib/smlnj/viscomp/.cm/amd64-unix/parser.cm new file mode 100644 index 0000000..2e5380d Binary files /dev/null and b/lib/smlnj/viscomp/.cm/amd64-unix/parser.cm differ diff --git a/lib/smlnj/viscomp/.cm/amd64-unix/ppc.cm b/lib/smlnj/viscomp/.cm/amd64-unix/ppc.cm new file mode 100644 index 0000000..c3bc862 Binary files /dev/null and b/lib/smlnj/viscomp/.cm/amd64-unix/ppc.cm differ diff --git a/lib/smlnj/viscomp/.cm/amd64-unix/sparc.cm b/lib/smlnj/viscomp/.cm/amd64-unix/sparc.cm new file mode 100644 index 0000000..8b4eae4 Binary files /dev/null and b/lib/smlnj/viscomp/.cm/amd64-unix/sparc.cm differ diff --git a/lib/smlnj/viscomp/.cm/amd64-unix/x86.cm b/lib/smlnj/viscomp/.cm/amd64-unix/x86.cm new file mode 100644 index 0000000..611cc4f Binary files /dev/null and b/lib/smlnj/viscomp/.cm/amd64-unix/x86.cm differ diff --git a/lib/unix-lib.cm/.cm/amd64-unix/unix-lib.cm b/lib/unix-lib.cm/.cm/amd64-unix/unix-lib.cm new file mode 100644 index 0000000..cea85ce Binary files /dev/null and b/lib/unix-lib.cm/.cm/amd64-unix/unix-lib.cm differ diff --git a/lib/uuid-lib.cm/.cm/amd64-unix/uuid-lib.cm b/lib/uuid-lib.cm/.cm/amd64-unix/uuid-lib.cm new file mode 100644 index 0000000..51b8b47 Binary files /dev/null and b/lib/uuid-lib.cm/.cm/amd64-unix/uuid-lib.cm differ diff --git a/lib/xml-lib.cm/.cm/amd64-unix/xml-lib.cm b/lib/xml-lib.cm/.cm/amd64-unix/xml-lib.cm new file mode 100644 index 0000000..e896f50 Binary files /dev/null and b/lib/xml-lib.cm/.cm/amd64-unix/xml-lib.cm differ diff --git a/ml-burg.tgz b/ml-burg.tgz new file mode 100644 index 0000000..db6fbd2 Binary files /dev/null and b/ml-burg.tgz differ diff --git a/ml-burg/.cm/GUID/burg-ast.sml b/ml-burg/.cm/GUID/burg-ast.sml new file mode 100644 index 0000000..71d51b4 --- /dev/null +++ b/ml-burg/.cm/GUID/burg-ast.sml @@ -0,0 +1 @@ +guid-(ml-burg.cm):burg-ast.sml-1714016079.860 diff --git a/ml-burg/.cm/GUID/burg-gram.sig b/ml-burg/.cm/GUID/burg-gram.sig new file mode 100644 index 0000000..ce66454 --- /dev/null +++ b/ml-burg/.cm/GUID/burg-gram.sig @@ -0,0 +1 @@ +guid-(ml-burg.cm):burg-gram.sig-1714016079.870 diff --git a/ml-burg/.cm/GUID/burg-gram.sml b/ml-burg/.cm/GUID/burg-gram.sml new file mode 100644 index 0000000..fd3e75c --- /dev/null +++ b/ml-burg/.cm/GUID/burg-gram.sml @@ -0,0 +1 @@ +guid-(ml-burg.cm):burg-gram.sml-1714016079.878 diff --git a/ml-burg/.cm/GUID/burg-lex.sml b/ml-burg/.cm/GUID/burg-lex.sml new file mode 100644 index 0000000..e0ffdad --- /dev/null +++ b/ml-burg/.cm/GUID/burg-lex.sml @@ -0,0 +1 @@ +guid-(ml-burg.cm):burg-lex.sml-1714016080.176 diff --git a/ml-burg/.cm/GUID/burg.sml b/ml-burg/.cm/GUID/burg.sml new file mode 100644 index 0000000..e631e16 --- /dev/null +++ b/ml-burg/.cm/GUID/burg.sml @@ -0,0 +1 @@ +guid-(ml-burg.cm):burg.sml-1714016080.433 diff --git a/ml-burg/.cm/GUID/errormsg.sml b/ml-burg/.cm/GUID/errormsg.sml new file mode 100644 index 0000000..0707e63 --- /dev/null +++ b/ml-burg/.cm/GUID/errormsg.sml @@ -0,0 +1 @@ +guid-(ml-burg.cm):errormsg.sml-1714016080.156 diff --git a/ml-burg/.cm/GUID/main.sml b/ml-burg/.cm/GUID/main.sml new file mode 100644 index 0000000..a4090d0 --- /dev/null +++ b/ml-burg/.cm/GUID/main.sml @@ -0,0 +1 @@ +guid-(ml-burg.cm):main.sml-1714016080.953 diff --git a/ml-burg/.cm/GUID/parse.sml b/ml-burg/.cm/GUID/parse.sml new file mode 100644 index 0000000..1899ac0 --- /dev/null +++ b/ml-burg/.cm/GUID/parse.sml @@ -0,0 +1 @@ +guid-(ml-burg.cm):parse.sml-1714016080.401 diff --git a/ml-burg/.cm/SKEL/burg-ast.sml b/ml-burg/.cm/SKEL/burg-ast.sml new file mode 100644 index 0000000..537ad75 --- /dev/null +++ b/ml-burg/.cm/SKEL/burg-ast.sml @@ -0,0 +1,2 @@ +Skeleton 5 +ad"BurgAST"h0 \ No newline at end of file diff --git a/ml-burg/.cm/SKEL/burg-gram.sig b/ml-burg/.cm/SKEL/burg-gram.sig new file mode 100644 index 0000000..415e12c --- /dev/null +++ b/ml-burg/.cm/SKEL/burg-gram.sig @@ -0,0 +1,2 @@ +Skeleton 5 +d2aBurg_TOKENS"h0ac"Burg_LRVALS"h2ad"Tokens"gp1ad"ParserData"gp1c"PARSER_DATA" \ No newline at end of file diff --git a/ml-burg/.cm/SKEL/burg-gram.sml b/ml-burg/.cm/SKEL/burg-gram.sml new file mode 100644 index 0000000..17069cd --- /dev/null +++ b/ml-burg/.cm/SKEL/burg-gram.sml @@ -0,0 +1,2 @@ +Skeleton 5 +ae"BurgLrValsFun"i2aToken"gp1c"TOKEN"fjh2aParserData"h7aHeader"h1aA"gp1d"BurgAST"aLrTable"gp2Cagp(bf4d"Char"d"String"d"Array"b?d2f1 aMlyValue"0ad"EC"h28b+ad"Actions"h2bf3 +NaTokens"j6Burg_TOKENS"h2a/gp1c"PARSER_DATA"a-: \ No newline at end of file diff --git a/ml-burg/.cm/SKEL/burg-lex.sml b/ml-burg/.cm/SKEL/burg-lex.sml new file mode 100644 index 0000000..522f1ad Binary files /dev/null and b/ml-burg/.cm/SKEL/burg-lex.sml differ diff --git a/ml-burg/.cm/SKEL/burg.sml b/ml-burg/.cm/SKEL/burg.sml new file mode 100644 index 0000000..73c70c1 --- /dev/null +++ b/ml-burg/.cm/SKEL/burg.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d3f2d"HashString"TextIO"aBURGEMIT"0ad"BurgEmit"jh4aHashStringKey"j&gp1c"HASH_KEY"aBurgHash"jgp18gp1e"HashTableFn"egp1d"BurgAST"f8d"Char"d"List"Cd"Int"d"Parse"d"String"d"Array"Ngp1 \ No newline at end of file diff --git a/ml-burg/.cm/SKEL/errormsg.sml b/ml-burg/.cm/SKEL/errormsg.sml new file mode 100644 index 0000000..c6c545b --- /dev/null +++ b/ml-burg/.cm/SKEL/errormsg.sml @@ -0,0 +1,2 @@ +Skeleton 5 +ad"ErrorMsg"h0 \ No newline at end of file diff --git a/ml-burg/.cm/SKEL/main.sml b/ml-burg/.cm/SKEL/main.sml new file mode 100644 index 0000000..662f236 --- /dev/null +++ b/ml-burg/.cm/SKEL/main.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f4d"OS"d"BurgEmit"d"General"d"TextIO"ad"Main"h0 \ No newline at end of file diff --git a/ml-burg/.cm/SKEL/parse.sml b/ml-burg/.cm/SKEL/parse.sml new file mode 100644 index 0000000..564ba9d --- /dev/null +++ b/ml-burg/.cm/SKEL/parse.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f2d"Int"d"TextIO"ad"Parse"h3aBurgLrVals"jh1aToken"gp2LrParser"0gp1e"BurgLrValsFun"aBurgLex"jh1aTokens"gp2*gp1e"BurgLexFun"ad"BurgParser"jh3aParserData"gp2ad"Lex"gp1a;gp1;gp1e"Join" \ No newline at end of file diff --git a/ml-burg/.cm/amd64-unix/burg-ast.sml b/ml-burg/.cm/amd64-unix/burg-ast.sml new file mode 100644 index 0000000..9aebd68 Binary files /dev/null and b/ml-burg/.cm/amd64-unix/burg-ast.sml differ diff --git a/ml-burg/.cm/amd64-unix/burg-gram.sig b/ml-burg/.cm/amd64-unix/burg-gram.sig new file mode 100644 index 0000000..0d30b6d Binary files /dev/null and b/ml-burg/.cm/amd64-unix/burg-gram.sig differ diff --git a/ml-burg/.cm/amd64-unix/burg-gram.sml b/ml-burg/.cm/amd64-unix/burg-gram.sml new file mode 100644 index 0000000..eb9808f Binary files /dev/null and b/ml-burg/.cm/amd64-unix/burg-gram.sml differ diff --git a/ml-burg/.cm/amd64-unix/burg-lex.sml b/ml-burg/.cm/amd64-unix/burg-lex.sml new file mode 100644 index 0000000..34049bf Binary files /dev/null and b/ml-burg/.cm/amd64-unix/burg-lex.sml differ diff --git a/ml-burg/.cm/amd64-unix/burg.sml b/ml-burg/.cm/amd64-unix/burg.sml new file mode 100644 index 0000000..fe84be7 Binary files /dev/null and b/ml-burg/.cm/amd64-unix/burg.sml differ diff --git a/ml-burg/.cm/amd64-unix/errormsg.sml b/ml-burg/.cm/amd64-unix/errormsg.sml new file mode 100644 index 0000000..945b241 Binary files /dev/null and b/ml-burg/.cm/amd64-unix/errormsg.sml differ diff --git a/ml-burg/.cm/amd64-unix/main.sml b/ml-burg/.cm/amd64-unix/main.sml new file mode 100644 index 0000000..29c6ea8 Binary files /dev/null and b/ml-burg/.cm/amd64-unix/main.sml differ diff --git a/ml-burg/.cm/amd64-unix/parse.sml b/ml-burg/.cm/amd64-unix/parse.sml new file mode 100644 index 0000000..4008f8a Binary files /dev/null and b/ml-burg/.cm/amd64-unix/parse.sml differ diff --git a/ml-burg/INSTALL b/ml-burg/INSTALL new file mode 100644 index 0000000..173e92c --- /dev/null +++ b/ml-burg/INSTALL @@ -0,0 +1,25 @@ +Installation instructions for ML-Burg +------------------------------------- + +ML-Burg can be automatically +installed as part of the SML/NJ system +by the SML/NJ installer. + +To install by hand (e.g., if you make +your own modifications), run the + + ./build + +script in this directory and then move +the file + + ml-burg.$ARCH-$OS + +to the heap-file directory. Also, +make sure that you have a symbolic +link from "ml-burg" to ".run-sml" +in the bin directory. + +Running ./build requires a properly +functioning installation of SML/NJ, +including ml-lex and ml-yacc. diff --git a/ml-burg/README b/ml-burg/README new file mode 100644 index 0000000..6f39a54 --- /dev/null +++ b/ml-burg/README @@ -0,0 +1,5 @@ +This is ML-Burg. + +See INSTALL for how to install. + +The directory ``doc'' contains documentation. diff --git a/ml-burg/build.bat b/ml-burg/build.bat new file mode 100644 index 0000000..86d83fd --- /dev/null +++ b/ml-burg/build.bat @@ -0,0 +1,2 @@ +@ECHO off +%COMSPEC% /C "..\bin\ml-build.bat -D NO_ML_LEX -D NO_ML_YACC ml-burg.cm Main.main ml-burg" diff --git a/ml-burg/build.sh b/ml-burg/build.sh new file mode 100755 index 0000000..7312044 --- /dev/null +++ b/ml-burg/build.sh @@ -0,0 +1,51 @@ +#!/bin/sh +# +# Copyright (c) 2018 The Fellowship of SML/NJ (https://smlnj.org) +# +# build script for ml-burg under the new runtime system. +# +# options: +# -o image -- specify the name of the heap image, "ml-burg" +# is the default. + +CMD=$0 + +ROOT="ml-burg" +HEAP_IMAGE="" +SMLNJROOT=`pwd`/.. +BIN=${INSTALLDIR:-$SMLNJROOT}/bin +BUILD=$BIN/ml-build +SIZE_OPT="-32" + +# +# process command-line options +# +while [ "$#" != "0" ] ; do + arg=$1 + shift + case $arg in + -32) SIZE_OPT=$arg ;; + -64) SIZE_OPT=$arg ;; + -o) + if [ "$#" = "0" ]; then + echo "$CMD: must supply image name for -o option" + exit 1 + fi + HEAP_IMAGE=$1; shift + ;; + *) + echo $CMD: invalid argument: $arg + exit 1 + ;; + esac +done + +if [ "$HEAP_IMAGE" = "" ]; then + HEAP_IMAGE="$ROOT" +fi + +# +# Build the ml-burg standalone program: +"$BUILD" $SIZE_OPT -DNO_ML_LEX -DNO_ML_YACC ml-burg.cm Main.main $HEAP_IMAGE + +exit 0 diff --git a/ml-burg/burg-ast.sml b/ml-burg/burg-ast.sml new file mode 100644 index 0000000..87c34da --- /dev/null +++ b/ml-burg/burg-ast.sml @@ -0,0 +1,42 @@ +(* + * burg-ast.sml + * + * Abstract syntax trees for BURG specifications. + * + * $Log$ + * Revision 1.2 2000/06/01 18:33:42 monnier + * bring revisions from the vendor branch to the trunk + * + * Revision 1.1.1.8 1999/04/17 18:56:03 monnier + * version 110.16 + * + * Revision 1.1.1.1 1997/01/14 01:37:59 george + * Version 109.24 + * + * Revision 1.1.1.2 1997/01/11 18:52:28 george + * ml-burg Version 109.24 + * + * Revision 1.1.1.1 1996/01/31 16:01:24 george + * Version 109 + * + *) + +structure BurgAST = + struct + + datatype decl_ast = START of string + | TERM of (string * string option) list + | TERMPREFIX of string + | RULEPREFIX of string + | SIG of string + + datatype pattern_ast = PAT of (string * pattern_ast list) + + datatype rule_ast = RULE of (string * pattern_ast * string * int list) + + datatype spec_ast = SPEC of {head : string list, + decls : decl_ast list, + rules : rule_ast list, + tail : string list} + end (* BurgAST *) + diff --git a/ml-burg/burg-gram b/ml-burg/burg-gram new file mode 100644 index 0000000..f3a0ef0 --- /dev/null +++ b/ml-burg/burg-gram @@ -0,0 +1,101 @@ +(* burg-gram +** +** ML-Yacc grammar for BURG. +*) + +structure A = BurgAST; +fun outputRaw s = print (s:string) + +%% + +%term K_EOF + | K_TERM + | K_START + | K_TERMPREFIX + | K_RULEPREFIX + | K_SIG + | K_COLON + | K_SEMICOLON + | K_COMMA + | K_LPAREN | K_RPAREN + | K_EQUAL + | K_PIPE + | PPERCENT of string list + | INT of int + | ID of string + | RAW of string list + +%nonterm full of A.spec_ast + | spec of A.spec_ast + | decl of A.decl_ast + | binding of (string * string option) + | cost of int list + | costtail of int list + | rulename of string + | pattern of A.pattern_ast + | patterntail of A.pattern_ast list + | decls of A.decl_ast list + | rules of A.rule_ast list + | rule of A.rule_ast + | bindinglist of (string * string option) list + | raw of unit + | prelude of unit + | postlude of unit + +%start full + +%pos int +%pure + +%eop K_EOF + +%name Burg + +%% + +full : decls PPERCENT rules PPERCENT + (A.SPEC{head=PPERCENT1, + decls=rev decls, + rules=rev rules, + tail=PPERCENT2}) + +decls : (* empty *) ([]) + | decls decl (decl :: decls) + +decl : K_TERM bindinglist (A.TERM (rev bindinglist)) + | K_START ID (A.START ID) + | K_TERMPREFIX ID (A.TERMPREFIX ID) + | K_RULEPREFIX ID (A.RULEPREFIX ID) + | K_SIG ID (A.SIG ID) + + +bindinglist : binding ([binding]) + | bindinglist K_PIPE binding + (binding :: bindinglist) + +binding : ID ((ID, NONE)) + | ID K_EQUAL ID ((ID1, SOME ID2)) + +rules : (* empty *) ([]) + | rules rule (rule :: rules) + +rule : ID K_COLON pattern K_EQUAL rulename cost K_SEMICOLON + (A.RULE(ID, pattern, rulename, cost)) + +rulename : ID (ID) + +pattern : ID (A.PAT(ID, [])) + | ID K_LPAREN pattern patterntail K_RPAREN + (A.PAT(ID, pattern :: patterntail)) + +patterntail : (* empty *) ([]) + | K_COMMA pattern patterntail + (pattern :: patterntail) + + +cost : (* empty *) ([]) + | K_LPAREN INT costtail K_RPAREN + (INT :: costtail) + +costtail : (* empty *) ([]) + | K_COMMA INT costtail (INT :: costtail) diff --git a/ml-burg/burg-gram.sig b/ml-burg/burg-gram.sig new file mode 100644 index 0000000..6f9e1f9 --- /dev/null +++ b/ml-burg/burg-gram.sig @@ -0,0 +1,29 @@ +signature Burg_TOKENS = +sig +type ('a,'b) token +type svalue +val RAW: (string list) * 'a * 'a -> (svalue,'a) token +val ID: (string) * 'a * 'a -> (svalue,'a) token +val INT: (int) * 'a * 'a -> (svalue,'a) token +val PPERCENT: (string list) * 'a * 'a -> (svalue,'a) token +val K_PIPE: 'a * 'a -> (svalue,'a) token +val K_EQUAL: 'a * 'a -> (svalue,'a) token +val K_RPAREN: 'a * 'a -> (svalue,'a) token +val K_LPAREN: 'a * 'a -> (svalue,'a) token +val K_COMMA: 'a * 'a -> (svalue,'a) token +val K_SEMICOLON: 'a * 'a -> (svalue,'a) token +val K_COLON: 'a * 'a -> (svalue,'a) token +val K_SIG: 'a * 'a -> (svalue,'a) token +val K_RULEPREFIX: 'a * 'a -> (svalue,'a) token +val K_TERMPREFIX: 'a * 'a -> (svalue,'a) token +val K_START: 'a * 'a -> (svalue,'a) token +val K_TERM: 'a * 'a -> (svalue,'a) token +val K_EOF: 'a * 'a -> (svalue,'a) token +end +signature Burg_LRVALS= +sig +structure Tokens : Burg_TOKENS +structure ParserData:PARSER_DATA +sharing type ParserData.Token.token = Tokens.token +sharing type ParserData.svalue = Tokens.svalue +end diff --git a/ml-burg/burg-gram.sml b/ml-burg/burg-gram.sml new file mode 100644 index 0000000..0536301 --- /dev/null +++ b/ml-burg/burg-gram.sml @@ -0,0 +1,430 @@ +functor BurgLrValsFun(structure Token : TOKEN) + : sig structure ParserData : PARSER_DATA + structure Tokens : Burg_TOKENS + end + = +struct +structure ParserData= +struct +structure Header = +struct +(* burg-gram +** +** ML-Yacc grammar for BURG. +*) + +structure A = BurgAST; +fun outputRaw s = print (s:string) + + +end +structure LrTable = Token.LrTable +structure Token = Token +local open LrTable in +val table=let val actionRows = +"\ +\\001\000\001\000\000\000\000\000\ +\\001\000\002\000\010\000\003\000\009\000\004\000\008\000\005\000\007\000\ +\\006\000\006\000\014\000\005\000\000\000\ +\\001\000\007\000\024\000\000\000\ +\\001\000\008\000\038\000\000\000\ +\\001\000\011\000\040\000\000\000\ +\\001\000\011\000\045\000\000\000\ +\\001\000\012\000\029\000\000\000\ +\\001\000\014\000\021\000\016\000\020\000\000\000\ +\\001\000\015\000\039\000\000\000\ +\\001\000\015\000\046\000\000\000\ +\\001\000\016\000\012\000\000\000\ +\\001\000\016\000\013\000\000\000\ +\\001\000\016\000\014\000\000\000\ +\\001\000\016\000\015\000\000\000\ +\\001\000\016\000\018\000\000\000\ +\\001\000\016\000\026\000\000\000\ +\\001\000\016\000\028\000\000\000\ +\\001\000\016\000\032\000\000\000\ +\\049\000\000\000\ +\\050\000\000\000\ +\\051\000\000\000\ +\\052\000\013\000\022\000\000\000\ +\\053\000\000\000\ +\\054\000\000\000\ +\\055\000\000\000\ +\\056\000\000\000\ +\\057\000\000\000\ +\\058\000\000\000\ +\\059\000\012\000\023\000\000\000\ +\\060\000\000\000\ +\\061\000\000\000\ +\\062\000\000\000\ +\\063\000\000\000\ +\\064\000\000\000\ +\\065\000\010\000\030\000\000\000\ +\\066\000\000\000\ +\\067\000\009\000\037\000\000\000\ +\\068\000\000\000\ +\\069\000\010\000\035\000\000\000\ +\\070\000\000\000\ +\\071\000\009\000\043\000\000\000\ +\\072\000\000\000\ +\" +val actionRowNumbers = +"\019\000\001\000\020\000\030\000\ +\\010\000\011\000\012\000\013\000\ +\\014\000\007\000\025\000\024\000\ +\\023\000\022\000\021\000\026\000\ +\\028\000\031\000\002\000\018\000\ +\\014\000\015\000\016\000\027\000\ +\\029\000\006\000\034\000\017\000\ +\\016\000\038\000\033\000\036\000\ +\\003\000\008\000\004\000\016\000\ +\\032\000\040\000\035\000\036\000\ +\\005\000\009\000\037\000\039\000\ +\\040\000\041\000\000\000" +val gotoT = +"\ +\\001\000\046\000\010\000\001\000\000\000\ +\\003\000\002\000\000\000\ +\\000\000\ +\\011\000\009\000\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\004\000\015\000\013\000\014\000\000\000\ +\\012\000\017\000\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\004\000\023\000\000\000\ +\\000\000\ +\\008\000\025\000\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\007\000\029\000\000\000\ +\\008\000\031\000\000\000\ +\\005\000\032\000\000\000\ +\\000\000\ +\\009\000\034\000\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\008\000\039\000\000\000\ +\\000\000\ +\\006\000\040\000\000\000\ +\\000\000\ +\\009\000\042\000\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\006\000\045\000\000\000\ +\\000\000\ +\\000\000\ +\" +val numstates = 47 +val numrules = 24 +val s = ref "" and index = ref 0 +val string_to_int = fn () => +let val i = !index +in index := i+2; Char.ord(String.sub(!s,i)) + Char.ord(String.sub(!s,i+1)) * 256 +end +val string_to_list = fn s' => + let val len = String.size s' + fun f () = + if !index < len then string_to_int() :: f() + else nil + in index := 0; s := s'; f () + end +val string_to_pairlist = fn (conv_key,conv_entry) => + let fun f () = + case string_to_int() + of 0 => EMPTY + | n => PAIR(conv_key (n-1),conv_entry (string_to_int()),f()) + in f + end +val string_to_pairlist_default = fn (conv_key,conv_entry) => + let val conv_row = string_to_pairlist(conv_key,conv_entry) + in fn () => + let val default = conv_entry(string_to_int()) + val row = conv_row() + in (row,default) + end + end +val string_to_table = fn (convert_row,s') => + let val len = String.size s' + fun f ()= + if !index < len then convert_row() :: f() + else nil + in (s := s'; index := 0; f ()) + end +local + val memo = Array.array(numstates+numrules,ERROR) + val _ =let fun g i=(Array.update(memo,i,REDUCE(i-numstates)); g(i+1)) + fun f i = + if i=numstates then g i + else (Array.update(memo,i,SHIFT (STATE i)); f (i+1)) + in f 0 handle Subscript => () + end +in +val entry_to_action = fn 0 => ACCEPT | 1 => ERROR | j => Array.sub(memo,(j-2)) +end +val gotoT=Array.fromList(string_to_table(string_to_pairlist(NT,STATE),gotoT)) +val actionRows=string_to_table(string_to_pairlist_default(T,entry_to_action),actionRows) +val actionRowNumbers = string_to_list actionRowNumbers +val actionT = let val actionRowLookUp= +let val a=Array.fromList(actionRows) in fn i=>Array.sub(a,i) end +in Array.fromList(map actionRowLookUp actionRowNumbers) +end +in LrTable.mkLrTable {actions=actionT,gotos=gotoT,numRules=numrules, +numStates=numstates,initialState=STATE 0} +end +end +local open Header in +type pos = int +type arg = unit +structure MlyValue = +struct +datatype svalue = VOID | ntVOID of unit | RAW of (string list) + | ID of (string) | INT of (int) | PPERCENT of (string list) + | postlude of (unit) | prelude of (unit) | raw of (unit) + | bindinglist of ( ( string * string option ) list) + | rule of (A.rule_ast) | rules of (A.rule_ast list) + | decls of (A.decl_ast list) | patterntail of (A.pattern_ast list) + | pattern of (A.pattern_ast) | rulename of (string) + | costtail of (int list) | cost of (int list) + | binding of ( ( string * string option ) ) | decl of (A.decl_ast) + | spec of (A.spec_ast) | full of (A.spec_ast) +end +type svalue = MlyValue.svalue +type result = A.spec_ast +end +structure EC= +struct +open LrTable +infix 5 $$ +fun x $$ y = y::x +val is_keyword = +fn _ => false +val preferred_change : (term list * term list) list = +nil +val noShift = +fn _ => false +val showTerminal = +fn (T 0) => "K_EOF" + | (T 1) => "K_TERM" + | (T 2) => "K_START" + | (T 3) => "K_TERMPREFIX" + | (T 4) => "K_RULEPREFIX" + | (T 5) => "K_SIG" + | (T 6) => "K_COLON" + | (T 7) => "K_SEMICOLON" + | (T 8) => "K_COMMA" + | (T 9) => "K_LPAREN" + | (T 10) => "K_RPAREN" + | (T 11) => "K_EQUAL" + | (T 12) => "K_PIPE" + | (T 13) => "PPERCENT" + | (T 14) => "INT" + | (T 15) => "ID" + | (T 16) => "RAW" + | _ => "bogus-term" +local open Header in +val errtermvalue= +fn _ => MlyValue.VOID +end +val terms : term list = nil + $$ (T 12) $$ (T 11) $$ (T 10) $$ (T 9) $$ (T 8) $$ (T 7) $$ (T 6) $$ +(T 5) $$ (T 4) $$ (T 3) $$ (T 2) $$ (T 1) $$ (T 0)end +structure Actions = +struct +exception mlyAction of int +local open Header in +val actions = +fn (i392,defaultPos,stack, + (()):arg) => +case (i392,stack) +of ( 0, ( ( _, ( MlyValue.PPERCENT PPERCENT2, _, PPERCENT2right)) :: +( _, ( MlyValue.rules rules, _, _)) :: ( _, ( MlyValue.PPERCENT +PPERCENT1, _, _)) :: ( _, ( MlyValue.decls decls, decls1left, _)) :: +rest671)) => let val result = MlyValue.full ( +A.SPEC{head=PPERCENT1, + decls=rev decls, + rules=rev rules, + tail=PPERCENT2} +) + in ( LrTable.NT 0, ( result, decls1left, PPERCENT2right), rest671) + +end +| ( 1, ( rest671)) => let val result = MlyValue.decls ([]) + in ( LrTable.NT 9, ( result, defaultPos, defaultPos), rest671) +end +| ( 2, ( ( _, ( MlyValue.decl decl, _, decl1right)) :: ( _, ( +MlyValue.decls decls, decls1left, _)) :: rest671)) => let val result + = MlyValue.decls (decl :: decls) + in ( LrTable.NT 9, ( result, decls1left, decl1right), rest671) +end +| ( 3, ( ( _, ( MlyValue.bindinglist bindinglist, _, +bindinglist1right)) :: ( _, ( _, K_TERM1left, _)) :: rest671)) => let + val result = MlyValue.decl (A.TERM (rev bindinglist)) + in ( LrTable.NT 2, ( result, K_TERM1left, bindinglist1right), rest671 +) +end +| ( 4, ( ( _, ( MlyValue.ID ID, _, ID1right)) :: ( _, ( _, +K_START1left, _)) :: rest671)) => let val result = MlyValue.decl ( +A.START ID) + in ( LrTable.NT 2, ( result, K_START1left, ID1right), rest671) +end +| ( 5, ( ( _, ( MlyValue.ID ID, _, ID1right)) :: ( _, ( _, +K_TERMPREFIX1left, _)) :: rest671)) => let val result = MlyValue.decl + (A.TERMPREFIX ID) + in ( LrTable.NT 2, ( result, K_TERMPREFIX1left, ID1right), rest671) + +end +| ( 6, ( ( _, ( MlyValue.ID ID, _, ID1right)) :: ( _, ( _, +K_RULEPREFIX1left, _)) :: rest671)) => let val result = MlyValue.decl + (A.RULEPREFIX ID) + in ( LrTable.NT 2, ( result, K_RULEPREFIX1left, ID1right), rest671) + +end +| ( 7, ( ( _, ( MlyValue.ID ID, _, ID1right)) :: ( _, ( _, K_SIG1left +, _)) :: rest671)) => let val result = MlyValue.decl (A.SIG ID) + in ( LrTable.NT 2, ( result, K_SIG1left, ID1right), rest671) +end +| ( 8, ( ( _, ( MlyValue.binding binding, binding1left, binding1right +)) :: rest671)) => let val result = MlyValue.bindinglist ([binding]) + in ( LrTable.NT 12, ( result, binding1left, binding1right), rest671) + +end +| ( 9, ( ( _, ( MlyValue.binding binding, _, binding1right)) :: _ :: +( _, ( MlyValue.bindinglist bindinglist, bindinglist1left, _)) :: +rest671)) => let val result = MlyValue.bindinglist ( +binding :: bindinglist) + in ( LrTable.NT 12, ( result, bindinglist1left, binding1right), +rest671) +end +| ( 10, ( ( _, ( MlyValue.ID ID, ID1left, ID1right)) :: rest671)) => + let val result = MlyValue.binding ((ID, NONE)) + in ( LrTable.NT 3, ( result, ID1left, ID1right), rest671) +end +| ( 11, ( ( _, ( MlyValue.ID ID2, _, ID2right)) :: _ :: ( _, ( +MlyValue.ID ID1, ID1left, _)) :: rest671)) => let val result = +MlyValue.binding ((ID1, SOME ID2)) + in ( LrTable.NT 3, ( result, ID1left, ID2right), rest671) +end +| ( 12, ( rest671)) => let val result = MlyValue.rules ([]) + in ( LrTable.NT 10, ( result, defaultPos, defaultPos), rest671) +end +| ( 13, ( ( _, ( MlyValue.rule rule, _, rule1right)) :: ( _, ( +MlyValue.rules rules, rules1left, _)) :: rest671)) => let val result + = MlyValue.rules (rule :: rules) + in ( LrTable.NT 10, ( result, rules1left, rule1right), rest671) +end +| ( 14, ( ( _, ( _, _, K_SEMICOLON1right)) :: ( _, ( MlyValue.cost +cost, _, _)) :: ( _, ( MlyValue.rulename rulename, _, _)) :: _ :: ( _, + ( MlyValue.pattern pattern, _, _)) :: _ :: ( _, ( MlyValue.ID ID, +ID1left, _)) :: rest671)) => let val result = MlyValue.rule ( +A.RULE(ID, pattern, rulename, cost)) + in ( LrTable.NT 11, ( result, ID1left, K_SEMICOLON1right), rest671) + +end +| ( 15, ( ( _, ( MlyValue.ID ID, ID1left, ID1right)) :: rest671)) => + let val result = MlyValue.rulename (ID) + in ( LrTable.NT 6, ( result, ID1left, ID1right), rest671) +end +| ( 16, ( ( _, ( MlyValue.ID ID, ID1left, ID1right)) :: rest671)) => + let val result = MlyValue.pattern (A.PAT(ID, [])) + in ( LrTable.NT 7, ( result, ID1left, ID1right), rest671) +end +| ( 17, ( ( _, ( _, _, K_RPAREN1right)) :: ( _, ( +MlyValue.patterntail patterntail, _, _)) :: ( _, ( MlyValue.pattern +pattern, _, _)) :: _ :: ( _, ( MlyValue.ID ID, ID1left, _)) :: rest671 +)) => let val result = MlyValue.pattern ( +A.PAT(ID, pattern :: patterntail)) + in ( LrTable.NT 7, ( result, ID1left, K_RPAREN1right), rest671) +end +| ( 18, ( rest671)) => let val result = MlyValue.patterntail ([]) + in ( LrTable.NT 8, ( result, defaultPos, defaultPos), rest671) +end +| ( 19, ( ( _, ( MlyValue.patterntail patterntail, _, +patterntail1right)) :: ( _, ( MlyValue.pattern pattern, _, _)) :: ( _, + ( _, K_COMMA1left, _)) :: rest671)) => let val result = +MlyValue.patterntail (pattern :: patterntail) + in ( LrTable.NT 8, ( result, K_COMMA1left, patterntail1right), +rest671) +end +| ( 20, ( rest671)) => let val result = MlyValue.cost ([]) + in ( LrTable.NT 4, ( result, defaultPos, defaultPos), rest671) +end +| ( 21, ( ( _, ( _, _, K_RPAREN1right)) :: ( _, ( MlyValue.costtail +costtail, _, _)) :: ( _, ( MlyValue.INT INT, _, _)) :: ( _, ( _, +K_LPAREN1left, _)) :: rest671)) => let val result = MlyValue.cost ( +INT :: costtail) + in ( LrTable.NT 4, ( result, K_LPAREN1left, K_RPAREN1right), rest671) + +end +| ( 22, ( rest671)) => let val result = MlyValue.costtail ([]) + in ( LrTable.NT 5, ( result, defaultPos, defaultPos), rest671) +end +| ( 23, ( ( _, ( MlyValue.costtail costtail, _, costtail1right)) :: ( + _, ( MlyValue.INT INT, _, _)) :: ( _, ( _, K_COMMA1left, _)) :: +rest671)) => let val result = MlyValue.costtail (INT :: costtail) + in ( LrTable.NT 5, ( result, K_COMMA1left, costtail1right), rest671) + +end +| _ => raise (mlyAction i392) +end +val void = MlyValue.VOID +val extract = fn a => (fn MlyValue.full x => x +| _ => let exception ParseInternal + in raise ParseInternal end) a +end +end +structure Tokens : Burg_TOKENS = +struct +type svalue = ParserData.svalue +type ('a,'b) token = ('a,'b) Token.token +fun K_EOF (p1,p2) = Token.TOKEN (ParserData.LrTable.T 0,( +ParserData.MlyValue.VOID,p1,p2)) +fun K_TERM (p1,p2) = Token.TOKEN (ParserData.LrTable.T 1,( +ParserData.MlyValue.VOID,p1,p2)) +fun K_START (p1,p2) = Token.TOKEN (ParserData.LrTable.T 2,( +ParserData.MlyValue.VOID,p1,p2)) +fun K_TERMPREFIX (p1,p2) = Token.TOKEN (ParserData.LrTable.T 3,( +ParserData.MlyValue.VOID,p1,p2)) +fun K_RULEPREFIX (p1,p2) = Token.TOKEN (ParserData.LrTable.T 4,( +ParserData.MlyValue.VOID,p1,p2)) +fun K_SIG (p1,p2) = Token.TOKEN (ParserData.LrTable.T 5,( +ParserData.MlyValue.VOID,p1,p2)) +fun K_COLON (p1,p2) = Token.TOKEN (ParserData.LrTable.T 6,( +ParserData.MlyValue.VOID,p1,p2)) +fun K_SEMICOLON (p1,p2) = Token.TOKEN (ParserData.LrTable.T 7,( +ParserData.MlyValue.VOID,p1,p2)) +fun K_COMMA (p1,p2) = Token.TOKEN (ParserData.LrTable.T 8,( +ParserData.MlyValue.VOID,p1,p2)) +fun K_LPAREN (p1,p2) = Token.TOKEN (ParserData.LrTable.T 9,( +ParserData.MlyValue.VOID,p1,p2)) +fun K_RPAREN (p1,p2) = Token.TOKEN (ParserData.LrTable.T 10,( +ParserData.MlyValue.VOID,p1,p2)) +fun K_EQUAL (p1,p2) = Token.TOKEN (ParserData.LrTable.T 11,( +ParserData.MlyValue.VOID,p1,p2)) +fun K_PIPE (p1,p2) = Token.TOKEN (ParserData.LrTable.T 12,( +ParserData.MlyValue.VOID,p1,p2)) +fun PPERCENT (i,p1,p2) = Token.TOKEN (ParserData.LrTable.T 13,( +ParserData.MlyValue.PPERCENT i,p1,p2)) +fun INT (i,p1,p2) = Token.TOKEN (ParserData.LrTable.T 14,( +ParserData.MlyValue.INT i,p1,p2)) +fun ID (i,p1,p2) = Token.TOKEN (ParserData.LrTable.T 15,( +ParserData.MlyValue.ID i,p1,p2)) +fun RAW (i,p1,p2) = Token.TOKEN (ParserData.LrTable.T 16,( +ParserData.MlyValue.RAW i,p1,p2)) +end +end diff --git a/ml-burg/burg-lex b/ml-burg/burg-lex new file mode 100644 index 0000000..037adf5 --- /dev/null +++ b/ml-burg/burg-lex @@ -0,0 +1,112 @@ +(* burg-lex + * + * COPYRIGHT (c) 1995 AT&T Bell Laboratories. + * + * ML-Lex specification for ML-burg. + *) + +structure T = Tokens +structure E = ErrorMsg +type pos = int +type svalue = T.svalue +type ('a,'b) token = ('a,'b) T.token +type lexresult = (svalue,pos) token + +val comLevel = ref 0 +val lineNum = ref 0 +val verbatimLevel = ref 0 +val percentCount = ref 0 +val rawLine = ref "" +val rawNoNewLine = ref false +val raw:string list ref = ref [] +val reachedEop = ref false + +fun resetState() = (comLevel := 0; + lineNum := 0; + verbatimLevel := 0; + percentCount := 0; + rawLine := ""; + rawNoNewLine := false; + raw := []; + reachedEop := false) + +fun inc (ri as ref i) = (ri := i+1) +fun dec (ri as ref i) = (ri := i-1) + +fun incVerbLvl() = if !verbatimLevel <> 0 + then E.impossible "nested verbatim levels" + else inc verbatimLevel + +fun outputRaw (s:string) = (rawLine := !rawLine^s; rawNoNewLine := true) + +fun rawNextLine () = (raw := !rawLine^"\n":: (!raw); + rawLine := ""; rawNoNewLine := false) + +fun rawStop () = if !rawNoNewLine then rawNextLine () else () + +fun eof() = (if !comLevel > 0 then E.complain "unclosed comment" + else if !verbatimLevel <> 0 then + E.complain "unclosed user input" + else (); + if !reachedEop + then T.K_EOF(!lineNum,!lineNum) + else (rawStop (); + T.PPERCENT(rev(!raw),!lineNum,!lineNum) + before (raw := []; + reachedEop := true))) + +%% + +%s COMMENT DUMP POSTLUDE; +%header (functor BurgLexFun(structure Tokens : Burg_TOKENS)); +idchars = [A-Za-z_0-9]; +id = [A-Za-z]{idchars}*; +ws = [\t\ ]*; +num = [0-9]+; +line = .*; + + + + +%% + + "\n" => (inc lineNum; continue()); + "%{" => (incVerbLvl(); YYBEGIN DUMP; continue()); + "%%" => (inc percentCount; + if !percentCount = 2 + then (YYBEGIN POSTLUDE; continue()) + else T.PPERCENT(rev(!raw),!lineNum,!lineNum) + before raw := []); + {ws} => (continue()); + \n => (inc lineNum; continue()); + "(" => (T.K_LPAREN(!lineNum,!lineNum)); + ")" => (T.K_RPAREN(!lineNum,!lineNum)); + "," => (T.K_COMMA(!lineNum,!lineNum)); + ":" => (T.K_COLON(!lineNum,!lineNum)); + ";" => (T.K_SEMICOLON(!lineNum,!lineNum)); + "=" => (T.K_EQUAL(!lineNum,!lineNum)); + "|" => (T.K_PIPE(!lineNum,!lineNum)); + "%term" => (T.K_TERM(!lineNum,!lineNum)); + "%start" => (T.K_START(!lineNum,!lineNum)); + "%termprefix" => (T.K_TERMPREFIX(!lineNum,!lineNum)); + "%ruleprefix" => (T.K_RULEPREFIX(!lineNum,!lineNum)); + "%sig" => (T.K_SIG(!lineNum,!lineNum)); + "(*" => (YYBEGIN COMMENT; comLevel:=1; continue()); + {num} => (T.INT(valOf(Int.fromString yytext),!lineNum,!lineNum)); + {id} => (T.ID(yytext,!lineNum,!lineNum)); + + "(*" => (inc comLevel; continue()); + \n => (inc lineNum; continue()); + "*)" => (dec comLevel; + if !comLevel=0 then YYBEGIN INITIAL else (); + continue()); + . => (continue()); + + "%}" => (rawStop(); dec verbatimLevel; + YYBEGIN INITIAL; continue()); + "\n" => (rawNextLine (); inc lineNum; continue()); + {line} => (outputRaw yytext; continue()); + + + "\n" => (rawNextLine (); inc lineNum; continue()); + {line} => (outputRaw yytext; continue()); diff --git a/ml-burg/burg-lex.sml b/ml-burg/burg-lex.sml new file mode 100644 index 0000000..491a3ae --- /dev/null +++ b/ml-burg/burg-lex.sml @@ -0,0 +1,767 @@ +functor BurgLexFun(structure Tokens : Burg_TOKENS) = struct + + structure yyInput : sig + + type stream + val mkStream : (int -> string) -> stream + val fromStream : TextIO.StreamIO.instream -> stream + val getc : stream -> (Char.char * stream) option + val getpos : stream -> int + val getlineNo : stream -> int + val subtract : stream * stream -> string + val eof : stream -> bool + val lastWasNL : stream -> bool + + end = struct + + structure TIO = TextIO + structure TSIO = TIO.StreamIO + structure TPIO = TextPrimIO + + datatype stream = Stream of { + strm : TSIO.instream, + id : int, (* track which streams originated + * from the same stream *) + pos : int, + lineNo : int, + lastWasNL : bool + } + + local + val next = ref 0 + in + fun nextId() = !next before (next := !next + 1) + end + + val initPos = 2 (* ml-lex bug compatibility *) + + fun mkStream inputN = let + val strm = TSIO.mkInstream + (TPIO.RD { + name = "lexgen", + chunkSize = 4096, + readVec = SOME inputN, + readArr = NONE, + readVecNB = NONE, + readArrNB = NONE, + block = NONE, + canInput = NONE, + avail = (fn () => NONE), + getPos = NONE, + setPos = NONE, + endPos = NONE, + verifyPos = NONE, + close = (fn () => ()), + ioDesc = NONE + }, "") + in + Stream {strm = strm, id = nextId(), pos = initPos, lineNo = 1, + lastWasNL = true} + end + + fun fromStream strm = Stream { + strm = strm, id = nextId(), pos = initPos, lineNo = 1, lastWasNL = true + } + + fun getc (Stream {strm, pos, id, lineNo, ...}) = (case TSIO.input1 strm + of NONE => NONE + | SOME (c, strm') => + SOME (c, Stream { + strm = strm', + pos = pos+1, + id = id, + lineNo = lineNo + + (if c = #"\n" then 1 else 0), + lastWasNL = (c = #"\n") + }) + (* end case*)) + + fun getpos (Stream {pos, ...}) = pos + + fun getlineNo (Stream {lineNo, ...}) = lineNo + + fun subtract (new, old) = let + val Stream {strm = strm, pos = oldPos, id = oldId, ...} = old + val Stream {pos = newPos, id = newId, ...} = new + val (diff, _) = if newId = oldId andalso newPos >= oldPos + then TSIO.inputN (strm, newPos - oldPos) + else raise Fail + "BUG: yyInput: attempted to subtract incompatible streams" + in + diff + end + + fun eof (Stream {strm, ...}) = TSIO.endOfStream strm + + fun lastWasNL (Stream {lastWasNL, ...}) = lastWasNL + + end + + datatype yystart_state = +DUMP | POSTLUDE | COMMENT | INITIAL + structure UserDeclarations = + struct + +(* burg-lex + * + * COPYRIGHT (c) 1995 AT&T Bell Laboratories. + * + * ML-Lex specification for ML-burg. + *) + +structure T = Tokens +structure E = ErrorMsg +type pos = int +type svalue = T.svalue +type ('a,'b) token = ('a,'b) T.token +type lexresult = (svalue,pos) token + +val comLevel = ref 0 +val lineNum = ref 0 +val verbatimLevel = ref 0 +val percentCount = ref 0 +val rawLine = ref "" +val rawNoNewLine = ref false +val raw:string list ref = ref [] +val reachedEop = ref false + +fun resetState() = (comLevel := 0; + lineNum := 0; + verbatimLevel := 0; + percentCount := 0; + rawLine := ""; + rawNoNewLine := false; + raw := []; + reachedEop := false) + +fun inc (ri as ref i) = (ri := i+1) +fun dec (ri as ref i) = (ri := i-1) + +fun incVerbLvl() = if !verbatimLevel <> 0 + then E.impossible "nested verbatim levels" + else inc verbatimLevel + +fun outputRaw (s:string) = (rawLine := !rawLine^s; rawNoNewLine := true) + +fun rawNextLine () = (raw := !rawLine^"\n":: (!raw); + rawLine := ""; rawNoNewLine := false) + +fun rawStop () = if !rawNoNewLine then rawNextLine () else () + +fun eof() = (if !comLevel > 0 then E.complain "unclosed comment" + else if !verbatimLevel <> 0 then + E.complain "unclosed user input" + else (); + if !reachedEop + then T.K_EOF(!lineNum,!lineNum) + else (rawStop (); + T.PPERCENT(rev(!raw),!lineNum,!lineNum) + before (raw := []; + reachedEop := true))) + + + + end + + datatype yymatch + = yyNO_MATCH + | yyMATCH of yyInput.stream * action * yymatch + withtype action = yyInput.stream * yymatch -> UserDeclarations.lexresult + + local + + val yytable = +#[ +] + + fun mk yyins = let + (* current start state *) + val yyss = ref INITIAL + fun YYBEGIN ss = (yyss := ss) + (* current input stream *) + val yystrm = ref yyins + (* get one char of input *) + val yygetc = yyInput.getc + (* create yytext *) + fun yymktext(strm) = yyInput.subtract (strm, !yystrm) + open UserDeclarations + fun lex +(yyarg as ()) = let + fun continue() = let + val yylastwasn = yyInput.lastWasNL (!yystrm) + fun yystuck (yyNO_MATCH) = raise Fail "stuck state" + | yystuck (yyMATCH (strm, action, old)) = + action (strm, old) + val yypos = yyInput.getpos (!yystrm) + val yygetlineNo = yyInput.getlineNo + fun yyactsToMatches (strm, [], oldMatches) = oldMatches + | yyactsToMatches (strm, act::acts, oldMatches) = + yyMATCH (strm, act, yyactsToMatches (strm, acts, oldMatches)) + fun yygo actTable = + (fn (~1, _, oldMatches) => yystuck oldMatches + | (curState, strm, oldMatches) => let + val (transitions, finals') = Vector.sub (yytable, curState) + val finals = map (fn i => Vector.sub (actTable, i)) finals' + fun tryfinal() = + yystuck (yyactsToMatches (strm, finals, oldMatches)) + fun find (c, []) = NONE + | find (c, (c1, c2, s)::ts) = + if c1 <= c andalso c <= c2 then SOME s + else find (c, ts) + in case yygetc strm + of SOME(c, strm') => + (case find (c, transitions) + of NONE => tryfinal() + | SOME n => + yygo actTable + (n, strm', + yyactsToMatches (strm, finals, oldMatches))) + | NONE => tryfinal() + end) + in +let +fun yyAction0 (strm, lastMatch : yymatch) = (yystrm := strm; + (inc lineNum; continue())) +fun yyAction1 (strm, lastMatch : yymatch) = (yystrm := strm; + (incVerbLvl(); YYBEGIN DUMP; continue())) +fun yyAction2 (strm, lastMatch : yymatch) = (yystrm := strm; + (inc percentCount; + if !percentCount = 2 + then (YYBEGIN POSTLUDE; continue()) + else T.PPERCENT(rev(!raw),!lineNum,!lineNum) + before raw := [])) +fun yyAction3 (strm, lastMatch : yymatch) = (yystrm := strm; (continue())) +fun yyAction4 (strm, lastMatch : yymatch) = (yystrm := strm; + (inc lineNum; continue())) +fun yyAction5 (strm, lastMatch : yymatch) = (yystrm := strm; + (T.K_LPAREN(!lineNum,!lineNum))) +fun yyAction6 (strm, lastMatch : yymatch) = (yystrm := strm; + (T.K_RPAREN(!lineNum,!lineNum))) +fun yyAction7 (strm, lastMatch : yymatch) = (yystrm := strm; + (T.K_COMMA(!lineNum,!lineNum))) +fun yyAction8 (strm, lastMatch : yymatch) = (yystrm := strm; + (T.K_COLON(!lineNum,!lineNum))) +fun yyAction9 (strm, lastMatch : yymatch) = (yystrm := strm; + (T.K_SEMICOLON(!lineNum,!lineNum))) +fun yyAction10 (strm, lastMatch : yymatch) = (yystrm := strm; + (T.K_EQUAL(!lineNum,!lineNum))) +fun yyAction11 (strm, lastMatch : yymatch) = (yystrm := strm; + (T.K_PIPE(!lineNum,!lineNum))) +fun yyAction12 (strm, lastMatch : yymatch) = (yystrm := strm; + (T.K_TERM(!lineNum,!lineNum))) +fun yyAction13 (strm, lastMatch : yymatch) = (yystrm := strm; + (T.K_START(!lineNum,!lineNum))) +fun yyAction14 (strm, lastMatch : yymatch) = (yystrm := strm; + (T.K_TERMPREFIX(!lineNum,!lineNum))) +fun yyAction15 (strm, lastMatch : yymatch) = (yystrm := strm; + (T.K_RULEPREFIX(!lineNum,!lineNum))) +fun yyAction16 (strm, lastMatch : yymatch) = (yystrm := strm; + (T.K_SIG(!lineNum,!lineNum))) +fun yyAction17 (strm, lastMatch : yymatch) = (yystrm := strm; + (YYBEGIN COMMENT; comLevel:=1; continue())) +fun yyAction18 (strm, lastMatch : yymatch) = let + val yytext = yymktext(strm) + in + yystrm := strm; (T.INT(valOf(Int.fromString yytext),!lineNum,!lineNum)) + end +fun yyAction19 (strm, lastMatch : yymatch) = let + val yytext = yymktext(strm) + in + yystrm := strm; (T.ID(yytext,!lineNum,!lineNum)) + end +fun yyAction20 (strm, lastMatch : yymatch) = (yystrm := strm; + (inc comLevel; continue())) +fun yyAction21 (strm, lastMatch : yymatch) = (yystrm := strm; + (inc lineNum; continue())) +fun yyAction22 (strm, lastMatch : yymatch) = (yystrm := strm; + (dec comLevel; + if !comLevel=0 then YYBEGIN INITIAL else (); + continue())) +fun yyAction23 (strm, lastMatch : yymatch) = (yystrm := strm; (continue())) +fun yyAction24 (strm, lastMatch : yymatch) = (yystrm := strm; + (rawStop(); dec verbatimLevel; + YYBEGIN INITIAL; continue())) +fun yyAction25 (strm, lastMatch : yymatch) = (yystrm := strm; + (rawNextLine (); inc lineNum; continue())) +fun yyAction26 (strm, lastMatch : yymatch) = let + val yytext = yymktext(strm) + in + yystrm := strm; (outputRaw yytext; continue()) + end +fun yyAction27 (strm, lastMatch : yymatch) = (yystrm := strm; + (rawNextLine (); inc lineNum; continue())) +fun yyAction28 (strm, lastMatch : yymatch) = let + val yytext = yymktext(strm) + in + yystrm := strm; (outputRaw yytext; continue()) + end +fun yyQ27 (strm, lastMatch : yymatch) = yyAction11(strm, yyNO_MATCH) +fun yyQ26 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction19(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"[" + then yyAction19(strm, yyNO_MATCH) + else if inp < #"[" + then if inp = #":" + then yyAction19(strm, yyNO_MATCH) + else if inp < #":" + then if inp <= #"/" + then yyAction19(strm, yyNO_MATCH) + else yyQ26(strm', yyMATCH(strm, yyAction19, yyNO_MATCH)) + else if inp <= #"@" + then yyAction19(strm, yyNO_MATCH) + else yyQ26(strm', yyMATCH(strm, yyAction19, yyNO_MATCH)) + else if inp = #"`" + then yyAction19(strm, yyNO_MATCH) + else if inp < #"`" + then if inp = #"_" + then yyQ26(strm', yyMATCH(strm, yyAction19, yyNO_MATCH)) + else yyAction19(strm, yyNO_MATCH) + else if inp <= #"z" + then yyQ26(strm', yyMATCH(strm, yyAction19, yyNO_MATCH)) + else yyAction19(strm, yyNO_MATCH) + (* end case *)) +fun yyQ25 (strm, lastMatch : yymatch) = yyAction10(strm, yyNO_MATCH) +fun yyQ24 (strm, lastMatch : yymatch) = yyAction9(strm, yyNO_MATCH) +fun yyQ23 (strm, lastMatch : yymatch) = yyAction8(strm, yyNO_MATCH) +fun yyQ22 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction18(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"0" + then yyQ22(strm', yyMATCH(strm, yyAction18, yyNO_MATCH)) + else if inp < #"0" + then yyAction18(strm, yyNO_MATCH) + else if inp <= #"9" + then yyQ22(strm', yyMATCH(strm, yyAction18, yyNO_MATCH)) + else yyAction18(strm, yyNO_MATCH) + (* end case *)) +fun yyQ21 (strm, lastMatch : yymatch) = yyAction7(strm, yyNO_MATCH) +fun yyQ20 (strm, lastMatch : yymatch) = yyAction6(strm, yyNO_MATCH) +fun yyQ28 (strm, lastMatch : yymatch) = yyAction17(strm, yyNO_MATCH) +fun yyQ19 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction5(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"*" + then yyQ28(strm', yyMATCH(strm, yyAction5, yyNO_MATCH)) + else yyAction5(strm, yyNO_MATCH) + (* end case *)) +fun yyQ33 (strm, lastMatch : yymatch) = yyAction1(strm, yyNO_MATCH) +fun yyQ42 (strm, lastMatch : yymatch) = yyAction14(strm, yyNO_MATCH) +fun yyQ41 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = #"x" + then yyQ42(strm', lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ40 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = #"i" + then yyQ41(strm', lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ39 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = #"f" + then yyQ40(strm', lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ38 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = #"e" + then yyQ39(strm', lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ37 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = #"r" + then yyQ38(strm', lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ36 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction12(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"p" + then yyQ37(strm', yyMATCH(strm, yyAction12, yyNO_MATCH)) + else yyAction12(strm, yyNO_MATCH) + (* end case *)) +fun yyQ35 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = #"m" + then yyQ36(strm', lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ34 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = #"r" + then yyQ35(strm', lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ32 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = #"e" + then yyQ34(strm', lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ47 (strm, lastMatch : yymatch) = yyAction13(strm, yyNO_MATCH) +fun yyQ46 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = #"t" + then yyQ47(strm', lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ45 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = #"r" + then yyQ46(strm', lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ44 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = #"a" + then yyQ45(strm', lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ48 (strm, lastMatch : yymatch) = yyAction16(strm, yyNO_MATCH) +fun yyQ43 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = #"g" + then yyQ48(strm', lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ31 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = #"j" + then yystuck(lastMatch) + else if inp < #"j" + then if inp = #"i" + then yyQ43(strm', lastMatch) + else yystuck(lastMatch) + else if inp = #"t" + then yyQ44(strm', lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ57 (strm, lastMatch : yymatch) = yyAction15(strm, yyNO_MATCH) +fun yyQ56 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = #"x" + then yyQ57(strm', lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ55 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = #"i" + then yyQ56(strm', lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ54 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = #"f" + then yyQ55(strm', lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ53 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = #"e" + then yyQ54(strm', lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ52 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = #"r" + then yyQ53(strm', lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ51 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = #"p" + then yyQ52(strm', lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ50 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = #"e" + then yyQ51(strm', lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ49 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = #"l" + then yyQ50(strm', lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ30 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = #"u" + then yyQ49(strm', lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ29 (strm, lastMatch : yymatch) = yyAction2(strm, yyNO_MATCH) +fun yyQ18 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = #"s" + then yyQ31(strm', lastMatch) + else if inp < #"s" + then if inp = #"&" + then yystuck(lastMatch) + else if inp < #"&" + then if inp = #"%" + then yyQ29(strm', lastMatch) + else yystuck(lastMatch) + else if inp = #"r" + then yyQ30(strm', lastMatch) + else yystuck(lastMatch) + else if inp = #"{" + then yyQ33(strm', lastMatch) + else if inp < #"{" + then if inp = #"t" + then yyQ32(strm', lastMatch) + else yystuck(lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ17 (strm, lastMatch : yymatch) = yyAction0(strm, yyNO_MATCH) +fun yyQ16 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction3(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"\n" + then yyAction3(strm, yyNO_MATCH) + else if inp < #"\n" + then if inp = #"\t" + then yyQ16(strm', yyMATCH(strm, yyAction3, yyNO_MATCH)) + else yyAction3(strm, yyNO_MATCH) + else if inp = #" " + then yyQ16(strm', yyMATCH(strm, yyAction3, yyNO_MATCH)) + else yyAction3(strm, yyNO_MATCH) + (* end case *)) +fun yyQ3 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => + if yyInput.eof(!(yystrm)) + then UserDeclarations.eof(yyarg) + else yyAction3(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"-" + then if yyInput.eof(!(yystrm)) + then UserDeclarations.eof(yyarg) + else yyAction3(strm, yyNO_MATCH) + else if inp < #"-" + then if inp = #"%" + then yyQ18(strm', yyMATCH(strm, yyAction3, yyNO_MATCH)) + else if inp < #"%" + then if inp = #"\v" + then if yyInput.eof(!(yystrm)) + then UserDeclarations.eof(yyarg) + else yyAction3(strm, yyNO_MATCH) + else if inp < #"\v" + then if inp = #"\t" + then yyQ16(strm', yyMATCH(strm, yyAction3, yyNO_MATCH)) + else if inp = #"\n" + then yyQ17(strm', yyMATCH(strm, yyAction3, yyNO_MATCH)) + else if yyInput.eof(!(yystrm)) + then UserDeclarations.eof(yyarg) + else yyAction3(strm, yyNO_MATCH) + else if inp = #" " + then yyQ16(strm', yyMATCH(strm, yyAction3, yyNO_MATCH)) + else if yyInput.eof(!(yystrm)) + then UserDeclarations.eof(yyarg) + else yyAction3(strm, yyNO_MATCH) + else if inp = #")" + then yyQ20(strm', yyMATCH(strm, yyAction3, yyNO_MATCH)) + else if inp < #")" + then if inp = #"(" + then yyQ19(strm', yyMATCH(strm, yyAction3, yyNO_MATCH)) + else if yyInput.eof(!(yystrm)) + then UserDeclarations.eof(yyarg) + else yyAction3(strm, yyNO_MATCH) + else if inp = #"," + then yyQ21(strm', yyMATCH(strm, yyAction3, yyNO_MATCH)) + else if yyInput.eof(!(yystrm)) + then UserDeclarations.eof(yyarg) + else yyAction3(strm, yyNO_MATCH) + else if inp = #">" + then if yyInput.eof(!(yystrm)) + then UserDeclarations.eof(yyarg) + else yyAction3(strm, yyNO_MATCH) + else if inp < #">" + then if inp = #";" + then yyQ24(strm', yyMATCH(strm, yyAction3, yyNO_MATCH)) + else if inp < #";" + then if inp = #"0" + then yyQ22(strm', yyMATCH(strm, yyAction3, yyNO_MATCH)) + else if inp < #"0" + then if yyInput.eof(!(yystrm)) + then UserDeclarations.eof(yyarg) + else yyAction3(strm, yyNO_MATCH) + else if inp = #":" + then yyQ23(strm', yyMATCH(strm, yyAction3, yyNO_MATCH)) + else yyQ22(strm', yyMATCH(strm, yyAction3, yyNO_MATCH)) + else if inp = #"<" + then if yyInput.eof(!(yystrm)) + then UserDeclarations.eof(yyarg) + else yyAction3(strm, yyNO_MATCH) + else yyQ25(strm', yyMATCH(strm, yyAction3, yyNO_MATCH)) + else if inp = #"a" + then yyQ26(strm', yyMATCH(strm, yyAction3, yyNO_MATCH)) + else if inp < #"a" + then if inp = #"A" + then yyQ26(strm', yyMATCH(strm, yyAction3, yyNO_MATCH)) + else if inp < #"A" + then if yyInput.eof(!(yystrm)) + then UserDeclarations.eof(yyarg) + else yyAction3(strm, yyNO_MATCH) + else if inp <= #"Z" + then yyQ26(strm', yyMATCH(strm, yyAction3, yyNO_MATCH)) + else if yyInput.eof(!(yystrm)) + then UserDeclarations.eof(yyarg) + else yyAction3(strm, yyNO_MATCH) + else if inp = #"|" + then yyQ27(strm', yyMATCH(strm, yyAction3, yyNO_MATCH)) + else if inp < #"|" + then if inp = #"{" + then if yyInput.eof(!(yystrm)) + then UserDeclarations.eof(yyarg) + else yyAction3(strm, yyNO_MATCH) + else yyQ26(strm', yyMATCH(strm, yyAction3, yyNO_MATCH)) + else if yyInput.eof(!(yystrm)) + then UserDeclarations.eof(yyarg) + else yyAction3(strm, yyNO_MATCH) + (* end case *)) +fun yyQ14 (strm, lastMatch : yymatch) = yyAction22(strm, yyNO_MATCH) +fun yyQ13 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction23(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #")" + then yyQ14(strm', yyMATCH(strm, yyAction23, yyNO_MATCH)) + else yyAction23(strm, yyNO_MATCH) + (* end case *)) +fun yyQ15 (strm, lastMatch : yymatch) = yyAction20(strm, yyNO_MATCH) +fun yyQ12 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction23(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"*" + then yyQ15(strm', yyMATCH(strm, yyAction23, yyNO_MATCH)) + else yyAction23(strm, yyNO_MATCH) + (* end case *)) +fun yyQ11 (strm, lastMatch : yymatch) = yyAction21(strm, yyNO_MATCH) +fun yyQ10 (strm, lastMatch : yymatch) = yyAction23(strm, yyNO_MATCH) +fun yyQ2 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => + if yyInput.eof(!(yystrm)) + then UserDeclarations.eof(yyarg) + else yystuck(lastMatch) + | SOME(inp, strm') => + if inp = #"(" + then yyQ12(strm', lastMatch) + else if inp < #"(" + then if inp = #"\n" + then yyQ11(strm', lastMatch) + else yyQ10(strm', lastMatch) + else if inp = #"*" + then yyQ13(strm', lastMatch) + else yyQ10(strm', lastMatch) + (* end case *)) +fun yyQ9 (strm, lastMatch : yymatch) = yyAction27(strm, yyNO_MATCH) +fun yyQ8 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction28(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"\n" + then yyAction28(strm, yyNO_MATCH) + else yyQ8(strm', yyMATCH(strm, yyAction28, yyNO_MATCH)) + (* end case *)) +fun yyQ1 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => + if yyInput.eof(!(yystrm)) + then UserDeclarations.eof(yyarg) + else yyAction28(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"\n" + then yyQ9(strm', yyMATCH(strm, yyAction28, yyNO_MATCH)) + else yyQ8(strm', yyMATCH(strm, yyAction28, yyNO_MATCH)) + (* end case *)) +fun yyQ4 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction26(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"\n" + then yyAction26(strm, yyNO_MATCH) + else yyQ4(strm', yyMATCH(strm, yyAction26, yyNO_MATCH)) + (* end case *)) +fun yyQ7 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction24(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"\n" + then yyAction24(strm, yyNO_MATCH) + else yyQ4(strm', yyMATCH(strm, yyAction24, yyNO_MATCH)) + (* end case *)) +fun yyQ6 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction26(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"\v" + then yyQ4(strm', yyMATCH(strm, yyAction26, yyNO_MATCH)) + else if inp < #"\v" + then if inp = #"\n" + then yyAction26(strm, yyNO_MATCH) + else yyQ4(strm', yyMATCH(strm, yyAction26, yyNO_MATCH)) + else if inp = #"}" + then yyQ7(strm', yyMATCH(strm, yyAction26, yyNO_MATCH)) + else yyQ4(strm', yyMATCH(strm, yyAction26, yyNO_MATCH)) + (* end case *)) +fun yyQ5 (strm, lastMatch : yymatch) = yyAction25(strm, yyNO_MATCH) +fun yyQ0 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => + if yyInput.eof(!(yystrm)) + then UserDeclarations.eof(yyarg) + else yyAction26(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"\v" + then yyQ4(strm', yyMATCH(strm, yyAction26, yyNO_MATCH)) + else if inp < #"\v" + then if inp = #"\n" + then yyQ5(strm', yyMATCH(strm, yyAction26, yyNO_MATCH)) + else yyQ4(strm', yyMATCH(strm, yyAction26, yyNO_MATCH)) + else if inp = #"%" + then yyQ6(strm', yyMATCH(strm, yyAction26, yyNO_MATCH)) + else yyQ4(strm', yyMATCH(strm, yyAction26, yyNO_MATCH)) + (* end case *)) +in + (case (!(yyss)) + of DUMP => yyQ0(!(yystrm), yyNO_MATCH) + | POSTLUDE => yyQ1(!(yystrm), yyNO_MATCH) + | COMMENT => yyQ2(!(yystrm), yyNO_MATCH) + | INITIAL => yyQ3(!(yystrm), yyNO_MATCH) + (* end case *)) +end + end + in + continue() + handle IO.Io{cause, ...} => raise cause + end + in + lex + end + in + fun makeLexer yyinputN = mk (yyInput.mkStream yyinputN) + fun makeLexer' ins = mk (yyInput.mkStream ins) + end + + end diff --git a/ml-burg/burg.sml b/ml-burg/burg.sml new file mode 100644 index 0000000..b27810f --- /dev/null +++ b/ml-burg/burg.sml @@ -0,0 +1,1297 @@ +(* burg.sml + * + * COPYRIGHT (c) 1995 AT&T Bell Laboratories. + * + * $Log$ + * Revision 1.2 2000/06/01 18:33:42 monnier + * bring revisions from the vendor branch to the trunk + * + * Revision 1.1.1.8 1999/04/17 18:56:04 monnier + * version 110.16 + * + * Revision 1.2 1997/10/28 15:02:45 george + * Made compatible with new basis + * +# Revision 1.1.1.1 1997/01/14 01:37:59 george +# Version 109.24 +# + * Revision 1.1.1.2 1997/01/11 18:52:29 george + * ml-burg Version 109.24 + * + * Revision 1.3 1996/06/03 17:48:15 jhr + * Changes to bring ML-Burg upto new SML/NJ library. + * + * Revision 1.2 1996/02/26 15:02:05 george + * print no longer overloaded. + * use of makestring has been removed and replaced with Int.toString .. + * use of IO replaced with TextIO + * + * Revision 1.1.1.1 1996/01/31 16:01:24 george + * Version 109 + * + *) + + +signature BURGEMIT = sig + exception BurgError + val emit : TextIO.instream * (unit -> TextIO.outstream) -> unit +end + + + +structure BurgEmit : BURGEMIT = + struct + + structure HashStringKey : HASH_KEY = struct + type hash_key = string + val hashVal = HashString.hashString + val sameKey = (op =) : string * string -> bool + end + structure BurgHash = HashTableFn (HashStringKey) + exception NotThere; (* raised by BurgHash.find *) + + exception BurgError (* for error reporting *) + + val inf = 16383 + + open BurgAST + + (* debugging *) + fun debug s = (TextIO.output (TextIO.stdErr, s); + TextIO.flushOut TextIO.stdErr) + + + (* Output functions *) + val s_out = ref TextIO.stdOut (* changed into the output stream *) + fun say s = TextIO.output (!s_out, s) + fun saynl s = say (s^"\n") + fun sayi s = say ("\t"^s) + fun sayinl s = say ("\t"^s^"\n") + + + fun arrayapp (function, array) = + let + val len = Array.length array + fun loop pos = + if pos=len then () + else + (function (Array.sub (array, pos)); loop (pos+1)) + in + loop 0 + end + + fun arrayiter (function, array) = + let + val len = Array.length array + fun loop pos = + if pos=len then () + else + (function (pos, Array.sub (array, pos)); loop (pos+1)) + in + loop 0 + end + + fun iter (function, n) = + let + fun loop pos = + if pos=n then () else (function pos; loop (pos+1)) + in + loop 0 + end + + fun listiter (function, lis) = + let + fun loop (pos, li) = + case li of + [] => () + | (l::ll) => (function (pos, l); loop ((pos+1), ll)) + in + loop (0, lis) + end + + exception NotSameSize + + fun exists2 (function, list1, list2) = + let + exception Found + fun loop ([],[]) = () + | loop (e1::l1,e2::l2) = + if function (e1,e2) then raise Found else loop (l1,l2) + | loop _ = raise NotSameSize + in + (loop (list1,list2); false) handle Found => true + end + + fun forall2 (f,l1,l2) = not (exists2 (not o f, l1, l2)) + + fun map2 (function, list1, list2) = + let + fun loop ([],[],acc) = rev acc + | loop (e1::l1,e2::l2,acc) = loop (l1,l2,(function(e1,e2))::acc) + | loop _ = raise NotSameSize + in + loop (list1,list2,[]) + end + + fun tofirstupper s = (case String.explode s + of [] => "" + | (c::r) => implode(Char.toUpper c :: (map Char.toLower r)) + (* end case *)) + + fun emit (s_in, oustreamgen) = + let + + (* + * Error reporting + *) + val error_encountered = ref false + fun warning s = (error_encountered := true; + TextIO.output (TextIO.stdErr, "Error: "^s^"\n"); + TextIO.flushOut TextIO.stdErr) + fun error s = (TextIO.output (TextIO.stdErr, "Error: "^s^"\n"); + TextIO.flushOut TextIO.stdErr; + raise BurgError) + fun stop_if_error () = if !error_encountered then raise BurgError else () + + (* + * ids (for hashing) : + * TERMINAL (internal terminal number, external terminal string/number) + * NONTERMINAL (internal nonterminal number) + *) + datatype ids = TERMINAL of int * string + | NONTERMINAL of int + + (* hash table type *) + type htt = ids BurgHash.hash_table + + (* + * rule_pat : + * NT (nonterminal) + * T (terminal, sons) + *) + datatype rule_pat = NT of int | T of int * rule_pat list + + (* + * rule + *) + type ern = string (* type for external rule name *) + type rule = {nt:int, pat:rule_pat, ern:ern, cost: int, num:int} + + + + (* hash table symbols *) + val HT = BurgHash.mkTable (60, NotThere) : htt + + (* hash table for rule names and the arity of the pattern *) + val HR = BurgHash.mkTable (60, NotThere) + : int BurgHash.hash_table + + + val start_sym = ref (NONE : string option) (* %start symbol *) + val start = ref 0 (* nonterminal where to start *) + + + val term_prefix = ref "" (* prefix for terminals *) + val rule_prefix = ref "" (* prefix for rules *) + val sig_name = ref "" (* BURM by default *) + val struct_name = ref "" (* Burm (first upper, rest lower) *) + + val nb_t = ref 0 (* current internal terminal number *) + val nb_nt = ref 0 (* current internal nonterminal number *) + + (* Return a new internal terminal number *) + fun gen_tnum () = !nb_t before (nb_t := !nb_t+1) + + (* Return a new internal nonterminal number *) + fun gen_ntnum () = !nb_nt before (nb_nt := !nb_nt+1) + + + (* + * Emit the header + *) + fun emit_header (SPEC {head, ...}) = app say head + + + (* + * Emit the tail + *) + fun emit_tail (SPEC {tail, ...}) = app say tail + + + (* + * Give each terminal an internal terminal number, + * and remember the external terminal number. + * Also, find start symbol. + *) + fun reparse_decls (SPEC {decls=decls, ...}) = + let + val t_prefix = ref (NONE : string option) + val r_prefix = ref (NONE : string option) + val s_name = ref (NONE : string option) + + fun newt (sym, etn') = + let + val etn = case etn' of + SOME str => str + | NONE => sym + in + case (BurgHash.find HT sym) : ids option of + NONE => BurgHash.insert HT (sym, TERMINAL (gen_tnum(), etn)) + | SOME _ => warning ("term "^sym^" redefined") + end + + fun newdecl (START s) = + (case !start_sym of + NONE => start_sym := (SOME s) + | (SOME _) => warning "%start redefined") + | newdecl (TERM l) = app newt l + | newdecl (TERMPREFIX tp) = + (case (!t_prefix) of + NONE => t_prefix := (SOME tp) + | _ => warning "%termprefix redefined") + | newdecl (RULEPREFIX rp) = + (case (!r_prefix) of + NONE => r_prefix := (SOME rp) + | _ => warning "%ruleprefix redefined") + | newdecl (SIG s) = + (case (!s_name) of + NONE => s_name := (SOME s) + | _ => warning "%sig redefined") + in + app newdecl decls; + if !nb_t=0 then error "no terminals !" else (); + term_prefix := + (case (!t_prefix) of + NONE => "" + | SOME tp => tp); + rule_prefix := + (case (!r_prefix) of + NONE => "" + | SOME rp => rp); + sig_name := + (case (!s_name) of + NONE => "BURM" + | SOME s => String.translate (String.str o Char.toUpper) s); + struct_name := tofirstupper (!sig_name) + end (* fun reparse_decls *) + + + fun get_id sym = + case (BurgHash.find HT sym) : ids option of + NONE => error ("symbol "^sym^" not declared") + | SOME id => id + + + (* + * Arrays that contain for each t or nt its external symbol. + *) + val sym_terminals = ref (Array.array (0,("",""))) + val sym_nonterminals = ref (Array.array (0,"")) + + + fun build_num_to_sym_arrays () = + let + fun store (sym, TERMINAL (t, etn)) = + Array.update (!sym_terminals, t, (sym, etn)) + | store (sym, NONTERMINAL nt) = + Array.update (!sym_nonterminals, nt, sym) + in + sym_terminals := Array.array (!nb_t, ("","")); + sym_nonterminals := Array.array (!nb_nt, ("")); + BurgHash.appi store HT + end + + fun get_ntsym nt = Array.sub (!sym_nonterminals, nt) + fun get_tsym t = #1 (Array.sub (!sym_terminals, t)) + + + fun reparse_rules (SPEC {rules=spec_rules, ...}) = + let + (* Arity for terminals. *) + val t_arity = Array.array (!nb_t, NONE : int option) + + fun newnt (RULE (ntsym, _, _, _)) = + case (BurgHash.find HT ntsym) : ids option of + NONE => BurgHash.insert HT (ntsym, NONTERMINAL (gen_ntnum ())) + | SOME (TERMINAL _) => + warning (ntsym^" redefined as a nonterminal") + | SOME (NONTERMINAL _) => () + + + val rule_num = ref 0 (* first rule is rule 1 *) + + fun newrule (RULE (ntsym, pattern, ern, costlist)) = + let + val num = (rule_num := !rule_num+1; !rule_num) + + val nt = + case BurgHash.find HT ntsym of + SOME (NONTERMINAL nt) => nt + | _ => error "internal : get nt" + + val cost = case costlist of [] => 0 | (c::_) => c + + val pat = + let + fun makepat (PAT (sym, sons)) = + case get_id sym of + NONTERMINAL nt => + (NT nt) before + (if (null sons) then () else + warning ("nonterminal "^sym^" is not a tree")) + | TERMINAL (t, _) => + let + val len = List.length sons + in + case Array.sub (t_arity, t) of + NONE => Array.update (t_arity, t, SOME len) + | SOME len' => if len=len' then () else + warning ("bad arity for terminal "^sym); + T (t, map makepat sons) + end + in + makepat pattern + end (* val pat *) + val patarity = + let + fun cnt (NT _, n) = n+1 + | cnt (T (_, pat), n) = + List.foldl cnt n pat + in + cnt (pat, 0) + end + in + case (BurgHash.find HR ern) of + NONE => BurgHash.insert HR (ern, patarity) + | SOME ar => if ar = patarity then () else + warning ("rulename "^ern^" is used with patterns of different arity"); + {nt=nt, pat=pat, ern=ern, cost=cost, num=num} + end (* fun newrule *) + + val _ = app newnt spec_rules + val _ = stop_if_error () + val _ = if !nb_nt=0 then error "no rules !" else () + val rules = Array.fromList (map newrule spec_rules) + val _ = stop_if_error () + val _ = build_num_to_sym_arrays () + val arity = Array.tabulate (!nb_t, (* terminals numbers begin at 0 *) + fn i => case Array.sub (t_arity, i) of + NONE => 0 before + (warning ("terminal "^(get_tsym i)^" unused")) + | SOME len => len) + val _ = stop_if_error () + in + (rules, arity) + end (* fun reparse_rules *) + + + fun print_intarray array = + let + fun printit (pos, n) = + (if pos>0 then say "," else (); + say (Int.toString n) + ) + in + arrayiter (printit, array) + end + + (* + * Print a rule. + *) + fun print_rule ({nt, pat, ern, cost, ...} : rule) = + let + fun print_sons [] = () + | print_sons [p] = print_pat p + | print_sons (p::pl) = + (print_pat p; say ","; print_sons pl) + and print_pat (NT nt) = say (get_ntsym nt) + | print_pat (T (t, sons)) = + (say (get_tsym t); + case (List.length sons) of + 0 => () + | len => (say "("; print_sons sons; say ")") + ) + in + say ((get_ntsym nt)^":\t"); + print_pat pat; + say ("\t= "^ern^" ("^(Int.toString cost)^");\n") + end + + + fun prep_rule_cons ({ern=ern, ...} : rule) = (!rule_prefix)^ern + + + fun prep_node_cons t = + let + val (sym, _) = Array.sub (!sym_terminals, t) + in + "N_"^sym + end + + + fun prep_term_cons t = (!term_prefix)^(#2 (Array.sub (!sym_terminals, t))) + + + (* + * rules_for_lhs : array with the rules for a given lhs nt + * chains_for_rhs : array with the chain rules for a given rhs nt + * rule_groups : + * (rl,ntl,str_for_match,uniqstr,iscst,iswot) list list array + * array of, for each terminal that begin a pattern + * list of, for each different "case of" + * list of, for each pattern in "case of" + * (rule list * ntl) list + * string for the match expression printing + * unique string for constant patterns + * is_cst (bool: is the pattern without nonterminals) + * is_wot (bool: is the pattern without terminals : A(x,y,z,t)) + *) + + fun build_rules_tables (rules : rule array) = + let + val rules_for_lhs = Array.array (!nb_nt, []:rule list) + val chains_for_rhs = Array.array (!nb_nt, []:rule list) + + fun add_lhs_rhs (rule as {nt,pat,...} : rule) = + (Array.update (rules_for_lhs, nt, + rule::(Array.sub (rules_for_lhs, nt))); + case pat of + NT rhs => Array.update (chains_for_rhs, rhs, + rule::(Array.sub (chains_for_rhs, rhs))) + | _ => () + ) + + + fun findntl (rule as {pat,...} : rule) = + let + fun flat (NT nt, ntl) = nt::ntl + | flat (T (_,sons), ntl) = List.foldr flat ntl sons + in + (rule, flat (pat,[])) + end + + + local + exception NotSamePat; + fun samepattern (NT _, NT _) = true + | samepattern (T (t1,spat1), T (t2, spat2)) = + if t1=t2 + then samepatternsons (spat1,spat2) + else raise NotSamePat + | samepattern _ = raise NotSamePat + and samepatternsons (l1,l2) = + if ((forall2 (fn (p1,p2) => samepattern (p1,p2), l1, l2)) + handle NotSameSize => raise NotSamePat) + then true + else raise NotSamePat + in + fun samepat (p1,p2) = + samepattern (p1,p2) handle NotSamePat => false + end + + fun clustersamepat (zap as ({pat,...}:rule, _), rg) = + let + fun loop ([],_) = (pat,[zap])::rg + | loop ((e as (p,zapl))::rest, acc) = + if samepat (p,pat) + then acc@((p,zap::zapl)::rest) (* don't keep order *) + else loop (rest,e::acc) + in + loop (rg, []) + end + + + fun minmaxcostlhss (pat,zapl) = + let + fun min (({cost,...}:rule,_), b) = if cost<=b then cost else b + fun max (({cost,...}:rule,_), b) = if cost>=b then cost else b + val mincost = List.foldl min inf zapl + val maxcost = List.foldl max ~1 zapl + fun addlhs (({nt=lhs,...}:rule,_), lhss) = + let + fun loop ([],_) = lhs::lhss + | loop (e as (i::il), acc) = + if lhs=i then lhss + else if lhst2 then raise (Forced NotUnif) else + (let + val sonsg = map2 (uniftype, spat1, spat2) + fun addson (NotUnif,_) = raise (Forced NotUnif) + | addson (_,NotUnif) = raise (Forced NotUnif) + | addson (NoMG,_) = NoMG + | addson (_,NoMG) = NoMG + | addson (SameG,x) = x + | addson (x,SameG) = x + | addson (FirstMG, FirstMG) = FirstMG + | addson (SecondMG, SecondMG) = SecondMG + | addson _ = NoMG + in + List.foldl addson SameG sonsg + end + handle NotSameSize => error "bug : uniftype") + in + fun unify (p1,p2) = (uniftype (p1,p2)) handle (Forced x) => x + end + + + (* "matches" is a list. Each elem is a list of (pat,...) + * in increasing order of minimum cost for the rl, and with + * either non-unifiable patterns, or with a pattern more general + * than another -- but only if the more general one is second, and + * it has a strictly higher cost, and all lhs of rules in the more + * general pattern are also lhs of some rules in the less general + * one (that is, if the less general rule matches, we lose + * nothing in not seeing the more general one). + * That's all. + *) + fun clustermatches (elem as (pat,_,mincost,maxcost,lhss), + matches) = + let + (* works on already (increasing,unique) ordered lists *) + fun subset ([],_) = true + | subset (_,[]) = false + | subset (a1 as (e1::l1),e2::l2) = + if e1=e2 then subset (l1,l2) + else if e1>(e2:int) then subset (a1,l2) + else false + datatype sowhat = ANOTHER | NOTU | AFTER | BEFORE of int + fun loop (prev, i, []) = prev + | loop (prev, i, (p,_,min,max,lh)::rest) = + case unify (pat,p) of + NotUnif => loop (prev,i+1,rest) + | NoMG => ANOTHER + | SameG => error "bug : clustermatches.SameG" + | FirstMG => + if mincost>(max:int) andalso subset (lhss,lh) + then + case prev of + NOTU => loop (AFTER,i+1,rest) + | AFTER => loop (AFTER,i+1,rest) + | BEFORE k => ANOTHER + | _ => error "bug : clustermatches.FirstMG" + else ANOTHER + | SecondMG => + if min>(maxcost:int) andalso subset (lh,lhss) + then + case prev of + NOTU => loop (BEFORE i,i+1,rest) + | AFTER => loop (BEFORE i,i+1,rest) + | BEFORE k => ANOTHER + | _ => error "bug : clustermatches.SecondMG" + else ANOTHER + fun insertat (0,prev,next,e) = (rev prev)@(e::next) + | insertat (n,prev,x::next,e) = insertat (n-1,x::prev,next,e) + | insertat (_,prev,[],e) = rev (e::prev) + fun try ([],_) = [elem]::matches + | try (l::ll,acc) = + case loop (NOTU,0,l) of + ANOTHER => try (ll,l::acc) + | NOTU => acc@((elem::l)::ll) (* don't keep order *) + | AFTER => acc@((l@[elem])::ll) + | BEFORE i => acc@((insertat (i,[],l,elem))::ll) + in + try (matches,[]) + end + + + val uniq_cnt = ref 0 + + fun compute (pat, rlntll, _, _, _) = + let + fun do_pat (NT nt, cnt, iswot) = + let val s = Int.toString cnt in + ("(s"^s^"_c,s"^s^"_r,_,_)", cnt+1, iswot) + end + | do_pat (T (t,sons), cnt, _) = + let + val (s,cnt',_) = do_sons (sons, cnt) + in + ("(_,_,"^(prep_node_cons t) + ^(if null sons then "" else + if null (tl sons) then s else + "("^s^")") + ^",_)" + , cnt', false) + end + and do_sons (sons,cnt) = + let + val (s,cnt,_,iswot) = + List.foldl (fn (pat,(s,cnt,first,iswot)) => + let + val (s',cnt',iswot') = + do_pat (pat,cnt,iswot) + in + (if first then s' else s^","^s', cnt', false, + iswot') + end + ) ("",cnt,true,true) sons + in (s,cnt,iswot) end + + val (string_for_match, iscst, iswot) = + case pat of + T (_,sons) => + let val (s,c,iswot) = do_sons (sons,0) + in (s,c=0,iswot) end + | NT _ => error "bug : string_for_match" + val uniqstr = Int.toString(!uniq_cnt) before (uniq_cnt := !uniq_cnt+1) + + in + (rlntll, string_for_match, uniqstr, iscst, iswot) + end + + val tgroup = Array.array (!nb_t, []:rule list) + + fun addt (rule as {pat,...} : rule) = + case pat of + T (t,_) => Array.update (tgroup, t, rule::(Array.sub (tgroup, t))) + | NT _ => () + val _ = arrayapp (addt,rules) + + fun eacht t = + let + val v1 = Array.sub (tgroup, t) + (* v1 : rule list *) + val v2 = map findntl v1 + (* v2 : (rule * ntl) list (= zap list) *) + val v3 = List.foldl clustersamepat [] v2 + (* v3 : (pattern * zap list) list *) + val v4 = map minmaxcostlhss v3 + (* v4 : (pattern * zap list * mincost * maxcost * lhss) list*) + val v5 = map clustersamentl v4 + (* v5 : same thing with (rule list * ntl) list (= rlntll) + instead of zap list *) + val v6 = List.foldl clustermatches [] v5 + (* v6 : (pattern * rlntll * min * max * lhss) list list *) + in + (* now, inside each subgroup, compute the elements *) + map (map compute) v6 + (* : (rlntll*str_for_match*uniqstr*iscst*iswot) list list *) + end + + val rule_groups = Array.tabulate (!nb_t, eacht) + in + arrayapp (add_lhs_rhs, rules); + (rules_for_lhs, chains_for_rhs, rule_groups) + end + + + (* + * Check that each nonterminal is reachable from start. + *) + fun check_reachable (start, rules_for_lhs : rule list array) = + let + val notseen = Array.array (!nb_nt, true) + fun explore_nt nt = + (Array.update (notseen, nt, false); + app (fn ({pat,...}:rule) => reach pat) + (Array.sub (rules_for_lhs, nt)) + ) + and reach (NT nt) = + if Array.sub (notseen, nt) then explore_nt nt else () + | reach (T (t, sons)) = app reach sons + fun test (nt, b) = + if b then + warning ("nonterminal "^(get_ntsym nt)^" is unreachable") + else () + in + explore_nt start; + arrayiter (test, notseen); + stop_if_error () + end + + + (** + ** Emit the code + **) + + fun emit_type_rule rules = + let + (* I just want a map, really, not a hashtable. *) + val H = BurgHash.mkTable (32, NotThere) : unit BurgHash.hash_table + val first = ref true + fun onerule (rule as {ern=ern, ...} : rule) = + let + val name = prep_rule_cons rule + in + case (BurgHash.find H name) of + NONE => + let + val patarity = + case (BurgHash.find HR ern) of + NONE => error "emit_type_rule, no rule name ?" + | SOME ar => ar + fun pr 0 = "" + | pr 1 = " of (rule * tree)" + | pr n = ((pr (n-1))^" * (rule * tree)") + val constructor = name^(pr patarity) + in + BurgHash.insert H (name, ()); + if !first then first := false else say "\t\t| "; + saynl constructor + end + | SOME _ => () + end + in + say " datatype rule = "; + arrayapp (onerule, rules) + end + + + + fun emit_ruleToString rules = let + val H : unit BurgHash.hash_table = BurgHash.mkTable(32,NotThere) + val first = ref true + fun onerule (rule as {ern,...}:rule) = let + val name = prep_rule_cons rule + in + case (BurgHash.find H name) + of NONE => let + val patarity = + case BurgHash.find HR ern + of NONE => error "emit_ruleToString.onerule" + | SOME ar => ar + fun pr 0 = "" + | pr _ = " _" + val constructor = "("^ name ^ (pr patarity) ^ ")" + in + BurgHash.insert H (name,()); + if !first then first:=false + else say " | ruleToString"; + say constructor; + saynl (" = " ^ "\"" ^ name ^ "\"") + end + | SOME _ => () + end + in + say " fun ruleToString "; + arrayapp (onerule,rules) + end + + + + fun emit_debug rules = + let + fun p_nterm (i, sym) = + saynl ("nonterm "^(Int.toString i)^" : "^sym) + fun p_rule (i, rule as {num, ...} : rule) = + (say ("rule "^(Int.toString num)^" : "); + print_rule rule + ) + in + saynl "(***** debug info *****"; + arrayiter (p_nterm, !sym_nonterminals); + say "\n"; + arrayiter (p_rule, rules); + saynl "**********************)\n\n" + end + + + fun emit_struct_burmterm () = + let + fun loop t = + (if t=0 then () else say "\t | "; + saynl (prep_term_cons t) + ) + in + saynl ("structure "^(!struct_name)^"Ops = struct"); + say " datatype ops = "; + iter (loop, !nb_t); + saynl "end\n\n" + end + + fun emit_sig_burmgen () = + (saynl ("signature "^(!sig_name)^"_INPUT_SPEC = sig"); + saynl " type tree"; + saynl (" val opchildren : tree -> "^(!struct_name) + ^"Ops.ops * (tree list)"); + saynl "end\n\n" + ) + + fun emit_sig_burm rules = + (saynl ("signature "^(!sig_name)^" = sig"); + saynl " exception NoMatch"; + saynl " type tree"; + emit_type_rule rules; + saynl " val reduce : tree -> rule * tree"; + saynl " val ruleToString : rule -> string"; + saynl "end\n\n" + ) + + fun emit_beg_functor (rules, arity) = + let + fun loop_node t = + let + val ar = Array.sub (arity, t) + fun loop_sons i = + (say "s_tree"; + if i=ar then () else + (say " * "; loop_sons (i+1)) + ) + in + say (if t=0 then " " else " | "); + say (prep_node_cons t); + if ar>0 then + (say "\t\tof "; + loop_sons 1 + ) + else (); + say "\n" + end + in + saynl ("functor "^(!struct_name)^"Gen (In : " + ^(!sig_name)^"_INPUT_SPEC) : "^(!sig_name)^" ="); + saynl " struct\n"; + saynl " type tree = In.tree\n"; + saynl " exception NoMatch"; + emit_type_rule rules; + say "\n\n"; + emit_ruleToString rules; say "\n\n"; + saynl " type s_cost = int Array.array"; + saynl " type s_rule = int Array.array"; + saynl " datatype s_node ="; + iter (loop_node, !nb_t); + saynl " withtype s_tree = s_cost * s_rule * s_node * tree\n\n"; + saynl " val sub = Array.sub"; + saynl " val update = Array.update" + end + + + fun emit_val_cst (rules, arity, chains_for_rhs, rule_groups) = + let + fun do_cstrule (t, rlntll: (rule list * int list) list, + uniqstr, iscst) = + if iscst then + let + val ar = Array.sub (arity, t) + val a_cost = Array.array (!nb_nt, inf); + val a_rule = Array.array (!nb_nt, 0); + + fun record ({nt=lhs, cost, num, ...} : rule, c) = + let + val cc = c + cost + in + if cc < (Array.sub (a_cost, lhs)) then + (Array.update (a_cost, lhs, cc); + Array.update (a_rule, lhs, num); + app (fn rule => record (rule, cc)) + (Array.sub (chains_for_rhs, lhs)) + ) + else () + end + in + app ((app (fn rule => record (rule, 0))) o #1) rlntll; + if ar=0 then + (saynl (" val leaf_"^(prep_node_cons t)^" ="); + say " (Array.fromList ["; + print_intarray a_cost; + say "],\n Array.fromList ["; + print_intarray a_rule; + saynl ("],\n "^(prep_node_cons t)^")") + ) + else + (say (" val cst_cost_"^uniqstr^" = Array.fromList ["); + print_intarray a_cost; + saynl "]"; + say (" val cst_rule_"^uniqstr^" = Array.fromList ["); + print_intarray a_rule; + saynl "]" + ) + end + else () + + fun do_cstrules (t, ll) = + app (app (fn (rlntll,_,uniqstr,iscst,_) => + do_cstrule (t, rlntll, uniqstr, iscst))) ll + val n = Int.toString (!nb_nt) + val sinf = Int.toString inf + in + arrayiter (do_cstrules, rule_groups); + saynl (" val s_c_nothing = Array.array ("^n^","^sinf^")"); + saynl (" val s_r_nothing = Array.array ("^n^",0)"); + say "\n\n" + end + + + fun emit_label_function (rules, arity, chains_for_rhs, rule_groups) = + let + val firstcl = ref true + fun emit_closure (nt, rl : rule list) = + let + val firstrule = ref true + fun emit_cl ({nt=lhs, cost, num, ...} : rule) = + let + val c = Int.toString cost + val slhs = Int.toString lhs; + in + if !firstrule + then firstrule := false + else say ";\n\t "; + saynl ("if c + "^c^" < sub (s_c,"^slhs^") then"); + sayinl (" (update (s_c,"^slhs^",c + "^c^");"); + sayi (" update (s_r,"^slhs^","^(Int.toString num) + ^")"); + if null (Array.sub (chains_for_rhs, lhs)) then () else + say (";\n\t closure_"^(get_ntsym lhs) + ^" (s_c, s_r, c + "^c^")"); + saynl "\n\t )"; + sayinl " else"; + sayi " ()" + end + in + if null rl then () else + (if !firstcl then + (firstcl := false; say "\tfun") else say "\tand"; + saynl (" closure_"^(get_ntsym nt)^" (s_c, s_r, c) ="); + sayi " ("; + List.app emit_cl rl; + saynl "\n\t )" + ) + end + + + val nbnt = Int.toString (!nb_nt) + val sinf = Int.toString inf + val firstmatch = ref true + + fun emit_match t = + let (* "(" *) + val ar = Array.sub (arity, t) + + fun inlistofsons i = (say ("t"^(Int.toString i)); + if i=(ar-1) then () else say ",") + + fun listofsons () = + (say " ("; iter (inlistofsons, ar); say ")") + + val firstcst = ref true + fun emit_match_cst (_,str,uniq,iscst,_) = + if iscst then + (if !firstcst + then (say "\t "; firstcst := false) + else say "\t | "; + saynl ("("^str^") =>"); + sayinl ("\t (cst_cost_"^uniq^", cst_rule_"^uniq^")") + ) + else () + + + + val firstcase = ref true + val firstcaseelem = ref true + fun emit_match_case (rlntll,str,uniq,iscst,iswot) = + if iscst then () else + (if !firstcase then + (firstcase := false; + saynl "z =>"; + sayinl "\tlet"; + sayinl ("\t val s_c = Array.array (" + ^nbnt^","^sinf^")"); + sayinl ("\t val s_r = Array.array (" + ^nbnt^",0)"); + sayinl "\tin") + else (); + if !firstcaseelem then + (firstcaseelem := false; + sayinl "\tcase z of"; + sayi "\t ") + else sayi "\t | "; + saynl ("("^str^") =>"); + sayinl "\t ("; + let + fun dorules (rl : rule list, ntl) = + let + fun dorule ({nt=lhs, num, cost, ...} : rule) = + let + val slhs = Int.toString lhs + val c = Int.toString cost + in + sayinl ("\t\t if c + "^c^" < sub (s_c,"^slhs + ^") then"); + sayinl ("\t\t (update (s_c, "^slhs + ^", c + "^c^");"); + sayinl ("\t\t update (s_r, "^slhs + ^", "^(Int.toString num)^");"); + if null (Array.sub (chains_for_rhs, lhs)) then () + else sayinl ("\t\t closure_" + ^(get_ntsym lhs) + ^" (s_c, s_r, c + "^c^");"); + sayinl "\t\t ())"; + sayinl "\t\t else ();" + end + in + sayi "\t if "; + listiter ((fn (i, nt) => + (if i=0 then () else say "andalso "; + say ("sub (s"^(Int.toString i)^"_r," + ^(Int.toString (nt:int)) + ^")<>0 "))), + ntl); + saynl "then"; + sayinl "\t\t let"; + sayi ("\t\t val c = "); + listiter ((fn (i, nt) => + (if i=0 then () else say " + "; + say ("sub (s"^(Int.toString i)^"_c," + ^(Int.toString (nt:int))^")"))), + ntl); + saynl "\n\t\t\t in"; + app dorule rl; + sayinl "\t\t ()"; + sayinl "\t\t end"; + sayinl "\t else ();" + end + in + app dorules rlntll + end; + sayinl "\t ()"; + sayinl "\t )" + ) (* fun emit_match_case *) + + in (* ")(" fun emit_match *) + + if !firstmatch + then (sayi " "; firstmatch := false) + else sayi "| "; + say ((!struct_name)^"Ops."); + saynl ((prep_term_cons t)^" =>"); + + if ar=0 then (* leaf term *) + if null (Array.sub (rule_groups, t)) + then sayinl (" (s_c_nothing, s_r_nothing, " + ^(prep_node_cons t)^")") + else sayinl (" leaf_"^(prep_node_cons t)) + else (* ar<>0 *) + let + val group = Array.sub (rule_groups, t) + fun dosamecase eleml = + (firstcaseelem := true; + app emit_match_case eleml; + if (not (!firstcaseelem) andalso + not (List.exists (fn (_,_,_,_,iswot) => iswot) eleml)) + then sayinl "\t | _ => ()" else (); + if (not (!firstcaseelem)) then sayinl "\t ;" else () + ) + in + sayinl " let"; + sayi " val ["; + iter (inlistofsons, ar); + saynl "] = map rec_label children"; + sayinl " in"; + if null group then (* transfert rule *) + (sayi " (s_c_nothing, s_r_nothing, "; + say (prep_node_cons t); + listofsons (); + saynl ")" + ) + else + (sayi " let val (s_c, s_r) = case"; + listofsons (); + saynl " of"; + app (app emit_match_cst) group; + sayi (if !firstcst then "\t " else "\t | "); + app dosamecase group; + if !firstcase then + saynl "_ => (s_c_nothing, s_r_nothing)" + else + (sayinl "\t (s_c, s_r)"; + sayinl "\tend" + ); + sayi " in (s_c, s_r, "; + say (prep_node_cons t); + listofsons (); + saynl ") end" + ); + sayinl " end" + end + + end (* ")" fun emit_match *) + + + in + saynl " fun rec_label (tree : In.tree) ="; + saynl " let"; + arrayiter (emit_closure, chains_for_rhs); + sayinl "val (term, children) = In.opchildren tree"; + sayinl "val (s_c, s_r, t) = case term of"; + iter (emit_match, !nb_t); + saynl " in"; + saynl " (s_c, s_r, t, tree)"; + saynl " end\n" + end + + + fun emit_reduce_function (rules) = + let + val firstmatch = ref true + + fun domatch (rule as {num, pat, ...} : rule) = + let + fun flatsons (the_sons, cnt, ntl) = + List.foldl + (fn (patson, (b, c, l, ss)) => + let + val (c', l', ss') = flat (patson, c, l) + in + (false, c', l', (if b then ss' else ss^","^ss')) + end) + (true, cnt, ntl, "") + the_sons + and flat (pat, cnt, ntl) = + case pat of + NT nt => (cnt+1, nt::ntl, "t"^(Int.toString cnt)) + | T (t, sons) => + let + val len = List.length sons + val (_, cnt', ntl', s') = flatsons (sons, cnt, ntl) + val nexts = + "(_,_,"^(prep_node_cons t) + ^(if len=0 then "" else + (if len=1 then " "^s' else " ("^s'^")")) + ^",_)" + in + (cnt', ntl', nexts) + end + + val (cnt, ntl, s) = flat (pat, 0, []) + val ntl = rev ntl + in + if !firstmatch then (firstmatch := false; say "\t\t(") else + say "\t | ("; + saynl ((Int.toString num)^", "^s^") =>"); + sayi ("\t ("^(prep_rule_cons rule)); + case pat of + NT nt => say (" (doreduce (t0,"^(Int.toString nt)^"))") + | T (t, _) => + (case List.length ntl of + 0 => () + | _ => + (say " ("; + listiter ((fn (i,nt) => + (if i=0 then () else say ", "; + say ("doreduce (t"^(Int.toString i)^"," + ^(Int.toString nt)^")"))), + ntl); + say ")") + ); + saynl ")" + end + in + saynl " fun doreduce (stree : s_tree, nt) ="; + saynl " let"; + sayinl "val (s_c, s_r, _, tree) = stree"; + sayinl "val cost = sub (s_c, nt)"; + saynl " in"; + +sayinl ("if cost="^(Int.toString inf)^" then"); +sayinl (" (print (\"No Match on nonterminal \"^(Int.toString nt)^\"\\n\");"); +sayinl (" print \"Possibilities were :\\n\";"); +sayinl (" let"); +sayinl (" fun loop n ="); +sayinl (" let"); +sayinl (" val c = Array.sub (s_c, n);"); +sayinl (" val r = Array.sub (s_r, n);"); +sayinl (" in"); +sayinl (" if c=16383 then () else"); +sayinl (" print (\"rule \"^(Int.toString r)^\" with cost \""); +sayinl (" ^(Int.toString c)^\"\\n\");"); +sayinl (" loop (n+1)"); +sayinl (" end"); +sayinl (" in"); +sayinl (" (loop 0) handle General.Subscript => ()"); +sayinl (" end;"); +sayinl (" raise NoMatch)"); +sayinl ("else"); + + + sayinl " let"; + sayinl " val rulensons ="; + sayinl " case (sub (s_r, nt), stree) of"; + arrayapp (domatch, rules); + sayinl " | _ => raise NoMatch (* bug in iburg *)"; + sayinl " in"; + sayinl " (rulensons, tree)"; + sayinl " end"; + saynl " end\n" + end + + + fun emit_end_functor (start : int) = + (saynl " fun reduce (tree) ="; + saynl (" doreduce (rec_label (tree), "^(Int.toString start)^")"); + saynl " end\n\n" + ) + + in + let + val spec = #1 (Parse.parse s_in) before TextIO.closeIn s_in + val _ = reparse_decls spec + val (rules, arity) = reparse_rules spec + val start = + case !start_sym of + NONE => 0 + | SOME sym => + case get_id sym of + TERMINAL _ => error ("cannot start on a terminal") + | NONTERMINAL n => n + (* rule numbers for each nonterminal (array) *) + val (rules_for_lhs, chains_for_rhs, rule_groups) + = build_rules_tables rules + in + check_reachable (start, rules_for_lhs); + s_out := (oustreamgen ()); + emit_header (spec); + emit_debug (rules); + emit_struct_burmterm (); + emit_sig_burmgen (); + emit_sig_burm (rules); + emit_beg_functor (rules, arity); + emit_val_cst (rules, arity, chains_for_rhs, rule_groups); + emit_label_function (rules, arity, chains_for_rhs, rule_groups); + emit_reduce_function (rules); + emit_end_functor (start); + emit_tail (spec); + TextIO.closeOut (!s_out) + end + end (* fun emit *) + + end + diff --git a/ml-burg/doc/code.sty b/ml-burg/doc/code.sty new file mode 100644 index 0000000..ec76501 --- /dev/null +++ b/ml-burg/doc/code.sty @@ -0,0 +1,336 @@ +% code.sty: -*- latex -*- +% Latex macros for a "weak" verbatim mode. +% -- like verbatim, except \, {, and } have their usual meanings. + +% Environments: code, tightcode, codeaux, codebox, centercode +% Commands: \dcd, \cddollar, \cdmath, \cd, \codeallowbreaks, \codeskip, \^ +% Already defined in LaTeX, but of some relevance: \#, \$, \%, \&, \_, \{, \} + +% Changelog at the end of the file. + +% These commands give you an environment, code, that is like verbatim +% except that you can still insert commands in the middle of the environment: +% \begin{code} +% for(x=1; x] option, then the following newline will +% be read *after* ^M is bound to \cr, so we're cool. If there isn't +% an option given (i.e., default to [c]), then the @\ifnextchar will +% gobble up the newline as it gobbles whitespace. So we insert the +% \cr explicitly. Isn't TeX fun? +\def\codebox{\leavevmode\@ifnextchar[{\@codebox}{\@codebox[c]\cr}} %] + +\def\@codebox[#1]% + {\hbox\bgroup$\if #1t\vtop \else \if#1b\vbox \else \vcenter \fi\fi\bgroup% + \tabskip\z@\setupsmallcode\cd@obeycr% just before cd@obey + \halign\bgroup##\hfil\span} + +\def\endcodebox{\crcr\egroup\egroup\m@th$\egroup} + +% Center the box on the page: +\newenvironment{centercode}% + {\begin{center}\begin{codebox}[c]}% + {\end{codebox}\end{center}} + + +%% code, codeaux, tightcode +%%============================================================================= +%% Code environment as described above. Lines are kept on one page. +%% This actually works by setting a huge penalty for breaking +%% between lines of code. Code is indented same as other displayed paras. +%% Note: to increase left margin, use \begin{codeaux}{\leftmargin=1in}. + +% To allow pagebreaks, say \codeallowbreaks immediately inside the env. +% You can allow breaks at specific lines with a \pagebreak form. + +%% N.B.: The \global\@ignoretrue command must be performed just inside +%% the *last* \end{...} before the following text. If not, you will +%% get an extra space on the following line. Blech. + +%% This environment takes two arguments. +%% The second, required argument is the \list parameters to override the +%% \@listi... defaults. +%% - Usefully set by clients: \topsep \leftmargin +%% - Possible, but less useful: \partopsep +%% The first, optional argument is the extra \parskip glue that you get around +%% \list environments. It defaults to the value of \parskip. +\def\codeaux{\@ifnextchar[{\@codeaux}{\@codeaux[\parskip]}} %] +\def\@codeaux[#1]#2{% + \bgroup\parskip#1% + \begin{list}{}% + {\parsep\z@\leftmargin=\codeindent\rightskip\z@\listparindent\z@\itemindent\z@#2}% + \item[]\setupsmallcode\cd@obeylines}% +\def\endcodeaux{\end{list}\leavevmode\egroup\ignorespaces\global\@ignoretrue} + +%% Code env is codeaux with the default margin and spacing \list params: +\def\code{\codeaux{}} \let\endcode=\endcodeaux + +%% Like code, but with no extra vertical space above and below. +\def\tightcode{\codeaux[=0pt]{\topsep\z@}}% +\let\endtightcode\endcodeaux +% {\vspace{-1\parskip}\begin{codeaux}{\partopsep\z@\topsep\z@}}% +% {\end{codeaux}\vspace{-1\parskip}} + + + +% Reasonable separation between lines of code +\newcommand{\codeskip}{\penalty0\vspace{2ex}} + + +% \cd is used to build a code environment in the middle of text. +% Note: only difference from display code is that cr's are taken +% as unbreakable spaces instead of linebreaks. + +\def\cd{\leavevmode\begingroup\ifmmode\let\startcode=\startmcode\else% + \let\startcode\starttcode\fi% + \setupcode\cd@obeycrsp\startcode} + +\def\cdm{\leavevmode\begingroup\ifmmode\let\startcode=\startmcode\else% + \let\startcode\starttcode\fi% + \setupcode\cd@obeycrsp\cd@mathspecial\startcode} + +\def\starttcode#1{#1\endgroup} +\def\startmcode#1{\hbox{#1}\endgroup} + + +% Restore $&#^_~% to their normal catcodes +% Define \^ to give the ^ char. +% \dcd points to this guy inside a code env. +\def\cd@dcd{\catcode`\$=3\catcode`\&=4\catcode`\#=6\catcode`\^=7% + \catcode`\_=8\catcode`\~=13\catcode`\%=14\def\^{\char`\^}} + +% Selectively enable $, and $^_ as special. +% \cd@mathspecial also defines \^ give the ^ char. +% \cddollar and \cdmath point to these guys inside a code env. +\def\cd@dollarspecial{\catcode`\$=3} +\def\cd@mathspecial{\catcode`\$=3\catcode`\^=7\catcode`\_=8% + \def\^{\char`\^}} + + +% Change log: +% Started off as some macros found in C. Rich's library. +% Olin 1/90: +% Removed \makeatletter, \makeatother's -- they shouldn't be there, +% because style option files are read with makeatletter. The terminal +% makeatother screwed things up for the following style options. +% Olin 3/91: +% Rewritten. +% - Changed things so blank lines don't get compressed out (the \leavevmove +% in \cd@cr and \cd@crwb). +% - Changed names to somewhat less horrible choices. +% - Added lots of doc, so casual hackers can more easily mess with all this. +% - Removed `'"@ from the set of hacked chars, since they are already +% non-special. +% - Removed the bigcode env, which effect can be had with the \codeallowbreaks +% command. +% - Removed the \@noligs command, since it's already defined in latex.tex. +% - Win big with the new \dcd, \cddollar, and \cdmath commands. +% - Now, *only* the chars \{} are special inside the code env. If you need +% more, use the \dcd command inside a group. +% - \cd now works inside math mode. (But if you use it in a superscript, +% it still comes out full size. You must explicitly put a \scriptsize\tt +% inside the \cd: $x^{\cd{\scriptsize\tt...}}$. A \leavevmode was added +% so that if you begin a paragraph with a \cd{...}, TeX realises you +% are starting a paragraph. +% - Added the codebox env. Tricky bit involving the first line hacked +% with help from David Long. +% +% JHR 8/19/91: +% - Added \setupsmallcode to use in multi-line code displays (code, codeaux and +% codebox environments). +% +% JHR 8/31/91: +% - changed size of small code to \small (from \footnotesize). Also added +% code to set the baselinestretch to 1 in smallcode. +% +% JHR 9/12/91: +% - added \codeindent (set to \parindent) +% +% JHR 11/19/91 +% - added \cdm{} command for supporting math mode in \cd{} diff --git a/ml-burg/doc/doc.bib b/ml-burg/doc/doc.bib new file mode 100644 index 0000000..15c8230 --- /dev/null +++ b/ml-burg/doc/doc.bib @@ -0,0 +1,39 @@ +@InBook{emmelmann-92 + ,author={Emmelmann, H.} + ,key={E92} + ,title={Testing completeness of code selector specifications} + ,booktitle={{LNCS} 641, Compiler Construction} + ,pages={163-175} + ,publisher={Springer-Verlag} + ,year={1992} +} + +@InProceedings{fraser-hanson-proebsting-92 + ,author={Fraser, C. W. and Hanson, D. R. and Proebsting, T. A.} + ,key={FHP92} + ,title={Engineering a simple, efficient code generator generator} + ,organization={ACM} + ,booktitle={Letters on Programming Languages and Systems} + ,year={1992} +} + +@Article{balachandran-dhamdhere-biswas-90 + ,author={Balachandran, A. and Dhamdhere, D. M. and Biswas, S.} + ,key={BDB90} + ,title={Efficient retargetable code generation using bottom-up tree pattern matching} + ,journal={Computer Languages} + ,volume={15(3)} + ,pages={127-140} + ,year={1990} +} + +@InProceedings{proebsting-pldi92 + ,author={Proebsting, T. A.} + ,key={P92} + ,title={Simple and Efficient BURS Table Generation} + ,organization={ACM} + ,booktitle={{SIGPLAN} '92 Conf. on Programming Language Design and Implementation} + ,pages={331-340} + ,month={June} + ,year={1992} +} diff --git a/ml-burg/doc/doc.tex b/ml-burg/doc/doc.tex new file mode 100644 index 0000000..4021f3b --- /dev/null +++ b/ml-burg/doc/doc.tex @@ -0,0 +1,479 @@ +% Documentation for mlburg + +\documentclass[11pt]{article} + +\usepackage{graphicx} +\usepackage{url} +\usepackage{code} + +\begin{document} +\parskip 10pt +\parindent 0in + +\newcommand{\mlburg}{ML-Burg} +\newcommand{\burmgen}{\cd{BurmGen}} +\newcommand{\figureRef}[1]{\mbox{Figure\ \ref{#1}}} + + +\title{\mlburg\ --- Documentation} +\author{\begin{tabular}[t]{c@{\extracolsep{4em}}c} +\ \\ +Florent Guillaume & Lal George \\ +\ \\ + \'Ecole Normale Sup\'erieure & Room 2A-426 \\ + 45, rue d'Ulm & AT\&T Bell Laboratories \\ + 75005 Paris, France & Murray Hill, NJ 07922 \\ +\url{Florent.Guillaume@ens.fr} & \url{george@research.att.com} +\end{tabular}} +\date{June 23, 1993} +\maketitle +\begin{center} +\copyright 1993 L. George, F. Guillaume. +\end{center} + + \section{Introduction} + +\mlburg\ is a Standard ML version of the \cd{iburg} +tool developed by Fraser, Hanson and +Proebsting \cite{fraser-hanson-proebsting-92}. \mlburg\ generates +a Standard ML program to perform bottom-up rewriting of an input tree. +Cost information associated with each rewrite rule is used to derive +the minimum rewrite cost for the entire tree. A successful reduction +corresponds to rewriting the input tree to a special non-terminal +symbol called the {\em start non-terminal}. Upon successful reduction, +facilities are provided to walk the tree emitting semantic actions +corresponding to the rules that matched. + +Like \cd{iburg}, \mlburg\ generates a program that consists of a +large case statement. Indeed, the \cd{i} in \cd{iburg} was meant to +indicate \mbox{{\em interpreted-}burg} to distinguish it from +table driven implementations of similar +tools \cite{balachandran-dhamdhere-biswas-90,proebsting-pldi92}. +We arbitrarily decided to drop the \cd{i} (no pun intended). + +Given a system of rewrite rules augmented with costs, called the {\em +\mlburg\ specification}, \mlburg\ generates the following: + +\begin{itemize} + \item\cd{signature BURM_INPUT_SPEC} + \item\cd{signature BURM} + \item\cd{structure BurmOps} + \item\cd{functor BurmGen(In : BURM_INPUT_SPEC) : BURM} +\end{itemize} + +The signature \cd{BURM_INPUT_SPEC} specifies utilities over the +user supplied input +tree. The required matcher is obtained by applying the functor +\cd{BurmGen} to a structure matching \cd{BURM_INPUT_SPEC}. + + + + \section{ML-Burg specifications} + +\figureRef{f:specification} shows the extended BNF grammar for +\mlburg\ specifications. Grammar symbols are {\it italicized}, terminals +are in {\tt typewriter} font, \{{\it X}\} represents zero or more +occurrences of {\it X}, {\rm [}{\it X}{\rm ]} means {\it X} is optional, +{\sl cost} is a non-negative integer, {\sl trailer} and {\sl header} +are arbitrary pieces of text, and everything else is an identifier. +An identifier is a leading alphabet followed by zero or more +alphanumeric characters and underscores. Comments are delimited +by {\tt (*}, and {\tt *)}. + +A specification consists of three parts: {\it declaration}, {\it rule}, +and {\sl trailer}, separated by \verb|%%|. + + + +\begin{figure}[t] +\small + \begin{center} + \sl + \begin{tabular}{lcl} + spec & $\rightarrow$ & {\it declaration} + \verb.%%. \{{\it rule}\} + \verb.%%. trailer \\[1ex] + + {\it declaration} + & $\rightarrow$ & {\tt \%term} {\it op} \{\verb.|. {\it op}\} \\ + & $\mid$ & {\tt \%start} nonterminal \\ + & $\mid$ & {\tt \%termprefix} termprefix \\ + & $\mid$ & {\tt \%ruleprefix} ruleprefix \\ + & $\mid$ & {\tt \%sig} signame \\ + & $\mid$ & \verb.%{. header \verb.%}. \\[1ex] + + {\it op} + & $\rightarrow$ & operator {\rm [}{\tt =} opname{\rm ]} \\[1ex] + + {\it rule} + & $\rightarrow$ & nonterminal \verb.:. {\it tree} \verb.=. + rulename {\rm [}{\tt (~}cost{\tt ~)}{\rm ]} + \verb.;. \\[1ex] + + {\it tree} + & $\rightarrow$ & nonterminal \\[1ex] + & $\mid$ & operator {\rm [}{\tt (} + {\it tree}\{\verb.,.{\it tree}\}{\tt ~)}{\rm ]} + \end{tabular} + \end{center} +\caption{EBNF \mlburg\ specifications} + +\label{f:specification} +\end{figure} + +A {\tt \%term} declaration enumerates the operators or function symbols +used to construct nodes of the tree. There must be at least one {\tt +\%term} declaration for a valid specification. The {\tt \%start} +declaration, which defaults to the left hand side {\sl nonterminal} of the +first rule, declares the {\em start non-terminal}. The {\sl header} part is +text that is included verbatim at the beginning of the matcher. The names +of the modules generated by \mlburg\ may be changed by using a {\tt +\%sig} declaration (case of {\sl signame} is not significant). For example +if we had a line ``\verb.%sig glop.'' in the declarations, the generated +names would be {\tt GlopOps}, {\tt GLOP\_INPUT\_SPEC}, {\tt GLOP} and {\tt +GlopGen}. This allows for multiple matchers in the same program. + +\begin{figure} +\small +\begin{centercode} +%term ASGNI | ADDI | CVCI | INDIRC | I0I | ADDRLP | CNSTI +%termprefix T_ +%start stmt +%% +stmt: ASGNI(disp,reg) = stmt_ASGNI_disp_reg (1); +stmt: reg = stmt_reg; +reg: ADDI(reg,rc) = reg_ADDI_reg_rc (1); +reg: CVCI(INDIRC(disp)) = reg_CVCI_INDIRC_disp (1); +reg: I0I = reg_I0I; +reg: disp = reg_disp (1); +disp: ADDI(reg,con) = disp_ADDI_reg_con; +disp: ADDRLP = disp_ADDRLP; +rc: con = rc_con; +rc: reg = rc_reg; +con: CNSTI = con_CNSTI; +con: I0I = con_I0I; +%% +\end{centercode} + \caption{Example of \mlburg\ specification.} + \label{f:ex_spec} +\end{figure} + +The rule-part of the specification (following the first {\tt \%\%}) +describes the tree grammar or the rewrite rule system to use. The +\mbox{{\sl nonterminal} {\tt :} {\it tree}} specification can be viewed a +rewrite rule of the form, \mbox{{\sl nonterminal} $\leftarrow$ {\it tree}}. +Each {\sl operator} used in a tree must be mentioned in a {\tt +\%term} declaration. The special case of \mbox{\sl nonterminal +$\leftarrow$ nonterminal}, specifies a chain rule. Associated with +each rule is an optional cost that defaults to zero. The {\sl +rulename}, which is not necessarily unique, is used to identify the +rule during the emission of semantic actions. It is important to note +that the same {\sl rulename} may be associated with multiple rules. + +The {\sl trailer} is an arbitrary piece of text that is inserted at +the end of the generated matcher. This is typically segments of +program that will perform the semantic actions. + +\figureRef{f:ex_spec} show a sample specification taken from +\cite{fraser-hanson-proebsting-92}. The {\tt \%termprefix}, and {\tt +\%ruleprefix} are explained in subsequent sections. + + + \section{Interface between the matcher and the program} + + \subsection{structure BurmOps} + +The \cd{structure BurmOps} declares a type \cd{ops} that enumerates +the operators or functions symbols specified in {\tt \%term} +declarations of the specification. The matcher cannot extract the +operator from the user supplied tree, or establish a correspondence +between nodes in the tree and operators in the specification. + +In the example above (\figureRef{f:ex_spec}), the user may have +defined the tree to be: +\begin{code} + datatype tree = ... + | CNSTI of int + ... +\end{code} +The data constructor \cd{CNSTI} is of arity 1, whereas, in the +specification it is used with arity 0. + +For the example, the generated structure would be: + +\begin{code} + structure BurmOps = struct + datatype ops = + T_ASGNI + | T_ADDI + | T_CVCI + | T_INDIRC + | T_I0I + | T_ADDRLP + | T_CNSTI + end +\end{code} + +The {\tt \%termprefix}, if specified, is used to prepend the {\sl +termprefix} to each operator. If the optional {\sl {\tt =}opname} is +specified with the operator, then {\sl opname} is used in the +datatype \cd{ops} instead of the operator. + + \subsection{signature BURM\_INPUT\_SPEC} + +The \cd{signature BURM_INPUT_SPEC}, shown below, specifies the +interface to the user supplied input tree. +\begin{code} + signature BURM_INPUT_SPEC = sig + type tree + val opchildren : tree -> BurmOps.ops * (tree list) + end +\end{code} +It contains: +\begin{itemize} + \item The type {\tt tree} of trees on which the program operates. + + \item A function {\tt opchildren} which takes a {\tt tree} and returns the +operator (of type {\tt BurmOps.ops}) at the root of this tree, and a list +of children of this root (a {\tt tree list}). +\end{itemize} + +The function {\tt opchildren} must return the children in the order in +which they appear in the rules (which is the only order the matcher knows +of). For example, if the root of the tree corresponds to the operator {\tt +ASGNI} (\figureRef{f:ex_spec}, the first element of the list +must be the tree corresponding to {\tt disp}, and the second +to {\tt reg}. + + + + \subsection{signature BURM} + +The structure generated by the functor \cd{BurmGen} matches the +signature \cd{BURM}. Specified in \cd{BURM} are: + +\begin{itemize} + \item An exception \cd{NoMatch}, which is raised if \cd{reduce} is +called on a tree which cannot be rewritten to the start non-terminal. + + \item The type \cd{tree} (the one passed to the functor). + + \item A datatype \cd{rule} enumerating the rules of the tree +grammar. This datatype is defined by prepending to each {\sl rulename}, +the {\sl ruleprefix} from a \cd{ruleprefix} declaration (if any). +The arity of each constructor is equal to the number of non-terminal +symbols in the pattern. Each \cd{(rule,tree)} pair specifies the +\cd{rule} and the \cd{tree} that matched each non-terminal symbol. +These pairs describe the remaining steps in the reduction. + + \item A function \cd{reduce} which takes a \cd{tree} and +returns a pair \verb|(rule * tree)|. As described above, the \cd{rule} +describes the best match that generated the start non-terminal, and the +\cd{tree} is the original input tree. +\end{itemize} + +In the example above, the signature \cd{BURM} generated is: + +\begin{centercode} +signature BURM = sig + exception NoMatch + type tree + + datatype rule = + stmt_ASGNI_disp_reg of (rule*tree) * (rule*tree) + | stmt_reg of (rule*tree) + | reg_ADDI_reg_rc of (rule*tree) * (rule*tree) + | reg_CVCI_INDIRC_disp of (rule*tree) + | reg_I0I + | reg_disp of (rule*tree) + | disp_ADDI_reg_con of (rule*tree) * (rule*tree) + | disp_ADDRLP + | rc_con of (rule*tree) + | rc_reg of (rule*tree) + | con_CNSTI + | con_I0I + + val reduce : tree -> (rule*tree) +end +\end{centercode} + +The functions \cd{reduce} is used as follows: given a tree $t_0$, +\cd{reduce} returns an initial pair $(r_0, t_0)$ (it returns $t_0$ to +make the user program simpler - Section\ \ref{s:example}). +$r_0$ +describes the first rule to apply to $t_0$ to perform the optimal +reduction. This rule, except in trivial cases, will have in its +pattern several {\sl nonterminal}s which represent other trees to be +reduced. To that end, the constructor $r_0$ carries the pairs +\mbox{$(r_1,t_1), \ldots, (r_n,t_n)$} of its children. +$(r_i,t_i)$ corresponds to the $i$th {\sl nonterminal} in the tree pattern +for the rule $r_0$, when read from left to right. These pairs can be used +to find the rule to use to reduce each child. In turn, the rules $r_1, +\ldots, r_n$ carry information about their children. + +Why return the tree in addition to each rule? Often, additional +information is stored in the tree, and it may be necessary to access this +information when a semantic action is executed. This information may +include constants like {\em integers}, {\em reals} or {\em string}, or more +complex objects like symbol table information. + + + \section{Example} + \label{s:example} + +Using the \mlburg\ specification of \figureRef{f:ex_spec}, a sample +input to provide to the functor \cd{BurmGen} is shown below: +\begin{code} + structure In : BURM_INPUT_SPEC = struct + structure BO = BurmOps + datatype tree = + ASGNI of tree * tree + | ADDI of tree * tree + | CVCI of tree + | INDIRC of tree + | I0I + | ADDRLP of string + | CNSTI of int + + fun opchildren t = + case t of + ASGNI(t1,t2) => (BO.T_ASGNI, [t1,t2]) + | ADDI(t1,t2) => (BO.T_ADDI, [t1,t2]) + | CVCI(t1) => (BO.T_CVCI, [t1]) + | INDIRC(t1) => (BO.T_INDIRC,[t1]) + | I0I => (BO.T_I0I, []) + | ADDRLP _ => (BO.T_ADDRLP,[]) + | CNSTI _ => (BO.T_CNSTI, []) + end +\end{code} + + + + +In \figureRef{ex:prog} we show a sample function called \cd{walk} that +performs semantic actions. The semantic actions merely prints out the +rules that applied assuming the children of each node are traversed +from left to right. Note that in the action corresponding to the +\cd{reg_CVCI_INDIRC_disp} rule, the recursive call to \cd{walk}, +specifically \cd{walk disp}, steps over the \cd{CVCI} and \cd{INDIRC} +nodes --- yet, information associated with the \cd{CVCI} or +\cd{INDIRC} is available to this rule. + + + +\begin{figure} +\begin{centercode} +structure Example = struct + structure Burm = BurmGen (In) + open In + + local val num = ref 1 in + fun new s = (s^(makestring (!num)) before inc num) + end + + fun walk (Burm.stmt_ASGNI_disp_reg (disp,reg), _) = + let + val (disp',reg') = (walk disp, walk reg) + val stmt = new "stmt" + in + say (stmt^" <- ASGNI ("^disp'^" + "^reg'^")\\n"); stmt + end + ... + | walk (Burm.reg_CVCI_INDIRC_disp disp, _) = + let + val disp' = walk disp + val reg = new "reg" + in + say (reg^" <- CVCI (INDIRC ("^disp'^"))\\n"); reg + end + ... + | walk (Burm.con_CNSTI, CNSTI i) = + let + val con = new "con" + in + say (con^" <- CNSTI "^(makestring i)^"\\n"); con + end + ... + | walk _ = (print "Error, bad match in walk\\n"; raise Match) + + + fun doit t = walk (Burm.reduce t) + + val sampleTree = ASGNI (ADDRLP "p", + ADDI (CVCI (INDIRC (ADDRLP "c")), + CNSTI 4)) +end +\end{centercode} + \caption{Example program.} + \label{ex:prog} +\end{figure} + +A graphical representation of \cd{sampleTree} and the result of +executing: +\begin{code} + - open Example; doit sampleTree; +\end{code} +is shown in \figureRef{ex:output}. + +\begin{figure} + \begin{center} + \includegraphics[width=3in]{tree} + \end{center}% + +\vskip 1in + +\begin{centercode} +disp1 <- ADDRLP p +disp2 <- ADDRLP c +reg3 <- CVCI (INDIRC (disp2)) +con4 <- CNSTI 4 +disp5 <- ADDI (reg3,con4) +reg6 <- disp5 +stmt7 <- ASGNI (disp1 + reg6) +\end{centercode} + \caption{{\tt sampleTree} and the produced output.} + \label{ex:output} +\end{figure} + + \section{Using {\tt mlburg} and debugging} + +The executable for \mlburg\ is usually called \cd{mlburg}. When \cd{mlburg} is +presented with a file name {\em filename}{\tt .burg}, a file {\em +filename}{\tt .sml} is created - assuming no errors were encountered. This +generated file will contain all the modules described above, and can be +directly loaded into an interactive session. The error messages displayed +during the execution of \cd{mlburg} are self-explanatory. + +During execution, a \cd{NoMatch} is raised when the tree cannot be reduced +to the start non-terminal. For example, suppose that the function +\cd{reduce} was called on the tree \cd{CNSTI}. Obviously, \cd{CNSTI} can +only be reduced to a \cd{con} and an \cd{rc}. The matcher would then print +the message~: +\begin{code} + No Match on nonterminal 0 + Possibilities were : + rule 9 with cost 0 + rule 11 with cost 0 +\end{code} +The {\sl nonterminal}s and rules are printed using integers, but a +correspondence between these integers and the identifiers used in the +specification can be found at the beginning of the generated SML file. + +Note, however, that such debugging information will only be useful if the +incorrect match occurs at the first level of the reduction to the start +non-terminal. If \cd{reduce} is called with \cd{ASGNI(I0I,\(x\))}, the +problem occurs deeper, because it is ultimately \cd{I0I} that cannot be +reduced to a \cd{disp}, and the fact that the whole tree cannot be reduced +to the start non-terminal is only a consequence of it. In such cases, the +matcher will only give the message~: +\begin{code} + No Match on nonterminal 0 + Possibilities were : +\end{code} +At this stage it would be necessary to check the completeness of the +rewrite system. Automated tools to do this, may be expected in the +future\cite{emmelmann-92}. + +\bibliographystyle{acm} +\bibliography{doc} +\end{document} diff --git a/ml-burg/doc/tree.fig b/ml-burg/doc/tree.fig new file mode 100644 index 0000000..6eaacf9 --- /dev/null +++ b/ml-burg/doc/tree.fig @@ -0,0 +1,30 @@ +#FIG 2.0 +80 2 +2 1 0 1 -1 0 0 0 0.000 1 0 + 0 0 1.000 4.000 8.000 + 339 99 399 139 9999 9999 +2 1 0 1 -1 0 0 0 0.000 1 0 + 0 0 1.000 4.000 8.000 + 339 99 279 139 9999 9999 +2 1 0 1 -1 0 0 0 0.000 1 0 + 0 0 1.000 4.000 8.000 + 399 159 459 199 9999 9999 +2 1 0 1 -1 0 0 0 0.000 1 0 + 0 0 1.000 4.000 8.000 + 399 159 339 199 9999 9999 +2 1 0 1 -1 0 0 0 0.000 1 0 + 0 0 1.000 4.000 8.000 + 339 219 339 259 9999 9999 +2 1 0 1 -1 0 0 0 0.000 1 0 + 0 0 1.000 4.000 8.000 + 339 279 339 319 9999 9999 +4 1 12 7 0 -1 0 0.000 1 4 5 369 339 c +4 1 12 7 0 -1 0 0.000 1 6 5 484 219 4 +4 1 12 7 0 -1 0 0.000 1 5 5 309 159 P +4 1 12 15 0 -1 0 0.000 1 10 50 339 94 ASGNI +4 1 12 15 0 -1 0 0.000 1 10 40 399 154 ADDI +4 1 12 15 0 -1 0 0.000 1 10 60 279 154 ADDRLP +4 1 12 15 0 -1 0 0.000 1 10 40 339 214 CVCI +4 1 12 15 0 -1 0 0.000 1 10 50 459 214 CNSTI +4 1 12 15 0 -1 0 0.000 1 10 60 339 274 INDIRC +4 1 12 15 0 -1 0 0.000 1 10 60 339 334 ADDRLP diff --git a/ml-burg/doc/tree.pdf b/ml-burg/doc/tree.pdf new file mode 100644 index 0000000..80619aa Binary files /dev/null and b/ml-burg/doc/tree.pdf differ diff --git a/ml-burg/errormsg.sml b/ml-burg/errormsg.sml new file mode 100644 index 0000000..c188c7f --- /dev/null +++ b/ml-burg/errormsg.sml @@ -0,0 +1,35 @@ +(* errormsg.sml + * + * COPYRIGHT (c) 1995 AT&T Bell Laboratories. + * + * $Log$ + * Revision 1.2 2000/06/01 18:33:42 monnier + * bring revisions from the vendor branch to the trunk + * + * Revision 1.1.1.8 1999/04/17 18:56:04 monnier + * version 110.16 + * + * Revision 1.1.1.1 1997/01/14 01:37:59 george + * Version 109.24 + * + * Revision 1.1.1.2 1997/01/11 18:52:30 george + * ml-burg Version 109.24 + * + * Revision 1.1.1.1 1996/01/31 16:01:24 george + * Version 109 + * + *) + +structure ErrorMsg = struct + + exception Compiler + val anyErrors = ref false + + fun say (msg:string) = (print msg; print "\n") + + fun warning msg = say ("\tWarning: " ^ msg); + + fun complain msg = (say ("\tError: " ^ msg); anyErrors := true) + + fun impossible msg = (complain("Internal bug: " ^ msg); raise Compiler) +end diff --git a/ml-burg/example1.burg b/ml-burg/example1.burg new file mode 100644 index 0000000..cb620dd --- /dev/null +++ b/ml-burg/example1.burg @@ -0,0 +1,182 @@ + + +%{ +(* this is the header *) +%} + + + +%term ASGNI + | ADDI + | CVCI + | INDIRC + | I0I + | ADDRLP + | CNSTI + +%termprefix T_ + +%start stmt + + +%% + +stmt: ASGNI(disp,reg) = stmt_ASGNI_disp_reg (1); +stmt: reg = stmt_reg; +reg: ADDI(reg,rc) = reg_ADDI_reg_rc (1); +reg: CVCI(INDIRC(disp)) = reg_CVCI_INDIRC_disp (1); +reg: I0I = reg_I0I; +reg: disp = reg_disp (1); +disp: ADDI(reg,con) = disp_ADDI_reg_con; +disp: ADDRLP = disp_ADDRLP; +rc: con = rc_con; +rc: reg = rc_reg; +con: CNSTI = con_CNSTI; +con: I0I = con_I0I; + + + +%% + +structure In = struct + + open BurmOps + + datatype tree = + ASGNI of tree * tree + | ADDI of tree * tree + | CVCI of tree + | INDIRC of tree + | I0I + | ADDRLP of string + | CNSTI of int + + fun opchildren t = + case t of + ASGNI (t1,t2) => (T_ASGNI, [t1,t2]) + | ADDI (t1,t2) => (T_ADDI, [t1,t2]) + | CVCI (t1) => (T_CVCI, [t1]) + | INDIRC (t1) => (T_INDIRC, [t1]) + | I0I => (T_I0I, []) + | ADDRLP _ => (T_ADDRLP, []) + | CNSTI _ => (T_CNSTI, []) + +end + + + + +structure Example = struct + + structure Burm = BurmGen (In) + open In + + fun say s = print s + + + local + val num = ref 1 + fun inc iref = iref := (!iref + 1) + in + fun new s = (s^(Int.toString (!num)) before inc num) + end + + + fun walk (Burm.stmt_ASGNI_disp_reg (disp,reg), _) = + let + val (disp',reg') = (walk disp, walk reg) + val stmt = new "stmt" + in + say (stmt^" <- ASGNI ("^disp'^" + "^reg'^")\n"); stmt + end + | walk (Burm.stmt_reg reg, _) = + let + val reg' = walk reg + val stmt = new "stmt" + in + say (stmt^" <- "^reg'^"\n"); stmt + end + | walk (Burm.reg_ADDI_reg_rc (reg,rc), _) = + let + val (reg',rc') = (walk reg, walk rc) + val reg2 = new "reg" + in + say (reg2^" <- ADDI ("^reg'^","^rc'^")\n"); reg2 + end + | walk (Burm.reg_CVCI_INDIRC_disp disp, _) = + let + val disp' = walk disp + val reg = new "reg" + in + say (reg^" <- CVCI (INDIRC ("^disp'^"))\n"); reg + end + | walk (Burm.reg_I0I, _) = + let + val reg = new "reg" + in + say (reg^" <- ZERO\n"); reg + end + | walk (Burm.reg_disp disp, _) = + let + val disp' = walk disp + val reg = new "reg" + in + say (reg^" <- "^disp'^"\n"); reg + end + | walk (Burm.disp_ADDI_reg_con (reg,con), _) = + let + val (reg',con') = (walk reg, walk con) + val disp = new "disp" + in + say (disp^" <- ADDI ("^reg'^","^con'^")\n"); disp + end + | walk (Burm.disp_ADDRLP, ADDRLP addr) = + let + val disp = new "disp" + in + say (disp^" <- ADDRLP "^addr^"\n"); disp + end + | walk (Burm.rc_con con, _) = + let + val con' = walk con + val rc = new "rc" + in + say (rc^" <- "^con'^"\n"); rc + end + | walk (Burm.rc_reg reg, _) = + let + val reg' = walk reg + val rc = new "rc" + in + say (rc^" <- "^reg'^"\n"); rc + end + | walk (Burm.con_CNSTI, CNSTI i) = + let + val con = new "con" + in + say (con^" <- CNSTI "^(Int.toString i)^"\n"); con + end + | walk (Burm.con_I0I, _) = + let + val con = new "con" + in + say (con^" <- CNSTI ZERO\n"); con + end + | walk _ = (print "Error, bad match in walk\n"; raise Match) + + + + fun doit t = walk (Burm.reduce t) + + + (* + * int p; + * char c; + * p = c+4; + *) + val a = ASGNI (ADDRLP "p", + ADDI (CVCI (INDIRC (ADDRLP "c")), + CNSTI 4)) + +end + diff --git a/ml-burg/example2.burg b/ml-burg/example2.burg new file mode 100644 index 0000000..4e8ae19 --- /dev/null +++ b/ml-burg/example2.burg @@ -0,0 +1,252 @@ + + +%{ +(* this is the header *) +%} + + + +%term INT + | VAR + | ADD + | SUB + | MUL + | DIV + | NEG + +%termprefix T_ +%ruleprefix R_ + +%start reg + +%% + + +reg: INT = reg_INT (1); +sreg: INT = sreg_INT (1); +ureg: INT = ureg_INT (1); + +reg: VAR = reg_VAR (1); (* fetch *) + +reg: sreg = reg_sreg (1); (* inc *) +sreg: reg = sreg_reg (1); (* dec *) +sreg: ureg = sreg_ureg (1); (* shl *) +ureg: sreg = ureg_sreg_or_reg (1); (* shr *) +ureg: reg = ureg_sreg_or_reg (1); (* shr *) + +reg: ADD(reg,sreg) = r_ADD_r_r (1); (* add *) +reg: ADD(sreg,reg) = r_ADD_r_r (1); (* add *) +reg: ADD(INT,reg) = r_ADD_2i_r (1); (* add 2*imm *) +reg: ADD(reg,INT) = r_ADD_r_2i (1); (* add 2*imm *) +reg: ADD(INT,sreg) = r_ADD_2ip_r (1); (* add 2*imm+1 *) +reg: ADD(sreg,INT) = r_ADD_r_2ip (1); (* add 2*imm+1 *) +sreg: ADD(sreg,sreg) = r_ADD_r_r (1); (* add *) +sreg: ADD(INT,sreg) = r_ADD_2i_r (1); (* add 2*imm *) +sreg: ADD(sreg,INT) = r_ADD_r_2i (1); (* add 2*imm *) +sreg: ADD(INT,reg) = r_ADD_2im_r (1); (* add 2*imm-1 *) +sreg: ADD(reg,INT) = r_ADD_r_2im (1); (* add 2*imm-1 *) + +reg: SUB(reg,sreg) = r_SUB_r_r (1); (* sub *) +reg: SUB(INT,reg) = r_SUB_2ipp_r (2); (* sub 2*imm+2,x *) +reg: SUB(reg,INT) = r_SUB_r_2i (1); (* sub x,2*imm *) +reg: SUB(sreg,INT) = r_SUB_r_2im (1); (* sub x,2*imm-1 *) +sreg: SUB(sreg,sreg) = r_SUB_r_r (1); (* sub *) +sreg: SUB(reg,reg) = r_SUB_r_r (1); (* sub *) +sreg: SUB(sreg,INT) = r_SUB_r_2i (1); (* sub x,2*imm *) +sreg: SUB(reg,INT) = r_SUB_r_2ip (1); (* sub x,2*imm+1 *) + +sreg: MUL(sreg,ureg) = r_MUL_r_r (1); (* mul *) +sreg: MUL(ureg,sreg) = r_MUL_r_r (1); (* mul *) +sreg: MUL(INT,sreg) = r_MUL_i_r (1); (* mul *) +sreg: MUL(sreg,INT) = r_MUL_r_i (1); (* mul *) +sreg: MUL(INT,ureg) = r_MUL_2i_r (1); (* mul 2*imm,x *) +sreg: MUL(ureg,INT) = r_MUL_r_2i (1); (* mul x,2*imm *) + +ureg: DIV(sreg,sreg) = r_DIV_r_r (1); (* div *) +ureg: DIV(ureg,ureg) = r_DIV_r_r (1); (* div *) +ureg: DIV(ureg,INT) = r_DIV_r_i (1); (* div *) + +reg: NEG(reg) = r_NEG_r_p_2 (2); (* 2-r *) +ureg: NEG(ureg) = r_NEG_r (1); (* 0-r *) +sreg: NEG(sreg) = r_NEG_r (1); (* 0-r *) + + + +%% + +structure In = struct + + open BurmOps + + datatype tree = + INT of int + | VAR of string + | ADD of tree * tree + | SUB of tree * tree + | MUL of tree * tree + | DIV of tree * tree + | NEG of tree + + fun opchildren t = + case t of + INT _ => (T_INT, []) + | VAR _ => (T_VAR, []) + | ADD (t1,t2) => (T_ADD, [t1,t2]) + | SUB (t1,t2) => (T_SUB, [t1,t2]) + | MUL (t1,t2) => (T_MUL, [t1,t2]) + | DIV (t1,t2) => (T_DIV, [t1,t2]) + | NEG (t1) => (T_NEG, [t1]) + +end + + + + +structure Example = struct + + structure Burm = BurmGen (In) + open In + + fun say s = print s + + + local + val num = ref 1 + fun inc iref = iref := (!iref + 1) + in + fun resetreg () = (num := 1) + fun newreg () = ("r"^(Int.toString (!num)) before inc num) + end + + + fun walk (Burm.R_reg_INT, INT n) = + let val reg = newreg () in + say ("ldi "^reg^","^(Int.toString (n+n+1))^"\n"); reg + end + | walk (Burm.R_sreg_INT, INT n) = + let val reg = newreg () in + say ("ldi "^reg^","^(Int.toString (n+n))^"\n"); reg + end + | walk (Burm.R_ureg_INT, INT n) = + let val reg = newreg () in + say ("ldi "^reg^","^(Int.toString n)^"\n"); reg + end + | walk (Burm.R_reg_VAR, VAR v) = + let val reg = newreg () in + say ("ld "^reg^",["^v^"]\n"); reg + end + | walk (Burm.R_reg_sreg reg, _) = + let val reg' = walk reg in + say ("inc "^reg'^"\n"); reg' + end + | walk (Burm.R_sreg_reg reg, _) = + let val reg' = walk reg in + say ("dec "^reg'^"\n"); reg' + end + | walk (Burm.R_sreg_ureg reg, _) = + let val reg' = walk reg in + say ("shl "^reg'^"\n"); reg' + end + | walk (Burm.R_ureg_sreg_or_reg reg, _) = + let val reg' = walk reg in + say ("shr "^reg'^"\n"); reg' + end + | walk (Burm.R_r_ADD_r_r (r1,r2), _) = + let val (r1',r2') = (walk r1, walk r2) in + say ("add "^r1'^","^r2'^"\n"); r1' + end + | walk (Burm.R_r_ADD_2i_r reg, ADD (INT n,_)) = + let val reg' = walk reg in + say ("addi "^reg'^","^(Int.toString (n+n))^"\n"); reg' + end + | walk (Burm.R_r_ADD_r_2i reg, ADD (_,INT n)) = + let val reg' = walk reg in + say ("addi "^reg'^","^(Int.toString (n+n))^"\n"); reg' + end + | walk (Burm.R_r_ADD_2ip_r reg, ADD (INT n,_)) = + let val reg' = walk reg in + say ("addi "^reg'^","^(Int.toString (n+n+1))^"\n"); reg' + end + | walk (Burm.R_r_ADD_r_2ip reg, ADD (_,INT n)) = + let val reg' = walk reg in + say ("addi "^reg'^","^(Int.toString (n+n+1))^"\n"); reg' + end + | walk (Burm.R_r_ADD_2im_r reg, ADD (INT n,_)) = + let val reg' = walk reg in + say ("addi "^reg'^","^(Int.toString (n+n-1))^"\n"); reg' + end + | walk (Burm.R_r_ADD_r_2im reg, ADD (_,INT n)) = + let val reg' = walk reg in + say ("addi "^reg'^","^(Int.toString (n+n-1))^"\n"); reg' + end + | walk (Burm.R_r_SUB_r_r (r1,r2), _) = + let val (r1',r2') = (walk r1, walk r2) in + say ("sub "^r1'^","^r2'^"\n"); r1' + end + | walk (Burm.R_r_SUB_2ipp_r reg, SUB (INT n,_)) = + let val reg' = walk reg val r = newreg () in + say ("ldi "^r^","^(Int.toString (n+n+1))^"\n"); + say ("sub "^r^","^reg'^"\n"); + r + end + | walk (Burm.R_r_SUB_r_2i reg, SUB (_,INT n)) = + let val reg' = walk reg in + say ("subi "^reg'^","^(Int.toString (n+n))^"\n"); reg' + end + | walk (Burm.R_r_SUB_r_2im reg, SUB (_,INT n)) = + let val reg' = walk reg in + say ("subi "^reg'^","^(Int.toString (n+n-1))^"\n"); reg' + end + | walk (Burm.R_r_SUB_r_2ip reg, SUB (_,INT n)) = + let val reg' = walk reg in + say ("subi "^reg'^","^(Int.toString (n+n+1))^"\n"); reg' + end + | walk (Burm.R_r_MUL_r_r (r1,r2), _) = + let val (r1',r2') = (walk r1, walk r2) in + say ("mul "^r1'^","^r2'^"\n"); r1' + end + | walk (Burm.R_r_MUL_i_r reg, MUL (INT n,_)) = + let val reg' = walk reg in + say ("muli "^reg'^","^(Int.toString (n))^"\n"); reg' + end + | walk (Burm.R_r_MUL_r_i reg, MUL (_,INT n)) = + let val reg' = walk reg in + say ("muli "^reg'^","^(Int.toString (n))^"\n"); reg' + end + | walk (Burm.R_r_MUL_2i_r reg, MUL (INT n,_)) = + let val reg' = walk reg in + say ("muli "^reg'^","^(Int.toString (n+n))^"\n"); reg' + end + | walk (Burm.R_r_MUL_r_2i reg, MUL (_,INT n)) = + let val reg' = walk reg in + say ("muli "^reg'^","^(Int.toString (n+n))^"\n"); reg' + end + | walk (Burm.R_r_DIV_r_r (r1,r2), _) = + let val (r1',r2') = (walk r1, walk r2) in + say ("div "^r1'^","^r2'^"\n"); r1' + end + | walk (Burm.R_r_DIV_r_i reg, DIV (_,INT n)) = + let val reg' = walk reg in + say ("divi "^reg'^","^(Int.toString (n))^"\n"); reg' + end + | walk (Burm.R_r_NEG_r reg, _) = + let val reg' = walk reg in + say ("neg "^reg'^"\n"); reg' + end + | walk (Burm.R_r_NEG_r_p_2 reg, _) = + let val reg' = walk reg in + say ("neg "^reg'^"\n"); + say ("addi "^reg'^",\n"); + reg' + end + | walk _ = (print "Error, bad match in walk\n"; raise Match) + + + fun doit t = walk (Burm.reduce t) + + val a = SUB (ADD (VAR "a", INT 2), INT 5) + val b = ADD (DIV (SUB (VAR "a", INT 1), VAR "b"), INT 1) + val c = ADD (VAR "a", INT 1) + +end + diff --git a/ml-burg/main.sml b/ml-burg/main.sml new file mode 100644 index 0000000..97738b7 --- /dev/null +++ b/ml-burg/main.sml @@ -0,0 +1,85 @@ +(* main.sml + * + * COPYRIGHT (c) 1994 AT&T Bell Laboratories. + * + * $Log$ + * Revision 1.4 2001/11/21 21:03:16 blume + * Release 110.37 -- see HISTORY + * + * Revision 1.3.4.1 2001/11/17 03:14:16 blume + * fixed uses of exnMessage in standalone programs + * + * Revision 1.3 2000/06/01 18:33:42 monnier + * bring revisions from the vendor branch to the trunk + * + * Revision 1.2 2000/03/07 03:59:09 blume + * build script now uses new mechanism for building stanalone programs + * + * Revision 1.1.1.8.4.1 2000/02/20 14:44:33 blume + * CMB.deliver merged with CMB.make; runtime boot code made more flexible + * + * Revision 1.1.1.8 1999/04/17 18:56:04 monnier + * version 110.16 + * + * Revision 1.1.1.1 1997/01/14 01:37:59 george + * Version 109.24 + * + * Revision 1.1.1.2 1997/01/11 18:52:31 george + * ml-burg Version 109.24 + * + * Revision 1.3 1996/02/26 16:55:12 jhr + * Moved exportFn/exportML to SMLofNJ structure. + * + * Revision 1.2 1996/02/26 15:02:06 george + * print no longer overloaded. + * use of makestring has been removed and replaced with Int.toString .. + * use of IO replaced with TextIO + * + * Revision 1.1.1.1 1996/01/31 16:01:25 george + * Version 109 + * + *) +structure Main = struct + + fun main (cmdName, argv) = let + fun help () = ( + TextIO.output (TextIO.stdErr, "usage: mlburg [.burg]\n"); + OS.Process.failure) + in + case argv + of [] => ( + BurgEmit.emit (TextIO.stdIn, (fn () => TextIO.stdOut)); + OS.Process.success) + | ("-h"::_) => help () + | ("-help"::_) => help () + | files => let + fun findname file = let + val {base, ext} = OS.Path.splitBaseExt file + in + case ext + of (SOME("brg" | "burg")) => + OS.Path.joinBaseExt{base=base, ext=SOME "sml"} + | _ => file ^ ".sml" + (* end case *) + end + val names = map (fn n => (n,findname n)) files + fun emit (inname, outname) = (let + val s_in = TextIO.openIn inname + in + BurgEmit.emit (s_in, (fn () => (TextIO.openOut outname))) + end) + handle err => (TextIO.output (TextIO.stdErr, + General.exnMessage err^"\n"); + raise err) + in + app emit names; + OS.Process.success + end + end + + (* + * This is the function to call in an interactive session. + * Takes a filename (something.burg) as argument, and produces something.sml + *) + fun doit s = main ("", [s]) +end diff --git a/ml-burg/ml-burg.cm b/ml-burg/ml-burg.cm new file mode 100644 index 0000000..ae47f7e --- /dev/null +++ b/ml-burg/ml-burg.cm @@ -0,0 +1,38 @@ +(* sources.cm file for ML-Burg *) + +Library + structure Main +is + +# if defined (NEW_CM) + $/basis.cm + $/smlnj-lib.cm + $/ml-yacc-lib.cm +# else + smlnj-lib.cm + ml-yacc-lib.cm +# endif + + errormsg.sml + burg-ast.sml + +(* During installation we rely on pre-generated files + * to avoid certain chicken-and-egg problems. *) +#if defined(NO_ML_LEX) orelse defined(NO_PLUGINS) + burg-lex.sml +#else + burg-lex : MLLex +#endif + +(* During installation we rely on pre-generated files + * to avoid certain chicken-and-egg problems. *) +#if defined(NO_ML_YACC) orelse defined(NO_PLUGINS) + burg-gram.sig + burg-gram.sml +#else + burg-gram : MLYacc +#endif + + parse.sml + burg.sml + main.sml diff --git a/ml-burg/parse.sml b/ml-burg/parse.sml new file mode 100644 index 0000000..ef47fc6 --- /dev/null +++ b/ml-burg/parse.sml @@ -0,0 +1,51 @@ +(* parse.sml + * + * COPYRIGHT (c) 1995 AT&T Bell Laboratories. + * + * $Log$ + * Revision 1.2 2000/06/01 18:33:42 monnier + * bring revisions from the vendor branch to the trunk + * + * Revision 1.1.1.8 1999/04/17 18:56:04 monnier + * version 110.16 + * + * Revision 1.1.1.1 1997/01/14 01:38:00 george + * Version 109.24 + * + * Revision 1.1.1.2 1997/01/11 18:52:32 george + * ml-burg Version 109.24 + * + * Revision 1.2 1996/02/26 15:02:06 george + * print no longer overloaded. + * use of makestring has been removed and replaced with Int.toString .. + * use of IO replaced with TextIO + * + * Revision 1.1.1.1 1996/01/31 16:01:25 george + * Version 109 + * + *) +structure Parse = +struct + structure BurgLrVals = BurgLrValsFun(structure Token = LrParser.Token) + structure BurgLex = BurgLexFun(structure Tokens = BurgLrVals.Tokens) + structure BurgParser = Join(structure ParserData = BurgLrVals.ParserData + structure Lex = BurgLex + structure LrParser = LrParser) + + fun parse stream = + let + val lexer = BurgParser.makeLexer(fn n => TextIO.inputN(stream,n)) + fun error(msg,i:int,_) = + TextIO.output(TextIO.stdOut, + "Error: line " ^ Int.toString i ^ ", " ^ msg ^ "\n") + in + BurgParser.parse(30,lexer,error,()) + before + BurgLex.UserDeclarations.resetState() + end + + fun reset () = + BurgLex.UserDeclarations.resetState() + +end + diff --git a/ml-burg/tool/.cm/GUID/ext.sml b/ml-burg/tool/.cm/GUID/ext.sml new file mode 100644 index 0000000..a14888d --- /dev/null +++ b/ml-burg/tool/.cm/GUID/ext.sml @@ -0,0 +1 @@ +guid-$/(burg-ext.cm):ext.sml-1714016109.403 diff --git a/ml-burg/tool/.cm/GUID/tool.sml b/ml-burg/tool/.cm/GUID/tool.sml new file mode 100644 index 0000000..362a3be --- /dev/null +++ b/ml-burg/tool/.cm/GUID/tool.sml @@ -0,0 +1 @@ +guid-$/(mlburg-tool.cm):tool.sml-1714016109.388 diff --git a/ml-burg/tool/.cm/SKEL/ext.sml b/ml-burg/tool/.cm/SKEL/ext.sml new file mode 100644 index 0000000..8eebf45 --- /dev/null +++ b/ml-burg/tool/.cm/SKEL/ext.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f1d"Tools"ad"BurgBurgExt"h0 \ No newline at end of file diff --git a/ml-burg/tool/.cm/SKEL/tool.sml b/ml-burg/tool/.cm/SKEL/tool.sml new file mode 100644 index 0000000..209b2e4 --- /dev/null +++ b/ml-burg/tool/.cm/SKEL/tool.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f1d"Tools"ad"BurgTool"h0 \ No newline at end of file diff --git a/ml-burg/tool/.cm/amd64-unix/ext.sml b/ml-burg/tool/.cm/amd64-unix/ext.sml new file mode 100644 index 0000000..838e7b9 Binary files /dev/null and b/ml-burg/tool/.cm/amd64-unix/ext.sml differ diff --git a/ml-burg/tool/.cm/amd64-unix/tool.sml b/ml-burg/tool/.cm/amd64-unix/tool.sml new file mode 100644 index 0000000..e2e4209 Binary files /dev/null and b/ml-burg/tool/.cm/amd64-unix/tool.sml differ diff --git a/ml-burg/tool/burg-ext.cm b/ml-burg/tool/burg-ext.cm new file mode 100644 index 0000000..449fbee --- /dev/null +++ b/ml-burg/tool/burg-ext.cm @@ -0,0 +1,12 @@ +(* + * Plugin for registering "burg" classifier. + * + * (C) 2007 The Fellowship of SML/NJ + * + * Author: Matthias Blume (blume@tti-c.org) + *) +Library + structure BurgBurgExt +is + $smlnj/cm/tools.cm + ext.sml diff --git a/ml-burg/tool/ext.sml b/ml-burg/tool/ext.sml new file mode 100644 index 0000000..1a5ee28 --- /dev/null +++ b/ml-burg/tool/ext.sml @@ -0,0 +1,19 @@ +(* ext.sml + * + * Plugin for registering classifiers. + * + * Copyright (c) 2007 by The Fellowship of SML/NJ + * + * Author: Matthias Blume (blume@tti-c.org) + *) +structure BurgBurgExt = struct + local + val suffixes = ["burg"] + val class = "mlburg" + fun sfx s = + Tools.registerClassifier + (Tools.stdSfxClassifier { sfx = s, class = class }) + in + val _ = app sfx suffixes + end +end diff --git a/ml-burg/tool/mlburg-tool.cm b/ml-burg/tool/mlburg-tool.cm new file mode 100644 index 0000000..b1585bf --- /dev/null +++ b/ml-burg/tool/mlburg-tool.cm @@ -0,0 +1,12 @@ +(* + * The plugin library for ML-Burg. + * + * (C) 2000 Lucent Technologies, Bell Laboratories + * + * Author: Matthias Blume (blume@kurims.kyoto-u.ac.jp) + *) +Library + structure BurgTool +is + $smlnj/cm/tools.cm + tool.sml diff --git a/ml-burg/tool/tool.sml b/ml-burg/tool/tool.sml new file mode 100644 index 0000000..c5b2357 --- /dev/null +++ b/ml-burg/tool/tool.sml @@ -0,0 +1,17 @@ +(* + * Running ML-Burg from CM. + * + * (C) 1999 Lucent Technologies, Bell Laboratories + * + * Author: Matthias Blume (blume@kurims.kyoto-u.ac.jp) + *) +structure BurgTool = struct + val _ = Tools.registerStdShellCmdTool + { tool = "ML-Burg", + class = "mlburg", + cmdStdPath = fn () => ("ml-burg", []), + template = NONE, + extensionStyle = + Tools.REPLACE (["burg"], [("sml", SOME "sml", fn too => too)]), + dflopts = [] } +end diff --git a/ml-lex.tgz b/ml-lex.tgz new file mode 100644 index 0000000..b105a28 Binary files /dev/null and b/ml-lex.tgz differ diff --git a/ml-lex/.cm/GUID/export-lex.sml b/ml-lex/.cm/GUID/export-lex.sml new file mode 100644 index 0000000..7fe1792 --- /dev/null +++ b/ml-lex/.cm/GUID/export-lex.sml @@ -0,0 +1 @@ +guid-(ml-lex.cm):export-lex.sml-1714016076.051 diff --git a/ml-lex/.cm/GUID/lexgen.sml b/ml-lex/.cm/GUID/lexgen.sml new file mode 100644 index 0000000..2d560d0 --- /dev/null +++ b/ml-lex/.cm/GUID/lexgen.sml @@ -0,0 +1 @@ +guid-(ml-lex.cm):lexgen.sml-1714016075.458 diff --git a/ml-lex/.cm/SKEL/export-lex.sml b/ml-lex/.cm/SKEL/export-lex.sml new file mode 100644 index 0000000..b7027a3 --- /dev/null +++ b/ml-lex/.cm/SKEL/export-lex.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f8d"OS"d"SMLofNJ"d"List"Cd"Signals"d"General"d"String"d"TextIO"d"LexGen"Nad"ExportLexGen"j0 \ No newline at end of file diff --git a/ml-lex/.cm/SKEL/lexgen.sml b/ml-lex/.cm/SKEL/lexgen.sml new file mode 100644 index 0000000..ee88f4a --- /dev/null +++ b/ml-lex/.cm/SKEL/lexgen.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d3f2Array"TextIO"aLEXGEN"0ad"LexGen"jh6adict"Cegp1/f6d"Char"Cd"List"Int"String" NaRB"jh1ad"Map"jgp1e"RedBlackMapFn"bd2bd2egp1%f2d"StringCvt"f0f2 f1f6d"OS"Cd"Substring" NNgp1 \ No newline at end of file diff --git a/ml-lex/.cm/amd64-unix/export-lex.sml b/ml-lex/.cm/amd64-unix/export-lex.sml new file mode 100644 index 0000000..0531309 Binary files /dev/null and b/ml-lex/.cm/amd64-unix/export-lex.sml differ diff --git a/ml-lex/.cm/amd64-unix/lexgen.sml b/ml-lex/.cm/amd64-unix/lexgen.sml new file mode 100644 index 0000000..371afcc Binary files /dev/null and b/ml-lex/.cm/amd64-unix/lexgen.sml differ diff --git a/ml-lex/INSTALL b/ml-lex/INSTALL new file mode 100644 index 0000000..f8f7a91 --- /dev/null +++ b/ml-lex/INSTALL @@ -0,0 +1,21 @@ +Installation instructions for ML-Lex +------------------------------------- + +ML-Lex will normally be automatically +installed as part of the SML/NJ system +by the SML/NJ installer. + +To install by hand (e.g., if you make +your own modifications), run the + + ./build + +script in this directory and then move +the file + + ml-lex.$ARCH-$OS + +to the heap-file directory. + +Running ./build requires a properly +functioning installation of SML/NJ. diff --git a/ml-lex/README b/ml-lex/README new file mode 100644 index 0000000..3ce40d8 --- /dev/null +++ b/ml-lex/README @@ -0,0 +1,22 @@ + Lexical analyzer generator for Standard ML. + Version 1.6, October 1994 + +Copyright (c) 1989-92 by Andrew W. Appel, James S. Mattson, David R. Tarditi + +This software comes with ABSOLUTELY NO WARRANTY. +This software is subject only to the PRINCETON STANDARD ML SOFTWARE LIBRARY +COPYRIGHT NOTICE, LICENSE AND DISCLAIMER, (in the file "COPYRIGHT", +distributed with this software). You may copy and distribute this software; +see the COPYRIGHT NOTICE for details and restrictions. + +Files of interest: + +lexgen.doc - User's manual for ML-Lex +lexgen.sml - ML version of Lex +export-lex.sml - implements an exportable (via SMLofNJ.exportFn) + toplevel driver for ML-Lex; the resulting + stand-alone program takes the specification + file name as a command line argument +ml-lex.cm - CM description file for ML-Lex +build - script that invokes ../../bin/ml-build in order + to construct the stand-alone version of ML-Lex diff --git a/ml-lex/build.bat b/ml-lex/build.bat new file mode 100644 index 0000000..1384d82 --- /dev/null +++ b/ml-lex/build.bat @@ -0,0 +1 @@ +..\bin\ml-build.bat ml-lex.cm ExportLexGen.lexGen ml-lex diff --git a/ml-lex/build.sh b/ml-lex/build.sh new file mode 100755 index 0000000..2d728c3 --- /dev/null +++ b/ml-lex/build.sh @@ -0,0 +1,50 @@ +#!/bin/sh +# +# Copyright (c) 2018 The Fellowship of SML/NJ (https://smlnj.org) +# +# build script for ml-lex under the new runtime system. +# +# options: +# -o image -- specify the name of the heap image, "ml-lex" +# is the default. + +CMD=$0 + +ROOT="ml-lex" +HEAP_IMAGE="" +SMLNJROOT=`pwd`/.. +BIN=${INSTALLDIR:-$SMLNJROOT}/bin +BUILD=$BIN/ml-build +SIZE_OPT="-32" + +# +# process command-line options +# +while [ "$#" != "0" ] ; do + arg=$1 + shift + case $arg in + -32) SIZE_OPT=$arg ;; + -64) SIZE_OPT=$arg ;; + -o) + if [ "$#" = "0" ]; then + echo "$CMD: must supply image name for -o option" + exit 1 + fi + HEAP_IMAGE=$1; shift + ;; + *) + echo $CMD: invalid argument: $arg + exit 1 + ;; + esac +done + +if [ "$HEAP_IMAGE" = "" ]; then + HEAP_IMAGE="$ROOT" +fi + +echo "$BUILD" $SIZE_OPT ml-lex.cm ExportLexGen.lexGen $HEAP_IMAGE +"$BUILD" $SIZE_OPT ml-lex.cm ExportLexGen.lexGen $HEAP_IMAGE + +exit 0 diff --git a/ml-lex/doc/lexgen.doc b/ml-lex/doc/lexgen.doc new file mode 100644 index 0000000..86d2714 --- /dev/null +++ b/ml-lex/doc/lexgen.doc @@ -0,0 +1,539 @@ + A lexical analyzer generator for Standard ML. + +THIS TEXT FILE IS OBSOLETE and IS NOT MAINTAINED. +The current (maintained) documentation is in lexgen.tex. + + + Andrew W. Appel + James S. Mattson + David R. Tarditi + + Princeton University + + Version 1.6, October 1994 + +Copyright (c) 1989-1992 by Andrew W. Appel, James S. Mattson, David R. Tarditi + +This software comes with ABSOLUTELY NO WARRANTY. +This software is subject only to the PRINCETON STANDARD ML SOFTWARE LIBRARY +COPYRIGHT NOTICE, LICENSE AND DISCLAIMER, (in the file "COPYRIGHT", +distributed with this software). You may copy and distribute this software; +see the COPYRIGHT NOTICE for details and restrictions. + +I. General Description + +Computer programs often need to divide their input into words and +distinguish between different kinds of words. Compilers, for +example, need to distinguish between integers, reserved words, and +identifiers. Applications programs often need to be able to +recognize components of typed commands from users. + +The problem of segmenting input into words and recognizing classes of +words is known as lexical analysis. Small cases of this problem, +such as reading text strings separated by spaces, can be solved by +using hand-written programs. Larger cases of this problem, such as +tokenizing an input stream for a compiler, can also be solved using +hand-written programs. + +A hand-written program for a large lexical analysis problem, however, +suffers from two major problems. First, the program requires a fair +amount of programmer time to create. Second, the description of +classes of words is not explicit in the program. It must be inferred +from the program code. This makes it difficult to verify if the +program recognizes the correct words for each class. It also makes +future maintenance of the program difficult. + +Lex, a programming tool for the Unix system, is a successful solution +to the general problem of lexical analysis. It uses regular +expressions to describe classes of words. A program fragment is +associated with each class of words. This information is given to +Lex as a specification (a Lex program). Lex produces a program for a +function that can be used to perform lexical analysis. + +The function operates as follows. It finds the longest word starting +from the current position in the input stream that is in one of the +word classes. It executes the program fragment associated with the +class, and sets the current position in the input stream to be the +character after the word. The program fragment has the actual text +of the word available to it, and may be any piece of code. For many +applications it returns some kind of value. + +Lex allows the programmer to make the language description explicit, +and to concentrate on what to do with the recognized words, not how +to recognize the words. It saves programmer time and increases +program maintainability. + +Unfortunately, Lex is targeted only C. It also places artificial +limits on the size of strings that can be recognized. + +ML-Lex is a variant of Lex for the ML programming language. ML-Lex +has a syntax similar to Lex, and produces an ML program instead of a +C program. ML-Lex produces a program that runs very efficiently. +Typically the program will be as fast or even faster than a +hand-coded lexer implemented in Standard ML. + +The program typically uses only a small amount of space. +ML-Lex thus allows ML programmers the same benefits that Lex allows C +programmers. It also does not place artificial limits on the size of +recognized strings. + +II. ML-Lex specifications + +An ML-Lex specification has the general format: + + {user declarations} + %% + {ML-Lex definitions} + %% + {rules} + +Each section is separated from the others by a '%%' delimiter. + +The rules are used to define the lexical analysis function. Each +rule has two parts - a regular expression and an action. The regular +expression defines the word class that a rule matches. The action is +a program fragment to be executed when a rule matches the input. The +actions are used to compute values, and must all return values of the +same type. + +The user can define values available to all rule actions in the user +declarations section. The user must define two values in this +section - a type lexresult and a function eof. Lexresult defines the +type of values returned by the rule actions. The function "eof" is +called by the lexer when the end of the input stream is reached. It +will typically return a value signalling eof or raise an exception. +It is called with the same argument as lex (see %arg, below), +and must return a value of type lexresult. + +In the definitions section, the user can define named regular +expressions, a set of start states, and specify which of the various +bells and whistles of ML-Lex are desired. + +The start states allow the user to control when certain rules are +matched. Rules may be defined to match only when the lexer is in +specific start states. The user may change the lexer's start state +in a rule action. This allows the user to specify special handling +of lexical objects. + +This feature is typically used to handle quoted strings with escapes +to denote special characters. The rules to recognize the inside +contents of a string are defined for only one start state. This +start state is entered when the beginning of a string is recognized, +and exited when the end of the string is recognized. + +III. Regular expressions. + +Regular expressions are a simple language for denoting classes of +strings. A regular expression is defined inductively over an +alphabet with a set of basic operations. The alphabet for ML-Lex is +the Ascii character set (character codes 0-127; or if %full is used, 0-255). + +The syntax and semantics of regular expressions will be described in +order of decreasing precedence (from the most tightly-binding operators +to the most weakly-binding): + + An individual character stands for itself, except for the + reserved characters ? * + | ( ) ^ $ / ; . = < > [ { " \ + + A backslash followed by one of the reserved characters stands + for that character. + + A set of characters enclosed in square brackets [ ] stands + for any one of those characters. Inside the brackets, only + the symbols \ - ^ are reserved. An initial up-arrow ^ stands + for the complement of the characters listed, e.g. [^abc] + stands any character except a, b, or c. The hyphen - denotes + a range of characters, e.g. [a-z] stands for any lower-case + alphabetic character, and [0-9a-fA-F] stands for any hexadecimal + digit. To include ^ literally in a bracketed set, put it anywhere + but first; to include - literally in a set, put it first or last. + + The dot . character stands for any character except newline, + i.e. the same as [^\n] + + The following special escape sequences are available, inside + or outside of square-brackets: + \b - backspace + \n - newline + \t - tab + \h - stands for all characters with codes >127, + when 7-bit characters are used. + \ddd - where ddd is a 3 digit decimal escape. + + A sequence of characters will stand for itself (reserved + characters will be taken literally) if it is enclosed in + double quotes " ". + + A named regular expression (defined in the "definitions" + section) may be referred to by enclosing its name in + braces { }. + + Any regular expression may be enclosed in parentheses ( ) + for syntactic (but, as usual, not semantic) effect. + + The postfix operator * stands for Kleene closure: + zero or more repetitions of the preceding expression. + + The postfix operator + stands for one or more repetitions + of the preceding expression. + + The postfix operator ? stands for zero or one occurrence of + the preceding expression. + + A postfix repetition range {n1,n2} where n1 and n2 are small + integers stands for any number of repetitions between n1 and n2 + of the preceding expression. The notation {n1} stands for + exactly n1 repetitions. + + Concatenation of expressions denotes concatenation of strings. + The expression e1 e2 stands for any string that results from + the concatenation of one string that matches e1 with another + string that matches e2. + + The infix operator | stands for alternation. The expression + e1 | e2 stands for anything that either e1 or e2 stands for. + + The infix operator / denotes lookahead. Lookahead is not + implemented and cannot be used, because there is a bug + in the algorithm for generating lexers with lookahead. If + it could be used, the expression e1 / e2 would match any string + that e1 stands for, but only when that string is followed by a + string that matches e2. + + When the up-arrow ^ occurs at the beginning of an expression, + that expression will only match strings that occur at the + beginning of a line (right after a newline character). + + The dollar sign $ is not implemented, since it is an abbreviation + for lookahead involving the newline character (that is, it + is an abbreviation /\n). If it could be used, when the dollar + sign $ occurred at the end of an expression, that expression + would only match strings that occur at the end of a line + (right before a newline character). + +Here are some examples of regular expressions, and descriptions of the +set of strings they denote: + + 0 | 1 | 2 | 3 A single digit between 0 and 3 + [0123] A single digit between 0 and 3 + 0123 The string "0123" + 0* All strings of 0 or more 0's + 00* All strings of 1 or more 0's + 0+ All strings of 1 or more 0's + [0-9]{3} Any three-digit decimal number. + \\[ntb] The strings "\n" "\t" "\b" + (00)* Any string with an even number of 0's. + +IV. ML-Lex syntax summary + +A. User declarations + +Anything up to the first %% is in the user declarations section. The +user should note that no symbolic identifier containing '%%' can be +used in this section. + +B. ML-Lex definitions + +Start states can be defined with + + %s {identifier list} ; + + or %S {identifier list} ; + +An identifier list consists of one or more identifiers. + +An identifier consists of one or more letters, digits, underscores, +or primes. It must begin with a letter. + +Named expressions can be defined with + + {identifier} = {regular expression} ; + +Regular expressions are defined below. + +The following % commands are also available: + + %reject - create REJECT() function + %count - count newlines using yylineno + %full - create lexer for the full 8-bit character set, + with characters in the range 0-255 permitted + as input. + %structure {identifier} - name the structure in the output program + {identifier} instead of Mlex + %header - use code following it to create header for lexer + structure + %arg - extra (curried) formal parameter argument to be + passed to the lex functions, and to be passed + to the eof function in place of () + These functions are discussed below, under values available to + actions. + +C. Rules + +Each rule has the format: + + {regular expression} => ( ... code ... ); + +All parentheses in ... code ... must be balanced, including those +used in strings and comments. + +The start state list is optional. It consists of a list of +identifiers separated by commas, and is delimited by triangle +brackets < >. Each identifier must be a start state defined in the +%s section above. + +The regular expression is only recognized when the lexer is in one of +the start states in the start state list. If no start state list is +given, the expression is recognized in all start states. + +The lexer begins in a pre-defined start state called INITIAL. + +The lexer resolves conflicts among rules by choosing the rule with +the longest match, and in the case two rules match the same string, +choosing the rule listed first in the specification. + +The rules should match all possible input. If some input occurs that +does not match any rule, the lexer created by ML-Lex will raise an +exception LexError. Note that this differs from C Lex, which prints +any unmatched input on the standard output. + +V. Values available inside the code associated with a rule. + +Mlex places the value of the string matched by a regular expression +in yytext, a string variable. + +The user may recursively +call the lexing function with lex(). (If %arg is used, the +lexing function may be re-invoked with the same argument by using +continue().) This is convenient for ignoring white space or comments silently: + + [\ \t\n]+ => ( lex()); + +To switch start states, the user may call YYBEGIN with the name of a +start state. + +The following values will be available only if the corresponding % +command is in the ML-Lex definitions sections: + + value %command description + ----- -------- ----------- + REJECT %reject REJECT() causes the current + rule to be "rejected." + The lexer behaves as if the + current rule had not matched; + another rule that matches this + string, or that matches the longest + possible prefix of this string, + is used instead. + + yypos Current character position from + beginning of file. + + yylineno %count Current line number + + +These values should be used only if necessary. Adding REJECT to a +lexer will slow it down by 20%; adding yylineno will slow it down by +another 20%, or more. (It is much more efficient to recognize \n and +have an action that increments the line-number variable.) The use of +the lookahead operator / will also slow down the entire lexer. +The character-position, yypos, is not costly to maintain, however. + +VI. Running ML-Lex + +From the Unix shell, run sml-lex myfile.lex +The output file will be myfile.lex.sml. The extension ".lex" is not +required but is recommended. + +Within an interactive system [not the preferred method]: +Use "lexgen.sml"; this will create a structure LexGen. The function +LexGen.lexGen creates a program for a lexer from an input +specification. It takes a string argument -- the name of the file +containing the input specification. The output file name is +determined by appending ".sml" to the input file name. + +VII. Using the program produced by ML-Lex. + +When the output file is loaded, it will create a structure Mlex that +contains the function makeLexer. makeLexer takes a function from int +-> string and returns a lexing function. + +For example, + + val lexer = Mlex.makeLexer (inputc (open_in "f")) + +creates a lexer that operates on the file whose name is f. + +The function from int -> string should read a string of characters +from the input stream. It should return a null string to indicate +that the end of the stream has been reached. The integer is the +number of characters that the lexer wishes to read; the function may +return any non-zero number of characters. For example, + + val lexer = + let val input_line = fn f => + let fun loop result = + let val c = input (f,1) + val result = c :: result + in if String.size c = 0 orelse c = "\n" then + String.implode (rev result) + else loop result + end + in loop nil + end + in Mlex.makeLexer (fn n => input_line std_in) + end +is appropriate for interactive streams where prompting, etc. occurs; +the lexer won't care that input_line might return a string of more +than or less than n characters. + +The lexer tries to read a large number of characters from the input +function at once, and it is desirable that the input function return +as many as possible. Reading many characters at once makes the lexer +more efficient. Fewer input calls and buffering operations are +needed, and input is more efficient in large block reads. For +interactive streams this is less of a concern, as the limiting factor +is the speed at which the user can type. + +To obtain a value, invoke the lexer by passing it a unit: + + val nextToken = lexer() + +If one wanted to restart the lexer, one would just discard "lexer" +and create a new lexer on the same stream with another call to +makeLexer. This is the best way to discard any characters buffered +internally by the lexer. + +All code in the user declarations section is placed inside a +structure UserDeclarations. To access this structure, use the path name +Mlex.UserDeclarations. + +If any input cannot be matched, the program will raise the exception +Mlex.LexError. An internal error (i.e. bug) will cause the +exception Internal.LexerError to be raised. + +If %structure is used, remember that the structure name will no +longer be Mlex, but the one specified in the command. + +VIII. Sample + +Here is a sample lexer for a calculator program: + +datatype lexresult= DIV | EOF | EOS | ID of string | LPAREN | + NUM of int | PLUS | PRINT | RPAREN | SUB | TIMES + +val linenum = ref 1 +val error = fn x => output(std_out,x ^ "\n") +val eof = fn () => EOF +%% +%structure CalcLex +alpha=[A-Za-z]; +digit=[0-9]; +ws = [\ \t]; +%% +\n => (inc linenum; lex()); +{ws}+ => (lex()); +"/" => (DIV); +";" => (EOS); +"(" => (LPAREN); +{digit}+ => (NUM (revfold (fn(a,r)=>ord(a)-ord("0")+10*r) (explode yytext) 0)); +")" => (RPAREN); +"+" => (PLUS); +{alpha}+ => (if yytext="print" then PRINT else ID yytext); +"-" => (SUB); +"*" => (TIMES); +. => (error ("calc: ignoring bad character "^yytext); lex()); + + +Here is the parser for the calculator: + +(* Sample interactive calculator to demonstrate use of lexer produced by ML-Lex + + The original grammar was + + stmt_list -> stmt_list stmt + stmt -> print exp ; | exp ; + exp -> exp + t | exp - t | t + t -> t * f | t/f | f + f -> (exp) | id | num + + The function parse takes a stream and parses it for the calculator + program. + + If a syntax error occurs, parse prints an error message and calls itself + on the stream. On this system that has the effect of ignoring all input + to the end of a line. +*) + +structure Calc = + struct + open CalcLex + open UserDeclarations + exception Error + fun parse strm = + let + val say = fn s => output(std_out,s) + val input_line = fn f => + let fun loop result = + let val c = input (f,1) + val result = c :: result + in if String.size c = 0 orelse c = "\n" then + String.implode (rev result) + else loop result + end + in loop nil + end + val lexer = makeLexer (fn n => input_line strm) + val nexttok = ref (lexer()) + val advance = fn () => (nexttok := lexer(); !nexttok) + val error = fn () => (say ("calc: syntax error on line" ^ + (makestring(!linenum)) ^ "\n"); raise Error) + val lookup = fn i => + if i = "ONE" then 1 + else if i = "TWO" then 2 + else (say ("calc: unknown identifier '" ^ i ^ "'\n"); raise Error) + fun STMT_LIST () = + case !nexttok of + EOF => () + | _ => (STMT(); STMT_LIST()) + + and STMT() = + (case !nexttok + of EOS => () + | PRINT => (advance(); say ((makestring (E():int)) ^ "\n"); ()) + | _ => (E(); ()); + case !nexttok + of EOS => (advance()) + | _ => error()) + and E () = E' (T()) + and E' (i : int ) = + case !nexttok of + PLUS => (advance (); E'(i+T())) + | SUB => (advance (); E'(i-T())) + | RPAREN => i + | EOF => i + | EOS => i + | _ => error() + and T () = T'(F()) + and T' i = + case !nexttok of + PLUS => i + | SUB => i + | TIMES => (advance(); T'(i*F())) + | DIV => (advance (); T'(i div F())) + | EOF => i + | EOS => i + | RPAREN => i + | _ => error() + and F () = + case !nexttok of + ID i => (advance(); lookup i) + | LPAREN => + let val v = (advance(); E()) + in if !nexttok = RPAREN then (advance (); v) else error() + end + | NUM i => (advance(); i) + | _ => error() + in STMT_LIST () handle Error => parse strm + end + end diff --git a/ml-lex/doc/ml-lex.tex b/ml-lex/doc/ml-lex.tex new file mode 100644 index 0000000..b6c61ad --- /dev/null +++ b/ml-lex/doc/ml-lex.tex @@ -0,0 +1,612 @@ +\documentstyle{article} +\title{ A lexical analyzer generator for Standard ML.\\ + Version 1.6.0, October 1994 + } +\author{ Andrew W. Appel$^1$\\ + James S. Mattson\\ + David R. Tarditi$^2$\\ +\\ +\small +$^1$Department of Computer Science, Princeton University \\ +\small +$^2$School of Computer Science, Carnegie Mellon University +} +\date{} +\begin{document} +\maketitle +\begin{center} +(c) 1989-94 Andrew W. Appel, James S. Mattson, David R. Tarditi +\end{center} + +{\bf +This software comes with ABSOLUTELY NO WARRANTY. It is subject only to +the terms of the ML-Yacc NOTICE, LICENSE, and DISCLAIMER (in the +file COPYRIGHT distributed with this software). +} + +\vspace{1in} + +New in this version: +\begin{itemize} +\item REJECT is much less costly than before. +\item Lexical analyzers with more than 255 states can now compile in your +lifetime. +\end{itemize} + +\newpage +\tableofcontents +\newpage + +\section{General Description} + +Computer programs often need to divide their input into words and +distinguish between different kinds of words. Compilers, for +example, need to distinguish between integers, reserved words, and +identifiers. Applications programs often need to be able to +recognize components of typed commands from users. + +The problem of segmenting input into words and recognizing classes of +words is known as lexical analysis. Small cases of this problem, +such as reading text strings separated by spaces, can be solved by +using hand-written programs. Larger cases of this problem, such as +tokenizing an input stream for a compiler, can also be solved using +hand-written programs. + +A hand-written program for a large lexical analysis problem, however, +suffers from two major problems. First, the program requires a fair +amount of programmer time to create. Second, the description of +classes of words is not explicit in the program. It must be inferred +from the program code. This makes it difficult to verify if the +program recognizes the correct words for each class. It also makes +future maintenance of the program difficult. + +Lex, a programming tool for the Unix system, is a successful solution +to the general problem of lexical analysis. It uses regular +expressions to describe classes of words. A program fragment is +associated with each class of words. This information is given to +Lex as a specification (a Lex program). Lex produces a program for a +function that can be used to perform lexical analysis. + +The function operates as follows. It finds the longest word starting +from the current position in the input stream that is in one of the +word classes. It executes the program fragment associated with the +class, and sets the current position in the input stream to be the +character after the word. The program fragment has the actual text +of the word available to it, and may be any piece of code. For many +applications it returns some kind of value. + +Lex allows the programmer to make the language description explicit, +and to concentrate on what to do with the recognized words, not how +to recognize the words. It saves programmer time and increases +program maintainability. + +Unfortunately, Lex is targeted only C. It also places artificial +limits on the size of strings that can be recognized. + +ML-Lex is a variant of Lex for the ML programming language. ML-Lex +has a syntax similar to Lex, and produces an ML program instead of a +C program. ML-Lex produces a program that runs very efficiently. +Typically the program will be as fast or even faster than a +hand-coded lexer implemented in Standard ML. + +The program typically uses only a small amount of space. +ML-Lex thus allows ML programmers the same benefits that Lex allows C +programmers. It also does not place artificial limits on the size of +recognized strings. + +\section{ML-Lex specifications} + +An ML-Lex specification has the general format: + +\begin{quote} + {user declarations} + \verb|%%| + {ML-Lex definitions} + \verb|%%| + {rules} +\end{quote} + +Each section is separated from the others by a \verb|%%| delimiter. + +The rules are used to define the lexical analysis function. Each +rule has two parts---a regular expression and an action. The regular +expression defines the word class that a rule matches. The action is +a program fragment to be executed when a rule matches the input. The +actions are used to compute values, and must all return values of the +same type. + +The user can define values available to all rule actions in the user +declarations section. The user must define two values in this +section---a type lexresult and a function eof. Lexresult defines the +type of values returned by the rule actions. The function "eof" is +called by the lexer when the end of the input stream is reached. It +will typically return a value signalling eof or raise an exception. +It is called with the same argument as lex (see \verb|%arg|, below), +and must return a value of type lexresult. + +In the definitions section, the user can define named regular +expressions, a set of start states, and specify which of the various +bells and whistles of ML-Lex are desired. + +The start states allow the user to control when certain rules are +matched. Rules may be defined to match only when the lexer is in +specific start states. The user may change the lexer's start state +in a rule action. This allows the user to specify special handling +of lexical objects. + +This feature is typically used to handle quoted strings with escapes +to denote special characters. The rules to recognize the inside +contents of a string are defined for only one start state. This +start state is entered when the beginning of a string is recognized, +and exited when the end of the string is recognized. + +\section{Regular expressions} + +Regular expressions are a simple language for denoting classes of +strings. A regular expression is defined inductively over an +alphabet with a set of basic operations. The alphabet for ML-Lex is +the Ascii character set (character codes 0--127; or if +\verb|%full| is used, 0--255). + +The syntax and semantics of regular expressions will be described in +order of decreasing precedence (from the most tightly binding operators +to the most weakly binding): + +\begin{itemize} +\item An individual character stands for itself, except for the + reserved characters \verb@? * + | ( ) ^ $ / ; . = < > [ { " \@ + +\item[\\] A backslash followed by one of the reserved characters stands + for that character. + +\item A set of characters enclosed in square brackets [ ] stands + for any one of those characters. Inside the brackets, only + the symbols \verb|\ - ^| are reserved. An initial up-arrow + \verb|^| stands + for the complement of the characters listed, e.g. \verb|[^abc]| + stands any character except a, b, or c. The hyphen - denotes + a range of characters, e.g. \verb|[a-z]| stands for any lower-case + alphabetic character, and \verb|[0-9a-fA-F]| stands for any hexadecimal + digit. To include \verb|^| literally in a bracketed set, put it anywhere + but first; to include \verb|-| literally in a set, put it first or last. + +\item[\verb|.|] The dot \verb|.| character stands for any character except newline, + i.e. the same as \verb|[^\n]| + +\item The following special escape sequences are available, inside + or outside of square-brackets: + + \begin{tabular}{ll} + \verb|\b|& backspace\\ + \verb|\n|& newline\\ + \verb|\t|& tab\\ + \verb|\h|& stands for all characters with codes $>127$,\\ + &~~~~ when 7-bit characters are used.\\ + \verb|\ddd|& where \verb|ddd| is a 3 digit decimal escape.\\ + + \end{tabular} + +\item[\verb|"|] A sequence of characters will stand for itself (reserved + characters will be taken literally) if it is enclosed in + double quotes \verb|" "|. + +\item[\{\}] A named regular expression (defined in the ``definitions" + section) may be referred to by enclosing its name in + braces \verb|{ }|. + +\item[()] Any regular expression may be enclosed in parentheses \verb|( )| + for syntactic (but, as usual, not semantic) effect. + +\item[\verb|*|] The postfix operator \verb|*| stands for Kleene closure: + zero or more repetitions of the preceding expression. + +\item[\verb|+|] The postfix operator \verb|+| stands for one or more repetitions + of the preceding expression. + +\item[\verb|?|] The postfix operator \verb|?| stands for zero or one occurrence of + the preceding expression. + +\item A postfix repetition range $\{n_1,n_2\}$ where $n_1$ and $n_2$ are small + integers stands for any number of repetitions between $n_1$ and $n_2$ + of the preceding expression. The notation $\{n_1\}$ stands for + exactly $n_1$ repetitions. + +\item Concatenation of expressions denotes concatenation of strings. + The expression $e_1 e_2$ stands for any string that results from + the concatenation of one string that matches $e_1$ with another + string that matches $e_2$. + +\item\verb-|- The infix operator \verb-|- stands for alternation. The expression + $e_1$~\verb"|"~$e_2$ stands for anything that either $e_1$ or $e_2$ stands for. + +\item[\verb|/|] The infix operator \verb|/| denotes lookahead. Lookahead is not + implemented and cannot be used, because there is a bug + in the algorithm for generating lexers with lookahead. If + it could be used, the expression $e_1 / e_2$ would match any string + that $e_1$ stands for, but only when that string is followed by a + string that matches $e_2$. + +\item When the up-arrow \verb|^| occurs at the beginning of an expression, + that expression will only match strings that occur at the + beginning of a line (right after a newline character). + +\item[\$] The dollar sign of C Lex \$ is not implemented, since it is an abbreviation + for lookahead involving the newline character (that is, it + is an abbreviation for \verb|/\n|). +\end{itemize} + +Here are some examples of regular expressions, and descriptions of the +set of strings they denote: + +\begin{tabular}{ll} +\verb~0 | 1 | 2 | 3~& A single digit between 0 and 3\\ +\verb|[0123]|& A single digit between 0 and 3\\ +\verb|0123|& The string ``0123"\\ +\verb|0*|& All strings of 0 or more 0's\\ +\verb|00*|& All strings of 1 or more 0's\\ +\verb|0+|& All strings of 1 or more 0's\\ +\verb|[0-9]{3}|& Any three-digit decimal number.\\ +\verb|\\[ntb]|& A newline, tab, or backspace.\\ +\verb|(00)*|& Any string with an even number of 0's. +\end{tabular} + +\section{ML-Lex syntax summary} + +\subsection{User declarations} + +Anything up to the first \verb|%%| is in the user declarations section. The +user should note that no symbolic identifier containing +\verb|%%| can be +used in this section. + +\subsection{ML-Lex definitions} + +Start states can be defined with +\begin{quote} +\verb|%s| {identifier list} \verb|;| +\end{quote} + +An identifier list consists of one or more identifiers. + +An identifier consists of one or more letters, digits, underscores, +or primes, and must begin with a letter. + +Named expressions can be defined with + +\begin{quote} + {identifier} = {regular expression} ; +\end{quote} + +Regular expressions are defined below. + +The following \% commands are also available: + +\begin{description} +\item[\tt \%reject] create REJECT() function +\item[\tt \%count] count newlines using yylineno +\item[\tt \%posarg] pass initial-position argument to makeLexer +\item[\tt \%full] create lexer for the full 8-bit character set, + with characters in the range 0--255 permitted + as input. +\item[\tt \%structure \{identifier\}] name the structure in the output program + {identifier} instead of Mlex +\item[\tt \%header] use code following it to create header for lexer + structure +\item[\tt \%arg] extra (curried) formal parameter argument to be + passed to the lex functions, and to be passed + to the eof function in place of () +\end{description} + These functions are discussed in section~\ref{avail}. + +\subsection{Rules} + +Each rule has the format: + +\begin{quote} + \verb|<|{\it start state list}\verb|>| {\it regular expression} \verb|=> (| {\it code} \verb|);| +\end{quote} + +All parentheses in {\it code} must be balanced, including those +used in strings and comments. + +The {\it start state list} is optional. It consists of a list of +identifiers separated by commas, and is delimited by triangle +brackets \verb|< >|. Each identifier must be a start state defined in the +\verb|%s| section above. + +The regular expression is only recognized when the lexer is in one of +the start states in the start state list. If no start state list is +given, the expression is recognized in all start states. + +The lexer begins in a pre-defined start state called \verb|INITIAL|. + +The lexer resolves conflicts among rules by choosing the rule with +the longest match, and in the case two rules match the same string, +choosing the rule listed first in the specification. + +The rules should match all possible input. If some input occurs that +does not match any rule, the lexer created by ML-Lex will raise an +exception LexError. Note that this differs from C Lex, which prints +any unmatched input on the standard output. + +\section{Values available inside the code associated with a rule.} +\label{avail} + +ML-Lex places the value of the string matched by a regular expression +in \verb|yytext|, a string variable. + +The user may recursively +call the lexing function with \verb|lex()|. (If \verb|%arg| is used, the +lexing function may be re-invoked with the same argument by using +continue().) This is convenient for ignoring white space or comments silently: + +\begin{verbatim} + [\ \t\n]+ => ( lex()); +\end{verbatim} + +To switch start states, the user may call \verb|YYBEGIN| with the name of a +start state. + +The following values will be available only if the corresponding \verb|%| +command is in the ML-Lex definitions sections: + +\begin{tabular}{lll} +\\ +{\bf Value}&{\bf \% command}&{\bf description}\\ +\hline +{\tt REJECT} &{\tt\%reject}&\parbox[t]{2.6in}{{\tt REJECT()} causes the current + rule to be ``rejected.'' + The lexer behaves as if the + current rule had not matched; + another rule that matches this + string, or that matches the longest + possible prefix of this string, + is used instead.} \\ +{\tt yypos} & & \parbox[t]{2.6in}{The position of the first character +of {\tt yytext}, relative to the beginning of the file.}\\ +{\tt yylineno } & {\tt \%count} & Current line number\\ +\\ +\end{tabular} + + +These values should be used only if necessary. Adding {\tt REJECT} to a +lexer will slow it down by 20\%; adding {\tt yylineno} will slow it down by +another 20\%, or more. (It is much more efficient to +recognize {\tt \\n} and +have an action that increments the line-number variable.) The use of +the lookahead operator {\tt /} will also slow down the entire lexer. +The character-position, {\tt yypos}, is not costly to maintain, however. + +\paragraph{Bug.} The position of the first character in the file +is reported as 2 (unless the {\tt \%posarg} feature is used). +To preserve compatibility, this bug has not been fixed. + +\section{Running ML-Lex} + +From the Unix shell, run {\tt sml-lex~myfile.lex} +The output file will be myfile.lex.sml. The extension {\tt .lex} is not +required but is recommended. + +Within an interactive system [not the preferred method]: +Use {\tt lexgen.sml}; this will create a structure LexGen. The function +LexGen.lexGen creates a program for a lexer from an input +specification. It takes a string argument -- the name of the file +containing the input specification. The output file name is +determined by appending ``{\tt .sml}'' to the input file name. + +\section{Using the program produced by ML-Lex} + +When the output file is loaded, it will create a structure Mlex that +contains the function {\tt makeLexer} which takes a function from +${\it int} \rightarrow {\it string}$ and returns a lexing function: + +\begin{verbatim} + val makeLexer : (int->string) -> yyarg -> lexresult +\end{verbatim} +where {\tt yyarg} is the type given in the {\tt \%yyarg} directive, +or {\tt unit} if there is no {\tt \%yyarg} directive. + +For example, + +\begin{verbatim} + val lexer = Mlex.makeLexer (inputc (open_in "f")) +\end{verbatim} + +creates a lexer that operates on the file whose name is f. + +When the {\tt \%posarg} directive is used, the type of +{\tt makeLexer} is +\begin{verbatim} + val makeLexer : ((int->string)*int) -> yyarg -> lexresult +\end{verbatim} +where the extra {\tt int} argument is one less than the {\tt yypos} +of the first character in the input. The value $k$ would be used, +for example, when creating +a lexer to start in the middle of a file, when $k$ characters have +already been read. At the beginning of the file, $k=0$ should be used. + +The ${\it int} \rightarrow {\it string}$ function +should read a string of characters +from the input stream. It should return a null string to indicate +that the end of the stream has been reached. The integer is the +number of characters that the lexer wishes to read; the function may +return any non-zero number of characters. For example, + +\begin{verbatim} + val lexer = + let val input_line = fn f => + let fun loop result = + let val c = input (f,1) + val result = c :: result + in if String.size c = 0 orelse c = "\n" then + String.implode (rev result) + else loop result + end + in loop nil + end + in Mlex.makeLexer (fn n => input_line std_in) + end +\end{verbatim} + +is appropriate for interactive streams where prompting, etc. occurs; +the lexer won't care that \verb|input_line| might return a string of more +than or less than $n$ characters. + +The lexer tries to read a large number of characters from the input +function at once, and it is desirable that the input function return +as many as possible. Reading many characters at once makes the lexer +more efficient. Fewer input calls and buffering operations are +needed, and input is more efficient in large block reads. For +interactive streams this is less of a concern, as the limiting factor +is the speed at which the user can type. + +To obtain a value, invoke the lexer by passing it a unit: + +\begin{verbatim} + val nextToken = lexer() +\end{verbatim} + +If one wanted to restart the lexer, one would just discard {\tt lexer} +and create a new lexer on the same stream with another call to +{\tt makeLexer}. This is the best way to discard any characters buffered +internally by the lexer. + +All code in the user declarations section is placed inside a +structure UserDeclarations. To access this structure, use the path name +{\tt Mlex.UserDeclarations}. + +If any input cannot be matched, the program will raise the exception +{\tt Mlex.LexError}. An internal error (i.e. bug) will cause the +exception {\tt Internal.LexerError} to be raised. + +If {\tt \%structure} is used, remember that the structure name will no +longer be Mlex, but the one specified in the command. + +\section{Sample} + +Here is a sample lexer for a calculator program: + +\small +\begin{verbatim} +datatype lexresult= DIV | EOF | EOS | ID of string | LPAREN | + NUM of int | PLUS | PRINT | RPAREN | SUB | TIMES + +val linenum = ref 1 +val error = fn x => output(std_out,x ^ "\n") +val eof = fn () => EOF +%% +%structure CalcLex +alpha=[A-Za-z]; +digit=[0-9]; +ws = [\ \t]; +%% +\n => (inc linenum; lex()); +{ws}+ => (lex()); +"/" => (DIV); +";" => (EOS); +"(" => (LPAREN); +{digit}+ => (NUM (revfold (fn(a,r)=>ord(a)-ord("0")+10*r) (explode yytext) 0)); +")" => (RPAREN); +"+" => (PLUS); +{alpha}+ => (if yytext="print" then PRINT else ID yytext); +"-" => (SUB); +"*" => (TIMES); +. => (error ("calc: ignoring bad character "^yytext); lex()); +\end{verbatim} + + +Here is the parser for the calculator: +\begin{verbatim} + +(* Sample interactive calculator to demonstrate use of lexer + + The original grammar was + + stmt_list -> stmt_list stmt + stmt -> print exp ; | exp ; + exp -> exp + t | exp - t | t + t -> t * f | t/f | f + f -> (exp) | id | num + + The function parse takes a stream and parses it for the calculator + program. + + If a syntax error occurs, parse prints an error message and calls + itself on the stream. On this system that has the effect of ignoring + all input to the end of a line. +*) + +structure Calc = + struct + open CalcLex + open UserDeclarations + exception Error + fun parse strm = + let + val say = fn s => output(std_out,s) + val input_line = fn f => + let fun loop result = + let val c = input (f,1) + val result = c :: result + in if String.size c = 0 orelse c = "\n" then + String.implode (rev result) + else loop result + end + in loop nil + end + val lexer = makeLexer (fn n => input_line strm) + val nexttok = ref (lexer()) + val advance = fn () => (nexttok := lexer(); !nexttok) + val error = fn () => (say ("calc: syntax error on line" ^ + (makestring(!linenum)) ^ "\n"); raise Error) + val lookup = fn i => + if i = "ONE" then 1 + else if i = "TWO" then 2 + else (say ("calc: unknown identifier '" ^ i ^ "'\n"); raise Error) + fun STMT_LIST () = + case !nexttok of + EOF => () + | _ => (STMT(); STMT_LIST()) + + and STMT() = + (case !nexttok + of EOS => () + | PRINT => (advance(); say ((makestring (E():int)) ^ "\n"); ()) + | _ => (E(); ()); + case !nexttok + of EOS => (advance()) + | _ => error()) + and E () = E' (T()) + and E' (i : int ) = + case !nexttok of + PLUS => (advance (); E'(i+T())) + | SUB => (advance (); E'(i-T())) + | RPAREN => i + | EOF => i + | EOS => i + | _ => error() + and T () = T'(F()) + and T' i = + case !nexttok of + PLUS => i + | SUB => i + | TIMES => (advance(); T'(i*F())) + | DIV => (advance (); T'(i div F())) + | EOF => i + | EOS => i + | RPAREN => i + | _ => error() + and F () = + case !nexttok of + ID i => (advance(); lookup i) + | LPAREN => + let val v = (advance(); E()) + in if !nexttok = RPAREN then (advance (); v) else error() + end + | NUM i => (advance(); i) + | _ => error() + in STMT_LIST () handle Error => parse strm + end + end +\end{verbatim} +\end{document} diff --git a/ml-lex/export-lex.sml b/ml-lex/export-lex.sml new file mode 100644 index 0000000..5b5efd4 --- /dev/null +++ b/ml-lex/export-lex.sml @@ -0,0 +1,45 @@ +(* export-lex.sml + * + * Revision 1.2 2000/03/07 04:01:05 blume + * - build script now use new ml-build mechanism + *) +structure ExportLexGen : sig + val lexGen : (string * string list) -> OS.Process.status +end = struct + + exception Interrupt + + (* This function applies operation to (). If it handles an interrupt + * signal (Control-C), it raises the exception Interrupt. Example: + * (handleInterrupt foo) handle Interrupt => print "Bang!\n" + *) + fun handleInterrupt (operation : unit -> unit) = + let exception Done + val old'handler = Signals.inqHandler(Signals.sigINT) + fun reset'handler () = + Signals.setHandler(Signals.sigINT, old'handler) + in (SMLofNJ.Cont.callcc (fn k => + (Signals.setHandler(Signals.sigINT, Signals.HANDLER(fn _ => k)); + operation (); + raise Done)); + raise Interrupt) + handle Done => (reset'handler ()) + | exn => (reset'handler (); raise exn) + end + + fun err msg = TextIO.output(TextIO.stdErr, String.concat msg) + + fun lexGen (name, args) = let + fun lex_gen () = + case args of + [] => (err [name, ": missing filename\n"]; + OS.Process.exit OS.Process.failure) + | files => List.app LexGen.lexGen files + in + (handleInterrupt lex_gen; OS.Process.success) + handle Interrupt => (err [name, ": Interrupt\n"]; OS.Process.failure) + | any => (err [name, ": uncaught exception ", + General.exnMessage any, "\n"]; + OS.Process.failure) + end +end diff --git a/ml-lex/lexgen.sml b/ml-lex/lexgen.sml new file mode 100644 index 0000000..33029d8 --- /dev/null +++ b/ml-lex/lexgen.sml @@ -0,0 +1,1421 @@ +(* Lexical analyzer generator for Standard ML. + Version 1.7.0, June 1998 + +Copyright (c) 1989-1992 by Andrew W. Appel, + David R. Tarditi, James S. Mattson + +This software comes with ABSOLUTELY NO WARRANTY. +This software is subject only to the PRINCETON STANDARD ML SOFTWARE LIBRARY +COPYRIGHT NOTICE, LICENSE AND DISCLAIMER, (in the file "COPYRIGHT", +distributed with this software). You may copy and distribute this software; +see the COPYRIGHT NOTICE for details and restrictions. + + Changes: + 07/25/89 (drt): added %header declaration, code to place + user declarations at same level as makeLexer, etc. + This is needed for the parser generator. + /10/89 (appel): added %arg declaration (see lexgen.doc). + /04/90 (drt): fixed following bug: couldn't use the lexer after an + error occurred -- NextTok and inquote weren't being reset + 10/22/91 (drt): disabled use of lookahead + 10/23/92 (drt): disabled use of $ operator (which involves lookahead), + added handlers for dictionary lookup routine + 11/02/92 (drt): changed handler for exception Reject in generated lexer + to Internal.Reject + 02/01/94 (appel): Moved the exception handler for Reject in such + a way as to allow tail-recursion (improves performance + wonderfully!). + 02/01/94 (appel): Fixed a bug in parsing of state names. + 05/19/94 (Mikael Pettersson, mpe@ida.liu.se): + Transition tables are usually represented as strings, but + when the range is too large, int vectors constructed by + code like "Vector.vector[1,2,3,...]" are used instead. + The problem with this isn't that the vector itself takes + a lot of space, but that the code generated by SML/NJ to + construct the intermediate list at run-time is *HUGE*. My + fix is to encode an int vector as a string literal (using + two bytes per int) and emit code to decode the string to + a vector at run-time. SML/NJ compiles string literals into + substrings in the code, so this uses much less space. + 06/02/94 (jhr): Modified export-lex.sml to conform to new installation + scheme. Also removed tab characters from string literals. + 10/05/94 (jhr): Changed generator to produce code that uses the new + basis style strings and characters. + 10/06/94 (jhr) Modified code to compile under new basis style strings + and characters. + 02/08/95 (jhr) Modified to use new List module interface. + 05/18/95 (jhr) changed Vector.vector to Vector.fromList + 04/07/20 (jhr) Switch to using RedBlackMapFn from SML/NJ Library + 04/07/20 (jhr) Replaced uses of polymorphic equality with pattern + matching. + + * Revision 1.9 1998/01/06 19:23:53 appel + * added %posarg feature to permit position-within-file to be passed + * as a parameter to makeLexer + * +# Revision 1.8 1998/01/06 19:01:48 appel +# repaired error messages like "cannot have both %structure and %header" +# +# Revision 1.7 1998/01/06 18:55:49 appel +# permit %% to be unescaped within regular expressions +# +# Revision 1.6 1998/01/06 18:46:13 appel +# removed undocumented feature that permitted extra %% at end of rules +# +# Revision 1.5 1998/01/06 18:29:23 appel +# put yylineno variable inside makeLexer function +# +# Revision 1.4 1998/01/06 18:19:59 appel +# check for newline inside quoted string +# +# Revision 1.3 1997/10/04 03:52:13 dbm +# Fix to remove output file if ml-lex fails. +# + 10/17/02 (jhr) changed bad character error message to properly + print the bad character. + 10/17/02 (jhr) fixed skipws to use Char.isSpace test. + 07/27/05 (jhr) add \r as a recognized escape sequence. + *) + +(* Subject: lookahead in sml-lex + Reply-to: david.tarditi@CS.CMU.EDU + Date: Mon, 21 Oct 91 14:13:26 -0400 + +There is a serious bug in the implementation of lookahead, +as done in sml-lex, and described in Aho, Sethi, and Ullman, +p. 134 "Implementing the Lookahead Operator" + +We have disallowed the use of lookahead for now because +of this bug. + +As a counter-example to the implementation described in +ASU, consider the following specification with the +input string "aba" (this example is taken from +a comp.compilers message from Dec. 1989, I think): + +type lexresult=unit +val linenum = ref 1 +fun error x = TextIO.output(TextIO.stdErr, x ^ "\n") +val eof = fn () => () +%% +%structure Lex +%% +(a|ab)/ba => (print yytext; print "\n"; ()); + +The ASU proposal works as follows. Suppose that we are +using NFA's to represent our regular expressions. Then to +build an NFA for e1 / e2, we build an NFA n1 for e1 +and an NFA n2 for e2, and add an epsilon transition +from e1 to e2. + +When lexing, when we encounter the end state of e1e2, +we take as the end of the string the position in +the string that was the last occurrence of the state of +the NFA having a transition on the epsilon introduced +for /. + +Using the example we have above, we'll have an NFA +with the following states: + + + 1 -- a --> 2 -- b --> 3 + | | + | epsilon | epsilon + | | + |------------> 4 -- b --> 5 -- a --> 6 + +On our example, we get the following list of transitions: + +a : 2, 4 (make an epsilon transition from 2 to 4) +ab : 3, 4, 5 (make an epsilon transition from 3 to 4) +aba : 6 + +If we chose the last state in which we made an epsilon transition, +we'll chose the transition from 3 to 4, and end up with "ab" +as our token, when we should have "a" as our token. + +*) + +(* +functor RedBlack(B : sig type key + val > : key*key->bool + end): + sig type tree + type key + val empty : tree + val insert : key * tree -> tree + val lookup : key * tree -> key + exception notfound of key + end = +struct + open B + datatype color = RED | BLACK + datatype tree = empty | tree of key * color * tree * tree + exception notfound of key + + fun insert (key,t) = + let fun f empty = tree(key,RED,empty,empty) + | f (tree(k,BLACK,l,r)) = + if key>k + then case f r + of r as tree(rk,RED, rl as tree(rlk,RED,rll,rlr),rr) => + (case l + of tree(lk,RED,ll,lr) => + tree(k,RED,tree(lk,BLACK,ll,lr), + tree(rk,BLACK,rl,rr)) + | _ => tree(rlk,BLACK,tree(k,RED,l,rll), + tree(rk,RED,rlr,rr))) + | r as tree(rk,RED,rl, rr as tree(rrk,RED,rrl,rrr)) => + (case l + of tree(lk,RED,ll,lr) => + tree(k,RED,tree(lk,BLACK,ll,lr), + tree(rk,BLACK,rl,rr)) + | _ => tree(rk,BLACK,tree(k,RED,l,rl),rr)) + | r => tree(k,BLACK,l,r) + else if k>key + then case f l + of l as tree(lk,RED,ll, lr as tree(lrk,RED,lrl,lrr)) => + (case r + of tree(rk,RED,rl,rr) => + tree(k,RED,tree(lk,BLACK,ll,lr), + tree(rk,BLACK,rl,rr)) + | _ => tree(lrk,BLACK,tree(lk,RED,ll,lrl), + tree(k,RED,lrr,r))) + | l as tree(lk,RED, ll as tree(llk,RED,lll,llr), lr) => + (case r + of tree(rk,RED,rl,rr) => + tree(k,RED,tree(lk,BLACK,ll,lr), + tree(rk,BLACK,rl,rr)) + | _ => tree(lk,BLACK,ll,tree(k,RED,lr,r))) + | l => tree(k,BLACK,l,r) + else tree(key,BLACK,l,r) + | f (tree(k,RED,l,r)) = + if key>k then tree(k,RED,l, f r) + else if k>key then tree(k,RED, f l, r) + else tree(key,RED,l,r) + in case f t + of tree(k,RED, l as tree(_,RED,_,_), r) => tree(k,BLACK,l,r) + | tree(k,RED, l, r as tree(_,RED,_,_)) => tree(k,BLACK,l,r) + | t => t + end + + + fun lookup (key,t) = + let fun look empty = raise (notfound key) + | look (tree(k,_,l,r)) = + if k>key then look l + else if key>k then look r + else k + in look t + end + +end +*) + +signature LEXGEN = + sig + val lexGen: string -> unit + end + +structure LexGen: LEXGEN = + struct + val sub = Array.sub + infix 9 sub + + datatype token = CHARS of bool array | QMARK | STAR | PLUS | BAR + | LP | RP | CARAT | DOLLAR | SLASH | STATE of string list + | REPS of int * int | ID of string | ACTION of string + | BOF | EOF | ASSIGN | SEMI | ARROW | LEXMARK | LEXSTATES + | COUNT | REJECT | FULLCHARSET | STRUCT | HEADER | ARG | POSARG + + datatype exp = EPS | CLASS of bool array * int | CLOSURE of exp + | ALT of exp * exp | CAT of exp * exp | TRAIL of int + | END of int + + (* flags describing input Lex spec. - unnecessary code is omitted *) + (* if possible *) + + val CharFormat = ref false; + val UsesTrailingContext = ref false; + val UsesPrevNewLine = ref false; + + (* flags for various bells & whistles that Lex has. These slow the + lexer down and should be omitted from production lexers (if you + really want speed) *) + + val CountNewLines = ref false; + val PosArg = ref false; + val HaveReject = ref false; + + (* Can increase size of character set *) + + val CharSetSize = ref 129; + + (* Can name structure or declare header code *) + + val StrName = ref "Mlex" + val HeaderCode = ref "" + val HeaderDecl = ref false + val ArgCode = ref (NONE: string option) + val StrDecl = ref false + + val ResetFlags = fn () => (CountNewLines := false; HaveReject := false; + PosArg := false; + UsesTrailingContext := false; + CharSetSize := 129; StrName := "Mlex"; + HeaderCode := ""; HeaderDecl:= false; + ArgCode := NONE; + StrDecl := false) + + val LexOut = ref(TextIO.stdOut) + fun say x = TextIO.output(!LexOut, x) + +(* Union: merge two sorted lists of integers *) + +fun union(a,b) = let val rec merge = fn + (nil,nil,z) => z + | (nil,el::more,z) => merge(nil,more,el::z) + | (el::more,nil,z) => merge(more,nil,el::z) + | (x::morex,y::morey,z) => if (x:int)=(y:int) + then merge(morex,morey,x::z) + else if x>y then merge(morex,y::morey,x::z) + else merge(x::morex,morey,y::z) + in merge(rev a,rev b,nil) +end + +(* Nullable: compute if a important expression parse tree node is nullable *) + +val rec nullable = fn + EPS => true + | CLASS(_) => false + | CLOSURE(_) => true + | ALT(n1,n2) => nullable(n1) orelse nullable(n2) + | CAT(n1,n2) => nullable(n1) andalso nullable(n2) + | TRAIL(_) => true + | END(_) => false + +(* FIRSTPOS: firstpos function for parse tree expressions *) + +and firstpos = fn + EPS => nil + | CLASS(_,i) => [i] + | CLOSURE(n) => firstpos(n) + | ALT(n1,n2) => union(firstpos(n1),firstpos(n2)) + | CAT(n1,n2) => if nullable(n1) then union(firstpos(n1),firstpos(n2)) + else firstpos(n1) + | TRAIL(i) => [i] + | END(i) => [i] + +(* LASTPOS: Lastpos function for parse tree expressions *) + +and lastpos = fn + EPS => nil + | CLASS(_,i) => [i] + | CLOSURE(n) => lastpos(n) + | ALT(n1,n2) => union(lastpos(n1),lastpos(n2)) + | CAT(n1,n2) => if nullable(n2) then union(lastpos(n1),lastpos(n2)) + else lastpos(n2) + | TRAIL(i) => [i] + | END(i) => [i] + ; + +(* ++: Increment an integer reference *) + +fun ++(x) : int = (x := !x + 1; !x); + +structure dict = + struct + type 'a relation = 'a * 'a -> bool + abstype ('b,'a) dictionary = DATA of { Table : ('b * 'a) list, + Leq : 'b * 'b -> bool } + with + exception LOOKUP + fun create Leqfunc = DATA { Table = nil, Leq = Leqfunc } + fun lookup (DATA { Table = entrylist, Leq = leq }) key = + let fun search [] = raise LOOKUP + | search((k,item)::entries) = + if leq(key,k) + then if leq(k,key) then item else raise LOOKUP + else search entries + in search entrylist + end + fun enter (DATA { Table = entrylist, Leq = leq }) + (newentry as (key : 'b,item :'a)) : ('b,'a) dictionary = + let val gt = fn a => fn b => not (leq(a,b)) + val eq = fn k => fn k' => (leq(k,k')) andalso (leq(k',k)) + fun update nil = [ newentry ] + | update ((entry as (k,_))::entries) = + if (eq key k) then newentry::entries + else if gt k key then newentry::(entry::entries) + else entry::(update entries) + in DATA { Table = update entrylist, Leq = leq } + end + fun listofdict (DATA { Table = entrylist,Leq = leq}) = + let fun f (nil,r) = rev r + | f (a::b,r) = f (b,a::r) + in f(entrylist,nil) + end + end +end + +open dict; + +(* INPUT.ML : Input w/ one character push back capability *) + +val LineNum = ref 1; + +abstype ibuf = + BUF of TextIO.instream * {b : string ref, p : int ref} +with + fun make_ibuf(s) = BUF (s, {b=ref"", p = ref 0}) + fun close_ibuf (BUF (s,_)) = TextIO.closeIn(s) + exception eof + fun getch (a as (BUF(s,{b,p}))) = + if (!p = (size (!b))) + then (b := TextIO.inputN(s, 1024); + p := 0; + if (size (!b))=0 + then raise eof + else getch a) + else (let val ch = String.sub(!b,!p) + in (if ch = #"\n" + then LineNum := !LineNum + 1 + else (); + p := !p + 1; + ch) + end) + fun ungetch(BUF(s,{b,p})) = ( + p := !p - 1; + if String.sub(!b,!p) = #"\n" + then LineNum := !LineNum - 1 + else ()) +end; + +exception Error + +fun prErr x = ( + TextIO.output (TextIO.stdErr, String.concat [ + "ml-lex: error, line ", (Int.toString (!LineNum)), ": ", x, "\n" + ]); + raise Error) +fun prSynErr x = ( + TextIO.output (TextIO.stdErr, String.concat [ + "ml-lex: syntax error, line ", (Int.toString (!LineNum)), ": ", x, "\n" + ]); + raise Error) + +exception SyntaxError; (* error in user's input file *) + +exception LexError; (* unexpected error in lexer *) + +val LexBuf = ref(make_ibuf(TextIO.stdIn)); +val LexState = ref 0; +val NextTok = ref BOF; +val inquote = ref false; + +fun AdvanceTok () : unit = let + fun isLetter c = + ((c >= #"a") andalso (c <= #"z")) orelse + ((c >= #"A") andalso (c <= #"Z")) + fun isDigit c = (c >= #"0") andalso (c <= #"9") + (* check for valid (non-leading) identifier character (added by JHR) *) + fun isIdentChr c = + ((isLetter c) orelse (isDigit c) orelse (c = #"_") orelse (c = #"'")) + fun atoi s = let + fun num (c::r, n) = if isDigit c + then num (r, 10*n + (Char.ord c - Char.ord #"0")) + else n + | num ([], n) = n + in + num (explode s, 0) + end + + fun skipws () = let val ch = nextch() + in + if Char.isSpace ch + then skipws() + else ch + end + + and nextch () = getch(!LexBuf) + + and escaped () = (case nextch() + of #"b" => #"\008" + | #"n" => #"\n" + | #"r" => #"\r" + | #"t" => #"\t" + | #"h" => #"\128" + | x => let + fun err t = prErr("illegal ascii escape '"^(implode(rev t))^"'") + fun cvt c = (Char.ord c - Char.ord #"0") + fun f (n, c, t) = if c=3 + then if n >= (!CharSetSize) + then err t + else Char.chr n + else let val ch=nextch() + in + if isDigit ch + then f(n*10+(cvt ch), c+1, ch::t) + else err t + end + in + if isDigit x then f(cvt x, 1, [x]) else x + end + (* end case *)) + + and onechar x = let val c = Array.array(!CharSetSize, false) + in + Array.update(c, Char.ord(x), true); CHARS(c) + end + + in case !LexState of 0 => let val makeTok = fn () => + case skipws() + (* Lex % operators *) + of #"%" => (case nextch() of + #"%" => LEXMARK + | a => let fun f s = + let val a = nextch() + in if isLetter a then f(a::s) + else (ungetch(!LexBuf); + implode(rev s)) + end + in case f [a] + of "reject" => REJECT + | "count" => COUNT + | "full" => FULLCHARSET + | "s" => LEXSTATES + | "S" => LEXSTATES + | "structure" => STRUCT + | "header" => HEADER + | "arg" => ARG + | "posarg" => POSARG + | _ => prErr "unknown % operator " + end + ) + (* semicolon (for end of LEXSTATES) *) + | #";" => SEMI + (* anything else *) + | ch => if isLetter(ch) then + let fun getID matched = + let val x = nextch() +(**** fix by JHR + in if isLetter(x) orelse isDigit(x) orelse + x = "_" orelse x = "'" +****) + in if (isIdentChr x) + then getID (x::matched) + else (ungetch(!LexBuf); implode(rev matched)) + end + in ID(getID [ch]) + end + else prSynErr (String.concat[ + "bad character: \"", Char.toString ch, "\"" + ]) + in NextTok := makeTok() + end + | 1 => let val rec makeTok = fn () => + if !inquote then case nextch() of + (* inside quoted string *) + #"\\" => onechar(escaped()) + | #"\"" => (inquote := false; makeTok()) + | #"\n" => (prSynErr "end-of-line inside quoted string"; + inquote := false; makeTok()) + | x => onechar(x) + else case skipws() of + (* single character operators *) + #"?" => QMARK + | #"*" => STAR + | #"+" => PLUS + | #"|" => BAR + | #"(" => LP + | #")" => RP + | #"^" => CARAT + | #"$" => DOLLAR + | #"/" => SLASH + | #";" => SEMI + | #"." => let val c = Array.array(!CharSetSize,true) in + Array.update(c,10,false); CHARS(c) + end + (* assign and arrow *) + | #"=" => let val c = nextch() in + if c = #">" then ARROW else (ungetch(!LexBuf); ASSIGN) + end + (* character set *) + | #"[" => let val rec classch = fn () => let val x = skipws() + in if x = #"\\" then escaped() else x + end; + val first = classch(); + val flag = (first <> #"^"); + val c = Array.array(!CharSetSize,not flag); + fun add NONE = () + | add (SOME x) = Array.update(c, Char.ord(x), flag) + and range (x, y) = if x>y + then (prErr "bad char. range") + else let + val i = ref(Char.ord(x)) and j = Char.ord(y) + in while !i<=j do ( + add (SOME(Char.chr(!i))); + i := !i + 1) + end + and getClass last = (case classch() + of #"]" => (add(last); c) + | #"-" => (case last + of NONE => getClass(SOME #"-") + | (SOME last') => let val x = classch() + in + if x = #"]" + then (add(last); add(SOME #"-"); c) + else (range(last',x); getClass(NONE)) + end + (* end case *)) + | x => (add(last); getClass(SOME x)) + (* end case *)) + in CHARS(getClass(if first = #"^" then NONE else SOME first)) + end + (* Start States specification *) + | #"<" => let val rec get_state = fn (prev,matched) => + case nextch() of + #">" => matched::prev + | #"," => get_state(matched::prev,"") + | x => if isIdentChr(x) + then get_state(prev,matched ^ String.str x) + else (prSynErr "bad start state list") + in STATE(get_state(nil,"")) + end + (* {id} or repititions *) + | #"{" => let val ch = nextch() in if isLetter(ch) then + let fun getID matched = (case nextch() + of #"}" => matched + | x => if (isIdentChr x) then + getID(matched ^ String.str x) + else (prErr "invalid char. class name") + (* end case *)) + in ID(getID(String.str ch)) + end + else if isDigit(ch) then + let fun get_r (matched, r1) = (case nextch() + of #"}" => let val n = atoi(matched) in + if r1 = ~1 then (n,n) else (r1,n) + end + | #"," => if r1 = ~1 then get_r("",atoi(matched)) + else (prErr "invalid repetitions spec.") + | x => if isDigit(x) + then get_r(matched ^ String.str x,r1) + else (prErr "invalid char in repetitions spec") + (* end case *)) + in REPS(get_r(String.str ch,~1)) + end + else (prErr "bad repetitions spec") + end + (* Lex % operators *) + | #"\\" => onechar(escaped()) + (* start quoted string *) + | #"\"" => (inquote := true; makeTok()) + (* anything else *) + | ch => onechar(ch) + in NextTok := makeTok() + end + | 2 => NextTok := + (case skipws() of + #"(" => + let + fun loop_to_end (backslash, x) = + let + val c = getch (! LexBuf) + val notb = not backslash + val nstr = c :: x + in + case c of + #"\"" => if notb then nstr + else loop_to_end (false, nstr) + | _ => loop_to_end (c = #"\\" andalso notb, nstr) + end + fun GetAct (lpct, x) = + let + val c = getch (! LexBuf) + val nstr = c :: x + in + case c of + #"\"" => GetAct (lpct, loop_to_end (false, nstr)) + | #"(" => GetAct (lpct + 1, nstr) + | #")" => if lpct = 0 then implode (rev x) + else GetAct(lpct - 1, nstr) + | _ => GetAct(lpct, nstr) + end + in + ACTION (GetAct (0,nil)) + end + | #";" => SEMI + | c => (prSynErr ("invalid character " ^ String.str c))) + | _ => raise LexError +end +handle eof => NextTok := EOF ; + +fun GetTok (_:unit) : token = + let val t = !NextTok in AdvanceTok(); t + end; +val SymTab = ref (create String.<=) : (string,exp) dictionary ref + +fun GetExp () : exp = + + let val rec optional = fn e => ALT(EPS,e) + + and lookup' = fn name => + lookup(!SymTab) name + handle LOOKUP => prErr ("bad regular expression name: "^ + name) + + and newline = fn () => let val c = Array.array(!CharSetSize,false) in + Array.update(c,10,true); c + end + + and endline = fn e => trail(e,CLASS(newline(),0)) + + and trail = fn (e1,e2) => CAT(CAT(e1,TRAIL(0)),e2) + + and closure1 = fn e => CAT(e,CLOSURE(e)) + + and repeat = fn (min,max,e) => let val rec rep = fn + (0,0) => EPS + | (0,1) => ALT(e,EPS) + | (0,i) => CAT(rep(0,1),rep(0,i-1)) + | (i,j) => CAT(e,rep(i-1,j-1)) + in rep(min,max) + end + + and exp0 = fn () => case GetTok() + of CHARS(c) => exp1(CLASS(c,0)) + | LP => let + val e = exp0() + in + case !NextTok + of RP => (AdvanceTok(); exp1(e)) + | _ => (prSynErr "missing ')'") + end + | ID(name) => exp1(lookup' name) + | _ => raise SyntaxError + + and exp1 = fn (e) => case !NextTok of + SEMI => e + | ARROW => e + | EOF => e + | LP => exp2(e,exp0()) + | RP => e + | t => (AdvanceTok(); case t of + QMARK => exp1(optional(e)) + | STAR => exp1(CLOSURE(e)) + | PLUS => exp1(closure1(e)) + | CHARS(c) => exp2(e,CLASS(c,0)) + | BAR => ALT(e,exp0()) + | DOLLAR => (UsesTrailingContext := true; endline(e)) + | SLASH => (UsesTrailingContext := true; + trail(e,exp0())) + | REPS(i,j) => exp1(repeat(i,j,e)) + | ID(name) => exp2(e,lookup' name) + | _ => raise SyntaxError) + + and exp2 = fn (e1,e2) => case !NextTok of + SEMI => CAT(e1,e2) + | ARROW => CAT(e1,e2) + | EOF => CAT(e1,e2) + | LP => exp2(CAT(e1,e2),exp0()) + | RP => CAT(e1,e2) + | t => (AdvanceTok(); case t of + QMARK => exp1(CAT(e1,optional(e2))) + | STAR => exp1(CAT(e1,CLOSURE(e2))) + | PLUS => exp1(CAT(e1,closure1(e2))) + | CHARS(c) => exp2(CAT(e1,e2),CLASS(c,0)) + | BAR => ALT(CAT(e1,e2),exp0()) + | DOLLAR => (UsesTrailingContext := true; + endline(CAT(e1,e2))) + | SLASH => (UsesTrailingContext := true; + trail(CAT(e1,e2),exp0())) + | REPS(i,j) => exp1(CAT(e1,repeat(i,j,e2))) + | ID(name) => exp2(CAT(e1,e2),lookup' name) + | _ => raise SyntaxError) +in exp0() +end; +val StateTab = ref(create(String.<=)) : (string,int) dictionary ref + +val StateNum = ref 0; + +fun GetStates () : int list = + + let fun add nil sl = sl + | add (x::y) sl = add y (union ([lookup (!StateTab)(x) + handle LOOKUP => + prErr ("bad state name: "^x) + ],sl)) + + fun addall i sl = + if i <= !StateNum then addall (i+2) (union ([i],sl)) + else sl + + fun incall (x::y) = (x+1)::incall y + | incall nil = nil + + fun addincs nil = nil + | addincs (x::y) = x::(x+1)::addincs y + + val state_list = + case !NextTok of + STATE s => (AdvanceTok(); LexState := 1; add s nil) + | _ => addall 1 nil + + in case !NextTok + of CARAT => (LexState := 1; AdvanceTok(); UsesPrevNewLine := true; + incall state_list) + | _ => addincs state_list + end + +val LeafNum = ref ~1; + +fun renum(e : exp) : exp = + let val rec label = fn + EPS => EPS + | CLASS(x,_) => CLASS(x,++LeafNum) + | CLOSURE(e) => CLOSURE(label(e)) + | ALT(e1,e2) => ALT(label(e1),label(e2)) + | CAT(e1,e2) => CAT(label(e1),label(e2)) + | TRAIL(i) => TRAIL(++LeafNum) + | END(i) => END(++LeafNum) +in label(e) +end; + +exception ParseError; + +fun parse() : (string * (int list * exp) list * ((string,string) dictionary)) = let + fun isSEMI SEMI = true | isSEMI _ = false + val Accept = ref (create String.<=) : (string,string) dictionary ref + val rec ParseRtns = fn l => case getch(!LexBuf) of + #"%" => let val c = getch(!LexBuf) in + if c = #"%" then (implode (rev l)) + else ParseRtns(c :: #"%" :: l) + end + | c => ParseRtns(c::l) + and ParseDefs = fn () => + (LexState:=0; AdvanceTok(); case !NextTok of + LEXMARK => () + | LEXSTATES => + let fun f () = (case !NextTok of (ID i) => + (StateTab := enter(!StateTab)(i,++StateNum); + ++StateNum; AdvanceTok(); f()) + | _ => ()) + in AdvanceTok(); f (); + if isSEMI (!NextTok) then ParseDefs() else + (prSynErr "expected ';'") + end + | ID x => ( + LexState:=1; AdvanceTok(); + case GetTok() + of ASSIGN => ( + SymTab := enter(!SymTab)(x,GetExp()); + if isSEMI (!NextTok) then ParseDefs() + else (prSynErr "expected ';'")) + | _ => raise SyntaxError) + | REJECT => (HaveReject := true; ParseDefs()) + | COUNT => (CountNewLines := true; ParseDefs()) + | FULLCHARSET => (CharSetSize := 256; ParseDefs()) + | HEADER => (LexState := 2; AdvanceTok(); + case GetTok() + of ACTION s => + if (!StrDecl) then + (prErr "cannot have both %structure and %header \ + \declarations") + else if (!HeaderDecl) then + (prErr "duplicate %header declarations") + else + (HeaderCode := s; LexState := 0; + HeaderDecl := true; ParseDefs()) + | _ => raise SyntaxError) + | POSARG => (PosArg := true; ParseDefs()) + | ARG => (LexState := 2; AdvanceTok(); + case GetTok() + of ACTION s => + (case !ArgCode + of SOME _ => prErr "duplicate %arg declarations" + | NONE => ArgCode := SOME s; + LexState := 0; + ParseDefs()) + | _ => raise SyntaxError) + | STRUCT => (AdvanceTok(); + case !NextTok of + (ID i) => + if (!HeaderDecl) then + (prErr "cannot have both %structure and %header \ + \declarations") + else if (!StrDecl) then + (prErr "duplicate %structure declarations") + else (StrName := i; StrDecl := true) + | _ => (prErr "expected ID"); + ParseDefs()) + | _ => raise SyntaxError) + and ParseRules = + fn rules => (LexState:=1; AdvanceTok(); case !NextTok of + EOF => rules + | _ => + let val s = GetStates() + val e = renum(CAT(GetExp(),END(0))) + in + case !NextTok + of ARROW => (LexState:=2; AdvanceTok(); + case GetTok() of ACTION(act) => + if isSEMI (!NextTok) then + (Accept:=enter(!Accept) (Int.toString (!LeafNum),act); + ParseRules((s,e)::rules)) + else (prSynErr "expected ';'") + | _ => raise SyntaxError) + | _ => (prSynErr "expected '=>'") + end) +in let val usercode = ParseRtns nil + in (ParseDefs(); (usercode,ParseRules(nil),!Accept)) + end +end handle SyntaxError => (prSynErr "") + +fun makebegin () : unit = + let fun make nil = () + | make ((x,n:int)::y)=(say "val "; say x; say " = " ; + say "STARTSTATE "; + say (Int.toString n); say ";\n"; make y) + in say "\n(* start state definitions *)\n\n"; make(listofdict(!StateTab)) + end + +(* +structure L = + struct + nonfix > + type key = int list * string + fun > ((key,item:string),(key',item')) = + let fun f ((a:int)::a') (b::b') = if Int.> (a,b) then true + else if a=b then f a' b' + else false + | f _ _ = false + in f key key' + end + end + +structure RB = RedBlack(L) +*) + +(* a finite map implementation that replaces the original version, but + * keeps the same interface. + *) +structure RB : sig + type tree + type key + val empty : tree + val insert : key * tree -> tree + val lookup : key * tree -> key + exception notfound of key + end = struct + structure Map = RedBlackMapFn ( + struct + type ord_key = int list + val compare = List.collate Int.compare + end) + type key = (int list * string) + type tree = string Map.map + val empty = Map.empty + val insert = Map.insert' + exception notfound of key + fun lookup (arg as (key, _), t) = (case Map.find(t, key) + of SOME item => (key, item) + | NONE => raise notfound arg + (* end case *)) + end + +fun maketable (fins:(int * (int list)) list, + tcs :(int * (int list)) list, + tcpairs: (int * int) list, + trans : (int*(int list)) list) : unit = + +(* Fins = (state #, list of final leaves for the state) list + tcs = (state #, list of trailing context leaves which begin in this state) + list + tcpairs = (trailing context leaf, end leaf) list + trans = (state #,list of transitions for state) list *) + + let datatype elem = N of int | T of int | D of int + val count = ref 0 + val _ = (if length(trans)<256 then CharFormat := true + else CharFormat := false; + if !UsesTrailingContext then + (say "\ndatatype yyfinstate = N of int | \ + \ T of int | D of int\n") + else say "\ndatatype yyfinstate = N of int"; + say "\ntype statedata = {fin : yyfinstate list, trans: "; + case !CharFormat of + true => say "string}" + | false => say "int Vector.vector}"; + say "\n(* transition & final state table *)\nval tab = let\n"; + case !CharFormat of + true => () + | false => + (say "fun decode s k =\n"; + say " let val k' = k + k\n"; + say " val hi = Char.ord(String.sub(s, k'))\n"; + say " val lo = Char.ord(String.sub(s, k' + 1))\n"; + say " in hi * 256 + lo end\n")) + + val newfins = + let fun IsEndLeaf t = + let fun f ((l,e)::r) = if (e=t) then true else f r + | f nil = false in f tcpairs end + + fun GetEndLeaf t = + let fun f ((tl,el)::r) = if (tl=t) then el else f r + | f _ = raise Match + in f tcpairs + end + fun GetTrConLeaves s = + let fun f ((s',l)::r) = if (s = s') then l else f r + | f nil = nil + in f tcs + end + fun sort_leaves s = + let fun insert (x:int) (a::b) = + if (x <= a) then x::(a::b) + else a::(insert x b) + | insert x nil = [x] + in List.foldr (fn (x,r) => insert x r) [] s + end + fun conv a = if (IsEndLeaf a) then (D a) else (N a) + fun merge (a::a',b::b') = + if (a <= b) then (conv a)::merge(a',b::b') + else (T b)::(merge(a::a',b')) + | merge (a::a',nil) = (conv a)::(merge (a',nil)) + | merge (nil,b::b') = (T b)::(merge (b',nil)) + | merge (nil,nil) = nil + + in map (fn (x,l) => + rev (merge (l, + sort_leaves (map (fn x => GetEndLeaf x) (GetTrConLeaves x))))) + fins + end + + val rs = + let open RB + fun makeItems x = + let fun emit8(x, pos) = + let val s = StringCvt.padLeft #"0" 3 (Int.toString x) + in + case pos + of 16 => (say "\\\n\\\\"; say s; 1) + | _ => (say "\\"; say s; pos+1) + end + fun emit16(x, pos) = + let val hi8 = x div 256 + val lo8 = x - hi8 * 256 (* x rem 256 *) + in + emit8(lo8, emit8(hi8, pos)) + end + fun MakeString([], _, _) = () + | MakeString(x::xs, emitter, pos) = + MakeString(xs, emitter, emitter(x, pos)) + in case !CharFormat of + true => (say " \n\""; MakeString(x,emit8,0); say "\"\n") + | false => (say (Int.toString(length x)); + say ", \n\""; MakeString(x,emit16,0); say "\"\n") + end + + fun makeEntry(nil,rs,t) = rev rs + | makeEntry(((l:int,x)::y),rs,t) = + let val name = (Int.toString l) + in let val (r,n) = lookup ((x,name),t) + in makeEntry(y,(n::rs),t) + end handle notfound _ => + (count := !count+1; + say " ("; say name; say ","; + makeItems x; say "),\n"; + makeEntry(y,(name::rs),(insert ((x,name),t)))) + end + + val _ = say "val s = [ \n" + val res = makeEntry(trans,nil,empty) + val _ = + case !CharFormat + of true => (say "(0, \"\")]\n"; say "fun f x = x \n") + | false => (say "(0, 0, \"\")]\n"; + say "fun f(n, i, x) = (n, Vector.tabulate(i, decode x)) \n") + + val _ = say "val s = List.map f (List.rev (tl (List.rev s))) \n" + val _ = say "exception LexHackingError \n" + val _ = say "fun look ((j,x)::r, i: int) = if i = j then x else look(r, i) \n" + val _ = say " | look ([], i) = raise LexHackingError\n" + + val _ = say "fun g {fin=x, trans=i} = {fin=x, trans=look(s,i)} \n" + in res + end + + fun makeTable args = let + fun makeOne (a, b) = let + fun item (N i) = ("N", i) + | item (T i) = ("T", i) + | item (D i) = ("D", i) + fun makeItem x = let + val (t, n) = item x + in + app say ["(", t, " ", Int.toString n, ")"] + end + fun makeItems [] = () + | makeItems [x] = makeItem x + | makeItems (hd :: tl) = + (makeItem hd; say ","; makeItems tl) + in + say "{fin = ["; + makeItems b; + app say ["], trans = ", a, "}"] + end + fun mt ([], []) = () + | mt ([a], [b]) = makeOne (a, b) + | mt (a :: a', b :: b') = + (makeOne (a, b); say ",\n"; mt (a', b')) + | mt _ = raise Match + in + mt args + end + +(* + fun makeTable(nil,nil) = () + | makeTable(a::a',b::b') = + let fun makeItems nil = () + | makeItems (hd::tl) = + let val (t,n) = + case hd of + (N i) => ("(N ",i) + | (T i) => ("(T ",i) + | (D i) => ("(D ",i) + in (say t; say (Int.toString n); say ")"; + if null tl + then () + else (say ","; makeItems tl)) + end + in (say "{fin = ["; makeItems b; + say "], trans = "; say a; say "}"; + if null a' + then () + else (say ",\n"; makeTable(a',b'))) + end +*) + + fun msg x = TextIO.output(TextIO.stdOut, x) + + in (say "in Vector.fromList(List.map g \n["; makeTable(rs,newfins); + say "])\nend\n"; + msg ("\nNumber of states = " ^ (Int.toString (length trans))); + msg ("\nNumber of distinct rows = " ^ (Int.toString (!count))); + msg ("\nApprox. memory size of trans. table = " ^ + (Int.toString (!count*(!CharSetSize)*(if !CharFormat then 1 else 8)))); + msg " bytes\n") +end + +(* makeaccept: Takes a (string,string) dictionary, prints case statement for + accepting leaf actions. The key strings are the leaf #'s, the data strings + are the actions *) + +fun makeaccept ends = + let fun startline f = if f then say " " else say "| " + fun make(nil,f) = (startline f; say "_ => raise Internal.LexerError\n") + | make((x,a)::y,f) = (startline f; say x; say " => "; + if Substring.size(#2 (Substring.position "yytext" (Substring.full a))) = 0 + then + (say "("; say a; say ")") + else (say "let val yytext=yymktext() in "; + say a; say " end"); + say "\n"; make(y,false)) + in make (listofdict(ends),true) + end + +fun leafdata(e:(int list * exp) list) = + let val fp = Array.array(!LeafNum + 1,nil) + and leaf = Array.array(!LeafNum + 1,EPS) + and tcpairs = ref nil + and trailmark = ref ~1; + val rec add = fn + (nil,x) => () + | (hd::tl,x) => (Array.update(fp,hd,union(fp sub hd,x)); + add(tl,x)) + and moredata = fn + CLOSURE(e1) => + (moredata(e1); add(lastpos(e1),firstpos(e1))) + | ALT(e1,e2) => (moredata(e1); moredata(e2)) + | CAT(e1,e2) => (moredata(e1); moredata(e2); + add(lastpos(e1),firstpos(e2))) + | CLASS(x,i) => Array.update(leaf,i,CLASS(x,i)) + | TRAIL(i) => (Array.update(leaf,i,TRAIL(i)); if !trailmark = ~1 + then trailmark := i else ()) + | END(i) => (Array.update(leaf,i,END(i)); if !trailmark <> ~1 + then (tcpairs := (!trailmark,i)::(!tcpairs); + trailmark := ~1) else ()) + | _ => () + and makedata = fn + nil => () + | (_,x)::tl => (moredata(x);makedata(tl)) + in trailmark := ~1; makedata(e); (fp,leaf,!tcpairs) + end; + +fun makedfa(rules) = +let val StateTab = ref (create(String.<=)) : (string,int) dictionary ref + val fintab = ref (create(Int.<=)) : (int,(int list)) dictionary ref + val transtab = ref (create(Int.<=)) : (int,int list) dictionary ref + val tctab = ref (create(Int.<=)) : (int,(int list)) dictionary ref + val (fp, leaf, tcpairs) = leafdata(rules); + +fun visit (state,statenum) = + let val transitions = gettrans(state) in + fintab := enter(!fintab)(statenum,getfin(state)); + tctab := enter(!tctab)(statenum,gettc(state)); + transtab := enter(!transtab)(statenum,transitions) + end + +and visitstarts (states) = + let fun vs nil i = () + | vs (hd::tl) i = (visit (hd,i); vs tl (i+1)) + in vs states 0 + end + +and hashstate(s: int list) = + let val rec hs = + fn (nil,z) => z + | ((x:int)::y,z) => hs(y,z ^ " " ^ (Int.toString x)) + in hs(s,"") + end + +and find(s) = lookup(!StateTab)(hashstate(s)) + +and add(s,n) = StateTab := enter(!StateTab)(hashstate(s),n) + +and getstate (state) = + find(state) + handle LOOKUP => let val n = ++StateNum in + add(state,n); visit(state,n); n + end + +and getfin state = + let fun f nil fins = fins + | f (hd::tl) fins = + case (leaf sub hd) + of END _ => f tl (hd::fins) + | _ => f tl fins + in f state nil + end + +and gettc state = + let fun f nil fins = fins + | f (hd::tl) fins = + case (leaf sub hd) + of TRAIL _ => f tl (hd::fins) + | _ => f tl fins + in f state nil + end + +and gettrans (state) = + let fun loop c tlist = + let fun cktrans nil r = r + | cktrans (hd::tl) r = + case (leaf sub hd) of + CLASS(i,_)=> + (if (i sub c) then cktrans tl (union(r,fp sub hd)) + else cktrans tl r handle Subscript => + cktrans tl r + ) + | _ => cktrans tl r + in if c >= 0 then + let val v=cktrans state nil + in loop (c-1) (if v=nil then 0::tlist else (getstate v)::tlist) + end + else tlist + end + in loop ((!CharSetSize) - 1) nil + end + +and startstates() = + let val startarray = Array.array(!StateNum + 1, nil); + fun listofarray(a,n) = + let fun f i l = if i >= 0 then f (i-1) ((a sub i)::l) else l + in f (n-1) nil end + val rec makess = fn + nil => () + | (startlist,e)::tl => (fix(startlist,firstpos(e));makess(tl)) + and fix = fn + (nil,_) => () + | (s::tl,firsts) => (Array.update(startarray,s, + union(firsts,startarray sub s)); + fix(tl,firsts)) + in makess(rules);listofarray(startarray, !StateNum + 1) + end + +in visitstarts(startstates()); +(listofdict(!fintab),listofdict(!transtab),listofdict(!tctab),tcpairs) +end + +val skel_hd = +" struct\n\ +\ structure UserDeclarations =\n\ +\ struct\n\ +\" + +val skel_mid2 = +" | Internal.D k => action (i,(acts::l),k::rs)\n\ +\ | Internal.T k =>\n\ +\ let fun f (a::b,r) =\n\ +\ if a=k\n\ +\ then action(i,(((Internal.N a)::acts)::l),(b@r))\n\ +\ else f (b,a::r)\n\ +\ | f (nil,r) = action(i,(acts::l),rs)\n\ +\ in f (rs,nil)\n\ +\ end\n\ +\" + +fun lexGen(infile) = + let val outfile = infile ^ ".sml" + fun PrintLexer (ends) = + let val sayln = fn x => (say x; say "\n") + in case !ArgCode + of NONE => (sayln "fun lex () : Internal.result ="; + sayln "let fun continue() = lex() in") + | SOME s => (say "fun lex "; say "(yyarg as ("; say s; sayln ")) ="; + sayln "let fun continue() : Internal.result = "); + say " let fun scan (s,AcceptingLeaves : Internal.yyfinstate"; + sayln " list list,l,i0) ="; + if !UsesTrailingContext + then say "\tlet fun action (i,nil,rs)" + else say "\tlet fun action (i,nil)"; + sayln " = raise LexError"; + if !UsesTrailingContext + then sayln "\t| action (i,nil::l,rs) = action(i-1,l,rs)" + else sayln "\t| action (i,nil::l) = action (i-1,l)"; + if !UsesTrailingContext + then sayln "\t| action (i,(node::acts)::l,rs) =" + else sayln "\t| action (i,(node::acts)::l) ="; + sayln "\t\tcase node of"; + sayln "\t\t Internal.N yyk => "; + sayln "\t\t\t(let fun yymktext() = String.substring(!yyb,i0,i-i0)\n\ + \\t\t\t val yypos = i0+ !yygone"; + if !CountNewLines + then (sayln "\t\t\tval _ = yylineno := CharVectorSlice.foldli"; + sayln "\t\t\t\t(fn (_,#\"\\n\", n) => n+1 | (_,_, n) => n) (!yylineno) (CharVectorSlice.slice (!yyb,i0,SOME(i-i0)))") + else (); + if !HaveReject + then (say "\t\t\tfun REJECT() = action(i,acts::l"; + if !UsesTrailingContext + then sayln ",rs)" else sayln ")") + else (); + sayln "\t\t\topen UserDeclarations Internal.StartStates"; + sayln " in (yybufpos := i; case yyk of "; + sayln ""; + sayln "\t\t\t(* Application actions *)\n"; + makeaccept(ends); + say "\n\t\t) end "; + say ")\n\n"; + if (!UsesTrailingContext) then say skel_mid2 else (); + sayln "\tval {fin,trans} = Unsafe.Vector.sub(Internal.tab, s)"; + sayln "\tval NewAcceptingLeaves = fin::AcceptingLeaves"; + sayln "\tin if l = !yybl then"; + sayln "\t if trans = #trans(Vector.sub(Internal.tab,0))"; + sayln "\t then action(l,NewAcceptingLeaves"; + if !UsesTrailingContext then say ",nil" else (); + say ") else"; + sayln "\t let val newchars= if !yydone then \"\" else yyinput 1024"; + sayln "\t in if (String.size newchars)=0"; + sayln "\t\t then (yydone := true;"; + say "\t\t if (l=i0) then UserDeclarations.eof "; + sayln (case !ArgCode of NONE => "()" | SOME _ => "yyarg"); + say "\t\t else action(l,NewAcceptingLeaves"; + if !UsesTrailingContext then + sayln ",nil))" else sayln "))"; + sayln "\t\t else (if i0=l then yyb := newchars"; + sayln "\t\t else yyb := String.substring(!yyb,i0,l-i0)^newchars;"; + sayln "\t\t yygone := !yygone+i0;"; + sayln "\t\t yybl := String.size (!yyb);"; + sayln "\t\t scan (s,AcceptingLeaves,l-i0,0))"; + sayln "\t end"; + sayln "\t else let val NewChar = Char.ord(Unsafe.CharVector.sub(!yyb,l))"; + if !CharSetSize=129 + then sayln "\t\tval NewChar = if NewChar<128 then NewChar else 128" + else (); + say "\t\tval NewState = "; + sayln (if !CharFormat + then "Char.ord(Unsafe.CharVector.sub(trans,NewChar))" + else "Unsafe.Vector.sub(trans, NewChar)"); + say "\t\tin if NewState=0 then action(l,NewAcceptingLeaves"; + if !UsesTrailingContext then sayln ",nil)" else sayln ")"; + sayln "\t\telse scan(NewState,NewAcceptingLeaves,l+1,i0)"; + sayln "\tend"; + sayln "\tend"; + if !UsesPrevNewLine then () else sayln "(*"; + sayln "\tval start= if String.substring(!yyb,!yybufpos-1,1)=\"\\n\""; + sayln "then !yybegin+1 else !yybegin"; + if !UsesPrevNewLine then () else sayln "*)"; + say "\tin scan("; + if !UsesPrevNewLine then say "start" + else say "!yybegin (* start *)"; + sayln ",nil,!yybufpos,!yybufpos)"; + sayln " end"; + sayln (case !ArgCode of NONE => "end" | SOME _ => "in continue end"); + sayln " in lex"; + sayln " end"; + sayln "end" + end + + in (UsesPrevNewLine := false; + ResetFlags(); + LexBuf := make_ibuf(TextIO.openIn infile); + NextTok := BOF; + inquote := false; + LexOut := TextIO.openOut(outfile); + StateNum := 2; + LineNum := 1; + StateTab := enter(create(String.<=))("INITIAL",1); + LeafNum := ~1; + let + val (user_code,rules,ends) = + parse() handle x => + (close_ibuf(!LexBuf); + TextIO.closeOut(!LexOut); + OS.FileSys.remove outfile; + raise x) + val (fins,trans,tctab,tcpairs) = makedfa(rules) + val _ = if !UsesTrailingContext then + (close_ibuf(!LexBuf); + TextIO.closeOut(!LexOut); + OS.FileSys.remove outfile; + prErr "lookahead is unimplemented") + else () + in + if (!HeaderDecl) + then say (!HeaderCode) + else say ("structure " ^ (!StrName)); + say "=\n"; + say skel_hd; + say user_code; + say "end (* end of user routines *)\n"; + say "exception LexError (* raised if illegal leaf "; + say "action tried *)\n"; + say "structure Internal =\n\tstruct\n"; + maketable(fins,tctab,tcpairs,trans); + say "structure StartStates =\n\tstruct\n"; + say "\tdatatype yystartstate = STARTSTATE of int\n"; + makebegin(); + say "\nend\n"; + say "type result = UserDeclarations.lexresult\n"; + say "\texception LexerError (* raised if illegal leaf "; + say "action tried *)\n"; + say "end\n\n"; + say (if (!PosArg) then "fun makeLexer (yyinput,yygone0:int) =\nlet\n" + else "fun makeLexer yyinput =\nlet\tval yygone0=1\n"); + if !CountNewLines then say "\tval yylineno = ref 0\n\n" else (); + say "\tval yyb = ref \"\\n\" \t\t(* buffer *)\n\ + \\tval yybl = ref 1\t\t(*buffer length *)\n\ + \\tval yybufpos = ref 1\t\t(* location of next character to use *)\n\ + \\tval yygone = ref yygone0\t(* position in file of beginning of buffer *)\n\ + \\tval yydone = ref false\t\t(* eof found yet? *)\n\ + \\tval yybegin = ref 1\t\t(*Current 'start state' for lexer *)\n\ + \\n\tval YYBEGIN = fn (Internal.StartStates.STARTSTATE x) =>\n\ + \\t\t yybegin := x\n\n"; + PrintLexer(ends); + close_ibuf(!LexBuf); + TextIO.closeOut(!LexOut) + end) + end +end diff --git a/ml-lex/ml-lex.cm b/ml-lex/ml-lex.cm new file mode 100644 index 0000000..78ee742 --- /dev/null +++ b/ml-lex/ml-lex.cm @@ -0,0 +1,15 @@ +(* ml-lex.cm + * + * COPYRIGHT (c) 2020 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + *) + +Library + structure ExportLexGen +is + + $/basis.cm + $/smlnj-lib.cm + + lexgen.sml + export-lex.sml diff --git a/ml-lex/mlex_int.doc b/ml-lex/mlex_int.doc new file mode 100644 index 0000000..2401f75 --- /dev/null +++ b/ml-lex/mlex_int.doc @@ -0,0 +1,83 @@ +This is minimal documentation for the lexer driver produced by ml-lex. + +Main data structures: + + The transition table is stored in tab. Tab is an array of records, indexed +by state number. The first field of the record, fin, is a list of final leaves +assocated with it. The second field of the record, trans, is a transition +table for the state indexed by character number. It gives the next state +for a given input character. + + The usual initial start state is state #1. State 0 is a dead state, which +has transitions only to itself. + + The field yyfin has type yyfinstate list. yyfinstate consists of the +following three constructors: + + * N of int - indicates normal end leaf. + * D of int - dummy end leaf - for indicating when an end state for + a trailing context regular expression has been reached. These are + stored and propagated backwards when action is executed. + * T of int - indicates an actual end leaf for a trailing context reg. + expression, which should be executed only if D i was encountered + after this end leaf while scanning forward. The dummy end leaf is + removed from the backward propagating list after this node is + encountered. + + + The function scan inside the function lex operates as a transition +function, scanning the input until it is no longer possible to take any +more transitions. It accumulates a list of the accepting leaf list +associated with each accepting state passed through. + + Scan operates as follows: + + Input: * s - current state + * AcceptingLeaves - list of accepting leave lists. Each state + has a list of accepting leaves associated with it. This list + may be nil if the state is not a final state. + * l - position of the next character in the buffer b to read + * i0 - starting position in the buffer. + + Output: If no match is found, it raises the exception LexError. + Otherwise, it returns a value of type lexresult. + + It operates as a transtion function: + It (1) adds the list of accepting leaves for the current state to + the list of accepting leave lists + (2) tries to make a transition on the current input character + to the next state. If it can't make a transition, it + executes the action function. + (a) - if it is past the end of the buffer, it + (1) checks if it as at end eof. If it is then: + It checks to see if it has made any + transitions since it was first called - + (l>i0 when this is true.) If it hasn't + this implies that scan was called at + the end of file. It thus executes + eof function declared by the user. + Otherwise it must execute action w/ + the current accepting state list. + (2) otherwise it reads a block of up to 1024 + characters, and appends this block to the + useful suffix of characters left in the + buffer (those character which have been + scanned in this call to lex()). The buffer + operation should be altered if one intends + to process reg. expressions whose lexemes' + length will be >> 1024. For most normal + applications, the buffer update operation + will be fine. + + This buffer update operation requires + O(n^2/1024) char. copies for lexemes > 1024 + characters in length, and O(n) char. copies + for lexemes <= 1024 characters in length. + It can be made O(n) using linked list + buffers & a Byte.array of size n (not the + ^operator!) for concatenating the buffers + to return a value for yytext when a lexeme + is longer than the typical buffer length. + + (3) If the transition is to a dead state (0 is used + for the dead state), action is executed instead. diff --git a/ml-lex/tool/.cm/GUID/ext.sml b/ml-lex/tool/.cm/GUID/ext.sml new file mode 100644 index 0000000..47e021d --- /dev/null +++ b/ml-lex/tool/.cm/GUID/ext.sml @@ -0,0 +1 @@ +guid-$/(lex-ext.cm):ext.sml-1714016094.612 diff --git a/ml-lex/tool/.cm/SKEL/ext.sml b/ml-lex/tool/.cm/SKEL/ext.sml new file mode 100644 index 0000000..a919945 --- /dev/null +++ b/ml-lex/tool/.cm/SKEL/ext.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f1d"Tools"ad"LexLexExt"h0 \ No newline at end of file diff --git a/ml-lex/tool/.cm/amd64-unix/ext.sml b/ml-lex/tool/.cm/amd64-unix/ext.sml new file mode 100644 index 0000000..f13b724 Binary files /dev/null and b/ml-lex/tool/.cm/amd64-unix/ext.sml differ diff --git a/ml-lex/tool/ext.sml b/ml-lex/tool/ext.sml new file mode 100644 index 0000000..0fb5c7d --- /dev/null +++ b/ml-lex/tool/ext.sml @@ -0,0 +1,19 @@ +(* ext.sml + * + * Classifier plug-in for suffixes. + * + * Copyright (c) 2007 by The Fellowship of SML/NJ + * + * Author: Matthias Blume (blume@tti-c.org) + *) +structure LexLexExt = struct + local + val suffixes = ["lex", "l"] + val class = "mllex" + fun sfx s = + Tools.registerClassifier + (Tools.stdSfxClassifier { sfx = s, class = class }) + in + val _ = app sfx suffixes + end +end diff --git a/ml-lex/tool/lex-ext.cm b/ml-lex/tool/lex-ext.cm new file mode 100644 index 0000000..6a2e57d --- /dev/null +++ b/ml-lex/tool/lex-ext.cm @@ -0,0 +1,12 @@ +(* + * Plugin for registering the "lex" classifier. + * + * (C) 2007 The Fellowship of SML/NJ + * + * Author: Matthias Blume (blume@tti-c.org) + *) +Library + structure LexLexExt +is + $smlnj/cm/tools.cm + ext.sml diff --git a/ml-lex/tool/mllex-tool.cm b/ml-lex/tool/mllex-tool.cm new file mode 100644 index 0000000..efc61ad --- /dev/null +++ b/ml-lex/tool/mllex-tool.cm @@ -0,0 +1,12 @@ +(* + * The plugin library for ML-Lex. + * + * (C) 2000 Lucent Technologies, Bell Laboratories + * + * Author: Matthias Blume (blume@kurims.kyoto-u.ac.jp) + *) +Library + structure LexTool +is + $smlnj/cm/tools.cm + tool.sml diff --git a/ml-lex/tool/tool.sml b/ml-lex/tool/tool.sml new file mode 100644 index 0000000..7d3efba --- /dev/null +++ b/ml-lex/tool/tool.sml @@ -0,0 +1,16 @@ +(* + * Running ML-Lex from CM. + * + * (C) 1999 Lucent Technologies, Bell Laboratories + * + * Author: Matthias Blume (blume@kurims.kyoto-u.ac.jp) + *) +structure LexTool = struct + val _ = Tools.registerStdShellCmdTool + { tool = "ML-Lex", + class = "mllex", + cmdStdPath = fn () => ("ml-lex", []), + template = NONE, + extensionStyle = Tools.EXTEND [("sml", SOME "sml", fn too => too)], + dflopts = [] } +end diff --git a/ml-lpt.tgz b/ml-lpt.tgz new file mode 100644 index 0000000..14cc8fb Binary files /dev/null and b/ml-lpt.tgz differ diff --git a/ml-lpt/README b/ml-lpt/README new file mode 100644 index 0000000..831aa01 --- /dev/null +++ b/ml-lpt/README @@ -0,0 +1,7 @@ +---------------------------------- + SML/NJ LANGUAGE PROCESSING TOOLS +---------------------------------- + + doc + ml-ulex + ml-antlr \ No newline at end of file diff --git a/ml-lpt/TODO b/ml-lpt/TODO new file mode 100644 index 0000000..16947ed --- /dev/null +++ b/ml-lpt/TODO @@ -0,0 +1,127 @@ +================ + ml-ulex +================ + + - new spec format + x implement & and ^ + - unicode char classes (and meeting the standard, generally) + - lookahead / and $ + + - better lex/parse error messages + - allow $ and /, but give error that not supported in --ml-lex-mode + - use new region infrastructure and carry line numbers further + + - check that all used start states are defined + - check for stuck states + - default EOF rules for states that do not have an explicit rule (when + other states have an EOF rule) + + x switch ulex backend to functional stream + +================ + ml-antlr +================ + + X fix error repair + x implement heuristic for primary repair + x implement secondary recovery + x work on error messages + x position details -- WSTREAM + X deal with keywords + - preferred replacements + x make prePath required in GLA.* + x think about: EBNF predicates should only include true case? + x grammar inheritance + x custom names for return values + x move actions into separate functions + x reverse order of %where and => actions + x region information + + - add signature to generated parser + - add type for %arg + - test performance + + - controlling lexer state / lexer multiplexing + + - friendlier errors for prediction tree failure + - report multiple failures + - more/better comments! + X improve check-grammar + x allow for and report multiple errors + X do additional checking + X multiple tokens w/ same abbrev + + - figure out syntax for A (X A)* and A ((X | Y | Z) A)* + - automatic left-factoring? + - improve DOT output + - predicate hoisting (or maybe not...) + + - (*#line directive + - ran into problems with ordering in generated file not matching + ordering in source + + + + + +================================================ + +add built-in Unicode character classes +look into OCaml-style rules (starting with | rather than :) +decrease lexer size and compile time +check that nonterminals with parameters are given parameters +union/insct/containment for stream pos spans + +speculative: +higher-order nonterminal definitions + +================================================================ + +update HISTORY file + +support for %arg + +better handling of EOF/EOP +nonunicode charsets + +allow lexer to pass error messages to the parser data structures + +adjustable k + +error repair: get rid of exception-based, instead use 5-token markers +ml-antlr pretty-printing of flattened spec +make for better type error messages dealing with actions (wrt hygienic parameters) +"functorize" lexer over notion of stream positions + +to document: + X skip vs continue for lexers + - type annotations + - ref cells + - changes to grammar import + - changes to overall decl format + X RE syntax, semantics + X yylinepos = 1 for the first line of the file + X dump.dot and latex output. add switches to ml-antlr and document them + - larger example + +X allow type annotations!!! +X issue with "-" in character classes; escape codes in general +X check for same token specified multiple times with %keyword +X new interfaces for ulex (stream textio, imperative textio, string, etc) +X verbosify parser error messages ("Try deleting ';'") +X support for {1,3} (ranges) in ml-ulex +X make sure checking that token names and NT names are disjoint +X make ref cells work with := directly +X when refcells declared, used = anyway +X Matthew Fluet's port for ml-ulex +X weird naming issues (spec.grm) eg. UserCode.File_PROD_1_File_File_PROD_1_SUBRULE_1_PROD_1_ACT +X refcells: to avoid accidental capture, put type and initial value in + UserDeclarations, then load into refcell when instantiating parser +X bug: refcell parens in grammar spec do not appear in emitted code, so + tuples do not work correctly +X use ml-antlr for parsing +X use new region infrastructure and carry line numbers further + X overhaul CheckGrammar for better error messages +X switches for unit actions & debugging actions +X line breaks in table-based lexer table +X error repair should treat reaching EOF as a successful repair diff --git a/ml-lpt/common/.cm/GUID/expand-file.sml b/ml-lpt/common/.cm/GUID/expand-file.sml new file mode 100644 index 0000000..d5e0396 --- /dev/null +++ b/ml-lpt/common/.cm/GUID/expand-file.sml @@ -0,0 +1 @@ +guid-(sources.cm):../common/(lpt-common.cm):expand-file.sml-1714016110.454 diff --git a/ml-lpt/common/.cm/SKEL/expand-file.sml b/ml-lpt/common/.cm/SKEL/expand-file.sml new file mode 100644 index 0000000..cbc791e --- /dev/null +++ b/ml-lpt/common/.cm/SKEL/expand-file.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f2d"List"TextIO"ad"ExpandFile"jh4ad"TIO"gp1 ad"SS"gp1d"Substring"ad"RE"jh2ad"P"gp1d"AwkSyntax"ad"E"gp1d"BackTrackEngine"gp1e"RegExpFn"ad"M"gp1d"MatchTree"h0 \ No newline at end of file diff --git a/ml-lpt/common/.cm/amd64-unix/expand-file.sml b/ml-lpt/common/.cm/amd64-unix/expand-file.sml new file mode 100644 index 0000000..ac714ab Binary files /dev/null and b/ml-lpt/common/.cm/amd64-unix/expand-file.sml differ diff --git a/ml-lpt/common/expand-file.sml b/ml-lpt/common/expand-file.sml new file mode 100644 index 0000000..19250ae --- /dev/null +++ b/ml-lpt/common/expand-file.sml @@ -0,0 +1,94 @@ +(* expand-file.sml + * + * COPYRIGHT (c) 2016 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * Copy a template file to an output file while expanding placeholders. + * Placeholders are denoted by @id@ on a line by themselves. + *) + +structure ExpandFile :> sig + + type hook = TextIO.outstream -> unit + + type template + + val mkTemplateFromFile : string -> template + val mkTemplateFromList : string list -> template + + val expandTemplate : { + src : template, + dst : string, (* file name *) + hooks : (string * hook) list + } -> unit + + end = struct + + structure TIO = TextIO + structure SS = Substring + structure RE = RegExpFn ( + structure P = AwkSyntax + structure E = BackTrackEngine) + structure M = MatchTree + + type hook = TextIO.outstream -> unit + type template = string list + + fun mkTemplateFromFile fname = let + val file = TIO.openIn fname + fun done () = TIO.closeIn file + fun read () = (case TIO.inputLine file + of NONE => [] + | SOME line => line::read() + (* end case *)) + in + read() handle ex => (done(); raise ex) + before done() + end + + fun mkTemplateFromList l = l + + val placeholderRE = RE.compileString "[\\t ]*@([a-zA-Z][-a-zA-Z0-9_]*)@[\\t ]*" + val prefixPlaceholder = RE.prefix placeholderRE SS.getc + + fun findPlaceholder s = (case prefixPlaceholder(SS.full s) + of SOME(M.Match(_, [M.Match({pos, len}, _)]), _) => + SOME(SS.string(SS.slice(pos, 0, SOME len))) + | _ => NONE + (* end case *)) + + (* copy from inStrm to outStrm expanding placeholders *) + fun copy (inStrm, outStrm, hooks) = let + fun lp [] = () + | lp (s::ss) = ( + case findPlaceholder s + of NONE => TIO.output (outStrm, s) + | (SOME id) => ( + case (List.find (fn (id', h) => id = id') hooks) + of (SOME(_, h)) => h outStrm + | NONE => raise Fail(concat["bogus placeholder '", id, "'"]) + (* end case *)) + (* end case *); + lp(ss)) + in + lp(inStrm) + end + + exception OpenOut + + fun expandTemplate {src, dst, hooks} = (let + val dstStrm = TIO.openOut dst + handle ex => ( + TIO.output(TIO.stdErr, concat[ + "Error: unable to open output file \"", + dst, "\"\n" + ]); + raise OpenOut) + fun done () = (TIO.closeOut dstStrm) + in + copy (src, dstStrm, hooks) handle ex => (done(); raise ex); + done() + end + handle OpenOut => ()) + + end diff --git a/ml-lpt/common/lpt-common.cm b/ml-lpt/common/lpt-common.cm new file mode 100644 index 0000000..cbae655 --- /dev/null +++ b/ml-lpt/common/lpt-common.cm @@ -0,0 +1,19 @@ +(* lpt-common.cm + * + * COPYRIGHT (c) 2008 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * Code that is common to the implementations of both ml-ulex and ml-antlr. + *) + + +Library + + structure ExpandFile + +is + + $/basis.cm + $/regexp-lib.cm + + expand-file.sml diff --git a/ml-lpt/common/lpt-common.mlb b/ml-lpt/common/lpt-common.mlb new file mode 100644 index 0000000..628dfae --- /dev/null +++ b/ml-lpt/common/lpt-common.mlb @@ -0,0 +1,14 @@ +(* lpt-common.mlb + * + * COPYRIGHT (c) 2009 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * A MLton-style MLB file for the ML-LPT library. + *) + +local + $(SML_LIB)/basis/basis.mlb + $(SML_LIB)/smlnj-lib/RegExp/regexp-lib.mlb +in + expand-file.sml +end diff --git a/ml-lpt/doc/code-ml-antlr.tex b/ml-lpt/doc/code-ml-antlr.tex new file mode 100644 index 0000000..07abf22 --- /dev/null +++ b/ml-lpt/doc/code-ml-antlr.tex @@ -0,0 +1 @@ +\chapter[\mlantlr]{Implementation: \mlantlr} \ No newline at end of file diff --git a/ml-lpt/doc/code-ml-ulex.tex b/ml-lpt/doc/code-ml-ulex.tex new file mode 100644 index 0000000..d1244b7 --- /dev/null +++ b/ml-lpt/doc/code-ml-ulex.tex @@ -0,0 +1,322 @@ +\chapter[\ulex]{Implementation: \ulex} + +\section{Organization} + +%\ulex{} is a scanner generator written in Standard ML. It replaces the older +%ML-Lex tool. For information about features and usage, see the \ulex{} +%Release Notes. This document describes the algorithms and code that make up +%\ulex{}. + +\ulex{} is organized much like a compiler: there is a replacable front end for +parsing the ML-Lex specification format, several back ends to support various +output formats, and a middle component responsible for DFA generation. The +\nm{Main} module drives the tool, while the \nm{RegExp} and \nm{LexGen} modules +together provide DFA generation (see figure~\ref{fig:ml-flex}). The DFA +generation algorithm used in \ulex{} is somewhat nonstandard; it is based on +Brzozowski's notion of regular expression derivatives~\cite{derivatives}. +Chapter~\ref{ch:ulex-theory} describes the algorithm, as well as modifications +necessary to support unicode. This chapter gives more concrete +details about the code, broken down by module. + +\begin{figure}\label{fig:ml-flex} +\begin{center} +\ifpdf + \includegraphics[scale=0.8]{impl-pic.pdf} +\fi +\end{center} +\caption{\ulex{} organization} +\end{figure} + +\section{\nm{RegExp}}\label{sec:reg-exp} + +In \ulex{}, REs are captured by the abstract type \nm{RegExp.re}. Introduction +is provided by various ``smart constructors'' (\nm{mkSym}, \nm{mkClosure}, +$\dots$), and elimination is provided by the derivatives algorithm. The +signature for the \rm{RegExp} module is shown below: + +\begin{verbatim} +signature REG_EXP = + sig + + structure Sym : INTERVAL_DOMAIN + structure SymSet : INTERVAL_SET + + type symbol + type sym_set + type re + + val any : re (* wildcard *) + val none : re (* EMPTY language *) + val epsilon : re (* the nil character (of length 0) *) + + val mkSym : symbol -> re + val mkSymSet : sym_set -> re + + val mkOr : re * re -> re + val mkAnd : re * re -> re + val mkXor : re * re -> re + val mkNot : re -> re + val mkConcat : re * re -> re + val mkClosure : re -> re + val mkOpt : re -> re + val mkRep : re * int * int -> re + val mkAtLeast : re * int -> re + + val isNone : re -> bool + val nullable : re -> bool + val derivative : symbol -> re -> re + val derivatives : re Vector.vector -> + ((re Vector.vector) * sym_set) list + + val symToString : symbol -> string + val toString : re -> string + val compare : re * re -> order + + end +\end{verbatim} + +The included structure \nm{SymSet} provides symbol interval sets, which are +ideal when working with dense sets such as unicode character classes. Interval +set operations (\nm{union}, \nm{complement}, $\dots$) are used extensively; +documentation for the interval set library is available with the SML/NJ +distribution. + +Recall that, in using RE derivatives for DFA construction, it is +important to aggresively identify when two REs generate the same +language so that they may be merged to a single state in the automaton. +\nm{RegExp} canonicalizes REs, which is why its \nm{re} type is abstract. +Canonicalization is performed using a lexicographic ordering on REs given by the +\nm{compare} function. The comparison is lexicographic in the sense that it +first examines the top-level operation of the two REs, and only does more +comparisons if that operation is the same. We represent REs as follows: +\begin{verbatim} + datatype re + = Epsilon + | Any + | None + | SymSet of sym_set + | Concat of re list + | Closure of re + | Op of (rator * re list) + | Not of re + and rator = OR | AND | XOR +\end{verbatim} +For the \nm{Op} constructor, which is used for three different commutative +operations, the sub-REs are to be listed in canonical order. The \nm{compare} +function itself will expect that this is the case, and since only smart +constructors can be used to construct REs, the invariant will always hold. + +Smart constructors do additional canonicalization beyond ordering. For +example, \nm{mkNot (mkNot (none))} will be canonicalized +to the same representation as \nm{none}. Several similar RE equalities +are detected and used. Also, the smart constructors attempt to push the RE +reprsentation as much toward symbol sets as possible, replacing boolean +operations at the RE level with a single resulting symbol set when possible. +For example, suppose that \nm{AM} held the symbol set for $[A-M]$, \nm{HZ} the +set for $[H-Z]$, and \nm{AZ} the set for $[A-Z]$. Then we would have +\[ +\texttt{compare (mkSymSet AZ, mkOr (mkSymSet AM, mkSymSet HZ)) = EQUAL} +\] +The hope is that the cumulative effect of such canonicalization will keep the +generated DFA close to minimal size. + +The \nm{derivatve} function constructs a canonicalized derivative for a given +RE with respect to a given symbol; it is a transcription of the algorithm +described in section~\ref{sec:derivatives}. + +In general, we will be interested in the derivatives of a vector of REs +(representing the rules in a lexer specification) with respect to \emph{every} +symbol in the alphabet. As section~\ref{sec:factorings} explains, the unicode +alphabet is too large for us to literally test the derivative at each symbol. +The \nm{derivatives} function will take a vector of REs and return a list of +\nm{re~Vector.vector~*~sym\_set} \emph{pairs}. In fact, \nm{derivatives} +is just an implementation of the factor and compress algorithm given +in~\ref{sec:factorings}. Each pair in the result list represents an arrow +transitioning out of the current state (which corresponds to the input RE +vector) to a new state (the output RE vector) on a given set of symbols (the +output symbol set). Thus the labor of DFA construction is split between the +\nm{RegExp} module, which computes derivatives (and hence transitions) and +\nm{LexGen}, which actually constructs the DFA graph. + +\section{\nm{LexSpec} and \nm{LexOutputSpec}} + +Before describing DFA generation, we briefly discuss the relevant input and +output data structures. The \nm{LexSpec} module has data constructors and +functions relevant to \eg the ML-Lex input specification format, as shown in +Figure~\ref{fig:lex-spec} + +\begin{figure} +\begin{verbatim} + type action = string + type rule_spec = AtomSet.set option * RegExp.re + type rule = rule_spec * action + + datatype spec = Spec of { + decls : string, + conf : config, + rules : rule list + } +\end{verbatim} +\caption{A fragment of \nm{LexSpec}}\label{fig:lex-spec} +\end{figure} + +Actions, at least in the present implementation, are just raw strings. A +\nm{rule} consists of a rule specification and an associated action. The +optional atom set included with a rule spec represents the start states +to which that rule applies (with \nm{NONE} meaning, strangely enough, all start +states). With these definitions, a lexer specification is just a list of +rules, some declarations (a raw string containing code) and some miscellaneous +configuration data. + +\begin{figure} +\begin{verbatim} + datatype dfa_state + = State of { + id : int, + label : RegExp.re Vector.vector, + final : int list, + next : (RegExp.sym_set * dfa_state) list ref + } + + datatype machine = Machine of { + label : string, + rules : (RegExp.re * int) vector, + states : dfa_state list + } + + type action = string + + datatype spec = Spec of { + actions : action vector, + machines : machine list, + ... (* configuration data *) + } +\end{verbatim} +\caption{A fragment of \nm{LexOutputSpec}}\label{fig:lex-output-spec} +\end{figure} + +As Figure~\ref{fig:lex-output-spec} illustrates, the output of DFA generation is +\emph{not} a DFA, but a collection of DFAs along with a vector of actions. +Each \nm{machine} represents a start state for the lexer; that is, each start +state has its own automaton. However, since start states may use the same +actions, we separate out the actions into a vector so that they are only +emitted once. + +A \nm{machine} includes a label, which is just the name of the start state, as +well as the rules relevant to that machine, which are paired with an index to +the associated action in the action vector. The DFA itself is a list of +states, with the head of the list being $q_0$. A state, in turn, is labeled by +a vector of REs (with the same length as the \nm{rules} vector in the machine). + Since a given DFA state may be an accepting state for more than one rule, we +store a \emph{list} of rule indices for the \nm{final} flag. On a match, the +action for the lowest-index rule is executed first, but that action may use +\nm{REJECT()}, in which case it may be necessary to jump to the action for the +next rule index given in the \nm{final} list. + +\section{\nm{LexGen}}\label{sec:lex-gen} + +\nm{LexGen} is a very simple module with a single accessible function: +\begin{center} +\nm{gen : LexSpec.spec -> LexOutputSpec.spec} +\end{center} + +The first task of the \nm{gen} function is to collate the actions and start +states. The actions from the input spec are separated from their rules into a +vector. Afterwards, \nm{gen} iterates over the specified start states, +collecting the rules for each start state and using the \nm{mkDFA} function to +generate a machine for each. + +\nm{mkDFA} performs a straightforward imperative DFA construction, using +the \nm{derivatives} function from \nm{RegExp} to compute the transitions out of +each node. A map of RE vectors to DFA nodes is maintained (using +\nm{Vector.collate RegExp.compare} for ordering); this map is used to recognize +when a new out-edge is going to an existing DFA node. Finally, any transition +to a vector of REs which all generate the empty language is an error transition +(that is, the derivatives for the given transition symbol set all indicate a +non-match). + +\section{Front ends} + +At the moment, only one front end is available: the ML-Lex specification +format. Eventually \ulex{} will have its own format. + +The ML-Lex front end is fairly straightforward: it uses ML-Yacc to do the +parsing, and can use either ML-Lex or \ulex{} to do the lexing. The +distribution includes a lexer that was generated from \ulex{} to serve as a +bootstrap. + +\section{Back ends} + +One exciting aspect of \ulex{} is the ability to easily add back ends. The +following back ends are currently included in the distribution. + +\paragraph{SML control-flow lexer generation} + +The most important back end is code generation for SML. At present, there is a +single code generation strategy: build a lexer using control-flow (\ie +if statements and tail-calls) to match the input. If the control-flow strategy +turns out to be inadequate, it would be fairly easy to add a code +generator for table-based lexing. + +Code generation is performed by expanding stubs in a template SML file. The +template contains code for dealing with streams and setting up the lexer in the +appropriate way. Input is read from functional strings, which allow for +the arbitrary lookahead needed for maximal-munch. These streams also track +the current file position and line number. + +A helper module, \nm{ML}, contains a representation for a good portion of the +Standard ML expression langauge, and support for pretty-printing such +expressions. + +The generated code includes user actions and the DFA for each start state. + Within each start state there is one \emph{state function} per DFA node. Each +state function will examine the next character of input and perform a series of +if-tests to determine the correct transition. The if-tests perform a +hard-coded binary search over the symbol set intervals for the transitions. + +The lexer stores a reference cell for the current functional stream. Each time +the lexer accepts a token, the cell is updated. \nm{yytext} is generated (only +when needed) by ``subtracting'' the new stream from the one stored in the +reference cell before updating it. Subtraction simply rescans the appropriate +number of characters from the stored stream, which should still have the string +in its buffer. + +One subtlety is handling calls to \nm{REJECT()}. Since such calls are fairly +rare, we want to avoid tracking the information \nm{REJECT} needs if it won't +be used. \nm{REJECT} information consists of (in essence) a list of all the +previous matches. A previous match could occur on the same DFA state if +multiple REs matched the input at that state; otherwise, the previous match is +a prefix of the \nm{REJECT}ed match. Whenever a transition is made, the +appropriate previous match information is passed up to the next state. +However, if the current state is an accepting state that does not use +\nm{REJECT}, then the history is \emph{truncated} at the \nm{REJECT}-free +point, since it could never be used. This determination is made statically and +is hard-coded into the generated SML lexer. + +\paragraph{Graphviz} + +The Graphviz toolkit provides easy graph visualization using a very simple +graph description format. We utilize the DOT format. The Graphviz backend +will write one DOT file for each start state, showing all nodes for that start +state's DFA, along with labeled transitions. See \texttt{www.graphviz.org} for +details on the DOT format. + +\paragraph{Text dump} + +If Graphviz is not available or a more detailed summary is desired, the +\nm{DumpOutput} module can be used to print to standard out every DFA state, +labeled by its RE vector, along with out-edges, for every start state in the +lexer. + +\paragraph{Interactive matching} + +Finally, a simple interactive back end is available. Interactive matching +allows a user to enter arbitrary strings and find out (1) if they matched and +(2) what RE matched them. The code for interactive matching is quite simple. + +\section{\nm{Main}} + +The \nm{Main} module is the driver for \ulex{}: it is responsible for processing +command-line arguments and hooking up the appropriate front and back ends. +Since it is very simple, it gives a nice overview of the system and is a good +place to look first in trying to undertand the code. \ No newline at end of file diff --git a/ml-lpt/doc/deriv.bib b/ml-lpt/doc/deriv.bib new file mode 100644 index 0000000..bdef5f4 --- /dev/null +++ b/ml-lpt/doc/deriv.bib @@ -0,0 +1,13 @@ +@article{derivatives, + author = {Janusz A. Brzozowski}, + title = {Derivatives of Regular Expressions}, + journal = {J. ACM}, + volume = {11}, + number = {4}, + year = {1964}, + issn = {0004-5411}, + pages = {481--494}, + doi = {http://doi.acm.org/10.1145/321239.321249}, + publisher = {ACM Press}, + address = {New York, NY, USA}, + } \ No newline at end of file diff --git a/ml-lpt/doc/design.pdf b/ml-lpt/doc/design.pdf new file mode 100644 index 0000000..13c512e Binary files /dev/null and b/ml-lpt/doc/design.pdf differ diff --git a/ml-lpt/doc/example.tex b/ml-lpt/doc/example.tex new file mode 100644 index 0000000..8f4eed4 --- /dev/null +++ b/ml-lpt/doc/example.tex @@ -0,0 +1,157 @@ +%!TEX root = manual.tex +% +\chapter{A complete example} +\label{ch:example} + +This chapter gives a complete example of a simple calculator +language implemented using both \texttt{ml-ulex} and \texttt{ml-antlr}. +The language has the following syntax: +\begin{displaymath} + \begin{array}{rcl} + E & ::= & \mathtt{let}\;\mathrm{id}\;\mathtt{in}\;E \\ + & \mid & E \;\mathtt{+}\; E \\ + & \mid & E \;\mathtt{*}\; E \\ + & \mid & \mathtt{-}\;E \\ + & \mid & \mathrm{id} \\ + & \mid & \mathrm{num} + \end{array}% +\end{displaymath}% +The lexical conventions allow arbitrary whitespace between tokens. +Numbers ($\mathrm{num}$) numbers are unsigned decimal numbers +and identifiers ($\mathrm{id}$) begin with a letter followed by letters and digits. +The expression forms are listed in order of increasing precedence: +let-expressions have the lowest precedence; then addition, multiplication, and negation. +The calculator will compute and return the value of the expression. + +Figure~\ref{fig:calc-sources} gives the CM file for the project. +%Note that we are assuming that the \texttt{ml-ulex} and \texttt{ml-antlr} tools have been run by hand. +\begin{figure}[h] +\begin{lstlisting}[language=CM] +Library + + structure CalcLexer + functor CalcParseFn + structure CalcTest + +is + $/basis.cm + $/smlnj-lib.cm + $/ml-lpt-lib.cm + + calc.grm : ml-antlr + calc.lex : ml-ulex + calc-test.sml +\end{lstlisting}% +\caption{The CM file: {\tt sources.cm}} +\label{fig:calc-sources} +\end{figure}% + +\begin{figure} + +\begin{lstlisting}[language=MLULex] +%name CalcLexer; + +%let digit = [0-9]; +%let int = {digit}+; +%let alpha = [a-zA-Z]; +%let id = {alpha}({alpha} | {digit})*; + +%defs ( + structure T = CalcTokens + type lex_result = T.token + fun eof() = T.EOF +); + +let => ( T.KW_let ); +in => ( T.KW_in ); +{id} => ( T.ID yytext ); +{int} => ( T.NUM (valOf (Int.fromString yytext)) ); +"=" => ( T.EQ ); +"+" => ( T.PLUS ); +"-" => ( T.MINUS ); +"*" => ( T.TIMES ); +"(" => ( T.LP ); +")" => ( T.RP ); +" " | \n | \t + => ( skip() ); +. => ( (* handle error *) ); +\end{lstlisting} +\caption{The ml-ulex specification: \texttt{calc.lex}} +\label{fig:calc-lex} +\end{figure}% + +\begin{figure}[p] +\begin{lstlisting}[language=SML] +%name Calc; + +%tokens + : KW_let ("let") | KW_in ("in") + | ID of string | NUM of Int.int + | EQ ("=") | PLUS ("+") + | TIMES ("*") | MINUS ("-") + | LP ("(") | RP (")") + ; + +exp(env) + : "let" ID "=" exp@(env) + "in" exp@(AtomMap.insert(env, Atom.atom ID, exp1)) + => ( exp2 ) + | addExp@(env) + ; + +addExp(env) + : multExp@(env) ("+" multExp@(env))* + => ( List.foldr op+ 0 (multExp::SR) ) + ; + +multExp(env) + : prefixExp@(env) ("*" prefixExp@(env))* + => ( List.foldr op* 1 (prefixExp::SR) ) + ; + +prefixExp(env) + : atomicExp@(env) + | "-" prefixExp@(env) + => ( ~prefixExp ) + ; + +atomicExp(env) + : ID + => ( valOf(AtomMap.find (env, Atom.atom ID)) ) + | NUM + | "(" exp@(env) ")" + ; +\end{lstlisting} +\caption{The ml-antlr specification: \texttt{calc.grm}} +\label{fig:calc-grm} +\end{figure}% + +\begin{figure} +\begin{lstlisting}[language=SML] +structure CalcTest = + struct + + structure CP = CalcParseFn(CalcLexer) + + fun tok2s (ID s) = s + | tok2s (NUM n) = Int.toString n + | tok2s tok = CalcTokens.toString tok + + (* val calc : TextIO.instream -> Int.int *) + fun calc instrm = let + val sm = AntlrStreamPos.mkSourcemap() + val lex = CalcLexer.lex sm + val strm = CalcLexer.streamifyInstream instrm + val (r, strm', errs) = CP.parse lex AtomMap.empty strm + in + print (String.concatWith "\n" + (List.map (AntlrRepair.repairToString tok2s sm) + errs)); + r + end + + end +\end{lstlisting} +\caption{The driver: {\tt calc-test.sml}} +\label{fig:calc-test} +\end{figure}% diff --git a/ml-lpt/doc/history.tex b/ml-lpt/doc/history.tex new file mode 100644 index 0000000..c19b10c --- /dev/null +++ b/ml-lpt/doc/history.tex @@ -0,0 +1,92 @@ +%!TEX root = manual.tex +% +\chapter{Change history} +\label{ch:history} + +Here is a history of changes to the SML/NJ Language Processing Tools. +More details can be found in the SML/NJ \texttt{HISTORY} and \texttt{README} files. +\begin{description} + \item[SML/NJ 110.99] + \mbox{}\\[0.5em] + Changed the semantics of the spans returned by \ulex{} so that the second + component of a span is the position of the rightmost character in the token (instead + of the character following the token). + Specifically, the span $(p_1, p_2)$ specifies the $p_2 - p_1 + 1$ characters + that start with the character at position $p_1$ and run to $p_2$ (inclusive). + This change avoids a potential problem when the span of a token ends at the last + character in a file (when the input is spread across multiple files). + \item[SML/NJ 110.98] + \mbox{}\\[0.5em] + Changed the semantics of the \texttt{--debug} command-line option for \mlantlr{}. + Previously this option replaced the actions with a print expression, but that + limited its usefulness because of type errors in the generated code. The new + behavior is to preserve the existing actions and just add the printing code. + \item[SML/NJ 110.96] + \mbox{}\\[0.5em] + Added the \texttt{FilePos} sub-structure to the \texttt{AntlrStreamPos} structure. + This addition allows code to be written that is independent of the precision + of the \texttt{AntlrStreamPos.pos} type. + \item[SML/NJ 110.94] + \mbox{}\\[0.5em] + Changed type of source-file positions from \texttt{Position.int} to \texttt{Int.int}. + This change is because the \texttt{Position.int} type was changed to 64-bit integers + in Version 110.89, which is overkill for processing text files (especially since + we are moving to 64-bit executables). + \item[SML/NJ 110.81] + \mbox{}\\[0.5em] + Added ``\texttt{--debug}'' command-line option to \mlantlr{} to expose the generation + of debug actions. + \\[0.5em] + Added \texttt{\%tokentype} directive to \mlantlr{}. + \\[0.5em] + Modified \mlantlr{} and \ulex{} to direct status and debugging messages to + \texttt{stdErr} instead of \texttt{stdOut}. +% + \item[SML/NJ 110.79] + \mbox{}\\[0.5em] + Modified scanner to allow comments in \ulex{} directives. + \\[0.5em] + \mlantlr{} now inlines the \texttt{EBNF} structure in the generated parser (instead of using + the \texttt{AntlrEBNF} functor from the \texttt{ml-lpt} library. + \\[0.5em] + Preliminary work on supporting the \texttt{\%prefer} and \texttt{\%change} directives + from \texttt{ml-yacc}. The front-end accepts and checks these declaration forms, but + the back-end does not yet generate code for them. These will be documented once the + implementation is complete. +% + \item[SML/NJ 110.78] + \mbox{}\\[0.5em] + Improved the error message for when the lookahead computation fails in \mlantlr{}. + \\[0.5em] + Added \texttt{\%value} directive to allow non-nullary tokens to be inserted as + an error-repair action. + Note that the first version of this feature used the directive \texttt{\%default}, but + this name was changed to \texttt{\%value} to match the ML-Yacc feature. + \\[0.5em] + Improved error messages for the situation where the lexer specification has an unclosed string. +% + \item[SML/NJ 110.77] + \mbox{}\\[0.5em] + Fixed an inconsistency in the way that \mlantlr{} and \ulex{} handled the contents of + a \texttt{\%defs} declaration. \ulex{} made these definitions visible in the \texttt{UserDeclarations} + substructure, whereas \mlantlr{} hid them. We have changed the behavior of \ulex{} to match + that of \mlantlr{} (\ie{}, hide the user definitions). We chose to hide the user definitions + in \ulex{} because they are usually not useful outside the lexer, hiding them reduces The + size of the generated code, and definitions that are needed outside the lexer can be + defined in an external module. Note that the \texttt{UserDeclarations} substructure remains + visible when \ulex{} is run in \texttt{ml-lex} compatibility mode. + \\[0.5em] + Added the \texttt{actionToString'} and \texttt{repairToString'} functions + to the \texttt{AntlrRepair} structure. These functions allow one to + specialize the printing of tokens based on whether they are being added or deleted. + \\[0.5em] + Removed the \texttt{toksToString} function from the tokens structure that ml-antlr + generates. It was originally for use by the \texttt{AntlrRepair} structure, but that + structure does not use it. +% + \item[SML/NJ 110.72] + \mbox{}\\[0.5em] + Added \texttt{--strict-sml} flag to \ulex{} for MLton compatibility. + \\[0.5em] + Added \texttt{\%header} directive to the \mlantlr{} parser generator. +\end{description}% diff --git a/ml-lpt/doc/impl-pic.pdf b/ml-lpt/doc/impl-pic.pdf new file mode 100644 index 0000000..be58db1 Binary files /dev/null and b/ml-lpt/doc/impl-pic.pdf differ diff --git a/ml-lpt/doc/manual.tex b/ml-lpt/doc/manual.tex new file mode 100644 index 0000000..9701c82 --- /dev/null +++ b/ml-lpt/doc/manual.tex @@ -0,0 +1,206 @@ +\documentclass[11pt,letterpaper]{book} + +\usepackage{times} +%------------------------- +% the following magic makes the tt font in math mode be the same as the +% normal tt font (i.e., Courier) +% +\SetMathAlphabet{\mathtt}{normal}{OT1}{pcr}{n}{n} +\SetMathAlphabet{\mathtt}{bold}{OT1}{pcr}{bx}{n} +%------------------------- + +\usepackage[top=1.33in, bottom=1.33in, left=1.33in, right=1.33in]{geometry} +\usepackage{amsmath} +\usepackage[all]{xy} +\usepackage{graphicx} +\usepackage{stmaryrd} + +\newcommand{\ulex}{\texttt{ml-ulex}} +\newcommand{\mlantlr}{\texttt{ml-antlr}} +\newcommand{\antlr}{\texttt{ml-antlr}} +\newcommand{\Antlr}{\texttt{Ml-antlr}} + +\title{ + SML/NJ Language Processing Tools:\\ + User Guide} +\author{ + Aaron Turon\\ + \texttt{adrassi@gmail.com}\\[0.5em] + John Reppy\\ + \texttt{jhr@cs.chicago.edu}} +\date{Revised: October 2020} + +\newcommand{\Carat}{\^{ }} +\newcommand{\RE}{r} +\newcommand{\OR}{\ | \ } +\newcommand{\AND}{\ \& \ } +\newcommand{\CL}{\mathcal{L}} +\newcommand{\CS}{\mathcal{S}} +\newcommand{\CR}{\mathcal{R}} +\newcommand{\CP}{\mathcal{P}} +\newcommand{\Sem}[1]{[ \! [ #1 ] \! ]} +\newcommand{\Ls}[1]{\CL\Sem{#1}} + +\newcommand{\eg}{{\em e.g.}} +\newcommand{\cf}{{\em cf.}} +\newcommand{\ie}{{\em i.e.}} + +\newcommand{\nm}[1]{\texttt{#1}} + +\newcommand{\ra}{\rightarrow} +\newcommand{\Ra}{\Rightarrow} + +\newcommand{\New}[1]{\emph{\textbf{#1}}} + +% Grammar +\newcommand{\Grammar}[1]{\[ \begin{array}{rcll} #1 \end{array} \]} +\newcommand{\GFirst}[3]{#1 & ::= & #2 & \textrm{#3} \\ } +\newcommand{\GNext}[2]{ & | & #1 & \textrm{#2} \\ } + +\newcommand{\GFirstB}[2]{\textit{#1} & ::= & \multicolumn{2}{l}{\textit{#2}} \\ } +\newcommand{\GNextB}[1]{ & | & \multicolumn{2}{l}{\textit{#1}} \\ } + +\newcommand{\GFirstC}[3]{\textit{#1} & ::= & \textit{#2} & \textrm{#3} \\ } +\newcommand{\GNextC}[2]{ & | & \textit{#1} & \textrm{#2} \\ } +\newcommand{\GNextCC}[2]{ & | & \multicolumn{2}{l}{\textit{#1}} \\ & & & \textrm{#2} \\ } + +%\newcommand{\T}[1]{{\tt '#1'}} +\newcommand{\T}[1]{{\textbf{\tt #1}}} +\newcommand{\kw}[1]{{\T{\%#1}}} + +\usepackage{amsthm} + +\newtheorem*{theorem}{Theorem} +\newtheorem*{definition}{Definition} +\newtheorem*{remark}{Remark} + +\usepackage{ifpdf} + +\usepackage{mathpazo} +%\renewcommand{\ttdefault}{cmtt} + +%\newcommand{\parttext}{} +%\newcommand{\cpart}[1]{\renewcommand{\parttext}{#1}\part{#1}} + +\usepackage{fancyhdr} +\usepackage{multicol} + +\newcommand{\sem}[1]{\llbracket #1\rrbracket} + +\usepackage{color} +\definecolor{Red}{rgb}{0.9,0.0,0.0} +\definecolor{DarkBlue}{rgb}{0.0,0.0,0.75} +\definecolor{Purple}{rgb}{0.5,0.0,0.4} +\definecolor{DarkGreen}{rgb}{0.0,0.5,0.0} +\newcommand{\cdColor}{DarkBlue} +\newcommand{\kwColor}{Purple} +\newcommand{\dirColor}{Purple} +\newcommand{\strColor}{DarkGreen} +\newcommand{\comColor}{Red} + +\usepackage{listings} + +\lstdefinelanguage{SML}{% + morekeywords={% + abstype, and, andalso, as, case, datatype, do, else, end, eqtype, exception,% + fn, fun, functor, handle, if, in, include, infix, infixr, let, local, nonfix,% + of, op, open, orelse, raise, rec, sharing, sig, signature, struct, structure,% + then, type, val, where, while, with, withtype% + },% + otherkeywords={[,],\{,\},\,,:,...,_,|,=,=>,->,\#,:>}, + sensitive,% + alsoletter={_}, + morecomment=[n]{(*}{*)},% + morestring=[d]",% +}[keywords,comments,strings]% + +\lstdefinelanguage{CM}{% + morekeywords={% + functor, signature, structure,% + Library, is,% + },% + sensitive,% + alsoletter={_}, + morecomment=[n]{(*}{*)},% + morestring=[d]",% +}[keywords,comments,strings]% + +\lstdefinelanguage{MLULex}[]{SML}{% + moredirectives={arg,defs,header,let,name,states}, + moredelim=*[directive]\%,% + alsoletter={_}, +}[keywords,comments,strings,directives]% + +\lstdefinelanguage{MLAntlr}{% + morekeywords={\%defs,\%entry,\%header,\%import,\%keywords,\%name,\%nonterms,of, + \%refcell,\%tokens,\%tokentype,\%start,\%value}, + sensitive,% + alsoletter={_,\%},% + morecomment=[n]{(*}{*)},% + morestring=[d]",% +}[keywords,comments,strings]% + +\lstset{ + basicstyle=\footnotesize\ttfamily\color{\cdColor}, + keywordstyle=\color{\kwColor}\bfseries, + commentstyle=\color{\comColor}\itshape, + directivestyle=\color{\dirColor}\bfseries, + stringstyle=\color{\strColor}\itshape, + showstringspaces=false, + language=SML +} + +\begin{document} + +\frontmatter + + \maketitle + + \phantom{.} + \vspace{\stretch{1}} + + \noindent Copyright \copyright{}2016. Fellowship of SML/NJ. All rights reserved. + + \vskip 12pt + \noindent This document was written with support. in part, from NSF grant + CNS-0454136, ``CRI: Standard ML Software Infrastructure.'' + + \pagebreak + + \tableofcontents + +\mainmatter + +% \renewcommand{\chaptermark}[1]{\markboth{#1}{}} +% \renewcommand{\sectionmark}[1]{\markright{\thesection. \ #1}{}} + + \newpage + + \input{overview} + +% \renewcommand{\chaptermark}[1]{\markboth{\parttext{}: #1}{}} + +% \cpart{Usage} + + \input{usage-ml-ulex} + \input{usage-ml-antlr} + \input{ml-lpt-lib} + \input{example} + \input{history} + +% \cpart{Theory} +% +% \input{theory-ml-ulex} +% \input{theory-ml-antlr} + +% \cpart{Implementation} + +% \input{code-ml-ulex} +% \input{code-ml-antlr} + +%\backmatter + +% \bibliographystyle{plain} +% \bibliography{deriv.bib} + +\end{document} diff --git a/ml-lpt/doc/ml-lpt-lib.tex b/ml-lpt/doc/ml-lpt-lib.tex new file mode 100644 index 0000000..6573c0b --- /dev/null +++ b/ml-lpt/doc/ml-lpt-lib.tex @@ -0,0 +1,104 @@ +%!TEX root = manual.tex +% +\chapter{The {\tt ml-lpt-lib} library} +\label{ch:ml-lpt-lib} + +To use the output of \ulex{} or \mlantlr{} in an SML program requires including the +\texttt{ml-opt-lib} library. +This library includes the \texttt{AntlrStreamPos} structure, which manages tracking +positions in the input stream, and the \texttt{AntlrRepair} structure, which defines +a representation of \mlantlr{}'s error-repair actions that can be used to generate +error messages. + +\section{Usage} +For SML/NJ, you should include the following line in your CM file: +\begin{lstlisting}[language=CM] +$/ml-lpt-lib.cm +\end{lstlisting}% +The SML/NJ Compilation Manager also understands how to generate SML files from \ulex{} and +\mlantlr{} files. +For example, if you include +\begin{lstlisting} +foo.grm : ml-antlr +foo.lex : ml-ulex +\end{lstlisting}% +in the list of sources in a CM file, then CM will run the \ulex{} (reps.\ \mlantlr{}) +to produce \texttt{foo.lex.sml} (resp.\ \texttt{foo.grm.sml}). + +If you are using MLton SML compiler, then you will need to include the following line +in your MLB file: +\begin{lstlisting} +$(SML_LIB)/mllpt-lib/mllpt-lib.mlb +\end{lstlisting}% + +\section{The {\tt AntlrStreamPos} structure} + +\begin{lstlisting} +structure AntlrStreamPos : sig + + type pos = int + type span = pos * pos + type sourceloc = { fileName : string option, lineNo : int, colNo : int } + type sourcemap + + (* the result of moving forward an integer number of characters *) + val forward : pos * int -> pos + + val mkSourcemap : unit -> sourcemap + val mkSourcemap' : string -> sourcemap + + val same : sourcemap * sourcemap -> bool + + (* log a new line occurence *) + val markNewLine : sourcemap -> pos -> unit + (* resychronize to a full source location *) + val resynch : sourcemap -> pos * sourceloc -> unit + + val sourceLoc : sourcemap -> pos -> sourceloc + val fileName : sourcemap -> pos -> string option + val lineNo : sourcemap -> pos -> int + val colNo : sourcemap -> pos -> int + val toString : sourcemap -> pos -> string + val spanToString : sourcemap -> span -> string + +end +\end{lstlisting}% + + +\section{The {\tt AntlrRepair} structure} + +\begin{lstlisting} +structure AntlrRepair : sig + + datatype 'tok repair_action + = Insert of 'tok list + | Delete of 'tok list + | Subst of { + old : 'tok list, + new : 'tok list + } + | FailureAt of 'tok + + type 'a repair = AntlrStreamPos.pos * 'tok repair_action + + val actionToString : ('tok -> string) + -> 'tok repair_action + -> string + + val repairToString : ('tok -> string) + -> AntlrStreamPos.sourcemap + -> 'tok repair -> string + + datatype add_or_delete = ADD | DEL + +(* return a string representation of the repair action. This version + * uses the add_or_delete information to allow different token names + * for deletion (more specific) and addition (more general). + *) + val actionToString' : (add_or_delete -> 'tok -> string) + -> 'tok repair_action -> string + val repairToString' : (add_or_delete -> 'tok -> string) + -> AntlrStreamPos.sourcemap -> 'tok repair -> string + +end +\end{lstlisting}% diff --git a/ml-lpt/doc/overview.tex b/ml-lpt/doc/overview.tex new file mode 100644 index 0000000..d7fcafc --- /dev/null +++ b/ml-lpt/doc/overview.tex @@ -0,0 +1,26 @@ +\chapter{Overview}\label{chap:overview} + +In software, language recognition is ubiquitous: nearly every program deals at some level with structured input given in textual form. The simplest recognition problems can be solved directly, but as the complexity of the language grows, recognition and processing become more difficult. + +Although sophisticated language processing is sometimes done by hand, the use of scanner and parser generators\footnote{ + ``Scanner generator'' and ``parser generator'' will often be shortened to ``scanner'' and ``parser'' respectively. This is justified by viewing a parser generator as a parameterized parser. +} is more common. The Unix tools {\tt lex} and {\tt yacc} are the archetypical examples of such generators. Tradition has it that when a new programming language is introduced, new scanner and parser generators are written in that language, and generate code for that language. Traditional \emph{also} has it that the new tools are modeled after the old {\tt lex} and {\tt yacc} tools, both in terms of the algorithms used, and often the syntax as well. The language Standard ML is no exception: {\tt ml-lex} and {\tt ml-yacc} are the SML incarnations of the old Unix tools. + +This manual describes two new tools, \ulex{} and \mlantlr{}, that follow tradition in separating scanning from parsing, but break from tradition in their implementation: \ulex{} is based on \emph{regular expression derivatives} rather than subset-construction, and \mlantlr{} is based on $LL(k)$ parsing rather than $LALR(1)$ parsing. + +\section{Motivation} + +Most parser generators use some variation on $LR$ parsing, a form of \emph{bottom-up} parsing that tracks possible interpretations (reductions) of an input phrase until only a single reduction is possible. While this is a powerful technique, it has the following downsides: +\begin{itemize} + \item Compared to predictive parsing, it is more complicated and difficult to understand. This is particularly troublesome when debugging an $LR$-ambiguous grammar. + \item Because reductions take place as late as possible, the choice of reduction cannot depend on any semantic information; such information would only become available \emph{after} the choice was made. + \item Similarly, information flow in the parser is strictly bottom-up. For (syntactic or semantic) context to influence a semantic action, higher-order programming is necessary. +\end{itemize} +The main alternative to $LR$ parsing is the top-down, $LL$ approach, which is commonly used for hand-coded parsers. An $LL$ parser, when faced with a decision point in the grammar, utilizes lookahead to unambiguously predict the correct interpretation of the input. As a result, $LL$ parsers do not suffer from the problems above. $LL$ parsers have been considered impractical because the size of their prediction table is exponential in $k$ --- the number of tokens to look ahead --- and many languages need $k > 1$. However, Parr showed that an approximate form of lookahead, using tables linear in $k$, is usually sufficient. + +To date, the only mature $LL$ parser based on Parr's technique is his own parser, {\tt antlr}. While {\tt antlr} is sophisticated and robust, it is designed for and best used within imperative languages. The primary motivation for the tools this manual describes is to bring practical $LL$ parsing to a functional language. +Our hope with \ulex{} and \mlantlr{} is to modernize and improve the Standard ML language processing infrastructure, while demonstrating the effectiveness of regular expression derivatives and $LL(k)$ parsing. The tools are more powerful than their predecessors, and they raise the level of discourse in language processing. + +%\section{Outline} + +%This manual is organized into three parts: usage, theory, and implementation. Each of these parts is further broken down into two chapters, one on \ulex{} and one on \mlantlr{}. The usage section is self-contained, and gives a fairly complete specification of the two tools. Full details on the algorithms used are given in the theory section. Data structures, system organization, and other code-related particulars are described in the implementation section. \ No newline at end of file diff --git a/ml-lpt/doc/theory-ml-antlr.tex b/ml-lpt/doc/theory-ml-antlr.tex new file mode 100644 index 0000000..454de65 --- /dev/null +++ b/ml-lpt/doc/theory-ml-antlr.tex @@ -0,0 +1 @@ +\chapter[\mlantlr]{Theory: \mlantlr}\label{ch:antlr-theory} \ No newline at end of file diff --git a/ml-lpt/doc/theory-ml-ulex.tex b/ml-lpt/doc/theory-ml-ulex.tex new file mode 100644 index 0000000..694e9de --- /dev/null +++ b/ml-lpt/doc/theory-ml-ulex.tex @@ -0,0 +1,179 @@ +\chapter[\ulex]{Theory: \ulex}\label{ch:ulex-theory} + +{\Large NOTE: this chapter has been integrated into a paper, and thereafter much improved. In the near future, the paper will be re-adapted to replace this chapter.} + +\section{Regular expressions} + +Throughout this section, we assume an \emph{alphabet} $\Sigma$; any $a \in \Sigma$ is a \emph{symbol}. Since we support unicode, $\Sigma$ can be quite large. Our abstract regular expression (RE) language is as follows: + +\Grammar{ +\GFirst{\rm RE}{\epsilon}{empty string} +\GNext{\CS}{symbol set, $\CS \subseteq \Sigma$} +\GNext{\rm RE\cdot RE}{concatenation} +\GNext{\rm RE^*}{Kleene-closure} +\GNext{\rm RE \OR RE}{alternation (union)} +\GNext{\rm RE \AND RE}{intersection} +\GNext{\neg \rm RE}{negation} +} + +Note that we treat symbol sets (\ie{}, character classes) as primitive; this matches the implementation strategy and simplifies the description of DFA generation. With this representation, the empty set $\emptyset$ and the alphabet $\Sigma$ are both treated as symbol sets. The former will yield an RE that matches no input (\ie{}, $\CL\Sem{\emptyset} = \emptyset$), and the latter will match any single symbol. Notice also that our language of REs allows for intersection and negation in addition to the standard operations. + +The semantics of our RE language are given in the form of a function $\Ls{-} \ : \ \mathrm{RE} \rightarrow \Sigma^*$ from REs to their corresponding language over $\Sigma$: + +\begin{eqnarray*} +\Ls{\epsilon} &=& \epsilon \\ +\Ls{\CS} &=& \CS \\ +\Ls{r\cdot s} &=& \Ls{r} \cdot \Ls{s} \\ +\Ls{r^*} &=& \epsilon \cup \Ls{r}\cdot\Ls{r^*} \\ +\Ls{r \OR s} &=& \Ls{r} \cup \Ls{s} \\ +\Ls{r \AND s} &=& \Ls{r} \cap \Ls{s} \\ +\Ls{\neg r} &=& \Sigma \setminus \Ls{r} +\end{eqnarray*} + +\section{Derivatives}\label{sec:derivatives} + +Brzozowski introduced \emph{derivatives} of regular expressions as an alternative means of DFA construction \cite{derivatives}. His approach is attrative because it easily allows the language of REs to be extended with arbitrary boolean operations. Further, it is intuitive, relatively easy to implement, goes directly from an RE to a DFA, and with some care in implementation can be made competitive with other DFA construction approaches. We begin by introducing the notion of a derivative of some language $\CL$. + +\begin{definition} The \New{derivative} of a set of symbol sequences $\CL \subset \Sigma^*$ with respect to a finite symbol sequence $u$ is defined to be $D_u(\CL) = \{ v \ | \ u\cdot v \in \CL \}$. +\end{definition} + +Derivatives give a very natural algorithm for DFA construction. Before giving that algorithm, however, we need a means of computing derivatives for regular expressions. + +\begin{definition} A regular expression $\RE$ is \New{nullable} if the language it defines contains the empty string, that is, if $\epsilon \in \Ls{\RE}$. +\end{definition} + +We also need the following function: +\[ \delta(\RE) = + \begin{cases} + \epsilon & \textrm{if} \ \epsilon \in \CL\Sem{\RE} \\ + \emptyset & \textrm{if} \ \epsilon \notin \CL\Sem{\RE} + \end{cases} +\] +The $\delta$ function takes REs to REs (recall that the empty set is a symbol set, which is an RE). Intuitively, $\delta$ collapses an RE to the ``smallest'' RE with the same nullability. + +The following function, due to Brzozowski, gives the derivative of a regular expression with respect to a symbol $a$. +\begin{eqnarray*} +D_a (\epsilon) &=& \emptyset \\ +D_a (\CS) &=& + \begin{cases} + \epsilon & \textrm{if} \ a \in \CS \\ + \emptyset & \textrm{if} \ a \notin \CS \\ + \end{cases} \\ +D_a (r \cdot s) &=& D_a(r)\cdot s \OR \delta(r) \cdot D_a(s) \\ +D_a (r^*) &=& D_a(r) \cdot r^* \\ +D_a (r \OR s) &=& D_a(r) \OR D_a(s) \\ +D_a (r \AND s) &=& D_a(r) \AND D_a(s) \\ +D_a (\neg r) &=& \neg D_a(r) +\end{eqnarray*} + +We can take the derivative of an RE with respect to a sequence of symbols in a straightforward way: +\begin{eqnarray*} +D_\epsilon (r) &=& r \\ +D_{ua} (r) &=& D_a(D_u(r)) +\end{eqnarray*} + +Intuitively, the derivative of an RE with respect to a symbol $a$ yields a new RE after matching $a$. The following two theorems, again due to Brzozowski, make this precise. + +\begin{theorem} The derivative $D_s(\RE)$ of any regular expression $\RE$ with respect to any sequence $u$ is a regular expression. +\end{theorem} + +\begin{theorem} A sequence $u$ is contained in $\Ls{\RE}$ if and only if $\Ls{D_u(\RE)}$ is nullable. +\end{theorem} + +Derivatives provide an easy method of DFA construction. Suppose we want to build a DFA that recognizes $\RE$. We can think of each state of the DFA as a regular expression. We start with a state $Q_0$ that represents $\RE$. We then take the derivative of $\RE$ with respect to each symbol of the alphabet and create a new state each time a new derivative is found, adding each new state to the work list. We pop a state from the work list and repeat, until the work list is empty. There will be a transition from $Q_j$ to $Q_k$ if and only if (identifying states and their REs) $D_a (Q_j) = Q_k$ for some symbol $a$; the transition will be labeled with the set of all such $a$. Finally, any state that represents a nullable RE is an accepting state. The correctness of the recognizer is a direct consequence of the above theorems. + +The sketch glosses over several important details. First, what notion of equality do we intend for the equation $D_a (Q_j) = Q_k$? Ideally, we would identify as a single state all those REs which admit the same language, so that $D_a (Q_j) = Q_k$ if and only if $\Ls{D_a (Q_j)} = \Ls{Q_k}$. This is expensive to compute, so Brzozowski introduced the notion of RE similarity, an equivalence on REs which is easy to compute but still guarantees that the DFA is finite. + +Let $\approx$ denote the least equivalence relation on REs such that +\begin{eqnarray*} +r \OR r &\approx& r \\ +r \OR s &\approx& s \OR r \\ +(r \OR s) \OR t &\approx& r \OR (s \OR t) +\end{eqnarray*} + +\begin{definition} Two regular expressions $r$ and $s$ are \New{similar} if $r \approx s$ and are \New{dissimilar} otherwise. +\end{definition} + +\begin{theorem} Every regular expression has only a finite number of dissimilar derivatives. +\end{theorem} + +Hence, DFA construction is guaranteed to succeed if new states are only created when no existing state is similar to a given derivative. In fact, we want to do much better then this to avoid blowup in DFA size. + +\begin{remark} +In a practical implementation of DFA construction using derivatives, it is crucial to aggresively identify when a derivative admits the same language as an existing state (RE) in the DFA. The cost of this identification must be balanced against the number of duplicate states avoided. +\end{remark} + +In \ulex{}, we accomplish this by canonicalizing all input and derived REs. The canonicalization is described in detail in section~\ref{sec:reg-exp}. + +\section{Factorings}\label{sec:factorings} + +Another problem with DFA construction is the size of the unicode alphabet: taking the derivative with respect to each unicode symbol is not feasible. But to construct the DFA, we have to examine every possible derivative of a given RE. We must try to conservatively estimate what sets of symbols will yield the same derivative for an RE. Here we break from Brzozowski's work and introduce new terminology and an algorithm to make derivatives more amenable to large alphabets. + +Let $\sim_\RE$ be the relation defined as follows. For a regular expression $\RE$ and symbols $a, b$, $a \sim_\RE b$ if and only if $D_a (\RE) = D_b (\RE)$. + +\begin{definition} +The \New{derivative classes} of $\RE$ are the the equivalence classes $\Sigma/{\sim_\RE}$. +\end{definition} + +Ultimately, the outedges for a DFA state and the derivative classes of the RE for that state are in one-to-one correspondence.\footnote{This is not quite true: we usually drop error transitions, that is, transitions going to the RE $\emptyset$.} Hence, we must eventually determine all the derivative classes for an RE in order to construct the DFA. To avoid testing the entire alphabet a symbol at a time, we introduce an algorithm which (over)partitions $\Sigma$, so that each partition is a subset of a derivative class. We can then take the derivative with respect to a representative from each partition, and determine which partitions actually belong to the same derivative class. + +\begin{definition} +Let $r$ be an RE. A \New{factoring} of $\Sigma$ under $r$ is a partitioning of $\Sigma$ such that each partition is a subset of a derivative class for $\RE$. +\end{definition} + +To be clear: we are factoring the \emph{alphabet} into partitions, but the factoring is guided by (\emph{under}) a regular expression. A factoring under a given RE is not unique. The derivative classes for an RE are one possible factoring (with a minimal number of partitions) while the set of all singleton sets of symbols is another factoring (with a maximal number of partitions). We will present a simple recursive factoring algorithm and prove its correctness, but first, an example. + +Suppose we have two regular expressions $r$ and $s$ yielding factorings $\{ \CR_1, \CR_2 \}$ and $\{ \CS_1, \CS_2 \}$ respectively. Let $t = r \OR s$. The derivative of $t$ with respect to some symbol $a$ is $D_a(t) = D_a(r) \OR D_a(s)$. Hence, if $D_a(r) = D_b(r)$ and $D_a(s) = D_b(s)$ for some symbols $a, b$, then $D_a(t) = D_b(t)$ and so $a \sim_t b$. We can use this to give a factoring under $t$. The relationship between the factorings under $r$, $s$ and $t$ can be visualized as follows: + +\[ + \xymatrix{ + \bullet \ar@{-}[rrr]|{\Sigma} &&& \bullet \\ + \bullet \ar@{-}[rr]|{\CR_1} && \bullet \ar@{-}[r]|{\CR_2} & \bullet \\ + \bullet \ar@{-}[r]|{\CS_1} & \bullet \ar@{-}[rr]|{\CS_2} && \bullet \\ + \bullet \ar@{-}[r]|{\CR_1 \cap \CS_1} & + \bullet \ar@{-}[r]|{\CR_1 \cap \CS_2} & + \bullet \ar@{-}[r]|{\CR_2 \cap \CS_2} & + \bullet + } +\] + +This small example captures the essential idea of the algorithm. To give a factoring under an RE, we recursively find factorings under its components and ``compress'' those factorings into a single new factoring that respects them. The factorings are being compressed (flattened) in the sense that the boundaries of one factoring are forced onto another, causing some partitions to split. The algorithm we present is in two stages: first, a factoring function recurively collects factorings under an RE; then, a compress function compresses them all onto $\Sigma$ to produce a single factoring for an RE. We now make this precise. + +The \emph{factoring} function $F$ takes a regular expression and gives a factoring of $\Sigma$ under that RE. It is defined recursively as follows: +\begin{eqnarray*} +F(\epsilon) &=& \emptyset \\ +F(\CS) &=& \{ \CS \} \\ +F(r \cdot s) &=& + \begin{cases} + F(r) & \epsilon \notin \Ls{r} \\ + F(r) \cup F(s) & \textrm{otherwise} + \end{cases} \\ +F(r \OR s) &=& F(r) \cup F(s) \\ +F(r \AND s) &=& F(r) \cup F(s) \\ +F(r^*) &=& F(r) \\ +F(\neg r) &=& F(r) +\end{eqnarray*} + +The \emph{compress} function $C : \CP(\Sigma) \longrightarrow \CP(\Sigma)$ takes a set of subsets of the alphabet and produces the smallest partitioning of $\Sigma$ that respects them. In particular, if +\[ C(\{\CS_1, \CS_2, \dots, \CS_m \}) = \{ \CS'_1, \CS'_2, \dots, \CS'_n \} \] +then we have that $\{ \CS'_1, \CS'_2, \dots, \CS'_n \}$ is a partitioning of $\Sigma$ such that for each $\CS'_i$ and $\CS_k$ either $\CS'_i \subseteq \CS_k$ or $\CS'_i \cap \CS_k = \emptyset$. + +\begin{theorem} Let $\RE$ be an RE. Then $C(F(r))$ is a factoring of $\Sigma$ under $\RE$. +\end{theorem} + +\emph{Proof:} by induction on the structure of $\RE$. We use $a$ to denote an arbitrary symbol. + +\vskip 5pt +\emph{Case} $\epsilon$: we have $D_a(\epsilon) = \emptyset$ for all $a \in \Sigma$, so $\Sigma/{\sim_\epsilon} = \{ \Sigma \}$. We have $C(F(\epsilon)) = C(\{ \emptyset \})= \{ \Sigma \}$. + +\vskip 5pt +\emph{Case} $\CS$: we have $D_a(\CS) = \epsilon$ if $a \in \CS$ and $D_a(\CS) = \emptyset$ otherwise. Thus the derivative classes are $\CS$ and $\Sigma \setminus \CS$, which are exactly the sets produced by $C(F(\CS)) = C(\{ \CS \})$. + +\vskip 5pt +\emph{Case} $s \cdot t$ and $\epsilon \notin \Ls{s}$: here $D_a(s \cdot t) = D_a(s) \cdot t$. Because $t$ is fixed as $a$ varies, the derivative classes are just the derivative classes of $s$. Since $F(s \cdot t) = F(s)$ the result holds by the induction hypothesis on $s$. + +\vskip 5pt +\emph{Case} $s \cdot t$ and $\epsilon \in \Ls{s}$: here $D_a(s \cdot t) = D_a(s) \cdot t \OR \epsilon \cdot D_a(t)$. Let $b, c \in \Sigma$ such that $b \sim_s c$ and $b \sim_t c$. Then $b \sim_{s \cdot t} c$. The result follows from this fact and the inductive hypothesis applied to $s$ and $t$. + +\vskip 5pt +The other cases are similar. \ No newline at end of file diff --git a/ml-lpt/doc/usage-ml-antlr.tex b/ml-lpt/doc/usage-ml-antlr.tex new file mode 100644 index 0000000..3375f1c --- /dev/null +++ b/ml-lpt/doc/usage-ml-antlr.tex @@ -0,0 +1,604 @@ +%!TEX root = manual.tex +% +\chapter{ML-Antlr} + +%\section{Overview} + +Parsers analyze the syntactic structure of an input string, and are usually specified with some variant of context-free grammars. \antlr{} is a parser generator for Standard ML based on Terence Parr's variant of $LL(k)$ parsing. The details of the parsing algorithm are given in the companion implementation notes; the practical restrictions on grammars are discussed in Section~\ref{sec:antlr-llk}. A parser generated by \antlr{} is a functor; it requires a module with the \texttt{ANTLR\char`\_LEXER} signature: +\begin{lstlisting} +signature ANTLR_LEXER = sig + type strm + val getPos : strm -> AntlrStreamPos.pos + end +\end{lstlisting} +Applying the parser functor will yield a module containing a \texttt{parse} function: +\begin{lstlisting} +val parse : + (Lex.strm -> ParserToks.token * AntlrStreamPos.span * Lex.strm) + -> Lex.strm + -> result_ty option * strm * ParserToks.token AntlrRepair.repair list +\end{lstlisting} +where \texttt{result\char`\_ty} is determined by the semantic actions for the parser. The \texttt{ParserTokens} module is generated by \antlr{} (see Section~\ref{sec:antlr-gencode}) and the \texttt{AntlrRepair} module is available in the \texttt{ml-lpt} library (see Chapter~\ref{ch:ml-lpt-lib}). + +Notable features of \antlr{} include: +\begin{itemize} + \item Extended BNF format, including Kleene-closure (*), positive closure (+), and optional (?) operators. + \item Robust, automatic error repair. + \item Selective backtracking. + \item ``Inherited attributes'': information can flow downward as well as upward during a parse. + \item Semantic predicates: a syntactic match can be qualified by a semantic condition. + \item Grammar inheritence. + \item Convenient default actions, especially for EBNF constructions. + \item Convenient abbreviations for token names (\emph{e{.}g{.}}, \texttt{"("} rather than \texttt{LP}) +\end{itemize} +The tool is invoked from the command-line as follows: +\begin{verbatim} + ml-antlr [options] file +\end{verbatim} +where \texttt{file} is the name of the input \ulex{} specification, and where \texttt{options} may be any combination of: + +\vskip 12pt +\begin{tabular}{lp{0.65\textwidth}} + \texttt{--dot} & generate DOT output (for graphviz; see \texttt{http://www.graphviz.org}). + The generated file will be named \texttt{file.dot}, where \texttt{file} is the input file. \\ + \\ + \texttt{--latex} & generate a simple \LaTeX version of the grammar, named \texttt{file.tex}. \\ + \\ + \texttt{--unit-actions} & ignore the action code in the grammar, and instead return \texttt{()} + for every production. \\ + \\ + \texttt{--debug} & add code to the actions to print the left-hand-side of the production. +\end{tabular}% + +\vskip 10pt \noindent +The output file will be called \texttt{file.sml}. + +\section{Background definitions} + +Before describing \antlr{}, we need some terminology. A \emph{context-free grammar} (CFG) is a set of \emph{token} (or \emph{terminal}) symbols, a set of \emph{nonterminal} symbols, a set of \emph{productions}, and a start symbol $S$, which must be a nonterminal. +The general term \emph{symbol} refers to both tokens and nonterminals. A production relates a nonterminal $A$ to a string of symbols $\alpha$; we write this relation as $A \ra \alpha$. Suppose $\alpha A \beta$ is a symbol string, and $A$ is a nonterminal symbol. We write $\alpha A \beta \Ra \alpha \gamma \beta$ if $A \ra \gamma$ is a production; this is called a one-step derivation. In general, a CFG generates a language, which is a set of token strings. The strings included in this language are exactly those token string derived in one or more steps from the start symbol $S$. + +A parser recognizes whether an input string is in the language generated by a given CFG, usually computing some value (such as a parse tree) while doing so. The computations performed during a parse are called \emph{semantic actions} (or just \emph{actions}). + +\section{Specification format}\label{sec:antlr-spec} + +A \antlr{} specification is a list of semicolon-terminated \emph{declarations}. Each declaration is either a \emph{directive} or a \emph{nonterminal definition}. Directives are used to alter global specification properties (such as the name of the functor that will be generated) or to define supporting infrastructure for the grammar. The nonterminal definitions specify the grammar itself. The grammar for \antlr{} is given in Figure~\ref{fig:antlr-syntax}. + +\begin{figure} +\Grammar{ +\GFirstB{spec} + {$($ declaration \T{;} $)^*$} + +\GFirstB{declaration} + {directive} +\GNextB + {nonterminal} + +\GFirstB{directive} + {\kw{defs} code} +\GNextB + {\kw{entry} ID $($ \T{,} ID $)^*$} +\GNextB + {\kw{import} STRING $($ \kw{dropping} symbol$^+$ $)^?$} +\GNextB + {\kw{keywords} symbol $($ \T{,} symbol $)^*$} +\GNextB + {\kw{value} ID code} +\GNextB + {\kw{name} ID} +\GNextB + {\kw{header} code} +\GNextB + {\kw{refcell} ID \T{:} monotype \T{=} code} +\GNextB + {\kw{start} ID} +\GNextB + {\kw{tokentype} qualid} +\GNextB + {\kw{tokens} \T{:} tokdef $($ \T{|} tokdef $)^*$} +\GNextB + {\kw{nonterms} \T{:} datacon $($ \T{|} datacon $)^*$} + +\GFirstB{code} + { \T{(} $\dots$ \T{)} } + +\GFirstB{tokdef} + {datacon $($ \T{(} STRING \T{)} $)^?$} + +\GFirstB{datacon} + {ID} +\GNextB + {ID \T{of} monotype} + +\GFirstB{nonterminal} + {ID formals$^?$ \T{:} prodlist} + +\GFirstB{formals} + { \T{(} ID $($ \T{,} ID $)^*$ \T{)} } + +\GFirstB{prodlist} + {production $($ \T{|} production $)^*$} + +\GFirstB{production} + {\kw{try}$^?$ named-item$^*$ $($ \kw{where} code $)^?$ $($ \T{=>} code $)^?$ } +\GFirstB{named-item} + {$($ ID \T{:} $)^?$ item} +\GFirstB{item} + {prim-item \T{?}} +\GNextB + {prim-item \T{+}} +\GNextB + {prim-item \T{*}} + +\GFirstB{prim-item} + {symbol $($ \T{@} code $)^?$} +\GNextB + { \T{(} prodlist \T{)} } + +\GFirstB{symbol} + {ID} +\GNextB + {STRING} + +\GFirstB{monotype}{\textrm{An SML monomorphic type expression}} + +\GFirstB{qualid}{\textrm{An SML qualified identifier}} + +\GFirstB{ID}{\textrm{An SML identifier}} +\GFirstB{STRING}{\textrm{An SML string literal}} +} +\caption{The \antlr{} grammar}\label{fig:antlr-syntax} +\end{figure} + +SML-style comments (\texttt{(* ... *)}) are treated as ignored whitespace anywhere they occur in the specification, \emph{except} in segments of code. The \textit{code} symbol represents a segment of SML code, enclosed in parentheses. Extra parentheses occuring within strings or comments in code need not be balanced. +A complete example specification appears in Chapter~\ref{ch:example}. + +Most \antlr{} declarations are \emph{cumulative}: they may appear multiple times in a grammar specification, with each new declaration adding to the effect of the previous ones. Thus, for instance, the specification fragment +\begin{lstlisting}[language=MLAntlr] + %tokens : foo ; + %tokens : bar of string ; +\end{lstlisting}% +is equivalent to the single directive +\begin{lstlisting}[language=MLAntlr] + %tokens : foo | bar of string ; +\end{lstlisting}% +and similarly for nonterminal definitions and so on. All declarations are cumulative except for the \kw{start} and \kw{name} directives. +The reason for treating specifications in this way is to give the \kw{import} directive very simple semantics, as described below. + +\section{Directives} + +\subsection{The \kw{defs} directive} + +The \kw{defs} directive is used to include a segment of code in the generated parser: +\begin{lstlisting}[language=SML] +%defs ( + fun helperFn x = (* ... *) +); +\end{lstlisting}% +All definitions given will be in scope for the semantic actions (see Section~\ref{sec:antlr-actions}). + +\subsection{The \kw{entry} directive} + +It is often useful to parse input based on some fragment of a grammar. When a nonterminal is declared to be an \emph{entry point} for the grammar via \kw{entry}, \antlr{} will generate a separate \texttt{parse} function that expects the input to be a string derived from that nonterminal. Given a grammar with a nonterminal \texttt{exp} and the declaration +\begin{lstlisting}[language=MLAntlr] + %entry exp; +\end{lstlisting}% +the generated parser will include a function +\begin{lstlisting} +val parseexp : (Lex.strm -> ParserToks.token * AntlrStreamPos.span * Lex.strm) + -> Lex.strm + -> exp_ty option * strm * ParserToks.token AntlrRepair.repair list +\end{lstlisting} +where \texttt{exp\char`\_ty} is the type of the actions for the \texttt{exp} nonterminal. Note that if \texttt{exp} has inherited attributes (Section~\ref{sec:inh-attr}) they will appear as a tuple argument, curried after the lexer argument: +\begin{lstlisting} +val parseexp : (Lex.strm -> ParserToks.token * AntlrStreamPos.span * Lex.strm) + -> attributes + -> Lex.strm + -> exp_ty option * strm * ParserToks.token AntlrRepair.repair list +\end{lstlisting} +Finally, the \emph{start} symbol (Section~\ref{sec:start}) is always an entry point to the grammar, but the generated function is simply called \texttt{parse}. + +\subsection{The \kw{import} directive} + +The \kw{import} directive is used to include one grammar inside another. The string given in the directive should hold the path to a grammar file, and $\backslash$ characters must be escaped. By default, all declarations appearing in the specified file are included in the resulting grammar, except for \kw{start}, \kw{entry}, and \kw{name} declarations. However, individual tokens or nonterminals can be dropped by listing them in the \kw{dropping} clause of an \kw{import} declaration. Since nonterminal definitions are cumulative (Section~\ref{sec:antlr-nt}), the imported nonterminals can be extended with new productions simply by listing them. +The final grammar must, of course, ensure that all used tokens and nonterminals are defined. + +\subsection{The \kw{keywords} and \kw{value} directives} + +\Antlr{} uses an error-repair mechanism that is based on inserting or deleting tokens until +a correct parse is found. +The \kw{keywords} and \kw{default} directives allow one to improve this process. +Since changes to the input involving keywords can drastically alter the meaning of the +input, it is usually desirable to favor non-keyword repairs. +The \kw{keywords} directive is used to tell \antlr{} which tokens should be considered keywords. +The \kw{value} directive is used to define a default argument for non-nullary tokens, which +is used to construct a token value that can be inserted into the token stream +when attempting to find a repair. +For example +\begin{lstlisting}[language=MLAntlr] + %value NUMBER(0); +\end{lstlisting}% +would allow the \texttt{NUMBER} token to be inserted as an error repair. + +Section~\ref{sec:antlr-gencode} describes how to report error repairs to the user. +%When a syntax error is discovered, \antlr{} attempts to find a single-token repair to the input that will %allow the parse to continue. + +\subsection{The \kw{name} directive} + +The prefix to use for the name of the generated parser functor is specified using \kw{name}. +In addition to the functor, \antlr{} will generate a module to define the \texttt{token} datatype. +If the declaration +\begin{lstlisting}[language=MLAntlr] + %name Example; +\end{lstlisting}% +appears in the specification, then the parser functor will be named +\texttt{ExampleParseFn} and the tokens module will be called \texttt{ExampleTokens}. + +\subsection{The \kw{header} directive} +The \kw{header} directive allows one to specify the parser's functor. +This directive is useful when you want to add additional parameters to the functor, +but the declaration must include the \texttt{Lex} structure with signature +\texttt{ANTLR\char`\_LEXER}. +For example, +\begin{lstlisting} +%header ( + functor ExampleParseFn ( + structure Lex : ANTLR_LEXER + structure Extras : EXTRA_SIG)); +\end{lstlisting}% + +\noindent{}\textbf{Note:} the \kw{header} directive was added in SML/NJ version 110.72. + +\subsection{The \kw{refcell} directive} + +Because semantic actions must be pure (for backtracking and error repair), they cannot make use of standard reference cells to communicate information. +Nonterminals may inherit attributes (Section~\ref{sec:inh-attr}), which allows information to flow downward, but in some cases flowing information this way can become extremely tedious. +For example, a data structure may only need to be updated at a single point in the grammar, but in order to properly thread this state through the grammar, an inherited attribute would have to be added and propagated through every nonterminal. + +The \kw{refcell} directive is used to declare a backtracking-safe reference cell and make it available to all semantic actions. Reference cells are declared by giving the name, type, and initial value for the cell. Each cell is bound in the semantic actions as a standard SML \texttt{ref} value. Thus, for example, we might have the following specification fragment: +\begin{lstlisting}[language=MLAntlr] + %refcell symbols : StringSet.set = ( StringSet.empty ); + + exp + : INT + | (exp) + | ID => ( symbols := StringSet.add(!symbols, ID); ID ) + ; +\end{lstlisting}% +The result of this fragment is that all symbol uses are tracked, in any use of +the \texttt{exp} nonterminal, but without having to manually thread the data +structure state through the grammar. + +The final contents of a reference cell is returned as an extra result from the parse +following the repair list. + +\subsection{The \kw{start} directive}\label{sec:start} + +A particular nonterminal must be designated as the start symbol for the grammar. The start symbol can be specified using \kw{start}; otherwise, the first nonterminal defined is assumed to be the start symbol. + +\subsection{The \kw{tokentype} directive} + +As noted above, \mlantlr{} synthesizes a structure that contains a datatype representing the +tokens specified in the grammar. +In some situations, it may be useful to specify the token datatype elsewhere (\eg{}, if you +want to use the same lexer for two different parsers). +In such a case, one can use the \kw{tokentype} directive to give a qualified name for the token +dataype. +This identifier will be used in the tokens structure to specify the tokens type. +For example, if the grammar specification contains the directive +\begin{lstlisting}[language=MLAntlr] + %tokentype MyTokens.t; +\end{lstlisting}% +then the code +\begin{lstlisting}[language=SML] + datatype token = MyTokens.t +\end{lstlisting}% +in the tokens structure. +Even when the \kw{tokentype} directive is used, one must also declare the grammar's tokens +using the \kw{tokens} directive. +Furthermore, the user-supplied token datatype must include a nullary \texttt{EOF} constructor +in addition to exactly the constructors specified by the \kw{tokens} directives.\footnote{ + When the \kw{tokentype} directive is specified, \mlantlr{} will generate a signature for + the tokens structure. +} + +\noindent{}\textbf{Note:} the \kw{tokentype} directive was added in SML/NJ version 110.81. + +\subsection{The \kw{tokens} directive} + +The alphabet of the parser is defined using \kw{tokens}. The syntax for this directive resembles a datatype declaration in SML, except that optional abbreviations for tokens may be defined. For example: +\begin{lstlisting}[language=MLAntlr] + %tokens + : KW_let ("let") | KW_in ("in") + | ID of string | NUM of Int.int + | EQ ("=") | PLUS ("+") + | LP ("(") | RP (")") + ; +\end{lstlisting}% +Within nonterminal definitions, tokens may be referenced either by their name or abbreviation; the latter must always be double-quoted. + +\subsection{The \kw{nonterms} directive} +\mlantlr{} normally identifies non-terminal symbols by their appearance on the left-hand side +of a production and relies on SML type inference to determine the type returned by parsing +a string derived from the nonterminal. +It is also possible to declare the type of a non-terminal using a \kw{nonterms} directive. + +\section{Nonterminal definitions}\label{sec:antlr-nt} + +The syntax of nontermal definitions is given in Figure~\ref{fig:antlr-syntax}. As an illustration of the grammar, consider the following example, which defines a nonterminal with three productions, taking a formal parameter \texttt{env}: +\begin{lstlisting}[language=MLAntlr] + atomicExp(env) + : ID => ( valOf(AtomMap.find (env, Atom.atom ID)) ) + | NUM + | "(" exp@(env) ")" + ; +\end{lstlisting}% +Note that actions are only allowed at the end of a production, and that they are optional. + +As with most directives, the non-terminal definitions are cumulative. +For example, the definition of \texttt{atomicExp} above could also be +written as three separate rules. +\begin{lstlisting}[language=MLAntlr] + atomicExp(env) : ID => ( valOf(AtomMap.find (env, Atom.atom ID)) ); + atomicExp(env) : NUM; + atomicExp(env) : "(" exp@(env) ")"; +\end{lstlisting}% + +\subsection{Extended BNF constructions} + +In standard BNF syntax, the right side of a production is a simple string of symbols. Extended BNF allows regular expression-like operators to be used: \texttt{*}, \texttt{+}, and \texttt{?} can follow a symbol, denoting 0 or more, 1 or more, or 0 or 1 occurrences respectively. In addition, parentheses can be used within a production to enclose a \emph{subrule}, which may list several \texttt{|}-separated alternatives, each of which may have its own action. In the following example, the nonterminal \texttt{item\char`\_list} matches a semicolon-terminated list of identifiers and integers: +\begin{lstlisting}[language=MLAntlr] + item_list : (( ID | INT ) ";")* ; +\end{lstlisting}% +All of the extended BNF constructions have implications for the actions of a production; see Section~\ref{sec:antlr-actions} for details. + +\subsection{Inherited attributes}\label{sec:inh-attr} + +In most parsers, information can flow upward during the parse through actions, but not downard. In attribute grammar terminology, the former refers to \emph{synthesized} attributes, while the latter refers to \emph{inherited attributes}. Since \antlr{} is a predictive parser, it allows both kinds of attributes. Inherited attributes are treated as parameters to nonterminals, which can be used in their actions or semantic predicates. Formal parameters are introduced by enclosing them in parentheses after the name of a nonterminal and before its production list; the list of parameters will become a tuple. In the following, the nonterminal \texttt{expr} takes a single parameter called \texttt{env}: +\begin{lstlisting}[language=MLAntlr] + expr(env) : (* ... *) ; +\end{lstlisting}% +If a nonterminal has a formal parameter, any use of that nonterminal is required to apply it to an actual parameter. Actual parameters are introduced in a production by giving the name of a nonterminal, followed by the \texttt{@} sign, followed by the code to compute the parameter. For example: +\begin{lstlisting}[language=MLAntlr] + assignment : ID ":=" expr@(Env.emptyEnv) ; +\end{lstlisting}% + +\subsection{Selective backtracking} + +Sometimes it is inconvenient or impossible to construct a nonterminal definition which can be unambiguously resolved with finite lookahead. +% (see Section~\ref{sec:antlr-llk} for examples). +The \kw{try} keyword can be used to mark ambiguous \emph{productions} for selective backtracking. For backtracking to take place, each involved production must be so marked. Consider the following: +\begin{lstlisting}[language=MLAntlr] + A : %try B* ";" + | %try B* "(" C+ ")" + ; +\end{lstlisting}% +As written, the two productions cannot be distinguished with finite lookahead, since they share an arbitrary long prefix of \texttt{B} nonterminal symbols. Adding the \kw{try} markers tells \antlr{} to attempt to parse the first alternative, and if that fails to try the second. Another way to resolve the ambiguity is the use of subrules, which do not incur a performance penalty: +\begin{lstlisting}[language=MLAntlr] + A : B* ( ";" + | "(" C+ ")" + ) + ; +\end{lstlisting}% +This is essentially \emph{left-factoring}. See Section~\ref{sec:antlr-llk} for more guidance on working with the $LL(k)$ restriction. + +\subsection{Semantic predicates} + +A production can be qualified by a \emph{semantic predicate} by introducting a \kw{where} clause. Even if the production is syntactically matched by the input, it will not be used unless its semantic predicate evaluates to \texttt{true}. A \kw{where} clause can thus introduce context-sensitivity into a grammar. The following example uses an inherited \texttt{env} attribute, containing a variable-value environment: +\begin{lstlisting}[language=MLAntlr] + atomicExp(env) + : ID %where ( AtomMap.inDomain(env, Atom.atom ID) ) + => ( valOf(AtomMap.find (env, Atom.atom ID)) ) + | NUM + | "(" exp@(env) ")" + ; +\end{lstlisting}% +In this example, if a variable is mentioned that has not been defined, the error is detected and reported during the parse as a syntax error. + +Semantic predicates are most powerful when combined with selective backtracking. The combination allows two syntactically identical phrases to be distinguished by contextual, semantic information. + +\subsection{Actions}\label{sec:antlr-actions} + +Actions for productions are just SML code enclosed in parentheses. Because of potential backtracking and error repair, action code should be pure (except that they may update \antlr{} \texttt{refcell}s; see the \kw{refcell} directive). + +In scope for an action are all the user definitions from the \kw{defs} directive. In addition, the formal parameters of the production are in scope, as are the semantic yield of all symbols to the left of the action (the yield of a token is the data associated with that token's constructor). In the following example, the first action has \texttt{env} and \texttt{exp} in scope, while the second action has \texttt{env} and \texttt{NUM} in scope: +\begin{lstlisting}[language=MLAntlr] + atomicExp(env) + : "(" exp@(env) ")" => ( exp ) + | NUM => ( NUM ) + ; +\end{lstlisting}% +Notice also that the actual parameter to \texttt{exp} in the first production is \texttt{env}, which is in scope at the point the parameter is given; \texttt{exp} itself would not be in scope at that point. + +An important aspect of actions is naming: in the above example, \texttt{exp} and \texttt{NUM} were the default names given to the symbols in the production. In general, the default name of a symbol is just the symbol's name. If the same name appears multiple times in a production, a number is appended to the name of each yield, start from 1, going from left to right. A subrule (any items enclosed in parentheses) is by default called \texttt{SR}. Any default name may be overriden using the syntax \texttt{name=symbol}. Overriding a default name does \emph{not} change the automatic number for other default names. Consider: +\begin{lstlisting}[language=MLAntlr] + foo : A bar=A A ("," A)* A* + ; +\end{lstlisting}% +In this production, the names in scope from left to right are: \texttt{A1}, \texttt{bar}, \texttt{A3}, \texttt{SR}, \texttt{A4}. + +The EBNF operators \texttt{*}, \texttt{+} and \texttt{?} have a special effect on the semantic yield of the symbols to which they are applied. Both \texttt{*} and \texttt{+} yield a \emph{list} of the type of their symbol, while \texttt{?} yields an option. For example, if \texttt{ID*} appeared in a production, its default name would be \texttt{ID}, and if the type of value of \texttt{ID} was \texttt{string}, it would yield a \texttt{string list}; likewise \texttt{ID?} would yield a \texttt{string option}. + +Subrules can have embedded actions that determine their yield: +\begin{lstlisting}[language=MLAntlr] + plusList : ((exp "+" exp => ( exp1 + exp2 )) ";" => ( SR ))* => ( SR ) +\end{lstlisting}% +The \texttt{plusList} nonterminal matches a list of semicolon-terminated additions. The innermost subrule, containing the addition, yields the value of the addition; that subrule is contained in a larger subrule terminated by a semicolon, which yield the value of the inner subrule. Finally, the semicolon-terminated subrule is itself within a subrule, which is repeated zero or more times. Note that the numbering scheme for names is restarted within each subrule. + +Actions are \emph{optional}: if an action is not specified, the default behavior is to return all nonterminals and non-nullary tokens in scope. Thus, the last example can be written as +\begin{lstlisting}[language=MLAntlr] + plusList : ((exp "+" exp => ( exp1 + exp2 )) ";")* +\end{lstlisting}% +since \texttt{"+"} and \texttt{";"} represent nullary token values. + +\section{The $LL(k)$ restriction}\label{sec:antlr-llk} + +When working with any parser, one must be aware of the restrictions its algorithm places on grammars. +When \antlr{} analyzes a grammar, it attempts to create a prediction-decision +tree for each nonterminal. +In the usual case, this decision is made using lookahead token sets. The tool will start with $k = 1$ +lookahead and increment up to a set maximum until it can +uniquely predict each production. Subtrees of the decision tree +remember the tokens chosen by their parents, and take this into account +when computing lookahead. For example, suppose we have two productions +at the top level that generate the following sentences: +\begin{verbatim} + prod1 ==> AA + prod1 ==> AB + prod1 ==> BC + prod2 ==> AC + prod2 ==> C +\end{verbatim} +At $k = 1$, the productions can generate the following sets: +\begin{verbatim} + prod1 {A, B} + prod2 {A, C} +\end{verbatim} +and $k = 2$, +\begin{verbatim} + prod1 {A, B, C} + prod2 {C, } +\end{verbatim} +Examining the lookahead sets alone, this grammar fragment looks ambiguous +even for $k = 2$. However, \antlr{} will generate the following decision +tree: +\begin{lstlisting}[language=SML] +if LA(0) = A then + if LA(1) = A or LA(1) = B then + predict prod1 + else if LA(1) = C then + predict prod2 +else if LA(0) = B then + predict prod1 +else if LA(1) = C then + predict prod2 +\end{lstlisting}% + +In \antlr{}, only a small amount of lookahead is used by default ($k = 3$). Thus, the following grammar is ambiguous for \antlr{}: +\begin{lstlisting}[language=MLAntlr] + foo : A A A B + | A A A A + ; +\end{lstlisting}% +and will generate the following error message: +\begin{verbatim} + Error: lookahead computation failed for 'foo', + with a conflict for the following productions: + foo ::= A A A A EOF + foo ::= A A A B EOF + The conflicting token sets are: + k = 1: {A} + k = 2: {A} + k = 3: {A} +\end{verbatim} +Whenever a lookahead ambiguity is detected, an error message of this form is given. The listed productions are the point of conflict. +The \texttt{k = ...} sets together give examples that can cause the ambiguity, in this case an input of \texttt{AAA}. + +The problem with this example is that the two \texttt{foo} productions can only be distinguished by a token at $k = 4$ depth. This situation can usually be resolved using \emph{left-factoring}, which lifts the common prefix of multiple productions into a single production, and then distinguishes the old productions through a subrule: +\begin{lstlisting}[language=MLAntlr] + foo : A A A (B | A) + ; +\end{lstlisting}% +Recall that subrule alternatives can have their own actions: +\begin{lstlisting}[language=MLAntlr] + foo : A A A ( B => ( "got a B" ) + | A => ( "got an A" ) + ) + ; +\end{lstlisting}% +making left-factoring a fairly versatile technique. + +Another limitation of predictive parsing is \emph{left-recursion}, where a nonterminal recurs without any intermediate symbols: +\begin{lstlisting}[language=MLAntlr] + foo : foo A A + | B + ; +\end{lstlisting}% +Left-recursion breaks predictive parsing, because it is impossible to make a prediction for a left-recursive production without already having a prediction in hand. Usually, this is quite easily resolved using EBNF operators, since left-recursion is most often used for specifying lists. Thus, the previous example can be rewritten as +\begin{lstlisting}[language=MLAntlr] + foo : B (A A)* + ; +\end{lstlisting}% +which is both more readable and more amenable to $LL(k)$ parsing. + +\section{Position tracking} + +\antlr{} includes built-in support for propagating position information. Because the lexer module is required to provide a \texttt{getPos} function, the tokens themselves do not need to carry explicit position information. +A position \emph{span} is a pair to two lexer positions (the type \texttt{AntlrStreamPos.span} is +an abbreviation for \texttt{AntlrStreamPos.pos * AntlrStreamPos.pos}). +Within action code, the position span of any symbol (token, nonterminal, subrule) is available as a value; if the yield of the symbol is named \texttt{Sym}, its span is called \texttt{Sym\char`\_SPAN}. +Note that the span of a symbol after applying the \texttt{*} or \texttt{+} operators is the +span of the entire matched list: +\begin{lstlisting}[language=MLAntlr] + foo : A* => (* A_SPAN starts at the first A and ends at the last *) +\end{lstlisting}% +In addition, the span of the entire current production is available as \texttt{FULL\char`\_SPAN}. + +%\section{Handling precedence} + +\section{Using the generated code}\label{sec:antlr-gencode} + +When \antlr{} is run, it generates a tokens module and a parser functor. +If the parser is given the name \texttt{Xyz} via the \kw{name} directive, these structures will be called \texttt{XyzParseFn} and \texttt{XyzTokens} respectively. +The tokens module will contain a single datatype, called \texttt{token}. The data constructors for the \texttt{token} type have the same name and type as those given in the \kw{tokens} directive; in addition, a nullary constructor called \texttt{EOF} will be available. + +The generated parser functor includes the following: +\begin{lstlisting}[language=SML] +val parse : (Lex.strm -> ParserToks.token * AntlrStreamPos.span * Lex.strm) + -> Lex.strm + -> result_ty option * strm * ParserToks.token AntlrRepair.repair list +\end{lstlisting}% +where \texttt{result\char`\_ty} is the type of the semantic action for the grammar's start symbol. +The \texttt{parse} function is given a lexer function and a stream. +The result of a parse is the semantic yield of the parse, the value of the stream at the end of the parse, and a list of error repairs. +If an unrepairable error occurred, \texttt{NONE} is returned for the yield of the parse. + +Note that if the start symbol for the grammar includes an inherited attribute (or a tuple of attributes), it will appear as an additional, curried parameter to the parser following the lexer parameter. Suppose, for example, that a grammar has a start symbol with an inherited \texttt{Int.int AtomMap.map}, and that the grammar yields \texttt{Int.int} values. The type of its \texttt{parse} function is as follows: +\begin{lstlisting} + val parse : + (strm -> ParserToks.token * strm) -> + Int.int AtomMap.map -> + strm -> + Int.int option * strm * ParserToks.token AntlrRepair.repair list +\end{lstlisting} + +The \texttt{AntlrRepair} module is part of the \texttt{ml-lpt-lib} library; it is fully described in Chapter~\ref{ch:ml-lpt-lib}. +It includes a function \texttt{repairToString}: +\begin{lstlisting} +val repairToString : + ('token -> string) -> AntlrStreamPos.sourcemap + -> 'token repair -> string + +\end{lstlisting} +which can be used to produce error messages from the parser's repair actions. +There is a more refined version of this function that allows one to specialize +the string representation of tokens based on whether the repair adds or deletes +them. +\begin{lstlisting} +datatype add_or_delete = ADD | DEL + +val repairToString : + (add_or_delete -> 'token -> string) -> AntlrStreamPos.sourcemap + ->'token repair -> string +\end{lstlisting} + + +Likewise, the tokens module (\texttt{ParserTokens} in this example) includes a function: +\begin{lstlisting} +val toString : token -> string +\end{lstlisting} +Thus, although error reporting is customizable, a reasonable default is provided, as illustrated below: +\begin{lstlisting} +let + val sm = AntlrStreamPos.mkSourcemap() + val (result, strm', errs) = Parser.parse (Lexer.lex sm) strm + val errStrings = + map (AntlrRepair.repairToString ParserTokens.toString sm) + errs +in + print (String.concatWith "\n" errStrings) +end +\end{lstlisting} +The \texttt{toString} function will convert each token to its symbol as given in a \kw{tokens} directive, +using abbreviations when they are available. +By substituting a different function for \texttt{toString}, this behavior can be altered. diff --git a/ml-lpt/doc/usage-ml-ulex.tex b/ml-lpt/doc/usage-ml-ulex.tex new file mode 100644 index 0000000..3d7ef76 --- /dev/null +++ b/ml-lpt/doc/usage-ml-ulex.tex @@ -0,0 +1,378 @@ +%!TEX root = manual.tex +% +\chapter{ML-ULex} + +%\section{Overview} + +Lexers analyze the lexical structure of an input string, and are usually specified using regular expressions. \textsc{ml-ulex} is a lexer generator for Standard ML. The module it generates will contain a type \texttt{strm} and a function +\begin{verbatim} + val lex : AntlrStreamPos.sourcemap -> strm + -> lex_result * AntlrStreamPos.span * strm +\end{verbatim}% +where \texttt{lex\char`\_result} is a type that must be defined by the user of \ulex{}. +Note that the lexer always returns a token: we assume that end-of-file will be explicitly represented by a token. +Compared to ML-Lex, \ulex{} offers the following improvements: +\begin{itemize} + \item Unicode is supported under the UTF8 encoding. + \item Regular expressions can include intersection and negation operators. + \item The position span of a token is automatically computed and returned to the lexer's caller (as can be seen + by the specification of the \texttt{lex} function above). + \item The specification format is somewhat cleaner. + \item The code base is much cleaner, and supports multiple back-ends, including DFA graph visualization and interactive testing of rules. +\end{itemize}% +The tool is invoked from the command-line as follows: +\begin{verbatim} + ml-ulex [options] file +\end{verbatim}% +where \texttt{file} is the name of the input \ulex{} specification, and where \texttt{options} may be any combination of: + +\vskip 12pt +\begin{tabular}{lp{0.65\textwidth}} + \texttt{--dot} & generate DOT output (for graphviz; see \texttt{http://www.graphviz.org}). The produced file will be named \texttt{file.dot}, where \texttt{file} is the input file. \\ + \\ + \texttt{--match} & enter interactive matching mode. This will allow interactive testing of the machine; presently, only the \texttt{INITIAL} start state is available for testing + (see Section~\ref{sec:start-states} for details on start states). \\ + \\ + \texttt{--ml-lex-mode} & operate in \texttt{ml-lex} compatibility mode. See Section~\ref{sec:lex-compat} for details. \\ + \\ + \texttt{--table-based} & generate a table-based lexer.\\ + \\ + \texttt{--fn-based} & generate a lexer that represents states as functions and transitions as tail calls.\\ + \\ + \texttt{--minimize} & generate a minimal machine. Note that this is slow, and is almost never necessary. \\ + \\ + \texttt{--strict-sml} & generate strict SML (\ie{}, do not use SML/NJ extensions). This flag + is useful if you want to use the output with a different SML system. +\end{tabular} + +\vskip 10pt \noindent +The output file will be called \texttt{file.sml}. + +\section{Specification format} + +A \ulex{} specification is a list of semicolon-terminated \emph{declarations}. Each declaration is either a \emph{directive} or a \emph{rule}. Directives are used to alter global specification properties (such as the name of the module that will be generated) or to define named regular expressions. Rules specify the actual reguluar expressions to be matched. The grammar is given in Figure~\ref{fig:ulex-syntax}. + +\begin{figure} +\Grammar{ +\GFirstB{spec} + {$($ declaration \T{;} $)^*$} + +\GFirstB{declaration} + {directive} +\GNextB + {rule} + +% {\kw{charset} $($ \T{ASCII7} $|$ \T{ASCII8} $|$ \T{UTF8} $)$} + +\GFirstB{directive} + {\kw{arg} code} +\GNextB + {\kw{defs} code} +\GNextB + {\kw{let} ID \T{=} re} +\GNextB + {\kw{name} ID} +\GNextB + {\kw{header} code} +\GNextB + {\kw{states} ID$^+$} + +\GFirstB{code} + { \T{(} $\dots$ \T{)} } + +\GFirstB{rule} + { $($\T{<} ID $($ \T{,} ID $)^*$ \T{>}$)^?$ re \T{=>} code} +\GNextB + { $($\T{<} ID $($ \T{,} ID $)^*$ \T{>}$)^?$ \T{<>} \T{=>} code} + +\GFirstB{re} + {CHAR} +\GNextB + {\T{"} SCHAR$^*$ \T{"}} +\GNextB + {\T{(} re \T{)}} +\GNextCC + {\T{[} $($ \T{-} $|$ \T{\^{ }} $)^?$ $($ CCHAR \T{-} CCHAR $|$ CCHAR $)^+$ + \T{-}$^?$ \T{]} \quad\phantom{.}} + {a character class} +\GNextC + {\T{\{} ID \T{\}}} {\kw{let}-bound RE} +\GNextC + {\T{.}} {wildcard (any single character including \texttt{{$\backslash$}n})} +\GNextC + {re \T{*}} {Kleene-closure (0 or more)} +\GNextC + {re \T{?}} {optional (0 or 1)} +\GNextC + {re \T{+}} {positive-closure (1 or more)} +\GNextC + {re \T{\{} NUM \T{\}}} {exactly {\it NUM} repetitions} +\GNextC + {re \T{\{} NUM$_1$, \ NUM$_2$ \T{\}}} {between {\it NUM$_1$} and {\it NUM$_2$} repetitions} +\GNextC + {re re} {concatenation} +\GNextC + {\T{$\sim$} re} {negation} %(any string not matched by \textit{re})} +\GNextC + {re \T{\&} re} {intersection} +\GNextC + {re \T{|} re} {union} +%\GNextB +% {re \T{/} re} +%\GNextB +% {re \T{\$}} +%\GNextB +% {\T{\_}} +\GFirstB{CHAR} + {any printable character not one of \ + $\begin{array}[t]{l} + \normalfont + \texttt{\^{ } < > $\backslash$ ( ) \{ \} [ \& | * ?}\\ + \normalfont + \texttt{+ " . ; = $\sim$ } + \end{array}$} +\GNextB{an SML or Unicode escape code} +\GFirstB{CCHAR} + {any printable character not one of \ \T{\^{ } - ] $\backslash$}} +\GNextB{an SML or Unicode escape code} +\GFirstB{SCHAR} + {any printable character not one of \ \T{" $\backslash$}} +\GNextB{an SML or Unicode escape code} +\GFirstB{NUM} + {one or more digits} +} +\caption{The \ulex{} grammar}\label{fig:ulex-syntax} +\end{figure} + +There are a few lexical details of the specification format worth mentioning. First, SML-style comments (\texttt{(* ... *)}) are treated as ignored whitespace anywhere they occur in the specification, \emph{except} in segments of code. The \textit{ID} symbol used in the grammar stands for alpha-numeric-underscore identifiers, starting with an alpha character. The \textit{code} symbol represents a segment of SML code, enclosed in parentheses. Extra parentheses occuring within strings or comments in code need not be balanced. + +A complete example specification appears in Chapter~\ref{ch:example}. + +\section{Directives} + +\subsection{The \kw{arg} directive} + +Specifies an additional curried parameter, appearing after the sourcemap parameter, that will be passed into the \texttt{lex} function and made available to all lexer actions. + +\subsection{The \kw{defs} directive} + +The \kw{defs} directive is used to include a segment of code in the generated lexer module, as in the following example: +\begin{lstlisting} +%defs ( + type lex_result = CalcParseToks.token + fun eof() = CalcParseTokens.EOF + fun helperFn x = (* ... *) +) +\end{lstlisting} +The definitions must at least fulfill the following signature: +\begin{lstlisting} +type lex_result +val eof : unit -> lex_result +\end{lstlisting} +unless EOF rules are specified, in which case only the \texttt{lex\char`\_result} type is needed~(see Section~\ref{sec:eof-rules}). +All semantic actions must yield values of type \texttt{lex\char`\_result}. The \texttt{eof} function is called by \ulex{} when the end of file is reached -- it acts as the semantic action for the empty input string. All definitions given will be in scope for the rule actions (see Section~\ref{sec:ulex-rules}). + +\subsection{The \kw{let} directive} + +Use \kw{let} to define named abbreviations for regular expressions; once bound, an abbreviation can be used in further \kw{let}-bindings or in rules. For example, +\begin{verbatim} +%let digit = [0-9]; +\end{verbatim} +introduces an abbreviation for a regular expression matching a single digit. To use abbreviations, enclose their name in curly braces. For example, an additional \kw{let} definition can be given in terms of \texttt{digit}, +\begin{verbatim} +%let int = {digit}+; +\end{verbatim} +which matches arbitrary-length integers. Note that scoping of let-bindings follows standard SML rules, so that the definition of \texttt{int} must appear after the definition of \texttt{digit}. + +\subsection{The \kw{name} directive} + +The name to use for the generated lexer module is specified using \kw{name}. + +\subsection{The \kw{header} directive} +The \kw{header} directive allows one to specify the lexer's header. +This directive is useful when you want to use a functor for the lexer. +For example, +\begin{lstlisting} +%header (functor ExampleLexFn (Extras : EXTRA_SIG)); +\end{lstlisting}% +Note that it is an error to include both the \kw{header} and \kw{name} directive in the +same lexer specification. + +\subsection{The \kw{states} directive} +\label{sec:start-states} + +It is often helpful for a lexer to have multiple \emph{start states}, which influence +the regular expressions that the lexer will match. +For instance, after seeing a double-quote, the lexer might switch into a \texttt{STRING} +start state that contains only the rules necessary for matching strings, and which +returns to the standard start state after the closing quote. + +Start states are introduced via \kw{states} and are named using standard identifiers. +There is always an implicit, default start state called \texttt{INITIAL}. +Within a rule action, the function \texttt{YYBEGIN} can be applied to the name +of a start state to switch the lexer into that state; see~\ref{sec:ulex-actions} +for details on rule actions. + +Using start states allows the lexer to be defined as a collection of finite-state machines +(one per start state), with arbitrary SML code used to control switching between machines. +This mechanism thus allows non-regular features to be supported by the lexer, such as +nested comments. + +\section{Rules}\label{sec:ulex-rules} + +In general, when \texttt{lex} is applied to an input stream, it will attempt to match a prefix of the input with a regular expression given in one of the rules. When a rule is matched, its \emph{action} (associated code) is evaluated and the result is returned. Hence, all actions must belong to the same type. %, but no restrictions are placed on what that type is. +Rules are specified by an optional list of start states, a regular expression, and the action code. The rule is said to ``belong'' to the start states it lists. If no start states are specified, the rule belongs to \emph{all} defined start states. + +Rule matching is determined by three factors: start state, match length, and rule order. A rule is only considered for matching if it belongs to the lexer's current start state. If multiple rules match an input prefix, the rule matching the longest prefix is selected. In the case of a tie, the rule appearing first in the specification is selected. + +For example, suppose the start state \texttt{FOO} is defined, and the following rules appear, with no other rules belonging to \texttt{FOO}: +\begin{verbatim} + a+ => ( Tokens.as ); + a+b+ => ( Tokens.asbs ); + a+bb* => ( Tokens.asbs ); +\end{verbatim} +If the current start state is not \texttt{FOO}, none of the rules will be considered. Otherwise, on input ``aabbbc'' all three rules are possible matches. The first rule is discarded, since the others match a longer prefix. The second rule is then selected, because it matches the same prefix as the third rule, but appears earlier in the specification. + +\subsection{Regular expression syntax} + +\newcommand{\REX}{\textit{re}} +\begin{figure} +\begin{minipage}[t]{.5\textwidth} +\begin{eqnarray*} + \sem{\T{.}} &=& \Sigma \\ + \sem{\REX_1 \ \REX_2} &=& \sem{\REX_1} \cdot \sem{\REX_2} \\ + \sem{\T{$\sim$} \ \REX} &=& \left(\bigcup_{i=0}^\infty \Sigma^i\right)\setminus \sem{\REX} \\ + \sem{\REX_1 \ \T{\&} \ \REX_2} &=& \sem{\REX_1} \cap \sem{\REX_2} \\ + \sem{\REX_1 \ \T{|} \ \REX_2} &=& \sem{\REX_1} \cup \sem{\REX_2} \\ + \sem{\REX \ \T{?}} &=& \sem{\REX} \cup \{ \epsilon \} +\end{eqnarray*} +\end{minipage}\begin{minipage}[t]{.5\textwidth} +\begin{eqnarray*} + \sem{\REX \ \T{*}} &=& \bigcup_{i=0}^\infty \sem{\REX}^i \\ + \sem{\REX \ \T{+}} &=& \bigcup_{i=1}^\infty \sem{\REX}^i \\ + \sem{\REX \ \T{\{} n \T{\}}} &=& \sem{\REX}^n \\ + \sem{\REX \ \T{\{} n \T{, } m \T{\}}} &=& \bigcup_{i=n}^m \sem{\REX}^i +\end{eqnarray*} +\end{minipage} +\caption{Semantics for regular expressions}\label{ulex-re-semantics} +\end{figure} + +The syntax of regular expressions is given in Figure~\ref{fig:ulex-syntax}; constructs are listed in precedence order, from most tightly-binding to least. Escape codes are the same as in SML, but also include \texttt{$\backslash$uxxxx} and \texttt{$\backslash$Uxxxxxxxx}, where \texttt{xxxx} represents a hexidecimal number which in turn represents a Unicode symbol. The specification format itself freely accepts Unicode characters, and they may be used within a quoted string, or by themselves. + +The semantics for \ulex{} regular expressions are shown in Figure~\ref{ulex-re-semantics}; they are standard. Some examples: +\[ +\begin{array}{rcl} +\texttt{0 | 1 | 2 | 3} & \textit{denotes} & + \{ \texttt{0}, \texttt{1}, \texttt{2}, \texttt{3} \} \\ +\texttt{[0123]} & \textit{denotes} & + \{ \texttt{0}, \texttt{1}, \texttt{2}, \texttt{3} \} \\ +\texttt{0123} & \textit{denotes} & + \{ \texttt{0123} \} \\ +\texttt{0*} & \textit{denotes} & + \{ \epsilon, \texttt{0}, \texttt{00}, \dots \} \\ +\texttt{00*} & \textit{denotes} & + \{ \texttt{0}, \texttt{00}, \dots \} \\ +\texttt{0+} & \textit{denotes} & + \{ \texttt{0}, \texttt{00}, \dots \} \\ +\texttt{[0-9]\{3\}} & \textit{denotes} & + \{ \texttt{000}, \texttt{001}, \texttt{002}, \dots, \texttt{999} \} \\ +\texttt{0* \& (..)*} & \textit{denotes} & + \{ \epsilon, \texttt{00}, \texttt{0000}, \dots \} \\ +\texttt{\^{ }(abc)} & \textit{denotes} & + \Sigma^* \setminus \{ \texttt{abc} \} \\ +\texttt{[\^{ }abc]} & \textit{denotes} & + \Sigma \setminus \{ \texttt{a}, \texttt{b}, \texttt{c} \} +\end{array} +\] + +\subsection{EOF rules}\label{sec:eof-rules} + +It is sometimes useful for the behavior of a lexer when it reaches the end-of-file to change +depending on the current start state. +Normally, there is a single user-defined \texttt{eof} function that defines EOF behavior, but +EOF rules can be used to be more selective, as in the following example: +\begin{verbatim} + <> => ( Tok.EOF ); + <> => ( print "Error: unclosed comment"; + Tok.EOF ); +\end{verbatim} +Other than the special \T{<>} symbol, EOF rules work exactly like normal rules. + +Note that if you define any EOF rules, then you must define EOF rules for all states; +otherwise, your scanner may generate a \texttt{Match} exception on EOF. +Furthermore, if you define EOF rules, you do not need a definition of the \texttt{eof} +function. + +\subsection{Actions}\label{sec:ulex-actions} + +\newcommand{\itemtt}[2]{\item[{\normalfont\textbf{\texttt{val}} \texttt{#1} \texttt{:} \texttt{#2}}]\mbox{}\\} + +Actions are arbitrary SML code enclosed in parentheses. The following names are in scope: +\begin{description} + \itemtt{YYBEGIN}{yystart\char`\_state -> unit} + This function changes the start state to its argument. + \itemtt{yysetStrm}{ULexBuffer.stream -> unit} + This function changes the current input source to its argument. + The functions \texttt{yystreamify}, \texttt{yystreamifyInstream} and \texttt{yystreamifyReader} + can be used to construct the stream; they work identically to the corresponding functions + described in Section~\ref{sec:ulex-code} + \itemtt{yytext}{string} + The matched text as a string. + \itemtt{yysubstr}{substring} + The matched text as a substring (avoids copying). + \itemtt{yyunicode}{UTF8.wchar list} + The matched text as a list of Unicode wide characters (\ie{}, words). + \itemtt{continue}{unit -> lex\char`\_result} + This function recursively calls the lexer on the input following the matched prefix, and returns its result. + The span for the resulting token begins at the left border of the match that calls \texttt{continue}. + \itemtt{skip}{unit -> lex\char`\_result} + This function is identical to \texttt{continue}, except that it moves forward the left marker + for the span of the returned token. + For example, \texttt{skip} can be used to skip preceding whitespace. + \itemtt{yysm}{AntlrSourcePos.sourcemap} + the sourcemap for the lexer, to be used with the functions in the \texttt{AntlrSourcePos} module. + \itemtt{yypos}{ref int} + the input character position of the left border of the matched RE, starting from 0. + \itemtt{yylineno}{ref int} + The current line number, starting from 1. + Note that lines are counted using the Unix line-ending convention. + \itemtt{yycolno}{ref int} + The current column number, starting from 1. +\end{description}% +Futhermore, any name bound in the \kw{\%defs} section is in scope in the actions. + +\section{Using the generated code}\label{sec:ulex-code} + +The generated lexer module has a signature that includes the following specifications: +\begin{lstlisting} +type prestrm +type strm = prestrm * start_state + +val streamify : (unit -> string) -> strm +val streamifyReader : (char, 'a) StringCvt.reader -> 'a -> strm +val streamifyInstream : TextIO.instream -> strm + +val lex : AntlrStreamPos.sourcemap -> strm + -> lex_result * AntlrStreamPos.span * strm +\end{lstlisting}% +where \texttt{lex\char`\_result} is the result type of the lexer actions, and \texttt{start\char`\_state} is an algebraic datatype with nullary constructors for each defined start state. +Note that \texttt{lex\char`\_result} must be defined as a type using the \kw{defs} directive. +In this interface, lexer start states are conceptually part of the input stream; thus, from an external viewpoint, +start states can be ignored. +It is, however, sometimes helpful to control the lexer start state externally, allowing contextual +information to influence the lexer. +This is why the \texttt{strm} type includes a concrete \texttt{start\char`\_state} component. + +Note that the \texttt{AntlrStreamPos} module is part of the \texttt{ml-lpt-lib} library described in Chapter~\ref{ch:ml-lpt-lib}. +An \texttt{AntlrStreamPos.sourcemap} value, combined with an \texttt{AntlrStreamPos.pos} value, compactly represents position information (line number, column number, and so on). +An \texttt{AntlrStreamPos.span} is a pair of \texttt{pos} values that specify the character position of the +leftmost (first) character of the scanned token and the position of the +rightmost (last) character in the token (respectively). +By default, the span will cover the entire sequence of characters consumed by a call to the lexer, but one may +use the \texttt{skip} function (see Section~\ref{sec:ulex-actions}) to ignore leading whitespace, +\textit{etc.}\ when computing the span. +Code to compute the span is generated automatically by \texttt{ml-ulex}. + +\section{\texttt{ml-lex} compatibility}\label{sec:lex-compat} + +Running \ulex{} with the \texttt{--ml-lex-mode} option will cause it to process its input file using the ML-Lex format, and interpret the actions in a ML-Lex-compatible way. The compatibility extends to the bugs in ML-Lex, so in particular \texttt{yylineno} starts at 2 in \texttt{--ml-lex-mode}. diff --git a/ml-lpt/gen/Makefile b/ml-lpt/gen/Makefile new file mode 100644 index 0000000..806d4d1 --- /dev/null +++ b/ml-lpt/gen/Makefile @@ -0,0 +1,24 @@ +# Makefile +# +# COPYRIGHT (c) 2009 The Fellowship of SML/NJ (http://www.smlnj.org) +# All rights reserved. +# + +SHELL = /bin/sh + +MLTON = mlton +MLTON_FLAGS = + +TARGET = gen-template-struct + +SOURCES = gen-template-struct.mlb \ + gen-template-struct.sml \ + mlton-main.sml + +$(TARGET): $(SOURCES) + $(MLTON) -output $(TARGET) $(MLTON_FLAGS) $(TARGET).mlb + +.PHONEY: clean +clean: + rm -f $(TARGET) + diff --git a/ml-lpt/gen/README b/ml-lpt/gen/README new file mode 100644 index 0000000..6301ab4 --- /dev/null +++ b/ml-lpt/gen/README @@ -0,0 +1,4 @@ +This directory contains a program for generating the templates +as SML modules. This is required to build the system on MLton. +The SML/NJ build process reads the template files directly. + diff --git a/ml-lpt/gen/gen-template-struct.mlb b/ml-lpt/gen/gen-template-struct.mlb new file mode 100644 index 0000000..e84478f --- /dev/null +++ b/ml-lpt/gen/gen-template-struct.mlb @@ -0,0 +1,15 @@ +(* gen-template-struct.mlb + * + * COPYRIGHT (c) 2009 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + *) + +local + + $(SML_LIB)/basis/basis.mlb + + gen-template-struct.sml + +in + mlton-main.sml +end diff --git a/ml-lpt/gen/gen-template-struct.sml b/ml-lpt/gen/gen-template-struct.sml new file mode 100644 index 0000000..7ed39b0 --- /dev/null +++ b/ml-lpt/gen/gen-template-struct.sml @@ -0,0 +1,102 @@ +(* gen-template-struct.sml + * + * COPYRIGHT (c) 2009 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * This is a helper program for converting the template files into SML + * structures. The usage is: + * + * gen-template-struct struct-name template-file output-file + * + * where struct-name is the name of the structure to be generated, + * template-file is the source file for the string, and output-file + * is the generated file. The generated structure will have the + * following signature: + * + * structure : sig + * val template : ExpandFile.template + * end + *) + +structure Gen = + struct + + structure TIO = TextIO + + fun header {fileName, structName} = String.concat [ + "(* ", fileName, "\n", + " *\n\ + \ * COPYRIGHT (c) 2009 The Fellowship of SML/NJ (http://www.smlnj.org)\n\ + \ * All rights reserved.\n\ + \ *\n\ + \ * !!! WARNING: this file is generated; do not edit !!!\n\ + \ *)\n\ + \\n\ + \structure ", structName, " : sig val template : ExpandFile.template end =\n\ + \ struct\n\ + \\n\ + \ val template = ExpandFile.mkTemplateFromList [" + ] + + val tail = "\ + \\n\ + \ ]\n\ + \\n\ + \ end\n\ + \" + + fun gen {outputFile, structName, templateFile} = let + val src = TIO.openIn templateFile + handle ex => ( + TIO.output(TIO.stdErr, concat[ + "Error: unable to open template file \"", + templateFile, "\"\n" + ]); + raise ex) + val dst = TIO.openOut outputFile + handle ex => ( + TIO.output(TIO.stdErr, concat[ + "Error: unable to open output file \"", + outputFile, "\"\n" + ]); + raise ex) + fun done () = (TIO.closeIn src; TIO.closeOut dst) + fun copy first = (case TIO.inputLine src + of NONE => () + | SOME line => ( + if (not first) then TIO.output (dst, ",") else (); + TIO.output (dst, "\n \""); + TIO.output (dst, String.toString line); + TIO.output (dst, "\""); + copy false) + (* end case *)) + in + ((* try *) + TIO.output (dst, header { + fileName=outputFile, structName=structName + }); + copy true; + TIO.output (dst, tail) + handle ex => ( + TIO.output(TIO.stdErr, "Error copying file\n"); + done(); raise ex)); + done() + end + + fun main (_, [structName, templateFile, outputFile]) = ( + (* try *) + (gen { + outputFile = outputFile, + structName = structName, + templateFile = templateFile + }; + OS.Process.success) + handle ex => OS.Process.failure) + | main (cmd, _) = ( + TIO.output(TIO.stdErr, concat [ + "usage: ", cmd, " struct-name template-file output-file\n" + ]); + OS.Process.failure) + + end + diff --git a/ml-lpt/gen/mlton-main.sml b/ml-lpt/gen/mlton-main.sml new file mode 100644 index 0000000..31e0ec3 --- /dev/null +++ b/ml-lpt/gen/mlton-main.sml @@ -0,0 +1,7 @@ +(* mlton-main.sml + * + * COPYRIGHT (c) 2009 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + *) + +val _ = OS.Process.exit (Gen.main (CommandLine.name (), CommandLine.arguments ())) diff --git a/ml-lpt/lib/.cm/GUID/antlr-lexer-sig.sml b/ml-lpt/lib/.cm/GUID/antlr-lexer-sig.sml new file mode 100644 index 0000000..8093a9c --- /dev/null +++ b/ml-lpt/lib/.cm/GUID/antlr-lexer-sig.sml @@ -0,0 +1 @@ +guid-$/(ml-lpt-lib.cm):antlr-lexer-sig.sml-1714016082.583 diff --git a/ml-lpt/lib/.cm/GUID/antlr-stream-pos.sml b/ml-lpt/lib/.cm/GUID/antlr-stream-pos.sml new file mode 100644 index 0000000..b49a190 --- /dev/null +++ b/ml-lpt/lib/.cm/GUID/antlr-stream-pos.sml @@ -0,0 +1 @@ +guid-$/(ml-lpt-lib.cm):antlr-stream-pos.sml-1714016082.515 diff --git a/ml-lpt/lib/.cm/GUID/antlr-tokens-sig.sml b/ml-lpt/lib/.cm/GUID/antlr-tokens-sig.sml new file mode 100644 index 0000000..96aac21 --- /dev/null +++ b/ml-lpt/lib/.cm/GUID/antlr-tokens-sig.sml @@ -0,0 +1 @@ +guid-$/(ml-lpt-lib.cm):antlr-tokens-sig.sml-1714016082.512 diff --git a/ml-lpt/lib/.cm/GUID/ebnf.sml b/ml-lpt/lib/.cm/GUID/ebnf.sml new file mode 100644 index 0000000..bef4c9a --- /dev/null +++ b/ml-lpt/lib/.cm/GUID/ebnf.sml @@ -0,0 +1 @@ +guid-$/(ml-lpt-lib.cm):ebnf.sml-1714016082.549 diff --git a/ml-lpt/lib/.cm/GUID/err-handler.sml b/ml-lpt/lib/.cm/GUID/err-handler.sml new file mode 100644 index 0000000..9609887 --- /dev/null +++ b/ml-lpt/lib/.cm/GUID/err-handler.sml @@ -0,0 +1 @@ +guid-$/(ml-lpt-lib.cm):err-handler.sml-1714016082.615 diff --git a/ml-lpt/lib/.cm/GUID/repair.sml b/ml-lpt/lib/.cm/GUID/repair.sml new file mode 100644 index 0000000..c36c92c --- /dev/null +++ b/ml-lpt/lib/.cm/GUID/repair.sml @@ -0,0 +1 @@ +guid-$/(ml-lpt-lib.cm):repair.sml-1714016082.562 diff --git a/ml-lpt/lib/.cm/GUID/ulex-buffer.sml b/ml-lpt/lib/.cm/GUID/ulex-buffer.sml new file mode 100644 index 0000000..f80158b --- /dev/null +++ b/ml-lpt/lib/.cm/GUID/ulex-buffer.sml @@ -0,0 +1 @@ +guid-$/(ml-lpt-lib.cm):ulex-buffer.sml-1714016082.713 diff --git a/ml-lpt/lib/.cm/GUID/wrapped-strm.sml b/ml-lpt/lib/.cm/GUID/wrapped-strm.sml new file mode 100644 index 0000000..3cf1825 --- /dev/null +++ b/ml-lpt/lib/.cm/GUID/wrapped-strm.sml @@ -0,0 +1 @@ +guid-$/(ml-lpt-lib.cm):wrapped-strm.sml-1714016082.586 diff --git a/ml-lpt/lib/.cm/SKEL/antlr-lexer-sig.sml b/ml-lpt/lib/.cm/SKEL/antlr-lexer-sig.sml new file mode 100644 index 0000000..a5960af --- /dev/null +++ b/ml-lpt/lib/.cm/SKEL/antlr-lexer-sig.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f1d"AntlrStreamPos"ac"ANTLR_LEXER"h0 \ No newline at end of file diff --git a/ml-lpt/lib/.cm/SKEL/antlr-stream-pos.sml b/ml-lpt/lib/.cm/SKEL/antlr-stream-pos.sml new file mode 100644 index 0000000..522433b --- /dev/null +++ b/ml-lpt/lib/.cm/SKEL/antlr-stream-pos.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d3f2Int"d"String"aLPT_STREAM_POS"h1aFilePos"gp1c"INTEGER"ad"AntlrStreamPos"jh1a(gp1gp1 \ No newline at end of file diff --git a/ml-lpt/lib/.cm/SKEL/antlr-tokens-sig.sml b/ml-lpt/lib/.cm/SKEL/antlr-tokens-sig.sml new file mode 100644 index 0000000..5a2fb5a --- /dev/null +++ b/ml-lpt/lib/.cm/SKEL/antlr-tokens-sig.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2aANTLR_TOKENS"h0ac"ANTLR_TOKENS_WITH_CHANGES"h1egp1 \ No newline at end of file diff --git a/ml-lpt/lib/.cm/SKEL/ebnf.sml b/ml-lpt/lib/.cm/SKEL/ebnf.sml new file mode 100644 index 0000000..4150460 --- /dev/null +++ b/ml-lpt/lib/.cm/SKEL/ebnf.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f1d"AntlrStreamPos"ae"AntlrEBNF"i2aS"0f2%d"List") \ No newline at end of file diff --git a/ml-lpt/lib/.cm/SKEL/err-handler.sml b/ml-lpt/lib/.cm/SKEL/err-handler.sml new file mode 100644 index 0000000..9c88519 --- /dev/null +++ b/ml-lpt/lib/.cm/SKEL/err-handler.sml @@ -0,0 +1,2 @@ +Skeleton 5 +ae"AntlrErrHandler"i3aTok"gp1c"ANTLR_TOKENS"aLex"gp1c"ANTLR_LEXER"f7d"AntlrStreamPos"d"SMLofNJ"C/d"List"d"Int"AntlrRepair"Njh2ad"AR"gp13ad"WS"jh2agp1a/gp1/gp1e"AntlrWrappedStream"h0 \ No newline at end of file diff --git a/ml-lpt/lib/.cm/SKEL/repair.sml b/ml-lpt/lib/.cm/SKEL/repair.sml new file mode 100644 index 0000000..a9f4573 --- /dev/null +++ b/ml-lpt/lib/.cm/SKEL/repair.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f3d"AntlrStreamPos"d"List"d"String"ad"AntlrRepair"j05 \ No newline at end of file diff --git a/ml-lpt/lib/.cm/SKEL/ulex-buffer.sml b/ml-lpt/lib/.cm/SKEL/ulex-buffer.sml new file mode 100644 index 0000000..91f5d4b --- /dev/null +++ b/ml-lpt/lib/.cm/SKEL/ulex-buffer.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f4d"AntlrStreamPos"d"Char"d"String"d"Substring"ad"ULexBuffer"jh1ad"W"gp1d"Word"h0 \ No newline at end of file diff --git a/ml-lpt/lib/.cm/SKEL/wrapped-strm.sml b/ml-lpt/lib/.cm/SKEL/wrapped-strm.sml new file mode 100644 index 0000000..d5aad50 --- /dev/null +++ b/ml-lpt/lib/.cm/SKEL/wrapped-strm.sml @@ -0,0 +1,2 @@ +Skeleton 5 +ae"AntlrWrappedStream"i3aTok"gp1c"ANTLR_TOKENS"aLex"gp1c"ANTLR_LEXER"f5Cd"AntlrStreamPos"2Int"AntlrRepair"Njh2egp1%f3d"ListPair"d"String"h0 \ No newline at end of file diff --git a/ml-lpt/lib/.cm/amd64-unix/antlr-lexer-sig.sml b/ml-lpt/lib/.cm/amd64-unix/antlr-lexer-sig.sml new file mode 100644 index 0000000..436747d Binary files /dev/null and b/ml-lpt/lib/.cm/amd64-unix/antlr-lexer-sig.sml differ diff --git a/ml-lpt/lib/.cm/amd64-unix/antlr-stream-pos.sml b/ml-lpt/lib/.cm/amd64-unix/antlr-stream-pos.sml new file mode 100644 index 0000000..8171dde Binary files /dev/null and b/ml-lpt/lib/.cm/amd64-unix/antlr-stream-pos.sml differ diff --git a/ml-lpt/lib/.cm/amd64-unix/antlr-tokens-sig.sml b/ml-lpt/lib/.cm/amd64-unix/antlr-tokens-sig.sml new file mode 100644 index 0000000..4e1139e Binary files /dev/null and b/ml-lpt/lib/.cm/amd64-unix/antlr-tokens-sig.sml differ diff --git a/ml-lpt/lib/.cm/amd64-unix/ebnf.sml b/ml-lpt/lib/.cm/amd64-unix/ebnf.sml new file mode 100644 index 0000000..281a516 Binary files /dev/null and b/ml-lpt/lib/.cm/amd64-unix/ebnf.sml differ diff --git a/ml-lpt/lib/.cm/amd64-unix/err-handler.sml b/ml-lpt/lib/.cm/amd64-unix/err-handler.sml new file mode 100644 index 0000000..6096019 Binary files /dev/null and b/ml-lpt/lib/.cm/amd64-unix/err-handler.sml differ diff --git a/ml-lpt/lib/.cm/amd64-unix/repair.sml b/ml-lpt/lib/.cm/amd64-unix/repair.sml new file mode 100644 index 0000000..40eeeeb Binary files /dev/null and b/ml-lpt/lib/.cm/amd64-unix/repair.sml differ diff --git a/ml-lpt/lib/.cm/amd64-unix/ulex-buffer.sml b/ml-lpt/lib/.cm/amd64-unix/ulex-buffer.sml new file mode 100644 index 0000000..96565f7 Binary files /dev/null and b/ml-lpt/lib/.cm/amd64-unix/ulex-buffer.sml differ diff --git a/ml-lpt/lib/.cm/amd64-unix/wrapped-strm.sml b/ml-lpt/lib/.cm/amd64-unix/wrapped-strm.sml new file mode 100644 index 0000000..a514355 Binary files /dev/null and b/ml-lpt/lib/.cm/amd64-unix/wrapped-strm.sml differ diff --git a/ml-lpt/lib/antlr-lexer-sig.sml b/ml-lpt/lib/antlr-lexer-sig.sml new file mode 100644 index 0000000..4496061 --- /dev/null +++ b/ml-lpt/lib/antlr-lexer-sig.sml @@ -0,0 +1,19 @@ +(* antlr-lexer-sig.sml + * + * COPYRIGHT (c) 2006 + * John Reppy (http://www.cs.uchicago.edu/~jhr) + * Aaron Turon (http://www.cs.uchicago.edu/~adrassi) + * All rights reserved. + * + * Signature for the lexer argument to parser functors generated + * by ml-antlr. + *) + +signature ANTLR_LEXER = sig + + type strm + type pos = AntlrStreamPos.pos + + val getPos : strm -> pos + +end \ No newline at end of file diff --git a/ml-lpt/lib/antlr-stream-pos.sml b/ml-lpt/lib/antlr-stream-pos.sml new file mode 100644 index 0000000..2536bf9 --- /dev/null +++ b/ml-lpt/lib/antlr-stream-pos.sml @@ -0,0 +1,131 @@ +(* stream-pos.sml + * + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * Authors: + * John Reppy (http://www.cs.uchicago.edu/~jhr) + * Aaron Turon (http://www.cs.uchicago.edu/~adrassi) + * + * Very simple position tracking and source maps for ml-ulex/ml-antlr + *) + +signature LPT_STREAM_POS = + sig + + (* the representation of file positions *) + structure FilePos : INTEGER + + type pos = FilePos.int + type span = pos * pos + type sourceloc = { fileName : string option, lineNo : int, colNo : int } + type sourcemap + + exception PosMustIncrease + + (* the result of moving forward an integer number of characters *) + val forward : pos * int -> pos + + (* create an anonymous sourcemap (i.e., one with no associated file) *) + val mkSourcemap : unit -> sourcemap + + (* create a sourcemap for the specified file *) + val mkSourcemap' : string -> sourcemap + + val same : sourcemap * sourcemap -> bool + + (* log a new line occurence *) + val markNewLine : sourcemap -> pos -> unit + + (* resychronize to a full source location *) + val resynch : sourcemap -> pos * sourceloc -> unit + + val sourceLoc : sourcemap -> pos -> sourceloc + val fileName : sourcemap -> pos -> string option + val lineNo : sourcemap -> pos -> int + val colNo : sourcemap -> pos -> int + val toString : sourcemap -> pos -> string + val spanToString : sourcemap -> span -> string + + end; + +structure AntlrStreamPos : LPT_STREAM_POS = + struct + + structure FilePos = Int + + type pos = FilePos.int + type span = pos * pos + type sourceloc = { fileName : string option, lineNo : int, colNo : int } + + type sourcemap = (sourceloc * pos) list ref + + exception PosMustIncrease + + fun forward (p, i) = FilePos.+(p, FilePos.fromInt i) + + fun mkSrcMap (fileOpt) : sourcemap = ref [ + ({fileName = fileOpt, lineNo = 1, colNo = 0}, ~1) + ] + + fun mkSourcemap () = mkSrcMap NONE + fun mkSourcemap' fname = mkSrcMap (SOME fname) + + fun same (sm1 : sourcemap, sm2) = (sm1 = sm2) + + fun markNewLine sm (newPos : pos) = let + val ({fileName, lineNo, colNo}, pos) = hd (!sm) + in + if pos < newPos then + sm := ({ fileName = fileName, + lineNo = lineNo + 1, + colNo = 0}, + newPos)::(!sm) + else () (* raise PosMustIncrease *) + end + + fun resynch sm (newPos : pos, sourceLoc) = let + val (_, pos) = hd (!sm) + in + (* if pos < newPos then *) + sm := (sourceLoc, newPos)::(!sm) + (* else raise PosMustIncrease *) + end + + fun findLB ((loc, pos)::sm, pos' : pos) = + if pos <= pos' then (loc, pos) + else findLB(sm, pos') + | findLB _ = raise Fail "impossible" + + fun sourceLoc sm pos = let + val ({fileName, lineNo, colNo}, anchor) = findLB(!sm, pos) + in { + fileName = fileName, + lineNo = lineNo, + colNo = colNo + (pos - anchor) + } end + fun fileName sm pos = #fileName (sourceLoc sm pos) + fun lineNo sm pos = #lineNo (sourceLoc sm pos) + fun colNo sm pos = #colNo (sourceLoc sm pos) + + fun toString sm pos = String.concat [ + "[", case fileName sm pos + of NONE => "" + | SOME f => f ^ ":", + Int.toString (lineNo sm pos), ".", + Int.toString (colNo sm pos), "]"] + + fun spanToString sm (pos1, pos2) = + if lineNo sm pos1 = lineNo sm pos2 andalso + colNo sm pos1 = colNo sm pos2 + then toString sm pos1 + else String.concat [ + "[", case fileName sm pos1 + of NONE => "" + | SOME f => f ^ ":", + Int.toString (lineNo sm pos1), ".", + Int.toString (colNo sm pos1), "-", + Int.toString (lineNo sm pos2), ".", + Int.toString (colNo sm pos2), "]"] + + end diff --git a/ml-lpt/lib/antlr-tokens-sig.sml b/ml-lpt/lib/antlr-tokens-sig.sml new file mode 100644 index 0000000..6b5517e --- /dev/null +++ b/ml-lpt/lib/antlr-tokens-sig.sml @@ -0,0 +1,62 @@ +(* antlr-tokens-sig.sml + * + * COPYRIGHT (c) 2006-2015 + * John Reppy (http://www.cs.uchicago.edu/~jhr) + * Aaron Turon (http://www.cs.uchicago.edu/~adrassi) + * All rights reserved. + * + * Signature for tokens module generated by ml-antlr + *) + +signature ANTLR_TOKENS = + sig + + type token + + (* return a string representation of a token that is suitable for error messages *) + val toString : token -> string + + (* a list of all the tokens in the grammar *) + val allToks : token list + + (* is a token marked as a keyword? Error repairs that do not involve keywords + * are preferred. + *) + val isKW : token -> bool + + (* is a token the end-of-file marker *) + val isEOF : token -> bool + +end + +(* an extension of the TOKENS signature for when the grammar specifies + * preferred insertions (%prefer) and substitutions (%change). + *) +signature ANTLR_TOKENS_WITH_CHANGES = + sig + + include ANTLR_TOKENS + + (* an element in a stream of preferred changes. An item + * + * CHANGE(strm, n, new, next) + * + * has components + * s the underlying token stream after consuming the old + * tokens. + * n the number of old tokens removed + * new the list of new tokens to replace the old tokens + * next the function for getting the next change in the stream + * of preferred changes. + *) + datatype 'strm changes + = CHANGE of 'strm * int * token list * ('strm -> 'strm changes) + | NOCHANGE; + + (* given a function for reading tokens from the token stream, return + * a function for getting the first change in the preferred changes + * stream. + *) + val changes : ('strm -> token * 'strm) -> 'strm -> 'strm changes + + end diff --git a/ml-lpt/lib/ebnf.sml b/ml-lpt/lib/ebnf.sml new file mode 100644 index 0000000..b7832fd --- /dev/null +++ b/ml-lpt/lib/ebnf.sml @@ -0,0 +1,50 @@ +(* ebnf.sml + * + * COPYRIGHT (c) 2006 + * John Reppy (http://www.cs.uchicago.edu/~jhr) + * Aaron Turon (http://www.cs.uchicago.edu/~adrassi) + * All rights reserved. + * + * EBNF combinators used for ml-antlr. + * + * NOTE: this functor is no longer used; instead we have inlined its code into the + * body of the generated parser module. It remains in the ml-lpt library to + * preserve backward compatibility with parsers that were generated with an older + * version of the ml-antlr tool. [JHR; 2015-09-22] + *) + +functor AntlrEBNF (S : sig + type strm + val getSpan : strm -> AntlrStreamPos.span + end) = +struct + + fun optional (pred, parse, strm) = + if pred strm + then let + val (y, span, strm') = parse strm + in + (SOME y, span, strm') + end + else (NONE, S.getSpan strm, strm) + + fun closure (pred, parse, strm) = let + fun iter (strm, (left, right), ys) = + if pred strm + then let + val (y, (_, right'), strm') = parse strm + in iter (strm', (left, right'), y::ys) + end + else (List.rev ys, (left, right), strm) + in + iter (strm, S.getSpan strm, []) + end + + fun posclos (pred, parse, strm) = let + val (y, (left, _), strm') = parse strm + val (ys, (_, right), strm'') = closure (pred, parse, strm') + in + (y::ys, (left, right), strm'') + end + +end \ No newline at end of file diff --git a/ml-lpt/lib/err-handler.sml b/ml-lpt/lib/err-handler.sml new file mode 100644 index 0000000..4026cd5 --- /dev/null +++ b/ml-lpt/lib/err-handler.sml @@ -0,0 +1,401 @@ +(* err-handler.sml + * + * COPYRIGHT (c) 2006 + * John Reppy (http://www.cs.uchicago.edu/~jhr) + * Aaron Turon (http://www.cs.uchicago.edu/~adrassi) + * All rights reserved. + * + * Error repair for ml-antlr + *) + +functor AntlrErrHandler ( + structure Tok : ANTLR_TOKENS + structure Lex : ANTLR_LEXER + ) : sig + + exception ParseError + + type 'a err_handler + type wstream + type lexer = Lex.strm -> Tok.token * AntlrStreamPos.span * Lex.strm + type 'a wreader = wstream -> 'a * AntlrStreamPos.span * wstream + + val mkErrHandler : { get : unit -> 'a, put : 'a -> unit } + -> 'a err_handler * Tok.token wreader + val launch : 'a err_handler * lexer * 'b wreader * bool -> + Lex.strm -> ('b option * Lex.strm * Tok.token AntlrRepair.repair list) + val failure : 'a err_handler -> 'b + + val getPos : wstream -> AntlrStreamPos.pos + val getSpan : wstream -> AntlrStreamPos.span + + val whileDisabled : 'b err_handler -> (unit -> 'a) -> 'a + +(* + val wrap : err_handler -> (R.strm -> ('a * R.strm)) -> R.strm -> ('a * R.strm) + val tryProds : 'b err_handler -> (R.strm -> 'a) list -> R.strm -> 'a +*) + +end = struct + + exception ParseError + exception Unrepairable + + structure AR = AntlrRepair + + structure WS = AntlrWrappedStream( + structure Tok = Tok + structure Lex = Lex) + + type wstream = WS.wstream + val getPos = WS.getPos + val getSpan = WS.getSpan + + type lexer = Lex.strm -> Tok.token * AntlrStreamPos.span * Lex.strm + type 'a wreader = wstream -> 'a * AntlrStreamPos.span * wstream + type 'a checkpoint = 'a * unit SMLofNJ.Cont.cont * wstream + + datatype 'a err_handler = EH of { + checkpoints : 'a checkpoint list ref, + maxPos : WS.tok_pos ref, + cont : unit SMLofNJ.Cont.cont option ref, + get : unit -> 'a, + put : 'a -> unit, + rs : WS.repair_state, + enabled : bool ref + } + + fun getGet (EH {get, ...}) = get + fun getPut (EH {put, ...}) = put + fun getRS (EH {rs, ...}) = rs + + fun getCont (EH {cont, ...}) = !cont + fun setCont (EH {cont, ...}, new) = cont := new + + fun getCheckpoints (EH {checkpoints, ...}) = !checkpoints + fun setCheckpoints (EH {checkpoints, ...}, new) = checkpoints := new + + fun getMaxPos (EH {maxPos, ...}) = !maxPos + fun setMaxPos (EH {maxPos, ...}, new) = maxPos := new + + fun getEnabled (EH {enabled, ...}) = !enabled + fun setEnabled (EH {enabled, ...}, n) = enabled := n +(* + fun getRepairs (EH {repairs, ...}) = !repairs + fun addRepair (EH {repairs, ...}, n) = repairs := (!repairs) @ [n] *) + + fun mkErrHandler {get, put} = let + val cont = ref NONE + val checkpoints = ref [] + val maxPos = ref ~1 + val eh = EH { + cont = cont, checkpoints = checkpoints, + maxPos = maxPos, get = get, put = put, + rs = WS.mkRepairState(), enabled = ref true + } + fun lex ws = (case !cont + of SOME _ => ( + maxPos := Int.max (WS.getTokPos ws, !maxPos); + WS.get1 ws) + | NONE => if WS.getTokPos ws > !maxPos + then let + val () = SMLofNJ.Cont.callcc (fn k => ( + checkpoints := (get(), k, ws) :: !checkpoints; + maxPos := WS.getTokPos ws)) + in + WS.get1 ws + end + else WS.get1 ws + (* end case *)) + in + (eh, lex) + end + + val isEOF = Tok.isEOF o #1 o WS.get1 + + val minAdvance = 1 + + fun restoreCheckpoint (eh, (x, cont, strm)) = ( + getPut eh x; (* restore refcell data for checkpoint *) + setMaxPos (eh, WS.getTokPos strm); + SMLofNJ.Cont.throw cont ()) + + fun tryRepair (eh, c) = let + val oldMax = getMaxPos eh + val firstTime = ref true + val () = SMLofNJ.Cont.callcc (fn k => (setCont (eh, SOME k))) + in + if !firstTime + then ( (* first time through, try the repair *) + firstTime := false; restoreCheckpoint (eh, c)) + else ( (* second time through, return the distance achieved *) + setCont (eh, NONE); getMaxPos eh - oldMax) + end + + local + + val allToks = List.filter (not o Tok.isEOF) Tok.allToks + fun mkDelete strm = (WS.getPos strm, AR.Delete [#1 (WS.get1 strm)]) + fun mkInsert strm tok = (WS.getPos strm, AR.Insert [tok]) + fun mkSubst strm tok = (WS.getPos strm, AR.Subst { old = [#1 (WS.get1 strm)], new = [tok] }) + fun allRepairs strm = + (if isEOF strm then [] else [mkDelete strm]) @ + map (mkInsert strm) allToks @ + (if isEOF strm then [] else map (mkSubst strm) allToks) + + fun involvesKW (_, r) = (case r + of AR.Insert toks => List.exists Tok.isKW toks + | AR.Delete toks => List.exists Tok.isKW toks + | AR.Subst {old,new} => List.exists Tok.isKW (old @ new) + | AR.FailureAt _ => false + (* end case *)) + + in + fun trySingleToken eh = let + val RS = getRS eh + val oldRepairs = WS.getRepairs RS + val oldMax = getMaxPos eh + val oldMaxRepair = WS.maxRepairPos RS + val oldCheckpoints = getCheckpoints eh + fun restoreToErr() = (WS.setRepairs (RS, oldRepairs); setMaxPos (eh, oldMax)) + (* stream value for checkpoint *) + fun strmOf (_, _, strm) = strm + fun setupRepair (r, c::cs) = + WS.setRepairs (RS, WS.addRepair (oldRepairs, WS.getTokPos (strmOf c), r)) + | setupRepair _ = raise Fail "bug" + fun try (_::c::cs, [], best, n) = + if n < 15 andalso WS.getTokPos (strmOf c) > oldMaxRepair + then try (c::cs, allRepairs (strmOf c), best, n+1) + else try ([], [], best, n) + | try (c::cs, r::rs, best, n) = ( + restoreToErr(); setupRepair (r, c::cs); + let val score = tryRepair (eh, c) + - (if involvesKW r then 2 else 0) + + (case #2 r + of AR.Insert _ => ~1 + | AR.Delete _ => 1 + | AR.Subst _ => 0 + | _ => 0) + val oldScore = case best of NONE => 0 + | SOME (_, _, s) => s + in if score > oldScore andalso score > minAdvance + then try (c::cs, rs, SOME (c::cs, r, score), n) + else try (c::cs, rs, best, n) + end) + | try (_, [], SOME (c::cs, r, score), _) = + (setupRepair (r, c::cs); + setCheckpoints (eh, c::cs); + setMaxPos (eh, List.length cs); + restoreCheckpoint (eh, c)) + | try _ = restoreToErr() + val curStrm = strmOf (hd oldCheckpoints) + in if WS.getTokPos curStrm <= WS.maxRepairPos RS then () + else try (oldCheckpoints, allRepairs curStrm, NONE, 1) + end + end + + val maxDel = 50 + + fun tryDeletion eh = let + fun getn (strm, 0, acc) = SOME (rev acc) + | getn (strm, n, acc) = let + val (tok, _, strm') = WS.get1 strm + in + if Tok.isEOF tok then NONE + else getn (strm', n-1, tok::acc) + end + val rs = getRS eh + val oldRepairs = WS.getRepairs rs + val oldMax = getMaxPos eh + val oldRepairMax = WS.maxRepairPos rs + fun restoreToErr() = (WS.setRepairs (rs, oldRepairs); setMaxPos (eh, oldMax)) + (* stream value for checkpoint *) + fun strmOf (_, _, strm) = strm + val cs = getCheckpoints eh + fun tryCS ([], n, max) = () + | tryCS (c::cs, n, max) = + if WS.getTokPos (strmOf c) <= oldRepairMax + orelse oldMax - WS.getTokPos (strmOf c) > maxDel then () + else + (WS.setRepairs (rs, + WS.addRepair (oldRepairs, WS.getTokPos (strmOf c), + (WS.getPos (strmOf c), AR.Delete (valOf (getn (strmOf c, n, [])))))); + setMaxPos (eh, WS.getTokPos (strmOf c)); + if tryRepair (eh, c) > minAdvance + 2 + then (setCheckpoints (eh, c::cs); + restoreCheckpoint (eh, c)) + else (restoreToErr(); tryCS (cs, n+1, max))) + and tryN (n, c::cs, max) = (case getn (strmOf c, n, []) + of NONE => () + | SOME toks => (tryCS (c::cs, n, max); + if n > max then () else tryN (n+1, c::cs, max)) + (* end case *)) + | tryN _ = raise Fail "bug" + in + tryN (1, [hd cs], 5); + tryN (1, cs, maxDel) + end + + fun failure eh = + if getEnabled eh + then (case getCont eh + of NONE => (trySingleToken eh; + tryDeletion eh; + raise Unrepairable) + | SOME k => SMLofNJ.Cont.throw k () + (* end case *)) + else raise ParseError + + fun launch (eh, lex, parse, reqEOF) strm = let + val wstrm = WS.wrap (getRS eh, strm, lex) + in let val (result, _, wstrm') = parse wstrm + val (strm', repairs) = WS.unwrap wstrm' + in + if reqEOF andalso not (isEOF wstrm') then failure eh + else (); + (SOME result, strm', repairs) + end + handle Unrepairable => let + val (_, repairs) = WS.unwrap wstrm + val (tok, (pos, _), _) = (WS.get1 o #3 o hd o getCheckpoints) eh + in (NONE, strm, repairs @ [(pos, AR.FailureAt tok)]) end + end + + fun whileDisabled eh f = let + val oldEnabled = getEnabled eh + in + setEnabled (eh, false); + (f () handle e => (setEnabled (eh, oldEnabled); + raise e)) + before setEnabled (eh, oldEnabled) + end + +(* + fun throwIfEH (eh, t) = + if getEnabled eh then + Option.app (fn k => SMLofNJ.Cont.throw k (SOME t)) (getCont eh) + else () + + fun wrap eh f t = if not (getEnabled eh) then f t else let + val cont_ref : retry_cont option ref = ref NONE + val state = (getGet eh) () + val t' = SMLofNJ.Cont.callcc (fn k => (cont_ref := SOME k; t)) + val retry = (t', valOf (!cont_ref)) + in + getPut eh state; + f t' + handle RepairableError => ( + throwIfEH (eh, t'); + raise JumpOut [retry]) + | JumpOut stack => ( + throwIfEH (eh, t'); + raise JumpOut (retry::stack)) + end + + fun findWindow (stack) = let + val revStack = rev stack + val rightMost = hd revStack + fun TOf (t, _) = t + fun find [] = raise (Fail "BUG: findWindow given an empty stack") + | find [top] = (top, rightMost) + | find (top::stack) = + if R.farEnoughWindow {startAt = TOf top, endAt = TOf rightMost} + then (top, rightMost) + else find stack + in + find revStack + end + + fun tryRepair (eh, cont) t = + (case SMLofNJ.Cont.callcc (fn k => (setCont (eh, SOME k); NONE)) + of NONE => + (* first time through, try the repair *) + SMLofNJ.Cont.throw cont t + | SOME t' => + (* second time through, return the new right-most strm *) + (setCont (eh, NONE); t') + (* end case *)) + + fun primaryRepair (eh, stack) = let + val ((leftT, leftCont), (rightT, rightCont)) = + findWindow stack + val repair = R.chooseRepair { + startAt = leftT, + endAt = rightT, + try = tryRepair (eh, leftCont) + } + in case repair + of SOME {repair, repaired} => + SOME (repair, leftCont, repaired) + | NONE => NONE + end + + fun secondaryRepair (eh, revStack) = let + val stack = rev revStack + val (errStrm, errCont) = hd stack + fun try ((strm, cont), strm', next) = let + val strm'' = tryRepair (eh, cont) strm' + in case (R.tryDeletion {oldStartAt = strm, + startAt = strm', + endAt = strm''}) + of SOME r => SOME (r, cont, strm') + | NONE => next() + end + fun rightRepair (strm, n) = + if n = 0 then NONE + else let + val strm' = R.skip (strm, 1) + in + try (hd stack, strm', fn () => rightRepair (strm', n-1)) + end + fun leftRightRepair (strm, []) = + if R.isEmpty strm then + (addRepair (eh, (R.getPos errStrm, + Repair.FailureAt (#1 (R.get1 errStrm)))); + raise UnrepairableError) + else leftRightRepair (R.skip (strm, 1), stack) + | leftRightRepair (strm, top::stack) = + try (top, strm, fn () => leftRightRepair (strm, stack)) + in case rightRepair (errStrm, 5) + of SOME r => r + | _ => valOf (leftRightRepair (errStrm, [])) + end + + fun repair (eh, stack) = (case primaryRepair (eh, stack) + of SOME r => r + | NONE => secondaryRepair (eh, stack) + (* end case *)) + + fun launch eh f t = let + val (x, _, t') = wrap eh f t + handle JumpOut stack => let + val (r, cont, t') = repair (eh, stack) + in + addRepair (eh, r); + SMLofNJ.Cont.throw cont t' + end + in + throwIfEH (eh, t'); + (SOME x, t', getRepairs eh) + end + handle UnrepairableError => + (NONE, t, getRepairs eh) +*) + +(* + fun tryProds eh prods strm = let + fun try [] = raise RepairableError + | try (prod :: prods) = let + val state = (getGet eh) () + in + whileDisabled eh (fn () => prod strm) + handle _ => + (getPut eh state; + try (prods)) + end + in + try prods + end +*) + +end diff --git a/ml-lpt/lib/ml-lpt-lib.cm b/ml-lpt/lib/ml-lpt-lib.cm new file mode 100644 index 0000000..9e53725 --- /dev/null +++ b/ml-lpt/lib/ml-lpt-lib.cm @@ -0,0 +1,42 @@ +(* ml-lpt-lib.cm + * + * COPYRIGHT (c) 2006 + * John Reppy (http://www.cs.uchicago.edu/~jhr) + * Aaron Turon (http://www.cs.uchicago.edu/~adrassi) + * All rights reserved. + * + * Sources file for ml-lpt lib + *) + +Library + + signature ANTLR_LEXER + signature ANTLR_TOKENS + + structure AntlrRepair + structure AntlrStreamPos + structure ULexBuffer + structure UTF8 (* from smlnj-lib.cm; used in generated scanners *) + + functor AntlrEBNF + functor AntlrErrHandler + +is + + $/basis.cm + $/smlnj-lib.cm + ebnf.sml + err-handler.sml + antlr-lexer-sig.sml + repair.sml + +(* + repairable-strm-sig.sml + repairable-strm.sml +*) + wrapped-strm.sml + + antlr-stream-pos.sml + antlr-tokens-sig.sml + + ulex-buffer.sml diff --git a/ml-lpt/lib/ml-lpt-lib.mlb b/ml-lpt/lib/ml-lpt-lib.mlb new file mode 100644 index 0000000..14413ce --- /dev/null +++ b/ml-lpt/lib/ml-lpt-lib.mlb @@ -0,0 +1,39 @@ +(* ml-lpt-lib.mlb + * + * COPYRIGHT (c) 2008 + * John Reppy (http://www.cs.uchicago.edu/~jhr) + * Aaron Turon (http://www.cs.uchicago.edu/~adrassi) + * All rights reserved. + * + * MLB file for ml-lpt lib + *) + +local + + $(SML_LIB)/basis/basis.mlb + $(SML_LIB)/basis/sml-nj.mlb + $(SML_LIB)/smlnj-lib/Util/smlnj-lib.mlb + + antlr-stream-pos.sml + ebnf.sml + antlr-lexer-sig.sml + antlr-tokens-sig.sml + repair.sml + wrapped-strm.sml + err-handler.sml + ulex-buffer.sml + +in + + signature ANTLR_LEXER + signature ANTLR_TOKENS + + structure AntlrRepair + structure AntlrStreamPos + structure ULexBuffer + structure UTF8 (* from smlnj-lib.cm; used in generated scanners *) + + functor AntlrEBNF + functor AntlrErrHandler + +end diff --git a/ml-lpt/lib/repair.sml b/ml-lpt/lib/repair.sml new file mode 100644 index 0000000..a9debe2 --- /dev/null +++ b/ml-lpt/lib/repair.sml @@ -0,0 +1,81 @@ +(* repair.sml + * + * COPYRIGHT (c) 2006 + * John Reppy (http://www.cs.uchicago.edu/~jhr) + * Aaron Turon (http://www.cs.uchicago.edu/~adrassi) + * All rights reserved. + * + * Representation and pretty-printing of ml-antlr repair actions + *) + +structure AntlrRepair :> sig + + datatype 'tok repair_action + = Insert of 'tok list + | Delete of 'tok list + | Subst of { + old : 'tok list, + new : 'tok list + } + | FailureAt of 'tok + + type 'tok repair = AntlrStreamPos.pos * 'tok repair_action + + val actionToString : ('tok -> string) -> 'tok repair_action -> string + val repairToString : ('tok -> string) -> AntlrStreamPos.sourcemap -> 'tok repair -> string + + (* used to define the context of a repair action *) + datatype add_or_delete = ADD | DEL + + (* return a string representation of the repair action. This version uses the add_or_delete + * information to allow different token names for deletion (more specific) and addition (more + * general). + *) + val actionToString' : (add_or_delete -> 'tok -> string) -> 'tok repair_action -> string + val repairToString' : (add_or_delete -> 'tok -> string) -> AntlrStreamPos.sourcemap -> 'tok repair -> string + + end = struct + + datatype 'a repair_action + = Insert of 'a list + | Delete of 'a list + | Subst of { + old : 'a list, + new : 'a list + } + | FailureAt of 'a + + type 'a repair = AntlrStreamPos.pos * 'a repair_action + + (* used to define the context of a repair action *) + datatype add_or_delete = ADD | DEL + + fun actionToString' tokToString repair = let + val join = String.concatWith " " + in + case repair + of Insert toks => join ("try inserting" :: List.map (tokToString ADD) toks) + | Delete toks => join ("try deleting" :: List.map (tokToString DEL) toks) + | Subst {old, new} => join( + "try substituting" :: List.map (tokToString ADD) new @ + "for" :: List.map (tokToString DEL) old) + | FailureAt tok => "syntax error at " ^ tokToString DEL tok + (* end case *) + end + + fun repairToString' tokToString sm (pos, repair) = + (AntlrStreamPos.toString sm pos ^ ": " ^ actionToString' tokToString repair) + + fun actionToString tokToString = let + fun tok2s _ tok = tokToString tok + in + actionToString' tok2s + end + + fun repairToString tokToString = let + fun tok2s _ tok = tokToString tok + in + repairToString' tok2s + end + + end diff --git a/ml-lpt/lib/ulex-buffer.sml b/ml-lpt/lib/ulex-buffer.sml new file mode 100644 index 0000000..a001d6f --- /dev/null +++ b/ml-lpt/lib/ulex-buffer.sml @@ -0,0 +1,171 @@ +(* ulex-buffer.sml + * + * COPYRIGHT (c) 2006 + * John Reppy (http://www.cs.uchicago.edu/~jhr) + * Aaron Turon (http://www.cs.uchicago.edu/~adrassi) + * All rights reserved. + * + * Forward-chained buffers for lexing + *) + +structure ULexBuffer : sig + + type stream + + exception Incomplete (* raised by getu on an incomplete multi-byte character *) + exception Invalid (* raised by getu on invalid code points *) + + val mkStream : (AntlrStreamPos.pos * (unit -> string)) -> stream + val getc : stream -> (char * stream) option + val getu : stream -> (word * stream) option + val getpos : stream -> AntlrStreamPos.pos + val subtract : stream * stream -> Substring.substring + val eof : stream -> bool + val lastWasNL : stream -> bool + + end = struct + + structure W = Word + + datatype stream = S of (buf * int * bool) + and buf = B of { + data : string, + basePos : AntlrStreamPos.pos, + more : more ref, + input : unit -> string + } + and more = UNKNOWN | YES of buf | NO + + fun mkStream (pos, input) = + (S (B {data = "", basePos = pos, + more = ref UNKNOWN, + input = input}, + 0, true)) + + (* advance the stream to the next block of input *) + fun advance (data, input, basePos, more) = (case !more + of UNKNOWN => (case input() + of "" => (more := NO; NO) + | data' => let + val buf' = B { + data = data', + basePos = AntlrStreamPos.forward (basePos, String.size data), + more = ref UNKNOWN, + input = input + } + in + more := YES buf'; + YES buf' + end + (* end case *)) + | m => m + (* end case *)) + + fun getc (S(buf as B{data, basePos, more, input}, pos, lastWasNL)) = + if pos < String.size data + then let + val c = String.sub (data, pos) + in + SOME (c, S (buf, pos+1, c = #"\n")) + end + else (case advance(data, input, basePos, more) + of NO => NONE + | YES buf' => getc (S (buf', 0, lastWasNL)) + | UNKNOWN => raise Fail "impossible" + (* end case *)) + + exception Incomplete + exception Invalid + +(* NOTE: surrogates (U+D800 to U+DFFF) and values larger than U+10FFFF are + * not valid Unicode values. + *) + (* get the next UTF-8 character represented as a word *) + fun getu (S(buf as B{data, basePos, more, input}, pos, _)) = + if pos < String.size data + then let + val c = W.fromInt(Char.ord(String.sub(data, pos))) + in + if (c < 0w128) + then SOME(c, S(buf, pos+1, c = 0w10)) (* ord #"\n" = 10 *) + else let (* multibyte character *) + fun getByte (S(buf as B{data, basePos, more, input}, pos, _)) = + if pos < String.size data + then let + val c = W.fromInt(Char.ord(String.sub(data, pos))) + in + SOME (c, S (buf, pos+1, false)) + end + else (case advance(data, input, basePos, more) + of NO => NONE + | YES buf' => getByte (S (buf', 0, false)) + | UNKNOWN => raise Fail "impossible" + (* end case *)) + fun getCByte (wc, strm) = (case getByte strm + of NONE => raise Incomplete + | SOME(b, strm') => if (W.andb(0wxc0, b) = 0wx80) + then (W.orb(W.<<(wc, 0w6), W.andb(0wx3f, b)), strm') + else raise Incomplete + (* end case *)) + val strm = S(buf, pos+1, false) + in +(* TODO: should also be checking for over-long sequences *) + if (W.andb(c, 0wxe0) = 0wxc0) + (* 2-byte character *) + then SOME(getCByte (W.andb(0wx1f, c), strm)) + else if (W.andb(c, 0wxf0) = 0wxe0) + (* 3-byte character *) + then let + val (w, strm') = getCByte(getCByte(W.andb(0wx0f, c), strm)) + in + (* check for surrogate halves, which are not valid UTF-8 *) + if (w < 0wxd800) orelse (0wxdfff < w) + then SOME(w, strm') + else raise Invalid + end + else if (W.andb(c, 0wxf8) = 0wxf0) + (* 4-byte character *) + then let + val (w, strm') = + getCByte(getCByte(getCByte(W.andb(0wx07, c), strm))) + in + (* check for too-big values *) + if (w <= 0wx10ffff) + then SOME(w, strm') + else raise Invalid + end + else raise Incomplete + end + end + (* advance buffer *) + else (case advance(data, input, basePos, more) + of NO => NONE + | YES buf' => getu (S(buf', 0, false)) + | UNKNOWN => raise Fail "impossible" + (* end case *)) + + fun getpos (S (B {basePos, ...}, pos, _)) = AntlrStreamPos.forward (basePos, pos) + + fun subtract (new, old) = let + val (S (B {data = ndata, basePos = nbasePos, ...}, npos, _)) = new + val (S (B {data = odata, basePos = obasePos, + more, input}, opos, _)) = old + in + if nbasePos = obasePos then + Substring.substring (ndata, opos, npos-opos) + else case !more + of NO => raise Fail "BUG: ULexBuffer.subtract, but buffers are unrelated" + | UNKNOWN => raise Fail "BUG: ULexBuffer.subtract, but buffers are unrelated" + | YES buf => + Substring.extract ( + Substring.concat [ + Substring.extract (odata, opos, NONE), + subtract (new, S (buf, 0, false))], + 0, NONE) + end + + fun eof s = not (isSome (getc s)) + + fun lastWasNL (S (_, _, lastWasNL)) = lastWasNL + + end diff --git a/ml-lpt/lib/wrapped-strm.sml b/ml-lpt/lib/wrapped-strm.sml new file mode 100644 index 0000000..50ba7fe --- /dev/null +++ b/ml-lpt/lib/wrapped-strm.sml @@ -0,0 +1,110 @@ +(* wrapped-strm.sml + * + * COPYRIGHT (c) 2006 + * John Reppy (http://www.cs.uchicago.edu/~jhr) + * Aaron Turon (http://www.cs.uchicago.edu/~adrassi) + * All rights reserved. + * + * "wrapped" streams, which track the number of tokens read + * and allow "prepending" a sequence of tokens. + *) + +functor AntlrWrappedStream ( + structure Tok : ANTLR_TOKENS + structure Lex : ANTLR_LEXER + ) :> sig + + type tok_pos = Int.int (* position in terms of number of tokens *) + type lexer = Lex.strm -> Tok.token * AntlrStreamPos.span * Lex.strm + + type repairs + val addRepair : repairs * tok_pos * Tok.token AntlrRepair.repair -> repairs + + type repair_state + val mkRepairState : unit -> repair_state + val getRepairs : repair_state -> repairs + val setRepairs : repair_state * repairs -> unit + val maxRepairPos : repair_state -> tok_pos + + type wstream + val wrap : repair_state * Lex.strm * lexer -> wstream + val unwrap : wstream -> Lex.strm * Tok.token AntlrRepair.repair list + + val get1 : wstream -> Tok.token * AntlrStreamPos.span * wstream + val getPos : wstream -> AntlrStreamPos.pos + val getSpan : wstream -> AntlrStreamPos.span + val getTokPos : wstream -> tok_pos + +end = struct + + type tok_pos = Int.int (* position in terms of number of tokens *) + type repair = tok_pos * Tok.token AntlrRepair.repair + type repairs = repair list + type repair_state = repairs ref (* invariant: at most one repair per tok_pos *) + type lexer = Lex.strm -> Tok.token * AntlrStreamPos.span * Lex.strm + + datatype global_state = GS of { + lex : (Lex.strm -> Tok.token * AntlrStreamPos.span * Lex.strm), + repairs : repair_state + } + + datatype wstream = WSTREAM of { + curTok : tok_pos, + strm : Lex.strm, + gs : global_state + } + + fun mkRepairState() = ref [] + fun getRepairs repairs = !repairs + fun setRepairs (repairs, new) = repairs := new + fun maxRepairPos (ref []) = ~1 + | maxRepairPos (ref ((p, _)::_)) = p + + open AntlrRepair + + fun addRepair (rs, pos, r) = + if pos > maxRepairPos (ref rs) then (pos, r)::rs + else raise Fail (String.concat [ + "bug: repairs not monotonic adding at ", + Int.toString pos, " to a max pos of ", + Int.toString (maxRepairPos (ref rs))]) + + fun wrap (repairs, strm, lex) = + WSTREAM {strm = strm, curTok = 0, gs = GS {lex = lex, repairs = repairs}} + fun unwrap (WSTREAM {strm, gs = GS {repairs, ...}, ...}) = + (strm, rev (#2 (ListPair.unzip (!repairs)))) + + fun skip1 lex strm = let + val (_, _, strm') = lex strm + in strm' end + fun get1 (WSTREAM {strm, curTok, gs = gs as GS {lex, repairs}}) = let + fun findRepair [] = NONE + | findRepair ((pos, r)::rs) = if curTok = pos then SOME r + else findRepair rs + in case findRepair (!repairs) + of NONE => let + val (tok, span, strm') = lex strm + in + (tok, span, WSTREAM {strm = strm', curTok = curTok + 1, gs = gs}) + end + | SOME (p, Insert [tok]) => + (tok, (p, p), WSTREAM {strm = strm, curTok = curTok + 1, gs = gs}) + | SOME (p, Delete toks) => let + val strm' = foldl (fn (_, s) => (skip1 lex) s) strm toks + val (tok, span, strm'') = lex strm' + in + (tok, span, WSTREAM {strm = strm'', curTok = curTok + 1, gs = gs}) + end + | SOME (p, Subst {old = [old], new = [new]}) => + (new, (p, p), WSTREAM {strm = skip1 lex strm, curTok = curTok + 1, gs = gs}) + | SOME (p, FailureAt _) => raise Fail "bug: findRepair found FailureAt" + | _ => raise Fail "bug: unimplemented" + end + + (* get position AFTER trimming whitespace *) + fun getPos ws = let val (_, (left, _), _) = get1 ws in left end + fun getSpan ws = (getPos ws, getPos ws) + fun getTokPos (WSTREAM {curTok, ...}) = curTok + + +end diff --git a/ml-lpt/ml-antlr/.cm/GUID/action.sml b/ml-lpt/ml-antlr/.cm/GUID/action.sml new file mode 100644 index 0000000..351e641 --- /dev/null +++ b/ml-lpt/ml-antlr/.cm/GUID/action.sml @@ -0,0 +1 @@ +guid-(sources.cm):action.sml-1714016113.270 diff --git a/ml-lpt/ml-antlr/.cm/GUID/check-grammar.sml b/ml-lpt/ml-antlr/.cm/GUID/check-grammar.sml new file mode 100644 index 0000000..2a33381 --- /dev/null +++ b/ml-lpt/ml-antlr/.cm/GUID/check-grammar.sml @@ -0,0 +1 @@ +guid-(sources.cm):check-grammar.sml-1714016115.077 diff --git a/ml-lpt/ml-antlr/.cm/GUID/compute-predict.sml b/ml-lpt/ml-antlr/.cm/GUID/compute-predict.sml new file mode 100644 index 0000000..fad2424 --- /dev/null +++ b/ml-lpt/ml-antlr/.cm/GUID/compute-predict.sml @@ -0,0 +1 @@ +guid-(sources.cm):compute-predict.sml-1714016115.379 diff --git a/ml-lpt/ml-antlr/.cm/GUID/err.sml b/ml-lpt/ml-antlr/.cm/GUID/err.sml new file mode 100644 index 0000000..66b6ed6 --- /dev/null +++ b/ml-lpt/ml-antlr/.cm/GUID/err.sml @@ -0,0 +1 @@ +guid-(sources.cm):err.sml-1714016113.218 diff --git a/ml-lpt/ml-antlr/.cm/GUID/gla.sml b/ml-lpt/ml-antlr/.cm/GUID/gla.sml new file mode 100644 index 0000000..b3b8d44 --- /dev/null +++ b/ml-lpt/ml-antlr/.cm/GUID/gla.sml @@ -0,0 +1 @@ +guid-(sources.cm):gla.sml-1714016114.274 diff --git a/ml-lpt/ml-antlr/.cm/GUID/item.sml b/ml-lpt/ml-antlr/.cm/GUID/item.sml new file mode 100644 index 0000000..b744498 --- /dev/null +++ b/ml-lpt/ml-antlr/.cm/GUID/item.sml @@ -0,0 +1 @@ +guid-(sources.cm):item.sml-1714016113.948 diff --git a/ml-lpt/ml-antlr/.cm/GUID/llk-spec.sml b/ml-lpt/ml-antlr/.cm/GUID/llk-spec.sml new file mode 100644 index 0000000..7c23392 --- /dev/null +++ b/ml-lpt/ml-antlr/.cm/GUID/llk-spec.sml @@ -0,0 +1 @@ +guid-(sources.cm):llk-spec.sml-1714016113.293 diff --git a/ml-lpt/ml-antlr/.cm/GUID/main.sml b/ml-lpt/ml-antlr/.cm/GUID/main.sml new file mode 100644 index 0000000..0353a1f --- /dev/null +++ b/ml-lpt/ml-antlr/.cm/GUID/main.sml @@ -0,0 +1 @@ +guid-(sources.cm):main.sml-1714016115.436 diff --git a/ml-lpt/ml-antlr/.cm/GUID/nonterm.sml b/ml-lpt/ml-antlr/.cm/GUID/nonterm.sml new file mode 100644 index 0000000..b9e682e --- /dev/null +++ b/ml-lpt/ml-antlr/.cm/GUID/nonterm.sml @@ -0,0 +1 @@ +guid-(sources.cm):nonterm.sml-1714016113.307 diff --git a/ml-lpt/ml-antlr/.cm/GUID/options.sml b/ml-lpt/ml-antlr/.cm/GUID/options.sml new file mode 100644 index 0000000..f5b635c --- /dev/null +++ b/ml-lpt/ml-antlr/.cm/GUID/options.sml @@ -0,0 +1 @@ +guid-(sources.cm):options.sml-1714016114.003 diff --git a/ml-lpt/ml-antlr/.cm/GUID/predict.sml b/ml-lpt/ml-antlr/.cm/GUID/predict.sml new file mode 100644 index 0000000..e2f9b6e --- /dev/null +++ b/ml-lpt/ml-antlr/.cm/GUID/predict.sml @@ -0,0 +1 @@ +guid-(sources.cm):predict.sml-1714016114.009 diff --git a/ml-lpt/ml-antlr/.cm/GUID/preitem.sml b/ml-lpt/ml-antlr/.cm/GUID/preitem.sml new file mode 100644 index 0000000..94cc1bc --- /dev/null +++ b/ml-lpt/ml-antlr/.cm/GUID/preitem.sml @@ -0,0 +1 @@ +guid-(sources.cm):preitem.sml-1714016113.478 diff --git a/ml-lpt/ml-antlr/.cm/GUID/prod.sml b/ml-lpt/ml-antlr/.cm/GUID/prod.sml new file mode 100644 index 0000000..da44807 --- /dev/null +++ b/ml-lpt/ml-antlr/.cm/GUID/prod.sml @@ -0,0 +1 @@ +guid-(sources.cm):prod.sml-1714016113.974 diff --git a/ml-lpt/ml-antlr/.cm/GUID/token.sml b/ml-lpt/ml-antlr/.cm/GUID/token.sml new file mode 100644 index 0000000..e22b75b --- /dev/null +++ b/ml-lpt/ml-antlr/.cm/GUID/token.sml @@ -0,0 +1 @@ +guid-(sources.cm):token.sml-1714016113.411 diff --git a/ml-lpt/ml-antlr/.cm/SKEL/action.sml b/ml-lpt/ml-antlr/.cm/SKEL/action.sml new file mode 100644 index 0000000..b1f4bce --- /dev/null +++ b/ml-lpt/ml-antlr/.cm/SKEL/action.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f2d"Int"d"Err"ad"Action"j0 \ No newline at end of file diff --git a/ml-lpt/ml-antlr/.cm/SKEL/check-grammar.sml b/ml-lpt/ml-antlr/.cm/SKEL/check-grammar.sml new file mode 100644 index 0000000..99ae54c --- /dev/null +++ b/ml-lpt/ml-antlr/.cm/SKEL/check-grammar.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f8GrammarSyntax"d"ListPair"d"Nonterm"Cd"Action"LLKSpec"d"List"d"Int"d"Option"CAtomSet"d"Atom"d"Err"d"ParseFile"d"Token"Nad"CheckGrammar"jh5Cad"Syn"gp1ad"S"gp13ad"ATbl"gp1d"AtomTable"ad"AMap"gp1d"AtomMap"ad"ASet"gp1Nh0 \ No newline at end of file diff --git a/ml-lpt/ml-antlr/.cm/SKEL/compute-predict.sml b/ml-lpt/ml-antlr/.cm/SKEL/compute-predict.sml new file mode 100644 index 0000000..965a2db --- /dev/null +++ b/ml-lpt/ml-antlr/.cm/SKEL/compute-predict.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f6Nonterm"CLLKSpec"d"Prod"d"List"d"Int"Predict"Cd"String"d"Err"Token"d"GLA"d"TextIO"Nad"ComputePredict"jh4ad"P"gp1-ad"TSet"gp2d"Set"ad"NMap"gp2d"Map"ad"S"gp1h0 \ No newline at end of file diff --git a/ml-lpt/ml-antlr/.cm/SKEL/err.sml b/ml-lpt/ml-antlr/.cm/SKEL/err.sml new file mode 100644 index 0000000..cd1c69d --- /dev/null +++ b/ml-lpt/ml-antlr/.cm/SKEL/err.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f4d"List"d"Int"d"String"d"TextIO"ad"Err"h0 \ No newline at end of file diff --git a/ml-lpt/ml-antlr/.cm/SKEL/gla.sml b/ml-lpt/ml-antlr/.cm/SKEL/gla.sml new file mode 100644 index 0000000..8794e1a --- /dev/null +++ b/ml-lpt/ml-antlr/.cm/SKEL/gla.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f6d"ListPair"CNonterm"LLKSpec"Prod"d"List"d"Int"Cd"Item"d"String"d"Err"Token"d"TextIO"Nad"GLA"jh7ad"S"gp1ad"NMap"gp2Map"Cad"PMap"gp2$ad"TSet"gp2d"Set"aOrd"0ad"SCC"j.gp1e"GraphSCCFn"ad"NodeSet"j>gp1e"RedBlackSetFn"N4 \ No newline at end of file diff --git a/ml-lpt/ml-antlr/.cm/SKEL/item.sml b/ml-lpt/ml-antlr/.cm/SKEL/item.sml new file mode 100644 index 0000000..e1ae694 --- /dev/null +++ b/ml-lpt/ml-antlr/.cm/SKEL/item.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f8d"ListPair"d"Nonterm"LLKSpec"Cd"List"d"Int"d"AtomSet"d"Atom"d"Preitem"Nad"Item"h3ad"S"gp1ad"Set"j0gp1e"RedBlackSetFn"ad"Map"j)gp1e"RedBlackMapFn" \ No newline at end of file diff --git a/ml-lpt/ml-antlr/.cm/SKEL/llk-spec.sml b/ml-lpt/ml-antlr/.cm/SKEL/llk-spec.sml new file mode 100644 index 0000000..cff2c4d --- /dev/null +++ b/ml-lpt/ml-antlr/.cm/SKEL/llk-spec.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f4d"Action"d"Int"d"Atom"d"Err"ad"LLKSpec"h0 \ No newline at end of file diff --git a/ml-lpt/ml-antlr/.cm/SKEL/main.sml b/ml-lpt/ml-antlr/.cm/SKEL/main.sml new file mode 100644 index 0000000..01bf508 --- /dev/null +++ b/ml-lpt/ml-antlr/.cm/SKEL/main.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f6d"LaTeXOutput"Cd"OS"d"Nonterm"d"SMLofNJ"d"LLKSpec"d"List"Cd"Int"d"General"d"Options"d"ComputePredict"d"String"Cd"Err"d"ParseFile"d"CheckGrammar"d"GLA"d"SMLOutput"Nad"Main"j00 \ No newline at end of file diff --git a/ml-lpt/ml-antlr/.cm/SKEL/nonterm.sml b/ml-lpt/ml-antlr/.cm/SKEL/nonterm.sml new file mode 100644 index 0000000..d49c143 --- /dev/null +++ b/ml-lpt/ml-antlr/.cm/SKEL/nonterm.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f5CLLKSpec"d"List"d"Int"d"Atom"d"String"Nad"Nonterm"h5CaOrd"h0ad"Set"j unit + end +*) + +structure LaTeXOutput (* : BACK_END *) = + struct + + structure S = LLKSpec + + (* containsChar : (char list) -> char -> bool *) + fun containsChar (cs : char list) (x : char) = + (case cs + of [] => false + | (ch :: chs) => (ch = x) orelse (containsChar chs x) + (* end case *)) + + (* removeChar : (char list) -> char -> (char list) *) + fun removeChar (cs : char list) (x : char) = + (case cs + of [] => [] + | (ch :: chs) => if (ch = x) then chs else (removeChar chs x) + (* end case *)) + + (* escape : char -> string -> string *) + fun escape c = let + fun backslash [] = [] + | backslash (x::xs) = + if x = c + then #"\\" :: x :: (backslash xs) + else x :: (backslash xs) + in + (implode o backslash o explode) + end + + (* backslashFirst : (char list) -> (char list) *) + (* put backslash first if present, otherwise do nothing *) + fun backslashFirst cs = + if containsChar cs #"\\" + then #"\\" :: (removeChar cs #"\\") + else cs + + (* escapeL : (char list) -> string -> string *) + (* escape the characters in cs by prepending backslashes *) + (* escape backslash first to avoid double-escaping *) + fun escapeL cs s = let + val cs' = backslashFirst cs + fun escl ([], s) = s + | escl (ch :: chs, s) = escl (chs, escape ch s) + in + escl (cs', s) + end + + fun curry f = (fn a => fn b => f (a, b)) + fun flip f = (fn (a, b) => f (b, a)) + val drop = List.drop + fun clipby n = implode o rev o (((curry o flip) drop) n) o rev o explode + + fun wrap d s = concat [d, s, d] + + fun maybewrap (d, t) = fn s => if s = t then wrap d t else s + + fun mathif t = maybewrap ("$", t) + + fun stripquotes s = + if String.isPrefix "\"" s + then stripquotes (String.extract (s, 1, NONE)) + else if String.isSuffix "\"" s + then stripquotes (clipby 1 s) + else s + + (* mathescape : string -> string -> string *) + (* to surround all occurences of s with $ . $ *) + fun mathescape s s' = + if (size s) > (size s') + then s' + else if String.isPrefix s s' + then concat [wrap "$" s, + mathescape s (String.extract (s', String.size s, NONE))] + else concat [String.substring (s', 0, 1), + mathescape s (String.extract (s', 1, NONE))] + + (* mathescapeL : (string list) -> string -> string *) + fun mathescapeL ss s' = + (case ss + of [] => s' + | (st :: sts) => mathescapeL sts (mathescape st s') + (* end case *)) + + (* isID: string -> bool *) + (* returns true if a string starts with _ or an alphabetic character *) + (* TODO implement this less cheesily *) + fun isID s = let + fun f [] = false + | f (p :: ps) = (String.isPrefix (Char.toString p) s) + orelse (f ps) + in + f (explode "_abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ") + end + + fun ttif s s' = if s = s' then concat ["{\tt ", s', "}"] else s' + + val cat = String.concat + val catw = String.concatWith + + fun prods nt = let + val (S.NT {prods, ...}) = nt + in + !prods + end + + val lhs = Prod.lhs + val rhs = Prod.items + + val nt2s = Nonterm.toString + val i2s = Item.toString + + val spacer = " $\\;$ " + val spacedPipe = " $\\;|\\;$ " + + fun tt s = concat ["{\\tt ", s, "}"] + fun bf s = concat ["{\\bf ", s, "}"] + + (* some grammar.sty functions *) + structure GrammarSty = struct + fun tag t = (fn s => concat ["\\", t, "{", s, "}"]) + val RHS = tag "RHS" + val GRP = tag "GRP" + val OPT = tag "OPT" + val OPTGRP = tag "OPTGRP" + val LIST = tag "LIST" + val LISTGRP = tag "LISTGRP" + val LISTONE = tag "LISTONE" + val LISTONEGRP = tag "LISTONEGRP" + val ITEM = tag "ITEM" + val kw = tag "kw" + val nt = tag "nt" + val term = tag "term" + val sym = tag "sym" + fun Grammar s = concat ["\\begin{Grammar}\n", s, "\n\n\\end{Grammar}\n"] + fun Rules (nt, rhss) = concat (["\n\\begin{Rules}{", nt, "}\n"] @ + [catw "\n" rhss] @ + ["\n\\end{Rules}"]) + end + + structure G = GrammarSty + + fun prod isSubrule = (if isSubrule then (fn x => x) else G.RHS) o + (catw " ") o (map (item isSubrule)) o rhs + + and nonterm isSubrule nt = let + val ps = prods nt + in + if isSubrule + then catw spacedPipe (map (prod true) ps) + else let val nt' = nt2s nt + in G.Rules (nt', map (prod isSubrule) ps) + end + end + + and tok t = let + val abbrev = Token.toString t + val scrub = (escapeL (explode "_{}")) o + (mathescapeL ["<", ">", "-", "|"]) o + (stripquotes) + in + if isID abbrev + then (G.kw o (* G.ITEM o *) scrub) abbrev + else (G.sym o (* G.ITEM o *) scrub) abbrev + end + + and item isSubrule i = + (case Item.sym i + of S.TOK t => tok t + | S.NONTERM (nt, _) => if Nonterm.isSubrule nt + then (G.nt o G.GRP o (nonterm true)) nt + else (G.nt o i2s) i + | S.CLOS nt => (G.LISTGRP o (nonterm true)) nt + | S.POSCLOS nt => (G.LISTONEGRP o (nonterm true)) nt + | S.OPT nt => (G.OPTGRP o (nonterm true)) nt + (* end case *)) + + fun nonterms [] = "the null grammar" + | nonterms ns = G.Grammar (catw "\n" (map (nonterm false) ns)) + + (* output grammar *) + fun grammarHook spec strm = let + val (S.Grammar {sortedTops, ...}, _) = spec + val nts = List.concat sortedTops + val g = nonterms nts + in + TextIO.output (strm, g) + end + + fun output (grm, pm, fname) = (print (" writing " ^ fname ^ ".tex\n"); + ExpandFile.expandTemplate { + src = LaTeXTemplate.template, + dst = fname ^ ".tex", + hooks = [("grammar", grammarHook (grm, pm))] + }) + + end diff --git a/ml-lpt/ml-antlr/BackEnds/LaTeX/smlnj-template.sml b/ml-lpt/ml-antlr/BackEnds/LaTeX/smlnj-template.sml new file mode 100644 index 0000000..2f168a9 --- /dev/null +++ b/ml-lpt/ml-antlr/BackEnds/LaTeX/smlnj-template.sml @@ -0,0 +1,14 @@ +(* smlnj-template.sml + * + * COPYRIGHT (c) 2009 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * In SML/NJ, we load the template files when we elaborate this module. + *) + +structure LaTeXTemplate = + struct + + val template = ExpandFile.mkTemplateFromFile "BackEnds/LaTeX/template.tex" + + end diff --git a/ml-lpt/ml-antlr/BackEnds/LaTeX/template.tex b/ml-lpt/ml-antlr/BackEnds/LaTeX/template.tex new file mode 100644 index 0000000..b37ca85 --- /dev/null +++ b/ml-lpt/ml-antlr/BackEnds/LaTeX/template.tex @@ -0,0 +1,16 @@ +\documentclass[11pt]{article} + +\usepackage{nopageno} +\usepackage{grammar} + +\title{A Grammar} +\author{You\\ +\texttt{your-email-address@somewhere.com}} + +\begin{document} + +\maketitle + +@grammar@ + +\end{document} diff --git a/ml-lpt/ml-antlr/BackEnds/SML/.cm/GUID/abs.sml b/ml-lpt/ml-antlr/BackEnds/SML/.cm/GUID/abs.sml new file mode 100644 index 0000000..45f56bb --- /dev/null +++ b/ml-lpt/ml-antlr/BackEnds/SML/.cm/GUID/abs.sml @@ -0,0 +1 @@ +guid-(sources.cm):BackEnds/SML/abs.sml-1714016113.520 diff --git a/ml-lpt/ml-antlr/BackEnds/SML/.cm/GUID/ml.sml b/ml-lpt/ml-antlr/BackEnds/SML/.cm/GUID/ml.sml new file mode 100644 index 0000000..b9a9112 --- /dev/null +++ b/ml-lpt/ml-antlr/BackEnds/SML/.cm/GUID/ml.sml @@ -0,0 +1 @@ +guid-(sources.cm):BackEnds/SML/ml.sml-1714016113.939 diff --git a/ml-lpt/ml-antlr/BackEnds/SML/.cm/GUID/pp.sml b/ml-lpt/ml-antlr/BackEnds/SML/.cm/GUID/pp.sml new file mode 100644 index 0000000..d7b5670 --- /dev/null +++ b/ml-lpt/ml-antlr/BackEnds/SML/.cm/GUID/pp.sml @@ -0,0 +1 @@ +guid-(sources.cm):BackEnds/SML/pp.sml-1714016113.541 diff --git a/ml-lpt/ml-antlr/BackEnds/SML/.cm/GUID/sml-output.sml b/ml-lpt/ml-antlr/BackEnds/SML/.cm/GUID/sml-output.sml new file mode 100644 index 0000000..e56254c --- /dev/null +++ b/ml-lpt/ml-antlr/BackEnds/SML/.cm/GUID/sml-output.sml @@ -0,0 +1 @@ +guid-(sources.cm):BackEnds/SML/sml-output.sml-1714016114.025 diff --git a/ml-lpt/ml-antlr/BackEnds/SML/.cm/GUID/smlnj-template.sml b/ml-lpt/ml-antlr/BackEnds/SML/.cm/GUID/smlnj-template.sml new file mode 100644 index 0000000..cc7ce93 --- /dev/null +++ b/ml-lpt/ml-antlr/BackEnds/SML/.cm/GUID/smlnj-template.sml @@ -0,0 +1 @@ +guid-(sources.cm):BackEnds/SML/smlnj-template.sml-1714016114.020 diff --git a/ml-lpt/ml-antlr/BackEnds/SML/.cm/SKEL/abs.sml b/ml-lpt/ml-antlr/BackEnds/SML/.cm/SKEL/abs.sml new file mode 100644 index 0000000..074ad3d --- /dev/null +++ b/ml-lpt/ml-antlr/BackEnds/SML/.cm/SKEL/abs.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f2d"Atom"d"IntInf"ad"MLABS"h0 \ No newline at end of file diff --git a/ml-lpt/ml-antlr/BackEnds/SML/.cm/SKEL/ml.sml b/ml-lpt/ml-antlr/BackEnds/SML/.cm/SKEL/ml.sml new file mode 100644 index 0000000..d8df6a5 --- /dev/null +++ b/ml-lpt/ml-antlr/BackEnds/SML/.cm/SKEL/ml.sml @@ -0,0 +1,2 @@ +Skeleton 5 +ad"ML"h2egp1d"MLABS"egp1d"MLPP" \ No newline at end of file diff --git a/ml-lpt/ml-antlr/BackEnds/SML/.cm/SKEL/pp.sml b/ml-lpt/ml-antlr/BackEnds/SML/.cm/SKEL/pp.sml new file mode 100644 index 0000000..55a2507 --- /dev/null +++ b/ml-lpt/ml-antlr/BackEnds/SML/.cm/SKEL/pp.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f3d"Atom"d"String"d"IntInf"ad"MLPP"h2ad"PP"gp1d"TextIOPP"ad"ABS"gp1d"MLABS" \ No newline at end of file diff --git a/ml-lpt/ml-antlr/BackEnds/SML/.cm/SKEL/sml-output.sml b/ml-lpt/ml-antlr/BackEnds/SML/.cm/SKEL/sml-output.sml new file mode 100644 index 0000000..509408f --- /dev/null +++ b/ml-lpt/ml-antlr/BackEnds/SML/.cm/SKEL/sml-output.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f7d"ML"d"ListPair"Cd"TextIOPP"Nonterm"d"Action"d"Prod"d"List"Cd"Options"d"AtomSet"d"Item"d"Atom"Predict"Cd"String"d"SMLTemplate"d"ExpandFile"Token"d"TextIO"Nad"SMLOutput"h6ad"S"gp1d"LLKSpec"Cad"P"gp1$ad"TMap"gp2Map"ad"TSet"gp2d"Set"ad"NMap"gp2 ad"NT"gp1 N \ No newline at end of file diff --git a/ml-lpt/ml-antlr/BackEnds/SML/.cm/SKEL/smlnj-template.sml b/ml-lpt/ml-antlr/BackEnds/SML/.cm/SKEL/smlnj-template.sml new file mode 100644 index 0000000..594aeb8 --- /dev/null +++ b/ml-lpt/ml-antlr/BackEnds/SML/.cm/SKEL/smlnj-template.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f1d"ExpandFile"ad"SMLTemplate"h0 \ No newline at end of file diff --git a/ml-lpt/ml-antlr/BackEnds/SML/.cm/amd64-unix/abs.sml b/ml-lpt/ml-antlr/BackEnds/SML/.cm/amd64-unix/abs.sml new file mode 100644 index 0000000..f3815dc Binary files /dev/null and b/ml-lpt/ml-antlr/BackEnds/SML/.cm/amd64-unix/abs.sml differ diff --git a/ml-lpt/ml-antlr/BackEnds/SML/.cm/amd64-unix/ml.sml b/ml-lpt/ml-antlr/BackEnds/SML/.cm/amd64-unix/ml.sml new file mode 100644 index 0000000..8ec509e Binary files /dev/null and b/ml-lpt/ml-antlr/BackEnds/SML/.cm/amd64-unix/ml.sml differ diff --git a/ml-lpt/ml-antlr/BackEnds/SML/.cm/amd64-unix/pp.sml b/ml-lpt/ml-antlr/BackEnds/SML/.cm/amd64-unix/pp.sml new file mode 100644 index 0000000..d6dc224 Binary files /dev/null and b/ml-lpt/ml-antlr/BackEnds/SML/.cm/amd64-unix/pp.sml differ diff --git a/ml-lpt/ml-antlr/BackEnds/SML/.cm/amd64-unix/sml-output.sml b/ml-lpt/ml-antlr/BackEnds/SML/.cm/amd64-unix/sml-output.sml new file mode 100644 index 0000000..27f43c5 Binary files /dev/null and b/ml-lpt/ml-antlr/BackEnds/SML/.cm/amd64-unix/sml-output.sml differ diff --git a/ml-lpt/ml-antlr/BackEnds/SML/.cm/amd64-unix/smlnj-template.sml b/ml-lpt/ml-antlr/BackEnds/SML/.cm/amd64-unix/smlnj-template.sml new file mode 100644 index 0000000..4492f57 Binary files /dev/null and b/ml-lpt/ml-antlr/BackEnds/SML/.cm/amd64-unix/smlnj-template.sml differ diff --git a/ml-lpt/ml-antlr/BackEnds/SML/abs.sml b/ml-lpt/ml-antlr/BackEnds/SML/abs.sml new file mode 100644 index 0000000..15f454a --- /dev/null +++ b/ml-lpt/ml-antlr/BackEnds/SML/abs.sml @@ -0,0 +1,152 @@ +(* abs.sml + * + * COPYRIGHT (c) 2015 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + *) + +structure MLABS = +struct + type atom = Atom.atom + datatype raw_ml = Raw of ml_token list + + and ml_token = Tok of string + + datatype cmp_op = LT | GT | EQ | LEQ | GEQ + datatype bool_op = AND | OR + + datatype ml_file + = OBJECT of ml_object + + and ml_object + = DECL of ml_decl list + | STRUCTURE of ml_signature option * ml_structure + | LOCAL of ml_object * ml_object + + and ml_decl + = D_VAL of (ml_pat * ml_exp) list + | D_FUN of ml_fundecl list + | D_TYPE of ml_type_binding + | D_ABS_DATATYPE of ml_datatype_binding * ml_type_binding option + (* datatype $1 withtype $2*) + | D_ABS_TYPE of ml_datatype_binding * ml_type_binding option * ml_decl list + (* abstype $1 withtype $2 with $3 end *) +(* | SD_LOCAL of () + | SD_OPEN of () + | SD_OPERATOR of () +*) + + and ml_fundecl = FUN of (ml_fun_heading * ml_type option * ml_exp) list (* none-empty list *) + + and ml_type_binding + = TypeBind of (typevar list * ml_type) list (* none-empty *) + + and ml_datatype_binding + = DatatypeBind of (typevar list * (id * ml_type option) list (* none-empty*)) list (* none-empty *) + + and ml_fun_heading + = FUNHEAD of atom * ml_pat list (* none-empty *) + + and ml_type + (*ml_type::= Type_Var | Type_Var_List Compound_Ident | Type "*" L(Type , "*") | Type "->" Type | "{" O_List( Label ":" Type ) "}" | "(" Type ")". *) + = T_VAR of typevar + | T_TUPLE of ml_type list (* NoneEmptyList *) (* t1 * t2 * .. *) + | T_FUN of ml_type * ml_type (* t1-> t2 *) + | T_RECORD of (label * ml_type) list + | T_PAREN of ml_type + + and ml_signature (*signature*) + (* Signature::= "sig" Specification "end" | Ident.*) + = SG of ml_specification (* fix this *) + + and ml_specification + = SP_EMPTY (* fix this *) + (*Specification::= Empty | value_spec | various_type_spec | exception_spec | structure_spec | other_spec | inclusion | Specification O(";") Specification. *) + + and ml_structure (*structure*) + = ST_STRUCT of ml_object (* struct $1 end *) + (* Structure::= "struct" Object_Declaration "end" | Compound_Ident | Functor_Application | Ident "(" Object_Declaration ")" | "let" Object_Declaration "in" Structure "end".*) + + (* a subset of ML expressions and patterns *) + and ml_exp + = ML_Var of string + | ML_Int of IntInf.int + | ML_Cmp of (cmp_op * ml_exp * ml_exp) + | ML_Bool of (bool_op * ml_exp * ml_exp) + | ML_Case of ml_exp * (ml_pat * ml_exp) list + | ML_If of ml_exp * ml_exp * ml_exp + | ML_App of (string * ml_exp list) + | ML_Let of (string * ml_exp * ml_exp) + (* a group of mutually-recursive functions *) + | ML_Funs of (string * string list * ml_exp) list * ml_exp + | ML_Seq of ml_exp list + | ML_Tuple of ml_exp list + | ML_List of ml_exp list + | ML_RefGet of ml_exp + | ML_RefPut of ml_exp * ml_exp + | ML_Handle of ml_exp * (ml_pat * ml_exp) list + | ML_Raw of ml_token list + (* the following added by Chunyan *) + | ML_TypeExp of ml_exp * ml_type + + and ml_pat + = ML_Wild + | ML_VarPat of string + | ML_IntPat of IntInf.int + | ML_ConPat of string * ml_pat list + | ML_TupPat of ml_pat list + (* the following added by Chunyan *) + | ML_TypePat of ml_pat * ml_type + | ML_ListPat of ml_pat list + +(* + and pattern + (* Pattern::= Atomic_Pattern | Compound_Name Atomic_Pattern | Pattern infix_constructor Pattern | Pattern ":" ml_type | Name O(":" ml_type) "as" Pattern. *) + = P_ATOM of atomic_pattern + | P_TYPE of pattern * ml_type + (* more *) + and atomic_pattern + = AP_NULL (* "_" *) + | AP_COMPNAME of id + (* | P_CONST why?? *) + | AP_TUPLE of pattern list + | AP_LIST of pattern list + | AP_RECORD of (label * pattern) list + (* more? *) + + and exp + (* # Expression::= Infix_Expression | Expression ":" ml_type | Expression boolean_operator Expression | Expression "handle" Match | "raise" Expression | selection| loop | "fn" Match. *) + (* atomic expressions *) + = E_COMPNAME of atom + | E_CONST of const + | E_TUPLE of exp list + | E_LIST of exp list (* NoneEmptyList *) + | E_RECORD of (label * exp) list + | E_STMT of exp list (* NoneEmptyList *) (* e1; e2;... *) + | E_LET of decl * exp list (* NoneEmptyList *) + (* *) + | E_TYPE of exp * ml_type + | E_APPLY of exp list (* NoneEmptyList *) + | E_BOOLEAN of exp * boolop * exp + | E_HANDLE of exp * match + | E_RAISE of exp +(* + | E_SELECTION of selection + | E_LOOP of loop +*) + | E_FN of match + and const + = C_INT of int + | C_REAL of real + | C_STRING of string + (* ... *) + + and boolop + = B_AND + | B_OR +*) + withtype label = atom + and id = atom + and typevar = atom (* fix this *) + and match = (ml_pat * ml_exp) list (* NoneEmptyList *) + +end \ No newline at end of file diff --git a/ml-lpt/ml-antlr/BackEnds/SML/ml.sml b/ml-lpt/ml-antlr/BackEnds/SML/ml.sml new file mode 100644 index 0000000..20c0200 --- /dev/null +++ b/ml-lpt/ml-antlr/BackEnds/SML/ml.sml @@ -0,0 +1,12 @@ +(* ml.sml + * + * COPYRIGHT (c) 2015 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + *) + +structure ML= + struct + open MLABS + open MLPP + val ppML = ppExp + end diff --git a/ml-lpt/ml-antlr/BackEnds/SML/pp.sml b/ml-lpt/ml-antlr/BackEnds/SML/pp.sml new file mode 100644 index 0000000..c346522 --- /dev/null +++ b/ml-lpt/ml-antlr/BackEnds/SML/pp.sml @@ -0,0 +1,496 @@ +(* pp.sml + * + * COPYRIGHT (c) 2018 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + *) + +structure MLPP = +struct + structure PP = TextIOPP + structure ABS = MLABS + + fun ppAtom (ppStrm, a) = PP.string ppStrm (Atom.toString a) + fun ppList {lb, rb, sep, item} (ppStrm, l) = let + fun pp [] = () + | pp [x] = item(ppStrm, x) + | pp (x::r) = (item(ppStrm, x); sep ppStrm; pp r) + in + lb ppStrm; pp l; rb ppStrm + end + + fun ppAtomList (ppStrm, als) = ppList { + lb = fn ppStrm => (PP.openVBox ppStrm (PP.Abs 0); + PP.string ppStrm "("), + rb = fn ppStrm => (PP.closeBox ppStrm; PP.string ppStrm ")"), + sep = fn ppStrm => (PP.string ppStrm ", "), + item = fn (ppStrm, a) => ppAtom (ppStrm, a) + } (ppStrm, als) + + fun ppNullaryConsList(ppStrm, als) = ppList { + lb = fn ppStrm => (PP.openVBox ppStrm (PP.Abs 0)), + rb = fn ppStrm => (PP.closeBox ppStrm), + sep = fn ppStrm => (PP.string ppStrm ", "), + item = fn (ppStrm, a) => ppAtom (ppStrm, a) + } (ppStrm, als) + + fun ppGuid (ppStrm, guid) = PP.string ppStrm (concat["(\"", guid, "\")"]) + + fun ppStringLit (ppStrm, s) = PP.string ppStrm (concat [ + "(\"", String.toCString s, "\")" + ]) + + fun ppTyFormals(ppStrm, als) = ppList { + lb = fn ppStrm => (PP.openVBox ppStrm (PP.Abs 0); + PP.string ppStrm "["), + rb = fn ppStrm => (PP.closeBox ppStrm; PP.string ppStrm "] "), + sep = fn ppStrm => (PP.string ppStrm ", "), + item = fn (ppStrm, a) => ppAtom (ppStrm, a) + } (ppStrm, als) + + fun ppLongUid (ppStrm, uids) = ppList { + lb = fn ppStrm => (PP.openVBox ppStrm (PP.Abs 0)), + rb = fn ppStrm => (PP.closeBox ppStrm), + sep = fn ppStrm => (PP.string ppStrm "."), + item = fn (ppStrm, uid) => ppAtom (ppStrm, uid) + } (ppStrm, uids) + + + fun ppAnd f (ppStrm, (a::alist)) + = ppList {lb= fn ppStrm => (), + rb= fn ppStrm => (), + sep= fn ppStrm => (PP.string ppStrm "and "), + item = f} (ppStrm, a::alist) + | ppAnd _ _ = raise Fail "empty list" + + fun ppBar f (ppStrm, (a::alist)) + = ppList {lb= fn ppStrm => (), + rb= fn ppStrm => (), + sep= fn ppStrm => (PP.string ppStrm "| "), + item = f} (ppStrm, a::alist) + | ppBar _ _ = raise Fail "empty list" +(* + fun ppConst (ppStrm, const) + = case const of + ABS.C_INT i => PP.string ppStrm (Int.toString i) + | ABS.C_REAL r => PP.string ppStrm (Real.toString r) + | ABS.C_STRING s => PP.string ppStrm s + + fun ppBoolop (ppStrm, ABS.B_AND) + = PP.string ppStrm "andalso" + | ppBoolop (ppStrm, ABS.B_OR) + = PP.string ppStrm "orelse" +*) + fun ppType (ppStrm, ty) + = case ty of + ABS.T_VAR var => ppAtom (ppStrm, var) + | ABS.T_TUPLE tys_nonempty + => ppList {lb= fn ppStrm => (), + rb= fn ppStrm => (), + sep= fn ppStrm => (PP.string ppStrm "*"), + item = ppType} (ppStrm, tys_nonempty) + | ABS.T_FUN (t1, t2) => (ppType (ppStrm, t1); + PP.string ppStrm "->"; + ppType (ppStrm, t2)) + | ABS.T_RECORD fields + => let fun ppField (ppStrm, (labl, ty)) + = (ppAtom (ppStrm, labl); + PP.string ppStrm ":"; + ppType (ppStrm, ty)) + in + ppList{lb= fn ppStrm => (PP.string ppStrm "{"), + rb= fn ppStrm => (PP.string ppStrm "}"), + sep= fn ppStrm => (PP.string ppStrm ", "), + item = ppField + } (ppStrm, fields) + end + | ABS.T_PAREN t => (PP.string ppStrm "("; + ppType (ppStrm, t); + PP.string ppStrm ")") + + fun ppTypeBinding (ppStrm, ABS.TypeBind andlist) + = let fun try (ppStrm, (varlist, Type)) + = (ppAtomList (ppStrm, varlist); + PP.string ppStrm "="; + ppType (ppStrm, Type)) + in + ppAnd try (ppStrm, andlist) + end + + fun ppDatatypeBinding (ppStrm, ABS.DatatypeBind andlist) + = let fun trybar (ppStrm, (id, ty)) + = (ppAtom (ppStrm, id); + case ty + of NONE => () + | SOME ty => (PP.string ppStrm " of "; + ppType (ppStrm, ty))) + + fun try (ppStrm, (varlist, barlist)) + = (ppAtomList (ppStrm, varlist); + PP.string ppStrm "="; + ppBar trybar (ppStrm, barlist)) + in + ppAnd try (ppStrm, andlist) + end + + fun ppSimpleDecl (ppStrm, simple_decl) + = case simple_decl + of ABS.D_VAL val_andlist + => let + fun ppVal (ppStrm, (pattern, exp)) + = (ppPat (ppStrm, pattern); + PP.string ppStrm "="; + ppExp (ppStrm, exp)) + in + PP.string ppStrm "val "; + ppAnd ppVal (ppStrm, val_andlist) + end + | ABS.D_FUN fun_andlist + => (PP.string ppStrm "fun "; + ppAnd ppFunDecl (ppStrm, fun_andlist)) + | ABS.D_TYPE typebinding + => (PP.string ppStrm "type "; + ppTypeBinding (ppStrm, typebinding)) + | ABS.D_ABS_DATATYPE (dataTBind, TBindOp) + => (PP.string ppStrm "datatype "; + ppDatatypeBinding (ppStrm, dataTBind); + case TBindOp + of NONE => () + | SOME b => (PP.string ppStrm "withtype "; + ppTypeBinding (ppStrm, b))) + | ABS.D_ABS_TYPE (dataTBind, TBindOp, decl) + => (PP.string ppStrm "abstype "; + ppDatatypeBinding (ppStrm, dataTBind); + case TBindOp + of NONE => () + | SOME b => (PP.string ppStrm "withtype "; + ppTypeBinding (ppStrm, b)); + PP.string ppStrm "with "; + ppDecl (ppStrm, decl); + PP.string ppStrm "end") + + and ppDecl (ppStrm, dlist_nonempty) + = ppList {lb= fn ppStrm => (), + rb= fn ppStrm => (), + sep= fn ppStrm => (), + item = ppSimpleDecl} (ppStrm, dlist_nonempty) + + + and ppMatch (ppStrm, matchlist) + = let fun try (ppStrm, (pattern, exp)) + = (ppPat (ppStrm, pattern); + PP.string ppStrm "=> "; + ppExp (ppStrm, exp)) + in ppBar try (ppStrm, matchlist) + end + + and ppFunDecl (ppStrm, ABS.FUN barlist) + = let fun trybar (ppStrm, (fun_heading, tyop, exp)) + = (ppFunHeading (ppStrm, fun_heading); + case tyop of + NONE => () + | SOME ty => (PP.string ppStrm ": "; + ppType (ppStrm, ty)); + ppExp (ppStrm, exp)) + in + PP.string ppStrm "fun "; + ppBar trybar (ppStrm, barlist) + end + + and ppFunHeading (ppStrm, ABS.FUNHEAD (s, patlist_nonempty)) + = (ppAtom(ppStrm, s); + ppList {lb= fn ppStrm => (), + rb= fn ppStrm => (), + sep= fn ppStrm => (PP.string ppStrm " "), + item = ppPat} (ppStrm, patlist_nonempty)) + + and ppSigna (ppStrm, ABS.SG s) + (* fix this *) + = case s of + ABS.SP_EMPTY => () + + and ppStruc (ppStrm, ABS.ST_STRUCT object_decl) + = (PP.string ppStrm "struct"; + ppObject (ppStrm, object_decl); + PP.string ppStrm "end") + + and ppObject (ppStrm, object_decl) + = case object_decl + of ABS.DECL r => ppDecl (ppStrm, r) + | ABS.STRUCTURE (SOME sg, st) + => (PP.string ppStrm "structure :sig"; + ppSigna (ppStrm, sg); + PP.string ppStrm "end = "; + ppStruc (ppStrm, st)) + | ABS.STRUCTURE (NONE, st) + => (PP.string ppStrm "structure "; + ppStruc (ppStrm, st)) + | ABS.LOCAL (o1, o2) + => (PP.string ppStrm "local"; + ppObject (ppStrm, o1); + PP.string ppStrm "in"; + ppObject (ppStrm, o2); + PP.string ppStrm "end") + (* end case *) + + + and ppFile (ppStrm, ABS.OBJECT r) = ( + PP.openHBox ppStrm; + ppObject (ppStrm, r); + PP.closeBox ppStrm) + + and ppExp (ppStrm, e) + = let + fun str s = PP.string ppStrm s + fun sp () = PP.space ppStrm 1 + fun nl () = PP.newline ppStrm + fun hbox () = PP.openHBox ppStrm + fun vbox () = PP.openVBox ppStrm (PP.Abs 2) + fun close () = PP.closeBox ppStrm + fun letBody (true, pp) + = (nl(); + str "in"; + vbox(); nl(); pp(); close(); + nl(); + str "end") + | letBody (false, pp) = pp() + fun ppE (inLet, prevFn, e) + = (case e + of (ABS.ML_Var x) => letBody(inLet, fn () => str x) + | (ABS.ML_Int n) => letBody(inLet, fn () => str(IntInf.toString n)) + | (ABS.ML_Cmp (cop, e1, e2)) + => letBody(inLet, + fn () + => ( + ppExp' e1; + sp(); + str (case cop + of ABS.LT => "<" + | ABS.GT => ">" + | ABS.EQ => "=" + | ABS.LEQ => "<=" + | ABS.GEQ => ">="); + sp(); + ppExp' e2)) + | (ABS.ML_Bool (bop, e1, e2)) + => letBody(inLet, + fn () => ( + ppExp' e1; + sp(); + str (case bop + of ABS.AND => "andalso" + | ABS.OR => "orelse"); + sp(); + ppExp' e2)) + | (ABS.ML_Case(arg, pl)) => + letBody(inLet, + fn () => ( + hbox(); + str "(case"; sp(); str "("; ppExp' arg; str ")"; + close(); + doCases (false, true, pl); + nl(); str "(* end case *))")) + | (ABS.ML_App(f, args)) => + letBody(inLet, + fn () => ( + hbox(); + str f; str "("; + case args + of [] => () + | [e] => ppExp' e + | (e::r) => ( + ppExp' e; app (fn e => (str ","; sp(); ppExp' e)) r) + (* end case *); + str ")"; + close())) + | (ABS.ML_If(e1, e2, e3 as ABS.ML_If _)) => + letBody(inLet, + fn () => ( + PP.openVBox ppStrm (PP.Abs 0); + vbox(); + hbox(); str "if"; sp(); ppExp' e1; close(); nl(); + hbox(); str "then"; sp(); + vbox(); ppExp' e2; close(); + close(); + close(); nl(); + hbox(); str "else"; sp(); + ppExp' e3; + close(); + close())) + | (ABS.ML_If(e1, e2, e3)) => + letBody(inLet, + fn () => ( + vbox(); + hbox(); str "if"; sp(); ppExp' e1; close(); nl(); + hbox(); str "then"; sp(); + vbox(); ppExp' e2; close(); + close(); nl(); + hbox(); str "else"; sp(); + vbox(); ppExp' e3; close(); + close(); + close())) + | (ABS.ML_Let(x, e1, e2)) => let + fun pp () = ( + nl(); + hbox(); + str "val"; sp(); str x; sp(); str "="; sp(); + ppExp' e1; + close(); + ppE (true, false, e2)) + in + if inLet + then pp() + else ( + str "let"; + PP.openVBox ppStrm (PP.Abs 0); + pp(); + close()) + end + | (ABS.ML_Funs([], e)) => + ppE (inLet, false, e) + | (ABS.ML_Funs((f, params, body)::fs, e)) => let + fun pp prefix = ( + nl(); + hbox(); + str prefix; sp(); str f; sp(); + str "("; + case params + of [] => () + | [x] => str x + | (x::r) => ( + str x; app (fn x => (str ","; sp(); str x)) r) + (* end case *); + str ")"; sp(); str "="; sp(); + PP.openVBox ppStrm (PP.Abs 6); + ppExp' body; + close(); + close(); + ppE (true, true, ABS.ML_Funs(fs, e))) + in + if inLet + then if prevFn then pp "and" else pp "fun" + else ( + PP.openVBox ppStrm (PP.Abs 0); + str "let"; + pp "fun"; + close()) + end + | (ABS.ML_Seq[]) => letBody(inLet, fn () => str "()") + | (ABS.ML_Seq[e]) => ppE(inLet, prevFn, e) + | (ABS.ML_Seq(e::r)) => let + fun pp () = ( + ppExp' e; + app (fn e => (str ";"; sp(); ppExp' e)) r) + in + if inLet + then ( + nl(); str "in"; + PP.openBox ppStrm (PP.Abs 2); + nl(); pp(); + close(); + nl(); + str "end") + else ( + PP.openBox ppStrm (PP.Abs 0); + str "("; pp(); str ")"; + close()) + end + | (ABS.ML_Tuple[]) => letBody(inLet, fn () => str "()") + | (ABS.ML_Tuple(e::r)) => + letBody (inLet, fn () => ( + PP.openBox ppStrm (PP.Abs 2); + str "("; + ppExp' e; + app (fn e => (str ","; sp(); ppExp' e)) r; + str ")"; + close())) + | (ABS.ML_List[]) => letBody(inLet, fn () => str "[]") + | (ABS.ML_List(e::r)) => + letBody (inLet, fn () => ( + PP.openBox ppStrm (PP.Abs 2); + str "["; + ppExp' e; + app (fn e => (str ","; sp(); ppExp' e)) r; + str "]"; + close())) + | (ABS.ML_RefGet e) => letBody(inLet, fn () => ( + str "!("; + ppExp' e; + str ")")) + | (ABS.ML_RefPut (e1, e2)) => + letBody(inLet, fn () => ( + ppExp' e1; + str " := "; + ppExp' e2)) + | (ABS.ML_Raw toks) => + letBody(inLet, + fn () => (hbox(); app (fn (ABS.Tok s) => str s) toks; close())) + | (ABS.ML_Handle (exp, cases)) => + ( + ppE (inLet, prevFn, exp); + nl(); str "handle"; + doCases (true, true, cases)) + (* end case *)) + and ppExp' e = ppE (false, false, e) + and doCases (_,_, []) = () + | doCases (isExn, isFirst, (p, e)::r) = + ( + nl(); + (* NOTE: the following seems to trigger a bug in the PP library (bad indent) *) + PP.openHOVBox ppStrm (PP.Abs 6); + hbox(); + if isFirst + then if isExn + then (sp(); sp(); sp()) + else (sp(); str "of") + else (PP.space ppStrm 2; str "|"); + sp(); + ppPat (ppStrm, p); sp(); str "=>"; + close(); + sp(); + hbox(); + PP.openVBox ppStrm (PP.Abs 0); + ppExp' e; + close(); + close(); + close(); + doCases (isExn, false, r)) + in + ppE(false, false, e) + end + + and ppPat (ppStrm, p) = let + fun str s = PP.string ppStrm s + fun sp () = PP.space ppStrm 1 + fun nl () = PP.newline ppStrm + fun hbox () = PP.openHBox ppStrm + fun vbox () = PP.openVBox ppStrm (PP.Abs 2) + fun close () = PP.closeBox ppStrm + fun pp (ABS.ML_Wild) = str "_" + | pp (ABS.ML_VarPat x) = str x + | pp (ABS.ML_IntPat n) = str(IntInf.toString n) + | pp (ABS.ML_ConPat(c, [])) = str c + | pp (ABS.ML_ConPat(c, [p])) = ( + str c; str "("; pp p; str ")") + | pp (ABS.ML_ConPat(c, p::r)) = ( + str c; str "("; pp p; + app (fn p => (str ","; sp(); pp p)) r; + str ")") + | pp (ABS.ML_TupPat []) = str "()" + | pp (ABS.ML_TupPat (p::r)) = ( + str "("; pp p; + app (fn p => (str ","; sp(); pp p)) r; + str ")") + | pp (ABS.ML_ListPat []) = str "[]" + | pp (ABS.ML_ListPat (p::r)) + = (str "["; pp p; + app (fn p => (str ","; sp(); pp p)) r; + str "]") + | pp (ABS.ML_TypePat (p, ty)) + = (pp p; + str ":"; + ppType (ppStrm, ty)) + in + hbox(); pp p; close() + end + +end diff --git a/ml-lpt/ml-antlr/BackEnds/SML/sml-output.sml b/ml-lpt/ml-antlr/BackEnds/SML/sml-output.sml new file mode 100644 index 0000000..5501c0b --- /dev/null +++ b/ml-lpt/ml-antlr/BackEnds/SML/sml-output.sml @@ -0,0 +1,592 @@ +(* sml-output.sml + * + * COPYRIGHT (c) 2006 + * John Reppy (http://www.cs.uchicago.edu/~jhr) + * Aaron Turon (http://www.cs.uchicago.edu/~adrassi) + * All rights reserved. + * + * Back end for SML code, using first-class continuations for + * Burke-Fisher-style error repair/recovery + *) + +structure SMLOutput = + struct + + structure S = LLKSpec + structure P = Predict + + structure TMap = Token.Map + structure TSet = Token.Set + structure NMap = Nonterm.Map + + structure NT = Nonterm + + datatype ml_exp = datatype ML.ml_exp + datatype ml_pat = datatype ML.ml_pat + datatype ml_decl = datatype ML.ml_decl + datatype ml_fundecl = datatype ML.ml_fundecl + datatype ml_fun_heading = datatype ML.ml_fun_heading + + (* the following functions compute names or small expressions + * used throughout the backend + *) + + fun NTFnName nt = NT.name nt ^ "_NT" + fun NTFnVar nt = ML_Var (NTFnName nt) + + fun predFnName nt = NT.name nt ^ "_PRED" + + fun tokConName tok = "Tok." ^ Token.name tok + fun tokConVar tok = ML_Var (tokConName tok) + fun tokConPat tok = ML_ConPat (tokConName tok, + if Token.hasTy tok + then [ML_Wild] + else []) + fun tokConPat' tok = ML_ConPat (Token.name tok, + if Token.hasTy tok + then [ML_Wild] + else []) + fun tokMatch' tok = "match" ^ (Token.name tok) + fun tokMatch tok = "" ^ tokMatch' tok + + fun tokExpected tok = ML_Raw [ML.Tok ("raise Fail \"expected " ^ (Token.name tok) ^ "\"")] + + val bindingSuffix = "_RES" + val spanSuffix = "_SPAN" + val fullSpan = "FULL_SPAN" + val spanTySuffix = " : (Lex.pos * Lex.pos)" + val rcSuffix = "_REFC" + + fun actionHeader (name, (bindings, formals), suffix, isPred, refcells, refSuffix) = let + val withSuffix = map (fn b => Atom.toString b ^ suffix) + (AtomSet.listItems (AtomSet.union (bindings, formals))) + val withSpan = map (fn b => Atom.toString b ^ spanSuffix ^ spanTySuffix) + (AtomSet.listItems bindings) + val refs = map (fn (S.REFCELL {name, ...}) => name ^ refSuffix) refcells + val args = if isPred then + withSuffix @ refs + else + withSuffix @ withSpan @ [fullSpan ^ spanTySuffix] @ refs + in + String.concat [name, " (", String.concatWith ", " args, ")"] + end + + (* make an expression that will pull the next token off the stream *) + fun mkGet1 strm = ML_App ("lex", [ML_Var (strm)]) + (* make an expression that will pull the kth token off the stream *) + fun mkGetk (strm, 1) = mkGet1 strm + | mkGetk (strm, k) = ML_App ("lex", [ML_App ("#2", [mkGetk (strm, k-1)])]) + + fun rawCode code = ML_Raw [ML.Tok code] + + (* make an expression for the given (polymorphic) decision tree *) + fun mkPredict (pickFn, choiceFn, strm, tree, errAction) = let + fun mkPredict (strm, P.Pick p) = + pickFn p + | mkPredict (strm, P.ByTok branches) = let + val branches = List.concat (map mkMatch branches) + val errCase = (ML_Wild, errAction) + in + ML_Case (mkGet1 strm, branches @ [errCase]) + end + | mkPredict (strm, P.Choice prods) = + choiceFn prods + and mkMatch (set, tree) = + map (fn tok => (ML_TupPat [tokConPat tok, ML_Wild, ML_VarPat "strm'"], + mkPredict ("strm'", tree))) + (TSet.listItems set) + in + mkPredict (strm, tree) + end + + (* make a production *) + fun mkProd (grm, pm) prod = let + val rhs = Prod.items prod + val S.Grammar {refcells, ...} = grm + fun mkTok (t, strmExp, letFn) = + letFn (ML_App (tokMatch t, [strmExp])) + fun mkNT (nt, strmExp, args, letFn, item) = let + val name = (case (args, !Options.unitActions) + of (SOME args, false) => concat [ + "(", NTFnName nt, " (", + actionHeader ( + "UserCode.ARGS_" ^ Action.name args, + Item.bindingsLeftOf (item, prod), + bindingSuffix, true, refcells, rcSuffix), + "))" + ] + | _ => NTFnName nt + (* end case *)) + val innerExp = ML_App (name, [strmExp]) + in + if NT.isSubrule nt + then letFn (mkNonterm (grm, pm) (nt, innerExp)) + else letFn innerExp + end + fun mkEBNF (nt, strmExp, fname, letFn) = let + val predName = predFnName nt + val innerExp = letFn (ML_App (fname, [ML_Var predName, ML_Var (NTFnName nt), strmExp])) + val Predict.PMaps {ebnfPredict, ...} = pm + val predTree = ebnfPredict nt + fun mkBool true = ML_Var "true" + | mkBool false = ML_Var "false" + fun choiceFn _ = raise Fail "BUG: mkEBNF: backtracking choice unexpected" + val errAction = ML_Var "false" + val caseExp = mkPredict (mkBool, choiceFn, "strm", predTree, errAction) + val predFn = ML_Funs ([(predName, ["strm"], caseExp)], innerExp) + in + mkNonterm (grm, pm) (nt, predFn) + end + fun mkItem strm ((item, binding), k) = let + val strmExp = ML_Var strm + fun mkLet e = ML_Let (String.concat + ["(", binding, bindingSuffix, + ", ", binding, spanSuffix, ", strm')"], + e, k) + in + case Item.sym item + of S.TOK t => mkTok (t, strmExp, mkLet) + | S.NONTERM (nt, args) + => mkNT (nt, strmExp, args, mkLet, item) + | S.CLOS nt => mkEBNF (nt, strmExp, "EBNF.closure", mkLet) + | S.POSCLOS nt => mkEBNF (nt, strmExp, "EBNF.posclos", mkLet) + | S.OPT nt => mkEBNF (nt, strmExp, "EBNF.optional", mkLet) + end + val itemBindings = Prod.itemBindings prod + fun debugCode () = concat[ + "print \"", Nonterm.qualName (Prod.lhs prod), "\\n\"" + ] + val action = if !Options.unitActions + then "()" + else (case Prod.action prod + of SOME _ => actionHeader ( + concat ["UserCode.", Prod.fullName prod, "_ACT"], + Prod.bindingsAtAction prod, bindingSuffix, false, + refcells, rcSuffix) + | NONE => let + val bindings = ListPair.mapPartial + (fn (binding, hasValue) => + if hasValue + then SOME (binding ^ bindingSuffix) + else NONE) + (itemBindings, Prod.itemYields prod) + in + case bindings + of [] => "()" + | _ => concat["(", String.concatWith ", " bindings, ")"] + (* end case *) + end + (* end case *)) + fun innerExp strm = let + val strmVar = ML_Var (strm) + val span = if List.length itemBindings = 0 then + ML_Tuple [ML_App ("Err.getPos", [strmVar]), + ML_App ("Err.getPos", [strmVar])] + else + ML_Tuple [ML_App ("#1", [ML_Var (hd itemBindings ^ spanSuffix)]), + ML_App ("#2", [ML_Var (hd (rev itemBindings) ^ spanSuffix)])] + val act = ML_Tuple [ML_Raw [ML.Tok action], ML_Var fullSpan, strmVar] + val act = if !Options.debug + then ML_Seq[ + ML_Raw[ML.Tok(concat[ + "print \"", Nonterm.qualName (Prod.lhs prod), "\\n\"" + ])], + act + ] + else act + val spanExp = ML_Let (fullSpan, span, act) + in case (Prod.pred prod, !Options.unitActions) + of (SOME pred, false) => + ML_If (ML_Raw [ML.Tok ("(" + ^ actionHeader + ("UserCode." ^ Prod.fullName prod ^ "_PRED", + Prod.bindingsAtAction prod, bindingSuffix, true, + refcells, rcSuffix) + ^ ")")], + spanExp, + ML_App ("fail", [])) + | _ => spanExp + end + val parse = case (ListPair.zip (rhs, itemBindings)) + of [] => innerExp "strm" + | fst::rst => + mkItem "strm" + (fst, List.foldr (mkItem "strm'") (innerExp "strm'") rst) + in + parse + end + + (* make a group of productions, along with a decision tree to choose one of them *) + and mknProds (grm, pm, nt) = let + fun mkProdFun (prod, k) = ML_Funs ([(Prod.name prod, ["strm"], + mkProd (grm, pm) prod)], k) + val Predict.PMaps {prodPredict, ...} = pm + val tree = prodPredict nt + fun pickFn prod = ML_App (Prod.name prod, [ML_Var "strm"]) + fun choiceFn prods = + ML_App ("tryProds", [ML_Var "strm", + ML_List (map (ML_Var o Prod.name) prods)]) + val errAction = ML_App ("fail", []) + val caseExp = mkPredict (pickFn, choiceFn, "strm", tree, errAction) + in + foldr mkProdFun caseExp (Nonterm.prods nt) + end + + and mkNonterm' (grm, pm) nt = let + val formals = (case (!Options.unitActions, Nonterm.formals nt) + of (false, _::_) => concat[ + " (", + String.concatWithMap ", " + (fn f => Atom.toString f ^ bindingSuffix) + (Nonterm.formals nt), + ")" + ] + | _ => "" + (* end case *)) + val exp = (case Nonterm.prods nt + of [prod] => mkProd (grm, pm) prod + | _ => mknProds(grm, pm, nt) + (* end case *)) + in + (NTFnName nt ^ formals, ["strm"], exp) + end + and mkNonterm (grm, pm) (nt, k) = ML_Funs ([mkNonterm' (grm, pm) nt], k) + + fun mkNonterms (grm, pm) (nts, k) = + ML_Funs (map (mkNonterm' (grm, pm)) nts, k) + + (* output the main parser body *) + fun parserHook spec strm = let + val (grm as S.Grammar {toks, nterms, startnt, sortedTops, entryPoints, ...}, + pm) = spec + val ppStrm = TextIOPP.openOut {dst = strm, wid = 80} + val entries = map NTFnName (startnt :: entryPoints) + val entriesVal = "val (" ^ String.concatWith ", " entries ^ ") = " + val innerExp = ML_Tuple (map ML_Var entries) + val parser = List.foldl (mkNonterms (grm, pm)) innerExp sortedTops + fun optParam nt = if List.null (Nonterm.formals nt) then " " else " x " + fun optParamFn nt = if List.null (Nonterm.formals nt) then " " else " fn x => " + fun wrWrapParse nt = TextIO.output (strm, String.concat [ + "val ", NTFnName nt, " = ", optParamFn nt, + "fn s => unwrap (Err.launch (eh, lexFn, ", + NTFnName nt, optParam nt, ", ", + if Nonterm.same (nt, startnt) then "true" else "false", + ") s)\n" + ]) + fun wrEntry (name, nt) = TextIO.output (strm, String.concat [ + "fun ", name, " lexFn ", optParam nt, + "s = let ", entriesVal, "mk lexFn in ", NTFnName nt, + if List.null (Nonterm.formals nt) then " " else " x ", + "s end\n\n" + ]) + in + TextIO.output (strm, entriesVal ^ "\n"); + ML.ppML (ppStrm, parser); + TextIO.output (strm, "\n"); + app wrWrapParse (startnt::entryPoints); + TextIO.output (strm, concat [ + "\nin (", + String.concatWith ", " entries, + ") end\n" + ]); + TextIO.output (strm, " in\n"); + wrEntry ("parse", startnt); + app (wrEntry o (fn x => ("parse" ^ Nonterm.name x, x))) entryPoints; + TextIO.output (strm, " end\n") + end + + (* make a match function for a token *) + fun ppMatch (strm, ppStrm) t = let + val matchCase = + (ML_TupPat + [ML_ConPat (tokConName t, + if Token.hasTy t + then [ML_VarPat "x"] + else []), + ML_VarPat "span", + ML_VarPat "strm'"], + if Token.hasTy t + then ML_Tuple [ML_Var "x", ML_Var "span", ML_Var "strm'"] + else ML_Tuple [ML_Var "()", ML_Var "span", ML_Var "strm'"]) + val errCase = (ML_Wild, ML_App ("fail", [])) + val exp = ML_Case (mkGet1 "strm", [matchCase, errCase]) + in + TextIO.output (strm, "fun " ^ tokMatch' t ^ " strm = "); + ML.ppML (ppStrm, exp); + TextIO.output (strm, "\n") + end + + (* a token trie used to organize the preferred changes *) + datatype trie = TR of { + replacements : S.token list list, (* replacements *) + kids : (S.token * trie) list + } + + val emptyTrie = TR{replacements=[], kids=[]} + + (* build the token trie from the list of preferred changes; we quietly + * eliminate duplicate changes as part of this building process. + *) + fun mkTrie (changes : (S.token list * S.token list) list) = let + fun insert ((old, new), tr) = let + fun ins ([], tr as TR{replacements, kids}) = let + fun same repl = ListPair.allEq Token.same (new, repl) + in + if List.exists same replacements + then tr + else TR{replacements=new::replacements, kids=kids} + end + | ins (t::toks, TR{replacements, kids}) = let + fun find [] = [(t, ins(toks, emptyTrie))] + | find ((t', tr)::rest) = if Token.same(t, t') + then (t', ins(toks, tr)) :: rest + else (t', tr) :: find rest + in + TR{replacements = replacements, kids = find kids} + end + in + ins (old, tr) + end + in + List.foldl insert emptyTrie changes + end + + (* output the tokens structure *) + fun tokenStructHook spec strm = let + val (S.Grammar{name, toks, toksImport, changes, ...}, _) = spec + val ppStrm = TextIOPP.openOut {dst = strm, wid = 80} + fun pr s = TextIO.output (strm, s) + fun prl s = TextIO.output (strm, concat s) + fun prDT () = let + val first::rest = toks + in + pr " datatype token\n"; + prl [" = ", Token.def first, "\n"]; + List.app (fn tok => prl [" | ", Token.def tok, "\n"]) rest + end + (* list of all nullary and default token values for error repair *) + val allToks = let + fun make (tok, ts) = if Token.hasTy tok + then (case Token.default tok + of SOME arg => concat[Token.name tok, "(", arg, ")"] :: ts + | NONE => ts + (* end case *)) + else Token.name tok :: ts + in + List.foldr make [] toks + end + (* support for toString function *) + fun mkMat t = (ML_TupPat [tokConPat' t], rawCode (Token.quoted t)) + val casesExp = ML_Case (ML_Var "tok", List.map mkMat toks) + (* support for isKW function *) + fun mkKWMat t = (ML_TupPat [tokConPat' t], + ML_Var (if Token.isKW t then "true" else "false")) + val kwCasesExp = ML_Case (ML_Var "tok", List.map mkKWMat toks) + (* support for the changes function *) +(***** + val changesDT = + " datatype 'strm changes\n\ + \ = CHANGE of 'strm * int * token list * ('strm -> 'strm changes)\n\ + \ | NOCHANGE;\n" + fun genChanges [] = TextIO.output (strm, " fun changes _ _ = NOCHANGE\n") + | genChanges changes = let + val TR{replacements, kids} = mkTrie changes + val noKids = List.null kids + (* map a token to an expression that evaluates to it *) + fun tok2exp tok = if Token.hasTy tok + then (case Token.default tok + of SOME code => ML_App(Token.name tok, [ML_Raw[Tok code]]) + | NONE => raise Fail(concat[ + "token '", Token.name tok, "' does not have a default value" + ]) + (* end case *)) + else ML_Var(Token.name tok) + (* generate the insertion states first *) + fun genInsFns (idx, [], fns) = if noKids + then (idx+1, (s idx, ["_"], ML_Var "NOCHANGE") :: fns) + else (idx+1, fns) + | genInsFns (idx, []::reps, fns) = (* ignore [] -> [] substitution *) + genInsFns (idx, reps, fns) + | genInsFns (idx, new::reps, fns) = let + val f = (s idx, ["strm"], ML_App("CHANGE", [ + ML_Var "strm", ML_Int 0, + ML_List(List.map tok2exp new), + s(idx+1) + ])) + in + genInsFns (idx+1, reps, f::fns) + end + val (idx, fns) = genInsFns (0, replacements, []) + (* function to handle recursive cases *) + fun gen (idx, depth, [], fns) = (idx, fns) + | gen (idx, depth, kids, fns) = let + fun genKid ((tok, tr), (idx, kfns, fns)) = let + val (idx', fns) = genKidFn (idx, depth+1, tr, fns) + in + (idx', (tok, s idx)::kfns, fns) + end + val (idx, kfns, fns) = List.foldl genKid (idx, [], fns) kids + in + (idx, fns) + end + val (idx, fns) = gen (idx, 0, kids, fns) + in + FUN(FUNHEAD("changes", [ML_TypePat(ML_VarPat "getTok", T_FUN())]), + ML_Funs(List.rev fns, ML_Var(s 0))) + end + fun genChanges (idx, depth, TR{replacements, kids}) = let + val noKids = List.null kids + fun s idx = "s" ^ Int.toString idx + + fun gen +*****) + in + prl ["structure ", name, "Tokens"]; + case toksImport + of NONE => ( + pr " =\n"; + pr " struct\n"; + prDT()) + | SOME code => ( + pr " : sig\n"; + prDT(); + List.app pr [ + " val allToks : token list\n", + " val toString : token -> bool\n", + " val isKW : token -> bool\n", + " val isEOF : token -> bool\n", + " end = struct\n" + ]; + prl [" datatype token = datatype ", Action.code code, "\n"]) + (* end case *); + (* "allToks" value *) + prl [ + " val allToks = [\n ", + String.concatWith ", " allToks, "\n ]\n" + ]; + (* "toString" function *) + TextIO.output (strm, " fun toString tok =\n"); + ML.ppML (ppStrm, casesExp); + pr "\n"; + (* "isKW" function *) + TextIO.output (strm, " fun isKW tok =\n"); + ML.ppML (ppStrm, kwCasesExp); + pr "\n"; + (* "isEOF" function *) + pr " fun isEOF EOF = true\n"; + pr " | isEOF _ = false\n"; + (* preferred "changes" support *) + (* TODO *) + prl [" end (* ", name, "Tokens *)\n"] + end + + fun matchfnsHook spec strm = let + val (S.Grammar {toks, ...}, _) = spec + val ppStrm = TextIOPP.openOut {dst = strm, wid = 80} + in + app (ppMatch (strm, ppStrm)) toks + end + + (* output header *) + fun headerHook (grm, _) strm = (case grm + of S.Grammar{header = SOME h, ...} => TextIO.output (strm, h) + | S.Grammar{name, ...} => + TextIO.output (strm, String.concat [ + "functor ", name, "ParseFn (Lex : ANTLR_LEXER)" + ]) + (* end case *)) + + (* output tokens module name *) + fun tokmodHook spec strm = let + val (S.Grammar {name, ...}, _) = spec + in + TextIO.output (strm, name ^ "Tokens") + end + + (* output user definitions *) + fun defsHook spec strm = let + val (S.Grammar {defs, ...}, _) = spec + fun output ss = TextIO.output (strm, String.concat ss) + in + TextIO.output (strm, Action.toString defs) + end + + (* output user actions *) + fun actsHook spec strm = let + val (S.Grammar {prods, refcells, ...}, _) = spec + fun output ss = TextIO.output (strm, String.concat ss) + fun actionLevel (suffix, f, isPred) prod = (case f prod + of SOME code => output [ + "fun ", + actionHeader ( + Prod.fullName prod ^ suffix, + Prod.bindingsAtAction prod, "", isPred, refcells, ""), + " = \n (", Action.toString code, ")", + (case Nonterm.ty (Prod.lhs prod) + of NONE => "" + | SOME ty => " : " ^ ty), + "\n"] + | NONE => ()) + fun args prod (itm as S.ITEM {sym = S.NONTERM (nt, SOME code), ...}) = + output ["fun ", + actionHeader ( + "ARGS_" ^ Action.name code, + Item.bindingsLeftOf (itm, prod), "", true, refcells, ""), + " = \n (", Action.toString code, ")\n"] + | args _ _ = () + fun outCell (S.REFCELL {name, initCode, ty, ...}) = TextIO.output (strm, + String.concat [ + "fun mk", name ^ rcSuffix, "() : (", ty, ") ref = ref (", + Action.toString initCode, ")\n"]) + in + app (actionLevel ("_ACT", Prod.action, false)) prods; + app (actionLevel ("_PRED", Prod.pred, true)) prods; + app (fn prod => app (args prod) (Prod.items prod)) prods; + app outCell refcells + end + + fun ehargsHook spec strm = let + fun out s = TextIO.output (strm, s) + val (S.Grammar {refcells, ...}, _) = spec + val names = map (fn (S.REFCELL {name, ...}) => name) refcells + fun prepend pre s = pre ^ s + fun append post s = s ^ post + fun mkc (S.REFCELL {name, ...}) = String.concat [ + "val ", name, rcSuffix, " = UserCode.mk", name, rcSuffix, "()\n"] + in + app (out o mkc) refcells; + out (String.concat ["fun getS() = {", + String.concatWith ", " + (map (fn nm => nm ^ " = !" ^ nm ^ rcSuffix) + names), + "}\n"]); + out (String.concat ["fun putS{", + String.concatWith ", " names, + "} = (", + String.concatWith + "; " (map (fn nm => nm ^ rcSuffix ^ " := " ^ nm) + names), + ")\n"]); + out (String.concat ["fun unwrap (ret, strm, repairs) = ", + "(ret, strm, repairs", + if List.length names > 0 then ", getS()" else "", + ")\n"]) + end + + fun output (grm, pm, fname) = + ExpandFile.expandTemplate { + src = SMLTemplate.template, + dst = fname ^ ".sml", + hooks = [ + ("parser", parserHook (grm, pm)), + ("token-struct", tokenStructHook (grm, pm)), + ("tokmod", tokmodHook (grm, pm)), + ("header", headerHook (grm, pm)), + ("usrdefs", defsHook (grm, pm)), + ("actions", actsHook (grm, pm)), + ("ehargs", ehargsHook (grm, pm)), + ("matchfns", matchfnsHook (grm, pm)) + ] + } + + end diff --git a/ml-lpt/ml-antlr/BackEnds/SML/smlnj-template.sml b/ml-lpt/ml-antlr/BackEnds/SML/smlnj-template.sml new file mode 100644 index 0000000..106d9e8 --- /dev/null +++ b/ml-lpt/ml-antlr/BackEnds/SML/smlnj-template.sml @@ -0,0 +1,14 @@ +(* smlnj-template.sml + * + * COPYRIGHT (c) 2009 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * In SML/NJ, we load the template files when we elaborate this module. + *) + +structure SMLTemplate = + struct + + val template = ExpandFile.mkTemplateFromFile "BackEnds/SML/template.sml" + + end diff --git a/ml-lpt/ml-antlr/BackEnds/SML/template.sml b/ml-lpt/ml-antlr/BackEnds/SML/template.sml new file mode 100644 index 0000000..0b10e64 --- /dev/null +++ b/ml-lpt/ml-antlr/BackEnds/SML/template.sml @@ -0,0 +1,73 @@ +@token-struct@ + +@header@ + = struct + + local + structure Tok = +@tokmod@ + + structure UserCode = + struct +@usrdefs@ + +@actions@ + end (* UserCode *) + + structure Err = AntlrErrHandler( + structure Tok = Tok + structure Lex = Lex) + +(* replace functor with inline structure for better optimization + structure EBNF = AntlrEBNF( + struct + type strm = Err.wstream + val getSpan = Err.getSpan + end) +*) + structure EBNF = + struct + fun optional (pred, parse, strm) = + if pred strm + then let + val (y, span, strm') = parse strm + in + (SOME y, span, strm') + end + else (NONE, Err.getSpan strm, strm) + + fun closure (pred, parse, strm) = let + fun iter (strm, (left, right), ys) = + if pred strm + then let + val (y, (_, right'), strm') = parse strm + in iter (strm', (left, right'), y::ys) + end + else (List.rev ys, (left, right), strm) + in + iter (strm, Err.getSpan strm, []) + end + + fun posclos (pred, parse, strm) = let + val (y, (left, _), strm') = parse strm + val (ys, (_, right), strm'') = closure (pred, parse, strm') + in + (y::ys, (left, right), strm'') + end + end + + fun mk lexFn = let +@ehargs@ + val (eh, lex) = Err.mkErrHandler {get = getS, put = putS} + fun fail() = Err.failure eh + fun tryProds (strm, prods) = let + fun try [] = fail() + | try (prod :: prods) = + (Err.whileDisabled eh (fn() => prod strm)) + handle Err.ParseError => try (prods) + in try prods end +@matchfns@ + +@parser@ + +end diff --git a/ml-lpt/ml-antlr/BackEnds/back-end-sig.sml b/ml-lpt/ml-antlr/BackEnds/back-end-sig.sml new file mode 100644 index 0000000..9898c6e --- /dev/null +++ b/ml-lpt/ml-antlr/BackEnds/back-end-sig.sml @@ -0,0 +1,6 @@ +signature BACK_END = + sig + + val output : (LLKSpec.grammar * Predict.predict_maps * string) -> unit + + end diff --git a/ml-lpt/ml-antlr/FrontEnds/.cm/GUID/grammar-syntax.sml b/ml-lpt/ml-antlr/FrontEnds/.cm/GUID/grammar-syntax.sml new file mode 100644 index 0000000..661d097 --- /dev/null +++ b/ml-lpt/ml-antlr/FrontEnds/.cm/GUID/grammar-syntax.sml @@ -0,0 +1 @@ +guid-(sources.cm):FrontEnds/grammar-syntax.sml-1714016114.364 diff --git a/ml-lpt/ml-antlr/FrontEnds/.cm/SKEL/grammar-syntax.sml b/ml-lpt/ml-antlr/FrontEnds/.cm/SKEL/grammar-syntax.sml new file mode 100644 index 0000000..02f7bfd --- /dev/null +++ b/ml-lpt/ml-antlr/FrontEnds/.cm/SKEL/grammar-syntax.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f3d"Atom"d"String"d"Err"ad"GrammarSyntax"h0 \ No newline at end of file diff --git a/ml-lpt/ml-antlr/FrontEnds/.cm/amd64-unix/grammar-syntax.sml b/ml-lpt/ml-antlr/FrontEnds/.cm/amd64-unix/grammar-syntax.sml new file mode 100644 index 0000000..eb130cf Binary files /dev/null and b/ml-lpt/ml-antlr/FrontEnds/.cm/amd64-unix/grammar-syntax.sml differ diff --git a/ml-lpt/ml-antlr/FrontEnds/grammar-syntax.sml b/ml-lpt/ml-antlr/FrontEnds/grammar-syntax.sml new file mode 100644 index 0000000..e2a6540 --- /dev/null +++ b/ml-lpt/ml-antlr/FrontEnds/grammar-syntax.sml @@ -0,0 +1,80 @@ +(* grammar-syntax.sml + * + * COPYRIGHT (c) 2009 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * Parse tree for grammar input. + *) + +structure GrammarSyntax = + struct + + type span = Err.span + type code = span * String.string + type symbol = Atom.atom + type name = string + type ty = string + type constr = (symbol * ty option * Atom.atom option) + + datatype decl + = NAME of name + | HEADER of code + | START of symbol + | ENTRY of symbol + | KEYWORD of symbol (* %keywords TOKEN ... *) + | VALUE of symbol * code (* %value TOKEN ( ... ) *) + | PREFER of symbol (* %prefer TOKEN ... *) + | CHANGE of (symbol list * symbol list) (* %change TOKEN* -> TOKEN* | ... *) + | DEFS of code + | TOKEN of constr + | TOKENTYPE of ty (* %tokentype monotype *) + | IMPORT of { + filename : string, + dropping : (span * symbol) list + } + | REFCELL of name * ty * code + | RULE of { + lhs : symbol, + formals : name list, + rhs : rhs + } + | NONTERM of symbol * ty + + and rhs = RHS of { + items : (string option * (span * item)) list, + try : bool, + predicate : code option, + action : code option, + loc : span + } + + and item + = SYMBOL of symbol * code option + | SUBRULE of rhs list (* ( ... ) *) + | CLOS of span * item (* ( ... )* *) + | POSCLOS of span * item (* ( ... )+ *) + | OPT of span * item (* ( ... )? *) + + type grammar = (span * decl) list + + local + fun ppDecl (_, NAME n) = "%name" + | ppDecl (_, HEADER _) = "%code" + | ppDecl (_, START s) = "%start" + | ppDecl (_, ENTRY s) = "%entry" + | ppDecl (_, KEYWORD s) = "%keyword" + | ppDecl (_, VALUE _) = "%value" + | ppDecl (_, PREFER _) = "%prefer" + | ppDecl (_, CHANGE _) = "%change" + | ppDecl (_, DEFS c) = "%defs" + | ppDecl (_, TOKENTYPE c) = "%tokentype" + | ppDecl (_, TOKEN cstr) = "%tokens" + | ppDecl (_, NONTERM cstr) = "%nonterm" + | ppDecl (_, IMPORT {filename, dropping}) = "%import" + | ppDecl (_, REFCELL (n, ty, c)) = "%refcell" + | ppDecl (_, RULE {lhs, formals, rhs}) = "-- rule: " ^ (Atom.toString lhs) + in + fun ppGrammar decls = String.concatWith "\n" (map ppDecl decls) + end + + end diff --git a/ml-lpt/ml-antlr/FrontEnds/ml-antlr-based/.cm/GUID/parse-file.sml b/ml-lpt/ml-antlr/FrontEnds/ml-antlr-based/.cm/GUID/parse-file.sml new file mode 100644 index 0000000..b7c65ac --- /dev/null +++ b/ml-lpt/ml-antlr/FrontEnds/ml-antlr-based/.cm/GUID/parse-file.sml @@ -0,0 +1 @@ +guid-(sources.cm):FrontEnds/ml-antlr-based/parse-file.sml-1714016115.057 diff --git a/ml-lpt/ml-antlr/FrontEnds/ml-antlr-based/.cm/GUID/spec.grm.sml b/ml-lpt/ml-antlr/FrontEnds/ml-antlr-based/.cm/GUID/spec.grm.sml new file mode 100644 index 0000000..22a76d6 --- /dev/null +++ b/ml-lpt/ml-antlr/FrontEnds/ml-antlr-based/.cm/GUID/spec.grm.sml @@ -0,0 +1 @@ +guid-(sources.cm):FrontEnds/ml-antlr-based/spec.grm.sml-1714016114.376 diff --git a/ml-lpt/ml-antlr/FrontEnds/ml-antlr-based/.cm/GUID/spec.lex.sml b/ml-lpt/ml-antlr/FrontEnds/ml-antlr-based/.cm/GUID/spec.lex.sml new file mode 100644 index 0000000..3f01bfe --- /dev/null +++ b/ml-lpt/ml-antlr/FrontEnds/ml-antlr-based/.cm/GUID/spec.lex.sml @@ -0,0 +1 @@ +guid-(sources.cm):FrontEnds/ml-antlr-based/spec.lex.sml-1714016114.796 diff --git a/ml-lpt/ml-antlr/FrontEnds/ml-antlr-based/.cm/SKEL/parse-file.sml b/ml-lpt/ml-antlr/FrontEnds/ml-antlr-based/.cm/SKEL/parse-file.sml new file mode 100644 index 0000000..bfa36fd --- /dev/null +++ b/ml-lpt/ml-antlr/FrontEnds/ml-antlr-based/.cm/SKEL/parse-file.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f6d"SpecTokens"Cd"AntlrStreamPos"SpecLex"d"AntlrRepair"d"Err"d"TextIO"Nad"ParseFile"h1ad"P"jgp1#gp1e"SpecParseFn" \ No newline at end of file diff --git a/ml-lpt/ml-antlr/FrontEnds/ml-antlr-based/.cm/SKEL/spec.grm.sml b/ml-lpt/ml-antlr/FrontEnds/ml-antlr-based/.cm/SKEL/spec.grm.sml new file mode 100644 index 0000000..b374e30 --- /dev/null +++ b/ml-lpt/ml-antlr/FrontEnds/ml-antlr-based/.cm/SKEL/spec.grm.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2aSpecTokens"0ae"SpecParseFn"i2aLex"gp1c"ANTLR_LEXER"f7GrammarSyntax"AntlrStreamPos"C$d"List"d"Atom"d"String"d"Substring"Nh1bd4aTok"gp1ad"UserCode"h2ad"GS"gp1=ad"StreamPos"gp1 ad"Err"jh2a gp1 a$gp1$gp1e"AntlrErrHandler"ad"EBNF"f0 \ No newline at end of file diff --git a/ml-lpt/ml-antlr/FrontEnds/ml-antlr-based/.cm/SKEL/spec.lex.sml b/ml-lpt/ml-antlr/FrontEnds/ml-antlr-based/.cm/SKEL/spec.lex.sml new file mode 100644 index 0000000..aff394b --- /dev/null +++ b/ml-lpt/ml-antlr/FrontEnds/ml-antlr-based/.cm/SKEL/spec.lex.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f9ULexBuffer"AntlrStreamPos"d"UTF8"d"Int"CString"d"Err"d"Substring"d"TextIO"Vector"Nad"SpecLex"h1bd2aUserDeclarations"h1aTok"gp1d"SpecTokens"bd2egp1/f7C/d"List"0Nf0f3/ \ No newline at end of file diff --git a/ml-lpt/ml-antlr/FrontEnds/ml-antlr-based/.cm/amd64-unix/parse-file.sml b/ml-lpt/ml-antlr/FrontEnds/ml-antlr-based/.cm/amd64-unix/parse-file.sml new file mode 100644 index 0000000..accd74f Binary files /dev/null and b/ml-lpt/ml-antlr/FrontEnds/ml-antlr-based/.cm/amd64-unix/parse-file.sml differ diff --git a/ml-lpt/ml-antlr/FrontEnds/ml-antlr-based/.cm/amd64-unix/spec.grm.sml b/ml-lpt/ml-antlr/FrontEnds/ml-antlr-based/.cm/amd64-unix/spec.grm.sml new file mode 100644 index 0000000..87fef93 Binary files /dev/null and b/ml-lpt/ml-antlr/FrontEnds/ml-antlr-based/.cm/amd64-unix/spec.grm.sml differ diff --git a/ml-lpt/ml-antlr/FrontEnds/ml-antlr-based/.cm/amd64-unix/spec.lex.sml b/ml-lpt/ml-antlr/FrontEnds/ml-antlr-based/.cm/amd64-unix/spec.lex.sml new file mode 100644 index 0000000..6da20c3 Binary files /dev/null and b/ml-lpt/ml-antlr/FrontEnds/ml-antlr-based/.cm/amd64-unix/spec.lex.sml differ diff --git a/ml-lpt/ml-antlr/FrontEnds/ml-antlr-based/parse-file.sml b/ml-lpt/ml-antlr/FrontEnds/ml-antlr-based/parse-file.sml new file mode 100644 index 0000000..389a129 --- /dev/null +++ b/ml-lpt/ml-antlr/FrontEnds/ml-antlr-based/parse-file.sml @@ -0,0 +1,36 @@ +(* parse-file.sml + * + * COPYRIGHT (c) 2006 + * John Reppy (http://www.cs.uchicago.edu/~jhr) + * Aaron Turon (http://www.cs.uchicago.edu/~adrassi) + * All rights reserved. + * + * Driver for the parser. + *) + +structure ParseFile = + struct + + structure P = SpecParseFn(SpecLex) + + fun parse filename = let + val _ = Err.status ("parsing " ^ filename) + val file = TextIO.openIn filename + val sm = AntlrStreamPos.mkSourcemap' filename + val strm = SpecLex.streamifyInstream file +(* performance testing +fun go 0 = () + | go n = (ignore (P.parse (SpecLex.lex (AntlrStreamPos.mkSourcemap())) (filename, sm) strm); + go (n - 1)) +val _ = go 1000 +*) + val (res, strm', errs, _) = P.parse (SpecLex.lex sm) (filename, sm) strm + fun doErr err = Err.errMsg ["Syntax error ", + AntlrRepair.repairToString SpecTokens.toString sm err] + in + app doErr errs; + TextIO.closeIn file; + res + end + + end \ No newline at end of file diff --git a/ml-lpt/ml-antlr/FrontEnds/ml-antlr-based/spec.grm b/ml-lpt/ml-antlr/FrontEnds/ml-antlr-based/spec.grm new file mode 100644 index 0000000..b948770 --- /dev/null +++ b/ml-lpt/ml-antlr/FrontEnds/ml-antlr-based/spec.grm @@ -0,0 +1,231 @@ +(* spec.grm + * + * COPYRIGHT (c) 2009 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * ML-Antlr grammar for ML-Antlr. + *) + +%defs ( + structure GS = GrammarSyntax + structure StreamPos = AntlrStreamPos + + fun lift f (vspan, v) = (vspan, f v) + fun mapFst f (fst, snd) = (f fst, snd) + + val trimQuotes = + Substring.string o + (Substring.triml 1) o + (Substring.trimr 1) o + Substring.full + +); + +%name Spec; + +%tokens + : KW_change ("%change") + | KW_defs ("%defs") + | KW_dropping ("%dropping") + | KW_entry ("%entry") + | KW_header ("%header") + | KW_import ("%import") + | KW_keywords ("%keywords") + | KW_name ("%name") + | KW_nonterms ("%nonterms") + | KW_prefer ("%prefer") + | KW_refcell ("%refcell") + | KW_start ("%start") + | KW_tokens ("%tokens") + | KW_tokentype ("%tokentype") + | KW_try ("%try") + | KW_value ("%value") + | KW_where ("%where") + | LP ("(") + | RP (")") + | LSB ("[") + | RSB ("]") + | LCB ("{") + | RCB ("}") + | SLASH ("/") + | COLON (":") + | SEMI (";") + | COMMA (",") + | AT ("@") + | DOLLAR ("$") + | BAR ("|") + | STAR ("*") + | PLUS ("+") + | QUERY ("?") + | EQ ("=") + | ARROW ("->") + | DARROW ("=>") + | REFSET (":=") + | OF ("of") + | CODE of string + | ID of string + | STRING of string + | IDDOT of string (* identifier with dot (e.g., "module.") *) + | TYVAR of string (* ML-style type variable name *) + | INT of string (* integer label within record *) + | BOGUS + ; + +%refcell liftSpan : AntlrStreamPos.span -> GrammarSyntax.span + = (fn _ => let val z = ("", 0, 0) in (z, z) end); + +%nonterm File of GS.grammar ; +File(fileName, sm) + : ( => (liftSpan := (fn (p1, p2) => ((fileName, StreamPos.lineNo sm p1, + StreamPos.colNo sm p1), + (fileName, StreamPos.lineNo sm p2, + StreamPos.colNo sm p2))) + ) ) + (Decl ";")* => (map (mapFst (!liftSpan)) (List.concat SR2)) + ; + +%nonterm Decl of (StreamPos.span * GS.decl) list ; +Decl + : "%name" ID + => ([ (FULL_SPAN, GS.NAME ID) ]) + | "%header" Code + => ([ (FULL_SPAN, GS.HEADER Code) ]) + | "%start" ID + => ([ (FULL_SPAN, GS.START (Atom.atom ID)) ]) + | "%entry" IDList + => (map (lift (GS.ENTRY o Atom.atom)) IDList) + | "%keywords" SymList + => (map (lift GS.KEYWORD) SymList) + | "%value" ID Code + => ([ (FULL_SPAN, GS.VALUE (Atom.atom ID, Code)) ]) + | "%prefer" SymList + => (map (lift GS.PREFER) SymList) + | "%change" ChangeList + => (map (lift GS.CHANGE) ChangeList) + | "%defs" Code + => ([ (FULL_SPAN, GS.DEFS Code) ]) + | "%tokentype" QualID + => ([ (FULL_SPAN, GS.TOKENTYPE QualID)]) + | "%tokens" ":"? ConstrList + => (List.map (lift GS.TOKEN) ConstrList) + | "%import" STRING ("%dropping" SymList)? + => ([ (FULL_SPAN, GS.IMPORT{ + filename = trimQuotes STRING, + dropping = map (mapFst (!liftSpan)) (getOpt (SR, [])) + }) ]) + | "%refcell" ID ":" Ty "=" Code + => ([ (FULL_SPAN, GS.REFCELL (ID, Ty, Code)) ]) + | "%nonterms" ":"? TyAnn ("|" TyAnn)* + => (map (lift GS.NONTERM) (TyAnn::SR)) + | ID Formals? ":" AltList + => (map (fn (span, alt) => + (span, GS.RULE { + lhs = Atom.atom ID, + formals = getOpt(Formals, []), + rhs = alt + })) AltList) + ; + +Formals : "(" ID ("," ID)* ")" => (ID::SR) ; +IDList : ID' ("," ID')* => (ID'::SR) ; +SymList : Symbol' ("," Symbol')* => (Symbol'::SR) ; + +ChangeList + : Change ("|" Change)* + => (Change::SR) + ; + +Change + : Symbol* "->" Symbol* + => (FULL_SPAN, (Symbol1, Symbol2)) + ; + +AltList : Alt ("|" Alt)* => (Alt::SR) ; +TyAnn : ID ("of" Ty | => ("unit")) => (FULL_SPAN, (Atom.atom ID, SR)) ; + +Alt : "%try"? NamedItem* ("%where" Code)? ("=>" Code)? + => (FULL_SPAN, GS.RHS { + items = NamedItem, + try = isSome KW_try, + predicate = SR1, + action = SR2, + loc = (!liftSpan) FULL_SPAN + }) + ; + +NamedItem + : (ID "=")? Item => (SR, Item) ; + +Item + : PrimItem + ( "*" => ((!liftSpan) PrimItem_SPAN, GS.CLOS PrimItem) + | "+" => ((!liftSpan) PrimItem_SPAN, GS.POSCLOS PrimItem) + | "?" => ((!liftSpan) PrimItem_SPAN, GS.OPT PrimItem) + | => (PrimItem) ) + => (SR) + ; + +PrimItem + : Symbol ("@" Code)? + => ((!liftSpan) FULL_SPAN, GS.SYMBOL (Symbol, SR)) + | "(" AltList ")" + => ((!liftSpan) FULL_SPAN, GS.SUBRULE (map (fn (_, alt) => alt) AltList)) + ; + +ID' : ID => (ID_SPAN, ID) ; + +Symbol' + : Symbol + => (FULL_SPAN, Symbol) + ; + +Symbol + : ID + => (Atom.atom ID) + | STRING + => (Atom.atom STRING) + ; + +Constr : ID ("of" Ty)? Abbrev? => (FULL_SPAN, (Atom.atom ID, SR, Abbrev)) ; +ConstrList + : Constr ("|" Constr)* => (Constr::SR) + ; + +Ty : TyFun + ; + +TyFun + : TyProd ("->" TyProd)* => (String.concatWith " -> " (TyProd::SR)) ; + +TyProd + : TyApp ("*" TyApp)* => (String.concatWith " * " (TyApp::SR)) ; + +TyApp + : %try "(" Ty ("," Ty)+ ")" QualID + => ("(" ^ String.concatWith "," (Ty::SR) ^ ") " ^ QualID) + | %try TyAtom ( QualID => (TyAtom ^ " " ^ QualID) | => (TyAtom)) + => (SR) + ; + +TyAtom + : TYVAR + | "(" Ty ")" => ("(" ^ Ty ^ ")") + | "{" RowList "}" => ("{ " ^ RowList ^" } ") + | "{" "}" => ("{}") + | QualID + ; + +Row : Label ":" Ty => (Label ^ " : " ^ Ty) ; +RowList : Row ("," Row)* => (String.concatWith "," (Row::SR)) ; + +Label : ID | INT ; + +QualID + : ID + | IDDOT QualID => (IDDOT ^ QualID) + ; + +Abbrev : "(" STRING ")" => (Atom.atom STRING) ; + +%nonterm Code of GrammarSyntax.code ; +Code : CODE => ((!liftSpan) FULL_SPAN, CODE) ; diff --git a/ml-lpt/ml-antlr/FrontEnds/ml-antlr-based/spec.grm.sml b/ml-lpt/ml-antlr/FrontEnds/ml-antlr-based/spec.grm.sml new file mode 100644 index 0000000..4bc1c67 --- /dev/null +++ b/ml-lpt/ml-antlr/FrontEnds/ml-antlr-based/spec.grm.sml @@ -0,0 +1,1493 @@ +structure +SpecTokens = struct + + datatype token = EOF + | BOGUS + | INT of string + | TYVAR of string + | IDDOT of string + | STRING of string + | ID of string + | CODE of string + | OF + | REFSET + | DARROW + | ARROW + | EQ + | QUERY + | PLUS + | STAR + | BAR + | DOLLAR + | AT + | COMMA + | SEMI + | COLON + | SLASH + | RCB + | LCB + | RSB + | LSB + | RP + | LP + | KW_where + | KW_value + | KW_try + | KW_tokentype + | KW_tokens + | KW_start + | KW_refcell + | KW_prefer + | KW_nonterms + | KW_name + | KW_keywords + | KW_import + | KW_header + | KW_entry + | KW_dropping + | KW_defs + | KW_change + + val allToks = [EOF, BOGUS, OF, REFSET, DARROW, ARROW, EQ, QUERY, PLUS, STAR, BAR, DOLLAR, AT, COMMA, SEMI, COLON, SLASH, RCB, LCB, RSB, LSB, RP, LP, KW_where, KW_value, KW_try, KW_tokentype, KW_tokens, KW_start, KW_refcell, KW_prefer, KW_nonterms, KW_name, KW_keywords, KW_import, KW_header, KW_entry, KW_dropping, KW_defs, KW_change] + + fun toString tok = +(case (tok) + of (EOF) => "EOF" + | (BOGUS) => "BOGUS" + | (INT(_)) => "INT" + | (TYVAR(_)) => "TYVAR" + | (IDDOT(_)) => "IDDOT" + | (STRING(_)) => "STRING" + | (ID(_)) => "ID" + | (CODE(_)) => "CODE" + | (OF) => "of" + | (REFSET) => ":=" + | (DARROW) => "=>" + | (ARROW) => "->" + | (EQ) => "=" + | (QUERY) => "?" + | (PLUS) => "+" + | (STAR) => "*" + | (BAR) => "|" + | (DOLLAR) => "$" + | (AT) => "@" + | (COMMA) => "," + | (SEMI) => ";" + | (COLON) => ":" + | (SLASH) => "/" + | (RCB) => "}" + | (LCB) => "{" + | (RSB) => "]" + | (LSB) => "[" + | (RP) => ")" + | (LP) => "(" + | (KW_where) => "%where" + | (KW_value) => "%value" + | (KW_try) => "%try" + | (KW_tokentype) => "%tokentype" + | (KW_tokens) => "%tokens" + | (KW_start) => "%start" + | (KW_refcell) => "%refcell" + | (KW_prefer) => "%prefer" + | (KW_nonterms) => "%nonterms" + | (KW_name) => "%name" + | (KW_keywords) => "%keywords" + | (KW_import) => "%import" + | (KW_header) => "%header" + | (KW_entry) => "%entry" + | (KW_dropping) => "%dropping" + | (KW_defs) => "%defs" + | (KW_change) => "%change" +(* end case *)) + fun isKW tok = +(case (tok) + of (EOF) => false + | (BOGUS) => false + | (INT(_)) => false + | (TYVAR(_)) => false + | (IDDOT(_)) => false + | (STRING(_)) => false + | (ID(_)) => false + | (CODE(_)) => false + | (OF) => false + | (REFSET) => false + | (DARROW) => false + | (ARROW) => false + | (EQ) => false + | (QUERY) => false + | (PLUS) => false + | (STAR) => false + | (BAR) => false + | (DOLLAR) => false + | (AT) => false + | (COMMA) => false + | (SEMI) => false + | (COLON) => false + | (SLASH) => false + | (RCB) => false + | (LCB) => false + | (RSB) => false + | (LSB) => false + | (RP) => false + | (LP) => false + | (KW_where) => false + | (KW_value) => false + | (KW_try) => false + | (KW_tokentype) => false + | (KW_tokens) => false + | (KW_start) => false + | (KW_refcell) => false + | (KW_prefer) => false + | (KW_nonterms) => false + | (KW_name) => false + | (KW_keywords) => false + | (KW_import) => false + | (KW_header) => false + | (KW_entry) => false + | (KW_dropping) => false + | (KW_defs) => false + | (KW_change) => false +(* end case *)) + + fun isEOF EOF = true + | isEOF _ = false + +end + +functor SpecParseFn(Lex : ANTLR_LEXER) = struct + + local + structure Tok = +SpecTokens + structure UserCode = + struct + + structure GS = GrammarSyntax + structure StreamPos = AntlrStreamPos + + fun lift f (vspan, v) = (vspan, f v) + fun mapFst f (fst, snd) = (f fst, snd) + + val trimQuotes = + Substring.string o + (Substring.triml 1) o + (Substring.trimr 1) o + Substring.full + + +fun File_PROD_1_SUBRULE_1_PROD_1_ACT (sm, fileName, FULL_SPAN : (Lex.pos * Lex.pos), liftSpan) = + (liftSpan := (fn (p1, p2) => ((fileName, StreamPos.lineNo sm p1, + StreamPos.colNo sm p1), + (fileName, StreamPos.lineNo sm p2, + StreamPos.colNo sm p2))) + ) +fun File_PROD_1_ACT (sm, SR1, SR2, fileName, SR1_SPAN : (Lex.pos * Lex.pos), SR2_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), liftSpan) = + (map (mapFst (!liftSpan)) (List.concat SR2)) : GS.grammar +fun Decl_PROD_1_ACT (ID, KW_name, ID_SPAN : (Lex.pos * Lex.pos), KW_name_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), liftSpan) = + ([ (FULL_SPAN, GS.NAME ID) ]) : (StreamPos.span * GS.decl) list +fun Decl_PROD_2_ACT (Code, KW_header, Code_SPAN : (Lex.pos * Lex.pos), KW_header_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), liftSpan) = + ([ (FULL_SPAN, GS.HEADER Code) ]) : (StreamPos.span * GS.decl) list +fun Decl_PROD_3_ACT (ID, KW_start, ID_SPAN : (Lex.pos * Lex.pos), KW_start_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), liftSpan) = + ([ (FULL_SPAN, GS.START (Atom.atom ID)) ]) : (StreamPos.span * GS.decl) list +fun Decl_PROD_4_ACT (KW_entry, IDList, KW_entry_SPAN : (Lex.pos * Lex.pos), IDList_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), liftSpan) = + (map (lift (GS.ENTRY o Atom.atom)) IDList) : (StreamPos.span * GS.decl) list +fun Decl_PROD_5_ACT (KW_keywords, SymList, KW_keywords_SPAN : (Lex.pos * Lex.pos), SymList_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), liftSpan) = + (map (lift GS.KEYWORD) SymList) : (StreamPos.span * GS.decl) list +fun Decl_PROD_6_ACT (ID, Code, KW_value, ID_SPAN : (Lex.pos * Lex.pos), Code_SPAN : (Lex.pos * Lex.pos), KW_value_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), liftSpan) = + ([ (FULL_SPAN, GS.VALUE (Atom.atom ID, Code)) ]) : (StreamPos.span * GS.decl) list +fun Decl_PROD_7_ACT (SymList, KW_prefer, SymList_SPAN : (Lex.pos * Lex.pos), KW_prefer_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), liftSpan) = + (map (lift GS.PREFER) SymList) : (StreamPos.span * GS.decl) list +fun Decl_PROD_8_ACT (ChangeList, KW_change, ChangeList_SPAN : (Lex.pos * Lex.pos), KW_change_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), liftSpan) = + (map (lift GS.CHANGE) ChangeList) : (StreamPos.span * GS.decl) list +fun Decl_PROD_9_ACT (Code, KW_defs, Code_SPAN : (Lex.pos * Lex.pos), KW_defs_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), liftSpan) = + ([ (FULL_SPAN, GS.DEFS Code) ]) : (StreamPos.span * GS.decl) list +fun Decl_PROD_10_ACT (QualID, KW_tokentype, QualID_SPAN : (Lex.pos * Lex.pos), KW_tokentype_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), liftSpan) = + ([ (FULL_SPAN, GS.TOKENTYPE QualID)]) : (StreamPos.span * GS.decl) list +fun Decl_PROD_11_ACT (COLON, KW_tokens, ConstrList, COLON_SPAN : (Lex.pos * Lex.pos), KW_tokens_SPAN : (Lex.pos * Lex.pos), ConstrList_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), liftSpan) = + (List.map (lift GS.TOKEN) ConstrList) : (StreamPos.span * GS.decl) list +fun Decl_PROD_12_ACT (SR, STRING, KW_import, SR_SPAN : (Lex.pos * Lex.pos), STRING_SPAN : (Lex.pos * Lex.pos), KW_import_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), liftSpan) = + ([ (FULL_SPAN, GS.IMPORT{ + filename = trimQuotes STRING, + dropping = map (mapFst (!liftSpan)) (getOpt (SR, [])) + }) ]) : (StreamPos.span * GS.decl) list +fun Decl_PROD_13_ACT (EQ, ID, Ty, Code, COLON, KW_refcell, EQ_SPAN : (Lex.pos * Lex.pos), ID_SPAN : (Lex.pos * Lex.pos), Ty_SPAN : (Lex.pos * Lex.pos), Code_SPAN : (Lex.pos * Lex.pos), COLON_SPAN : (Lex.pos * Lex.pos), KW_refcell_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), liftSpan) = + ([ (FULL_SPAN, GS.REFCELL (ID, Ty, Code)) ]) : (StreamPos.span * GS.decl) list +fun Decl_PROD_14_ACT (SR, COLON, TyAnn, KW_nonterms, SR_SPAN : (Lex.pos * Lex.pos), COLON_SPAN : (Lex.pos * Lex.pos), TyAnn_SPAN : (Lex.pos * Lex.pos), KW_nonterms_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), liftSpan) = + (map (lift GS.NONTERM) (TyAnn::SR)) : (StreamPos.span * GS.decl) list +fun Decl_PROD_15_ACT (ID, COLON, AltList, Formals, ID_SPAN : (Lex.pos * Lex.pos), COLON_SPAN : (Lex.pos * Lex.pos), AltList_SPAN : (Lex.pos * Lex.pos), Formals_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), liftSpan) = + (map (fn (span, alt) => + (span, GS.RULE { + lhs = Atom.atom ID, + formals = getOpt(Formals, []), + rhs = alt + })) AltList) : (StreamPos.span * GS.decl) list +fun Formals_PROD_1_ACT (ID, LP, RP, SR, ID_SPAN : (Lex.pos * Lex.pos), LP_SPAN : (Lex.pos * Lex.pos), RP_SPAN : (Lex.pos * Lex.pos), SR_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), liftSpan) = + (ID::SR) +fun IDList_PROD_1_ACT (SR, ID', SR_SPAN : (Lex.pos * Lex.pos), ID'_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), liftSpan) = + (ID'::SR) +fun SymList_PROD_1_ACT (SR, Symbol', SR_SPAN : (Lex.pos * Lex.pos), Symbol'_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), liftSpan) = + (Symbol'::SR) +fun ChangeList_PROD_1_ACT (SR, Change, SR_SPAN : (Lex.pos * Lex.pos), Change_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), liftSpan) = + (Change::SR) +fun Change_PROD_1_ACT (ARROW, Symbol1, Symbol2, ARROW_SPAN : (Lex.pos * Lex.pos), Symbol1_SPAN : (Lex.pos * Lex.pos), Symbol2_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), liftSpan) = + (FULL_SPAN, (Symbol1, Symbol2)) +fun AltList_PROD_1_ACT (SR, Alt, SR_SPAN : (Lex.pos * Lex.pos), Alt_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), liftSpan) = + (Alt::SR) +fun TyAnn_PROD_1_SUBRULE_1_PROD_2_ACT (ID, ID_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), liftSpan) = + ("unit") +fun TyAnn_PROD_1_ACT (ID, SR, ID_SPAN : (Lex.pos * Lex.pos), SR_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), liftSpan) = + (FULL_SPAN, (Atom.atom ID, SR)) +fun Alt_PROD_1_ACT (SR1, SR2, NamedItem, KW_try, SR1_SPAN : (Lex.pos * Lex.pos), SR2_SPAN : (Lex.pos * Lex.pos), NamedItem_SPAN : (Lex.pos * Lex.pos), KW_try_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), liftSpan) = + (FULL_SPAN, GS.RHS { + items = NamedItem, + try = isSome KW_try, + predicate = SR1, + action = SR2, + loc = (!liftSpan) FULL_SPAN + }) +fun NamedItem_PROD_1_ACT (SR, Item, SR_SPAN : (Lex.pos * Lex.pos), Item_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), liftSpan) = + (SR, Item) +fun Item_PROD_1_SUBRULE_1_PROD_1_ACT (STAR, PrimItem, STAR_SPAN : (Lex.pos * Lex.pos), PrimItem_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), liftSpan) = + ((!liftSpan) PrimItem_SPAN, GS.CLOS PrimItem) +fun Item_PROD_1_SUBRULE_1_PROD_2_ACT (PLUS, PrimItem, PLUS_SPAN : (Lex.pos * Lex.pos), PrimItem_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), liftSpan) = + ((!liftSpan) PrimItem_SPAN, GS.POSCLOS PrimItem) +fun Item_PROD_1_SUBRULE_1_PROD_3_ACT (QUERY, PrimItem, QUERY_SPAN : (Lex.pos * Lex.pos), PrimItem_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), liftSpan) = + ((!liftSpan) PrimItem_SPAN, GS.OPT PrimItem) +fun Item_PROD_1_SUBRULE_1_PROD_4_ACT (PrimItem, PrimItem_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), liftSpan) = + (PrimItem) +fun Item_PROD_1_ACT (SR, PrimItem, SR_SPAN : (Lex.pos * Lex.pos), PrimItem_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), liftSpan) = + (SR) +fun PrimItem_PROD_1_ACT (SR, Symbol, SR_SPAN : (Lex.pos * Lex.pos), Symbol_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), liftSpan) = + ((!liftSpan) FULL_SPAN, GS.SYMBOL (Symbol, SR)) +fun PrimItem_PROD_2_ACT (LP, RP, AltList, LP_SPAN : (Lex.pos * Lex.pos), RP_SPAN : (Lex.pos * Lex.pos), AltList_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), liftSpan) = + ((!liftSpan) FULL_SPAN, GS.SUBRULE (map (fn (_, alt) => alt) AltList)) +fun ID'_PROD_1_ACT (ID, ID_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), liftSpan) = + (ID_SPAN, ID) +fun Symbol'_PROD_1_ACT (Symbol, Symbol_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), liftSpan) = + (FULL_SPAN, Symbol) +fun Symbol_PROD_1_ACT (ID, ID_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), liftSpan) = + (Atom.atom ID) +fun Symbol_PROD_2_ACT (STRING, STRING_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), liftSpan) = + (Atom.atom STRING) +fun Constr_PROD_1_ACT (ID, SR, Abbrev, ID_SPAN : (Lex.pos * Lex.pos), SR_SPAN : (Lex.pos * Lex.pos), Abbrev_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), liftSpan) = + (FULL_SPAN, (Atom.atom ID, SR, Abbrev)) +fun ConstrList_PROD_1_ACT (SR, Constr, SR_SPAN : (Lex.pos * Lex.pos), Constr_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), liftSpan) = + (Constr::SR) +fun TyFun_PROD_1_ACT (SR, TyProd, SR_SPAN : (Lex.pos * Lex.pos), TyProd_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), liftSpan) = + (String.concatWith " -> " (TyProd::SR)) +fun TyProd_PROD_1_ACT (SR, TyApp, SR_SPAN : (Lex.pos * Lex.pos), TyApp_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), liftSpan) = + (String.concatWith " * " (TyApp::SR)) +fun TyApp_PROD_1_ACT (LP, RP, SR, Ty, QualID, LP_SPAN : (Lex.pos * Lex.pos), RP_SPAN : (Lex.pos * Lex.pos), SR_SPAN : (Lex.pos * Lex.pos), Ty_SPAN : (Lex.pos * Lex.pos), QualID_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), liftSpan) = + ("(" ^ String.concatWith "," (Ty::SR) ^ ") " ^ QualID) +fun TyApp_PROD_2_SUBRULE_1_PROD_1_ACT (QualID, TyAtom, QualID_SPAN : (Lex.pos * Lex.pos), TyAtom_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), liftSpan) = + (TyAtom ^ " " ^ QualID) +fun TyApp_PROD_2_SUBRULE_1_PROD_2_ACT (TyAtom, TyAtom_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), liftSpan) = + (TyAtom) +fun TyApp_PROD_2_ACT (SR, TyAtom, SR_SPAN : (Lex.pos * Lex.pos), TyAtom_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), liftSpan) = + (SR) +fun TyAtom_PROD_2_ACT (LP, RP, Ty, LP_SPAN : (Lex.pos * Lex.pos), RP_SPAN : (Lex.pos * Lex.pos), Ty_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), liftSpan) = + ("(" ^ Ty ^ ")") +fun TyAtom_PROD_3_ACT (LCB, RCB, RowList, LCB_SPAN : (Lex.pos * Lex.pos), RCB_SPAN : (Lex.pos * Lex.pos), RowList_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), liftSpan) = + ("{ " ^ RowList ^" } ") +fun TyAtom_PROD_4_ACT (LCB, RCB, LCB_SPAN : (Lex.pos * Lex.pos), RCB_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), liftSpan) = + ("{}") +fun Row_PROD_1_ACT (Ty, COLON, Label, Ty_SPAN : (Lex.pos * Lex.pos), COLON_SPAN : (Lex.pos * Lex.pos), Label_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), liftSpan) = + (Label ^ " : " ^ Ty) +fun RowList_PROD_1_ACT (SR, Row, SR_SPAN : (Lex.pos * Lex.pos), Row_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), liftSpan) = + (String.concatWith "," (Row::SR)) +fun QualID_PROD_2_ACT (QualID, IDDOT, QualID_SPAN : (Lex.pos * Lex.pos), IDDOT_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), liftSpan) = + (IDDOT ^ QualID) +fun Abbrev_PROD_1_ACT (LP, RP, STRING, LP_SPAN : (Lex.pos * Lex.pos), RP_SPAN : (Lex.pos * Lex.pos), STRING_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), liftSpan) = + (Atom.atom STRING) +fun Code_PROD_1_ACT (CODE, CODE_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), liftSpan) = + ((!liftSpan) FULL_SPAN, CODE) : GrammarSyntax.code +fun mkliftSpan_REFC() : (AntlrStreamPos.span -> GrammarSyntax.span) ref = ref (fn _ => let val z = ("", 0, 0) in (z, z) end) + end (* UserCode *) + + structure Err = AntlrErrHandler( + structure Tok = Tok + structure Lex = Lex) + +(* replace functor with inline structure for better optimization + structure EBNF = AntlrEBNF( + struct + type strm = Err.wstream + val getSpan = Err.getSpan + end) +*) + structure EBNF = + struct + fun optional (pred, parse, strm) = + if pred strm + then let + val (y, span, strm') = parse strm + in + (SOME y, span, strm') + end + else (NONE, Err.getSpan strm, strm) + + fun closure (pred, parse, strm) = let + fun iter (strm, (left, right), ys) = + if pred strm + then let + val (y, (_, right'), strm') = parse strm + in iter (strm', (left, right'), y::ys) + end + else (List.rev ys, (left, right), strm) + in + iter (strm, Err.getSpan strm, []) + end + + fun posclos (pred, parse, strm) = let + val (y, (left, _), strm') = parse strm + val (ys, (_, right), strm'') = closure (pred, parse, strm') + in + (y::ys, (left, right), strm'') + end + end + + fun mk lexFn = let +val liftSpan_REFC = UserCode.mkliftSpan_REFC() +fun getS() = {liftSpan = !liftSpan_REFC} +fun putS{liftSpan} = (liftSpan_REFC := liftSpan) +fun unwrap (ret, strm, repairs) = (ret, strm, repairs, getS()) + val (eh, lex) = Err.mkErrHandler {get = getS, put = putS} + fun fail() = Err.failure eh + fun tryProds (strm, prods) = let + fun try [] = fail() + | try (prod :: prods) = + (Err.whileDisabled eh (fn() => prod strm)) + handle Err.ParseError => try (prods) + in try prods end +fun matchEOF strm = (case (lex(strm)) + of (Tok.EOF, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchBOGUS strm = (case (lex(strm)) + of (Tok.BOGUS, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchINT strm = (case (lex(strm)) + of (Tok.INT(x), span, strm') => (x, span, strm') + | _ => fail() +(* end case *)) +fun matchTYVAR strm = (case (lex(strm)) + of (Tok.TYVAR(x), span, strm') => (x, span, strm') + | _ => fail() +(* end case *)) +fun matchIDDOT strm = (case (lex(strm)) + of (Tok.IDDOT(x), span, strm') => (x, span, strm') + | _ => fail() +(* end case *)) +fun matchSTRING strm = (case (lex(strm)) + of (Tok.STRING(x), span, strm') => (x, span, strm') + | _ => fail() +(* end case *)) +fun matchID strm = (case (lex(strm)) + of (Tok.ID(x), span, strm') => (x, span, strm') + | _ => fail() +(* end case *)) +fun matchCODE strm = (case (lex(strm)) + of (Tok.CODE(x), span, strm') => (x, span, strm') + | _ => fail() +(* end case *)) +fun matchOF strm = (case (lex(strm)) + of (Tok.OF, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchREFSET strm = (case (lex(strm)) + of (Tok.REFSET, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchDARROW strm = (case (lex(strm)) + of (Tok.DARROW, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchARROW strm = (case (lex(strm)) + of (Tok.ARROW, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchEQ strm = (case (lex(strm)) + of (Tok.EQ, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchQUERY strm = (case (lex(strm)) + of (Tok.QUERY, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchPLUS strm = (case (lex(strm)) + of (Tok.PLUS, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchSTAR strm = (case (lex(strm)) + of (Tok.STAR, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchBAR strm = (case (lex(strm)) + of (Tok.BAR, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchDOLLAR strm = (case (lex(strm)) + of (Tok.DOLLAR, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchAT strm = (case (lex(strm)) + of (Tok.AT, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchCOMMA strm = (case (lex(strm)) + of (Tok.COMMA, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchSEMI strm = (case (lex(strm)) + of (Tok.SEMI, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchCOLON strm = (case (lex(strm)) + of (Tok.COLON, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchSLASH strm = (case (lex(strm)) + of (Tok.SLASH, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchRCB strm = (case (lex(strm)) + of (Tok.RCB, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchLCB strm = (case (lex(strm)) + of (Tok.LCB, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchRSB strm = (case (lex(strm)) + of (Tok.RSB, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchLSB strm = (case (lex(strm)) + of (Tok.LSB, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchRP strm = (case (lex(strm)) + of (Tok.RP, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchLP strm = (case (lex(strm)) + of (Tok.LP, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchKW_where strm = (case (lex(strm)) + of (Tok.KW_where, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchKW_value strm = (case (lex(strm)) + of (Tok.KW_value, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchKW_try strm = (case (lex(strm)) + of (Tok.KW_try, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchKW_tokentype strm = (case (lex(strm)) + of (Tok.KW_tokentype, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchKW_tokens strm = (case (lex(strm)) + of (Tok.KW_tokens, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchKW_start strm = (case (lex(strm)) + of (Tok.KW_start, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchKW_refcell strm = (case (lex(strm)) + of (Tok.KW_refcell, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchKW_prefer strm = (case (lex(strm)) + of (Tok.KW_prefer, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchKW_nonterms strm = (case (lex(strm)) + of (Tok.KW_nonterms, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchKW_name strm = (case (lex(strm)) + of (Tok.KW_name, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchKW_keywords strm = (case (lex(strm)) + of (Tok.KW_keywords, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchKW_import strm = (case (lex(strm)) + of (Tok.KW_import, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchKW_header strm = (case (lex(strm)) + of (Tok.KW_header, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchKW_entry strm = (case (lex(strm)) + of (Tok.KW_entry, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchKW_dropping strm = (case (lex(strm)) + of (Tok.KW_dropping, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchKW_defs strm = (case (lex(strm)) + of (Tok.KW_defs, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchKW_change strm = (case (lex(strm)) + of (Tok.KW_change, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) + +val (File_NT) = +let +fun Code_NT (strm) = let + val (CODE_RES, CODE_SPAN, strm') = matchCODE(strm) + val FULL_SPAN = (#1(CODE_SPAN), #2(CODE_SPAN)) + in + (UserCode.Code_PROD_1_ACT (CODE_RES, CODE_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), liftSpan_REFC), + FULL_SPAN, strm') + end +fun Symbol_NT (strm) = let + fun Symbol_PROD_1 (strm) = let + val (ID_RES, ID_SPAN, strm') = matchID(strm) + val FULL_SPAN = (#1(ID_SPAN), #2(ID_SPAN)) + in + (UserCode.Symbol_PROD_1_ACT (ID_RES, ID_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), liftSpan_REFC), + FULL_SPAN, strm') + end + fun Symbol_PROD_2 (strm) = let + val (STRING_RES, STRING_SPAN, strm') = matchSTRING(strm) + val FULL_SPAN = (#1(STRING_SPAN), #2(STRING_SPAN)) + in + (UserCode.Symbol_PROD_2_ACT (STRING_RES, STRING_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), liftSpan_REFC), + FULL_SPAN, strm') + end + in + (case (lex(strm)) + of (Tok.STRING(_), _, strm') => Symbol_PROD_2(strm) + | (Tok.ID(_), _, strm') => Symbol_PROD_1(strm) + | _ => fail() + (* end case *)) + end +fun AltList_NT (strm) = let + val (Alt_RES, Alt_SPAN, strm') = Alt_NT(strm) + fun AltList_PROD_1_SUBRULE_1_NT (strm) = let + val (BAR_RES, BAR_SPAN, strm') = matchBAR(strm) + val (Alt_RES, Alt_SPAN, strm') = Alt_NT(strm') + val FULL_SPAN = (#1(BAR_SPAN), #2(Alt_SPAN)) + in + ((Alt_RES), FULL_SPAN, strm') + end + fun AltList_PROD_1_SUBRULE_1_PRED (strm) = (case (lex(strm)) + of (Tok.BAR, _, strm') => true + | _ => false + (* end case *)) + val (SR_RES, SR_SPAN, strm') = EBNF.closure(AltList_PROD_1_SUBRULE_1_PRED, AltList_PROD_1_SUBRULE_1_NT, strm') + val FULL_SPAN = (#1(Alt_SPAN), #2(SR_SPAN)) + in + (UserCode.AltList_PROD_1_ACT (SR_RES, Alt_RES, SR_SPAN : (Lex.pos * Lex.pos), Alt_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), liftSpan_REFC), + FULL_SPAN, strm') + end +and Alt_NT (strm) = let + fun Alt_PROD_1_SUBRULE_1_NT (strm) = let + val (KW_try_RES, KW_try_SPAN, strm') = matchKW_try(strm) + val FULL_SPAN = (#1(KW_try_SPAN), #2(KW_try_SPAN)) + in + ((), FULL_SPAN, strm') + end + fun Alt_PROD_1_SUBRULE_1_PRED (strm) = (case (lex(strm)) + of (Tok.KW_try, _, strm') => true + | _ => false + (* end case *)) + val (KW_try_RES, KW_try_SPAN, strm') = EBNF.optional(Alt_PROD_1_SUBRULE_1_PRED, Alt_PROD_1_SUBRULE_1_NT, strm) + fun Alt_PROD_1_SUBRULE_2_NT (strm) = let + val (NamedItem_RES, NamedItem_SPAN, strm') = NamedItem_NT(strm) + val FULL_SPAN = (#1(NamedItem_SPAN), #2(NamedItem_SPAN)) + in + ((NamedItem_RES), FULL_SPAN, strm') + end + fun Alt_PROD_1_SUBRULE_2_PRED (strm) = (case (lex(strm)) + of (Tok.LP, _, strm') => true + | (Tok.ID(_), _, strm') => true + | (Tok.STRING(_), _, strm') => true + | _ => false + (* end case *)) + val (NamedItem_RES, NamedItem_SPAN, strm') = EBNF.closure(Alt_PROD_1_SUBRULE_2_PRED, Alt_PROD_1_SUBRULE_2_NT, strm') + fun Alt_PROD_1_SUBRULE_3_NT (strm) = let + val (KW_where_RES, KW_where_SPAN, strm') = matchKW_where(strm) + val (Code_RES, Code_SPAN, strm') = Code_NT(strm') + val FULL_SPAN = (#1(KW_where_SPAN), #2(Code_SPAN)) + in + ((Code_RES), FULL_SPAN, strm') + end + fun Alt_PROD_1_SUBRULE_3_PRED (strm) = (case (lex(strm)) + of (Tok.KW_where, _, strm') => true + | _ => false + (* end case *)) + val (SR1_RES, SR1_SPAN, strm') = EBNF.optional(Alt_PROD_1_SUBRULE_3_PRED, Alt_PROD_1_SUBRULE_3_NT, strm') + fun Alt_PROD_1_SUBRULE_4_NT (strm) = let + val (DARROW_RES, DARROW_SPAN, strm') = matchDARROW(strm) + val (Code_RES, Code_SPAN, strm') = Code_NT(strm') + val FULL_SPAN = (#1(DARROW_SPAN), #2(Code_SPAN)) + in + ((Code_RES), FULL_SPAN, strm') + end + fun Alt_PROD_1_SUBRULE_4_PRED (strm) = (case (lex(strm)) + of (Tok.DARROW, _, strm') => true + | _ => false + (* end case *)) + val (SR2_RES, SR2_SPAN, strm') = EBNF.optional(Alt_PROD_1_SUBRULE_4_PRED, Alt_PROD_1_SUBRULE_4_NT, strm') + val FULL_SPAN = (#1(KW_try_SPAN), #2(SR2_SPAN)) + in + (UserCode.Alt_PROD_1_ACT (SR1_RES, SR2_RES, NamedItem_RES, KW_try_RES, SR1_SPAN : (Lex.pos * Lex.pos), SR2_SPAN : (Lex.pos * Lex.pos), NamedItem_SPAN : (Lex.pos * Lex.pos), KW_try_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), liftSpan_REFC), + FULL_SPAN, strm') + end +and NamedItem_NT (strm) = let + fun NamedItem_PROD_1_SUBRULE_1_NT (strm) = let + val (ID_RES, ID_SPAN, strm') = matchID(strm) + val (EQ_RES, EQ_SPAN, strm') = matchEQ(strm') + val FULL_SPAN = (#1(ID_SPAN), #2(EQ_SPAN)) + in + ((ID_RES), FULL_SPAN, strm') + end + fun NamedItem_PROD_1_SUBRULE_1_PRED (strm) = (case (lex(strm)) + of (Tok.ID(_), _, strm') => + (case (lex(strm')) + of (Tok.EQ, _, strm') => true + | _ => false + (* end case *)) + | _ => false + (* end case *)) + val (SR_RES, SR_SPAN, strm') = EBNF.optional(NamedItem_PROD_1_SUBRULE_1_PRED, NamedItem_PROD_1_SUBRULE_1_NT, strm) + val (Item_RES, Item_SPAN, strm') = Item_NT(strm') + val FULL_SPAN = (#1(SR_SPAN), #2(Item_SPAN)) + in + (UserCode.NamedItem_PROD_1_ACT (SR_RES, Item_RES, SR_SPAN : (Lex.pos * Lex.pos), Item_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), liftSpan_REFC), + FULL_SPAN, strm') + end +and Item_NT (strm) = let + val (PrimItem_RES, PrimItem_SPAN, strm') = PrimItem_NT(strm) + val (SR_RES, SR_SPAN, strm') = let + fun Item_PROD_1_SUBRULE_1_NT (strm) = let + fun Item_PROD_1_SUBRULE_1_PROD_1 (strm) = let + val (STAR_RES, STAR_SPAN, strm') = matchSTAR(strm) + val FULL_SPAN = (#1(STAR_SPAN), #2(STAR_SPAN)) + in + (UserCode.Item_PROD_1_SUBRULE_1_PROD_1_ACT (STAR_RES, PrimItem_RES, STAR_SPAN : (Lex.pos * Lex.pos), PrimItem_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), liftSpan_REFC), + FULL_SPAN, strm') + end + fun Item_PROD_1_SUBRULE_1_PROD_2 (strm) = let + val (PLUS_RES, PLUS_SPAN, strm') = matchPLUS(strm) + val FULL_SPAN = (#1(PLUS_SPAN), #2(PLUS_SPAN)) + in + (UserCode.Item_PROD_1_SUBRULE_1_PROD_2_ACT (PLUS_RES, PrimItem_RES, PLUS_SPAN : (Lex.pos * Lex.pos), PrimItem_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), liftSpan_REFC), + FULL_SPAN, strm') + end + fun Item_PROD_1_SUBRULE_1_PROD_3 (strm) = let + val (QUERY_RES, QUERY_SPAN, strm') = matchQUERY(strm) + val FULL_SPAN = (#1(QUERY_SPAN), #2(QUERY_SPAN)) + in + (UserCode.Item_PROD_1_SUBRULE_1_PROD_3_ACT (QUERY_RES, PrimItem_RES, QUERY_SPAN : (Lex.pos * Lex.pos), PrimItem_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), liftSpan_REFC), + FULL_SPAN, strm') + end + fun Item_PROD_1_SUBRULE_1_PROD_4 (strm) = let + val FULL_SPAN = (Err.getPos(strm), Err.getPos(strm)) + in + (UserCode.Item_PROD_1_SUBRULE_1_PROD_4_ACT (PrimItem_RES, PrimItem_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), liftSpan_REFC), + FULL_SPAN, strm) + end + in + (case (lex(strm)) + of (Tok.KW_where, _, strm') => + Item_PROD_1_SUBRULE_1_PROD_4(strm) + | (Tok.LP, _, strm') => Item_PROD_1_SUBRULE_1_PROD_4(strm) + | (Tok.RP, _, strm') => Item_PROD_1_SUBRULE_1_PROD_4(strm) + | (Tok.SEMI, _, strm') => Item_PROD_1_SUBRULE_1_PROD_4(strm) + | (Tok.BAR, _, strm') => Item_PROD_1_SUBRULE_1_PROD_4(strm) + | (Tok.DARROW, _, strm') => Item_PROD_1_SUBRULE_1_PROD_4(strm) + | (Tok.ID(_), _, strm') => Item_PROD_1_SUBRULE_1_PROD_4(strm) + | (Tok.STRING(_), _, strm') => + Item_PROD_1_SUBRULE_1_PROD_4(strm) + | (Tok.PLUS, _, strm') => Item_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.STAR, _, strm') => Item_PROD_1_SUBRULE_1_PROD_1(strm) + | (Tok.QUERY, _, strm') => Item_PROD_1_SUBRULE_1_PROD_3(strm) + | _ => fail() + (* end case *)) + end + in + Item_PROD_1_SUBRULE_1_NT(strm') + end + val FULL_SPAN = (#1(PrimItem_SPAN), #2(SR_SPAN)) + in + (UserCode.Item_PROD_1_ACT (SR_RES, PrimItem_RES, SR_SPAN : (Lex.pos * Lex.pos), PrimItem_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), liftSpan_REFC), + FULL_SPAN, strm') + end +and PrimItem_NT (strm) = let + fun PrimItem_PROD_1 (strm) = let + val (Symbol_RES, Symbol_SPAN, strm') = Symbol_NT(strm) + fun PrimItem_PROD_1_SUBRULE_1_NT (strm) = let + val (AT_RES, AT_SPAN, strm') = matchAT(strm) + val (Code_RES, Code_SPAN, strm') = Code_NT(strm') + val FULL_SPAN = (#1(AT_SPAN), #2(Code_SPAN)) + in + ((Code_RES), FULL_SPAN, strm') + end + fun PrimItem_PROD_1_SUBRULE_1_PRED (strm) = (case (lex(strm)) + of (Tok.AT, _, strm') => true + | _ => false + (* end case *)) + val (SR_RES, SR_SPAN, strm') = EBNF.optional(PrimItem_PROD_1_SUBRULE_1_PRED, PrimItem_PROD_1_SUBRULE_1_NT, strm') + val FULL_SPAN = (#1(Symbol_SPAN), #2(SR_SPAN)) + in + (UserCode.PrimItem_PROD_1_ACT (SR_RES, Symbol_RES, SR_SPAN : (Lex.pos * Lex.pos), Symbol_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), liftSpan_REFC), + FULL_SPAN, strm') + end + fun PrimItem_PROD_2 (strm) = let + val (LP_RES, LP_SPAN, strm') = matchLP(strm) + val (AltList_RES, AltList_SPAN, strm') = AltList_NT(strm') + val (RP_RES, RP_SPAN, strm') = matchRP(strm') + val FULL_SPAN = (#1(LP_SPAN), #2(RP_SPAN)) + in + (UserCode.PrimItem_PROD_2_ACT (LP_RES, RP_RES, AltList_RES, LP_SPAN : (Lex.pos * Lex.pos), RP_SPAN : (Lex.pos * Lex.pos), AltList_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), liftSpan_REFC), + FULL_SPAN, strm') + end + in + (case (lex(strm)) + of (Tok.LP, _, strm') => PrimItem_PROD_2(strm) + | (Tok.ID(_), _, strm') => PrimItem_PROD_1(strm) + | (Tok.STRING(_), _, strm') => PrimItem_PROD_1(strm) + | _ => fail() + (* end case *)) + end +fun Formals_NT (strm) = let + val (LP_RES, LP_SPAN, strm') = matchLP(strm) + val (ID_RES, ID_SPAN, strm') = matchID(strm') + fun Formals_PROD_1_SUBRULE_1_NT (strm) = let + val (COMMA_RES, COMMA_SPAN, strm') = matchCOMMA(strm) + val (ID_RES, ID_SPAN, strm') = matchID(strm') + val FULL_SPAN = (#1(COMMA_SPAN), #2(ID_SPAN)) + in + ((ID_RES), FULL_SPAN, strm') + end + fun Formals_PROD_1_SUBRULE_1_PRED (strm) = (case (lex(strm)) + of (Tok.COMMA, _, strm') => true + | _ => false + (* end case *)) + val (SR_RES, SR_SPAN, strm') = EBNF.closure(Formals_PROD_1_SUBRULE_1_PRED, Formals_PROD_1_SUBRULE_1_NT, strm') + val (RP_RES, RP_SPAN, strm') = matchRP(strm') + val FULL_SPAN = (#1(LP_SPAN), #2(RP_SPAN)) + in + (UserCode.Formals_PROD_1_ACT (ID_RES, LP_RES, RP_RES, SR_RES, ID_SPAN : (Lex.pos * Lex.pos), LP_SPAN : (Lex.pos * Lex.pos), RP_SPAN : (Lex.pos * Lex.pos), SR_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), liftSpan_REFC), + FULL_SPAN, strm') + end +fun QualID_NT (strm) = let + fun QualID_PROD_1 (strm) = let + val (ID_RES, ID_SPAN, strm') = matchID(strm) + val FULL_SPAN = (#1(ID_SPAN), #2(ID_SPAN)) + in + ((ID_RES), FULL_SPAN, strm') + end + fun QualID_PROD_2 (strm) = let + val (IDDOT_RES, IDDOT_SPAN, strm') = matchIDDOT(strm) + val (QualID_RES, QualID_SPAN, strm') = QualID_NT(strm') + val FULL_SPAN = (#1(IDDOT_SPAN), #2(QualID_SPAN)) + in + (UserCode.QualID_PROD_2_ACT (QualID_RES, IDDOT_RES, QualID_SPAN : (Lex.pos * Lex.pos), IDDOT_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), liftSpan_REFC), + FULL_SPAN, strm') + end + in + (case (lex(strm)) + of (Tok.IDDOT(_), _, strm') => QualID_PROD_2(strm) + | (Tok.ID(_), _, strm') => QualID_PROD_1(strm) + | _ => fail() + (* end case *)) + end +fun Label_NT (strm) = let + fun Label_PROD_1 (strm) = let + val (ID_RES, ID_SPAN, strm') = matchID(strm) + val FULL_SPAN = (#1(ID_SPAN), #2(ID_SPAN)) + in + ((ID_RES), FULL_SPAN, strm') + end + fun Label_PROD_2 (strm) = let + val (INT_RES, INT_SPAN, strm') = matchINT(strm) + val FULL_SPAN = (#1(INT_SPAN), #2(INT_SPAN)) + in + ((INT_RES), FULL_SPAN, strm') + end + in + (case (lex(strm)) + of (Tok.INT(_), _, strm') => Label_PROD_2(strm) + | (Tok.ID(_), _, strm') => Label_PROD_1(strm) + | _ => fail() + (* end case *)) + end +fun Ty_NT (strm) = let + val (TyFun_RES, TyFun_SPAN, strm') = TyFun_NT(strm) + val FULL_SPAN = (#1(TyFun_SPAN), #2(TyFun_SPAN)) + in + ((TyFun_RES), FULL_SPAN, strm') + end +and TyFun_NT (strm) = let + val (TyProd_RES, TyProd_SPAN, strm') = TyProd_NT(strm) + fun TyFun_PROD_1_SUBRULE_1_NT (strm) = let + val (ARROW_RES, ARROW_SPAN, strm') = matchARROW(strm) + val (TyProd_RES, TyProd_SPAN, strm') = TyProd_NT(strm') + val FULL_SPAN = (#1(ARROW_SPAN), #2(TyProd_SPAN)) + in + ((TyProd_RES), FULL_SPAN, strm') + end + fun TyFun_PROD_1_SUBRULE_1_PRED (strm) = (case (lex(strm)) + of (Tok.ARROW, _, strm') => true + | _ => false + (* end case *)) + val (SR_RES, SR_SPAN, strm') = EBNF.closure(TyFun_PROD_1_SUBRULE_1_PRED, TyFun_PROD_1_SUBRULE_1_NT, strm') + val FULL_SPAN = (#1(TyProd_SPAN), #2(SR_SPAN)) + in + (UserCode.TyFun_PROD_1_ACT (SR_RES, TyProd_RES, SR_SPAN : (Lex.pos * Lex.pos), TyProd_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), liftSpan_REFC), + FULL_SPAN, strm') + end +and TyProd_NT (strm) = let + val (TyApp_RES, TyApp_SPAN, strm') = TyApp_NT(strm) + fun TyProd_PROD_1_SUBRULE_1_NT (strm) = let + val (STAR_RES, STAR_SPAN, strm') = matchSTAR(strm) + val (TyApp_RES, TyApp_SPAN, strm') = TyApp_NT(strm') + val FULL_SPAN = (#1(STAR_SPAN), #2(TyApp_SPAN)) + in + ((TyApp_RES), FULL_SPAN, strm') + end + fun TyProd_PROD_1_SUBRULE_1_PRED (strm) = (case (lex(strm)) + of (Tok.STAR, _, strm') => true + | _ => false + (* end case *)) + val (SR_RES, SR_SPAN, strm') = EBNF.closure(TyProd_PROD_1_SUBRULE_1_PRED, TyProd_PROD_1_SUBRULE_1_NT, strm') + val FULL_SPAN = (#1(TyApp_SPAN), #2(SR_SPAN)) + in + (UserCode.TyProd_PROD_1_ACT (SR_RES, TyApp_RES, SR_SPAN : (Lex.pos * Lex.pos), TyApp_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), liftSpan_REFC), + FULL_SPAN, strm') + end +and TyApp_NT (strm) = let + fun TyApp_PROD_1 (strm) = let + val (LP_RES, LP_SPAN, strm') = matchLP(strm) + val (Ty_RES, Ty_SPAN, strm') = Ty_NT(strm') + fun TyApp_PROD_1_SUBRULE_1_NT (strm) = let + val (COMMA_RES, COMMA_SPAN, strm') = matchCOMMA(strm) + val (Ty_RES, Ty_SPAN, strm') = Ty_NT(strm') + val FULL_SPAN = (#1(COMMA_SPAN), #2(Ty_SPAN)) + in + ((Ty_RES), FULL_SPAN, strm') + end + fun TyApp_PROD_1_SUBRULE_1_PRED (strm) = (case (lex(strm)) + of (Tok.COMMA, _, strm') => true + | _ => false + (* end case *)) + val (SR_RES, SR_SPAN, strm') = EBNF.posclos(TyApp_PROD_1_SUBRULE_1_PRED, TyApp_PROD_1_SUBRULE_1_NT, strm') + val (RP_RES, RP_SPAN, strm') = matchRP(strm') + val (QualID_RES, QualID_SPAN, strm') = QualID_NT(strm') + val FULL_SPAN = (#1(LP_SPAN), #2(QualID_SPAN)) + in + (UserCode.TyApp_PROD_1_ACT (LP_RES, RP_RES, SR_RES, Ty_RES, QualID_RES, LP_SPAN : (Lex.pos * Lex.pos), RP_SPAN : (Lex.pos * Lex.pos), SR_SPAN : (Lex.pos * Lex.pos), Ty_SPAN : (Lex.pos * Lex.pos), QualID_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), liftSpan_REFC), + FULL_SPAN, strm') + end + fun TyApp_PROD_2 (strm) = let + val (TyAtom_RES, TyAtom_SPAN, strm') = TyAtom_NT(strm) + val (SR_RES, SR_SPAN, strm') = let + fun TyApp_PROD_2_SUBRULE_1_NT (strm) = let + fun TyApp_PROD_2_SUBRULE_1_PROD_1 (strm) = let + val (QualID_RES, QualID_SPAN, strm') = QualID_NT(strm) + val FULL_SPAN = (#1(QualID_SPAN), #2(QualID_SPAN)) + in + (UserCode.TyApp_PROD_2_SUBRULE_1_PROD_1_ACT (QualID_RES, TyAtom_RES, QualID_SPAN : (Lex.pos * Lex.pos), TyAtom_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), liftSpan_REFC), + FULL_SPAN, strm') + end + fun TyApp_PROD_2_SUBRULE_1_PROD_2 (strm) = let + val FULL_SPAN = (Err.getPos(strm), Err.getPos(strm)) + in + (UserCode.TyApp_PROD_2_SUBRULE_1_PROD_2_ACT (TyAtom_RES, TyAtom_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), liftSpan_REFC), + FULL_SPAN, strm) + end + in + (case (lex(strm)) + of (Tok.LP, _, strm') => + TyApp_PROD_2_SUBRULE_1_PROD_2(strm) + | (Tok.RP, _, strm') => + TyApp_PROD_2_SUBRULE_1_PROD_2(strm) + | (Tok.RCB, _, strm') => + TyApp_PROD_2_SUBRULE_1_PROD_2(strm) + | (Tok.SEMI, _, strm') => + TyApp_PROD_2_SUBRULE_1_PROD_2(strm) + | (Tok.COMMA, _, strm') => + TyApp_PROD_2_SUBRULE_1_PROD_2(strm) + | (Tok.BAR, _, strm') => + TyApp_PROD_2_SUBRULE_1_PROD_2(strm) + | (Tok.STAR, _, strm') => + TyApp_PROD_2_SUBRULE_1_PROD_2(strm) + | (Tok.EQ, _, strm') => + TyApp_PROD_2_SUBRULE_1_PROD_2(strm) + | (Tok.ARROW, _, strm') => + TyApp_PROD_2_SUBRULE_1_PROD_2(strm) + | (Tok.ID(_), _, strm') => + TyApp_PROD_2_SUBRULE_1_PROD_1(strm) + | (Tok.IDDOT(_), _, strm') => + TyApp_PROD_2_SUBRULE_1_PROD_1(strm) + | _ => fail() + (* end case *)) + end + in + TyApp_PROD_2_SUBRULE_1_NT(strm') + end + val FULL_SPAN = (#1(TyAtom_SPAN), #2(SR_SPAN)) + in + (UserCode.TyApp_PROD_2_ACT (SR_RES, TyAtom_RES, SR_SPAN : (Lex.pos * Lex.pos), TyAtom_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), liftSpan_REFC), + FULL_SPAN, strm') + end + in + (case (lex(strm)) + of (Tok.LCB, _, strm') => TyApp_PROD_2(strm) + | (Tok.ID(_), _, strm') => TyApp_PROD_2(strm) + | (Tok.IDDOT(_), _, strm') => TyApp_PROD_2(strm) + | (Tok.TYVAR(_), _, strm') => TyApp_PROD_2(strm) + | (Tok.LP, _, strm') => tryProds(strm, [TyApp_PROD_1, TyApp_PROD_2]) + | _ => fail() + (* end case *)) + end +and TyAtom_NT (strm) = let + fun TyAtom_PROD_1 (strm) = let + val (TYVAR_RES, TYVAR_SPAN, strm') = matchTYVAR(strm) + val FULL_SPAN = (#1(TYVAR_SPAN), #2(TYVAR_SPAN)) + in + ((TYVAR_RES), FULL_SPAN, strm') + end + fun TyAtom_PROD_2 (strm) = let + val (LP_RES, LP_SPAN, strm') = matchLP(strm) + val (Ty_RES, Ty_SPAN, strm') = Ty_NT(strm') + val (RP_RES, RP_SPAN, strm') = matchRP(strm') + val FULL_SPAN = (#1(LP_SPAN), #2(RP_SPAN)) + in + (UserCode.TyAtom_PROD_2_ACT (LP_RES, RP_RES, Ty_RES, LP_SPAN : (Lex.pos * Lex.pos), RP_SPAN : (Lex.pos * Lex.pos), Ty_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), liftSpan_REFC), + FULL_SPAN, strm') + end + fun TyAtom_PROD_3 (strm) = let + val (LCB_RES, LCB_SPAN, strm') = matchLCB(strm) + val (RowList_RES, RowList_SPAN, strm') = RowList_NT(strm') + val (RCB_RES, RCB_SPAN, strm') = matchRCB(strm') + val FULL_SPAN = (#1(LCB_SPAN), #2(RCB_SPAN)) + in + (UserCode.TyAtom_PROD_3_ACT (LCB_RES, RCB_RES, RowList_RES, LCB_SPAN : (Lex.pos * Lex.pos), RCB_SPAN : (Lex.pos * Lex.pos), RowList_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), liftSpan_REFC), + FULL_SPAN, strm') + end + fun TyAtom_PROD_4 (strm) = let + val (LCB_RES, LCB_SPAN, strm') = matchLCB(strm) + val (RCB_RES, RCB_SPAN, strm') = matchRCB(strm') + val FULL_SPAN = (#1(LCB_SPAN), #2(RCB_SPAN)) + in + (UserCode.TyAtom_PROD_4_ACT (LCB_RES, RCB_RES, LCB_SPAN : (Lex.pos * Lex.pos), RCB_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), liftSpan_REFC), + FULL_SPAN, strm') + end + fun TyAtom_PROD_5 (strm) = let + val (QualID_RES, QualID_SPAN, strm') = QualID_NT(strm) + val FULL_SPAN = (#1(QualID_SPAN), #2(QualID_SPAN)) + in + ((QualID_RES), FULL_SPAN, strm') + end + in + (case (lex(strm)) + of (Tok.ID(_), _, strm') => TyAtom_PROD_5(strm) + | (Tok.IDDOT(_), _, strm') => TyAtom_PROD_5(strm) + | (Tok.LCB, _, strm') => + (case (lex(strm')) + of (Tok.ID(_), _, strm') => TyAtom_PROD_3(strm) + | (Tok.INT(_), _, strm') => TyAtom_PROD_3(strm) + | (Tok.RCB, _, strm') => TyAtom_PROD_4(strm) + | _ => fail() + (* end case *)) + | (Tok.TYVAR(_), _, strm') => TyAtom_PROD_1(strm) + | (Tok.LP, _, strm') => TyAtom_PROD_2(strm) + | _ => fail() + (* end case *)) + end +and RowList_NT (strm) = let + val (Row_RES, Row_SPAN, strm') = Row_NT(strm) + fun RowList_PROD_1_SUBRULE_1_NT (strm) = let + val (COMMA_RES, COMMA_SPAN, strm') = matchCOMMA(strm) + val (Row_RES, Row_SPAN, strm') = Row_NT(strm') + val FULL_SPAN = (#1(COMMA_SPAN), #2(Row_SPAN)) + in + ((Row_RES), FULL_SPAN, strm') + end + fun RowList_PROD_1_SUBRULE_1_PRED (strm) = (case (lex(strm)) + of (Tok.COMMA, _, strm') => true + | _ => false + (* end case *)) + val (SR_RES, SR_SPAN, strm') = EBNF.closure(RowList_PROD_1_SUBRULE_1_PRED, RowList_PROD_1_SUBRULE_1_NT, strm') + val FULL_SPAN = (#1(Row_SPAN), #2(SR_SPAN)) + in + (UserCode.RowList_PROD_1_ACT (SR_RES, Row_RES, SR_SPAN : (Lex.pos * Lex.pos), Row_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), liftSpan_REFC), + FULL_SPAN, strm') + end +and Row_NT (strm) = let + val (Label_RES, Label_SPAN, strm') = Label_NT(strm) + val (COLON_RES, COLON_SPAN, strm') = matchCOLON(strm') + val (Ty_RES, Ty_SPAN, strm') = Ty_NT(strm') + val FULL_SPAN = (#1(Label_SPAN), #2(Ty_SPAN)) + in + (UserCode.Row_PROD_1_ACT (Ty_RES, COLON_RES, Label_RES, Ty_SPAN : (Lex.pos * Lex.pos), COLON_SPAN : (Lex.pos * Lex.pos), Label_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), liftSpan_REFC), + FULL_SPAN, strm') + end +fun TyAnn_NT (strm) = let + val (ID_RES, ID_SPAN, strm') = matchID(strm) + val (SR_RES, SR_SPAN, strm') = let + fun TyAnn_PROD_1_SUBRULE_1_NT (strm) = let + fun TyAnn_PROD_1_SUBRULE_1_PROD_1 (strm) = let + val (OF_RES, OF_SPAN, strm') = matchOF(strm) + val (Ty_RES, Ty_SPAN, strm') = Ty_NT(strm') + val FULL_SPAN = (#1(OF_SPAN), #2(Ty_SPAN)) + in + ((Ty_RES), FULL_SPAN, strm') + end + fun TyAnn_PROD_1_SUBRULE_1_PROD_2 (strm) = let + val FULL_SPAN = (Err.getPos(strm), Err.getPos(strm)) + in + (UserCode.TyAnn_PROD_1_SUBRULE_1_PROD_2_ACT (ID_RES, ID_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), liftSpan_REFC), + FULL_SPAN, strm) + end + in + (case (lex(strm)) + of (Tok.SEMI, _, strm') => TyAnn_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.BAR, _, strm') => TyAnn_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.OF, _, strm') => TyAnn_PROD_1_SUBRULE_1_PROD_1(strm) + | _ => fail() + (* end case *)) + end + in + TyAnn_PROD_1_SUBRULE_1_NT(strm') + end + val FULL_SPAN = (#1(ID_SPAN), #2(SR_SPAN)) + in + (UserCode.TyAnn_PROD_1_ACT (ID_RES, SR_RES, ID_SPAN : (Lex.pos * Lex.pos), SR_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), liftSpan_REFC), + FULL_SPAN, strm') + end +fun Symbol'_NT (strm) = let + val (Symbol_RES, Symbol_SPAN, strm') = Symbol_NT(strm) + val FULL_SPAN = (#1(Symbol_SPAN), #2(Symbol_SPAN)) + in + (UserCode.Symbol'_PROD_1_ACT (Symbol_RES, Symbol_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), liftSpan_REFC), + FULL_SPAN, strm') + end +fun SymList_NT (strm) = let + val (Symbol'_RES, Symbol'_SPAN, strm') = Symbol'_NT(strm) + fun SymList_PROD_1_SUBRULE_1_NT (strm) = let + val (COMMA_RES, COMMA_SPAN, strm') = matchCOMMA(strm) + val (Symbol'_RES, Symbol'_SPAN, strm') = Symbol'_NT(strm') + val FULL_SPAN = (#1(COMMA_SPAN), #2(Symbol'_SPAN)) + in + ((Symbol'_RES), FULL_SPAN, strm') + end + fun SymList_PROD_1_SUBRULE_1_PRED (strm) = (case (lex(strm)) + of (Tok.COMMA, _, strm') => true + | _ => false + (* end case *)) + val (SR_RES, SR_SPAN, strm') = EBNF.closure(SymList_PROD_1_SUBRULE_1_PRED, SymList_PROD_1_SUBRULE_1_NT, strm') + val FULL_SPAN = (#1(Symbol'_SPAN), #2(SR_SPAN)) + in + (UserCode.SymList_PROD_1_ACT (SR_RES, Symbol'_RES, SR_SPAN : (Lex.pos * Lex.pos), Symbol'_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), liftSpan_REFC), + FULL_SPAN, strm') + end +fun Abbrev_NT (strm) = let + val (LP_RES, LP_SPAN, strm') = matchLP(strm) + val (STRING_RES, STRING_SPAN, strm') = matchSTRING(strm') + val (RP_RES, RP_SPAN, strm') = matchRP(strm') + val FULL_SPAN = (#1(LP_SPAN), #2(RP_SPAN)) + in + (UserCode.Abbrev_PROD_1_ACT (LP_RES, RP_RES, STRING_RES, LP_SPAN : (Lex.pos * Lex.pos), RP_SPAN : (Lex.pos * Lex.pos), STRING_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), liftSpan_REFC), + FULL_SPAN, strm') + end +fun Constr_NT (strm) = let + val (ID_RES, ID_SPAN, strm') = matchID(strm) + fun Constr_PROD_1_SUBRULE_1_NT (strm) = let + val (OF_RES, OF_SPAN, strm') = matchOF(strm) + val (Ty_RES, Ty_SPAN, strm') = Ty_NT(strm') + val FULL_SPAN = (#1(OF_SPAN), #2(Ty_SPAN)) + in + ((Ty_RES), FULL_SPAN, strm') + end + fun Constr_PROD_1_SUBRULE_1_PRED (strm) = (case (lex(strm)) + of (Tok.OF, _, strm') => true + | _ => false + (* end case *)) + val (SR_RES, SR_SPAN, strm') = EBNF.optional(Constr_PROD_1_SUBRULE_1_PRED, Constr_PROD_1_SUBRULE_1_NT, strm') + fun Constr_PROD_1_SUBRULE_2_NT (strm) = let + val (Abbrev_RES, Abbrev_SPAN, strm') = Abbrev_NT(strm) + val FULL_SPAN = (#1(Abbrev_SPAN), #2(Abbrev_SPAN)) + in + ((Abbrev_RES), FULL_SPAN, strm') + end + fun Constr_PROD_1_SUBRULE_2_PRED (strm) = (case (lex(strm)) + of (Tok.LP, _, strm') => true + | _ => false + (* end case *)) + val (Abbrev_RES, Abbrev_SPAN, strm') = EBNF.optional(Constr_PROD_1_SUBRULE_2_PRED, Constr_PROD_1_SUBRULE_2_NT, strm') + val FULL_SPAN = (#1(ID_SPAN), #2(Abbrev_SPAN)) + in + (UserCode.Constr_PROD_1_ACT (ID_RES, SR_RES, Abbrev_RES, ID_SPAN : (Lex.pos * Lex.pos), SR_SPAN : (Lex.pos * Lex.pos), Abbrev_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), liftSpan_REFC), + FULL_SPAN, strm') + end +fun ConstrList_NT (strm) = let + val (Constr_RES, Constr_SPAN, strm') = Constr_NT(strm) + fun ConstrList_PROD_1_SUBRULE_1_NT (strm) = let + val (BAR_RES, BAR_SPAN, strm') = matchBAR(strm) + val (Constr_RES, Constr_SPAN, strm') = Constr_NT(strm') + val FULL_SPAN = (#1(BAR_SPAN), #2(Constr_SPAN)) + in + ((Constr_RES), FULL_SPAN, strm') + end + fun ConstrList_PROD_1_SUBRULE_1_PRED (strm) = (case (lex(strm)) + of (Tok.BAR, _, strm') => true + | _ => false + (* end case *)) + val (SR_RES, SR_SPAN, strm') = EBNF.closure(ConstrList_PROD_1_SUBRULE_1_PRED, ConstrList_PROD_1_SUBRULE_1_NT, strm') + val FULL_SPAN = (#1(Constr_SPAN), #2(SR_SPAN)) + in + (UserCode.ConstrList_PROD_1_ACT (SR_RES, Constr_RES, SR_SPAN : (Lex.pos * Lex.pos), Constr_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), liftSpan_REFC), + FULL_SPAN, strm') + end +fun Change_NT (strm) = let + fun Change_PROD_1_SUBRULE_1_NT (strm) = let + val (Symbol_RES, Symbol_SPAN, strm') = Symbol_NT(strm) + val FULL_SPAN = (#1(Symbol_SPAN), #2(Symbol_SPAN)) + in + ((Symbol_RES), FULL_SPAN, strm') + end + fun Change_PROD_1_SUBRULE_1_PRED (strm) = (case (lex(strm)) + of (Tok.ID(_), _, strm') => true + | (Tok.STRING(_), _, strm') => true + | _ => false + (* end case *)) + val (Symbol1_RES, Symbol1_SPAN, strm') = EBNF.closure(Change_PROD_1_SUBRULE_1_PRED, Change_PROD_1_SUBRULE_1_NT, strm) + val (ARROW_RES, ARROW_SPAN, strm') = matchARROW(strm') + fun Change_PROD_1_SUBRULE_2_NT (strm) = let + val (Symbol_RES, Symbol_SPAN, strm') = Symbol_NT(strm) + val FULL_SPAN = (#1(Symbol_SPAN), #2(Symbol_SPAN)) + in + ((Symbol_RES), FULL_SPAN, strm') + end + fun Change_PROD_1_SUBRULE_2_PRED (strm) = (case (lex(strm)) + of (Tok.ID(_), _, strm') => true + | (Tok.STRING(_), _, strm') => true + | _ => false + (* end case *)) + val (Symbol2_RES, Symbol2_SPAN, strm') = EBNF.closure(Change_PROD_1_SUBRULE_2_PRED, Change_PROD_1_SUBRULE_2_NT, strm') + val FULL_SPAN = (#1(Symbol1_SPAN), #2(Symbol2_SPAN)) + in + (UserCode.Change_PROD_1_ACT (ARROW_RES, Symbol1_RES, Symbol2_RES, ARROW_SPAN : (Lex.pos * Lex.pos), Symbol1_SPAN : (Lex.pos * Lex.pos), Symbol2_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), liftSpan_REFC), + FULL_SPAN, strm') + end +fun ChangeList_NT (strm) = let + val (Change_RES, Change_SPAN, strm') = Change_NT(strm) + fun ChangeList_PROD_1_SUBRULE_1_NT (strm) = let + val (BAR_RES, BAR_SPAN, strm') = matchBAR(strm) + val (Change_RES, Change_SPAN, strm') = Change_NT(strm') + val FULL_SPAN = (#1(BAR_SPAN), #2(Change_SPAN)) + in + ((Change_RES), FULL_SPAN, strm') + end + fun ChangeList_PROD_1_SUBRULE_1_PRED (strm) = (case (lex(strm)) + of (Tok.BAR, _, strm') => true + | _ => false + (* end case *)) + val (SR_RES, SR_SPAN, strm') = EBNF.closure(ChangeList_PROD_1_SUBRULE_1_PRED, ChangeList_PROD_1_SUBRULE_1_NT, strm') + val FULL_SPAN = (#1(Change_SPAN), #2(SR_SPAN)) + in + (UserCode.ChangeList_PROD_1_ACT (SR_RES, Change_RES, SR_SPAN : (Lex.pos * Lex.pos), Change_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), liftSpan_REFC), + FULL_SPAN, strm') + end +fun ID'_NT (strm) = let + val (ID_RES, ID_SPAN, strm') = matchID(strm) + val FULL_SPAN = (#1(ID_SPAN), #2(ID_SPAN)) + in + (UserCode.ID'_PROD_1_ACT (ID_RES, ID_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), liftSpan_REFC), + FULL_SPAN, strm') + end +fun IDList_NT (strm) = let + val (ID'_RES, ID'_SPAN, strm') = ID'_NT(strm) + fun IDList_PROD_1_SUBRULE_1_NT (strm) = let + val (COMMA_RES, COMMA_SPAN, strm') = matchCOMMA(strm) + val (ID'_RES, ID'_SPAN, strm') = ID'_NT(strm') + val FULL_SPAN = (#1(COMMA_SPAN), #2(ID'_SPAN)) + in + ((ID'_RES), FULL_SPAN, strm') + end + fun IDList_PROD_1_SUBRULE_1_PRED (strm) = (case (lex(strm)) + of (Tok.COMMA, _, strm') => true + | _ => false + (* end case *)) + val (SR_RES, SR_SPAN, strm') = EBNF.closure(IDList_PROD_1_SUBRULE_1_PRED, IDList_PROD_1_SUBRULE_1_NT, strm') + val FULL_SPAN = (#1(ID'_SPAN), #2(SR_SPAN)) + in + (UserCode.IDList_PROD_1_ACT (SR_RES, ID'_RES, SR_SPAN : (Lex.pos * Lex.pos), ID'_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), liftSpan_REFC), + FULL_SPAN, strm') + end +fun Decl_NT (strm) = let + fun Decl_PROD_1 (strm) = let + val (KW_name_RES, KW_name_SPAN, strm') = matchKW_name(strm) + val (ID_RES, ID_SPAN, strm') = matchID(strm') + val FULL_SPAN = (#1(KW_name_SPAN), #2(ID_SPAN)) + in + (UserCode.Decl_PROD_1_ACT (ID_RES, KW_name_RES, ID_SPAN : (Lex.pos * Lex.pos), KW_name_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), liftSpan_REFC), + FULL_SPAN, strm') + end + fun Decl_PROD_2 (strm) = let + val (KW_header_RES, KW_header_SPAN, strm') = matchKW_header(strm) + val (Code_RES, Code_SPAN, strm') = Code_NT(strm') + val FULL_SPAN = (#1(KW_header_SPAN), #2(Code_SPAN)) + in + (UserCode.Decl_PROD_2_ACT (Code_RES, KW_header_RES, Code_SPAN : (Lex.pos * Lex.pos), KW_header_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), liftSpan_REFC), + FULL_SPAN, strm') + end + fun Decl_PROD_3 (strm) = let + val (KW_start_RES, KW_start_SPAN, strm') = matchKW_start(strm) + val (ID_RES, ID_SPAN, strm') = matchID(strm') + val FULL_SPAN = (#1(KW_start_SPAN), #2(ID_SPAN)) + in + (UserCode.Decl_PROD_3_ACT (ID_RES, KW_start_RES, ID_SPAN : (Lex.pos * Lex.pos), KW_start_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), liftSpan_REFC), + FULL_SPAN, strm') + end + fun Decl_PROD_4 (strm) = let + val (KW_entry_RES, KW_entry_SPAN, strm') = matchKW_entry(strm) + val (IDList_RES, IDList_SPAN, strm') = IDList_NT(strm') + val FULL_SPAN = (#1(KW_entry_SPAN), #2(IDList_SPAN)) + in + (UserCode.Decl_PROD_4_ACT (KW_entry_RES, IDList_RES, KW_entry_SPAN : (Lex.pos * Lex.pos), IDList_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), liftSpan_REFC), + FULL_SPAN, strm') + end + fun Decl_PROD_5 (strm) = let + val (KW_keywords_RES, KW_keywords_SPAN, strm') = matchKW_keywords(strm) + val (SymList_RES, SymList_SPAN, strm') = SymList_NT(strm') + val FULL_SPAN = (#1(KW_keywords_SPAN), #2(SymList_SPAN)) + in + (UserCode.Decl_PROD_5_ACT (KW_keywords_RES, SymList_RES, KW_keywords_SPAN : (Lex.pos * Lex.pos), SymList_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), liftSpan_REFC), + FULL_SPAN, strm') + end + fun Decl_PROD_6 (strm) = let + val (KW_value_RES, KW_value_SPAN, strm') = matchKW_value(strm) + val (ID_RES, ID_SPAN, strm') = matchID(strm') + val (Code_RES, Code_SPAN, strm') = Code_NT(strm') + val FULL_SPAN = (#1(KW_value_SPAN), #2(Code_SPAN)) + in + (UserCode.Decl_PROD_6_ACT (ID_RES, Code_RES, KW_value_RES, ID_SPAN : (Lex.pos * Lex.pos), Code_SPAN : (Lex.pos * Lex.pos), KW_value_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), liftSpan_REFC), + FULL_SPAN, strm') + end + fun Decl_PROD_7 (strm) = let + val (KW_prefer_RES, KW_prefer_SPAN, strm') = matchKW_prefer(strm) + val (SymList_RES, SymList_SPAN, strm') = SymList_NT(strm') + val FULL_SPAN = (#1(KW_prefer_SPAN), #2(SymList_SPAN)) + in + (UserCode.Decl_PROD_7_ACT (SymList_RES, KW_prefer_RES, SymList_SPAN : (Lex.pos * Lex.pos), KW_prefer_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), liftSpan_REFC), + FULL_SPAN, strm') + end + fun Decl_PROD_8 (strm) = let + val (KW_change_RES, KW_change_SPAN, strm') = matchKW_change(strm) + val (ChangeList_RES, ChangeList_SPAN, strm') = ChangeList_NT(strm') + val FULL_SPAN = (#1(KW_change_SPAN), #2(ChangeList_SPAN)) + in + (UserCode.Decl_PROD_8_ACT (ChangeList_RES, KW_change_RES, ChangeList_SPAN : (Lex.pos * Lex.pos), KW_change_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), liftSpan_REFC), + FULL_SPAN, strm') + end + fun Decl_PROD_9 (strm) = let + val (KW_defs_RES, KW_defs_SPAN, strm') = matchKW_defs(strm) + val (Code_RES, Code_SPAN, strm') = Code_NT(strm') + val FULL_SPAN = (#1(KW_defs_SPAN), #2(Code_SPAN)) + in + (UserCode.Decl_PROD_9_ACT (Code_RES, KW_defs_RES, Code_SPAN : (Lex.pos * Lex.pos), KW_defs_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), liftSpan_REFC), + FULL_SPAN, strm') + end + fun Decl_PROD_10 (strm) = let + val (KW_tokentype_RES, KW_tokentype_SPAN, strm') = matchKW_tokentype(strm) + val (QualID_RES, QualID_SPAN, strm') = QualID_NT(strm') + val FULL_SPAN = (#1(KW_tokentype_SPAN), #2(QualID_SPAN)) + in + (UserCode.Decl_PROD_10_ACT (QualID_RES, KW_tokentype_RES, QualID_SPAN : (Lex.pos * Lex.pos), KW_tokentype_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), liftSpan_REFC), + FULL_SPAN, strm') + end + fun Decl_PROD_11 (strm) = let + val (KW_tokens_RES, KW_tokens_SPAN, strm') = matchKW_tokens(strm) + fun Decl_PROD_11_SUBRULE_1_NT (strm) = let + val (COLON_RES, COLON_SPAN, strm') = matchCOLON(strm) + val FULL_SPAN = (#1(COLON_SPAN), #2(COLON_SPAN)) + in + ((), FULL_SPAN, strm') + end + fun Decl_PROD_11_SUBRULE_1_PRED (strm) = (case (lex(strm)) + of (Tok.COLON, _, strm') => true + | _ => false + (* end case *)) + val (COLON_RES, COLON_SPAN, strm') = EBNF.optional(Decl_PROD_11_SUBRULE_1_PRED, Decl_PROD_11_SUBRULE_1_NT, strm') + val (ConstrList_RES, ConstrList_SPAN, strm') = ConstrList_NT(strm') + val FULL_SPAN = (#1(KW_tokens_SPAN), #2(ConstrList_SPAN)) + in + (UserCode.Decl_PROD_11_ACT (COLON_RES, KW_tokens_RES, ConstrList_RES, COLON_SPAN : (Lex.pos * Lex.pos), KW_tokens_SPAN : (Lex.pos * Lex.pos), ConstrList_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), liftSpan_REFC), + FULL_SPAN, strm') + end + fun Decl_PROD_12 (strm) = let + val (KW_import_RES, KW_import_SPAN, strm') = matchKW_import(strm) + val (STRING_RES, STRING_SPAN, strm') = matchSTRING(strm') + fun Decl_PROD_12_SUBRULE_1_NT (strm) = let + val (KW_dropping_RES, KW_dropping_SPAN, strm') = matchKW_dropping(strm) + val (SymList_RES, SymList_SPAN, strm') = SymList_NT(strm') + val FULL_SPAN = (#1(KW_dropping_SPAN), #2(SymList_SPAN)) + in + ((SymList_RES), FULL_SPAN, strm') + end + fun Decl_PROD_12_SUBRULE_1_PRED (strm) = (case (lex(strm)) + of (Tok.KW_dropping, _, strm') => true + | _ => false + (* end case *)) + val (SR_RES, SR_SPAN, strm') = EBNF.optional(Decl_PROD_12_SUBRULE_1_PRED, Decl_PROD_12_SUBRULE_1_NT, strm') + val FULL_SPAN = (#1(KW_import_SPAN), #2(SR_SPAN)) + in + (UserCode.Decl_PROD_12_ACT (SR_RES, STRING_RES, KW_import_RES, SR_SPAN : (Lex.pos * Lex.pos), STRING_SPAN : (Lex.pos * Lex.pos), KW_import_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), liftSpan_REFC), + FULL_SPAN, strm') + end + fun Decl_PROD_13 (strm) = let + val (KW_refcell_RES, KW_refcell_SPAN, strm') = matchKW_refcell(strm) + val (ID_RES, ID_SPAN, strm') = matchID(strm') + val (COLON_RES, COLON_SPAN, strm') = matchCOLON(strm') + val (Ty_RES, Ty_SPAN, strm') = Ty_NT(strm') + val (EQ_RES, EQ_SPAN, strm') = matchEQ(strm') + val (Code_RES, Code_SPAN, strm') = Code_NT(strm') + val FULL_SPAN = (#1(KW_refcell_SPAN), #2(Code_SPAN)) + in + (UserCode.Decl_PROD_13_ACT (EQ_RES, ID_RES, Ty_RES, Code_RES, COLON_RES, KW_refcell_RES, EQ_SPAN : (Lex.pos * Lex.pos), ID_SPAN : (Lex.pos * Lex.pos), Ty_SPAN : (Lex.pos * Lex.pos), Code_SPAN : (Lex.pos * Lex.pos), COLON_SPAN : (Lex.pos * Lex.pos), KW_refcell_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), liftSpan_REFC), + FULL_SPAN, strm') + end + fun Decl_PROD_14 (strm) = let + val (KW_nonterms_RES, KW_nonterms_SPAN, strm') = matchKW_nonterms(strm) + fun Decl_PROD_14_SUBRULE_1_NT (strm) = let + val (COLON_RES, COLON_SPAN, strm') = matchCOLON(strm) + val FULL_SPAN = (#1(COLON_SPAN), #2(COLON_SPAN)) + in + ((), FULL_SPAN, strm') + end + fun Decl_PROD_14_SUBRULE_1_PRED (strm) = (case (lex(strm)) + of (Tok.COLON, _, strm') => true + | _ => false + (* end case *)) + val (COLON_RES, COLON_SPAN, strm') = EBNF.optional(Decl_PROD_14_SUBRULE_1_PRED, Decl_PROD_14_SUBRULE_1_NT, strm') + val (TyAnn_RES, TyAnn_SPAN, strm') = TyAnn_NT(strm') + fun Decl_PROD_14_SUBRULE_2_NT (strm) = let + val (BAR_RES, BAR_SPAN, strm') = matchBAR(strm) + val (TyAnn_RES, TyAnn_SPAN, strm') = TyAnn_NT(strm') + val FULL_SPAN = (#1(BAR_SPAN), #2(TyAnn_SPAN)) + in + ((TyAnn_RES), FULL_SPAN, strm') + end + fun Decl_PROD_14_SUBRULE_2_PRED (strm) = (case (lex(strm)) + of (Tok.BAR, _, strm') => true + | _ => false + (* end case *)) + val (SR_RES, SR_SPAN, strm') = EBNF.closure(Decl_PROD_14_SUBRULE_2_PRED, Decl_PROD_14_SUBRULE_2_NT, strm') + val FULL_SPAN = (#1(KW_nonterms_SPAN), #2(SR_SPAN)) + in + (UserCode.Decl_PROD_14_ACT (SR_RES, COLON_RES, TyAnn_RES, KW_nonterms_RES, SR_SPAN : (Lex.pos * Lex.pos), COLON_SPAN : (Lex.pos * Lex.pos), TyAnn_SPAN : (Lex.pos * Lex.pos), KW_nonterms_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), liftSpan_REFC), + FULL_SPAN, strm') + end + fun Decl_PROD_15 (strm) = let + val (ID_RES, ID_SPAN, strm') = matchID(strm) + fun Decl_PROD_15_SUBRULE_1_NT (strm) = let + val (Formals_RES, Formals_SPAN, strm') = Formals_NT(strm) + val FULL_SPAN = (#1(Formals_SPAN), #2(Formals_SPAN)) + in + ((Formals_RES), FULL_SPAN, strm') + end + fun Decl_PROD_15_SUBRULE_1_PRED (strm) = (case (lex(strm)) + of (Tok.LP, _, strm') => true + | _ => false + (* end case *)) + val (Formals_RES, Formals_SPAN, strm') = EBNF.optional(Decl_PROD_15_SUBRULE_1_PRED, Decl_PROD_15_SUBRULE_1_NT, strm') + val (COLON_RES, COLON_SPAN, strm') = matchCOLON(strm') + val (AltList_RES, AltList_SPAN, strm') = AltList_NT(strm') + val FULL_SPAN = (#1(ID_SPAN), #2(AltList_SPAN)) + in + (UserCode.Decl_PROD_15_ACT (ID_RES, COLON_RES, AltList_RES, Formals_RES, ID_SPAN : (Lex.pos * Lex.pos), COLON_SPAN : (Lex.pos * Lex.pos), AltList_SPAN : (Lex.pos * Lex.pos), Formals_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), liftSpan_REFC), + FULL_SPAN, strm') + end + in + (case (lex(strm)) + of (Tok.ID(_), _, strm') => Decl_PROD_15(strm) + | (Tok.KW_refcell, _, strm') => Decl_PROD_13(strm) + | (Tok.KW_tokens, _, strm') => Decl_PROD_11(strm) + | (Tok.KW_defs, _, strm') => Decl_PROD_9(strm) + | (Tok.KW_prefer, _, strm') => Decl_PROD_7(strm) + | (Tok.KW_keywords, _, strm') => Decl_PROD_5(strm) + | (Tok.KW_start, _, strm') => Decl_PROD_3(strm) + | (Tok.KW_name, _, strm') => Decl_PROD_1(strm) + | (Tok.KW_header, _, strm') => Decl_PROD_2(strm) + | (Tok.KW_entry, _, strm') => Decl_PROD_4(strm) + | (Tok.KW_value, _, strm') => Decl_PROD_6(strm) + | (Tok.KW_change, _, strm') => Decl_PROD_8(strm) + | (Tok.KW_tokentype, _, strm') => Decl_PROD_10(strm) + | (Tok.KW_import, _, strm') => Decl_PROD_12(strm) + | (Tok.KW_nonterms, _, strm') => Decl_PROD_14(strm) + | _ => fail() + (* end case *)) + end +fun File_NT (fileName_RES, sm_RES) (strm) = let + val (SR1_RES, SR1_SPAN, strm') = let + fun File_PROD_1_SUBRULE_1_NT (strm) = let + val FULL_SPAN = (Err.getPos(strm), Err.getPos(strm)) + in + (UserCode.File_PROD_1_SUBRULE_1_PROD_1_ACT (sm_RES, fileName_RES, FULL_SPAN : (Lex.pos * Lex.pos), liftSpan_REFC), + FULL_SPAN, strm) + end + in + File_PROD_1_SUBRULE_1_NT(strm) + end + fun File_PROD_1_SUBRULE_2_NT (strm) = let + val (Decl_RES, Decl_SPAN, strm') = Decl_NT(strm) + val (SEMI_RES, SEMI_SPAN, strm') = matchSEMI(strm') + val FULL_SPAN = (#1(Decl_SPAN), #2(SEMI_SPAN)) + in + ((Decl_RES), FULL_SPAN, strm') + end + fun File_PROD_1_SUBRULE_2_PRED (strm) = (case (lex(strm)) + of (Tok.KW_change, _, strm') => true + | (Tok.KW_defs, _, strm') => true + | (Tok.KW_entry, _, strm') => true + | (Tok.KW_header, _, strm') => true + | (Tok.KW_import, _, strm') => true + | (Tok.KW_keywords, _, strm') => true + | (Tok.KW_name, _, strm') => true + | (Tok.KW_nonterms, _, strm') => true + | (Tok.KW_prefer, _, strm') => true + | (Tok.KW_refcell, _, strm') => true + | (Tok.KW_start, _, strm') => true + | (Tok.KW_tokens, _, strm') => true + | (Tok.KW_tokentype, _, strm') => true + | (Tok.KW_value, _, strm') => true + | (Tok.ID(_), _, strm') => true + | _ => false + (* end case *)) + val (SR2_RES, SR2_SPAN, strm') = EBNF.closure(File_PROD_1_SUBRULE_2_PRED, File_PROD_1_SUBRULE_2_NT, strm') + val FULL_SPAN = (#1(SR1_SPAN), #2(SR2_SPAN)) + in + (UserCode.File_PROD_1_ACT (sm_RES, SR1_RES, SR2_RES, fileName_RES, SR1_SPAN : (Lex.pos * Lex.pos), SR2_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), liftSpan_REFC), + FULL_SPAN, strm') + end +in + (File_NT) +end +val File_NT = fn x => fn s => unwrap (Err.launch (eh, lexFn, File_NT x , true) s) + +in (File_NT) end + in +fun parse lexFn x s = let val (File_NT) = mk lexFn in File_NT x s end + + end + +end diff --git a/ml-lpt/ml-antlr/FrontEnds/ml-antlr-based/spec.lex b/ml-lpt/ml-antlr/FrontEnds/ml-antlr-based/spec.lex new file mode 100644 index 0000000..caafeed --- /dev/null +++ b/ml-lpt/ml-antlr/FrontEnds/ml-antlr-based/spec.lex @@ -0,0 +1,182 @@ +(* spec.lex + * + * COPYRIGHT (c) 2009 The Manticore Project (http://manticore.cs.uchicago.edu) + * All rights reserved. + * + * (With some code borrowed from ml-yacc) + *) + +%defs ( + +structure Tok = SpecTokens + +val comLvl : int ref = ref 0 (* nesting depth of comments *) +val comStart : int ref = ref 0 (* start line of current comment *) + +type lex_result = Tok.token + +fun err (lineNo, colNo, msg) = Err.errMsg [ + "Lexical error [", Int.toString lineNo, ".", Int.toString colNo, "]: ", msg + ] + +fun eof () = ( + if (!comLvl > 0) + then () +(* err(~1, "unclosed comment starting at line " ^ Int.toString(!comStart)) *) + else (); + Tok.EOF) + +val text : string list ref = ref [] +fun addText s = (text := s::(!text)) +fun clrText () = (text := []) +fun getText () = concat (rev (!text)) + +val pcount = ref 0 (* nesting depth of parentheses in CODE *) +fun inc (ri as ref i) = (ri := i+1) +fun dec (ri as ref i) = (ri := i-1) +); + +%let eol=("\n"|"\013\n"|"\013"); +%let ws=("\009"|"\011"|"\012"|" "|{eol}); +%let lc=[a-z]; +%let uc=[A-Z]; +%let alpha=({lc}|{uc}); +%let digit=[0-9]; +%let int=digit*; +%let idchars=({alpha}|{digit}|"_"|"'"); +%let id={alpha}{idchars}*; +%let qualid ={id}"."; +%let tyvar="'"{idchars}*; + +(* the PRECODE state is used for scanning the whitespace between the precursor + * of a parenthesized SML code fragment and the opening "(". On most tokens it + * behaves like INITIAL so that the parser's error correction can respond. + *) +%states STRING COM PRECODE CODE PRECONSTR CONSTR; + +%name SpecLex; + +"of" => (YYBEGIN CONSTR; Tok.OF); + +{ws}+ => (skip()); +{id} => (Tok.ID yytext); + +"%change" => (Tok.KW_change); +"%defs" => (YYBEGIN PRECODE; Tok.KW_defs); +"%dropping" => (Tok.KW_dropping); +"%entry" => (Tok.KW_entry); +"%header" => (YYBEGIN PRECODE; Tok.KW_header); +"%import" => (Tok.KW_import); +"%keyword"("s")? => (Tok.KW_keywords); +"%name" => (Tok.KW_name); +"%nonterm"("s")? => (Tok.KW_nonterms); +"%prefer" => (Tok.KW_prefer); +"%refcell" => (YYBEGIN CONSTR; Tok.KW_refcell); +"%start" => (Tok.KW_start); +"%token"("s")? => (YYBEGIN CONSTR; Tok.KW_tokens); +"%tokentype" => (YYBEGIN CONSTR; Tok.KW_tokentype); +"%try" => (Tok.KW_try); +"%value" => (YYBEGIN PRECODE; Tok.KW_value); +"%where" => (YYBEGIN PRECODE; Tok.KW_where); + +"|" => (Tok.BAR); +"@" => (YYBEGIN PRECODE; Tok.AT); +"$" => (Tok.DOLLAR); +"+" => (Tok.PLUS); +"*" => (Tok.STAR); +"?" => (Tok.QUERY); +":" => (Tok.COLON); +";" => (Tok.SEMI); +"," => (Tok.COMMA); +"(" => (Tok.LP); +")" => (Tok.RP); +"[" => (Tok.LSB); +"]" => (Tok.RSB); +"/" => (Tok.SLASH); +"=" => (Tok.EQ); +"->" => (Tok.ARROW); +"=>" => (YYBEGIN PRECODE; Tok.DARROW); +"\"" => (YYBEGIN STRING; clrText(); addText yytext; + ignore(continue() before YYBEGIN INITIAL); + Tok.STRING (getText())); + +"(*" + => (comLvl := 1; comStart := !yylineno; YYBEGIN COM; + ignore(continue() before YYBEGIN INITIAL); + continue()); +"(*" + => (comLvl := 1; comStart := !yylineno; YYBEGIN COM; + ignore(continue() before YYBEGIN CONSTR); + continue()); +"(*" + => (comLvl := 1; comStart := !yylineno; YYBEGIN COM; + ignore(continue() before YYBEGIN PRECODE); + continue()); +"(*" + => (comLvl := 1; comStart := !yylineno; YYBEGIN COM; + ignore(continue() before YYBEGIN CODE); + continue()); + +"(*" + => (comLvl := !comLvl+1; continue()); +"*)" + => (comLvl := !comLvl-1; + if (!comLvl = 0) + then (Tok.BOGUS) + else continue()); +.|{eol} + => (continue()); + +"(" => (pcount := 1; YYBEGIN CODE; clrText(); continue()); +"\"" => (YYBEGIN STRING; clrText(); addText yytext; + ignore(continue() before YYBEGIN PRECODE); + Tok.STRING (getText())); + +"(" => (addText yytext; (* NOTE: the initial "(" is consumed in the PRECODE state *) + inc pcount; continue()); +")" => (dec pcount; + if !pcount = 0 + then (YYBEGIN INITIAL; Tok.CODE (getText())) + else (addText yytext; continue())); +"\"" => (addText yytext; YYBEGIN STRING; + ignore(continue() before YYBEGIN CODE); + continue()); +[^()"]+ => (addText yytext; continue()); + +"\"" => (addText yytext; Tok.BOGUS); +{eol} => (addText yytext; err (!yylineno, !yycolno, "unclosed string"); + Tok.BOGUS); +\\ => (addText yytext; continue()); +\\\\ => (addText yytext; continue()); +(* we apply toString to yytext below to ensure that UTF8 characters are + * printed in a format that is legal SML. + *) +[^"\\\n\013]+ + => (addText(String.toString yytext); continue()); +\\\" => (addText yytext; continue()); + +{ws} => (continue()); +"of" => (Tok.OF); +{id} => (Tok.ID yytext); +{tyvar} => (Tok.TYVAR yytext); +{qualid}=> (Tok.IDDOT yytext); +{int} => (Tok.INT yytext); +"|" => (Tok.BAR); +"*" => (Tok.STAR); +":" => (Tok.COLON); +"," => (Tok.COMMA); +";" => (YYBEGIN INITIAL; Tok.SEMI); +"(" => (Tok.LP); +")" => (Tok.RP); +"{" => (Tok.LCB); +"}" => (Tok.RCB); +"->" => (Tok.ARROW); +"\"" => (YYBEGIN STRING; clrText(); addText yytext; + ignore(continue() before YYBEGIN CONSTR); + Tok.STRING (getText())); +"=" => (YYBEGIN PRECODE; Tok.EQ); + +. => (err (!yylineno, !yycolno, + concat["illegal character '", + String.toCString yytext, "'"]); + continue()); diff --git a/ml-lpt/ml-antlr/FrontEnds/ml-antlr-based/spec.lex.sml b/ml-lpt/ml-antlr/FrontEnds/ml-antlr-based/spec.lex.sml new file mode 100644 index 0000000..26d4990 --- /dev/null +++ b/ml-lpt/ml-antlr/FrontEnds/ml-antlr-based/spec.lex.sml @@ -0,0 +1,679 @@ +structure SpecLex = struct + + datatype yystart_state = +COM | CODE | STRING | PRECODE | PRECONSTR | CONSTR | INITIAL + local + + structure UserDeclarations = + struct + + + +structure Tok = SpecTokens + +val comLvl : int ref = ref 0 (* nesting depth of comments *) +val comStart : int ref = ref 0 (* start line of current comment *) + +type lex_result = Tok.token + +fun err (lineNo, colNo, msg) = Err.errMsg [ + "Lexical error [", Int.toString lineNo, ".", Int.toString colNo, "]: ", msg + ] + +fun eof () = ( + if (!comLvl > 0) + then () +(* err(~1, "unclosed comment starting at line " ^ Int.toString(!comStart)) *) + else (); + Tok.EOF) + +val text : string list ref = ref [] +fun addText s = (text := s::(!text)) +fun clrText () = (text := []) +fun getText () = concat (rev (!text)) + +val pcount = ref 0 (* nesting depth of parentheses in CODE *) +fun inc (ri as ref i) = (ri := i+1) +fun dec (ri as ref i) = (ri := i-1) + + end + + datatype yymatch + = yyNO_MATCH + | yyMATCH of ULexBuffer.stream * action * yymatch + withtype action = ULexBuffer.stream * yymatch -> UserDeclarations.lex_result + + val yytable : ((UTF8.wchar * UTF8.wchar * int) list * int list) Vector.vector = +#[([(0w0,0w12,7), +(0w14,0w39,7), +(0w41,0w41,7), +(0w43,0w2147483647,7), +(0w13,0w13,8), +(0w40,0w40,9), +(0w42,0w42,10)], []), ([(0w0,0w33,14), +(0w35,0w39,14), +(0w42,0w2147483647,14), +(0w34,0w34,15), +(0w40,0w40,16), +(0w41,0w41,17)], []), ([(0w0,0w9,20), +(0w11,0w12,20), +(0w14,0w33,20), +(0w35,0w91,20), +(0w93,0w2147483647,20), +(0w10,0w10,21), +(0w13,0w13,22), +(0w34,0w34,23), +(0w92,0w92,24)], []), ([(0w0,0w8,29), +(0w14,0w31,29), +(0w33,0w33,29), +(0w35,0w35,29), +(0w38,0w39,29), +(0w41,0w41,29), +(0w46,0w46,29), +(0w48,0w57,29), +(0w60,0w60,29), +(0w62,0w62,29), +(0w92,0w92,29), +(0w94,0w96,29), +(0w123,0w123,29), +(0w125,0w2147483647,29), +(0w9,0w12,30), +(0w32,0w32,30), +(0w13,0w13,31), +(0w34,0w34,32), +(0w36,0w36,33), +(0w37,0w37,34), +(0w40,0w40,35), +(0w42,0w42,36), +(0w43,0w43,37), +(0w44,0w44,38), +(0w45,0w45,39), +(0w47,0w47,40), +(0w58,0w58,41), +(0w59,0w59,42), +(0w61,0w61,43), +(0w63,0w63,44), +(0w64,0w64,45), +(0w65,0w90,46), +(0w97,0w110,46), +(0w112,0w122,46), +(0w91,0w91,47), +(0w93,0w93,48), +(0w111,0w111,49), +(0w124,0w124,50)], []), ([(0w0,0w2147483647,29)], []), ([(0w0,0w8,29), +(0w14,0w31,29), +(0w33,0w33,29), +(0w35,0w38,29), +(0w43,0w43,29), +(0w46,0w57,29), +(0w60,0w60,29), +(0w62,0w64,29), +(0w91,0w96,29), +(0w126,0w2147483647,29), +(0w9,0w12,151), +(0w32,0w32,151), +(0w13,0w13,152), +(0w34,0w34,153), +(0w39,0w39,154), +(0w40,0w40,155), +(0w41,0w41,156), +(0w42,0w42,157), +(0w44,0w44,158), +(0w45,0w45,159), +(0w58,0w58,160), +(0w59,0w59,161), +(0w61,0w61,162), +(0w65,0w90,163), +(0w97,0w99,163), +(0w101,0w110,163), +(0w112,0w122,163), +(0w100,0w100,164), +(0w111,0w111,165), +(0w123,0w123,166), +(0w124,0w124,167), +(0w125,0w125,168)], []), ([(0w0,0w8,29), +(0w14,0w31,29), +(0w33,0w33,29), +(0w35,0w35,29), +(0w38,0w39,29), +(0w46,0w46,29), +(0w48,0w57,29), +(0w60,0w60,29), +(0w62,0w62,29), +(0w92,0w92,29), +(0w94,0w96,29), +(0w123,0w123,29), +(0w125,0w2147483647,29), +(0w9,0w12,30), +(0w32,0w32,30), +(0w13,0w13,31), +(0w34,0w34,179), +(0w36,0w36,33), +(0w37,0w37,34), +(0w40,0w40,180), +(0w41,0w41,181), +(0w42,0w42,36), +(0w43,0w43,37), +(0w44,0w44,38), +(0w45,0w45,39), +(0w47,0w47,40), +(0w58,0w58,41), +(0w59,0w59,42), +(0w61,0w61,43), +(0w63,0w63,44), +(0w64,0w64,45), +(0w65,0w90,46), +(0w97,0w110,46), +(0w112,0w122,46), +(0w91,0w91,47), +(0w93,0w93,48), +(0w111,0w111,49), +(0w124,0w124,50)], []), ([], [44, 75]), ([(0w10,0w10,13)], [44, 75]), ([(0w42,0w42,12)], [44, 75]), ([(0w41,0w41,11)], [44, 75]), ([], [43]), ([], [42]), ([], [44]), ([(0w0,0w33,19), +(0w35,0w39,19), +(0w42,0w2147483647,19)], [50, 75]), ([], [49, 75]), ([(0w42,0w42,18)], [47, 75]), ([], [48, 75]), ([], [41]), ([(0w0,0w33,19), +(0w35,0w39,19), +(0w42,0w2147483647,19)], [50]), ([(0w0,0w9,28), +(0w11,0w12,28), +(0w14,0w33,28), +(0w35,0w91,28), +(0w93,0w2147483647,28)], [55, 75]), ([], [52, 75]), ([(0w10,0w10,27)], [52, 75]), ([], [51, 75]), ([(0w34,0w34,25), +(0w92,0w92,26)], [53, 75]), ([], [56]), ([], [54]), ([], [52]), ([(0w0,0w9,28), +(0w11,0w12,28), +(0w14,0w33,28), +(0w35,0w91,28), +(0w93,0w2147483647,28)], [55]), ([], [75]), ([(0w9,0w12,149), +(0w32,0w32,149), +(0w13,0w13,150)], [1, 75]), ([(0w9,0w12,149), +(0w32,0w32,149), +(0w13,0w13,150)], [1, 75]), ([], [46, 75]), ([], [22, 75]), ([(0w99,0w99,56), +(0w100,0w100,57), +(0w101,0w101,58), +(0w104,0w104,59), +(0w105,0w105,60), +(0w107,0w107,61), +(0w110,0w110,62), +(0w112,0w112,63), +(0w114,0w114,64), +(0w115,0w115,65), +(0w116,0w116,66), +(0w118,0w118,67), +(0w119,0w119,68)], [75]), ([(0w42,0w42,55)], [45, 75]), ([], [24, 75]), ([], [23, 75]), ([], [28, 75]), ([(0w62,0w62,54)], [75]), ([], [33, 75]), ([], [26, 75]), ([], [27, 75]), ([(0w62,0w62,53)], [34, 75]), ([], [25, 75]), ([], [21, 75]), ([(0w39,0w39,51), +(0w48,0w57,51), +(0w65,0w90,51), +(0w95,0w95,51), +(0w97,0w122,51)], [2, 75]), ([], [31, 75]), ([], [32, 75]), ([(0w39,0w39,51), +(0w48,0w57,51), +(0w65,0w90,51), +(0w95,0w95,51), +(0w97,0w101,51), +(0w103,0w122,51), +(0w102,0w102,52)], [2, 75]), ([], [20, 75]), ([(0w39,0w39,51), +(0w48,0w57,51), +(0w65,0w90,51), +(0w95,0w95,51), +(0w97,0w122,51)], [2]), ([(0w39,0w39,51), +(0w48,0w57,51), +(0w65,0w90,51), +(0w95,0w95,51), +(0w97,0w122,51)], [0, 2]), ([], [36]), ([], [35]), ([], [40]), ([(0w104,0w104,144)], []), ([(0w101,0w101,134), +(0w114,0w114,135)], []), ([(0w110,0w110,130)], []), ([(0w101,0w101,125)], []), ([(0w109,0w109,120)], []), ([(0w101,0w101,113)], []), ([(0w97,0w97,103), +(0w111,0w111,104)], []), ([(0w114,0w114,98)], []), ([(0w101,0w101,92)], []), ([(0w116,0w116,88)], []), ([(0w111,0w111,77), +(0w114,0w114,78)], []), ([(0w97,0w97,73)], []), ([(0w104,0w104,69)], []), ([(0w101,0w101,70)], []), ([(0w114,0w114,71)], []), ([(0w101,0w101,72)], []), ([], [19]), ([(0w108,0w108,74)], []), ([(0w117,0w117,75)], []), ([(0w101,0w101,76)], []), ([], [18]), ([(0w107,0w107,80)], []), ([(0w121,0w121,79)], []), ([], [17]), ([(0w101,0w101,81)], []), ([(0w110,0w110,82)], []), ([(0w115,0w115,83), +(0w116,0w116,84)], [15]), ([], [15]), ([(0w121,0w121,85)], []), ([(0w112,0w112,86)], []), ([(0w101,0w101,87)], []), ([], [16]), ([(0w97,0w97,89)], []), ([(0w114,0w114,90)], []), ([(0w116,0w116,91)], []), ([], [14]), ([(0w102,0w102,93)], []), ([(0w99,0w99,94)], []), ([(0w101,0w101,95)], []), ([(0w108,0w108,96)], []), ([(0w108,0w108,97)], []), ([], [13]), ([(0w101,0w101,99)], []), ([(0w102,0w102,100)], []), ([(0w101,0w101,101)], []), ([(0w114,0w114,102)], []), ([], [12]), ([(0w109,0w109,111)], []), ([(0w110,0w110,105)], []), ([(0w116,0w116,106)], []), ([(0w101,0w101,107)], []), ([(0w114,0w114,108)], []), ([(0w109,0w109,109)], []), ([(0w115,0w115,110)], [11]), ([], [11]), ([(0w101,0w101,112)], []), ([], [10]), ([(0w121,0w121,114)], []), ([(0w119,0w119,115)], []), ([(0w111,0w111,116)], []), ([(0w114,0w114,117)], []), ([(0w100,0w100,118)], []), ([(0w115,0w115,119)], [9]), ([], [9]), ([(0w112,0w112,121)], []), ([(0w111,0w111,122)], []), ([(0w114,0w114,123)], []), ([(0w116,0w116,124)], []), ([], [8]), ([(0w97,0w97,126)], []), ([(0w100,0w100,127)], []), ([(0w101,0w101,128)], []), ([(0w114,0w114,129)], []), ([], [7]), ([(0w116,0w116,131)], []), ([(0w114,0w114,132)], []), ([(0w121,0w121,133)], []), ([], [6]), ([(0w102,0w102,142)], []), ([(0w111,0w111,136)], []), ([(0w112,0w112,137)], []), ([(0w112,0w112,138)], []), ([(0w105,0w105,139)], []), ([(0w110,0w110,140)], []), ([(0w103,0w103,141)], []), ([], [5]), ([(0w115,0w115,143)], []), ([], [4]), ([(0w97,0w97,145)], []), ([(0w110,0w110,146)], []), ([(0w103,0w103,147)], []), ([(0w101,0w101,148)], []), ([], [3]), ([(0w9,0w12,149), +(0w32,0w32,149), +(0w13,0w13,150)], [1]), ([(0w9,0w12,149), +(0w32,0w32,149), +(0w13,0w13,150)], [1]), ([], [57, 75]), ([(0w10,0w10,178)], [57, 75]), ([], [73, 75]), ([(0w39,0w39,177), +(0w48,0w57,177), +(0w65,0w90,177), +(0w95,0w95,177), +(0w97,0w122,177)], [60, 75]), ([(0w42,0w42,176)], [68, 75]), ([], [69, 75]), ([], [64, 75]), ([], [66, 75]), ([(0w62,0w62,175)], [75]), ([], [65, 75]), ([], [67, 75]), ([], [74, 75]), ([(0w39,0w39,169), +(0w48,0w57,169), +(0w65,0w90,169), +(0w95,0w95,169), +(0w97,0w122,169), +(0w46,0w46,170)], [59, 75]), ([(0w39,0w39,169), +(0w48,0w57,169), +(0w65,0w90,169), +(0w95,0w95,169), +(0w97,0w104,169), +(0w106,0w122,169), +(0w46,0w46,170), +(0w105,0w105,172)], [59, 75]), ([(0w39,0w39,169), +(0w48,0w57,169), +(0w65,0w90,169), +(0w95,0w95,169), +(0w97,0w101,169), +(0w103,0w122,169), +(0w46,0w46,170), +(0w102,0w102,171)], [59, 75]), ([], [70, 75]), ([], [63, 75]), ([], [71, 75]), ([(0w39,0w39,169), +(0w48,0w57,169), +(0w65,0w90,169), +(0w95,0w95,169), +(0w97,0w122,169), +(0w46,0w46,170)], [59]), ([], [61]), ([(0w39,0w39,169), +(0w48,0w57,169), +(0w65,0w90,169), +(0w95,0w95,169), +(0w97,0w122,169), +(0w46,0w46,170)], [58, 59]), ([(0w39,0w39,169), +(0w48,0w57,169), +(0w65,0w90,169), +(0w95,0w95,169), +(0w97,0w102,169), +(0w104,0w122,169), +(0w46,0w46,170), +(0w103,0w103,173)], [59]), ([(0w39,0w39,169), +(0w48,0w57,169), +(0w65,0w90,169), +(0w95,0w95,169), +(0w97,0w104,169), +(0w106,0w122,169), +(0w46,0w46,170), +(0w105,0w105,174)], [59]), ([(0w39,0w39,169), +(0w48,0w57,169), +(0w65,0w90,169), +(0w95,0w95,169), +(0w97,0w115,169), +(0w117,0w122,169), +(0w46,0w46,170), +(0w116,0w116,174)], [59, 62]), ([], [72]), ([], [39]), ([(0w39,0w39,177), +(0w48,0w57,177), +(0w65,0w90,177), +(0w95,0w95,177), +(0w97,0w122,177)], [60]), ([], [57]), ([], [37, 75]), ([(0w42,0w42,182)], [29, 75]), ([], [30, 75]), ([], [38])] + fun yystreamify' p input = ULexBuffer.mkStream (p, input) + + fun yystreamifyReader' p readFn strm = let + val s = ref strm + fun iter(strm, n, accum) = + if n > 1024 then (String.implode (rev accum), strm) + else (case readFn strm + of NONE => (String.implode (rev accum), strm) + | SOME(c, strm') => iter (strm', n+1, c::accum)) + fun input() = let + val (data, strm) = iter(!s, 0, []) + in + s := strm; + data + end + in + yystreamify' p input + end + + fun yystreamifyInstream' p strm = yystreamify' p (fn ()=>TextIO.input strm) + + fun innerLex +(yystrm_, yyss_, yysm) = let + (* current start state *) + val yyss = ref yyss_ + fun YYBEGIN ss = (yyss := ss) + (* current input stream *) + val yystrm = ref yystrm_ + fun yysetStrm strm = yystrm := strm + fun yygetPos() = ULexBuffer.getpos (!yystrm) + fun yystreamify input = yystreamify' (yygetPos()) input + fun yystreamifyReader readFn strm = yystreamifyReader' (yygetPos()) readFn strm + fun yystreamifyInstream strm = yystreamifyInstream' (yygetPos()) strm + (* start position of token -- can be updated via skip() *) + val yystartPos = ref (yygetPos()) + (* get one char of input *) + fun yygetc strm = (case ULexBuffer.getu strm + of (SOME (0w10, s')) => + (AntlrStreamPos.markNewLine yysm (ULexBuffer.getpos strm); + SOME (0w10, s')) + | x => x) + fun yygetList getc strm = let + val get1 = UTF8.getu getc + fun iter (strm, accum) = + (case get1 strm + of NONE => rev accum + | SOME (w, strm') => iter (strm', w::accum) + (* end case *)) + in + iter (strm, []) + end + (* create yytext *) + fun yymksubstr(strm) = ULexBuffer.subtract (strm, !yystrm) + fun yymktext(strm) = Substring.string (yymksubstr strm) + fun yymkunicode(strm) = yygetList Substring.getc (yymksubstr strm) + open UserDeclarations + fun lex () = let + fun yystuck (yyNO_MATCH) = raise Fail "lexer reached a stuck state" + | yystuck (yyMATCH (strm, action, old)) = + action (strm, old) + val yypos = yygetPos() + fun yygetlineNo strm = AntlrStreamPos.lineNo yysm (ULexBuffer.getpos strm) + fun yygetcolNo strm = AntlrStreamPos.colNo yysm (ULexBuffer.getpos strm) + fun yyactsToMatches (strm, [], oldMatches) = oldMatches + | yyactsToMatches (strm, act::acts, oldMatches) = + yyMATCH (strm, act, yyactsToMatches (strm, acts, oldMatches)) + fun yygo actTable = + (fn (~1, _, oldMatches) => yystuck oldMatches + | (curState, strm, oldMatches) => let + val (transitions, finals') = Vector.sub (yytable, curState) + val finals = List.map (fn i => Vector.sub (actTable, i)) finals' + fun tryfinal() = + yystuck (yyactsToMatches (strm, finals, oldMatches)) + fun find (c, []) = NONE + | find (c, (c1, c2, s)::ts) = + if c1 <= c andalso c <= c2 then SOME s + else find (c, ts) + in case yygetc strm + of SOME(c, strm') => + (case find (c, transitions) + of NONE => tryfinal() + | SOME n => + yygo actTable + (n, strm', + yyactsToMatches (strm, finals, oldMatches))) + | NONE => tryfinal() + end) + val yylastwasnref = ref (ULexBuffer.lastWasNL (!yystrm)) + fun continue() = let val yylastwasn = !yylastwasnref in +let +fun yyAction0 (strm, lastMatch : yymatch) = (yystrm := strm; + YYBEGIN CONSTR; Tok.OF) +fun yyAction1 (strm, lastMatch : yymatch) = (yystrm := strm; skip()) +fun yyAction2 (strm, lastMatch : yymatch) = let + val yytext = yymktext(strm) + in + yystrm := strm; Tok.ID yytext + end +fun yyAction3 (strm, lastMatch : yymatch) = (yystrm := strm; Tok.KW_change) +fun yyAction4 (strm, lastMatch : yymatch) = (yystrm := strm; + YYBEGIN PRECODE; Tok.KW_defs) +fun yyAction5 (strm, lastMatch : yymatch) = (yystrm := strm; Tok.KW_dropping) +fun yyAction6 (strm, lastMatch : yymatch) = (yystrm := strm; Tok.KW_entry) +fun yyAction7 (strm, lastMatch : yymatch) = (yystrm := strm; + YYBEGIN PRECODE; Tok.KW_header) +fun yyAction8 (strm, lastMatch : yymatch) = (yystrm := strm; Tok.KW_import) +fun yyAction9 (strm, lastMatch : yymatch) = (yystrm := strm; Tok.KW_keywords) +fun yyAction10 (strm, lastMatch : yymatch) = (yystrm := strm; Tok.KW_name) +fun yyAction11 (strm, lastMatch : yymatch) = (yystrm := strm; Tok.KW_nonterms) +fun yyAction12 (strm, lastMatch : yymatch) = (yystrm := strm; Tok.KW_prefer) +fun yyAction13 (strm, lastMatch : yymatch) = (yystrm := strm; + YYBEGIN CONSTR; Tok.KW_refcell) +fun yyAction14 (strm, lastMatch : yymatch) = (yystrm := strm; Tok.KW_start) +fun yyAction15 (strm, lastMatch : yymatch) = (yystrm := strm; + YYBEGIN CONSTR; Tok.KW_tokens) +fun yyAction16 (strm, lastMatch : yymatch) = (yystrm := strm; + YYBEGIN CONSTR; Tok.KW_tokentype) +fun yyAction17 (strm, lastMatch : yymatch) = (yystrm := strm; Tok.KW_try) +fun yyAction18 (strm, lastMatch : yymatch) = (yystrm := strm; + YYBEGIN PRECODE; Tok.KW_value) +fun yyAction19 (strm, lastMatch : yymatch) = (yystrm := strm; + YYBEGIN PRECODE; Tok.KW_where) +fun yyAction20 (strm, lastMatch : yymatch) = (yystrm := strm; Tok.BAR) +fun yyAction21 (strm, lastMatch : yymatch) = (yystrm := strm; + YYBEGIN PRECODE; Tok.AT) +fun yyAction22 (strm, lastMatch : yymatch) = (yystrm := strm; Tok.DOLLAR) +fun yyAction23 (strm, lastMatch : yymatch) = (yystrm := strm; Tok.PLUS) +fun yyAction24 (strm, lastMatch : yymatch) = (yystrm := strm; Tok.STAR) +fun yyAction25 (strm, lastMatch : yymatch) = (yystrm := strm; Tok.QUERY) +fun yyAction26 (strm, lastMatch : yymatch) = (yystrm := strm; Tok.COLON) +fun yyAction27 (strm, lastMatch : yymatch) = (yystrm := strm; Tok.SEMI) +fun yyAction28 (strm, lastMatch : yymatch) = (yystrm := strm; Tok.COMMA) +fun yyAction29 (strm, lastMatch : yymatch) = (yystrm := strm; Tok.LP) +fun yyAction30 (strm, lastMatch : yymatch) = (yystrm := strm; Tok.RP) +fun yyAction31 (strm, lastMatch : yymatch) = (yystrm := strm; Tok.LSB) +fun yyAction32 (strm, lastMatch : yymatch) = (yystrm := strm; Tok.RSB) +fun yyAction33 (strm, lastMatch : yymatch) = (yystrm := strm; Tok.SLASH) +fun yyAction34 (strm, lastMatch : yymatch) = (yystrm := strm; Tok.EQ) +fun yyAction35 (strm, lastMatch : yymatch) = (yystrm := strm; Tok.ARROW) +fun yyAction36 (strm, lastMatch : yymatch) = (yystrm := strm; + YYBEGIN PRECODE; Tok.DARROW) +fun yyAction37 (strm, lastMatch : yymatch) = let + val yytext = yymktext(strm) + in + yystrm := strm; + YYBEGIN STRING; clrText(); addText yytext; + ignore(continue() before YYBEGIN INITIAL); + Tok.STRING (getText()) + end +fun yyAction38 (strm, lastMatch : yymatch) = let + val yylineno = ref(yygetlineNo(!(yystrm))) + in + yystrm := strm; + comLvl := 1; comStart := !yylineno; YYBEGIN COM; + ignore(continue() before YYBEGIN INITIAL); + continue() + end +fun yyAction39 (strm, lastMatch : yymatch) = let + val yylineno = ref(yygetlineNo(!(yystrm))) + in + yystrm := strm; + comLvl := 1; comStart := !yylineno; YYBEGIN COM; + ignore(continue() before YYBEGIN CONSTR); + continue() + end +fun yyAction40 (strm, lastMatch : yymatch) = let + val yylineno = ref(yygetlineNo(!(yystrm))) + in + yystrm := strm; + comLvl := 1; comStart := !yylineno; YYBEGIN COM; + ignore(continue() before YYBEGIN PRECODE); + continue() + end +fun yyAction41 (strm, lastMatch : yymatch) = let + val yylineno = ref(yygetlineNo(!(yystrm))) + in + yystrm := strm; + comLvl := 1; comStart := !yylineno; YYBEGIN COM; + ignore(continue() before YYBEGIN CODE); + continue() + end +fun yyAction42 (strm, lastMatch : yymatch) = (yystrm := strm; + comLvl := !comLvl+1; continue()) +fun yyAction43 (strm, lastMatch : yymatch) = (yystrm := strm; + comLvl := !comLvl-1; + if (!comLvl = 0) + then (Tok.BOGUS) + else continue()) +fun yyAction44 (strm, lastMatch : yymatch) = (yystrm := strm; continue()) +fun yyAction45 (strm, lastMatch : yymatch) = (yystrm := strm; + pcount := 1; YYBEGIN CODE; clrText(); continue()) +fun yyAction46 (strm, lastMatch : yymatch) = let + val yytext = yymktext(strm) + in + yystrm := strm; + YYBEGIN STRING; clrText(); addText yytext; + ignore(continue() before YYBEGIN PRECODE); + Tok.STRING (getText()) + end +fun yyAction47 (strm, lastMatch : yymatch) = let + val yytext = yymktext(strm) + in + yystrm := strm; + addText yytext; (* NOTE: the initial "(" is consumed in the PRECODE state *) + inc pcount; continue() + end +fun yyAction48 (strm, lastMatch : yymatch) = let + val yytext = yymktext(strm) + in + yystrm := strm; + dec pcount; + if !pcount = 0 + then (YYBEGIN INITIAL; Tok.CODE (getText())) + else (addText yytext; continue()) + end +fun yyAction49 (strm, lastMatch : yymatch) = let + val yytext = yymktext(strm) + in + yystrm := strm; + addText yytext; YYBEGIN STRING; + ignore(continue() before YYBEGIN CODE); + continue() + end +fun yyAction50 (strm, lastMatch : yymatch) = let + val yytext = yymktext(strm) + in + yystrm := strm; addText yytext; continue() + end +fun yyAction51 (strm, lastMatch : yymatch) = let + val yytext = yymktext(strm) + in + yystrm := strm; addText yytext; Tok.BOGUS + end +fun yyAction52 (strm, lastMatch : yymatch) = let + val yycolno = ref(yygetcolNo(!(yystrm))) + val yylineno = ref(yygetlineNo(!(yystrm))) + val yytext = yymktext(strm) + in + yystrm := strm; + addText yytext; err (!yylineno, !yycolno, "unclosed string"); + Tok.BOGUS + end +fun yyAction53 (strm, lastMatch : yymatch) = let + val yytext = yymktext(strm) + in + yystrm := strm; addText yytext; continue() + end +fun yyAction54 (strm, lastMatch : yymatch) = let + val yytext = yymktext(strm) + in + yystrm := strm; addText yytext; continue() + end +fun yyAction55 (strm, lastMatch : yymatch) = let + val yytext = yymktext(strm) + in + yystrm := strm; addText(String.toString yytext); continue() + end +fun yyAction56 (strm, lastMatch : yymatch) = let + val yytext = yymktext(strm) + in + yystrm := strm; addText yytext; continue() + end +fun yyAction57 (strm, lastMatch : yymatch) = (yystrm := strm; continue()) +fun yyAction58 (strm, lastMatch : yymatch) = (yystrm := strm; Tok.OF) +fun yyAction59 (strm, lastMatch : yymatch) = let + val yytext = yymktext(strm) + in + yystrm := strm; Tok.ID yytext + end +fun yyAction60 (strm, lastMatch : yymatch) = let + val yytext = yymktext(strm) + in + yystrm := strm; Tok.TYVAR yytext + end +fun yyAction61 (strm, lastMatch : yymatch) = let + val yytext = yymktext(strm) + in + yystrm := strm; Tok.IDDOT yytext + end +fun yyAction62 (strm, lastMatch : yymatch) = let + val yytext = yymktext(strm) + in + yystrm := strm; Tok.INT yytext + end +fun yyAction63 (strm, lastMatch : yymatch) = (yystrm := strm; Tok.BAR) +fun yyAction64 (strm, lastMatch : yymatch) = (yystrm := strm; Tok.STAR) +fun yyAction65 (strm, lastMatch : yymatch) = (yystrm := strm; Tok.COLON) +fun yyAction66 (strm, lastMatch : yymatch) = (yystrm := strm; Tok.COMMA) +fun yyAction67 (strm, lastMatch : yymatch) = (yystrm := strm; + YYBEGIN INITIAL; Tok.SEMI) +fun yyAction68 (strm, lastMatch : yymatch) = (yystrm := strm; Tok.LP) +fun yyAction69 (strm, lastMatch : yymatch) = (yystrm := strm; Tok.RP) +fun yyAction70 (strm, lastMatch : yymatch) = (yystrm := strm; Tok.LCB) +fun yyAction71 (strm, lastMatch : yymatch) = (yystrm := strm; Tok.RCB) +fun yyAction72 (strm, lastMatch : yymatch) = (yystrm := strm; Tok.ARROW) +fun yyAction73 (strm, lastMatch : yymatch) = let + val yytext = yymktext(strm) + in + yystrm := strm; + YYBEGIN STRING; clrText(); addText yytext; + ignore(continue() before YYBEGIN CONSTR); + Tok.STRING (getText()) + end +fun yyAction74 (strm, lastMatch : yymatch) = (yystrm := strm; + YYBEGIN PRECODE; Tok.EQ) +fun yyAction75 (strm, lastMatch : yymatch) = let + val yycolno = ref(yygetcolNo(!(yystrm))) + val yylineno = ref(yygetlineNo(!(yystrm))) + val yytext = yymktext(strm) + in + yystrm := strm; + err (!yylineno, !yycolno, + concat["illegal character '", + String.toCString yytext, "'"]); + continue() + end +val yyactTable = Vector.fromList([yyAction0, yyAction1, yyAction2, yyAction3, + yyAction4, yyAction5, yyAction6, yyAction7, yyAction8, yyAction9, yyAction10, + yyAction11, yyAction12, yyAction13, yyAction14, yyAction15, yyAction16, + yyAction17, yyAction18, yyAction19, yyAction20, yyAction21, yyAction22, + yyAction23, yyAction24, yyAction25, yyAction26, yyAction27, yyAction28, + yyAction29, yyAction30, yyAction31, yyAction32, yyAction33, yyAction34, + yyAction35, yyAction36, yyAction37, yyAction38, yyAction39, yyAction40, + yyAction41, yyAction42, yyAction43, yyAction44, yyAction45, yyAction46, + yyAction47, yyAction48, yyAction49, yyAction50, yyAction51, yyAction52, + yyAction53, yyAction54, yyAction55, yyAction56, yyAction57, yyAction58, + yyAction59, yyAction60, yyAction61, yyAction62, yyAction63, yyAction64, + yyAction65, yyAction66, yyAction67, yyAction68, yyAction69, yyAction70, + yyAction71, yyAction72, yyAction73, yyAction74, yyAction75]) +in + if ULexBuffer.eof(!(yystrm)) + then let + val yycolno = ref(yygetcolNo(!(yystrm))) + val yylineno = ref(yygetlineNo(!(yystrm))) + in + (case (!(yyss)) + of _ => (UserDeclarations.eof()) + (* end case *)) + end + else (case (!(yyss)) + of COM => yygo yyactTable (0, !(yystrm), yyNO_MATCH) + | CODE => yygo yyactTable (1, !(yystrm), yyNO_MATCH) + | STRING => yygo yyactTable (2, !(yystrm), yyNO_MATCH) + | PRECODE => yygo yyactTable (3, !(yystrm), yyNO_MATCH) + | PRECONSTR => yygo yyactTable (4, !(yystrm), yyNO_MATCH) + | CONSTR => yygo yyactTable (5, !(yystrm), yyNO_MATCH) + | INITIAL => yygo yyactTable (6, !(yystrm), yyNO_MATCH) + (* end case *)) +end +end + and skip() = (yystartPos := yygetPos(); + yylastwasnref := ULexBuffer.lastWasNL (!yystrm); + continue()) + in (continue(), (!yystartPos, yygetPos()), !yystrm, !yyss) end + in + lex() + end + in + type pos = AntlrStreamPos.pos + type span = AntlrStreamPos.span + type tok = UserDeclarations.lex_result + + datatype prestrm = STRM of ULexBuffer.stream * + (yystart_state * tok * span * prestrm * yystart_state) option ref + type strm = (prestrm * yystart_state) + + fun lex sm +(STRM (yystrm, memo), ss) = (case !memo + of NONE => let + val (tok, span, yystrm', ss') = innerLex +(yystrm, ss, sm) + val strm' = STRM (yystrm', ref NONE); + in + memo := SOME (ss, tok, span, strm', ss'); + (tok, span, (strm', ss')) + end + | SOME (ss', tok, span, strm', ss'') => + if ss = ss' then + (tok, span, (strm', ss'')) + else ( + memo := NONE; + lex sm +(STRM (yystrm, memo), ss)) + (* end case *)) + + fun streamify input = (STRM (yystreamify' 0 input, ref NONE), INITIAL) + fun streamifyReader readFn strm = (STRM (yystreamifyReader' 0 readFn strm, ref NONE), + INITIAL) + fun streamifyInstream strm = (STRM (yystreamifyInstream' 0 strm, ref NONE), + INITIAL) + + fun getPos (STRM (strm, _), _) = ULexBuffer.getpos strm + + end +end diff --git a/ml-lpt/ml-antlr/FrontEnds/ml-yacc-based/parse-file.sml b/ml-lpt/ml-antlr/FrontEnds/ml-yacc-based/parse-file.sml new file mode 100644 index 0000000..cbf6bf9 --- /dev/null +++ b/ml-lpt/ml-antlr/FrontEnds/ml-yacc-based/parse-file.sml @@ -0,0 +1,37 @@ +(* parse-file.sml + * + * COPYRIGHT (c) 2006 + * John Reppy (http://www.cs.uchicago.edu/~jhr) + * Aaron Turon (http://www.cs.uchicago.edu/~adrassi) + * All rights reserved. + * + * Driver for the parser. + *) + +structure ParseFile = + struct + + (* glue together the lexer and parser *) + structure LLKLrVals = MLYLrValsFun (structure Token = LrParser.Token) + structure LLKLex = MLYLexFn (structure Tok = LLKLrVals.Tokens) + structure LLKParser = JoinWithArg( + structure ParserData = LLKLrVals.ParserData + structure Lex = LLKLex + structure LrParser = LrParser) + + (* parse a file, returning a parse tree *) + fun parse' (startGrm, filename) = let + val _ = Err.status ("parsing " ^ filename) + val file = TextIO.openIn filename + fun get n = TextIO.inputN (file, n) + val lexer = LLKParser.makeLexer get (Err.lexErr filename) + in + #1(LLKParser.parse + (15, lexer, Err.parseErr filename, + (Err.parseErr filename, startGrm, parse'))) + before TextIO.closeIn file + end + + fun parse filename = parse'(GrammarSyntax.mkGrammar(), filename) + + end \ No newline at end of file diff --git a/ml-lpt/ml-antlr/FrontEnds/ml-yacc-based/spec.grm b/ml-lpt/ml-antlr/FrontEnds/ml-yacc-based/spec.grm new file mode 100644 index 0000000..6ae3f2b --- /dev/null +++ b/ml-lpt/ml-antlr/FrontEnds/ml-yacc-based/spec.grm @@ -0,0 +1,284 @@ +(* spec.grm + * + * COPYRIGHT (c) 2006 + * John Reppy (http://www.cs.uchicago.edu/~jhr) + * Aaron Turon (http://www.cs.uchicago.edu/~adrassi) + * All rights reserved. + *) + +structure GS = GrammarSyntax + +type code = Int.int * String.string + +%% + +%name MLY + +%term EOF + | KW_tokens | KW_defs | KW_keywords | KW_import | KW_name | KW_start | KW_entry | KW_refcell + | KW_try | KW_where | KW_debugact | KW_unitact | KW_header + | KW_drop | KW_extend | KW_replace + | LP | RP (* ( ) *) + | LSB | RSB (* [ ] *) + | LCB | RCB (* { } *) + | SLASH (* / *) + | COLON + | SEMI + | COMMA + | AT + | DOLLAR + | BAR (* | *) + | STAR (* * *) + | PLUS (* + *) + | QUERY (* ? *) + | CODE of string + | EQ (* = *) + | ARROW (* -> *) + | DARROW (* => *) + | REFSET (* :== *) + | OF + | ID of string + | STRING of string + | IDDOT of string (* identifier with dot (e.g., "module.") *) + | TYVAR of string (* ML-style type variable name *) + | INT of string (* integer label within record *) + | BOGUS + +%nonterm + File of GS.grammar + | Decls of GS.grammar + | Rule of GS.rule + | Formals of Atom.atom list + | IDList of string list + | SymList of Atom.atom list + | AltList of GS.alt list + | Alt of GS.alt + | IsTry of bool + | AltPred of code option + | AltCode of code option + | ItemList of (string option * GS.item) list + | NamedItem of string option * GS.item + | Item of GS.item + | PrimItem of GS.item + | Args of GS.action option + | LHS of Atom.atom + | Symbol of Atom.atom + | ConstrList of GS.constr list + | OptTy of GS.ty option + | Ty of GS.ty + | OptAbbrev of Atom.atom option + | RecordList of string + | Label of string + | QualID of string + | Code of code + +%arg (errFn, startGrm) : ((string * int * int) -> unit) * GS.grammar + +%pos int +%eop EOF +%noshift EOF +%start File +%verbose + +%right ARROW +%left STAR + +%% + +File : Decls + (Decls) + +Decls + : + (startGrm) + | Decls KW_name ID SEMI + (GS.updName (Decls, ID)) + | Decls KW_start ID SEMI + (GS.updStartSym (Decls, Atom.atom ID)) + | Decls KW_entry IDList SEMI + (GS.updEntryPoints (Decls, map Atom.atom IDList)) + | Decls KW_keywords SymList SEMI + (GS.updKeywords (Decls, SymList)) +(* | Decls KW_header STRING SEMI + (GS.updHeader (Decls, STRING)) *) + | Decls KW_defs Code SEMI + (GS.updDefs (Decls, Code)) + | Decls KW_tokens COLON ConstrList SEMI + (GS.updToks (Decls, ConstrList)) + | Decls KW_import STRING SEMI + (GS.updImport (Decls, + (Substring.string o + (Substring.triml 1) o + (Substring.trimr 1) o + Substring.full) STRING)) + | Decls KW_refcell ID COLON Ty REFSET Code SEMI + (GS.addRefcell (Decls, (ID, Ty, Code))) + | Decls Rule SEMI + (GS.addRule (Decls, Rule)) + | Decls KW_extend Rule SEMI + (GS.addImportChange (Decls, GS.ICExtend Rule)) + | Decls KW_replace Rule SEMI + (GS.addImportChange (Decls, GS.ICReplace Rule)) + | Decls KW_drop IDList SEMI + (foldl + (fn (id, g) => + GS.addImportChange (g, GS.ICDrop (Atom.atom id))) + Decls + IDList) + | Decls KW_debugact SEMI + (GS.debugAct Decls) + | Decls KW_unitact SEMI + (GS.unitAct Decls) + +Rule + : LHS Formals COLON AltList + (GS.RULE{lhs=LHS, formals = Formals, alts=AltList}) + +Formals + : LP IDList RP + (map Atom.atom IDList) + | + ([]) + +IDList + : ID + ([ID]) + | ID COMMA IDList + (ID::IDList) + +SymList + : Symbol + ([Symbol]) + | Symbol COMMA SymList + (Symbol::SymList) + +AltList + : Alt + ([Alt]) + | Alt BAR AltList + (Alt :: AltList) + +Alt + : IsTry ItemList AltPred AltCode + (GS.ALT {items = ItemList, action = AltCode, + try = IsTry, pred = AltPred}) + +IsTry + : KW_try + (true) + | + (false) + +AltPred + : KW_where Code + (SOME Code) + | + (NONE) + +AltCode + : DARROW Code + (SOME Code) + | + (NONE) + +ItemList + : (* empty *) + ([]) + | NamedItem ItemList + (NamedItem :: ItemList) + +NamedItem + : ID EQ Item + (SOME ID, Item) + | Item + (NONE, Item) + +Item + : PrimItem + (PrimItem) + | PrimItem STAR + (GS.CLOS PrimItem) + | PrimItem PLUS + (GS.POSCLOS PrimItem) + | PrimItem QUERY + (GS.OPT PrimItem) + +PrimItem + : Symbol Args + (GS.SYMBOL (Symbol, Args)) + | LP AltList RP + (GS.SUBRULE AltList) + +Args + : + (NONE) + | AT Code + (SOME Code) + +LHS + : ID + (Atom.atom ID) + +Symbol + : ID + (Atom.atom ID) + | STRING + (Atom.atom STRING) + +ConstrList + : ConstrList BAR ID OptTy OptAbbrev + ((Atom.atom ID, OptTy, OptAbbrev)::ConstrList) + | ID OptTy OptAbbrev + ([(Atom.atom ID, OptTy, OptAbbrev)]) + +OptTy + : + (NONE) + | OF Ty + (SOME Ty) + +Ty + : TYVAR + (TYVAR) + | LCB RecordList RCB + ("{ " ^ RecordList ^" } ") + | LCB RCB + ("{}") + | LP Ty RP + ("(" ^ Ty ^ ")") + | Ty QualID + (Ty ^ " " ^ QualID) + | QualID + (QualID) + | Ty STAR Ty + (Ty1 ^ "*" ^ Ty2) + | Ty ARROW Ty + (Ty1 ^ " -> " ^ Ty2) + +OptAbbrev + : + (NONE) + | LP STRING RP + (SOME (Atom.atom STRING)) + +RecordList + : RecordList COMMA Label COLON Ty + (RecordList ^ "," ^ Label ^ ":" ^ Ty) + | Label COLON Ty + (Label ^ ":" ^ Ty) + +Label + : ID + (ID) + | INT + (INT) + +QualID + : ID + (ID) + | IDDOT QualID + (IDDOT ^ QualID) + +Code + : CODE + (CODEleft, CODE) diff --git a/ml-lpt/ml-antlr/FrontEnds/ml-yacc-based/spec.lex b/ml-lpt/ml-antlr/FrontEnds/ml-yacc-based/spec.lex new file mode 100644 index 0000000..c22d898 --- /dev/null +++ b/ml-lpt/ml-antlr/FrontEnds/ml-yacc-based/spec.lex @@ -0,0 +1,175 @@ +(* spec.lex + * + * COPYRIGHT (c) 2006 + * John Reppy (http://www.cs.uchicago.edu/~jhr) + * Aaron Turon (http://www.cs.uchicago.edu/~adrassi) + * All rights reserved. + * + * (With some code borrowed from ml-yacc) + *) + +val comLvl : int ref = ref 0 (* nesting depth of comments *) +val comStart : int ref = ref 0 (* start line of current comment *) + +type pos = int +type svalue = Tok.svalue +type lexresult = (svalue,pos) Tok.token +type lexarg = (int * string) -> unit (* error reporting function *) +type arg = lexarg +type ('a, 'b) token = ('a,'b) Tok.token + +fun eof (err) = ( + if (!comLvl > 0) + then err(~1, "unclosed comment starting at line " ^ Int.toString(!comStart)) + else (); + Tok.EOF(~1, ~1)) + +val text : string list ref = ref [] +fun addText s = (text := s::(!text)) +fun clrText () = (text := []) +fun getText () = concat (rev (!text)) + +val pcount = ref 0 +fun inc (ri as ref i) = (ri := i+1) +fun dec (ri as ref i) = (ri := i-1) + +%% + +%header (functor MLYLexFn (structure Tok : MLY_TOKENS)); +%arg (err); +%count + +eol=("\n"|"\013\n"|"\013"); +ws=("\009"|"\011"|"\012"|" "|{eol}); +lc=[a-z]; +uc=[A-Z]; +alpha=({lc}|{uc}); +digit=[0-9]; +int=digit*; +idchars=({alpha}|{digit}|"_"|"'"); +id={alpha}{idchars}*; +qualid ={id}"."; +tyvar="'"{idchars}*; + +%s STRING COM CODE CONSTR; + +%% + +{ws}+ => (continue()); +{id} => (Tok.ID (yytext, !yylineno, !yylineno)); + +"%tokens" => (YYBEGIN CONSTR; + Tok.KW_tokens (!yylineno, !yylineno)); +"%defs" => (YYBEGIN CODE; clrText(); + Tok.KW_defs (!yylineno, !yylineno)); +"%keywords" => (Tok.KW_keywords (!yylineno, !yylineno)); +"%import" => (Tok.KW_import (!yylineno, !yylineno)); +"%name" => (Tok.KW_name (!yylineno, !yylineno)); +"%header" => (Tok.KW_header (!yylineno, !yylineno)); +"%start" => (Tok.KW_start (!yylineno, !yylineno)); +"%entry" => (Tok.KW_entry (!yylineno, !yylineno)); +"%try" => (Tok.KW_try (!yylineno, !yylineno)); +"%where" => (YYBEGIN CODE; clrText(); + Tok.KW_where (!yylineno, !yylineno)); +"%debugactions"=> (Tok.KW_debugact (!yylineno, !yylineno)); +"%unitactions" => (Tok.KW_unitact (!yylineno, !yylineno)); +"%drop" => (Tok.KW_drop (!yylineno, !yylineno)); +"%extend" => (Tok.KW_extend (!yylineno, !yylineno)); +"%replace" => (Tok.KW_replace (!yylineno, !yylineno)); +"%refcell" => (YYBEGIN CONSTR; + Tok.KW_refcell (!yylineno, !yylineno)); + +"|" => (Tok.BAR (!yylineno, !yylineno)); +"@" => (YYBEGIN CODE; clrText(); + Tok.AT (!yylineno, !yylineno)); +"$" => (Tok.DOLLAR (!yylineno, !yylineno)); +"+" => (Tok.PLUS (!yylineno, !yylineno)); +"*" => (Tok.STAR (!yylineno, !yylineno)); +"?" => (Tok.QUERY (!yylineno, !yylineno)); +":" => (Tok.COLON (!yylineno, !yylineno)); +";" => (Tok.SEMI (!yylineno, !yylineno)); +"," => (Tok.COMMA (!yylineno, !yylineno)); +"(" => (Tok.LP (!yylineno, !yylineno)); +")" => (Tok.RP (!yylineno, !yylineno)); +"[" => (Tok.LSB (!yylineno, !yylineno)); +"]" => (Tok.RSB (!yylineno, !yylineno)); +"/" => (Tok.SLASH (!yylineno, !yylineno)); +"=" => (Tok.EQ (!yylineno, !yylineno)); +"->" => (Tok.ARROW (!yylineno, !yylineno)); +"=>" => (YYBEGIN CODE; clrText(); + Tok.DARROW (!yylineno, !yylineno)); +"\"" + => (YYBEGIN STRING; clrText(); addText yytext; + ignore(continue() before YYBEGIN INITIAL); + Tok.STRING (getText(), !yylineno, !yylineno)); + +"(*" + => (comLvl := 1; comStart := !yylineno; YYBEGIN COM; + ignore(continue() before YYBEGIN INITIAL); + continue()); +"(*" + => (comLvl := 1; comStart := !yylineno; YYBEGIN COM; + ignore(continue() before YYBEGIN CONSTR); + continue()); +"(*" + => (comLvl := 1; comStart := !yylineno; YYBEGIN COM; + ignore(continue() before YYBEGIN CODE); + continue()); + +"(*" + => (comLvl := !comLvl+1; continue()); +"*)" + => (comLvl := !comLvl-1; + if (!comLvl = 0) + then (Tok.BOGUS(!yylineno, !yylineno)) + else continue()); +.|{eol} + => (continue()); + +"(" => (if !pcount = 0 then () else addText yytext; + inc pcount; continue()); +")" => (dec pcount; + if !pcount = 0 then + (YYBEGIN INITIAL; + Tok.CODE (getText(), !yylineno, !yylineno)) + else (addText yytext; continue())); +"\"" => (addText yytext; YYBEGIN STRING; + ignore(continue() before YYBEGIN CODE); + continue()); +[^()"]+ => (addText yytext; continue()); + +"\"" => (addText yytext; Tok.BOGUS(!yylineno, !yylineno)); +{eol} => (addText yytext; err (!yylineno, "unclosed string"); + Tok.BOGUS(!yylineno, !yylineno)); +\\ => (addText yytext; continue()); +\\\\ => (addText yytext; continue()); +[^"\\\n\013]+ + => (addText yytext; continue()); +\\\" => (addText yytext; continue()); + +{ws} => (continue()); +"of" => (Tok.OF (!yylineno, !yylineno)); +{id} => (Tok.ID (yytext, !yylineno, !yylineno)); +{tyvar} => (Tok.TYVAR (yytext, !yylineno, !yylineno)); +{qualid}=> (Tok.IDDOT (yytext, !yylineno, !yylineno)); +{int} => (Tok.INT (yytext, !yylineno, !yylineno)); +"|" => (Tok.BAR (!yylineno, !yylineno)); +"*" => (Tok.STAR (!yylineno, !yylineno)); +":" => (Tok.COLON (!yylineno, !yylineno)); +";" => (YYBEGIN INITIAL; + Tok.SEMI (!yylineno, !yylineno)); +"(" => (Tok.LP (!yylineno, !yylineno)); +")" => (Tok.RP (!yylineno, !yylineno)); +"{" => (Tok.LCB (!yylineno, !yylineno)); +"}" => (Tok.RCB (!yylineno, !yylineno)); +"->" => (Tok.ARROW (!yylineno, !yylineno)); +"\"" => (YYBEGIN STRING; clrText(); addText yytext; + ignore(continue() before YYBEGIN CONSTR); + Tok.STRING (getText(), !yylineno, !yylineno)); +":==" => (YYBEGIN CODE; clrText(); + Tok.REFSET (!yylineno, !yylineno)); + +. => (err (!yylineno, + concat["illegal character '", + String.toCString yytext, "'"]); + continue()); diff --git a/ml-lpt/ml-antlr/FrontEnds/parse-file.sml b/ml-lpt/ml-antlr/FrontEnds/parse-file.sml new file mode 100644 index 0000000..cbf6bf9 --- /dev/null +++ b/ml-lpt/ml-antlr/FrontEnds/parse-file.sml @@ -0,0 +1,37 @@ +(* parse-file.sml + * + * COPYRIGHT (c) 2006 + * John Reppy (http://www.cs.uchicago.edu/~jhr) + * Aaron Turon (http://www.cs.uchicago.edu/~adrassi) + * All rights reserved. + * + * Driver for the parser. + *) + +structure ParseFile = + struct + + (* glue together the lexer and parser *) + structure LLKLrVals = MLYLrValsFun (structure Token = LrParser.Token) + structure LLKLex = MLYLexFn (structure Tok = LLKLrVals.Tokens) + structure LLKParser = JoinWithArg( + structure ParserData = LLKLrVals.ParserData + structure Lex = LLKLex + structure LrParser = LrParser) + + (* parse a file, returning a parse tree *) + fun parse' (startGrm, filename) = let + val _ = Err.status ("parsing " ^ filename) + val file = TextIO.openIn filename + fun get n = TextIO.inputN (file, n) + val lexer = LLKParser.makeLexer get (Err.lexErr filename) + in + #1(LLKParser.parse + (15, lexer, Err.parseErr filename, + (Err.parseErr filename, startGrm, parse'))) + before TextIO.closeIn file + end + + fun parse filename = parse'(GrammarSyntax.mkGrammar(), filename) + + end \ No newline at end of file diff --git a/ml-lpt/ml-antlr/Makefile b/ml-lpt/ml-antlr/Makefile new file mode 100644 index 0000000..7a42535 --- /dev/null +++ b/ml-lpt/ml-antlr/Makefile @@ -0,0 +1,51 @@ +# Makefile +# +# COPYRIGHT (c) 2009 The Fellowship of SML/NJ (http://www.smlnj.org) +# All rights reserved. +# + +SHELL = /bin/sh +SML = sml +ML_BUILD = ml-build +ML_MAKEDEPEND = ml-makedepend + +# NOTE: the @SMLsuffix option is new with SML/NJ 110.72 +#HEAP_SUFFIX = $(shell sml @SMLsuffix) +HEAP_SUFFIX = $(shell echo 'TextIO.output (TextIO.stdErr, SMLofNJ.SysInfo.getHeapSuffix ());' | sml 2>&1 1> /dev/null) + +PROGRAM = ml-antlr +HEAP_IMAGE = $(PROGRAM).$(HEAP_SUFFIX) + +FE = FrontEnds +BE = BackEnds + +SML_SRC = $(wildcard $(BE)/SML/*.sml) +LATEX_SRC = $(wildcard $(BE)/LaTeX/*.sml) + +FE_SRC = $(wildcard $(FE)/*.sml) $(FE)/ml-antlr-based/spec.grm $(FE)/ml-antlr-based/spec.lex +BE_SRC = $(wildcard $(BE)/*.sml) $(SML_SRC) $(LATEX_SRC) + +TEMPLATES = $(BE)/SML/template.sml $(BE)/LaTeX/template.tex + +CORE_SRC = $(wildcard *.sml) + +CM_FILES = sources.cm ../common/lpt-common.cm + +SOURCES = $(CORE_SRC) $(FE_SRC) $(BE_SRC) $(TEMPLATES) $(CM_FILES) + +build: $(HEAP_IMAGE) + +$(HEAP_IMAGE): $(SOURCES) + $(ML_BUILD) sources.cm Main.main $(PROGRAM) + +.depend: $(CM_FILES) + touch .depend + $(ML_MAKEDEPEND) -n -f .depend sources.cm $(HEAP_IMAGE) + +sinclude .depend + +clean: + rm -rf .depend .cm $(HEAP_IMAGE) + +devclean: + rm -rf $(HEAP_IMAGE) \ No newline at end of file diff --git a/ml-lpt/ml-antlr/Makefile.mlton b/ml-lpt/ml-antlr/Makefile.mlton new file mode 100644 index 0000000..419ad82 --- /dev/null +++ b/ml-lpt/ml-antlr/Makefile.mlton @@ -0,0 +1,48 @@ +# Makefile +# +# COPYRIGHT (c) 2009 The Fellowship of SML/NJ (http://www.smlnj.org) +# All rights reserved. +# +# Makefile for building ml-antlr using MLton +# + +SHELL = /bin/sh + +MLTON = mlton +MLTON_FLAGS = + +TARGET = mlantlr + +SML_BE = BackEnds/SML +LATEX_BE = BackEnds/LaTeX + +TEMPLATES = $(SML_BE)/string-template.sml \ + $(LATEX_BE)/string-template.sml + +SML_FILES = + +MLB_FILES = ../common/lpt-common.mlb \ + ../lib/ml-lpt-lib.mlb \ + $(TARGET).mlb + +$(TARGET) : $(MLB_FILES) $(SML_FILES) $(TEMPLATES) + $(MLTON) -output $(TARGET) $(MLTON_FLAGS) $(TARGET).mlb + +# a generator for the template string files +# +GEN = ../gen/gen-template-struct + +$(GEN): + (cd ../gen; make gen-template-struct) + +$(SML_BE)/string-template.sml: $(GEN) $(SML_BE)/template.sml + $(GEN) SMLTemplate $(SML_BE)/template.sml $(SML_BE)/string-template.sml + +$(LATEX_BE)/string-template.sml: $(GEN) $(LATEX_BE)/template.tex + $(GEN) LaTeXTemplate $(LATEX_BE)/template.tex $(LATEX_BE)/string-template.sml + +.PHONEY: clean +clean: + rm -f $(TARGET) + rm -f $(TEMPLATES) + (cd ../gen; make clean) diff --git a/ml-lpt/ml-antlr/README b/ml-lpt/ml-antlr/README new file mode 100644 index 0000000..9e391bc --- /dev/null +++ b/ml-lpt/ml-antlr/README @@ -0,0 +1,151 @@ +ml-antlr + +The tool is invoked using the command + + ml-antlr grammar.g + +The basic specification format is described in the doc directory, +in design.pdf. + +================================================================ + NOTE: differences from the design document +================================================================ + - %keywords, %start, %drop, %replace, %extend, %import are + unsupported at present +================================================================ + +The following example grammar illustrates the format: + + %defs ( (* SML code goes here *) ) + + %tokens + : LP + | RP + | PLUS + | NUM of int + | ID of string + ; + + exp + : atomicExp (PLUS atomicExp)* => ( foldl op+ atomicExp SR1 ) + ; + + atomicExp + : LP exp RP + | NUM + | ID => ( lookupVal(ID) ) + ; + +Actions must always occur in the tail position of rules or subrules. +The yield of a token or nonterminal is bound to the name of that +token or nonterminal; if there are multiple occurences with the same +name, the occurences are numbered starting from one as in ml-yacc. + +Subrules are generated any time items are grouped with parentheses, +including for EBNF constructs. In fact, EBNF constructs can only be +used on a parenthesized subrule. Subrules are always numbered, and +are bound to the names SR1, SR2, ... within each production. + +Actions are optional. The default action is to create a tuple that +includes the yield of each data-containing token and each nonterminal +referenced. Thus in the example above, the production "LP exp RP" +needs no action, since the default action will simply return the yield +of the nested exp. + +When ml-antlr analyzes a grammar, it attempts to create a prediction- +decision tree for each nonterminal. In the usual case, this decision +is made using lookahead token sets. The tool will start with k = 1 +lookahead and increment to a set value (e.g. k = 5) until it can +uniquely predict each production. Subtrees of the decision tree +remember the tokens chosen by their parents, and take this into account +when computing lookahead. For example, suppose we have two productions +at the top level that generate the following sentences: + + prod1 ==> AA + prod1 ==> AB + prod1 ==> BC + prod2 ==> AC + prod2 ==> C + +At k = 1, the productions can generate the following sets: + + prod1 {A, B} + prod2 {A, C} + +and k = 2, + + prod1 {A, B, C} + prod2 {C, $} + +Examining the lookahead sets alone, this grammar fragment looks ambiguous +even for k = 2. However, ml-antlr will generate the following decision +tree: + + if LA(0) = A then + if LA(1) = A or LA(1) = B then + predict prod1 + else if LA(1) = C then + predict prod2 + else if LA(0) = B then + predict prod1 + else if LA(1) = C then + predict prod2 + +When predictive parsing is not powerful enough to disambiguate a nonterminal, +backtracking can be used. Productions are marked as eligible for +backtracking by prepending a "%try": + + exp + : %try LP VAR (COMMA VAR)* RP COLON VAR + | %try LP VAR (COMMA VAR)* RP + ; + +When analyzing a grammar, ml-antlr uses roughly the following algorithm: + + computePredictions(k) = + computeLookahead(k) // taking into account parent decisions + for each ambiguous prediction token set + if each predicted production is marked for backtracking + use backtracking to make determination + else + computePredictions(k+1) + +Thus, backtracking will only be used when (1) lookahead prediction fails for +at least k = 1 and (2) ALL productions involved in an ambiguity are marked +as backtracking. Lookahead can be used to narrow down the choices to a +small number of ambiguous productions, which can then be marked as +backtracking. + +Note that when backtracking is used, productions are attempted in the +order in which they appear in the grammar. + +Semantic predicates may be used anywhere in a production to specify further +constraints on the production succeeding. By default, semantic predicates +are only used to cause a parse error after a productions has been +*unambiguously* selected. To use a semantic predicate to disambiguate +a grammar, simply mark the production as backtracking. The exception +caused by a predicate failure will be caught by the backtracker, which +will attempt the next production. + +The following short grammar illustrates these constructs: + + %tokens + : LP + | RP + | VAR of string + | COMMA + | COLON + ; + + exp + : %try LP VAR (COMMA VAR)* RP COLON VAR => ( "prod1" ) + | %try LP VAR RP => ( "prod2" ) + %where ( VAR = "x" ) + | %try LP VAR (COMMA VAR)* RP => ( "prod3" ) + ; + +Finally, a more substantial example grammar is available in the +tests/dragon directory. This grammar is based on the simplified Pascal +grammar presented in the appendix to the dragon book. It includes a +lexer, a testing program, and an example Pascal program. Use "make" to +build the example and "dragon example.pas" to run it. diff --git a/ml-lpt/ml-antlr/action.sml b/ml-lpt/ml-antlr/action.sml new file mode 100644 index 0000000..0cfb3f9 --- /dev/null +++ b/ml-lpt/ml-antlr/action.sml @@ -0,0 +1,61 @@ +(* action.sml + * + * COPYRIGHT (c) 2006 + * John Reppy (http://www.cs.uchicago.edu/~jhr) + * Aaron Turon (http://www.cs.uchicago.edu/~adrassi) + * All rights reserved. + * + * Simple (opaque) encapsulation of semantic actions. + *) + +structure Action :> + sig + + type action + + val action : Err.span * string -> action + val empty : action + val concat : action * action -> action + + val toString : action -> string + val name : action -> string + val span : action -> Err.span + val code : action -> string + val same : (action * action) -> bool + + end = struct + + datatype action + = ACT of { + id : int, + code : string, + span : Err.span + } + + local + val cnt = ref 0 + in + fun nextId() = (cnt := !cnt + 1; !cnt) + end + + fun action (i, s) = ACT {id = nextId(), code = s, span = i} + fun toString (ACT {code, span, ...}) = code +(* if span = 1 then + "(*#line " ^ Int.toString span ^ ".0*)" ^ code + else + "(*#line " ^ Int.toString (span - 1) ^ ".0*) \n" ^ code +*) + + fun name (ACT{id, ...}) = Int.toString id + fun same (ACT{id = id1, ...}, ACT {id = id2, ...}) = (id1 = id2) + + fun span (ACT{span, ...}) = span + fun code (ACT{code, ...}) = code + + val empty = action (Err.emptySpan, "") + fun concat (a, b) = + if same (a, empty) then b + else if same (b, empty) then a + else action (span a, code a ^ code b) + + end diff --git a/ml-lpt/ml-antlr/build.bat b/ml-lpt/ml-antlr/build.bat new file mode 100644 index 0000000..3d9bc35 --- /dev/null +++ b/ml-lpt/ml-antlr/build.bat @@ -0,0 +1,2 @@ +@ECHO off +%COMSPEC% /C "..\..\bin\ml-build.bat -D NO_ML_ULEX -D NO_ML_ANTLR sources.cm Main.main ml-antlr" diff --git a/ml-lpt/ml-antlr/build.sh b/ml-lpt/ml-antlr/build.sh new file mode 100755 index 0000000..433a669 --- /dev/null +++ b/ml-lpt/ml-antlr/build.sh @@ -0,0 +1,51 @@ +#!/bin/sh +# +# Copyright (c) 2018 The Fellowship of SML/NJ (https://smlnj.org) +# +# build script for ml-antlr +# +# options: +# -o image -- specify the name of the heap image, "ml-antlr" +# is the default. + +CMD=$0 + +ROOT="ml-antlr" +HEAP_IMAGE="" +SMLNJROOT=`pwd`/../.. +BIN=${INSTALLDIR:-$SMLNJROOT}/bin +BUILD=$BIN/ml-build +SIZE_OPT="-32" + +# +# process command-line options +# +while [ "$#" != "0" ] ; do + arg=$1 + shift + case $arg in + -32) SIZE_OPT=$arg ;; + -64) SIZE_OPT=$arg ;; + -o) + if [ "$#" = "0" ]; then + echo "$CMD: must supply image name for -o option" + exit 1 + fi + HEAP_IMAGE=$1; shift + ;; + *) + echo $CMD: invalid argument: $arg + exit 1 + ;; + esac +done + +if [ "$HEAP_IMAGE" = "" ]; then + HEAP_IMAGE="$ROOT" +fi + +# +# Build the ml-antlr standalone program: +"$BUILD" $SIZE_OPT -DNO_ML_ULEX -DNO_ML_ANTLR sources.cm Main.main $HEAP_IMAGE + +exit 0 diff --git a/ml-lpt/ml-antlr/check-grammar.sml b/ml-lpt/ml-antlr/check-grammar.sml new file mode 100644 index 0000000..d4071dc --- /dev/null +++ b/ml-lpt/ml-antlr/check-grammar.sml @@ -0,0 +1,412 @@ +(* check-grammar.sml + * + * COPYRIGHT (c) 2005 + * John Reppy (http://www.cs.uchicago.edu/~jhr) + * Aaron Turon (http://www.cs.uchicago.edu/~adrassi) + * All rights reserved. + * + * Check a parse tree, returning a grammar. + *) + +structure CheckGrammar : sig + + val check : GrammarSyntax.grammar option -> LLKSpec.grammar + + end = struct + + structure Syn = GrammarSyntax + structure S = LLKSpec + structure ATbl = AtomTable + structure AMap = AtomMap + structure ASet = AtomSet + + fun nextId (r : int ref) () = let val id = !r in r := id+1; id end + + (* auto-number the bindings for repeated variable names *) + local + fun addNumbers(acc, _, _, []) = rev acc + | addNumbers(acc, n, s, s'::bs) = + if s = s' then + addNumbers((s' ^ (Int.toString n))::acc, n+1, s, bs) + else addNumbers(s'::acc, n, s, bs) + fun numberBindings([]) = [] + | numberBindings(b::bs) = + if List.exists (fn b' => b = b') bs + then (b ^ "1")::numberBindings(addNumbers([], 2, b, bs)) + else b::(numberBindings bs) + (* assign a binding to an item *) + fun binding tokTbl (_, Syn.SYMBOL (name, _)) = (case ATbl.find tokTbl name + of SOME tok => (Token.name tok, Token.hasTy tok) + | NONE => (Atom.toString name, true) + (* end case *)) + | binding tokTbl (_, Syn.SUBRULE _) = ("SR", true) + | binding tokTbl (_, Syn.CLOS itm) = binding tokTbl itm + | binding tokTbl (_, Syn.POSCLOS itm) = binding tokTbl itm + | binding tokTbl (_, Syn.OPT itm) = binding tokTbl itm + in + (* assign bindings to a list of items *) + fun bindings (userNames, items, tokTbl) = let + (* all symbols with user-assigned names should yield a result + * for default actions + *) + val userBindings = map (Option.map (fn nm => (nm, true))) userNames + val autoBindings = map (binding tokTbl) items + val (autoNames, autoYields) = ListPair.unzip autoBindings + val autoNames' = numberBindings autoNames + val autoBindings' = ListPair.zip (autoNames', autoYields) + in + ListPair.map getOpt (userBindings, autoBindings') + end + end + + fun flatten [] = [] + | flatten ((_, Syn.IMPORT {filename, dropping}) :: ds) = let + val dropSet = ASet.fromList (map (fn (_, s) => s) dropping) + fun keep s = not (ASet.member (dropSet, s)) + fun shouldImport (_, Syn.KEYWORD _) = true + | shouldImport (_, Syn.VALUE _) = true + | shouldImport (_, Syn.DEFS _) = true + | shouldImport (_, Syn.TOKEN (sym, _, SOME abbrev)) = + keep sym andalso keep abbrev + | shouldImport (_, Syn.TOKEN (sym, _, NONE)) = keep sym + | shouldImport (_, Syn.REFCELL _) = true + | shouldImport (_, Syn.RULE {lhs, ...}) = keep lhs + | shouldImport _ = false + val imp = case ParseFile.parse filename + of SOME ds' => flatten ds' + | NONE => [] + in + (List.filter shouldImport imp) @ flatten ds + end + | flatten (d::ds) = d :: flatten ds + + (* check a GrammarSyntax.grammar value for errors, while transforming + * it into an LLKSpec.grammar suitable for analysis and parser generation. + *) + fun check (SOME (g : Syn.grammar)) = let + val _ = Err.status "checking grammar" + val ds = flatten g (* flatten import directives *) +(* val _ = print (Syn.ppGrammar ds) *) + (* ref cells used to incrementally build grammar representation *) + val nextGlobalID = nextId (ref 0) + val tokTbl : S.token ATbl.hash_table = ATbl.mkTable (128, Fail "tokens table") + val tokList : S.token list ref = ref [] + val keywords : Err.span ATbl.hash_table = ATbl.mkTable (64, Fail "keywords table") + val prefers : Err.span ATbl.hash_table = ATbl.mkTable (64, Fail "prefers table") + val changeList : (S.token list * S.token list) list ref = ref [] + val defaults : (Err.span * string) ATbl.hash_table = ATbl.mkTable (32, Fail "defaults table") + val entryPts : Err.span ATbl.hash_table = ATbl.mkTable (16, Fail "entryPts table") + val name : (string * Err.span) option ref = ref NONE + val header : Syn.code option ref = ref NONE + val startSym : (Atom.atom * Err.span) option ref = ref NONE + val defs : Action.action ref = ref Action.empty + val toksImport : Action.action option ref = ref NONE + val refCells : S.refcell ATbl.hash_table = ATbl.mkTable (4, Fail "refCells table") + val ntTbl : (S.nonterm * (unit -> int)) ATbl.hash_table = + ATbl.mkTable (64, Fail "ntTbl table") + val ntList : S.nonterm list ref = ref [] + val prodList : S.prod list ref = ref [] + (* error message for duplicate declarations *) + fun dupeErr (origSpan, newSpan, whatDupe) = + Err.spanErr (newSpan, + "duplicate " :: whatDupe + @ [", originally declared at ", Err.span2str origSpan]) + (* PHASE 1: record basic directives, checking for duplicates *) + fun doDecl1 (span, Syn.NAME n) = (case !name + of SOME (_, span') => dupeErr (span', span, ["%name declaration"]) + | NONE => name := SOME (n, span) + (* end case *)) + | doDecl1 (span, Syn.HEADER h) = (case !header + of SOME(span', _) => dupeErr (span', span, ["%header declaration"]) + | NONE => header := SOME h + (* end case *)) + | doDecl1 (span, Syn.START sym) = (case !startSym + of SOME (_, span') => dupeErr (span', span, ["%start declaration"]) + | NONE => startSym := SOME (sym, span) + (* end case *)) + | doDecl1 (span, Syn.ENTRY sym) = (case ATbl.find entryPts sym + of NONE => ATbl.insert entryPts (sym, span) + | SOME span' => dupeErr (span', span, [ + "%entry declaration for '", Atom.toString sym, "'" + ]) + (* end case *)) + | doDecl1 (span, Syn.KEYWORD sym) = (case ATbl.find keywords sym + of NONE => ATbl.insert keywords (sym, span) + | SOME span' => dupeErr (span', span, [ + "%keyword declaration for '", Atom.toString sym, "'" + ]) + (* end case *)) + | doDecl1 (span, Syn.PREFER sym) = (case ATbl.find prefers sym + of NONE => ATbl.insert prefers (sym, span) + | SOME span' => dupeErr (span', span, [ + "%prefer declaration for '", Atom.toString sym, "'" + ]) + (* end case *)) + | doDecl1 (span, Syn.VALUE(sym, (_, code))) = (case ATbl.find defaults sym + of NONE => ATbl.insert defaults (sym, (span, code)) + | SOME(span', _) => dupeErr (span', span, [ + "%value declaration for '", Atom.toString sym, "'" + ]) + (* end case *)) + | doDecl1 (span, Syn.REFCELL (name, ty, code)) = ( + case ATbl.find refCells (Atom.atom name) + of NONE => ATbl.insert refCells + (Atom.atom name, + S.REFCELL { + name = name, ty = ty, + initCode = Action.action code, loc = span}) + | SOME (S.REFCELL {loc, ...}) => + dupeErr (loc, span, ["%refcell declaration for '", name, "'"]) + (* end case *)) + | doDecl1 (span, Syn.DEFS code) = + defs := Action.concat (!defs, Action.action code) + | doDecl1 (span, Syn.TOKENTYPE ty) = (case !toksImport + of NONE => toksImport := SOME(Action.action(span, ty)) + | SOME act => dupeErr (span, Action.span act, ["%tokentype declaration"]) + (* end case *)) + | doDecl1 _ = () + val _ = app doDecl1 ds + (* PHASE 2: record %tokens declarations *) + val kwSet = ASet.addList (ASet.empty, map (fn (x, _) => x) (ATbl.listItemsi keywords)) + fun isKW s = ASet.member (kwSet, s) + fun doDecl2 (span, Syn.TOKEN (sym, tyOpt, abbrevOpt)) = ( + case ATbl.find tokTbl sym + of NONE => let + val _ = (case abbrevOpt + of SOME a => (case ATbl.find tokTbl a + of SOME (S.T{loc, ...}) => + dupeErr (loc, span, [ + "%tokens declaration with abbreviation ", + Atom.toString a + ]) + | NONE => () + (* end case *)) + | NONE => () + (* end case *)) + (* check for default argument for non-nullary tokens *) + val dfltOpt = (case ATbl.find defaults sym + of SOME(span, code) => if Option.isSome tyOpt + then SOME code + else ( + Err.spanErr (span, [ + "value declaration for nullary token '", + Atom.toString sym, "'" + ]); + NONE) + | NONE => NONE + (* end case *)) + val tok = S.T { + id = nextGlobalID(), + name = sym, + loc = span, + ty = tyOpt, + abbrev = abbrevOpt, + keyword = isKW sym orelse getOpt(Option.map isKW abbrevOpt, false), + default = dfltOpt + } + in + ATbl.insert tokTbl (sym, tok); + Option.app (fn a => ATbl.insert tokTbl (a, tok)) abbrevOpt; + tokList := tok :: !tokList + end + | SOME (S.T{loc, ...}) => + dupeErr (loc, span, ["%tokens declaration for '", Atom.toString sym, "'"]) + (* end case *)) + | doDecl2 _ = () + val _ = app doDecl2 ds + val _ = if List.length (!tokList) = 0 then + Err.errMsg ["Error: no tokens defined"] + else () + val eofTok = S.T { + id = nextGlobalID(), name = Atom.atom "EOF", loc = Err.emptySpan, ty = NONE, + abbrev = NONE, keyword = false, default = NONE + } + val _ = (ATbl.insert tokTbl (Atom.atom "EOF", eofTok); + tokList := eofTok :: !tokList) + (* check for symbols in a %keyword, %prefer, %value, or %change declaration that + * are note defined as tokens. + *) + val tokSet = ASet.addList (ASet.empty, #1 (ListPair.unzip (ATbl.listItemsi tokTbl))) + fun undefErr spanOf kind name = + Err.spanErr (spanOf name, [kind, " '", Atom.toString name, "' is not declared as a token"]) + val _ = ASet.app (undefErr (fn name => ATbl.lookup keywords name) "keyword") (ASet.difference (kwSet, tokSet)) + val prefSet = ASet.addList (ASet.empty, map (fn (x, _) => x) (ATbl.listItemsi prefers)) + val _ = ASet.app (undefErr (fn name => ATbl.lookup prefers name) "preferred token") (ASet.difference (prefSet, tokSet)) + val dfltSet = ASet.addList (ASet.empty, map (fn (x, _) => x) (ATbl.listItemsi defaults)) + val _ = ASet.app (undefErr (fn name => #1(ATbl.lookup defaults name)) "value") (ASet.difference (dfltSet, tokSet)) + fun doChange (span, Syn.CHANGE(fromToks, toToks)) = let + fun toTok name = (case ATbl.find tokTbl name + of NONE => ( + Err.spanErr (span, [ + "'", Atom.toString name, + "' in %change declaration is not declared as a token" + ]); + eofTok) + | SOME tok => tok + (* end case *)) + val change = (List.map toTok fromToks, List.map toTok toToks) + in + changeList := change :: !changeList + end + | doChange _ = () + val _ = List.app doChange ds +(* FIXME: need to check that any value-carrying token that might be inserted has a default value *) + (* PHASE 3: load nonterminals *) + fun insNTerm (nt as S.NT{name, ...}) = let + val nid = nextId (ref 1) + in + ATbl.insert ntTbl (name, (nt, nid)); + ntList := nt :: !ntList; + (nt, nid) + end + (* map a non-terminal name to its info record, creating a new nonterminal + * record if none is found. + *) + fun lookupNTerm name = (case ATbl.find ntTbl name + of NONE => ( + if ASet.member (tokSet, name) then + Err.errMsg ["Error: symbol ", Atom.toString name, + " defined as both a token and a nonterminal."] + else (); + insNTerm (S.NT{name = name, id = nextGlobalID(), formals = ref [], ty = ref NONE, + binding = S.TOP, prods = ref[], isEBNF = false, loc = ref NONE})) + | SOME info => info + (* end case *)) + (* check and load a rules and type annotations *) + fun doDecl3 (span, Syn.RULE {lhs, formals = newFormals, rhs}) = let + val (nt as S.NT{prods, formals, loc = ntLoc, ...}, nextProdID) = + lookupNTerm lhs + val nextSRID = nextId (ref 1) + val prodName = concat [Nonterm.name nt, "_PROD_", Int.toString (nextProdID())] + (* check the rhs, creating a production *) + val Syn.RHS {items, action, try, predicate, loc = rhsLoc} = rhs + val (userNames, items) = ListPair.unzip items + val finishedRHS : S.item list ref = ref [] + val prod = S.PROD{ + name = prodName, + lhs = nt, + rhs = finishedRHS, + rhsBindings = bindings (userNames, items, tokTbl), + try = try, + id = nextGlobalID(), + action = Option.map Action.action action, + pred = Option.map Action.action predicate, + loc = rhsLoc + } + fun doPreitem (Syn.SYMBOL (name, args)) = + if ATbl.inDomain tokTbl name + then if not (isSome args) + then S.TOK(valOf (ATbl.find tokTbl name)) + else (Err.errMsg ["Attempted to apply arguments to token ", + Atom.toString name, "."]; + S.TOK eofTok) + else S.NONTERM(#1 (lookupNTerm name), Option.map Action.action args) + | doPreitem (Syn.SUBRULE alts) = S.NONTERM(doSubrule (false, alts), NONE) + | doPreitem (Syn.CLOS itm) = S.CLOS(doSubrule (true, mkAlts itm)) + | doPreitem (Syn.POSCLOS itm) = S.POSCLOS(doSubrule (true, mkAlts itm)) + | doPreitem (Syn.OPT itm) = S.OPT(doSubrule (true, mkAlts itm)) + and doItem (span, s) = S.ITEM {sym = doPreitem s, + id = nextGlobalID(), + loc = span} + and mkAlts (_, Syn.SUBRULE alts) = alts + | mkAlts (span, itm) = [Syn.RHS { + items = [(NONE, (span, itm))], loc = span, + action = NONE, try = false, predicate = NONE + }] + and doSubrule (isEBNF, alts) = let + val prods = ref [] + val srName = Atom.atom (concat [prodName, + "_SUBRULE_", Int.toString (nextSRID())]) + val sr = S.NT{ + name = srName, formals = ref [], binding = S.WITHIN prod, + id = nextGlobalID(), prods = prods, isEBNF = isEBNF, + loc = ref NONE, ty = ref NONE} + fun altToRule alt = let + val Syn.RHS {loc, ...} = alt + in + (loc, Syn.RULE {lhs = srName, formals = [], rhs = alt}) + end + in + ignore (insNTerm sr); + app (doDecl3 o altToRule) alts; + sr + end + in + finishedRHS := map doItem items; (* actually process the RHS *) + prodList := prod :: !prodList; (* add to global prod list *) + prods := prod :: !prods; (* add to lhs's prod list *) + formals := (map Atom.atom newFormals) (* TODO: check for agreement *) + end + | doDecl3 (span, Syn.NONTERM (nt, tyAnn)) = let + val (S.NT{ty, ...}, _) = lookupNTerm nt + in case !ty + of NONE => ty := SOME tyAnn + | SOME _ => Err.spanErr (span, [ + "duplicate type annotation for nonterminal ", + Atom.toString nt, "."]) + end + | doDecl3 _ = () + val _ = app doDecl3 ds + val nterms = rev(!ntList) + (* check the grammar *) + val _ = if List.length (!prodList) = 0 then ( + Err.errMsg ["Error: no rules defined."]; + raise Err.Abort) + else () + (* check for undefined nonterminals, while reversing the order of productions *) + fun chkNT (S.NT{name, prods, ...}) = (case !prods + of [] => Err.errMsg ["Error: symbol ", Atom.toString name, " is not defined."] + | l => prods := List.rev l + (* end case *)) + val _ = app chkNT nterms + (* note: safe to assume length nterms > 0, otherwise aborted above *) + fun findNT errStr (sym, span) = (case ATbl.find ntTbl sym + of NONE => (Err.spanErr (span, ["Error: ", errStr, " symbol ", + Atom.toString sym, + " is not defined."]); + hd nterms) + | SOME (nt, _) => nt + (* end case *)) + val startnt = (case !startSym + of NONE => hd nterms + | SOME s => findNT "%start" s + (* end case *)) + val entryPoints = map (findNT "%entry") (ATbl.listItemsi entryPts) + val sortedTops = Nonterm.topsort (startnt::entryPoints) + val topsSet = AtomSet.addList + (AtomSet.empty, + map (Atom.atom o Nonterm.name) (List.concat sortedTops)) + (* check that all defined nonterminals are used *) + fun checkNTInTops nt = + if Nonterm.isSubrule nt = false + andalso AtomSet.member (topsSet, Atom.atom (Nonterm.name nt)) = false + then Err.warning ["Warning: nonterminal ", Nonterm.name nt, + " is not reachable from any entry point."] + else () + val _ = app checkNTInTops nterms + val _ = Err.abortIfErr() + in + S.Grammar{ + name = getOpt (Option.map #1 (!name), ""), + header = Option.map #2 (!header), + defs = !defs, + toks = List.rev (!tokList), + toksImport = !toksImport, + changes = let (* add the %prefer tokens to the changes list *) + fun preferChange (tok, _, changes) = + ([], [ATbl.lookup tokTbl tok]) :: changes + in + ATbl.foldi preferChange (List.rev (!changeList)) prefers + end, + nterms = nterms, + prods = List.rev(!prodList), + eof = eofTok, + sortedTops = sortedTops, + startnt = startnt, + entryPoints = entryPoints, + refcells = ATbl.listItems refCells + } + end + | check NONE = raise Err.Abort + + end diff --git a/ml-lpt/ml-antlr/compute-predict.sml b/ml-lpt/ml-antlr/compute-predict.sml new file mode 100644 index 0000000..6022f2b --- /dev/null +++ b/ml-lpt/ml-antlr/compute-predict.sml @@ -0,0 +1,162 @@ +(* compute-predict.sml + * + * COPYRIGHT (c) 2006 + * John Reppy (http://www.cs.uchicago.edu/~jhr) + * Aaron Turon (http://www.cs.uchicago.edu/~adrassi) + * All rights reserved. + * + * Prediction tree computation; given a grammar, + * produce a decision tree for each decision point. + *) + +structure ComputePredict : + sig + + val mkPM : LLKSpec.grammar * GLA.gla -> Predict.predict_maps + + end = struct + + structure P = Predict + structure TSet = Token.Set + structure NMap = Nonterm.Map + structure S = LLKSpec + + val maxK = 5 + + val debugPredict = false + + fun debug s = if debugPredict + then TextIO.output(TextIO.stdErr, s ^ "\n") + else () + fun debugs ss = debug (concat ss) + + fun mapi f l = let + fun mapf (_, [], l) = List.rev l + | mapf (i, x::r, l) = mapf (i+1, r, f(i, x)::l) + in + mapf (0, l, []) + end + + fun ntToString (S.NT{binding = S.WITHIN prod, ...}) = Prod.toString prod + | ntToString nt = Nonterm.name nt + + (* error handling, for lookahead computation failure *) + fun doErr (prePath, nt, msg) = let + val (prefix, obj) = (case nt + of S.NT{binding = S.WITHIN prod, ...} => + ("Error " ^ Err.span2str(Prod.span prod), concat["\n ", Prod.toString prod, "\n"]) + | _ => ("Error", concat[" '", Nonterm.name nt, "',\n"]) + (* end case *)) + in + Err.errMsg [ + prefix, ": lookahead computation failed for", + obj, msg, "\n", + "The conflicting token sets are:\n ", + String.concatWith "\n " + (mapi (fn (k, s) => concat[ + "k = ", Int.toString (k+1), ": ", Token.setToString s + ]) prePath), + "\n" + ] + end + + (* compute a decision tree for predicting a production for a nonterminal *) + fun compute(gla, nt) = let + fun tryToks (prePath, k) (prod, branches) = let + fun consNE ((set, prods), ls) = + if TSet.isEmpty set + then ls + else (set, prods)::ls + fun intersect ((set, prods), (branches, toks)) = let + val isct = TSet.intersection (set, toks) + in + (consNE((isct, prod::prods), + consNE((TSet.difference (set, isct), prods), + branches)), + TSet.difference (toks, isct)) + end + val toks = GLA.lookK (gla, prod, prePath, k) + val (branches', toks') = foldl intersect ([], toks) branches + in + consNE((toks', [prod]), branches') + end + fun try (prePath, k, prods) = let + fun finBranch (set, [prod]) = (set, P.Pick prod) + | finBranch (set, []) = raise Fail "bug: ComputePredict" + | finBranch (set, prods) = + if List.all Prod.canTry prods then + (set, P.Choice (Prod.sortProds prods)) + else if k = maxK then ( + doErr (prePath @ [set], nt, String.concat [ + "with a conflict for the following productions:\n ", + String.concatWith "\n " (map Prod.toString prods) + ]); + raise Err.Abort) + else (set, try (prePath @ [set], k+1, prods)) + val branches = foldl (tryToks(prePath, k)) [] prods + in + debugs [" trying k = ", Int.toString k]; + P.ByTok (map finBranch branches) + end + in + debugs [" computing prediction tree for ", Nonterm.qualName nt]; + try ([], 1, Nonterm.prods nt) + before debugs [" done"] + end + + fun unionAll sets = foldl TSet.union TSet.empty sets + + (* compute a decision tree for an EBNF decision, e.g., for + * (A* B), the tree will predict true if A appears again and + * false otherwise. + *) + fun computeEBNF(gla, nt) = let + fun tryToks (k, prePath) = let + fun lookProd prod = GLA.lookK (gla, prod, prePath, k) + val trueToks = unionAll (map lookProd (Nonterm.prods nt)) + val falseToks = GLA.lookKFollow (gla, nt, prePath, k) + val isct = TSet.intersection (trueToks, falseToks) + val trueOnly = TSet.difference (trueToks, isct) + val falseOnly = TSet.difference (falseToks, isct) + val choices = List.concat [ + if TSet.isEmpty trueOnly then [] + else [(trueOnly, Predict.Pick true)], +(* No longer include "false" cases *) +(* if TSet.isEmpty falseOnly then [] + else [(falseOnly, Predict.Pick false)], *) + if TSet.isEmpty isct then [] + else (if k <= maxK + then [(isct, tryToks (k+1, prePath @ [isct]))] + else (doErr (prePath @ [isct], nt, + "deciding between the subrule and " + ^ "the sequence following it:"); + raise Err.Abort))] + in + Predict.ByTok choices + end + in +(* +debugs [" EBNF: ", Nonterm.qualName nt, "\n"]; +*) + tryToks (1, []) + end + + fun mkPM (grm, gla) = let + val LLKSpec.Grammar {sortedTops, nterms, ...} = grm + fun doNT (nt, prodMap) = + NMap.insert (prodMap, nt, compute(gla, nt)) + fun doEBNF (nt, ebnfMap) = + NMap.insert (ebnfMap, nt, computeEBNF(gla, nt)) + (* foldr ==> do innermost predictions first *) + val prodMapTops = foldr doNT NMap.empty (List.concat sortedTops) + val prodMap = foldl doNT prodMapTops (List.filter Nonterm.isSubrule nterms) + val ebnfMap = foldl doEBNF NMap.empty (List.filter Nonterm.isEBNF nterms) + fun mkFn map nt = valOf (NMap.find (map, nt)) + in + Predict.PMaps { + prodPredict = mkFn prodMap, + ebnfPredict = mkFn ebnfMap + } + end + + end diff --git a/ml-lpt/ml-antlr/err.sml b/ml-lpt/ml-antlr/err.sml new file mode 100644 index 0000000..8e62a21 --- /dev/null +++ b/ml-lpt/ml-antlr/err.sml @@ -0,0 +1,82 @@ +(* err.sml + * + * COPYRIGHT (c) 2006 + * John Reppy (http://www.cs.uchicago.edu/~jhr) + * Aaron Turon (http://www.cs.uchicago.edu/~adrassi) + * All rights reserved. + * + * Global error handling, including printing of error messages + * and a flag to trigger halting of the tool. + *) + +structure Err = + struct + + (* signal that the program should be aborted with no further messages printed *) + exception Abort + + type pos = string * int * int +(* type pos = StreamPos.pos *) + type span = pos * pos + +(* + val emptySpan = let val npos = 0 in (npos, npos) end +*) + val emptySpan = let val npos = ("",0,0) in (npos, npos) end + + (* global flag to record the existance of errors *) + val anyErrors = ref false + val leftRecurs : string list ref = ref [] + + fun abortIfErr() = if !anyErrors then raise Abort else () + + fun errMsg l = ( + anyErrors := true; + TextIO.output(TextIO.stdErr, String.concat l ^ "\n")) + + fun warning l = + TextIO.output(TextIO.stdErr, String.concat l ^ "\n") + + local + fun lc2str (l, c) = Int.toString l ^ "." ^ Int.toString c + in + + fun pos2str (fname, l, c) = "[" ^ fname ^ ":" ^ lc2str (l, c) ^ "]" + fun span2str ((fname, l1, c1), (_, l2, c2)) = + if l1 = l2 andalso c1 = c2 + then pos2str (fname, l1, c1) + else + "[" ^ fname ^ ":" ^ lc2str (l1, c1) ^ "-" ^ lc2str (l2, c2) ^ "]" +(* + fun pos2str _ = "" + fun span2str _ = "" +*) + + (* error function at a single position *) + fun posErr (pos, msg) = errMsg (["Error ", pos2str pos, ": "]@msg) + (* error function over a position span *) + fun spanErr (span, msg) = errMsg (["Error ", span2str span, ": "]@msg) + + end + + (* left recursion detected *) + fun leftRecur name = + if List.exists (fn n => (n = name)) (!leftRecurs) + then () + else ( + leftRecurs := name::(!leftRecurs); + warning ["Left recursion detected: ", name, " -> ", name, " ..."]) + + val printDebug = ref true + + (* print a debugging message *) + fun debug s = if !printDebug + then (TextIO.output(TextIO.stdErr, s); TextIO.output(TextIO.stdErr, "\n")) + else () + fun debugs ss = debug (concat ss) + + (* print a status message *) + fun status s = TextIO.output(TextIO.stdErr, concat ["[ml-antlr: ", s, "]\n"]) + fun statuss ss = status (concat ss) + + end diff --git a/ml-lpt/ml-antlr/examples/c/Makefile b/ml-lpt/ml-antlr/examples/c/Makefile new file mode 100644 index 0000000..b5f4aa6 --- /dev/null +++ b/ml-lpt/ml-antlr/examples/c/Makefile @@ -0,0 +1,42 @@ +# +# COPYRIGHT (c) 20056 +# John Reppy (http://www.cs.uchicago.edu/~jhr) +# Aaron Turon (http://www.cs.uchicago.edu/~adrassi) +# All rights reserved. +# + +OS = $(shell uname -s) + +ifeq ($(shell uname -s),Darwin) +HEAP_SUFFIX = ppc-darwin +else +HEAP_SUFFIX = x86-unix +endif + +SHELL = /bin/sh +SML = sml +ML_BUILD = ml-build +ML_MAKEDEPEND = ml-makedepend + +PROGRAM = ct +HEAP_IMAGE = $(PROGRAM).$(HEAP_SUFFIX) + +SRC = +SOURCES = $(wildcard *.sml) cc.lex c.grm.sml sources.cm + +build: c.grm.sml $(HEAP_IMAGE) + +$(HEAP_IMAGE): $(SOURCES) + $(ML_BUILD) sources.cm Main.main $(PROGRAM) + +.depend: $(CM_FILES) + touch .depend + $(ML_MAKEDEPEND) -n -f .depend sources.cm $(HEAP_IMAGE) + +c.grm.sml: c.grm + ml-antlr c.grm + +sinclude .depend + +clean: + rm -rf .depend .cm cc.lex.sml c.grm.sml $(HEAP_IMAGE) diff --git a/ml-lpt/ml-antlr/examples/c/c.grm b/ml-lpt/ml-antlr/examples/c/c.grm new file mode 100644 index 0000000..2237fb0 --- /dev/null +++ b/ml-lpt/ml-antlr/examples/c/c.grm @@ -0,0 +1,413 @@ +%defs ( ); +%unitactions; + +%tokens + : COLON (":") + | SEMICOLON (";") + | LPAREN ("(") + | RPAREN (")") + | LCURLY ("{") + | RCURLY ("}") + | LBRACE ("[") + | RBRACE ("]") + | DOT (".") + | COMMA (",") + | QUESTION ("?") + | PERCENT ("%") + | AMP ("&") + | BAR ("|") + | TILDE ("~") + | DIVIDE ("/") + | PLUS ("+") + | MINUS ("-") + | HAT ("^") + | BANG ("!") + | TIMES ("*") + | INC ("++") + | DEC ("--") + | ARROW ("->") + | ID of string + | EQUALS ("=") + | PLUSEQUALS ("+=") + | MINUSEQUALS ("-=") + | XOREQUALS ("^=") + | MODEQUALS ("%=") + | TIMESEQUALS ("*=") + | DIVEQUALS ("/=") + | OREQUALS ("|=") + | ANDEQUALS ("&=") + | LSHIFTEQUALS ("<<=") + | RSHIFTEQUALS (">>=") + | LTE ("<=") + | GTE (">=") + | LT ("<") + | GT (">") + | EQ ("==") + | NEQ ("!=") + | OR ("||") + | AND ("&&") + | LSHIFT ("<<") + | RSHIFT (">>") + | DECNUM of LargeInt.int + | REALNUM of real + | STRING of string + | CCONST of LargeInt.int + | EXTERN ("extern") + | AUTO ("auto") + | STATIC ("static") + | REGISTER ("register") + | CONST ("const") + | VOLATILE ("volatile") + | IF ("if") + | ELSE ("else") + | FOR ("for") + | DO ("do") + | SWITCH ("switch") + | CASE ("case") + | DEFAULT ("default") + | WHILE ("while") + | RETURN ("return") + | BREAK ("break") + | CONTINUE ("continue") + | GOTO ("goto") + + | CHAR ("char") + | DOUBLE ("double") + | ENUM ("enum") + | FLOAT ("float") + | INT ("int") + | LONG ("long") + | SHORT ("short") +(* | FRACTIONAL | SATURATE (* D *) *) + | STRUCT ("struct") + | UNION ("union") + | UNSIGNED ("unsigned") + | SIGNED ("signed") + | VOID ("void") + | SIZEOF ("sizeof") + | TYPEDEF ("typedef") + | ELIPSIS ("...") + ; + +translation_unit + : external_declaration* + ; + +external_declaration + : %try function_definition + | %try declaration + ; + +function_definition + : declaration_specifiers? declarator declaration* compound_statement + ; + +declaration + : declaration_specifiers init_declarator_list? ";" + ; + +declaration_specifiers + : (storage_class_specifier | type_specifier | type_qualifier)+ + ; + +storage_class_specifier + : "auto" + | "register" + | "static" + | "extern" + | "typedef" + ; + +type_specifier + : "void" + | "char" + | "short" + | "int" + | "long" + | "float" + | "double" + | "signed" + | "unsigned" + | struct_or_union_specifier + | enum_specifier +(* | typedef_name *) + ; + +type_qualifier + : "const" + | "volatile" + ; + +struct_or_union_specifier + : struct_or_union ID? "{" struct_declaration+ "}" + | struct_or_union ID + ; + +struct_or_union + : "struct" + | "union" + ; + +init_declarator_list + : init_declarator ("," init_declarator)* + ; + +init_declarator + : declarator ("=" initializer)? + ; + +struct_declaration + : specifier_qualifier_list struct_declarator_list ";" + ; + +specifier_qualifier_list + : (type_specifier | type_qualifier)+ + ; + +struct_declarator_list + : struct_declarator ("," struct_declarator)* + ; + +struct_declarator + : declarator (":" constant_expression)? + | ":" constant_expression + ; + +enum_specifier + : "enum" ID? "{" enumerator_list "}" + | "enum" ID + ; + +enumerator_list + : enumerator ("," enumerator)* + ; + +enumerator + : ID ("=" constant_expression)? + ; + +declarator + : pointer? direct_declarator + ; + +direct_declarator + : inner_direct_declarator + ( "[" constant_expression? "]" + | "(" parameter_type_list ")" + | "(" identifier_list? ")" + )* + ; + +inner_direct_declarator + : ID + | "(" declarator ")" + ; + +pointer + : ("*" type_qualifier_list?)+ + ; + +type_qualifier_list + : type_qualifier+ + ; + +parameter_type_list + : parameter_list ("," "...")? + ; + +parameter_list + : parameter_declaration ("," parameter_declaration)* + ; + +parameter_declaration + : declaration_specifiers (%try declarator | %try abstract_declarator)? + ; + +identifier_list + : ID ("," ID)* + ; + +initializer + : assignment_expression + | "{" initializer_list ","? "}" + ; + +initializer_list + : initializer ("," initializer)* + ; + +type_name + : specifier_qualifier_list abstract_declarator? + ; + +abstract_declarator + : pointer direct_abstract_declarator? + | direct_abstract_declarator + ; + +direct_abstract_declarator + : "(" abstract_declarator ")" + ( "[" constant_expression? "]" + | "(" parameter_type_list? ")" + )* + | ( "[" constant_expression? "]" + | "(" parameter_type_list? ")" + )+ + ; + +(* +typedef_name + : ID + ; +*) + +statement + : labeled_statement + | expression_statement + | compound_statement + | selection_statement + | iteration_statement + | jump_statement + ; + +labeled_statement + : ID ":" statement + | "case" constant_expression ":" statement + | "default" ":" statement + ; + +expression_statement + : expression? ";" + ; + +compound_statement + : "{" (declaration | statement)* "}" + ; + +selection_statement + : %try "if" "(" expression ")" statement "else" statement + | %try "if" "(" expression ")" statement + | "switch" "(" expression ")" statement + ; + +iteration_statement + : "while" "(" expression ")" statement + | "do" statement "while" "(" expression ")" ";" + | "for" "(" expression? ";" expression? ";" expression? ")" statement + ; + +jump_statement + : "goto" ID ";" + | "continue" ";" + | "break" ";" + | "return" expression? ";" + ; + +expression + : assignment_expression ("," assignment_expression)* + ; + +(* take a look at this ... +assignment_expression + : conditional_expression + | unary_expression assignment_operator assignment_expression + ; +*) +assignment_expression + : conditional_expression (assignment_operator conditional_expression)* + ; + +assignment_operator + : "=" | "*=" | "/=" | "%=" | "+=" | "-=" | "<<=" | ">>=" | "&=" | "^=" | "|=" + ; + +conditional_expression + : logical_OR_expression ("?" expression ":" conditional_expression)? + ; + +constant_expression + : conditional_expression + ; + +logical_OR_expression + : logical_AND_expression ("||" logical_AND_expression)* + ; + +logical_AND_expression + : inclusive_OR_expression ("&&" inclusive_OR_expression)* + ; + +inclusive_OR_expression + : exclusive_OR_expression ("|" exclusive_OR_expression)* + ; + +exclusive_OR_expression + : AND_expression ("^" AND_expression)* + ; + +AND_expression + : equality_expression ("&" equality_expression)* + ; + +equality_expression + : relational_expression (("==" | "!=") relational_expression)* + ; + +relational_expression + : shift_expression + (("<" | ">" | "<=" | ">=") shift_expression)* + ; + +shift_expression + : additive_expression (("<<" | ">>") additive_expression)* + ; + +additive_expression + : multiplicative_expression (("+" | "-") multiplicative_expression)* + ; + +multiplicative_expression + : cast_expression (("*" | "/" | "%") cast_expression)* + ; + +cast_expression + : unary_expression + | "(" type_name ")" cast_expression + ; + +unary_expression + : postfix_expression + | "++" unary_expression + | "--" unary_expression + | ("&" | "*" | "+" | "-" | "~" | "!") cast_expression + | "sizeof" unary_expression + | "sizeof" "(" type_name ")" + ; + +postfix_expression + : primary_expression + ( "[" expression "]" + | "(" argument_expression_list? ")" + | "." ID + | "->" ID + | "++" + | "--" + )* + ; + +primary_expression + : ID + | constant + | "(" expression ")" + ; + +argument_expression_list + : assignment_expression ("," assignment_expression)* + ; + +constant + : DECNUM + | REALNUM + | STRING + | CCONST + ; diff --git a/ml-lpt/ml-antlr/examples/c/cc.lex b/ml-lpt/ml-antlr/examples/c/cc.lex new file mode 100644 index 0000000..50f4c6e --- /dev/null +++ b/ml-lpt/ml-antlr/examples/c/cc.lex @@ -0,0 +1,121 @@ + +fun eof() = Tok.EOF + +%% + +id = [_A-Za-z][_A-Za-z0-9]*; +decnum = (0|([1-9][0-9]*))[uUlL]?[uUlL]?; +realnum = (([0-9]+(\.[0-9]+)?)|(\.[0-9]+))([eE][+-]?[0-9]+)?[lL]?; +octdigit = [0-7]; +hexdigit = [0-9a-fA-F]; +hexnum = 0[xX]{hexdigit}+[uUlL]?[uUlL]?; +octnum = 0{octdigit}+[uUlL]?[uUlL]?; + +%s C S; + +%% + +(":") => (Tok.COLON); +(";") => (Tok.SEMICOLON); +("(") => (Tok.LPAREN); +(")") => (Tok.RPAREN); +("{") => (Tok.LCURLY); +("}") => (Tok.RCURLY); +("[") => (Tok.LBRACE); +("]") => (Tok.RBRACE); +(".") => (Tok.DOT); +(",") => (Tok.COMMA); +("?") => (Tok.QUESTION); +("%") => (Tok.PERCENT); +("&") => (Tok.AMP); +("|") => (Tok.BAR); +("~") => (Tok.TILDE); +("/") => (Tok.DIVIDE); +("+") => (Tok.PLUS); +("-") => (Tok.MINUS); +("^") => (Tok.HAT); +("!") => (Tok.BANG); +("*") => (Tok.TIMES); +("++") => (Tok.INC); +("--") => (Tok.DEC); +("->") => (Tok.ARROW); +("=") => (Tok.EQUALS); +("+=") => (Tok.PLUSEQUALS); +("-=") => (Tok.MINUSEQUALS); +("^=") => (Tok.XOREQUALS); +("%=") => (Tok.MODEQUALS); +("*=") => (Tok.TIMESEQUALS); +("/=") => (Tok.DIVEQUALS); +("|=") => (Tok.OREQUALS); +("&=") => (Tok.ANDEQUALS); +("<<=") => (Tok.LSHIFTEQUALS); +(">>=") => (Tok.RSHIFTEQUALS); +("<=") => (Tok.LTE); +(">=") => (Tok.GTE); +("<") => (Tok.LT); +(">") => (Tok.GT); +("==") => (Tok.EQ); +("!=") => (Tok.NEQ); +("||") => (Tok.OR); +("&&") => (Tok.AND); +("<<") => (Tok.LSHIFT); +(">>") => (Tok.RSHIFT); +("extern") => (Tok.EXTERN); +("auto") => (Tok.AUTO); +("static") => (Tok.STATIC); +("register") => (Tok.REGISTER); +("const") => (Tok.CONST); +("volatile") => (Tok.VOLATILE); +("if") => (Tok.IF); +("else") => (Tok.ELSE); +("for") => (Tok.FOR); +("do") => (Tok.DO); +("switch") => (Tok.SWITCH); +("case") => (Tok.CASE); +("default") => (Tok.DEFAULT); +("while") => (Tok.WHILE); +("return") => (Tok.RETURN); +("break") => (Tok.BREAK); +("continue") => (Tok.CONTINUE); +("goto") => (Tok.GOTO); +("char") => (Tok.CHAR); +("double") => (Tok.DOUBLE); +("enum") => (Tok.ENUM); +("float") => (Tok.FLOAT); +("int") => (Tok.INT); +("long") => (Tok.LONG); +("short") => (Tok.SHORT); +("struct") => (Tok.STRUCT); +("union") => (Tok.UNION); +("unsigned") => (Tok.UNSIGNED); +("signed") => (Tok.SIGNED); +("void") => (Tok.VOID); +("sizeof") => (Tok.SIZEOF); +("typedef") => (Tok.TYPEDEF); +("...") => (Tok.ELIPSIS); + +{id} => (Tok.ID yytext); +{decnum} => (Tok.DECNUM (valOf (IntInf.fromString yytext))); +{octnum} => (Tok.DECNUM 0); +{hexnum} => (Tok.DECNUM 0); +{realnum} => (Tok.REALNUM 0.0); +"\"" => (YYBEGIN S; continue()); +"/*" => (YYBEGIN C; continue()); + +"#" (.)* \n => (continue()); + +. | \n => (continue()); + + +"*/" => (YYBEGIN INITIAL; continue()); +. | "\n" => (continue()); + +\" => (YYBEGIN INITIAL; Tok.STRING("")); +\n => (print "unclosed string..."; YYBEGIN INITIAL; continue()); +[^"\\\n]* => (continue()); +\\\n => (continue()); +\\0 => (continue()); +\\{octdigit}{3} =>(continue()); +\\x{hexdigit}+ => (continue()); +\\\^[@-_] => (continue()); +\\. => (continue()); diff --git a/ml-lpt/ml-antlr/examples/c/cmain.sml b/ml-lpt/ml-antlr/examples/c/cmain.sml new file mode 100644 index 0000000..ede0b46 --- /dev/null +++ b/ml-lpt/ml-antlr/examples/c/cmain.sml @@ -0,0 +1,32 @@ +structure Main = + struct + + structure P = Parser(ListLexer) + + fun parse(strm) = let + fun inputN n = (case TextIO.inputLine strm + of SOME s => s + | NONE => "") + val lexer = Mlex.makeLexer inputN + fun lex() = (case lexer() + of Tok.EOF => [Tok.EOF] + | t => t :: lex()) + in + #1 (P.parser (lex())) + end + + fun errMsg l = + TextIO.output(TextIO.stdErr, String.concat l) + + fun main (_, [file]) = ( + parse (TextIO.openIn file); + OS.Process.success) + handle ex => ( + errMsg [ + "uncaught exception ", General.exnName ex, + " [", exnMessage ex, "]\n" + ]; + List.app (fn s => errMsg [" raised at ", s, "\n"]) (SMLofNJ.exnHistory ex); + OS.Process.failure) + + end diff --git a/ml-lpt/ml-antlr/examples/c/ct b/ml-lpt/ml-antlr/examples/c/ct new file mode 100755 index 0000000..816b6c0 --- /dev/null +++ b/ml-lpt/ml-antlr/examples/c/ct @@ -0,0 +1,16 @@ +#!/bin/sh +# +# Wrapper for dragon heap image +# + +case `uname -s` in + Darwin) heap=ct.ppc-darwin ;; + *) heap=ct.x86-linux ;; +esac + +if test ! -r $heap ; then + echo "ct: no heap image!" + exit 1 +fi + +exec sml @SMLload=$heap $@ \ No newline at end of file diff --git a/ml-lpt/ml-antlr/examples/c/parse-tree.sml b/ml-lpt/ml-antlr/examples/c/parse-tree.sml new file mode 100644 index 0000000..7c76115 --- /dev/null +++ b/ml-lpt/ml-antlr/examples/c/parse-tree.sml @@ -0,0 +1,170 @@ +(* Copyright (c) 1998 by Lucent Technologies *) + +structure ParseTree = +struct + + datatype qualifier = CONST | VOLATILE + + datatype storage + = TYPEDEF + | STATIC + | EXTERN + | REGISTER + | AUTO + + datatype operator + = Plus | Minus | Times | Divide | Mod + | Gt | Lt | Gte | Lte | Eq | Neq | And | Or + | BitOr | BitAnd | BitXor | Lshift | Rshift + | Star | AddrOf | Dot | Arrow | Sub | Sizeof + | PreInc | PostInc | PreDec | PostDec | Comma + | Not | Negate | BitNot | Assign + | PlusAssign | MinusAssign | TimesAssign | DivAssign + | ModAssign | XorAssign | OrAssign | AndAssign + | LshiftAssign | RshiftAssign + | Uplus + | SizeofType of ctype +(* + | OperatorExt of operatorExt +*) + + and expression + = EmptyExpr + | IntConst of LargeInt.int + | RealConst of real + | String of string + | Id of string + | Unop of operator * expression + | Binop of operator * expression * expression + | QuestionColon of expression * expression * expression + | Call of expression * expression list + | Cast of ctype * expression + | InitList of expression list +(* + | MARKexpression of (SourceMap.location * expression) + | ExprExt of expressionExt +*) + + and specifier + = Void + | Ellipses + | Signed + | Unsigned + | Char + | Short + | Int + | Long + | Float + | Double + | Fractional + | Wholenum + | Saturate + | Nonsaturate + | Array of expression * ctype + | Pointer of ctype + | Function of + {retType : ctype, + params : (decltype * declarator) list} + | Enum of + {tagOpt : string option, + enumerators : (string * expression) list, + trailingComma : bool} (* true if there was there a trailing comma in the declaration *) + | Struct of + {isStruct : bool, (* struct or union; true => struct *) + tagOpt : string option, (* optional tag *) + members: (ctype * (declarator * expression) list) list} (* member specs *) + | TypedefName of string + | StructTag of + {isStruct : bool, (* ??? *) + name : string} + | EnumTag of string +(* + | SpecExt of specifierExt +*) + + and declarator (* constructor suffix: "Decr" *) + = EmptyDecr + | EllipsesDecr + | VarDecr of string + | ArrayDecr of declarator * expression + | PointerDecr of declarator + | QualDecr of qualifier * declarator + | FuncDecr of declarator * (decltype * declarator) list +(* + | MARKdeclarator of (SourceMap.location * declarator) + | DecrExt of declaratorExt +*) + + (* supports extensions of C in which expressions contain statements *) + and statement + = Decl of declaration + | Expr of expression + | Compound of statement list + | While of expression * statement + | Do of expression * statement + | For of expression * expression * expression * statement + | Labeled of string * statement + | CaseLabel of expression * statement + | DefaultLabel of statement + | Goto of string + | Break + | Continue + | Return of expression + | IfThen of expression * statement + | IfThenElse of expression * statement * statement + | Switch of expression * statement +(* + | MARKstatement of (SourceMap.location * statement) + | StatExt of statementExt +*) + + and declaration + = Declaration of decltype * (declarator * expression) list +(* + | MARKdeclaration of (SourceMap.location * declaration) + | DeclarationExt of declarationExt +*) + + and externalDecl + = ExternalDecl of declaration + | FunctionDef of (* record? *) + {retType : decltype, (* return type *) + funDecr : declarator, (* function name declarator *) + krParams : declaration list, (* K&R-style parameter declarations *) + body : statement} (* function body *) +(* + | MARKexternalDecl of (SourceMap.location * externalDecl) + | ExternalDeclExt of externalDeclExt +*) + + withtype ctype = + {qualifiers : qualifier list, + specifiers : specifier list} + and decltype = + {qualifiers : qualifier list, + specifiers : specifier list, + storage : storage list} + +(* + and externalDeclExt = + (specifier, declarator, ctype, decltype, operator, expression, statement) + ParseTreeExt.externalDeclExt + and declarationExt = + (specifier, declarator, ctype, decltype, operator, expression, statement) + ParseTreeExt.declarationExt + and statementExt = + (specifier, declarator, ctype, decltype, operator, expression, statement) + ParseTreeExt.statementExt + and declaratorExt = + (specifier, declarator, ctype, decltype, operator, expression, statement) + ParseTreeExt.declaratorExt + and specifierExt = + (specifier, declarator, ctype, decltype, operator, expression, statement) + ParseTreeExt.specifierExt + and expressionExt = + (specifier, declarator, ctype, decltype, operator, expression, statement) + ParseTreeExt.expressionExt + and operatorExt = ParseTreeExt.operatorExt +*) + +end (* structure ParseTree *) diff --git a/ml-lpt/ml-antlr/examples/c/sources.cm b/ml-lpt/ml-antlr/examples/c/sources.cm new file mode 100644 index 0000000..4a5cdf4 --- /dev/null +++ b/ml-lpt/ml-antlr/examples/c/sources.cm @@ -0,0 +1,19 @@ +Library + + structure Main + +is + $/basis.cm + $/smlnj-lib.cm + +(* NOTE: we use a relative path here, but you should use + * + * $/ml-lpt-lib.cm + * + * for your projects. + *) + ../../../lib/ml-lpt-lib.cm + + c.grm.sml + cc.lex + cmain.sml \ No newline at end of file diff --git a/ml-lpt/ml-antlr/examples/calc/calc-test.sml b/ml-lpt/ml-antlr/examples/calc/calc-test.sml new file mode 100644 index 0000000..2bcf025 --- /dev/null +++ b/ml-lpt/ml-antlr/examples/calc/calc-test.sml @@ -0,0 +1,47 @@ +(* calc-test.sml + * + * COPYRIGHT (c) 2011 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + *) + +structure CalcTest = + struct + + structure Tok = CalcParseTokens + + structure ListLex = struct + type strm = Tok.token list + fun lex [] = (Tok.EOF, (0, 0), []) + | lex (t::ts) = (t, (0, 0), ts) + type pos = AntlrStreamPos.pos + type span = pos * pos + fun getPos _ = 0 + end + + structure CP = CalcParseFn(ListLex) + + fun fragToToks (SMLofNJ.QUOTE s) = let + val sref = ref true + fun input _ = if !sref then + (sref := false; s) + else "" + val lex = CalcLex.lex (AntlrStreamPos.mkSourcemap()) + fun loop ((Tok.EOF, _, _), accum) = rev accum + | loop ((s, _, strm), accum) = loop (lex strm, s::accum) + in + loop (lex (CalcLex.streamify input), []) + end + | fragToToks (SMLofNJ.ANTIQUOTE i) = [Tok.DummyExp i] + + fun % frags = let + val (r, s', errs, {vars, nums}) = CP.parseexp ListLex.lex AtomMap.empty (List.concat (map fragToToks frags)) + in + app (fn (pos, repair) => print (AntlrRepair.actionToString Tok.toString repair ^ "\n")) errs; + print (" -- VARS: " ^ (String.concatWith ", " vars) ^ "\n"); + print (" -- NUMS: " ^ (String.concatWith ", " (map Int.toString nums)) ^ "\n"); + (r, s') + end + +(* val _ = Control.quotation := true *) + + end diff --git a/ml-lpt/ml-antlr/examples/calc/calc.g b/ml-lpt/ml-antlr/examples/calc/calc.g new file mode 100644 index 0000000..ae8e1d2 --- /dev/null +++ b/ml-lpt/ml-antlr/examples/calc/calc.g @@ -0,0 +1,59 @@ +(* calc.g + * + * COPYRIGHT (c) 2011 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + *) + +%name CalcParse; + +(* an example of using the header declaration, although it + * just describes the default behavior. + *) +%header (functor CalcParseFn (Lex : ANTLR_LEXER)); + +%entry exp; + +%tokens + : KW_let ("let") | KW_in ("in") + | ID of string | NUM of Int.int + | EQ ("=") | PLUS ("+") + | TIMES ("*") | MINUS ("-") + | LP ("(") | RP (")") + | SEMI (";") + | DummyExp of int + ; + +%refcell vars : string list = ([]); +%refcell nums : int list = ([]); + +prog + : (exp@(AtomMap.empty) ";")* + ; +exp(env) + : "let" ID "=" exp@(env) + "in" exp@(AtomMap.insert(env, Atom.atom ID, exp1)) + => ( vars := ID::(!vars); exp2 ) + | addExp@(env) + ; +addExp(env) + : multExp@(env) ("+" multExp@(env))* + => ( List.foldl op+ multExp SR ) + ; +multExp(env) + : prefixExp@(env) ("*" prefixExp@(env))* + => ( List.foldl op* prefixExp SR ) + ; +prefixExp(env) + : atomicExp@(env) + | "-" prefixExp@(env) + => ( ~prefixExp ) + ; +atomicExp(env) + : ID + %where ( AtomMap.inDomain (env, Atom.atom ID) ) + => ( valOf(AtomMap.find (env, Atom.atom ID)) ) + | NUM + => ( nums := NUM::(!nums); NUM ) + | "(" exp@(env) ")" + | DummyExp + ; diff --git a/ml-lpt/ml-antlr/examples/calc/calc.g.sml b/ml-lpt/ml-antlr/examples/calc/calc.g.sml new file mode 100644 index 0000000..736898b --- /dev/null +++ b/ml-lpt/ml-antlr/examples/calc/calc.g.sml @@ -0,0 +1,357 @@ +structure +CalcParseTokens = struct + + datatype token = EOF + | DummyExp of int + | SEMI + | RP + | LP + | MINUS + | TIMES + | PLUS + | EQ + | NUM of Int.int + | ID of string + | KW_in + | KW_let + + val allToks = [EOF, SEMI, RP, LP, MINUS, TIMES, PLUS, EQ, KW_in, KW_let] + + fun toString tok = +(case (tok) + of (EOF) => "EOF" + | (DummyExp(_)) => "DummyExp" + | (SEMI) => ";" + | (RP) => ")" + | (LP) => "(" + | (MINUS) => "-" + | (TIMES) => "*" + | (PLUS) => "+" + | (EQ) => "=" + | (NUM(_)) => "NUM" + | (ID(_)) => "ID" + | (KW_in) => "in" + | (KW_let) => "let" +(* end case *)) + fun isKW tok = +(case (tok) + of (EOF) => false + | (DummyExp(_)) => false + | (SEMI) => false + | (RP) => false + | (LP) => false + | (MINUS) => false + | (TIMES) => false + | (PLUS) => false + | (EQ) => false + | (NUM(_)) => false + | (ID(_)) => false + | (KW_in) => false + | (KW_let) => false +(* end case *)) + val changes = [] + + + fun isEOF EOF = true + | isEOF _ = false + +end + +functor CalcParseFn (Lex : ANTLR_LEXER) = struct + + local + structure Tok = +CalcParseTokens + structure UserCode = + struct + +fun exp_PROD_1_ACT (EQ, ID, env, exp1, exp2, KW_in, KW_let, EQ_SPAN : (Lex.pos * Lex.pos), ID_SPAN : (Lex.pos * Lex.pos), exp1_SPAN : (Lex.pos * Lex.pos), exp2_SPAN : (Lex.pos * Lex.pos), KW_in_SPAN : (Lex.pos * Lex.pos), KW_let_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), nums, vars) = + ( vars := ID::(!vars); exp2 ) +fun addExp_PROD_1_ACT (SR, env, multExp, SR_SPAN : (Lex.pos * Lex.pos), multExp_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), nums, vars) = + ( List.foldl op+ multExp SR ) +fun multExp_PROD_1_ACT (SR, env, prefixExp, SR_SPAN : (Lex.pos * Lex.pos), prefixExp_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), nums, vars) = + ( List.foldl op* prefixExp SR ) +fun prefixExp_PROD_2_ACT (env, MINUS, prefixExp, MINUS_SPAN : (Lex.pos * Lex.pos), prefixExp_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), nums, vars) = + ( ~prefixExp ) +fun atomicExp_PROD_1_ACT (ID, env, ID_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), nums, vars) = + ( valOf(AtomMap.find (env, Atom.atom ID)) ) +fun atomicExp_PROD_2_ACT (NUM, env, NUM_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), nums, vars) = + ( nums := NUM::(!nums); NUM ) +fun atomicExp_PROD_1_PRED (ID, env, nums, vars) = + ( AtomMap.inDomain (env, Atom.atom ID) ) +fun ARGS_4 (nums, vars) = + (AtomMap.empty) +fun ARGS_6 (EQ, ID, env, KW_let, nums, vars) = + (env) +fun ARGS_7 (EQ, ID, env, exp1, KW_in, KW_let, nums, vars) = + (AtomMap.insert(env, Atom.atom ID, exp1)) +fun ARGS_8 (env, nums, vars) = + (env) +fun ARGS_11 (env, PLUS, multExp, nums, vars) = + (env) +fun ARGS_10 (env, nums, vars) = + (env) +fun ARGS_14 (env, TIMES, prefixExp, nums, vars) = + (env) +fun ARGS_13 (env, nums, vars) = + (env) +fun ARGS_15 (env, nums, vars) = + (env) +fun ARGS_17 (env, MINUS, nums, vars) = + (env) +fun ARGS_21 (LP, env, nums, vars) = + (env) +fun mknums_REFC() : (int list) ref = ref ([]) +fun mkvars_REFC() : (string list) ref = ref ([]) + end (* UserCode *) + + structure Err = AntlrErrHandler( + structure Tok = Tok + structure Lex = Lex) + structure EBNF = AntlrEBNF( + struct + type strm = Err.wstream + val getSpan = Err.getSpan + end) + + fun mk lexFn = let +val nums_REFC = UserCode.mknums_REFC() +val vars_REFC = UserCode.mkvars_REFC() +fun getS() = {nums = !nums_REFC, vars = !vars_REFC} +fun putS{nums, vars} = (nums_REFC := nums; vars_REFC := vars) +fun unwrap (ret, strm, repairs) = (ret, strm, repairs, getS()) val (eh, lex) = Err.mkErrHandler {get = getS, put = putS} + fun fail() = Err.failure eh + fun tryProds (strm, prods) = let + fun try [] = fail() + | try (prod :: prods) = + (Err.whileDisabled eh (fn() => prod strm)) + handle Err.ParseError => try (prods) + in try prods end +fun matchEOF strm = (case (lex(strm)) + of (Tok.EOF, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchDummyExp strm = (case (lex(strm)) + of (Tok.DummyExp(x), span, strm') => (x, span, strm') + | _ => fail() +(* end case *)) +fun matchSEMI strm = (case (lex(strm)) + of (Tok.SEMI, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchRP strm = (case (lex(strm)) + of (Tok.RP, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchLP strm = (case (lex(strm)) + of (Tok.LP, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchMINUS strm = (case (lex(strm)) + of (Tok.MINUS, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchTIMES strm = (case (lex(strm)) + of (Tok.TIMES, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchPLUS strm = (case (lex(strm)) + of (Tok.PLUS, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchEQ strm = (case (lex(strm)) + of (Tok.EQ, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchNUM strm = (case (lex(strm)) + of (Tok.NUM(x), span, strm') => (x, span, strm') + | _ => fail() +(* end case *)) +fun matchID strm = (case (lex(strm)) + of (Tok.ID(x), span, strm') => (x, span, strm') + | _ => fail() +(* end case *)) +fun matchKW_in strm = (case (lex(strm)) + of (Tok.KW_in, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchKW_let strm = (case (lex(strm)) + of (Tok.KW_let, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) + +val (prog_NT, exp_NT) = +let +fun exp_NT (env_RES) (strm) = let + fun exp_PROD_1 (strm) = let + val (KW_let_RES, KW_let_SPAN, strm') = matchKW_let(strm) + val (ID_RES, ID_SPAN, strm') = matchID(strm') + val (EQ_RES, EQ_SPAN, strm') = matchEQ(strm') + val (exp1_RES, exp1_SPAN, strm') = (exp_NT (UserCode.ARGS_6 (EQ_RES, ID_RES, env_RES, KW_let_RES, nums_REFC, vars_REFC)))(strm') + val (KW_in_RES, KW_in_SPAN, strm') = matchKW_in(strm') + val (exp2_RES, exp2_SPAN, strm') = (exp_NT (UserCode.ARGS_7 (EQ_RES, ID_RES, env_RES, exp1_RES, KW_in_RES, KW_let_RES, nums_REFC, vars_REFC)))(strm') + val FULL_SPAN = (#1(KW_let_SPAN), #2(exp2_SPAN)) + in + (UserCode.exp_PROD_1_ACT (EQ_RES, ID_RES, env_RES, exp1_RES, exp2_RES, KW_in_RES, KW_let_RES, EQ_SPAN : (Lex.pos * Lex.pos), ID_SPAN : (Lex.pos * Lex.pos), exp1_SPAN : (Lex.pos * Lex.pos), exp2_SPAN : (Lex.pos * Lex.pos), KW_in_SPAN : (Lex.pos * Lex.pos), KW_let_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), nums_REFC, vars_REFC), + FULL_SPAN, strm') + end + fun exp_PROD_2 (strm) = let + val (addExp_RES, addExp_SPAN, strm') = (addExp_NT (UserCode.ARGS_8 (env_RES, nums_REFC, vars_REFC)))(strm) + val FULL_SPAN = (#1(addExp_SPAN), #2(addExp_SPAN)) + in + ((addExp_RES), FULL_SPAN, strm') + end + in + (case (lex(strm)) + of (Tok.ID(_), _, strm') => exp_PROD_2(strm) + | (Tok.NUM(_), _, strm') => exp_PROD_2(strm) + | (Tok.MINUS, _, strm') => exp_PROD_2(strm) + | (Tok.LP, _, strm') => exp_PROD_2(strm) + | (Tok.DummyExp(_), _, strm') => exp_PROD_2(strm) + | (Tok.KW_let, _, strm') => exp_PROD_1(strm) + | _ => fail() + (* end case *)) + end +and addExp_NT (env_RES) (strm) = let + val (multExp_RES, multExp_SPAN, strm') = (multExp_NT (UserCode.ARGS_10 (env_RES, nums_REFC, vars_REFC)))(strm) + fun addExp_PROD_1_SUBRULE_1_NT (strm) = let + val (PLUS_RES, PLUS_SPAN, strm') = matchPLUS(strm) + val (multExp_RES, multExp_SPAN, strm') = (multExp_NT (UserCode.ARGS_11 (env_RES, PLUS_RES, multExp_RES, nums_REFC, vars_REFC)))(strm') + val FULL_SPAN = (#1(PLUS_SPAN), #2(multExp_SPAN)) + in + ((multExp_RES), FULL_SPAN, strm') + end + fun addExp_PROD_1_SUBRULE_1_PRED (strm) = (case (lex(strm)) + of (Tok.PLUS, _, strm') => true + | _ => false + (* end case *)) + val (SR_RES, SR_SPAN, strm') = EBNF.closure(addExp_PROD_1_SUBRULE_1_PRED, addExp_PROD_1_SUBRULE_1_NT, strm') + val FULL_SPAN = (#1(multExp_SPAN), #2(SR_SPAN)) + in + (UserCode.addExp_PROD_1_ACT (SR_RES, env_RES, multExp_RES, SR_SPAN : (Lex.pos * Lex.pos), multExp_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), nums_REFC, vars_REFC), + FULL_SPAN, strm') + end +and multExp_NT (env_RES) (strm) = let + val (prefixExp_RES, prefixExp_SPAN, strm') = (prefixExp_NT (UserCode.ARGS_13 (env_RES, nums_REFC, vars_REFC)))(strm) + fun multExp_PROD_1_SUBRULE_1_NT (strm) = let + val (TIMES_RES, TIMES_SPAN, strm') = matchTIMES(strm) + val (prefixExp_RES, prefixExp_SPAN, strm') = (prefixExp_NT (UserCode.ARGS_14 (env_RES, TIMES_RES, prefixExp_RES, nums_REFC, vars_REFC)))(strm') + val FULL_SPAN = (#1(TIMES_SPAN), #2(prefixExp_SPAN)) + in + ((prefixExp_RES), FULL_SPAN, strm') + end + fun multExp_PROD_1_SUBRULE_1_PRED (strm) = (case (lex(strm)) + of (Tok.TIMES, _, strm') => true + | _ => false + (* end case *)) + val (SR_RES, SR_SPAN, strm') = EBNF.closure(multExp_PROD_1_SUBRULE_1_PRED, multExp_PROD_1_SUBRULE_1_NT, strm') + val FULL_SPAN = (#1(prefixExp_SPAN), #2(SR_SPAN)) + in + (UserCode.multExp_PROD_1_ACT (SR_RES, env_RES, prefixExp_RES, SR_SPAN : (Lex.pos * Lex.pos), prefixExp_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), nums_REFC, vars_REFC), + FULL_SPAN, strm') + end +and prefixExp_NT (env_RES) (strm) = let + fun prefixExp_PROD_1 (strm) = let + val (atomicExp_RES, atomicExp_SPAN, strm') = (atomicExp_NT (UserCode.ARGS_15 (env_RES, nums_REFC, vars_REFC)))(strm) + val FULL_SPAN = (#1(atomicExp_SPAN), #2(atomicExp_SPAN)) + in + ((atomicExp_RES), FULL_SPAN, strm') + end + fun prefixExp_PROD_2 (strm) = let + val (MINUS_RES, MINUS_SPAN, strm') = matchMINUS(strm) + val (prefixExp_RES, prefixExp_SPAN, strm') = (prefixExp_NT (UserCode.ARGS_17 (env_RES, MINUS_RES, nums_REFC, vars_REFC)))(strm') + val FULL_SPAN = (#1(MINUS_SPAN), #2(prefixExp_SPAN)) + in + (UserCode.prefixExp_PROD_2_ACT (env_RES, MINUS_RES, prefixExp_RES, MINUS_SPAN : (Lex.pos * Lex.pos), prefixExp_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), nums_REFC, vars_REFC), + FULL_SPAN, strm') + end + in + (case (lex(strm)) + of (Tok.MINUS, _, strm') => prefixExp_PROD_2(strm) + | (Tok.ID(_), _, strm') => prefixExp_PROD_1(strm) + | (Tok.NUM(_), _, strm') => prefixExp_PROD_1(strm) + | (Tok.LP, _, strm') => prefixExp_PROD_1(strm) + | (Tok.DummyExp(_), _, strm') => prefixExp_PROD_1(strm) + | _ => fail() + (* end case *)) + end +and atomicExp_NT (env_RES) (strm) = let + fun atomicExp_PROD_1 (strm) = let + val (ID_RES, ID_SPAN, strm') = matchID(strm) + in + if (UserCode.atomicExp_PROD_1_PRED (ID_RES, env_RES, nums_REFC, vars_REFC)) + then let + val FULL_SPAN = (#1(ID_SPAN), #2(ID_SPAN)) + in + (UserCode.atomicExp_PROD_1_ACT (ID_RES, env_RES, ID_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), nums_REFC, vars_REFC), + FULL_SPAN, strm') + end + else fail() + end + fun atomicExp_PROD_2 (strm) = let + val (NUM_RES, NUM_SPAN, strm') = matchNUM(strm) + val FULL_SPAN = (#1(NUM_SPAN), #2(NUM_SPAN)) + in + (UserCode.atomicExp_PROD_2_ACT (NUM_RES, env_RES, NUM_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), nums_REFC, vars_REFC), + FULL_SPAN, strm') + end + fun atomicExp_PROD_3 (strm) = let + val (LP_RES, LP_SPAN, strm') = matchLP(strm) + val (exp_RES, exp_SPAN, strm') = (exp_NT (UserCode.ARGS_21 (LP_RES, env_RES, nums_REFC, vars_REFC)))(strm') + val (RP_RES, RP_SPAN, strm') = matchRP(strm') + val FULL_SPAN = (#1(LP_SPAN), #2(RP_SPAN)) + in + ((exp_RES), FULL_SPAN, strm') + end + fun atomicExp_PROD_4 (strm) = let + val (DummyExp_RES, DummyExp_SPAN, strm') = matchDummyExp(strm) + val FULL_SPAN = (#1(DummyExp_SPAN), #2(DummyExp_SPAN)) + in + ((DummyExp_RES), FULL_SPAN, strm') + end + in + (case (lex(strm)) + of (Tok.DummyExp(_), _, strm') => atomicExp_PROD_4(strm) + | (Tok.NUM(_), _, strm') => atomicExp_PROD_2(strm) + | (Tok.ID(_), _, strm') => atomicExp_PROD_1(strm) + | (Tok.LP, _, strm') => atomicExp_PROD_3(strm) + | _ => fail() + (* end case *)) + end +fun prog_NT (strm) = let + fun prog_PROD_1_SUBRULE_1_NT (strm) = let + val (exp_RES, exp_SPAN, strm') = (exp_NT (UserCode.ARGS_4 (nums_REFC, vars_REFC)))(strm) + val (SEMI_RES, SEMI_SPAN, strm') = matchSEMI(strm') + val FULL_SPAN = (#1(exp_SPAN), #2(SEMI_SPAN)) + in + ((exp_RES), FULL_SPAN, strm') + end + fun prog_PROD_1_SUBRULE_1_PRED (strm) = (case (lex(strm)) + of (Tok.KW_let, _, strm') => true + | (Tok.ID(_), _, strm') => true + | (Tok.NUM(_), _, strm') => true + | (Tok.MINUS, _, strm') => true + | (Tok.LP, _, strm') => true + | (Tok.DummyExp(_), _, strm') => true + | _ => false + (* end case *)) + val (SR_RES, SR_SPAN, strm') = EBNF.closure(prog_PROD_1_SUBRULE_1_PRED, prog_PROD_1_SUBRULE_1_NT, strm) + val FULL_SPAN = (#1(SR_SPAN), #2(SR_SPAN)) + in + ((SR_RES), FULL_SPAN, strm') + end +in + (prog_NT, exp_NT) +end +val prog_NT = fn s => unwrap (Err.launch (eh, lexFn, prog_NT , true) s) +val exp_NT = fn x => fn s => unwrap (Err.launch (eh, lexFn, exp_NT x , false) s) + +in (prog_NT, exp_NT) end + in +fun parse lexFn s = let val (prog_NT, exp_NT) = mk lexFn in prog_NT s end + +fun parseexp lexFn x s = let val (prog_NT, exp_NT) = mk lexFn in exp_NT x s end + + end + +end diff --git a/ml-lpt/ml-antlr/examples/calc/calc.lex b/ml-lpt/ml-antlr/examples/calc/calc.lex new file mode 100644 index 0000000..1592ff6 --- /dev/null +++ b/ml-lpt/ml-antlr/examples/calc/calc.lex @@ -0,0 +1,33 @@ +(* calc.lex + * + * COPYRIGHT (c) 2011 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + *) + +%name CalcLex; + +%let digit = [0-9]; +%let int = {digit}+; +%let alpha = [a-zA-Z]; +%let id = {alpha}({alpha} | {digit})*; + +%defs ( + structure T = CalcParseTokens + type lex_result = T.token + + fun eof() = T.EOF +); + +let => ( T.KW_let ); +in => ( T.KW_in ); +{id} => ( T.ID (yytext) ); +{int} => ( T.NUM (valOf (Int.fromString (yytext))) ); +"=" => ( T.EQ ); +"+" => ( T.PLUS ); +"-" => ( T.MINUS ); +"*" => ( T.TIMES ); +"(" => ( T.LP ); +")" => ( T.RP ); +";" => ( T.SEMI ); +" " | \n | \t + => ( continue() ); diff --git a/ml-lpt/ml-antlr/examples/calc/calc.lex.sml b/ml-lpt/ml-antlr/examples/calc/calc.lex.sml new file mode 100644 index 0000000..b8d7234 --- /dev/null +++ b/ml-lpt/ml-antlr/examples/calc/calc.lex.sml @@ -0,0 +1,542 @@ +structure CalcLex = struct + + datatype yystart_state = +INITIAL + structure UserDeclarations = + struct + + + structure T = CalcParseTokens + type lex_result = T.token + + fun eof() = T.EOF + + + end + + local + datatype yymatch + = yyNO_MATCH + | yyMATCH of ULexBuffer.stream * action * yymatch + withtype action = ULexBuffer.stream * yymatch -> UserDeclarations.lex_result + + val yytable : ((UTF8.wchar * UTF8.wchar * int) list * int list) Vector.vector = +Vector.fromList [] + fun yystreamify' p input = ULexBuffer.mkStream (p, input) + + fun yystreamifyReader' p readFn strm = let + val s = ref strm + fun iter(strm, n, accum) = + if n > 1024 then (String.implode (rev accum), strm) + else (case readFn strm + of NONE => (String.implode (rev accum), strm) + | SOME(c, strm') => iter (strm', n+1, c::accum)) + fun input() = let + val (data, strm) = iter(!s, 0, []) + in + s := strm; + data + end + in + yystreamify' p input + end + + fun yystreamifyInstream' p strm = yystreamify' p (fn ()=>TextIO.input strm) + + fun innerLex +(yystrm_, yyss_, yysm) = let + (* current start state *) + val yyss = ref yyss_ + fun YYBEGIN ss = (yyss := ss) + (* current input stream *) + val yystrm = ref yystrm_ + fun yysetStrm strm = yystrm := strm + fun yygetPos() = ULexBuffer.getpos (!yystrm) + fun yystreamify input = yystreamify' (yygetPos()) input + fun yystreamifyReader readFn strm = yystreamifyReader' (yygetPos()) readFn strm + fun yystreamifyInstream strm = yystreamifyInstream' (yygetPos()) strm + (* start position of token -- can be updated via skip() *) + val yystartPos = ref (yygetPos()) + (* get one char of input *) + fun yygetc strm = (case UTF8.getu ULexBuffer.getc strm + of (SOME (0w10, s')) => + (AntlrStreamPos.markNewLine yysm (ULexBuffer.getpos strm); + SOME (0w10, s')) + | x => x) + fun yygetList getc strm = let + val get1 = UTF8.getu getc + fun iter (strm, accum) = + (case get1 strm + of NONE => rev accum + | SOME (w, strm') => iter (strm', w::accum) + (* end case *)) + in + iter (strm, []) + end + (* create yytext *) + fun yymksubstr(strm) = ULexBuffer.subtract (strm, !yystrm) + fun yymktext(strm) = Substring.string (yymksubstr strm) + fun yymkunicode(strm) = yygetList Substring.getc (yymksubstr strm) + open UserDeclarations + fun lex () = let + fun yystuck (yyNO_MATCH) = raise Fail "lexer reached a stuck state" + | yystuck (yyMATCH (strm, action, old)) = + action (strm, old) + val yypos = yygetPos() + fun yygetlineNo strm = AntlrStreamPos.lineNo yysm (ULexBuffer.getpos strm) + fun yygetcolNo strm = AntlrStreamPos.colNo yysm (ULexBuffer.getpos strm) + fun yyactsToMatches (strm, [], oldMatches) = oldMatches + | yyactsToMatches (strm, act::acts, oldMatches) = + yyMATCH (strm, act, yyactsToMatches (strm, acts, oldMatches)) + fun yygo actTable = + (fn (~1, _, oldMatches) => yystuck oldMatches + | (curState, strm, oldMatches) => let + val (transitions, finals') = Vector.sub (yytable, curState) + val finals = map (fn i => Vector.sub (actTable, i)) finals' + fun tryfinal() = + yystuck (yyactsToMatches (strm, finals, oldMatches)) + fun find (c, []) = NONE + | find (c, (c1, c2, s)::ts) = + if c1 <= c andalso c <= c2 then SOME s + else find (c, ts) + in case yygetc strm + of SOME(c, strm') => + (case find (c, transitions) + of NONE => tryfinal() + | SOME n => + yygo actTable + (n, strm', + yyactsToMatches (strm, finals, oldMatches))) + | NONE => tryfinal() + end) + val yylastwasnref = ref (ULexBuffer.lastWasNL (!yystrm)) + fun continue() = let val yylastwasn = !yylastwasnref in +let +fun yyAction0 (strm, lastMatch : yymatch) = (yystrm := strm; T.KW_let ) +fun yyAction1 (strm, lastMatch : yymatch) = (yystrm := strm; T.KW_in ) +fun yyAction2 (strm, lastMatch : yymatch) = let + val yytext = yymktext(strm) + in + yystrm := strm; T.ID (yytext) + end +fun yyAction3 (strm, lastMatch : yymatch) = let + val yytext = yymktext(strm) + in + yystrm := strm; T.NUM (valOf (Int.fromString (yytext))) + end +fun yyAction4 (strm, lastMatch : yymatch) = (yystrm := strm; T.EQ ) +fun yyAction5 (strm, lastMatch : yymatch) = (yystrm := strm; T.PLUS ) +fun yyAction6 (strm, lastMatch : yymatch) = (yystrm := strm; T.MINUS ) +fun yyAction7 (strm, lastMatch : yymatch) = (yystrm := strm; T.TIMES ) +fun yyAction8 (strm, lastMatch : yymatch) = (yystrm := strm; T.LP ) +fun yyAction9 (strm, lastMatch : yymatch) = (yystrm := strm; T.RP ) +fun yyAction10 (strm, lastMatch : yymatch) = (yystrm := strm; T.SEMI ) +fun yyAction11 (strm, lastMatch : yymatch) = (yystrm := strm; continue() ) +fun yyQ10 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction2(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = 0wx41 + then yyQ10(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp < 0wx41 + then if inp = 0wx30 + then yyQ10(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp < 0wx30 + then yyAction2(strm, yyNO_MATCH) + else if inp <= 0wx39 + then yyQ10(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else yyAction2(strm, yyNO_MATCH) + else if inp = 0wx61 + then yyQ10(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp < 0wx61 + then if inp <= 0wx5A + then yyQ10(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else yyAction2(strm, yyNO_MATCH) + else if inp <= 0wx7A + then yyQ10(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else yyAction2(strm, yyNO_MATCH) + (* end case *)) +fun yyQ14 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction0(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = 0wx41 + then yyQ10(strm', yyMATCH(strm, yyAction0, yyNO_MATCH)) + else if inp < 0wx41 + then if inp = 0wx30 + then yyQ10(strm', yyMATCH(strm, yyAction0, yyNO_MATCH)) + else if inp < 0wx30 + then yyAction0(strm, yyNO_MATCH) + else if inp <= 0wx39 + then yyQ10(strm', yyMATCH(strm, yyAction0, yyNO_MATCH)) + else yyAction0(strm, yyNO_MATCH) + else if inp = 0wx61 + then yyQ10(strm', yyMATCH(strm, yyAction0, yyNO_MATCH)) + else if inp < 0wx61 + then if inp <= 0wx5A + then yyQ10(strm', yyMATCH(strm, yyAction0, yyNO_MATCH)) + else yyAction0(strm, yyNO_MATCH) + else if inp <= 0wx7A + then yyQ10(strm', yyMATCH(strm, yyAction0, yyNO_MATCH)) + else yyAction0(strm, yyNO_MATCH) + (* end case *)) +fun yyQ13 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction2(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = 0wx5B + then yyAction2(strm, yyNO_MATCH) + else if inp < 0wx5B + then if inp = 0wx3A + then yyAction2(strm, yyNO_MATCH) + else if inp < 0wx3A + then if inp <= 0wx2F + then yyAction2(strm, yyNO_MATCH) + else yyQ10(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp <= 0wx40 + then yyAction2(strm, yyNO_MATCH) + else yyQ10(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp = 0wx74 + then yyQ14(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp < 0wx74 + then if inp <= 0wx60 + then yyAction2(strm, yyNO_MATCH) + else yyQ10(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp <= 0wx7A + then yyQ10(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else yyAction2(strm, yyNO_MATCH) + (* end case *)) +fun yyQ12 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction2(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = 0wx5B + then yyAction2(strm, yyNO_MATCH) + else if inp < 0wx5B + then if inp = 0wx3A + then yyAction2(strm, yyNO_MATCH) + else if inp < 0wx3A + then if inp <= 0wx2F + then yyAction2(strm, yyNO_MATCH) + else yyQ10(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp <= 0wx40 + then yyAction2(strm, yyNO_MATCH) + else yyQ10(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp = 0wx65 + then yyQ13(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp < 0wx65 + then if inp <= 0wx60 + then yyAction2(strm, yyNO_MATCH) + else yyQ10(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp <= 0wx7A + then yyQ10(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else yyAction2(strm, yyNO_MATCH) + (* end case *)) +fun yyQ15 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction1(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = 0wx41 + then yyQ10(strm', yyMATCH(strm, yyAction1, yyNO_MATCH)) + else if inp < 0wx41 + then if inp = 0wx30 + then yyQ10(strm', yyMATCH(strm, yyAction1, yyNO_MATCH)) + else if inp < 0wx30 + then yyAction1(strm, yyNO_MATCH) + else if inp <= 0wx39 + then yyQ10(strm', yyMATCH(strm, yyAction1, yyNO_MATCH)) + else yyAction1(strm, yyNO_MATCH) + else if inp = 0wx61 + then yyQ10(strm', yyMATCH(strm, yyAction1, yyNO_MATCH)) + else if inp < 0wx61 + then if inp <= 0wx5A + then yyQ10(strm', yyMATCH(strm, yyAction1, yyNO_MATCH)) + else yyAction1(strm, yyNO_MATCH) + else if inp <= 0wx7A + then yyQ10(strm', yyMATCH(strm, yyAction1, yyNO_MATCH)) + else yyAction1(strm, yyNO_MATCH) + (* end case *)) +fun yyQ11 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction2(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = 0wx5B + then yyAction2(strm, yyNO_MATCH) + else if inp < 0wx5B + then if inp = 0wx3A + then yyAction2(strm, yyNO_MATCH) + else if inp < 0wx3A + then if inp <= 0wx2F + then yyAction2(strm, yyNO_MATCH) + else yyQ10(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp <= 0wx40 + then yyAction2(strm, yyNO_MATCH) + else yyQ10(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp = 0wx6E + then yyQ15(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp < 0wx6E + then if inp <= 0wx60 + then yyAction2(strm, yyNO_MATCH) + else yyQ10(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp <= 0wx7A + then yyQ10(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else yyAction2(strm, yyNO_MATCH) + (* end case *)) +fun yyQ9 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction4(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction4(strm, yyNO_MATCH) + (* end case *)) +fun yyQ8 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction10(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction10(strm, yyNO_MATCH) + (* end case *)) +fun yyQ7 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction3(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = 0wx30 + then yyQ7(strm', yyMATCH(strm, yyAction3, yyNO_MATCH)) + else if inp < 0wx30 + then yyAction3(strm, yyNO_MATCH) + else if inp <= 0wx39 + then yyQ7(strm', yyMATCH(strm, yyAction3, yyNO_MATCH)) + else yyAction3(strm, yyNO_MATCH) + (* end case *)) +fun yyQ6 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction6(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction6(strm, yyNO_MATCH) + (* end case *)) +fun yyQ5 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction5(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction5(strm, yyNO_MATCH) + (* end case *)) +fun yyQ4 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction7(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction7(strm, yyNO_MATCH) + (* end case *)) +fun yyQ3 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction9(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction9(strm, yyNO_MATCH) + (* end case *)) +fun yyQ2 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction8(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction8(strm, yyNO_MATCH) + (* end case *)) +fun yyQ1 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction11(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction11(strm, yyNO_MATCH) + (* end case *)) +fun yyQ0 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => + if ULexBuffer.eof(!(yystrm)) + then let + val yycolno = ref(yygetcolNo(!(yystrm))) + val yylineno = ref(yygetlineNo(!(yystrm))) + in + (case (!(yyss)) + of _ => (UserDeclarations.eof()) + (* end case *)) + end + else yystuck(lastMatch) + | SOME(inp, strm') => + if inp = 0wx3A + then if ULexBuffer.eof(!(yystrm)) + then let + val yycolno = ref(yygetcolNo(!(yystrm))) + val yylineno = ref(yygetlineNo(!(yystrm))) + in + (case (!(yyss)) + of _ => (UserDeclarations.eof()) + (* end case *)) + end + else yystuck(lastMatch) + else if inp < 0wx3A + then if inp = 0wx29 + then yyQ3(strm', lastMatch) + else if inp < 0wx29 + then if inp = 0wx20 + then yyQ1(strm', lastMatch) + else if inp < 0wx20 + then if inp = 0wx9 + then yyQ1(strm', lastMatch) + else if inp < 0wx9 + then if ULexBuffer.eof(!(yystrm)) + then let + val yycolno = ref(yygetcolNo(!(yystrm))) + val yylineno = ref(yygetlineNo(!(yystrm))) + in + (case (!(yyss)) + of _ => (UserDeclarations.eof()) + (* end case *)) + end + else yystuck(lastMatch) + else if inp <= 0wxA + then yyQ1(strm', lastMatch) + else if ULexBuffer.eof(!(yystrm)) + then let + val yycolno = ref(yygetcolNo(!(yystrm))) + val yylineno = ref(yygetlineNo(!(yystrm))) + in + (case (!(yyss)) + of _ => (UserDeclarations.eof()) + (* end case *)) + end + else yystuck(lastMatch) + else if inp = 0wx28 + then yyQ2(strm', lastMatch) + else if ULexBuffer.eof(!(yystrm)) + then let + val yycolno = ref(yygetcolNo(!(yystrm))) + val yylineno = ref(yygetlineNo(!(yystrm))) + in + (case (!(yyss)) + of _ => (UserDeclarations.eof()) + (* end case *)) + end + else yystuck(lastMatch) + else if inp = 0wx2D + then yyQ6(strm', lastMatch) + else if inp < 0wx2D + then if inp = 0wx2B + then yyQ5(strm', lastMatch) + else if inp = 0wx2A + then yyQ4(strm', lastMatch) + else if ULexBuffer.eof(!(yystrm)) + then let + val yycolno = ref(yygetcolNo(!(yystrm))) + val yylineno = ref(yygetlineNo(!(yystrm))) + in + (case (!(yyss)) + of _ => (UserDeclarations.eof()) + (* end case *)) + end + else yystuck(lastMatch) + else if inp <= 0wx2F + then if ULexBuffer.eof(!(yystrm)) + then let + val yycolno = ref(yygetcolNo(!(yystrm))) + val yylineno = ref(yygetlineNo(!(yystrm))) + in + (case (!(yyss)) + of _ => (UserDeclarations.eof()) + (* end case *)) + end + else yystuck(lastMatch) + else yyQ7(strm', lastMatch) + else if inp = 0wx61 + then yyQ10(strm', lastMatch) + else if inp < 0wx61 + then if inp = 0wx3E + then if ULexBuffer.eof(!(yystrm)) + then let + val yycolno = ref(yygetcolNo(!(yystrm))) + val yylineno = ref(yygetlineNo(!(yystrm))) + in + (case (!(yyss)) + of _ => (UserDeclarations.eof()) + (* end case *)) + end + else yystuck(lastMatch) + else if inp < 0wx3E + then if inp = 0wx3C + then if ULexBuffer.eof(!(yystrm)) + then let + val yycolno = ref(yygetcolNo(!(yystrm))) + val yylineno = ref(yygetlineNo(!(yystrm))) + in + (case (!(yyss)) + of _ => (UserDeclarations.eof()) + (* end case *)) + end + else yystuck(lastMatch) + else if inp = 0wx3B + then yyQ8(strm', lastMatch) + else yyQ9(strm', lastMatch) + else if inp = 0wx41 + then yyQ10(strm', lastMatch) + else if inp < 0wx41 + then if ULexBuffer.eof(!(yystrm)) + then let + val yycolno = ref(yygetcolNo(!(yystrm))) + val yylineno = ref(yygetlineNo(!(yystrm))) + in + (case (!(yyss)) + of _ => (UserDeclarations.eof()) + (* end case *)) + end + else yystuck(lastMatch) + else if inp <= 0wx5A + then yyQ10(strm', lastMatch) + else if ULexBuffer.eof(!(yystrm)) + then let + val yycolno = ref(yygetcolNo(!(yystrm))) + val yylineno = ref(yygetlineNo(!(yystrm))) + in + (case (!(yyss)) + of _ => (UserDeclarations.eof()) + (* end case *)) + end + else yystuck(lastMatch) + else if inp = 0wx6C + then yyQ12(strm', lastMatch) + else if inp < 0wx6C + then if inp = 0wx69 + then yyQ11(strm', lastMatch) + else yyQ10(strm', lastMatch) + else if inp <= 0wx7A + then yyQ10(strm', lastMatch) + else if ULexBuffer.eof(!(yystrm)) + then let + val yycolno = ref(yygetcolNo(!(yystrm))) + val yylineno = ref(yygetlineNo(!(yystrm))) + in + (case (!(yyss)) + of _ => (UserDeclarations.eof()) + (* end case *)) + end + else yystuck(lastMatch) + (* end case *)) +in + (case (!(yyss)) + of INITIAL => yyQ0(!(yystrm), yyNO_MATCH) + (* end case *)) +end +end + and skip() = (yystartPos := yygetPos(); + yylastwasnref := ULexBuffer.lastWasNL (!yystrm); + continue()) + in (continue(), (!yystartPos, yygetPos()), !yystrm, !yyss) end + in + lex() + end + in + type pos = AntlrStreamPos.pos + type span = AntlrStreamPos.span + type tok = UserDeclarations.lex_result + + datatype prestrm = STRM of ULexBuffer.stream * + (yystart_state * tok * span * prestrm * yystart_state) option ref + type strm = (prestrm * yystart_state) + + fun lex sm +(STRM (yystrm, memo), ss) = (case !memo + of NONE => let + val (tok, span, yystrm', ss') = innerLex +(yystrm, ss, sm) + val strm' = STRM (yystrm', ref NONE); + in + memo := SOME (ss, tok, span, strm', ss'); + (tok, span, (strm', ss')) + end + | SOME (ss', tok, span, strm', ss'') => + if ss = ss' then + (tok, span, (strm', ss'')) + else ( + memo := NONE; + lex sm +(STRM (yystrm, memo), ss)) + (* end case *)) + + fun streamify input = (STRM (yystreamify' 0 input, ref NONE), INITIAL) + fun streamifyReader readFn strm = (STRM (yystreamifyReader' 0 readFn strm, ref NONE), + INITIAL) + fun streamifyInstream strm = (STRM (yystreamifyInstream' 0 strm, ref NONE), + INITIAL) + + fun getPos (STRM (strm, _), _) = ULexBuffer.getpos strm + + end +end diff --git a/ml-lpt/ml-antlr/examples/calc/sources.cm b/ml-lpt/ml-antlr/examples/calc/sources.cm new file mode 100644 index 0000000..d2b8a7b --- /dev/null +++ b/ml-lpt/ml-antlr/examples/calc/sources.cm @@ -0,0 +1,25 @@ +(* sources.cm + * + * COPYRIGHT (c) 2011 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + *) + +Library + + structure CalcTest + +is + $/basis.cm + $/smlnj-lib.cm + +(* NOTE: we use a relative path here, but you should use + * + * $/ml-lpt-lib.cm + * + * for your projects. + *) + ../../../lib/ml-lpt-lib.cm + + calc.g : ml-antlr + calc.lex : ml-ulex + calc-test.sml diff --git a/ml-lpt/ml-antlr/examples/dragon/Makefile b/ml-lpt/ml-antlr/examples/dragon/Makefile new file mode 100644 index 0000000..6a1f2f0 --- /dev/null +++ b/ml-lpt/ml-antlr/examples/dragon/Makefile @@ -0,0 +1,42 @@ +# +# COPYRIGHT (c) 20056 +# John Reppy (http://www.cs.uchicago.edu/~jhr) +# Aaron Turon (http://www.cs.uchicago.edu/~adrassi) +# All rights reserved. +# + +OS = $(shell uname -s) + +ifeq ($(shell uname -s),Darwin) +HEAP_SUFFIX = ppc-darwin +else +HEAP_SUFFIX = x86-unix +endif + +SHELL = /bin/sh +SML = sml +ML_BUILD = ml-build +ML_MAKEDEPEND = ml-makedepend + +PROGRAM = dragon +HEAP_IMAGE = $(PROGRAM).$(HEAP_SUFFIX) + +SRC = +SOURCES = $(wildcard *.sml) dragon.l dragon.g.sml sources.cm + +build: dragon.g.sml $(HEAP_IMAGE) + +$(HEAP_IMAGE): $(SOURCES) + $(ML_BUILD) sources.cm Main.main $(PROGRAM) + +.depend: $(CM_FILES) + touch .depend + $(ML_MAKEDEPEND) -n -f .depend sources.cm $(HEAP_IMAGE) + +dragon.g.sml: dragon.g + ml-antlr dragon.g + +sinclude .depend + +clean: + rm -rf .depend .cm dragon.l.sml dragon.g.sml $(HEAP_IMAGE) diff --git a/ml-lpt/ml-antlr/examples/dragon/bad1.pas b/ml-lpt/ml-antlr/examples/dragon/bad1.pas new file mode 100644 index 0000000..a9b91cd --- /dev/null +++ b/ml-lpt/ml-antlr/examples/dragon/bad1.pas @@ -0,0 +1,12 @@ +program example(input, output); +var x, y : integer; +function gcd(a, b : integer): integer; +begin + if b = 0 then gcd := a + else gcd := gcd(b, a mod b) +end; { gcd } + +begin + read(x, y); + write(gcd(x, y)) +end. \ No newline at end of file diff --git a/ml-lpt/ml-antlr/examples/dragon/bad2.pas b/ml-lpt/ml-antlr/examples/dragon/bad2.pas new file mode 100644 index 0000000..c43d9bc --- /dev/null +++ b/ml-lpt/ml-antlr/examples/dragon/bad2.pas @@ -0,0 +1,10 @@ +kprogram example(input, output); +var x, y : integer; +function blah(a, b : integer) : integer; +begin +end; { blah } + +begin + read(x, y); + write(gcd(x, y)); +end. diff --git a/ml-lpt/ml-antlr/examples/dragon/bad3.pas b/ml-lpt/ml-antlr/examples/dragon/bad3.pas new file mode 100644 index 0000000..3cb17a8 --- /dev/null +++ b/ml-lpt/ml-antlr/examples/dragon/bad3.pas @@ -0,0 +1,12 @@ +program example(input, output); +var x, y : integer; +procedure P; +begin + if x > 0 then y := 2; + if y < 0 then z := 3 else z := 2 +end; + +begin + read(x, y); + write(gcd(x, y)) +end. diff --git a/ml-lpt/ml-antlr/examples/dragon/bad4.pas b/ml-lpt/ml-antlr/examples/dragon/bad4.pas new file mode 100644 index 0000000..127fdda --- /dev/null +++ b/ml-lpt/ml-antlr/examples/dragon/bad4.pas @@ -0,0 +1,8 @@ +program P(input, output); +begin; +procedure FACTR(N : integer; factor : integer); +begin + x := 1 +end; { FACTR } +x := 1 +end. \ No newline at end of file diff --git a/ml-lpt/ml-antlr/examples/dragon/dragon b/ml-lpt/ml-antlr/examples/dragon/dragon new file mode 100755 index 0000000..7df883e --- /dev/null +++ b/ml-lpt/ml-antlr/examples/dragon/dragon @@ -0,0 +1,16 @@ +#!/bin/sh +# +# Wrapper for dragon heap image +# + +case `uname -s` in + Darwin) heap=dragon.ppc-darwin ;; + *) heap=dragon.x86-linux ;; +esac + +if test ! -r $heap ; then + echo "dragon: no heap image!" + exit 1 +fi + +exec sml @SMLload=$heap $@ \ No newline at end of file diff --git a/ml-lpt/ml-antlr/examples/dragon/dragon.g b/ml-lpt/ml-antlr/examples/dragon/dragon.g new file mode 100644 index 0000000..1623240 --- /dev/null +++ b/ml-lpt/ml-antlr/examples/dragon/dragon.g @@ -0,0 +1,183 @@ +(* a subset of Pascal, a la the appendix to the Dragon book *) + +%defs ( + val cat = String.concat + val catSp = String.concatWith " " + val catNl = String.concatWith "\n" + val catNlNl = String.concatWith "\n\n" + val catCm = String.concatWith ", " + val catSemi = String.concatWith "; " + val catSemiNl = String.concatWith ";\n" +); + +%tokens + : KW_program ("program") + | KW_var ("var") + | KW_array ("array") + | KW_of ("of") + | KW_integer ("integer") + | KW_real ("real") + | KW_function ("function") + | KW_procedure ("procedure") + | KW_begin ("begin") + | KW_end ("end") + | KW_if ("if") + | KW_then ("then") + | KW_else ("else") + | KW_while ("while") + | KW_do ("do") + | KW_not ("not") + | ASSIGNOP (":=") + | COMMA (",") + | COLON (":") + | SEMI (";") + | DOT (".") + | LSB ("[") + | RSB ("]") + | LP ("(") + | RP (")") + | MINUS ("-") + | RELOP of string + | ADDOP of string + | MULOP of string + | ID of string + | INT of IntInf.int + | REAL of Real.real + ; + +%keywords + KW_program, + KW_var, + KW_array, + KW_of, + KW_integer, + KW_real, + KW_function, + KW_procedure, + KW_begin, + KW_end, + KW_if, + KW_then, + KW_else, + KW_while, + KW_do, + KW_not + ; + +%prefer ID, ";", ",", "["; + +%change "," -> ";" | ";" -> "," ; + +%value ID("bogus"); + +program + : "program" ID LP id_list RP SEMI + (declaration)* + (subprogram_declaration)* + compound_statement + DOT + => ( catNl [ + cat ["program ", ID, "(", id_list, ");"], + catNl SR1, catNlNl SR2, + compound_statement ^ "." + ]) + ; + +id_list + : ID (COMMA ID)* => ( catCm (ID::SR) ) + ; + +declaration + : KW_var id_list_type SEMI => ( cat ["var ", id_list_type, ";"] ) + ; + +compound_type + : standard_type + | KW_array LSB INT DOT DOT INT RSB KW_of standard_type + => ( cat ["array [", IntInf.toString INT1, "..", IntInf.toString INT2, "] of ", standard_type] ) + ; + +standard_type + : KW_integer => ( "integer" ) + | KW_real => ( "real" ) + ; + +id_list_type + : id_list COLON compound_type => ( catSp [id_list, ":", compound_type] ) + ; + +subprogram_declaration + : subprogram_head (declaration)* compound_statement SEMI + => ( catNl [subprogram_head, catNl SR, compound_statement ^ ";"] ) + ; + +subprogram_head + : KW_function ID arguments COLON standard_type SEMI + => ( cat ["function ", ID, arguments, " : ", standard_type, ";"] ) + | KW_procedure ID arguments SEMI + => ( cat ["procedure ", ID, arguments, ";"] ) + ; + +arguments + : LP parameter_list RP => ( "(" ^ parameter_list ^ ")" ) + | => ( "" ) + ; + +parameter_list + : id_list_type (SEMI id_list_type)* => ( catSemi (id_list_type::SR) ) + ; + +compound_statement + : KW_begin (statement (SEMI statement)* => ( catSemiNl (statement::SR) ))? KW_end + => ( catNl ["begin", getOpt(SR, ""), "end"] ) + ; + +statement + : variable ASSIGNOP exp + => ( cat [variable, " := ", exp] ) + | procedure_statement + | compound_statement + | KW_if exp KW_then statement KW_else statement + => ( cat ["if ", exp, " then ", statement1, " else ", statement2] ) + | KW_while exp KW_do statement + => ( cat ["while ", exp, " do ", statement] ) + ; + +variable + : ID + | ID LSB exp RSB => ( cat [ID, "[", exp, "]"] ) + ; + +procedure_statement + : ID + | ID LP exp (COMMA exp)* RP => ( cat [ID, "(", catCm (exp::SR), ")" ] ) + ; + +exp + : simple_exp (RELOP simple_exp => ( RELOP ^ " " ^ simple_exp ))? + => ( catSp [simple_exp, getOpt(SR, "")] ) + ; + +simple_exp + : signed_term (ADDOP signed_term => ( ADDOP ^ " " ^signed_term ))* + => ( catSp (signed_term::SR) ) + ; + +signed_term + : MINUS term => ( "-" ^ term ) + | term + ; + +term + : factor (MULOP factor => ( MULOP ^ " " ^ factor ))* + => ( catSp (factor::SR) ) + ; + +factor + : variable + | ID LP exp (COMMA exp)* RP => ( cat [ID, "(", catCm (exp::SR), ")" ] ) + | INT => ( IntInf.toString INT ) + | REAL => ( Real.toString REAL ) + | LP exp RP => ( "(" ^ exp ^ ")" ) + | KW_not factor => ( "not " ^ factor ) + ; diff --git a/ml-lpt/ml-antlr/examples/dragon/dragon.g.sml b/ml-lpt/ml-antlr/examples/dragon/dragon.g.sml new file mode 100644 index 0000000..246740a --- /dev/null +++ b/ml-lpt/ml-antlr/examples/dragon/dragon.g.sml @@ -0,0 +1,944 @@ +structure +Tokens = struct + + datatype token = EOF + | REAL of Real.real + | INT of IntInf.int + | ID of string + | MULOP of string + | ADDOP of string + | RELOP of string + | MINUS + | RP + | LP + | RSB + | LSB + | DOT + | SEMI + | COLON + | COMMA + | ASSIGNOP + | KW_not + | KW_do + | KW_while + | KW_else + | KW_then + | KW_if + | KW_end + | KW_begin + | KW_procedure + | KW_function + | KW_real + | KW_integer + | KW_of + | KW_array + | KW_var + | KW_program + + val allToks = [EOF, ID("bogus"), MINUS, RP, LP, RSB, LSB, DOT, SEMI, COLON, COMMA, ASSIGNOP, KW_not, KW_do, KW_while, KW_else, KW_then, KW_if, KW_end, KW_begin, KW_procedure, KW_function, KW_real, KW_integer, KW_of, KW_array, KW_var, KW_program] + + fun toString tok = +(case (tok) + of (EOF) => "EOF" + | (REAL(_)) => "REAL" + | (INT(_)) => "INT" + | (ID(_)) => "ID" + | (MULOP(_)) => "MULOP" + | (ADDOP(_)) => "ADDOP" + | (RELOP(_)) => "RELOP" + | (MINUS) => "-" + | (RP) => ")" + | (LP) => "(" + | (RSB) => "]" + | (LSB) => "[" + | (DOT) => "." + | (SEMI) => ";" + | (COLON) => ":" + | (COMMA) => "," + | (ASSIGNOP) => ":=" + | (KW_not) => "not" + | (KW_do) => "do" + | (KW_while) => "while" + | (KW_else) => "else" + | (KW_then) => "then" + | (KW_if) => "if" + | (KW_end) => "end" + | (KW_begin) => "begin" + | (KW_procedure) => "procedure" + | (KW_function) => "function" + | (KW_real) => "real" + | (KW_integer) => "integer" + | (KW_of) => "of" + | (KW_array) => "array" + | (KW_var) => "var" + | (KW_program) => "program" +(* end case *)) + fun isKW tok = +(case (tok) + of (EOF) => false + | (REAL(_)) => false + | (INT(_)) => false + | (ID(_)) => false + | (MULOP(_)) => false + | (ADDOP(_)) => false + | (RELOP(_)) => false + | (MINUS) => false + | (RP) => false + | (LP) => false + | (RSB) => false + | (LSB) => false + | (DOT) => false + | (SEMI) => false + | (COLON) => false + | (COMMA) => false + | (ASSIGNOP) => false + | (KW_not) => true + | (KW_do) => true + | (KW_while) => true + | (KW_else) => true + | (KW_then) => true + | (KW_if) => true + | (KW_end) => true + | (KW_begin) => true + | (KW_procedure) => true + | (KW_function) => true + | (KW_real) => true + | (KW_integer) => true + | (KW_of) => true + | (KW_array) => true + | (KW_var) => true + | (KW_program) => true +(* end case *)) + val changes = [] + + + fun isEOF EOF = true + | isEOF _ = false + +end + +functor ParseFn(Lex : ANTLR_LEXER) = struct + + local + structure Tok = +Tokens + structure UserCode = + struct + + val cat = String.concat + val catSp = String.concatWith " " + val catNl = String.concatWith "\n" + val catNlNl = String.concatWith "\n\n" + val catCm = String.concatWith ", " + val catSemi = String.concatWith "; " + val catSemiNl = String.concatWith ";\n" + +fun program_PROD_1_ACT (ID, LP, RP, DOT, SR1, SR2, SEMI, compound_statement, id_list, KW_program, ID_SPAN : (Lex.pos * Lex.pos), LP_SPAN : (Lex.pos * Lex.pos), RP_SPAN : (Lex.pos * Lex.pos), DOT_SPAN : (Lex.pos * Lex.pos), SR1_SPAN : (Lex.pos * Lex.pos), SR2_SPAN : (Lex.pos * Lex.pos), SEMI_SPAN : (Lex.pos * Lex.pos), compound_statement_SPAN : (Lex.pos * Lex.pos), id_list_SPAN : (Lex.pos * Lex.pos), KW_program_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)) = + ( catNl [ + cat ["program ", ID, "(", id_list, ");"], + catNl SR1, catNlNl SR2, + compound_statement ^ "." + ]) +fun id_list_PROD_1_ACT (ID, SR, ID_SPAN : (Lex.pos * Lex.pos), SR_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)) = + ( catCm (ID::SR) ) +fun declaration_PROD_1_ACT (SEMI, id_list_type, KW_var, SEMI_SPAN : (Lex.pos * Lex.pos), id_list_type_SPAN : (Lex.pos * Lex.pos), KW_var_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)) = + ( cat ["var ", id_list_type, ";"] ) +fun compound_type_PROD_2_ACT (LSB, RSB, DOT1, DOT2, INT1, INT2, KW_array, standard_type, KW_of, LSB_SPAN : (Lex.pos * Lex.pos), RSB_SPAN : (Lex.pos * Lex.pos), DOT1_SPAN : (Lex.pos * Lex.pos), DOT2_SPAN : (Lex.pos * Lex.pos), INT1_SPAN : (Lex.pos * Lex.pos), INT2_SPAN : (Lex.pos * Lex.pos), KW_array_SPAN : (Lex.pos * Lex.pos), standard_type_SPAN : (Lex.pos * Lex.pos), KW_of_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)) = + ( cat ["array [", IntInf.toString INT1, "..", IntInf.toString INT2, "] of ", standard_type] ) +fun standard_type_PROD_1_ACT (KW_integer, KW_integer_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)) = + ( "integer" ) +fun standard_type_PROD_2_ACT (KW_real, KW_real_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)) = + ( "real" ) +fun id_list_type_PROD_1_ACT (compound_type, COLON, id_list, compound_type_SPAN : (Lex.pos * Lex.pos), COLON_SPAN : (Lex.pos * Lex.pos), id_list_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)) = + ( catSp [id_list, ":", compound_type] ) +fun subprogram_declaration_PROD_1_ACT (SR, SEMI, compound_statement, subprogram_head, SR_SPAN : (Lex.pos * Lex.pos), SEMI_SPAN : (Lex.pos * Lex.pos), compound_statement_SPAN : (Lex.pos * Lex.pos), subprogram_head_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)) = + ( catNl [subprogram_head, catNl SR, compound_statement ^ ";"] ) +fun subprogram_head_PROD_1_ACT (ID, SEMI, standard_type, COLON, arguments, KW_function, ID_SPAN : (Lex.pos * Lex.pos), SEMI_SPAN : (Lex.pos * Lex.pos), standard_type_SPAN : (Lex.pos * Lex.pos), COLON_SPAN : (Lex.pos * Lex.pos), arguments_SPAN : (Lex.pos * Lex.pos), KW_function_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)) = + ( cat ["function ", ID, arguments, " : ", standard_type, ";"] ) +fun subprogram_head_PROD_2_ACT (ID, SEMI, KW_procedure, arguments, ID_SPAN : (Lex.pos * Lex.pos), SEMI_SPAN : (Lex.pos * Lex.pos), KW_procedure_SPAN : (Lex.pos * Lex.pos), arguments_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)) = + ( cat ["procedure ", ID, arguments, ";"] ) +fun arguments_PROD_1_ACT (LP, RP, parameter_list, LP_SPAN : (Lex.pos * Lex.pos), RP_SPAN : (Lex.pos * Lex.pos), parameter_list_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)) = + ( "(" ^ parameter_list ^ ")" ) +fun arguments_PROD_2_ACT (FULL_SPAN : (Lex.pos * Lex.pos)) = + ( "" ) +fun parameter_list_PROD_1_ACT (SR, id_list_type, SR_SPAN : (Lex.pos * Lex.pos), id_list_type_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)) = + ( catSemi (id_list_type::SR) ) +fun compound_statement_PROD_1_SUBRULE_1_PROD_1_ACT (SR, KW_begin, statement, SR_SPAN : (Lex.pos * Lex.pos), KW_begin_SPAN : (Lex.pos * Lex.pos), statement_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)) = + ( catSemiNl (statement::SR) ) +fun compound_statement_PROD_1_ACT (SR, KW_begin, KW_end, SR_SPAN : (Lex.pos * Lex.pos), KW_begin_SPAN : (Lex.pos * Lex.pos), KW_end_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)) = + ( catNl ["begin", getOpt(SR, ""), "end"] ) +fun statement_PROD_1_ACT (exp, variable, ASSIGNOP, exp_SPAN : (Lex.pos * Lex.pos), variable_SPAN : (Lex.pos * Lex.pos), ASSIGNOP_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)) = + ( cat [variable, " := ", exp] ) +fun statement_PROD_4_ACT (exp, KW_else, KW_then, statement1, statement2, KW_if, exp_SPAN : (Lex.pos * Lex.pos), KW_else_SPAN : (Lex.pos * Lex.pos), KW_then_SPAN : (Lex.pos * Lex.pos), statement1_SPAN : (Lex.pos * Lex.pos), statement2_SPAN : (Lex.pos * Lex.pos), KW_if_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)) = + ( cat ["if ", exp, " then ", statement1, " else ", statement2] ) +fun statement_PROD_5_ACT (exp, KW_while, KW_do, statement, exp_SPAN : (Lex.pos * Lex.pos), KW_while_SPAN : (Lex.pos * Lex.pos), KW_do_SPAN : (Lex.pos * Lex.pos), statement_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)) = + ( cat ["while ", exp, " do ", statement] ) +fun variable_PROD_2_ACT (ID, LSB, RSB, exp, ID_SPAN : (Lex.pos * Lex.pos), LSB_SPAN : (Lex.pos * Lex.pos), RSB_SPAN : (Lex.pos * Lex.pos), exp_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)) = + ( cat [ID, "[", exp, "]"] ) +fun procedure_statement_PROD_2_ACT (ID, LP, RP, SR, exp, ID_SPAN : (Lex.pos * Lex.pos), LP_SPAN : (Lex.pos * Lex.pos), RP_SPAN : (Lex.pos * Lex.pos), SR_SPAN : (Lex.pos * Lex.pos), exp_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)) = + ( cat [ID, "(", catCm (exp::SR), ")" ] ) +fun exp_PROD_1_SUBRULE_1_PROD_1_ACT (simple_exp, RELOP, simple_exp_SPAN : (Lex.pos * Lex.pos), RELOP_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)) = + ( RELOP ^ " " ^ simple_exp ) +fun exp_PROD_1_ACT (SR, simple_exp, SR_SPAN : (Lex.pos * Lex.pos), simple_exp_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)) = + ( catSp [simple_exp, getOpt(SR, "")] ) +fun simple_exp_PROD_1_SUBRULE_1_PROD_1_ACT (signed_term, ADDOP, signed_term_SPAN : (Lex.pos * Lex.pos), ADDOP_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)) = + ( ADDOP ^ " " ^signed_term ) +fun simple_exp_PROD_1_ACT (SR, signed_term, SR_SPAN : (Lex.pos * Lex.pos), signed_term_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)) = + ( catSp (signed_term::SR) ) +fun signed_term_PROD_1_ACT (term, MINUS, term_SPAN : (Lex.pos * Lex.pos), MINUS_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)) = + ( "-" ^ term ) +fun term_PROD_1_SUBRULE_1_PROD_1_ACT (factor, MULOP, factor_SPAN : (Lex.pos * Lex.pos), MULOP_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)) = + ( MULOP ^ " " ^ factor ) +fun term_PROD_1_ACT (SR, factor, SR_SPAN : (Lex.pos * Lex.pos), factor_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)) = + ( catSp (factor::SR) ) +fun factor_PROD_2_ACT (ID, LP, RP, SR, exp, ID_SPAN : (Lex.pos * Lex.pos), LP_SPAN : (Lex.pos * Lex.pos), RP_SPAN : (Lex.pos * Lex.pos), SR_SPAN : (Lex.pos * Lex.pos), exp_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)) = + ( cat [ID, "(", catCm (exp::SR), ")" ] ) +fun factor_PROD_3_ACT (INT, INT_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)) = + ( IntInf.toString INT ) +fun factor_PROD_4_ACT (REAL, REAL_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)) = + ( Real.toString REAL ) +fun factor_PROD_5_ACT (LP, RP, exp, LP_SPAN : (Lex.pos * Lex.pos), RP_SPAN : (Lex.pos * Lex.pos), exp_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)) = + ( "(" ^ exp ^ ")" ) +fun factor_PROD_6_ACT (factor, KW_not, factor_SPAN : (Lex.pos * Lex.pos), KW_not_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)) = + ( "not " ^ factor ) + end (* UserCode *) + + structure Err = AntlrErrHandler( + structure Tok = Tok + structure Lex = Lex) + structure EBNF = AntlrEBNF( + struct + type strm = Err.wstream + val getSpan = Err.getSpan + end) + + fun mk lexFn = let +fun getS() = {} +fun putS{} = () +fun unwrap (ret, strm, repairs) = (ret, strm, repairs) val (eh, lex) = Err.mkErrHandler {get = getS, put = putS} + fun fail() = Err.failure eh + fun tryProds (strm, prods) = let + fun try [] = fail() + | try (prod :: prods) = + (Err.whileDisabled eh (fn() => prod strm)) + handle Err.ParseError => try (prods) + in try prods end +fun matchEOF strm = (case (lex(strm)) + of (Tok.EOF, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchREAL strm = (case (lex(strm)) + of (Tok.REAL(x), span, strm') => (x, span, strm') + | _ => fail() +(* end case *)) +fun matchINT strm = (case (lex(strm)) + of (Tok.INT(x), span, strm') => (x, span, strm') + | _ => fail() +(* end case *)) +fun matchID strm = (case (lex(strm)) + of (Tok.ID(x), span, strm') => (x, span, strm') + | _ => fail() +(* end case *)) +fun matchMULOP strm = (case (lex(strm)) + of (Tok.MULOP(x), span, strm') => (x, span, strm') + | _ => fail() +(* end case *)) +fun matchADDOP strm = (case (lex(strm)) + of (Tok.ADDOP(x), span, strm') => (x, span, strm') + | _ => fail() +(* end case *)) +fun matchRELOP strm = (case (lex(strm)) + of (Tok.RELOP(x), span, strm') => (x, span, strm') + | _ => fail() +(* end case *)) +fun matchMINUS strm = (case (lex(strm)) + of (Tok.MINUS, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchRP strm = (case (lex(strm)) + of (Tok.RP, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchLP strm = (case (lex(strm)) + of (Tok.LP, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchRSB strm = (case (lex(strm)) + of (Tok.RSB, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchLSB strm = (case (lex(strm)) + of (Tok.LSB, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchDOT strm = (case (lex(strm)) + of (Tok.DOT, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchSEMI strm = (case (lex(strm)) + of (Tok.SEMI, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchCOLON strm = (case (lex(strm)) + of (Tok.COLON, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchCOMMA strm = (case (lex(strm)) + of (Tok.COMMA, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchASSIGNOP strm = (case (lex(strm)) + of (Tok.ASSIGNOP, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchKW_not strm = (case (lex(strm)) + of (Tok.KW_not, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchKW_do strm = (case (lex(strm)) + of (Tok.KW_do, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchKW_while strm = (case (lex(strm)) + of (Tok.KW_while, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchKW_else strm = (case (lex(strm)) + of (Tok.KW_else, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchKW_then strm = (case (lex(strm)) + of (Tok.KW_then, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchKW_if strm = (case (lex(strm)) + of (Tok.KW_if, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchKW_end strm = (case (lex(strm)) + of (Tok.KW_end, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchKW_begin strm = (case (lex(strm)) + of (Tok.KW_begin, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchKW_procedure strm = (case (lex(strm)) + of (Tok.KW_procedure, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchKW_function strm = (case (lex(strm)) + of (Tok.KW_function, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchKW_real strm = (case (lex(strm)) + of (Tok.KW_real, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchKW_integer strm = (case (lex(strm)) + of (Tok.KW_integer, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchKW_of strm = (case (lex(strm)) + of (Tok.KW_of, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchKW_array strm = (case (lex(strm)) + of (Tok.KW_array, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchKW_var strm = (case (lex(strm)) + of (Tok.KW_var, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchKW_program strm = (case (lex(strm)) + of (Tok.KW_program, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) + +val (program_NT) = +let +fun exp_NT (strm) = let + val (simple_exp_RES, simple_exp_SPAN, strm') = simple_exp_NT(strm) + fun exp_PROD_1_SUBRULE_1_NT (strm) = let + val (RELOP_RES, RELOP_SPAN, strm') = matchRELOP(strm) + val (simple_exp_RES, simple_exp_SPAN, strm') = simple_exp_NT(strm') + val FULL_SPAN = (#1(RELOP_SPAN), #2(simple_exp_SPAN)) + in + (UserCode.exp_PROD_1_SUBRULE_1_PROD_1_ACT (simple_exp_RES, RELOP_RES, simple_exp_SPAN : (Lex.pos * Lex.pos), RELOP_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)), + FULL_SPAN, strm') + end + fun exp_PROD_1_SUBRULE_1_PRED (strm) = (case (lex(strm)) + of (Tok.RELOP(_), _, strm') => true + | _ => false + (* end case *)) + val (SR_RES, SR_SPAN, strm') = EBNF.optional(exp_PROD_1_SUBRULE_1_PRED, exp_PROD_1_SUBRULE_1_NT, strm') + val FULL_SPAN = (#1(simple_exp_SPAN), #2(SR_SPAN)) + in + (UserCode.exp_PROD_1_ACT (SR_RES, simple_exp_RES, SR_SPAN : (Lex.pos * Lex.pos), simple_exp_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)), + FULL_SPAN, strm') + end +and simple_exp_NT (strm) = let + val (signed_term_RES, signed_term_SPAN, strm') = signed_term_NT(strm) + fun simple_exp_PROD_1_SUBRULE_1_NT (strm) = let + val (ADDOP_RES, ADDOP_SPAN, strm') = matchADDOP(strm) + val (signed_term_RES, signed_term_SPAN, strm') = signed_term_NT(strm') + val FULL_SPAN = (#1(ADDOP_SPAN), #2(signed_term_SPAN)) + in + (UserCode.simple_exp_PROD_1_SUBRULE_1_PROD_1_ACT (signed_term_RES, ADDOP_RES, signed_term_SPAN : (Lex.pos * Lex.pos), ADDOP_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)), + FULL_SPAN, strm') + end + fun simple_exp_PROD_1_SUBRULE_1_PRED (strm) = (case (lex(strm)) + of (Tok.ADDOP(_), _, strm') => true + | _ => false + (* end case *)) + val (SR_RES, SR_SPAN, strm') = EBNF.closure(simple_exp_PROD_1_SUBRULE_1_PRED, simple_exp_PROD_1_SUBRULE_1_NT, strm') + val FULL_SPAN = (#1(signed_term_SPAN), #2(SR_SPAN)) + in + (UserCode.simple_exp_PROD_1_ACT (SR_RES, signed_term_RES, SR_SPAN : (Lex.pos * Lex.pos), signed_term_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)), + FULL_SPAN, strm') + end +and signed_term_NT (strm) = let + fun signed_term_PROD_1 (strm) = let + val (MINUS_RES, MINUS_SPAN, strm') = matchMINUS(strm) + val (term_RES, term_SPAN, strm') = term_NT(strm') + val FULL_SPAN = (#1(MINUS_SPAN), #2(term_SPAN)) + in + (UserCode.signed_term_PROD_1_ACT (term_RES, MINUS_RES, term_SPAN : (Lex.pos * Lex.pos), MINUS_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)), + FULL_SPAN, strm') + end + fun signed_term_PROD_2 (strm) = let + val (term_RES, term_SPAN, strm') = term_NT(strm) + val FULL_SPAN = (#1(term_SPAN), #2(term_SPAN)) + in + ((term_RES), FULL_SPAN, strm') + end + in + (case (lex(strm)) + of (Tok.KW_not, _, strm') => signed_term_PROD_2(strm) + | (Tok.LP, _, strm') => signed_term_PROD_2(strm) + | (Tok.ID(_), _, strm') => signed_term_PROD_2(strm) + | (Tok.INT(_), _, strm') => signed_term_PROD_2(strm) + | (Tok.REAL(_), _, strm') => signed_term_PROD_2(strm) + | (Tok.MINUS, _, strm') => signed_term_PROD_1(strm) + | _ => fail() + (* end case *)) + end +and term_NT (strm) = let + val (factor_RES, factor_SPAN, strm') = factor_NT(strm) + fun term_PROD_1_SUBRULE_1_NT (strm) = let + val (MULOP_RES, MULOP_SPAN, strm') = matchMULOP(strm) + val (factor_RES, factor_SPAN, strm') = factor_NT(strm') + val FULL_SPAN = (#1(MULOP_SPAN), #2(factor_SPAN)) + in + (UserCode.term_PROD_1_SUBRULE_1_PROD_1_ACT (factor_RES, MULOP_RES, factor_SPAN : (Lex.pos * Lex.pos), MULOP_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)), + FULL_SPAN, strm') + end + fun term_PROD_1_SUBRULE_1_PRED (strm) = (case (lex(strm)) + of (Tok.MULOP(_), _, strm') => true + | _ => false + (* end case *)) + val (SR_RES, SR_SPAN, strm') = EBNF.closure(term_PROD_1_SUBRULE_1_PRED, term_PROD_1_SUBRULE_1_NT, strm') + val FULL_SPAN = (#1(factor_SPAN), #2(SR_SPAN)) + in + (UserCode.term_PROD_1_ACT (SR_RES, factor_RES, SR_SPAN : (Lex.pos * Lex.pos), factor_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)), + FULL_SPAN, strm') + end +and factor_NT (strm) = let + fun factor_PROD_1 (strm) = let + val (variable_RES, variable_SPAN, strm') = variable_NT(strm) + val FULL_SPAN = (#1(variable_SPAN), #2(variable_SPAN)) + in + ((variable_RES), FULL_SPAN, strm') + end + fun factor_PROD_2 (strm) = let + val (ID_RES, ID_SPAN, strm') = matchID(strm) + val (LP_RES, LP_SPAN, strm') = matchLP(strm') + val (exp_RES, exp_SPAN, strm') = exp_NT(strm') + fun factor_PROD_2_SUBRULE_1_NT (strm) = let + val (COMMA_RES, COMMA_SPAN, strm') = matchCOMMA(strm) + val (exp_RES, exp_SPAN, strm') = exp_NT(strm') + val FULL_SPAN = (#1(COMMA_SPAN), #2(exp_SPAN)) + in + ((exp_RES), FULL_SPAN, strm') + end + fun factor_PROD_2_SUBRULE_1_PRED (strm) = (case (lex(strm)) + of (Tok.COMMA, _, strm') => true + | _ => false + (* end case *)) + val (SR_RES, SR_SPAN, strm') = EBNF.closure(factor_PROD_2_SUBRULE_1_PRED, factor_PROD_2_SUBRULE_1_NT, strm') + val (RP_RES, RP_SPAN, strm') = matchRP(strm') + val FULL_SPAN = (#1(ID_SPAN), #2(RP_SPAN)) + in + (UserCode.factor_PROD_2_ACT (ID_RES, LP_RES, RP_RES, SR_RES, exp_RES, ID_SPAN : (Lex.pos * Lex.pos), LP_SPAN : (Lex.pos * Lex.pos), RP_SPAN : (Lex.pos * Lex.pos), SR_SPAN : (Lex.pos * Lex.pos), exp_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)), + FULL_SPAN, strm') + end + fun factor_PROD_3 (strm) = let + val (INT_RES, INT_SPAN, strm') = matchINT(strm) + val FULL_SPAN = (#1(INT_SPAN), #2(INT_SPAN)) + in + (UserCode.factor_PROD_3_ACT (INT_RES, INT_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)), + FULL_SPAN, strm') + end + fun factor_PROD_4 (strm) = let + val (REAL_RES, REAL_SPAN, strm') = matchREAL(strm) + val FULL_SPAN = (#1(REAL_SPAN), #2(REAL_SPAN)) + in + (UserCode.factor_PROD_4_ACT (REAL_RES, REAL_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)), + FULL_SPAN, strm') + end + fun factor_PROD_5 (strm) = let + val (LP_RES, LP_SPAN, strm') = matchLP(strm) + val (exp_RES, exp_SPAN, strm') = exp_NT(strm') + val (RP_RES, RP_SPAN, strm') = matchRP(strm') + val FULL_SPAN = (#1(LP_SPAN), #2(RP_SPAN)) + in + (UserCode.factor_PROD_5_ACT (LP_RES, RP_RES, exp_RES, LP_SPAN : (Lex.pos * Lex.pos), RP_SPAN : (Lex.pos * Lex.pos), exp_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)), + FULL_SPAN, strm') + end + fun factor_PROD_6 (strm) = let + val (KW_not_RES, KW_not_SPAN, strm') = matchKW_not(strm) + val (factor_RES, factor_SPAN, strm') = factor_NT(strm') + val FULL_SPAN = (#1(KW_not_SPAN), #2(factor_SPAN)) + in + (UserCode.factor_PROD_6_ACT (factor_RES, KW_not_RES, factor_SPAN : (Lex.pos * Lex.pos), KW_not_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)), + FULL_SPAN, strm') + end + in + (case (lex(strm)) + of (Tok.KW_not, _, strm') => factor_PROD_6(strm) + | (Tok.REAL(_), _, strm') => factor_PROD_4(strm) + | (Tok.ID(_), _, strm') => + (case (lex(strm')) + of (Tok.KW_end, _, strm') => factor_PROD_1(strm) + | (Tok.KW_then, _, strm') => factor_PROD_1(strm) + | (Tok.KW_else, _, strm') => factor_PROD_1(strm) + | (Tok.KW_do, _, strm') => factor_PROD_1(strm) + | (Tok.COMMA, _, strm') => factor_PROD_1(strm) + | (Tok.SEMI, _, strm') => factor_PROD_1(strm) + | (Tok.LSB, _, strm') => factor_PROD_1(strm) + | (Tok.RSB, _, strm') => factor_PROD_1(strm) + | (Tok.RP, _, strm') => factor_PROD_1(strm) + | (Tok.RELOP(_), _, strm') => factor_PROD_1(strm) + | (Tok.ADDOP(_), _, strm') => factor_PROD_1(strm) + | (Tok.MULOP(_), _, strm') => factor_PROD_1(strm) + | (Tok.LP, _, strm') => factor_PROD_2(strm) + | _ => fail() + (* end case *)) + | (Tok.INT(_), _, strm') => factor_PROD_3(strm) + | (Tok.LP, _, strm') => factor_PROD_5(strm) + | _ => fail() + (* end case *)) + end +and variable_NT (strm) = let + fun variable_PROD_1 (strm) = let + val (ID_RES, ID_SPAN, strm') = matchID(strm) + val FULL_SPAN = (#1(ID_SPAN), #2(ID_SPAN)) + in + ((ID_RES), FULL_SPAN, strm') + end + fun variable_PROD_2 (strm) = let + val (ID_RES, ID_SPAN, strm') = matchID(strm) + val (LSB_RES, LSB_SPAN, strm') = matchLSB(strm') + val (exp_RES, exp_SPAN, strm') = exp_NT(strm') + val (RSB_RES, RSB_SPAN, strm') = matchRSB(strm') + val FULL_SPAN = (#1(ID_SPAN), #2(RSB_SPAN)) + in + (UserCode.variable_PROD_2_ACT (ID_RES, LSB_RES, RSB_RES, exp_RES, ID_SPAN : (Lex.pos * Lex.pos), LSB_SPAN : (Lex.pos * Lex.pos), RSB_SPAN : (Lex.pos * Lex.pos), exp_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)), + FULL_SPAN, strm') + end + in + (case (lex(strm)) + of (Tok.ID(_), _, strm') => + (case (lex(strm')) + of (Tok.KW_end, _, strm') => variable_PROD_1(strm) + | (Tok.KW_then, _, strm') => variable_PROD_1(strm) + | (Tok.KW_else, _, strm') => variable_PROD_1(strm) + | (Tok.KW_do, _, strm') => variable_PROD_1(strm) + | (Tok.ASSIGNOP, _, strm') => variable_PROD_1(strm) + | (Tok.COMMA, _, strm') => variable_PROD_1(strm) + | (Tok.SEMI, _, strm') => variable_PROD_1(strm) + | (Tok.RSB, _, strm') => variable_PROD_1(strm) + | (Tok.RP, _, strm') => variable_PROD_1(strm) + | (Tok.RELOP(_), _, strm') => variable_PROD_1(strm) + | (Tok.ADDOP(_), _, strm') => variable_PROD_1(strm) + | (Tok.MULOP(_), _, strm') => variable_PROD_1(strm) + | (Tok.LSB, _, strm') => variable_PROD_2(strm) + | _ => fail() + (* end case *)) + | _ => fail() + (* end case *)) + end +fun procedure_statement_NT (strm) = let + fun procedure_statement_PROD_1 (strm) = let + val (ID_RES, ID_SPAN, strm') = matchID(strm) + val FULL_SPAN = (#1(ID_SPAN), #2(ID_SPAN)) + in + ((ID_RES), FULL_SPAN, strm') + end + fun procedure_statement_PROD_2 (strm) = let + val (ID_RES, ID_SPAN, strm') = matchID(strm) + val (LP_RES, LP_SPAN, strm') = matchLP(strm') + val (exp_RES, exp_SPAN, strm') = exp_NT(strm') + fun procedure_statement_PROD_2_SUBRULE_1_NT (strm) = let + val (COMMA_RES, COMMA_SPAN, strm') = matchCOMMA(strm) + val (exp_RES, exp_SPAN, strm') = exp_NT(strm') + val FULL_SPAN = (#1(COMMA_SPAN), #2(exp_SPAN)) + in + ((exp_RES), FULL_SPAN, strm') + end + fun procedure_statement_PROD_2_SUBRULE_1_PRED (strm) = (case (lex(strm)) + of (Tok.COMMA, _, strm') => true + | _ => false + (* end case *)) + val (SR_RES, SR_SPAN, strm') = EBNF.closure(procedure_statement_PROD_2_SUBRULE_1_PRED, procedure_statement_PROD_2_SUBRULE_1_NT, strm') + val (RP_RES, RP_SPAN, strm') = matchRP(strm') + val FULL_SPAN = (#1(ID_SPAN), #2(RP_SPAN)) + in + (UserCode.procedure_statement_PROD_2_ACT (ID_RES, LP_RES, RP_RES, SR_RES, exp_RES, ID_SPAN : (Lex.pos * Lex.pos), LP_SPAN : (Lex.pos * Lex.pos), RP_SPAN : (Lex.pos * Lex.pos), SR_SPAN : (Lex.pos * Lex.pos), exp_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)), + FULL_SPAN, strm') + end + in + (case (lex(strm)) + of (Tok.ID(_), _, strm') => + (case (lex(strm')) + of (Tok.KW_end, _, strm') => procedure_statement_PROD_1(strm) + | (Tok.KW_else, _, strm') => procedure_statement_PROD_1(strm) + | (Tok.SEMI, _, strm') => procedure_statement_PROD_1(strm) + | (Tok.LP, _, strm') => procedure_statement_PROD_2(strm) + | _ => fail() + (* end case *)) + | _ => fail() + (* end case *)) + end +fun compound_statement_NT (strm) = let + val (KW_begin_RES, KW_begin_SPAN, strm') = matchKW_begin(strm) + fun compound_statement_PROD_1_SUBRULE_1_NT (strm) = let + val (statement_RES, statement_SPAN, strm') = statement_NT(strm) + fun compound_statement_PROD_1_SUBRULE_1_PROD_1_SUBRULE_1_NT (strm) = let + val (SEMI_RES, SEMI_SPAN, strm') = matchSEMI(strm) + val (statement_RES, statement_SPAN, strm') = statement_NT(strm') + val FULL_SPAN = (#1(SEMI_SPAN), #2(statement_SPAN)) + in + ((statement_RES), FULL_SPAN, strm') + end + fun compound_statement_PROD_1_SUBRULE_1_PROD_1_SUBRULE_1_PRED (strm) = (case (lex(strm)) + of (Tok.SEMI, _, strm') => true + | _ => false + (* end case *)) + val (SR_RES, SR_SPAN, strm') = EBNF.closure(compound_statement_PROD_1_SUBRULE_1_PROD_1_SUBRULE_1_PRED, compound_statement_PROD_1_SUBRULE_1_PROD_1_SUBRULE_1_NT, strm') + val FULL_SPAN = (#1(statement_SPAN), #2(SR_SPAN)) + in + (UserCode.compound_statement_PROD_1_SUBRULE_1_PROD_1_ACT (SR_RES, KW_begin_RES, statement_RES, SR_SPAN : (Lex.pos * Lex.pos), KW_begin_SPAN : (Lex.pos * Lex.pos), statement_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)), + FULL_SPAN, strm') + end + fun compound_statement_PROD_1_SUBRULE_1_PRED (strm) = (case (lex(strm)) + of (Tok.KW_begin, _, strm') => true + | (Tok.KW_if, _, strm') => true + | (Tok.KW_while, _, strm') => true + | (Tok.ID(_), _, strm') => true + | _ => false + (* end case *)) + val (SR_RES, SR_SPAN, strm') = EBNF.optional(compound_statement_PROD_1_SUBRULE_1_PRED, compound_statement_PROD_1_SUBRULE_1_NT, strm') + val (KW_end_RES, KW_end_SPAN, strm') = matchKW_end(strm') + val FULL_SPAN = (#1(KW_begin_SPAN), #2(KW_end_SPAN)) + in + (UserCode.compound_statement_PROD_1_ACT (SR_RES, KW_begin_RES, KW_end_RES, SR_SPAN : (Lex.pos * Lex.pos), KW_begin_SPAN : (Lex.pos * Lex.pos), KW_end_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)), + FULL_SPAN, strm') + end +and statement_NT (strm) = let + fun statement_PROD_1 (strm) = let + val (variable_RES, variable_SPAN, strm') = variable_NT(strm) + val (ASSIGNOP_RES, ASSIGNOP_SPAN, strm') = matchASSIGNOP(strm') + val (exp_RES, exp_SPAN, strm') = exp_NT(strm') + val FULL_SPAN = (#1(variable_SPAN), #2(exp_SPAN)) + in + (UserCode.statement_PROD_1_ACT (exp_RES, variable_RES, ASSIGNOP_RES, exp_SPAN : (Lex.pos * Lex.pos), variable_SPAN : (Lex.pos * Lex.pos), ASSIGNOP_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)), + FULL_SPAN, strm') + end + fun statement_PROD_2 (strm) = let + val (procedure_statement_RES, procedure_statement_SPAN, strm') = procedure_statement_NT(strm) + val FULL_SPAN = (#1(procedure_statement_SPAN), + #2(procedure_statement_SPAN)) + in + ((procedure_statement_RES), FULL_SPAN, strm') + end + fun statement_PROD_3 (strm) = let + val (compound_statement_RES, compound_statement_SPAN, strm') = compound_statement_NT(strm) + val FULL_SPAN = (#1(compound_statement_SPAN), + #2(compound_statement_SPAN)) + in + ((compound_statement_RES), FULL_SPAN, strm') + end + fun statement_PROD_4 (strm) = let + val (KW_if_RES, KW_if_SPAN, strm') = matchKW_if(strm) + val (exp_RES, exp_SPAN, strm') = exp_NT(strm') + val (KW_then_RES, KW_then_SPAN, strm') = matchKW_then(strm') + val (statement1_RES, statement1_SPAN, strm') = statement_NT(strm') + val (KW_else_RES, KW_else_SPAN, strm') = matchKW_else(strm') + val (statement2_RES, statement2_SPAN, strm') = statement_NT(strm') + val FULL_SPAN = (#1(KW_if_SPAN), #2(statement2_SPAN)) + in + (UserCode.statement_PROD_4_ACT (exp_RES, KW_else_RES, KW_then_RES, statement1_RES, statement2_RES, KW_if_RES, exp_SPAN : (Lex.pos * Lex.pos), KW_else_SPAN : (Lex.pos * Lex.pos), KW_then_SPAN : (Lex.pos * Lex.pos), statement1_SPAN : (Lex.pos * Lex.pos), statement2_SPAN : (Lex.pos * Lex.pos), KW_if_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)), + FULL_SPAN, strm') + end + fun statement_PROD_5 (strm) = let + val (KW_while_RES, KW_while_SPAN, strm') = matchKW_while(strm) + val (exp_RES, exp_SPAN, strm') = exp_NT(strm') + val (KW_do_RES, KW_do_SPAN, strm') = matchKW_do(strm') + val (statement_RES, statement_SPAN, strm') = statement_NT(strm') + val FULL_SPAN = (#1(KW_while_SPAN), #2(statement_SPAN)) + in + (UserCode.statement_PROD_5_ACT (exp_RES, KW_while_RES, KW_do_RES, statement_RES, exp_SPAN : (Lex.pos * Lex.pos), KW_while_SPAN : (Lex.pos * Lex.pos), KW_do_SPAN : (Lex.pos * Lex.pos), statement_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)), + FULL_SPAN, strm') + end + in + (case (lex(strm)) + of (Tok.KW_while, _, strm') => statement_PROD_5(strm) + | (Tok.KW_begin, _, strm') => statement_PROD_3(strm) + | (Tok.ID(_), _, strm') => + (case (lex(strm')) + of (Tok.ASSIGNOP, _, strm') => statement_PROD_1(strm) + | (Tok.LSB, _, strm') => statement_PROD_1(strm) + | (Tok.KW_end, _, strm') => statement_PROD_2(strm) + | (Tok.KW_else, _, strm') => statement_PROD_2(strm) + | (Tok.SEMI, _, strm') => statement_PROD_2(strm) + | (Tok.LP, _, strm') => statement_PROD_2(strm) + | _ => fail() + (* end case *)) + | (Tok.KW_if, _, strm') => statement_PROD_4(strm) + | _ => fail() + (* end case *)) + end +fun standard_type_NT (strm) = let + fun standard_type_PROD_1 (strm) = let + val (KW_integer_RES, KW_integer_SPAN, strm') = matchKW_integer(strm) + val FULL_SPAN = (#1(KW_integer_SPAN), #2(KW_integer_SPAN)) + in + (UserCode.standard_type_PROD_1_ACT (KW_integer_RES, KW_integer_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)), + FULL_SPAN, strm') + end + fun standard_type_PROD_2 (strm) = let + val (KW_real_RES, KW_real_SPAN, strm') = matchKW_real(strm) + val FULL_SPAN = (#1(KW_real_SPAN), #2(KW_real_SPAN)) + in + (UserCode.standard_type_PROD_2_ACT (KW_real_RES, KW_real_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)), + FULL_SPAN, strm') + end + in + (case (lex(strm)) + of (Tok.KW_real, _, strm') => standard_type_PROD_2(strm) + | (Tok.KW_integer, _, strm') => standard_type_PROD_1(strm) + | _ => fail() + (* end case *)) + end +fun compound_type_NT (strm) = let + fun compound_type_PROD_1 (strm) = let + val (standard_type_RES, standard_type_SPAN, strm') = standard_type_NT(strm) + val FULL_SPAN = (#1(standard_type_SPAN), #2(standard_type_SPAN)) + in + ((standard_type_RES), FULL_SPAN, strm') + end + fun compound_type_PROD_2 (strm) = let + val (KW_array_RES, KW_array_SPAN, strm') = matchKW_array(strm) + val (LSB_RES, LSB_SPAN, strm') = matchLSB(strm') + val (INT1_RES, INT1_SPAN, strm') = matchINT(strm') + val (DOT1_RES, DOT1_SPAN, strm') = matchDOT(strm') + val (DOT2_RES, DOT2_SPAN, strm') = matchDOT(strm') + val (INT2_RES, INT2_SPAN, strm') = matchINT(strm') + val (RSB_RES, RSB_SPAN, strm') = matchRSB(strm') + val (KW_of_RES, KW_of_SPAN, strm') = matchKW_of(strm') + val (standard_type_RES, standard_type_SPAN, strm') = standard_type_NT(strm') + val FULL_SPAN = (#1(KW_array_SPAN), #2(standard_type_SPAN)) + in + (UserCode.compound_type_PROD_2_ACT (LSB_RES, RSB_RES, DOT1_RES, DOT2_RES, INT1_RES, INT2_RES, KW_array_RES, standard_type_RES, KW_of_RES, LSB_SPAN : (Lex.pos * Lex.pos), RSB_SPAN : (Lex.pos * Lex.pos), DOT1_SPAN : (Lex.pos * Lex.pos), DOT2_SPAN : (Lex.pos * Lex.pos), INT1_SPAN : (Lex.pos * Lex.pos), INT2_SPAN : (Lex.pos * Lex.pos), KW_array_SPAN : (Lex.pos * Lex.pos), standard_type_SPAN : (Lex.pos * Lex.pos), KW_of_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)), + FULL_SPAN, strm') + end + in + (case (lex(strm)) + of (Tok.KW_array, _, strm') => compound_type_PROD_2(strm) + | (Tok.KW_integer, _, strm') => compound_type_PROD_1(strm) + | (Tok.KW_real, _, strm') => compound_type_PROD_1(strm) + | _ => fail() + (* end case *)) + end +fun id_list_NT (strm) = let + val (ID_RES, ID_SPAN, strm') = matchID(strm) + fun id_list_PROD_1_SUBRULE_1_NT (strm) = let + val (COMMA_RES, COMMA_SPAN, strm') = matchCOMMA(strm) + val (ID_RES, ID_SPAN, strm') = matchID(strm') + val FULL_SPAN = (#1(COMMA_SPAN), #2(ID_SPAN)) + in + ((ID_RES), FULL_SPAN, strm') + end + fun id_list_PROD_1_SUBRULE_1_PRED (strm) = (case (lex(strm)) + of (Tok.COMMA, _, strm') => true + | _ => false + (* end case *)) + val (SR_RES, SR_SPAN, strm') = EBNF.closure(id_list_PROD_1_SUBRULE_1_PRED, id_list_PROD_1_SUBRULE_1_NT, strm') + val FULL_SPAN = (#1(ID_SPAN), #2(SR_SPAN)) + in + (UserCode.id_list_PROD_1_ACT (ID_RES, SR_RES, ID_SPAN : (Lex.pos * Lex.pos), SR_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)), + FULL_SPAN, strm') + end +fun id_list_type_NT (strm) = let + val (id_list_RES, id_list_SPAN, strm') = id_list_NT(strm) + val (COLON_RES, COLON_SPAN, strm') = matchCOLON(strm') + val (compound_type_RES, compound_type_SPAN, strm') = compound_type_NT(strm') + val FULL_SPAN = (#1(id_list_SPAN), #2(compound_type_SPAN)) + in + (UserCode.id_list_type_PROD_1_ACT (compound_type_RES, COLON_RES, id_list_RES, compound_type_SPAN : (Lex.pos * Lex.pos), COLON_SPAN : (Lex.pos * Lex.pos), id_list_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)), + FULL_SPAN, strm') + end +fun declaration_NT (strm) = let + val (KW_var_RES, KW_var_SPAN, strm') = matchKW_var(strm) + val (id_list_type_RES, id_list_type_SPAN, strm') = id_list_type_NT(strm') + val (SEMI_RES, SEMI_SPAN, strm') = matchSEMI(strm') + val FULL_SPAN = (#1(KW_var_SPAN), #2(SEMI_SPAN)) + in + (UserCode.declaration_PROD_1_ACT (SEMI_RES, id_list_type_RES, KW_var_RES, SEMI_SPAN : (Lex.pos * Lex.pos), id_list_type_SPAN : (Lex.pos * Lex.pos), KW_var_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)), + FULL_SPAN, strm') + end +fun parameter_list_NT (strm) = let + val (id_list_type_RES, id_list_type_SPAN, strm') = id_list_type_NT(strm) + fun parameter_list_PROD_1_SUBRULE_1_NT (strm) = let + val (SEMI_RES, SEMI_SPAN, strm') = matchSEMI(strm) + val (id_list_type_RES, id_list_type_SPAN, strm') = id_list_type_NT(strm') + val FULL_SPAN = (#1(SEMI_SPAN), #2(id_list_type_SPAN)) + in + ((id_list_type_RES), FULL_SPAN, strm') + end + fun parameter_list_PROD_1_SUBRULE_1_PRED (strm) = (case (lex(strm)) + of (Tok.SEMI, _, strm') => true + | _ => false + (* end case *)) + val (SR_RES, SR_SPAN, strm') = EBNF.closure(parameter_list_PROD_1_SUBRULE_1_PRED, parameter_list_PROD_1_SUBRULE_1_NT, strm') + val FULL_SPAN = (#1(id_list_type_SPAN), #2(SR_SPAN)) + in + (UserCode.parameter_list_PROD_1_ACT (SR_RES, id_list_type_RES, SR_SPAN : (Lex.pos * Lex.pos), id_list_type_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)), + FULL_SPAN, strm') + end +fun arguments_NT (strm) = let + fun arguments_PROD_1 (strm) = let + val (LP_RES, LP_SPAN, strm') = matchLP(strm) + val (parameter_list_RES, parameter_list_SPAN, strm') = parameter_list_NT(strm') + val (RP_RES, RP_SPAN, strm') = matchRP(strm') + val FULL_SPAN = (#1(LP_SPAN), #2(RP_SPAN)) + in + (UserCode.arguments_PROD_1_ACT (LP_RES, RP_RES, parameter_list_RES, LP_SPAN : (Lex.pos * Lex.pos), RP_SPAN : (Lex.pos * Lex.pos), parameter_list_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)), + FULL_SPAN, strm') + end + fun arguments_PROD_2 (strm) = let + val FULL_SPAN = (Err.getPos(strm), Err.getPos(strm)) + in + (UserCode.arguments_PROD_2_ACT (FULL_SPAN : (Lex.pos * Lex.pos)), + FULL_SPAN, strm) + end + in + (case (lex(strm)) + of (Tok.COLON, _, strm') => arguments_PROD_2(strm) + | (Tok.SEMI, _, strm') => arguments_PROD_2(strm) + | (Tok.LP, _, strm') => arguments_PROD_1(strm) + | _ => fail() + (* end case *)) + end +fun subprogram_head_NT (strm) = let + fun subprogram_head_PROD_1 (strm) = let + val (KW_function_RES, KW_function_SPAN, strm') = matchKW_function(strm) + val (ID_RES, ID_SPAN, strm') = matchID(strm') + val (arguments_RES, arguments_SPAN, strm') = arguments_NT(strm') + val (COLON_RES, COLON_SPAN, strm') = matchCOLON(strm') + val (standard_type_RES, standard_type_SPAN, strm') = standard_type_NT(strm') + val (SEMI_RES, SEMI_SPAN, strm') = matchSEMI(strm') + val FULL_SPAN = (#1(KW_function_SPAN), #2(SEMI_SPAN)) + in + (UserCode.subprogram_head_PROD_1_ACT (ID_RES, SEMI_RES, standard_type_RES, COLON_RES, arguments_RES, KW_function_RES, ID_SPAN : (Lex.pos * Lex.pos), SEMI_SPAN : (Lex.pos * Lex.pos), standard_type_SPAN : (Lex.pos * Lex.pos), COLON_SPAN : (Lex.pos * Lex.pos), arguments_SPAN : (Lex.pos * Lex.pos), KW_function_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)), + FULL_SPAN, strm') + end + fun subprogram_head_PROD_2 (strm) = let + val (KW_procedure_RES, KW_procedure_SPAN, strm') = matchKW_procedure(strm) + val (ID_RES, ID_SPAN, strm') = matchID(strm') + val (arguments_RES, arguments_SPAN, strm') = arguments_NT(strm') + val (SEMI_RES, SEMI_SPAN, strm') = matchSEMI(strm') + val FULL_SPAN = (#1(KW_procedure_SPAN), #2(SEMI_SPAN)) + in + (UserCode.subprogram_head_PROD_2_ACT (ID_RES, SEMI_RES, KW_procedure_RES, arguments_RES, ID_SPAN : (Lex.pos * Lex.pos), SEMI_SPAN : (Lex.pos * Lex.pos), KW_procedure_SPAN : (Lex.pos * Lex.pos), arguments_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)), + FULL_SPAN, strm') + end + in + (case (lex(strm)) + of (Tok.KW_procedure, _, strm') => subprogram_head_PROD_2(strm) + | (Tok.KW_function, _, strm') => subprogram_head_PROD_1(strm) + | _ => fail() + (* end case *)) + end +fun subprogram_declaration_NT (strm) = let + val (subprogram_head_RES, subprogram_head_SPAN, strm') = subprogram_head_NT(strm) + fun subprogram_declaration_PROD_1_SUBRULE_1_NT (strm) = let + val (declaration_RES, declaration_SPAN, strm') = declaration_NT(strm) + val FULL_SPAN = (#1(declaration_SPAN), #2(declaration_SPAN)) + in + ((declaration_RES), FULL_SPAN, strm') + end + fun subprogram_declaration_PROD_1_SUBRULE_1_PRED (strm) = (case (lex(strm)) + of (Tok.KW_var, _, strm') => true + | _ => false + (* end case *)) + val (SR_RES, SR_SPAN, strm') = EBNF.closure(subprogram_declaration_PROD_1_SUBRULE_1_PRED, subprogram_declaration_PROD_1_SUBRULE_1_NT, strm') + val (compound_statement_RES, compound_statement_SPAN, strm') = compound_statement_NT(strm') + val (SEMI_RES, SEMI_SPAN, strm') = matchSEMI(strm') + val FULL_SPAN = (#1(subprogram_head_SPAN), #2(SEMI_SPAN)) + in + (UserCode.subprogram_declaration_PROD_1_ACT (SR_RES, SEMI_RES, compound_statement_RES, subprogram_head_RES, SR_SPAN : (Lex.pos * Lex.pos), SEMI_SPAN : (Lex.pos * Lex.pos), compound_statement_SPAN : (Lex.pos * Lex.pos), subprogram_head_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)), + FULL_SPAN, strm') + end +fun program_NT (strm) = let + val (KW_program_RES, KW_program_SPAN, strm') = matchKW_program(strm) + val (ID_RES, ID_SPAN, strm') = matchID(strm') + val (LP_RES, LP_SPAN, strm') = matchLP(strm') + val (id_list_RES, id_list_SPAN, strm') = id_list_NT(strm') + val (RP_RES, RP_SPAN, strm') = matchRP(strm') + val (SEMI_RES, SEMI_SPAN, strm') = matchSEMI(strm') + fun program_PROD_1_SUBRULE_1_NT (strm) = let + val (declaration_RES, declaration_SPAN, strm') = declaration_NT(strm) + val FULL_SPAN = (#1(declaration_SPAN), #2(declaration_SPAN)) + in + ((declaration_RES), FULL_SPAN, strm') + end + fun program_PROD_1_SUBRULE_1_PRED (strm) = (case (lex(strm)) + of (Tok.KW_var, _, strm') => true + | _ => false + (* end case *)) + val (SR1_RES, SR1_SPAN, strm') = EBNF.closure(program_PROD_1_SUBRULE_1_PRED, program_PROD_1_SUBRULE_1_NT, strm') + fun program_PROD_1_SUBRULE_2_NT (strm) = let + val (subprogram_declaration_RES, subprogram_declaration_SPAN, strm') = subprogram_declaration_NT(strm) + val FULL_SPAN = (#1(subprogram_declaration_SPAN), + #2(subprogram_declaration_SPAN)) + in + ((subprogram_declaration_RES), FULL_SPAN, strm') + end + fun program_PROD_1_SUBRULE_2_PRED (strm) = (case (lex(strm)) + of (Tok.KW_function, _, strm') => true + | (Tok.KW_procedure, _, strm') => true + | _ => false + (* end case *)) + val (SR2_RES, SR2_SPAN, strm') = EBNF.closure(program_PROD_1_SUBRULE_2_PRED, program_PROD_1_SUBRULE_2_NT, strm') + val (compound_statement_RES, compound_statement_SPAN, strm') = compound_statement_NT(strm') + val (DOT_RES, DOT_SPAN, strm') = matchDOT(strm') + val FULL_SPAN = (#1(KW_program_SPAN), #2(DOT_SPAN)) + in + (UserCode.program_PROD_1_ACT (ID_RES, LP_RES, RP_RES, DOT_RES, SR1_RES, SR2_RES, SEMI_RES, compound_statement_RES, id_list_RES, KW_program_RES, ID_SPAN : (Lex.pos * Lex.pos), LP_SPAN : (Lex.pos * Lex.pos), RP_SPAN : (Lex.pos * Lex.pos), DOT_SPAN : (Lex.pos * Lex.pos), SR1_SPAN : (Lex.pos * Lex.pos), SR2_SPAN : (Lex.pos * Lex.pos), SEMI_SPAN : (Lex.pos * Lex.pos), compound_statement_SPAN : (Lex.pos * Lex.pos), id_list_SPAN : (Lex.pos * Lex.pos), KW_program_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)), + FULL_SPAN, strm') + end +in + (program_NT) +end +val program_NT = fn s => unwrap (Err.launch (eh, lexFn, program_NT , true) s) + +in (program_NT) end + in +fun parse lexFn s = let val (program_NT) = mk lexFn in program_NT s end + + end + +end diff --git a/ml-lpt/ml-antlr/examples/dragon/dragon.l b/ml-lpt/ml-antlr/examples/dragon/dragon.l new file mode 100644 index 0000000..168ea91 --- /dev/null +++ b/ml-lpt/ml-antlr/examples/dragon/dragon.l @@ -0,0 +1,55 @@ +%defs ( + open Tokens; + fun eof() = EOF + type lex_result = token +); + +%let eol=("\n"|"\013\n"|"\013"); +%let ws=("\009"|"\011"|"\012"|" "|{eol}); +%let alpha=[a-zA-Z]; +%let digit=[0-9]; +%let int={digit}+; +%let idchars=({alpha}|{digit}|"_"); +%let id={alpha}{idchars}*; + +{ws}+ => (skip()); +"{" [^}]* "}" => (skip()); + +"program" => (KW_program); +"(" => (LP); +")" => (RP); +";" => (SEMI); +"." => (DOT); +"," => (COMMA); +"var" => (KW_var); +":" => (COLON); +"array" => (KW_array); +"[" => (LSB); +"]" => (RSB); +"of" => (KW_of); +"integer" => (KW_integer); +"real" => (KW_real); +"function" => (KW_function); +"procedure" => (KW_procedure); +"begin" => (KW_begin); +"end" => (KW_end); +":=" => (ASSIGNOP); +"if" => (KW_if); +"then" => (KW_then); +"else" => (KW_else); +"while" => (KW_while); +"do" => (KW_do); +"=" | "<>" | "<" | "<=" | ">=" | ">" + => (RELOP yytext); +"+" | "-" | "or" + => (ADDOP yytext); +"*" | "/" | "div" | "mod" | "and" + => (MULOP yytext); +"not" => (KW_not); +"-" => (MINUS); + +{id} => (ID yytext); +{int} => (INT (valOf (IntInf.fromString yytext))); + +. => (print (concat ["Unexpected character: '", yytext, + "'\n"]); continue()); \ No newline at end of file diff --git a/ml-lpt/ml-antlr/examples/dragon/dragon.l.sml b/ml-lpt/ml-antlr/examples/dragon/dragon.l.sml new file mode 100644 index 0000000..22e057a --- /dev/null +++ b/ml-lpt/ml-antlr/examples/dragon/dragon.l.sml @@ -0,0 +1,2584 @@ +structure Mlex = struct + + datatype yystart_state = +INITIAL + structure UserDeclarations = + struct + + + open Tokens; + fun eof() = EOF + type lex_result = token + + + end + + local + datatype yymatch + = yyNO_MATCH + | yyMATCH of ULexBuffer.stream * action * yymatch + withtype action = ULexBuffer.stream * yymatch -> UserDeclarations.lex_result + + val yytable : ((UTF8.wchar * UTF8.wchar * int) list * int list) Vector.vector = +#[ +] + + fun innerLex (yystrm_, yyss_, yysm) = let + (* current start state *) + val yyss = ref yyss_ + fun YYBEGIN ss = (yyss := ss) + (* current input stream *) + val yystrm = ref yystrm_ + fun yygetPos() = ULexBuffer.getpos (!yystrm) + (* start position of token -- can be updated via skip() *) + val yystartPos = ref (yygetPos()) + (* get one char of input *) + fun yygetc strm = (case UTF8.getu ULexBuffer.getc strm + of (SOME (0w10, s')) => + (AntlrStreamPos.markNewLine yysm (ULexBuffer.getpos strm); + SOME (0w10, s')) + | x => x) + fun yygetList getc strm = let + val get1 = UTF8.getu getc + fun iter (strm, accum) = + (case get1 strm + of NONE => rev accum + | SOME (w, strm') => iter (strm', w::accum) + (* end case *)) + in + iter (strm, []) + end + (* create yytext *) + fun yymksubstr(strm) = ULexBuffer.subtract (strm, !yystrm) + fun yymktext(strm) = Substring.string (yymksubstr strm) + fun yymkunicode(strm) = yygetList Substring.getc (yymksubstr strm) + open UserDeclarations + fun lex () = let + fun yystuck (yyNO_MATCH) = raise Fail "lexer reached a stuck state" + | yystuck (yyMATCH (strm, action, old)) = + action (strm, old) + val yypos = yygetPos() + fun yygetlineNo strm = AntlrStreamPos.lineNo yysm (ULexBuffer.getpos strm) + fun yygetcolNo strm = AntlrStreamPos.colNo yysm (ULexBuffer.getpos strm) + fun yyactsToMatches (strm, [], oldMatches) = oldMatches + | yyactsToMatches (strm, act::acts, oldMatches) = + yyMATCH (strm, act, yyactsToMatches (strm, acts, oldMatches)) + fun yygo actTable = + (fn (~1, _, oldMatches) => yystuck oldMatches + | (curState, strm, oldMatches) => let + val (transitions, finals') = Vector.sub (yytable, curState) + val finals = map (fn i => Vector.sub (actTable, i)) finals' + fun tryfinal() = + yystuck (yyactsToMatches (strm, finals, oldMatches)) + fun find (c, []) = NONE + | find (c, (c1, c2, s)::ts) = + if c1 <= c andalso c <= c2 then SOME s + else find (c, ts) + in case yygetc strm + of SOME(c, strm') => + (case find (c, transitions) + of NONE => tryfinal() + | SOME n => + yygo actTable + (n, strm', + yyactsToMatches (strm, finals, oldMatches))) + | NONE => tryfinal() + end) + fun continue() = +let +fun yyAction0 (strm, lastMatch : yymatch) = (yystrm := strm; skip()) +fun yyAction1 (strm, lastMatch : yymatch) = (yystrm := strm; skip()) +fun yyAction2 (strm, lastMatch : yymatch) = (yystrm := strm; KW_program) +fun yyAction3 (strm, lastMatch : yymatch) = (yystrm := strm; LP) +fun yyAction4 (strm, lastMatch : yymatch) = (yystrm := strm; RP) +fun yyAction5 (strm, lastMatch : yymatch) = (yystrm := strm; SEMI) +fun yyAction6 (strm, lastMatch : yymatch) = (yystrm := strm; DOT) +fun yyAction7 (strm, lastMatch : yymatch) = (yystrm := strm; COMMA) +fun yyAction8 (strm, lastMatch : yymatch) = (yystrm := strm; KW_var) +fun yyAction9 (strm, lastMatch : yymatch) = (yystrm := strm; COLON) +fun yyAction10 (strm, lastMatch : yymatch) = (yystrm := strm; KW_array) +fun yyAction11 (strm, lastMatch : yymatch) = (yystrm := strm; LSB) +fun yyAction12 (strm, lastMatch : yymatch) = (yystrm := strm; RSB) +fun yyAction13 (strm, lastMatch : yymatch) = (yystrm := strm; KW_of) +fun yyAction14 (strm, lastMatch : yymatch) = (yystrm := strm; KW_integer) +fun yyAction15 (strm, lastMatch : yymatch) = (yystrm := strm; KW_real) +fun yyAction16 (strm, lastMatch : yymatch) = (yystrm := strm; KW_function) +fun yyAction17 (strm, lastMatch : yymatch) = (yystrm := strm; KW_procedure) +fun yyAction18 (strm, lastMatch : yymatch) = (yystrm := strm; KW_begin) +fun yyAction19 (strm, lastMatch : yymatch) = (yystrm := strm; KW_end) +fun yyAction20 (strm, lastMatch : yymatch) = (yystrm := strm; ASSIGNOP) +fun yyAction21 (strm, lastMatch : yymatch) = (yystrm := strm; KW_if) +fun yyAction22 (strm, lastMatch : yymatch) = (yystrm := strm; KW_then) +fun yyAction23 (strm, lastMatch : yymatch) = (yystrm := strm; KW_else) +fun yyAction24 (strm, lastMatch : yymatch) = (yystrm := strm; KW_while) +fun yyAction25 (strm, lastMatch : yymatch) = (yystrm := strm; KW_do) +fun yyAction26 (strm, lastMatch : yymatch) = let + val yytext = yymktext(strm) + in + yystrm := strm; RELOP yytext + end +fun yyAction27 (strm, lastMatch : yymatch) = let + val yytext = yymktext(strm) + in + yystrm := strm; ADDOP yytext + end +fun yyAction28 (strm, lastMatch : yymatch) = let + val yytext = yymktext(strm) + in + yystrm := strm; MULOP yytext + end +fun yyAction29 (strm, lastMatch : yymatch) = (yystrm := strm; KW_not) +fun yyAction30 (strm, lastMatch : yymatch) = (yystrm := strm; MINUS) +fun yyAction31 (strm, lastMatch : yymatch) = let + val yytext = yymktext(strm) + in + yystrm := strm; ID yytext + end +fun yyAction32 (strm, lastMatch : yymatch) = let + val yytext = yymktext(strm) + in + yystrm := strm; INT (valOf (IntInf.fromString yytext)) + end +fun yyAction33 (strm, lastMatch : yymatch) = let + val yytext = yymktext(strm) + in + yystrm := strm; + print (concat ["Unexpected character: '", yytext, + "'\n"]); continue() + end +fun yyQ36 (strm, lastMatch : yymatch) = yyAction1(strm, yyNO_MATCH) +fun yyQ35 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = 0wx7D + then yyQ36(strm', lastMatch) + else yyQ35(strm', lastMatch) + (* end case *)) +fun yyQ34 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction33(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = 0wx7D + then yyQ36(strm', yyMATCH(strm, yyAction33, yyNO_MATCH)) + else yyQ35(strm', yyMATCH(strm, yyAction33, yyNO_MATCH)) + (* end case *)) +fun yyQ37 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction31(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = 0wx5B + then yyAction31(strm, yyNO_MATCH) + else if inp < 0wx5B + then if inp = 0wx3A + then yyAction31(strm, yyNO_MATCH) + else if inp < 0wx3A + then if inp <= 0wx2F + then yyAction31(strm, yyNO_MATCH) + else yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp <= 0wx40 + then yyAction31(strm, yyNO_MATCH) + else yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp = 0wx60 + then yyAction31(strm, yyNO_MATCH) + else if inp < 0wx60 + then if inp = 0wx5F + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else yyAction31(strm, yyNO_MATCH) + else if inp <= 0wx7A + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else yyAction31(strm, yyNO_MATCH) + (* end case *)) +fun yyQ41 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction24(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = 0wx5B + then yyAction24(strm, yyNO_MATCH) + else if inp < 0wx5B + then if inp = 0wx3A + then yyAction24(strm, yyNO_MATCH) + else if inp < 0wx3A + then if inp <= 0wx2F + then yyAction24(strm, yyNO_MATCH) + else yyQ37(strm', yyMATCH(strm, yyAction24, yyNO_MATCH)) + else if inp <= 0wx40 + then yyAction24(strm, yyNO_MATCH) + else yyQ37(strm', yyMATCH(strm, yyAction24, yyNO_MATCH)) + else if inp = 0wx60 + then yyAction24(strm, yyNO_MATCH) + else if inp < 0wx60 + then if inp = 0wx5F + then yyQ37(strm', yyMATCH(strm, yyAction24, yyNO_MATCH)) + else yyAction24(strm, yyNO_MATCH) + else if inp <= 0wx7A + then yyQ37(strm', yyMATCH(strm, yyAction24, yyNO_MATCH)) + else yyAction24(strm, yyNO_MATCH) + (* end case *)) +fun yyQ40 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction31(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = 0wx5F + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp < 0wx5F + then if inp = 0wx3A + then yyAction31(strm, yyNO_MATCH) + else if inp < 0wx3A + then if inp <= 0wx2F + then yyAction31(strm, yyNO_MATCH) + else yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp = 0wx41 + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp < 0wx41 + then yyAction31(strm, yyNO_MATCH) + else if inp <= 0wx5A + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else yyAction31(strm, yyNO_MATCH) + else if inp = 0wx65 + then yyQ41(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp < 0wx65 + then if inp = 0wx60 + then yyAction31(strm, yyNO_MATCH) + else yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp <= 0wx7A + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else yyAction31(strm, yyNO_MATCH) + (* end case *)) +fun yyQ39 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction31(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = 0wx5F + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp < 0wx5F + then if inp = 0wx3A + then yyAction31(strm, yyNO_MATCH) + else if inp < 0wx3A + then if inp <= 0wx2F + then yyAction31(strm, yyNO_MATCH) + else yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp = 0wx41 + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp < 0wx41 + then yyAction31(strm, yyNO_MATCH) + else if inp <= 0wx5A + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else yyAction31(strm, yyNO_MATCH) + else if inp = 0wx6C + then yyQ40(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp < 0wx6C + then if inp = 0wx60 + then yyAction31(strm, yyNO_MATCH) + else yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp <= 0wx7A + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else yyAction31(strm, yyNO_MATCH) + (* end case *)) +fun yyQ38 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction31(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = 0wx5F + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp < 0wx5F + then if inp = 0wx3A + then yyAction31(strm, yyNO_MATCH) + else if inp < 0wx3A + then if inp <= 0wx2F + then yyAction31(strm, yyNO_MATCH) + else yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp = 0wx41 + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp < 0wx41 + then yyAction31(strm, yyNO_MATCH) + else if inp <= 0wx5A + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else yyAction31(strm, yyNO_MATCH) + else if inp = 0wx69 + then yyQ39(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp < 0wx69 + then if inp = 0wx60 + then yyAction31(strm, yyNO_MATCH) + else yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp <= 0wx7A + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else yyAction31(strm, yyNO_MATCH) + (* end case *)) +fun yyQ33 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction31(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = 0wx5F + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp < 0wx5F + then if inp = 0wx3A + then yyAction31(strm, yyNO_MATCH) + else if inp < 0wx3A + then if inp <= 0wx2F + then yyAction31(strm, yyNO_MATCH) + else yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp = 0wx41 + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp < 0wx41 + then yyAction31(strm, yyNO_MATCH) + else if inp <= 0wx5A + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else yyAction31(strm, yyNO_MATCH) + else if inp = 0wx68 + then yyQ38(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp < 0wx68 + then if inp = 0wx60 + then yyAction31(strm, yyNO_MATCH) + else yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp <= 0wx7A + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else yyAction31(strm, yyNO_MATCH) + (* end case *)) +fun yyQ43 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction8(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = 0wx5B + then yyAction8(strm, yyNO_MATCH) + else if inp < 0wx5B + then if inp = 0wx3A + then yyAction8(strm, yyNO_MATCH) + else if inp < 0wx3A + then if inp <= 0wx2F + then yyAction8(strm, yyNO_MATCH) + else yyQ37(strm', yyMATCH(strm, yyAction8, yyNO_MATCH)) + else if inp <= 0wx40 + then yyAction8(strm, yyNO_MATCH) + else yyQ37(strm', yyMATCH(strm, yyAction8, yyNO_MATCH)) + else if inp = 0wx60 + then yyAction8(strm, yyNO_MATCH) + else if inp < 0wx60 + then if inp = 0wx5F + then yyQ37(strm', yyMATCH(strm, yyAction8, yyNO_MATCH)) + else yyAction8(strm, yyNO_MATCH) + else if inp <= 0wx7A + then yyQ37(strm', yyMATCH(strm, yyAction8, yyNO_MATCH)) + else yyAction8(strm, yyNO_MATCH) + (* end case *)) +fun yyQ42 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction31(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = 0wx5F + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp < 0wx5F + then if inp = 0wx3A + then yyAction31(strm, yyNO_MATCH) + else if inp < 0wx3A + then if inp <= 0wx2F + then yyAction31(strm, yyNO_MATCH) + else yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp = 0wx41 + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp < 0wx41 + then yyAction31(strm, yyNO_MATCH) + else if inp <= 0wx5A + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else yyAction31(strm, yyNO_MATCH) + else if inp = 0wx72 + then yyQ43(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp < 0wx72 + then if inp = 0wx60 + then yyAction31(strm, yyNO_MATCH) + else yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp <= 0wx7A + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else yyAction31(strm, yyNO_MATCH) + (* end case *)) +fun yyQ32 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction31(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = 0wx5F + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp < 0wx5F + then if inp = 0wx3A + then yyAction31(strm, yyNO_MATCH) + else if inp < 0wx3A + then if inp <= 0wx2F + then yyAction31(strm, yyNO_MATCH) + else yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp = 0wx41 + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp < 0wx41 + then yyAction31(strm, yyNO_MATCH) + else if inp <= 0wx5A + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else yyAction31(strm, yyNO_MATCH) + else if inp = 0wx62 + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp < 0wx62 + then if inp = 0wx60 + then yyAction31(strm, yyNO_MATCH) + else yyQ42(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp <= 0wx7A + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else yyAction31(strm, yyNO_MATCH) + (* end case *)) +fun yyQ46 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction22(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = 0wx5B + then yyAction22(strm, yyNO_MATCH) + else if inp < 0wx5B + then if inp = 0wx3A + then yyAction22(strm, yyNO_MATCH) + else if inp < 0wx3A + then if inp <= 0wx2F + then yyAction22(strm, yyNO_MATCH) + else yyQ37(strm', yyMATCH(strm, yyAction22, yyNO_MATCH)) + else if inp <= 0wx40 + then yyAction22(strm, yyNO_MATCH) + else yyQ37(strm', yyMATCH(strm, yyAction22, yyNO_MATCH)) + else if inp = 0wx60 + then yyAction22(strm, yyNO_MATCH) + else if inp < 0wx60 + then if inp = 0wx5F + then yyQ37(strm', yyMATCH(strm, yyAction22, yyNO_MATCH)) + else yyAction22(strm, yyNO_MATCH) + else if inp <= 0wx7A + then yyQ37(strm', yyMATCH(strm, yyAction22, yyNO_MATCH)) + else yyAction22(strm, yyNO_MATCH) + (* end case *)) +fun yyQ45 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction31(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = 0wx5F + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp < 0wx5F + then if inp = 0wx3A + then yyAction31(strm, yyNO_MATCH) + else if inp < 0wx3A + then if inp <= 0wx2F + then yyAction31(strm, yyNO_MATCH) + else yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp = 0wx41 + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp < 0wx41 + then yyAction31(strm, yyNO_MATCH) + else if inp <= 0wx5A + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else yyAction31(strm, yyNO_MATCH) + else if inp = 0wx6E + then yyQ46(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp < 0wx6E + then if inp = 0wx60 + then yyAction31(strm, yyNO_MATCH) + else yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp <= 0wx7A + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else yyAction31(strm, yyNO_MATCH) + (* end case *)) +fun yyQ44 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction31(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = 0wx5F + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp < 0wx5F + then if inp = 0wx3A + then yyAction31(strm, yyNO_MATCH) + else if inp < 0wx3A + then if inp <= 0wx2F + then yyAction31(strm, yyNO_MATCH) + else yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp = 0wx41 + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp < 0wx41 + then yyAction31(strm, yyNO_MATCH) + else if inp <= 0wx5A + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else yyAction31(strm, yyNO_MATCH) + else if inp = 0wx65 + then yyQ45(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp < 0wx65 + then if inp = 0wx60 + then yyAction31(strm, yyNO_MATCH) + else yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp <= 0wx7A + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else yyAction31(strm, yyNO_MATCH) + (* end case *)) +fun yyQ31 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction31(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = 0wx5F + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp < 0wx5F + then if inp = 0wx3A + then yyAction31(strm, yyNO_MATCH) + else if inp < 0wx3A + then if inp <= 0wx2F + then yyAction31(strm, yyNO_MATCH) + else yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp = 0wx41 + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp < 0wx41 + then yyAction31(strm, yyNO_MATCH) + else if inp <= 0wx5A + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else yyAction31(strm, yyNO_MATCH) + else if inp = 0wx68 + then yyQ44(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp < 0wx68 + then if inp = 0wx60 + then yyAction31(strm, yyNO_MATCH) + else yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp <= 0wx7A + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else yyAction31(strm, yyNO_MATCH) + (* end case *)) +fun yyQ49 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction15(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = 0wx5B + then yyAction15(strm, yyNO_MATCH) + else if inp < 0wx5B + then if inp = 0wx3A + then yyAction15(strm, yyNO_MATCH) + else if inp < 0wx3A + then if inp <= 0wx2F + then yyAction15(strm, yyNO_MATCH) + else yyQ37(strm', yyMATCH(strm, yyAction15, yyNO_MATCH)) + else if inp <= 0wx40 + then yyAction15(strm, yyNO_MATCH) + else yyQ37(strm', yyMATCH(strm, yyAction15, yyNO_MATCH)) + else if inp = 0wx60 + then yyAction15(strm, yyNO_MATCH) + else if inp < 0wx60 + then if inp = 0wx5F + then yyQ37(strm', yyMATCH(strm, yyAction15, yyNO_MATCH)) + else yyAction15(strm, yyNO_MATCH) + else if inp <= 0wx7A + then yyQ37(strm', yyMATCH(strm, yyAction15, yyNO_MATCH)) + else yyAction15(strm, yyNO_MATCH) + (* end case *)) +fun yyQ48 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction31(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = 0wx5F + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp < 0wx5F + then if inp = 0wx3A + then yyAction31(strm, yyNO_MATCH) + else if inp < 0wx3A + then if inp <= 0wx2F + then yyAction31(strm, yyNO_MATCH) + else yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp = 0wx41 + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp < 0wx41 + then yyAction31(strm, yyNO_MATCH) + else if inp <= 0wx5A + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else yyAction31(strm, yyNO_MATCH) + else if inp = 0wx6C + then yyQ49(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp < 0wx6C + then if inp = 0wx60 + then yyAction31(strm, yyNO_MATCH) + else yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp <= 0wx7A + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else yyAction31(strm, yyNO_MATCH) + (* end case *)) +fun yyQ47 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction31(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = 0wx5F + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp < 0wx5F + then if inp = 0wx3A + then yyAction31(strm, yyNO_MATCH) + else if inp < 0wx3A + then if inp <= 0wx2F + then yyAction31(strm, yyNO_MATCH) + else yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp = 0wx41 + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp < 0wx41 + then yyAction31(strm, yyNO_MATCH) + else if inp <= 0wx5A + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else yyAction31(strm, yyNO_MATCH) + else if inp = 0wx62 + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp < 0wx62 + then if inp = 0wx60 + then yyAction31(strm, yyNO_MATCH) + else yyQ48(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp <= 0wx7A + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else yyAction31(strm, yyNO_MATCH) + (* end case *)) +fun yyQ30 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction31(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = 0wx5F + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp < 0wx5F + then if inp = 0wx3A + then yyAction31(strm, yyNO_MATCH) + else if inp < 0wx3A + then if inp <= 0wx2F + then yyAction31(strm, yyNO_MATCH) + else yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp = 0wx41 + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp < 0wx41 + then yyAction31(strm, yyNO_MATCH) + else if inp <= 0wx5A + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else yyAction31(strm, yyNO_MATCH) + else if inp = 0wx65 + then yyQ47(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp < 0wx65 + then if inp = 0wx60 + then yyAction31(strm, yyNO_MATCH) + else yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp <= 0wx7A + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else yyAction31(strm, yyNO_MATCH) + (* end case *)) +fun yyQ56 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction2(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = 0wx5B + then yyAction2(strm, yyNO_MATCH) + else if inp < 0wx5B + then if inp = 0wx3A + then yyAction2(strm, yyNO_MATCH) + else if inp < 0wx3A + then if inp <= 0wx2F + then yyAction2(strm, yyNO_MATCH) + else yyQ37(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp <= 0wx40 + then yyAction2(strm, yyNO_MATCH) + else yyQ37(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp = 0wx60 + then yyAction2(strm, yyNO_MATCH) + else if inp < 0wx60 + then if inp = 0wx5F + then yyQ37(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else yyAction2(strm, yyNO_MATCH) + else if inp <= 0wx7A + then yyQ37(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else yyAction2(strm, yyNO_MATCH) + (* end case *)) +fun yyQ55 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction31(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = 0wx5F + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp < 0wx5F + then if inp = 0wx3A + then yyAction31(strm, yyNO_MATCH) + else if inp < 0wx3A + then if inp <= 0wx2F + then yyAction31(strm, yyNO_MATCH) + else yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp = 0wx41 + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp < 0wx41 + then yyAction31(strm, yyNO_MATCH) + else if inp <= 0wx5A + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else yyAction31(strm, yyNO_MATCH) + else if inp = 0wx6D + then yyQ56(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp < 0wx6D + then if inp = 0wx60 + then yyAction31(strm, yyNO_MATCH) + else yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp <= 0wx7A + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else yyAction31(strm, yyNO_MATCH) + (* end case *)) +fun yyQ54 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction31(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = 0wx5F + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp < 0wx5F + then if inp = 0wx3A + then yyAction31(strm, yyNO_MATCH) + else if inp < 0wx3A + then if inp <= 0wx2F + then yyAction31(strm, yyNO_MATCH) + else yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp = 0wx41 + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp < 0wx41 + then yyAction31(strm, yyNO_MATCH) + else if inp <= 0wx5A + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else yyAction31(strm, yyNO_MATCH) + else if inp = 0wx62 + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp < 0wx62 + then if inp = 0wx60 + then yyAction31(strm, yyNO_MATCH) + else yyQ55(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp <= 0wx7A + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else yyAction31(strm, yyNO_MATCH) + (* end case *)) +fun yyQ53 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction31(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = 0wx5F + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp < 0wx5F + then if inp = 0wx3A + then yyAction31(strm, yyNO_MATCH) + else if inp < 0wx3A + then if inp <= 0wx2F + then yyAction31(strm, yyNO_MATCH) + else yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp = 0wx41 + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp < 0wx41 + then yyAction31(strm, yyNO_MATCH) + else if inp <= 0wx5A + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else yyAction31(strm, yyNO_MATCH) + else if inp = 0wx72 + then yyQ54(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp < 0wx72 + then if inp = 0wx60 + then yyAction31(strm, yyNO_MATCH) + else yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp <= 0wx7A + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else yyAction31(strm, yyNO_MATCH) + (* end case *)) +fun yyQ61 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction17(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = 0wx5B + then yyAction17(strm, yyNO_MATCH) + else if inp < 0wx5B + then if inp = 0wx3A + then yyAction17(strm, yyNO_MATCH) + else if inp < 0wx3A + then if inp <= 0wx2F + then yyAction17(strm, yyNO_MATCH) + else yyQ37(strm', yyMATCH(strm, yyAction17, yyNO_MATCH)) + else if inp <= 0wx40 + then yyAction17(strm, yyNO_MATCH) + else yyQ37(strm', yyMATCH(strm, yyAction17, yyNO_MATCH)) + else if inp = 0wx60 + then yyAction17(strm, yyNO_MATCH) + else if inp < 0wx60 + then if inp = 0wx5F + then yyQ37(strm', yyMATCH(strm, yyAction17, yyNO_MATCH)) + else yyAction17(strm, yyNO_MATCH) + else if inp <= 0wx7A + then yyQ37(strm', yyMATCH(strm, yyAction17, yyNO_MATCH)) + else yyAction17(strm, yyNO_MATCH) + (* end case *)) +fun yyQ60 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction31(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = 0wx5F + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp < 0wx5F + then if inp = 0wx3A + then yyAction31(strm, yyNO_MATCH) + else if inp < 0wx3A + then if inp <= 0wx2F + then yyAction31(strm, yyNO_MATCH) + else yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp = 0wx41 + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp < 0wx41 + then yyAction31(strm, yyNO_MATCH) + else if inp <= 0wx5A + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else yyAction31(strm, yyNO_MATCH) + else if inp = 0wx65 + then yyQ61(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp < 0wx65 + then if inp = 0wx60 + then yyAction31(strm, yyNO_MATCH) + else yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp <= 0wx7A + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else yyAction31(strm, yyNO_MATCH) + (* end case *)) +fun yyQ59 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction31(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = 0wx5F + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp < 0wx5F + then if inp = 0wx3A + then yyAction31(strm, yyNO_MATCH) + else if inp < 0wx3A + then if inp <= 0wx2F + then yyAction31(strm, yyNO_MATCH) + else yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp = 0wx41 + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp < 0wx41 + then yyAction31(strm, yyNO_MATCH) + else if inp <= 0wx5A + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else yyAction31(strm, yyNO_MATCH) + else if inp = 0wx72 + then yyQ60(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp < 0wx72 + then if inp = 0wx60 + then yyAction31(strm, yyNO_MATCH) + else yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp <= 0wx7A + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else yyAction31(strm, yyNO_MATCH) + (* end case *)) +fun yyQ58 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction31(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = 0wx5F + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp < 0wx5F + then if inp = 0wx3A + then yyAction31(strm, yyNO_MATCH) + else if inp < 0wx3A + then if inp <= 0wx2F + then yyAction31(strm, yyNO_MATCH) + else yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp = 0wx41 + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp < 0wx41 + then yyAction31(strm, yyNO_MATCH) + else if inp <= 0wx5A + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else yyAction31(strm, yyNO_MATCH) + else if inp = 0wx75 + then yyQ59(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp < 0wx75 + then if inp = 0wx60 + then yyAction31(strm, yyNO_MATCH) + else yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp <= 0wx7A + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else yyAction31(strm, yyNO_MATCH) + (* end case *)) +fun yyQ57 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction31(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = 0wx5F + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp < 0wx5F + then if inp = 0wx3A + then yyAction31(strm, yyNO_MATCH) + else if inp < 0wx3A + then if inp <= 0wx2F + then yyAction31(strm, yyNO_MATCH) + else yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp = 0wx41 + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp < 0wx41 + then yyAction31(strm, yyNO_MATCH) + else if inp <= 0wx5A + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else yyAction31(strm, yyNO_MATCH) + else if inp = 0wx64 + then yyQ58(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp < 0wx64 + then if inp = 0wx60 + then yyAction31(strm, yyNO_MATCH) + else yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp <= 0wx7A + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else yyAction31(strm, yyNO_MATCH) + (* end case *)) +fun yyQ52 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction31(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = 0wx5F + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp < 0wx5F + then if inp = 0wx3A + then yyAction31(strm, yyNO_MATCH) + else if inp < 0wx3A + then if inp <= 0wx2F + then yyAction31(strm, yyNO_MATCH) + else yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp = 0wx41 + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp < 0wx41 + then yyAction31(strm, yyNO_MATCH) + else if inp <= 0wx5A + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else yyAction31(strm, yyNO_MATCH) + else if inp = 0wx65 + then yyQ57(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp < 0wx65 + then if inp = 0wx60 + then yyAction31(strm, yyNO_MATCH) + else yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp <= 0wx7A + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else yyAction31(strm, yyNO_MATCH) + (* end case *)) +fun yyQ51 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction31(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = 0wx60 + then yyAction31(strm, yyNO_MATCH) + else if inp < 0wx60 + then if inp = 0wx41 + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp < 0wx41 + then if inp = 0wx30 + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp < 0wx30 + then yyAction31(strm, yyNO_MATCH) + else if inp <= 0wx39 + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else yyAction31(strm, yyNO_MATCH) + else if inp = 0wx5B + then yyAction31(strm, yyNO_MATCH) + else if inp < 0wx5B + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp = 0wx5F + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else yyAction31(strm, yyNO_MATCH) + else if inp = 0wx67 + then yyQ53(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp < 0wx67 + then if inp = 0wx63 + then yyQ52(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp <= 0wx7A + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else yyAction31(strm, yyNO_MATCH) + (* end case *)) +fun yyQ50 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction31(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = 0wx5F + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp < 0wx5F + then if inp = 0wx3A + then yyAction31(strm, yyNO_MATCH) + else if inp < 0wx3A + then if inp <= 0wx2F + then yyAction31(strm, yyNO_MATCH) + else yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp = 0wx41 + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp < 0wx41 + then yyAction31(strm, yyNO_MATCH) + else if inp <= 0wx5A + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else yyAction31(strm, yyNO_MATCH) + else if inp = 0wx6F + then yyQ51(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp < 0wx6F + then if inp = 0wx60 + then yyAction31(strm, yyNO_MATCH) + else yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp <= 0wx7A + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else yyAction31(strm, yyNO_MATCH) + (* end case *)) +fun yyQ29 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction31(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = 0wx5F + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp < 0wx5F + then if inp = 0wx3A + then yyAction31(strm, yyNO_MATCH) + else if inp < 0wx3A + then if inp <= 0wx2F + then yyAction31(strm, yyNO_MATCH) + else yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp = 0wx41 + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp < 0wx41 + then yyAction31(strm, yyNO_MATCH) + else if inp <= 0wx5A + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else yyAction31(strm, yyNO_MATCH) + else if inp = 0wx72 + then yyQ50(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp < 0wx72 + then if inp = 0wx60 + then yyAction31(strm, yyNO_MATCH) + else yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp <= 0wx7A + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else yyAction31(strm, yyNO_MATCH) + (* end case *)) +fun yyQ63 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction27(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = 0wx5B + then yyAction27(strm, yyNO_MATCH) + else if inp < 0wx5B + then if inp = 0wx3A + then yyAction27(strm, yyNO_MATCH) + else if inp < 0wx3A + then if inp <= 0wx2F + then yyAction27(strm, yyNO_MATCH) + else yyQ37(strm', yyMATCH(strm, yyAction27, yyNO_MATCH)) + else if inp <= 0wx40 + then yyAction27(strm, yyNO_MATCH) + else yyQ37(strm', yyMATCH(strm, yyAction27, yyNO_MATCH)) + else if inp = 0wx60 + then yyAction27(strm, yyNO_MATCH) + else if inp < 0wx60 + then if inp = 0wx5F + then yyQ37(strm', yyMATCH(strm, yyAction27, yyNO_MATCH)) + else yyAction27(strm, yyNO_MATCH) + else if inp <= 0wx7A + then yyQ37(strm', yyMATCH(strm, yyAction27, yyNO_MATCH)) + else yyAction27(strm, yyNO_MATCH) + (* end case *)) +fun yyQ62 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction13(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = 0wx5B + then yyAction13(strm, yyNO_MATCH) + else if inp < 0wx5B + then if inp = 0wx3A + then yyAction13(strm, yyNO_MATCH) + else if inp < 0wx3A + then if inp <= 0wx2F + then yyAction13(strm, yyNO_MATCH) + else yyQ37(strm', yyMATCH(strm, yyAction13, yyNO_MATCH)) + else if inp <= 0wx40 + then yyAction13(strm, yyNO_MATCH) + else yyQ37(strm', yyMATCH(strm, yyAction13, yyNO_MATCH)) + else if inp = 0wx60 + then yyAction13(strm, yyNO_MATCH) + else if inp < 0wx60 + then if inp = 0wx5F + then yyQ37(strm', yyMATCH(strm, yyAction13, yyNO_MATCH)) + else yyAction13(strm, yyNO_MATCH) + else if inp <= 0wx7A + then yyQ37(strm', yyMATCH(strm, yyAction13, yyNO_MATCH)) + else yyAction13(strm, yyNO_MATCH) + (* end case *)) +fun yyQ28 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction31(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = 0wx60 + then yyAction31(strm, yyNO_MATCH) + else if inp < 0wx60 + then if inp = 0wx41 + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp < 0wx41 + then if inp = 0wx30 + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp < 0wx30 + then yyAction31(strm, yyNO_MATCH) + else if inp <= 0wx39 + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else yyAction31(strm, yyNO_MATCH) + else if inp = 0wx5B + then yyAction31(strm, yyNO_MATCH) + else if inp < 0wx5B + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp = 0wx5F + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else yyAction31(strm, yyNO_MATCH) + else if inp = 0wx72 + then yyQ63(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp < 0wx72 + then if inp = 0wx66 + then yyQ62(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp <= 0wx7A + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else yyAction31(strm, yyNO_MATCH) + (* end case *)) +fun yyQ65 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction29(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = 0wx5B + then yyAction29(strm, yyNO_MATCH) + else if inp < 0wx5B + then if inp = 0wx3A + then yyAction29(strm, yyNO_MATCH) + else if inp < 0wx3A + then if inp <= 0wx2F + then yyAction29(strm, yyNO_MATCH) + else yyQ37(strm', yyMATCH(strm, yyAction29, yyNO_MATCH)) + else if inp <= 0wx40 + then yyAction29(strm, yyNO_MATCH) + else yyQ37(strm', yyMATCH(strm, yyAction29, yyNO_MATCH)) + else if inp = 0wx60 + then yyAction29(strm, yyNO_MATCH) + else if inp < 0wx60 + then if inp = 0wx5F + then yyQ37(strm', yyMATCH(strm, yyAction29, yyNO_MATCH)) + else yyAction29(strm, yyNO_MATCH) + else if inp <= 0wx7A + then yyQ37(strm', yyMATCH(strm, yyAction29, yyNO_MATCH)) + else yyAction29(strm, yyNO_MATCH) + (* end case *)) +fun yyQ64 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction31(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = 0wx5F + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp < 0wx5F + then if inp = 0wx3A + then yyAction31(strm, yyNO_MATCH) + else if inp < 0wx3A + then if inp <= 0wx2F + then yyAction31(strm, yyNO_MATCH) + else yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp = 0wx41 + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp < 0wx41 + then yyAction31(strm, yyNO_MATCH) + else if inp <= 0wx5A + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else yyAction31(strm, yyNO_MATCH) + else if inp = 0wx74 + then yyQ65(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp < 0wx74 + then if inp = 0wx60 + then yyAction31(strm, yyNO_MATCH) + else yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp <= 0wx7A + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else yyAction31(strm, yyNO_MATCH) + (* end case *)) +fun yyQ27 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction31(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = 0wx5F + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp < 0wx5F + then if inp = 0wx3A + then yyAction31(strm, yyNO_MATCH) + else if inp < 0wx3A + then if inp <= 0wx2F + then yyAction31(strm, yyNO_MATCH) + else yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp = 0wx41 + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp < 0wx41 + then yyAction31(strm, yyNO_MATCH) + else if inp <= 0wx5A + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else yyAction31(strm, yyNO_MATCH) + else if inp = 0wx6F + then yyQ64(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp < 0wx6F + then if inp = 0wx60 + then yyAction31(strm, yyNO_MATCH) + else yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp <= 0wx7A + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else yyAction31(strm, yyNO_MATCH) + (* end case *)) +fun yyQ67 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction28(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = 0wx5B + then yyAction28(strm, yyNO_MATCH) + else if inp < 0wx5B + then if inp = 0wx3A + then yyAction28(strm, yyNO_MATCH) + else if inp < 0wx3A + then if inp <= 0wx2F + then yyAction28(strm, yyNO_MATCH) + else yyQ37(strm', yyMATCH(strm, yyAction28, yyNO_MATCH)) + else if inp <= 0wx40 + then yyAction28(strm, yyNO_MATCH) + else yyQ37(strm', yyMATCH(strm, yyAction28, yyNO_MATCH)) + else if inp = 0wx60 + then yyAction28(strm, yyNO_MATCH) + else if inp < 0wx60 + then if inp = 0wx5F + then yyQ37(strm', yyMATCH(strm, yyAction28, yyNO_MATCH)) + else yyAction28(strm, yyNO_MATCH) + else if inp <= 0wx7A + then yyQ37(strm', yyMATCH(strm, yyAction28, yyNO_MATCH)) + else yyAction28(strm, yyNO_MATCH) + (* end case *)) +fun yyQ66 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction31(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = 0wx5F + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp < 0wx5F + then if inp = 0wx3A + then yyAction31(strm, yyNO_MATCH) + else if inp < 0wx3A + then if inp <= 0wx2F + then yyAction31(strm, yyNO_MATCH) + else yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp = 0wx41 + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp < 0wx41 + then yyAction31(strm, yyNO_MATCH) + else if inp <= 0wx5A + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else yyAction31(strm, yyNO_MATCH) + else if inp = 0wx64 + then yyQ67(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp < 0wx64 + then if inp = 0wx60 + then yyAction31(strm, yyNO_MATCH) + else yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp <= 0wx7A + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else yyAction31(strm, yyNO_MATCH) + (* end case *)) +fun yyQ26 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction31(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = 0wx5F + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp < 0wx5F + then if inp = 0wx3A + then yyAction31(strm, yyNO_MATCH) + else if inp < 0wx3A + then if inp <= 0wx2F + then yyAction31(strm, yyNO_MATCH) + else yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp = 0wx41 + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp < 0wx41 + then yyAction31(strm, yyNO_MATCH) + else if inp <= 0wx5A + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else yyAction31(strm, yyNO_MATCH) + else if inp = 0wx6F + then yyQ66(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp < 0wx6F + then if inp = 0wx60 + then yyAction31(strm, yyNO_MATCH) + else yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp <= 0wx7A + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else yyAction31(strm, yyNO_MATCH) + (* end case *)) +fun yyQ74 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction14(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = 0wx5B + then yyAction14(strm, yyNO_MATCH) + else if inp < 0wx5B + then if inp = 0wx3A + then yyAction14(strm, yyNO_MATCH) + else if inp < 0wx3A + then if inp <= 0wx2F + then yyAction14(strm, yyNO_MATCH) + else yyQ37(strm', yyMATCH(strm, yyAction14, yyNO_MATCH)) + else if inp <= 0wx40 + then yyAction14(strm, yyNO_MATCH) + else yyQ37(strm', yyMATCH(strm, yyAction14, yyNO_MATCH)) + else if inp = 0wx60 + then yyAction14(strm, yyNO_MATCH) + else if inp < 0wx60 + then if inp = 0wx5F + then yyQ37(strm', yyMATCH(strm, yyAction14, yyNO_MATCH)) + else yyAction14(strm, yyNO_MATCH) + else if inp <= 0wx7A + then yyQ37(strm', yyMATCH(strm, yyAction14, yyNO_MATCH)) + else yyAction14(strm, yyNO_MATCH) + (* end case *)) +fun yyQ73 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction31(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = 0wx5F + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp < 0wx5F + then if inp = 0wx3A + then yyAction31(strm, yyNO_MATCH) + else if inp < 0wx3A + then if inp <= 0wx2F + then yyAction31(strm, yyNO_MATCH) + else yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp = 0wx41 + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp < 0wx41 + then yyAction31(strm, yyNO_MATCH) + else if inp <= 0wx5A + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else yyAction31(strm, yyNO_MATCH) + else if inp = 0wx72 + then yyQ74(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp < 0wx72 + then if inp = 0wx60 + then yyAction31(strm, yyNO_MATCH) + else yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp <= 0wx7A + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else yyAction31(strm, yyNO_MATCH) + (* end case *)) +fun yyQ72 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction31(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = 0wx5F + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp < 0wx5F + then if inp = 0wx3A + then yyAction31(strm, yyNO_MATCH) + else if inp < 0wx3A + then if inp <= 0wx2F + then yyAction31(strm, yyNO_MATCH) + else yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp = 0wx41 + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp < 0wx41 + then yyAction31(strm, yyNO_MATCH) + else if inp <= 0wx5A + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else yyAction31(strm, yyNO_MATCH) + else if inp = 0wx65 + then yyQ73(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp < 0wx65 + then if inp = 0wx60 + then yyAction31(strm, yyNO_MATCH) + else yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp <= 0wx7A + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else yyAction31(strm, yyNO_MATCH) + (* end case *)) +fun yyQ71 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction31(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = 0wx5F + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp < 0wx5F + then if inp = 0wx3A + then yyAction31(strm, yyNO_MATCH) + else if inp < 0wx3A + then if inp <= 0wx2F + then yyAction31(strm, yyNO_MATCH) + else yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp = 0wx41 + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp < 0wx41 + then yyAction31(strm, yyNO_MATCH) + else if inp <= 0wx5A + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else yyAction31(strm, yyNO_MATCH) + else if inp = 0wx67 + then yyQ72(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp < 0wx67 + then if inp = 0wx60 + then yyAction31(strm, yyNO_MATCH) + else yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp <= 0wx7A + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else yyAction31(strm, yyNO_MATCH) + (* end case *)) +fun yyQ70 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction31(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = 0wx5F + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp < 0wx5F + then if inp = 0wx3A + then yyAction31(strm, yyNO_MATCH) + else if inp < 0wx3A + then if inp <= 0wx2F + then yyAction31(strm, yyNO_MATCH) + else yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp = 0wx41 + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp < 0wx41 + then yyAction31(strm, yyNO_MATCH) + else if inp <= 0wx5A + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else yyAction31(strm, yyNO_MATCH) + else if inp = 0wx65 + then yyQ71(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp < 0wx65 + then if inp = 0wx60 + then yyAction31(strm, yyNO_MATCH) + else yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp <= 0wx7A + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else yyAction31(strm, yyNO_MATCH) + (* end case *)) +fun yyQ69 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction31(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = 0wx5F + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp < 0wx5F + then if inp = 0wx3A + then yyAction31(strm, yyNO_MATCH) + else if inp < 0wx3A + then if inp <= 0wx2F + then yyAction31(strm, yyNO_MATCH) + else yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp = 0wx41 + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp < 0wx41 + then yyAction31(strm, yyNO_MATCH) + else if inp <= 0wx5A + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else yyAction31(strm, yyNO_MATCH) + else if inp = 0wx74 + then yyQ70(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp < 0wx74 + then if inp = 0wx60 + then yyAction31(strm, yyNO_MATCH) + else yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp <= 0wx7A + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else yyAction31(strm, yyNO_MATCH) + (* end case *)) +fun yyQ68 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction21(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = 0wx5B + then yyAction21(strm, yyNO_MATCH) + else if inp < 0wx5B + then if inp = 0wx3A + then yyAction21(strm, yyNO_MATCH) + else if inp < 0wx3A + then if inp <= 0wx2F + then yyAction21(strm, yyNO_MATCH) + else yyQ37(strm', yyMATCH(strm, yyAction21, yyNO_MATCH)) + else if inp <= 0wx40 + then yyAction21(strm, yyNO_MATCH) + else yyQ37(strm', yyMATCH(strm, yyAction21, yyNO_MATCH)) + else if inp = 0wx60 + then yyAction21(strm, yyNO_MATCH) + else if inp < 0wx60 + then if inp = 0wx5F + then yyQ37(strm', yyMATCH(strm, yyAction21, yyNO_MATCH)) + else yyAction21(strm, yyNO_MATCH) + else if inp <= 0wx7A + then yyQ37(strm', yyMATCH(strm, yyAction21, yyNO_MATCH)) + else yyAction21(strm, yyNO_MATCH) + (* end case *)) +fun yyQ25 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction31(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = 0wx60 + then yyAction31(strm, yyNO_MATCH) + else if inp < 0wx60 + then if inp = 0wx41 + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp < 0wx41 + then if inp = 0wx30 + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp < 0wx30 + then yyAction31(strm, yyNO_MATCH) + else if inp <= 0wx39 + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else yyAction31(strm, yyNO_MATCH) + else if inp = 0wx5B + then yyAction31(strm, yyNO_MATCH) + else if inp < 0wx5B + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp = 0wx5F + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else yyAction31(strm, yyNO_MATCH) + else if inp = 0wx6E + then yyQ69(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp < 0wx6E + then if inp = 0wx66 + then yyQ68(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp <= 0wx7A + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else yyAction31(strm, yyNO_MATCH) + (* end case *)) +fun yyQ81 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction16(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = 0wx5B + then yyAction16(strm, yyNO_MATCH) + else if inp < 0wx5B + then if inp = 0wx3A + then yyAction16(strm, yyNO_MATCH) + else if inp < 0wx3A + then if inp <= 0wx2F + then yyAction16(strm, yyNO_MATCH) + else yyQ37(strm', yyMATCH(strm, yyAction16, yyNO_MATCH)) + else if inp <= 0wx40 + then yyAction16(strm, yyNO_MATCH) + else yyQ37(strm', yyMATCH(strm, yyAction16, yyNO_MATCH)) + else if inp = 0wx60 + then yyAction16(strm, yyNO_MATCH) + else if inp < 0wx60 + then if inp = 0wx5F + then yyQ37(strm', yyMATCH(strm, yyAction16, yyNO_MATCH)) + else yyAction16(strm, yyNO_MATCH) + else if inp <= 0wx7A + then yyQ37(strm', yyMATCH(strm, yyAction16, yyNO_MATCH)) + else yyAction16(strm, yyNO_MATCH) + (* end case *)) +fun yyQ80 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction31(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = 0wx5F + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp < 0wx5F + then if inp = 0wx3A + then yyAction31(strm, yyNO_MATCH) + else if inp < 0wx3A + then if inp <= 0wx2F + then yyAction31(strm, yyNO_MATCH) + else yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp = 0wx41 + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp < 0wx41 + then yyAction31(strm, yyNO_MATCH) + else if inp <= 0wx5A + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else yyAction31(strm, yyNO_MATCH) + else if inp = 0wx6E + then yyQ81(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp < 0wx6E + then if inp = 0wx60 + then yyAction31(strm, yyNO_MATCH) + else yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp <= 0wx7A + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else yyAction31(strm, yyNO_MATCH) + (* end case *)) +fun yyQ79 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction31(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = 0wx5F + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp < 0wx5F + then if inp = 0wx3A + then yyAction31(strm, yyNO_MATCH) + else if inp < 0wx3A + then if inp <= 0wx2F + then yyAction31(strm, yyNO_MATCH) + else yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp = 0wx41 + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp < 0wx41 + then yyAction31(strm, yyNO_MATCH) + else if inp <= 0wx5A + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else yyAction31(strm, yyNO_MATCH) + else if inp = 0wx6F + then yyQ80(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp < 0wx6F + then if inp = 0wx60 + then yyAction31(strm, yyNO_MATCH) + else yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp <= 0wx7A + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else yyAction31(strm, yyNO_MATCH) + (* end case *)) +fun yyQ78 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction31(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = 0wx5F + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp < 0wx5F + then if inp = 0wx3A + then yyAction31(strm, yyNO_MATCH) + else if inp < 0wx3A + then if inp <= 0wx2F + then yyAction31(strm, yyNO_MATCH) + else yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp = 0wx41 + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp < 0wx41 + then yyAction31(strm, yyNO_MATCH) + else if inp <= 0wx5A + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else yyAction31(strm, yyNO_MATCH) + else if inp = 0wx69 + then yyQ79(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp < 0wx69 + then if inp = 0wx60 + then yyAction31(strm, yyNO_MATCH) + else yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp <= 0wx7A + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else yyAction31(strm, yyNO_MATCH) + (* end case *)) +fun yyQ77 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction31(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = 0wx5F + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp < 0wx5F + then if inp = 0wx3A + then yyAction31(strm, yyNO_MATCH) + else if inp < 0wx3A + then if inp <= 0wx2F + then yyAction31(strm, yyNO_MATCH) + else yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp = 0wx41 + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp < 0wx41 + then yyAction31(strm, yyNO_MATCH) + else if inp <= 0wx5A + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else yyAction31(strm, yyNO_MATCH) + else if inp = 0wx74 + then yyQ78(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp < 0wx74 + then if inp = 0wx60 + then yyAction31(strm, yyNO_MATCH) + else yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp <= 0wx7A + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else yyAction31(strm, yyNO_MATCH) + (* end case *)) +fun yyQ76 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction31(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = 0wx5F + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp < 0wx5F + then if inp = 0wx3A + then yyAction31(strm, yyNO_MATCH) + else if inp < 0wx3A + then if inp <= 0wx2F + then yyAction31(strm, yyNO_MATCH) + else yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp = 0wx41 + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp < 0wx41 + then yyAction31(strm, yyNO_MATCH) + else if inp <= 0wx5A + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else yyAction31(strm, yyNO_MATCH) + else if inp = 0wx63 + then yyQ77(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp < 0wx63 + then if inp = 0wx60 + then yyAction31(strm, yyNO_MATCH) + else yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp <= 0wx7A + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else yyAction31(strm, yyNO_MATCH) + (* end case *)) +fun yyQ75 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction31(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = 0wx5F + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp < 0wx5F + then if inp = 0wx3A + then yyAction31(strm, yyNO_MATCH) + else if inp < 0wx3A + then if inp <= 0wx2F + then yyAction31(strm, yyNO_MATCH) + else yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp = 0wx41 + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp < 0wx41 + then yyAction31(strm, yyNO_MATCH) + else if inp <= 0wx5A + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else yyAction31(strm, yyNO_MATCH) + else if inp = 0wx6E + then yyQ76(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp < 0wx6E + then if inp = 0wx60 + then yyAction31(strm, yyNO_MATCH) + else yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp <= 0wx7A + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else yyAction31(strm, yyNO_MATCH) + (* end case *)) +fun yyQ24 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction31(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = 0wx5F + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp < 0wx5F + then if inp = 0wx3A + then yyAction31(strm, yyNO_MATCH) + else if inp < 0wx3A + then if inp <= 0wx2F + then yyAction31(strm, yyNO_MATCH) + else yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp = 0wx41 + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp < 0wx41 + then yyAction31(strm, yyNO_MATCH) + else if inp <= 0wx5A + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else yyAction31(strm, yyNO_MATCH) + else if inp = 0wx75 + then yyQ75(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp < 0wx75 + then if inp = 0wx60 + then yyAction31(strm, yyNO_MATCH) + else yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp <= 0wx7A + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else yyAction31(strm, yyNO_MATCH) + (* end case *)) +fun yyQ84 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction19(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = 0wx5B + then yyAction19(strm, yyNO_MATCH) + else if inp < 0wx5B + then if inp = 0wx3A + then yyAction19(strm, yyNO_MATCH) + else if inp < 0wx3A + then if inp <= 0wx2F + then yyAction19(strm, yyNO_MATCH) + else yyQ37(strm', yyMATCH(strm, yyAction19, yyNO_MATCH)) + else if inp <= 0wx40 + then yyAction19(strm, yyNO_MATCH) + else yyQ37(strm', yyMATCH(strm, yyAction19, yyNO_MATCH)) + else if inp = 0wx60 + then yyAction19(strm, yyNO_MATCH) + else if inp < 0wx60 + then if inp = 0wx5F + then yyQ37(strm', yyMATCH(strm, yyAction19, yyNO_MATCH)) + else yyAction19(strm, yyNO_MATCH) + else if inp <= 0wx7A + then yyQ37(strm', yyMATCH(strm, yyAction19, yyNO_MATCH)) + else yyAction19(strm, yyNO_MATCH) + (* end case *)) +fun yyQ83 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction31(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = 0wx5F + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp < 0wx5F + then if inp = 0wx3A + then yyAction31(strm, yyNO_MATCH) + else if inp < 0wx3A + then if inp <= 0wx2F + then yyAction31(strm, yyNO_MATCH) + else yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp = 0wx41 + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp < 0wx41 + then yyAction31(strm, yyNO_MATCH) + else if inp <= 0wx5A + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else yyAction31(strm, yyNO_MATCH) + else if inp = 0wx64 + then yyQ84(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp < 0wx64 + then if inp = 0wx60 + then yyAction31(strm, yyNO_MATCH) + else yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp <= 0wx7A + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else yyAction31(strm, yyNO_MATCH) + (* end case *)) +fun yyQ86 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction23(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = 0wx5B + then yyAction23(strm, yyNO_MATCH) + else if inp < 0wx5B + then if inp = 0wx3A + then yyAction23(strm, yyNO_MATCH) + else if inp < 0wx3A + then if inp <= 0wx2F + then yyAction23(strm, yyNO_MATCH) + else yyQ37(strm', yyMATCH(strm, yyAction23, yyNO_MATCH)) + else if inp <= 0wx40 + then yyAction23(strm, yyNO_MATCH) + else yyQ37(strm', yyMATCH(strm, yyAction23, yyNO_MATCH)) + else if inp = 0wx60 + then yyAction23(strm, yyNO_MATCH) + else if inp < 0wx60 + then if inp = 0wx5F + then yyQ37(strm', yyMATCH(strm, yyAction23, yyNO_MATCH)) + else yyAction23(strm, yyNO_MATCH) + else if inp <= 0wx7A + then yyQ37(strm', yyMATCH(strm, yyAction23, yyNO_MATCH)) + else yyAction23(strm, yyNO_MATCH) + (* end case *)) +fun yyQ85 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction31(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = 0wx5F + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp < 0wx5F + then if inp = 0wx3A + then yyAction31(strm, yyNO_MATCH) + else if inp < 0wx3A + then if inp <= 0wx2F + then yyAction31(strm, yyNO_MATCH) + else yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp = 0wx41 + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp < 0wx41 + then yyAction31(strm, yyNO_MATCH) + else if inp <= 0wx5A + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else yyAction31(strm, yyNO_MATCH) + else if inp = 0wx65 + then yyQ86(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp < 0wx65 + then if inp = 0wx60 + then yyAction31(strm, yyNO_MATCH) + else yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp <= 0wx7A + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else yyAction31(strm, yyNO_MATCH) + (* end case *)) +fun yyQ82 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction31(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = 0wx5F + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp < 0wx5F + then if inp = 0wx3A + then yyAction31(strm, yyNO_MATCH) + else if inp < 0wx3A + then if inp <= 0wx2F + then yyAction31(strm, yyNO_MATCH) + else yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp = 0wx41 + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp < 0wx41 + then yyAction31(strm, yyNO_MATCH) + else if inp <= 0wx5A + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else yyAction31(strm, yyNO_MATCH) + else if inp = 0wx73 + then yyQ85(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp < 0wx73 + then if inp = 0wx60 + then yyAction31(strm, yyNO_MATCH) + else yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp <= 0wx7A + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else yyAction31(strm, yyNO_MATCH) + (* end case *)) +fun yyQ23 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction31(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = 0wx60 + then yyAction31(strm, yyNO_MATCH) + else if inp < 0wx60 + then if inp = 0wx41 + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp < 0wx41 + then if inp = 0wx30 + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp < 0wx30 + then yyAction31(strm, yyNO_MATCH) + else if inp <= 0wx39 + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else yyAction31(strm, yyNO_MATCH) + else if inp = 0wx5B + then yyAction31(strm, yyNO_MATCH) + else if inp < 0wx5B + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp = 0wx5F + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else yyAction31(strm, yyNO_MATCH) + else if inp = 0wx6E + then yyQ83(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp < 0wx6E + then if inp = 0wx6C + then yyQ82(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp <= 0wx7A + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else yyAction31(strm, yyNO_MATCH) + (* end case *)) +fun yyQ88 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction25(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = 0wx5B + then yyAction25(strm, yyNO_MATCH) + else if inp < 0wx5B + then if inp = 0wx3A + then yyAction25(strm, yyNO_MATCH) + else if inp < 0wx3A + then if inp <= 0wx2F + then yyAction25(strm, yyNO_MATCH) + else yyQ37(strm', yyMATCH(strm, yyAction25, yyNO_MATCH)) + else if inp <= 0wx40 + then yyAction25(strm, yyNO_MATCH) + else yyQ37(strm', yyMATCH(strm, yyAction25, yyNO_MATCH)) + else if inp = 0wx60 + then yyAction25(strm, yyNO_MATCH) + else if inp < 0wx60 + then if inp = 0wx5F + then yyQ37(strm', yyMATCH(strm, yyAction25, yyNO_MATCH)) + else yyAction25(strm, yyNO_MATCH) + else if inp <= 0wx7A + then yyQ37(strm', yyMATCH(strm, yyAction25, yyNO_MATCH)) + else yyAction25(strm, yyNO_MATCH) + (* end case *)) +fun yyQ87 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction31(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = 0wx5F + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp < 0wx5F + then if inp = 0wx3A + then yyAction31(strm, yyNO_MATCH) + else if inp < 0wx3A + then if inp <= 0wx2F + then yyAction31(strm, yyNO_MATCH) + else yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp = 0wx41 + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp < 0wx41 + then yyAction31(strm, yyNO_MATCH) + else if inp <= 0wx5A + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else yyAction31(strm, yyNO_MATCH) + else if inp = 0wx76 + then yyQ67(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp < 0wx76 + then if inp = 0wx60 + then yyAction31(strm, yyNO_MATCH) + else yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp <= 0wx7A + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else yyAction31(strm, yyNO_MATCH) + (* end case *)) +fun yyQ22 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction31(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = 0wx60 + then yyAction31(strm, yyNO_MATCH) + else if inp < 0wx60 + then if inp = 0wx41 + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp < 0wx41 + then if inp = 0wx30 + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp < 0wx30 + then yyAction31(strm, yyNO_MATCH) + else if inp <= 0wx39 + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else yyAction31(strm, yyNO_MATCH) + else if inp = 0wx5B + then yyAction31(strm, yyNO_MATCH) + else if inp < 0wx5B + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp = 0wx5F + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else yyAction31(strm, yyNO_MATCH) + else if inp = 0wx6F + then yyQ88(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp < 0wx6F + then if inp = 0wx69 + then yyQ87(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp <= 0wx7A + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else yyAction31(strm, yyNO_MATCH) + (* end case *)) +fun yyQ92 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction18(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = 0wx5B + then yyAction18(strm, yyNO_MATCH) + else if inp < 0wx5B + then if inp = 0wx3A + then yyAction18(strm, yyNO_MATCH) + else if inp < 0wx3A + then if inp <= 0wx2F + then yyAction18(strm, yyNO_MATCH) + else yyQ37(strm', yyMATCH(strm, yyAction18, yyNO_MATCH)) + else if inp <= 0wx40 + then yyAction18(strm, yyNO_MATCH) + else yyQ37(strm', yyMATCH(strm, yyAction18, yyNO_MATCH)) + else if inp = 0wx60 + then yyAction18(strm, yyNO_MATCH) + else if inp < 0wx60 + then if inp = 0wx5F + then yyQ37(strm', yyMATCH(strm, yyAction18, yyNO_MATCH)) + else yyAction18(strm, yyNO_MATCH) + else if inp <= 0wx7A + then yyQ37(strm', yyMATCH(strm, yyAction18, yyNO_MATCH)) + else yyAction18(strm, yyNO_MATCH) + (* end case *)) +fun yyQ91 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction31(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = 0wx5F + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp < 0wx5F + then if inp = 0wx3A + then yyAction31(strm, yyNO_MATCH) + else if inp < 0wx3A + then if inp <= 0wx2F + then yyAction31(strm, yyNO_MATCH) + else yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp = 0wx41 + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp < 0wx41 + then yyAction31(strm, yyNO_MATCH) + else if inp <= 0wx5A + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else yyAction31(strm, yyNO_MATCH) + else if inp = 0wx6E + then yyQ92(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp < 0wx6E + then if inp = 0wx60 + then yyAction31(strm, yyNO_MATCH) + else yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp <= 0wx7A + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else yyAction31(strm, yyNO_MATCH) + (* end case *)) +fun yyQ90 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction31(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = 0wx5F + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp < 0wx5F + then if inp = 0wx3A + then yyAction31(strm, yyNO_MATCH) + else if inp < 0wx3A + then if inp <= 0wx2F + then yyAction31(strm, yyNO_MATCH) + else yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp = 0wx41 + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp < 0wx41 + then yyAction31(strm, yyNO_MATCH) + else if inp <= 0wx5A + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else yyAction31(strm, yyNO_MATCH) + else if inp = 0wx69 + then yyQ91(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp < 0wx69 + then if inp = 0wx60 + then yyAction31(strm, yyNO_MATCH) + else yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp <= 0wx7A + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else yyAction31(strm, yyNO_MATCH) + (* end case *)) +fun yyQ89 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction31(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = 0wx5F + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp < 0wx5F + then if inp = 0wx3A + then yyAction31(strm, yyNO_MATCH) + else if inp < 0wx3A + then if inp <= 0wx2F + then yyAction31(strm, yyNO_MATCH) + else yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp = 0wx41 + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp < 0wx41 + then yyAction31(strm, yyNO_MATCH) + else if inp <= 0wx5A + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else yyAction31(strm, yyNO_MATCH) + else if inp = 0wx67 + then yyQ90(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp < 0wx67 + then if inp = 0wx60 + then yyAction31(strm, yyNO_MATCH) + else yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp <= 0wx7A + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else yyAction31(strm, yyNO_MATCH) + (* end case *)) +fun yyQ21 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction31(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = 0wx5F + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp < 0wx5F + then if inp = 0wx3A + then yyAction31(strm, yyNO_MATCH) + else if inp < 0wx3A + then if inp <= 0wx2F + then yyAction31(strm, yyNO_MATCH) + else yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp = 0wx41 + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp < 0wx41 + then yyAction31(strm, yyNO_MATCH) + else if inp <= 0wx5A + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else yyAction31(strm, yyNO_MATCH) + else if inp = 0wx65 + then yyQ89(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp < 0wx65 + then if inp = 0wx60 + then yyAction31(strm, yyNO_MATCH) + else yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp <= 0wx7A + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else yyAction31(strm, yyNO_MATCH) + (* end case *)) +fun yyQ96 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction10(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = 0wx5B + then yyAction10(strm, yyNO_MATCH) + else if inp < 0wx5B + then if inp = 0wx3A + then yyAction10(strm, yyNO_MATCH) + else if inp < 0wx3A + then if inp <= 0wx2F + then yyAction10(strm, yyNO_MATCH) + else yyQ37(strm', yyMATCH(strm, yyAction10, yyNO_MATCH)) + else if inp <= 0wx40 + then yyAction10(strm, yyNO_MATCH) + else yyQ37(strm', yyMATCH(strm, yyAction10, yyNO_MATCH)) + else if inp = 0wx60 + then yyAction10(strm, yyNO_MATCH) + else if inp < 0wx60 + then if inp = 0wx5F + then yyQ37(strm', yyMATCH(strm, yyAction10, yyNO_MATCH)) + else yyAction10(strm, yyNO_MATCH) + else if inp <= 0wx7A + then yyQ37(strm', yyMATCH(strm, yyAction10, yyNO_MATCH)) + else yyAction10(strm, yyNO_MATCH) + (* end case *)) +fun yyQ95 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction31(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = 0wx5F + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp < 0wx5F + then if inp = 0wx3A + then yyAction31(strm, yyNO_MATCH) + else if inp < 0wx3A + then if inp <= 0wx2F + then yyAction31(strm, yyNO_MATCH) + else yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp = 0wx41 + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp < 0wx41 + then yyAction31(strm, yyNO_MATCH) + else if inp <= 0wx5A + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else yyAction31(strm, yyNO_MATCH) + else if inp = 0wx79 + then yyQ96(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp < 0wx79 + then if inp = 0wx60 + then yyAction31(strm, yyNO_MATCH) + else yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp = 0wx7A + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else yyAction31(strm, yyNO_MATCH) + (* end case *)) +fun yyQ94 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction31(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = 0wx5F + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp < 0wx5F + then if inp = 0wx3A + then yyAction31(strm, yyNO_MATCH) + else if inp < 0wx3A + then if inp <= 0wx2F + then yyAction31(strm, yyNO_MATCH) + else yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp = 0wx41 + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp < 0wx41 + then yyAction31(strm, yyNO_MATCH) + else if inp <= 0wx5A + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else yyAction31(strm, yyNO_MATCH) + else if inp = 0wx62 + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp < 0wx62 + then if inp = 0wx60 + then yyAction31(strm, yyNO_MATCH) + else yyQ95(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp <= 0wx7A + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else yyAction31(strm, yyNO_MATCH) + (* end case *)) +fun yyQ93 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction31(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = 0wx5F + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp < 0wx5F + then if inp = 0wx3A + then yyAction31(strm, yyNO_MATCH) + else if inp < 0wx3A + then if inp <= 0wx2F + then yyAction31(strm, yyNO_MATCH) + else yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp = 0wx41 + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp < 0wx41 + then yyAction31(strm, yyNO_MATCH) + else if inp <= 0wx5A + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else yyAction31(strm, yyNO_MATCH) + else if inp = 0wx72 + then yyQ94(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp < 0wx72 + then if inp = 0wx60 + then yyAction31(strm, yyNO_MATCH) + else yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp <= 0wx7A + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else yyAction31(strm, yyNO_MATCH) + (* end case *)) +fun yyQ20 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction31(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = 0wx60 + then yyAction31(strm, yyNO_MATCH) + else if inp < 0wx60 + then if inp = 0wx41 + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp < 0wx41 + then if inp = 0wx30 + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp < 0wx30 + then yyAction31(strm, yyNO_MATCH) + else if inp <= 0wx39 + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else yyAction31(strm, yyNO_MATCH) + else if inp = 0wx5B + then yyAction31(strm, yyNO_MATCH) + else if inp < 0wx5B + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp = 0wx5F + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else yyAction31(strm, yyNO_MATCH) + else if inp = 0wx72 + then yyQ93(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp < 0wx72 + then if inp = 0wx6E + then yyQ66(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp <= 0wx7A + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else yyAction31(strm, yyNO_MATCH) + (* end case *)) +fun yyQ19 (strm, lastMatch : yymatch) = yyAction12(strm, yyNO_MATCH) +fun yyQ18 (strm, lastMatch : yymatch) = yyAction11(strm, yyNO_MATCH) +fun yyQ17 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction31(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = 0wx5B + then yyAction31(strm, yyNO_MATCH) + else if inp < 0wx5B + then if inp = 0wx3A + then yyAction31(strm, yyNO_MATCH) + else if inp < 0wx3A + then if inp <= 0wx2F + then yyAction31(strm, yyNO_MATCH) + else yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp <= 0wx40 + then yyAction31(strm, yyNO_MATCH) + else yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp = 0wx60 + then yyAction31(strm, yyNO_MATCH) + else if inp < 0wx60 + then if inp = 0wx5F + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else yyAction31(strm, yyNO_MATCH) + else if inp <= 0wx7A + then yyQ37(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else yyAction31(strm, yyNO_MATCH) + (* end case *)) +fun yyQ97 (strm, lastMatch : yymatch) = yyAction26(strm, yyNO_MATCH) +fun yyQ16 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction26(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = 0wx3D + then yyQ97(strm', yyMATCH(strm, yyAction26, yyNO_MATCH)) + else yyAction26(strm, yyNO_MATCH) + (* end case *)) +fun yyQ15 (strm, lastMatch : yymatch) = yyAction26(strm, yyNO_MATCH) +fun yyQ14 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction26(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = 0wx3D + then yyQ97(strm', yyMATCH(strm, yyAction26, yyNO_MATCH)) + else if inp < 0wx3D + then yyAction26(strm, yyNO_MATCH) + else if inp <= 0wx3E + then yyQ97(strm', yyMATCH(strm, yyAction26, yyNO_MATCH)) + else yyAction26(strm, yyNO_MATCH) + (* end case *)) +fun yyQ13 (strm, lastMatch : yymatch) = yyAction5(strm, yyNO_MATCH) +fun yyQ98 (strm, lastMatch : yymatch) = yyAction20(strm, yyNO_MATCH) +fun yyQ12 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction9(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = 0wx3D + then yyQ98(strm', yyMATCH(strm, yyAction9, yyNO_MATCH)) + else yyAction9(strm, yyNO_MATCH) + (* end case *)) +fun yyQ99 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction32(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = 0wx30 + then yyQ99(strm', yyMATCH(strm, yyAction32, yyNO_MATCH)) + else if inp < 0wx30 + then yyAction32(strm, yyNO_MATCH) + else if inp <= 0wx39 + then yyQ99(strm', yyMATCH(strm, yyAction32, yyNO_MATCH)) + else yyAction32(strm, yyNO_MATCH) + (* end case *)) +fun yyQ11 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction32(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = 0wx30 + then yyQ99(strm', yyMATCH(strm, yyAction32, yyNO_MATCH)) + else if inp < 0wx30 + then yyAction32(strm, yyNO_MATCH) + else if inp <= 0wx39 + then yyQ99(strm', yyMATCH(strm, yyAction32, yyNO_MATCH)) + else yyAction32(strm, yyNO_MATCH) + (* end case *)) +fun yyQ10 (strm, lastMatch : yymatch) = yyAction6(strm, yyNO_MATCH) +fun yyQ9 (strm, lastMatch : yymatch) = yyAction27(strm, yyNO_MATCH) +fun yyQ8 (strm, lastMatch : yymatch) = yyAction7(strm, yyNO_MATCH) +fun yyQ7 (strm, lastMatch : yymatch) = yyAction27(strm, yyNO_MATCH) +fun yyQ6 (strm, lastMatch : yymatch) = yyAction28(strm, yyNO_MATCH) +fun yyQ5 (strm, lastMatch : yymatch) = yyAction4(strm, yyNO_MATCH) +fun yyQ4 (strm, lastMatch : yymatch) = yyAction3(strm, yyNO_MATCH) +fun yyQ101 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction0(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = 0wxE + then yyAction0(strm, yyNO_MATCH) + else if inp < 0wxE + then if inp = 0wx9 + then yyQ100(strm', yyMATCH(strm, yyAction0, yyNO_MATCH)) + else if inp < 0wx9 + then yyAction0(strm, yyNO_MATCH) + else if inp = 0wxD + then yyQ101(strm', yyMATCH(strm, yyAction0, yyNO_MATCH)) + else yyQ100(strm', yyMATCH(strm, yyAction0, yyNO_MATCH)) + else if inp = 0wx20 + then yyQ100(strm', yyMATCH(strm, yyAction0, yyNO_MATCH)) + else yyAction0(strm, yyNO_MATCH) + (* end case *)) +and yyQ100 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction0(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = 0wxE + then yyAction0(strm, yyNO_MATCH) + else if inp < 0wxE + then if inp = 0wx9 + then yyQ100(strm', yyMATCH(strm, yyAction0, yyNO_MATCH)) + else if inp < 0wx9 + then yyAction0(strm, yyNO_MATCH) + else if inp = 0wxD + then yyQ101(strm', yyMATCH(strm, yyAction0, yyNO_MATCH)) + else yyQ100(strm', yyMATCH(strm, yyAction0, yyNO_MATCH)) + else if inp = 0wx20 + then yyQ100(strm', yyMATCH(strm, yyAction0, yyNO_MATCH)) + else yyAction0(strm, yyNO_MATCH) + (* end case *)) +fun yyQ3 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction0(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = 0wxE + then yyAction0(strm, yyNO_MATCH) + else if inp < 0wxE + then if inp = 0wx9 + then yyQ100(strm', yyMATCH(strm, yyAction0, yyNO_MATCH)) + else if inp < 0wx9 + then yyAction0(strm, yyNO_MATCH) + else if inp = 0wxD + then yyQ101(strm', yyMATCH(strm, yyAction0, yyNO_MATCH)) + else yyQ100(strm', yyMATCH(strm, yyAction0, yyNO_MATCH)) + else if inp = 0wx20 + then yyQ100(strm', yyMATCH(strm, yyAction0, yyNO_MATCH)) + else yyAction0(strm, yyNO_MATCH) + (* end case *)) +fun yyQ2 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction0(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = 0wxE + then yyAction0(strm, yyNO_MATCH) + else if inp < 0wxE + then if inp = 0wx9 + then yyQ100(strm', yyMATCH(strm, yyAction0, yyNO_MATCH)) + else if inp < 0wx9 + then yyAction0(strm, yyNO_MATCH) + else if inp = 0wxD + then yyQ101(strm', yyMATCH(strm, yyAction0, yyNO_MATCH)) + else yyQ100(strm', yyMATCH(strm, yyAction0, yyNO_MATCH)) + else if inp = 0wx20 + then yyQ100(strm', yyMATCH(strm, yyAction0, yyNO_MATCH)) + else yyAction0(strm, yyNO_MATCH) + (* end case *)) +fun yyQ1 (strm, lastMatch : yymatch) = yyAction33(strm, yyNO_MATCH) +fun yyQ0 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => + if ULexBuffer.eof(strm) + then UserDeclarations.eof(()) + else yystuck(lastMatch) + | SOME(inp, strm') => + if inp = 0wx5D + then yyQ19(strm', lastMatch) + else if inp < 0wx5D + then if inp = 0wx2E + then yyQ10(strm', lastMatch) + else if inp < 0wx2E + then if inp = 0wx28 + then yyQ4(strm', lastMatch) + else if inp < 0wx28 + then if inp = 0wxE + then yyQ1(strm', lastMatch) + else if inp < 0wxE + then if inp = 0wx9 + then yyQ2(strm', lastMatch) + else if inp < 0wx9 + then yyQ1(strm', lastMatch) + else if inp = 0wxD + then yyQ3(strm', lastMatch) + else yyQ2(strm', lastMatch) + else if inp = 0wx20 + then yyQ2(strm', lastMatch) + else yyQ1(strm', lastMatch) + else if inp = 0wx2B + then yyQ7(strm', lastMatch) + else if inp < 0wx2B + then if inp = 0wx29 + then yyQ5(strm', lastMatch) + else yyQ6(strm', lastMatch) + else if inp = 0wx2C + then yyQ8(strm', lastMatch) + else yyQ9(strm', lastMatch) + else if inp = 0wx3D + then yyQ15(strm', lastMatch) + else if inp < 0wx3D + then if inp = 0wx3A + then yyQ12(strm', lastMatch) + else if inp < 0wx3A + then if inp = 0wx2F + then yyQ6(strm', lastMatch) + else yyQ11(strm', lastMatch) + else if inp = 0wx3B + then yyQ13(strm', lastMatch) + else yyQ14(strm', lastMatch) + else if inp = 0wx41 + then yyQ17(strm', lastMatch) + else if inp < 0wx41 + then if inp = 0wx3E + then yyQ16(strm', lastMatch) + else yyQ1(strm', lastMatch) + else if inp = 0wx5B + then yyQ18(strm', lastMatch) + else if inp = 0wx5C + then yyQ1(strm', lastMatch) + else yyQ17(strm', lastMatch) + else if inp = 0wx6F + then yyQ28(strm', lastMatch) + else if inp < 0wx6F + then if inp = 0wx66 + then yyQ24(strm', lastMatch) + else if inp < 0wx66 + then if inp = 0wx63 + then yyQ17(strm', lastMatch) + else if inp < 0wx63 + then if inp = 0wx61 + then yyQ20(strm', lastMatch) + else if inp = 0wx62 + then yyQ21(strm', lastMatch) + else yyQ1(strm', lastMatch) + else if inp = 0wx64 + then yyQ22(strm', lastMatch) + else yyQ23(strm', lastMatch) + else if inp = 0wx6A + then yyQ17(strm', lastMatch) + else if inp < 0wx6A + then if inp = 0wx69 + then yyQ25(strm', lastMatch) + else yyQ17(strm', lastMatch) + else if inp = 0wx6D + then yyQ26(strm', lastMatch) + else if inp = 0wx6E + then yyQ27(strm', lastMatch) + else yyQ17(strm', lastMatch) + else if inp = 0wx75 + then yyQ17(strm', lastMatch) + else if inp < 0wx75 + then if inp = 0wx72 + then yyQ30(strm', lastMatch) + else if inp < 0wx72 + then if inp = 0wx70 + then yyQ29(strm', lastMatch) + else yyQ17(strm', lastMatch) + else if inp = 0wx73 + then yyQ17(strm', lastMatch) + else yyQ31(strm', lastMatch) + else if inp = 0wx78 + then yyQ17(strm', lastMatch) + else if inp < 0wx78 + then if inp = 0wx76 + then yyQ32(strm', lastMatch) + else yyQ33(strm', lastMatch) + else if inp = 0wx7B + then yyQ34(strm', lastMatch) + else if inp <= 0wx7A + then yyQ17(strm', lastMatch) + else yyQ1(strm', lastMatch) + (* end case *)) +in + (case (!(yyss)) + of INITIAL => yyQ0(!(yystrm), yyNO_MATCH) + (* end case *)) +end + and skip() = (yystartPos := yygetPos(); continue()) + in (continue(), (!yystartPos, yygetPos()), !yystrm, !yyss) end + in + lex() + end + in + type pos = AntlrStreamPos.pos + type span = AntlrStreamPos.span + type tok = UserDeclarations.lex_result + + datatype prestrm = STRM of ULexBuffer.stream * + (yystart_state * tok * span * prestrm * yystart_state) option ref + type strm = (prestrm * yystart_state) + + fun lex sm (STRM (yystrm, memo), ss) = (case !memo + of NONE => let + val (tok, span, yystrm', ss') = innerLex (yystrm, ss, sm) + val strm' = STRM (yystrm', ref NONE); + in + memo := SOME (ss, tok, span, strm', ss'); + (tok, span, (strm', ss')) + end + | SOME (ss', tok, span, strm', ss'') => + if ss = ss' then + (tok, span, (strm', ss'')) + else ( + memo := NONE; + lex sm (STRM (yystrm, memo), ss)) + (* end case *)) + + fun streamify input = (STRM (ULexBuffer.mkStream input, ref NONE), + INITIAL) + + fun streamifyReader readFn strm = let + val s = ref strm + fun iter(strm, n, accum) = + if n > 1024 then (String.implode (rev accum), strm) + else (case readFn strm + of NONE => (String.implode (rev accum), strm) + | SOME(c, strm') => iter (strm', n+1, c::accum)) + fun input() = let + val (data, strm) = iter(!s, 0, []) + in + s := strm; + data + end + in + streamify input + end + + fun streamifyInstream strm = streamify (fn ()=>TextIO.input strm) + + fun getPos (STRM (strm, _), _) = ULexBuffer.getpos strm + + end +end diff --git a/ml-lpt/ml-antlr/examples/dragon/dump.dot b/ml-lpt/ml-antlr/examples/dragon/dump.dot new file mode 100644 index 0000000..92dedee --- /dev/null +++ b/ml-lpt/ml-antlr/examples/dragon/dump.dot @@ -0,0 +1,635 @@ +digraph dump { + graph + [ rankdir = "LR" ] + 1 + [ label = "program" ] + [ shape = "box" ] + 2 + [ label = "id_list" ] + [ shape = "box" ] + 3 + [ label = "declaration" ] + [ shape = "box" ] + 4 + [ label = "program_SR1" ] + [ shape = "box" ] + 5 + [ label = "subprogram_declaration" ] + [ shape = "box" ] + 6 + [ label = "program_SR2" ] + [ shape = "box" ] + 7 + [ label = "compound_statement" ] + [ shape = "box" ] + 8 + [ label = "id_list_SR1" ] + [ shape = "box" ] + 9 + [ label = "id_list_type" ] + [ shape = "box" ] + 10 + [ label = "compound_type" ] + [ shape = "box" ] + 11 + [ label = "standard_type" ] + [ shape = "box" ] + 12 + [ label = "subprogram_head" ] + [ shape = "box" ] + 13 + [ label = "subprogram_declaration_SR1" ] + [ shape = "box" ] + 14 + [ label = "arguments" ] + [ shape = "box" ] + 15 + [ label = "parameter_list" ] + [ shape = "box" ] + 16 + [ label = "parameter_list_SR1" ] + [ shape = "box" ] + 17 + [ label = "statement" ] + [ shape = "box" ] + 18 + [ label = "compound_statement_SR1_SR1" ] + [ shape = "box" ] + 19 + [ label = "compound_statement_SR1" ] + [ shape = "box" ] + 20 + [ label = "variable" ] + [ shape = "box" ] + 21 + [ label = "exp" ] + [ shape = "box" ] + 22 + [ label = "procedure_statement" ] + [ shape = "box" ] + 23 + [ label = "procedure_statement_SR1" ] + [ shape = "box" ] + 24 + [ label = "simple_exp" ] + [ shape = "box" ] + 25 + [ label = "exp_SR1" ] + [ shape = "box" ] + 26 + [ label = "signed_term" ] + [ shape = "box" ] + 27 + [ label = "simple_exp_SR1" ] + [ shape = "box" ] + 28 + [ label = "term" ] + [ shape = "box" ] + 29 + [ label = "factor" ] + [ shape = "box" ] + 30 + [ label = "term_SR1" ] + [ shape = "box" ] + 31 + [ label = "factor_SR1" ] + [ shape = "box" ] + 62 + [ shape = "doublecircle" ] + 61 + [ shape = "doublecircle" ] + 60 + [ shape = "doublecircle" ] + 59 + [ shape = "doublecircle" ] + 58 + [ shape = "doublecircle" ] + 57 + [ shape = "doublecircle" ] + 56 + [ shape = "doublecircle" ] + 55 + [ shape = "doublecircle" ] + 54 + [ shape = "doublecircle" ] + 53 + [ shape = "doublecircle" ] + 52 + [ shape = "doublecircle" ] + 51 + [ shape = "doublecircle" ] + 50 + [ shape = "doublecircle" ] + 49 + [ shape = "doublecircle" ] + 48 + [ shape = "doublecircle" ] + 47 + [ shape = "doublecircle" ] + 46 + [ shape = "doublecircle" ] + 45 + [ shape = "doublecircle" ] + 44 + [ shape = "doublecircle" ] + 43 + [ shape = "doublecircle" ] + 42 + [ shape = "doublecircle" ] + 41 + [ shape = "doublecircle" ] + 40 + [ shape = "doublecircle" ] + 39 + [ shape = "doublecircle" ] + 38 + [ shape = "doublecircle" ] + 37 + [ shape = "doublecircle" ] + 36 + [ shape = "doublecircle" ] + 35 + [ shape = "doublecircle" ] + 34 + [ shape = "doublecircle" ] + 33 + [ shape = "doublecircle" ] + 32 + [ shape = "doublecircle" ] + 1 -> 67 + [ label = "" ] + 67 -> 68 + [ label = "KW_program" ] + 68 -> 69 + [ label = "ID" ] + 69 -> 70 + [ label = "LP" ] + 70 -> 71 + [ label = "id_list" ] + 71 -> 72 + [ label = "RP" ] + 72 -> 73 + [ label = "SEMI" ] + 73 -> 75 + [ label = "program_SR1" ] + 75 -> 73 + [ label = "" ] + 73 -> 74 + [ label = "" ] + 74 -> 77 + [ label = "program_SR2" ] + 77 -> 74 + [ label = "" ] + 74 -> 76 + [ label = "" ] + 76 -> 78 + [ label = "compound_statement" ] + 78 -> 79 + [ label = "DOT" ] + 79 -> 80 + [ label = "EOF" ] + 80 -> 32 + [ label = "" ] + 2 -> 84 + [ label = "" ] + 84 -> 85 + [ label = "ID" ] + 85 -> 87 + [ label = "id_list_SR1" ] + 87 -> 85 + [ label = "" ] + 85 -> 86 + [ label = "" ] + 86 -> 33 + [ label = "" ] + 3 -> 88 + [ label = "" ] + 88 -> 89 + [ label = "KW_var" ] + 89 -> 90 + [ label = "id_list_type" ] + 90 -> 91 + [ label = "SEMI" ] + 91 -> 34 + [ label = "" ] + 4 -> 63 + [ label = "" ] + 63 -> 64 + [ label = "declaration" ] + 64 -> 35 + [ label = "" ] + 5 -> 114 + [ label = "" ] + 114 -> 115 + [ label = "subprogram_head" ] + 115 -> 117 + [ label = "subprogram_declaration_SR1" ] + 117 -> 115 + [ label = "" ] + 115 -> 116 + [ label = "" ] + 116 -> 118 + [ label = "compound_statement" ] + 118 -> 119 + [ label = "SEMI" ] + 119 -> 36 + [ label = "" ] + 6 -> 65 + [ label = "" ] + 65 -> 66 + [ label = "subprogram_declaration" ] + 66 -> 37 + [ label = "" ] + 7 -> 151 + [ label = "" ] + 151 -> 152 + [ label = "KW_begin" ] + 152 -> 153 + [ label = "compound_statement_SR1" ] + 153 -> 154 + [ label = "KW_end" ] + 154 -> 38 + [ label = "" ] + 152 -> 153 + [ label = "" ] + 8 -> 81 + [ label = "" ] + 81 -> 82 + [ label = "COMMA" ] + 82 -> 83 + [ label = "ID" ] + 83 -> 39 + [ label = "" ] + 9 -> 108 + [ label = "" ] + 108 -> 109 + [ label = "id_list" ] + 109 -> 110 + [ label = "COLON" ] + 110 -> 111 + [ label = "compound_type" ] + 111 -> 40 + [ label = "" ] + 10 -> 94 + [ label = "" ] + 94 -> 95 + [ label = "KW_array" ] + 95 -> 96 + [ label = "LSB" ] + 96 -> 97 + [ label = "INT" ] + 97 -> 98 + [ label = "DOT" ] + 98 -> 99 + [ label = "DOT" ] + 99 -> 100 + [ label = "INT" ] + 100 -> 101 + [ label = "RSB" ] + 101 -> 102 + [ label = "KW_of" ] + 102 -> 103 + [ label = "standard_type" ] + 103 -> 41 + [ label = "" ] + 10 -> 92 + [ label = "" ] + 92 -> 93 + [ label = "standard_type" ] + 93 -> 41 + [ label = "" ] + 11 -> 106 + [ label = "" ] + 106 -> 107 + [ label = "KW_real" ] + 107 -> 42 + [ label = "" ] + 11 -> 104 + [ label = "" ] + 104 -> 105 + [ label = "KW_integer" ] + 105 -> 42 + [ label = "" ] + 12 -> 127 + [ label = "" ] + 127 -> 128 + [ label = "KW_procedure" ] + 128 -> 129 + [ label = "ID" ] + 129 -> 130 + [ label = "arguments" ] + 130 -> 131 + [ label = "SEMI" ] + 131 -> 43 + [ label = "" ] + 12 -> 120 + [ label = "" ] + 120 -> 121 + [ label = "KW_function" ] + 121 -> 122 + [ label = "ID" ] + 122 -> 123 + [ label = "arguments" ] + 123 -> 124 + [ label = "COLON" ] + 124 -> 125 + [ label = "standard_type" ] + 125 -> 126 + [ label = "SEMI" ] + 126 -> 43 + [ label = "" ] + 13 -> 112 + [ label = "" ] + 112 -> 113 + [ label = "declaration" ] + 113 -> 44 + [ label = "" ] + 14 -> 136 + [ label = "" ] + 136 -> 45 + [ label = "" ] + 14 -> 132 + [ label = "" ] + 132 -> 133 + [ label = "LP" ] + 133 -> 134 + [ label = "parameter_list" ] + 134 -> 135 + [ label = "RP" ] + 135 -> 45 + [ label = "" ] + 15 -> 140 + [ label = "" ] + 140 -> 141 + [ label = "id_list_type" ] + 141 -> 143 + [ label = "parameter_list_SR1" ] + 143 -> 141 + [ label = "" ] + 141 -> 142 + [ label = "" ] + 142 -> 46 + [ label = "" ] + 16 -> 137 + [ label = "" ] + 137 -> 138 + [ label = "SEMI" ] + 138 -> 139 + [ label = "id_list_type" ] + 139 -> 47 + [ label = "" ] + 17 -> 170 + [ label = "" ] + 170 -> 171 + [ label = "KW_while" ] + 171 -> 172 + [ label = "exp" ] + 172 -> 173 + [ label = "KW_do" ] + 173 -> 174 + [ label = "statement" ] + 174 -> 48 + [ label = "" ] + 17 -> 163 + [ label = "" ] + 163 -> 164 + [ label = "KW_if" ] + 164 -> 165 + [ label = "exp" ] + 165 -> 166 + [ label = "KW_then" ] + 166 -> 167 + [ label = "statement" ] + 167 -> 168 + [ label = "KW_else" ] + 168 -> 169 + [ label = "statement" ] + 169 -> 48 + [ label = "" ] + 17 -> 161 + [ label = "" ] + 161 -> 162 + [ label = "compound_statement" ] + 162 -> 48 + [ label = "" ] + 17 -> 159 + [ label = "" ] + 159 -> 160 + [ label = "procedure_statement" ] + 160 -> 48 + [ label = "" ] + 17 -> 155 + [ label = "" ] + 155 -> 156 + [ label = "variable" ] + 156 -> 157 + [ label = "ASSIGNOP" ] + 157 -> 158 + [ label = "exp" ] + 158 -> 48 + [ label = "" ] + 18 -> 144 + [ label = "" ] + 144 -> 145 + [ label = "SEMI" ] + 145 -> 146 + [ label = "statement" ] + 146 -> 49 + [ label = "" ] + 19 -> 147 + [ label = "" ] + 147 -> 148 + [ label = "statement" ] + 148 -> 150 + [ label = "compound_statement_SR1_SR1" ] + 150 -> 148 + [ label = "" ] + 148 -> 149 + [ label = "" ] + 149 -> 50 + [ label = "" ] + 20 -> 177 + [ label = "" ] + 177 -> 178 + [ label = "ID" ] + 178 -> 179 + [ label = "LSB" ] + 179 -> 180 + [ label = "exp" ] + 180 -> 181 + [ label = "RSB" ] + 181 -> 51 + [ label = "" ] + 20 -> 175 + [ label = "" ] + 175 -> 176 + [ label = "ID" ] + 176 -> 51 + [ label = "" ] + 21 -> 197 + [ label = "" ] + 197 -> 198 + [ label = "simple_exp" ] + 198 -> 199 + [ label = "exp_SR1" ] + 199 -> 52 + [ label = "" ] + 198 -> 199 + [ label = "" ] + 22 -> 187 + [ label = "" ] + 187 -> 188 + [ label = "ID" ] + 188 -> 189 + [ label = "LP" ] + 189 -> 190 + [ label = "exp" ] + 190 -> 192 + [ label = "procedure_statement_SR1" ] + 192 -> 190 + [ label = "" ] + 190 -> 191 + [ label = "" ] + 191 -> 193 + [ label = "RP" ] + 193 -> 53 + [ label = "" ] + 22 -> 182 + [ label = "" ] + 182 -> 183 + [ label = "ID" ] + 183 -> 53 + [ label = "" ] + 23 -> 184 + [ label = "" ] + 184 -> 185 + [ label = "COMMA" ] + 185 -> 186 + [ label = "exp" ] + 186 -> 54 + [ label = "" ] + 24 -> 203 + [ label = "" ] + 203 -> 204 + [ label = "signed_term" ] + 204 -> 206 + [ label = "simple_exp_SR1" ] + 206 -> 204 + [ label = "" ] + 204 -> 205 + [ label = "" ] + 205 -> 55 + [ label = "" ] + 25 -> 194 + [ label = "" ] + 194 -> 195 + [ label = "RELOP" ] + 195 -> 196 + [ label = "simple_exp" ] + 196 -> 56 + [ label = "" ] + 26 -> 210 + [ label = "" ] + 210 -> 211 + [ label = "term" ] + 211 -> 57 + [ label = "" ] + 26 -> 207 + [ label = "" ] + 207 -> 208 + [ label = "MINUS" ] + 208 -> 209 + [ label = "term" ] + 209 -> 57 + [ label = "" ] + 27 -> 200 + [ label = "" ] + 200 -> 201 + [ label = "ADDOP" ] + 201 -> 202 + [ label = "signed_term" ] + 202 -> 58 + [ label = "" ] + 28 -> 215 + [ label = "" ] + 215 -> 216 + [ label = "factor" ] + 216 -> 218 + [ label = "term_SR1" ] + 218 -> 216 + [ label = "" ] + 216 -> 217 + [ label = "" ] + 217 -> 59 + [ label = "" ] + 29 -> 239 + [ label = "" ] + 239 -> 240 + [ label = "KW_not" ] + 240 -> 241 + [ label = "factor" ] + 241 -> 60 + [ label = "" ] + 29 -> 235 + [ label = "" ] + 235 -> 236 + [ label = "LP" ] + 236 -> 237 + [ label = "exp" ] + 237 -> 238 + [ label = "RP" ] + 238 -> 60 + [ label = "" ] + 29 -> 233 + [ label = "" ] + 233 -> 234 + [ label = "REAL" ] + 234 -> 60 + [ label = "" ] + 29 -> 231 + [ label = "" ] + 231 -> 232 + [ label = "INT" ] + 232 -> 60 + [ label = "" ] + 29 -> 224 + [ label = "" ] + 224 -> 225 + [ label = "ID" ] + 225 -> 226 + [ label = "LP" ] + 226 -> 227 + [ label = "exp" ] + 227 -> 229 + [ label = "factor_SR1" ] + 229 -> 227 + [ label = "" ] + 227 -> 228 + [ label = "" ] + 228 -> 230 + [ label = "RP" ] + 230 -> 60 + [ label = "" ] + 29 -> 219 + [ label = "" ] + 219 -> 220 + [ label = "variable" ] + 220 -> 60 + [ label = "" ] + 30 -> 212 + [ label = "" ] + 212 -> 213 + [ label = "MULOP" ] + 213 -> 214 + [ label = "factor" ] + 214 -> 61 + [ label = "" ] + 31 -> 221 + [ label = "" ] + 221 -> 222 + [ label = "COMMA" ] + 222 -> 223 + [ label = "exp" ] + 223 -> 62 + [ label = "" ] +} \ No newline at end of file diff --git a/ml-lpt/ml-antlr/examples/dragon/example.pas b/ml-lpt/ml-antlr/examples/dragon/example.pas new file mode 100644 index 0000000..c47873d --- /dev/null +++ b/ml-lpt/ml-antlr/examples/dragon/example.pas @@ -0,0 +1,12 @@ +program example(input, output); +var x, y : integer; +function gcd(a, b : integer): integer; +begin + if b = 0 then gcd := a + else gcd := gcd(b, a mod b) +end; { gcd } + +begin + read(x, y); + write(gcd(x, y)) +end. diff --git a/ml-lpt/ml-antlr/examples/dragon/main.sml b/ml-lpt/ml-antlr/examples/dragon/main.sml new file mode 100644 index 0000000..ddb28f0 --- /dev/null +++ b/ml-lpt/ml-antlr/examples/dragon/main.sml @@ -0,0 +1,33 @@ +structure Main = + struct + + structure L = Mlex + structure P = ParseFn(L) + + fun errMsg l = + TextIO.output(TextIO.stdErr, String.concat l) + + fun parse strm = let + val s = Mlex.streamifyInstream strm + val sm = AntlrStreamPos.mkSourcemap() + val (SOME p, s', errors) = P.parse (L.lex sm) s before TextIO.closeIn strm + fun doErr err = print ("Syntax error " ^ + AntlrRepair.repairToString Tokens.toString sm err ^ "\n") + in + app doErr errors; + p + end + + fun main (_, [file]) = ( + print ("\n -- ECHO -- \n\n" ^ (parse (TextIO.openIn file))); + print "\n\n"; + OS.Process.success) + handle ex => ( + errMsg [ + "uncaught exception ", General.exnName ex, + " [", exnMessage ex, "]\n" + ]; + List.app (fn s => errMsg [" raised at ", s, "\n"]) (SMLofNJ.exnHistory ex); + OS.Process.failure) + + end diff --git a/ml-lpt/ml-antlr/examples/dragon/sources.cm b/ml-lpt/ml-antlr/examples/dragon/sources.cm new file mode 100644 index 0000000..3a1ca62 --- /dev/null +++ b/ml-lpt/ml-antlr/examples/dragon/sources.cm @@ -0,0 +1,19 @@ +Library + + structure Main + +is + $/basis.cm + $/smlnj-lib.cm + +(* NOTE: we use a relative path here, but you should use + * + * $/ml-lpt-lib.cm + * + * for your projects. + *) + ../../../lib/ml-lpt-lib.cm + + dragon.g.sml + dragon.l.sml + main.sml \ No newline at end of file diff --git a/ml-lpt/ml-antlr/examples/inherit/a.grm b/ml-lpt/ml-antlr/examples/inherit/a.grm new file mode 100644 index 0000000..6e8c7cb --- /dev/null +++ b/ml-lpt/ml-antlr/examples/inherit/a.grm @@ -0,0 +1,21 @@ +%tokens + : ID of string + | INT of int + | LP ("(") | RP (")") + | PLUS ("+") + | SEMI (";") | EQ ("=") + ; + +stmt + : ID "=" exp ";" + ; + +exp + : atomicExp ("+" atomicExp)* + ; + +atomicExp + : ID + | INT + | "(" exp ")" + ; \ No newline at end of file diff --git a/ml-lpt/ml-antlr/examples/inherit/b.grm b/ml-lpt/ml-antlr/examples/inherit/b.grm new file mode 100644 index 0000000..4f335e4 --- /dev/null +++ b/ml-lpt/ml-antlr/examples/inherit/b.grm @@ -0,0 +1,18 @@ +%import "a.grm" %dropping stmt, exp; + +%tokens + : REAL of real + | TIMES ("*") + ; + +exp + : multExp ("+" multExp)* + ; + +multExp + : atomicExp ("*" atomicExp)* + ; + +atomicExp + : REAL + ; \ No newline at end of file diff --git a/ml-lpt/ml-antlr/gla.sml b/ml-lpt/ml-antlr/gla.sml new file mode 100644 index 0000000..5e11501 --- /dev/null +++ b/ml-lpt/ml-antlr/gla.sml @@ -0,0 +1,293 @@ +(* gla.sml + * + * COPYRIGHT (c) 2006 + * John Reppy (http://www.cs.uchicago.edu/~jhr) + * Aaron Turon (http://www.cs.uchicago.edu/~adrassi) + * All rights reserved. + * + * An interpretation of Parr's Grammar Lookahead Automata. + * MORE COMMENTS COMING SOON + *) + +structure GLA :> + sig + + type gla + type prepath = Token.Set.set list + + val mkGLA : LLKSpec.grammar -> gla + val lookK : (gla * LLKSpec.prod * prepath * Int.int) -> Token.Set.set + val lookKFollow : (gla * LLKSpec.nonterm * prepath * Int.int) -> Token.Set.set + + val dumpGraph : (string * LLKSpec.grammar * gla) -> unit + + end = struct + + structure S = LLKSpec + structure NMap = Nonterm.Map + structure PMap = Prod.Map + structure TSet = Token.Set + + datatype node_type + = Initial + | Final + | Normal + + datatype node = Node of { + id : Int.int, + edges : edge list ref, + ty : node_type, + nt : S.nonterm + } + and edge = Edge of (label * node) + and label + = Epsilon + | Tok of Token.token + | Call of S.nonterm + + datatype gla = GLA of { + nodes : node list, + prodMap : node PMap.map, + startOf : S.nonterm -> node, + finEBNF : S.nonterm -> node + } + + type prepath = Token.Set.set list + + fun isFinal (Node {ty = Final, ...}) = true + | isFinal _ = false + fun isInitial (Node {ty = Initial, ...}) = true + | isInitial _ = false + fun edgesOf (Node {edges, ...}) = !edges + fun idOf (Node {id, ...}) = id + fun ntOf (Node {nt, ...}) = nt + fun sameNode (n1, n2) = (idOf n1 = idOf n2) + + structure Ord = + struct + type ord_key = node + fun compare (Node {id = id1, ...}, Node {id = id2, ...}) = Int.compare (id1, id2) + end + + structure SCC = GraphSCCFn(Ord) + structure NodeSet = RedBlackSetFn (Ord) + + (* Build an NFA for the grammar *) + fun mkGLA grm = let + val S.Grammar {toks, nterms, prods, eof, startnt, ...} = grm + val nextId = ref 0 + val nodes : node list ref = ref [] + fun newNode (nt, ty) = let + val _ = nextId := !nextId + 1 + val n = Node {id = !nextId, edges = ref [], ty = ty, nt = nt} + in + nodes := n :: (!nodes); + n + end + fun makeNTs (f) = List.foldl NMap.insert' NMap.empty + (ListPair.zip (nterms, List.map f nterms)) + (* create initial, final state for each nonterminal *) + val ntMap = makeNTs (fn nt => newNode (nt, Initial)) + val finalMap = makeNTs (fn nt => newNode (nt, Final)) + fun mkOf map nt = valOf (NMap.find (map, nt)) + val startOf = mkOf ntMap + val finalOf = mkOf finalMap + val finEBNF = ref NMap.empty + fun addFinEBNF (nt, n) = (finEBNF := NMap.insert (!finEBNF, nt, n)) + fun addEdge (Node {edges, ...}, target, label) = ( + edges := (Edge (label, target))::(!edges); + case label + of Call nt => + (* add follow edge *) + addEdge (finalOf nt, target, Epsilon) + | _ => ()) + (* iterate through the items of a production, creating both nodes + * and edges. + *) + fun iter (nt, lastNode, []) = addEdge (lastNode, finalOf nt, Epsilon) + | iter (nt, lastNode, itm::itms) = let + val n = newNode (nt, Normal) + in + (case Item.sym itm + of S.TOK t => + addEdge (lastNode, n, Tok t) + | S.NONTERM (nt, _) => + addEdge (lastNode, n, Call nt) + | S.CLOS nt => let + val nback = newNode (nt, Normal) + in + addEdge (lastNode, n, Epsilon); + addEdge (lastNode, nback, Call nt); + addEdge (nback, lastNode, Epsilon); + addFinEBNF (nt, n) + end + | S.POSCLOS nt => let + val nback = newNode (nt, Normal) + in + addEdge (lastNode, nback, Call nt); + addEdge (nback, n, Epsilon); + addEdge (nback, lastNode, Epsilon); + addFinEBNF (nt, n) + end + | S.OPT nt => ( + addEdge (lastNode, n, Epsilon); + addEdge (lastNode, n, Call nt); + addFinEBNF (nt, n)) + (* end case *)); + iter (nt, n, itms) + end + (* process a production *) + fun doProd prod = let + val lhs = Prod.lhs prod + val n = newNode (lhs, Normal) + in + iter (lhs, n, Prod.items prod); + addEdge (startOf lhs, n, Epsilon); + (prod, n) + end + val prodMap = foldl PMap.insert' PMap.empty (map doProd prods) + val eofNode = newNode (startnt, Normal) + in + addEdge (eofNode, eofNode, Tok eof); + addEdge (finalOf startnt, eofNode, Epsilon); + GLA {nodes = !nodes, prodMap = prodMap, + startOf = startOf, finEBNF = mkOf (!finEBNF)} + end + + fun unionAll sets = foldl TSet.union TSet.empty sets + + (* Traverse the GLA, finding all possible tokens that can appear + * k tokens ahead of node n. The stack holds sharper follow information + * when available. See the implementation notes for more detail. + *) + fun look (startOf, n, k, stack, prePath, leftSet, acc) = let + val leftSet' = NodeSet.add (leftSet, n) + val lookRepl = (fn (tokSet, leftSet) => (tokSet, leftSet')) o look + fun follow (Edge (Tok t, n'), (tokSet, leftSet)) = + if k = 1 then (TSet.add (tokSet, t), leftSet) + else if TSet.member(hd prePath, t) then + lookRepl (startOf, n', k - 1, stack, + tl prePath, NodeSet.empty, tokSet) + else (tokSet, leftSet) + | follow (Edge (Epsilon, n'), (tokSet, leftSet)) = + look (startOf, n', k, stack, prePath, leftSet, tokSet) + | follow (Edge (Call nt, n'), (tokSet, leftSet)) = + lookRepl (startOf, startOf nt, k, + n'::stack, prePath, leftSet, tokSet) + in + if NodeSet.member (leftSet, n) then + if isInitial n then ( + Err.leftRecur (Nonterm.qualName (ntOf n)); + (TSet.empty, leftSet')) + else + (acc, leftSet') + else if isFinal n andalso length stack > 0 then + look (startOf, hd stack, k, tl stack, prePath, leftSet', acc) + else + foldl follow (acc, leftSet') (edgesOf n) + end + + (* compute lookahead set k tokens into the given production *) + fun lookK (GLA {prodMap, startOf, ...}, prod, prePath, k) = + #1 (look (startOf, valOf (PMap.find (prodMap, prod)), k, [], + prePath, NodeSet.singleton (startOf (Prod.lhs prod)), + TSet.empty)) + + (* compute lookahead set k tokens AFTER the given EBNF subrule *) + fun lookKFollow (GLA {finEBNF, startOf, ...}, nt, prePath, k) = + #1 (look (startOf, finEBNF nt, k, [], prePath, NodeSet.empty, TSet.empty)) + + (* the below code dumps the graph to a .DOT file *) + (* TODO: move this to its own module *) + + datatype attribute = ATTR of string * string + datatype d_node = NODE of string * attribute list + datatype di_edge = EDGE of string * string * attribute list + datatype di_graph = GRAPH of string * d_node list * di_edge list * attribute list + + fun replBS str = + String.translate + (fn #"\\" => "\\\\" | c => String.str c) + str + + fun writeGraph (out, graph) = let + (* output a string *) + fun wr s = TextIO.output (out, s) + (* output a string list *) + fun wrs ss = wr (String.concat ss) + (* indent to some level *) + fun wrIndent 0 = () + | wrIndent lvl = (wr " "; wrIndent (lvl - 1)) + (* apply output functions, indenting each time *) + fun app indent f list = + List.app (fn x => (wrIndent indent; f x)) list + fun wrAttr (ATTR (name, value)) = wrs ([ + "[ ", name, " = \"", value, "\" ]", "\n" + ]) + fun wrNode (NODE (name, atts)) = + (wr name; + wr "\n"; + app 2 wrAttr atts) + fun wrEdge (EDGE (no1, no2, atts)) = + (wrs ([no1, " -> ", no2, "\n"]); + app 2 wrAttr atts) + fun wrGraphAttr attr = + (wr "graph\n"; + wrIndent 2; + wrAttr attr) + fun wrGraph (GRAPH (name, nodes, edges, atts)) = + (wrs (["digraph ", name, " {\n"]); + app 1 wrGraphAttr atts; + app 1 wrNode nodes; + app 1 wrEdge edges; + wr "}") + in wrGraph graph + end + + fun dumpGraph (fname, grm, gla) = let + val S.Grammar {toks, nterms, prods, eof, ...} = grm + val GLA {startOf, nodes, ...} = gla + val seen = ref NodeSet.empty + fun addSeen n = seen := NodeSet.add (!seen, n) + fun idOf (Node {id, ...}) = id + fun nameOf n = Int.toString (idOf n) + fun doNode n = + if NodeSet.member(!seen, n) then + [] + else ( + addSeen n; + if isFinal n then [] + else List.concat (map (doEdge n) (edgesOf n))) +(* + and doEdge n (Edge (Call nt, _)) = + [EDGE (nameOf n, nameOf (startOf nt), [])] + | doEdge n (Edge (lbl, n')) = + (EDGE (nameOf n, nameOf n', [ + ATTR ("label", case lbl + of Epsilon => "" + | Tok t => Token.name t)])) + :: (doNode n') +*) + and doEdge n (Edge (lbl, n')) = + (EDGE (nameOf n, nameOf n', [ + ATTR ("label", case lbl + of Epsilon => "" + | Tok t => Token.name t + | Call nt => Nonterm.qualName nt)])) + :: (doNode n') + val edges = List.concat (map (doNode o startOf) nterms) + fun doNT nt = NODE (nameOf (startOf nt), [ATTR ("label", Nonterm.qualName nt), ATTR ("shape", "box")]) + fun doFinal node = if isFinal node then + SOME (NODE (nameOf node, + [ATTR ("shape", "doublecircle")])) + else NONE + val nodes = (map doNT nterms) @ (List.mapPartial doFinal nodes) + val graph = GRAPH ("dump", nodes, edges, [ATTR ("rankdir", "LR")]) + val out = TextIO.openOut (fname ^ ".dot") + in + TextIO.output (TextIO.stdErr, concat[" writing ", fname, ".dot\n"]); + writeGraph (out, graph) + before TextIO.closeOut out + end + + end diff --git a/ml-lpt/ml-antlr/item.sml b/ml-lpt/ml-antlr/item.sml new file mode 100644 index 0000000..e8dff63 --- /dev/null +++ b/ml-lpt/ml-antlr/item.sml @@ -0,0 +1,78 @@ +(* item.sml + * + * COPYRIGHT (c) 2006 + * John Reppy (http://www.cs.uchicago.edu/~jhr) + * Aaron Turon (http://www.cs.uchicago.edu/~adrassi) + * All rights reserved. + * + * Utility code for the item datatype. + *) + +structure Item = + struct + + structure S = LLKSpec + + fun sym (S.ITEM {sym = s, ...}) = s + val name = Preitem.name o sym + val toString = Preitem.toString o sym + val listToString = Preitem.listToString o (map sym) + + fun id (S.ITEM {id, ...}) = id + fun compare (item1, item2) = Int.compare(id item1, id item2) + fun same pair = (case compare pair + of EQUAL => true + | _ => false) + + fun nt itm = (case sym itm + of S.NONTERM (nt, _) => SOME nt + | S.POSCLOS nt => SOME nt + | S.CLOS nt => SOME nt + | S.OPT nt => SOME nt + | S.TOK _ => NONE + (* end case *)) + + (* return the SET of bound names to the left of an item in a production, + * and the SET of the formal parameters available to the item. + * we use sets rather than lists because names might be + * repeated in the bindings. + *) + fun bindingsLeftOf (item, prod) = let + fun lhs (LLKSpec.PROD {lhs, ...}) = lhs + fun items (LLKSpec.PROD {rhs, ...}) = !rhs + fun itemBindings (LLKSpec.PROD {rhsBindings, ...}) = + (fn (x,_) => x) (ListPair.unzip rhsBindings) + fun leftOf ([], accum) = raise Fail "BUG: leftOf on empty list" + | leftOf ((i, name)::is, accum) = + if same (i, item) then accum + else leftOf (is, AtomSet.add(accum, Atom.atom name)) + val (parentBindings, formals) = case Nonterm.parent (lhs prod) + of NONE => (AtomSet.empty, + AtomSet.addList (AtomSet.empty, Nonterm.formals (lhs prod))) + | SOME prod' => let + fun isProdItm (itm) = (case nt itm + of SOME nt => Nonterm.same (nt, lhs prod) + | NONE => false) + val prodItm = valOf (List.find isProdItm (items prod')) + in + bindingsLeftOf (prodItm, prod') + end + in + (leftOf (ListPair.zip (items prod, itemBindings prod), + parentBindings), + formals) + end + + structure Set = RedBlackSetFn ( + struct + type ord_key = S.item + val compare = compare + end) + + structure Map = RedBlackMapFn ( + struct + type ord_key = S.item + val compare = compare + end) + + end diff --git a/ml-lpt/ml-antlr/ll1/compute-first1.sml b/ml-lpt/ml-antlr/ll1/compute-first1.sml new file mode 100644 index 0000000..6df4c94 --- /dev/null +++ b/ml-lpt/ml-antlr/ll1/compute-first1.sml @@ -0,0 +1,73 @@ +(* compute-first1.sml + * + * COPYRIGHT (c) 2005 John Reppy (http://www.cs.uchicago.edu/~jhr) + * All rights reserved. + *) + +structure ComputeFirst1 : sig + + datatype result = Result of { + firstOf : LLKSpec.nonterm -> Token.Set.set, + firstOfItem : LLKSpec.item -> Token.Set.set, + firstOfItems : LLKSpec.item list -> Token.Set.set + } + + val compute : LLKSpec.prod list + * ComputeNullable.result + -> result + + end = struct + + structure TSet = Token.Set + structure NMap = Nonterm.Map + structure P = Predict + + datatype result = Result of { + firstOf : LLKSpec.nonterm -> Token.Set.set, + firstOfItem : LLKSpec.item -> Token.Set.set, + firstOfItems : LLKSpec.item list -> Token.Set.set + } + + fun compute (rules, nres) = let + val ComputeNullable.Result + {nullable, nullableItem, nullableItems} = nres + fun firstOf nm nt = + Option.getOpt (NMap.find (nm, nt), TSet.empty) + fun firstOfItem nm item = (case item + of (LLKSpec.TOK tok) => P.mkTok tok + | (LLKSpec.NONTERM nt) => firstOf nm nt + | (LLKSpec.CLOS nt) => firstOf nm nt + | (LLKSpec.POSCLOS nt) => firstOf nm nt + | (LLKSpec.OPT nt) => firstOf nm nt + (* end case *)) + and firstOfItems nm items = let + fun iter (fs, []) = fs + | iter (fs, item::rest) = let + val fs' = TSet.union (fs, firstOfItem nm item) + in if nullableItem item + then iter (fs', rest) + else fs' + end + in iter (TSet.empty, items) + end + fun doRule (LLKSpec.PROD{lhs, rhs, ...}, (nm, changed)) = let + val toks = firstOf nm lhs + val toks' = TSet.union (toks, firstOfItems nm rhs) + val nm' = NMap.insert (nm, lhs, toks') + in + (nm', changed + orelse (TSet.numItems toks' > TSet.numItems toks)) + end + fun loopToFixedPt nm = let + val (nm', changed) = List.foldl doRule (nm, false) rules + in if changed then loopToFixedPt nm' else nm' + end + val nm = loopToFixedPt NMap.empty + in Result { + firstOf = firstOf nm, + firstOfItem = firstOfItem nm, + firstOfItems = firstOfItems nm + } + end + + end diff --git a/ml-lpt/ml-antlr/ll1/compute-follow1.sml b/ml-lpt/ml-antlr/ll1/compute-follow1.sml new file mode 100644 index 0000000..98dce80 --- /dev/null +++ b/ml-lpt/ml-antlr/ll1/compute-follow1.sml @@ -0,0 +1,92 @@ +(* compute-follow1.sml + * + * COPYRIGHT (c) 2006 + * John Reppy (http://www.cs.uchicago.edu/~jhr) + * Aaron Turon (http://www.cs.uchicago.edu/~adrassi) + * All rights reserved. + *) + +structure ComputeFollow1 : sig + + datatype result = Result of { + followOf : LLKSpec.nonterm -> Token.Set.set + } + + val compute : LLKSpec.prod list + * ComputeNullable.result + * ComputeFirst1.result + -> result + + end = struct + + structure TSet = Token.Set + structure NMap = Nonterm.Map + + datatype result = Result of { + followOf : LLKSpec.nonterm -> Token.Set.set + } + + fun getNT (LLKSpec.TOK _) = NONE + | getNT (LLKSpec.NONTERM nt) = SOME nt + | getNT (LLKSpec.CLOS nt) = SOME nt + | getNT (LLKSpec.POSCLOS nt) = SOME nt + | getNT (LLKSpec.OPT nt) = SOME nt + + fun compute (rules, nres, fres) = let + val ComputeNullable.Result + {nullable, nullableItem, nullableItems} = nres + val ComputeFirst1.Result + {firstOf, firstOfItem, firstOfItems} = fres + fun followOf nm nt = + Option.getOpt (NMap.find (nm, nt), TSet.empty) + fun doRule (LLKSpec.PROD{lhs, rhs, ...}, (nm, changed)) = let + val lhsToks = followOf nm lhs + fun iter (nm, changed, []) = (nm, changed) + | iter (nm, changed, item::prec) = let + val (nm', changed') = + case getNT item + of SOME nt => let + val toks = followOf nm nt + val toks' = TSet.union (lhsToks, toks) + val changed' = changed + orelse (TSet.numItems toks' > TSet.numItems toks) + val nm' = NMap.insert (nm, nt, toks') + in + (nm', changed') + end + | NONE => (nm, changed) + in + if nullableItem item then + iter (nm', changed', prec) + else (nm', changed') + end + in + iter (nm, false, List.rev rhs) + end + fun loopToFixedPt nm = let + val (nm', changed) = List.foldl doRule (nm, false) rules + in if changed then loopToFixedPt nm' else nm' + end + fun initRule (LLKSpec.PROD{lhs, rhs, ...}, nm) = let + fun iter (nm, []) = nm + | iter (nm, item::rest) = (case getNT item + of SOME nt => let + val toks = followOf nm nt + val toks' = TSet.union (toks, firstOfItems rest) + val nm' = NMap.insert (nm, nt, toks') + in + iter (nm', rest) + end + | NONE => iter (nm, rest) + (* end case *)) + in + iter (nm, rhs) + end + val nm = loopToFixedPt (List.foldl initRule NMap.empty rules) + in Result { + followOf = followOf nm + } + end + + end + diff --git a/ml-lpt/ml-antlr/ll1/compute-nullable.sml b/ml-lpt/ml-antlr/ll1/compute-nullable.sml new file mode 100644 index 0000000..e649370 --- /dev/null +++ b/ml-lpt/ml-antlr/ll1/compute-nullable.sml @@ -0,0 +1,60 @@ +(* compute-nullable.sml + * + * COPYRIGHT (c) 2006 + * John Reppy (http://www.cs.uchicago.edu/~jhr) + * Aaron Turon (http://www.cs.uchicago.edu/~adrassi) + * All rights reserved. + * + * Computation of nullable test on nonterminals. + *) + +structure ComputeNullable : sig + + datatype result = Result of { + nullable : LLKSpec.nonterm -> bool, + nullableItem : LLKSpec.item -> bool, + nullableItems : LLKSpec.item list -> bool + } + + val compute : LLKSpec.prod list -> result + + end = struct + + datatype result = Result of { + nullable : LLKSpec.nonterm -> bool, + nullableItem : LLKSpec.item -> bool, + nullableItems : LLKSpec.item list -> bool + } + + fun nullableItem ns item = (case item + of (LLKSpec.TOK _) => false + | (LLKSpec.NONTERM nt) => Nonterm.Set.member(ns, nt) + | (LLKSpec.CLOS _) => true + | (LLKSpec.POSCLOS nt) => Nonterm.Set.member(ns, nt) + | (LLKSpec.OPT _) => true + (* end case *)) + + fun nullableItems ns = List.all (nullableItem ns) + + fun compute (rules : LLKSpec.prod list) = let + fun doRule (LLKSpec.PROD{lhs, rhs, ...}, ns) = + if nullableItems ns rhs + then Nonterm.Set.add(ns, lhs) + else ns + fun loopToFixedPt ns = let + val ns' = List.foldl doRule ns rules + in + if Nonterm.Set.numItems ns' > Nonterm.Set.numItems ns + then loopToFixedPt ns' + else ns + end + (* compute the set of nullable nonterminals *) + val ns = loopToFixedPt Nonterm.Set.empty + in Result { + nullable = fn nt => Nonterm.Set.member(ns, nt), + nullableItem = nullableItem ns, + nullableItems = nullableItems ns + } + end + + end diff --git a/ml-lpt/ml-antlr/llk-spec.sml b/ml-lpt/ml-antlr/llk-spec.sml new file mode 100644 index 0000000..901ad28 --- /dev/null +++ b/ml-lpt/ml-antlr/llk-spec.sml @@ -0,0 +1,101 @@ +(* llk-spec.sml + * + * COPYRIGHT (c) 2006 + * John Reppy (http://www.cs.uchicago.edu/~jhr) + * Aaron Turon (http://www.cs.uchicago.edu/~adrassi) + * All rights reserved. + * + * Datatypes for grammar specification. + * NOTE: individual modules are available that + * manipulate the datatypes described in + * this module. + *) + +structure LLKSpec = + struct + + type ty = string + type span = Err.span + + type 'a info = { + data : 'a + } + + datatype token = T of { + id : Int.int, (* globally unique ID *) + name : Atom.atom, + loc : span, + ty : ty option, + abbrev : Atom.atom option, + keyword : bool, (* true if marked as a %keyword *) + default : string option (* optional default argument for error repair *) + } + + and nonterm = NT of { + id : Int.int, (* globally unique ID *) + name : Atom.atom, + loc : span option ref, + binding : nt_binding, + prods : prod list ref, + formals : Atom.atom list ref, + isEBNF : bool, + ty : ty option ref + } + + and nt_binding = TOP | WITHIN of prod + + and prod = PROD of { + id : Int.int, (* globally unique ID *) + name : string, + try : bool, + lhs : nonterm, + rhs : item list ref, (* ref for tying recursive knot: + * subrules refer to their containing prod + *) + rhsBindings : (string * bool) list, + pred : Action.action option, + action : Action.action option, + loc : span + } + + and preitem + = TOK of token + (* nonterm * optional actual args *) + | NONTERM of (nonterm * Action.action option) + | CLOS of nonterm (* ( ... )* *) + | POSCLOS of nonterm (* ( ... )+ *) + | OPT of nonterm (* ( ... )? *) + + and item = ITEM of { + id : Int.int, (* globally unique ID *) + loc : span, + sym : preitem + } + + withtype sem_pred = Action.action + + datatype refcell + = REFCELL of { + name : string, + ty : ty, + initCode : Action.action, + loc : span + } + + datatype grammar = Grammar of { + name : string, + header : string option, + defs : Action.action, (* user definitions (code) *) + toks : token list, + toksImport : Action.action option, (* optional token datatype *) + changes : (token list * token list) list, + nterms : nonterm list, + prods : prod list, + eof : token, + sortedTops : nonterm list list, (* topologically sorted nonterms *) + startnt : nonterm, + entryPoints : nonterm list, + refcells : refcell list + } + + end diff --git a/ml-lpt/ml-antlr/main.sml b/ml-lpt/ml-antlr/main.sml new file mode 100644 index 0000000..bec354a --- /dev/null +++ b/ml-lpt/ml-antlr/main.sml @@ -0,0 +1,88 @@ +(* main.sml + * + * COPYRIGHT (c) 2006 + * John Reppy (http://www.cs.uchicago.edu/~jhr) + * Aaron Turon (http://www.cs.uchicago.edu/~adrassi) + * All rights reserved. + * + * The driver. + *) + + +structure Main : sig + + val main : (string * string list) -> OS.Process.status + + val load : string -> LLKSpec.grammar * GLA.gla + val getNT : LLKSpec.grammar -> string -> LLKSpec.nonterm + + end = struct + + (* check a parse tree, returning a grammar *) + fun checkPT parseTree = let + val grm = CheckGrammar.check parseTree + val LLKSpec.Grammar {nterms, prods, ...} = grm + val _ = Err.debugs [" ", Int.toString (List.length nterms), + " nonterminals"] + val _ = Err.debugs [" ", Int.toString (List.length prods), + " productions"] +(* +val _ = app (Err.debug o Prod.toString) prods +*) + in + grm + end + + fun process () = let + val _ = Err.anyErrors := false + val file = !Options.fname + (* check that the input file exists *) + val _ = if OS.FileSys.access(file, [OS.FileSys.A_READ]) + then () + else ( + Err.errMsg [ + "ml-antlr: file \"", String.toString file, + "\" does not exist or is unreadable" + ]; + raise Err.Abort) + (* process the grammar *) + val grm = checkPT (ParseFile.parse file) + val gla = GLA.mkGLA grm + val pm = ComputePredict.mkPM (grm, gla) + val outspec = (grm, pm, file) + in + SMLOutput.output outspec; + if !Options.dotOutput then GLA.dumpGraph (file, grm, gla) else (); + if !Options.texOutput then LaTeXOutput.output outspec else (); + if !Err.anyErrors + then OS.Process.failure + else OS.Process.success + end + handle Err.Abort => OS.Process.failure + | ex => ( + Err.errMsg [ + "uncaught exception ", General.exnName ex, + " [", exnMessage ex, "]" + ]; + List.app (fn s => Err.errMsg [" raised at ", s]) (SMLofNJ.exnHistory ex); + OS.Process.failure) + + fun main (_, args) = + if Options.processArgs args + then ( + Err.errMsg [Options.usage]; + OS.Process.failure) + else process() + + (* these functions are for debugging in the interactive loop *) + fun load file = let + val grm = checkPT (ParseFile.parse file) + val gla = GLA.mkGLA grm + in + (grm, gla) + end + + fun getNT (LLKSpec.Grammar {nterms, ...}) name = + hd (List.filter (fn nt => Nonterm.qualName nt = name) nterms) + + end diff --git a/ml-lpt/ml-antlr/ml-antlr b/ml-lpt/ml-antlr/ml-antlr new file mode 100755 index 0000000..b0104f5 --- /dev/null +++ b/ml-lpt/ml-antlr/ml-antlr @@ -0,0 +1,145 @@ +#!/bin/sh +# +# Wrapper for ml-antlr heap image +# + +mlantlrdir=$0 +scripthome=${mlantlrdir%ml-antlr} +#echo "script home:" +#echo $scripthome + +# +# determine heap suffix +# +case `uname -s` in + HP-UX) + ARCH=hppa + case `uname -r` in + *.09.*) OPSYS=hpux9 ;; + *.10.*) OPSYS=hpux ;; + *.11.*) OPSYS=hpux ;; + *) exit 1 ;; + esac + ;; + IRIX*) + ARCH=mipseb + OPSYS=irix + ;; + SunOS) + case `uname -r` in + 4.*) + OPSYS=sunos + case `/usr/bin/arch` in + sun4) ARCH=sparc;; + *) exit 1;; + esac + ;; + 5.*) + OPSYS=solaris + case `uname -p` in + sparc) ARCH=sparc;; + *86) ARCH=x86;; + *) exit 1;; + esac + ;; + *) exit 1;; + esac + ;; + AIX) + OPSYS=aix + ARCH=ppc + ;; + Darwin) + OPSYS=darwin + case `uname -p` in + powerpc) ARCH=ppc;; + i386) ARCH=x86;; + esac + ;; + OSF1) + case `uname -m` in + alpha) + case `uname -r` in + V2.*) ARCH=alpha32x; OPSYS=osf1 ;; + V3.*) ARCH=alpha32x; OPSYS=osf1 ;; + V4.*) ARCH=alpha32; OPSYS=dunix ;; + *) exit 1 ;; + esac + ;; + *) exit 1 ;; + esac + ;; + Linux) + OPSYS=linux + case `uname -m` in + *86) + ARCH=x86 + # we no longer support Linux before the 2.2 kernel. + case `uname -r` in + 2.2.*) ;; + 2.3.*) ;; + 2.4.*) ;; + 2.5.*) ;; + 2.6.*) ;; + *) exit 1 ;; + esac + ;; + # As long as we do not natively support the amd64 architecture, + # we should fallback to the x86 compatibility mode. --Stef + x86_64) ARCH=x86;; + ppc) + ARCH=ppc + case `uname -r` in + *osfmach*) OPSYS=mklinux ;; + *) ;; + esac + ;; + *) exit 1;; + esac + ;; + FreeBSD) + OPSYS=bsd + case `uname -m` in + *86) ARCH=x86;; + *) exit 1;; + esac + ;; + NetBSD) + OPSYS=bsd + case `uname -m` in + *86) ARCH=x86;; + *) exit 1;; + esac + ;; + Windows_NT) + OPSYS=win32 + case `uname -m` in + *86) ARCH=x86;; + *) exit 1;; + esac + ;; + CYGWIN_NT*) + # If the environment variable SMLNJ_CYGWIN_RUNTIME is defined, + # use cygwin as the runtime environment. + if [ "$SMLNJ_CYGWIN_RUNTIME" != "" ]; then + OPSYS=cygwin + else + OPSYS=win32 + fi + case `uname -m` in + *86) ARCH=x86;; + *) exit 1;; + esac + ;; + *) exit 1;; +esac + +heap=${scripthome}ml-antlr.${ARCH}-${OPSYS} + +if test ! -r $heap ; then + echo "ml-antlr: no heap image!" + exit 1 +fi + +exec sml @SMLload=$heap $@ + diff --git a/ml-lpt/ml-antlr/mlantlr.mlb b/ml-lpt/ml-antlr/mlantlr.mlb new file mode 100644 index 0000000..40cc1ad --- /dev/null +++ b/ml-lpt/ml-antlr/mlantlr.mlb @@ -0,0 +1,48 @@ +(* ml-antlr.mlb + * + * COPYRIGHT (c) 2009 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * MLB file for building ml-antlr under MLton. + *) + +local + $(SML_LIB)/basis/basis.mlb + $(SML_LIB)/basis/sml-nj.mlb + $(SML_LIB)/smlnj-lib/Util/smlnj-lib.mlb + $(SML_LIB)/smlnj-lib/PP/pp-lib.mlb + ../lib/ml-lpt-lib.mlb + ../common/lpt-common.mlb + + err.sml + FrontEnds/grammar-syntax.sml + action.sml + llk-spec.sml + token.sml + predict.sml + nonterm.sml + preitem.sml + item.sml + prod.sml + gla.sml + compute-predict.sml + BackEnds/back-end-sig.sml + BackEnds/SML/string-template.sml + options.sml + BackEnds/SML/abs.sml + BackEnds/SML/pp.sml + BackEnds/SML/ml.sml + BackEnds/SML/sml-output.sml + FrontEnds/ml-antlr-based/spec.grm.sml + FrontEnds/ml-antlr-based/spec.lex.sml + FrontEnds/ml-antlr-based/parse-file.sml + check-grammar.sml + BackEnds/LaTeX/string-template.sml + BackEnds/LaTeX/latex-output.sml + main.sml + +in + + mlton-main.sml + +end diff --git a/ml-lpt/ml-antlr/mlton-main.sml b/ml-lpt/ml-antlr/mlton-main.sml new file mode 100644 index 0000000..4044fc7 --- /dev/null +++ b/ml-lpt/ml-antlr/mlton-main.sml @@ -0,0 +1,7 @@ +(* mlton-main.sml + * + * COPYRIGHT (c) 2009 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + *) + +val _ = OS.Process.exit (Main.main (CommandLine.name (), CommandLine.arguments ())) diff --git a/ml-lpt/ml-antlr/nonterm.sml b/ml-lpt/ml-antlr/nonterm.sml new file mode 100644 index 0000000..1de4ef1 --- /dev/null +++ b/ml-lpt/ml-antlr/nonterm.sml @@ -0,0 +1,91 @@ +(* nonterm.sml + * + * COPYRIGHT (c) 2006 + * John Reppy (http://www.cs.uchicago.edu/~jhr) + * Aaron Turon (http://www.cs.uchicago.edu/~adrassi) + * All rights reserved. + * + * Utility functions for the nonterm datatype. + *) + +structure Nonterm = + struct + + datatype nonterm = datatype LLKSpec.nonterm + + fun name (NT{name, ...}) = Atom.toString name + fun toString nt = name nt + + fun qualName (nt as NT{binding = LLKSpec.WITHIN (LLKSpec.PROD {lhs, ...}), ...}) = + (qualName lhs) ^ "_" ^ (name nt) + | qualName nt = name nt + + fun isSubrule (NT{binding = LLKSpec.TOP, ...}) = false + | isSubrule _ = true + + fun parent (NT{binding = LLKSpec.WITHIN prod, ...}) = SOME prod + | parent _ = NONE + + fun isEBNF (NT{isEBNF = v, ...}) = v + fun prods (NT{prods, ...}) = !prods + fun formals (NT{formals, ...}) = !formals + fun ty (NT{ty, ...}) = !ty + + fun span (NT{loc, ...}) = !loc + + fun compare (NT{id=a, ...}, NT{id=b, ...}) = Int.compare(a, b) + fun lexCompare (NT{name=a, ...}, NT{name=b, ...}) = + String.compare(Atom.toString a, Atom.toString b) + fun same (NT{id=a, ...}, NT{id=b, ...}) = (a = b) + + structure Ord = + struct + type ord_key = nonterm + val compare = compare + end + + structure Set = RedBlackSetFn (Ord) + structure Map = RedBlackMapFn (Ord) + + fun setToString s = let + (* simple insertion sort to lexically order the set *) + fun ins (nt, []) = [nt] + | ins (nt, nt'::nts) = (case lexCompare(nt, nt') + of LESS => nt::nt'::nts + | _ => nt'::ins(nt, nts) + (* end case *)) + val nts = Set.foldl ins [] s + in + String.concat[ + "{", String.concatWith "," (List.map toString nts), "}" + ] + end + + structure SCC = GraphSCCFn (Ord) + structure S = LLKSpec + + (* topologically sort the nonterminal dependency graph + * rooted at starts; return a list of nonterm lists + *) + fun topsort (starts) = let + fun prodItems (S.PROD {rhs, ...}) = !rhs + fun sym (S.ITEM {sym, ...}) = sym + fun followItem (S.NONTERM (nt, _)) = + if isSubrule nt then follow nt + else [nt] + | followItem (S.CLOS nt) = follow nt + | followItem (S.POSCLOS nt) = follow nt + | followItem (S.OPT nt) = follow nt + | followItem (S.TOK _) = [] + and followItems itms = List.concat (map (followItem o sym) itms) + and follow nt = + List.concat (map (followItems o prodItems) + (prods nt)) + val scc = SCC.topOrder' {roots = starts, follow = follow} + fun compToList (SCC.SIMPLE nt) = [nt] + | compToList (SCC.RECURSIVE nts) = nts + in + map compToList scc + end + + end diff --git a/ml-lpt/ml-antlr/options.sml b/ml-lpt/ml-antlr/options.sml new file mode 100644 index 0000000..6b447ae --- /dev/null +++ b/ml-lpt/ml-antlr/options.sml @@ -0,0 +1,34 @@ +(* options.sml + * + * COPYRIGHT (c) 2007-2016 Fellowship of SML/NJ + * + * Processing of command line arguments + *) + +structure Options = + struct + + val unitActions = ref false + val debug = ref false + val dotOutput = ref false + val texOutput = ref false + val fname = ref "" + + (* process the command line arguments; return true if there is an error *) + fun processArgs args = let + fun procArg "--dot" = (dotOutput := true; false) + | procArg "--latex" = (texOutput := true; false) + | procArg "--unit-actions" = (unitActions := true; false) + | procArg "--debug" = (debug := true; false) + | procArg _ = true + in + case List.filter procArg args + of [file] => (fname := file; false) + | _ => true (* error: exactly one file should be specified *) + (* end case *) + end + + (* usage message *) + val usage = "usage: ml-antlr [--dot] [--latex] [--unit-actions | --debug] " + + end \ No newline at end of file diff --git a/ml-lpt/ml-antlr/predict.sml b/ml-lpt/ml-antlr/predict.sml new file mode 100644 index 0000000..c6cf744 --- /dev/null +++ b/ml-lpt/ml-antlr/predict.sml @@ -0,0 +1,38 @@ +(* predict.sml + * + * COPYRIGHT (c) 2006 + * John Reppy (http://www.cs.uchicago.edu/~jhr) + * Aaron Turon (http://www.cs.uchicago.edu/~adrassi) + * All rights reserved. + * + * Decision trees for predictions. + *) + +structure Predict = + struct + + structure S = LLKSpec + structure TSet = Token.Set + + datatype 'a tree + = Pick of 'a + | ByTok of (TSet.set * 'a tree) list + | Choice of 'a list (* try each in order *) +(* | Error of TSet.set *) + + datatype predict_maps = PMaps of { + prodPredict : S.nonterm -> S.prod tree, + ebnfPredict : S.nonterm -> bool tree + } + + fun toString aToS tree = let + fun tos (i, Pick a) = "PICK " ^ (aToS a) + | tos (i, ByTok branches) = "\n" ^ i ^ + (String.concatWith ("\n" ^ i) + (map (fn (s, t) => (Token.setToString s) ^ " => " ^ + tos (i ^ " ", t)) branches)) + in + tos ("", tree) + end + + end diff --git a/ml-lpt/ml-antlr/preitem.sml b/ml-lpt/ml-antlr/preitem.sml new file mode 100644 index 0000000..3a91a97 --- /dev/null +++ b/ml-lpt/ml-antlr/preitem.sml @@ -0,0 +1,32 @@ +structure Preitem = + struct + + structure S = LLKSpec + + fun ntToString nt = + if not (Nonterm.isSubrule nt) then Nonterm.name nt + else "(" ^ + (String.concatWith " | " (map prodToString (Nonterm.prods nt))) ^ + ")" + + and prodToString (S.PROD{rhs, ...}) = listToString (map symOf (!rhs)) + and symOf (S.ITEM {sym, ...}) = sym + and toString (S.TOK t) = Token.toString t + | toString (S.NONTERM (nt, args)) = + String.concat ([ntToString nt] @ ( + case args + of SOME args => + ["@(", Action.toString args, ")"] + | _ => [])) + | toString (S.CLOS nt) = ntToString nt ^ "*" + | toString (S.POSCLOS nt) = ntToString nt ^ "+" + | toString (S.OPT nt) = ntToString nt ^ "?" + and listToString l = String.concatWith " " (map toString l) + + fun name (S.TOK t) = Token.name t + | name (S.NONTERM (nt, _)) = Nonterm.name nt + | name (S.CLOS nt) = Nonterm.name nt + | name (S.POSCLOS nt) = Nonterm.name nt + | name (S.OPT nt) = Nonterm.name nt + + end \ No newline at end of file diff --git a/ml-lpt/ml-antlr/prod.sml b/ml-lpt/ml-antlr/prod.sml new file mode 100644 index 0000000..eeaa577 --- /dev/null +++ b/ml-lpt/ml-antlr/prod.sml @@ -0,0 +1,88 @@ +(* prod.sml + * + * COPYRIGHT (c) 2006 + * John Reppy (http://www.cs.uchicago.edu/~jhr) + * Aaron Turon (http://www.cs.uchicago.edu/~adrassi) + * All rights reserved. + * + * Utility functions for the prod datatype. + *) + +structure Prod = + struct + + datatype prod = datatype LLKSpec.prod + + fun id (PROD{id, ...}) = id + fun lhs (PROD{lhs, ...}) = lhs + fun items (PROD{rhs, ...}) = !rhs + fun itemBindings (PROD{rhsBindings, ...}) = + (fn (x, _) => x) (ListPair.unzip rhsBindings) + fun itemYields (PROD{rhsBindings, ...}) = + (fn (_, y) => y) (ListPair.unzip rhsBindings) + fun action (PROD{action, ...}) = action + fun pred (PROD{pred, ...}) = pred + + fun span (PROD{loc, ...}) = loc + + fun name (PROD{name, ...}) = name + fun fullName p = name p +(* fun fullName p = (case Nonterm.parent (lhs p) + of SOME p' => String.concat [ + fullName p', "_", + Nonterm.name (lhs p'), "_", + name p] + | NONE => name p) +*) + + fun toString p = concat[ + Nonterm.qualName (lhs p), " ::= ", + String.concatWith " " (List.map Item.toString (items p)) + ] + + fun compare (p1, p2) = Int.compare(id p1, id p2) + + fun canTry (PROD{try = true, ...}) = true + | canTry _ = false +(* + | canTry (PROD{rhs, ...}) = + List.exists (fn (LLKSpec.SEM_PRED _) => true + | _ => false) + rhs +*) + + fun bindingsAtAction p = let + val (leftBindings, formals) = case Nonterm.parent (lhs p) + of NONE => (AtomSet.empty, + AtomSet.addList (AtomSet.empty, + Nonterm.formals (lhs p))) + | SOME p' => let + fun isProdItm (itm) = (case Item.nt itm + of SOME nt => Nonterm.same (nt, lhs p) + | NONE => false) + val prodItm = valOf (List.find isProdItm (items p')) + in + Item.bindingsLeftOf (prodItm, p') + end + in + (AtomSet.addList (leftBindings, map Atom.atom (itemBindings p)), formals) + end + + structure Set = RedBlackSetFn ( + struct + type ord_key = prod + val compare = compare + end) + + structure Map = RedBlackMapFn ( + struct + type ord_key = prod + val compare = compare + end) + + fun sortProds prods = + ListMergeSort.sort + (fn (x, y) => compare (x, y) = GREATER) + prods + + end diff --git a/ml-lpt/ml-antlr/sources.cm b/ml-lpt/ml-antlr/sources.cm new file mode 100644 index 0000000..c793f78 --- /dev/null +++ b/ml-lpt/ml-antlr/sources.cm @@ -0,0 +1,84 @@ +(* sources.cm + * + * COPYRIGHT (c) 2009 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + *) + +Library + + signature BACK_END + + structure Action + structure CheckGrammar + structure ComputePredict + structure Err + structure GLA + structure GrammarSyntax + structure Item + structure LLKSpec + structure Nonterm + structure ParseFile + structure Predict + structure Preitem + structure Prod + structure Token + + structure ExpandFile + structure ML + structure SMLOutput + structure LaTeXOutput + structure Main + +is + + $/basis.cm + $/smlnj-lib.cm + $/pp-lib.cm + $/ml-lpt-lib.cm + + ../common/lpt-common.cm + + BackEnds/back-end-sig.sml + + action.sml + check-grammar.sml + compute-predict.sml + err.sml + gla.sml + item.sml + llk-spec.sml + main.sml + nonterm.sml + options.sml + predict.sml + preitem.sml + prod.sml + token.sml + + FrontEnds/grammar-syntax.sml + FrontEnds/ml-antlr-based/parse-file.sml + +(* During installation we rely on pre-generated files + * to avoid certain chicken-and-egg problems. *) +#if defined(NO_ML_ULEX) + FrontEnds/ml-antlr-based/spec.lex.sml +#else + FrontEnds/ml-antlr-based/spec.lex : ml-ulex +#endif + +(* During installation we rely on pre-generated files + * to avoid certain chicken-and-egg problems. *) +#if defined(NO_ML_ANTLR) + FrontEnds/ml-antlr-based/spec.grm.sml +#else + FrontEnds/ml-antlr-based/spec.grm : ml-antlr +#endif + + BackEnds/SML/ml.sml + BackEnds/SML/abs.sml + BackEnds/SML/pp.sml + BackEnds/SML/sml-output.sml + BackEnds/SML/smlnj-template.sml + + BackEnds/LaTeX/latex-output.sml + BackEnds/LaTeX/smlnj-template.sml diff --git a/ml-lpt/ml-antlr/token.sml b/ml-lpt/ml-antlr/token.sml new file mode 100644 index 0000000..11fc60e --- /dev/null +++ b/ml-lpt/ml-antlr/token.sml @@ -0,0 +1,65 @@ +(* token.sml + * + * COPYRIGHT (c) 2006 + * John Reppy (http://www.cs.uchicago.edu/~jhr) + * Aaron Turon (http://www.cs.uchicago.edu/~adrassi) + * All rights reserved. + * + * Utility functions for the token datatype. + *) + +structure Token = + struct + + datatype token = datatype LLKSpec.token + + fun name (T{name, ...}) = Atom.toString name + fun ty (T{ty, ...}) = ty + fun hasTy (T{ty = SOME _, ...}) = true + | hasTy _ = false + fun isKW (T{keyword, ...}) = keyword + fun default (T{default, ...}) = default + + fun toString (T{abbrev = SOME a, ...}) = Atom.toString a + | toString tok = name tok + + fun quoted (T{abbrev = SOME a, ...}) = Atom.toString a + | quoted tok = String.concat["\"", name tok, "\""] + + fun def tok = (case ty tok + of NONE => name tok + | SOME ty => String.concat[name tok, " of ", ty] + (* end case *)) + + fun compare (T{id=a, ...}, T{id=b, ...}) = Int.compare(a, b) + fun lexCompare (T{name=a, ...}, T{name=b, ...}) = + String.compare(Atom.toString a, Atom.toString b) + fun same (T{id=a, ...}, T{id=b, ...}) = (a = b) + + structure Set = RedBlackSetFn ( + struct + type ord_key = token + val compare = compare + end) + + structure Map = RedBlackMapFn ( + struct + type ord_key = token + val compare = compare + end) + + fun setToString s = let + (* simple insertion sort to lexically order the set *) + fun ins (tok, []) = [tok] + | ins (tok, tok'::toks) = (case lexCompare(tok, tok') + of LESS => tok::tok'::toks + | _ => tok'::ins(tok, toks) + (* end case *)) + val toks = Set.foldl ins [] s + in + String.concat[ + "{", String.concatWith "," (List.map toString toks), "}" + ] + end + + end diff --git a/ml-lpt/ml-antlr/tool/.cm/GUID/tool.sml b/ml-lpt/ml-antlr/tool/.cm/GUID/tool.sml new file mode 100644 index 0000000..8f947bf --- /dev/null +++ b/ml-lpt/ml-antlr/tool/.cm/GUID/tool.sml @@ -0,0 +1 @@ +guid-$/(ml-antlr-tool.cm):tool.sml-1714016086.363 diff --git a/ml-lpt/ml-antlr/tool/.cm/SKEL/tool.sml b/ml-lpt/ml-antlr/tool/.cm/SKEL/tool.sml new file mode 100644 index 0000000..6257f9d --- /dev/null +++ b/ml-lpt/ml-antlr/tool/.cm/SKEL/tool.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f1d"Tools"ad"AntlrTool"h0 \ No newline at end of file diff --git a/ml-lpt/ml-antlr/tool/.cm/amd64-unix/tool.sml b/ml-lpt/ml-antlr/tool/.cm/amd64-unix/tool.sml new file mode 100644 index 0000000..c502ab8 Binary files /dev/null and b/ml-lpt/ml-antlr/tool/.cm/amd64-unix/tool.sml differ diff --git a/ml-lpt/ml-antlr/tool/ext.sml b/ml-lpt/ml-antlr/tool/ext.sml new file mode 100644 index 0000000..76fb530 --- /dev/null +++ b/ml-lpt/ml-antlr/tool/ext.sml @@ -0,0 +1,19 @@ +(* ext.sml + * + * Plugin for registering classifiers. + * + * Copyright (c) 2007 by The Fellowship of SML/NJ + * + * Author: Matthias Blume (blume@tti-c.org) + *) +structure AntlrGrmExt = struct + local + val suffixes = ["grm"] + val class = "ml-antlr" + fun sfx s = + Tools.registerClassifier + (Tools.stdSfxClassifier { sfx = s, class = class }) + in + val _ = app sfx suffixes + end +end diff --git a/ml-lpt/ml-antlr/tool/grm-ext.cm b/ml-lpt/ml-antlr/tool/grm-ext.cm new file mode 100644 index 0000000..aa3d75b --- /dev/null +++ b/ml-lpt/ml-antlr/tool/grm-ext.cm @@ -0,0 +1,13 @@ +(* + * Plugin for registering "grm" suffix that points to ml-antlr. + * + * (C) 2007 The Fellowship of SML/NJ. + * + * This should currently not be installed as it conflicts with + * the equally-named plugin library for legacy ml-yacc. + *) +Library + structure AntlrGrmExt +is + $smlnj/cm/tools.cm + ext.sml diff --git a/ml-lpt/ml-antlr/tool/ml-antlr-tool.cm b/ml-lpt/ml-antlr/tool/ml-antlr-tool.cm new file mode 100644 index 0000000..1718f82 --- /dev/null +++ b/ml-lpt/ml-antlr/tool/ml-antlr-tool.cm @@ -0,0 +1,10 @@ +(* + * The plugin library for ML-Antlr + * + * (C) 2007 The Fellowship of SML/NJ + *) +Library + structure AntlrTool +is + $smlnj/cm/tools.cm + tool.sml diff --git a/ml-lpt/ml-antlr/tool/tool.sml b/ml-lpt/ml-antlr/tool/tool.sml new file mode 100644 index 0000000..8817065 --- /dev/null +++ b/ml-lpt/ml-antlr/tool/tool.sml @@ -0,0 +1,16 @@ +(* + * Running ML-Antlr from CM. + * + * (C) 2007 The Fellowship of SML/NJ. + *) +structure AntlrTool = struct + val _ = Tools.registerStdShellCmdTool { + tool = "ML-Antlr", + class = "ml-antlr", + cmdStdPath = fn () => ("ml-antlr", []), + template = NONE, + extensionStyle = + Tools.EXTEND [("sml", SOME "sml", fn too => too)], + dflopts = [] + } +end diff --git a/ml-lpt/ml-ulex/.cm/GUID/lex-gen.sml b/ml-lpt/ml-ulex/.cm/GUID/lex-gen.sml new file mode 100644 index 0000000..4313a5a --- /dev/null +++ b/ml-lpt/ml-ulex/.cm/GUID/lex-gen.sml @@ -0,0 +1 @@ +guid-(sources.cm):lex-gen.sml-1714016111.306 diff --git a/ml-lpt/ml-ulex/.cm/GUID/main.sml b/ml-lpt/ml-ulex/.cm/GUID/main.sml new file mode 100644 index 0000000..c1872af --- /dev/null +++ b/ml-lpt/ml-ulex/.cm/GUID/main.sml @@ -0,0 +1 @@ +guid-(sources.cm):main.sml-1714016112.702 diff --git a/ml-lpt/ml-ulex/.cm/GUID/options.sml b/ml-lpt/ml-ulex/.cm/GUID/options.sml new file mode 100644 index 0000000..d63c3ec --- /dev/null +++ b/ml-lpt/ml-ulex/.cm/GUID/options.sml @@ -0,0 +1 @@ +guid-(sources.cm):options.sml-1714016110.002 diff --git a/ml-lpt/ml-ulex/.cm/GUID/reg-exp-sig.sml b/ml-lpt/ml-ulex/.cm/GUID/reg-exp-sig.sml new file mode 100644 index 0000000..b511a66 --- /dev/null +++ b/ml-lpt/ml-ulex/.cm/GUID/reg-exp-sig.sml @@ -0,0 +1 @@ +guid-(sources.cm):reg-exp-sig.sml-1714016110.032 diff --git a/ml-lpt/ml-ulex/.cm/GUID/reg-exp.sml b/ml-lpt/ml-ulex/.cm/GUID/reg-exp.sml new file mode 100644 index 0000000..9659b8b --- /dev/null +++ b/ml-lpt/ml-ulex/.cm/GUID/reg-exp.sml @@ -0,0 +1 @@ +guid-(sources.cm):reg-exp.sml-1714016110.045 diff --git a/ml-lpt/ml-ulex/.cm/SKEL/lex-gen.sml b/ml-lpt/ml-ulex/.cm/SKEL/lex-gen.sml new file mode 100644 index 0000000..b2fb0cc --- /dev/null +++ b/ml-lpt/ml-ulex/.cm/SKEL/lex-gen.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f5Cd"ListPair"RegExp"d"List"d"Options"d"AtomSet"Cd"Array2"d"LexSpec"d"Atom"LexOutputSpec"d"Vector"Nad"LexGen"jh4ad"RE"gp1ad"SIS"gp2d"SymSet"ad"LO"gp1ad"Map"j0gp1e"RedBlackMapFn"* \ No newline at end of file diff --git a/ml-lpt/ml-ulex/.cm/SKEL/main.sml b/ml-lpt/ml-ulex/.cm/SKEL/main.sml new file mode 100644 index 0000000..90d56eb --- /dev/null +++ b/ml-lpt/ml-ulex/.cm/SKEL/main.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f6d"SMLTblOutput"Cd"Match"d"OS"d"SMLFunOutput"d"MLULexInput"d"SMLofNJ"Cd"List"d"Int"d"General"d"Options"d"LexSpec"Cd"DotOutput"d"String"d"DumpOutput"d"MLLexInput"d"TextIO"Nad"Main"h3ad"RE"gp1d"RegExp"ad"Lex"gp1d"LexGen"ad"LO"gp1d"LexOutputSpec" \ No newline at end of file diff --git a/ml-lpt/ml-ulex/.cm/SKEL/options.sml b/ml-lpt/ml-ulex/.cm/SKEL/options.sml new file mode 100644 index 0000000..540b891 --- /dev/null +++ b/ml-lpt/ml-ulex/.cm/SKEL/options.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f1d"String"ad"Options"h0 \ No newline at end of file diff --git a/ml-lpt/ml-ulex/.cm/SKEL/reg-exp-sig.sml b/ml-lpt/ml-ulex/.cm/SKEL/reg-exp-sig.sml new file mode 100644 index 0000000..1cce645 --- /dev/null +++ b/ml-lpt/ml-ulex/.cm/SKEL/reg-exp-sig.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f2d"UTF8"d"Vector"ac"REG_EXP"h2ad"Sym"gp1c"INTERVAL_DOMAIN"ad"SymSet"gp1c"INTERVAL_SET" \ No newline at end of file diff --git a/ml-lpt/ml-ulex/.cm/SKEL/reg-exp.sml b/ml-lpt/ml-ulex/.cm/SKEL/reg-exp.sml new file mode 100644 index 0000000..b40e90f --- /dev/null +++ b/ml-lpt/ml-ulex/.cm/SKEL/reg-exp.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f5Cd"Char"d"List"d"Options"d"String"d"Vector"Nad"RegExp"jh6ad"W"gp1d"Word"CaSym"0aSymSet"jgp1gp1e"IntervalSetFn"ad"SIS"gp1ad"Map"jgp1e"RedBlackMapFn"ad"SISSet"jgp1e"RedBlackSetFn"Ngp1c"REG_EXP" \ No newline at end of file diff --git a/ml-lpt/ml-ulex/.cm/amd64-unix/lex-gen.sml b/ml-lpt/ml-ulex/.cm/amd64-unix/lex-gen.sml new file mode 100644 index 0000000..cd62f9b Binary files /dev/null and b/ml-lpt/ml-ulex/.cm/amd64-unix/lex-gen.sml differ diff --git a/ml-lpt/ml-ulex/.cm/amd64-unix/main.sml b/ml-lpt/ml-ulex/.cm/amd64-unix/main.sml new file mode 100644 index 0000000..f81f744 Binary files /dev/null and b/ml-lpt/ml-ulex/.cm/amd64-unix/main.sml differ diff --git a/ml-lpt/ml-ulex/.cm/amd64-unix/options.sml b/ml-lpt/ml-ulex/.cm/amd64-unix/options.sml new file mode 100644 index 0000000..a869d8b Binary files /dev/null and b/ml-lpt/ml-ulex/.cm/amd64-unix/options.sml differ diff --git a/ml-lpt/ml-ulex/.cm/amd64-unix/reg-exp-sig.sml b/ml-lpt/ml-ulex/.cm/amd64-unix/reg-exp-sig.sml new file mode 100644 index 0000000..6b21df6 Binary files /dev/null and b/ml-lpt/ml-ulex/.cm/amd64-unix/reg-exp-sig.sml differ diff --git a/ml-lpt/ml-ulex/.cm/amd64-unix/reg-exp.sml b/ml-lpt/ml-ulex/.cm/amd64-unix/reg-exp.sml new file mode 100644 index 0000000..b0baf0a Binary files /dev/null and b/ml-lpt/ml-ulex/.cm/amd64-unix/reg-exp.sml differ diff --git a/ml-lpt/ml-ulex/BackEnds/.cm/GUID/lex-output-spec.sml b/ml-lpt/ml-ulex/BackEnds/.cm/GUID/lex-output-spec.sml new file mode 100644 index 0000000..94af0f8 --- /dev/null +++ b/ml-lpt/ml-ulex/BackEnds/.cm/GUID/lex-output-spec.sml @@ -0,0 +1 @@ +guid-(sources.cm):BackEnds/lex-output-spec.sml-1714016110.450 diff --git a/ml-lpt/ml-ulex/BackEnds/.cm/GUID/output-sig.sml b/ml-lpt/ml-ulex/BackEnds/.cm/GUID/output-sig.sml new file mode 100644 index 0000000..b0a25be --- /dev/null +++ b/ml-lpt/ml-ulex/BackEnds/.cm/GUID/output-sig.sml @@ -0,0 +1 @@ +guid-(sources.cm):BackEnds/output-sig.sml-1714016110.516 diff --git a/ml-lpt/ml-ulex/BackEnds/.cm/SKEL/lex-output-spec.sml b/ml-lpt/ml-ulex/BackEnds/.cm/SKEL/lex-output-spec.sml new file mode 100644 index 0000000..df3b8ee --- /dev/null +++ b/ml-lpt/ml-ulex/BackEnds/.cm/SKEL/lex-output-spec.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f2d"RegExp"d"Vector"ad"LexOutputSpec"h0 \ No newline at end of file diff --git a/ml-lpt/ml-ulex/BackEnds/.cm/SKEL/output-sig.sml b/ml-lpt/ml-ulex/BackEnds/.cm/SKEL/output-sig.sml new file mode 100644 index 0000000..469808b --- /dev/null +++ b/ml-lpt/ml-ulex/BackEnds/.cm/SKEL/output-sig.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f1d"LexOutputSpec"ac"OUTPUT"h0 \ No newline at end of file diff --git a/ml-lpt/ml-ulex/BackEnds/.cm/amd64-unix/lex-output-spec.sml b/ml-lpt/ml-ulex/BackEnds/.cm/amd64-unix/lex-output-spec.sml new file mode 100644 index 0000000..37515ec Binary files /dev/null and b/ml-lpt/ml-ulex/BackEnds/.cm/amd64-unix/lex-output-spec.sml differ diff --git a/ml-lpt/ml-ulex/BackEnds/.cm/amd64-unix/output-sig.sml b/ml-lpt/ml-ulex/BackEnds/.cm/amd64-unix/output-sig.sml new file mode 100644 index 0000000..4152e6a Binary files /dev/null and b/ml-lpt/ml-ulex/BackEnds/.cm/amd64-unix/output-sig.sml differ diff --git a/ml-lpt/ml-ulex/BackEnds/Dot/.cm/GUID/dot-output.sml b/ml-lpt/ml-ulex/BackEnds/Dot/.cm/GUID/dot-output.sml new file mode 100644 index 0000000..93169a1 --- /dev/null +++ b/ml-lpt/ml-ulex/BackEnds/Dot/.cm/GUID/dot-output.sml @@ -0,0 +1 @@ +guid-(sources.cm):BackEnds/Dot/dot-output.sml-1714016111.381 diff --git a/ml-lpt/ml-ulex/BackEnds/Dot/.cm/SKEL/dot-output.sml b/ml-lpt/ml-ulex/BackEnds/Dot/.cm/SKEL/dot-output.sml new file mode 100644 index 0000000..57dc628 --- /dev/null +++ b/ml-lpt/ml-ulex/BackEnds/Dot/.cm/SKEL/dot-output.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f5Cd"List"d"Int"d"String"d"TextIO"d"Vector"Nad"DotOutput"jh3ad"RE"gp1d"RegExp"ad"Lex"gp1d"LexGen"ad"LO"gp1d"LexOutputSpec"gp1c"OUTPUT" \ No newline at end of file diff --git a/ml-lpt/ml-ulex/BackEnds/Dot/.cm/amd64-unix/dot-output.sml b/ml-lpt/ml-ulex/BackEnds/Dot/.cm/amd64-unix/dot-output.sml new file mode 100644 index 0000000..d3c3c6e Binary files /dev/null and b/ml-lpt/ml-ulex/BackEnds/Dot/.cm/amd64-unix/dot-output.sml differ diff --git a/ml-lpt/ml-ulex/BackEnds/Dot/dot-output.sml b/ml-lpt/ml-ulex/BackEnds/Dot/dot-output.sml new file mode 100644 index 0000000..c29e33e --- /dev/null +++ b/ml-lpt/ml-ulex/BackEnds/Dot/dot-output.sml @@ -0,0 +1,109 @@ +(* dot-output.sml + * + * COPYRIGHT (c) 2005 + * John Reppy (http://www.cs.uchicago.edu/~jhr) + * Aaron Turon (adrassi@gmail.com) + * All rights reserved. + * + * Produce a .dot file from a DFA. + * (See www.graphviz.org for details about DOT) + *) + +structure DotOutput : OUTPUT = + struct + + structure RE = RegExp + structure Lex = LexGen + structure LO = LexOutputSpec + + datatype attribute = ATTR of string * string + datatype node = NODE of string * attribute list + datatype di_edge = EDGE of string * string * attribute list + datatype di_graph = GRAPH of string * node list * di_edge list * attribute list + + fun replBS str = + String.translate + (fn #"\\" => "\\\\" | #"\"" => "\\\"" | c => String.str c) + str + + fun writeGraph (out, graph) = let + (* output a string *) + fun wr s = TextIO.output (out, s) + (* output a string list *) + fun wrs ss = wr (String.concat ss) + (* indent to some level *) + fun wrIndent 0 = () + | wrIndent lvl = (wr " "; wrIndent (lvl - 1)) + (* apply output functions, indenting each time *) + fun app indent f list = + List.app (fn x => (wrIndent indent; f x)) list + fun wrAttr (ATTR (name, value)) = wrs ([ + "[ ", name, " = \"", value, "\" ]", "\n" + ]) + fun wrNode (NODE (name, atts)) = + (wr name; + wr "\n"; + app 2 wrAttr atts) + fun wrEdge (EDGE (no1, no2, atts)) = + (wrs ([no1, " -> ", no2, "\n"]); + app 2 wrAttr atts) + fun wrGraphAttr attr = + (wr "graph\n"; + wrIndent 2; + wrAttr attr) + fun wrGraph (GRAPH (name, nodes, edges, atts)) = + (wrs (["digraph ", name, " {\n"]); + app 1 wrGraphAttr atts; + app 1 wrNode nodes; + app 1 wrEdge edges; + wr "}") + in wrGraph graph + end + + fun mkGraph states = let + (* node id -> node name *) + fun name id = "Q" ^ Int.toString id + fun mkNode (LO.State{id, label, final = [], ...}) = + NODE (name id, [ATTR ("shape", "circle")]) + | mkNode (LO.State{id, label, final = i::_, ...}) = + NODE (name id, + [ATTR ("shape", "doublecircle"), + ATTR ("label", (name id) ^ "/" ^ (Int.toString i))]) + fun mkEdge fromID (symSet, LO.State{id, ...}) = + EDGE (name fromID, name id, + [ATTR ("label", replBS (RE.toString (RE.mkSymSet symSet)))]) + fun mkEdges (LO.State{id, next, ...}) = + List.map (mkEdge id) (List.rev (!next)) + fun mkRule (i, re) = String.concat ( + ["Rule ", + Int.toString i, + ": ", + replBS (RE.toString re), + "\\n"]) + (* node for input REs *) + fun mkRules res = + NODE ("Rules", + [ATTR ("label", Vector.foldli + (fn (i, r, s) => s ^ (mkRule (i, r))) + "" res), + ATTR ("shape", "plaintext"), + ATTR ("fontname", "Courier")]) + val nodes' = List.map mkNode states + val nodes = nodes' + val edges = List.concat (List.map mkEdges states) + in GRAPH ("DFA", nodes, edges, + [ATTR ("size", "7,10"), + ATTR ("rankdir", "LR")]) + end + + fun output (spec, fname) = let + val LO.Spec {dfa, startStates, ...} = spec + val out = TextIO.openOut (fname ^ ".dot") + val graph = mkGraph dfa + in + print (" writing " ^ fname ^ ".dot\n"); + writeGraph (out, graph) + before TextIO.closeOut out + end + + end diff --git a/ml-lpt/ml-ulex/BackEnds/Dump/.cm/GUID/dump-output.sml b/ml-lpt/ml-ulex/BackEnds/Dump/.cm/GUID/dump-output.sml new file mode 100644 index 0000000..5b17c71 --- /dev/null +++ b/ml-lpt/ml-ulex/BackEnds/Dump/.cm/GUID/dump-output.sml @@ -0,0 +1 @@ +guid-(sources.cm):BackEnds/Dump/dump-output.sml-1714016111.417 diff --git a/ml-lpt/ml-ulex/BackEnds/Dump/.cm/SKEL/dump-output.sml b/ml-lpt/ml-ulex/BackEnds/Dump/.cm/SKEL/dump-output.sml new file mode 100644 index 0000000..4df8e18 --- /dev/null +++ b/ml-lpt/ml-ulex/BackEnds/Dump/.cm/SKEL/dump-output.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f4d"List"d"Int"d"TextIO"d"Vector"ad"DumpOutput"jh2ad"RE"gp1d"RegExp"ad"LO"gp1d"LexOutputSpec"gp1c"OUTPUT" \ No newline at end of file diff --git a/ml-lpt/ml-ulex/BackEnds/Dump/.cm/amd64-unix/dump-output.sml b/ml-lpt/ml-ulex/BackEnds/Dump/.cm/amd64-unix/dump-output.sml new file mode 100644 index 0000000..2b6a161 Binary files /dev/null and b/ml-lpt/ml-ulex/BackEnds/Dump/.cm/amd64-unix/dump-output.sml differ diff --git a/ml-lpt/ml-ulex/BackEnds/Dump/dump-output.sml b/ml-lpt/ml-ulex/BackEnds/Dump/dump-output.sml new file mode 100644 index 0000000..a22d5ae --- /dev/null +++ b/ml-lpt/ml-ulex/BackEnds/Dump/dump-output.sml @@ -0,0 +1,54 @@ +(* dump-output.sml + * + * COPYRIGHT (c) 2005 + * John Reppy (http://www.cs.uchicago.edu/~jhr) + * Aaron Turon (adrassi@gmail.com) + * All rights reserved. + * + * Dump (to stderr) the complete DFA + *) + +structure DumpOutput : OUTPUT = + struct + + structure RE = RegExp + structure LO = LexOutputSpec + + fun pr s = TextIO.output(TextIO.stdOut, s) + fun prl ss = pr(concat ss) + + fun nameOf (LO.State{id, ...}) = "Q" ^ Int.toString id + + fun prState (s as LO.State{id, label, final, next, ...}) = let + val name = (case final + of [] => nameOf s + | id::_ => concat[nameOf s, " (act ", Int.toString id, ")"] + (* end case *)) + fun prEdge (symSet, st) = prl[ + " -- ", RE.toString (RE.mkSymSet symSet), " --> ", nameOf st, "\n" + ] + fun prRE re = prl[" ", RE.toString re, "\n"] + in + prl[name, ": "(*, RE.toString label*), "\n"]; + Vector.app prRE label; + List.app prEdge (!next); + pr "\n" + end + + fun dumpDFA states = + (List.app prState states; + pr (Int.toString (List.length states)); + pr " states\n\n") + + + fun outSS (label, ss) = prl ["Start state: ", label, " => ", nameOf ss, "\n"] + + fun output (spec, _) = let + val LO.Spec {dfa, startStates, ...} = spec + in + dumpDFA dfa; + pr "\n"; + List.app outSS startStates + end + + end diff --git a/ml-lpt/ml-ulex/BackEnds/Match/.cm/GUID/match.sml b/ml-lpt/ml-ulex/BackEnds/Match/.cm/GUID/match.sml new file mode 100644 index 0000000..cdd1065 --- /dev/null +++ b/ml-lpt/ml-ulex/BackEnds/Match/.cm/GUID/match.sml @@ -0,0 +1 @@ +guid-(sources.cm):BackEnds/Match/match.sml-1714016110.550 diff --git a/ml-lpt/ml-ulex/BackEnds/Match/.cm/SKEL/match.sml b/ml-lpt/ml-ulex/BackEnds/Match/.cm/SKEL/match.sml new file mode 100644 index 0000000..a7d55dd --- /dev/null +++ b/ml-lpt/ml-ulex/BackEnds/Match/.cm/SKEL/match.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f7d"Word"d"Char"CRegExp"d"List"d"String"d"TextIO"d"Vector"Nad"Match"jh2ad"SIS"gp2d"SymSet"ad"LO"gp1d"LexOutputSpec"gp1c"OUTPUT" \ No newline at end of file diff --git a/ml-lpt/ml-ulex/BackEnds/Match/.cm/amd64-unix/match.sml b/ml-lpt/ml-ulex/BackEnds/Match/.cm/amd64-unix/match.sml new file mode 100644 index 0000000..b90a3eb Binary files /dev/null and b/ml-lpt/ml-ulex/BackEnds/Match/.cm/amd64-unix/match.sml differ diff --git a/ml-lpt/ml-ulex/BackEnds/Match/match.sml b/ml-lpt/ml-ulex/BackEnds/Match/match.sml new file mode 100644 index 0000000..23c4494 --- /dev/null +++ b/ml-lpt/ml-ulex/BackEnds/Match/match.sml @@ -0,0 +1,58 @@ +(* match.sml + * + * COPYRIGHT (c) 2005 + * John Reppy (http://www.cs.uchicago.edu/~jhr) + * Aaron Turon (adrassi@gmail.com) + * All rights reserved. + * + * A simple match "backend" that runs the produced state machine directly + * on stdin. Treats end of line as end of input. Note that a match only + * occurs if the machine is in an accepting state after consuming the + * complete input; in particular, the input is meant to represent a single + * token, and the machine does not restart until the end of input. + * + **************************************************************** + * NOTE! NOT UNICODE COMPATIBLE + **************************************************************** + *) + +structure Match : OUTPUT = + struct + + structure SIS = RegExp.SymSet + structure LO = LexOutputSpec + + fun match (LO.State{id, label, final, next, ...}, []) = final + | match (LO.State{id, label, final, next, ...}, sym::r) = let + fun goto [] = [] + | goto ((syms, s)::r') = + if SIS.member (syms, sym) + then match(s, r) + else goto r' + in + goto (!next) + end + + fun matchLoop states = (case TextIO.inputLine (TextIO.stdIn) + of NONE => () + | SOME "\n" => () + | SOME s => let + val chars = List.rev (List.tl (List.rev (String.explode s))) + val syms = List.map (Word.fromInt o Char.ord) chars + val q0 as LO.State {label, ...} = List.hd states + val _ = case match (q0, syms) + of [] => print "-- No match --\n" + | i::_ => + (print "-- Match: "; + print (RegExp.toString (Vector.sub (label, i))); + print " --\n") + in + (* continue I/O loop *) + matchLoop states + end + (* end case *)) + + fun output (LO.Spec {dfa, ...}, _) = + matchLoop(dfa) + + end diff --git a/ml-lpt/ml-ulex/BackEnds/SML/.cm/GUID/ml.sml b/ml-lpt/ml-ulex/BackEnds/SML/.cm/GUID/ml.sml new file mode 100644 index 0000000..afcf2fa --- /dev/null +++ b/ml-lpt/ml-ulex/BackEnds/SML/.cm/GUID/ml.sml @@ -0,0 +1 @@ +guid-(sources.cm):BackEnds/SML/ml.sml-1714016110.338 diff --git a/ml-lpt/ml-ulex/BackEnds/SML/.cm/GUID/sml-fun-output.sml b/ml-lpt/ml-ulex/BackEnds/SML/.cm/GUID/sml-fun-output.sml new file mode 100644 index 0000000..6bb176e --- /dev/null +++ b/ml-lpt/ml-ulex/BackEnds/SML/.cm/GUID/sml-fun-output.sml @@ -0,0 +1 @@ +guid-(sources.cm):BackEnds/SML/sml-fun-output.sml-1714016110.563 diff --git a/ml-lpt/ml-ulex/BackEnds/SML/.cm/GUID/sml-output-support.sml b/ml-lpt/ml-ulex/BackEnds/SML/.cm/GUID/sml-output-support.sml new file mode 100644 index 0000000..e9cce8c --- /dev/null +++ b/ml-lpt/ml-ulex/BackEnds/SML/.cm/GUID/sml-output-support.sml @@ -0,0 +1 @@ +guid-(sources.cm):BackEnds/SML/sml-output-support.sml-1714016110.478 diff --git a/ml-lpt/ml-ulex/BackEnds/SML/.cm/GUID/sml-tbl-output.sml b/ml-lpt/ml-ulex/BackEnds/SML/.cm/GUID/sml-tbl-output.sml new file mode 100644 index 0000000..1e1bed0 --- /dev/null +++ b/ml-lpt/ml-ulex/BackEnds/SML/.cm/GUID/sml-tbl-output.sml @@ -0,0 +1 @@ +guid-(sources.cm):BackEnds/SML/sml-tbl-output.sml-1714016110.518 diff --git a/ml-lpt/ml-ulex/BackEnds/SML/.cm/GUID/smlnj-templates.sml b/ml-lpt/ml-ulex/BackEnds/SML/.cm/GUID/smlnj-templates.sml new file mode 100644 index 0000000..6c630fb --- /dev/null +++ b/ml-lpt/ml-ulex/BackEnds/SML/.cm/GUID/smlnj-templates.sml @@ -0,0 +1 @@ +guid-(sources.cm):BackEnds/SML/smlnj-templates.sml-1714016110.475 diff --git a/ml-lpt/ml-ulex/BackEnds/SML/.cm/SKEL/ml.sml b/ml-lpt/ml-ulex/BackEnds/SML/.cm/SKEL/ml.sml new file mode 100644 index 0000000..59cf8dd --- /dev/null +++ b/ml-lpt/ml-ulex/BackEnds/SML/.cm/SKEL/ml.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f1RegExp"ad"ML"h1baPP"gp1d"TextIOPP"f2 \ No newline at end of file diff --git a/ml-lpt/ml-ulex/BackEnds/SML/.cm/SKEL/sml-fun-output.sml b/ml-lpt/ml-ulex/BackEnds/SML/.cm/SKEL/sml-fun-output.sml new file mode 100644 index 0000000..66483b3 --- /dev/null +++ b/ml-lpt/ml-ulex/BackEnds/SML/.cm/SKEL/sml-fun-output.sml @@ -0,0 +1,2 @@ +Skeleton 5 +ad"SMLFunOutput"jh3egp1d"SMLOutputSupport"f7d"ML"d"LO"Cd"ListMergeSort"d"ListPair"d"TextIOPP"d"SIS"d"List"Cd"Int"d"Options"d"ExpandFile"d"TextIO"d"Vector"Nad"SCC"jh0gp1e"GraphSCCFn"gp1c"OUTPUT" \ No newline at end of file diff --git a/ml-lpt/ml-ulex/BackEnds/SML/.cm/SKEL/sml-output-support.sml b/ml-lpt/ml-ulex/BackEnds/SML/.cm/SKEL/sml-output-support.sml new file mode 100644 index 0000000..df4310c --- /dev/null +++ b/ml-lpt/ml-ulex/BackEnds/SML/.cm/SKEL/sml-output-support.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f6d"ML"Cd"ListPair"d"Int"d"Options"d"String"d"TextIO"Nad"SMLOutputSupport"h5CaRE"gp1RegExp"aSym"gp2"ad"SIS"gp2d"SymSet"ad"LO"gp1d"LexOutputSpec"egp1d"Templates"N \ No newline at end of file diff --git a/ml-lpt/ml-ulex/BackEnds/SML/.cm/SKEL/sml-tbl-output.sml b/ml-lpt/ml-ulex/BackEnds/SML/.cm/SKEL/sml-tbl-output.sml new file mode 100644 index 0000000..7eac944 --- /dev/null +++ b/ml-lpt/ml-ulex/BackEnds/SML/.cm/SKEL/sml-tbl-output.sml @@ -0,0 +1,2 @@ +Skeleton 5 +ad"SMLTblOutput"jh2egp1d"SMLOutputSupport"f9d"ML"d"LO"d"StringCvt"d"TextIOPP"Cd"SIS"d"Word"d"Char"d"List"d"Int"Cd"Options"d"String"d"ExpandFile"d"TextIO"d"Vector"Ngp1c"OUTPUT" \ No newline at end of file diff --git a/ml-lpt/ml-ulex/BackEnds/SML/.cm/SKEL/smlnj-templates.sml b/ml-lpt/ml-ulex/BackEnds/SML/.cm/SKEL/smlnj-templates.sml new file mode 100644 index 0000000..71db7cd --- /dev/null +++ b/ml-lpt/ml-ulex/BackEnds/SML/.cm/SKEL/smlnj-templates.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f1d"ExpandFile"ad"Templates"h0 \ No newline at end of file diff --git a/ml-lpt/ml-ulex/BackEnds/SML/.cm/amd64-unix/ml.sml b/ml-lpt/ml-ulex/BackEnds/SML/.cm/amd64-unix/ml.sml new file mode 100644 index 0000000..31e5c7f Binary files /dev/null and b/ml-lpt/ml-ulex/BackEnds/SML/.cm/amd64-unix/ml.sml differ diff --git a/ml-lpt/ml-ulex/BackEnds/SML/.cm/amd64-unix/sml-fun-output.sml b/ml-lpt/ml-ulex/BackEnds/SML/.cm/amd64-unix/sml-fun-output.sml new file mode 100644 index 0000000..444b2b8 Binary files /dev/null and b/ml-lpt/ml-ulex/BackEnds/SML/.cm/amd64-unix/sml-fun-output.sml differ diff --git a/ml-lpt/ml-ulex/BackEnds/SML/.cm/amd64-unix/sml-output-support.sml b/ml-lpt/ml-ulex/BackEnds/SML/.cm/amd64-unix/sml-output-support.sml new file mode 100644 index 0000000..7252f92 Binary files /dev/null and b/ml-lpt/ml-ulex/BackEnds/SML/.cm/amd64-unix/sml-output-support.sml differ diff --git a/ml-lpt/ml-ulex/BackEnds/SML/.cm/amd64-unix/sml-tbl-output.sml b/ml-lpt/ml-ulex/BackEnds/SML/.cm/amd64-unix/sml-tbl-output.sml new file mode 100644 index 0000000..9c37649 Binary files /dev/null and b/ml-lpt/ml-ulex/BackEnds/SML/.cm/amd64-unix/sml-tbl-output.sml differ diff --git a/ml-lpt/ml-ulex/BackEnds/SML/.cm/amd64-unix/smlnj-templates.sml b/ml-lpt/ml-ulex/BackEnds/SML/.cm/amd64-unix/smlnj-templates.sml new file mode 100644 index 0000000..3e45b44 Binary files /dev/null and b/ml-lpt/ml-ulex/BackEnds/SML/.cm/amd64-unix/smlnj-templates.sml differ diff --git a/ml-lpt/ml-ulex/BackEnds/SML/ml.sml b/ml-lpt/ml-ulex/BackEnds/SML/ml.sml new file mode 100644 index 0000000..3190ded --- /dev/null +++ b/ml-lpt/ml-ulex/BackEnds/SML/ml.sml @@ -0,0 +1,258 @@ +(* ml.sml + * + * COPYRIGHT (c) 1999 Bell Labs, Lucent Technologies + * (Used and modified with permission) + * Aaron Turon (http://www.cs.uchicago.edu/~adrassi) + * + * ML core language representation and pretty-printing + *) + +structure ML = + struct + + datatype raw_ml = Raw of ml_token list + + and ml_token = Tok of string + + datatype cmp_op = LT | GT | EQ | LEQ | GEQ + datatype bool_op = AND | OR + + (* a subset of ML expressions and patterns that we use to represent the + * match DFA + *) + datatype ml_exp + = ML_Var of string + | ML_Cmp of (cmp_op * ml_exp * ml_exp) + | ML_Bool of (bool_op * ml_exp * ml_exp) + | ML_Case of ml_exp * (ml_pat * ml_exp) list + | ML_If of ml_exp * ml_exp * ml_exp + | ML_App of (string * ml_exp list) + | ML_Let of (string * ml_exp * ml_exp) + | ML_Fun of (string * string list * ml_exp * ml_exp) + | ML_Seq of ml_exp list + | ML_Tuple of ml_exp list + | ML_List of ml_exp list + | ML_RefGet of ml_exp + | ML_RefPut of ml_exp * ml_exp + | ML_Raw of ml_token list + | ML_NewGroup of ml_exp + + and ml_pat + = ML_Wild + | ML_VarPat of string + | ML_IntPat of RegExp.Sym.point + | ML_ConPat of string * ml_pat list + + local + structure PP = TextIOPP + in + fun ppML (ppStrm, e) = let + fun str s = PP.string ppStrm s + fun sp () = PP.space ppStrm 1 + fun nl () = PP.newline ppStrm + fun hbox () = PP.openHBox ppStrm + fun vbox () = PP.openVBox ppStrm (PP.Abs 2) + fun close () = PP.closeBox ppStrm + fun letBody (true, pp) = ( + nl(); + str "in"; + vbox(); nl(); pp(); close(); + nl(); + str "end") + | letBody (false, pp) = pp() + fun ppExp (inLet, prevFn, e) = (case e + of (ML_Var x) => letBody(inLet, fn () => str x) + | (ML_Cmp (cop, e1, e2)) => letBody(inLet, fn () => ( + ppExp' e1; + sp(); + str (case cop + of LT => "<" + | GT => ">" + | EQ => "=" + | LEQ => "<=" + | GEQ => ">="); + sp(); + ppExp' e2)) + | (ML_Bool (bop, e1, e2)) => letBody(inLet, fn () => ( + ppExp' e1; + sp(); + str (case bop + of AND => "andalso" + | OR => "orelse"); + sp(); + ppExp' e2)) + | (ML_Case(arg, pl)) => let + fun doCases (_, []) = () + | doCases (isFirst, (p, e)::r) = ( + nl(); +(* NOTE: the following seems to trigger a bug in the PP library (bad indent) *) + PP.openHOVBox ppStrm (PP.Abs 6); + hbox(); + if isFirst + then (sp(); str "of") + else (PP.space ppStrm 2; str "|"); + sp(); + ppPat p; sp(); str "=>"; + close(); + sp(); + hbox(); + PP.openVBox ppStrm (PP.Abs 0); + ppExp' e; + close(); + close(); + close(); + doCases (false, r)) + in + letBody(inLet, fn () => ( + hbox(); + str "(case"; sp(); str "("; ppExp' arg; str ")"; + close(); + doCases (true, pl); + nl(); str "(* end case *))")) + end + | (ML_App(f, args)) => letBody(inLet, fn () => ( + hbox(); + str f; str "("; + case args + of [] => () + | [e] => ppExp' e + | (e::r) => ( + ppExp' e; app (fn e => (str ","; sp(); ppExp' e)) r) + (* end case *); + str ")"; + close())) + | (ML_If(e1, e2, e3 as ML_If _)) => letBody(inLet, fn () => ( + PP.openVBox ppStrm (PP.Abs 0); + vbox(); + hbox(); str "if"; sp(); ppExp' e1; close(); nl(); + hbox(); str "then"; sp(); + vbox(); ppExp' e2; close(); + close(); + close(); nl(); + hbox(); str "else"; sp(); + ppExp' e3; + close(); + close())) + | (ML_If(e1, e2, e3)) => letBody(inLet, fn () => ( + vbox(); + hbox(); str "if"; sp(); ppExp' e1; close(); nl(); + hbox(); str "then"; sp(); + vbox(); ppExp' e2; close(); + close(); nl(); + hbox(); str "else"; sp(); + vbox(); ppExp' e3; close(); + close(); + close())) + | (ML_Let(x, e1, e2)) => let + fun pp () = ( + nl(); + hbox(); + str "val"; sp(); str x; sp(); str "="; sp(); + ppExp' e1; + close(); + ppExp (true, false, e2)) + in + if inLet + then pp() + else ( + str "let"; + PP.openVBox ppStrm (PP.Abs 0); + pp(); + close()) + end + | (ML_Fun(f, params, body, e)) => let + fun pp prefix = ( + nl(); + hbox(); + str prefix; sp(); str f; sp(); + str "("; + case params + of [] => () + | [x] => str x + | (x::r) => ( + str x; app (fn x => (str ","; sp(); str x)) r) + (* end case *); + str ")"; sp(); str "="; sp(); + PP.openVBox ppStrm (PP.Abs 6); + ppExp' body; + close(); + close(); + ppExp (true, true, e)) + in + if inLet + then if prevFn then pp "and" else pp "fun" + else ( + PP.openVBox ppStrm (PP.Abs 0); + str "let"; + pp "fun"; + close()) + end + | (ML_Seq[]) => letBody(inLet, fn () => str "()") + | (ML_Seq[e]) => ppExp(inLet, prevFn, e) + | (ML_Seq(e::r)) => let + fun pp () = ( + ppExp' e; + app (fn e => (str ";"; sp(); ppExp' e)) r) + in + if inLet + then ( + nl(); str "in"; + PP.openBox ppStrm (PP.Abs 2); + nl(); pp(); + close(); + nl(); + str "end") + else ( + PP.openBox ppStrm (PP.Abs 0); + str "("; pp(); str ")"; + close()) + end + | (ML_Tuple[]) => letBody(inLet, fn () => str "()") + | (ML_Tuple(e::r)) => letBody (inLet, fn () => ( + PP.openBox ppStrm (PP.Abs 2); + str "("; + ppExp' e; + app (fn e => (str ","; sp(); ppExp' e)) r; + str ")"; + close())) + | (ML_List[]) => letBody(inLet, fn () => str "[]") + | (ML_List(e::r)) => letBody (inLet, fn () => ( + PP.openBox ppStrm (PP.Abs 2); + str "["; + ppExp' e; + app (fn e => (str ","; sp(); ppExp' e)) r; + str "]"; + close())) + | (ML_RefGet e) => letBody(inLet, fn () => ( + str "!("; + ppExp' e; + str ")")) + | (ML_RefPut (e1, e2)) => letBody(inLet, fn () => ( + ppExp' e1; + str " := "; + ppExp' e2)) + | (ML_Raw toks) => letBody(inLet, fn () => ( + hbox(); app (fn (Tok s) => str s) toks; close())) + | (ML_NewGroup e) => ppExp(inLet, false, e) + (* end case *)) + and ppExp' e = ppExp(false, false, e) + and ppPat p = let + fun pp (ML_Wild) = str "_" + | pp (ML_VarPat x) = str x + | pp (ML_IntPat n) = str(RegExp.symToString n) + | pp (ML_ConPat(c, [])) = str c + | pp (ML_ConPat(c, [p])) = ( + str c; str "("; pp p; str ")") + | pp (ML_ConPat(c, p::r)) = ( + str c; str "("; pp p; + app (fn p => (str ","; pp p)) r; + str ")") + in + hbox(); pp p; close() + end + in + ppExp (false, false, e) + end + end (* local *) + + end diff --git a/ml-lpt/ml-ulex/BackEnds/SML/mlton-templates.sml b/ml-lpt/ml-ulex/BackEnds/SML/mlton-templates.sml new file mode 100644 index 0000000..5273e2f --- /dev/null +++ b/ml-lpt/ml-ulex/BackEnds/SML/mlton-templates.sml @@ -0,0 +1,15 @@ +(* mlton-templates.sml + * + * COPYRIGHT (c) 2009 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * In MLton, the templates live in files that we generate at compile time. + *) + +structure Templates = + struct + + val lexTemplate = LexTemplate.template + val ulexTemplate = ULexTemplate.template + + end diff --git a/ml-lpt/ml-ulex/BackEnds/SML/sml-fun-output.sml b/ml-lpt/ml-ulex/BackEnds/SML/sml-fun-output.sml new file mode 100644 index 0000000..d951fd9 --- /dev/null +++ b/ml-lpt/ml-ulex/BackEnds/SML/sml-fun-output.sml @@ -0,0 +1,208 @@ +(* sml-fun-output.sml + * + * COPYRIGHT (c) 2005 + * John Reppy (http://www.cs.uchicago.edu/~jhr) + * Aaron Turon (adrassi@gmail.com) + * All rights reserved. + * + * Code generation for SML, using mutually recursive functions for states. + *) + +structure SMLFunOutput : OUTPUT = + struct + + open SMLOutputSupport + + (* transition interval representation *) + datatype transition_interval = TI of SIS.interval * int * ml_exp + fun intervalOf (TI (i, t, e)) = i + fun tagOf (TI (i, t, e)) = t + fun actionOf (TI (i, t, e)) = e + fun sameTag (TI (_, t1, _), TI (_, t2, _)) = t1 = t2 + fun singleton (TI ((i, j), _, _)) = i = j + + (* generate code for transitions: generate a hard-coded binary + * search on accepting characters + *) + fun mkTrans ([], _) = raise Fail "(BUG) SMLFunOutput: alphabet not covered" + | mkTrans ([t], _) = actionOf t + | mkTrans ([t1, t2], _) = + if sameTag (t1, t2) then actionOf t1 + else let + val (_, t1end) = intervalOf t1 + val (t2start, _) = intervalOf t2 + in + if singleton t1 then + ML_If (ML_Cmp (ML.EQ, inpVar, ML_Sym t1end), + actionOf t1, + actionOf t2) + else if singleton t2 then + ML_If (ML_Cmp (ML.EQ, inpVar, ML_Sym t2start), + actionOf t2, + actionOf t1) + else + ML_If (ML_Cmp (ML.LEQ, inpVar, ML_Sym t1end), + actionOf t1, + actionOf t2) + end + | mkTrans (ts, len) = let + val lh = len div 2 + fun split (ls, 0, l1) = (List.rev l1, ls) + | split (l::ls, cnt, l1) = split (ls, cnt-1, l::l1) + | split _ = raise Fail "(BUG) SMLFunOutput: split failed" + val (ts1, ts2) = split (ts, lh, []) + val (ts2start, ts2end) = intervalOf (List.hd ts2) + val (ts2', ts2len) = if ts2start = ts2end + then (List.tl ts2, len - lh - 1) + else (ts2, len - lh) + (* we want to take advantage of the special case when + * len = 3 and hd ts2 is a singleton. this case often + * occurs when we have an arrow for a single character. + *) + val elseClause = + if lh = 1 andalso ts2len = 1 + then mkTrans ([List.hd ts1, List.hd ts2'], 2) + else ML_If (ML_Cmp (ML.LT, inpVar, ML_Sym ts2start), + mkTrans (ts1, lh), + mkTrans (ts2', ts2len)) + in + ML_If (ML_Cmp (ML.EQ, inpVar, ML_Sym ts2start), + actionOf (List.hd ts2), + elseClause) + end + + fun mkState (arg, eofRules, actionVec) (s, k) = let + val LO.State {id, startState, label, final, next} = s + fun addMatch (i, lastMatch) = let + val lastMatch' = if hasREJECT (Vector.sub (actionVec, i)) + then lastMatch + else ML_Var "yyNO_MATCH" + in + ML_App ("yyMATCH", + [ML_Var "strm", + ML_Var (actName i), + lastMatch']) + end + val (curMatch, nextMatches) = (case final + of [] => (NONE, []) + | f::fs => (SOME f, fs) + (* end case *)) + val lastMatch = List.foldr addMatch (ML_Var "lastMatch") nextMatches + (* collect all valid transition symbols *) + val labels = List.foldl SIS.union SIS.empty (List.map #1 (!next)) + (* pair transition intervals with associated actions/transitions *) + val newFinal = (case curMatch + of SOME j => addMatch (j, lastMatch) + | NONE => lastMatch + (* end case *)) + fun arrows (syms, s) = + mapInt + (fn i => TI (i, idOf s, + ML_App (nameOf s, [ML_Var "strm'", newFinal]))) + syms + val TIs = List.map arrows (!next) + val errAct' = + (case curMatch + of SOME j => + ML_App (actName j, + [ML_Var "strm", + if hasREJECT (Vector.sub (actionVec, j)) + then lastMatch + else ML_Var "yyNO_MATCH"]) + | NONE => ML_App ("yystuck", [lastMatch]) + (* end case *)) + (* if start state, check for eof *) + val errAct = if startState + then mkEOF (eofRules, errAct') + else errAct' + (* error transitions = complement(valid transitions) *) + val error = SIS.complement labels + val errTIs = mapInt (fn i => TI (i, ~1, errAct)) error + (* the arrows represent intervals that partition the entire + * alphabet, with each interval mapped to some transition or + * action. we sort the intervals by their smallest member. + *) + fun gt (a, b) = (#1 (intervalOf a)) > (#1 (intervalOf b)) + val sorted = ListMergeSort.sort gt (List.concat (errTIs :: TIs)) + (* now we want to find adjacent partitions with the same + * action, and merge their intervals + *) + fun merge [] = [] + | merge [t] = [t] + | merge (t1::t2::ts) = + if sameTag (t1, t2) then let + val TI ((i, _), tag, act) = t1 + val TI ((_, j), _, _ ) = t2 + val t = TI ((i, j), tag, act) + in + merge (t::ts) + end + else + t1::(merge (t2::ts)) + val merged = merge sorted + (* create the transition code, which at least has an error transition *) + val trans = mkTrans(merged, List.length merged) + (* create the input code *) + val getInp = + ML_Case (ML_App ("yygetc", [ML_Var "strm"]), + [(ML_ConPat ("NONE", []), errAct), + (ML_ConPat ("SOME", [ML_VarPat (inp ^ ", strm'")]), + trans)]) + in + ML_Fun (nameOf s, ["strm", "lastMatch : yymatch"], getInp, k) + end + + structure SCC = GraphSCCFn ( + struct + type ord_key = LO.dfa_state + fun compare (LO.State{id = id1, ...}, LO.State{id = id2, ...}) = + Int.compare (id1, id2) + end) + + fun mkStates (arg, eofRules, actions, dfa, startStates, k) = let + fun follow (LO.State {next, ...}) = + #2 (ListPair.unzip (!next)) + val scc = SCC.topOrder' { roots = startStates, follow = follow } + val mkState' = mkState (arg, eofRules, actions) + fun mkGrp (SCC.SIMPLE state, k) = ML_NewGroup (mkState' (state, k)) + | mkGrp (SCC.RECURSIVE states, k) = + ML_NewGroup (List.foldr mkState' k states) + in + List.foldl mkGrp k scc + end + + fun lexerHook spec strm = let + val LO.Spec {arg, actions, dfa, startStates, eofRules, ...} = spec + fun matchSS (label, state) = + (ML_ConPat (label, []), + ML_App (nameOf state, + [ML_RefGet (ML_Var "yystrm"), + ML_Var "yyNO_MATCH"])) + val innerExp = ML_Case (ML_RefGet (ML_Var "yyss"), + List.map matchSS startStates) + val statesExp = mkStates + (arg, eofRules, actions, dfa, + #2 (ListPair.unzip startStates), innerExp) + val lexerExp = Vector.foldri mkAction statesExp actions + val ppStrm = TextIOPP.openOut {dst = strm, wid = 80} + in + ML.ppML (ppStrm, lexerExp) + end + + fun tableHook _ strm = TextIO.output (strm, "Vector.fromList []"); + + fun output (spec, fname) = + ExpandFile.expandTemplate { + src = if !Options.lexCompat + then lexTemplate else ulexTemplate, + dst = fname ^ ".sml", + hooks = [("lexer", lexerHook spec), + ("startstates", startStatesHook spec), + ("userdecls", userDeclsHook spec), + ("header", headerHook spec), + ("args", argsHook spec), + ("pargs", pargsHook spec), + ("table", tableHook spec)] + } + + end diff --git a/ml-lpt/ml-ulex/BackEnds/SML/sml-output-support.sml b/ml-lpt/ml-ulex/BackEnds/SML/sml-output-support.sml new file mode 100644 index 0000000..33073cb --- /dev/null +++ b/ml-lpt/ml-ulex/BackEnds/SML/sml-output-support.sml @@ -0,0 +1,166 @@ +(* sml-output-support.sml + * + * COPYRIGHT (c) 2005 + * John Reppy (http://www.cs.uchicago.edu/~jhr) + * Aaron Turon (adrassi@gmail.com) + * All rights reserved. + * + * Some supporting code shared between different methods of + * SML output + *) + +structure SMLOutputSupport = + struct + + structure RE = RegExp + structure Sym = RE.Sym + structure SIS = RegExp.SymSet + structure LO = LexOutputSpec + + datatype ml_exp = datatype ML.ml_exp + datatype ml_pat = datatype ML.ml_pat + + fun ML_Sym s = ML_Raw [ML.Tok (RE.symToString s)] + + val inp = "inp" + val inpVar = ML_Var inp + + fun idOf (LO.State {id, ...}) = id + fun nameOf' i = "yyQ" ^ (Int.toString i) + fun nameOf s = nameOf' (idOf s) + fun actName i = "yyAction" ^ (Int.toString i) + + (* simple heuristic to avoid computing unused values *) + local + val has = String.isSubstring + in + val hasyytext = has "yytext" + val hasyysubstr = has "yysubstr" + val hasyyunicode= has "yyunicode" + val hasREJECT = has "REJECT" + val hasyylineno = has "yylineno" + val hasyycolno = has "yycolno" + end + + (* map over the intervals of a symbol set *) + fun mapInt f syms = + SIS.foldlInt (fn (i, ls) => (f i)::ls) [] syms + + (* generate code for an action *) + fun mkAction (i, action, k) = let + val updStrm = ML_RefPut (ML_Var "yystrm", ML_Var "strm") + val act = ML_Raw [ML.Tok action] + val seq = ML_Seq [updStrm, act] + val lets = if hasyysubstr action andalso not (!Options.lexCompat) + then ML_Let + ("yysubstr", + ML_App("yymksubstr", [ML_Var "strm"]), + seq) + else seq + val lett = if hasyytext action + then ML_Let + ("yytext", + ML_App("yymktext", [ML_Var "strm"]), + lets) + else lets + val letu = if hasyyunicode action + then ML_Let + ("yyunicode", + ML_App("yymkunicode", [ML_Var "strm"]), + lett) + else lett + val letl = if hasyylineno action + then ML_Let + ("yylineno", + ML_App("ref", + [ML_App ("yygetlineNo", + [ML_RefGet (ML_Var "yystrm")])]), + letu) + else letu + val letc = if hasyycolno action + then ML_Let + ("yycolno", + ML_App("ref", + [ML_App ("yygetcolNo", + [ML_RefGet (ML_Var "yystrm")])]), + letl) + else letl + val letr = if hasREJECT action + then ML_Let + ("oldStrm", ML_RefGet (ML_Var "yystrm"), + ML_Fun + ("REJECT", [], + ML_Seq + [ML_RefPut (ML_Var "yystrm", + ML_Var "oldStrm"), + ML_App("yystuck", [ML_Var "lastMatch"])], + letc)) + else letc + in + ML_NewGroup (ML_Fun (actName i, ["strm", "lastMatch : yymatch"], letr, k)) + end + + (* output start state datatype *) + fun startStatesHook spec strm = let + val LO.Spec {startStates, ...} = spec + val machNames = #1 (ListPair.unzip startStates) + in + TextIO.output (strm, String.concatWith " | " machNames) + end + + (* output user declarations *) + fun userDeclsHook spec strm = let + val LO.Spec {decls, ...} = spec + in + TextIO.output (strm, decls) + end + + (* output "header" -- the structure/functor definition of the lexer *) + fun headerHook spec strm = let + val LO.Spec {header, ...} = spec + in + TextIO.output (strm, header) + end + + (* generate args parameter for the lexer *) + fun argsHook spec strm = let + val LO.Spec {arg, ...} = spec + val arg' = case (String.size arg, !Options.lexCompat) + of (0, true) => "(yyarg as ())" + | (_, true) => "(yyarg as " ^ arg ^ ") ()" + | (0, false) => "" + | (_, false) => "(yyarg as " ^ arg ^ ")" + in + TextIO.output (strm, arg') + end + + (* ml-ulex mode only *) + fun pargsHook spec strm = let + val LO.Spec {arg, ...} = spec + in + TextIO.output (strm, + if String.size arg > 0 then "yyarg" else "") + end + + fun mkEOF (eofRules, innerExp) = + if !Options.lexCompat + then + ML_If (ML_App("yyInput.eof", [ML_RefGet (ML_Var "yystrm")]), + ML_App("UserDeclarations.eof", [ML_Var "yyarg"]), + innerExp) + else + ML_If ( + ML_App("ULexBuffer.eof", [ML_RefGet (ML_Var "yystrm")]), + ML_Let("yycolno", ML_App("ref", [ML_App ("yygetcolNo", [ML_RefGet (ML_Var "yystrm")])]), + ML_Let("yylineno", ML_App("ref", [ML_App ("yygetlineNo", [ML_RefGet (ML_Var "yystrm")])]), + ML_Case(ML_RefGet (ML_Var "yyss"), + map (fn ("_", act) => (ML_Wild, ML_Raw [ML.Tok ("(" ^ act ^ ")")]) + | (ss, act) => (ML_ConPat (ss, []), ML_Raw [ML.Tok ("(" ^ act ^ ")")])) + eofRules + ))), + innerExp) + + (* include the templates, which are defined in a compiler-specific way *) + open Templates + + end \ No newline at end of file diff --git a/ml-lpt/ml-ulex/BackEnds/SML/sml-tbl-output.sml b/ml-lpt/ml-ulex/BackEnds/SML/sml-tbl-output.sml new file mode 100644 index 0000000..48c3fe6 --- /dev/null +++ b/ml-lpt/ml-ulex/BackEnds/SML/sml-tbl-output.sml @@ -0,0 +1,90 @@ +(* sml-tbl-output.sml + * + * COPYRIGHT (c) 2005 + * John Reppy (http://www.cs.uchicago.edu/~jhr) + * Aaron Turon (adrassi@gmail.com) + * All rights reserved. + * + * Code generation for SML, using a transition table + *) + +structure SMLTblOutput : OUTPUT = + struct + + open SMLOutputSupport + + (* generate code for an individual state in the table *) + fun mkState actionVec s = let + val LO.State {id, startState, label, final, next} = s + fun w2s w = + if !Options.lexCompat + then "#\"" ^ (Char.toString o Char.chr o Word.toInt) w ^ "\"" + else "0w" ^ Word.fmt StringCvt.DEC w + val ASCII = SIS.interval (0w0, 0w255) + fun mkTrans (set, state) = + map (fn (c1, c2) => String.concat [ + "(", w2s c1, ",", + w2s c2, ",", + Int.toString (idOf state), ")"]) + (if !Options.lexCompat then + SIS.intervals (SIS.intersect (set, ASCII)) + else SIS.intervals set) + val allTransitions = List.concat (map mkTrans (!next)) + in + String.concat [ + "([", + String.concatWith ",\n" allTransitions, + "], [", + String.concatWith ", " (map Int.toString final), + "])"] + end + + fun tableHook spec strm = let + val LO.Spec {actions, dfa, startStates, ...} = spec + in + if !Options.strictSML + then TextIO.output (strm, "Vector.fromList [") + else TextIO.output (strm, "#["); + TextIO.output (strm, + String.concatWith ", " (map (mkState actions) dfa)); + TextIO.output (strm, "]") + end + + fun lexerHook spec strm = let + val LO.Spec {actions, dfa, startStates, arg, eofRules, ...} = spec + fun matchSS (label, state) = + (ML_ConPat (label, []), + ML_App ("yygo yyactTable ", + [ML_Var (Int.toString (idOf state)), + ML_RefGet (ML_Var "yystrm"), + ML_Var "yyNO_MATCH"])) + val innerExp = ML_Case (ML_RefGet (ML_Var "yyss"), + List.map matchSS startStates) + val eofCheckExp = mkEOF (eofRules, innerExp) + val actList = Vector.foldri + (fn (i, _, ls) => (ML_Var o actName) i :: ls) + [] actions + val actTableExp = ML_Let ("yyactTable", + ML_App ("Vector.fromList", [ML_List actList]), + eofCheckExp) + val lexerExp = Vector.foldri mkAction actTableExp actions + val ppStrm = TextIOPP.openOut {dst = strm, wid = 80} + in + ML.ppML (ppStrm, lexerExp) + end + + fun output (spec, fname) = + ExpandFile.expandTemplate { + src = if !Options.lexCompat + then lexTemplate else ulexTemplate, + dst = fname ^ ".sml", + hooks = [("lexer", lexerHook spec), + ("startstates", startStatesHook spec), + ("userdecls", userDeclsHook spec), + ("header", headerHook spec), + ("args", argsHook spec), + ("pargs", pargsHook spec), + ("table", tableHook spec)] + } + + end diff --git a/ml-lpt/ml-ulex/BackEnds/SML/smlnj-templates.sml b/ml-lpt/ml-ulex/BackEnds/SML/smlnj-templates.sml new file mode 100644 index 0000000..caf3485 --- /dev/null +++ b/ml-lpt/ml-ulex/BackEnds/SML/smlnj-templates.sml @@ -0,0 +1,15 @@ +(* smlnj-templates.sml + * + * COPYRIGHT (c) 2009 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * In SML/NJ, we load the template files when we elaborate this module. + *) + +structure Templates = + struct + + val lexTemplate = ExpandFile.mkTemplateFromFile "BackEnds/SML/template-ml-lex.sml" + val ulexTemplate = ExpandFile.mkTemplateFromFile "BackEnds/SML/template-ml-ulex.sml" + + end diff --git a/ml-lpt/ml-ulex/BackEnds/SML/template-ml-lex.sml b/ml-lpt/ml-ulex/BackEnds/SML/template-ml-lex.sml new file mode 100644 index 0000000..790e540 --- /dev/null +++ b/ml-lpt/ml-ulex/BackEnds/SML/template-ml-lex.sml @@ -0,0 +1,182 @@ +@header@ + = struct + + structure yyInput : sig + + type stream + val mkStream : (int -> string) -> stream + val fromStream : TextIO.StreamIO.instream -> stream + val getc : stream -> (Char.char * stream) option + val getpos : stream -> int + val getlineNo : stream -> int + val subtract : stream * stream -> string + val eof : stream -> bool + val lastWasNL : stream -> bool + + end = struct + + structure TIO = TextIO + structure TSIO = TIO.StreamIO + structure TPIO = TextPrimIO + + datatype stream = Stream of { + strm : TSIO.instream, + id : int, (* track which streams originated + * from the same stream *) + pos : int, + lineNo : int, + lastWasNL : bool + } + + local + val next = ref 0 + in + fun nextId() = !next before (next := !next + 1) + end + + val initPos = 2 (* ml-lex bug compatibility *) + + fun mkStream inputN = let + val strm = TSIO.mkInstream + (TPIO.RD { + name = "lexgen", + chunkSize = 4096, + readVec = SOME inputN, + readArr = NONE, + readVecNB = NONE, + readArrNB = NONE, + block = NONE, + canInput = NONE, + avail = (fn () => NONE), + getPos = NONE, + setPos = NONE, + endPos = NONE, + verifyPos = NONE, + close = (fn () => ()), + ioDesc = NONE + }, "") + in + Stream {strm = strm, id = nextId(), pos = initPos, lineNo = 1, + lastWasNL = true} + end + + fun fromStream strm = Stream { + strm = strm, id = nextId(), pos = initPos, lineNo = 1, lastWasNL = true + } + + fun getc (Stream {strm, pos, id, lineNo, ...}) = (case TSIO.input1 strm + of NONE => NONE + | SOME (c, strm') => + SOME (c, Stream { + strm = strm', + pos = pos+1, + id = id, + lineNo = lineNo + + (if c = #"\n" then 1 else 0), + lastWasNL = (c = #"\n") + }) + (* end case*)) + + fun getpos (Stream {pos, ...}) = pos + + fun getlineNo (Stream {lineNo, ...}) = lineNo + + fun subtract (new, old) = let + val Stream {strm = strm, pos = oldPos, id = oldId, ...} = old + val Stream {pos = newPos, id = newId, ...} = new + val (diff, _) = if newId = oldId andalso newPos >= oldPos + then TSIO.inputN (strm, newPos - oldPos) + else raise Fail + "BUG: yyInput: attempted to subtract incompatible streams" + in + diff + end + + fun eof s = not (isSome (getc s)) + + fun lastWasNL (Stream {lastWasNL, ...}) = lastWasNL + + end + + datatype yystart_state = +@startstates@ + + structure UserDeclarations = + struct + +@userdecls@ + + + end + + datatype yymatch + = yyNO_MATCH + | yyMATCH of yyInput.stream * action * yymatch + withtype action = yyInput.stream * yymatch -> UserDeclarations.lexresult + + local + + val yytable = +@table@ + + fun mk yyins = let + (* current start state *) + val yyss = ref INITIAL + fun YYBEGIN ss = (yyss := ss) + (* current input stream *) + val yystrm = ref yyins + (* get one char of input *) + val yygetc = yyInput.getc + (* create yytext *) + fun yymktext(strm) = yyInput.subtract (strm, !yystrm) + open UserDeclarations + fun lex +@args@ + = let + fun continue() = let + val yylastwasn = yyInput.lastWasNL (!yystrm) + fun yystuck (yyNO_MATCH) = raise Fail "stuck state" + | yystuck (yyMATCH (strm, action, old)) = + action (strm, old) + val yypos = yyInput.getpos (!yystrm) + val yygetlineNo = yyInput.getlineNo + fun yyactsToMatches (strm, [], oldMatches) = oldMatches + | yyactsToMatches (strm, act::acts, oldMatches) = + yyMATCH (strm, act, yyactsToMatches (strm, acts, oldMatches)) + fun yygo actTable = + (fn (~1, _, oldMatches) => yystuck oldMatches + | (curState, strm, oldMatches) => let + val (transitions, finals') = Vector.sub (yytable, curState) + val finals = List.map (fn i => Vector.sub (actTable, i)) finals' + fun tryfinal() = + yystuck (yyactsToMatches (strm, finals, oldMatches)) + fun find (c, []) = NONE + | find (c, (c1, c2, s)::ts) = + if c1 <= c andalso c <= c2 then SOME s + else find (c, ts) + in case yygetc strm + of SOME(c, strm') => + (case find (c, transitions) + of NONE => tryfinal() + | SOME n => + yygo actTable + (n, strm', + yyactsToMatches (strm, finals, oldMatches))) + | NONE => tryfinal() + end) + in +@lexer@ + + end + in + continue() + handle IO.Io{cause, ...} => raise cause + end + in + lex + end + in + fun makeLexer yyinputN = mk (yyInput.mkStream yyinputN) + end + + end diff --git a/ml-lpt/ml-ulex/BackEnds/SML/template-ml-ulex.sml b/ml-lpt/ml-ulex/BackEnds/SML/template-ml-ulex.sml new file mode 100644 index 0000000..c880c8e --- /dev/null +++ b/ml-lpt/ml-ulex/BackEnds/SML/template-ml-ulex.sml @@ -0,0 +1,165 @@ +@header@ + = struct + + datatype yystart_state = +@startstates@ + + local + + structure UserDeclarations = + struct + +@userdecls@ + + end + + datatype yymatch + = yyNO_MATCH + | yyMATCH of ULexBuffer.stream * action * yymatch + withtype action = ULexBuffer.stream * yymatch -> UserDeclarations.lex_result + + val yytable : ((UTF8.wchar * UTF8.wchar * int) list * int list) Vector.vector = +@table@ + + fun yystreamify' p input = ULexBuffer.mkStream (p, input) + + fun yystreamifyReader' p readFn strm = let + val s = ref strm + fun iter(strm, n, accum) = + if n > 1024 then (String.implode (rev accum), strm) + else (case readFn strm + of NONE => (String.implode (rev accum), strm) + | SOME(c, strm') => iter (strm', n+1, c::accum)) + fun input() = let + val (data, strm) = iter(!s, 0, []) + in + s := strm; + data + end + in + yystreamify' p input + end + + fun yystreamifyInstream' p strm = yystreamify' p (fn ()=>TextIO.input strm) + + fun innerLex +@args@ +(yystrm_, yyss_, yysm) = let + (* current start state *) + val yyss = ref yyss_ + fun YYBEGIN ss = (yyss := ss) + (* current input stream *) + val yystrm = ref yystrm_ + fun yysetStrm strm = yystrm := strm + fun yygetPos() = ULexBuffer.getpos (!yystrm) + fun yystreamify input = yystreamify' (yygetPos()) input + fun yystreamifyReader readFn strm = yystreamifyReader' (yygetPos()) readFn strm + fun yystreamifyInstream strm = yystreamifyInstream' (yygetPos()) strm + (* start position of token -- can be updated via skip() *) + val yystartPos = ref (yygetPos()) + (* get one char of input *) + fun yygetc strm = (case ULexBuffer.getu strm + of (SOME (0w10, s')) => + (AntlrStreamPos.markNewLine yysm (ULexBuffer.getpos strm); + SOME (0w10, s')) + | x => x) + fun yygetList getc strm = let + val get1 = UTF8.getu getc + fun iter (strm, accum) = + (case get1 strm + of NONE => rev accum + | SOME (w, strm') => iter (strm', w::accum) + (* end case *)) + in + iter (strm, []) + end + (* create yytext *) + fun yymksubstr(strm) = ULexBuffer.subtract (strm, !yystrm) + fun yymktext(strm) = Substring.string (yymksubstr strm) + fun yymkunicode(strm) = yygetList Substring.getc (yymksubstr strm) + open UserDeclarations + fun lex () = let + fun yystuck (yyNO_MATCH) = raise Fail "lexer reached a stuck state" + | yystuck (yyMATCH (strm, action, old)) = + action (strm, old) + val yypos = yygetPos() + fun yygetlineNo strm = AntlrStreamPos.lineNo yysm (ULexBuffer.getpos strm) + fun yygetcolNo strm = AntlrStreamPos.colNo yysm (ULexBuffer.getpos strm) + fun yyactsToMatches (strm, [], oldMatches) = oldMatches + | yyactsToMatches (strm, act::acts, oldMatches) = + yyMATCH (strm, act, yyactsToMatches (strm, acts, oldMatches)) + fun yygo actTable = + (fn (~1, _, oldMatches) => yystuck oldMatches + | (curState, strm, oldMatches) => let + val (transitions, finals') = Vector.sub (yytable, curState) + val finals = List.map (fn i => Vector.sub (actTable, i)) finals' + fun tryfinal() = + yystuck (yyactsToMatches (strm, finals, oldMatches)) + fun find (c, []) = NONE + | find (c, (c1, c2, s)::ts) = + if c1 <= c andalso c <= c2 then SOME s + else find (c, ts) + in case yygetc strm + of SOME(c, strm') => + (case find (c, transitions) + of NONE => tryfinal() + | SOME n => + yygo actTable + (n, strm', + yyactsToMatches (strm, finals, oldMatches))) + | NONE => tryfinal() + end) + val yylastwasnref = ref (ULexBuffer.lastWasNL (!yystrm)) + fun continue() = let val yylastwasn = !yylastwasnref in +@lexer@ + +end + and skip() = (yystartPos := yygetPos(); + yylastwasnref := ULexBuffer.lastWasNL (!yystrm); + continue()) + in (continue(), (!yystartPos, yygetPos()-1), !yystrm, !yyss) end + in + lex() + end + in + type pos = AntlrStreamPos.pos + type span = AntlrStreamPos.span + type tok = UserDeclarations.lex_result + + datatype prestrm = STRM of ULexBuffer.stream * + (yystart_state * tok * span * prestrm * yystart_state) option ref + type strm = (prestrm * yystart_state) + + fun lex sm +@args@ +(STRM (yystrm, memo), ss) = (case !memo + of NONE => let + val (tok, span, yystrm', ss') = innerLex +@pargs@ +(yystrm, ss, sm) + val strm' = STRM (yystrm', ref NONE); + in + memo := SOME (ss, tok, span, strm', ss'); + (tok, span, (strm', ss')) + end + | SOME (ss', tok, span, strm', ss'') => + if ss = ss' then + (tok, span, (strm', ss'')) + else ( + memo := NONE; + lex sm +@pargs@ +(STRM (yystrm, memo), ss)) + (* end case *)) + + fun streamify input = (STRM (yystreamify' 0 input, ref NONE), INITIAL) + fun streamifyReader readFn strm = (STRM (yystreamifyReader' 0 readFn strm, ref NONE), + INITIAL) + fun streamifyInstream strm = (STRM (yystreamifyInstream' 0 strm, ref NONE), + INITIAL) + + fun getPos (STRM (strm, _), _) = ULexBuffer.getpos strm + + end +end + diff --git a/ml-lpt/ml-ulex/BackEnds/lex-output-spec.sml b/ml-lpt/ml-ulex/BackEnds/lex-output-spec.sml new file mode 100644 index 0000000..9ed3009 --- /dev/null +++ b/ml-lpt/ml-ulex/BackEnds/lex-output-spec.sml @@ -0,0 +1,38 @@ +(* lex-output-spec.sml + * + * COPYRIGHT (c) 2005 + * John Reppy (http://www.cs.uchicago.edu/~jhr) + * Aaron Turon (adrassi@gmail.com) + * All rights reserved. + * + * Specification produced by LexGen + *) + +structure LexOutputSpec = + struct + + datatype dfa_state + = State of { + id : int, + startState : bool, + label : RegExp.re Vector.vector, + final : int list, (* action vector indices *) + next : (RegExp.sym_set * dfa_state) list ref + } + + fun sameState (State {id = id1, ...}, State {id = id2, ...}) = + id1 = id2 + + type action = string + + datatype spec = Spec of { + decls : string, + header : string, + arg : string, + actions : action vector, + dfa : dfa_state list, + startStates : (string * dfa_state) list, + eofRules : (string * action) list + } + + end diff --git a/ml-lpt/ml-ulex/BackEnds/output-sig.sml b/ml-lpt/ml-ulex/BackEnds/output-sig.sml new file mode 100644 index 0000000..13f7b8d --- /dev/null +++ b/ml-lpt/ml-ulex/BackEnds/output-sig.sml @@ -0,0 +1,16 @@ +(* output-sig.sml + * + * COPYRIGHT (c) 2005 + * John Reppy (http://www.cs.uchicago.edu/~jhr) + * Aaron Turon (adrassi@gmail.com) + * All rights reserved. + * + * The expected signature for any "output" (backend) module. + *) + +signature OUTPUT = + sig + + val output : LexOutputSpec.spec * string -> unit + + end \ No newline at end of file diff --git a/ml-lpt/ml-ulex/FrontEnds/common/.cm/GUID/input-sig.sml b/ml-lpt/ml-ulex/FrontEnds/common/.cm/GUID/input-sig.sml new file mode 100644 index 0000000..60d0f59 --- /dev/null +++ b/ml-lpt/ml-ulex/FrontEnds/common/.cm/GUID/input-sig.sml @@ -0,0 +1 @@ +guid-(sources.cm):FrontEnds/common/input-sig.sml-1714016111.284 diff --git a/ml-lpt/ml-ulex/FrontEnds/common/.cm/GUID/lex-spec.sml b/ml-lpt/ml-ulex/FrontEnds/common/.cm/GUID/lex-spec.sml new file mode 100644 index 0000000..b3cae8d --- /dev/null +++ b/ml-lpt/ml-ulex/FrontEnds/common/.cm/GUID/lex-spec.sml @@ -0,0 +1 @@ +guid-(sources.cm):FrontEnds/common/lex-spec.sml-1714016110.625 diff --git a/ml-lpt/ml-ulex/FrontEnds/common/.cm/SKEL/input-sig.sml b/ml-lpt/ml-ulex/FrontEnds/common/.cm/SKEL/input-sig.sml new file mode 100644 index 0000000..11dfa92 --- /dev/null +++ b/ml-lpt/ml-ulex/FrontEnds/common/.cm/SKEL/input-sig.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f1d"LexSpec"ac"INPUT"h0 \ No newline at end of file diff --git a/ml-lpt/ml-ulex/FrontEnds/common/.cm/SKEL/lex-spec.sml b/ml-lpt/ml-ulex/FrontEnds/common/.cm/SKEL/lex-spec.sml new file mode 100644 index 0000000..761da17 --- /dev/null +++ b/ml-lpt/ml-ulex/FrontEnds/common/.cm/SKEL/lex-spec.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f4d"RegExp"d"List"d"AtomSet"d"String"ad"LexSpec"h0 \ No newline at end of file diff --git a/ml-lpt/ml-ulex/FrontEnds/common/.cm/amd64-unix/input-sig.sml b/ml-lpt/ml-ulex/FrontEnds/common/.cm/amd64-unix/input-sig.sml new file mode 100644 index 0000000..f17278b Binary files /dev/null and b/ml-lpt/ml-ulex/FrontEnds/common/.cm/amd64-unix/input-sig.sml differ diff --git a/ml-lpt/ml-ulex/FrontEnds/common/.cm/amd64-unix/lex-spec.sml b/ml-lpt/ml-ulex/FrontEnds/common/.cm/amd64-unix/lex-spec.sml new file mode 100644 index 0000000..35f775d Binary files /dev/null and b/ml-lpt/ml-ulex/FrontEnds/common/.cm/amd64-unix/lex-spec.sml differ diff --git a/ml-lpt/ml-ulex/FrontEnds/common/input-sig.sml b/ml-lpt/ml-ulex/FrontEnds/common/input-sig.sml new file mode 100644 index 0000000..2a9be8e --- /dev/null +++ b/ml-lpt/ml-ulex/FrontEnds/common/input-sig.sml @@ -0,0 +1,13 @@ +(* input-sig.sml + * + * COPYRIGHT (c) 2016 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + *) + +signature INPUT = + sig + + (* parse a file; return NONE on error *) + val parseFile : string -> LexSpec.spec option + + end diff --git a/ml-lpt/ml-ulex/FrontEnds/common/lex-spec.sml b/ml-lpt/ml-ulex/FrontEnds/common/lex-spec.sml new file mode 100644 index 0000000..b9b4e99 --- /dev/null +++ b/ml-lpt/ml-ulex/FrontEnds/common/lex-spec.sml @@ -0,0 +1,157 @@ +(* lex-spec.sml + * + * COPYRIGHT (c) 2005 + * John Reppy (http://www.cs.uchicago.edu/~jhr) + * Aaron Turon (adrassi@gmail.com) + * All rights reserved. + * + * Input specification to ml-ulex + *) + +structure LexSpec = + struct + + datatype clamp = CLAMP127 | CLAMP255 | NO_CLAMP + + type action = string + type rule_spec = AtomSet.set option * RegExp.re + type rule = rule_spec * action + type eof_rule = string * action + + datatype config = Conf of { + structName : string, + header : string, + arg : string, + startStates : AtomSet.set, + clamp : clamp (* not used yet *) + } + + datatype spec = Spec of { + decls : string, + conf : config, + rules : rule list, + eofRules : eof_rule list + } + + fun mkConfig () = Conf { + structName = "", header = "", arg = "", + startStates = AtomSet.empty, + clamp = CLAMP127 + } + + fun updStartStates (conf, new) = let + val Conf {structName, header, arg, startStates, clamp} = conf + in Conf { + structName = structName, + header = header, + arg = arg, + startStates = new, + clamp = clamp + } + end + + fun updHeader (conf, new) = let + val Conf {structName, header, startStates, arg, clamp} = conf +(* FIXME: we should be reporting an error instead of raising an exception here! *) + val _ = if String.size structName > 0 + then raise Fail "Cannot have both %structure and %header" + else () + in Conf { + structName = structName, + header = new, + arg = arg, + startStates = startStates, + clamp = clamp + } + end + + fun updStructName (conf, new) = let + val Conf {structName, header, startStates, arg, clamp} = conf +(* FIXME: we should be reporting an error instead of raising an exception here! *) + val _ = if String.size header > 0 + then raise Fail "Cannot have both %structure and %header" + else () + in Conf { + structName = new, + header = header, + arg = arg, + startStates = startStates, + clamp = clamp + } + end + + fun updArg (conf, new) = let + val Conf {structName, header, startStates, arg, clamp} = conf + in Conf { + structName = structName, + header = header, + arg = new, + startStates = startStates, + clamp = clamp + } + end + + fun updClamp (conf, new) = let + val Conf {structName, header, arg, startStates, clamp} = conf + in Conf { + structName = structName, + header = header, + arg = arg, + startStates = startStates, + clamp = new + } + end + + fun mkSpec() = Spec {decls = "", conf = mkConfig(), rules = [], eofRules = []} + + fun addRule (spec, new) = let + val Spec {decls, conf, rules, eofRules} = spec + in + Spec {decls = decls, conf = conf, + rules = rules @ [new], eofRules = eofRules} + end + + fun addEOFRuleFront (spec, new) = let + val Spec {decls, conf, rules, eofRules} = spec + in + Spec {decls = decls, conf = conf, + rules = rules, eofRules = new::eofRules} + end + + fun addEOFRule (spec, new) = let + val Spec {decls, conf, rules, eofRules} = spec + in + Spec {decls = decls, conf = conf, + rules = rules, eofRules = eofRules @ [new]} + end + + fun getConf (Spec {conf, ...}) = conf + fun updConf (spec, new) = let + val Spec {decls, conf, rules, eofRules} = spec + in + Spec {decls = decls, conf = new, rules = rules, eofRules = eofRules} + end + + fun updDecls (spec, new) = let + val Spec {decls, conf, rules, eofRules} = spec + in + Spec {decls = new, conf = conf, rules = rules, eofRules = eofRules} + end + + fun emptyActions (spec) = let + val Spec {decls, conf, rules, eofRules} = spec + val Conf {structName, header, arg, startStates, clamp} = conf + val conf' = Conf { + structName = "", header = "", arg = "", clamp = clamp, + startStates = startStates + } + fun clearRule (rspec, action) = (rspec, "()") + in Spec { + decls = "fun eof() = ()\ntype lex_result = unit", + conf = conf', + rules = List.map clearRule rules, + eofRules = List.map clearRule eofRules + } + end + + end diff --git a/ml-lpt/ml-ulex/FrontEnds/ml-lex/.cm/GUID/ml-lex-input.sml b/ml-lpt/ml-ulex/FrontEnds/ml-lex/.cm/GUID/ml-lex-input.sml new file mode 100644 index 0000000..3c0bc07 --- /dev/null +++ b/ml-lpt/ml-ulex/FrontEnds/ml-lex/.cm/GUID/ml-lex-input.sml @@ -0,0 +1 @@ +guid-(sources.cm):FrontEnds/ml-lex/ml-lex-input.sml-1714016112.667 diff --git a/ml-lpt/ml-ulex/FrontEnds/ml-lex/.cm/GUID/ml-lex.lex.sml b/ml-lpt/ml-ulex/FrontEnds/ml-lex/.cm/GUID/ml-lex.lex.sml new file mode 100644 index 0000000..ad253a0 --- /dev/null +++ b/ml-lpt/ml-ulex/FrontEnds/ml-lex/.cm/GUID/ml-lex.lex.sml @@ -0,0 +1 @@ +guid-(sources.cm):FrontEnds/ml-lex/ml-lex.lex.sml-1714016111.723 diff --git a/ml-lpt/ml-ulex/FrontEnds/ml-lex/.cm/GUID/ml-lex.yacc.sig b/ml-lpt/ml-ulex/FrontEnds/ml-lex/.cm/GUID/ml-lex.yacc.sig new file mode 100644 index 0000000..2107453 --- /dev/null +++ b/ml-lpt/ml-ulex/FrontEnds/ml-lex/.cm/GUID/ml-lex.yacc.sig @@ -0,0 +1 @@ +guid-(sources.cm):FrontEnds/ml-lex/ml-lex.yacc.sig-1714016111.438 diff --git a/ml-lpt/ml-ulex/FrontEnds/ml-lex/.cm/GUID/ml-lex.yacc.sml b/ml-lpt/ml-ulex/FrontEnds/ml-lex/.cm/GUID/ml-lex.yacc.sml new file mode 100644 index 0000000..52a97f4 --- /dev/null +++ b/ml-lpt/ml-ulex/FrontEnds/ml-lex/.cm/GUID/ml-lex.yacc.sml @@ -0,0 +1 @@ +guid-(sources.cm):FrontEnds/ml-lex/ml-lex.yacc.sml-1714016111.443 diff --git a/ml-lpt/ml-ulex/FrontEnds/ml-lex/.cm/SKEL/ml-lex-input.sml b/ml-lpt/ml-ulex/FrontEnds/ml-lex/.cm/SKEL/ml-lex-input.sml new file mode 100644 index 0000000..dc35fd1 --- /dev/null +++ b/ml-lpt/ml-ulex/FrontEnds/ml-lex/.cm/SKEL/ml-lex-input.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f2d"Int"d"TextIO"ad"MLLexInput"jh3aMLLexLrVals"jh1aToken"gp2LrParser"7gp1e"MLLexLrValsFun"aMLLexLex"jh1ad"Tok"gp2%d"Tokens"gp1e"MLLexLexFun"ad"MLLexParser"jh3aParserData"gp2%+ad"Lex"gp1$agp1gp1e"Join"gp1c"INPUT" \ No newline at end of file diff --git a/ml-lpt/ml-ulex/FrontEnds/ml-lex/.cm/SKEL/ml-lex.lex.sml b/ml-lpt/ml-ulex/FrontEnds/ml-lex/.cm/SKEL/ml-lex.lex.sml new file mode 100644 index 0000000..65e8787 --- /dev/null +++ b/ml-lpt/ml-ulex/FrontEnds/ml-lex/.cm/SKEL/ml-lex.lex.sml @@ -0,0 +1,3 @@ +Skeleton 5 +ae"MLLexLexFun"i2aTok"gp1c"MLLex_TOKENS"f4Char"TextIO"Vector"h3ayyInput"jh3aTIO"gp15ad"TSIO"gp2d"StreamIO"ad"TPIO"gp1d"TextPrimIO"h0aUserDeclarations"h3egp1f4d"UTF8",List"String"ad"SIS"gp2d"RegExp"d"SymSet"bbd2egp1f7d"IO" +C=d"Int">Nf0f1 diff --git a/ml-lpt/ml-ulex/FrontEnds/ml-lex/.cm/SKEL/ml-lex.yacc.sig b/ml-lpt/ml-ulex/FrontEnds/ml-lex/.cm/SKEL/ml-lex.yacc.sig new file mode 100644 index 0000000..53c4628 --- /dev/null +++ b/ml-lpt/ml-ulex/FrontEnds/ml-lex/.cm/SKEL/ml-lex.yacc.sig @@ -0,0 +1,2 @@ +Skeleton 5 +d3f1d"UTF8"aMLLex_TOKENS"h0ac"MLLex_LRVALS"h2ad"Tokens"gp1 ad"ParserData"gp1c"PARSER_DATA" \ No newline at end of file diff --git a/ml-lpt/ml-ulex/FrontEnds/ml-lex/.cm/SKEL/ml-lex.yacc.sml b/ml-lpt/ml-ulex/FrontEnds/ml-lex/.cm/SKEL/ml-lex.yacc.sml new file mode 100644 index 0000000..0cb7a8b Binary files /dev/null and b/ml-lpt/ml-ulex/FrontEnds/ml-lex/.cm/SKEL/ml-lex.yacc.sml differ diff --git a/ml-lpt/ml-ulex/FrontEnds/ml-lex/.cm/amd64-unix/ml-lex-input.sml b/ml-lpt/ml-ulex/FrontEnds/ml-lex/.cm/amd64-unix/ml-lex-input.sml new file mode 100644 index 0000000..8855fe0 Binary files /dev/null and b/ml-lpt/ml-ulex/FrontEnds/ml-lex/.cm/amd64-unix/ml-lex-input.sml differ diff --git a/ml-lpt/ml-ulex/FrontEnds/ml-lex/.cm/amd64-unix/ml-lex.lex.sml b/ml-lpt/ml-ulex/FrontEnds/ml-lex/.cm/amd64-unix/ml-lex.lex.sml new file mode 100644 index 0000000..1a100ca Binary files /dev/null and b/ml-lpt/ml-ulex/FrontEnds/ml-lex/.cm/amd64-unix/ml-lex.lex.sml differ diff --git a/ml-lpt/ml-ulex/FrontEnds/ml-lex/.cm/amd64-unix/ml-lex.yacc.sig b/ml-lpt/ml-ulex/FrontEnds/ml-lex/.cm/amd64-unix/ml-lex.yacc.sig new file mode 100644 index 0000000..3c8377f Binary files /dev/null and b/ml-lpt/ml-ulex/FrontEnds/ml-lex/.cm/amd64-unix/ml-lex.yacc.sig differ diff --git a/ml-lpt/ml-ulex/FrontEnds/ml-lex/.cm/amd64-unix/ml-lex.yacc.sml b/ml-lpt/ml-ulex/FrontEnds/ml-lex/.cm/amd64-unix/ml-lex.yacc.sml new file mode 100644 index 0000000..8d54a45 Binary files /dev/null and b/ml-lpt/ml-ulex/FrontEnds/ml-lex/.cm/amd64-unix/ml-lex.yacc.sml differ diff --git a/ml-lpt/ml-ulex/FrontEnds/ml-lex/ml-lex-input.sml b/ml-lpt/ml-ulex/FrontEnds/ml-lex/ml-lex-input.sml new file mode 100644 index 0000000..f37f966 --- /dev/null +++ b/ml-lpt/ml-ulex/FrontEnds/ml-lex/ml-lex-input.sml @@ -0,0 +1,36 @@ +(* ml-lex-input.sml + * + * COPYRIGHT (c) 2005 + * John Reppy (http://www.cs.uchicago.edu/~jhr) + * Aaron Turon (adrassi@gmail.com) + * All rights reserved. + * + * Driver for ml-lex input format. + *) + +structure MLLexInput : INPUT = + struct + + structure MLLexLrVals = + MLLexLrValsFun(structure Token = LrParser.Token) + structure MLLexLex = + MLLexLexFun(structure Tok = MLLexLrVals.Tokens) + structure MLLexParser = + Join(structure ParserData = MLLexLrVals.ParserData + structure Lex = MLLexLex + structure LrParser = LrParser) + + fun parseFile fname = let + val anyErrors = ref false + fun parseErr (msg, line, _) = ( + print(concat["** Syntax error [", fname, ":", Int.toString line, "]: ", msg, "\n"]); + anyErrors := true) + val strm = TextIO.openIn fname + val lexer = MLLexParser.makeLexer (fn n => TextIO.inputN (strm, n)) + val (spec, _) = MLLexParser.parse(15, lexer, parseErr, ()) + in + TextIO.closeIn strm; + if !anyErrors then NONE else SOME spec + end + + end diff --git a/ml-lpt/ml-ulex/FrontEnds/ml-lex/ml-lex.lex b/ml-lpt/ml-ulex/FrontEnds/ml-lex/ml-lex.lex new file mode 100644 index 0000000..3272427 --- /dev/null +++ b/ml-lpt/ml-ulex/FrontEnds/ml-lex/ml-lex.lex @@ -0,0 +1,164 @@ +type pos = int +type svalue = Tok.svalue +type ('a,'b) token = ('a,'b) Tok.token +type lexresult= (svalue,pos) token + +open Tok + +val eof = fn () => EOF(~1,~1) +val error = (* fn (e,l : int,_) => + output(std_out,"line " ^ (makestring l) ^ + ": " ^ e ^ "\n") *) + fn _ => () + +(* what to do (i.e. switch start states) after recognizing an action *) +val afterAction = ref (fn () => ()) + +(* paren counting for actions *) +val pcount = ref 0 +val inquote = ref false +fun inc r = if !inquote then () else r := !r + 1 +fun dec r = if !inquote then () else r := !r - 1 + +(* buffer for accumulating test across the rules for actions *) +local +val text = ref ([] : string list) +in +fun clrAction () = (text := ["("]) +fun updAction str = if !pcount > 0 + then (text := str :: !text) + else () +fun getAction () = String.concat (rev (!text)) +end + +structure SIS = RegExp.SymSet +fun uniChar s = let + fun toW32 (c : Char.char) : UTF8.wchar = + (case c of #"0" => 0w0 | #"1" => 0w1 | #"2" => 0w2 | #"3" => 0w3 + | #"4" => 0w4 | #"5" => 0w5 | #"6" => 0w6 | #"7" => 0w7 + | #"8" => 0w8 | #"9" => 0w9 | #"a" => 0w10 | #"A" => 0w10 + | #"b" => 0w11 | #"B" => 0w11 | #"c" => 0w12 | #"C" => 0w12 + | #"d" => 0w13 | #"D" => 0w13 | #"e" => 0w14 | #"E" => 0w14 + | #"f" => 0w15 | #"F" => 0w15 + | _ => raise Fail "invalid unicode escape sequence") + fun iter (#"u"::_, v) = v + | iter (c::cs, v) = iter (cs, 0w16*v + (toW32 c)) + | iter _ = raise Fail "invalid unicode escape sequence" + in iter (List.rev (String.explode s), 0w0) + end + +val highAscii = SIS.interval(0w128, 0w255) + +%% + +%header (functor MLLexLexFun(structure Tok: MLLex_TOKENS)); +%s DEFS RE RECB CHARCLASS LEXSTATES ACTION STRING; +%count + +ws = [\ \n\t\013]; +alpha = [a-zA-Z]; +num = [0-9]; +hex = {num} | [a-fA-F]; +id = {alpha}({alpha} | {num} | "_" | "'")*; + +%% + + "%%" => (YYBEGIN DEFS; LEXMARK(!yylineno, !yylineno)); + ([^%] | [^%]* % [^%])* + => (DECLS(yytext, !yylineno, !yylineno)); + + {ws} => (lex()); + "%%" => (YYBEGIN RE; LEXMARK(!yylineno, !yylineno)); + "%s" => (YYBEGIN LEXSTATES; STATES(!yylineno, !yylineno)); + "%header" {ws}* "(" + => (clrAction(); pcount := 1; inquote := false; + YYBEGIN ACTION; + afterAction := (fn () => YYBEGIN DEFS); + HEADER(!yylineno, !yylineno)); + "%structure" + => (STRUCT(!yylineno, !yylineno)); + "%arg" {ws}* "(" + => (clrAction(); pcount := 1; inquote := false; + YYBEGIN ACTION; + afterAction := (fn () => YYBEGIN DEFS); + ARG(!yylineno, !yylineno)); + "%count" => (COUNT(!yylineno, !yylineno)); + "%reject"=> (REJECTTOK(!yylineno, !yylineno)); + "%unicode" + => (UNICODE(!yylineno, !yylineno)); + "%full" => (FULL(!yylineno, !yylineno)); + {id} => (ID(yytext, !yylineno, !yylineno)); + "=" => (YYBEGIN RE; EQ(!yylineno, !yylineno)); + + {ws} => (lex()); + "?" => (QMARK(!yylineno, !yylineno)); + "*" => (STAR(!yylineno, !yylineno)); + "+" => (PLUS(!yylineno, !yylineno)); + "|" => (BAR(!yylineno, !yylineno)); + "(" => (LP(!yylineno, !yylineno)); + ")" => (RP(!yylineno, !yylineno)); + "$" => (DOLLAR(!yylineno, !yylineno)); + "/" => (SLASH(!yylineno, !yylineno)); + "." => (DOT(!yylineno, !yylineno)); +"^" => (CARAT(!yylineno, !yylineno)); + + "{" => (YYBEGIN RECB; lex()); + "\"" => (YYBEGIN STRING; lex()); + "[" => (YYBEGIN CHARCLASS; LB(!yylineno, !yylineno)); + "<" => (YYBEGIN LEXSTATES; LT(!yylineno, !yylineno)); + ">" => (GT(!yylineno, !yylineno)); + "=>" {ws}* "(" + => (clrAction(); pcount := 1; inquote := false; + YYBEGIN ACTION; + afterAction := (fn () => YYBEGIN RE); + ARROW(!yylineno, !yylineno)); + ";" => (YYBEGIN DEFS; SEMI(!yylineno, !yylineno)); + +{ws} => (lex()); +{id} => (ID(yytext, !yylineno, !yylineno)); +{num}+ => (REPS(valOf (Int.fromString yytext), !yylineno, !yylineno)); +"," => (COMMA(!yylineno, !yylineno)); +"}" => (YYBEGIN RE; RCB(!yylineno, !yylineno)); + +"-]" => (YYBEGIN RE; RBD(!yylineno, !yylineno)); +"]" => (YYBEGIN RE; RB(!yylineno, !yylineno)); +"-" => (DASH(!yylineno, !yylineno)); +"^" => (CARAT(!yylineno, !yylineno)); + + "\"" => (YYBEGIN RE; lex()); + +"\\" ({num}{3} | [btnr] | "\\" | "\"") + => (CHAR(valOf (String.fromString yytext), !yylineno, !yylineno)); +"\\u"{hex}{4} + => (UNICHAR(uniChar yytext, !yylineno, !yylineno)); +"\\h" + => (HIGH_CHAR(!yylineno, !yylineno)); +"\\". + => (CHAR(String.substring (yytext, 1, 1), !yylineno, !yylineno)); +. + => (CHAR(yytext, !yylineno, !yylineno)); + +{id} => (LEXSTATE(yytext, !yylineno, !yylineno)); +{ws} => (lex()); + "," => (COMMA(!yylineno, !yylineno)); + ">" => (YYBEGIN RE; GT(!yylineno, !yylineno)); + ";" => (YYBEGIN DEFS; SEMI(!yylineno, !yylineno)); + + ";" => (if !pcount = 0 + then ((!afterAction)(); + ACT(getAction(), !yylineno, !yylineno)) + else (updAction ";"; lex())); + "(" => (updAction "("; inc pcount; lex()); + ")" => (updAction ")"; dec pcount; lex()); + "\\\"" => (updAction "\\\""; lex()); + "\\\\" => (updAction "\\\\"; lex()); + "\\" => (updAction "\\"; lex()); + "\"" => (updAction "\""; inquote := not (!inquote); lex()); + [^;()\"\\]* + => (updAction yytext; lex()); + +. => (print (concat[ + "[", Int.toString (!yylineno), "] Illegal character '", + String.toCString yytext, "'\n" + ]); + continue()); diff --git a/ml-lpt/ml-ulex/FrontEnds/ml-lex/ml-lex.lex.sml b/ml-lpt/ml-ulex/FrontEnds/ml-lex/ml-lex.lex.sml new file mode 100644 index 0000000..6da4c1f --- /dev/null +++ b/ml-lpt/ml-ulex/FrontEnds/ml-lex/ml-lex.lex.sml @@ -0,0 +1,1921 @@ +functor MLLexLexFun(structure Tok: MLLex_TOKENS) = struct + + structure yyInput : sig + + type stream + val mkStream : (int -> string) -> stream + val fromStream : TextIO.StreamIO.instream -> stream + val getc : stream -> (Char.char * stream) option + val getpos : stream -> int + val getlineNo : stream -> int + val subtract : stream * stream -> string + val eof : stream -> bool + val lastWasNL : stream -> bool + + end = struct + + structure TIO = TextIO + structure TSIO = TIO.StreamIO + structure TPIO = TextPrimIO + + datatype stream = Stream of { + strm : TSIO.instream, + id : int, (* track which streams originated + * from the same stream *) + pos : int, + lineNo : int, + lastWasNL : bool + } + + local + val next = ref 0 + in + fun nextId() = !next before (next := !next + 1) + end + + val initPos = 2 (* ml-lex bug compatibility *) + + fun mkStream inputN = let + val strm = TSIO.mkInstream + (TPIO.RD { + name = "lexgen", + chunkSize = 4096, + readVec = SOME inputN, + readArr = NONE, + readVecNB = NONE, + readArrNB = NONE, + block = NONE, + canInput = NONE, + avail = (fn () => NONE), + getPos = NONE, + setPos = NONE, + endPos = NONE, + verifyPos = NONE, + close = (fn () => ()), + ioDesc = NONE + }, "") + in + Stream {strm = strm, id = nextId(), pos = initPos, lineNo = 1, + lastWasNL = true} + end + + fun fromStream strm = Stream { + strm = strm, id = nextId(), pos = initPos, lineNo = 1, lastWasNL = true + } + + fun getc (Stream {strm, pos, id, lineNo, ...}) = (case TSIO.input1 strm + of NONE => NONE + | SOME (c, strm') => + SOME (c, Stream { + strm = strm', + pos = pos+1, + id = id, + lineNo = lineNo + + (if c = #"\n" then 1 else 0), + lastWasNL = (c = #"\n") + }) + (* end case*)) + + fun getpos (Stream {pos, ...}) = pos + + fun getlineNo (Stream {lineNo, ...}) = lineNo + + fun subtract (new, old) = let + val Stream {strm = strm, pos = oldPos, id = oldId, ...} = old + val Stream {pos = newPos, id = newId, ...} = new + val (diff, _) = if newId = oldId andalso newPos >= oldPos + then TSIO.inputN (strm, newPos - oldPos) + else raise Fail + "BUG: yyInput: attempted to subtract incompatible streams" + in + diff + end + + fun eof s = not (isSome (getc s)) + + fun lastWasNL (Stream {lastWasNL, ...}) = lastWasNL + + end + + datatype yystart_state = +RE | DEFS | RECB | STRING | CHARCLASS | LEXSTATES | ACTION | INITIAL + structure UserDeclarations = + struct + +type pos = int +type svalue = Tok.svalue +type ('a,'b) token = ('a,'b) Tok.token +type lexresult= (svalue,pos) token + +open Tok + +val eof = fn () => EOF(~1,~1) +val error = (* fn (e,l : int,_) => + output(std_out,"line " ^ (makestring l) ^ + ": " ^ e ^ "\n") *) + fn _ => () + +(* what to do (i.e. switch start states) after recognizing an action *) +val afterAction = ref (fn () => ()) + +(* paren counting for actions *) +val pcount = ref 0 +val inquote = ref false +fun inc r = if !inquote then () else r := !r + 1 +fun dec r = if !inquote then () else r := !r - 1 + +(* buffer for accumulating test across the rules for actions *) +local +val text = ref ([] : string list) +in +fun clrAction () = (text := ["("]) +fun updAction str = if !pcount > 0 + then (text := str :: !text) + else () +fun getAction () = String.concat (rev (!text)) +end + +structure SIS = RegExp.SymSet +fun uniChar s = let + fun toW32 (c : Char.char) : UTF8.wchar = + (case c of #"0" => 0w0 | #"1" => 0w1 | #"2" => 0w2 | #"3" => 0w3 + | #"4" => 0w4 | #"5" => 0w5 | #"6" => 0w6 | #"7" => 0w7 + | #"8" => 0w8 | #"9" => 0w9 | #"a" => 0w10 | #"A" => 0w10 + | #"b" => 0w11 | #"B" => 0w11 | #"c" => 0w12 | #"C" => 0w12 + | #"d" => 0w13 | #"D" => 0w13 | #"e" => 0w14 | #"E" => 0w14 + | #"f" => 0w15 | #"F" => 0w15 + | _ => raise Fail "invalid unicode escape sequence") + fun iter (#"u"::_, v) = v + | iter (c::cs, v) = iter (cs, 0w16*v + (toW32 c)) + | iter _ = raise Fail "invalid unicode escape sequence" + in iter (List.rev (String.explode s), 0w0) + end + +val highAscii = SIS.interval(0w128, 0w255) + + + + end + + datatype yymatch + = yyNO_MATCH + | yyMATCH of yyInput.stream * action * yymatch + withtype action = yyInput.stream * yymatch -> UserDeclarations.lexresult + + local + + val yytable = +Vector.fromList [] + fun mk yyins = let + (* current start state *) + val yyss = ref INITIAL + fun YYBEGIN ss = (yyss := ss) + (* current input stream *) + val yystrm = ref yyins + (* get one char of input *) + val yygetc = yyInput.getc + (* create yytext *) + fun yymktext(strm) = yyInput.subtract (strm, !yystrm) + open UserDeclarations + fun lex +(yyarg as ()) = let + fun continue() = let + val yylastwasn = yyInput.lastWasNL (!yystrm) + fun yystuck (yyNO_MATCH) = raise Fail "stuck state" + | yystuck (yyMATCH (strm, action, old)) = + action (strm, old) + val yypos = yyInput.getpos (!yystrm) + val yygetlineNo = yyInput.getlineNo + fun yyactsToMatches (strm, [], oldMatches) = oldMatches + | yyactsToMatches (strm, act::acts, oldMatches) = + yyMATCH (strm, act, yyactsToMatches (strm, acts, oldMatches)) + fun yygo actTable = + (fn (~1, _, oldMatches) => yystuck oldMatches + | (curState, strm, oldMatches) => let + val (transitions, finals') = Vector.sub (yytable, curState) + val finals = List.map (fn i => Vector.sub (actTable, i)) finals' + fun tryfinal() = + yystuck (yyactsToMatches (strm, finals, oldMatches)) + fun find (c, []) = NONE + | find (c, (c1, c2, s)::ts) = + if c1 <= c andalso c <= c2 then SOME s + else find (c, ts) + in case yygetc strm + of SOME(c, strm') => + (case find (c, transitions) + of NONE => tryfinal() + | SOME n => + yygo actTable + (n, strm', + yyactsToMatches (strm, finals, oldMatches))) + | NONE => tryfinal() + end) + in +let +fun yyAction0 (strm, lastMatch : yymatch) = let + val yylineno = ref(yygetlineNo(!(yystrm))) + in + yystrm := strm; (YYBEGIN DEFS; LEXMARK(!yylineno, !yylineno)) + end +fun yyAction1 (strm, lastMatch : yymatch) = let + val yylineno = ref(yygetlineNo(!(yystrm))) + val yytext = yymktext(strm) + in + yystrm := strm; (DECLS(yytext, !yylineno, !yylineno)) + end +fun yyAction2 (strm, lastMatch : yymatch) = (yystrm := strm; (lex())) +fun yyAction3 (strm, lastMatch : yymatch) = let + val yylineno = ref(yygetlineNo(!(yystrm))) + in + yystrm := strm; (YYBEGIN RE; LEXMARK(!yylineno, !yylineno)) + end +fun yyAction4 (strm, lastMatch : yymatch) = let + val yylineno = ref(yygetlineNo(!(yystrm))) + in + yystrm := strm; (YYBEGIN LEXSTATES; STATES(!yylineno, !yylineno)) + end +fun yyAction5 (strm, lastMatch : yymatch) = let + val yylineno = ref(yygetlineNo(!(yystrm))) + in + yystrm := strm; + (clrAction(); pcount := 1; inquote := false; + YYBEGIN ACTION; + afterAction := (fn () => YYBEGIN DEFS); + HEADER(!yylineno, !yylineno)) + end +fun yyAction6 (strm, lastMatch : yymatch) = let + val yylineno = ref(yygetlineNo(!(yystrm))) + in + yystrm := strm; (STRUCT(!yylineno, !yylineno)) + end +fun yyAction7 (strm, lastMatch : yymatch) = let + val yylineno = ref(yygetlineNo(!(yystrm))) + in + yystrm := strm; + (clrAction(); pcount := 1; inquote := false; + YYBEGIN ACTION; + afterAction := (fn () => YYBEGIN DEFS); + ARG(!yylineno, !yylineno)) + end +fun yyAction8 (strm, lastMatch : yymatch) = let + val yylineno = ref(yygetlineNo(!(yystrm))) + in + yystrm := strm; (COUNT(!yylineno, !yylineno)) + end +fun yyAction9 (strm, lastMatch : yymatch) = let + val oldStrm = !(yystrm) + fun REJECT () = (yystrm := oldStrm; yystuck(lastMatch)) + val yylineno = ref(yygetlineNo(!(yystrm))) + in + yystrm := strm; (REJECTTOK(!yylineno, !yylineno)) + end +fun yyAction10 (strm, lastMatch : yymatch) = let + val yylineno = ref(yygetlineNo(!(yystrm))) + in + yystrm := strm; (UNICODE(!yylineno, !yylineno)) + end +fun yyAction11 (strm, lastMatch : yymatch) = let + val yylineno = ref(yygetlineNo(!(yystrm))) + in + yystrm := strm; (FULL(!yylineno, !yylineno)) + end +fun yyAction12 (strm, lastMatch : yymatch) = let + val yylineno = ref(yygetlineNo(!(yystrm))) + val yytext = yymktext(strm) + in + yystrm := strm; (ID(yytext, !yylineno, !yylineno)) + end +fun yyAction13 (strm, lastMatch : yymatch) = let + val yylineno = ref(yygetlineNo(!(yystrm))) + in + yystrm := strm; (YYBEGIN RE; EQ(!yylineno, !yylineno)) + end +fun yyAction14 (strm, lastMatch : yymatch) = (yystrm := strm; (lex())) +fun yyAction15 (strm, lastMatch : yymatch) = let + val yylineno = ref(yygetlineNo(!(yystrm))) + in + yystrm := strm; (QMARK(!yylineno, !yylineno)) + end +fun yyAction16 (strm, lastMatch : yymatch) = let + val yylineno = ref(yygetlineNo(!(yystrm))) + in + yystrm := strm; (STAR(!yylineno, !yylineno)) + end +fun yyAction17 (strm, lastMatch : yymatch) = let + val yylineno = ref(yygetlineNo(!(yystrm))) + in + yystrm := strm; (PLUS(!yylineno, !yylineno)) + end +fun yyAction18 (strm, lastMatch : yymatch) = let + val yylineno = ref(yygetlineNo(!(yystrm))) + in + yystrm := strm; (BAR(!yylineno, !yylineno)) + end +fun yyAction19 (strm, lastMatch : yymatch) = let + val yylineno = ref(yygetlineNo(!(yystrm))) + in + yystrm := strm; (LP(!yylineno, !yylineno)) + end +fun yyAction20 (strm, lastMatch : yymatch) = let + val yylineno = ref(yygetlineNo(!(yystrm))) + in + yystrm := strm; (RP(!yylineno, !yylineno)) + end +fun yyAction21 (strm, lastMatch : yymatch) = let + val yylineno = ref(yygetlineNo(!(yystrm))) + in + yystrm := strm; (DOLLAR(!yylineno, !yylineno)) + end +fun yyAction22 (strm, lastMatch : yymatch) = let + val yylineno = ref(yygetlineNo(!(yystrm))) + in + yystrm := strm; (SLASH(!yylineno, !yylineno)) + end +fun yyAction23 (strm, lastMatch : yymatch) = let + val yylineno = ref(yygetlineNo(!(yystrm))) + in + yystrm := strm; (DOT(!yylineno, !yylineno)) + end +fun yyAction24 (strm, lastMatch : yymatch) = let + val yylineno = ref(yygetlineNo(!(yystrm))) + in + yystrm := strm; (CARAT(!yylineno, !yylineno)) + end +fun yyAction25 (strm, lastMatch : yymatch) = (yystrm := strm; + (YYBEGIN RECB; lex())) +fun yyAction26 (strm, lastMatch : yymatch) = (yystrm := strm; + (YYBEGIN STRING; lex())) +fun yyAction27 (strm, lastMatch : yymatch) = let + val yylineno = ref(yygetlineNo(!(yystrm))) + in + yystrm := strm; (YYBEGIN CHARCLASS; LB(!yylineno, !yylineno)) + end +fun yyAction28 (strm, lastMatch : yymatch) = let + val yylineno = ref(yygetlineNo(!(yystrm))) + in + yystrm := strm; (YYBEGIN LEXSTATES; LT(!yylineno, !yylineno)) + end +fun yyAction29 (strm, lastMatch : yymatch) = let + val yylineno = ref(yygetlineNo(!(yystrm))) + in + yystrm := strm; (GT(!yylineno, !yylineno)) + end +fun yyAction30 (strm, lastMatch : yymatch) = let + val yylineno = ref(yygetlineNo(!(yystrm))) + in + yystrm := strm; + (clrAction(); pcount := 1; inquote := false; + YYBEGIN ACTION; + afterAction := (fn () => YYBEGIN RE); + ARROW(!yylineno, !yylineno)) + end +fun yyAction31 (strm, lastMatch : yymatch) = let + val yylineno = ref(yygetlineNo(!(yystrm))) + in + yystrm := strm; (YYBEGIN DEFS; SEMI(!yylineno, !yylineno)) + end +fun yyAction32 (strm, lastMatch : yymatch) = (yystrm := strm; (lex())) +fun yyAction33 (strm, lastMatch : yymatch) = let + val yylineno = ref(yygetlineNo(!(yystrm))) + val yytext = yymktext(strm) + in + yystrm := strm; (ID(yytext, !yylineno, !yylineno)) + end +fun yyAction34 (strm, lastMatch : yymatch) = let + val yylineno = ref(yygetlineNo(!(yystrm))) + val yytext = yymktext(strm) + in + yystrm := strm; + (REPS(valOf (Int.fromString yytext), !yylineno, !yylineno)) + end +fun yyAction35 (strm, lastMatch : yymatch) = let + val yylineno = ref(yygetlineNo(!(yystrm))) + in + yystrm := strm; (COMMA(!yylineno, !yylineno)) + end +fun yyAction36 (strm, lastMatch : yymatch) = let + val yylineno = ref(yygetlineNo(!(yystrm))) + in + yystrm := strm; (YYBEGIN RE; RCB(!yylineno, !yylineno)) + end +fun yyAction37 (strm, lastMatch : yymatch) = let + val yylineno = ref(yygetlineNo(!(yystrm))) + in + yystrm := strm; (YYBEGIN RE; RBD(!yylineno, !yylineno)) + end +fun yyAction38 (strm, lastMatch : yymatch) = let + val yylineno = ref(yygetlineNo(!(yystrm))) + in + yystrm := strm; (YYBEGIN RE; RB(!yylineno, !yylineno)) + end +fun yyAction39 (strm, lastMatch : yymatch) = let + val yylineno = ref(yygetlineNo(!(yystrm))) + in + yystrm := strm; (DASH(!yylineno, !yylineno)) + end +fun yyAction40 (strm, lastMatch : yymatch) = let + val yylineno = ref(yygetlineNo(!(yystrm))) + in + yystrm := strm; (CARAT(!yylineno, !yylineno)) + end +fun yyAction41 (strm, lastMatch : yymatch) = (yystrm := strm; + (YYBEGIN RE; lex())) +fun yyAction42 (strm, lastMatch : yymatch) = let + val yylineno = ref(yygetlineNo(!(yystrm))) + val yytext = yymktext(strm) + in + yystrm := strm; + (CHAR(valOf (String.fromString yytext), !yylineno, !yylineno)) + end +fun yyAction43 (strm, lastMatch : yymatch) = let + val yylineno = ref(yygetlineNo(!(yystrm))) + val yytext = yymktext(strm) + in + yystrm := strm; (UNICHAR(uniChar yytext, !yylineno, !yylineno)) + end +fun yyAction44 (strm, lastMatch : yymatch) = let + val yylineno = ref(yygetlineNo(!(yystrm))) + in + yystrm := strm; (HIGH_CHAR(!yylineno, !yylineno)) + end +fun yyAction45 (strm, lastMatch : yymatch) = let + val yylineno = ref(yygetlineNo(!(yystrm))) + val yytext = yymktext(strm) + in + yystrm := strm; + (CHAR(String.substring (yytext, 1, 1), !yylineno, !yylineno)) + end +fun yyAction46 (strm, lastMatch : yymatch) = let + val yylineno = ref(yygetlineNo(!(yystrm))) + val yytext = yymktext(strm) + in + yystrm := strm; (CHAR(yytext, !yylineno, !yylineno)) + end +fun yyAction47 (strm, lastMatch : yymatch) = let + val yylineno = ref(yygetlineNo(!(yystrm))) + val yytext = yymktext(strm) + in + yystrm := strm; (LEXSTATE(yytext, !yylineno, !yylineno)) + end +fun yyAction48 (strm, lastMatch : yymatch) = (yystrm := strm; (lex())) +fun yyAction49 (strm, lastMatch : yymatch) = let + val yylineno = ref(yygetlineNo(!(yystrm))) + in + yystrm := strm; (COMMA(!yylineno, !yylineno)) + end +fun yyAction50 (strm, lastMatch : yymatch) = let + val yylineno = ref(yygetlineNo(!(yystrm))) + in + yystrm := strm; (YYBEGIN RE; GT(!yylineno, !yylineno)) + end +fun yyAction51 (strm, lastMatch : yymatch) = let + val yylineno = ref(yygetlineNo(!(yystrm))) + in + yystrm := strm; (YYBEGIN DEFS; SEMI(!yylineno, !yylineno)) + end +fun yyAction52 (strm, lastMatch : yymatch) = let + val yylineno = ref(yygetlineNo(!(yystrm))) + in + yystrm := strm; + (if !pcount = 0 + then ((!afterAction)(); + ACT(getAction(), !yylineno, !yylineno)) + else (updAction ";"; lex())) + end +fun yyAction53 (strm, lastMatch : yymatch) = (yystrm := strm; + (updAction "("; inc pcount; lex())) +fun yyAction54 (strm, lastMatch : yymatch) = (yystrm := strm; + (updAction ")"; dec pcount; lex())) +fun yyAction55 (strm, lastMatch : yymatch) = (yystrm := strm; + (updAction "\\\""; lex())) +fun yyAction56 (strm, lastMatch : yymatch) = (yystrm := strm; + (updAction "\\\\"; lex())) +fun yyAction57 (strm, lastMatch : yymatch) = (yystrm := strm; + (updAction "\\"; lex())) +fun yyAction58 (strm, lastMatch : yymatch) = (yystrm := strm; + (updAction "\""; inquote := not (!inquote); lex())) +fun yyAction59 (strm, lastMatch : yymatch) = let + val yytext = yymktext(strm) + in + yystrm := strm; (updAction yytext; lex()) + end +fun yyAction60 (strm, lastMatch : yymatch) = let + val yylineno = ref(yygetlineNo(!(yystrm))) + val yytext = yymktext(strm) + in + yystrm := strm; + (print (concat[ + "[", Int.toString (!yylineno), "] Illegal character '", + String.toCString yytext, "'\n" + ]); + continue()) + end +fun yyQ125 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction0(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction0(strm, yyNO_MATCH) + (* end case *)) +fun yyQ124 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction1(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"%" + then yyQ126(strm', yyMATCH(strm, yyAction1, yyNO_MATCH)) + else yyQ122(strm', yyMATCH(strm, yyAction1, yyNO_MATCH)) + (* end case *)) +and yyQ126 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = #"%" + then yystuck(lastMatch) + else yyQ124(strm', lastMatch) + (* end case *)) +and yyQ122 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction1(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"%" + then yyQ126(strm', yyMATCH(strm, yyAction1, yyNO_MATCH)) + else yyQ127(strm', yyMATCH(strm, yyAction1, yyNO_MATCH)) + (* end case *)) +and yyQ127 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction1(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"%" + then yyQ126(strm', yyMATCH(strm, yyAction1, yyNO_MATCH)) + else yyQ127(strm', yyMATCH(strm, yyAction1, yyNO_MATCH)) + (* end case *)) +fun yyQ123 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction60(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"%" + then yyQ125(strm', yyMATCH(strm, yyAction60, yyNO_MATCH)) + else yyQ124(strm', yyMATCH(strm, yyAction60, yyNO_MATCH)) + (* end case *)) +fun yyQ121 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction1(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"%" + then yyQ126(strm', yyMATCH(strm, yyAction1, yyNO_MATCH)) + else yyQ127(strm', yyMATCH(strm, yyAction1, yyNO_MATCH)) + (* end case *)) +fun yyQ7 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => + if yyInput.eof(!(yystrm)) + then UserDeclarations.eof(yyarg) + else yyAction1(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"\v" + then yyQ121(strm', yyMATCH(strm, yyAction1, yyNO_MATCH)) + else if inp < #"\v" + then if inp = #"\n" + then yyQ122(strm', yyMATCH(strm, yyAction1, yyNO_MATCH)) + else yyQ121(strm', yyMATCH(strm, yyAction1, yyNO_MATCH)) + else if inp = #"%" + then yyQ123(strm', yyMATCH(strm, yyAction1, yyNO_MATCH)) + else yyQ121(strm', yyMATCH(strm, yyAction1, yyNO_MATCH)) + (* end case *)) +fun yyQ120 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction56(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction56(strm, yyNO_MATCH) + (* end case *)) +fun yyQ119 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction55(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction55(strm, yyNO_MATCH) + (* end case *)) +fun yyQ118 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction57(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"#" + then yyAction57(strm, yyNO_MATCH) + else if inp < #"#" + then if inp = #"\"" + then yyQ119(strm', yyMATCH(strm, yyAction57, yyNO_MATCH)) + else yyAction57(strm, yyNO_MATCH) + else if inp = #"\\" + then yyQ120(strm', yyMATCH(strm, yyAction57, yyNO_MATCH)) + else yyAction57(strm, yyNO_MATCH) + (* end case *)) +fun yyQ117 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction52(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction52(strm, yyNO_MATCH) + (* end case *)) +fun yyQ116 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction54(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction54(strm, yyNO_MATCH) + (* end case *)) +fun yyQ115 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction53(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction53(strm, yyNO_MATCH) + (* end case *)) +fun yyQ114 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction58(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction58(strm, yyNO_MATCH) + (* end case *)) +fun yyQ113 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction59(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"*" + then yyQ113(strm', yyMATCH(strm, yyAction59, yyNO_MATCH)) + else if inp < #"*" + then if inp = #"#" + then yyQ113(strm', yyMATCH(strm, yyAction59, yyNO_MATCH)) + else if inp < #"#" + then if inp = #"\"" + then yyAction59(strm, yyNO_MATCH) + else yyQ113(strm', yyMATCH(strm, yyAction59, yyNO_MATCH)) + else if inp <= #"'" + then yyQ113(strm', yyMATCH(strm, yyAction59, yyNO_MATCH)) + else yyAction59(strm, yyNO_MATCH) + else if inp = #"<" + then yyQ113(strm', yyMATCH(strm, yyAction59, yyNO_MATCH)) + else if inp < #"<" + then if inp = #";" + then yyAction59(strm, yyNO_MATCH) + else yyQ113(strm', yyMATCH(strm, yyAction59, yyNO_MATCH)) + else if inp = #"\\" + then yyAction59(strm, yyNO_MATCH) + else yyQ113(strm', yyMATCH(strm, yyAction59, yyNO_MATCH)) + (* end case *)) +fun yyQ112 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction59(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"*" + then yyQ113(strm', yyMATCH(strm, yyAction59, yyNO_MATCH)) + else if inp < #"*" + then if inp = #"#" + then yyQ113(strm', yyMATCH(strm, yyAction59, yyNO_MATCH)) + else if inp < #"#" + then if inp = #"\"" + then yyAction59(strm, yyNO_MATCH) + else yyQ113(strm', yyMATCH(strm, yyAction59, yyNO_MATCH)) + else if inp <= #"'" + then yyQ113(strm', yyMATCH(strm, yyAction59, yyNO_MATCH)) + else yyAction59(strm, yyNO_MATCH) + else if inp = #"<" + then yyQ113(strm', yyMATCH(strm, yyAction59, yyNO_MATCH)) + else if inp < #"<" + then if inp = #";" + then yyAction59(strm, yyNO_MATCH) + else yyQ113(strm', yyMATCH(strm, yyAction59, yyNO_MATCH)) + else if inp = #"\\" + then yyAction59(strm, yyNO_MATCH) + else yyQ113(strm', yyMATCH(strm, yyAction59, yyNO_MATCH)) + (* end case *)) +fun yyQ6 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => + if yyInput.eof(!(yystrm)) + then UserDeclarations.eof(yyarg) + else yyAction59(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #")" + then yyQ116(strm', yyMATCH(strm, yyAction59, yyNO_MATCH)) + else if inp < #")" + then if inp = #"\"" + then yyQ114(strm', yyMATCH(strm, yyAction59, yyNO_MATCH)) + else if inp < #"\"" + then if inp = #"\n" + then yyQ113(strm', yyMATCH(strm, yyAction59, yyNO_MATCH)) + else yyQ112(strm', yyMATCH(strm, yyAction59, yyNO_MATCH)) + else if inp = #"(" + then yyQ115(strm', yyMATCH(strm, yyAction59, yyNO_MATCH)) + else yyQ112(strm', yyMATCH(strm, yyAction59, yyNO_MATCH)) + else if inp = #"<" + then yyQ112(strm', yyMATCH(strm, yyAction59, yyNO_MATCH)) + else if inp < #"<" + then if inp = #";" + then yyQ117(strm', yyMATCH(strm, yyAction59, yyNO_MATCH)) + else yyQ112(strm', yyMATCH(strm, yyAction59, yyNO_MATCH)) + else if inp = #"\\" + then yyQ118(strm', yyMATCH(strm, yyAction59, yyNO_MATCH)) + else yyQ112(strm', yyMATCH(strm, yyAction59, yyNO_MATCH)) + (* end case *)) +fun yyQ111 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction47(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"A" + then yyQ111(strm', yyMATCH(strm, yyAction47, yyNO_MATCH)) + else if inp < #"A" + then if inp = #"(" + then yyAction47(strm, yyNO_MATCH) + else if inp < #"(" + then if inp = #"'" + then yyQ111(strm', yyMATCH(strm, yyAction47, yyNO_MATCH)) + else yyAction47(strm, yyNO_MATCH) + else if inp = #"0" + then yyQ111(strm', yyMATCH(strm, yyAction47, yyNO_MATCH)) + else if inp < #"0" + then yyAction47(strm, yyNO_MATCH) + else if inp <= #"9" + then yyQ111(strm', yyMATCH(strm, yyAction47, yyNO_MATCH)) + else yyAction47(strm, yyNO_MATCH) + else if inp = #"`" + then yyAction47(strm, yyNO_MATCH) + else if inp < #"`" + then if inp = #"[" + then yyAction47(strm, yyNO_MATCH) + else if inp < #"[" + then yyQ111(strm', yyMATCH(strm, yyAction47, yyNO_MATCH)) + else if inp = #"_" + then yyQ111(strm', yyMATCH(strm, yyAction47, yyNO_MATCH)) + else yyAction47(strm, yyNO_MATCH) + else if inp <= #"z" + then yyQ111(strm', yyMATCH(strm, yyAction47, yyNO_MATCH)) + else yyAction47(strm, yyNO_MATCH) + (* end case *)) +fun yyQ110 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction47(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"A" + then yyQ111(strm', yyMATCH(strm, yyAction47, yyNO_MATCH)) + else if inp < #"A" + then if inp = #"(" + then yyAction47(strm, yyNO_MATCH) + else if inp < #"(" + then if inp = #"'" + then yyQ111(strm', yyMATCH(strm, yyAction47, yyNO_MATCH)) + else yyAction47(strm, yyNO_MATCH) + else if inp = #"0" + then yyQ111(strm', yyMATCH(strm, yyAction47, yyNO_MATCH)) + else if inp < #"0" + then yyAction47(strm, yyNO_MATCH) + else if inp <= #"9" + then yyQ111(strm', yyMATCH(strm, yyAction47, yyNO_MATCH)) + else yyAction47(strm, yyNO_MATCH) + else if inp = #"`" + then yyAction47(strm, yyNO_MATCH) + else if inp < #"`" + then if inp = #"[" + then yyAction47(strm, yyNO_MATCH) + else if inp < #"[" + then yyQ111(strm', yyMATCH(strm, yyAction47, yyNO_MATCH)) + else if inp = #"_" + then yyQ111(strm', yyMATCH(strm, yyAction47, yyNO_MATCH)) + else yyAction47(strm, yyNO_MATCH) + else if inp <= #"z" + then yyQ111(strm', yyMATCH(strm, yyAction47, yyNO_MATCH)) + else yyAction47(strm, yyNO_MATCH) + (* end case *)) +fun yyQ109 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction50(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction50(strm, yyNO_MATCH) + (* end case *)) +fun yyQ108 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction51(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction51(strm, yyNO_MATCH) + (* end case *)) +fun yyQ107 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction49(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction49(strm, yyNO_MATCH) + (* end case *)) +fun yyQ106 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction48(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction48(strm, yyNO_MATCH) + (* end case *)) +fun yyQ105 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction48(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction48(strm, yyNO_MATCH) + (* end case *)) +fun yyQ42 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction60(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction60(strm, yyNO_MATCH) + (* end case *)) +fun yyQ5 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => + if yyInput.eof(!(yystrm)) + then UserDeclarations.eof(yyarg) + else yystuck(lastMatch) + | SOME(inp, strm') => + if inp = #"-" + then yyQ42(strm', lastMatch) + else if inp < #"-" + then if inp = #"\r" + then yyQ105(strm', lastMatch) + else if inp < #"\r" + then if inp = #"\n" + then yyQ106(strm', lastMatch) + else if inp < #"\n" + then if inp = #"\t" + then yyQ105(strm', lastMatch) + else yyQ42(strm', lastMatch) + else yyQ42(strm', lastMatch) + else if inp = #"!" + then yyQ42(strm', lastMatch) + else if inp < #"!" + then if inp = #" " + then yyQ105(strm', lastMatch) + else yyQ42(strm', lastMatch) + else if inp = #"," + then yyQ107(strm', lastMatch) + else yyQ42(strm', lastMatch) + else if inp = #"?" + then yyQ42(strm', lastMatch) + else if inp < #"?" + then if inp = #"<" + then yyQ42(strm', lastMatch) + else if inp < #"<" + then if inp = #";" + then yyQ108(strm', lastMatch) + else yyQ42(strm', lastMatch) + else if inp = #">" + then yyQ109(strm', lastMatch) + else yyQ42(strm', lastMatch) + else if inp = #"[" + then yyQ42(strm', lastMatch) + else if inp < #"[" + then if inp <= #"@" + then yyQ42(strm', lastMatch) + else yyQ110(strm', lastMatch) + else if inp = #"a" + then yyQ110(strm', lastMatch) + else if inp < #"a" + then yyQ42(strm', lastMatch) + else if inp <= #"z" + then yyQ110(strm', lastMatch) + else yyQ42(strm', lastMatch) + (* end case *)) +fun yyQ103 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction40(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction40(strm, yyNO_MATCH) + (* end case *)) +fun yyQ102 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction38(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction38(strm, yyNO_MATCH) + (* end case *)) +fun yyQ37 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction43(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction43(strm, yyNO_MATCH) + (* end case *)) +fun yyQ36 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = #"A" + then yyQ37(strm', lastMatch) + else if inp < #"A" + then if inp = #"0" + then yyQ37(strm', lastMatch) + else if inp < #"0" + then yystuck(lastMatch) + else if inp <= #"9" + then yyQ37(strm', lastMatch) + else yystuck(lastMatch) + else if inp = #"a" + then yyQ37(strm', lastMatch) + else if inp < #"a" + then if inp <= #"F" + then yyQ37(strm', lastMatch) + else yystuck(lastMatch) + else if inp <= #"f" + then yyQ37(strm', lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ35 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = #"A" + then yyQ36(strm', lastMatch) + else if inp < #"A" + then if inp = #"0" + then yyQ36(strm', lastMatch) + else if inp < #"0" + then yystuck(lastMatch) + else if inp <= #"9" + then yyQ36(strm', lastMatch) + else yystuck(lastMatch) + else if inp = #"a" + then yyQ36(strm', lastMatch) + else if inp < #"a" + then if inp <= #"F" + then yyQ36(strm', lastMatch) + else yystuck(lastMatch) + else if inp <= #"f" + then yyQ36(strm', lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ34 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = #"A" + then yyQ35(strm', lastMatch) + else if inp < #"A" + then if inp = #"0" + then yyQ35(strm', lastMatch) + else if inp < #"0" + then yystuck(lastMatch) + else if inp <= #"9" + then yyQ35(strm', lastMatch) + else yystuck(lastMatch) + else if inp = #"a" + then yyQ35(strm', lastMatch) + else if inp < #"a" + then if inp <= #"F" + then yyQ35(strm', lastMatch) + else yystuck(lastMatch) + else if inp <= #"f" + then yyQ35(strm', lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ33 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction45(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"A" + then yyQ34(strm', yyMATCH(strm, yyAction45, yyNO_MATCH)) + else if inp < #"A" + then if inp = #"0" + then yyQ34(strm', yyMATCH(strm, yyAction45, yyNO_MATCH)) + else if inp < #"0" + then yyAction45(strm, yyNO_MATCH) + else if inp <= #"9" + then yyQ34(strm', yyMATCH(strm, yyAction45, yyNO_MATCH)) + else yyAction45(strm, yyNO_MATCH) + else if inp = #"a" + then yyQ34(strm', yyMATCH(strm, yyAction45, yyNO_MATCH)) + else if inp < #"a" + then if inp <= #"F" + then yyQ34(strm', yyMATCH(strm, yyAction45, yyNO_MATCH)) + else yyAction45(strm, yyNO_MATCH) + else if inp <= #"f" + then yyQ34(strm', yyMATCH(strm, yyAction45, yyNO_MATCH)) + else yyAction45(strm, yyNO_MATCH) + (* end case *)) +fun yyQ32 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction44(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction44(strm, yyNO_MATCH) + (* end case *)) +fun yyQ39 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction42(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction42(strm, yyNO_MATCH) + (* end case *)) +fun yyQ38 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = #"0" + then yyQ39(strm', lastMatch) + else if inp < #"0" + then yystuck(lastMatch) + else if inp <= #"9" + then yyQ39(strm', lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ31 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction45(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"0" + then yyQ38(strm', yyMATCH(strm, yyAction45, yyNO_MATCH)) + else if inp < #"0" + then yyAction45(strm, yyNO_MATCH) + else if inp <= #"9" + then yyQ38(strm', yyMATCH(strm, yyAction45, yyNO_MATCH)) + else yyAction45(strm, yyNO_MATCH) + (* end case *)) +fun yyQ30 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction42(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction42(strm, yyNO_MATCH) + (* end case *)) +fun yyQ29 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction45(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction45(strm, yyNO_MATCH) + (* end case *)) +fun yyQ25 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction46(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"c" + then yyQ29(strm', yyMATCH(strm, yyAction46, yyNO_MATCH)) + else if inp < #"c" + then if inp = #"0" + then yyQ31(strm', yyMATCH(strm, yyAction46, yyNO_MATCH)) + else if inp < #"0" + then if inp = #"\v" + then yyQ29(strm', yyMATCH(strm, yyAction46, yyNO_MATCH)) + else if inp < #"\v" + then if inp = #"\n" + then yyAction46(strm, yyNO_MATCH) + else yyQ29(strm', yyMATCH(strm, yyAction46, yyNO_MATCH)) + else if inp = #"\"" + then yyQ30(strm', yyMATCH(strm, yyAction46, yyNO_MATCH)) + else yyQ29(strm', yyMATCH(strm, yyAction46, yyNO_MATCH)) + else if inp = #"\\" + then yyQ30(strm', yyMATCH(strm, yyAction46, yyNO_MATCH)) + else if inp < #"\\" + then if inp <= #"9" + then yyQ31(strm', yyMATCH(strm, yyAction46, yyNO_MATCH)) + else yyQ29(strm', yyMATCH(strm, yyAction46, yyNO_MATCH)) + else if inp = #"b" + then yyQ30(strm', yyMATCH(strm, yyAction46, yyNO_MATCH)) + else yyQ29(strm', yyMATCH(strm, yyAction46, yyNO_MATCH)) + else if inp = #"r" + then yyQ30(strm', yyMATCH(strm, yyAction46, yyNO_MATCH)) + else if inp < #"r" + then if inp = #"i" + then yyQ29(strm', yyMATCH(strm, yyAction46, yyNO_MATCH)) + else if inp < #"i" + then if inp = #"h" + then yyQ32(strm', yyMATCH(strm, yyAction46, yyNO_MATCH)) + else yyQ29(strm', yyMATCH(strm, yyAction46, yyNO_MATCH)) + else if inp = #"n" + then yyQ30(strm', yyMATCH(strm, yyAction46, yyNO_MATCH)) + else yyQ29(strm', yyMATCH(strm, yyAction46, yyNO_MATCH)) + else if inp = #"u" + then yyQ33(strm', yyMATCH(strm, yyAction46, yyNO_MATCH)) + else if inp < #"u" + then if inp = #"s" + then yyQ29(strm', yyMATCH(strm, yyAction46, yyNO_MATCH)) + else yyQ30(strm', yyMATCH(strm, yyAction46, yyNO_MATCH)) + else yyQ29(strm', yyMATCH(strm, yyAction46, yyNO_MATCH)) + (* end case *)) +fun yyQ104 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction37(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction37(strm, yyNO_MATCH) + (* end case *)) +fun yyQ101 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction39(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"]" + then yyQ104(strm', yyMATCH(strm, yyAction39, yyNO_MATCH)) + else yyAction39(strm, yyNO_MATCH) + (* end case *)) +fun yyQ8 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction46(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction46(strm, yyNO_MATCH) + (* end case *)) +fun yyQ4 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => + if yyInput.eof(!(yystrm)) + then UserDeclarations.eof(yyarg) + else yystuck(lastMatch) + | SOME(inp, strm') => + if inp = #"." + then yyQ8(strm', lastMatch) + else if inp < #"." + then if inp = #"\v" + then yyQ8(strm', lastMatch) + else if inp < #"\v" + then if inp = #"\n" + then if yyInput.eof(!(yystrm)) + then UserDeclarations.eof(yyarg) + else yystuck(lastMatch) + else yyQ8(strm', lastMatch) + else if inp = #"-" + then yyQ101(strm', lastMatch) + else yyQ8(strm', lastMatch) + else if inp = #"]" + then yyQ102(strm', lastMatch) + else if inp < #"]" + then if inp = #"\\" + then yyQ25(strm', lastMatch) + else yyQ8(strm', lastMatch) + else if inp = #"^" + then yyQ103(strm', lastMatch) + else yyQ8(strm', lastMatch) + (* end case *)) +fun yyQ100 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction41(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction41(strm, yyNO_MATCH) + (* end case *)) +fun yyQ3 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => + if yyInput.eof(!(yystrm)) + then UserDeclarations.eof(yyarg) + else yystuck(lastMatch) + | SOME(inp, strm') => + if inp = #"\"" + then yyQ100(strm', lastMatch) + else if inp < #"\"" + then if inp = #"\n" + then if yyInput.eof(!(yystrm)) + then UserDeclarations.eof(yyarg) + else yystuck(lastMatch) + else yyQ8(strm', lastMatch) + else if inp = #"\\" + then yyQ25(strm', lastMatch) + else yyQ8(strm', lastMatch) + (* end case *)) +fun yyQ97 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction36(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction36(strm, yyNO_MATCH) + (* end case *)) +fun yyQ98 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction33(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"A" + then yyQ98(strm', yyMATCH(strm, yyAction33, yyNO_MATCH)) + else if inp < #"A" + then if inp = #"(" + then yyAction33(strm, yyNO_MATCH) + else if inp < #"(" + then if inp = #"'" + then yyQ98(strm', yyMATCH(strm, yyAction33, yyNO_MATCH)) + else yyAction33(strm, yyNO_MATCH) + else if inp = #"0" + then yyQ98(strm', yyMATCH(strm, yyAction33, yyNO_MATCH)) + else if inp < #"0" + then yyAction33(strm, yyNO_MATCH) + else if inp <= #"9" + then yyQ98(strm', yyMATCH(strm, yyAction33, yyNO_MATCH)) + else yyAction33(strm, yyNO_MATCH) + else if inp = #"`" + then yyAction33(strm, yyNO_MATCH) + else if inp < #"`" + then if inp = #"[" + then yyAction33(strm, yyNO_MATCH) + else if inp < #"[" + then yyQ98(strm', yyMATCH(strm, yyAction33, yyNO_MATCH)) + else if inp = #"_" + then yyQ98(strm', yyMATCH(strm, yyAction33, yyNO_MATCH)) + else yyAction33(strm, yyNO_MATCH) + else if inp <= #"z" + then yyQ98(strm', yyMATCH(strm, yyAction33, yyNO_MATCH)) + else yyAction33(strm, yyNO_MATCH) + (* end case *)) +fun yyQ96 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction33(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"A" + then yyQ98(strm', yyMATCH(strm, yyAction33, yyNO_MATCH)) + else if inp < #"A" + then if inp = #"(" + then yyAction33(strm, yyNO_MATCH) + else if inp < #"(" + then if inp = #"'" + then yyQ98(strm', yyMATCH(strm, yyAction33, yyNO_MATCH)) + else yyAction33(strm, yyNO_MATCH) + else if inp = #"0" + then yyQ98(strm', yyMATCH(strm, yyAction33, yyNO_MATCH)) + else if inp < #"0" + then yyAction33(strm, yyNO_MATCH) + else if inp <= #"9" + then yyQ98(strm', yyMATCH(strm, yyAction33, yyNO_MATCH)) + else yyAction33(strm, yyNO_MATCH) + else if inp = #"`" + then yyAction33(strm, yyNO_MATCH) + else if inp < #"`" + then if inp = #"[" + then yyAction33(strm, yyNO_MATCH) + else if inp < #"[" + then yyQ98(strm', yyMATCH(strm, yyAction33, yyNO_MATCH)) + else if inp = #"_" + then yyQ98(strm', yyMATCH(strm, yyAction33, yyNO_MATCH)) + else yyAction33(strm, yyNO_MATCH) + else if inp <= #"z" + then yyQ98(strm', yyMATCH(strm, yyAction33, yyNO_MATCH)) + else yyAction33(strm, yyNO_MATCH) + (* end case *)) +fun yyQ99 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction34(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"0" + then yyQ99(strm', yyMATCH(strm, yyAction34, yyNO_MATCH)) + else if inp < #"0" + then yyAction34(strm, yyNO_MATCH) + else if inp <= #"9" + then yyQ99(strm', yyMATCH(strm, yyAction34, yyNO_MATCH)) + else yyAction34(strm, yyNO_MATCH) + (* end case *)) +fun yyQ95 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction34(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"0" + then yyQ99(strm', yyMATCH(strm, yyAction34, yyNO_MATCH)) + else if inp < #"0" + then yyAction34(strm, yyNO_MATCH) + else if inp <= #"9" + then yyQ99(strm', yyMATCH(strm, yyAction34, yyNO_MATCH)) + else yyAction34(strm, yyNO_MATCH) + (* end case *)) +fun yyQ94 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction35(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction35(strm, yyNO_MATCH) + (* end case *)) +fun yyQ93 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction32(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction32(strm, yyNO_MATCH) + (* end case *)) +fun yyQ92 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction32(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction32(strm, yyNO_MATCH) + (* end case *)) +fun yyQ2 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => + if yyInput.eof(!(yystrm)) + then UserDeclarations.eof(yyarg) + else yystuck(lastMatch) + | SOME(inp, strm') => + if inp = #"-" + then yyQ42(strm', lastMatch) + else if inp < #"-" + then if inp = #"\r" + then yyQ92(strm', lastMatch) + else if inp < #"\r" + then if inp = #"\n" + then yyQ93(strm', lastMatch) + else if inp < #"\n" + then if inp = #"\t" + then yyQ92(strm', lastMatch) + else yyQ42(strm', lastMatch) + else yyQ42(strm', lastMatch) + else if inp = #"!" + then yyQ42(strm', lastMatch) + else if inp < #"!" + then if inp = #" " + then yyQ92(strm', lastMatch) + else yyQ42(strm', lastMatch) + else if inp = #"," + then yyQ94(strm', lastMatch) + else yyQ42(strm', lastMatch) + else if inp = #"[" + then yyQ42(strm', lastMatch) + else if inp < #"[" + then if inp = #":" + then yyQ42(strm', lastMatch) + else if inp < #":" + then if inp <= #"/" + then yyQ42(strm', lastMatch) + else yyQ95(strm', lastMatch) + else if inp <= #"@" + then yyQ42(strm', lastMatch) + else yyQ96(strm', lastMatch) + else if inp = #"{" + then yyQ42(strm', lastMatch) + else if inp < #"{" + then if inp <= #"`" + then yyQ42(strm', lastMatch) + else yyQ96(strm', lastMatch) + else if inp = #"}" + then yyQ97(strm', lastMatch) + else yyQ42(strm', lastMatch) + (* end case *)) +fun yyQ48 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction12(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"A" + then yyQ48(strm', yyMATCH(strm, yyAction12, yyNO_MATCH)) + else if inp < #"A" + then if inp = #"(" + then yyAction12(strm, yyNO_MATCH) + else if inp < #"(" + then if inp = #"'" + then yyQ48(strm', yyMATCH(strm, yyAction12, yyNO_MATCH)) + else yyAction12(strm, yyNO_MATCH) + else if inp = #"0" + then yyQ48(strm', yyMATCH(strm, yyAction12, yyNO_MATCH)) + else if inp < #"0" + then yyAction12(strm, yyNO_MATCH) + else if inp <= #"9" + then yyQ48(strm', yyMATCH(strm, yyAction12, yyNO_MATCH)) + else yyAction12(strm, yyNO_MATCH) + else if inp = #"`" + then yyAction12(strm, yyNO_MATCH) + else if inp < #"`" + then if inp = #"[" + then yyAction12(strm, yyNO_MATCH) + else if inp < #"[" + then yyQ48(strm', yyMATCH(strm, yyAction12, yyNO_MATCH)) + else if inp = #"_" + then yyQ48(strm', yyMATCH(strm, yyAction12, yyNO_MATCH)) + else yyAction12(strm, yyNO_MATCH) + else if inp <= #"z" + then yyQ48(strm', yyMATCH(strm, yyAction12, yyNO_MATCH)) + else yyAction12(strm, yyNO_MATCH) + (* end case *)) +fun yyQ47 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction12(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"A" + then yyQ48(strm', yyMATCH(strm, yyAction12, yyNO_MATCH)) + else if inp < #"A" + then if inp = #"(" + then yyAction12(strm, yyNO_MATCH) + else if inp < #"(" + then if inp = #"'" + then yyQ48(strm', yyMATCH(strm, yyAction12, yyNO_MATCH)) + else yyAction12(strm, yyNO_MATCH) + else if inp = #"0" + then yyQ48(strm', yyMATCH(strm, yyAction12, yyNO_MATCH)) + else if inp < #"0" + then yyAction12(strm, yyNO_MATCH) + else if inp <= #"9" + then yyQ48(strm', yyMATCH(strm, yyAction12, yyNO_MATCH)) + else yyAction12(strm, yyNO_MATCH) + else if inp = #"`" + then yyAction12(strm, yyNO_MATCH) + else if inp < #"`" + then if inp = #"[" + then yyAction12(strm, yyNO_MATCH) + else if inp < #"[" + then yyQ48(strm', yyMATCH(strm, yyAction12, yyNO_MATCH)) + else if inp = #"_" + then yyQ48(strm', yyMATCH(strm, yyAction12, yyNO_MATCH)) + else yyAction12(strm, yyNO_MATCH) + else if inp <= #"z" + then yyQ48(strm', yyMATCH(strm, yyAction12, yyNO_MATCH)) + else yyAction12(strm, yyNO_MATCH) + (* end case *)) +fun yyQ46 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction13(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction13(strm, yyNO_MATCH) + (* end case *)) +fun yyQ62 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction10(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction10(strm, yyNO_MATCH) + (* end case *)) +fun yyQ61 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = #"e" + then yyQ62(strm', lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ60 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = #"d" + then yyQ61(strm', lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ59 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = #"o" + then yyQ60(strm', lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ58 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = #"c" + then yyQ59(strm', lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ57 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = #"i" + then yyQ58(strm', lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ56 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = #"n" + then yyQ57(strm', lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ70 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction6(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction6(strm, yyNO_MATCH) + (* end case *)) +fun yyQ69 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = #"e" + then yyQ70(strm', lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ68 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = #"r" + then yyQ69(strm', lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ67 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = #"u" + then yyQ68(strm', lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ66 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = #"t" + then yyQ67(strm', lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ65 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = #"c" + then yyQ66(strm', lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ64 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = #"u" + then yyQ65(strm', lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ63 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = #"r" + then yyQ64(strm', lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ55 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction4(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"t" + then yyQ63(strm', yyMATCH(strm, yyAction4, yyNO_MATCH)) + else yyAction4(strm, yyNO_MATCH) + (* end case *)) +fun yyQ75 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction9(strm, lastMatch) + | SOME(inp, strm') => yyAction9(strm, lastMatch) + (* end case *)) +fun yyQ74 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = #"t" + then yyQ75(strm', lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ73 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = #"c" + then yyQ74(strm', lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ72 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = #"e" + then yyQ73(strm', lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ71 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = #"j" + then yyQ72(strm', lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ54 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = #"e" + then yyQ71(strm', lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ81 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction5(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction5(strm, yyNO_MATCH) + (* end case *)) +fun yyQ80 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = #"\^N" + then yystuck(lastMatch) + else if inp < #"\^N" + then if inp = #"\v" + then yystuck(lastMatch) + else if inp < #"\v" + then if inp <= #"\b" + then yystuck(lastMatch) + else yyQ80(strm', lastMatch) + else if inp = #"\r" + then yyQ80(strm', lastMatch) + else yystuck(lastMatch) + else if inp = #"!" + then yystuck(lastMatch) + else if inp < #"!" + then if inp = #" " + then yyQ80(strm', lastMatch) + else yystuck(lastMatch) + else if inp = #"(" + then yyQ81(strm', lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ79 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = #"r" + then yyQ80(strm', lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ78 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = #"e" + then yyQ79(strm', lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ77 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = #"d" + then yyQ78(strm', lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ76 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = #"a" + then yyQ77(strm', lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ53 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = #"e" + then yyQ76(strm', lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ84 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction11(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction11(strm, yyNO_MATCH) + (* end case *)) +fun yyQ83 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = #"l" + then yyQ84(strm', lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ82 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = #"l" + then yyQ83(strm', lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ52 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = #"u" + then yyQ82(strm', lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ88 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction8(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction8(strm, yyNO_MATCH) + (* end case *)) +fun yyQ87 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = #"t" + then yyQ88(strm', lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ86 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = #"n" + then yyQ87(strm', lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ85 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = #"u" + then yyQ86(strm', lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ51 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = #"o" + then yyQ85(strm', lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ91 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction7(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction7(strm, yyNO_MATCH) + (* end case *)) +fun yyQ90 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = #"\^N" + then yystuck(lastMatch) + else if inp < #"\^N" + then if inp = #"\v" + then yystuck(lastMatch) + else if inp < #"\v" + then if inp <= #"\b" + then yystuck(lastMatch) + else yyQ90(strm', lastMatch) + else if inp = #"\r" + then yyQ90(strm', lastMatch) + else yystuck(lastMatch) + else if inp = #"!" + then yystuck(lastMatch) + else if inp < #"!" + then if inp = #" " + then yyQ90(strm', lastMatch) + else yystuck(lastMatch) + else if inp = #"(" + then yyQ91(strm', lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ89 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = #"g" + then yyQ90(strm', lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ50 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = #"r" + then yyQ89(strm', lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ49 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction3(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction3(strm, yyNO_MATCH) + (* end case *)) +fun yyQ45 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction60(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"g" + then yyAction60(strm, yyNO_MATCH) + else if inp < #"g" + then if inp = #"b" + then yyAction60(strm, yyNO_MATCH) + else if inp < #"b" + then if inp = #"&" + then yyAction60(strm, yyNO_MATCH) + else if inp < #"&" + then if inp = #"%" + then yyQ49(strm', yyMATCH(strm, yyAction60, yyNO_MATCH)) + else yyAction60(strm, yyNO_MATCH) + else if inp = #"a" + then yyQ50(strm', yyMATCH(strm, yyAction60, yyNO_MATCH)) + else yyAction60(strm, yyNO_MATCH) + else if inp = #"d" + then yyAction60(strm, yyNO_MATCH) + else if inp < #"d" + then yyQ51(strm', yyMATCH(strm, yyAction60, yyNO_MATCH)) + else if inp = #"f" + then yyQ52(strm', yyMATCH(strm, yyAction60, yyNO_MATCH)) + else yyAction60(strm, yyNO_MATCH) + else if inp = #"s" + then yyQ55(strm', yyMATCH(strm, yyAction60, yyNO_MATCH)) + else if inp < #"s" + then if inp = #"i" + then yyAction60(strm, yyNO_MATCH) + else if inp < #"i" + then yyQ53(strm', yyMATCH(strm, yyAction60, yyNO_MATCH)) + else if inp = #"r" + then yyQ54(strm', yyMATCH(strm, yyAction60, yyNO_MATCH)) + else yyAction60(strm, yyNO_MATCH) + else if inp = #"u" + then yyQ56(strm', yyMATCH(strm, yyAction60, yyNO_MATCH)) + else yyAction60(strm, yyNO_MATCH) + (* end case *)) +fun yyQ44 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction2(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction2(strm, yyNO_MATCH) + (* end case *)) +fun yyQ43 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction2(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction2(strm, yyNO_MATCH) + (* end case *)) +fun yyQ1 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => + if yyInput.eof(!(yystrm)) + then UserDeclarations.eof(yyarg) + else yystuck(lastMatch) + | SOME(inp, strm') => + if inp = #"%" + then yyQ45(strm', lastMatch) + else if inp < #"%" + then if inp = #"\r" + then yyQ43(strm', lastMatch) + else if inp < #"\r" + then if inp = #"\n" + then yyQ44(strm', lastMatch) + else if inp < #"\n" + then if inp = #"\t" + then yyQ43(strm', lastMatch) + else yyQ42(strm', lastMatch) + else yyQ42(strm', lastMatch) + else if inp = #" " + then yyQ43(strm', lastMatch) + else yyQ42(strm', lastMatch) + else if inp = #"A" + then yyQ47(strm', lastMatch) + else if inp < #"A" + then if inp = #"=" + then yyQ46(strm', lastMatch) + else yyQ42(strm', lastMatch) + else if inp = #"a" + then yyQ47(strm', lastMatch) + else if inp < #"a" + then if inp <= #"Z" + then yyQ47(strm', lastMatch) + else yyQ42(strm', lastMatch) + else if inp <= #"z" + then yyQ47(strm', lastMatch) + else yyQ42(strm', lastMatch) + (* end case *)) +fun yyQ28 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction18(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction18(strm, yyNO_MATCH) + (* end case *)) +fun yyQ27 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction25(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction25(strm, yyNO_MATCH) + (* end case *)) +fun yyQ26 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction24(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction24(strm, yyNO_MATCH) + (* end case *)) +fun yyQ24 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction27(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction27(strm, yyNO_MATCH) + (* end case *)) +fun yyQ23 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction15(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction15(strm, yyNO_MATCH) + (* end case *)) +fun yyQ22 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction29(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction29(strm, yyNO_MATCH) + (* end case *)) +fun yyQ41 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction30(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction30(strm, yyNO_MATCH) + (* end case *)) +fun yyQ40 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = #"\^N" + then yystuck(lastMatch) + else if inp < #"\^N" + then if inp = #"\v" + then yystuck(lastMatch) + else if inp < #"\v" + then if inp <= #"\b" + then yystuck(lastMatch) + else yyQ40(strm', lastMatch) + else if inp = #"\r" + then yyQ40(strm', lastMatch) + else yystuck(lastMatch) + else if inp = #"!" + then yystuck(lastMatch) + else if inp < #"!" + then if inp = #" " + then yyQ40(strm', lastMatch) + else yystuck(lastMatch) + else if inp = #"(" + then yyQ41(strm', lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ21 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction46(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #">" + then yyQ40(strm', yyMATCH(strm, yyAction46, yyNO_MATCH)) + else yyAction46(strm, yyNO_MATCH) + (* end case *)) +fun yyQ20 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction28(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction28(strm, yyNO_MATCH) + (* end case *)) +fun yyQ19 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction31(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction31(strm, yyNO_MATCH) + (* end case *)) +fun yyQ18 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction22(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction22(strm, yyNO_MATCH) + (* end case *)) +fun yyQ17 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction23(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction23(strm, yyNO_MATCH) + (* end case *)) +fun yyQ16 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction17(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction17(strm, yyNO_MATCH) + (* end case *)) +fun yyQ15 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction16(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction16(strm, yyNO_MATCH) + (* end case *)) +fun yyQ14 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction20(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction20(strm, yyNO_MATCH) + (* end case *)) +fun yyQ13 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction19(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction19(strm, yyNO_MATCH) + (* end case *)) +fun yyQ12 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction21(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction21(strm, yyNO_MATCH) + (* end case *)) +fun yyQ11 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction26(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction26(strm, yyNO_MATCH) + (* end case *)) +fun yyQ10 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction14(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction14(strm, yyNO_MATCH) + (* end case *)) +fun yyQ9 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction14(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction14(strm, yyNO_MATCH) + (* end case *)) +fun yyQ0 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => + if yyInput.eof(!(yystrm)) + then UserDeclarations.eof(yyarg) + else yystuck(lastMatch) + | SOME(inp, strm') => + if inp = #"." + then yyQ17(strm', lastMatch) + else if inp < #"." + then if inp = #"\"" + then yyQ11(strm', lastMatch) + else if inp < #"\"" + then if inp = #"\r" + then yyQ9(strm', lastMatch) + else if inp < #"\r" + then if inp = #"\n" + then yyQ10(strm', lastMatch) + else if inp < #"\n" + then if inp = #"\t" + then yyQ9(strm', lastMatch) + else yyQ8(strm', lastMatch) + else yyQ8(strm', lastMatch) + else if inp = #" " + then yyQ9(strm', lastMatch) + else yyQ8(strm', lastMatch) + else if inp = #")" + then yyQ14(strm', lastMatch) + else if inp < #")" + then if inp = #"%" + then yyQ8(strm', lastMatch) + else if inp < #"%" + then if inp = #"#" + then yyQ8(strm', lastMatch) + else yyQ12(strm', lastMatch) + else if inp = #"(" + then yyQ13(strm', lastMatch) + else yyQ8(strm', lastMatch) + else if inp = #"+" + then yyQ16(strm', lastMatch) + else if inp = #"*" + then yyQ15(strm', lastMatch) + else yyQ8(strm', lastMatch) + else if inp = #"[" + then yyQ24(strm', lastMatch) + else if inp < #"[" + then if inp = #"=" + then yyQ21(strm', lastMatch) + else if inp < #"=" + then if inp = #";" + then yyQ19(strm', lastMatch) + else if inp < #";" + then if inp = #"/" + then yyQ18(strm', lastMatch) + else yyQ8(strm', lastMatch) + else yyQ20(strm', lastMatch) + else if inp = #"?" + then yyQ23(strm', lastMatch) + else if inp = #">" + then yyQ22(strm', lastMatch) + else yyQ8(strm', lastMatch) + else if inp = #"_" + then yyQ8(strm', lastMatch) + else if inp < #"_" + then if inp = #"]" + then yyQ8(strm', lastMatch) + else if inp = #"\\" + then yyQ25(strm', lastMatch) + else yyQ26(strm', lastMatch) + else if inp = #"|" + then yyQ28(strm', lastMatch) + else if inp < #"|" + then if inp = #"{" + then yyQ27(strm', lastMatch) + else yyQ8(strm', lastMatch) + else yyQ8(strm', lastMatch) + (* end case *)) +in + (case (!(yyss)) + of RE => yyQ0(!(yystrm), yyNO_MATCH) + | DEFS => yyQ1(!(yystrm), yyNO_MATCH) + | RECB => yyQ2(!(yystrm), yyNO_MATCH) + | STRING => yyQ3(!(yystrm), yyNO_MATCH) + | CHARCLASS => yyQ4(!(yystrm), yyNO_MATCH) + | LEXSTATES => yyQ5(!(yystrm), yyNO_MATCH) + | ACTION => yyQ6(!(yystrm), yyNO_MATCH) + | INITIAL => yyQ7(!(yystrm), yyNO_MATCH) + (* end case *)) +end + end + in + continue() + handle IO.Io{cause, ...} => raise cause + end + in + lex + end + in + fun makeLexer yyinputN = mk (yyInput.mkStream yyinputN) + fun makeLexer' ins = mk (yyInput.mkStream ins) + end + + end diff --git a/ml-lpt/ml-ulex/FrontEnds/ml-lex/ml-lex.yacc b/ml-lpt/ml-ulex/FrontEnds/ml-lex/ml-lex.yacc new file mode 100644 index 0000000..088e376 --- /dev/null +++ b/ml-lpt/ml-ulex/FrontEnds/ml-lex/ml-lex.yacc @@ -0,0 +1,243 @@ +structure S = LexSpec + +structure RE = RegExp +structure SIS = RE.SymSet + +val symTable : RE.re AtomMap.map ref = ref AtomMap.empty + +val wildcard = SIS.complement (SIS.singleton 0w10) (* everything but \n *) +fun charToSym c = Word.fromInt (Char.ord c) +fun strToSym s = charToSym (String.sub (s, 0)) + +fun mkRule (ss, (false, re), act) = ((ss, re), act) + | mkRule (ss, (true, re), act) = ((ss, re), + "if not yylastwasn then REJECT() else (" ^ act ^")") + +%% + +%name MLLex + +%term + EOF + | DECLS of string + | LT (* < *) + | GT (* > *) + | LP (* ( *) + | RP (* ) *) + | LB (* [ *) + | RB (* ] *) + | RBD (* -] *) + | LCB (* { *) + | RCB (* } *) + | QMARK + | STAR + | PLUS + | BAR + | CARAT + | DOLLAR + | SLASH + | DASH + | HIGH_CHAR (* "\h"; represents the range \128-\255 when not in %full mode *) + | CHAR of string + | UNICHAR of UTF8.wchar + | DOT + | EQ + | REPS of int + | ID of string + | ARROW + | ACT of string + | SEMI + | LEXMARK + | COMMA + | STATES + | LEXSTATE of string + | COUNT + | REJECTTOK + | FULL + | UNICODE + | STRUCT + | HEADER + | ARG + | POSARG + +%nonterm + Start of S.spec + | Decls of string + | Defs of S.config + | StartStates of AtomSet.set + | Rules of S.rule list + | Rule of S.rule + | RuleStates of AtomSet.set + | LineBreakExp of (bool * RE.re) + | OrExp of RE.re + | CatExp of RE.re + | Exp of RE.re + | InExp of RE.re + | CharClass of SIS.set + | CharClass' of SIS.set + | CharRng of SIS.set + | AChar of UTF8.wchar + | NonCarat of UTF8.wchar + +%left BAR +%nonassoc QMARK +%left PLUS +%nonassoc STAR + +%pos int +%eop EOF +%noshift EOF +%start Start +%verbose + +%% + +Start + : Decls LEXMARK Defs LEXMARK Rules + (S.Spec {decls = Decls, + conf = Defs, + rules = Rules, + eofRules = []}) + +Decls + : DECLS + (DECLS) + | (* empty *) + ("") + +Defs + : (* empty *) + (S.mkConfig()) + | Defs STATES StartStates SEMI + (S.updStartStates (Defs, StartStates)) + | Defs HEADER ACT + (S.updHeader (Defs, + String.substring (ACT, 1, String.size ACT - 2))) + | Defs STRUCT ID + (S.updStructName (Defs, ID)) + | Defs ARG ACT + (S.updArg (Defs, ACT)) + | Defs UNICODE + (S.updClamp (Defs, S.NO_CLAMP)) + | Defs FULL + (S.updClamp (Defs, S.CLAMP255)) + | Defs COUNT + (Defs) + | Defs REJECTTOK + (Defs) + | Defs ID EQ OrExp SEMI + (symTable := AtomMap.insert + (!symTable, Atom.atom ID, OrExp); + Defs) + +StartStates + : LEXSTATE + (AtomSet.singleton (Atom.atom LEXSTATE)) + | LEXSTATE StartStates + (AtomSet.add (StartStates, Atom.atom LEXSTATE)) + +Rules + : (* empty *) + ([]) + | Rule Rules + (Rule :: Rules) + +Rule + : LineBreakExp ARROW ACT + (mkRule (NONE, LineBreakExp, ACT)) + | LT RuleStates GT LineBreakExp ARROW ACT + (mkRule (SOME RuleStates, LineBreakExp, ACT)) + +RuleStates + : LEXSTATE + (AtomSet.singleton (Atom.atom LEXSTATE)) + | RuleStates COMMA LEXSTATE + (AtomSet.add (RuleStates, Atom.atom LEXSTATE)) + +LineBreakExp + : CARAT OrExp + (true, OrExp) + | OrExp + (false, OrExp) + +OrExp + : OrExp BAR CatExp + (RE.mkOr (OrExp, CatExp)) + | CatExp + (CatExp) + +CatExp + : CatExp Exp + (RE.mkConcat (CatExp, Exp)) + | Exp + (Exp) + +Exp + : Exp QMARK + (RE.mkOpt Exp) + | Exp STAR + (RE.mkClosure Exp) + | Exp PLUS + (RE.mkAtLeast (Exp, 1)) + | Exp REPS RCB + (RE.mkRep (Exp, REPS, REPS)) + | Exp REPS COMMA REPS RCB + (RE.mkRep (Exp, REPS1, REPS2)) + | InExp + (InExp) + +InExp + : HIGH_CHAR + (RE.mkSymSet (SIS.interval (0w128, 0w255))) + | CHAR + (RE.mkSymSet (SIS.singleton (strToSym CHAR))) + | UNICHAR + (RE.mkSymSet (SIS.singleton UNICHAR)) + | DOT + (RE.mkSymSet wildcard) + | ID RCB + (case AtomMap.find (!symTable, Atom.atom ID) + of SOME re => re + | NONE => raise Fail ("'" ^ ID ^ "' not defined")) + | LP OrExp RP + (OrExp) + | LB CARAT CharClass + (RE.mkSymSet (SIS.complement CharClass)) + | LB CharClass + (RE.mkSymSet CharClass) + +CharClass + : CharClass' RB + (CharClass') + | DASH CharClass' RB + (SIS.add (CharClass', charToSym #"-")) + | RBD + (SIS.singleton (charToSym #"-")) + | CharClass' RBD + (SIS.add (CharClass', charToSym #"-")) + +CharClass' + : NonCarat + (SIS.singleton NonCarat) + | NonCarat DASH AChar + (SIS.interval (NonCarat, AChar)) + | CharClass' CharRng + (SIS.union (CharRng, CharClass')) + +CharRng + : AChar DASH AChar + (SIS.interval (AChar1, AChar2)) + | AChar + (SIS.singleton AChar) + +AChar + : CARAT + (charToSym #"^") + | NonCarat + (NonCarat) + +NonCarat + : CHAR + (strToSym CHAR) + | UNICHAR + (UNICHAR) diff --git a/ml-lpt/ml-ulex/FrontEnds/ml-lex/ml-lex.yacc.desc b/ml-lpt/ml-ulex/FrontEnds/ml-lex/ml-lex.yacc.desc new file mode 100644 index 0000000..2a0dbd8 --- /dev/null +++ b/ml-lpt/ml-ulex/FrontEnds/ml-lex/ml-lex.yacc.desc @@ -0,0 +1,1041 @@ + +state 0: + + Start : . Decls LEXMARK Defs LEXMARK Rules + + DECLS shift 2 + + Start goto 88 + Decls goto 1 + + . reduce by rule 2 + + +state 1: + + Start : Decls . LEXMARK Defs LEXMARK Rules + + LEXMARK shift 3 + + + . error + + +state 2: + + Decls : DECLS . (reduce by rule 1) + + + + . reduce by rule 1 + + +state 3: + + Start : Decls LEXMARK . Defs LEXMARK Rules + + + Defs goto 4 + + . reduce by rule 3 + + +state 4: + + Start : Decls LEXMARK Defs . LEXMARK Rules + Defs : Defs . STATES StartStates SEMI + Defs : Defs . HEADER ACT + Defs : Defs . STRUCT ID + Defs : Defs . ARG ACT + Defs : Defs . UNICODE + Defs : Defs . FULL + Defs : Defs . COUNT + Defs : Defs . REJECTTOK + Defs : Defs . ID EQ OrExp SEMI + + ID shift 14 + LEXMARK shift 13 + STATES shift 12 + COUNT shift 11 + REJECTTOK shift 10 + FULL shift 9 + UNICODE shift 8 + STRUCT shift 7 + HEADER shift 6 + ARG shift 5 + + + . error + + +state 5: + + Defs : Defs ARG . ACT + + ACT shift 15 + + + . error + + +state 6: + + Defs : Defs HEADER . ACT + + ACT shift 16 + + + . error + + +state 7: + + Defs : Defs STRUCT . ID + + ID shift 17 + + + . error + + +state 8: + + Defs : Defs UNICODE . (reduce by rule 8) + + + + . reduce by rule 8 + + +state 9: + + Defs : Defs FULL . (reduce by rule 9) + + + + . reduce by rule 9 + + +state 10: + + Defs : Defs REJECTTOK . (reduce by rule 11) + + + + . reduce by rule 11 + + +state 11: + + Defs : Defs COUNT . (reduce by rule 10) + + + + . reduce by rule 10 + + +state 12: + + Defs : Defs STATES . StartStates SEMI + + LEXSTATE shift 19 + + StartStates goto 18 + + . error + + +state 13: + + Start : Decls LEXMARK Defs LEXMARK . Rules + + LT shift 35 + LP shift 34 + LB shift 33 + CARAT shift 32 + HIGH_CHAR shift 31 + CHAR shift 30 + UNICHAR shift 29 + DOT shift 28 + ID shift 27 + + Rules goto 26 + Rule goto 25 + LineBreakExp goto 24 + OrExp goto 23 + CatExp goto 22 + Exp goto 21 + InExp goto 20 + + . reduce by rule 15 + + +state 14: + + Defs : Defs ID . EQ OrExp SEMI + + EQ shift 36 + + + . error + + +state 15: + + Defs : Defs ARG ACT . (reduce by rule 7) + + + + . reduce by rule 7 + + +state 16: + + Defs : Defs HEADER ACT . (reduce by rule 5) + + + + . reduce by rule 5 + + +state 17: + + Defs : Defs STRUCT ID . (reduce by rule 6) + + + + . reduce by rule 6 + + +state 18: + + Defs : Defs STATES StartStates . SEMI + + SEMI shift 37 + + + . error + + +state 19: + + StartStates : LEXSTATE . (reduce by rule 13) + StartStates : LEXSTATE . StartStates + + LEXSTATE shift 19 + + StartStates goto 38 + + . reduce by rule 13 + + +state 20: + + Exp : InExp . (reduce by rule 32) + + + + . reduce by rule 32 + + +state 21: + + CatExp : Exp . (reduce by rule 26) + Exp : Exp . QMARK + Exp : Exp . STAR + Exp : Exp . PLUS + Exp : Exp . REPS RCB + Exp : Exp . REPS COMMA REPS RCB + + QMARK shift 42 + STAR shift 41 + PLUS shift 40 + REPS shift 39 + + + . reduce by rule 26 + + +state 22: + + OrExp : CatExp . (reduce by rule 24) + CatExp : CatExp . Exp + + LP shift 34 + LB shift 33 + HIGH_CHAR shift 31 + CHAR shift 30 + UNICHAR shift 29 + DOT shift 28 + ID shift 27 + + Exp goto 43 + InExp goto 20 + + . reduce by rule 24 + + +state 23: + + LineBreakExp : OrExp . (reduce by rule 22) + OrExp : OrExp . BAR CatExp + + BAR shift 44 + + + . reduce by rule 22 + + +state 24: + + Rule : LineBreakExp . ARROW ACT + + ARROW shift 45 + + + . error + + +state 25: + + Rules : Rule . Rules + + LT shift 35 + LP shift 34 + LB shift 33 + CARAT shift 32 + HIGH_CHAR shift 31 + CHAR shift 30 + UNICHAR shift 29 + DOT shift 28 + ID shift 27 + + Rules goto 46 + Rule goto 25 + LineBreakExp goto 24 + OrExp goto 23 + CatExp goto 22 + Exp goto 21 + InExp goto 20 + + . reduce by rule 15 + + +state 26: + + Start : Decls LEXMARK Defs LEXMARK Rules . (reduce by rule 0) + + + + . reduce by rule 0 + + +state 27: + + InExp : ID . RCB + + RCB shift 47 + + + . error + + +state 28: + + InExp : DOT . (reduce by rule 36) + + + + . reduce by rule 36 + + +state 29: + + InExp : UNICHAR . (reduce by rule 35) + + + + . reduce by rule 35 + + +state 30: + + InExp : CHAR . (reduce by rule 34) + + + + . reduce by rule 34 + + +state 31: + + InExp : HIGH_CHAR . (reduce by rule 33) + + + + . reduce by rule 33 + + +state 32: + + LineBreakExp : CARAT . OrExp + + LP shift 34 + LB shift 33 + HIGH_CHAR shift 31 + CHAR shift 30 + UNICHAR shift 29 + DOT shift 28 + ID shift 27 + + OrExp goto 48 + CatExp goto 22 + Exp goto 21 + InExp goto 20 + + . error + + +state 33: + + InExp : LB . CARAT CharClass + InExp : LB . CharClass + + RBD shift 56 + CARAT shift 55 + DASH shift 54 + CHAR shift 53 + UNICHAR shift 52 + + CharClass goto 51 + CharClass' goto 50 + NonCarat goto 49 + + . error + + +state 34: + + InExp : LP . OrExp RP + + LP shift 34 + LB shift 33 + HIGH_CHAR shift 31 + CHAR shift 30 + UNICHAR shift 29 + DOT shift 28 + ID shift 27 + + OrExp goto 57 + CatExp goto 22 + Exp goto 21 + InExp goto 20 + + . error + + +state 35: + + Rule : LT . RuleStates GT LineBreakExp ARROW ACT + + LEXSTATE shift 59 + + RuleStates goto 58 + + . error + + +state 36: + + Defs : Defs ID EQ . OrExp SEMI + + LP shift 34 + LB shift 33 + HIGH_CHAR shift 31 + CHAR shift 30 + UNICHAR shift 29 + DOT shift 28 + ID shift 27 + + OrExp goto 60 + CatExp goto 22 + Exp goto 21 + InExp goto 20 + + . error + + +state 37: + + Defs : Defs STATES StartStates SEMI . (reduce by rule 4) + + + + . reduce by rule 4 + + +state 38: + + StartStates : LEXSTATE StartStates . (reduce by rule 14) + + + + . reduce by rule 14 + + +state 39: + + Exp : Exp REPS . RCB + Exp : Exp REPS . COMMA REPS RCB + + RCB shift 62 + COMMA shift 61 + + + . error + + +state 40: + + Exp : Exp PLUS . (reduce by rule 29) + + + + . reduce by rule 29 + + +state 41: + + Exp : Exp STAR . (reduce by rule 28) + + + + . reduce by rule 28 + + +state 42: + + Exp : Exp QMARK . (reduce by rule 27) + + + + . reduce by rule 27 + + +state 43: + + CatExp : CatExp Exp . (reduce by rule 25) + Exp : Exp . QMARK + Exp : Exp . STAR + Exp : Exp . PLUS + Exp : Exp . REPS RCB + Exp : Exp . REPS COMMA REPS RCB + + QMARK shift 42 + STAR shift 41 + PLUS shift 40 + REPS shift 39 + + + . reduce by rule 25 + + +state 44: + + OrExp : OrExp BAR . CatExp + + LP shift 34 + LB shift 33 + HIGH_CHAR shift 31 + CHAR shift 30 + UNICHAR shift 29 + DOT shift 28 + ID shift 27 + + CatExp goto 63 + Exp goto 21 + InExp goto 20 + + . error + + +state 45: + + Rule : LineBreakExp ARROW . ACT + + ACT shift 64 + + + . error + + +state 46: + + Rules : Rule Rules . (reduce by rule 16) + + + + . reduce by rule 16 + + +state 47: + + InExp : ID RCB . (reduce by rule 37) + + + + . reduce by rule 37 + + +state 48: + + LineBreakExp : CARAT OrExp . (reduce by rule 21) + OrExp : OrExp . BAR CatExp + + BAR shift 44 + + + . reduce by rule 21 + + +state 49: + + CharClass' : NonCarat . (reduce by rule 45) + CharClass' : NonCarat . DASH AChar + + DASH shift 65 + + + . reduce by rule 45 + + +state 50: + + CharClass : CharClass' . RB + CharClass : CharClass' . RBD + CharClass' : CharClass' . CharRng + + RB shift 71 + RBD shift 70 + CARAT shift 69 + CHAR shift 53 + UNICHAR shift 52 + + CharRng goto 68 + AChar goto 67 + NonCarat goto 66 + + . error + + +state 51: + + InExp : LB CharClass . (reduce by rule 40) + + + + . reduce by rule 40 + + +state 52: + + NonCarat : UNICHAR . (reduce by rule 53) + + + + . reduce by rule 53 + + +state 53: + + NonCarat : CHAR . (reduce by rule 52) + + + + . reduce by rule 52 + + +state 54: + + CharClass : DASH . CharClass' RB + + CHAR shift 53 + UNICHAR shift 52 + + CharClass' goto 72 + NonCarat goto 49 + + . error + + +state 55: + + InExp : LB CARAT . CharClass + + RBD shift 56 + DASH shift 54 + CHAR shift 53 + UNICHAR shift 52 + + CharClass goto 73 + CharClass' goto 50 + NonCarat goto 49 + + . error + + +state 56: + + CharClass : RBD . (reduce by rule 43) + + + + . reduce by rule 43 + + +state 57: + + OrExp : OrExp . BAR CatExp + InExp : LP OrExp . RP + + RP shift 74 + BAR shift 44 + + + . error + + +state 58: + + Rule : LT RuleStates . GT LineBreakExp ARROW ACT + RuleStates : RuleStates . COMMA LEXSTATE + + GT shift 76 + COMMA shift 75 + + + . error + + +state 59: + + RuleStates : LEXSTATE . (reduce by rule 19) + + + + . reduce by rule 19 + + +state 60: + + Defs : Defs ID EQ OrExp . SEMI + OrExp : OrExp . BAR CatExp + + BAR shift 44 + SEMI shift 77 + + + . error + + +state 61: + + Exp : Exp REPS COMMA . REPS RCB + + REPS shift 78 + + + . error + + +state 62: + + Exp : Exp REPS RCB . (reduce by rule 30) + + + + . reduce by rule 30 + + +state 63: + + OrExp : OrExp BAR CatExp . (reduce by rule 23) + CatExp : CatExp . Exp + + LP shift 34 + LB shift 33 + HIGH_CHAR shift 31 + CHAR shift 30 + UNICHAR shift 29 + DOT shift 28 + ID shift 27 + + Exp goto 43 + InExp goto 20 + + . reduce by rule 23 + + +state 64: + + Rule : LineBreakExp ARROW ACT . (reduce by rule 17) + + + + . reduce by rule 17 + + +state 65: + + CharClass' : NonCarat DASH . AChar + + CARAT shift 69 + CHAR shift 53 + UNICHAR shift 52 + + AChar goto 79 + NonCarat goto 66 + + . error + + +state 66: + + AChar : NonCarat . (reduce by rule 51) + + + + . reduce by rule 51 + + +state 67: + + CharRng : AChar . DASH AChar + CharRng : AChar . (reduce by rule 49) + + DASH shift 80 + + + . reduce by rule 49 + + +state 68: + + CharClass' : CharClass' CharRng . (reduce by rule 47) + + + + . reduce by rule 47 + + +state 69: + + AChar : CARAT . (reduce by rule 50) + + + + . reduce by rule 50 + + +state 70: + + CharClass : CharClass' RBD . (reduce by rule 44) + + + + . reduce by rule 44 + + +state 71: + + CharClass : CharClass' RB . (reduce by rule 41) + + + + . reduce by rule 41 + + +state 72: + + CharClass : DASH CharClass' . RB + CharClass' : CharClass' . CharRng + + RB shift 81 + CARAT shift 69 + CHAR shift 53 + UNICHAR shift 52 + + CharRng goto 68 + AChar goto 67 + NonCarat goto 66 + + . error + + +state 73: + + InExp : LB CARAT CharClass . (reduce by rule 39) + + + + . reduce by rule 39 + + +state 74: + + InExp : LP OrExp RP . (reduce by rule 38) + + + + . reduce by rule 38 + + +state 75: + + RuleStates : RuleStates COMMA . LEXSTATE + + LEXSTATE shift 82 + + + . error + + +state 76: + + Rule : LT RuleStates GT . LineBreakExp ARROW ACT + + LP shift 34 + LB shift 33 + CARAT shift 32 + HIGH_CHAR shift 31 + CHAR shift 30 + UNICHAR shift 29 + DOT shift 28 + ID shift 27 + + LineBreakExp goto 83 + OrExp goto 23 + CatExp goto 22 + Exp goto 21 + InExp goto 20 + + . error + + +state 77: + + Defs : Defs ID EQ OrExp SEMI . (reduce by rule 12) + + + + . reduce by rule 12 + + +state 78: + + Exp : Exp REPS COMMA REPS . RCB + + RCB shift 84 + + + . error + + +state 79: + + CharClass' : NonCarat DASH AChar . (reduce by rule 46) + + + + . reduce by rule 46 + + +state 80: + + CharRng : AChar DASH . AChar + + CARAT shift 69 + CHAR shift 53 + UNICHAR shift 52 + + AChar goto 85 + NonCarat goto 66 + + . error + + +state 81: + + CharClass : DASH CharClass' RB . (reduce by rule 42) + + + + . reduce by rule 42 + + +state 82: + + RuleStates : RuleStates COMMA LEXSTATE . (reduce by rule 20) + + + + . reduce by rule 20 + + +state 83: + + Rule : LT RuleStates GT LineBreakExp . ARROW ACT + + ARROW shift 86 + + + . error + + +state 84: + + Exp : Exp REPS COMMA REPS RCB . (reduce by rule 31) + + + + . reduce by rule 31 + + +state 85: + + CharRng : AChar DASH AChar . (reduce by rule 48) + + + + . reduce by rule 48 + + +state 86: + + Rule : LT RuleStates GT LineBreakExp ARROW . ACT + + ACT shift 87 + + + . error + + +state 87: + + Rule : LT RuleStates GT LineBreakExp ARROW ACT . (reduce by rule 18) + + + + . reduce by rule 18 + + +state 88: + + + EOF accept + + + . error + +110 of 232 action table entries left after compaction +62 goto table entries diff --git a/ml-lpt/ml-ulex/FrontEnds/ml-lex/ml-lex.yacc.sig b/ml-lpt/ml-ulex/FrontEnds/ml-lex/ml-lex.yacc.sig new file mode 100644 index 0000000..74b52ec --- /dev/null +++ b/ml-lpt/ml-ulex/FrontEnds/ml-lex/ml-lex.yacc.sig @@ -0,0 +1,53 @@ +signature MLLex_TOKENS = +sig +type ('a,'b) token +type svalue +val POSARG: 'a * 'a -> (svalue,'a) token +val ARG: 'a * 'a -> (svalue,'a) token +val HEADER: 'a * 'a -> (svalue,'a) token +val STRUCT: 'a * 'a -> (svalue,'a) token +val UNICODE: 'a * 'a -> (svalue,'a) token +val FULL: 'a * 'a -> (svalue,'a) token +val REJECTTOK: 'a * 'a -> (svalue,'a) token +val COUNT: 'a * 'a -> (svalue,'a) token +val LEXSTATE: (string) * 'a * 'a -> (svalue,'a) token +val STATES: 'a * 'a -> (svalue,'a) token +val COMMA: 'a * 'a -> (svalue,'a) token +val LEXMARK: 'a * 'a -> (svalue,'a) token +val SEMI: 'a * 'a -> (svalue,'a) token +val ACT: (string) * 'a * 'a -> (svalue,'a) token +val ARROW: 'a * 'a -> (svalue,'a) token +val ID: (string) * 'a * 'a -> (svalue,'a) token +val REPS: (int) * 'a * 'a -> (svalue,'a) token +val EQ: 'a * 'a -> (svalue,'a) token +val DOT: 'a * 'a -> (svalue,'a) token +val UNICHAR: (UTF8.wchar) * 'a * 'a -> (svalue,'a) token +val CHAR: (string) * 'a * 'a -> (svalue,'a) token +val HIGH_CHAR: 'a * 'a -> (svalue,'a) token +val DASH: 'a * 'a -> (svalue,'a) token +val SLASH: 'a * 'a -> (svalue,'a) token +val DOLLAR: 'a * 'a -> (svalue,'a) token +val CARAT: 'a * 'a -> (svalue,'a) token +val BAR: 'a * 'a -> (svalue,'a) token +val PLUS: 'a * 'a -> (svalue,'a) token +val STAR: 'a * 'a -> (svalue,'a) token +val QMARK: 'a * 'a -> (svalue,'a) token +val RCB: 'a * 'a -> (svalue,'a) token +val LCB: 'a * 'a -> (svalue,'a) token +val RBD: 'a * 'a -> (svalue,'a) token +val RB: 'a * 'a -> (svalue,'a) token +val LB: 'a * 'a -> (svalue,'a) token +val RP: 'a * 'a -> (svalue,'a) token +val LP: 'a * 'a -> (svalue,'a) token +val GT: 'a * 'a -> (svalue,'a) token +val LT: 'a * 'a -> (svalue,'a) token +val DECLS: (string) * 'a * 'a -> (svalue,'a) token +val EOF: 'a * 'a -> (svalue,'a) token +end +signature MLLex_LRVALS= +sig +structure Tokens : MLLex_TOKENS +structure ParserData:PARSER_DATA +sharing type ParserData.Token.token = Tokens.token +sharing type ParserData.svalue = Tokens.svalue +end diff --git a/ml-lpt/ml-ulex/FrontEnds/ml-lex/ml-lex.yacc.sml b/ml-lpt/ml-ulex/FrontEnds/ml-lex/ml-lex.yacc.sml new file mode 100644 index 0000000..dbe5c70 --- /dev/null +++ b/ml-lpt/ml-ulex/FrontEnds/ml-lex/ml-lex.yacc.sml @@ -0,0 +1,925 @@ +functor MLLexLrValsFun(structure Token : TOKEN) + : sig structure ParserData : PARSER_DATA + structure Tokens : MLLex_TOKENS + end + = +struct +structure ParserData= +struct +structure Header = +struct +structure S = LexSpec + +structure RE = RegExp +structure SIS = RE.SymSet + +val symTable : RE.re AtomMap.map ref = ref AtomMap.empty + +val wildcard = SIS.complement (SIS.singleton 0w10) (* everything but \n *) +fun charToSym c = Word.fromInt (Char.ord c) +fun strToSym s = charToSym (String.sub (s, 0)) + +fun mkRule (ss, (false, re), act) = ((ss, re), act) + | mkRule (ss, (true, re), act) = ((ss, re), + "if not yylastwasn then REJECT() else (" ^ act ^")") + + +end +structure LrTable = Token.LrTable +structure Token = Token +local open LrTable in +val table=let val actionRows = +"\ +\\001\000\001\000\000\000\000\000\ +\\001\000\004\000\078\000\031\000\077\000\000\000\ +\\001\000\005\000\036\000\007\000\035\000\016\000\034\000\020\000\033\000\ +\\021\000\032\000\022\000\031\000\023\000\030\000\026\000\029\000\000\000\ +\\001\000\005\000\036\000\007\000\035\000\020\000\033\000\021\000\032\000\ +\\022\000\031\000\023\000\030\000\026\000\029\000\000\000\ +\\001\000\006\000\076\000\015\000\046\000\000\000\ +\\001\000\008\000\073\000\009\000\072\000\016\000\071\000\021\000\055\000\ +\\022\000\054\000\000\000\ +\\001\000\008\000\083\000\016\000\071\000\021\000\055\000\022\000\054\000\000\000\ +\\001\000\009\000\058\000\016\000\057\000\019\000\056\000\021\000\055\000\ +\\022\000\054\000\000\000\ +\\001\000\009\000\058\000\019\000\056\000\021\000\055\000\022\000\054\000\000\000\ +\\001\000\011\000\049\000\000\000\ +\\001\000\011\000\064\000\031\000\063\000\000\000\ +\\001\000\011\000\086\000\000\000\ +\\001\000\015\000\046\000\029\000\079\000\000\000\ +\\001\000\016\000\071\000\021\000\055\000\022\000\054\000\000\000\ +\\001\000\021\000\055\000\022\000\054\000\000\000\ +\\001\000\024\000\038\000\000\000\ +\\001\000\025\000\080\000\000\000\ +\\001\000\026\000\016\000\030\000\015\000\032\000\014\000\034\000\013\000\ +\\035\000\012\000\036\000\011\000\037\000\010\000\038\000\009\000\ +\\039\000\008\000\040\000\007\000\000\000\ +\\001\000\026\000\019\000\000\000\ +\\001\000\027\000\047\000\000\000\ +\\001\000\027\000\088\000\000\000\ +\\001\000\028\000\017\000\000\000\ +\\001\000\028\000\018\000\000\000\ +\\001\000\028\000\066\000\000\000\ +\\001\000\028\000\089\000\000\000\ +\\001\000\029\000\039\000\000\000\ +\\001\000\030\000\005\000\000\000\ +\\001\000\033\000\021\000\000\000\ +\\001\000\033\000\061\000\000\000\ +\\001\000\033\000\084\000\000\000\ +\\091\000\000\000\ +\\092\000\000\000\ +\\093\000\002\000\004\000\000\000\ +\\094\000\000\000\ +\\095\000\000\000\ +\\096\000\000\000\ +\\097\000\000\000\ +\\098\000\000\000\ +\\099\000\000\000\ +\\100\000\000\000\ +\\101\000\000\000\ +\\102\000\000\000\ +\\103\000\000\000\ +\\104\000\033\000\021\000\000\000\ +\\105\000\000\000\ +\\106\000\003\000\037\000\005\000\036\000\007\000\035\000\016\000\034\000\ +\\020\000\033\000\021\000\032\000\022\000\031\000\023\000\030\000\ +\\026\000\029\000\000\000\ +\\107\000\000\000\ +\\108\000\000\000\ +\\109\000\000\000\ +\\110\000\000\000\ +\\111\000\000\000\ +\\112\000\015\000\046\000\000\000\ +\\113\000\015\000\046\000\000\000\ +\\114\000\005\000\036\000\007\000\035\000\020\000\033\000\021\000\032\000\ +\\022\000\031\000\023\000\030\000\026\000\029\000\000\000\ +\\115\000\005\000\036\000\007\000\035\000\020\000\033\000\021\000\032\000\ +\\022\000\031\000\023\000\030\000\026\000\029\000\000\000\ +\\116\000\012\000\044\000\013\000\043\000\014\000\042\000\025\000\041\000\000\000\ +\\117\000\012\000\044\000\013\000\043\000\014\000\042\000\025\000\041\000\000\000\ +\\118\000\000\000\ +\\119\000\000\000\ +\\120\000\000\000\ +\\121\000\000\000\ +\\122\000\000\000\ +\\123\000\000\000\ +\\124\000\000\000\ +\\125\000\000\000\ +\\126\000\000\000\ +\\127\000\000\000\ +\\128\000\000\000\ +\\129\000\000\000\ +\\130\000\000\000\ +\\131\000\000\000\ +\\132\000\000\000\ +\\133\000\000\000\ +\\134\000\000\000\ +\\135\000\000\000\ +\\136\000\019\000\067\000\000\000\ +\\137\000\000\000\ +\\138\000\000\000\ +\\139\000\000\000\ +\\140\000\019\000\082\000\000\000\ +\\141\000\000\000\ +\\142\000\000\000\ +\\143\000\000\000\ +\\144\000\000\000\ +\" +val actionRowNumbers = +"\032\000\026\000\031\000\033\000\ +\\017\000\021\000\022\000\018\000\ +\\038\000\039\000\041\000\040\000\ +\\027\000\045\000\015\000\037\000\ +\\035\000\036\000\025\000\043\000\ +\\062\000\056\000\054\000\052\000\ +\\019\000\045\000\030\000\009\000\ +\\066\000\065\000\064\000\063\000\ +\\003\000\007\000\003\000\028\000\ +\\003\000\034\000\044\000\010\000\ +\\059\000\058\000\057\000\055\000\ +\\003\000\023\000\046\000\067\000\ +\\051\000\075\000\005\000\070\000\ +\\083\000\082\000\014\000\008\000\ +\\073\000\004\000\001\000\049\000\ +\\012\000\016\000\060\000\053\000\ +\\047\000\013\000\081\000\079\000\ +\\077\000\080\000\074\000\071\000\ +\\006\000\069\000\068\000\029\000\ +\\002\000\042\000\011\000\076\000\ +\\013\000\072\000\050\000\020\000\ +\\061\000\078\000\024\000\048\000\ +\\000\000" +val gotoT = +"\ +\\001\000\088\000\002\000\001\000\000\000\ +\\000\000\ +\\000\000\ +\\003\000\004\000\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\004\000\018\000\000\000\ +\\005\000\026\000\006\000\025\000\008\000\024\000\009\000\023\000\ +\\010\000\022\000\011\000\021\000\012\000\020\000\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\004\000\038\000\000\000\ +\\000\000\ +\\000\000\ +\\011\000\043\000\012\000\020\000\000\000\ +\\000\000\ +\\000\000\ +\\005\000\046\000\006\000\025\000\008\000\024\000\009\000\023\000\ +\\010\000\022\000\011\000\021\000\012\000\020\000\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\009\000\048\000\010\000\022\000\011\000\021\000\012\000\020\000\000\000\ +\\013\000\051\000\014\000\050\000\017\000\049\000\000\000\ +\\009\000\057\000\010\000\022\000\011\000\021\000\012\000\020\000\000\000\ +\\007\000\058\000\000\000\ +\\009\000\060\000\010\000\022\000\011\000\021\000\012\000\020\000\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\010\000\063\000\011\000\021\000\012\000\020\000\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\015\000\068\000\016\000\067\000\017\000\066\000\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\014\000\072\000\017\000\049\000\000\000\ +\\013\000\073\000\014\000\050\000\017\000\049\000\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\011\000\043\000\012\000\020\000\000\000\ +\\000\000\ +\\016\000\079\000\017\000\066\000\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\015\000\068\000\016\000\067\000\017\000\066\000\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\008\000\083\000\009\000\023\000\010\000\022\000\011\000\021\000\ +\\012\000\020\000\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\016\000\085\000\017\000\066\000\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\" +val numstates = 89 +val numrules = 54 +val s = ref "" and index = ref 0 +val string_to_int = fn () => +let val i = !index +in index := i+2; Char.ord(String.sub(!s,i)) + Char.ord(String.sub(!s,i+1)) * 256 +end +val string_to_list = fn s' => + let val len = String.size s' + fun f () = + if !index < len then string_to_int() :: f() + else nil + in index := 0; s := s'; f () + end +val string_to_pairlist = fn (conv_key,conv_entry) => + let fun f () = + case string_to_int() + of 0 => EMPTY + | n => PAIR(conv_key (n-1),conv_entry (string_to_int()),f()) + in f + end +val string_to_pairlist_default = fn (conv_key,conv_entry) => + let val conv_row = string_to_pairlist(conv_key,conv_entry) + in fn () => + let val default = conv_entry(string_to_int()) + val row = conv_row() + in (row,default) + end + end +val string_to_table = fn (convert_row,s') => + let val len = String.size s' + fun f ()= + if !index < len then convert_row() :: f() + else nil + in (s := s'; index := 0; f ()) + end +local + val memo = Array.array(numstates+numrules,ERROR) + val _ =let fun g i=(Array.update(memo,i,REDUCE(i-numstates)); g(i+1)) + fun f i = + if i=numstates then g i + else (Array.update(memo,i,SHIFT (STATE i)); f (i+1)) + in f 0 handle General.Subscript => () + end +in +val entry_to_action = fn 0 => ACCEPT | 1 => ERROR | j => Array.sub(memo,(j-2)) +end +val gotoT=Array.fromList(string_to_table(string_to_pairlist(NT,STATE),gotoT)) +val actionRows=string_to_table(string_to_pairlist_default(T,entry_to_action),actionRows) +val actionRowNumbers = string_to_list actionRowNumbers +val actionT = let val actionRowLookUp= +let val a=Array.fromList(actionRows) in fn i=>Array.sub(a,i) end +in Array.fromList(List.map actionRowLookUp actionRowNumbers) +end +in LrTable.mkLrTable {actions=actionT,gotos=gotoT,numRules=numrules, +numStates=numstates,initialState=STATE 0} +end +end +local open Header in +type pos = int +type arg = unit +structure MlyValue = +struct +datatype svalue = VOID | ntVOID of unit -> unit + | LEXSTATE of unit -> (string) | ACT of unit -> (string) + | ID of unit -> (string) | REPS of unit -> (int) + | UNICHAR of unit -> (UTF8.wchar) | CHAR of unit -> (string) + | DECLS of unit -> (string) | NonCarat of unit -> (UTF8.wchar) + | AChar of unit -> (UTF8.wchar) | CharRng of unit -> (SIS.set) + | CharClass' of unit -> (SIS.set) | CharClass of unit -> (SIS.set) + | InExp of unit -> (RE.re) | Exp of unit -> (RE.re) + | CatExp of unit -> (RE.re) | OrExp of unit -> (RE.re) + | LineBreakExp of unit -> ( ( bool * RE.re ) ) + | RuleStates of unit -> (AtomSet.set) | Rule of unit -> (S.rule) + | Rules of unit -> (S.rule list) + | StartStates of unit -> (AtomSet.set) | Defs of unit -> (S.config) + | Decls of unit -> (string) | Start of unit -> (S.spec) +end +type svalue = MlyValue.svalue +type result = S.spec +end +structure EC= +struct +open LrTable +infix 5 $$ +fun x $$ y = y::x +val is_keyword = +fn _ => false +val preferred_change : (term list * term list) list = +nil +val noShift = +fn (T 0) => true | _ => false +val showTerminal = +fn (T 0) => "EOF" + | (T 1) => "DECLS" + | (T 2) => "LT" + | (T 3) => "GT" + | (T 4) => "LP" + | (T 5) => "RP" + | (T 6) => "LB" + | (T 7) => "RB" + | (T 8) => "RBD" + | (T 9) => "LCB" + | (T 10) => "RCB" + | (T 11) => "QMARK" + | (T 12) => "STAR" + | (T 13) => "PLUS" + | (T 14) => "BAR" + | (T 15) => "CARAT" + | (T 16) => "DOLLAR" + | (T 17) => "SLASH" + | (T 18) => "DASH" + | (T 19) => "HIGH_CHAR" + | (T 20) => "CHAR" + | (T 21) => "UNICHAR" + | (T 22) => "DOT" + | (T 23) => "EQ" + | (T 24) => "REPS" + | (T 25) => "ID" + | (T 26) => "ARROW" + | (T 27) => "ACT" + | (T 28) => "SEMI" + | (T 29) => "LEXMARK" + | (T 30) => "COMMA" + | (T 31) => "STATES" + | (T 32) => "LEXSTATE" + | (T 33) => "COUNT" + | (T 34) => "REJECTTOK" + | (T 35) => "FULL" + | (T 36) => "UNICODE" + | (T 37) => "STRUCT" + | (T 38) => "HEADER" + | (T 39) => "ARG" + | (T 40) => "POSARG" + | _ => "bogus-term" +local open Header in +val errtermvalue= +fn _ => MlyValue.VOID +end +val terms : term list = nil + $$ (T 40) $$ (T 39) $$ (T 38) $$ (T 37) $$ (T 36) $$ (T 35) $$ (T 34) + $$ (T 33) $$ (T 31) $$ (T 30) $$ (T 29) $$ (T 28) $$ (T 26) $$ (T 23) + $$ (T 22) $$ (T 19) $$ (T 18) $$ (T 17) $$ (T 16) $$ (T 15) $$ (T 14) + $$ (T 13) $$ (T 12) $$ (T 11) $$ (T 10) $$ (T 9) $$ (T 8) $$ (T 7) + $$ (T 6) $$ (T 5) $$ (T 4) $$ (T 3) $$ (T 2) $$ (T 0)end +structure Actions = +struct +exception mlyAction of int +local open Header in +val actions = +fn (i392,defaultPos,stack, + (()):arg) => +case (i392,stack) +of ( 0, ( ( _, ( MlyValue.Rules Rules1, _, Rules1right)) :: _ :: ( _, + ( MlyValue.Defs Defs1, _, _)) :: _ :: ( _, ( MlyValue.Decls Decls1, +Decls1left, _)) :: rest671)) => let val result = MlyValue.Start (fn _ + => let val (Decls as Decls1) = Decls1 () + val (Defs as Defs1) = Defs1 () + val (Rules as Rules1) = Rules1 () + in ( +S.Spec {decls = Decls, + conf = Defs, + rules = Rules, + eofRules = []} +) +end) + in ( LrTable.NT 0, ( result, Decls1left, Rules1right), rest671) +end +| ( 1, ( ( _, ( MlyValue.DECLS DECLS1, DECLS1left, DECLS1right)) :: +rest671)) => let val result = MlyValue.Decls (fn _ => let val (DECLS + as DECLS1) = DECLS1 () + in (DECLS) +end) + in ( LrTable.NT 1, ( result, DECLS1left, DECLS1right), rest671) +end +| ( 2, ( rest671)) => let val result = MlyValue.Decls (fn _ => ("")) + in ( LrTable.NT 1, ( result, defaultPos, defaultPos), rest671) +end +| ( 3, ( rest671)) => let val result = MlyValue.Defs (fn _ => ( +S.mkConfig())) + in ( LrTable.NT 2, ( result, defaultPos, defaultPos), rest671) +end +| ( 4, ( ( _, ( _, _, SEMI1right)) :: ( _, ( MlyValue.StartStates +StartStates1, _, _)) :: _ :: ( _, ( MlyValue.Defs Defs1, Defs1left, _) +) :: rest671)) => let val result = MlyValue.Defs (fn _ => let val ( +Defs as Defs1) = Defs1 () + val (StartStates as StartStates1) = StartStates1 () + in (S.updStartStates (Defs, StartStates)) +end) + in ( LrTable.NT 2, ( result, Defs1left, SEMI1right), rest671) +end +| ( 5, ( ( _, ( MlyValue.ACT ACT1, _, ACT1right)) :: _ :: ( _, ( +MlyValue.Defs Defs1, Defs1left, _)) :: rest671)) => let val result = +MlyValue.Defs (fn _ => let val (Defs as Defs1) = Defs1 () + val (ACT as ACT1) = ACT1 () + in ( +S.updHeader (Defs, + String.substring (ACT, 1, String.size ACT - 2)) +) +end) + in ( LrTable.NT 2, ( result, Defs1left, ACT1right), rest671) +end +| ( 6, ( ( _, ( MlyValue.ID ID1, _, ID1right)) :: _ :: ( _, ( +MlyValue.Defs Defs1, Defs1left, _)) :: rest671)) => let val result = +MlyValue.Defs (fn _ => let val (Defs as Defs1) = Defs1 () + val (ID as ID1) = ID1 () + in (S.updStructName (Defs, ID)) +end) + in ( LrTable.NT 2, ( result, Defs1left, ID1right), rest671) +end +| ( 7, ( ( _, ( MlyValue.ACT ACT1, _, ACT1right)) :: _ :: ( _, ( +MlyValue.Defs Defs1, Defs1left, _)) :: rest671)) => let val result = +MlyValue.Defs (fn _ => let val (Defs as Defs1) = Defs1 () + val (ACT as ACT1) = ACT1 () + in (S.updArg (Defs, ACT)) +end) + in ( LrTable.NT 2, ( result, Defs1left, ACT1right), rest671) +end +| ( 8, ( ( _, ( _, _, UNICODE1right)) :: ( _, ( MlyValue.Defs Defs1, +Defs1left, _)) :: rest671)) => let val result = MlyValue.Defs (fn _ + => let val (Defs as Defs1) = Defs1 () + in (S.updClamp (Defs, S.NO_CLAMP)) +end) + in ( LrTable.NT 2, ( result, Defs1left, UNICODE1right), rest671) +end +| ( 9, ( ( _, ( _, _, FULL1right)) :: ( _, ( MlyValue.Defs Defs1, +Defs1left, _)) :: rest671)) => let val result = MlyValue.Defs (fn _ + => let val (Defs as Defs1) = Defs1 () + in (S.updClamp (Defs, S.CLAMP255)) +end) + in ( LrTable.NT 2, ( result, Defs1left, FULL1right), rest671) +end +| ( 10, ( ( _, ( _, _, COUNT1right)) :: ( _, ( MlyValue.Defs Defs1, +Defs1left, _)) :: rest671)) => let val result = MlyValue.Defs (fn _ + => let val (Defs as Defs1) = Defs1 () + in (Defs) +end) + in ( LrTable.NT 2, ( result, Defs1left, COUNT1right), rest671) +end +| ( 11, ( ( _, ( _, _, REJECTTOK1right)) :: ( _, ( MlyValue.Defs +Defs1, Defs1left, _)) :: rest671)) => let val result = MlyValue.Defs + (fn _ => let val (Defs as Defs1) = Defs1 () + in (Defs) +end) + in ( LrTable.NT 2, ( result, Defs1left, REJECTTOK1right), rest671) + +end +| ( 12, ( ( _, ( _, _, SEMI1right)) :: ( _, ( MlyValue.OrExp OrExp1, + _, _)) :: _ :: ( _, ( MlyValue.ID ID1, _, _)) :: ( _, ( MlyValue.Defs + Defs1, Defs1left, _)) :: rest671)) => let val result = MlyValue.Defs + (fn _ => let val (Defs as Defs1) = Defs1 () + val (ID as ID1) = ID1 () + val (OrExp as OrExp1) = OrExp1 () + in ( +symTable := AtomMap.insert + (!symTable, Atom.atom ID, OrExp); + Defs +) +end) + in ( LrTable.NT 2, ( result, Defs1left, SEMI1right), rest671) +end +| ( 13, ( ( _, ( MlyValue.LEXSTATE LEXSTATE1, LEXSTATE1left, +LEXSTATE1right)) :: rest671)) => let val result = +MlyValue.StartStates (fn _ => let val (LEXSTATE as LEXSTATE1) = +LEXSTATE1 () + in (AtomSet.singleton (Atom.atom LEXSTATE)) +end) + in ( LrTable.NT 3, ( result, LEXSTATE1left, LEXSTATE1right), rest671) + +end +| ( 14, ( ( _, ( MlyValue.StartStates StartStates1, _, +StartStates1right)) :: ( _, ( MlyValue.LEXSTATE LEXSTATE1, +LEXSTATE1left, _)) :: rest671)) => let val result = +MlyValue.StartStates (fn _ => let val (LEXSTATE as LEXSTATE1) = +LEXSTATE1 () + val (StartStates as StartStates1) = StartStates1 () + in (AtomSet.add (StartStates, Atom.atom LEXSTATE)) +end) + in ( LrTable.NT 3, ( result, LEXSTATE1left, StartStates1right), +rest671) +end +| ( 15, ( rest671)) => let val result = MlyValue.Rules (fn _ => ([]) +) + in ( LrTable.NT 4, ( result, defaultPos, defaultPos), rest671) +end +| ( 16, ( ( _, ( MlyValue.Rules Rules1, _, Rules1right)) :: ( _, ( +MlyValue.Rule Rule1, Rule1left, _)) :: rest671)) => let val result = +MlyValue.Rules (fn _ => let val (Rule as Rule1) = Rule1 () + val (Rules as Rules1) = Rules1 () + in (Rule :: Rules) +end) + in ( LrTable.NT 4, ( result, Rule1left, Rules1right), rest671) +end +| ( 17, ( ( _, ( MlyValue.ACT ACT1, _, ACT1right)) :: _ :: ( _, ( +MlyValue.LineBreakExp LineBreakExp1, LineBreakExp1left, _)) :: rest671 +)) => let val result = MlyValue.Rule (fn _ => let val (LineBreakExp + as LineBreakExp1) = LineBreakExp1 () + val (ACT as ACT1) = ACT1 () + in (mkRule (NONE, LineBreakExp, ACT)) +end) + in ( LrTable.NT 5, ( result, LineBreakExp1left, ACT1right), rest671) + +end +| ( 18, ( ( _, ( MlyValue.ACT ACT1, _, ACT1right)) :: _ :: ( _, ( +MlyValue.LineBreakExp LineBreakExp1, _, _)) :: _ :: ( _, ( +MlyValue.RuleStates RuleStates1, _, _)) :: ( _, ( _, LT1left, _)) :: +rest671)) => let val result = MlyValue.Rule (fn _ => let val ( +RuleStates as RuleStates1) = RuleStates1 () + val (LineBreakExp as LineBreakExp1) = LineBreakExp1 () + val (ACT as ACT1) = ACT1 () + in (mkRule (SOME RuleStates, LineBreakExp, ACT)) +end) + in ( LrTable.NT 5, ( result, LT1left, ACT1right), rest671) +end +| ( 19, ( ( _, ( MlyValue.LEXSTATE LEXSTATE1, LEXSTATE1left, +LEXSTATE1right)) :: rest671)) => let val result = MlyValue.RuleStates + (fn _ => let val (LEXSTATE as LEXSTATE1) = LEXSTATE1 () + in (AtomSet.singleton (Atom.atom LEXSTATE)) +end) + in ( LrTable.NT 6, ( result, LEXSTATE1left, LEXSTATE1right), rest671) + +end +| ( 20, ( ( _, ( MlyValue.LEXSTATE LEXSTATE1, _, LEXSTATE1right)) :: + _ :: ( _, ( MlyValue.RuleStates RuleStates1, RuleStates1left, _)) :: +rest671)) => let val result = MlyValue.RuleStates (fn _ => let val ( +RuleStates as RuleStates1) = RuleStates1 () + val (LEXSTATE as LEXSTATE1) = LEXSTATE1 () + in (AtomSet.add (RuleStates, Atom.atom LEXSTATE)) +end) + in ( LrTable.NT 6, ( result, RuleStates1left, LEXSTATE1right), +rest671) +end +| ( 21, ( ( _, ( MlyValue.OrExp OrExp1, _, OrExp1right)) :: ( _, ( _, + CARAT1left, _)) :: rest671)) => let val result = +MlyValue.LineBreakExp (fn _ => let val (OrExp as OrExp1) = OrExp1 () + in (true, OrExp) +end) + in ( LrTable.NT 7, ( result, CARAT1left, OrExp1right), rest671) +end +| ( 22, ( ( _, ( MlyValue.OrExp OrExp1, OrExp1left, OrExp1right)) :: +rest671)) => let val result = MlyValue.LineBreakExp (fn _ => let val + (OrExp as OrExp1) = OrExp1 () + in (false, OrExp) +end) + in ( LrTable.NT 7, ( result, OrExp1left, OrExp1right), rest671) +end +| ( 23, ( ( _, ( MlyValue.CatExp CatExp1, _, CatExp1right)) :: _ :: ( + _, ( MlyValue.OrExp OrExp1, OrExp1left, _)) :: rest671)) => let val +result = MlyValue.OrExp (fn _ => let val (OrExp as OrExp1) = OrExp1 + () + val (CatExp as CatExp1) = CatExp1 () + in (RE.mkOr (OrExp, CatExp)) +end) + in ( LrTable.NT 8, ( result, OrExp1left, CatExp1right), rest671) +end +| ( 24, ( ( _, ( MlyValue.CatExp CatExp1, CatExp1left, CatExp1right)) + :: rest671)) => let val result = MlyValue.OrExp (fn _ => let val ( +CatExp as CatExp1) = CatExp1 () + in (CatExp) +end) + in ( LrTable.NT 8, ( result, CatExp1left, CatExp1right), rest671) +end +| ( 25, ( ( _, ( MlyValue.Exp Exp1, _, Exp1right)) :: ( _, ( +MlyValue.CatExp CatExp1, CatExp1left, _)) :: rest671)) => let val +result = MlyValue.CatExp (fn _ => let val (CatExp as CatExp1) = +CatExp1 () + val (Exp as Exp1) = Exp1 () + in (RE.mkConcat (CatExp, Exp)) +end) + in ( LrTable.NT 9, ( result, CatExp1left, Exp1right), rest671) +end +| ( 26, ( ( _, ( MlyValue.Exp Exp1, Exp1left, Exp1right)) :: rest671) +) => let val result = MlyValue.CatExp (fn _ => let val (Exp as Exp1) + = Exp1 () + in (Exp) +end) + in ( LrTable.NT 9, ( result, Exp1left, Exp1right), rest671) +end +| ( 27, ( ( _, ( _, _, QMARK1right)) :: ( _, ( MlyValue.Exp Exp1, +Exp1left, _)) :: rest671)) => let val result = MlyValue.Exp (fn _ => + let val (Exp as Exp1) = Exp1 () + in (RE.mkOpt Exp) +end) + in ( LrTable.NT 10, ( result, Exp1left, QMARK1right), rest671) +end +| ( 28, ( ( _, ( _, _, STAR1right)) :: ( _, ( MlyValue.Exp Exp1, +Exp1left, _)) :: rest671)) => let val result = MlyValue.Exp (fn _ => + let val (Exp as Exp1) = Exp1 () + in (RE.mkClosure Exp) +end) + in ( LrTable.NT 10, ( result, Exp1left, STAR1right), rest671) +end +| ( 29, ( ( _, ( _, _, PLUS1right)) :: ( _, ( MlyValue.Exp Exp1, +Exp1left, _)) :: rest671)) => let val result = MlyValue.Exp (fn _ => + let val (Exp as Exp1) = Exp1 () + in (RE.mkAtLeast (Exp, 1)) +end) + in ( LrTable.NT 10, ( result, Exp1left, PLUS1right), rest671) +end +| ( 30, ( ( _, ( _, _, RCB1right)) :: ( _, ( MlyValue.REPS REPS1, _, + _)) :: ( _, ( MlyValue.Exp Exp1, Exp1left, _)) :: rest671)) => let + val result = MlyValue.Exp (fn _ => let val (Exp as Exp1) = Exp1 () + val (REPS as REPS1) = REPS1 () + in (RE.mkRep (Exp, REPS, REPS)) +end) + in ( LrTable.NT 10, ( result, Exp1left, RCB1right), rest671) +end +| ( 31, ( ( _, ( _, _, RCB1right)) :: ( _, ( MlyValue.REPS REPS2, _, + _)) :: _ :: ( _, ( MlyValue.REPS REPS1, _, _)) :: ( _, ( MlyValue.Exp + Exp1, Exp1left, _)) :: rest671)) => let val result = MlyValue.Exp + (fn _ => let val (Exp as Exp1) = Exp1 () + val REPS1 = REPS1 () + val REPS2 = REPS2 () + in (RE.mkRep (Exp, REPS1, REPS2)) +end) + in ( LrTable.NT 10, ( result, Exp1left, RCB1right), rest671) +end +| ( 32, ( ( _, ( MlyValue.InExp InExp1, InExp1left, InExp1right)) :: +rest671)) => let val result = MlyValue.Exp (fn _ => let val (InExp + as InExp1) = InExp1 () + in (InExp) +end) + in ( LrTable.NT 10, ( result, InExp1left, InExp1right), rest671) +end +| ( 33, ( ( _, ( _, HIGH_CHAR1left, HIGH_CHAR1right)) :: rest671)) => + let val result = MlyValue.InExp (fn _ => ( +RE.mkSymSet (SIS.interval (0w128, 0w255)))) + in ( LrTable.NT 11, ( result, HIGH_CHAR1left, HIGH_CHAR1right), +rest671) +end +| ( 34, ( ( _, ( MlyValue.CHAR CHAR1, CHAR1left, CHAR1right)) :: +rest671)) => let val result = MlyValue.InExp (fn _ => let val (CHAR + as CHAR1) = CHAR1 () + in (RE.mkSymSet (SIS.singleton (strToSym CHAR))) +end) + in ( LrTable.NT 11, ( result, CHAR1left, CHAR1right), rest671) +end +| ( 35, ( ( _, ( MlyValue.UNICHAR UNICHAR1, UNICHAR1left, +UNICHAR1right)) :: rest671)) => let val result = MlyValue.InExp (fn _ + => let val (UNICHAR as UNICHAR1) = UNICHAR1 () + in (RE.mkSymSet (SIS.singleton UNICHAR)) +end) + in ( LrTable.NT 11, ( result, UNICHAR1left, UNICHAR1right), rest671) + +end +| ( 36, ( ( _, ( _, DOT1left, DOT1right)) :: rest671)) => let val +result = MlyValue.InExp (fn _ => (RE.mkSymSet wildcard)) + in ( LrTable.NT 11, ( result, DOT1left, DOT1right), rest671) +end +| ( 37, ( ( _, ( _, _, RCB1right)) :: ( _, ( MlyValue.ID ID1, ID1left +, _)) :: rest671)) => let val result = MlyValue.InExp (fn _ => let + val (ID as ID1) = ID1 () + in ( +case AtomMap.find (!symTable, Atom.atom ID) + of SOME re => re + | NONE => raise Fail ("'" ^ ID ^ "' not defined") +) +end) + in ( LrTable.NT 11, ( result, ID1left, RCB1right), rest671) +end +| ( 38, ( ( _, ( _, _, RP1right)) :: ( _, ( MlyValue.OrExp OrExp1, _, + _)) :: ( _, ( _, LP1left, _)) :: rest671)) => let val result = +MlyValue.InExp (fn _ => let val (OrExp as OrExp1) = OrExp1 () + in (OrExp) +end) + in ( LrTable.NT 11, ( result, LP1left, RP1right), rest671) +end +| ( 39, ( ( _, ( MlyValue.CharClass CharClass1, _, CharClass1right)) + :: _ :: ( _, ( _, LB1left, _)) :: rest671)) => let val result = +MlyValue.InExp (fn _ => let val (CharClass as CharClass1) = +CharClass1 () + in (RE.mkSymSet (SIS.complement CharClass)) +end) + in ( LrTable.NT 11, ( result, LB1left, CharClass1right), rest671) +end +| ( 40, ( ( _, ( MlyValue.CharClass CharClass1, _, CharClass1right)) + :: ( _, ( _, LB1left, _)) :: rest671)) => let val result = +MlyValue.InExp (fn _ => let val (CharClass as CharClass1) = +CharClass1 () + in (RE.mkSymSet CharClass) +end) + in ( LrTable.NT 11, ( result, LB1left, CharClass1right), rest671) +end +| ( 41, ( ( _, ( _, _, RB1right)) :: ( _, ( MlyValue.CharClass' +CharClass'1, CharClass'1left, _)) :: rest671)) => let val result = +MlyValue.CharClass (fn _ => let val (CharClass' as CharClass'1) = +CharClass'1 () + in (CharClass') +end) + in ( LrTable.NT 12, ( result, CharClass'1left, RB1right), rest671) + +end +| ( 42, ( ( _, ( _, _, RB1right)) :: ( _, ( MlyValue.CharClass' +CharClass'1, _, _)) :: ( _, ( _, DASH1left, _)) :: rest671)) => let + val result = MlyValue.CharClass (fn _ => let val (CharClass' as +CharClass'1) = CharClass'1 () + in (SIS.add (CharClass', charToSym #"-")) +end) + in ( LrTable.NT 12, ( result, DASH1left, RB1right), rest671) +end +| ( 43, ( ( _, ( _, RBD1left, RBD1right)) :: rest671)) => let val +result = MlyValue.CharClass (fn _ => (SIS.singleton (charToSym #"-"))) + in ( LrTable.NT 12, ( result, RBD1left, RBD1right), rest671) +end +| ( 44, ( ( _, ( _, _, RBD1right)) :: ( _, ( MlyValue.CharClass' +CharClass'1, CharClass'1left, _)) :: rest671)) => let val result = +MlyValue.CharClass (fn _ => let val (CharClass' as CharClass'1) = +CharClass'1 () + in (SIS.add (CharClass', charToSym #"-")) +end) + in ( LrTable.NT 12, ( result, CharClass'1left, RBD1right), rest671) + +end +| ( 45, ( ( _, ( MlyValue.NonCarat NonCarat1, NonCarat1left, +NonCarat1right)) :: rest671)) => let val result = MlyValue.CharClass' + (fn _ => let val (NonCarat as NonCarat1) = NonCarat1 () + in (SIS.singleton NonCarat) +end) + in ( LrTable.NT 13, ( result, NonCarat1left, NonCarat1right), rest671 +) +end +| ( 46, ( ( _, ( MlyValue.AChar AChar1, _, AChar1right)) :: _ :: ( _, + ( MlyValue.NonCarat NonCarat1, NonCarat1left, _)) :: rest671)) => let + val result = MlyValue.CharClass' (fn _ => let val (NonCarat as +NonCarat1) = NonCarat1 () + val (AChar as AChar1) = AChar1 () + in (SIS.interval (NonCarat, AChar)) +end) + in ( LrTable.NT 13, ( result, NonCarat1left, AChar1right), rest671) + +end +| ( 47, ( ( _, ( MlyValue.CharRng CharRng1, _, CharRng1right)) :: ( _ +, ( MlyValue.CharClass' CharClass'1, CharClass'1left, _)) :: rest671)) + => let val result = MlyValue.CharClass' (fn _ => let val ( +CharClass' as CharClass'1) = CharClass'1 () + val (CharRng as CharRng1) = CharRng1 () + in (SIS.union (CharRng, CharClass')) +end) + in ( LrTable.NT 13, ( result, CharClass'1left, CharRng1right), +rest671) +end +| ( 48, ( ( _, ( MlyValue.AChar AChar2, _, AChar2right)) :: _ :: ( _, + ( MlyValue.AChar AChar1, AChar1left, _)) :: rest671)) => let val +result = MlyValue.CharRng (fn _ => let val AChar1 = AChar1 () + val AChar2 = AChar2 () + in (SIS.interval (AChar1, AChar2)) +end) + in ( LrTable.NT 14, ( result, AChar1left, AChar2right), rest671) +end +| ( 49, ( ( _, ( MlyValue.AChar AChar1, AChar1left, AChar1right)) :: +rest671)) => let val result = MlyValue.CharRng (fn _ => let val ( +AChar as AChar1) = AChar1 () + in (SIS.singleton AChar) +end) + in ( LrTable.NT 14, ( result, AChar1left, AChar1right), rest671) +end +| ( 50, ( ( _, ( _, CARAT1left, CARAT1right)) :: rest671)) => let + val result = MlyValue.AChar (fn _ => (charToSym #"^")) + in ( LrTable.NT 15, ( result, CARAT1left, CARAT1right), rest671) +end +| ( 51, ( ( _, ( MlyValue.NonCarat NonCarat1, NonCarat1left, +NonCarat1right)) :: rest671)) => let val result = MlyValue.AChar (fn + _ => let val (NonCarat as NonCarat1) = NonCarat1 () + in (NonCarat) +end) + in ( LrTable.NT 15, ( result, NonCarat1left, NonCarat1right), rest671 +) +end +| ( 52, ( ( _, ( MlyValue.CHAR CHAR1, CHAR1left, CHAR1right)) :: +rest671)) => let val result = MlyValue.NonCarat (fn _ => let val ( +CHAR as CHAR1) = CHAR1 () + in (strToSym CHAR) +end) + in ( LrTable.NT 16, ( result, CHAR1left, CHAR1right), rest671) +end +| ( 53, ( ( _, ( MlyValue.UNICHAR UNICHAR1, UNICHAR1left, +UNICHAR1right)) :: rest671)) => let val result = MlyValue.NonCarat + (fn _ => let val (UNICHAR as UNICHAR1) = UNICHAR1 () + in (UNICHAR) +end) + in ( LrTable.NT 16, ( result, UNICHAR1left, UNICHAR1right), rest671) + +end +| _ => raise (mlyAction i392) +end +val void = MlyValue.VOID +val extract = fn a => (fn MlyValue.Start x => x +| _ => let exception ParseInternal + in raise ParseInternal end) a () +end +end +structure Tokens : MLLex_TOKENS = +struct +type svalue = ParserData.svalue +type ('a,'b) token = ('a,'b) Token.token +fun EOF (p1,p2) = Token.TOKEN (ParserData.LrTable.T 0,( +ParserData.MlyValue.VOID,p1,p2)) +fun DECLS (i,p1,p2) = Token.TOKEN (ParserData.LrTable.T 1,( +ParserData.MlyValue.DECLS (fn () => i),p1,p2)) +fun LT (p1,p2) = Token.TOKEN (ParserData.LrTable.T 2,( +ParserData.MlyValue.VOID,p1,p2)) +fun GT (p1,p2) = Token.TOKEN (ParserData.LrTable.T 3,( +ParserData.MlyValue.VOID,p1,p2)) +fun LP (p1,p2) = Token.TOKEN (ParserData.LrTable.T 4,( +ParserData.MlyValue.VOID,p1,p2)) +fun RP (p1,p2) = Token.TOKEN (ParserData.LrTable.T 5,( +ParserData.MlyValue.VOID,p1,p2)) +fun LB (p1,p2) = Token.TOKEN (ParserData.LrTable.T 6,( +ParserData.MlyValue.VOID,p1,p2)) +fun RB (p1,p2) = Token.TOKEN (ParserData.LrTable.T 7,( +ParserData.MlyValue.VOID,p1,p2)) +fun RBD (p1,p2) = Token.TOKEN (ParserData.LrTable.T 8,( +ParserData.MlyValue.VOID,p1,p2)) +fun LCB (p1,p2) = Token.TOKEN (ParserData.LrTable.T 9,( +ParserData.MlyValue.VOID,p1,p2)) +fun RCB (p1,p2) = Token.TOKEN (ParserData.LrTable.T 10,( +ParserData.MlyValue.VOID,p1,p2)) +fun QMARK (p1,p2) = Token.TOKEN (ParserData.LrTable.T 11,( +ParserData.MlyValue.VOID,p1,p2)) +fun STAR (p1,p2) = Token.TOKEN (ParserData.LrTable.T 12,( +ParserData.MlyValue.VOID,p1,p2)) +fun PLUS (p1,p2) = Token.TOKEN (ParserData.LrTable.T 13,( +ParserData.MlyValue.VOID,p1,p2)) +fun BAR (p1,p2) = Token.TOKEN (ParserData.LrTable.T 14,( +ParserData.MlyValue.VOID,p1,p2)) +fun CARAT (p1,p2) = Token.TOKEN (ParserData.LrTable.T 15,( +ParserData.MlyValue.VOID,p1,p2)) +fun DOLLAR (p1,p2) = Token.TOKEN (ParserData.LrTable.T 16,( +ParserData.MlyValue.VOID,p1,p2)) +fun SLASH (p1,p2) = Token.TOKEN (ParserData.LrTable.T 17,( +ParserData.MlyValue.VOID,p1,p2)) +fun DASH (p1,p2) = Token.TOKEN (ParserData.LrTable.T 18,( +ParserData.MlyValue.VOID,p1,p2)) +fun HIGH_CHAR (p1,p2) = Token.TOKEN (ParserData.LrTable.T 19,( +ParserData.MlyValue.VOID,p1,p2)) +fun CHAR (i,p1,p2) = Token.TOKEN (ParserData.LrTable.T 20,( +ParserData.MlyValue.CHAR (fn () => i),p1,p2)) +fun UNICHAR (i,p1,p2) = Token.TOKEN (ParserData.LrTable.T 21,( +ParserData.MlyValue.UNICHAR (fn () => i),p1,p2)) +fun DOT (p1,p2) = Token.TOKEN (ParserData.LrTable.T 22,( +ParserData.MlyValue.VOID,p1,p2)) +fun EQ (p1,p2) = Token.TOKEN (ParserData.LrTable.T 23,( +ParserData.MlyValue.VOID,p1,p2)) +fun REPS (i,p1,p2) = Token.TOKEN (ParserData.LrTable.T 24,( +ParserData.MlyValue.REPS (fn () => i),p1,p2)) +fun ID (i,p1,p2) = Token.TOKEN (ParserData.LrTable.T 25,( +ParserData.MlyValue.ID (fn () => i),p1,p2)) +fun ARROW (p1,p2) = Token.TOKEN (ParserData.LrTable.T 26,( +ParserData.MlyValue.VOID,p1,p2)) +fun ACT (i,p1,p2) = Token.TOKEN (ParserData.LrTable.T 27,( +ParserData.MlyValue.ACT (fn () => i),p1,p2)) +fun SEMI (p1,p2) = Token.TOKEN (ParserData.LrTable.T 28,( +ParserData.MlyValue.VOID,p1,p2)) +fun LEXMARK (p1,p2) = Token.TOKEN (ParserData.LrTable.T 29,( +ParserData.MlyValue.VOID,p1,p2)) +fun COMMA (p1,p2) = Token.TOKEN (ParserData.LrTable.T 30,( +ParserData.MlyValue.VOID,p1,p2)) +fun STATES (p1,p2) = Token.TOKEN (ParserData.LrTable.T 31,( +ParserData.MlyValue.VOID,p1,p2)) +fun LEXSTATE (i,p1,p2) = Token.TOKEN (ParserData.LrTable.T 32,( +ParserData.MlyValue.LEXSTATE (fn () => i),p1,p2)) +fun COUNT (p1,p2) = Token.TOKEN (ParserData.LrTable.T 33,( +ParserData.MlyValue.VOID,p1,p2)) +fun REJECTTOK (p1,p2) = Token.TOKEN (ParserData.LrTable.T 34,( +ParserData.MlyValue.VOID,p1,p2)) +fun FULL (p1,p2) = Token.TOKEN (ParserData.LrTable.T 35,( +ParserData.MlyValue.VOID,p1,p2)) +fun UNICODE (p1,p2) = Token.TOKEN (ParserData.LrTable.T 36,( +ParserData.MlyValue.VOID,p1,p2)) +fun STRUCT (p1,p2) = Token.TOKEN (ParserData.LrTable.T 37,( +ParserData.MlyValue.VOID,p1,p2)) +fun HEADER (p1,p2) = Token.TOKEN (ParserData.LrTable.T 38,( +ParserData.MlyValue.VOID,p1,p2)) +fun ARG (p1,p2) = Token.TOKEN (ParserData.LrTable.T 39,( +ParserData.MlyValue.VOID,p1,p2)) +fun POSARG (p1,p2) = Token.TOKEN (ParserData.LrTable.T 40,( +ParserData.MlyValue.VOID,p1,p2)) +end +end diff --git a/ml-lpt/ml-ulex/FrontEnds/ml-ulex/.cm/GUID/ml-ulex-input.sml b/ml-lpt/ml-ulex/FrontEnds/ml-ulex/.cm/GUID/ml-ulex-input.sml new file mode 100644 index 0000000..b1cf24c --- /dev/null +++ b/ml-lpt/ml-ulex/FrontEnds/ml-ulex/.cm/GUID/ml-ulex-input.sml @@ -0,0 +1 @@ +guid-(sources.cm):FrontEnds/ml-ulex/ml-ulex-input.sml-1714016111.287 diff --git a/ml-lpt/ml-ulex/FrontEnds/ml-ulex/.cm/GUID/ml-ulex.grm.sml b/ml-lpt/ml-ulex/FrontEnds/ml-ulex/.cm/GUID/ml-ulex.grm.sml new file mode 100644 index 0000000..173cfc9 --- /dev/null +++ b/ml-lpt/ml-ulex/FrontEnds/ml-ulex/.cm/GUID/ml-ulex.grm.sml @@ -0,0 +1 @@ +guid-(sources.cm):FrontEnds/ml-ulex/ml-ulex.grm.sml-1714016110.658 diff --git a/ml-lpt/ml-ulex/FrontEnds/ml-ulex/.cm/GUID/ml-ulex.lex.sml b/ml-lpt/ml-ulex/FrontEnds/ml-ulex/.cm/GUID/ml-ulex.lex.sml new file mode 100644 index 0000000..7db4792 --- /dev/null +++ b/ml-lpt/ml-ulex/FrontEnds/ml-ulex/.cm/GUID/ml-ulex.lex.sml @@ -0,0 +1 @@ +guid-(sources.cm):FrontEnds/ml-ulex/ml-ulex.lex.sml-1714016111.028 diff --git a/ml-lpt/ml-ulex/FrontEnds/ml-ulex/.cm/SKEL/ml-ulex-input.sml b/ml-lpt/ml-ulex/FrontEnds/ml-ulex/.cm/SKEL/ml-ulex-input.sml new file mode 100644 index 0000000..b86888d --- /dev/null +++ b/ml-lpt/ml-ulex/FrontEnds/ml-ulex/.cm/SKEL/ml-ulex-input.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f4d"AntlrRepair"d"MLULexTokens"d"String"d"TextIO"ad"MLULexInput"jh3ad"SP"gp1d"AntlrStreamPos"aL"gp1d"MLULexLex"ad"P"jgp1 gp1e"MLULexParseFn"gp1c"INPUT" \ No newline at end of file diff --git a/ml-lpt/ml-ulex/FrontEnds/ml-ulex/.cm/SKEL/ml-ulex.grm.sml b/ml-lpt/ml-ulex/FrontEnds/ml-ulex/.cm/SKEL/ml-ulex.grm.sml new file mode 100644 index 0000000..b97e565 --- /dev/null +++ b/ml-lpt/ml-ulex/FrontEnds/ml-ulex/.cm/SKEL/ml-ulex.grm.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d3f1d"UTF8"aMLULexTokens"0ae"MLULexParseFn"i2aLex"gp1c"ANTLR_LEXER"f8d"AntlrStreamPos"d"Word"d"Char"C1d"Option"d"AtomSet"d"Atom"d"String"Nh1bd4aTok"gp1 ad"UserCode"h4ad"LS"gp1d"LexSpec"ad"AMap"gp1d"AtomMap"aRE"gp1d"RegExp"ad"SIS"gp2d"SymSet"ad"Err"jh2agp1a1gp11gp1e"AntlrErrHandler"ad"EBNF"jgp1e"AntlrEBNF"f0 \ No newline at end of file diff --git a/ml-lpt/ml-ulex/FrontEnds/ml-ulex/.cm/SKEL/ml-ulex.lex.sml b/ml-lpt/ml-ulex/FrontEnds/ml-ulex/.cm/SKEL/ml-ulex.lex.sml new file mode 100644 index 0000000..ca0497f --- /dev/null +++ b/ml-lpt/ml-ulex/FrontEnds/ml-ulex/.cm/SKEL/ml-ulex.lex.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f9ULexBuffer"AntlrStreamPos"d"UTF8"d"Word"CChar"String"Substring"d"TextIO"Vector"Nad"MLULexLex"h1bd2aUserDeclarations"h1aTok"gp1d"MLULexTokens"bd2egp13f91d"List"Cd"Int"8 Nf0f33 \ No newline at end of file diff --git a/ml-lpt/ml-ulex/FrontEnds/ml-ulex/.cm/amd64-unix/ml-ulex-input.sml b/ml-lpt/ml-ulex/FrontEnds/ml-ulex/.cm/amd64-unix/ml-ulex-input.sml new file mode 100644 index 0000000..53c98c2 Binary files /dev/null and b/ml-lpt/ml-ulex/FrontEnds/ml-ulex/.cm/amd64-unix/ml-ulex-input.sml differ diff --git a/ml-lpt/ml-ulex/FrontEnds/ml-ulex/.cm/amd64-unix/ml-ulex.grm.sml b/ml-lpt/ml-ulex/FrontEnds/ml-ulex/.cm/amd64-unix/ml-ulex.grm.sml new file mode 100644 index 0000000..d533a31 Binary files /dev/null and b/ml-lpt/ml-ulex/FrontEnds/ml-ulex/.cm/amd64-unix/ml-ulex.grm.sml differ diff --git a/ml-lpt/ml-ulex/FrontEnds/ml-ulex/.cm/amd64-unix/ml-ulex.lex.sml b/ml-lpt/ml-ulex/FrontEnds/ml-ulex/.cm/amd64-unix/ml-ulex.lex.sml new file mode 100644 index 0000000..ad92e22 Binary files /dev/null and b/ml-lpt/ml-ulex/FrontEnds/ml-ulex/.cm/amd64-unix/ml-ulex.lex.sml differ diff --git a/ml-lpt/ml-ulex/FrontEnds/ml-ulex/ml-ulex-bootstrap.lex b/ml-lpt/ml-ulex/FrontEnds/ml-ulex/ml-ulex-bootstrap.lex new file mode 100644 index 0000000..d90b5c7 --- /dev/null +++ b/ml-lpt/ml-ulex/FrontEnds/ml-ulex/ml-ulex-bootstrap.lex @@ -0,0 +1,161 @@ +(* ml-ulex-bootstrap.lex + * + * COPYRIGHT (c) 2006 + * John Reppy (http://www.cs.uchicago.edu/~jhr) + * Aaron Turon (http://www.cs.uchicago.edu/~adrassi) + * All rights reserved. + * + * (With some code borrowed from ml-yacc) + *) + +val comLvl : int ref = ref 0 (* nesting depth of comments *) +val comStart : int ref = ref 0 (* start line of current comment *) + +fun eof () = ( + if (!comLvl > 0) + then print("unclosed comment starting at line " ^ Int.toString(!comStart) ^ "\n") + else (); + Tok.EOF) + +val text : string list ref = ref [] +fun addText s = (text := s::(!text)) +fun clrText () = (text := []) +fun getText () = concat (rev (!text)) + +val pcount = ref 0 +fun inc (ri as ref i) = (ri := i+1) +fun dec (ri as ref i) = (ri := i-1) + +fun chomp s = String.substring (s, 1, String.size s - 2) + +%% + +eol=("\n"|"\013\n"|"\013"); +ws=("\009"|"\011"|"\012"|" "|{eol}); +lc=[a-z]; +uc=[A-Z]; +alpha=({lc}|{uc}); +digit=[0-9]; +int={digit}+; +idchars=({alpha}|{digit}|"_"); +id={alpha}{idchars}*; +qualid ={id}"."; +tyvar="'"{idchars}*; + +%s STRING COM CODE CHARCLASS DIRECTIVE CHARSET RESTRING; + +%structure MLULexLex +%reject + +%% + +{ws}+ + => (continue()); + +"%defs" => (YYBEGIN CODE; clrText(); Tok.KW_defs); +"%name" => (YYBEGIN DIRECTIVE; Tok.KW_name); +"%states" => (YYBEGIN DIRECTIVE; Tok.KW_states); +"%let" => (YYBEGIN DIRECTIVE; Tok.KW_let); +"%charset" => (YYBEGIN CHARSET; Tok.KW_charset); + +{id} => (Tok.ID yytext); +"," => (Tok.COMMA); +";" => (YYBEGIN INITIAL; Tok.SEMI); +">" => (YYBEGIN INITIAL; Tok.GT); +"=" => (YYBEGIN INITIAL; Tok.EQ); +. => (YYBEGIN INITIAL; REJECT()); + +"utf8" | "UTF8" => (YYBEGIN INITIAL; Tok.UTF8); +"ascii7" | "ASCII7" => (YYBEGIN INITIAL; Tok.ASCII7); +"ascii8" | "ASCII8" => (YYBEGIN INITIAL; Tok.ASCII8); +";" => (YYBEGIN INITIAL; Tok.SEMI); +. => (YYBEGIN INITIAL; REJECT()); + +"|" => (Tok.BAR); +"&" => (Tok.AMP); +"." => (Tok.DOT); +"$" => (Tok.DOLLAR); +"+" => (Tok.PLUS); +"*" => (Tok.STAR); +"?" => (Tok.QUERY); +";" => (Tok.SEMI); +"(" => (Tok.LP); +")" => (Tok.RP); +"[" => (YYBEGIN CHARCLASS; Tok.LSB); +"]" => (Tok.RSB); +"{" {id} "}" + => (Tok.ID (chomp yytext)); +"{" {int} "}" + => ((Tok.REPEAT o valOf o Int.fromString o + Substring.string o (Substring.triml 1) o + (Substring.trimr 1)o Substring.full) yytext); +"<" => (YYBEGIN DIRECTIVE; Tok.LT); +">" => (Tok.GT); +"," => (Tok.COMMA); +"/" => (Tok.SLASH); +"=" => (Tok.EQ); +"=>" => (YYBEGIN CODE; clrText(); Tok.DARROW); +"\"" => (YYBEGIN RESTRING; continue()); + +"^" => (Tok.CARAT); +"-" => (Tok.DASH); +"\\" ([A-Za-z] | [0-9]{3} | "\\" | "\"") + => (let val c = Char.fromString yytext + in case c + of SOME c' => Tok.CHAR c' + | NONE => (print (concat [ + Int.toString (!yylineno), ": unknown escape sequence '", + yytext, "'\n"]); + continue()) + end); +"]" => (YYBEGIN INITIAL; Tok.RSB); +. => (Tok.CHAR (String.sub (yytext, 0))); + +"(*" + => (comLvl := 1; comStart := !yylineno; YYBEGIN COM; + ignore(continue() before YYBEGIN INITIAL); + continue()); +"(*" + => (comLvl := 1; comStart := !yylineno; YYBEGIN COM; + addText yytext; + ignore(continue() before YYBEGIN CODE); + continue()); + +"(*" + => (addText yytext; comLvl := !comLvl+1; continue()); +"*)" + => (addText yytext; comLvl := !comLvl-1; + if (!comLvl = 0) + then (Tok.BOGUS) + else continue()); +.|{eol} + => (addText yytext; continue()); + +"(" => (if !pcount = 0 then () else addText yytext; + inc pcount; continue()); +")" => (dec pcount; + if !pcount = 0 then + (YYBEGIN INITIAL; Tok.CODE (getText())) + else (addText yytext; continue())); +"\"" => (addText yytext; YYBEGIN STRING; + ignore(continue() before YYBEGIN CODE); + addText "\""; continue()); +[^()"]+ => (addText yytext; continue()); + +"\"" => (Tok.BOGUS); +{eol} => (addText yytext; print ("unclosed string"); + Tok.BOGUS); +\\ => (addText yytext; continue()); +\\\\ => (addText yytext; continue()); +[^"\\\n\013]+ + => (addText yytext; continue()); +\\\" => (addText yytext; continue()); + +"\"" => (YYBEGIN INITIAL; continue()); +{eol} => (print ("unclosed string\n"); continue()); +. => (Tok.CHAR (String.sub (yytext, 0))); + +. => (Tok.CHAR (String.sub (yytext, 0))); +. => (print (concat[Int.toString (!yylineno), ": illegal character '", + String.toCString yytext, "'\n"]); + continue()); diff --git a/ml-lpt/ml-ulex/FrontEnds/ml-ulex/ml-ulex-bootstrap.lex.sml b/ml-lpt/ml-ulex/FrontEnds/ml-ulex/ml-ulex-bootstrap.lex.sml new file mode 100644 index 0000000..4ccdda0 --- /dev/null +++ b/ml-lpt/ml-ulex/FrontEnds/ml-ulex/ml-ulex-bootstrap.lex.sml @@ -0,0 +1,1647 @@ +structure MLULexLex = struct + + structure yyInput : sig + + type stream + val mkStream : (int -> string) -> stream + val fromStream : TextIO.StreamIO.instream -> stream + val getc : stream -> (Char.char * stream) option + val getpos : stream -> int + val getlineNo : stream -> int + val subtract : stream * stream -> string + val eof : stream -> bool + val lastWasNL : stream -> bool + + end = struct + + structure TIO = TextIO + structure TSIO = TIO.StreamIO + structure TPIO = TextPrimIO + + datatype stream = Stream of { + strm : TSIO.instream, + id : int, (* track which streams originated + * from the same stream *) + pos : int, + lineNo : int, + lastWasNL : bool + } + + local + val next = ref 0 + in + fun nextId() = !next before (next := !next + 1) + end + + val initPos = 2 (* ml-lex bug compatibility *) + + fun mkStream inputN = let + val strm = TSIO.mkInstream + (TPIO.RD { + name = "lexgen", + chunkSize = 4096, + readVec = SOME inputN, + readArr = NONE, + readVecNB = NONE, + readArrNB = NONE, + block = NONE, + canInput = NONE, + avail = (fn () => NONE), + getPos = NONE, + setPos = NONE, + endPos = NONE, + verifyPos = NONE, + close = (fn () => ()), + ioDesc = NONE + }, "") + in + Stream {strm = strm, id = nextId(), pos = initPos, lineNo = 1, + lastWasNL = true} + end + + fun fromStream strm = Stream { + strm = strm, id = nextId(), pos = initPos, lineNo = 1, lastWasNL = true + } + + fun getc (Stream {strm, pos, id, lineNo, ...}) = (case TSIO.input1 strm + of NONE => NONE + | SOME (c, strm') => + SOME (c, Stream { + strm = strm', + pos = pos+1, + id = id, + lineNo = lineNo + + (if c = #"\n" then 1 else 0), + lastWasNL = (c = #"\n") + }) + (* end case*)) + + fun getpos (Stream {pos, ...}) = pos + + fun getlineNo (Stream {lineNo, ...}) = lineNo + + fun subtract (new, old) = let + val Stream {strm = strm, pos = oldPos, id = oldId, ...} = old + val Stream {pos = newPos, id = newId, ...} = new + val (diff, _) = if newId = oldId andalso newPos >= oldPos + then TSIO.inputN (strm, newPos - oldPos) + else raise Fail + "BUG: yyInput: attempted to subtract incompatible streams" + in + diff + end + + fun eof (Stream {strm, ...}) = TSIO.endOfStream strm + + fun lastWasNL (Stream {lastWasNL, ...}) = lastWasNL + + end + + datatype yystart_state = +COM | CODE | STRING | CHARSET | CHARCLASS | RESTRING | INITIAL | DIRECTIVE + structure UserDeclarations = + struct + +(* ml-ulex-bootstrap.lex + * + * COPYRIGHT (c) 2006 + * John Reppy (http://www.cs.uchicago.edu/~jhr) + * Aaron Turon (http://www.cs.uchicago.edu/~adrassi) + * All rights reserved. + * + * (With some code borrowed from ml-yacc) + *) + +val comLvl : int ref = ref 0 (* nesting depth of comments *) +val comStart : int ref = ref 0 (* start line of current comment *) + +fun eof () = ( + if (!comLvl > 0) + then print("unclosed comment starting at line " ^ Int.toString(!comStart) ^ "\n") + else (); + Tok.EOF) + +val text : string list ref = ref [] +fun addText s = (text := s::(!text)) +fun clrText () = (text := []) +fun getText () = concat (rev (!text)) + +val pcount = ref 0 +fun inc (ri as ref i) = (ri := i+1) +fun dec (ri as ref i) = (ri := i-1) + +fun chomp s = String.substring (s, 1, String.size s - 2) + + + + end + + datatype yymatch + = yyNO_MATCH + | yyMATCH of yyInput.stream * action * yymatch + withtype action = yyInput.stream * yymatch -> UserDeclarations.lexresult + + local + + val yytable = +#[ +] + + fun mk yyins = let + (* current start state *) + val yyss = ref INITIAL + fun YYBEGIN ss = (yyss := ss) + (* current input stream *) + val yystrm = ref yyins + (* get one char of input *) + val yygetc = yyInput.getc + (* create yytext *) + fun yymktext(strm) = yyInput.subtract (strm, !yystrm) + open UserDeclarations + fun lex +(yyarg as ()) = let + fun continue() = let + val yylastwasn = yyInput.lastWasNL (!yystrm) + fun yystuck (yyNO_MATCH) = raise Fail "stuck state" + | yystuck (yyMATCH (strm, action, old)) = + action (strm, old) + val yypos = yyInput.getpos (!yystrm) + val yygetlineNo = yyInput.getlineNo + fun yyactsToMatches (strm, [], oldMatches) = oldMatches + | yyactsToMatches (strm, act::acts, oldMatches) = + yyMATCH (strm, act, yyactsToMatches (strm, acts, oldMatches)) + fun yygo actTable = + (fn (~1, _, oldMatches) => yystuck oldMatches + | (curState, strm, oldMatches) => let + val (transitions, finals') = Vector.sub (yytable, curState) + val finals = map (fn i => Vector.sub (actTable, i)) finals' + fun tryfinal() = + yystuck (yyactsToMatches (strm, finals, oldMatches)) + fun find (c, []) = NONE + | find (c, (c1, c2, s)::ts) = + if c1 <= c andalso c <= c2 then SOME s + else find (c, ts) + in case yygetc strm + of SOME(c, strm') => + (case find (c, transitions) + of NONE => tryfinal() + | SOME n => + yygo actTable + (n, strm', + yyactsToMatches (strm, finals, oldMatches))) + | NONE => tryfinal() + end) + in +let +fun yyAction0 (strm, lastMatch : yymatch) = (yystrm := strm; (continue())) +fun yyAction1 (strm, lastMatch : yymatch) = (yystrm := strm; + (YYBEGIN CODE; clrText(); Tok.KW_defs)) +fun yyAction2 (strm, lastMatch : yymatch) = (yystrm := strm; + (YYBEGIN DIRECTIVE; Tok.KW_name)) +fun yyAction3 (strm, lastMatch : yymatch) = (yystrm := strm; + (YYBEGIN DIRECTIVE; Tok.KW_states)) +fun yyAction4 (strm, lastMatch : yymatch) = (yystrm := strm; + (YYBEGIN DIRECTIVE; Tok.KW_let)) +fun yyAction5 (strm, lastMatch : yymatch) = (yystrm := strm; + (YYBEGIN CHARSET; Tok.KW_charset)) +fun yyAction6 (strm, lastMatch : yymatch) = let + val yytext = yymktext(strm) + in + yystrm := strm; (Tok.ID yytext) + end +fun yyAction7 (strm, lastMatch : yymatch) = (yystrm := strm; (Tok.COMMA)) +fun yyAction8 (strm, lastMatch : yymatch) = (yystrm := strm; + (YYBEGIN INITIAL; Tok.SEMI)) +fun yyAction9 (strm, lastMatch : yymatch) = (yystrm := strm; + (YYBEGIN INITIAL; Tok.GT)) +fun yyAction10 (strm, lastMatch : yymatch) = (yystrm := strm; + (YYBEGIN INITIAL; Tok.EQ)) +fun yyAction11 (strm, lastMatch : yymatch) = let + val oldStrm = !(yystrm) + fun REJECT () = (yystrm := oldStrm; yystuck(lastMatch)) + in + yystrm := strm; (YYBEGIN INITIAL; REJECT()) + end +fun yyAction12 (strm, lastMatch : yymatch) = (yystrm := strm; + (YYBEGIN INITIAL; Tok.UTF8)) +fun yyAction13 (strm, lastMatch : yymatch) = (yystrm := strm; + (YYBEGIN INITIAL; Tok.ASCII7)) +fun yyAction14 (strm, lastMatch : yymatch) = (yystrm := strm; + (YYBEGIN INITIAL; Tok.ASCII8)) +fun yyAction15 (strm, lastMatch : yymatch) = (yystrm := strm; + (YYBEGIN INITIAL; Tok.SEMI)) +fun yyAction16 (strm, lastMatch : yymatch) = let + val oldStrm = !(yystrm) + fun REJECT () = (yystrm := oldStrm; yystuck(lastMatch)) + in + yystrm := strm; (YYBEGIN INITIAL; REJECT()) + end +fun yyAction17 (strm, lastMatch : yymatch) = (yystrm := strm; (Tok.BAR)) +fun yyAction18 (strm, lastMatch : yymatch) = (yystrm := strm; (Tok.AMP)) +fun yyAction19 (strm, lastMatch : yymatch) = (yystrm := strm; (Tok.DOT)) +fun yyAction20 (strm, lastMatch : yymatch) = (yystrm := strm; (Tok.DOLLAR)) +fun yyAction21 (strm, lastMatch : yymatch) = (yystrm := strm; (Tok.PLUS)) +fun yyAction22 (strm, lastMatch : yymatch) = (yystrm := strm; (Tok.STAR)) +fun yyAction23 (strm, lastMatch : yymatch) = (yystrm := strm; (Tok.QUERY)) +fun yyAction24 (strm, lastMatch : yymatch) = (yystrm := strm; (Tok.SEMI)) +fun yyAction25 (strm, lastMatch : yymatch) = (yystrm := strm; (Tok.LP)) +fun yyAction26 (strm, lastMatch : yymatch) = (yystrm := strm; (Tok.RP)) +fun yyAction27 (strm, lastMatch : yymatch) = (yystrm := strm; + (YYBEGIN CHARCLASS; Tok.LSB)) +fun yyAction28 (strm, lastMatch : yymatch) = (yystrm := strm; (Tok.RSB)) +fun yyAction29 (strm, lastMatch : yymatch) = let + val yytext = yymktext(strm) + in + yystrm := strm; (Tok.ID (chomp yytext)) + end +fun yyAction30 (strm, lastMatch : yymatch) = let + val yytext = yymktext(strm) + in + yystrm := strm; + ((Tok.REPEAT o valOf o Int.fromString o + Substring.string o (Substring.triml 1) o + (Substring.trimr 1)o Substring.full) yytext) + end +fun yyAction31 (strm, lastMatch : yymatch) = (yystrm := strm; + (YYBEGIN DIRECTIVE; Tok.LT)) +fun yyAction32 (strm, lastMatch : yymatch) = (yystrm := strm; (Tok.GT)) +fun yyAction33 (strm, lastMatch : yymatch) = (yystrm := strm; (Tok.COMMA)) +fun yyAction34 (strm, lastMatch : yymatch) = (yystrm := strm; (Tok.SLASH)) +fun yyAction35 (strm, lastMatch : yymatch) = (yystrm := strm; (Tok.EQ)) +fun yyAction36 (strm, lastMatch : yymatch) = (yystrm := strm; + (YYBEGIN CODE; clrText(); Tok.DARROW)) +fun yyAction37 (strm, lastMatch : yymatch) = (yystrm := strm; + (YYBEGIN RESTRING; continue())) +fun yyAction38 (strm, lastMatch : yymatch) = (yystrm := strm; (Tok.CARAT)) +fun yyAction39 (strm, lastMatch : yymatch) = (yystrm := strm; (Tok.DASH)) +fun yyAction40 (strm, lastMatch : yymatch) = let + val yylineno = ref(yygetlineNo(!(yystrm))) + val yytext = yymktext(strm) + in + yystrm := strm; + (let val c = Char.fromString yytext + in case c + of SOME c' => Tok.CHAR c' + | NONE => (print (concat [ + Int.toString (!yylineno), ": unknown escape sequence '", + yytext, "'\n"]); + continue()) + end) + end +fun yyAction41 (strm, lastMatch : yymatch) = (yystrm := strm; + (YYBEGIN INITIAL; Tok.RSB)) +fun yyAction42 (strm, lastMatch : yymatch) = let + val yytext = yymktext(strm) + in + yystrm := strm; (Tok.CHAR (String.sub (yytext, 0))) + end +fun yyAction43 (strm, lastMatch : yymatch) = let + val yylineno = ref(yygetlineNo(!(yystrm))) + in + yystrm := strm; + (comLvl := 1; comStart := !yylineno; YYBEGIN COM; + ignore(continue() before YYBEGIN INITIAL); + continue()) + end +fun yyAction44 (strm, lastMatch : yymatch) = let + val yylineno = ref(yygetlineNo(!(yystrm))) + val yytext = yymktext(strm) + in + yystrm := strm; + (comLvl := 1; comStart := !yylineno; YYBEGIN COM; + addText yytext; + ignore(continue() before YYBEGIN CODE); + continue()) + end +fun yyAction45 (strm, lastMatch : yymatch) = let + val yytext = yymktext(strm) + in + yystrm := strm; (addText yytext; comLvl := !comLvl+1; continue()) + end +fun yyAction46 (strm, lastMatch : yymatch) = let + val yytext = yymktext(strm) + in + yystrm := strm; + (addText yytext; comLvl := !comLvl-1; + if (!comLvl = 0) + then (Tok.BOGUS) + else continue()) + end +fun yyAction47 (strm, lastMatch : yymatch) = let + val yytext = yymktext(strm) + in + yystrm := strm; (addText yytext; continue()) + end +fun yyAction48 (strm, lastMatch : yymatch) = let + val yytext = yymktext(strm) + in + yystrm := strm; + (if !pcount = 0 then () else addText yytext; + inc pcount; continue()) + end +fun yyAction49 (strm, lastMatch : yymatch) = let + val yytext = yymktext(strm) + in + yystrm := strm; + (dec pcount; + if !pcount = 0 then + (YYBEGIN INITIAL; Tok.CODE (getText())) + else (addText yytext; continue())) + end +fun yyAction50 (strm, lastMatch : yymatch) = let + val yytext = yymktext(strm) + in + yystrm := strm; + (addText yytext; YYBEGIN STRING; + ignore(continue() before YYBEGIN CODE); + addText "\""; continue()) + end +fun yyAction51 (strm, lastMatch : yymatch) = let + val yytext = yymktext(strm) + in + yystrm := strm; (addText yytext; continue()) + end +fun yyAction52 (strm, lastMatch : yymatch) = (yystrm := strm; (Tok.BOGUS)) +fun yyAction53 (strm, lastMatch : yymatch) = let + val yytext = yymktext(strm) + in + yystrm := strm; + (addText yytext; print ("unclosed string"); + Tok.BOGUS) + end +fun yyAction54 (strm, lastMatch : yymatch) = let + val yytext = yymktext(strm) + in + yystrm := strm; (addText yytext; continue()) + end +fun yyAction55 (strm, lastMatch : yymatch) = let + val yytext = yymktext(strm) + in + yystrm := strm; (addText yytext; continue()) + end +fun yyAction56 (strm, lastMatch : yymatch) = let + val yytext = yymktext(strm) + in + yystrm := strm; (addText yytext; continue()) + end +fun yyAction57 (strm, lastMatch : yymatch) = let + val yytext = yymktext(strm) + in + yystrm := strm; (addText yytext; continue()) + end +fun yyAction58 (strm, lastMatch : yymatch) = (yystrm := strm; + (YYBEGIN INITIAL; continue())) +fun yyAction59 (strm, lastMatch : yymatch) = (yystrm := strm; + (print ("unclosed string\n"); continue())) +fun yyAction60 (strm, lastMatch : yymatch) = let + val yytext = yymktext(strm) + in + yystrm := strm; (Tok.CHAR (String.sub (yytext, 0))) + end +fun yyAction61 (strm, lastMatch : yymatch) = let + val yytext = yymktext(strm) + in + yystrm := strm; (Tok.CHAR (String.sub (yytext, 0))) + end +fun yyAction62 (strm, lastMatch : yymatch) = let + val yylineno = ref(yygetlineNo(!(yystrm))) + val yytext = yymktext(strm) + in + yystrm := strm; + (print (concat[Int.toString (!yylineno), ": illegal character '", + String.toCString yytext, "'\n"]); + continue()) + end +fun yyQ130 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction6(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"[" + then yyAction6(strm, yyNO_MATCH) + else if inp < #"[" + then if inp = #":" + then yyAction6(strm, yyNO_MATCH) + else if inp < #":" + then if inp <= #"/" + then yyAction6(strm, yyNO_MATCH) + else yyQ130(strm', yyMATCH(strm, yyAction6, yyNO_MATCH)) + else if inp <= #"@" + then yyAction6(strm, yyNO_MATCH) + else yyQ130(strm', yyMATCH(strm, yyAction6, yyNO_MATCH)) + else if inp = #"`" + then yyAction6(strm, yyNO_MATCH) + else if inp < #"`" + then if inp = #"_" + then yyQ130(strm', yyMATCH(strm, yyAction6, yyNO_MATCH)) + else yyAction6(strm, yyNO_MATCH) + else if inp <= #"z" + then yyQ130(strm', yyMATCH(strm, yyAction6, yyNO_MATCH)) + else yyAction6(strm, yyNO_MATCH) + (* end case *)) +fun yyQ129 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction6(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"[" + then yyAction6(strm, yyNO_MATCH) + else if inp < #"[" + then if inp = #":" + then yyAction6(strm, yyNO_MATCH) + else if inp < #":" + then if inp <= #"/" + then yyAction6(strm, yyNO_MATCH) + else yyQ130(strm', yyMATCH(strm, yyAction6, yyNO_MATCH)) + else if inp <= #"@" + then yyAction6(strm, yyNO_MATCH) + else yyQ130(strm', yyMATCH(strm, yyAction6, yyNO_MATCH)) + else if inp = #"`" + then yyAction6(strm, yyNO_MATCH) + else if inp < #"`" + then if inp = #"_" + then yyQ130(strm', yyMATCH(strm, yyAction6, yyNO_MATCH)) + else yyAction6(strm, yyNO_MATCH) + else if inp <= #"z" + then yyQ130(strm', yyMATCH(strm, yyAction6, yyNO_MATCH)) + else yyAction6(strm, yyNO_MATCH) + (* end case *)) +fun yyQ128 (strm, lastMatch : yymatch) = yyAction9(strm, yyNO_MATCH) +fun yyQ127 (strm, lastMatch : yymatch) = yyAction10(strm, yyNO_MATCH) +fun yyQ126 (strm, lastMatch : yymatch) = yyAction8(strm, yyNO_MATCH) +fun yyQ125 (strm, lastMatch : yymatch) = yyAction7(strm, yyNO_MATCH) +fun yyQ51 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction0(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"\^N" + then yyAction0(strm, yyNO_MATCH) + else if inp < #"\^N" + then if inp = #"\t" + then yyQ31(strm', yyMATCH(strm, yyAction0, yyNO_MATCH)) + else if inp < #"\t" + then yyAction0(strm, yyNO_MATCH) + else if inp = #"\r" + then yyQ51(strm', yyMATCH(strm, yyAction0, yyNO_MATCH)) + else yyQ31(strm', yyMATCH(strm, yyAction0, yyNO_MATCH)) + else if inp = #" " + then yyQ31(strm', yyMATCH(strm, yyAction0, yyNO_MATCH)) + else yyAction0(strm, yyNO_MATCH) + (* end case *)) +and yyQ31 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction0(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"\^N" + then yyAction0(strm, yyNO_MATCH) + else if inp < #"\^N" + then if inp = #"\t" + then yyQ31(strm', yyMATCH(strm, yyAction0, yyNO_MATCH)) + else if inp < #"\t" + then yyAction0(strm, yyNO_MATCH) + else if inp = #"\r" + then yyQ51(strm', yyMATCH(strm, yyAction0, yyNO_MATCH)) + else yyQ31(strm', yyMATCH(strm, yyAction0, yyNO_MATCH)) + else if inp = #" " + then yyQ31(strm', yyMATCH(strm, yyAction0, yyNO_MATCH)) + else yyAction0(strm, yyNO_MATCH) + (* end case *)) +fun yyQ124 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction0(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"\^N" + then yyAction0(strm, yyNO_MATCH) + else if inp < #"\^N" + then if inp = #"\t" + then yyQ31(strm', yyMATCH(strm, yyAction0, yyNO_MATCH)) + else if inp < #"\t" + then yyAction0(strm, yyNO_MATCH) + else if inp = #"\r" + then yyQ51(strm', yyMATCH(strm, yyAction0, yyNO_MATCH)) + else yyQ31(strm', yyMATCH(strm, yyAction0, yyNO_MATCH)) + else if inp = #" " + then yyQ31(strm', yyMATCH(strm, yyAction0, yyNO_MATCH)) + else yyAction0(strm, yyNO_MATCH) + (* end case *)) +fun yyQ123 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction0(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"\^N" + then yyAction0(strm, yyNO_MATCH) + else if inp < #"\^N" + then if inp = #"\t" + then yyQ31(strm', yyMATCH(strm, yyAction0, yyNO_MATCH)) + else if inp < #"\t" + then yyAction0(strm, yyNO_MATCH) + else if inp = #"\r" + then yyQ51(strm', yyMATCH(strm, yyAction0, yyNO_MATCH)) + else yyQ31(strm', yyMATCH(strm, yyAction0, yyNO_MATCH)) + else if inp = #" " + then yyQ31(strm', yyMATCH(strm, yyAction0, yyNO_MATCH)) + else yyAction0(strm, yyNO_MATCH) + (* end case *)) +fun yyQ122 (strm, lastMatch : yymatch) = yyAction11(strm, yyMATCH(strm, yyAction62, yyNO_MATCH)) +fun yyQ7 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => + if yyInput.eof(!(yystrm)) + then UserDeclarations.eof(yyarg) + else yystuck(lastMatch) + | SOME(inp, strm') => + if inp = #"-" + then yyQ122(strm', lastMatch) + else if inp < #"-" + then if inp = #"\r" + then yyQ124(strm', lastMatch) + else if inp < #"\r" + then if inp = #"\n" + then yyQ31(strm', lastMatch) + else if inp < #"\n" + then if inp = #"\t" + then yyQ123(strm', lastMatch) + else yyQ122(strm', lastMatch) + else yyQ123(strm', lastMatch) + else if inp = #"!" + then yyQ122(strm', lastMatch) + else if inp < #"!" + then if inp = #" " + then yyQ123(strm', lastMatch) + else yyQ122(strm', lastMatch) + else if inp = #"," + then yyQ125(strm', lastMatch) + else yyQ122(strm', lastMatch) + else if inp = #"?" + then yyQ122(strm', lastMatch) + else if inp < #"?" + then if inp = #"<" + then yyQ122(strm', lastMatch) + else if inp < #"<" + then if inp = #";" + then yyQ126(strm', lastMatch) + else yyQ122(strm', lastMatch) + else if inp = #"=" + then yyQ127(strm', lastMatch) + else yyQ128(strm', lastMatch) + else if inp = #"[" + then yyQ122(strm', lastMatch) + else if inp < #"[" + then if inp <= #"@" + then yyQ122(strm', lastMatch) + else yyQ129(strm', lastMatch) + else if inp = #"a" + then yyQ129(strm', lastMatch) + else if inp < #"a" + then yyQ122(strm', lastMatch) + else if inp <= #"z" + then yyQ129(strm', lastMatch) + else yyQ122(strm', lastMatch) + (* end case *)) +fun yyQ91 (strm, lastMatch : yymatch) = yyAction17(strm, yyNO_MATCH) +fun yyQ94 (strm, lastMatch : yymatch) = yyAction29(strm, yyNO_MATCH) +fun yyQ93 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = #"_" + then yyQ93(strm', lastMatch) + else if inp < #"_" + then if inp = #":" + then yystuck(lastMatch) + else if inp < #":" + then if inp <= #"/" + then yystuck(lastMatch) + else yyQ93(strm', lastMatch) + else if inp = #"A" + then yyQ93(strm', lastMatch) + else if inp < #"A" + then yystuck(lastMatch) + else if inp <= #"Z" + then yyQ93(strm', lastMatch) + else yystuck(lastMatch) + else if inp = #"{" + then yystuck(lastMatch) + else if inp < #"{" + then if inp = #"`" + then yystuck(lastMatch) + else yyQ93(strm', lastMatch) + else if inp = #"}" + then yyQ94(strm', lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ95 (strm, lastMatch : yymatch) = yyAction30(strm, yyNO_MATCH) +fun yyQ92 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = #":" + then yystuck(lastMatch) + else if inp < #":" + then if inp <= #"/" + then yystuck(lastMatch) + else yyQ92(strm', lastMatch) + else if inp = #"}" + then yyQ95(strm', lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ90 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction61(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"A" + then yyQ93(strm', yyMATCH(strm, yyAction61, yyNO_MATCH)) + else if inp < #"A" + then if inp = #"0" + then yyQ92(strm', yyMATCH(strm, yyAction61, yyNO_MATCH)) + else if inp < #"0" + then yyAction61(strm, yyNO_MATCH) + else if inp <= #"9" + then yyQ92(strm', yyMATCH(strm, yyAction61, yyNO_MATCH)) + else yyAction61(strm, yyNO_MATCH) + else if inp = #"a" + then yyQ93(strm', yyMATCH(strm, yyAction61, yyNO_MATCH)) + else if inp < #"a" + then if inp <= #"Z" + then yyQ93(strm', yyMATCH(strm, yyAction61, yyNO_MATCH)) + else yyAction61(strm, yyNO_MATCH) + else if inp <= #"z" + then yyQ93(strm', yyMATCH(strm, yyAction61, yyNO_MATCH)) + else yyAction61(strm, yyNO_MATCH) + (* end case *)) +fun yyQ89 (strm, lastMatch : yymatch) = yyAction38(strm, yyNO_MATCH) +fun yyQ88 (strm, lastMatch : yymatch) = yyAction28(strm, yyNO_MATCH) +fun yyQ59 (strm, lastMatch : yymatch) = yyAction40(strm, yyNO_MATCH) +fun yyQ61 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = #"0" + then yyQ59(strm', lastMatch) + else if inp < #"0" + then yystuck(lastMatch) + else if inp <= #"9" + then yyQ59(strm', lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ60 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = #"0" + then yyQ61(strm', lastMatch) + else if inp < #"0" + then yystuck(lastMatch) + else if inp <= #"9" + then yyQ61(strm', lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ87 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction61(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"A" + then yyQ59(strm', yyMATCH(strm, yyAction61, yyNO_MATCH)) + else if inp < #"A" + then if inp = #"#" + then yyAction61(strm, yyNO_MATCH) + else if inp < #"#" + then if inp = #"\"" + then yyQ59(strm', yyMATCH(strm, yyAction61, yyNO_MATCH)) + else yyAction61(strm, yyNO_MATCH) + else if inp = #"0" + then yyQ60(strm', yyMATCH(strm, yyAction61, yyNO_MATCH)) + else if inp < #"0" + then yyAction61(strm, yyNO_MATCH) + else if inp <= #"9" + then yyQ60(strm', yyMATCH(strm, yyAction61, yyNO_MATCH)) + else yyAction61(strm, yyNO_MATCH) + else if inp = #"]" + then yyAction61(strm, yyNO_MATCH) + else if inp < #"]" + then if inp = #"[" + then yyAction61(strm, yyNO_MATCH) + else yyQ59(strm', yyMATCH(strm, yyAction61, yyNO_MATCH)) + else if inp = #"a" + then yyQ59(strm', yyMATCH(strm, yyAction61, yyNO_MATCH)) + else if inp < #"a" + then yyAction61(strm, yyNO_MATCH) + else if inp <= #"z" + then yyQ59(strm', yyMATCH(strm, yyAction61, yyNO_MATCH)) + else yyAction61(strm, yyNO_MATCH) + (* end case *)) +fun yyQ86 (strm, lastMatch : yymatch) = yyAction27(strm, yyNO_MATCH) +fun yyQ85 (strm, lastMatch : yymatch) = yyAction23(strm, yyNO_MATCH) +fun yyQ84 (strm, lastMatch : yymatch) = yyAction32(strm, yyNO_MATCH) +fun yyQ96 (strm, lastMatch : yymatch) = yyAction36(strm, yyNO_MATCH) +fun yyQ83 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction35(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #">" + then yyQ96(strm', yyMATCH(strm, yyAction35, yyNO_MATCH)) + else yyAction35(strm, yyNO_MATCH) + (* end case *)) +fun yyQ82 (strm, lastMatch : yymatch) = yyAction31(strm, yyNO_MATCH) +fun yyQ81 (strm, lastMatch : yymatch) = yyAction24(strm, yyNO_MATCH) +fun yyQ80 (strm, lastMatch : yymatch) = yyAction34(strm, yyNO_MATCH) +fun yyQ79 (strm, lastMatch : yymatch) = yyAction19(strm, yyNO_MATCH) +fun yyQ78 (strm, lastMatch : yymatch) = yyAction33(strm, yyNO_MATCH) +fun yyQ77 (strm, lastMatch : yymatch) = yyAction21(strm, yyNO_MATCH) +fun yyQ76 (strm, lastMatch : yymatch) = yyAction22(strm, yyNO_MATCH) +fun yyQ75 (strm, lastMatch : yymatch) = yyAction26(strm, yyNO_MATCH) +fun yyQ97 (strm, lastMatch : yymatch) = yyAction43(strm, yyNO_MATCH) +fun yyQ74 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction25(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"*" + then yyQ97(strm', yyMATCH(strm, yyAction25, yyNO_MATCH)) + else yyAction25(strm, yyNO_MATCH) + (* end case *)) +fun yyQ73 (strm, lastMatch : yymatch) = yyAction18(strm, yyNO_MATCH) +fun yyQ107 (strm, lastMatch : yymatch) = yyAction3(strm, yyNO_MATCH) +fun yyQ106 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = #"s" + then yyQ107(strm', lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ105 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = #"e" + then yyQ106(strm', lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ104 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = #"t" + then yyQ105(strm', lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ103 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = #"a" + then yyQ104(strm', lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ102 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = #"t" + then yyQ103(strm', lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ110 (strm, lastMatch : yymatch) = yyAction2(strm, yyNO_MATCH) +fun yyQ109 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = #"e" + then yyQ110(strm', lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ108 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = #"m" + then yyQ109(strm', lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ101 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = #"a" + then yyQ108(strm', lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ112 (strm, lastMatch : yymatch) = yyAction4(strm, yyNO_MATCH) +fun yyQ111 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = #"t" + then yyQ112(strm', lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ100 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = #"e" + then yyQ111(strm', lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ115 (strm, lastMatch : yymatch) = yyAction1(strm, yyNO_MATCH) +fun yyQ114 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = #"s" + then yyQ115(strm', lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ113 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = #"f" + then yyQ114(strm', lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ99 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = #"e" + then yyQ113(strm', lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ121 (strm, lastMatch : yymatch) = yyAction5(strm, yyNO_MATCH) +fun yyQ120 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = #"t" + then yyQ121(strm', lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ119 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = #"e" + then yyQ120(strm', lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ118 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = #"s" + then yyQ119(strm', lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ117 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = #"r" + then yyQ118(strm', lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ116 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = #"a" + then yyQ117(strm', lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ98 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = #"h" + then yyQ116(strm', lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ72 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction61(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"m" + then yyAction61(strm, yyNO_MATCH) + else if inp < #"m" + then if inp = #"d" + then yyQ99(strm', yyMATCH(strm, yyAction61, yyNO_MATCH)) + else if inp < #"d" + then if inp = #"c" + then yyQ98(strm', yyMATCH(strm, yyAction61, yyNO_MATCH)) + else yyAction61(strm, yyNO_MATCH) + else if inp = #"l" + then yyQ100(strm', yyMATCH(strm, yyAction61, yyNO_MATCH)) + else yyAction61(strm, yyNO_MATCH) + else if inp = #"s" + then yyQ102(strm', yyMATCH(strm, yyAction61, yyNO_MATCH)) + else if inp < #"s" + then if inp = #"n" + then yyQ101(strm', yyMATCH(strm, yyAction61, yyNO_MATCH)) + else yyAction61(strm, yyNO_MATCH) + else yyAction61(strm, yyNO_MATCH) + (* end case *)) +fun yyQ71 (strm, lastMatch : yymatch) = yyAction20(strm, yyNO_MATCH) +fun yyQ70 (strm, lastMatch : yymatch) = yyAction37(strm, yyNO_MATCH) +fun yyQ69 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction0(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"\^N" + then yyAction0(strm, yyNO_MATCH) + else if inp < #"\^N" + then if inp = #"\t" + then yyQ31(strm', yyMATCH(strm, yyAction0, yyNO_MATCH)) + else if inp < #"\t" + then yyAction0(strm, yyNO_MATCH) + else if inp = #"\r" + then yyQ51(strm', yyMATCH(strm, yyAction0, yyNO_MATCH)) + else yyQ31(strm', yyMATCH(strm, yyAction0, yyNO_MATCH)) + else if inp = #" " + then yyQ31(strm', yyMATCH(strm, yyAction0, yyNO_MATCH)) + else yyAction0(strm, yyNO_MATCH) + (* end case *)) +fun yyQ68 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction0(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"\^N" + then yyAction0(strm, yyNO_MATCH) + else if inp < #"\^N" + then if inp = #"\t" + then yyQ31(strm', yyMATCH(strm, yyAction0, yyNO_MATCH)) + else if inp < #"\t" + then yyAction0(strm, yyNO_MATCH) + else if inp = #"\r" + then yyQ51(strm', yyMATCH(strm, yyAction0, yyNO_MATCH)) + else yyQ31(strm', yyMATCH(strm, yyAction0, yyNO_MATCH)) + else if inp = #" " + then yyQ31(strm', yyMATCH(strm, yyAction0, yyNO_MATCH)) + else yyAction0(strm, yyNO_MATCH) + (* end case *)) +fun yyQ67 (strm, lastMatch : yymatch) = yyAction61(strm, yyNO_MATCH) +fun yyQ6 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => + if yyInput.eof(!(yystrm)) + then UserDeclarations.eof(yyarg) + else yystuck(lastMatch) + | SOME(inp, strm') => + if inp = #"," + then yyQ78(strm', lastMatch) + else if inp < #"," + then if inp = #"#" + then yyQ67(strm', lastMatch) + else if inp < #"#" + then if inp = #"\r" + then yyQ69(strm', lastMatch) + else if inp < #"\r" + then if inp = #"\n" + then yyQ31(strm', lastMatch) + else if inp < #"\n" + then if inp = #"\t" + then yyQ68(strm', lastMatch) + else yyQ67(strm', lastMatch) + else yyQ68(strm', lastMatch) + else if inp = #"!" + then yyQ67(strm', lastMatch) + else if inp < #"!" + then if inp = #" " + then yyQ68(strm', lastMatch) + else yyQ67(strm', lastMatch) + else yyQ70(strm', lastMatch) + else if inp = #"(" + then yyQ74(strm', lastMatch) + else if inp < #"(" + then if inp = #"&" + then yyQ73(strm', lastMatch) + else if inp < #"&" + then if inp = #"$" + then yyQ71(strm', lastMatch) + else yyQ72(strm', lastMatch) + else yyQ67(strm', lastMatch) + else if inp = #"*" + then yyQ76(strm', lastMatch) + else if inp = #")" + then yyQ75(strm', lastMatch) + else yyQ77(strm', lastMatch) + else if inp = #"@" + then yyQ67(strm', lastMatch) + else if inp < #"@" + then if inp = #";" + then yyQ81(strm', lastMatch) + else if inp < #";" + then if inp = #"/" + then yyQ80(strm', lastMatch) + else if inp < #"/" + then if inp = #"-" + then yyQ67(strm', lastMatch) + else yyQ79(strm', lastMatch) + else yyQ67(strm', lastMatch) + else if inp = #">" + then yyQ84(strm', lastMatch) + else if inp < #">" + then if inp = #"<" + then yyQ82(strm', lastMatch) + else yyQ83(strm', lastMatch) + else yyQ85(strm', lastMatch) + else if inp = #"^" + then yyQ89(strm', lastMatch) + else if inp < #"^" + then if inp = #"\\" + then yyQ87(strm', lastMatch) + else if inp < #"\\" + then if inp = #"[" + then yyQ86(strm', lastMatch) + else yyQ67(strm', lastMatch) + else yyQ88(strm', lastMatch) + else if inp = #"|" + then yyQ91(strm', lastMatch) + else if inp < #"|" + then if inp = #"{" + then yyQ90(strm', lastMatch) + else yyQ67(strm', lastMatch) + else yyQ67(strm', lastMatch) + (* end case *)) +fun yyQ66 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction60(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"A" + then yyQ59(strm', yyMATCH(strm, yyAction60, yyNO_MATCH)) + else if inp < #"A" + then if inp = #"#" + then yyAction60(strm, yyNO_MATCH) + else if inp < #"#" + then if inp = #"\"" + then yyQ59(strm', yyMATCH(strm, yyAction60, yyNO_MATCH)) + else yyAction60(strm, yyNO_MATCH) + else if inp = #"0" + then yyQ60(strm', yyMATCH(strm, yyAction60, yyNO_MATCH)) + else if inp < #"0" + then yyAction60(strm, yyNO_MATCH) + else if inp <= #"9" + then yyQ60(strm', yyMATCH(strm, yyAction60, yyNO_MATCH)) + else yyAction60(strm, yyNO_MATCH) + else if inp = #"]" + then yyAction60(strm, yyNO_MATCH) + else if inp < #"]" + then if inp = #"[" + then yyAction60(strm, yyNO_MATCH) + else yyQ59(strm', yyMATCH(strm, yyAction60, yyNO_MATCH)) + else if inp = #"a" + then yyQ59(strm', yyMATCH(strm, yyAction60, yyNO_MATCH)) + else if inp < #"a" + then yyAction60(strm, yyNO_MATCH) + else if inp <= #"z" + then yyQ59(strm', yyMATCH(strm, yyAction60, yyNO_MATCH)) + else yyAction60(strm, yyNO_MATCH) + (* end case *)) +fun yyQ65 (strm, lastMatch : yymatch) = yyAction58(strm, yyNO_MATCH) +fun yyQ63 (strm, lastMatch : yymatch) = yyAction59(strm, yyNO_MATCH) +fun yyQ64 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction59(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"\n" + then yyQ63(strm', yyMATCH(strm, yyAction59, yyNO_MATCH)) + else yyAction59(strm, yyNO_MATCH) + (* end case *)) +fun yyQ62 (strm, lastMatch : yymatch) = yyAction60(strm, yyNO_MATCH) +fun yyQ5 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => + if yyInput.eof(!(yystrm)) + then UserDeclarations.eof(yyarg) + else yystuck(lastMatch) + | SOME(inp, strm') => + if inp = #"\^N" + then yyQ62(strm', lastMatch) + else if inp < #"\^N" + then if inp = #"\v" + then yyQ62(strm', lastMatch) + else if inp < #"\v" + then if inp = #"\n" + then yyQ63(strm', lastMatch) + else yyQ62(strm', lastMatch) + else if inp = #"\r" + then yyQ64(strm', lastMatch) + else yyQ62(strm', lastMatch) + else if inp = #"#" + then yyQ62(strm', lastMatch) + else if inp < #"#" + then if inp = #"\"" + then yyQ65(strm', lastMatch) + else yyQ62(strm', lastMatch) + else if inp = #"\\" + then yyQ66(strm', lastMatch) + else yyQ62(strm', lastMatch) + (* end case *)) +fun yyQ58 (strm, lastMatch : yymatch) = yyAction38(strm, yyNO_MATCH) +fun yyQ57 (strm, lastMatch : yymatch) = yyAction41(strm, yyNO_MATCH) +fun yyQ56 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction42(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"A" + then yyQ59(strm', yyMATCH(strm, yyAction42, yyNO_MATCH)) + else if inp < #"A" + then if inp = #"#" + then yyAction42(strm, yyNO_MATCH) + else if inp < #"#" + then if inp = #"\"" + then yyQ59(strm', yyMATCH(strm, yyAction42, yyNO_MATCH)) + else yyAction42(strm, yyNO_MATCH) + else if inp = #"0" + then yyQ60(strm', yyMATCH(strm, yyAction42, yyNO_MATCH)) + else if inp < #"0" + then yyAction42(strm, yyNO_MATCH) + else if inp <= #"9" + then yyQ60(strm', yyMATCH(strm, yyAction42, yyNO_MATCH)) + else yyAction42(strm, yyNO_MATCH) + else if inp = #"]" + then yyAction42(strm, yyNO_MATCH) + else if inp < #"]" + then if inp = #"[" + then yyAction42(strm, yyNO_MATCH) + else yyQ59(strm', yyMATCH(strm, yyAction42, yyNO_MATCH)) + else if inp = #"a" + then yyQ59(strm', yyMATCH(strm, yyAction42, yyNO_MATCH)) + else if inp < #"a" + then yyAction42(strm, yyNO_MATCH) + else if inp <= #"z" + then yyQ59(strm', yyMATCH(strm, yyAction42, yyNO_MATCH)) + else yyAction42(strm, yyNO_MATCH) + (* end case *)) +fun yyQ55 (strm, lastMatch : yymatch) = yyAction39(strm, yyNO_MATCH) +fun yyQ54 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction0(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"\^N" + then yyAction0(strm, yyNO_MATCH) + else if inp < #"\^N" + then if inp = #"\t" + then yyQ31(strm', yyMATCH(strm, yyAction0, yyNO_MATCH)) + else if inp < #"\t" + then yyAction0(strm, yyNO_MATCH) + else if inp = #"\r" + then yyQ51(strm', yyMATCH(strm, yyAction0, yyNO_MATCH)) + else yyQ31(strm', yyMATCH(strm, yyAction0, yyNO_MATCH)) + else if inp = #" " + then yyQ31(strm', yyMATCH(strm, yyAction0, yyNO_MATCH)) + else yyAction0(strm, yyNO_MATCH) + (* end case *)) +fun yyQ53 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction0(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"\^N" + then yyAction0(strm, yyNO_MATCH) + else if inp < #"\^N" + then if inp = #"\t" + then yyQ31(strm', yyMATCH(strm, yyAction0, yyNO_MATCH)) + else if inp < #"\t" + then yyAction0(strm, yyNO_MATCH) + else if inp = #"\r" + then yyQ51(strm', yyMATCH(strm, yyAction0, yyNO_MATCH)) + else yyQ31(strm', yyMATCH(strm, yyAction0, yyNO_MATCH)) + else if inp = #" " + then yyQ31(strm', yyMATCH(strm, yyAction0, yyNO_MATCH)) + else yyAction0(strm, yyNO_MATCH) + (* end case *)) +fun yyQ52 (strm, lastMatch : yymatch) = yyAction42(strm, yyNO_MATCH) +fun yyQ4 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => + if yyInput.eof(!(yystrm)) + then UserDeclarations.eof(yyarg) + else yystuck(lastMatch) + | SOME(inp, strm') => + if inp = #"!" + then yyQ52(strm', lastMatch) + else if inp < #"!" + then if inp = #"\v" + then yyQ53(strm', lastMatch) + else if inp < #"\v" + then if inp = #"\t" + then yyQ53(strm', lastMatch) + else if inp = #"\n" + then yyQ31(strm', lastMatch) + else yyQ52(strm', lastMatch) + else if inp = #"\^N" + then yyQ52(strm', lastMatch) + else if inp < #"\^N" + then if inp = #"\r" + then yyQ54(strm', lastMatch) + else yyQ53(strm', lastMatch) + else if inp = #" " + then yyQ53(strm', lastMatch) + else yyQ52(strm', lastMatch) + else if inp = #"\\" + then yyQ56(strm', lastMatch) + else if inp < #"\\" + then if inp = #"-" + then yyQ55(strm', lastMatch) + else yyQ52(strm', lastMatch) + else if inp = #"^" + then yyQ58(strm', lastMatch) + else if inp = #"]" + then yyQ57(strm', lastMatch) + else yyQ52(strm', lastMatch) + (* end case *)) +fun yyQ40 (strm, lastMatch : yymatch) = yyAction12(strm, yyNO_MATCH) +fun yyQ39 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = #"8" + then yyQ40(strm', lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ38 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = #"f" + then yyQ39(strm', lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ37 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction16(strm, yyMATCH(strm, yyAction62, yyNO_MATCH)) + | SOME(inp, strm') => + if inp = #"t" + then yyQ38(strm', yyMATCH(strm, yyAction16, yyMATCH(strm, yyAction62, yyNO_MATCH))) + else yyAction16(strm, yyMATCH(strm, yyAction62, yyNO_MATCH)) + (* end case *)) +fun yyQ46 (strm, lastMatch : yymatch) = yyAction14(strm, yyNO_MATCH) +fun yyQ45 (strm, lastMatch : yymatch) = yyAction13(strm, yyNO_MATCH) +fun yyQ44 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = #"8" + then yyQ46(strm', lastMatch) + else if inp < #"8" + then if inp = #"7" + then yyQ45(strm', lastMatch) + else yystuck(lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ43 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = #"i" + then yyQ44(strm', lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ42 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = #"i" + then yyQ43(strm', lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ41 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = #"c" + then yyQ42(strm', lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ36 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction16(strm, yyMATCH(strm, yyAction62, yyNO_MATCH)) + | SOME(inp, strm') => + if inp = #"s" + then yyQ41(strm', yyMATCH(strm, yyAction16, yyMATCH(strm, yyAction62, yyNO_MATCH))) + else yyAction16(strm, yyMATCH(strm, yyAction62, yyNO_MATCH)) + (* end case *)) +fun yyQ47 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = #"F" + then yyQ39(strm', lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ35 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction16(strm, yyMATCH(strm, yyAction62, yyNO_MATCH)) + | SOME(inp, strm') => + if inp = #"T" + then yyQ47(strm', yyMATCH(strm, yyAction16, yyMATCH(strm, yyAction62, yyNO_MATCH))) + else yyAction16(strm, yyMATCH(strm, yyAction62, yyNO_MATCH)) + (* end case *)) +fun yyQ50 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = #"I" + then yyQ44(strm', lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ49 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = #"I" + then yyQ50(strm', lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ48 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = #"C" + then yyQ49(strm', lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ34 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction16(strm, yyMATCH(strm, yyAction62, yyNO_MATCH)) + | SOME(inp, strm') => + if inp = #"S" + then yyQ48(strm', yyMATCH(strm, yyAction16, yyMATCH(strm, yyAction62, yyNO_MATCH))) + else yyAction16(strm, yyMATCH(strm, yyAction62, yyNO_MATCH)) + (* end case *)) +fun yyQ33 (strm, lastMatch : yymatch) = yyAction15(strm, yyNO_MATCH) +fun yyQ32 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction0(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"\^N" + then yyAction0(strm, yyNO_MATCH) + else if inp < #"\^N" + then if inp = #"\t" + then yyQ31(strm', yyMATCH(strm, yyAction0, yyNO_MATCH)) + else if inp < #"\t" + then yyAction0(strm, yyNO_MATCH) + else if inp = #"\r" + then yyQ51(strm', yyMATCH(strm, yyAction0, yyNO_MATCH)) + else yyQ31(strm', yyMATCH(strm, yyAction0, yyNO_MATCH)) + else if inp = #" " + then yyQ31(strm', yyMATCH(strm, yyAction0, yyNO_MATCH)) + else yyAction0(strm, yyNO_MATCH) + (* end case *)) +fun yyQ30 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction0(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"\^N" + then yyAction0(strm, yyNO_MATCH) + else if inp < #"\^N" + then if inp = #"\t" + then yyQ31(strm', yyMATCH(strm, yyAction0, yyNO_MATCH)) + else if inp < #"\t" + then yyAction0(strm, yyNO_MATCH) + else if inp = #"\r" + then yyQ51(strm', yyMATCH(strm, yyAction0, yyNO_MATCH)) + else yyQ31(strm', yyMATCH(strm, yyAction0, yyNO_MATCH)) + else if inp = #" " + then yyQ31(strm', yyMATCH(strm, yyAction0, yyNO_MATCH)) + else yyAction0(strm, yyNO_MATCH) + (* end case *)) +fun yyQ29 (strm, lastMatch : yymatch) = yyAction16(strm, yyMATCH(strm, yyAction62, yyNO_MATCH)) +fun yyQ3 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => + if yyInput.eof(!(yystrm)) + then UserDeclarations.eof(yyarg) + else yystuck(lastMatch) + | SOME(inp, strm') => + if inp = #"<" + then yyQ29(strm', lastMatch) + else if inp < #"<" + then if inp = #"\r" + then yyQ32(strm', lastMatch) + else if inp < #"\r" + then if inp = #"\n" + then yyQ31(strm', lastMatch) + else if inp < #"\n" + then if inp = #"\t" + then yyQ30(strm', lastMatch) + else yyQ29(strm', lastMatch) + else yyQ30(strm', lastMatch) + else if inp = #"!" + then yyQ29(strm', lastMatch) + else if inp < #"!" + then if inp = #" " + then yyQ30(strm', lastMatch) + else yyQ29(strm', lastMatch) + else if inp = #";" + then yyQ33(strm', lastMatch) + else yyQ29(strm', lastMatch) + else if inp = #"V" + then yyQ29(strm', lastMatch) + else if inp < #"V" + then if inp = #"B" + then yyQ29(strm', lastMatch) + else if inp < #"B" + then if inp = #"A" + then yyQ34(strm', lastMatch) + else yyQ29(strm', lastMatch) + else if inp = #"U" + then yyQ35(strm', lastMatch) + else yyQ29(strm', lastMatch) + else if inp = #"b" + then yyQ29(strm', lastMatch) + else if inp < #"b" + then if inp = #"a" + then yyQ36(strm', lastMatch) + else yyQ29(strm', lastMatch) + else if inp = #"u" + then yyQ37(strm', lastMatch) + else yyQ29(strm', lastMatch) + (* end case *)) +fun yyQ27 (strm, lastMatch : yymatch) = yyAction55(strm, yyNO_MATCH) +fun yyQ26 (strm, lastMatch : yymatch) = yyAction57(strm, yyNO_MATCH) +fun yyQ25 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction54(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"#" + then yyAction54(strm, yyNO_MATCH) + else if inp < #"#" + then if inp = #"\"" + then yyQ26(strm', yyMATCH(strm, yyAction54, yyNO_MATCH)) + else yyAction54(strm, yyNO_MATCH) + else if inp = #"\\" + then yyQ27(strm', yyMATCH(strm, yyAction54, yyNO_MATCH)) + else yyAction54(strm, yyNO_MATCH) + (* end case *)) +fun yyQ24 (strm, lastMatch : yymatch) = yyAction52(strm, yyNO_MATCH) +fun yyQ22 (strm, lastMatch : yymatch) = yyAction53(strm, yyNO_MATCH) +fun yyQ23 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction53(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"\n" + then yyQ22(strm', yyMATCH(strm, yyAction53, yyNO_MATCH)) + else yyAction53(strm, yyNO_MATCH) + (* end case *)) +fun yyQ28 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction56(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"\^N" + then yyQ28(strm', yyMATCH(strm, yyAction56, yyNO_MATCH)) + else if inp < #"\^N" + then if inp = #"\v" + then yyQ28(strm', yyMATCH(strm, yyAction56, yyNO_MATCH)) + else if inp < #"\v" + then if inp = #"\n" + then yyAction56(strm, yyNO_MATCH) + else yyQ28(strm', yyMATCH(strm, yyAction56, yyNO_MATCH)) + else if inp = #"\r" + then yyAction56(strm, yyNO_MATCH) + else yyQ28(strm', yyMATCH(strm, yyAction56, yyNO_MATCH)) + else if inp = #"#" + then yyQ28(strm', yyMATCH(strm, yyAction56, yyNO_MATCH)) + else if inp < #"#" + then if inp = #"\"" + then yyAction56(strm, yyNO_MATCH) + else yyQ28(strm', yyMATCH(strm, yyAction56, yyNO_MATCH)) + else if inp = #"\\" + then yyAction56(strm, yyNO_MATCH) + else yyQ28(strm', yyMATCH(strm, yyAction56, yyNO_MATCH)) + (* end case *)) +fun yyQ21 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction56(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"\^N" + then yyQ28(strm', yyMATCH(strm, yyAction56, yyNO_MATCH)) + else if inp < #"\^N" + then if inp = #"\v" + then yyQ28(strm', yyMATCH(strm, yyAction56, yyNO_MATCH)) + else if inp < #"\v" + then if inp = #"\n" + then yyAction56(strm, yyNO_MATCH) + else yyQ28(strm', yyMATCH(strm, yyAction56, yyNO_MATCH)) + else if inp = #"\r" + then yyAction56(strm, yyNO_MATCH) + else yyQ28(strm', yyMATCH(strm, yyAction56, yyNO_MATCH)) + else if inp = #"#" + then yyQ28(strm', yyMATCH(strm, yyAction56, yyNO_MATCH)) + else if inp < #"#" + then if inp = #"\"" + then yyAction56(strm, yyNO_MATCH) + else yyQ28(strm', yyMATCH(strm, yyAction56, yyNO_MATCH)) + else if inp = #"\\" + then yyAction56(strm, yyNO_MATCH) + else yyQ28(strm', yyMATCH(strm, yyAction56, yyNO_MATCH)) + (* end case *)) +fun yyQ2 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => + if yyInput.eof(!(yystrm)) + then UserDeclarations.eof(yyarg) + else yystuck(lastMatch) + | SOME(inp, strm') => + if inp = #"\^N" + then yyQ21(strm', lastMatch) + else if inp < #"\^N" + then if inp = #"\v" + then yyQ21(strm', lastMatch) + else if inp < #"\v" + then if inp = #"\n" + then yyQ22(strm', lastMatch) + else yyQ21(strm', lastMatch) + else if inp = #"\r" + then yyQ23(strm', lastMatch) + else yyQ21(strm', lastMatch) + else if inp = #"#" + then yyQ21(strm', lastMatch) + else if inp < #"#" + then if inp = #"\"" + then yyQ24(strm', lastMatch) + else yyQ21(strm', lastMatch) + else if inp = #"\\" + then yyQ25(strm', lastMatch) + else yyQ21(strm', lastMatch) + (* end case *)) +fun yyQ19 (strm, lastMatch : yymatch) = yyAction49(strm, yyNO_MATCH) +fun yyQ20 (strm, lastMatch : yymatch) = yyAction44(strm, yyNO_MATCH) +fun yyQ18 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction48(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"*" + then yyQ20(strm', yyMATCH(strm, yyAction48, yyNO_MATCH)) + else yyAction48(strm, yyNO_MATCH) + (* end case *)) +fun yyQ17 (strm, lastMatch : yymatch) = yyAction50(strm, yyNO_MATCH) +fun yyQ16 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction51(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"#" + then yyQ16(strm', yyMATCH(strm, yyAction51, yyNO_MATCH)) + else if inp < #"#" + then if inp = #"\"" + then yyAction51(strm, yyNO_MATCH) + else yyQ16(strm', yyMATCH(strm, yyAction51, yyNO_MATCH)) + else if inp = #"(" + then yyAction51(strm, yyNO_MATCH) + else if inp < #"(" + then yyQ16(strm', yyMATCH(strm, yyAction51, yyNO_MATCH)) + else if inp <= #")" + then yyAction51(strm, yyNO_MATCH) + else yyQ16(strm', yyMATCH(strm, yyAction51, yyNO_MATCH)) + (* end case *)) +fun yyQ15 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction51(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"#" + then yyQ16(strm', yyMATCH(strm, yyAction51, yyNO_MATCH)) + else if inp < #"#" + then if inp = #"\"" + then yyAction51(strm, yyNO_MATCH) + else yyQ16(strm', yyMATCH(strm, yyAction51, yyNO_MATCH)) + else if inp = #"(" + then yyAction51(strm, yyNO_MATCH) + else if inp < #"(" + then yyQ16(strm', yyMATCH(strm, yyAction51, yyNO_MATCH)) + else if inp <= #")" + then yyAction51(strm, yyNO_MATCH) + else yyQ16(strm', yyMATCH(strm, yyAction51, yyNO_MATCH)) + (* end case *)) +fun yyQ1 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => + if yyInput.eof(!(yystrm)) + then UserDeclarations.eof(yyarg) + else yystuck(lastMatch) + | SOME(inp, strm') => + if inp = #"#" + then yyQ15(strm', lastMatch) + else if inp < #"#" + then if inp = #"\v" + then yyQ15(strm', lastMatch) + else if inp < #"\v" + then if inp = #"\n" + then yyQ16(strm', lastMatch) + else yyQ15(strm', lastMatch) + else if inp = #"\"" + then yyQ17(strm', lastMatch) + else yyQ15(strm', lastMatch) + else if inp = #")" + then yyQ19(strm', lastMatch) + else if inp < #")" + then if inp = #"(" + then yyQ18(strm', lastMatch) + else yyQ15(strm', lastMatch) + else yyQ15(strm', lastMatch) + (* end case *)) +fun yyQ13 (strm, lastMatch : yymatch) = yyAction46(strm, yyNO_MATCH) +fun yyQ12 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction47(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #")" + then yyQ13(strm', yyMATCH(strm, yyAction47, yyNO_MATCH)) + else yyAction47(strm, yyNO_MATCH) + (* end case *)) +fun yyQ14 (strm, lastMatch : yymatch) = yyAction45(strm, yyNO_MATCH) +fun yyQ11 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction47(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"*" + then yyQ14(strm', yyMATCH(strm, yyAction47, yyNO_MATCH)) + else yyAction47(strm, yyNO_MATCH) + (* end case *)) +fun yyQ9 (strm, lastMatch : yymatch) = yyAction47(strm, yyNO_MATCH) +fun yyQ10 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction47(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"\n" + then yyQ9(strm', yyMATCH(strm, yyAction47, yyNO_MATCH)) + else yyAction47(strm, yyNO_MATCH) + (* end case *)) +fun yyQ8 (strm, lastMatch : yymatch) = yyAction47(strm, yyNO_MATCH) +fun yyQ0 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => + if yyInput.eof(!(yystrm)) + then UserDeclarations.eof(yyarg) + else yystuck(lastMatch) + | SOME(inp, strm') => + if inp = #"\^N" + then yyQ8(strm', lastMatch) + else if inp < #"\^N" + then if inp = #"\v" + then yyQ8(strm', lastMatch) + else if inp < #"\v" + then if inp = #"\n" + then yyQ9(strm', lastMatch) + else yyQ8(strm', lastMatch) + else if inp = #"\r" + then yyQ10(strm', lastMatch) + else yyQ8(strm', lastMatch) + else if inp = #")" + then yyQ8(strm', lastMatch) + else if inp < #")" + then if inp = #"(" + then yyQ11(strm', lastMatch) + else yyQ8(strm', lastMatch) + else if inp = #"*" + then yyQ12(strm', lastMatch) + else yyQ8(strm', lastMatch) + (* end case *)) +in + (case (!(yyss)) + of COM => yyQ0(!(yystrm), yyNO_MATCH) + | CODE => yyQ1(!(yystrm), yyNO_MATCH) + | STRING => yyQ2(!(yystrm), yyNO_MATCH) + | CHARSET => yyQ3(!(yystrm), yyNO_MATCH) + | CHARCLASS => yyQ4(!(yystrm), yyNO_MATCH) + | RESTRING => yyQ5(!(yystrm), yyNO_MATCH) + | INITIAL => yyQ6(!(yystrm), yyNO_MATCH) + | DIRECTIVE => yyQ7(!(yystrm), yyNO_MATCH) + (* end case *)) +end + end + in + continue() + handle IO.Io{cause, ...} => raise cause + end + in + lex + end + in + fun makeLexer yyinputN = mk (yyInput.mkStream yyinputN) + fun makeLexer' ins = mk (yyInput.mkStream ins) + end + + end diff --git a/ml-lpt/ml-ulex/FrontEnds/ml-ulex/ml-ulex-input-bootstrap.sml b/ml-lpt/ml-ulex/FrontEnds/ml-ulex/ml-ulex-input-bootstrap.sml new file mode 100644 index 0000000..abb9a95 --- /dev/null +++ b/ml-lpt/ml-ulex/FrontEnds/ml-ulex/ml-ulex-input-bootstrap.sml @@ -0,0 +1,50 @@ +(* ml-ulex-input-bootstrap.sml + * + * COPYRIGHT (c) 2005 + * John Reppy (http://www.cs.uchicago.edu/~jhr) + * Aaron Turon (adrassi@gmail.com) + * All rights reserved. + * + * Driver for ml-ulex input format. + *) + +(***** ADAPTED FROM ML-YACC *****) +structure Streamify = +struct + datatype str = EVAL of Tok.token * strm | UNEVAL of (unit -> Tok.token) + and strm = STRM of str ref + + fun lex(STRM (ref(EVAL t))) = SOME t + | lex(STRM (s as ref(UNEVAL f))) = let + val tok = f() + val t = (tok, STRM(ref(UNEVAL f))) + in + case tok + of Tok.EOF => NONE + | _ => (s := EVAL t; SOME(t)) + end + + fun streamify f = STRM(ref(UNEVAL f)) + fun cons(a,s) = STRM(ref(EVAL(a,s))) + +end + +structure MLULexInput : INPUT = + struct + + structure P = Parser(Streamify) + + fun parseFile fname = let + val strm = TextIO.openIn fname + val lexer = MLULexLex.makeLexer (fn n => TextIO.inputN (strm, n)) + val (spec, errors) = P.parse (Streamify.streamify lexer) + before TextIO.closeIn strm + fun errMsg (pos, err) = print (P.repairToString err ^ "\n") + in + if (null errors) + then SOME spec + else (app errMsg errors; NONE) + spec + end + + end diff --git a/ml-lpt/ml-ulex/FrontEnds/ml-ulex/ml-ulex-input.sml b/ml-lpt/ml-ulex/FrontEnds/ml-ulex/ml-ulex-input.sml new file mode 100644 index 0000000..57e22ab --- /dev/null +++ b/ml-lpt/ml-ulex/FrontEnds/ml-ulex/ml-ulex-input.sml @@ -0,0 +1,38 @@ +(* ml-ulex-input.sml + * + * COPYRIGHT (c) 2005 + * John Reppy (http://www.cs.uchicago.edu/~jhr) + * Aaron Turon (adrassi@gmail.com) + * All rights reserved. + * + * Driver for ml-ulex input format. + *) + +structure MLULexInput : INPUT = + struct + + structure SP = AntlrStreamPos + structure L = MLULexLex + structure P = MLULexParseFn(L) + + fun parseFile fname = let + val fstrm = TextIO.openIn fname + val strm = L.streamifyInstream fstrm + val sm = SP.mkSourcemap' fname + val lex = L.lex sm + val (spec, strm', errors, {errs}) = P.parse lex strm + fun errMsg ty (pos, err) = TextIO.output (TextIO.stdErr, String.concat [ + SP.toString sm pos, ty, ": ", err, "\n" + ]) + in + TextIO.closeIn fstrm; + if (null errors andalso null errs) + then spec + else ( + app (errMsg " Syntax error") + (map (fn (p, e) => (p, AntlrRepair.actionToString MLULexTokens.toString e)) errors); + app (errMsg "") (map (fn ((p, _), e) => (p, e)) errs); + NONE) + end + + end diff --git a/ml-lpt/ml-ulex/FrontEnds/ml-ulex/ml-ulex.grm b/ml-lpt/ml-ulex/FrontEnds/ml-ulex/ml-ulex.grm new file mode 100644 index 0000000..00b61ca --- /dev/null +++ b/ml-lpt/ml-ulex/FrontEnds/ml-ulex/ml-ulex.grm @@ -0,0 +1,201 @@ +(* ml-ulex.grm + * + * COPYRIGHT (c) 2006 + * John Reppy (http://www.cs.uchicago.edu/~jhr) + * Aaron Turon (http://www.cs.uchicago.edu/~adrassi) + * All rights reserved. + *) + +%defs ( + structure LS = LexSpec + structure AMap = AtomMap + structure RE = RegExp + structure SIS = RE.SymSet + + fun listToASet ls = AtomSet.addList (AtomSet.empty, ls) + fun charToSym c = Word.fromInt (Char.ord c) + val dashSet = SIS.singleton (charToSym #"-") + + fun flip (x, y) = (y, x) +); + +%name MLULex; + +%tokens + : BAR ("|") + | AMP ("&") + | DOT (".") + | DOLLAR ("$") + | PLUS ("+") + | STAR ("*") + | QUERY ("?") + | SEMI (";") + | LP ("(") | RP (")") + | LSB ("[") | RSB ("]") + | LCB ("{") | RCB ("}") + | LT ("<") | GT (">") + | SLASH ("/") + | COMMA (",") + | CARAT ("^") + | NEG ("~") + | DASH ("-") + | DARROW ("=>") + | EQ ("=") + | KW_defs ("%defs") + | KW_arg ("%arg") + | KW_header ("%header") + | KW_name ("%name") + | KW_states ("%states") + | KW_let ("%let") + | KW_charset ("%charset") + | UTF8 ("utf8") + | ASCII7 ("ascii7") + | ASCII8 ("ascii8") + | EOFMARK ("<>") + | INT of int + | CHAR of char + | UCHAR of UTF8.wchar + | ID of string + | CODE of string + | BOGUS + ; + +%keywords + "%defs", "%name", "%states", "%let", "%charset", "%arg", "%header" ; + +%refcell errs : (AntlrStreamPos.span * string) list = ([]); + +file + : decls@(LS.mkSpec(), AMap.empty) + ; + +decls(spec, env) + : decl@(spec, env) ";" decls@(decl) + => (decls) + | + => (spec) + ; + +decl(spec, env) + : directive@(LS.getConf spec, env) + => (let val (conf', env') = directive + in + (LS.updConf (spec, conf'), + env') + end) + | "%defs" CODE + => (LS.updDecls (spec, CODE), env) + | SS = ("<" (ID ","? => (ID))+ ">")? + main = + ( addNewlCheck = + ( => (fn c => c) + | "^" => (fn c => "if not yylastwasn then REJECT() else (" ^ c ^")")) + re@(env) "=>" CODE + => ( LS.addRule (spec, + ((Option.map (listToASet o (map Atom.atom)) SS, re), addNewlCheck CODE)), + env ) + | "<>" "=>" CODE + => ( case SS + of NONE => (LS.addEOFRule (spec, ("_", CODE)), env) + | SOME ss => (foldl (fn (s, spec) => LS.addEOFRuleFront (spec, (s, CODE))) + spec ss, + env) ) ) + => ( main ) + ; + +directive(conf, env) + : "%let" ID "=" re@(env) + => (conf, AMap.insert (env, Atom.atom ID, re)) + | "%arg" CODE + => (LS.updArg (conf, CODE), env) + | "%states" (ID ","? => (ID))+ + => (LS.updStartStates (conf, listToASet (map Atom.atom SR)), + env) + | "%charset" + ( "utf8" => (LS.updClamp (conf, LS.NO_CLAMP), env) + | "ascii7" => (LS.updClamp (conf, LS.CLAMP127), env) + | "ascii8" => (LS.updClamp (conf, LS.CLAMP255), env)) + | "%name" ID + => (LS.updStructName (conf, ID), env) + | "%header" CODE + => (LS.updHeader (conf, CODE), env) + ; + +re(env) + : or_re@(env) + ; + +or_re(env) + : and_re@(env) ("|" and_re@(env))* + => (foldl (RE.mkOr o flip) and_re SR) + ; + +and_re(env) + : cat_re@(env) ("&" cat_re@(env))* + => (foldl (RE.mkAnd o flip) cat_re SR) + ; + +cat_re(env) + : not_re@(env) (not_re@(env))* + => (foldl (RE.mkConcat o flip) not_re SR) + ; + +not_re(env) + : "~" post_re@(env) + => (RE.mkNot post_re) + | post_re@(env) + ; + +post_re(env) + : prim_re@(env) + ( "?" => (RE.mkOpt) + | "*" => (RE.mkClosure) + | "+" => (fn re => RE.mkAtLeast (re, 1)) + | "{" INT "}" => (fn re => RE.mkRep (re, INT, INT)) + | "{" INT "," INT "}" => (fn re => RE.mkRep (re, INT1, INT2)) + | => (fn x => x) + ) + => (SR prim_re) + ; + +prim_re(env) + : "{" ID "}" + => (case (AMap.find (env, Atom.atom ID)) + of SOME re => re + | NONE => (errs := (ID_SPAN, String.concat [ + "Error: {", ID, "} is undefined."])::(!errs); + RE.any)) + | "(" re@(env) ")" + | char + => (RE.mkSym char) + | "." + => (RE.mkSymSet SIS.universe) + | "[" + ( "^" => (SIS.complement) + | %try "-" => (fn x => SIS.union (x, dashSet)) + | %try => (fn x => x) + ) + + ( char "-" char => + (if char1 <= char2 then + SIS.interval (char1, char2) + else (errs := (FULL_SPAN, String.concat [ + "Error: malformed character class: ", + Word.toString char1, " - ", + Word.toString char2, "."])::(!errs); + SIS.universe)) + | char => (SIS.singleton char) + )* + + ("-" => (dashSet) + | => (SIS.empty)) + "]" + => (RE.mkSymSet + (SR1 (foldl SIS.union SR3 SR2))) + ; + +char + : CHAR + => (charToSym CHAR) + | UCHAR + ; diff --git a/ml-lpt/ml-ulex/FrontEnds/ml-ulex/ml-ulex.grm.sml b/ml-lpt/ml-ulex/FrontEnds/ml-ulex/ml-ulex.grm.sml new file mode 100644 index 0000000..02037fc --- /dev/null +++ b/ml-lpt/ml-ulex/FrontEnds/ml-ulex/ml-ulex.grm.sml @@ -0,0 +1,1232 @@ +structure +MLULexTokens = struct + + datatype token = EOF + | BOGUS + | CODE of string + | ID of string + | UCHAR of UTF8.wchar + | CHAR of char + | INT of int + | EOFMARK + | ASCII8 + | ASCII7 + | UTF8 + | KW_charset + | KW_let + | KW_states + | KW_name + | KW_header + | KW_arg + | KW_defs + | EQ + | DARROW + | DASH + | NEG + | CARAT + | COMMA + | SLASH + | GT + | LT + | RCB + | LCB + | RSB + | LSB + | RP + | LP + | SEMI + | QUERY + | STAR + | PLUS + | DOLLAR + | DOT + | AMP + | BAR + + val allToks = [EOF, BOGUS, EOFMARK, ASCII8, ASCII7, UTF8, KW_charset, KW_let, KW_states, KW_name, KW_header, KW_arg, KW_defs, EQ, DARROW, DASH, NEG, CARAT, COMMA, SLASH, GT, LT, RCB, LCB, RSB, LSB, RP, LP, SEMI, QUERY, STAR, PLUS, DOLLAR, DOT, AMP, BAR] + + fun toString tok = +(case (tok) + of (EOF) => "EOF" + | (BOGUS) => "BOGUS" + | (CODE(_)) => "CODE" + | (ID(_)) => "ID" + | (UCHAR(_)) => "UCHAR" + | (CHAR(_)) => "CHAR" + | (INT(_)) => "INT" + | (EOFMARK) => "<>" + | (ASCII8) => "ascii8" + | (ASCII7) => "ascii7" + | (UTF8) => "utf8" + | (KW_charset) => "%charset" + | (KW_let) => "%let" + | (KW_states) => "%states" + | (KW_name) => "%name" + | (KW_header) => "%header" + | (KW_arg) => "%arg" + | (KW_defs) => "%defs" + | (EQ) => "=" + | (DARROW) => "=>" + | (DASH) => "-" + | (NEG) => "~" + | (CARAT) => "^" + | (COMMA) => "," + | (SLASH) => "/" + | (GT) => ">" + | (LT) => "<" + | (RCB) => "}" + | (LCB) => "{" + | (RSB) => "]" + | (LSB) => "[" + | (RP) => ")" + | (LP) => "(" + | (SEMI) => ";" + | (QUERY) => "?" + | (STAR) => "*" + | (PLUS) => "+" + | (DOLLAR) => "$" + | (DOT) => "." + | (AMP) => "&" + | (BAR) => "|" +(* end case *)) + fun isKW tok = +(case (tok) + of (EOF) => false + | (BOGUS) => false + | (CODE(_)) => false + | (ID(_)) => false + | (UCHAR(_)) => false + | (CHAR(_)) => false + | (INT(_)) => false + | (EOFMARK) => false + | (ASCII8) => false + | (ASCII7) => false + | (UTF8) => false + | (KW_charset) => true + | (KW_let) => true + | (KW_states) => true + | (KW_name) => true + | (KW_header) => true + | (KW_arg) => true + | (KW_defs) => true + | (EQ) => false + | (DARROW) => false + | (DASH) => false + | (NEG) => false + | (CARAT) => false + | (COMMA) => false + | (SLASH) => false + | (GT) => false + | (LT) => false + | (RCB) => false + | (LCB) => false + | (RSB) => false + | (LSB) => false + | (RP) => false + | (LP) => false + | (SEMI) => false + | (QUERY) => false + | (STAR) => false + | (PLUS) => false + | (DOLLAR) => false + | (DOT) => false + | (AMP) => false + | (BAR) => false +(* end case *)) + + fun isEOF EOF = true + | isEOF _ = false + +end + +functor MLULexParseFn(Lex : ANTLR_LEXER) = struct + + local + structure Tok = +MLULexTokens + structure UserCode = + struct + + structure LS = LexSpec + structure AMap = AtomMap + structure RE = RegExp + structure SIS = RE.SymSet + + fun listToASet ls = AtomSet.addList (AtomSet.empty, ls) + fun charToSym c = Word.fromInt (Char.ord c) + val dashSet = SIS.singleton (charToSym #"-") + + fun flip (x, y) = (y, x) + +fun decls_PROD_1_ACT (env, SEMI, decl, spec, decls, SEMI_SPAN : (Lex.pos * Lex.pos), decl_SPAN : (Lex.pos * Lex.pos), decls_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), errs) = + (decls) +fun decls_PROD_2_ACT (env, spec, FULL_SPAN : (Lex.pos * Lex.pos), errs) = + (spec) +fun decl_PROD_1_ACT (env, spec, directive, directive_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), errs) = + (let val (conf', env') = directive + in + (LS.updConf (spec, conf'), + env') + end) +fun decl_PROD_2_ACT (env, CODE, spec, KW_defs, CODE_SPAN : (Lex.pos * Lex.pos), KW_defs_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), errs) = + (LS.updDecls (spec, CODE), env) +fun decl_PROD_3_SUBRULE_1_PROD_1_SUBRULE_1_PROD_1_ACT (ID, LT, env, spec, COMMA, ID_SPAN : (Lex.pos * Lex.pos), LT_SPAN : (Lex.pos * Lex.pos), COMMA_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), errs) = + (ID) +fun decl_PROD_3_SUBRULE_2_PROD_1_SUBRULE_1_PROD_1_ACT (SS, env, spec, SS_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), errs) = + (fn c => c) +fun decl_PROD_3_SUBRULE_2_PROD_1_SUBRULE_1_PROD_2_ACT (SS, env, spec, CARAT, SS_SPAN : (Lex.pos * Lex.pos), CARAT_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), errs) = + (fn c => "if not yylastwasn then REJECT() else (" ^ c ^")") +fun decl_PROD_3_SUBRULE_2_PROD_1_ACT (SS, re, env, CODE, spec, addNewlCheck, DARROW, SS_SPAN : (Lex.pos * Lex.pos), re_SPAN : (Lex.pos * Lex.pos), CODE_SPAN : (Lex.pos * Lex.pos), addNewlCheck_SPAN : (Lex.pos * Lex.pos), DARROW_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), errs) = + ( LS.addRule (spec, + ((Option.map (listToASet o (map Atom.atom)) SS, re), addNewlCheck CODE)), + env ) +fun decl_PROD_3_SUBRULE_2_PROD_2_ACT (SS, env, CODE, spec, EOFMARK, DARROW, SS_SPAN : (Lex.pos * Lex.pos), CODE_SPAN : (Lex.pos * Lex.pos), EOFMARK_SPAN : (Lex.pos * Lex.pos), DARROW_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), errs) = + ( case SS + of NONE => (LS.addEOFRule (spec, ("_", CODE)), env) + | SOME ss => (foldl (fn (s, spec) => LS.addEOFRuleFront (spec, (s, CODE))) + spec ss, + env) ) +fun decl_PROD_3_ACT (SS, env, main, spec, SS_SPAN : (Lex.pos * Lex.pos), main_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), errs) = + ( main ) +fun directive_PROD_1_ACT (EQ, ID, re, env, conf, KW_let, EQ_SPAN : (Lex.pos * Lex.pos), ID_SPAN : (Lex.pos * Lex.pos), re_SPAN : (Lex.pos * Lex.pos), KW_let_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), errs) = + (conf, AMap.insert (env, Atom.atom ID, re)) +fun directive_PROD_2_ACT (env, CODE, conf, KW_arg, CODE_SPAN : (Lex.pos * Lex.pos), KW_arg_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), errs) = + (LS.updArg (conf, CODE), env) +fun directive_PROD_3_SUBRULE_1_PROD_1_ACT (ID, env, conf, COMMA, KW_states, ID_SPAN : (Lex.pos * Lex.pos), COMMA_SPAN : (Lex.pos * Lex.pos), KW_states_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), errs) = + (ID) +fun directive_PROD_3_ACT (SR, env, conf, KW_states, SR_SPAN : (Lex.pos * Lex.pos), KW_states_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), errs) = + (LS.updStartStates (conf, listToASet (map Atom.atom SR)), + env) +fun directive_PROD_4_SUBRULE_1_PROD_1_ACT (env, UTF8, conf, KW_charset, UTF8_SPAN : (Lex.pos * Lex.pos), KW_charset_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), errs) = + (LS.updClamp (conf, LS.NO_CLAMP), env) +fun directive_PROD_4_SUBRULE_1_PROD_2_ACT (env, conf, ASCII7, KW_charset, ASCII7_SPAN : (Lex.pos * Lex.pos), KW_charset_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), errs) = + (LS.updClamp (conf, LS.CLAMP127), env) +fun directive_PROD_4_SUBRULE_1_PROD_3_ACT (env, conf, ASCII8, KW_charset, ASCII8_SPAN : (Lex.pos * Lex.pos), KW_charset_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), errs) = + (LS.updClamp (conf, LS.CLAMP255), env) +fun directive_PROD_5_ACT (ID, env, conf, KW_name, ID_SPAN : (Lex.pos * Lex.pos), KW_name_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), errs) = + (LS.updStructName (conf, ID), env) +fun directive_PROD_6_ACT (env, CODE, conf, KW_header, CODE_SPAN : (Lex.pos * Lex.pos), KW_header_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), errs) = + (LS.updHeader (conf, CODE), env) +fun or_re_PROD_1_ACT (SR, env, and_re, SR_SPAN : (Lex.pos * Lex.pos), and_re_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), errs) = + (foldl (RE.mkOr o flip) and_re SR) +fun and_re_PROD_1_ACT (SR, env, cat_re, SR_SPAN : (Lex.pos * Lex.pos), cat_re_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), errs) = + (foldl (RE.mkAnd o flip) cat_re SR) +fun cat_re_PROD_1_ACT (SR, env, not_re, SR_SPAN : (Lex.pos * Lex.pos), not_re_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), errs) = + (foldl (RE.mkConcat o flip) not_re SR) +fun not_re_PROD_1_ACT (NEG, env, post_re, NEG_SPAN : (Lex.pos * Lex.pos), post_re_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), errs) = + (RE.mkNot post_re) +fun post_re_PROD_1_SUBRULE_1_PROD_1_ACT (env, prim_re, QUERY, prim_re_SPAN : (Lex.pos * Lex.pos), QUERY_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), errs) = + (RE.mkOpt) +fun post_re_PROD_1_SUBRULE_1_PROD_2_ACT (env, STAR, prim_re, STAR_SPAN : (Lex.pos * Lex.pos), prim_re_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), errs) = + (RE.mkClosure) +fun post_re_PROD_1_SUBRULE_1_PROD_3_ACT (env, PLUS, prim_re, PLUS_SPAN : (Lex.pos * Lex.pos), prim_re_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), errs) = + (fn re => RE.mkAtLeast (re, 1)) +fun post_re_PROD_1_SUBRULE_1_PROD_4_ACT (INT, LCB, RCB, env, prim_re, INT_SPAN : (Lex.pos * Lex.pos), LCB_SPAN : (Lex.pos * Lex.pos), RCB_SPAN : (Lex.pos * Lex.pos), prim_re_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), errs) = + (fn re => RE.mkRep (re, INT, INT)) +fun post_re_PROD_1_SUBRULE_1_PROD_5_ACT (LCB, RCB, env, INT1, INT2, prim_re, COMMA, LCB_SPAN : (Lex.pos * Lex.pos), RCB_SPAN : (Lex.pos * Lex.pos), INT1_SPAN : (Lex.pos * Lex.pos), INT2_SPAN : (Lex.pos * Lex.pos), prim_re_SPAN : (Lex.pos * Lex.pos), COMMA_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), errs) = + (fn re => RE.mkRep (re, INT1, INT2)) +fun post_re_PROD_1_SUBRULE_1_PROD_6_ACT (env, prim_re, prim_re_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), errs) = + (fn x => x) +fun post_re_PROD_1_ACT (SR, env, prim_re, SR_SPAN : (Lex.pos * Lex.pos), prim_re_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), errs) = + (SR prim_re) +fun prim_re_PROD_1_ACT (ID, LCB, RCB, env, ID_SPAN : (Lex.pos * Lex.pos), LCB_SPAN : (Lex.pos * Lex.pos), RCB_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), errs) = + (case (AMap.find (env, Atom.atom ID)) + of SOME re => re + | NONE => (errs := (ID_SPAN, String.concat [ + "Error: {", ID, "} is undefined."])::(!errs); + RE.any)) +fun prim_re_PROD_3_ACT (env, char, char_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), errs) = + (RE.mkSym char) +fun prim_re_PROD_4_ACT (DOT, env, DOT_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), errs) = + (RE.mkSymSet SIS.universe) +fun prim_re_PROD_5_SUBRULE_1_PROD_1_ACT (LSB, env, CARAT, LSB_SPAN : (Lex.pos * Lex.pos), CARAT_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), errs) = + (SIS.complement) +fun prim_re_PROD_5_SUBRULE_1_PROD_2_ACT (LSB, env, DASH, LSB_SPAN : (Lex.pos * Lex.pos), DASH_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), errs) = + (fn x => SIS.union (x, dashSet)) +fun prim_re_PROD_5_SUBRULE_1_PROD_3_ACT (LSB, env, LSB_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), errs) = + (fn x => x) +fun prim_re_PROD_5_SUBRULE_2_PROD_1_ACT (LSB, SR1, env, DASH, char1, char2, LSB_SPAN : (Lex.pos * Lex.pos), SR1_SPAN : (Lex.pos * Lex.pos), DASH_SPAN : (Lex.pos * Lex.pos), char1_SPAN : (Lex.pos * Lex.pos), char2_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), errs) = + (if char1 <= char2 then + SIS.interval (char1, char2) + else (errs := (FULL_SPAN, String.concat [ + "Error: malformed character class: ", + Word.toString char1, " - ", + Word.toString char2, "."])::(!errs); + SIS.universe)) +fun prim_re_PROD_5_SUBRULE_2_PROD_2_ACT (LSB, SR1, env, char, LSB_SPAN : (Lex.pos * Lex.pos), SR1_SPAN : (Lex.pos * Lex.pos), char_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), errs) = + (SIS.singleton char) +fun prim_re_PROD_5_SUBRULE_3_PROD_1_ACT (LSB, SR1, SR2, env, DASH, LSB_SPAN : (Lex.pos * Lex.pos), SR1_SPAN : (Lex.pos * Lex.pos), SR2_SPAN : (Lex.pos * Lex.pos), DASH_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), errs) = + (dashSet) +fun prim_re_PROD_5_SUBRULE_3_PROD_2_ACT (LSB, SR1, SR2, env, LSB_SPAN : (Lex.pos * Lex.pos), SR1_SPAN : (Lex.pos * Lex.pos), SR2_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), errs) = + (SIS.empty) +fun prim_re_PROD_5_ACT (LSB, RSB, SR1, SR2, SR3, env, LSB_SPAN : (Lex.pos * Lex.pos), RSB_SPAN : (Lex.pos * Lex.pos), SR1_SPAN : (Lex.pos * Lex.pos), SR2_SPAN : (Lex.pos * Lex.pos), SR3_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), errs) = + (RE.mkSymSet + (SR1 (foldl SIS.union SR3 SR2))) +fun char_PROD_1_ACT (CHAR, CHAR_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), errs) = + (charToSym CHAR) +fun ARGS_4 (errs) = + (LS.mkSpec(), AMap.empty) +fun ARGS_6 (env, spec, errs) = + (spec, env) +fun ARGS_7 (env, SEMI, decl, spec, errs) = + (decl) +fun ARGS_10 (env, spec, errs) = + (LS.getConf spec, env) +fun ARGS_17 (SS, env, spec, addNewlCheck, errs) = + (env) +fun ARGS_20 (EQ, ID, env, conf, KW_let, errs) = + (env) +fun ARGS_29 (env, errs) = + (env) +fun ARGS_32 (BAR, env, and_re, errs) = + (env) +fun ARGS_31 (env, errs) = + (env) +fun ARGS_35 (AMP, env, cat_re, errs) = + (env) +fun ARGS_34 (env, errs) = + (env) +fun ARGS_38 (env, not_re, errs) = + (env) +fun ARGS_37 (env, errs) = + (env) +fun ARGS_40 (NEG, env, errs) = + (env) +fun ARGS_41 (env, errs) = + (env) +fun ARGS_43 (env, errs) = + (env) +fun ARGS_51 (LP, env, errs) = + (env) +fun mkerrs_REFC() : ((AntlrStreamPos.span * string) list) ref = ref ([]) + end (* UserCode *) + + structure Err = AntlrErrHandler( + structure Tok = Tok + structure Lex = Lex) + structure EBNF = AntlrEBNF( + struct + type strm = Err.wstream + val getSpan = Err.getSpan + end) + + fun mk lexFn = let +val errs_REFC = UserCode.mkerrs_REFC() +fun getS() = {errs = !errs_REFC} +fun putS{errs} = (errs_REFC := errs) +fun unwrap (ret, strm, repairs) = (ret, strm, repairs, getS()) val (eh, lex) = Err.mkErrHandler {get = getS, put = putS} + fun fail() = Err.failure eh + fun tryProds (strm, prods) = let + fun try [] = fail() + | try (prod :: prods) = + (Err.whileDisabled eh (fn() => prod strm)) + handle Err.ParseError => try (prods) + in try prods end +fun matchEOF strm = (case (lex(strm)) + of (Tok.EOF, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchBOGUS strm = (case (lex(strm)) + of (Tok.BOGUS, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchCODE strm = (case (lex(strm)) + of (Tok.CODE(x), span, strm') => (x, span, strm') + | _ => fail() +(* end case *)) +fun matchID strm = (case (lex(strm)) + of (Tok.ID(x), span, strm') => (x, span, strm') + | _ => fail() +(* end case *)) +fun matchUCHAR strm = (case (lex(strm)) + of (Tok.UCHAR(x), span, strm') => (x, span, strm') + | _ => fail() +(* end case *)) +fun matchCHAR strm = (case (lex(strm)) + of (Tok.CHAR(x), span, strm') => (x, span, strm') + | _ => fail() +(* end case *)) +fun matchINT strm = (case (lex(strm)) + of (Tok.INT(x), span, strm') => (x, span, strm') + | _ => fail() +(* end case *)) +fun matchEOFMARK strm = (case (lex(strm)) + of (Tok.EOFMARK, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchASCII8 strm = (case (lex(strm)) + of (Tok.ASCII8, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchASCII7 strm = (case (lex(strm)) + of (Tok.ASCII7, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchUTF8 strm = (case (lex(strm)) + of (Tok.UTF8, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchKW_charset strm = (case (lex(strm)) + of (Tok.KW_charset, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchKW_let strm = (case (lex(strm)) + of (Tok.KW_let, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchKW_states strm = (case (lex(strm)) + of (Tok.KW_states, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchKW_name strm = (case (lex(strm)) + of (Tok.KW_name, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchKW_header strm = (case (lex(strm)) + of (Tok.KW_header, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchKW_arg strm = (case (lex(strm)) + of (Tok.KW_arg, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchKW_defs strm = (case (lex(strm)) + of (Tok.KW_defs, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchEQ strm = (case (lex(strm)) + of (Tok.EQ, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchDARROW strm = (case (lex(strm)) + of (Tok.DARROW, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchDASH strm = (case (lex(strm)) + of (Tok.DASH, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchNEG strm = (case (lex(strm)) + of (Tok.NEG, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchCARAT strm = (case (lex(strm)) + of (Tok.CARAT, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchCOMMA strm = (case (lex(strm)) + of (Tok.COMMA, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchSLASH strm = (case (lex(strm)) + of (Tok.SLASH, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchGT strm = (case (lex(strm)) + of (Tok.GT, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchLT strm = (case (lex(strm)) + of (Tok.LT, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchRCB strm = (case (lex(strm)) + of (Tok.RCB, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchLCB strm = (case (lex(strm)) + of (Tok.LCB, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchRSB strm = (case (lex(strm)) + of (Tok.RSB, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchLSB strm = (case (lex(strm)) + of (Tok.LSB, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchRP strm = (case (lex(strm)) + of (Tok.RP, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchLP strm = (case (lex(strm)) + of (Tok.LP, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchSEMI strm = (case (lex(strm)) + of (Tok.SEMI, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchQUERY strm = (case (lex(strm)) + of (Tok.QUERY, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchSTAR strm = (case (lex(strm)) + of (Tok.STAR, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchPLUS strm = (case (lex(strm)) + of (Tok.PLUS, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchDOLLAR strm = (case (lex(strm)) + of (Tok.DOLLAR, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchDOT strm = (case (lex(strm)) + of (Tok.DOT, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchAMP strm = (case (lex(strm)) + of (Tok.AMP, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchBAR strm = (case (lex(strm)) + of (Tok.BAR, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) + +val (file_NT) = +let +fun char_NT (strm) = let + fun char_PROD_1 (strm) = let + val (CHAR_RES, CHAR_SPAN, strm') = matchCHAR(strm) + val FULL_SPAN = (#1(CHAR_SPAN), #2(CHAR_SPAN)) + in + (UserCode.char_PROD_1_ACT (CHAR_RES, CHAR_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), errs_REFC), + FULL_SPAN, strm') + end + fun char_PROD_2 (strm) = let + val (UCHAR_RES, UCHAR_SPAN, strm') = matchUCHAR(strm) + val FULL_SPAN = (#1(UCHAR_SPAN), #2(UCHAR_SPAN)) + in + ((UCHAR_RES), FULL_SPAN, strm') + end + in + (case (lex(strm)) + of (Tok.UCHAR(_), _, strm') => char_PROD_2(strm) + | (Tok.CHAR(_), _, strm') => char_PROD_1(strm) + | _ => fail() + (* end case *)) + end +fun re_NT (env_RES) (strm) = let + val (or_re_RES, or_re_SPAN, strm') = (or_re_NT (UserCode.ARGS_29 (env_RES, errs_REFC)))(strm) + val FULL_SPAN = (#1(or_re_SPAN), #2(or_re_SPAN)) + in + ((or_re_RES), FULL_SPAN, strm') + end +and or_re_NT (env_RES) (strm) = let + val (and_re_RES, and_re_SPAN, strm') = (and_re_NT (UserCode.ARGS_31 (env_RES, errs_REFC)))(strm) + fun or_re_PROD_1_SUBRULE_1_NT (strm) = let + val (BAR_RES, BAR_SPAN, strm') = matchBAR(strm) + val (and_re_RES, and_re_SPAN, strm') = (and_re_NT (UserCode.ARGS_32 (BAR_RES, env_RES, and_re_RES, errs_REFC)))(strm') + val FULL_SPAN = (#1(BAR_SPAN), #2(and_re_SPAN)) + in + ((and_re_RES), FULL_SPAN, strm') + end + fun or_re_PROD_1_SUBRULE_1_PRED (strm) = (case (lex(strm)) + of (Tok.BAR, _, strm') => true + | _ => false + (* end case *)) + val (SR_RES, SR_SPAN, strm') = EBNF.closure(or_re_PROD_1_SUBRULE_1_PRED, or_re_PROD_1_SUBRULE_1_NT, strm') + val FULL_SPAN = (#1(and_re_SPAN), #2(SR_SPAN)) + in + (UserCode.or_re_PROD_1_ACT (SR_RES, env_RES, and_re_RES, SR_SPAN : (Lex.pos * Lex.pos), and_re_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), errs_REFC), + FULL_SPAN, strm') + end +and and_re_NT (env_RES) (strm) = let + val (cat_re_RES, cat_re_SPAN, strm') = (cat_re_NT (UserCode.ARGS_34 (env_RES, errs_REFC)))(strm) + fun and_re_PROD_1_SUBRULE_1_NT (strm) = let + val (AMP_RES, AMP_SPAN, strm') = matchAMP(strm) + val (cat_re_RES, cat_re_SPAN, strm') = (cat_re_NT (UserCode.ARGS_35 (AMP_RES, env_RES, cat_re_RES, errs_REFC)))(strm') + val FULL_SPAN = (#1(AMP_SPAN), #2(cat_re_SPAN)) + in + ((cat_re_RES), FULL_SPAN, strm') + end + fun and_re_PROD_1_SUBRULE_1_PRED (strm) = (case (lex(strm)) + of (Tok.AMP, _, strm') => true + | _ => false + (* end case *)) + val (SR_RES, SR_SPAN, strm') = EBNF.closure(and_re_PROD_1_SUBRULE_1_PRED, and_re_PROD_1_SUBRULE_1_NT, strm') + val FULL_SPAN = (#1(cat_re_SPAN), #2(SR_SPAN)) + in + (UserCode.and_re_PROD_1_ACT (SR_RES, env_RES, cat_re_RES, SR_SPAN : (Lex.pos * Lex.pos), cat_re_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), errs_REFC), + FULL_SPAN, strm') + end +and cat_re_NT (env_RES) (strm) = let + val (not_re_RES, not_re_SPAN, strm') = (not_re_NT (UserCode.ARGS_37 (env_RES, errs_REFC)))(strm) + fun cat_re_PROD_1_SUBRULE_1_NT (strm) = let + val (not_re_RES, not_re_SPAN, strm') = (not_re_NT (UserCode.ARGS_38 (env_RES, not_re_RES, errs_REFC)))(strm) + val FULL_SPAN = (#1(not_re_SPAN), #2(not_re_SPAN)) + in + ((not_re_RES), FULL_SPAN, strm') + end + fun cat_re_PROD_1_SUBRULE_1_PRED (strm) = (case (lex(strm)) + of (Tok.DOT, _, strm') => true + | (Tok.LP, _, strm') => true + | (Tok.LSB, _, strm') => true + | (Tok.LCB, _, strm') => true + | (Tok.NEG, _, strm') => true + | (Tok.CHAR(_), _, strm') => true + | (Tok.UCHAR(_), _, strm') => true + | _ => false + (* end case *)) + val (SR_RES, SR_SPAN, strm') = EBNF.closure(cat_re_PROD_1_SUBRULE_1_PRED, cat_re_PROD_1_SUBRULE_1_NT, strm') + val FULL_SPAN = (#1(not_re_SPAN), #2(SR_SPAN)) + in + (UserCode.cat_re_PROD_1_ACT (SR_RES, env_RES, not_re_RES, SR_SPAN : (Lex.pos * Lex.pos), not_re_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), errs_REFC), + FULL_SPAN, strm') + end +and not_re_NT (env_RES) (strm) = let + fun not_re_PROD_1 (strm) = let + val (NEG_RES, NEG_SPAN, strm') = matchNEG(strm) + val (post_re_RES, post_re_SPAN, strm') = (post_re_NT (UserCode.ARGS_40 (NEG_RES, env_RES, errs_REFC)))(strm') + val FULL_SPAN = (#1(NEG_SPAN), #2(post_re_SPAN)) + in + (UserCode.not_re_PROD_1_ACT (NEG_RES, env_RES, post_re_RES, NEG_SPAN : (Lex.pos * Lex.pos), post_re_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), errs_REFC), + FULL_SPAN, strm') + end + fun not_re_PROD_2 (strm) = let + val (post_re_RES, post_re_SPAN, strm') = (post_re_NT (UserCode.ARGS_41 (env_RES, errs_REFC)))(strm) + val FULL_SPAN = (#1(post_re_SPAN), #2(post_re_SPAN)) + in + ((post_re_RES), FULL_SPAN, strm') + end + in + (case (lex(strm)) + of (Tok.DOT, _, strm') => not_re_PROD_2(strm) + | (Tok.LP, _, strm') => not_re_PROD_2(strm) + | (Tok.LSB, _, strm') => not_re_PROD_2(strm) + | (Tok.LCB, _, strm') => not_re_PROD_2(strm) + | (Tok.CHAR(_), _, strm') => not_re_PROD_2(strm) + | (Tok.UCHAR(_), _, strm') => not_re_PROD_2(strm) + | (Tok.NEG, _, strm') => not_re_PROD_1(strm) + | _ => fail() + (* end case *)) + end +and post_re_NT (env_RES) (strm) = let + val (prim_re_RES, prim_re_SPAN, strm') = (prim_re_NT (UserCode.ARGS_43 (env_RES, errs_REFC)))(strm) + val (SR_RES, SR_SPAN, strm') = let + fun post_re_PROD_1_SUBRULE_1_NT (strm) = let + fun post_re_PROD_1_SUBRULE_1_PROD_1 (strm) = let + val (QUERY_RES, QUERY_SPAN, strm') = matchQUERY(strm) + val FULL_SPAN = (#1(QUERY_SPAN), #2(QUERY_SPAN)) + in + (UserCode.post_re_PROD_1_SUBRULE_1_PROD_1_ACT (env_RES, prim_re_RES, QUERY_RES, prim_re_SPAN : (Lex.pos * Lex.pos), QUERY_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), errs_REFC), + FULL_SPAN, strm') + end + fun post_re_PROD_1_SUBRULE_1_PROD_2 (strm) = let + val (STAR_RES, STAR_SPAN, strm') = matchSTAR(strm) + val FULL_SPAN = (#1(STAR_SPAN), #2(STAR_SPAN)) + in + (UserCode.post_re_PROD_1_SUBRULE_1_PROD_2_ACT (env_RES, STAR_RES, prim_re_RES, STAR_SPAN : (Lex.pos * Lex.pos), prim_re_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), errs_REFC), + FULL_SPAN, strm') + end + fun post_re_PROD_1_SUBRULE_1_PROD_3 (strm) = let + val (PLUS_RES, PLUS_SPAN, strm') = matchPLUS(strm) + val FULL_SPAN = (#1(PLUS_SPAN), #2(PLUS_SPAN)) + in + (UserCode.post_re_PROD_1_SUBRULE_1_PROD_3_ACT (env_RES, PLUS_RES, prim_re_RES, PLUS_SPAN : (Lex.pos * Lex.pos), prim_re_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), errs_REFC), + FULL_SPAN, strm') + end + fun post_re_PROD_1_SUBRULE_1_PROD_4 (strm) = let + val (LCB_RES, LCB_SPAN, strm') = matchLCB(strm) + val (INT_RES, INT_SPAN, strm') = matchINT(strm') + val (RCB_RES, RCB_SPAN, strm') = matchRCB(strm') + val FULL_SPAN = (#1(LCB_SPAN), #2(RCB_SPAN)) + in + (UserCode.post_re_PROD_1_SUBRULE_1_PROD_4_ACT (INT_RES, LCB_RES, RCB_RES, env_RES, prim_re_RES, INT_SPAN : (Lex.pos * Lex.pos), LCB_SPAN : (Lex.pos * Lex.pos), RCB_SPAN : (Lex.pos * Lex.pos), prim_re_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), errs_REFC), + FULL_SPAN, strm') + end + fun post_re_PROD_1_SUBRULE_1_PROD_5 (strm) = let + val (LCB_RES, LCB_SPAN, strm') = matchLCB(strm) + val (INT1_RES, INT1_SPAN, strm') = matchINT(strm') + val (COMMA_RES, COMMA_SPAN, strm') = matchCOMMA(strm') + val (INT2_RES, INT2_SPAN, strm') = matchINT(strm') + val (RCB_RES, RCB_SPAN, strm') = matchRCB(strm') + val FULL_SPAN = (#1(LCB_SPAN), #2(RCB_SPAN)) + in + (UserCode.post_re_PROD_1_SUBRULE_1_PROD_5_ACT (LCB_RES, RCB_RES, env_RES, INT1_RES, INT2_RES, prim_re_RES, COMMA_RES, LCB_SPAN : (Lex.pos * Lex.pos), RCB_SPAN : (Lex.pos * Lex.pos), INT1_SPAN : (Lex.pos * Lex.pos), INT2_SPAN : (Lex.pos * Lex.pos), prim_re_SPAN : (Lex.pos * Lex.pos), COMMA_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), errs_REFC), + FULL_SPAN, strm') + end + fun post_re_PROD_1_SUBRULE_1_PROD_6 (strm) = let + val FULL_SPAN = (Err.getPos(strm), Err.getPos(strm)) + in + (UserCode.post_re_PROD_1_SUBRULE_1_PROD_6_ACT (env_RES, prim_re_RES, prim_re_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), errs_REFC), + FULL_SPAN, strm) + end + in + (case (lex(strm)) + of (Tok.BAR, _, strm') => post_re_PROD_1_SUBRULE_1_PROD_6(strm) + | (Tok.AMP, _, strm') => post_re_PROD_1_SUBRULE_1_PROD_6(strm) + | (Tok.DOT, _, strm') => post_re_PROD_1_SUBRULE_1_PROD_6(strm) + | (Tok.SEMI, _, strm') => post_re_PROD_1_SUBRULE_1_PROD_6(strm) + | (Tok.LP, _, strm') => post_re_PROD_1_SUBRULE_1_PROD_6(strm) + | (Tok.RP, _, strm') => post_re_PROD_1_SUBRULE_1_PROD_6(strm) + | (Tok.LSB, _, strm') => post_re_PROD_1_SUBRULE_1_PROD_6(strm) + | (Tok.NEG, _, strm') => post_re_PROD_1_SUBRULE_1_PROD_6(strm) + | (Tok.DARROW, _, strm') => + post_re_PROD_1_SUBRULE_1_PROD_6(strm) + | (Tok.CHAR(_), _, strm') => + post_re_PROD_1_SUBRULE_1_PROD_6(strm) + | (Tok.UCHAR(_), _, strm') => + post_re_PROD_1_SUBRULE_1_PROD_6(strm) + | (Tok.LCB, _, strm') => + (case (lex(strm')) + of (Tok.ID(_), _, strm') => + post_re_PROD_1_SUBRULE_1_PROD_6(strm) + | (Tok.INT(_), _, strm') => + (case (lex(strm')) + of (Tok.COMMA, _, strm') => + post_re_PROD_1_SUBRULE_1_PROD_5(strm) + | (Tok.RCB, _, strm') => + post_re_PROD_1_SUBRULE_1_PROD_4(strm) + | _ => fail() + (* end case *)) + | _ => fail() + (* end case *)) + | (Tok.STAR, _, strm') => post_re_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.QUERY, _, strm') => + post_re_PROD_1_SUBRULE_1_PROD_1(strm) + | (Tok.PLUS, _, strm') => post_re_PROD_1_SUBRULE_1_PROD_3(strm) + | _ => fail() + (* end case *)) + end + in + post_re_PROD_1_SUBRULE_1_NT(strm') + end + val FULL_SPAN = (#1(prim_re_SPAN), #2(SR_SPAN)) + in + (UserCode.post_re_PROD_1_ACT (SR_RES, env_RES, prim_re_RES, SR_SPAN : (Lex.pos * Lex.pos), prim_re_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), errs_REFC), + FULL_SPAN, strm') + end +and prim_re_NT (env_RES) (strm) = let + fun prim_re_PROD_1 (strm) = let + val (LCB_RES, LCB_SPAN, strm') = matchLCB(strm) + val (ID_RES, ID_SPAN, strm') = matchID(strm') + val (RCB_RES, RCB_SPAN, strm') = matchRCB(strm') + val FULL_SPAN = (#1(LCB_SPAN), #2(RCB_SPAN)) + in + (UserCode.prim_re_PROD_1_ACT (ID_RES, LCB_RES, RCB_RES, env_RES, ID_SPAN : (Lex.pos * Lex.pos), LCB_SPAN : (Lex.pos * Lex.pos), RCB_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), errs_REFC), + FULL_SPAN, strm') + end + fun prim_re_PROD_2 (strm) = let + val (LP_RES, LP_SPAN, strm') = matchLP(strm) + val (re_RES, re_SPAN, strm') = (re_NT (UserCode.ARGS_51 (LP_RES, env_RES, errs_REFC)))(strm') + val (RP_RES, RP_SPAN, strm') = matchRP(strm') + val FULL_SPAN = (#1(LP_SPAN), #2(RP_SPAN)) + in + ((re_RES), FULL_SPAN, strm') + end + fun prim_re_PROD_3 (strm) = let + val (char_RES, char_SPAN, strm') = char_NT(strm) + val FULL_SPAN = (#1(char_SPAN), #2(char_SPAN)) + in + (UserCode.prim_re_PROD_3_ACT (env_RES, char_RES, char_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), errs_REFC), + FULL_SPAN, strm') + end + fun prim_re_PROD_4 (strm) = let + val (DOT_RES, DOT_SPAN, strm') = matchDOT(strm) + val FULL_SPAN = (#1(DOT_SPAN), #2(DOT_SPAN)) + in + (UserCode.prim_re_PROD_4_ACT (DOT_RES, env_RES, DOT_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), errs_REFC), + FULL_SPAN, strm') + end + fun prim_re_PROD_5 (strm) = let + val (LSB_RES, LSB_SPAN, strm') = matchLSB(strm) + val (SR1_RES, SR1_SPAN, strm') = let + fun prim_re_PROD_5_SUBRULE_1_NT (strm) = let + fun prim_re_PROD_5_SUBRULE_1_PROD_1 (strm) = let + val (CARAT_RES, CARAT_SPAN, strm') = matchCARAT(strm) + val FULL_SPAN = (#1(CARAT_SPAN), #2(CARAT_SPAN)) + in + (UserCode.prim_re_PROD_5_SUBRULE_1_PROD_1_ACT (LSB_RES, env_RES, CARAT_RES, LSB_SPAN : (Lex.pos * Lex.pos), CARAT_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), errs_REFC), + FULL_SPAN, strm') + end + fun prim_re_PROD_5_SUBRULE_1_PROD_2 (strm) = let + val (DASH_RES, DASH_SPAN, strm') = matchDASH(strm) + val FULL_SPAN = (#1(DASH_SPAN), #2(DASH_SPAN)) + in + (UserCode.prim_re_PROD_5_SUBRULE_1_PROD_2_ACT (LSB_RES, env_RES, DASH_RES, LSB_SPAN : (Lex.pos * Lex.pos), DASH_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), errs_REFC), + FULL_SPAN, strm') + end + fun prim_re_PROD_5_SUBRULE_1_PROD_3 (strm) = let + val FULL_SPAN = (Err.getPos(strm), Err.getPos(strm)) + in + (UserCode.prim_re_PROD_5_SUBRULE_1_PROD_3_ACT (LSB_RES, env_RES, LSB_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), errs_REFC), + FULL_SPAN, strm) + end + in + (case (lex(strm)) + of (Tok.RSB, _, strm') => + prim_re_PROD_5_SUBRULE_1_PROD_3(strm) + | (Tok.CHAR(_), _, strm') => + prim_re_PROD_5_SUBRULE_1_PROD_3(strm) + | (Tok.UCHAR(_), _, strm') => + prim_re_PROD_5_SUBRULE_1_PROD_3(strm) + | (Tok.CARAT, _, strm') => + prim_re_PROD_5_SUBRULE_1_PROD_1(strm) + | (Tok.DASH, _, strm') => + tryProds(strm, [prim_re_PROD_5_SUBRULE_1_PROD_2, + prim_re_PROD_5_SUBRULE_1_PROD_3]) + | _ => fail() + (* end case *)) + end + in + prim_re_PROD_5_SUBRULE_1_NT(strm') + end + fun prim_re_PROD_5_SUBRULE_2_NT (strm) = let + fun prim_re_PROD_5_SUBRULE_2_PROD_1 (strm) = let + val (char1_RES, char1_SPAN, strm') = char_NT(strm) + val (DASH_RES, DASH_SPAN, strm') = matchDASH(strm') + val (char2_RES, char2_SPAN, strm') = char_NT(strm') + val FULL_SPAN = (#1(char1_SPAN), #2(char2_SPAN)) + in + (UserCode.prim_re_PROD_5_SUBRULE_2_PROD_1_ACT (LSB_RES, SR1_RES, env_RES, DASH_RES, char1_RES, char2_RES, LSB_SPAN : (Lex.pos * Lex.pos), SR1_SPAN : (Lex.pos * Lex.pos), DASH_SPAN : (Lex.pos * Lex.pos), char1_SPAN : (Lex.pos * Lex.pos), char2_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), errs_REFC), + FULL_SPAN, strm') + end + fun prim_re_PROD_5_SUBRULE_2_PROD_2 (strm) = let + val (char_RES, char_SPAN, strm') = char_NT(strm) + val FULL_SPAN = (#1(char_SPAN), #2(char_SPAN)) + in + (UserCode.prim_re_PROD_5_SUBRULE_2_PROD_2_ACT (LSB_RES, SR1_RES, env_RES, char_RES, LSB_SPAN : (Lex.pos * Lex.pos), SR1_SPAN : (Lex.pos * Lex.pos), char_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), errs_REFC), + FULL_SPAN, strm') + end + in + (case (lex(strm)) + of (Tok.CHAR(_), _, strm') => + (case (lex(strm')) + of (Tok.DASH, _, strm') => + (case (lex(strm')) + of (Tok.RSB, _, strm') => + prim_re_PROD_5_SUBRULE_2_PROD_2(strm) + | (Tok.CHAR(_), _, strm') => + prim_re_PROD_5_SUBRULE_2_PROD_1(strm) + | (Tok.UCHAR(_), _, strm') => + prim_re_PROD_5_SUBRULE_2_PROD_1(strm) + | _ => fail() + (* end case *)) + | (Tok.RSB, _, strm') => + prim_re_PROD_5_SUBRULE_2_PROD_2(strm) + | (Tok.CHAR(_), _, strm') => + prim_re_PROD_5_SUBRULE_2_PROD_2(strm) + | (Tok.UCHAR(_), _, strm') => + prim_re_PROD_5_SUBRULE_2_PROD_2(strm) + | _ => fail() + (* end case *)) + | (Tok.UCHAR(_), _, strm') => + (case (lex(strm')) + of (Tok.DASH, _, strm') => + (case (lex(strm')) + of (Tok.RSB, _, strm') => + prim_re_PROD_5_SUBRULE_2_PROD_2(strm) + | (Tok.CHAR(_), _, strm') => + prim_re_PROD_5_SUBRULE_2_PROD_1(strm) + | (Tok.UCHAR(_), _, strm') => + prim_re_PROD_5_SUBRULE_2_PROD_1(strm) + | _ => fail() + (* end case *)) + | (Tok.RSB, _, strm') => + prim_re_PROD_5_SUBRULE_2_PROD_2(strm) + | (Tok.CHAR(_), _, strm') => + prim_re_PROD_5_SUBRULE_2_PROD_2(strm) + | (Tok.UCHAR(_), _, strm') => + prim_re_PROD_5_SUBRULE_2_PROD_2(strm) + | _ => fail() + (* end case *)) + | _ => fail() + (* end case *)) + end + fun prim_re_PROD_5_SUBRULE_2_PRED (strm) = (case (lex(strm)) + of (Tok.CHAR(_), _, strm') => true + | (Tok.UCHAR(_), _, strm') => true + | _ => false + (* end case *)) + val (SR2_RES, SR2_SPAN, strm') = EBNF.closure(prim_re_PROD_5_SUBRULE_2_PRED, prim_re_PROD_5_SUBRULE_2_NT, strm') + val (SR3_RES, SR3_SPAN, strm') = let + fun prim_re_PROD_5_SUBRULE_3_NT (strm) = let + fun prim_re_PROD_5_SUBRULE_3_PROD_1 (strm) = let + val (DASH_RES, DASH_SPAN, strm') = matchDASH(strm) + val FULL_SPAN = (#1(DASH_SPAN), #2(DASH_SPAN)) + in + (UserCode.prim_re_PROD_5_SUBRULE_3_PROD_1_ACT (LSB_RES, SR1_RES, SR2_RES, env_RES, DASH_RES, LSB_SPAN : (Lex.pos * Lex.pos), SR1_SPAN : (Lex.pos * Lex.pos), SR2_SPAN : (Lex.pos * Lex.pos), DASH_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), errs_REFC), + FULL_SPAN, strm') + end + fun prim_re_PROD_5_SUBRULE_3_PROD_2 (strm) = let + val FULL_SPAN = (Err.getPos(strm), Err.getPos(strm)) + in + (UserCode.prim_re_PROD_5_SUBRULE_3_PROD_2_ACT (LSB_RES, SR1_RES, SR2_RES, env_RES, LSB_SPAN : (Lex.pos * Lex.pos), SR1_SPAN : (Lex.pos * Lex.pos), SR2_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), errs_REFC), + FULL_SPAN, strm) + end + in + (case (lex(strm)) + of (Tok.RSB, _, strm') => + prim_re_PROD_5_SUBRULE_3_PROD_2(strm) + | (Tok.DASH, _, strm') => + prim_re_PROD_5_SUBRULE_3_PROD_1(strm) + | _ => fail() + (* end case *)) + end + in + prim_re_PROD_5_SUBRULE_3_NT(strm') + end + val (RSB_RES, RSB_SPAN, strm') = matchRSB(strm') + val FULL_SPAN = (#1(LSB_SPAN), #2(RSB_SPAN)) + in + (UserCode.prim_re_PROD_5_ACT (LSB_RES, RSB_RES, SR1_RES, SR2_RES, SR3_RES, env_RES, LSB_SPAN : (Lex.pos * Lex.pos), RSB_SPAN : (Lex.pos * Lex.pos), SR1_SPAN : (Lex.pos * Lex.pos), SR2_SPAN : (Lex.pos * Lex.pos), SR3_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), errs_REFC), + FULL_SPAN, strm') + end + in + (case (lex(strm)) + of (Tok.LSB, _, strm') => prim_re_PROD_5(strm) + | (Tok.CHAR(_), _, strm') => prim_re_PROD_3(strm) + | (Tok.UCHAR(_), _, strm') => prim_re_PROD_3(strm) + | (Tok.LCB, _, strm') => prim_re_PROD_1(strm) + | (Tok.LP, _, strm') => prim_re_PROD_2(strm) + | (Tok.DOT, _, strm') => prim_re_PROD_4(strm) + | _ => fail() + (* end case *)) + end +fun directive_NT (conf_RES, env_RES) (strm) = let + fun directive_PROD_1 (strm) = let + val (KW_let_RES, KW_let_SPAN, strm') = matchKW_let(strm) + val (ID_RES, ID_SPAN, strm') = matchID(strm') + val (EQ_RES, EQ_SPAN, strm') = matchEQ(strm') + val (re_RES, re_SPAN, strm') = (re_NT (UserCode.ARGS_20 (EQ_RES, ID_RES, env_RES, conf_RES, KW_let_RES, errs_REFC)))(strm') + val FULL_SPAN = (#1(KW_let_SPAN), #2(re_SPAN)) + in + (UserCode.directive_PROD_1_ACT (EQ_RES, ID_RES, re_RES, env_RES, conf_RES, KW_let_RES, EQ_SPAN : (Lex.pos * Lex.pos), ID_SPAN : (Lex.pos * Lex.pos), re_SPAN : (Lex.pos * Lex.pos), KW_let_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), errs_REFC), + FULL_SPAN, strm') + end + fun directive_PROD_2 (strm) = let + val (KW_arg_RES, KW_arg_SPAN, strm') = matchKW_arg(strm) + val (CODE_RES, CODE_SPAN, strm') = matchCODE(strm') + val FULL_SPAN = (#1(KW_arg_SPAN), #2(CODE_SPAN)) + in + (UserCode.directive_PROD_2_ACT (env_RES, CODE_RES, conf_RES, KW_arg_RES, CODE_SPAN : (Lex.pos * Lex.pos), KW_arg_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), errs_REFC), + FULL_SPAN, strm') + end + fun directive_PROD_3 (strm) = let + val (KW_states_RES, KW_states_SPAN, strm') = matchKW_states(strm) + fun directive_PROD_3_SUBRULE_1_NT (strm) = let + val (ID_RES, ID_SPAN, strm') = matchID(strm) + fun directive_PROD_3_SUBRULE_1_PROD_1_SUBRULE_1_NT (strm) = let + val (COMMA_RES, COMMA_SPAN, strm') = matchCOMMA(strm) + val FULL_SPAN = (#1(COMMA_SPAN), #2(COMMA_SPAN)) + in + ((), FULL_SPAN, strm') + end + fun directive_PROD_3_SUBRULE_1_PROD_1_SUBRULE_1_PRED (strm) = (case (lex(strm)) + of (Tok.COMMA, _, strm') => true + | _ => false + (* end case *)) + val (COMMA_RES, COMMA_SPAN, strm') = EBNF.optional(directive_PROD_3_SUBRULE_1_PROD_1_SUBRULE_1_PRED, directive_PROD_3_SUBRULE_1_PROD_1_SUBRULE_1_NT, strm') + val FULL_SPAN = (#1(ID_SPAN), #2(COMMA_SPAN)) + in + (UserCode.directive_PROD_3_SUBRULE_1_PROD_1_ACT (ID_RES, env_RES, conf_RES, COMMA_RES, KW_states_RES, ID_SPAN : (Lex.pos * Lex.pos), COMMA_SPAN : (Lex.pos * Lex.pos), KW_states_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), errs_REFC), + FULL_SPAN, strm') + end + fun directive_PROD_3_SUBRULE_1_PRED (strm) = (case (lex(strm)) + of (Tok.ID(_), _, strm') => true + | _ => false + (* end case *)) + val (SR_RES, SR_SPAN, strm') = EBNF.posclos(directive_PROD_3_SUBRULE_1_PRED, directive_PROD_3_SUBRULE_1_NT, strm') + val FULL_SPAN = (#1(KW_states_SPAN), #2(SR_SPAN)) + in + (UserCode.directive_PROD_3_ACT (SR_RES, env_RES, conf_RES, KW_states_RES, SR_SPAN : (Lex.pos * Lex.pos), KW_states_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), errs_REFC), + FULL_SPAN, strm') + end + fun directive_PROD_4 (strm) = let + val (KW_charset_RES, KW_charset_SPAN, strm') = matchKW_charset(strm) + val (SR_RES, SR_SPAN, strm') = let + fun directive_PROD_4_SUBRULE_1_NT (strm) = let + fun directive_PROD_4_SUBRULE_1_PROD_1 (strm) = let + val (UTF8_RES, UTF8_SPAN, strm') = matchUTF8(strm) + val FULL_SPAN = (#1(UTF8_SPAN), #2(UTF8_SPAN)) + in + (UserCode.directive_PROD_4_SUBRULE_1_PROD_1_ACT (env_RES, UTF8_RES, conf_RES, KW_charset_RES, UTF8_SPAN : (Lex.pos * Lex.pos), KW_charset_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), errs_REFC), + FULL_SPAN, strm') + end + fun directive_PROD_4_SUBRULE_1_PROD_2 (strm) = let + val (ASCII7_RES, ASCII7_SPAN, strm') = matchASCII7(strm) + val FULL_SPAN = (#1(ASCII7_SPAN), #2(ASCII7_SPAN)) + in + (UserCode.directive_PROD_4_SUBRULE_1_PROD_2_ACT (env_RES, conf_RES, ASCII7_RES, KW_charset_RES, ASCII7_SPAN : (Lex.pos * Lex.pos), KW_charset_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), errs_REFC), + FULL_SPAN, strm') + end + fun directive_PROD_4_SUBRULE_1_PROD_3 (strm) = let + val (ASCII8_RES, ASCII8_SPAN, strm') = matchASCII8(strm) + val FULL_SPAN = (#1(ASCII8_SPAN), #2(ASCII8_SPAN)) + in + (UserCode.directive_PROD_4_SUBRULE_1_PROD_3_ACT (env_RES, conf_RES, ASCII8_RES, KW_charset_RES, ASCII8_SPAN : (Lex.pos * Lex.pos), KW_charset_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), errs_REFC), + FULL_SPAN, strm') + end + in + (case (lex(strm)) + of (Tok.ASCII8, _, strm') => + directive_PROD_4_SUBRULE_1_PROD_3(strm) + | (Tok.UTF8, _, strm') => + directive_PROD_4_SUBRULE_1_PROD_1(strm) + | (Tok.ASCII7, _, strm') => + directive_PROD_4_SUBRULE_1_PROD_2(strm) + | _ => fail() + (* end case *)) + end + in + directive_PROD_4_SUBRULE_1_NT(strm') + end + val FULL_SPAN = (#1(KW_charset_SPAN), #2(SR_SPAN)) + in + ((SR_RES), FULL_SPAN, strm') + end + fun directive_PROD_5 (strm) = let + val (KW_name_RES, KW_name_SPAN, strm') = matchKW_name(strm) + val (ID_RES, ID_SPAN, strm') = matchID(strm') + val FULL_SPAN = (#1(KW_name_SPAN), #2(ID_SPAN)) + in + (UserCode.directive_PROD_5_ACT (ID_RES, env_RES, conf_RES, KW_name_RES, ID_SPAN : (Lex.pos * Lex.pos), KW_name_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), errs_REFC), + FULL_SPAN, strm') + end + fun directive_PROD_6 (strm) = let + val (KW_header_RES, KW_header_SPAN, strm') = matchKW_header(strm) + val (CODE_RES, CODE_SPAN, strm') = matchCODE(strm') + val FULL_SPAN = (#1(KW_header_SPAN), #2(CODE_SPAN)) + in + (UserCode.directive_PROD_6_ACT (env_RES, CODE_RES, conf_RES, KW_header_RES, CODE_SPAN : (Lex.pos * Lex.pos), KW_header_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), errs_REFC), + FULL_SPAN, strm') + end + in + (case (lex(strm)) + of (Tok.KW_header, _, strm') => directive_PROD_6(strm) + | (Tok.KW_charset, _, strm') => directive_PROD_4(strm) + | (Tok.KW_arg, _, strm') => directive_PROD_2(strm) + | (Tok.KW_let, _, strm') => directive_PROD_1(strm) + | (Tok.KW_states, _, strm') => directive_PROD_3(strm) + | (Tok.KW_name, _, strm') => directive_PROD_5(strm) + | _ => fail() + (* end case *)) + end +fun decl_NT (spec_RES, env_RES) (strm) = let + fun decl_PROD_1 (strm) = let + val (directive_RES, directive_SPAN, strm') = (directive_NT (UserCode.ARGS_10 (env_RES, spec_RES, errs_REFC)))(strm) + val FULL_SPAN = (#1(directive_SPAN), #2(directive_SPAN)) + in + (UserCode.decl_PROD_1_ACT (env_RES, spec_RES, directive_RES, directive_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), errs_REFC), + FULL_SPAN, strm') + end + fun decl_PROD_2 (strm) = let + val (KW_defs_RES, KW_defs_SPAN, strm') = matchKW_defs(strm) + val (CODE_RES, CODE_SPAN, strm') = matchCODE(strm') + val FULL_SPAN = (#1(KW_defs_SPAN), #2(CODE_SPAN)) + in + (UserCode.decl_PROD_2_ACT (env_RES, CODE_RES, spec_RES, KW_defs_RES, CODE_SPAN : (Lex.pos * Lex.pos), KW_defs_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), errs_REFC), + FULL_SPAN, strm') + end + fun decl_PROD_3 (strm) = let + fun decl_PROD_3_SUBRULE_1_NT (strm) = let + val (LT_RES, LT_SPAN, strm') = matchLT(strm) + fun decl_PROD_3_SUBRULE_1_PROD_1_SUBRULE_1_NT (strm) = let + val (ID_RES, ID_SPAN, strm') = matchID(strm) + fun decl_PROD_3_SUBRULE_1_PROD_1_SUBRULE_1_PROD_1_SUBRULE_1_NT (strm) = let + val (COMMA_RES, COMMA_SPAN, strm') = matchCOMMA(strm) + val FULL_SPAN = (#1(COMMA_SPAN), #2(COMMA_SPAN)) + in + ((), FULL_SPAN, strm') + end + fun decl_PROD_3_SUBRULE_1_PROD_1_SUBRULE_1_PROD_1_SUBRULE_1_PRED (strm) = (case (lex(strm)) + of (Tok.COMMA, _, strm') => true + | _ => false + (* end case *)) + val (COMMA_RES, COMMA_SPAN, strm') = EBNF.optional(decl_PROD_3_SUBRULE_1_PROD_1_SUBRULE_1_PROD_1_SUBRULE_1_PRED, decl_PROD_3_SUBRULE_1_PROD_1_SUBRULE_1_PROD_1_SUBRULE_1_NT, strm') + val FULL_SPAN = (#1(ID_SPAN), #2(COMMA_SPAN)) + in + (UserCode.decl_PROD_3_SUBRULE_1_PROD_1_SUBRULE_1_PROD_1_ACT (ID_RES, LT_RES, env_RES, spec_RES, COMMA_RES, ID_SPAN : (Lex.pos * Lex.pos), LT_SPAN : (Lex.pos * Lex.pos), COMMA_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), errs_REFC), + FULL_SPAN, strm') + end + fun decl_PROD_3_SUBRULE_1_PROD_1_SUBRULE_1_PRED (strm) = (case (lex(strm)) + of (Tok.ID(_), _, strm') => true + | _ => false + (* end case *)) + val (SR_RES, SR_SPAN, strm') = EBNF.posclos(decl_PROD_3_SUBRULE_1_PROD_1_SUBRULE_1_PRED, decl_PROD_3_SUBRULE_1_PROD_1_SUBRULE_1_NT, strm') + val (GT_RES, GT_SPAN, strm') = matchGT(strm') + val FULL_SPAN = (#1(LT_SPAN), #2(GT_SPAN)) + in + ((SR_RES), FULL_SPAN, strm') + end + fun decl_PROD_3_SUBRULE_1_PRED (strm) = (case (lex(strm)) + of (Tok.LT, _, strm') => true + | _ => false + (* end case *)) + val (SS_RES, SS_SPAN, strm') = EBNF.optional(decl_PROD_3_SUBRULE_1_PRED, decl_PROD_3_SUBRULE_1_NT, strm) + val (main_RES, main_SPAN, strm') = let + fun decl_PROD_3_SUBRULE_2_NT (strm) = let + fun decl_PROD_3_SUBRULE_2_PROD_1 (strm) = let + val (addNewlCheck_RES, addNewlCheck_SPAN, strm') = let + fun decl_PROD_3_SUBRULE_2_PROD_1_SUBRULE_1_NT (strm) = let + fun decl_PROD_3_SUBRULE_2_PROD_1_SUBRULE_1_PROD_1 (strm) = let + val FULL_SPAN = (Err.getPos(strm), + Err.getPos(strm)) + in + (UserCode.decl_PROD_3_SUBRULE_2_PROD_1_SUBRULE_1_PROD_1_ACT (SS_RES, env_RES, spec_RES, SS_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), errs_REFC), + FULL_SPAN, strm) + end + fun decl_PROD_3_SUBRULE_2_PROD_1_SUBRULE_1_PROD_2 (strm) = let + val (CARAT_RES, CARAT_SPAN, strm') = matchCARAT(strm) + val FULL_SPAN = (#1(CARAT_SPAN), + #2(CARAT_SPAN)) + in + (UserCode.decl_PROD_3_SUBRULE_2_PROD_1_SUBRULE_1_PROD_2_ACT (SS_RES, env_RES, spec_RES, CARAT_RES, SS_SPAN : (Lex.pos * Lex.pos), CARAT_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), errs_REFC), + FULL_SPAN, strm') + end + in + (case (lex(strm)) + of (Tok.CARAT, _, strm') => + decl_PROD_3_SUBRULE_2_PROD_1_SUBRULE_1_PROD_2(strm) + | (Tok.DOT, _, strm') => + decl_PROD_3_SUBRULE_2_PROD_1_SUBRULE_1_PROD_1(strm) + | (Tok.LP, _, strm') => + decl_PROD_3_SUBRULE_2_PROD_1_SUBRULE_1_PROD_1(strm) + | (Tok.LSB, _, strm') => + decl_PROD_3_SUBRULE_2_PROD_1_SUBRULE_1_PROD_1(strm) + | (Tok.LCB, _, strm') => + decl_PROD_3_SUBRULE_2_PROD_1_SUBRULE_1_PROD_1(strm) + | (Tok.NEG, _, strm') => + decl_PROD_3_SUBRULE_2_PROD_1_SUBRULE_1_PROD_1(strm) + | (Tok.CHAR(_), _, strm') => + decl_PROD_3_SUBRULE_2_PROD_1_SUBRULE_1_PROD_1(strm) + | (Tok.UCHAR(_), _, strm') => + decl_PROD_3_SUBRULE_2_PROD_1_SUBRULE_1_PROD_1(strm) + | _ => fail() + (* end case *)) + end + in + decl_PROD_3_SUBRULE_2_PROD_1_SUBRULE_1_NT(strm) + end + val (re_RES, re_SPAN, strm') = (re_NT (UserCode.ARGS_17 (SS_RES, env_RES, spec_RES, addNewlCheck_RES, errs_REFC)))(strm') + val (DARROW_RES, DARROW_SPAN, strm') = matchDARROW(strm') + val (CODE_RES, CODE_SPAN, strm') = matchCODE(strm') + val FULL_SPAN = (#1(addNewlCheck_SPAN), #2(CODE_SPAN)) + in + (UserCode.decl_PROD_3_SUBRULE_2_PROD_1_ACT (SS_RES, re_RES, env_RES, CODE_RES, spec_RES, addNewlCheck_RES, DARROW_RES, SS_SPAN : (Lex.pos * Lex.pos), re_SPAN : (Lex.pos * Lex.pos), CODE_SPAN : (Lex.pos * Lex.pos), addNewlCheck_SPAN : (Lex.pos * Lex.pos), DARROW_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), errs_REFC), + FULL_SPAN, strm') + end + fun decl_PROD_3_SUBRULE_2_PROD_2 (strm) = let + val (EOFMARK_RES, EOFMARK_SPAN, strm') = matchEOFMARK(strm) + val (DARROW_RES, DARROW_SPAN, strm') = matchDARROW(strm') + val (CODE_RES, CODE_SPAN, strm') = matchCODE(strm') + val FULL_SPAN = (#1(EOFMARK_SPAN), #2(CODE_SPAN)) + in + (UserCode.decl_PROD_3_SUBRULE_2_PROD_2_ACT (SS_RES, env_RES, CODE_RES, spec_RES, EOFMARK_RES, DARROW_RES, SS_SPAN : (Lex.pos * Lex.pos), CODE_SPAN : (Lex.pos * Lex.pos), EOFMARK_SPAN : (Lex.pos * Lex.pos), DARROW_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), errs_REFC), + FULL_SPAN, strm') + end + in + (case (lex(strm)) + of (Tok.EOFMARK, _, strm') => + decl_PROD_3_SUBRULE_2_PROD_2(strm) + | (Tok.DOT, _, strm') => + decl_PROD_3_SUBRULE_2_PROD_1(strm) + | (Tok.LP, _, strm') => + decl_PROD_3_SUBRULE_2_PROD_1(strm) + | (Tok.LSB, _, strm') => + decl_PROD_3_SUBRULE_2_PROD_1(strm) + | (Tok.LCB, _, strm') => + decl_PROD_3_SUBRULE_2_PROD_1(strm) + | (Tok.CARAT, _, strm') => + decl_PROD_3_SUBRULE_2_PROD_1(strm) + | (Tok.NEG, _, strm') => + decl_PROD_3_SUBRULE_2_PROD_1(strm) + | (Tok.CHAR(_), _, strm') => + decl_PROD_3_SUBRULE_2_PROD_1(strm) + | (Tok.UCHAR(_), _, strm') => + decl_PROD_3_SUBRULE_2_PROD_1(strm) + | _ => fail() + (* end case *)) + end + in + decl_PROD_3_SUBRULE_2_NT(strm') + end + val FULL_SPAN = (#1(SS_SPAN), #2(main_SPAN)) + in + (UserCode.decl_PROD_3_ACT (SS_RES, env_RES, main_RES, spec_RES, SS_SPAN : (Lex.pos * Lex.pos), main_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), errs_REFC), + FULL_SPAN, strm') + end + in + (case (lex(strm)) + of (Tok.DOT, _, strm') => decl_PROD_3(strm) + | (Tok.LP, _, strm') => decl_PROD_3(strm) + | (Tok.LSB, _, strm') => decl_PROD_3(strm) + | (Tok.LCB, _, strm') => decl_PROD_3(strm) + | (Tok.LT, _, strm') => decl_PROD_3(strm) + | (Tok.CARAT, _, strm') => decl_PROD_3(strm) + | (Tok.NEG, _, strm') => decl_PROD_3(strm) + | (Tok.EOFMARK, _, strm') => decl_PROD_3(strm) + | (Tok.CHAR(_), _, strm') => decl_PROD_3(strm) + | (Tok.UCHAR(_), _, strm') => decl_PROD_3(strm) + | (Tok.KW_arg, _, strm') => decl_PROD_1(strm) + | (Tok.KW_header, _, strm') => decl_PROD_1(strm) + | (Tok.KW_name, _, strm') => decl_PROD_1(strm) + | (Tok.KW_states, _, strm') => decl_PROD_1(strm) + | (Tok.KW_let, _, strm') => decl_PROD_1(strm) + | (Tok.KW_charset, _, strm') => decl_PROD_1(strm) + | (Tok.KW_defs, _, strm') => decl_PROD_2(strm) + | _ => fail() + (* end case *)) + end +fun decls_NT (spec_RES, env_RES) (strm) = let + fun decls_PROD_1 (strm) = let + val (decl_RES, decl_SPAN, strm') = (decl_NT (UserCode.ARGS_6 (env_RES, spec_RES, errs_REFC)))(strm) + val (SEMI_RES, SEMI_SPAN, strm') = matchSEMI(strm') + val (decls_RES, decls_SPAN, strm') = (decls_NT (UserCode.ARGS_7 (env_RES, SEMI_RES, decl_RES, spec_RES, errs_REFC)))(strm') + val FULL_SPAN = (#1(decl_SPAN), #2(decls_SPAN)) + in + (UserCode.decls_PROD_1_ACT (env_RES, SEMI_RES, decl_RES, spec_RES, decls_RES, SEMI_SPAN : (Lex.pos * Lex.pos), decl_SPAN : (Lex.pos * Lex.pos), decls_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos), errs_REFC), + FULL_SPAN, strm') + end + fun decls_PROD_2 (strm) = let + val FULL_SPAN = (Err.getPos(strm), Err.getPos(strm)) + in + (UserCode.decls_PROD_2_ACT (env_RES, spec_RES, FULL_SPAN : (Lex.pos * Lex.pos), errs_REFC), + FULL_SPAN, strm) + end + in + (case (lex(strm)) + of (Tok.EOF, _, strm') => decls_PROD_2(strm) + | (Tok.DOT, _, strm') => decls_PROD_1(strm) + | (Tok.LP, _, strm') => decls_PROD_1(strm) + | (Tok.LSB, _, strm') => decls_PROD_1(strm) + | (Tok.LCB, _, strm') => decls_PROD_1(strm) + | (Tok.LT, _, strm') => decls_PROD_1(strm) + | (Tok.CARAT, _, strm') => decls_PROD_1(strm) + | (Tok.NEG, _, strm') => decls_PROD_1(strm) + | (Tok.KW_defs, _, strm') => decls_PROD_1(strm) + | (Tok.KW_arg, _, strm') => decls_PROD_1(strm) + | (Tok.KW_header, _, strm') => decls_PROD_1(strm) + | (Tok.KW_name, _, strm') => decls_PROD_1(strm) + | (Tok.KW_states, _, strm') => decls_PROD_1(strm) + | (Tok.KW_let, _, strm') => decls_PROD_1(strm) + | (Tok.KW_charset, _, strm') => decls_PROD_1(strm) + | (Tok.EOFMARK, _, strm') => decls_PROD_1(strm) + | (Tok.CHAR(_), _, strm') => decls_PROD_1(strm) + | (Tok.UCHAR(_), _, strm') => decls_PROD_1(strm) + | _ => fail() + (* end case *)) + end +fun file_NT (strm) = let + val (decls_RES, decls_SPAN, strm') = (decls_NT (UserCode.ARGS_4 (errs_REFC)))(strm) + val FULL_SPAN = (#1(decls_SPAN), #2(decls_SPAN)) + in + ((decls_RES), FULL_SPAN, strm') + end +in + (file_NT) +end +val file_NT = fn s => unwrap (Err.launch (eh, lexFn, file_NT , true) s) + +in (file_NT) end + in +fun parse lexFn s = let val (file_NT) = mk lexFn in file_NT s end + + end + +end diff --git a/ml-lpt/ml-ulex/FrontEnds/ml-ulex/ml-ulex.lex b/ml-lpt/ml-ulex/FrontEnds/ml-ulex/ml-ulex.lex new file mode 100644 index 0000000..bec64ee --- /dev/null +++ b/ml-lpt/ml-ulex/FrontEnds/ml-ulex/ml-ulex.lex @@ -0,0 +1,202 @@ +(* ml-ulex.lex + * + * COPYRIGHT (c) 2006 + * John Reppy (http://www.cs.uchicago.edu/~jhr) + * Aaron Turon (http://www.cs.uchicago.edu/~adrassi) + * All rights reserved. + * + * (With some code borrowed from ml-yacc) + *) + +%defs ( + structure Tok = MLULexTokens + + val comLvl : int ref = ref 0 (* nesting depth of comments *) + val comStart : int ref = ref 0 (* start line of current comment *) + + type lex_result = Tok.token + + val text : string list ref = ref [] + fun addText s = (text := s::(!text)) + fun clrText () = (text := []) + fun getText () = concat (rev (!text)) + + val pcount = ref 0 + fun inc (ri as ref i) = (ri := i+1) + fun dec (ri as ref i) = (ri := i-1) + + fun hexDigit x = + if #"a" <= x andalso x <= #"f" then + Char.ord x - Char.ord #"a" + 10 + else if #"A" <=x andalso x <= #"F" then + Char.ord x - Char.ord #"A" + 10 + else Char.ord x - Char.ord #"0" + + fun hexVal (ss) : UTF8.wchar = + Substring.foldl + (fn (dig, acc) => (Word.fromInt o hexDigit) dig + 0w16 * acc) + 0w0 ss + + fun mkUChar yyunicode = Tok.UCHAR (hd yyunicode) + + fun unclosedErr (sm, pos) = + TextIO.output (TextIO.stdErr, String.concat[ + " ", AntlrStreamPos.toString sm pos, " Syntax error: unclosed string\n" + ]) +); + +%let eol=("\n"|"\013\n"|"\013"); +%let ws=("\009"|"\011"|"\012"|" "|{eol}); +(* +%let eol="\n"; +%let ws=("\t"|" "|{eol}); +*) +%let lc=[a-z]; +%let uc=[A-Z]; +%let alpha=({lc}|{uc}); +%let digit=[0-9]; +%let int={digit}+; +%let idchars=({alpha}|{digit}|"_"); +%let id={alpha}{idchars}*; +%let qualid ={id}"."; +%let tyvar="'"{idchars}*; + +%states STRING COM CODE CHARCLASS DIRECTIVE CHARSET RESTRING CURLY ; + +%name MLULexLex; +(* %charset utf8; *) + +{ws}+ + => (skip()); +{eol} + => (skip()); + +<> => (Tok.EOF); +<> => (unclosedErr (yysm, yypos); Tok.EOF); +<> => (unclosedErr (yysm, yypos); YYBEGIN INITIAL; Tok.EOF); + +"%defs" => (YYBEGIN CODE; clrText(); Tok.KW_defs); +"%arg" => (YYBEGIN CODE; clrText(); Tok.KW_arg); +"%header" => (YYBEGIN CODE; clrText(); Tok.KW_header); +"%name" => (YYBEGIN DIRECTIVE; Tok.KW_name); +"%states" => (YYBEGIN DIRECTIVE; Tok.KW_states); +"%let" => (YYBEGIN DIRECTIVE; Tok.KW_let); +"%charset" => (YYBEGIN CHARSET; Tok.KW_charset); +"<>" => (Tok.EOFMARK); + +{id} => (Tok.ID yytext); +"," => (Tok.COMMA); +";" => (YYBEGIN INITIAL; Tok.SEMI); +"=" => (YYBEGIN INITIAL; Tok.EQ); +">" => (YYBEGIN INITIAL; Tok.GT); +"(*" => (comLvl := 1; comStart := !yylineno; YYBEGIN COM; + ignore(continue() before YYBEGIN DIRECTIVE); + continue()); +. => (YYBEGIN INITIAL; REJECT()); + +"utf8" | "UTF8" => (YYBEGIN INITIAL; Tok.UTF8); +"ascii7" | "ASCII7" => (YYBEGIN INITIAL; Tok.ASCII7); +"ascii8" | "ASCII8" => (YYBEGIN INITIAL; Tok.ASCII8); +";" => (YYBEGIN INITIAL; Tok.SEMI); +. => (YYBEGIN INITIAL; REJECT()); + +"|" => (Tok.BAR); +"." => (Tok.DOT); +"$" => (Tok.DOLLAR); +"+" => (Tok.PLUS); +"&" => (Tok.AMP); +"*" => (Tok.STAR); +"?" => (Tok.QUERY); +"~" => (Tok.NEG); +";" => (Tok.SEMI); +"(" => (Tok.LP); +")" => (Tok.RP); +"[" => (YYBEGIN CHARCLASS; Tok.LSB); +"]" => (Tok.RSB); + +"{" => (YYBEGIN CURLY; Tok.LCB); +"}" => (YYBEGIN INITIAL; Tok.RCB); +{id} => (Tok.ID yytext); +{int} => (Tok.INT (valOf (Int.fromString yytext))); +"," => (Tok.COMMA); + +"<" => (YYBEGIN DIRECTIVE; Tok.LT); +">" => (Tok.GT); +"," => (Tok.COMMA); +"/" => (Tok.SLASH); +"=" => (Tok.EQ); +"=>" => (YYBEGIN CODE; clrText(); Tok.DARROW); +"\"" => (YYBEGIN RESTRING; continue()); + +"^" => (Tok.CARAT); +"-" => (Tok.DASH); +"\\" ([A-Za-z] | [0-9]{3} | "\\" | "\"") + => (let val c = Char.fromString yytext + in case c + of SOME c' => Tok.CHAR c' + | NONE => (print (concat [ + Int.toString (!yylineno), ".", + Int.toString (!yycolno), ": unknown escape sequence '", + yytext, "'\n"]); + continue()) + end); + + ("\\u" ([A-Za-z] | [0-9]){4}) | + ("\\U" ([A-Za-z] | [0-9]){8}) + => (Tok.UCHAR (hexVal (Substring.triml 2 yysubstr))); +"\\" [^A-Za-z] + => (Tok.CHAR (String.sub (yytext, 1))); + +"]" => (YYBEGIN INITIAL; Tok.RSB); +[^\n\\] => (mkUChar yyunicode); + +"(*" + => (comLvl := 1; comStart := !yylineno; YYBEGIN COM; + ignore(continue() before YYBEGIN INITIAL); + continue()); +"(*" + => (comLvl := 1; comStart := !yylineno; YYBEGIN COM; + addText yytext; + ignore(continue() before YYBEGIN CODE); + continue()); + +"(*" + => (addText yytext; comLvl := !comLvl+1; continue()); +"*)" + => (addText yytext; comLvl := !comLvl-1; + if (!comLvl = 0) + then (Tok.BOGUS) + else continue()); +.|{eol} + => (addText yytext; continue()); + +"(" => (if !pcount = 0 then () else addText yytext; + inc pcount; continue()); +")" => (dec pcount; + if !pcount = 0 then + (YYBEGIN INITIAL; Tok.CODE (getText())) + else (addText yytext; continue())); +"\"" => (addText yytext; YYBEGIN STRING; + ignore(continue() before YYBEGIN CODE); + addText "\""; continue()); +[^()"]+ => (addText yytext; continue()); + +"\"" => (Tok.BOGUS); +{eol} => (addText yytext; unclosedErr (yysm, yypos); Tok.BOGUS); +"\\" => (addText yytext; continue()); +"\\\\" => (addText yytext; continue()); +"\\\"" => (addText yytext; continue()); +[^"\\\n\013]+ + => (addText yytext; continue()); + +"\"" => (YYBEGIN INITIAL; continue()); +{eol} => (unclosedErr (yysm, yypos); YYBEGIN INITIAL; continue()); +. => (mkUChar yyunicode); + +[^\n{};] + => (mkUChar yyunicode); +. => (print (concat[Int.toString (!yylineno), ".", + Int.toString (!yycolno), + ": illegal character '", + String.toCString yytext, "'\n"]); + continue()); \ No newline at end of file diff --git a/ml-lpt/ml-ulex/FrontEnds/ml-ulex/ml-ulex.lex.sml b/ml-lpt/ml-ulex/FrontEnds/ml-ulex/ml-ulex.lex.sml new file mode 100644 index 0000000..c572550 --- /dev/null +++ b/ml-lpt/ml-ulex/FrontEnds/ml-ulex/ml-ulex.lex.sml @@ -0,0 +1,704 @@ +structure MLULexLex = struct + + datatype yystart_state = +COM | CODE | STRING | CHARSET | CHARCLASS | CURLY | RESTRING | INITIAL | DIRECTIVE + local + + structure UserDeclarations = + struct + + + structure Tok = MLULexTokens + + val comLvl : int ref = ref 0 (* nesting depth of comments *) + val comStart : int ref = ref 0 (* start line of current comment *) + + type lex_result = Tok.token + + val text : string list ref = ref [] + fun addText s = (text := s::(!text)) + fun clrText () = (text := []) + fun getText () = concat (rev (!text)) + + val pcount = ref 0 + fun inc (ri as ref i) = (ri := i+1) + fun dec (ri as ref i) = (ri := i-1) + + fun hexDigit x = + if #"a" <= x andalso x <= #"f" then + Char.ord x - Char.ord #"a" + 10 + else if #"A" <=x andalso x <= #"F" then + Char.ord x - Char.ord #"A" + 10 + else Char.ord x - Char.ord #"0" + + fun hexVal (ss) : UTF8.wchar = + Substring.foldl + (fn (dig, acc) => (Word.fromInt o hexDigit) dig + 0w16 * acc) + 0w0 ss + + fun mkUChar yyunicode = Tok.UCHAR (hd yyunicode) + + fun unclosedErr (sm, pos) = + TextIO.output (TextIO.stdErr, String.concat[ + " ", AntlrStreamPos.toString sm pos, " Syntax error: unclosed string\n" + ]) + + end + + datatype yymatch + = yyNO_MATCH + | yyMATCH of ULexBuffer.stream * action * yymatch + withtype action = ULexBuffer.stream * yymatch -> UserDeclarations.lex_result + + val yytable : ((UTF8.wchar * UTF8.wchar * int) list * int list) Vector.vector = +#[([(0w0,0w12,9), +(0w14,0w39,9), +(0w41,0w41,9), +(0w43,0w2147483647,9), +(0w13,0w13,10), +(0w40,0w40,11), +(0w42,0w42,12)], []), ([(0w0,0w33,16), +(0w35,0w39,16), +(0w42,0w2147483647,16), +(0w34,0w34,17), +(0w40,0w40,18), +(0w41,0w41,19)], []), ([(0w0,0w9,22), +(0w11,0w12,22), +(0w14,0w33,22), +(0w35,0w91,22), +(0w93,0w2147483647,22), +(0w10,0w10,23), +(0w13,0w13,24), +(0w34,0w34,25), +(0w92,0w92,26)], []), ([(0w0,0w8,31), +(0w14,0w31,31), +(0w33,0w58,31), +(0w60,0w64,31), +(0w66,0w84,31), +(0w86,0w96,31), +(0w98,0w116,31), +(0w118,0w2147483647,31), +(0w9,0w12,32), +(0w32,0w32,32), +(0w13,0w13,33), +(0w59,0w59,34), +(0w65,0w65,35), +(0w85,0w85,36), +(0w97,0w97,37), +(0w117,0w117,38)], []), ([(0w0,0w9,54), +(0w11,0w12,54), +(0w14,0w44,54), +(0w46,0w91,54), +(0w95,0w2147483647,54), +(0w10,0w10,55), +(0w13,0w13,56), +(0w45,0w45,57), +(0w92,0w92,58), +(0w93,0w93,59), +(0w94,0w94,60)], []), ([(0w0,0w43,77), +(0w45,0w47,77), +(0w58,0w64,77), +(0w91,0w96,77), +(0w123,0w124,77), +(0w126,0w2147483647,77), +(0w44,0w44,78), +(0w48,0w57,79), +(0w65,0w90,80), +(0w97,0w122,80), +(0w125,0w125,81)], []), ([(0w0,0w9,84), +(0w11,0w12,84), +(0w14,0w33,84), +(0w35,0w91,84), +(0w93,0w2147483647,84), +(0w10,0w10,85), +(0w13,0w13,86), +(0w34,0w34,87), +(0w92,0w92,88)], []), ([(0w0,0w8,90), +(0w14,0w31,90), +(0w33,0w33,90), +(0w35,0w35,90), +(0w39,0w39,90), +(0w45,0w45,90), +(0w48,0w58,90), +(0w64,0w90,90), +(0w95,0w122,90), +(0w127,0w2147483647,90), +(0w9,0w9,91), +(0w11,0w12,91), +(0w32,0w32,91), +(0w10,0w10,92), +(0w13,0w13,93), +(0w34,0w34,94), +(0w36,0w36,95), +(0w37,0w37,96), +(0w38,0w38,97), +(0w40,0w40,98), +(0w41,0w41,99), +(0w42,0w42,100), +(0w43,0w43,101), +(0w44,0w44,102), +(0w46,0w46,103), +(0w47,0w47,104), +(0w59,0w59,105), +(0w60,0w60,106), +(0w61,0w61,107), +(0w62,0w62,108), +(0w63,0w63,109), +(0w91,0w91,110), +(0w92,0w92,111), +(0w93,0w93,112), +(0w94,0w94,113), +(0w123,0w123,114), +(0w124,0w124,115), +(0w125,0w125,77), +(0w126,0w126,116)], []), ([(0w0,0w8,158), +(0w14,0w31,158), +(0w33,0w39,158), +(0w41,0w43,158), +(0w45,0w58,158), +(0w60,0w60,158), +(0w63,0w64,158), +(0w91,0w96,158), +(0w123,0w2147483647,158), +(0w9,0w12,159), +(0w32,0w32,159), +(0w13,0w13,160), +(0w40,0w40,161), +(0w44,0w44,162), +(0w59,0w59,163), +(0w61,0w61,164), +(0w62,0w62,165), +(0w65,0w90,166), +(0w97,0w122,166)], []), ([], [58, 73]), ([(0w10,0w10,15)], [58, 73]), ([(0w42,0w42,14)], [58, 73]), ([(0w41,0w41,13)], [58, 73]), ([], [57]), ([], [56]), ([], [58]), ([(0w0,0w33,21), +(0w35,0w39,21), +(0w42,0w2147483647,21)], [62, 73]), ([], [61, 73]), ([(0w42,0w42,20)], [59, 73]), ([], [60, 73]), ([], [55]), ([(0w0,0w33,21), +(0w35,0w39,21), +(0w42,0w2147483647,21)], [62]), ([(0w0,0w9,30), +(0w11,0w12,30), +(0w14,0w33,30), +(0w35,0w91,30), +(0w93,0w2147483647,30)], [68, 73]), ([], [64, 73]), ([(0w10,0w10,29)], [64, 73]), ([], [63, 73]), ([(0w34,0w34,27), +(0w92,0w92,28)], [65, 73]), ([], [67]), ([], [66]), ([], [64]), ([(0w0,0w9,30), +(0w11,0w12,30), +(0w14,0w33,30), +(0w35,0w91,30), +(0w93,0w2147483647,30)], [68]), ([], [21, 73]), ([(0w9,0w12,52), +(0w32,0w32,52), +(0w13,0w13,53)], [0, 21, 73]), ([(0w9,0w12,52), +(0w32,0w32,52), +(0w13,0w13,53)], [0, 21, 73]), ([], [20, 21, 73]), ([(0w83,0w83,49)], [21, 73]), ([(0w84,0w84,48)], [21, 73]), ([(0w115,0w115,42)], [21, 73]), ([(0w116,0w116,39)], [21, 73]), ([(0w102,0w102,40)], []), ([(0w56,0w56,41)], []), ([], [17]), ([(0w99,0w99,43)], []), ([(0w105,0w105,44)], []), ([(0w105,0w105,45)], []), ([(0w55,0w55,46), +(0w56,0w56,47)], []), ([], [18]), ([], [19]), ([(0w70,0w70,40)], []), ([(0w67,0w67,50)], []), ([(0w73,0w73,51)], []), ([(0w73,0w73,45)], []), ([(0w9,0w12,52), +(0w32,0w32,52), +(0w13,0w13,53)], [0]), ([(0w9,0w12,52), +(0w32,0w32,52), +(0w13,0w13,53)], [0]), ([], [53, 73]), ([], [1, 73]), ([(0w10,0w10,76)], [1, 53, 73]), ([], [48, 53, 73]), ([(0w0,0w33,61), +(0w35,0w47,61), +(0w58,0w64,61), +(0w91,0w91,61), +(0w93,0w96,61), +(0w123,0w2147483647,61), +(0w34,0w34,62), +(0w92,0w92,62), +(0w48,0w57,63), +(0w65,0w84,64), +(0w86,0w90,64), +(0w97,0w116,64), +(0w118,0w122,64), +(0w85,0w85,65), +(0w117,0w117,66)], [73]), ([], [52, 53, 73]), ([], [47, 53, 73]), ([], [51]), ([], [49, 51]), ([(0w48,0w57,75)], [51]), ([], [49]), ([(0w48,0w57,71), +(0w65,0w90,71), +(0w97,0w122,71)], [49]), ([(0w48,0w57,67), +(0w65,0w90,67), +(0w97,0w122,67)], [49]), ([(0w48,0w57,68), +(0w65,0w90,68), +(0w97,0w122,68)], []), ([(0w48,0w57,69), +(0w65,0w90,69), +(0w97,0w122,69)], []), ([(0w48,0w57,70), +(0w65,0w90,70), +(0w97,0w122,70)], []), ([], [50]), ([(0w48,0w57,72), +(0w65,0w90,72), +(0w97,0w122,72)], []), ([(0w48,0w57,73), +(0w65,0w90,73), +(0w97,0w122,73)], []), ([(0w48,0w57,74), +(0w65,0w90,74), +(0w97,0w122,74)], []), ([(0w48,0w57,67), +(0w65,0w90,67), +(0w97,0w122,67)], []), ([(0w48,0w57,64)], []), ([], [1]), ([], [73]), ([], [39, 73]), ([(0w48,0w57,83)], [38, 73]), ([(0w48,0w57,82), +(0w65,0w90,82), +(0w95,0w95,82), +(0w97,0w122,82)], [37, 73]), ([], [36, 73]), ([(0w48,0w57,82), +(0w65,0w90,82), +(0w95,0w95,82), +(0w97,0w122,82)], [37]), ([(0w48,0w57,83)], [38]), ([], [71, 73]), ([], [70, 71, 73]), ([(0w10,0w10,89)], [70, 71, 73]), ([], [69, 71, 73]), ([(0w0,0w33,61), +(0w35,0w47,61), +(0w58,0w64,61), +(0w91,0w91,61), +(0w93,0w96,61), +(0w123,0w2147483647,61), +(0w34,0w34,62), +(0w92,0w92,62), +(0w48,0w57,63), +(0w65,0w84,64), +(0w86,0w90,64), +(0w97,0w116,64), +(0w118,0w122,64), +(0w85,0w85,65), +(0w117,0w117,66)], [71, 73]), ([], [70]), ([], [72, 73]), ([(0w9,0w12,52), +(0w32,0w32,52), +(0w13,0w13,53)], [0, 72, 73]), ([(0w9,0w12,52), +(0w32,0w32,52), +(0w13,0w13,53)], [0, 73]), ([(0w9,0w12,52), +(0w32,0w32,52), +(0w13,0w13,53)], [0, 72, 73]), ([], [46, 72, 73]), ([], [24, 72, 73]), ([(0w97,0w97,125), +(0w99,0w99,126), +(0w100,0w100,127), +(0w104,0w104,128), +(0w108,0w108,129), +(0w110,0w110,130), +(0w115,0w115,131)], [72, 73]), ([], [26, 72, 73]), ([(0w42,0w42,124)], [31, 72, 73]), ([], [32, 72, 73]), ([], [27, 72, 73]), ([], [25, 72, 73]), ([], [42, 72, 73]), ([], [23, 72, 73]), ([], [43, 72, 73]), ([], [30, 73]), ([(0w60,0w60,118)], [40, 72, 73]), ([(0w62,0w62,117)], [44, 72, 73]), ([], [41, 72, 73]), ([], [28, 72, 73]), ([], [33, 72, 73]), ([(0w0,0w33,61), +(0w35,0w47,61), +(0w58,0w64,61), +(0w91,0w91,61), +(0w93,0w96,61), +(0w123,0w2147483647,61), +(0w34,0w34,62), +(0w92,0w92,62), +(0w48,0w57,63), +(0w65,0w84,64), +(0w86,0w90,64), +(0w97,0w116,64), +(0w118,0w122,64), +(0w85,0w85,65), +(0w117,0w117,66)], [72, 73]), ([], [34, 72, 73]), ([], [47, 72, 73]), ([], [35, 73]), ([], [22, 72, 73]), ([], [29, 72, 73]), ([], [45]), ([(0w69,0w69,119)], []), ([(0w79,0w79,120)], []), ([(0w70,0w70,121)], []), ([(0w62,0w62,122)], []), ([(0w62,0w62,123)], []), ([], [9]), ([], [54]), ([(0w114,0w114,156)], []), ([(0w104,0w104,150)], []), ([(0w101,0w101,147)], []), ([(0w101,0w101,142)], []), ([(0w101,0w101,140)], []), ([(0w97,0w97,137)], []), ([(0w116,0w116,132)], []), ([(0w97,0w97,133)], []), ([(0w116,0w116,134)], []), ([(0w101,0w101,135)], []), ([(0w115,0w115,136)], []), ([], [6]), ([(0w109,0w109,138)], []), ([(0w101,0w101,139)], []), ([], [5]), ([(0w116,0w116,141)], []), ([], [7]), ([(0w97,0w97,143)], []), ([(0w100,0w100,144)], []), ([(0w101,0w101,145)], []), ([(0w114,0w114,146)], []), ([], [4]), ([(0w102,0w102,148)], []), ([(0w115,0w115,149)], []), ([], [2]), ([(0w97,0w97,151)], []), ([(0w114,0w114,152)], []), ([(0w115,0w115,153)], []), ([(0w101,0w101,154)], []), ([(0w116,0w116,155)], []), ([], [8]), ([(0w103,0w103,157)], []), ([], [3]), ([], [16, 73]), ([(0w9,0w12,52), +(0w32,0w32,52), +(0w13,0w13,53)], [0, 16, 73]), ([(0w9,0w12,52), +(0w32,0w32,52), +(0w13,0w13,53)], [0, 16, 73]), ([(0w42,0w42,168)], [16, 73]), ([], [11, 16, 73]), ([], [12, 16, 73]), ([], [13, 16, 73]), ([], [14, 16, 73]), ([(0w48,0w57,167), +(0w65,0w90,167), +(0w95,0w95,167), +(0w97,0w122,167)], [10, 16, 73]), ([(0w48,0w57,167), +(0w65,0w90,167), +(0w95,0w95,167), +(0w97,0w122,167)], [10]), ([], [15])] + fun yystreamify' p input = ULexBuffer.mkStream (p, input) + + fun yystreamifyReader' p readFn strm = let + val s = ref strm + fun iter(strm, n, accum) = + if n > 1024 then (String.implode (rev accum), strm) + else (case readFn strm + of NONE => (String.implode (rev accum), strm) + | SOME(c, strm') => iter (strm', n+1, c::accum)) + fun input() = let + val (data, strm) = iter(!s, 0, []) + in + s := strm; + data + end + in + yystreamify' p input + end + + fun yystreamifyInstream' p strm = yystreamify' p (fn ()=>TextIO.input strm) + + fun innerLex +(yystrm_, yyss_, yysm) = let + (* current start state *) + val yyss = ref yyss_ + fun YYBEGIN ss = (yyss := ss) + (* current input stream *) + val yystrm = ref yystrm_ + fun yysetStrm strm = yystrm := strm + fun yygetPos() = ULexBuffer.getpos (!yystrm) + fun yystreamify input = yystreamify' (yygetPos()) input + fun yystreamifyReader readFn strm = yystreamifyReader' (yygetPos()) readFn strm + fun yystreamifyInstream strm = yystreamifyInstream' (yygetPos()) strm + (* start position of token -- can be updated via skip() *) + val yystartPos = ref (yygetPos()) + (* get one char of input *) + fun yygetc strm = (case ULexBuffer.getu strm + of (SOME (0w10, s')) => + (AntlrStreamPos.markNewLine yysm (ULexBuffer.getpos strm); + SOME (0w10, s')) + | x => x) + fun yygetList getc strm = let + val get1 = UTF8.getu getc + fun iter (strm, accum) = + (case get1 strm + of NONE => rev accum + | SOME (w, strm') => iter (strm', w::accum) + (* end case *)) + in + iter (strm, []) + end + (* create yytext *) + fun yymksubstr(strm) = ULexBuffer.subtract (strm, !yystrm) + fun yymktext(strm) = Substring.string (yymksubstr strm) + fun yymkunicode(strm) = yygetList Substring.getc (yymksubstr strm) + open UserDeclarations + fun lex () = let + fun yystuck (yyNO_MATCH) = raise Fail "lexer reached a stuck state" + | yystuck (yyMATCH (strm, action, old)) = + action (strm, old) + val yypos = yygetPos() + fun yygetlineNo strm = AntlrStreamPos.lineNo yysm (ULexBuffer.getpos strm) + fun yygetcolNo strm = AntlrStreamPos.colNo yysm (ULexBuffer.getpos strm) + fun yyactsToMatches (strm, [], oldMatches) = oldMatches + | yyactsToMatches (strm, act::acts, oldMatches) = + yyMATCH (strm, act, yyactsToMatches (strm, acts, oldMatches)) + fun yygo actTable = + (fn (~1, _, oldMatches) => yystuck oldMatches + | (curState, strm, oldMatches) => let + val (transitions, finals') = Vector.sub (yytable, curState) + val finals = List.map (fn i => Vector.sub (actTable, i)) finals' + fun tryfinal() = + yystuck (yyactsToMatches (strm, finals, oldMatches)) + fun find (c, []) = NONE + | find (c, (c1, c2, s)::ts) = + if c1 <= c andalso c <= c2 then SOME s + else find (c, ts) + in case yygetc strm + of SOME(c, strm') => + (case find (c, transitions) + of NONE => tryfinal() + | SOME n => + yygo actTable + (n, strm', + yyactsToMatches (strm, finals, oldMatches))) + | NONE => tryfinal() + end) + val yylastwasnref = ref (ULexBuffer.lastWasNL (!yystrm)) + fun continue() = let val yylastwasn = !yylastwasnref in +let +fun yyAction0 (strm, lastMatch : yymatch) = (yystrm := strm; skip()) +fun yyAction1 (strm, lastMatch : yymatch) = (yystrm := strm; skip()) +fun yyAction2 (strm, lastMatch : yymatch) = (yystrm := strm; + YYBEGIN CODE; clrText(); Tok.KW_defs) +fun yyAction3 (strm, lastMatch : yymatch) = (yystrm := strm; + YYBEGIN CODE; clrText(); Tok.KW_arg) +fun yyAction4 (strm, lastMatch : yymatch) = (yystrm := strm; + YYBEGIN CODE; clrText(); Tok.KW_header) +fun yyAction5 (strm, lastMatch : yymatch) = (yystrm := strm; + YYBEGIN DIRECTIVE; Tok.KW_name) +fun yyAction6 (strm, lastMatch : yymatch) = (yystrm := strm; + YYBEGIN DIRECTIVE; Tok.KW_states) +fun yyAction7 (strm, lastMatch : yymatch) = (yystrm := strm; + YYBEGIN DIRECTIVE; Tok.KW_let) +fun yyAction8 (strm, lastMatch : yymatch) = (yystrm := strm; + YYBEGIN CHARSET; Tok.KW_charset) +fun yyAction9 (strm, lastMatch : yymatch) = (yystrm := strm; Tok.EOFMARK) +fun yyAction10 (strm, lastMatch : yymatch) = let + val yytext = yymktext(strm) + in + yystrm := strm; Tok.ID yytext + end +fun yyAction11 (strm, lastMatch : yymatch) = (yystrm := strm; Tok.COMMA) +fun yyAction12 (strm, lastMatch : yymatch) = (yystrm := strm; + YYBEGIN INITIAL; Tok.SEMI) +fun yyAction13 (strm, lastMatch : yymatch) = (yystrm := strm; + YYBEGIN INITIAL; Tok.EQ) +fun yyAction14 (strm, lastMatch : yymatch) = (yystrm := strm; + YYBEGIN INITIAL; Tok.GT) +fun yyAction15 (strm, lastMatch : yymatch) = let + val yylineno = ref(yygetlineNo(!(yystrm))) + in + yystrm := strm; + comLvl := 1; comStart := !yylineno; YYBEGIN COM; + ignore(continue() before YYBEGIN DIRECTIVE); + continue() + end +fun yyAction16 (strm, lastMatch : yymatch) = let + val oldStrm = !(yystrm) + fun REJECT () = (yystrm := oldStrm; yystuck(lastMatch)) + in + yystrm := strm; YYBEGIN INITIAL; REJECT() + end +fun yyAction17 (strm, lastMatch : yymatch) = (yystrm := strm; + YYBEGIN INITIAL; Tok.UTF8) +fun yyAction18 (strm, lastMatch : yymatch) = (yystrm := strm; + YYBEGIN INITIAL; Tok.ASCII7) +fun yyAction19 (strm, lastMatch : yymatch) = (yystrm := strm; + YYBEGIN INITIAL; Tok.ASCII8) +fun yyAction20 (strm, lastMatch : yymatch) = (yystrm := strm; + YYBEGIN INITIAL; Tok.SEMI) +fun yyAction21 (strm, lastMatch : yymatch) = let + val oldStrm = !(yystrm) + fun REJECT () = (yystrm := oldStrm; yystuck(lastMatch)) + in + yystrm := strm; YYBEGIN INITIAL; REJECT() + end +fun yyAction22 (strm, lastMatch : yymatch) = (yystrm := strm; Tok.BAR) +fun yyAction23 (strm, lastMatch : yymatch) = (yystrm := strm; Tok.DOT) +fun yyAction24 (strm, lastMatch : yymatch) = (yystrm := strm; Tok.DOLLAR) +fun yyAction25 (strm, lastMatch : yymatch) = (yystrm := strm; Tok.PLUS) +fun yyAction26 (strm, lastMatch : yymatch) = (yystrm := strm; Tok.AMP) +fun yyAction27 (strm, lastMatch : yymatch) = (yystrm := strm; Tok.STAR) +fun yyAction28 (strm, lastMatch : yymatch) = (yystrm := strm; Tok.QUERY) +fun yyAction29 (strm, lastMatch : yymatch) = (yystrm := strm; Tok.NEG) +fun yyAction30 (strm, lastMatch : yymatch) = (yystrm := strm; Tok.SEMI) +fun yyAction31 (strm, lastMatch : yymatch) = (yystrm := strm; Tok.LP) +fun yyAction32 (strm, lastMatch : yymatch) = (yystrm := strm; Tok.RP) +fun yyAction33 (strm, lastMatch : yymatch) = (yystrm := strm; + YYBEGIN CHARCLASS; Tok.LSB) +fun yyAction34 (strm, lastMatch : yymatch) = (yystrm := strm; Tok.RSB) +fun yyAction35 (strm, lastMatch : yymatch) = (yystrm := strm; + YYBEGIN CURLY; Tok.LCB) +fun yyAction36 (strm, lastMatch : yymatch) = (yystrm := strm; + YYBEGIN INITIAL; Tok.RCB) +fun yyAction37 (strm, lastMatch : yymatch) = let + val yytext = yymktext(strm) + in + yystrm := strm; Tok.ID yytext + end +fun yyAction38 (strm, lastMatch : yymatch) = let + val yytext = yymktext(strm) + in + yystrm := strm; Tok.INT (valOf (Int.fromString yytext)) + end +fun yyAction39 (strm, lastMatch : yymatch) = (yystrm := strm; Tok.COMMA) +fun yyAction40 (strm, lastMatch : yymatch) = (yystrm := strm; + YYBEGIN DIRECTIVE; Tok.LT) +fun yyAction41 (strm, lastMatch : yymatch) = (yystrm := strm; Tok.GT) +fun yyAction42 (strm, lastMatch : yymatch) = (yystrm := strm; Tok.COMMA) +fun yyAction43 (strm, lastMatch : yymatch) = (yystrm := strm; Tok.SLASH) +fun yyAction44 (strm, lastMatch : yymatch) = (yystrm := strm; Tok.EQ) +fun yyAction45 (strm, lastMatch : yymatch) = (yystrm := strm; + YYBEGIN CODE; clrText(); Tok.DARROW) +fun yyAction46 (strm, lastMatch : yymatch) = (yystrm := strm; + YYBEGIN RESTRING; continue()) +fun yyAction47 (strm, lastMatch : yymatch) = (yystrm := strm; Tok.CARAT) +fun yyAction48 (strm, lastMatch : yymatch) = (yystrm := strm; Tok.DASH) +fun yyAction49 (strm, lastMatch : yymatch) = let + val yycolno = ref(yygetcolNo(!(yystrm))) + val yylineno = ref(yygetlineNo(!(yystrm))) + val yytext = yymktext(strm) + in + yystrm := strm; + let val c = Char.fromString yytext + in case c + of SOME c' => Tok.CHAR c' + | NONE => (print (concat [ + Int.toString (!yylineno), ".", + Int.toString (!yycolno), ": unknown escape sequence '", + yytext, "'\n"]); + continue()) + end + end +fun yyAction50 (strm, lastMatch : yymatch) = let + val yysubstr = yymksubstr(strm) + in + yystrm := strm; Tok.UCHAR (hexVal (Substring.triml 2 yysubstr)) + end +fun yyAction51 (strm, lastMatch : yymatch) = let + val yytext = yymktext(strm) + in + yystrm := strm; Tok.CHAR (String.sub (yytext, 1)) + end +fun yyAction52 (strm, lastMatch : yymatch) = (yystrm := strm; + YYBEGIN INITIAL; Tok.RSB) +fun yyAction53 (strm, lastMatch : yymatch) = let + val yyunicode = yymkunicode(strm) + in + yystrm := strm; mkUChar yyunicode + end +fun yyAction54 (strm, lastMatch : yymatch) = let + val yylineno = ref(yygetlineNo(!(yystrm))) + in + yystrm := strm; + comLvl := 1; comStart := !yylineno; YYBEGIN COM; + ignore(continue() before YYBEGIN INITIAL); + continue() + end +fun yyAction55 (strm, lastMatch : yymatch) = let + val yylineno = ref(yygetlineNo(!(yystrm))) + val yytext = yymktext(strm) + in + yystrm := strm; + comLvl := 1; comStart := !yylineno; YYBEGIN COM; + addText yytext; + ignore(continue() before YYBEGIN CODE); + continue() + end +fun yyAction56 (strm, lastMatch : yymatch) = let + val yytext = yymktext(strm) + in + yystrm := strm; addText yytext; comLvl := !comLvl+1; continue() + end +fun yyAction57 (strm, lastMatch : yymatch) = let + val yytext = yymktext(strm) + in + yystrm := strm; + addText yytext; comLvl := !comLvl-1; + if (!comLvl = 0) + then (Tok.BOGUS) + else continue() + end +fun yyAction58 (strm, lastMatch : yymatch) = let + val yytext = yymktext(strm) + in + yystrm := strm; addText yytext; continue() + end +fun yyAction59 (strm, lastMatch : yymatch) = let + val yytext = yymktext(strm) + in + yystrm := strm; + if !pcount = 0 then () else addText yytext; + inc pcount; continue() + end +fun yyAction60 (strm, lastMatch : yymatch) = let + val yytext = yymktext(strm) + in + yystrm := strm; + dec pcount; + if !pcount = 0 then + (YYBEGIN INITIAL; Tok.CODE (getText())) + else (addText yytext; continue()) + end +fun yyAction61 (strm, lastMatch : yymatch) = let + val yytext = yymktext(strm) + in + yystrm := strm; + addText yytext; YYBEGIN STRING; + ignore(continue() before YYBEGIN CODE); + addText "\""; continue() + end +fun yyAction62 (strm, lastMatch : yymatch) = let + val yytext = yymktext(strm) + in + yystrm := strm; addText yytext; continue() + end +fun yyAction63 (strm, lastMatch : yymatch) = (yystrm := strm; Tok.BOGUS) +fun yyAction64 (strm, lastMatch : yymatch) = let + val yytext = yymktext(strm) + in + yystrm := strm; addText yytext; unclosedErr (yysm, yypos); Tok.BOGUS + end +fun yyAction65 (strm, lastMatch : yymatch) = let + val yytext = yymktext(strm) + in + yystrm := strm; addText yytext; continue() + end +fun yyAction66 (strm, lastMatch : yymatch) = let + val yytext = yymktext(strm) + in + yystrm := strm; addText yytext; continue() + end +fun yyAction67 (strm, lastMatch : yymatch) = let + val yytext = yymktext(strm) + in + yystrm := strm; addText yytext; continue() + end +fun yyAction68 (strm, lastMatch : yymatch) = let + val yytext = yymktext(strm) + in + yystrm := strm; addText yytext; continue() + end +fun yyAction69 (strm, lastMatch : yymatch) = (yystrm := strm; + YYBEGIN INITIAL; continue()) +fun yyAction70 (strm, lastMatch : yymatch) = (yystrm := strm; + unclosedErr (yysm, yypos); YYBEGIN INITIAL; continue()) +fun yyAction71 (strm, lastMatch : yymatch) = let + val yyunicode = yymkunicode(strm) + in + yystrm := strm; mkUChar yyunicode + end +fun yyAction72 (strm, lastMatch : yymatch) = let + val yyunicode = yymkunicode(strm) + in + yystrm := strm; mkUChar yyunicode + end +fun yyAction73 (strm, lastMatch : yymatch) = let + val yycolno = ref(yygetcolNo(!(yystrm))) + val yylineno = ref(yygetlineNo(!(yystrm))) + val yytext = yymktext(strm) + in + yystrm := strm; + print (concat[Int.toString (!yylineno), ".", + Int.toString (!yycolno), + ": illegal character '", + String.toCString yytext, "'\n"]); + continue() + end +val yyactTable = Vector.fromList([yyAction0, yyAction1, yyAction2, yyAction3, + yyAction4, yyAction5, yyAction6, yyAction7, yyAction8, yyAction9, yyAction10, + yyAction11, yyAction12, yyAction13, yyAction14, yyAction15, yyAction16, + yyAction17, yyAction18, yyAction19, yyAction20, yyAction21, yyAction22, + yyAction23, yyAction24, yyAction25, yyAction26, yyAction27, yyAction28, + yyAction29, yyAction30, yyAction31, yyAction32, yyAction33, yyAction34, + yyAction35, yyAction36, yyAction37, yyAction38, yyAction39, yyAction40, + yyAction41, yyAction42, yyAction43, yyAction44, yyAction45, yyAction46, + yyAction47, yyAction48, yyAction49, yyAction50, yyAction51, yyAction52, + yyAction53, yyAction54, yyAction55, yyAction56, yyAction57, yyAction58, + yyAction59, yyAction60, yyAction61, yyAction62, yyAction63, yyAction64, + yyAction65, yyAction66, yyAction67, yyAction68, yyAction69, yyAction70, + yyAction71, yyAction72, yyAction73]) +in + if ULexBuffer.eof(!(yystrm)) + then let + val yycolno = ref(yygetcolNo(!(yystrm))) + val yylineno = ref(yygetlineNo(!(yystrm))) + in + (case (!(yyss)) + of RESTRING => ( unclosedErr (yysm, yypos); YYBEGIN INITIAL; Tok.EOF) + | STRING => ( unclosedErr (yysm, yypos); Tok.EOF) + | _ => ( Tok.EOF) + (* end case *)) + end + else (case (!(yyss)) + of COM => yygo yyactTable (0, !(yystrm), yyNO_MATCH) + | CODE => yygo yyactTable (1, !(yystrm), yyNO_MATCH) + | STRING => yygo yyactTable (2, !(yystrm), yyNO_MATCH) + | CHARSET => yygo yyactTable (3, !(yystrm), yyNO_MATCH) + | CHARCLASS => yygo yyactTable (4, !(yystrm), yyNO_MATCH) + | CURLY => yygo yyactTable (5, !(yystrm), yyNO_MATCH) + | RESTRING => yygo yyactTable (6, !(yystrm), yyNO_MATCH) + | INITIAL => yygo yyactTable (7, !(yystrm), yyNO_MATCH) + | DIRECTIVE => yygo yyactTable (8, !(yystrm), yyNO_MATCH) + (* end case *)) +end +end + and skip() = (yystartPos := yygetPos(); + yylastwasnref := ULexBuffer.lastWasNL (!yystrm); + continue()) + in (continue(), (!yystartPos, yygetPos()), !yystrm, !yyss) end + in + lex() + end + in + type pos = AntlrStreamPos.pos + type span = AntlrStreamPos.span + type tok = UserDeclarations.lex_result + + datatype prestrm = STRM of ULexBuffer.stream * + (yystart_state * tok * span * prestrm * yystart_state) option ref + type strm = (prestrm * yystart_state) + + fun lex sm +(STRM (yystrm, memo), ss) = (case !memo + of NONE => let + val (tok, span, yystrm', ss') = innerLex +(yystrm, ss, sm) + val strm' = STRM (yystrm', ref NONE); + in + memo := SOME (ss, tok, span, strm', ss'); + (tok, span, (strm', ss')) + end + | SOME (ss', tok, span, strm', ss'') => + if ss = ss' then + (tok, span, (strm', ss'')) + else ( + memo := NONE; + lex sm +(STRM (yystrm, memo), ss)) + (* end case *)) + + fun streamify input = (STRM (yystreamify' 0 input, ref NONE), INITIAL) + fun streamifyReader readFn strm = (STRM (yystreamifyReader' 0 readFn strm, ref NONE), + INITIAL) + fun streamifyInstream strm = (STRM (yystreamifyInstream' 0 strm, ref NONE), + INITIAL) + + fun getPos (STRM (strm, _), _) = ULexBuffer.getpos strm + + end +end diff --git a/ml-lpt/ml-ulex/Makefile b/ml-lpt/ml-ulex/Makefile new file mode 100644 index 0000000..56e4fae --- /dev/null +++ b/ml-lpt/ml-ulex/Makefile @@ -0,0 +1,53 @@ +# Makefile +# +# COPYRIGHT (c) 2009 The Fellowship of SML/NJ (http://www.smlnj.org) +# All rights reserved. +# + +SHELL = /bin/sh +SML = sml +ML_BUILD = ml-build +ML_MAKEDEPEND = ml-makedepend + +# NOTE: the @SMLsuffix option is new with SML/NJ 110.72 +#HEAP_SUFFIX = $(shell sml @SMLsuffix) +HEAP_SUFFIX = $(shell echo 'TextIO.output (TextIO.stdErr, SMLofNJ.SysInfo.getHeapSuffix ());' | sml 2>&1 1> /dev/null) + +PROGRAM = ml-ulex +HEAP_IMAGE = $(PROGRAM).$(HEAP_SUFFIX) + +FE = FrontEnds +BE = BackEnds + +ML_LEX = $(FE)/ml-lex +ML_LEX_SRC = $(wildcard $(ML_LEX)/*.sml) $(ML_LEX)/ml-lex.lex $(ML_LEX)/ml-lex.yacc + +ML_ULEX = $(FE)/ml-ulex +ML_ULEX_SRC = $(wildcard $(ML_ULEX)/*.sml) $(ML_ULEX)/ml-ulex.lex $(ML_ULEX)/ml-ulex.grm + +DUMP_SRC = $(wildcard $(BE)/Dump/*.sml) +DOT_SRC = $(wildcard $(BE)/Dot/*.sml) +SML_SRC = $(wildcard $(BE)/SML/*.sml) +MATCH_SRC = $(wildcard $(BE)/Match/*.sml) + +FE_SRC = $(wildcard $(FE)/*.sml) $(ML_LEX_SRC) $(ML_ULEX_SRC) +BE_SRC = $(wildcard $(BE)/*.sml) $(DOT_SRC) $(SML_SRC) $(MATCH_SRC) + +CORE_SRC = $(wildcard *.sml) + +SOURCES = $(CORE_SRC) $(FE_SRC) $(BE_SRC) sources.cm + +build: $(HEAP_IMAGE) + +$(HEAP_IMAGE): $(SOURCES) + $(ML_BUILD) sources.cm Main.main $(PROGRAM) + +.depend: $(CM_FILES) + touch .depend + $(ML_MAKEDEPEND) -n -f .depend sources.cm $(HEAP_IMAGE) + +sinclude .depend + +# NOTE: do NOT clean out ml-lex.lex.sml, it is not autogenerated! +clean: + rm -rf .depend .cm $(HEAP_IMAGE) diff --git a/ml-lpt/ml-ulex/Makefile.mlton b/ml-lpt/ml-ulex/Makefile.mlton new file mode 100644 index 0000000..ac5b065 --- /dev/null +++ b/ml-lpt/ml-ulex/Makefile.mlton @@ -0,0 +1,47 @@ +# Makefile +# +# COPYRIGHT (c) 2009 The Fellowship of SML/NJ (http://www.smlnj.org) +# All rights reserved. +# +# Makefile for building ml-ulex using MLton +# + +SHELL = /bin/sh + +MLTON = mlton +MLTON_FLAGS = + +TARGET = mlulex + +SML_BE = BackEnds/SML + +TEMPLATES = $(SML_BE)/string-template-ml-lex.sml \ + $(SML_BE)/string-template-ml-ulex.sml + +SML_FILES = + +MLB_FILES = ../common/lpt-common.mlb \ + ../lib/ml-lpt-lib.mlb \ + $(TARGET).mlb + +$(TARGET) : $(MLB_FILES) $(SML_FILES) $(TEMPLATES) + $(MLTON) -output $(TARGET) $(MLTON_FLAGS) $(TARGET).mlb + +# a generator for the template string files +# +GEN = ../gen/gen-template-struct + +$(GEN): + (cd ../gen; make gen-template-struct) + +$(SML_BE)/string-template-ml-lex.sml: $(GEN) $(SML_BE)/template-ml-lex.sml + $(GEN) LexTemplate $(SML_BE)/template-ml-lex.sml $(SML_BE)/string-template-ml-lex.sml + +$(SML_BE)/string-template-ml-ulex.sml: $(GEN) $(SML_BE)/template-ml-ulex.sml + $(GEN) ULexTemplate $(SML_BE)/template-ml-ulex.sml $(SML_BE)/string-template-ml-ulex.sml + +.PHONEY: clean +clean: + rm -f $(TARGET) + rm -f $(TEMPLATES) + (cd ../gen; make clean) diff --git a/ml-lpt/ml-ulex/build.bat b/ml-lpt/ml-ulex/build.bat new file mode 100644 index 0000000..4a93b49 --- /dev/null +++ b/ml-lpt/ml-ulex/build.bat @@ -0,0 +1,2 @@ +@ECHO off +%COMSPEC% /C "..\..\bin\ml-build.bat -D NO_ML_ANTLR -D NO_ML_LEX -D NO_ML_YACC sources.cm Main.main ml-ulex" diff --git a/ml-lpt/ml-ulex/build.sh b/ml-lpt/ml-ulex/build.sh new file mode 100755 index 0000000..44843ee --- /dev/null +++ b/ml-lpt/ml-ulex/build.sh @@ -0,0 +1,53 @@ +#!/bin/sh +# +# Copyright (c) 2018 The Fellowship of SML/NJ (https://smlnj.org) +# +# build script for ml-ulex +# +# options: +# -o image -- specify the name of the heap image, "ml-ulex" +# is the default. + +CMD=$0 + +ROOT="ml-ulex" +HEAP_IMAGE="" +SMLNJROOT=`pwd`/../.. +BIN=${INSTALLDIR:-$SMLNJROOT}/bin +LIB=${INSTALLDIR:-$SMLNJROOT}/lib +BUILD=$BIN/ml-build +SML=$BIN/sml +SIZE_OPT="-32" + +# +# process command-line options +# +while [ "$#" != "0" ] ; do + arg=$1 + shift + case $arg in + -32) SIZE_OPT=$arg ;; + -64) SIZE_OPT=$arg ;; + -o) + if [ "$#" = "0" ]; then + echo "$CMD: must supply image name for -o option" + exit 1 + fi + HEAP_IMAGE=$1; shift + ;; + *) + echo $CMD: invalid argument: $arg + exit 1 + ;; + esac +done + +if [ "$HEAP_IMAGE" = "" ]; then + HEAP_IMAGE="$ROOT" +fi + +# +# Build the ml-ulex standalone program: +"$BUILD" $SIZE_OPT -DNO_ML_ANTLR -DNO_ML_LEX -DNO_ML_YACC sources.cm Main.main $HEAP_IMAGE + +exit 0 diff --git a/ml-lpt/ml-ulex/lex-gen.sml b/ml-lpt/ml-ulex/lex-gen.sml new file mode 100644 index 0000000..93dbefa --- /dev/null +++ b/ml-lpt/ml-ulex/lex-gen.sml @@ -0,0 +1,279 @@ +(* lex-gen.sml + * + * COPYRIGHT (c) 2005 + * John Reppy (http://www.cs.uchicago.edu/~jhr) + * Aaron Turon (adrassi@gmail.com) + * All rights reserved. + * + * DFA generation using RE derivatives + *) + +structure LexGen : + sig + + val gen : LexSpec.spec -> LexOutputSpec.spec + + end = struct + + structure RE = RegExp + structure SIS = RegExp.SymSet + structure LO = LexOutputSpec + + structure Map = RedBlackMapFn ( + struct + type ord_key = RE.re Vector.vector + val compare = Vector.collate RE.compare + end) + + (* given a list of RE vectors (start states), produce a DFA recognizer + * NOTE: invoked once per start state (each start state has a DFA) + *) + fun mkDFA startVecs = let + val n = ref 0 (* next state id *) + val states = ref [] + (* return the state that the re vector maps to and + * a flag set to true if the state is new. + *) + fun mkState (stateMap, res, asSS) = (case Map.find(stateMap, res) + of NONE => let + val id = !n + fun addFinal (idx, re, finals) = + if RE.nullable re + then idx :: finals + else finals + val q = LO.State { + id = id, startState = asSS, label = res, + final = Vector.foldri addFinal [] res, + next = ref [] + } + in + n := id+1; + states := q :: !states; + (true, q, Map.insert(stateMap, res, q)) + end + | SOME q => (false, q, stateMap) + (* end case *)) + fun initIter (states, stateMap, []) = (List.rev states, stateMap) + | initIter (states, stateMap, vec::vecs) = let + val (_, q, stateMap') = mkState (stateMap, vec, true) + in initIter (q :: states, stateMap', vecs) + end + val (initStates, initStatemap) = initIter ([], Map.empty, startVecs) +(* val approxEdges : int ref = ref 0 + val actEdges : int ref = ref 0 *) + fun f (stateMap, []) = stateMap + | f (stateMap, LO.State{next, label, ...}::workList) = let + val firstErr = ref true + fun move ((res, edge), (stateMap, workList)) = + if Vector.all RE.isNone res (* if error transition *) + then ( +(* if !firstErr then + (firstErr := false; + actEdges := !actEdges + 1) + else (); + approxEdges := !approxEdges + 1; *) + (stateMap, workList)) + else let +(* val _ = approxEdges := !approxEdges + 1 *) + val (isNew, q, stateMap) = mkState (stateMap, res, false) + fun findOrReplEdge [] = ( +(* actEdges := !actEdges + 1; *) + [(edge, q)]) + | findOrReplEdge ((e, p)::es) = + if LO.sameState (p, q) then + (SIS.union (e, edge), p) :: es + else (e, p) :: findOrReplEdge es + in +(* next := (edge, q) :: !next; *) + next := findOrReplEdge (!next); + if isNew + then (stateMap, q::workList) + else (stateMap, workList) + end + val edges = RE.derivatives label + in + f (List.foldl move (stateMap, workList) edges) + end + in + ignore (f (initStatemap, initStates)); +(* print (String.concat [ + " -- Approximate edges: ", Int.toString (!approxEdges), "\n", + " -- Actual edges: ", Int.toString (!actEdges), "\n"]); *) + (initStates, List.rev(!states), !n) + end + + (* clamp a machine to the right character set *) +(* unused + fun clamp clampTo states = let + val ascii127 = SIS.interval (0w0, 0w127) + fun clampTrans (edge, q) = + (SIS.intersect (ascii127, edge), q) + fun clampState (LO.State{next, ...}) = + next := List.map clampTrans (!next) + in + (List.app clampState states; + states) + end +*) + + fun minimize (initStates, states, numStates) = let + val statesVec = Vector.fromList states + val marked = Array2.array (numStates, numStates, false) + fun isMarked (i, j) = if i < j then Array2.sub (marked, i, j) + else Array2.sub (marked, j, i) + fun mark (i, j) = if i < j then Array2.update (marked, i, j, true) + else Array2.update (marked, j, i, true) + fun appAll f = let + fun iter i = if i < numStates then (f i; iter (i+1)) + else () + in iter 0 end + fun appAll2 f = + appAll (fn i => + appAll (fn j => + if i < j then f (i, j) + else ())) + fun smallestUnmarked j = let + fun iter i = + if i < j then + if not (isMarked (i, j)) then i + else iter (i+1) + else j + in iter 0 end + fun markAllIf (i, f) = appAll (fn j => + if f (Vector.sub (statesVec, j)) + then mark (i, j) + else ()) + fun markAll i = markAllIf (i, fn _ => true) + fun idOf (LO.State {id, ...}) = id + fun stateOf id = Vector.sub (statesVec, id) + fun iter() = let + val changed = ref false + fun diffEdge (_, []) = false + | diffEdge ((set1, state1), (set2, state2)::edges) = + if SIS.isEmpty (SIS.intersect (set1, set2)) + then diffEdge ((set1, state1), edges) + else + if isMarked (idOf state1, idOf state2) + then true + else diffEdge ((set1, state1), edges) + fun getEdgesAndLabels (i) = let + val LO.State {next, ...} = Vector.sub (statesVec, i) + val allLabels = foldl SIS.union SIS.empty + (map (fn (set, _) => set) (!next)) + in + (!next, allLabels) + end + fun check (i, j) = + if isMarked (i, j) then () + else let + val (edgesi, labelsi) = getEdgesAndLabels i + val (edgesj, labelsj) = getEdgesAndLabels j + fun tryEdge edge = diffEdge (edge, edgesj) + in + case SIS.compare (labelsi, labelsj) + of EQUAL => + if List.exists tryEdge edgesi + then (mark (i, j); changed := true) + else () + | _ => (mark (i, j); changed := true) + end + in + appAll2 check; + if !changed then iter() else () + end + fun merge() = let + fun move (sym, s) = (sym, (stateOf o smallestUnmarked o idOf) s) + fun upd (LO.State {next, ...}) = next := (map move (!next)) + fun keep s = (smallestUnmarked (idOf s) = idOf s) + val states' = List.filter keep states + in + app upd states'; + (initStates, states', List.length states') + end + fun isInit s1 = List.exists (fn s2 => idOf s1 = idOf s2) initStates + fun isFinal (LO.State {final = [], ...}) = false + | isFinal _ = true + fun sameFinal (LO.State {final = f1, ...}) + (LO.State {final = f2, ...}) = + ListPair.allEq op= (f1, f2) + handle ListPair.UnequalLengths => false + in + app (fn s => markAllIf (idOf s, isInit)) + initStates; + app (fn s => markAllIf (idOf s, not o (sameFinal s))) + states; + iter(); + merge() + end + + fun gen spec = let +(* TODO: check for invalid start states on rules *) + val LexSpec.Spec {decls, conf, rules, eofRules} = spec + val LexSpec.Conf {structName, header, + arg, startStates, ...} = conf + val startStates' = AtomSet.add (startStates, Atom.atom "INITIAL") +(* + (* split out actions and associate each ruleSpec to an action ID + * + * Note: matchActions tries to find textually idential actions and map + * them to the same entry in the action vector + *) + fun matchActions rules = let + fun iter ((ruleSpec, action)::rules, + ruleSpecs, actions, actionMap, n) = let + val key = Atom.atom action + val (i, actions', actionMap', n') = + case AtomMap.find (actionMap, key) + of NONE => (n, action::actions, + AtomMap.insert (actionMap, key, n), + n+1) + | SOME i => (i, actions, actionMap, n) + in + iter (rules, (i, ruleSpec)::ruleSpecs, + actions', actionMap', n') + end + | iter ([], ruleSpecs, actions, _, _) = + (List.rev ruleSpecs, List.rev actions) + in + iter (rules, [], [], AtomMap.empty, 0) + end + val (ruleSpecs, actions) = matchActions rules +*) + val (ruleSpecs, actions) = ListPair.unzip rules + val actionsVec = Vector.fromList actions + val startStates = AtomSet.listItems startStates' + fun SSVec label = let + fun hasRule (NONE, re) = re + | hasRule (SOME ss, re) = + if AtomSet.member (ss, label) + then re + else RegExp.none + val rules = List.map hasRule ruleSpecs + in Vector.fromList rules + end + val (initStates, states, numStates) = + mkDFA (List.map SSVec startStates) + val (initStates, states, numStates) = + if !Options.minimize then + minimize (initStates, states, numStates) + else (initStates, states, numStates) + in LO.Spec { + decls = decls, + header = (case (header, structName) + of ("", "") => "structure Mlex" + | (hdr, "") => hdr + | ("", name) => "structure "^name + (* end case *)), + arg = arg, + actions = actionsVec, + dfa = states, + startStates = ListPair.zip + (List.map Atom.toString startStates, + initStates), + eofRules = case eofRules + of [] => [("_", "UserDeclarations.eof()")] + | _ => eofRules + } + end + + end diff --git a/ml-lpt/ml-ulex/main.sml b/ml-lpt/ml-ulex/main.sml new file mode 100644 index 0000000..6c05134 --- /dev/null +++ b/ml-lpt/ml-ulex/main.sml @@ -0,0 +1,104 @@ +(* main.sml + * + * COPYRIGHT (c) 2005 + * John Reppy (http://www.cs.uchicago.edu/~jhr) + * Aaron Turon (adrassi@gmail.com) + * All rights reserved. + * + * Driver for ml-ulex. + *) + +structure Main = + struct + + structure RE = RegExp + structure Lex = LexGen + structure LO = LexOutputSpec + + (* print debug messages, etc to stdErr *) + fun errPrint msg = TextIO.output (TextIO.stdErr, String.concat msg) + + val name = "ml-ulex" + + (* count the total number of DFA states *) + fun numStates (LO.Spec{dfa, ...}) = List.length dfa + + fun status s = errPrint ["[", name, ": ", s, "]\n"] + + fun frontEnd () = let + val fname = !Options.fname + val _ = if (String.size fname = 0) + then ( + errPrint [ + "No input file specified\n usage: ", + name, " ", Options.args, " file\n" + ]; + OS.Process.exit OS.Process.failure) + else () + val _ = status "parsing" + in + if !Options.lexCompat + then MLLexInput.parseFile fname + else MLULexInput.parseFile fname + end + + fun compile inSpec = let + val inSpec = if (!Options.beTest) + then LexSpec.emptyActions inSpec + else inSpec + val _ = status "DFA gen" + val outSpec = Lex.gen inSpec + val _ = errPrint [" ", Int.toString (numStates outSpec), " states in full DFA\n"] + val _ = if !Options.dump + then ( + status "DFA dump"; + DumpOutput.output (outSpec, !Options.fname)) + else () + in + outSpec + end + + fun backEnd outSpec = ( + if !Options.dot + then ( + status "DOT gen"; + DotOutput.output (outSpec, !Options.fname)) + else (); + status "SML gen"; + if (numStates outSpec > 150 andalso !Options.beMode = Options.BySize) + orelse !Options.beMode = Options.TableBased + then SMLTblOutput.output (outSpec, !Options.fname) + else SMLFunOutput.output (outSpec, !Options.fname); + if !Options.match + then ( + errPrint ["-- Interactive matching (blank line to quit) --\n"]; + Match.output (outSpec, !Options.fname)) + else ()) + + fun mlULex () = (case frontEnd () + of SOME inSpec => (backEnd (compile inSpec); OS.Process.success) + | NONE => OS.Process.failure + (* end case *)) + + fun main (_, args) = let + val _ = List.app Options.procArg args + in + mlULex() + end + handle Options.Usage msg => ( + TextIO.output(TextIO.stdErr, concat[ + "** ", msg, "\nusage: ml-ulex ", Options.args, "\n" + ]); + OS.Process.exit OS.Process.failure) + | ex => ( + TextIO.output(TextIO.stdErr, concat[ + "** uncaught exception ", General.exnName ex, + " [", General.exnMessage ex, "]\n" + ]); + app (fn s => TextIO.output(TextIO.stdErr, concat[ + " raised at ", s, "\n" + ])) + (SMLofNJ.exnHistory ex); + OS.Process.exit OS.Process.failure) + + end diff --git a/ml-lpt/ml-ulex/ml-ulex b/ml-lpt/ml-ulex/ml-ulex new file mode 100755 index 0000000..366a6ac --- /dev/null +++ b/ml-lpt/ml-ulex/ml-ulex @@ -0,0 +1,24 @@ +#!/bin/sh +# +# Wrapper for ml-ulex heap image +# + +mlulexdir=$0 +scripthome=${mlulexdir%ml-ulex} + +case `uname -s` in + Darwin) + case `uname -p` in + i386) heap=${scripthome}ml-ulex.x86-darwin ;; + powerpc) heap=${scripthome}ml-ulex.ppc-darwin ;; + esac ;; + *) heap=${scripthome}ml-ulex.x86-linux ;; +esac + +if test ! -r $heap ; then + echo "ml-ulex: no heap image!" + exit 1 +fi + +exec sml @SMLload=$heap $@ + diff --git a/ml-lpt/ml-ulex/mlton-main.sml b/ml-lpt/ml-ulex/mlton-main.sml new file mode 100644 index 0000000..4044fc7 --- /dev/null +++ b/ml-lpt/ml-ulex/mlton-main.sml @@ -0,0 +1,7 @@ +(* mlton-main.sml + * + * COPYRIGHT (c) 2009 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + *) + +val _ = OS.Process.exit (Main.main (CommandLine.name (), CommandLine.arguments ())) diff --git a/ml-lpt/ml-ulex/mlulex.mlb b/ml-lpt/ml-ulex/mlulex.mlb new file mode 100644 index 0000000..fade046 --- /dev/null +++ b/ml-lpt/ml-ulex/mlulex.mlb @@ -0,0 +1,48 @@ +(* ml-ulex.mlb + * + * COPYRIGHT (c) 2009 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * MLB file for building ml-ulex under MLton. + *) + +local + $(SML_LIB)/basis/basis.mlb + $(SML_LIB)/basis/sml-nj.mlb + $(SML_LIB)/smlnj-lib/Util/smlnj-lib.mlb + $(SML_LIB)/smlnj-lib/PP/pp-lib.mlb + $(SML_LIB)/mlyacc-lib/mlyacc-lib.mlb + ../lib/ml-lpt-lib.mlb + ../common/lpt-common.mlb + + reg-exp-sig.sml + options.sml + reg-exp.sml + BackEnds/lex-output-spec.sml + FrontEnds/lex-spec.sml + lex-gen.sml + FrontEnds/ml-ulex/ml-ulex.grm.sml + FrontEnds/ml-ulex/ml-ulex.lex.sml + FrontEnds/ml-ulex/ml-ulex-input.sml + BackEnds/output-sig.sml + BackEnds/Dot/dot-output.sml + BackEnds/SML/ml.sml + BackEnds/SML/string-template-ml-ulex.sml + BackEnds/SML/string-template-ml-lex.sml + BackEnds/SML/mlton-templates.sml + BackEnds/SML/sml-output-support.sml + BackEnds/SML/sml-fun-output.sml + BackEnds/Match/match.sml + FrontEnds/ml-lex/ml-lex.yacc.sig + FrontEnds/ml-lex/ml-lex.lex.sml + FrontEnds/ml-lex/ml-lex.yacc.sml + FrontEnds/ml-lex/ml-lex-input.sml + BackEnds/Dump/dump-output.sml + BackEnds/SML/sml-tbl-output.sml + main.sml + +in + + mlton-main.sml + +end diff --git a/ml-lpt/ml-ulex/options.sml b/ml-lpt/ml-ulex/options.sml new file mode 100644 index 0000000..42f1da9 --- /dev/null +++ b/ml-lpt/ml-ulex/options.sml @@ -0,0 +1,48 @@ +(* options.sml + * + * COPYRIGHT (c) 2006 + * Aaron Turon (adrassi@gmail.com) + * All rights reserved. + * + * Command-line options for ml-ulex + *) + +structure Options = + struct + + exception Usage of string + + datatype be_mode = BySize | TableBased | FnBased + + val fname : string ref = ref "" + val lexCompat : bool ref = ref false + val dump : bool ref = ref false + val dot : bool ref = ref false + val match : bool ref = ref false + val beTest : bool ref = ref false + val minimize : bool ref = ref false + val beMode : be_mode ref = ref BySize + val strictSML : bool ref = ref false + + fun procArg arg = (case arg + of "--dot" => dot := true + | "--dump" => dump := true + | "--match" => match := true + | "--testbe" => beTest := true + | "--ml-lex-mode" => lexCompat := true + | "--minimize" => minimize := true + | "--table-based" => beMode := TableBased + | "--fn-based" => beMode := FnBased + | "--strict-sml" => strictSML := true + | file => + if String.isPrefix "--" file + then raise Usage(concat["Unrecognized option '", file, "'"]) + else if String.size (!fname) > 0 + then raise Usage "Only one input file may be specified" + else fname := file + (* end case *)) + + (* the arguments *) + val args = "[--dot] [--dump] [--match] [--ml-lex-mode] [--minimize] [--table-based] [--fn-based] [--strict-sml]" + + end diff --git a/ml-lpt/ml-ulex/reg-exp-sig.sml b/ml-lpt/ml-ulex/reg-exp-sig.sml new file mode 100644 index 0000000..30a38f5 --- /dev/null +++ b/ml-lpt/ml-ulex/reg-exp-sig.sml @@ -0,0 +1,48 @@ +(* reg-exp-sig.sml + * + * COPYRIGHT (c) 2005 + * John Reppy (http://www.cs.uchicago.edu/~jhr) + * Aaron Turon (adrassi@gmail.com) + * All rights reserved. + * + * RE representation and manipulation + *) + +signature REG_EXP = + sig + + structure Sym : INTERVAL_DOMAIN + structure SymSet : INTERVAL_SET + + type symbol = UTF8.wchar + type sym_set = SymSet.set + type re + + val any : re (* wildcard *) + val none : re (* EMPTY language *) + val epsilon : re (* the nil character (of length 0) *) + + val mkSym : symbol -> re + val mkSymSet : sym_set -> re + + val mkOr : re * re -> re + val mkAnd : re * re -> re + val mkXor : re * re -> re + val mkNot : re -> re + val mkConcat : re * re -> re + val mkClosure : re -> re + val mkOpt : re -> re + val mkRep : re * int * int -> re + val mkAtLeast : re * int -> re + + val isNone : re -> bool + val nullable : re -> bool + val derivative : symbol -> re -> re + val derivatives : re Vector.vector -> + ((re Vector.vector) * sym_set) list + + val symToString : symbol -> string + val toString : re -> string + val compare : re * re -> order + + end diff --git a/ml-lpt/ml-ulex/reg-exp.sml b/ml-lpt/ml-ulex/reg-exp.sml new file mode 100644 index 0000000..ce9866e --- /dev/null +++ b/ml-lpt/ml-ulex/reg-exp.sml @@ -0,0 +1,499 @@ +(* reg-exp-fn.sml + * + * COPYRIGHT (c) 2005 + * John Reppy (http://www.cs.uchicago.edu/~jhr) + * Aaron Turon (adrassi@gmail.com) + * All rights reserved. + * + * Regular expression representation and manipulation. + * + * The main points here are to: + * (1) make it easy for an RE parser to construct + * RE expressions + * (2) canonicalize REs for effective comparison + * (3) implement the RE derivatives algorithm + * + * See the implementation notes for details on the derivatives + * algorithm and the canonicalization strategy. + *) + +structure RegExp : REG_EXP = + struct + + structure W = Word + + (* symbols (i.e., words) *) + structure Sym = + struct + + type point = W.word + + val compare = W.compare + val minPt : W.word = 0w0 + val maxPt = W.notb 0w0 + + fun succ (w : W.word) = + if w = W.notb 0w0 then w + else w + 0w1 + fun pred (w : W.word) = + if w = 0w0 then w + else w - 0w1 + + fun isSucc (w1, w2) = (succ w1 = w2) + + end + + structure SymSet = IntervalSetFn(Sym) + + type symbol = Sym.point + type sym_set = SymSet.set + + structure SIS = SymSet + + (* REs *) + datatype re + = Epsilon (* matches the empty string *) + | Any (* matches any single symbol *) + | None (* matches nothing (i.e. the empty language) *) + | SymSet of sym_set + | Concat of re list + | Closure of re + | Op of (rator * re list) (* list length <> 1 and in sorted order *) + | Not of re + and rator = OR | AND | XOR + + (* we give a total order to REs; this is useful for canonicalization *) + fun compare (re1, re2) = let + fun cmpOp (OR, OR) = EQUAL + | cmpOp (OR, _) = LESS + | cmpOp (_, OR) = GREATER + | cmpOp (AND, AND) = EQUAL + | cmpOp (AND, _) = LESS + | cmpOp (_, AND) = GREATER + | cmpOp (XOR, XOR) = EQUAL + fun compareList (res1, res2) = List.collate compare (res1, res2) + in + case (re1, re2) + of (Epsilon, Epsilon) => EQUAL + | (Epsilon, _) => LESS + | (_, Epsilon) => GREATER + | (Any, Any) => EQUAL + | (Any, _) => LESS + | (_, Any) => GREATER + | (None, None) => EQUAL + | (None, _) => LESS + | (_, None) => GREATER + | (SymSet a, SymSet b) => SIS.compare(a, b) + | (SymSet a, _) => LESS + | (_, SymSet b) => GREATER + | (Concat a, Concat b) => compareList(a, b) + | (Concat a, _) => LESS + | (_, Concat b) => GREATER + | (Closure a, Closure b) => compare(a, b) + | (Closure a, _) => LESS + | (_, Closure b) => GREATER + | (Op(op1, res1), Op(op2, res2)) => (case cmpOp (op1, op2) + of EQUAL => compareList(res1, res2) + | order => order + (* end case *)) + | (Op _, _) => LESS + | (_, Op _) => GREATER + | (Not a, Not b) => compare(a, b) + (* end case *) + end +(* val sort = ListMergeSort.sort (fn (re1, re2) => compare(re1, re2) = LESS) *) + + (* primitive REs *) + + val any = Any + val none = None + val epsilon = Epsilon + + (* canonical constructors *) + + fun mkSymSet c = + if SIS.isEmpty c then None + else if SIS.isUniverse c then Any + else SymSet c + + fun mkSym sym = mkSymSet (SIS.singleton sym) + + fun mkConcat (re1, re2) = (case (re1, re2) + of (Epsilon, re2) => re2 + | (re1, Epsilon) => re1 + | (None, _) => None + | (_, None) => None + | (Concat res1, Concat res2) => Concat(res1@res2) + | (re1, Concat res2) => Concat(re1::res2) + | (Concat res1, re2) => Concat(res1@[re2]) + | _ => Concat[re1, re2] + (* end case *)) + + fun mkConcatList [] = Epsilon + | mkConcatList (re::res) = mkConcat(re, mkConcatList res) + + fun mkClosure (Epsilon) = Epsilon + | mkClosure (None) = Epsilon + | mkClosure (re as Closure _) = re + | mkClosure re = Closure re + + fun mergeSIS (inRes, mop) = let + fun isSIS (SymSet _) = true + | isSIS _ = false + val (siss, res) = List.partition isSIS inRes + in + case siss + of [] => inRes + | [re] => inRes + | sis::siss' => let + fun wrapmop (SymSet s1, SymSet s2) = SymSet (mop (s1, s2)) + | wrapmop _ = raise Fail "BUG: wrapmop: SymSet expected" + val merged = List.foldl wrapmop sis siss' + fun reinsert (re1, []) = [re1] + | reinsert (re1, re::res) = (case compare (re1, re) + of LESS => re1::re::res + | EQUAL => raise Fail "BUG: mergeSIS: only one SymSet expected" + | GREATER => re::(reinsert (re1, res)) + (* end case *)) + in + reinsert (merged, res) + end + (* end case *) + end + + fun mkOr (re1, re2) = let + fun merge ([], res2) = res2 + | merge (res1, []) = res1 + | merge (re1::r1, re2::r2) = (case compare(re1, re2) + of LESS => re1::merge(r1, re2::r2) + | EQUAL => merge (re1::r1, r2) + | GREATER => re2 :: merge(re1::r1, r2) + (* end case *)) + fun mk (a, b) = (case mergeSIS(merge(a, b), SIS.union) + of [] => None + | [re] => re + | res => Op(OR, res) + (* end case *)) + in + case (re1, re2) + of (None, _) => re2 + | (_, None) => re1 + | (SymSet s1, SymSet s2) => mkSymSet (SIS.union (s1, s2)) + | (Op(OR, res1), Op(OR, res2)) => mk(res1, res2) + | (Op(OR, res1), _) => mk(res1, [re2]) + | (_, Op(OR, res2)) => mk([re1], res2) + | (re1, re2) => (case compare(re1, re2) + of LESS => Op(OR, [re1, re2]) + | EQUAL => re1 + | GREATER => Op(OR, [re2, re1]) + (* end case *)) + (* end case *) + end + + fun mkAnd (re1, re2) = let + fun merge ([], res2) = res2 + | merge (res1, []) = res1 + | merge (re1::r1, re2::r2) = (case compare(re1, re2) + of LESS => re1::merge(r1, re2::r2) + | EQUAL => merge (re1::r1, r2) + | GREATER => re2 :: merge(re1::r1, r2) + (* end case *)) + fun mk (a, b) = (case mergeSIS(merge(a, b), SIS.intersect) + of [] => None + | [re] => re + | res => Op(AND, res) + (* end case *)) + in + case (re1, re2) + of (None, _) => None + | (_, None) => None + | (SymSet s1, SymSet s2) => mkSymSet (SIS.intersect (s1, s2)) + | (Op(AND, res1), Op(AND, res2)) => mk(res1, res2) + | (Op(AND, res1), _) => mk(res1, [re2]) + | (_, Op(AND, res2)) => mk([re1], res2) + | (re1, re2) => (case compare(re1, re2) + of LESS => Op(AND, [re1, re2]) + | EQUAL => re1 + | GREATER => Op(AND, [re2, re1]) + (* end case *)) + (* end case *) + end + + fun mkXor (re1, re2) = let + fun merge ([], res2) = res2 + | merge (res1, []) = res1 + | merge (re1::r1, re2::r2) = (case compare(re1, re2) + of LESS => re1::merge(r1, re2::r2) + | EQUAL => merge (r1, r2) + | GREATER => re2 :: merge(re1::r1, r2) + (* end case *)) + fun mk (a, b) = (case merge(a, b) + of [] => None + | [re] => re + | res => Op(XOR, res) + (* end case *)) + in + case (re1, re2) + of (None, _) => re2 + | (_, None) => re1 + | (SymSet s1, SymSet s2) => + mkSymSet (SIS.intersect ( + SIS.union (s1, s2), + SIS.complement (SIS.intersect (s1, s2)) + )) + | (Op(XOR, res1), Op(XOR, res2)) => mk(res1, res2) + | (Op(XOR, res1), _) => mk(res1, [re2]) + | (_, Op(XOR, res2)) => mk([re1], res2) + | (re1, re2) => (case compare(re1, re2) + of LESS => Op(XOR, [re1, re2]) + | EQUAL => None (* FIXME is this right? *) + | GREATER => Op(XOR, [re2, re1]) + (* end case *)) + (* end case *) + end + + fun mkOp (OR, re1, re2) = mkOr(re1, re2) + | mkOp (AND, re1, re2) = mkAnd(re1, re2) + | mkOp (XOR, re1, re2) = mkXor(re1, re2) + + fun mkNot (Not re) = re + | mkNot (None) = mkClosure(Any) + | mkNot re = Not re + + fun mkOpt re = mkOr(Epsilon, re) + + fun mkRep (re, low, high) = let + fun lowReps 0 = Epsilon + | lowReps 1 = re + | lowReps n = mkConcat (re, lowReps (n-1)) + fun highReps 0 = Epsilon + | highReps 1 = mkOpt re + | highReps n = mkConcat (mkOpt re, highReps (n-1)) + in + if high < low + then raise Subscript + else mkConcat (lowReps low, highReps (high - low)) + end + + fun mkAtLeast (re, 0) = mkClosure re + | mkAtLeast (re, n) = mkConcat (re, mkAtLeast (re, n-1)) + + fun isNone None = true + | isNone _ = false + + fun symToString w = if !Options.lexCompat + then concat["#\"", Char.toString (Char.chr (W.toInt w)), "\""] + handle Overflow => raise Fail "(BUG) RegExp: symToString on a nonascii character" + else "0wx" ^ W.toString w + + fun SISToString s = let + fun c2s c = if (c < 0w128) + then Char.toString (Char.chr (W.toInt c)) + else "\\u" ^ W.toString c + fun f (a, b) = if (a = b) + then c2s a + else concat[c2s a, "-", c2s b] + (* we want to describe the interval set as concisely as possible, + * so we compare the number of intervals in the set to the number + * of intervals in its complement, and use the smaller of the two. + *) + val intervals = SIS.intervals s + val intervals' = SIS.intervals (SIS.complement s) + val (neg, rngs) = if List.length intervals <= List.length intervals' + then ("", intervals) + else ("^", intervals') + val str = neg ^ (String.concatWithMap "" f rngs) + in + if String.size str <= 1 + then str + else concat["[", str, "]"] + end + + fun toString re = let + fun opToString OR = "|" + | opToString AND = "&" + | opToString XOR = "^" + fun opPrec OR = 0 + | opPrec AND = 2 + | opPrec XOR = 1 + fun prec Any = 6 + | prec None = 6 + | prec Epsilon = 6 + | prec (SymSet _) = 6 + | prec (Concat[]) = 6 + | prec (Concat _) = 3 + | prec (Closure _) = 5 + | prec (Op(_, [])) = 6 + | prec (Op(_, [re])) = prec re + | prec (Op(rator, _)) = opPrec rator + | prec (Not _) = 4 + fun toS (Any, l) = "{any}" :: l + | toS (None, l) = "{none}" :: l + | toS (Epsilon, l) = "{epsilon}" :: l + | toS (SymSet s, l) = SISToString s :: l + | toS (Concat[], l) = "" :: l + | toS (Concat[re], l) = toS(re, l) + | toS (Concat res, l) = toS'(res, 3, "", l) + | toS (Closure re, l) = paren(5, re, "*" :: l) + | toS (Op(_, []), l) = "{}" :: l + | toS (Op(rator, [re]), l) = toS(re, l) + | toS (Op(rator, res), l) = toS'(res, opPrec rator, opToString rator, l) + | toS (Not re, l) = "!" :: paren(4, re, l) + and toS' ([], p, rator, l) = raise Fail "empty" + | toS' (re::r, p, rator, l) = + paren(p, re, List.foldr + (fn (re, l) => rator :: paren(p, re, l)) + l r) + and paren (p, re, l) = if (p <= prec re) + then toS (re, l) + else "(" :: toS(re, ")" :: l) + in + String.concat(toS(re, [])) + end + + (* true iff epsilon is in the language recognized by the RE *) + fun nullable Any = false + | nullable None = false + | nullable Epsilon = true + | nullable (SymSet _) = false + | nullable (Closure _) = true + | nullable (Concat res) = List.all nullable res + | nullable (Op(OR, res)) = List.exists nullable res + | nullable (Op(AND, res)) = List.all nullable res + | nullable (Op(XOR, re::r)) = + (nullable re andalso not(List.exists nullable r)) + orelse nullable(Op(XOR, r)) + | nullable (Op(XOR, [])) = raise Fail "(BUG) RegExp: RE operator has no operands" + | nullable (Not re) = not(nullable re) + + fun delta re = if (nullable re) then Epsilon else None + + (* compute derivative w.r.t. a symbol *) + fun derivative a = let + fun da Any = Epsilon + | da None = None + | da Epsilon = None + | da (SymSet s) = if SIS.member(s, a) then Epsilon else None + | da (re as Closure re') = mkConcat(da re', re) + | da (Concat[]) = None + | da (Concat[re]) = da re + | da (Concat(re::res)) = + mkOr( + mkConcatList((da re)::res), + mkConcat(delta re, da(Concat res))) + | da (Op(_, [])) = raise Fail "(BUG) RegExp: RE operator has no operands" + | da (Op(rator, [re])) = da re + | da (Op(rator, re::res)) = mkOp(rator, da re, da(Op(rator, res))) + | da (Not re) = mkNot(da re) + in + da + end + + structure Map = RedBlackMapFn ( + struct + type ord_key = re Vector.vector + val compare = Vector.collate compare + end) + + structure SISSet = RedBlackSetFn ( + struct + type ord_key = SIS.set + val compare = SIS.compare + end) + +(* + (* yields the smallest partitioning of the alphabet that + * "respects" the given sets. if S is one of the sets + * returned by compress, then it must be either disjoint + * with or a subset of each of the sets in the sets + * parameter. see the implementation notes for more detail. + *) + fun compress sets = let + (* performs partition of a set againt a list of sets, + * assuming the list of sets is pairwise disjoint. + *) + fun part1 (set, []) = + if SIS.isEmpty set then [] + else [set] + | part1 (set1, set2 :: ss) = + if SIS.isEmpty set1 then + set2 :: ss + else let + val i = SIS.intersect (set1, set2) + in if SIS.isEmpty i then + (set2 :: (part1 (set1, ss))) + else let + val s1 = SIS.difference (set1, i) + val s2 = SIS.difference (set2, i) + val ss' = if SIS.isEmpty s1 then ss + else part1 (s1, ss) + in if SIS.isEmpty s2 then + (i :: ss') + else + (i :: s2 :: ss') + end + end + in + List.foldl part1 [] (SIS.universe::sets) + end +*) + + fun cross (s1, s2) = + SISSet.foldl (fn (s1elem, accum) => + SISSet.foldl (fn (s2elem, accum) => + SISSet.add (accum, SIS.intersect (s1elem, s2elem))) + accum s2) + SISSet.empty s1 + + val trivial = SISSet.singleton (SIS.universe) + + fun derivatives (res : re Vector.vector) = let + (* compute approximate derivative classes *) + fun ds Any = trivial + | ds None = trivial + | ds Epsilon = trivial + | ds (SymSet s) = SISSet.fromList [s, SIS.complement s] + | ds (Closure re) = ds re + | ds (Concat []) = trivial + | ds (Concat [re]) = ds re + | ds (Concat (re::res)) = if nullable re + then cross (ds re, ds (Concat res)) + else ds re + | ds (Op(rator, res)) = foldl cross trivial (map ds res) + | ds (Not re) = ds re + val sets = Vector.foldl + (fn (re, sets) => cross (ds re, sets)) + trivial res +(* val sets' = compress sets *) + fun classes ([], classMap) = Map.listItemsi classMap + | classes (set::sets, classMap) = let + (* use first element as representative of the equiv class *) + val (rep, _) = List.hd (SIS.intervals set) + val derivs = Vector.map (derivative rep) res + in + case Map.find (classMap, derivs) + of NONE => classes (sets, Map.insert(classMap, derivs, set)) + | SOME set' => let + val map' = Map.insert(classMap, derivs, SIS.union (set, set')) + in + classes (sets, map') + end + (* end case *) + end + fun classes ([], ls) = ls + | classes (set::sets, ls) = if SIS.isEmpty set + then classes (sets, ls) + else let + val (rep, _) = List.hd (SIS.intervals set) + val derivs = Vector.map (derivative rep) res + in +(* print (SISToString set ^ "\n"); *) + (derivs, set)::classes (sets, ls) + end + in +(* classes (sets', Map.empty) *) +(* print "\n"; *) + classes (SISSet.listItems sets, []) + end + + end diff --git a/ml-lpt/ml-ulex/sources.cm b/ml-lpt/ml-ulex/sources.cm new file mode 100644 index 0000000..64e71b7 --- /dev/null +++ b/ml-lpt/ml-ulex/sources.cm @@ -0,0 +1,77 @@ +(* sources.cm + * + * COPYRIGHT (c) 2005 + * John Reppy (http://www.cs.uchicago.edu/~jhr) + * Aaron Turon (adrassi@gmail.com) + * All rights reserved. + * + *) + +Library + structure Main +is + + $/basis.cm + $/smlnj-lib.cm + $/pp-lib.cm + $/ml-yacc-lib.cm + + ../lib/ml-lpt-lib.cm + ../common/lpt-common.cm + + FrontEnds/common/lex-spec.sml + FrontEnds/common/input-sig.sml + +(* During installation we rely on pre-generated files + * to avoid certain chicken-and-egg problems. *) +#if defined(NO_ML_LEX) orelse defined(NO_PLUGINS) + FrontEnds/ml-lex/ml-lex.lex.sml +#else + FrontEnds/ml-lex/ml-lex.lex : MLLex +#endif + +(* During installation we rely on pre-generated files + * to avoid certain chicken-and-egg problems. *) +#if defined(NO_ML_YACC) orelse defined(NO_PLUGINS) + FrontEnds/ml-lex/ml-lex.yacc.sig + FrontEnds/ml-lex/ml-lex.yacc.sml +#else + FrontEnds/ml-lex/ml-lex.yacc : MLYacc +#endif + + FrontEnds/ml-lex/ml-lex-input.sml + + FrontEnds/ml-ulex/ml-ulex-input.sml + FrontEnds/ml-ulex/ml-ulex.lex.sml + +(* + FrontEnds/ml-ulex/ml-ulex-input-bootstrap.sml + FrontEnds/ml-ulex/ml-ulex-bootstrap.lex.sml +*) + +(* During installation we rely on pre-generated files + * to avoid certain chicken-and-egg problems. *) +# if defined(NO_ML_ANTLR) orelse defined(NO_PLUGINS) + FrontEnds/ml-ulex/ml-ulex.grm.sml +# else + FrontEnds/ml-ulex/ml-ulex.grm : ml-antlr +#endif + + BackEnds/lex-output-spec.sml + BackEnds/output-sig.sml + + BackEnds/Dot/dot-output.sml + BackEnds/Dump/dump-output.sml + BackEnds/Match/match.sml + + BackEnds/SML/ml.sml + BackEnds/SML/sml-output-support.sml + BackEnds/SML/sml-fun-output.sml + BackEnds/SML/sml-tbl-output.sml + BackEnds/SML/smlnj-templates.sml + + lex-gen.sml + reg-exp-sig.sml + reg-exp.sml + main.sml + options.sml diff --git a/ml-lpt/ml-ulex/todo b/ml-lpt/ml-ulex/todo new file mode 100644 index 0000000..5e44a82 --- /dev/null +++ b/ml-lpt/ml-ulex/todo @@ -0,0 +1,19 @@ +%full + +better error handling on ml-lex.lex and ml-lex.yacc +check for invalid start states on rules + +escape codes and DOT + +(*line stuff + +docs + +lookahead? + + +----- + +changes: + -- yylineno not slow + -- %count and %reject assumed diff --git a/ml-lpt/ml-ulex/tool/.cm/GUID/mllex-tool.sml b/ml-lpt/ml-ulex/tool/.cm/GUID/mllex-tool.sml new file mode 100644 index 0000000..73ac03a --- /dev/null +++ b/ml-lpt/ml-ulex/tool/.cm/GUID/mllex-tool.sml @@ -0,0 +1 @@ +guid-$/(mllex-tool.cm):mllex-tool.sml-1714016083.328 diff --git a/ml-lpt/ml-ulex/tool/.cm/GUID/tool.sml b/ml-lpt/ml-ulex/tool/.cm/GUID/tool.sml new file mode 100644 index 0000000..6cb275c --- /dev/null +++ b/ml-lpt/ml-ulex/tool/.cm/GUID/tool.sml @@ -0,0 +1 @@ +guid-$/(ml-ulex-tool.cm):tool.sml-1714016086.817 diff --git a/ml-lpt/ml-ulex/tool/.cm/SKEL/mllex-tool.sml b/ml-lpt/ml-ulex/tool/.cm/SKEL/mllex-tool.sml new file mode 100644 index 0000000..6763835 --- /dev/null +++ b/ml-lpt/ml-ulex/tool/.cm/SKEL/mllex-tool.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f1d"Tools"ad"ULexMLLexTool"h0 \ No newline at end of file diff --git a/ml-lpt/ml-ulex/tool/.cm/SKEL/tool.sml b/ml-lpt/ml-ulex/tool/.cm/SKEL/tool.sml new file mode 100644 index 0000000..76e0e5f --- /dev/null +++ b/ml-lpt/ml-ulex/tool/.cm/SKEL/tool.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f1d"Tools"ad"ULexTool"h0 \ No newline at end of file diff --git a/ml-lpt/ml-ulex/tool/.cm/amd64-unix/mllex-tool.sml b/ml-lpt/ml-ulex/tool/.cm/amd64-unix/mllex-tool.sml new file mode 100644 index 0000000..884f6df Binary files /dev/null and b/ml-lpt/ml-ulex/tool/.cm/amd64-unix/mllex-tool.sml differ diff --git a/ml-lpt/ml-ulex/tool/.cm/amd64-unix/tool.sml b/ml-lpt/ml-ulex/tool/.cm/amd64-unix/tool.sml new file mode 100644 index 0000000..e998fbb Binary files /dev/null and b/ml-lpt/ml-ulex/tool/.cm/amd64-unix/tool.sml differ diff --git a/ml-lpt/ml-ulex/tool/ext.sml b/ml-lpt/ml-ulex/tool/ext.sml new file mode 100644 index 0000000..1ca58d3 --- /dev/null +++ b/ml-lpt/ml-ulex/tool/ext.sml @@ -0,0 +1,19 @@ +(* ext.sml + * + * Plugin for registering classifiers. + * + * Copyright (c) 2007 by The Fellowship of SML/NJ + * + * Author: Matthias Blume (blume@tti-c.org) + *) +structure ULexLexExt = struct + local + val suffixes = ["lex"] + val class = "ml-ulex" + fun sfx s = + Tools.registerClassifier + (Tools.stdSfxClassifier { sfx = s, class = class }) + in + val _ = app sfx suffixes + end +end diff --git a/ml-lpt/ml-ulex/tool/lex-ext.cm b/ml-lpt/ml-ulex/tool/lex-ext.cm new file mode 100644 index 0000000..93428a1 --- /dev/null +++ b/ml-lpt/ml-ulex/tool/lex-ext.cm @@ -0,0 +1,13 @@ +(* + * Plugin for "lex" suffix that points to ml-ulex. + * + * (C) 2007 The Fellowship of SML/NJ. + * + * This should currently not be installed as it conflicts with + * the equally-named plugin library for legacy ml-lex. + *) +Library + structure ULexLexExt +is + $smlnj/cm/tools.cm + ext.sml diff --git a/ml-lpt/ml-ulex/tool/ml-ulex-tool.cm b/ml-lpt/ml-ulex/tool/ml-ulex-tool.cm new file mode 100644 index 0000000..8c64f29 --- /dev/null +++ b/ml-lpt/ml-ulex/tool/ml-ulex-tool.cm @@ -0,0 +1,10 @@ +(* + * The plugin library for ML-ULex + * + * (C) 2007 The Fellowship of SML/NJ + *) +Library + structure ULexTool +is + $smlnj/cm/tools.cm + tool.sml diff --git a/ml-lpt/ml-ulex/tool/mllex-tool.cm b/ml-lpt/ml-ulex/tool/mllex-tool.cm new file mode 100644 index 0000000..74f0d73 --- /dev/null +++ b/ml-lpt/ml-ulex/tool/mllex-tool.cm @@ -0,0 +1,13 @@ +(* + * Plugin for the "mllex" CM tool class that causes "legacy" ml-lex + * input to be processed by "ml-ulex --ml-lex-mode". + * + * (C) 2007 The Fellowship of SML/NJ + * + * Author: Matthias Blume (blume@tti-c.org) + *) +Library + structure ULexMLLexTool +is + $smlnj/cm/tools.cm + mllex-tool.sml diff --git a/ml-lpt/ml-ulex/tool/mllex-tool.sml b/ml-lpt/ml-ulex/tool/mllex-tool.sml new file mode 100644 index 0000000..bd5dffc --- /dev/null +++ b/ml-lpt/ml-ulex/tool/mllex-tool.sml @@ -0,0 +1,18 @@ +(* mllex-tool.sml + * + * Plugin for the "mllex" CM tool class that causes "legacy" ml-lex + * input to be processed by "ml-ulex --ml-lex-mode". + * + * Copyright (c) 2007 by The Fellowship of SML/NJ + * + * Author: Matthias Blume (blume@tti-c.org) + *) +structure ULexMLLexTool = struct + val _ = Tools.registerStdShellCmdTool + { tool = "ULex-ML-Lex", + class = "mllex", + cmdStdPath = fn () => ("ml-ulex", ["--ml-lex-mode"]), + template = NONE, + extensionStyle = Tools.EXTEND [("sml", SOME "sml", fn too => too)], + dflopts = [] } +end diff --git a/ml-lpt/ml-ulex/tool/tool.sml b/ml-lpt/ml-ulex/tool/tool.sml new file mode 100644 index 0000000..bb33a81 --- /dev/null +++ b/ml-lpt/ml-ulex/tool/tool.sml @@ -0,0 +1,16 @@ +(* + * Running ML-ULex from CM. + * + * (C) 2007 The Fellowship of SML/NJ. + *) +structure ULexTool = struct + val _ = Tools.registerStdShellCmdTool { + tool = "ML-ULex", + class = "ml-ulex", + cmdStdPath = fn () => ("ml-ulex", []), + template = NONE, + extensionStyle = + Tools.EXTEND [("sml", SOME "sml", fn too => too)], + dflopts = [] + } +end diff --git a/ml-yacc.tgz b/ml-yacc.tgz new file mode 100644 index 0000000..e911d35 Binary files /dev/null and b/ml-yacc.tgz differ diff --git a/ml-yacc/COPYRIGHT b/ml-yacc/COPYRIGHT new file mode 100644 index 0000000..e17ff99 --- /dev/null +++ b/ml-yacc/COPYRIGHT @@ -0,0 +1,20 @@ +ML-YACC COPYRIGHT NOTICE, LICENSE AND DISCLAIMER. + +Copyright 1989, 1990 by David R. Tarditi Jr. and Andrew W. Appel + +Permission to use, copy, modify, and distribute this software and its +documentation for any purpose and without fee is hereby granted, +provided that the above copyright notice appear in all copies and that +both the copyright notice and this permission notice and warranty +disclaimer appear in supporting documentation, and that the names of +David R. Tarditi Jr. and Andrew W. Appel not be used in advertising +or publicity pertaining to distribution of the software without +specific, written prior permission. + +David R. Tarditi Jr. and Andrew W. Appel disclaim all warranties with regard to +this software, including all implied warranties of merchantability and fitness. +In no event shall David R. Tarditi Jr. and Andrew W. Appel be liable for any +special, indirect or consequential damages or any damages whatsoever resulting +from loss of use, data or profits, whether in an action of contract, negligence +or other tortious action, arising out of or in connection with the use or +performance of this software. diff --git a/ml-yacc/INSTALL b/ml-yacc/INSTALL new file mode 100644 index 0000000..f2ab8e7 --- /dev/null +++ b/ml-yacc/INSTALL @@ -0,0 +1,26 @@ +Installation instructions for ML-Yacc +------------------------------------- + +ML-Yacc will normally be automatically +installed as part of the SML/NJ system +by the SML/NJ installer. + +To install by hand (e.g., if you make +your own modifications), run the + + ./build + +script in this directory and then move +the file + + src/ml-yacc.$ARCH-$OS + +to the heap-file directory. + +Running ./build requires a properly +functioning installation of SML/NJ. +If you make modifications to lexer or +parser, be sure to also have properly +functioning instances of ml-lex and +ml-yacc installed before you run +./build. diff --git a/ml-yacc/README b/ml-yacc/README new file mode 100644 index 0000000..3af60ab --- /dev/null +++ b/ml-yacc/README @@ -0,0 +1,23 @@ +Copyright (c) 1989, 1990, 1991 Andrew W. Appel and David R. Tarditi Jr. + +This directory contains ML-Yacc, an LALR parser generator for Standard ML. +ML-Yacc is distributed subject to the terms of the accompanying ML-Yacc +copyright notice, license, and disclaimer in the file COPYRIGHT. + +Files of interest (those marked with a * must be built by the person +installing ML-Yacc): + + README - this file + INSTALL - installation instructions. + COPYRIGHT - this software is distributed subject to the + terms of this file. + lib - implementation of the ML-Yacc library + (aka $/ml-yacc-lib.cm); this library is used + by both by applications and by ML-Yacc itself + (because ML-Yacc IS an ML-Yacc application) + src - source code for the parser-generator part of ML-Yacc. + doc - documentation for ML-Yacc. Please read this before + using ML-Yacc + examples - sample parsers built with ML-Yacc + build - script that invokes ../../bin/ml-build to construct + the stand-alone version of ML-Yacc diff --git a/ml-yacc/build.bat b/ml-yacc/build.bat new file mode 100644 index 0000000..026462d --- /dev/null +++ b/ml-yacc/build.bat @@ -0,0 +1,2 @@ +cd src +..\..\bin\ml-build.bat -D NO_ML_YACC -D NO_ML_LEX ml-yacc.cm ExportParseGen.parseGen ml-yacc diff --git a/ml-yacc/build.sh b/ml-yacc/build.sh new file mode 100755 index 0000000..efb1833 --- /dev/null +++ b/ml-yacc/build.sh @@ -0,0 +1,50 @@ +#!/bin/sh +# +# Copyright (c) 2018 The Fellowship of SML/NJ (https://smlnj.org) +# +# build script for ml-yacc under the new runtime system. +# +# options: +# -o image -- specify the name of the heap image, "ml-yacc" +# is the default. + +CMD=$0 + +ROOT="ml-yacc" +HEAP_IMAGE="" +SMLNJROOT=`pwd`/.. +BIN=${INSTALLDIR:-$SMLNJROOT}/bin +BUILD=$BIN/ml-build +SIZE_OPT="-32" + +# +# process command-line options +# +while [ "$#" != "0" ] ; do + arg=$1 + shift + case $arg in + -32) SIZE_OPT=$arg ;; + -64) SIZE_OPT=$arg ;; + -o) + if [ "$#" = "0" ]; then + echo "$CMD: must supply image name for -o option" + exit 1 + fi + HEAP_IMAGE=$1; shift + ;; + *) + echo $CMD: invalid argument: $arg + exit 1 + ;; + esac +done + +if [ "$HEAP_IMAGE" = "" ]; then + HEAP_IMAGE="$ROOT" +fi + +cd src +"$BUILD" $SIZE_OPT -DNO_ML_YACC -DNO_ML_LEX ml-yacc.cm ExportParseGen.parseGen "$HEAP_IMAGE" + +exit 0 diff --git a/ml-yacc/doc/mlyacc.tex b/ml-yacc/doc/mlyacc.tex new file mode 100644 index 0000000..e26acf4 --- /dev/null +++ b/ml-yacc/doc/mlyacc.tex @@ -0,0 +1,1622 @@ +\documentstyle{article} +\title{ ML-Yacc User's Manual \\ + Version 2.4 + } +\author{ David R. Tarditi$^1$\\ + Andrew W. Appel$^2$\\ +\\ +$^1$Microsoft Research \\ +\\ +$^2$Department of Computer Science \\ + Princeton University \\ + Princeton, NJ 08544 +} +\date{April 24, 2000} + +\begin{document} +\maketitle +\begin{center} +(c) 1989, 1990, 1991,1994 Andrew W. Appel, David R. Tarditi +\end{center} + +{\bf +This software comes with ABSOLUTELY NO WARRANTY. It is subject only to +the terms of the ML-Yacc NOTICE, LICENSE, and DISCLAIMER (in the +file COPYRIGHT distributed with this software). +} + +New in this version: Improved error correction directive \verb|%change| +that allows multi-token insertions, deletions, substitutions. +Explanation of how to build a parser (Section 5) and the Calc example +(Section 7) revised for SML/NJ Version 110 and the use of CM. + +\newpage +\tableofcontents +\newpage + +\section{Introduction} +\subsection{General} +ML-Yacc is a parser generator for Standard ML modeled after the +Yacc parser generator. It generates parsers for LALR languages, like Yacc, +and has a similar syntax. The generated parsers use a different algorithm +for recovering from syntax errors than parsers generated by Yacc. +The algorithm is a partial implementation of an algorithm described in \cite{bf}. +A parser tries to recover from a syntax error +by making a single token insertion, deletion, or +substitution near the point in the input stream at which the error +was detected. The parsers delay the evaluation of semantic actions until +parses are completed successfully. This makes it possible for +parsers to recover from syntax errors that occur before the point +of error detection, but it does prevent the parsers from +affecting lexers in any significant way. The parsers +can insert tokens with values and substitute tokens with values +for other tokens. All symbols carry left and right position values +which are available to semantic actions and are used in +syntactic error messages. + +ML-Yacc uses context-free grammars to specify the syntax of languages to +be parsed. See \cite{ahu} for definitions and information on context-free +grammars and LR parsing. We briefly review some terminology here. A +context-free grammar is defined by a set of terminals $T$, a set of +nonterminals $NT$, a set of productions $P$, and a start +nonterminal $S$. +Terminals are interchangeably referred to as tokens. The terminal +and nonterminal sets are assumed to be disjoint. The set of symbols is the +union of the nonterminal and terminal sets. We use lower case +Greek letters to denote a string of symbols. We use upper case +Roman letters near the beginning of the alphabet to denote nonterminals. +Each production gives a +derivation of a string of symbols from a nonterminal, which we will +write as $A \rightarrow \alpha$. We define a relation between strings of +symbols $\alpha$ and $\beta$, written $\alpha \vdash \beta$ and read +as $\alpha$ derives $\beta$, if and only if $\alpha = \delta A \gamma$, +$\beta = \delta \phi \gamma$ and +there exists some production $A \rightarrow \phi$. We write the +transitive closure of this relation as +$\vdash_*$. We say that a string of terminals $\alpha$ is a valid sentence +of the language, {\em i.e.} it is derivable, if the start symbol +$S \vdash_* \alpha$. The sequence of derivations is often +visualized as a parse tree. + +ML-Yacc uses an attribute grammar scheme with synthesized attributes. +Each symbol in the grammar may have a value (i.e. attribute) associated +with it. Each production has a semantic action associated with it. +A production with a semantic action is called a rule. +Parsers perform bottom-up, left-to-right evaluations of parse trees using semantic +actions to compute values as they do so. Given a production +$P = A \rightarrow \alpha$, the corresponding semantic action is +used to compute a value for $A$ from the values of the symbols in $\alpha$. +If $A$ has no value, the semantic action is still evaluated but the value is ignored. +Each parse returns the value associated with the start symbol $S$ of the +grammar. A parse returns a nullary value if the start symbol does not carry a value. + +The synthesized attribute scheme can be adapted easily to inherited +attributes. An inherited attribute is a value which propagates from +a nonterminal to the symbols produced by the nonterminal according to +some rule. Since functions are values in ML, +the semantic actions for the derived symbols +can return functions which takes the +inherited value as an argument. + +\subsection{Modules} +ML-Yacc uses the ML modules facility to specify the interface between +a parser that it generates and a lexical analyzer that must be supplied +by you. It also uses the ML modules facility to factor out +a set of modules that are common to every generated parser. +These common modules include a parsing structure, which contains +an error-correcting LR parser\footnote{A plain LR parser is also +available.}, an LR table structure, and a structure +which defines the representation of terminals. ML-Yacc produces +a functor for a particular parser parameterized by the LR table +structure and the representation of terminals. This functor +contains values specific to the parser, such as the +LR table for the parser\footnote{The LR table is a value. The +LR table structure defines an abstract LR table type.}, the +semantic actions for the parser, and a structure containing +the terminals for the parser. ML-Yacc produces a signature +for the structure produced by applying this functor +and another signature for the structure containing the terminals for +the parser. You must +supply a functor for the lexing module parameterized this +structure. + +Figure 1 is a dependency diagram of the modules that summarizes this +information. A module at the head of an arrow is dependent +on the module at the tail. + +\begin{figure} +\begin{tabular}{|rcl|} +\hline +parsing structure & $\longrightarrow$ & values for a particular parser\\ +values for a particular parser & $\longrightarrow$ & lexical analyzer\\ +parsing structure, & $\longrightarrow$ & particular parser\\ +values for a particular parser, & & \\ +lexical analyzer & & \\ +\hline +\end{tabular} +\caption{Module Dependencies} +\end{figure} + +\subsection{Error Recovery} + +The error recovery algorithm is able to accurately recover from many +single token syntax errors. It tries to make a single token +correction at the token in the input stream at which the syntax error +was detected and any of the 15 tokens\footnote{An arbitrary number +chosen because numbers above this do not seem to improve error +correction much.} before that token. The algorithm checks corrections +before the point of error detection because a syntax error is often +not detected until several tokens beyond the token which caused the +error.\footnote{An LR parser detects a syntax error as soon as +possible, but this does not necessarily mean that the token at which +the error was detected caused the error.} + +The algorithm works by trying corrections at each +of the 16 tokens up to and including the token at which the +error was detected. At each token in the input stream, it +will try deleting the token, substituting other tokens for the +token, or inserting some other token before the token. + +The algorithm uses a parse check to evaluate corrections. A parse +check is a check of how far a correction allows a parser to +parse without encountering a syntax error. +You pass an upper bound on how many tokens beyond the error +point a parser may read while doing a parse check as an argument to the +parser. This allows +you to control the amount of lookahead that a parser reads +for different kinds of systems. For an interactive system, you +should set the lookahead to zero. Otherwise, a parser may hang +waiting for input in the case of a syntax error. If the lookahead +is zero, no syntax errors will be corrected. For a batch system, +you should set the lookahead to 15. + +The algorithm selects the set of corrections which allows the parse +to proceed the farthest +and parse through at least the error token. It then removes those +corrections involving keywords which do not meet a longer minimum +parse check. If there is more than one correction possible after this, +it uses a simple heuristic priority scheme to order the corrections, +and then arbitrarily chooses one of the corrections with the highest priority. +You have some control over the priority scheme by being able to +name a set of preferred insertions and a set of preferred substitutions. +The priorities for corrections, ordered from highest to lowest +priority, are +preferred insertions, preferred substitutions, insertions, deletions, +and substitutions. + +The error recovery algorithm is guaranteed to terminate since it always +selects fixes which parse through the +error token. + +The error-correcting LR parser implements the algorithm by keeping +a queue of its state stacks before shifting tokens and using +a lazy stream for the lexer. +This makes it possible to restart the +parse from before an error point and try various corrections. The +error-correcting LR parser does not defer semantic actions. Instead, +ML-Yacc creates semantic actions which are free of side-effects +and always terminate. +ML-Yacc uses higher-order functions to defer the +evaluation of all user semantic actions until the parse is successfully +completed without constructing an explicit parse tree. +You may declare whether your semantic actions are free of side-effects +and always terminate, in which case ML-Yacc does not need to defer +the evaluation of your semantic actions. + +\subsection{Precedence} +ML-Yacc uses the same precedence scheme as Yacc for resolving +shift/reduce conflicts. Each terminal may be assigned a precedence and +associativity. Each rule is then assigned the precedence of its rightmost +terminal. If a shift/reduce conflict occurs, the conflict is resolved +silently if the terminal and the rule in the conflict have +precedences. +If the terminal has the higher precedence, the shift is chosen. If +the rule has the higher precedence, the reduction is chosen. If both +the terminal and the rule have the same precedence, then the associativity +of the terminal is used to resolve the conflict. If the terminal is +left associative, the reduction is chosen. If the terminal is +right associative, the shift is chosen. Terminals may be declared to +be nonassociative, also, in which case an error message is produced +if the associativity is need to resolve the parsing conflict. + +If a terminal or a rule in a shift/reduce conflict does not have +a precedence, then an error message is produced and the shift +is chosen. + +In reduce/reduce conflicts, an error message is always produced and +the first rule listed in the specification is chosen for reduction. +\subsection{Notation} + +Text surrounded by brackets denotes meta-notation. If you see +something like \{parser name\}, you should substitute the actual +name of your parser for the meta-notation. Text in a bold-face +typewriter font ({\tt like this}) denotes text in a specification +or ML code. + +\section{ML-Yacc specifications} + +An ML-Yacc specification consists of three parts, each of which is +separated from the others by a {\tt \%\%} delimiter. The general format is: +\begin{quote} +\tt + \{user declarations\} \\ + \%\% \\ + \{ML-Yacc declarations\} \\ + \%\% \\ + \{rules\} +\end{quote} + +You can define values available in the semantic +actions of the rules in the user declarations section. +It is recommended that you keep the size of this +section as small as possible and place large +blocks of code in other modules. + +The ML-Yacc declarations section is used to make a set +of required declarations and a set of optional declarations. +You must declare the nonterminals and terminals and the +types of the values associated with them there. You must +also name the parser and declare the type of position values. +You should specify the set of terminals which can follow +the start symbol and the set of non-shiftable terminals. +You may optionally declare precedences for terminals, +make declarations that will +improve error-recovery, and suppress the generation of +default reductions in the parser. You may +declare whether the parser generator should create +a verbose description of the parser in a ``.desc'' file. This is useful +for finding the causes of shift/reduce errors and other parsing conflicts. + +You may also declare whether the semantic actions are +free of significant side-effects and always terminate. Normally, ML-Yacc +delays the evaluation of semantic actions until the completion of a +successful parse. This ensures that there will be no semantic actions +to ``undo'' if a syntactic error-correction invalidates some semantic +actions. If, however, the semantic actions are free of significant +side-effects and always terminate, the results of semantic actions that +are invalidated by a syntactic error-correction can always be safely +ignored. + +Parsers run faster and need less memory when it is not +necessary to delay the evaluation of semantic actions. You are +encouraged to write semantic actions that are free of side-effects and +always terminate and to declare this information to ML-Yacc. + +A semantic action is free of significant side-effects if it can be reexecuted +a reasonably small number of times without affecting the result of a +parse. (The reexecution occurs when the error-correcting parser is testing +possible corrections to fix a syntax error, and the number of times +reexecution occurs is roughly bounded, for each syntax error, by the number of +terminals times the amount of lookahead permitted for the error-correcting +parser). + +The rules section contains the context-free grammar productions and their +associated semantic actions. + +\subsection{Lexical Definitions} + +Comments have the same lexical definition as they do in Standard +ML and can be placed anywhere in a specification. + +All characters up to the first occurrence of a delimiting +{\tt \%\%} outside of +a comment are placed in the user declarations section. After that, the +following words and symbols are reserved: +\begin{quote} + +\verb'of for = { } , * -> : | ( )' + +\end{quote} + +The following classes of ML symbols are used: +\begin{quote} +\begin{description} +\item[identifiers:] + nonsymbolic ML identifiers, which consist + of an alphabetic character followed by one or + more alphabetic characters, numeric characters, + primes ``{\tt '}'', or underscores ``{\tt \_}''. +\item[type variables:] + nonsymbolic ML identifier starting with a prime ``{\tt '}'' +\item[integers:] one or more decimal digits. +\item[qualified identifiers:] an identifer followed by a period. + +\end{description} +\end{quote} +The following classes of non-ML symbols are used: +\begin{quote} +\begin{description} +\item[\% identifiers:] + a percent sign followed by one or more lowercase + alphabet letters. The valid \% identifiers + are: +\begin{quote} +\raggedright +\tt + \%arg \%eop \%header \%keyword \%left \%name \%nodefault + \%nonassoc \%nonterm \%noshift \%pos \%prec \%prefer + \%pure \%right \%start \%subst \%term \%value \%verbose +\end{quote} +\item[code:] + This class is meant to hold ML code. The ML code is not + parsed for syntax errors. It consists of a left parenthesis + followed by all characters up to a balancing right + parenthesis. Parentheses in ML comments and ML strings + are excluded from the count of balancing parentheses. + +\end{description} +\end{quote} + +\subsection{Grammar} + +This is the grammar for specifications: +\begin{eqnarray*} +\mbox{spec} & ::= & \mbox{user-declarations {\tt \%\%} cmd-list {\tt \%\%} rule-list} \\ +\mbox{ML-type} & ::= & \mbox{nonpolymorphic ML types (see the Standard ML manual)} \\ +\mbox{symbol} & ::= & \mbox{identifier} \\ +\mbox{symbol-list} & ::= & \mbox{symbol-list symbol} \\ + & | & \epsilon \\ +\mbox{symbol-type-list} & ::= & \mbox{symbol-type-list {\tt |} symbol {\tt of} ML-type} \\ + & | & \mbox{symbol-type list {\tt |} symbol} \\ + & | & \mbox{symbol {\tt of} ML-type} \\ + & | & \mbox{symbol} \\ +\mbox{subst-list} & ::= & \mbox{subst-list {\tt |} symbol {\tt for} symbol} \\ + & | & \epsilon \\ +\mbox{cmd} & ::= & \mbox{{\tt \%arg} (Any-ML-pattern) {\tt :} ML-type} \\ + & | & \mbox{{\tt \%eop} symbol-list} \\ + & | & \mbox{{\tt \%header} code} \\ + & | & \mbox{{\tt \%keyword} symbol-list} \\ + & | & \mbox{{\tt \%left} symbol-list} \\ + & | & \mbox{{\tt \%name} identifier} \\ + & | & \mbox{{\tt \%nodefault}} \\ + & | & \mbox{{\tt \%nonassoc} symbol-list} \\ + & | & \mbox{{\tt \%nonterm} symbol-type list} \\ + & | & \mbox{{\tt \%noshift} symbol-list } \\ + & | & \mbox{{\tt \%pos} ML-type} \\ + & | & \mbox{{\tt \%prefer} symbol-list} \\ + & | & \mbox{\tt \%pure} \\ + & | & \mbox{{\tt \%right} symbol-list} \\ + & | & \mbox{{\tt \%start} symbol} \\ + & | & \mbox{{\tt \%subst} subst-list} \\ + & | & \mbox{{\tt \%term} symbol-type-list} \\ + & | & \mbox{{\tt \%value} symbol code} \\ + & | & \mbox{{\tt \%verbose}} \\ +\mbox{cmd-list} & ::= &\mbox{ cmd-list cmd} \\ + & | & \mbox{cmd} \\ +\mbox{rule-prec} & ::= & \mbox{{\tt \%prec} symbol} \\ + & | & \epsilon \\ +\mbox{clause-list} & ::= & \mbox{symbol-list rule-prec code} \\ + & | & \mbox{clause-list {\tt |} symbol-list rule-prec code} \\ +\mbox{rule} & ::= & \mbox{symbol {\tt :} clause-list} \\ +\mbox{rule-list} & ::= & \mbox{rule-list rule} \\ + & | & \mbox{rule} +\end{eqnarray*} +\subsection{Required ML-Yacc Declarations} +\begin{description} +\item[{\tt \%name}] +You must specify the name of the parser with {\tt \%name} \{name\}. +\item[{\tt \%nonterm} and {\tt \%term}] +You must define the terminal and nonterminal sets using the +{\tt \%term} and {\tt \%nonterm} +declarations, respectively. These declarations are like an ML datatype +definition. +The type of the value that a symbol may carry is defined at the same time +that the symbol is defined. Each declarations consists of the keyword +({\tt \%term} or {\tt \%nonterm}) +followed by a list of symbol entries separated by a bar (``{\tt |}''). +Each symbol entry is a symbol name followed by an optional +``of \/ $<$ML-type$>$''. The types cannot be polymorphic. +Those symbol entries without a type carry no value. +Nonterminal and terminal names must be disjoint and no name may be declared +more than once in either declaration. + +The symbol names and types are used to construct a datatype union for the +values on the semantic stack in the LR parser and to name the values +associated with subcomponents of a rule. The names and types of +terminals are also used to construct a signature for a structure that +may be passed to the lexer functor. + +Because the types and names are used in these manners, do +not use ML keywords as symbol names. The programs produced by ML-Yacc +will not compile if ML keywords are used as symbol names. +Make sure that the types specified in the {\tt \%term} declaration are +fully qualified types or are available in the background +environment when the signatures produced by ML-Yacc are loaded. Do +not use any locally defined types from the user declarations section of +the specification. + +These requirements on the types in the {\tt \%term} declaration are not +a burden. +They force the types to be defined in another module, +which is a good idea since these types will +be used in the lexer module. +\item[{\tt \%pos}] +You must declare the type of position values using the {\tt \%pos} declaration. +The syntax is {\tt \%pos} $<$ML-type$>$. +This type MUST be the same type as that which is actually found in the lexer. +It cannot be polymorphic. + +\end{description} +\subsection{Optional ML-Yacc Declarations} +\label{optional-def} +\begin{description} +\item[{\tt \%arg}] +You may want each invocation of the entire parser to be parameterized +by a particular argument, such as the file-name of the input +being parsed in an invocation of the parser. The {\tt \%arg} declaration +allows you to specify such an argument. +(This is often cleaner than using ``global'' reference variables.) +The declaration +\begin{quote} + + {\tt \%arg} (Any-ML-pattern) {\tt :} $<$ML-type$>$ + +\end{quote} +specifies the argument to the parser, as well as its type. For example: +\begin{quote} + + {\tt \%arg (filename) : string} + +\end{quote} + +If {\tt \%arg} is not specified, it defaults to {\tt () : unit}. +\item[{\tt \%eop} and {\tt \%noshift}] +You should specify the set of +terminals that may follow the start +symbol, also called end-of-parse symbols, using the {\tt \%eop} +declaration. The {\tt \%eop} keyword should be followed by the list of +terminals. This is useful, for example, in an interactive system +where you want to force the evaluation of a statement before an +end-of-file (remember, a parser delays the execution of semantic +actions until a parse is successful). + +ML-Yacc has no concept of an end-of-file. You must +define an end-of-file terminal (EOF, perhaps) in the +{\tt \%term} declaration. +You must declare terminals which cannot be shifted, such as +end-of-file, in the {\tt \%noshift} declaration. The +{\tt \%noshift} keyword should be followed by the list of non-shiftable +terminals. An error message will be printed if a non-shiftable terminal +is found on the right hand side of any rule, but ML-Yacc will not prevent +you from using such grammars. + +It is important to emphasize that +\begin{em} +non-shiftable terminals must be declared. +\end{em} +The error-correcting parser may attempt to read past such terminals +while evaluating a correction to a syntax error otherwise. This may +confuse the lexer. +\item[{\tt \%header}] +You may define code to head the functor \{parser name\}LrValsFun here. This +may be useful for adding additonal parameter structures to the functor. +The functor must be parameterized by the Token structure, so +the declaration should always have the form: +\begin{quote} +\begin{verbatim} +%header (functor {parser name}LrValsFun( + structure Token : TOKEN + ...) + ) +\end{verbatim} +\end{quote} + +\item[{\tt \%left},{\tt \%right},{\tt \%nonassoc}] +You should list the precedence declarations in order of increasing (tighter-binding) +precedence. Each precedence declaration consists +of \% keyword specifying associativity followed by a list of terminals. +The keywords are {\tt \%left}, {\tt \%right}, and {\tt \%nonassoc}, +standing for their respective associativities. +\item[{\tt \%nodefault}] +The {\tt \%nodefault} declaration suppresses the generation of default +reductions. If only one production can be reduced in a given state in +an LR table, it may be made the default action for the state. An incorrect +reduction will be caught later when the parser attempts to shift the lookahead +terminal which caused the reduction. ML-Yacc usually produces programs and +verbose files with default reductions. This saves a great deal of +space in representing the LR tables,but +sometimes it is useful for debugging and advanced +uses of the parser to suppress the generation of default reductions. +\item[{\tt \%pure}] +Include the {\tt \%pure} declaration if the semantic actions +are free of significant side-effects and always terminate. +\item[{\tt \%start}] +You may define the start symbol using +the {\tt \%start} declaration. Otherwise the nonterminal for the +first rule will be used as the start nonterminal. +The keyword {\tt \%start} should be followed by the name of the starting +nonterminal. This nonterminal should not be used on the right hand +side of any rules, to avoid conflicts between reducing to the start +symbol and shifting a terminal. ML-Yacc will not prevent you +from using such grammars, but it will print a warning message. +\item[{\tt \%verbose}] + +Include the {\tt \%verbose} declaration to produce a verbose +description of the LALR parser. The name of this file is +the name of the specification file with a ``.desc'' appended to it. + + This file has the following format: +\begin{enumerate} + +\item A summary of errors found while generating the LALR tables. +\item A detailed description of all errors. +\item A description of the states of the parser. Each state + is preceded by a list of conflicts in the state. + +\end{enumerate} +\end{description} + +\subsection{Declarations for improving error-recovery} + +These optional declarations improve error-recovery: + +\begin{description} +\item[{\tt \%keyword}] + Specify all keywords in a grammar here. The {\tt \%keyword} + should be followed by a list + of terminal names. Fixes involving keywords are generally dangerous; + they are prone to substantially altering the syntactic meaning + of the program. They are subject to a more rigorous parse check than + other fixes. + +\item[{\tt \%prefer}] + List terminals to prefer for insertion after the {\tt \%prefer}. +Corrections which insert a terminal on this list will be chosen over +other corrections, all other things being equal. +\item[{\tt \%subst}] + This declaration should be followed by a list of clauses of the + form \{terminal\} {\tt for} \{terminal\}, where items on the list are + separated using a {\tt |}. Substitution corrections on this list +will be chosen over all other corrections except preferred insertion +corrections (listed above), all other things being equal. +\item[{\tt \%change}] + This is a generalization of {\tt \%prefer} and {\tt \%subst}. +It takes a the following syntax: +\begin{quote} +${\it tokens}_{1a}$ \verb|->| ${\it tokens}_{1b}$ \verb+|+ ${\it tokens}_{2a}$ \verb|->| ${\it tokens}_{2b}$ {\it etc.} +\end{quote} +where each {\it tokens} is a (possibly empty) seqence of tokens. The +idea is that any instance of ${\it tokens}_{1a}$ can be ``corrected'' to +${\it tokens}_{1b}$, and so on. For example, to suggest that a good +error correction to try is \verb|IN ID END| (which is useful for the +ML parser), write, +\begin{verbatim} + %change -> IN ID END +\end{verbatim} +\item[{\tt \%value}] + The error-correction algorithm may also insert terminals with values. + You must supply a value for such a terminal. The keyword + should be followed by a terminal and a piece of + code (enclosed in parentheses) that when evaluated supplies the value. + There must be a separate {\tt \%value} declaration for each terminal with + a value that you wish may be inserted or substituted in an error correction. + The code for the value is not evaluated until the parse is + successful. + + Do not specify a {\tt \%value} for terminals without + values. This will result in a type error in the program produced by + ML-Yacc. +\end{description} + +\subsection{Rules} + +All rules are declared in the final section, after the last {\tt \%\%} +delimiter. A rule consists of a left hand side nonterminal, followed by +a colon, followed by a list of right hand side clauses. + +The right hand side clauses should be separated by bars (``{\tt |}''). Each +clause consists of a list of nonterminal and terminal symbols, followed +by an optional {\tt \%prec} declaration, and then followed by the code to be +evaluated when the rule is reduced. + +The optional {\tt \%prec} consists of the keyword {\tt \%prec} followed by a +terminal whose precedence should be used as the precedence of the +rule. + +The values of those symbols on the right hand side which have values are +available inside the code. Positions for all the symbols are also +available. +Each value has the general form \{symbol name\}\{n+1\}, where \{n\} is the +number of occurrences of the symbol to the left of the symbol. If +the symbol occurs only once in the rule, \{symbol name\} may also +be used. +The positions are given by \{symbol~name\}\{n+1\}left and +\{symbol~name\}\{n+1\}right. where \{n\} is defined as before. +The position for a null rhs of +a production is assumed to be the leftmost position of the lookahead +terminal which is causing the reduction. This position value is +available in {\tt defaultPos}. + +The value to which the code evaluates is used as the value of the +nonterminal. The type of the value and the nonterminal must match. +The value is ignored if the nonterminal has no value, but is still +evaluated for side-effects. + +\section{Producing files with ML-Yacc} + +ML-Yacc may be used from the interactive system or built as a +stand-alone program which may be run from the Unix command line. +See the file {\bf README} in the mlyacc directory for directions +on installing ML-Yacc. We recommend thaat ML-Yacc be installed as +a stand-alone program. + +If you are using the stand-alone version of ML-Yacc, invoke the +program ``sml-yacc'' with the name of the specifcation file. +If you are using ML-Yacc in the interactive system, load the file +``smlyacc.sml''. The end result is a structure ParseGen, with one +value parseGen in it. Apply parseGen to a string containing the +name of the specification file. + +Two files will be created, one named by +attaching ``.sig'' to the name of the specification, the other named by +attaching ``.sml'' to the name of the specification. + +\section{The lexical analyzer} + +Let the name for +the parser given in the {\tt \%name} declaration be denoted by \{n\} and +the specification file name be denoted by \{spec name\} +The parser generator creates a functor named \{n\}LrValsFun for +the values needed for a particular parser. This functor is placed +in \{spec name\}.sml. This +functor contains a structure +Tokens which allows you to construct terminals from the appropriate +values. The structure has a function for each terminal that takes a tuple +consisting of the value for the terminal (if there is any), a leftmost +position for the terminal, and a rightmost position for the terminal and +constructs the terminal from these values. + +A signature for the structure Tokens is created and placed in the ``.sig'' +file created by ML-Yacc. This signature is \{n\}\_TOKENS, + where \{n\} is +the name given in the parser specification. A signature +\{n\}\_LRVALS is created for the structure produced by +applying \{n\}LrValsFun. + +Use the signature \{n\}\_TOKENS to create a functor for the +lexical analyzer which takes the structure Tokens as an argument. The +signature \{n\}\_TOKENS +will not change unless the {\tt \%term} declaration in a +specification is altered by adding terminals or +changing the types of terminals. You do not need to recompile +the lexical analyzer functor each time the specification for +the parser is changed if the +signature \{n\}\_TOKENS does not change. + +If you are using ML-Lex to create the lexical analyzer, you +can turn the lexer structure into a functor using the +{\tt \%header} declaration. +{\tt \%header} allows the user to define the header for a structure body. + +If the name of the parser in the specification were Calc, you +would add this declaration to the specification for the lexical +analyzer: +\begin{quote} +\tt +\begin{verbatim} +%header (functor CalcLexFun(structure Tokens : Calc_TOKENS)) +\end{verbatim} +\end{quote} + +You must define the following in the user definitions section: +\begin{quote} +\tt +\begin{verbatim} +type pos +\end{verbatim} +\end{quote} +This is the type of position values for terminals. This type +must be the same as the one declared in the specification for +the grammar. Note, however, that this type is not available +in the Tokens structure that parameterizes the lexer functor. + +You must include the following code in the user definitions section of +the ML-Lex specification: +\begin{quote} +\tt +\begin{verbatim} +type svalue = Tokens.svalue +type ('a,'b) token = ('a,'b) Tokens.token +type lexresult = (svalue,pos) token +\end{verbatim} +\end{quote} + +These types are used to give lexers signatures. + +You may use a lexer constructed using ML-Lex with the {\tt \%arg} +declaration, but you must follow special instructions for tying the parser +and lexer together. + +\section{Creating the parser} +\label{create-parser} +Let the name of the grammar specification file be denoted by +\{grammar\} and the name of the lexer specification file be +denoted by \{lexer\} (e.g. in our calculator example these would +stand for calc.grm and calc.lex, respectively). +Let the parser name in the specification be represented by \{n\} +(e.g. Calc in our calculator example). + +To construct a parser, do the following: +\begin{enumerate} +\item In the appropriate CM description file (e.g. for your main +program or one of its subgroups or libraries), include the lines: +\begin{quote} +\begin{verbatim} +ml-yacc-lib.cm +{lexer} +{grammar} +\end{verbatim} +\end{quote} +This will cause ML-Yacc to be run on \{grammar\}, producing source files +\{grammar\}.sig and \{grammar\}.sml, and ML-Lex to be run on +\{lexer\}, producing a source file \{lexer\}.sml. Then these files +will be compiled after loading the necessary signatures and modules +from the ML-Yacc library as specified by {\tt ml-yacc-lib.cm}. +\item Apply functors to create the parser: +\begin{quote} +\begin{verbatim} +structure {n}LrVals = + {n}LrValsFun(structure Token = LrParser.Token) +structure {n}Lex = + {n}LexFun(structure Tokens = {n}LrVals.Tokens) +structure {n}Parser= + Join(structure ParserData = {n}LrVals.ParserData + structure Lex={n}Lex + structure LrParser=LrParser) +\end{verbatim} +\end{quote} +If the lexer was created using the {\tt \%arg} declaration in ML-Lex, +the definition of \{n\}Parser must be changed to use another functor +called JoinWithArg: +\begin{quote} +\begin{verbatim} +structure {n}Parser= + JoinWithArg + (structure ParserData={n}LrVals.ParserData + structure Lex={n}Lex + structure LrParser=LrParser) +\end{verbatim} +\end{quote} +\end{enumerate} + +The following outline summarizes this process: +\begin{quote} +\begin{verbatim} +(* available at top level *) + +TOKEN +LR_TABLE +STREAM +LR_PARSER +PARSER_DATA +structure LrParser : LR_PARSER + +(* printed out in .sig file created by parser generator: *) + +signature {n}_TOKENS = +sig + structure Token : TOKEN + type svalue + val PLUS : 'pos * 'pos -> + (svalue,'pos) Token.token + val INTLIT : int * 'pos * 'pos -> + (svalue,'pos) Token.token + ... +end + +signature {n}_LRVALS = +sig + structure Tokens : {n}_TOKENS + structure ParserData : PARSER_DATA + sharing ParserData.Token = Tokens.Token + sharing type ParserData.svalue = Tokens.svalue +end + +(* printed out by lexer generator: *) + +functor {n}LexFun(structure Tokens : {n}_TOKENS)= +struct + ... +end + +(* printed out in .sml file created by parser generator: *) + +functor {n}LrValsFun(structure Token : TOKENS) = +struct + + structure ParserData = + struct + structure Token = Token + + (* code in header section of specification *) + + structure Header = ... + type svalue = ... + type result = ... + type pos = ... + structure Actions = ... + structure EC = ... + val table = ... + end + + structure Tokens : {n}_TOKENS = + struct + structure Token = ParserData.Token + type svalue = ... + fun PLUS(p1,p2) = ... + fun INTLIT(i,p1,p2) = ... + end + +end + +(* to be done by the user: *) + +structure {n}LrVals = + {n}LrValsFun(structure Token = LrParser.Token) + +structure {n}Lex = + {n}LexFun(structure Tokens = {n}LrVals.Tokens) + +structure {n}Parser = + Join(structure Lex = {n}Lex + structure ParserData = {n}ParserData + structure LrParser = LrParser) +\end{verbatim} +\end{quote} + +\section{Using the parser} +\subsection{Parser Structure Signatures} +The final structure created will have the signature PARSER: +\begin{quote} +\begin{verbatim} +signature PARSER = +sig + structure Token : TOKEN + structure Stream : STREAM + exception ParseError + + type pos (* pos is the type of line numbers *) + type result (* value returned by the parser *) + type arg (* type of the user-supplied argument *) + type svalue (* the types of semantic values *) + + val makeLexer : (int -> string) -> + (svalue,pos) Token.token Stream.stream + val parse : + int * ((svalue,pos) Token.token Stream.stream) * + (string * pos * pos -> unit) * arg -> + result * (svalue,pos) Token.token Stream.stream + val sameToken : + (svalue,pos) Token.token * (svalue,pos) Token.token -> + bool +end +\end{verbatim} +\end{quote} +or the signature ARG\_PARSER if you used {\tt \%arg} to create the lexer. +This signature differs from ARG\_PARSER in that it +which has an additional type {\tt lexarg} and a different type +for {\tt makeLexer}: +\begin{quote} +\begin{verbatim} +type lexarg +val makeLexer : (int -> string) -> lexarg -> + (svalue,pos) token stream +\end{verbatim} +\end{quote} + +The signature STREAM (providing lazy streams) is: +\begin{quote} +\begin{verbatim} +signature STREAM = +sig + type 'a stream + val streamify : (unit -> 'a) -> 'a stream + val cons : 'a * 'a stream -> 'a stream + val get : 'a stream -> 'a * 'a stream +end +\end{verbatim} +\end{quote} + +\subsection{Using the parser structure} + +The parser structure converts the lexing function produced by +ML-Lex into a function which creates a lazy stream of tokens. The +function {\tt makeLexer} takes the same values as the corresponding +{\tt makeLexer} created by ML-Lex, but returns a stream of tokens +instead of a function which yields tokens. + +The function parse takes the token stream and some other arguments that +are described below and parses the token stream. It returns a pair composed +of the value associated with the start symbol and the rest of +the token stream. The rest of the token stream includes the +end-of-parse symbol which caused the reduction of some rule +to the start symbol. The function parse raises the +exception ParseError if a syntax error occurs which it cannot fix. + +The lazy stream is implemented by the {\tt Stream} structure. +The function {\tt streamify} converts a conventional implementation +of a stream into a lazy stream. In a conventional implementation +of a stream, a stream consists of a position in a list of +values. Fetching a value from a stream returns the +value associated with the position and updates the position to +the next element in the list of values. The fetch is a side-effecting +operation. In a lazy stream, a fetch returns a value and a new +stream, without a side-effect which updates the position value. +This means that a stream can be repeatedly re-evaluated without +affecting the values that it returns. If $f$ is the function +that is passed to {\tt streamify}, $f$ is called only as many +times as necessary to construct the portion of the list of values +that is actually used. + +Parse also takes an integer giving the maximum amount of lookahead permitted +for the error-correcting parse, a function to print error messages, +and a value of type arg. The maximum amount of lookahead for interactive +systems should be zero. In this case, no attempt is made to correct any +syntax errors. For non-interactive systems, try 15. The +function to print error messages takes a tuple of values consisting +of the left and right positions of the terminal which caused the error +and an error message. If the {\tt \%arg} declaration is not used, the +value of type arg should be a value of type unit. + +The function sameToken can be used to see if two tokens +denote the same terminal, irregardless of any values that the +tokens carry. It is useful if you have multiple end-of-parse +symbols and must check which end-of-parse symbol has been left on the +front of the token stream. + +The types have the following meanings. The type {\tt arg} is the type +of the additional argument to the parser, which is specified by the +{\tt \%arg} declaration in the ML-Yacc specification. The type +{\tt lexarg} is the optional argument to lexers, and is specified by +the {\tt \%arg} declaration in an ML-Lex specifcation. The type {\tt pos} +is the type of line numbers, and is specified by the {\tt \%pos} declaration +in an ML-Yacc specification and defined in the user declarations +section of the ML-Lex specification. The type {\tt result} is +the type associated with the start symbol in the ML-Yacc specification. + +\section{Examples} + +See the directory examples for examples of parsers constructed using +ML-Yacc. Here is a small sample parser and lexer for an interactive +calculator, from the directory examples/calc, along with code for +creating a parsing function. The calculator reads one or more +expressions from the standard input, evaluates the expressions, and +prints their values. Expressions should be separated by semicolons, +and may also be ended by using an end-of-file. This shows how to +construct an interactive parser which reads a top-level declaration +and processes the declaration before reading the next top-level +declaration. + +\subsection{Sample Grammar} +\begin{tt} +\begin{verbatim} +(* Sample interactive calculator for ML-Yacc *) + +fun lookup "bogus" = 10000 + | lookup s = 0 + +%% + +%eop EOF SEMI + +(* %pos declares the type of positions for terminals. + Each symbol has an associated left and right position. *) + +%pos int + +%left SUB PLUS +%left TIMES DIV +%right CARAT + +%term ID of string | NUM of int | PLUS | TIMES | PRINT | + SEMI | EOF | CARAT | DIV | SUB +%nonterm EXP of int | START of int option + +%name Calc + +%subst PRINT for ID +%prefer PLUS TIMES DIV SUB +%keyword PRINT SEMI + +%noshift EOF +%value ID ("bogus") +%nodefault +%verbose +%% + +(* the parser returns the value associated with the expression *) + + START : PRINT EXP (print EXP; + print "\n"; + flush_out std_out; SOME EXP) + | EXP (SOME EXP) + | (NONE) + EXP : NUM (NUM) + | ID (lookup ID) + | EXP PLUS EXP (EXP1+EXP2) + | EXP TIMES EXP (EXP1*EXP2) + | EXP DIV EXP (EXP1 div EXP2) + | EXP SUB EXP (EXP1-EXP2) + | EXP CARAT EXP (let fun e (m,0) = 1 + | e (m,l) = m*e(m,l-1) + in e (EXP1,EXP2) + end) +\end{verbatim} +\end{tt} +\subsection{Sample Lexer} +\begin{tt} +\begin{verbatim} +structure Tokens = Tokens + +type pos = int +type svalue = Tokens.svalue +type ('a,'b) token = ('a,'b) Tokens.token +type lexresult= (svalue,pos) token + +val pos = ref 0 +val eof = fn () => Tokens.EOF(!pos,!pos) +val error = fn (e,l : int,_) => + output(std_out,"line " ^ (makestring l) ^ + ": " ^ e ^ "\n") +%% +%header (functor CalcLexFun(structure Tokens: Calc_TOKENS)); +alpha=[A-Za-z]; +digit=[0-9]; +ws = [\ \t]; +%% +\n => (pos := (!pos) + 1; lex()); +{ws}+ => (lex()); +{digit}+ => (Tokens.NUM + (revfold (fn (a,r) => ord(a)-ord("0")+10*r) + (explode yytext) 0, + !pos,!pos)); +"+" => (Tokens.PLUS(!pos,!pos)); +"*" => (Tokens.TIMES(!pos,!pos)); +";" => (Tokens.SEMI(!pos,!pos)); +{alpha}+ => (if yytext="print" + then Tokens.PRINT(!pos,!pos) + else Tokens.ID(yytext,!pos,!pos) + ); +"-" => (Tokens.SUB(!pos,!pos)); +"^" => (Tokens.CARAT(!pos,!pos)); +"/" => (Tokens.DIV(!pos,!pos)); +"." => (error ("ignoring bad character "^yytext,!pos,!pos); + lex()); +\end{verbatim} +\end{tt} +\subsection{Top-level code} + +You must follow the instructions in Section~\ref{create-parser} +to create the parser and lexer functors and load them. After you have +done this, you must then apply the functors to produce the {\tt CalcParser} +structure. The code for doing this is shown below. +\begin{quote} +\begin{verbatim} +structure CalcLrVals = + CalcLrValsFun(structure Token = LrParser.Token) + +structure CalcLex = + CalcLexFun(structure Tokens = CalcLrVals.Tokens); + +structure CalcParser = + Join(structure LrParser = LrParser + structure ParserData = CalcLrVals.ParserData + structure Lex = CalcLex) +\end{verbatim} +\end{quote} + +Now we need a function which given a lexer invokes the parser. The +function {\tt invoke} does this. + +\begin{quote} +\begin{verbatim} +fun invoke lexstream = + let fun print_error (s,i:int,_) = + TextIO.output(TextIO.stdOut, + "Error, line " ^ (Int.toString i) ^ ", " ^ s ^ "\n") + in CalcParser.parse(0,lexstream,print_error,()) + end +\end{verbatim} +\end{quote} + +Finally, we need a function which can read one or more expressions from +the standard input. The function {\tt parse}, shown below, does this. +It runs the calculator on the standard input and terminates +when an end-of-file is encountered. + +\begin{quote} +\begin{verbatim} +fun parse () = + let val lexer = CalcParser.makeLexer + (fn _ => TextIO.inputLine TextIO.stdIn) + val dummyEOF = CalcLrVals.Tokens.EOF(0,0) + val dummySEMI = CalcLrVals.Tokens.SEMI(0,0) + fun loop lexer = + let val (result,lexer) = invoke lexer + val (nextToken,lexer) = CalcParser.Stream.get lexer + in case result + of SOME r => + TextIO.output(TextIO.stdOut, + "result = " ^ (Int.toString r) ^ "\n") + | NONE => (); + if CalcParser.sameToken(nextToken,dummyEOF) then () + else loop lexer + end + in loop lexer + end +\end{verbatim} +\end{quote} + +\section{Signatures} + +This section contains signatures used by ML-Yacc for structures in +the file base.sml, functors and structures that it generates, and for +the signatures of lexer structures supplied by you. + +\subsection{Parsing structure signatures} + +\begin{quote} +\begin{verbatim} +(* STREAM: signature for a lazy stream.*) + +signature STREAM = +sig + type 'a stream + val streamify : (unit -> 'a) -> 'a stream + val cons : 'a * 'a stream -> 'a stream + val get : 'a stream -> 'a * 'a stream +end + +(* LR_TABLE: signature for an LR Table.*) + +signature LR_TABLE = +sig + datatype ('a,'b) pairlist + = EMPTY + | PAIR of 'a * 'b * ('a,'b) pairlist + datatype state = STATE of int + datatype term = T of int + datatype nonterm = NT of int + datatype action = SHIFT of state + | REDUCE of int + | ACCEPT + | ERROR + type table + + val numStates : table -> int + val numRules : table -> int + val describeActions : table -> state -> + (term,action) pairlist * action + val describeGoto : table -> state -> + (nonterm,state) pairlist + val action : table -> state * term -> action + val goto : table -> state * nonterm -> state + val initialState : table -> state + exception Goto of state * nonterm + + val mkLrTable : + {actions : ((term,action) pairlist * action) array, + gotos : (nonterm,state) pairlist array, + numStates : int, numRules : int, + initialState : state} -> table +end + +(* TOKEN: signature for the internal structure of a token.*) + +signature TOKEN = +sig + structure LrTable : LR_TABLE + datatype ('a,'b) token = TOKEN of LrTable.term * + ('a * 'b * 'b) + val sameToken : ('a,'b) token * ('a,'b) token -> bool +end + +(* LR_PARSER: signature for a polymorphic LR parser *) + +signature LR_PARSER = +sig + structure Stream: STREAM + structure LrTable : LR_TABLE + structure Token : TOKEN + + sharing LrTable = Token.LrTable + + exception ParseError + + val parse: + {table : LrTable.table, + lexer : ('b,'c) Token.token Stream.stream, + arg: 'arg, + saction : int * + 'c * + (LrTable.state * ('b * 'c * 'c)) list * + 'arg -> + LrTable.nonterm * + ('b * 'c * 'c) * + ((LrTable.state *('b * 'c * 'c)) list), + void : 'b, + ec: {is_keyword : LrTable.term -> bool, + noShift : LrTable.term -> bool, + preferred_subst:LrTable.term -> LrTable.term list, + preferred_insert : LrTable.term -> bool, + errtermvalue : LrTable.term -> 'b, + showTerminal : LrTable.term -> string, + terms: LrTable.term list, + error : string * 'c * 'c -> unit + }, + lookahead : int (* max amount of lookahead used in + * error correction *) + } -> 'b * (('b,'c) Token.token Stream.stream) +end +\end{verbatim} +\end{quote} + +\subsection{Lexers} + +Lexers for use with ML-Yacc's output must match one of these signatures. + +\begin{quote} +\begin{verbatim} +signature LEXER = +sig + structure UserDeclarations : + sig + type ('a,'b) token + type pos + type svalue + end + val makeLexer : (int -> string) -> unit -> + (UserDeclarations.svalue, UserDeclarations.pos) + UserDeclarations.token +end + +(* ARG_LEXER: the %arg option of ML-Lex allows users to + produce lexers which also take an argument before + yielding a function from unit to a token. +*) + +signature ARG_LEXER = +sig + structure UserDeclarations : + sig + type ('a,'b) token + type pos + type svalue + type arg + end + val makeLexer : + (int -> string) -> + UserDeclarations.arg -> + unit -> + (UserDeclarations.svalue, UserDeclarations.pos) + UserDeclarations.token +end +\end{verbatim} +\end{quote} + +\subsection{Signatures for the functor produced by ML-Yacc} + +The following signature is used in signatures generated by +ML-Yacc: +\begin{quote} +\begin{verbatim} +(* PARSER_DATA: the signature of ParserData structures in + {n}LrValsFun functor produced by ML-Yacc. All such + structures match this signature. *) + +signature PARSER_DATA = +sig + type pos (* the type of line numbers *) + type svalue (* the type of semantic values *) + type arg (* the type of the user-supplied *) + (* argument to the parser *) + type result + + structure LrTable : LR_TABLE + structure Token : TOKEN + sharing Token.LrTable = LrTable + + structure Actions : + sig + val actions : int * pos * + (LrTable.state * (svalue * pos * pos)) list * arg -> + LrTable.nonterm * (svalue * pos * pos) * + ((LrTable.state *(svalue * pos * pos)) list) + val void : svalue + val extract : svalue -> result + end + + (* structure EC contains information used to improve + error recovery in an error-correcting parser *) + + structure EC : + sig + val is_keyword : LrTable.term -> bool + val noShift : LrTable.term -> bool + val preferred_subst: LrTable.term -> LrTable.term list + val preferred_insert : LrTable.term -> bool + val errtermvalue : LrTable.term -> svalue + val showTerminal : LrTable.term -> string + val terms: LrTable.term list + end + + (* table is the LR table for the parser *) + + val table : LrTable.table +end +\end{verbatim} +\end{quote} + +ML-Yacc generates these two signatures: +\begin{quote} +\begin{verbatim} +(* printed out in .sig file created by parser generator: *) + +signature {n}_TOKENS = +sig + type ('a,'b) token + type svalue + ... +end + +signature {n}_LRVALS = +sig + structure Tokens : {n}_TOKENS + structure ParserData : PARSER_DATA + sharing type ParserData.Token.token = Tokens.token + sharing type ParserData.svalue = Tokens.svalue +end +\end{verbatim} +\end{quote} +\subsection{User parser signatures} + +Parsers created by applying the Join functor will match this signature: +\begin{quote} +\begin{verbatim} +signature PARSER = +sig + structure Token : TOKEN + structure Stream : STREAM + exception ParseError + + type pos (* pos is the type of line numbers *) + type result (* value returned by the parser *) + type arg (* type of the user-supplied argument *) + type svalue (* the types of semantic values *) + + val makeLexer : (int -> string) -> + (svalue,pos) Token.token Stream.stream + + val parse : + int * ((svalue,pos) Token.token Stream.stream) * + (string * pos * pos -> unit) * arg -> + result * (svalue,pos) Token.token Stream.stream + val sameToken : + (svalue,pos) Token.token * (svalue,pos) Token.token -> + bool +end +\end{verbatim} +\end{quote} +Parsers created by applying the JoinWithArg functor will match this +signature: +\begin{quote} +\begin{verbatim} +signature ARG_PARSER = +sig + structure Token : TOKEN + structure Stream : STREAM + exception ParseError + + type arg + type lexarg + type pos + type result + type svalue + + val makeLexer : (int -> string) -> lexarg -> + (svalue,pos) Token.token Stream.stream + val parse : int * + ((svalue,pos) Token.token Stream.stream) * + (string * pos * pos -> unit) * + arg -> + result * (svalue,pos) Token.token Stream.stream + val sameToken : + (svalue,pos) Token.token * (svalue,pos) Token.token -> + bool +end +\end{verbatim} +\end{quote} + +\section{Sharing constraints} + +Let the name of the parser be denoted by \{n\}. If +you have not created a lexer which takes an argument, and +you have followed the directions given earlier for creating the parser, you +will have the following structures with the following signatures: +\begin{quote} +\begin{verbatim} +(* always present *) + +signature TOKEN +signature LR_TABLE +signature STREAM +signature LR_PARSER +signature PARSER_DATA +structure LrParser : LR_PARSER + +(* signatures generated by ML-Yacc *) + +signature {n}_TOKENS +signature {n}_LRVALS + +(* structures created by you *) + +structure {n}LrVals : {n}_LRVALS +structure Lex : LEXER +structure {n}Parser : PARSER +\end{verbatim} +\end{quote} + +The following sharing constraints will exist: +\begin{quote} +\begin{verbatim} +sharing {n}Parser.Token = LrParser.Token = + {n}LrVals.ParserData.Token +sharing {n}Parser.Stream = LrParser.Stream + +sharing type {n}Parser.arg = {n}LrVals.ParserData.arg +sharing type {n}Parser.result = {n}LrVals.ParserData.result +sharing type {n}Parser.pos = {n}LrVals.ParserData.pos = + Lex.UserDeclarations.pos +sharing type {n}Parser.svalue = {n}LrVals.ParserData.svalue = + {n}LrVals.Tokens.svalue = Lex.UserDeclarations.svalue +sharing type {n}Parser.Token.token = + {n}LrVals.ParserData.Token.token = + LrParser.Token.token = + Lex.UserDeclarations.token + +sharing {n}LrVals.LrTable = LrParser.LrTable + +\end{verbatim} +\end{quote} + +If you used a lexer which takes an argument, then you will +have: +\begin{quote} +\begin{verbatim} +structure ARG_LEXER +structure {n}Parser : PARSER + +(* additional sharing constraint *) + +sharing type {n}Parser.lexarg = Lex.UserDeclarations.arg +\end{verbatim} +\end{quote} + +\section{Hints} +\subsection{Multiple start symbols} +To have multiple start symbols, define a dummy token for each +start symbol. Then define a start symbol which derives the +multiple start symbols with dummy tokens placed in front of +them. When you start the parser you must place a dummy token +on the front of the lexer stream to select a start symbol +from which to begin parsing. + +Assuming that you have followed the naming conventions used before, +create the lexer using the makeLexer function in the \{n\}Parser structure. +Then, place the dummy token on the front of the lexer: +\begin{quote} +\begin{verbatim} +val dummyLexer = + {n}Parser.Stream.cons + ({n}LrVals.Tokens.{dummy token name} + ({dummy lineno},{dummy lineno}), + lexer) +\end{verbatim} +\end{quote} +You have to pass a Tokens structure to the lexer. This Tokens structure +contains functions which construct tokens from values and line numbers. +So to create your dummy token just apply the appropriate token constructor +function from this Tokens structure to a value (if there is one) and the +line numbers. This is exactly what you do in the lexer to construct tokens. + +Then you must place the dummy token on the front of your lex stream. +The structure \{n\}Parser contains a structure Stream which implements +lazy streams. So you just cons the dummy token on to stream returned +by makeLexer. +\subsection{Functorizing things further} + +You may wish to functorize things even further. Two possibilities +are turning the lexer and parser structures into closed functors, +that is, functors which do not refer to types or values defined +outside their body or outside their parameter structures (except +for pervasive types and values), and creating a functor which +encapsulates the code necessary to invoke the parser. + +Use the {\tt \%header} declarations in ML-Lex and ML-Yacc to create +closed functors. See section~\ref{optional-def} of this manual +and section 4 of the manual for ML-Lex for complete descriptions of these +declarations. If you do this, you should also parameterize these +structures by the types of line numbers. The type will be an +abstract type, so you will also need to define all the valid +operations on the type. The signature INTERFACE, defined below, +shows one possible signature for a structure defining the line +number type and associated operations. + +If you wish to encapsulate the code necessary to invoke the +parser, your functor generally will have form: +\begin{quote} +\begin{verbatim} +functor Encapsulate( + structure Parser : PARSER + structure Interface : INTERFACE + sharing type Parser.arg = Interface.arg + sharing type Parser.pos = Interface.pos + sharing type Parser.result = ... + structure Tokens : {parser name}_TOKENS + sharing type Tokens.token = Parser.Token.token + sharing type Tokens.svalue = Parser.svalue) = + struct + ... + end +\end{verbatim} +\end{quote} + +The signature INTERFACE, defined below, is a possible signature for +a structure +defining the types +of line numbers and arguments (types pos and arg, respectively) +along with operations for them. You need this structure +because +these types will be abstract types inside the body of your +functor. +\begin{quote} +\begin{verbatim} +signature INTERFACE = +sig + type pos + val line : pos ref + val reset : unit -> unit + val next : unit -> unit + val error : string * pos * pos -> unit + + type arg + val nothing : arg +end +\end{verbatim} +\end{quote} + +The directory example/fol contains a sample parser in which +the code for tying together the lexer and parser has been +encapsulated in a functor. + +\section{Acknowledgements} + +Nick Rothwell wrote an SLR table generator in 1988 which inspired the +initial work on an ML parser generator. Bruce Duba and David +MacQueen made useful suggestions about the design of the error-correcting +parser. Thanks go to all the users at Carnegie Mellon who beta-tested +this version. Their comments and questions led to the creation of +this manual and helped improve it. + +\section{Bugs} + +There is a slight difference in syntax between ML-Lex and ML-Yacc. +In ML-Lex, semantic actions must be followed by a semicolon but +in ML-Yacc semantic actions cannot be followed by a semicolon. +The syntax should be the same. ML-Lex also produces structures with +two different signatures, but it should produce structures with just +one signature. This would simplify some things. + +\begin{thebibliography}{9} + +\bibitem{bf} ``A Practical Method for LR and LL Syntactic Error +Diagnosis and Recovery'', M. Burke and G. Fisher, +ACM Transactions on Programming Languages and +Systems, Vol. 9, No. 2, April 1987, pp. 164-167. +\bibitem{ahu} A. Aho, R. Sethi, J. Ullman, {\em Compilers: Principles, +Techniques, and Tools}, Addison-Wesley, Reading, MA, 1986. + +\end{thebibliography} + +\end{document} diff --git a/ml-yacc/doc/tech.doc b/ml-yacc/doc/tech.doc new file mode 100644 index 0000000..fcb03ed --- /dev/null +++ b/ml-yacc/doc/tech.doc @@ -0,0 +1,252 @@ +A Hacker's guide ML-Yacc itself + +The program for computing the LALR(1) table can be divided into 3 separate +parts. The first part computes the LR(0) graph. The second part attaches +lookahead to the LR(0) graph to get the LALR(1) graph. The third part +computes the parse tables from the LALR(1) graph. + +Look at the file sigs.sml to see how the modules are layed out. +The file graph.sml contains the Graph functor, which produces a structure +containing a function mkGraph. mkGraph takes a grammar and returns a +some useful values and functions, including the LR(0) graph. It renumbers +the rules to an internal form to make the LR(0) graph generation more +efficient. The LR(0) graph includes only core items in its set of items. + +The file look.sml takes some of theses values and produces functions +which tell whether a nonterm is nullable and the first set of a symbol +list. + +The functor mkLalr creates a structure with a function that takes an LR(0) +graph and some other values (notably the first and nullable) functions +produced by Look and creates a stripped down version of an LR(0) graph with +lookaheads attached. Nullable items (which usually aren't core items) are +added and all other items without dots at the end (i.e. non-reduction items) +are removed. + +The functor MkTable produces a function with takes the LR(0) graph +produced by the function in mkGraph and the LR(0) graph with lookaheads +produced by Lalr and creates an LALR(1) table from these graphs. + + +----------------------------------------------------------------------- +An overview of the algorithms used in LR(0) graph generation and +LALR(1) lookahead creation. + +LR(0) graph +----------- + +The LR(0) graph consists of sets of items. Each set of items will be +called a core set. The basic algorithm is: + + let fun add_gotos(graph,f,nil,r) = (graph,r) + | add_gotos(graph,f,(a,symbol)::b,r) + let newgraph = graph + edge from f to a labelled + with symbol + in if a exists in graph then + add_gotos(newgraph,f,b,r) + else add_gotos(newgraph,f,b,a::r) + end + fun f(graph,nil) = graph + | f(graph,a::b) = f(add_gotos(graph,a,gotos of closure a,b)) + in f(empty-graph,[initial core set]) + end + +For each core, we compute the new cores which result from doing a shift +or goto, and then add these new cores with the symbol used in the shift +or goto to the graph. We continue doing this until there are no more cores +to adds to the graph. + +We have to take the closure of a core to include those items which are +derived from nonterminals with a dot before them. If item A -> 'a .B 'c +is in a core, the all productions derived by B must also be in the core. + +We want to be able to do the following operations efficently: + (1) check if a core is in the graph already + (2) compute the closure of a core + (3) compute the cores resulting from goto/shift operations. + +(1) This can be done efficiently if a complete order exists for the cores. This +can be done by imposing an ordering on items, giving each item a unique +integer and using the place in an item. This can be used to order a +set of items. + +(2) Much of the computation for the closure can be done ahead of time. +The set of nonterminals to add for a given a nonterminal can be pre-computed +using a transitive closure algorithm (the transitive closure is sparse +in practice). One can then compute the closure for a core in the following +manner. First, compute the set of nonterminals with . in front of them. +This can be done in (m ln m) time. Next, use the results from the +transitive closure to compute the complete set of nonterminals that +should be used. Finally, for each nonterminal, merge its set of +productions (sort all rules by the nonterminals from which they +are derived before numbering them, then all we have to do is just +prepend the rules while scanning the list in reverse order). + +(3) To do this, just scan the core closure, sorting rules by their +symbols into lists. Then reverse all the lists, and we have the +new core sets. + +Lookahead representation +------------------------ + +The previous part throws away the result of the closure operations. +It is used only to compute new cores for use in the goto operation. +These intermediate results should be saved because they will be useful +here. + +Lookaheads are attached to an item when + + (1) an item is the result of a shift/goto. The item + must have the same lookahead as the item from which it + is derived. + (2) an item is added as the result of a closure. Note that + in fact all productions derived from a given nonterminal + are added here. This can be used (perhaps) to our + advantage, as we can represent a closure using just the + nonterminal. + + This can be divided into two cases: + + (a) A -> 'a .B 'c , where 'c derives epsilon, + (b) A -> 'a .B 'c , where 'c does not derive epsilon + + In (a), lookahead(items derived from B) includes first('c) + and lookahead(A -> 'a .B 'c) + + In (b), lookahead(items derived from B) includes only first('c). + + This is an example of back propagation. + + Note that an item is either the result of a closure or the + result of a shift/goto. It is never the result of both (that + would be a contradiction). + + The following representation will be used: + + goto/shift items: + an ordered list of item * lookahead ref * + lookahead ref for the resulting + shift/goto item in another core. + + closure items: + for each nonterminal: + (1) lookahead ref + (2) a list of item * lookahead ref for the + resulting shift/goto item in another + core. + +Lookahead algorithms +-------------------- + +After computing the LR(0) graph, lookaheads must be attached to the items in +the graph. An item i may receive lookaheads in two ways. If item i +was the result of a shift or goto from some item j, then lookahead(i) includes +lookahead(j). If item i is a production of some nonterminal B, and there +exists some item j of the form A -> x .B y, then item i will be added through +closure(j). This implies that lookahead(i) includes first(y). If y => +epsilon, then lookahead(i) includes lookahead(j). + +Lookahead must be recorded for completion items, which are items of the +form A -> x., non-closure items of the form A -> y . B z, where z is +not nullable, and closure items of the form A -> epsilon. (comment: +items of the form A -> .x can appear in the start state as non-closure items. +A must be the start symbol, which should not appear in the right hand side +of any rule. This implies that lookaheads will never be propagated to +such items) + +We chose to omit closure items that do not have the form A -> epsilon. +It is possible to add lookaheads to closure items, but we have not +done so because it would greatly slow down the addition of lookaheads. + +Instead we precompute the nonterminals whose productions are +added through the closure operation, the lookaheads for these +nonterminals, and whether the lookahead for these nonterminals +should include first(y) and lookahead(j) for some item j of the +form A -> x .B y. This information depends only on the particular +nonterminal whose closure is being taken. + +Some notation is necessary to describe what is happening here. Let +=c=> denote items added in one closure step that are derived from some +nonterminal B in a production A -> x .B y. Let =c+=> denote items +added in one or more =c=> steps. + +Consider the following productions + + B -> S ; + S -> E + E -> F * E + F -> num + +in a kernal with the item + + B -> .S + +The following derivations are possible: + +B -> .S =c=> S -> .E =c+=> S -> .E, E -> .F * E, F -> .num + +The nonterminals that are added through the closure operation +are the nonterminals for some item j = A -> .B x such that j =c+=> .C y. +Lookahead(C) includes first(y). If y =*=> epsilon then +lookahead (C) includes first (x). If x=*=> epsilon and y =*=> epsilon +then lookahead(C) includes first(j). + +The following algorithm computes the information for each nonterminal: + + (1) nonterminals such that c =c+=> .C y and y =*=> epsilon + + Let s = the set of nonterminals added through closure = B + + repeat + for all B which are elements of s, + if B -> .C z and z =*=> epsilon then + add B to s. + until s does not change. + + (2) nonterminals added through closure and their lookaheads + + Let s = the set of nonterminals added through closure = B + where A -> x . B y + + repeat + for all B which are elements of s, + if B -> .C z then add C to s, and + add first(z) to lookahead(C) + until nothing changes. + + Now, for each nonterminal A in s, find the set of nonterminals + such that A =c+=> .B z, and z =*=> epsilon (i.e. use the results + from 1). Add the lookahead for nonterminal A to the lookahead + for each nonterminal in this set. + +These algorithms can be restated as either breadth-first or depth-first search +algorithms. The loop invariant of the algorithms is that whenever a +nonterminal is added to the set being calculated, all the productions +for the nonterminal are checked. + +This algorithm computes the lookahead for each item: + + for each state, + for each item of the form A -> u .B v in the state, where u may be + nullable, + let first_v = first(v) + l-ref = ref for A -> u .B v + s = the set of nonterminals added through the closure of B. + + for each element X of s, + + let r = the rules produced by an element X of s + l = the lookahead ref cells for each rule, i.e. + all items of A -> x. or A -> x .B y, where + y =*=> epsilon, and x is not epsilon + + add the lookahead we have computed for X to the + elements of l + + if B =c+=> X z, where z is nullable, add first(y) to + the l. If y =*=> epsilon, save l with the ref for + A -> x .B y in a list. + + Now take the list of (lookahead ref, list of lookahead refs) and propagate + each lookahead ref cell's contents to the elements of the list of lookahead + ref cells associated with it. Iterate until no lookahead set changes. diff --git a/ml-yacc/examples/calc/README b/ml-yacc/examples/calc/README new file mode 100644 index 0000000..7916a63 --- /dev/null +++ b/ml-yacc/examples/calc/README @@ -0,0 +1,46 @@ +This is a sample interactive calculator built using ML-Yacc and ML-Lex. + +The calculator is defined by the files + + calc.lex (* defines lexer *) + calc.grm (* defines grammar *) + calc.sml (* defines driver function, Calc.parse *) + sources.cm (* cm description file *) + +To compile this example, type + + - CM.make "sources.cm"; + +in this directory. CM will invoke ml-lex and ml-yacc to process the +lexer specification calc.lex and the grammar specification calc.grm +respectively. Then it will compile the resulting SML source files + + calc.lex.sml + calc.grm.sig + calc.grm.sml + +and the calc.sml file containing the driver code. + +The end result of loading these files is a structure Calc containing a +top-level driver function named parse. + + Calc.parse : unit -> unit + +The calculator can be invoked by applying Calc.parse to the unit value. + + - Calc.parse(); + 1+3; + result = 4 + +The calculator reads a sequence of expressions from the standard input +and prints the value of each expression after reading the expression. +Expressions must be separated by semicolons. An expression is not +evaluated until the semicolon is encountered. The calculator +terminates when an end-of-file is encountered. There is no attempt to +fix input errors: a lexical error will cause exception LexError to be +raised, while a syntax error will cause ParseError to be raised. + +NOTE: The CM description file sources.cm mentions the ml-yacc library +(ml-yacc-lib.cm). CM's search path should be configured so that this +library will be found. This should normally be the case if SML/NJ is +properly installed. diff --git a/ml-yacc/examples/calc/calc.grm b/ml-yacc/examples/calc/calc.grm new file mode 100644 index 0000000..bb6b6af --- /dev/null +++ b/ml-yacc/examples/calc/calc.grm @@ -0,0 +1,50 @@ +(* Sample interactive calculator for ML-Yacc *) + +fun lookup "bogus" = 10000 + | lookup s = 0 + +%% + +%eop EOF SEMI + +(* %pos declares the type of positions for terminals. + Each symbol has an associated left and right position. *) + +%pos int + +%left SUB PLUS +%left TIMES DIV +%right CARAT + +%term ID of string | NUM of int | PLUS | TIMES | PRINT | + SEMI | EOF | CARAT | DIV | SUB +%nonterm EXP of int | START of int option + +%name Calc + +%subst PRINT for ID +%prefer PLUS TIMES DIV SUB +%keyword PRINT SEMI + +%noshift EOF +%value ID ("bogus") +%verbose +%% + +(* the parser returns the value associated with the expression *) + + START : PRINT EXP (print (Int.toString EXP); + print "\n"; + SOME EXP) + | EXP (SOME EXP) + | (NONE) + EXP : NUM (NUM) + | ID (lookup ID) + | EXP PLUS EXP (EXP1+EXP2) + | EXP TIMES EXP (EXP1*EXP2) + | EXP DIV EXP (EXP1 div EXP2) + | EXP SUB EXP (EXP1-EXP2) + | EXP CARAT EXP (let fun e (m,0) = 1 + | e (m,l) = m*e(m,l-1) + in e (EXP1,EXP2) + end) diff --git a/ml-yacc/examples/calc/calc.lex b/ml-yacc/examples/calc/calc.lex new file mode 100644 index 0000000..0ec2f69 --- /dev/null +++ b/ml-yacc/examples/calc/calc.lex @@ -0,0 +1,37 @@ +structure Tokens = Tokens + +type pos = int +type svalue = Tokens.svalue +type ('a,'b) token = ('a,'b) Tokens.token +type lexresult= (svalue,pos) token + +val pos = ref 0 +fun eof () = Tokens.EOF(!pos,!pos) +fun error (e,l : int,_) = TextIO.output (TextIO.stdOut, String.concat[ + "line ", (Int.toString l), ": ", e, "\n" + ]) + +%% +%header (functor CalcLexFun(structure Tokens: Calc_TOKENS)); +alpha=[A-Za-z]; +digit=[0-9]; +ws = [\ \t]; +%% +\n => (pos := (!pos) + 1; lex()); +{ws}+ => (lex()); +{digit}+ => (Tokens.NUM (valOf (Int.fromString yytext), !pos, !pos)); + +"+" => (Tokens.PLUS(!pos,!pos)); +"*" => (Tokens.TIMES(!pos,!pos)); +";" => (Tokens.SEMI(!pos,!pos)); +{alpha}+ => (if yytext="print" + then Tokens.PRINT(!pos,!pos) + else Tokens.ID(yytext,!pos,!pos) + ); +"-" => (Tokens.SUB(!pos,!pos)); +"^" => (Tokens.CARAT(!pos,!pos)); +"/" => (Tokens.DIV(!pos,!pos)); +"." => (error ("ignoring bad character "^yytext,!pos,!pos); + lex()); + + diff --git a/ml-yacc/examples/calc/calc.sml b/ml-yacc/examples/calc/calc.sml new file mode 100644 index 0000000..9c63d6a --- /dev/null +++ b/ml-yacc/examples/calc/calc.sml @@ -0,0 +1,68 @@ +(* calc.sml *) + +(* This file provides glue code for building the calculator using the + * parser and lexer specified in calc.lex and calc.grm. +*) + +structure Calc : sig + val parse : unit -> unit + end = +struct + +(* + * We apply the functors generated from calc.lex and calc.grm to produce + * the CalcParser structure. + *) + + structure CalcLrVals = + CalcLrValsFun(structure Token = LrParser.Token) + + structure CalcLex = + CalcLexFun(structure Tokens = CalcLrVals.Tokens) + + structure CalcParser = + Join(structure LrParser = LrParser + structure ParserData = CalcLrVals.ParserData + structure Lex = CalcLex) + +(* + * We need a function which given a lexer invokes the parser. The + * function invoke does this. + *) + + fun invoke lexstream = + let fun print_error (s,i:int,_) = + TextIO.output(TextIO.stdOut, + "Error, line " ^ (Int.toString i) ^ ", " ^ s ^ "\n") + in CalcParser.parse(0,lexstream,print_error,()) + end + +(* + * Finally, we need a driver function that reads one or more expressions + * from the standard input. The function parse, shown below, does + * this. It runs the calculator on the standard input and terminates when + * an end-of-file is encountered. + *) + + fun parse () = + let val lexer = CalcParser.makeLexer (fn _ => + (case TextIO.inputLine TextIO.stdIn + of SOME s => s + | _ => "")) + val dummyEOF = CalcLrVals.Tokens.EOF(0,0) + val dummySEMI = CalcLrVals.Tokens.SEMI(0,0) + fun loop lexer = + let val (result,lexer) = invoke lexer + val (nextToken,lexer) = CalcParser.Stream.get lexer + val _ = case result + of SOME r => + TextIO.output(TextIO.stdOut, + "result = " ^ (Int.toString r) ^ "\n") + | NONE => () + in if CalcParser.sameToken(nextToken,dummyEOF) then () + else loop lexer + end + in loop lexer + end + +end (* structure Calc *) diff --git a/ml-yacc/examples/calc/sources.cm b/ml-yacc/examples/calc/sources.cm new file mode 100644 index 0000000..ffc30b9 --- /dev/null +++ b/ml-yacc/examples/calc/sources.cm @@ -0,0 +1,13 @@ +Group is + +#if defined (NEW_CM) + $/basis.cm + $/ml-yacc-lib.cm +#else + ml-yacc-lib.cm +#endif + + calc.grm + calc.lex + calc.sml + diff --git a/ml-yacc/examples/fol/README b/ml-yacc/examples/fol/README new file mode 100644 index 0000000..e7ca0cf --- /dev/null +++ b/ml-yacc/examples/fol/README @@ -0,0 +1,55 @@ +fol/README + +This is a sample parser for first-order logic. The grammar +was contributed by Frank Pfenning. + +The parser is defined by the files + + fol.lex (* defines lexer *) + fol.grm (* defines grammar *) + link.sml (* constructs basic parser structures *) + absyn.sml (* a trivial abstract syntax *) + interface.sml (* interface to lexer and parser properties *) + parse.sml (* driver functions *) + sources.cm (* cm description file *) + +To compile this example, type + + - CM.make "sources.cm"; + +in this directory. CM will invoke ml-lex and ml-yacc to process the +lexer specification calc.lex and the grammar specification calc.grm +respectively. Then it will compile the resulting SML source files + + fol.lex.sml + fol.grm.sig + fol.grm.sml + +and the other sml source files. + +The end result of loading these files is a structure Parse containing +the following top-level driver functions: + + val prog_parse : string -> Absyn.absyn + (* parse a program from a string *) + + val query_parse : string -> Absyn.absyn + (* parse a query from a string *) + + val file_parse : string -> Absyn.absyn + (* parse a program in a file *) + + val top_parse : unit -> Absyn.absyn + (* parse a query from the standard input *) + + +The file list.fol is a sample input file that can be parsed using +the file_parse function: + + - Parse.file_parse "list.fol"; + + +NOTE: The CM description file sources.cm mentions the ml-yacc library +(ml-yacc-lib.cm). CM's search path should be configured so that this +library will be found. This should normally be the case if SML/NJ is +properly installed. diff --git a/ml-yacc/examples/fol/absyn.sml b/ml-yacc/examples/fol/absyn.sml new file mode 100644 index 0000000..6370c62 --- /dev/null +++ b/ml-yacc/examples/fol/absyn.sml @@ -0,0 +1,11 @@ +signature ABSYN = + sig + type absyn + val null : absyn + end + +structure Absyn :> ABSYN = + struct + datatype absyn = NULL + val null = NULL + end \ No newline at end of file diff --git a/ml-yacc/examples/fol/fol.grm b/ml-yacc/examples/fol/fol.grm new file mode 100644 index 0000000..dc67909 --- /dev/null +++ b/ml-yacc/examples/fol/fol.grm @@ -0,0 +1,82 @@ +%% +%header (functor FolLrValsFun (structure Token : TOKEN + structure Absyn : ABSYN ) : Fol_LRVALS) + +%term + EOF | DOT | COMMA | SEMICOLON + | LPAREN | RPAREN + | BACKARROW | DOUBLEARROW + | ARROW | BAR + | TRUE | FORALL | EXISTS + | PARSEPROG | PARSEQUERY + | LCID of string | UCID of string | INT of string + +(* gform: goal formula + dform: definite clause *) + +%nonterm + start of Absyn.absyn + | clause | query | gform | dform + | atom | termlist | term | varbd | id + +%start start +%eop EOF DOT +%pos int +%verbose +%pure + +%right FORALL EXISTS +%left BACKARROW +%right SEMICOLON +%right COMMA +%right DOUBLEARROW +%right ARROW +%left BAR + +%name Fol + +%prefer DOT +%% + +start : PARSEPROG clause (Absyn.null) + | PARSEQUERY query (Absyn.null) + +clause : dform () + | () + +query : gform () + | () + +gform : TRUE () + | gform COMMA gform () (* and *) + | gform SEMICOLON gform () (* disjunction *) + | gform BACKARROW dform () (* implication: dform implies gform *) + | gform ARROW gform BAR gform () (* if-then-else *) + | FORALL varbd gform () (* universal quantification *) + | EXISTS varbd gform () (* existential quantification *) + | atom () (* atomic formula *) + | LPAREN gform RPAREN () + +dform : TRUE () + | dform COMMA dform () (* and *) + | dform BACKARROW gform () (* gform implies dform *) + | FORALL varbd dform () + | atom () + | LPAREN dform RPAREN () + +atom : LCID () + | LCID LPAREN termlist RPAREN () + +termlist : term () + | term COMMA termlist () + +term : id () + | INT () + | LCID LPAREN termlist RPAREN () + +varbd : LCID DOT () + | UCID DOT () + +id : LCID () + | UCID () + diff --git a/ml-yacc/examples/fol/fol.lex b/ml-yacc/examples/fol/fol.lex new file mode 100644 index 0000000..20ab9d1 --- /dev/null +++ b/ml-yacc/examples/fol/fol.lex @@ -0,0 +1,42 @@ +structure Tokens = Tokens +structure Interface = Interface +open Interface + +type pos = Interface.pos +type svalue = Tokens.svalue +type ('a,'b) token = ('a,'b) Tokens.token +type lexresult= (svalue,pos) token + +val eof = fn () => Tokens.EOF(!line,!line) +fun makeInt (s : string) = s + +%% +%header (functor FolLexFun(structure Tokens: Fol_TOKENS + structure Interface: INTERFACE) : LEXER); +lcstart=[a-z!&$+/<=>?@~|#*`]|\-; +ucstart=[A-Z_]; +idchars={lcstart}|{ucstart}|[0-9]; +lcid={lcstart}{idchars}*; +ucid={ucstart}{idchars}*; +ws=[\t\ ]*; +num=[0-9]+; +%% +{ws} => (lex()); +\n => (next_line(); lex()); +":-" => (Tokens.BACKARROW(!line,!line)); +"," => (Tokens.COMMA(!line,!line)); +";" => (Tokens.SEMICOLON(!line,!line)); +"." => (Tokens.DOT(!line,!line)); +"(" => (Tokens.LPAREN(!line,!line)); +")" => (Tokens.RPAREN(!line,!line)); +"->" => (Tokens.ARROW(!line,!line)); +"=>" => (Tokens.DOUBLEARROW(!line,!line)); +"|" => (Tokens.BAR(!line,!line)); +"true" => (Tokens.TRUE(!line,!line)); +"forall" => (Tokens.FORALL(!line,!line)); +"exists" => (Tokens.EXISTS(!line,!line)); +{lcid} => (Tokens.LCID (yytext,!line,!line)); +{ucid} => (Tokens.UCID (yytext,!line,!line)); +{num} => (Tokens.INT (makeInt yytext,!line,!line)); +. => (error ("ignoring illegal character" ^ yytext, + !line,!line); lex()); diff --git a/ml-yacc/examples/fol/interface.sml b/ml-yacc/examples/fol/interface.sml new file mode 100644 index 0000000..28eb8d5 --- /dev/null +++ b/ml-yacc/examples/fol/interface.sml @@ -0,0 +1,31 @@ +(* Externally visible aspects of the lexer and parser *) + +signature INTERFACE = +sig + +type pos +val line : pos ref +val init_line : unit -> unit +val next_line : unit -> unit +val error : string * pos * pos -> unit + +type arg +val nothing : arg + +end (* signature INTERFACE *) + +functor Interface () : INTERFACE = +struct + +type pos = int +val line = ref 0 +fun init_line () = (line := 1) +fun next_line () = (line := !line + 1) +fun error (errmsg,line:pos,_) = + TextIO.output(TextIO.stdOut,"Line " ^ (Int.toString line) ^ ": " ^ errmsg ^ "\n") + +type arg = unit + +val nothing = () + +end (* functor INTERFACE *) diff --git a/ml-yacc/examples/fol/link.sml b/ml-yacc/examples/fol/link.sml new file mode 100644 index 0000000..00037ca --- /dev/null +++ b/ml-yacc/examples/fol/link.sml @@ -0,0 +1,19 @@ +structure FolLrVals : Fol_LRVALS = + FolLrValsFun(structure Token = LrParser.Token + structure Absyn = Absyn); + +structure Interface : INTERFACE = Interface(); +structure FolLex : LEXER = + FolLexFun(structure Tokens = FolLrVals.Tokens + structure Interface = Interface); + +structure FolParser : PARSER = + Join(structure ParserData = FolLrVals.ParserData + structure Lex = FolLex + structure LrParser = LrParser); + +structure Parse : PARSE = + Parse (structure Absyn = Absyn + structure Interface = Interface + structure Parser = FolParser + structure Tokens = FolLrVals.Tokens ); diff --git a/ml-yacc/examples/fol/list.fol b/ml-yacc/examples/fol/list.fol new file mode 100644 index 0000000..c84c9ad --- /dev/null +++ b/ml-yacc/examples/fol/list.fol @@ -0,0 +1,2 @@ +append(nil,K,K). +append(cons(X,L),K,cons(X,M)) :- append(L,K,M). diff --git a/ml-yacc/examples/fol/parse.sml b/ml-yacc/examples/fol/parse.sml new file mode 100644 index 0000000..a1dfdb7 --- /dev/null +++ b/ml-yacc/examples/fol/parse.sml @@ -0,0 +1,82 @@ +(* Uses the generated lexer and parser to export parsing functions + *) + +signature PARSE = +sig + + structure Absyn : ABSYN + +(* parse a program from a string *) + + val prog_parse : string -> Absyn.absyn + +(* parse a query from a string *) + + val query_parse : string -> Absyn.absyn + +(* parse a program in a file *) + + val file_parse : string -> Absyn.absyn + +(* parse a query from the standard input *) + + val top_parse : unit -> Absyn.absyn + +end (* signature PARSE *) + + +functor Parse (structure Absyn : ABSYN + structure Interface : INTERFACE + structure Parser : PARSER + sharing type Parser.arg = Interface.arg + sharing type Parser.pos = Interface.pos + sharing type Parser.result = Absyn.absyn + structure Tokens : Fol_TOKENS + sharing type Tokens.token = Parser.Token.token + sharing type Tokens.svalue = Parser.svalue + ) : PARSE = +struct + +structure Absyn = Absyn + +fun parse (dummyToken,lookahead,reader : int -> string) = + let val _ = Interface.init_line() + val empty = !Interface.line + val dummyEOF = Tokens.EOF(empty,empty) + val dummyTOKEN = dummyToken(empty,empty) + fun invoke lexer = + let val newLexer = Parser.Stream.cons(dummyTOKEN,lexer) + in Parser.parse(lookahead,newLexer,Interface.error, + Interface.nothing) + end + fun loop lexer = + let val (result,lexer) = invoke lexer + val (nextToken,lexer) = Parser.Stream.get lexer + in if Parser.sameToken(nextToken,dummyEOF) then result + else loop lexer + end + in loop (Parser.makeLexer reader) + end + +fun string_reader s = + let val next = ref s + in fn _ => !next before next := "" + end + +fun prog_parse s = parse (Tokens.PARSEPROG,15,string_reader s) + +fun query_parse s = parse (Tokens.PARSEQUERY,15,string_reader s) + +fun file_parse name = + let val dev = TextIO.openIn name + in (parse (Tokens.PARSEPROG,15,fn i => TextIO.inputN(dev,i))) + before TextIO.closeIn dev + end + +fun top_parse () = + parse (Tokens.PARSEQUERY,0,(fn i => + (case TextIO.inputLine TextIO.stdIn + of SOME s => s + | _ => ""))) + +end (* functor Parse *) diff --git a/ml-yacc/examples/fol/sources.cm b/ml-yacc/examples/fol/sources.cm new file mode 100644 index 0000000..b948fd5 --- /dev/null +++ b/ml-yacc/examples/fol/sources.cm @@ -0,0 +1,20 @@ +Group is + +#if defined (NEW_CM) + $/basis.cm + $/ml-yacc-lib.cm +#else + ml-yacc-lib.cm +#endif + + fol.lex + fol.grm + + absyn.sml + + interface.sml + + parse.sml + link.sml + + diff --git a/ml-yacc/examples/pascal/README b/ml-yacc/examples/pascal/README new file mode 100644 index 0000000..b590761 --- /dev/null +++ b/ml-yacc/examples/pascal/README @@ -0,0 +1,32 @@ +This is a grammar for Berkeley Pascal, hacked to be SLR, though that is +not necessary because ML-Yacc supports LALR(1). + +To construct the parser, make this your current directory and run + + CM.make "sources.cm"; + +This will apply ML-Yacc to the file "pascal.grm" to create +the files "pascal.grm.sig" and "pascal.grm.sml", then +ML_Lex will be applied to pascal.lex to produce pascal.lex.sml. + +Then these generated files will be compiled together with necessary +components from the ML-Yacc library supplied by the ml-yacc-lib.cm +file. + +The end result is a structure Parser with two functions. The +function + + parse: string -> + PascalParser.result * + (Parser.PascalParser.svalue,PascalParser.pos) LrParser.Token.token + LrParser.stream + +parses input from a file, while + + keybd: unit -> + Parser.PascalParser.result * + (Parser.PascalParser.svalue,Parser.PascalParser.pos) + LrParser.Token.token LrParser.stream + +parses from the standard input. + diff --git a/ml-yacc/examples/pascal/parser.sml b/ml-yacc/examples/pascal/parser.sml new file mode 100644 index 0000000..72d998c --- /dev/null +++ b/ml-yacc/examples/pascal/parser.sml @@ -0,0 +1,38 @@ +(* parser.sml *) +(* driver for Pascal parser *) + +structure Parser = +struct + +structure PascalLrVals = PascalLrValsFun(structure Token = LrParser.Token) + +structure PascalLex = PascalLexFun(structure Tokens = PascalLrVals.Tokens) + +structure PascalParser = Join(structure Lex= PascalLex + structure LrParser = LrParser + structure ParserData = PascalLrVals.ParserData) + +fun parse s = + let val dev = TextIO.openIn s + val stream = PascalParser.makeLexer(fn i => TextIO.inputN(dev,i)) + fun error (e,i:int,_) = + TextIO.output(TextIO.stdOut, + s ^ "," ^ " line " ^ (Int.toString i) ^ ", Error: " ^ e ^ "\n") + in PascalLex.UserDeclarations.lineNum := 1; + PascalParser.parse(30,stream,error,()) + before TextIO.closeIn dev + end + +fun keybd () = + let val stream = + PascalParser.makeLexer (fn i => (case TextIO.inputLine TextIO.stdIn + of SOME s => s + | _ => "")) + fun error (e,i:int,_) = + TextIO.output(TextIO.stdOut, + "std_in," ^ " line " ^ (Int.toString i) ^ ", Error: " ^ e ^ "\n") + in PascalLex.UserDeclarations.lineNum := 1; + PascalParser.parse(0,stream,error,()) + end + +end (* structure Parser *) diff --git a/ml-yacc/examples/pascal/pascal.grm b/ml-yacc/examples/pascal/pascal.grm new file mode 100644 index 0000000..aff2a3f --- /dev/null +++ b/ml-yacc/examples/pascal/pascal.grm @@ -0,0 +1,244 @@ +%% + +%name Pascal +%term + YAND | YARRAY | YBEGIN | YCASE | + YCONST | YDIV | YDO | YDOTDOT | + YTO | YELSE | YEND | YFILE | + YFOR | YFORWARD | YPROCEDURE | YGOTO | + YID | YIF | YIN | YINT | + YLABEL | YMOD | YNOT | YNUMB | + YOF | YOR | YPACKED | YNIL | + YFUNCTION | YPROG | YRECORD | YREPEAT | + YSET | YSTRING | YTHEN | YDOWNTO | + YTYPE | YUNTIL | YVAR | YWHILE | + YWITH | YBINT | YOCT | YHEX | + YCASELAB | YILLCH | YEXTERN | + YDOT | YLPAR | YRPAR | YSEMI | YCOMMA | YCOLON | YCARET | YLBRA | + YRBRA | YTILDE | + YLESS | YEQUAL | YGREATER +| YPLUS | YMINUS | YBAR +| UNARYSIGN +| YSTAR | YSLASH | YAMP +| EOF + +%eop EOF +%pos int +%pure +%noshift EOF + +%nonassoc YLESS YEQUAL YGREATER YIN +%left YPLUS YMINUS YOR YBAR +%left UNARYSIGN +%left YSTAR YSLASH YDIV YMOD YAND YAMP +%left YNOT + +%nonterm goal | prog_hedr | block | decls | decl | labels | label_decl | +const_decl | type_decl | var_decl | proc_decl | pheadres | phead | +porf | params | param | ftype | param_list | const | number | const_list | +type' | simple_type | struct_type | simple_type_list | field_list | +fixed_part | field | variant_part | variant_list | variant | stat_list | +stat_lsth | cstat_list | cstat | stat | assign | expr | element_list | +element | variable | qual_var | wexpr | octhex | expr_list | wexpr_list | +relop | addop | divop | negop | var_list | id_list | const_id | type_id | +var_id | array_id | ptr_id | record_id | field_id | func_id +| begin + +%keyword + YAND YARRAY YBEGIN YCASE + YCONST YDIV YDO + YTO YELSE YEND YFILE + YFOR YFORWARD YPROCEDURE YGOTO + YIF YIN + YLABEL YMOD YNOT + YOF YOR YPACKED YNIL + YFUNCTION YPROG YRECORD YREPEAT + YSET YSTRING YTHEN YDOWNTO + YTYPE YUNTIL YVAR YWHILE + YWITH YOCT YHEX + YEXTERN YAMP + +%prefer YID YSEMI YCOMMA YLBRA + +%subst YCOMMA for YSEMI | YSEMI for YCOMMA + +%% +begin: goal () +goal: prog_hedr decls block YDOT () +| decls () +prog_hedr: YPROG YID YLPAR id_list YRPAR YSEMI () +| YPROG YID YSEMI () +block: YBEGIN stat_list YEND () +decls: decls decl () +| () +decl: labels () +| const_decl () +| type_decl () +| var_decl () +| proc_decl () +labels: YLABEL label_decl YSEMI () +label_decl: YINT () +| label_decl YCOMMA YINT () +const_decl: YCONST YID YEQUAL const YSEMI () +| const_decl YID YEQUAL const YSEMI () +| YCONST YID YEQUAL YID YSEMI () +| const_decl YID YEQUAL YID YSEMI () +type_decl: YTYPE YID YEQUAL type' YSEMI () +| type_decl YID YEQUAL type' YSEMI () +var_decl: YVAR id_list YCOLON type' YSEMI () +| var_decl id_list YCOLON type' YSEMI () +proc_decl: phead YFORWARD YSEMI () +| phead YEXTERN YSEMI () +| pheadres decls block YSEMI () +pheadres: phead () +phead: porf YID params ftype YSEMI () +porf: YPROCEDURE () +| YFUNCTION () +params: YLPAR param_list YRPAR () +| () +param: id_list YCOLON type' () +| YVAR id_list YCOLON type' () +| YFUNCTION id_list params ftype () +| YPROCEDURE id_list params ftype () +ftype: YCOLON type' () +| () +param_list: param () +| param_list YSEMI param () +const: YSTRING () +| number () +| YPLUS number () +| YMINUS number () +| YPLUS YID () +| YMINUS YID () +number: YINT () +| YBINT () +| YNUMB () +const_list: const () +| const_list YCOMMA const () +| YID () +| const_list YCOMMA YID () +type': simple_type () +| YCARET YID () +| struct_type () +| YPACKED struct_type () +simple_type: type_id () +| YLPAR id_list YRPAR () +| const YDOTDOT const () +| YID YDOTDOT const () +| const YDOTDOT YID () +| YID YDOTDOT YID () +struct_type: YARRAY YLBRA simple_type_list YRBRA YOF type' () +| YFILE YOF type' () +| YSET YOF simple_type () +| YRECORD field_list YEND () +simple_type_list: simple_type () +| simple_type_list YCOMMA simple_type () +field_list: fixed_part variant_part () +fixed_part: field () +| fixed_part YSEMI field () +field: () +| id_list YCOLON type' () +variant_part: () +| YCASE type_id YOF variant_list () +| YCASE YID YCOLON type_id YOF variant_list () +variant_list: variant () +| variant_list YSEMI variant () +variant: () +| const_list YCOLON YLPAR field_list YRPAR () +stat_list: stat () +| stat_lsth stat () +stat_lsth: stat_list YSEMI () +cstat_list: cstat () +| cstat_list YSEMI cstat () +cstat: const_list YCOLON stat () +| YCASELAB stat () +| () +stat: () +| YINT YCOLON stat () +| YID () +| YID YLPAR wexpr_list YRPAR () +| assign () +| YBEGIN stat_list YEND () +| YCASE expr YOF cstat_list YEND () +| YWITH var_list YDO stat () +| YWHILE expr YDO stat () +| YREPEAT stat_list YUNTIL expr () +| YFOR assign YTO expr YDO stat () +| YFOR assign YDOWNTO expr YDO stat () +| YGOTO YINT () +| YIF expr YTHEN stat () +| YIF expr YTHEN stat YELSE stat () +assign: variable YCOLON YEQUAL expr () +| YID YCOLON YEQUAL expr () +expr: expr relop expr %prec YLESS () +| YPLUS expr %prec UNARYSIGN () +| YMINUS expr %prec UNARYSIGN () +| expr addop expr %prec YPLUS () +| expr divop expr %prec YSTAR () +| YNIL () +| YSTRING () +| YINT () +| YBINT () +| YNUMB () +| variable () +| YID () +| YID YLPAR wexpr_list YRPAR () +| YLPAR expr YRPAR () +| negop expr %prec YNOT () +| YLBRA element_list YRBRA () +| YLBRA YRBRA () +element_list: element () +| element_list YCOMMA element () +element: expr () +| expr YDOTDOT expr () +variable: qual_var () +qual_var: YID YLBRA expr_list YRBRA () +| qual_var YLBRA expr_list YRBRA () +| YID YDOT field_id () +| qual_var YDOT field_id () +| YID YCARET () +| qual_var YCARET () +wexpr: expr () +| expr YCOLON expr () +| expr YCOLON expr YCOLON expr () +| expr octhex () +| expr YCOLON expr octhex () +octhex: YOCT () +| YHEX () +expr_list: expr () +| expr_list YCOMMA expr () +wexpr_list: wexpr () +| wexpr_list YCOMMA wexpr () +relop: YEQUAL () +| YLESS () +| YGREATER () +| YLESS YGREATER () +| YLESS YEQUAL () +| YGREATER YEQUAL () +| YIN () +addop: YPLUS () +| YMINUS () +| YOR () +| YBAR () +divop: YSTAR () +| YSLASH () +| YDIV () +| YMOD () +| YAND () +| YAMP () +negop: YNOT () +| YTILDE () +var_list: variable () +| var_list YCOMMA variable () +| YID () +| var_list YCOMMA YID () +id_list: YID () +| id_list YCOMMA YID () +const_id: YID () +type_id: YID () +var_id: YID () +array_id: YID () +ptr_id: YID () +record_id: YID () +field_id: YID () +func_id: YID () diff --git a/ml-yacc/examples/pascal/pascal.lex b/ml-yacc/examples/pascal/pascal.lex new file mode 100644 index 0000000..3dc3efe --- /dev/null +++ b/ml-yacc/examples/pascal/pascal.lex @@ -0,0 +1,139 @@ +structure Tokens = Tokens +type pos = int +type svalue = Tokens.svalue +type ('a,'b) token = ('a,'b) Tokens.token +type lexresult = (svalue,pos) token + +open Tokens + +val lineNum = ref 0 +val eof = fn () => EOF(!lineNum,!lineNum) + + +structure KeyWord : sig + val find : string -> + (int * int -> (svalue,int) token) option + end = + struct + + val TableSize = 211 + val HashFactor = 5 + + val hash = fn s => + foldl (fn (c,v)=>(v*HashFactor+(ord c)) mod TableSize) 0 (explode s) + + + val HashTable = Array.array(TableSize,nil) : + (string * (int * int -> (svalue,int) token)) list Array.array + + + val add = fn (s,v) => + let val i = hash s + in Array.update(HashTable,i,(s,v) :: (Array.sub(HashTable, i))) + end + + val find = fn s => + let val i = hash s + fun f ((key,v)::r) = if s=key then SOME v else f r + | f nil = NONE + in f (Array.sub(HashTable, i)) + end + + val _ = + (List.app add + [("and",YAND), + ("array",YARRAY), + ("begin",YBEGIN), + ("case",YCASE), + ("const",YCONST), + ("div",YDIV), + ("do",YDO), + ("downto",YDOWNTO), + ("else",YELSE), + ("end",YEND), + ("extern",YEXTERN), + ("file",YFILE), + ("for",YFOR), + ("forward",YFORWARD), + ("function",YFUNCTION), + ("goto",YGOTO), + ("hex",YHEX), + ("if",YIF), + ("in",YIN), + ("label",YLABEL), + ("mod",YMOD), + ("nil",YNIL), + ("not",YNOT), + ("oct",YOCT), + ("of",YOF), + ("or",YOR), + ("packed",YPACKED), + ("procedure",YPROCEDURE), + ("program",YPROG), + ("record",YRECORD), + ("repeat",YREPEAT), + ("set",YSET), + ("then",YTHEN), + ("to",YTO), + ("type",YTYPE), + ("until",YUNTIL), + ("var",YVAR), + ("while",YWHILE), + ("with",YWITH) + ]) + end + open KeyWord + +%% + +%header (functor PascalLexFun(structure Tokens : Pascal_TOKENS)); +%s C B; +alpha=[A-Za-z]; +digit=[0-9]; +optsign=("+"|"-")?; +integer={digit}+; +frac="."{digit}+; +exp=(e|E){optsign}{digit}+; +octdigit=[0-7]; +ws = [\ \t]; +%% +{ws}+ => (lex()); +\n+ => (lineNum := (!lineNum) + (String.size yytext); lex()); +{alpha}+ => (case find yytext of SOME v => v(!lineNum,!lineNum) + | _ => YID(!lineNum,!lineNum)); +{alpha}({alpha}|{digit})* => (YID(!lineNum,!lineNum)); +{optsign}{integer}({frac}{exp}?|{frac}?{exp}) => (YNUMB(!lineNum,!lineNum)); +{optsign}{integer} => (YINT(!lineNum,!lineNum)); +{octdigit}+(b|B) => (YBINT(!lineNum,!lineNum)); +"'"([^']|"''")*"'" => (YSTRING(!lineNum,!lineNum)); +"(*" => (YYBEGIN C; lex()); +".." => (YDOTDOT(!lineNum,!lineNum)); +"." => (YDOT(!lineNum,!lineNum)); +"(" => (YLPAR(!lineNum,!lineNum)); +")" => (YRPAR(!lineNum,!lineNum)); +";" => (YSEMI(!lineNum,!lineNum)); +"," => (YCOMMA(!lineNum,!lineNum)); +":" => (YCOLON(!lineNum,!lineNum)); +"^" => (YCARET(!lineNum,!lineNum)); +"[" => (YLBRA(!lineNum,!lineNum)); +"]" => (YRBRA(!lineNum,!lineNum)); +"~" => (YTILDE(!lineNum,!lineNum)); +"<" => (YLESS(!lineNum,!lineNum)); +"=" => (YEQUAL(!lineNum,!lineNum)); +">" => (YGREATER(!lineNum,!lineNum)); +"+" => (YPLUS(!lineNum,!lineNum)); +"-" => (YMINUS(!lineNum,!lineNum)); +"|" => (YBAR(!lineNum,!lineNum)); +"*" => (YSTAR(!lineNum,!lineNum)); +"/" => (YSLASH(!lineNum,!lineNum)); +"{" => (YYBEGIN B; lex()); +. => (YILLCH(!lineNum,!lineNum)); +\n+ => (lineNum := (!lineNum) + (String.size yytext); lex()); +[^()*\n]+ => (lex()); +"(*" => (lex()); +"*)" => (YYBEGIN INITIAL; lex()); +[*()] => (lex()); +\n+ => (lineNum := (!lineNum) + (String.size yytext); lex()); +[^{}\n]+ => (lex()); +"{" => (lex()); +"}" => (YYBEGIN INITIAL; lex()); diff --git a/ml-yacc/examples/pascal/sources.cm b/ml-yacc/examples/pascal/sources.cm new file mode 100644 index 0000000..f5d469c --- /dev/null +++ b/ml-yacc/examples/pascal/sources.cm @@ -0,0 +1,14 @@ +Group is + +#if defined (NEW_CM) + $/basis.cm + $/ml-yacc-lib.cm +#else + ml-yacc-lib.cm +#endif + + pascal.grm + pascal.lex + + parser.sml + diff --git a/ml-yacc/examples/pascal/test/README b/ml-yacc/examples/pascal/test/README new file mode 100644 index 0000000..4efd63e --- /dev/null +++ b/ml-yacc/examples/pascal/test/README @@ -0,0 +1,2 @@ +Test files for the error-correcting parser. Files beginning with c +are correct. Those beginning with t have simple syntax errors. diff --git a/ml-yacc/examples/pascal/test/c1.p b/ml-yacc/examples/pascal/test/c1.p new file mode 100644 index 0000000..2c4687d --- /dev/null +++ b/ml-yacc/examples/pascal/test/c1.p @@ -0,0 +1,272 @@ +program simplex(input, output); + +{ two-phase simplex algorithm: version Feb. 24, 1988 } + +{ copyright K. Steiglitz } +{ Computer Science Dept. } +{ Princeton University 08544 } +{ ken@princeton.edu } + +const + maxpivots = 1000; { maximum no. of pivots } + large = 1.0e+31; { large number used in search for minimum cost column } + lowlim = -1.0e+31; { large negative number to test for unboundedness } + mmax = 32; { max. no. of rows } + ncolmax = 50; { max. no. of columns allowed in tableau } + eps = 1.0e-8; { for testing for zero } + +var + done, unbounded, optimal: boolean; { flags for simplex } + result: (toomanycols, unbound, infeas, toomanypivots, opt); + m: 1..mmax; { no. of rows - 1, the rows are numbered 0..m } + numpivots: integer; { pivot count } + pivotcol, pivotrow: integer; { pivot column and row } + pivotel: real; { pivot element } + cbar: real; { price when searching for entering column } + carry: array[-1..mmax, -1..mmax] of real; { inverse-basis matrix of the + revised simplex method } + phase: 1..2; { phase } + price: array[0..mmax] of real; { shadow prices = row -1 of carry = + -dual variables } + basis: array[0..mmax] of integer; { basis columns, negative integers + artificial } + ncol: 1..ncolmax; { number of columns } + tab: array[0..mmax, 1..ncolmax] of real; { tableau } + lhs: array[0..mmax] of real; { left-hand-side } + d: array[1..ncolmax] of real; { current cost vector } + c: array[1..ncolmax] of real; { cost vector in original problem } + curcol: array[-1..mmax] of real; { current column } + curcost: real; { current cost } + i, col, row: integer; { miscellaneous variables } + +procedure columnsearch; +{ looks for favorable column to enter basis. + returns lowest cost and its column number, or turns on the flag optimal } + +var + i , col : integer; + tempcost: real; { minimum cost, temporary cost of column } + + begin { columnsearch } + for i:= 0 to m do price[i]:= -carry[-1, i]; { set up price vector } + optimal:= false; + cbar:= large; + pivotcol:= 0; + for col:= 1 to ncol do + begin + tempcost:= d[col]; + for i:= 0 to m do tempcost:= tempcost - price[i]*tab[i, col]; + if( cbar > tempcost ) then + begin + cbar:= tempcost; + pivotcol:= col + end + end; { for col } + if ( cbar > -eps ) then optimal:= true + end; { columnsearch } + + +procedure rowsearch; +{ looks for pivot row. returns pivot row number, + or turns on the flag unbounded } + +var + i, j: integer; + ratio, minratio: real; { ratio and minimum ratio for ratio test } + + begin { rowsearch } + for i:= 0 to m do { generate column } + begin + curcol[i]:= 0.0; { current column = B inverse * original col. } + for j:= 0 to m do curcol[i]:= + curcol[i] + carry[i, j]*tab[j, pivotcol] + end; + curcol[-1]:= cbar; { first element in current column } + pivotrow:= -1; + minratio:= large; + for i:= 0 to m do { ratio test } + begin + if( curcol[i] > eps ) then + begin + ratio:= carry[i, -1]/curcol[i]; + if( minratio > ratio ) then { favorable row } + begin + minratio:= ratio; + pivotrow:= i; + pivotel:= curcol[i] + end + else { break tie with max pivot } + if ( (minratio = ratio) and (pivotel < curcol[i]) ) then + begin + pivotrow:= i; + pivotel:= curcol[i] + end + end { curcol > eps } + end; { for i } + if ( pivotrow = -1 ) then unbounded:= true { nothing found } + else unbounded:= false + end; { rowsearch } + + +procedure pivot; +{ pivots } + + var + i, j: integer; + + begin { pivot } + basis[pivotrow]:= pivotcol; + for j:= -1 to m do carry[pivotrow, j]:= carry[pivotrow, j]/pivotel; + for i:= -1 to m do + if( i<> pivotrow ) then + for j:= -1 to m do + carry[i, j]:= carry[i, j] - carry[pivotrow, j]*curcol[i]; + curcost:= -carry[-1, -1] + end; { pivot } + + +procedure changephase; +{ changes phase from 1 to 2, by switching to original cost vector } + + var + i, j, b: integer; + + begin { changephase } + phase:= 2; + for i:= 0 to m do if( basis[i] <= 0 ) then + writeln( '...artificial basis element ', basis[i]:5, + ' remains in basis after phase 1'); + for j:= 1 to ncol do d[j]:= c[j]; { switch to original cost vector } + for j:= -1 to m do + begin + carry[-1, j]:= 0.0; + for i:= 0 to m do + begin + b:= basis[i]; { ignore artificial basis elements that are } + if( b >= 1 ) then { still in basis } + carry[-1, j]:= carry[-1, j] - c[b]*carry[i,j] + end { for i } + end; { for j } + curcost:= -carry[-1, -1] + end; { changephase } + +procedure setup; +{ sets up test problem, lhs = tab*x, x >= 0, min c*x } +{ nrow = number of rows; ncol = number of cols } +{ tab = tableau; lhs = constants } + +var + i, j, nrow: integer; + +begin { setup } + readln(nrow); { read number of rows } + readln(ncol); { read number of columns } + m:= nrow - 1; { rows are numbered 0..m } + for j:= 1 to ncol do + read(c[j]); { cost vector } + for i:= 0 to m do + begin + read(lhs[i]); { left-hand-side } + for j:= 1 to ncol do + read(tab[i, j]) { tableau } + end; + + done:= false; { initialize carry matrix, etc. } + phase:= 1; + for i:= -1 to m do for j:= -1 to mmax do carry[i, j]:= 0.0; + for i:= 0 to m do carry[i, i]:= 1.0; { artificial basis } + for i:= 0 to m do + begin + carry[i, -1]:= lhs[i]; { -1 col of carry = left-hand-side } + carry[-1, -1]:= carry[-1, -1] - lhs[i] { - initial cost } + end; + curcost:= -carry[-1, -1]; + for i:= 0 to m do basis[i]:= -i; { initial, artificial basis } + if( ncol <= ncolmax ) then { check number of columns } + for col:= 1 to ncol do { initialize cost vector for phase 1 } + begin + d[col]:= 0.0; + for row:= 0 to m do d[col]:= d[col] - tab[row, col] + end + else + begin + writeln('...termination: too many columns for storage'); + done:= true; + result:= toomanycols + end; + numpivots:= 0; +end; { setup } + + +begin { simplex } + setup; + while( (numpivots < maxpivots) and (not done) and + ( (curcost > lowlim) or (phase = 1) ) ) do + begin + columnsearch; + if( not optimal ) then + begin { not optimal } + rowsearch; + if( unbounded ) then + begin + done:= true; + result:= unbound; + writeln('problem is unbounded') + end + else + begin + pivot; + numpivots:= numpivots + 1; + if ( (numpivots = 1 ) or ( numpivots mod 10 = 0 ) ) then + writeln('pivot ', numpivots:4, ' cost= ', curcost:12) + end + end { not optimal } + else { optimal } + if( phase = 1 ) then + begin + if( curcost > eps ) then + begin + done:= true; + result:= infeas; + writeln('problem is infeasible') + end + else + begin + if ( (numpivots <> 1 ) and ( numpivots mod 10 <> 0 ) ) then + writeln('pivot ', numpivots:4, ' cost= ', curcost:12); + writeln('phase 1 successfully completed'); + changephase + end + end { if phase = 1 } + else + begin + if ( (numpivots <> 1 ) and ( numpivots mod 10 <> 0 ) ) then + writeln('pivot ', numpivots:4, ' cost= ', curcost:12); + writeln('phase 2 successfully completed'); + done:= true; + result:= opt + end + end; { while } + if( (curcost <= lowlim) and (phase = 2) ) then + begin + if ( (numpivots <> 1 ) and ( numpivots mod 10 <> 0 ) ) then + writeln('pivot ', numpivots:4, ' cost= ', curcost:12); + result:= unbound; + writeln('problem is unbounded') + end; + if ( numpivots >= maxpivots ) then + begin + writeln('...termination: maximum number of pivots exceeded'); + result:= toomanypivots + end; + + if result = opt then + begin + writeln('optimal solution reached'); + writeln('cost =', -carry[-1,-1]:10:6); + for i:= 0 to m do + writeln('x(', basis[i]:4, ')= ', carry[i,-1]:10:6) + end + +end. + diff --git a/ml-yacc/examples/pascal/test/c2.p b/ml-yacc/examples/pascal/test/c2.p new file mode 100644 index 0000000..56f9242 --- /dev/null +++ b/ml-yacc/examples/pascal/test/c2.p @@ -0,0 +1,4 @@ +program p(input,output); +begin + if x=0 then x := 1 +end. diff --git a/ml-yacc/examples/pascal/test/t1.p b/ml-yacc/examples/pascal/test/t1.p new file mode 100644 index 0000000..29d7cd7 --- /dev/null +++ b/ml-yacc/examples/pascal/test/t1.p @@ -0,0 +1,271 @@ +junk simplex(input, output); + +{ two-phase simplex algorithm: version Feb. 24, 1988 } + +{ copyright K. Steiglitz } +{ Computer Science Dept. } +{ Princeton University 08544 } +{ ken@princeton.edu } + +var + maxpivots = 1000; { maximum no. of pivots } + large = 1.0e+31; { large number used in search for minimum cost column } + lowlim = -1.0e+31; { large negative number to test for unboundedness } + mmax = 32; { max. no. of rows } + ncolmax = 50; { max. no. of columns allowed in tableau } + eps = 1.0e-8; { for testing for zero } + +const + done, unbounded, optimal: boolean; { flags for simplex } + result: (toomanycols, unbound, infeas, toomanypivots, opt); + m: 1..mmax; { no. of rows - 1, the rows are numbered 0..m } + numpivots: integer; { pivot count } + pivotcol, pivotrow: integer; { pivot column and row } + pivotel: real; { pivot element } + cbar: real; { price when searching for entering column } + carry: array[-1..mmax, -1..mmax] of real; { inverse-basis matrix of the + revised simplex method } + phase: 1..2; { phase } + price: array[0..mmax] of real; { shadow prices = row -1 of carry = + -dual variables } + basis: array[0..mmax] of integer; { basis columns, negative integers + artificial } + ncol: 1..ncolmax; { number of columns } + tab: array[0..mmax, 1..ncolmax] of real; { tableau } + lhs: array[0..mmax] of real; { left-hand-side } + d: array[1..ncolmax] of real; { current cost vector } + c: array[1..ncolmax] of real; { cost vector in original problem } + curcol: array[-1..mmax] of real; { current column } + curcost: real; { current cost } + i, col, row: integer; { miscellaneous variables } + +procedure columnsearch; +{ looks for favorable column to enter basis. + returns lowest cost and its column number, or turns on the flag optimal } + +var + i , col : integer; + tempcost: real; { minimum cost, temporary cost of column } + + begin { columnsearch } + for i:= 0 to m do price[i]:= -carry[-1, i]; { set up price vector } + optimal:= false; + cbar:= large; + pivotcol:= 0; + for col:= 1 to ncol do + begin + tempcost:= d[col]; + for i:= 0 to m do tempcost:= tempcost - price[i]*tab[i, col]; + if( cbar > tempcost ) then + begin + cbar:= tempcost; + pivotcol:= col + end + end; { for col } + if ( cbar > -eps ) then optimal:= true + end; { columnsearch } + + +procedure rowsearch; +{ looks for pivot row. returns pivot row number, + or turns on the flag unbounded } + +var + i, j: integer; + ratio, minratio: real; { ratio and minimum ratio for ratio test } + + begin { rowsearch } + for i:= 0 to m do { generate column } + begin + curcol[i]:= 0.0; { current column = B inverse * original col. } + for j:= 0 to m do curcol[i]:= + curcol[i] + carry[i, j]*tab[j, pivotcol] + end; + curcol[-1]:= cbar; { first element in current column } + pivotrow:= -1; + minratio:= large; + for i:= 0 to m do { ratio test } + begin + if( curcol[i] > eps ) then + begin + ratio:= carry[i, -1]/curcol[i]; + if( minratio > ratio ) then { favorable row } + begin + minratio:= ratio; + pivotrow:= i; + pivotel:= curcol[i] + end + else { break tie with max pivot } + if ( (minratio = ratio) and (pivotel < curcol[i]) ) then + begin + pivotrow:= i; + pivotel:= curcol[i] + end + end { curcol > eps } + end; { for i } + if ( pivotrow = -1 ) then unbounded:= true { nothing found } + else unbounded:= false + end; { rowsearch } + + +procedure pivot; +{ pivots } + + var + i, j: integer; + + begin { pivot } + basis[pivotrow]:= pivotcol; + for j:= -1 to m do carry[pivotrow, j]:= carry[pivotrow, j]/pivotel; + for i:= -1 to m do + if( i<> pivotrow ) then + for j:= -1 to m do + carry[i, j]:= carry[i, j] - carry[pivotrow, j]*curcol[i]; + curcost:= -carry[-1, -1] + end; { pivot } + + +procedure changephase; +{ changes phase from 1 to 2, by switching to original cost vector } + + var + i, j, b: integer; + + begin { changephase } + phase:= 2; + for i:= 0 to m do if( basis[i] <= 0 ) then + writeln( '...artificial basis element ', basis[i]:5, + ' remains in basis after phase 1'); + for j:= 1 to ncol do d[j]:= c[j]; { switch to original cost vector } + for j:= -1 to m do + begin + carry[-1, j]:= 0.0; + for i:= 0 to m do + begin + b:= basis[i]; { ignore artificial basis elements that are } + if( b >= 1 ) then { still in basis } + carry[-1, j]:= carry[-1, j] - c[b]*carry[i,j]; + end { for i } + end; { for j } + curcost:= -carry[-1, -1] + end; { changephase } + +procedure setup; +{ sets up test problem, lhs = tab*x, x >= 0, min c*x } +{ nrow = number of rows; ncol = number of cols } +{ tab = tableau; lhs = constants } + +var + i, j, nrow: integer; + +begin { setup } + readln(nrow); { read number of rows } + readln(ncol); { read number of columns } + m:= nrow - 1; { rows are numbered 0..m } + for j:= 1 to ncol do + read(c[j]); { cost vector } + for i:= 0 to m do + begin + read(lhs[i]); { left-hand-side } + for j:= 1 to ncol do + read(tab[i, j]); { tableau } + end; + + done:= false; { initialize carry matrix, etc. } + phase:= 1; + for i:= -1 to m do for j:= -1 to mmax do carry[i, j]:= 0.0; + for i:= 0 to m do carry[i, i]:= 1.0; { artificial basis } + for i:= 0 to m do + begin + carry[i, -1]:= lhs[i]; { -1 col of carry = left-hand-side } + carry[-1, -1]:= carry[-1, -1] - lhs[i] { - initial cost } + end; + curcost:= -carry[-1, -1]; + for i:= 0 to m do basis[i]:= -i; { initial, artificial basis } + if( ncol <= ncolmax ) then { check number of columns } + for col:= 1 to ncol do { initialize cost vector for phase 1 } + begin + d[col]:= 0.0; + for row:= 0 to m do d[col]:= d[col] - tab[row, col] + end + else + begin + writeln('...termination: too many columns for storage'); + done:= true; + result:= toomanycols + end; + numpivots:= 0; +end; { setup } + + +begin { simplex } + setup; + while( (numpivots < maxpivots) and (not done) and + ( (curcost > lowlim) or (phase = 1) ) ) do + begin + columnsearch; + if( not optimal ) then + begin { not optimal } + rowsearch; + if( unbounded ) then + begin + done:= true; + result:= unbound; + writeln('problem is unbounded') + end + else + begin + pivot; + numpivots:= numpivots + 1; + if ( (numpivots = 1 ) or ( numpivots mod 10 = 0 ) ) then + writeln('pivot ', numpivots:4, ' cost= ', curcost:12) + end + end { not optimal } + else { optimal } + if( phase = 1 ) then + begin + if( curcost > eps ) then + begin + done:= true; + result:= infeas; + writeln('problem is infeasible') + end + else + begin + if ( (numpivots <> 1 ) and ( numpivots mod 10 <> 0 ) ) then + writeln('pivot ', numpivots:4, ' cost= ', curcost:12); + writeln('phase 1 successfully completed'); + changephase + end + end { if phase = 1 } + else + begin + if ( (numpivots <> 1 ) and ( numpivots mod 10 <> 0 ) ) then + writeln('pivot ', numpivots:4, ' cost= ', curcost:12); + writeln('phase 2 successfully completed'); + done:= true; + result:= opt + end + end; { while } + if(((curcost <= lowlim) and (phase = 2) ) then + begin + if ( (numpivots <> 1 ) and ( numpivots mod 10 <> 0 ) ) then + writeln('pivot ', numpivots:4, ' cost= ', curcost:12); + result:= unbound; + writeln('problem is unbounded') + end; + if ( numpivots >= maxpivots ) then + begin + writeln('...termination: maximum number of pivots exceeded'); + result:= toomanypivots, + end; + + if result = opt then + begin + writeln('optimal solution reached'); + writeln('cost =', -carry[-1,-1]:10:6); + for i:= 0 to m do + writeln('x(', basis[i]:4, ')= ', carry[i,-1]:10:6) + +end. + diff --git a/ml-yacc/examples/pascal/test/t2.p b/ml-yacc/examples/pascal/test/t2.p new file mode 100644 index 0000000..beb3363 --- /dev/null +++ b/ml-yacc/examples/pascal/test/t2.p @@ -0,0 +1,4 @@ +program p(input,output); +begin + if x := 0 then x := 1 +end. diff --git a/ml-yacc/examples/pascal/test/t3.p b/ml-yacc/examples/pascal/test/t3.p new file mode 100644 index 0000000..b6bdd7b --- /dev/null +++ b/ml-yacc/examples/pascal/test/t3.p @@ -0,0 +1,6 @@ +program p(input,output); + function topsort(var x: order, var y : sorted, x : integer); + begin end; +begin + x:= 1 +end. diff --git a/ml-yacc/examples/pascal/test/t4.p b/ml-yacc/examples/pascal/test/t4.p new file mode 100644 index 0000000..6f746eb --- /dev/null +++ b/ml-yacc/examples/pascal/test/t4.p @@ -0,0 +1,6 @@ +program p(input,output); + var l,n: real; + var x, nonprime,prime: ; + begin + var + end. diff --git a/ml-yacc/examples/pascal/test/t5.p b/ml-yacc/examples/pascal/test/t5.p new file mode 100644 index 0000000..68349ad --- /dev/null +++ b/ml-yacc/examples/pascal/test/t5.p @@ -0,0 +1,4 @@ +program p(input,output) + begin + writeln(' '; 9, 'x'; 10, 'm'; 9, '[x]'; 9,'approx x]'; 19, +end. diff --git a/ml-yacc/examples/pascal/test/t6.p b/ml-yacc/examples/pascal/test/t6.p new file mode 100644 index 0000000..324ccba --- /dev/null +++ b/ml-yacc/examples/pascal/test/t6.p @@ -0,0 +1,11 @@ +program this (output) + procedure addcor; + var bins,start,i,last : integer; level : real; + begin bins := trunc((r1+r2)*maxcor); + if bins < 1 then bins := 1; + start := round(d*maxcor) - bins div 2; + level := mm/bins; + last := start+bins; if last>maxcor then last := maxcor; + corfarray[start] := corfarray[start]-level; + corfarray[last] := corfarray[last]+level; + end; diff --git a/ml-yacc/examples/pascal/test/t7.p b/ml-yacc/examples/pascal/test/t7.p new file mode 100644 index 0000000..bd41e7a --- /dev/null +++ b/ml-yacc/examples/pascal/test/t7.p @@ -0,0 +1,5 @@ +program p(input,otput); +begin + for i := 1 to maxelements do + y[i] := 0; +end. diff --git a/ml-yacc/lib/base.sig b/ml-yacc/lib/base.sig new file mode 100644 index 0000000..5528865 --- /dev/null +++ b/ml-yacc/lib/base.sig @@ -0,0 +1,299 @@ +(* ML-Yacc Parser Generator (c) 1989 Andrew W. Appel, David R. Tarditi *) + +(* base.sig: Base signature file for SML-Yacc. This file contains signatures + that must be loaded before any of the files produced by ML-Yacc are loaded +*) + +(* STREAM: signature for a lazy stream.*) + +signature STREAM = + sig type 'xa stream + val streamify : (unit -> '_a) -> '_a stream + val cons : '_a * '_a stream -> '_a stream + val get : '_a stream -> '_a * '_a stream + end + +(* LR_TABLE: signature for an LR Table. + + The list of actions and gotos passed to mkLrTable must be ordered by state + number. The values for state 0 are the first in the list, the values for + state 1 are next, etc. +*) + +signature LR_TABLE = + sig + datatype ('a,'b) pairlist = EMPTY | PAIR of 'a * 'b * ('a,'b) pairlist + datatype state = STATE of int + datatype term = T of int + datatype nonterm = NT of int + datatype action = SHIFT of state + | REDUCE of int + | ACCEPT + | ERROR + type table + + val numStates : table -> int + val numRules : table -> int + val describeActions : table -> state -> + (term,action) pairlist * action + val describeGoto : table -> state -> (nonterm,state) pairlist + val action : table -> state * term -> action + val goto : table -> state * nonterm -> state + val initialState : table -> state + exception Goto of state * nonterm + + val mkLrTable : {actions : ((term,action) pairlist * action) array, + gotos : (nonterm,state) pairlist array, + numStates : int, numRules : int, + initialState : state} -> table + end + +(* TOKEN: signature revealing the internal structure of a token. This signature + TOKEN distinct from the signature {parser name}_TOKENS produced by ML-Yacc. + The {parser name}_TOKENS structures contain some types and functions to + construct tokens from values and positions. + + The representation of token was very carefully chosen here to allow the + polymorphic parser to work without knowing the types of semantic values + or line numbers. + + This has had an impact on the TOKENS structure produced by SML-Yacc, which + is a structure parameter to lexer functors. We would like to have some + type 'a token which functions to construct tokens would create. A + constructor function for a integer token might be + + INT: int * 'a * 'a -> 'a token. + + This is not possible because we need to have tokens with the representation + given below for the polymorphic parser. + + Thus our constructur functions for tokens have the form: + + INT: int * 'a * 'a -> (svalue,'a) token + + This in turn has had an impact on the signature that lexers for SML-Yacc + must match and the types that a user must declare in the user declarations + section of lexers. +*) + +signature TOKEN = + sig + structure LrTable : LR_TABLE + datatype ('a,'b) token = TOKEN of LrTable.term * ('a * 'b * 'b) + val sameToken : ('a,'b) token * ('a,'b) token -> bool + end + +(* LR_PARSER: signature for a polymorphic LR parser *) + +signature LR_PARSER = + sig + structure Stream: STREAM + structure LrTable : LR_TABLE + structure Token : TOKEN + + sharing LrTable = Token.LrTable + + exception ParseError + + val parse : {table : LrTable.table, + lexer : ('_b,'_c) Token.token Stream.stream, + arg: 'arg, + saction : int * + '_c * + (LrTable.state * ('_b * '_c * '_c)) list * + 'arg -> + LrTable.nonterm * + ('_b * '_c * '_c) * + ((LrTable.state *('_b * '_c * '_c)) list), + void : '_b, + ec : { is_keyword : LrTable.term -> bool, + noShift : LrTable.term -> bool, + preferred_change : (LrTable.term list * LrTable.term list) list, + errtermvalue : LrTable.term -> '_b, + showTerminal : LrTable.term -> string, + terms: LrTable.term list, + error : string * '_c * '_c -> unit + }, + lookahead : int (* max amount of lookahead used in *) + (* error correction *) + } -> '_b * + (('_b,'_c) Token.token Stream.stream) + end + +(* LEXER: a signature that most lexers produced for use with SML-Yacc's + output will match. The user is responsible for declaring type token, + type pos, and type svalue in the UserDeclarations section of a lexer. + + Note that type token is abstract in the lexer. This allows SML-Yacc to + create a TOKENS signature for use with lexers produced by ML-Lex that + treats the type token abstractly. Lexers that are functors parametrized by + a Tokens structure matching a TOKENS signature cannot examine the structure + of tokens. +*) + +signature LEXER = + sig + structure UserDeclarations : + sig + type ('a,'b) token + type pos + type svalue + end + val makeLexer : (int -> string) -> unit -> + (UserDeclarations.svalue,UserDeclarations.pos) UserDeclarations.token + end + +(* ARG_LEXER: the %arg option of ML-Lex allows users to produce lexers which + also take an argument before yielding a function from unit to a token +*) + +signature ARG_LEXER = + sig + structure UserDeclarations : + sig + type ('a,'b) token + type pos + type svalue + type arg + end + val makeLexer : (int -> string) -> UserDeclarations.arg -> unit -> + (UserDeclarations.svalue,UserDeclarations.pos) UserDeclarations.token + end + +(* PARSER_DATA: the signature of ParserData structures in {parser name}LrValsFun + produced by SML-Yacc. All such structures match this signature. + + The {parser name}LrValsFun produces a structure which contains all the values + except for the lexer needed to call the polymorphic parser mentioned + before. + +*) + +signature PARSER_DATA = + sig + (* the type of line numbers *) + + type pos + + (* the type of semantic values *) + + type svalue + + (* the type of the user-supplied argument to the parser *) + type arg + + (* the intended type of the result of the parser. This value is + produced by applying extract from the structure Actions to the + final semantic value resultiing from a parse. + *) + + type result + + structure LrTable : LR_TABLE + structure Token : TOKEN + sharing Token.LrTable = LrTable + + (* structure Actions contains the functions which mantain the + semantic values stack in the parser. Void is used to provide + a default value for the semantic stack. + *) + + structure Actions : + sig + val actions : int * pos * + (LrTable.state * (svalue * pos * pos)) list * arg-> + LrTable.nonterm * (svalue * pos * pos) * + ((LrTable.state *(svalue * pos * pos)) list) + val void : svalue + val extract : svalue -> result + end + + (* structure EC contains information used to improve error + recovery in an error-correcting parser *) + + structure EC : + sig + val is_keyword : LrTable.term -> bool + val noShift : LrTable.term -> bool + val preferred_change : (LrTable.term list * LrTable.term list) list + val errtermvalue : LrTable.term -> svalue + val showTerminal : LrTable.term -> string + val terms: LrTable.term list + end + + (* table is the LR table for the parser *) + + val table : LrTable.table + end + +(* signature PARSER is the signature that most user parsers created by + SML-Yacc will match. +*) + +signature PARSER = + sig + structure Token : TOKEN + structure Stream : STREAM + exception ParseError + + (* type pos is the type of line numbers *) + + type pos + + (* type result is the type of the result from the parser *) + + type result + + (* the type of the user-supplied argument to the parser *) + type arg + + (* type svalue is the type of semantic values for the semantic value + stack + *) + + type svalue + + (* val makeLexer is used to create a stream of tokens for the parser *) + + val makeLexer : (int -> string) -> + (svalue,pos) Token.token Stream.stream + + (* val parse takes a stream of tokens and a function to print + errors and returns a value of type result and a stream containing + the unused tokens + *) + + val parse : int * ((svalue,pos) Token.token Stream.stream) * + (string * pos * pos -> unit) * arg -> + result * (svalue,pos) Token.token Stream.stream + + val sameToken : (svalue,pos) Token.token * (svalue,pos) Token.token -> + bool + end + +(* signature ARG_PARSER is the signature that will be matched by parsers whose + lexer takes an additional argument. +*) + +signature ARG_PARSER = + sig + structure Token : TOKEN + structure Stream : STREAM + exception ParseError + + type arg + type lexarg + type pos + type result + type svalue + + val makeLexer : (int -> string) -> lexarg -> + (svalue,pos) Token.token Stream.stream + val parse : int * ((svalue,pos) Token.token Stream.stream) * + (string * pos * pos -> unit) * arg -> + result * (svalue,pos) Token.token Stream.stream + + val sameToken : (svalue,pos) Token.token * (svalue,pos) Token.token -> + bool + end + diff --git a/ml-yacc/lib/join.sml b/ml-yacc/lib/join.sml new file mode 100644 index 0000000..5cf9df1 --- /dev/null +++ b/ml-yacc/lib/join.sml @@ -0,0 +1,94 @@ +(* ML-Yacc Parser Generator (c) 1989 Andrew W. Appel, David R. Tarditi *) + +(* functor Join creates a user parser by putting together a Lexer structure, + an LrValues structure, and a polymorphic parser structure. Note that + the Lexer and LrValues structure must share the type pos (i.e. the type + of line numbers), the type svalues for semantic values, and the type + of tokens. +*) + +functor Join(structure Lex : LEXER + structure ParserData: PARSER_DATA + structure LrParser : LR_PARSER + sharing ParserData.LrTable = LrParser.LrTable + sharing ParserData.Token = LrParser.Token + sharing type Lex.UserDeclarations.svalue = ParserData.svalue + sharing type Lex.UserDeclarations.pos = ParserData.pos + sharing type Lex.UserDeclarations.token = ParserData.Token.token) + : PARSER = +struct + structure Token = ParserData.Token + structure Stream = LrParser.Stream + + exception ParseError = LrParser.ParseError + + type arg = ParserData.arg + type pos = ParserData.pos + type result = ParserData.result + type svalue = ParserData.svalue + val makeLexer = LrParser.Stream.streamify o Lex.makeLexer + val parse = fn (lookahead,lexer,error,arg) => + (fn (a,b) => (ParserData.Actions.extract a,b)) + (LrParser.parse {table = ParserData.table, + lexer=lexer, + lookahead=lookahead, + saction = ParserData.Actions.actions, + arg=arg, + void= ParserData.Actions.void, + ec = {is_keyword = ParserData.EC.is_keyword, + noShift = ParserData.EC.noShift, + preferred_change = ParserData.EC.preferred_change, + errtermvalue = ParserData.EC.errtermvalue, + error=error, + showTerminal = ParserData.EC.showTerminal, + terms = ParserData.EC.terms}} + ) + val sameToken = Token.sameToken +end + +(* functor JoinWithArg creates a variant of the parser structure produced + above. In this case, the makeLexer take an additional argument before + yielding a value of type unit -> (svalue,pos) token + *) + +functor JoinWithArg(structure Lex : ARG_LEXER + structure ParserData: PARSER_DATA + structure LrParser : LR_PARSER + sharing ParserData.LrTable = LrParser.LrTable + sharing ParserData.Token = LrParser.Token + sharing type Lex.UserDeclarations.svalue = ParserData.svalue + sharing type Lex.UserDeclarations.pos = ParserData.pos + sharing type Lex.UserDeclarations.token = ParserData.Token.token) + : ARG_PARSER = +struct + structure Token = ParserData.Token + structure Stream = LrParser.Stream + + exception ParseError = LrParser.ParseError + + type arg = ParserData.arg + type lexarg = Lex.UserDeclarations.arg + type pos = ParserData.pos + type result = ParserData.result + type svalue = ParserData.svalue + + val makeLexer = fn s => fn arg => + LrParser.Stream.streamify (Lex.makeLexer s arg) + val parse = fn (lookahead,lexer,error,arg) => + (fn (a,b) => (ParserData.Actions.extract a,b)) + (LrParser.parse {table = ParserData.table, + lexer=lexer, + lookahead=lookahead, + saction = ParserData.Actions.actions, + arg=arg, + void= ParserData.Actions.void, + ec = {is_keyword = ParserData.EC.is_keyword, + noShift = ParserData.EC.noShift, + preferred_change = ParserData.EC.preferred_change, + errtermvalue = ParserData.EC.errtermvalue, + error=error, + showTerminal = ParserData.EC.showTerminal, + terms = ParserData.EC.terms}} + ) + val sameToken = Token.sameToken +end; diff --git a/ml-yacc/lib/lrtable.sml b/ml-yacc/lib/lrtable.sml new file mode 100644 index 0000000..2b03b51 --- /dev/null +++ b/ml-yacc/lib/lrtable.sml @@ -0,0 +1,59 @@ +(* ML-Yacc Parser Generator (c) 1989 Andrew W. Appel, David R. Tarditi *) +structure LrTable : LR_TABLE = + struct + val sub = Array.sub + infix 9 sub + datatype ('a,'b) pairlist = EMPTY + | PAIR of 'a * 'b * ('a,'b) pairlist + datatype term = T of int + datatype nonterm = NT of int + datatype state = STATE of int + datatype action = SHIFT of state + | REDUCE of int (* rulenum from grammar *) + | ACCEPT + | ERROR + exception Goto of state * nonterm + type table = {states: int, rules : int,initialState: state, + action: ((term,action) pairlist * action) array, + goto : (nonterm,state) pairlist array} + val numStates = fn ({states,...} : table) => states + val numRules = fn ({rules,...} : table) => rules + val describeActions = + fn ({action,...} : table) => + fn (STATE s) => action sub s + val describeGoto = + fn ({goto,...} : table) => + fn (STATE s) => goto sub s + fun findTerm (T term,row,default) = + let fun find (PAIR (T key,data,r)) = + if key < term then find r + else if key=term then data + else default + | find EMPTY = default + in find row + end + fun findNonterm (NT nt,row) = + let fun find (PAIR (NT key,data,r)) = + if key < nt then find r + else if key=nt then SOME data + else NONE + | find EMPTY = NONE + in find row + end + val action = fn ({action,...} : table) => + fn (STATE state,term) => + let val (row,default) = action sub state + in findTerm(term,row,default) + end + val goto = fn ({goto,...} : table) => + fn (a as (STATE state,nonterm)) => + case findNonterm(nonterm,goto sub state) + of SOME state => state + | NONE => raise (Goto a) + val initialState = fn ({initialState,...} : table) => initialState + val mkLrTable = fn {actions,gotos,initialState,numStates,numRules} => + ({action=actions,goto=gotos, + states=numStates, + rules=numRules, + initialState=initialState} : table) +end; diff --git a/ml-yacc/lib/ml-yacc-lib.cm b/ml-yacc/lib/ml-yacc-lib.cm new file mode 100644 index 0000000..8a800bf --- /dev/null +++ b/ml-yacc/lib/ml-yacc-lib.cm @@ -0,0 +1,30 @@ +(* sources file for ML-Yacc library *) + +Library + +signature STREAM +signature LR_TABLE +signature TOKEN +signature LR_PARSER +signature LEXER +signature ARG_LEXER +signature PARSER_DATA +signature PARSER +signature ARG_PARSER +functor Join +functor JoinWithArg +structure LrTable +structure Stream +structure LrParser + +is + +#if defined(NEW_CM) + $/basis.cm +#endif + +base.sig +join.sml +lrtable.sml +stream.sml +parser2.sml (* error correcting version *) diff --git a/ml-yacc/lib/parser1.sml b/ml-yacc/lib/parser1.sml new file mode 100644 index 0000000..537ac95 --- /dev/null +++ b/ml-yacc/lib/parser1.sml @@ -0,0 +1,98 @@ +(* ML-Yacc Parser Generator (c) 1989 Andrew W. Appel, David R. Tarditi *) + +(* drt (12/15/89) -- the functor should be used during development work, + but it is wastes space in the release version. + +functor ParserGen(structure LrTable : LR_TABLE + structure Stream : STREAM) : LR_PARSER = +*) + +structure LrParser :> LR_PARSER = + struct + val print = fn s => output(std_out,s) + val println = fn s => (print s; print "\n") + structure LrTable = LrTable + structure Stream = Stream + structure Token : TOKEN = + struct + structure LrTable = LrTable + datatype ('a,'b) token = TOKEN of LrTable.term * ('a * 'b * 'b) + val sameToken = fn (TOKEN (t,_),TOKEN(t',_)) => t=t' + end + + + open LrTable + open Token + + val DEBUG = false + exception ParseError + + type ('a,'b) elem = (state * ('a * 'b * 'b)) + type ('a,'b) stack = ('a,'b) elem list + + val showState = fn (STATE s) => ("STATE " ^ (makestring s)) + + fun printStack(stack: ('a,'b) elem list, n: int) = + case stack + of (state, _) :: rest => + (print(" " ^ makestring n ^ ": "); + println(showState state); + printStack(rest, n+1) + ) + | nil => () + + val parse = fn {arg : 'a, + table : LrTable.table, + lexer : ('_b,'_c) token Stream.stream, + saction : int * '_c * ('_b,'_c) stack * 'a -> + nonterm * ('_b * '_c * '_c) * ('_b,'_c) stack, + void : '_b, + ec = {is_keyword,preferred_change, + errtermvalue,showTerminal, + error,terms,noShift}, + lookahead} => + let fun prAction(stack as (state, _) :: _, + next as (TOKEN (term,_),_), action) = + (println "Parse: state stack:"; + printStack(stack, 0); + print(" state=" + ^ showState state + ^ " next=" + ^ showTerminal term + ^ " action=" + ); + case action + of SHIFT s => println ("SHIFT " ^ showState s) + | REDUCE i => println ("REDUCE " ^ (makestring i)) + | ERROR => println "ERROR" + | ACCEPT => println "ACCEPT"; + action) + | prAction (_,_,action) = action + + val action = LrTable.action table + val goto = LrTable.goto table + + fun parseStep(next as (TOKEN (terminal, value as (_,leftPos,_)),lexer) : + ('_b,'_c) token * ('_b,'_c) token Stream.stream, + stack as (state,_) :: _ : ('_b ,'_c) stack) = + case (if DEBUG then prAction(stack, next,action(state, terminal)) + else action(state, terminal)) + of SHIFT s => parseStep(Stream.get lexer, (s,value) :: stack) + | REDUCE i => + let val (nonterm,value,stack as (state,_) :: _ ) = + saction(i,leftPos,stack,arg) + in parseStep(next,(goto(state,nonterm),value)::stack) + end + | ERROR => let val (_,leftPos,rightPos) = value + in error("syntax error\n",leftPos,rightPos); + raise ParseError + end + | ACCEPT => let val (_,(topvalue,_,_)) :: _ = stack + val (token,restLexer) = next + in (topvalue,Stream.cons(token,lexer)) + end + val next as (TOKEN (terminal,(_,leftPos,_)),_) = Stream.get lexer + in parseStep(next,[(initialState table,(void,leftPos,leftPos))]) + end +end; + diff --git a/ml-yacc/lib/parser2.sml b/ml-yacc/lib/parser2.sml new file mode 100644 index 0000000..618340e --- /dev/null +++ b/ml-yacc/lib/parser2.sml @@ -0,0 +1,542 @@ +(* ML-Yacc Parser Generator (c) 1989 Andrew W. Appel, David R. Tarditi *) + +(* parser.sml: This is a parser driver for LR tables with an error-recovery + routine added to it. The routine used is described in detail in this + article: + + 'A Practical Method for LR and LL Syntactic Error Diagnosis and + Recovery', by M. Burke and G. Fisher, ACM Transactions on + Programming Langauges and Systems, Vol. 9, No. 2, April 1987, + pp. 164-197. + + This program is an implementation is the partial, deferred method discussed + in the article. The algorithm and data structures used in the program + are described below. + + This program assumes that all semantic actions are delayed. A semantic + action should produce a function from unit -> value instead of producing the + normal value. The parser returns the semantic value on the top of the + stack when accept is encountered. The user can deconstruct this value + and apply the unit -> value function in it to get the answer. + + It also assumes that the lexer is a lazy stream. + + Data Structures: + ---------------- + + * The parser: + + The state stack has the type + + (state * (semantic value * line # * line #)) list + + The parser keeps a queue of (state stack * lexer pair). A lexer pair + consists of a terminal * value pair and a lexer. This allows the + parser to reconstruct the states for terminals to the left of a + syntax error, and attempt to make error corrections there. + + The queue consists of a pair of lists (x,y). New additions to + the queue are cons'ed onto y. The first element of x is the top + of the queue. If x is nil, then y is reversed and used + in place of x. + + Algorithm: + ---------- + + * The steady-state parser: + + This parser keeps the length of the queue of state stacks at + a steady state by always removing an element from the front when + another element is placed on the end. + + It has these arguments: + + stack: current stack + queue: value of the queue + lexPair ((terminal,value),lex stream) + + When SHIFT is encountered, the state to shift to and the value are + are pushed onto the state stack. The state stack and lexPair are + placed on the queue. The front element of the queue is removed. + + When REDUCTION is encountered, the rule is applied to the current + stack to yield a triple (nonterm,value,new stack). A new + stack is formed by adding (goto(top state of stack,nonterm),value) + to the stack. + + When ACCEPT is encountered, the top value from the stack and the + lexer are returned. + + When an ERROR is encountered, fixError is called. FixError + takes the arguments to the parser, fixes the error if possible and + returns a new set of arguments. + + * The distance-parser: + + This parser includes an additional argument distance. It pushes + elements on the queue until it has parsed distance tokens, or an + ACCEPT or ERROR occurs. It returns a stack, lexer, the number of + tokens left unparsed, a queue, and an action option. +*) + +signature FIFO = + sig type 'a queue + val empty : 'a queue + exception Empty + val get : 'a queue -> 'a * 'a queue + val put : 'a * 'a queue -> 'a queue + end + +(* drt (12/15/89) -- the functor should be used in development work, but + it wastes space in the release version. + +functor ParserGen(structure LrTable : LR_TABLE + structure Stream : STREAM) : LR_PARSER = +*) + +structure LrParser :> LR_PARSER = + struct + structure LrTable = LrTable + structure Stream = Stream + + fun eqT (LrTable.T i, LrTable.T i') = i = i' + + structure Token : TOKEN = + struct + structure LrTable = LrTable + datatype ('a,'b) token = TOKEN of LrTable.term * ('a * 'b * 'b) + val sameToken = fn (TOKEN(t,_),TOKEN(t',_)) => eqT (t,t') + end + + open LrTable + open Token + + val DEBUG1 = false + val DEBUG2 = false + exception ParseError + exception ParseImpossible of int + + structure Fifo :> FIFO = + struct + type 'a queue = ('a list * 'a list) + val empty = (nil,nil) + exception Empty + fun get(a::x, y) = (a, (x,y)) + | get(nil, nil) = raise Empty + | get(nil, y) = get(rev y, nil) + fun put(a,(x,y)) = (x,a::y) + end + + type ('a,'b) elem = (state * ('a * 'b * 'b)) + type ('a,'b) stack = ('a,'b) elem list + type ('a,'b) lexv = ('a,'b) token + type ('a,'b) lexpair = ('a,'b) lexv * (('a,'b) lexv Stream.stream) + type ('a,'b) distanceParse = + ('a,'b) lexpair * + ('a,'b) stack * + (('a,'b) stack * ('a,'b) lexpair) Fifo.queue * + int -> + ('a,'b) lexpair * + ('a,'b) stack * + (('a,'b) stack * ('a,'b) lexpair) Fifo.queue * + int * + action option + + type ('a,'b) ecRecord = + {is_keyword : term -> bool, + preferred_change : (term list * term list) list, + error : string * 'b * 'b -> unit, + errtermvalue : term -> 'a, + terms : term list, + showTerminal : term -> string, + noShift : term -> bool} + + local + val print = fn s => TextIO.output(TextIO.stdOut,s) + val println = fn s => (print s; print "\n") + val showState = fn (STATE s) => "STATE " ^ (Int.toString s) + in + fun printStack(stack: ('a,'b) stack, n: int) = + case stack + of (state,_) :: rest => + (print("\t" ^ Int.toString n ^ ": "); + println(showState state); + printStack(rest, n+1)) + | nil => () + + fun prAction showTerminal + (stack as (state,_) :: _, next as (TOKEN (term,_),_), action) = + (println "Parse: state stack:"; + printStack(stack, 0); + print(" state=" + ^ showState state + ^ " next=" + ^ showTerminal term + ^ " action=" + ); + case action + of SHIFT state => println ("SHIFT " ^ (showState state)) + | REDUCE i => println ("REDUCE " ^ (Int.toString i)) + | ERROR => println "ERROR" + | ACCEPT => println "ACCEPT") + | prAction _ (_,_,action) = () + end + + (* ssParse: parser which maintains the queue of (state * lexvalues) in a + steady-state. It takes a table, showTerminal function, saction + function, and fixError function. It parses until an ACCEPT is + encountered, or an exception is raised. When an error is encountered, + fixError is called with the arguments of parseStep (lexv,stack,and + queue). It returns the lexv, and a new stack and queue adjusted so + that the lexv can be parsed *) + + val ssParse = + fn (table,showTerminal,saction,fixError,arg) => + let val prAction = prAction showTerminal + val action = LrTable.action table + val goto = LrTable.goto table + fun parseStep(args as + (lexPair as (TOKEN (terminal, value as (_,leftPos,_)), + lexer + ), + stack as (state,_) :: _, + queue)) = + let val nextAction = action (state,terminal) + val _ = if DEBUG1 then prAction(stack,lexPair,nextAction) + else () + in case nextAction + of SHIFT s => + let val newStack = (s,value) :: stack + val newLexPair = Stream.get lexer + val (_,newQueue) =Fifo.get(Fifo.put((newStack,newLexPair), + queue)) + in parseStep(newLexPair,(s,value)::stack,newQueue) + end + | REDUCE i => + (case saction(i,leftPos,stack,arg) + of (nonterm,value,stack as (state,_) :: _) => + parseStep(lexPair,(goto(state,nonterm),value)::stack, + queue) + | _ => raise (ParseImpossible 197)) + | ERROR => parseStep(fixError args) + | ACCEPT => + (case stack + of (_,(topvalue,_,_)) :: _ => + let val (token,restLexer) = lexPair + in (topvalue,Stream.cons(token,restLexer)) + end + | _ => raise (ParseImpossible 202)) + end + | parseStep _ = raise (ParseImpossible 204) + in parseStep + end + + (* distanceParse: parse until n tokens are shifted, or accept or + error are encountered. Takes a table, showTerminal function, and + semantic action function. Returns a parser which takes a lexPair + (lex result * lexer), a state stack, a queue, and a distance + (must be > 0) to parse. The parser returns a new lex-value, a stack + with the nth token shifted on top, a queue, a distance, and action + option. *) + + val distanceParse = + fn (table,showTerminal,saction,arg) => + let val prAction = prAction showTerminal + val action = LrTable.action table + val goto = LrTable.goto table + fun parseStep(lexPair,stack,queue,0) = (lexPair,stack,queue,0,NONE) + | parseStep(lexPair as (TOKEN (terminal, value as (_,leftPos,_)), + lexer + ), + stack as (state,_) :: _, + queue,distance) = + let val nextAction = action(state,terminal) + val _ = if DEBUG1 then prAction(stack,lexPair,nextAction) + else () + in case nextAction + of SHIFT s => + let val newStack = (s,value) :: stack + val newLexPair = Stream.get lexer + in parseStep(newLexPair,(s,value)::stack, + Fifo.put((newStack,newLexPair),queue),distance-1) + end + | REDUCE i => + (case saction(i,leftPos,stack,arg) + of (nonterm,value,stack as (state,_) :: _) => + parseStep(lexPair,(goto(state,nonterm),value)::stack, + queue,distance) + | _ => raise (ParseImpossible 240)) + | ERROR => (lexPair,stack,queue,distance,SOME nextAction) + | ACCEPT => (lexPair,stack,queue,distance,SOME nextAction) + end + | parseStep _ = raise (ParseImpossible 242) + in parseStep : ('_a,'_b) distanceParse + end + +(* mkFixError: function to create fixError function which adjusts parser state + so that parse may continue in the presence of an error *) + +fun mkFixError({is_keyword,terms,errtermvalue, + preferred_change,noShift, + showTerminal,error,...} : ('_a,'_b) ecRecord, + distanceParse : ('_a,'_b) distanceParse, + minAdvance,maxAdvance) + + (lexv as (TOKEN (term,value as (_,leftPos,_)),_),stack,queue) = + let val _ = if DEBUG2 then + error("syntax error found at " ^ (showTerminal term), + leftPos,leftPos) + else () + + fun tokAt(t,p) = TOKEN(t,(errtermvalue t,p,p)) + + val minDelta = 3 + + (* pull all the state * lexv elements from the queue *) + + val stateList = + let fun f q = let val (elem,newQueue) = Fifo.get q + in elem :: (f newQueue) + end handle Fifo.Empty => nil + in f queue + end + + (* now number elements of stateList, giving distance from + error token *) + + val (_, numStateList) = + List.foldr (fn (a,(num,r)) => (num+1,(a,num)::r)) (0, []) stateList + + (* Represent the set of potential changes as a linked list. + + Values of datatype Change hold information about a potential change. + + oper = oper to be applied + pos = the # of the element in stateList that would be altered. + distance = the number of tokens beyond the error token which the + change allows us to parse. + new = new terminal * value pair at that point + orig = original terminal * value pair at the point being changed. + *) + + datatype ('a,'b) change = CHANGE of + {pos : int, distance : int, leftPos: 'b, rightPos: 'b, + new : ('a,'b) lexv list, orig : ('a,'b) lexv list} + + + val showTerms = concat o map (fn TOKEN(t,_) => " " ^ showTerminal t) + + val printChange = fn c => + let val CHANGE {distance,new,orig,pos,...} = c + in (print ("{distance= " ^ (Int.toString distance)); + print (",orig ="); print(showTerms orig); + print (",new ="); print(showTerms new); + print (",pos= " ^ (Int.toString pos)); + print "}\n") + end + + val printChangeList = app printChange + +(* parse: given a lexPair, a stack, and the distance from the error + token, return the distance past the error token that we are able to parse.*) + + fun parse (lexPair,stack,queuePos : int) = + case distanceParse(lexPair,stack,Fifo.empty,queuePos+maxAdvance+1) + of (_,_,_,distance,SOME ACCEPT) => + if maxAdvance-distance-1 >= 0 + then maxAdvance + else maxAdvance-distance-1 + | (_,_,_,distance,_) => maxAdvance - distance - 1 + +(* catList: concatenate results of scanning list *) + + fun catList l f = List.foldr (fn(a,r)=> f a @ r) [] l + + fun keywordsDelta new = if List.exists (fn(TOKEN(t,_))=>is_keyword t) new + then minDelta else 0 + + fun tryChange{lex,stack,pos,leftPos,rightPos,orig,new} = + let val lex' = List.foldr (fn (t',p)=>(t',Stream.cons p)) lex new + val distance = parse(lex',stack,pos+length new-length orig) + in if distance >= minAdvance + keywordsDelta new + then [CHANGE{pos=pos,leftPos=leftPos,rightPos=rightPos, + distance=distance,orig=orig,new=new}] + else [] + end + + +(* tryDelete: Try to delete n terminals. + Return single-element [success] or nil. + Do not delete unshiftable terminals. *) + + + fun tryDelete n ((stack,lexPair as (TOKEN(term,(_,l,r)),_)),qPos) = + let fun del(0,accum,left,right,lexPair) = + tryChange{lex=lexPair,stack=stack, + pos=qPos,leftPos=left,rightPos=right, + orig=rev accum, new=[]} + | del(n,accum,left,right,(tok as TOKEN(term,(_,_,r)),lexer)) = + if noShift term then [] + else del(n-1,tok::accum,left,r,Stream.get lexer) + in del(n,[],l,r,lexPair) + end + +(* tryInsert: try to insert tokens before the current terminal; + return a list of the successes *) + + fun tryInsert((stack,lexPair as (TOKEN(_,(_,l,_)),_)),queuePos) = + catList terms (fn t => + tryChange{lex=lexPair,stack=stack, + pos=queuePos,orig=[],new=[tokAt(t,l)], + leftPos=l,rightPos=l}) + +(* trySubst: try to substitute tokens for the current terminal; + return a list of the successes *) + + fun trySubst ((stack,lexPair as (orig as TOKEN (term,(_,l,r)),lexer)), + queuePos) = + if noShift term then [] + else + catList terms (fn t => + tryChange{lex=Stream.get lexer,stack=stack, + pos=queuePos, + leftPos=l,rightPos=r,orig=[orig], + new=[tokAt(t,r)]}) + + (* do_delete(toks,lexPair) tries to delete tokens "toks" from "lexPair". + If it succeeds, returns SOME(toks',l,r,lp), where + toks' is the actual tokens (with positions and values) deleted, + (l,r) are the (leftmost,rightmost) position of toks', + lp is what remains of the stream after deletion + *) + fun do_delete(nil,lp as (TOKEN(_,(_,l,_)),_)) = SOME(nil,l,l,lp) + | do_delete([t],(tok as TOKEN(t',(_,l,r)),lp')) = + if eqT (t, t') + then SOME([tok],l,r,Stream.get lp') + else NONE + | do_delete(t::rest,(tok as TOKEN(t',(_,l,r)),lp')) = + if eqT (t,t') + then case do_delete(rest,Stream.get lp') + of SOME(deleted,l',r',lp'') => + SOME(tok::deleted,l,r',lp'') + | NONE => NONE + else NONE + + fun tryPreferred((stack,lexPair),queuePos) = + catList preferred_change (fn (delete,insert) => + if List.exists noShift delete then [] (* should give warning at + parser-generation time *) + else case do_delete(delete,lexPair) + of SOME(deleted,l,r,lp) => + tryChange{lex=lp,stack=stack,pos=queuePos, + leftPos=l,rightPos=r,orig=deleted, + new=map (fn t=>(tokAt(t,r))) insert} + | NONE => []) + + val changes = catList numStateList tryPreferred @ + catList numStateList tryInsert @ + catList numStateList trySubst @ + catList numStateList (tryDelete 1) @ + catList numStateList (tryDelete 2) @ + catList numStateList (tryDelete 3) + + val findMaxDist = fn l => + foldr (fn (CHANGE {distance,...},high) => Int.max(distance,high)) 0 l + +(* maxDist: max distance past error taken that we could parse *) + + val maxDist = findMaxDist changes + +(* remove changes which did not parse maxDist tokens past the error token *) + + val changes = catList changes + (fn(c as CHANGE{distance,...}) => + if distance=maxDist then [c] else []) + + in case changes + of (l as change :: _) => + let fun print_msg (CHANGE {new,orig,leftPos,rightPos,...}) = + let val s = + case (orig,new) + of (_::_,[]) => "deleting " ^ (showTerms orig) + | ([],_::_) => "inserting " ^ (showTerms new) + | _ => "replacing " ^ (showTerms orig) ^ + " with " ^ (showTerms new) + in error ("syntax error: " ^ s,leftPos,rightPos) + end + + val _ = + (if length l > 1 andalso DEBUG2 then + (print "multiple fixes possible; could fix it by:\n"; + app print_msg l; + print "chosen correction:\n") + else (); + print_msg change) + + (* findNth: find nth queue entry from the error + entry. Returns the Nth queue entry and the portion of + the queue from the beginning to the nth-1 entry. The + error entry is at the end of the queue. + + Examples: + + queue = a b c d e + findNth 0 = (e,a b c d) + findNth 1 = (d,a b c) + *) + + val findNth = fn n => + let fun f (h::t,0) = (h,rev t) + | f (h::t,n) = f(t,n-1) + | f (nil,_) = let exception FindNth + in raise FindNth + end + in f (rev stateList,n) + end + + val CHANGE {pos,orig,new,...} = change + val (last,queueFront) = findNth pos + val (stack,lexPair) = last + + val lp1 = foldl(fn (_,(_,r)) => Stream.get r) lexPair orig + val lp2 = foldr(fn(t,r)=>(t,Stream.cons r)) lp1 new + + val restQueue = + Fifo.put((stack,lp2), + foldl Fifo.put Fifo.empty queueFront) + + val (lexPair,stack,queue,_,_) = + distanceParse(lp2,stack,restQueue,pos) + + in (lexPair,stack,queue) + end + | nil => (error("syntax error found at " ^ (showTerminal term), + leftPos,leftPos); raise ParseError) + end + + val parse = fn {arg,table,lexer,saction,void,lookahead, + ec=ec as {showTerminal,...} : ('_a,'_b) ecRecord} => + let val distance = 15 (* defer distance tokens *) + val minAdvance = 1 (* must parse at least 1 token past error *) + val maxAdvance = Int.max(lookahead,0)(* max distance for parse check *) + val lexPair = Stream.get lexer + val (TOKEN (_,(_,leftPos,_)),_) = lexPair + val startStack = [(initialState table,(void,leftPos,leftPos))] + val startQueue = Fifo.put((startStack,lexPair),Fifo.empty) + val distanceParse = distanceParse(table,showTerminal,saction,arg) + val fixError = mkFixError(ec,distanceParse,minAdvance,maxAdvance) + val ssParse = ssParse(table,showTerminal,saction,fixError,arg) + fun loop (lexPair,stack,queue,_,SOME ACCEPT) = + ssParse(lexPair,stack,queue) + | loop (lexPair,stack,queue,0,_) = ssParse(lexPair,stack,queue) + | loop (lexPair,stack,queue,distance,SOME ERROR) = + let val (lexPair,stack,queue) = fixError(lexPair,stack,queue) + in loop (distanceParse(lexPair,stack,queue,distance)) + end + | loop _ = let exception ParseInternal + in raise ParseInternal + end + in loop (distanceParse(lexPair,startStack,startQueue,distance)) + end + end; + diff --git a/ml-yacc/lib/sources.cm b/ml-yacc/lib/sources.cm new file mode 100644 index 0000000..dfa8b22 --- /dev/null +++ b/ml-yacc/lib/sources.cm @@ -0,0 +1,28 @@ +(* sources file for ML-Yacc library *) + +Library + +signature STREAM +signature LR_TABLE +signature TOKEN +signature LR_PARSER +signature LEXER +signature ARG_LEXER +signature PARSER_DATA +signature PARSER +signature ARG_PARSER +functor Join +functor JoinWithArg +structure LrTable +structure Stream +structure LrParser + +is + + $/basis.cm + + base.sig + join.sml + lrtable.sml + stream.sml + parser2.sml (* error correcting version *) diff --git a/ml-yacc/lib/stream.sml b/ml-yacc/lib/stream.sml new file mode 100644 index 0000000..ea90960 --- /dev/null +++ b/ml-yacc/lib/stream.sml @@ -0,0 +1,19 @@ +(* ML-Yacc Parser Generator (c) 1989 Andrew W. Appel, David R. Tarditi *) + +(* Stream: a structure implementing a lazy stream. The signature STREAM + is found in base.sig *) + +structure Stream :> STREAM = +struct + datatype 'a str = EVAL of 'a * 'a str ref | UNEVAL of (unit->'a) + + type 'a stream = 'a str ref + + fun get(ref(EVAL t)) = t + | get(s as ref(UNEVAL f)) = + let val t = (f(), ref(UNEVAL f)) in s := EVAL t; t end + + fun streamify f = ref(UNEVAL f) + fun cons(a,s) = ref(EVAL(a,s)) + +end; diff --git a/ml-yacc/src/.cm/GUID/absyn.sig b/ml-yacc/src/.cm/GUID/absyn.sig new file mode 100644 index 0000000..e9306c7 --- /dev/null +++ b/ml-yacc/src/.cm/GUID/absyn.sig @@ -0,0 +1 @@ +guid-(ml-yacc.cm):absyn.sig-1714016078.621 diff --git a/ml-yacc/src/.cm/GUID/absyn.sml b/ml-yacc/src/.cm/GUID/absyn.sml new file mode 100644 index 0000000..4f9e1c6 --- /dev/null +++ b/ml-yacc/src/.cm/GUID/absyn.sml @@ -0,0 +1 @@ +guid-(ml-yacc.cm):absyn.sml-1714016078.625 diff --git a/ml-yacc/src/.cm/GUID/core.sml b/ml-yacc/src/.cm/GUID/core.sml new file mode 100644 index 0000000..23a3cf0 --- /dev/null +++ b/ml-yacc/src/.cm/GUID/core.sml @@ -0,0 +1 @@ +guid-(ml-yacc.cm):core.sml-1714016078.053 diff --git a/ml-yacc/src/.cm/GUID/coreutils.sml b/ml-yacc/src/.cm/GUID/coreutils.sml new file mode 100644 index 0000000..5699aaf --- /dev/null +++ b/ml-yacc/src/.cm/GUID/coreutils.sml @@ -0,0 +1 @@ +guid-(ml-yacc.cm):coreutils.sml-1714016078.076 diff --git a/ml-yacc/src/.cm/GUID/export-yacc.sml b/ml-yacc/src/.cm/GUID/export-yacc.sml new file mode 100644 index 0000000..95df065 --- /dev/null +++ b/ml-yacc/src/.cm/GUID/export-yacc.sml @@ -0,0 +1 @@ +guid-(ml-yacc.cm):export-yacc.sml-1714016079.122 diff --git a/ml-yacc/src/.cm/GUID/grammar.sml b/ml-yacc/src/.cm/GUID/grammar.sml new file mode 100644 index 0000000..583dc80 --- /dev/null +++ b/ml-yacc/src/.cm/GUID/grammar.sml @@ -0,0 +1 @@ +guid-(ml-yacc.cm):grammar.sml-1714016078.028 diff --git a/ml-yacc/src/.cm/GUID/graph.sml b/ml-yacc/src/.cm/GUID/graph.sml new file mode 100644 index 0000000..64f77d8 --- /dev/null +++ b/ml-yacc/src/.cm/GUID/graph.sml @@ -0,0 +1 @@ +guid-(ml-yacc.cm):graph.sml-1714016078.132 diff --git a/ml-yacc/src/.cm/GUID/hdr.sml b/ml-yacc/src/.cm/GUID/hdr.sml new file mode 100644 index 0000000..c9ff587 --- /dev/null +++ b/ml-yacc/src/.cm/GUID/hdr.sml @@ -0,0 +1 @@ +guid-(ml-yacc.cm):hdr.sml-1714016076.825 diff --git a/ml-yacc/src/.cm/GUID/lalr.sml b/ml-yacc/src/.cm/GUID/lalr.sml new file mode 100644 index 0000000..e6fd4e5 --- /dev/null +++ b/ml-yacc/src/.cm/GUID/lalr.sml @@ -0,0 +1 @@ +guid-(ml-yacc.cm):lalr.sml-1714016078.225 diff --git a/ml-yacc/src/.cm/GUID/link.sml b/ml-yacc/src/.cm/GUID/link.sml new file mode 100644 index 0000000..158c2bc --- /dev/null +++ b/ml-yacc/src/.cm/GUID/link.sml @@ -0,0 +1 @@ +guid-(ml-yacc.cm):link.sml-1714016079.047 diff --git a/ml-yacc/src/.cm/GUID/look.sml b/ml-yacc/src/.cm/GUID/look.sml new file mode 100644 index 0000000..99e8e9a --- /dev/null +++ b/ml-yacc/src/.cm/GUID/look.sml @@ -0,0 +1 @@ +guid-(ml-yacc.cm):look.sml-1714016078.186 diff --git a/ml-yacc/src/.cm/GUID/mklrtable.sml b/ml-yacc/src/.cm/GUID/mklrtable.sml new file mode 100644 index 0000000..53910f8 --- /dev/null +++ b/ml-yacc/src/.cm/GUID/mklrtable.sml @@ -0,0 +1 @@ +guid-(ml-yacc.cm):mklrtable.sml-1714016078.347 diff --git a/ml-yacc/src/.cm/GUID/mkprstruct.sml b/ml-yacc/src/.cm/GUID/mkprstruct.sml new file mode 100644 index 0000000..8c54631 --- /dev/null +++ b/ml-yacc/src/.cm/GUID/mkprstruct.sml @@ -0,0 +1 @@ +guid-(ml-yacc.cm):mkprstruct.sml-1714016078.592 diff --git a/ml-yacc/src/.cm/GUID/parse.sml b/ml-yacc/src/.cm/GUID/parse.sml new file mode 100644 index 0000000..8dfe63a --- /dev/null +++ b/ml-yacc/src/.cm/GUID/parse.sml @@ -0,0 +1 @@ +guid-(ml-yacc.cm):parse.sml-1714016077.819 diff --git a/ml-yacc/src/.cm/GUID/shrink.sml b/ml-yacc/src/.cm/GUID/shrink.sml new file mode 100644 index 0000000..d1862f3 --- /dev/null +++ b/ml-yacc/src/.cm/GUID/shrink.sml @@ -0,0 +1 @@ +guid-(ml-yacc.cm):shrink.sml-1714016078.544 diff --git a/ml-yacc/src/.cm/GUID/sigs.sml b/ml-yacc/src/.cm/GUID/sigs.sml new file mode 100644 index 0000000..7e360aa --- /dev/null +++ b/ml-yacc/src/.cm/GUID/sigs.sml @@ -0,0 +1 @@ +guid-(ml-yacc.cm):sigs.sml-1714016076.809 diff --git a/ml-yacc/src/.cm/GUID/utils.sig b/ml-yacc/src/.cm/GUID/utils.sig new file mode 100644 index 0000000..10e5588 --- /dev/null +++ b/ml-yacc/src/.cm/GUID/utils.sig @@ -0,0 +1 @@ +guid-(ml-yacc.cm):utils.sig-1714016076.804 diff --git a/ml-yacc/src/.cm/GUID/utils.sml b/ml-yacc/src/.cm/GUID/utils.sml new file mode 100644 index 0000000..8e34d7e --- /dev/null +++ b/ml-yacc/src/.cm/GUID/utils.sml @@ -0,0 +1 @@ +guid-(ml-yacc.cm):utils.sml-1714016077.829 diff --git a/ml-yacc/src/.cm/GUID/verbose.sml b/ml-yacc/src/.cm/GUID/verbose.sml new file mode 100644 index 0000000..06b58ff --- /dev/null +++ b/ml-yacc/src/.cm/GUID/verbose.sml @@ -0,0 +1 @@ +guid-(ml-yacc.cm):verbose.sml-1714016078.504 diff --git a/ml-yacc/src/.cm/GUID/yacc.grm.sig b/ml-yacc/src/.cm/GUID/yacc.grm.sig new file mode 100644 index 0000000..04b7500 --- /dev/null +++ b/ml-yacc/src/.cm/GUID/yacc.grm.sig @@ -0,0 +1 @@ +guid-(ml-yacc.cm):yacc.grm.sig-1714016076.859 diff --git a/ml-yacc/src/.cm/GUID/yacc.grm.sml b/ml-yacc/src/.cm/GUID/yacc.grm.sml new file mode 100644 index 0000000..d70c73a --- /dev/null +++ b/ml-yacc/src/.cm/GUID/yacc.grm.sml @@ -0,0 +1 @@ +guid-(ml-yacc.cm):yacc.grm.sml-1714016076.863 diff --git a/ml-yacc/src/.cm/GUID/yacc.lex.sml b/ml-yacc/src/.cm/GUID/yacc.lex.sml new file mode 100644 index 0000000..8cf2d04 --- /dev/null +++ b/ml-yacc/src/.cm/GUID/yacc.lex.sml @@ -0,0 +1 @@ +guid-(ml-yacc.cm):yacc.lex.sml-1714016077.247 diff --git a/ml-yacc/src/.cm/GUID/yacc.sml b/ml-yacc/src/.cm/GUID/yacc.sml new file mode 100644 index 0000000..7270b86 --- /dev/null +++ b/ml-yacc/src/.cm/GUID/yacc.sml @@ -0,0 +1 @@ +guid-(ml-yacc.cm):yacc.sml-1714016078.685 diff --git a/ml-yacc/src/.cm/SKEL/absyn.sig b/ml-yacc/src/.cm/SKEL/absyn.sig new file mode 100644 index 0000000..dd63c41 --- /dev/null +++ b/ml-yacc/src/.cm/SKEL/absyn.sig @@ -0,0 +1,2 @@ +Skeleton 5 +ac"ABSYN"h0 \ No newline at end of file diff --git a/ml-yacc/src/.cm/SKEL/absyn.sml b/ml-yacc/src/.cm/SKEL/absyn.sml new file mode 100644 index 0000000..ad6ba18 --- /dev/null +++ b/ml-yacc/src/.cm/SKEL/absyn.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f4d"Char"d"List"d"Int"d"Option"ad"Absyn"jh0gp1c"ABSYN" \ No newline at end of file diff --git a/ml-yacc/src/.cm/SKEL/core.sml b/ml-yacc/src/.cm/SKEL/core.sml new file mode 100644 index 0000000..841f73f --- /dev/null +++ b/ml-yacc/src/.cm/SKEL/core.sml @@ -0,0 +1,2 @@ +Skeleton 5 +ae"mkCore"i1aIntGrammar"gp1c"INTGRAMMAR"jh7e eGrammar"Ca .a74aItemList"jh0gp1e"ListOrdSet"egp1 f2 d"Int"Ngp1c"CORE" \ No newline at end of file diff --git a/ml-yacc/src/.cm/SKEL/coreutils.sml b/ml-yacc/src/.cm/SKEL/coreutils.sml new file mode 100644 index 0000000..e440598 --- /dev/null +++ b/ml-yacc/src/.cm/SKEL/coreutils.sml @@ -0,0 +1,2 @@ +Skeleton 5 +ae"mkCoreUtils"i2aCore"gp1c"CORE"f1Array"jh7aaIntGrammar"gp29CaGrammar"gp29c3egp1egp19e3f3d"List"%ad"Assoc"gp1d"SymbolAssoc"ad"NtList"jh0gp1e"ListOrdSet"Ngp1c"CORE_UTILS" \ No newline at end of file diff --git a/ml-yacc/src/.cm/SKEL/export-yacc.sml b/ml-yacc/src/.cm/SKEL/export-yacc.sml new file mode 100644 index 0000000..c803976 --- /dev/null +++ b/ml-yacc/src/.cm/SKEL/export-yacc.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f7d"OS"d"SMLofNJ"Cd"Signals"d"General"d"String"d"ParseGen"d"TextIO"Nad"ExportParseGen"j0 \ No newline at end of file diff --git a/ml-yacc/src/.cm/SKEL/grammar.sml b/ml-yacc/src/.cm/SKEL/grammar.sml new file mode 100644 index 0000000..506dbb1 Binary files /dev/null and b/ml-yacc/src/.cm/SKEL/grammar.sml differ diff --git a/ml-yacc/src/.cm/SKEL/graph.sml b/ml-yacc/src/.cm/SKEL/graph.sml new file mode 100644 index 0000000..ef59be8 --- /dev/null +++ b/ml-yacc/src/.cm/SKEL/graph.sml @@ -0,0 +1,2 @@ +Skeleton 5 +ae"mkGraph"i4aIntGrammar"gp1c"INTGRAMMAR"aCore"gp1c"CORE"aCoreUtils"gp1c"CORE_UTILS"f1Array"jh7a,,aGrammar"gp20Cac4e*egp2,0egp1>eaNodeSet"jh0gp1e"RbOrdSet"egp1%f5C>,d"List"d"Int"NNgp1c"LRGRAPH" \ No newline at end of file diff --git a/ml-yacc/src/.cm/SKEL/hdr.sml b/ml-yacc/src/.cm/SKEL/hdr.sml new file mode 100644 index 0000000..9b4c62f --- /dev/null +++ b/ml-yacc/src/.cm/SKEL/hdr.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d3f2d"Int"d"TextIO"aHeaderFun"j0gp1c"HEADER"ad"Header"j!gp1 \ No newline at end of file diff --git a/ml-yacc/src/.cm/SKEL/lalr.sml b/ml-yacc/src/.cm/SKEL/lalr.sml new file mode 100644 index 0000000..28d0218 --- /dev/null +++ b/ml-yacc/src/.cm/SKEL/lalr.sml @@ -0,0 +1,2 @@ +Skeleton 5 +ae"mkLalr"i5CaIntGrammar"gp1c"INTGRAMMAR"aCore"gp1c"CORE"aGraph"gp1c"LRGRAPH"aLook"gp1c"LOOK"f1Array"Njh9c5CeGrammar"ee,e>egp1Nf4d"List"d"Int"'a>a, Ca<7aad"ItemList"0ListOrdSet"ad"NontermSet"ad"NTL"jgp1e"RbOrdSet"Ngp1c"LALR_GRAPH" \ No newline at end of file diff --git a/ml-yacc/src/.cm/SKEL/link.sml b/ml-yacc/src/.cm/SKEL/link.sml new file mode 100644 index 0000000..e293e68 Binary files /dev/null and b/ml-yacc/src/.cm/SKEL/link.sml differ diff --git a/ml-yacc/src/.cm/SKEL/look.sml b/ml-yacc/src/.cm/SKEL/look.sml new file mode 100644 index 0000000..78f09af --- /dev/null +++ b/ml-yacc/src/.cm/SKEL/look.sml @@ -0,0 +1,2 @@ +Skeleton 5 +ae"mkLook"i2aIntGrammar"gp1c"INTGRAMMAR"f1Array"jh6aGrammar"gp2 8Ca c2egp18e f2d"List",ad"TermSet"0ListOrdSet"ad"NontermSet"4Ngp1c"LOOK" \ No newline at end of file diff --git a/ml-yacc/src/.cm/SKEL/mklrtable.sml b/ml-yacc/src/.cm/SKEL/mklrtable.sml new file mode 100644 index 0000000..8b24189 --- /dev/null +++ b/ml-yacc/src/.cm/SKEL/mklrtable.sml @@ -0,0 +1,2 @@ +Skeleton 5 +ae"mkMakeLrTable"i3aIntGrammar"gp1c"INTGRAMMAR"aLrTable"gp1c"LR_TABLE"f2d"Int"Array"jh7aCore"jgp1e"mkCore"aCoreUtils"jh2(gp1e"mkCoreUtils"CaGraph"jh3(a=gp1=gp1e"mkGraph"aLook"j&gp1e"mkLook"aLalr"jh4(a,gp1,agp1gp1e"mkLalr"22(CaGrammar"gp2*aGotoList"jh0gp1e"ListOrdSet"aErrs"jh1gp1c"LR_ERRS"c5Ce+egp1*egp1e eNf8-d"List"C=,d"TextIO"NNgp1c"MAKE_LR_TABLE" \ No newline at end of file diff --git a/ml-yacc/src/.cm/SKEL/mkprstruct.sml b/ml-yacc/src/.cm/SKEL/mkprstruct.sml new file mode 100644 index 0000000..a2adcc2 --- /dev/null +++ b/ml-yacc/src/.cm/SKEL/mkprstruct.sml @@ -0,0 +1,2 @@ +Skeleton 5 +ae"mkPrintStruct"i3aLrTable"gp1c"LR_TABLE"aShrinkLrTable"gp1c"SHRINK_LR_TABLE"f1Array"jh3ac2egp1-e"f2d"Int"gp1c"PRINT_STRUCT" \ No newline at end of file diff --git a/ml-yacc/src/.cm/SKEL/parse.sml b/ml-yacc/src/.cm/SKEL/parse.sml new file mode 100644 index 0000000..0afd8dc --- /dev/null +++ b/ml-yacc/src/.cm/SKEL/parse.sml @@ -0,0 +1,2 @@ +Skeleton 5 +ae"ParseGenParserFun"i3aHeader"gp1c"HEADER"aParser"gp1c"ARG_PARSER"f2.d"TextIO"jh1agp1gp1c"PARSE_GEN_PARSER" \ No newline at end of file diff --git a/ml-yacc/src/.cm/SKEL/shrink.sml b/ml-yacc/src/.cm/SKEL/shrink.sml new file mode 100644 index 0000000..6d2808a --- /dev/null +++ b/ml-yacc/src/.cm/SKEL/shrink.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d7aSORT_ARG"0aSORT"CaEQUIV_ARG"aEQUIV"aMergeSortFun"i2aA"gp1f1jgp1aEquivFun"i2agp1f2d"Array"jh1ad"Sort"jgp16gp1+ae"ShrinkLrTableFun"i1aLrTable"gp1c"LR_TABLE"jh4a11eaActionEntryList"ad"EquivActionList"jgp1gp1gp1c"SHRINK_LR_TABLE"N \ No newline at end of file diff --git a/ml-yacc/src/.cm/SKEL/sigs.sml b/ml-yacc/src/.cm/SKEL/sigs.sml new file mode 100644 index 0000000..2b3c5d5 --- /dev/null +++ b/ml-yacc/src/.cm/SKEL/sigs.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d6f1d"TextIO"CaHEADER"0ac"PARSE_GEN_PARSER"h1ad"Header"gp1ac"PARSE_GEN"aGRAMMAR"aINTGRAMMAR"h3Grammar"ad"SymbolAssoc"TABLE"ad"NontermAssoc" CaCORE"+IntGrammar"ac"CORE_UTILS"+7Core"+aLRGRAPH"ac"LOOK"2ac"LALR_GRAPH"h4+7!ad"Graph"gp10CaLR_ERRS"LrTable"LR_TABLE"ac"PRINT_STRUCT"ac"SHRINK_LR_TABLE" unit) * (string -> unit)) -> rule -> unit + end diff --git a/ml-yacc/src/absyn.sml b/ml-yacc/src/absyn.sml new file mode 100644 index 0000000..6f1dcbe --- /dev/null +++ b/ml-yacc/src/absyn.sml @@ -0,0 +1,162 @@ +(* ML-Yacc Parser Generator (c) 1991 Andrew W. Appel, David R. Tarditi *) + +structure Absyn : ABSYN = + struct + datatype exp + = CODE of string + | EAPP of exp * exp + | EINT of int + | ETUPLE of exp list + | EVAR of string + | FN of pat * exp + | LET of decl list * exp + | SEQ of exp * exp + | UNIT + and pat + = PVAR of string + | PAPP of string * pat + | PINT of int + | PLIST of pat list * pat option + | PTUPLE of pat list + | WILD + | AS of string * pat + and decl = VB of pat * exp + and rule = RULE of pat * exp + + fun idchar #"'" = true + | idchar #"_" = true + | idchar c = Char.isAlpha c orelse Char.isDigit c + + fun code_to_ids s = let + fun g(nil,r) = r + | g(a as (h::t),r) = if Char.isAlpha h then f(t,[h],r) else g(t,r) + and f(nil,accum,r)= implode(rev accum)::r + | f(a as (h::t),accum,r) = + if idchar h then f(t,h::accum,r) else g(a,implode (rev accum) :: r) + in g(explode s,nil) + end + + val simplifyRule : rule -> rule = fn (RULE(p,e)) => + let val used : (string -> bool) = + let fun f(CODE s) = code_to_ids s + | f(EAPP(a,b)) = f a @ f b + | f(ETUPLE l) = List.concat (map f l) + | f(EVAR s) = [s] + | f(FN(_,e)) = f e + | f(LET(dl,e)) = + (List.concat (map (fn VB(_,e) => f e) dl)) @ f e + | f(SEQ(a,b)) = f a @ f b + | f _ = nil + val identifiers = f e + in fn s => List.exists (fn a=>a=s) identifiers + end + val simplifyPat : pat -> pat = + let fun f a = + case a + of (PVAR s) => if used s then a else WILD + | (PAPP(s,pat)) => + (case f pat + of WILD => WILD + | pat' => PAPP(s,pat')) + | (PLIST (l, topt)) => + let val l' = map f l + val topt' = Option.map f topt + fun notWild WILD = false + | notWild _ = true + in case topt' of + SOME WILD => if List.exists notWild l' then + PLIST (l', topt') + else WILD + | _ => PLIST (l', topt') + end + | (PTUPLE l) => + let val l' = map f l + in if List.exists(fn WILD=>false | _ => true) l' + then PTUPLE l' + else WILD + end + | (AS(a,b)) => + if used a then + case f b of + WILD => PVAR a + | b' => AS(a,b') + else f b + | _ => a + in f + end + val simplifyExp : exp -> exp = + let fun f(EAPP(a,b)) = EAPP(f a,f b) + | f(ETUPLE l) = ETUPLE(map f l) + | f(FN(p,e)) = FN(simplifyPat p,f e) + | f(LET(dl,e)) = + LET(map (fn VB(p,e) => + VB(simplifyPat p,f e)) dl, + f e) + | f(SEQ(a,b)) = SEQ(f a,f b) + | f a = a + in f + end + in RULE(simplifyPat p,simplifyExp e) + end + + fun printRule (say : string -> unit, sayln:string -> unit) r = let + fun flat (a, []) = rev a + | flat (a, SEQ (e1, e2) :: el) = flat (a, e1 :: e2 :: el) + | flat (a, e :: el) = flat (e :: a, el) + fun pl (lb, rb, c, f, [], a) = " " :: lb :: rb :: a + | pl (lb, rb, c, f, h :: t, a) = + " " :: lb :: f (h, foldr (fn (x, a) => c :: f (x, a)) + (rb :: a) + t) + fun pe (CODE c, a) = " (" :: c :: ")" :: a + | pe (EAPP (x, y as (EAPP _)), a) = + pe (x, " (" :: pe (y, ")" :: a)) + | pe (EAPP (x, y), a) = + pe (x, pe (y, a)) + | pe (EINT i, a) = + " " :: Int.toString i :: a + | pe (ETUPLE l, a) = pl ("(", ")", ",", pe, l, a) + | pe (EVAR v, a) = + " " :: v :: a + | pe (FN (p, b), a) = + " (fn" :: pp (p, " =>" :: pe (b, ")" :: a)) + | pe (LET ([], b), a) = + pe (b, a) + | pe (LET (dl, b), a) = + let fun pr (VB (p, e), a) = + " val " :: pp (p, " =" :: pe (e, "\n" :: a)) + in " let" :: foldr pr (" in" :: pe (b, "\nend" :: a)) dl + end + | pe (SEQ (e1, e2), a) = + pl ("(", ")", ";", pe, flat ([], [e1, e2]), a) + | pe (UNIT, a) = + " ()" :: a + and pp (PVAR v, a) = + " " :: v :: a + | pp (PAPP (x, y as PAPP _), a) = + " " :: x :: " (" :: pp (y, ")" :: a) + | pp (PAPP (x, y), a) = + " " :: x :: pp (y, a) + | pp (PINT i, a) = + " " :: Int.toString i :: a + | pp (PLIST (l, NONE), a) = + pl ("[", "]", ",", pp, l, a) + | pp (PLIST (l, SOME t), a) = + " (" :: foldr (fn (x, a) => pp (x, " ::" :: a)) + (pp (t, ")" :: a)) + l + | pp (PTUPLE l, a) = + pl ("(", ")", ",", pp, l, a) + | pp (WILD, a) = + " _" :: a + | pp (AS (v, PVAR v'), a) = + " (" :: v :: " as " :: v' :: ")" :: a + | pp (AS (v, p), a) = + " (" :: v :: " as (" :: pp (p, "))" :: a) + fun out "\n" = sayln "" + | out s = say s + in + case simplifyRule r of + RULE (p, e) => app out (pp (p, " =>" :: pe (e, ["\n"]))) + end +end; diff --git a/ml-yacc/src/core.sml b/ml-yacc/src/core.sml new file mode 100644 index 0000000..ae92e1a --- /dev/null +++ b/ml-yacc/src/core.sml @@ -0,0 +1,73 @@ +(* ML-Yacc Parser Generator (c) 1989 Andrew W. Appel, David R. Tarditi *) + +functor mkCore(structure IntGrammar : INTGRAMMAR) : CORE = + struct + open IntGrammar + open Grammar + structure IntGrammar = IntGrammar + structure Grammar = Grammar + + datatype item = ITEM of + { rule : rule, + dot : int, + rhsAfter : symbol list + } + + val eqItem = fn (ITEM{rule=RULE{num=n,...},dot=d,...}, + ITEM{rule=RULE{num=m,...},dot=e,...}) => + n=m andalso d=e + + val gtItem = fn (ITEM{rule=RULE{num=n,...},dot=d,...}, + ITEM{rule=RULE{num=m,...},dot=e,...}) => + n>m orelse (n=m andalso d>e) + + structure ItemList = ListOrdSet + (struct + type elem = item + val eq = eqItem + val gt = gtItem + end) + + open ItemList + datatype core = CORE of item list * int + + val gtCore = fn (CORE (a,_),CORE (b,_)) => ItemList.set_gt(a,b) + val eqCore = fn (CORE (a,_),CORE (b,_)) => ItemList.set_eq(a,b) + + (* functions for printing and debugging *) + + val prItem = fn (symbolToString,nontermToString,print) => + let val printInt = print o (Int.toString : int -> string) + val prSymbol = print o symbolToString + val prNonterm = print o nontermToString + fun showRest nil = () + | showRest (h::t) = (prSymbol h; print " "; showRest t) + fun showRhs (l,0) = (print ". "; showRest l) + | showRhs (nil,_) = () + | showRhs (h::t,n) = (prSymbol h; + print " "; + showRhs(t,n-1)) + in fn (ITEM {rule=RULE {lhs,rhs,rulenum,num,...}, + dot,rhsAfter,...}) => + (prNonterm lhs; print " : "; showRhs(rhs,dot); + case rhsAfter + of nil => (print " (reduce by rule "; + printInt rulenum; + print ")") + | _ => (); + if DEBUG then + (print " (num "; printInt num; print ")") + else ()) + end + + val prCore = fn a as (_,_,print) => + let val prItem = prItem a + in fn (CORE (items,state)) => + (print "state "; + print (Int.toString state); + print ":\n\n"; + app (fn i => (print "\t"; + prItem i; print "\n")) items; + print "\n") + end +end; diff --git a/ml-yacc/src/coreutils.sml b/ml-yacc/src/coreutils.sml new file mode 100644 index 0000000..cffa0b3 --- /dev/null +++ b/ml-yacc/src/coreutils.sml @@ -0,0 +1,239 @@ +(* ML-Yacc Parser Generator (c) 1989 Andrew W. Appel, David R. Tarditi *) + +functor mkCoreUtils(structure Core : CORE) : CORE_UTILS = + struct + val sub = Array.sub + infix 9 sub + val DEBUG = true + structure Core = Core + structure IntGrammar = Core.IntGrammar + structure Grammar = IntGrammar.Grammar + + open Grammar IntGrammar Core + + structure Assoc = SymbolAssoc + + structure NtList = ListOrdSet + (struct + type elem = nonterm + val eq = eqNonterm + val gt = gtNonterm + end) + + val mkFuncs = fn (GRAMMAR {rules,terms,nonterms,...}) => + let val derives=Array.array(nonterms,nil : rule list) + +(* sort rules by their lhs nonterminal by placing them in an array indexed + in their lhs nonterminal *) + + val _ = + let val f = fn {lhs=lhs as (NT n), rhs, precedence,rulenum} => + let val rule=RULE{lhs=lhs,rhs=rhs,precedence=precedence, + rulenum=rulenum,num=0} + in Array.update(derives,n,rule::(derives sub n)) + end + in app f rules + end + +(* renumber rules so that rule numbers increase monotonically with + the number of their lhs nonterminal, and so that rules are numbered + sequentially. **Functions below assume that this number is true**, + i.e. productions for nonterm i are numbered from j to k, + productions for nonterm i+1 are numbered from k+1 to m, and + productions for nonterm 0 start at 0 *) + + val _ = + let val f = + fn (RULE{lhs,rhs,precedence,rulenum,num}, (l,i)) => + (RULE{lhs=lhs,rhs=rhs, precedence=precedence, + rulenum=rulenum, num=i}::l,i+1) + fun g(i,num) = + if i + if DEBUG andalso (n<0 orelse n>=nonterms) then + let exception Produces of int in raise (Produces n) end + else derives sub n + + val memoize = fn f => + let fun loop i = if i = nonterms then nil + else f (NT i) :: (loop (i+1)) + val data = Array.fromList(loop 0) + in fn (NT i) => data sub i + end + + (* compute nonterminals which must be added to a closure when a given + nonterminal is added, i.e all nonterminals C for each nonterminal A such + that A =*=> Cx *) + + val nontermClosure = + let val collectNonterms = fn n => + List.foldr (fn (r,l) => + case r + of RULE {rhs=NONTERM n :: _,...} => + NtList.insert(n,l) + | _ => l) NtList.empty (produces n) + val closureNonterm = fn n => + NtList.closure(NtList.singleton n, + collectNonterms) + in memoize closureNonterm + end + +(* ntShifts: Take the items produced by a nonterminal, and sort them + by their first symbol. For each first symbol, make sure the item + list associated with the symbol is sorted also. ** This function + assumes that the item list returned by produces is sorted ** + + Create a table of item lists keyed by symbols. Scan the list + of items produced by a nonterminal, and insert those with a first + symbol on to the beginning of the item list for that symbol, creating + a list if necessary. Since produces returns an item list that is + already in order, the list for each symbol will also end up in order. + *) + + fun sortItems nt = + let fun add_item (a as RULE{rhs=symbol::rest,...},r) = + let val item = ITEM{rule=a,dot=1,rhsAfter=rest} + in Assoc.insert((symbol,case Assoc.find (symbol,r) + of SOME l => item::l + | NONE => [item]),r) + end + | add_item (_,r) = r + in List.foldr add_item Assoc.empty (produces nt) + end + + val ntShifts = memoize sortItems + +(* getNonterms: get the nonterminals with a . before them in a core. + Returns a list of nonterminals in ascending order *) + + fun getNonterms l = + List.foldr (fn (ITEM {rhsAfter=NONTERM sym ::_, ...},r) => + NtList.insert(sym,r) + | (_,r) => r) [] l + +(* closureNonterms: compute the nonterminals that would have a . before them + in the closure of the core. Returns a list of nonterminals in ascending + order *) + fun closureNonterms a = + let val nonterms = getNonterms a + in List.foldr (fn (nt,r) => + NtList.union(nontermClosure nt,r)) + nonterms nonterms + end + +(* shifts: compute the core sets that result from shift/gotoing on + the closure of a kernal set. The items in core sets are sorted, of + course. + + (1) compute the core sets that result just from items added + through the closure operation. + (2) then add the shift/gotos on kernal items. + + We can do (1) the following way. Keep a table which for each shift/goto +symbol gives the list of items that result from shifting or gotoing on the +symbol. Compute the nonterminals that would have dots before them in the +closure of the kernal set. For each of these nonterminals, we already have an +item list in sorted order for each possible shift symbol. Scan the nonterminal +list from back to front. For each nonterminal, prepend the shift/goto list +for each shift symbol to the list already in the table. + + We end up with the list of items in correct order for each shift/goto +symbol. We have kept the item lists in order, scanned the nonterminals from +back to front (=> that the items end up in ascending order), and never had any +duplicate items (each item is derived from only one nonterminal). *) + + fun shifts (CORE (itemList,_)) = + let + +(* mergeShiftItems: add an item list for a shift/goto symbol to the table *) + +fun mergeShiftItems (args as ((k,l),r)) = + case Assoc.find(k,r) + of NONE => Assoc.insert args + | SOME old => Assoc.insert ((k,l@old),r) + +(* mergeItems: add all items derived from a nonterminal to the table. We've + kept these items sorted by their shift/goto symbol (the first symbol on + their rhs) *) + + fun mergeItems (n,r) = + Assoc.fold mergeShiftItems (ntShifts n) r + +(* nonterms: a list of nonterminals that are in a core after the + closure operation *) + + val nonterms = closureNonterms itemList + +(* now create a table which for each shift/goto symbol gives the sorted list + of closure items which would result from first taking all the closure items + and then sorting them by the shift/goto symbols *) + + val newsets = List.foldr mergeItems Assoc.empty nonterms + +(* finally prepare to insert the kernal items of a core *) + + fun insertItem ((k,i),r) = + case (Assoc.find(k,r)) + of NONE => Assoc.insert((k,[i]),r) + | SOME l => Assoc.insert((k,Core.insert(i,l)),r) + fun shiftCores(ITEM{rule,dot,rhsAfter=symbol::rest},r) = + insertItem((symbol, + ITEM{rule=rule,dot=dot+1,rhsAfter=rest}),r) + | shiftCores(_,r) = r + +(* insert the kernal items of a core *) + + val newsets = List.foldr shiftCores newsets itemList + in Assoc.make_list newsets + end + +(* nontermEpsProds: returns a list of epsilon productions produced by a + nonterminal sorted by rule number. ** Depends on produces returning + an ordered list **. It does not alter the order in which the rules + were returned by produces; it only removes non-epsilon productions *) + + val nontermEpsProds = + let val f = fn nt => + List.foldr + (fn (rule as RULE {rhs=nil,...},results) => rule :: results + | (_,results) => results) + [] (produces nt) + in memoize f + end + +(* epsProds: take a core and compute a list of epsilon productions for it + sorted by rule number. ** Depends on closureNonterms returning a list + of nonterminals sorted by nonterminal #, rule numbers increasing + monotonically with their lhs production #, and nontermEpsProds returning + an ordered item list for each production +*) + + fun epsProds (CORE (itemList,state)) = + let val prods = map nontermEpsProds (closureNonterms itemList) + in List.concat prods + end + + in {produces=produces,shifts=shifts,rules=rules,epsProds=epsProds} + end +end; diff --git a/ml-yacc/src/export-yacc.sml b/ml-yacc/src/export-yacc.sml new file mode 100644 index 0000000..2b01f6a --- /dev/null +++ b/ml-yacc/src/export-yacc.sml @@ -0,0 +1,46 @@ +(* export-yacc.sml + * + * ML-Yacc Parser Generator (c) 1991 Andrew W. Appel, David R. Tarditi + *) +structure ExportParseGen : sig + val parseGen : (string * string list) -> OS.Process.status +end = struct + fun err msg = TextIO.output (TextIO.stdErr, msg) + + exception Interrupt; + + (* This function applies operation to (). If it handles an interrupt + signal (Control-C), it raises the exception Interrupt. Example: + (handleInterrupt foo) handle Interrupt => print "Bang!\n" *) + + fun handleInterrupt (operation : unit -> unit) = + let exception Done + val old'handler = Signals.inqHandler(Signals.sigINT) + fun reset'handler () = + Signals.setHandler(Signals.sigINT, old'handler) + in (SMLofNJ.Cont.callcc (fn k => + (Signals.setHandler(Signals.sigINT, Signals.HANDLER(fn _ => k)); + operation (); + raise Done)); + err ("\n--- Interrupt ml-yacc ---\n"); + raise Interrupt) + handle Done => (reset'handler ()) + | exn => (reset'handler (); raise exn) + end + + val exit = OS.Process.exit + + fun parseGen (_, argv) = let + fun parse_gen () = + case argv of + [file] => (ParseGen.parseGen file; exit OS.Process.success) + | _ => (err("Usage: ml-yacc filename\n"); + exit OS.Process.failure) + in + (handleInterrupt parse_gen; OS.Process.success) + handle Interrupt => OS.Process.failure + | ex => (err (String.concat ["? ml-yacc: uncaught exception ", + General.exnMessage ex, "\n"]); + OS.Process.failure) + end +end diff --git a/ml-yacc/src/grammar.sml b/ml-yacc/src/grammar.sml new file mode 100644 index 0000000..f7e52da --- /dev/null +++ b/ml-yacc/src/grammar.sml @@ -0,0 +1,101 @@ +(* ML-Yacc Parser Generator (c) 1989 Andrew W. Appel, David R. Tarditi *) + +structure Grammar : GRAMMAR = + struct + + (* define types term and nonterm using those in LrTable + datatype term = T of int + datatype nonterm = NT of int *) + + open LrTable + datatype symbol = TERM of term | NONTERM of nonterm + datatype grammar = GRAMMAR of + {rules: {lhs: nonterm, + rhs: symbol list, + precedence: int option, + rulenum: int} list, + noshift : term list, + eop : term list, + terms: int, + nonterms: int, + start : nonterm, + precedence : term -> int option, + termToString : term -> string, + nontermToString : nonterm -> string} +end; + +structure IntGrammar : INTGRAMMAR = + struct + structure Grammar = Grammar + open Grammar + + datatype rule = RULE of + {lhs: nonterm, + rhs: symbol list, + num: int,(* internal # assigned by coreutils *) + rulenum: int, + precedence: int option} + + val eqTerm : term * term -> bool = (op =) + val gtTerm : term * term -> bool = fn (T i,T j) => i>j + + val eqNonterm : nonterm * nonterm -> bool = (op =) + val gtNonterm : nonterm * nonterm -> bool = + fn (NT i,NT j) => i>j + + val eqSymbol : symbol * symbol -> bool = (op =) + val gtSymbol = fn (TERM (T i),TERM (T j)) => i>j + | (NONTERM (NT i),NONTERM (NT j)) => i>j + | (TERM _,NONTERM _) => false + | (NONTERM _,TERM _) => true + + + structure SymbolAssoc = Table(type key = symbol + val gt = gtSymbol) + + structure NontermAssoc = Table(type key = nonterm + val gt = gtNonterm) + + val DEBUG = false + + val prRule = fn (a as symbolToString,nontermToString,print) => + let val printSymbol = print o symbolToString + fun printRhs (h::t) = (printSymbol h; print " "; + printRhs t) + | printRhs nil = () + in fn (RULE {lhs,rhs,num,rulenum,precedence,...}) => + ((print o nontermToString) lhs; print " : "; + printRhs rhs; + if DEBUG then (print " num = "; + print (Int.toString num); + print " rulenum = "; + print (Int.toString rulenum); + print " precedence = "; + case precedence + of NONE => print " none" + | (SOME i) => + print (Int.toString i); + ()) + else ()) + end + + val prGrammar = + fn (a as (symbolToString,nontermToString,print)) => + fn (GRAMMAR {rules,terms,nonterms,start,...}) => + let val printRule = + let val prRule = prRule a + in fn {lhs,rhs,precedence,rulenum} => + (prRule (RULE {lhs=lhs,rhs=rhs,num=0, + rulenum=rulenum, precedence=precedence}); + print "\n") + end + in print "grammar = \n"; + List.app printRule rules; + print "\n"; + print (" terms = " ^ (Int.toString terms) ^ + " nonterms = " ^ (Int.toString nonterms) ^ + " start = "); + (print o nontermToString) start; + () + end + end; diff --git a/ml-yacc/src/graph.sml b/ml-yacc/src/graph.sml new file mode 100644 index 0000000..110ae31 --- /dev/null +++ b/ml-yacc/src/graph.sml @@ -0,0 +1,99 @@ +(* ML-Yacc Parser Generator (c) 1989 Andrew W. Appel, David R. Tarditi *) + +functor mkGraph(structure IntGrammar : INTGRAMMAR + structure Core : CORE + structure CoreUtils : CORE_UTILS + sharing IntGrammar = Core.IntGrammar = CoreUtils.IntGrammar + sharing CoreUtils.Core = Core + ) : LRGRAPH = + struct + val sub = Array.sub + infix 9 sub + structure Core = Core + structure Grammar = IntGrammar.Grammar + structure IntGrammar = IntGrammar + open Core Core.Grammar CoreUtils IntGrammar + + structure NodeSet = RbOrdSet + (struct + type elem = core + val eq = eqCore + val gt = gtCore + end) + + open NodeSet + exception Shift of int * symbol + + type graph = {edges: {edge:symbol,to:core} list array, + nodes: core list,nodeArray : core array} + val edges = fn (CORE (_,i),{edges,...}:graph) => edges sub i + val nodes = fn ({nodes,...} : graph) => nodes + val shift = fn ({edges,nodes,...} : graph) => fn a as (i,sym) => + let fun find nil = raise (Shift a) + | find ({edge,to=CORE (_,state)} :: r) = + if gtSymbol(sym,edge) then find r + else if eqSymbol(edge,sym) then state + else raise (Shift a) + in find (edges sub i) + end + + val core = fn ({nodeArray,...} : graph) => + fn i => nodeArray sub i + + val mkGraph = fn (g as (GRAMMAR {start,...})) => + let val {shifts,produces,rules,epsProds} = + CoreUtils.mkFuncs g + fun add_goto ((symbol,a),(nodes,edges,future,num)) = + case find(CORE (a,0),nodes) + of NONE => + let val core =CORE (a,num) + val edge = {edge=symbol,to=core} + in (insert(core,nodes),edge::edges, + core::future,num+1) + end + | (SOME c) => + let val edge={edge=symbol,to=c} + in (nodes,edge::edges,future,num) + end + fun f (nodes,node_list,edge_list,nil,nil,num) = + let val nodes=rev node_list + in {nodes=nodes, + edges=Array.fromList (rev edge_list), + nodeArray = Array.fromList nodes + } + end + | f (nodes,node_list,edge_list,nil,y,num) = + f (nodes,node_list,edge_list,rev y,nil,num) + | f (nodes,node_list,edge_list,h::t,y,num) = + let val (nodes,edges,future,num) = + List.foldr add_goto (nodes,[],y,num) (shifts h) + in f (nodes,h::node_list, + edges::edge_list,t,future,num) + end + in {graph= + let val makeItem = fn (r as (RULE {rhs,...})) => + ITEM{rule=r,dot=0,rhsAfter=rhs} + val initialItemList = map makeItem (produces start) + val orderedItemList = + List.foldr Core.insert [] initialItemList + val initial = CORE (orderedItemList,0) + in f(empty,nil,nil,[initial],nil,1) + end, + produces=produces, + rules=rules, + epsProds=epsProds} + end + val prGraph = fn a as (nontermToString,termToString,print) => fn g => + let val printCore = prCore a + val printSymbol = print o nontermToString + val nodes = nodes g + val printEdges = fn n => + List.app (fn {edge,to=CORE (_,state)} => + (print "\tshift on "; + printSymbol edge; + print " to "; + print (Int.toString state); + print "\n")) (edges (n,g)) + in List.app (fn c => (printCore c; print "\n"; printEdges c)) nodes + end +end; diff --git a/ml-yacc/src/hdr.sml b/ml-yacc/src/hdr.sml new file mode 100644 index 0000000..7e2629f --- /dev/null +++ b/ml-yacc/src/hdr.sml @@ -0,0 +1,107 @@ +(* ML-Yacc Parser Generator (c) 1989 Andrew W. Appel, David R. Tarditi *) + +functor HeaderFun () : HEADER = + struct + val DEBUG = true + + type pos = int + val lineno = ref 0 + val text = ref (nil: string list) + type inputSource = {name : string, + errStream : TextIO.outstream, + inStream : TextIO.instream, + errorOccurred : bool ref} + + val newSource = + fn (s : string,i : TextIO.instream ,errs : TextIO.outstream) => + {name=s,errStream=errs,inStream=i, + errorOccurred = ref false} + + val errorOccurred = fn (s : inputSource) =>fn () => !(#errorOccurred s) + + val pr = fn out : TextIO.outstream => fn s : string => TextIO.output(out,s) + + val error = fn {name,errStream, errorOccurred,...} : inputSource => + let val pr = pr errStream + in fn l : pos => fn msg : string => + (pr name; pr ", line "; pr (Int.toString l); pr ": Error: "; + pr msg; pr "\n"; errorOccurred := true) + end + + val warn = fn {name,errStream, errorOccurred,...} : inputSource => + let val pr = pr errStream + in fn l : pos => fn msg : string => + (pr name; pr ", line "; pr (Int.toString l); pr ": Warning: "; + pr msg; pr "\n") + end + + datatype prec = LEFT | RIGHT | NONASSOC + + datatype symbol = SYMBOL of string * pos + val symbolName = fn SYMBOL(s,_) => s + val symbolPos = fn SYMBOL(_,p) => p + val symbolMake = fn sp => SYMBOL sp + + type ty = string + val tyName = fn i => i + val tyMake = fn i => i + + datatype control = NODEFAULT | VERBOSE | PARSER_NAME of symbol | + FUNCTOR of string | START_SYM of symbol | + NSHIFT of symbol list | POS of string | PURE | + PARSE_ARG of string * string | + TOKEN_SIG_INFO of string + + datatype declData = DECL of + {eop : symbol list, + keyword : symbol list, + nonterm : (symbol*ty option) list option, + prec : (prec * (symbol list)) list, + change: (symbol list * symbol list) list, + term : (symbol* ty option) list option, + control : control list, + value : (symbol * string) list} + + type rhsData = {rhs:symbol list,code:string, prec:symbol option} list + datatype rule = RULE of {lhs : symbol, rhs : symbol list, + code : string, prec : symbol option} + + type parseResult = string * declData * rule list + val getResult = fn p => p + + fun join_decls + (DECL {eop=e,control=c,keyword=k,nonterm=n,prec, + change=su,term=t,value=v}:declData, + DECL {eop=e',control=c',keyword=k',nonterm=n',prec=prec', + change=su',term=t',value=v'} : declData, + inputSource,pos) = + let val ignore = fn s => + (warn inputSource pos ("ignoring duplicate " ^ s ^ + " declaration")) + val join = fn (e,NONE,NONE) => NONE + | (e,NONE,a) => a + | (e,a,NONE) => a + | (e,a,b) => (ignore e; a) + fun mergeControl (nil,a) = [a] + | mergeControl (l as h::t,a) = + case (h,a) + of (PARSER_NAME _,PARSER_NAME n1) => (ignore "%name"; l) + | (FUNCTOR _,FUNCTOR _) => (ignore "%header"; l) + | (PARSE_ARG _,PARSE_ARG _) => (ignore "%arg"; l) + | (START_SYM _,START_SYM s) => (ignore "%start"; l) + | (POS _,POS _) => (ignore "%pos"; l) + | (TOKEN_SIG_INFO _, TOKEN_SIG_INFO _) + => (ignore "%token_sig_info"; l) + | (NSHIFT a,NSHIFT b) => (NSHIFT (a@b)::t) + | _ => h :: mergeControl(t,a) + fun loop (nil,r) = r + | loop (h::t,r) = loop(t,mergeControl(r,h)) + in DECL {eop=e@e',control=loop(c',c),keyword=k'@k, + nonterm=join("%nonterm",n,n'), prec=prec@prec', + change=su@su', term=join("%term",t,t'),value=v@v'} : + declData + end +end; + +structure Header = HeaderFun(); + diff --git a/ml-yacc/src/lalr.sml b/ml-yacc/src/lalr.sml new file mode 100644 index 0000000..644bcf2 --- /dev/null +++ b/ml-yacc/src/lalr.sml @@ -0,0 +1,464 @@ +(* ML-Yacc Parser Generator (c) 1989 Andrew W. Appel, David R. Tarditi *) + +functor mkLalr ( structure IntGrammar : INTGRAMMAR + structure Core : CORE + structure Graph : LRGRAPH + structure Look: LOOK + sharing Graph.Core = Core + sharing Graph.IntGrammar = Core.IntGrammar = + Look.IntGrammar = IntGrammar) : LALR_GRAPH = + struct + val sub = Array.sub + infix 9 sub + open IntGrammar.Grammar IntGrammar Core Graph Look + structure Graph = Graph + structure Core = Core + structure Grammar = IntGrammar.Grammar + structure IntGrammar = IntGrammar + + datatype tmpcore = TMPCORE of (item * term list ref) list * int + datatype lcore = LCORE of (item * term list) list * int + + + val prLcore = + fn a as (SymbolToString,nontermToString,termToString,print) => + let val printItem = prItem (SymbolToString,nontermToString,print) + val printLookahead = prLook(termToString,print) + in fn (LCORE (items,state)) => + (print "\n"; + print "state "; + print (Int.toString state); + print " :\n\n"; + List.app (fn (item,lookahead) => + (print "{"; + printItem item; + print ","; + printLookahead lookahead; + print "}\n")) items) + end + + exception Lalr of int + + structure ItemList = ListOrdSet + (struct + type elem = item * term list ref + val eq = fn ((a,_),(b,_)) => eqItem(a,b) + val gt = fn ((a,_),(b,_)) => gtItem(a,b) + end) + + structure NontermSet = ListOrdSet + (struct + type elem = nonterm + val gt = gtNonterm + val eq = eqNonterm + end) + +(* NTL: nonterms with lookahead *) + + structure NTL = RbOrdSet + (struct + type elem = nonterm * term list + val gt = fn ((i,_),(j,_)) => gtNonterm(i,j) + val eq = fn ((i,_),(j,_)) => eqNonterm(i,j) + end) + + val DEBUG = false + + val addLookahead = fn {graph,nullable,first,eop, + rules,produces,nonterms,epsProds, + print,termToString,nontermToString} => + let + + val eop = Look.make_set eop + + val symbolToString = fn (TERM t) => termToString t + | (NONTERM t) => nontermToString t + + val print = if DEBUG then print + else fn _ => () + + val prLook = if DEBUG then prLook (termToString,print) + else fn _ => () + + val prNonterm = print o nontermToString + + val prRule = if DEBUG + then prRule(symbolToString,nontermToString,print) + else fn _ => () + + val printInt = print o (Int.toString : int -> string) + + val printItem = prItem(symbolToString,nontermToString,print) + +(* look_pos: position in the rhs of a rule at which we should start placing + lookahead ref cells, i.e. the minimum place at which A -> x .B y, where + B is a nonterminal and y =*=> epsilon, or A -> x. is true. Positions are + given by the number of symbols before the place. The place before the first + symbol is 0, etc. *) + + val look_pos = + let val positions = Array.array(length rules,0) + +(* rule_pos: calculate place in the rhs of a rule at which we should start + placing lookahead ref cells *) + + fun rule_pos (RULE {rhs,...}) = + case (rev rhs) of + nil => 0 + | (TERM t) :: r => length rhs + | (NONTERM n :: r) => let + (* f assumes that everything after n in the + * rule has proven to be nullable so far. + * Remember that the rhs has been reversed, + * implying that this is true initially *) + (* A -> .z t B y, where y is nullable *) + fun f (b, (r as (TERM _ :: _))) = length r + (* A -> .z B C y *) + | f (c, (NONTERM b :: r)) = + if nullable c then f (b, r) + else length r + 1 + (* A -> .B y, where y is nullable *) + | f (_, []) = 0 + in f (n, r) + end + + val check_rule = fn (rule as RULE {num,...}) => + let val pos = rule_pos rule + in (print "look_pos: "; + prRule rule; + print " = "; + printInt pos; + print "\n"; + Array.update(positions,num,rule_pos rule)) + end + in app check_rule rules; + fn RULE{num,...} => (positions sub num) + end + +(* rest_is_null: true for items of the form A -> x .B y, where y is nullable *) + + val rest_is_null = + fn (ITEM{rule,dot, rhsAfter=NONTERM _ :: _}) => + dot >= (look_pos rule) + | _ => false + +(* map core to a new core including only items of the form A -> x. or + A -> x. B y, where y =*=> epsilon. It also adds epsilon productions to the + core. Each item is given a ref cell to hold the lookahead nonterminals for + it.*) + + val map_core = + let val f = fn (item as ITEM {rhsAfter=nil,...},r) => + (item,ref nil) :: r + | (item,r) => + if (rest_is_null item) + then (item,ref nil)::r + else r + in fn (c as CORE (items,state)) => + let val epsItems = + map (fn rule=>(ITEM{rule=rule,dot=0,rhsAfter=nil}, + ref (nil : term list)) + ) (epsProds c) + in TMPCORE(ItemList.union(List.foldr f [] items,epsItems),state) + end + end + + val new_nodes = map map_core (nodes graph) + + exception Find + +(* findRef: state * item -> lookahead ref cell for item *) + + val findRef = + let val states = Array.fromList new_nodes + val dummy = ref nil + in fn (state,item) => + let val TMPCORE (l,_) = states sub state + in case ItemList.find((item,dummy),l) + of SOME (_,look_ref) => look_ref + | NONE => (print "find failed: state "; + printInt state; + print "\nitem =\n"; + printItem item; + print "\nactual items =\n"; + app (fn (i,_) => (printItem i; + print "\n")) l; + raise Find) + end + end + + +(* findRuleRefs: state -> rule -> lookahead refs for rule. *) + + val findRuleRefs = + let val shift = shift graph + in fn state => + (* handle epsilon productions *) + fn (rule as RULE {rhs=nil,...}) => + [findRef(state,ITEM{rule=rule,dot=0,rhsAfter=nil})] + | (rule as RULE {rhs=sym::rest,...}) => + let val pos = Int.max(look_pos rule,1) + fun scan'(state,nil,pos,result) = + findRef(state,ITEM{rule=rule, + dot=pos, + rhsAfter=nil}) :: result + | scan'(state,rhs as sym::rest,pos,result) = + scan'(shift(state,sym), rest, pos+1, + findRef(state,ITEM{rule=rule, + dot=pos, + rhsAfter=rhs})::result) + +(* find first item of the form A -> x .B y, where y =*=> epsilon and + x is not epsilon, or A -> x. use scan' to pick up all refs after this + point *) + + fun scan(state,nil,_) = + [findRef(state,ITEM{rule=rule,dot=pos,rhsAfter=nil})] + | scan(state,rhs,0) = scan'(state,rhs,pos,nil) + | scan(state,sym::rest,place) = + scan(shift(state,sym),rest,place-1) + + in scan(shift(state,sym),rest,pos-1) + end + + end + +(* function to compute for some nonterminal n the set of nonterminals A added + through the closure of nonterminal n such that n =c*=> .A x, where x is + nullable *) + + val nonterms_w_null = fn nt => + let val collect_nonterms = fn n => + List.foldr (fn (rule as RULE {rhs=rhs as NONTERM n :: _,...},r) => + (case + (rest_is_null(ITEM {dot=0,rhsAfter=rhs,rule=rule})) + of true => n :: r + | false => r) + | (_,r) => r) [] (produces n) + fun dfs(a as (n,r)) = + if (NontermSet.exists a) then r + else List.foldr dfs (NontermSet.insert(n,r)) + (collect_nonterms n) + in dfs(nt,NontermSet.empty) + end + + val nonterms_w_null = + let val data = Array.array(nonterms,NontermSet.empty) + fun f n = if n=nonterms then () + else (Array.update(data,n,nonterms_w_null (NT n)); + f (n+1)) + in (f 0; fn (NT nt) => data sub nt) + end + +(* look_info: for some nonterminal n the set of nonterms A added + through the closure of the nonterminal such that n =c+=> .Ax and the + lookahead accumlated for each nonterm A *) + + val look_info = fn nt => + let val collect_nonterms = fn n => + List.foldr (fn (RULE {rhs=NONTERM n :: t,...},r) => + (case NTL.find ((n,nil),r) + of SOME (key,data) => + NTL.insert((n,Look.union(data,first t)),r) + | NONE => NTL.insert ((n,first t),r)) + | (_,r) => r) + NTL.empty (produces n) + fun dfs(a as ((key1,data1),r)) = + case (NTL.find a) + of SOME (_,data2) => + NTL.insert((key1,Look.union(data1,data2)),r) + | NONE => NTL.fold dfs (collect_nonterms key1) + (NTL.insert a) + in dfs((nt,nil),NTL.empty) + end + + val look_info = + if not DEBUG then look_info + else fn nt => + (print "look_info of "; prNonterm nt; print "=\n"; + let val info = look_info nt + in (NTL.app (fn (nt,lookahead) => + (prNonterm nt; print ": "; prLook lookahead; + print "\n\n")) info; + info) + end) + +(* prop_look: propagate lookaheads for nonterms added in the closure of a + nonterm. Lookaheads must be propagated from each nonterminal m to + all nonterminals { n | m =c+=> nx, where x=*=>epsilon} *) + + val prop_look = fn ntl => + let val upd_lookhd = fn new_look => fn (nt,r) => + case NTL.find ((nt,new_look),r) + of SOME (_,old_look) => + NTL.insert((nt, Look.union(new_look,old_look)),r) + | NONE => raise (Lalr 241) + val upd_nonterm = fn ((nt,look),r) => + NontermSet.fold (upd_lookhd look) + (nonterms_w_null nt) r + in NTL.fold upd_nonterm ntl ntl + end + + val prop_look = + if not DEBUG then prop_look + else fn ntl => + (print "prop_look =\n"; + let val info = prop_look ntl + in (NTL.app (fn (nt,lookahead) => + (prNonterm nt; + print ": "; + prLook lookahead; + print "\n\n")) info; info) + end) + +(* now put the information from these functions together. Create a function + which takes a nonterminal n and returns a list of triplets of + (a nonterm added through closure, + the lookahead for the nonterm, + whether the nonterm should include the lookahead for the nonterminal + whose closure is being taken (i.e. first(y) for an item j of the + form A -> x .n y and lookahead(j) if y =*=> epsilon) +*) + + val closure_nonterms = + let val data = + Array.array(nonterms,nil: (nonterm * term list * bool) list) + val do_nonterm = fn i => + let val nonterms_followed_by_null = + nonterms_w_null i + val nonterms_added_through_closure = + NTL.make_list (prop_look (look_info i)) + val result = + map (fn (nt,l) => + (nt,l,NontermSet.exists (nt,nonterms_followed_by_null)) + ) nonterms_added_through_closure + in if DEBUG then + (print "closure_nonterms = "; + prNonterm i; + print "\n"; + app (fn (nt,look,nullable) => + (prNonterm nt; + print ":"; + prLook look; + case nullable + of false => print "(false)\n" + | true => print "(true)\n")) result; + print "\n") + else (); + result + end + fun f i = + if i=nonterms then () + else (Array.update(data,i,do_nonterm (NT i)); f (i+1)) + val _ = f 0 + in fn (NT i) => data sub i + end + +(* add_nonterm_lookahead: Add lookahead to all completion items for rules added + when the closure of a given nonterm in some state is taken. It returns + a list of lookahead refs to which the given nonterm's lookahead should + be propagated. For each rule, it must trace the shift/gotos in the LR(0) + graph to find all items of the form A-> x .B y where y =*=> epsilon or + A -> x. +*) + + val add_nonterm_lookahead = fn (nt,state) => + let val f = fn ((nt,lookahead,nullable),r) => + let val refs = map (findRuleRefs state) (produces nt) + val refs = List.concat refs + val _ = app (fn r => + r := (Look.union (!r,lookahead))) refs + in if nullable then refs @ r else r + end + in List.foldr f [] (closure_nonterms nt) + end + +(* scan_core: Scan a core for all items of the form A -> x .B y. Applies + add_nonterm_lookahead to each such B, and then merges first(y) into + the list of refs returned by add_nonterm_lookahead. It returns + a list of ref * ref list for all the items where y =*=> epsilon *) + + val scan_core = fn (CORE (l,state)) => + let fun f ((item as ITEM{rhsAfter= NONTERM b :: y, + dot,rule})::t,r) = + (case (add_nonterm_lookahead(b,state)) + of nil => r + | l => + let val first_y = first y + val newr = if dot >= (look_pos rule) + then (findRef(state,item),l)::r + else r + in (app (fn r => + r := Look.union(!r,first_y)) l; + f (t,newr)) + end) + | f (_ :: t,r) = f (t,r) + | f (nil,r) = r + in f (l,nil) + end + +(* add end-of-parse symbols to set of items consisting of all items + immediately derived from the start symbol *) + + val add_eop = fn (c as CORE (l,state),eop) => + let fun f (item as ITEM {rule,dot,...}) = + let val refs = findRuleRefs state rule + in + +(* first take care of kernal items. Add the end-of-parse symbols to + the lookahead sets for these items. Epsilon productions of the + start symbol do not need to be handled specially because they will + be in the kernal also *) + + app (fn r => r := Look.union(!r,eop)) refs; + +(* now take care of closure items. These are all nonterminals C which + have a derivation S =+=> .C x, where x is nullable *) + + if dot >= (look_pos rule) then + case item + of ITEM{rhsAfter=NONTERM b :: _,...} => + (case add_nonterm_lookahead(b,state) + of nil => () + | l => app (fn r => r := Look.union(!r,eop)) l) + | _ => () + else () + end + in app f l + end + + val iterate = fn l => + let fun f lookahead (nil,done) = done + | f lookahead (h::t,done) = + let val old = !h + in h := Look.union (old,lookahead); + if (length (!h)) <> (length old) + then f lookahead (t,false) + else f lookahead(t,done) + end + fun g ((from,to)::rest,done) = + let val new_done = f (!from) (to,done) + in g (rest,new_done) + end + | g (nil,done) = done + fun loop true = () + | loop false = loop (g (l,true)) + in loop false + end + + val lookahead = List.concat (map scan_core (nodes graph)) + +(* used to scan the item list of a TMPCORE and remove the items not + being reduced *) + + val create_lcore_list = + fn ((item as ITEM {rhsAfter=nil,...},ref l),r) => + (item,l) :: r + | (_,r) => r + + in add_eop(Graph.core graph 0,eop); + iterate lookahead; + map (fn (TMPCORE (l,state)) => + LCORE (List.foldr create_lcore_list [] l, state)) new_nodes + end +end; diff --git a/ml-yacc/src/link.sml b/ml-yacc/src/link.sml new file mode 100644 index 0000000..9dcc168 --- /dev/null +++ b/ml-yacc/src/link.sml @@ -0,0 +1,43 @@ +(* ML-Yacc Parser Generator (c) 1989 Andrew W. Appel, David R. Tarditi *) +local + +(* create parser *) + + structure LrVals = MlyaccLrValsFun(structure Token = LrParser.Token + structure Hdr = Header) + structure Lex = LexMLYACC(structure Tokens = LrVals.Tokens + structure Hdr = Header) + structure Parser = JoinWithArg(structure Lex=Lex + structure ParserData = LrVals.ParserData + structure LrParser= LrParser) + structure ParseGenParser = + ParseGenParserFun(structure Parser = Parser + structure Header = Header) + +(* create structure for computing LALR table from a grammar *) + + structure MakeLrTable = mkMakeLrTable(structure IntGrammar =IntGrammar + structure LrTable = LrTable) + +(* create structures for printing LALR tables: + + Verbose prints a verbose description of an lalr table + PrintStruct prints an ML structure representing that is an lalr table *) + + structure Verbose = mkVerbose(structure Errs = MakeLrTable.Errs) + structure PrintStruct = + mkPrintStruct(structure LrTable = MakeLrTable.LrTable + structure ShrinkLrTable = + ShrinkLrTableFun(structure LrTable=LrTable)) +in + +(* returns function which takes a file name, invokes the parser on the file, + does semantic checks, creates table, and prints it *) + + structure ParseGen = ParseGenFun(structure ParseGenParser = ParseGenParser + structure MakeTable = MakeLrTable + structure Verbose = Verbose + structure PrintStruct = PrintStruct + structure Absyn = Absyn) +end + diff --git a/ml-yacc/src/look.sml b/ml-yacc/src/look.sml new file mode 100644 index 0000000..efb8596 --- /dev/null +++ b/ml-yacc/src/look.sml @@ -0,0 +1,161 @@ +(* ML-Yacc Parser Generator (c) 1989 Andrew W. Appel, David R. Tarditi *) + +functor mkLook (structure IntGrammar : INTGRAMMAR) : LOOK = + struct + val sub = Array.sub + infix 9 sub + structure Grammar = IntGrammar.Grammar + structure IntGrammar = IntGrammar + open Grammar IntGrammar + + structure TermSet = ListOrdSet + (struct + type elem = term + val eq = eqTerm + val gt = gtTerm + end) + + val union = TermSet.union + val make_set = TermSet.make_set + + val prLook = fn (termToString,print) => + let val printTerm = print o termToString + fun f nil = print " " + | f (a :: b) = (printTerm a; print " "; f b) + in f + end + + structure NontermSet = ListOrdSet + (struct + type elem = nonterm + val eq = eqNonterm + val gt = gtNonterm + end) + + val mkFuncs = fn {rules : rule list, nonterms : int, + produces : nonterm -> rule list} => + + let + + (* nullable: create a function which tells if a nonterminal is nullable + or not. + + Method: Keep an array of booleans. The nth entry is true if + NT i is nullable. If is false if we don't know whether NT i + is nullable. + + Keep a list of rules whose remaining rhs we must prove to be + null. First, scan the list of rules and remove those rules + whose rhs contains a terminal. These rules are not nullable. + + Now iterate through the rules that were left: + (1) if there is no remaining rhs we have proved that + the rule is nullable, mark the nonterminal for the + rule as nullable + (2) if the first element of the remaining rhs is + nullable, place the rule back on the list with + the rest of the rhs + (3) if we don't know whether the nonterminal is nullable, + place it back on the list + (4) repeat until the list does not change. + + We have found all the possible nullable rules. + *) + + val nullable = let + fun add_rule (RULE { lhs, rhs, ... }, r) = let + fun addNT (TERM _, _) = NONE + | addNT (_, NONE) = NONE + | addNT (NONTERM (NT i), SOME ntlist) = SOME (i :: ntlist) + in + case foldr addNT (SOME []) rhs of + NONE => r + | SOME ntlist => (lhs, ntlist) :: r + end + val items = List.foldr add_rule [] rules + val nullable = Array.array(nonterms,false) + fun f ((NT i,nil),(l,_)) = (Array.update(nullable,i,true); + (l,true)) + | f (a as (lhs,(h::t)),(l,change)) = + (case (nullable sub h) of + false => (a::l,change) + | true => ((lhs,t)::l,true)) + fun prove(l,true) = prove(List.foldr f (nil,false) l) + | prove(_,false) = () + in (prove(items,true); fn (NT i) => nullable sub i) + end + + (* scanRhs : look at a list of symbols, scanning past nullable + nonterminals, applying addSymbol to the symbols scanned *) + + fun scanRhs addSymbol = + let fun f (nil,result) = result + | f ((sym as NONTERM nt) :: rest,result) = + if nullable nt then f (rest,addSymbol(sym,result)) + else addSymbol(sym,result) + | f ((sym as TERM _) :: _,result) = addSymbol(sym,result) + in f + end + + (* accumulate: look at the start of the right-hand-sides of rules, + looking past nullable nonterminals, applying addObj to the visible + symbols. *) + + fun accumulate(rules, empty, addObj) = + List.foldr (fn (RULE {rhs,...},r) =>(scanRhs addObj) (rhs,r)) empty rules + + val nontermMemo = fn f => + let val lookup = Array.array(nonterms,nil) + fun g i = if i=nonterms then () + else (Array.update(lookup,i,f (NT i)); g (i+1)) + in (g 0; fn (NT j) => lookup sub j) + end + + (* first1: the FIRST set of a nonterminal in the grammar. Only looks + at other terminals, but it is clever enough to move past nullable + nonterminals at the start of a production. *) + + fun first1 nt = accumulate(produces nt, TermSet.empty, + fn (TERM t, set) => TermSet.insert (t,set) + | (_, set) => set) + + val first1 = nontermMemo(first1) + + (* starters1: given a nonterminal "nt", return the set of nonterminals + which can start its productions. Looks past nullables, but doesn't + recurse *) + + fun starters1 nt = accumulate(produces nt, nil, + fn (NONTERM nt, set) => + NontermSet.insert(nt,set) + | (_, set) => set) + + val starters1 = nontermMemo(starters1) + + (* first: maps a nonterminal to its first-set. Get all the starters of + the nonterminal, get the first1 terminal set of each of these, + union the whole lot together *) + + fun first nt = + List.foldr (fn (a,r) => TermSet.union(r,first1 a)) + [] (NontermSet.closure (NontermSet.singleton nt, starters1)) + + val first = nontermMemo(first) + + (* prefix: all possible terminals starting a symbol list *) + + fun prefix symbols = + scanRhs (fn (TERM t,r) => TermSet.insert(t,r) + | (NONTERM nt,r) => TermSet.union(first nt,r)) + (symbols,nil) + + fun nullable_string ((TERM t) :: r) = false + | nullable_string ((NONTERM nt) :: r) = + (case (nullable nt) + of true => nullable_string r + | f => f) + | nullable_string nil = true + + in {nullable = nullable, first = prefix} + end +end; diff --git a/ml-yacc/src/mklrtable.sml b/ml-yacc/src/mklrtable.sml new file mode 100644 index 0000000..dc71b14 --- /dev/null +++ b/ml-yacc/src/mklrtable.sml @@ -0,0 +1,388 @@ +(* ML-Yacc Parser Generator (c) 1989 Andrew W. Appel, David R. Tarditi *) + +functor mkMakeLrTable (structure IntGrammar : INTGRAMMAR + structure LrTable : LR_TABLE + sharing type LrTable.term = IntGrammar.Grammar.term + sharing type LrTable.nonterm = IntGrammar.Grammar.nonterm + ) : MAKE_LR_TABLE = + struct + val sub = Array.sub + infix 9 sub + structure Core = mkCore(structure IntGrammar = IntGrammar) + structure CoreUtils = mkCoreUtils(structure IntGrammar = IntGrammar + structure Core = Core) + structure Graph = mkGraph(structure IntGrammar = IntGrammar + structure Core = Core + structure CoreUtils = CoreUtils) + structure Look = mkLook(structure IntGrammar = IntGrammar) + structure Lalr = mkLalr(structure IntGrammar = IntGrammar + structure Core = Core + structure Graph = Graph + structure Look = Look) + structure LrTable = LrTable + structure IntGrammar = IntGrammar + structure Grammar = IntGrammar.Grammar + structure GotoList = ListOrdSet + (struct + type elem = Grammar.nonterm * LrTable.state + val eq = fn ((Grammar.NT a,_),(Grammar.NT b,_)) => a=b + val gt = fn ((Grammar.NT a,_),(Grammar.NT b,_)) => a>b + end) + structure Errs : LR_ERRS = + struct + structure LrTable = LrTable + datatype err = RR of LrTable.term * LrTable.state * int * int + | SR of LrTable.term * LrTable.state * int + | NOT_REDUCED of int + | NS of LrTable.term * int + | START of int + + val summary = fn l => + let val numRR = ref 0 + val numSR = ref 0 + val numSTART = ref 0 + val numNOT_REDUCED = ref 0 + val numNS = ref 0 + fun loop (h::t) = + (case h + of RR _ => numRR := !numRR+1 + | SR _ => numSR := !numSR+1 + | START _ => numSTART := !numSTART+1 + | NOT_REDUCED _ => numNOT_REDUCED := !numNOT_REDUCED+1 + | NS _ => numNS := !numNS+1; loop t) + | loop nil = {rr = !numRR, sr = !numSR, + start = !numSTART, + not_reduced = !numNOT_REDUCED, + nonshift = !numNS} + in loop l + end + + val printSummary = fn say => fn l => + let val {rr,sr,start, + not_reduced,nonshift} = summary l + val say_plural = fn (i,s) => + (say (Int.toString i); say " "; + case i + of 1 => (say s) + | _ => (say s; say "s")) + val say_error = fn (args as (i,s)) => + case i + of 0 => () + | i => (say_plural args; say "\n") + in say_error(rr,"reduce/reduce conflict"); + say_error(sr,"shift/reduce conflict"); + if nonshift<>0 then + (say "non-shiftable terminal used on the rhs of "; + say_plural(start,"rule"); say "\n") + else (); + if start<>0 then (say "start symbol used on the rhs of "; + say_plural(start,"rule"); say "\n") + else (); + if not_reduced<>0 then (say_plural(not_reduced,"rule"); + say " not reduced\n") + else () + end + end + + + open IntGrammar Grammar Errs LrTable Core + +(* rules for resolving conflicts: + + shift/reduce: + + If either the terminal or the rule has no + precedence, a shift/reduce conflict is reported. + A shift is chosen for the table. + + If both have precedences, the action with the + higher precedence is chosen. + + If the precedences are equal, neither the + shift nor the reduce is chosen. + + reduce/reduce: + + A reduce/reduce conflict is reported. The lowest + numbered rule is chosen for reduction. +*) + + +(* method for filling tables - first compute the reductions called for in a + state, then add the shifts for the state to this information. + +How to compute the reductions: + + A reduction initially is given as an item and a lookahead set calling +for reduction by that item. The first reduction is mapped to a list of +terminal * rule pairs. Each additional reduction is then merged into this +list and reduce/reduce conflicts are resolved according to the rule +given. + +Missed Errors: + + This method misses some reduce/reduce conflicts that exist because +some reductions are removed from the list before conflicting reductions +can be compared against them. All reduce/reduce conflicts, however, +can be generated given a list of the reduce/reduce conflicts generated +by this method. + + This can be done by taking the transitive closure of the relation given +by the list. If reduce/reduce (a,b) and reduce/reduce (b,c) are true, +then reduce/reduce (a,c) is true. The relation is symmetric and transitive. + +Adding shifts: + + Finally scan the list merging in shifts and resolving conflicts +according to the rule given. + +Missed Shift/Reduce Errors: + + Some errors may be missed by this method because some reductions were +removed as the result of reduce/reduce conflicts. For a shift/reduce +conflict of term a, reduction by rule n, shift/reduce conficts exist +for all rules y such that reduce/reduce (x,y) or reduce/reduce (y,x) +is true. +*) + + fun unREDUCE (REDUCE num) = num + | unREDUCE _ = raise Fail "bug: unexpected action (expected REDUCE)" + + val mergeReduces = + let val merge = fn state => + let fun f (j as (pair1 as (T t1,action1)) :: r1, + k as (pair2 as (T t2,action2)) :: r2,result,errs) = + if t1 < t2 then f(r1,k,pair1::result,errs) + else if t1 > t2 then f(j,r2,pair2::result,errs) + else let val num1 = unREDUCE action1 + val num2 = unREDUCE action2 + val errs = RR(T t1,state,num1,num2) :: errs + val action = if num1 < num2 then pair1 else pair2 + in f(r1,r2,action::result,errs) + end + | f (nil,nil,result,errs) = (rev result,errs) + | f (pair1::r,nil,result,errs) = f(r,nil,pair1::result,errs) + | f (nil,pair2 :: r,result,errs) = f(nil,r,pair2::result,errs) + in f + end + in fn state => fn ((ITEM {rule=RULE {rulenum,...},...}, lookahead), + (reduces,errs)) => + let val action = REDUCE rulenum + val actions = map (fn a=>(a,action)) lookahead + in case reduces + of nil => (actions,errs) + | _ => merge state (reduces,actions,nil,errs) + end + end + + val computeActions = fn (rules,precedence,graph,defaultReductions) => + + let val rulePrec = + let val precData = Array.array(length rules,NONE : int option) + in app (fn RULE {rulenum=r,precedence=p,...} => Array.update(precData,r,p)) + rules; + fn i => precData sub i + end + + fun mergeShifts(state,shifts,nil) = (shifts,nil) + | mergeShifts(state,nil,reduces) = (reduces,nil) + | mergeShifts(state,shifts,reduces) = + let fun f(shifts as (pair1 as (T t1,_)) :: r1, + reduces as (pair2 as (T t2,action)) :: r2, + result,errs) = + if t1 < t2 then f(r1,reduces,pair1 :: result,errs) + else if t1 > t2 then f(shifts,r2,pair2 :: result,errs) + else let val rulenum = unREDUCE action + val (term1,_) = pair1 + in case (precedence term1,rulePrec rulenum) + of (SOME i,SOME j) => + if i>j then f(r1,r2,pair1 :: result,errs) + else if j>i then f(r1,r2,pair2 :: result,errs) + else f(r1,r2,(T t1, ERROR)::result,errs) + | (_,_) => + f(r1,r2,pair1 :: result, + SR (term1,state,rulenum)::errs) + end + | f (nil,nil,result,errs) = (rev result,errs) + | f (nil,h::t,result,errs) = + f (nil,t,h::result,errs) + | f (h::t,nil,result,errs) = + f (t,nil,h::result,errs) + in f(shifts,reduces,nil,nil) + end + + fun mapCore ({edge=symbol,to=CORE (_,state)}::r,shifts,gotos) = + (case symbol + of (TERM t) => mapCore (r,(t,SHIFT(STATE state))::shifts,gotos) + | (NONTERM nt) => mapCore(r,shifts,(nt,STATE state)::gotos) + ) + | mapCore (nil,shifts,gotos) = (rev shifts,rev gotos) + + fun pruneError ((_,ERROR)::rest) = pruneError rest + | pruneError (a::rest) = a :: pruneError rest + | pruneError nil = nil + + in fn (Lalr.LCORE (reduceItems,state),c as CORE (shiftItems,state')) => + if DEBUG andalso (state <> state') then + let exception MkTable in raise MkTable end + else + let val (shifts,gotos) = mapCore (Graph.edges(c,graph),nil,nil) + val tableState = STATE state + in case reduceItems + of nil => ((shifts,ERROR),gotos,nil) + | h :: nil => + let val (ITEM {rule=RULE {rulenum,...},...}, l) = h + val (reduces,_) = mergeReduces tableState (h,(nil,nil)) + val (actions,errs) = mergeShifts(tableState, + shifts,reduces) + val actions' = pruneError actions + val (actions,default) = + let fun hasReduce (nil,actions) = + (rev actions,REDUCE rulenum) + | hasReduce ((a as (_,SHIFT _)) :: r,actions) = + hasReduce(r,a::actions) + | hasReduce (_ :: r,actions) = + hasReduce(r,actions) + fun loop (nil,actions) = (rev actions,ERROR) + | loop ((a as (_,SHIFT _)) :: r,actions) = + loop(r,a::actions) + | loop ((a as (_,REDUCE _)) :: r,actions) = + hasReduce(r,actions) + | loop (_ :: r,actions) = loop(r,actions) + in if defaultReductions + andalso length actions = length actions' + then loop(actions,nil) + else (actions',ERROR) + end + in ((actions,default), gotos,errs) + end + | l => + let val (reduces,errs1) = + List.foldr (mergeReduces tableState) (nil,nil) l + val (actions,errs2) = + mergeShifts(tableState,shifts,reduces) + in ((pruneError actions,ERROR),gotos,errs1@errs2) + end + end + end + + val mkTable = fn (grammar as GRAMMAR{rules,terms,nonterms,start, + precedence,termToString,noshift, + nontermToString,eop},defaultReductions) => + let val symbolToString = fn (TERM t) => termToString t + | (NONTERM nt) => nontermToString nt + val {rules,graph,produces,epsProds,...} = Graph.mkGraph grammar + val {nullable,first} = + Look.mkFuncs{rules=rules,produces=produces,nonterms=nonterms} + val lcores = Lalr.addLookahead + {graph=graph, + nullable=nullable, + produces=produces, + eop=eop, + nonterms=nonterms, + first=first, + rules=rules, + epsProds=epsProds, + print=(fn s=>TextIO.output(TextIO.stdOut,s)), + termToString = termToString, + nontermToString = nontermToString} + + fun zip (h::t,h'::t') = (h,h') :: zip(t,t') + | zip (nil,nil) = nil + | zip _ = let exception MkTable in raise MkTable end + + fun unzip l = + let fun f ((a,b,c)::r,j,k,l) = f(r,a::j,b::k,c::l) + | f (nil,j,k,l) = (rev j,rev k,rev l) + in f(l,nil,nil,nil) + end + + val (actions,gotos,errs) = + let val doState = + computeActions(rules,precedence,graph, + defaultReductions) + in unzip (map doState (zip(lcores,Graph.nodes graph))) + end + + (* add goto from state 0 to a new state. The new state + has accept actions for all of the end-of-parse symbols *) + + val (actions,gotos,errs) = + case gotos + of nil => (actions,gotos,errs) + | h :: t => + let val newStateActions = + (map (fn t => (t,ACCEPT)) (Look.make_set eop),ERROR) + val state0Goto = + GotoList.insert((start,STATE (length actions)),h) + in (actions @ [newStateActions], + state0Goto :: (t @ [nil]), + errs @ [nil]) + end + + val startErrs = + List.foldr (fn (RULE {rhs,rulenum,...},r) => + if (List.exists (fn NONTERM a => a=start + | _ => false) rhs) + then START rulenum :: r + else r) [] rules + + val nonshiftErrs = + List.foldr (fn (RULE {rhs,rulenum,...},r) => + (List.foldr (fn (nonshift,r) => + if (List.exists (fn TERM a => a=nonshift + | _ => false) rhs) + then NS(nonshift,rulenum) :: r + else r) r noshift) + ) [] rules + + val notReduced = + let val ruleReduced = Array.array(length rules,false) + val test = fn REDUCE i => Array.update(ruleReduced,i,true) + | _ => () + val _ = app (fn (actions,default) => + (app (fn (_,r) => test r) actions; + test default) + ) actions; + fun scan (i,r) = + if i >= 0 then + scan(i-1, if ruleReduced sub i then r + else NOT_REDUCED i :: r) + else r + in scan(Array.length ruleReduced-1,nil) + end handle Subscript => + (if DEBUG then + print "rules not numbered correctly!" + else (); nil) + + val numstates = length actions + + val allErrs = startErrs @ notReduced @ nonshiftErrs @ + (List.concat errs) + + fun convert_to_pairlist(nil : ('a * 'b) list): ('a,'b) pairlist = + EMPTY + | convert_to_pairlist ((a,b) :: r) = + PAIR(a,b,convert_to_pairlist r) + + in (mkLrTable {actions=Array.fromList(map (fn (a,b) => + (convert_to_pairlist a,b)) actions), + gotos=Array.fromList (map convert_to_pairlist gotos), + numRules=length rules,numStates=length actions, + initialState=STATE 0}, + let val errArray = Array.fromList errs + in fn (STATE state) => errArray sub state + end, + + fn print => + let val printCore = + prCore(symbolToString,nontermToString,print) + val core = Graph.core graph + in fn STATE state => + printCore (if state=(numstates-1) then + Core.CORE (nil,state) + else (core state)) + end, + allErrs) + end +end; diff --git a/ml-yacc/src/mkprstruct.sml b/ml-yacc/src/mkprstruct.sml new file mode 100644 index 0000000..18e423e --- /dev/null +++ b/ml-yacc/src/mkprstruct.sml @@ -0,0 +1,196 @@ +(* ML-Yacc Parser Generator (c) 1989 Andrew W. Appel, David R. Tarditi *) + +functor mkPrintStruct(structure LrTable : LR_TABLE + structure ShrinkLrTable : SHRINK_LR_TABLE + sharing LrTable = ShrinkLrTable.LrTable):PRINT_STRUCT = + struct + val sub = Array.sub + infix 9 sub + structure LrTable = LrTable + open ShrinkLrTable LrTable + + + (* lineLength = approximately the largest number of characters to allow + on a line when printing out an encode string *) + + val lineLength = 72 + + (* maxLength = length of a table entry. All table entries are encoded + using two 16-bit integers, one for the terminal number and the other + for the entry. Each integer is printed as two characters (low byte, + high byte), using the ML ascii escape sequence. We need 4 + characters for each escape sequence and 16 characters for each entry + *) + + val maxLength = 16 + + (* number of entries we can fit on a row *) + + val numEntries = lineLength div maxLength + + (* convert integer between 0 and 255 to the three character ascii + decimal escape sequence for it *) + + val chr = + let val lookup = Array.array(256,"\000") + val intToString = fn i => + if i>=100 then "\\" ^ (Int.toString i) + else if i>=10 then "\\0" ^ (Int.toString i) + else "\\00" ^ (Int.toString i) + fun loop n = if n=256 then () + else (Array.update(lookup,n,intToString n); loop (n+1)) + in loop 0; fn i => lookup sub i + end + + val makeStruct = fn {table,name,print,verbose} => + let + val states = numStates table + val rules = numRules table + fun printPairList (prEntry : 'a * 'b -> unit) l = + let fun f (EMPTY,_) = () + | f (PAIR(a,b,r),count) = + if count >= numEntries then + (print "\\\n\\"; prEntry(a,b); f(r,1)) + else (prEntry(a,b); f(r,(count+1))) + in f(l,0) + end + val printList : ('a -> unit) -> 'a list -> unit = + fn prEntry => fn l => + let fun f (nil,_) = () + | f (a :: r,count) = + if count >= numEntries then + (print "\\\n\\"; prEntry a; f(r,1)) + else (prEntry a; f(r,count+1)) + in f(l,0) + end + val prEnd = fn _ => print "\\000\\000\\\n\\" + fun printPairRow prEntry = + let val printEntries = printPairList prEntry + in fn l => (printEntries l; prEnd()) + end + fun printPairRowWithDefault (prEntry,prDefault) = + let val f = printPairRow prEntry + in fn (l,default) => (prDefault default; f l) + end + fun printTable (printRow,count) = + (print "\"\\\n\\"; + let fun f i = if i=count then () + else (printRow i; f (i+1)) + in f 0 + end; + print"\"\n") + val printChar = print o chr + + (* print an integer between 0 and 2^16-1 as a 2-byte character, + with the low byte first *) + + val printInt = fn i => (printChar (i mod 256); + printChar (i div 256)) + + (* encode actions as integers: + + ACCEPT => 0 + ERROR => 1 + SHIFT i => 2 + i + REDUCE rulenum => numstates+2+rulenum + *) + + val printAction = + fn (REDUCE rulenum) => printInt (rulenum+states+2) + | (SHIFT (STATE i)) => printInt (i+2) + | ACCEPT => printInt 0 + | ERROR => printInt 1 + + val printTermAction = fn (T t,action) => + (printInt (t+1); printAction action) + + val printGoto = fn (NT n,STATE s) => (printInt (n+1); printInt s) + + val ((rowCount,rowNumbers,actionRows),entries)= + shrinkActionList(table,verbose) + val getActionRow = let val a = Array.fromList actionRows + in fn i => a sub i + end + val printGotoRow : int -> unit = + let val f = printPairRow printGoto + val g = describeGoto table + in fn i => f (g (STATE i)) + end + val printActionRow = + let val f = printPairRowWithDefault(printTermAction,printAction) + in fn i => f (getActionRow i) + end + in print "val "; + print name; + print "="; + print "let val actionRows =\n"; + printTable(printActionRow,rowCount); + print "val actionRowNumbers =\n\""; + printList (fn i => printInt i) rowNumbers; + print "\"\n"; + print "val gotoT =\n"; + printTable(printGotoRow,states); + print "val numstates = "; + print (Int.toString states); + print "\nval numrules = "; + print (Int.toString rules); + print "\n\ +\val s = ref \"\" and index = ref 0\n\ +\val string_to_int = fn () => \n\ +\let val i = !index\n\ +\in index := i+2; Char.ord(String.sub(!s,i)) + Char.ord(String.sub(!s,i+1)) * 256\n\ +\end\n\ +\val string_to_list = fn s' =>\n\ +\ let val len = String.size s'\n\ +\ fun f () =\n\ +\ if !index < len then string_to_int() :: f()\n\ +\ else nil\n\ +\ in index := 0; s := s'; f ()\n\ +\ end\n\ +\val string_to_pairlist = fn (conv_key,conv_entry) =>\n\ +\ let fun f () =\n\ +\ case string_to_int()\n\ +\ of 0 => EMPTY\n\ +\ | n => PAIR(conv_key (n-1),conv_entry (string_to_int()),f())\n\ +\ in f\n\ +\ end\n\ +\val string_to_pairlist_default = fn (conv_key,conv_entry) =>\n\ +\ let val conv_row = string_to_pairlist(conv_key,conv_entry)\n\ +\ in fn () =>\n\ +\ let val default = conv_entry(string_to_int())\n\ +\ val row = conv_row()\n\ +\ in (row,default)\n\ +\ end\n\ +\ end\n\ +\val string_to_table = fn (convert_row,s') =>\n\ +\ let val len = String.size s'\n\ +\ fun f ()=\n\ +\ if !index < len then convert_row() :: f()\n\ +\ else nil\n\ +\ in (s := s'; index := 0; f ())\n\ +\ end\n\ +\local\n\ +\ val memo = Array.array(numstates+numrules,ERROR)\n\ +\ val _ =let fun g i=(Array.update(memo,i,REDUCE(i-numstates)); g(i+1))\n\ +\ fun f i =\n\ +\ if i=numstates then g i\n\ +\ else (Array.update(memo,i,SHIFT (STATE i)); f (i+1))\n\ +\ in f 0 handle General.Subscript => ()\n\ +\ end\n\ +\in\n\ +\val entry_to_action = fn 0 => ACCEPT | 1 => ERROR | j => Array.sub(memo,(j-2))\n\ +\end\n\ +\val gotoT=Array.fromList(string_to_table(string_to_pairlist(NT,STATE),gotoT))\n\ +\val actionRows=string_to_table(string_to_pairlist_default(T,entry_to_action),actionRows)\n\ +\val actionRowNumbers = string_to_list actionRowNumbers\n\ +\val actionT = let val actionRowLookUp=\n\ +\let val a=Array.fromList(actionRows) in fn i=>Array.sub(a,i) end\n\ +\in Array.fromList(List.map actionRowLookUp actionRowNumbers)\n\ +\end\n\ +\in LrTable.mkLrTable {actions=actionT,gotos=gotoT,numRules=numrules,\n\ +\numStates=numstates,initialState=STATE "; +print (Int.toString ((fn (STATE i) => i) (initialState table))); +print "}\nend\n"; + entries + end +end; diff --git a/ml-yacc/src/ml-yacc.cm b/ml-yacc/src/ml-yacc.cm new file mode 100644 index 0000000..334e634 --- /dev/null +++ b/ml-yacc/src/ml-yacc.cm @@ -0,0 +1,53 @@ +Library + structure ExportParseGen +is + +# if defined (NEW_CM) + $/basis.cm + $/ml-yacc-lib.cm +# else + ../lib/ml-yacc-lib.cm +# endif + + sigs.sml + + utils.sig + hdr.sml + +(* During installation we rely on pre-generated files + * to avoid certain chicken-and-egg problems. *) +#if defined(NO_ML_YACC) + yacc.grm.sig + yacc.grm.sml +#else + yacc.grm +#endif + +(* During installation we rely on pre-generated files + * to avoid certain chicken-and-egg problems. *) +#if defined(NO_ML_LEX) + yacc.lex.sml +#else + yacc.lex +#endif + + parse.sml + + utils.sml + grammar.sml + core.sml + coreutils.sml + graph.sml + look.sml + lalr.sml + mklrtable.sml + mkprstruct.sml + shrink.sml + verbose.sml + + absyn.sig + absyn.sml + yacc.sml + link.sml + + export-yacc.sml diff --git a/ml-yacc/src/parse.sml b/ml-yacc/src/parse.sml new file mode 100644 index 0000000..45b81c5 --- /dev/null +++ b/ml-yacc/src/parse.sml @@ -0,0 +1,26 @@ +(* ML-Yacc Parser Generator (c) 1989 Andrew W. Appel, David R. Tarditi *) + +functor ParseGenParserFun(structure Header : HEADER + structure Parser : ARG_PARSER + where type pos = Header.pos + sharing type Parser.result = Header.parseResult + sharing type Parser.arg = Header.inputSource = + Parser.lexarg + ) : PARSE_GEN_PARSER = + + struct + structure Header = Header + val parse = fn file => + let + val in_str = TextIO.openIn file + val source = Header.newSource(file,in_str,TextIO.stdOut) + val error = fn (s : string,i:int,_) => + Header.error source i s + val stream = Parser.makeLexer (fn i => (TextIO.inputN(in_str,i))) + source + val (result,_) = (Header.lineno := 1; + Header.text := nil; + Parser.parse(15,stream,error,source)) + in (TextIO.closeIn in_str; (result,source)) + end + end; diff --git a/ml-yacc/src/shrink.sml b/ml-yacc/src/shrink.sml new file mode 100644 index 0000000..3a1c354 --- /dev/null +++ b/ml-yacc/src/shrink.sml @@ -0,0 +1,222 @@ +(* ML-Yacc Parser Generator (c) 1991 Andrew W. Appel, David R. Tarditi *) + +signature SORT_ARG = + sig + type entry + val gt : entry * entry -> bool + end +signature SORT = + sig + type entry + val sort : entry list -> entry list + end +signature EQUIV_ARG = + sig + type entry + val gt : entry * entry -> bool + val eq : entry * entry -> bool + end +signature EQUIV = + sig + type entry + + (* equivalences: take a list of entries and divides them into + equivalence classes numbered 0 to n-1. + + It returns a triple consisting of: + + * the number of equivalence classes + * a list which maps each original entry to an equivalence + class. The nth entry in this list gives the equivalence + class for the nth entry in the original entry list. + * a list which maps equivalence classes to some representative + element. The nth entry in this list is an element from the + nth equivalence class + *) + + val equivalences : entry list -> (int * int list * entry list) + end + +(* An O(n lg n) merge sort routine *) + +functor MergeSortFun(A : SORT_ARG) : SORT = + struct + type entry = A.entry + + (* sort: an O(n lg n) merge sort routine. We create a list of lists + and then merge these lists in passes until only one list is left.*) + + fun sort nil = nil + | sort l = + let (* merge: merge two lists *) + + fun merge (l as a::at,r as b::bt) = + if A.gt(a,b) + then b :: merge(l,bt) + else a :: merge(at,r) + | merge (l,nil) = l + | merge (nil,r) = r + + (* scan: merge pairs of lists on a list of lists. + Reduces the number of lists by about 1/2 *) + + fun scan (a :: b :: rest) = merge(a,b) :: scan rest + | scan l = l + + (* loop: calls scan on a list of lists until only + one list is left. It terminates only if the list of + lists is nonempty. (The pattern match for sort + ensures this.) *) + + fun loop (a :: nil) = a + | loop l = loop (scan l) + + in loop (map (fn a => [a]) l) + end + end + +(* an O(n lg n) routine for placing items in equivalence classes *) + +functor EquivFun(A : EQUIV_ARG) : EQUIV = + struct + val sub = Array.sub + infix 9 sub + + (* Our algorithm for finding equivalence class is simple. The basic + idea is to sort the entries and place duplicates entries in the same + equivalence class. + + Let the original entry list be E. We map E to a list of a pairs + consisting of the entry and its position in E, where the positions + are numbered 0 to n-1. Call this list of pairs EP. + + We then sort EP on the original entries. The second elements in the + pairs now specify a permutation that will return us to EP. + + We then scan the sorted list to create a list R of representative + entries, a list P of integers which permutes the sorted list back to + the original list and a list SE of integers which gives the + equivalence class for the nth entry in the sorted list . + + We then return the length of R, R, and the list that results from + permuting SE by P. + *) + + type entry = A.entry + + val gt = fn ((a,_),(b,_)) => A.gt(a,b) + + structure Sort = MergeSortFun(type entry = A.entry * int + val gt = gt) + val assignIndex = + fn l => + let fun loop (index,nil) = nil + | loop (index,h :: t) = (h,index) :: loop(index+1,t) + in loop (0,l) + end + + local fun loop ((e,_) :: t, prev, class, R , SE) = + if A.eq(e,prev) + then loop(t,e,class,R, class :: SE) + else loop(t,e,class+1,e :: R, (class + 1) :: SE) + | loop (nil,_,_,R,SE) = (rev R, rev SE) + in val createEquivalences = + fn nil => (nil,nil) + | (e,_) :: t => loop(t, e, 0, [e],[0]) + end + + val inversePermute = fn permutation => + fn nil => nil + | l as h :: _ => + let val result = Array.array(length l,h) + fun loop (elem :: r, dest :: s) = + (Array.update(result,dest,elem); loop(r,s)) + | loop _ = () + fun listofarray i = + if i < Array.length result then + (result sub i) :: listofarray (i+1) + else nil + in loop (l,permutation); listofarray 0 + end + + fun makePermutation x = map (fn (_,b) => b) x + + val equivalences = fn l => + let val EP = assignIndex l + val sorted = Sort.sort EP + val P = makePermutation sorted + val (R, SE) = createEquivalences sorted + in (length R, inversePermute P SE, R) + end +end + +functor ShrinkLrTableFun(structure LrTable : LR_TABLE) : SHRINK_LR_TABLE = + struct + structure LrTable = LrTable + open LrTable + val gtAction = fn (a,b) => + case a + of SHIFT (STATE s) => + (case b of SHIFT (STATE s') => s>s' | _ => true) + | REDUCE i => (case b of SHIFT _ => false | REDUCE i' => i>i' + | _ => true) + | ACCEPT => (case b of ERROR => true | _ => false) + | ERROR => false + structure ActionEntryList = + struct + type entry = (term, action) pairlist * action + local + fun eqlist (EMPTY, EMPTY) = true + | eqlist (PAIR (T t,d,r),PAIR(T t',d',r')) = + t=t' andalso d=d' andalso eqlist(r,r') + | eqlist _ = false + fun gtlist (PAIR _,EMPTY) = true + | gtlist (PAIR(T t,d,r),PAIR(T t',d',r')) = + t>t' orelse (t=t' andalso + (gtAction(d,d') orelse + (d=d' andalso gtlist(r,r')))) + | gtlist _ = false + in + fun eq ((l,a): entry, (l',a'): entry) = + a = a' andalso eqlist (l,l') + fun gt ((l,a): entry, (l',a'): entry) = + gtAction(a,a') orelse (a=a' andalso gtlist(l,l')) + end + end +(* structure GotoEntryList = + struct + type entry = (nonterm,state) pairlist + val rec eq = + fn (EMPTY,EMPTY) => true + | (PAIR (t,d,r),PAIR(t',d',r')) => + t=t' andalso d=d' andalso eq(r,r') + | _ => false + val rec gt = + fn (PAIR _,EMPTY) => true + | (PAIR(NT t,STATE d,r),PAIR(NT t',STATE d',r')) => + t>t' orelse (t=t' andalso + (d>d' orelse (d=d' andalso gt(r,r')))) + | _ => false + end *) + structure EquivActionList = EquivFun(ActionEntryList) + val states = fn max => + let fun f i=if i int = + fn l => + let fun g(EMPTY,len) = len + | g(PAIR(_,_,r),len) = g(r,len+1) + in g(l,0) + end + val size : (('a,'b) pairlist * 'c) list -> int = + fn l => + let val c = ref 0 + in (app (fn (row,_) => c := !c + length row) l; !c) + end + val shrinkActionList = + fn (table,verbose) => + case EquivActionList.equivalences + (map (describeActions table) (states (numStates table))) + of result as (_,_,l) => (result,if verbose then size l else 0) +end; diff --git a/ml-yacc/src/sigs.sml b/ml-yacc/src/sigs.sml new file mode 100644 index 0000000..e79231c --- /dev/null +++ b/ml-yacc/src/sigs.sml @@ -0,0 +1,375 @@ +(* ML-Yacc Parser Generator (c) 1989, 1991 Andrew W. Appel, David R. Tarditi *) + +signature HEADER = + sig + type pos = int + val lineno : pos ref + val text : string list ref + + type inputSource + val newSource : string * TextIO.instream * TextIO.outstream -> inputSource + val error : inputSource -> pos -> string -> unit + val warn : inputSource -> pos -> string -> unit + val errorOccurred : inputSource -> unit -> bool + + datatype symbol = SYMBOL of string * pos + val symbolName : symbol -> string + val symbolPos : symbol -> pos + val symbolMake : string * int -> symbol + + type ty + val tyName : ty -> string + val tyMake : string -> ty + + (* associativities: each kind of associativity is assigned a unique + integer *) + + datatype prec = LEFT | RIGHT | NONASSOC + datatype control = NODEFAULT | VERBOSE | PARSER_NAME of symbol | + FUNCTOR of string | START_SYM of symbol | + NSHIFT of symbol list | POS of string | PURE | + PARSE_ARG of string * string | + TOKEN_SIG_INFO of string + + datatype rule = RULE of {lhs : symbol, rhs : symbol list, + code : string, prec : symbol option} + + datatype declData = DECL of + {eop : symbol list, + keyword : symbol list, + nonterm : (symbol * ty option) list option, + prec : (prec * (symbol list)) list, + change: (symbol list * symbol list) list, + term : (symbol * ty option) list option, + control : control list, + value : (symbol * string) list} + + val join_decls : declData * declData * inputSource * pos -> declData + + type parseResult + val getResult : parseResult -> string * declData * rule list + end; + +signature PARSE_GEN_PARSER = + sig + structure Header : HEADER + val parse : string -> Header.parseResult * Header.inputSource + end; + +signature PARSE_GEN = + sig + val parseGen : string -> unit + end; + +signature GRAMMAR = + sig + + datatype term = T of int + datatype nonterm = NT of int + datatype symbol = TERM of term | NONTERM of nonterm + + (* grammar: + terminals should be numbered from 0 to terms-1, + nonterminals should be numbered from 0 to nonterms-1, + rules should be numbered between 0 and (length rules) - 1, + higher precedence binds tighter, + start nonterminal should not occur on the rhs of any rule + *) + + datatype grammar = GRAMMAR of + {rules: {lhs : nonterm, rhs : symbol list, + precedence : int option, rulenum : int } list, + terms: int, + nonterms: int, + start : nonterm, + eop : term list, + noshift : term list, + precedence : term -> int option, + termToString : term -> string, + nontermToString : nonterm -> string} + end + +(* signature for internal version of grammar *) + +signature INTGRAMMAR = + sig + structure Grammar : GRAMMAR + structure SymbolAssoc : TABLE + structure NontermAssoc : TABLE + + sharing type SymbolAssoc.key = Grammar.symbol + sharing type NontermAssoc.key = Grammar.nonterm + + datatype rule = RULE of + {lhs : Grammar.nonterm, + rhs : Grammar.symbol list, + + (* internal number of rule - convenient for producing LR graph *) + + num : int, + rulenum : int, + precedence : int option} + + val gtTerm : Grammar.term * Grammar.term -> bool + val eqTerm : Grammar.term * Grammar.term -> bool + + val gtNonterm : Grammar.nonterm * Grammar.nonterm -> bool + val eqNonterm : Grammar.nonterm * Grammar.nonterm -> bool + + val gtSymbol : Grammar.symbol * Grammar.symbol -> bool + val eqSymbol : Grammar.symbol * Grammar.symbol -> bool + + (* Debugging information will be generated only if DEBUG is true. *) + + val DEBUG : bool + + val prRule : (Grammar.symbol -> string) * (Grammar.nonterm -> string) * + (string -> 'b) -> rule -> unit + val prGrammar : (Grammar.symbol -> string)*(Grammar.nonterm -> string) * + (string -> unit) -> Grammar.grammar -> unit + end + +signature CORE = + sig + structure Grammar : GRAMMAR + structure IntGrammar : INTGRAMMAR + sharing Grammar = IntGrammar.Grammar + + datatype item = ITEM of + { rule : IntGrammar.rule, + dot : int, + +(* rhsAfter: The portion of the rhs of a rule that lies after the dot *) + + rhsAfter: Grammar.symbol list } + +(* eqItem and gtItem compare items *) + + val eqItem : item * item -> bool + val gtItem : item * item -> bool + +(* functions for maintaining ordered item lists *) + + val insert : item * item list -> item list + val union : item list * item list -> item list + +(* core: a set of items. It is represented by an ordered list of items. + The list is in ascending order The rule numbers and the positions of the + dots are used to order the items. *) + + datatype core = CORE of item list * int (* state # *) + +(* gtCore and eqCore compare the lists of items *) + + val gtCore : core * core -> bool + val eqCore : core * core -> bool + +(* functions for debugging *) + + val prItem : (Grammar.symbol -> string) * (Grammar.nonterm -> string) * + (string -> unit) -> item -> unit + val prCore : (Grammar.symbol -> string) * (Grammar.nonterm -> string) * + (string -> unit) -> core -> unit +end + +signature CORE_UTILS = + sig + + structure Grammar : GRAMMAR + structure IntGrammar : INTGRAMMAR + structure Core : CORE + + sharing Grammar = IntGrammar.Grammar = Core.Grammar + sharing IntGrammar = Core.IntGrammar + +(* mkFuncs: create functions for the set of productions derived from a + nonterminal, the cores that result from shift/gotos from a core, + and return a list of rules *) + + val mkFuncs : Grammar.grammar -> + { produces : Grammar.nonterm -> IntGrammar.rule list, + +(* shifts: take a core and compute all the cores that result from shifts/gotos + on symbols *) + + shifts : Core.core -> (Grammar.symbol*Core.item list) list, + rules: IntGrammar.rule list, + +(* epsProds: take a core compute epsilon productions for it *) + + epsProds : Core.core -> IntGrammar.rule list} + end + +signature LRGRAPH = + sig + structure Grammar : GRAMMAR + structure IntGrammar : INTGRAMMAR + structure Core : CORE + + sharing Grammar = IntGrammar.Grammar = Core.Grammar + sharing IntGrammar = Core.IntGrammar + + type graph + val edges : Core.core * graph -> {edge:Grammar.symbol,to:Core.core} list + val nodes : graph -> Core.core list + val shift : graph -> int * Grammar.symbol -> int (* int = state # *) + val core : graph -> int -> Core.core (* get core for a state *) + +(* mkGraph: compute the LR(0) sets of items *) + + val mkGraph : Grammar.grammar -> + {graph : graph, + produces : Grammar.nonterm -> IntGrammar.rule list, + rules : IntGrammar.rule list, + epsProds: Core.core -> IntGrammar.rule list} + + val prGraph: (Grammar.symbol -> string)*(Grammar.nonterm -> string) * + (string -> unit) -> graph -> unit + end + +signature LOOK = + sig + structure Grammar : GRAMMAR + structure IntGrammar : INTGRAMMAR + sharing Grammar = IntGrammar.Grammar + + val union : Grammar.term list * Grammar.term list -> Grammar.term list + val make_set : Grammar.term list -> Grammar.term list + + val mkFuncs : {rules : IntGrammar.rule list, nonterms : int, + produces : Grammar.nonterm -> IntGrammar.rule list} -> + {nullable: Grammar.nonterm -> bool, + first : Grammar.symbol list -> Grammar.term list} + + val prLook : (Grammar.term -> string) * (string -> unit) -> + Grammar.term list -> unit + end + +signature LALR_GRAPH = + sig + structure Grammar : GRAMMAR + structure IntGrammar : INTGRAMMAR + structure Core : CORE + structure Graph : LRGRAPH + + sharing Grammar = IntGrammar.Grammar = Core.Grammar = Graph.Grammar + sharing IntGrammar = Core.IntGrammar = Graph.IntGrammar + sharing Core = Graph.Core + + datatype lcore = LCORE of (Core.item * Grammar.term list) list * int + val addLookahead : {graph : Graph.graph, + first : Grammar.symbol list -> Grammar.term list, + eop : Grammar.term list, + nonterms : int, + nullable: Grammar.nonterm -> bool, + produces : Grammar.nonterm -> IntGrammar.rule list, + rules : IntGrammar.rule list, + epsProds : Core.core -> IntGrammar.rule list, + print : string -> unit, (* for debugging *) + termToString : Grammar.term -> string, + nontermToString : Grammar.nonterm -> string} -> + lcore list + val prLcore : (Grammar.symbol -> string) * (Grammar.nonterm -> string) * + (Grammar.term -> string) * (string -> unit) -> + lcore -> unit + end + +(* LR_ERRS: errors found while constructing an LR table *) + +signature LR_ERRS = + sig + structure LrTable : LR_TABLE + + (* RR = reduce/reduce, + SR = shift/reduce + NS: non-shiftable terminal found on the rhs of a rule + NOT_REDUCED n: rule number n was not reduced + START n : start symbol found on the rhs of rule n *) + + datatype err = RR of LrTable.term * LrTable.state * int * int + | SR of LrTable.term * LrTable.state * int + | NS of LrTable.term * int + | NOT_REDUCED of int + | START of int + + val summary : err list -> {rr : int, sr: int, + not_reduced : int, start : int,nonshift : int} + + val printSummary : (string -> unit) -> err list -> unit + + end + +(* PRINT_STRUCT: prints a structure which includes a value 'table' and a + structure Table whose signature matches LR_TABLE. The table in the printed + structure will contain the same information as the one passed to + printStruct, although the representation may be different. It returns + the number of entries left in the table after compaction.*) + +signature PRINT_STRUCT = + sig + structure LrTable : LR_TABLE + val makeStruct : + {table : LrTable.table, + name : string, + print: string -> unit, + verbose : bool + } -> int + end + +(* VERBOSE: signature for a structure which takes a table and creates a + verbose description of it *) + +signature VERBOSE = + sig + structure Errs : LR_ERRS + val printVerbose : + {table : Errs.LrTable.table, + entries : int, + termToString : Errs.LrTable.term -> string, + nontermToString : Errs.LrTable.nonterm -> string, + stateErrs : Errs.LrTable.state -> Errs.err list, + errs : Errs.err list, + print: string -> unit, + printCores : (string -> unit) -> Errs.LrTable.state -> unit, + printRule : (string -> unit) -> int -> unit} -> unit + end + +(* MAKE_LR_TABLE: signature for a structure which includes a structure + matching the signature LR_TABLE and a function which maps grammars + to tables *) + +signature MAKE_LR_TABLE = + sig + structure Grammar : GRAMMAR + structure Errs : LR_ERRS + structure LrTable : LR_TABLE + sharing Errs.LrTable = LrTable + + sharing type LrTable.term = Grammar.term + sharing type LrTable.nonterm = Grammar.nonterm + + (* boolean value determines whether default reductions will be used. + If it is true, reductions will be used. *) + + val mkTable : Grammar.grammar * bool -> + LrTable.table * + (LrTable.state -> Errs.err list) * (* errors in a state *) + ((string -> unit) -> LrTable.state -> unit) * + Errs.err list (* list of all errors *) + end; + +(* SHRINK_LR_TABLE: finds unique action entry rows in the action table + for the LR parser *) + +signature SHRINK_LR_TABLE = + sig + (* Takes an action table represented as a list of action rows. + It returns the number of unique rows left in the action table, + a list of integers which maps each original row to a unique + row, and a list of unique rows *) + structure LrTable : LR_TABLE + val shrinkActionList : LrTable.table * bool -> + (int * int list * + ((LrTable.term,LrTable.action) LrTable.pairlist * + LrTable.action) list) * int + end diff --git a/ml-yacc/src/utils.sig b/ml-yacc/src/utils.sig new file mode 100644 index 0000000..2566b0a --- /dev/null +++ b/ml-yacc/src/utils.sig @@ -0,0 +1,56 @@ +(* ML-Yacc Parser Generator (c) 1989 Andrew W. Appel, David R. Tarditi *) + +signature ORDSET = + sig + type set + type elem + exception Select_arb + val app : (elem -> unit) -> set -> unit + and card: set -> int + and closure: set * (elem -> set) -> set + and difference: set * set -> set + and elem_eq: (elem * elem -> bool) + and elem_gt : (elem * elem -> bool) + and empty: set + and exists: (elem * set) -> bool + and find : (elem * set) -> elem option + and fold: ((elem * 'b) -> 'b) -> set -> 'b -> 'b + and insert: (elem * set) -> set + and is_empty: set -> bool + and make_list: set -> elem list + and make_set: (elem list -> set) + and partition: (elem -> bool) -> (set -> set * set) + and remove: (elem * set) -> set + and revfold: ((elem * 'b) -> 'b) -> set -> 'b -> 'b + and select_arb: set -> elem + and set_eq: (set * set) -> bool + and set_gt: (set * set) -> bool + and singleton: (elem -> set) + and union: set * set -> set + end + +signature TABLE = + sig + type 'a table + type key + val size : 'a table -> int + val empty: 'a table + val exists: (key * 'a table) -> bool + val find : (key * 'a table) -> 'a option + val insert: ((key * 'a) * 'a table) -> 'a table + val make_table : (key * 'a ) list -> 'a table + val make_list : 'a table -> (key * 'a) list + val fold : ((key * 'a) * 'b -> 'b) -> 'a table -> 'b -> 'b + end + +signature HASH = + sig + type table + type elem + + val size : table -> int + val add : elem * table -> table + val find : elem * table -> int option + val exists : elem * table -> bool + val empty : table + end; diff --git a/ml-yacc/src/utils.sml b/ml-yacc/src/utils.sml new file mode 100644 index 0000000..8b2c1a3 --- /dev/null +++ b/ml-yacc/src/utils.sml @@ -0,0 +1,531 @@ +(* ML-Yacc Parser Generator (c) 1989 Andrew W. Appel, David R. Tarditi *) + +(* Implementation of ordered sets using ordered lists and red-black trees. The + code for red-black trees was originally written by Norris Boyd, which was + modified for use here. +*) + +(* ordered sets implemented using ordered lists. + + Upper bound running times for functions implemented here: + + app = O(n) + card = O(n) + closure = O(n^2) + difference = O(n+m), where n,m = the size of the two sets used here. + empty = O(1) + exists = O(n) + find = O(n) + fold = O(n) + insert = O(n) + is_empty = O(1) + make_list = O(1) + make_set = O(n^2) + partition = O(n) + remove = O(n) + revfold = O(n) + select_arb = O(1) + set_eq = O(n), where n = the cardinality of the smaller set + set_gt = O(n), ditto + singleton = O(1) + union = O(n+m) +*) + +functor ListOrdSet(B : sig type elem + val gt : elem * elem -> bool + val eq : elem * elem -> bool + end ) : ORDSET = + +struct + type elem = B.elem + val elem_gt = B.gt + val elem_eq = B.eq + + type set = elem list + exception Select_arb + val empty = nil + + val insert = fn (key,s) => + let fun f (l as (h::t)) = + if elem_gt(key,h) then h::(f t) + else if elem_eq(key,h) then key::t + else key::l + | f nil = [key] + in f s + end + + val select_arb = fn nil => raise Select_arb + | a::b => a + + val exists = fn (key,s) => + let fun f (h::t) = if elem_gt(key,h) then f t + else elem_eq(h,key) + | f nil = false + in f s + end + + val find = fn (key,s) => + let fun f (h::t) = if elem_gt(key,h) then f t + else if elem_eq(h,key) then SOME h + else NONE + | f nil = NONE + in f s + end + + fun revfold f lst init = List.foldl f init lst + fun fold f lst init = List.foldr f init lst + val app = List.app + +fun set_eq(h::t,h'::t') = + (case elem_eq(h,h') + of true => set_eq(t,t') + | a => a) + | set_eq(nil,nil) = true + | set_eq _ = false + +fun set_gt(h::t,h'::t') = + (case elem_gt(h,h') + of false => (case (elem_eq(h,h')) + of true => set_gt(t,t') + | a => a) + | a => a) + | set_gt(_::_,nil) = true + | set_gt _ = false + +fun union(a as (h::t),b as (h'::t')) = + if elem_gt(h',h) then h::union(t,b) + else if elem_eq(h,h') then h::union(t,t') + else h'::union(a,t') + | union(nil,s) = s + | union(s,nil) = s + +val make_list = fn s => s + +val is_empty = fn nil => true | _ => false + +val make_set = fn l => List.foldr insert [] l + +val partition = fn f => fn s => + fold (fn (e,(yes,no)) => + if (f e) then (e::yes,no) else (e::no,yes)) s (nil,nil) + +val remove = fn (e,s) => + let fun f (l as (h::t)) = if elem_gt(h,e) then l + else if elem_eq(h,e) then t + else h::(f t) + | f nil = nil + in f s + end + + (* difference: X-Y *) + + fun difference (nil,_) = nil + | difference (r,nil) = r + | difference (a as (h::t),b as (h'::t')) = + if elem_gt (h',h) then h::difference(t,b) + else if elem_eq(h',h) then difference(t,t') + else difference(a,t') + + fun singleton X = [X] + + fun card(S) = fold (fn (a,count) => count+1) S 0 + + local + fun closure'(from, f, result) = + if is_empty from then result + else + let val (more,result) = + fold (fn (a,(more',result')) => + let val more = f a + val new = difference(more,result) + in (union(more',new),union(result',new)) + end) from + (empty,result) + in closure'(more,f,result) + end + in + fun closure(start, f) = closure'(start, f, start) + end +end + +(* ordered set implemented using red-black trees: + + Upper bound running time of the functions below: + + app: O(n) + card: O(n) + closure: O(n^2 ln n) + difference: O(n ln n) + empty: O(1) + exists: O(ln n) + find: O(ln n) + fold: O(n) + insert: O(ln n) + is_empty: O(1) + make_list: O(n) + make_set: O(n ln n) + partition: O(n ln n) + remove: O(n ln n) + revfold: O(n) + select_arb: O(1) + set_eq: O(n) + set_gt: O(n) + singleton: O(1) + union: O(n ln n) +*) + +functor RbOrdSet (B : sig type elem + val eq : (elem*elem) -> bool + val gt : (elem*elem) -> bool + end + ) : ORDSET = +struct + + type elem = B.elem + val elem_gt = B.gt + val elem_eq = B.eq + + datatype Color = RED | BLACK + + abstype set = EMPTY | TREE of (B.elem * Color * set * set) + with exception Select_arb + val empty = EMPTY + + fun insert(key,t) = + let fun f EMPTY = TREE(key,RED,EMPTY,EMPTY) + | f (TREE(k,BLACK,l,r)) = + if elem_gt (key,k) + then case f r + of r as TREE(rk,RED, rl as TREE(rlk,RED,rll,rlr),rr) => + (case l + of TREE(lk,RED,ll,lr) => + TREE(k,RED,TREE(lk,BLACK,ll,lr), + TREE(rk,BLACK,rl,rr)) + | _ => TREE(rlk,BLACK,TREE(k,RED,l,rll), + TREE(rk,RED,rlr,rr))) + | r as TREE(rk,RED,rl, rr as TREE(rrk,RED,rrl,rrr)) => + (case l + of TREE(lk,RED,ll,lr) => + TREE(k,RED,TREE(lk,BLACK,ll,lr), + TREE(rk,BLACK,rl,rr)) + | _ => TREE(rk,BLACK,TREE(k,RED,l,rl),rr)) + | r => TREE(k,BLACK,l,r) + else if elem_gt(k,key) + then case f l + of l as TREE(lk,RED,ll, lr as TREE(lrk,RED,lrl,lrr)) => + (case r + of TREE(rk,RED,rl,rr) => + TREE(k,RED,TREE(lk,BLACK,ll,lr), + TREE(rk,BLACK,rl,rr)) + | _ => TREE(lrk,BLACK,TREE(lk,RED,ll,lrl), + TREE(k,RED,lrr,r))) + | l as TREE(lk,RED, ll as TREE(llk,RED,lll,llr), lr) => + (case r + of TREE(rk,RED,rl,rr) => + TREE(k,RED,TREE(lk,BLACK,ll,lr), + TREE(rk,BLACK,rl,rr)) + | _ => TREE(lk,BLACK,ll,TREE(k,RED,lr,r))) + | l => TREE(k,BLACK,l,r) + else TREE(key,BLACK,l,r) + | f (TREE(k,RED,l,r)) = + if elem_gt(key,k) then TREE(k,RED,l, f r) + else if elem_gt(k,key) then TREE(k,RED, f l, r) + else TREE(key,RED,l,r) + in case f t + of TREE(k,RED, l as TREE(_,RED,_,_), r) => TREE(k,BLACK,l,r) + | TREE(k,RED, l, r as TREE(_,RED,_,_)) => TREE(k,BLACK,l,r) + | t => t + end + + fun select_arb (TREE(k,_,l,r)) = k + | select_arb EMPTY = raise Select_arb + + fun exists(key,t) = + let fun look EMPTY = false + | look (TREE(k,_,l,r)) = + if elem_gt(k,key) then look l + else if elem_gt(key,k) then look r + else true + in look t + end + + fun find(key,t) = + let fun look EMPTY = NONE + | look (TREE(k,_,l,r)) = + if elem_gt(k,key) then look l + else if elem_gt(key,k) then look r + else SOME k + in look t + end + + fun revfold f t start = + let fun scan (EMPTY,value) = value + | scan (TREE(k,_,l,r),value) = scan(r,f(k,scan(l,value))) + in scan(t,start) + end + + fun fold f t start = + let fun scan(EMPTY,value) = value + | scan(TREE(k,_,l,r),value) = scan(l,f(k,scan(r,value))) + in scan(t,start) + end + + fun app f t = + let fun scan EMPTY = () + | scan(TREE(k,_,l,r)) = (scan l; f k; scan r) + in scan t + end + +(* equal_tree : test if two trees are equal. Two trees are equal if + the set of leaves are equal *) + + fun set_eq (tree1 as (TREE _),tree2 as (TREE _)) = + let datatype pos = L | R | M + exception Done + fun getvalue(stack as ((a,position)::b)) = + (case a + of (TREE(k,_,l,r)) => + (case position + of L => getvalue ((l,L)::(a,M)::b) + | M => (k,case r of EMPTY => b | _ => (a,R)::b) + | R => getvalue ((r,L)::b) + ) + | EMPTY => getvalue b + ) + | getvalue(nil) = raise Done + fun f (nil,nil) = true + | f (s1 as (_ :: _),s2 as (_ :: _ )) = + let val (v1,news1) = getvalue s1 + and (v2,news2) = getvalue s2 + in (elem_eq(v1,v2)) andalso f(news1,news2) + end + | f _ = false + in f ((tree1,L)::nil,(tree2,L)::nil) handle Done => false + end + | set_eq (EMPTY,EMPTY) = true + | set_eq _ = false + + (* gt_tree : Test if tree1 is greater than tree 2 *) + + fun set_gt (tree1,tree2) = + let datatype pos = L | R | M + exception Done + fun getvalue(stack as ((a,position)::b)) = + (case a + of (TREE(k,_,l,r)) => + (case position + of L => getvalue ((l,L)::(a,M)::b) + | M => (k,case r of EMPTY => b | _ => (a,R)::b) + | R => getvalue ((r,L)::b) + ) + | EMPTY => getvalue b + ) + | getvalue(nil) = raise Done + fun f (nil,nil) = false + | f (s1 as (_ :: _),s2 as (_ :: _ )) = + let val (v1,news1) = getvalue s1 + and (v2,news2) = getvalue s2 + in (elem_gt(v1,v2)) orelse (elem_eq(v1,v2) andalso f(news1,news2)) + end + | f (_,nil) = true + | f (nil,_) = false + in f ((tree1,L)::nil,(tree2,L)::nil) handle Done => false + end + + fun is_empty S = (let val _ = select_arb S in false end + handle Select_arb => true) + + fun make_list S = fold (op ::) S nil + + fun make_set l = List.foldr insert empty l + + fun partition F S = fold (fn (a,(Yes,No)) => + if F(a) then (insert(a,Yes),No) + else (Yes,insert(a,No))) + S (empty,empty) + + fun remove(X, XSet) = + let val (YSet, _) = + partition (fn a => not (elem_eq (X, a))) XSet + in YSet + end + + fun difference(Xs, Ys) = + fold (fn (p as (a,Xs')) => + if exists(a,Ys) then Xs' else insert p) + Xs empty + + fun singleton X = insert(X,empty) + + fun card(S) = fold (fn (_,count) => count+1) S 0 + + fun union(Xs,Ys)= fold insert Ys Xs + + local + fun closure'(from, f, result) = + if is_empty from then result + else + let val (more,result) = + fold (fn (a,(more',result')) => + let val more = f a + val new = difference(more,result) + in (union(more',new),union(result',new)) + end) from + (empty,result) + in closure'(more,f,result) + end + in + fun closure(start, f) = closure'(start, f, start) + end + end +end + +(* In utils.sig +signature TABLE = + sig + type 'a table + type key + val size : 'a table -> int + val empty: 'a table + val exists: (key * 'a table) -> bool + val find : (key * 'a table) -> 'a option + val insert: ((key * 'a) * 'a table) -> 'a table + val make_table : (key * 'a ) list -> 'a table + val make_list : 'a table -> (key * 'a) list + val fold : ((key * 'a) * 'b -> 'b) -> 'a table -> 'b -> 'b + end +*) + +functor Table (B : sig type key + val gt : (key * key) -> bool + end + ) : TABLE = +struct + + datatype Color = RED | BLACK + type key = B.key + + abstype 'a table = EMPTY + | TREE of ((B.key * 'a ) * Color * 'a table * 'a table) + with + + val empty = EMPTY + + fun insert(elem as (key,data),t) = + let val key_gt = fn (a,_) => B.gt(key,a) + val key_lt = fn (a,_) => B.gt(a,key) + fun f EMPTY = TREE(elem,RED,EMPTY,EMPTY) + | f (TREE(k,BLACK,l,r)) = + if key_gt k + then case f r + of r as TREE(rk,RED, rl as TREE(rlk,RED,rll,rlr),rr) => + (case l + of TREE(lk,RED,ll,lr) => + TREE(k,RED,TREE(lk,BLACK,ll,lr), + TREE(rk,BLACK,rl,rr)) + | _ => TREE(rlk,BLACK,TREE(k,RED,l,rll), + TREE(rk,RED,rlr,rr))) + | r as TREE(rk,RED,rl, rr as TREE(rrk,RED,rrl,rrr)) => + (case l + of TREE(lk,RED,ll,lr) => + TREE(k,RED,TREE(lk,BLACK,ll,lr), + TREE(rk,BLACK,rl,rr)) + | _ => TREE(rk,BLACK,TREE(k,RED,l,rl),rr)) + | r => TREE(k,BLACK,l,r) + else if key_lt k + then case f l + of l as TREE(lk,RED,ll, lr as TREE(lrk,RED,lrl,lrr)) => + (case r + of TREE(rk,RED,rl,rr) => + TREE(k,RED,TREE(lk,BLACK,ll,lr), + TREE(rk,BLACK,rl,rr)) + | _ => TREE(lrk,BLACK,TREE(lk,RED,ll,lrl), + TREE(k,RED,lrr,r))) + | l as TREE(lk,RED, ll as TREE(llk,RED,lll,llr), lr) => + (case r + of TREE(rk,RED,rl,rr) => + TREE(k,RED,TREE(lk,BLACK,ll,lr), + TREE(rk,BLACK,rl,rr)) + | _ => TREE(lk,BLACK,ll,TREE(k,RED,lr,r))) + | l => TREE(k,BLACK,l,r) + else TREE(elem,BLACK,l,r) + | f (TREE(k,RED,l,r)) = + if key_gt k then TREE(k,RED,l, f r) + else if key_lt k then TREE(k,RED, f l, r) + else TREE(elem,RED,l,r) + in case f t + of TREE(k,RED, l as TREE(_,RED,_,_), r) => TREE(k,BLACK,l,r) + | TREE(k,RED, l, r as TREE(_,RED,_,_)) => TREE(k,BLACK,l,r) + | t => t + end + + fun exists(key,t) = + let fun look EMPTY = false + | look (TREE((k,_),_,l,r)) = + if B.gt(k,key) then look l + else if B.gt(key,k) then look r + else true + in look t + end + + fun find(key,t) = + let fun look EMPTY = NONE + | look (TREE((k,data),_,l,r)) = + if B.gt(k,key) then look l + else if B.gt(key,k) then look r + else SOME data + in look t + end + + fun fold f t start = + let fun scan(EMPTY,value) = value + | scan(TREE(k,_,l,r),value) = scan(l,f(k,scan(r,value))) + in scan(t,start) + end + + fun make_table l = List.foldr insert empty l + + fun size S = fold (fn (_,count) => count+1) S 0 + + fun make_list table = fold (op ::) table nil + + end +end; + +(* assumes that a functor Table with signature TABLE from table.sml is + in the environment *) + +(* In utils.sig +signature HASH = + sig + type table + type elem + + val size : table -> int + val add : elem * table -> table + val find : elem * table -> int option + val exists : elem * table -> bool + val empty : table + end +*) + +(* hash: creates a hash table of size n which assigns each distinct member + a unique integer between 0 and n-1 *) + +functor Hash(B : sig type elem + val gt : elem * elem -> bool + end) : HASH = +struct + type elem=B.elem + structure HashTable = Table(type key=B.elem + val gt = B.gt) + + type table = {count : int, table : int HashTable.table} + + val empty = {count=0,table=HashTable.empty} + val size = fn {count,table} => count + val add = fn (e,{count,table}) => + {count=count+1,table=HashTable.insert((e,count),table)} + val find = fn (e,{table,count}) => HashTable.find(e,table) + val exists = fn (e,{table,count}) => HashTable.exists(e,table) +end; diff --git a/ml-yacc/src/verbose.sml b/ml-yacc/src/verbose.sml new file mode 100644 index 0000000..1721242 --- /dev/null +++ b/ml-yacc/src/verbose.sml @@ -0,0 +1,158 @@ +(* ML-Yacc Parser Generator (c) 1989 Andrew W. Appel, David R. Tarditi *) + +functor mkVerbose(structure Errs : LR_ERRS) : VERBOSE = +struct + structure Errs = Errs + open Errs Errs.LrTable + val mkPrintAction = fn print => + let val printInt = print o (Int.toString : int -> string) + in fn (SHIFT (STATE i)) => + (print "\tshift "; + printInt i; + print "\n") + | (REDUCE rulenum) => + (print "\treduce by rule "; + printInt rulenum; + print "\n") + | ACCEPT => print "\taccept\n" + | ERROR => print "\terror\n" + end + val mkPrintGoto = fn (printNonterm,print) => + let val printInt = print o (Int.toString : int -> string) + in fn (nonterm,STATE i) => + (print "\t"; + printNonterm nonterm; + print "\tgoto "; + printInt i; + print "\n") + end + + val mkPrintTermAction = fn (printTerm,print) => + let val printAction = mkPrintAction print + in fn (term,action) => + (print "\t"; + printTerm term; + printAction action) + end + val mkPrintGoto = fn (printNonterm,print) => + fn (nonterm,STATE i) => + let val printInt = print o (Int.toString : int -> string) + in (print "\t"; + printNonterm nonterm; + print "\tgoto "; + printInt i; + print "\n") + end + val mkPrintError = fn (printTerm,printRule,print) => + let val printInt = print o (Int.toString : int -> string) + val printState = fn STATE s => (print " state "; printInt s) + in fn (RR (term,state,r1,r2)) => + (print "error: "; + printState state; + print ": reduce/reduce conflict between rule "; + printInt r1; + print " and rule "; + printInt r2; + print " on "; + printTerm term; + print "\n") + | (SR (term,state,r1)) => + (print "error: "; + printState state; + print ": shift/reduce conflict "; + print "(shift "; + printTerm term; + print ", reduce by rule "; + printInt r1; + print ")\n") + | NOT_REDUCED i => + (print "warning: rule <"; + printRule i; + print "> will never be reduced\n") + | START i => + (print "warning: start symbol appears on the rhs of "; + print "<"; + printRule i; + print ">\n") + | NS (term,i) => + (print "warning: non-shiftable terminal "; + printTerm term; + print "appears on the rhs of "; + print "<"; + printRule i; + print ">\n") + end + structure PairList : sig + val app : ('a * 'b -> unit) -> ('a,'b) pairlist -> unit + val length : ('a,'b) pairlist -> int + end + = + struct + val app = fn f => + let fun g EMPTY = () + | g (PAIR(a,b,r)) = (f(a,b); g r) + in g + end + val length = fn l => + let fun g(EMPTY,len) = len + | g(PAIR(_,_,r),len) = g(r,len+1) + in g(l,0) + end + end + val printVerbose = + fn {termToString,nontermToString,table,stateErrs,entries:int, + print,printRule,errs,printCores} => + let + val printTerm = print o termToString + val printNonterm = print o nontermToString + + val printCore = printCores print + val printTermAction = mkPrintTermAction(printTerm,print) + val printAction = mkPrintAction print + val printGoto = mkPrintGoto(printNonterm,print) + val printError = mkPrintError(printTerm,printRule print,print) + + val gotos = LrTable.describeGoto table + val actions = LrTable.describeActions table + val states = numStates table + + val gotoTableSize = ref 0 + val actionTableSize = ref 0 + + val _ = if length errs > 0 + then (printSummary print errs; + print "\n"; + app printError errs) + else () + fun loop i = + if i=states then () + else let val s = STATE i + in (app printError (stateErrs s); + print "\n"; + printCore s; + let val (actionList,default) = actions s + val gotoList = gotos s + in (PairList.app printTermAction actionList; + print "\n"; + PairList.app printGoto gotoList; + print "\n"; + print "\t."; + printAction default; + print "\n"; + gotoTableSize:=(!gotoTableSize)+ + PairList.length gotoList; + actionTableSize := (!actionTableSize) + + PairList.length actionList + 1 + ) + end; + loop (i+1)) + end + in loop 0; + print (Int.toString entries ^ " of " ^ + Int.toString (!actionTableSize)^ + " action table entries left after compaction\n"); + print (Int.toString (!gotoTableSize)^ " goto table entries\n") + end +end; + + diff --git a/ml-yacc/src/yacc.grm b/ml-yacc/src/yacc.grm new file mode 100644 index 0000000..0da3de9 --- /dev/null +++ b/ml-yacc/src/yacc.grm @@ -0,0 +1,231 @@ +(* ML-Yacc Parser Generator (c) 1989 Andrew W. Appel, David R. Tarditi *) + +(* parser for the ML parser generator *) + +open Hdr +%% + +%name Mlyacc +%eop EOF +%noshift EOF +%right ARROW +%left ASTERISK +%pos int + +%term ARROW | ASTERISK | BLOCK | BAR | CHANGE | COLON | + COMMA | DELIMITER | EOF | FOR | + HEADER of string | ID of string*int | IDDOT of string | + PERCENT_HEADER | INT of string | KEYWORD | LBRACE | LPAREN | + NAME | NODEFAULT | NONTERM | NOSHIFT | OF | + PERCENT_EOP | PERCENT_PURE | PERCENT_POS | PERCENT_ARG | + PERCENT_TOKEN_SIG_INFO | + PREC of Header.prec | PREC_TAG | PREFER | + PROG of string | RBRACE | RPAREN | SUBST | START | + TERM | TYVAR of string | VERBOSE | VALUE | + UNKNOWN of string | BOGUS_VALUE + +%nonterm + BEGIN of string * Hdr.declData * (Hdr.rule list) | + CONSTR_LIST of (Hdr.symbol * Hdr.ty option) list | + ID_LIST of Hdr.symbol list | + LABEL of string | + MPC_DECL of Hdr.declData | + MPC_DECLS of Hdr.declData | + QUAL_ID of string | + RECORD_LIST of string | + RHS_LIST of {rhs:Hdr.symbol list,code:string, + prec:Hdr.symbol option} list | + G_RULE of Hdr.rule list | + G_RULE_LIST of Hdr.rule list | + G_RULE_PREC of Hdr.symbol option | + SUBST_DECL of (Hdr.symbol list * Hdr.symbol list) list | + SUBST_DEC of (Hdr.symbol list * Hdr.symbol list) | + CHANGE_DECL of (Hdr.symbol list * Hdr.symbol list) list | + CHANGE_DEC of (Hdr.symbol list * Hdr.symbol list) | + TY of string +%header ( +functor MlyaccLrValsFun(structure Hdr : HEADER + where type prec = Header.prec + structure Token : TOKEN) +) +%arg (inputSource) : Hdr.inputSource +%% + +BEGIN : HEADER MPC_DECLS DELIMITER G_RULE_LIST + (HEADER,MPC_DECLS,rev G_RULE_LIST) + +MPC_DECLS : MPC_DECLS MPC_DECL + (join_decls(MPC_DECLS,MPC_DECL,inputSource,MPC_DECLleft)) + +MPC_DECLS: (DECL {prec=nil,nonterm=NONE,term=NONE,eop=nil,control=nil, + keyword=nil,change=nil, + value=nil}) + +MPC_DECL: TERM CONSTR_LIST + (DECL { prec=nil,nonterm=NONE, + term = SOME CONSTR_LIST, eop =nil,control=nil, + change=nil,keyword=nil, + value=nil}) + + | NONTERM CONSTR_LIST + (DECL { prec=nil,control=nil,nonterm= SOME CONSTR_LIST, + term = NONE, eop=nil,change=nil,keyword=nil, + value=nil}) + + | PREC ID_LIST + (DECL {prec= [(PREC,ID_LIST)],control=nil, + nonterm=NONE,term=NONE,eop=nil,change=nil, + keyword=nil,value=nil}) + + | START ID + (DECL {prec=nil,control=[START_SYM (symbolMake ID)],nonterm=NONE, + term = NONE, eop = nil,change=nil,keyword=nil, + value=nil}) + + | PERCENT_EOP ID_LIST + (DECL {prec=nil,control=nil,nonterm=NONE,term=NONE, + eop=ID_LIST, change=nil,keyword=nil, + value=nil}) + + | KEYWORD ID_LIST + (DECL {prec=nil,control=nil,nonterm=NONE,term=NONE,eop=nil, + change=nil,keyword=ID_LIST, + value=nil}) + + | PREFER ID_LIST + (DECL {prec=nil,control=nil,nonterm=NONE,term=NONE,eop=nil, + change=map (fn i=>([],[i])) ID_LIST,keyword=nil, + value=nil}) + + | CHANGE CHANGE_DECL + (DECL {prec=nil,control=nil,nonterm=NONE,term=NONE,eop=nil, + change=CHANGE_DECL,keyword=nil, + value=nil}) + | SUBST SUBST_DECL + (DECL {prec=nil,control=nil,nonterm=NONE,term=NONE,eop=nil, + change=SUBST_DECL,keyword=nil, + value=nil}) + | NOSHIFT ID_LIST + (DECL {prec=nil,control=[NSHIFT ID_LIST],nonterm=NONE,term=NONE, + eop=nil,change=nil,keyword=nil, + value=nil}) + | PERCENT_HEADER PROG + (DECL {prec=nil,control=[FUNCTOR PROG],nonterm=NONE,term=NONE, + eop=nil,change=nil,keyword=nil, + value=nil}) + | PERCENT_TOKEN_SIG_INFO PROG + (DECL {prec=nil,control=[TOKEN_SIG_INFO PROG], + nonterm=NONE,term=NONE, + eop=nil,change=nil,keyword=nil, + value=nil}) + | NAME ID + (DECL {prec=nil,control=[PARSER_NAME (symbolMake ID)], + nonterm=NONE,term=NONE, + eop=nil,change=nil,keyword=nil, value=nil}) + + | PERCENT_ARG PROG COLON TY + (DECL {prec=nil,control=[PARSE_ARG(PROG,TY)],nonterm=NONE, + term=NONE,eop=nil,change=nil,keyword=nil, + value=nil}) + + | VERBOSE + (DECL {prec=nil,control=[Hdr.VERBOSE], + nonterm=NONE,term=NONE,eop=nil, + change=nil,keyword=nil, + value=nil}) + | NODEFAULT + (DECL {prec=nil,control=[Hdr.NODEFAULT], + nonterm=NONE,term=NONE,eop=nil, + change=nil,keyword=nil, + value=nil}) + | PERCENT_PURE + (DECL {prec=nil,control=[Hdr.PURE], + nonterm=NONE,term=NONE,eop=nil, + change=nil,keyword=nil, + value=nil}) + | PERCENT_POS TY + (DECL {prec=nil,control=[Hdr.POS TY], + nonterm=NONE,term=NONE,eop=nil, + change=nil,keyword=nil, + value=nil}) + | VALUE ID PROG + (DECL {prec=nil,control=nil, + nonterm=NONE,term=NONE,eop=nil, + change=nil,keyword=nil, + value=[(symbolMake ID,PROG)]}) + +CHANGE_DECL : CHANGE_DEC BAR CHANGE_DECL + (CHANGE_DEC :: CHANGE_DECL) + | CHANGE_DEC + ([CHANGE_DEC]) + +CHANGE_DEC : ID_LIST ARROW ID_LIST + (ID_LIST1, ID_LIST2) + +SUBST_DECL : SUBST_DEC BAR SUBST_DECL + (SUBST_DEC :: SUBST_DECL) + | SUBST_DEC + ([SUBST_DEC]) + +SUBST_DEC: ID FOR ID + ([symbolMake ID2],[symbolMake ID1]) + +CONSTR_LIST : CONSTR_LIST BAR ID OF TY + ((symbolMake ID,SOME (tyMake TY))::CONSTR_LIST) + + | CONSTR_LIST BAR ID + ((symbolMake ID,NONE)::CONSTR_LIST) + + | ID OF TY ([(symbolMake ID,SOME (tyMake TY))]) + + | ID ([(symbolMake ID,NONE)]) + +G_RULE : ID COLON RHS_LIST + (map (fn {rhs,code,prec} => + Hdr.RULE {lhs=symbolMake ID,rhs=rhs, + code=code,prec=prec}) + RHS_LIST) + +G_RULE_LIST: G_RULE_LIST G_RULE (G_RULE@G_RULE_LIST) + | G_RULE (G_RULE) + +ID_LIST : ID ID_LIST (symbolMake ID :: ID_LIST) + | (nil) + +RHS_LIST : ID_LIST G_RULE_PREC PROG + ([{rhs=ID_LIST,code=PROG,prec=G_RULE_PREC}]) + + | RHS_LIST BAR ID_LIST G_RULE_PREC PROG + ({rhs=ID_LIST,code=PROG,prec=G_RULE_PREC}::RHS_LIST) + +TY : TYVAR + (TYVAR) + | LBRACE RECORD_LIST RBRACE + ("{ "^RECORD_LIST^" } ") + | LBRACE RBRACE + ("{}") + | PROG + (" ( "^PROG^" ) ") + | TY QUAL_ID + (TY^" "^QUAL_ID) + | QUAL_ID + (QUAL_ID) + | TY ASTERISK TY + (TY1^"*"^TY2) + | TY ARROW TY + (TY1 ^ " -> " ^ TY2) + +RECORD_LIST : RECORD_LIST COMMA LABEL COLON TY + (RECORD_LIST^","^LABEL^":"^TY) + | LABEL COLON TY + (LABEL^":"^TY) + +QUAL_ID : ID ((fn (a,_) => a) ID) + | IDDOT QUAL_ID (IDDOT^QUAL_ID) + +LABEL : ID ((fn (a,_) => a) ID) + | INT (INT) + +G_RULE_PREC : PREC_TAG ID (SOME (symbolMake ID)) + +G_RULE_PREC : (NONE) diff --git a/ml-yacc/src/yacc.grm.sig b/ml-yacc/src/yacc.grm.sig new file mode 100644 index 0000000..48dd896 --- /dev/null +++ b/ml-yacc/src/yacc.grm.sig @@ -0,0 +1,54 @@ +signature Mlyacc_TOKENS = +sig +type ('a,'b) token +type svalue +val BOGUS_VALUE: 'a * 'a -> (svalue,'a) token +val UNKNOWN: (string) * 'a * 'a -> (svalue,'a) token +val VALUE: 'a * 'a -> (svalue,'a) token +val VERBOSE: 'a * 'a -> (svalue,'a) token +val TYVAR: (string) * 'a * 'a -> (svalue,'a) token +val TERM: 'a * 'a -> (svalue,'a) token +val START: 'a * 'a -> (svalue,'a) token +val SUBST: 'a * 'a -> (svalue,'a) token +val RPAREN: 'a * 'a -> (svalue,'a) token +val RBRACE: 'a * 'a -> (svalue,'a) token +val PROG: (string) * 'a * 'a -> (svalue,'a) token +val PREFER: 'a * 'a -> (svalue,'a) token +val PREC_TAG: 'a * 'a -> (svalue,'a) token +val PREC: (Header.prec) * 'a * 'a -> (svalue,'a) token +val PERCENT_TOKEN_SIG_INFO: 'a * 'a -> (svalue,'a) token +val PERCENT_ARG: 'a * 'a -> (svalue,'a) token +val PERCENT_POS: 'a * 'a -> (svalue,'a) token +val PERCENT_PURE: 'a * 'a -> (svalue,'a) token +val PERCENT_EOP: 'a * 'a -> (svalue,'a) token +val OF: 'a * 'a -> (svalue,'a) token +val NOSHIFT: 'a * 'a -> (svalue,'a) token +val NONTERM: 'a * 'a -> (svalue,'a) token +val NODEFAULT: 'a * 'a -> (svalue,'a) token +val NAME: 'a * 'a -> (svalue,'a) token +val LPAREN: 'a * 'a -> (svalue,'a) token +val LBRACE: 'a * 'a -> (svalue,'a) token +val KEYWORD: 'a * 'a -> (svalue,'a) token +val INT: (string) * 'a * 'a -> (svalue,'a) token +val PERCENT_HEADER: 'a * 'a -> (svalue,'a) token +val IDDOT: (string) * 'a * 'a -> (svalue,'a) token +val ID: (string*int) * 'a * 'a -> (svalue,'a) token +val HEADER: (string) * 'a * 'a -> (svalue,'a) token +val FOR: 'a * 'a -> (svalue,'a) token +val EOF: 'a * 'a -> (svalue,'a) token +val DELIMITER: 'a * 'a -> (svalue,'a) token +val COMMA: 'a * 'a -> (svalue,'a) token +val COLON: 'a * 'a -> (svalue,'a) token +val CHANGE: 'a * 'a -> (svalue,'a) token +val BAR: 'a * 'a -> (svalue,'a) token +val BLOCK: 'a * 'a -> (svalue,'a) token +val ASTERISK: 'a * 'a -> (svalue,'a) token +val ARROW: 'a * 'a -> (svalue,'a) token +end +signature Mlyacc_LRVALS= +sig +structure Tokens : Mlyacc_TOKENS +structure ParserData:PARSER_DATA +sharing type ParserData.Token.token = Tokens.token +sharing type ParserData.svalue = Tokens.svalue +end diff --git a/ml-yacc/src/yacc.grm.sml b/ml-yacc/src/yacc.grm.sml new file mode 100644 index 0000000..9915ddc --- /dev/null +++ b/ml-yacc/src/yacc.grm.sml @@ -0,0 +1,1052 @@ + + +functor MlyaccLrValsFun(structure Hdr : HEADER + where type prec = Header.prec + structure Token : TOKEN) + = +struct +structure ParserData= +struct +structure Header = +struct +(* ML-Yacc Parser Generator (c) 1989 Andrew W. Appel, David R. Tarditi *) + +(* parser for the ML parser generator *) + +open Hdr + +end +structure LrTable = Token.LrTable +structure Token = Token +local open LrTable in +val table=let val actionRows = +"\ +\\001\000\001\000\076\000\000\000\ +\\001\000\005\000\025\000\008\000\024\000\014\000\023\000\016\000\022\000\ +\\019\000\021\000\020\000\020\000\021\000\019\000\022\000\018\000\ +\\024\000\017\000\025\000\016\000\026\000\015\000\027\000\014\000\ +\\028\000\013\000\029\000\012\000\031\000\011\000\035\000\010\000\ +\\036\000\009\000\037\000\008\000\039\000\007\000\040\000\006\000\000\000\ +\\001\000\006\000\063\000\000\000\ +\\001\000\006\000\074\000\000\000\ +\\001\000\006\000\086\000\000\000\ +\\001\000\006\000\098\000\000\000\ +\\001\000\007\000\085\000\033\000\084\000\000\000\ +\\001\000\009\000\000\000\000\000\ +\\001\000\010\000\061\000\000\000\ +\\001\000\011\000\003\000\000\000\ +\\001\000\012\000\026\000\000\000\ +\\001\000\012\000\028\000\000\000\ +\\001\000\012\000\029\000\000\000\ +\\001\000\012\000\032\000\000\000\ +\\001\000\012\000\044\000\013\000\043\000\000\000\ +\\001\000\012\000\044\000\013\000\043\000\017\000\042\000\032\000\041\000\ +\\038\000\040\000\000\000\ +\\001\000\012\000\048\000\000\000\ +\\001\000\012\000\053\000\000\000\ +\\001\000\012\000\071\000\015\000\070\000\000\000\ +\\001\000\012\000\071\000\015\000\070\000\033\000\069\000\000\000\ +\\001\000\012\000\077\000\000\000\ +\\001\000\012\000\080\000\000\000\ +\\001\000\012\000\101\000\000\000\ +\\001\000\032\000\036\000\000\000\ +\\001\000\032\000\037\000\000\000\ +\\001\000\032\000\050\000\000\000\ +\\001\000\032\000\057\000\000\000\ +\\001\000\032\000\100\000\000\000\ +\\001\000\032\000\104\000\000\000\ +\\106\000\012\000\053\000\000\000\ +\\107\000\000\000\ +\\108\000\000\000\ +\\109\000\004\000\058\000\000\000\ +\\110\000\004\000\058\000\000\000\ +\\111\000\000\000\ +\\112\000\000\000\ +\\113\000\000\000\ +\\114\000\000\000\ +\\115\000\000\000\ +\\116\000\000\000\ +\\117\000\000\000\ +\\118\000\000\000\ +\\119\000\000\000\ +\\120\000\000\000\ +\\121\000\000\000\ +\\122\000\001\000\066\000\002\000\065\000\012\000\044\000\013\000\043\000\000\000\ +\\123\000\000\000\ +\\124\000\000\000\ +\\125\000\000\000\ +\\126\000\001\000\066\000\002\000\065\000\012\000\044\000\013\000\043\000\000\000\ +\\127\000\000\000\ +\\128\000\000\000\ +\\129\000\004\000\075\000\000\000\ +\\130\000\000\000\ +\\131\000\000\000\ +\\132\000\004\000\060\000\000\000\ +\\133\000\000\000\ +\\134\000\001\000\066\000\002\000\065\000\012\000\044\000\013\000\043\000\000\000\ +\\135\000\023\000\091\000\000\000\ +\\136\000\001\000\066\000\002\000\065\000\012\000\044\000\013\000\043\000\000\000\ +\\137\000\023\000\059\000\000\000\ +\\138\000\004\000\094\000\000\000\ +\\139\000\000\000\ +\\140\000\000\000\ +\\141\000\000\000\ +\\142\000\012\000\034\000\000\000\ +\\143\000\000\000\ +\\144\000\000\000\ +\\145\000\000\000\ +\\146\000\000\000\ +\\147\000\000\000\ +\\148\000\000\000\ +\\149\000\000\000\ +\\150\000\000\000\ +\\151\000\012\000\044\000\013\000\043\000\000\000\ +\\152\000\001\000\066\000\002\000\065\000\012\000\044\000\013\000\043\000\000\000\ +\\153\000\001\000\066\000\002\000\065\000\012\000\044\000\013\000\043\000\000\000\ +\\154\000\001\000\066\000\002\000\065\000\012\000\044\000\013\000\043\000\000\000\ +\\155\000\000\000\ +\\156\000\000\000\ +\\157\000\000\000\ +\\158\000\000\000\ +\\159\000\000\000\ +\\160\000\030\000\096\000\000\000\ +\" +val actionRowNumbers = +"\009\000\031\000\001\000\030\000\ +\\010\000\046\000\011\000\012\000\ +\\013\000\065\000\065\000\023\000\ +\\024\000\015\000\048\000\065\000\ +\\065\000\011\000\047\000\016\000\ +\\065\000\025\000\017\000\065\000\ +\\026\000\032\000\060\000\035\000\ +\\055\000\040\000\008\000\038\000\ +\\065\000\034\000\043\000\002\000\ +\\049\000\073\000\068\000\071\000\ +\\019\000\014\000\078\000\036\000\ +\\041\000\033\000\044\000\037\000\ +\\042\000\029\000\063\000\003\000\ +\\052\000\039\000\000\000\050\000\ +\\020\000\015\000\013\000\021\000\ +\\064\000\015\000\072\000\015\000\ +\\015\000\006\000\004\000\070\000\ +\\081\000\080\000\079\000\062\000\ +\\065\000\065\000\065\000\058\000\ +\\059\000\054\000\056\000\045\000\ +\\074\000\075\000\069\000\018\000\ +\\015\000\061\000\083\000\051\000\ +\\053\000\015\000\005\000\077\000\ +\\065\000\027\000\022\000\057\000\ +\\015\000\083\000\066\000\082\000\ +\\076\000\028\000\067\000\007\000" +val gotoT = +"\ +\\001\000\103\000\000\000\ +\\006\000\002\000\000\000\ +\\005\000\003\000\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\002\000\025\000\000\000\ +\\000\000\ +\\013\000\029\000\014\000\028\000\000\000\ +\\003\000\031\000\000\000\ +\\003\000\033\000\000\000\ +\\000\000\ +\\000\000\ +\\007\000\037\000\017\000\036\000\000\000\ +\\000\000\ +\\003\000\043\000\000\000\ +\\003\000\044\000\000\000\ +\\002\000\045\000\000\000\ +\\000\000\ +\\000\000\ +\\003\000\047\000\000\000\ +\\000\000\ +\\010\000\050\000\011\000\049\000\000\000\ +\\003\000\054\000\015\000\053\000\016\000\052\000\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\003\000\060\000\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\007\000\062\000\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\004\000\066\000\008\000\065\000\000\000\ +\\007\000\070\000\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\010\000\071\000\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\007\000\037\000\017\000\076\000\000\000\ +\\013\000\077\000\014\000\028\000\000\000\ +\\000\000\ +\\000\000\ +\\007\000\037\000\017\000\079\000\000\000\ +\\000\000\ +\\007\000\037\000\017\000\080\000\000\000\ +\\007\000\037\000\017\000\081\000\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\003\000\086\000\009\000\085\000\000\000\ +\\003\000\054\000\015\000\087\000\016\000\052\000\000\000\ +\\003\000\088\000\000\000\ +\\000\000\ +\\007\000\062\000\000\000\ +\\000\000\ +\\000\000\ +\\007\000\062\000\000\000\ +\\007\000\062\000\000\000\ +\\007\000\062\000\000\000\ +\\000\000\ +\\004\000\090\000\000\000\ +\\007\000\037\000\017\000\091\000\000\000\ +\\000\000\ +\\012\000\093\000\000\000\ +\\000\000\ +\\000\000\ +\\007\000\037\000\017\000\095\000\000\000\ +\\000\000\ +\\007\000\062\000\000\000\ +\\003\000\097\000\000\000\ +\\000\000\ +\\000\000\ +\\007\000\062\000\000\000\ +\\007\000\037\000\017\000\100\000\000\000\ +\\012\000\101\000\000\000\ +\\000\000\ +\\000\000\ +\\007\000\062\000\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\" +val numstates = 104 +val numrules = 55 +val s = ref "" and index = ref 0 +val string_to_int = fn () => +let val i = !index +in index := i+2; Char.ord(String.sub(!s,i)) + Char.ord(String.sub(!s,i+1)) * 256 +end +val string_to_list = fn s' => + let val len = String.size s' + fun f () = + if !index < len then string_to_int() :: f() + else nil + in index := 0; s := s'; f () + end +val string_to_pairlist = fn (conv_key,conv_entry) => + let fun f () = + case string_to_int() + of 0 => EMPTY + | n => PAIR(conv_key (n-1),conv_entry (string_to_int()),f()) + in f + end +val string_to_pairlist_default = fn (conv_key,conv_entry) => + let val conv_row = string_to_pairlist(conv_key,conv_entry) + in fn () => + let val default = conv_entry(string_to_int()) + val row = conv_row() + in (row,default) + end + end +val string_to_table = fn (convert_row,s') => + let val len = String.size s' + fun f ()= + if !index < len then convert_row() :: f() + else nil + in (s := s'; index := 0; f ()) + end +local + val memo = Array.array(numstates+numrules,ERROR) + val _ =let fun g i=(Array.update(memo,i,REDUCE(i-numstates)); g(i+1)) + fun f i = + if i=numstates then g i + else (Array.update(memo,i,SHIFT (STATE i)); f (i+1)) + in f 0 handle Subscript => () + end +in +val entry_to_action = fn 0 => ACCEPT | 1 => ERROR | j => Array.sub(memo,(j-2)) +end +val gotoT=Array.fromList(string_to_table(string_to_pairlist(NT,STATE),gotoT)) +val actionRows=string_to_table(string_to_pairlist_default(T,entry_to_action),actionRows) +val actionRowNumbers = string_to_list actionRowNumbers +val actionT = let val actionRowLookUp= +let val a=Array.fromList(actionRows) in fn i=>Array.sub(a,i) end +in Array.fromList(map actionRowLookUp actionRowNumbers) +end +in LrTable.mkLrTable {actions=actionT,gotos=gotoT,numRules=numrules, +numStates=numstates,initialState=STATE 0} +end +end +local open Header in +type pos = int +type arg = Hdr.inputSource +structure MlyValue = +struct +datatype svalue = VOID | ntVOID of unit -> unit + | UNKNOWN of unit -> (string) | TYVAR of unit -> (string) + | PROG of unit -> (string) | PREC of unit -> (Header.prec) + | INT of unit -> (string) | IDDOT of unit -> (string) + | ID of unit -> (string*int) | HEADER of unit -> (string) + | TY of unit -> (string) + | CHANGE_DEC of unit -> ( ( Hdr.symbol list * Hdr.symbol list ) ) + | CHANGE_DECL of unit -> ( ( Hdr.symbol list * Hdr.symbol list ) list) + | SUBST_DEC of unit -> ( ( Hdr.symbol list * Hdr.symbol list ) ) + | SUBST_DECL of unit -> ( ( Hdr.symbol list * Hdr.symbol list ) list) + | G_RULE_PREC of unit -> (Hdr.symbol option) + | G_RULE_LIST of unit -> (Hdr.rule list) + | G_RULE of unit -> (Hdr.rule list) + | RHS_LIST of unit -> ({ rhs:Hdr.symbol list,code:string,prec:Hdr.symbol option } list) + | RECORD_LIST of unit -> (string) | QUAL_ID of unit -> (string) + | MPC_DECLS of unit -> (Hdr.declData) + | MPC_DECL of unit -> (Hdr.declData) | LABEL of unit -> (string) + | ID_LIST of unit -> (Hdr.symbol list) + | CONSTR_LIST of unit -> ( ( Hdr.symbol * Hdr.ty option ) list) + | BEGIN of unit -> (string*Hdr.declData* ( Hdr.rule list ) ) +end +type svalue = MlyValue.svalue +type result = string*Hdr.declData* ( Hdr.rule list ) +end +structure EC= +struct +open LrTable +infix 5 $$ +fun x $$ y = y::x +val is_keyword = +fn _ => false +val preferred_change : (term list * term list) list = +nil +val noShift = +fn (T 8) => true | _ => false +val showTerminal = +fn (T 0) => "ARROW" + | (T 1) => "ASTERISK" + | (T 2) => "BLOCK" + | (T 3) => "BAR" + | (T 4) => "CHANGE" + | (T 5) => "COLON" + | (T 6) => "COMMA" + | (T 7) => "DELIMITER" + | (T 8) => "EOF" + | (T 9) => "FOR" + | (T 10) => "HEADER" + | (T 11) => "ID" + | (T 12) => "IDDOT" + | (T 13) => "PERCENT_HEADER" + | (T 14) => "INT" + | (T 15) => "KEYWORD" + | (T 16) => "LBRACE" + | (T 17) => "LPAREN" + | (T 18) => "NAME" + | (T 19) => "NODEFAULT" + | (T 20) => "NONTERM" + | (T 21) => "NOSHIFT" + | (T 22) => "OF" + | (T 23) => "PERCENT_EOP" + | (T 24) => "PERCENT_PURE" + | (T 25) => "PERCENT_POS" + | (T 26) => "PERCENT_ARG" + | (T 27) => "PERCENT_TOKEN_SIG_INFO" + | (T 28) => "PREC" + | (T 29) => "PREC_TAG" + | (T 30) => "PREFER" + | (T 31) => "PROG" + | (T 32) => "RBRACE" + | (T 33) => "RPAREN" + | (T 34) => "SUBST" + | (T 35) => "START" + | (T 36) => "TERM" + | (T 37) => "TYVAR" + | (T 38) => "VERBOSE" + | (T 39) => "VALUE" + | (T 40) => "UNKNOWN" + | (T 41) => "BOGUS_VALUE" + | _ => "bogus-term" +local open Header in +val errtermvalue= +fn _ => MlyValue.VOID +end +val terms : term list = nil + $$ (T 41) $$ (T 39) $$ (T 38) $$ (T 36) $$ (T 35) $$ (T 34) $$ (T 33) + $$ (T 32) $$ (T 30) $$ (T 29) $$ (T 27) $$ (T 26) $$ (T 25) $$ (T 24) + $$ (T 23) $$ (T 22) $$ (T 21) $$ (T 20) $$ (T 19) $$ (T 18) $$ (T 17) + $$ (T 16) $$ (T 15) $$ (T 13) $$ (T 9) $$ (T 8) $$ (T 7) $$ (T 6) $$ +(T 5) $$ (T 4) $$ (T 3) $$ (T 2) $$ (T 1) $$ (T 0)end +structure Actions = +struct +exception mlyAction of int +local open Header in +val actions = +fn (i392,defaultPos,stack, + (inputSource):arg) => +case (i392,stack) +of ( 0, ( ( _, ( MlyValue.G_RULE_LIST G_RULE_LIST1, _, +G_RULE_LIST1right)) :: _ :: ( _, ( MlyValue.MPC_DECLS MPC_DECLS1, _, _ +)) :: ( _, ( MlyValue.HEADER HEADER1, HEADER1left, _)) :: rest671)) => + let val result = MlyValue.BEGIN (fn _ => let val (HEADER as HEADER1 +) = HEADER1 () + val (MPC_DECLS as MPC_DECLS1) = MPC_DECLS1 () + val (G_RULE_LIST as G_RULE_LIST1) = G_RULE_LIST1 () + in (HEADER,MPC_DECLS,rev G_RULE_LIST) +end) + in ( LrTable.NT 0, ( result, HEADER1left, G_RULE_LIST1right), rest671 +) +end +| ( 1, ( ( _, ( MlyValue.MPC_DECL MPC_DECL1, MPC_DECLleft, +MPC_DECL1right)) :: ( _, ( MlyValue.MPC_DECLS MPC_DECLS1, +MPC_DECLS1left, _)) :: rest671)) => let val result = +MlyValue.MPC_DECLS (fn _ => let val (MPC_DECLS as MPC_DECLS1) = +MPC_DECLS1 () + val (MPC_DECL as MPC_DECL1) = MPC_DECL1 () + in (join_decls(MPC_DECLS,MPC_DECL,inputSource,MPC_DECLleft)) +end) + in ( LrTable.NT 5, ( result, MPC_DECLS1left, MPC_DECL1right), rest671 +) +end +| ( 2, ( rest671)) => let val result = MlyValue.MPC_DECLS (fn _ => ( +DECL {prec=nil,nonterm=NONE,term=NONE,eop=nil,control=nil, + keyword=nil,change=nil, + value=nil} +)) + in ( LrTable.NT 5, ( result, defaultPos, defaultPos), rest671) +end +| ( 3, ( ( _, ( MlyValue.CONSTR_LIST CONSTR_LIST1, _, +CONSTR_LIST1right)) :: ( _, ( _, TERM1left, _)) :: rest671)) => let + val result = MlyValue.MPC_DECL (fn _ => let val (CONSTR_LIST as +CONSTR_LIST1) = CONSTR_LIST1 () + in ( +DECL { prec=nil,nonterm=NONE, + term = SOME CONSTR_LIST, eop =nil,control=nil, + change=nil,keyword=nil, + value=nil} +) +end) + in ( LrTable.NT 4, ( result, TERM1left, CONSTR_LIST1right), rest671) + +end +| ( 4, ( ( _, ( MlyValue.CONSTR_LIST CONSTR_LIST1, _, +CONSTR_LIST1right)) :: ( _, ( _, NONTERM1left, _)) :: rest671)) => let + val result = MlyValue.MPC_DECL (fn _ => let val (CONSTR_LIST as +CONSTR_LIST1) = CONSTR_LIST1 () + in ( +DECL { prec=nil,control=nil,nonterm= SOME CONSTR_LIST, + term = NONE, eop=nil,change=nil,keyword=nil, + value=nil} +) +end) + in ( LrTable.NT 4, ( result, NONTERM1left, CONSTR_LIST1right), +rest671) +end +| ( 5, ( ( _, ( MlyValue.ID_LIST ID_LIST1, _, ID_LIST1right)) :: ( _, + ( MlyValue.PREC PREC1, PREC1left, _)) :: rest671)) => let val result + = MlyValue.MPC_DECL (fn _ => let val (PREC as PREC1) = PREC1 () + val (ID_LIST as ID_LIST1) = ID_LIST1 () + in ( +DECL {prec= [(PREC,ID_LIST)],control=nil, + nonterm=NONE,term=NONE,eop=nil,change=nil, + keyword=nil,value=nil} +) +end) + in ( LrTable.NT 4, ( result, PREC1left, ID_LIST1right), rest671) +end +| ( 6, ( ( _, ( MlyValue.ID ID1, _, ID1right)) :: ( _, ( _, +START1left, _)) :: rest671)) => let val result = MlyValue.MPC_DECL + (fn _ => let val (ID as ID1) = ID1 () + in ( +DECL {prec=nil,control=[START_SYM (symbolMake ID)],nonterm=NONE, + term = NONE, eop = nil,change=nil,keyword=nil, + value=nil} +) +end) + in ( LrTable.NT 4, ( result, START1left, ID1right), rest671) +end +| ( 7, ( ( _, ( MlyValue.ID_LIST ID_LIST1, _, ID_LIST1right)) :: ( _, + ( _, PERCENT_EOP1left, _)) :: rest671)) => let val result = +MlyValue.MPC_DECL (fn _ => let val (ID_LIST as ID_LIST1) = ID_LIST1 + () + in ( +DECL {prec=nil,control=nil,nonterm=NONE,term=NONE, + eop=ID_LIST, change=nil,keyword=nil, + value=nil} +) +end) + in ( LrTable.NT 4, ( result, PERCENT_EOP1left, ID_LIST1right), +rest671) +end +| ( 8, ( ( _, ( MlyValue.ID_LIST ID_LIST1, _, ID_LIST1right)) :: ( _, + ( _, KEYWORD1left, _)) :: rest671)) => let val result = +MlyValue.MPC_DECL (fn _ => let val (ID_LIST as ID_LIST1) = ID_LIST1 + () + in ( +DECL {prec=nil,control=nil,nonterm=NONE,term=NONE,eop=nil, + change=nil,keyword=ID_LIST, + value=nil} +) +end) + in ( LrTable.NT 4, ( result, KEYWORD1left, ID_LIST1right), rest671) + +end +| ( 9, ( ( _, ( MlyValue.ID_LIST ID_LIST1, _, ID_LIST1right)) :: ( _, + ( _, PREFER1left, _)) :: rest671)) => let val result = +MlyValue.MPC_DECL (fn _ => let val (ID_LIST as ID_LIST1) = ID_LIST1 + () + in ( +DECL {prec=nil,control=nil,nonterm=NONE,term=NONE,eop=nil, + change=map (fn i=>([],[i])) ID_LIST,keyword=nil, + value=nil} +) +end) + in ( LrTable.NT 4, ( result, PREFER1left, ID_LIST1right), rest671) + +end +| ( 10, ( ( _, ( MlyValue.CHANGE_DECL CHANGE_DECL1, _, +CHANGE_DECL1right)) :: ( _, ( _, CHANGE1left, _)) :: rest671)) => let + val result = MlyValue.MPC_DECL (fn _ => let val (CHANGE_DECL as +CHANGE_DECL1) = CHANGE_DECL1 () + in ( +DECL {prec=nil,control=nil,nonterm=NONE,term=NONE,eop=nil, + change=CHANGE_DECL,keyword=nil, + value=nil} +) +end) + in ( LrTable.NT 4, ( result, CHANGE1left, CHANGE_DECL1right), rest671 +) +end +| ( 11, ( ( _, ( MlyValue.SUBST_DECL SUBST_DECL1, _, SUBST_DECL1right +)) :: ( _, ( _, SUBST1left, _)) :: rest671)) => let val result = +MlyValue.MPC_DECL (fn _ => let val (SUBST_DECL as SUBST_DECL1) = +SUBST_DECL1 () + in ( +DECL {prec=nil,control=nil,nonterm=NONE,term=NONE,eop=nil, + change=SUBST_DECL,keyword=nil, + value=nil} +) +end) + in ( LrTable.NT 4, ( result, SUBST1left, SUBST_DECL1right), rest671) + +end +| ( 12, ( ( _, ( MlyValue.ID_LIST ID_LIST1, _, ID_LIST1right)) :: ( _ +, ( _, NOSHIFT1left, _)) :: rest671)) => let val result = +MlyValue.MPC_DECL (fn _ => let val (ID_LIST as ID_LIST1) = ID_LIST1 + () + in ( +DECL {prec=nil,control=[NSHIFT ID_LIST],nonterm=NONE,term=NONE, + eop=nil,change=nil,keyword=nil, + value=nil} +) +end) + in ( LrTable.NT 4, ( result, NOSHIFT1left, ID_LIST1right), rest671) + +end +| ( 13, ( ( _, ( MlyValue.PROG PROG1, _, PROG1right)) :: ( _, ( _, +PERCENT_HEADER1left, _)) :: rest671)) => let val result = +MlyValue.MPC_DECL (fn _ => let val (PROG as PROG1) = PROG1 () + in ( +DECL {prec=nil,control=[FUNCTOR PROG],nonterm=NONE,term=NONE, + eop=nil,change=nil,keyword=nil, + value=nil} +) +end) + in ( LrTable.NT 4, ( result, PERCENT_HEADER1left, PROG1right), +rest671) +end +| ( 14, ( ( _, ( MlyValue.PROG PROG1, _, PROG1right)) :: ( _, ( _, +PERCENT_TOKEN_SIG_INFO1left, _)) :: rest671)) => let val result = +MlyValue.MPC_DECL (fn _ => let val (PROG as PROG1) = PROG1 () + in ( +DECL {prec=nil,control=[TOKEN_SIG_INFO PROG], + nonterm=NONE,term=NONE, + eop=nil,change=nil,keyword=nil, + value=nil} +) +end) + in ( LrTable.NT 4, ( result, PERCENT_TOKEN_SIG_INFO1left, PROG1right) +, rest671) +end +| ( 15, ( ( _, ( MlyValue.ID ID1, _, ID1right)) :: ( _, ( _, +NAME1left, _)) :: rest671)) => let val result = MlyValue.MPC_DECL (fn + _ => let val (ID as ID1) = ID1 () + in ( +DECL {prec=nil,control=[PARSER_NAME (symbolMake ID)], + nonterm=NONE,term=NONE, + eop=nil,change=nil,keyword=nil, value=nil} +) +end) + in ( LrTable.NT 4, ( result, NAME1left, ID1right), rest671) +end +| ( 16, ( ( _, ( MlyValue.TY TY1, _, TY1right)) :: _ :: ( _, ( +MlyValue.PROG PROG1, _, _)) :: ( _, ( _, PERCENT_ARG1left, _)) :: +rest671)) => let val result = MlyValue.MPC_DECL (fn _ => let val ( +PROG as PROG1) = PROG1 () + val (TY as TY1) = TY1 () + in ( +DECL {prec=nil,control=[PARSE_ARG(PROG,TY)],nonterm=NONE, + term=NONE,eop=nil,change=nil,keyword=nil, + value=nil} +) +end) + in ( LrTable.NT 4, ( result, PERCENT_ARG1left, TY1right), rest671) + +end +| ( 17, ( ( _, ( _, VERBOSE1left, VERBOSE1right)) :: rest671)) => let + val result = MlyValue.MPC_DECL (fn _ => ( +DECL {prec=nil,control=[Hdr.VERBOSE], + nonterm=NONE,term=NONE,eop=nil, + change=nil,keyword=nil, + value=nil} +)) + in ( LrTable.NT 4, ( result, VERBOSE1left, VERBOSE1right), rest671) + +end +| ( 18, ( ( _, ( _, NODEFAULT1left, NODEFAULT1right)) :: rest671)) => + let val result = MlyValue.MPC_DECL (fn _ => ( +DECL {prec=nil,control=[Hdr.NODEFAULT], + nonterm=NONE,term=NONE,eop=nil, + change=nil,keyword=nil, + value=nil} +)) + in ( LrTable.NT 4, ( result, NODEFAULT1left, NODEFAULT1right), +rest671) +end +| ( 19, ( ( _, ( _, PERCENT_PURE1left, PERCENT_PURE1right)) :: +rest671)) => let val result = MlyValue.MPC_DECL (fn _ => ( +DECL {prec=nil,control=[Hdr.PURE], + nonterm=NONE,term=NONE,eop=nil, + change=nil,keyword=nil, + value=nil} +)) + in ( LrTable.NT 4, ( result, PERCENT_PURE1left, PERCENT_PURE1right), +rest671) +end +| ( 20, ( ( _, ( MlyValue.TY TY1, _, TY1right)) :: ( _, ( _, +PERCENT_POS1left, _)) :: rest671)) => let val result = +MlyValue.MPC_DECL (fn _ => let val (TY as TY1) = TY1 () + in ( +DECL {prec=nil,control=[Hdr.POS TY], + nonterm=NONE,term=NONE,eop=nil, + change=nil,keyword=nil, + value=nil} +) +end) + in ( LrTable.NT 4, ( result, PERCENT_POS1left, TY1right), rest671) + +end +| ( 21, ( ( _, ( MlyValue.PROG PROG1, _, PROG1right)) :: ( _, ( +MlyValue.ID ID1, _, _)) :: ( _, ( _, VALUE1left, _)) :: rest671)) => + let val result = MlyValue.MPC_DECL (fn _ => let val (ID as ID1) = +ID1 () + val (PROG as PROG1) = PROG1 () + in ( +DECL {prec=nil,control=nil, + nonterm=NONE,term=NONE,eop=nil, + change=nil,keyword=nil, + value=[(symbolMake ID,PROG)]} +) +end) + in ( LrTable.NT 4, ( result, VALUE1left, PROG1right), rest671) +end +| ( 22, ( ( _, ( MlyValue.CHANGE_DECL CHANGE_DECL1, _, +CHANGE_DECL1right)) :: _ :: ( _, ( MlyValue.CHANGE_DEC CHANGE_DEC1, +CHANGE_DEC1left, _)) :: rest671)) => let val result = +MlyValue.CHANGE_DECL (fn _ => let val (CHANGE_DEC as CHANGE_DEC1) = +CHANGE_DEC1 () + val (CHANGE_DECL as CHANGE_DECL1) = CHANGE_DECL1 () + in (CHANGE_DEC :: CHANGE_DECL) +end) + in ( LrTable.NT 14, ( result, CHANGE_DEC1left, CHANGE_DECL1right), +rest671) +end +| ( 23, ( ( _, ( MlyValue.CHANGE_DEC CHANGE_DEC1, CHANGE_DEC1left, +CHANGE_DEC1right)) :: rest671)) => let val result = +MlyValue.CHANGE_DECL (fn _ => let val (CHANGE_DEC as CHANGE_DEC1) = +CHANGE_DEC1 () + in ([CHANGE_DEC]) +end) + in ( LrTable.NT 14, ( result, CHANGE_DEC1left, CHANGE_DEC1right), +rest671) +end +| ( 24, ( ( _, ( MlyValue.ID_LIST ID_LIST2, _, ID_LIST2right)) :: _ + :: ( _, ( MlyValue.ID_LIST ID_LIST1, ID_LIST1left, _)) :: rest671)) + => let val result = MlyValue.CHANGE_DEC (fn _ => let val ID_LIST1 = + ID_LIST1 () + val ID_LIST2 = ID_LIST2 () + in (ID_LIST1, ID_LIST2) +end) + in ( LrTable.NT 15, ( result, ID_LIST1left, ID_LIST2right), rest671) + +end +| ( 25, ( ( _, ( MlyValue.SUBST_DECL SUBST_DECL1, _, SUBST_DECL1right +)) :: _ :: ( _, ( MlyValue.SUBST_DEC SUBST_DEC1, SUBST_DEC1left, _)) + :: rest671)) => let val result = MlyValue.SUBST_DECL (fn _ => let + val (SUBST_DEC as SUBST_DEC1) = SUBST_DEC1 () + val (SUBST_DECL as SUBST_DECL1) = SUBST_DECL1 () + in (SUBST_DEC :: SUBST_DECL) +end) + in ( LrTable.NT 12, ( result, SUBST_DEC1left, SUBST_DECL1right), +rest671) +end +| ( 26, ( ( _, ( MlyValue.SUBST_DEC SUBST_DEC1, SUBST_DEC1left, +SUBST_DEC1right)) :: rest671)) => let val result = +MlyValue.SUBST_DECL (fn _ => let val (SUBST_DEC as SUBST_DEC1) = +SUBST_DEC1 () + in ([SUBST_DEC]) +end) + in ( LrTable.NT 12, ( result, SUBST_DEC1left, SUBST_DEC1right), +rest671) +end +| ( 27, ( ( _, ( MlyValue.ID ID2, _, ID2right)) :: _ :: ( _, ( +MlyValue.ID ID1, ID1left, _)) :: rest671)) => let val result = +MlyValue.SUBST_DEC (fn _ => let val ID1 = ID1 () + val ID2 = ID2 () + in ([symbolMake ID2],[symbolMake ID1]) +end) + in ( LrTable.NT 13, ( result, ID1left, ID2right), rest671) +end +| ( 28, ( ( _, ( MlyValue.TY TY1, _, TY1right)) :: _ :: ( _, ( +MlyValue.ID ID1, _, _)) :: _ :: ( _, ( MlyValue.CONSTR_LIST +CONSTR_LIST1, CONSTR_LIST1left, _)) :: rest671)) => let val result = +MlyValue.CONSTR_LIST (fn _ => let val (CONSTR_LIST as CONSTR_LIST1) = + CONSTR_LIST1 () + val (ID as ID1) = ID1 () + val (TY as TY1) = TY1 () + in ((symbolMake ID,SOME (tyMake TY))::CONSTR_LIST) +end) + in ( LrTable.NT 1, ( result, CONSTR_LIST1left, TY1right), rest671) + +end +| ( 29, ( ( _, ( MlyValue.ID ID1, _, ID1right)) :: _ :: ( _, ( +MlyValue.CONSTR_LIST CONSTR_LIST1, CONSTR_LIST1left, _)) :: rest671)) + => let val result = MlyValue.CONSTR_LIST (fn _ => let val ( +CONSTR_LIST as CONSTR_LIST1) = CONSTR_LIST1 () + val (ID as ID1) = ID1 () + in ((symbolMake ID,NONE)::CONSTR_LIST) +end) + in ( LrTable.NT 1, ( result, CONSTR_LIST1left, ID1right), rest671) + +end +| ( 30, ( ( _, ( MlyValue.TY TY1, _, TY1right)) :: _ :: ( _, ( +MlyValue.ID ID1, ID1left, _)) :: rest671)) => let val result = +MlyValue.CONSTR_LIST (fn _ => let val (ID as ID1) = ID1 () + val (TY as TY1) = TY1 () + in ([(symbolMake ID,SOME (tyMake TY))]) +end) + in ( LrTable.NT 1, ( result, ID1left, TY1right), rest671) +end +| ( 31, ( ( _, ( MlyValue.ID ID1, ID1left, ID1right)) :: rest671)) => + let val result = MlyValue.CONSTR_LIST (fn _ => let val (ID as ID1) + = ID1 () + in ([(symbolMake ID,NONE)]) +end) + in ( LrTable.NT 1, ( result, ID1left, ID1right), rest671) +end +| ( 32, ( ( _, ( MlyValue.RHS_LIST RHS_LIST1, _, RHS_LIST1right)) :: + _ :: ( _, ( MlyValue.ID ID1, ID1left, _)) :: rest671)) => let val +result = MlyValue.G_RULE (fn _ => let val (ID as ID1) = ID1 () + val (RHS_LIST as RHS_LIST1) = RHS_LIST1 () + in ( +map (fn {rhs,code,prec} => + Hdr.RULE {lhs=symbolMake ID,rhs=rhs, + code=code,prec=prec}) + RHS_LIST +) +end) + in ( LrTable.NT 9, ( result, ID1left, RHS_LIST1right), rest671) +end +| ( 33, ( ( _, ( MlyValue.G_RULE G_RULE1, _, G_RULE1right)) :: ( _, ( + MlyValue.G_RULE_LIST G_RULE_LIST1, G_RULE_LIST1left, _)) :: rest671)) + => let val result = MlyValue.G_RULE_LIST (fn _ => let val ( +G_RULE_LIST as G_RULE_LIST1) = G_RULE_LIST1 () + val (G_RULE as G_RULE1) = G_RULE1 () + in (G_RULE@G_RULE_LIST) +end) + in ( LrTable.NT 10, ( result, G_RULE_LIST1left, G_RULE1right), +rest671) +end +| ( 34, ( ( _, ( MlyValue.G_RULE G_RULE1, G_RULE1left, G_RULE1right)) + :: rest671)) => let val result = MlyValue.G_RULE_LIST (fn _ => let + val (G_RULE as G_RULE1) = G_RULE1 () + in (G_RULE) +end) + in ( LrTable.NT 10, ( result, G_RULE1left, G_RULE1right), rest671) + +end +| ( 35, ( ( _, ( MlyValue.ID_LIST ID_LIST1, _, ID_LIST1right)) :: ( _ +, ( MlyValue.ID ID1, ID1left, _)) :: rest671)) => let val result = +MlyValue.ID_LIST (fn _ => let val (ID as ID1) = ID1 () + val (ID_LIST as ID_LIST1) = ID_LIST1 () + in (symbolMake ID :: ID_LIST) +end) + in ( LrTable.NT 2, ( result, ID1left, ID_LIST1right), rest671) +end +| ( 36, ( rest671)) => let val result = MlyValue.ID_LIST (fn _ => ( +nil)) + in ( LrTable.NT 2, ( result, defaultPos, defaultPos), rest671) +end +| ( 37, ( ( _, ( MlyValue.PROG PROG1, _, PROG1right)) :: ( _, ( +MlyValue.G_RULE_PREC G_RULE_PREC1, _, _)) :: ( _, ( MlyValue.ID_LIST +ID_LIST1, ID_LIST1left, _)) :: rest671)) => let val result = +MlyValue.RHS_LIST (fn _ => let val (ID_LIST as ID_LIST1) = ID_LIST1 + () + val (G_RULE_PREC as G_RULE_PREC1) = G_RULE_PREC1 () + val (PROG as PROG1) = PROG1 () + in ([{rhs=ID_LIST,code=PROG,prec=G_RULE_PREC}]) +end) + in ( LrTable.NT 8, ( result, ID_LIST1left, PROG1right), rest671) +end +| ( 38, ( ( _, ( MlyValue.PROG PROG1, _, PROG1right)) :: ( _, ( +MlyValue.G_RULE_PREC G_RULE_PREC1, _, _)) :: ( _, ( MlyValue.ID_LIST +ID_LIST1, _, _)) :: _ :: ( _, ( MlyValue.RHS_LIST RHS_LIST1, +RHS_LIST1left, _)) :: rest671)) => let val result = MlyValue.RHS_LIST + (fn _ => let val (RHS_LIST as RHS_LIST1) = RHS_LIST1 () + val (ID_LIST as ID_LIST1) = ID_LIST1 () + val (G_RULE_PREC as G_RULE_PREC1) = G_RULE_PREC1 () + val (PROG as PROG1) = PROG1 () + in ({rhs=ID_LIST,code=PROG,prec=G_RULE_PREC}::RHS_LIST) +end) + in ( LrTable.NT 8, ( result, RHS_LIST1left, PROG1right), rest671) +end +| ( 39, ( ( _, ( MlyValue.TYVAR TYVAR1, TYVAR1left, TYVAR1right)) :: +rest671)) => let val result = MlyValue.TY (fn _ => let val (TYVAR + as TYVAR1) = TYVAR1 () + in (TYVAR) +end) + in ( LrTable.NT 16, ( result, TYVAR1left, TYVAR1right), rest671) +end +| ( 40, ( ( _, ( _, _, RBRACE1right)) :: ( _, ( MlyValue.RECORD_LIST +RECORD_LIST1, _, _)) :: ( _, ( _, LBRACE1left, _)) :: rest671)) => let + val result = MlyValue.TY (fn _ => let val (RECORD_LIST as +RECORD_LIST1) = RECORD_LIST1 () + in ("{ "^RECORD_LIST^" } ") +end) + in ( LrTable.NT 16, ( result, LBRACE1left, RBRACE1right), rest671) + +end +| ( 41, ( ( _, ( _, _, RBRACE1right)) :: ( _, ( _, LBRACE1left, _)) + :: rest671)) => let val result = MlyValue.TY (fn _ => ("{}")) + in ( LrTable.NT 16, ( result, LBRACE1left, RBRACE1right), rest671) + +end +| ( 42, ( ( _, ( MlyValue.PROG PROG1, PROG1left, PROG1right)) :: +rest671)) => let val result = MlyValue.TY (fn _ => let val (PROG as +PROG1) = PROG1 () + in (" ( "^PROG^" ) ") +end) + in ( LrTable.NT 16, ( result, PROG1left, PROG1right), rest671) +end +| ( 43, ( ( _, ( MlyValue.QUAL_ID QUAL_ID1, _, QUAL_ID1right)) :: ( _ +, ( MlyValue.TY TY1, TY1left, _)) :: rest671)) => let val result = +MlyValue.TY (fn _ => let val (TY as TY1) = TY1 () + val (QUAL_ID as QUAL_ID1) = QUAL_ID1 () + in (TY^" "^QUAL_ID) +end) + in ( LrTable.NT 16, ( result, TY1left, QUAL_ID1right), rest671) +end +| ( 44, ( ( _, ( MlyValue.QUAL_ID QUAL_ID1, QUAL_ID1left, +QUAL_ID1right)) :: rest671)) => let val result = MlyValue.TY (fn _ => + let val (QUAL_ID as QUAL_ID1) = QUAL_ID1 () + in (QUAL_ID) +end) + in ( LrTable.NT 16, ( result, QUAL_ID1left, QUAL_ID1right), rest671) + +end +| ( 45, ( ( _, ( MlyValue.TY TY2, _, TY2right)) :: _ :: ( _, ( +MlyValue.TY TY1, TY1left, _)) :: rest671)) => let val result = +MlyValue.TY (fn _ => let val TY1 = TY1 () + val TY2 = TY2 () + in (TY1^"*"^TY2) +end) + in ( LrTable.NT 16, ( result, TY1left, TY2right), rest671) +end +| ( 46, ( ( _, ( MlyValue.TY TY2, _, TY2right)) :: _ :: ( _, ( +MlyValue.TY TY1, TY1left, _)) :: rest671)) => let val result = +MlyValue.TY (fn _ => let val TY1 = TY1 () + val TY2 = TY2 () + in (TY1 ^ " -> " ^ TY2) +end) + in ( LrTable.NT 16, ( result, TY1left, TY2right), rest671) +end +| ( 47, ( ( _, ( MlyValue.TY TY1, _, TY1right)) :: _ :: ( _, ( +MlyValue.LABEL LABEL1, _, _)) :: _ :: ( _, ( MlyValue.RECORD_LIST +RECORD_LIST1, RECORD_LIST1left, _)) :: rest671)) => let val result = +MlyValue.RECORD_LIST (fn _ => let val (RECORD_LIST as RECORD_LIST1) = + RECORD_LIST1 () + val (LABEL as LABEL1) = LABEL1 () + val (TY as TY1) = TY1 () + in (RECORD_LIST^","^LABEL^":"^TY) +end) + in ( LrTable.NT 7, ( result, RECORD_LIST1left, TY1right), rest671) + +end +| ( 48, ( ( _, ( MlyValue.TY TY1, _, TY1right)) :: _ :: ( _, ( +MlyValue.LABEL LABEL1, LABEL1left, _)) :: rest671)) => let val result + = MlyValue.RECORD_LIST (fn _ => let val (LABEL as LABEL1) = LABEL1 + () + val (TY as TY1) = TY1 () + in (LABEL^":"^TY) +end) + in ( LrTable.NT 7, ( result, LABEL1left, TY1right), rest671) +end +| ( 49, ( ( _, ( MlyValue.ID ID1, ID1left, ID1right)) :: rest671)) => + let val result = MlyValue.QUAL_ID (fn _ => let val (ID as ID1) = +ID1 () + in ((fn (a,_) => a) ID) +end) + in ( LrTable.NT 6, ( result, ID1left, ID1right), rest671) +end +| ( 50, ( ( _, ( MlyValue.QUAL_ID QUAL_ID1, _, QUAL_ID1right)) :: ( _ +, ( MlyValue.IDDOT IDDOT1, IDDOT1left, _)) :: rest671)) => let val +result = MlyValue.QUAL_ID (fn _ => let val (IDDOT as IDDOT1) = IDDOT1 + () + val (QUAL_ID as QUAL_ID1) = QUAL_ID1 () + in (IDDOT^QUAL_ID) +end) + in ( LrTable.NT 6, ( result, IDDOT1left, QUAL_ID1right), rest671) +end +| ( 51, ( ( _, ( MlyValue.ID ID1, ID1left, ID1right)) :: rest671)) => + let val result = MlyValue.LABEL (fn _ => let val (ID as ID1) = ID1 + () + in ((fn (a,_) => a) ID) +end) + in ( LrTable.NT 3, ( result, ID1left, ID1right), rest671) +end +| ( 52, ( ( _, ( MlyValue.INT INT1, INT1left, INT1right)) :: rest671) +) => let val result = MlyValue.LABEL (fn _ => let val (INT as INT1) + = INT1 () + in (INT) +end) + in ( LrTable.NT 3, ( result, INT1left, INT1right), rest671) +end +| ( 53, ( ( _, ( MlyValue.ID ID1, _, ID1right)) :: ( _, ( _, +PREC_TAG1left, _)) :: rest671)) => let val result = +MlyValue.G_RULE_PREC (fn _ => let val (ID as ID1) = ID1 () + in (SOME (symbolMake ID)) +end) + in ( LrTable.NT 11, ( result, PREC_TAG1left, ID1right), rest671) +end +| ( 54, ( rest671)) => let val result = MlyValue.G_RULE_PREC (fn _ + => (NONE)) + in ( LrTable.NT 11, ( result, defaultPos, defaultPos), rest671) +end +| _ => raise (mlyAction i392) +end +val void = MlyValue.VOID +val extract = fn a => (fn MlyValue.BEGIN x => x +| _ => let exception ParseInternal + in raise ParseInternal end) a () +end +end +structure Tokens : Mlyacc_TOKENS = +struct +type svalue = ParserData.svalue +type ('a,'b) token = ('a,'b) Token.token +fun ARROW (p1,p2) = Token.TOKEN (ParserData.LrTable.T 0,( +ParserData.MlyValue.VOID,p1,p2)) +fun ASTERISK (p1,p2) = Token.TOKEN (ParserData.LrTable.T 1,( +ParserData.MlyValue.VOID,p1,p2)) +fun BLOCK (p1,p2) = Token.TOKEN (ParserData.LrTable.T 2,( +ParserData.MlyValue.VOID,p1,p2)) +fun BAR (p1,p2) = Token.TOKEN (ParserData.LrTable.T 3,( +ParserData.MlyValue.VOID,p1,p2)) +fun CHANGE (p1,p2) = Token.TOKEN (ParserData.LrTable.T 4,( +ParserData.MlyValue.VOID,p1,p2)) +fun COLON (p1,p2) = Token.TOKEN (ParserData.LrTable.T 5,( +ParserData.MlyValue.VOID,p1,p2)) +fun COMMA (p1,p2) = Token.TOKEN (ParserData.LrTable.T 6,( +ParserData.MlyValue.VOID,p1,p2)) +fun DELIMITER (p1,p2) = Token.TOKEN (ParserData.LrTable.T 7,( +ParserData.MlyValue.VOID,p1,p2)) +fun EOF (p1,p2) = Token.TOKEN (ParserData.LrTable.T 8,( +ParserData.MlyValue.VOID,p1,p2)) +fun FOR (p1,p2) = Token.TOKEN (ParserData.LrTable.T 9,( +ParserData.MlyValue.VOID,p1,p2)) +fun HEADER (i,p1,p2) = Token.TOKEN (ParserData.LrTable.T 10,( +ParserData.MlyValue.HEADER (fn () => i),p1,p2)) +fun ID (i,p1,p2) = Token.TOKEN (ParserData.LrTable.T 11,( +ParserData.MlyValue.ID (fn () => i),p1,p2)) +fun IDDOT (i,p1,p2) = Token.TOKEN (ParserData.LrTable.T 12,( +ParserData.MlyValue.IDDOT (fn () => i),p1,p2)) +fun PERCENT_HEADER (p1,p2) = Token.TOKEN (ParserData.LrTable.T 13,( +ParserData.MlyValue.VOID,p1,p2)) +fun INT (i,p1,p2) = Token.TOKEN (ParserData.LrTable.T 14,( +ParserData.MlyValue.INT (fn () => i),p1,p2)) +fun KEYWORD (p1,p2) = Token.TOKEN (ParserData.LrTable.T 15,( +ParserData.MlyValue.VOID,p1,p2)) +fun LBRACE (p1,p2) = Token.TOKEN (ParserData.LrTable.T 16,( +ParserData.MlyValue.VOID,p1,p2)) +fun LPAREN (p1,p2) = Token.TOKEN (ParserData.LrTable.T 17,( +ParserData.MlyValue.VOID,p1,p2)) +fun NAME (p1,p2) = Token.TOKEN (ParserData.LrTable.T 18,( +ParserData.MlyValue.VOID,p1,p2)) +fun NODEFAULT (p1,p2) = Token.TOKEN (ParserData.LrTable.T 19,( +ParserData.MlyValue.VOID,p1,p2)) +fun NONTERM (p1,p2) = Token.TOKEN (ParserData.LrTable.T 20,( +ParserData.MlyValue.VOID,p1,p2)) +fun NOSHIFT (p1,p2) = Token.TOKEN (ParserData.LrTable.T 21,( +ParserData.MlyValue.VOID,p1,p2)) +fun OF (p1,p2) = Token.TOKEN (ParserData.LrTable.T 22,( +ParserData.MlyValue.VOID,p1,p2)) +fun PERCENT_EOP (p1,p2) = Token.TOKEN (ParserData.LrTable.T 23,( +ParserData.MlyValue.VOID,p1,p2)) +fun PERCENT_PURE (p1,p2) = Token.TOKEN (ParserData.LrTable.T 24,( +ParserData.MlyValue.VOID,p1,p2)) +fun PERCENT_POS (p1,p2) = Token.TOKEN (ParserData.LrTable.T 25,( +ParserData.MlyValue.VOID,p1,p2)) +fun PERCENT_ARG (p1,p2) = Token.TOKEN (ParserData.LrTable.T 26,( +ParserData.MlyValue.VOID,p1,p2)) +fun PERCENT_TOKEN_SIG_INFO (p1,p2) = Token.TOKEN ( +ParserData.LrTable.T 27,(ParserData.MlyValue.VOID,p1,p2)) +fun PREC (i,p1,p2) = Token.TOKEN (ParserData.LrTable.T 28,( +ParserData.MlyValue.PREC (fn () => i),p1,p2)) +fun PREC_TAG (p1,p2) = Token.TOKEN (ParserData.LrTable.T 29,( +ParserData.MlyValue.VOID,p1,p2)) +fun PREFER (p1,p2) = Token.TOKEN (ParserData.LrTable.T 30,( +ParserData.MlyValue.VOID,p1,p2)) +fun PROG (i,p1,p2) = Token.TOKEN (ParserData.LrTable.T 31,( +ParserData.MlyValue.PROG (fn () => i),p1,p2)) +fun RBRACE (p1,p2) = Token.TOKEN (ParserData.LrTable.T 32,( +ParserData.MlyValue.VOID,p1,p2)) +fun RPAREN (p1,p2) = Token.TOKEN (ParserData.LrTable.T 33,( +ParserData.MlyValue.VOID,p1,p2)) +fun SUBST (p1,p2) = Token.TOKEN (ParserData.LrTable.T 34,( +ParserData.MlyValue.VOID,p1,p2)) +fun START (p1,p2) = Token.TOKEN (ParserData.LrTable.T 35,( +ParserData.MlyValue.VOID,p1,p2)) +fun TERM (p1,p2) = Token.TOKEN (ParserData.LrTable.T 36,( +ParserData.MlyValue.VOID,p1,p2)) +fun TYVAR (i,p1,p2) = Token.TOKEN (ParserData.LrTable.T 37,( +ParserData.MlyValue.TYVAR (fn () => i),p1,p2)) +fun VERBOSE (p1,p2) = Token.TOKEN (ParserData.LrTable.T 38,( +ParserData.MlyValue.VOID,p1,p2)) +fun VALUE (p1,p2) = Token.TOKEN (ParserData.LrTable.T 39,( +ParserData.MlyValue.VOID,p1,p2)) +fun UNKNOWN (i,p1,p2) = Token.TOKEN (ParserData.LrTable.T 40,( +ParserData.MlyValue.UNKNOWN (fn () => i),p1,p2)) +fun BOGUS_VALUE (p1,p2) = Token.TOKEN (ParserData.LrTable.T 41,( +ParserData.MlyValue.VOID,p1,p2)) +end +end diff --git a/ml-yacc/src/yacc.lex b/ml-yacc/src/yacc.lex new file mode 100644 index 0000000..2775d51 --- /dev/null +++ b/ml-yacc/src/yacc.lex @@ -0,0 +1,140 @@ +(* ML-Yacc Parser Generator (c) 1989 Andrew W. Appel, David R. Tarditi + + yacc.lex: Lexer specification + *) + +structure Tokens = Tokens +type svalue = Tokens.svalue +type pos = int +type ('a,'b) token = ('a,'b) Tokens.token +type lexresult = (svalue,pos) token + +type lexarg = Hdr.inputSource +type arg = lexarg + +open Tokens +val error = Hdr.error +val lineno = Hdr.lineno +val text = Hdr.text + +val pcount = ref 0 +val commentLevel = ref 0 +val actionstart = ref 0 + +val eof = fn i => (if (!pcount)>0 then + error i (!actionstart) + " eof encountered in action beginning here !" + else (); EOF(!lineno,!lineno)) + +val Add = fn s => (text := s::(!text)) + + +local val dict = [("%prec",PREC_TAG),("%term",TERM), + ("%nonterm",NONTERM), ("%eop",PERCENT_EOP),("%start",START), + ("%prefer",PREFER),("%subst",SUBST),("%change",CHANGE), + ("%keyword",KEYWORD),("%name",NAME), + ("%verbose",VERBOSE), ("%nodefault",NODEFAULT), + ("%value",VALUE), ("%noshift",NOSHIFT), + ("%header",PERCENT_HEADER),("%pure",PERCENT_PURE), + ("%token_sig_info",PERCENT_TOKEN_SIG_INFO), + ("%arg",PERCENT_ARG), + ("%pos",PERCENT_POS)] +in +fun lookup (s,left,right) = let + fun f ((a,d)::b) = if a=s then d(left,right) else f b + | f nil = UNKNOWN(s,left,right) + in + f dict + end +end + +fun inc (ri as ref i) = (ri := i+1) +fun dec (ri as ref i) = (ri := i-1) + +%% +%header ( +functor LexMLYACC(structure Tokens : Mlyacc_TOKENS + structure Hdr : HEADER (* = Header *) + where type prec = Header.prec + and type inputSource = Header.inputSource) : ARG_LEXER +); +%arg (inputSource); +%s A CODE F COMMENT STRING EMPTYCOMMENT; +ws = [\t\ ]+; +eol=("\n"|"\013\n"|"\013"); +idchars = [A-Za-z_'0-9]; +id=[A-Za-z]{idchars}*; +tyvar="'"{idchars}*; +qualid ={id}"."; +%% +"(*" => (Add yytext; YYBEGIN COMMENT; commentLevel := 1; + continue(); YYBEGIN INITIAL; continue()); +"(*" => (YYBEGIN EMPTYCOMMENT; commentLevel := 1; continue()); +"(*" => (Add yytext; YYBEGIN COMMENT; commentLevel := 1; + continue(); YYBEGIN CODE; continue()); +[^(%\013\n]+ => (Add yytext; continue()); +"%%" => (YYBEGIN A; HEADER (concat (rev (!text)),!lineno,!lineno)); +{eol} => (Add yytext; inc lineno; continue()); +. => (Add yytext; continue()); + +{eol} => (inc lineno; continue ()); +{ws}+ => (continue()); +of => (OF(!lineno,!lineno)); +for => (FOR(!lineno,!lineno)); +"{" => (LBRACE(!lineno,!lineno)); +"}" => (RBRACE(!lineno,!lineno)); +"," => (COMMA(!lineno,!lineno)); +"*" => (ASTERISK(!lineno,!lineno)); +"->" => (ARROW(!lineno,!lineno)); +"%left" => (PREC(Hdr.LEFT,!lineno,!lineno)); +"%right" => (PREC(Hdr.RIGHT,!lineno,!lineno)); +"%nonassoc" => (PREC(Hdr.NONASSOC,!lineno,!lineno)); +"%"[a-z_]+ => (lookup(yytext,!lineno,!lineno)); +{tyvar} => (TYVAR(yytext,!lineno,!lineno)); +{qualid} => (IDDOT(yytext,!lineno,!lineno)); +[0-9]+ => (INT (yytext,!lineno,!lineno)); +"%%" => (DELIMITER(!lineno,!lineno)); +":" => (COLON(!lineno,!lineno)); +"|" => (BAR(!lineno,!lineno)); +{id} => (ID ((yytext,!lineno),!lineno,!lineno)); +"(" => (pcount := 1; actionstart := (!lineno); + text := nil; YYBEGIN CODE; continue() before YYBEGIN A); +. => (UNKNOWN(yytext,!lineno,!lineno)); +"(" => (inc pcount; Add yytext; continue()); +")" => (dec pcount; + if !pcount = 0 then + PROG (concat (rev (!text)),!lineno,!lineno) + else (Add yytext; continue())); +"\"" => (Add yytext; YYBEGIN STRING; continue()); +[^()"\n\013]+ => (Add yytext; continue()); + +[(*)] => (Add yytext; continue()); +"*)" => (Add yytext; dec commentLevel; + if !commentLevel=0 + then BOGUS_VALUE(!lineno,!lineno) + else continue() + ); +"(*" => (Add yytext; inc commentLevel; continue()); +[^*()\n\013]+ => (Add yytext; continue()); + +[(*)] => (continue()); +"*)" => (dec commentLevel; + if !commentLevel=0 then YYBEGIN A else (); + continue ()); +"(*" => (inc commentLevel; continue()); +[^*()\n\013]+ => (continue()); + +"\"" => (Add yytext; YYBEGIN CODE; continue()); +\\ => (Add yytext; continue()); +{eol} => (Add yytext; error inputSource (!lineno) "unclosed string"; + inc lineno; YYBEGIN CODE; continue()); +[^"\\\n\013]+ => (Add yytext; continue()); +\\\" => (Add yytext; continue()); +\\{eol} => (Add yytext; inc lineno; YYBEGIN F; continue()); +\\[\ \t] => (Add yytext; YYBEGIN F; continue()); + +{ws} => (Add yytext; continue()); +\\ => (Add yytext; YYBEGIN STRING; continue()); +. => (Add yytext; error inputSource (!lineno) "unclosed string"; + YYBEGIN CODE; continue()); + diff --git a/ml-yacc/src/yacc.lex.sml b/ml-yacc/src/yacc.lex.sml new file mode 100644 index 0000000..4c48fa3 --- /dev/null +++ b/ml-yacc/src/yacc.lex.sml @@ -0,0 +1,1808 @@ + +functor LexMLYACC(structure Tokens : Mlyacc_TOKENS + structure Hdr : HEADER (* = Header *) + where type prec = Header.prec + and type inputSource = Header.inputSource) : ARG_LEXER + = struct + + structure yyInput : sig + + type stream + val mkStream : (int -> string) -> stream + val fromStream : TextIO.StreamIO.instream -> stream + val getc : stream -> (Char.char * stream) option + val getpos : stream -> int + val getlineNo : stream -> int + val subtract : stream * stream -> string + val eof : stream -> bool + val lastWasNL : stream -> bool + + end = struct + + structure TIO = TextIO + structure TSIO = TIO.StreamIO + structure TPIO = TextPrimIO + + datatype stream = Stream of { + strm : TSIO.instream, + id : int, (* track which streams originated + * from the same stream *) + pos : int, + lineNo : int, + lastWasNL : bool + } + + local + val next = ref 0 + in + fun nextId() = !next before (next := !next + 1) + end + + val initPos = 2 (* ml-lex bug compatibility *) + + fun mkStream inputN = let + val strm = TSIO.mkInstream + (TPIO.RD { + name = "lexgen", + chunkSize = 4096, + readVec = SOME inputN, + readArr = NONE, + readVecNB = NONE, + readArrNB = NONE, + block = NONE, + canInput = NONE, + avail = (fn () => NONE), + getPos = NONE, + setPos = NONE, + endPos = NONE, + verifyPos = NONE, + close = (fn () => ()), + ioDesc = NONE + }, "") + in + Stream {strm = strm, id = nextId(), pos = initPos, lineNo = 1, + lastWasNL = true} + end + + fun fromStream strm = Stream { + strm = strm, id = nextId(), pos = initPos, lineNo = 1, lastWasNL = true + } + + fun getc (Stream {strm, pos, id, lineNo, ...}) = (case TSIO.input1 strm + of NONE => NONE + | SOME (c, strm') => + SOME (c, Stream { + strm = strm', + pos = pos+1, + id = id, + lineNo = lineNo + + (if c = #"\n" then 1 else 0), + lastWasNL = (c = #"\n") + }) + (* end case*)) + + fun getpos (Stream {pos, ...}) = pos + + fun getlineNo (Stream {lineNo, ...}) = lineNo + + fun subtract (new, old) = let + val Stream {strm = strm, pos = oldPos, id = oldId, ...} = old + val Stream {pos = newPos, id = newId, ...} = new + val (diff, _) = if newId = oldId andalso newPos >= oldPos + then TSIO.inputN (strm, newPos - oldPos) + else raise Fail + "BUG: yyInput: attempted to subtract incompatible streams" + in + diff + end + + fun eof s = not (isSome (getc s)) + + fun lastWasNL (Stream {lastWasNL, ...}) = lastWasNL + + end + + datatype yystart_state = +A | F | CODE | STRING | COMMENT | EMPTYCOMMENT | INITIAL + structure UserDeclarations = + struct + +(* ML-Yacc Parser Generator (c) 1989 Andrew W. Appel, David R. Tarditi + + yacc.lex: Lexer specification + *) + +structure Tokens = Tokens +type svalue = Tokens.svalue +type pos = int +type ('a,'b) token = ('a,'b) Tokens.token +type lexresult = (svalue,pos) token + +type lexarg = Hdr.inputSource +type arg = lexarg + +open Tokens +val error = Hdr.error +val lineno = Hdr.lineno +val text = Hdr.text + +val pcount = ref 0 +val commentLevel = ref 0 +val actionstart = ref 0 + +val eof = fn i => (if (!pcount)>0 then + error i (!actionstart) + " eof encountered in action beginning here !" + else (); EOF(!lineno,!lineno)) + +val Add = fn s => (text := s::(!text)) + + +local val dict = [("%prec",PREC_TAG),("%term",TERM), + ("%nonterm",NONTERM), ("%eop",PERCENT_EOP),("%start",START), + ("%prefer",PREFER),("%subst",SUBST),("%change",CHANGE), + ("%keyword",KEYWORD),("%name",NAME), + ("%verbose",VERBOSE), ("%nodefault",NODEFAULT), + ("%value",VALUE), ("%noshift",NOSHIFT), + ("%header",PERCENT_HEADER),("%pure",PERCENT_PURE), + ("%token_sig_info",PERCENT_TOKEN_SIG_INFO), + ("%arg",PERCENT_ARG), + ("%pos",PERCENT_POS)] +in +fun lookup (s,left,right) = let + fun f ((a,d)::b) = if a=s then d(left,right) else f b + | f nil = UNKNOWN(s,left,right) + in + f dict + end +end + +fun inc (ri as ref i) = (ri := i+1) +fun dec (ri as ref i) = (ri := i-1) + + + + end + + datatype yymatch + = yyNO_MATCH + | yyMATCH of yyInput.stream * action * yymatch + withtype action = yyInput.stream * yymatch -> UserDeclarations.lexresult + + local + + val yytable = +Vector.fromList [] + fun mk yyins = let + (* current start state *) + val yyss = ref INITIAL + fun YYBEGIN ss = (yyss := ss) + (* current input stream *) + val yystrm = ref yyins + (* get one char of input *) + val yygetc = yyInput.getc + (* create yytext *) + fun yymktext(strm) = yyInput.subtract (strm, !yystrm) + open UserDeclarations + fun lex +(yyarg as (inputSource)) () = let + fun continue() = let + val yylastwasn = yyInput.lastWasNL (!yystrm) + fun yystuck (yyNO_MATCH) = raise Fail "stuck state" + | yystuck (yyMATCH (strm, action, old)) = + action (strm, old) + val yypos = yyInput.getpos (!yystrm) + val yygetlineNo = yyInput.getlineNo + fun yyactsToMatches (strm, [], oldMatches) = oldMatches + | yyactsToMatches (strm, act::acts, oldMatches) = + yyMATCH (strm, act, yyactsToMatches (strm, acts, oldMatches)) + fun yygo actTable = + (fn (~1, _, oldMatches) => yystuck oldMatches + | (curState, strm, oldMatches) => let + val (transitions, finals') = Vector.sub (yytable, curState) + val finals = map (fn i => Vector.sub (actTable, i)) finals' + fun tryfinal() = + yystuck (yyactsToMatches (strm, finals, oldMatches)) + fun find (c, []) = NONE + | find (c, (c1, c2, s)::ts) = + if c1 <= c andalso c <= c2 then SOME s + else find (c, ts) + in case yygetc strm + of SOME(c, strm') => + (case find (c, transitions) + of NONE => tryfinal() + | SOME n => + yygo actTable + (n, strm', + yyactsToMatches (strm, finals, oldMatches))) + | NONE => tryfinal() + end) + in +let +fun yyAction0 (strm, lastMatch : yymatch) = let + val yytext = yymktext(strm) + in + yystrm := strm; + (Add yytext; YYBEGIN COMMENT; commentLevel := 1; + continue(); YYBEGIN INITIAL; continue()) + end +fun yyAction1 (strm, lastMatch : yymatch) = (yystrm := strm; + (YYBEGIN EMPTYCOMMENT; commentLevel := 1; continue())) +fun yyAction2 (strm, lastMatch : yymatch) = let + val yytext = yymktext(strm) + in + yystrm := strm; + (Add yytext; YYBEGIN COMMENT; commentLevel := 1; + continue(); YYBEGIN CODE; continue()) + end +fun yyAction3 (strm, lastMatch : yymatch) = let + val yytext = yymktext(strm) + in + yystrm := strm; (Add yytext; continue()) + end +fun yyAction4 (strm, lastMatch : yymatch) = (yystrm := strm; + (YYBEGIN A; HEADER (concat (rev (!text)),!lineno,!lineno))) +fun yyAction5 (strm, lastMatch : yymatch) = let + val yytext = yymktext(strm) + in + yystrm := strm; (Add yytext; inc lineno; continue()) + end +fun yyAction6 (strm, lastMatch : yymatch) = let + val yytext = yymktext(strm) + in + yystrm := strm; (Add yytext; continue()) + end +fun yyAction7 (strm, lastMatch : yymatch) = (yystrm := strm; + (inc lineno; continue ())) +fun yyAction8 (strm, lastMatch : yymatch) = (yystrm := strm; (continue())) +fun yyAction9 (strm, lastMatch : yymatch) = (yystrm := strm; + (OF(!lineno,!lineno))) +fun yyAction10 (strm, lastMatch : yymatch) = (yystrm := strm; + (FOR(!lineno,!lineno))) +fun yyAction11 (strm, lastMatch : yymatch) = (yystrm := strm; + (LBRACE(!lineno,!lineno))) +fun yyAction12 (strm, lastMatch : yymatch) = (yystrm := strm; + (RBRACE(!lineno,!lineno))) +fun yyAction13 (strm, lastMatch : yymatch) = (yystrm := strm; + (COMMA(!lineno,!lineno))) +fun yyAction14 (strm, lastMatch : yymatch) = (yystrm := strm; + (ASTERISK(!lineno,!lineno))) +fun yyAction15 (strm, lastMatch : yymatch) = (yystrm := strm; + (ARROW(!lineno,!lineno))) +fun yyAction16 (strm, lastMatch : yymatch) = (yystrm := strm; + (PREC(Hdr.LEFT,!lineno,!lineno))) +fun yyAction17 (strm, lastMatch : yymatch) = (yystrm := strm; + (PREC(Hdr.RIGHT,!lineno,!lineno))) +fun yyAction18 (strm, lastMatch : yymatch) = (yystrm := strm; + (PREC(Hdr.NONASSOC,!lineno,!lineno))) +fun yyAction19 (strm, lastMatch : yymatch) = let + val yytext = yymktext(strm) + in + yystrm := strm; (lookup(yytext,!lineno,!lineno)) + end +fun yyAction20 (strm, lastMatch : yymatch) = let + val yytext = yymktext(strm) + in + yystrm := strm; (TYVAR(yytext,!lineno,!lineno)) + end +fun yyAction21 (strm, lastMatch : yymatch) = let + val yytext = yymktext(strm) + in + yystrm := strm; (IDDOT(yytext,!lineno,!lineno)) + end +fun yyAction22 (strm, lastMatch : yymatch) = let + val yytext = yymktext(strm) + in + yystrm := strm; (INT (yytext,!lineno,!lineno)) + end +fun yyAction23 (strm, lastMatch : yymatch) = (yystrm := strm; + (DELIMITER(!lineno,!lineno))) +fun yyAction24 (strm, lastMatch : yymatch) = (yystrm := strm; + (COLON(!lineno,!lineno))) +fun yyAction25 (strm, lastMatch : yymatch) = (yystrm := strm; + (BAR(!lineno,!lineno))) +fun yyAction26 (strm, lastMatch : yymatch) = let + val yytext = yymktext(strm) + in + yystrm := strm; (ID ((yytext,!lineno),!lineno,!lineno)) + end +fun yyAction27 (strm, lastMatch : yymatch) = (yystrm := strm; + (pcount := 1; actionstart := (!lineno); + text := nil; YYBEGIN CODE; continue() before YYBEGIN A)) +fun yyAction28 (strm, lastMatch : yymatch) = let + val yytext = yymktext(strm) + in + yystrm := strm; (UNKNOWN(yytext,!lineno,!lineno)) + end +fun yyAction29 (strm, lastMatch : yymatch) = let + val yytext = yymktext(strm) + in + yystrm := strm; (inc pcount; Add yytext; continue()) + end +fun yyAction30 (strm, lastMatch : yymatch) = let + val yytext = yymktext(strm) + in + yystrm := strm; + (dec pcount; + if !pcount = 0 then + PROG (concat (rev (!text)),!lineno,!lineno) + else (Add yytext; continue())) + end +fun yyAction31 (strm, lastMatch : yymatch) = let + val yytext = yymktext(strm) + in + yystrm := strm; (Add yytext; YYBEGIN STRING; continue()) + end +fun yyAction32 (strm, lastMatch : yymatch) = let + val yytext = yymktext(strm) + in + yystrm := strm; (Add yytext; continue()) + end +fun yyAction33 (strm, lastMatch : yymatch) = let + val yytext = yymktext(strm) + in + yystrm := strm; (Add yytext; continue()) + end +fun yyAction34 (strm, lastMatch : yymatch) = let + val yytext = yymktext(strm) + in + yystrm := strm; + (Add yytext; dec commentLevel; + if !commentLevel=0 + then BOGUS_VALUE(!lineno,!lineno) + else continue() + ) + end +fun yyAction35 (strm, lastMatch : yymatch) = let + val yytext = yymktext(strm) + in + yystrm := strm; (Add yytext; inc commentLevel; continue()) + end +fun yyAction36 (strm, lastMatch : yymatch) = let + val yytext = yymktext(strm) + in + yystrm := strm; (Add yytext; continue()) + end +fun yyAction37 (strm, lastMatch : yymatch) = (yystrm := strm; (continue())) +fun yyAction38 (strm, lastMatch : yymatch) = (yystrm := strm; + (dec commentLevel; + if !commentLevel=0 then YYBEGIN A else (); + continue ())) +fun yyAction39 (strm, lastMatch : yymatch) = (yystrm := strm; + (inc commentLevel; continue())) +fun yyAction40 (strm, lastMatch : yymatch) = (yystrm := strm; (continue())) +fun yyAction41 (strm, lastMatch : yymatch) = let + val yytext = yymktext(strm) + in + yystrm := strm; (Add yytext; YYBEGIN CODE; continue()) + end +fun yyAction42 (strm, lastMatch : yymatch) = let + val yytext = yymktext(strm) + in + yystrm := strm; (Add yytext; continue()) + end +fun yyAction43 (strm, lastMatch : yymatch) = let + val yytext = yymktext(strm) + in + yystrm := strm; + (Add yytext; error inputSource (!lineno) "unclosed string"; + inc lineno; YYBEGIN CODE; continue()) + end +fun yyAction44 (strm, lastMatch : yymatch) = let + val yytext = yymktext(strm) + in + yystrm := strm; (Add yytext; continue()) + end +fun yyAction45 (strm, lastMatch : yymatch) = let + val yytext = yymktext(strm) + in + yystrm := strm; (Add yytext; continue()) + end +fun yyAction46 (strm, lastMatch : yymatch) = let + val yytext = yymktext(strm) + in + yystrm := strm; (Add yytext; inc lineno; YYBEGIN F; continue()) + end +fun yyAction47 (strm, lastMatch : yymatch) = let + val yytext = yymktext(strm) + in + yystrm := strm; (Add yytext; YYBEGIN F; continue()) + end +fun yyAction48 (strm, lastMatch : yymatch) = let + val yytext = yymktext(strm) + in + yystrm := strm; (Add yytext; continue()) + end +fun yyAction49 (strm, lastMatch : yymatch) = let + val yytext = yymktext(strm) + in + yystrm := strm; (Add yytext; YYBEGIN STRING; continue()) + end +fun yyAction50 (strm, lastMatch : yymatch) = let + val yytext = yymktext(strm) + in + yystrm := strm; + (Add yytext; error inputSource (!lineno) "unclosed string"; + YYBEGIN CODE; continue()) + end +fun yyQ91 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction0(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction0(strm, yyNO_MATCH) + (* end case *)) +fun yyQ90 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction6(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"*" + then yyQ91(strm', yyMATCH(strm, yyAction6, yyNO_MATCH)) + else yyAction6(strm, yyNO_MATCH) + (* end case *)) +fun yyQ92 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction4(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction4(strm, yyNO_MATCH) + (* end case *)) +fun yyQ89 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction6(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"%" + then yyQ92(strm', yyMATCH(strm, yyAction6, yyNO_MATCH)) + else yyAction6(strm, yyNO_MATCH) + (* end case *)) +fun yyQ56 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction5(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction5(strm, yyNO_MATCH) + (* end case *)) +fun yyQ88 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction5(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"\n" + then yyQ56(strm', yyMATCH(strm, yyAction5, yyNO_MATCH)) + else yyAction5(strm, yyNO_MATCH) + (* end case *)) +fun yyQ93 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction3(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"\^N" + then yyQ93(strm', yyMATCH(strm, yyAction3, yyNO_MATCH)) + else if inp < #"\^N" + then if inp = #"\v" + then yyQ93(strm', yyMATCH(strm, yyAction3, yyNO_MATCH)) + else if inp < #"\v" + then if inp = #"\n" + then yyAction3(strm, yyNO_MATCH) + else yyQ93(strm', yyMATCH(strm, yyAction3, yyNO_MATCH)) + else if inp = #"\r" + then yyAction3(strm, yyNO_MATCH) + else yyQ93(strm', yyMATCH(strm, yyAction3, yyNO_MATCH)) + else if inp = #"&" + then yyQ93(strm', yyMATCH(strm, yyAction3, yyNO_MATCH)) + else if inp < #"&" + then if inp = #"%" + then yyAction3(strm, yyNO_MATCH) + else yyQ93(strm', yyMATCH(strm, yyAction3, yyNO_MATCH)) + else if inp = #"(" + then yyAction3(strm, yyNO_MATCH) + else yyQ93(strm', yyMATCH(strm, yyAction3, yyNO_MATCH)) + (* end case *)) +fun yyQ87 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction3(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"\^N" + then yyQ93(strm', yyMATCH(strm, yyAction3, yyNO_MATCH)) + else if inp < #"\^N" + then if inp = #"\v" + then yyQ93(strm', yyMATCH(strm, yyAction3, yyNO_MATCH)) + else if inp < #"\v" + then if inp = #"\n" + then yyAction3(strm, yyNO_MATCH) + else yyQ93(strm', yyMATCH(strm, yyAction3, yyNO_MATCH)) + else if inp = #"\r" + then yyAction3(strm, yyNO_MATCH) + else yyQ93(strm', yyMATCH(strm, yyAction3, yyNO_MATCH)) + else if inp = #"&" + then yyQ93(strm', yyMATCH(strm, yyAction3, yyNO_MATCH)) + else if inp < #"&" + then if inp = #"%" + then yyAction3(strm, yyNO_MATCH) + else yyQ93(strm', yyMATCH(strm, yyAction3, yyNO_MATCH)) + else if inp = #"(" + then yyAction3(strm, yyNO_MATCH) + else yyQ93(strm', yyMATCH(strm, yyAction3, yyNO_MATCH)) + (* end case *)) +fun yyQ6 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => + if yyInput.eof(!(yystrm)) + then UserDeclarations.eof(yyarg) + else yystuck(lastMatch) + | SOME(inp, strm') => + if inp = #"\^N" + then yyQ87(strm', lastMatch) + else if inp < #"\^N" + then if inp = #"\v" + then yyQ87(strm', lastMatch) + else if inp < #"\v" + then if inp = #"\n" + then yyQ56(strm', lastMatch) + else yyQ87(strm', lastMatch) + else if inp = #"\r" + then yyQ88(strm', lastMatch) + else yyQ87(strm', lastMatch) + else if inp = #"&" + then yyQ87(strm', lastMatch) + else if inp < #"&" + then if inp = #"%" + then yyQ89(strm', lastMatch) + else yyQ87(strm', lastMatch) + else if inp = #"(" + then yyQ90(strm', lastMatch) + else yyQ87(strm', lastMatch) + (* end case *)) +fun yyQ85 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction38(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction38(strm, yyNO_MATCH) + (* end case *)) +fun yyQ84 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction37(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #")" + then yyQ85(strm', yyMATCH(strm, yyAction37, yyNO_MATCH)) + else yyAction37(strm, yyNO_MATCH) + (* end case *)) +fun yyQ83 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction37(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction37(strm, yyNO_MATCH) + (* end case *)) +fun yyQ86 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction39(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction39(strm, yyNO_MATCH) + (* end case *)) +fun yyQ82 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction37(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"*" + then yyQ86(strm', yyMATCH(strm, yyAction37, yyNO_MATCH)) + else yyAction37(strm, yyNO_MATCH) + (* end case *)) +fun yyQ61 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction5(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"\n" + then yyQ56(strm', yyMATCH(strm, yyAction5, yyNO_MATCH)) + else yyAction5(strm, yyNO_MATCH) + (* end case *)) +fun yyQ81 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction40(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"\r" + then yyAction40(strm, yyNO_MATCH) + else if inp < #"\r" + then if inp = #"\n" + then yyAction40(strm, yyNO_MATCH) + else yyQ81(strm', yyMATCH(strm, yyAction40, yyNO_MATCH)) + else if inp = #"(" + then yyAction40(strm, yyNO_MATCH) + else if inp < #"(" + then yyQ81(strm', yyMATCH(strm, yyAction40, yyNO_MATCH)) + else if inp <= #"*" + then yyAction40(strm, yyNO_MATCH) + else yyQ81(strm', yyMATCH(strm, yyAction40, yyNO_MATCH)) + (* end case *)) +fun yyQ5 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => + if yyInput.eof(!(yystrm)) + then UserDeclarations.eof(yyarg) + else yystuck(lastMatch) + | SOME(inp, strm') => + if inp = #"\^N" + then yyQ81(strm', lastMatch) + else if inp < #"\^N" + then if inp = #"\v" + then yyQ81(strm', lastMatch) + else if inp < #"\v" + then if inp = #"\n" + then yyQ56(strm', lastMatch) + else yyQ81(strm', lastMatch) + else if inp = #"\r" + then yyQ61(strm', lastMatch) + else yyQ81(strm', lastMatch) + else if inp = #")" + then yyQ83(strm', lastMatch) + else if inp < #")" + then if inp = #"(" + then yyQ82(strm', lastMatch) + else yyQ81(strm', lastMatch) + else if inp = #"*" + then yyQ84(strm', lastMatch) + else yyQ81(strm', lastMatch) + (* end case *)) +fun yyQ79 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction34(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction34(strm, yyNO_MATCH) + (* end case *)) +fun yyQ78 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction33(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #")" + then yyQ79(strm', yyMATCH(strm, yyAction33, yyNO_MATCH)) + else yyAction33(strm, yyNO_MATCH) + (* end case *)) +fun yyQ77 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction33(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction33(strm, yyNO_MATCH) + (* end case *)) +fun yyQ80 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction35(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction35(strm, yyNO_MATCH) + (* end case *)) +fun yyQ76 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction33(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"*" + then yyQ80(strm', yyMATCH(strm, yyAction33, yyNO_MATCH)) + else yyAction33(strm, yyNO_MATCH) + (* end case *)) +fun yyQ75 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction36(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"\r" + then yyAction36(strm, yyNO_MATCH) + else if inp < #"\r" + then if inp = #"\n" + then yyAction36(strm, yyNO_MATCH) + else yyQ75(strm', yyMATCH(strm, yyAction36, yyNO_MATCH)) + else if inp = #"(" + then yyAction36(strm, yyNO_MATCH) + else if inp < #"(" + then yyQ75(strm', yyMATCH(strm, yyAction36, yyNO_MATCH)) + else if inp <= #"*" + then yyAction36(strm, yyNO_MATCH) + else yyQ75(strm', yyMATCH(strm, yyAction36, yyNO_MATCH)) + (* end case *)) +fun yyQ4 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => + if yyInput.eof(!(yystrm)) + then UserDeclarations.eof(yyarg) + else yystuck(lastMatch) + | SOME(inp, strm') => + if inp = #"\^N" + then yyQ75(strm', lastMatch) + else if inp < #"\^N" + then if inp = #"\v" + then yyQ75(strm', lastMatch) + else if inp < #"\v" + then if inp = #"\n" + then yyQ56(strm', lastMatch) + else yyQ75(strm', lastMatch) + else if inp = #"\r" + then yyQ61(strm', lastMatch) + else yyQ75(strm', lastMatch) + else if inp = #")" + then yyQ77(strm', lastMatch) + else if inp < #")" + then if inp = #"(" + then yyQ76(strm', lastMatch) + else yyQ75(strm', lastMatch) + else if inp = #"*" + then yyQ78(strm', lastMatch) + else yyQ75(strm', lastMatch) + (* end case *)) +fun yyQ74 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction45(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction45(strm, yyNO_MATCH) + (* end case *)) +fun yyQ72 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction46(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction46(strm, yyNO_MATCH) + (* end case *)) +fun yyQ73 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction46(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"\n" + then yyQ72(strm', yyMATCH(strm, yyAction46, yyNO_MATCH)) + else yyAction46(strm, yyNO_MATCH) + (* end case *)) +fun yyQ71 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction47(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction47(strm, yyNO_MATCH) + (* end case *)) +fun yyQ70 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction42(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"\^N" + then yyAction42(strm, yyNO_MATCH) + else if inp < #"\^N" + then if inp = #"\n" + then yyQ72(strm', yyMATCH(strm, yyAction42, yyNO_MATCH)) + else if inp < #"\n" + then if inp = #"\t" + then yyQ71(strm', yyMATCH(strm, yyAction42, yyNO_MATCH)) + else yyAction42(strm, yyNO_MATCH) + else if inp = #"\r" + then yyQ73(strm', yyMATCH(strm, yyAction42, yyNO_MATCH)) + else yyAction42(strm, yyNO_MATCH) + else if inp = #"!" + then yyAction42(strm, yyNO_MATCH) + else if inp < #"!" + then if inp = #" " + then yyQ71(strm', yyMATCH(strm, yyAction42, yyNO_MATCH)) + else yyAction42(strm, yyNO_MATCH) + else if inp = #"\"" + then yyQ74(strm', yyMATCH(strm, yyAction42, yyNO_MATCH)) + else yyAction42(strm, yyNO_MATCH) + (* end case *)) +fun yyQ69 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction41(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction41(strm, yyNO_MATCH) + (* end case *)) +fun yyQ67 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction43(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction43(strm, yyNO_MATCH) + (* end case *)) +fun yyQ68 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction43(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"\n" + then yyQ67(strm', yyMATCH(strm, yyAction43, yyNO_MATCH)) + else yyAction43(strm, yyNO_MATCH) + (* end case *)) +fun yyQ66 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction44(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"\^N" + then yyQ66(strm', yyMATCH(strm, yyAction44, yyNO_MATCH)) + else if inp < #"\^N" + then if inp = #"\v" + then yyQ66(strm', yyMATCH(strm, yyAction44, yyNO_MATCH)) + else if inp < #"\v" + then if inp = #"\n" + then yyAction44(strm, yyNO_MATCH) + else yyQ66(strm', yyMATCH(strm, yyAction44, yyNO_MATCH)) + else if inp = #"\r" + then yyAction44(strm, yyNO_MATCH) + else yyQ66(strm', yyMATCH(strm, yyAction44, yyNO_MATCH)) + else if inp = #"#" + then yyQ66(strm', yyMATCH(strm, yyAction44, yyNO_MATCH)) + else if inp < #"#" + then if inp = #"\"" + then yyAction44(strm, yyNO_MATCH) + else yyQ66(strm', yyMATCH(strm, yyAction44, yyNO_MATCH)) + else if inp = #"\\" + then yyAction44(strm, yyNO_MATCH) + else yyQ66(strm', yyMATCH(strm, yyAction44, yyNO_MATCH)) + (* end case *)) +fun yyQ3 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => + if yyInput.eof(!(yystrm)) + then UserDeclarations.eof(yyarg) + else yystuck(lastMatch) + | SOME(inp, strm') => + if inp = #"\^N" + then yyQ66(strm', lastMatch) + else if inp < #"\^N" + then if inp = #"\v" + then yyQ66(strm', lastMatch) + else if inp < #"\v" + then if inp = #"\n" + then yyQ67(strm', lastMatch) + else yyQ66(strm', lastMatch) + else if inp = #"\r" + then yyQ68(strm', lastMatch) + else yyQ66(strm', lastMatch) + else if inp = #"#" + then yyQ66(strm', lastMatch) + else if inp < #"#" + then if inp = #"\"" + then yyQ69(strm', lastMatch) + else yyQ66(strm', lastMatch) + else if inp = #"\\" + then yyQ70(strm', lastMatch) + else yyQ66(strm', lastMatch) + (* end case *)) +fun yyQ64 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction30(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction30(strm, yyNO_MATCH) + (* end case *)) +fun yyQ65 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction2(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction2(strm, yyNO_MATCH) + (* end case *)) +fun yyQ63 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction29(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"*" + then yyQ65(strm', yyMATCH(strm, yyAction29, yyNO_MATCH)) + else yyAction29(strm, yyNO_MATCH) + (* end case *)) +fun yyQ62 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction31(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction31(strm, yyNO_MATCH) + (* end case *)) +fun yyQ60 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction32(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"\^N" + then yyQ60(strm', yyMATCH(strm, yyAction32, yyNO_MATCH)) + else if inp < #"\^N" + then if inp = #"\v" + then yyQ60(strm', yyMATCH(strm, yyAction32, yyNO_MATCH)) + else if inp < #"\v" + then if inp = #"\n" + then yyAction32(strm, yyNO_MATCH) + else yyQ60(strm', yyMATCH(strm, yyAction32, yyNO_MATCH)) + else if inp = #"\r" + then yyAction32(strm, yyNO_MATCH) + else yyQ60(strm', yyMATCH(strm, yyAction32, yyNO_MATCH)) + else if inp = #"#" + then yyQ60(strm', yyMATCH(strm, yyAction32, yyNO_MATCH)) + else if inp < #"#" + then if inp = #"\"" + then yyAction32(strm, yyNO_MATCH) + else yyQ60(strm', yyMATCH(strm, yyAction32, yyNO_MATCH)) + else if inp = #"(" + then yyAction32(strm, yyNO_MATCH) + else if inp < #"(" + then yyQ60(strm', yyMATCH(strm, yyAction32, yyNO_MATCH)) + else if inp <= #")" + then yyAction32(strm, yyNO_MATCH) + else yyQ60(strm', yyMATCH(strm, yyAction32, yyNO_MATCH)) + (* end case *)) +fun yyQ2 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => + if yyInput.eof(!(yystrm)) + then UserDeclarations.eof(yyarg) + else yystuck(lastMatch) + | SOME(inp, strm') => + if inp = #"\"" + then yyQ62(strm', lastMatch) + else if inp < #"\"" + then if inp = #"\v" + then yyQ60(strm', lastMatch) + else if inp < #"\v" + then if inp = #"\n" + then yyQ56(strm', lastMatch) + else yyQ60(strm', lastMatch) + else if inp = #"\r" + then yyQ61(strm', lastMatch) + else yyQ60(strm', lastMatch) + else if inp = #")" + then yyQ64(strm', lastMatch) + else if inp < #")" + then if inp = #"(" + then yyQ63(strm', lastMatch) + else yyQ60(strm', lastMatch) + else yyQ60(strm', lastMatch) + (* end case *)) +fun yyQ58 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction49(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction49(strm, yyNO_MATCH) + (* end case *)) +fun yyQ57 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction5(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"\n" + then yyQ56(strm', yyMATCH(strm, yyAction5, yyNO_MATCH)) + else yyAction5(strm, yyNO_MATCH) + (* end case *)) +fun yyQ59 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction48(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"\n" + then yyAction48(strm, yyNO_MATCH) + else if inp < #"\n" + then if inp = #"\t" + then yyQ59(strm', yyMATCH(strm, yyAction48, yyNO_MATCH)) + else yyAction48(strm, yyNO_MATCH) + else if inp = #" " + then yyQ59(strm', yyMATCH(strm, yyAction48, yyNO_MATCH)) + else yyAction48(strm, yyNO_MATCH) + (* end case *)) +fun yyQ55 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction48(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"\n" + then yyAction48(strm, yyNO_MATCH) + else if inp < #"\n" + then if inp = #"\t" + then yyQ59(strm', yyMATCH(strm, yyAction48, yyNO_MATCH)) + else yyAction48(strm, yyNO_MATCH) + else if inp = #" " + then yyQ59(strm', yyMATCH(strm, yyAction48, yyNO_MATCH)) + else yyAction48(strm, yyNO_MATCH) + (* end case *)) +fun yyQ54 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction50(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction50(strm, yyNO_MATCH) + (* end case *)) +fun yyQ1 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => + if yyInput.eof(!(yystrm)) + then UserDeclarations.eof(yyarg) + else yystuck(lastMatch) + | SOME(inp, strm') => + if inp = #"\^N" + then yyQ54(strm', lastMatch) + else if inp < #"\^N" + then if inp = #"\n" + then yyQ56(strm', lastMatch) + else if inp < #"\n" + then if inp = #"\t" + then yyQ55(strm', lastMatch) + else yyQ54(strm', lastMatch) + else if inp = #"\r" + then yyQ57(strm', lastMatch) + else yyQ54(strm', lastMatch) + else if inp = #"!" + then yyQ54(strm', lastMatch) + else if inp < #"!" + then if inp = #" " + then yyQ55(strm', lastMatch) + else yyQ54(strm', lastMatch) + else if inp = #"\\" + then yyQ58(strm', lastMatch) + else yyQ54(strm', lastMatch) + (* end case *)) +fun yyQ24 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction12(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction12(strm, yyNO_MATCH) + (* end case *)) +fun yyQ23 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction25(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction25(strm, yyNO_MATCH) + (* end case *)) +fun yyQ22 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction11(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction11(strm, yyNO_MATCH) + (* end case *)) +fun yyQ26 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction21(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction21(strm, yyNO_MATCH) + (* end case *)) +fun yyQ25 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction26(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #":" + then yyAction26(strm, yyNO_MATCH) + else if inp < #":" + then if inp = #"." + then yyQ26(strm', yyMATCH(strm, yyAction26, yyNO_MATCH)) + else if inp < #"." + then if inp = #"'" + then yyQ25(strm', yyMATCH(strm, yyAction26, yyNO_MATCH)) + else yyAction26(strm, yyNO_MATCH) + else if inp = #"/" + then yyAction26(strm, yyNO_MATCH) + else yyQ25(strm', yyMATCH(strm, yyAction26, yyNO_MATCH)) + else if inp = #"_" + then yyQ25(strm', yyMATCH(strm, yyAction26, yyNO_MATCH)) + else if inp < #"_" + then if inp = #"A" + then yyQ25(strm', yyMATCH(strm, yyAction26, yyNO_MATCH)) + else if inp < #"A" + then yyAction26(strm, yyNO_MATCH) + else if inp <= #"Z" + then yyQ25(strm', yyMATCH(strm, yyAction26, yyNO_MATCH)) + else yyAction26(strm, yyNO_MATCH) + else if inp = #"a" + then yyQ25(strm', yyMATCH(strm, yyAction26, yyNO_MATCH)) + else if inp < #"a" + then yyAction26(strm, yyNO_MATCH) + else if inp <= #"z" + then yyQ25(strm', yyMATCH(strm, yyAction26, yyNO_MATCH)) + else yyAction26(strm, yyNO_MATCH) + (* end case *)) +fun yyQ27 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction9(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #":" + then yyAction9(strm, yyNO_MATCH) + else if inp < #":" + then if inp = #"." + then yyQ26(strm', yyMATCH(strm, yyAction9, yyNO_MATCH)) + else if inp < #"." + then if inp = #"'" + then yyQ25(strm', yyMATCH(strm, yyAction9, yyNO_MATCH)) + else yyAction9(strm, yyNO_MATCH) + else if inp = #"/" + then yyAction9(strm, yyNO_MATCH) + else yyQ25(strm', yyMATCH(strm, yyAction9, yyNO_MATCH)) + else if inp = #"_" + then yyQ25(strm', yyMATCH(strm, yyAction9, yyNO_MATCH)) + else if inp < #"_" + then if inp = #"A" + then yyQ25(strm', yyMATCH(strm, yyAction9, yyNO_MATCH)) + else if inp < #"A" + then yyAction9(strm, yyNO_MATCH) + else if inp <= #"Z" + then yyQ25(strm', yyMATCH(strm, yyAction9, yyNO_MATCH)) + else yyAction9(strm, yyNO_MATCH) + else if inp = #"a" + then yyQ25(strm', yyMATCH(strm, yyAction9, yyNO_MATCH)) + else if inp < #"a" + then yyAction9(strm, yyNO_MATCH) + else if inp <= #"z" + then yyQ25(strm', yyMATCH(strm, yyAction9, yyNO_MATCH)) + else yyAction9(strm, yyNO_MATCH) + (* end case *)) +fun yyQ21 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction26(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"A" + then yyQ25(strm', yyMATCH(strm, yyAction26, yyNO_MATCH)) + else if inp < #"A" + then if inp = #"." + then yyQ26(strm', yyMATCH(strm, yyAction26, yyNO_MATCH)) + else if inp < #"." + then if inp = #"'" + then yyQ25(strm', yyMATCH(strm, yyAction26, yyNO_MATCH)) + else yyAction26(strm, yyNO_MATCH) + else if inp = #"0" + then yyQ25(strm', yyMATCH(strm, yyAction26, yyNO_MATCH)) + else if inp < #"0" + then yyAction26(strm, yyNO_MATCH) + else if inp <= #"9" + then yyQ25(strm', yyMATCH(strm, yyAction26, yyNO_MATCH)) + else yyAction26(strm, yyNO_MATCH) + else if inp = #"a" + then yyQ25(strm', yyMATCH(strm, yyAction26, yyNO_MATCH)) + else if inp < #"a" + then if inp = #"_" + then yyQ25(strm', yyMATCH(strm, yyAction26, yyNO_MATCH)) + else if inp < #"_" + then if inp <= #"Z" + then yyQ25(strm', yyMATCH(strm, yyAction26, yyNO_MATCH)) + else yyAction26(strm, yyNO_MATCH) + else yyAction26(strm, yyNO_MATCH) + else if inp = #"g" + then yyQ25(strm', yyMATCH(strm, yyAction26, yyNO_MATCH)) + else if inp < #"g" + then if inp = #"f" + then yyQ27(strm', yyMATCH(strm, yyAction26, yyNO_MATCH)) + else yyQ25(strm', yyMATCH(strm, yyAction26, yyNO_MATCH)) + else if inp <= #"z" + then yyQ25(strm', yyMATCH(strm, yyAction26, yyNO_MATCH)) + else yyAction26(strm, yyNO_MATCH) + (* end case *)) +fun yyQ29 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction10(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #":" + then yyAction10(strm, yyNO_MATCH) + else if inp < #":" + then if inp = #"." + then yyQ26(strm', yyMATCH(strm, yyAction10, yyNO_MATCH)) + else if inp < #"." + then if inp = #"'" + then yyQ25(strm', yyMATCH(strm, yyAction10, yyNO_MATCH)) + else yyAction10(strm, yyNO_MATCH) + else if inp = #"/" + then yyAction10(strm, yyNO_MATCH) + else yyQ25(strm', yyMATCH(strm, yyAction10, yyNO_MATCH)) + else if inp = #"_" + then yyQ25(strm', yyMATCH(strm, yyAction10, yyNO_MATCH)) + else if inp < #"_" + then if inp = #"A" + then yyQ25(strm', yyMATCH(strm, yyAction10, yyNO_MATCH)) + else if inp < #"A" + then yyAction10(strm, yyNO_MATCH) + else if inp <= #"Z" + then yyQ25(strm', yyMATCH(strm, yyAction10, yyNO_MATCH)) + else yyAction10(strm, yyNO_MATCH) + else if inp = #"a" + then yyQ25(strm', yyMATCH(strm, yyAction10, yyNO_MATCH)) + else if inp < #"a" + then yyAction10(strm, yyNO_MATCH) + else if inp <= #"z" + then yyQ25(strm', yyMATCH(strm, yyAction10, yyNO_MATCH)) + else yyAction10(strm, yyNO_MATCH) + (* end case *)) +fun yyQ28 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction26(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"A" + then yyQ25(strm', yyMATCH(strm, yyAction26, yyNO_MATCH)) + else if inp < #"A" + then if inp = #"." + then yyQ26(strm', yyMATCH(strm, yyAction26, yyNO_MATCH)) + else if inp < #"." + then if inp = #"'" + then yyQ25(strm', yyMATCH(strm, yyAction26, yyNO_MATCH)) + else yyAction26(strm, yyNO_MATCH) + else if inp = #"0" + then yyQ25(strm', yyMATCH(strm, yyAction26, yyNO_MATCH)) + else if inp < #"0" + then yyAction26(strm, yyNO_MATCH) + else if inp <= #"9" + then yyQ25(strm', yyMATCH(strm, yyAction26, yyNO_MATCH)) + else yyAction26(strm, yyNO_MATCH) + else if inp = #"a" + then yyQ25(strm', yyMATCH(strm, yyAction26, yyNO_MATCH)) + else if inp < #"a" + then if inp = #"_" + then yyQ25(strm', yyMATCH(strm, yyAction26, yyNO_MATCH)) + else if inp < #"_" + then if inp <= #"Z" + then yyQ25(strm', yyMATCH(strm, yyAction26, yyNO_MATCH)) + else yyAction26(strm, yyNO_MATCH) + else yyAction26(strm, yyNO_MATCH) + else if inp = #"s" + then yyQ25(strm', yyMATCH(strm, yyAction26, yyNO_MATCH)) + else if inp < #"s" + then if inp = #"r" + then yyQ29(strm', yyMATCH(strm, yyAction26, yyNO_MATCH)) + else yyQ25(strm', yyMATCH(strm, yyAction26, yyNO_MATCH)) + else if inp <= #"z" + then yyQ25(strm', yyMATCH(strm, yyAction26, yyNO_MATCH)) + else yyAction26(strm, yyNO_MATCH) + (* end case *)) +fun yyQ20 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction26(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"A" + then yyQ25(strm', yyMATCH(strm, yyAction26, yyNO_MATCH)) + else if inp < #"A" + then if inp = #"." + then yyQ26(strm', yyMATCH(strm, yyAction26, yyNO_MATCH)) + else if inp < #"." + then if inp = #"'" + then yyQ25(strm', yyMATCH(strm, yyAction26, yyNO_MATCH)) + else yyAction26(strm, yyNO_MATCH) + else if inp = #"0" + then yyQ25(strm', yyMATCH(strm, yyAction26, yyNO_MATCH)) + else if inp < #"0" + then yyAction26(strm, yyNO_MATCH) + else if inp <= #"9" + then yyQ25(strm', yyMATCH(strm, yyAction26, yyNO_MATCH)) + else yyAction26(strm, yyNO_MATCH) + else if inp = #"a" + then yyQ25(strm', yyMATCH(strm, yyAction26, yyNO_MATCH)) + else if inp < #"a" + then if inp = #"_" + then yyQ25(strm', yyMATCH(strm, yyAction26, yyNO_MATCH)) + else if inp < #"_" + then if inp <= #"Z" + then yyQ25(strm', yyMATCH(strm, yyAction26, yyNO_MATCH)) + else yyAction26(strm, yyNO_MATCH) + else yyAction26(strm, yyNO_MATCH) + else if inp = #"p" + then yyQ25(strm', yyMATCH(strm, yyAction26, yyNO_MATCH)) + else if inp < #"p" + then if inp = #"o" + then yyQ28(strm', yyMATCH(strm, yyAction26, yyNO_MATCH)) + else yyQ25(strm', yyMATCH(strm, yyAction26, yyNO_MATCH)) + else if inp <= #"z" + then yyQ25(strm', yyMATCH(strm, yyAction26, yyNO_MATCH)) + else yyAction26(strm, yyNO_MATCH) + (* end case *)) +fun yyQ19 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction26(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #":" + then yyAction26(strm, yyNO_MATCH) + else if inp < #":" + then if inp = #"." + then yyQ26(strm', yyMATCH(strm, yyAction26, yyNO_MATCH)) + else if inp < #"." + then if inp = #"'" + then yyQ25(strm', yyMATCH(strm, yyAction26, yyNO_MATCH)) + else yyAction26(strm, yyNO_MATCH) + else if inp = #"/" + then yyAction26(strm, yyNO_MATCH) + else yyQ25(strm', yyMATCH(strm, yyAction26, yyNO_MATCH)) + else if inp = #"_" + then yyQ25(strm', yyMATCH(strm, yyAction26, yyNO_MATCH)) + else if inp < #"_" + then if inp = #"A" + then yyQ25(strm', yyMATCH(strm, yyAction26, yyNO_MATCH)) + else if inp < #"A" + then yyAction26(strm, yyNO_MATCH) + else if inp <= #"Z" + then yyQ25(strm', yyMATCH(strm, yyAction26, yyNO_MATCH)) + else yyAction26(strm, yyNO_MATCH) + else if inp = #"a" + then yyQ25(strm', yyMATCH(strm, yyAction26, yyNO_MATCH)) + else if inp < #"a" + then yyAction26(strm, yyNO_MATCH) + else if inp <= #"z" + then yyQ25(strm', yyMATCH(strm, yyAction26, yyNO_MATCH)) + else yyAction26(strm, yyNO_MATCH) + (* end case *)) +fun yyQ18 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction24(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction24(strm, yyNO_MATCH) + (* end case *)) +fun yyQ30 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction22(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"0" + then yyQ30(strm', yyMATCH(strm, yyAction22, yyNO_MATCH)) + else if inp < #"0" + then yyAction22(strm, yyNO_MATCH) + else if inp <= #"9" + then yyQ30(strm', yyMATCH(strm, yyAction22, yyNO_MATCH)) + else yyAction22(strm, yyNO_MATCH) + (* end case *)) +fun yyQ17 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction22(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"0" + then yyQ30(strm', yyMATCH(strm, yyAction22, yyNO_MATCH)) + else if inp < #"0" + then yyAction22(strm, yyNO_MATCH) + else if inp <= #"9" + then yyQ30(strm', yyMATCH(strm, yyAction22, yyNO_MATCH)) + else yyAction22(strm, yyNO_MATCH) + (* end case *)) +fun yyQ31 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction15(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction15(strm, yyNO_MATCH) + (* end case *)) +fun yyQ16 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction28(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #">" + then yyQ31(strm', yyMATCH(strm, yyAction28, yyNO_MATCH)) + else yyAction28(strm, yyNO_MATCH) + (* end case *)) +fun yyQ15 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction13(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction13(strm, yyNO_MATCH) + (* end case *)) +fun yyQ14 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction14(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction14(strm, yyNO_MATCH) + (* end case *)) +fun yyQ32 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction1(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction1(strm, yyNO_MATCH) + (* end case *)) +fun yyQ13 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction27(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"*" + then yyQ32(strm', yyMATCH(strm, yyAction27, yyNO_MATCH)) + else yyAction27(strm, yyNO_MATCH) + (* end case *)) +fun yyQ33 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction20(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"A" + then yyQ33(strm', yyMATCH(strm, yyAction20, yyNO_MATCH)) + else if inp < #"A" + then if inp = #"(" + then yyAction20(strm, yyNO_MATCH) + else if inp < #"(" + then if inp = #"'" + then yyQ33(strm', yyMATCH(strm, yyAction20, yyNO_MATCH)) + else yyAction20(strm, yyNO_MATCH) + else if inp = #"0" + then yyQ33(strm', yyMATCH(strm, yyAction20, yyNO_MATCH)) + else if inp < #"0" + then yyAction20(strm, yyNO_MATCH) + else if inp <= #"9" + then yyQ33(strm', yyMATCH(strm, yyAction20, yyNO_MATCH)) + else yyAction20(strm, yyNO_MATCH) + else if inp = #"`" + then yyAction20(strm, yyNO_MATCH) + else if inp < #"`" + then if inp = #"[" + then yyAction20(strm, yyNO_MATCH) + else if inp < #"[" + then yyQ33(strm', yyMATCH(strm, yyAction20, yyNO_MATCH)) + else if inp = #"_" + then yyQ33(strm', yyMATCH(strm, yyAction20, yyNO_MATCH)) + else yyAction20(strm, yyNO_MATCH) + else if inp <= #"z" + then yyQ33(strm', yyMATCH(strm, yyAction20, yyNO_MATCH)) + else yyAction20(strm, yyNO_MATCH) + (* end case *)) +fun yyQ12 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction20(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"A" + then yyQ33(strm', yyMATCH(strm, yyAction20, yyNO_MATCH)) + else if inp < #"A" + then if inp = #"(" + then yyAction20(strm, yyNO_MATCH) + else if inp < #"(" + then if inp = #"'" + then yyQ33(strm', yyMATCH(strm, yyAction20, yyNO_MATCH)) + else yyAction20(strm, yyNO_MATCH) + else if inp = #"0" + then yyQ33(strm', yyMATCH(strm, yyAction20, yyNO_MATCH)) + else if inp < #"0" + then yyAction20(strm, yyNO_MATCH) + else if inp <= #"9" + then yyQ33(strm', yyMATCH(strm, yyAction20, yyNO_MATCH)) + else yyAction20(strm, yyNO_MATCH) + else if inp = #"`" + then yyAction20(strm, yyNO_MATCH) + else if inp < #"`" + then if inp = #"[" + then yyAction20(strm, yyNO_MATCH) + else if inp < #"[" + then yyQ33(strm', yyMATCH(strm, yyAction20, yyNO_MATCH)) + else if inp = #"_" + then yyQ33(strm', yyMATCH(strm, yyAction20, yyNO_MATCH)) + else yyAction20(strm, yyNO_MATCH) + else if inp <= #"z" + then yyQ33(strm', yyMATCH(strm, yyAction20, yyNO_MATCH)) + else yyAction20(strm, yyNO_MATCH) + (* end case *)) +fun yyQ35 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction19(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"`" + then yyAction19(strm, yyNO_MATCH) + else if inp < #"`" + then if inp = #"_" + then yyQ35(strm', yyMATCH(strm, yyAction19, yyNO_MATCH)) + else yyAction19(strm, yyNO_MATCH) + else if inp <= #"z" + then yyQ35(strm', yyMATCH(strm, yyAction19, yyNO_MATCH)) + else yyAction19(strm, yyNO_MATCH) + (* end case *)) +fun yyQ42 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction17(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"`" + then yyAction17(strm, yyNO_MATCH) + else if inp < #"`" + then if inp = #"_" + then yyQ35(strm', yyMATCH(strm, yyAction17, yyNO_MATCH)) + else yyAction17(strm, yyNO_MATCH) + else if inp <= #"z" + then yyQ35(strm', yyMATCH(strm, yyAction17, yyNO_MATCH)) + else yyAction17(strm, yyNO_MATCH) + (* end case *)) +fun yyQ41 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction19(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"a" + then yyQ35(strm', yyMATCH(strm, yyAction19, yyNO_MATCH)) + else if inp < #"a" + then if inp = #"_" + then yyQ35(strm', yyMATCH(strm, yyAction19, yyNO_MATCH)) + else yyAction19(strm, yyNO_MATCH) + else if inp = #"u" + then yyQ35(strm', yyMATCH(strm, yyAction19, yyNO_MATCH)) + else if inp < #"u" + then if inp = #"t" + then yyQ42(strm', yyMATCH(strm, yyAction19, yyNO_MATCH)) + else yyQ35(strm', yyMATCH(strm, yyAction19, yyNO_MATCH)) + else if inp <= #"z" + then yyQ35(strm', yyMATCH(strm, yyAction19, yyNO_MATCH)) + else yyAction19(strm, yyNO_MATCH) + (* end case *)) +fun yyQ40 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction19(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"a" + then yyQ35(strm', yyMATCH(strm, yyAction19, yyNO_MATCH)) + else if inp < #"a" + then if inp = #"_" + then yyQ35(strm', yyMATCH(strm, yyAction19, yyNO_MATCH)) + else yyAction19(strm, yyNO_MATCH) + else if inp = #"i" + then yyQ35(strm', yyMATCH(strm, yyAction19, yyNO_MATCH)) + else if inp < #"i" + then if inp = #"h" + then yyQ41(strm', yyMATCH(strm, yyAction19, yyNO_MATCH)) + else yyQ35(strm', yyMATCH(strm, yyAction19, yyNO_MATCH)) + else if inp <= #"z" + then yyQ35(strm', yyMATCH(strm, yyAction19, yyNO_MATCH)) + else yyAction19(strm, yyNO_MATCH) + (* end case *)) +fun yyQ39 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction19(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"a" + then yyQ35(strm', yyMATCH(strm, yyAction19, yyNO_MATCH)) + else if inp < #"a" + then if inp = #"_" + then yyQ35(strm', yyMATCH(strm, yyAction19, yyNO_MATCH)) + else yyAction19(strm, yyNO_MATCH) + else if inp = #"h" + then yyQ35(strm', yyMATCH(strm, yyAction19, yyNO_MATCH)) + else if inp < #"h" + then if inp = #"g" + then yyQ40(strm', yyMATCH(strm, yyAction19, yyNO_MATCH)) + else yyQ35(strm', yyMATCH(strm, yyAction19, yyNO_MATCH)) + else if inp <= #"z" + then yyQ35(strm', yyMATCH(strm, yyAction19, yyNO_MATCH)) + else yyAction19(strm, yyNO_MATCH) + (* end case *)) +fun yyQ38 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction19(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"a" + then yyQ35(strm', yyMATCH(strm, yyAction19, yyNO_MATCH)) + else if inp < #"a" + then if inp = #"_" + then yyQ35(strm', yyMATCH(strm, yyAction19, yyNO_MATCH)) + else yyAction19(strm, yyNO_MATCH) + else if inp = #"j" + then yyQ35(strm', yyMATCH(strm, yyAction19, yyNO_MATCH)) + else if inp < #"j" + then if inp = #"i" + then yyQ39(strm', yyMATCH(strm, yyAction19, yyNO_MATCH)) + else yyQ35(strm', yyMATCH(strm, yyAction19, yyNO_MATCH)) + else if inp <= #"z" + then yyQ35(strm', yyMATCH(strm, yyAction19, yyNO_MATCH)) + else yyAction19(strm, yyNO_MATCH) + (* end case *)) +fun yyQ49 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction18(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"`" + then yyAction18(strm, yyNO_MATCH) + else if inp < #"`" + then if inp = #"_" + then yyQ35(strm', yyMATCH(strm, yyAction18, yyNO_MATCH)) + else yyAction18(strm, yyNO_MATCH) + else if inp <= #"z" + then yyQ35(strm', yyMATCH(strm, yyAction18, yyNO_MATCH)) + else yyAction18(strm, yyNO_MATCH) + (* end case *)) +fun yyQ48 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction19(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"a" + then yyQ35(strm', yyMATCH(strm, yyAction19, yyNO_MATCH)) + else if inp < #"a" + then if inp = #"_" + then yyQ35(strm', yyMATCH(strm, yyAction19, yyNO_MATCH)) + else yyAction19(strm, yyNO_MATCH) + else if inp = #"d" + then yyQ35(strm', yyMATCH(strm, yyAction19, yyNO_MATCH)) + else if inp < #"d" + then if inp = #"c" + then yyQ49(strm', yyMATCH(strm, yyAction19, yyNO_MATCH)) + else yyQ35(strm', yyMATCH(strm, yyAction19, yyNO_MATCH)) + else if inp <= #"z" + then yyQ35(strm', yyMATCH(strm, yyAction19, yyNO_MATCH)) + else yyAction19(strm, yyNO_MATCH) + (* end case *)) +fun yyQ47 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction19(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"a" + then yyQ35(strm', yyMATCH(strm, yyAction19, yyNO_MATCH)) + else if inp < #"a" + then if inp = #"_" + then yyQ35(strm', yyMATCH(strm, yyAction19, yyNO_MATCH)) + else yyAction19(strm, yyNO_MATCH) + else if inp = #"p" + then yyQ35(strm', yyMATCH(strm, yyAction19, yyNO_MATCH)) + else if inp < #"p" + then if inp = #"o" + then yyQ48(strm', yyMATCH(strm, yyAction19, yyNO_MATCH)) + else yyQ35(strm', yyMATCH(strm, yyAction19, yyNO_MATCH)) + else if inp <= #"z" + then yyQ35(strm', yyMATCH(strm, yyAction19, yyNO_MATCH)) + else yyAction19(strm, yyNO_MATCH) + (* end case *)) +fun yyQ46 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction19(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"a" + then yyQ35(strm', yyMATCH(strm, yyAction19, yyNO_MATCH)) + else if inp < #"a" + then if inp = #"_" + then yyQ35(strm', yyMATCH(strm, yyAction19, yyNO_MATCH)) + else yyAction19(strm, yyNO_MATCH) + else if inp = #"t" + then yyQ35(strm', yyMATCH(strm, yyAction19, yyNO_MATCH)) + else if inp < #"t" + then if inp = #"s" + then yyQ47(strm', yyMATCH(strm, yyAction19, yyNO_MATCH)) + else yyQ35(strm', yyMATCH(strm, yyAction19, yyNO_MATCH)) + else if inp <= #"z" + then yyQ35(strm', yyMATCH(strm, yyAction19, yyNO_MATCH)) + else yyAction19(strm, yyNO_MATCH) + (* end case *)) +fun yyQ45 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction19(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"a" + then yyQ35(strm', yyMATCH(strm, yyAction19, yyNO_MATCH)) + else if inp < #"a" + then if inp = #"_" + then yyQ35(strm', yyMATCH(strm, yyAction19, yyNO_MATCH)) + else yyAction19(strm, yyNO_MATCH) + else if inp = #"t" + then yyQ35(strm', yyMATCH(strm, yyAction19, yyNO_MATCH)) + else if inp < #"t" + then if inp = #"s" + then yyQ46(strm', yyMATCH(strm, yyAction19, yyNO_MATCH)) + else yyQ35(strm', yyMATCH(strm, yyAction19, yyNO_MATCH)) + else if inp <= #"z" + then yyQ35(strm', yyMATCH(strm, yyAction19, yyNO_MATCH)) + else yyAction19(strm, yyNO_MATCH) + (* end case *)) +fun yyQ44 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction19(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"a" + then yyQ45(strm', yyMATCH(strm, yyAction19, yyNO_MATCH)) + else if inp < #"a" + then if inp = #"_" + then yyQ35(strm', yyMATCH(strm, yyAction19, yyNO_MATCH)) + else yyAction19(strm, yyNO_MATCH) + else if inp <= #"z" + then yyQ35(strm', yyMATCH(strm, yyAction19, yyNO_MATCH)) + else yyAction19(strm, yyNO_MATCH) + (* end case *)) +fun yyQ43 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction19(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"a" + then yyQ35(strm', yyMATCH(strm, yyAction19, yyNO_MATCH)) + else if inp < #"a" + then if inp = #"_" + then yyQ35(strm', yyMATCH(strm, yyAction19, yyNO_MATCH)) + else yyAction19(strm, yyNO_MATCH) + else if inp = #"o" + then yyQ35(strm', yyMATCH(strm, yyAction19, yyNO_MATCH)) + else if inp < #"o" + then if inp = #"n" + then yyQ44(strm', yyMATCH(strm, yyAction19, yyNO_MATCH)) + else yyQ35(strm', yyMATCH(strm, yyAction19, yyNO_MATCH)) + else if inp <= #"z" + then yyQ35(strm', yyMATCH(strm, yyAction19, yyNO_MATCH)) + else yyAction19(strm, yyNO_MATCH) + (* end case *)) +fun yyQ37 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction19(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"a" + then yyQ35(strm', yyMATCH(strm, yyAction19, yyNO_MATCH)) + else if inp < #"a" + then if inp = #"_" + then yyQ35(strm', yyMATCH(strm, yyAction19, yyNO_MATCH)) + else yyAction19(strm, yyNO_MATCH) + else if inp = #"p" + then yyQ35(strm', yyMATCH(strm, yyAction19, yyNO_MATCH)) + else if inp < #"p" + then if inp = #"o" + then yyQ43(strm', yyMATCH(strm, yyAction19, yyNO_MATCH)) + else yyQ35(strm', yyMATCH(strm, yyAction19, yyNO_MATCH)) + else if inp <= #"z" + then yyQ35(strm', yyMATCH(strm, yyAction19, yyNO_MATCH)) + else yyAction19(strm, yyNO_MATCH) + (* end case *)) +fun yyQ52 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction16(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"`" + then yyAction16(strm, yyNO_MATCH) + else if inp < #"`" + then if inp = #"_" + then yyQ35(strm', yyMATCH(strm, yyAction16, yyNO_MATCH)) + else yyAction16(strm, yyNO_MATCH) + else if inp <= #"z" + then yyQ35(strm', yyMATCH(strm, yyAction16, yyNO_MATCH)) + else yyAction16(strm, yyNO_MATCH) + (* end case *)) +fun yyQ51 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction19(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"a" + then yyQ35(strm', yyMATCH(strm, yyAction19, yyNO_MATCH)) + else if inp < #"a" + then if inp = #"_" + then yyQ35(strm', yyMATCH(strm, yyAction19, yyNO_MATCH)) + else yyAction19(strm, yyNO_MATCH) + else if inp = #"u" + then yyQ35(strm', yyMATCH(strm, yyAction19, yyNO_MATCH)) + else if inp < #"u" + then if inp = #"t" + then yyQ52(strm', yyMATCH(strm, yyAction19, yyNO_MATCH)) + else yyQ35(strm', yyMATCH(strm, yyAction19, yyNO_MATCH)) + else if inp <= #"z" + then yyQ35(strm', yyMATCH(strm, yyAction19, yyNO_MATCH)) + else yyAction19(strm, yyNO_MATCH) + (* end case *)) +fun yyQ50 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction19(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"a" + then yyQ35(strm', yyMATCH(strm, yyAction19, yyNO_MATCH)) + else if inp < #"a" + then if inp = #"_" + then yyQ35(strm', yyMATCH(strm, yyAction19, yyNO_MATCH)) + else yyAction19(strm, yyNO_MATCH) + else if inp = #"g" + then yyQ35(strm', yyMATCH(strm, yyAction19, yyNO_MATCH)) + else if inp < #"g" + then if inp = #"f" + then yyQ51(strm', yyMATCH(strm, yyAction19, yyNO_MATCH)) + else yyQ35(strm', yyMATCH(strm, yyAction19, yyNO_MATCH)) + else if inp <= #"z" + then yyQ35(strm', yyMATCH(strm, yyAction19, yyNO_MATCH)) + else yyAction19(strm, yyNO_MATCH) + (* end case *)) +fun yyQ36 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction19(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"a" + then yyQ35(strm', yyMATCH(strm, yyAction19, yyNO_MATCH)) + else if inp < #"a" + then if inp = #"_" + then yyQ35(strm', yyMATCH(strm, yyAction19, yyNO_MATCH)) + else yyAction19(strm, yyNO_MATCH) + else if inp = #"f" + then yyQ35(strm', yyMATCH(strm, yyAction19, yyNO_MATCH)) + else if inp < #"f" + then if inp = #"e" + then yyQ50(strm', yyMATCH(strm, yyAction19, yyNO_MATCH)) + else yyQ35(strm', yyMATCH(strm, yyAction19, yyNO_MATCH)) + else if inp <= #"z" + then yyQ35(strm', yyMATCH(strm, yyAction19, yyNO_MATCH)) + else yyAction19(strm, yyNO_MATCH) + (* end case *)) +fun yyQ34 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction23(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction23(strm, yyNO_MATCH) + (* end case *)) +fun yyQ11 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction28(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"l" + then yyQ36(strm', yyMATCH(strm, yyAction28, yyNO_MATCH)) + else if inp < #"l" + then if inp = #"_" + then yyQ35(strm', yyMATCH(strm, yyAction28, yyNO_MATCH)) + else if inp < #"_" + then if inp = #"%" + then yyQ34(strm', yyMATCH(strm, yyAction28, yyNO_MATCH)) + else yyAction28(strm, yyNO_MATCH) + else if inp = #"`" + then yyAction28(strm, yyNO_MATCH) + else yyQ35(strm', yyMATCH(strm, yyAction28, yyNO_MATCH)) + else if inp = #"r" + then yyQ38(strm', yyMATCH(strm, yyAction28, yyNO_MATCH)) + else if inp < #"r" + then if inp = #"n" + then yyQ37(strm', yyMATCH(strm, yyAction28, yyNO_MATCH)) + else yyQ35(strm', yyMATCH(strm, yyAction28, yyNO_MATCH)) + else if inp <= #"z" + then yyQ35(strm', yyMATCH(strm, yyAction28, yyNO_MATCH)) + else yyAction28(strm, yyNO_MATCH) + (* end case *)) +fun yyQ9 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction7(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction7(strm, yyNO_MATCH) + (* end case *)) +fun yyQ10 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction7(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"\n" + then yyQ9(strm', yyMATCH(strm, yyAction7, yyNO_MATCH)) + else yyAction7(strm, yyNO_MATCH) + (* end case *)) +fun yyQ53 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction8(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"\n" + then yyAction8(strm, yyNO_MATCH) + else if inp < #"\n" + then if inp = #"\t" + then yyQ53(strm', yyMATCH(strm, yyAction8, yyNO_MATCH)) + else yyAction8(strm, yyNO_MATCH) + else if inp = #" " + then yyQ53(strm', yyMATCH(strm, yyAction8, yyNO_MATCH)) + else yyAction8(strm, yyNO_MATCH) + (* end case *)) +fun yyQ8 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction8(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"\n" + then yyAction8(strm, yyNO_MATCH) + else if inp < #"\n" + then if inp = #"\t" + then yyQ53(strm', yyMATCH(strm, yyAction8, yyNO_MATCH)) + else yyAction8(strm, yyNO_MATCH) + else if inp = #" " + then yyQ53(strm', yyMATCH(strm, yyAction8, yyNO_MATCH)) + else yyAction8(strm, yyNO_MATCH) + (* end case *)) +fun yyQ7 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction28(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction28(strm, yyNO_MATCH) + (* end case *)) +fun yyQ0 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => + if yyInput.eof(!(yystrm)) + then UserDeclarations.eof(yyarg) + else yystuck(lastMatch) + | SOME(inp, strm') => + if inp = #"-" + then yyQ16(strm', lastMatch) + else if inp < #"-" + then if inp = #"%" + then yyQ11(strm', lastMatch) + else if inp < #"%" + then if inp = #"\r" + then yyQ10(strm', lastMatch) + else if inp < #"\r" + then if inp = #"\n" + then yyQ9(strm', lastMatch) + else if inp < #"\n" + then if inp = #"\t" + then yyQ8(strm', lastMatch) + else yyQ7(strm', lastMatch) + else yyQ7(strm', lastMatch) + else if inp = #" " + then yyQ8(strm', lastMatch) + else yyQ7(strm', lastMatch) + else if inp = #")" + then yyQ7(strm', lastMatch) + else if inp < #")" + then if inp = #"'" + then yyQ12(strm', lastMatch) + else if inp = #"&" + then yyQ7(strm', lastMatch) + else yyQ13(strm', lastMatch) + else if inp = #"+" + then yyQ7(strm', lastMatch) + else if inp = #"*" + then yyQ14(strm', lastMatch) + else yyQ15(strm', lastMatch) + else if inp = #"f" + then yyQ20(strm', lastMatch) + else if inp < #"f" + then if inp = #";" + then yyQ7(strm', lastMatch) + else if inp < #";" + then if inp = #"0" + then yyQ17(strm', lastMatch) + else if inp < #"0" + then yyQ7(strm', lastMatch) + else if inp = #":" + then yyQ18(strm', lastMatch) + else yyQ17(strm', lastMatch) + else if inp = #"[" + then yyQ7(strm', lastMatch) + else if inp < #"[" + then if inp <= #"@" + then yyQ7(strm', lastMatch) + else yyQ19(strm', lastMatch) + else if inp <= #"`" + then yyQ7(strm', lastMatch) + else yyQ19(strm', lastMatch) + else if inp = #"{" + then yyQ22(strm', lastMatch) + else if inp < #"{" + then if inp = #"o" + then yyQ21(strm', lastMatch) + else yyQ19(strm', lastMatch) + else if inp = #"}" + then yyQ24(strm', lastMatch) + else if inp = #"|" + then yyQ23(strm', lastMatch) + else yyQ7(strm', lastMatch) + (* end case *)) +in + (case (!(yyss)) + of A => yyQ0(!(yystrm), yyNO_MATCH) + | F => yyQ1(!(yystrm), yyNO_MATCH) + | CODE => yyQ2(!(yystrm), yyNO_MATCH) + | STRING => yyQ3(!(yystrm), yyNO_MATCH) + | COMMENT => yyQ4(!(yystrm), yyNO_MATCH) + | EMPTYCOMMENT => yyQ5(!(yystrm), yyNO_MATCH) + | INITIAL => yyQ6(!(yystrm), yyNO_MATCH) + (* end case *)) +end + end + in + continue() + handle IO.Io{cause, ...} => raise cause + end + in + lex + end + in + fun makeLexer yyinputN = mk (yyInput.mkStream yyinputN) + fun makeLexer' ins = mk (yyInput.mkStream ins) + end + + end diff --git a/ml-yacc/src/yacc.sml b/ml-yacc/src/yacc.sml new file mode 100644 index 0000000..d7fb319 --- /dev/null +++ b/ml-yacc/src/yacc.sml @@ -0,0 +1,869 @@ +(* ML-Yacc Parser Generator (c) 1989, 1990 Andrew W. Appel, David R. Tarditi *) + +functor ParseGenFun(structure ParseGenParser : PARSE_GEN_PARSER + structure MakeTable : MAKE_LR_TABLE + structure Verbose : VERBOSE + structure PrintStruct : PRINT_STRUCT + + sharing MakeTable.LrTable = PrintStruct.LrTable + sharing MakeTable.Errs = Verbose.Errs + + structure Absyn : ABSYN + ) : PARSE_GEN = + struct + val sub = Array.sub + infix 9 sub + structure Grammar = MakeTable.Grammar + structure Header = ParseGenParser.Header + + open Header Grammar + + (* approx. maximum length of a line *) + + val lineLength = 70 + + (* record type describing names of structures in the program being + generated *) + + datatype names = NAMES + of {miscStruct : string, (* Misc{n} struct name *) + tableStruct : string, (* LR table structure *) + tokenStruct : string, (* Tokens{n} struct name *) + actionsStruct : string, (* Actions structure *) + valueStruct: string, (* semantic value structure *) + ecStruct : string, (* error correction structure *) + arg: string, (* user argument for parser *) + tokenSig : string, (* TOKENS{n} signature *) + miscSig :string, (* Signature for Misc structure *) + dataStruct:string, (* name of structure in Misc *) + (* which holds parser data *) + dataSig:string (* signature for this structure *) + + } + + val DEBUG = true + exception Semantic + + (* common functions and values used in printing out program *) + + datatype values = VALS + of {say : string -> unit, + saydot : string -> unit, + sayln : string -> unit, + pureActions: bool, + pos_type : string, + arg_type : string, + ntvoid : string, + termvoid : string, + start : Grammar.nonterm, + hasType : Grammar.symbol -> bool, + + (* actual (user) name of terminal *) + + termToString : Grammar.term -> string, + symbolToString : Grammar.symbol -> string, + + (* type symbol comes from the HDR structure, + and is now abstract *) + + term : (Header.symbol * ty option) list, + nonterm : (Header.symbol * ty option) list, + terms : Grammar.term list, + + (* tokenInfo is the user inserted spec in + the *_TOKEN sig*) + tokenInfo : string option} + + structure SymbolHash = Hash(type elem = string + val gt = (op >) : string*string -> bool) + + structure TermTable = Table(type key = Grammar.term + val gt = fn (T i,T j) => i > j) + + structure SymbolTable = Table( + type key = Grammar.symbol + val gt = fn (TERM(T i),TERM(T j)) => i>j + | (NONTERM(NT i),NONTERM(NT j)) => i>j + | (NONTERM _,TERM _) => true + | (TERM _,NONTERM _) => false) + + (* printTypes: function to print the following types in the LrValues + structure and a structure containing the datatype svalue: + + type svalue -- it holds semantic values on the parse + stack + type pos -- the type of line numbers + type result -- the type of the value that results + from the parse + + The type svalue is set equal to the datatype svalue declared + in the structure named by valueStruct. The datatype svalue + is declared inside the structure named by valueStruct to deal + with the scope of constructors. + *) + + val printTypes = fn (VALS {say,sayln,term,nonterm,symbolToString,pos_type, + arg_type, + termvoid,ntvoid,saydot,hasType,start, + pureActions,...}, + NAMES {valueStruct,...},symbolType) => + let val prConstr = fn (symbol,SOME s) => + say (" | " ^ (symbolName symbol) ^ " of " ^ + (if pureActions then "" else "unit -> ") ^ + " (" ^ tyName s ^ ")" + ) + | _ => () + in sayln "local open Header in"; + sayln ("type pos = " ^ pos_type); + sayln ("type arg = " ^ arg_type); + sayln ("structure " ^ valueStruct ^ " = "); + sayln "struct"; + say ("datatype svalue = " ^ termvoid ^ " | " ^ ntvoid ^ " of" ^ + (if pureActions then "" else " unit -> ") ^ " unit"); + app prConstr term; + app prConstr nonterm; + sayln "\nend"; + sayln ("type svalue = " ^ valueStruct ^ ".svalue"); + say "type result = "; + case symbolType (NONTERM start) + of NONE => sayln "unit" + | SOME t => (say (tyName t); sayln ""); + sayln "end" + end + + (* function to print Tokens{n} structure *) + + val printTokenStruct = + fn (VALS {say, sayln, termToString, hasType,termvoid,terms, + pureActions,tokenInfo,...}, + NAMES {miscStruct,tableStruct,valueStruct, + tokenStruct,tokenSig,dataStruct,...}) => + (sayln ("structure " ^ tokenStruct ^ " : " ^ tokenSig ^ " ="); + sayln "struct"; + (case tokenInfo of + NONE => () + | _ => sayln ("open "^dataStruct^".Header")); + sayln ("type svalue = " ^ dataStruct ^ ".svalue"); + sayln "type ('a,'b) token = ('a,'b) Token.token"; + let val f = fn term as T i => + (say "fun "; say (termToString term); + say " ("; + if (hasType (TERM term)) then say "i," else (); + say "p1,p2) = Token.TOKEN ("; + say (dataStruct ^ "." ^ tableStruct ^ ".T "); + say (Int.toString i); + say ",("; + say (dataStruct ^ "." ^ valueStruct ^ "."); + if (hasType (TERM term)) then + (say (termToString term); + if pureActions then say " i" + else say " (fn () => i)") + else say termvoid; + say ","; + sayln "p1,p2))") + in app f terms + end; + sayln "end") + + (* function to print signatures out - takes print function which + does not need to insert line breaks *) + + val printSigs = fn (VALS {term,tokenInfo,...}, + NAMES {tokenSig,tokenStruct,miscSig, + dataStruct, dataSig, ...}, + say) => + say ("signature " ^ tokenSig ^ " =\nsig\n"^ + (case tokenInfo of NONE => "" | SOME s => (s^"\n"))^ + "type ('a,'b) token\ntype svalue\n" ^ + (List.foldr (fn ((s,ty),r) => String.concat [ + "val ", symbolName s, + (case ty + of NONE => ": " + | SOME l => ": (" ^ (tyName l) ^ ") * "), + " 'a * 'a -> (svalue,'a) token\n", r]) "" term) ^ + "end\nsignature " ^ miscSig ^ + "=\nsig\nstructure Tokens : " ^ tokenSig ^ + "\nstructure " ^ dataStruct ^ ":" ^ dataSig ^ + "\nsharing type " ^ dataStruct ^ + ".Token.token = Tokens.token\nsharing type " ^ + dataStruct ^ ".svalue = Tokens.svalue\nend\n") + + (* function to print structure for error correction *) + + val printEC = fn (keyword : term list, + preferred_change : (term list * term list) list, + noshift : term list, + value : (term * string) list, + VALS {termToString, say,sayln,terms,saydot,hasType, + termvoid,pureActions,...}, + NAMES {ecStruct,tableStruct,valueStruct,...}) => + let + + val sayterm = fn (T i) => (say "(T "; say (Int.toString i); say ")") + + val printBoolCase = fn ( l : term list) => + (say "fn "; + app (fn t => (sayterm t; say " => true"; say " | ")) l; + sayln "_ => false") + + val printTermList = fn (l : term list) => + (sayln "nil"; app (fn t => (say " $$ "; sayterm t)) (rev l)) + + + fun printChange () = + (sayln "val preferred_change : (term list * term list) list = "; + app (fn (d,i) => + (say"("; printTermList d; say ","; printTermList i; + sayln ")::" + ) + ) preferred_change; + sayln "nil") + + val printErrValues = fn (l : (term * string) list) => + (sayln "local open Header in"; + sayln "val errtermvalue="; + say "fn "; + app (fn (t,s) => + (sayterm t; say " => "; + saydot valueStruct; say (termToString t); + say "("; + if pureActions then () else say "fn () => "; + say "("; say s; say "))"; + sayln " | " + ) + ) l; + say "_ => "; + say (valueStruct ^ "."); + sayln termvoid; sayln "end") + + + val printNames = fn () => + let val f = fn term => ( + sayterm term; say " => "; + sayln (String.concat["\"", termToString term, "\""]); + say " | ") + in (sayln "val showTerminal ="; + say "fn "; + app f terms; + sayln "_ => \"bogus-term\"") + end + + val ecTerms = + List.foldr (fn (t,r) => + if hasType (TERM t) orelse List.exists (fn (a,_)=>a=t) value + then r + else t::r) + [] terms + + in say "structure "; + say ecStruct; + sayln "="; + sayln "struct"; + say "open "; + sayln tableStruct; + sayln "infix 5 $$"; + sayln "fun x $$ y = y::x"; + sayln "val is_keyword ="; + printBoolCase keyword; + printChange(); + sayln "val noShift = "; + printBoolCase noshift; + printNames (); + printErrValues value; + say "val terms : term list = "; + printTermList ecTerms; + sayln "end" + end + +val printAction = fn (rules, + VALS {hasType,say,sayln,termvoid,ntvoid, + symbolToString,saydot,start,pureActions,...}, + NAMES {actionsStruct,valueStruct,tableStruct,arg,...}) => +let val printAbsynRule = Absyn.printRule(say,sayln) + val is_nonterm = fn (NONTERM i) => true | _ => false + val numberRhs = fn r => + List.foldl (fn (e,(r,table)) => + let val num = case SymbolTable.find(e,table) + of SOME i => i + | NONE => 1 + in ((e,num,hasType e orelse is_nonterm e)::r, + SymbolTable.insert((e,num+1),table)) + end) (nil,SymbolTable.empty) r + + val saySym = symbolToString + + val printCase = fn (i:int, r as {lhs=lhs as (NT lhsNum),prec, + rhs,code,rulenum}) => + + (* mkToken: Build an argument *) + + let open Absyn + val mkToken = fn (sym,num : int,typed) => + let val symString = symbolToString sym + val symNum = symString ^ (Int.toString num) + in PTUPLE[WILD, + PTUPLE[if not (hasType sym) then + (if is_nonterm sym then + PAPP(valueStruct^"."^ntvoid, + PVAR symNum) + else WILD) + else + PAPP(valueStruct^"."^symString, + if num=1 andalso pureActions + then AS(symNum,PVAR symString) + else PVAR symNum), + if num=1 then AS(symString^"left", + PVAR(symNum^"left")) + else PVAR(symNum^"left"), + if num=1 then AS(symString^"right", + PVAR(symNum^"right")) + else PVAR(symNum^"right")]] + end + + val numberedRhs = #1 (numberRhs rhs) + + (* construct case pattern *) + + val pat = PTUPLE[PINT i,PLIST(map mkToken numberedRhs, + SOME (PVAR "rest671"))] + + (* remove terminals in argument list w/o types *) + + val argsWithTypes = + List.foldr (fn ((_,_,false),r) => r + | (s as (_,_,true),r) => s::r) nil numberedRhs + + (* construct case body *) + + val defaultPos = EVAR "defaultPos" + val resultexp = EVAR "result" + val resultpat = PVAR "result" + val code = CODE code + val rest = EVAR "rest671" + + val body = + LET([VB(resultpat, + EAPP(EVAR(valueStruct^"."^ + (if hasType (NONTERM lhs) + then saySym(NONTERM lhs) + else ntvoid)), + if pureActions then code + else if argsWithTypes=nil then FN(WILD,code) + else + FN(WILD, + let val body = + LET(map (fn (sym,num:int,_) => + let val symString = symbolToString sym + val symNum = symString ^ Int.toString num + in VB(if num=1 then + AS(symString,PVAR symNum) + else PVAR symNum, + EAPP(EVAR symNum,UNIT)) + end) (rev argsWithTypes), + code) + in if hasType (NONTERM lhs) then + body else SEQ(body,UNIT) + end)))], + ETUPLE[EAPP(EVAR(tableStruct^".NT"),EINT(lhsNum)), + case rhs + of nil => ETUPLE[resultexp,defaultPos,defaultPos] + | r =>let val (rsym,rnum,_) = hd(numberedRhs) + val (lsym,lnum,_) = hd(rev numberedRhs) + in ETUPLE[resultexp, + EVAR (symbolToString lsym ^ + Int.toString lnum ^ "left"), + EVAR (symbolToString rsym ^ + Int.toString rnum ^ "right")] + end, + rest]) + in printAbsynRule (RULE(pat,body)) + end + + val prRules = fn () => + (sayln "fn (i392,defaultPos,stack,"; + say " ("; say arg; sayln "):arg) =>"; + sayln "case (i392,stack)"; + say "of "; + app (fn (rule as {rulenum,...}) => + (printCase(rulenum,rule); say "| ")) rules; + sayln "_ => raise (mlyAction i392)") + + in say "structure "; + say actionsStruct; + sayln " ="; + sayln "struct "; + sayln "exception mlyAction of int"; + sayln "local open Header in"; + sayln "val actions = "; + prRules(); + sayln "end"; + say "val void = "; + saydot valueStruct; + sayln termvoid; + say "val extract = "; + say "fn a => (fn "; + saydot valueStruct; + if hasType (NONTERM start) + then say (symbolToString (NONTERM start)) + else say "ntVOID"; + sayln " x => x"; + sayln "| _ => let exception ParseInternal"; + say "\tin raise ParseInternal end) a "; + sayln (if pureActions then "" else "()"); + sayln "end" + end + + val make_parser = fn ((header, + DECL {eop,change,keyword,nonterm,prec, + term, control,value} : declData, + rules : rule list),spec,error : pos -> string -> unit, + wasError : unit -> bool) => + let + val verbose = List.exists (fn VERBOSE=>true | _ => false) control + val defaultReductions = not (List.exists (fn NODEFAULT=>true | _ => false) control) + val pos_type = + let fun f nil = NONE + | f ((POS s)::r) = SOME s + | f (_::r) = f r + in f control + end + val start = + let fun f nil = NONE + | f ((START_SYM s)::r) = SOME s + | f (_::r) = f r + in f control + end + val name = + let fun f nil = NONE + | f ((PARSER_NAME s)::r) = SOME s + | f (_::r) = f r + in f control + end + val header_decl = + let fun f nil = NONE + | f ((FUNCTOR s)::r) = SOME s + | f (_::r) = f r + in f control + end + + val token_sig_info_decl = + let fun f nil = NONE + | f ((TOKEN_SIG_INFO s)::_) = SOME s + | f (_::r) = f r + in f control + end + + val arg_decl = + let fun f nil = ("()","unit") + | f ((PARSE_ARG s)::r) = s + | f (_::r) = f r + in f control + end + + val noshift = + let fun f nil = nil + | f ((NSHIFT s)::r) = s + | f (_::r) = f r + in f control + end + + val pureActions = + let fun f nil = false + | f ((PURE)::r) = true + | f (_::r) = f r + in f control + end + + val term = + case term + of NONE => (error 1 "missing %term definition"; nil) + | SOME l => l + + val nonterm = + case nonterm + of NONE => (error 1 "missing %nonterm definition"; nil) + | SOME l => l + + val pos_type = + case pos_type + of NONE => (error 1 "missing %pos definition"; "") + | SOME l => l + + + val termHash = + List.foldr (fn ((symbol,_),table) => + let val name = symbolName symbol + in if SymbolHash.exists(name,table) then + (error (symbolPos symbol) + ("duplicate definition of " ^ name ^ " in %term"); + table) + else SymbolHash.add(name,table) + end) SymbolHash.empty term + + val isTerm = fn name => SymbolHash.exists(name,termHash) + + val symbolHash = + List.foldr (fn ((symbol,_),table) => + let val name = symbolName symbol + in if SymbolHash.exists(name,table) then + (error (symbolPos symbol) + (if isTerm name then + name ^ " is defined as a terminal and a nonterminal" + else + "duplicate definition of " ^ name ^ " in %nonterm"); + table) + else SymbolHash.add(name,table) + end) termHash nonterm + + fun makeUniqueId s = + if SymbolHash.exists(s,symbolHash) then makeUniqueId (s ^ "'") + else s + + val _ = if wasError() then raise Semantic else () + + val numTerms = SymbolHash.size termHash + val numNonterms = SymbolHash.size symbolHash - numTerms + + val symError = fn sym => fn err => fn symbol => + error (symbolPos symbol) + (symbolName symbol^" in "^err^" is not defined as a " ^ sym) + + val termNum : string -> Header.symbol -> term = + let val termError = symError "terminal" + in fn stmt => + let val stmtError = termError stmt + in fn symbol => + case SymbolHash.find(symbolName symbol,symbolHash) + of NONE => (stmtError symbol; T ~1) + | SOME i => T (if i Header.symbol -> nonterm = + let val nontermError = symError "nonterminal" + in fn stmt => + let val stmtError = nontermError stmt + in fn symbol => + case SymbolHash.find(symbolName symbol,symbolHash) + of NONE => (stmtError symbol; NT ~1) + | SOME i => if i>=numTerms then NT (i-numTerms) + else (stmtError symbol;NT ~1) + end + end + + val symbolNum : string -> Header.symbol -> Grammar.symbol = + let val symbolError = symError "symbol" + in fn stmt => + let val stmtError = symbolError stmt + in fn symbol => + case SymbolHash.find(symbolName symbol,symbolHash) + of NONE => (stmtError symbol; NONTERM (NT ~1)) + | SOME i => if i>=numTerms then NONTERM(NT (i-numTerms)) + else TERM(T i) + end + end + +(* map all symbols in the following values to terminals and check that + the symbols are defined as terminals: + + eop : symbol list + keyword: symbol list + prec: (lexvalue * (symbol list)) list + change: (symbol list * symbol list) list +*) + + val eop = map (termNum "%eop") eop + val keyword = map (termNum "%keyword") keyword + val prec = map (fn (a,l) => + (a,case a + of LEFT => map (termNum "%left") l + | RIGHT => map (termNum "%right") l + | NONASSOC => map (termNum "%nonassoc") l + )) prec + val change = + let val mapTerm = termNum "%prefer, %subst, or %change" + in map (fn (a,b) => (map mapTerm a, map mapTerm b)) change + end + val noshift = map (termNum "%noshift") noshift + val value = + let val mapTerm = termNum "%value" + in map (fn (a,b) => (mapTerm a,b)) value + end + val (rules,_) = + let val symbolNum = symbolNum "rule" + val nontermNum = nontermNum "rule" + val termNum = termNum "%prec tag" + in List.foldr + (fn (RULE {lhs,rhs,code,prec},(l,n)) => + ( {lhs=nontermNum lhs,rhs=map symbolNum rhs, + code=code,prec=case prec + of NONE => NONE + | SOME t => SOME (termNum t), + rulenum=n}::l,n-1)) + (nil,length rules-1) rules + end + + val _ = if wasError() then raise Semantic else () + + (* termToString: map terminals back to strings *) + + val termToString = + let val data = Array.array(numTerms,"") + val unmap = fn (symbol,_) => + let val name = symbolName symbol + in Array.update(data, + case SymbolHash.find(name,symbolHash) of + SOME i => i + | NONE => raise Fail "termToString", + name) + end + val _ = app unmap term + in fn T i => + if DEBUG andalso (i<0 orelse i>=numTerms) + then "bogus-num" ^ (Int.toString i) + else data sub i + end + + val nontermToString = + let val data = Array.array(numNonterms,"") + val unmap = fn (symbol,_) => + let val name = symbolName symbol + in Array.update(data, + case SymbolHash.find(name,symbolHash) of + SOME i => i-numTerms + | NONE => raise Fail "nontermToString", + name) + end + val _ = app unmap nonterm + in fn NT i => + if DEBUG andalso (i<0 orelse i>=numNonterms) + then "bogus-num" ^ (Int.toString i) + else data sub i + end + +(* create functions mapping terminals to precedence numbers and rules to + precedence numbers. + + Precedence statements are listed in order of ascending (tighter binding) + precedence in the specification. We receive a list composed of pairs + containing the kind of precedence (left,right, or assoc) and a list of + terminals associated with that precedence. The list has the same order as + the corresponding declarations did in the specification. + + Internally, a tighter binding has a higher precedence number. We give + precedences using multiples of 3: + + p+2 = right associative (force shift of symbol) + p+1 = precedence for rule + p = left associative (force reduction of rule) + + Nonassociative terminals are given also given a precedence of p+1. The +table generator detects when the associativity of a nonassociative terminal +is being used to resolve a shift/reduce conflict by checking if the +precedences of the rule and the terminal are equal. + + A rule is given the precedence of its rightmost terminal *) + + val termPrec = + let val precData = Array.array(numTerms, NONE : int option) + val addPrec = fn termPrec => fn term as (T i) => + case precData sub i + of SOME _ => + error 1 ("multiple precedences specified for terminal " ^ + (termToString term)) + | NONE => Array.update(precData,i,termPrec) + val termPrec = fn ((LEFT,_) ,i) => i + | ((RIGHT,_),i) => i+2 + | ((NONASSOC,l),i) => i+1 + val _ = List.foldl (fn (args as ((_,l),i)) => + (app (addPrec (SOME (termPrec args))) l; i+3)) + 0 prec + in fn (T i) => + if DEBUG andalso (i < 0 orelse i >= numTerms) then + NONE + else precData sub i + end + + val elimAssoc = fn i => (i - (i mod 3) + 1) + val rulePrec = + let fun findRightTerm (nil,r) = r + | findRightTerm (TERM t :: tail,r) = + findRightTerm(tail,SOME t) + | findRightTerm (_ :: tail,r) = findRightTerm(tail,r) + in fn rhs => + case findRightTerm(rhs,NONE) + of NONE => NONE + | SOME term => + case termPrec term + of SOME i => SOME (elimAssoc i) + | a => a + end + + val grammarRules = + let val conv = fn {lhs,rhs,code,prec,rulenum} => + {lhs=lhs,rhs =rhs,precedence= + case prec + of SOME t => (case termPrec t + of SOME i => SOME(elimAssoc i) + | a => a) + | _ => rulePrec rhs, + rulenum=rulenum} + in map conv rules + end + + (* get start symbol *) + + val start = + case start + of NONE => #lhs (hd grammarRules) + | SOME name => + nontermNum "%start" name + + val symbolType = + let val data = Array.array(numTerms+numNonterms,NONE : ty option) + fun unmap (symbol,ty) = + Array.update(data, + case SymbolHash.find(symbolName symbol,symbolHash) of + SOME i => i + | NONE => raise Fail "symbolType", + ty) + val _ = (app unmap term; app unmap nonterm) + in fn NONTERM(NT i) => + if DEBUG andalso (i<0 orelse i>=numNonterms) + then NONE + else data sub (i+numTerms) + | TERM (T i) => + if DEBUG andalso (i<0 orelse i>=numTerms) + then NONE + else data sub i + end + + val symbolToString = + fn NONTERM i => nontermToString i + | TERM i => termToString i + + val grammar = GRAMMAR {rules=grammarRules, + terms=numTerms,nonterms=numNonterms, + eop = eop, start=start,noshift=noshift, + termToString = termToString, + nontermToString = nontermToString, + precedence = termPrec} + + val name' = case name + of NONE => "" + | SOME s => symbolName s + + val names = NAMES {miscStruct=name' ^ "LrValsFun", + valueStruct="MlyValue", + tableStruct="LrTable", + tokenStruct="Tokens", + actionsStruct="Actions", + ecStruct="EC", + arg= #1 arg_decl, + tokenSig = name' ^ "_TOKENS", + miscSig = name' ^ "_LRVALS", + dataStruct = "ParserData", + dataSig = "PARSER_DATA"} + + val (table,stateErrs,corePrint,errs) = + MakeTable.mkTable(grammar,defaultReductions) + + val entries = ref 0 (* save number of action table entries here *) + + in let val result = TextIO.openOut (spec ^ ".sml") + val sigs = TextIO.openOut (spec ^ ".sig") + val pos = ref 0 + val pr = fn s => TextIO.output(result,s) + val say = fn s => let val l = String.size s + val newPos = (!pos) + l + in if newPos > lineLength + then (pr "\n"; pos := l) + else (pos := newPos); + pr s + end + val saydot = fn s => (say (s ^ ".")) + val sayln = fn t => (pr t; pr "\n"; pos := 0) + val termvoid = makeUniqueId "VOID" + val ntvoid = makeUniqueId "ntVOID" + val hasType = fn s => case symbolType s + of NONE => false + | _ => true + val terms = let fun f n = if n=numTerms then nil + else (T n) :: f(n+1) + in f 0 + end + val values = VALS {say=say,sayln=sayln,saydot=saydot, + termvoid=termvoid, ntvoid = ntvoid, + hasType=hasType, pos_type = pos_type, + arg_type = #2 arg_decl, + start=start,pureActions=pureActions, + termToString=termToString, + symbolToString=symbolToString,term=term, + nonterm=nonterm,terms=terms, + tokenInfo=token_sig_info_decl} + + val (NAMES {miscStruct,tableStruct,dataStruct,tokenSig,tokenStruct,dataSig,...}) = names + in case header_decl + of NONE => (say "functor "; say miscStruct; + sayln "(structure Token : TOKEN)"; + say " : sig structure "; + say dataStruct; + say " : "; sayln dataSig; + say " structure "; + say tokenStruct; say " : "; sayln tokenSig; + sayln " end") + | SOME s => say s; + sayln " = "; + sayln "struct"; + sayln ("structure " ^ dataStruct ^ "="); + sayln "struct"; + sayln "structure Header = "; + sayln "struct"; + sayln header; + sayln "end"; + sayln "structure LrTable = Token.LrTable"; + sayln "structure Token = Token"; + sayln "local open LrTable in "; + entries := PrintStruct.makeStruct{table=table,print=pr, + name = "table", + verbose=verbose}; + sayln "end"; + printTypes(values,names,symbolType); + printEC (keyword,change,noshift,value,values,names); + printAction(rules,values,names); + sayln "end"; + printTokenStruct(values,names); + sayln "end"; + printSigs(values,names,fn s => TextIO.output(sigs,s)); + TextIO.closeOut sigs; + TextIO.closeOut result; + MakeTable.Errs.printSummary (fn s => TextIO.output(TextIO.stdOut,s)) errs + end; + if verbose then + let val f = TextIO.openOut (spec ^ ".desc") + val say = fn s=> TextIO.output(f,s) + val printRule = + let val rules = Array.fromList grammarRules + in fn say => + let val prRule = fn {lhs,rhs,precedence,rulenum} => + ((say o nontermToString) lhs; say " : "; + app (fn s => (say (symbolToString s); say " ")) rhs) + in fn i => prRule (rules sub i) + end + end + in Verbose.printVerbose + {termToString=termToString,nontermToString=nontermToString, + table=table, stateErrs=stateErrs,errs = errs,entries = !entries, + print=say, printCores=corePrint,printRule=printRule}; + TextIO.closeOut f + end + else () + end + + val parseGen = fn spec => + let val (result,inputSource) = ParseGenParser.parse spec + in make_parser(getResult result,spec,Header.error inputSource, + errorOccurred inputSource) + end +end; diff --git a/ml-yacc/tool/.cm/GUID/ext.sml b/ml-yacc/tool/.cm/GUID/ext.sml new file mode 100644 index 0000000..1720519 --- /dev/null +++ b/ml-yacc/tool/.cm/GUID/ext.sml @@ -0,0 +1 @@ +guid-$/(grm-ext.cm):ext.sml-1714016094.627 diff --git a/ml-yacc/tool/.cm/GUID/tool.sml b/ml-yacc/tool/.cm/GUID/tool.sml new file mode 100644 index 0000000..723e537 --- /dev/null +++ b/ml-yacc/tool/.cm/GUID/tool.sml @@ -0,0 +1 @@ +guid-$/(mlyacc-tool.cm):tool.sml-1714016083.388 diff --git a/ml-yacc/tool/.cm/SKEL/ext.sml b/ml-yacc/tool/.cm/SKEL/ext.sml new file mode 100644 index 0000000..8fb0bec --- /dev/null +++ b/ml-yacc/tool/.cm/SKEL/ext.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f1d"Tools"ad"YaccGrmExt"h0 \ No newline at end of file diff --git a/ml-yacc/tool/.cm/SKEL/tool.sml b/ml-yacc/tool/.cm/SKEL/tool.sml new file mode 100644 index 0000000..e27f099 --- /dev/null +++ b/ml-yacc/tool/.cm/SKEL/tool.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f1d"Tools"ad"YaccTool"h0 \ No newline at end of file diff --git a/ml-yacc/tool/.cm/amd64-unix/ext.sml b/ml-yacc/tool/.cm/amd64-unix/ext.sml new file mode 100644 index 0000000..b441fdb Binary files /dev/null and b/ml-yacc/tool/.cm/amd64-unix/ext.sml differ diff --git a/ml-yacc/tool/.cm/amd64-unix/tool.sml b/ml-yacc/tool/.cm/amd64-unix/tool.sml new file mode 100644 index 0000000..3968c2c Binary files /dev/null and b/ml-yacc/tool/.cm/amd64-unix/tool.sml differ diff --git a/ml-yacc/tool/ext.sml b/ml-yacc/tool/ext.sml new file mode 100644 index 0000000..c76d18e --- /dev/null +++ b/ml-yacc/tool/ext.sml @@ -0,0 +1,19 @@ +(* ext.sml + * + * Plugin for registering classifiers. + * + * Copyright (c) 2007 by The Fellowship of SML/NJ + * + * Author: Matthias Blume (blume@tti-c.org) + *) +structure YaccGrmExt = struct + local + val suffixes = ["grm", "y"] + val class = "mlyacc" + fun sfx s = + Tools.registerClassifier + (Tools.stdSfxClassifier { sfx = s, class = class }) + in + val _ = app sfx suffixes + end +end diff --git a/ml-yacc/tool/grm-ext.cm b/ml-yacc/tool/grm-ext.cm new file mode 100644 index 0000000..1842681 --- /dev/null +++ b/ml-yacc/tool/grm-ext.cm @@ -0,0 +1,12 @@ +(* + * Plugin for registering the "grm" classifier. + * + * (C) 2007 The Fellowship of SML/NJ + * + * Author: Matthias Blume (blume@tti-c.org) + *) +Library + structure YaccGrmExt +is + $smlnj/cm/tools.cm + ext.sml diff --git a/ml-yacc/tool/mlyacc-tool.cm b/ml-yacc/tool/mlyacc-tool.cm new file mode 100644 index 0000000..da18b4f --- /dev/null +++ b/ml-yacc/tool/mlyacc-tool.cm @@ -0,0 +1,12 @@ +(* + * The plugin library for ML-Yacc. + * + * (C) 2000 Lucent Technologies, Bell Laboratories + * + * Author: Matthias Blume (blume@kurims.kyoto-u.ac.jp) + *) +Library + structure YaccTool +is + $smlnj/cm/tools.cm + tool.sml diff --git a/ml-yacc/tool/tool.sml b/ml-yacc/tool/tool.sml new file mode 100644 index 0000000..fa0680e --- /dev/null +++ b/ml-yacc/tool/tool.sml @@ -0,0 +1,31 @@ +(* + * Running ML-Yacc from CM. + * + * (C) 1999 Lucent Technologies, Bell Laboratories + * + * Author: Matthias Blume (blume@kurims.kyoto-u.ac.jp) + *) +structure YaccTool = struct + local + val tool = "ML-Yacc" + val kw_sigopts = "sigoptions" + val kw_smlopts = "smloptions" + val kwl = [kw_sigopts, kw_smlopts] + (* This is a bit clumsy because we call parseOptions twice. + * However, this is not really such a big deal in practice... *) + fun get kw NONE = NONE + | get kw (SOME opts) = + #matches (Tools.parseOptions + { tool = tool, keywords = kwl, options = opts }) kw + in + val _ = Tools.registerStdShellCmdTool + { tool = tool, + class = "mlyacc", + cmdStdPath = fn () => ("ml-yacc", []), + template = NONE, + extensionStyle = + Tools.EXTEND [("sig", SOME "sml", get kw_sigopts), + ("sml", SOME "sml", get kw_smlopts)], + dflopts = [] } + end +end diff --git a/old-basis.tgz b/old-basis.tgz new file mode 100644 index 0000000..2450a65 Binary files /dev/null and b/old-basis.tgz differ diff --git a/runtime.tgz b/runtime.tgz new file mode 100644 index 0000000..4ffd48d Binary files /dev/null and b/runtime.tgz differ diff --git a/smlnj-lib.tgz b/smlnj-lib.tgz new file mode 100644 index 0000000..eabb1fe Binary files /dev/null and b/smlnj-lib.tgz differ diff --git a/smlnj-lib/CHANGES b/smlnj-lib/CHANGES new file mode 100644 index 0000000..ffeff3c --- /dev/null +++ b/smlnj-lib/CHANGES @@ -0,0 +1,1192 @@ +This is a list of changes to the SML/NJ Library; the version numbers +correspond to SML/NJ releases. + +-------------------------------------------------------------------------------- +SML/NJ 110.99.5 +------------------- + +[2024-03-14] + Rewrote the JSON parsers to work directly on the input source (instead of + using a ML-ulex lexer. This change fixes Issue #284 (ML-ULex's memoization + causes massive performance penalties for JSON parsing). For the `data.json` + file mentioned in the issue, parsing is about eight times faster, while the + speedup is even greater for the `huge.json` file. + +[2024-03-11] + Added `JSONDecode` module. + +[2024-02-26] + Added `insertWith`, `insertWithi`, and `findAndRemove` operations to the + `HASH_TABLE` and `MONO_HASH_TABLE` interfaces (and corresponding + implementations). This addition was requested in Issue #297 (Additional + operations for the `MONO_HASH_TABLE` interface). + +[2024-01-01] + Fixed a bug in `Random.randReal` on 32-bit systems. This change fixes + issue #290 (`Random.realRand` returns far smaller value than unity). + +[2023-12-22] + Fixed the error checking and documentation for the `subArray` function + in the `DynamicArray` structure and `DynamicArrayFn` functor. + +[2023-12-20] + Fixes to the `subArray` and `truncate` functions in the dynamic array + implementation (both structure `DynamicArray` and functor `DynamicArrayFn`). + These changes address development pull request #280 (`DynamicArray.subArray` + creates array with length bound+1). + +[2023-12-18] + Add `EditDistance` module to utility library. + +[2023-11-04] + Reworked the `UTF8` structure to impose stricter validation of the + encodings. Added the `Invalid` exception for when an invalid encoding + is encountered and replaced uses of the `Domain` exception with `Invalid`. + Also added the `size'` function for getting the number of UTF-8 + characters in a substring. These changes address development issue 276 + (`UTF8.getu` should validate that it's input is UTF-8). + +[2023-09-19] + Added modules for the representation of booleans, integers, and words + as hash-consed values to the HashCons library. + +-------------------------------------------------------------------------------- +SML/NJ 110.99.4 +------------------- + +[2023-07-27] + Fixed a bug in the Thompson engine and also added support for "$" + (end-of-line assertions) and full interval support in the RegExp + library. + + Also fixed issue #258 (https://github.com/smlnj/legacy/issues/258) + in the Thompson engine. + +[2023-06-14] + Reworked the code for determining the maximum hash-table size (used + in the `HashSetFn` and `HashTableRep` modules) so that it does not + depend on `Int.int` and `Word.word` being the same size (an issue + for MLton). Also split that code out into the internal `MaxHashTableSize` + module. These changes fix issue #279 (https://github.com/smlnj/legacy/issues/279). + +[2023-01-01] + Reimplementation of the `Random` structure to use the Mersenne Twister + algorithm. There are both 32-bit and 64-bit versions of the generator + (which one is included depends on the target platform). The + implementations are derived from the C code at + http://www.math.sci.hiroshima-u.ac.jp/m-mat/MT/emt.html. The `RANDOM` + signature also has some additional operations, such as generation of + native-sized integers and words. + + This change fixes issue #256 (https://github.com/smlnj/legacy/issues/256). + +[2022-12-19] + Added the `NativeInt` and `NativeWord` structure aliases to provide + a portable way to refer to the native numeric types in signatures. + While these are not necessary for the development branch (which only + supports 64-bit platforms), we include them to minimize differences + with the legacy branch of the SML/NJ Library. + +-------------------------------------------------------------------------------- +SML/NJ 110.99.3 +------------------- + +[2022-06-03] + More and improved comparisons for the `ORD_MAP` interface. The type of + the `collate` function has been made more polymorphic to allow maps with + different ranges to be compared. We have also added the `equiv` function + for testing the equivalence of two maps (again, the range types may be + different), and the `extends` function that tests if one map extends + another. These functions are linear in the size of the smaller map. + +[2022-06-01] + Fix for bug #307 (`SExpParser.parseFile` should return empty list on empty + file) + +[2022-06-01] + Fix for bug #309 (SExp: parsing the output of the printer produces a + different SExp). The *SExp* library now uses Scheme syntax for string + literals. + +[2022-04-27] + Added `findAndRemove` function to `ORD_MAP` interface. + +-------------------------------------------------------------------------------- +SML/NJ 110.99.2 +------------------- + +[2021-07-28] + Added convenience function `int` to the `JSON_STREAM_OUTPUT` interface. + +[2021-07-28] + Refactored the output modules in the JSON library. These changes should + not affect current clients of the library, but allow the addition of + printing JSON to a `CharBuffer.buf`. + +[2021-07-28] + Bug fix to `JSONUtil` module; the `FIND` path arc was not getting + handled for the update functions (`replace`, `insert`, and `append`). + +-------------------------------------------------------------------------------- +SML/NJ 110.99.1 +------------------- + +[2021-03-09] + Fixed bug #278 (ListMergeSort is documented as stable, but is not (esp. + since 110.78 rewrite!)) + +[2021-03-01] + Added some additional mechanism to the `JSONUtil` structure to make + writing robust queries easier. The additions include the functions + `hasField` and `testField`, and the constructor `FIND` for + the `JSONUtil.edge` datatype, which supports indexing arrays by + value in a path. Also added the `ElemNotFound` exception for when + `FIND` fails. + +[2021-02-08] + Fixed the handling of the empty list in the `ANSITerm` structure. + Also updated the documentation. + +-------------------------------------------------------------------------------- +SML/NJ 110.99 +------------------- + +[2020-12-23] + Split out the `CharBufferDev` structure from the `CharBufferPP` + structure. + +[2020-12-21] + Significant improvements to the pretty printing library. + Added a number of device "properties" to the `PP_DEVICE` + signature (max indentation, max depth, ellipses, ...) + with functions to set the properties. Also updated the + pretty printing engine to use the max depth and max + indentation properties when rendering. Restructured + and cleaned up a bunch of code in the process. + +-------------------------------------------------------------------------------- +SML/NJ 110.98 +------------------- + +[2020-07-08] + Fixed bug #263 (JSON parser ignores suffixes). To fix this + bug required changing the interface to the `JSONParser` + structure, including the type of the `parse` function. + The `JSONParser` structure now exports a `source` type, + which is an abstraction of a JSON input source. Several + functions are included for creating sources from strings, + input streams, etc. + This mechanism has also been added to the JSONStreamParser + structure's interface too. + +[2020-07-07] + Fixed bug #262, which was that the JSON parser didn't + handle empty objects correctly. + +[2020-07-02] + Renamed the `HTMLDev` structure to `HTML3Dev`. At some + point, the `HTML` library will also be renamed, since + "HTML" implies HTML5 these days. + +[2020-07-01] + Updated the `ANSITermDev` structure in the pretty-printing + library to use the new control modes that were introduced + in 110.97. Also moved the `HTMLDev` structure into a new + `pp-extras-lib.cm` library. Doing so eliminates a dependency + from the compiler to the HTML library. + +[2020-06-08] + Added several functions to the `MONO_HASH_SET` signature + (and `HashSetFn` functor) to make its interface closer + to that of the `ORD_SET` signature. The new functions are + `subtractc`, `subtractList`, `mapPartial`, `partition`, + `filter`, `exists`, `all`, and `find`. + In addition, also added the `copy` function. + +[2020-06-03] + Added the functions `delete` and `findAndRemove` to the + `MONO_PRIORITYQ` signature. + +[2020-05-25] + Some cleanup and additions to the `RegExpSyntax` structure in + the *RegExp Library*. Instead of having constructors for + the optional, closure, and positive-closure operators, we + now support them via functions. This change is because the + constructors were redundant with the more general `Interval` + constructor. + We also added more support for character ranges. The new + function `fromRange` constructs a character set for a range, + and pre-defined character sets are provided for the POSIX + character classes (plus ``[:ascii:]`` and ``[:word:]``. + +[2020-05-20] + Fixed bug in printing JSON strings that include characters + that need to be escaped. + +[2020-05-15] + Renamed `without` as `subtract` and `listItems` as `toList` + in the `MONO_HASH_SET` signature. The old names are still + present, but are marked as deprecated. + +[2020-05-14] + Add `toList` function to `MONO_DYNAMIC_ARRAY` signature. + +[2020-05-12] + Changed the `ListXProd` interface to follow the Basis Library + conventions (this module was written in the early 1990's + before the conventions had been established). The function + names are now `app`, `map`, and `fold` and the types follow + the Basis Library conventions (`app` expects a `unit` return + type for its first argument and the initial value for `fold` + is the second argument, instead of the third). The old + `appX`, `mapX`, and `foldX` functions are deprecated, but + remain for backward compatibility. + +[2020-05-11] + Added `mapPartial` function to `ORD_SET` interface. + +-------------------------------------------------------------------------------- +SML/NJ 110.97 +------------------- + +[2020-04-21] + Removed the `version` and `banner` components from the `LibBase` + structure, since the library has been tracking SML/NJ release + versions for a very long time. + +[2020-04-19] + Added `implode`, `map`, `app`, `all`, and `exists` functions to + the `UTF8` signature. Also implemented the handling of 4-byte + encodings, which were previously not supported. + +[2020-04-16] + Renamed the `HASH_SET` signature to `MONO_HASH_SET`, which is + the correct name given the naming conventions. + +[2020-04-14] + Added the `fromVector`, `toList`, and `toVector` functions to + the `DynamicArray:DYNAMIC_ARRAY` interface. + +[2020-04-14] + Updated the `BitArray:BIT_ARRAY` interface to follow standard patterns + (this interfaces was originally designed before the SML Basis Library, + so it did not follow the conventions). + The changes are to have the `fromString` function return `NONE`, + instead of raising an exception on bad input, and to use deprecate + the `lshift` and `rshift` operations in favor of `>>` and `<<` (which + use `word` for the shift amount). + +[2020-04-13] + Added more modes to the `ANSITerm` command set. Specifically, we added + the `Default` color specifier and the styles `DIM`, `NORMAL`, `UL_OFF`, + `BLINK_OFF`, `REV_OFF`, and `INVIS_OFF`. + +[2020-04-13] + Made the `ControlUtil.Cvt.bool` converter case insensitive. Also, + it now allows "yes"/"no" as values. + +[2020-04-11] + Replaced the "directional" fold functions (_e.g._, `foldl`, `foldri`) + with non-directional functions (_e.g., `fold`, `foldi`) in the + `HashConsSet` and `HashConsMap` structures. The reason for this + change is that the order of objects is pretty arbitrary, so there + is not any usefulness to processing elements in increasing or + decreasing order. + For backward compatibility, the old names will continue to work, + but they are deprecated amd will be removed in some future release. + +[2020-04-10] + Added `insertWith` and `insertWithi` functions to the `ORD_MAP` + signature. + +[2020-04-10] + Added operations to the `HashConsSet` and `HashConsMap` structures + (and corresponding signatures) to bring them inline with the + `ORD_SET` and `ORD_MAP` interfaces. Also reimplemented these + +[2020-04-10] + Added a `QUOTE` constructor to the `SExp.value` datatype and + cleaned up the details of the syntax of identifiers. Also + added a `compare` function for the `SExp.value` datatype. + +[2020-04-09] + Reimplemented the `find` functions in `UnixPath` to use the + `PathUtil` implementation, rather than reimplementing it. + Also changed the result types of `findFile` and `findFileOfType` + to return `string option`, instead of raising an exception. + +[2020-04-08] + The `JSONStreamPrinter` implementation now raises the `Fail` + exception when printing is attempted on a closed printer. + +[2020-04-08] + Changed the return type of the `error` call-back function in + the `JSONStreamParser.callbacks` type to `unit`. + +[2020-04-01] + Added a new library for generating and manipulating "Universally + Unique Identifiers" (UUID/uuid-lib.cm). + +[2020-04-01] + Added structure `FNVHash`, which implements the Fowler-Noll-Vo + hashing algorithm + +[2020-02-27] + Added `RESET` as a command to the `ANSITerm` structure. + +[2020-01-02] + Added the `disjoint` function to the `ORD_SET` signature. + +-------------------------------------------------------------------------------- +SML/NJ 110.94-96 +------------------- + + No changes + +-------------------------------------------------------------------------------- +SML/NJ 110.93 +------------------- + +[2019-09-04] + Modified the `Rand` and `Random` structures to use the default `Word` + structure, instead of `Word31`. This change should make no difference + on 32-bit targets, but we hopefully allow these modules to continue + working on 64-bit targets (once we have 64-bit support working). In + the long run, we will want 64-bit specific implementations of these + modules. + +-------------------------------------------------------------------------------- +SML/NJ 110.86-92 +------------------- + + No changes + +-------------------------------------------------------------------------------- +SML/NJ 110.86 +------------------- + +[2019-01-07] + Renamed the function `CharBufferPP.openOut` to `openBuf`, and added the + `TextPP` structure that supports pretty printing to either an output + stream (like `TextIOPP`) or a character buffer (like `CharBufferPP`) + +-------------------------------------------------------------------------------- +SML/NJ 110.85 +------------------- + + No changes + +-------------------------------------------------------------------------------- +SML/NJ 110.84 +------------------- + +[2018-12-12] + Added CharBufferPP to pretty-printing library. This module can be used + to generate strings from a pretty-printer. + +[2018-09-26] + Minor cleanup in the way that hash-table sizes are determined. We now + place an upper bound on table size that is derived from Array.maxLen. + +-------------------------------------------------------------------------------- +SML/NJ 110.83 +------------------- + +[2018-08-20] + Added a pretty printer (SExpPP) to the SExp library. The SExpPrinter + module does not layout the text with line breaks, which makes it hard + to read. + Also fixed a bug in the SExp.same function (lists of different lengths + would compare equal if the shorter list was a prefix of the longer list) + +[2018-08-20] + Added some documentation to the PP_STREAM signature. + +-------------------------------------------------------------------------------- +SML/NJ 110.82 +------------------- + +[2017-05-17] + Fixed a bug in the way that JSON string values were being printed. + The code previously assumed that C-style esaping will work, but + that is not true for "\'" (as well as for control and non-ASCII + characters). The new implementation assumes that the string value + is UTF-8 and uses the "\\u" escape sequences for characters outside + the JSON escapes and printable ASCII characters. + + NOTE: we should make sure that the input side of the JSON library + has the same semantics. + +-------------------------------------------------------------------------------- +SML/NJ 110.81 +------------------- + +[2017-04-12] + Added JSONUtil structure to JSON library. This module supports processing + the in-memory representation of JSON values. + +-------------------------------------------------------------------------------- +SML/NJ 110.80 +------------------- + +[2016-08-03] + Fixed bug #167 (Bug in handling of long options in GetOpt). The new behavior + is to allow long-option prefixes to overlap with other long options, but to + favor an exact match over prefix matches. For example, if the long options + are "--foo", "--foobar", and "--foobaz", then "--foo" will match the first, + but "--foob" will be flagged as ambiguous. + +[2016-01-12] + Added minItem and maxItem to ORD_SET signature. + +[2016-01-12] + Added toList to ORD_SET signature and marked listItems as + deprecated. + +[2015-12-07] + Fixed bug #144 (Splay sets are broken). + +[2015-10-28] + Added the function `help` to the Controls module that returns a + control's help string. + +[2015-10-28] + Added the functions mkOptionFlag, mkOptionReqArg, and mkOption + to the Controls module to make it easier to package controls + as command-line options. These functions provide an alternative + to the usual approach of using a "--C= form for + all of the controls. + +-------------------------------------------------------------------------------- +SML/NJ 110.79 +------------------- + +[2015-09-21] + Added additional array operations toList, fromVector, and toVector to + BitArray module so that it matches the new proposed Basis Library + specification (Proposal 2015-003). + +-------------------------------------------------------------------------------- +SML/NJ 110.78 +------------------- + +[2014-12-07] + Added HTML4Entities structure to HTML4 library. various + performance improvements to HTML4 parsing (but the code still + needs work). Also rewrote the HTML4 pretty printer. + +[2014-12-07] + New implementation of ListMergeSort. This implementation is + better on sorted data (both ascending and descending) and + faster on smaller lists. It is slower than the previous + implementation on very large lists (e.g., 10^6 elements + or more). + +[2014-11-17] + Added HTML4Attrs module to HTML4 library. This module provides + utility functions for building attribute-value pairs. + +-------------------------------------------------------------------------------- +SML/NJ 110.77 +------------------- + +[2014-07-15] + Reimplementation of delete/remove operations in red-black-tree + versions of sets and maps. The previous implementation could + result in violations of the data-structure invariants, which + was leading to unbalanced trees and loss of performance. Note + that the order invariants were correctly maintained, so correctness + of the operations was not an issue. + +[2014-03-27] + Added XML library, which is a lightweight library for parsing XML + files. The library does not support validation and will immediatly + raise an exception on syntax errors. Its purpose is to ease the + writing of applications that must process data files that are + in XML syntax. + +[2013-11-20] + Changes to S-expression parsing library. Allow multiple top-level + values in a file (parsing now returns a list). Handle LISP/Scheme-style + comments ("; ..."). + +-------------------------------------------------------------------------------- +SML/NJ 110.76 +------------------- + +[2012-10-30] + Fixed bug in DynamicArray iterators because of bad slice dimensions + (bug #108). Also switched to using Array.fromList to implement fromList + function. + +-------------------------------------------------------------------------------- +SML/NJ 110.75 +------------------- + +[2012-09-24] + Added Base64 module to support encoding and decoding Word8 vectors as + base64 strings. + +[2012-09-23] + Added exists, existsi, all, and alli functions to ORD_MAP signature and + implementations. + +[2012-09-23] + Added all function to ORD_SET signature and implementations. + +[2012-03-26] + Added subtract, subtract', and subtractList functions to ORD_SET + interface and implementations. + +[2012-03-09] + Modified PathUtil module to handle the case where the filename is an + absolute path. + +-------------------------------------------------------------------------------- +SML/NJ 110.74 +------------------- + +[2011-11-25] + Added hash-table-based implementation of sets (HASH_SET signature and + HashSetFn functor). + +[2011-05-23] + Added new S-expression library (contributed by Damon Wang) + +[2011-05-17] + Fixed bug in JSON scanner. It didn't handle escaped backslash or double + quote correctly. + +-------------------------------------------------------------------------------- +SML/NJ 110.73 +------------------- + +[2011-05-12] + Modified the implementation of GetOpt.usageInfo so that if the help + string has embedded newlines, then the extra lines are properly + indented. + +[2011-04-10] + Changed the interface of JSONStreamParser to support both parsing files + and TextIO.instreams. + +[2011-03-16] + Added findExe function to PathUtil module. + +[2010-06-30] + Added HTML4 library. + +[2010-03-09] + Fixed bug in hashed cons library (bug #55). + +-------------------------------------------------------------------------------- +SML/NJ 110.72 +------------------- + +[2009-05-13] + Added array iterators to DynamicArray module. + +[2009-03-15] + Added support for the interval syntax to the AWK RE syntax parser (this + syntax was not part of the original version of AWK, but is supported by + modern variants and the POSIX standard). + +[2008-10-03] + Improved the error messages in the JSON parser. + +[2008-06-11] + Added support for begin marks to Thompson engine. + +[2008-05-12] + Added parsing support to JSON library. + +[2008-04-18] + First commit of JSON Library. Printing works, but parsing still needs + to be implemented. + +[2008-03-18] + Committed a major overhaul of the RegExp library. There is now a new + engine that implements Ken Thompson's RE matching algorithm. The result + type of the RE matches has also been simplified by removing an unecessary + option type. + NOTE: the new RE engine is not complete in that it does not yet support + begin/end marks. + +[2007-12-13] + Also added list' and listg' which are parameterized over the delimiter- + and separator strings used for formatting lists. (The default for "list" and + "listg" is the usual "[" ", " "]".) + +[2007-12-13] + Added "elem" function as an inverse to "glue" to FormatComb module. + This makes writing extensions possible. Also added "list" and "option" + combinators for formatting list- and option types. + +[2007-11-02] + A collection of bug fixes for machine.sml in the Reactive library. + (Thanks to Timothy Bourke) + +[2007-05-15] + Made the UnixPath.path_list type concrete (= string list). + +[2007-04-24] + Added next function to Fifo and Queue modules. + +[2007-02-15] + Removed vestigial IntInf and INT_INF definitions. + +[2007-02-14] + Changed the representation type of UTF8.wchar from Word32.word to word. + Also added the UTF8.maxCodePoint value. + +[2007-01-30] + Added the lookup function to the ORD_MAP interface. + +[2007-01-30] + Added the fromList function to the ORD_SET interface. For the red-black + tree and list based implementations, this operation is linear for ordered + inputs. + +[2007-01-28] + Added UTF8 signature and structure. + +[2005-11-07] + Internal change: replaced uses of the depreciated Substring.all + with the Substring.full + +[2005-11-05] + Tweaking of the interval set API. Added iterators on items and + changed the iterators on intervals to have a "Int" suffix. + Changed addInterval to addInt, changed list to intervals. Added + the items function. + +[2005-10-31] + Fixed a bug in IntervalSetFn().intersect. + +[2005-10-25] + Added interval sets to utility library (signatures INTERVAL_DOMAIN + and INTERVAL_SET, and functor IntervalSetFn). + +[2005-08-04] + Renamed graph-scc.sml to graph-scc-fn.sml. + +[2005-07-23] + Fix PP library to export ANSITermDev, ANSITermPP, PPDescFn, and + PP_DESC. Also add nbSpace to PP_DESC signature. + +[2005-07-12] + Added ANSITermDev device that uses ANSI terminal display attributes + and added a ANSITermPP for pretty printing to such a device. + +[2005-07-06] + Added ANSITerm structure, which provides support for ANSI terminal + display attributes (e.g., red text). + +[2005-07-06] + Refactored the pretty printing library. The declarative way to + construct pretty-printing descriptions now sits on top of the + PP_STREAM interface. Also removed "onNewline" function, which + was not implemented. + +[2005-05-05] + Added singleton function to MONO_PRIORITYQ interface. + +[2005-05-04] + Added the "join" combinator to ParserComb. + +[2005-03-21] + Fixed a bug in GetOpt.getOpt. The argument processing functions + for ReqArg and OptArg, and the embedding function for ReturnInOrder + were getting applied to all arguments, even past a "--". + +[2005-02-11] + Added Atom.same and Atom.lexCompare to the Atom structure. Eventually, + Atom.sameAtom will be removed. + +[2004-12-15] + Change HashString.hashString' to hashSubstring. + +[2004-12-09] + Added two simple statistics modules: + * RealOrderStats implements selection of arbitrary order + statistics as well as the median of an array of reals + in randomized linear time. + * UnivariateStats implements mean, variance, standard and average + deviation, skew, kurtosis, and median of a univariate sample + (i.e., a set of reals). + +[2004-11-29] + Added HashString.hashString' for substrings. + Hand-inlined CharVector.fold into HashString for efficiency. + Modified atom.sml so that Atom.atom' first tries to find an + existing atom before turning its argument into a string. [-blume] + +[2003-09-03] + Minor modifications to random.sml due to changes to some + Basis interfaces (slices). + Significant surgery on bit-array.sml to make it compile with + new signature MONO_ARRAY. This will require further cleanup + in the future. + +[2003-08-28] + Made compilation of int-inf* conditional. (The Basis now has + a spec-conforming implementation of IntInf, so the one here is + no longer needed.) [-blume] + +[2003-07-31] + Added getDevice function to PP_STREAM signature. + +[2003-05-27] + Avoid poly-eq in Util/parser-comb.sml. + +[2003-05-22] + More cleanup (all in the name of eliminating incomplete matches): + - in HTML/html-attrs-fn.sml: fill in missing patterns in two + case expressions + - in HTML/html-gram: rewrote function groupDefListContents + to avoid the (unnecessary) incomplete match + - in PP/devices/html-dev.sml: made the implementation of popStyle + agree with the comment above it (popStyle on an empty + stack is supposed to be a nop) + - in PP/src/pp-stream-fn.sml: function setSize: handle case of + an empty scanStk + - in Util/prime-sizes.sml: raise Fail exceptions instead of + Match exceptions when reaching the end of a list + (Perhaps this should eventually be rewritten using an on-demand + prime-number generator that kicks when needed.) + +[2003-05-22] + Cleanup logic in Scan.scanf function. + +[2002-10-03] + Fixed bug in formating reals with the %g format. + +[2002-06-18] + Fixed a bug in MatchTree.nth (regexp library). Thanks to + Sami Nopanen. + +[2002-05-20] + Fixed bug in IntInf module: scanning of hex literals was not + handling "0x" prefixes. + +[2002-05-20] + Added ControlUtil module and stringControls function. + +[2002-05-17] + Added tyName field to ValueSyntax exception in Controls module + and path field to registry tree in ControlRegistry module. + +[2002-05-17] + Added == and != operators to INT_INF interface. These are a + placeholder until IntInf moves into the SML/NJ compiler. + +[2002-05-16] + Added mergeWith function to ORD_MAP interface. + +[2002-05-15] + Revised Controls library. + +[2002-04-17] + Added Matthias Blume's implementation of Danvey-style format + combinators to the Util library. + +[2002-03-14] + Merged in Matthias Blume's Controls library. Warning: the API + to this library is likely to change as we get some experience + with it. + +[2002-03-11] + Added consR1 .. consR5 functions to HashCons library. These + can be used to hash-cons record types. + +[2002-02-21] + Fixed infinite loop in LeftPriorityQFn.fromList when given a + singleton list as input (Chris Okasaki provided the fix). + +[2002-02-12] + Added implementation of priority queues to Util library (using + Okasaki's leftist-tree implementation). + +[2001-10-19] + Added new HashCons library. + +[2001-10-19] + Added PrimeSizes module to Util library. + +[2001-09-27] + Fixed bug in UnixEnv module (reported by Leunga). + +[2001-05-05] + Added addrToString function to SockUtil module in INet library. + +[2001-05-04] + Modified GraphSCC module to support a list of roots (Blume). + The old interface still works, but will be dropped when we move + to the new Util library. + +[2001-04-06] + Added partition function to ORD_SET interface. + +[2001-04-05] + Added hasProps function to PropList module. + +[2001-03-16] + Minor bug fix in GetOpt: don't pad the help string, since it causes + line wraps when one of the help lines is longer than the screen + width. It was also unneccesary. + +[2001-02-23] + Added inDomain function to the various kinds of hash tables. + +[2001-02-22] + Fixed bug in {Int,Word}RedBlackMapFn.insert (bug 1591). + +[2000-12-13] + Added the bit operations to the IntInf module (Leung). + +[2000-11-17] + Added where clause to GraphSCCFn result signature (Blume). + +[2000-10-20] + Made the result signature of the RegExpFn functor be opaque. + +[2000-09-28] + SML/NJ 110.0.7 release. + +[2000-09-02] + Added Word versions of hash tables (WordHashTable), finite + maps (WordRedBlackMap), and sets (WordRedBlackSet). + +[2000-09-02] + fixed bug in Format module (infinity caused infinite loop). + +[2000-07-19] + added anchored paths in CM files for NEW_CM. + +[2000-06-30] + added implementation of LWORD in Format module. + +[2000-05-16] + Fixed bug in dfa-engine.sml (bug number 1559). + +[2000-05-08] + Added setFn to PList.newProp return type. + +[2000-05-05] + Added sameHolder function to PropList module. + +[2000-04-05] + Fixed a bug in the filter/filteri hash table functions: the number + of items in the table was not being recomputed. + +[2000-04-04] + Added modify and modifyi iterators to hash tables. + +[2000-03-28] + Added anchorLink style to HTMLDev structure in PP/devices. + +[1999-12-03] + Added IntHashTable structure to Util library; this structure is + a specialization of hash tables to integer keys. + +[1999-12-03] + Added default cases to avoid "match not exhaustive" warnings. + +[1999-12-03] + Added GraphSCCFn functor to Util library, which implements a + strongly-connected components algorithm on directed graphs (written + by Matthias Blume). + +[1999-11-10] + Fixed a benign type error in RegExp/BackEnd/fsm.sml that was exposed by + the previous change. + +[1999-11-7] + Made the result signatures of ListSetFn and ListMapFn opaque. + +[1999-11-1] + Fixed bug in PP/devices/sources.cm under new CM (missing smlnj-lib.cm). + +[1999-10-20] + Fixed bug in RedBlack trees (the linear time construction of + trees from ordered sequences was producing backwards trees). + +[1999-10-18] + Changed UREF signature so that union, unify, and link operations + now return a boolean. + +[1999-10-18] + Added peekFn to PropList.newProp return result. + +[1999-10-14] + Added TextIOPP structure to pp-lib.cm (it was not being exported). + +[1999-09-21] + Changed the getOpt API. Errors are now reported using a callback + and both usage and getOpt take records as arguments. Also changed + the NoArg descriptor to take a function so that imperative argument + processing can be supported. + +[1999-09-20] + Changed CM files to be compatible with both the old (110.0.x) and + new (110.20+) versions of CM. + +[1999-09-17] + Added PropList:PROP_LIST structure. This structure implements + property lists using Stephen Weeks's technique. + +[1999-09-17] + Improved the red-black tree implementations with linear-time union, + intersection, difference, and filter operations. Also reimplemented + the delete function in a way that does not require an extra constructor. + +[1999-09-09] + Added Red-Black-Tree implementation of sets and maps. Two new + functors (RedBlackMapFn and RedBlackSetFn) and four new structures + (IntRedBlackMap, IntRedBlackSet, AtomIntRedBlackMap, and IntRedBlackSet). + Also added aliases AtomMap and AtomSet that for the RB implementations. + +[1999-09-03] + Added missing support for WORD and WORD8 format items to Format structure. + Still need to handle LWORD items!! + +[1999-08-17] + Added DynamicArray:DYNAMIC_ARRAY structure. + +[1999-07-06] + Added declarative representation of pretty-printing to PPStreamFn. + This replaces the PPDescFn and PP_DESC components (which were not + implemented anyway). + +[1999-07-02] + Moved SockUtil structure into a new INet library. Also split out the + Unix-specific socket operations into a UnixSockUtil module. (Sockets + are now supported on Win32). + +[1999-06-17] + Added listKeys function to ORD_MAP API. + +[1999-06-14] + Added Riccardo Pucella's GetOpt structure to Util library. + +[1999-06-14] + Added singleton and inDomain functions to ORD_MAP API. + +[1999-06-02] + Merged in some bug fixes from the compiler's version of the IntInf + structure. + +[1999-05-05] + Fixed a couple of bugs in the PP library having to do with nesting + absolute and relative indentations. + +[1999-04-21] + Added a way to pass control information to a PP device via the PP + stream (function control in PP_STREAM). + +[1999-04-16] + Added better style support to the HTML PP device. + +[1998-09-04] + Added isEmpty predicate and first fucntion to ORD_MAP + signature (and to implementations). + +[1998-06-23] + Fixed a bug in ParserComb.bind (reported by Andrew Kennedy). + +-------------------------------------------------------------------------------- +[1998-04-15] 110 --> 110.5 +-------------------------- + +There is now a regular expression library (mostly implemented by Riccardo +Pucella). The implementation separates front-ends (which specify the +syntax of REs) from back-ends (which implement the matching algorithms). +Currently, we support AWK syntax as the only front-end and two different +back-ends. + +Added an implementation of the SML/NJ PP API to the pretty-printer examples. + +Fixed a bug in the IntInf structure with scanning strings of the form "1+2". +Also did some clean-up of the code. + + +-------------------------------------------------------------------------------- +[1997-12-07] 109.32 --> 110 +--------------------------- + +Added the function add' to the ORD_SET signature and the function insert' +to the ORD_MAP signature. These functions fit the pattern of the fold +functions. (110 patch 3) + +Added the IntInf:INT_INF structure to the Util library. This is a subset +of the optional IntInf structure defined by the SML'97 basis. + +Changed the Rand structure to use words as seeds and results. This also +fixes bug 1047. (110 patch 3) + +Added a clear operation to the Queue and various hash table structures. + +Changed the ListFormat.formatList to ListFormat.fmt and ListFormat.scanList +to ListFormat.scan. Added the function ListFormat.listToString. + +Updated the HTML library to the final 3.2 specification. This involved adding +a new BODY element type (with attributes). Also, removed the header/footer +arguments to the pretty-printer (they were a gross hack). + +Added PathUtil structure to Util library. This is a more general version +of the old UnixPath module. + + +-------------------------------------------------------------------------------- +[1997-10-01] 109.31 --> 109.32 +------------------------------ + +Changed the type of the exec instruction in the Reactive library. + + +-------------------------------------------------------------------------------- +[1997-09-09] 109.30 --> 109.31 +------------------------------ + +Added the Reactive library, which supports reactive scripting. This is a +first cut, and hasn't been extensively tested. + +Changed the names of SockUtil.sock{Recv,Send}* to SockUtil.{recv,send}* +(since the sock prefix was redundant). + +Added some bug fixes to IntListSet that had previously been added to +the ListSetFn implementation. + + +-------------------------------------------------------------------------------- +[1997-07-17] 109.29 --> 109.30 +------------------------------ + +Added a missing case to {ListSetFn,IntListSet}.isSubset. + +The bug fix for unionWith was broken. I think I've got it right this +time. + + +-------------------------------------------------------------------------------- +[1997-07-17] 109.29 --> 109.30 +------------------------------ + +Fixed a bug in the binary-tree and splay-tree implementations of the +unionWith[i] and intersectWith[i] functions. The bug caused the order +of arguments to the merging function to be wrong in some cases. + +Fixed uses of System.Unsafe. + +Removed Array2:ARRAY2 from Util library, since the basis now defines these. + +Added MonoArrayFn functor for easy creation of monomorphic array structures. + +Added Atom.atom' operation for turning substrings into atoms. + + +-------------------------------------------------------------------------------- +[1997-06-13] 109.28 --> 109.29 +------------------------------ + +Added collate operation to ORD_MAP signature. + +Added compare operation to ORD_SET signature. + +Changed the type of and intersectWith[i] in the ORD_MAP signature to be +more general. + +Changed the type of the map function in the ORD_SET signature to return +a new set (instead of a list). + + +-------------------------------------------------------------------------------- +[1997-05-22] 109.27 --> 109.28 +------------------------------ + +Changed various sharing constraints to "where type" definitions to +compile in SML'97. + +Added AtomBinaryMap and AtomBinarySet structures to Util library. + + +-------------------------------------------------------------------------------- +[1997-03-03] 109.25.1 --> 109.25.2 +---------------------------------- + +Changed Util/time-limit.sml to reflect new location of callcc/throw. + + +-------------------------------------------------------------------------------- +[1997-02-25] 109.25 --> 109.25.1 +-------------------------------- + +Added a bunch of new operations to the ordered map modules (BinaryMapFn, +IntBinaryMap, ListMapFn, IntListFn, and SplayMapFn). The new operations +are: + val unionWith : ('a * 'a -> 'a) -> ('a map * 'a map) -> 'a map + val unionWithi : (Key.ord_key * 'a * 'a -> 'a) -> ('a map * 'a map) -> 'a map + val intersectWith : ('a * 'a -> 'a) -> ('a map * 'a map) -> 'a map + val intersectWithi : (Key.ord_key * 'a * 'a -> 'a) -> ('a map * 'a map) -> 'a map + val filter : ('a -> bool) -> 'a map -> 'a map + val filteri : (Key.ord_key * 'a -> bool) -> 'a map -> 'a map + val mapPartial : ('a -> 'b option) -> 'a map -> 'b map + val mapPartiali : (Key.ord_key * 'a -> 'b option) -> 'a map -> 'b map + +Added IOUtil : IO_UTIL structure to the Util library. This provides +support for dynamically rebinding stdIn and stdOut. + +Added KeywordFn functor to the Util library. This provides support for +implementing scanners, where keyword recognition is done outside the +basic lexical analysis. + +Fixed several bugs in the ListSetFn functor. + + +-------------------------------------------------------------------------------- +[1997-01-10] 109.23 --> 109.24 +------------------------------ + +Changed the HASH_TABLE, MONO_HASH_TABLE, MONO_HASH2_TABLE, and ORD_MAP +interfaces to provide two forms of the listItems function. listItemsi +returns a list of (key, value) pairs and listItems returns just the values. +The previous version of listItems was like listItemsi. + +Added a missing case for BASEFONT in CheckHTMLFn.check. + + +-------------------------------------------------------------------------------- +[1996-12-07] 109.21.1 --> 109.22 +-------------------------------- + +Changed the MONO_DYNAMIC_ARRAY signature to make array an eqtype (also changed +the implementation of DynamicArrayFn). + +Fixed a bug in Array2.column + + +-------------------------------------------------------------------------------- +[1996-10-18] 109.19 --> 109.20 +------------------------------ + +Added functions for sending/receiving strings over TCP sockets to the +SockUtil structure. + +Fixed a bug with reading on a closed socket in SockUtil.sockRecvVec + +Fixed a bug in the implementation of the intersection operation +in binary sets (BinarySetFn and IntBinarySet). + + +-------------------------------------------------------------------------------- +[1996-10-01] 109.18 --> 109.19 +------------------------------ + +Updated the HTML library to the August 21 version of the 3.2 DTD. + +Added html-defaults.sml to the HTML Library. + +Added Parsing combinators to the Util library. + +Added Socket utilities to the Unix library. + diff --git a/smlnj-lib/Controls/README b/smlnj-lib/Controls/README new file mode 100644 index 0000000..207ec27 --- /dev/null +++ b/smlnj-lib/Controls/README @@ -0,0 +1,5 @@ +This library is designed to support management of control flags in +an application. + +Credits: + The original design and implementation were by Matthias Blume. diff --git a/smlnj-lib/Controls/control-registry-sig.sml b/smlnj-lib/Controls/control-registry-sig.sml new file mode 100644 index 0000000..58e6c00 --- /dev/null +++ b/smlnj-lib/Controls/control-registry-sig.sml @@ -0,0 +1,60 @@ +(* control-registry-sig.sml + * + * COPYRIGHT (c) 2015 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * A registry collects together string controls; it supports generation + * of help messages and initialization from the environment. + *) + +signature CONTROL_REGISTRY = + sig + + type registry + + type control_info = { envName : string option } + + val new : { + help : string (* registry's description *) + } -> registry + + (* register a control *) + val register : registry -> { + ctl : string Controls.control, + envName : string option + } -> unit + + (* register a set of controls *) + val registerSet : registry -> { + ctls : (string, 'a) ControlSet.control_set, + mkEnvName : string -> string option + } -> unit + + (* nest a registry inside another registry *) + val nest : registry -> { + prefix : string option, + pri : Controls.priority, (* registry's priority *) + obscurity : int, (* registry's detail level; higher means *) + (* more obscure *) + reg : registry + } -> unit + + (* find a control *) + val control : registry -> string list -> string Controls.control option + + (* initialize the controls in the registry from the environment *) + val init : registry -> unit + + datatype registry_tree = RTree of { + path : string list, + help : string, + ctls : { ctl : string Controls.control, info : control_info } list, + subregs : registry_tree list + } + + (* get the registry-tree representation of a registry; an optional obscurity + * argument may be supplied to filter out obscure options. + *) + val controls : (registry * int option) -> registry_tree + + end diff --git a/smlnj-lib/Controls/control-registry.sml b/smlnj-lib/Controls/control-registry.sml new file mode 100644 index 0000000..95ac458 --- /dev/null +++ b/smlnj-lib/Controls/control-registry.sml @@ -0,0 +1,150 @@ +(* control-registry.sml + * + * COPYRIGHT (c) 2015 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + *) + +structure ControlRegistry : CONTROL_REGISTRY = + struct + + structure Rep = ControlReps + structure CSet = ControlSet + structure ATbl = AtomTable + + type control_info = { + envName : string option + } + + type ctl_set = (string, control_info) Rep.control_set + + datatype registry = Reg of { + help : string, (* registry's description *) + ctls : ctl_set, (* control's in this registry *) + qRegs : subregistry ATbl.hash_table, (* qualified sub-registries *) + uRegs : subregistry list ref (* unqualified sub-registries *) + } + + and subregistry = SubReg of { + prefix : string option, (* the key for qualified registries *) + priority : Controls.priority, (* control's priority *) + obscurity : int, (* registry's detail level; higher means *) + (* more obscure *) + reg : registry + } + + fun new {help} = Reg{ + help = help, + ctls = CSet.new(), + qRegs = ATbl.mkTable (8, Fail "qualified registries"), + uRegs = ref[] + } + + (* register a control *) + fun register (Reg{ctls, ...}) {ctl, envName} = + CSet.insert (ctls, ctl, {envName=envName}) + + (* register a set of controls *) + fun registerSet (Reg{ctls, ...}) {ctls=cs, mkEnvName} = let + fun insert {ctl, info} = + CSet.insert (ctls, ctl, {envName=mkEnvName(Controls.name ctl)}) + in + CSet.app insert cs + end + + (* nest a registry inside another registry *) + fun nest (Reg{uRegs, qRegs, ...}) {prefix, pri, obscurity, reg} = let + val subReg = SubReg{ + prefix = prefix, + priority = pri, + obscurity = obscurity, + reg = reg + } + in + case prefix + of NONE => uRegs := subReg :: !uRegs + | SOME qual => ATbl.insert qRegs (Atom.atom qual, subReg) + (* end case *) + end + + fun control reg (path : string list) = let + fun find (_, []) = NONE + | find (Reg{ctls, uRegs, ...}, [name]) = ( + case CSet.find(ctls, name) + of SOME{ctl, ...} => SOME ctl + | NONE => findInList (!uRegs, [name]) + (* end case *)) + | find (Reg{qRegs, uRegs,...}, prefix::r) = ( + case ATbl.find qRegs prefix + of NONE => findInList(!uRegs, prefix::r) + | SOME(SubReg{reg, ...}) => (case find(reg, r) + of NONE => findInList(!uRegs, prefix::r) + | someCtl => someCtl + (* end case *)) + (* end case *)) + and findInList ([], _) = NONE + | findInList (SubReg{reg, ...}::r, path) = (case find (reg, path) + of NONE => findInList(r, path) + | someCtl => someCtl + (* end case *)) + in + find (reg, List.map Atom.atom path) + end + + (* initialize the controls in the registry from the environment *) + fun init (Reg{ctls, qRegs, uRegs, ...}) = let + fun initCtl {ctl, info={envName=SOME var}} = ( + case OS.Process.getEnv var + of SOME value => Controls.set(ctl, value) + | NONE => () + (* end case *)) + | initCtl _ = () + fun initSubreg (SubReg{reg, ...}) = init reg + in + CSet.app initCtl ctls; + ATbl.app initSubreg qRegs; + List.app initSubreg (!uRegs) + end + + datatype registry_tree = RTree of { + path : string list, + help : string, + ctls : { ctl: string Controls.control, info: control_info } list, + subregs : registry_tree list + } + + val sortSubregs = + ListMergeSort.sort + (fn (SubReg{priority=p1, ...}, SubReg{priority=p2, ...}) => + Rep.priorityGT(p1, p2)) + + fun controls (root, obs) = let + (* a function to build a list of subregistries, filtering by obscurity *) + val gather = (case obs + of NONE => op :: + | SOME obs => (fn (x as SubReg{obscurity, ...}, l) => + if (obscurity < obs) then x::l else l) + (* end case *)) + fun getTree (path, root as Reg{help, ctls, qRegs, uRegs, ...}) = let + val subregs = + List.foldl gather (ATbl.fold gather [] qRegs) (!uRegs) + val subregs = sortSubregs subregs + fun getReg (SubReg{prefix=SOME prefix, reg, ...}) = + getTree(prefix::path, reg) + | getReg (SubReg{reg, ...}) = getTree (path, reg) + in + RTree{ + path = List.rev path, + help = help, + ctls = case obs + of NONE => ControlSet.listControls ctls + | SOME obs => + ControlSet.listControls' (ctls, obs) + (* end case *), + subregs = List.map getReg subregs + } + end + in + getTree ([], root) + end + + end diff --git a/smlnj-lib/Controls/control-reps.sml b/smlnj-lib/Controls/control-reps.sml new file mode 100644 index 0000000..66514ea --- /dev/null +++ b/smlnj-lib/Controls/control-reps.sml @@ -0,0 +1,41 @@ +(* control-reps.sml + * + * COPYRIGHT (c) 2015 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + *) + +structure ControlReps = + struct + + (* priorities are used for ordering help messages (lexical order) *) + type priority = int list + + datatype 'a control = Ctl of { + name : Atom.atom, (* name of the control *) + set : 'a option -> unit -> unit,(* function to set the control's value; + * it is delayed (error checking in 1st + * stage, actual assignment in 2nd); + * if the argument is NONE, then + * the 2nd stage will restore the + * value that was present during the + * first stage *) + get : unit -> 'a, (* return the control's value *) + priority : priority, (* control's priority *) + obscurity : int, (* control's detail level; higher means *) + (* more obscure *) + help : string (* control's description *) + } + + withtype ('a, 'b) control_set = + {ctl : 'a control, info : 'b} AtomTable.hash_table + + (* conversion functions for control values *) + type 'a value_cvt = { + tyName : string, + fromString : string -> 'a option, + toString : 'a -> string + } + + fun priorityGT priorities = List.collate Int.compare priorities = GREATER + + end diff --git a/smlnj-lib/Controls/control-set-sig.sml b/smlnj-lib/Controls/control-set-sig.sml new file mode 100644 index 0000000..0f39051 --- /dev/null +++ b/smlnj-lib/Controls/control-set-sig.sml @@ -0,0 +1,40 @@ +(* control-set-sig.sml + * + * COPYRIGHT (c) 2015 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + *) + +signature CONTROL_SET = + sig + + type 'a control = 'a Controls.control + type ('a, 'b) control_set + + val new : unit -> ('a, 'b) control_set + + val member : (('a, 'b) control_set * Atom.atom) -> bool + val find : (('a, 'b) control_set * Atom.atom) + -> {ctl : 'a control, info : 'b} option + val insert : (('a, 'b) control_set * 'a control * 'b) -> unit + val remove : (('a, 'b) control_set * Atom.atom) -> unit + val infoOf : ('a, 'b) control_set -> 'a control -> 'b option + + (* list the members; the list is ordered by priority. The listControls' + * function allows one to specify an obscurity level; controls with equal + * or higher obscurity are omitted from the list. + *) + val listControls : ('a, 'b) control_set -> {ctl : 'a control, info : 'b} list + val listControls' : (('a, 'b) control_set * int) + -> {ctl : 'a control, info : 'b} list + + (* apply a function to the controls in a set *) + val app : ({ctl : 'a control, info : 'b} -> unit) + -> ('a, 'b) control_set -> unit + + (* convert the controls in a set to string controls and create a new set + * for them. + *) + val stringControls : 'a Controls.value_cvt -> ('a, 'b) control_set + -> (string, 'b) control_set + + end diff --git a/smlnj-lib/Controls/control-set.sml b/smlnj-lib/Controls/control-set.sml new file mode 100644 index 0000000..47de85f --- /dev/null +++ b/smlnj-lib/Controls/control-set.sml @@ -0,0 +1,69 @@ +(* control-set.sml + * + * COPYRIGHT (c) 2015 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + *) + +structure ControlSet : CONTROL_SET = + struct + + structure Rep = ControlReps + structure ATbl = AtomTable + + type 'a control = 'a Controls.control + type ('a, 'b) control_set = ('a, 'b) ControlReps.control_set + + fun new () = ATbl.mkTable (16, Fail "control set") + + fun member (cset, name) = (case ATbl.find cset name + of NONE => false + | _ => true + (* end case *)) + + fun find (cset, name) = ATbl.find cset name + + fun insert (cset, ctl as Rep.Ctl{name, ...}, info) = + ATbl.insert cset (name, {ctl=ctl, info=info}) + + fun remove (cset, name) = (case ATbl.find cset name + of NONE => () + | _ => ignore (ATbl.remove cset name) + (* end case *)) + + fun infoOf (cset : ('a, 'b) control_set) (Rep.Ctl{name, ...}) = + Option.map #info (ATbl.find cset name) + + (* list the members; the list is ordered by descreasing priority. The + * listControls' function allows one to specify an obscurity level; controls + * with equal or higher obscurioty are omitted from the list. + *) + local + fun priorityOf {ctl=Rep.Ctl{priority, ...}, info} = priority + fun gt (a, b) = Rep.priorityGT(priorityOf a, priorityOf b) + in + fun listControls cset = ListMergeSort.sort gt (ATbl.listItems cset) + + fun listControls' (cset, obs) = let + fun add (item as {ctl=Rep.Ctl{obscurity, ...}, info}, l) = + if (obs > obscurity) + then item::l + else l + in + ListMergeSort.sort gt (ATbl.fold add [] cset) + end + end (* local *) + + fun app f cset = ATbl.app f cset + + (* convert the controls in a set to string controls and create a new set + * for them. + *) + fun stringControls cvt cset = let + val stringCtl = Controls.stringControl cvt + fun cvtCtl {ctl, info} = {ctl = stringCtl ctl, info = info} + in + ATbl.map cvtCtl cset + end + + end + diff --git a/smlnj-lib/Controls/control-util-sig.sml b/smlnj-lib/Controls/control-util-sig.sml new file mode 100644 index 0000000..3587c14 --- /dev/null +++ b/smlnj-lib/Controls/control-util-sig.sml @@ -0,0 +1,28 @@ +(* control-util-sig.sml + * + * COPYRIGHT (c) 2015 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + *) + +signature CONTROL_UTIL = + sig + + structure Cvt : sig + (* for primitive types, using respective {from,to}String functions: *) + val int : int Controls.value_cvt + val bool : bool Controls.value_cvt + val real : real Controls.value_cvt + + (* comma-separated tokens *) + val stringList : string list Controls.value_cvt + + (* for completeness' sake: *) + val string : string Controls.value_cvt + end + + structure EnvName : sig + (* convert lower case to upper case and #"-" to #"_", add prefix *) + val toUpper : string -> string -> string + end + + end diff --git a/smlnj-lib/Controls/control-util.sml b/smlnj-lib/Controls/control-util.sml new file mode 100644 index 0000000..6d75861 --- /dev/null +++ b/smlnj-lib/Controls/control-util.sml @@ -0,0 +1,48 @@ +(* control-util.sml + * + * COPYRIGHT (c) 2015 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + *) + +structure ControlUtil : CONTROL_UTIL = + struct + + structure Cvt = struct + val int = { tyName = "int", + fromString = Int.fromString, + toString = Int.toString } + val bool = { tyName = "bool", + fromString = fn s => (case String.map Char.toUpper s + of "FALSE" => SOME false + | "TRUE" => SOME true + | "NO" => SOME false + | "YES" => SOME true + | _ => NONE + (* end case *)), + toString = Bool.toString } + val real = { tyName = "real", + fromString = Real.fromString, + toString = Real.toString } + + val stringList = { + tyName = "string list", + fromString = SOME o String.fields (fn c => c = #","), + toString = ( + fn [] => "" + | [x] => x + | x::r => concat(x :: List.foldr (fn (y, l) => ","::y::l) [] r) + (* end fn *)) + } + + val string : string Controls.value_cvt = + { tyName = "string", + fromString = SOME, + toString = fn x => x } + end + + structure EnvName = struct + fun toUpper prefix s = + prefix ^ String.map (fn #"-" => #"_" | c => Char.toUpper c) s + end + + end diff --git a/smlnj-lib/Controls/controls-lib.cm b/smlnj-lib/Controls/controls-lib.cm new file mode 100644 index 0000000..c178042 --- /dev/null +++ b/smlnj-lib/Controls/controls-lib.cm @@ -0,0 +1,40 @@ +(* controls-lib.cm + * + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * An implementation of "controls" -- representing environment-configurable + * global state (flags, options, ...) of a program. + * + * author: Matthias Blume + *) + +Library + + signature CONTROLS + structure Controls + signature CONTROL_SET + structure ControlSet + signature CONTROL_REGISTRY + structure ControlRegistry + signature CONTROL_UTIL + structure ControlUtil + +is + +#if defined(NEW_CM) + $/basis.cm + $/smlnj-lib.cm +#else + ../Util/smlnj-lib.cm +#endif + + control-reps.sml + controls-sig.sml + controls.sml + control-registry-sig.sml + control-registry.sml + control-set-sig.sml + control-set.sml + control-util-sig.sml + control-util.sml diff --git a/smlnj-lib/Controls/controls-sig.sml b/smlnj-lib/Controls/controls-sig.sml new file mode 100644 index 0000000..59ace9d --- /dev/null +++ b/smlnj-lib/Controls/controls-sig.sml @@ -0,0 +1,88 @@ +(* controls-sig.sml + * + * COPYRIGHT (c) 2015 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * The Controls structure provides a uniform way to manage controls that can + * be set from the command line or from environment variables. + *) + +signature CONTROLS = + sig + + type priority = int list + type 'a control + + (* a converter for control values *) + type 'a value_cvt = { + tyName : string, + fromString : string -> 'a option, + toString : 'a -> string + } + + (* create a new control *) + val control : { + name : string, (* name of the control *) + pri : priority, (* control's priority *) + obscurity : int, (* control's detail level; higher means *) + (* more obscure *) + help : string, (* control's description *) + ctl : 'a ref (* ref cell holding control's state *) + } -> 'a control + + (* generate a control *) + val genControl : { + name : string, + pri : priority, + obscurity : int, + help : string, + default : 'a + } -> 'a control + + (* this exception is raised to communicate that there is a syntax error + * in a string representation of a control value. + *) + exception ValueSyntax of {tyName : string, ctlName : string, value : string} + + (* create a string control from a typed control *) + val stringControl : 'a value_cvt -> 'a control -> string control + + (* control operations *) + val name : 'a control -> string + val get : 'a control -> 'a + val set : 'a control * 'a -> unit + val set' : 'a control * 'a -> unit -> unit (* delayed; error checking in 1st stage *) + val help : 'a control -> string + val info : 'a control -> {priority : priority, obscurity : int, help : string} + + (* package a boolean control as a GetOpt option descriptor (NoArg) *) + val mkOptionFlag : { + ctl : bool control, (* the control that will be set by the command-line option *) + short : string, (* the short name for the option; either zero or one chars *) + long : string option (* an optional long-name for the option *) + } -> unit GetOpt.opt_descr + + (* package a string control as a GetOpt option descriptor with required argument (ReqArg) *) + val mkOptionReqArg : { + ctl : string control, (* the control that will be set by the command-line option *) + arg : string, (* the name for the argument, which is used in the usage message *) + short : string, (* the short name for the option; either zero or one chars *) + long : string option (* an optional long-name for the option *) + } -> unit GetOpt.opt_descr + + (* package a string control as a GetOpt option descriptor with an optional argument (OptArg) *) + val mkOption : { + ctl : string control, (* the control that will be set by the command-line option *) + arg : string, (* the name for the argument, which is used in the usage message *) + default : string, (* the default value for when no argument is given *) + short : string, (* the short name for the option; either zero or one chars *) + long : string option (* an optional long-name for the option *) + } -> unit GetOpt.opt_descr + + (* capture current value (1st stage) and restore it (2nd stage) *) + val save'restore : 'a control -> unit -> unit + + (* compare the priority of two controls *) + val compare : ('a control * 'a control) -> order + + end diff --git a/smlnj-lib/Controls/controls.sml b/smlnj-lib/Controls/controls.sml new file mode 100644 index 0000000..b7b0564 --- /dev/null +++ b/smlnj-lib/Controls/controls.sml @@ -0,0 +1,96 @@ +(* controls.sml + * + * COPYRIGHT (c) 2015 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + *) + +structure Controls : CONTROLS = + struct + + open ControlReps + + fun control {name, pri, obscurity, help, ctl} = Ctl{ + name = Atom.atom name, + get = fn () => !ctl, + set = fn SOME v => (fn () => ctl := v) + | NONE => let val v = !ctl in fn () => ctl := v end, + priority = pri, + obscurity = obscurity, + help = help + } + + fun genControl {name, pri, obscurity, help, default} = control { + name = name, pri = pri, obscurity = obscurity, help = help, + ctl = ref default + } + + (* this exception is raised to communicate that there is a syntax error + * in a string representation of a control value. + *) + exception ValueSyntax of {tyName : string, ctlName : string, value : string} + + fun stringControl {tyName, fromString, toString} (Ctl c) = let + val {name, get, set, priority, obscurity, help} = c + fun fromString' s = (case fromString s + of NONE => raise ValueSyntax{ + tyName = tyName, ctlName = Atom.toString name, value = s + } + | SOME v => v + (* end case *)) + in + Ctl{ + name = name, + get = toString o get, + set = set o Option.map fromString', + priority = priority, + obscurity = obscurity, + help = help + } + end + + fun name (Ctl{name, ...}) = Atom.toString name + fun get (Ctl{get, ...}) = get() + fun set (Ctl{set, ...}, v) = set (SOME v) () + fun set' (Ctl{set, ...}, v) = set (SOME v) + fun help (Ctl{help, ...}) = help + fun info (Ctl{priority, obscurity, help, ...}) = + { priority = priority, obscurity = obscurity, help = help } + + (* package a boolean control as a GetOpt option descriptor (NoArg). If the control + * is initialized to false command-line option will set it to true, whereas if the control + * is initialized to true, the the command-line option will set it to false. + *) + fun mkOptionFlag {ctl=Ctl{get, set, help, ...}, short, long} = { + short = short, + long = (case long of NONE => [] | SOME opt => [opt]), + desc = GetOpt.NoArg(set (SOME(not (get ())))), + help = help + } + + (* package a string control as a GetOpt option descriptor with required argument (ReqArg) *) + fun mkOptionReqArg {ctl=Ctl{set, help, ...}, arg, short, long} = { + short = short, + long = (case long of NONE => [] | SOME opt => [opt]), + desc = GetOpt.ReqArg(fn s => set (SOME s) (), arg), + help = help + } + + (* package a string control as a GetOpt option descriptor with an optional argument (OptArg) *) + fun mkOption {ctl=Ctl{set, help, ...}, arg, default, short, long} = { + short = short, + long = (case long of NONE => [] | SOME opt => [opt]), + desc = let + fun setFn NONE = set (SOME default) () + | setFn someVal = set someVal () + in + GetOpt.OptArg(setFn, arg) + end, + help = help + } + + fun save'restore (Ctl{set,...}) = set NONE + + fun compare (Ctl{priority=p1, ...}, Ctl{priority=p2, ...}) = + List.collate Int.compare (p1, p2) + + end diff --git a/smlnj-lib/Doc/Makefile.in b/smlnj-lib/Doc/Makefile.in new file mode 100644 index 0000000..e95d746 --- /dev/null +++ b/smlnj-lib/Doc/Makefile.in @@ -0,0 +1,62 @@ +# Makefile.in +# +# COPYRIGHT (c) 2020 The Fellowship of SML/NJ (http://www.smlnj.org) +# All rights reserved. +# +# Makefile for SML/NJ Library documentation +# +# @configure_input@ +# + +SHELL = @SHELL@ +INSTALL = @INSTALL@ +@SET_MAKE@ + +.PHONY: help +help: + @echo "This Makefile supports the following targets:" + @echo " help -- print this message." + @echo " tools -- build documentation generation tools" + @echo " install-offline -- generate HTML documentation suitable for" + @echo " offline viewing in @OUT_DIR@." + @echo " install-web -- generate HTML documentation suitable for" + @echo " online viewing in @OUT_DIR@." + @echo " clean -- remove intermediate files" + @echo " distclean -- remove files generated during configuration" + @echo " and building; the resulting tree has the same" + @echo " files as the distribution." + @echo "The following additional targets are primarily for developers:" + @echo " devclean -- remove everything that is not part of the SVN" + @echo " repository." + +# target for generating the documentation for off-line viewing +# +.PHONY: install-offline +install-offline: + (cd src && make WEB=no install-offline) + +# target for generating the documentation for the SML/NJ website +# +.PHONY: install-web +install-web: + (cd src && make WEB=yes install) + +# target for building the documentation generation tools +# +.PHONY: tools +tools: + (cd tools && make install) + + +#################### Cleanup #################### + +CLEAN_SUBDIRS = src tools + +CLEAN_FILES = + +DISTCLEAN_FILES += Makefile config.status config.log \ + autom4te*.cache + +DEVCLEAN_FILES = configure + +include @MK_DIR@/clean-rules.gmk diff --git a/smlnj-lib/Doc/README.md b/smlnj-lib/Doc/README.md new file mode 100644 index 0000000..2f173aa --- /dev/null +++ b/smlnj-lib/Doc/README.md @@ -0,0 +1,69 @@ +This directory tree contains documentation for the +various libraries that comprise the **Standard ML of +New Jersey Library**. The documentation is written +using [AscciDoctor](https://asciidoctor.org). + +## Naming conventions + +### File naming conventions + +All source files have the `.adoc` file suffix. + +Each library `Foo`, with CM file `foo-lib.cm`, has +its documentation in the `Foo` directory and has +a root documentation file named `Foo/foo-lib.adoc`. + +Files that document modules (*i.e.*, signatures, +structures, or functors) begin with a prefix, which +is one of `sig-`, `str-`, or `fun-`, followed by the +module name and the `.adoc` file suffix. For example, +consider the the `ORD_SET` signature. It's documentation +will live in a file named `sig-ORD_SET.adoc`. That file +will document the signature as well as its various instances +(*e.g.*, `AtomSet`, `RedBlackSetFn`, *etc*.). + +### Specification naming conventions + +The description of a specification has the basic form + + kind ':' [ owner '.' ] name + +where `kind` is one of `str` (for substructures), `type` +for type and datatypes, `con` for data constructors, +`fld` for record fields, `exn` for exception constructors, +and `val` for value identifiers. The optional `owner` +(followed by a period) is used for data constructors and +record fields, where the owner is the type that they +are part of. + +Some examples: + + xref:#fld:point.x[x] + -- local reference to `x` field of point type + + xref:str-Vec3D.adoc#val:cross[cross] + -- cross-file reference to `cross` function in + the Vec3D structure. + +Note that we use `xref:` instead of `link:`, because the latter +is specific to HTML generation, whereas the former applies to +PDF as well. + +## External references + +One can refer to entities in the SML Basis Library using the +syntax + + {sml-basis-url}/file.html#anchor[ ... ] + +For example, we can cross reference the `Domain` exception in the +`General` structure as follows: + + {sml-basis-url}/general.html#SIG:GENERAL.Domain:EXN[`Domain`] + +## Odds and ends + +For inline code enclosed in back quotes, it is necessary to escape +the function type constructor (++\->++); otherwise, the entitie +`→` will be inserted in the HTML. + diff --git a/smlnj-lib/Doc/TODO b/smlnj-lib/Doc/TODO new file mode 100644 index 0000000..01954e8 --- /dev/null +++ b/smlnj-lib/Doc/TODO @@ -0,0 +1,41 @@ +HTML Library: + + needs documentation (also, we are going to change the name of the library to + HTML3). + +HTML4 Library: + + needs documentation + +PP Library: + + needs documentation (including a tutorial) + +Reactive Library: + + - str-Reactive.adoc + many functions still need documentation + +RegExp Library: + + - tutorial is incomplete + + - str-AwkSyntax.adoc + + - sig-REGEXP_ENGINE.adoc + +Util Library: + + - str-FormatComb.adoc + a few functions remain to be documented (seq, listg, optiong, and seqg). + + - str-RealOrderStats.adoc + + - str-Scan.adoc + + - str-UnivariateStats.adoc + +## General checking: + +Scan generated HTML for →, which is the Unicode arrow symbol (occurs when +we forget to write arrow as \->. diff --git a/smlnj-lib/Doc/config/check_smlnj.m4 b/smlnj-lib/Doc/config/check_smlnj.m4 new file mode 100644 index 0000000..4a9830c --- /dev/null +++ b/smlnj-lib/Doc/config/check_smlnj.m4 @@ -0,0 +1,113 @@ +dnl check_smlnj.m4 +dnl +dnl COPYRIGHT (c) 2022 The The SML/NJ Fellowship (http://smlnj.org/) +dnl +dnl @synopsis CHECK_SMLNJ(ACTION-IF-UNKNOWN) +dnl +dnl This macro attempts to figure out the location of the SML/NJ installation, +dnl as well as its version. It will check the user's path, as well as the +dnl standard locations of /usr/local/smlnj/bin and /usr/local/bin. +dnl You can override the version of SML/NJ used by defining either the SML_CMD +dnl or the SMLNJ_CMD variable in the environment (SMLNJ_CMD is for backwards +dnl compatibility; SML_CMD is prefered). +dnl This macro sets the following shell variables when it executes successfully: +dnl +dnl SML_CMD* -- the absolute path to the "sml" command +dnl SMLNJ_CMD* -- same as $SML_CMD; for backward compatibility +dnl SMLNJ_PATH* -- the +dnl SMLNJ_VERSION -- the version as "..", where +dnl the "." is optional. +dnl SMLNJ_MAJOR_VERSION -- major version number +dnl SMLNJ_MINOR_VERSION -- minor version number +dnl SMLNJ_PATCH_VERSION -- patch number (empty if there is no patch number) +dnl SMLNJ_ARCH* -- the host archectecture +dnl SMLNJ_OPSYS* -- the host operating system +dnl SMLNJ_HEAP_SUFFIX* -- the heap suffix +dnl +dnl * This macro also does an AC_SUBST for the variables marked with "*" +dnl +dnl @author John Reppy +dnl +AC_DEFUN([CHECK_SMLNJ], [ +dnl +dnl first we check for the existence of SML/NJ +dnl + if test z$SML_CMD != z ; then + SMLNJ_CMD=$SML_CMD + elif test z$SMLNJ_CMD != z ; then + SML_CMD=$SMLNJ_CMD + else + AC_PATH_PROGS(SML_CMD, sml, none, [$PATH:/usr/local/smlnj/bin:/usr/local/bin]) + SMLNJ_CMD=$SML_CMD + fi +dnl +dnl + if test $SML_CMD = none; then + $1 + else +dnl +dnl SML/NJ is installed, so determine its location +dnl + SMLNJ_PATH=`dirname $SML_CMD` +dnl +dnl Determine the version numbers +dnl + AC_MSG_CHECKING([version of SML/NJ]) + ac_check_smlnj_version=`$SML_CMD @SMLversion` + if test $? -eq 0 ; then +dnl +dnl normalize the ac_check_smlnj_version variable +dnl + case $ac_check_smlnj_version in + sml*) ac_check_smlnj_version=`echo $ac_check_smlnj_version | sed -e 's/sml //'` ;; + *) AC_MSG_ERROR([bogus SML/NJ version ($ac_check_smlnj_version) reported]);; + esac + SMLNJ_VERSION=$ac_check_smlnj_version + case $ac_check_smlnj_version in + 110) +dnl +dnl Versions 110.0.x report "sml 110" for the @SMLversion flag, so we need to +dnl do some more work. +dnl + banner=`echo "" | $SML_CMD | head -1` + [ac_check_smlnj_version=`echo $banner \ + | sed -e 's/.*Version \([0-9.]*\).*/\1/'`] + SMLNJ_VERSION=$ac_check_smlnj_version + ;; + *.*.*) ;; + *.*) ac_check_smlnj_version="$ac_check_smlnj_version".0 ;; + *) ac_check_smlnj_version="$ac_check_smlnj_version".0.0 ;; + esac + [SMLNJ_MAJOR_VERSION=`echo $ac_check_smlnj_version \ + | sed -e 's/\([0-9]*\).\([0-9]*\).\([0-9]*\)/\1/'`] + [SMLNJ_MINOR_VERSION=`echo $ac_check_smlnj_version \ + | sed -e 's/\([0-9]*\).\([0-9]*\).\([0-9]*\)/\2/'`] + [SMLNJ_PATCH_VERSION=`echo $ac_check_smlnj_version \ + | sed -e 's/\([0-9]*\).\([0-9]*\).\([0-9]*\)/\3/'`] + AC_MSG_RESULT([$SMLNJ_VERSION]) + AC_SUBST(SML_CMD) + AC_SUBST(SMLNJ_CMD) + AC_SUBST(SMLNJ_PATH) + else + $1 + fi +dnl +dnl Determine the heap suffix; we assume that this has the form - +dnl + AC_MSG_CHECKING([heap suffix of SML/NJ]) + ac_check_smlnj_suffix=`$SML_CMD @SMLsuffix` + if test $? -eq 0 ; then + SMLNJ_HEAP_SUFFIX=$ac_check_smlnj_suffix + [SMLNJ_ARCH=`echo $ac_check_smlnj_suffix \ + | sed -e 's/\([a-z0-9A-Z]*\)-\([a-z0-9A-Z]*\)/\1/'`] + [SMLNJ_OPSYS=`echo $ac_check_smlnj_suffix \ + | sed -e 's/\([a-z0-9A-Z]*\)-\([a-z0-9A-Z]*\)/\2/'`] + AC_MSG_RESULT([$SMLNJ_HEAP_SUFFIX]) + AC_SUBST(SMLNJ_HEAP_SUFFIX) + AC_SUBST(SMLNJ_ARCH) + AC_SUBST(SMLNJ_OPSYS) + else + $1 + fi + fi +])dnl diff --git a/smlnj-lib/Doc/config/config.guess b/smlnj-lib/Doc/config/config.guess new file mode 100644 index 0000000..187cd54 --- /dev/null +++ b/smlnj-lib/Doc/config/config.guess @@ -0,0 +1,1511 @@ +#! /bin/sh +# Attempt to guess a canonical system name. +# Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, +# 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, +# 2011 Free Software Foundation, Inc. + +timestamp='2011-02-02' + +# This file is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA +# 02110-1301, USA. +# +# As a special exception to the GNU General Public License, if you +# distribute this file as part of a program that contains a +# configuration script generated by Autoconf, you may include it under +# the same distribution terms that you use for the rest of that program. + + +# Originally written by Per Bothner. Please send patches (context +# diff format) to and include a ChangeLog +# entry. +# +# This script attempts to guess a canonical system name similar to +# config.sub. If it succeeds, it prints the system name on stdout, and +# exits with 0. Otherwise, it exits with 1. +# +# You can get the latest version of this script from: +# http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.guess;hb=HEAD + +me=`echo "$0" | sed -e 's,.*/,,'` + +usage="\ +Usage: $0 [OPTION] + +Output the configuration name of the system \`$me' is run on. + +Operation modes: + -h, --help print this help, then exit + -t, --time-stamp print date of last modification, then exit + -v, --version print version number, then exit + +Report bugs and patches to ." + +version="\ +GNU config.guess ($timestamp) + +Originally written by Per Bothner. +Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, +2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free +Software Foundation, Inc. + +This is free software; see the source for copying conditions. There is NO +warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE." + +help=" +Try \`$me --help' for more information." + +# Parse command line +while test $# -gt 0 ; do + case $1 in + --time-stamp | --time* | -t ) + echo "$timestamp" ; exit ;; + --version | -v ) + echo "$version" ; exit ;; + --help | --h* | -h ) + echo "$usage"; exit ;; + -- ) # Stop option processing + shift; break ;; + - ) # Use stdin as input. + break ;; + -* ) + echo "$me: invalid option $1$help" >&2 + exit 1 ;; + * ) + break ;; + esac +done + +if test $# != 0; then + echo "$me: too many arguments$help" >&2 + exit 1 +fi + +trap 'exit 1' 1 2 15 + +# CC_FOR_BUILD -- compiler used by this script. Note that the use of a +# compiler to aid in system detection is discouraged as it requires +# temporary files to be created and, as you can see below, it is a +# headache to deal with in a portable fashion. + +# Historically, `CC_FOR_BUILD' used to be named `HOST_CC'. We still +# use `HOST_CC' if defined, but it is deprecated. + +# Portable tmp directory creation inspired by the Autoconf team. + +set_cc_for_build=' +trap "exitcode=\$?; (rm -f \$tmpfiles 2>/dev/null; rmdir \$tmp 2>/dev/null) && exit \$exitcode" 0 ; +trap "rm -f \$tmpfiles 2>/dev/null; rmdir \$tmp 2>/dev/null; exit 1" 1 2 13 15 ; +: ${TMPDIR=/tmp} ; + { tmp=`(umask 077 && mktemp -d "$TMPDIR/cgXXXXXX") 2>/dev/null` && test -n "$tmp" && test -d "$tmp" ; } || + { test -n "$RANDOM" && tmp=$TMPDIR/cg$$-$RANDOM && (umask 077 && mkdir $tmp) ; } || + { tmp=$TMPDIR/cg-$$ && (umask 077 && mkdir $tmp) && echo "Warning: creating insecure temp directory" >&2 ; } || + { echo "$me: cannot create a temporary directory in $TMPDIR" >&2 ; exit 1 ; } ; +dummy=$tmp/dummy ; +tmpfiles="$dummy.c $dummy.o $dummy.rel $dummy" ; +case $CC_FOR_BUILD,$HOST_CC,$CC in + ,,) echo "int x;" > $dummy.c ; + for c in cc gcc c89 c99 ; do + if ($c -c -o $dummy.o $dummy.c) >/dev/null 2>&1 ; then + CC_FOR_BUILD="$c"; break ; + fi ; + done ; + if test x"$CC_FOR_BUILD" = x ; then + CC_FOR_BUILD=no_compiler_found ; + fi + ;; + ,,*) CC_FOR_BUILD=$CC ;; + ,*,*) CC_FOR_BUILD=$HOST_CC ;; +esac ; set_cc_for_build= ;' + +# This is needed to find uname on a Pyramid OSx when run in the BSD universe. +# (ghazi@noc.rutgers.edu 1994-08-24) +if (test -f /.attbin/uname) >/dev/null 2>&1 ; then + PATH=$PATH:/.attbin ; export PATH +fi + +UNAME_MACHINE=`(uname -m) 2>/dev/null` || UNAME_MACHINE=unknown +UNAME_RELEASE=`(uname -r) 2>/dev/null` || UNAME_RELEASE=unknown +UNAME_SYSTEM=`(uname -s) 2>/dev/null` || UNAME_SYSTEM=unknown +UNAME_VERSION=`(uname -v) 2>/dev/null` || UNAME_VERSION=unknown + +# Note: order is significant - the case branches are not exclusive. + +case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in + *:NetBSD:*:*) + # NetBSD (nbsd) targets should (where applicable) match one or + # more of the tupples: *-*-netbsdelf*, *-*-netbsdaout*, + # *-*-netbsdecoff* and *-*-netbsd*. For targets that recently + # switched to ELF, *-*-netbsd* would select the old + # object file format. This provides both forward + # compatibility and a consistent mechanism for selecting the + # object file format. + # + # Note: NetBSD doesn't particularly care about the vendor + # portion of the name. We always set it to "unknown". + sysctl="sysctl -n hw.machine_arch" + UNAME_MACHINE_ARCH=`(/sbin/$sysctl 2>/dev/null || \ + /usr/sbin/$sysctl 2>/dev/null || echo unknown)` + case "${UNAME_MACHINE_ARCH}" in + armeb) machine=armeb-unknown ;; + arm*) machine=arm-unknown ;; + sh3el) machine=shl-unknown ;; + sh3eb) machine=sh-unknown ;; + sh5el) machine=sh5le-unknown ;; + *) machine=${UNAME_MACHINE_ARCH}-unknown ;; + esac + # The Operating System including object format, if it has switched + # to ELF recently, or will in the future. + case "${UNAME_MACHINE_ARCH}" in + arm*|i386|m68k|ns32k|sh3*|sparc|vax) + eval $set_cc_for_build + if echo __ELF__ | $CC_FOR_BUILD -E - 2>/dev/null \ + | grep -q __ELF__ + then + # Once all utilities can be ECOFF (netbsdecoff) or a.out (netbsdaout). + # Return netbsd for either. FIX? + os=netbsd + else + os=netbsdelf + fi + ;; + *) + os=netbsd + ;; + esac + # The OS release + # Debian GNU/NetBSD machines have a different userland, and + # thus, need a distinct triplet. However, they do not need + # kernel version information, so it can be replaced with a + # suitable tag, in the style of linux-gnu. + case "${UNAME_VERSION}" in + Debian*) + release='-gnu' + ;; + *) + release=`echo ${UNAME_RELEASE}|sed -e 's/[-_].*/\./'` + ;; + esac + # Since CPU_TYPE-MANUFACTURER-KERNEL-OPERATING_SYSTEM: + # contains redundant information, the shorter form: + # CPU_TYPE-MANUFACTURER-OPERATING_SYSTEM is used. + echo "${machine}-${os}${release}" + exit ;; + *:OpenBSD:*:*) + UNAME_MACHINE_ARCH=`arch | sed 's/OpenBSD.//'` + echo ${UNAME_MACHINE_ARCH}-unknown-openbsd${UNAME_RELEASE} + exit ;; + *:ekkoBSD:*:*) + echo ${UNAME_MACHINE}-unknown-ekkobsd${UNAME_RELEASE} + exit ;; + *:SolidBSD:*:*) + echo ${UNAME_MACHINE}-unknown-solidbsd${UNAME_RELEASE} + exit ;; + macppc:MirBSD:*:*) + echo powerpc-unknown-mirbsd${UNAME_RELEASE} + exit ;; + *:MirBSD:*:*) + echo ${UNAME_MACHINE}-unknown-mirbsd${UNAME_RELEASE} + exit ;; + alpha:OSF1:*:*) + case $UNAME_RELEASE in + *4.0) + UNAME_RELEASE=`/usr/sbin/sizer -v | awk '{print $3}'` + ;; + *5.*) + UNAME_RELEASE=`/usr/sbin/sizer -v | awk '{print $4}'` + ;; + esac + # According to Compaq, /usr/sbin/psrinfo has been available on + # OSF/1 and Tru64 systems produced since 1995. I hope that + # covers most systems running today. This code pipes the CPU + # types through head -n 1, so we only detect the type of CPU 0. + ALPHA_CPU_TYPE=`/usr/sbin/psrinfo -v | sed -n -e 's/^ The alpha \(.*\) processor.*$/\1/p' | head -n 1` + case "$ALPHA_CPU_TYPE" in + "EV4 (21064)") + UNAME_MACHINE="alpha" ;; + "EV4.5 (21064)") + UNAME_MACHINE="alpha" ;; + "LCA4 (21066/21068)") + UNAME_MACHINE="alpha" ;; + "EV5 (21164)") + UNAME_MACHINE="alphaev5" ;; + "EV5.6 (21164A)") + UNAME_MACHINE="alphaev56" ;; + "EV5.6 (21164PC)") + UNAME_MACHINE="alphapca56" ;; + "EV5.7 (21164PC)") + UNAME_MACHINE="alphapca57" ;; + "EV6 (21264)") + UNAME_MACHINE="alphaev6" ;; + "EV6.7 (21264A)") + UNAME_MACHINE="alphaev67" ;; + "EV6.8CB (21264C)") + UNAME_MACHINE="alphaev68" ;; + "EV6.8AL (21264B)") + UNAME_MACHINE="alphaev68" ;; + "EV6.8CX (21264D)") + UNAME_MACHINE="alphaev68" ;; + "EV6.9A (21264/EV69A)") + UNAME_MACHINE="alphaev69" ;; + "EV7 (21364)") + UNAME_MACHINE="alphaev7" ;; + "EV7.9 (21364A)") + UNAME_MACHINE="alphaev79" ;; + esac + # A Pn.n version is a patched version. + # A Vn.n version is a released version. + # A Tn.n version is a released field test version. + # A Xn.n version is an unreleased experimental baselevel. + # 1.2 uses "1.2" for uname -r. + echo ${UNAME_MACHINE}-dec-osf`echo ${UNAME_RELEASE} | sed -e 's/^[PVTX]//' | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz'` + # Reset EXIT trap before exiting to avoid spurious non-zero exit code. + exitcode=$? + trap '' 0 + exit $exitcode ;; + Alpha\ *:Windows_NT*:*) + # How do we know it's Interix rather than the generic POSIX subsystem? + # Should we change UNAME_MACHINE based on the output of uname instead + # of the specific Alpha model? + echo alpha-pc-interix + exit ;; + 21064:Windows_NT:50:3) + echo alpha-dec-winnt3.5 + exit ;; + Amiga*:UNIX_System_V:4.0:*) + echo m68k-unknown-sysv4 + exit ;; + *:[Aa]miga[Oo][Ss]:*:*) + echo ${UNAME_MACHINE}-unknown-amigaos + exit ;; + *:[Mm]orph[Oo][Ss]:*:*) + echo ${UNAME_MACHINE}-unknown-morphos + exit ;; + *:OS/390:*:*) + echo i370-ibm-openedition + exit ;; + *:z/VM:*:*) + echo s390-ibm-zvmoe + exit ;; + *:OS400:*:*) + echo powerpc-ibm-os400 + exit ;; + arm:RISC*:1.[012]*:*|arm:riscix:1.[012]*:*) + echo arm-acorn-riscix${UNAME_RELEASE} + exit ;; + arm:riscos:*:*|arm:RISCOS:*:*) + echo arm-unknown-riscos + exit ;; + SR2?01:HI-UX/MPP:*:* | SR8000:HI-UX/MPP:*:*) + echo hppa1.1-hitachi-hiuxmpp + exit ;; + Pyramid*:OSx*:*:* | MIS*:OSx*:*:* | MIS*:SMP_DC-OSx*:*:*) + # akee@wpdis03.wpafb.af.mil (Earle F. Ake) contributed MIS and NILE. + if test "`(/bin/universe) 2>/dev/null`" = att ; then + echo pyramid-pyramid-sysv3 + else + echo pyramid-pyramid-bsd + fi + exit ;; + NILE*:*:*:dcosx) + echo pyramid-pyramid-svr4 + exit ;; + DRS?6000:unix:4.0:6*) + echo sparc-icl-nx6 + exit ;; + DRS?6000:UNIX_SV:4.2*:7* | DRS?6000:isis:4.2*:7*) + case `/usr/bin/uname -p` in + sparc) echo sparc-icl-nx7; exit ;; + esac ;; + s390x:SunOS:*:*) + echo ${UNAME_MACHINE}-ibm-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` + exit ;; + sun4H:SunOS:5.*:*) + echo sparc-hal-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` + exit ;; + sun4*:SunOS:5.*:* | tadpole*:SunOS:5.*:*) + echo sparc-sun-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` + exit ;; + i86pc:AuroraUX:5.*:* | i86xen:AuroraUX:5.*:*) + echo i386-pc-auroraux${UNAME_RELEASE} + exit ;; + i86pc:SunOS:5.*:* | i86xen:SunOS:5.*:*) + eval $set_cc_for_build + SUN_ARCH="i386" + # If there is a compiler, see if it is configured for 64-bit objects. + # Note that the Sun cc does not turn __LP64__ into 1 like gcc does. + # This test works for both compilers. + if [ "$CC_FOR_BUILD" != 'no_compiler_found' ]; then + if (echo '#ifdef __amd64'; echo IS_64BIT_ARCH; echo '#endif') | \ + (CCOPTS= $CC_FOR_BUILD -E - 2>/dev/null) | \ + grep IS_64BIT_ARCH >/dev/null + then + SUN_ARCH="x86_64" + fi + fi + echo ${SUN_ARCH}-pc-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` + exit ;; + sun4*:SunOS:6*:*) + # According to config.sub, this is the proper way to canonicalize + # SunOS6. Hard to guess exactly what SunOS6 will be like, but + # it's likely to be more like Solaris than SunOS4. + echo sparc-sun-solaris3`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` + exit ;; + sun4*:SunOS:*:*) + case "`/usr/bin/arch -k`" in + Series*|S4*) + UNAME_RELEASE=`uname -v` + ;; + esac + # Japanese Language versions have a version number like `4.1.3-JL'. + echo sparc-sun-sunos`echo ${UNAME_RELEASE}|sed -e 's/-/_/'` + exit ;; + sun3*:SunOS:*:*) + echo m68k-sun-sunos${UNAME_RELEASE} + exit ;; + sun*:*:4.2BSD:*) + UNAME_RELEASE=`(sed 1q /etc/motd | awk '{print substr($5,1,3)}') 2>/dev/null` + test "x${UNAME_RELEASE}" = "x" && UNAME_RELEASE=3 + case "`/bin/arch`" in + sun3) + echo m68k-sun-sunos${UNAME_RELEASE} + ;; + sun4) + echo sparc-sun-sunos${UNAME_RELEASE} + ;; + esac + exit ;; + aushp:SunOS:*:*) + echo sparc-auspex-sunos${UNAME_RELEASE} + exit ;; + # The situation for MiNT is a little confusing. The machine name + # can be virtually everything (everything which is not + # "atarist" or "atariste" at least should have a processor + # > m68000). The system name ranges from "MiNT" over "FreeMiNT" + # to the lowercase version "mint" (or "freemint"). Finally + # the system name "TOS" denotes a system which is actually not + # MiNT. But MiNT is downward compatible to TOS, so this should + # be no problem. + atarist[e]:*MiNT:*:* | atarist[e]:*mint:*:* | atarist[e]:*TOS:*:*) + echo m68k-atari-mint${UNAME_RELEASE} + exit ;; + atari*:*MiNT:*:* | atari*:*mint:*:* | atarist[e]:*TOS:*:*) + echo m68k-atari-mint${UNAME_RELEASE} + exit ;; + *falcon*:*MiNT:*:* | *falcon*:*mint:*:* | *falcon*:*TOS:*:*) + echo m68k-atari-mint${UNAME_RELEASE} + exit ;; + milan*:*MiNT:*:* | milan*:*mint:*:* | *milan*:*TOS:*:*) + echo m68k-milan-mint${UNAME_RELEASE} + exit ;; + hades*:*MiNT:*:* | hades*:*mint:*:* | *hades*:*TOS:*:*) + echo m68k-hades-mint${UNAME_RELEASE} + exit ;; + *:*MiNT:*:* | *:*mint:*:* | *:*TOS:*:*) + echo m68k-unknown-mint${UNAME_RELEASE} + exit ;; + m68k:machten:*:*) + echo m68k-apple-machten${UNAME_RELEASE} + exit ;; + powerpc:machten:*:*) + echo powerpc-apple-machten${UNAME_RELEASE} + exit ;; + RISC*:Mach:*:*) + echo mips-dec-mach_bsd4.3 + exit ;; + RISC*:ULTRIX:*:*) + echo mips-dec-ultrix${UNAME_RELEASE} + exit ;; + VAX*:ULTRIX*:*:*) + echo vax-dec-ultrix${UNAME_RELEASE} + exit ;; + 2020:CLIX:*:* | 2430:CLIX:*:*) + echo clipper-intergraph-clix${UNAME_RELEASE} + exit ;; + mips:*:*:UMIPS | mips:*:*:RISCos) + eval $set_cc_for_build + sed 's/^ //' << EOF >$dummy.c +#ifdef __cplusplus +#include /* for printf() prototype */ + int main (int argc, char *argv[]) { +#else + int main (argc, argv) int argc; char *argv[]; { +#endif + #if defined (host_mips) && defined (MIPSEB) + #if defined (SYSTYPE_SYSV) + printf ("mips-mips-riscos%ssysv\n", argv[1]); exit (0); + #endif + #if defined (SYSTYPE_SVR4) + printf ("mips-mips-riscos%ssvr4\n", argv[1]); exit (0); + #endif + #if defined (SYSTYPE_BSD43) || defined(SYSTYPE_BSD) + printf ("mips-mips-riscos%sbsd\n", argv[1]); exit (0); + #endif + #endif + exit (-1); + } +EOF + $CC_FOR_BUILD -o $dummy $dummy.c && + dummyarg=`echo "${UNAME_RELEASE}" | sed -n 's/\([0-9]*\).*/\1/p'` && + SYSTEM_NAME=`$dummy $dummyarg` && + { echo "$SYSTEM_NAME"; exit; } + echo mips-mips-riscos${UNAME_RELEASE} + exit ;; + Motorola:PowerMAX_OS:*:*) + echo powerpc-motorola-powermax + exit ;; + Motorola:*:4.3:PL8-*) + echo powerpc-harris-powermax + exit ;; + Night_Hawk:*:*:PowerMAX_OS | Synergy:PowerMAX_OS:*:*) + echo powerpc-harris-powermax + exit ;; + Night_Hawk:Power_UNIX:*:*) + echo powerpc-harris-powerunix + exit ;; + m88k:CX/UX:7*:*) + echo m88k-harris-cxux7 + exit ;; + m88k:*:4*:R4*) + echo m88k-motorola-sysv4 + exit ;; + m88k:*:3*:R3*) + echo m88k-motorola-sysv3 + exit ;; + AViiON:dgux:*:*) + # DG/UX returns AViiON for all architectures + UNAME_PROCESSOR=`/usr/bin/uname -p` + if [ $UNAME_PROCESSOR = mc88100 ] || [ $UNAME_PROCESSOR = mc88110 ] + then + if [ ${TARGET_BINARY_INTERFACE}x = m88kdguxelfx ] || \ + [ ${TARGET_BINARY_INTERFACE}x = x ] + then + echo m88k-dg-dgux${UNAME_RELEASE} + else + echo m88k-dg-dguxbcs${UNAME_RELEASE} + fi + else + echo i586-dg-dgux${UNAME_RELEASE} + fi + exit ;; + M88*:DolphinOS:*:*) # DolphinOS (SVR3) + echo m88k-dolphin-sysv3 + exit ;; + M88*:*:R3*:*) + # Delta 88k system running SVR3 + echo m88k-motorola-sysv3 + exit ;; + XD88*:*:*:*) # Tektronix XD88 system running UTekV (SVR3) + echo m88k-tektronix-sysv3 + exit ;; + Tek43[0-9][0-9]:UTek:*:*) # Tektronix 4300 system running UTek (BSD) + echo m68k-tektronix-bsd + exit ;; + *:IRIX*:*:*) + echo mips-sgi-irix`echo ${UNAME_RELEASE}|sed -e 's/-/_/g'` + exit ;; + ????????:AIX?:[12].1:2) # AIX 2.2.1 or AIX 2.1.1 is RT/PC AIX. + echo romp-ibm-aix # uname -m gives an 8 hex-code CPU id + exit ;; # Note that: echo "'`uname -s`'" gives 'AIX ' + i*86:AIX:*:*) + echo i386-ibm-aix + exit ;; + ia64:AIX:*:*) + if [ -x /usr/bin/oslevel ] ; then + IBM_REV=`/usr/bin/oslevel` + else + IBM_REV=${UNAME_VERSION}.${UNAME_RELEASE} + fi + echo ${UNAME_MACHINE}-ibm-aix${IBM_REV} + exit ;; + *:AIX:2:3) + if grep bos325 /usr/include/stdio.h >/dev/null 2>&1; then + eval $set_cc_for_build + sed 's/^ //' << EOF >$dummy.c + #include + + main() + { + if (!__power_pc()) + exit(1); + puts("powerpc-ibm-aix3.2.5"); + exit(0); + } +EOF + if $CC_FOR_BUILD -o $dummy $dummy.c && SYSTEM_NAME=`$dummy` + then + echo "$SYSTEM_NAME" + else + echo rs6000-ibm-aix3.2.5 + fi + elif grep bos324 /usr/include/stdio.h >/dev/null 2>&1; then + echo rs6000-ibm-aix3.2.4 + else + echo rs6000-ibm-aix3.2 + fi + exit ;; + *:AIX:*:[4567]) + IBM_CPU_ID=`/usr/sbin/lsdev -C -c processor -S available | sed 1q | awk '{ print $1 }'` + if /usr/sbin/lsattr -El ${IBM_CPU_ID} | grep ' POWER' >/dev/null 2>&1; then + IBM_ARCH=rs6000 + else + IBM_ARCH=powerpc + fi + if [ -x /usr/bin/oslevel ] ; then + IBM_REV=`/usr/bin/oslevel` + else + IBM_REV=${UNAME_VERSION}.${UNAME_RELEASE} + fi + echo ${IBM_ARCH}-ibm-aix${IBM_REV} + exit ;; + *:AIX:*:*) + echo rs6000-ibm-aix + exit ;; + ibmrt:4.4BSD:*|romp-ibm:BSD:*) + echo romp-ibm-bsd4.4 + exit ;; + ibmrt:*BSD:*|romp-ibm:BSD:*) # covers RT/PC BSD and + echo romp-ibm-bsd${UNAME_RELEASE} # 4.3 with uname added to + exit ;; # report: romp-ibm BSD 4.3 + *:BOSX:*:*) + echo rs6000-bull-bosx + exit ;; + DPX/2?00:B.O.S.:*:*) + echo m68k-bull-sysv3 + exit ;; + 9000/[34]??:4.3bsd:1.*:*) + echo m68k-hp-bsd + exit ;; + hp300:4.4BSD:*:* | 9000/[34]??:4.3bsd:2.*:*) + echo m68k-hp-bsd4.4 + exit ;; + 9000/[34678]??:HP-UX:*:*) + HPUX_REV=`echo ${UNAME_RELEASE}|sed -e 's/[^.]*.[0B]*//'` + case "${UNAME_MACHINE}" in + 9000/31? ) HP_ARCH=m68000 ;; + 9000/[34]?? ) HP_ARCH=m68k ;; + 9000/[678][0-9][0-9]) + if [ -x /usr/bin/getconf ]; then + sc_cpu_version=`/usr/bin/getconf SC_CPU_VERSION 2>/dev/null` + sc_kernel_bits=`/usr/bin/getconf SC_KERNEL_BITS 2>/dev/null` + case "${sc_cpu_version}" in + 523) HP_ARCH="hppa1.0" ;; # CPU_PA_RISC1_0 + 528) HP_ARCH="hppa1.1" ;; # CPU_PA_RISC1_1 + 532) # CPU_PA_RISC2_0 + case "${sc_kernel_bits}" in + 32) HP_ARCH="hppa2.0n" ;; + 64) HP_ARCH="hppa2.0w" ;; + '') HP_ARCH="hppa2.0" ;; # HP-UX 10.20 + esac ;; + esac + fi + if [ "${HP_ARCH}" = "" ]; then + eval $set_cc_for_build + sed 's/^ //' << EOF >$dummy.c + + #define _HPUX_SOURCE + #include + #include + + int main () + { + #if defined(_SC_KERNEL_BITS) + long bits = sysconf(_SC_KERNEL_BITS); + #endif + long cpu = sysconf (_SC_CPU_VERSION); + + switch (cpu) + { + case CPU_PA_RISC1_0: puts ("hppa1.0"); break; + case CPU_PA_RISC1_1: puts ("hppa1.1"); break; + case CPU_PA_RISC2_0: + #if defined(_SC_KERNEL_BITS) + switch (bits) + { + case 64: puts ("hppa2.0w"); break; + case 32: puts ("hppa2.0n"); break; + default: puts ("hppa2.0"); break; + } break; + #else /* !defined(_SC_KERNEL_BITS) */ + puts ("hppa2.0"); break; + #endif + default: puts ("hppa1.0"); break; + } + exit (0); + } +EOF + (CCOPTS= $CC_FOR_BUILD -o $dummy $dummy.c 2>/dev/null) && HP_ARCH=`$dummy` + test -z "$HP_ARCH" && HP_ARCH=hppa + fi ;; + esac + if [ ${HP_ARCH} = "hppa2.0w" ] + then + eval $set_cc_for_build + + # hppa2.0w-hp-hpux* has a 64-bit kernel and a compiler generating + # 32-bit code. hppa64-hp-hpux* has the same kernel and a compiler + # generating 64-bit code. GNU and HP use different nomenclature: + # + # $ CC_FOR_BUILD=cc ./config.guess + # => hppa2.0w-hp-hpux11.23 + # $ CC_FOR_BUILD="cc +DA2.0w" ./config.guess + # => hppa64-hp-hpux11.23 + + if echo __LP64__ | (CCOPTS= $CC_FOR_BUILD -E - 2>/dev/null) | + grep -q __LP64__ + then + HP_ARCH="hppa2.0w" + else + HP_ARCH="hppa64" + fi + fi + echo ${HP_ARCH}-hp-hpux${HPUX_REV} + exit ;; + ia64:HP-UX:*:*) + HPUX_REV=`echo ${UNAME_RELEASE}|sed -e 's/[^.]*.[0B]*//'` + echo ia64-hp-hpux${HPUX_REV} + exit ;; + 3050*:HI-UX:*:*) + eval $set_cc_for_build + sed 's/^ //' << EOF >$dummy.c + #include + int + main () + { + long cpu = sysconf (_SC_CPU_VERSION); + /* The order matters, because CPU_IS_HP_MC68K erroneously returns + true for CPU_PA_RISC1_0. CPU_IS_PA_RISC returns correct + results, however. */ + if (CPU_IS_PA_RISC (cpu)) + { + switch (cpu) + { + case CPU_PA_RISC1_0: puts ("hppa1.0-hitachi-hiuxwe2"); break; + case CPU_PA_RISC1_1: puts ("hppa1.1-hitachi-hiuxwe2"); break; + case CPU_PA_RISC2_0: puts ("hppa2.0-hitachi-hiuxwe2"); break; + default: puts ("hppa-hitachi-hiuxwe2"); break; + } + } + else if (CPU_IS_HP_MC68K (cpu)) + puts ("m68k-hitachi-hiuxwe2"); + else puts ("unknown-hitachi-hiuxwe2"); + exit (0); + } +EOF + $CC_FOR_BUILD -o $dummy $dummy.c && SYSTEM_NAME=`$dummy` && + { echo "$SYSTEM_NAME"; exit; } + echo unknown-hitachi-hiuxwe2 + exit ;; + 9000/7??:4.3bsd:*:* | 9000/8?[79]:4.3bsd:*:* ) + echo hppa1.1-hp-bsd + exit ;; + 9000/8??:4.3bsd:*:*) + echo hppa1.0-hp-bsd + exit ;; + *9??*:MPE/iX:*:* | *3000*:MPE/iX:*:*) + echo hppa1.0-hp-mpeix + exit ;; + hp7??:OSF1:*:* | hp8?[79]:OSF1:*:* ) + echo hppa1.1-hp-osf + exit ;; + hp8??:OSF1:*:*) + echo hppa1.0-hp-osf + exit ;; + i*86:OSF1:*:*) + if [ -x /usr/sbin/sysversion ] ; then + echo ${UNAME_MACHINE}-unknown-osf1mk + else + echo ${UNAME_MACHINE}-unknown-osf1 + fi + exit ;; + parisc*:Lites*:*:*) + echo hppa1.1-hp-lites + exit ;; + C1*:ConvexOS:*:* | convex:ConvexOS:C1*:*) + echo c1-convex-bsd + exit ;; + C2*:ConvexOS:*:* | convex:ConvexOS:C2*:*) + if getsysinfo -f scalar_acc + then echo c32-convex-bsd + else echo c2-convex-bsd + fi + exit ;; + C34*:ConvexOS:*:* | convex:ConvexOS:C34*:*) + echo c34-convex-bsd + exit ;; + C38*:ConvexOS:*:* | convex:ConvexOS:C38*:*) + echo c38-convex-bsd + exit ;; + C4*:ConvexOS:*:* | convex:ConvexOS:C4*:*) + echo c4-convex-bsd + exit ;; + CRAY*Y-MP:*:*:*) + echo ymp-cray-unicos${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/' + exit ;; + CRAY*[A-Z]90:*:*:*) + echo ${UNAME_MACHINE}-cray-unicos${UNAME_RELEASE} \ + | sed -e 's/CRAY.*\([A-Z]90\)/\1/' \ + -e y/ABCDEFGHIJKLMNOPQRSTUVWXYZ/abcdefghijklmnopqrstuvwxyz/ \ + -e 's/\.[^.]*$/.X/' + exit ;; + CRAY*TS:*:*:*) + echo t90-cray-unicos${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/' + exit ;; + CRAY*T3E:*:*:*) + echo alphaev5-cray-unicosmk${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/' + exit ;; + CRAY*SV1:*:*:*) + echo sv1-cray-unicos${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/' + exit ;; + *:UNICOS/mp:*:*) + echo craynv-cray-unicosmp${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/' + exit ;; + F30[01]:UNIX_System_V:*:* | F700:UNIX_System_V:*:*) + FUJITSU_PROC=`uname -m | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz'` + FUJITSU_SYS=`uname -p | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/\///'` + FUJITSU_REL=`echo ${UNAME_RELEASE} | sed -e 's/ /_/'` + echo "${FUJITSU_PROC}-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}" + exit ;; + 5000:UNIX_System_V:4.*:*) + FUJITSU_SYS=`uname -p | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/\///'` + FUJITSU_REL=`echo ${UNAME_RELEASE} | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/ /_/'` + echo "sparc-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}" + exit ;; + i*86:BSD/386:*:* | i*86:BSD/OS:*:* | *:Ascend\ Embedded/OS:*:*) + echo ${UNAME_MACHINE}-pc-bsdi${UNAME_RELEASE} + exit ;; + sparc*:BSD/OS:*:*) + echo sparc-unknown-bsdi${UNAME_RELEASE} + exit ;; + *:BSD/OS:*:*) + echo ${UNAME_MACHINE}-unknown-bsdi${UNAME_RELEASE} + exit ;; + *:FreeBSD:*:*) + case ${UNAME_MACHINE} in + pc98) + echo i386-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` ;; + amd64) + echo x86_64-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` ;; + *) + echo ${UNAME_MACHINE}-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` ;; + esac + exit ;; + i*:CYGWIN*:*) + echo ${UNAME_MACHINE}-pc-cygwin + exit ;; + *:MINGW*:*) + echo ${UNAME_MACHINE}-pc-mingw32 + exit ;; + i*:windows32*:*) + # uname -m includes "-pc" on this system. + echo ${UNAME_MACHINE}-mingw32 + exit ;; + i*:PW*:*) + echo ${UNAME_MACHINE}-pc-pw32 + exit ;; + *:Interix*:*) + case ${UNAME_MACHINE} in + x86) + echo i586-pc-interix${UNAME_RELEASE} + exit ;; + authenticamd | genuineintel | EM64T) + echo x86_64-unknown-interix${UNAME_RELEASE} + exit ;; + IA64) + echo ia64-unknown-interix${UNAME_RELEASE} + exit ;; + esac ;; + [345]86:Windows_95:* | [345]86:Windows_98:* | [345]86:Windows_NT:*) + echo i${UNAME_MACHINE}-pc-mks + exit ;; + 8664:Windows_NT:*) + echo x86_64-pc-mks + exit ;; + i*:Windows_NT*:* | Pentium*:Windows_NT*:*) + # How do we know it's Interix rather than the generic POSIX subsystem? + # It also conflicts with pre-2.0 versions of AT&T UWIN. Should we + # UNAME_MACHINE based on the output of uname instead of i386? + echo i586-pc-interix + exit ;; + i*:UWIN*:*) + echo ${UNAME_MACHINE}-pc-uwin + exit ;; + amd64:CYGWIN*:*:* | x86_64:CYGWIN*:*:*) + echo x86_64-unknown-cygwin + exit ;; + p*:CYGWIN*:*) + echo powerpcle-unknown-cygwin + exit ;; + prep*:SunOS:5.*:*) + echo powerpcle-unknown-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` + exit ;; + *:GNU:*:*) + # the GNU system + echo `echo ${UNAME_MACHINE}|sed -e 's,[-/].*$,,'`-unknown-gnu`echo ${UNAME_RELEASE}|sed -e 's,/.*$,,'` + exit ;; + *:GNU/*:*:*) + # other systems with GNU libc and userland + echo ${UNAME_MACHINE}-unknown-`echo ${UNAME_SYSTEM} | sed 's,^[^/]*/,,' | tr '[A-Z]' '[a-z]'``echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'`-gnu + exit ;; + i*86:Minix:*:*) + echo ${UNAME_MACHINE}-pc-minix + exit ;; + alpha:Linux:*:*) + case `sed -n '/^cpu model/s/^.*: \(.*\)/\1/p' < /proc/cpuinfo` in + EV5) UNAME_MACHINE=alphaev5 ;; + EV56) UNAME_MACHINE=alphaev56 ;; + PCA56) UNAME_MACHINE=alphapca56 ;; + PCA57) UNAME_MACHINE=alphapca56 ;; + EV6) UNAME_MACHINE=alphaev6 ;; + EV67) UNAME_MACHINE=alphaev67 ;; + EV68*) UNAME_MACHINE=alphaev68 ;; + esac + objdump --private-headers /bin/sh | grep -q ld.so.1 + if test "$?" = 0 ; then LIBC="libc1" ; else LIBC="" ; fi + echo ${UNAME_MACHINE}-unknown-linux-gnu${LIBC} + exit ;; + arm*:Linux:*:*) + eval $set_cc_for_build + if echo __ARM_EABI__ | $CC_FOR_BUILD -E - 2>/dev/null \ + | grep -q __ARM_EABI__ + then + echo ${UNAME_MACHINE}-unknown-linux-gnu + else + echo ${UNAME_MACHINE}-unknown-linux-gnueabi + fi + exit ;; + avr32*:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-gnu + exit ;; + cris:Linux:*:*) + echo cris-axis-linux-gnu + exit ;; + crisv32:Linux:*:*) + echo crisv32-axis-linux-gnu + exit ;; + frv:Linux:*:*) + echo frv-unknown-linux-gnu + exit ;; + i*86:Linux:*:*) + LIBC=gnu + eval $set_cc_for_build + sed 's/^ //' << EOF >$dummy.c + #ifdef __dietlibc__ + LIBC=dietlibc + #endif +EOF + eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep '^LIBC'` + echo "${UNAME_MACHINE}-pc-linux-${LIBC}" + exit ;; + ia64:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-gnu + exit ;; + m32r*:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-gnu + exit ;; + m68*:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-gnu + exit ;; + mips:Linux:*:* | mips64:Linux:*:*) + eval $set_cc_for_build + sed 's/^ //' << EOF >$dummy.c + #undef CPU + #undef ${UNAME_MACHINE} + #undef ${UNAME_MACHINE}el + #if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL) || defined(MIPSEL) + CPU=${UNAME_MACHINE}el + #else + #if defined(__MIPSEB__) || defined(__MIPSEB) || defined(_MIPSEB) || defined(MIPSEB) + CPU=${UNAME_MACHINE} + #else + CPU= + #endif + #endif +EOF + eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep '^CPU'` + test x"${CPU}" != x && { echo "${CPU}-unknown-linux-gnu"; exit; } + ;; + or32:Linux:*:*) + echo or32-unknown-linux-gnu + exit ;; + padre:Linux:*:*) + echo sparc-unknown-linux-gnu + exit ;; + parisc64:Linux:*:* | hppa64:Linux:*:*) + echo hppa64-unknown-linux-gnu + exit ;; + parisc:Linux:*:* | hppa:Linux:*:*) + # Look for CPU level + case `grep '^cpu[^a-z]*:' /proc/cpuinfo 2>/dev/null | cut -d' ' -f2` in + PA7*) echo hppa1.1-unknown-linux-gnu ;; + PA8*) echo hppa2.0-unknown-linux-gnu ;; + *) echo hppa-unknown-linux-gnu ;; + esac + exit ;; + ppc64:Linux:*:*) + echo powerpc64-unknown-linux-gnu + exit ;; + ppc:Linux:*:*) + echo powerpc-unknown-linux-gnu + exit ;; + s390:Linux:*:* | s390x:Linux:*:*) + echo ${UNAME_MACHINE}-ibm-linux + exit ;; + sh64*:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-gnu + exit ;; + sh*:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-gnu + exit ;; + sparc:Linux:*:* | sparc64:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-gnu + exit ;; + tile*:Linux:*:*) + echo ${UNAME_MACHINE}-tilera-linux-gnu + exit ;; + vax:Linux:*:*) + echo ${UNAME_MACHINE}-dec-linux-gnu + exit ;; + x86_64:Linux:*:*) + echo x86_64-unknown-linux-gnu + exit ;; + xtensa*:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-gnu + exit ;; + i*86:DYNIX/ptx:4*:*) + # ptx 4.0 does uname -s correctly, with DYNIX/ptx in there. + # earlier versions are messed up and put the nodename in both + # sysname and nodename. + echo i386-sequent-sysv4 + exit ;; + i*86:UNIX_SV:4.2MP:2.*) + # Unixware is an offshoot of SVR4, but it has its own version + # number series starting with 2... + # I am not positive that other SVR4 systems won't match this, + # I just have to hope. -- rms. + # Use sysv4.2uw... so that sysv4* matches it. + echo ${UNAME_MACHINE}-pc-sysv4.2uw${UNAME_VERSION} + exit ;; + i*86:OS/2:*:*) + # If we were able to find `uname', then EMX Unix compatibility + # is probably installed. + echo ${UNAME_MACHINE}-pc-os2-emx + exit ;; + i*86:XTS-300:*:STOP) + echo ${UNAME_MACHINE}-unknown-stop + exit ;; + i*86:atheos:*:*) + echo ${UNAME_MACHINE}-unknown-atheos + exit ;; + i*86:syllable:*:*) + echo ${UNAME_MACHINE}-pc-syllable + exit ;; + i*86:LynxOS:2.*:* | i*86:LynxOS:3.[01]*:* | i*86:LynxOS:4.[02]*:*) + echo i386-unknown-lynxos${UNAME_RELEASE} + exit ;; + i*86:*DOS:*:*) + echo ${UNAME_MACHINE}-pc-msdosdjgpp + exit ;; + i*86:*:4.*:* | i*86:SYSTEM_V:4.*:*) + UNAME_REL=`echo ${UNAME_RELEASE} | sed 's/\/MP$//'` + if grep Novell /usr/include/link.h >/dev/null 2>/dev/null; then + echo ${UNAME_MACHINE}-univel-sysv${UNAME_REL} + else + echo ${UNAME_MACHINE}-pc-sysv${UNAME_REL} + fi + exit ;; + i*86:*:5:[678]*) + # UnixWare 7.x, OpenUNIX and OpenServer 6. + case `/bin/uname -X | grep "^Machine"` in + *486*) UNAME_MACHINE=i486 ;; + *Pentium) UNAME_MACHINE=i586 ;; + *Pent*|*Celeron) UNAME_MACHINE=i686 ;; + esac + echo ${UNAME_MACHINE}-unknown-sysv${UNAME_RELEASE}${UNAME_SYSTEM}${UNAME_VERSION} + exit ;; + i*86:*:3.2:*) + if test -f /usr/options/cb.name; then + UNAME_REL=`sed -n 's/.*Version //p' /dev/null >/dev/null ; then + UNAME_REL=`(/bin/uname -X|grep Release|sed -e 's/.*= //')` + (/bin/uname -X|grep i80486 >/dev/null) && UNAME_MACHINE=i486 + (/bin/uname -X|grep '^Machine.*Pentium' >/dev/null) \ + && UNAME_MACHINE=i586 + (/bin/uname -X|grep '^Machine.*Pent *II' >/dev/null) \ + && UNAME_MACHINE=i686 + (/bin/uname -X|grep '^Machine.*Pentium Pro' >/dev/null) \ + && UNAME_MACHINE=i686 + echo ${UNAME_MACHINE}-pc-sco$UNAME_REL + else + echo ${UNAME_MACHINE}-pc-sysv32 + fi + exit ;; + pc:*:*:*) + # Left here for compatibility: + # uname -m prints for DJGPP always 'pc', but it prints nothing about + # the processor, so we play safe by assuming i586. + # Note: whatever this is, it MUST be the same as what config.sub + # prints for the "djgpp" host, or else GDB configury will decide that + # this is a cross-build. + echo i586-pc-msdosdjgpp + exit ;; + Intel:Mach:3*:*) + echo i386-pc-mach3 + exit ;; + paragon:*:*:*) + echo i860-intel-osf1 + exit ;; + i860:*:4.*:*) # i860-SVR4 + if grep Stardent /usr/include/sys/uadmin.h >/dev/null 2>&1 ; then + echo i860-stardent-sysv${UNAME_RELEASE} # Stardent Vistra i860-SVR4 + else # Add other i860-SVR4 vendors below as they are discovered. + echo i860-unknown-sysv${UNAME_RELEASE} # Unknown i860-SVR4 + fi + exit ;; + mini*:CTIX:SYS*5:*) + # "miniframe" + echo m68010-convergent-sysv + exit ;; + mc68k:UNIX:SYSTEM5:3.51m) + echo m68k-convergent-sysv + exit ;; + M680?0:D-NIX:5.3:*) + echo m68k-diab-dnix + exit ;; + M68*:*:R3V[5678]*:*) + test -r /sysV68 && { echo 'm68k-motorola-sysv'; exit; } ;; + 3[345]??:*:4.0:3.0 | 3[34]??A:*:4.0:3.0 | 3[34]??,*:*:4.0:3.0 | 3[34]??/*:*:4.0:3.0 | 4400:*:4.0:3.0 | 4850:*:4.0:3.0 | SKA40:*:4.0:3.0 | SDS2:*:4.0:3.0 | SHG2:*:4.0:3.0 | S7501*:*:4.0:3.0) + OS_REL='' + test -r /etc/.relid \ + && OS_REL=.`sed -n 's/[^ ]* [^ ]* \([0-9][0-9]\).*/\1/p' < /etc/.relid` + /bin/uname -p 2>/dev/null | grep 86 >/dev/null \ + && { echo i486-ncr-sysv4.3${OS_REL}; exit; } + /bin/uname -p 2>/dev/null | /bin/grep entium >/dev/null \ + && { echo i586-ncr-sysv4.3${OS_REL}; exit; } ;; + 3[34]??:*:4.0:* | 3[34]??,*:*:4.0:*) + /bin/uname -p 2>/dev/null | grep 86 >/dev/null \ + && { echo i486-ncr-sysv4; exit; } ;; + NCR*:*:4.2:* | MPRAS*:*:4.2:*) + OS_REL='.3' + test -r /etc/.relid \ + && OS_REL=.`sed -n 's/[^ ]* [^ ]* \([0-9][0-9]\).*/\1/p' < /etc/.relid` + /bin/uname -p 2>/dev/null | grep 86 >/dev/null \ + && { echo i486-ncr-sysv4.3${OS_REL}; exit; } + /bin/uname -p 2>/dev/null | /bin/grep entium >/dev/null \ + && { echo i586-ncr-sysv4.3${OS_REL}; exit; } + /bin/uname -p 2>/dev/null | /bin/grep pteron >/dev/null \ + && { echo i586-ncr-sysv4.3${OS_REL}; exit; } ;; + m68*:LynxOS:2.*:* | m68*:LynxOS:3.0*:*) + echo m68k-unknown-lynxos${UNAME_RELEASE} + exit ;; + mc68030:UNIX_System_V:4.*:*) + echo m68k-atari-sysv4 + exit ;; + TSUNAMI:LynxOS:2.*:*) + echo sparc-unknown-lynxos${UNAME_RELEASE} + exit ;; + rs6000:LynxOS:2.*:*) + echo rs6000-unknown-lynxos${UNAME_RELEASE} + exit ;; + PowerPC:LynxOS:2.*:* | PowerPC:LynxOS:3.[01]*:* | PowerPC:LynxOS:4.[02]*:*) + echo powerpc-unknown-lynxos${UNAME_RELEASE} + exit ;; + SM[BE]S:UNIX_SV:*:*) + echo mips-dde-sysv${UNAME_RELEASE} + exit ;; + RM*:ReliantUNIX-*:*:*) + echo mips-sni-sysv4 + exit ;; + RM*:SINIX-*:*:*) + echo mips-sni-sysv4 + exit ;; + *:SINIX-*:*:*) + if uname -p 2>/dev/null >/dev/null ; then + UNAME_MACHINE=`(uname -p) 2>/dev/null` + echo ${UNAME_MACHINE}-sni-sysv4 + else + echo ns32k-sni-sysv + fi + exit ;; + PENTIUM:*:4.0*:*) # Unisys `ClearPath HMP IX 4000' SVR4/MP effort + # says + echo i586-unisys-sysv4 + exit ;; + *:UNIX_System_V:4*:FTX*) + # From Gerald Hewes . + # How about differentiating between stratus architectures? -djm + echo hppa1.1-stratus-sysv4 + exit ;; + *:*:*:FTX*) + # From seanf@swdc.stratus.com. + echo i860-stratus-sysv4 + exit ;; + i*86:VOS:*:*) + # From Paul.Green@stratus.com. + echo ${UNAME_MACHINE}-stratus-vos + exit ;; + *:VOS:*:*) + # From Paul.Green@stratus.com. + echo hppa1.1-stratus-vos + exit ;; + mc68*:A/UX:*:*) + echo m68k-apple-aux${UNAME_RELEASE} + exit ;; + news*:NEWS-OS:6*:*) + echo mips-sony-newsos6 + exit ;; + R[34]000:*System_V*:*:* | R4000:UNIX_SYSV:*:* | R*000:UNIX_SV:*:*) + if [ -d /usr/nec ]; then + echo mips-nec-sysv${UNAME_RELEASE} + else + echo mips-unknown-sysv${UNAME_RELEASE} + fi + exit ;; + BeBox:BeOS:*:*) # BeOS running on hardware made by Be, PPC only. + echo powerpc-be-beos + exit ;; + BeMac:BeOS:*:*) # BeOS running on Mac or Mac clone, PPC only. + echo powerpc-apple-beos + exit ;; + BePC:BeOS:*:*) # BeOS running on Intel PC compatible. + echo i586-pc-beos + exit ;; + BePC:Haiku:*:*) # Haiku running on Intel PC compatible. + echo i586-pc-haiku + exit ;; + SX-4:SUPER-UX:*:*) + echo sx4-nec-superux${UNAME_RELEASE} + exit ;; + SX-5:SUPER-UX:*:*) + echo sx5-nec-superux${UNAME_RELEASE} + exit ;; + SX-6:SUPER-UX:*:*) + echo sx6-nec-superux${UNAME_RELEASE} + exit ;; + SX-7:SUPER-UX:*:*) + echo sx7-nec-superux${UNAME_RELEASE} + exit ;; + SX-8:SUPER-UX:*:*) + echo sx8-nec-superux${UNAME_RELEASE} + exit ;; + SX-8R:SUPER-UX:*:*) + echo sx8r-nec-superux${UNAME_RELEASE} + exit ;; + Power*:Rhapsody:*:*) + echo powerpc-apple-rhapsody${UNAME_RELEASE} + exit ;; + *:Rhapsody:*:*) + echo ${UNAME_MACHINE}-apple-rhapsody${UNAME_RELEASE} + exit ;; + *:Darwin:*:*) + UNAME_PROCESSOR=`uname -p` || UNAME_PROCESSOR=unknown + case $UNAME_PROCESSOR in + i386) + eval $set_cc_for_build + if [ "$CC_FOR_BUILD" != 'no_compiler_found' ]; then + if (echo '#ifdef __LP64__'; echo IS_64BIT_ARCH; echo '#endif') | \ + (CCOPTS= $CC_FOR_BUILD -E - 2>/dev/null) | \ + grep IS_64BIT_ARCH >/dev/null + then + UNAME_PROCESSOR="x86_64" + fi + fi ;; + unknown) UNAME_PROCESSOR=powerpc ;; + esac + echo ${UNAME_PROCESSOR}-apple-darwin${UNAME_RELEASE} + exit ;; + *:procnto*:*:* | *:QNX:[0123456789]*:*) + UNAME_PROCESSOR=`uname -p` + if test "$UNAME_PROCESSOR" = "x86"; then + UNAME_PROCESSOR=i386 + UNAME_MACHINE=pc + fi + echo ${UNAME_PROCESSOR}-${UNAME_MACHINE}-nto-qnx${UNAME_RELEASE} + exit ;; + *:QNX:*:4*) + echo i386-pc-qnx + exit ;; + NEO-?:NONSTOP_KERNEL:*:*) + echo neo-tandem-nsk${UNAME_RELEASE} + exit ;; + NSE-?:NONSTOP_KERNEL:*:*) + echo nse-tandem-nsk${UNAME_RELEASE} + exit ;; + NSR-?:NONSTOP_KERNEL:*:*) + echo nsr-tandem-nsk${UNAME_RELEASE} + exit ;; + *:NonStop-UX:*:*) + echo mips-compaq-nonstopux + exit ;; + BS2000:POSIX*:*:*) + echo bs2000-siemens-sysv + exit ;; + DS/*:UNIX_System_V:*:*) + echo ${UNAME_MACHINE}-${UNAME_SYSTEM}-${UNAME_RELEASE} + exit ;; + *:Plan9:*:*) + # "uname -m" is not consistent, so use $cputype instead. 386 + # is converted to i386 for consistency with other x86 + # operating systems. + if test "$cputype" = "386"; then + UNAME_MACHINE=i386 + else + UNAME_MACHINE="$cputype" + fi + echo ${UNAME_MACHINE}-unknown-plan9 + exit ;; + *:TOPS-10:*:*) + echo pdp10-unknown-tops10 + exit ;; + *:TENEX:*:*) + echo pdp10-unknown-tenex + exit ;; + KS10:TOPS-20:*:* | KL10:TOPS-20:*:* | TYPE4:TOPS-20:*:*) + echo pdp10-dec-tops20 + exit ;; + XKL-1:TOPS-20:*:* | TYPE5:TOPS-20:*:*) + echo pdp10-xkl-tops20 + exit ;; + *:TOPS-20:*:*) + echo pdp10-unknown-tops20 + exit ;; + *:ITS:*:*) + echo pdp10-unknown-its + exit ;; + SEI:*:*:SEIUX) + echo mips-sei-seiux${UNAME_RELEASE} + exit ;; + *:DragonFly:*:*) + echo ${UNAME_MACHINE}-unknown-dragonfly`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` + exit ;; + *:*VMS:*:*) + UNAME_MACHINE=`(uname -p) 2>/dev/null` + case "${UNAME_MACHINE}" in + A*) echo alpha-dec-vms ; exit ;; + I*) echo ia64-dec-vms ; exit ;; + V*) echo vax-dec-vms ; exit ;; + esac ;; + *:XENIX:*:SysV) + echo i386-pc-xenix + exit ;; + i*86:skyos:*:*) + echo ${UNAME_MACHINE}-pc-skyos`echo ${UNAME_RELEASE}` | sed -e 's/ .*$//' + exit ;; + i*86:rdos:*:*) + echo ${UNAME_MACHINE}-pc-rdos + exit ;; + i*86:AROS:*:*) + echo ${UNAME_MACHINE}-pc-aros + exit ;; +esac + +#echo '(No uname command or uname output not recognized.)' 1>&2 +#echo "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" 1>&2 + +eval $set_cc_for_build +cat >$dummy.c < +# include +#endif +main () +{ +#if defined (sony) +#if defined (MIPSEB) + /* BFD wants "bsd" instead of "newsos". Perhaps BFD should be changed, + I don't know.... */ + printf ("mips-sony-bsd\n"); exit (0); +#else +#include + printf ("m68k-sony-newsos%s\n", +#ifdef NEWSOS4 + "4" +#else + "" +#endif + ); exit (0); +#endif +#endif + +#if defined (__arm) && defined (__acorn) && defined (__unix) + printf ("arm-acorn-riscix\n"); exit (0); +#endif + +#if defined (hp300) && !defined (hpux) + printf ("m68k-hp-bsd\n"); exit (0); +#endif + +#if defined (NeXT) +#if !defined (__ARCHITECTURE__) +#define __ARCHITECTURE__ "m68k" +#endif + int version; + version=`(hostinfo | sed -n 's/.*NeXT Mach \([0-9]*\).*/\1/p') 2>/dev/null`; + if (version < 4) + printf ("%s-next-nextstep%d\n", __ARCHITECTURE__, version); + else + printf ("%s-next-openstep%d\n", __ARCHITECTURE__, version); + exit (0); +#endif + +#if defined (MULTIMAX) || defined (n16) +#if defined (UMAXV) + printf ("ns32k-encore-sysv\n"); exit (0); +#else +#if defined (CMU) + printf ("ns32k-encore-mach\n"); exit (0); +#else + printf ("ns32k-encore-bsd\n"); exit (0); +#endif +#endif +#endif + +#if defined (__386BSD__) + printf ("i386-pc-bsd\n"); exit (0); +#endif + +#if defined (sequent) +#if defined (i386) + printf ("i386-sequent-dynix\n"); exit (0); +#endif +#if defined (ns32000) + printf ("ns32k-sequent-dynix\n"); exit (0); +#endif +#endif + +#if defined (_SEQUENT_) + struct utsname un; + + uname(&un); + + if (strncmp(un.version, "V2", 2) == 0) { + printf ("i386-sequent-ptx2\n"); exit (0); + } + if (strncmp(un.version, "V1", 2) == 0) { /* XXX is V1 correct? */ + printf ("i386-sequent-ptx1\n"); exit (0); + } + printf ("i386-sequent-ptx\n"); exit (0); + +#endif + +#if defined (vax) +# if !defined (ultrix) +# include +# if defined (BSD) +# if BSD == 43 + printf ("vax-dec-bsd4.3\n"); exit (0); +# else +# if BSD == 199006 + printf ("vax-dec-bsd4.3reno\n"); exit (0); +# else + printf ("vax-dec-bsd\n"); exit (0); +# endif +# endif +# else + printf ("vax-dec-bsd\n"); exit (0); +# endif +# else + printf ("vax-dec-ultrix\n"); exit (0); +# endif +#endif + +#if defined (alliant) && defined (i860) + printf ("i860-alliant-bsd\n"); exit (0); +#endif + + exit (1); +} +EOF + +$CC_FOR_BUILD -o $dummy $dummy.c 2>/dev/null && SYSTEM_NAME=`$dummy` && + { echo "$SYSTEM_NAME"; exit; } + +# Apollos put the system type in the environment. + +test -d /usr/apollo && { echo ${ISP}-apollo-${SYSTYPE}; exit; } + +# Convex versions that predate uname can use getsysinfo(1) + +if [ -x /usr/convex/getsysinfo ] +then + case `getsysinfo -f cpu_type` in + c1*) + echo c1-convex-bsd + exit ;; + c2*) + if getsysinfo -f scalar_acc + then echo c32-convex-bsd + else echo c2-convex-bsd + fi + exit ;; + c34*) + echo c34-convex-bsd + exit ;; + c38*) + echo c38-convex-bsd + exit ;; + c4*) + echo c4-convex-bsd + exit ;; + esac +fi + +cat >&2 < in order to provide the needed +information to handle your system. + +config.guess timestamp = $timestamp + +uname -m = `(uname -m) 2>/dev/null || echo unknown` +uname -r = `(uname -r) 2>/dev/null || echo unknown` +uname -s = `(uname -s) 2>/dev/null || echo unknown` +uname -v = `(uname -v) 2>/dev/null || echo unknown` + +/usr/bin/uname -p = `(/usr/bin/uname -p) 2>/dev/null` +/bin/uname -X = `(/bin/uname -X) 2>/dev/null` + +hostinfo = `(hostinfo) 2>/dev/null` +/bin/universe = `(/bin/universe) 2>/dev/null` +/usr/bin/arch -k = `(/usr/bin/arch -k) 2>/dev/null` +/bin/arch = `(/bin/arch) 2>/dev/null` +/usr/bin/oslevel = `(/usr/bin/oslevel) 2>/dev/null` +/usr/convex/getsysinfo = `(/usr/convex/getsysinfo) 2>/dev/null` + +UNAME_MACHINE = ${UNAME_MACHINE} +UNAME_RELEASE = ${UNAME_RELEASE} +UNAME_SYSTEM = ${UNAME_SYSTEM} +UNAME_VERSION = ${UNAME_VERSION} +EOF + +exit 1 + +# Local variables: +# eval: (add-hook 'write-file-hooks 'time-stamp) +# time-stamp-start: "timestamp='" +# time-stamp-format: "%:y-%02m-%02d" +# time-stamp-end: "'" +# End: diff --git a/smlnj-lib/Doc/config/config.sub b/smlnj-lib/Doc/config/config.sub new file mode 100644 index 0000000..0ef29b0 --- /dev/null +++ b/smlnj-lib/Doc/config/config.sub @@ -0,0 +1,1739 @@ +#! /bin/sh +# Configuration validation subroutine script. +# Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, +# 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, +# 2011 Free Software Foundation, Inc. + +timestamp='2011-02-24' + +# This file is (in principle) common to ALL GNU software. +# The presence of a machine in this file suggests that SOME GNU software +# can handle that machine. It does not imply ALL GNU software can. +# +# This file is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA +# 02110-1301, USA. +# +# As a special exception to the GNU General Public License, if you +# distribute this file as part of a program that contains a +# configuration script generated by Autoconf, you may include it under +# the same distribution terms that you use for the rest of that program. + + +# Please send patches to . Submit a context +# diff and a properly formatted GNU ChangeLog entry. +# +# Configuration subroutine to validate and canonicalize a configuration type. +# Supply the specified configuration type as an argument. +# If it is invalid, we print an error message on stderr and exit with code 1. +# Otherwise, we print the canonical config type on stdout and succeed. + +# You can get the latest version of this script from: +# http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.sub;hb=HEAD + +# This file is supposed to be the same for all GNU packages +# and recognize all the CPU types, system types and aliases +# that are meaningful with *any* GNU software. +# Each package is responsible for reporting which valid configurations +# it does not support. The user should be able to distinguish +# a failure to support a valid configuration from a meaningless +# configuration. + +# The goal of this file is to map all the various variations of a given +# machine specification into a single specification in the form: +# CPU_TYPE-MANUFACTURER-OPERATING_SYSTEM +# or in some cases, the newer four-part form: +# CPU_TYPE-MANUFACTURER-KERNEL-OPERATING_SYSTEM +# It is wrong to echo any other type of specification. + +me=`echo "$0" | sed -e 's,.*/,,'` + +usage="\ +Usage: $0 [OPTION] CPU-MFR-OPSYS + $0 [OPTION] ALIAS + +Canonicalize a configuration name. + +Operation modes: + -h, --help print this help, then exit + -t, --time-stamp print date of last modification, then exit + -v, --version print version number, then exit + +Report bugs and patches to ." + +version="\ +GNU config.sub ($timestamp) + +Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, +2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free +Software Foundation, Inc. + +This is free software; see the source for copying conditions. There is NO +warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE." + +help=" +Try \`$me --help' for more information." + +# Parse command line +while test $# -gt 0 ; do + case $1 in + --time-stamp | --time* | -t ) + echo "$timestamp" ; exit ;; + --version | -v ) + echo "$version" ; exit ;; + --help | --h* | -h ) + echo "$usage"; exit ;; + -- ) # Stop option processing + shift; break ;; + - ) # Use stdin as input. + break ;; + -* ) + echo "$me: invalid option $1$help" + exit 1 ;; + + *local*) + # First pass through any local machine types. + echo $1 + exit ;; + + * ) + break ;; + esac +done + +case $# in + 0) echo "$me: missing argument$help" >&2 + exit 1;; + 1) ;; + *) echo "$me: too many arguments$help" >&2 + exit 1;; +esac + +# Separate what the user gave into CPU-COMPANY and OS or KERNEL-OS (if any). +# Here we must recognize all the valid KERNEL-OS combinations. +maybe_os=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\2/'` +case $maybe_os in + nto-qnx* | linux-gnu* | linux-android* | linux-dietlibc | linux-newlib* | \ + linux-uclibc* | uclinux-uclibc* | uclinux-gnu* | kfreebsd*-gnu* | \ + knetbsd*-gnu* | netbsd*-gnu* | \ + kopensolaris*-gnu* | \ + storm-chaos* | os2-emx* | rtmk-nova*) + os=-$maybe_os + basic_machine=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\1/'` + ;; + *) + basic_machine=`echo $1 | sed 's/-[^-]*$//'` + if [ $basic_machine != $1 ] + then os=`echo $1 | sed 's/.*-/-/'` + else os=; fi + ;; +esac + +### Let's recognize common machines as not being operating systems so +### that things like config.sub decstation-3100 work. We also +### recognize some manufacturers as not being operating systems, so we +### can provide default operating systems below. +case $os in + -sun*os*) + # Prevent following clause from handling this invalid input. + ;; + -dec* | -mips* | -sequent* | -encore* | -pc532* | -sgi* | -sony* | \ + -att* | -7300* | -3300* | -delta* | -motorola* | -sun[234]* | \ + -unicom* | -ibm* | -next | -hp | -isi* | -apollo | -altos* | \ + -convergent* | -ncr* | -news | -32* | -3600* | -3100* | -hitachi* |\ + -c[123]* | -convex* | -sun | -crds | -omron* | -dg | -ultra | -tti* | \ + -harris | -dolphin | -highlevel | -gould | -cbm | -ns | -masscomp | \ + -apple | -axis | -knuth | -cray | -microblaze) + os= + basic_machine=$1 + ;; + -bluegene*) + os=-cnk + ;; + -sim | -cisco | -oki | -wec | -winbond) + os= + basic_machine=$1 + ;; + -scout) + ;; + -wrs) + os=-vxworks + basic_machine=$1 + ;; + -chorusos*) + os=-chorusos + basic_machine=$1 + ;; + -chorusrdb) + os=-chorusrdb + basic_machine=$1 + ;; + -hiux*) + os=-hiuxwe2 + ;; + -sco6) + os=-sco5v6 + basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` + ;; + -sco5) + os=-sco3.2v5 + basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` + ;; + -sco4) + os=-sco3.2v4 + basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` + ;; + -sco3.2.[4-9]*) + os=`echo $os | sed -e 's/sco3.2./sco3.2v/'` + basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` + ;; + -sco3.2v[4-9]*) + # Don't forget version if it is 3.2v4 or newer. + basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` + ;; + -sco5v6*) + # Don't forget version if it is 3.2v4 or newer. + basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` + ;; + -sco*) + os=-sco3.2v2 + basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` + ;; + -udk*) + basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` + ;; + -isc) + os=-isc2.2 + basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` + ;; + -clix*) + basic_machine=clipper-intergraph + ;; + -isc*) + basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` + ;; + -lynx*) + os=-lynxos + ;; + -ptx*) + basic_machine=`echo $1 | sed -e 's/86-.*/86-sequent/'` + ;; + -windowsnt*) + os=`echo $os | sed -e 's/windowsnt/winnt/'` + ;; + -psos*) + os=-psos + ;; + -mint | -mint[0-9]*) + basic_machine=m68k-atari + os=-mint + ;; +esac + +# Decode aliases for certain CPU-COMPANY combinations. +case $basic_machine in + # Recognize the basic CPU types without company name. + # Some are omitted here because they have special meanings below. + 1750a | 580 \ + | a29k \ + | alpha | alphaev[4-8] | alphaev56 | alphaev6[78] | alphapca5[67] \ + | alpha64 | alpha64ev[4-8] | alpha64ev56 | alpha64ev6[78] | alpha64pca5[67] \ + | am33_2.0 \ + | arc | arm | arm[bl]e | arme[lb] | armv[2345] | armv[345][lb] | avr | avr32 \ + | bfin \ + | c4x | clipper \ + | d10v | d30v | dlx | dsp16xx \ + | fido | fr30 | frv \ + | h8300 | h8500 | hppa | hppa1.[01] | hppa2.0 | hppa2.0[nw] | hppa64 \ + | i370 | i860 | i960 | ia64 \ + | ip2k | iq2000 \ + | lm32 \ + | m32c | m32r | m32rle | m68000 | m68k | m88k \ + | maxq | mb | microblaze | mcore | mep | metag \ + | mips | mipsbe | mipseb | mipsel | mipsle \ + | mips16 \ + | mips64 | mips64el \ + | mips64octeon | mips64octeonel \ + | mips64orion | mips64orionel \ + | mips64r5900 | mips64r5900el \ + | mips64vr | mips64vrel \ + | mips64vr4100 | mips64vr4100el \ + | mips64vr4300 | mips64vr4300el \ + | mips64vr5000 | mips64vr5000el \ + | mips64vr5900 | mips64vr5900el \ + | mipsisa32 | mipsisa32el \ + | mipsisa32r2 | mipsisa32r2el \ + | mipsisa64 | mipsisa64el \ + | mipsisa64r2 | mipsisa64r2el \ + | mipsisa64sb1 | mipsisa64sb1el \ + | mipsisa64sr71k | mipsisa64sr71kel \ + | mipstx39 | mipstx39el \ + | mn10200 | mn10300 \ + | moxie \ + | mt \ + | msp430 \ + | nds32 | nds32le | nds32be \ + | nios | nios2 \ + | ns16k | ns32k \ + | or32 \ + | pdp10 | pdp11 | pj | pjl \ + | powerpc | powerpc64 | powerpc64le | powerpcle | ppcbe \ + | pyramid \ + | rx \ + | score \ + | sh | sh[1234] | sh[24]a | sh[24]aeb | sh[23]e | sh[34]eb | sheb | shbe | shle | sh[1234]le | sh3ele \ + | sh64 | sh64le \ + | sparc | sparc64 | sparc64b | sparc64v | sparc86x | sparclet | sparclite \ + | sparcv8 | sparcv9 | sparcv9b | sparcv9v \ + | spu | strongarm \ + | tahoe | thumb | tic4x | tic54x | tic55x | tic6x | tic80 | tron \ + | ubicom32 \ + | v850 | v850e \ + | we32k \ + | x86 | xc16x | xscale | xscalee[bl] | xstormy16 | xtensa \ + | z8k | z80) + basic_machine=$basic_machine-unknown + ;; + c54x) + basic_machine=tic54x-unknown + ;; + c55x) + basic_machine=tic55x-unknown + ;; + c6x) + basic_machine=tic6x-unknown + ;; + m6811 | m68hc11 | m6812 | m68hc12 | picochip) + # Motorola 68HC11/12. + basic_machine=$basic_machine-unknown + os=-none + ;; + m88110 | m680[12346]0 | m683?2 | m68360 | m5200 | v70 | w65 | z8k) + ;; + ms1) + basic_machine=mt-unknown + ;; + + # We use `pc' rather than `unknown' + # because (1) that's what they normally are, and + # (2) the word "unknown" tends to confuse beginning users. + i*86 | x86_64) + basic_machine=$basic_machine-pc + ;; + # Object if more than one company name word. + *-*-*) + echo Invalid configuration \`$1\': machine \`$basic_machine\' not recognized 1>&2 + exit 1 + ;; + # Recognize the basic CPU types with company name. + 580-* \ + | a29k-* \ + | alpha-* | alphaev[4-8]-* | alphaev56-* | alphaev6[78]-* \ + | alpha64-* | alpha64ev[4-8]-* | alpha64ev56-* | alpha64ev6[78]-* \ + | alphapca5[67]-* | alpha64pca5[67]-* | arc-* \ + | arm-* | armbe-* | armle-* | armeb-* | armv*-* \ + | avr-* | avr32-* \ + | bfin-* | bs2000-* \ + | c[123]* | c30-* | [cjt]90-* | c4x-* \ + | clipper-* | craynv-* | cydra-* \ + | d10v-* | d30v-* | dlx-* \ + | elxsi-* \ + | f30[01]-* | f700-* | fido-* | fr30-* | frv-* | fx80-* \ + | h8300-* | h8500-* \ + | hppa-* | hppa1.[01]-* | hppa2.0-* | hppa2.0[nw]-* | hppa64-* \ + | i*86-* | i860-* | i960-* | ia64-* \ + | ip2k-* | iq2000-* \ + | lm32-* \ + | m32c-* | m32r-* | m32rle-* \ + | m68000-* | m680[012346]0-* | m68360-* | m683?2-* | m68k-* \ + | m88110-* | m88k-* | maxq-* | mcore-* | metag-* | microblaze-* \ + | mips-* | mipsbe-* | mipseb-* | mipsel-* | mipsle-* \ + | mips16-* \ + | mips64-* | mips64el-* \ + | mips64octeon-* | mips64octeonel-* \ + | mips64orion-* | mips64orionel-* \ + | mips64r5900-* | mips64r5900el-* \ + | mips64vr-* | mips64vrel-* \ + | mips64vr4100-* | mips64vr4100el-* \ + | mips64vr4300-* | mips64vr4300el-* \ + | mips64vr5000-* | mips64vr5000el-* \ + | mips64vr5900-* | mips64vr5900el-* \ + | mipsisa32-* | mipsisa32el-* \ + | mipsisa32r2-* | mipsisa32r2el-* \ + | mipsisa64-* | mipsisa64el-* \ + | mipsisa64r2-* | mipsisa64r2el-* \ + | mipsisa64sb1-* | mipsisa64sb1el-* \ + | mipsisa64sr71k-* | mipsisa64sr71kel-* \ + | mipstx39-* | mipstx39el-* \ + | mmix-* \ + | mt-* \ + | msp430-* \ + | nds32-* | nds32le-* | nds32be-* \ + | nios-* | nios2-* \ + | none-* | np1-* | ns16k-* | ns32k-* \ + | orion-* \ + | pdp10-* | pdp11-* | pj-* | pjl-* | pn-* | power-* \ + | powerpc-* | powerpc64-* | powerpc64le-* | powerpcle-* | ppcbe-* \ + | pyramid-* \ + | romp-* | rs6000-* | rx-* \ + | sh-* | sh[1234]-* | sh[24]a-* | sh[24]aeb-* | sh[23]e-* | sh[34]eb-* | sheb-* | shbe-* \ + | shle-* | sh[1234]le-* | sh3ele-* | sh64-* | sh64le-* \ + | sparc-* | sparc64-* | sparc64b-* | sparc64v-* | sparc86x-* | sparclet-* \ + | sparclite-* \ + | sparcv8-* | sparcv9-* | sparcv9b-* | sparcv9v-* | strongarm-* | sv1-* | sx?-* \ + | tahoe-* | thumb-* \ + | tic30-* | tic4x-* | tic54x-* | tic55x-* | tic6x-* | tic80-* \ + | tile-* | tilegx-* \ + | tron-* \ + | ubicom32-* \ + | v850-* | v850e-* | vax-* \ + | we32k-* \ + | x86-* | x86_64-* | xc16x-* | xps100-* | xscale-* | xscalee[bl]-* \ + | xstormy16-* | xtensa*-* \ + | ymp-* \ + | z8k-* | z80-*) + ;; + # Recognize the basic CPU types without company name, with glob match. + xtensa*) + basic_machine=$basic_machine-unknown + ;; + # Recognize the various machine names and aliases which stand + # for a CPU type and a company and sometimes even an OS. + 386bsd) + basic_machine=i386-unknown + os=-bsd + ;; + 3b1 | 7300 | 7300-att | att-7300 | pc7300 | safari | unixpc) + basic_machine=m68000-att + ;; + 3b*) + basic_machine=we32k-att + ;; + a29khif) + basic_machine=a29k-amd + os=-udi + ;; + abacus) + basic_machine=abacus-unknown + ;; + adobe68k) + basic_machine=m68010-adobe + os=-scout + ;; + alliant | fx80) + basic_machine=fx80-alliant + ;; + altos | altos3068) + basic_machine=m68k-altos + ;; + am29k) + basic_machine=a29k-none + os=-bsd + ;; + amd64) + basic_machine=x86_64-pc + ;; + amd64-*) + basic_machine=x86_64-`echo $basic_machine | sed 's/^[^-]*-//'` + ;; + amdahl) + basic_machine=580-amdahl + os=-sysv + ;; + amiga | amiga-*) + basic_machine=m68k-unknown + ;; + amigaos | amigados) + basic_machine=m68k-unknown + os=-amigaos + ;; + amigaunix | amix) + basic_machine=m68k-unknown + os=-sysv4 + ;; + apollo68) + basic_machine=m68k-apollo + os=-sysv + ;; + apollo68bsd) + basic_machine=m68k-apollo + os=-bsd + ;; + aros) + basic_machine=i386-pc + os=-aros + ;; + aux) + basic_machine=m68k-apple + os=-aux + ;; + balance) + basic_machine=ns32k-sequent + os=-dynix + ;; + blackfin) + basic_machine=bfin-unknown + os=-linux + ;; + blackfin-*) + basic_machine=bfin-`echo $basic_machine | sed 's/^[^-]*-//'` + os=-linux + ;; + bluegene*) + basic_machine=powerpc-ibm + os=-cnk + ;; + c54x-*) + basic_machine=tic54x-`echo $basic_machine | sed 's/^[^-]*-//'` + ;; + c55x-*) + basic_machine=tic55x-`echo $basic_machine | sed 's/^[^-]*-//'` + ;; + c6x-*) + basic_machine=tic6x-`echo $basic_machine | sed 's/^[^-]*-//'` + ;; + c90) + basic_machine=c90-cray + os=-unicos + ;; + cegcc) + basic_machine=arm-unknown + os=-cegcc + ;; + convex-c1) + basic_machine=c1-convex + os=-bsd + ;; + convex-c2) + basic_machine=c2-convex + os=-bsd + ;; + convex-c32) + basic_machine=c32-convex + os=-bsd + ;; + convex-c34) + basic_machine=c34-convex + os=-bsd + ;; + convex-c38) + basic_machine=c38-convex + os=-bsd + ;; + cray | j90) + basic_machine=j90-cray + os=-unicos + ;; + craynv) + basic_machine=craynv-cray + os=-unicosmp + ;; + cr16 | cr16-*) + basic_machine=cr16-unknown + os=-elf + ;; + crds | unos) + basic_machine=m68k-crds + ;; + crisv32 | crisv32-* | etraxfs*) + basic_machine=crisv32-axis + ;; + cris | cris-* | etrax*) + basic_machine=cris-axis + ;; + crx) + basic_machine=crx-unknown + os=-elf + ;; + da30 | da30-*) + basic_machine=m68k-da30 + ;; + decstation | decstation-3100 | pmax | pmax-* | pmin | dec3100 | decstatn) + basic_machine=mips-dec + ;; + decsystem10* | dec10*) + basic_machine=pdp10-dec + os=-tops10 + ;; + decsystem20* | dec20*) + basic_machine=pdp10-dec + os=-tops20 + ;; + delta | 3300 | motorola-3300 | motorola-delta \ + | 3300-motorola | delta-motorola) + basic_machine=m68k-motorola + ;; + delta88) + basic_machine=m88k-motorola + os=-sysv3 + ;; + dicos) + basic_machine=i686-pc + os=-dicos + ;; + djgpp) + basic_machine=i586-pc + os=-msdosdjgpp + ;; + dpx20 | dpx20-*) + basic_machine=rs6000-bull + os=-bosx + ;; + dpx2* | dpx2*-bull) + basic_machine=m68k-bull + os=-sysv3 + ;; + ebmon29k) + basic_machine=a29k-amd + os=-ebmon + ;; + elxsi) + basic_machine=elxsi-elxsi + os=-bsd + ;; + encore | umax | mmax) + basic_machine=ns32k-encore + ;; + es1800 | OSE68k | ose68k | ose | OSE) + basic_machine=m68k-ericsson + os=-ose + ;; + fx2800) + basic_machine=i860-alliant + ;; + genix) + basic_machine=ns32k-ns + ;; + gmicro) + basic_machine=tron-gmicro + os=-sysv + ;; + go32) + basic_machine=i386-pc + os=-go32 + ;; + h3050r* | hiux*) + basic_machine=hppa1.1-hitachi + os=-hiuxwe2 + ;; + h8300hms) + basic_machine=h8300-hitachi + os=-hms + ;; + h8300xray) + basic_machine=h8300-hitachi + os=-xray + ;; + h8500hms) + basic_machine=h8500-hitachi + os=-hms + ;; + harris) + basic_machine=m88k-harris + os=-sysv3 + ;; + hp300-*) + basic_machine=m68k-hp + ;; + hp300bsd) + basic_machine=m68k-hp + os=-bsd + ;; + hp300hpux) + basic_machine=m68k-hp + os=-hpux + ;; + hp3k9[0-9][0-9] | hp9[0-9][0-9]) + basic_machine=hppa1.0-hp + ;; + hp9k2[0-9][0-9] | hp9k31[0-9]) + basic_machine=m68000-hp + ;; + hp9k3[2-9][0-9]) + basic_machine=m68k-hp + ;; + hp9k6[0-9][0-9] | hp6[0-9][0-9]) + basic_machine=hppa1.0-hp + ;; + hp9k7[0-79][0-9] | hp7[0-79][0-9]) + basic_machine=hppa1.1-hp + ;; + hp9k78[0-9] | hp78[0-9]) + # FIXME: really hppa2.0-hp + basic_machine=hppa1.1-hp + ;; + hp9k8[67]1 | hp8[67]1 | hp9k80[24] | hp80[24] | hp9k8[78]9 | hp8[78]9 | hp9k893 | hp893) + # FIXME: really hppa2.0-hp + basic_machine=hppa1.1-hp + ;; + hp9k8[0-9][13679] | hp8[0-9][13679]) + basic_machine=hppa1.1-hp + ;; + hp9k8[0-9][0-9] | hp8[0-9][0-9]) + basic_machine=hppa1.0-hp + ;; + hppa-next) + os=-nextstep3 + ;; + hppaosf) + basic_machine=hppa1.1-hp + os=-osf + ;; + hppro) + basic_machine=hppa1.1-hp + os=-proelf + ;; + i370-ibm* | ibm*) + basic_machine=i370-ibm + ;; +# I'm not sure what "Sysv32" means. Should this be sysv3.2? + i*86v32) + basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'` + os=-sysv32 + ;; + i*86v4*) + basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'` + os=-sysv4 + ;; + i*86v) + basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'` + os=-sysv + ;; + i*86sol2) + basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'` + os=-solaris2 + ;; + i386mach) + basic_machine=i386-mach + os=-mach + ;; + i386-vsta | vsta) + basic_machine=i386-unknown + os=-vsta + ;; + iris | iris4d) + basic_machine=mips-sgi + case $os in + -irix*) + ;; + *) + os=-irix4 + ;; + esac + ;; + isi68 | isi) + basic_machine=m68k-isi + os=-sysv + ;; + m68knommu) + basic_machine=m68k-unknown + os=-linux + ;; + m68knommu-*) + basic_machine=m68k-`echo $basic_machine | sed 's/^[^-]*-//'` + os=-linux + ;; + m88k-omron*) + basic_machine=m88k-omron + ;; + magnum | m3230) + basic_machine=mips-mips + os=-sysv + ;; + merlin) + basic_machine=ns32k-utek + os=-sysv + ;; + microblaze) + basic_machine=microblaze-xilinx + ;; + mingw32) + basic_machine=i386-pc + os=-mingw32 + ;; + mingw32ce) + basic_machine=arm-unknown + os=-mingw32ce + ;; + miniframe) + basic_machine=m68000-convergent + ;; + *mint | -mint[0-9]* | *MiNT | *MiNT[0-9]*) + basic_machine=m68k-atari + os=-mint + ;; + mips3*-*) + basic_machine=`echo $basic_machine | sed -e 's/mips3/mips64/'` + ;; + mips3*) + basic_machine=`echo $basic_machine | sed -e 's/mips3/mips64/'`-unknown + ;; + monitor) + basic_machine=m68k-rom68k + os=-coff + ;; + morphos) + basic_machine=powerpc-unknown + os=-morphos + ;; + msdos) + basic_machine=i386-pc + os=-msdos + ;; + ms1-*) + basic_machine=`echo $basic_machine | sed -e 's/ms1-/mt-/'` + ;; + mvs) + basic_machine=i370-ibm + os=-mvs + ;; + ncr3000) + basic_machine=i486-ncr + os=-sysv4 + ;; + netbsd386) + basic_machine=i386-unknown + os=-netbsd + ;; + netwinder) + basic_machine=armv4l-rebel + os=-linux + ;; + news | news700 | news800 | news900) + basic_machine=m68k-sony + os=-newsos + ;; + news1000) + basic_machine=m68030-sony + os=-newsos + ;; + news-3600 | risc-news) + basic_machine=mips-sony + os=-newsos + ;; + necv70) + basic_machine=v70-nec + os=-sysv + ;; + next | m*-next ) + basic_machine=m68k-next + case $os in + -nextstep* ) + ;; + -ns2*) + os=-nextstep2 + ;; + *) + os=-nextstep3 + ;; + esac + ;; + nh3000) + basic_machine=m68k-harris + os=-cxux + ;; + nh[45]000) + basic_machine=m88k-harris + os=-cxux + ;; + nindy960) + basic_machine=i960-intel + os=-nindy + ;; + mon960) + basic_machine=i960-intel + os=-mon960 + ;; + nonstopux) + basic_machine=mips-compaq + os=-nonstopux + ;; + np1) + basic_machine=np1-gould + ;; + neo-tandem) + basic_machine=neo-tandem + ;; + nse-tandem) + basic_machine=nse-tandem + ;; + nsr-tandem) + basic_machine=nsr-tandem + ;; + op50n-* | op60c-*) + basic_machine=hppa1.1-oki + os=-proelf + ;; + openrisc | openrisc-*) + basic_machine=or32-unknown + ;; + os400) + basic_machine=powerpc-ibm + os=-os400 + ;; + OSE68000 | ose68000) + basic_machine=m68000-ericsson + os=-ose + ;; + os68k) + basic_machine=m68k-none + os=-os68k + ;; + pa-hitachi) + basic_machine=hppa1.1-hitachi + os=-hiuxwe2 + ;; + paragon) + basic_machine=i860-intel + os=-osf + ;; + parisc) + basic_machine=hppa-unknown + os=-linux + ;; + parisc-*) + basic_machine=hppa-`echo $basic_machine | sed 's/^[^-]*-//'` + os=-linux + ;; + pbd) + basic_machine=sparc-tti + ;; + pbb) + basic_machine=m68k-tti + ;; + pc532 | pc532-*) + basic_machine=ns32k-pc532 + ;; + pc98) + basic_machine=i386-pc + ;; + pc98-*) + basic_machine=i386-`echo $basic_machine | sed 's/^[^-]*-//'` + ;; + pentium | p5 | k5 | k6 | nexgen | viac3) + basic_machine=i586-pc + ;; + pentiumpro | p6 | 6x86 | athlon | athlon_*) + basic_machine=i686-pc + ;; + pentiumii | pentium2 | pentiumiii | pentium3) + basic_machine=i686-pc + ;; + pentium4) + basic_machine=i786-pc + ;; + pentium-* | p5-* | k5-* | k6-* | nexgen-* | viac3-*) + basic_machine=i586-`echo $basic_machine | sed 's/^[^-]*-//'` + ;; + pentiumpro-* | p6-* | 6x86-* | athlon-*) + basic_machine=i686-`echo $basic_machine | sed 's/^[^-]*-//'` + ;; + pentiumii-* | pentium2-* | pentiumiii-* | pentium3-*) + basic_machine=i686-`echo $basic_machine | sed 's/^[^-]*-//'` + ;; + pentium4-*) + basic_machine=i786-`echo $basic_machine | sed 's/^[^-]*-//'` + ;; + pn) + basic_machine=pn-gould + ;; + power) basic_machine=power-ibm + ;; + ppc) basic_machine=powerpc-unknown + ;; + ppc-*) basic_machine=powerpc-`echo $basic_machine | sed 's/^[^-]*-//'` + ;; + ppcle | powerpclittle | ppc-le | powerpc-little) + basic_machine=powerpcle-unknown + ;; + ppcle-* | powerpclittle-*) + basic_machine=powerpcle-`echo $basic_machine | sed 's/^[^-]*-//'` + ;; + ppc64) basic_machine=powerpc64-unknown + ;; + ppc64-*) basic_machine=powerpc64-`echo $basic_machine | sed 's/^[^-]*-//'` + ;; + ppc64le | powerpc64little | ppc64-le | powerpc64-little) + basic_machine=powerpc64le-unknown + ;; + ppc64le-* | powerpc64little-*) + basic_machine=powerpc64le-`echo $basic_machine | sed 's/^[^-]*-//'` + ;; + ps2) + basic_machine=i386-ibm + ;; + pw32) + basic_machine=i586-unknown + os=-pw32 + ;; + rdos) + basic_machine=i386-pc + os=-rdos + ;; + rom68k) + basic_machine=m68k-rom68k + os=-coff + ;; + rm[46]00) + basic_machine=mips-siemens + ;; + rtpc | rtpc-*) + basic_machine=romp-ibm + ;; + s390 | s390-*) + basic_machine=s390-ibm + ;; + s390x | s390x-*) + basic_machine=s390x-ibm + ;; + sa29200) + basic_machine=a29k-amd + os=-udi + ;; + sb1) + basic_machine=mipsisa64sb1-unknown + ;; + sb1el) + basic_machine=mipsisa64sb1el-unknown + ;; + sde) + basic_machine=mipsisa32-sde + os=-elf + ;; + sei) + basic_machine=mips-sei + os=-seiux + ;; + sequent) + basic_machine=i386-sequent + ;; + sh) + basic_machine=sh-hitachi + os=-hms + ;; + sh5el) + basic_machine=sh5le-unknown + ;; + sh64) + basic_machine=sh64-unknown + ;; + sparclite-wrs | simso-wrs) + basic_machine=sparclite-wrs + os=-vxworks + ;; + sps7) + basic_machine=m68k-bull + os=-sysv2 + ;; + spur) + basic_machine=spur-unknown + ;; + st2000) + basic_machine=m68k-tandem + ;; + stratus) + basic_machine=i860-stratus + os=-sysv4 + ;; + sun2) + basic_machine=m68000-sun + ;; + sun2os3) + basic_machine=m68000-sun + os=-sunos3 + ;; + sun2os4) + basic_machine=m68000-sun + os=-sunos4 + ;; + sun3os3) + basic_machine=m68k-sun + os=-sunos3 + ;; + sun3os4) + basic_machine=m68k-sun + os=-sunos4 + ;; + sun4os3) + basic_machine=sparc-sun + os=-sunos3 + ;; + sun4os4) + basic_machine=sparc-sun + os=-sunos4 + ;; + sun4sol2) + basic_machine=sparc-sun + os=-solaris2 + ;; + sun3 | sun3-*) + basic_machine=m68k-sun + ;; + sun4) + basic_machine=sparc-sun + ;; + sun386 | sun386i | roadrunner) + basic_machine=i386-sun + ;; + sv1) + basic_machine=sv1-cray + os=-unicos + ;; + symmetry) + basic_machine=i386-sequent + os=-dynix + ;; + t3e) + basic_machine=alphaev5-cray + os=-unicos + ;; + t90) + basic_machine=t90-cray + os=-unicos + ;; + # This must be matched before tile*. + tilegx*) + basic_machine=tilegx-unknown + os=-linux-gnu + ;; + tile*) + basic_machine=tile-unknown + os=-linux-gnu + ;; + tx39) + basic_machine=mipstx39-unknown + ;; + tx39el) + basic_machine=mipstx39el-unknown + ;; + toad1) + basic_machine=pdp10-xkl + os=-tops20 + ;; + tower | tower-32) + basic_machine=m68k-ncr + ;; + tpf) + basic_machine=s390x-ibm + os=-tpf + ;; + udi29k) + basic_machine=a29k-amd + os=-udi + ;; + ultra3) + basic_machine=a29k-nyu + os=-sym1 + ;; + v810 | necv810) + basic_machine=v810-nec + os=-none + ;; + vaxv) + basic_machine=vax-dec + os=-sysv + ;; + vms) + basic_machine=vax-dec + os=-vms + ;; + vpp*|vx|vx-*) + basic_machine=f301-fujitsu + ;; + vxworks960) + basic_machine=i960-wrs + os=-vxworks + ;; + vxworks68) + basic_machine=m68k-wrs + os=-vxworks + ;; + vxworks29k) + basic_machine=a29k-wrs + os=-vxworks + ;; + w65*) + basic_machine=w65-wdc + os=-none + ;; + w89k-*) + basic_machine=hppa1.1-winbond + os=-proelf + ;; + xbox) + basic_machine=i686-pc + os=-mingw32 + ;; + xps | xps100) + basic_machine=xps100-honeywell + ;; + ymp) + basic_machine=ymp-cray + os=-unicos + ;; + z8k-*-coff) + basic_machine=z8k-unknown + os=-sim + ;; + z80-*-coff) + basic_machine=z80-unknown + os=-sim + ;; + none) + basic_machine=none-none + os=-none + ;; + +# Here we handle the default manufacturer of certain CPU types. It is in +# some cases the only manufacturer, in others, it is the most popular. + w89k) + basic_machine=hppa1.1-winbond + ;; + op50n) + basic_machine=hppa1.1-oki + ;; + op60c) + basic_machine=hppa1.1-oki + ;; + romp) + basic_machine=romp-ibm + ;; + mmix) + basic_machine=mmix-knuth + ;; + rs6000) + basic_machine=rs6000-ibm + ;; + vax) + basic_machine=vax-dec + ;; + pdp10) + # there are many clones, so DEC is not a safe bet + basic_machine=pdp10-unknown + ;; + pdp11) + basic_machine=pdp11-dec + ;; + we32k) + basic_machine=we32k-att + ;; + sh[1234] | sh[24]a | sh[24]aeb | sh[34]eb | sh[1234]le | sh[23]ele) + basic_machine=sh-unknown + ;; + sparc | sparcv8 | sparcv9 | sparcv9b | sparcv9v) + basic_machine=sparc-sun + ;; + cydra) + basic_machine=cydra-cydrome + ;; + orion) + basic_machine=orion-highlevel + ;; + orion105) + basic_machine=clipper-highlevel + ;; + mac | mpw | mac-mpw) + basic_machine=m68k-apple + ;; + pmac | pmac-mpw) + basic_machine=powerpc-apple + ;; + *-unknown) + # Make sure to match an already-canonicalized machine name. + ;; + *) + echo Invalid configuration \`$1\': machine \`$basic_machine\' not recognized 1>&2 + exit 1 + ;; +esac + +# Here we canonicalize certain aliases for manufacturers. +case $basic_machine in + *-digital*) + basic_machine=`echo $basic_machine | sed 's/digital.*/dec/'` + ;; + *-commodore*) + basic_machine=`echo $basic_machine | sed 's/commodore.*/cbm/'` + ;; + *) + ;; +esac + +# Decode manufacturer-specific aliases for certain operating systems. + +if [ x"$os" != x"" ] +then +case $os in + # First match some system type aliases + # that might get confused with valid system types. + # -solaris* is a basic system type, with this one exception. + -auroraux) + os=-auroraux + ;; + -solaris1 | -solaris1.*) + os=`echo $os | sed -e 's|solaris1|sunos4|'` + ;; + -solaris) + os=-solaris2 + ;; + -svr4*) + os=-sysv4 + ;; + -unixware*) + os=-sysv4.2uw + ;; + -gnu/linux*) + os=`echo $os | sed -e 's|gnu/linux|linux-gnu|'` + ;; + # First accept the basic system types. + # The portable systems comes first. + # Each alternative MUST END IN A *, to match a version number. + # -sysv* is not here because it comes later, after sysvr4. + -gnu* | -bsd* | -mach* | -minix* | -genix* | -ultrix* | -irix* \ + | -*vms* | -sco* | -esix* | -isc* | -aix* | -cnk* | -sunos | -sunos[34]*\ + | -hpux* | -unos* | -osf* | -luna* | -dgux* | -auroraux* | -solaris* \ + | -sym* | -kopensolaris* \ + | -amigaos* | -amigados* | -msdos* | -newsos* | -unicos* | -aof* \ + | -aos* | -aros* \ + | -nindy* | -vxsim* | -vxworks* | -ebmon* | -hms* | -mvs* \ + | -clix* | -riscos* | -uniplus* | -iris* | -rtu* | -xenix* \ + | -hiux* | -386bsd* | -knetbsd* | -mirbsd* | -netbsd* \ + | -openbsd* | -solidbsd* \ + | -ekkobsd* | -kfreebsd* | -freebsd* | -riscix* | -lynxos* \ + | -bosx* | -nextstep* | -cxux* | -aout* | -elf* | -oabi* \ + | -ptx* | -coff* | -ecoff* | -winnt* | -domain* | -vsta* \ + | -udi* | -eabi* | -lites* | -ieee* | -go32* | -aux* \ + | -chorusos* | -chorusrdb* | -cegcc* \ + | -cygwin* | -pe* | -psos* | -moss* | -proelf* | -rtems* \ + | -mingw32* | -linux-gnu* | -linux-android* \ + | -linux-newlib* | -linux-uclibc* \ + | -uxpv* | -beos* | -mpeix* | -udk* \ + | -interix* | -uwin* | -mks* | -rhapsody* | -darwin* | -opened* \ + | -openstep* | -oskit* | -conix* | -pw32* | -nonstopux* \ + | -storm-chaos* | -tops10* | -tenex* | -tops20* | -its* \ + | -os2* | -vos* | -palmos* | -uclinux* | -nucleus* \ + | -morphos* | -superux* | -rtmk* | -rtmk-nova* | -windiss* \ + | -powermax* | -dnix* | -nx6 | -nx7 | -sei* | -dragonfly* \ + | -skyos* | -haiku* | -rdos* | -toppers* | -drops* | -es*) + # Remember, each alternative MUST END IN *, to match a version number. + ;; + -qnx*) + case $basic_machine in + x86-* | i*86-*) + ;; + *) + os=-nto$os + ;; + esac + ;; + -nto-qnx*) + ;; + -nto*) + os=`echo $os | sed -e 's|nto|nto-qnx|'` + ;; + -sim | -es1800* | -hms* | -xray | -os68k* | -none* | -v88r* \ + | -windows* | -osx | -abug | -netware* | -os9* | -beos* | -haiku* \ + | -macos* | -mpw* | -magic* | -mmixware* | -mon960* | -lnews*) + ;; + -mac*) + os=`echo $os | sed -e 's|mac|macos|'` + ;; + -linux-dietlibc) + os=-linux-dietlibc + ;; + -linux*) + os=`echo $os | sed -e 's|linux|linux-gnu|'` + ;; + -sunos5*) + os=`echo $os | sed -e 's|sunos5|solaris2|'` + ;; + -sunos6*) + os=`echo $os | sed -e 's|sunos6|solaris3|'` + ;; + -opened*) + os=-openedition + ;; + -os400*) + os=-os400 + ;; + -wince*) + os=-wince + ;; + -osfrose*) + os=-osfrose + ;; + -osf*) + os=-osf + ;; + -utek*) + os=-bsd + ;; + -dynix*) + os=-bsd + ;; + -acis*) + os=-aos + ;; + -atheos*) + os=-atheos + ;; + -syllable*) + os=-syllable + ;; + -386bsd) + os=-bsd + ;; + -ctix* | -uts*) + os=-sysv + ;; + -nova*) + os=-rtmk-nova + ;; + -ns2 ) + os=-nextstep2 + ;; + -nsk*) + os=-nsk + ;; + # Preserve the version number of sinix5. + -sinix5.*) + os=`echo $os | sed -e 's|sinix|sysv|'` + ;; + -sinix*) + os=-sysv4 + ;; + -tpf*) + os=-tpf + ;; + -triton*) + os=-sysv3 + ;; + -oss*) + os=-sysv3 + ;; + -svr4) + os=-sysv4 + ;; + -svr3) + os=-sysv3 + ;; + -sysvr4) + os=-sysv4 + ;; + # This must come after -sysvr4. + -sysv*) + ;; + -ose*) + os=-ose + ;; + -es1800*) + os=-ose + ;; + -xenix) + os=-xenix + ;; + -*mint | -mint[0-9]* | -*MiNT | -MiNT[0-9]*) + os=-mint + ;; + -aros*) + os=-aros + ;; + -kaos*) + os=-kaos + ;; + -zvmoe) + os=-zvmoe + ;; + -dicos*) + os=-dicos + ;; + -nacl*) + ;; + -none) + ;; + *) + # Get rid of the `-' at the beginning of $os. + os=`echo $os | sed 's/[^-]*-//'` + echo Invalid configuration \`$1\': system \`$os\' not recognized 1>&2 + exit 1 + ;; +esac +else + +# Here we handle the default operating systems that come with various machines. +# The value should be what the vendor currently ships out the door with their +# machine or put another way, the most popular os provided with the machine. + +# Note that if you're going to try to match "-MANUFACTURER" here (say, +# "-sun"), then you have to tell the case statement up towards the top +# that MANUFACTURER isn't an operating system. Otherwise, code above +# will signal an error saying that MANUFACTURER isn't an operating +# system, and we'll never get to this point. + +case $basic_machine in + score-*) + os=-elf + ;; + spu-*) + os=-elf + ;; + *-acorn) + os=-riscix1.2 + ;; + arm*-rebel) + os=-linux + ;; + arm*-semi) + os=-aout + ;; + c4x-* | tic4x-*) + os=-coff + ;; + tic54x-*) + os=-coff + ;; + tic55x-*) + os=-coff + ;; + tic6x-*) + os=-coff + ;; + # This must come before the *-dec entry. + pdp10-*) + os=-tops20 + ;; + pdp11-*) + os=-none + ;; + *-dec | vax-*) + os=-ultrix4.2 + ;; + m68*-apollo) + os=-domain + ;; + i386-sun) + os=-sunos4.0.2 + ;; + m68000-sun) + os=-sunos3 + # This also exists in the configure program, but was not the + # default. + # os=-sunos4 + ;; + m68*-cisco) + os=-aout + ;; + mep-*) + os=-elf + ;; + mips*-cisco) + os=-elf + ;; + mips*-*) + os=-elf + ;; + or32-*) + os=-coff + ;; + *-tti) # must be before sparc entry or we get the wrong os. + os=-sysv3 + ;; + sparc-* | *-sun) + os=-sunos4.1.1 + ;; + *-be) + os=-beos + ;; + *-haiku) + os=-haiku + ;; + *-ibm) + os=-aix + ;; + *-knuth) + os=-mmixware + ;; + *-wec) + os=-proelf + ;; + *-winbond) + os=-proelf + ;; + *-oki) + os=-proelf + ;; + *-hp) + os=-hpux + ;; + *-hitachi) + os=-hiux + ;; + i860-* | *-att | *-ncr | *-altos | *-motorola | *-convergent) + os=-sysv + ;; + *-cbm) + os=-amigaos + ;; + *-dg) + os=-dgux + ;; + *-dolphin) + os=-sysv3 + ;; + m68k-ccur) + os=-rtu + ;; + m88k-omron*) + os=-luna + ;; + *-next ) + os=-nextstep + ;; + *-sequent) + os=-ptx + ;; + *-crds) + os=-unos + ;; + *-ns) + os=-genix + ;; + i370-*) + os=-mvs + ;; + *-next) + os=-nextstep3 + ;; + *-gould) + os=-sysv + ;; + *-highlevel) + os=-bsd + ;; + *-encore) + os=-bsd + ;; + *-sgi) + os=-irix + ;; + *-siemens) + os=-sysv4 + ;; + *-masscomp) + os=-rtu + ;; + f30[01]-fujitsu | f700-fujitsu) + os=-uxpv + ;; + *-rom68k) + os=-coff + ;; + *-*bug) + os=-coff + ;; + *-apple) + os=-macos + ;; + *-atari*) + os=-mint + ;; + *) + os=-none + ;; +esac +fi + +# Here we handle the case where we know the os, and the CPU type, but not the +# manufacturer. We pick the logical manufacturer. +vendor=unknown +case $basic_machine in + *-unknown) + case $os in + -riscix*) + vendor=acorn + ;; + -sunos*) + vendor=sun + ;; + -cnk*|-aix*) + vendor=ibm + ;; + -beos*) + vendor=be + ;; + -hpux*) + vendor=hp + ;; + -mpeix*) + vendor=hp + ;; + -hiux*) + vendor=hitachi + ;; + -unos*) + vendor=crds + ;; + -dgux*) + vendor=dg + ;; + -luna*) + vendor=omron + ;; + -genix*) + vendor=ns + ;; + -mvs* | -opened*) + vendor=ibm + ;; + -os400*) + vendor=ibm + ;; + -ptx*) + vendor=sequent + ;; + -tpf*) + vendor=ibm + ;; + -vxsim* | -vxworks* | -windiss*) + vendor=wrs + ;; + -aux*) + vendor=apple + ;; + -hms*) + vendor=hitachi + ;; + -mpw* | -macos*) + vendor=apple + ;; + -*mint | -mint[0-9]* | -*MiNT | -MiNT[0-9]*) + vendor=atari + ;; + -vos*) + vendor=stratus + ;; + esac + basic_machine=`echo $basic_machine | sed "s/unknown/$vendor/"` + ;; +esac + +echo $basic_machine$os +exit + +# Local variables: +# eval: (add-hook 'write-file-hooks 'time-stamp) +# time-stamp-start: "timestamp='" +# time-stamp-format: "%:y-%02m-%02d" +# time-stamp-end: "'" +# End: diff --git a/smlnj-lib/Doc/config/install-sh b/smlnj-lib/Doc/config/install-sh new file mode 100644 index 0000000..36f96f3 --- /dev/null +++ b/smlnj-lib/Doc/config/install-sh @@ -0,0 +1,276 @@ +#!/bin/sh +# +# install - install a program, script, or datafile +# This comes from X11R5 (mit/util/scripts/install.sh). +# +# Copyright 1991 by the Massachusetts Institute of Technology +# +# Permission to use, copy, modify, distribute, and sell this software and its +# documentation for any purpose is hereby granted without fee, provided that +# the above copyright notice appear in all copies and that both that +# copyright notice and this permission notice appear in supporting +# documentation, and that the name of M.I.T. not be used in advertising or +# publicity pertaining to distribution of the software without specific, +# written prior permission. M.I.T. makes no representations about the +# suitability of this software for any purpose. It is provided "as is" +# without express or implied warranty. +# +# Calling this script install-sh is preferred over install.sh, to prevent +# `make' implicit rules from creating a file called install from it +# when there is no Makefile. +# +# This script is compatible with the BSD install script, but was written +# from scratch. It can only install one file at a time, a restriction +# shared with many OS's install programs. + + +# set DOITPROG to echo to test this script + +# Don't use :- since 4.3BSD and earlier shells don't like it. +doit="${DOITPROG-}" + + +# put in absolute paths if you don't have them in your path; or use env. vars. + +mvprog="${MVPROG-mv}" +cpprog="${CPPROG-cp}" +chmodprog="${CHMODPROG-chmod}" +chownprog="${CHOWNPROG-chown}" +chgrpprog="${CHGRPPROG-chgrp}" +stripprog="${STRIPPROG-strip}" +rmprog="${RMPROG-rm}" +mkdirprog="${MKDIRPROG-mkdir}" + +transformbasename="" +transform_arg="" +instcmd="$mvprog" +chmodcmd="$chmodprog 0755" +chowncmd="" +chgrpcmd="" +stripcmd="" +rmcmd="$rmprog -f" +mvcmd="$mvprog" +src="" +dst="" +dir_arg="" + +while [ x"$1" != x ]; do + case $1 in + -c) instcmd=$cpprog + shift + continue;; + + -d) dir_arg=true + shift + continue;; + + -m) chmodcmd="$chmodprog $2" + shift + shift + continue;; + + -o) chowncmd="$chownprog $2" + shift + shift + continue;; + + -g) chgrpcmd="$chgrpprog $2" + shift + shift + continue;; + + -s) stripcmd=$stripprog + shift + continue;; + + -t=*) transformarg=`echo $1 | sed 's/-t=//'` + shift + continue;; + + -b=*) transformbasename=`echo $1 | sed 's/-b=//'` + shift + continue;; + + *) if [ x"$src" = x ] + then + src=$1 + else + # this colon is to work around a 386BSD /bin/sh bug + : + dst=$1 + fi + shift + continue;; + esac +done + +if [ x"$src" = x ] +then + echo "$0: no input file specified" >&2 + exit 1 +else + : +fi + +if [ x"$dir_arg" != x ]; then + dst=$src + src="" + + if [ -d "$dst" ]; then + instcmd=: + chmodcmd="" + else + instcmd=$mkdirprog + fi +else + +# Waiting for this to be detected by the "$instcmd $src $dsttmp" command +# might cause directories to be created, which would be especially bad +# if $src (and thus $dsttmp) contains '*'. + + if [ -f "$src" ] || [ -d "$src" ] + then + : + else + echo "$0: $src does not exist" >&2 + exit 1 + fi + + if [ x"$dst" = x ] + then + echo "$0: no destination specified" >&2 + exit 1 + else + : + fi + +# If destination is a directory, append the input filename; if your system +# does not like double slashes in filenames, you may need to add some logic + + if [ -d "$dst" ] + then + dst=$dst/`basename "$src"` + else + : + fi +fi + +## this sed command emulates the dirname command +dstdir=`echo "$dst" | sed -e 's,[^/]*$,,;s,/$,,;s,^$,.,'` + +# Make sure that the destination directory exists. +# this part is taken from Noah Friedman's mkinstalldirs script + +# Skip lots of stat calls in the usual case. +if [ ! -d "$dstdir" ]; then +defaultIFS=' + ' +IFS="${IFS-$defaultIFS}" + +oIFS=$IFS +# Some sh's can't handle IFS=/ for some reason. +IFS='%' +set - `echo "$dstdir" | sed -e 's@/@%@g' -e 's@^%@/@'` +IFS=$oIFS + +pathcomp='' + +while [ $# -ne 0 ] ; do + pathcomp=$pathcomp$1 + shift + + if [ ! -d "$pathcomp" ] ; + then + $mkdirprog "$pathcomp" + else + : + fi + + pathcomp=$pathcomp/ +done +fi + +if [ x"$dir_arg" != x ] +then + $doit $instcmd "$dst" && + + if [ x"$chowncmd" != x ]; then $doit $chowncmd "$dst"; else : ; fi && + if [ x"$chgrpcmd" != x ]; then $doit $chgrpcmd "$dst"; else : ; fi && + if [ x"$stripcmd" != x ]; then $doit $stripcmd "$dst"; else : ; fi && + if [ x"$chmodcmd" != x ]; then $doit $chmodcmd "$dst"; else : ; fi +else + +# If we're going to rename the final executable, determine the name now. + + if [ x"$transformarg" = x ] + then + dstfile=`basename "$dst"` + else + dstfile=`basename "$dst" $transformbasename | + sed $transformarg`$transformbasename + fi + +# don't allow the sed command to completely eliminate the filename + + if [ x"$dstfile" = x ] + then + dstfile=`basename "$dst"` + else + : + fi + +# Make a couple of temp file names in the proper directory. + + dsttmp=$dstdir/#inst.$$# + rmtmp=$dstdir/#rm.$$# + +# Trap to clean up temp files at exit. + + trap 'status=$?; rm -f "$dsttmp" "$rmtmp" && exit $status' 0 + trap '(exit $?); exit' 1 2 13 15 + +# Move or copy the file name to the temp name + + $doit $instcmd "$src" "$dsttmp" && + +# and set any options; do chmod last to preserve setuid bits + +# If any of these fail, we abort the whole thing. If we want to +# ignore errors from any of these, just make sure not to ignore +# errors from the above "$doit $instcmd $src $dsttmp" command. + + if [ x"$chowncmd" != x ]; then $doit $chowncmd "$dsttmp"; else :;fi && + if [ x"$chgrpcmd" != x ]; then $doit $chgrpcmd "$dsttmp"; else :;fi && + if [ x"$stripcmd" != x ]; then $doit $stripcmd "$dsttmp"; else :;fi && + if [ x"$chmodcmd" != x ]; then $doit $chmodcmd "$dsttmp"; else :;fi && + +# Now remove or move aside any old file at destination location. We try this +# two ways since rm can't unlink itself on some systems and the destination +# file might be busy for other reasons. In this case, the final cleanup +# might fail but the new file should still install successfully. + +{ + if [ -f "$dstdir/$dstfile" ] + then + $doit $rmcmd -f "$dstdir/$dstfile" 2>/dev/null || + $doit $mvcmd -f "$dstdir/$dstfile" "$rmtmp" 2>/dev/null || + { + echo "$0: cannot unlink or rename $dstdir/$dstfile" >&2 + (exit 1); exit + } + else + : + fi +} && + +# Now rename the file to the real destination. + + $doit $mvcmd "$dsttmp" "$dstdir/$dstfile" + +fi && + +# The final little trick to "correctly" pass the exit status to the exit trap. + +{ + (exit 0); exit +} diff --git a/smlnj-lib/Doc/config/install-sml-wrapper_sh.in b/smlnj-lib/Doc/config/install-sml-wrapper_sh.in new file mode 100644 index 0000000..0677e4e --- /dev/null +++ b/smlnj-lib/Doc/config/install-sml-wrapper_sh.in @@ -0,0 +1,57 @@ +#!/bin/sh +# +# COPYRIGHT (c) 2019 The SML/NJ Fellowship. +# +# @configure_input@ +# +# a general-purpose script for installing executable wrappers for SML/NJ +# programs. +# +# install-sml-wrapper.sh + +INSTALL="@INSTALL@" +INSTALL_DATA="@INSTALL_DATA@" +HEAP_SUFFIX="@SMLNJ_HEAP_SUFFIX@" +SIZE_OPT="@SMLNJ_SIZE@" + +if test $# -lt 2 ; then + echo "usage: install-sml-wrapper.sh " + exit 1 +fi + +SRC=$1; shift +TARGET=`basename $SRC` +HEAP_IMAGE=$SRC.$HEAP_SUFFIX +INSTALL_DIR=$1; shift +INSTALL_HEAP_DIR=$INSTALL_DIR/.heap +INSTALL_HEAP_IMAGE=$INSTALL_HEAP_DIR/$TARGET.$HEAP_SUFFIX + +if test ! -f $HEAP_IMAGE ; then + echo "heap image $HEAP_IMAGE not found" + exit 1 +fi + +# create the wrapper script +# +cat > $TARGET < registry + +val register : registry -> { + ctl : string Controls.control, + envName : string option + } -> unit + +val registerSet : registry -> { + ctls : (string, 'a) ControlSet.control_set, + mkEnvName : string -> string option + } -> unit + +val nest : registry -> { + prefix : string option, + pri : Controls.priority, + obscurity : int, + reg : registry + } -> unit + +val control : registry -> string list -> string Controls.control option + +val init : registry -> unit + +datatype registry_tree = RTree of { + path : string list, + help : string, + ctls : { ctl : string Controls.control, info : control_info } list, + subregs : registry_tree list + } + +val controls : (registry * int option) -> registry_tree +------------ + +== Description + +`[.kw]#type# registry`:: + the type of a control registry hierarchy. + +`[.kw]#type# control_info = { envName : string option }`:: + a record of information about a control. Currently, this record type + only contains an optional environment-variable name for the control. + +`[.kw]#val# new : {help : string} \-> registry`:: + `new {help}` creates a new registry, where the `help` string + describes the registry. + +`[.kw]#val# register : registry \-> {ctl, envName} \-> unit`:: + `register {ctl, envName}` adds the control `ctl` to the registry `reg`. + The optional string `envName` specifies the name of the environment + variable that can be used to specify the value of the control. + +`[.kw]#val# registerSet : registry \-> {ctls, mkEnvName} \-> unit`:: + `registerSet {ctls, mkEnvName}` registers the controls in the + xref:str-ControlSet.adoc#type:control_set[control set] `ctls`. + The function `mkEnvName` is applied to the names of the controls + to generate the optional environment-variable names. + +`[.kw]#val# nest : registry \-> {prefix, pri, obscurity, reg} \-> unit`:: + `nest parent {prefix, pri, obscurity, reg}` adds the registry `reg` as + a child of the registry `parent`. The fields of the second argument + have the following meaning: ++ +-- + `prefix : string option`:: + The prefix (or name) that qualifies the child registry + (see the xref:#val:control[control] function). + + `pri : Controls.priority`:: + The registry's priority; used when ordering the elements in a + registry. + + `obscurity : int`:: + The _obscurity_ level of the registrion (higher means more obscure). + + `reg : registry`:: + The child registry being added to `parent`. +-- + +[[val:control]] +`[.kw]#val# control : registry \-> string list \-> string Controls.control option`:: + `control reg path` searches the registry for a control with the given `path`. + +`[.kw]#val# init : registry \-> unit`:: + `init reg` uses the host process's environment (as accessed by the + {sml-basis-url}/os-process.html#SIG:OS_PROCESS.getEnv:VAL[`OS.Process.getEnv`] + function) to initialize those controls that have associated environment-variables. + +`[.kw]#datatype# registry_tree = RTree of { ... }`:: + The `registry_tree` datatype provides a concrete representation of the + registry hierarchy. ++ +-- + `path : string list`:: + is the full path to the node in the tree. + + `help : string`:: + is the description of the node in the tree. + + `ctls : { ctl : string Controls.control, info : control_info } list`:: + is a priority-ordered list of the controls at the node + in the tree. + + `subregs : registry_tree list`:: + is a priority-ordered list of the sub-registries at the node + in the tree. +-- + +`[.kw]#val# controls : (registry * int option) \-> registry_tree`:: + `controls (reg, optLevel)` returns the `registry_tree` representation + of the registry `reg`. If `optLevel` is `SOME n`, then sub-registries + that have an obscurity level greater or equal to `n` are omitted + from the result. + +== See Also + +xref:str-Controls.adoc[`Controls`], +xref:str-ControlSet.adoc[`ControlSet`], +xref:controls-lib.adoc[__The Controls Library__] diff --git a/smlnj-lib/Doc/src/Controls/str-ControlSet.adoc b/smlnj-lib/Doc/src/Controls/str-ControlSet.adoc new file mode 100644 index 0000000..0ae7171 --- /dev/null +++ b/smlnj-lib/Doc/src/Controls/str-ControlSet.adoc @@ -0,0 +1,99 @@ += The `ControlSet` structure +:Author: John Reppy +:Date: {release-date} +:stem: latexmath +:source-highlighter: pygments +:VERSION: {smlnj-version} + +The `ControlSet` structure provides facilities for managing sets +of controls, with associated information, by name. + +== Synopsis + +[source,sml] +------------ +signature CONTROL_SET +structure ControlSet : CONTROL_SET +------------ + +== Interface + +[source,sml] +------------ +type 'a control = 'a Controls.control +type ('a, 'b) control_set + +val new : unit -> ('a, 'b) control_set + +val member : (('a, 'b) control_set * Atom.atom) -> bool +val find : (('a, 'b) control_set * Atom.atom) + -> {ctl : 'a control, info : 'b} option +val insert : (('a, 'b) control_set * 'a control * 'b) -> unit +val remove : (('a, 'b) control_set * Atom.atom) -> unit +val infoOf : ('a, 'b) control_set -> 'a control -> 'b option + +val listControls : ('a, 'b) control_set -> {ctl : 'a control, info : 'b} list +val listControls' : (('a, 'b) control_set * int) -> {ctl : 'a control, info : 'b} list + +val app : ({ctl : 'a control, info : 'b} -> unit) -> ('a, 'b) control_set -> unit + +val stringControls : 'a Controls.value_cvt -> ('a, 'b) control_set + -> (string, 'b) control_set +------------ + +== Description + +[[type:control_set]] +`[.kw]#type# ('a, 'b) control_set`:: + The abstract type of _control set_, where the first type parameter + is the value-type of the controls in the set and the second is the + type of the information associated with each control. + +`[.kw]#val# new : unit \-> ('a, 'b) control_set`:: + `new ()` creates a new, empty, set of controls. + +`[.kw]#val# member : (('a, 'b) control_set * Atom.atom) \-> bool`:: + `member (ctlSet, name)` returns true if there is a control with + the given name in the set. + +`[.kw]#val# find : (('a, 'b) control_set * Atom.atom) \-> {ctl : 'a control, info : 'b} option`:: + `find (ctsSet, name)` returns `SOME{ctl, info}` when the control `ctl`, which + has the name `name` is in the set and `info` is its associated information. + Otherwise, `NONE` is returned. + +`[.kw]#val# insert : (('a, 'b) control_set * 'a control * 'b) \-> unit`:: + `insert (ctsSet, ctl, info)` inserts the control `ctl` with associated + information `into` into the control set. + +`[.kw]#val# remove : (('a, 'b) control_set * Atom.atom) \-> unit`:: + `remove (ctlSet, name)` removes the named control from the set (if + it is present). + +`[.kw]#val# infoOf : ('a, 'b) control_set \-> 'a control \-> 'b option`:: + `infoOf ctlSet ctl` returns `SOME info`, when `ctl` is in the set + with associated information `info`. If `ctl` is not in the set, + then `NONE` is returned. + +`[.kw]#val# listControls : ('a, 'b) control_set \-> {ctl : 'a control, info : 'b} list`:: + `listControls ctlSet` returns a list of the controls in the set ordered by + priority. + +`[.kw]#val# listControls' : (('a, 'b) control_set * int) \-> {ctl : 'a control, info : 'b} list`:: + `listControls (ctlSet, level)` returns a list of the controls in the set ordered by + priority, but omits any controls with an obscurity level greater or equal to + `level`. + +`[.kw]#val# app : ({ctl : 'a control, info : 'b} \-> unit) \-> ('a, 'b) control_set \-> unit`:: + `app f ctlSet` applies the function `f` to the controls (and their associated + information). The order in which `f` is applied is unspecified. + +`[.kw]#val# stringControls : 'a Controls.value_cvt \-> ('a, 'b) control_set \-> (string, 'b) control_set`:: + `stringControls cvt ctlSet` returns a set of string controls, where the controls + in the new set are created by applying the value-converter `cvt` to the controls + in `ctlSet`. The associated information is preserved. + +== See Also + +xref:str-Controls.adoc[`Controls`], +xref:str-ControlRegistry.adoc[`ControlRegistry`], +xref:controls-lib.adoc[__The Controls Library__] diff --git a/smlnj-lib/Doc/src/Controls/str-ControlUtil.adoc b/smlnj-lib/Doc/src/Controls/str-ControlUtil.adoc new file mode 100644 index 0000000..64c47ba --- /dev/null +++ b/smlnj-lib/Doc/src/Controls/str-ControlUtil.adoc @@ -0,0 +1,74 @@ += The `ControlUtil` structure +:Author: John Reppy +:Date: {release-date} +:stem: latexmath +:source-highlighter: pygments +:VERSION: {smlnj-version} + +The `ControlUtil` structure provides some utility functions +for defining controls. + +== Synopsis + +[source,sml] +------------ +signature CONTROL_UTIL +structure ControlUtil : CONTROL_UTIL +------------ + +== Interface + +[source,sml] +------------ +structure Cvt : sig + val int : int Controls.value_cvt + val bool : bool Controls.value_cvt + val real : real Controls.value_cvt + + val stringList : string list Controls.value_cvt + + val string : string Controls.value_cvt + end + +structure EnvName : sig + val toUpper : string -> string -> string + end +------------ + +== Description + +=== `structure Cvt` + +The `ControlUtil.Cvt` structure provides some common value-conversion +functions. + +`[.kw]#val# int : int xref:str-Controls.adoc#type:value_cvt[Controls.value_cvt]`:: + A xref:str-Controls.adoc#type:value_cvt[value converter] for the `int` type. + +`[.kw]#val# bool : bool xref:str-Controls.adoc#type:value_cvt[Controls.value_cvt]`:: + A xref:str-Controls.adoc#type:value_cvt[value converter] for the `bool` type. + This converter is case-insensitive, and accepts `"yes"` for `true` and + `"no"` for `false`. + +`[.kw]#val# real : real xref:str-Controls.adoc#type:value_cvt[Controls.value_cvt]`:: + A xref:str-Controls.adoc#type:value_cvt[value converter] for the `real` type. + +`[.kw]#val# stringList : string list xref:str-Controls.adoc#type:value_cvt[Controls.value_cvt]`:: + A xref:str-Controls.adoc#type:value_cvt[value converter] for the + comma-separated lists of strings. + +`[.kw]#val# string : string xref:str-Controls.adoc#type:value_cvt[Controls.value_cvt]`:: + A xref:str-Controls.adoc#type:value_cvt[value converter] for the `string` type. + This converter is just the identity. + +=== `structure EnvName` + +`[.kw]#val# toUpper : string \-> string \-> string`:: + `toUpper prefix s` returns the string ``prefix ^ s'``, where ``s'`` is + the string `s` with lower-case letters converted to upper-case and any + occurrences of the minus character converted to the underscore character. + +== See Also + +xref:str-Controls.adoc[`Controls`], +xref:controls-lib.adoc[__The Controls Library__] diff --git a/smlnj-lib/Doc/src/Controls/str-Controls.adoc b/smlnj-lib/Doc/src/Controls/str-Controls.adoc new file mode 100644 index 0000000..86d5e7b --- /dev/null +++ b/smlnj-lib/Doc/src/Controls/str-Controls.adoc @@ -0,0 +1,265 @@ += The `Controls` structure +:Author: John Reppy +:Date: {release-date} +:stem: latexmath +:source-highlighter: pygments +:VERSION: {smlnj-version} + +The `Controls` structure defines the basic types and operations +for the *Controls Library*. + +== Synopsis + +[source,sml] +------------ +signature CONTROLS +structure Controls : CONTROLS +------------ + +== Interface + +[source,sml] +------------ +type priority = int list +type 'a control + +type 'a value_cvt = { + tyName : string, + fromString : string -> 'a option, + toString : 'a -> string + } + +val control : { + name : string, + pri : priority, + obscurity : int, + help : string, + ctl : 'a ref + } -> 'a control + +val genControl : { + name : string, + pri : priority, + obscurity : int, + help : string, + default : 'a + } -> 'a control + +exception ValueSyntax of {tyName : string, ctlName : string, value : string} + +val stringControl : 'a value_cvt -> 'a control -> string control + +val name : 'a control -> string +val get : 'a control -> 'a +val set : 'a control * 'a -> unit +val set' : 'a control * 'a -> unit -> unit +val help : 'a control -> string +val info : 'a control -> {priority : priority, obscurity : int, help : string} + +val mkOptionFlag : { + ctl : bool control, + short : string, + long : string option + } -> unit GetOpt.opt_descr + +val mkOptionReqArg : { + ctl : string control, + arg : string, + short : string, + long : string option + } -> unit GetOpt.opt_descr + +val mkOption : { + ctl : string control, + arg : string, + default : string, + short : string, + long : string option + } -> unit GetOpt.opt_descr + +val save'restore : 'a control -> unit -> unit + +val compare : ('a control * 'a control) -> order +------------ + +== Description + +`[.kw]#type# priority = int list`:: + something + +`[.kw]#type# 'a control`:: + something + +[[type:value_cvt]] +`[.kw]#type# 'a value_cvt = { ... }`:: + A value converter is used to convert between strings and another + type. The fields have the following meaning: ++ +-- + `tyName : string`:: + The name of the type being converted, + + `fromString : string \-> 'a option`:: + The function for converting from strings to the type. + + `toString : 'a \-> string'`:: + The function for converting from the type to strings. +-- + +`[.kw]#val# control : {name, pri, obscurity, help, ctl} \-> 'a control`:: + `control {name, pri, obscurity, help, ctl}` creates a new control, + where the arguments are ++ +-- + `name : string`:: + the name of the control. + + `pri : priority`:: + the control's priority. + + `obscurity : int`:: + the control's obscurity level (higher means more obscure). + + `help : string`:: + the control's description. + + `ctl : 'a ref`:: + the reference cell that holds the control's state. +-- + +`[.kw]#val# genControl : {name, pri, obscurity, help, ctl, default} \-> 'a control`:: + `genControl {name, pri, obscurity, help, default}` creates a new control, + where the arguments are ++ +-- + `name : string`:: + the name of the control. + + `pri : priority`:: + the control's priority. + + `obscurity : int`:: + the control's obscurity level (higher means more obscure). + + `help : string`:: + the control's description. + + `default : 'a`:: + the initial, or default, value of the control. +-- + +`[.kw]#exception# ValueSyntax of {tyName : string, ctlName : string, value : string}`:: + This exception is raised to communicate that there is a syntax error + in a string representation of a control value. + +[[val:stringControl]] +`[.kw]#val# stringControl : 'a value_cvt \-> 'a control \-> string control`:: + `stringControl cvt ctl` creates a string-valued interface to the control `ctl` + using the given value converter. + +`[.kw]#val# name : 'a control \-> string`:: + `name ctl` returns the name of the control `ctl`. + +`[.kw]#val# get : 'a control \-> 'a`:: + `get ctl` returns the value of the control `ctl`. + +`[.kw]#val# set : 'a control * 'a \-> unit`:: + `set (ctl, v)` sets the value of the control `ctl` to `v`. + +`[.kw]#val# set' : 'a control * 'a \-> unit \-> unit (* delayed; error checking in 1st stage *)`:: + `set (ctl, v)` returns a `unit \-> unit` function that will set + the value of the control `ctl` to `v`. This staged evaluation is useful + when the control does some error checking (_i.e._, because it is the + result of xref:#val:stringControl[`stringControl`]) on the value `v`. + In that case, the value is checked for syntactic correctness and + converted when ``set'`` is applied. + +`[.kw]#val# help : 'a control \-> string`:: + `help ctl` returns the description of the control `ctl`. + +`[.kw]#val# info : 'a control \-> {priority : priority, obscurity : int, help : string}`:: + `info ctl` returns a record `{priority, obscurity, help}`, + where the fields of the result are ++ +-- + `priority : priority`:: + the control's priority. + + `obscurity : int`:: + the control's obscurity level (higher means more obscure). + + `help : string`:: + the control's description. +-- + +`[.kw]#val# mkOptionFlag : {ctl, short, long} \-> unit xref:../Util/str-GetOpt.adoc#type:opt_descr[GetOpt.opt_descr]`:: + `mkOptionFlag {ctl, short, long}` returns a command-line-option + xref:../Util/str-GetOpt.adoc#con:NoArg[`GetOpt.NoArg`] descriptor + for a boolean control. The arguments are ++ +-- + `ctl : bool control`:: + the control that will be set by the command-line option. + + `short : string`:: + the short name for the command-line option; either zero or one chars. + + `long : string option`:: + an optional long-name for the command-line option. +-- + +`[.kw]#val# mkOptionReqArg : {ctl, arg, short, long} \-> unit xref:../Util/str-GetOpt.adoc#type:opt_descr[GetOpt.opt_descr]`:: + `mkOptionReqArg {ctl, arg, short, long}` returns a command-line-option + xref:../Util/str-GetOpt.adoc#con:ReqArg[`GetOpt.ReqArg`] descriptor + for a string control, where an argument for the command-line option + is required. The arguments to the call are ++ +-- + `ctl : string control`:: + the control that will be set by the command-line option. + + `arg : string`:: + the name for the argument, which is used in the usage message. + + `short : string`:: + the short name for the option; either zero or one chars. + + `long : string option`:: + an optional long-name for the option. +-- + +`[.kw]#val# mkOption : {ctl, arg, default, short, long} \-> unit GetOpt.opt_descr`:: + `mkOptionReqArg {ctl, arg, short, long}` returns a command-line-option + xref:../Util/str-GetOpt.adoc#con:OptArg[`GetOpt.OptArg`] descriptor + for a string control, where an argument for the command-line option + is optional. The arguments to the call are ++ +-- + `ctl : string control`:: + the control that will be set by the command-line option. + + `arg : string`:: + the name for the argument, which is used in the usage message. + + `default : string`:: + the default value for when no argument is given. + + `short : string`:: + the short name for the command-line option; either zero or one chars. + + `long : string option`:: + an optional long-name for the command-line option. +-- + +`[.kw]#val# save'restore : 'a control \-> unit \-> unit`:: + ``save'restore ctl`` saves the current value of the control and + returns a `unit \-> unit` function that will restore the value. + +`[.kw]#val# compare : ('a control * 'a control) \-> order`:: + `compare (ctl1, ctl2)` returns the priority order of the two controls. + +== See Also + +xref:str-ControlUtil.adoc[`ControlUtil`], +xref:controls-lib.adoc[__The Controls Library__], +xref:../Util/str-GetOpt.adoc[`GetOpt`] diff --git a/smlnj-lib/Doc/src/HTML/MODULES b/smlnj-lib/Doc/src/HTML/MODULES new file mode 100644 index 0000000..2de14eb --- /dev/null +++ b/smlnj-lib/Doc/src/HTML/MODULES @@ -0,0 +1,7 @@ +signature HTML_ERROR +signature HTML +structure HTML +structure MakeHTML +structure PrHTML +structure HTMLDefaults +functor HTMLParserFn diff --git a/smlnj-lib/Doc/src/HTML4/MODULES b/smlnj-lib/Doc/src/HTML4/MODULES new file mode 100644 index 0000000..3fcb8a8 --- /dev/null +++ b/smlnj-lib/Doc/src/HTML4/MODULES @@ -0,0 +1,9 @@ +signature HTML4 +structure HTML4 +structure HTML4Attrs +structure HTML4Entities +structure HTML4Parser +structure HTML4Tokens +structure HTML4TokenUtils +structure HTML4Utils +structure HTML4Print diff --git a/smlnj-lib/Doc/src/HashCons/MODULES b/smlnj-lib/Doc/src/HashCons/MODULES new file mode 100644 index 0000000..359b525 --- /dev/null +++ b/smlnj-lib/Doc/src/HashCons/MODULES @@ -0,0 +1,9 @@ +structure HashCons +structure HashConsAtom +structure HashConsBool +structure HashConsInt +structure HashConsMap +structure HashConsString +structure HashConsSet +structure HashConsWord +functor HashConsGroundFn diff --git a/smlnj-lib/Doc/src/HashCons/fun-HashConsGroundFn.adoc b/smlnj-lib/Doc/src/HashCons/fun-HashConsGroundFn.adoc new file mode 100644 index 0000000..fb1fadc --- /dev/null +++ b/smlnj-lib/Doc/src/HashCons/fun-HashConsGroundFn.adoc @@ -0,0 +1,68 @@ += The `HashConsGroundFn` functor +:Author: John Reppy +:Date: {release-date} +:stem: latexmath +:source-highlighter: pygments +:VERSION: {smlnj-version} + +The `HashConsGroundFn` functor provides a mechanism for defining +a hash-consed representations for "leaf" (or "ground") types. +These are types that might be atomic (_e.g._, +the xref:str-HashConsAtom.adoc[`HashConsAtom structure`]) or +datatypes, but they are treated as atomic values by the *HashCons Library* +and are the leaves of the hash-consed data structures. + +== Synopsis + +[source,sml] +------------ +functor HashConsGroundFn (T : HASH_KEY) +------------ + +== Arguments + +* `T : xref:../Util/sig-HASH_KEY.adoc[HASH_KEY]`:: + The argument structure `T` defines the type, equality function, and + hashing function a the "leaf" type. + +== Interface + +[source,sml] +------------ +type hash_key = T.hash_key +type obj = hash_key HashCons.obj + +val mk : hash_key -> obj +------------ + +== Description + +`[.kw]#type# hash_key = T.hash_key`:: + the ground type. + +`[.kw]#type# obj = hash_key HashCons.obj`:: + the hash-consed ground type. + +`[.kw]#val# mk : hash_key \-> obj`:: + map a ground type value to a hash-consed value. + +== Example + +Suppose that we wish to have pairs of integers as a ground type +for a hash-consed data structure. We might implement this using +the following functor application: + +[source,sml] +------------ +structure HCPairs = HashConsGroundFn ( + struct + type hash_key = int * int + fun sameKey (a : hash_key, b) = (a = b) + fun hashVal (a, b) = Word.xorb(Word.fromInt a, Word.fromInt b) + end) +------------ + +== See Also + +xref:str-HashCons.adoc[`HashCons`], +xref:hash-cons-lib.adoc[__The HashCons Library__] diff --git a/smlnj-lib/Doc/src/HashCons/hash-cons-lib.adoc b/smlnj-lib/Doc/src/HashCons/hash-cons-lib.adoc new file mode 100644 index 0000000..fed8b2b --- /dev/null +++ b/smlnj-lib/Doc/src/HashCons/hash-cons-lib.adoc @@ -0,0 +1,138 @@ += The HashCons Library +:Author: John Reppy +:Date: {release-date} +:stem: latexmath +:source-highlighter: pygments +:VERSION: {smlnj-version} + +== Overview + +The *HashCons Library* supports the implementation of +hash-consed representations of data structures. Such representations +are useful to reduce space usage by sharing common substructures +and to provide constant-time equality testing for large structures. + +To use this library, you need to use a two-level definition of your +data structures. For example, we might define a hash-cons representation +of lambda terms as follows: + +[source,sml] +------------ +structure HC = HashCons + +type var = HashConsString.obj + +datatype term_node + = VAR of var + | LAM of (var * term) + | APP of (term * term) +withtype term = term_node HC.obj +------------ + +And you need to define an equality function on your terms (this function +can use the hash-cons identity on subterms). For example, here is the +equality function for our lambda terms: + +[source,sml] +------------ +fun eq (APP(t11, t12), APP(t21, t22)) = HC.same(t11, t21) andalso HC.same(t12, t22) + | eq (LAM(x, t1), LAM(y, t2)) = HC.same(x, y) andalso HC.same(t1, t2) + | eq (VAR x, VAR y) = HC.same(x, y) + | eq _ = false +------------ + +With the equality function defined, we can then create a hash-cons table: + +[source,sml] +------------ +val tbl = HC.new {eq = eq} +------------ + +And define constructor functions: + +[source,sml] +------------ +val mkAPP = HC.cons2 tbl (0wx1, APP) +val mkLAM = HC.cons2 tbl (0wx3, LAM) +val mkVAR = HC.cons1 tbl (0wx7, VAR) +val var = HW.mk +------------ + +Note that we pick successive prime numbers for the constructor hash codes. +Using these constructors, we can construct the representation of the +identity function "latexmath:[\lambda{} x . x]" as follows: + +[source,sml] +------------ +mkLAM(var "x", mkVAR(var "x")) +------------ + +In addition to term construction, this library also supports finite sets +and maps using the unique hash-cons codes as keys. + +== Contents + +xref:str-HashCons.adoc[`[.kw]#structure# HashCons`]:: + The main module in the library, which defines the basic types + and various utility functions. + +xref:str-HashConsAtom.adoc[`[.kw]#structure# HashConsAtom`]:: + Code to package the xref:../Util/str-Atom.adoc#type:atom[`Atom.atom`] type + as a hash-consed object. + +xref:str-HashConsBool.adoc[`[.kw]#structure# HashConsBool`]:: + Code to package the `bool` type as a hash-consed object. + +xref:str-HashConsInt.adoc[`[.kw]#structure# HashConsInt`]:: + Code to package the `int` type as a hash-consed object. + +xref:str-HashConsMap.adoc[`[.kw]#structure# HashConsMap`]:: + Implements finite maps keyed by hash-consed objects. + +xref:str-HashConsString.adoc[`[.kw]#structure# HashConsString`]:: + Code to package the `string` type as a hash-consed object. + +xref:str-HashConsSet.adoc[`[.kw]#structure# HashConsSet`]:: + Implements finite sets of hash-consed objects. + +xref:str-HashConsWord.adoc[`[.kw]#structure# HashConsWord`]:: + Code to package the `word` type as a hash-consed object. + +xref:fun-HashConsGroundFn.adoc[`[.kw]#functor# HashConsGroundFn`]:: + A functor for implementing new leaf types as hash-consed objects. + +== Usage + +For https://smlnj.org[*SML/NJ*], include `$/hash-cons-lib.cm` in your +*CM* file. + +For use in https://www.mlton.org/[*MLton*], include +`$(SML_LIB)/smlnj-lib/HashCons/hash-cons-lib.mlb` in your *MLB* file. + +ifdef::backend-pdf[] + +// Push titles down one level. +:leveloffset: 1 + +include::str-HashCons.adoc[] + +include::str-HashConsAtom.adoc[] + +include::str-HashConsBool.adoc[] + +include::str-HashConsInt.adoc[] + +include::str-HashConsMap.adoc[] + +include::str-HashConsString.adoc[] + +include::str-HashConsSet.adoc[] + +include::str-HashConsWord.adoc[] + +include::fun-HashConsGroundFn.adoc[] + +// Return to normal title levels. +:leveloffset: 0 + +endif::[] diff --git a/smlnj-lib/Doc/src/HashCons/str-HashCons.adoc b/smlnj-lib/Doc/src/HashCons/str-HashCons.adoc new file mode 100644 index 0000000..64bd2fb --- /dev/null +++ b/smlnj-lib/Doc/src/HashCons/str-HashCons.adoc @@ -0,0 +1,184 @@ += The `HashCons` structure +:Author: John Reppy +:Date: {release-date} +:stem: latexmath +:source-highlighter: pygments +:VERSION: {smlnj-version} + +The `HashCons` structure is the main module for the *HashCons Library*. + +== Synopsis + +[source,sml] +------------ +signature HASH_CONS +structure HashCons : HASH_CONS +------------ + +== Interface + +[source,sml] +------------ +type 'a tbl + +val new : {eq : 'a * 'a -> bool} -> 'a tbl + +val clear : 'a tbl -> unit + +type 'a obj = { nd : 'a, tag : word, hash : word } + +val node : 'a obj -> 'a +val tag : 'a obj -> word + +val same : ('a obj * 'a obj) -> bool +val compare : ('a obj * 'a obj) -> order + +val cons0 : 'a tbl -> (word * 'a) -> 'a obj +val cons1 : 'a tbl -> (word * ('b obj -> 'a)) + -> 'b obj -> 'a obj +val cons2 : 'a tbl -> (word * ('b obj * 'c obj -> 'a)) + -> 'b obj * 'c obj -> 'a obj +val cons3 : 'a tbl -> (word * ('b obj * 'c obj * 'd obj -> 'a)) + -> 'b obj * 'c obj * 'd obj -> 'a obj +val cons4 : 'a tbl -> (word * ('b obj * 'c obj * 'd obj * 'e obj -> 'a)) + -> 'b obj * 'c obj * 'd obj * 'e obj -> 'a obj +val cons5 : 'a tbl -> (word * ('b obj * 'c obj * 'd obj * 'e obj * 'f obj -> 'a)) + -> 'b obj * 'c obj * 'd obj * 'e obj * 'f obj -> 'a obj + +val consList : 'a tbl -> (word * ('b obj list -> 'a)) -> 'b obj list -> 'a obj + +val consR1 : 'a tbl -> (word * ('b obj -> 'a) * ('r -> 'b obj)) + -> 'r -> 'a obj +val consR2 : 'a tbl + -> (word * ('b obj * 'c obj -> 'a) * ('r -> 'b obj * 'c obj)) + -> 'r -> 'a obj +val consR3 : 'a tbl + -> (word * ('b obj * 'c obj * 'd obj -> 'a) + * ('r -> 'b obj * 'c obj * 'd obj)) + -> 'r -> 'a obj +val consR4 : 'a tbl + -> (word * ('b obj * 'c obj * 'd obj * 'e obj -> 'a) + * ('r -> 'b obj * 'c obj * 'd obj * 'e obj)) + -> 'r -> 'a obj +val consR5 : 'a tbl + -> (word * ('b obj * 'c obj * 'd obj * 'e obj * 'f obj -> 'a) + * ('r -> 'b obj * 'c obj * 'd obj * 'e obj * 'f obj)) + -> 'r -> 'a obj +------------ + +== Description + +`[.kw]#type# 'a tbl`:: + The type of a table for hash-consing objects of type ``'a``. Typically, only + one table per distinct type should be defined. + +`[.kw]#val# new : {eq : 'a * 'a \-> bool} \-> 'a tbl`:: + `new {eq}` creates a new hash-cons table using the equality function `eq`. + +`[.kw]#val# clear : 'a tbl \-> unit`:: + `clear tbl` clears the table of all elements. + +`[.kw]#type# 'a obj = { ... }`:: + The representation of a hash-consed object. The fields are ++ +-- +`nd`:: + the underlying representation of the object. +`tag`:: + a tag that is unique for the object (for the object's table) +`hash`:: + a hash of the object (used to index into the table) +-- + +`[.kw]#val# node : 'a obj \-> 'a`:: + `node obj` projects out the node from `obj`. + +`[.kw]#val# tag : 'a obj \-> word`:: + `tag obj` projects out the unique tag from `obj`. + +`[.kw]#val# same : ('a obj * 'a obj) \-> bool`:: + `same (obj1, obj2)` returns true if the objects are the same; this test + is constant time (it compares the object tags). + +`[.kw]#val# compare : ('a obj * 'a obj) \-> order`:: + `compare (obj1, obj2)` returns the order of the two objects; this test + is constant time (it compares the object tags). + +`[.kw]#val# cons0 : 'a tbl \-> (word * 'a) \-> 'a obj`:: + `cons0 tbl (h, x)` creates a unique (_w.r.t._ `tbl`) representation + for the value `x`, where `h` is the hash of `x`. + +`[.kw]#val# cons1 : 'a tbl \-> (word * ('b obj \-> 'a)) \-> 'b obj \-> 'a obj`:: + `cons1 tbl (h, mk) obj1` creates a unique (_w.r.t._ `tbl`) representation + for `mk obj1`, where `h` is a hash code for the term constructor `mk`. + +`[.kw]#val# cons2 : 'a tbl \-> (word * ('b obj * 'c obj \-> 'a)) \-> ...`:: + `cons2 tbl (h, mk) (obj1, obj2)` creates a unique (_w.r.t._ `tbl`) representation + for `mk (obj1, obj2)`, where `h` is a hash code for the term constructor `mk`. + +`[.kw]#val# cons3 : 'a tbl \-> (word * ('b obj * 'c obj * 'd obj \-> 'a)) \-> ...`:: + `cons3 tbl (h, mk) (obj1, obj2, obj3)` creates a unique (_w.r.t._ `tbl`) representation + for `mk (obj1, obj2, obj3)`, where `h` is a hash code for the term constructor `mk`. + +`[.kw]#val# cons4 : 'a tbl \-> (word * ('b obj * 'c obj * 'd obj * 'e obj \-> 'a)) \-> ...`:: + `cons4 tbl (h, mk) (obj1, obj2, obj3, obj4)` creates a unique (_w.r.t._ `tbl`) + representation for `mk (obj1, obj2, obj3, obj4)`, where `h` is a hash code + for the term constructor `mk`. + +`[.kw]#val# cons5 : 'a tbl \-> (word * ('b obj * 'c obj * 'd obj * 'e obj * 'f obj \-> 'a)) \-> ...`:: + `cons5 tbl (h, mk) (obj1, obj2, obj3, obj4, obj5)` creates a unique (_w.r.t._ `tbl`) + representation for `mk (obj1, obj2, obj3, obj4, obj5)`, where `h` is a hash code + for the term constructor `mk`. + +`[.kw]#val# consList : 'a tbl \-> (word * ('b obj list \-> 'a)) \-> 'b obj list \-> 'a obj`:: + `consList tbl (h, mk) objs` creates a unique (_w.r.t._ `tbl`) representation + for `mk objs`, where `h` is a hash code for the term constructor `mk`. + +`[.kw]#val# consR1 : 'a tbl \-> (word * ('b obj \-> 'a) * ('r \-> 'b obj)) \-> 'r \-> 'a obj`:: + `consR1 (h, mk, proj) r` creates a unique (_w.r.t._ `tbl`) representation + for `mk (proj r)`, where `h` is a hash code for the term constructor `mk` + and `proj` projects the sub-component of `r` as an object. + +`[.kw]#val# consR2 : 'a tbl \-> (word * ('b obj * 'c obj \-> 'a) * ('r \-> 'b obj * 'c obj)) \-> 'r \-> 'a obj`:: + `consR2 (h, mk, proj) r` creates a unique (_w.r.t._ `tbl`) representation + for `mk (proj r)`, where `h` is a hash code for the term constructor `mk` + and `proj` projects the sub-components of `r` as a tuple of objects. + +`[.kw]#val# consR3 : 'a tbl \-> (word * ('b obj * 'c obj * 'd obj \-> 'a) \-> ...`:: + `consR3 (h, mk, proj) r` creates a unique (_w.r.t._ `tbl`) representation + for `mk (proj r)`, where `h` is a hash code for the term constructor `mk` + and `proj` projects the sub-components of `r` as a tuple of objects. + +`[.kw]#val# consR4 : 'a tbl \-> (word * ('b obj * 'c obj * 'd obj * 'e obj \-> 'a) \-> ...`:: + `consR4 (h, mk, proj) r` creates a unique (_w.r.t._ `tbl`) representation + for `mk (proj r)`, where `h` is a hash code for the term constructor `mk` + and `proj` projects the sub-components of `r` as a tuple of objects. + +`[.kw]#val# consR5 : 'a tbl \-> (word * ('b obj * 'c obj * 'd obj * 'e obj * 'f obj \-> 'a) \-> ...`:: + `consR5 (h, mk, proj) r` creates a unique (_w.r.t._ `tbl`) representation + for `mk (proj r)`, where `h` is a hash code for the term constructor `mk` + and `proj` projects the sub-components of `r` as a tuple of objects. + +== Discussion + +The functions `cons1`, `cons2`, _etc_., provide an easy way to convert a data +constructor of the given arity to a hash-cons constructor. For example, if +we have +[source,sml] +------------ +datatype t = ... | Foo of (x obj * y obj * z obj) | ... +------------ +as a constructor in our two-level hash-consed datatype, then we can +define a hash-cons constructor for `Foo` has +[source,sml] +------------ +val mkFoo : x obj * y obj * z obj -> t obj = cons3 (0w17, Foo) +------------ +where `0w17` is the hash code we selected for the `Foo` constructor. + +Likewise, the `cons1R`, `cons2R`, _etc_., functions can be used when +record types are involved. + +== See Also + +xref:hash-cons-lib.adoc[__The HashCons Library__] diff --git a/smlnj-lib/Doc/src/HashCons/str-HashConsAtom.adoc b/smlnj-lib/Doc/src/HashCons/str-HashConsAtom.adoc new file mode 100644 index 0000000..053e5bd --- /dev/null +++ b/smlnj-lib/Doc/src/HashCons/str-HashConsAtom.adoc @@ -0,0 +1,45 @@ += The `HashConsAtom` structure +:Author: John Reppy +:Date: {release-date} +:stem: latexmath +:source-highlighter: pygments +:VERSION: {smlnj-version} + +The `HashConsAtom` structure embeds the xref:../Util/str-Atom.adoc#type:atom[`Atom.atom`] +type as a hash-consed object. +It is implemented using the xref:fun-HashConsGroundFn.adoc[`HashConsGroundFn`] +functor. + +== Synopsis + +[source,sml] +------------ +structure HashConsAtom +------------ + +== Interface + +[source,sml] +------------ +type hash_key = Atom.atom +type obj = hash_key HashCons.obj + +val mk : hash_key -> obj +------------ + +== Description + +`[.kw]#type# hash_key = Atom.atom`:: + The ground type being hashed. + +`[.kw]#type# obj = hash_key HashCons.obj`:: + The type of hash-consed atoms. + +`[.kw]#val# mk : hash_key \-> obj`:: + `mk a` converts the atom `a` to a hash-consed object. + +== See Also + +xref:../Util/str-Atom.adoc[`Atom`], +xref:str-HashCons.adoc[`HashCons`], +xref:hash-cons-lib.adoc[__The HashCons Library__] diff --git a/smlnj-lib/Doc/src/HashCons/str-HashConsBool.adoc b/smlnj-lib/Doc/src/HashCons/str-HashConsBool.adoc new file mode 100644 index 0000000..0278ed9 --- /dev/null +++ b/smlnj-lib/Doc/src/HashCons/str-HashConsBool.adoc @@ -0,0 +1,50 @@ += The `HashConsBool` structure +:Author: John Reppy +:Date: {release-date} +:stem: latexmath +:source-highlighter: pygments +:VERSION: {smlnj-version} + +The `HashConsBool` structure embeds the `bool` type as a hash-consed object. + +== Synopsis + +[source,sml] +------------ +structure HashConsBool +------------ + +== Interface + +[source,sml] +------------ +type hash_key = bool +type obj = hash_key HashCons.obj + +val mk : hash_key -> obj + +val hcFalse : obj +val hcTrue : obj +------------ + +== Description + +`[.kw]#type# hash_key = bool`:: + The ground type being hashed. + +`[.kw]#type# obj = hash_key HashCons.obj`:: + The type of hash-consed booleans. + +`[.kw]#val# mk : hash_key \-> obj`:: + `mk b` converts the boolean `b` to a hash-consed object. + +`[.kw]#val# hcFalse : obj`:: + the hash-consed representation of `false`. + +`[.kw]#val# hcTrue : obj`:: + the hash-consed representation of `true`. + +== See Also + +xref:str-HashCons.adoc[`HashCons`], +xref:hash-cons-lib.adoc[__The HashCons Library__] diff --git a/smlnj-lib/Doc/src/HashCons/str-HashConsInt.adoc b/smlnj-lib/Doc/src/HashCons/str-HashConsInt.adoc new file mode 100644 index 0000000..7794c61 --- /dev/null +++ b/smlnj-lib/Doc/src/HashCons/str-HashConsInt.adoc @@ -0,0 +1,43 @@ += The `HashConsInt` structure +:Author: John Reppy +:Date: {release-date} +:stem: latexmath +:source-highlighter: pygments +:VERSION: {smlnj-version} + +The `HashConsInt` structure embeds the `int` type as a hash-consed object. +It is implemented by directly using the value as the tag and hash key +(_i.e._, no hash table). + +== Synopsis + +[source,sml] +------------ +structure HashConsInt +------------ + +== Interface + +[source,sml] +------------ +type hash_key = int +type obj = hash_key HashCons.obj + +val mk : hash_key -> obj +------------ + +== Description + +`[.kw]#type# hash_key = int`:: + The ground type being hashed. + +`[.kw]#type# obj = hash_key HashCons.obj`:: + The type of hash-consed integers. + +`[.kw]#val# mk : hash_key \-> obj`:: + `mk n` converts the integer `n` to a hash-consed object. + +== See Also + +xref:str-HashCons.adoc[`HashCons`], +xref:hash-cons-lib.adoc[__The HashCons Library__] diff --git a/smlnj-lib/Doc/src/HashCons/str-HashConsMap.adoc b/smlnj-lib/Doc/src/HashCons/str-HashConsMap.adoc new file mode 100644 index 0000000..8657bd1 --- /dev/null +++ b/smlnj-lib/Doc/src/HashCons/str-HashConsMap.adoc @@ -0,0 +1,341 @@ += The `HashConsMap` structure +:Author: John Reppy +:Date: {release-date} +:stem: latexmath +:source-highlighter: pygments +:VERSION: {smlnj-version} + +The `HashConsMap` structure implements functional, finite maps keyed +by hash-consed objects. A balanced tree structure is used in the +representation. + +== Synopsis + +[source,sml] +------------ +signature HASH_CONS_MAP +structure HashConsMap : HASH_CONS_MAP +------------ + +== Interface + +[source,sml] +------------ +type 'a obj = 'a HashCons.obj + +type ('a, 'b) map + +val isEmpty : ('a, 'b) map -> bool + +val singleton : ('a obj * 'b) -> ('a, 'b) map + +val insert : ('a, 'b) map * 'a obj * 'b -> ('a, 'b) map +val insert' : (('a obj * 'b) * ('a, 'b) map) -> ('a, 'b) map + +val insertWith : (('b * 'b) -> 'b) + -> ('a, 'b) map * 'a obj * 'b -> ('a, 'b) map +val insertWithi : (('a obj * 'b * 'b) -> 'b) + -> ('a, 'b) map * 'a obj * 'b -> ('a, 'b) map + +val find : ('a, 'b) map * 'a obj -> 'b option + +val lookup : ('a, 'b) map * 'a obj -> 'b + +val inDomain : (('a, 'b) map * 'a obj) -> bool + +val remove : ('a, 'b) map * 'a obj -> ('a, 'b) map * 'b + +val empty : ('a, 'b) map + +val numItems : ('a, 'b) map -> int + +val listItems : ('a, 'b) map -> 'b list +val listItemsi : ('a, 'b) map -> ('a obj * 'b) list + +val listKeys : ('a, 'b) map -> 'a obj list + +val collate : ('b * 'b -> order) -> (('a, 'b) map * ('a, 'b) map) -> order + +val unionWith : ('b * 'b -> 'b) -> (('a, 'b) map * ('a, 'b) map) + -> ('a, 'b) map +val unionWithi : ('a obj * 'b * 'b -> 'b) -> (('a, 'b) map * ('a, 'b) map) + -> ('a, 'b) map + +val intersectWith : ('b * 'c -> 'd) -> (('a, 'b) map * ('a, 'c) map) + -> ('a, 'd) map +val intersectWithi : ('a obj * 'b * 'c -> 'd) -> (('a, 'b) map * ('a, 'c) map) + -> ('a, 'd) map + +val mergeWith : ('b option * 'c option -> 'd option) + -> (('a, 'b) map * ('a, 'c) map) -> ('a, 'd) map +val mergeWithi : ('a obj * 'b option * 'c option -> 'd option) + -> (('a, 'b) map * ('a, 'c) map) -> ('a, 'd) map + +val app : ('b -> unit) -> ('a, 'b) map -> unit +val appi : (('a obj * 'b) -> unit) -> ('a, 'b) map -> unit + +val map : ('b -> 'c) -> ('a, 'b) map -> ('a, 'c) map +val mapi : ('a obj * 'b -> 'c) -> ('a, 'b) map -> ('a, 'c) map + +val fold : ('b * 'c -> 'c) -> 'c -> ('a, 'b) map -> 'c +val foldi : ('a obj * 'b * 'c -> 'c) -> 'c -> ('a, 'b) map -> 'c + +val filter : ('b -> bool) -> ('a, 'b) map -> ('a, 'b) map +val filteri : ('a obj * 'b -> bool) -> ('a, 'b) map -> ('a, 'b) map + +val mapPartial : ('b -> 'c option) -> ('a, 'b) map -> ('a, 'c) map +val mapPartiali : ('a obj * 'b -> 'c option) -> ('a, 'b) map -> ('a, 'c) map + +val exists : ('b -> bool) -> ('a, 'b) map -> bool +val existsi : ('a obj * 'b -> bool) -> ('a, 'b) map -> bool + +val all : ('b -> bool) -> ('a, 'b) map -> bool +val alli : ('a obj * 'b -> bool) -> ('a, 'b) map -> bool +------------ + +== Description + +In the description of operations below, we write latexmath:[\mathbf{dom}(m)] +for the _domain_ of the map latexmath:[m] (_i.e_, the set of keys for which +latexmath:[m] is defined), and latexmath:[\mathbf{rng}(m)] for its _range_ +(_i.e._, the set latexmath:[\{ m(k)\;|\;k \in \mathbf{dom}(m) \}]). It is also +useful to view a map as the set of key-value pairs +latexmath:[\{ (k, m(k))\;|\;k \in \mathbf{dom}(m) \}], which we call the _items_ +of latexmath:[m]. + +`[.kw]#type# 'a obj = 'a HashCons.obj`:: + Hash-consed objects are the search keys for the finite maps. + +`[.kw]#type# ('a, 'b) map`:: + A finite map from ``'a obj`` values to ``'b`` values. + +`[.kw]#val# empty : ('a, 'b) map`:: + The empty map. + +`[.kw]#val# singleton : ('a obj * 'b) \-> ('a, 'b) map`:: + `singleton (obj, v)` creates the singleton map that maps `obj` to `v`. + +`[.kw]#val# insert : ('a, 'b) map * 'a obj * 'b \-> ('a, 'b) map`:: + `insert (m, obj, v)` adds the mapping from `obj` to `v` to `m`. + This mapping overrides any previous mapping from `obj`. + +`[.kw]#val# insert' : (('a obj * 'b) * ('a, 'b) map) \-> ('a, 'b) map`:: + `insert' ((obj, v), map)` adds the mapping from `obj` to `v` to `m`. + This mapping overrides any previous mapping from `obj`. + +`[.kw]#val# insertWith : (('b * 'b) \-> 'b) \-> ('a, 'b) map * 'a obj * 'b \-> ('a, 'b) map`:: + `insertWith comb (m, obj, v)` adds the mapping from `obj` to `value` to `m`, + where `value = comb(v', v)`, if `m` already contained a mapping from `obj` + to `v'`; otherwise, `value = v`. + +`[.kw]#val# insertWithi : (('a obj * 'b * 'b) \-> 'b) \-> ('a, 'b) map * 'a obj * 'b \-> ('a, 'b) map`:: + `insertWithi comb (m, obj, v)` adds the mapping from `obj` to `value` to `m`, + where `value = comb(obj, v', v)`, if `m` already contained a mapping from `obj` + to `v'`; otherwise, `value = v`. + +`[.kw]#val# find : ('a, 'b) map * 'a obj \-> 'b option`:: + `find (m, obj)` returns `SOME v`, if `m` maps `obj` to `v` and `NONE` otherwise. + +`[.kw]#val# lookup : ('a, 'b) map * 'a obj \-> 'b`:: + `lookup (m, obj)` returns `v`, if `m` maps `obj` to `v`; otherwise it + raises the exception xref:../Util/str-LibBase.adoc#exn:NotFound[`NotFound`]. + +`[.kw]#val# inDomain : (('a, 'b) map * 'a obj) \-> bool`:: + `inDomain (m, obj)` returns `true` if `obj` is in the domain of `m`. + +`[.kw]#val# remove : ('a, 'b) map * 'a obj \-> ('a, 'b) map * 'b`:: + `remove (m, obj)` returns the pair `(m', v)`, if `m` maps `obj` to `v` + and where `m'` is `m` with `obj` removed from its domain. If `obj` + is not in the domain of `m`, then it raises the exception + xref:../Util/str-LibBase.adoc#exn:NotFound[`NotFound`]. + +`[.kw]#val# isEmpty : ('a, 'b) map \-> bool`:: + `isEmpty m` returns true if, and only if, `m` is empty. + +`[.kw]#val# numItems : ('a, 'b) map \-> int`:: + `numItems m` returns the size of ``m``'s domain. + +`[.kw]#val# listItems : ('a, 'b) map \-> 'b list`:: + `listItems m` returns a list of the values in the _range_ of `m`. + Note that this list will contain duplicates when multiple keys in + ``m``'s domain map to the same value. + +`[.kw]#val# listKeys : ('a, 'b) map \-> 'a obj list`:: + `listKeys m` returns a list of the objects in the domain of `m`. + +`[.kw]#val# listItemsi : ('a, 'b) map \-> ('a obj * 'b) list`:: + `listItemsi m` returns a list of `(obj, v)` pairs, where `m` maps + `obj` to `v`. + +`[.kw]#val# collate : ('b * 'b \-> order) \-> (('a, 'b) map * ('a, 'b) map) \-> order`:: + `collate cmpV (m1, m2)` returns the order of the two maps, where `cmpV` is + used to compare the values in the domain. + +`[.kw]#val# unionWith : ('b * 'b \-> 'b) \-> (('a, 'b) map * ('a, 'b) map) \-> ('a, 'b) map`:: + `unionWith comb (m1, m2)` returns the union of the two maps, using the function `comb` + to combine values when there is a collision of keys. More formally, this expression + returns the map ++ +[latexmath] ++++++++++++ + \begin{array}{l} + \{ (k, \mathtt{m1}(k)) + \;|\;k \in \mathbf{dom}(\mathtt{m1}) \setminus \mathbf{dom}(\mathtt{m2}) \} + \cup \\ + \{ (k, \mathtt{m2}(k)) + \;|\;k \in \mathbf{dom}(\mathtt{m2}) \setminus \mathbf{dom}(\mathtt{m1}) \} + \cup \\ + \{ (k, \mathtt{comb}(\mathtt{m1}(k), \mathtt{m2}(k)) + \;|\;k \in \mathbf{dom}(\mathtt{m1}) \cap \mathbf{dom}(\mathtt{m2}) \} + \end{array} ++++++++++++ ++ +For example, we could implement a _multiset_ of objects by mapping objects to their +multiplicity. Then, the union of two multisets could be defined by ++ +[source,sml] +------------ +fun union (ms1, ms2) = unionWith Int.+ (ms1, ms2) +------------ + +`[.kw]#val# unionWithi : ('a obj * 'b * 'b \-> 'b) \-> (('a, 'b) map * ('a, 'b) map) \-> ('a, 'b) map`:: + `unionWithi comb (m1, m2)` returns the union of the two maps, using the function `comb` + to combine values when there is a collision of keys. More formally, this expression + returns the map ++ +[latexmath] ++++++++++++ + \begin{array}{l} + \{ (k, \mathtt{m1}(k)) + \;|\;k \in \mathbf{dom}(\mathtt{m1}) \setminus \mathbf{dom}(\mathtt{m2}) \} + \cup \\ + \{ (k, \mathtt{m2}(k)) + \;|\;k \in \mathbf{dom}(\mathtt{m2}) \setminus \mathbf{dom}(\mathtt{m1}) \} + \cup \\ + \{ (k, \mathtt{comb}(k, \mathtt{m1}(k), \mathtt{m2}(k)) + \;|\;k \in \mathbf{dom}(\mathtt{m1}) \cap \mathbf{dom}(\mathtt{m2}) \} + \end{array} ++++++++++++ + + +`[.kw]#val# intersectWith : ('b * 'c \-> 'd) \-> (('a, 'b) map * ('a, 'c) map) \-> ('a, 'd) map`:: + `intersectWith comb (m1, m2)` returns the intersection of the two maps, + where the values in the range are a computed by applying the function + `comb` to the values from the two maps. More formally, this expression + returns the map ++ +[latexmath] ++++++++++++ + \{ (k, \mathtt{comb}(\mathtt{m1}(k), \mathtt{m2}(k)) + \;|\;k \in \mathbf{dom}(\mathtt{m1}) \cap \mathbf{dom}(\mathtt{m2}) \} ++++++++++++ + +`[.kw]#val# intersectWithi : ('a obj * 'b * 'c \-> 'd) \-> (('a, 'b) map * ('a, 'c) map) \-> ('a, 'd) map`:: + `intersectWithi comb (m1, m2)` returns the intersection of the two maps, + where the values in the range are a computed by applying the function + `comb` to the kay and the values from the two maps. More formally, this + expression returns the map ++ +[latexmath] ++++++++++++ + \{ (k, \mathtt{comb}(k, \mathtt{m1}(k), \mathtt{m2}(k)) + \;|\;k \in \mathbf{dom}(\mathtt{m1}) \cap \mathbf{dom}(\mathtt{m2}) \} ++++++++++++ + +`[.kw]#val# mergeWith : ('b option * 'c option \-> 'd option) \-> (('a, 'b) map * ('a, 'c) map) \-> ('a, 'd) map`:: + `mergeWith comb (m1, m2)` merges the two maps using the function `comb` + as a decision procedure for adding elements to the new map. For each object + latexmath:[\mathtt{obj} \in \mathbf{dom}(\mathtt{m1}) \cup \mathbf{dom}(\mathtt{m2})], + we evaluate `comb(optV1, optV2)`, where `optV1` is `SOME v` if + latexmath:[(\mathtt{obj}, \mathtt{v}) \in \mathtt{m1}] and is `NONE` if + latexmath:[\mathtt{obj} \not\in \mathbf{dom}(\mathtt{m1}); likewise for `optV2`. + If `comb(optV1, optV2)` returns ``SOME v'``, then we add ``(obj, v')`` + to the result. ++ +The `mergeWith` function is a generalization of the `unionWith` and +`intersectionWith` functions. + +`[.kw]#val# mergeWithi : ('a obj * 'b option * 'c option \-> 'd option) \-> (('a, 'b) map * ('a, 'c) map) \-> ('a, 'd) map`:: + `mergeWithi comb (m1, m2)` merges the two maps using the function `comb` + as a decision procedure for adding elements to the new map. The difference + between this function and `mergeWith` is that the `comb` function takes the + `key` value in addition to the optional values from the range. + +`[.kw]#val# app : ('b \-> unit) \-> ('a, 'b) map \-> unit`:: + `app f m` applies the function `f` to the values in the range of `m`. + +`[.kw]#val# appi : (('a obj * 'b) \-> unit) \-> ('a, 'b) map \-> unit`:: + `appi f map` applies the function `f` to the key-value pairs that + define `m`. + +`[.kw]#val# map : ('b \-> 'c) \-> ('a, 'b) map \-> ('a, 'c) map`:: + `map f m` creates a new finite map ``m'`` by applying the function `f` to the + values in the range of `m`. Thus, if + latexmath:[(\mathtt{obj}, \mathtt{v}) \in \mathtt{m}], then + `(obj, f v)` will be in ``m'``. + +`[.kw]#val# mapi : ('a obj * 'b \-> 'c) \-> ('a, 'b) map \-> ('a, 'c) map`:: + `mapi f m` creates a new finite map ``m'`` by applying the function `f` to the + key-value pairs of `m`. Thus, if + latexmath:[(\mathtt{obj}, \mathtt{v}) \in \mathtt{m}], then + `(obj, f(obj, v))` will be in ``m'``. + +`[.kw]#val# fold : ('b * 'c \-> 'c) \-> 'c \-> ('a, 'b) map \-> 'c`:: + `fold f init m` folds the function `f` over the range of + `m` using `init` as the initial value. + +`[.kw]#val# foldi : ('a obj * 'b * 'c \-> 'c) \-> 'c \-> ('a, 'b) map \-> 'c`:: + `foldi f init m` folds the function `f` over the key-value pairs in + `m` using `init` as the initial value. + +`[.kw]#val# filter : ('b \-> bool) \-> ('a, 'b) map \-> ('a, 'b) map`:: + `filter pred m` filters out those items `(obj, v)` from `m`, such that + `pred v` returns `false`. More formally, this expression returns the map + latexmath:[\{ (\mathtt{obj}, \mathtt{v})\;|\;\mathtt{obj} \in \mathbf{dom}(\mathtt{m}) + \wedge \mathtt{pred}(\mathtt{v}) \}]. + +`[.kw]#val# filteri : ('a obj * 'b \-> bool) \-> ('a, 'b) map \-> ('a, 'b) map`:: + `filteri pred m` filters out those items `(obj, v)` from `m`, such that + `pred(obj, v)` returns `false`. More formally, this expression returns the map + latexmath:[\{ (\mathtt{obj}, \mathtt{v})\;|\;\mathtt{obj} \in \mathbf{dom}(\mathtt{m}) + \wedge \mathtt{pred}(\mathtt{obj}, \mathtt{v}) \}]. + +`[.kw]#val# mapPartial : ('b \-> 'c option) \-> ('a, 'b) map \-> ('a, 'c) map`:: + `mapPartial f m` maps the partial function `f` over the items of `m`. + More formally, this expression returns the map +[latexmath] ++++++++++++ + \{ (k, v') \;|\; (k, v) \in \mathtt{m} \wedge \mathtt{f}(v) = \mathtt{SOME}(v') \} ++++++++++++ + +`[.kw]#val# mapPartiali : ('a obj * 'b \-> 'c option) \-> ('a, 'b) map \-> ('a, 'c) map`:: + `mapPartiali f m` maps the partial function `f` over the items of `m`. + More formally, this expression returns the map +[latexmath] ++++++++++++ + \{ (k, v') \;|\; (k, v) \in \mathtt{m} \wedge \mathtt{f}(k, v) = \mathtt{SOME}(v') \} ++++++++++++ + +`[.kw]#val# exists : ('b \-> bool) \-> ('a, 'b) map \-> bool`:: + `exists pred m` returns `true` if, and only if, there exists an item + latexmath:[(\mathtt{obj}, \mathtt{v}) \in \mathtt{m}], + such that `pred v` returns `true`. + +`[.kw]#val# existsi : ('a obj * 'b \-> bool) \-> ('a, 'b) map \-> bool`:: + `exists pred m` returns `true` if, and only if, there exists an item + latexmath:[(\mathtt{obj}, \mathtt{v}) \in \mathtt{m}], such that + `pred(obj, v)` returns `true`. + +`[.kw]#val# all : ('b \-> bool) \-> ('a, 'b) map \-> bool`:: + `all pred m` returns `true` if, and only if, `pred v` returns `true` + for all items latexmath:[(\mathtt{obj}, \mathtt{v}) \in \mathtt{m}]. + +`[.kw]#val# alli : ('a obj * 'b \-> bool) \-> ('a, 'b) map \-> bool`:: + `all pred m` returns `true` if, and only if, `pred(obj, v)` returns `true` + for all items latexmath:[(\mathtt{obj}, \mathtt{v}) \in \mathtt{m}]. + +== See Also + +xref:str-HashCons.adoc[`HashCons`], +xref:str-HashConsSet.adoc[`HashConsSet`], +xref:hash-cons-lib.adoc[__The HashCons Library__] diff --git a/smlnj-lib/Doc/src/HashCons/str-HashConsSet.adoc b/smlnj-lib/Doc/src/HashCons/str-HashConsSet.adoc new file mode 100644 index 0000000..bf7a644 --- /dev/null +++ b/smlnj-lib/Doc/src/HashCons/str-HashConsSet.adoc @@ -0,0 +1,253 @@ += The `HashConsSet` structure +:Author: John Reppy +:Date: {release-date} +:stem: latexmath +:source-highlighter: pygments +:VERSION: {smlnj-version} + +The `HashConsSet` structure implements finite sets of hash-consed objects. + +== Synopsis + +[source,sml] +------------ +signature HASH_CONS_SET +structure HashConsSet : HASH_CONS_SET +------------ + +== Interface + +[source,sml] +------------ +type 'a obj = 'a HashCons.obj + +type 'a set + +val empty : 'a set + +val singleton : 'a obj -> 'a set + +val fromList : 'a obj list -> 'a set + +val add : 'a set * 'a obj -> 'a set +val add' : ('a obj * 'a set) -> 'a set + +val addList : 'a set * 'a obj list -> 'a set + +val subtract : 'a set * 'a obj -> 'a set +val subtract' : ('a obj * 'a set) -> 'a set + +val subtractList : 'a set * 'a obj list -> 'a set + +val delete : 'a set * 'a obj -> 'a set + +val member : 'a set * 'a obj -> bool + +val isEmpty : 'a set -> bool + +val equal : ('a set * 'a set) -> bool + +val compare : ('a set * 'a set) -> order + +val isSubset : ('a set * 'a set) -> bool + +val disjoint : 'a set * 'a set -> bool + +val numItems : 'a set -> int + +val toList : 'a set -> 'a obj list +val listItems : 'a set -> 'a obj list + +val union : 'a set * 'a set -> 'a set +val intersection : 'a set * 'a set -> 'a set +val difference : 'a set * 'a set -> 'a set + +val map : ('a obj -> 'b obj) -> 'a set -> 'b set + +val mapPartial : ('a obj -> 'b obj option) -> 'a set -> 'b set + +val app : ('a obj -> unit) -> 'a set -> unit + +val foldl : ('a obj * 'b -> 'b) -> 'b -> 'a set -> 'b +val foldr : ('a obj * 'b -> 'b) -> 'b -> 'a set -> 'b + +val partition : ('a obj -> bool) -> 'a set -> ('a set * 'a set) + +val filter : ('a obj -> bool) -> 'a set -> 'a set + +val all : ('a obj -> bool) -> 'a set -> bool +val exists : ('a obj -> bool) -> 'a set -> bool + +val find : ('a obj -> bool) -> 'a set -> 'a obj option +------------ + +== Description + +`[.kw]#type# 'a obj = 'a HashCons.obj`:: + The elements in the set are hash-cons objects. + +`[.kw]#type# 'a set`:: + A finite set of ``'a obj`` values. + +`[.kw]#val# empty : 'a set`:: + The empty set. + +`[.kw]#val# singleton : 'a obj \-> 'a set`:: + `singleton obj` creates a singleton set containing `obj`. + +`[.kw]#val# fromList : 'a obj list \-> 'a set`:: + `fromList objs` creates a set from the list of objects. + +`[.kw]#val# add : 'a set * 'a obj \-> 'a set`:: + `add (set, obj)` adds the object to the set. + +`[.kw]#val# add' : ('a obj * 'a set) \-> 'a set`:: + `add' (obj, set)` adds the object to the set. + +`[.kw]#val# addList : 'a set * 'a obj list \-> 'a set`:: + `addList (set, objs)` adds the list of objects to the set. + +`[.kw]#val# subtract : 'a set * 'a obj \-> 'a set`:: + `subtract (set, obj)` removes the object `obj` from `set`. + Acts as the identity if `obj` is not in the set. + +`[.kw]#val# subtract' : ('a obj * 'a set) \-> 'a set`:: + `subtract (obj, set)` removes the object `obj` from `set`. + Acts as the identity if `obj` is not in the set. + +`[.kw]#val# delete : 'a set * 'a obj \-> 'a set`:: + `delete (set, obj)` removes the object `obj` from `set`. + Unlike `subtract`, this function raises the + xref:../Util/str-LibBase.adoc#exn:NotFound[`NotFound`] + exception if `obj` is not in the set. + +`[.kw]#val# member : 'a set * 'a obj \-> bool`:: + `member (obj, set)` returns `true` if, and only if, `obj` + is an element of `set`. + +`[.kw]#val# isEmpty : 'a set \-> bool`:: + `isEmpty set` returns true if, and only if, `set` is empty. + +`[.kw]#val# equal : ('a set * 'a set) \-> bool`:: + `equal (set1, set2)` returns true if, and only if, the two + sets are equal (_i.e._, they contain the same elements). + +`[.kw]#val# compare : ('a set * 'a set) \-> order`:: + `compare (set1, set2)` returns the lexical order of + the two sets. + +`[.kw]#val# isSubset : ('a set * 'a set) \-> bool`:: + `isSubset (set1, set2)` returns true if, and only if, `set1` + is a subset of `set2` (_i.e._, any element of `set1` is an + element of `set2`). + +`[.kw]#val# disjoint : 'a set * 'a set \-> bool`:: + `equal (set1, set2)` returns true if, and only if, the two + sets are disjoint (_i.e._, their intersection is empty). + +`[.kw]#val# numItems : 'a set \-> int`:: + `numItems set` returns the number of items in the `set`. + +[[val:toList]] +`[.kw]#val# toList : 'a set \-> 'a obj list`:: + `toList set` returns a list of the objects in `set`. + +`[.kw]#val# union : 'a set * 'a set \-> 'a set`:: + `union (set1, set2)` returns the union of the two sets. + +`[.kw]#val# intersection : 'a set * 'a set \-> 'a set`:: + `intersection (set1, set2)` returns the intersection of the two sets. + +`[.kw]#val# difference : 'a set * 'a set \-> 'a set`:: + `difference (set1, set2)` returns the difference of the two sets; + _i.e._, the set of objects that are in `set1`, but not in + `set2`. + +`[.kw]#val# map : ('a obj \-> 'b obj) \-> 'a set \-> 'b set`:: + `map f set` constructs a new set from the result of applying the + function `f` to the elements of `set`. This expression is + equivalent to ++ +[source,sml] +------------ +fromList (List.map f (toList set)) +------------ + +`[.kw]#val# mapPartial : ('a obj \-> 'b obj option) \-> 'a set \-> 'b set`:: + `mapPartial f set` constructs a new set from the result of applying the + function `f` to the elements of `set`. This expression is + equivalent to ++ +[source,sml] +------------ +fromList (List.mapPartial f (toList set)) +------------ + +`[.kw]#val# app : ('a obj \-> unit) \-> 'a set \-> unit`:: + `app f set` applies the function `f` to the objects in `set`. + This expression is equivalent to ++ +[source,sml] +------------ +List.app f (toList set) +------------ + +[[val:fold]] +`[.kw]#val# fold : ('a obj * 'b \-> 'b) \-> 'b \-> 'a set \-> 'b`:: + `fold f init set` folds the function `f` over the objects in + `set` using `init` as the initial value. + This expression is equivalent to ++ +[source,sml] +------------ +List.foldl f init (toList set) +------------ + Although the order in which the elements are processed is unspecified. + +`[.kw]#val# partition : ('a obj \-> bool) \-> 'a set \-> ('a set * 'a set)`:: + `partition pred set` returns a pair of disjoint sets `(tSet, fSet)`, where + the predicate `pred` returns true for every element of `tSet`, + `false` for every element of `fSet`, and `set` is the union of `tSet` + and `fSet`. + +`[.kw]#val# filter : ('a obj \-> bool) \-> 'a set \-> 'a set`:: + `filter pred set` filters out any elements of set for which the + predicate `pred` returns false. + This expression is equivalent to ++ +[source,sml] +------------ +#1 (partition pred set) +------------ + +`[.kw]#val# all : ('a obj \-> bool) \-> 'a set \-> bool`:: + `all pred set` returns `true` if, and only if, `pred obj` returns + true for all elements `obj` in `set`. + +`[.kw]#val# exists : ('a obj \-> bool) \-> 'a set \-> bool`:: + `exists pred set` returns `true` if, and only if, there exists an + element `obj` in `set` such that `pred obj` returns `true`. + +`[.kw]#val# find : ('a obj \-> bool) \-> 'a set \-> 'a obj option`:: + `find pred set` returns `SOME obj` if there exists an object `obj` + in the set for which `pred obj` returns `true`; otherwise `NONE` is returned. + +=== Deprecated functions + +The following functions are part of the interface, but have been +deprecated. + +`[.kw]#val# listItems : 'a set \-> 'a obj list`:: + Use xref:#val:toList[`toList`] instead. + +`[.kw]#val# foldl : ('a obj * 'b \-> 'b) \-> 'b \-> 'a set \-> 'b`:: + Use xref:#val:fold[`fold`] instead. + +`[.kw]#val# foldr : ('a obj * 'b \-> 'b) \-> 'b \-> 'a set \-> 'b`:: + Use xref:#val:fold[`fold`] instead. + +== See Also + +xref:str-HashCons.adoc[`HashCons`], +xref:str-HashConsMap.adoc[`HashConsMap`], +xref:hash-cons-lib.adoc[__The HashCons Library__] diff --git a/smlnj-lib/Doc/src/HashCons/str-HashConsString.adoc b/smlnj-lib/Doc/src/HashCons/str-HashConsString.adoc new file mode 100644 index 0000000..388e2d6 --- /dev/null +++ b/smlnj-lib/Doc/src/HashCons/str-HashConsString.adoc @@ -0,0 +1,44 @@ += The `HashConsString` structure +:Author: John Reppy +:Date: {release-date} +:stem: latexmath +:source-highlighter: pygments +:VERSION: {smlnj-version} + +The `HashConsString` structure embeds the `string` +type as a hash-consed object. +It is implemented using the xref:fun-HashConsGroundFn.adoc[`HashConsGroundFn`] +functor. + +== Synopsis + +[source,sml] +------------ +structure HashConsString +------------ + +== Interface + +[source,sml] +------------ +type hash_key = string +type obj = hash_key HashCons.obj + +val mk : hash_key -> obj +------------ + +== Description + +`[.kw]#type# hash_key = string`:: + The ground type being hashed. + +`[.kw]#type# obj = hash_key HashCons.obj`:: + The type of hash-consed strings. + +`[.kw]#val# mk : hash_key \-> obj`:: + `mk s` converts the string `s` to a hash-consed object. + +== See Also + +xref:str-HashCons.adoc[`HashCons`], +xref:hash-cons-lib.adoc[__The HashCons Library__] diff --git a/smlnj-lib/Doc/src/HashCons/str-HashConsWord.adoc b/smlnj-lib/Doc/src/HashCons/str-HashConsWord.adoc new file mode 100644 index 0000000..5cda1a1 --- /dev/null +++ b/smlnj-lib/Doc/src/HashCons/str-HashConsWord.adoc @@ -0,0 +1,43 @@ += The `HashConsWord` structure +:Author: John Reppy +:Date: {release-date} +:stem: latexmath +:source-highlighter: pygments +:VERSION: {smlnj-version} + +The `HashConsWord` structure embeds the `word` type as a hash-consed object. +It is implemented by directly using the value as the tag and hash key +(_i.e._, no hash table). + +== Synopsis + +[source,sml] +------------ +structure HashConsWord +------------ + +== Interface + +[source,sml] +------------ +type hash_key = word +type obj = hash_key HashCons.obj + +val mk : hash_key -> obj +------------ + +== Description + +`[.kw]#type# hash_key = word`:: + The ground type being hashed. + +`[.kw]#type# obj = hash_key HashCons.obj`:: + The type of hash-consed words. + +`[.kw]#val# mk : hash_key \-> obj`:: + `mk w` converts the word `w` to a hash-consed object. + +== See Also + +xref:str-HashCons.adoc[`HashCons`], +xref:hash-cons-lib.adoc[__The HashCons Library__] diff --git a/smlnj-lib/Doc/src/INet/MODULES b/smlnj-lib/Doc/src/INet/MODULES new file mode 100644 index 0000000..c78eea8 --- /dev/null +++ b/smlnj-lib/Doc/src/INet/MODULES @@ -0,0 +1,2 @@ +structure SockUtil +structure UnixSockUtil diff --git a/smlnj-lib/Doc/src/INet/inet-lib.adoc b/smlnj-lib/Doc/src/INet/inet-lib.adoc new file mode 100644 index 0000000..3c9d8ed --- /dev/null +++ b/smlnj-lib/Doc/src/INet/inet-lib.adoc @@ -0,0 +1,40 @@ += The INet Library +:Author: John Reppy +:Date: {release-date} +:stem: latexmath +:source-highlighter: pygments +:VERSION: {smlnj-version} + +== Overview + +The *INet Library* provides utilities for network programming with sockets. + +== Contents + +xref:str-SockUtil.adoc[`[.kw]#structure# SockUtil`]:: + Various utility functions for programming with sockets. + +xref:str-UnixSockUtil.adoc[`[.kw]#structure# UnixSockUtil`]:: + Various utility functions for programming with Unix-domain sockets. + +== Usage + +For https://smlnj.org[*SML/NJ*], include `$/inet-lib.cm` in your +*CM* file. + +For use in https://www.mlton.org/[*MLton*], include +`$(SML_LIB)/smlnj-lib/INet/inet-lib.mlb` in your *MLB* file. + +ifdef::backend-pdf[] + +// Push titles down one level. +:leveloffset: 1 + +include::str-SockUtil.adoc[] + +include::str-UnixSockUtil.adoc[] + +// Return to normal title levels. +:leveloffset: 0 + +endif::[] diff --git a/smlnj-lib/Doc/src/INet/str-SockUtil.adoc b/smlnj-lib/Doc/src/INet/str-SockUtil.adoc new file mode 100644 index 0000000..850afe5 --- /dev/null +++ b/smlnj-lib/Doc/src/INet/str-SockUtil.adoc @@ -0,0 +1,114 @@ += The `SockUtil` structure +:Author: John Reppy +:Date: {release-date} +:stem: latexmath +:source-highlighter: pygments +:VERSION: {smlnj-version} + +The `SockUtil` structure provides a collection of utility functions for +programming with the *Basis Library* +{sml-basis-url}/socket.html[`Socket`] structure + +== Synopsis + +[source,sml] +------------ +signature SOCK_UTIL +structure SockUtil : SOCK_UTIL +------------ + +== Interface + +[source,sml] +------------ +datatype port = PortNumber of int | ServName of string + +datatype hostname = HostName of string | HostAddr of NetHostDB.in_addr + +val scanAddr : (char, 'a) StringCvt.reader + -> ({host : hostname, port : port option}, 'a) StringCvt.reader +val addrFromString : string -> {host : hostname, port : port option} option + +exception BadAddr of string + +val resolveAddr : {host : hostname, port : port option} + -> {host : string, addr : NetHostDB.in_addr, port : int option} + +type 'a stream_sock = ('a, Socket.active Socket.stream) Socket.sock + +val connectINetStrm : {addr : NetHostDB.in_addr, port : int} + -> INetSock.inet stream_sock + +val recvVec : ('a stream_sock * int) -> Word8Vector.vector +val recvStr : ('a stream_sock * int) -> string + +val sendVec : ('a stream_sock * Word8Vector.vector) -> unit +val sendStr : ('a stream_sock * string) -> unit +val sendArr : ('a stream_sock * Word8Array.array) -> unit +------------ + +== Description + +`[.kw]#datatype# port = PortNumber [.kw]#of# int | ServName [.kw]#of# string`:: + specifies a port identifier, which either be a number (`Port`) + or the name of a service (`ServName`). + +`[.kw]#datatype# hostname = HostName [.kw]#of# string | HostAddr [.kw]#of# NetHostDB.in_addr`:: + something + +[[val:scanAddr]] +`[.kw]#val# scanAddr : (char, 'a) StringCvt.reader \-> ({host : hostname, port : port option}, 'a) StringCvt.reader`:: + `scanAddr getc` returns an address reader. An address is a string of + the form latexmath:[\mathit{addr}\,[\,\mathtt{:}\,\mathit{port}\,]], + where latexmath:[\mathit{addr}] may either be a numeric or symbolic host + name and the optional port is either a decimal port number or alphanumeric service + name. Legal host names must begin with a letter, and may contain any alphanumeric + character, the minus sign (`-`) and period (`.`), where the period is used as a + domain separator. + +`[.kw]#val# addrFromString : string \-> {host : hostname, port : port option} option`:: + `addrFromString addr` converts the string `addr` to a host-port address specifier. + The syntax of addresses is as described for xref:#val:scanAddr[`scanAddr`]. + +[[exn:BadAddr]] +`[.kw]#exception# BadAddr [.kw]#of# string`:: + This exception is raised by xref:#val:resolveAddr[`resolveAddr`]. + +[[val:resolveAddr]] +`[.kw]#val# resolveAddr : {host : hostname, port : port option} \-> {host : string, addr : NetHostDB.in_addr, port : int option}`:: + `resolveAddr {host, port}` resolves the hostname and optional port in the + host and service databases. If either the host or service name is not + found, then the xref:#exn:BadAddr[`BadAddr`] exception is raised. + +`[.kw]#type# 'a stream_sock = ('a, Socket.active Socket.stream) Socket.sock`:: + A type abbreviation for active stream sockets. + +`[.kw]#val# connectINetStrm : {addr : NetHostDB.in_addr, port : int} \-> INetSock.inet stream_sock`:: + `connectINetStrm {addr, port}` establishs a client-side connection to an + INET domain stream socket. + +`[.kw]#val# recvVec : ('a stream_sock * int) \-> Word8Vector.vector`:: + `recvVec (sock, n)` reads `n` bytes from the stream socket `sock`; fewer than + `n` bytes is returned when the stream is closed at the other end of the connection. + It raises the {sml-basis-url}/general.html#SIG:GENERAL.Size:EXN[`Size`] exception + when `n` is negative. + +`[.kw]#val# recvStr : ('a stream_sock * int) \-> string`:: + `recvStr (sock, n)` reads `n` characters from the stream socket `sock`; fewer than + `n` characters is returned when the stream is closed at the other end of the connection. + It raises the {sml-basis-url}/general.html#SIG:GENERAL.Size:EXN[`Size`] exception + when `n` is negative. + +`[.kw]#val# sendVec : ('a stream_sock * Word8Vector.vector) \-> unit`:: + `sendVec (sock, vec)` sends the vector `vec` on the stream socket `sock`. + +`[.kw]#val# sendStr : ('a stream_sock * string) \-> unit`:: + `sendStr (sock, s)` sends the string `s` on the stream socket `sock`. + +`[.kw]#val# sendArr : ('a stream_sock * Word8Array.array) \-> unit`:: + `sendArr (sock, arr)` sends the array `arr` on the stream socket `sock`. + +== See Also + +{sml-basis-url}/socket.html[`Socket` (*SML Basis*)], +xref:inet-lib.adoc[__The INet Library__] diff --git a/smlnj-lib/Doc/src/INet/str-UnixSockUtil.adoc b/smlnj-lib/Doc/src/INet/str-UnixSockUtil.adoc new file mode 100644 index 0000000..0b8b6e9 --- /dev/null +++ b/smlnj-lib/Doc/src/INet/str-UnixSockUtil.adoc @@ -0,0 +1,42 @@ += The `UnixSockUtil` structure +:Author: John Reppy +:Date: {release-date} +:stem: latexmath +:source-highlighter: pygments +:VERSION: {smlnj-version} + +The `UnixSockUtil` structure extends the xref:str-SockUtil.adoc[`SockUtil`] +structure with an additional operation for making Unix-domain connections. + +== Synopsis + +[source,sml] +------------ +signature UNIX_SOCK_UTIL +structure UnixSockUtil : UNIX_SOCK_UTIL +------------ + +== Interface + +[source,sml] +------------ +include SOCK_UTIL + +val connectUnixStrm : string -> UnixSock.unix stream_sock +------------ + +== Description + +`[.kw]#include# SOCK_UTIL`:: + The `UnixSockUtil` structure includes the operations from the + xref:str-SockUtil.adoc[`SockUtil`] structure. + +`[.kw]#val# connectUnixStrm : string \-> UnixSock.unix stream_sock`:: + `connectUnixStrm path` establishs a client-side connection to the + Unix-domain stream socket specified by `path`. + +== See Also + +xref:str-SockUtil.adoc[`SockUtil`], +{sml-basis-url}/unix-sock.html#UnixSock:STR:SPEC[`UnixSock` (*SML Basis*)], +xref:inet-lib.adoc[__The INet Library__] diff --git a/smlnj-lib/Doc/src/JSON/json-lib.adoc b/smlnj-lib/Doc/src/JSON/json-lib.adoc new file mode 100644 index 0000000..91bba54 --- /dev/null +++ b/smlnj-lib/Doc/src/JSON/json-lib.adoc @@ -0,0 +1,82 @@ += The JSON Library +:Author: John Reppy +:Date: {release-date} +:stem: latexmath +:source-highlighter: pygments +:VERSION: {smlnj-version} + +== Overview +The *JSON Library* provides a representation of the +https://www.json.org/json-en.html[JavaScript Object Notation (*JSON*)] +with support for parsing, printing, and manipulating *JSON* +values as trees. There is also support for processing *JSON* data as +streams using a "SAX-style" API. + +== Contents + +=== Tree-based API + + xref:str-JSON.adoc[`[.kw]#structure# JSON`]:: + Defines the representation of *JSON* values as an *SML* datatype. + + xref:str-JSONParser.adoc[`[.kw]#structure# JSONParser`]:: + A parser for *JSON* input. + + xref:str-JSONPrinter.adoc[`[.kw]#structure# JSONPrinter`]:: + A printer for *JSON* output. + + xref:str-JSONUtil.adoc[`[.kw]#structure# JSONUtil`]:: + A collection of utility functions for working with *JSON* values. + + xref:str-JSONDecode.adoc[`[.kw]#structure# JSONDecode`]:: + A collection of combinators for decoding *JSON* values. + +=== Stream-based API + + xref:str-JSONStreamParser.adoc[`[.kw]#structure# JSONStreamParser`]:: + A stream (or event) based parser for JSON input. + + xref:sig-JSON_STREAM_OUTPUT.adoc[`[.kw]#signature# JSON_STREAM_OUTPUT`]:: + The interface to a stream-based printer for JSON output. + + xref:str-JSONBufferPrinter.adoc[`[.kw]#structure# JSONBufferPrinter`]:: + A stream-based printer for JSON output to character buffers. + + xref:str-JSONStreamPrinter.adoc[`[.kw]#structure# JSONStreamPrinter`]:: + A stream-based printer for JSON output to text files. + +== Usage + +For https://smlnj.org[*SML/NJ*], include `$/json-lib.cm` in your +*CM* file. + +For use in https://www.mlton.org/[*MLton*], include +`$(SML_LIB)/smlnj-lib/JSON/json-lib.mlb` in your *MLB* file. + +ifdef::backend-pdf[] + +// Push titles down one level. +:leveloffset: 1 + +include::str-JSON.adoc[] + +include::str-JSONParser.adoc[] + +include::str-JSONPrinter.adoc[] + +include::str-JSONUtil.adoc[] + +include::str-JSONDecode.adoc[] + +include::str-JSONStreamParser.adoc[] + +include::sig-JSON_STREAM_OUTPUT.adoc[] + +include::str-JSONBufferPrinter.adoc[] + +include::str-JSONStreamPrinter.adoc[] + +// Return to normal title levels. +:leveloffset: 0 + +endif::[] diff --git a/smlnj-lib/Doc/src/JSON/sig-JSON_STREAM_OUTPUT.adoc b/smlnj-lib/Doc/src/JSON/sig-JSON_STREAM_OUTPUT.adoc new file mode 100644 index 0000000..4b6616b --- /dev/null +++ b/smlnj-lib/Doc/src/JSON/sig-JSON_STREAM_OUTPUT.adoc @@ -0,0 +1,160 @@ += The `JSON_STREAM_OUTPUT` signature +:Author: John Reppy +:Date: {release-date} +:stem: latexmath +:source-highlighter: pygments +:VERSION: {smlnj-version} + +The `JSON_STREAM_OUTPUT` signature defines an interface for +stream-output of *JSON* values. + +== Synopsis + +[source,sml] +------------ +signature JSON_STREAM_OUTPUT + +structure JSONBufferPrinter : JSON_STREAM_OUTPUT + where type outstream = CharBuffer.buf +structure JSONStreamPrinter : JSON_STREAM_OUTPUT + where type outstream = outstream +------------ + +== Interface + +[source,sml] +------------ +type outstream + +type printer + +val new : outstream -> printer +val new' : {strm : outstream, pretty : bool} -> printer + +val close : printer -> unit + +val null : printer -> unit +val boolean : printer * bool -> unit +val integer : printer * IntInf.int -> unit +val int : printer * int -> unit +val float : printer * real -> unit +val string : printer * string -> unit +val beginObject : printer -> unit +val objectKey : printer * string -> unit +val endObject : printer -> unit +val beginArray : printer -> unit +val endArray : printer -> unit + +val value : printer * JSON.value -> unit +------------ + +== Description + +`[.kw]#type# outstream`:: + The type of the text consumer that the printer is layered on top of. + +`[.kw]#type# printer`:: + The `printer` type tracks the current _state_ of the output so that it + can correctly add punctuation and white space (when pretty printing). + +`[.kw]#val# new : outstream \-> printer`:: + `new outS` creates a new printer from the output stream `outS`. + The printer produces a _condensed_ format without newlines or + indentation; use the ``new'`` function to create a pretty-printer + for *JSON* output. + +`[.kw]#val# new' : {strm : outstream, pretty : bool} \-> printer`:: + `new' {strm, pretty}` creates a new pretty-printing stream from the output + stream `strm`, where the value of the `pretty` field controls whether + the output is condensed (when `pretty` is `false`) or printed with + new lines and indentation to improve readability (when `pretty` is `true`). + +`[.kw]#val# close : printer \-> unit`:: + `close pr` closes the printer, but not the underlying output stream. + Closing the printer while there is an open object or array results in + the {sml-basis-url}/general.html#SIG:GENERAL.Fail:EXN[`Fail`] + exception being raised. Also, calling any of the below printing + functions on a closed printer will result in the + {sml-basis-url}/general.html#SIG:GENERAL.Fail:EXN[`Fail`] + exception being raised. + +`[.kw]#val# null : printer \-> unit`:: + `null pr` prints the *JSON* *null* value. Raises the + {sml-basis-url}/general.html#SIG:GENERAL.Fail:EXN[`Fail`] + exception if the printer is closed. + +`[.kw]#val# boolean : printer * bool \-> unit`:: + `boolean (pr, b)` prints the *JSON* boolean value `b`. Raises the + {sml-basis-url}/general.html#SIG:GENERAL.Fail:EXN[`Fail`] + exception if the printer is closed. + +`[.kw]#val# integer : printer * IntInf.int \-> unit`:: + `integer (pr, n)` prints the *JSON* number `n`. Raises the + {sml-basis-url}/general.html#SIG:GENERAL.Fail:EXN[`Fail`] + exception if the printer is closed. + +`[.kw]#val# int : printer * int \-> unit`:: + `int (pr, n)` prints the *JSON* number `n`. Raises the + {sml-basis-url}/general.html#SIG:GENERAL.Fail:EXN[`Fail`] + exception if the printer is closed. This function is a convenience + for when one wants to print a default-int-type value without + converting it to a `IntInf.int` first. + +`[.kw]#val# float : printer * real \-> unit`:: + `float (pr, r)` prints the *JSON* floating-point number `r`. + Raises the {sml-basis-url}/general.html#SIG:GENERAL.Fail:EXN[`Fail`] + exception if the printer is closed. + +`[.kw]#val# string : printer * string \-> unit`:: + `string (pr, s)` prints the *JSON* string `s`. Raises the + {sml-basis-url}/general.html#SIG:GENERAL.Fail:EXN[`Fail`] + exception if the printer is closed. + +`[.kw]#val# beginObject : printer \-> unit`:: + `beginArray pr` prints the opening "`{`" for a *JSON* object. + Note that each call to `beginObject` should be matched by a call + to `endObject`. Raises the + {sml-basis-url}/general.html#SIG:GENERAL.Fail:EXN[`Fail`] + exception if the printer is closed. + +`[.kw]#val# objectKey : printer * string \-> unit`:: + `objectKey (pr, key)` prints the *JSON* key-value `key` followed + by a "`:`". This function should be inside matched + `beginObject`/`endObject` calls and should be followed by + the printing of a *JSON* value. Raises the + {sml-basis-url}/general.html#SIG:GENERAL.Fail:EXN[`Fail`] + exception if the printer is closed. + +`[.kw]#val# endObject : printer \-> unit`:: + `endObject pr` prints the closing `}` for the currently open object. + The {sml-basis-url}/general.html#SIG:GENERAL.Fail:EXN[`Fail`] + exception is raised if the current context is not an open object, + if a key has been printed without an associated value, or + if the printer is closed. + +`[.kw]#val# beginArray : printer \-> unit`:: + `beginArray pr` prints the opening "`[`" for a *JSON* array. + Note that each call to `beginArray` should be matched by a call + to `endArray`. Raises the + {sml-basis-url}/general.html#SIG:GENERAL.Fail:EXN[`Fail`] + exception if the printer is closed. + +`[.kw]#val# endArray : printer \-> unit`:: + `endArray pr` prints the closing `]` for the currently open array. + The {sml-basis-url}/general.html#SIG:GENERAL.Fail:EXN[`Fail`] + exception is raised if the current context is not an open array + or if the printer is closed. + +`[.kw]#val# value : printer * JSON.value \-> unit`:: + `value (pr, v)` embeds the *JSON* value `v` in the output. It is + equivalent to recursively traversing the *JSON* value while calling + the appropriate output functions from above. + +== See Also + +xref:str-JSON.adoc[`JSON`], +xref:str-JSONBufferPrinter.adoc[`JSONBufferPrinter`], +xref:str-JSONPrinter.adoc[`JSONPrinter`], +xref:str-JSONStreamParser.adoc[`JSONStreamParser`], +xref:str-JSONStreamPrinter.adoc[`JSONStreamPrinter`], +xref:json-lib.adoc[__The JSON Library__] diff --git a/smlnj-lib/Doc/src/JSON/str-JSON.adoc b/smlnj-lib/Doc/src/JSON/str-JSON.adoc new file mode 100644 index 0000000..f45750d --- /dev/null +++ b/smlnj-lib/Doc/src/JSON/str-JSON.adoc @@ -0,0 +1,91 @@ += The `JSON` structure +:Author: John Reppy +:Date: {release-date} +:stem: latexmath +:source-highlighter: pygments +:VERSION: {smlnj-version} + +== Synopsis + +[source,sml] +------------ +structure JSON +------------ + +== Interface + +[source,sml] +------------ +datatype value + = OBJECT of (string * value) list + | ARRAY of value list + | NULL + | BOOL of bool + | INT of IntInf.int + | INTLIT of string + | FLOAT of real + | STRING of string +------------ + +== Description + +`[.kw]#datatype# value = ...`:: + [[type:value]] + This datatype represents *JSON* values as trees. The constructors + are ++ +-- + `OBJECT [.kw]#of# (string * value) list`:: + represents a *JSON* object value; _i.e._, a list of key-value pairs. + Note that the keys should be unique. + + `ARRAY [.kw]#of# value list`:: + represents a *JSON* array value. + + `NULL`:: + represents the *JSON* value `"null"`. + + `BOOL [.kw]#of# bool`:: + represents the *JSON* values `"true"` and `"false"`. + + `INT [.kw]#of# IntInf.int`:: + represents *JSON* integer numbers. + + `INTLIT [.kw]#of# string`:: + represents *JSON* integer numbers that have a large number of digits. + + `FLOAT [.kw]#of# real`:: + represents *JSON* floating-point numbers. + + `STRING [.kw]#of# string`:: + represents *JSON* strings, which are assumed to be UTF-8 encoded. +-- + +== Example + +The *JSON* value + +[source,json] +------------- +{ "a" : 23, + "b" : [ false, true ], + "c" : "hello world" +} +------------- + +has the following representation using the *value* datatype: + +[source,sml] +------------ +OBJECT[ + ("a", INT 23), + ("b", ARRAY[BOOL false, BOOL true]), + ("c", STRING "hello world") +] +------------ + + +== See Also + +xref:str-JSONUtil.adoc[`JSONUtil`], +xref:json-lib.adoc[__The JSON Library__] diff --git a/smlnj-lib/Doc/src/JSON/str-JSONBufferPrinter.adoc b/smlnj-lib/Doc/src/JSON/str-JSONBufferPrinter.adoc new file mode 100644 index 0000000..e087deb --- /dev/null +++ b/smlnj-lib/Doc/src/JSON/str-JSONBufferPrinter.adoc @@ -0,0 +1,26 @@ += The `JSONBufferPrinter` structure +:Author: John Reppy +:Date: {release-date} +:stem: latexmath +:source-highlighter: pygments +:VERSION: {smlnj-version} + +The `JSONBufferPrinter` structure provides an imperative +printer for producing *JSON* output to a +https://github.com/SMLFamily/BasisLibrary/wiki/2018-001-Addition-of-monomorphic-buffers[ +character buffer]. + +== Synopsis + +[source,sml] +------------ +structure JSONBufferPrinter : JSON_STREAM_OUTPUT + where type outstream = CharBuffer.buf +------------ + +== See Also + +xref:sig-JSON_STREAM_OUTPUT.adoc[`JSON_STREAM_OUTPUT`], +xref:str-JSONStreamPrinter.adoc[`JSONStreamPrinter`], +xref:str-JSONPrinter.adoc[`JSONPrinter`], +xref:json-lib.adoc[__The JSON Library__] diff --git a/smlnj-lib/Doc/src/JSON/str-JSONDecode.adoc b/smlnj-lib/Doc/src/JSON/str-JSONDecode.adoc new file mode 100644 index 0000000..754b28b --- /dev/null +++ b/smlnj-lib/Doc/src/JSON/str-JSONDecode.adoc @@ -0,0 +1,343 @@ += The `JSONDecode` structure +:Author: John Reppy +:Date: {release-date} +:stem: latexmath +:source-highlighter: pygments +:VERSION: {smlnj-version} + +The `JSONDecode` structure implements combinators for decoding *JSON* values. +The design is based on https://package.elm-lang.org/packages/elm/json/latest/Json-Decode[ +*Elm*'s `JSON.Decode` module]. + +== Synopsis + +[source,sml] +------------ +signature JSON_DECODE +structure JSONDecode : JSON_DECODE +------------ + +== Interface + +[source,sml] +------------ +exception Failure of string * JSON.value + +exception NotBool of JSON.value +exception NotInt of JSON.value +exception NotNumber of JSON.value +exception NotString of JSON.value + +exception NotObject of JSON.value +exception FieldNotFound of JSON.value * string + +exception NotArray of JSON.value +exception ArrayBounds of JSON.value * int + +val exnMessage : exn -> string + +type 'a decoder + +val decode : 'a decoder -> JSON.value -> 'a +val decodeString : 'a decoder -> string -> 'a +val decodeFile : 'a decoder -> string -> 'a + +val bool : bool decoder +val int : int decoder +val intInf : IntInf.int decoder +val number : Real64.real decoder +val string : string decoder +val null : 'a -> 'a decoder + +val raw : JSON.value decoder + +val nullable : 'a decoder -> 'a option decoder + +val try : 'a decoder -> 'a option decoder + +val seq : 'a decoder -> ('a -> 'b) decoder -> 'b decoder + +val field : string -> 'a decoder -> 'a decoder + +val reqField : string -> 'a decoder -> ('a -> 'b) decoder -> 'b decoder + +val optField : string -> 'a decoder -> ('a option -> 'b) decoder -> 'b decoder + +val dfltField : string -> 'a decoder -> 'a -> ('a -> 'b) decoder -> 'b decoder + +val array : 'a decoder -> 'a list decoder +val sub : int -> 'a decoder -> 'a decoder + +val at : JSONUtil.path -> 'a decoder -> 'a decoder + +val succeed : 'a -> 'a decoder + +val fail : string -> 'a decoder + +val andThen : ('a -> 'b decoder) -> 'a decoder -> 'b decoder + +val orElse : 'a decoder * 'a decoder -> 'a decoder + +val choose : 'a decoder list -> 'a decoder + +val map : ('a -> 'b) -> 'a decoder -> 'b decoder +val map2 : ('a * 'b -> 'res) + -> ('a decoder * 'b decoder) + -> 'res decoder +val map3 : ('a * 'b * 'c -> 'res) + -> ('a decoder * 'b decoder * 'c decoder) + -> 'res decoder +val map4 : ('a * 'b * 'c * 'd -> 'res) + -> ('a decoder * 'b decoder * 'c decoder * 'd decoder) + -> 'res decoder + +val tuple2 : ('a decoder * 'b decoder) -> ('a * 'b) decoder +val tuple3 : ('a decoder * 'b decoder * 'c decoder) -> ('a * 'b * 'c) decoder +val tuple4 : ('a decoder * 'b decoder * 'c decoder * 'd decoder) + -> ('a * 'b * 'c * 'd) decoder + +val delay : (unit -> 'a decoder) -> 'a decoder +------------ + +== Description + +`[.kw]#exception# Failure [.kw]#of# string * JSON.value`:: + [[exn:Failure]] + raised by the xref:val:fail[`fail`] decoder. + +`[.kw]#exception# NotNull [.kw]#of# JSON.value`:: + [[exn:NotNull]] + raised by the xref:val:null[`null`] decoder when the argument + is not the *JSON* `null` value. + +`[.kw]#exception# NotBool [.kw]#of# JSON.value`:: + [[exn:NotBool]] + raised by the xref:val:bool[`bool`] decode when the argument + is not a *JSON* boolean. + This exception is the same as + xref:str-JSONUtil#exn:NotBool[`JSONUtil.NotBool`]. + +`[.kw]#exception# NotInt [.kw]#of# JSON.value`:: + [[exn:NotInt]] + raised by the xref:val:int[`int`] and xref:val:intInf[`intInf`] + decoders when the argument is not a *JSON* integer number. + This exception is the same as + xref:str-JSONUtil#exn:NotInt[`JSONUtil.NotInt`]. + +`[.kw]#exception# NotNumber [.kw]#of# JSON.value`:: + [[exn:NotNumber]] + raised by the xref:val:number[`number`] decoder when the argument + is not a *JSON* number. + This exception is the same as + xref:str-JSONUtil#exn:NotNumber[`JSONUtil.NotNumber`]. + +`[.kw]#exception# NotString [.kw]#of# JSON.value`:: + [[exn:NotString]] + raised by the xref:val:string[`string`] decoder when the argument + is not a *JSON* string. + This exception is the same as + xref:str-JSONUtil#exn:NotString[`JSONUtil.NotString`]. + +`[.kw]#exception# NotObject [.kw]#of# JSON.value`:: + [[exn:NotObject]] + raised by the xref:val:field[`field`] decoder when the + argument is not a *JSON* object. + This exception is the same as + xref:str-JSONUtil#exn:NotObject[`JSONUtil.NotObject`]. + +`[.kw]#exception# FieldNotFound [.kw]#of# JSON.value * string`:: + [[exn:FieldNotFound]] + This exception is raised by the xref:val:field[`field`] decoder + when the given field is not found in an object. + This exception is the same as + xref:str-JSONUtil#exn:FieldNotFound[`JSONUtil.FieldNotFound`]. + +`[.kw]#exception# NotArray [.kw]#of# JSON.value`:: + [[exn:NotArray]] + This exception is raised by the xref:val:array[`array`] decoder + when the argument is not a *JSON* array. + This exception is the same as + xref:str-JSONUtil#exn:NotArray[`JSONUtil.NotArray`]. + +`[.kw]#exception# ArrayBounds [.kw]#of# JSON.value * int`:: + [[exn:ArrayBounds]] + This exception is raised when access to an array value is out of bounds. + This exception is the same as + xref:str-JSONUtil#exn:ArrayBounds[`JSONUtil.ArrayBounds`]. + +`[.kw]#val# exnMessage : exn \-> string`:: + `exnMessage exn` returns an error-message string for the exception value + `exn`. This function produces specialized messages for the exceptions defined + in the `JSONDecode` structure and falls back to the + {sml-basis-url}/general.html#SIG:GENERAL.exnMessage:VAL[General.exnMessage] + function for other exceptions. + +`[.kw]#type# 'a decoder'`:: + the type of a decoder that decodes a *JSON* value to a value of type ``'a``. + +`[.kw]#val# decode : 'a decoder \-> JSON.value \-> 'a`:: + `decode d jv` decodes the *JSON* value `jv` using the decoder `d`. + Failure to decode will be signaled by raising an exception that depends on the + decoder and value. + +`[.kw]#val# decodeString : 'a decoder \-> string \-> 'a`:: + `decode d s` decodes the *JSON* value that results from parsing the string `s`. + +`[.kw]#val# decodeFile : 'a decoder \-> string \-> 'a`:: + `decode d f` decodes the *JSON* value that results from parsing the file `f`. + +`[.kw]#val# bool : bool decoder`:: + decodes a *JSON* Boolean value. This decoder raises the + xref:exn:NotBool[`NotBool`] exception if the value is not a *JSON* Boolean. + +`[.kw]#val# int : int decoder`:: + decodes a *JSON* integer value. This decoder raises the + xref:exn:NotInt[`NotInt`] exception if the value is not a *JSON* integer + and the {sml-basis-url}/general.html#SIG:GENERAL.Overflow:EXN[`Overflow`] + exception if the integer is too large to be represented as an `Int.int`. + +`[.kw]#val# intInf : IntInf.int decoder`:: + decodes a *JSON* integer value. This decoder raises the + xref:exn:NotInt[`NotInt`] exception if the value is not a *JSON* integer. + +`[.kw]#val# number : Real64.real decoder`:: + decodes a *JSON* number value. This decoder raises the + xref:exn:NotNumber[`NotNumber`] exception if the value is not a *JSON* number. + +`[.kw]#val# string : string decoder`:: + decodes a *JSON* string value. This decoder raises the + xref:exn:NotString[`NotString`] exception if the value is not a *JSON* string. + +`[.kw]#val# null : 'a \-> 'a decoder`:: + `null v` returns a decoder for the *JSON* `null` value. When used to decode + a `null` value, it will return its argument `v`; otherwise it will raise the + xref:exn:NotNull[`NotNull`] exception. + +`[.kw]#val# raw : JSON.value decoder`:: + this decoder returns the raw *JSON* value that it is applied to (_i.e._, it + is the identity decoder). + +`[.kw]#val# nullable : 'a decoder \-> 'a option decoder`:: + `nullable d` returns a decoder that maps `null` to `NONE` and otherwise applies + `SOME` to the result of decoding the value using the decoder `d`. + +`[.kw]#val# try : 'a decoder \-> 'a option decoder`:: + `try d` returns a decoder that attempts to decode its argument using the decoder + `d`. If it fails, then `NONE` is returned. Otherwise, `SOME` is applied to + the result od decoding the value. + +`[.kw]#val# seq : 'a decoder -> ('a \-> 'b) decoder \-> 'b decoder`:: + `seq d k` sequences decoding operations in a continuation-passing style. + +`[.kw]#val# field : string \-> 'a decoder \-> 'a decoder`:: + `field lab d` returns a decoder that decodes the object field with the + label `lab` using the decoder `d`. It will raise the xref:exn:NotObject[`NotObject`] + exception when the argument is not a *JSON* object and the + xref:exn:FieldNotFound[`FieldNotFound`] exception when the given object does + not have a field with the specified label. + +`[.kw]#val# reqField : string \-> 'a decoder \-> ('a \-> 'b) decoder \-> 'b decoder`:: + `reqField lab d k` returns a decoder for a required object field that can be + sequenced in a continuation-passing style (it is equivalent to `seq (field lab d) k`). + It will raise the xref:exn:NotObject[`NotObject`] exception when the argument + is not a *JSON* object and the xref:exn:FieldNotFound[`FieldNotFound`] exception + when the given object does not have a field with the specified label. + +`[.kw]#val# optField : string \-> 'a decoder \-> ('a option \-> 'b) decoder \-> 'b decoder`:: + `optField lab d k` returns a decoder for an optional object field that can be + sequenced in a continuation-passing style. If the field is not present in the + object, then `NONE` is passed to `k`. + +`[.kw]#val# dfltField : string \-> 'a decoder \-> 'a \-> ('a \-> 'b) decoder \-> 'b decoder`:: + `dfltField lab d dflt k` returns a decoder for an optional object field that can be + sequenced in a continuation-passing style. If the field is not present in the + object, then `dflt` is passed to `k`. + +`[.kw]#val# array : 'a decoder \-> 'a list decoder`:: + `array d` returns a decoder that when applied to a *JSON* array, will decode + the elements of the array using the decoder `d` and return the result as a + list. It raises the xref:exn:NotArray[`NotArray`] exception if the + argument is not a *JSON* array. + +`[.kw]#val# sub : int \-> 'a decoder \-> 'a decoder`:: + `sub i d` returns a decoder that when given a *JSON* array, decodes the ``i``'th + element of the array using the decoder `d`. This decoder will raise the + xref:exn:NotArray[`NotArray`] exception if the argument is not a *JSON* array, + and the xref:exn:ArrayBounds[`ArrayBounds`] exception if the index is out of + bounds for the array. + +`[.kw]#val# at : JSONUtil.path \-> 'a decoder \-> 'a decoder`:: + `at path d` returns a decoder that uses the path to select a value + from its argument (see xref:str-JSONUtil.adoc#val:get[`JSONUtil.get`]) + and then decodes that value using the decoder `d`. + +`[.kw]#val# succeed : 'a \-> 'a decoder`:: + `succeed v` returns a decoder that always yields `v` for any argument. + +`[.kw]#val# fail : string \-> 'a decoder`:: + `fail msg` returns a decoder that raises `Failure(msg, jv)` for + any *JSON* input `jv`. + +`[.kw]#val# andThen : ('a \-> 'b decoder) \-> 'a decoder \-> 'b decoder`:: + `andThen f d` returns a decoder that first uses `d` to decode a value `v` + from its argument and then returns the result of applying `f` to `v`. + +`[.kw]#val# orElse : 'a decoder * 'a decoder \-> 'a decoder`:: + `orElse (d1, d2)` returns a decoder that tries to decode its argument using + the decoder d1` and, if that fails, tries to decode the argument using `d2`. + +`[.kw]#val# choose : 'a decoder list \-> 'a decoder`:: + `choose ds` returns a decoder that tries to decode its argument + using each of the decoders in the list `ds`, returning the first successful result. + If all of the decoders fail, the the xref:exn:Failure[`Failure`] exception is + raised. + The expression `choose [d1, ..., dn]` is equivalent to ++ +[source,sml] +------------ +orElse(d1, orElse(d2, ..., orElse(dn, fail "no choice") ... )) +------------ + +`[.kw]#val# map : ('a \-> 'b) \-> 'a decoder \-> 'b decoder`:: + `map f d` returns a decoder that applies the function `f` to the result + of decoding a *JSON* value using the decoder `d`. + +`[.kw]#val# map2 : ('a * 'b \-> 'res) \-> ... \-> 'res decoder`:: + +`[.kw]#val# map3 : ('a * 'b * 'c \-> 'res) \-> ... \-> 'res decoder`:: + +`[.kw]#val# map4 : ('a * 'b * 'c * 'd \-> 'res) \-> ... \-> 'res decoder`:: + +`[.kw]#val# tuple2 : ('a decoder * 'b decoder) \-> ('a * 'b) decoder`:: + `tuple2 (d1, d2)` is equivalent to `map2 Fn.id (d1, d2)`. + +`[.kw]#val# tuple3 : ('a decoder * 'b decoder * 'c decoder) \-> ('a * 'b * 'c) decoder`:: + `tuple3 (d1, d2, d3)` is equivalent to `map2 Fn.id (d1, d2, d3)`. + +`[.kw]#val# tuple4 : ('a decoder * 'b decoder * 'c decoder * 'd decoder) \-> ('a * 'b * 'c * 'd) decoder`:: + `tuple4 (d1, d2, d3, d4)` is equivalent to `map4 Fn.id (d1, d2, d3, d4)`. + +`[.kw]#val# delay : (unit \-> 'a decoder) \-> 'a decoder`:: + `delay f` returns a decoder that delays the application of `f` to produce the + decoder and can be used to define recursive decoders. + The expression `delay f` is equivalent to `andThen f (succeed ())`. + +== Discussion + +A number of these combinators work best when composed using a infix `pipe` operator. +For example: +[source,sml] +------------ +fun |> (x, f) = f x +infix |> + +val d = succeed (fn (n : string) => fn (a : int) => {name=n, age=a}) + |> reqField "name" string + |> reqField "age" int +------------ + +== See Also + +xref:str-JSON.adoc[`JSON`], +xref:str-JSON.adoc[`JSONUtil`], +xref:json-lib.adoc[__The JSON Library__] diff --git a/smlnj-lib/Doc/src/JSON/str-JSONParser.adoc b/smlnj-lib/Doc/src/JSON/str-JSONParser.adoc new file mode 100644 index 0000000..74c4859 --- /dev/null +++ b/smlnj-lib/Doc/src/JSON/str-JSONParser.adoc @@ -0,0 +1,89 @@ += The `JSONParser` structure +:Author: John Reppy +:Date: {release-date} +:stem: latexmath +:source-highlighter: pygments +:VERSION: {smlnj-version} + +The `JSONParser` structure implements a parser for the +xref:str-JSON.adoc#type:value[`JSON.value`] type. + +== Synopsis + +[source,sml] +------------ +structure JSONParser +------------ + +== Interface + +[source,sml] +------------ +type source + +val openStream : TextIO.instream -> source +val openFile : string -> source +val openString : string -> source + +val close : source -> unit + +val parse : source -> JSON.value + +val parseFile : string -> JSON.value +------------ + +== Description + +[[type:source]] +`[.kw]#type# source`:: + The abstract type of JSON input sources. Note that this type is the + same as xref:str-JSONStreamParser.adoc#type:source[`JSONStreamParser.source`]. + +[[val:openStream]] +`[.kw]#val# openStream : TextIO.instream -> source`:: + `openStream inS` returns a input source for the given input stream. + +[[val:openFile]] +`[.kw]#val# openFile : string -> source`:: + `openStream file` returns a input source for the given file. This function + opens an input stream for reading from the file, so one should make sure to + call xref:val:close[`close`] on the source once all of the *JSON* values + have been read from the file. + +`[.kw]#val# openString : string -> source`:: + `openStream s` returns a input source for the given string. + +[[val:close]] +`[.kw]#val# close : source -> unit`:: + `close src` closes the input source, which has the effect of marking the source + as _closed_. Furthermore, if `src` was created by a call to + xref:#val:openFile[`openFile`], then the underlying input stream that + was created for the file is closed. This function does *not* close the + input stream for sources created by xref:#val:openStream[`openStream`] + +[[val:parse]] +`[.kw]#val# parse : source \-> JSON.value`:: + `parse src` parses a *JSON* value from the input source `src`. + If `src` is closed or if there is a syntax error, then the + {sml-basis-url}/general.html#SIG:GENERAL.Fail:EXN[`Fail`] + exception is raised. + +`[.kw]#val# parseFile : string \-> JSON.value`:: + `parse f` parses a *JSON* value from the text file `f`. + If there is a syntax error, then the + {sml-basis-url}/general.html#SIG:GENERAL.Fail:EXN[`Fail`] + exception is raised. + This function can also raise the + {sml-basis-url}/io.html#SIG:IO.Io:EXN[`Io`] exception if + there is an error opening `f`. + Note that this function will only parse a single + *JSON* value from the file; to parse multiple values, one should used + the xref:val:parse[`parse`] function with a source created by + xref:val:openFile[`openFile`]. + +== See Also + +xref:str-JSON.adoc[`JSON`], +xref:str-JSONStreamParser.adoc[`JSONStreamParser`], +xref:str-JSONUtil.adoc[`JSONUtil`], +xref:json-lib.adoc[__The JSON Library__] diff --git a/smlnj-lib/Doc/src/JSON/str-JSONPrinter.adoc b/smlnj-lib/Doc/src/JSON/str-JSONPrinter.adoc new file mode 100644 index 0000000..db083ab --- /dev/null +++ b/smlnj-lib/Doc/src/JSON/str-JSONPrinter.adoc @@ -0,0 +1,42 @@ += The `JSONPrinter` structure +:Author: John Reppy +:Date: {release-date} +:stem: latexmath +:source-highlighter: pygments +:VERSION: {smlnj-version} + +The `JSONPrinter` structure implements a printer for the +xref:str-JSON.adoc#type:value[`JSON.value`] type. + +== Synopsis + +[source,sml] +------------ +structure JSONPrinter +------------ + +== Interface + +[source,sml] +------------ +val print : TextIO.outstream * JSON.value -> unit +val print' : {strm : TextIO.outstream, pretty : bool} -> JSON.value -> unit +------------ + +== Description + +`[.kw]#val# print : TextIO.outstream * JSON.value \-> unit`:: + `print (outS, jv)` prints the *JSON* value `jv` to the outstream `outS` + in _condensed_ format (_i.e._, without newlines or indentation). + +`[.kw]#val# print' : {strm : TextIO.outstream, pretty : bool} \-> JSON.value \-> unit`:: + `print' {outS, pretty} jv` prints the *JSON* value `jv` to the + outstream `outS`, where the value of the `pretty` field controls whether + the output is condensed (when `pretty` is `false`) or printed with + new lines and indentation to improve readability (when `pretty` is `true`). + +== See Also + +xref:str-JSON.adoc[`JSON`], +xref:sig-JSON_STREAM_OUTPUT.adoc[`JSON_STREAM_OUTPUT`], +xref:json-lib.adoc[__The JSON Library__] diff --git a/smlnj-lib/Doc/src/JSON/str-JSONStreamParser.adoc b/smlnj-lib/Doc/src/JSON/str-JSONStreamParser.adoc new file mode 100644 index 0000000..ab7cbf7 --- /dev/null +++ b/smlnj-lib/Doc/src/JSON/str-JSONStreamParser.adoc @@ -0,0 +1,221 @@ += The `JSONStreamParser` structure +:Author: John Reppy +:Date: {release-date} +:stem: latexmath +:source-highlighter: pygments +:VERSION: {smlnj-version} + +The `JSONStreamParser` structure provides an event-based +(or _stream_) parsing model for *JSON* files. It is suitable +for scanning large files for particular items without having +to first build an in-memory data structure. It can also +be useful to directly translate from *JSON* to a specific *SML* +datatype without having to go through the intermediate +xref:str-JSON.adoc#type:value[`JSON.value`] representation. + +== Synopsis + +[source,sml] +------------ +structure JSONStreamParser +------------ + +== Interface + +[source,sml] +------------ +type source + +type 'ctx callbacks = { + null : 'ctx -> 'ctx, + boolean : 'ctx * bool -> 'ctx, + integer : 'ctx * IntInf.int -> 'ctx, + float : 'ctx * real -> 'ctx, + string : 'ctx * string -> 'ctx, + startObject : 'ctx -> 'ctx, + objectKey : 'ctx * string -> 'ctx, + endObject : 'ctx -> 'ctx, + startArray : 'ctx -> 'ctx, + endArray : 'ctx -> 'ctx, + error : 'ctx * string -> unit + } + +val openStream : TextIO.instream -> source +val openFile : string -> source +val openString : string -> source + +val close : source -> unit + +val parse : 'ctx callbacks -> (source * 'ctx) -> 'ctx + +val parseFile : 'ctx callbacks -> (string * 'ctx) -> 'ctx +------------ + +== Description + +[[type:source]] +`[.kw]#type# source`:: + The abstract type of JSON input sources. Note that this type is the + same as xref:str-JSONStreamParser.adoc#type:source[`JSONParser.source`]. + +`[.kw]#type# 'ctx callbacks = { ... }`:: + This type is a record of the parsing-event call-back functions, where + the type parameter ``'cxt`` is instantiated to the context (or state) + needed to preserve information between events. The call-back functions + in this record type are invoked as follows: ++ +-- + `null : 'ctx \-> 'ctx`:: + called when the *JSON* *null* value is encountered. + + `boolean : 'ctx * bool \-> 'ctx`:: + called when the *JSON* *true* or *false* values are encountered. + + `integer : 'ctx * IntInf.int \-> 'ctx`:: + called when a *JSON* integral-number value encountered. + + `float : 'ctx * real \-> 'ctx`:: + called when a *JSON* floating-point-number value encountered. + + `string : 'ctx * string \-> 'ctx`:: + called when a *JSON* string value encountered. + + `startObject : 'ctx \-> 'ctx`:: + called at the beginning of a *JSON* object definition (_i.e._, + when a "`{`" is encountered). + + `objectKey : 'ctx * string \-> 'ctx`:: + called when a *JSON* object key is encountered (including the + "`:"). The next call-back will specify the value + associated with the key. + + `endObject : 'ctx \-> 'ctx`:: + called at the end of a *JSON* object definition (_i.e._, + when a "`}`" is encountered). + + `startArray : 'ctx \-> 'ctx`:: + called at the beginning of a *JSON* array definition (_i.e._, + when a "`[`" is encountered). + + `endArray : 'ctx \-> 'ctx`:: + called at the end of a *JSON* array definition (_i.e._, + when a "`]`" is encountered). + + `error : 'ctx * string \-> unit`:: + called when a syntax error is encountered in the input. The + second argument is an error message describing the error. + It is expected that this call-back does not return (_i.e._, it + either raises an exception or terminates the program). + If it does return, then the parser will raise the + {sml-basis-url}/general.html#SIG:GENERAL.Fail:EXN[`Fail`] + exception. +-- + +[[val:openStream]] +`[.kw]#val# openStream : TextIO.instream -> source`:: + `openStream inS` returns a input source for the given input stream. + +[[val:openFile]] +`[.kw]#val# openFile : string -> source`:: + `openStream file` returns a input source for the given file. This function + opens an input stream for reading from the file, so one should make sure to + call xref:val:close[`close`] on the source once all of the *JSON* values + have been read from the file. + +`[.kw]#val# openString : string -> source`:: + `openStream s` returns a input source for the given string. + +[[val:close]] +`[.kw]#val# close : source -> unit`:: + `close src` closes the input source, which has the effect of marking the source + as _closed_. Furthermore, if `src` was created by a call to + xref:#val:openFile[`openFile`], then the underlying input stream that + was created for the file is closed. This function does *not* close the + input stream for sources created by xref:#val:openStream[`openStream`] + +[[val:parse]] +`[.kw]#val# parse : 'ctx callbacks \-> (source * 'ctx) \-> 'ctx`:: + `parse cbs (src, cxt)` will parse the *JSON* input from the input source + `src`, using the record of call-back functions `cbs` and the initial + context `cxt`. + +`[.kw]#val# parseFile : 'ctx callbacks \-> (string * 'ctx) \-> 'ctx`:: + `parse cbs (f, cxt)` will parse the *JSON* input from the file + `f`, using the record of call-back functions `cbs` and the initial + context `cxt`. Note that this function will only parse a single + *JSON* value from the file; to parse multiple values, one should used + the xref:val:parse[`parse`] function with a source created by + xref:val:openFile[`openFile`]. + +== Exampless + +Consider the following *JSON* input: + +[source,json] +------------- +{ "a" : 23, + "b" : [ false, true ], + "c" : "hello world" +} +------------- + +Parsing this value has the same result as evaluating the following +function: + +[source,sml] +------------ +fun f cxt = let + val cxt = startObject cxt + val cxt = objectKey (cxt, "a") + val cxt = integer (cxt, 23) + val cxt = objectKey (cxt, "b") + val cxt = startArray cxt + val cxt = boolean (cxt, false) + val cxt = boolean (cxt, true) + val cxt = endArray cxt + val cxt = objectKey (cxt, "c") + val cxt = objectString (cxt, "hello world") + val cxt = endObject cxt + in + cxt + end +------------ + +The following function returns a list of all of the string-valued +fields labeled as `"name"` in the input file. + +[source,sml] +------------ +fun getNames file = let + fun objectKey ({names, ...}, "name") = {names = names, isName = true} + | objectKey (cxt, _) = cxt + fun string (cxt as {names, isName}, s) = if isName + then {names = s :: names, isName = false} + else cxt + fun default ({names, isName}, _) = {names = names, isName = false} + val cbs = { + null = Fn.id, + boolean = default, + integer = default, + float = default, + string = string, + startObject = Fn.id, + objectKey = objectKey, + endObject = Fn.id, + startArray = Fn.id, + endArray = Fn.id, + error = fn (_, msg) => raise Fail msg + } + val {names, ...} = + JSONStreamParser.parseFile cbs (file, {names = [], isName = false}) + in + List.rev names + end +------------ + + +== See Also + +xref:str-JSONParser.adoc[`JSONParser`], +xref:str-JSONStreamPrinter.adoc[`JSONStreamPrinter`], +xref:json-lib.adoc[__The JSON Library__] diff --git a/smlnj-lib/Doc/src/JSON/str-JSONStreamPrinter.adoc b/smlnj-lib/Doc/src/JSON/str-JSONStreamPrinter.adoc new file mode 100644 index 0000000..2fec24e --- /dev/null +++ b/smlnj-lib/Doc/src/JSON/str-JSONStreamPrinter.adoc @@ -0,0 +1,25 @@ += The `JSONStreamPrinter` structure +:Author: John Reppy +:Date: {release-date} +:stem: latexmath +:source-highlighter: pygments +:VERSION: {smlnj-version} + +The `JSONStreamPrinter` structure provides an imperative +printer for producing *JSON* output to a `TextIO` output +stream. + +== Synopsis + +[source,sml] +------------ +structure JSONStreamPrinter : JSON_STREAM_OUTPUT + where type outstream = outstream +------------ + +== See Also + +xref:sig-JSON_STREAM_OUTPUT.adoc[`JSON_STREAM_OUTPUT`], +xref:str-JSONBufferPrinter.adoc[`JSONBufferPrinter`], +xref:str-JSONPrinter.adoc[`JSONPrinter`], +xref:json-lib.adoc[__The JSON Library__] diff --git a/smlnj-lib/Doc/src/JSON/str-JSONUtil.adoc b/smlnj-lib/Doc/src/JSON/str-JSONUtil.adoc new file mode 100644 index 0000000..426a861 --- /dev/null +++ b/smlnj-lib/Doc/src/JSON/str-JSONUtil.adoc @@ -0,0 +1,225 @@ += The `JSONUtil` structure +:Author: John Reppy +:Date: {release-date} +:stem: latexmath +:source-highlighter: pygments +:VERSION: {smlnj-version} + +The `JSONUtil` module defines a collection of utility functions for +working with *JSON* values. These include operations for testing +if a value is of a given type and navigating the structure of a +*JSON* value. + +== Synopsis + +[source,sml] +------------ +structure JSONUtil +------------ + +== Interface + +[source,sml] +------------ +exception NotBool of JSON.value +exception NotInt of JSON.value +exception NotNumber of JSON.value +exception NotString of JSON.value + +exception NotObject of JSON.value + +exception FieldNotFound of JSON.value * string + +exception NotArray of JSON.value + +exception ArrayBounds of JSON.value * int + +exception ElemNotFound of JSON.value + +val exnMessage : exn -> string + +val asBool : JSON.value -> bool +val asInt : JSON.value -> Int.int +val asIntInf : JSON.value -> IntInf.int +val asNumber : JSON.value -> Real.real +val asString : JSON.value -> string + +val findField : JSON.value -> string -> JSON.value option +val lookupField : JSON.value -> string -> JSON.value +val hasField : string -> JSON.value -> bool +val testField : string -> (JSON.value -> bool) -> JSON.value -> bool + +val asArray : JSON.value -> JSON.value vector + +val arrayMap : (JSON.value -> 'a) -> JSON.value -> 'a list + +datatype edge + = SEL of string + | SUB of int + | FIND of JSON.value -> bool + +type path = edge list + +val get : JSON.value * path -> JSON.value + +val replace : JSON.value * path * JSON.value -> JSON.value + +val insert : JSON.value * path * string * JSON.value -> JSON.value + +val append : JSON.value * path * JSON.value list -> JSON.value +------------ + +== Description + +`[.kw]#exception# NotBool [.kw]#of# JSON.value`:: + [[exn:NotBool]] + raised by the xref:val:asBool[`asBool`] function when the argument + is not a *JSON* boolean. + +`[.kw]#exception# NotInt [.kw]#of# JSON.value`:: + [[exn:NotInt]] + raised by the xref:val:asInt[`asInt`] and xref:val:asIntInf[`asIntInf`] + functions when the argument is not a *JSON* integer number. + +`[.kw]#exception# NotNumber [.kw]#of# JSON.value`:: + [[exn:NotNumber]] + raised by the xref:val:asNumber[`asNumber`] function when the argument + is not a *JSON* number. + +`[.kw]#exception# NotString [.kw]#of# JSON.value`:: + [[exn:NotString]] + raised by the xref:val:asString[`asString`] function when the argument + is not a *JSON* string. + +`[.kw]#exception# NotObject [.kw]#of# JSON.value`:: + [[exn:NotObject]] + raised by the xref:val:findField[`findField`] and + xref:val:lookupField[`lookupField`] functions when the + argument is not a *JSON* object. + +`[.kw]#exception# FieldNotFound [.kw]#of# JSON.value * string`:: + [[exn:FieldNotFound]] + This exception is raised when the given field is not found in an object. + +`[.kw]#exception# NotArray [.kw]#of# JSON.value`:: + [[exn:NotArray]] + This exception is raised when trying to process a non-array value as an array. + +`[.kw]#exception# ArrayBounds [.kw]#of# JSON.value * int`:: + [[exn:ArrayBounds]] + This exception is raised when access to an array value is out of bounds. + +`[.kw]#exception# ElemNotFound [.kw]#of# JSON.value`:: + [[exn:ElemNotFound]] + This exception is raised when there is no element of an array that satisfies + the predicate of a `FIND` edge in a path. The argument will be the array + in question. + +`[.kw]#val# exnMessage : exn \-> string`:: + `exnMessage exn` returns an error-message string for the exception value + `exn`. This function produces specialized messages for the exceptions defined + in the `JSONUtil` structure and falls back to the + {sml-basis-url}/general.html#SIG:GENERAL.exnMessage:VAL[General.exnMessage] + function for other exceptions. + +`[.kw]#val# asBool : JSON.value \-> bool`:: + [[val:asBool]] + `asBool (JSON.BOOL b)` returns the value `b`. This function raises + the xref:exn:NotBool[`NotBool`] exception if the value is not a + *JSON* Boolean value. + +`[.kw]#val# asInt : JSON.value \-> int`:: + [[val:asInt]] + `asInt (JSON.INT n)` returns the value `n` converted to `int`. + This function raises the xref:exn:NotInt[`NotInt`] exception if the + value is not a *JSON* integer value. It may also raise the + {sml-basis-url}/general.html#SIG:GENERAL.Overflow:EXN[`Overflow`] + exception if `n` is too large for the default `int` type. + +`[.kw]#val# asIntInf : JSON.value \-> IntInf.int`:: + [[val:asIntInf]] + `asIntInf (JSON.INT n)` returns the value `n`. + This function raises the xref:exn:NotInt[`NotInt`] exception if the + value is not a *JSON* integer value. + +`[.kw]#val# asNumber : JSON.value \-> Real.real`:: + [[val:asNumber]] + `asNumber jv` converts the *JSON* number `jv` to an SML `real` value. + The `jv` argument can either have the form `JSON.INT n`, in which case + `n` is converted to the `real` type and returned, or `JSON.FLOAT f`, + in which case `f` is returned; otherwise, the + xref:exn:NotNumber[`NotNumber`] exception is raised. + +`[.kw]#val# asString : JSON.value \-> string`:: + [[val:asString]] + `asBool (JSON.STRING s)` returns the value `s`. This function raises + the xref:exn:NotString[`NotString`] exception if the value is not a + *JSON* string value. + +`[.kw]#val# findField : JSON.value \-> string \-> JSON.value option`:: + [[val:findField]] + `findField (JSON.OBJECT flds) key` returns `SOME jv` when the + list of fields `flds` contains `(key, jv)` and `NONE` otherwise. + If `findField` is called on a value that is not a *JSON* object, + then it raises the xref:exn:NotObject[`NotObject`] exception. + +`[.kw]#val# lookupField : JSON.value \-> string \-> JSON.value`:: + [[val:lookupField]] + `lookupField (JSON.OBJECT flds) key` returns `jv` when the + list of fields `flds` contains `(key, jv)` and raises the + xref:exn:FieldNotFound[`FieldNotFound`] exception otherwise. + If `lookupField` is called on a value that is not a *JSON* object, + then it raises the xref:exn:NotObject[`NotObject`] exception. + +`[.kw]#val# hasField : string \-> JSON.value \-> bool`:: + [[val:hasField]] + `hasField key v` returns `true` when the value `v` is a *JSON* object that + has a field with `key` as its label and `false` otherwise. + +`[.kw]#val# testField : string \-> (JSON.value \-> bool) \-> JSON.value \-> bool`:: + [[val:testField]] + `testField key pred v` returns the result of `pred jv` when + the value `v` is a *JSON* object that contains `(key, jv)`. + It returns `false` otherwise. + +`[.kw]#val# asArray : JSON.value \-> JSON.value vector`:: + [[val:asArray]] + `asArray jv` converts the *JSON* array value `jv` to an *SML* + vector value. It raises the xref:exn:NotArray[`NotArray`] exception + when `jv` is not a *JSON* array. + +`[.kw]#val# arrayMap : (JSON.value \-> 'a) \-> JSON.value \-> 'a list`:: + [[val:arrayMap]] + map a conversion function over a JSON array to produce a list; this function + raises the xref:exn:NotArray[`NotArray`] exception if the second argument + is not an array. + +`[.kw]#datatype# edge = ...`:: + specifies an edge of a path into a *JSON* value. + The constructors have the following meaning: ++ +-- + `SEL [.kw]#of# string`:: + `SEL key` specifies the value labeled by `key` in a *JSON* object. + `SUB [.kw]#of# int`:: + `SUB i` specifies the ``i``th element of a *JSON* array. + `FIND [.kw]#of# JSON.value \-> bool`:: + `FIND pred` specifies the first element of a *JSON* array that satisfies + the given predicate. +-- + +`[.kw]#type# path = edge list`:: + specifies a path into a *JSON* value. + +`[.kw]#val# get : JSON.value * path \-> JSON.value`:: + [[val:get]] + `get (jv, path)` returns the component of `jv` named by `path`. It raises + one of the xref:exn:NotObject[`NotObject`], xref:exn:NotArray[`NotArray`], + xref:exn:FieldNotFound[`FieldNotFound`], or xref:exn:ElemNotFound[`ElemNotFound`] + exceptions if there is an inconsistency between the path and the structure of `jv`. + +== See Also + +xref:str-JSON.adoc[`JSON`], +xref:str-JSON.adoc[`JSONDecode`], +xref:json-lib.adoc[__The JSON Library__] diff --git a/smlnj-lib/Doc/src/Makefile.in b/smlnj-lib/Doc/src/Makefile.in new file mode 100644 index 0000000..c8bb97d --- /dev/null +++ b/smlnj-lib/Doc/src/Makefile.in @@ -0,0 +1,91 @@ +# Makefile for SML/NJ Library documentation +# +# COPYRIGHT (c) 2020 The Fellowship of SML/NJ (http://www.smlnj.org) +# All rights reserved. +# +# @configure_input@ +# + +SHELL = @SHELL@ +INSTALL = @INSTALL@ +@SET_MAKE@ + +# documentation generator tools +# +GEN_PAGES = @TOOL_BIN_DIR@/gen-pages +GEN_PAGES_FLAGS = -v +MAKE_INDEX = @TOOL_BIN_DIR@/make-index + +ifeq ($(WEB),yes) +GEN_PAGES_FLAGS += --base-url=https://smlnj.org/doc/smlnj-lib +endif + +# place to put documentation for doc target +# +HTML_DST_ROOT = @OUT_DIR@ +STYLES_DST = $(HTML_DST_ROOT)/styles +IMAGES_DST = $(HTML_DST_ROOT)/images + +# root path to place generated HTML files for the SML/NJ web site +# +WEB_HTMLDST_ROOT = @ROOT_DIR@/htdocs +WEB_STYLES_DST = $(WEB_HTML_DST_ROOT)/styles + +# we have one sub-directory per documented library +# +DOC_SUBDIRS = Controls HashCons INet JSON Reactive RegExp SExp Unix Util UUID XML + +HTML_SUBDIRS = $(addprefix $(HTML_DST_ROOT)/,$(DOC_SUBDIRS)) + +ADOC_FILES = index.adoc \ + $(wildcard */*.adoc) + +# install-offline includes the images directory so that things render +# properly offline +.PHONY: install-offline +install-offline: install + mkdir -p $(IMAGES_DST) + cp -p images/smlnj-logo.png $(IMAGES_DST) + +.PHONY: install +install: $(HTML_DST_ROOT) css html + cp -p index.html $(HTML_DST_ROOT) + cp -p styles/smlnj-lib-base.css $(STYLES_DST) + cp -p styles/smlnj-lib.css $(STYLES_DST) + cp -p styles/smlnj-lib-pygments.css $(STYLES_DST) + for dir in $(DOC_SUBDIRS) ; do \ + cp -p $$dir/*html $(HTML_DST_ROOT)/$$dir ; \ + done + +.PHONY: css +css: + scripts/gen-css.sh styles/smlnj-lib-base_css.in > styles/smlnj-lib-base.css + scripts/gen-css.sh styles/smlnj-lib_css.in > styles/smlnj-lib.css + scripts/gen-css.sh styles/smlnj-lib-pygments_css.in > styles/smlnj-lib-pygments.css + +.PHONY: html +html: index.html + +index.html: index.json + $(GEN_PAGES) $(GEN_PAGES_FLAGS) + +index.json: $(ADOC_FILES) + $(MAKE_INDEX) index.json + +$(HTML_DST_ROOT): + mkdir -p $(HTML_DST_ROOT) + mkdir -p $(STYLES_DST) + for dir in $(DOC_SUBDIRS) ; do \ + mkdir -p $(HTML_DST_ROOT)/$$dir ; \ + done + + +#################### Cleanup #################### + +CLEAN_FILES = index.html */*html index.json \ + styles/smlnj-lib-base.css \ + styles/smlnj-lib.css \ + styles/smlnj-lib-pygments.css \ + tools/gen-pages/config.sml + +include @MK_DIR@/clean-rules.gmk diff --git a/smlnj-lib/Doc/src/PP/MODULES b/smlnj-lib/Doc/src/PP/MODULES new file mode 100644 index 0000000..ffab15d --- /dev/null +++ b/smlnj-lib/Doc/src/PP/MODULES @@ -0,0 +1,15 @@ +signature PP_DESC +signature PP_DEVICE +signature PP_TOKEN +signature PP_STREAM +functor PPStreamFn +functor PPDescFn +functor PPDebugFn +structure TextIOPP +structure TextPP +structure SimpleTextIODev +structure CharBufferPP +structure ANSITermDev +structure ANSITermPP +structure HTMLDev +structure StringToken diff --git a/smlnj-lib/Doc/src/PP/tutorial.adoc b/smlnj-lib/Doc/src/PP/tutorial.adoc new file mode 100644 index 0000000..5e20026 --- /dev/null +++ b/smlnj-lib/Doc/src/PP/tutorial.adoc @@ -0,0 +1,174 @@ += Using the Pretty-Printing Library +:Author: John Reppy +:Date: {release-date} +:stem: latexmath +:source-highlighter: pygments +:VERSION: {smlnj-version} + +== Introduction + +The pretty printing library is quite flexible, but requires some care in its use to +get the desired results. +This tutorial illustrates how the various mechanisms work and provides some examples +of how to implement common layouts. + +=== Background + +The pretty printer library was initially a port of the Ocaml `Format` module, which, in +turn is an https://doi.org/10.1145/357114.357115[_Oppen-style_] implementation of the +https://www-sop.inria.rr/croap/centaur/tutorial/tutorial.ps[_PPML] pretty-printing +model. + +Pretty printing is achived by issuing a sequence of printing commands, which the +library then uses to determine the layout. + + +=== Basics + +There are three key concepts that determine how text will be layed out: + +-- +* **boxes** are nested containers that determine the layout mode for their contents. + +* **breaks** are hints to the layout engine about where to break lines of text. + +* **indentation** controls the initial position of a line of text with respect to + the previous line. +-- + + +== Text + +// strings, tokens, nbSpace, breaks + +== Boxes + +Boxes provide a hiearchical structure that allows one to control the layout of +text. There are four main types of boxes: + +-- +* **Horizontal boxes (h) **, which pack text horizontally on a single line. + +* **Vertical boxes (v)**, which pack text vertically. + +* **Horizontal or vertical boxes (hov)**, which behave like h-boxes unless there + is insufficient space, in which case they behave like v-boxes. + +* **Horizontal then vertical boxes (hv)**, which pack text horizontally until there is + no more room, at which point a new line is started. These could also be called + _paragraph boxes_, since the layout the text in paragraph-like blocks. +-- + +In addition, there is the structural box, which is similar to hv-boxes, but with some +subtle differences that are described below. + +To add a box to the pretty printing stream, we first call the appropriate open +function (_e.g._, `openHBox`), then emit the contents of the box, and then call the `closeBox` +function. +For every open-box function call, there must be a matching call to `closeBox`. + +== Example -- Layout of C Code + +In this section, we illustrate a couple of different approaches +to formatting a simple C-like syntax. +We use the following datatypes to represent simple C statments and +functions, where we are using strings to represent expressions, _etc_. + +[source,sml] +------------ +datatype stm + = ASGN of string * string + | IF of string * stm * stm + | WHILE of string * stm list + | BLK of stm list + +datatype func + = FUNC of string * stm list +------------ + +=== Using a K&R Style Layout + +[source,sml] +------------ +fun knr ppS (FUNC(proto, body)) = let + val indent = PP.Abs 4 + fun sp () = PP.space ppS 1 + fun string s = PP.string ppS s + fun ppStm (ASGN(lhs, rhs)) = ( + PP.openHBox ppS; + string lhs; sp(); string "="; sp(); string rhs; + PP.closeBox ppS) + | ppStm (IF(cond, thenStm, elseStm)) = ( + PP.openVBox ppS (PP.Abs 0); + PP.openHBox ppS; + string "if"; sp(); string cond; + PP.closeBox ppS; + ppBlk thenStm; + PP.cut ppS; + PP.openHBox ppS; + string "else"; sp(); + PP.closeBox ppS; + ppBlk elseStm; + PP.closeBox ppS) + | ppStm (stm as BLK _) = ppBlk stm + | ppStm (WHILE(cond, body)) = ( + PP.openVBox ppS (PP.Abs 0); + PP.openHBox ppS; + string "while"; sp(); string cond; sp(); + PP.closeBox ppS; + ppBlk body; + PP.closeBox ppS) + and ppBlk (BLK stms) = ( + PP.openVBox ppS (PP.Abs 0); + string "{"; + PP.openVBox ppS indent; + List.app (fn stm => (PP.cut ppS; ppStm stm)) stms; + PP.closeBox ppS; + PP.cut ppS; + string "}"; + PP.closeBox ppS) + | ppBlk stm = ( + PP.break ppS {nsp=0, offset=inAmt}; + ppStm stm) + in + PP.openVBox ppS (PP.Abs 0); + PP.openVBox ppS indent; + PP.openHBox ppS; + string proto; + sp (); + string "{"; + PP.closeBox ppS; + List.app (fn stm => (PP.cut ppS; ppStm stm)) body; + PP.closeBox ppS; + PP.cut ppS; + string "}"; + PP.cut ppS; + PP.closeBox ppS + end +------------ + +=== Some Variations + +[source,c] +---------- +if ( ... ) { + ... +} else { + ... +} +---------- + +[source,sml] +------------ +| ppStm (IF(cond, thenBlk as BLK _, elseBlk as BLK _)) = ( + PP.openVBox ppS (PP.Abs 0); + PP.openHBox ppS; + string "if"; sp(); string cond; + PP.closeBox ppS; + ppBlk thenStm; + PP.openHBox ppS; + string "else"; sp(); + PP.closeBox ppS; + ppBlk elseStm; + PP.closeBox ppS) +------------ diff --git a/smlnj-lib/Doc/src/Reactive/MODULES b/smlnj-lib/Doc/src/Reactive/MODULES new file mode 100644 index 0000000..241dc82 --- /dev/null +++ b/smlnj-lib/Doc/src/Reactive/MODULES @@ -0,0 +1 @@ +structure Reactive diff --git a/smlnj-lib/Doc/src/Reactive/reactive-lib.adoc b/smlnj-lib/Doc/src/Reactive/reactive-lib.adoc new file mode 100644 index 0000000..621ee18 --- /dev/null +++ b/smlnj-lib/Doc/src/Reactive/reactive-lib.adoc @@ -0,0 +1,41 @@ += The Reactive Library +:Author: John Reppy +:Date: {release-date} +:stem: latexmath +:source-highlighter: pygments +:VERSION: {smlnj-version} + +== Overview + +The *Reactive Library* is an implementation of a low-level reactive +engine (or toolkit). This implementation is a port of the Java +http://www-sop.inria.fr/meije/rc/SugarCubes/index.html[*SugarCubes*] +Library. The main difference (aside from the implementation language) +is that we support preemption of actions (as in Berry's +https://doi.org/10.1145/158511.158526[_Communicating Reactive Processes_] +model). + +== Contents + +xref:str-Reactive.adoc[`[.kw]#structure# Reactive`]:: + something + +== Usage + +For https://smlnj.org[*SML/NJ*], include `$/reactive-lib.cm` in your +*CM* file. + +For use in https://www.mlton.org/[*MLton*], include +`$(SML_LIB)/smlnj-lib/Reactive/reactive-lib.mlb` in your *MLB* file. + +ifdef::backend-pdf[] + +// Push titles down one level. +:leveloffset: 1 + +include::str-Reactive.adoc[] + +// Return to normal title levels. +:leveloffset: 0 + +endif::[] diff --git a/smlnj-lib/Doc/src/Reactive/str-Reactive.adoc b/smlnj-lib/Doc/src/Reactive/str-Reactive.adoc new file mode 100644 index 0000000..19a38c9 --- /dev/null +++ b/smlnj-lib/Doc/src/Reactive/str-Reactive.adoc @@ -0,0 +1,246 @@ += The `Reactive` structure +:Author: John Reppy +:Date: {release-date} +:stem: latexmath +:source-highlighter: pygments +:VERSION: {smlnj-version} + +The `Reactive` structure provides types and operations to build and +run reactive systems. The inputs and outputs of a reactive system +are sets of _signals_, which can either be _present_ (_i.e._, `true`) +or _absent_ (_i.e._, `false`). A reactive system runs in discrete +timesteps. + + +== Synopsis + +[source,sml] +------------ +signature REACTIVE +structure Reactive : REACTIVE +------------ + +== Interface + +[source,sml] +------------ +type machine +type instruction +type signal = Atom.atom +type config +type in_signal +type out_signal + +val machine : { + inputs : signal list, + outputs : signal list, + body : instruction + } -> machine + +val run : machine -> bool +val reset : machine -> unit + +val inputsOf : machine -> in_signal list +val outputsOf : machine -> out_signal list + +val inputSignal : in_signal -> signal +val outputSignal : out_signal -> signal + +val setInSignal : (in_signal * bool) -> unit +val getInSignal : in_signal -> bool +val getOutSignal : out_signal -> bool + +val || : (instruction * instruction) -> instruction +val & : (instruction * instruction) -> instruction + +val nothing : instruction +val stop : instruction +val suspend : instruction + +val action : (machine -> unit) -> instruction +val exec : (machine -> {stop : unit -> unit, done : unit -> bool}) + -> instruction + +val ifThenElse : ((machine -> bool) * instruction * instruction) -> instruction +val repeat : (int * instruction) -> instruction +val loop : instruction -> instruction +val close : instruction -> instruction + +val signal : (signal * instruction) -> instruction +val rebind : (signal * signal * instruction) -> instruction +val when : (config * instruction * instruction) -> instruction +val trap : (config * instruction) -> instruction +val trapWith : (config * instruction * instruction) -> instruction +val emit : signal -> instruction +val await : config -> instruction + +val posConfig : signal -> config +val negConfig : signal -> config +val orConfig : (config * config) -> config +val andConfig : (config * config) -> config +------------ + +== Description + +The description of the interface is organized into sections. + +=== Types + +`[.kw]#type# machine`:: + The type of a reactive system. + +`[.kw]#type# instruction`:: + The abstract representation of a reactive program. + +`[.kw]#type# signal = xref:../Util/str-Atom.adoc#type:atom[Atom.atom]`:: + The name of a signal. + +`[.kw]#type# config`:: + A signal configuration is a logical combination of signals. + +`[.kw]#type# in_signal`:: + An input signal for a reactive system. + +`[.kw]#type# out_signal`:: + An output signal for a reactive system. + +=== Machines + +`[.kw]#val# machine : { ... } \-> machine`:: + `machine {inputs, outputs, body}` creates a new reactive system (or machine) + from a list of input signal names, a list of output signal names, and a reactive + program. + +`[.kw]#val# run : machine \-> bool`:: + `run m` will run the reactive system `m` one instant (or activation). + It returns `true` if, and only if, the machine ends in a terminal state + (_i.e., by executing the xref:val:stop[`stop`] instruction). + +`[.kw]#val# reset : machine \-> unit`:: + `reset m` resets the state of `m` to its initial state. + +`[.kw]#val# inputsOf : machine \-> in_signal list`:: + `inputsOf m` returns a list of the input signals in the machine. + +`[.kw]#val# outputsOf : machine \-> out_signal list`:: + `outputsOf m` returns a list of the output signals in the machine. + +[[val:nameOfInput]] +`[.kw]#val# nameOfInput : in_signal \-> signal`:: + `inputSignal inSig` returns the name of the input signal. + +[[val:nameOfOutput]] +`[.kw]#val# nameOfOutput : out_signal \-> signal`:: + `inputSignal outSig` returns the name of the output signal. + +`[.kw]#val# setInSignal : (in_signal * bool) \-> unit`:: + `setInSignal (inSig, b)` sets the value of the input signal to `b`. + +`[.kw]#val# getInSignal : in_signal \-> bool`:: + `getInSignal inSig` gets the current value of the input signal. + +`[.kw]#val# getOutSignal : out_signal \-> bool`:: + `getOutSignal inSig` gets the current value of the output signal. + +=== Instructions + +`[.kw]#val# || : (instruction * instruction) \-> instruction`:: + `|| (i1, i2)` forms the parallel composition of the two programs. + Activation of the resulting program will interleave the two + programs until either one of them suspends (see the + xref:val:suspend[`suspend`] instruction) or both programs terminate. + +`[.kw]#val# & : (instruction * instruction) \-> instruction`:: + `& (i1, i2)` forms the sequential composition of the two programs. + +`[.kw]#val# nothing : instruction`:: + The program that does nothing. + +[[val:stop]] +`[.kw]#val# stop : instruction`:: + The program that stops; _i.e._, reaches the terminal state for + the current and all future activations. + +[[val:suspend]] +`[.kw]#val# suspend : instruction`:: + The program that suspends the current activation. + +`[.kw]#val# action : (machine \-> unit) \-> instruction`:: + *something* + +`[.kw]#val# exec : (machine \-> {stop : unit \-> unit, done : unit \-> bool}) \-> instruction`:: + `exec f` returns a program that encapsulates the *SML* computation defined by + the function `f`. + +`[.kw]#val# ifThenElse : ((machine \-> bool) * instruction * instruction) \-> instruction`:: + *something* + +`[.kw]#val# repeat : (int * instruction) \-> instruction`:: + *something* + +`[.kw]#val# loop : instruction \-> instruction`:: + *something* + +`[.kw]#val# close : instruction \-> instruction`:: + *something* + +`[.kw]#val# signal : (signal * instruction) \-> instruction`:: + *something* + +`[.kw]#val# rebind : (signal * signal * instruction) \-> instruction`:: + *something* + +`[.kw]#val# when : (config * instruction * instruction) \-> instruction`:: + *something* + +`[.kw]#val# trapWith : (config * instruction * instruction) \-> instruction`:: + `trapWith (cfg, i1, i2)` returns the program that ... + +`[.kw]#val# trap : (config * instruction) \-> instruction`:: + `trap (cfg, i)` + This expression is equivalent to ++ +[source,sml] +------------ +trapWith (cfg, i, nothing) +------------ + +`[.kw]#val# emit : signal \-> instruction`:: + `emit sigId` returns the program that emits the signal with the given + name (_i.e._, the signal is _present_). + +`[.kw]#val# await : config \-> instruction`:: + `await cfg` returns the program that waits for the configuration to hold. + +=== Signal configurations + +`[.kw]#val# posConfig : signal \-> config`:: + `posConfig sigId` defines a configuration that holds if, and only if, + the signal named `sigId` is present. + +`[.kw]#val# negConfig : signal \-> config`:: + `negConfig sigId` defines a configuration that holds if, and only if, + the signal named `sigId` is *not* present. + +`[.kw]#val# orConfig : (config * config) \-> config`:: + `orConfig (cfg1, cfg2)` defines a configuration that holds if either `cfg1` + or `cfg2` (inclusive) holds. + +`[.kw]#val# andConfig : (config * config) \-> config`:: + `andConfig (cfg1, cfg2)` defines a configuration that holds if both `cfg1` + and `cfg2` hold. + +=== Deprecated functions + +The following functions are part of the interface, but have been +deprecated. + +`[.kw]#val# inputSignal : in_signal -> signal`:: + use xref:#val:nameOfInput[`nameOfInput`] instead. + +`[.kw]#val# outputSignal : out_signal -> signal`:: + use xref:#val:nameOfOutput[`nameOfOutput`] instead. + +== See Also + +xref:reactive-lib.adoc[__The Reactive Library__] diff --git a/smlnj-lib/Doc/src/RegExp/MODULES b/smlnj-lib/Doc/src/RegExp/MODULES new file mode 100644 index 0000000..ef57d3c --- /dev/null +++ b/smlnj-lib/Doc/src/RegExp/MODULES @@ -0,0 +1,9 @@ +signature REGEXP_PARSER +signature REGEXP_ENGINE +structure RegExpSyntax +structure MatchTree +structure AwkSyntax +structure BackTrackEngine +structure DfaEngine +structure ThompsonEngine +functor RegExpFn diff --git a/smlnj-lib/Doc/src/RegExp/fun-RegExpFn.adoc b/smlnj-lib/Doc/src/RegExp/fun-RegExpFn.adoc new file mode 100644 index 0000000..46fd508 --- /dev/null +++ b/smlnj-lib/Doc/src/RegExp/fun-RegExpFn.adoc @@ -0,0 +1,133 @@ += The `RegExpFn` functor +:Author: John Reppy +:Date: {release-date} +:stem: latexmath +:source-highlighter: pygments +:VERSION: {smlnj-version} + +The `RegExpFn` functor glues together a front-end regular-expression +parser with a back-end regular-expression engine. + +== Synopsis + +[source,sml] +------------ +signature REGEXP +functor RegExpFn ( + structure P : REGEXP_PARSER + structure E : REGEXP_ENGINE + ) :> REGEXP where type regexp = E.regexp +------------ + +== Functor Argument Interface + +[source,sml] +------------ +structure P : REGEXP_PARSER +structure E : REGEXP_ENGINE +------------ + +== Functor Argument Description + +`[.kw]#structure# P : xref:sig-REGEXP_PARSER.adoc[REGEXP_PARSER]`:: + The front-end parser for the regular-expression syntax. + +`[.kw]#structure# E : xref:sig-REGEXP_ENGINE.adoc[REGEXP_ENGINE]`:: + The back-end engine. + +== Interface + +[source,sml] +------------ +type regexp + +type 'a match = {pos : 'a, len : int} MatchTree.match_tree + +exception CannotParse + +val compile : (char,'a) StringCvt.reader -> (regexp, 'a) StringCvt.reader +val compileString : string -> regexp + +val find : regexp -> (char,'a) StringCvt.reader -> ('a match, 'a) StringCvt.reader + +val prefix : regexp -> (char,'a) StringCvt.reader -> ('a match, 'a) StringCvt.reader + +val match : (string * ('a match -> 'b)) list + -> (char,'a) StringCvt.reader -> ('b, 'a) StringCvt.reader +------------ + +== Interface Description + +`[.kw]#type# regexp`:: + The type of a compiled regular expression. + + (* a match specifies the position (as a stream) and the length of the match *) +`[.kw]#type# 'a match = {pos : 'a, len : int} MatchTree.match_tree`:: + A xref:str-MatchTree.adoc#type:match_tree[match tree] specifying the starting + position and size of matches. For a general + {sml-basis-url}/string-cvt.html#SIG:STRING_CVT.reader:TY[character reader] `getc`, + we can extract the string for a match using the following function: ++ +[source,sml] +------------ +fun getMatchString {pos, len} = let + fun get (_, 0, chrs) = String.implodeRev chrs + | get (strm, n, chrs) = let + val SOME(c, rest) = getc strm + in + get (rest, n-1, c::chrs) + end + in + get (pos, len, []) + end +------------ ++ +More direct means are possible for specific input sources (_e.g._, strings, +substrings, or text input). + +[[exn:CannotParse]] +`[.kw]#exception# CannotParse`:: + This exception is raised by the functions xref:#val:compileString[`compileString`] + xref:#val:match[`match`] when the front-end encounters a syntax error. + +`[.kw]#val# compile : (char,'a) StringCvt.reader \-> (regexp, 'a) StringCvt.reader`:: + `compile getc strm` parses and compiles a regular expression from the input + stream `strm` using the + {sml-basis-url}/string-cvt.html#SIG:STRING_CVT.reader:TY[character reader] `getc`. + If successful, it returns `SOME(re, strm')`, where `re` is the compiled + regular expression and ``strm'`` is the residual input stream. It returns + `NONE` if there is a syntax error in the input. If the source regular + expression contains features that are not supported by the back-end engine, + then the xref:str-RegExpSyntax.adoc#exn:CannotCompile[`CannotCompile`] + exception is raised. + +[[val:compileString]] +`[.kw]#val# compileString : string \-> regexp`:: + `compileString s` returns the compiled regular expression defined + by the string `s`. The xref:#exn:CannotParse[`CannotParse`] exception + is raised if there was a syntax error when parsing `s` and the + xref:str-RegExpSyntax.adoc#exn:CannotCompile[`CannotCompile`] exception + is raised if the source regular expression contains features that are + not supported by the back-end engine. + +`[.kw]#val# find : regexp \-> (char,'a) StringCvt.reader \-> ('a match, 'a) StringCvt.reader`:: + `find re getc strm` returns `SOME mt` where `mt` describes the *first* + match of `re` in the input stream; otherwise it returns `NONE` if there + is no match. + +`[.kw]#val# prefix : regexp \-> (char,'a) StringCvt.reader \-> ('a match, 'a) StringCvt.reader`:: + `prefix re getc strm` returns `SOME mt` where `mt` describes the + matching of `re` at the *beginning* of the input stream; otherwise it + returns `NONE` if `re` does not match a prefix of the input. + +[[val:match]] +`[.kw]#val# match : (string * ('a match \-> 'b)) list \-> (char,'a) StringCvt.reader \-> ('b, 'a) StringCvt.reader`:: + `match rules getc strm` attempts to match one of the `rules` starting at the + current stream position. Each rule is a pair of a regular expression and + an action. The rules are tested in order; if a rule `(re, act)` matches with + the result `mt`, then the result of `match` will be `SOME(act mt)`. + If no rule matches, then `NONE` is the result. + +== See Also + +xref:regexp-lib.adoc[__The RegExp Library__] diff --git a/smlnj-lib/Doc/src/RegExp/regexp-lib.adoc b/smlnj-lib/Doc/src/RegExp/regexp-lib.adoc new file mode 100644 index 0000000..4be02e0 --- /dev/null +++ b/smlnj-lib/Doc/src/RegExp/regexp-lib.adoc @@ -0,0 +1,79 @@ += The RegExp Library +:Author: John Reppy +:Date: {release-date} +:stem: latexmath +:source-highlighter: pygments +:VERSION: {smlnj-version} + +== Overview + +The *RegExp Library* ... + +== Contents + +xref:tutorial.adoc[Using the *RegExp Library*]:: + A tutorial guide to using this library. + +xref:sig-REGEXP_PARSER.adoc[`[.kw]#signature# REGEXP_PARSER`]:: + Defines the interface to a parser for a concrete + regular-expression syntax. + +xref:sig-REGEXP_ENGINE.adoc[`[.kw]#signature# REGEXP_ENGINE`]:: + Defines the interface to a regular-expression search engine. + +xref:str-RegExpSyntax.adoc[`[.kw]#structure# RegExpSyntax`]:: + Defines the abstract syntax of regular expressions. + +xref:str-MatchTree.adoc[`[.kw]#structure# MatchTree`]:: + Provides a tree-structured representation + of the result of a successful regular expression match. + +xref:str-AwkSyntax.adoc[`[.kw]#structure# AwkSyntax`]:: + A parser for the regular-expression syntax defined by the AWK language. + +xref:sig-REGEXP_ENGINE.adoc#str:BackTrackEngine[`[.kw]#structure# BackTrackEngine`]:: + A regular-expression search engine implemented as a back-tracking + interpreter. + +xref:sig-REGEXP_ENGINE.adoc#str:DfaEngine[`[.kw]#structure# DfaEngine`]:: + A regular-expression search engine implemented as a deterministic + finite automata. + +xref:sig-REGEXP_ENGINE.adoc#str:ThompsonEngine[`[.kw]#structure# ThompsonEngine`]:: + A regular-expression search engine based on Ken Thompson's design. + +xref:fun-RegExpFn.adoc[`[.kw]#functor# RegExpFn`]:: + Provides a functor for combining a regular-expression parser with an engine to + form a complete implementation of an regular-expression search module. + +== Usage + +For https://smlnj.org[*SML/NJ*], include `$/regexp-lib.cm` in your +*CM* file. + +For use in https://www.mlton.org/[*MLton*], include +`$(SML_LIB)/smlnj-lib/RegExp/regexp-lib.mlb` in your *MLB* file. + +ifdef::backend-pdf[] + +// Push titles down one level. +:leveloffset: 1 + +include::tutorial.adoc[] + +include::sig-REGEXP_PARSER.adoc[] + +include::sig-REGEXP_ENGINE.adoc[] + +include::str-RegExpSyntax.adoc[] + +include::str-MatchTree.adoc[] + +include::str-AwkSyntax.adoc[] + +include::fun-RegExpFn.adoc[] + +// Return to normal title levels. +:leveloffset: 0 + +endif::[] diff --git a/smlnj-lib/Doc/src/RegExp/sig-REGEXP_ENGINE.adoc b/smlnj-lib/Doc/src/RegExp/sig-REGEXP_ENGINE.adoc new file mode 100644 index 0000000..219552b --- /dev/null +++ b/smlnj-lib/Doc/src/RegExp/sig-REGEXP_ENGINE.adoc @@ -0,0 +1,101 @@ += The `REGEXP_ENGINE` signature +:Author: John Reppy +:Date: {release-date} +:stem: latexmath +:source-highlighter: pygments +:VERSION: {smlnj-version} + +The `REGEXP_ENGINE` signature ... + +== Synopsis + +[source,sml] +------------ +signature REGEXP_ENGINE + +structure BackTrackEngine : REGEXP_ENGINE +structure DfaEngine : REGEXP_ENGINE +structure ThompsonEngine : REGEXP_ENGINE +------------ + +== Interface + +[source,sml] +------------ +type regexp + +type 'a match = {pos : 'a, len : int} MatchTree.match_tree + +val compile : RegExpSyntax.syntax -> regexp + +val find : regexp -> (char,'a) StringCvt.reader -> ('a match, 'a) StringCvt.reader + +val prefix : regexp ->(char,'a) StringCvt.reader -> ('a match, 'a) StringCvt.reader + +val match : (RegExpSyntax.syntax * ('a match -> 'b)) list + -> (char,'a) StringCvt.reader -> ('b, 'a) StringCvt.reader +------------ + +== Description + +`[.kw]#type# regexp`:: + *something* + (* the type of a compiled regular expression + *) + + (* a match specifies the position (as a stream) and the length of the match *) +`[.kw]#type# 'a match = {pos : 'a, len : int} MatchTree.match_tree`:: + *something* + +`[.kw]#val# compile : RegExpSyntax.syntax \-> regexp`:: + *something* + (* compile a regular expression from the abstract syntax + *) + +`[.kw]#val# find : regexp \-> (char,'a) StringCvt.reader \-> ('a match, 'a) StringCvt.reader`:: + *something* + (* scan the stream for the first occurence of the regular expression. The call + * + * find re getc strm + * + * returns NONE if the end of stream is reached without a match. Otherwise it + * returns SOME(match, strm'), where match is the match-tree for the match and + * strm' is the stream following the match. + *) + +`[.kw]#val# prefix : regexp \->(char,'a) StringCvt.reader \-> ('a match, 'a) StringCvt.reader`:: + *something* + (* attempt to match the stream at the current position with the + * regular expression + *) + +`[.kw]#val# match : (RegExpSyntax.syntax * ('a match \-> 'b)) list \-> (char,'a) StringCvt.reader \-> ('b, 'a) StringCvt.reader`:: + *something* + (* attempt to the match the stream at the current position with one of + * the abstract syntax representations of regular expressions and trigger + * the corresponding action + *) + +== Instances + +[[str:BackTrackEngine]] +`[.kw]#structure# BackTrackEngine`:: + A backtracking interpreter for the regular-expression syntax. This implementation + requires no extra compilation time, but backtracking can result in slow searches + for some regular expressions. + +[[str:DfaEngine]] +`[.kw]#structure# DfaEngine`:: + This implementation compiles the regular expression to a nondeterministic + finite-state machine and then coverts that to a deterministic machine. + The resulting machine is fast, but can be exponential in size for some + regular expressions. + +[[str:ThompsonEngine]] +`[.kw]#structure# ThompsonEngine`:: + An implementation of Ken Thompson's famous + https://doi.org/10.1145/363347.363387[__Regular Expression Search Algorithm__]. + +== See Also + +xref:regexp-lib.adoc[__The RegExp Library__] diff --git a/smlnj-lib/Doc/src/RegExp/sig-REGEXP_PARSER.adoc b/smlnj-lib/Doc/src/RegExp/sig-REGEXP_PARSER.adoc new file mode 100644 index 0000000..882bdd1 --- /dev/null +++ b/smlnj-lib/Doc/src/RegExp/sig-REGEXP_PARSER.adoc @@ -0,0 +1,47 @@ += The `REGEXP_PARSER` signature +:Author: John Reppy +:Date: {release-date} +:stem: latexmath +:source-highlighter: pygments +:VERSION: {smlnj-version} + +The `REGEXP_PARSER` signature defines the interface to a parser for a +concrete regular-expression syntax. + +== Synopsis + +[source,sml] +------------ +signature REGEXP_PARSER + +structure AwkSyntax : REGEXP_PARSER +------------ + +== Interface + +[source,sml] +------------ +val scan : (char, 'a) StringCvt.reader -> (RegExpSyntax.syntax, 'a) StringCvt.reader +------------ + +== Description + +`[.kw]#val# scan : (char, 'a) StringCvt.reader \-> (RegExpSyntax.syntax, 'a) StringCvt.reader`:: + `scan getc strm` parses a regular expression from the input stream `strm` using + the {sml-basis-url}/string-cvt.html#SIG:STRING_CVT.reader:TY[character reader] `getc`. + If successful, it returns `SOME(re, strm')`, where `re` is the abstract syntax + of the regular expression and ``strm'`` is the residual input stream. It returns + `NONE` if there is a syntax error in the input. + +== Instances + +[[str:AwkSyntax]] +`[.kw]#structure# xref:str-AwkSyntax.adoc[AwkSyntax]`:: + A parser for the regular-expression syntax defined by the AWK language. + +== See Also + +xref:str-AwkSyntax.adoc[`AwkSyntax`], +xref:fun-RegExpFn.adoc[`RegExpFn`], +xref:str-RegExpSyntax.adoc[`RegExpSyntax`], +xref:regexp-lib.adoc[__The RegExp Library__] diff --git a/smlnj-lib/Doc/src/RegExp/str-AwkSyntax.adoc b/smlnj-lib/Doc/src/RegExp/str-AwkSyntax.adoc new file mode 100644 index 0000000..24b5eb6 --- /dev/null +++ b/smlnj-lib/Doc/src/RegExp/str-AwkSyntax.adoc @@ -0,0 +1,60 @@ += The `AwkSyntax` structure +:Author: John Reppy +:Date: {release-date} +:stem: latexmath +:source-highlighter: pygments +:VERSION: {smlnj-version} + +The `AwkSyntax` structure implements the AWK syntax for regular expressions. +The syntax is defined on pp. 28-30 of _The AWK Programming Language_, +by Aho, Kernighan and Weinberger. The syntax has been extended with interval +syntax, which was added as part of the POSIX standard. + +== Synopsis + +[source,sml] +------------ +structure AwkSyntax : REGEXP_PARSER +------------ + +== Description + +The meta characters are: + "\" "^" "$" "." "[" "]" "|" "(" ")" "*" "+" "?" + + Atomic REs: + c matches the character c (for non-metacharacters c) + "^" matches the empty string at the beginning of a line + "$" matches the empty string at the end of a line + "." matches any single character (except \000 and \n) + + Escape sequences: + "\b" matches backspace + "\f" matches formfeed + "\n" matches newline (linefeed) + "\r" matches carriage return + "\t" matches tab + "\"ddd matches the character with octal code ddd. + "\"c matches the character c (e.g., \\ for \, \" for ") + "\x"dd matches the character with hex code dd. + + Character classes: + [...] matches any character in "..." + [^...] a complemented character list, which matches any character not + in the list "..." + + Compound regular expressions, where A and B are REs: + A|B matches A or B + AB matches A followed by B + A? matches zero or one As + A* matches zero or more As + A+ matches one or more As + A{n} matches n copies of A + A{n,} matches n copies of A + A{n,m} matches n copies of A + (A) matches A + + +== See Also + +xref:regexp-lib.adoc[__The RegExp Library__] diff --git a/smlnj-lib/Doc/src/RegExp/str-MatchTree.adoc b/smlnj-lib/Doc/src/RegExp/str-MatchTree.adoc new file mode 100644 index 0000000..bc79dd5 --- /dev/null +++ b/smlnj-lib/Doc/src/RegExp/str-MatchTree.adoc @@ -0,0 +1,87 @@ += The `MatchTree` structure +:Author: John Reppy +:Date: {release-date} +:stem: latexmath +:source-highlighter: pygments +:VERSION: {smlnj-version} + +The `MatchTree` structure provides a tree-structured representation +of the result of a successful regular expression match. The tree structure +corresponds to the nesting of xref:str-RegExpSyntax.adoc#con:Group[groups] +in the regular expression. + +== Synopsis + +[source,sml] +------------ +signature MATCH_TREE +structure MatchTree : MATCH_TREE +------------ + +== Interface + +[source,sml] +------------ +datatype 'a match_tree = Match of 'a * 'a match_tree list + +val root : 'a match_tree -> 'a +val nth : ('a match_tree * int) -> 'a (* raises Subscript *) +val map : ('a -> 'b) -> 'a match_tree -> 'b match_tree +val app : ('a -> unit) -> 'a match_tree -> unit +val foldl : ('a * 'b -> 'b) -> 'b -> 'a match_tree -> 'b +val foldr : ('a * 'b -> 'b) -> 'b -> 'a match_tree -> 'b +val find : ('a -> bool) -> 'a match_tree -> 'a option +val num : 'a match_tree -> int +------------ + +== Description + +[[type:match_tree]] +`[.kw]#datatype# 'a match_tree = Match of 'a * 'a match_tree list`:: + The representation of the results of a nested grouping of regular expressions. + The type variable ``'a`` is typically instantiated to information about the + particular range of the source that the node covers. For example, + it might be the pair of the start position and length of the match. + +`[.kw]#val# root : 'a match_tree \-> 'a`:: + `root mt` returns the information about the root (outermost) match in the tree. + +`[.kw]#val# nth : ('a match_tree * int) \-> 'a (* raises Subscript *)`:: + `nth (mt, i)` returns the information about the ``i``'th match in the tree, + where matches are labeled in _pre-order_ starting with `0` for the root. + This function raises the + {sml-basis-url}/general.html#SIG:GENERAL.Subscript:EXN[`Subscript`] exception + if `i < 0` or there are fewer than `i-1` nodes in the tree. + +`[.kw]#val# map : ('a \-> 'b) \-> 'a match_tree \-> 'b match_tree`:: + `map f mt` returns the result of mapping the function `f` over `mt`. + For example, this function can be used to convert a match-tree of + position information to a tree of strings. The function is applied + to the tree in pre-order. + +`[.kw]#val# app : ('a \-> unit) \-> 'a match_tree \-> unit`:: + `app f mt` applies the given function to the nodes in the tree + in pre-order. + +`[.kw]#val# foldl : ('a * 'b \-> 'b) \-> 'b \-> 'a match_tree \-> 'b`:: + `foldl f init mt` folds the function `f` over `mt` in left-to-right pre-order + using `init` as the initial value. + +`[.kw]#val# foldr : ('a * 'b \-> 'b) \-> 'b \-> 'a match_tree \-> 'b`:: + `foldr f init mt` folds the function `f` over `mt` in right-to-left post-order + using `init` as the initial value. + +`[.kw]#val# find : ('a \-> bool) \-> 'a match_tree \-> 'a option`:: + `find pred mt` returns `SOME info` where `info` is the first + information that satisfies `pred` in a pre-order traversal of + the tree. It returns `NONE` if there is no match information + that satisfies `pred`. + +`[.kw]#val# num : 'a match_tree \-> int`:: + `num mt` returns the number of *sub-matches* in the tree; _i.e._, the number + of nodes not counting the root. + +== See Also + +xref:fun-RegExpFn.adoc[`RegExpFn`], +xref:regexp-lib.adoc[__The RegExp Library__] diff --git a/smlnj-lib/Doc/src/RegExp/str-RegExpSyntax.adoc b/smlnj-lib/Doc/src/RegExp/str-RegExpSyntax.adoc new file mode 100644 index 0000000..361be3b --- /dev/null +++ b/smlnj-lib/Doc/src/RegExp/str-RegExpSyntax.adoc @@ -0,0 +1,196 @@ += The `RegExpSyntax` structure +:Author: John Reppy +:Date: {release-date} +:stem: latexmath +:source-highlighter: pygments +:VERSION: {smlnj-version} + +The `RegExpSyntax` structure provides an abstract-syntax-tree +representation of regular expressions. Its main purpose is to +provide communication between different front-ends (implementing +different RE specification languages), and different back-ends +(implementing different compilation/searching algorithms). +It is also possible, however, to use it as a way to directly +specify a regular expression for a back-end engine. + +== Synopsis + +[source,sml] +------------ +signature REGEXP_SYNTAX +structure RegExpSyntax : REGEXP_SYNTAX +------------ + +== Interface + +[source,sml] +------------ +exception CannotCompile + +structure CharSet : ORD_SET where type Key.ord_key = char + +datatype syntax + = Group of syntax + | Alt of syntax list + | Concat of syntax list + | Interval of (syntax * int * int option) + | MatchSet of CharSet.set + | NonmatchSet of CharSet.set + | Char of char + | Begin + | End + +val optional : syntax -> syntax +val closure : syntax -> syntax +val posClosure : syntax -> syntax + +val fromRange : char * char -> CharSet.set +val addRange : CharSet.set * char * char -> CharSet.set + +val allChars : CharSet.set + +val alnum : CharSet.set +val alpha : CharSet.set +val ascii : CharSet.set +val blank : CharSet.set +val cntl : CharSet.set +val digit : CharSet.set +val graph : CharSet.set +val lower : CharSet.set +val print : CharSet.set +val punct : CharSet.set +val space : CharSet.set +val upper : CharSet.set +val word : CharSet.set +val xdigit : CharSet.se +------------ + +== Description + +[[exn:CannotCompile]] +`[.kw]#exception# CannotCompile`:: + This exception is meant to be raised by back-ends when they encounter + a feature that they cannot handle. + +`[.kw]#structure# CharSet : ORD_SET where type Key.ord_key = char`:: + This substructure implements sets of 8-bit characters. Currently it + is implemented using sorted lists (_i.e._, using the + xref:../Util/fun-ListSetFn.adoc[`ListSetFn`] functor), but that may + be changed in the future. + +`[.kw]#datatype# syntax`:: + This datatype defines the abstract syntax of regular expressions that + is supported by the library. The constructors are defined as follows: ++ +-- +[[con:Group]] +* `Group re`:: + defines a match group (_i.e._, that produce a corresponding + match-tree node for the input matched by `re`. + +* `Alt[re1, re2, ..., ren]`:: + matches any of `re1`, `re2`, ..., `ren`. If the list is empty, then it + matches nothing. + +* `Concat[re1, re2, ..., ren]`:: + matches the concatenation of `re1`, `re2`, ..., `ren`. If the list + is empty, then it matches the empty string. + +* `Interval(re, n, NONE)`:: + matches `re` repeated at least `n` times. + +* `Interval(re, n, SOME m)`:: + matches `re` repeated from `n` to `m` times. + +* `MatchSet cs`:: + matches a single character that is in the set `cs`. + +* `NonmatchSet cs`:: + matches a single character that is *not* in the set `cs`. + +* `Char c`:: + matches the single character `c`. + +* `Begin`:: + matches beginning of the input stream. + +* `End`:: + matches end of the input stream. +-- + +`[.kw]#val# optional : syntax -> syntax`:: + `optional re` is equivalent to `Interval(re, 0, SOME 1)`. + +`[.kw]#val# closure : syntax -> syntax`:: + `closure re` is equivalent to `Interval(re, 0, NONE)`. + +`[.kw]#val# posClosure : syntax -> syntax`:: + `posClosure re` is equivalent to `Interval(re, 1, NONE)`. + +`[.kw]#val# fromRange : char * char \-> CharSet.set`:: + `fromRange (c1, c2)` returns the set containing the characters + in the range from `c1` to `c2` (inclusive). This expression + raises the {sml-basis-url}/general.html#SIG:GENERAL.Size:EXN[`Size`] + exception if `c2 < c1`. + +`[.kw]#val# addRange : CharSet.set * char * char \-> CharSet.set`:: + `addRange (cs, c1, c2)` adds the set of characters in the range + from `c1` to `c2` (inclusive) to `cs`. This expression raises + the {sml-basis-url}/general.html#SIG:GENERAL.Size:EXN[`Size`] + exception if `c2 < c1`. + +`[.kw]#val# allChars : CharSet.set`:: + is the set of all 8-bit characters. + +=== POSIX Character Classes + +The `RegExpSyntax` structure pre-defines the following character sets, +which are part of the POSIX regular-expression standard (plus a couple +of extras): + +`[.kw]#val# alnum : CharSet.set`:: + is the set of letters and digits. + +`[.kw]#val# alpha : CharSet.set`:: + is the set of letters. + +`[.kw]#val# ascii : CharSet.set`:: + is the set of characters `c` such that `0 \<= ord c \<= 127`. + +`[.kw]#val# blank : CharSet.set`:: + is the set of ``#"\t"`` and space. + +`[.kw]#val# cntl : CharSet.set`:: + is the set of non-printable characters. + +`[.kw]#val# digit : CharSet.set`:: + is the set of decimal digits. + +`[.kw]#val# graph : CharSet.set`:: + is the set of visible characters (does not include space). + +`[.kw]#val# lower : CharSet.set`:: + is the set of lower-case letters. + +`[.kw]#val# print : CharSet.set`:: + is the set of printable characters (includes space). + +`[.kw]#val# punct : CharSet.set`:: + is the set of visible characters other than letters and digits. + +`[.kw]#val# space : CharSet.set`:: + is the set of ``\#"\t"``, ``#"\r"``, ``\#"\n"``, ``#"\v"``, ``#"\f"``, and space. + +`[.kw]#val# upper : CharSet.set`:: + is the set of upper-case letters. + +`[.kw]#val# word : CharSet.set`:: + is the set of letters, digit, and ``#"_"``. + +`[.kw]#val# xdigit : CharSet.set`:: + is the set of hexadecimal digits. + +== See Also + +xref:sig-REGEXP_ENGINE.adoc[`REGEXP_ENGINE`], +xref:regexp-lib.adoc[__The RegExp Library__] diff --git a/smlnj-lib/Doc/src/RegExp/tutorial.adoc b/smlnj-lib/Doc/src/RegExp/tutorial.adoc new file mode 100644 index 0000000..d8fa358 --- /dev/null +++ b/smlnj-lib/Doc/src/RegExp/tutorial.adoc @@ -0,0 +1,118 @@ += Using the RegExp Library +:Author: John Reppy +:Date: {release-date} +:stem: latexmath +:source-highlighter: pygments +:VERSION: {smlnj-version} + +== Introduction + +The *RegExp Library* is designed for flexibility; it allows mixing and +matching of different front-end syntax with back-end engines, as well +as supporting arbitrary input sources. +This flexibility, however, comes at the cost of making some of the +simple applications a bit less obvious. This tutorial shows how the +*RegExp Library* can be used in a variety of common applications. + +== Assembling an Regular Expression Matcher + +Before we can do anything else, we must assemble a regular-expression +matcher. For the purposes of this tutorial, we use a combination +of the `AwkSyntax` front-end and the `BackTrackEngine` back-end. + +[source,sml] +------------ +structure RE = RegExpFn( + structure P = AwkSyntax + structure E = BackTrackEngine) +------------ + +== Match trees + +Regular expressions may contain xref:str-RegExpSyntax.adoc#con:Group[grouping] +operators. When a pattern matches a string, these groups induce a nested +tree structure on the matched string. +The xref:str-MatchTree.adoc[``MatchTree``] structure defines +a polymorphic representation of this structure, along with a +number of utility functions for extracting information from +a match. + +[source,sml] +------------ +structure MT = MatchTree +------------ + + +== Example: scanning tokens + +The `match` function in the `REGEXP` signature allows one to distinguish +between a set of possible regular expression matches. One application of +this mechanism is a simple scanner. Let us define a datatype for tokens, +which can be white space, numbers, or identifiers. + +[source,sml] +------------ +datatype tok + = WS | NUM of IntInf.int | ID of string +------------ + +We can then define the scanner as follows: + +[source,sml] +------------ +fun scanner getc gets = let + fun getMatch cons (MT.Match({pos, len}, _)) = cons (gets (pos, len)) + in + RE.match [ + ("[ \t\n]+", getMatch (fn _ => WS)), + ("[0-9]+", getMatch (fn s => NUM(valOf(IntInf.fromString s)))), + ("[a-zA-Z][a-zA-Z0-9]*", getMatch ID) + ] getc + end +------------ + +Here the `getc` parameter is the standard character reader; we have also included +the `gets` parameter, which is a function of type + +[source,sml] +------------ +'strm * int -> string +------------ + +for getting a string from a stream. For many input sources, the `gets` function +has an efficient and direct implementation, but it can also be implemented in +terms of the `getc` function as follows: + +[source,sml] +------------ +fun gets getc (strm, n) = let + fun getChrs (0, _, chrs) = String.implodeRev chrs + | getChrs (n, strm, chrs) = (case getc strm + of NONE => raise Fail "empty stream" + | SOME(c, strm) => getChrs (n-1, strm, c::chrs) + (* end case *)) + in + getChrs (n, strm, []) + end; +------------ + +Because this function is only called *after* the `scanner` function has matched +a sequence of `n` characters from `strm`, the `"empty stream"` case will not +occur for well behaving input streams. + +Here is an example of using the scanner to tokenize strings, where we use the +*Basis Library* substring type to implement the stream type: + +[source,sml] +------------ +fun tokens s = let + fun gets (ss, n) = Substring.string(Substring.slice (ss, 0, SOME n)) + val scan = scanner Substring.getc gets + fun lp (ss, toks) = (case scan ss + of SOME(tok, ss') => lp (ss', tok::toks) + | NONE => List.rev toks + (* end case *)) + in + lp (Substring.full s, []) + end; +------------ diff --git a/smlnj-lib/Doc/src/SExp/MODULES b/smlnj-lib/Doc/src/SExp/MODULES new file mode 100644 index 0000000..3fc44db --- /dev/null +++ b/smlnj-lib/Doc/src/SExp/MODULES @@ -0,0 +1,4 @@ +structure SExp +structure SExpParser +structure SExpPP +structure SExpPrinter diff --git a/smlnj-lib/Doc/src/SExp/sexp-lib.adoc b/smlnj-lib/Doc/src/SExp/sexp-lib.adoc new file mode 100644 index 0000000..3afb3ab --- /dev/null +++ b/smlnj-lib/Doc/src/SExp/sexp-lib.adoc @@ -0,0 +1,105 @@ += The SExp Library +:Author: John Reppy +:Date: {release-date} +:stem: latexmath +:source-highlighter: pygments +:VERSION: {smlnj-version} + +== Overview + +The *SExp Library* supports the reading and writing of structured data using +the S-expression syntax. It is a work in progress, and does not fully conform with +any formal S-exp specification. + +- End-of-line comments begin with a semicolon (`;`) + +- An S-Expression is either an atomic token (boolean, number, symbol, or + string), a quoted expression, or a list of S-Expressions enclosed in brackets. + +- Quoted expressions are formed by the single-quote character (``'``) followed + by an expression. + +- Lists are delimited by matched pairs of `()` `[]` or `{}`, nested freely. + +- List items are separated with whitespace (space, tab, newlines, or carriage + returns). + +- Symbols (or _identifiers_) begin with an initial character followed by + zero or more _subsequent_ characters, where an initial character is + either a letter or one of the characters `-+.@!$%&*/:<=>?^_~` and + a subsequent character is either an initial character, a decimal digit, + or the character `#`. + +- Booleans are represented by the literals `#f` (false) and `#t` (true). + +- Numbers are either signed integers or floating-point numbers; the + sign (if present) is one of "'+'," "`-`," or "`~`". + +- Integers may be specified in decimal without any prefix, or in hexadecimal + with the prefix "0x". In hex, the value is assumed to be unsigned, so -255 + should be written "-0xff" rather than "0x-ff". + +- The format of a floating point number is described by the following + regular expression: ++ +[latexmath] +++++ + \mathit{sign}^{?}\,\mathit{digit}^{+}\,\mathtt{.}\;\mathit{digit}^{+}\, + ([\mathtt{eE}]\,\mathit{sign}^{?}\,\mathit{digit}^{+})^{?} +++++ ++ +Notably, "`1.`" and "`.1`" are invalid and "`1`" is parsed as an +integer -- floats must have a dot with digits +on both sides. + +- Strings are sequences of ASCII characters enclosed in double quotes (``"``). + We follow the syntax of *Scheme* strings as described in + https://www.scheme.com/tspl4/grammar.html#./grammar:strings[] ++ +The difference between symbols and strings is that symbols are represented as +Atom.atom types, so equality comparisons are more efficient. + +The original version of the library was written by Damon Wang +at the University of Chicago. It has since been modified and +maintained by John Reppy. + +== Contents + +xref:str-SExp.adoc[`[.kw]#structure# SExp`]:: + Defines the tree representation of S-expression data. + +xref:str-SExpParser.adoc[`[.kw]#structure# SExpParser`]:: + Implements an S-Expression parser. + +xref:str-SExpPP.adoc[`[.kw]#structure# SExpPP`]:: + Implements an S-Expression pretty-printer. + +xref:str-SExpPrinter.adoc[`[.kw]#structure# SExpPrinter`]:: + Implements an S-Expression printer that produces _condensed_ + output without indentation or line breaks. + +== Usage + +For https://smlnj.org[*SML/NJ*], include `$/sexp-lib.cm` in your +*CM* file. + +For use in https://www.mlton.org/[*MLton*], include +`$(SML_LIB)/smlnj-lib/SExp/sexp-lib.mlb` in your *MLB* file. + +ifdef::backend-pdf[] + +// Push titles down one level. +:leveloffset: 1 + +include::str-SExp.adoc[] + +include::str-SExpParser.adoc[] + +include::str-SExpPP.adoc[] + +include::str-SExpPrinter.adoc[] + +// Return to normal title levels. +:leveloffset: 0 + +endif::[] diff --git a/smlnj-lib/Doc/src/SExp/str-SExp.adoc b/smlnj-lib/Doc/src/SExp/str-SExp.adoc new file mode 100644 index 0000000..3280b16 --- /dev/null +++ b/smlnj-lib/Doc/src/SExp/str-SExp.adoc @@ -0,0 +1,76 @@ += The `SExp` structure +:Author: John Reppy +:Date: {release-date} +:stem: latexmath +:source-highlighter: pygments +:VERSION: {smlnj-version} + +The `SExp` structure defines the tree representation of S-expression data. + +== Synopsis + +[source,sml] +------------ +structure SExp +------------ + +== Interface + +[source,sml] +------------ +datatype value + = SYMBOL of Atom.atom + | BOOL of bool + | INT of IntInf.int + | FLOAT of real + | STRING of string + | QUOTE of value + | LIST of value list + +val same : value * value -> bool +val compare : value * value -> order +------------ + +== Description + +`[.kw]#datatype# value = ...`:: + This datatype represents S-expression trees. The constuctors are + defined as follows: ++ +-- + `SYMBOL [.kw]#of# Atom.atom`:: + represents an identifier. We use the xref:../Util/str-Atom.adoc#type:atom[`atom`] + type to represent these for fast equality testing. + + `BOOL [.kw]#of# bool`:: + represents a boolean literal. + + `INT [.kw]#of# IntInf.int`:: + represents an integer number. + + `FLOAT [.kw]#of# real`:: + represents a floating-point number. + + `STRING [.kw]#of# string`:: + represents a string value. + + `QUOTE [.kw]#of# value list`:: + represents a quoted value. + + `LIST [.kw]#of# value list`:: + represents a list of values. +-- + +`[.kw]#val# same : value * value \-> bool`:: + `same (se1, se2)` compares `se1` and `se2` for equality and returns + true if, and only if, they are equal. + +`[.kw]#val# compares : value * value \-> order`:: + `same (se1, se2)` compares `se1` and `se2` for their order. + +== See Also + +xref:str-SExpParser.adoc[`SExpParser`], +xref:str-SExpPP.adoc[`SExpPP`], +xref:str-SExpPrinter.adoc[`SExpPrinter`], +xref:sexp-lib.adoc[__The SExp Library__] diff --git a/smlnj-lib/Doc/src/SExp/str-SExpPP.adoc b/smlnj-lib/Doc/src/SExp/str-SExpPP.adoc new file mode 100644 index 0000000..7eaaafd --- /dev/null +++ b/smlnj-lib/Doc/src/SExp/str-SExpPP.adoc @@ -0,0 +1,34 @@ += The `SExpPP` structure +:Author: John Reppy +:Date: {release-date} +:stem: latexmath +:source-highlighter: pygments +:VERSION: {smlnj-version} + +The `SExpPP` structure implements a pretty printer for S-Expressions. + +== Synopsis + +[source,sml] +------------ +structure SExpPP +------------ + +== Interface + +[source,sml] +------------ +val output : TextIOPP.stream * SExp.value -> unit +------------ + +== Description + +`[.kw]#val# output : TextIOPP.stream * SExp.value \-> unit`:: + `output (ppS, v)` pretty prints the value using the specified + pretty-printing stream. + +== See Also + +xref:str-SExp.adoc[`SExp`], +xref:str-SExpParser.adoc[`SExpParser`], +xref:sexp-lib.adoc[__The SExp Library__] diff --git a/smlnj-lib/Doc/src/SExp/str-SExpParser.adoc b/smlnj-lib/Doc/src/SExp/str-SExpParser.adoc new file mode 100644 index 0000000..ce66b56 --- /dev/null +++ b/smlnj-lib/Doc/src/SExp/str-SExpParser.adoc @@ -0,0 +1,41 @@ += The `SExpParser` structure +:Author: John Reppy +:Date: {release-date} +:stem: latexmath +:source-highlighter: pygments +:VERSION: {smlnj-version} + +The `SExpParser` structure implements a parser for S-Expressions. + +== Synopsis + +[source,sml] +------------ +structure SExpParser +------------ + +== Interface + +[source,sml] +------------ +val parse : TextIO.instream -> SExp.value list + +val parseFile : string -> SExp.value list +------------ + +== Description + +`[.kw]#val# parse : TextIO.instream \-> SExp.value list`:: + `parse inS` parses a sequence of S-Expressions from the input + stream `inS`. The {sml-basis-url}/general.html#SIG:GENERAL.Fail:EXN[`Fail`] + exception is raised if a syntax error is encountered. + +`[.kw]#val# parseFile : string \-> SExp.value list`:: + `parse f` parses a sequence of S-Expressions from the file `f`. + The {sml-basis-url}/general.html#SIG:GENERAL.Fail:EXN[`Fail`] + exception is raised if a syntax error is encountered. + +== See Also + +xref:str-SExp.adoc[`SExp`], +xref:sexp-lib.adoc[__The SExp Library__] diff --git a/smlnj-lib/Doc/src/SExp/str-SExpPrinter.adoc b/smlnj-lib/Doc/src/SExp/str-SExpPrinter.adoc new file mode 100644 index 0000000..4ff429f --- /dev/null +++ b/smlnj-lib/Doc/src/SExp/str-SExpPrinter.adoc @@ -0,0 +1,36 @@ += The `SExpPrinter` structure +:Author: John Reppy +:Date: {release-date} +:stem: latexmath +:source-highlighter: pygments +:VERSION: {smlnj-version} + +The `SExpPrinter` structure implements a printer for S-Expressions. +This printer does not introduce any line breaks. For output that +is more readable, use the xref:str-SExpPP.adoc[`SExpPP`] module. + +== Synopsis + +[source,sml] +------------ +structure SExpPrinter +------------ + +== Interface + +[source,sml] +------------ +val print : TextIO.outstream * SExp.value -> unit +------------ + +== Description + +`[.kw]#val# print : TextIO.outstream * SExp.value \-> unit`:: + `print (outS, v)` prints the S-Expression value `v` to the + output stream `outS`. + +== See Also + +xref:str-SExp.adoc[`SExp`], +xref:str-SExpPP.adoc[`SExpPP`], +xref:sexp-lib.adoc[__The SExp Library__] diff --git a/smlnj-lib/Doc/src/Templates/README.md b/smlnj-lib/Doc/src/Templates/README.md new file mode 100644 index 0000000..9477e79 --- /dev/null +++ b/smlnj-lib/Doc/src/Templates/README.md @@ -0,0 +1,16 @@ +This directory contains templates for creating *asciidoctor* files +to document libraries in the SML/NJ Library. The templates are + + `lib.adoc`:: + main file for documenting a library + + `sig.adoc`:: + template for documenting a signature + + `str.adoc`:: + template for documenting a structure that does not + have a named signature + + `fun.adoc`:: + template for documenting a functor + diff --git a/smlnj-lib/Doc/src/Templates/fun.adoc b/smlnj-lib/Doc/src/Templates/fun.adoc new file mode 100644 index 0000000..6f5df8f --- /dev/null +++ b/smlnj-lib/Doc/src/Templates/fun.adoc @@ -0,0 +1,47 @@ += The `@NAME@` functor +:Author: John Reppy +:Date: {release-date} +:stem: latexmath +:source-highlighter: pygments +:VERSION: {smlnj-version} + +The `@NAME@` functor ... + +== Synopsis + +[source,sml] +------------ +functor @NAME@ () +------------ + +== Functor Argument Interface + +[source,sml] +------------ +------------ + +== Functor Argument Description + +`[.kw]#type# foo`:: + something + +`[.kw]#val# bar : foo -> foo`:: + something + +== Interface + +[source,sml] +------------ +------------ + +== Interface Description + +`[.kw]#type# foo`:: + something + +`[.kw]#val# bar : foo -> foo`:: + something + +== See Also + +xref:@LIBRARY@.adoc[__The @DIR@ Library__] diff --git a/smlnj-lib/Doc/src/Templates/lib-foot.adoc b/smlnj-lib/Doc/src/Templates/lib-foot.adoc new file mode 100644 index 0000000..fcdb6e1 --- /dev/null +++ b/smlnj-lib/Doc/src/Templates/lib-foot.adoc @@ -0,0 +1,5 @@ + +// Return to normal title levels. +:leveloffset: 0 + +endif::[] diff --git a/smlnj-lib/Doc/src/Templates/lib-head.adoc b/smlnj-lib/Doc/src/Templates/lib-head.adoc new file mode 100644 index 0000000..3b4a2fc --- /dev/null +++ b/smlnj-lib/Doc/src/Templates/lib-head.adoc @@ -0,0 +1,13 @@ += The @DIR@ Library +:Author: John Reppy +:Date: {release-date} +:stem: latexmath +:source-highlighter: pygments +:VERSION: {smlnj-version} + +== Overview + +The *@DIR@ Library* ... + +== Contents + diff --git a/smlnj-lib/Doc/src/Templates/lib-mid.adoc b/smlnj-lib/Doc/src/Templates/lib-mid.adoc new file mode 100644 index 0000000..5ea5a48 --- /dev/null +++ b/smlnj-lib/Doc/src/Templates/lib-mid.adoc @@ -0,0 +1,13 @@ +== Usage + +For https://smlnj.org[*SML/NJ*], include `$/@LIBRARY@.cm` in your +*CM* file. + +For use in https://www.mlton.org/[*MLton*], include +`$(SML_LIB)/smlnj-lib/@DIR@/@LIBRARY@.mlb` in your *MLB* file. + +ifdef::backend-pdf[] + +// Push titles down one level. +:leveloffset: 1 + diff --git a/smlnj-lib/Doc/src/Templates/sig.adoc b/smlnj-lib/Doc/src/Templates/sig.adoc new file mode 100644 index 0000000..fb7d386 --- /dev/null +++ b/smlnj-lib/Doc/src/Templates/sig.adoc @@ -0,0 +1,33 @@ += The `@NAME@` signature +:Author: John Reppy +:Date: {release-date} +:stem: latexmath +:source-highlighter: pygments +:VERSION: {smlnj-version} + +The `@NAME@` signature ... + +== Synopsis + +[source,sml] +------------ +signature @NAME@ +------------ + +== Interface + +[source,sml] +------------ +------------ + +== Description + +`[.kw]#type# foo`:: + something + +`[.kw]#val# bar : foo -> foo`:: + something + +== See Also + +xref:@LIBRARY@.adoc[__The @DIR@ Library__] diff --git a/smlnj-lib/Doc/src/Templates/str.adoc b/smlnj-lib/Doc/src/Templates/str.adoc new file mode 100644 index 0000000..366dc39 --- /dev/null +++ b/smlnj-lib/Doc/src/Templates/str.adoc @@ -0,0 +1,33 @@ += The `@NAME@` structure +:Author: John Reppy +:Date: {release-date} +:stem: latexmath +:source-highlighter: pygments +:VERSION: {smlnj-version} + +The `@NAME@` structure ... + +== Synopsis + +[source,sml] +------------ +structure @NAME@ +------------ + +== Interface + +[source,sml] +------------ +------------ + +== Description + +`[.kw]#type# foo`:: + something + +`[.kw]#val# bar : foo -> foo`:: + something + +== See Also + +xref:@LIBRARY@.adoc[__The @DIR@ Library__] diff --git a/smlnj-lib/Doc/src/UUID/MODULES b/smlnj-lib/Doc/src/UUID/MODULES new file mode 100644 index 0000000..535e1e1 --- /dev/null +++ b/smlnj-lib/Doc/src/UUID/MODULES @@ -0,0 +1,2 @@ +structure UUID +structure GenUUID diff --git a/smlnj-lib/Doc/src/UUID/str-GenUUID.adoc b/smlnj-lib/Doc/src/UUID/str-GenUUID.adoc new file mode 100644 index 0000000..d4b7a03 --- /dev/null +++ b/smlnj-lib/Doc/src/UUID/str-GenUUID.adoc @@ -0,0 +1,34 @@ += The `GenUUID` structure +:Author: John Reppy +:Date: {release-date} +:stem: latexmath +:source-highlighter: pygments +:VERSION: {smlnj-version} + +The `GenUUID` structure implements a generator for random +(Variant 1, Type 4) UUIDs. The random number generator +is seeded by the current time. + +== Synopsis + +[source,sml] +------------ +structure GenUUID +------------ + +== Interface + +[source,sml] +------------ +val new : unit -> UUID.t +------------ + +== Description + +`[.kw]#val# new : unit \-> UUID.t`:: + `new ()` generates a new Variant 1, Type 4 UUID. + +== See Also + +xref:str-UUID.adoc[`UUID`], +xref:uuid-lib.adoc[__The UUID Library__] diff --git a/smlnj-lib/Doc/src/UUID/str-UUID.adoc b/smlnj-lib/Doc/src/UUID/str-UUID.adoc new file mode 100644 index 0000000..f079b44 --- /dev/null +++ b/smlnj-lib/Doc/src/UUID/str-UUID.adoc @@ -0,0 +1,93 @@ += The `UUID` structure +:Author: John Reppy +:Date: {release-date} +:stem: latexmath +:source-highlighter: pygments +:VERSION: {smlnj-version} + +The `UUID` structure provides an implementation of +https://en.wikipedia.org/wiki/Universally_unique_identifier[ +UUIDs (__**U**niversally **U**nique **ID**entifiers__)]. +UUIDs, which are also known as GUIDs (__**G**lobally **U**nique **ID**entifiers__), +are sequences of 16-bytes. + +== Synopsis + +[source,sml] +------------ +structure UUID +------------ + +== Interface + +[source,sml] +------------ +type t + +val null : t + +val compare : t * t -> order + +val same : t * t -> bool + +val hash : t -> word + +val toString : t -> string + +val fromString : string -> t option + +val toBytes : t -> Word8Vector.vector + +val fromBytes : Word8Vector.vector -> t +------------ + +== Description + +`[.kw]#type# t`:: + the abstract type of UUIDs. + +`[.kw]#val# null : t`:: + `null` is the all-zeros UUID + +`[.kw]#val# compare : t * t \-> order`:: + `compare (uuid1, uuid2)` does a byte-wise comparison of the two + UUIDs and returns their order. + +`[.kw]#val# same : t * t \-> bool`:: + `same (uuid1, uuid2)` does a byte-wise comparison of the two + UUIDs and returns `true` is they are equal and `false` otherwise. + +`[.kw]#val# hash : t \-> word`:: + `hash uuid` returns a hash of the UUID. + +[[val:toString]] +`[.kw]#val# toString : t \-> string`:: + `toString uuid` formats `uuid` as a string of the form +[source,sml] +------------ +"xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" +------------ + where each "`x`" is a lower-case hexadecimal digit. The first two digits + in the string correspond to the first byte, and so on. + +`[.kw]#val# fromString : string \-> t option`:: + `fromString s` converts the string `s`, which should be of the form returned + by xref:val:toString[`toString`] to `SOME uuid`, where `uuid` is the UUID + denoted by the string. Leading whitespace is ignored. If the string does + not have the correct format, then `NONE` is returned. + +`[.kw]#val# toBytes : t \-> Word8Vector.vector`:: + `toBytes uuid` returns the 16-element `Word8Vector.vector` value + that represents `uuid`. + +`[.kw]#val# fromBytes : Word8Vector.vector \-> t`:: + `fromBytes bytes` takes a 16-element vector of bytes and converts it to + a UUID. The {sml-basis-url}/general.html#SIG:GENERAL.Size:EXN[`Size`] + exception is raised if the length of the vector is not exactly 16. + Otherwise, there is no validity chechking of the UUID (_i.e._, the variant + and type are not checked). + +== See Also + +xref:str-GenUUID.adoc[`GenUUID`], +xref:uuid-lib.adoc[__The UUID Library__] diff --git a/smlnj-lib/Doc/src/UUID/uuid-lib.adoc b/smlnj-lib/Doc/src/UUID/uuid-lib.adoc new file mode 100644 index 0000000..6001471 --- /dev/null +++ b/smlnj-lib/Doc/src/UUID/uuid-lib.adoc @@ -0,0 +1,48 @@ += The UUID Library +:Author: John Reppy +:Date: {release-date} +:stem: latexmath +:source-highlighter: pygments +:VERSION: {smlnj-version} + +== Overview + +The *UUID Library* is a library for generating +https://en.wikipedia.org/wiki/Universally_unique_identifier[ +UUIDs (__**U**niversally **U**nique **ID**entifiers__)], which +are also known as GUIDs (__**G**lobally **U**nique **ID**entifiers__). +It currently only supports generating Variant 1, Type 4 UUIDs, which +are random bit strings (these are the UUIDs generated by the `NewGuid` +function on **Microsoft Windows**). + +The *UUID Library* was first included in Version 110.97 of *SML/NJ*. + +== Contents + +xref:str-UUID.adoc[`[.kw]#structure# UUID`]:: + This structure implements an abstract type for representing UUIDs. + +xref:str-GenUUID.adoc[`[.kw]#structure# GenUUID`]:: + This structure implements a generator for UUIDs. + +== Usage + +For https://smlnj.org[*SML/NJ*], include `$/uuid-lib.cm` in your +*CM* file. + +For use in https://www.mlton.org/[*MLton*], include +`$(SML_LIB)/smlnj-lib/UUID/uuid-lib.mlb` in your *MLB* file. + +ifdef::backend-pdf[] + +// Push titles down one level. +:leveloffset: 1 + +include::str-UUID.adoc[] + +include::str-GenUUID.adoc[] + +// Return to normal title levels. +:leveloffset: 0 + +endif::[] diff --git a/smlnj-lib/Doc/src/Unix/MODULES b/smlnj-lib/Doc/src/Unix/MODULES new file mode 100644 index 0000000..da418be --- /dev/null +++ b/smlnj-lib/Doc/src/Unix/MODULES @@ -0,0 +1,2 @@ +structure UnixEnv +structure UnixPath diff --git a/smlnj-lib/Doc/src/Unix/str-UnixEnv.adoc b/smlnj-lib/Doc/src/Unix/str-UnixEnv.adoc new file mode 100644 index 0000000..e7512b4 --- /dev/null +++ b/smlnj-lib/Doc/src/Unix/str-UnixEnv.adoc @@ -0,0 +1,77 @@ += The `UnixEnv` structure +:Author: John Reppy +:Date: {release-date} +:stem: latexmath +:source-highlighter: pygments +:VERSION: {smlnj-version} + +The `UnixEnv` structure supports operations on the host process's _environment_, +which is essentially a list of strings of the form "``__name__=__value__``", where +the "`=`" character does not appear in ``__name__``. We assume that environments +are "well formed;" _i.e._, that an environment variable is only defined once. + +[WARNING] +========= +Binding the user's environment as an SML value and then exporting the +SML heap image can result in incorrect behavior, since the environment bound +in the heap image may differ from the user's environment when the exported +heap image is loaded. +========= + +== Synopsis + +[source,sml] +------------ +signature UNIX_ENV +structure UnixEnv : UNIX_ENV +------------ + +== Interface + +[source,sml] +------------ +val getFromEnv : (string * string list) -> string option + +val getValue : {name : string, default : string, env : string list} -> string + +val removeFromEnv : (string * string list) -> string list + +val addToEnv : (string * string list) -> string list + +val environ : unit -> string list + +val getEnv : string -> string option +------------ + +== Description + +`[.kw]#val# getFromEnv : (string * string list) \-> string option`:: + `getEnv (name, env)` returns `SOME v` if `(name, v)` is in the environment + `env`. Otherwise, it returns `NONE` if `name` is not bound in `env`. + +`[.kw]#val# getValue : {name : string, default : string, env : string list} \-> string`:: + `getEnv {name, default, env}` returns `v` if `(name, v)` is in the + environment `env`. Otherwise, it returns `default` if `name` is not + bound in `env`. + +`[.kw]#val# removeFromEnv : (string * string list) \-> string list`:: + `removeFromEnv (name, env)` removes any binding of `name` from the + environment. Note that if `env` has multiple bindings of `name` + (_i.e._, `env` is *not* well formed), then only the first binding + is removed. + +`[.kw]#val# addToEnv : (string * string list) \-> string list`:: + `addToEnv (bind, env)` adds the binding `bind`, which should be of the + form "``__name__=__value__``", to the environment. If there was an + existing binding of ``__name__`` in `env`, then it will be replaced. + +`[.kw]#val# environ : unit \-> string list`:: + `env ()` returns the user's (host process) environment. + +`[.kw]#val# getEnv : string \-> string option`:: + `getEnv name` returns the binding of the environment variable `name` + in the user's (host process) environment. + +== See Also + +xref:unix-lib.adoc[__The Unix Library__] diff --git a/smlnj-lib/Doc/src/Unix/str-UnixPath.adoc b/smlnj-lib/Doc/src/Unix/str-UnixPath.adoc new file mode 100644 index 0000000..8fe5b43 --- /dev/null +++ b/smlnj-lib/Doc/src/Unix/str-UnixPath.adoc @@ -0,0 +1,86 @@ += The `UnixPath` structure +:Author: John Reppy +:Date: {release-date} +:stem: latexmath +:source-highlighter: pygments +:VERSION: {smlnj-version} + +The `UnixPath` structure provides support for searching for files +in the *Unix* file system using a list of possible locations. + +Note that this module is largely superseded by the +xref:../Util/str-PathUtil.adoc[`PathUtil`] module +in the xref:../Util/smlnj-lib.adoc[*Util Library*]. + +== Synopsis + +[source,sml] +------------ +signature UNIX_PATH +structure UnixPath : UNIX_PATH +------------ + +== Interface + +[source,sml] +------------ +type path_list = string list + +val getPath : unit -> path_list + +datatype access_mode = datatype OS.FileSys.access_mode +datatype file_type = F_REGULAR | F_DIR | F_SYMLINK | F_SOCK | F_CHR | F_BLK + +val findFile : (path_list * access_mode list) -> string -> string option + +val findFiles : (path_list * access_mode list) -> string -> string list + +val findFileOfType : (path_list * file_type * access_mode list) -> string -> string option + +val findFilesOfType : (path_list * file_type * access_mode list) -> string -> string list +------------ + +== Description + +`[.kw]#type# path_list = string list`:: + A list of file-system paths that is used to search for files. + +`[.kw]#val# getPath : unit \-> path_list`:: + `getPath ()` return's the value of the user's `PATH` shell variable + as a `path_list`. + +`[.kw]#datatype# access_mode = [.kw]#datatype# {sml-basis-url}os-file-sys.html#SIG:OS_FILE_SYS.access_mode:TY[OS.FileSys.access_mode]`:: + Rebind the file-system access-mode constructors. + +`[.kw]#datatype# file_type = F_REGULAR | F_DIR | F_SYMLINK | F_SOCK | F_CHR | F_BLK`:: + The different types of file-system objects in *Unix*. + +`[.kw]#val# findFile : (path_list * access_mode list) \-> string \-> string option`:: + `findFile (paths, mode) __name__` returns `SOME path`, where `path` is a string of + the form ``"__p__/__name__"`` and ``__p__`` is the first string in `paths` + such that `path` has the given access modes (the empty list of access modes + is used to test for existence). If no such file exists, then `NONE` is returned. + +`[.kw]#val# findFiles : (path_list * access_mode list) \-> string \-> string list`:: + `findFiles (paths, mode) __name__` returns a list of strings, such that each string + `s` in the result has the form ``"__p__/__name__"`` with ``__p__`` in `paths` + and the file named by `path` has the specified access modes. + +`[.kw]#val# findFileOfType : (path_list * file_type * access_mode list) \-> string \-> string option`:: + `findFileOfType (paths, ftype, mode) __name__` returns the `SOME path`, where `path` is + a string of the form ``"__p__/__name__"`` and ``__p__`` is the first string + in `paths` such that `path` has the given access modes (the empty list of + access modes is used to test for existence) and is of the specified file type. + If no such file exists, then `NONE` is returned. + +`[.kw]#val# findFilesOfType : (path_list * file_type * access_mode list) \-> string \-> string list`:: + `findFilesOfType (paths, mode) __name__` returns a list of strings, + such that each string `s` in the result has the form ``"__p__/__name__"`` + with ``__p__`` in `paths` and the file named by `path` has the specified + access modes and is of the specified file type. + +== See Also + +xref:str-UnixEnv.adoc[`UnixEnv`], +xref:../Util/str-PathUtil.adoc[`PathUtil`], +xref:unix-lib.adoc[__The Unix Library__] diff --git a/smlnj-lib/Doc/src/Unix/unix-lib.adoc b/smlnj-lib/Doc/src/Unix/unix-lib.adoc new file mode 100644 index 0000000..c786b60 --- /dev/null +++ b/smlnj-lib/Doc/src/Unix/unix-lib.adoc @@ -0,0 +1,40 @@ += The Unix Library +:Author: John Reppy +:Date: {release-date} +:stem: latexmath +:source-highlighter: pygments +:VERSION: {smlnj-version} + +== Overview + +The *Unix Library* provides some *Unix*-specific utilities. + +== Contents + +xref:str-UnixEnv.adoc[`[.kw]#structure# UnixEnv`]:: + This structure provides support for querying the user's environment. + +xref:str-UnixPath.adoc[`[.kw]#structure# UnixPath`]:: + This structure supports *Unix*-specific file-system searches. + +== Usage + +For https://smlnj.org[*SML/NJ*], include `$/unix-lib.cm` in your +*CM* file. + +For use in https://www.mlton.org/[*MLton*], include +`$(SML_LIB)/smlnj-lib/Unix/unix-lib.mlb` in your *MLB* file. + +ifdef::backend-pdf[] + +// Push titles down one level. +:leveloffset: 1 + +include::str-UnixEnv.adoc[] + +include::str-UnixPath.adoc[] + +// Return to normal title levels. +:leveloffset: 0 + +endif::[] diff --git a/smlnj-lib/Doc/src/Util/INSTANCES b/smlnj-lib/Doc/src/Util/INSTANCES new file mode 100644 index 0000000..714d8b6 --- /dev/null +++ b/smlnj-lib/Doc/src/Util/INSTANCES @@ -0,0 +1,77 @@ +structure AtomBinaryMap +structure AtomBinarySet +structure AtomRedBlackMap +structure AtomRedBlackSet +structure IntBinaryMap +structure IntBinarySet +structure IntHashTable (* should be IntTable *) +structure IntListMap +structure IntListSet +structure IntRedBlackMap +structure IntRedBlackSet +structure WordHashTable (* should be WordTable *) +structure WordRedBlackMap +structure WordRedBlackSet +functor BinaryMapFn +functor BinarySetFn +functor ListMapFn +functor ListSetFn +functor RedBlackMapFn +functor RedBlackSetFn +functor SplayMapFn +functor SplaySetFn + + +signature HASH_KEY + +signature HASH_SET + functor HashSetFn + +signature MONO_ARRAY_SORT + structure ArrayQSort + functor ArrayQSortFn + +signature MONO_DYNAMIC_ARRAY + functor DynamicArrayFn + +signature MONO_HASH_TABLE + structure AtomTable :> MONO_HASH_TABLE + structure IntHashTable :> MONO_HASH_TABLE where type Key.hash_key = int + structure WordHashTable :> MONO_HASH_TABLE where type Key.hash_key = word + functor HashTableFn + +signature MONO_HASH2_TABLE + +signature MONO_PRIORITYQ + +signature ORD_KEY + +signature ORD_MAP + structure AtomBinaryMap + structure AtomRedBlackMap + structure IntBinaryMap + structure IntListMap + structure IntRedBlackMap + structure WordRedBlackMap + functor BinaryMapFn + functor ListMapFn + functor RedBlackMapFn + functor SplayMapFn + +signature ORD_SET + structure AtomBinarySet + structure AtomRedBlackSet + structure IntBinarySet + structure IntListSet + structure IntRedBlackSet + structure WordRedBlackSet + functor BinarySetFn + functor ListSetFn + functor RedBlackSetFn + functor SplaySetFn + +signature PRIORITY + +signature UREF + structure SimpleURef + structure URef diff --git a/smlnj-lib/Doc/src/Util/MODULES b/smlnj-lib/Doc/src/Util/MODULES new file mode 100644 index 0000000..b591433 --- /dev/null +++ b/smlnj-lib/Doc/src/Util/MODULES @@ -0,0 +1,54 @@ +signature HASH_KEY +signature HASH_SET +signature MONO_ARRAY_SORT +signature MONO_DYNAMIC_ARRAY +signature MONO_HASH_TABLE +signature MONO_PRIORITYQ +signature ORD_KEY +signature ORD_MAP +signature ORD_SET +signature PRIORITY +signature UREF +structure ANSITerm +structure Atom +structure Base64 +structure BitArray +structure BitVector +structure CharMap +structure DynamicArray +structure EditDistance +structure Fifo +structure FNVHash +structure Format +structure FormatComb +structure HashString +structure HashTable +structure IOUtil +structure GetOpt +structure LibBase +structure ListFormat +structure ListMergeSort +structure ListXProd +structure NativeInt +structure NativeWord +structure ParserComb +structure PathUtil +structure PrimeSizes +structure PropList +structure Queue +structure Rand +structure Random +structure Scan +structure SplayTree +structure TimeLimit +structure RealOrderStats +structure UnivariateStats +structure UTF8 +functor BSearchFn +functor GraphSCCFn +functor HashSetFn +functor HashTableFn +functor Hash2TableFn +functor IntervalSetFn +functor KeywordFn +functor MonoArrayFn diff --git a/smlnj-lib/Doc/src/Util/fun-ArrayQSortFn.adoc b/smlnj-lib/Doc/src/Util/fun-ArrayQSortFn.adoc new file mode 100644 index 0000000..b5c046c --- /dev/null +++ b/smlnj-lib/Doc/src/Util/fun-ArrayQSortFn.adoc @@ -0,0 +1,63 @@ += The `ArrayQSortFn` functor +:Author: John Reppy +:Date: {release-date} +:stem: latexmath +:source-highlighter: pygments +:VERSION: {smlnj-version} + +The `ArrayQSortFn` functor provides _in situ_ sorting of monomorphic arrays +using the quicksort algorithm. + +== Synopsis + +[source,sml] +------------ +signature MONO_ARRAY_SORT +functor ArrayQSortFn (A : MONO_ARRAY) : MONO_ARRAY_SORT +------------ + +== Functor Argument Interface + +[source,sml] +------------ +A : MONO_ARRAY +------------ + +== Functor Argument Description + +`A : MONO_ARRAY`:: + A structure that implements the + {sml-basis-url}/mono-array.html[`MONO_ARRAY`] signature from + the {sml-basis-url}/index.html[*SML Basis Library*]. + +== Interface + +[source,sml] +------------ +structure A : MONO_ARRAY + +val sort : (A.elem * A.elem -> order) -> A.array -> unit + +val sorted : (A.elem * A.elem -> order) -> A.array -> bool +------------ + +== Interface Description + +`[.kw]#structure# A : MONO_ARRAY`:: + The argument structure. + +`[.kw]#val# sort : (A.elem * A.elem \-> order) \-> A.array \-> unit`:: + `sort cmp arr` sorts the array `arr` into ascending order + according to the comparison function `cmp`. + +`[.kw]#val# sorted : (A.elem * A.elem \-> order) \-> A.array \-> bool`:: + `sorted cmp arr` returns true if, and only if, the array `arr=` is + sorted in ascending order. + + +== See Also + +xref:str-ArrayQSort.adoc[`ArrayQSort`], +{sml-basis-url}/Basis/mono-array.html[`MONO_ARRAY`], +xref:fun-MonoArrayFn.adoc[`MonoArrayFn`], +xref:smlnj-lib.adoc[__The Util Library__] diff --git a/smlnj-lib/Doc/src/Util/fun-BSearchFn.adoc b/smlnj-lib/Doc/src/Util/fun-BSearchFn.adoc new file mode 100644 index 0000000..71584b0 --- /dev/null +++ b/smlnj-lib/Doc/src/Util/fun-BSearchFn.adoc @@ -0,0 +1,61 @@ += The `BSearchFn` functor +:Author: John Reppy +:Date: {release-date} +:stem: latexmath +:source-highlighter: pygments +:VERSION: {smlnj-version} + +The `BSearchFn` functor provides binary search on sorted monomorphic +arrays. + +== Synopsis + +[source,sml] +------------ +functor BSearchFn (A : MONO_ARRAY) +------------ + +== Functor Argument Interface + +[source,sml] +------------ +A : MONO_ARRAY +------------ + +== Functor Argument Description + +`A : MONO_ARRAY`:: + A structure that implements the + {sml-basis-url}/mono-array.html[`MONO_ARRAY`] signature from + the {sml-basis-url}/index.html[*SML Basis Library*]. + +== Interface + +[source,sml] +------------ +structure A : MONO_ARRAY + +val bsearch : (('a * A.elem) -> order) -> ('a * A.array) -> (int * A.elem) option +------------ + +== Description + +`[.kw]#structure# A : {sml-basis-url}/Basis/mono-array.html[MONO_ARRAY]`:: + The array structure that defines the element and array types. + +`[.kw]#val# bsearch : (('a * A.elem) \-> order) \-> ('a * A.array) \-> (int * A.elem) option`:: + `bsearch cmp (key, arr)` returns `SOME(ix, elem)` where `A.sub(arr, ix)` is + `elem` and `cmp(key, elem)` returns `EQUAL`; if no such element is present, then + `NONE` is returned. This function uses a binary search over the array, + which requires that the elements be arranged in increasing order by the `cmp` + function. Usually, the type of the search key will be `A.elem`, but the + interface allows some computation on the elements, as long as the ordering + is respected. + +== See Also + +{sml-basis-url}/Basis/mono-array.html[`MONO_ARRAY`], +xref:fun-MonoArrayFn.adoc[`MonoArrayFn`], +xref:fun-ArrayQSortFn.adoc[`ArrayQSortFn`], +xref:smlnj-lib.adoc[__The Util Library__] + diff --git a/smlnj-lib/Doc/src/Util/fun-BinaryMapFn.adoc b/smlnj-lib/Doc/src/Util/fun-BinaryMapFn.adoc new file mode 100644 index 0000000..456a502 --- /dev/null +++ b/smlnj-lib/Doc/src/Util/fun-BinaryMapFn.adoc @@ -0,0 +1,31 @@ += The `BinaryMapFn` functor +:Author: John Reppy +:Date: {release-date} +:stem: latexmath +:source-highlighter: pygments +:VERSION: {smlnj-version} + +The `BinaryMapFn` functor provides a balanced-binary-tree implementation of the +xref:sig-ORD_MAP.adoc[`ORD_MAP`] signature parameterized over the key type. + +The original implementation was written by Stephen Adams and was based +on the paper https://doi.org/10.1137/0202005[Binary Search Trees of Bounded Balance] +by Nievergelt and Reingold (SIAM Journal of Computing; March 1973). + +It is recommended, however, that one use the xref:fun-RedBlackMapFn.adoc[`RedBlackMapFn`] +functor instead of `BinaryMapFn`, since experimentation has shown it to be +faster across the board. + +== Synopsis + +[source,sml] +------------ +functor BinaryMapFn (K : ORD_KEY) :> ORD_MAP where type Key.ord_key = K.ord_key +------------ + +== See Also + +xref:sig-ORD_KEY.adoc[`ORD_KEY`], +xref:sig-ORD_MAP.adoc[`ORD_MAP`], +xref:fun-RedBlackMapFn.adoc[`RedBlackMapFn`], +xref:smlnj-lib.adoc[__The Util Library__] diff --git a/smlnj-lib/Doc/src/Util/fun-BinarySetFn.adoc b/smlnj-lib/Doc/src/Util/fun-BinarySetFn.adoc new file mode 100644 index 0000000..53b4cbe --- /dev/null +++ b/smlnj-lib/Doc/src/Util/fun-BinarySetFn.adoc @@ -0,0 +1,31 @@ += The `BinarySetFn` functor +:Author: John Reppy +:Date: {release-date} +:stem: latexmath +:source-highlighter: pygments +:VERSION: {smlnj-version} + +The `BinarySetFn` functor provides a balanced-binary-tree implementation of the +xref:sig-ORD_SET.adoc[`ORD_SET`] signature parameterized over the element type. + +The original implementation was written by Stephen Adams and was based +on the paper https://doi.org/10.1137/0202005[Binary Search Trees of Bounded Balance] +by Nievergelt and Reingold (SIAM Journal of Computing; March 1973). + +It is recommended, however, that one use the xref:fun-RedBlackSetFn.adoc[`RedBlackSetFn`] +functor instead of `BinarySetFn`, since experimentation has shown it to be +faster across the board. + +== Synopsis + +[source,sml] +------------ +functor BinarySetFn (K : ORD_KEY) :> ORD_SET where type Key.ord_key = K.ord_key +------------ + +== See Also + +xref:sig-ORD_KEY.adoc[`ORD_KEY`], +xref:sig-ORD_SET.adoc[`ORD_SET`], +xref:fun-RedBlackSetFn.adoc[`RedBlackSetFn`], +xref:smlnj-lib.adoc[__The Util Library__] diff --git a/smlnj-lib/Doc/src/Util/fun-DynamicArrayFn.adoc b/smlnj-lib/Doc/src/Util/fun-DynamicArrayFn.adoc new file mode 100644 index 0000000..da9dcb2 --- /dev/null +++ b/smlnj-lib/Doc/src/Util/fun-DynamicArrayFn.adoc @@ -0,0 +1,135 @@ += The `DynamicArrayFn` functor +:Author: John Reppy +:Date: {release-date} +:stem: latexmath +:source-highlighter: pygments +:VERSION: {smlnj-version} + +The `DynamicArrayFn` functor provides dynamically sized monomorphic +arrays. Each array has an associated _default_ value, which is +covers those elements that have not been explicitly initialized +(conceptually, one can view an array as having an infinite size). +Thus, reads from indices above the bound will return the default value. +The __bound__ of an array is the highest index of an initialized +element (or `~1` if there are no initialized elements). The +__defined range__ of the array are the elements in the positions +indexed from zero to the bound. + +== Synopsis + +[source,sml] +------------ +signature MONO_DYNAMIC_ARRAY +functor DynamicArrayFn (A : MONO_ARRAY) : MONO_DYNAMIC_ARRAY +------------ + +== Functor Argument Interface + +[source,sml] +------------ +A : MONO_ARRAY +------------ + +== Functor Argument Description + +`A : MONO_ARRAY`:: + A structure that implements the + {sml-basis-url}/mono-array.html[`MONO_ARRAY`] signature from + the {sml-basis-url}/index.html[*SML Basis Library*]. + +== Interface + +[source,sml] +------------ +type elem +type array + +val array : (int * elem) -> array + +val subArray : array * int * int -> array + +val fromList : elem list * elem -> array +val toList : array -> elem list + +val tabulate: int * (int -> elem) * elem -> array + +val default : array -> elem + +val sub : array * int -> elem + +val update : array * int * elem -> unit + +val bound : array -> int + +val truncate : array * int -> unit +------------ + +== Interface Description + +`[.kw]#type# elem`:: + The type of array elements. + +`[.kw]#type# array`:: + The type of dynamic arrays. + +`[.kw]#val# array : (int * elem) \-> array`:: + `array (sz, dflt)` returns a new array with bound `~1` and default + value `dflt`. The `sz` argument, which must be non-negative, is used + as a hint of the potential range of indices. This function raises + the {sml-basis-url}/general.html#SIG:GENERAL.Size:EXN:SPEC[`Size`] + exception if `sz < 0`. + +`[.kw]#val# subArray : array * int * int \-> array`:: + `subArray (arr, lo, hi)` returns a new array with the same default + as `arr`, and whose values in the range `[0, hi-lo]` are equal to + the values in `arr` in the range `[lo, hi]`. This function raises + the {sml-basis-url}/general.html#SIG:GENERAL.Size:EXN:SPEC[`Size`] + exception if `lo < 0` or `hi < lo-1`. + +`[.kw]#val# fromList : elem list * elem \-> array`:: + `fromList (lst, dflt)` returns a new array created from the elements + of `lst` and with default value `dflt`. The bound of the array will be + `length lst - 1`. + +`[.kw]#val# toList : 'a array \-> 'a list`:: + `toList arr` returns a list of the array's contents. The resulting + list will have the array's bound plus one elements. + +`[.kw]#val# tabulate: int * (int \-> elem) * elem \-> array`:: + `tabulate (sz, init, dflt)` returns a new array with the first + `sz` elements initialized using the function `init` and the + default value `dflt`. This function raises the + {sml-basis-url}/general.html#SIG:GENERAL.Size:EXN:SPEC[`Size`] + exception if `sz < 0`. + +`[.kw]#val# default : array \-> elem`:: + `default arr` returns the default value for the array. + +`[.kw]#val# sub : array * int \-> elem`:: + `sub (arr, ix)` returns the value of the array at index `ix`. + If that value has not been explicitly set, then it returns the array's + default value. This function raises the + {sml-basis-url}/general.html#SIG:GENERAL.Subscript:EXN:SPEC[`Subscript`] + exception if `ix < 0`. + +`[.kw]#val# update : array * int * elem \-> unit`:: + `update (arr, ix, v)` sets the value at index `ix` of the array to `v`. + If `ix` is greater than the current bound of the array, then the bound + is set to `ix`. This function raises the + {sml-basis-url}/general.html#SIG:GENERAL.Subscript:EXN:SPEC[`Subscript`] + exception if `ix < 0`. + +`[.kw]#val# bound : array \-> int`:: + `bound arr` returns the current bound of the array, which is the highest + index that has been explicitly set (__e.g.__, by `update`). + +`[.kw]#val# truncate : array * int \-> unit`:: + `truncate (arr, sz)` sets every entry with index greater or equal to + `sz` to the array's default value. + +== See Also + +xref:str-DynamicArray.adoc[`DynamicArray`], +{sml-basis-url}/Basis/mono-array.html[`MONO_ARRAY`], +xref:fun-MonoArrayFn.adoc[`MonoArrayFn`], +xref:smlnj-lib.adoc[__The Util Library__] diff --git a/smlnj-lib/Doc/src/Util/fun-GraphSCCFn.adoc b/smlnj-lib/Doc/src/Util/fun-GraphSCCFn.adoc new file mode 100644 index 0000000..72995b6 --- /dev/null +++ b/smlnj-lib/Doc/src/Util/fun-GraphSCCFn.adoc @@ -0,0 +1,80 @@ += The `GraphSCCFn` functor +:Author: John Reppy +:Date: {release-date} +:stem: latexmath +:source-highlighter: pygments +:VERSION: {smlnj-version} + +The `GraphSCCFn` functor implements an algorithm for +calculating the strongly-connected components of a directed graph. +The resulting components are topologically-sorted; _i.e._, if a component +_A_ comes before a component _B_ in the result, then there is no +path from _B_ to _A_ (but there might be a path from _A_ to _B_). + +== Synopsis + +[source,sml] +------------ +functor GraphSCCFn (Nd: ORD_KEY) :> GRAPH_SCC where Nd = Nd +------------ + +== Arguments + +[source,sml] +------------ +Nd: ORD_KEY +------------ +* `Nd : xref:sig-ORD_KEY.adoc[ORD_KEY]`:: + The argument structure `Nd` defines the type of graph nodes paired with + a comparison function that is used by the algorithm to implement finite + maps keyed by nodes. + +== Interface + +[source,sml] +------------ +structure Nd : ORD_KEY + +type node = Nd.ord_key + +datatype component + = SIMPLE of node + | RECURSIVE of node list + +val topOrder' : { roots: node list, follow: node \-> node list } -> component list + +val topOrder : { root: node, follow: node \-> node list } -> component list +------------ + +== Description + +`structure Nd : ORD_KEY`:: + The argument structure. + +`[.kw]#type# node = Nd.ord_key`:: + The type of a node in the graph. + +`[.kw]#datatype# component`:: + The type of a component in the result. Components are either + `SIMPLE`, consisting of a single node, or `RECURSIVE`, consisting + of a list of nodes that are all connected by cyclic paths. + A single node with a self loop is represented by the `RECURSIVE` + constructor. + +`[.kw]#val# topOrder': { roots: node list, follow: node \-> node list } \-> component list`:: + `topOrder` {roots, follow}` returns a topologically-sorted list of + strongly-connected components for a directed graph. The graph is specified + by a list of root nodes and a _follow_ (or _successor_) function that returns + the list of successors for a node. The first component in the result will + contain the first node in the `roots` list. + +`[.kw]#val# topOrder : { root: node, follow: node \-> node list } \-> component list`:: + `topOrder {root, follow}` is equivalent to the expression +[source,sml] +------------ +topOrder' {roots = [root], follow = follow} +------------ + +== See Also + +xref:smlnj-lib.adoc[__The Util Library__] diff --git a/smlnj-lib/Doc/src/Util/fun-Hash2TableFn.adoc b/smlnj-lib/Doc/src/Util/fun-Hash2TableFn.adoc new file mode 100644 index 0000000..688cd6c --- /dev/null +++ b/smlnj-lib/Doc/src/Util/fun-Hash2TableFn.adoc @@ -0,0 +1,222 @@ += The `Hash2TableFn` functor +:Author: John Reppy +:Date: {release-date} +:stem: latexmath +:source-highlighter: pygments +:VERSION: {smlnj-version} + +The `Hash2TableFn` functor provides hash tables that are keyed by two +different key types. Items are inserted with two keys, either of +which may be used to lookup the item. Essentially, it is a pair of +hash tables that are kept synchronized. + +The tables are implemented as an array of _buckets_, which are +lists of key-value pairs. The number of buckets grows with the number +of table entries. + +== Synopsis + +[source,sml] +------------ +functor Hash2TableFn ( + structure Key1 : HASH_KEY + structure Key2 : HASH_KEY + ) : MONO_HASH2_TABLE +------------ + +== Functor Argument Interface + +[source,sml] +------------ +structure Key1 : HASH_KEY +structure Key2 : HASH_KEY +------------ + +== Functor Argument Description + +`[.kw]#structure# Key1 : xref:sig-HASH_KEY.adoc[HASH_KEY]`:: + The argument structure that specifies the first key type + with its hashing and equality functions. + +`[.kw]#structure# Key2 : xref:sig-HASH_KEY.adoc[HASH_KEY]`:: + The substructure that specifies the second key type + with its hashing and equality functions. + +== Interface + +[source,sml] +------------ +structure Key1 : HASH_KEY +structure Key2 : HASH_KEY + +type 'a hash_table + +val mkTable : (int * exn) -> 'a hash_table + +val clear : 'a hash_table -> unit + +val insert : 'a hash_table -> (Key1.hash_key * Key2.hash_key * 'a) -> unit + +val inDomain1 : 'a hash_table -> Key1.hash_key -> bool +val inDomain2 : 'a hash_table -> Key2.hash_key -> bool + +val lookup1 : 'a hash_table -> Key1.hash_key -> 'a +val lookup2 : 'a hash_table -> Key2.hash_key -> 'a + +val find1 : 'a hash_table -> Key1.hash_key -> 'a option +val find2 : 'a hash_table -> Key2.hash_key -> 'a option + +val remove1 : 'a hash_table -> Key1.hash_key -> 'a +val remove2 : 'a hash_table -> Key2.hash_key -> 'a + +val numItems : 'a hash_table -> int + +val listItems : 'a hash_table -> 'a list +val listItemsi : 'a hash_table -> (Key1.hash_key * Key2.hash_key * 'a) list + +val app : ('a -> unit) -> 'a hash_table -> unit +val appi : ((Key1.hash_key * Key2.hash_key * 'a) -> unit) -> 'a hash_table + -> unit + +val map : ('a -> 'b) -> 'a hash_table -> 'b hash_table +val mapi : ((Key1.hash_key * Key2.hash_key * 'a) -> 'b) -> 'a hash_table + -> 'b hash_table + +val fold : (('a * 'b) -> 'b) -> 'b -> 'a hash_table -> 'b +val foldi : ((Key1.hash_key * Key2.hash_key * 'a * 'b) -> 'b) -> 'b + +val filter : ('a -> bool) -> 'a hash_table -> unit +val filteri : ((Key1.hash_key * Key2.hash_key * 'a) -> bool) -> 'a hash_table + -> unit + +val copy : 'a hash_table -> 'a hash_table + +val bucketSizes : 'a hash_table -> (int list * int list) +------------ + +== Interface Description + +`[.kw]#structure# Key1 : xref:sig-HASH_KEY.adoc[HASH_KEY]`:: + The substructure that specifies the first key type. + +`[.kw]#structure# Key2 : xref:sig-HASH_KEY.adoc[HASH_KEY]`:: + The substructure that specifies the second key type. + +`[.kw]#type# 'a hash_table`:: + The type of imperative hash tables indexed by the key types. + +`[.kw]#val# mkTable : (int * exn) \-> 'a hash_table`:: + `mkTable (n, ex)` creates a new hash table; the table will be initially + sized to hold at least `n` items. The exception `ex` is raised by the + lookup and remove functions (described below) + when the search key is not in the domain. + +`[.kw]#val# clear : 'a hash_table \-> unit`:: + `clear tbl` removes all of the entries in the table. + +`[.kw]#val# insert : 'a hash_table \-> (Key1.hash_key * Key2.hash_key * 'a) \-> unit`:: + `insert tbl (key1, key2, item)` inserts a mappings from `key1` and `key2` + to `item` into `tbl`. Any existing mapping of the keys is discarded. + +`[.kw]#val# inDomain1 : 'a hash_table \-> Key1.hash_key \-> bool`:: + `inDomain1 tbl key` returns `true` if, and only if, `key` is in the + first domain of the table + +`[.kw]#val# inDomain2 : 'a hash_table \-> Key2.hash_key \-> bool`:: + `inDomain2 tbl key` returns `true` if, and only if, `key` is in the + second domain of the table + +[[val:lookup1]] +`[.kw]#val# lookup1 : 'a hash_table \-> Key1.hash_key \-> 'a`:: + `lookup1 tbl key` returns the item that `key` maps to if `key` is in + the first mapping of `tbl`. Otherwise, the table's exception is raised. + +[[val:lookup2]] +`[.kw]#val# lookup2 : 'a hash_table \-> Key2.hash_key \-> 'a`:: + `lookup2 tbl key` returns the item that `key` maps to if `key` is in + the second mapping of `tbl`. Otherwise, the table's exception is raised. + +`[.kw]#val# find1 : 'a hash_table \-> Key1.hash_key \-> 'a option`:: + `find1 tbl key` returns the `SOME v` if `key` is in the first domain + of `tbl` and is mapped to `v`. Otherwise, it returns `NONE`. + +`[.kw]#val# find2 : 'a hash_table \-> Key2.hash_key \-> 'a option`:: + `find2 tbl key` returns the `SOME v` if `key` is in the second domain + of `tbl` and is mapped to `v`. Otherwise, it returns `NONE`. + +[[val:remove1]] +`[.kw]#val# remove1 : 'a hash_table \-> Key1.hash_key \-> 'a`:: + `remove1 tbl key1` returns the item that `key1` maps to if `key1` is in + the first mapping of `tbl`. Furthermore, if the item was inserted with + keys `key1` and `key2`, then `key1` is removed from the first mapping + and `key2` is removed from the second mapping. If `key1` is not in the + first domain of the table, then the table's exception is raised. + +[[val:remove2]] +`[.kw]#val# remove2 : 'a hash_table \-> Key2.hash_key \-> 'a`:: + `remove2 tbl key2` returns the item that `key2` maps to if `key2` is in + the second mapping of `tbl`. Furthermore, if the item was inserted with + keys `key1` and `key2`, then `key1` is removed from the first mapping + and `key2` is removed from the second mapping. If `key2` is not in the + second domain of the table, then the table's exception is raised. + +`[.kw]#val# numItems : 'a hash_table \-> int`:: + `numItems tbl` returns the number of entries in the table. + +`[.kw]#val# listItems : 'a hash_table \-> 'a list`:: + `listItems tbl` returns a list of the items in the range of `tbl`. + +`[.kw]#val# listItemsi : 'a hash_table \-> (Key1.hash_key * Key2.hash_key * 'a) list`:: + `listItemsi tbl` returns a list of the `(key1, key2, item)` triples that + are in `tbl`. + +`[.kw]#val# app : ('a \-> unit) \-> 'a hash_table \-> unit`:: + `app f tbl` applies the function `f` to each item in `tbl`. + +`[.kw]#val# appi : ((Key1.hash_key * Key2.hash_key * 'a) \-> unit) \-> 'a hash_table`:: + `appi f tbl` applies the function `f` to each `(key1, key2, item)` triple in `tbl`. + +`[.kw]#val# map : ('a \-> 'b) \-> 'a hash_table \-> 'b hash_table`:: + `map f tbl` creates a new table with an entry `(key1, key2, f item)` + in the new table for every `(key1, key2, item)` triple in `tbl`. + The exception for the new table is copied from `tbl`. + +`[.kw]#val# mapi : ((Key1.hash_key * Key2.hash_key * 'a) \-> 'b) \-> 'a hash_table \-> 'b hash_table`:: + `mapi f tbl` creates a new table with an entry `(key1, key2, f(key1, key2, item))` + in the new table for every `(key1, key2, item)` triple in `tbl`. + The exception for the new table is copied from `tbl`. + +`[.kw]#val# fold : (('a * 'b) \-> 'b) \-> 'b \-> 'a hash_table \-> 'b`:: + `fold f init tbl` folds the function `f` over the items in the range of `tbl` + using `init` as an initial value. + +`[.kw]#val# foldi : ((Key1.hash_key * Key2.hash_key * 'a * 'b) \-> 'b) \-> 'b \-> 'a hash_table \-> 'b`:: + `foldi f init tbl` folds the function `f` over the `(key1, key2, item)` + triples in `tbl` using `init` as an initial value. + +`[.kw]#val# filter : ('a \-> bool) \-> 'a hash_table \-> unit`:: + `filter pred tbl` removes any entry `(key1, key2, item)` from `tbl` for which + `pred item` returns `false`. + +`[.kw]#val# filteri : ((Key1.hash_key * Key2.hash_key * 'a) \-> bool) \-> 'a hash_table \-> unit`:: + `filteri pred tbl` removes any entry `(key1, key2, item)` from `tbl` for which + `pred(key1, key2, item)` returns `false`. + +`[.kw]#val# copy : 'a hash_table \-> 'a hash_table`:: + `copy tbl` creates a copy of `tbl`. This expression is equivalent to ++ +[source,sml] +------------ +map (fn x => x) tbl +------------ + +`[.kw]#val# bucketSizes : 'a hash_table \-> (int list * int list)`:: + `bucketSizes tbl` returns a list of the current number of items per + bucket for each of the tables. This function allows users to gauge + the quality of their hashing functions. + +== See Also + +xref:sig-HASH_KEY.adoc[`HASH_KEY`], +xref:fun-HashTableFn.adoc[`HashTableFn`], +xref:smlnj-lib.adoc[__The Util Library__] diff --git a/smlnj-lib/Doc/src/Util/fun-HashSetFn.adoc b/smlnj-lib/Doc/src/Util/fun-HashSetFn.adoc new file mode 100644 index 0000000..6036978 --- /dev/null +++ b/smlnj-lib/Doc/src/Util/fun-HashSetFn.adoc @@ -0,0 +1,225 @@ += The `HashSetFn` functor +:Author: John Reppy +:Date: {release-date} +:stem: latexmath +:source-highlighter: pygments +:VERSION: {smlnj-version} + +The `HashSetFn` functor provides a hash-table-based implementation +of imperative sets parameterized over a `Key` structure. + +== Synopsis + +[source,sml] +------------ +signature MONO_HASH_SET +functor HashSetFn (Key : HASH_KEY) : MONO_HASH_SET +------------ + +== Functor Argument Interface + +[source,sml] +------------ +Key : HASH_KEY +------------ + +== Functor Argument Description + +`Key : xref:sig:HASH_KEY.adoc[HASH_KEY]`:: + A structure that implements the xref:sig:HASH_KEY.adoc[HASH_KEY]` + signature, where xref:sig:HASH_KEY.adoc#type:hash_key[`Key.hash_key`] + will be the type of the elements in the hash set. + +== Interface + +[source,sml] +------------ +structure Key : HASH_KEY + +type item = Key.hash_key +type set + +val mkEmpty : int -> set + +val mkSingleton : item -> set + +val mkFromList : item list -> set + +val toList : set -> item list + +val add : set * item -> unit +val addc : set -> item -> unit + +val addList : set * item list -> unit + +val subtract : set * item -> unit +val subtractc : set -> item -> unit + +val subtractList : set * item list -> unit + +val delete : set * item -> bool + +val member : set * item -> bool + +val isEmpty : set -> bool + +val isSubset : (set * set) -> bool + +val numItems : set -> int + +val map : (item -> item) -> set -> set +val mapPartial : (item -> item option) -> set -> set +val app : (item -> unit) -> set -> unit +val fold : (item * 'b -> 'b) -> 'b -> set -> 'b + +val partition : (item -> bool) -> set -> (set * set) + +val filter : (item -> bool) -> set -> unit + +val exists : (item -> bool) -> set -> bool +val all : (item -> bool) -> set -> bool + +val find : (item -> bool) -> set -> item option + +val listItems : set -> item list +val without : set * item -> unit +------------ + +== Interface Description + +`[.kw]#structure# Key : HASH_KEY`:: + This substructure is the argument structure, which defines the type + of set elements, and hash and equality functions on the key type. + +`[.kw]#type# item = Key.hash_key`:: + The type of items in the sets. + +`[.kw]#type# set`:: + The type of imperative sets of items. + +`[.kw]#val# mkEmpty : int \-> set`:: + `mkEmpty n` creates an empty set that has initial space to store + at least `n` items. + +`[.kw]#val# mkSingleton : item \-> set`:: + `mkSingleton item` creates a set with `item` as its only initial element. + +`[.kw]#val# mkFromList : item list \-> set`:: + `mkFromList items` creates a set with `items` as its initial elements. + +[[val:toList]] +`[.kw]#val# toList : set \-> item list`:: + `toList set` returns a list of the items in `set`. + +`[.kw]#val# add : set * item \-> unit`:: + `add (set, item)` destructively adds the item to the set. + +`[.kw]#val# addc : set \-> item \-> unit`:: + `addc set item` destructively adds the item to the set. + +`[.kw]#val# addList : set * item list \-> unit`:: + `addList (set, items)` destructively adds the list of items to the set. + +[[val:subtract]] +`[.kw]#val# subtract : set * item \-> unit`:: + `subtract (set, item)` removes the object `item` from `set`; it has no + effect if `item` is not in `set`. + +`[.kw]#val# subtractc : set \-> item \-> unit`:: + `subtractc set item` removes the object `item` from `set`; it has no + effect if `item` is not in `set`. + +`[.kw]#val# subtractList : set \-> item list \-> unit`:: + `subtractList set items` removes the `items` from `set`. This expression + is equivalent to ++ +[source,sml] +------------ +List.app (subtractc set) items +------------ + +`[.kw]#val# delete : set * item \-> bool`:: + `subtract (set, item)` removes the object `item` from `set` (if present) + and returns `true` if the item was removed and `false` if it was not + present. + +`[.kw]#val# member : set * item \-> bool`:: + `member (item, set)` returns `true` if, and only if, `item` + is an element of `set`. + +`[.kw]#val# isEmpty : set \-> bool`:: + `isEmpty set` returns true if, and only if, `set` is empty. + +`[.kw]#val# isSubset : (set * set) \-> bool`:: + `isSubset (set1, set2)` returns true if, and only if, `set1` + is a subset of `set2` (_i.e._, any element of `set1` is an + element of `set2`). + +`[.kw]#val# numItems : set \-> int`:: + `numItems set` returns the number of items in the `set`. + +`[.kw]#val# map : (item \-> item) \-> set \-> set`:: + `map f set` creates a new set from the result of applying the + function `f` to the elements of `set`. This expression is + equivalent to ++ +[source,sml] +------------ +mkFromList (List.map f (toList set)) +------------ + +`[.kw]#val# mapPartial : (item \-> item option) \-> set \-> set`:: + `mapPartial f set` creates a new set from the result of applying the + partial function `f` to the elements of `set`. This expression is + equivalent to ++ +[source,sml] +------------ +mkFromList (List.mapPartial f (toList set)) +------------ + +`[.kw]#val# app : (item \-> unit) \-> set \-> unit`:: + `app f set` applies the function `f` to the items in `set`. + +`[.kw]#val# fold : (item * 'b \-> 'b) \-> 'b \-> set \-> 'b`:: + `foldl f init set` folds the function `f` over the items in + `set` using `init` as the initial value. + +`[.kw]#val# partition : (item \-> bool) \-> set \-> (set * set)`:: + `partition pred set` returns a pair of disjoint sets `(tSet, fSet)`, + where the predicate `pred` returns true for every element of `tSet`, + `false` for every element of `fSet`, and `set` is the union of `tSet` + and `fSet`. + +`[.kw]#val# filter : (item \-> bool) \-> set \-> unit`:: + `filter pred set` removes any elements of set for which the + predicate `pred` returns false. + +`[.kw]#val# exists : (item \-> bool) \-> set \-> bool`:: + `all pred set` returns `true` if, and only if, `pred item` returns + true for all elements `item` in `set`. Elements are checked in + an undefined order. + +`[.kw]#val# all : (item \-> bool) \-> set \-> bool`:: + `exists pred set` returns `true` if, and only if, there exists an + element `item` in `set` such that `pred item` returns `true`. + Elements are checked in an undefined order. + +`[.kw]#val# find : (item \-> bool) \-> set \-> item option`:: + `find pred set` returns `SOME item` if there exists an object `item` + in the set for which `pred item` returns `true`; otherwise `NONE` is + returned. Items are tested in an undefined order. + +=== Deprecated functions + +`[.kw]#val# without : set * item \-> unit`:: + Use xref:#val:subtract[`subtract`] instead. + +`[.kw]#val# listItems : set \-> item list`:: + Use xref:#val:toList[`toList`] instead. + +== See Also + +xref:sig-HASH_KEY.adoc[`HASH_KEY`], +xref:sig-ORD_SET.adoc[`ORD_SET`], +xref:smlnj-lib.adoc[__The Util Library__] diff --git a/smlnj-lib/Doc/src/Util/fun-HashTableFn.adoc b/smlnj-lib/Doc/src/Util/fun-HashTableFn.adoc new file mode 100644 index 0000000..e91f892 --- /dev/null +++ b/smlnj-lib/Doc/src/Util/fun-HashTableFn.adoc @@ -0,0 +1,28 @@ += The `HashTableFn` functor +:Author: John Reppy +:Date: {release-date} +:stem: latexmath +:source-highlighter: pygments +:VERSION: {smlnj-version} + +The `HashTableFn` functor provides an implementation of the +xref:sig-MONO_HASH_TABLE.adoc[`MONO_HASH_TABLE`] signature parameterized +over the key type. + +The tables are implemented as an array of _buckets_, which are +lists of key-value pairs. The number of buckets grows with the number +of table entries. + +== Synopsis + +[source,sml] +------------ +functor HashTableFn (Key : HASH_KEY) : MONO_HASH_TABLE +------------ + +== See Also + +xref:sig-HASH_KEY.adoc[`HASH_KEY`], +xref:str-HashTable.adoc[`HashTable`], +xref:sig-MONO_HASH_TABLE.adoc[`MONO_HASH_TABLE`], +xref:smlnj-lib.adoc[__The Util Library__] diff --git a/smlnj-lib/Doc/src/Util/fun-IntervalSetFn.adoc b/smlnj-lib/Doc/src/Util/fun-IntervalSetFn.adoc new file mode 100644 index 0000000..53a90f6 --- /dev/null +++ b/smlnj-lib/Doc/src/Util/fun-IntervalSetFn.adoc @@ -0,0 +1,281 @@ += The `IntervalSetFn` functor +:Author: John Reppy +:Date: {release-date} +:stem: latexmath +:source-highlighter: pygments +:VERSION: {smlnj-version} + +The `IntervalSetFn` functor provides sets over a discrete ordered domain, +where the sets are represented by intervals. It is meant for representing +dense sets (__e.g.__, unicode character classes). + +== Synopsis + +[source,sml] +------------ +signature INTERVAL_SET +functor IntervalSetFn (D : INTERVAL_DOMAIN) : INTERVAL_SET +------------ + +== Functor Argument Description + +`D : xref:sig-INTERVAL_DOMAIN.adoc[INTERVAL_DOMAIN]`:: + The argument defines the type of points in the domain and + their order structure. + +== Interface + +[source,sml] +------------ +structure D : INTERVAL_DOMAIN + +type item = D.point +type interval = (item * item) +type set + +val empty : set +val universe : set + +val singleton : item -> set + +val interval : item * item -> set + +val isEmpty : set -> bool +val isUniverse : set -> bool + +val member : set * item -> bool + +val items : set -> item list + +val intervals : set -> interval list + +val add : set * item -> set +val add' : item * set -> set + +val addInt : set * interval -> set +val addInt' : interval * set -> set + +val complement : set -> set +val union : (set * set) -> set +val intersect : (set * set) -> set +val difference : (set * set) -> set + +val app : (item -> unit) -> set -> unit +val foldl : (item * 'a -> 'a) -> 'a -> set -> 'a +val foldr : (item * 'a -> 'a) -> 'a -> set -> 'a +val filter : (item -> bool) -> set -> set +val exists : (item -> bool) -> set -> bool +val all : (item -> bool) -> set -> bool + +val appInt : (interval -> unit) -> set -> unit +val foldlInt : (interval * 'a -> 'a) -> 'a -> set -> 'a +val foldrInt : (interval * 'a -> 'a) -> 'a -> set -> 'a +val filterInt : (interval -> bool) -> set -> set +val existsInt : (interval -> bool) -> set -> bool +val allInt : (interval -> bool) -> set -> bool + +val compare : set * set -> order +val isSubset : set * set -> bool +------------ + +== Interface Description + +`[.kw]#structure# D : INTERVAL_DOMAIN`:: + The argument structure. + +`[.kw]#type# item = D.point`:: + The type of items in the set. + +`[.kw]#type# interval = (item * item)`:: + A collection of items defined by an interval. + +`[.kw]#type# set`:: + The type of a set of items. + +`[.kw]#val# empty : set`:: + The empty set. + +`[.kw]#val# universe : set`:: + The set of all elements in the domain, which is specified as the + interval `(xref:sig-INTERVAL_DOMAIN.adoc#val:minPt[D.minPt], xref:sig-INTERVAL_DOMAIN.adoc#val:maxPt[D.maxPt])`. + +`[.kw]#val# singleton : item \-> set`:: + `singleton item` returns the singleton set containing `item`. + +`[.kw]#val# fromList : item list \-> set`:: + `fromList items` returns the set containing the list of items. + +`[.kw]#val# interval : item * item \-> set`:: + `singleton (pt1, pt2)` returns a set containing the items between + the items `pt1` and `pt2` (as ordered by + xref:sig-INTERVAL_DOMAIN.adoc#val:compare[`D.compare`]). + This expression raises the + {sml-basis-url}/general.html#SIG:GENERAL.Domain:EXN[`Domain`] exception + if `D.compare(pt1, pt2) = GREATER`. + +`[.kw]#val# isEmpty : set \-> bool`:: + `isEmpty set` returns `true` if, and only if, `set` is empty. + +`[.kw]#val# isUniverse : set \-> bool`:: + `isUniverse set` returns `true` if, and only if, `set` contains all of + the elements of the domain. + +`[.kw]#val# member : set * item \-> bool`:: + `isEmpty (set, item)` returns `true` if, and only if, `item` is contained + in `set`. + +[[val:toList]] +`[.kw]#val# toList : set \-> item list`:: + `toList set` returns a list of the items in `set`. The items will be + sorted in increasing order. + +`[.kw]#val# intervals : set \-> interval list`:: + `intervals set` returns a list of disjoint intervals that represents + the set. The intervals will be sorted in increasing order. + +`[.kw]#val# add : set * item \-> set`:: + `add (set, item)` adds `item` to `set` and returns the resulting set. + +`[.kw]#val# add' : item * set \-> set`:: + `add' (item, set)` adds `item` to `set` and returns the resulting set. + + (* add an interval to the set *) +`[.kw]#val# addInt : set * interval \-> set`:: + `addInt (set, (pt1, pt2))` adds the items between the items `pt1` and `pt2` + (as ordered by xref:sig-INTERVAL_DOMAIN.adoc#val:compare[`D.compare`]) + to `set`. This expression raises the + {sml-basis-url}/general.html#SIG:GENERAL.Domain:EXN[`Domain`] exception + if `D.compare(pt1, pt2) = GREATER`. + +`[.kw]#val# addInt' : interval * set \-> set`:: + `addInt' ((pt1, pt2), set)` adds the items between the items `pt1` and `pt2` + (as ordered by xref:sig-INTERVAL_DOMAIN.adoc#val:compare[`D.compare`]) + to `set`. This expression raises the + {sml-basis-url}/general.html#SIG:GENERAL.Domain:EXN[`Domain`] exception + if `D.compare(pt1, pt2) = GREATER`. + +`[.kw]#val# complement : set \-> set`:: + `complement set` returns the complement of `set` (_i.e._, the set of + items from the universe that are *not* in `set`). + +`[.kw]#val# union : (set * set) \-> set`:: + `union (set1, set2)` returns the union of `set1` and `set2`; + (_i.e._, the set of items that are in `set1` or in `set2`). + +`[.kw]#val# intersect : (set * set) \-> set`:: + `intersect (set1, set2)` returns the intersection of `set1` and `set2`; + (_i.e._, the set of items that are in both `set1` and`set2`). + +`[.kw]#val# difference : (set * set) \-> set`:: + `difference (set1, set2)` returns the set difference of `set1` and `set2`; + (_i.e._, the set of items that are in `set1`, but not in `set2`). + +`[.kw]#val# app : (item \-> unit) \-> set \-> unit`:: + `app f set` applies the function `f` to the items in `set`. + This expression is equivalent to ++ +[source,sml] +------------ +List.app f (toList set) +------------ + +`[.kw]#val# foldl : (item * 'a \-> 'a) \-> 'a \-> set \-> 'a`:: + `foldl f init set` folds the function `f` over the items in + `set` in increasing order using `init` as the initial value. + This expression is equivalent to ++ +[source,sml] +------------ +List.foldl f init (toList set) +------------ + +`[.kw]#val# foldr : (item * 'a \-> 'a) \-> 'a \-> set \-> 'a`:: + `foldr f init set` folds the function `f` over the items in + `set` in decreasing order using `init` as the initial value. + This expression is equivalent to ++ +[source,sml] +------------ +List.foldr f init (toList set) +------------ + +`[.kw]#val# filter : (item \-> bool) \-> set \-> set`:: + `filter pred set` filters out any items of set for which the + predicate `pred` returns false. + +`[.kw]#val# exists : (item \-> bool) \-> set \-> bool`:: + `exists pred set` returns `true` if, and only if, there is an item + in the set for which `pred` returns `true`. This function + short-circuits evaluation once an item is encountered for which + `pred` returns `true`. + +`[.kw]#val# all : (item \-> bool) \-> set \-> bool`:: + `all pred set` returns `true` if, and only if, `pred` returns `true` + for all items in the set. This function short-circuits evaluation + once an item is encountered for which `pred` returns `false`. + +`[.kw]#val# appInt : (interval \-> unit) \-> set \-> unit`:: + `appInt f set` applies the function `f` to the intervals in `set`. + This expression is equivalent to ++ +[source,sml] +------------ +List.app f (intervals set) +------------ + +`[.kw]#val# foldlInt : (interval * 'a \-> 'a) \-> 'a \-> set \-> 'a`:: + `foldlInt f init set` folds the function `f` over the intervals in + `set` in increasing order using `init` as the initial value. + This expression is equivalent to ++ +[source,sml] +------------ +List.foldl f init (intervals set) +------------ + +`[.kw]#val# foldrInt : (interval * 'a \-> 'a) \-> 'a \-> set \-> 'a`:: + `foldrInt f init set` folds the function `f` over the intervals in + `set` in decreasing order using `init` as the initial value. + This expression is equivalent to ++ +[source,sml] +------------ +List.foldr f init (intervals set) +------------ + +`[.kw]#val# filterInt : (interval \-> bool) \-> set \-> set`:: + `filterInt pred set` filters out any intervals of set for which the + predicate `pred` returns false. + +`[.kw]#val# existsInt : (interval \-> bool) \-> set \-> bool`:: + `existsInt pred set` returns `true` if, and only if, there is an interval + in the set for which `pred` returns `true`. This function + short-circuits evaluation once an interval is encountered for which + `pred` returns `true`. + +`[.kw]#val# allInt : (interval \-> bool) \-> set \-> bool`:: + `allInt pred set` returns `true` if, and only if, `pred` returns `true` + for all of the intervals in the set. This function short-circuits evaluation + once an interval is encountered for which `pred` returns `false`. + +`[.kw]#val# compare : set * set \-> order`:: + `compare (set1, set2)` returns the lexical order of + the two sets. + +`[.kw]#val# isSubset : set * set \-> bool`:: + `isSubset (set1, set2)` returns true if, and only if, `set1` + is a subset of `set2` (_i.e._, any element of `set1` is an + element of `set2`). + +=== Deprecated functions + +The following functions are part of the interface, but have been +deprecated. + +`[.kw]#val# items : set \-> item list``:: + Use xref:#val:toList[`toList`] instead. + +== See Also + +xref:sig-INTERVAL_DOMAIN.adoc[`INTERVAL_DOMAIN`], +xref:smlnj-lib.adoc[__The Util Library__] diff --git a/smlnj-lib/Doc/src/Util/fun-KeywordFn.adoc b/smlnj-lib/Doc/src/Util/fun-KeywordFn.adoc new file mode 100644 index 0000000..07e290f --- /dev/null +++ b/smlnj-lib/Doc/src/Util/fun-KeywordFn.adoc @@ -0,0 +1,82 @@ += The `KeywordFn` functor +:Author: John Reppy +:Date: {release-date} +:stem: latexmath +:source-highlighter: pygments +:VERSION: {smlnj-version} + +The `KeywordFn` functor provides a simple way to support a table of keyword +(or reserved) identifiers in a scanner. + +== Synopsis + +[source,sml] +------------ +functor KeywordFn () +------------ + +== Functor Argument Interface + +[source,sml] +------------ +type token +type pos +val ident : (Atom.atom * pos * pos) -> token +val keywords : (string * ((pos * pos) -> token)) list +------------ + +== Functor Argument Description + +`[.kw]#type# token`:: + The type of tokens in the scanner. + +`[.kw]#type# pos`:: + The type of source file positions used by the scanner (_e.g._, character + positions in the source file). + +[[val:ident]] +`[.kw]#val# ident : (Atom.atom * pos * pos) \-> token`:: + `ident (id, pos, pos)` is used to create an identifier token (_i.e._, non-keyword) + for the given string, and start and end file positions. + +[[val:keywords]] +`[.kw]#val# keywords : (string * ((pos * pos) \-> token)) list`:: + A list of string-function pairs, where the strings are the keywords and + the functions are used to create the corresponding scanner tokens from + start and end file positions. + +== Interface + +[source,sml] +------------ + type token + type pos + val keyword : (string * pos * pos) -> token +------------ + +== Interface Description + +`[.kw]#type# token`:: + The type of tokens in the scanner. + +`[.kw]#type# pos`:: + The type of source file positions used by the scanner. + +`[.kw]#val# keyword : (string * pos * pos) -> token`:: + `keyword (id, p1, p2)` returns the token for the identifier `id`, + which is either one of the keyword tokens from the + xref:#val:keywords[`keywords`] list or otherwise is an identifier + token created using the xref:#val:ident[`ident`] function. + +== Discussion + +This functor was designed for the https://smlnj.org/doc/ML-Yacc/index.html[*ml-yacc*] +scanner interface, where tokens contain their file position. It is not clear that +it adds much utility over just using the xref:str-Atom.adoc[`atom`] type, but +is maintained for backward compatibility. + +== See Also + +xref:str-Atom.adoc[`Atom`], +xref:sig-MONO_HASH_TABLE.adoc#str:AtomTable[`AtomTable`], +xref:smlnj-lib.adoc[__The Util Library__] diff --git a/smlnj-lib/Doc/src/Util/fun-LeftPriorityQFn.adoc b/smlnj-lib/Doc/src/Util/fun-LeftPriorityQFn.adoc new file mode 100644 index 0000000..3e5138f --- /dev/null +++ b/smlnj-lib/Doc/src/Util/fun-LeftPriorityQFn.adoc @@ -0,0 +1,128 @@ += The `MONO_PRIORITYQ` signature +:Author: John Reppy +:Date: {release-date} +:stem: latexmath +:source-highlighter: pygments +:VERSION: {smlnj-version} + +The `LeftPriorityQFn` functor provides a functional implementation of priority +queues using leaftist heaps. + +== Synopsis + +[source,sml] +------------ +signature PRIORITY +signature MONO_PRIORITYQ +functor LeftPriorityQFn (P : PRIORITY) : MONO_PRIORITYQ +------------ + +== Functor Argument Interface + +[source,sml] +------------ +type priority +val compare : (priority * priority) -> order + +type item +val priority : item -> priority +------------ + +== Functor Argument Description + +`[.kw]#type# priority`:: + The abstract type of priority values. + +`[.kw]#val# compare : (priority * priority) \-> order`:: + `compare (pri1, pri2)` returns the order of the two priority values. + +`[.kw]#type# item`:: + The type of items in the priority queue. + +`[.kw]#val# priority : item \-> priority`:: + `priority item` returns the priority value for `item`. + +== Interface + +[source,sml] +------------ +type item +type queue + +val empty : queue + +val singleton : item -> queue + +val fromList : item list -> queue + +val insert : (item * queue) -> queue + +val remove : queue -> (item * queue) + +val next : queue -> (item * queue) option + +val findAndRemove : queue * (item -> bool) -> (item * queue) option + +val delete : queue * (item -> bool) -> queue + +val merge : (queue * queue) -> queue + +val numItems : queue -> int + +val isEmpty : queue -> bool +------------ + +== Interface Description + +`[.kw]#type# item`:: + The type of items in the priority queue. + +`[.kw]#type# queue`:: + The priority queue type. + +`[.kw]#val# empty : queue`:: + The empty priority queue. + +`[.kw]#val# singleton : item \-> queue`:: + `singleton item` returns a queue containing just `item`. + +`[.kw]#val# fromList : item list \-> queue`:: + `fromList items` returns a queue containing the `items`. + +`[.kw]#val# insert : (item * queue) \-> queue`:: + `insert (pq, item)` returns the queue that is `pq` with `item` added. + +`[.kw]#val# remove : queue \-> (item * queue)`:: + `remove pq` returns `(item, pq')`, where `item` is the highest priority item + in `pq` and ``pq'`` is the result of removing `item` from `pq`. This function + raises the {sml-basis-url}/list.html#SIG:LIST.Empty:EXN[Empty] exception + when `pq` is empty. + +`[.kw]#val# next : queue \-> (item * queue) option`:: + `remove pq` returns `SOME(item, pq')`, where `item` is the highest + priority item in `pq` and ``pq'`` is the result of removing `item` + from `pq`. If `pq` is empty, then `NONE` is returned. + +`[.kw]#val# findAndRemove : queue * (item -> bool) -> (item * queue) option`:: + `findAndRemove (pq, pred)` returns `SOME(item, pq')`, where `item` is the + highest priority item in `pq` such that `pred item` returns `true`, and + and ``pq'`` is the result of removing `item` from `pq`. If no such item + exists, then `NONE` is returned. + +`[.kw]#val# delete : queue * (item -> bool) -> queue`:: + `delete (pq, pred)` deletes any item in `pq` that satisfies the predicate + and returns the resulting queue. + +`[.kw]#val# merge : (queue * queue) \-> queue`:: + `merge (pq1, pq2)` returns the priority queue formed by merging the items in + the two queues. + +`[.kw]#val# numItems : queue \-> int`:: + `numItems pq` returns the number of items in `pq`. + +`[.kw]#val# isEmpty : queue \-> bool`:: + `isEmpty pq` returns `true` if, and only if, `pq` is empty. + +== See Also + +xref:smlnj-lib.adoc[__The Util Library__] diff --git a/smlnj-lib/Doc/src/Util/fun-ListMapFn.adoc b/smlnj-lib/Doc/src/Util/fun-ListMapFn.adoc new file mode 100644 index 0000000..68b58c3 --- /dev/null +++ b/smlnj-lib/Doc/src/Util/fun-ListMapFn.adoc @@ -0,0 +1,26 @@ += The `ListMapFn` functor +:Author: John Reppy +:Date: {release-date} +:stem: latexmath +:source-highlighter: pygments +:VERSION: {smlnj-version} + +The `ListMapFn` functor provides a sorted-list implementation of the +xref:sig-ORD_MAP.adoc[`ORD_MAP`] signature parameterized over the key type. +This implementation is light weight and fast for small domains, but +for larger applications, it is recommended that one use the +xref:fun-RedBlackMapFn.adoc[`RedBlackMapFn`] functor instead. + +== Synopsis + +[source,sml] +------------ +functor ListMapFn (K : ORD_KEY) :> ORD_MAP where type Key.ord_key = K.ord_key +------------ + +== See Also + +xref:sig-ORD_KEY.adoc[`ORD_KEY`], +xref:sig-ORD_MAP.adoc[`ORD_MAP`], +xref:fun-RedBlackMapFn.adoc[`RedBlackMapFn`], +xref:smlnj-lib.adoc[__The Util Library__] diff --git a/smlnj-lib/Doc/src/Util/fun-ListSetFn.adoc b/smlnj-lib/Doc/src/Util/fun-ListSetFn.adoc new file mode 100644 index 0000000..6122e80 --- /dev/null +++ b/smlnj-lib/Doc/src/Util/fun-ListSetFn.adoc @@ -0,0 +1,25 @@ += The `ListSetFn` functor +:Author: John Reppy +:Date: {release-date} +:stem: latexmath +:source-highlighter: pygments +:VERSION: {smlnj-version} + +The `ListSetFn` functor provides a sorted-list implementation of the +xref:sig-ORD_SET.adoc[`ORD_SET`] signature parameterized over the element type. +This implementation is light weight and fast for small sets, but +for larger applications, it is recommended that one use the +xref:fun-RedBlackSetFn.adoc[`RedBlackSetFn`] functor instead. + +== Synopsis + +[source,sml] +------------ +functor ListSetFn (K : ORD_KEY) :> ORD_SET where type Key.ord_key = K.ord_key +------------ + +== See Also + +xref:sig-ORD_KEY.adoc[`ORD_KEY`], +xref:sig-ORD_SET.adoc[`ORD_SET`], +xref:smlnj-lib.adoc[__The Util Library__] diff --git a/smlnj-lib/Doc/src/Util/fun-MonoArrayFn.adoc b/smlnj-lib/Doc/src/Util/fun-MonoArrayFn.adoc new file mode 100644 index 0000000..c934656 --- /dev/null +++ b/smlnj-lib/Doc/src/Util/fun-MonoArrayFn.adoc @@ -0,0 +1,33 @@ += The `MonoArrayFn` functor +:Author: John Reppy +:Date: {release-date} +:stem: latexmath +:source-highlighter: pygments +:VERSION: {smlnj-version} + +The `MonoArrayFn` functor allows easy construction of new monomorphic array +structures. + +== Synopsis + +[source,sml] +------------ +functor MonoArrayFn (type elem) :> MONO_ARRAY where type elem = elem +------------ + +== Description + +This functor takes an element type and defines a structure that +matches the +{sml-basis-url}/mono-array.html[`MONO_ARRAY`] +signature from the {sml-basis-url}/Basis/index.html[*SML Basis Library*], +which can then be supplied to other functors that require such +an argument. + +== See Also + +xref:fun-ArrayQSortFn.adoc[`ArrayQSortFn`], +xref:fun-BSearchFn.adoc[`BSearchFn`], +xref:fun-DynamicArrayFn.adoc[`DynamicArrayFn`], +{sml-basis-url}/Basis/mono-array.html[`MONO_ARRAY`], +xref:smlnj-lib.adoc[__The Util Library__] diff --git a/smlnj-lib/Doc/src/Util/fun-RedBlackMapFn.adoc b/smlnj-lib/Doc/src/Util/fun-RedBlackMapFn.adoc new file mode 100644 index 0000000..83e469b --- /dev/null +++ b/smlnj-lib/Doc/src/Util/fun-RedBlackMapFn.adoc @@ -0,0 +1,25 @@ += The `RedBlackMapFn` functor +:Author: John Reppy +:Date: {release-date} +:stem: latexmath +:source-highlighter: pygments +:VERSION: {smlnj-version} + +The `RedBlackMapFn` functor provides a red-black-tree implementation of the +xref:sig-ORD_MAP.adoc[`ORD_MAP`] signature parameterized over the key type. + +This functor is the preferred implementation of finite maps over ordered +keys as it is faster than the other implementations. + +== Synopsis + +[source,sml] +------------ +functor RedBlackMapFn (K : ORD_KEY) :> ORD_MAP where type Key.ord_key = K.ord_key +------------ + +== See Also + +xref:sig-ORD_KEY.adoc[`ORD_KEY`], +xref:sig-ORD_MAP.adoc[`ORD_MAP`], +xref:smlnj-lib.adoc[__The Util Library__] diff --git a/smlnj-lib/Doc/src/Util/fun-RedBlackSetFn.adoc b/smlnj-lib/Doc/src/Util/fun-RedBlackSetFn.adoc new file mode 100644 index 0000000..3ac177e --- /dev/null +++ b/smlnj-lib/Doc/src/Util/fun-RedBlackSetFn.adoc @@ -0,0 +1,25 @@ += The `RedBlackSetFn` functor +:Author: John Reppy +:Date: {release-date} +:stem: latexmath +:source-highlighter: pygments +:VERSION: {smlnj-version} + +The `RedBlackSetFn` functor provides a red-black-tree implementation of the +xref:sig-ORD_SET.adoc[`ORD_SET`] signature parameterized over the element type. + +This functor is the preferred implementation of finite sets of ordered +elements as it is faster than the other implementations. + +== Synopsis + +[source,sml] +------------ +functor RedBlackSetFn (K : ORD_KEY) :> ORD_SET where type Key.ord_key = K.ord_key +------------ + +== See Also + +xref:sig-ORD_KEY.adoc[`ORD_KEY`], +xref:sig-ORD_SET.adoc[`ORD_SET`], +xref:smlnj-lib.adoc[__The Util Library__] diff --git a/smlnj-lib/Doc/src/Util/fun-SplayMapFn.adoc b/smlnj-lib/Doc/src/Util/fun-SplayMapFn.adoc new file mode 100644 index 0000000..b2da315 --- /dev/null +++ b/smlnj-lib/Doc/src/Util/fun-SplayMapFn.adoc @@ -0,0 +1,27 @@ += The `SplayMapFn` functor +:Author: John Reppy +:Date: {release-date} +:stem: latexmath +:source-highlighter: pygments +:VERSION: {smlnj-version} + +The `SplayMapFn` functor provides a splay-tree implementation of the +xref:sig-ORD_MAP.adoc[`ORD_MAP`] signature parameterized over the key type. + +It is recommended, however, that one use the xref:fun-RedBlackMapFn.adoc[`RedBlackMapFn`] +functor instead of `SplayMapFn`, since experimentation has shown it to be +faster across the board. + +== Synopsis + +[source,sml] +------------ +functor SplayMapFn (K : ORD_KEY) :> ORD_MAP where type Key.ord_key = K.ord_key +------------ + +== See Also + +xref:sig-ORD_KEY.adoc[`ORD_KEY`], +xref:sig-ORD_MAP.adoc[`ORD_MAP`], +xref:fun-RedBlackMapFn.adoc[`RedBlackMapFn`], +xref:smlnj-lib.adoc[__The Util Library__] diff --git a/smlnj-lib/Doc/src/Util/fun-SplaySetFn.adoc b/smlnj-lib/Doc/src/Util/fun-SplaySetFn.adoc new file mode 100644 index 0000000..cc46e73 --- /dev/null +++ b/smlnj-lib/Doc/src/Util/fun-SplaySetFn.adoc @@ -0,0 +1,27 @@ += The `SplaySetFn` functor +:Author: John Reppy +:Date: {release-date} +:stem: latexmath +:source-highlighter: pygments +:VERSION: {smlnj-version} + +The `SplaySetFn` functor provides a splay-tree implementation of the +xref:sig-ORD_SET.adoc[`ORD_SET`] signature parameterized over the element type. + +It is recommended, however, that one use the xref:fun-RedBlackSetFn.adoc[`RedBlackSetFn`] +functor instead of `SplaySetFn`, since experimentation has shown it to be +faster across the board. + +== Synopsis + +[source,sml] +------------ +functor SplaySetFn (K : ORD_KEY) :> ORD_SET where type Key.ord_key = K.ord_key +------------ + +== See Also + +xref:sig-ORD_KEY.adoc[`ORD_KEY`], +xref:sig-ORD_SET.adoc[`ORD_SET`], +xref:fun-RedBlackSetFn.adoc[`RedBlackSetFn`], +xref:smlnj-lib.adoc[__The Util Library__] diff --git a/smlnj-lib/Doc/src/Util/sig-HASH_KEY.adoc b/smlnj-lib/Doc/src/Util/sig-HASH_KEY.adoc new file mode 100644 index 0000000..508512a --- /dev/null +++ b/smlnj-lib/Doc/src/Util/sig-HASH_KEY.adoc @@ -0,0 +1,52 @@ += The `HASH_KEY` signature +:Author: John Reppy +:Date: {release-date} +:stem: latexmath +:source-highlighter: pygments +:VERSION: {smlnj-version} + +The `HASH_KEY` signature describes a monomorphic type with an equality +test and hashing function. It is used as the argument signature for +the xref:fun-HashSetFn.adoc[`HashSetFn`] and xref:fun-HashTableFn.adoc[`HashTableFn`] +functors, and as a sub-structure signature in the +xref:fun-HashSetFn.adoc[`MONO_HASH_SET`] and +xref:fun-HashTableFn.adoc[`MONO_HASH_TABLE`] signatures. + +== Synopsis + +[source,sml] +------------ +signature HASH_KEY +------------ + +== Interface + +[source,sml] +------------ +type hash_key + +val hashVal : hash_key -> word + +val sameKey : (hash_key * hash_key) -> bool +------------ + +== Description + +[[type:hash_key]] +`[.kw]#type# hash_key`:: + The type of key values. + +`[.kw]#val# hashVal : hash_key \-> word`:: + `hashVal key` returns a hash value for the key. + +`[.kw]#val# sameKey : (hash_key * hash_key) \-> bool`:: + `sameKey (key1, key2)` returns true of two hash keys are equal. + Implementations of this signature should ensure that if + `sameKey (key1, key2)`, then `hashVal key1 = hashVal key2`. + +== See Also + +xref:fun-HashSetFn.adoc[`HashSetFn`], +xref:fun-HashTableFn.adoc[`HashTableFn`], +xref:sig-MONO_HASH_TABLE.adoc[`MONO_HASH_TABLE`], +xref:smlnj-lib.adoc[__The Util Library__] diff --git a/smlnj-lib/Doc/src/Util/sig-INTERVAL_DOMAIN.adoc b/smlnj-lib/Doc/src/Util/sig-INTERVAL_DOMAIN.adoc new file mode 100644 index 0000000..6e06a8e --- /dev/null +++ b/smlnj-lib/Doc/src/Util/sig-INTERVAL_DOMAIN.adoc @@ -0,0 +1,94 @@ += The `INTERVAL_DOMAIN` signature +:Author: John Reppy +:Date: {release-date} +:stem: latexmath +:source-highlighter: pygments +:VERSION: {smlnj-version} + +The `INTERVAL_DOMAIN` signature defines a representation of an +abstract ordered domain. It is required that the domain +consist of discrete values that are totally orders and that there +be a minimum and maximum value. This signature is used as the +argument signature for the xref:fun-IntervalSetFn.adoc[`IntervalSetFn`] +functor. + +== Synopsis + +[source,sml] +------------ +signature INTERVAL_DOMAIN +------------ + +== Interface + +[source,sml] +------------ +type point + +val compare : (point * point) -> order + +val succ : point -> point +val pred : point -> point + +val isSucc : (point * point) -> bool + +val minPt : point +val maxPt : point +------------ + +== Description + +`[.kw]#type# point`:: + The abstract type of elements in the ordered domain. + +[[val:compare]] +`[.kw]#val# compare : (point * point) \-> order`:: + `compare (pt1, pt2)` returns the relation between two points + in the domain. + +`[.kw]#val# succ : point \-> point`:: + `succ item` returns the successor to `item`. If `item` is + the maximum element (xref:#val:maxPt[`maxPt`]), then `maxPt` is + returned. + +`[.kw]#val# pred : point \-> point`:: + `succ item` returns the successor to `item`. If `item` is + the minimum element (xref:#val:minPt[`minPt`]), then `minPt` is + returned. + +`[.kw]#val# isSucc : (point * point) \-> bool`:: + `isSucc (pt1, pt2)` returns `true` if `pt1` is the predecessor + of `pt2` and `pt2 is the successor of `pt1`. + +[[val:minPt]] +`[.kw]#val# minPt : point`:: + The minimum point in the domain. + +[[val:maxPt]] +`[.kw]#val# maxPt : point`:: + The maximum point in the domain. + +== Example + +Here is an example of the 8-bit character type as an interval domain. + +[source,sml] +------------ +structure CharDom : INTERVAL_DOMAIN = + struct + type point = char + val compare = Char.compare + fun succ #"\255" = #"\255" + | succ c = chr(ord c + 1) + fun pred #"\000" = #"\000" + | pred c = chr(ord c - 1) + fun isSucc (c1, c2) = (ord c1 + 1 = ord c2) + val minPt = #"\000" + val maxPt = #"\255" + end +------------ + +== See Also + +xref:fun-IntervalSetFn.adoc[`IntervalSetFn`], +xref:smlnj-lib.adoc[__The Util Library__] diff --git a/smlnj-lib/Doc/src/Util/sig-MONO_HASH_TABLE.adoc b/smlnj-lib/Doc/src/Util/sig-MONO_HASH_TABLE.adoc new file mode 100644 index 0000000..bc8a7bf --- /dev/null +++ b/smlnj-lib/Doc/src/Util/sig-MONO_HASH_TABLE.adoc @@ -0,0 +1,221 @@ += The `MONO_HASH_TABLE` signature +:Author: John Reppy +:Date: {release-date} +:stem: latexmath +:source-highlighter: pygments +:VERSION: {smlnj-version} + +The `MONO_HASH_TABLE` signature defines an interface to imperative hash +tables with monomorphic keys. The **SML/NJ Library** provides two +specialized implementations of this signature, as well as a functor for +constructing additional implementations. + +The tables are implemented as an array of _buckets_, which are +lists of key-value pairs. The number of buckets grows with the number +of table entries. + +== Synopsis + +[source,sml] +------------ +signature MONO_HASH_TABLE + +structure AtomTable :> MONO_HASH_TABLE where type Key.hash_key = Atom.atom +structure IntHashTable :> MONO_HASH_TABLE where type Key.hash_key = int +structure WordHashTable :> MONO_HASH_TABLE where type Key.hash_key = word +------------ + +== Interface + +[source,sml] +------------ +structure Key : HASH_KEY + +type 'a hash_table + +val mkTable : (int * exn) -> 'a hash_table + +val clear : 'a hash_table -> unit + +val insert : 'a hash_table -> (Key.hash_key * 'a) -> unit + +val insertWith : ('a * 'a -> 'a) -> 'a hash_table -> Key.hash_key * 'a -> unit +val insertWithi : (Key.hash_key * 'a * 'a -> 'a) + -> 'a hash_table + -> Key.hash_key * 'a + -> unit + +val inDomain : 'a hash_table -> Key.hash_key -> bool + +val lookup : 'a hash_table -> Key.hash_key -> 'a +val find : 'a hash_table -> Key.hash_key -> 'a option + +val findAndRemove : 'a hash_table -> Key.hash_key -> 'a option + +val remove : 'a hash_table -> Key.hash_key -> 'a + +val numItems : 'a hash_table -> int + +val listItems : 'a hash_table -> 'a list +val listItemsi : 'a hash_table -> (Key.hash_key * 'a) list + +val app : ('a -> unit) -> 'a hash_table -> unit +val appi : ((Key.hash_key * 'a) -> unit) -> 'a hash_table -> unit + +val map : ('a -> 'b) -> 'a hash_table -> 'b hash_table +val mapi : ((Key.hash_key * 'a) -> 'b) -> 'a hash_table -> 'b hash_table + +val fold : (('a * 'b) -> 'b) -> 'b -> 'a hash_table -> 'b +val foldi : ((Key.hash_key * 'a * 'b) -> 'b) -> 'b -> 'a hash_table -> 'b + +val modify : ('a -> 'a) -> 'a hash_table -> unit +val modifyi : ((Key.hash_key * 'a) -> 'a) -> 'a hash_table -> unit + +val filter : ('a -> bool) -> 'a hash_table -> unit +val filteri : ((Key.hash_key * 'a) -> bool) -> 'a hash_table -> unit + +val copy : 'a hash_table -> 'a hash_table + +val bucketSizes : 'a hash_table -> int list +------------ + +== Description + +`[.kw]#structure# Key : HASH_KEY`:: + This substructure defines the type of keys used to index the tables and + hash and equality functions on the key type. + +`[.kw]#type# 'a hash_table`:: + The type of imperative hash tables indexed by `Key.hash_key` values + +`[.kw]#val# mkTable : (int * exn) \-> 'a hash_table`:: + `mkTable (n, ex)` creates a new hash table; the table will be initially + sized to hold at least `n` items. The exception `ex` is raised by the + xref:#val:lookup[`lookup`] and xref:#val:remove[`remove`] functions + when the search key is not in the domain. + +`[.kw]#val# clear : 'a hash_table \-> unit`:: + `clear tbl` removes all of the entries in the table. + +`[.kw]#val# insert : 'a hash_table \-> (Key.hash_key * 'a) \-> unit`:: + `insert tbl (key, item)` inserts a mapping from `key` to `item` into `tbl`. + Any existing mapping of `key` is discarded. + +`[.kw]#val# insertWith : ('a * 'a -> 'a) \-> 'a hash_table \-> Key.hash_key * 'a \-> unit`:: + `insertWith comb (tbl, key, v)` adds the mapping from `key` to `value` to `tbl`, + where `value = comb(v', v)`, if `tbl` already contained a mapping from `key` + to `v'`; otherwise, `value = v`. + +`[.kw]#val# insertWithi : (Key.hash_key * 'a * 'a -> 'a) \-> 'a hash_table \-> Key.hash_key * 'a \-> unit`:: + `insertWithi comb (tbl, key, v)` adds the mapping from `key` to `value` to `tbl`, + where `value = comb(key, v', v)`, if `m` already contained a mapping from `key` + to `v'`; otherwise, `value = v`. + +`[.kw]#val# inDomain : 'a hash_table \-> Key.hash_key \-> bool`:: + `inDomain tbl key` returns `true` if, and only if, `key` is in the + domain of the table + +[[val:lookup]] +`[.kw]#val# lookup : 'a hash_table \-> Key.hash_key \-> 'a`:: + `lookup tbl key` returns the item that `key` maps to if `key` is in + the domain of `tbl`. Otherwise, the table's exception is raised. + +`[.kw]#val# find : 'a hash_table \-> Key.hash_key \-> 'a option`:: + `find tbl key` returns the `SOME v` if `key` is mapped to `v` in `tbl`. + Otherwise, it returns `NONE`. + +`[.kw]#val# findAndRemove : 'a hash_table -> Key.hash_key -> 'a option`:: + `findAndRemove (tbl, key)` returns `SOME v` and removes `key` from the + table if `tbl` maps `key` to `v`. If `key` is not in the domain of `tbl`, + then `NONE` is returned and `tbl` is unchanged. + +[[val:remove]] +`[.kw]#val# remove : 'a hash_table \-> Key.hash_key \-> 'a`:: + `remove tbl key` returns the item that `key` maps to if `key` is in + the domain of `tbl` and removes it from the table. Otherwise, the + table's exception is raised. + +`[.kw]#val# numItems : 'a hash_table \-> int`:: + `numItems tbl` returns the number of entries in the table. + +`[.kw]#val# listItems : 'a hash_table \-> 'a list`:: + `listItems tbl` returns a list of the items in the range of `tbl`. + +`[.kw]#val# listItemsi : 'a hash_table \-> (Key.hash_key * 'a) list`:: + `listItemsi tbl` returns a list of the key-value entries in `tbl`. + +`[.kw]#val# app : ('a \-> unit) \-> 'a hash_table \-> unit`:: + `app f tbl` applies the function `f` to each item in the range of `tbl`. + +`[.kw]#val# appi : ((Key.hash_key * 'a) \-> unit) \-> 'a hash_table \-> unit`:: + `appi f tbl` applies the function `f` to each item in the + key-value entries in `tbl`. + +`[.kw]#val# map : ('a \-> 'b) \-> 'a hash_table \-> 'b hash_table`:: + `map f tbl` creates a new table with an entry `(key, f(lookup tbl key))` + in the new table for every `key` in `tbl`. The exception for the new + table is copied from `tbl`. + +`[.kw]#val# mapi : ((Key.hash_key * 'a) \-> 'b) \-> 'a hash_table \-> 'b hash_table`:: + `mapi f tbl` creates a new table with an entry `(key, f(key, lookup tbl key))` + in the new table for every `key` in `tbl`. The exception for the new + table is copied from `tbl`. + +`[.kw]#val# fold : (('a * 'b) \-> 'b) \-> 'b \-> 'a hash_table \-> 'b`:: + `fold f init tbl` folds the function `f` over the items in the range of `tbl` + using `init` as an initial value. + +`[.kw]#val# foldi : ((Key.hash_key * 'a * 'b) \-> 'b) \-> 'b \-> 'a hash_table \-> 'b`:: + `foldi f init tbl` folds the function `f` over the key-value entries in `tbl` + using `init` as an initial value. + +`[.kw]#val# modify : ('a \-> 'a) \-> 'a hash_table \-> unit`:: + `modify f tbl` applies the function `f` for effect to the items in the + range of `tbl`, replacing the old items with the result of applying `f`. + +`[.kw]#val# modifyi : ((Key.hash_key * 'a) \-> 'a) \-> 'a hash_table \-> unit`:: + `modifyi f tbl` applies the function `f` for effect to the key-value + entries in `tbl`, replacing the old items with the result of applying `f`. + +`[.kw]#val# filter : ('a \-> bool) \-> 'a hash_table \-> unit`:: + `filter pred tbl` removes any entry `(key, item)` from `tbl` for which + `pred item` returns `false`. + +`[.kw]#val# filteri : ((Key.hash_key * 'a) \-> bool) \-> 'a hash_table \-> unit`:: + `filteri pred tbl` removes any entry `(key, item)` from `tbl` for which + `pred(key, item)` returns `false`. + +`[.kw]#val# copy : 'a hash_table \-> 'a hash_table`:: + `copy tbl` creates a copy of `tbl`. This expression is equivalent to ++ +[source,sml] +------------ +map (fn x => x) tbl +------------ + +`[.kw]#val# bucketSizes : 'a hash_table \-> int list`:: + `bucketSizes tbl` returns a list of the current number of items per + bucket. This function allows users to gauge the quality of their + hashing function. + +== Instances + +[[str:AtomTable]] +`[.kw]#structure# AtomTable`:: + This structure implements hash tables keyed by the + xref:str-Atom.adoc#type:atom[`Atom.atom`] type. + +[[str:IntHashTable]] +`[.kw]#structure# IntHashTable`:: + This structure implements hash tables keyed by the default `int` type. + +[[str:WordHashTable]] +`[.kw]#structure# WordHashTable`:: + This structure implements hash tables keyed by the default `word` type. + +== See Also + +xref:sig-HASH_KEY.adoc[`HASH_KEY`], +xref:str-HashTable.adoc[`HashTable`], +xref:fun-HashTableFn.adoc[`HashTableFn`], +xref:smlnj-lib.adoc[__The Util Library__] diff --git a/smlnj-lib/Doc/src/Util/sig-ORD_KEY.adoc b/smlnj-lib/Doc/src/Util/sig-ORD_KEY.adoc new file mode 100644 index 0000000..cb66693 --- /dev/null +++ b/smlnj-lib/Doc/src/Util/sig-ORD_KEY.adoc @@ -0,0 +1,42 @@ += The `ORD_KEY` signature +:Author: John Reppy +:Date: {release-date} +:stem: latexmath +:source-highlighter: pygments +:VERSION: {smlnj-version} + +The `ORD_KEY` signature defines an interface to an abstract type +of keys with a comparison function. This signature is used as +the argument to various implementations of finite maps and sets. + +== Synopsis + +[source,sml] +------------ +signature ORD_KEY +------------ + +== Interface + +[source,sml] +------------ +type ord_key + +val compare : ord_key * ord_key -> order +------------ + +== Description + +`[.kw]#type# ord_key`:: + The abstract key type. + +`[.kw]#val# compare : ord_key * ord_key \-> order`:: + `compare (k1, k2)` returns the relation between the two keys. This + function should define a *total* order on the `ord_key` type. + +== See Also + +xref:sig-ORD_MAP.adoc[`ORD_MAP`], +xref:sig-ORD_SET.adoc[`ORD_SET`], +xref:fun-GraphSCCFn.adoc[`GraphSCCFn`], +xref:smlnj-lib.adoc[__The Util Library__] diff --git a/smlnj-lib/Doc/src/Util/sig-ORD_MAP.adoc b/smlnj-lib/Doc/src/Util/sig-ORD_MAP.adoc new file mode 100644 index 0000000..90b2dbb --- /dev/null +++ b/smlnj-lib/Doc/src/Util/sig-ORD_MAP.adoc @@ -0,0 +1,417 @@ += The `ORD_MAP` signature +:Author: John Reppy +:Date: {release-date} +:stem: latexmath +:source-highlighter: pygments +:VERSION: {smlnj-version} + +The `ORD_MAP` signature defines an interface to finite maps +over ordered keys. The **SML/NJ Library** provides a number of +different implementations of this interface. Functors are +provided for constructing maps for user-defined key types; +in addition, a number of instances for specific types +are also provided. + +== Synopsis + +[source,sml] +------------ +signature ORD_MAP + +structure AtomMap : ORD_MAP where type Key.ord_key = Atom.atom +structure AtomBinaryMap : ORD_MAP where type Key.ord_key = Atom.atom +structure AtomRedBlackMap : ORD_MAP where type Key.ord_key = Atom.atom +structure IntBinaryMap : ORD_MAP where type Key.ord_key = int +structure IntListMap : ORD_MAP where type Key.ord_key = int +structure IntRedBlackMap : ORD_MAP where type Key.ord_key = int +structure WordRedBlackMap : ORD_MAP where type Key.ord_key = word +------------ + +== Interface + +[source,sml] +------------ +structure Key : ORD_KEY + +type 'a map + +val empty : 'a map + +val isEmpty : 'a map -> bool + +val singleton : (Key.ord_key * 'a) -> 'a map + +val insert : 'a map * Key.ord_key * 'a -> 'a map +val insert' : ((Key.ord_key * 'a) * 'a map) -> 'a map + +val insertWith : ('a * 'a -> 'a) -> 'a map * Key.ord_key * 'a -> 'a map +val insertWithi : (Key.ord_key * 'a * 'a -> 'a) -> 'a map * Key.ord_key * 'a -> 'a map + +val find : 'a map * Key.ord_key -> 'a option + +val lookup : 'a map * Key.ord_key -> 'a + +val inDomain : ('a map * Key.ord_key) -> bool + +val remove : 'a map * Key.ord_key -> 'a map * 'a + +val findAndRemove : 'a map * Key.ord_key -> ('a map * 'a) option + +val first : 'a map -> 'a option +val firsti : 'a map -> (Key.ord_key * 'a) option + +val numItems : 'a map -> int + +val listItems : 'a map -> 'a list +val listItemsi : 'a map -> (Key.ord_key * 'a) list + +val listKeys : 'a map -> Key.ord_key list + +val collate : ('a * 'a -> order) -> ('a map * 'a map) -> order + +val unionWith : ('a * 'a -> 'a) -> ('a map * 'a map) -> 'a map +val unionWithi : (Key.ord_key * 'a * 'a -> 'a) -> ('a map * 'a map) -> 'a map + +val intersectWith : ('a * 'b -> 'c) -> ('a map * 'b map) -> 'c map +val intersectWithi : (Key.ord_key * 'a * 'b -> 'c) -> ('a map * 'b map) -> 'c map + +val mergeWith : ('a option * 'b option -> 'c option) + -> ('a map * 'b map) -> 'c map +val mergeWithi : (Key.ord_key * 'a option * 'b option -> 'c option) + -> ('a map * 'b map) -> 'c map + +val app : ('a -> unit) -> 'a map -> unit +val appi : ((Key.ord_key * 'a) -> unit) -> 'a map -> unit + +val map : ('a -> 'b) -> 'a map -> 'b map +val mapi : (Key.ord_key * 'a -> 'b) -> 'a map -> 'b map + +val foldl : ('a * 'b -> 'b) -> 'b -> 'a map -> 'b +val foldli : (Key.ord_key * 'a * 'b -> 'b) -> 'b -> 'a map -> 'b +val foldr : ('a * 'b -> 'b) -> 'b -> 'a map -> 'b +val foldri : (Key.ord_key * 'a * 'b -> 'b) -> 'b -> 'a map -> 'b + +val filter : ('a -> bool) -> 'a map -> 'a map +val filteri : (Key.ord_key * 'a -> bool) -> 'a map -> 'a map + +val mapPartial : ('a -> 'b option) -> 'a map -> 'b map +val mapPartiali : (Key.ord_key * 'a -> 'b option) -> 'a map -> 'b map + +val exists : ('a -> bool) -> 'a map -> bool +val existsi : (Key.ord_key * 'a -> bool) -> 'a map -> bool +val all : ('a -> bool) -> 'a map -> bool +val alli : (Key.ord_key * 'a -> bool) -> 'a map -> bool +------------ + +== Description + +`[.kw]#structure# Key : ORD_KEY`:: + This substructure defines the type of keys used to index the maps and + the comparison function used to order them. + +`[.kw]#type# 'a map`:: + A finite map from `Key.ord_key` values to ``'b`` values. + +`[.kw]#val# empty : 'a map`:: + The empty map. + +`[.kw]#val# isEmpty : 'a map \-> bool`:: + `isEmpty m` returns true if, and only if, `m` is empty. + +`[.kw]#val# singleton : (Key.ord_key * 'a) \-> 'a map`:: + `singleton (key, v)` creates the singleton map that maps `key` to `v`. + +`[.kw]#val# insert : 'a map * Key.ord_key * 'a \-> 'a map`:: + `insert (m, key, v)` adds the mapping from `key` to `v` to `m`. + This mapping overrides any previous mapping from `key`. + +`[.kw]#val# insert' : ((Key.ord_key * 'a) * 'a map) \-> 'a map`:: + `insert' ((key, v), map)` adds the mapping from `key` to `v` to `m`. + This mapping overrides any previous mapping from `key`. + +`[.kw]#val# insertWith : ('a * 'a \-> 'a) \-> 'a map * Key.ord_key * 'a \-> 'a map`:: + `insertWith comb (m, key, v)` adds the mapping from `key` to `value` to `m`, + where `value = comb(v', v)`, if `m` already contained a mapping from `key` + to `v'`; otherwise, `value = v`. + +`[.kw]#val# insertWithi : (Key.ord_key * 'a * 'a \-> 'a) \-> 'a map * Key.ord_key * 'a \-> 'a map`:: + `insertWithi comb (m, key, v)` adds the mapping from `key` to `value` to `m`, + where `value = comb(key, v', v)`, if `m` already contained a mapping from `key` + to `v'`; otherwise, `value = v`. + +`[.kw]#val# find : 'a map * Key.ord_key \-> 'a option`:: + `find (m, key)` returns `SOME v`, if `m` maps `key` to `v` and `NONE` otherwise. + +`[.kw]#val# lookup : 'a map * Key.ord_key \-> 'a`:: + `lookup (m, key)` returns `v`, if `m` maps `key` to `v`; otherwise it + raises the exception xref:str-LibBase.adoc#exn:NotFound[`NotFound`]. + +`[.kw]#val# inDomain : ('a map * Key.ord_key) \-> bool`:: + `inDomain (m, key)` returns `true` if `key` is in the domain of `m`. + +`[.kw]#val# remove : 'a map * Key.ord_key \-> 'a map * 'a`:: + `remove (m, key)` returns the pair `(m', v)`, if `m` maps `key` to `v` + and where `m'` is `m` with `key` removed from its domain. If `key` + is not in the domain of `m`, then it raises the exception + xref:str-LibBase.adoc#exn:NotFound[`NotFound`]. + +`[.kw]#val# findAndRemove : 'a map * Key.ord_key \-> ('a map * 'a) option`:: + `findAndRemove (m, key)` returns `SOME(m', v)`, if `m` maps `key` to `v` + and where `m'` is `m` with `key` removed from its domain. If `key` + is not in the domain of `m`, then it returns `NONE`. + +`[.kw]#val# first : 'a map \-> 'a option`:: + `first m` returns `SOME item` when `item` is the value associated with + the first (or smallest) key in the domain of the map `m`. It returns + `NONE` when the map is empty. + +`[.kw]#val# firsti : 'a map \-> (Key.ord_key * 'a) option`:: + `first m` returns `SOME(key, item)` when `key` is the first (or smallest) + key in the domain of the map `m` and `key` maps to `item`. It returns + `NONE` when the map is empty. + +`[.kw]#val# numItems : 'a map \-> int`:: + `numItems m` returns the size of ``m``'s domain. + +`[.kw]#val# listItems : 'a map \-> 'a list`:: + `listItems m` returns a list of the values in the _range_ of `m`. + Note that this list will contain duplicates when multiple keys in + ``m``'s domain map to the same value. + +`[.kw]#val# listItemsi : 'a map \-> (Key.ord_key * 'a) list`:: + `listItemsi m` returns a list of the key-value pairs in `m`. + +`[.kw]#val# listKeys : 'a map \-> Key.ord_key list`:: + `listKeys m` returns a list of the keys in the domain of `m`. + +`[.kw]#val# equiv : ('a * 'b \-> order) \-> ('a map * 'b map) \-> bool`:: + `equiv eqV (m1, m2)` returns true if the two maps have the same domains + and if, for all `x` in the domain of the maps, `eqV(lookup(m1, x), lookup(m2, x))` + evaluates to `true`. + +`[.kw]#val# collate : ('a * 'b \-> order) \-> ('a map * 'b map) \-> order`:: + `collate cmpV (m1, m2)` returns the order of the two maps, where `cmpV` is + used to compare the values in the range of the maps. + +`[.kw]#val# extends : ('a * 'b \-> order) \-> ('a map * 'b map) \-> order`:: + `extends exV (m1, m2)` returns `true` if the domain of `m2` is a subset of the + domain of `m1` and if, for all `x` in the domain of `m2`, + `exV(lookup(m1, x), lookup(m2, x))` evaluates to `true`. + +`[.kw]#val# unionWith : ('a * 'a \-> 'a) \-> ('a map * 'a map) \-> 'a map`:: + `unionWith comb (m1, m2)` returns the union of the two maps, using the function `comb` + to combine values when there is a collision of keys. More formally, this expression + returns the map ++ +[latexmath] ++++++++++++ + \begin{array}{l} + \{ (k, \mathtt{m1}(k)) + \;|\;k \in \mathbf{dom}(\mathtt{m1}) \setminus \mathbf{dom}(\mathtt{m2}) \} + \cup \\ + \{ (k, \mathtt{m2}(k)) + \;|\;k \in \mathbf{dom}(\mathtt{m2}) \setminus \mathbf{dom}(\mathtt{m1}) \} + \cup \\ + \{ (k, \mathtt{comb}(\mathtt{m1}(k), \mathtt{m2}(k)) + \;|\;k \in \mathbf{dom}(\mathtt{m1}) \cap \mathbf{dom}(\mathtt{m2}) \} + \end{array} ++++++++++++ ++ +For example, we could implement a _multiset_ of keys by mapping keys to their +multiplicity. Then, the union of two multisets could be defined by ++ +[source,sml] +------------ +fun union (ms1, ms2) = unionWith Int.+ (ms1, ms2) +------------ + +`[.kw]#val# unionWithi : (Key.ord_key * 'a * 'a \-> 'a) \-> ('a map * 'a map) \-> 'a map`:: + `unionWithi comb (m1, m2)` returns the union of the two maps, using the function `comb` + to combine values when there is a collision of keys. More formally, this expression + returns the map ++ +[latexmath] ++++++++++++ + \begin{array}{l} + \{ (k, \mathtt{m1}(k)) + \;|\;k \in \mathbf{dom}(\mathtt{m1}) \setminus \mathbf{dom}(\mathtt{m2}) \} + \cup \\ + \{ (k, \mathtt{m2}(k)) + \;|\;k \in \mathbf{dom}(\mathtt{m2}) \setminus \mathbf{dom}(\mathtt{m1}) \} + \cup \\ + \{ (k, \mathtt{comb}(k, \mathtt{m1}(k), \mathtt{m2}(k)) + \;|\;k \in \mathbf{dom}(\mathtt{m1}) \cap \mathbf{dom}(\mathtt{m2}) \} + \end{array} ++++++++++++ + +`[.kw]#val# intersectWith : ('a * 'b \-> 'c) \-> ('a map * 'b map) \-> 'c map`:: + `intersectWith comb (m1, m2)` returns the intersection of the two maps, + where the values in the range are a computed by applying the function + `comb` to the values from the two maps. More formally, this expression + returns the map ++ +[latexmath] ++++++++++++ + \{ (k, \mathtt{comb}(\mathtt{m1}(k), \mathtt{m2}(k)) + \;|\;k \in \mathbf{dom}(\mathtt{m1}) \cap \mathbf{dom}(\mathtt{m2}) \} ++++++++++++ + +`[.kw]#val# intersectWithi : (Key.ord_key * 'a * 'b \-> 'c) \-> ('a map * 'b map) \-> 'c map`:: + `intersectWithi comb (m1, m2)` returns the intersection of the two maps, + where the values in the range are a computed by applying the function + `comb` to the kay and the values from the two maps. More formally, this + expression returns the map ++ +[latexmath] ++++++++++++ + \{ (k, \mathtt{comb}(k, \mathtt{m1}(k), \mathtt{m2}(k)) + \;|\;k \in \mathbf{dom}(\mathtt{m1}) \cap \mathbf{dom}(\mathtt{m2}) \} ++++++++++++ + +`[.kw]#val# mergeWith : ('a option * 'b option \-> 'c option) \-> ('a map * 'b map) \-> 'c map`:: + `mergeWith comb (m1, m2)` merges the two maps using the function `comb` + as a decision procedure for adding elements to the new map. For each key + latexmath:[\mathtt{key} \in \mathbf{dom}(\mathtt{m1}) \cup \mathbf{dom}(\mathtt{m2})], + we evaluate `comb(optV1, optV2)`, where `optV1` is `SOME v` if + latexmath:[(\mathtt{key}, \mathtt{v}) \in \mathtt{m1}] and is `NONE` if + latexmath:[\mathtt{key} \not\in \mathbf{dom}(\mathtt{m1}); likewise for `optV2`. + If `comb(optV1, optV2)` returns ``SOME v'``, then we add ``(key, v')`` + to the result. ++ +The `mergeWith` function is a generalization of the `unionWith` and +`intersectionWith` functions. + +`[.kw]#val# mergeWithi : (Key.ord_key * 'a option * 'b option \-> 'c option) \-> ('a map * 'b map) \-> 'c map`:: + `mergeWithi comb (m1, m2)` merges the two maps using the function `comb` + as a decision procedure for adding elements to the new map. The difference + between this function and `mergeWith` is that the `comb` function takes the + `key` value in addition to the optional values from the range. + +`[.kw]#val# app : ('a \-> unit) \-> 'a map \-> unit`:: + `app f m` applies the function `f` to the values in the range of `m`. + +`[.kw]#val# appi : ((Key.ord_key * 'a) \-> unit) \-> 'a map \-> unit`:: + `appi f map` applies the function `f` to the key-value pairs that + define `m`. + +`[.kw]#val# map : ('a \-> 'b) \-> 'a map \-> 'b map`:: + `map f m` creates a new finite map ``m'`` by applying the function `f` to the + values in the range of `m`. Thus, if + latexmath:[(\mathtt{key}, \mathtt{v}) \in \mathtt{m}], then + `(key, f v)` will be in ``m'``. + +`[.kw]#val# mapi : (Key.ord_key * 'a \-> 'b) \-> 'a map \-> 'b map`:: + `mapi f m` creates a new finite map ``m'`` by applying the function `f` to the + key-value pairs of `m`. Thus, if + latexmath:[(\mathtt{key}, \mathtt{v}) \in \mathtt{m}], then + `(key, f(key, v))` will be in ``m'``. + +`[.kw]#val# foldl : ('a * 'b \-> 'b) \-> 'b \-> 'a map \-> 'b`:: + `foldl fl init m` folds the function `f` over the range of + `m` using `init` as the initial value. Items are processed in + increasing order of their key values. + +`[.kw]#val# foldli : (Key.ord_key * 'a * 'b \-> 'b) \-> 'b \-> 'a map \-> 'b`:: + `foldli f init m` folds the function `f` over the key-value pairs in + `m` using `init` as the initial value. Items are processed in + increasing order of their key values. + +`[.kw]#val# foldr : ('a * 'b \-> 'b) \-> 'b \-> 'a map \-> 'b`:: + `foldr fl init m` folds the function `f` over the range of + `m` using `init` as the initial value. Items are processed in + decreasing order of their key values. + +`[.kw]#val# foldri : (Key.ord_key * 'a * 'b \-> 'b) \-> 'b \-> 'a map \-> 'b`:: + `foldri f init m` folds the function `f` over the key-value pairs in + `m` using `init` as the initial value. Items are processed in + decreasing order of their key values. + +`[.kw]#val# filter : ('a \-> bool) \-> 'a map \-> 'a map`:: + `filter pred m` filters out those items `(key, v)` from `m`, such that + `pred v` returns `false`. More formally, this expression returns the map + latexmath:[\{ (\mathtt{key}, \mathtt{v})\;|\;\mathtt{key} \in \mathbf{dom}(\mathtt{m}) + \wedge \mathtt{pred}(\mathtt{v}) \}]. + +`[.kw]#val# filteri : (Key.ord_key * 'a \-> bool) \-> 'a map \-> 'a map`:: + `filteri pred m` filters out those items `(key, v)` from `m`, such that + `pred(key, v)` returns `false`. More formally, this expression returns the map + latexmath:[\{ (\mathtt{key}, \mathtt{v})\;|\;\mathtt{key} \in \mathbf{dom}(\mathtt{m}) + \wedge \mathtt{pred}(\mathtt{key}, \mathtt{v}) \}]. + +`[.kw]#val# mapPartial : ('a \-> 'b option) \-> 'a map \-> 'b map`:: + `mapPartial f m` maps the partial function `f` over the items of `m`. + More formally, this expression returns the map +[latexmath] ++++++++++++ + \{ (k, v') \;|\; (k, v) \in \mathtt{m} \wedge \mathtt{f}(v) = \mathtt{SOME}(v') \} ++++++++++++ + +`[.kw]#val# mapPartiali : (Key.ord_key * 'a \-> 'b option) \-> 'a map \-> 'b map`:: + `mapPartiali f m` maps the partial function `f` over the items of `m`. + More formally, this expression returns the map +[latexmath] ++++++++++++ + \{ (k, v') \;|\; (k, v) \in \mathtt{m} \wedge \mathtt{f}(k, v) = \mathtt{SOME}(v') \} ++++++++++++ + +`[.kw]#val# exists : ('a \-> bool) \-> 'a map \-> bool`:: + `exists pred m` returns `true` if, and only if, there exists an item + latexmath:[(\mathtt{key}, \mathtt{v}) \in \mathtt{m}], + such that `pred v` returns `true`. + +`[.kw]#val# existsi : (Key.ord_key * 'a \-> bool) \-> 'a map \-> bool`:: + `exists pred m` returns `true` if, and only if, there exists an item + latexmath:[(\mathtt{key}, \mathtt{v}) \in \mathtt{m}], such that + `pred(key, v)` returns `true`. + +`[.kw]#val# all : ('a \-> bool) \-> 'a map \-> bool`:: + `all pred m` returns `true` if, and only if, `pred v` returns `true` + for all items latexmath:[(\mathtt{key}, \mathtt{v}) \in \mathtt{m}]. + +`[.kw]#val# alli : (Key.ord_key * 'a \-> bool) \-> 'a map \-> bool`:: + `all pred m` returns `true` if, and only if, `pred(key, v)` returns `true` + for all items latexmath:[(\mathtt{key}, \mathtt{v}) \in \mathtt{m}]. + +== Instances + +[[str:AtomMap]] +`[.kw]#structure# AtomMap`:: + This structure is an alias for `AtomRedBlackMap`. + +[[str:AtomBinaryMap]] +`[.kw]#structure# AtomBinaryMap`:: + Maps over atoms implemented using balanced binary trees. + Note that it is recommended that one use the `AtomMap` structure + as it provides better performance. + +[[str:AtomRedBlackMap]] +`[.kw]#structure# AtomRedBlackMap`:: + Maps over atoms implemented using red-black trees. + +[[str:IntBinaryMap]] +`[.kw]#structure# IntBinaryMap`:: + Maps over ints implemented using balanced binary trees. + Note that it is recommended that one use the `IntRedBlackMap` structure + as it provides better performance. + +[[str:IntListMap]] +`[.kw]#structure# IntListMap`:: + Maps over words implemented using sorted lists. This implementation + is fast for small sets, but does not scale well to large sizes. + +[[str:IntRedBlackMap]] +`[.kw]#structure# IntRedBlackMap`:: + Maps over ints implemented using red-black binary trees. + +[[str:WordRedBlackMap]] +`[.kw]#structure# WordRedBlackMap`:: + Maps over words implemented using red-black binary trees. + +== See Also + +xref:fun-BinaryMapFn.adoc[`BinaryMapFn`], +xref:fun-ListMapFn.adoc[`ListMapFn`], +xref:sig-ORD_KEY.adoc[`ORD_KEY`], +xref:fun-RedBlackMapFn.adoc[`RedBlackMapFn`], +xref:fun-SplayMapFn.adoc[`SplayMapFn`], +xref:smlnj-lib.adoc[__The Util Library__] diff --git a/smlnj-lib/Doc/src/Util/sig-ORD_SET.adoc b/smlnj-lib/Doc/src/Util/sig-ORD_SET.adoc new file mode 100644 index 0000000..2f75c3e --- /dev/null +++ b/smlnj-lib/Doc/src/Util/sig-ORD_SET.adoc @@ -0,0 +1,327 @@ += The `ORD_SET` signature +:Author: John Reppy +:Date: {release-date} +:stem: latexmath +:source-highlighter: pygments +:VERSION: {smlnj-version} + +The `ORD_SET` signature defines an interface to finite sets of +ordered elements. The **SML/NJ Library** provides a number of +different implementations of this interface. Functors are +provided for constructing sets for user-defined item types; +in addition, a number of instances for specific types +are also provided. + +== Synopsis + +[source,sml] +------------ +signature ORD_SET + +structure AtomSet : ORD_SET where type Key.ord_key = Atom.atom +structure AtomBinarySet : ORD_SET where type Key.ord_key = Atom.atom +structure AtomRedBlackSet : ORD_SET where type Key.ord_key = Atom.atom +structure IntBinarySet : ORD_SET where type Key.ord_key = int +structure IntListSet : ORD_SET where type Key.ord_key = int +structure IntRedBlackSet : ORD_SET where type Key.ord_key = int +structure WordRedBlackSet : ORD_SET where type Key.ord_key = word +------------ + +== Interface + +[source,sml] +------------ +structure Key : ORD_KEY + +type item = Key.ord_key +type set + +val empty : set + +val singleton : item -> set + +val fromList : item list -> set + +val toList : set -> item list + +val add : set * item -> set +val add' : (item * set) -> set + +val addList : set * item list -> set + +val subtract : set * item -> set +val subtract' : (item * set) -> set + +val subtractList : set * item list -> set + +val delete : set * item -> set + +val member : set * item -> bool + +val isEmpty : set -> bool + +val minItem : set -> item +val maxItem : set -> item + +val equal : (set * set) -> bool + +val compare : (set * set) -> order + +val isSubset : (set * set) -> bool + +val disjoint : set * set -> bool + +val numItems : set -> int + +val listItems : set -> item list + +val union : set * set -> set +val intersection : set * set -> set +val difference : set * set -> set + +val map : (item -> item) -> set -> set +val mapPartial : (item -> item option) -> set -> set +val app : (item -> unit) -> set -> unit +val foldl : (item * 'b -> 'b) -> 'b -> set -> 'b +val foldr : (item * 'b -> 'b) -> 'b -> set -> 'b + +val partition : (item -> bool) -> set -> (set * set) +val filter : (item -> bool) -> set -> set + +val exists : (item -> bool) -> set -> bool +val all : (item -> bool) -> set -> bool + +val find : (item -> bool) -> set -> item option +------------ + +== Description + +`[.kw]#`structure# Key : ORD_KEY`:: + This substructure defines the type of elements in the set and + the comparison function used to order them. + +`[.kw]#type# item = Key.ord_key`:: + The type of elements in the set. + +`[.kw]#type# set`:: + A finite set of `item` values. + +`[.kw]#val# empty : set`:: + The empty set. + +`[.kw]#val# singleton : item \-> set`:: + `singleton item` returns a singleton set containing `item`. + +`[.kw]#val# fromList : item list \-> set`:: + `fromList items` returns the set containing the list of items. + +[[val:toList]] +`[.kw]#val# toList : set \-> item list`:: + `toList set` returns a list of the items in `set`. The items will be + sorted in increasing order. + +`[.kw]#val# add : set * item \-> set`:: + `add (set, item)` adds the item to the set. + +`[.kw]#val# add' : (item * set) \-> set`:: + `add' (item, set)` adds the item to the set. + +`[.kw]#val# addList : set * item list \-> set`:: + `addList (set, items)` adds the list of items to the set. + +`[.kw]#val# subtract : set * item \-> set`:: + `subtract (set, item)` removes the object `item` from `set`. + Acts as the identity if `item` is not in the set. + +`[.kw]#val# subtract' : (item * set) \-> set`:: + `subtract' (item, set)` removes the object `item` from `set`. + Acts as the identity if `item` is not in the set. + +`[.kw]#val# subtractList : set * item list \-> set`:: + `subtractList (set, items)` removes the items from the set. + +`[.kw]#val# delete : set * item \-> set`:: + `delete (set, item)` removes the object `item` from `set`. + Unlike `subtract`, the `delete` function raises the + xref:str-LibBase.adoc#exn:NotFound[`NotFound`] + exception if `item` is not in the set. + +`[.kw]#val# member : set * item \-> bool`:: + `member (item, set)` returns `true` if, and only if, `item` + is an element of `set`. + +`[.kw]#val# isEmpty : set \-> bool`:: + `isEmpty set` returns true if, and only if, `set` is empty. + +`[.kw]#val# minItem : set \-> item`:: + `minItem set` returns the *smallest* element of the set. This function + raises the {sml-basis-url}/list.html#SIG:LIST.Empty:EXN[`Empty`] + exception if the set is empty. + +`[.kw]#val# maxItem : set \-> item`:: + `minItem set` returns the *largest* element of the set. This function + raises the {sml-basis-url}/list.html#SIG:LIST.Empty:EXN[`Empty`] + exception if the set is empty. + +`[.kw]#val# equal : (set * set) \-> bool`:: + `equal (set1, set2)` returns true if, and only if, the two + sets are equal (_i.e._, they contain the same elements). + +`[.kw]#val# compare : (set * set) \-> order`:: + `compare (set1, set2)` returns the lexical order of + the two sets. + +`[.kw]#val# isSubset : (set * set) \-> bool`:: + `isSubset (set1, set2)` returns true if, and only if, `set1` + is a subset of `set2` (_i.e._, any element of `set1` is an + element of `set2`). + +`[.kw]#val# disjoint : set * set \-> bool`:: + `equal (set1, set2)` returns true if, and only if, the two + sets are disjoint (_i.e._, their intersection is empty). + +`[.kw]#val# numItems : set \-> int`:: + `numItems set` returns the number of items in the `set`. + +`[.kw]#val# union : set * set \-> set`:: + `union (set1, set2)` returns the union of the two sets. + +`[.kw]#val# intersection : set * set \-> set`:: + `intersection (set1, set2)` returns the intersection of the two sets. + +`[.kw]#val# difference : set * set \-> set`:: + `difference (set1, set2)` returns the difference of the two sets; + _i.e._, the set of items that are in `set1`, but not in + `set2`. + +`[.kw]#val# map : (item \-> item) \-> set \-> set`:: + `map f set` constructs a new set from the result of applying the + function `f` to the elements of `set`. This expression is + equivalent to ++ +[source,sml] +------------ +fromList (List.map f (toList set)) +------------ + +`[.kw]#val# mapPartial : (item \-> item option) \-> set \-> set``:: + `mapPartial f set` constructs a new set from the result of applying the + function `f` to the elements of `set`. This expression is + equivalent to ++ +[source,sml] +------------ +fromList (List.mapPartial f (toList set)) +------------ + +`[.kw]#val# app : (item \-> unit) \-> set \-> unit`:: + `app f set` applies the function `f` to the items in `set`. + This expression is equivalent to ++ +[source,sml] +------------ +List.app f (toList set) +------------ + +`[.kw]#val# foldl : (item * 'b \-> 'b) \-> 'b \-> set \-> 'b`:: + `foldl f init set` folds the function `f` over the items in + `set` in increasing order using `init` as the initial value. + This expression is equivalent to ++ +[source,sml] +------------ +List.foldl f init (toList set) +------------ + +`[.kw]#val# foldr : (item * 'b \-> 'b) \-> 'b \-> set \-> 'b`:: + `foldr f init set` folds the function `f` over the items in + `set` in decreasing order using `init` as the initial value. + This expression is equivalent to ++ +[source,sml] +------------ +List.foldr f init (toList set) +------------ + +`[.kw]#val# partition : (item \-> bool) \-> set \-> (set * set)`:: + `partition pred set` returns a pair of disjoint sets `(tSet, fSet)`, where + the predicate `pred` returns true for every element of `tSet`, + `false` for every element of `fSet`, and `set` is the union of `tSet` + and `fSet`. + +`[.kw]#val# filter : (item \-> bool) \-> set \-> set`:: + `filter pred set` filters out any elements of set for which the + predicate `pred` returns false. + This expression is equivalent to ++ +[source,sml] +------------ +#1 (partition pred set) +------------ + +`[.kw]#val# exists : (item \-> bool) \-> set \-> bool`:: + `all pred set` returns `true` if, and only if, `pred item` returns + true for all elements `item` in `set`. Elements are checked in increasing + order. + +`[.kw]#val# all : (item \-> bool) \-> set \-> bool`:: + `exists pred set` returns `true` if, and only if, there exists an + element `item` in `set` such that `pred item` returns `true`. + Elements are checked in increasing order. + +`[.kw]#val# find : (item \-> bool) \-> set \-> item option`:: + `find pred set` returns `SOME item` if there exists an object `item` + in the set for which `pred item` returns `true`; otherwise `NONE` is + returned. Items are tested in increasing order. + +=== Deprecated functions + +The following functions are part of the interface, but have been +deprecated. + +`[.kw]#val# listItems : set \-> item list``:: + Use xref:#val:toList[`toList`] instead. + +== Instances + +[[str:AtomSet]] +`[.kw]#structure# AtomSet`:: + This structure is an alias for `AtomRedBlackSet`. + +[[str:AtomBinarySet]] +`[.kw]#structure# AtomBinarySet`:: + Sets of atoms implemented using balanced binary trees. + Note that it is recommended that one use the `AtomSet` structure + as it provides better performance. + +[[str:AtomRedBlackSet]] +`[.kw]#structure# AtomRedBlackSet`:: + Sets of atoms implemented using red-black trees. + +[[str:IntBinarySet]] +`[.kw]#structure# IntBinarySet`:: + Sets of ints implemented using balanced binary trees. + Note that it is recommended that one use the `IntRedBlackSet` structure + as it provides better performance. + +[[str:IntListSet]] +`[.kw]#structure# IntListSet`:: + Sets of words implemented using sorted lists. This implementation + is fast for small sets, but does not scale well to large sizes. + +[[str:IntRedBlackSet]] +`[.kw]#structure# IntRedBlackSet`:: + Sets of ints implemented using red-black binary trees. + +[[str:WordRedBlackSet]] +`[.kw]#structure# WordRedBlackSet`:: + Sets of words implemented using red-black binary trees. + +== See Also + +xref:fun-BinarySetFn.adoc[`BinarySetFn`], +xref:fun-ListSetFn.adoc[`ListSetFn`], +xref:sig-ORD_KEY.adoc[`ORD_KEY`], +xref:fun-RedBlackSetFn.adoc[`RedBlackSetFn`], +xref:fun-SplaySetFn.adoc[`SplaySetFn`], +xref:smlnj-lib.adoc[__The Util Library__] diff --git a/smlnj-lib/Doc/src/Util/smlnj-lib.adoc b/smlnj-lib/Doc/src/Util/smlnj-lib.adoc new file mode 100644 index 0000000..725b4e3 --- /dev/null +++ b/smlnj-lib/Doc/src/Util/smlnj-lib.adoc @@ -0,0 +1,389 @@ += The Util Library +:Author: John Reppy +:Date: {release-date} +:stem: latexmath +:source-highlighter: pygments +:VERSION: {smlnj-version} + +== Overview + +The *Util Library* provides a large collection of utility data structures +and algorithms. It is the core library in the *SML/NJ Library* suite +and dates back to the early 1990's, where it was originally developed +by Emden Gansner and John Reppy as part of the *eXene* *X11* toolkit. +Modules from this library are autoloaded into the *SML/NJ* interactive +environment by default. + +== Contents + +xref:str-ANSITerm.adoc[`[.kw]#structure# ANSITerm`]:: + provides support for displaying stylized text using the + https://en.wikipedia.org/wiki/ANSI_escape_code[ANSI escape codes]. + +xref:str-ArrayQSort.adoc[`[.kw]#structure# ArrayQSort`]:: + Provides _in situ_ sorting of polymorphic arrays + using the quicksort algorithm. + +xref:fun-ArrayQSortFn.adoc[`[.kw]#functor# ArrayQSortFn`]:: + Provides _in situ_ sorting of monomorphic arrays + using the quicksort algorithm. + +xref:str-Atom.adoc[`[.kw]#structure# Atom`]:: + provides hashed strings that have fast equality testing. + +xref:str-Base64.adoc[`[.kw]#structure# Base64`]:: + Provides support for *Base-64* encoding/decoding as specified + by https://www.ietf.org/rfc/rfc4648.txt[RFC 4648]. + +xref:fun-BinaryMapFn.adoc[`[.kw]#functor# BinaryMapFn`]:: + Provides a balanced-binary-tree implementation of the + xref:sig-ORD_MAP.adoc[`ORD_MAP`] signature parameterized over the key type. + +xref:fun-BinarySetFn.adoc[`[.kw]#functor# BinarySetFn`]:: + Provides a balanced-binary-tree implementation of the + xref:sig-ORD_SET.adoc[`ORD_SET`] signature parameterized over the element type. + +xref:str-BitArray.adoc[`[.kw]#structure# BitArray`]:: + Provides mutable arrays of booleans represented by one bit per element. + +xref:fun-BSearchFn.adoc[`[.kw]#functor# BSearchFn`]:: + Provides binary search on sorted monomorphic arrays. + +xref:str-CharMap.adoc[`[.kw]#structure# CharMap`]:: + Provides fast, read-only, maps from 8-bit characters to values. + +// 10 + +xref:str-DynamicArray.adoc[`[.kw]#structure# DynamicArray`]:: + Provides dynamically sized polymorphic arrays. + +xref:fun-DynamicArrayFn.adoc[`[.kw]#functor# DynamicArrayFn`]:: + Provides dynamically sized monomorphic arrays. + +xref:str-EditDistance.adoc[`[.kw]#structure# EditDistance`]:: + Provides a function for computing Levenshtein between distance between strings. + +xref:str-Fifo.adoc[`[.kw]#structure# Fifo`]:: + Provides a functional queue data structure. + +xref:str-FNVHash.adoc[`[.kw]#structure# FNVHash`]:: + Provides an implementation of the + https://en.wikipedia.org/wiki/Fowler–Noll–Vo_hash_function[__Fowler-Noll-Vo__] + hashing algorithm. + +xref:str-Format.adoc[`[.kw]#structure# Format`]:: + Provides `printf`-style string formatting. + +xref:str-FormatComb.adoc[`[.kw]#structure# FormatComb`]:: + **To be written** + +xref:str-GetOpt.adoc[`[.kw]#structure# GetOpt`]:: + Provides command-line argument processing. + +xref:fun-GraphSCCFn.adoc[`[.kw]#functor# GraphSCCFn`]:: + Provides an algorithm for computing the strongly-connected + components of a directed graph. + +xref:fun-Hash2TableFn.adoc[`[.kw]#functor# Hash2TableFn`]:: + Provides hash tables that are keyed by two different key types. + +// 20 + +xref:sig-HASH_KEY.adoc[`[.kw]#signature# HASH_KEY`]:: + Defines an interface to a monomorphic type with an equality + test and hashing function. + +xref:fun-HashSetFn.adoc[`[.kw]#functor# HashSetFn`]:: + Provides a hash-table-based implementation of imperative sets + parameterized over a `Key` structure. + +xref:str-HashString.adoc[`[.kw]#structure# HashString`]:: + Provides hashing functions for strings and substrings. + +xref:str-HashTable.adoc[`[.kw]#structure# HashTable`]:: + Provides an implementation of polymorphic hash tables. + +xref:fun-HashTableFn.adoc[`[.kw]#functor# HashTableFn`]:: + Provides a functor that implements the + xref:sig-MONO_HASH_TABLE.adoc[`MONO_HASH_TABLE`] signature. + +xref:sig-INTERVAL_DOMAIN.adoc[`[.kw]#signature# INTERVAL_DOMAIN`]:: + Defines a representation of an abstract ordered domain that is + used as an argument to the xref:fun-IntervalSetFn.adoc[`IntervalSetFn`] + functor. + +xref:fun-IntervalSetFn.adoc[`[.kw]#functor# IntervalSetFn`]:: + Provides sets over a discrete ordered domain, where the + sets are represented by intervals. + +xref:str-IOUtil.adoc[`[.kw]#structure# IOUtil`]:: + Provides support for redirecting the standard input and output streams. + +xref:fun-KeywordFn.adoc[`[.kw]#functor# KeywordFn`]:: + Provides a simple way to support a table of keyword + (or reserved) identifiers in a scanner. + +xref:fun-LeftPriorityQFn.adoc[`[.kw]#functor# LeftPriorityQFn`]:: + Provides a functional implementation of priority + queues using leaftist heaps. + +// 30 + +xref:str-LibBase.adoc[`[.kw]#structure# LibBase`]:: + Provides some common definitions that are + shared across the *SML/NJ Lbrary*. + +xref:str-ListFormat.adoc[`[.kw]#structure# ListFormat`]:: + Provides some utility functions for converting + lists into strings (and back). + +xref:fun-ListMapFn.adoc[`[.kw]#functor# ListMapFn`]:: + Provides a sorted-list implementation of the + xref:sig-ORD_MAP.adoc[`ORD_MAP`] signature parameterized over the key type. + +xref:str-ListMergeSort.adoc[`[.kw]#structure# ListMergeSort`]:: + Provides a merge-sort algorithm for lists. + +xref:fun-ListSetFn.adoc[`[.kw]#functor# ListSetFn`]:: + Provides a sorted-list implementation of the + xref:sig-ORD_SET.adoc[`ORD_SET`] signature parameterized over the element type. + +xref:str-ListXProd.adoc[`[.kw]#structure# ListXProd`]:: + Provides list combinators for computing + over the "Cartesian product" of two lists. + +xref:fun-MonoArrayFn.adoc[`[.kw]#functor# MonoArrayFn`]:: + Provides easy construction of new monomorphic array structures. + +xref:sig-MONO_HASH_TABLE.adoc[`[.kw]#signature# MONO_HASH_TABLE`]:: + Defines an interface to imperative hash tables with monomorphic keys. + +xref:str-Native.adoc[`[.kw]#structure# NativeInt`]:: + An alias to the native-sized integer structure (_e.g._, `Int64`). + +xref:str-Native.adoc[`[.kw]#structure# NativeWord`]:: + An alias to the native-sized word structure (_e.g._, `Word64`). + +// 40 + +xref:sig-ORD_KEY.adoc[`[.kw]#signature# ORD_KEY`]:: + Defines an interface to an abstract type + of keys with a comparison function. + +xref:sig-ORD_MAP.adoc[`[.kw]#signature# ORD_MAP`]:: + Defines an interface to finite maps over ordered keys. + +xref:sig-ORD_SET.adoc[`[.kw]#signature# ORD_SET`]:: + Defines an interface to finite sets of ordered elements. + +xref:str-ParserComb.adoc[`[.kw]#structure# ParserComb`]:: + Provides parser combinators over character readers. + +xref:str-PathUtil.adoc[`[.kw]#structure# PathUtil`]:: + Provides support for searching for files + in the file system using a list of possible locations. + +xref:str-PropList.adoc[`[.kw]#structure# PropList`]:: + Provides a extensible, but type safe, implementation + of property lists. + +xref:str-Queue.adoc[`[.kw]#structure# Queue`]:: + Provides an imperative queue data structure. + +xref:str-Rand.adoc[`[.kw]#structure# Rand`]:: + Provides a simple random number generator. + +xref:str-Random.adoc[`[.kw]#structure# Random`]:: + Pseudo-random-number generation using the + http://www.math.sci.hiroshima-u.ac.jp/m-mat/MT/emt.html[_Mersenne Twister_ + algorithm]. + +xref:str-RealOrderStats.adoc[`[.kw]#structure# RealOrderStats`]:: + **To be written** + +// 50 + +xref:fun-RedBlackMapFn.adoc[`[.kw]#functor# RedBlackMapFn`]:: + Provides a red-black-tree implementation of the + xref:sig-ORD_MAP.adoc[`ORD_MAP`] signature parameterized over the key type. + +xref:fun-RedBlackSetFn.adoc[`[.kw]#functor# RedBlackSetFn`]:: + Provides a red-black-tree implementation of the + xref:sig-ORD_SET.adoc[`ORD_SET`] signature parameterized over the element type. + +xref:str-Scan.adoc[`[.kw]#structure# Scan`]:: + **To be written** + +xref:fun-SplayMapFn.adoc[`[.kw]#functor# SplayMapFn`]:: + Provides a splay-tree implementation of the + xref:sig-ORD_MAP.adoc[`ORD_MAP`] signature parameterized over the key type. + +xref:fun-SplaySetFn.adoc[`[.kw]#functor# SplaySetFn`]:: + Provides a splay-tree implementation of the + xref:sig-ORD_SET.adoc[`ORD_SET`] signature parameterized over the element type. + +xref:str-TimeLimit.adoc[`[.kw]#structure# TimeLimit`]:: + Provides a mechanism for limiting the execution + time of a computation. + +xref:str-UnivariateStats.adoc[`[.kw]#structure# UnivariateStats`]:: + **To be written** + +xref:str-URef.adoc[`[.kw]#structure# URef`]:: + Provides mutable references with __Union-Find__ semantics. + +xref:str-UTF8.adoc[`[.kw]#structure# UTF8`]:: + Provides support for working with https://en.wikipedia.org/wiki/UTF-8[*UTF-8*] + encoded strings. + +// 59 + +== Usage + +By default, the *Util Library* is autoloaded by *CM*, which means that it is +immediately available to interactive use in the *SML/NJ* REPL. + +For https://smlnj.org[*SML/NJ*], include `$/smlnj-lib.cm` in your +*CM* file. + +For use in https://www.mlton.org/[*MLton*], include +`$(SML_LIB)/smlnj-lib/Util/smlnj-lib.mlb` in your *MLB* file. + +ifdef::backend-pdf[] + +// Push titles down one level. +:leveloffset: 1 + +include::str-ANSITerm.adoc[] + +include::str-ArrayQSort.adoc[] + +include::fun-ArrayQSortFn.adoc[] + +include::str-Atom.adoc[] + +include::str-Base64.adoc[] + +include::fun-BinaryMapFn.adoc[] + +include::fun-BinarySetFn.adoc[] + +include::str-BitArray.adoc[] + +include::fun-BSearchFn.adoc[] + +include::str-CharMap.adoc[] + +// 10 + +include::str-DynamicArray.adoc[] + +include::fun-DynamicArrayFn.adoc[] + +include::str-EditDistance.adoc[] + +include::str-Fifo.adoc[] + +include::str-FNVHash.adoc[] + +include::str-Format.adoc[] + +include::str-FormatComb.adoc[] + +include::str-GetOpt.adoc[] + +include::fun-GraphSCCFn.adoc[] + +include::fun-Hash2TableFn.adoc[] + +// 20 + +include::sig-HASH_KEY.adoc[] + +include::fun-HashSetFn.adoc[] + +include::str-HashString.adoc[] + +include::str-HashTable.adoc[] + +include::fun-HashTableFn.adoc[] + +include::sig-INTERVAL_DOMAIN.adoc[] + +include::fun-IntervalSetFn.adoc[] + +include::str-IOUtil.adoc[] + +include::fun-KeywordFn.adoc[] + +include::fun-LeftPriorityQFn.adoc[] + +// 30 + +include::str-LibBase.adoc[] + +include::str-ListFormat.adoc[] + +include::fun-ListMapFn.adoc[] + +include::str-ListMergeSort.adoc[] + +include::fun-ListSetFn.adoc[] + +include::str-ListXProd.adoc[] + +include::fun-MonoArrayFn.adoc[] + +include::sig-MONO_HASH_TABLE.adoc[] + +include::str-Native.adoc[] + +include::sig-ORD_KEY.adoc[] + +// 40 + +include::sig-ORD_MAP.adoc[] + +include::sig-ORD_SET.adoc[] + +include::str-ParserComb.adoc[] + +include::str-PathUtil.adoc[] + +include::str-PropList.adoc[] + +include::str-Queue.adoc[] + +include::str-Rand.adoc[] + +include::str-Random.adoc[] + +include::str-RealOrderStats.adoc[] + +include::fun-RedBlackMapFn.adoc[] + +// 50 + +include::fun-RedBlackSetFn.adoc[] + +include::str-Scan.adoc[] + +include::fun-SplayMapFn.adoc[] + +include::fun-SplaySetFn.adoc[] + +include::str-TimeLimit.adoc[] + +include::str-UnivariateStats.adoc[] + +include::str-URef.adoc[] + +include::str-UTF8.adoc[] + +// 58 -- note that `str-Native.adoc` is referenced twice by the HTML, +// but only once by the PDF + +// Return to normal title levels. +:leveloffset: 0 + +endif::[] diff --git a/smlnj-lib/Doc/src/Util/str-ANSITerm.adoc b/smlnj-lib/Doc/src/Util/str-ANSITerm.adoc new file mode 100644 index 0000000..e82d378 --- /dev/null +++ b/smlnj-lib/Doc/src/Util/str-ANSITerm.adoc @@ -0,0 +1,118 @@ += The `ANSITerm` structure +:Author: John Reppy +:Date: {release-date} +:stem: latexmath +:source-highlighter: pygments +:VERSION: {smlnj-version} + +The `ANSITerm` structure provides support for displaying stylized +text using the https://en.wikipedia.org/wiki/ANSI_escape_code[ANSI escape codes]. + +Note that currently this module only supports the limited palette of +eight fixed colors. + +== Synopsis + +[source,sml] +------------ +structure ANSITerm +------------ + +== Interface + +[source,sml] +------------ +datatype color + = Black | Red | Green | Yellow | Blue | Magenta | Cyan | White | Default + +datatype style + = FG of color + | BG of color + | BF + | DIM + | NORMAL + | UL + | UL_OFF + | BLINK + | BLINK_OFF + | REV + | REV_OFF + | INVIS + | INVIS_OFF + | RESET + +val toString : style list -> string + +val setStyle : TextIO.outstream * style list -> unit +------------ + +== Description + +`[.kw]#datatype# color = ...`:: + This datatype can be used to specify the eight standard colors, plus the default + color. + +`[.kw]#datatype# style = ...`:: + This datatype specifies the different styles that can be used when + displaying text. The styles are ++ +-- + `FG [.kw]#of# color`:: + specifies the foreground color of the text. + + `BG [.kw]#of# color`:: + specifies the background color of the text. + + `BF`:: + specifies bold or bright text (note that this does not cancel the effect + of `DIM`). + + `DIM`:: + specifies dim text (note that this does not cancel the effect + of `BF`). + + `NORMAL`:: + specifies normal color and intensity (cancels the effect of + `BF` and `DIM`). + + `UL`:: + enables underlining of the text. + + `UL_OFF`:: + cancels underlining. + + `BLINK`:: + enables blinking text. + + `BLINK_OFF`:: + cancels blinking mode. + + `REV`:: + reverses the foreground and background colors. + + `REV_OFF`:: + cancels reverse mode. + + `INVIS`:: + makes the text invisible. + + `INVIS_OFF`:: + cancels invisible mode. + + `RESET`:: + resets the style to the default mode. +-- + +`[.kw]#val# toString : style list \-> string`:: + `toString styles` returns a command string that will cause the terminal + to switch to the specified styles. Specifying the empty list is + equivalent to `[RESET]`. + +`[.kw]#val# setStyle : TextIO.outstream * style list \-> unit`:: + `setStyle (outS, styles)` sets the styles for the terminal connected + to the output stream `outS`. Specifying the empty list is + equivalent to `[RESET]`. + +== See Also + +xref:smlnj-lib.adoc[__The Util Library__] diff --git a/smlnj-lib/Doc/src/Util/str-ArrayQSort.adoc b/smlnj-lib/Doc/src/Util/str-ArrayQSort.adoc new file mode 100644 index 0000000..5316d7a --- /dev/null +++ b/smlnj-lib/Doc/src/Util/str-ArrayQSort.adoc @@ -0,0 +1,40 @@ += The `ArrayQSort` structure +:Author: John Reppy +:Date: {release-date} +:stem: latexmath +:source-highlighter: pygments +:VERSION: {smlnj-version} + +The `ArrayQSort` structure provides _in situ_ sorting of polymorphic arrays +using the quicksort algorithm. + +== Synopsis + +[source,sml] +------------ +signature ARRAY_SORT +structure ArrayQSort : ARRAY_SORT +------------ + +== Interface + +[source,sml] +------------ +val sort : ('a * 'a -> order) -> 'a array -> unit +val sorted : ('a * 'a -> order) -> 'a array -> bool +------------ + +== Description + +`[.kw]#val# sort : ('a * 'a \-> order) \-> 'a array \-> unit`:: + `sort cmp arr` sorts the array `arr` into ascending order + according to the comparison function `cmp`. + +`[.kw]#val# sorted : ('a * 'a \-> order) \-> 'a array \-> bool`:: + `sorted cmp arr` returns true if, and only if, the array `arr=` is + sorted in ascending order. + +== See Also + +xref:fun-ArrayQSortFn.adoc[`ArrayQSortFn`], +xref:smlnj-lib.adoc[__The Util Library__] diff --git a/smlnj-lib/Doc/src/Util/str-Atom.adoc b/smlnj-lib/Doc/src/Util/str-Atom.adoc new file mode 100644 index 0000000..8b62db8 --- /dev/null +++ b/smlnj-lib/Doc/src/Util/str-Atom.adoc @@ -0,0 +1,86 @@ += The `Atom` structure +:Author: John Reppy +:Date: {release-date} +:stem: latexmath +:source-highlighter: pygments +:VERSION: {smlnj-version} + +The `Atom` structure provides hashed strings that have fast +equality testing. The *Util Library* provides predefined +finite maps and sets of atoms, as well as hash tables keyed by atoms. + +== Synopsis + +[source,sml] +------------ +signature ATOM +structure Atom : ATOM +------------ + +== Interface + +[source,sml] +------------ +type atom + +val atom : string -> atom +val atom' : substring -> atom + +val toString : atom -> string + +val same : (atom * atom) -> bool +val sameAtom : (atom * atom) -> bool + +val compare : (atom * atom) -> order +val lexCompare : (atom * atom) -> order + +val hash : atom -> word +------------ + +== Description + +[[type:atom]] +`[.kw]#type# atom`:: + The abstract type of hashed strings that support fast equality testing. + +`[.kw]#val# atom : string \-> atom`:: + `atom s` returns the unique hashed representation of the string `s` as an atom. + +`[.kw]#val# atom' : substring \-> atom`:: + `atom ss` returns the unique hashed representation of the substring `ss` + as an atom. + +`[.kw]#val# toString : atom \-> string`:: + `toString atm` returns the string representation of the atom `atm`. + +[[val:same]] +`[.kw]#val# same : (atom * atom) \-> bool`:: + `same (atm1, atm2)` returns true if the two atoms are the same (_i.e._, their + string representations are equal). + +`[.kw]#val# compare : (atom * atom) \-> order`:: + `compare (atm1, atm2)` returns the relation of the two atoms in some + unspecified total order. Use xref:#val:lexCompare[`lexCompare`] to + compare atoms lexicographically. + +[[val:lexCompare]] +`[.kw]#val# lexCompare : (atom * atom) \-> order`:: + `compare (atm1, atm2)` returns the relation of the two atoms in lexical order. + +`[.kw]#val# hash : atom \-> word`:: + `hash atm` returns a hash key for the atom. + +=== Deprecated functions + +The following functions are part of the interface, but have been +deprecated. + +`[.kw]#val# sameAtom : (atom * atom) \-> bool`:: + Use xref:#val:same[`same`] instead. + +== See Also + +xref:sig-ORD_MAP.adoc#str:AtomMap[`AtomMap`], +xref:sig-ORD_SET.adoc#str:AtomSet[`AtomSet`], +xref:sig-MONO_HASH_TABLE.adoc#str:AtomTable[`AtomTable`], +xref:smlnj-lib.adoc[__The Util Library__] diff --git a/smlnj-lib/Doc/src/Util/str-Base64.adoc b/smlnj-lib/Doc/src/Util/str-Base64.adoc new file mode 100644 index 0000000..ba52a89 --- /dev/null +++ b/smlnj-lib/Doc/src/Util/str-Base64.adoc @@ -0,0 +1,95 @@ += The `Base64` structure +:Author: John Reppy +:Date: {release-date} +:stem: latexmath +:source-highlighter: pygments +:VERSION: {smlnj-version} + +The `Base64` structure provides support for *Base-64* encoding/decoding +as specified by https://www.ietf.org/rfc/rfc4648.txt[RFC 4648]. + +== Synopsis + +[source,sml] +------------ +signature BASE64 +structure Base64 : BASE64 +------------ + +== Interface + +[source,sml] +------------ +val isBase64 : char -> bool + +val encode : Word8Vector.vector -> string +val encodeSlice : Word8VectorSlice.slice -> string + +exception Incomplete + +exception Invalid of (int * char) + +val decode : string -> Word8Vector.vector +val decodeSlice : substring -> Word8Vector.vector + +val decodeStrict : string -> Word8Vector.vector +val decodeSliceStrict : substring -> Word8Vector.vector +------------ + +== Description + +`[.kw]#val# isBase64 : char \-> bool`:: + `isBase64 c` returns true if the character `c` is in the *Base-64* + alphabet (_i.e._, a letter, digit, or a slash or plus character). + Note that the padding character (the equals sign) is not considered + to be in the alphabet. + +`[.kw]#val# encode : Word8Vector.vector \-> string`:: + `encode bv` returns a string that is the *Base-64* encoding of + the byte vector `bv`. + +`[.kw]#val# encodeSlice : Word8VectorSlice.slice \-> string`:: + `encode bvs` returns a string that is the *Base-64* encoding of + the byte-vector slice `bvs`. + +[[exn:Incomplete]] +`[.kw]#exception# Incomplete`:: + This exception is raised by the decoding functions if a *Base-64* string + does not end in a complete encoding quantum (_i.e._, four characters including + padding characters). + +[[exn:Invalid]] +`[.kw]#exception# Invalid of (int * char)`:: + This exception is raised by the decoding functions if an invalid *Base-64* + character is encountered. The int is the position of the character and + the char is the invalid character. + +`[.kw]#val# decode : string \-> Word8Vector.vector`:: + `decode s` returns the result of decoding the *Base-64* string `s`. + This function ignores whitespace (_e.g._, line breaks), but + will raise the xref:#exn:Incomplete[`Incomplete`] exception if the + last quantum is incomplete. + +`[.kw]#val# decodeSlice : substring \-> Word8Vector.vector`:: + `decode ss` returns the result of decoding the *Base-64* substring `ss`. + This function ignores whitespace (_e.g._, line breaks), but + will raise the xref:#exn:Incomplete[`Incomplete`] exception if the + last quantum is incomplete. + +`[.kw]#val# decodeStrict : string \-> Word8Vector.vector`:: + `decodeStrict s` returns the result of decoding the *Base-64* string `s`. + The string `s` maust only contain valid *Base-64* characters, otherwise + the xref:#exn:Invalid[`Invalid`] exception is raised. This function + will also raise the xref:#exn:Incomplete[`Incomplete`] exception if the + last quantum is incomplete. + +`[.kw]#val# decodeSliceStrict : substring \-> Word8Vector.vector`:: + `decode ss` returns the result of decoding the *Base-64* substring `ss`. + The string `s` maust only contain valid *Base-64* characters, otherwise + the xref:#exn:Invalid[`Invalid`] exception is raised. This function + will also raise the xref:#exn:Incomplete[`Incomplete`] exception if the + last quantum is incomplete. + +== See Also + +xref:smlnj-lib.adoc[__The Util Library__] diff --git a/smlnj-lib/Doc/src/Util/str-BitArray.adoc b/smlnj-lib/Doc/src/Util/str-BitArray.adoc new file mode 100644 index 0000000..bcc5005 --- /dev/null +++ b/smlnj-lib/Doc/src/Util/str-BitArray.adoc @@ -0,0 +1,211 @@ += The `BitArray` structure +:Author: John Reppy +:Date: {release-date} +:stem: latexmath +:source-highlighter: pygments +:VERSION: {smlnj-version} + +The `BitArray` structure provides a implementation of +monomorphic arrays of booleans implemented one bit per +element. The `BitArray` structure extends the +{sml-basis-url}/mono-array.html[`MONO_ARRAY`] signature +with bit-level operations. + +== Synopsis + +[source,sml] +------------ +signature BIT_ARRAY +structure BitArray :> BIT_ARRAY +------------ + +== Interface + +[source,sml] +------------ +include MONO_ARRAY + +val fromString : string -> array option + +val bits : (int * int list) -> array + +val getBits : array -> int list + +val toString : array -> string + +val isZero : array -> bool + +val extend0 : (array * int) -> array +val extend1 : (array * int) -> array + +val eqBits : (array * array) -> bool +val equal : (array * array) -> bool + +val andb : (array * array * int) -> array +val orb : (array * array * int) -> array +val xorb : (array * array * int) -> array + +val notb : array -> array + +val << : (array * word) -> array +val >> : (array * word) -> array + +val setBit : (array * int) -> unit +val clrBit : (array * int) -> unit + +val union : array -> array -> unit +val intersection : array -> array -> unit + +val complement : array -> unit + +val lshift : (array * int) -> array +val rshift : (array * int) -> array +------------ + +== Description + +`[.kw]#include# {sml-basis-url}/mono-array.html[MONO_ARRAY]`:: + The `BIT_ARRAY` signature extends the + {sml-basis-url}/mono-array.html[`MONO_ARRAY`] signature from + the *Standard ML Basis Library*. Note that while the + `MONO_ARRAY` signature includes a corresponding monomorphic + `vector` type, there is currently no implementation of a + corresponding `BitVector` structure. + +`[.kw]#val# fromString : string \-> array option`:: + `fromString s` returns `SOME ba` when the string `s` is + a sequence of hexadecimal digits. The length of `ba` will + be `4*(length s)`. Returns `NONE` of a non-hexadecimal + character is encountered. + +`[.kw]#val# bits : (int * int list) \-> array`:: + `bits (n, ixs)` returns a new array `ba` of length `n`, where + `ba[ix]` is `true` for each `ix` in the list of indices `ixs`. + This function raises the + link:{sml-basis-url}/general.html#SIG:GENERAL.Size:EXN:SPEC[`Size`] + exception if `n < 0` and the + link:{sml-basis-url}/general.html#SIG:GENERAL.Subscript:EXN:SPEC[`Subscript`] + exception if any index is out of bounds. + +`[.kw]#val# getBits : array \-> int list`:: + `getBits ba` returns a list of indices `ix` for which `ba[ix]` is true + in increasing order. + +`[.kw]#val# toString : array \-> string`:: + `toString ba` returns a string representation of the array as a sequence + of hexadecimal digits in little-endian order (_i.e._, ba[0] is represented + as the high-order bit in the first digit). + +`[.kw]#val# isZero : array \-> bool`:: + `isZero ba` returns true if, and only if, no elements are `true` in `ba`. + +`[.kw]#val# extend0 : (array * int) \-> array`:: + `extend0 (ba, n)` returns a new array ``ba'`` that is `max(n, length ba)` + elements long, where the `length ba` elements of ``ba'`` are copied from + `ba` and the remaining elements are `false`. + This function raises the + link:{sml-basis-url}/general.html#SIG:GENERAL.Size:EXN:SPEC[`Size`] + exception if `n < 0`. + +`[.kw]#val# extend1 : (array * int) \-> array`:: + `extend1 (ba, n)` returns a new array ``ba'`` that is `max(n, length ba)` + elements long, where the `length ba` elements of ``ba'`` are copied from + `ba` and the remaining elements are `true`. + This function raises the + link:{sml-basis-url}/general.html#SIG:GENERAL.Size:EXN:SPEC[`Size`] + exception if `n < 0`. + +`[.kw]#val# eqBits : (array * array) \-> bool`:: + `eqBits (ba1, ba2)` returns true if the two arrays have + the same `true` entries. In other words, the following identity + holds ++ +[source,sml] +------------ +eqBits(ba1, ba2) = (getBits ba1 = getBits ba2) +------------ + +`[.kw]#val# equal : (array * array) \-> bool`:: + `equal (ba1, ba2)` returns true if the two arrays are the same length + and have the same elements. + +`[.kw]#val# andb : (array * array * int) \-> array`:: + `andb (ba1, ba2, n)` returns a new array `ba` of length `n`, + where the element `ba[ix]` is the logical *AND* of `ba1[ix]` + and `ba2[ix]`, where the inputs are extended with `false` as + necessary. + This function raises the + link:{sml-basis-url}/general.html#SIG:GENERAL.Size:EXN:SPEC[`Size`] + exception if `n < 0`. + +`[.kw]#val# orb : (array * array * int) \-> array`:: + `orb (ba1, ba2, n)` returns a new array `ba` of length `n`, + where the element `ba[ix]` is the logical *OR* of `ba1[ix]` + and `ba2[ix]`, where the inputs are extended with `false` as + necessary. + This function raises the + link:{sml-basis-url}/general.html#SIG:GENERAL.Size:EXN:SPEC[`Size`] + exception if `n < 0`. + +`[.kw]#val# xorb : (array * array * int) \-> array`:: + `xorb (ba1, ba2, n)` returns a new array `ba` of length `n`, + where the element `ba[ix]` is the logical *XOR* of `ba1[ix]` + and `ba2[ix]`, where the inputs are extended with `false` as + necessary. + This function raises the + link:{sml-basis-url}/general.html#SIG:GENERAL.Size:EXN:SPEC[`Size`] + exception if `n < 0`. + +`[.kw]#val# notb : array \-> array`:: + `notb ba` returns a new array of the same length as `ba` with + the elements negated. + +[[val:_lt_lt_]] +`[.kw]#val# << : (array * word) \-> array`:: + `<< (ba, n)`returns a new array by appending `n` `false` + elements on the end of `ba`. The new array will have + length equal to `n + length ba`. + +[[val:_gt_gt_]] +`[.kw]#val# >> : (array * word) \-> array`:: + `>> (ba, n)`returns a new array by trimming `n` elements + from the "right" of `ba`. The new array will have + `max(0, length ba - n)` elements. + +`[.kw]#val# setBit : (array * int) \-> unit`:: + `setBit (ba, ix)` sets the element of `ba` at index `ix` to `true`. + This function raises the + link:{sml-basis-url}/general.html#SIG:GENERAL.Subscript:EXN:SPEC[`Subscript`] + exception if `ix` is out of bounds. + +`[.kw]#val# clrBit : (array * int) \-> unit`:: + `setBit (ba, ix)` sets the element of `ba` at index `ix` to `false`. + This function raises the + link:{sml-basis-url}/general.html#SIG:GENERAL.Subscript:EXN:SPEC[`Subscript`] + exception if `ix` is out of bounds. + +`[.kw]#val# union : array \-> array \-> unit`:: + `union ba1 ba2` updates `ba1` by setting each element `ba1[ix]` to + the logical *OR* of `ba1[ix]` and `ba2[ix]`, where `ba2[ix]` is + extended with `false` elements as necessary to match the length of `ba1`. + +`[.kw]#val# intersection : array \-> array \-> unit`:: + `intersection ba1 ba2` updates `ba1` by setting each element `ba1[ix]` to + the logical *AND* of `ba1[ix]` and `ba2[ix]`, where `ba2[ix]` is + extended with `false` elements as necessary to match the length of `ba1`. + +`[.kw]#val# complement : array \-> unit`:: + `complement ba` logically negates all of the elements of `ba`. + +=== Deprecated Functions + +`[.kw]#val# lshift : (array * int) \-> array`:: + Use xref:#val:_lt_lt_[`<<`] instead. + +`[.kw]#val# rshift : (array * int) \-> array`:: + Use xref:#val:_gt_gt_[`>>`] instead. + +== See Also + +{sml-basis-url}/mono-array.html[`MONO_ARRAY`], +xref:smlnj-lib.adoc[__The Util Library__] diff --git a/smlnj-lib/Doc/src/Util/str-CharMap.adoc b/smlnj-lib/Doc/src/Util/str-CharMap.adoc new file mode 100644 index 0000000..a6de802 --- /dev/null +++ b/smlnj-lib/Doc/src/Util/str-CharMap.adoc @@ -0,0 +1,68 @@ += The `CharMap` structure +:Author: John Reppy +:Date: {release-date} +:stem: latexmath +:source-highlighter: pygments +:VERSION: {smlnj-version} + +The `CharMap` structure provides fast, read-only, maps from 8-bit characters +to values. + +== Synopsis + +[source,sml] +------------ +signature CHAR_MAP +structure CharMap :> CHAR_MAP +------------ + +== Interface + +[source,sml] +------------ +type 'a char_map + +val mkCharMap : {default : 'a, bindings : (string * 'a) list} -> 'a char_map + +val mapChr : 'a char_map -> char -> 'a +val mapStrChr : 'a char_map -> (string * int) -> 'a +------------ + +== Description + +`[.kw]#type# 'a char_map`:: + The type of a mapping from 8-bit characters to ``'a``. + +`[.kw]#val# mkCharMap : {default : 'a, bindings : (string * 'a) list} \-> 'a char_map`:: + `mkCharMap {default, bindings}` creates a new character map. For each + item `(s, v)` in the `bindings` list, the characters in `s` are mapped to + the value `v`. If a character is bound multiple times, then the rightmost + binding is used. Characters not covered by a binding are mapped to the + default value. For example, the following code creates a mapping that classifies + characters into lower and upper case letter, digits, and other characters: ++ +[source,sml] +------------ +datatype char_kind = LOWER | UPPER | DIGIT | OTHER + +val cmap = mkCharMap { + default = OTHER, + bindings = [ + ("abcdefghijklmnopqrstuvwxyz", LOWER), + ("ABCDEFGHIJKLMNOPQRSTUVWXYZ", UPPER), + ("0123456789", DIGIT) + ] + } +------------ + +`[.kw]#val# mapChr : 'a char_map \-> char \-> 'a`:: + `mapChr cmap c` applies the map to the character. + +`[.kw]#val# mapStrChr : 'a char_map \-> (string * int) \-> 'a`:: + `mapStrChr cmap (s, i)` applies the map to the `i`th character in `s`. + The {sml-basis-url}/general.html#SIG:GENERAL.Subscript:EXN:SPEC[`Subscript`] + exception is raised if `i` is out of bounds. + +== See Also + +xref:smlnj-lib.adoc[__The Util Library__] diff --git a/smlnj-lib/Doc/src/Util/str-DynamicArray.adoc b/smlnj-lib/Doc/src/Util/str-DynamicArray.adoc new file mode 100644 index 0000000..d89d2a1 --- /dev/null +++ b/smlnj-lib/Doc/src/Util/str-DynamicArray.adoc @@ -0,0 +1,216 @@ += The `DynamicArray` structure +:Author: John Reppy +:Date: {release-date} +:stem: latexmath +:source-highlighter: pygments +:VERSION: {smlnj-version} + +The `DynamicArray` structure provides dynamically sized polymorphic +arrays. Each array has an associated _default_ value, which is +covers those elements that have not been explicitly initialized +(conceptually, one can view an array as having an infinite size). +Thus, reads from indices above the bound will return the default value. +The __bound__ of an array is the highest index of an initialized +element (or `~1` if there are no initialized elements). The +__defined range__ of the array are the elements in the positions +indexed from zero to the bound. + +== Synopsis + +[source,sml] +------------ +structure DynamicArray +------------ + +== Interface + +[source,sml] +------------ +type 'a array + +val array : (int * 'a) -> 'a array + +val subArray : ('a array * int * int) -> 'a array + +val fromList : 'a list * 'a -> 'a array +val fromVector : 'a vector * 'a -> 'a array + +val toList : 'a array -> 'a list +val toVector : 'a array -> 'a vector + +val tabulate: (int * (int -> 'a) * 'a) -> 'a array + +val default : 'a array -> 'a + +val sub : ('a array * int) -> 'a + +val update : ('a array * int * 'a) -> unit + +val bound : 'a array -> int + +val truncate : ('a array * int) -> unit + +val appi : (int * 'a -> unit) -> 'a array -> unit +val app : ('a -> unit) -> 'a array -> unit +val modifyi : (int * 'a -> 'a) -> 'a array -> unit +val modify : ('a -> 'a) -> 'a array -> unit +val foldli : (int * 'a * 'b -> 'b) -> 'b -> 'a array -> 'b +val foldri : (int * 'a * 'b -> 'b) -> 'b -> 'a array -> 'b +val foldl : ('a * 'b -> 'b) -> 'b -> 'a array -> 'b +val foldr : ('a * 'b -> 'b) -> 'b -> 'a array -> 'b +val findi : (int * 'a -> bool) -> 'a array -> (int * 'a) option +val find : ('a -> bool) -> 'a array -> 'a option +val exists : ('a -> bool) -> 'a array -> bool +val all : ('a -> bool) -> 'a array -> bool +val collate : ('a * 'a -> order) -> 'a array * 'a array -> order + +val vector : 'a array -> 'a vector +------------ + +== Description + +`[.kw]#type# 'a array`:: + The polymorphic type of dynamic arrays. Each array has a _default value_ + and a _bound_, where the _bound_ is the largest index for which an explicit + value has been set. Reads from indices above the bound will return the + default value. + +`[.kw]#val# array : (int * 'a) \-> 'a array`:: + `array (sz, dflt)` returns a new array with bound `~1` and default + value `dflt`. The `sz` argument, which must be non-negative, is used + as a hint of the potential range of indices. This function raises + the {sml-basis-url}/general.html#SIG:GENERAL.Size:EXN:SPEC[`Size`] + exception if `sz < 0`. + +`[.kw]#val# subArray : ('a array * int * int) \-> 'a array`:: + `subArray (arr, lo, hi)` returns a new array with the same default + as `arr`, and whose values in the range `[0, hi-lo]` are equal to + the values in `arr` in the range `[lo, hi]`. This function raises + the {sml-basis-url}/general.html#SIG:GENERAL.Size:EXN:SPEC[`Size`] + exception if `lo < 0` or `hi < lo-1`. + +`[.kw]#val# fromList : 'a list * 'a \-> 'a array`:: + `fromList (lst, dflt)` returns a new array created from the elements + of `lst` and with default value `dflt`. The bound of the array will be + `length lst - 1`. + +`[.kw]#val# fromVector : 'a Vector.vector * 'a \-> 'a array`:: + `fromVector (vec, dflt)` returns a new array created from the elements + of `vec` and with default value `dflt`. The bound of the array will be + `length vec - 1`. + +`[.kw]#val# toList : 'a array \-> 'a list`:: + `toList arr` returns a list of the array's contents. The resulting + list will have the array's bound plus one elements. + +[[val:toVector]] +`[.kw]#val# toVector : 'a array \-> 'a vector`:: + `toVector arr` returns a list of the array's contents. The resulting + vector will have the array's bound plus one elements. + +`[.kw]#val# tabulate: (int * (int \-> 'a) * 'a) \-> 'a array`:: + `tabulate (sz, init, dflt)` returns a new array with the first + `sz` elements initialized using the function `init` and the + default value `dflt`. This function raises the + {sml-basis-url}/general.html#SIG:GENERAL.Size:EXN:SPEC[`Size`] + exception if `sz < 0`. + +`[.kw]#val# default : 'a array \-> 'a`:: + `default arr` returns the default value for the array. + +`[.kw]#val# sub : ('a array * int) \-> 'a`:: + `sub (arr, ix)` returns the value of the array at index `ix`. + If that value has not been explicitly set, then it returns the array's + default value. This function raises the + {sml-basis-url}/general.html#SIG:GENERAL.Subscript:EXN:SPEC[`Subscript`] + exception if `ix < 0`. + +`[.kw]#val# update : ('a array * int * 'a) \-> unit`:: + `update (arr, ix, v)` sets the value at index `ix` of the array to `v`. + If `ix` is greater than the current bound of the array, then the bound + is set to `ix`. This function raises the + {sml-basis-url}/general.html#SIG:GENERAL.Subscript:EXN:SPEC[`Subscript`] + exception if `ix < 0`. + +`[.kw]#val# bound : 'a array \-> int`:: + `bound arr` returns the current bound of the array, which is the highest + index that has been explicitly set (__e.g.__, by `update`). + +`[.kw]#val# truncate : ('a array * int) \-> unit`:: + `truncate (arr, sz)` sets every entry with index greater or equal to + `sz` to the array's default value. + +`[.kw]#val# appi : (int * 'a \-> unit) \-> 'a array \-> unit`:: + `appi f arr` behaves like the + {sml-basis-url}/mono-array.html#SIG:ARRAY.appi:VAL[`Array.appi`] + function on the defined range of `arr`. + +`[.kw]#val# app : ('a \-> unit) \-> 'a array \-> unit`:: + `app f arr` behaves like the + {sml-basis-url}/mono-array.html#SIG:ARRAY.app:VAL[`Array.app`] + function on the defined range of `arr`. + +`[.kw]#val# modifyi : (int * 'a \-> 'a) \-> 'a array \-> unit`:: + `modifyi f arr` behaves like the + {sml-basis-url}/mono-array.html#SIG:ARRAY.modifyi:VAL[`Array.modifyi`] + function on the defined range of `arr`. + +`[.kw]#val# modify : ('a \-> 'a) \-> 'a array \-> unit`:: + `modify f arr` behaves like the + {sml-basis-url}/mono-array.html#SIG:ARRAY.modify:VAL[`Array.modify`] + function on the defined range of `arr`. + +`[.kw]#val# foldli : (int * 'a * 'b \-> 'b) \-> 'b \-> 'a array \-> 'b`:: + `foldli f init arr` behaves like the + {sml-basis-url}/mono-array.html#SIG:ARRAY.foldli:VAL[`Array.foldli`] + function on the defined range of `arr`. + +`[.kw]#val# foldri : (int * 'a * 'b \-> 'b) \-> 'b \-> 'a array \-> 'b`:: + `foldri f init arr` behaves like the + {sml-basis-url}/mono-array.html#SIG:ARRAY.foldri:VAL[`Array.foldri`] + function on the defined range of `arr`. + +`[.kw]#val# foldl : ('a * 'b \-> 'b) \-> 'b \-> 'a array \-> 'b`:: + `foldl f init arr` behaves like the + {sml-basis-url}/mono-array.html#SIG:ARRAY.foldl:VAL[`Array.foldl`] + function on the defined range of `arr`. + +`[.kw]#val# foldr : ('a * 'b \-> 'b) \-> 'b \-> 'a array \-> 'b`:: + `foldr f init arr` behaves like the + {sml-basis-url}/mono-array.html#SIG:ARRAY.foldr:VAL[`Array.foldr`] + function on the defined range of `arr`. + +`[.kw]#val# findi : (int * 'a \-> bool) \-> 'a array \-> (int * 'a) option`:: + `findi f arr` behaves like the + {sml-basis-url}/mono-array.html#SIG:ARRAY.findi:VAL[`Array.findi`] + function on the defined range of `arr`. + +`[.kw]#val# find : ('a \-> bool) \-> 'a array \-> 'a option`:: + `find f arr` behaves like the + {sml-basis-url}/mono-array.html#SIG:ARRAY.find:VAL[`Array.find`] + function on the defined range of `arr`. + +`[.kw]#val# exists : ('a \-> bool) \-> 'a array \-> bool`:: + `exists f arr` behaves like the + {sml-basis-url}/mono-array.html#SIG:ARRAY.exists:VAL[`Array.exists`] + function on the defined range of `arr`. + +`[.kw]#val# all : ('a \-> bool) \-> 'a array \-> bool`:: + `all f arr` behaves like the + {sml-basis-url}/mono-array.html#SIG:ARRAY.all:VAL[`Array.all`] + function on the defined range of `arr`. + +`[.kw]#val# collate : ('a * 'a \-> order) \-> 'a array * 'a array \-> order`:: + `collate cmp (arr1, arr2)` return the lexicographic order of the defined + ranges of the two arrays using the given comparison `cmp` on elements. + +=== Deprecated functions + +`[.kw]#val# vector : 'a array \-> 'a vector`:: + Use xref:#val:toVector[`toVector`] instead. + +== See Also + +xref:fun-DynamicArrayFn.adoc[`DynamicArrayFn`], +{sml-basis-url}/array.html[`Array`], +xref:smlnj-lib.adoc[__The Util Library__] diff --git a/smlnj-lib/Doc/src/Util/str-EditDistance.adoc b/smlnj-lib/Doc/src/Util/str-EditDistance.adoc new file mode 100644 index 0000000..ab06182 --- /dev/null +++ b/smlnj-lib/Doc/src/Util/str-EditDistance.adoc @@ -0,0 +1,37 @@ += The `EditDistance` structure +:Author: John Reppy +:Date: {release-date} +:stem: latexmath +:source-highlighter: pygments +:VERSION: {smlnj-version} + +The `EditDistance` structure computes the "optimal string alignment" +(or Levenshtein) distance between two strings. + +== Synopsis + +[source,sml] +------------ +structure EditDistance +------------ + +== Interface + +[source,sml] +------------ +val distance : string * string -> int +------------ + +== Description + +`[.kw]#val# distance : string * string \-> int`:: + `distance (s1, s2)` returns the number of edit operations required to make the + two strings equal. Edit operations include deleting a character, inserting a + character, replacing a character with another, and swapping two adjacent + characters. + +== See Also + +xref:smlnj-lib.adoc[__The Util Library__], +https://en.wikipedia.org/wiki/Damerau–Levenshtein_distance[Wikipedia article on +Damerau-Levenshtein distance] \ No newline at end of file diff --git a/smlnj-lib/Doc/src/Util/str-FNVHash.adoc b/smlnj-lib/Doc/src/Util/str-FNVHash.adoc new file mode 100644 index 0000000..e5b1f4e --- /dev/null +++ b/smlnj-lib/Doc/src/Util/str-FNVHash.adoc @@ -0,0 +1,67 @@ += The `FNVHash` structure +:Author: John Reppy +:Date: {release-date} +:stem: latexmath +:source-highlighter: pygments +:VERSION: {smlnj-version} + +The `FNVHash` structure provides an implementation of the +https://en.wikipedia.org/wiki/Fowler–Noll–Vo_hash_function[__Fowler-Noll-Vo__] +(FNV) hashing algorithm (specifically the 64-bit FNV-1a algorithm). + +== Synopsis + +[source,sml] +------------ +structure FNVHash +------------ + +== Interface + +[source,sml] +------------ +val offsetBasis : Word64.word + +val hashByte : Word8.word * Word64.word -> Word64.word +val hashChar : char * Word64.word -> Word64.word + +val hashString : string -> word +val hashSubstring : substring -> word +------------ + +== Description + +`[.kw]#val# offsetBasis : Word64.word`:: + The `offsetBasis` should be used as the initial value when + using `hashByte` and/or `hashChar` to incrementally hash a + data value. + +`[.kw]#val# hashByte : Word8.word * Word64.word \-> Word64.word`:: + `hashByte (b, h)` computes one step of the FNV hashing algorithm + for byte `b` and initial hash value `h`. + +`[.kw]#val# hashChar : char * Word64.word \-> Word64.word`:: + `hashByte (c, h)` computes one step of the FNV hashing algorithm + for character `c` and initial hash value `h`. + +`[.kw]#val# hashString : string \-> word`:: + `hashString s` returns the hash of the given string. It is equivalent to + the expression ++ +[source,sml] +------------ +CharVector.foldl hashChar offsetBasis s +------------ + +`[.kw]#val# hashSubstring : substring \-> word`:: + `hashSubstring ss` returns the hash of the given substring. It is equivalent to + the expression ++ +[source,sml] +------------ +Substring.foldl hashChar offsetBasis ss +------------ + +== See Also + +xref:smlnj-lib.adoc[__The Util Library__] diff --git a/smlnj-lib/Doc/src/Util/str-Fifo.adoc b/smlnj-lib/Doc/src/Util/str-Fifo.adoc new file mode 100644 index 0000000..77ac0ca --- /dev/null +++ b/smlnj-lib/Doc/src/Util/str-Fifo.adoc @@ -0,0 +1,127 @@ += The `Fifo` structure +:Author: John Reppy +:Date: {release-date} +:stem: latexmath +:source-highlighter: pygments +:VERSION: {smlnj-version} + +The `Fifo` structure provides a functional queue data structure, +which are implemented as a pair of stacks (lists) representing the +front and rear of the queue. **Single-threaded** enqueuing and +dequeuing operations will have amortized constant time. + +== Synopsis + +[source,sml] +------------ +signature FIFO +structure Fifo :> FIFO +------------ + +== Interface + +[source,sml] +------------ +type 'a fifo + +exception Dequeue + +val empty : 'a fifo +val isEmpty : 'a fifo -> bool +val enqueue : 'a fifo * 'a -> 'a fifo +val dequeue : 'a fifo -> 'a fifo * 'a +val next : 'a fifo -> ('a * 'a fifo) option +val delete : ('a fifo * ('a -> bool)) -> 'a fifo +val head : 'a fifo -> 'a +val peek : 'a fifo -> 'a option +val length : 'a fifo -> int +val contents : 'a fifo -> 'a list +val app : ('a -> unit) -> 'a fifo -> unit +val map : ('a -> 'b) -> 'a fifo -> 'b fifo +val foldl : ('a * 'b -> 'b) -> 'b -> 'a fifo -> 'b +val foldr : ('a * 'b -> 'b) -> 'b -> 'a fifo -> 'b +------------ + +== Description + +`[.kw]#type# 'a fifo`:: + The type constructor for functional queues. + +[[exn:Dequeue]] +`[.kw]#exception# Dequeue`:: + This exceptions is raised when the `dequeue` function is + applied to an empty queue. + +`[.kw]#val# empty : 'a fifo`:: + The empty queue. + +`[.kw]#val# isEmpty : 'a fifo \-> bool`:: + `ifEmpty q` returns true if the queue is empty. + +`[.kw]#val# enqueue : 'a fifo * 'a \-> 'a fifo`:: + `enqueue (q, x)` returns a queue with `x` added to the end. + +`[.kw]#val# dequeue : 'a fifo \-> 'a fifo * 'a`:: + `dequeue q` returns a pair `(q', x)`, where `x` was the first + element in `q` and ``q'`` is the queue with `x` removed. + This function raises the xref:#exn:Dequeue[`Dequeue`] exception + if it is called on an empty queue. + +`[.kw]#val# next : 'a fifo \-> ('a * 'a fifo) option`:: + `next q` returns `SOME(q', x)`, where `x` was the first + element in `q` and ``q'`` is the queue with `x` removed, or + `NONE` if `q` is empty. + +`[.kw]#val# delete : ('a fifo * ('a \-> bool)) \-> 'a fifo`:: + `delete (q, pred)` removes those items from `q` for which the + function `pred` returns `true` and returns the resulting queue. + +`[.kw]#val# head : 'a fifo \-> 'a`:: + `head q` returns the first element of `q` or raises the exception + xref:#exn:Dequeue[`Dequeue`] if `q` is empty. + +`[.kw]#val# peek : 'a fifo \-> 'a option`:: + `peek q` returns `SOME x`, where `x` is the first element of `q`, + or `NONE` if `q` is empty. + +`[.kw]#val# length : 'a fifo \-> int`:: + `length q` returns the number of elements in the queue. + +`[.kw]#val# contents : 'a fifo \-> 'a list`:: + `contents q` returns the contents of `q` as a list. + +`[.kw]#val# app : ('a \-> unit) \-> 'a fifo \-> unit`:: + `app f q` applies the function `f` to the elements of `q`. + This expression is equivalent to ++ +[source,sml] +------------ +List.app f (contents q) +------------ + +`[.kw]#val# map : ('a \-> 'b) \-> 'a fifo \-> 'b fifo`:: + `map f q` returns the queue that results from mapping + the function `f` across the elements of the queue. + +`[.kw]#val# foldl : ('a * 'b \-> 'b) \-> 'b \-> 'a fifo \-> 'b`:: + `foldl f init q` folds the function `f` over the elements of `q` from + front to back. This expression is equivalent to ++ +[source,sml] +------------ +List.foldl f init (contents q) +------------ + +`[.kw]#val# foldr : ('a * 'b \-> 'b) \-> 'b \-> 'a fifo \-> 'b`:: + `foldr f init q` folds the function `f` over the elements of `q` from + back to front. This expression is equivalent to ++ +[source,sml] +------------ +List.foldr f init (contents q) +------------ + +== See Also + +xref:str-Queue.adoc[`Queue`], +xref:smlnj-lib.adoc[__The Util Library__] diff --git a/smlnj-lib/Doc/src/Util/str-Format.adoc b/smlnj-lib/Doc/src/Util/str-Format.adoc new file mode 100644 index 0000000..7c6b1b8 --- /dev/null +++ b/smlnj-lib/Doc/src/Util/str-Format.adoc @@ -0,0 +1,222 @@ += The `Format` structure +:Author: John Reppy +:Date: {release-date} +:stem: latexmath +:source-highlighter: pygments +:VERSION: {smlnj-version} + +The `Format` structure provides `printf`-style string formatting. +The syntax of format strings is a subset of that +supported by the *C* `printf` function. + +== Synopsis + +[source,sml] +------------ +structure Format +------------ + +== Interface + +[source,sml] +------------ +datatype fmt_item + = ATOM of Atom.atom + | LINT of LargeInt.int + | INT of Int.int + | LWORD of LargeWord.word + | WORD of Word.word + | WORD8 of Word8.word + | BOOL of bool + | CHR of char + | STR of string + | REAL of Real.real + | LREAL of LargeReal.real + | LEFT of (int * fmt_item) + | RIGHT of (int * fmt_item) + +exception BadFormat +exception BadFmtList + +val format : string -> fmt_item list -> string +val formatf : string -> (string -> unit) -> fmt_item list -> unit +------------ + +== Description + +[[type:fmt_item]] +`[.kw]#datatype# fmt_item = ...`:: + The `fmt_item` datatype is a union of the types that the `format` function + supports. The constructors are interpreted as follows: ++ +-- + `ATOM atm`:: specifies an atom `atm` to convert (the conversion + specifier must be "`s`"). + + `LINT n`:: specifies a large integer value `n` to convert + (the conversion specifier must be one of "`d`", "`o`", "`x`", or "`X`"). + + `INT n`:: specifies a default integer value `n` to convert + (the conversion specifier must be one of "`d`", "`o`", "`x`", or "`X`"). + + `LWORD w`:: specifies a large word value `w` to convert + (the conversion specifier must be one of "`d`", "`o`", "`x`", or "`X`"). + + `WORD w`:: specifies a default word value `w` to convert + (the conversion specifier must be one of "`d`", "`o`", "`x`", or "`X`"). + + `WORD8 w`:: specifies an 8-bit word value `w` to convert + (the conversion specifier must be one of "`d`", "`o`", "`x`", or "`X`"). + + `BOOL b`:: specifies a Boolean value `b` to convert + (the conversion specifier must be "`b`"). + + `CHR c`:: specifies a character value + (the conversion specifier must be "`c`"). + + `STR s`:: specifies a string value `s` to convert + (the conversion specifier must be "`s`"). The conversion is the + identity; _e.g._, `STR "\n"` will produce a newline in the result + string. + + `REAL r`:: specifies a default real value `r` to convert + (the conversion specifier must be one of "`e`", "`E`", "`f`", + "`F`", "`g`", or "`G`"). + + `LREAL r`:: specifies a large real value `r` to convert + (the conversion specifier must be one of "`e`", "`E`", "`f`", + "`F`", "`g`", or "`G`"). + + `LEFT(wid, item)`:: specifies a left-padded (right-justified) + conversion, where the result of formatting `item` + is padded on the left with spaces to the width specified + by `wid`. Note that the padding occurs _after_ `item` + formatted, so it can be combined with width specifiers and + zero padding. + + `RIGHT(wid, item)`:: specifies a right-padded (left-justified) + conversion, where the result of formatting `item` + is padded on the right with spaces to the width specified + by `wid`. Note that the padding occurs _after_ `item` + formatted, so it can be combined with width specifiers and + zero padding. +-- + +`[.kw]#exception# BadFormat`:: + This exception is raised when either `format` or `formatf` is applied + to an ill-formed format string. + +`[.kw]#exception# BadFmtList`:: + This exception is raised when there is a mismatch in either + number or type between the format string and the list of items. + +`[.kw]#val# format : string \-> fmt_item list \-> string`:: + `format fmt` returns a function for formating a list of format items as + a string by converting the list of items according to the format + string `fmt`. If the format string is ill formed, the `BadFormat` + exception will be raised. Likewise, if there is a mismatch between + the conversion specifiers in the format string and the list of items, + then the `BadFmtList` exception is raised. + +`[.kw]#val# formatf : string \-> (string \-> unit) \-> fmt_item list \-> unit`:: + `format fmt consumer items` is equivalent to the expression ++ +[source,sml] +------------ +consumer (format fmt items) +------------ + +== Format Strings + +The `format and `formatf` functions take a format string and a list of +format items as arguments. The format string is composed of zero or more +directives, which are either ordinary characters (excluding `%`), which are +copied to the result, or conversion specifiers, which are used to convert +the corresponding format items to strings that are then added to the result. + +Conversion specifiers begin with the percent (`%`) character followed by +the following in sequence: + +* Zero or more of the following single-character flags. Note that these + only apply to the numeric conversion specifiers. + +** A "++ ++" (space), which means that a space character is used as the + sign for positive numbers. This flag is incompatible with the + "`+`" flag. + +** A "`+`", which means that a `+` character is used as the sign for positive + numbers. This flag is incompatible with the "` `" flag. + +** A "`~`", which means that the tilde character is used as the sign for negative + numbers (_i.e._, *SML* syntax). + +** A "`0`", which means that the zero character should be used to pad the number + (on the left) to the requested width. + +** A "`-`", which means that the minus character is used as the sign for negative + numbers, which is the default behavior. Note that this interpretation + of the "`-`" flag differs from the *C* `printf` function, where it is + used to specify left justification; use the `LEFT` constructor for that + purpose. + +** A "`#`", which means that a _base_ specifier should be prepended to + the representation of the number. + +* an optional decimal number specifying a minimum field width. If the + converted value has fewer characters than the field width, it will be padded + on the left with spaces (or zeros, when zero-padding has been specified). + +* An optional precision, in the form of a period "`.`" followed by an optional + decimal number. If the number is omitted, the precision is taken as zero. + The precision specifies the the number of digits to appear after the + decimal-point for "`a`", "`A`", "`e`", "`E`", "`f`", and "`F`" conversions, + the maximum number of significant digits for "`g`" and "`G`" conversions, and + the maximum number of characters for the "`s`" conversion. + +* The conversion-specifier character, which must match the corresponding + format item. The conversion character is one of the following: + +** A "`d`", which specifies the conversion of an integer (`INT` or `LINT`) + or word (`WORD`, `LWORD`, or `WORD8`) item to its decimal representation. + +** An "`o`", which specifies the conversion of an integer (`INT` or `LINT`) + or word (`WORD`, `LWORD`, or `WORD8`) item to its octal representation. If the + "`#`" flag was specifies, then a leading `"0"` is prepended to the result. + +** An "`x`" or "`X`", which specifies the conversion of an integer + (`INT` or `LINT`) or word (`WORD`, `LWORD`, or `WORD8`) item to its hexadecimal + representation. The digits are lower-case for "`x`" and upper-case + for "`X`". If the "`#`" flag was specifies, then a leading "`0x`" (or "`0X`") + is prepended to the result. + +** An "`e`" or "`E`", which specifies the conversion of a real (`REAL` or + `LREAL`) item to the format ``__s__ __d__ *.* __ddd__ *e* __s__ __dd__``, + where there is one digit before the decimal-point character and the + number of digits after the decimal-point is equal to the precision. + (The "``*e*``" is replaced by "``*E*``" for the "`E`" conversion specifier.) + If the precision is missing, it defaults to six and if the precision is + zero, no decimal-point character appears. The signs (``__s__``) + of the number and exponent are displayed as specified by the flags. + +** A "`f`" or "`F`", which specifies the conversion of a real (`REAL` or + `LREAL`) item to the format ``__s__ __ddd__ *.* __ddd__``, where the + number of digits after the decimal-point is equal to the precision + specification (or six if not specified). + +** A "`g`" or "`G`", which specifies the conversion of a real (`REAL` or + `LREAL`) item to either the format specified by "`e`" or "`f`" + (or "`E`" or "`F`" in the case of "`G`"). + +** A "`b`", which specifies the conversion of a boolean (`BOOL`) item, + which will be displayed as either "`true`" or "`false`." + +** A "`c`", which specifies the identity conversion of a character (`CHAR`) item. + +** A "`s`", which specifies the identity conversion of a string (`STR`) or + atom (`ATOM`) item. + +== See Also + +xref:str-FormatComb.adoc[`FormatComb`], +xref:str-Scan.adoc[`Scan`], +xref:smlnj-lib.adoc[__The Util Library__] diff --git a/smlnj-lib/Doc/src/Util/str-FormatComb.adoc b/smlnj-lib/Doc/src/Util/str-FormatComb.adoc new file mode 100644 index 0000000..90093d7 --- /dev/null +++ b/smlnj-lib/Doc/src/Util/str-FormatComb.adoc @@ -0,0 +1,323 @@ += The `FormatComb` structure +:Author: John Reppy +:Date: {release-date} +:stem: latexmath +:source-highlighter: pygments +:VERSION: {smlnj-version} + +The `FormatComb` structure provides well-typed formating (or _unparsing_) +combinators in the style of Olivier Danvy's +https://www.brics.dk/RS/98/12/[__Functional Unparsing__] work. + +The idea is to use combinators for constructing something akin to +the format string of *C*'s `printf` function. The difference is, however, +that our formats are not strings. Instead, format fragments have +meaningful types, and passing them to the `format` function results +in a curried function whose arguments have precisely the types that +correspond to argument-consuming parts of the format. (Such +argument-consuming parts are similar to the conversion-specifications +the xref:str-Format.adoc[`Format`] structure.) + +There is an underlying notion of "abstract formats" of type `'a format`, +but the user operates at the level of "format fragments," which +have type `('a, 'b) fragment` and are typically polymorphic +in ``'a`` (where ``'b`` is instantiated to some type containing ``'a``). +Fragments are functions from formats to formats and can be composed +freely using the infix function-composition operator (`o`). This +form of format composition translates to a corresponding concatenation +of the resulting output. + +Fragments are composed from two kids of primitve fragments called +_elements_ and _glue_, respectively. An element is a fragment that +consumes some argument (which thanks to the typing magic appears as a +curried argument when the format gets executed). Glue are fragments +that do not consume arguments but merely insert fixed text (fixed +at format construction time) into the output. + +There are also adjustment operations that pad, trim, or fit the output +of entire fragments (primitive or not) to a given size. + +Matthias Blume wrote the code for this module. + +== Synopsis + +[source,sml] +------------ +signature FORMAT_COMB +structure FormatComb : FORMAT_COMB +------------ + +== Interface + +[source,sml] +------------ +type 'a format +type ('a, 'b) fragment = 'a format -> 'b format + +type 'a glue = ('a, 'a) fragment +type ('a, 't) element = ('a, 't -> 'a) fragment +type 'a gg + +val format : (string, 'a) fragment -> 'a + +val format' : (string list -> 'b) -> ('b, 'a) fragment -> 'a + +val using : ('t -> string) -> ('a, 't) element + +val int : ('a, int) element +val real : ('a, real) element +val bool : ('a, bool) element +val string : ('a, string) element +val string' : ('a, string) element +val char : ('a, char) element +val char' : ('a, char) element + +val int' : StringCvt.radix -> ('a, int) element +val real' : StringCvt.realfmt -> ('a, real) element + +val list : ('a, 'x) element -> ('a, 'x list) element +val option : ('a, 'x) element -> ('a, 'x option) element +val seq : (('x * 'a gg -> 'a gg) -> 'a gg -> 's -> 'a gg) + -> 'a glue + -> ('a, 'x) element + -> ('a, 's) element + +val glue : ('a, 't) element -> 't -> 'a glue + +val elem : ('t -> 'a glue) -> ('a, 't) element + +val nothing : 'a glue +val text : string -> 'a glue +val sp : int -> 'a glue +val nl : 'a glue +val tab : 'a glue + +val listg : ('t -> 'a glue) -> ('t list -> 'a glue) +val optiong : ('t -> 'a glue) -> ('t option -> 'a glue) + +val seqg : (('x * 'a gg -> 'a gg) -> 'a gg -> 's -> 'a gg) + -> 'a glue + -> ('x -> 'a glue) + -> 's -> 'a glue + +type place +val left : place +val center : place +val right : place + +val pad : place -> int -> ('a, 't) fragment -> ('a, 't) fragment +val trim : place -> int -> ('a, 't) fragment -> ('a, 't) fragment +val fit : place -> int -> ('a, 't) fragment -> ('a, 't) fragment + +val padl : int -> ('a, 't) fragment -> ('a, 't) fragment +val padr : int -> ('a, 't) fragment -> ('a, 't) fragment +------------ + +== Description + +`[.kw]#type# 'a format`:: + An abstract type; values of this type are internal to the + implementation. + +`[.kw]#type# ('a, 'b) fragment = 'a format \-> 'b format`:: + A fragment is a function from formats to formats. + +`[.kw]#type# 'a glue = ('a, 'a) fragment`:: + A `glue` fragment insertes text into the output without consuming + and argument. + +`[.kw]#type# ('a, 't) element = ('a, 't \-> 'a) fragment`:: + An `element` fragment consumes an argument of type ``'t``, converts it to a string, + and inserts the result in the output. + +`[.kw]#type# 'a gg`:: + An abstract helper type that is internal to the implementation. + +[[val:format]] +`[.kw]#val# format : (string, 'a) fragment \-> 'a`:: + `format fmt` returns a format function as defined by the expression `fmt` + that will produce a string result when applied to the necessary arguments. + +`[.kw]#val# format' : (string list \-> 'b) \-> ('b, 'a) fragment \-> 'a`:: + `format' consumer fmt` returns a format function as defined by the + expression `fmt` that will invoke the `consumer` on the list of strings + produced by formating when applied to the necessary arguments. + (The argument to the `consumer` is a string list to avoid + premature string concatenation in the implementation). + Note that the xref:#val:format[`format`] function can be defined in + terms of ``format'`` as follows: +[source,sml] +------------ +val format = format' String.concat +------------ + + (* Make a type-specific element given a toString function for this type *) +`[.kw]#val# using : ('t \-> string) \-> ('a, 't) element`:: + `using tos` returns an element fragment that represents the given + "value-to-string" conversion. + +`[.kw]#val# int : ('a, int) element`:: + an element fragment for formating integers; it is equivalent to the + expression `using {sml-basis-url}/integer.html#SIG:INTEGER.toString:VAL[Int.toString]`. + +`[.kw]#val# int' : StringCvt.radix \-> ('a, int) element`:: + `int' radix` returns an element fragment for formating integers in the specified + `radix`. It is equivalent to the expression + `using ({sml-basis-url}/integer.html#SIG:INTEGER.fmt:VAL[Int.fmt] radix)`. + +`[.kw]#val# real : ('a, real) element`:: + an element fragment for formating reals; it is equivalent to the + expression `using {sml-basis-url}/real.html#SIG:REAL.toString:VAL[Real.toString]`. + +`[.kw]#val# real' : StringCvt.realfmt \-> ('a, real) element`:: + `real' rf` returns an element fragment for formating reals with the specified + format. It is equivalent to the expression + `using ({sml-basis-url}/real.html#SIG:REAL.fmt:VAL[Real.fmt] rf)`. + +`[.kw]#val# bool : ('a, bool) element`:: + an element fragment for formating booleans; it is equivalent to the + expression `using {sml-basis-url}/bool.html#SIG:BOOL.toString:VAL[Bool.toString]`. + +`[.kw]#val# string : ('a, string) element`:: + an element fragment for formating raw strings; it is equivalent to the + expression `using (fn x => x)`. + +`[.kw]#val# string' : ('a, string) element`:: + an element fragment for formating strings with escapes; it is equivalent to the + expression `using {sml-basis-url}/string.html#SIG:STRING.toString:VAL[String.toString]`. + +`[.kw]#val# char : ('a, char) element (* using String.str *)`:: + an element fragment for formating raw characters; it is equivalent to the + expression `using {sml-basis-url}/string.html#SIG:STRING.str:VAL[String.str]`. + +`[.kw]#val# char' : ('a, char) element (* using Char.toString *)`:: + an element fragment for formating characters with escapes; it is equivalent to the + expression `using {sml-basis-url}/char.html#SIG:CHAR.toString:VAL[Char.toString]`. + + (* "polymorphic" elements *) +`[.kw]#val# list : ('a, 'x) element \-> ('a, 'x list) element (* "[", ", ", "]" *)`:: + `list elemFmt` returns an element fragment that formats lists of items + using the `elemFmt` element fragment to format items. The list will be enclosed + in brackets ("``[``" "``]``") with elements separated by commas. + +`[.kw]#val# option : ('a, 'x) element \-> ('a, 'x option) element`:: + `option elemFmt` returns an element fragment that formats optional items + using the `elemFmt` element fragment to format the item value. For + an argument of `NONE`, the string `"NONE"` is returned, while for an argument + of `SOME v`, the string `"SOME(s)"` is returned, where `s` is the result + of formatting `v` using `elemFmt`. + +`[.kw]#val# seq : (('x * 'a gg \-> 'a gg) \-> 'a gg \-> 's \-> 'a gg) \-> 'a glue \-> ('a, 'x) element \-> ('a, 's) element`:: + something + +[[val:glue]] +`[.kw]#val# glue : ('a, 't) element \-> 't \-> 'a glue`:: + `glue fmt arg` returns a glue element that renders as the string + that results from using `fmt` to convert `arg` to a string. + +`[.kw]#val# elem : ('t \-> 'a glue) \-> ('a, 't) element`:: + `elem glueGen` returns an element for rendering arguments to the + `glueGen` function. This function is the inverse of xref:#val:glue[`glue`] + and is useful for extending the set of combinators. + +`[.kw]#val# nothing : 'a glue`:: + A glue fragment that renders as the empty string. + +`[.kw]#val# text : string \-> 'a glue`:: + `text s` returns a glue fragment that renders as the text `s`. + +`[.kw]#val# sp : int \-> 'a glue`:: + `sp n` returns a glue fragment that renders as `n` space characters. + +`[.kw]#val# nl : 'a glue`:: + A glue fragment that renders as a newline character. + +`[.kw]#val# tab : 'a glue`:: + A glue fragment that renders as a tab character. + +`[.kw]#val# listg : ('t \-> 'a glue) \-> ('t list \-> 'a glue)`:: + something + +`[.kw]#val# optiong : ('t \-> 'a glue) \-> ('t option \-> 'a glue)`:: + something + +`[.kw]#val# seqg : (('x * 'a gg \-> 'a gg) \-> 'a gg \-> 's \-> 'a gg) \-> 'a glue \-> ('x \-> 'a glue) \-> 's \-> 'a glue`:: + something + +`[.kw]#type# place`:: + An abstract type that represents how to pad or trim of string. + +`[.kw]#val# left : place`:: + Pad or trim the left side of a string. + +`[.kw]#val# center : place`:: + Pad or trim both sides of a string. + +`[.kw]#val# right : place`:: + Pad or trim the left side of a string. + +[[val:pad]] +`[.kw]#val# pad : place \-> int \-> ('a, 't) fragment \-> ('a, 't) fragment`:: + `pad place n frag` wraps the fragment `frag` with padding to bring the total + with to no fewer than `n` characters. The `place` specifies where padding + spaces will be added. Padding never reduces the size of the result. + +[[val:trim]] +`[.kw]#val# trim : place \-> int \-> ('a, 't) fragment \-> ('a, 't) fragment`:: + `trim place n frag` wraps the fragment `frag` with a trimming operation + to bring the total with to no more than `n` characters. The `place` specifies + where trimming occurs. Trimming never increases the size of the result. + +[[val:fit]] +`[.kw]#val# fit : place \-> int \-> ('a, 't) fragment \-> ('a, 't) fragment`:: + `fit place n frag` wraps the fragment `frag` with an operation that + guarantees the result will be exactly `n` characters by either padding or + trimming as necessary. + +`[.kw]#val# padl : int \-> ('a, 't) fragment \-> ('a, 't) fragment`:: + `padl n frag` is equivalent to `pad left n frag`. + +`[.kw]#val# padr : int \-> ('a, 't) fragment \-> ('a, 't) fragment`:: + `padr n frag` is equivalent to `pad right n frag`. + +== Examples + +Here are examples on how to use this facility. + +[source,sml] +------------ +format nothing (* ==> "" *) +format int 1234 (* ==> "1234" *) + +format (text "The square of " o int o text " is " o int o text ".") 2 4 + (* ==> "The square of 2 is 4." *) + +format (int o bool o char) 1 true #"x" + (* ==> "1truex" *) + +format (glue string "glue vs. " o string o glue int 42 o sp 5 o int) + "ordinary text " 17 + (* ==> "glue vs. ordinary text 42 17" *) +------------ + +and here are examples of how the +xref:#val:pad[`pad`]/xref:#val:trim[`trim`]/xref:#val:fit[`fit`] functions work. + +[source,sml] +------------ +format (pad left 6 int) 1234 (* ==> " 1234" *) +format (pad center 6 int) 1234 (* ==> " 1234 " *) +format (pad right 6 int) 1234 (* ==> "1234 " *) +format (trim left 2 int) 1234 (* ==> "34" *) +format (trim center 2 int) 1234 (* ==> "23" *) +format (trim right 2 int) 1234 (* ==> "12" *) +format (fit left 3 int) 12 (* ==> " 12" *) +format (fit left 3 int) 123 (* ==> "123" *) +format (fit left 3 int) 1234 (* ==> "234" *) +------------ + + +== See Also + +xref:str-Format.adoc[`Format`], +xref:smlnj-lib.adoc[__The Util Library__] diff --git a/smlnj-lib/Doc/src/Util/str-GetOpt.adoc b/smlnj-lib/Doc/src/Util/str-GetOpt.adoc new file mode 100644 index 0000000..294a033 --- /dev/null +++ b/smlnj-lib/Doc/src/Util/str-GetOpt.adoc @@ -0,0 +1,274 @@ += The `GetOpt` structure +:Author: John Reppy +:Date: {release-date} +:stem: latexmath +:source-highlighter: pygments +:VERSION: {smlnj-version} + +The `GetOpt` structure provides command-line argument processing similar +to the GNU *getopt* library. It supports both short options (a single +character preceded by a single minus character) and long options (multi-character +names preceded by two minus characters). Options may require an argument; +for short options, the argument is the next command-line argument, while for +long options, the argument follows an equal character (_e.g._, +"++--foo=bar++"). If the command-line arguments contains the string +"++--++", then all subsequent arguments are passed through +as non-options. + +This implementation was ported from Sven Panne's Haskell implementation +by Riccardo Pucella and has then been updated in various ways. + +== Synopsis + +[source,sml] +------------ +structure GetOpt +------------ + +== Interface + +[source,sml] +------------ +datatype 'a arg_order + = RequireOrder + | Permute + | ReturnInOrder of string -> 'a + +datatype 'a arg_descr + = NoArg of unit -> 'a + | ReqArg of (string -> 'a) * string + | OptArg of (string option -> 'a) * string + +type 'a opt_descr = { + short : string, + long : string list, + desc : 'a arg_descr, + help : string + } + +val usageInfo : { + header : string, + options : 'a opt_descr list + } -> string + +val getOpt : { + argOrder : 'a arg_order, + options : 'a opt_descr list, + errFn : string -> unit + } -> string list -> ('a list * string list) +------------ + +== Description + +`[.kw]#datatype# 'a arg_order = ...`:: + This datatype is used to specify the ordering policy for command-line + arguments. The constructors are interpreted as follows: ++ +-- +`RequireOrder`:: + No options are processed after the first non-option argument is encountered. + +`Permute`:: + Options and non-options may be freely mixed. + +`ReturnInOrder [.kw]#of# string \-> 'a`:: + Non-options are converted to options using the supplied function. +-- + +`[.kw]#datatype# 'a arg_descr` = ...:: + This datatype is used to describe the optional argument of an option. + Each of the constructors has a function as an argument that is used + to generate the representation of the processed option. + The constructors are interpreted as follows: ++ +-- +[[con:NoArg]] +`NoArg of unit \-> 'a`:: + The option does not have an argument, the supplied function is applied to + unit when processing the option. + +[[con:ReqArg]] +`ReqArg of (string \-> 'a) * string`:: + The option requires an argument, which is handled by the given function. + The string is the name of the argument used when printing a usage message. + +[[con:OptArg]] +`OptArg of (string option \-> 'a) * string`:: + The argument is optional and + The string is the name of the argument used when printing a usage message. +-- + +[[type:opt_descr]] +`[.kw]#type# 'a opt_descr = { ... }`:: + This record type describes the properties of a command-line option. + Its fields have the following meaning: ++ +-- +`short : string`:: + A string containing the allowed short flags for the option. + +`long : string list`:: + A list of the allowed long flags for the option. + +`desc : 'a arg_descr`:: + The description of how to process the option's argument. + +`help : string`:: + A descriptive message that is used to construct the usage message + (see the `usageInfo` function). +-- + +`[.kw]#val# usageInfo : {header, options} \-> string`:: + `usageInfo {header, options}` returns a usage string suitable for a help + message. The `header` argument is prepended to the message (with a newline + between it and the rest of the message). Each option is described on its + own line. + +`[.kw]#val# getOpt : {...} \-> string list \-> ('a list * string list)`:: + `getOpt {argOrder, options, errFn}` returns a function for processing + command-line options, which will return a list of results from processing + the options and a list of the residual command-line arguments. The + arguments to the call are ++ +-- +`argOrder : 'a arg_order`:: + Specifies the ordering policy for processing command-line arguments. +`options : 'a opt_descr list`:: + The descriptors for the command-line options. +`errFn : string \-> unit`:: + An error callback function that is used to report errors during + argument processing. +-- + +== Examples + +There are two common approaches to using the `GetOpt` structure. The first +is to define a type that classifies the command-line options. For example, + +[source,sml] +------------ +datatype opt = AFlg | B of string | C of int | Other of string | Bad + +val opts = [ + { short = "aA", long = [], + desc = NoArg(fn () => AFlg), + help = "Set A flag" + }, + { short = "b", long = ["set-b"], + desc = ReqArg(B, ""), help = "Set B name" + }, + { short = "", long = ["cval"], + desc = OptArg ( + fn (SOME s) => (case Int.fromString s + of SOME n => C n + | NONE => Bad) + | NONE => C 0, + ""), + help = "Set C value (default 0)" + } + ] + +fun usage () = print (usageInfo{header = "usage:", options = opts}) + +val doOpts = getOpt { + argOrder = ReturnInOrder (fn s => Other s), + options = opts, + errFn = fn msg => raise Fail msg + } +------------ + +The usage function will print the following text: + +[source] +-------- +usage: + -a, -A Set A flag + -b --set-b= Set B name + --cval[=] Set C value (default 0) +-------- + +Applying the `doOpts` function with the following arguments + +[source,sml] +------------ +doOpts ["-A", "foo", "--", "-c", "baz"]; +------------ + +results in + +[source,sml] +------------ +([AFlg, Other "foo", Other "--", Other "-c", Other "baz"], []) +------------ + +Note that the second component of the result will always be the empty list +because the non-options were wrapped with `Other`. The "`-c`" argument was +treated as a non-option because it came after the "++--++." + +The other approach to using the `GetOpt` structure is to define references +for the various options and then update them in the argument-descriptor +functions. For example: +[source,sml] +------------ +val aFlg : bool ref = ref false +val bOpt : string option ref = ref NONE +val cVal : int option ref = ref NONE +val errorFlg : bool ref = ref false + +val opts = [ + { short = "aA", long = [], + desc = NoArg(fn () => aFlg := true), + help = "Set A flag" + }, + { short = "b", long = ["set-b"], + desc = ReqArg(fn s => bOpt := SOME s, ""), + help = "Set B name" + }, + { short = "", long = ["cval"], + desc = OptArg ( + fn (SOME s) => (case Int.fromString s + of NONE => errorFlg := true + | someN => cVal := someN) + | NONE => cVal := SOME 0, + ""), + help = "Set C value (default 0)" + } + ] + +val doOpts = getOpt { + argOrder = Permute, + options = opts, + errFn = fn msg => raise Fail msg + } +------------ + +With this version, applying the `doOpts` function with the following arguments + +[source,sml] +------------ +doOpts ["-A", "foo", "--", "-c", "baz"]; +------------ + +results in + +[source,sml] +------------ +([()], ["foo", "--", "-c", "baz"]) +------------ + +with the `aFlg` set to `true` and the other flags unchanged. One reason +for using this imperative approach is that it is supported by the +xref:../Controls/controls-lib.adoc[*Controls Library*]. + + +== Bugs + +The function arguments to `ReqArg` and `OptArg` should really have +an option return type so that the case where the argument is badly formed +can be identified in the `GetOpt` implementation. + +== See Also + +xref:../Controls/str-ControlUtil.adoc[`Controls`], +xref:../Controls/controls-lib.adoc[__The Controls Library__], +xref:smlnj-lib.adoc[__The Util Library__] diff --git a/smlnj-lib/Doc/src/Util/str-HashString.adoc b/smlnj-lib/Doc/src/Util/str-HashString.adoc new file mode 100644 index 0000000..31da79e --- /dev/null +++ b/smlnj-lib/Doc/src/Util/str-HashString.adoc @@ -0,0 +1,39 @@ += The `HashString` structure +:Author: John Reppy +:Date: {release-date} +:stem: latexmath +:source-highlighter: pygments +:VERSION: {smlnj-version} + +The `HashString` structure provides hashing functions for strings +and substrings. It is currently implemented as an alias for the +same functions from the xref:str-FNVHash.adoc[`FNVHash`] structure. + +== Synopsis + +[source,sml] +------------ +structure HashString +------------ + +== Interface + +[source,sml] +------------ +val hashString : string -> word + +val hashSubstring : substring -> word +------------ + +== Description + +`[.kw]#val# hashString : string \-> word`:: + `hashString s` returns a hash code for the string `s`. + +`[.kw]#val# hashSubstring : substring \-> word`:: + `hashSubstring s` returns a hash code for the substring `ss`. + +== See Also + +xref:str-FNVHash.adoc[`FNVHash`], +xref:smlnj-lib.adoc[__The Util Library__] diff --git a/smlnj-lib/Doc/src/Util/str-HashTable.adoc b/smlnj-lib/Doc/src/Util/str-HashTable.adoc new file mode 100644 index 0000000..192b5c4 --- /dev/null +++ b/smlnj-lib/Doc/src/Util/str-HashTable.adoc @@ -0,0 +1,191 @@ += The `HashTable` structure +:Author: John Reppy +:Date: {release-date} +:stem: latexmath +:source-highlighter: pygments +:VERSION: {smlnj-version} + +The `HashTable` structure implements hash tables that are polymorphic +in the key type. + +The tables are implemented as an array of _buckets_, which are +lists of key-value pairs. The number of buckets grows with the number +of table entries. + +== Synopsis + +[source,sml] +------------ +structure HashTable +------------ + +== Interface + +[source,sml] +------------ +type ('a, 'b) hash_table + +val mkTable : (('a -> word) * (('a * 'a) -> bool)) -> (int * exn) + -> ('a,'b) hash_table + +val clear : ('a, 'b) hash_table -> unit + +val insert : ('a, 'b) hash_table -> ('a * 'b) -> unit + +val insertWith : ('b * 'b -> 'b) -> ('a, 'b) hash_table -> 'a * 'b -> unit +val insertWithi : ('a * 'b * 'b -> 'b) -> ('a, 'b) hash_table -> 'a * 'b -> unit + +val inDomain : ('a, 'b) hash_table -> 'a -> bool + +val lookup : ('a, 'b) hash_table -> 'a -> 'b +val find : ('a, 'b) hash_table -> 'a -> 'b option + +val findAndRemove : ('a, 'b) hash_table -> 'a -> 'b option + +val remove : ('a, 'b) hash_table -> 'a -> 'b + +val numItems : ('a, 'b) hash_table -> int + +val listItems : ('a, 'b) hash_table -> 'b list +val listItemsi : ('a, 'b) hash_table -> ('a * 'b) list + +val app : ('b -> unit) -> ('a, 'b) hash_table -> unit +val appi : (('a * 'b) -> unit) -> ('a, 'b) hash_table -> unit + +val map : ('b -> 'c) -> ('a, 'b) hash_table -> ('a, 'c) hash_table +val mapi : (('a * 'b) -> 'c) -> ('a, 'b) hash_table -> ('a, 'c) hash_table + +val fold : (('b *'c) -> 'c) -> 'c -> ('a, 'b) hash_table -> 'c +val foldi : (('a * 'b * 'c) -> 'c) -> 'c -> ('a, 'b) hash_table -> 'c + +val modify : ('b -> 'b) -> ('a, 'b) hash_table -> unit +val modifyi : (('a * 'b) -> 'b) -> ('a, 'b) hash_table -> unit + +val filter : ('b -> bool) -> ('a, 'b) hash_table -> unit +val filteri : (('a * 'b) -> bool) -> ('a, 'b) hash_table -> unit + +val copy : ('a, 'b) hash_table -> ('a, 'b) hash_table + +val bucketSizes : ('a, 'b) hash_table -> int list +------------ + +== Description + +`[.kw]#type# ('a, 'b) hash_table`:: + The type of imperative hash tables indexed by ``'a`` values + +`[.kw]#val# mkTable : (('a \-> word) * (('a * 'a) \-> bool)) \-> (int * exn) \-> ('a,'b) hash_table`:: + `mkTable (hash, same) (n, ex)` creates a new hash table that uses the `hash` + function to compute hash values for keys and the `same` function to test + key equality. The table will be initially sized to hold at least `n` items. + The exception `ex` is raised by the xref:#val:lookup[`lookup`] and + xref:#val:remove[`remove`] functions when the search key is not in the domain. + +`[.kw]#val# clear : ('a, 'b) hash_table \-> unit`:: + `clear tbl` removes all of the entries in the table. + +`[.kw]#val# insert : ('a, 'b) hash_table \-> ('a * 'b) \-> unit`:: + `insert tbl (key, item)` inserts a mapping from `key` to `item` into `tbl`. + Any existing mapping of `key` is discarded. + +`[.kw]#val# insertWith : ('b * 'b -> 'b) \-> ('a, 'b) hash_table \-> 'a * 'b \-> unit`:: + `insertWith comb (tbl, key, v)` adds the mapping from `key` to `value` to `tbl`, + where `value = comb(v', v)`, if `tbl` already contained a mapping from `key` + to `v'`; otherwise, `value = v`. + +`[.kw]#val# insertWithi : ('a * 'b * 'b -> 'b) \-> ('a, 'b) hash_table \-> 'a * 'b \-> unit``:: + `insertWithi comb (tbl, key, v)` adds the mapping from `key` to `value` to `tbl`, + where `value = comb(key, v', v)`, if `m` already contained a mapping from `key` + to `v'`; otherwise, `value = v`. + +`[.kw]#val# inDomain : ('a, 'b) hash_table \-> 'a \-> bool`:: + `inDomain tbl key` returns `true` if, and only if, `key` is in the + domain of the table + +[[val:lookup]] +`[.kw]#val# lookup : ('a, 'b) hash_table \-> 'a \-> 'b`:: + `lookup tbl key` returns the item that `key` maps to if `key` is in + the domain of `tbl`. Otherwise, the table's exception is raised. + +`[.kw]#val# find : ('a, 'b) hash_table \-> 'a \-> 'b option`:: + `find tbl key` returns the `SOME v` if `key` is mapped to `v` in `tbl`. + Otherwise, it returns `NONE`. + +`[.kw]#val# findAndRemove : ('a, 'b) hash_table -> 'a -> 'b option`:: + `findAndRemove (tbl, key)` returns `SOME v` and removes `key` from the + table if `tbl` maps `key` to `v`. If `key` is not in the domain of `tbl`, + then `NONE` is returned and `tbl` is unchanged. + +[[val:remove]] +`[.kw]#val# remove : ('a, 'b) hash_table \-> 'a \-> 'b`:: + `remove tbl key` returns the item that `key` maps to if `key` is in + the domain of `tbl` and removes it from the table. Otherwise, the + table's exception is raised. + +`[.kw]#val# numItems : ('a, 'b) hash_table \-> int`:: + `numItems tbl` returns the number of entries in the table. + +`[.kw]#val# listItems : ('a, 'b) hash_table \-> 'b list`:: + `listItems tbl` returns a list of the items in the range of `tbl`. + +`[.kw]#val# listItemsi : ('a, 'b) hash_table \-> ('a * 'b) list`:: + `listItemsi tbl` returns a list of the key-value entries in `tbl`. + +`[.kw]#val# app : ('b \-> unit) \-> ('a, 'b) hash_table \-> unit`:: + `app f tbl` applies the function `f` to each item in the range of `tbl`. + +`[.kw]#val# appi : (('a * 'b) \-> unit) \-> ('a, 'b) hash_table \-> unit`:: + `appi f tbl` applies the function `f` to each item in the + key-value entries in `tbl`. + +`[.kw]#val# map : ('b \-> 'c) \-> ('a, 'b) hash_table \-> ('a, 'c) hash_table`:: + `map f tbl` creates a new table with an entry `(key, f(lookup tbl key))` + in the new table for every `key` in `tbl`. The new table inherits its + hash and key-equality functions, and exception from `tbl`. + +`[.kw]#val# mapi : (('a * 'b) \-> 'c) \-> ('a, 'b) hash_table \-> ('a, 'c) hash_table`:: + `mapi f tbl` creates a new table with an entry `(key, f(key, lookup tbl key))` + in the new table for every `key` in `tbl`. The new table inherits its + hash and key-equality functions, and exception from `tbl`. + +`[.kw]#val# fold : (('b *'c) \-> 'c) \-> 'c \-> ('a, 'b) hash_table \-> 'c`:: + `fold f init tbl` folds the function `f` over the items in the range of `tbl` + using `init` as an initial value. + +`[.kw]#val# foldi : (('a * 'b * 'c) \-> 'c) \-> 'c \-> ('a, 'b) hash_table \-> 'c`:: + `foldi f init tbl` folds the function `f` over the key-velu entries in `tbl` + using `init` as an initial value. + +`[.kw]#val# modify : ('b \-> 'b) \-> ('a, 'b) hash_table \-> unit`:: + `modify f tbl` applies the function `f` for effect to the items in the + range of `tbl`, replacing the old items with the result of applying `f`. + +`[.kw]#val# modifyi : (('a * 'b) \-> 'b) \-> ('a, 'b) hash_table \-> unit`:: + `modifyi f tbl` applies the function `f` for effect to the key-value + entries in `tbl`, replacing the old items with the result of applying `f`. + +`[.kw]#val# filter : ('b \-> bool) \-> ('a, 'b) hash_table \-> unit`:: + `filter pred tbl` removes any entry `(key, item)` from `tbl` for which + `pred item` returns `false`. + +`[.kw]#val# filteri : (('a * 'b) \-> bool) \-> ('a, 'b) hash_table \-> unit`:: + `filteri pred tbl` removes any entry `(key, item)` from `tbl` for which + `pred(key, item)` returns `false`. + +`[.kw]#val# copy : ('a, 'b) hash_table \-> ('a, 'b) hash_table`:: + `copy tbl` creates a copy of `tbl`. This expression is equivalent to ++ +[source,sml] +------------ +map (fn x => x) tbl +------------ + +`[.kw]#val# bucketSizes : ('a, 'b) hash_table \-> int list`:: + `bucketSizes tbl` returns a list of the current number of items per + bucket. This function allows users to gauge the quality of their + hashing function. + +== See Also + +xref:sig-MONO_HASH_TABLE.adoc[`MONO_HASH_TABLE`], +xref:smlnj-lib.adoc[__The Util Library__] diff --git a/smlnj-lib/Doc/src/Util/str-IOUtil.adoc b/smlnj-lib/Doc/src/Util/str-IOUtil.adoc new file mode 100644 index 0000000..3b1af83 --- /dev/null +++ b/smlnj-lib/Doc/src/Util/str-IOUtil.adoc @@ -0,0 +1,65 @@ += The `IOUtil` structure +:Author: John Reppy +:Date: {release-date} +:stem: latexmath +:source-highlighter: pygments +:VERSION: {smlnj-version} + +The `IOUtil` structure provides support for redirecting the standard input +and output streams. + +== Synopsis + +[source,sml] +------------ +signature IO_UTIL +structure IOUtil : IO_UTIL +------------ + +== Interface + +[source,sml] +------------ +val withInputFile : string * ('a -> 'b) -> 'a -> 'b +val withInstream : TextIO.instream * ('a -> 'b) -> 'a -> 'b + +val withOutputFile : string * ('a -> 'b) -> 'a -> 'b +val withOutstream : TextIO.outstream * ('a -> 'b) -> 'a -> 'b +------------ + +== Description + +`[.kw]#val# withInputFile : string * ('a \-> 'b) \-> 'a \-> 'b`:: + `withInputFile (file, f) x` evaluates the expression `f x` with + standard input bound to `file`. The file is closed and the + `TextIO.stdIn` input stream is restored to its original binding + once evaluation terminates. + +`[.kw]#val# withInstream : TextIO.instream * ('a \-> 'b) \-> 'a \-> 'b`:: + `withInstream (inS, f) x` evaluates the expression `f x` with + standard output redirected to `inS`. The `TextIO.stdIn` input stream + is restored to its original binding once evaluation terminates. + +`[.kw]#val# withOutputFile : string * ('a \-> 'b) \-> 'a \-> 'b`:: + `withOutputFile (file, f) x` evaluates the expression `f x` with + standard output redirected to `file`. The file is closed and the + `TextIO.stdOut` output stream is restored to its original destination + once evaluation terminates. + +`[.kw]#val# withOutstream : TextIO.outstream * ('a \-> 'b) \-> 'a \-> 'b`:: + `withOutstream (outS, f) x` evaluates the expression `f x` with + standard output redirected to `outS`. The `TextIO.stdOut` output stream + is restored to its original destination once evaluation terminates. + +== Example + +The following expression will put its output in the file "hello.txt": + +[source,sml] +------------ +withOutputFile ("hello.txt", fn () => print "hello world\n") () +------------ + +== See Also + +xref:smlnj-lib.adoc[__The Util Library__] diff --git a/smlnj-lib/Doc/src/Util/str-LibBase.adoc b/smlnj-lib/Doc/src/Util/str-LibBase.adoc new file mode 100644 index 0000000..9c54794 --- /dev/null +++ b/smlnj-lib/Doc/src/Util/str-LibBase.adoc @@ -0,0 +1,52 @@ += The `LibBase` structure +:Author: John Reppy +:Date: {release-date} +:stem: latexmath +:source-highlighter: pygments +:VERSION: {smlnj-version} + +The `LibBase` structure provides some common definitions that are +shared across the *SML/NJ Lbrary*. + +== Synopsis + +[source,sml] +------------ +signature LIB_BASE +structure LibBase : LIB_BASE +------------ + +== Interface + +[source,sml] +------------ +exception Unimplemented of string +exception Impossible of string + +exception NotFound + +val failure : {module : string, func : string, msg : string} -> 'a +------------ + +== Description + +`[.kw]#exception# Unimplemented of string`:: + This exception is raised to report unimplemented features. + +`[.kw]#exception# Impossible of string`:: + This exception is raised to report internal errors. + +[[exn:NotFound]] +`[.kw]#exception# NotFound`:: + This exception is raised by searching operations when something being + searched for is missing. + +`[.kw]#val# failure : {module : string, func : string, msg : string} \-> 'a`:: + `failure {module, func, msg}` raises the + {sml-basis-url}/general.html#SIG:GENERAL.Fail:EXN[`Fail`] exception + with a message in a standard format. It is used internally to report + errors. + +== See Also + +xref:smlnj-lib.adoc[__The Util Library__] diff --git a/smlnj-lib/Doc/src/Util/str-ListFormat.adoc b/smlnj-lib/Doc/src/Util/str-ListFormat.adoc new file mode 100644 index 0000000..71659cf --- /dev/null +++ b/smlnj-lib/Doc/src/Util/str-ListFormat.adoc @@ -0,0 +1,75 @@ += The `ListFormat` structure +:Author: John Reppy +:Date: {release-date} +:stem: latexmath +:source-highlighter: pygments +:VERSION: {smlnj-version} + +The `ListFormat` structure provides some utility functions for converting +lists into strings (and back). + +== Synopsis + +[source,sml] +------------ +signature LIST_FORMAT +structure ListFormat : LIST_FORMAT +------------ + +== Interface + +[source,sml] +------------ +val fmt : { + init : string, + sep : string, + final : string, + fmt : 'a -> string + } -> 'a list -> string + +val listToString : ('a -> string) -> 'a list -> string + +val scan : { + init : string, + sep : string, + final : string, + scan : (char, 'b) StringCvt.reader -> ('a, 'b) StringCvt.reader + } -> (char, 'b) StringCvt.reader -> ('a list, 'b) StringCvt.reader +------------ + +== Description + +`[.kw]#val# fmt : { ... } \-> 'a list \-> string`:: + `fmt {init, sep, final, fmt} lst` converts the list `lst` to a string, + where `init` is an initial string, `sep` is the separator, `final` is + the final string, and `fmt` is a function for converting the list + elements to strings. For the list value `[a, b, ..., c]`, the resulting + string will be formatted as ++ +[source,sml] +------------ +init ^ (fmt a) ^ sep ^ (fmt b) ^ sep ^ ... ^ sep ^ (fmt c) ^ final +------------ + +`[.kw]#val# listToString : ('a \-> string) \-> 'a list \-> string`:: + `listToString fmt lst` returns a string representing `lst` using **SML**'s + list notation. In other words, the above expression is equivalent to ++ +[source,sml] +------------ +fmt {init="[", sep=",", final="]", fmt=fmt} lst +------------ + +`[.kw]#val# scan : { ... } \-> (char, 'b) StringCvt.reader \-> ('a list, 'b) StringCvt.reader`:: + `scan {init, sep, final, scan} getc` returns a + {sml-basis-url}/string-cvt.html#SIG:STRING_CVT.reader:TY[`reader`] + for scanning lists of items from a character stream. The resulting + reader expects the list to begin with the `init` string, use `sep` as + a separator, and end with the `final` string. The reader uses the `scan` + argument function to scan individual list elements. + + The reader will skip extra whitespace, so to scan a list of items separated + by spaces, use the empty string (`""`) as the separator. + +== See Also + +xref:smlnj-lib.adoc[__The Util Library__] diff --git a/smlnj-lib/Doc/src/Util/str-ListMergeSort.adoc b/smlnj-lib/Doc/src/Util/str-ListMergeSort.adoc new file mode 100644 index 0000000..d168b93 --- /dev/null +++ b/smlnj-lib/Doc/src/Util/str-ListMergeSort.adoc @@ -0,0 +1,50 @@ += The `ListMergeSort` structure +:Author: John Reppy +:Date: {release-date} +:stem: latexmath +:source-highlighter: pygments +:VERSION: {smlnj-version} + +The `ListMergeSort` structure implements the merge-sort +algorithm for lists. + +== Synopsis + +[source,sml] +------------ +signature LIST_SORT +structure ListMergeSort : LIST_SORT +------------ + +== Interface + +[source,sml] +------------ + val sort : ('a * 'a -> bool) -> 'a list -> 'a list + + val uniqueSort : ('a * 'a -> order) -> 'a list -> 'a list + + val sorted : ('a * 'a -> bool) -> 'a list -> bool +------------ + +== Description + +`[.kw]#val# sort : ('a * 'a \-> bool) \-> 'a list \-> 'a list`:: + `sort gt l` sorts the list `l` in _ascending_ order using the + "greater-than" relationship defined by `gt`. This sort is _stable_ + and detects initial increasing and decreasing runs and thus is linear + time on ordered inputs. + +`[.kw]#val# uniqueSort : ('a * 'a \-> order) \-> 'a list \-> 'a list`:: + `uniquesort cmp l` sorts the list `l` in _ascending_ order using the + comparison function `cmp`, while removing duplicate elements. + +`[.kw]#val# sorted : ('a * 'a \-> bool) \-> 'a list \-> bool`:: + `sorted gt l` returns `true` if the list is sorted in _ascending_ + order under the _greater-than_ relation `gt`. + +== See Also + +sig-MONO_ARRAY_SORT.adoc[`MONO_ARRAY_SORT`], +sig-ARRAY_SORT.adoc[`ARRAY_SORT`], +xref:smlnj-lib.adoc[__The Util Library__] diff --git a/smlnj-lib/Doc/src/Util/str-ListXProd.adoc b/smlnj-lib/Doc/src/Util/str-ListXProd.adoc new file mode 100644 index 0000000..ef7678b --- /dev/null +++ b/smlnj-lib/Doc/src/Util/str-ListXProd.adoc @@ -0,0 +1,76 @@ += The `ListXProd` structure +:Author: John Reppy +:Date: {release-date} +:stem: latexmath +:source-highlighter: pygments +:VERSION: {smlnj-version} + +The `ListXProd` structure provides list combinators for computing +over the "Cartesian product" of two lists. For lists `[a, b, c]` +and `[x, y, z]`, the elements are processed in the order +[source,sml] +------------ +[ (a, x), (a, y), (a, z), + (b, x), (b, y), (b, z), + (c, x), (c, y), (c, z) +] +------------ + + +== Synopsis + +[source,sml] +------------ +signature LIST_XPROD +structure ListXProd : LIST_XPROD +------------ + +== Interface + +[source,sml] +------------ +val app : (('a * 'b) -> unit) -> ('a list * 'b list) -> unit +val map : (('a * 'b) -> 'c) -> ('a list * 'b list) -> 'c list +val fold : (('a * 'b * 'c) -> 'c) -> 'c -> ('a list * 'b list) -> 'c + +val appX : (('a * 'b) -> unit) -> ('a list * 'b list) -> unit +val mapX : (('a * 'b) -> 'c) -> ('a list * 'b list) -> 'c list +val foldX : (('a * 'b * 'c) -> 'c) -> ('a list * 'b list) -> 'c -> 'c +------------ + +== Description + +[[val:app]] +`[.kw]#val# app : (('a * 'b) \-> unit) \-> ('a list * 'b list) \-> unit`:: + `appX f (l1, l2)` applies the function `f` to the Cartesian product of the + to lists `l1` and `l2`. + +[[val:map]] +`[.kw]#val# map : (('a * 'b) \-> 'c) \-> ('a list * 'b list) \-> 'c list`:: + `mapX f (l1, l2)` maps the function `f` over the Cartesian product of the + to lists `l1` and `l2` to produce a new list. + +[[val:fold]] +`[.kw]#val# fold : (('a * 'b * 'c) \-> 'c) \-> 'c \->('a list * 'b list) \-> 'c`:: + `foldX f init (l1, l2)` folds the function `f` over the Cartesian product of the + to lists `l1` and `l2`, using `init` as the initial value. + +=== Deprecated functions + +The following functions are part of the interface, but have been +deprecated. + +`[.kw]#val# appX : (('a * 'b) \-> 'c) \-> ('a list * 'b list) \-> unit`:: + Use xref:#val:app[`app`] instead. Note that xref:#val:app[`app`] expects + that its first argument will have a `unit` return type. + +`[.kw]#val# mapX : (('a * 'b) \-> 'c) \-> ('a list * 'b list) \-> 'c list`:: + Use xref:#val:map[`map`] instead. + +`[.kw]#val# foldX : (('a * 'b * 'c) \-> 'c) \-> 'c \->('a list * 'b list) \-> 'c`:: + Use xref:#val:fold[`fold`] instead. Note that the second and third + arguments of xref:#val:fold[`fold`] are swapped with respect to `foldX`. + +== See Also + +xref:smlnj-lib.adoc[__The Util Library__] diff --git a/smlnj-lib/Doc/src/Util/str-Native.adoc b/smlnj-lib/Doc/src/Util/str-Native.adoc new file mode 100644 index 0000000..f505cfc --- /dev/null +++ b/smlnj-lib/Doc/src/Util/str-Native.adoc @@ -0,0 +1,23 @@ += Native structure aliases +:Author: John Reppy +:Date: {release-date} +:stem: latexmath +:source-highlighter: pygments +:VERSION: {smlnj-version} + +These structure aliases provide portable names for the host machine's +native numeric types. + +== Synopsis + +[source,sml] +------------ +structure NativeInt : INTEGER +structure NativeWord : WORD +------------ + +== Description + +See the {sml-basis-url}/integer.html[`INTEGER`] +and {sml-basis-url}/word.html[`WORD`] signatures for +details about the structures. diff --git a/smlnj-lib/Doc/src/Util/str-ParserComb.adoc b/smlnj-lib/Doc/src/Util/str-ParserComb.adoc new file mode 100644 index 0000000..35b965f --- /dev/null +++ b/smlnj-lib/Doc/src/Util/str-ParserComb.adoc @@ -0,0 +1,256 @@ += The `ParserComb` structure +:Author: John Reppy +:Date: {release-date} +:stem: latexmath +:source-highlighter: pygments +:VERSION: {smlnj-version} + +The `ParserComb` structure provides parser combinators over +character readers. This structure is modeled after the Haskell +combinators of Hutton and Meijer. The main difference is that +they return a single result, instead of a list of results. +This fact means that the `or` combinator is a *committed* choice; +once one branch succeeds, the others will not be enabled. While +this property is somewhat limiting, for many applications it +will not be a problem. + +== Synopsis + +[source,sml] +------------ +signature PARSER_COMB +structure ParserComb : PARSER_COMB +------------ + +== Interface + +[source,sml] +------------ +type ('a, 'strm) parser = + (char, 'strm) StringCvt.reader -> ('a, 'strm) StringCvt.reader + +val result : 'a -> ('a, 'strm) parser + +val failure : ('a, 'strm) parser + +val wrap : (('a, 'strm) parser * ('a -> 'b)) -> ('b, 'strm) parser + +val seq : (('a, 'strm) parser * ('b, 'strm) parser) -> (('a * 'b), 'strm) parser +val seqWith : (('a * 'b) -> 'c) + -> (('a, 'strm) parser * ('b, 'strm) parser) + -> ('c, 'strm) parser + +val bind : (('a, 'strm) parser * ('a -> ('b, 'strm) parser)) + -> ('b, 'strm) parser + +val eatChar : (char -> bool) -> (char, 'strm) parser + +val char : char -> (char, 'strm) parser +val string : string -> (string, 'strm) parser + +val skipBefore : (char -> bool) -> ('a, 'strm) parser -> ('a, 'strm) parser + +val or : (('a, 'strm) parser * ('a, 'strm) parser) -> ('a, 'strm) parser +val or' : ('a, 'strm) parser list -> ('a, 'strm) parser + +val zeroOrMore : ('a, 'strm) parser -> ('a list, 'strm) parser +val oneOrMore : ('a, 'strm) parser -> ('a list, 'strm) parser + +val option : ('a, 'strm) parser -> ('a option, 'strm) parser +val join : ('a option, 'strm) parser -> ('a, 'strm) parser + +val token : (char -> bool) -> (string, 'strm) parser +------------ + +== Description + +[[type:parser]] +`[.kw]#type# ('a, 'strm) parser = (char, 'strm) StringCvt.reader \-> ('a, 'strm) StringCvt.reader`:: + A parser is a function that takes a + {sml-basis-url}/string-cvt.html#SIG:STRING_CVT.reader:TY[character reader] + and returns reader for the type of values the parser accepts. + +`[.kw]#val# result : 'a \-> ('a, 'strm) parser`:: + `result v getc strm` returns `SOME(v, strm)`; _i.e._, `result v` + lifts the value `v` to a parser that returns `v` without consuming + any input. + +`[.kw]#val# failure : ('a, 'strm) parser`:: + `failure getc strm` returns `NONE`; _i.e._ it is the parser that + does not accept any input. + +`[.kw]#val# wrap : (('a, 'strm) parser * ('a \-> 'b)) \-> ('b, 'strm) parser`:: + `wrap parser f` composes the function `f` with `parser` + +`[.kw]#val# seq : (('a, 'strm) parser * ('b, 'strm) parser) \-> (('a * 'b), 'strm) parser`:: + `seq (parser1, parser2)` is the sequential combination of the + two parsers; _i.e._, a parser that will first parse a value `v1` + from the input using `parser1` and then parse a value `v2` using `parser2` + yielding the pair `(v1, v2)`. + +`[.kw]#val# seqWith : (('a * 'b) \-> 'c) \-> (('a, 'strm) parser * ('b, 'strm) parser) \-> ('c, 'strm) parser`:: + `seqWith f (parser1, parser2)` is the sequential combination of the two parsers composed + with the function `f`; _i.e._, a parser that will first parse a value `v1` + from the input using `parser1` and then parse a value `v2` using `parser2` + yielding the result of `f(v1, v2)`. This expression is equivalent to ++ +[source,sml] +------------ +wrap (seq (parser1, parser2), f) +------------ + +`[.kw]#val# bind : (('a, 'strm) parser * ('a \-> ('b, 'strm) parser)) \-> ('b, 'strm) parser`:: + `bind parser f` returns a parser that first uses `parser` to parse a value `v` + from the input and then continues using the parser that results from `f v`. + +`[.kw]#val# eatChar : (char \-> bool) \-> (char, 'strm) parser`:: + `eatChar pred` returns a parser that parses *one* character `c` for which + `pred c` returns `true`. + +`[.kw]#val# char : char \-> (char, 'strm) parser`:: + `char c` returns a parser that parses the character `c`. + +`[.kw]#val# string : string \-> (string, 'strm) parser`:: + `string s`returns a parser that parses the string `s`. + +`[.kw]#val# skipBefore : (char \-> bool) \-> ('a, 'strm) parser \-> ('a, 'strm) parser`:: + `skipBefore pred parser` returns a parser that first skips any prefix of characters + that satisfy the predicate `pred` and then applies `parser` to the input. + +`[.kw]#val# or : (('a, 'strm) parser * ('a, 'strm) parser) \-> ('a, 'strm) parser`:: + `or (parser1, parser2)` returns the ordered choice of the two parsers; _i.e._, + it returns a parser that first attempts to parse the input using `parser1`; if + `parser1` fails on the input, then it uses `parser2`. + +`[.kw]#val# or' : ('a, 'strm) parser list \-> ('a, 'strm) parser`:: + `or' parsers` returns the ordered choice of a list of parsers. This + expression is equivalent to ++ +[source,sml] +------------ +List.foldr or failure parsers +------------ + +`[.kw]#val# zeroOrMore : ('a, 'strm) parser \-> ('a list, 'strm) parser`:: + `zeroOrMore parser` returns a parser that parses a list of zero or more + items using `parser`. + +`[.kw]#val# oneOrMore : ('a, 'strm) parser \-> ('a list, 'strm) parser`:: + `oneOrMore parser` returns a parser that parses a list of one or more + items using `parser`. + +`[.kw]#val# option : ('a, 'strm) parser \-> ('a option, 'strm) parser`:: + `option parser` returns a parser that parses an optional item + (_i.e._, zero or one occurrences) using `parser`. + +`[.kw]#val# join : ('a option, 'strm) parser \-> ('a, 'strm) parser`:: + `join parser` returns a parser that requires the optional item parsed + by `parser` to be present. + +`[.kw]#val# token : (char \-> bool) \-> (string, 'strm) parser`:: + `token pred` returns a parser for a string of characters, where every + character satisfies the predicate function `pred`. + +== Examples + +As noted above, the xref:#type:parser[`parser`] type and combinators are +designed around the +{sml-basis-url}/string-cvt.html#SIG:STRING_CVT.reader:TY[StringCvt.reader] +representation of input streams. +Thus, the `scan` functions defined in the {basis-lib-url}/index.html[*Basis Library*] +are compatible with the `parser` type defined here. For example, + +[source,sml] +------------ +val boolParser : (bool, 'strm) parser = Bool.scan +val intParser : (int, 'strm) parser = Int.scan StringCvt.DEC +------------ + +Let us define the abstract syntax of a small expression language with +addition, numbers, and let-bound variables. + +[source,sml] +------------ +datatype exp + = VAR of string + | NUM of int + | ADD of exp * exp + | LET of string * exp * exp +------------ + +We can use parser combinators to implement a simple parser +for this language as follows. + +We start by defining a few utility definitions: + +[source,sml] +------------ +structure P = ParserComb + +val +> = P.seq +infixr 3 +> + +fun skipWS getc = P.skipBefore Char.isSpace getc +------------ + +We can then define parsers for the atomic expressions +(numbers and variables): + +[source,sml] +------------ +fun numParser getc = P.wrap (Int.scan StringCvt.DEC, NUM) getc +fun idParser getc = P.seqWith + (fn (a, SOME b) => a ^ b | (a, NONE) => a) + (P.wrap (P.eatChar Char.isAlpha, str), + P.option (P.token Char.isAlphaNum)) + getc +fun varParser getc = P.wrap(idParser, VAR) getc +------------ + +We need the separate `idParser` to parse let-bound identifiers. + +We then define three, mutually-recursive, functions to parse +expressions. + +[source,sml] +------------ +fun letParser getc = P.wrap ( + P.string "let" +> skipWS(idParser) +> skipWS(P.char #"=") +> expParser + +> skipWS(P.string "in") +> expParser, + fn (_, (x, (_, (e1, (_, e2))))) => LET(x, e1, e2)) getc +and expParser getc = P.wrap ( + skipWS (P.seq ( + P.or' [letParser, numParser, varParser], + addParser)), + fn (e, es) => List.foldl (fn (a, b) => ADD(b, a)) e es) getc +and addParser getc = + P.zeroOrMore (skipWS (P.wrap (P.char #"+" +> expParser, #2))) getc +------------ + +Note that the `letParser` must appear before the `varParser` in the +list of parsers combined by ``or'`` to avoid treating the string `"let"` +as a variable. Another detail is that we use +{sml-basis-url}/list.html#SIG:LIST.foldl:VAL[`List.foldl`] with a +function that swaps the order of its arguments in order +that addition is left associative. + +If we evaluate the expression + +[source,sml] +------------ +StringCvt.scanString expParser " let x = 1+2 in x + x "; +------------ + +we get the expected result + +[source,sml] +------------ +SOME (LET ("x", ADD (NUM 1, NUM 2), ADD (VAR "x", VAR "x"))) +------------ + + +== See Also + +https://smlnj.org/doc/ml-lpt/manual.pdf[__SML/NJ Language Processing Tools: User Guide__], +https://smlnj.org/doc/ML-Yacc/index.html[__ML-Yacc User's Manual__], +xref:smlnj-lib.adoc[__The Util Library__] diff --git a/smlnj-lib/Doc/src/Util/str-PathUtil.adoc b/smlnj-lib/Doc/src/Util/str-PathUtil.adoc new file mode 100644 index 0000000..c21c840 --- /dev/null +++ b/smlnj-lib/Doc/src/Util/str-PathUtil.adoc @@ -0,0 +1,79 @@ += The `PathUtil` structure +:Author: John Reppy +:Date: {release-date} +:stem: latexmath +:source-highlighter: pygments +:VERSION: {smlnj-version} + +The `PathUtil` structure provides support for searching for files +in the file system using a list of possible locations. It is implemented +using the {sml-basis-url}/index.html[*SML Basis Library*] portable +file-system mechanisms and, thus, it itself portable across different +operating systems. + +== Synopsis + +[source,sml] +------------ +signature PATH_UTIL +structure PathUtil : PATH_UTIL +------------ + +== Interface + +[source,sml] +------------ +val findFile : string list -> string -> string option + +val findFiles : string list -> string -> string list + +val existsFile : (string -> bool) -> string list -> string -> string option + +val allFiles : (string -> bool) -> string list -> string -> string list + +val findExe : string list -> string -> string option +------------ + +== Description + +`[.kw]#val# findFile : string list \-> string \-> string option`:: + `findFile paths __name__` returns `SOME path`, where `path` is a string of + the form ``"__p__/__name__"`` and ``__p__`` is the first string in `paths` + such that `path` exists. If no such file exists, then `NONE` is returned. + If `__name__` is an absolute path, then `SOME __name__` is returned + if it exists. + +`[.kw]#val# findFiles : string list \-> string \-> string list`:: + `findFiles (paths, mode) __name__` returns a list of strings, such that each string + `s` in the result has the form ``"__p__/__name__"`` with ``__p__`` in `paths` + and the file named by `path` existing in the file system. + +`[.kw]#val# existsFile : (string \-> bool) \-> string list \-> string \-> string option`:: + `existsFile pred paths name` returns `SOME path`, where `path` is a string of + the form ``"__p__/__name__"`` and ``__p__`` is the first string in `paths` + such that `path` exists and that `pred path` returns true. If no such file exists, + then `NONE` is returned. If `__name__` is an absolute path, then + `SOME __name__` is returned if it exists and satisfies the predicate. + +`[.kw]#val# allFiles : (string \-> bool) \-> string list \-> string \-> string list`:: + `allFiles pred paths name` returns a list of strings, such that each string + `s` in the result has the form ``"__p__/__name__"`` with ``__p__`` in `paths`, + the file named by `path` existing in the file system, and `pred path` returns + `true`. The order of the path list is preserved in the result. If `__name__` + is an absolute path, then the list `[__name__]` is returned if `__name__` exists + and satisfies the predicate. + +`[.kw]#val# findExe : string list \-> string \-> string option`:: + `findExe paths name` searches `paths` for an _executable_ file with the given + name. This expression is equivalent to ++ +[source,sml] +------------ +existsFile (fn p => OS.FileSys.access(p, [OS.FileSys.A_EXEC])) paths name +------------ + + +== See Also + +xref:../Unix/str-UnixPath.adoc[`UnixPath`], +xref:smlnj-lib.adoc[__The Util Library__] diff --git a/smlnj-lib/Doc/src/Util/str-PropList.adoc b/smlnj-lib/Doc/src/Util/str-PropList.adoc new file mode 100644 index 0000000..2e75912 --- /dev/null +++ b/smlnj-lib/Doc/src/Util/str-PropList.adoc @@ -0,0 +1,139 @@ += The `PropList` structure +:Author: John Reppy +:Date: {release-date} +:stem: latexmath +:source-highlighter: pygments +:VERSION: {smlnj-version} + +The `PropList` structure provides a extensible, but type safe, implementation +of property lists. + +== Synopsis + +[source,sml] +------------ +structure PropList +------------ + +== Interface + +[source,sml] +------------ +type holder + +val newHolder : unit -> holder + +val hasProps : holder -> bool + +val clearHolder : holder -> unit + +val sameHolder : (holder * holder) -> bool + +val newProp : (('a -> holder) * ('a -> 'b)) -> { + peekFn : 'a -> 'b option, + getFn : 'a -> 'b, + setFn : ('a * 'b) -> unit, + clrFn : 'a -> unit + } + +val newFlag : ('a -> holder) -> { + getFn : 'a -> bool, + setFn : ('a * bool) -> unit + } +------------ + +== Description + +`[.kw]#type# holder`:: + The type of a property-list container. + +`[.kw]#val# newHolder : unit \-> holder`:: + `newHolder ()` creates a new property-list holder. + +[[val:hasProps]] +`[.kw]#val# hasProps : holder \-> bool`:: + `hasProps holder` return `true` if, and only if, the holder contains + properties (including set flags). + +`[.kw]#val# clearHolder : holder \-> unit`:: + `clearHolder holder` removes all properties and flags from the holder. + +`[.kw]#val# sameHolder : (holder * holder) \-> bool`:: + `sameHolder (holder1, holder2)` returns `true` if, and only if, + the two holders are the same. + +`[.kw]#val# newProp : (('a \-> holder) * ('a \-> 'b)) \-> { ... }`:: + `newProp (getHolder, init)` creates a new property of type ``'b`` + associated with values of type ``'a``, where `getHolder` is a function + for getting the holder from a value and `init` is a function for + defining the initial value of the property for a value. The property + is represented by a record of operations, which are as follows: ++ +-- + `peekFn : 'a \-> 'b option`:: + `peekFn obj` returns `SOME v`, where `v` is the value of the + property for `obj`. If the property has not been set for `obj`, + then `NONE` is returned. + + `getFn : 'a \-> 'b`:: + `getFn obj` returns the value of the property for `obj`. If the + property has not been set for `obj`, then the `init` function is + used to set the initial value of the property. + + `setFn: ('a * 'b) \-> unit`:: + `setFn (obj, v)` sets the value of the property to `v` for `obj`. + + `clrFn : 'a \-> unit`:: + `clrFn obj` removes the property from `obj`. +-- + +`[.kw]#val# newFlag : ('a \-> holder) \-> { ... }`:: + `newFlag getHolder` creates a new boolean property for values of type ``'a``. + The property is represented by a record of two functions: ++ +-- + `getFn : 'a \-> bool`:: + `getFn obj` returns the value of the flag for `obj`. + + `setFn : ('a * bool) \-> unit`:: + `setFn (obj, b)` sets the value of the flag to `b` for `obj`. +-- ++ +Flags represent boolean properties in a way that is more space efficient +than using `newProp`. Basically, a `true` value is represented by the +presence of the property in the holder, while `false` is represented by +its absence. This representation affects the behavior of +xref:val:hasProps[`hasProps`] as flags that are `false` are not counted. + +== Examples + +A common use of property lists is to provide a mechanism for attaching +attributes to existing types. For example, we might define a representation +of variables in a compiler as: + +[source,sml] +------------ +datatype var = V of { + name : string, + props : PropList.holder + } +------------ + +We might define a _use count_ property as follows: + +[source,sml] +------------ +local + val {getFn, setFn, ...} = PropList.newProp ( + fn (V{props, ...}) => props, + fn _ => 0) +in +fun use x = setFn(x, getFn x + 1) +fun countOf x = getFn x +end +------------ + + +== See Also + +xref:smlnj-lib.adoc[__The Util Library__] diff --git a/smlnj-lib/Doc/src/Util/str-Queue.adoc b/smlnj-lib/Doc/src/Util/str-Queue.adoc new file mode 100644 index 0000000..9cbe134 --- /dev/null +++ b/smlnj-lib/Doc/src/Util/str-Queue.adoc @@ -0,0 +1,129 @@ += The `Queue` structure +:Author: John Reppy +:Date: {release-date} +:stem: latexmath +:source-highlighter: pygments +:VERSION: {smlnj-version} + +The `Queue` structure provides an imperative queue data structure, +which are implemented as a pair of stacks (lists) representing the +front and rear of the queue. **Single-threaded** enqueuing and +dequeuing operations will have amortized constant time. + +== Synopsis + +[source,sml] +------------ +signature QUEUE +structure Queue :> QUEUE +------------ + +== Interface + +[source,sml] +------------ +type 'a queue + +exception Dequeue + +val mkQueue : unit -> 'a queue +val clear : 'a queue -> unit +val isEmpty : 'a queue -> bool +val enqueue : 'a queue * 'a -> unit +val dequeue : 'a queue -> 'a +val next : 'a queue -> 'a option +val delete : ('a queue * ('a -> bool)) -> unit +val head : 'a queue -> 'a +val peek : 'a queue -> 'a option +val length : 'a queue -> int +val contents : 'a queue -> 'a list +val app : ('a -> unit) -> 'a queue -> unit +val map : ('a -> 'b) -> 'a queue -> 'b queue +val foldl : ('a * 'b -> 'b) -> 'b -> 'a queue -> 'b +val foldr : ('a * 'b -> 'b) -> 'b -> 'a queue -> 'b +------------ + +== Description + +`[.kw]#type# 'a queue`:: + The type constructor for queues. + +[[exn:Dequeue]] +`[.kw]#exception# Dequeue`:: + This exceptions is raised when the `dequeue` function is + applied to an empty queue. + +`[.kw]#val# mkQueue : unit \-> 'a queue`:: + `mkQueue ()` returns a new empty queue. + +`[.kw]#val# clear : 'a queue \-> unit`:: + `clear q` removes any elements from `q` leaving it empty. + +`[.kw]#val# isEmpty : 'a queue \-> bool`:: + `ifEmpty q` returns true if the queue is empty. + +`[.kw]#val# enqueue : 'a queue * 'a \-> unit`:: + `enqueue (q, x)` adds `x` to the end of `q`. + +`[.kw]#val# dequeue : 'a queue \-> 'a`:: + `dequeue q` removes and returns the first element in `q`. + This function raises the xref:#exn:Dequeue[`Dequeue`] exception + if it is called on an empty queue. + +`[.kw]#val# next : 'a queue \-> 'a option`:: + `next q` returns `SOME x` and removes `x` from `q`, where `x` was the first + element in `q`, or `NONE` if `q` is empty. + +`[.kw]#val# delete : ('a queue * ('a \-> bool)) \-> unit`:: + `delete (q, pred)` removes those items from `q` for which the + function `pred` returns `true`. + +`[.kw]#val# head : 'a queue \-> 'a`:: + `head q` returns the first element of `q` or raises the exception + xref:#exn:Dequeue[`Dequeue`] if `q` is empty. The queue is unchanged. + +`[.kw]#val# peek : 'a queue \-> 'a option`:: + `peek q` returns `SOME x`, where `x` is the first element of `q`, + or `NONE` if `q` is empty. The queue is unchanged. + +`[.kw]#val# length : 'a queue \-> int`:: + `length q` returns the number of elements in the queue. + +`[.kw]#val# contents : 'a queue \-> 'a list`:: + `contents q` returns the contents of `q` as a list. + +`[.kw]#val# app : ('a \-> unit) \-> 'a queue \-> unit`:: + `app f q` applies the function `f` to the elements of `q`. + This expression is equivalent to ++ +[source,sml] +------------ +List.app f (contents q) +------------ + +`[.kw]#val# map : ('a \-> 'b) \-> 'a queue \-> 'b queue`:: + `map f q` returns a new queue that results from mapping + the function `f` across the elements of the queue. + +`[.kw]#val# foldl : ('a * 'b \-> 'b) \-> 'b \-> 'a queue \-> 'b`:: + `foldl f init q` folds the function `f` over the elements of `q` from + front to back. This expression is equivalent to ++ +[source,sml] +------------ +List.foldl f init (contents q) +------------ + +`[.kw]#val# foldr : ('a * 'b \-> 'b) \-> 'b \-> 'a queue \-> 'b`:: + `foldr f init q` folds the function `f` over the elements of `q` from + back to front. This expression is equivalent to ++ +[source,sml] +------------ +List.foldr f init (contents q) +------------ + +== See Also + +xref:str-Fifo.adoc[`Fifo`], +xref:smlnj-lib.adoc[__The Util Library__] diff --git a/smlnj-lib/Doc/src/Util/str-Rand.adoc b/smlnj-lib/Doc/src/Util/str-Rand.adoc new file mode 100644 index 0000000..830facd --- /dev/null +++ b/smlnj-lib/Doc/src/Util/str-Rand.adoc @@ -0,0 +1,84 @@ += The `Rand` structure +:Author: John Reppy +:Date: {release-date} +:stem: latexmath +:source-highlighter: pygments +:VERSION: {smlnj-version} + +The `Rand` structure provides a simple random number generator as described +in Larry Paulson in __ML for the Working Programmer__ (pp. 170-171). +The original algorithm was recommended by Park and Miller in +https://doi.org/10.1145/63039.63042[__Random number generators: good ones +are hard to find__], CACM 1988 (pp 1192-1201) with modifications described +in CACM 1993 (pp. 105-110). + +Note: it is recommended that one use the xref:str-Random.adoc[`Random`] +structure when the quality of the generated numbers is at all important. +The main advantages of this implementation is that it is functional +(the generators provided by the `Random` structure are imperative) and +it is fast. + +== Synopsis + +[source,sml] +------------ +structure Rand +------------ + +== Interface + +[source,sml] +------------ +type rand = Word.word + +val randMin : rand +val randMax : rand + +val random : rand -> rand + +val mkRandom : rand -> unit -> rand + +val norm : rand -> real + +val range : (int * int) -> rand -> int +------------ + +== Description + +`[.kw]#type# rand = Word.word`:: + The "state" of the generator, which is just a single word. + +`[.kw]#val# randMin : rand`:: + The minimum allowed value for the state. + +`[.kw]#val# randMax : rand`:: + The maximum allowed value for the state. + +`[.kw]#val# random : rand \-> rand`:: + `random seed` returns a pseudo-random value in the range + `[randMin .. randMax]`. Iteratively using the value returned by + `random` as the next seed will produce a sequence of pseudo-random + numbers. + +`[.kw]#val# mkRandom : rand \-> unit \-> rand`:: + `mkRandom seed` returns a function that generates a fresh random number + in the range `[randMin .. randMax]`. + +`[.kw]#val# norm : rand \-> real`:: + `norm rand` maps the random number in the range `[randMin .. randMax]` + to the real interval `(0..1)`. + +`[.kw]#val# range : (int * int) \-> rand \-> int`:: + `range (lo, hi) rand` maps the random number in the range `[randMin .. randMax]` + to the interval `[lo..hi]`. This function will raise the + {sml-basis-url}/general.html#SIG:GENERAL.Fail:EXN[`Fail`] exception + if `hi < lo`. + +== Bugs + +This implementation needs to be updated for 64-bit systems. + +== See Also + +xref:str-Random.adoc[`Random`], +xref:smlnj-lib.adoc[__The Util Library__] diff --git a/smlnj-lib/Doc/src/Util/str-Random.adoc b/smlnj-lib/Doc/src/Util/str-Random.adoc new file mode 100644 index 0000000..3437154 --- /dev/null +++ b/smlnj-lib/Doc/src/Util/str-Random.adoc @@ -0,0 +1,127 @@ += The `Random` structure +:Author: John Reppy +:Date: {release-date} +:stem: latexmath +:source-highlighter: pygments +:VERSION: {smlnj-version} + +The `Random` structure implements a random number generator +using the http://www.math.sci.hiroshima-u.ac.jp/m-mat/MT/emt.html[ +_Mersenne Twister_ algorithm]. The implementation is specialized +to the native word size. + +Note that prior to 2023.1 (and 110.99.4), this structure was implemented +using a _subtract-with-borrow_ algorithm. + +== Synopsis + +[source,sml] +------------ +structure Random :> RANDOM +------------ + +== Interface + +[source,sml] +------------ +type rand + +val rand : (int * int) -> rand + +val fromList : NativeWord.word list -> rand + +val toBytes : rand -> Word8Vector.vector +val fromBytes : Word8Vector.vector -> rand + +val toString : rand -> string +val fromString : string -> rand + +val randNativeInt : rand -> NativeInt.int + +val randNativeWord : rand -> NativeWord.word + +val randInt : rand -> int + +val randWord : rand -> int + +val randNat : rand -> int + +val randReal : rand -> real + +val randRange : (int * int) -> rand -> int +------------ + +== Description + +`[.kw]#type# rand`:: + Represents the internal state of a random number generator. + +`[.kw]#val# rand : (int * int) \-> rand`:: + `rand (n1, n2)` creates a random number generator from the + initial seed specified by the pair `(n1, n2)`. + This function is kept for backward compatibility with the old + implementation, but it is recommended that one use the `fromList` + function in new code. + +`[.kw]#val# fromList : NativeWord.word list \-> rand`:: + creates a random number generator from the given list of initial seeds. + +`[.kw]#val# toBytes : rand \-> Word8Vector.vector`:: + `toBytes rand` returns a byte vector representing the current state of + the generator. + +`[.kw]#val# fromBytes : Word8Vector.vector \-> rand`:: + `fromBytes bv` creates a generator with the initial state that was encoded + in the byte vector `bv`. This expression will raise + {sml-basis-url}/general.html#SIG:GENERAL.Fail:EXN[`Fail`] exception + if the byte vector is invalid. + +`[.kw]#val# toString : rand \-> string`:: + `toString rand` returns a string representing the random-number-generator + state `rand`. This string is a xref:str-Base64.adoc[`Base64`] encoding + of the result of `toBytes rand`. + +`[.kw]#val# fromString : string \-> rand`:: + `fromString s` returns the random-number-generator encoded as the string `s` + (presumably generated by `toString`). This expression will raise + {sml-basis-url}/general.html#SIG:GENERAL.Fail:EXN[`Fail`] exception + if the string `s` does not have the proper form. + +`[.kw]#val# randNativeWord : rand \-> NativeWord.word`:: + `randNativeWord rand` generates a uniform random number in the range + latexmath:[[0 .. 2^k-1]], where latexmath:[k] is the host platform's + native word size (__e.g.__, 32 or 64). + +`[.kw]#val# randNativeInt : rand \-> NativeInt.int`:: + `randNativeInt rand` generates a random word a uniform distribution in + the range latexmath:[[0 .. 2^{k-1}-1]], where latexmath:[k] is the host + platform's native word size (__e.g.__, 32 or 64). + +`[.kw]#val# randInt : rand \-> int`:: + `randInt rand` generates a random integer with a uniform distribution in + the range latexmath:[[-2^{k-1} .. 2^{k-1}-1]], where latexmath:[k] + is the host platform's default word size (__e.g.__, 31 or 63). + +`[.kw]#val# randWord : rand \-> word`:: + `randWord rand` generates a random word with a uniform distribution in + the range latexmath:[[0 .. 2^k-1]], where latexmath:[k] + is the host platform's default word size (__e.g.__, 31 or 63). + +`[.kw]#val# randNat : rand \-> int`:: + `randNat rand` generates a random integer with a uniform distribution in + the range latexmath:[[0 .. 2^{k-1}-1]], where latexmath:[k] + is the host platform's default word size (__e.g.__, 31 or 63). + +`[.kw]#val# randReal : rand \-> real`:: + `randReal rand` generates a random real number in the range `[0..1)`. + +`[.kw]#val# randRange : (int * int) \-> rand \-> int`:: + `randRange (lo, hi) rand` generates a random number in the + `[lo..hi]`. This function will raise the + {sml-basis-url}/general.html#SIG:GENERAL.Fail:EXN[`Fail`] exception + if `hi < lo`. + +== See Also + +xref:str-Rand.adoc[`Rand`], +xref:smlnj-lib.adoc[__The Util Library__] diff --git a/smlnj-lib/Doc/src/Util/str-RealOrderStats.adoc b/smlnj-lib/Doc/src/Util/str-RealOrderStats.adoc new file mode 100644 index 0000000..b0c06a2 --- /dev/null +++ b/smlnj-lib/Doc/src/Util/str-RealOrderStats.adoc @@ -0,0 +1,33 @@ += The `RealOrderStats` structure +:Author: John Reppy +:Date: {release-date} +:stem: latexmath +:source-highlighter: pygments +:VERSION: {smlnj-version} + +The `RealOrderStats` structure ... + +== Synopsis + +[source,sml] +------------ +structure RealOrderStats +------------ + +== Interface + +[source,sml] +------------ +------------ + +== Description + +`[.kw]#type# foo`:: + something + +`[.kw]#val# bar : foo \-> foo`:: + something + +== See Also + +xref:smlnj-lib.adoc[__The Util Library__] diff --git a/smlnj-lib/Doc/src/Util/str-Scan.adoc b/smlnj-lib/Doc/src/Util/str-Scan.adoc new file mode 100644 index 0000000..8734c41 --- /dev/null +++ b/smlnj-lib/Doc/src/Util/str-Scan.adoc @@ -0,0 +1,135 @@ += The `Scan` structure +:Author: John Reppy +:Date: {release-date} +:stem: latexmath +:source-highlighter: pygments +:VERSION: {smlnj-version} + +The `Scan` structure provides **C**-style conversions from string +representations. + +== Synopsis + +[source,sml] +------------ +signature SCAN +structure Scan : SCAN +------------ + +== Interface + +[source,sml] +------------ +datatype fmt_item + = ATOM of Atom.atom + | LINT of LargeInt.int + | INT of Int.int + | LWORD of LargeWord.word + | WORD of Word.word + | WORD8 of Word8.word + | BOOL of bool + | CHR of char + | STR of string + | REAL of Real.real + | LREAL of LargeReal.real + | LEFT of (int * fmt_item) + | RIGHT of (int * fmt_item) + +exception BadFormat + +val sscanf : string -> string -> fmt_item list option +val scanf : string -> (char, 'a) StringCvt.reader + -> (fmt_item list, 'a) StringCvt.reader +------------ + +== Description + + +[[type:fmt_item]] +`[.kw]#datatype# fmt_item`:: +This datatype, which is the same type as +xref:str-Format.adoc#type:fmt_item[`Format.fmt_item`], is used as a union +type to represent the results of scanning input. + ++ +-- + `LINT n`:: wraps a large integer value `n` to convert + (the conversion specifier must be one of "`d`", "`o`", "`x`", or "`X`"). + + `INT n`:: wraps a default integer value `n` to convert + (the conversion specifier must be one of "`d`", "`o`", "`x`", or "`X`"). + + `BOOL b`:: wraps a Boolean value `b` to convert + (the conversion specifier must be "`b`"). + + `CHR c`:: wraps a character value + (the conversion specifier must be "`c`"). + + `STR s`:: wraps a string value `s` to convert + (the conversion specifier must be "`s`"). The conversion is the + identity; _e.g._, `STR "\n"` will produce a newline in the result + string. + + `REAL r`:: wraps a default real value `r` to convert + (the conversion specifier must be one of "`e`", "`E`", "`f`", + "`F`", "`g`", or "`G`"). + + `ATOM atm`:: this constructor will never be returned by `scanf` or `sscanf`. + + `LWORD w`:: this constructor will never be returned by `scanf` or `sscanf`. + + `WORD w`:: this constructor will never be returned by `scanf` or `sscanf`. + + `WORD8 w`:: this constructor will never be returned by `scanf` or `sscanf`. + + `LREAL r`:: this constructor will never be returned by `scanf` or `sscanf`. + + `LEFT _`:: this constructor will never be returned by `scanf` or `sscanf`. + + `RIGHT _`:: this constructor will never be returned by `scanf` or `sscanf`. +-- + +[[exn:BadFormat]] +`[.kw]#exception# BadFormat`:: + This exception is raised when either `sscanf` or `scanf` is applied + to an ill-formed format string. + +[[val:sscanf]] +`[.kw]#val# sscanf : string -> string -> fmt_item list option`:: + `sscanf fmt s` scans the string `s` using the format specifier `fmt`. If + successful, `SOME items` is returned, where each item in the `items` list + corresponds to a specified item in `fmt`. If the input cannot be scanned + according to `fmt`, then `NONE` is returned. + If the format string is ill formed, then the `BadFormat` exception + will be raised when `sscanf fmt` is evaluated. + +[[val:scanf]] +`[.kw]#val# scanf : string -> (char, 'a) StringCvt.reader -> (fmt_item list, 'a) StringCvt.reader`:: + `sscanf fmt getc` returns a reader that scan a character stream using + the format specifier `fmt`. If the format string is ill formed, then the + `BadFormat` exception will be raised when `scanf fmt` is evaluated. + +== Format Strings + +The `sscanf and `scanf` functions take a format string as their first +argument. The format string is composed of zero or more +directives, which are either ordinary characters (excluding `%`) +or conversion specifiers. The result of applying one of the scan functions +to an input will be a list of xref:#type:fmt_item[`fmt_item`] corresponding +to the conversion specifiers in the format string. All conversions are +introduced by the `%` character. The format string may also contain +other characters. White space (such as blanks, tabs, or newlines) in the format +string match any amount of white space, including none, in the input. Everything +else matches only itself. Scanning stops when an input character does not match +such a format character. Scanning also stops when an input conversion cannot be +made (see below). + +//Conversion specifiers begin with the percent (`%`) character followed by +//the following in sequence: + +**To be written** + +== See Also + +xref:str-Format.adoc[`Format`], +xref:smlnj-lib.adoc[__The Util Library__] diff --git a/smlnj-lib/Doc/src/Util/str-TimeLimit.adoc b/smlnj-lib/Doc/src/Util/str-TimeLimit.adoc new file mode 100644 index 0000000..b7ce296 --- /dev/null +++ b/smlnj-lib/Doc/src/Util/str-TimeLimit.adoc @@ -0,0 +1,41 @@ += The `TimeLimit` structure +:Author: John Reppy +:Date: {release-date} +:stem: latexmath +:source-highlighter: pygments +:VERSION: {smlnj-version} + +The `TimeLimit` structure provides a mechanism for limiting the execution +time of a computation. The mechanism is implemented using the runtime +system's interval timer and the *SML/NJ* signal mechanism. + +== Synopsis + +[source,sml] +------------ +structure TimeLimit +------------ + +== Interface + +[source,sml] +------------ +exception TimeOut + +val timeLimit : Time.time -> ('a -> 'b) -> 'a -> 'b +------------ + +== Description + +[[exn:TimeOut]] +`[.kw]#exception# TimeOut`:: + The exception that is raised if the time limit expires. + +`[.kw]#val# timeLimit : Time.time \-> ('a \-> 'b) \-> 'a \-> 'b`:: + `timeLimit t f x` computes the expression `f x`. If the computation + takes longer than the time limit `t`, then the + xref:#exn:TimeOut[`TimeOut`] exception is raised. + +== See Also + +xref:smlnj-lib.adoc[__The Util Library__] diff --git a/smlnj-lib/Doc/src/Util/str-URef.adoc b/smlnj-lib/Doc/src/Util/str-URef.adoc new file mode 100644 index 0000000..1f9fc7f --- /dev/null +++ b/smlnj-lib/Doc/src/Util/str-URef.adoc @@ -0,0 +1,94 @@ += The `URef` structure +:Author: John Reppy +:Date: {release-date} +:stem: latexmath +:source-highlighter: pygments +:VERSION: {smlnj-version} + +The `URef` structure provides mutable references with __Union-Find__ +semantics. The interface is similar to that of references, but +adds operations to union two references together. When two `uref` +values are joined by one of the union operations, they become +equal (and, thus, their contents will be equal too). + +The original design and implementation of this module was by Fritz Henglein. + +== Synopsis + +[source,sml] +------------ +signature UREF +structure URef : UREF +------------ + +== Interface + +[source,sml] +------------ +type 'a uref + +val uRef: 'a -> 'a uref + +val equal: 'a uref * 'a uref -> bool + +val !! : 'a uref -> 'a + +val update : 'a uref * 'a -> unit + +val unify : ('a * 'a -> 'a) -> 'a uref * 'a uref -> bool + +val union : 'a uref * 'a uref -> bool + +val link : 'a uref * 'a uref -> bool +------------ + +== Description + +`[.kw]#type# 'a uref`:: + The type constructor for union-find references. + +[[val:uRef]] +`[.kw]#val# uRef: 'a \-> 'a uref`:: + `uRef v` creates a new reference with contents `v`. + +`[.kw]#val# equal: 'a uref * 'a uref \-> bool`:: + `equal (ur1, ur2)` returns `true` if, and only if, `ur1` and `ur2` were + created by the same call to xref:#val:uRef[`uRef`] or if they have been + unioned by a xref:#val:link[`link`], xref:#val:union[`union`], or + xref:#val:unify[`unify`] operation. + +`[.kw]#val# !! : 'a uref \-> 'a`:: + `!! ur` returns the contents of `ur`. + +`[.kw]#val# update : 'a uref * 'a \-> unit`:: + `update (ur, v)` updates the contents of `ur` to be `v`. + +[[val:unify]] +`[.kw]#val# unify : ('a * 'a \-> 'a) \-> 'a uref * 'a uref \-> bool`:: + `unify f (ur1, ur2)` unions `ur1` and `ur2` (_i.e._, after this + call, the expression `equal(r1, ur2)` will return `true`) and + returns `true` if they were *not* equal prior to the call to `unify`. + The contents of the unioned reference is set to `f (v1, v2)`, where `v1` + (resp. `v2`) was the contents of `ur1` (resp. `ur2`) prior to the + call to `unify`. + +[[val:union]] +`[.kw]#val# union : 'a uref * 'a uref \-> bool`:: + `union (ur1, ur2)` unions `ur1` and `ur2` (_i.e._, after this + call, the expression `equal(r1, ur2)` will return `true`) and + returns `true` if they were *not* equal prior to the call to `union`. + The contents of the unioned reference is set to one of `v1` or `v2`, + where `v1` (resp. `v2`) was the contents of `ur1` (resp. `ur2`) + prior to the call to `union`. + +[[val:link]] +`[.kw]#val# link : 'a uref * 'a uref \-> bool`:: + `link (ur1, ur2)` unions `ur1` and `ur2` (_i.e._, after this + call, the expression `equal(r1, ur2)` will return `true`) and + returns `true` if they were *not* equal prior to the call to `link`. + The contents of the unioned reference is set to `v1`, where + `v1` was the contents of `ur1` prior to the call to `link`. + +== See Also + +xref:smlnj-lib.adoc[__The Util Library__] diff --git a/smlnj-lib/Doc/src/Util/str-UTF8.adoc b/smlnj-lib/Doc/src/Util/str-UTF8.adoc new file mode 100644 index 0000000..8b8f288 --- /dev/null +++ b/smlnj-lib/Doc/src/Util/str-UTF8.adoc @@ -0,0 +1,230 @@ += The `UTF8` structure +:Author: John Reppy +:Date: {release-date} +:stem: latexmath +:source-highlighter: pygments +:VERSION: {smlnj-version} + +The `UTF8` structure provides support for working +with https://en.wikipedia.org/wiki/UTF-8[*UTF-8*] +encoded strings. *UTF-8* is a way to represent *Unicode* +code points in an 8-bit character type while being backward +compatible with the *ASCII* encoding for 7-bit characters. +The encoding scheme uses one to four bytes as follows: + +[cols="^4a,4*^1a",options="header",align="center",grid="rows",width="80%"] +|=== +| Wide Character Bits | Byte 0 | Byte 1 | Byte 2 | Byte 3 +| `00000` `00000000` `0xxxxxxx` | `0xxxxxxx` | | | +| `00000` `00000yyy` `yyxxxxxx` | `110yyyyy` | `10xxxxxx` | | +| `00000` `zzzzyyyy` `yyxxxxxx` | `1110zzzz` | `10yyyyyy` | `10xxxxxx` | +| `wwwzz` `zzzzyyyy` `yyxxxxxx` | `11110www` | `10zzzzzz` | `10yyyyyy` | `10xxxxxx` +|=== + +There are three additional well-formedness restrictions on UTF-8 encodings +that were introduced in the Unicode 3.1 and 3.2 standards. +-- +* Characters cannot be larger than `0x10FFFF` (the maximum code point). +* Characters must be in the **shortest** encoding for the codepoint (_e.g._, + using two bytes to encode an ASCII character is invalid). +* Surogate pairs should be encoded as a single three-byte character instead of + as two three-byte sequences. +-- + +== Synopsis + +[source,sml] +------------ +signature UTF8 +structure UTF8 :> UTF8 +------------ + +== Interface + +[source,sml] +------------ +type wchar = word + +val maxCodePoint : wchar + +exception Incomplete +exception Invalid + +val getu : (char, 'strm) StringCvt.reader -> (wchar, 'strm) StringCvt.reader + +val encode : wchar -> string + +val isAscii : wchar -> bool +val toAscii : wchar -> char +val fromAscii : char -> wchar + +val toString : wchar -> string + +val size : string -> int + +val size' : substring -> int + +val explode : string -> wchar list +val implode : wchar list -> string + +val map : (wchar -> wchar) -> string -> string +val app : (wchar -> unit) -> string -> unit +val fold : ((wchar * 'a) -> 'a) -> 'a -> string -> 'a +val all : (wchar -> bool) -> string -> bool +val exists : (wchar -> bool) -> string -> bool +------------ + +== Description + +`[.kw]#type# wchar = word`:: + The type of a *Unicode* code point. + + Note that we use the `word` type for this because *SML/NJ* does not currently + have a wide-character type. If such a type is introduced, then this type + definition will likely change. + +`[.kw]#val# maxCodePoint : wchar`:: + The maximum code point in the *Unicode* character set (`0wx10FFFF`). + +[[exn:Incomplete]] +`[.kw]#exception# Incomplete`:: + This exception is raised when certain operations are applied to incomplete + strings (_i.e._, strings that end in the middle of multi-byte *UTF-8* character + encoding). + +[[exn:Invalid]] +`[.kw]#exception# Invalid`:: + This exception is raised when invalid UTF-8 encodings, such as + non-shortest-length encodings, are encountered. + +`[.kw]#val# getu : (char, 'strm) {sml-basis-url}/string-cvt.html#SIG:STRING_CVT.reader:TY[StringCvt.reader] \-> (wchar, 'strm) {sml-basis-url}/string-cvt.html#SIG:STRING_CVT.reader:TY[StringCvt.reader]`:: + `getu getc` returns a wide-character reader for the character reader `getc`. + The resulting reader raises the xref:#exn:Incomplete[`Incomplete`] exception + if it encounters an incomplete *UTF-8* character and it raises the + xref:#exn:Invalid[`Invalid`] exception if it encounters an invalid encoding. + +`[.kw]#val# encode : wchar \-> string`:: + `encode wc` returns the *UTF-8* encoding of the wide character `wc`. + This expression raises the + xref:#exn:Invalid[`Invalid`] exception if `wc` is greater than the + maximum *Unicode* code point. + +`[.kw]#val# isAscii : wchar \-> bool`:: + `isAscii wc` returns `true` if, and only if, `wc` is an ASCII character. + +`[.kw]#val# toAscii : wchar \-> char (* truncates to 7-bits *)`:: + `toAscii wc` converts `wc` to an 8-bit character by truncating `wc` + to its low seven bits. + +`[.kw]#val# fromAscii : char \-> wchar (* truncates to 7-bits *)`:: + `toAscii c` converts the 8-bit character `c` to a wide character in + the ASCII range (the high bit of `c` is ignored). + +`[.kw]#val# toString : wchar \-> string`:: + `toString wc` returns a printable string representation of a wide character + as a *Unicode* escape sequence. + +`[.kw]#val# size : string \-> int`:: + `size s` returns the number of *UTF-8* encoded *Unicode* characters + in the string `s`. This expression raises the + xref:#exn:Incomplete[`Incomplete`] exception if an incomplete + character is encountered. + +`[.kw]#val# size : string \-> int`:: + `size s` returns the number of *UTF-8* encoded *Unicode* characters + in the string `s`. This expression raises the + xref:#exn:Incomplete[`Incomplete`] exception + if it encounters an incomplete *UTF-8* character and it raises the + xref:#exn:Invalid[`Invalid`] exception if it encounters an invalid encoding. + +`[.kw]#val# size' : substring \-> int`:: + `size' ss` returns the number of *UTF-8* encoded *Unicode* characters + in the substring `ss`. This expression raises the + xref:#exn:Incomplete[`Incomplete`] exception + if it encounters an incomplete *UTF-8* character and it raises the + xref:#exn:Invalid[`Invalid`] exception if it encounters an invalid encoding. + +`[.kw]#val# explode : string \-> wchar list`:: + `explode s` returns the list of *UTF-8* encoded Unicode characters that + comprise the string `s`. + +`[.kw]#val# implode : wchar list \-> string`:: + `implode wcs` returns the *UTF-8* encoded string that represents + the list `wcs` of Unicode code points. + This expression raises the + xref:#exn:Invalid[`Invalid`] exception if it encounters an invalid encoding. + +`[.kw]#val# map : (wchar \-> wchar) \-> string \-> string`:: + `map f s` maps the function `f` over the *UTF-8* encoded characters + in the string `s` to produce a new *UTF-8* string. This expression raises + the xref:#exn:Incomplete[`Incomplete`] exception + if it encounters an incomplete *UTF-8* character and it raises the + xref:#exn:Invalid[`Invalid`] exception if it encounters an invalid encoding. + It is equivalent to the expression ++ +[source,sml] +------------ +implode (List.map f (explode s)) +------------ + +`[.kw]#val# app : (wchar \-> unit) \-> string \-> unit`:: + `app f s` applies the function `f` to the *UTF-8* encoded characters + in the string `s`. This expression raises the + xref:#exn:Incomplete[`Incomplete`] exception + if it encounters an incomplete *UTF-8* character and it raises the + xref:#exn:Invalid[`Invalid`] exception if it encounters an invalid encoding. + It is equivalent to the expression ++ +[source,sml] +------------ +List.app f (explode s) +------------ + +`[.kw]#val# fold : ((wchar * 'a) \-> 'a) \-> 'a \-> string \-> 'a`:: + `fold f init s` folds a function from left-to-right over the + *UTF-8* encoded characters in the string. xref:#exn:Incomplete[`Incomplete`] exception + if it encounters an incomplete *UTF-8* character and it raises the + xref:#exn:Invalid[`Invalid`] exception if it encounters an invalid encoding. + It is equivalent to the expression ++ +[source,sml] +------------ +List.foldl f init (explode s) +------------ + +`[.kw]#val# all : (wchar \-> bool) \-> string \-> bool`:: + `all pred s` returns `true` if, and only if, the function `pred` + returns true for all of the *UTF-8* encoded characters in the + string. It short-circuits evaluation as soon as a character + is encountered for which `pred` returns `false`. This expression + raises the xref:#exn:Incomplete[`Incomplete`] exception + if it encounters an incomplete *UTF-8* character and it raises the + xref:#exn:Invalid[`Invalid`] exception if it encounters an invalid encoding. + It is equivalent to the expression ++ +[source,sml] +------------ +List.all pred (explode s) +------------ ++ +when `s` only contains complete characters. + +`[.kw]#val# exists : (wchar \-> bool) \-> string \-> bool`:: + `exists pred s` returns `true` if, and only if, the function `pred` + returns `true` for at least one *UTF-8* encoded character in + the string `s`. It short-circuits evaluation as soon as a character + is encountered for which `pred` returns `true`. This expression raises + the xref:#exn:Incomplete[`Incomplete`] exception + if it encounters an incomplete *UTF-8* character and it raises the + xref:#exn:Invalid[`Invalid`] exception if it encounters an invalid encoding. + It is equivalent to the expression ++ +[source,sml] +------------ +List.exists pred (explode s) +------------ ++ +when `s` only contains complete characters. + +== See Also + +xref:smlnj-lib.adoc[__The Util Library__] diff --git a/smlnj-lib/Doc/src/Util/str-UnivariateStats.adoc b/smlnj-lib/Doc/src/Util/str-UnivariateStats.adoc new file mode 100644 index 0000000..ca3df82 --- /dev/null +++ b/smlnj-lib/Doc/src/Util/str-UnivariateStats.adoc @@ -0,0 +1,33 @@ += The `UnivariateStats` structure +:Author: John Reppy +:Date: {release-date} +:stem: latexmath +:source-highlighter: pygments +:VERSION: {smlnj-version} + +The `UnivariateStats` structure ... + +== Synopsis + +[source,sml] +------------ +structure UnivariateStats +------------ + +== Interface + +[source,sml] +------------ +------------ + +== Description + +`[.kw]#type# foo`:: + something + +`[.kw]#val# bar : foo \-> foo`:: + something + +== See Also + +xref:smlnj-lib.adoc[__The Util Library__] diff --git a/smlnj-lib/Doc/src/XML/MODULES b/smlnj-lib/Doc/src/XML/MODULES new file mode 100644 index 0000000..7e1bbe8 --- /dev/null +++ b/smlnj-lib/Doc/src/XML/MODULES @@ -0,0 +1,5 @@ +signature XML_SCHEMA +functor XMLTreeFn +functor XMLParserFn +structure GenericXMLTree + diff --git a/smlnj-lib/Doc/src/XML/fun-XMLParserFn.adoc b/smlnj-lib/Doc/src/XML/fun-XMLParserFn.adoc new file mode 100644 index 0000000..62a39d2 --- /dev/null +++ b/smlnj-lib/Doc/src/XML/fun-XMLParserFn.adoc @@ -0,0 +1,64 @@ += The `XMLParserFn` functor +:Author: John Reppy +:Date: {release-date} +:stem: latexmath +:source-highlighter: pygments +:VERSION: {smlnj-version} + +The `XMLParserFn` functor generates a parser for the given *SML* +tree representation. The treatment of whitespace and comments +when parsing is determined by the `Schema` substructure of the +functor argument. + +== Synopsis + +[source,sml] +------------ +signature XML_PARSER +functor XMLParserFn (XT : XML_TREE) : XML_PARSER +------------ + +== Functor Argument Interface + +[source,sml] +------------ +XT : XML_TREE +------------ + +== Functor Argument Description + +`XT : XML_TREE`:: + Defines the tree representation used for the result of the parser, as well + as the policy for handling whitespace and comments. + +== Interface + +[source,sml] +------------ +structure XMLTree : XML_TREE + +val parseFile : string -> XMLTree.tree + +exception ParseError of string +------------ + +== Interface Description + +`[.kw]#structure# XMLTree : XML_TREE`:: + The argument structure. + +`[.kw]#val# parseFile : string \-> XMLTree.tree`:: + `parseFile file` returns the tree representation of the named text file. + The xref:#exn:ParseError[`ParseError`] exception is raised if a syntax + error is encountered during parsing. + +[[exn:ParseError]] +`[.kw]#exception# ParseError of string`:: + This exception is raise with a useful error message as its argument when + a syntax error is encountered by the parser. + +== See Also + +xref:sig-XML_SCHEMA.adoc[`XML_SCHEMA`], +xref:sig-XML_TREE.adoc[`XML_TREE`], +xref:xml-lib.adoc[__The XML Library__] diff --git a/smlnj-lib/Doc/src/XML/fun-XMLTreeFn.adoc b/smlnj-lib/Doc/src/XML/fun-XMLTreeFn.adoc new file mode 100644 index 0000000..0d44079 --- /dev/null +++ b/smlnj-lib/Doc/src/XML/fun-XMLTreeFn.adoc @@ -0,0 +1,22 @@ += The `XMLTreeFn` functor +:Author: John Reppy +:Date: {release-date} +:stem: latexmath +:source-highlighter: pygments +:VERSION: {smlnj-version} + +The `XMLTreeFn` functor generates a tree representation of *XML* +files from a schema structure argument. + +== Synopsis + +[source,sml] +------------ +functor XMLTreeFn (Schema : XML_SCHEMA) : XML_TREE +------------ + +== See Also + +xref:sig-XML_SCHEMA.adoc[`XML_SCHEMA`], +xref:sig-XML_TREE.adoc[`XML_TREE`], +xref:xml-lib.adoc[__The XML Library__] diff --git a/smlnj-lib/Doc/src/XML/sig-XML_SCHEMA.adoc b/smlnj-lib/Doc/src/XML/sig-XML_SCHEMA.adoc new file mode 100644 index 0000000..6fa32a4 --- /dev/null +++ b/smlnj-lib/Doc/src/XML/sig-XML_SCHEMA.adoc @@ -0,0 +1,78 @@ += The `XML_SCHEMA` signature +:Author: John Reppy +:Date: {release-date} +:stem: latexmath +:source-highlighter: pygments +:VERSION: {smlnj-version} + +The `XML_SCHEMA` signature describes the elements and attributes of +an *XML* schema that are required to parse files of that schema. + +== Synopsis + +[source,sml] +------------ +signature XML_SCHEMA +------------ + +== Interface + +[source,sml] +------------ +type element +type attribute + +val element : string -> element option + +val preserveWS : element -> bool + +val preserveComment : element -> bool + +val same : element * element -> bool + +val toString : element -> string + +val attribute : (string * string) -> attribute +------------ + +== Description + +`[.kw]#type# element`:: + the representation type for *XML* elements. Note that this type is *not* + the type of a tree node in an *XML* tree, but rather the label or tag + associated with the tree. + +`[.kw]#type# attribute`:: + the representation type for *XML* attributes. + +`[.kw]#val# element : string \-> element option`:: + `element tag` returns `SOME elem` where `elem` is the representation of + the element with the given tag. It returns `NONE` when `tag` is not the + name of any element in the schema. + +`[.kw]#val# preserveWS : element \-> bool`:: + `preserveWS elem` returns `true` if whitespace should be preserved in + the element's content and `false` if whitespace can be normalized in + the element's content. Note that if true, this property is inherited by any + nested elements. + +`[.kw]#val# preserveComment : element \-> bool`:: + `preserveComment elem` returns `true` if comments should be preserved in + the element's content and `false` if comments if they should be deleted. + +`[.kw]#val# same : element * element \-> bool`:: + `same (elem1, elem2)` returns `true` if, and only if, the two elements + are the same. + +`[.kw]#val# toString : element \-> string`:: + `toString elem` returns the string representation of the element + (without the "`<`" and "`>`" brackets). + +`[.kw]#val# attribute : (string * string) \-> attribute`:: + `attribute (name, value)` returns an attribute value for the given + name-value pair. + +== See Also + +xref:fun-XMLTreeFn.adoc[`XMLTreeFn`], +xref:xml-lib.adoc[__The XML Library__] diff --git a/smlnj-lib/Doc/src/XML/sig-XML_TREE.adoc b/smlnj-lib/Doc/src/XML/sig-XML_TREE.adoc new file mode 100644 index 0000000..294d038 --- /dev/null +++ b/smlnj-lib/Doc/src/XML/sig-XML_TREE.adoc @@ -0,0 +1,110 @@ += The `XML_TREE` signature +:Author: John Reppy +:Date: {release-date} +:stem: latexmath +:source-highlighter: pygments +:VERSION: {smlnj-version} + +The `XML_TREE` signature defines a tree representation of *XML* files. + +== Synopsis + +[source,sml] +------------ +signature XML_TREE +------------ + +== Interface + +[source,sml] +------------ +structure Schema : XML_SCHEMA + +datatype doctype = DOCTYPE of string * external_id option + +and external_id + = SYSTEM of string + | PUBLIC of string * string + +datatype content + = TEXT of string + | CDATA of string + | ELEMENT of { + name : Schema.element, + attrs : Schema.attribute list, + content : content list + } + +type tree = { + xmlDecl : Schema.attribute list option, + doctype : doctype option, + content : content + } +------------ + +== Description + +`[.kw]#structure# Schema : XML_SCHEMA`:: + This substructure defines the representation of elements and attributes. + +`[.kw]#datatype# doctype = DOCTYPE of string * external_id option`:: + This datatype represents the contents of the optional `DOCTYPE` + element found the beginning of the file (following the optional + *XML* declaration). We currently only support external DTDs. + +`[.kw]#datatype# external_id`:: + This datatype represents an external DTD specification; its constructors + are defined as follows: ++ +-- + `SYSTEM of url`:: + specifies a "private" external DTD, where the string `url` specifies + the DTD's location. + + `PUBLIC(name, url)`:: + specifies a "public" external DTD, where the string `name` is the name + of the DTD and `url` specifies the DTD's location. +-- + +`[.kw]#datatype# content`:: + This datatype is used to represent the content of an *XML* file as a tree. + The constructors have the following meanings: ++ +-- + `TEXT s`:: + represents the text described by the string `s`. When parsing, entities + in the source (_e.g._, `<`) are replaced by their definition and, + when printing, special characters (_e.g._, `<`) are replaced by their + entities. + + `CDATA s`:: + represents the literal text described by the string `s`. + + `ELEMENT{name, attrs, content}`:: + represents a subtree enclosed by "`` ... `" tags, + where `name` is the name of the element, `attrs` is a list of attributes + in the start tag, and `content` is the stuff between the tags. +-- + +`[.kw]#type# tree`:: + An *XML* tree, which is a record type with the following fields: ++ +-- + `xmlDecl : Schema.attribute list option`:: + This field represents the optional *XML* declaration at the beginning of + a file, where a value of `SOME attrs` means that there was an + *XML* declaration present with the list of attributes `attrs`. + + `doctype : doctype option`:: + This field represents the optional `DOCTYPE` element that follows the + *XML* declaration. + + `content : content`:: + This field is the root of the content and will always be an `ELEMENT`. +-- + +== See Also + +xref:sig-XML_SCHEMA.adoc[`XML_SCHEMA`], +xref:fun-XMLTreeFn.adoc[`XMLTreeFn`], +xref:xml-lib.adoc[__The XML Library__] diff --git a/smlnj-lib/Doc/src/XML/str-GenericXMLTree.adoc b/smlnj-lib/Doc/src/XML/str-GenericXMLTree.adoc new file mode 100644 index 0000000..8acf577 --- /dev/null +++ b/smlnj-lib/Doc/src/XML/str-GenericXMLTree.adoc @@ -0,0 +1,25 @@ += The `GenericXMLTree` structure +:Author: John Reppy +:Date: {release-date} +:stem: latexmath +:source-highlighter: pygments +:VERSION: {smlnj-version} + +The `GenericXMLTree` structure provides a generic representation of +*XML* trees where elements and attribute names are represented as +xref:../Util/str-Atom.adoc#type:atom[`Atom.atom`] values. + +== Synopsis + +[source,sml] +------------ +structure GenericXMLTree : XML_TREE + where type Schema.element = Atom.atom + where type Scheme.attribute = Atom.atom * string +------------ + +== See Also + +xref:sig-XML_SCHEMA.adoc[`XML_SCHEMA`], +xref:sig-XML_TREE.adoc[`XML_TREE`], +xref:xml-lib.adoc[__The XML Library__] diff --git a/smlnj-lib/Doc/src/XML/xml-lib.adoc b/smlnj-lib/Doc/src/XML/xml-lib.adoc new file mode 100644 index 0000000..1aeefb1 --- /dev/null +++ b/smlnj-lib/Doc/src/XML/xml-lib.adoc @@ -0,0 +1,80 @@ += The XML Library +:Author: John Reppy +:Date: {release-date} +:stem: latexmath +:source-highlighter: pygments +:VERSION: {smlnj-version} + +== Overview + +The *XML Library* is a small library for parsing +https://www.w3.org/TR/REC-xml[*XML*] files. It does +*not* support validation (_e.g._, against a *DTD* or *Schema*). The +basic idea is that the user supplies a "schema" module that describes +the elements and attribute representation of an *XML* document. From this, +one builds an XML tree representation + +[source,sml] +------------ +structure MyXMLTree = XMLTreeFn (MyXMLSchema) +------------ + +and an XML parser + +[source,sml] +------------ +structure MyXMLParser = XMLParserFn (MyXMLTree) +------------ + +The library also provides a generic *XML* tree definition +(_i.e._, one that allows any element name). + +For a more complete library for XML processing, use the +https://smlnj.org/dist/other/fxp-2.1.tgz[*fxp library*]. + +== Contents + +xref:sig-XML_SCHEMA.adoc[`[.kw]#signature# XML_SCHEMA`]:: + describes the elements and attributes of an *XML* schema + that are required to parse files of that schema. + +xref:sig-XML_TREE.adoc[`[.kw]#signature# XML_TREE`]:: + Defines a tree representation of *XML* files. + +xref:fun-XMLTreeFn.adoc[`[.kw]#functor# XMLTreeFn`]:: + Generates a tree representation of *XML* + files from a schema structure argument. + +xref:fun-XMLParserFn.adoc[`[.kw]#functor# XMLParserFn`]:: + generates a parser for the given *SML* tree representation. + +xref:str-GenericXMLTree.adoc[`[.kw]#structure# GenericXMLTree`]:: + Provides a generic representation of *XML* trees. + +== Usage + +For https://smlnj.org[*SML/NJ*], include `$/xml-lib.cm` in your +*CM* file. + +For use in https://www.mlton.org/[*MLton*], include +`$(SML_LIB)/smlnj-lib/XML/xml-lib.mlb` in your *MLB* file. + +ifdef::backend-pdf[] + +// Push titles down one level. +:leveloffset: 1 + +include::sig-XML_SCHEMA.adoc[] + +include::sig-XML_TREE.adoc[] + +include::fun-XMLTreeFn.adoc[] + +include::fun-XMLParserFn.adoc[] + +include::str-GenericXMLTree.adoc[] + +// Return to normal title levels. +:leveloffset: 0 + +endif::[] diff --git a/smlnj-lib/Doc/src/fragments/footer.in b/smlnj-lib/Doc/src/fragments/footer.in new file mode 100644 index 0000000..778cf2d --- /dev/null +++ b/smlnj-lib/Doc/src/fragments/footer.in @@ -0,0 +1,13 @@ + + + + + + diff --git a/smlnj-lib/Doc/src/fragments/header.in b/smlnj-lib/Doc/src/fragments/header.in new file mode 100644 index 0000000..bcc2097 --- /dev/null +++ b/smlnj-lib/Doc/src/fragments/header.in @@ -0,0 +1,54 @@ + + + + + + + @AUTHOR@@KEYWORDS@ + + @TITLE@ + + + + + + + + +
    +
    +
    +
    + +
    + +
    @STYLED-TITLE@
    +
    +
    +
    diff --git a/smlnj-lib/Doc/src/fragments/shim.in b/smlnj-lib/Doc/src/fragments/shim.in new file mode 100644 index 0000000..2170f70 --- /dev/null +++ b/smlnj-lib/Doc/src/fragments/shim.in @@ -0,0 +1,2 @@ +
    +
    diff --git a/smlnj-lib/Doc/src/images/smlnj-logo.png b/smlnj-lib/Doc/src/images/smlnj-logo.png new file mode 100644 index 0000000..3dfd7ff Binary files /dev/null and b/smlnj-lib/Doc/src/images/smlnj-logo.png differ diff --git a/smlnj-lib/Doc/src/index.adoc b/smlnj-lib/Doc/src/index.adoc new file mode 100644 index 0000000..144e491 --- /dev/null +++ b/smlnj-lib/Doc/src/index.adoc @@ -0,0 +1,108 @@ += Overview +:title: SML/NJ Library Overview +:Author: John Reppy +:Date: {release-date} +:stem: latexmath +:source-highlighter: pygments +:VERSION: {smlnj-version} + +== Introduction + +The **Standard ML of New Jersey Library** (**SML/NJ Library**) +is a collection of over 200 modules grouped into 14 libraries. +It was designed to build on the {sml-basis-url}/index.html[**Standard ML Basis Library**] +by providing higher-level and application-specific services +that were out of scope for the Basis Library. +The **SML/NJ Library** is distributed as part of both the +https://smlnj.org[**SML/NJ**] and +https://mlton.org[**MLton SML Compiler**] systems. + +ifdef::backend-html[] + +== Libraries + +The following is a list of the libraries that comprise the +**SML/NJ Library** with a brief description of what they +provide. These are listed in alphabetical order: + +=== xref:Controls/controls-lib.adoc[Controls Library] +Support for managing application controls. + +=== xref:HTML/html-lib.adoc[HTML Library] +__not yet written__ + +=== xref:HTML4/html4-lib.adoc[HTML4 Library] +__not yet written__ + +=== xref:HashCons/hash-cons-lib.adoc[HashCons Library] +Supports the implementation of hash-consed representations of data structures. + +=== xref:INet/inet-lib.adoc[INet Library] +Provides utilities for network programming with sockets + +=== xref:JSON/json-lib.adoc[JSON Library] +The *JSON Library* provides support for parsing, printing, and +manipulating https://www.json.org/json-en.html[*JSON*] data. + +=== xref:PP/pp-lib.adoc[PP Library] +__not yet written__ + +=== xref:Reactive/reactive-lib.adoc[Reactive Library] +__not yet written__ + +=== xref:RegExp/regexp-lib.adoc[RegExp Library] +__not yet written__ + +=== xref:SExp/sexp-lib.adoc[SExp Library] +Supports the parsing and printing of semi-structured data as +https://en.wikipedia.org/wiki/S-expression[S-expressions]. + +=== xref:Unix/unix-lib.adoc[Unix Library] +The *Unix Library* provides some *Unix*-specific utilities. + +=== xref:Util/smlnj-lib.adoc[Util Library] +The *Util Library* provides a grab bag of data structures and +algorithms. Modules from this library are autoloaded into +the *SML/NJ* interactive environment by default. + +=== xref:UUID/uuid-lib.adoc[UUID Library] +The *UUID Library* provides support for generating and using +UUIDs (_aka_ GUIDs). + +=== xref:XML/xml-lib.adoc[XML Library] +The *XML Library* is a small library for parsing +https://www.w3.org/TR/REC-xml[*XML*] files. + +endif::[] + +ifdef::backend-pdf[] + +include::Controls/controls-lib.adoc[] + +//include::HTML/html-lib.adoc[] + +//include::HTML4/html4-lib.adoc[] + +include::HashCons/hash-cons-lib.adoc[] + +include::INet/inet-lib.adoc[] + +include::JSON/json-lib.adoc[] + +//include::PP/pp-lib.adoc[] + +include::Reactive/reactive-lib.adoc[] + +include::RegExp/regexp-lib.adoc[] + +include::SExp/sexp-lib.adoc[] + +include::Unix/unix-lib.adoc[] + +include::Util/smlnj-lib.adoc[] + +include::UUID/uuid-lib.adoc[] + +include::XML/xml-lib.adoc[] + +endif::[] diff --git a/smlnj-lib/Doc/src/root.adoc b/smlnj-lib/Doc/src/root.adoc new file mode 100644 index 0000000..5f960d9 --- /dev/null +++ b/smlnj-lib/Doc/src/root.adoc @@ -0,0 +1,63 @@ +The Standard ML of New Jersey Library: Reference Manual +======================================================= +:Author: John Reppy +:Date: {release-date} +:stem: latexmath +:source-highlighter: pygments +:VERSION: {smlnj-version} +:toc: + +== Introduction + +The **Standard ML of New Jersey Library** (**SML/NJ Library**) +is a collection of over 200 modules grouped into 14 libraries. +It was designed to build on the {sml-basis-url}/index.html[**Standard ML Basis Library**] +by providing higher-level and application-specific services +that were out of scope for the Basis Library. +The **SML/NJ Library** is distributed as part of both the +https://smlnj.org[**SML/NJ**] and +https://mlton.org[**MLton SML Compiler**] systems. + + +// include library root documents + +include::Controls/controls-lib.adoc[] + +//// +\include::HTML/html-lib.adoc[] +//// + +//// +\include::HTML4/html4-lib.adoc[] +//// + +include::HashCons/hash-cons-lib.adoc[] + +include::INet/inet-lib.adoc[] + +include::JSON/json-lib.adoc[] + +//// +\include::PP/pp-lib.adoc[] +//// + +//// +\include::Reactive/reactive-lib.adoc[] +//// + +//// +\include::RegExp/regexp-lib.adoc[] +//// + +include::SExp/sexp-lib.adoc[] + +include::UUID/uuid-lib.adoc[] + +include::Unix/unix-lib.adoc[] + +include::Util/smlnj-lib.adoc[] + +//// +\include::XML/xml-lib.adoc[] +//// + diff --git a/smlnj-lib/Doc/src/scripts/gen-css.sh b/smlnj-lib/Doc/src/scripts/gen-css.sh new file mode 100755 index 0000000..7585d3c --- /dev/null +++ b/smlnj-lib/Doc/src/scripts/gen-css.sh @@ -0,0 +1,83 @@ +#!/bin/sh +# +# COPYRIGHT (c) 2013 The SML3d Project (http://sml3d.cs.uchicago.edu) +# All rights reserved. +# +# Script to generate CSS files for SML3d webpages by substituting specific colors +# for symbolic color names +# +# usage: +# gen-css.sh src > dst + +# +# general document colors +# +BACKGROUND="#fff" +LIGHT_BACKGROUND="#f8f8f7" +MEDIUM_BACKGROUND="#ececec" +#MEDIUM_BACKGROUND="#f8f8f7" +#MEDIUM_BACKGROUND="#99ccff" +DARK_BACKGROUND="#cae5ff" # previously #369 +BANNER_BACKGROUND="$MEDIUM_BACKGROUND" +TOC_HIGHLIGHT="#99ccff" # hover color for TOC hrefs +HIGHLIGHT_BACKGROUND="yellow" # background for highlighted text +TITLE_COLOR="#000099" # color for title in banner +MAJOR_HEADER_COLOR="#cc6600" # orange/tawny for major headers +MINOR_HEADER_COLOR="#003366" # dark blue for minor headers +DEFAULT_COLOR="#000044" # default color for text +HR_COLOR="#527bbd" # color for horizontal rules +LINK_COLOR="#000099" # color for hyperlinks +VISITED_COLOR="#6666ff" # color for visited links +EM_COLOR="#000044" # color for emphasis elements +STRONG_COLOR="#000044" # color for strong elements +BULLET_COLOR="green" # color for UL bullets +BLOCK_BORDER="#527bbd" # border for inset blocks +BLOCK_BACKGROUND="#f8f8f8" # background for inset blocks +BLOCK_COLOR="#839496" # foreground color for inset blocks + +# +# code highlighting colors (pallete from http://ethanschoonover.com/solarized) +# +CODE_BORDER="$BLOCK_BORDER" # border for listing block +CODE_BACKGROUND="$BLOCK_BACKGROUND" # background color for listings +CODE_COLOR="#6c71c4" # default foreground color for code +KW_COLOR="#268bd2" # keyword color +COM_COLOR="#dc322f" # comment color +LIT_COLOR="#b58900" # color for literals +BIND_COLOR="#6c71c4" # color for identifiers at binding sites +PUNCT_COLOR="#6c71c4" # color for punctuation +# NOTE: the following two colors are from the default pygments color scheme +TV_COLOR="#aa22ff" # color for type variables +TY_COLOR="#b00040" # color for type names + +sed -e s/@BACKGROUND@/$BACKGROUND/g \ + -e s/@LIGHT_BACKGROUND@/$MEDIUM_BACKGROUND/g \ + -e s/@MEDIUM_BACKGROUND@/$MEDIUM_BACKGROUND/g \ + -e s/@DARK_BACKGROUND@/$DARK_BACKGROUND/g \ + -e s/@BANNER_BACKGROUND@/$BANNER_BACKGROUND/g \ + -e s/@TOC_HIGHLIGHT@/$TOC_HIGHLIGHT/g \ + -e s/@HIGHLIGHT_BACKGROUND@/$HIGHLIGHT_BACKGROUND/g \ + -e s/@TITLE_COLOR@/$TITLE_COLOR/g \ + -e s/@MAJOR_HEADER_COLOR@/$MAJOR_HEADER_COLOR/g \ + -e s/@MINOR_HEADER_COLOR@/$MINOR_HEADER_COLOR/g \ + -e s/@DEFAULT_COLOR@/$DEFAULT_COLOR/g \ + -e s/@HR_COLOR@/$HR_COLOR/g \ + -e s/@LINK_COLOR@/$LINK_COLOR/g \ + -e s/@VISITED_COLOR@/$VISITED_COLOR/g \ + -e s/@EM_COLOR@/$EM_COLOR/g \ + -e s/@STRONG_COLOR@/$STRONG_COLOR/g \ + -e s/@BULLET_COLOR@/$BULLET_COLOR/g \ + -e s/@BLOCK_BORDER@/$BLOCK_BORDER/g \ + -e s/@BLOCK_BACKGROUND@/$BLOCK_BACKGROUND/g \ + -e s/@BLOCK_COLOR@/$BLOCK_COLOR/g \ + -e s/@CODE_BORDER@/$CODE_BORDER/g \ + -e s/@CODE_BACKGROUND@/$CODE_BACKGROUND/g \ + -e s/@CODE_COLOR@/$CODE_COLOR/g \ + -e s/@KW_COLOR@/$KW_COLOR/g \ + -e s/@COM_COLOR@/$COM_COLOR/g \ + -e s/@LIT_COLOR@/$LIT_COLOR/g \ + -e s/@BIND_COLOR@/$BIND_COLOR/g \ + -e s/@PUNCT_COLOR@/$PUNCT_COLOR/g \ + -e s/@TV_COLOR@/$TV_COLOR/g \ + -e s/@TY_COLOR@/$TY_COLOR/g \ + $1 diff --git a/smlnj-lib/Doc/src/scripts/prepare-fun.sh b/smlnj-lib/Doc/src/scripts/prepare-fun.sh new file mode 100755 index 0000000..2232a92 --- /dev/null +++ b/smlnj-lib/Doc/src/scripts/prepare-fun.sh @@ -0,0 +1,44 @@ +#!/bin/sh +# +# Script for adding a functor description file to an existing +# library documentation directory. +# +# usage: scripts/prepare-fun.sh +# + +function usage { + echo "usage: scripts/prepare-str.sh " + exit 1 +} + +if [ ! -x scripts/prepare-fun.sh ] ; then + usage +fi + +if [ $# -ne 2 ] ; then + usage +fi + +d=$1 ; shift +name=$1 ; shift + +if [ ! -d "$d" ] ; then + echo "$0: missing library directory $d" + exit 1 +fi + +lib=$(basename $d/*-lib.adoc .adoc) + +template=Templates/fun.adoc +stem="fun-$name" +mod_adoc="$d/$stem.adoc" + +if [ -f $mod_adoc ] ; then + echo "$0: file '$mod_adoc' already exists" + exit 1 +fi + +## create the placeholder for the module +sed -e "s/@DIR@/$d/" -e "s/@LIBRARY@/$lib/" -e "s/@NAME@/$name/" $template > $mod_adoc + +exit 0 diff --git a/smlnj-lib/Doc/src/scripts/prepare-lib.sh b/smlnj-lib/Doc/src/scripts/prepare-lib.sh new file mode 100755 index 0000000..7b66bab --- /dev/null +++ b/smlnj-lib/Doc/src/scripts/prepare-lib.sh @@ -0,0 +1,83 @@ +#!/bin/sh +# +# usage scripts/prepare-lib.sh +# + +function usage { + echo "usage: scripts/prepare-lib.sh " + exit 1 +} + +if [ ! -x scripts/prepare-lib.sh ] ; then + usage +fi + +if [ $# -ne 1 ] ; then + usage +fi + +d=$1 ; shift + +if [ ! -f $d/MODULES ] ; then + echo "missing $d/MODULES" + exit 1 +fi + +lib=$(basename ../../$d/*-lib.cm .cm) +lib_adoc=$d/$lib.adoc +## copy the header +sed -e "s/@DIR@/$d/" -e "s/@LIBRARY@/$lib/" Templates/lib-head.adoc > $lib_adoc +## add module entries +while read -r line ; do + name=$(echo $line | sed -e 's/structure //' -e 's/signature //' -e 's/functor //') + case $line in + signature*) + template=Templates/sig.adoc + kw="[.kw]#signature#" + stem="sig-$name" + ;; + structure*) + template=Templates/str.adoc + kw="[.kw]#structure#" + stem="str-$name" + ;; + functor*) + template=Templates/fun.adoc + kw="[.kw]#functor#" + stem="fun-$name" + ;; + esac + mod_adoc="$d/$stem.adoc" + link="xref:$stem.adoc" + ## add the module to the library file + echo $link"[\`"$kw $name"\`]::" >> $lib_adoc + echo " something" >> $lib_adoc + echo "" >> $lib_adoc + ## create the placeholder for the module + sed -e "s/@DIR@/$d/" -e "s/@LIBRARY@/$lib/" -e "s/@NAME@/$name/" $template > $mod_adoc +done < $d/MODULES +## copy the middle part +sed -e "s/@DIR@/$d/" -e "s/@LIBRARY@/$lib/" Templates/lib-mid.adoc >> $lib_adoc +## add the module entries for the PDF version +while read -r line ; do + name=$(echo $line | sed -e 's/structure //' -e 's/signature //' -e 's/functor //') + case $line in + signature*) + echo "include::sig-$name.adoc[]" >> $lib_adoc + echo "" >> $lib_adoc + ;; + structure*) + echo "include::str-$name.adoc[]" >> $lib_adoc + echo "" >> $lib_adoc + ;; + functor*) + echo "include::fun-$name.adoc[]" >> $lib_adoc + echo "" >> $lib_adoc + ;; + esac +done < $d/MODULES +## copy the footer +sed -e "s/@DIR@/$d/" -e "s/@LIBRARY@/$lib/" Templates/lib-foot.adoc >> $lib_adoc + + +exit 0 diff --git a/smlnj-lib/Doc/src/scripts/prepare-sig.sh b/smlnj-lib/Doc/src/scripts/prepare-sig.sh new file mode 100755 index 0000000..63d22d5 --- /dev/null +++ b/smlnj-lib/Doc/src/scripts/prepare-sig.sh @@ -0,0 +1,46 @@ +#!/bin/sh +# +# Script for adding a signature description file to an existing +# library documentation directory. +# +# usage: scripts/prepare-sig.sh +# + +function usage { + echo "usage: scripts/prepare-sig.sh " + exit 1 +} + +if [ ! -x scripts/prepare-sig.sh ] ; then + usage +fi + +if [ $# -ne 2 ] ; then + usage +fi + +d=$1 ; shift +name=$1 ; shift + +if [ ! -d "$d" ] ; then + echo "$0: missing library directory $d" + exit 1 +fi + +lib=$(basename $d/*-lib.adoc .adoc) + +template=Templates/sig.adoc +stem="sig-$name" +mod_adoc="$d/$stem.adoc" + +if [ -f $mod_adoc ] ; then + echo "$0: file '$mod_adoc' already exists" + exit 1 +fi + +echo "creating $mod_adoc" + +## create the placeholder for the module +sed -e "s/@DIR@/$d/" -e "s/@LIBRARY@/$lib/" -e "s/@NAME@/$name/" $template > $mod_adoc + +exit 0 diff --git a/smlnj-lib/Doc/src/scripts/prepare-str.sh b/smlnj-lib/Doc/src/scripts/prepare-str.sh new file mode 100755 index 0000000..8f6c9d1 --- /dev/null +++ b/smlnj-lib/Doc/src/scripts/prepare-str.sh @@ -0,0 +1,44 @@ +#!/bin/sh +# +# Script for adding a structure description file to an existing +# library documentation directory. +# +# usage: scripts/prepare-str.sh +# + +function usage { + echo "usage: scripts/prepare-str.sh " + exit 1 +} + +if [ ! -x scripts/prepare-str.sh ] ; then + usage +fi + +if [ $# -ne 2 ] ; then + usage +fi + +d=$1 ; shift +name=$1 ; shift + +if [ ! -d "$d" ] ; then + echo "$0: missing library directory $d" + exit 1 +fi + +lib=$(basename $d/*-lib.adoc .adoc) + +template=Templates/str.adoc +stem="str-$name" +mod_adoc="$d/$stem.adoc" + +if [ -f $mod_adoc ] ; then + echo "$0: file '$mod_adoc' already exists" + exit 1 +fi + +## create the placeholder for the module +sed -e "s/@DIR@/$d/" -e "s/@LIBRARY@/$lib/" -e "s/@NAME@/$name/" $template > $mod_adoc + +exit 0 diff --git a/smlnj-lib/Doc/src/styles/pygments-default.css b/smlnj-lib/Doc/src/styles/pygments-default.css new file mode 100644 index 0000000..16d4c78 --- /dev/null +++ b/smlnj-lib/Doc/src/styles/pygments-default.css @@ -0,0 +1,69 @@ +pre.pygments .hll { background-color: #ffffcc } +pre.pygments { background: #f8f8f8; } +pre.pygments .tok-c { color: #408080; font-style: italic } /* Comment */ +pre.pygments .tok-err { border: 1px solid #FF0000 } /* Error */ +pre.pygments .tok-k { color: #008000; font-weight: bold } /* Keyword */ +pre.pygments .tok-o { color: #666666 } /* Operator */ +pre.pygments .tok-ch { color: #408080; font-style: italic } /* Comment.Hashbang */ +pre.pygments .tok-cm { color: #408080; font-style: italic } /* Comment.Multiline */ +pre.pygments .tok-cp { color: #BC7A00 } /* Comment.Preproc */ +pre.pygments .tok-cpf { color: #408080; font-style: italic } /* Comment.PreprocFile */ +pre.pygments .tok-c1 { color: #408080; font-style: italic } /* Comment.Single */ +pre.pygments .tok-cs { color: #408080; font-style: italic } /* Comment.Special */ +pre.pygments .tok-gd { color: #A00000 } /* Generic.Deleted */ +pre.pygments .tok-ge { font-style: italic } /* Generic.Emph */ +pre.pygments .tok-gr { color: #FF0000 } /* Generic.Error */ +pre.pygments .tok-gh { color: #000080; font-weight: bold } /* Generic.Heading */ +pre.pygments .tok-gi { color: #00A000 } /* Generic.Inserted */ +pre.pygments .tok-go { color: #888888 } /* Generic.Output */ +pre.pygments .tok-gp { color: #000080; font-weight: bold } /* Generic.Prompt */ +pre.pygments .tok-gs { font-weight: bold } /* Generic.Strong */ +pre.pygments .tok-gu { color: #800080; font-weight: bold } /* Generic.Subheading */ +pre.pygments .tok-gt { color: #0044DD } /* Generic.Traceback */ +pre.pygments .tok-kc { color: #008000; font-weight: bold } /* Keyword.Constant */ +pre.pygments .tok-kd { color: #008000; font-weight: bold } /* Keyword.Declaration */ +pre.pygments .tok-kn { color: #008000; font-weight: bold } /* Keyword.Namespace */ +pre.pygments .tok-kp { color: #008000 } /* Keyword.Pseudo */ +pre.pygments .tok-kr { color: #008000; font-weight: bold } /* Keyword.Reserved */ +pre.pygments .tok-kt { color: #B00040 } /* Keyword.Type */ +pre.pygments .tok-m { color: #666666 } /* Literal.Number */ +pre.pygments .tok-s { color: #BA2121 } /* Literal.String */ +pre.pygments .tok-na { color: #7D9029 } /* Name.Attribute */ +pre.pygments .tok-nb { color: #008000 } /* Name.Builtin */ +pre.pygments .tok-nc { color: #0000FF; font-weight: bold } /* Name.Class */ +pre.pygments .tok-no { color: #880000 } /* Name.Constant */ +pre.pygments .tok-nd { color: #AA22FF } /* Name.Decorator */ +pre.pygments .tok-ni { color: #999999; font-weight: bold } /* Name.Entity */ +pre.pygments .tok-ne { color: #D2413A; font-weight: bold } /* Name.Exception */ +pre.pygments .tok-nf { color: #0000FF } /* Name.Function */ +pre.pygments .tok-nl { color: #A0A000 } /* Name.Label */ +pre.pygments .tok-nn { color: #0000FF; font-weight: bold } /* Name.Namespace */ +pre.pygments .tok-nt { color: #008000; font-weight: bold } /* Name.Tag */ +pre.pygments .tok-nv { color: #19177C } /* Name.Variable */ +pre.pygments .tok-ow { color: #AA22FF; font-weight: bold } /* Operator.Word */ +pre.pygments .tok-w { color: #bbbbbb } /* Text.Whitespace */ +pre.pygments .tok-mb { color: #666666 } /* Literal.Number.Bin */ +pre.pygments .tok-mf { color: #666666 } /* Literal.Number.Float */ +pre.pygments .tok-mh { color: #666666 } /* Literal.Number.Hex */ +pre.pygments .tok-mi { color: #666666 } /* Literal.Number.Integer */ +pre.pygments .tok-mo { color: #666666 } /* Literal.Number.Oct */ +pre.pygments .tok-sa { color: #BA2121 } /* Literal.String.Affix */ +pre.pygments .tok-sb { color: #BA2121 } /* Literal.String.Backtick */ +pre.pygments .tok-sc { color: #BA2121 } /* Literal.String.Char */ +pre.pygments .tok-dl { color: #BA2121 } /* Literal.String.Delimiter */ +pre.pygments .tok-sd { color: #BA2121; font-style: italic } /* Literal.String.Doc */ +pre.pygments .tok-s2 { color: #BA2121 } /* Literal.String.Double */ +pre.pygments .tok-se { color: #BB6622; font-weight: bold } /* Literal.String.Escape */ +pre.pygments .tok-sh { color: #BA2121 } /* Literal.String.Heredoc */ +pre.pygments .tok-si { color: #BB6688; font-weight: bold } /* Literal.String.Interpol */ +pre.pygments .tok-sx { color: #008000 } /* Literal.String.Other */ +pre.pygments .tok-sr { color: #BB6688 } /* Literal.String.Regex */ +pre.pygments .tok-s1 { color: #BA2121 } /* Literal.String.Single */ +pre.pygments .tok-ss { color: #19177C } /* Literal.String.Symbol */ +pre.pygments .tok-bp { color: #008000 } /* Name.Builtin.Pseudo */ +pre.pygments .tok-fm { color: #0000FF } /* Name.Function.Magic */ +pre.pygments .tok-vc { color: #19177C } /* Name.Variable.Class */ +pre.pygments .tok-vg { color: #19177C } /* Name.Variable.Global */ +pre.pygments .tok-vi { color: #19177C } /* Name.Variable.Instance */ +pre.pygments .tok-vm { color: #19177C } /* Name.Variable.Magic */ +pre.pygments .tok-il { color: #666666 } /* Literal.Number.Integer.Long */ \ No newline at end of file diff --git a/smlnj-lib/Doc/src/styles/pygments-lovelace.css b/smlnj-lib/Doc/src/styles/pygments-lovelace.css new file mode 100644 index 0000000..9f54499 --- /dev/null +++ b/smlnj-lib/Doc/src/styles/pygments-lovelace.css @@ -0,0 +1,70 @@ +pre.pygments .hll { background-color: #ffffcc } +pre.pygments { background: #ffffff; } +pre.pygments .tok-c { color: #888888; font-style: italic } /* Comment */ +pre.pygments .tok-err { background-color: #a848a8 } /* Error */ +pre.pygments .tok-k { color: #2838b0 } /* Keyword */ +pre.pygments .tok-o { color: #666666 } /* Operator */ +pre.pygments .tok-p { color: #888888 } /* Punctuation */ +pre.pygments .tok-ch { color: #287088; font-style: italic } /* Comment.Hashbang */ +pre.pygments .tok-cm { color: #888888; font-style: italic } /* Comment.Multiline */ +pre.pygments .tok-cp { color: #289870 } /* Comment.Preproc */ +pre.pygments .tok-cpf { color: #888888; font-style: italic } /* Comment.PreprocFile */ +pre.pygments .tok-c1 { color: #888888; font-style: italic } /* Comment.Single */ +pre.pygments .tok-cs { color: #888888; font-style: italic } /* Comment.Special */ +pre.pygments .tok-gd { color: #c02828 } /* Generic.Deleted */ +pre.pygments .tok-ge { font-style: italic } /* Generic.Emph */ +pre.pygments .tok-gr { color: #c02828 } /* Generic.Error */ +pre.pygments .tok-gh { color: #666666 } /* Generic.Heading */ +pre.pygments .tok-gi { color: #388038 } /* Generic.Inserted */ +pre.pygments .tok-go { color: #666666 } /* Generic.Output */ +pre.pygments .tok-gp { color: #444444 } /* Generic.Prompt */ +pre.pygments .tok-gs { font-weight: bold } /* Generic.Strong */ +pre.pygments .tok-gu { color: #444444 } /* Generic.Subheading */ +pre.pygments .tok-gt { color: #2838b0 } /* Generic.Traceback */ +pre.pygments .tok-kc { color: #444444; font-style: italic } /* Keyword.Constant */ +pre.pygments .tok-kd { color: #2838b0; font-style: italic } /* Keyword.Declaration */ +pre.pygments .tok-kn { color: #2838b0 } /* Keyword.Namespace */ +pre.pygments .tok-kp { color: #2838b0 } /* Keyword.Pseudo */ +pre.pygments .tok-kr { color: #2838b0 } /* Keyword.Reserved */ +pre.pygments .tok-kt { color: #2838b0; font-style: italic } /* Keyword.Type */ +pre.pygments .tok-m { color: #444444 } /* Literal.Number */ +pre.pygments .tok-s { color: #b83838 } /* Literal.String */ +pre.pygments .tok-na { color: #388038 } /* Name.Attribute */ +pre.pygments .tok-nb { color: #388038 } /* Name.Builtin */ +pre.pygments .tok-nc { color: #287088 } /* Name.Class */ +pre.pygments .tok-no { color: #b85820 } /* Name.Constant */ +pre.pygments .tok-nd { color: #287088 } /* Name.Decorator */ +pre.pygments .tok-ni { color: #709030 } /* Name.Entity */ +pre.pygments .tok-ne { color: #908828 } /* Name.Exception */ +pre.pygments .tok-nf { color: #785840 } /* Name.Function */ +pre.pygments .tok-nl { color: #289870 } /* Name.Label */ +pre.pygments .tok-nn { color: #289870 } /* Name.Namespace */ +pre.pygments .tok-nt { color: #2838b0 } /* Name.Tag */ +pre.pygments .tok-nv { color: #b04040 } /* Name.Variable */ +pre.pygments .tok-ow { color: #a848a8 } /* Operator.Word */ +pre.pygments .tok-w { color: #a89028 } /* Text.Whitespace */ +pre.pygments .tok-mb { color: #444444 } /* Literal.Number.Bin */ +pre.pygments .tok-mf { color: #444444 } /* Literal.Number.Float */ +pre.pygments .tok-mh { color: #444444 } /* Literal.Number.Hex */ +pre.pygments .tok-mi { color: #444444 } /* Literal.Number.Integer */ +pre.pygments .tok-mo { color: #444444 } /* Literal.Number.Oct */ +pre.pygments .tok-sa { color: #444444 } /* Literal.String.Affix */ +pre.pygments .tok-sb { color: #b83838 } /* Literal.String.Backtick */ +pre.pygments .tok-sc { color: #a848a8 } /* Literal.String.Char */ +pre.pygments .tok-dl { color: #b85820 } /* Literal.String.Delimiter */ +pre.pygments .tok-sd { color: #b85820; font-style: italic } /* Literal.String.Doc */ +pre.pygments .tok-s2 { color: #b83838 } /* Literal.String.Double */ +pre.pygments .tok-se { color: #709030 } /* Literal.String.Escape */ +pre.pygments .tok-sh { color: #b83838 } /* Literal.String.Heredoc */ +pre.pygments .tok-si { color: #b83838; text-decoration: underline } /* Literal.String.Interpol */ +pre.pygments .tok-sx { color: #a848a8 } /* Literal.String.Other */ +pre.pygments .tok-sr { color: #a848a8 } /* Literal.String.Regex */ +pre.pygments .tok-s1 { color: #b83838 } /* Literal.String.Single */ +pre.pygments .tok-ss { color: #b83838 } /* Literal.String.Symbol */ +pre.pygments .tok-bp { color: #388038; font-style: italic } /* Name.Builtin.Pseudo */ +pre.pygments .tok-fm { color: #b85820 } /* Name.Function.Magic */ +pre.pygments .tok-vc { color: #b04040 } /* Name.Variable.Class */ +pre.pygments .tok-vg { color: #908828 } /* Name.Variable.Global */ +pre.pygments .tok-vi { color: #b04040 } /* Name.Variable.Instance */ +pre.pygments .tok-vm { color: #b85820 } /* Name.Variable.Magic */ +pre.pygments .tok-il { color: #444444 } /* Literal.Number.Integer.Long */ \ No newline at end of file diff --git a/smlnj-lib/Doc/src/styles/pygments-manni.css b/smlnj-lib/Doc/src/styles/pygments-manni.css new file mode 100644 index 0000000..1145a4a --- /dev/null +++ b/smlnj-lib/Doc/src/styles/pygments-manni.css @@ -0,0 +1,69 @@ +pre.pygments .hll { background-color: #ffffcc } +pre.pygments { background: #f0f3f3; } +pre.pygments .tok-c { color: #0099FF; font-style: italic } /* Comment */ +pre.pygments .tok-err { color: #AA0000; background-color: #FFAAAA } /* Error */ +pre.pygments .tok-k { color: #006699; font-weight: bold } /* Keyword */ +pre.pygments .tok-o { color: #555555 } /* Operator */ +pre.pygments .tok-ch { color: #0099FF; font-style: italic } /* Comment.Hashbang */ +pre.pygments .tok-cm { color: #0099FF; font-style: italic } /* Comment.Multiline */ +pre.pygments .tok-cp { color: #009999 } /* Comment.Preproc */ +pre.pygments .tok-cpf { color: #0099FF; font-style: italic } /* Comment.PreprocFile */ +pre.pygments .tok-c1 { color: #0099FF; font-style: italic } /* Comment.Single */ +pre.pygments .tok-cs { color: #0099FF; font-weight: bold; font-style: italic } /* Comment.Special */ +pre.pygments .tok-gd { background-color: #FFCCCC; border: 1px solid #CC0000 } /* Generic.Deleted */ +pre.pygments .tok-ge { font-style: italic } /* Generic.Emph */ +pre.pygments .tok-gr { color: #FF0000 } /* Generic.Error */ +pre.pygments .tok-gh { color: #003300; font-weight: bold } /* Generic.Heading */ +pre.pygments .tok-gi { background-color: #CCFFCC; border: 1px solid #00CC00 } /* Generic.Inserted */ +pre.pygments .tok-go { color: #AAAAAA } /* Generic.Output */ +pre.pygments .tok-gp { color: #000099; font-weight: bold } /* Generic.Prompt */ +pre.pygments .tok-gs { font-weight: bold } /* Generic.Strong */ +pre.pygments .tok-gu { color: #003300; font-weight: bold } /* Generic.Subheading */ +pre.pygments .tok-gt { color: #99CC66 } /* Generic.Traceback */ +pre.pygments .tok-kc { color: #006699; font-weight: bold } /* Keyword.Constant */ +pre.pygments .tok-kd { color: #006699; font-weight: bold } /* Keyword.Declaration */ +pre.pygments .tok-kn { color: #006699; font-weight: bold } /* Keyword.Namespace */ +pre.pygments .tok-kp { color: #006699 } /* Keyword.Pseudo */ +pre.pygments .tok-kr { color: #006699; font-weight: bold } /* Keyword.Reserved */ +pre.pygments .tok-kt { color: #007788; font-weight: bold } /* Keyword.Type */ +pre.pygments .tok-m { color: #FF6600 } /* Literal.Number */ +pre.pygments .tok-s { color: #CC3300 } /* Literal.String */ +pre.pygments .tok-na { color: #330099 } /* Name.Attribute */ +pre.pygments .tok-nb { color: #336666 } /* Name.Builtin */ +pre.pygments .tok-nc { color: #00AA88; font-weight: bold } /* Name.Class */ +pre.pygments .tok-no { color: #336600 } /* Name.Constant */ +pre.pygments .tok-nd { color: #9999FF } /* Name.Decorator */ +pre.pygments .tok-ni { color: #999999; font-weight: bold } /* Name.Entity */ +pre.pygments .tok-ne { color: #CC0000; font-weight: bold } /* Name.Exception */ +pre.pygments .tok-nf { color: #CC00FF } /* Name.Function */ +pre.pygments .tok-nl { color: #9999FF } /* Name.Label */ +pre.pygments .tok-nn { color: #00CCFF; font-weight: bold } /* Name.Namespace */ +pre.pygments .tok-nt { color: #330099; font-weight: bold } /* Name.Tag */ +pre.pygments .tok-nv { color: #003333 } /* Name.Variable */ +pre.pygments .tok-ow { color: #000000; font-weight: bold } /* Operator.Word */ +pre.pygments .tok-w { color: #bbbbbb } /* Text.Whitespace */ +pre.pygments .tok-mb { color: #FF6600 } /* Literal.Number.Bin */ +pre.pygments .tok-mf { color: #FF6600 } /* Literal.Number.Float */ +pre.pygments .tok-mh { color: #FF6600 } /* Literal.Number.Hex */ +pre.pygments .tok-mi { color: #FF6600 } /* Literal.Number.Integer */ +pre.pygments .tok-mo { color: #FF6600 } /* Literal.Number.Oct */ +pre.pygments .tok-sa { color: #CC3300 } /* Literal.String.Affix */ +pre.pygments .tok-sb { color: #CC3300 } /* Literal.String.Backtick */ +pre.pygments .tok-sc { color: #CC3300 } /* Literal.String.Char */ +pre.pygments .tok-dl { color: #CC3300 } /* Literal.String.Delimiter */ +pre.pygments .tok-sd { color: #CC3300; font-style: italic } /* Literal.String.Doc */ +pre.pygments .tok-s2 { color: #CC3300 } /* Literal.String.Double */ +pre.pygments .tok-se { color: #CC3300; font-weight: bold } /* Literal.String.Escape */ +pre.pygments .tok-sh { color: #CC3300 } /* Literal.String.Heredoc */ +pre.pygments .tok-si { color: #AA0000 } /* Literal.String.Interpol */ +pre.pygments .tok-sx { color: #CC3300 } /* Literal.String.Other */ +pre.pygments .tok-sr { color: #33AAAA } /* Literal.String.Regex */ +pre.pygments .tok-s1 { color: #CC3300 } /* Literal.String.Single */ +pre.pygments .tok-ss { color: #FFCC33 } /* Literal.String.Symbol */ +pre.pygments .tok-bp { color: #336666 } /* Name.Builtin.Pseudo */ +pre.pygments .tok-fm { color: #CC00FF } /* Name.Function.Magic */ +pre.pygments .tok-vc { color: #003333 } /* Name.Variable.Class */ +pre.pygments .tok-vg { color: #003333 } /* Name.Variable.Global */ +pre.pygments .tok-vi { color: #003333 } /* Name.Variable.Instance */ +pre.pygments .tok-vm { color: #003333 } /* Name.Variable.Magic */ +pre.pygments .tok-il { color: #FF6600 } /* Literal.Number.Integer.Long */ \ No newline at end of file diff --git a/smlnj-lib/Doc/src/styles/smlnj-lib-base_css.in b/smlnj-lib/Doc/src/styles/smlnj-lib-base_css.in new file mode 100644 index 0000000..555cc0c --- /dev/null +++ b/smlnj-lib/Doc/src/styles/smlnj-lib-base_css.in @@ -0,0 +1,456 @@ +/* Modified version of default asciidoctor.css */ + +/* Asciidoctor default stylesheet | MIT License | https://asciidoctor.org */ +/* Uncomment @import statement when using as custom stylesheet */ +/*@import "https://fonts.googleapis.com/css?family=Open+Sans:300,300italic,400,400italic,600,600italic%7CNoto+Serif:400,400italic,700,700italic%7CDroid+Sans+Mono:400,700";*/ +article,aside,details,figcaption,figure,footer,header,hgroup,main,nav,section{display:block} +audio,canvas,video{display:inline-block} +audio:not([controls]){display:none;height:0} +script{display:none!important} +html{font-family:sans-serif;-ms-text-size-adjust:100%;-webkit-text-size-adjust:100%} +a{background:none} +a:focus{outline:thin dotted} +a:active,a:hover{outline:0} +h1{font-size:2em;margin:.67em 0} +abbr[title]{border-bottom:1px dotted} +b,strong{font-weight:bold} +dfn{font-style:italic} +hr{-moz-box-sizing:content-box;box-sizing:content-box;height:0} +mark{background:#ff0;color:#000} +code,kbd,pre,samp{font-family:monospace;font-size:1em} +pre{white-space:pre-wrap} +q{quotes:"\201C" "\201D" "\2018" "\2019"} +small{font-size:80%} +sub,sup{font-size:75%;line-height:0;position:relative;vertical-align:baseline} +sup{top:-.5em} +sub{bottom:-.25em} +img{border:0} +svg:not(:root){overflow:hidden} +figure{margin:0} +fieldset{border:1px solid silver;margin:0 2px;padding:.35em .625em .75em} +legend{border:0;padding:0} +button,input,select,textarea{font-family:inherit;font-size:100%;margin:0} +button,input{line-height:normal} +button,select{text-transform:none} +button,html input[type="button"],input[type="reset"],input[type="submit"]{-webkit-appearance:button;cursor:pointer} +button[disabled],html input[disabled]{cursor:default} +input[type="checkbox"],input[type="radio"]{box-sizing:border-box;padding:0} +button::-moz-focus-inner,input::-moz-focus-inner{border:0;padding:0} +textarea{overflow:auto;vertical-align:top} +table{border-collapse:collapse;border-spacing:0} +*,*::before,*::after{-moz-box-sizing:border-box;-webkit-box-sizing:border-box;box-sizing:border-box} +html,body{font-size:100%} +body{background:#fff;color:rgba(0,0,0,.8);padding:0;margin:0;font-family:"Noto Serif","DejaVu Serif",serif;font-weight:400;font-style:normal;line-height:1;position:relative;cursor:auto;tab-size:4;-moz-osx-font-smoothing:grayscale;-webkit-font-smoothing:antialiased} +a:hover{cursor:pointer} +img,object,embed{max-width:100%;height:auto} +object,embed{height:100%} +img{-ms-interpolation-mode:bicubic} +.left{float:left!important} +.right{float:right!important} +.text-left{text-align:left!important} +.text-right{text-align:right!important} +.text-center{text-align:center!important} +.text-justify{text-align:justify!important} +.hide{display:none} +img,object,svg{display:inline-block;vertical-align:middle} +textarea{height:auto;min-height:50px} +select{width:100%} +.center{margin-left:auto;margin-right:auto} +.stretch{width:100%} +.subheader,.admonitionblock td.content>.title,.audioblock>.title,.exampleblock>.title,.imageblock>.title,.listingblock>.title,.literalblock>.title,.stemblock>.title,.openblock>.title,.paragraph>.title,.quoteblock>.title,table.tableblock>.title,.verseblock>.title,.videoblock>.title,.dlist>.title,.olist>.title,.ulist>.title,.qlist>.title,.hdlist>.title{line-height:1.45;color:#7a2518;font-weight:400;margin-top:0;margin-bottom:.25em} +div,dl,dt,dd,ul,ol,li,h1,h2,h3,#toctitle,.sidebarblock>.content>.title,h4,h5,h6,pre,form,p,blockquote,th,td{margin:0;padding:0;direction:ltr} +a{color:#2156a5;text-decoration:underline;line-height:inherit} +a:hover,a:focus{color:#1d4b8f} +a img{border:0} +p{font-family:inherit;font-weight:400;font-size:1em;line-height:1.6;margin-bottom:1.25em;text-rendering:optimizeLegibility} +p aside{font-size:.875em;line-height:1.35;font-style:italic} +h1,h2,h3,#toctitle,.sidebarblock>.content>.title,h4,h5,h6{font-family:"Open Sans","DejaVu Sans",sans-serif;font-weight:300;font-style:normal;color:#ba3925;text-rendering:optimizeLegibility;margin-top:1em;margin-bottom:.5em;line-height:1.0125em} +h1 small,h2 small,h3 small,#toctitle small,.sidebarblock>.content>.title small,h4 small,h5 small,h6 small{font-size:60%;color:#e99b8f;line-height:0} +h1{font-size:2.125em} +h2{font-size:1.6875em} +h3,#toctitle,.sidebarblock>.content>.title{font-size:1.375em} +h4,h5{font-size:1.125em} +h6{font-size:1em} +hr{border:solid #dddddf;border-width:1px 0 0;clear:both;margin:1.25em 0 1.1875em;height:0} +em,i{font-style:italic;line-height:inherit} +strong,b{font-weight:bold;line-height:inherit} +small{font-size:60%;line-height:inherit} +code{font-family:"Droid Sans Mono","DejaVu Sans Mono",monospace;font-weight:400;color:rgba(0,0,0,.9)} +ul,ol,dl{font-size:1em;line-height:1.6;margin-bottom:1.25em;list-style-position:outside;font-family:inherit} +ul,ol{margin-left:1.5em} +ul li ul,ul li ol{margin-left:1.25em;margin-bottom:0;font-size:1em} +ul.square li ul,ul.circle li ul,ul.disc li ul{list-style:inherit} +ul.square{list-style-type:square} +ul.circle{list-style-type:circle} +ul.disc{list-style-type:disc} +ol li ul,ol li ol{margin-left:1.25em;margin-bottom:0} +dl dt{margin-bottom:.3125em;font-weight:bold} +dl dd{margin-bottom:1.25em} +abbr,acronym{text-transform:uppercase;font-size:90%;color:rgba(0,0,0,.8);border-bottom:1px dotted #ddd;cursor:help} +abbr{text-transform:none} +blockquote{margin:0 0 1.25em;padding:.5625em 1.25em 0 1.1875em;border-left:1px solid #ddd} +blockquote cite{display:block;font-size:.9375em;color:rgba(0,0,0,.6)} +blockquote cite::before{content:"\2014 \0020"} +blockquote cite a,blockquote cite a:visited{color:rgba(0,0,0,.6)} +blockquote,blockquote p{line-height:1.6;color:rgba(0,0,0,.85)} +@media screen and (min-width:768px){h1,h2,h3,#toctitle,.sidebarblock>.content>.title,h4,h5,h6{line-height:1.2} +h1{font-size:2.75em} +h2{font-size:2.3125em} +h3,#toctitle,.sidebarblock>.content>.title{font-size:1.6875em} +h4{font-size:1.4375em}} +table{background:#fff;margin-bottom:1.25em;border:solid 1px #dedede} +table thead,table tfoot{background:#f7f8f7} +table thead tr th,table thead tr td,table tfoot tr th,table tfoot tr td{padding:.5em .625em .625em;font-size:inherit;color:rgba(0,0,0,.8);text-align:left} +table tr th,table tr td{padding:.5625em .625em;font-size:inherit;color:rgba(0,0,0,.8)} +table tr.even,table tr.alt{background:#f8f8f7} +table thead tr th,table tfoot tr th,table tbody tr td,table tr td,table tfoot tr td{display:table-cell;line-height:1.6} +h1,h2,h3,#toctitle,.sidebarblock>.content>.title,h4,h5,h6{line-height:1.2;word-spacing:-.05em} +h1 strong,h2 strong,h3 strong,#toctitle strong,.sidebarblock>.content>.title strong,h4 strong,h5 strong,h6 strong{font-weight:400} +.clearfix::before,.clearfix::after,.float-group::before,.float-group::after{content:" ";display:table} +.clearfix::after,.float-group::after{clear:both} +:not(pre):not([class^=L])>code{font-size:.9375em;font-style:normal!important;letter-spacing:0;padding:.1em .5ex;word-spacing:-.15em;background:#f7f7f8;-webkit-border-radius:4px;border-radius:4px;line-height:1.45;text-rendering:optimizeSpeed;word-wrap:break-word} +:not(pre)>code.nobreak{word-wrap:normal} +:not(pre)>code.nowrap{white-space:nowrap} +pre{color:rgba(0,0,0,.9);font-family:"Droid Sans Mono","DejaVu Sans Mono",monospace;line-height:1.45;text-rendering:optimizeSpeed} +pre code,pre pre{color:inherit;font-size:inherit;line-height:inherit} +pre>code{display:block} +pre.nowrap,pre.nowrap pre{white-space:pre;word-wrap:normal} +em em{font-style:normal} +strong strong{font-weight:400} +.keyseq{color:rgba(51,51,51,.8)} +kbd{font-family:"Droid Sans Mono","DejaVu Sans Mono",monospace;display:inline-block;color:rgba(0,0,0,.8);font-size:.65em;line-height:1.45;background:#f7f7f7;border:1px solid #ccc;-webkit-border-radius:3px;border-radius:3px;-webkit-box-shadow:0 1px 0 rgba(0,0,0,.2),0 0 0 .1em white inset;box-shadow:0 1px 0 rgba(0,0,0,.2),0 0 0 .1em #fff inset;margin:0 .15em;padding:.2em .5em;vertical-align:middle;position:relative;top:-.1em;white-space:nowrap} +.keyseq kbd:first-child{margin-left:0} +.keyseq kbd:last-child{margin-right:0} +.menuseq,.menuref{color:#000} +.menuseq b:not(.caret),.menuref{font-weight:inherit} +.menuseq{word-spacing:-.02em} +.menuseq b.caret{font-size:1.25em;line-height:.8} +.menuseq i.caret{font-weight:bold;text-align:center;width:.45em} +b.button::before,b.button::after{position:relative;top:-1px;font-weight:400} +b.button::before{content:"[";padding:0 3px 0 2px} +b.button::after{content:"]";padding:0 2px 0 3px} +p a>code:hover{color:rgba(0,0,0,.9)} +#header,#content,#footnotes,#footer{width:100%;margin-left:auto;margin-right:auto;margin-top:0;margin-bottom:0;max-width:62.5em;*zoom:1;position:relative;padding-left:.9375em;padding-right:.9375em} +#header::before,#header::after,#content::before,#content::after,#footnotes::before,#footnotes::after,#footer::before,#footer::after{content:" ";display:table} +#header::after,#content::after,#footnotes::after,#footer::after{clear:both} +#content{margin-top:1.25em} +#content::before{content:none} +#header>h1:first-child{color:rgba(0,0,0,.85);margin-top:2.25rem;margin-bottom:0} +#header>h1:first-child+#toc{margin-top:8px;border-top:1px solid #dddddf} +#header>h1:only-child,body.toc2 #header>h1:nth-last-child(2){border-bottom:1px solid #dddddf;padding-bottom:8px} +#header .details{border-bottom:1px solid #dddddf;line-height:1.45;padding-top:.25em;padding-bottom:.25em;padding-left:.25em;color:rgba(0,0,0,.6);display:-ms-flexbox;display:-webkit-flex;display:flex;-ms-flex-flow:row wrap;-webkit-flex-flow:row wrap;flex-flow:row wrap} +#header .details span:first-child{margin-left:-.125em} +#header .details span.email a{color:rgba(0,0,0,.85)} +#header .details br{display:none} +#header .details br+span::before{content:"\00a0\2013\00a0"} +#header .details br+span.author::before{content:"\00a0\22c5\00a0";color:rgba(0,0,0,.85)} +#header .details br+span#revremark::before{content:"\00a0|\00a0"} +#header #revnumber{text-transform:capitalize} +#header #revnumber::after{content:"\00a0"} +#content>h1:first-child:not([class]){color:rgba(0,0,0,.85);border-bottom:1px solid #dddddf;padding-bottom:8px;margin-top:0;padding-top:1rem;margin-bottom:1.25rem} +#toc{border-bottom:1px solid #e7e7e9;padding-bottom:.5em} +#toc>ul{margin-left:.125em} +#toc ul.sectlevel0>li>a{font-style:italic} +#toc ul.sectlevel0 ul.sectlevel1{margin:.5em 0} +#toc ul{font-family:"Open Sans","DejaVu Sans",sans-serif;list-style-type:none} +#toc li{line-height:1.3334;margin-top:.3334em} +#toc a{text-decoration:none} +#toc a:active{text-decoration:underline} +#toctitle{color:#7a2518;font-size:1.2em} +@media screen and (min-width:768px){#toctitle{font-size:1.375em} +body.toc2{padding-left:15em;padding-right:0} +#toc.toc2{ + margin-top:0!important; + background:#f8f8f7; + position:fixed; + width:15em; + left:0; top:0; + border-right:1px solid #e7e7e9; + border-top-width:0!important; + border-bottom-width:0!important; + z-index:1000; + padding:1.25em 1em; + height:100%; + overflow:auto +} +#toc.toc2 #toctitle{margin-top:0;margin-bottom:.8rem;font-size:1.2em} +#toc.toc2>ul{font-size:.9em;margin-bottom:0} +#toc.toc2 ul ul{margin-left:0;padding-left:1em} +#toc.toc2 ul.sectlevel0 ul.sectlevel1{padding-left:0;margin-top:.5em;margin-bottom:.5em} +body.toc2.toc-right{padding-left:0;padding-right:15em} +body.toc2.toc-right #toc.toc2{border-right-width:0;border-left:1px solid #e7e7e9;left:auto;right:0}} +@media screen and (min-width:1280px){body.toc2{padding-left:20em;padding-right:0} +#toc.toc2{width:20em} +#toc.toc2 #toctitle{font-size:1.375em} +#toc.toc2>ul{font-size:.95em} +#toc.toc2 ul ul{padding-left:1.25em} +body.toc2.toc-right{padding-left:0;padding-right:20em}} +#content #toc{ + border-style:solid; + border-width:1px; + border-color:#e0e0dc; + margin-bottom:1.25em; + padding:1.25em; + background:#f8f8f7; + -webkit-border-radius:4px; + border-radius:4px +} +#content #toc>:first-child{margin-top:0} +#content #toc>:last-child{margin-bottom:0} +#footer{max-width:100%;background:rgba(0,0,0,.8);padding:1.25em} +#footer-text{color:rgba(255,255,255,.8);line-height:1.44} +#content{margin-bottom:.625em} +.sect1{padding-bottom:.625em} +@media screen and (min-width:768px){#content{margin-bottom:1.25em} +.sect1{padding-bottom:1.25em}} +.sect1:last-child{padding-bottom:0} +.sect1+.sect1{border-top:1px solid #e7e7e9} +#content h1>a.anchor,h2>a.anchor,h3>a.anchor,#toctitle>a.anchor,.sidebarblock>.content>.title>a.anchor,h4>a.anchor,h5>a.anchor,h6>a.anchor{position:absolute;z-index:1001;width:1.5ex;margin-left:-1.5ex;display:block;text-decoration:none!important;visibility:hidden;text-align:center;font-weight:400} +#content h1>a.anchor::before,h2>a.anchor::before,h3>a.anchor::before,#toctitle>a.anchor::before,.sidebarblock>.content>.title>a.anchor::before,h4>a.anchor::before,h5>a.anchor::before,h6>a.anchor::before{content:"\00A7";font-size:.85em;display:block;padding-top:.1em} +#content h1:hover>a.anchor,#content h1>a.anchor:hover,h2:hover>a.anchor,h2>a.anchor:hover,h3:hover>a.anchor,#toctitle:hover>a.anchor,.sidebarblock>.content>.title:hover>a.anchor,h3>a.anchor:hover,#toctitle>a.anchor:hover,.sidebarblock>.content>.title>a.anchor:hover,h4:hover>a.anchor,h4>a.anchor:hover,h5:hover>a.anchor,h5>a.anchor:hover,h6:hover>a.anchor,h6>a.anchor:hover{visibility:visible} +#content h1>a.link,h2>a.link,h3>a.link,#toctitle>a.link,.sidebarblock>.content>.title>a.link,h4>a.link,h5>a.link,h6>a.link{color:#ba3925;text-decoration:none} +#content h1>a.link:hover,h2>a.link:hover,h3>a.link:hover,#toctitle>a.link:hover,.sidebarblock>.content>.title>a.link:hover,h4>a.link:hover,h5>a.link:hover,h6>a.link:hover{color:#a53221} +details,.audioblock,.imageblock,.literalblock,.listingblock,.stemblock,.videoblock{margin-bottom:1.25em} +details>summary:first-of-type{cursor:pointer;display:list-item;outline:none;margin-bottom:.75em} +.admonitionblock td.content>.title,.audioblock>.title,.exampleblock>.title,.imageblock>.title,.listingblock>.title,.literalblock>.title,.stemblock>.title,.openblock>.title,.paragraph>.title,.quoteblock>.title,table.tableblock>.title,.verseblock>.title,.videoblock>.title,.dlist>.title,.olist>.title,.ulist>.title,.qlist>.title,.hdlist>.title{text-rendering:optimizeLegibility;text-align:left;font-family:"Noto Serif","DejaVu Serif",serif;font-size:1rem;font-style:italic} +table.tableblock.fit-content>caption.title{white-space:nowrap;width:0} +.paragraph.lead>p,#preamble>.sectionbody>[class="paragraph"]:first-of-type p{font-size:1.21875em;line-height:1.6;color:rgba(0,0,0,.85)} +table.tableblock #preamble>.sectionbody>[class="paragraph"]:first-of-type p{font-size:inherit} +.admonitionblock>table{border-collapse:separate;border:0;background:none;width:100%} +.admonitionblock>table td.icon{text-align:center;width:80px} +.admonitionblock>table td.icon img{max-width:none} +.admonitionblock>table td.icon .title{font-weight:bold;font-family:"Open Sans","DejaVu Sans",sans-serif;text-transform:uppercase} +.admonitionblock>table td.content{padding-left:1.125em;padding-right:1.25em;border-left:1px solid #dddddf;color:rgba(0,0,0,.6)} +.admonitionblock>table td.content>:last-child>:last-child{margin-bottom:0} +.exampleblock>.content{border-style:solid;border-width:1px;border-color:#e6e6e6;margin-bottom:1.25em;padding:1.25em;background:#fff;-webkit-border-radius:4px;border-radius:4px} +.exampleblock>.content>:first-child{margin-top:0} +.exampleblock>.content>:last-child{margin-bottom:0} +.sidebarblock{border-style:solid;border-width:1px;border-color:#dbdbd6;margin-bottom:1.25em;padding:1.25em;background:#f3f3f2;-webkit-border-radius:4px;border-radius:4px} +.sidebarblock>:first-child{margin-top:0} +.sidebarblock>:last-child{margin-bottom:0} +.sidebarblock>.content>.title{color:#7a2518;margin-top:0;text-align:center} +.exampleblock>.content>:last-child>:last-child,.exampleblock>.content .olist>ol>li:last-child>:last-child,.exampleblock>.content .ulist>ul>li:last-child>:last-child,.exampleblock>.content .qlist>ol>li:last-child>:last-child,.sidebarblock>.content>:last-child>:last-child,.sidebarblock>.content .olist>ol>li:last-child>:last-child,.sidebarblock>.content .ulist>ul>li:last-child>:last-child,.sidebarblock>.content .qlist>ol>li:last-child>:last-child{margin-bottom:0} +.literalblock pre,.listingblock>.content>pre{-webkit-border-radius:4px;border-radius:4px;word-wrap:break-word;overflow-x:auto;padding:1em;font-size:.8125em} +@media screen and (min-width:768px){.literalblock pre,.listingblock>.content>pre{font-size:.90625em}} +@media screen and (min-width:1280px){.literalblock pre,.listingblock>.content>pre{font-size:1em}} +.literalblock.output pre{color:#f7f7f8;background:rgba(0,0,0,.9)} +.listingblock>.content>pre:not(.highlight),.listingblock>.content>pre[class="highlight"],.listingblock>.content>pre[class^="highlight "]{background:#f7f7f8} +.listingblock>.content{position:relative} +.listingblock code[data-lang]::before{display:none;content:attr(data-lang);position:absolute;font-size:.75em;top:.425rem;right:.5rem;line-height:1;text-transform:uppercase;color:inherit;opacity:.5} +.listingblock:hover code[data-lang]::before{display:block} +.listingblock.terminal pre .command::before{content:attr(data-prompt);padding-right:.5em;color:inherit;opacity:.5} +.listingblock.terminal pre .command:not([data-prompt])::before{content:"$"} +.listingblock pre.highlightjs{padding:0} +.listingblock pre.highlightjs>code{padding:1em;-webkit-border-radius:4px;border-radius:4px} +.listingblock pre.prettyprint{border-width:0} +.prettyprint{background:#f7f7f8} +pre.prettyprint .linenums{line-height:1.45;margin-left:2em} +pre.prettyprint li{background:none;list-style-type:inherit;padding-left:0} +pre.prettyprint li code[data-lang]::before{opacity:1} +pre.prettyprint li:not(:first-child) code[data-lang]::before{display:none} +table.linenotable{border-collapse:separate;border:0;margin-bottom:0;background:none} +table.linenotable td[class]{color:inherit;vertical-align:top;padding:0;line-height:inherit;white-space:normal} +table.linenotable td.code{padding-left:.75em} +table.linenotable td.linenos{border-right:1px solid currentColor;opacity:.35;padding-right:.5em} +pre.pygments .lineno{border-right:1px solid currentColor;opacity:.35;display:inline-block;margin-right:.75em} +pre.pygments .lineno::before{content:"";margin-right:-.125em} +.quoteblock{margin:0 1em 1.25em 1.5em;display:table} +.quoteblock>.title{margin-left:-1.5em;margin-bottom:.75em} +.quoteblock blockquote,.quoteblock p{color:rgba(0,0,0,.85);font-size:1.15rem;line-height:1.75;word-spacing:.1em;letter-spacing:0;font-style:italic;text-align:justify} +.quoteblock blockquote{margin:0;padding:0;border:0} +.quoteblock blockquote::before{content:"\201c";float:left;font-size:2.75em;font-weight:bold;line-height:.6em;margin-left:-.6em;color:#7a2518;text-shadow:0 1px 2px rgba(0,0,0,.1)} +.quoteblock blockquote>.paragraph:last-child p{margin-bottom:0} +.quoteblock .attribution{margin-top:.75em;margin-right:.5ex;text-align:right} +.verseblock{margin:0 1em 1.25em} +.verseblock pre{font-family:"Open Sans","DejaVu Sans",sans;font-size:1.15rem;color:rgba(0,0,0,.85);font-weight:300;text-rendering:optimizeLegibility} +.verseblock pre strong{font-weight:400} +.verseblock .attribution{margin-top:1.25rem;margin-left:.5ex} +.quoteblock .attribution,.verseblock .attribution{font-size:.9375em;line-height:1.45;font-style:italic} +.quoteblock .attribution br,.verseblock .attribution br{display:none} +.quoteblock .attribution cite,.verseblock .attribution cite{display:block;letter-spacing:-.025em;color:rgba(0,0,0,.6)} +.quoteblock.abstract blockquote::before,.quoteblock.excerpt blockquote::before,.quoteblock .quoteblock blockquote::before{display:none} +.quoteblock.abstract blockquote,.quoteblock.abstract p,.quoteblock.excerpt blockquote,.quoteblock.excerpt p,.quoteblock .quoteblock blockquote,.quoteblock .quoteblock p{line-height:1.6;word-spacing:0} +.quoteblock.abstract{margin:0 1em 1.25em;display:block} +.quoteblock.abstract>.title{margin:0 0 .375em;font-size:1.15em;text-align:center} +.quoteblock.excerpt,.quoteblock .quoteblock{margin:0 0 1.25em;padding:0 0 .25em 1em;border-left:.25em solid #dddddf} +.quoteblock.excerpt blockquote,.quoteblock.excerpt p,.quoteblock .quoteblock blockquote,.quoteblock .quoteblock p{color:inherit;font-size:1.0625rem} +.quoteblock.excerpt .attribution,.quoteblock .quoteblock .attribution{color:inherit;text-align:left;margin-right:0} +table.tableblock{max-width:100%;border-collapse:separate} +p.tableblock:last-child{margin-bottom:0} +td.tableblock>.content{margin-bottom:-1.25em} +table.tableblock,th.tableblock,td.tableblock{border:0 solid #dedede} +table.grid-all>thead>tr>.tableblock,table.grid-all>tbody>tr>.tableblock{border-width:0 1px 1px 0} +table.grid-all>tfoot>tr>.tableblock{border-width:1px 1px 0 0} +table.grid-cols>*>tr>.tableblock{border-width:0 1px 0 0} +table.grid-rows>thead>tr>.tableblock,table.grid-rows>tbody>tr>.tableblock{border-width:0 0 1px} +table.grid-rows>tfoot>tr>.tableblock{border-width:1px 0 0} +table.grid-all>*>tr>.tableblock:last-child,table.grid-cols>*>tr>.tableblock:last-child{border-right-width:0} +table.grid-all>tbody>tr:last-child>.tableblock,table.grid-all>thead:last-child>tr>.tableblock,table.grid-rows>tbody>tr:last-child>.tableblock,table.grid-rows>thead:last-child>tr>.tableblock{border-bottom-width:0} +table.frame-all{border-width:1px} +table.frame-sides{border-width:0 1px} +table.frame-topbot,table.frame-ends{border-width:1px 0} +table.stripes-all tr,table.stripes-odd tr:nth-of-type(odd),table.stripes-even tr:nth-of-type(even),table.stripes-hover tr:hover{background:#f8f8f7} +th.halign-left,td.halign-left{text-align:left} +th.halign-right,td.halign-right{text-align:right} +th.halign-center,td.halign-center{text-align:center} +th.valign-top,td.valign-top{vertical-align:top} +th.valign-bottom,td.valign-bottom{vertical-align:bottom} +th.valign-middle,td.valign-middle{vertical-align:middle} +table thead th,table tfoot th{font-weight:bold} +tbody tr th{display:table-cell;line-height:1.6;background:#f7f8f7} +tbody tr th,tbody tr th p,tfoot tr th,tfoot tr th p{color:rgba(0,0,0,.8);font-weight:bold} +p.tableblock>code:only-child{background:none;padding:0} +p.tableblock{font-size:1em} +ol{margin-left:1.75em} +ul li ol{margin-left:1.5em} +dl dd{margin-left:1.125em} +dl dd:last-child,dl dd:last-child>:last-child{margin-bottom:0} +ol>li p,ul>li p,ul dd,ol dd,.olist .olist,.ulist .ulist,.ulist .olist,.olist .ulist{margin-bottom:.625em} +ul.checklist,ul.none,ol.none,ul.no-bullet,ol.no-bullet,ol.unnumbered,ul.unstyled,ol.unstyled{list-style-type:none} +ul.no-bullet,ol.no-bullet,ol.unnumbered{margin-left:.625em} +ul.unstyled,ol.unstyled{margin-left:0} +ul.checklist{margin-left:.625em} +ul.checklist li>p:first-child>.fa-square-o:first-child,ul.checklist li>p:first-child>.fa-check-square-o:first-child{width:1.25em;font-size:.8em;position:relative;bottom:.125em} +ul.checklist li>p:first-child>input[type="checkbox"]:first-child{margin-right:.25em} +ul.inline{display:-ms-flexbox;display:-webkit-box;display:flex;-ms-flex-flow:row wrap;-webkit-flex-flow:row wrap;flex-flow:row wrap;list-style:none;margin:0 0 .625em -1.25em} +ul.inline>li{margin-left:1.25em} +.unstyled dl dt{font-weight:400;font-style:normal} +ol.arabic{list-style-type:decimal} +ol.decimal{list-style-type:decimal-leading-zero} +ol.loweralpha{list-style-type:lower-alpha} +ol.upperalpha{list-style-type:upper-alpha} +ol.lowerroman{list-style-type:lower-roman} +ol.upperroman{list-style-type:upper-roman} +ol.lowergreek{list-style-type:lower-greek} +.hdlist>table,.colist>table{border:0;background:none} +.hdlist>table>tbody>tr,.colist>table>tbody>tr{background:none} +td.hdlist1,td.hdlist2{vertical-align:top;padding:0 .625em} +td.hdlist1{font-weight:bold;padding-bottom:1.25em} +.literalblock+.colist,.listingblock+.colist{margin-top:-.5em} +.colist td:not([class]):first-child{padding:.4em .75em 0;line-height:1;vertical-align:top} +.colist td:not([class]):first-child img{max-width:none} +.colist td:not([class]):last-child{padding:.25em 0} +.thumb,.th{line-height:0;display:inline-block;border:solid 4px #fff;-webkit-box-shadow:0 0 0 1px #ddd;box-shadow:0 0 0 1px #ddd} +.imageblock.left{margin:.25em .625em 1.25em 0} +.imageblock.right{margin:.25em 0 1.25em .625em} +.imageblock>.title{margin-bottom:0} +.imageblock.thumb,.imageblock.th{border-width:6px} +.imageblock.thumb>.title,.imageblock.th>.title{padding:0 .125em} +.image.left,.image.right{margin-top:.25em;margin-bottom:.25em;display:inline-block;line-height:0} +.image.left{margin-right:.625em} +.image.right{margin-left:.625em} +a.image{text-decoration:none;display:inline-block} +a.image object{pointer-events:none} +sup.footnote,sup.footnoteref{font-size:.875em;position:static;vertical-align:super} +sup.footnote a,sup.footnoteref a{text-decoration:none} +sup.footnote a:active,sup.footnoteref a:active{text-decoration:underline} +#footnotes{padding-top:.75em;padding-bottom:.75em;margin-bottom:.625em} +#footnotes hr{width:20%;min-width:6.25em;margin:-.25em 0 .75em;border-width:1px 0 0} +#footnotes .footnote{padding:0 .375em 0 .225em;line-height:1.3334;font-size:.875em;margin-left:1.2em;margin-bottom:.2em} +#footnotes .footnote a:first-of-type{font-weight:bold;text-decoration:none;margin-left:-1.05em} +#footnotes .footnote:last-of-type{margin-bottom:0} +#content #footnotes{margin-top:-.625em;margin-bottom:0;padding:.75em 0} +.gist .file-data>table{border:0;background:#fff;width:100%;margin-bottom:0} +.gist .file-data>table td.line-data{width:99%} +div.unbreakable{page-break-inside:avoid} +.big{font-size:larger} +.small{font-size:smaller} +.underline{text-decoration:underline} +.overline{text-decoration:overline} +.line-through{text-decoration:line-through} +.aqua{color:#00bfbf} +.aqua-background{background:#00fafa} +.black{color:#000} +.black-background{background:#000} +.blue{color:#0000bf} +.blue-background{background:#0000fa} +.fuchsia{color:#bf00bf} +.fuchsia-background{background:#fa00fa} +.gray{color:#606060} +.gray-background{background:#7d7d7d} +.green{color:#006000} +.green-background{background:#007d00} +.lime{color:#00bf00} +.lime-background{background:#00fa00} +.maroon{color:#600000} +.maroon-background{background:#7d0000} +.navy{color:#000060} +.navy-background{background:#00007d} +.olive{color:#606000} +.olive-background{background:#7d7d00} +.purple{color:#600060} +.purple-background{background:#7d007d} +.red{color:#bf0000} +.red-background{background:#fa0000} +.silver{color:#909090} +.silver-background{background:#bcbcbc} +.teal{color:#006060} +.teal-background{background:#007d7d} +.white{color:#bfbfbf} +.white-background{background:#fafafa} +.yellow{color:#bfbf00} +.yellow-background{background:#fafa00} +span.icon>.fa{cursor:default} +a span.icon>.fa{cursor:inherit} +.admonitionblock td.icon [class^="fa icon-"]{font-size:2.5em;text-shadow:1px 1px 2px rgba(0,0,0,.5);cursor:default} +.admonitionblock td.icon .icon-note::before{content:"\f05a";color:#19407c} +.admonitionblock td.icon .icon-tip::before{content:"\f0eb";text-shadow:1px 1px 2px rgba(155,155,0,.8);color:#111} +.admonitionblock td.icon .icon-warning::before{content:"\f071";color:#bf6900} +.admonitionblock td.icon .icon-caution::before{content:"\f06d";color:#bf3400} +.admonitionblock td.icon .icon-important::before{content:"\f06a";color:#bf0000} +.conum[data-value]{display:inline-block;color:#fff!important;background:rgba(0,0,0,.8);-webkit-border-radius:100px;border-radius:100px;text-align:center;font-size:.75em;width:1.67em;height:1.67em;line-height:1.67em;font-family:"Open Sans","DejaVu Sans",sans-serif;font-style:normal;font-weight:bold} +.conum[data-value] *{color:#fff!important} +.conum[data-value]+b{display:none} +.conum[data-value]::after{content:attr(data-value)} +pre .conum[data-value]{position:relative;top:-.125em} +b.conum *{color:inherit!important} +.conum:not([data-value]):empty{display:none} +dt,th.tableblock,td.content,div.footnote{text-rendering:optimizeLegibility} +h1,h2,p,td.content,span.alt{letter-spacing:-.01em} +p strong,td.content strong,div.footnote strong{letter-spacing:-.005em} +p,blockquote,dt,td.content,span.alt{font-size:1.0625rem} +p{margin-bottom:1.25rem} +.sidebarblock p,.sidebarblock dt,.sidebarblock td.content,p.tableblock{font-size:1em} +.exampleblock>.content{background:#fffef7;border-color:#e0e0dc;-webkit-box-shadow:0 1px 4px #e0e0dc;box-shadow:0 1px 4px #e0e0dc} +.print-only{display:none!important} +@page{margin:1.25cm .75cm} +@media print{*{-webkit-box-shadow:none!important;box-shadow:none!important;text-shadow:none!important} +html{font-size:80%} +a{color:inherit!important;text-decoration:underline!important} +a.bare,a[href^="#"],a[href^="mailto:"]{text-decoration:none!important} +a[href^="http:"]:not(.bare)::after,a[href^="https:"]:not(.bare)::after{content:"(" attr(href) ")";display:inline-block;font-size:.875em;padding-left:.25em} +abbr[title]::after{content:" (" attr(title) ")"} +pre,blockquote,tr,img,object,svg{page-break-inside:avoid} +thead{display:table-header-group} +svg{max-width:100%} +p,blockquote,dt,td.content{font-size:1em;orphans:3;widows:3} +h2,h3,#toctitle,.sidebarblock>.content>.title{page-break-after:avoid} +#toc,.sidebarblock,.exampleblock>.content{background:none!important} +#toc{border-bottom:1px solid #dddddf!important;padding-bottom:0!important} +body.book #header{text-align:center} +body.book #header>h1:first-child{border:0!important;margin:2.5em 0 1em} +body.book #header .details{border:0!important;display:block;padding:0!important} +body.book #header .details span:first-child{margin-left:0!important} +body.book #header .details br{display:block} +body.book #header .details br+span::before{content:none!important} +body.book #toc{border:0!important;text-align:left!important;padding:0!important;margin:0!important} +body.book #toc,body.book #preamble,body.book h1.sect0,body.book .sect1>h2{page-break-before:always} +.listingblock code[data-lang]::before{display:block} +#footer{padding:0 .9375em} +.hide-on-print{display:none!important} +.print-only{display:block!important} +.hide-for-print{display:none!important} +.show-for-print{display:inherit!important}} +@media print,amzn-kf8{#header>h1:first-child{margin-top:1.25rem} +.sect1{padding:0!important} +.sect1+.sect1{border:0} +#footer{background:none} +#footer-text{color:rgba(0,0,0,.6);font-size:.9em}} +@media amzn-kf8{#header,#content,#footnotes,#footer{padding:0}} + +/* additional code styling support */ +dt > code { color: #19177C } /* tok-nv in pygments */ +code > span.kw { color: #008000; font-weight: bold } /* tok-k in pygments */ +code > span.tv { color: #AA22FF } /* tok-nd in pygments */ +code > span.ty { color: #B00040 } /* tok-kt in pygments */ +code > span.con { color: #0000FF; font-weight: bold } /* tok-nc in pygments */ diff --git a/smlnj-lib/Doc/src/styles/smlnj-lib-pygments_css.in b/smlnj-lib/Doc/src/styles/smlnj-lib-pygments_css.in new file mode 100644 index 0000000..ae7fac6 --- /dev/null +++ b/smlnj-lib/Doc/src/styles/smlnj-lib-pygments_css.in @@ -0,0 +1,25 @@ +/* + pygmentize filter for SML code +*/ +pre.pygments .hll { background-color: #ffffcc } +pre.pygments { font-size: 0.9em; background: @CODE_BACKGROUND@; } /* code background */ +pre.pygments .tok-k { color: @KW_COLOR@; font-weight: bold } /* Keyword */ +pre.pygments .tok-kr { color: @KW_COLOR@; font-weight: bold; } /* keywords */ +pre.pygments .tok-kt { color: @TY_COLOR@; } /* type */ +pre.pygments .tok-o { color: @KW_COLOR@; } /* Operator */ +pre.pygments .tok-c { color: @COM_COLOR@; font-style: italic; } /* comments */ +pre.pygments .tok-cm { color: @COM_COLOR@; font-style: italic; } /* comments */ +pre.pygments .tok-s2 { color: @LIT_COLOR@; } /* strings */ +pre.pygments .tok-mb { color: @LIT_COLOR@; } /* Literal.Number.Bin */ +pre.pygments .tok-mf { color: @LIT_COLOR@; } /* Literal.Number.Float */ +pre.pygments .tok-mh { color: @LIT_COLOR@; } /* Literal.Number.Hex */ +pre.pygments .tok-mi { color: @LIT_COLOR@; } /* Literal.Number.Integer */ +pre.pygments .tok-mo { color: @LIT_COLOR@; } /* Literal.Number.Oct */ +pre.pygments .tok-kt { color: @BIND_COLOR@; } /* lhs type names */ +pre.pygments .tok-n { color: @CODE_COLOR@; } /* type names on rhs; parameters */ +pre.pygments .tok-p { color: @PUNCT_COLOR@; } /* punctuation */ +pre.pygments .tok-nn { color: @BIND_COLOR@; font-weight: bold; } /* Module names */ +pre.pygments .tok-nv { color: @BIND_COLOR@; font-weight: bold; } /* variable names in val bindings */ +pre.pygments .tok-nf { color: @BIND_COLOR@; font-weight: bold; } /* function names in fun bindings */ +pre.pygments .tok-nc { color: @BIND_COLOR@; font-weight: bold; } /* data constructor name in rhs of datatype */ +pre.pygments .tok-nd { color: @TV_COLOR@; } /* type variable */ diff --git a/smlnj-lib/Doc/src/styles/smlnj-lib_css.in b/smlnj-lib/Doc/src/styles/smlnj-lib_css.in new file mode 100644 index 0000000..457e6c4 --- /dev/null +++ b/smlnj-lib/Doc/src/styles/smlnj-lib_css.in @@ -0,0 +1,184 @@ +/* CSS for SML/NJ Library documentation + * This file overrides the default asciidoctor definitions + * in smlnj-lib-base.css. + */ + +/* page layout */ + +#layout-top-mask { /* masks out scrolling material */ + position: fixed; + top: 0px; + left: 0px; + height: 2em; + width: 100%; + background-color: @BACKGROUND@; + z-index: 5; +} + +#layout-main { + margin: 0.5em 0.5em 0.5em 0.5em; +} + +#layout-banner-spacer { + position: relative; + width: 100%; + height: 10em; + clear: both; +} + +#layout-banner { + margin: 0; + padding: 1.25em; + background-color: @BANNER_BACKGROUND@; + font-family: Arial, Helvetica, Geneva, sans-serif; + text-align: left; + -webkit-border-top-left-radius: 1em; + -webkit-border-top-right-radius: 1em; + -webkit-border-bottom-right-radius: 1em; + -moz-border-radius-topright: 1em; + -moz-border-radius-bottomright: 1em; + border-top-right-radius: 1em; + border-bottom-right-radius: 1em; + position: fixed; + top: 0.5em; /* should match layout-main margin */ + left: 0.5em; + right: 0.5em; + z-index: 10; +} + +#layout-title { + height: 7.5em; + color: @TITLE_COLOR@; +} + +#layout-title-main { + font-size: 4.0em; + margin: 0 0 5px; + color: inherit; +} + +#layout-title-main a { + color: inherit; + text-decoration: none; +} + +#layout-title-description { + font-size: 2.4em; + margin: 0; + color: inherit; +} + +#layout-title-description code { + color: inherit; + background-color: inherit; +} + +#layout-logo { + height: 7.5em; + float: left; + width: 14em; +} + +#layout-logo img { + height: 7.5em; + max-width: none; /* override smlnj-lib-base.css */ +} + +#layout-toc { + position: fixed; + left: 0.5em; + width: 15em; + top: 10em; + bottom: 0; + z-index: 2; + background-color: @BANNER_BACKGROUND@; + color: @TITLE_COLOR@; + overflow: auto; +} + +#layout-content-box { + margin-left: 15em; + z-index: 0; +} + +#layout-footer-box { + margin-left: 15em; + margin-top: 4.0em; + padding-left: 1.0em; + padding-right: 0.4em; + padding-top: 0.5em; +} + +#layout-footer { + border-top: 2px solid @HR_COLOR@; +} + +#layout-footer-text { + float: left; + font-size: 0.8em; + padding-top: 0.2em; + padding-bottom: 0.5em; + border-bottom: 2px solid @HR_COLOR@; + margin-bottom: 1em; +} + +/* Navigation (aka TOC) */ +#toc { + margin-top: 2em; + margin-left: 0; + margin-bottom: 2em; +} +#toc>:first-child{margin-top:0} +#toc>:last-child{margin-bottom:0} + +#toc a{ + color: inherit; + background-color: inherit; + text-decoration: none; +} + +#toc a:hover{ + background-color: @TOC_HIGHLIGHT@; +} + +#toc code{ + color: inherit; + background-color: inherit; +} + +ul.toc-lib-list{ + font-size: 1.1em; + list-style-type:none; + padding: 0; + margin-left: 0; +} + +li.toc-lib{ + margin-left: 0; + margin-bottom: 0.2em; + padding: 2px; +} + +ul.toc-page-list{ + font-size: 0.9em; + list-style-type: none; + padding: 0; + margin-left: 1em; +} + +li.toc-page{ + margin-left: 0; + margin-bottom: 0.2em; + padding: 2px; +} + +#toc-current { + color: blue; +} + +/* additional code styling support */ +dt > code { color: #19177C } /* tok-nv in pygments */ +code > span.kw { color: @KW_COLOR@; font-weight: bold } /* tok-k in pygments */ +code > span.tv { color: #AA22FF } /* tok-nd in pygments */ +code > span.ty { color: #B00040 } /* tok-kt in pygments */ +code > span.con { color: @BIND_COLOR@; font-weight: bold } /* tok-nc in pygments */ diff --git a/smlnj-lib/Doc/tools/Makefile.in b/smlnj-lib/Doc/tools/Makefile.in new file mode 100644 index 0000000..1c2b1ee --- /dev/null +++ b/smlnj-lib/Doc/tools/Makefile.in @@ -0,0 +1,29 @@ +# Makefile.in +# +# COPYRIGHT (c) 2023 The Fellowship of SML/NJ (http://www.smlnj.org) +# All rights reserved. +# +# Makefile for SML/NJ Library documentation tools +# +# @configure_input@ +# + +SHELL = @SHELL@ +INSTALL = @INSTALL@ +@SET_MAKE@ + +SUBDIRS = make-index gen-pages + +.PHONY: install +install: + for d in $(SUBDIRS) ; do \ + (cd $$d && $(MAKE) install) || exit $$?; \ + done + +include @MK_DIR@/clean-rules.gmk + +CLEAN_FILES = + +CLEAN_SUBDIRS = $(SUBDIRS) + +CLEANDIST_FILES = $(TOOL_BIN_DIR)/* $(TOOL_BIN_DIR)/.heap diff --git a/smlnj-lib/Doc/tools/README.md b/smlnj-lib/Doc/tools/README.md new file mode 100644 index 0000000..b7f7f56 --- /dev/null +++ b/smlnj-lib/Doc/tools/README.md @@ -0,0 +1,4 @@ +This directory tree contains some tools for extracting information +from the document sources that can used for generating HTML +navigation, etc. + diff --git a/smlnj-lib/Doc/tools/gen-pages/Makefile.in b/smlnj-lib/Doc/tools/gen-pages/Makefile.in new file mode 100644 index 0000000..5461a41 --- /dev/null +++ b/smlnj-lib/Doc/tools/gen-pages/Makefile.in @@ -0,0 +1,35 @@ +# Makefile.in +# +# COPYRIGHT (c) 2023 The Fellowship of SML/NJ (http://www.smlnj.org) +# All rights reserved. +# +# Makefile for gen-pages tool +# +# @configure_input@ +# + +SHELL = @SHELL@ +INSTALL = @INSTALL@ +@SET_MAKE@ + +TOOL_BIN = @TOOL_BIN_DIR@ +ML_BUILD = @ML_BUILD@ +HEAP_SUFFIX = @SMLNJ_HEAP_SUFFIX@ +INSTALL_WRAPPER = @ROOT_DIR@/bin/install-sml-wrapper.sh + +PROGRAM = gen-pages + +HEAP_IMAGE = $(PROGRAM).$(HEAP_SUFFIX) + +.PHONY: install +install: $(HEAP_IMAGE) + $(INSTALL_WRAPPER) $(PROGRAM) $(TOOL_BIN) + +$(HEAP_IMAGE): $(wildcard ../*.sml) $(wildcard *.sml) + $(ML_BUILD) sources.cm Main.main $(PROGRAM) + +include @MK_DIR@/clean-rules.gmk + +CLEAN_FILES = $(HEAP_IMAGE) + +DISTCLEAN_FILES = config.sml diff --git a/smlnj-lib/Doc/tools/gen-pages/config_sml.in b/smlnj-lib/Doc/tools/gen-pages/config_sml.in new file mode 100644 index 0000000..164ebb7 --- /dev/null +++ b/smlnj-lib/Doc/tools/gen-pages/config_sml.in @@ -0,0 +1,17 @@ +(* config_sml.in + * + * COPYRIGHT (c) 2024 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * Information set by the configuration. + *) + +structure Config = + struct + + val releaseDate = "@SMLNJ_RELEASE_DATE@" + val version = "@SMLNJ_RELEASE_VERSION@" + val asciidoctor = "@ASCIIDOCTOR@" + val fragDir = "@SRC_DIR@/fragments" + + end diff --git a/smlnj-lib/Doc/tools/gen-pages/copy-file.sml b/smlnj-lib/Doc/tools/gen-pages/copy-file.sml new file mode 100644 index 0000000..57d5f5e --- /dev/null +++ b/smlnj-lib/Doc/tools/gen-pages/copy-file.sml @@ -0,0 +1,78 @@ +(* copy-file.sml + * + * COPYRIGHT (c) 2020 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + *) + +structure CopyFile : sig + + (* `copy subst (srcFile, outS)` copys the contents of `srcFile` to the output + * stream while replacing strings of the form `@name@` with `value` where + * `(name, value)` occurs in the substitution list `subst`. + *) + val copy : (string * string) list -> string * TextIO.outstream -> unit + + end = struct + + structure SS = Substring + + structure Tbl = HashTableFn ( + struct + type hash_key = substring + val hashVal = HashString.hashSubstring + fun sameKey (ss1, ss2) = (SS.compare (ss1, ss2) = EQUAL) + end) + + fun copy subst = let + val find = let + val tbl = Tbl.mkTable (List.length subst, Fail "subst-tbl") + fun ins (ss1, s2) = Tbl.insert tbl (SS.full ss1, s2) + in + ins ("", "@"); (* map "@@" to "@" *) + List.app ins subst; + Tbl.find tbl + end + fun rewriteLine (outS, ln) = let + fun scanLine ss = let + val (prefix, rest) = SS.splitl (fn #"@" => false | _ => true) ss + in + TextIO.outputSubstr (outS, prefix); + if (SS.size rest = 0) + then () + else scanPlaceholder (SS.triml 1 rest) + end + (* scan a `@NAME@` placeholder starting from the first character of `NAME` *) + and scanPlaceholder start = let + fun scan (ss, n) = (case SS.getc ss + of NONE => raise Fail "Incomplete placeholder" + | SOME(#"@", rest) => (SS.slice(start, 0, SOME n), rest) + | SOME(_, rest) => scan (rest, n+1) + (* end case *)) + val (placeholder, rest) = scan (start, 0) + in + case find placeholder + of SOME expansion => ( + TextIO.output(outS, expansion); + scanLine rest) + | NONE => raise Fail(concat[ + "unknown placeholder @", SS.string placeholder, "@" + ]) + (* end case *) + end + in + scanLine (SS.full ln) + end + fun rewrite (srcFile, outS) = let + val inS = TextIO.openIn srcFile + fun lp () = (case TextIO.inputLine inS + of NONE => TextIO.closeIn inS + | SOME s => (rewriteLine(outS, s); lp ()) + (* end case *)) + in + lp() + end + in + rewrite + end + + end diff --git a/smlnj-lib/Doc/tools/gen-pages/gen-toc.sml b/smlnj-lib/Doc/tools/gen-pages/gen-toc.sml new file mode 100644 index 0000000..930329d --- /dev/null +++ b/smlnj-lib/Doc/tools/gen-pages/gen-toc.sml @@ -0,0 +1,167 @@ +(* gen-toc.sml + * + * COPYRIGHT (c) 2020 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * This module is used to generate the HTML fragment for a sidebar + * table of contents. + * + * TODO: eventually, we should include the individual definitions from + * the file in the sidebar. + *) + +structure GenTOC : sig + + val root : FileTree.t -> TextIO.outstream -> unit + val lib : FileTree.t * FileTree.library -> TextIO.outstream -> unit + val page : FileTree.t * FileTree.library * FileTree.page -> TextIO.outstream -> unit + + end = struct + + structure FT = FileTree + + fun pr (outS, s) = TextIO.output(outS, s) + fun prl (outS, s) = TextIO.output(outS, concat s) + + fun doTOC outS f = ( + prl (outS, [ + "
    \n", + "
    \n" + ]); + f outS; + prl (outS, [ + "
    \n", + "
    \n" + ])) + + val styleText = Util.style + + (* returns the title string for the opening of a library item in the library + * list. The flags are as follows: + * link -- generate an HREF link + * cur -- this library is the current page + * inRoot -- the containing page is at root level + *) + fun libItemOpen {lib, link, cur, inRoot} = let + val FT.FILE{dir, stem, title, ...} = lib + val s = if link then [""] else [] + val s = if cur + then "" + :: styleText title :: "" :: s + else "" :: styleText title :: "" :: s + val s = if link + then let + val dir = if inRoot orelse cur then dir else "../" ^ dir + in + "" :: s + end + else s + val s = "
  • " :: s + in + concat s + end + + (* returns the string for a page item in the pages list. The + * flags are as follows: + * link -- generate an HREF link + * cur -- this library is the current page + *) + fun pageItem {page, link, cur} = let + val title = styleText(FT.getTitle page) + val s = if link then ["
  • "] else [""] + val s = if cur + then "" + :: title :: "" :: s + else "" :: title :: "" :: s + val s = if link + then "" :: s + else s + val s = "
  • " :: s + in + concat s + end + + fun doRoot outS (true, _) = pr(outS, "
      \n") + | doRoot outS (false, _) = pr(outS, "
    \n") + + (* generate a TOC file for the root page *) + fun root (ft) outS = doTOC outS (fn outS => let + fun doLib (true, _, lib) = + prl(outS, [ + " ", libItemOpen{lib=lib, link=true, cur=false, inRoot=true} + ]) + | doLib (false, _, _) = pr(outS, "
  • \n") + in + FT.walk { + root = doRoot outS, + lib = doLib, + page = fn _ => () + } ft + end) + + (* generate a TOC file for a library page *) + fun lib (ft, lib) outS = doTOC outS (fn outS => let + fun doLib (true, _, lib') = let + val cur = FT.same(lib, lib') + in + prl(outS, [ + " ", libItemOpen{lib=lib', link=not cur, cur=cur, inRoot=false} + ]); + if cur + then pr(outS, "\n
      \n") + else () + end + | doLib (false, _, lib') = if FT.same(lib, lib') + then prl(outS, ["
    \n", " \n"]) + else pr(outS, "\n") + fun doPage (_, lib', page) = if FT.same(lib, lib') + then prl(outS, [ + " ", + pageItem {page=page, link=true, cur=false}, + "\n" + ]) + else () + in + FT.walk { + root = doRoot outS, + lib = doLib, + page = doPage + } ft + end) + + (* generate a TOC file for a manual page *) + fun page (ft, lib, page) outS = + doTOC outS (fn outS => let + fun doLib (true, _, lib') = let + val cur = FT.same(lib, lib') + in + prl(outS, [ + " ", libItemOpen{lib=lib', link=true, cur=false, inRoot=false} + ]); + if cur + then pr(outS, "\n
      \n") + else () + end + | doLib (false, _, lib') = if FT.same(lib, lib') + then prl(outS, ["
    \n", " \n"]) + else pr(outS, "\n") + fun doPage (_, lib', page') = if FT.same(lib, lib') + then let + val cur = FT.same(page, page') + in + prl(outS, [ + " ", + pageItem {page=page', link=not cur, cur=cur}, + "\n" + ]) + end + else () + in + FT.walk { + root = doRoot outS, + lib = doLib, + page = doPage + } ft + end) + + end diff --git a/smlnj-lib/Doc/tools/gen-pages/main.sml b/smlnj-lib/Doc/tools/gen-pages/main.sml new file mode 100644 index 0000000..ff31049 --- /dev/null +++ b/smlnj-lib/Doc/tools/gen-pages/main.sml @@ -0,0 +1,123 @@ +(* main.sml + * + * COPYRIGHT (c) 2020 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * Main function for *gen-pages* tool. This program has the following + * usage: + * + * gen-pages [options] + * + * Options: + * + * -h, --help print help message and exit + * -v, --verbose run in verbose mode + * --release-date= + * --version= + * --base-url= + * --basis-lib-url= + * --index= + * + *) + +structure Main : sig + + val main : string * string list -> OS.Process.status + + end = struct + + structure FT = FileTree + structure P = OS.Path + +(* TODO: support relative path for @IMAGES_URL@. The actual value depends on + * the path of the file we are generating, since we may need an extra ".." prefix. + *) + + (* create a copy-file function with the specified substitutions *) + fun copy {meta : FT.meta, title, file, base} = let + val d = Date.fromTimeUniv(OS.FileSys.modTime file) + val date = Date.fmt "%Y-%m-%d" d + val time = Date.fmt "%H:%M:%S UTC" d + val imagesURL = Option.getOpt(!Options.imagesURL, base ^ "images") + val substs = [ + ("STYLED-TITLE", Util.style title), + ("DATE", !Options.releaseDate), + ("VERSION", !Options.version), + ("FILEDATE", date), + ("FILETIME", time), + ("BASE", base), + ("IMAGES_URL", imagesURL) + ] + fun metaStr (k, v) = concat[ + "\n " + ] + (* the contents of the element in the <head> *) + val substs = (case #title meta + of SOME s => ("TITLE", s) :: substs + | NONE => ("TITLE", Util.clean title) :: substs + (* end case *)) + val substs = (case #author meta + of SOME s => ("AUTHOR", metaStr("author", s)) :: substs + | NONE => ("AUTHOR", "") :: substs + (* end case *)) + val substs = (case #kws meta + of [] => ("KEYWORDS", "") :: substs + | l => ("KEYWORDS", metaStr("keywords", String.concatWith "," l)) + :: substs + (* end case *)) + in + CopyFile.copy substs + end + + fun gen genTOC (FT.FILE{dir, stem, title, meta, ...}) = let + val srcFile = P.concat(dir, stem ^ ".adoc") + val htmlFile = P.concat(dir, stem ^ ".html") + val copy = copy { + meta = meta, + title = title, + file = srcFile, + base = if dir = "" then "" else "../" + } + val outS = TextIO.openOut htmlFile + in + if !Options.verbose + then print(concat["generating ", htmlFile, "\n"]) + else (); + copy (P.concat(Config.fragDir, "header.in"), outS); + genTOC outS; + copy (P.concat(Config.fragDir, "shim.in"), outS); + RunAsciidoctor.run (srcFile, outS); + copy (P.concat(Config.fragDir, "footer.in"), outS); + TextIO.closeOut outS + end + + (* generate the root page *) + fun appRoot ft = gen (GenTOC.root ft) ft + + (* generate a library page *) + fun appLib (ft, lib) = gen (GenTOC.lib (ft, lib)) lib + + (* generate a TOC file for a manual page *) + fun appPage (ft, lib, page) = gen (GenTOC.page (ft, lib, page)) page + + fun loadIndex indexFile = FT.fromJSON (JSONParser.parseFile indexFile) + + (* for every documentation page, generate a table of contents file *) + val walkTree = FT.app {root = appRoot, lib = appLib, page = appPage} + + fun main (cmd, opts) = ( + Options.process opts; + walkTree (loadIndex (!Options.indexFile)); + if not(!Options.verbose) then print " done\n" else print "done\n"; + OS.Process.success) + handle ex => ( + TextIO.output(TextIO.stdErr, concat[ + "uncaught exception ", General.exnName ex, + " [", General.exnMessage ex, "]\n" + ]); + List.app + (fn s => TextIO.output(TextIO.stdErr, concat [" raised at ", s, "\n"])) + (SMLofNJ.exnHistory ex); + OS.Process.failure) + + end diff --git a/smlnj-lib/Doc/tools/gen-pages/options.sml b/smlnj-lib/Doc/tools/gen-pages/options.sml new file mode 100644 index 0000000..67162c1 --- /dev/null +++ b/smlnj-lib/Doc/tools/gen-pages/options.sml @@ -0,0 +1,103 @@ +(* options.sml + * + * COPYRIGHT (c) 2020 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * Handle command-line options for the gen-page tool. The options are + * + * -h, --help + * -v, --verbose + * --release-date=<date> + * --version=<version> + * --base-url=<url> + * --basis-lib-url=<url> + * --index=<file> + *) + +structure Options : sig + + val process : string list -> unit + + val usage : OS.Process.status -> unit + + (* option flags *) + val verbose : bool ref + val releaseDate : string ref + val version : string ref + val baseURL : string option ref + val basisLibURL : string ref + val indexFile : string ref + val imagesURL : string option ref + + end = struct + + structure G = GetOpt + + val verbose = ref false + val releaseDate = ref Config.releaseDate + val version = ref Config.version + val baseURL : string option ref = ref NONE + val basisLibURL = ref "https://standardml.org/Basis" + val indexFile = ref "index.json" + val imagesURL = ref(SOME "https://smlnj.org/images") + + val helpFlg = ref false + + fun setOpt (r, name) = G.ReqArg(fn s => r := s, name) + + val opts = [ + { short = "h", long = ["help"], + desc = G.NoArg(fn () => helpFlg := true), + help = "Print this message" + }, + { short = "v", long = ["verbose"], + desc = G.NoArg(fn () => verbose := true), + help = "Print progress messages" + }, + { short = "", long = ["release-date"], + desc = setOpt (releaseDate, "<date>"), + help = "Specify the release date attribute" + }, + { short = "", long = ["version"], + desc = setOpt (version, "<version>"), + help = "Specify the SML/NJ version" + }, + { short = "", long = ["base-url"], + desc = G.ReqArg(fn s => baseURL := SOME s, "<url>"), + help = "Specify a base URL for the documentation" + }, + { short = "", long = ["basis-lib-url"], + desc = setOpt (basisLibURL, "<url>"), + help = "Specify the URL for the SML Basis Library documentation" + }, + { short = "", long = ["index"], + desc = setOpt (indexFile, "<file>"), + help = "Specify the name of the index JSON file" + } + ] + + fun usage sts = ( + print (G.usageInfo{header="gen-pages [options]", options=opts}); + OS.Process.exit sts) + + fun process args = let + val errFlg = ref false + val (_, excess) = G.getOpt { + argOrder = G.RequireOrder, + options = opts, + errFn = fn msg => ( + TextIO.output(TextIO.stdErr, concat["gen-pages: ", msg, "\n"]); + errFlg := true) + } args + in + if !helpFlg + then usage OS.Process.success + else if !errFlg orelse not(null excess) + then usage OS.Process.failure + else (); + case !baseURL + of NONE => imagesURL := NONE (* use relative path to images *) + | _ => () + end + + end diff --git a/smlnj-lib/Doc/tools/gen-pages/run-asciidoctor.sml b/smlnj-lib/Doc/tools/gen-pages/run-asciidoctor.sml new file mode 100644 index 0000000..4ccd4ae --- /dev/null +++ b/smlnj-lib/Doc/tools/gen-pages/run-asciidoctor.sml @@ -0,0 +1,51 @@ +(* run-asciidoctor.sml + * + * COPYRIGHT (c) 2020 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + *) + +structure RunAsciidoctor : sig + + val run : string * TextIO.outstream -> OS.Process.status + + end = struct + + fun pipe (inS, outS) = (case TextIO.input inS + of "" => TextIO.closeIn inS + | s => (TextIO.output(outS, s); pipe(inS, outS)) + (* end case *)) + + val cmd = Config.asciidoctor + + fun args file = [ + "-b", "html", + "--no-header-footer", + "-o", "-", + "-a", concat["sml-basis-url=", !Options.basisLibURL], + "-a", concat["smlnj-version=", !Options.version], + "-a", concat["release-date=", !Options.releaseDate], + file + ] + + fun run' (srcFile, outS) = let + val args = args srcFile + val _ = if !Options.verbose + then print(String.concatWith " " (cmd :: args @ ["\n"])) + else () + val proc = Unix.execute (cmd, args) + val (fromProc, toProc) = Unix.streamsOf proc + in + pipe (fromProc, outS); + TextIO.closeOut toProc; + Unix.reap proc + before (if not (!Options.verbose) then print "." else ()) + end + + (* wrapper that first verifies that the file exists *) + fun run (srcFile, outS) = if OS.FileSys.access (srcFile, [OS.FileSys.A_READ]) + then run' (srcFile, outS) + else raise Fail(concat[ + "RunAsciidoctor.run: \"", String.toString srcFile, "\" not found" + ]) + + end diff --git a/smlnj-lib/Doc/tools/gen-pages/sources.cm b/smlnj-lib/Doc/tools/gen-pages/sources.cm new file mode 100644 index 0000000..6e8d2de --- /dev/null +++ b/smlnj-lib/Doc/tools/gen-pages/sources.cm @@ -0,0 +1,26 @@ +(* sources.cm + * + * COPYRIGHT (c) 2020 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + *) + +Library + + structure Main + +is + + $/basis.cm + $/smlnj-lib.cm + $/json-lib.cm + + ../index/sources.cm + + config.sml + copy-file.sml + gen-toc.sml + main.sml + options.sml + run-asciidoctor.sml + util.sml + diff --git a/smlnj-lib/Doc/tools/gen-pages/util.sml b/smlnj-lib/Doc/tools/gen-pages/util.sml new file mode 100644 index 0000000..14eef71 --- /dev/null +++ b/smlnj-lib/Doc/tools/gen-pages/util.sml @@ -0,0 +1,42 @@ +(* util.sml + * + * COPYRIGHT (c) 2020 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * Utilities for processing simple styled text + *) + +structure Util : sig + + val style : string -> string + val clean : string -> string + + end = struct + + (* handle some simple style notations found in the Asciidoctor title text. + * We handle code "`" and bold "*" markup. We assume that these are _not_ + * nested. + *) + fun style s = let + fun bold (#"*" :: #"*" :: r, acc) = (r, "</b>" :: acc) + | bold (#"*" :: r, acc) = (r, "</b>" :: acc) + | bold (c :: r, acc) = bold (r, str c :: acc) + | bold arg = arg + and code (#"`" :: #"`" :: r, acc) = (r, "</code>" :: acc) + | code (#"`" :: r, acc) = (r, "</code>" :: acc) + | code (c :: r, acc) = code (r, str c :: acc) + | code arg = arg + and text (#"*" :: #"*" :: r, acc) = text (bold (r, "<b>" :: acc)) + | text (#"*" :: r, acc) = text (bold (r, "<b>" :: acc)) + | text (#"`" :: #"`" :: r, acc) = text (code (r, "<code>" :: acc)) + | text (#"`" :: r, acc) = text (code (r, "<code>" :: acc)) + | text (c :: r, acc) = text (r, str c :: acc) + | text ([], acc) = acc + in + String.concat (List.rev (text (explode s, []))) + end + + (* strip code and bold markup *) + fun clean s = String.translate (fn #"`" => "" | #"*" => "" | c => str c) s + + end diff --git a/smlnj-lib/Doc/tools/gen-toc/gen-toc.sml b/smlnj-lib/Doc/tools/gen-toc/gen-toc.sml new file mode 100644 index 0000000..6843592 --- /dev/null +++ b/smlnj-lib/Doc/tools/gen-toc/gen-toc.sml @@ -0,0 +1,236 @@ +(* gen-toc.sml + * + * COPYRIGHT (c) 2020 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * NOTE: the TOC generation has been integrated into the gen-pages tool. + * This program is preserved for historical interest only. + * + * This program is used to generate the HTML fragment for a sidebar + * table of contents. + * + * Usage: + * + * gen-toc index.json + * + * This command will produce a file file.toc for each source file in + * the index (including the root). These files are HTML fragments + * for a navigation sidebar specialized to the corresponding documentation + * file. + * + * NOTE: this program must be run from the root of the documentation source! + * + * TODO: eventually, we should include the individual definitions from + * the file in the sidebar. + *) + +structure GenTOC : sig + + val main : string * string list -> OS.Process.status + + end = struct + + structure FT = FileTree + structure P = OS.Path + + fun usage sts = ( + TextIO.print "usage: gen-toc index.json\n"; + OS.Process.exit sts) + + fun error msg = ( + TextIO.print(concat["gen-toc: ", msg, "\n"]); + OS.Process.exit OS.Process.failure) + + fun loadIndex indexFile = FT.fromJSON (JSONParser.parseFile indexFile) + + fun withOutS file f = let + val outS = TextIO.openOut file + in + (f outS) handle ex => (TextIO.closeOut outS; raise ex); + TextIO.closeOut outS + end + + fun pr (outS, s) = TextIO.output(outS, s) + fun prl (outS, s) = TextIO.output(outS, concat s) + + fun doTOC file f = let + fun f' outS = ( + prl (outS, [ + "<div id=\"toc\">\n" + ]); + f outS; + prl (outS, [ + "</div> <!-- toc -->\n" + ])) + in + withOutS file f' + end + + (* handle some simple style notations found in the Asciidoctor title text. + * We handle code "`", bold "*", and italic "_" markup. We assume that + * these are _not_ nested. + *) + fun styleText s = let + fun bold (#"*" :: #"*" :: r, acc) = (r, "</b>" :: acc) + | bold (#"*" :: r, acc) = (r, "</b>" :: acc) + | bold (c :: r, acc) = bold (r, str c :: acc) + | bold arg = arg + and code (#"`" :: #"`" :: r, acc) = (r, "</code>" :: acc) + | code (#"`" :: r, acc) = (r, "</code>" :: acc) + | code (c :: r, acc) = code (r, str c :: acc) + | code arg = arg + and italic (#"_" :: #"_" :: r, acc) = (r, "</i>" :: acc) + | italic (#"_" :: r, acc) = (r, "</i>" :: acc) + | italic (c :: r, acc) = code (r, str c :: acc) + | italic arg = arg + and text (#"*" :: #"*" :: r, acc) = text (bold (r, "<b>" :: acc)) + | text (#"*" :: r, acc) = text (bold (r, "<b>" :: acc)) + | text (#"`" :: #"`" :: r, acc) = text (code (r, "<code>" :: acc)) + | text (#"`" :: r, acc) = text (code (r, "<code>" :: acc)) + | text (#"_" :: #"_" :: r, acc) = text (italic (r, "<i>" :: acc)) + | text (#"_" :: r, acc) = text (italic (r, "<i>" :: acc)) + | text (c :: r, acc) = text (r, str c :: acc) + | text ([], acc) = acc + in + String.concat (List.rev (text (explode s, []))) + end + + (* returns the title string for the opening of a library item in the library + * list. The flags are as follows: + * link -- generate an HREF link + * cur -- this library is the current page + * inRoot -- the containing page is at root level + *) + fun libItemOpen {lib, link, cur, inRoot} = let + val FT.LIB{dir, stem, title, ...} = lib + val s = if link then ["</a>"] else [] + val s = if cur + then "<span id=\"toc:current\">" :: styleText title :: "</span>" :: s + else "<span class=\"toc:lib-title\">" :: styleText title :: "</span>" :: s + val s = if link + then let + val dir = if inRoot orelse cur then dir else "../" ^ dir + in + "<a href=\"" :: dir :: "/" :: stem :: ".html\">" :: s + end + else s + val s = "<li class=\"toc:lib\">" :: s + in + concat s + end + + (* returns the string for a page item in the pages list. The + * flags are as follows: + * link -- generate an HREF link + * cur -- this library is the current page + *) + fun pageItem {page, link, cur} = let + val FT.PAGE{stem, title, ...} = page + val s = if link then ["</a></li>"] else ["</li>"] + val s = if cur + then "<span id=\"toc:current\">" :: styleText title :: "</span>" :: s + else "<span class=\"toc:lib-page\">" :: styleText title :: "</span>" :: s + val s = if link + then "<a href=\"" :: stem :: ".html\">" :: s + else s + val s = "<li class=\"toc:page\">" :: s + in + concat s + end + + fun doRoot outS (true, _) = pr(outS, "<ul class=\"toc:lib-list\">\n") + | doRoot outS (false, _) = pr(outS, "</ul>\n") + + (* generate a TOC file for the root page *) + fun appRoot (ft as FT.ROOT{stem, ...}) = doTOC (stem ^ ".toc") (fn outS => let + fun doLib (true, _, lib) = + prl(outS, [ + " ", libItemOpen{lib=lib, link=true, cur=false, inRoot=true} + ]) + | doLib (false, _, _) = pr(outS, "</li>\n") + in + FT.walk { + root = doRoot outS, + lib = doLib, + page = fn _ => () + } ft + end) + + (* generate a TOC file for a library page *) + fun appLib (ft, lib as FT.LIB{dir, stem, ...}) = + doTOC (P.concat(dir, stem ^ ".toc")) (fn outS => let + fun doLib (true, _, lib') = let + val cur = FT.sameLib(lib, lib') + in + prl(outS, [ + " ", libItemOpen{lib=lib', link=false, cur=cur, inRoot=false} + ]); + if cur + then pr(outS, "\n <ul class=\"toc:page-list\">\n") + else () + end + | doLib (false, _, lib') = if FT.sameLib(lib, lib') + then prl(outS, [" </ul>\n", " </li>\n"]) + else pr(outS, "<li>\n") + fun doPage (_, lib', page) = if FT.sameLib(lib, lib') + then prl(outS, [ + " ", + pageItem {page=page, link=true, cur=false}, + "\n" + ]) + else () + in + FT.walk { + root = doRoot outS, + lib = doLib, + page = doPage + } ft + end) + + (* generate a TOC file for a manual page *) + fun appPage (ft, lib, page as FT.PAGE{dir, stem, ...}) = + doTOC (P.concat(dir, stem ^ ".toc")) (fn outS => let + fun doLib (true, _, lib') = let + val cur = FT.sameLib(lib, lib') + in + prl(outS, [ + " ", libItemOpen{lib=lib', link=false, cur=false, inRoot=false} + ]); + if cur + then pr(outS, "\n <ul class=\"toc:page-list\">\n") + else () + end + | doLib (false, _, lib') = if FT.sameLib(lib, lib') + then prl(outS, [" </ul>\n", " </li>\n"]) + else pr(outS, "<li>\n") + fun doPage (_, lib', page') = if FT.sameLib(lib, lib') + then let + val cur = FT.samePage(page, page') + in + prl(outS, [ + " ", + pageItem {page=page', link=not cur, cur=cur}, + "\n" + ]) + end + else () + in + FT.walk { + root = doRoot outS, + lib = doLib, + page = doPage + } ft + end) + + (* for every documentation page, generate a table of contents file *) + val walkTree = FT.app {root = appRoot, lib = appLib, page = appPage} + + fun main (cmd, args) = (case args + of "-h"::_ => usage OS.Process.success + | [indexFile] => ( + walkTree (loadIndex indexFile); + OS.Process.success) + | _ => usage OS.Process.failure + (* end case *)) + + end diff --git a/smlnj-lib/Doc/tools/gen-toc/sources.cm b/smlnj-lib/Doc/tools/gen-toc/sources.cm new file mode 100644 index 0000000..68998df --- /dev/null +++ b/smlnj-lib/Doc/tools/gen-toc/sources.cm @@ -0,0 +1,19 @@ +(* sources.cm + * + * COPYRIGHT (c) 2020 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + *) + +Library + + structure GenTOC + +is + + $/basis.cm + $/smlnj-lib.cm + $/json-lib.cm + + ../index/sources.cm + + gen-toc.sml diff --git a/smlnj-lib/Doc/tools/index/extract-index.sml b/smlnj-lib/Doc/tools/index/extract-index.sml new file mode 100644 index 0000000..ced7868 --- /dev/null +++ b/smlnj-lib/Doc/tools/index/extract-index.sml @@ -0,0 +1,283 @@ +(* extract-index.sml + * + * COPYRIGHT (c) 2020 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * This module implements a tree walker that extracts index information + * from the documentation sources. + *) + +structure ExtractIndex : sig + + (* given the root directory of the documentation sources, extract + * index information. + *) + val extract : string -> FileTree.t + + end = struct + + structure FT = FileTree + structure P = OS.Path + structure RE = RegExpFn( + structure P = AwkSyntax + structure E = BackTrackEngine) + structure MT = MatchTree + structure SS = Substring + structure SIO = TextIO.StreamIO + + (* error reporting *) + fun error msg = ( + TextIO.output(TextIO.stdErr, concat("Error: " :: msg)); + TextIO.output1 (TextIO.stdErr, #"\n"); + OS.Process.exit OS.Process.failure) + + val attrRE = RE.compileString "^:!?([^!:]+)!?:(.*)" + val includeRE = RE.compileString "^include::([^.]+\\.adoc)\\[\\]" + val xrefRE = RE.compileString + "[ ]*xref:([^.]+\\.adoc)\\[([^\\]]+|`\\[\\.kw\\]#[a-z]+# [^`]+`)\\]::" + (* match the title text for a module xref *) + val pageRefRE = RE.compileString "`\\[\\.kw\\]#([a-z]+)# ([^`]+)`" + (* module specifications in the synopsis *) + val moduleRE = RE.compileString "^(signature|structure|functor)[ \t]+([A-Za-z0-9_]+)" + + (* `match re ln` attempts to match the line of text `ln` using the regular + * expression `re`. If successful, the result will be a match tree of strings + * corresponding to the matches (and submatches) of `re`. + *) + fun match re = let + val prefix = StringCvt.scanString (RE.prefix re) + fun getSubstrs s = MT.map (fn {pos, len} => String.substring(s, pos, len)) + in + fn s => Option.map (getSubstrs s) (prefix s) + end + + fun openIn (rootDir, path) = let + val file = P.concat(rootDir, path) + in + if OS.FileSys.access(file, [OS.FileSys.A_READ]) + then TextIO.openIn file + else error ["file '", file, "' does not exist or is not readable"] + end + + fun trimWS ss = SS.dropr Char.isSpace (SS.dropl Char.isSpace ss) + + (* match an asciidoctor atrribute *) + val matchAttr = match attrRE + + (* extract attribute values from lines immediately following the title *) + fun scanMeta inputStrm = let + val inS' = TextIO.getInstream inputStrm + val author = ref NONE + val keywords = ref [] + val title = ref NONE + fun trim s = SS.string(trimWS(SS.full s)) + fun scan inS = (case SIO.inputLine inS + of SOME(ln, inS') => (case matchAttr ln + of SOME(MT.Match(_, [MT.Match(a, []), MT.Match(v, [])])) => ( + case String.map Char.toLower a + of "author" => author := SOME(trim v) + | "keywords" => keywords := + List.map trim + (String.tokens (fn #"," => true | _ => false) v) + | "title" => title := SOME(trim v) + | _ => () + (* end case *); + scan inS') + | _ => inS + (* end case *)) + | NONE => inS + (* end case *)) + in + TextIO.setInstream (inputStrm, scan (TextIO.getInstream inputStrm)); + { author = !author, kws = !keywords, title = !title } + end + + (* `scanFile root path getContents processContents` scans the file + * specified by `path` using the `getContents` function to read the + * file and the `processContents` to process it. + *) + fun scanFile rootDir path getContents processContent = let + val dir = P.dir path + val stem = P.base(P.file path) + val inS = openIn (rootDir, path) + val SOME firstLn = TextIO.inputLine inS + val title = if String.isPrefix "= " firstLn + then SS.string(trimWS(SS.extract(firstLn, 2, NONE))) + else "<title>" + val meta = scanMeta inS + val contents = getContents inS + in + TextIO.closeIn inS; + FT.FILE{ + dir = dir, + stem = stem, + title = title, + meta = meta, + info = processContent contents + } + end + + (* scan the input stream until a line for which f returns `SOME v` is encountered *) + fun scanLines f inS = let + fun lp () = (case TextIO.inputLine inS + of SOME ln => (case f ln + of NONE => lp () + | someV => someV + (* end case *)) + | NONE => NONE + (* end case *)) + in + lp () + end + + (* scan the input until a line with the given prefix is encountered; this function + * returns true if it finds such a line and false otherwise. + *) + fun existsPrefix prefix = let + val isPrefix = String.isPrefix prefix + fun lp inS = (case TextIO.inputLine inS + of NONE => false + | SOME ln => isPrefix ln orelse lp inS + (* end case *)) + in + lp + end + + (* find the next "include" directive in the input stream *) + fun findInclude inS = scanLines (match includeRE) inS + + (* find the next "xref" directive in the input stream *) + fun findXRef inS = scanLines (match xrefRE) inS + + (* match a module page reference *) + val matchPageRef = match pageRefRE + + (* match a module specification in the synopsis *) + val matchModule = match moduleRE + + (* extract the modules that are listed in the "Synopsis" of a page. We first look + * for a line of the form + * + * == Synopsis + * + * and then for an SML source-code block. In the body of the source-code block + * we expect to see lines that have prefixes of the form + * + * signature NAME + * structure NAME ... + * functor NAME ... + *) + fun doPage rootDir libDir {file, info={kind, name}} = let + val pagePath = P.joinDirFile{dir = libDir, file = file} + (* skip to the SML code block in the synopsis *) + fun findSynopsis inS = + existsPrefix "== Synopsis" inS + andalso existsPrefix "[source,sml]" inS + andalso existsPrefix "----" inS + (* extract module names from the code block *) + fun getSynopsis inS = let + fun lp mods = (case TextIO.inputLine inS + of NONE => error ["unexpected EOF in synopsis"] + | SOME ln => (case matchModule ln + of SOME(MT.Match(_, [MT.Match(mk, _), MT.Match(id, _)])) => + let + val mk = (case mk + of "signature" => FT.SIGNATURE + | "structure" => FT.STRUCTURE + | "functor" => FT.FUNCTOR + | _ => error ["expected module kind"] + (* end case *)) + in + lp ((mk, id)::mods) + end + | _ => if String.isPrefix "----" ln + then List.rev mods + else lp mods + (* end case *)) + (* end case *)) + in + lp [] + end + fun scanContents inS = (case kind + of FT.OtherPage => {kind = kind, name = name, synopsis = []} + | _ => { + kind = kind, name = name, + synopsis = if findSynopsis inS + then getSynopsis inS + else [] + } + (* end case *)) + in + scanFile rootDir pagePath scanContents Fn.id + end + + (* extract the list of page files from a library document *) + fun getPagesFromLib inS = let + (* first we get the `xref` list items *) + fun getPages pages = (case findXRef inS + of SOME(MT.Match(_, [MT.Match(file, []), MT.Match(title, [])])) => ( + case matchPageRef title + of SOME(MT.Match(_, [MT.Match(kw, []), MT.Match(name, [])])) => let + val kind = (case kw + of "signature" => FT.sigPage + | "structure" => FT.structPage + | "functor" => FT.functPage + | _ => error ["**bogus keyword \"", kw, "\""] + (* end case *)) + val page = { + file = file, + info = {kind = kind, name = name} + } + in + getPages (page :: pages) + end + | _ => let (* non-module page *) + val page = { + file = file, + info = {kind = FT.OtherPage, name = title} + } + in + getPages (page :: pages) + end + (* end case *)) + | NONE => List.rev pages + | SOME(MT.Match(s, _)) => error [ + "**bogus xref \"", String.toString s, "\"" + ] + (* end case *)) + in + {pages = getPages []} + end + + (* process a library file *) + fun doLib rootDir libPath = let + val libDir = P.dir libPath + in + scanFile rootDir libPath + getPagesFromLib + (fn {pages} => {pages = List.map (doPage rootDir libDir) pages}) + end + + (* extract the list of library files from the root document *) + fun getLibsFromRoot inS = let + fun getIncludes incs = (case findInclude inS + of SOME(MT.Match(_, [MT.Match(path, [])])) => + getIncludes(path :: incs) + | NONE => List.rev incs + | SOME(MT.Match(s, _)) => error [ + "**bogus include \"", String.toString s, "\"" + ] + (* end case *)) + in + getIncludes [] + end + + fun extract rootDir = let + val rootDir = OS.FileSys.fullPath rootDir + in + scanFile rootDir "index.adoc" + getLibsFromRoot + (fn libs => {libs = List.map (doLib rootDir) libs}) + end + + end diff --git a/smlnj-lib/Doc/tools/index/file-tree.sml b/smlnj-lib/Doc/tools/index/file-tree.sml new file mode 100644 index 0000000..f053c1b --- /dev/null +++ b/smlnj-lib/Doc/tools/index/file-tree.sml @@ -0,0 +1,315 @@ +(* file-tree.sml + * + * COPYRIGHT (c) 2020 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * The file tree represents the structure of the documentation as a three-level + * tree. This structure is generated by walking over the documentation source. + *) + +structure FileTree : sig + + (* meta data extracted from the attributes *) + type meta = { + author : string option, (* value of "author" attribute *) + kws : string list, (* value of "keywords" attribute *) + title : string option (* alternate title to use in document header *) + } + + (* a node in the file tree; the `info` field is used to distinguish the kind + * of node (root, library, or page). + *) + datatype 'a file = FILE of { + dir : string, (* directory relative to the root of the documentation *) + stem : string, (* file-name stem; e.g., "json-lib" *) + title : string, (* title of page *) + meta : meta, (* meta data from attributes *) + info : 'a (* information specific to the type of file in the tree *) + } + + datatype module_kind = SIGNATURE | STRUCTURE | FUNCTOR + + datatype page_kind = ModulePage of module_kind | OtherPage + + val sigPage : page_kind (* = ModulePage SIGNATURE *) + val structPage : page_kind (* = ModulePage STRUCTURE *) + val functPage : page_kind (* = ModulePage FUNCTOR *) + + (* the leaves of the tree are the pages that describe the modules *) + type page = { + kind : page_kind, (* specifies the kind of the main module + * or `OtherPage` for other material. + *) + name : string, (* the main module name or page title *) + synopsis : (module_kind * string) list + (* summary of the "Synopsis" section; will be `nil` + * for `OtherPage` pages. + *) + } file + + (* the interior nodes correspond to libraries. The info describes the pages + * that comprise the library's documentation. + *) + type library = { + pages : page list (* list of manual pages for library *) + } file + + (* the root file of the documentation; the info is the list of libraries *) + type t = {libs : library list} file + + (* test equality by name *) + val same : 'a file * 'a file -> bool + + (* is a page in a library? *) + val inLibrary : library -> page -> bool + + (* get various components of a tree node *) + val getAuthor : 'a file -> string option + val getKeywords : 'a file -> string list + val getAltTitle : 'a file -> string option + val getDir : 'a file -> string + val getStem : 'a file -> string + val getTitle : 'a file -> string + val getLibs : t -> library list + val getPages : library -> page list + + (* apply the functions to the nodes of the tree *) + val app : { + root : t -> unit, + lib : t * library -> unit, + page : t * library * page -> unit + } -> t -> unit + + (* do a walk over the tree; for interior nodes (root and libraries), + * the function is called with "true" on entry and with "false" after + * visiting the node's kids. + *) + val walk : { + root : bool * t -> unit, + lib : bool * t * library -> unit, + page : t * library * page -> unit + } -> t -> unit + + (* convert between JSON objects and the file tree datatypes *) + val fromJSON : JSON.value -> t + val toJSON : t -> JSON.value + + end = struct + + structure U = JSONUtil + + (* meta data extracted from the attributes *) + type meta = { + author : string option, (* value of "author" attribute *) + kws : string list, (* value of "keywords" attribute *) + title : string option (* alternate title to use in document header *) + } + + datatype 'a file = FILE of { + dir : string, (* directory relative to the root of the documentation *) + stem : string, (* file-name stem; e.g., "json-lib" *) + title : string, (* title of page *) + meta : meta, (* meta data from attributes *) + info : 'a (* information specific to the type of file in the tree *) + } + + datatype module_kind = SIGNATURE | STRUCTURE | FUNCTOR + + datatype page_kind = ModulePage of module_kind | OtherPage + + val sigPage = ModulePage SIGNATURE + val structPage = ModulePage STRUCTURE + val functPage = ModulePage FUNCTOR + + (* the leaves of the tree are the pages that describe the modules *) + type page = { + kind : page_kind, (* specifies the kind of the main module + * or `OtherPage` for other material. + *) + name : string, (* the main module name or page title *) + synopsis : (module_kind * string) list + (* summary of the "Synopsis" section; will be `nil` + * for `OtherPage` pages. + *) + } file + + (* the interior nodes correspond to libraries. The info describes the pages + * that comprise the library's documentation. + *) + type library = { + pages : page list (* list of manual pages for library *) + } file + + (* the root file of the documentation; the info is the list of libraries *) + type t = {libs : library list} file + + (* test equality by name *) + fun same (FILE f1, FILE f2) = (#dir f1 = #dir f2) andalso (#stem f1 = #stem f2) + + (* is a page in a library? *) + fun inLibrary (FILE lib) (FILE p) = (#dir lib = #dir p) + + (* get various components of a tree node *) + fun getAuthor (FILE{meta={author, ...}, ...}) = author + fun getKeywords (FILE{meta={kws, ...}, ...}) = kws + fun getAltTitle (FILE{meta={title, ...}, ...}) = title + fun getDir (FILE{dir, ...}) = dir + fun getStem (FILE{stem, ...}) = stem + fun getTitle (FILE{title, ...}) = title + fun getLibs (FILE{info={libs}, ...}) = libs + fun getPages (FILE{info={pages}, ...}) = pages + + fun inLibrary (FILE lib) (FILE p) = (#dir lib = #dir p) + + fun app {root, lib, page} (ft : t) = let + fun walkLib (l : library) = ( + lib (ft, l); + List.app (fn p => page(ft, l, p)) (getPages l)) + in + root ft; + List.app walkLib (getLibs ft) + end + + fun walk {root, lib, page} (ft : t) = let + fun walkLib (l : library) = ( + lib (true, ft, l); + List.app (fn p => page(ft, l, p)) (getPages l); + lib (false, ft, l)) + in + root (true, ft); + List.app walkLib (getLibs ft); + root (false, ft) + end + + datatype value = datatype JSON.value + + fun fromJSON root = let + fun jsonToMeta findField = { + author = (case findField "meta-author" + of NONE => NONE + | SOME NULL => NONE + | SOME obj => SOME(U.asString obj) + (* end case *)), + kws = (case findField "meta-keywords" + of NONE => [] + | SOME NULL => [] + | SOME kws => U.arrayMap U.asString kws + (* end case *)), + title = (case findField "meta-title" + of NONE => NONE + | SOME NULL => NONE + | SOME obj => SOME(U.asString obj) + (* end case *)) + } + fun jsonToFile obj getInfo = let + val get = U.lookupField obj + val find = U.findField obj + in + FILE{ + dir = U.asString(get "dir"), + stem = U.asString(get "stem"), + title = U.asString(get "title"), + meta = jsonToMeta find, + info = getInfo (get, find) + } + end + fun jsonToSynopsis obj = (case U.findField obj "synopsis" + of SOME(ARRAY mods) => let + fun getSpec obj = let + val name = U.lookupField obj "name" + val mk = (case U.lookupField obj "kind" + of STRING "signature" => SIGNATURE + | STRING "structure" => STRUCTURE + | STRING "functor" => FUNCTOR + | _ => raise Fail "unknown module kind" + (* end case *)) + in + (mk, U.asString name) + end + in + List.map getSpec mods + end + | _ => [] + (* end case *)) + fun jsonToPage obj = jsonToFile obj (fn (_, find) => ( + case (find "kind", find "name") + of (SOME k, SOME n) => let + val (kind, synopsis) = (case U.asString k + of "signature" => (sigPage, jsonToSynopsis obj) + | "structure" => (structPage, jsonToSynopsis obj) + | "functor" => (functPage, jsonToSynopsis obj) + | "other" => (OtherPage, []) + | s => raise Fail(concat["unknown page kind \"", s, "\""]) + (* end case *)) + in + {kind = kind, name = U.asString n, synopsis = synopsis} + end + | _ => raise Fail "misformed page" + (* end case *))) + fun jsonToLib obj = jsonToFile obj (fn (get, find) => { + pages = U.arrayMap jsonToPage (get "pages") + }) + in + jsonToFile root (fn (get, _) => { + libs = U.arrayMap jsonToLib (get "libraries") + }) + end + + fun toJSON root = let + fun metaToJSON (meta : meta, fields) = let + val fields = (case #title meta + of NONE => fields + | SOME s => ("meta-title", STRING s) :: fields + (* end case *)) + val fields = (case #kws meta + of [] => fields + | kws => ("meta-keywords", ARRAY(List.map STRING kws)) :: fields + (* end case *)) + val fields = (case #author meta + of NONE => fields + | SOME s => ("meta-author", STRING s) :: fields + (* end case *)) + in + fields + end + fun fileToJSON (FILE{dir, stem, title, meta, info}) infoToJSON = let + val fields = infoToJSON info + val fields = metaToJSON (meta, fields) + in + OBJECT( + ("dir", STRING dir) :: + ("stem", STRING stem) :: + ("title", STRING title) :: + fields) + end + fun pageToJSON page = let + fun infoToJSON {kind, name, synopsis} = let + fun modKindToJSON SIGNATURE = STRING "signature" + | modKindToJSON STRUCTURE = STRING "structure" + | modKindToJSON FUNCTOR = STRING "functor" + fun modSpecToJSON (mk, name) = OBJECT[ + ("kind", modKindToJSON mk), + ("name", STRING name) + ] + val kind = (case kind + of ModulePage SIGNATURE => "signature" + | ModulePage STRUCTURE => "structure" + | ModulePage FUNCTOR => "functor" + | OtherPage => "other" + (* end case *)) + in [ + ("kind", STRING kind), + ("name", STRING name), + ("synopsis", ARRAY(List.map modSpecToJSON synopsis)) + ] end + in + fileToJSON page infoToJSON + end + fun libToJSON lib = fileToJSON lib + (fn _ => [("pages", ARRAY(map pageToJSON (getPages lib)))]) + in + fileToJSON root + (fn {libs} => [("libraries", ARRAY(map libToJSON libs))]) + end + + end diff --git a/smlnj-lib/Doc/tools/index/sources.cm b/smlnj-lib/Doc/tools/index/sources.cm new file mode 100644 index 0000000..fd65bae --- /dev/null +++ b/smlnj-lib/Doc/tools/index/sources.cm @@ -0,0 +1,20 @@ +(* sources.cm + * + * COPYRIGHT (c) 2020 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + *) + +Library + + structure ExtractIndex + structure FileTree + +is + + $/basis.cm + $/smlnj-lib.cm + $/json-lib.cm + $/regexp-lib.cm + + extract-index.sml + file-tree.sml diff --git a/smlnj-lib/Doc/tools/make-index/Makefile.in b/smlnj-lib/Doc/tools/make-index/Makefile.in new file mode 100644 index 0000000..1622d08 --- /dev/null +++ b/smlnj-lib/Doc/tools/make-index/Makefile.in @@ -0,0 +1,33 @@ +# Makefile.in +# +# COPYRIGHT (c) 2020 The Fellowship of SML/NJ (http://www.smlnj.org) +# All rights reserved. +# +# Makefile for make-index tool +# +# @configure_input@ +# + +SHELL = @SHELL@ +INSTALL = @INSTALL@ +@SET_MAKE@ + +TOOL_BIN = @TOOL_BIN_DIR@ +ML_BUILD = @ML_BUILD@ +HEAP_SUFFIX = @SMLNJ_HEAP_SUFFIX@ +INSTALL_WRAPPER = @ROOT_DIR@/bin/install-sml-wrapper.sh + +PROGRAM = make-index + +HEAP_IMAGE = $(PROGRAM).$(HEAP_SUFFIX) + +.PHONY: install +install: $(HEAP_IMAGE) + $(INSTALL_WRAPPER) $(PROGRAM) $(TOOL_BIN) + +$(HEAP_IMAGE): $(wildcard ../*.sml) make-index.sml + $(ML_BUILD) sources.cm MakeIndex.main $(PROGRAM) + +include @MK_DIR@/clean-rules.gmk + +CLEAN_FILES = $(HEAP_IMAGE) diff --git a/smlnj-lib/Doc/tools/make-index/make-index.sml b/smlnj-lib/Doc/tools/make-index/make-index.sml new file mode 100644 index 0000000..2af66e3 --- /dev/null +++ b/smlnj-lib/Doc/tools/make-index/make-index.sml @@ -0,0 +1,41 @@ +(* make-index.sml + * + * COPYRIGHT (c) 2020 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * This program is used to generate extract index information from + * the document sources and then write it out as a JSON file. + * + * Usage: + * + * make-index index.json + *) + +structure MakeIndex : sig + + val main : string * string list -> OS.Process.status + + end = struct + + fun usage sts = ( + TextIO.print "usage: make-index <outfile>\n"; + OS.Process.exit sts) + + fun main (cmd, args) = (case args + of "-h"::_ => usage OS.Process.success + | [outFile] => let + val tree = FileTree.toJSON(ExtractIndex.extract ".") + val outS = TextIO.openOut outFile + in + JSONPrinter.print' {strm = outS, pretty = true} tree; + OS.Process.success + end + | _ => usage OS.Process.failure + (* end case *)) + handle exn => ( + TextIO.output(TextIO.stdErr, concat[ + "make-index: uncaught exception (", exnMessage exn, ")" + ]); + OS.Process.failure) + + end diff --git a/smlnj-lib/Doc/tools/make-index/sources.cm b/smlnj-lib/Doc/tools/make-index/sources.cm new file mode 100644 index 0000000..1f1b1ff --- /dev/null +++ b/smlnj-lib/Doc/tools/make-index/sources.cm @@ -0,0 +1,19 @@ +(* sources.cm + * + * COPYRIGHT (c) 2020 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + *) + +Library + + structure MakeIndex + +is + + $/basis.cm + $/smlnj-lib.cm + $/json-lib.cm + + ../index/sources.cm + + make-index.sml diff --git a/smlnj-lib/HTML/.cm/GUID/check-html-fn.sml b/smlnj-lib/HTML/.cm/GUID/check-html-fn.sml new file mode 100644 index 0000000..b0ca8b9 --- /dev/null +++ b/smlnj-lib/HTML/.cm/GUID/check-html-fn.sml @@ -0,0 +1 @@ +guid-$/(html-lib.cm):check-html-fn.sml-1714016086.206 diff --git a/smlnj-lib/HTML/.cm/GUID/html-attr-vals.sml b/smlnj-lib/HTML/.cm/GUID/html-attr-vals.sml new file mode 100644 index 0000000..63f3e64 --- /dev/null +++ b/smlnj-lib/HTML/.cm/GUID/html-attr-vals.sml @@ -0,0 +1 @@ +guid-$/(html-lib.cm):html-attr-vals.sml-1714016084.154 diff --git a/smlnj-lib/HTML/.cm/GUID/html-attrs-fn.sml b/smlnj-lib/HTML/.cm/GUID/html-attrs-fn.sml new file mode 100644 index 0000000..55595da --- /dev/null +++ b/smlnj-lib/HTML/.cm/GUID/html-attrs-fn.sml @@ -0,0 +1 @@ +guid-$/(html-lib.cm):html-attrs-fn.sml-1714016084.171 diff --git a/smlnj-lib/HTML/.cm/GUID/html-attrs-sig.sml b/smlnj-lib/HTML/.cm/GUID/html-attrs-sig.sml new file mode 100644 index 0000000..3a42c69 --- /dev/null +++ b/smlnj-lib/HTML/.cm/GUID/html-attrs-sig.sml @@ -0,0 +1 @@ +guid-$/(html-lib.cm):html-attrs-sig.sml-1714016084.160 diff --git a/smlnj-lib/HTML/.cm/GUID/html-defaults.sml b/smlnj-lib/HTML/.cm/GUID/html-defaults.sml new file mode 100644 index 0000000..2aa5566 --- /dev/null +++ b/smlnj-lib/HTML/.cm/GUID/html-defaults.sml @@ -0,0 +1 @@ +guid-$/(html-lib.cm):html-defaults.sml-1714016084.145 diff --git a/smlnj-lib/HTML/.cm/GUID/html-elements-fn.sml b/smlnj-lib/HTML/.cm/GUID/html-elements-fn.sml new file mode 100644 index 0000000..2c7284b --- /dev/null +++ b/smlnj-lib/HTML/.cm/GUID/html-elements-fn.sml @@ -0,0 +1 @@ +guid-$/(html-lib.cm):html-elements-fn.sml-1714016085.742 diff --git a/smlnj-lib/HTML/.cm/GUID/html-error-sig.sml b/smlnj-lib/HTML/.cm/GUID/html-error-sig.sml new file mode 100644 index 0000000..5b521bb --- /dev/null +++ b/smlnj-lib/HTML/.cm/GUID/html-error-sig.sml @@ -0,0 +1 @@ +guid-$/(html-lib.cm):html-error-sig.sml-1714016083.610 diff --git a/smlnj-lib/HTML/.cm/GUID/html-gram.sig b/smlnj-lib/HTML/.cm/GUID/html-gram.sig new file mode 100644 index 0000000..a93044a --- /dev/null +++ b/smlnj-lib/HTML/.cm/GUID/html-gram.sig @@ -0,0 +1 @@ +guid-$/(html-lib.cm):html-gram.sig-1714016084.481 diff --git a/smlnj-lib/HTML/.cm/GUID/html-gram.sml b/smlnj-lib/HTML/.cm/GUID/html-gram.sml new file mode 100644 index 0000000..4ad3977 --- /dev/null +++ b/smlnj-lib/HTML/.cm/GUID/html-gram.sml @@ -0,0 +1 @@ +guid-$/(html-lib.cm):html-gram.sml-1714016084.492 diff --git a/smlnj-lib/HTML/.cm/GUID/html-lex.sml b/smlnj-lib/HTML/.cm/GUID/html-lex.sml new file mode 100644 index 0000000..33ca089 --- /dev/null +++ b/smlnj-lib/HTML/.cm/GUID/html-lex.sml @@ -0,0 +1 @@ +guid-$/(html-lib.cm):html-lex.sml-1714016086.014 diff --git a/smlnj-lib/HTML/.cm/GUID/html-parser-fn.sml b/smlnj-lib/HTML/.cm/GUID/html-parser-fn.sml new file mode 100644 index 0000000..fe7ad87 --- /dev/null +++ b/smlnj-lib/HTML/.cm/GUID/html-parser-fn.sml @@ -0,0 +1 @@ +guid-$/(html-lib.cm):html-parser-fn.sml-1714016086.259 diff --git a/smlnj-lib/HTML/.cm/GUID/html-sig.sml b/smlnj-lib/HTML/.cm/GUID/html-sig.sml new file mode 100644 index 0000000..9dc8f79 --- /dev/null +++ b/smlnj-lib/HTML/.cm/GUID/html-sig.sml @@ -0,0 +1 @@ +guid-$/(html-lib.cm):html-sig.sml-1714016083.615 diff --git a/smlnj-lib/HTML/.cm/GUID/html.sml b/smlnj-lib/HTML/.cm/GUID/html.sml new file mode 100644 index 0000000..6132486 --- /dev/null +++ b/smlnj-lib/HTML/.cm/GUID/html.sml @@ -0,0 +1 @@ +guid-$/(html-lib.cm):html.sml-1714016083.660 diff --git a/smlnj-lib/HTML/.cm/GUID/make-html.sml b/smlnj-lib/HTML/.cm/GUID/make-html.sml new file mode 100644 index 0000000..9127cf8 --- /dev/null +++ b/smlnj-lib/HTML/.cm/GUID/make-html.sml @@ -0,0 +1 @@ +guid-$/(html-lib.cm):make-html.sml-1714016084.108 diff --git a/smlnj-lib/HTML/.cm/GUID/pr-html.sml b/smlnj-lib/HTML/.cm/GUID/pr-html.sml new file mode 100644 index 0000000..97efbeb --- /dev/null +++ b/smlnj-lib/HTML/.cm/GUID/pr-html.sml @@ -0,0 +1 @@ +guid-$/(html-lib.cm):pr-html.sml-1714016083.739 diff --git a/smlnj-lib/HTML/.cm/SKEL/check-html-fn.sml b/smlnj-lib/HTML/.cm/SKEL/check-html-fn.sml new file mode 100644 index 0000000..b0421c6 --- /dev/null +++ b/smlnj-lib/HTML/.cm/SKEL/check-html-fn.sml @@ -0,0 +1,2 @@ +Skeleton 5 +ae"CheckHTMLFn"i2aErr"gp1c"HTML_ERROR"f4d"Format"d"List"d"HTML"j0 \ No newline at end of file diff --git a/smlnj-lib/HTML/.cm/SKEL/html-attr-vals.sml b/smlnj-lib/HTML/.cm/SKEL/html-attr-vals.sml new file mode 100644 index 0000000..f6211f6 --- /dev/null +++ b/smlnj-lib/HTML/.cm/SKEL/html-attr-vals.sml @@ -0,0 +1,2 @@ +Skeleton 5 +ad"HTMLAttrVals"h0 \ No newline at end of file diff --git a/smlnj-lib/HTML/.cm/SKEL/html-attrs-fn.sml b/smlnj-lib/HTML/.cm/SKEL/html-attrs-fn.sml new file mode 100644 index 0000000..b75e390 --- /dev/null +++ b/smlnj-lib/HTML/.cm/SKEL/html-attrs-fn.sml @@ -0,0 +1,2 @@ +Skeleton 5 +ae"HTMLAttrsFn"i1aErr"gp1c"HTML_ERROR"jh3egp1d"HTMLAttrVals"f8d"HashString"d"Char"d"List"Cd"Int"d"HTML"d"String"d"Array"Nad"HTbl"jh0gp1e"HashTableFn"gp1c"HTML_ATTRS" \ No newline at end of file diff --git a/smlnj-lib/HTML/.cm/SKEL/html-attrs-sig.sml b/smlnj-lib/HTML/.cm/SKEL/html-attrs-sig.sml new file mode 100644 index 0000000..9223acd --- /dev/null +++ b/smlnj-lib/HTML/.cm/SKEL/html-attrs-sig.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f2d"HTML"d"HTMLAttrVals"ac"HTML_ATTRS"h0 \ No newline at end of file diff --git a/smlnj-lib/HTML/.cm/SKEL/html-defaults.sml b/smlnj-lib/HTML/.cm/SKEL/html-defaults.sml new file mode 100644 index 0000000..544f2eb --- /dev/null +++ b/smlnj-lib/HTML/.cm/SKEL/html-defaults.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f1d"HTML"ad"HTMLDefaults"h0 \ No newline at end of file diff --git a/smlnj-lib/HTML/.cm/SKEL/html-elements-fn.sml b/smlnj-lib/HTML/.cm/SKEL/html-elements-fn.sml new file mode 100644 index 0000000..c4a6bb5 --- /dev/null +++ b/smlnj-lib/HTML/.cm/SKEL/html-elements-fn.sml @@ -0,0 +1,2 @@ +Skeleton 5 +ae"HTMLElementsFn"i4aTokens"HTML_TOKENS"aErr"gp1c"HTML_ERROR"aHTMLAttrs"gp1c"HTML_ATTRS"f5Cd"HashString"d"Char"d"List"d"String"0Njh4aT"gp1ad"A"gp1ad"HTbl"jh0gp1e"HashTableFn"ad"SS"gp1d"Substring"h1a \ No newline at end of file diff --git a/smlnj-lib/HTML/.cm/SKEL/html-error-sig.sml b/smlnj-lib/HTML/.cm/SKEL/html-error-sig.sml new file mode 100644 index 0000000..ef1c2a5 --- /dev/null +++ b/smlnj-lib/HTML/.cm/SKEL/html-error-sig.sml @@ -0,0 +1,2 @@ +Skeleton 5 +ac"HTML_ERROR"h0 \ No newline at end of file diff --git a/smlnj-lib/HTML/.cm/SKEL/html-gram.sig b/smlnj-lib/HTML/.cm/SKEL/html-gram.sig new file mode 100644 index 0000000..000748c --- /dev/null +++ b/smlnj-lib/HTML/.cm/SKEL/html-gram.sig @@ -0,0 +1,2 @@ +Skeleton 5 +d3f1d"HTMLAttrVals"aHTML_TOKENS"h0ac"HTML_LRVALS"h2ad"Tokens"gp1ad"ParserData"gp1c"PARSER_DATA" \ No newline at end of file diff --git a/smlnj-lib/HTML/.cm/SKEL/html-gram.sml b/smlnj-lib/HTML/.cm/SKEL/html-gram.sml new file mode 100644 index 0000000..7872fab --- /dev/null +++ b/smlnj-lib/HTML/.cm/SKEL/html-gram.sml @@ -0,0 +1,2 @@ +Skeleton 5 +ae"HTMLLrValsFn"i3aToken"gp1c"TOKEN"aHTMLAttrs"gp1c"HTML_ATTRS"f3List"HTML"h2ad"ParserData"h7aHeader"0aLrTable"gp24Cagp1b4f6d"Char"C4d"General"d"String"d"Array"Nb(d2f3 d"HTMLAttrVals"'aMlyValue"1ad"EC"h2bf2$'ad"Actions"h2bf44 $'f1$Nad"Tokens"j1gp1c"HTML_TOKENS" \ No newline at end of file diff --git a/smlnj-lib/HTML/.cm/SKEL/html-lex.sml b/smlnj-lib/HTML/.cm/SKEL/html-lex.sml new file mode 100644 index 0000000..27fb9b9 --- /dev/null +++ b/smlnj-lib/HTML/.cm/SKEL/html-lex.sml @@ -0,0 +1,2 @@ +Skeleton 5 +ae"HTMLLexFn"i4aTokens"gp1c"HTML_TOKENS"aErr"gp1c"HTML_ERROR"aHTMLAttrs"gp1c"HTML_ATTRS"f6Char"CList"String"TextIO"Vector"Nh3ayyInput"jh3aTIO"gp1:ad"TSIO"gp2d"StreamIO"ad"TPIO"gp1d"TextPrimIO"h0aUserDeclarations"h2aT"aElems"jh3a8a+gp1+agp1gp1e"HTMLElementsFn"bbd2egp1f9d"IO" C4(>/Nf0f1 \ No newline at end of file diff --git a/smlnj-lib/HTML/.cm/SKEL/html-parser-fn.sml b/smlnj-lib/HTML/.cm/SKEL/html-parser-fn.sml new file mode 100644 index 0000000..1cd0c73 --- /dev/null +++ b/smlnj-lib/HTML/.cm/SKEL/html-parser-fn.sml @@ -0,0 +1,2 @@ +Skeleton 5 +ae"HTMLParserFn"i2aErr"gp1c"HTML_ERROR"f2d"HTML"jh6ad"TIO"gp1d"TextIO"CaHTMLAttrs"jgp1e"HTMLAttrsFn"aHTMLLrVals"jh2aToken"gp2LrParser" gp1e"HTMLLrValsFn"aLex"jh3aaTokens"gp20gp1e"HTMLLexFn"ad"Parser"jh3a7gp17a gp1 aParserData"gp20 gp1e"JoinWithArg"ad"CheckHTML"jgp1e"CheckHTMLFn"Nh0 \ No newline at end of file diff --git a/smlnj-lib/HTML/.cm/SKEL/html-sig.sml b/smlnj-lib/HTML/.cm/SKEL/html-sig.sml new file mode 100644 index 0000000..ef82071 --- /dev/null +++ b/smlnj-lib/HTML/.cm/SKEL/html-sig.sml @@ -0,0 +1,2 @@ +Skeleton 5 +ac"HTML"h9ad"HttpMethod"0ad"InputType"ad"IAlign"ad"HAlign"Cad"CellVAlign"ad"CaptionAlign"ad"ULStyle"ad"Shape"ad"TextFlowCtl"N \ No newline at end of file diff --git a/smlnj-lib/HTML/.cm/SKEL/html.sml b/smlnj-lib/HTML/.cm/SKEL/html.sml new file mode 100644 index 0000000..b844d3d --- /dev/null +++ b/smlnj-lib/HTML/.cm/SKEL/html.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f4d"Char"d"List"d"General"d"String"adHTML"jh9ad"HttpMethod"0ad"InputType">ad"IAlign">ad"HAlign">Cad"CellVAlign">ad"CaptionAlign">ad"ULStyle">ad"Shape">ad"TextFlowCtl">Ngp1c' \ No newline at end of file diff --git a/smlnj-lib/HTML/.cm/SKEL/make-html.sml b/smlnj-lib/HTML/.cm/SKEL/make-html.sml new file mode 100644 index 0000000..e5da5e5 --- /dev/null +++ b/smlnj-lib/HTML/.cm/SKEL/make-html.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f1d"HTML"ad"MakeHTML"j0 \ No newline at end of file diff --git a/smlnj-lib/HTML/.cm/SKEL/pr-html.sml b/smlnj-lib/HTML/.cm/SKEL/pr-html.sml new file mode 100644 index 0000000..67716ca --- /dev/null +++ b/smlnj-lib/HTML/.cm/SKEL/pr-html.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f4d"ListFormat"d"List"d"Int"HTML"ad"PrHTML"jh2ad"H"gp1ad"F"gp1d"Format"h0 \ No newline at end of file diff --git a/smlnj-lib/HTML/.cm/amd64-unix/check-html-fn.sml b/smlnj-lib/HTML/.cm/amd64-unix/check-html-fn.sml new file mode 100644 index 0000000..261a1ab Binary files /dev/null and b/smlnj-lib/HTML/.cm/amd64-unix/check-html-fn.sml differ diff --git a/smlnj-lib/HTML/.cm/amd64-unix/html-attr-vals.sml b/smlnj-lib/HTML/.cm/amd64-unix/html-attr-vals.sml new file mode 100644 index 0000000..d5394a3 Binary files /dev/null and b/smlnj-lib/HTML/.cm/amd64-unix/html-attr-vals.sml differ diff --git a/smlnj-lib/HTML/.cm/amd64-unix/html-attrs-fn.sml b/smlnj-lib/HTML/.cm/amd64-unix/html-attrs-fn.sml new file mode 100644 index 0000000..8ebc59f Binary files /dev/null and b/smlnj-lib/HTML/.cm/amd64-unix/html-attrs-fn.sml differ diff --git a/smlnj-lib/HTML/.cm/amd64-unix/html-attrs-sig.sml b/smlnj-lib/HTML/.cm/amd64-unix/html-attrs-sig.sml new file mode 100644 index 0000000..4959a5e Binary files /dev/null and b/smlnj-lib/HTML/.cm/amd64-unix/html-attrs-sig.sml differ diff --git a/smlnj-lib/HTML/.cm/amd64-unix/html-defaults.sml b/smlnj-lib/HTML/.cm/amd64-unix/html-defaults.sml new file mode 100644 index 0000000..1400ec0 Binary files /dev/null and b/smlnj-lib/HTML/.cm/amd64-unix/html-defaults.sml differ diff --git a/smlnj-lib/HTML/.cm/amd64-unix/html-elements-fn.sml b/smlnj-lib/HTML/.cm/amd64-unix/html-elements-fn.sml new file mode 100644 index 0000000..01f1295 Binary files /dev/null and b/smlnj-lib/HTML/.cm/amd64-unix/html-elements-fn.sml differ diff --git a/smlnj-lib/HTML/.cm/amd64-unix/html-error-sig.sml b/smlnj-lib/HTML/.cm/amd64-unix/html-error-sig.sml new file mode 100644 index 0000000..cc6f97d Binary files /dev/null and b/smlnj-lib/HTML/.cm/amd64-unix/html-error-sig.sml differ diff --git a/smlnj-lib/HTML/.cm/amd64-unix/html-gram.sig b/smlnj-lib/HTML/.cm/amd64-unix/html-gram.sig new file mode 100644 index 0000000..a2ab446 Binary files /dev/null and b/smlnj-lib/HTML/.cm/amd64-unix/html-gram.sig differ diff --git a/smlnj-lib/HTML/.cm/amd64-unix/html-gram.sml b/smlnj-lib/HTML/.cm/amd64-unix/html-gram.sml new file mode 100644 index 0000000..2069afc Binary files /dev/null and b/smlnj-lib/HTML/.cm/amd64-unix/html-gram.sml differ diff --git a/smlnj-lib/HTML/.cm/amd64-unix/html-lex.sml b/smlnj-lib/HTML/.cm/amd64-unix/html-lex.sml new file mode 100644 index 0000000..6251e93 Binary files /dev/null and b/smlnj-lib/HTML/.cm/amd64-unix/html-lex.sml differ diff --git a/smlnj-lib/HTML/.cm/amd64-unix/html-parser-fn.sml b/smlnj-lib/HTML/.cm/amd64-unix/html-parser-fn.sml new file mode 100644 index 0000000..2410074 Binary files /dev/null and b/smlnj-lib/HTML/.cm/amd64-unix/html-parser-fn.sml differ diff --git a/smlnj-lib/HTML/.cm/amd64-unix/html-sig.sml b/smlnj-lib/HTML/.cm/amd64-unix/html-sig.sml new file mode 100644 index 0000000..defec10 Binary files /dev/null and b/smlnj-lib/HTML/.cm/amd64-unix/html-sig.sml differ diff --git a/smlnj-lib/HTML/.cm/amd64-unix/html.sml b/smlnj-lib/HTML/.cm/amd64-unix/html.sml new file mode 100644 index 0000000..5a255ed Binary files /dev/null and b/smlnj-lib/HTML/.cm/amd64-unix/html.sml differ diff --git a/smlnj-lib/HTML/.cm/amd64-unix/make-html.sml b/smlnj-lib/HTML/.cm/amd64-unix/make-html.sml new file mode 100644 index 0000000..299c020 Binary files /dev/null and b/smlnj-lib/HTML/.cm/amd64-unix/make-html.sml differ diff --git a/smlnj-lib/HTML/.cm/amd64-unix/pr-html.sml b/smlnj-lib/HTML/.cm/amd64-unix/pr-html.sml new file mode 100644 index 0000000..93d4b5e Binary files /dev/null and b/smlnj-lib/HTML/.cm/amd64-unix/pr-html.sml differ diff --git a/smlnj-lib/HTML/HTML32.dtd b/smlnj-lib/HTML/HTML32.dtd new file mode 100644 index 0000000..04e60d3 --- /dev/null +++ b/smlnj-lib/HTML/HTML32.dtd @@ -0,0 +1,597 @@ +<!-- + W3C Document Type Definition for the HyperText Markup Language + version 3.2 as ratified by a vote of W3C member companies. + For more information on W3C look at URL http://www.w3.org/ + + Date: Tuesday January 14th 1997 + + Author: Dave Raggett <dsr@w3.org> + + HTML 3.2 aims to capture recommended practice as of early '96 + and as such to be used as a replacement for HTML 2.0 (RFC 1866). + Widely deployed rendering attributes are included where they + have been shown to be interoperable. SCRIPT and STYLE are + included to smooth the introduction of client-side scripts + and style sheets. Browsers must avoid showing the contents + of these element Otherwise support for them is not required. + ID, CLASS and STYLE attributes are not included in this version + of HTML. +--> + +<!ENTITY % HTML.Version + "-//W3C//DTD HTML 3.2 Final//EN" + + -- Typical usage: + + <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final//EN"> + <html> + ... + </html> + -- + > + +<!--================== Deprecated Features Switch =========================--> + +<!ENTITY % HTML.Deprecated "INCLUDE"> + +<!--================== Imported Names =====================================--> + +<!ENTITY % Content-Type "CDATA" + -- meaning a MIME content type, as per RFC1521 + --> + +<!ENTITY % HTTP-Method "GET | POST" + -- as per HTTP specification + --> + +<!ENTITY % URL "CDATA" + -- The term URL means a CDATA attribute + whose value is a Uniform Resource Locator, + See RFC1808 (June 95) and RFC1738 (Dec 94). + --> + +<!-- Parameter Entities --> + +<!ENTITY % head.misc "SCRIPT|STYLE|META|LINK" -- repeatable head elements --> + +<!ENTITY % heading "H1|H2|H3|H4|H5|H6"> + +<!ENTITY % list "UL | OL | DIR | MENU"> + +<![ %HTML.Deprecated [ + <!ENTITY % preformatted "PRE | XMP | LISTING"> +]]> + +<!ENTITY % preformatted "PRE"> + +<!--================ Character mnemonic entities ==========================--> + +<!ENTITY % ISOlat1 PUBLIC + "ISO 8879-1986//ENTITIES Added Latin 1//EN//HTML"> +%ISOlat1; + +<!--================ Entities for special symbols =========================--> +<!-- &trade and &cbsp are not widely deployed and so not included here --> + +<!ENTITY amp CDATA "&" -- ampersand --> +<!ENTITY gt CDATA ">" -- greater than --> +<!ENTITY lt CDATA "<" -- less than --> + +<!--=================== Text Markup =======================================--> + +<!ENTITY % font "TT | I | B | U | STRIKE | BIG | SMALL | SUB | SUP"> + +<!ENTITY % phrase "EM | STRONG | DFN | CODE | SAMP | KBD | VAR | CITE"> + +<!ENTITY % special "A | IMG | APPLET | FONT | BASEFONT | BR | SCRIPT | MAP"> + +<!ENTITY % form "INPUT | SELECT | TEXTAREA"> + +<!ENTITY % text "#PCDATA | %font | %phrase | %special | %form"> + +<!ELEMENT (%font|%phrase) - - (%text)*> + +<!-- there are also 16 widely known color names although + the resulting colors are implementation dependent: + + aqua, black, blue, fuchsia, gray, green, lime, maroon, + navy, olive, purple, red, silver, teal, white, and yellow + + These colors were originally picked as being the standard + 16 colors supported with the Windows VGA palette. + --> + +<!ELEMENT FONT - - (%text)* -- local change to font --> +<!ATTLIST FONT + size CDATA #IMPLIED -- [+]nn e.g. size="+1", size=4 -- + color CDATA #IMPLIED -- #RRGGBB in hex, e.g. red: color="#FF0000" -- + > + +<!ELEMENT BASEFONT - O EMPTY -- base font size (1 to 7)--> +<!ATTLIST BASEFONT + size CDATA #IMPLIED -- e.g. size=3 -- + > + +<!ELEMENT BR - O EMPTY -- forced line break --> +<!ATTLIST BR + clear (left|all|right|none) none -- control of text flow -- + > + +<!--================== HTML content models ================================--> +<!-- + HTML has three basic content models: + + %text character level elements and text strings + %flow block-like elements e.g. paragraphs and lists + %bodytext as %flow plus headers H1-H6 and ADDRESS +--> + +<!ENTITY % block + "P | %list | %preformatted | DL | DIV | CENTER | + BLOCKQUOTE | FORM | ISINDEX | HR | TABLE"> + +<!-- %flow is used for DD and LI --> + +<!ENTITY % flow "(%text | %block)*"> + +<!--=================== Document Body =====================================--> + +<!ENTITY % body.content "(%heading | %text | %block | ADDRESS)*"> + +<!ENTITY % color "CDATA" -- a color specification: #HHHHHH @@ details? --> + +<!ENTITY % body-color-attrs " + bgcolor %color #IMPLIED + text %color #IMPLIED + link %color #IMPLIED + vlink %color #IMPLIED + alink %color #IMPLIED + "> + +<!ELEMENT BODY O O %body.content> +<!ATTLIST BODY + background %URL #IMPLIED -- texture tile for document background -- + %body-color-attrs; -- bgcolor, text, link, vlink, alink -- + > + +<!ENTITY % address.content "((%text;) | P)*"> + +<!ELEMENT ADDRESS - - %address.content> + +<!ELEMENT DIV - - %body.content> +<!ATTLIST DIV + align (left|center|right) #IMPLIED -- alignment of following text -- + > + +<!-- CENTER is a shorthand for DIV with ALIGN=CENTER --> +<!ELEMENT center - - %body.content> + +<!--================== The Anchor Element =================================--> + +<!ELEMENT A - - (%text)* -(A)> +<!ATTLIST A + name CDATA #IMPLIED -- named link end -- + href %URL #IMPLIED -- URL for linked resource -- + rel CDATA #IMPLIED -- forward link types -- + rev CDATA #IMPLIED -- reverse link types -- + title CDATA #IMPLIED -- advisory title string -- + > + +<!--================== Client-side image maps ============================--> + +<!-- These can be placed in the same document or grouped in a + separate document although this isn't yet widely supported --> + +<!ENTITY % SHAPE "(rect|circle|poly)"> +<!ENTITY % COORDS "CDATA" -- comma separated list of numbers --> + +<!ELEMENT MAP - - (AREA)*> +<!ATTLIST MAP + name CDATA #IMPLIED + > + +<!ELEMENT AREA - O EMPTY> +<!ATTLIST AREA + shape %SHAPE rect + coords %COORDS #IMPLIED -- defines coordinates for shape -- + href %URL #IMPLIED -- this region acts as hypertext link -- + nohref (nohref) #IMPLIED -- this region has no action -- + alt CDATA #REQUIRED -- needed for non-graphical user agents -- + > + +<!--================== The LINK Element ==================================--> + +<!ENTITY % Types "CDATA" + -- See Internet Draft: draft-ietf-html-relrev-00.txt + LINK has been part of HTML since the early days + although few browsers as yet take advantage of it. + + Relationship values can be used in principle: + + a) for document specific toolbars/menus when used + with the LINK element in the document head: + b) to link to a separate style sheet + c) to make a link to a script + d) by stylesheets to control how collections of + html nodes are rendered into printed documents + e) to make a link to a printable version of this document + e.g. a postscript or pdf version +--> + +<!ELEMENT LINK - O EMPTY> +<!ATTLIST LINK + href %URL #IMPLIED -- URL for linked resource -- + rel %Types #IMPLIED -- forward link types -- + rev %Types #IMPLIED -- reverse link types -- + title CDATA #IMPLIED -- advisory title string -- + > + +<!--=================== Images ============================================--> + +<!ENTITY % Length "CDATA" -- nn for pixels or nn% for percentage length --> +<!ENTITY % Pixels "NUMBER" -- integer representing length in pixels --> + +<!-- Suggested widths are used for negotiating image size + with the module responsible for painting the image. + align=left or right cause image to float to margin + and for subsequent text to wrap around image --> + +<!ENTITY % IAlign "(top|middle|bottom|left|right)"> + +<!ELEMENT IMG - O EMPTY -- Embedded image --> +<!ATTLIST IMG + src %URL #REQUIRED -- URL of image to embed -- + alt CDATA #IMPLIED -- for display in place of image -- + align %IAlign #IMPLIED -- vertical or horizontal alignment -- + height %Pixels #IMPLIED -- suggested height in pixels -- + width %Pixels #IMPLIED -- suggested width in pixels -- + border %Pixels #IMPLIED -- suggested link border width -- + hspace %Pixels #IMPLIED -- suggested horizontal gutter -- + vspace %Pixels #IMPLIED -- suggested vertical gutter -- + usemap %URL #IMPLIED -- use client-side image map -- + ismap (ismap) #IMPLIED -- use server image map -- + > + +<!-- USEMAP points to a MAP element which may be in this document + or an external document, although the latter is not widely supported --> + +<!--=================== Java APPLET tag ===================================--> +<!-- + This tag is supported by all Java enabled browsers. Applet resources + (including their classes) are normally loaded relative to the document + URL (or <BASE> element if it is defined). The CODEBASE attribute is used + to change this default behavior. If the CODEBASE attribute is defined then + it specifies a different location to find applet resources. The value + can be an absolute URL or a relative URL. The absolute URL is used as is + without modification and is not effected by the documents <BASE> element. + When the codebase attribute is relative, then it is relative to the + document URL (or <BASE> tag if defined). +--> +<!ELEMENT APPLET - - (PARAM | %text)*> +<!ATTLIST APPLET + codebase %URL #IMPLIED -- code base -- + code CDATA #REQUIRED -- class file -- + alt CDATA #IMPLIED -- for display in place of applet -- + name CDATA #IMPLIED -- applet name -- + width %Pixels #REQUIRED -- suggested width in pixels -- + height %Pixels #REQUIRED -- suggested height in pixels -- + align %IAlign #IMPLIED -- vertical or horizontal alignment -- + hspace %Pixels #IMPLIED -- suggested horizontal gutter -- + vspace %Pixels #IMPLIED -- suggested vertical gutter -- + > + +<!ELEMENT PARAM - O EMPTY> +<!ATTLIST PARAM + name NMTOKEN #REQUIRED -- The name of the parameter -- + value CDATA #IMPLIED -- The value of the parameter -- + > + +<!-- +Here is an example: + + <applet codebase="applets/NervousText" + code=NervousText.class + width=300 + height=50> + <param name=text value="Java is Cool!"> + <img src=sorry.gif alt="This looks better with Java support"> + </applet> +--> + +<!--=================== Horizontal Rule ===================================--> + +<!ELEMENT HR - O EMPTY> +<!ATTLIST HR + align (left|right|center) #IMPLIED + noshade (noshade) #IMPLIED + size %Pixels #IMPLIED + width %Length #IMPLIED + > +<!--=================== Paragraphs=========================================--> + +<!ELEMENT P - O (%text)*> +<!ATTLIST P + align (left|center|right) #IMPLIED + > + +<!--=================== Headings ==========================================--> + +<!-- + There are six levels of headers from H1 (the most important) + to H6 (the least important). +--> + +<!ELEMENT ( %heading ) - - (%text;)*> +<!ATTLIST ( %heading ) + align (left|center|right) #IMPLIED + > + +<!--=================== Preformatted Text =================================--> + +<!-- excludes images and changes in font size --> + +<!ENTITY % pre.exclusion "IMG|BIG|SMALL|SUB|SUP|FONT"> + +<!ELEMENT PRE - - (%text)* -(%pre.exclusion)> +<!ATTLIST PRE + width NUMBER #implied -- is this widely supported? -- + > + +<![ %HTML.Deprecated [ + +<!ENTITY % literal "CDATA" + -- historical, non-conforming parsing mode where + the only markup signal is the end tag + in full + --> + +<!ELEMENT (XMP|LISTING) - - %literal> +<!ELEMENT PLAINTEXT - O %literal> + +]]> + +<!--=================== Block-like Quotes =================================--> + +<!ELEMENT BLOCKQUOTE - - %body.content> + +<!--=================== Lists =============================================--> + +<!-- + HTML 3.2 allows you to control the sequence number for ordered lists. + You can set the sequence number with the START and VALUE attributes. + The TYPE attribute may be used to specify the rendering of ordered + and unordered lists. +--> + +<!-- definition lists - DT for term, DD for its definition --> + +<!ELEMENT DL - - (DT|DD)+> +<!ATTLIST DL + compact (compact) #IMPLIED -- more compact style -- + > + +<!ELEMENT DT - O (%text)*> +<!ELEMENT DD - O %flow;> + +<!-- Ordered lists OL, and unordered lists UL --> +<!ELEMENT (OL|UL) - - (LI)+> + +<!-- + Numbering style + 1 arablic numbers 1, 2, 3, ... + a lower alpha a, b, c, ... + A upper alpha A, B, C, ... + i lower roman i, ii, iii, ... + I upper roman I, II, III, ... + + The style is applied to the sequence number which by default + is reset to 1 for the first list item in an ordered list. + + This can't be expressed directly in SGML due to case folding. +--> + +<!ENTITY % OLStyle "CDATA" -- constrained to: [1|a|A|i|I] --> + +<!ATTLIST OL -- ordered lists -- + type %OLStyle #IMPLIED -- numbering style -- + start NUMBER #IMPLIED -- starting sequence number -- + compact (compact) #IMPLIED -- reduced interitem spacing -- + > + +<!-- bullet styles --> + +<!ENTITY % ULStyle "disc|square|circle"> + +<!ATTLIST UL -- unordered lists -- + type (%ULStyle) #IMPLIED -- bullet style -- + compact (compact) #IMPLIED -- reduced interitem spacing -- + > + +<!ELEMENT (DIR|MENU) - - (LI)+ -(%block)> +<!ATTLIST DIR + compact (compact) #IMPLIED + > +<!ATTLIST MENU + compact (compact) #IMPLIED + > + +<!-- <DIR> Directory list --> +<!-- <DIR COMPACT> Compact list style --> +<!-- <MENU> Menu list --> +<!-- <MENU COMPACT> Compact list style --> + +<!-- The type attribute can be used to change the bullet style + in unordered lists and the numbering style in ordered lists --> + +<!ENTITY % LIStyle "CDATA" -- constrained to: "(%ULStyle|%OLStyle)" --> + +<!ELEMENT LI - O %flow -- list item --> +<!ATTLIST LI + type %LIStyle #IMPLIED -- list item style -- + value NUMBER #IMPLIED -- reset sequence number -- + > + +<!--================ Forms ===============================================--> + +<!ELEMENT FORM - - %body.content -(FORM)> +<!ATTLIST FORM + action %URL #IMPLIED -- server-side form handler -- + method (%HTTP-Method) GET -- see HTTP specification -- + enctype %Content-Type; "application/x-www-form-urlencoded" + > + +<!ENTITY % InputType + "(TEXT | PASSWORD | CHECKBOX | RADIO | SUBMIT + | RESET | FILE | HIDDEN | IMAGE)"> + +<!ELEMENT INPUT - O EMPTY> +<!ATTLIST INPUT + type %InputType TEXT -- what kind of widget is needed -- + name CDATA #IMPLIED -- required for all but submit and reset -- + value CDATA #IMPLIED -- required for radio and checkboxes -- + checked (checked) #IMPLIED -- for radio buttons and check boxes -- + size CDATA #IMPLIED -- specific to each type of field -- + maxlength NUMBER #IMPLIED -- max chars allowed in text fields -- + src %URL #IMPLIED -- for fields with background images -- + align %IAlign #IMPLIED -- vertical or horizontal alignment -- + > + +<!ELEMENT SELECT - - (OPTION+)> +<!ATTLIST SELECT + name CDATA #REQUIRED + size NUMBER #IMPLIED + multiple (multiple) #IMPLIED + > + +<!ELEMENT OPTION - O (#PCDATA)*> +<!ATTLIST OPTION + selected (selected) #IMPLIED + value CDATA #IMPLIED -- defaults to element content -- + > + +<!-- Multi-line text input field. --> + +<!ELEMENT TEXTAREA - - (#PCDATA)*> +<!ATTLIST TEXTAREA + name CDATA #REQUIRED + rows NUMBER #REQUIRED + cols NUMBER #REQUIRED + > + +<!--======================= Tables ========================================--> + +<!-- Widely deployed subset of the full table standard, see RFC 1942 + e.g. at http://www.ics.uci.edu/pub/ietf/html/rfc1942.txt --> + +<!-- horizontal placement of table relative to window --> +<!ENTITY % Where "(left|center|right)"> + +<!-- horizontal alignment attributes for cell contents --> +<!ENTITY % cell.halign + "align (left|center|right) #IMPLIED" + > + +<!-- vertical alignment attributes for cell contents --> +<!ENTITY % cell.valign + "valign (top|middle|bottom) #IMPLIED" + > + +<!ELEMENT table - - (caption?, tr+)> +<!ELEMENT tr - O (th|td)*> +<!ELEMENT (th|td) - O %body.content> + +<!ATTLIST table -- table element -- + align %Where; #IMPLIED -- table position relative to window -- + width %Length #IMPLIED -- table width relative to window -- + border %Pixels #IMPLIED -- controls frame width around table -- + cellspacing %Pixels #IMPLIED -- spacing between cells -- + cellpadding %Pixels #IMPLIED -- spacing within cells -- + > + +<!ELEMENT CAPTION - - (%text;)* -- table or figure caption --> +<!ATTLIST CAPTION + align (top|bottom) #IMPLIED + > + +<!ATTLIST tr -- table row -- + %cell.halign; -- horizontal alignment in cells -- + %cell.valign; -- vertical alignment in cells -- + > + +<!ATTLIST (th|td) -- header or data cell -- + nowrap (nowrap) #IMPLIED -- suppress word wrap -- + rowspan NUMBER 1 -- number of rows spanned by cell -- + colspan NUMBER 1 -- number of cols spanned by cell -- + %cell.halign; -- horizontal alignment in cell -- + %cell.valign; -- vertical alignment in cell -- + width %Pixels #IMPLIED -- suggested width for cell -- + height %Pixels #IMPLIED -- suggested height for cell -- + > + +<!--================ Document Head ========================================--> + +<!-- %head.misc defined earlier on as "SCRIPT|STYLE|META|LINK" --> + +<!ENTITY % head.content "TITLE & ISINDEX? & BASE?"> + +<!ELEMENT HEAD O O (%head.content) +(%head.misc)> + +<!ELEMENT TITLE - - (#PCDATA)* -(%head.misc) + -- The TITLE element is not considered part of the flow of text. + It should be displayed, for example as the page header or + window title. + --> + +<!ELEMENT ISINDEX - O EMPTY> +<!ATTLIST ISINDEX + prompt CDATA #IMPLIED -- prompt message --> + +<!-- + The BASE element gives an absolute URL for dereferencing relative + URLs, e.g. + + <BASE href="http://foo.com/index.html"> + ... + <IMG SRC="images/bar.gif"> + + The image is deferenced to + + http://foo.com/images/bar.gif + + In the absence of a BASE element the document URL should be used. + Note that this is not necessarily the same as the URL used to + request the document, as the base URL may be overridden by an HTTP + header accompanying the document. +--> + +<!ELEMENT BASE - O EMPTY> +<!ATTLIST BASE + href %URL #REQUIRED + > + +<!ELEMENT META - O EMPTY -- Generic Metainformation --> +<!ATTLIST META + http-equiv NAME #IMPLIED -- HTTP response header name -- + name NAME #IMPLIED -- metainformation name -- + content CDATA #REQUIRED -- associated information -- + > + +<!-- SCRIPT/STYLE are place holders for transition to next version of HTML --> + +<!ELEMENT STYLE - - CDATA -- placeholder for style info --> +<!ELEMENT SCRIPT - - CDATA -- placeholder for script statements --> + +<!--================ Document Structure ===================================--> + +<!ENTITY % version.attr "VERSION CDATA #FIXED '%HTML.Version;'"> + +<![ %HTML.Deprecated [ + <!ENTITY % html.content "HEAD, BODY, PLAINTEXT?"> +]]> + +<!ENTITY % html.content "HEAD, BODY"> + +<!ELEMENT HTML O O (%html.content)> +<!ATTLIST HTML + %version.attr; + > diff --git a/smlnj-lib/HTML/README b/smlnj-lib/HTML/README new file mode 100644 index 0000000..5b7279e --- /dev/null +++ b/smlnj-lib/HTML/README @@ -0,0 +1,32 @@ +This library provides support for parsing and pretty-printing HTML. +The file html-sig.sml and html.sml define a parse-tree representation +of HTML 3.2 (as defined in the January 14th 1997 specification). + +The HTML structure implements a collection of datatypes +that describe the syntax of HTML trees according to the 3.2 spec. +The library provides support for parsing and printing these +datatypes. The functor HTMLParserFn implements the parser and +the PrHTML structure provides printing of HTML trees. Also, there +is a structure called MakeHTML, which provides helper functions for +constructing HTML trees (mostly defaults of attribute values). + +Note that this is a fairly strict interpretation of the standard; +non-standard tags and attributes will cause errors. To allow some +flexibility, the parser is functorized over a collection of error +reporting functions (see html-error-sig.sml). + +The file test-parser.sml gives an example of how the parser part of the +library can be used. + +TODO LIST: + +The parser has trouble with white-space in the <HEAD>. + +The pretty-printer needs to be rewritten to deal with line breaks better. + +Some library support for entities (e.g., "<" should be provided). + + John Reppy + Bell Labs, Lucent Technologies + jhr@research.bell-labs.com + diff --git a/smlnj-lib/HTML/check-html-fn.sml b/smlnj-lib/HTML/check-html-fn.sml new file mode 100644 index 0000000..2968d62 --- /dev/null +++ b/smlnj-lib/HTML/check-html-fn.sml @@ -0,0 +1,207 @@ +(* check-html-fn.sml + * + * COPYRIGHT (c) 1996 AT&T Research. + * + * This implements a tree walk over an HTML file to check for + * errors, such as violations of exclusions. + *) + +functor CheckHTMLFn (Err : HTML_ERROR) : sig + + type context = {file : string option, line : int} + + val check : context -> HTML.html -> unit + + end = struct + + type context = Err.context + + fun check context (HTML.HTML{body=HTML.BODY{content, ...}, ...}) = let + fun error (elem, ctx) = + Err.syntaxError context + (Format.format "unexpected %s element in %s" [ + Format.STR elem, Format.STR ctx + ]) + fun contentError ctx = + Err.syntaxError context + (Format.format "unexpected element in %s" [Format.STR ctx]) + fun formError elem = + Err.syntaxError context + (Format.format "unexpected %s element not in FORM" [ + Format.STR elem + ]) + fun attrError attr = Err.missingAttr context attr + fun checkBodyContent {inForm} b = (case b + of (HTML.Hn{n, align, content}) => checkText { + inAnchor=false, inForm=inForm, inPre=false, inApplet=false + } content + | (HTML.ADDRESS block) => checkAddress {inForm=inForm} block + | (HTML.BlockList bl) => + List.app (checkBodyContent {inForm=inForm}) bl + | block => checkBlock {inForm=inForm} block + (* end case *)) + and checkAddress {inForm} blk = (case blk + of (HTML.BlockList bl) => + List.app (checkAddress {inForm=inForm}) bl + | (HTML.TextBlock txt) => checkText { + inAnchor=false, inForm=inForm, inPre=false, inApplet = false + } txt + | (HTML.P{content, ...}) => checkText { + inAnchor=false, inForm=inForm, inPre=false, inApplet = false + } content + | _ => contentError "ADDRESS" + (* end case *)) + and checkBlock {inForm} blk = (case blk + of (HTML.BlockList bl) => + List.app (checkBlock {inForm=inForm}) bl + | (HTML.TextBlock txt) => checkText { + inAnchor=false, inForm=inForm, inPre=false, inApplet = false + } txt + | (HTML.P{content, ...}) => checkText { + inAnchor=false, inForm=inForm, inPre=false, inApplet = false + } content + | (HTML.UL{content, ...}) => + checkItems {inForm=inForm, inDirOrMenu=false} content + | (HTML.OL{content, ...}) => + checkItems {inForm=inForm, inDirOrMenu=false} content + | (HTML.DIR{content, ...}) => + checkItems {inForm=inForm, inDirOrMenu=true} content + | (HTML.MENU{content, ...}) => + checkItems {inForm=inForm, inDirOrMenu=true} content + | (HTML.DL{content, ...}) => + checkDLItems {inForm=inForm} content + | (HTML.PRE{content, ...}) => checkText { + inAnchor=false, inForm=inForm, inPre=true, inApplet = false + } content + | (HTML.DIV{content, ...}) => + checkBodyContent {inForm=inForm} content + | (HTML.CENTER content) => + checkBodyContent {inForm=inForm} content + | (HTML.BLOCKQUOTE content) => + checkBodyContent {inForm=inForm} content + | (HTML.FORM{content, ...}) => ( + if inForm then error("FORM", "FORM") else (); + checkBodyContent {inForm=true} content) + | (HTML.ISINDEX _) => () + | (HTML.HR _) => () + | (HTML.TABLE{ + caption=SOME(HTML.CAPTION{content=caption, ...}), + content, ... + }) => ( + checkText { + inAnchor=false, inForm=inForm, inPre=false, + inApplet = false + } caption; + checkRows {inForm=inForm} content) + | (HTML.TABLE{content, ...}) => checkRows {inForm=inForm} content + | (HTML.Hn _) => error ("Hn", "block") + | (HTML.ADDRESS _) => error ("ADDRESS", "block") + (* end case *)) + and checkItems {inForm, inDirOrMenu} items = let + fun chkBlk (HTML.BlockList bl) = List.app chkBlk bl + | chkBlk (HTML.TextBlock txt) = () + | chkBlk (HTML.P _) = () + | chkBlk _ = error ("block", "DIR/MENU") + val chk = if inDirOrMenu + then (fn (HTML.LI{content, ...}) => ( + chkBlk content; checkBlock {inForm=inForm} content)) + else (fn (HTML.LI{content, ...}) => ( + checkBlock {inForm=inForm} content)) + in + List.app chk items + end + and checkDLItems {inForm} items = let + fun chk {dt, dd} = ( + List.app + (checkText { + inAnchor=false, inForm=inForm, inPre=false, inApplet=false + }) + dt; + checkBlock {inForm=inForm} dd) + in + List.app chk items + end + and checkRows {inForm} rows = let + fun chkCell (HTML.TH{content, ...}) = + checkBodyContent {inForm=inForm} content + | chkCell (HTML.TD{content, ...}) = + checkBodyContent {inForm=inForm} content + fun chkRow (HTML.TR{content, ...}) = List.app chkCell content + in + List.app chkRow rows + end + and checkText {inAnchor, inForm, inPre, inApplet} = let + fun chk txt = (case txt + of (HTML.TextList tl) => List.app chk tl + | (HTML.PCDATA _) => () + | (HTML.TT txt) => chk txt + | (HTML.I txt) => chk txt + | (HTML.B txt) => chk txt + | (HTML.U txt) => chk txt + | (HTML.STRIKE txt) => chk txt + | (HTML.BIG txt) => ( + if inPre then error("BIG", "PRE") else (); + chk txt) + | (HTML.SMALL txt) => ( + if inPre then error("SMALL", "PRE") else (); + chk txt) + | (HTML.SUB txt) => ( + if inPre then error("SUB", "PRE") else (); + chk txt) + | (HTML.SUP txt) => ( + if inPre then error("SUP", "PRE") else (); + chk txt) + | (HTML.EM txt) => chk txt + | (HTML.STRONG txt) => chk txt + | (HTML.DFN txt) => chk txt + | (HTML.CODE txt) => chk txt + | (HTML.SAMP txt) => chk txt + | (HTML.KBD txt) => chk txt + | (HTML.VAR txt) => chk txt + | (HTML.CITE txt) => chk txt + | (HTML.A{content, ...}) => ( + if (inAnchor) then error("anchor", "anchor") else (); + checkText { + inAnchor=true, inForm=inForm, inPre=inPre, + inApplet=inApplet + } content) + | (HTML.IMG _) => + if inPre then error("IMG", "PRE") else () + | (HTML.APPLET{content, ...}) => checkText { + inAnchor=false, inForm=inForm, inPre=inPre, + inApplet=true + } content + | (HTML.PARAM _) => + if inApplet then error ("param", "applet") else () + | (HTML.FONT{content, ...}) => + if inPre then error("FONT", "PRE") else () + | (HTML.BASEFONT{content, ...}) => + if inPre then error("BASEFONT", "PRE") else () + | (HTML.BR _) => () + | (HTML.MAP _) => () + | (HTML.INPUT{ty, name, value, ...}) => ( + if (not inForm) then formError "INPUT" else (); + if ((name = NONE) + andalso (ty <> SOME(HTML.InputType.submit)) + andalso (ty <> SOME(HTML.InputType.reset))) + then attrError "NAME" + else (); + if ((value = NONE) + andalso ((ty = SOME(HTML.InputType.radio)) + orelse (ty = SOME(HTML.InputType.checkbox)))) + then attrError "VALUE" + else ()) + | (HTML.SELECT _) => + if (not inForm) then formError "SELECT" else () + | (HTML.TEXTAREA _) => + if (not inForm) then formError "TEXTAREA" else () + | (HTML.SCRIPT _) => () + (* end case *)) + in + chk + end + in + checkBodyContent {inForm=false} content + end + + end diff --git a/smlnj-lib/HTML/html-attr-vals.sml b/smlnj-lib/HTML/html-attr-vals.sml new file mode 100644 index 0000000..9e4650c --- /dev/null +++ b/smlnj-lib/HTML/html-attr-vals.sml @@ -0,0 +1,20 @@ +(* html-attr-vals.sml + * + * COPYRIGHT (c) 1996 AT&T Research. + * + * This structure is necessary since the attrs type is used in the parser, + * and there is no way to get it into the parser's signature. + *) + +structure HTMLAttrVals = + struct + + (* support for building elements that have attributes *) + datatype attr_val + = NAME of string (* [a-zA-Z.-]+ *) + | STRING of string (* a string enclosed in "" or '' *) + | IMPLICIT + + type attrs = (string * attr_val) list + + end; diff --git a/smlnj-lib/HTML/html-attrs-fn.sml b/smlnj-lib/HTML/html-attrs-fn.sml new file mode 100644 index 0000000..fde0845 --- /dev/null +++ b/smlnj-lib/HTML/html-attrs-fn.sml @@ -0,0 +1,914 @@ +(* html-attrs-fn.sml + * + * COPYRIGHT (c) 1996 AT&T Research. + * + * This provides support for parsing element start tags. + *) + +functor HTMLAttrsFn (Err : HTML_ERROR) : HTML_ATTRS = + struct + + open HTMLAttrVals (* inherit types *) + + fun attrValToString (NAME s) = s + | attrValToString (STRING s) = s + | attrValToString IMPLICIT = "" + + datatype attr_ty + = AT_TEXT (* either a string or name value *) + | AT_NAMES of string list (* one of a list of names *) + | AT_NUMBER (* an integer attribute *) + | AT_IMPLICIT + | AT_INSTANCE (* if an attribute FOO has type AT_NAMES with *) + (* values BAR and BAZ, then BAR and BAZ are *) + (* legal attributes, being shorthand for *) + (* FOO=BAR and FOO=BAZ. We introduce an *) + (* (k, AT_INSTANCE) entry for BAR and BAZ, where *) + (* k is the slot that FOO has been assigned. *) + + type context = Err.context + + structure HTbl = HashTableFn (struct + type hash_key = string + val hashVal = HashString.hashString + val sameKey = (op = : (string * string) -> bool) + end) + + (* an attribute map (attr_map) is a map from attribute names to attribute + * value slots and types. + *) + abstype attr_map = AMap of { + numAttrs : int, + attrTbl : (int * attr_ty) HTbl.hash_table + } + and attr_vec = AVec of { + vec : attr_val option Array.array, + ctx : context + } + with + (* create an attr_map from the list of attribute names and types. *) + fun mkAttrs data = let + val n = length data + val tbl = HTbl.mkTable (n, Fail "Attrs") + fun ins ((name, ty), id) = ( + HTbl.insert tbl (name, (id, ty)); + case ty + of (AT_NAMES l) => let + fun ins' nm = if (nm <> name) + then HTbl.insert tbl (nm, (id, AT_INSTANCE)) + else () + in + List.app ins' l + end + | _ => () + (* end case *); + id+1) + in + List.foldl ins 0 data; + AMap{numAttrs = n, attrTbl = tbl} + end + (* create an atttribute vector of attribute values using the attribute + * map to assign slots and typecheck the values. + *) + fun attrListToVec (ctx, AMap{numAttrs, attrTbl}, attrs) = let + val attrArray = Array.array (numAttrs, NONE) + fun update (_, NONE) = () + | update (id, SOME v) = (case Array.sub(attrArray, id) + of NONE => Array.update(attrArray, id, SOME v) + | (SOME _) => (* ignore multiple attribute definition *) () + (* end case *)) + (* compare two names for case-insensitive equality, where the second + * name is known to be all uppercase. + *) + fun eqName name name' = let + fun cmpC (c1, c2) = Char.compare(Char.toUpper c1, c2) + in + (String.collate cmpC (name, name')) = EQUAL + end + fun ins (attrName, attrVal) = let + fun error () = ( + Err.badAttrVal ctx (attrName, attrValToString attrVal); + NONE) + fun cvt (AT_IMPLICIT, IMPLICIT) = SOME IMPLICIT + | cvt (AT_INSTANCE, IMPLICIT) = SOME(NAME attrName) + | cvt (AT_TEXT, v) = SOME v + | cvt (AT_NUMBER, v) = SOME v + | cvt (AT_NAMES names, (NAME s | STRING s)) = ( + case (List.find (eqName s) names) + of NONE => error() + | (SOME name) => SOME(NAME name) + (* end case *)) + | cvt (AT_IMPLICIT, (NAME s | STRING s)) = + if (s = attrName) + then SOME IMPLICIT + else error() + | cvt _ = error() + in + case (HTbl.find attrTbl attrName) + of NONE => Err.unknownAttr ctx attrName + | (SOME(id, ty)) => update (id, cvt (ty, attrVal)) + (* end case *) + end + in + List.app ins attrs; + AVec{vec = attrArray, ctx = ctx} + end + (* given an attribute map and attribute name, return a function that + * fetches a value from the attribute's slot in an attribute vector. + *) + fun bindFindAttr (AMap{attrTbl, ...}, attr) = let + val (id, _) = HTbl.lookup attrTbl attr + in + fn (AVec{vec, ...}) => Array.sub(vec, id) + end + (* return the context of the element that contains the attribute vector *) + fun getContext (AVec{ctx, ...}) = ctx + end (* abstype *) + + fun getFlag (attrMap, attr) = let + val getFn = bindFindAttr (attrMap, attr) + fun get attrVec = (case (getFn attrVec) + of NONE => false + | _ => true + (* end case *)) + in + get + end + fun getCDATA (attrMap, attr) = let + val getFn = bindFindAttr (attrMap, attr) + fun get attrVec = (case (getFn attrVec) + of NONE => NONE + | (SOME((STRING s) | (NAME s))) => SOME s + | _ => ( + Err.missingAttrVal (getContext attrVec) attr; + NONE) + (* end case *)) + in + get + end + fun getNAMES fromString (attrMap, attr) = let + val getFn = bindFindAttr (attrMap, attr) + fun get attrVec = (case (getFn attrVec) + of NONE => NONE + | (SOME(NAME s)) => fromString s + | (SOME v) => + (** This case should be impossible, since attrListToVec + ** ensures that AT_NAMES valued attributes are always NAME. + **) + raise Fail "getNAMES" + (* end case *)) + in + get + end + fun getNUMBER (attrMap, attr) = let + val getFn = bindFindAttr (attrMap, attr) + fun get attrVec = (case (getFn attrVec) + of NONE => NONE + | (SOME((STRING s) | (NAME s))) => (case (Int.fromString s) + of NONE => ( + Err.badAttrVal (getContext attrVec) (attr, s); + NONE) + | someN => someN + (* end case *)) + | SOME IMPLICIT => raise Fail "getNUMBER: IMPLICIT unexpected" + (* end case *)) + in + get + end + fun getChar (attrMap, attr) = let + val getFn = bindFindAttr (attrMap, attr) + fun get attrVec = (case (getFn attrVec) + of NONE => NONE + | (SOME((STRING s) | (NAME s))) => + if (size s = 1) then SOME(String.sub(s, 0)) +(** NOTE: we should probably accept &#xx; as a character value **) + else ( + Err.badAttrVal (getContext attrVec) (attr, s); + NONE) + | SOME IMPLICIT => raise Fail "getChar: IMPLICIT unexpected" + (* end case *)) + in + get + end + + fun require (getFn, attrMap, attr, dflt) = let + val getFn = getFn (attrMap, attr) + fun get attrVec = (case getFn attrVec + of NONE => (Err.missingAttr (getContext attrVec) attr; dflt) + | (SOME v) => v + (* end case *)) + in + get + end + + (**** Element ISINDEX ****) + local + val attrMap = mkAttrs [ + ("PROMPT", AT_TEXT) + ] + val getPROMPT = getCDATA (attrMap, "PROMPT") + in + (* the ISINDEX element can occur in both the HEAD an BODY, so there are + * two datatype constructors for it. We just define the argument of the + * constructor here. + *) + fun mkISINDEX (ctx, attrs) = { + prompt = getPROMPT (attrListToVec(ctx, attrMap, attrs)) + } + end (* local *) + + (**** Element BASE ****) + local + val attrMap = mkAttrs [ + ("HREF", AT_TEXT) + ] + val getHREF = require (getCDATA, attrMap, "HREF", "") + in + fun mkBASE (ctx, attrs) = HTML.Head_BASE{ + href = getHREF(attrListToVec(ctx, attrMap, attrs)) + } + end (* local *) + + (**** Element META ****) + local + val attrMap = mkAttrs [ + ("HTTP-EQUIV", AT_TEXT), + ("NAME", AT_TEXT), + ("CONTENT", AT_TEXT) + ] + val getHTTP_EQUIV = getCDATA (attrMap, "HTTP-EQUIV") + val getNAME = getCDATA (attrMap, "NAME") + val getCONTENT = require (getCDATA, attrMap, "CONTENT", "") + in + fun mkMETA (ctx, attrs) = let + val attrVec = attrListToVec(ctx, attrMap, attrs) + in + HTML.Head_META{ + httpEquiv = getHTTP_EQUIV attrVec, + name = getNAME attrVec, + content = getCONTENT attrVec + } + end + end (* local *) + + (**** Element LINK ****) + local + val attrMap = mkAttrs [ + ("HREF", AT_TEXT), + ("ID", AT_TEXT), + ("TITLE", AT_TEXT), + ("REL", AT_TEXT), + ("REV", AT_TEXT) + ] + val getHREF = getCDATA (attrMap, "HREF") + val getID = getCDATA (attrMap, "ID") + val getREL = getCDATA (attrMap, "REL") + val getREV = getCDATA (attrMap, "REV") + val getTITLE = getCDATA (attrMap, "TITLE") + in + fun mkLINK (ctx, attrs) = let + val attrVec = attrListToVec(ctx, attrMap, attrs) + in + HTML.Head_LINK{ + href = getHREF attrVec, + id = getID attrVec, + rel = getREL attrVec, + rev = getREV attrVec, + title = getTITLE attrVec + } + end + end (* local *) + + (**** Element BODY ****) + local + val attrMap = mkAttrs [ + ("BACKGROUND", AT_TEXT), + ("BGCOLOR", AT_TEXT), + ("TEXT", AT_TEXT), + ("LINK", AT_TEXT), + ("VLINK", AT_TEXT), + ("ALINK", AT_TEXT) + ] + val getBACKGROUND = getCDATA (attrMap, "BACKGROUND") + val getBGCOLOR = getCDATA (attrMap, "BGCOLOR") + val getTEXT = getCDATA (attrMap, "TEXT") + val getLINK = getCDATA (attrMap, "LINK") + val getVLINK = getCDATA (attrMap, "VLINK") + val getALINK = getCDATA (attrMap, "ALINK") + in + fun mkBODY (ctx, attrs, blk) = let + val attrVec = attrListToVec(ctx, attrMap, attrs) + in + HTML.BODY{ + background = getBACKGROUND attrVec, + bgcolor = getBGCOLOR attrVec, + text = getTEXT attrVec, + link = getLINK attrVec, + vlink = getVLINK attrVec, + alink = getALINK attrVec, + content = blk + } + end + end (* local *) + + (**** Elements H1, H2, H3, H4, H5, H6 and P ****) + local + val attrMap = mkAttrs [ + ("ALIGN", AT_NAMES["LEFT", "CENTER", "RIGHT"]) + ] + val getALIGN = getNAMES HTML.HAlign.fromString (attrMap, "ALIGN") + in + fun mkHn (n, ctx, attrs, text) = HTML.Hn{ + n = n, + align = getALIGN(attrListToVec(ctx, attrMap, attrs)), + content = text + } + fun mkP (ctx, attrs, text) = HTML.P{ + align = getALIGN(attrListToVec(ctx, attrMap, attrs)), + content = text + } + end (* local *) + + (**** Element UL ****) + local + val attrMap = mkAttrs [ + ("COMPACT", AT_IMPLICIT), + ("TYPE", AT_NAMES["DISC", "SQUARE", "CIRCLE"]) + ] + val getCOMPACT = getFlag(attrMap, "COMPACT") + val getTYPE = getNAMES HTML.ULStyle.fromString (attrMap, "TYPE") + in + fun mkUL (ctx, attrs, items) = let + val attrVec = attrListToVec(ctx, attrMap, attrs) + in + HTML.UL{ + ty = getTYPE attrVec, + compact = getCOMPACT attrVec, + content = items + } + end + end (* local *) + + (**** Element OL ****) + local + val attrMap = mkAttrs [ + ("COMPACT", AT_IMPLICIT), + ("START", AT_NUMBER), + ("TYPE", AT_TEXT) + ] + val getCOMPACT = getFlag(attrMap, "COMPACT") + val getSTART = getNUMBER(attrMap, "START") + val getTYPE = getCDATA(attrMap, "TYPE") + in + fun mkOL (ctx, attrs, items) = let + val attrVec = attrListToVec(ctx, attrMap, attrs) + in + HTML.OL{ + compact = getCOMPACT attrVec, + start = getSTART attrVec, + ty = getTYPE attrVec, + content = items + } + end + end (* local *) + + (**** Elements DIR, MENU and DL ****) + local + val attrMap = mkAttrs [ + ("COMPACT", AT_IMPLICIT) + ] + val getCOMPACT = getFlag(attrMap, "COMPACT") + in + fun mkDIR (ctx, attrs, items) = HTML.DIR{ + compact = getCOMPACT (attrListToVec(ctx, attrMap, attrs)), + content = items + } + fun mkMENU (ctx, attrs, items) = HTML.MENU{ + compact = getCOMPACT (attrListToVec(ctx, attrMap, attrs)), + content = items + } + fun mkDL (ctx, attrs, items) = HTML.DL{ + compact = getCOMPACT (attrListToVec(ctx, attrMap, attrs)), + content = items + } + end (* local *) + + (**** Element LI ****) + local + val attrMap = mkAttrs [ + ("TYPE", AT_TEXT), + ("VALUE", AT_NUMBER) + ] + val getTYPE = getCDATA(attrMap, "TYPE") + val getVALUE = getNUMBER(attrMap, "VALUE") + in + fun mkLI (ctx, attrs, text) = let + val attrVec = attrListToVec(ctx, attrMap, attrs) + in + HTML.LI{ + ty = getTYPE attrVec, + value = getVALUE attrVec, + content = text + } + end + end (* local *) + + (**** Element PRE ****) + local + val attrMap = mkAttrs [ + ("WIDTH", AT_NUMBER) + ] + val getWIDTH = getNUMBER(attrMap, "WIDTH") + in + fun mkPRE (ctx, attrs, text) = HTML.PRE{ + width = getWIDTH (attrListToVec (ctx, attrMap, attrs)), + content = text + } + end (* local *) + + (**** Element DIV ****) + local + val attrMap = mkAttrs [ + ("ALIGN", AT_NAMES["LEFT", "CENTER", "RIGHT"]) + ] + val getALIGN = require (getNAMES HTML.HAlign.fromString, + attrMap, "ALIGN", HTML.HAlign.left) + in + fun mkDIV (ctx, attrs, content) = HTML.DIV{ + align = getALIGN(attrListToVec(ctx, attrMap, attrs)), + content = content + } + end (* local *) + + (**** Element FORM ****) + local + val attrMap = mkAttrs [ + ("ACTION", AT_TEXT), + ("METHOD", AT_NAMES["GET", "PUT"]), + ("ENCTYPE", AT_TEXT) + ] + val getACTION = getCDATA (attrMap, "ACTION") + val getMETHOD = require (getNAMES HTML.HttpMethod.fromString, + attrMap, "METHOD", HTML.HttpMethod.get) + val getENCTYPE = getCDATA (attrMap, "ENCTYPE") + in + fun mkFORM (ctx, attrs, contents) = let + val attrVec = attrListToVec(ctx, attrMap, attrs) + in + HTML.FORM{ + action = getACTION attrVec, + method = getMETHOD attrVec, + enctype = getENCTYPE attrVec, + content = contents + } + end + end (* local *) + + (**** Element HR ****) + local + val attrMap = mkAttrs [ + ("ALIGN", AT_NAMES["LEFT", "CENTER", "RIGHT"]), + ("NOSHADE", AT_IMPLICIT), + ("SIZE", AT_TEXT), + ("WIDTH", AT_TEXT) + ] + val getALIGN = getNAMES HTML.HAlign.fromString (attrMap, "ALIGN") + val getNOSHADE = getFlag (attrMap, "NOSHADE") + val getSIZE = getCDATA (attrMap, "SIZE") + val getWIDTH = getCDATA (attrMap, "WIDTH") + in + fun mkHR (ctx, attrs) = let + val attrVec = attrListToVec(ctx, attrMap, attrs) + in + HTML.HR{ + align = getALIGN attrVec, + noshade = getNOSHADE attrVec, + size = getSIZE attrVec, + width = getWIDTH attrVec + } + end + end (* local *) + + (**** Element TABLE ****) + local + val attrMap = mkAttrs [ + ("ALIGN", AT_NAMES["LEFT", "CENTER", "RIGHT"]), + ("BORDER", AT_TEXT), + ("CELLSPACING", AT_TEXT), + ("CELLPADDING", AT_TEXT), + ("WIDTH", AT_TEXT) + ] + val getALIGN = getNAMES HTML.HAlign.fromString (attrMap, "ALIGN") + val getBORDER = getCDATA (attrMap, "BORDER") + val getCELLSPACING = getCDATA (attrMap, "CELLSPACING") + val getCELLPADDING = getCDATA (attrMap, "CELLPADDING") + val getWIDTH = getCDATA (attrMap, "WIDTH") + in + fun mkTABLE (ctx, attrs, {caption, body}) = let + val attrVec = attrListToVec(ctx, attrMap, attrs) + in + HTML.TABLE{ + align = getALIGN attrVec, + border = getBORDER attrVec, + cellspacing = getCELLSPACING attrVec, + cellpadding = getCELLPADDING attrVec, + width = getWIDTH attrVec, + caption = caption, + content = body + } + end + end (* local *) + + (**** Element CAPTION ****) + local + val attrMap = mkAttrs [ + ("ALIGN", AT_NAMES["TOP", "BOTTOM"]) + ] + val getALIGN = getNAMES HTML.CaptionAlign.fromString (attrMap, "ALIGN") + in + fun mkCAPTION (ctx, attrs, text) = HTML.CAPTION{ + align = getALIGN(attrListToVec(ctx, attrMap, attrs)), + content = text + } + end (* local *) + + (**** Element TR ****) + local + val attrMap = mkAttrs [ + ("ALIGN", AT_NAMES["LEFT", "CENTER", "RIGHT"]), + ("VALIGN", AT_NAMES["TOP", "MIDDLE", "BOTTOM", "BASELINE"]) + ] + val getALIGN = getNAMES HTML.HAlign.fromString (attrMap, "ALIGN") + val getVALIGN = getNAMES HTML.CellVAlign.fromString (attrMap, "VALIGN") + in + fun mkTR (ctx, attrs, cells) = let + val attrVec = attrListToVec(ctx, attrMap, attrs) + in + HTML.TR{ + align = getALIGN attrVec, + valign = getVALIGN attrVec, + content = cells + } + end + end (* local *) + + (**** Elements TH and TD ****) + local + val attrMap = mkAttrs [ + ("ALIGN", AT_NAMES["LEFT", "CENTER", "RIGHT"]), + ("COLSPAN", AT_NUMBER), + ("HEIGHT", AT_TEXT), + ("NOWRAP", AT_IMPLICIT), + ("ROWSPAN", AT_NUMBER), + ("VALIGN", AT_NAMES["TOP", "MIDDLE", "BOTTOM", "BASELINE"]), + ("WIDTH", AT_TEXT) + ] + val getALIGN = getNAMES HTML.HAlign.fromString (attrMap, "ALIGN") + val getCOLSPAN = getNUMBER (attrMap, "COLSPAN") + val getHEIGHT = getCDATA (attrMap, "HEIGHT") + val getNOWRAP = getFlag (attrMap, "NOWRAP") + val getROWSPAN = getNUMBER (attrMap, "ROWSPAN") + val getVALIGN = getNAMES HTML.CellVAlign.fromString (attrMap, "VALIGN") + val getWIDTH = getCDATA (attrMap, "WIDTH") + fun mkCell (ctx, attrs, cells) = let + val attrVec = attrListToVec(ctx, attrMap, attrs) + in + { align = getALIGN attrVec, + colspan = getCOLSPAN attrVec, + height = getHEIGHT attrVec, + nowrap = getNOWRAP attrVec, + rowspan = getROWSPAN attrVec, + valign = getVALIGN attrVec, + width = getWIDTH attrVec, + content = cells + } + end + in + fun mkTH arg = HTML.TH(mkCell arg) + fun mkTD arg = HTML.TD(mkCell arg) + end (* local *) + + (**** Element A ****) + local + val attrMap = mkAttrs [ + ("HREF", AT_TEXT), + ("NAME", AT_TEXT), + ("REL", AT_TEXT), + ("REV", AT_TEXT), + ("TITLE", AT_TEXT) + ] + val getHREF = getCDATA (attrMap, "HREF") + val getNAME = getCDATA (attrMap, "NAME") + val getREL = getCDATA (attrMap, "REL") + val getREV = getCDATA (attrMap, "REV") + val getTITLE = getCDATA (attrMap, "TITLE") + in + fun mkA (ctx, attrs, contents) = let + val attrVec = attrListToVec(ctx, attrMap, attrs) + in + HTML.A{ + name = getNAME attrVec, + href = getHREF attrVec, + rel = getREL attrVec, + rev = getREV attrVec, + title = getTITLE attrVec, + content = contents + } + end + end (* local *) + + (**** Element IMG ****) + local + val attrMap = mkAttrs [ + ("ALIGN", AT_NAMES["TOP", "MIDDLE", "BOTTOM", "LEFT", "RIGHT"]), + ("ALT", AT_TEXT), + ("BORDER", AT_TEXT), + ("HEIGHT", AT_TEXT), + ("HSPACE", AT_TEXT), + ("ISMAP", AT_IMPLICIT), + ("SRC", AT_TEXT), + ("USEMAP", AT_TEXT), + ("VSPACE", AT_TEXT), + ("WIDTH", AT_TEXT) + ] + val getALIGN = getNAMES HTML.IAlign.fromString (attrMap, "ALIGN") + val getALT = getCDATA (attrMap, "ALT") + val getBORDER = getCDATA (attrMap, "BORDER") + val getHEIGHT = getCDATA (attrMap, "HEIGHT") + val getHSPACE = getCDATA (attrMap, "HSPACE") + val getISMAP = getFlag (attrMap, "ISMAP") + val getSRC = require (getCDATA, attrMap, "SRC", "") + val getUSEMAP = getCDATA (attrMap, "USEMAP") + val getVSPACE = getCDATA (attrMap, "VSPACE") + val getWIDTH = getCDATA (attrMap, "WIDTH") + in + fun mkIMG (ctx, attrs) = let + val attrVec = attrListToVec(ctx, attrMap, attrs) + in + HTML.IMG{ + src = getSRC attrVec, + alt = getALT attrVec, + align = getALIGN attrVec, + height = getHEIGHT attrVec, + width = getWIDTH attrVec, + border = getBORDER attrVec, + hspace = getHSPACE attrVec, + vspace = getVSPACE attrVec, + usemap = getUSEMAP attrVec, + ismap = getISMAP attrVec + } + end + end (* local *) + + (**** Element APPLET ****) + local + val attrMap = mkAttrs [ + ("ALIGN", AT_NAMES["TOP", "MIDDLE", "BOTTOM", "LEFT", "RIGHT"]), + ("ALT", AT_TEXT), + ("CODE", AT_TEXT), + ("CODEBASE", AT_TEXT), + ("HEIGHT", AT_TEXT), + ("HSPACE", AT_TEXT), + ("NAME", AT_TEXT), + ("VSPACE", AT_TEXT), + ("WIDTH", AT_TEXT) + ] + val getALIGN = getNAMES HTML.IAlign.fromString (attrMap, "ALIGN") + val getALT = getCDATA (attrMap, "ALT") + val getCODE = require (getCDATA, attrMap, "CODE", "") + val getCODEBASE = getCDATA (attrMap, "CODEBASE") + val getHEIGHT = getCDATA (attrMap, "HEIGHT") + val getHSPACE = getCDATA (attrMap, "HSPACE") + val getNAME = getCDATA (attrMap, "NAME") + val getVSPACE = getCDATA (attrMap, "VSPACE") + val getWIDTH = getCDATA (attrMap, "WIDTH") + in + fun mkAPPLET (ctx, attrs, content) = let + val attrVec = attrListToVec(ctx, attrMap, attrs) + in + HTML.APPLET{ + codebase = getCODEBASE attrVec, + code = getCODE attrVec, + name = getNAME attrVec, + alt = getALT attrVec, + align = getALIGN attrVec, + height = getHEIGHT attrVec, + width = getWIDTH attrVec, + hspace = getHSPACE attrVec, + vspace = getVSPACE attrVec, + content = content + } + end + end (* local *) + + (**** Element PARAM ****) + local + val attrMap = mkAttrs [ + ("NAME", AT_TEXT), + ("VALUE", AT_TEXT) + ] + val getNAME = require (getCDATA, attrMap, "NAME", "") + val getVALUE = getCDATA (attrMap, "VALUE") + in + fun mkPARAM (ctx, attrs) = let + val attrVec = attrListToVec(ctx, attrMap, attrs) + in + HTML.PARAM{ + name = getNAME attrVec, + value = getVALUE attrVec + } + end + end (* local *) + + (**** Element FONT ****) + local + val attrMap = mkAttrs [ + ("COLOR", AT_TEXT), + ("SIZE", AT_TEXT) + ] + val getCOLOR = getCDATA (attrMap, "COLOR") + val getSIZE = getCDATA (attrMap, "SIZE") + in + fun mkFONT (ctx, attrs, content) = let + val attrVec = attrListToVec(ctx, attrMap, attrs) + in + HTML.FONT{ + size = getSIZE attrVec, + color = getCOLOR attrVec, + content = content + } + end + end (* local *) + + (**** Element BASEFONT ****) + local + val attrMap = mkAttrs [ + ("SIZE", AT_TEXT) + ] + val getSIZE = getCDATA (attrMap, "SIZE") + in + fun mkBASEFONT (ctx, attrs, content) = HTML.BASEFONT{ + size = getSIZE(attrListToVec(ctx, attrMap, attrs)), + content = content + } + end (* local *) + + (**** Element BR ****) + local + val attrMap = mkAttrs [ + ("CLEAR", AT_NAMES["LEFT", "RIGHT", "ALL", "NONE"]) + ] + val getCLEAR = getNAMES HTML.TextFlowCtl.fromString (attrMap, "CLEAR") + in + fun mkBR (ctx, attrs) = HTML.BR{ + clear = getCLEAR(attrListToVec(ctx, attrMap, attrs)) + } + end (* local *) + + (**** Element MAP ****) + local + val attrMap = mkAttrs [ + ("NAME", AT_TEXT) + ] + val getNAME = getCDATA (attrMap, "NAME") + in + fun mkMAP (ctx, attrs, content) = HTML.MAP{ + name = getNAME (attrListToVec(ctx, attrMap, attrs)), + content = content + } + end (* local *) + + (**** Element INPUT ****) + local + val attrMap = mkAttrs [ + ("ALIGN", AT_NAMES["TOP", "MIDDLE", "BOTTOM", "LEFT", "RIGHT"]), + ("CHECKED", AT_IMPLICIT), + ("MAXLENGTH", AT_NUMBER), + ("NAME", AT_TEXT), + ("SIZE", AT_TEXT), + ("SRC", AT_TEXT), + ("TYPE", AT_NAMES[ + "TEXT", "PASSWORD", "CHECKBOX", + "RADIO", "SUBMIT", "RESET", + "FILE", "HIDDEN", "IMAGE" + ]), + ("VALUE", AT_TEXT) + ] + val getALIGN = getNAMES HTML.IAlign.fromString (attrMap, "ALIGN") + val getCHECKED = getFlag (attrMap, "CHECKED") + val getMAXLENGTH = getNUMBER (attrMap, "MAXLENGTH") + val getNAME = getCDATA (attrMap, "NAME") + val getSIZE = getCDATA (attrMap, "SIZE") + val getSRC = getCDATA (attrMap, "SRC") + val getTYPE = getNAMES HTML.InputType.fromString (attrMap, "TYPE") + val getVALUE = getCDATA (attrMap, "VALUE") + in + fun mkINPUT (ctx, attrs) = let + val attrVec = attrListToVec(ctx, attrMap, attrs) + in + HTML.INPUT{ + ty = getTYPE attrVec, + name = getNAME attrVec, + value = getVALUE attrVec, + src = getSRC attrVec, + checked = getCHECKED attrVec, + size = getSIZE attrVec, + maxlength = getMAXLENGTH attrVec, + align = getALIGN attrVec + } + end + end (* local *) + + (**** Element SELECT ****) + local + val attrMap = mkAttrs [ + ("NAME", AT_TEXT), + ("SIZE", AT_TEXT) + ] + val getNAME = require (getCDATA, attrMap, "NAME", "") + val getSIZE = getNUMBER (attrMap, "SIZE") + in + fun mkSELECT (ctx, attrs, contents) = let + val attrVec = attrListToVec(ctx, attrMap, attrs) + in + HTML.SELECT{ + name = getNAME attrVec, + size = getSIZE attrVec, + content = contents + } + end + end (* local *) + + (**** Element TEXTAREA ****) + local + val attrMap = mkAttrs [ + ("NAME", AT_TEXT), + ("ROWS", AT_NUMBER), + ("COLS", AT_NUMBER) + ] + val getNAME = require (getCDATA, attrMap, "NAME", "") + val getROWS = require (getNUMBER, attrMap, "ROWS", 0) + val getCOLS = require (getNUMBER, attrMap, "COLS", 0) + in + fun mkTEXTAREA (ctx, attrs, contents) = let + val attrVec = attrListToVec(ctx, attrMap, attrs) + in + HTML.TEXTAREA{ + name = getNAME attrVec, + rows = getROWS attrVec, + cols = getCOLS attrVec, + content = contents + } + end + end (* local *) + + (**** Element AREA ****) + local + val attrMap = mkAttrs [ + ("ALT", AT_TEXT), + ("COORDS", AT_TEXT), + ("HREF", AT_TEXT), + ("NOHREF", AT_IMPLICIT), + ("SHAPE", AT_NAMES["RECT", "CIRCLE", "POLY", "DEFAULT"]) + ] + val getALT = require (getCDATA, attrMap, "ALT", "") + val getCOORDS = getCDATA (attrMap, "COORDS") + val getHREF = getCDATA (attrMap, "HREF") + val getNOHREF = getFlag (attrMap, "NOHREF") + val getSHAPE = getNAMES HTML.Shape.fromString (attrMap, "SHAPE") + in + fun mkAREA (ctx, attrs) = let + val attrVec = attrListToVec(ctx, attrMap, attrs) + in + HTML.AREA{ + shape = getSHAPE attrVec, + coords = getCOORDS attrVec, + href = getHREF attrVec, + nohref = getNOHREF attrVec, + alt = getALT attrVec + } + end + end (* local *) + + (**** Element OPTION ****) + local + val attrMap = mkAttrs [ + ("SELECTED", AT_IMPLICIT), + ("VALUE", AT_TEXT) + ] + val getSELECTED = getFlag (attrMap, "SELECTED") + val getVALUE = getCDATA (attrMap, "VALUE") + in + fun mkOPTION (ctx, attrs, contents) = let + val attrVec = attrListToVec(ctx, attrMap, attrs) + in + HTML.OPTION{ + selected = getSELECTED attrVec, + value = getVALUE attrVec, + content = contents + } + end + end (* local *) + + end + diff --git a/smlnj-lib/HTML/html-attrs-sig.sml b/smlnj-lib/HTML/html-attrs-sig.sml new file mode 100644 index 0000000..10bed8f --- /dev/null +++ b/smlnj-lib/HTML/html-attrs-sig.sml @@ -0,0 +1,59 @@ +(* html-attrs.sml + * + * COPYRIGHT (c) 1996 AT&T Research. + * + * This is the interface to HTMLAttrs, which provides support for parsing + * element start tags. + *) + +signature HTML_ATTRS = + sig + + type context = {file : string option, line : int} + + (* support for building elements that have attributes *) + datatype attr_val = datatype HTMLAttrVals.attr_val + type attrs = (string * attr_val) list + + val mkISINDEX : (context * attrs) -> {prompt : HTML.cdata option} + val mkBASE : (context * attrs) -> HTML.head_content + val mkMETA : (context * attrs) -> HTML.head_content + val mkLINK : (context * attrs) -> HTML.head_content + val mkBODY : (context * attrs * HTML.block) -> HTML.body + val mkHn : (int * context * attrs * HTML.text) -> HTML.block + val mkP : (context * attrs * HTML.text) -> HTML.block + val mkUL : (context * attrs * HTML.list_item list) -> HTML.block + val mkOL : (context * attrs * HTML.list_item list) -> HTML.block + val mkDIR : (context * attrs * HTML.list_item list) -> HTML.block + val mkMENU : (context * attrs * HTML.list_item list) -> HTML.block + val mkLI : (context * attrs * HTML.block) -> HTML.list_item + val mkDL : (context * attrs * {dt : HTML.text list, dd : HTML.block} list) + -> HTML.block + val mkPRE : (context * attrs * HTML.text) -> HTML.block + val mkDIV : (context * attrs * HTML.block) -> HTML.block + val mkFORM : (context * attrs * HTML.block) -> HTML.block + val mkHR : (context * attrs) -> HTML.block + val mkTABLE : (context * attrs * { + caption : HTML.caption option, + body : HTML.tr list + }) -> HTML.block + val mkCAPTION : (context * attrs * HTML.text) -> HTML.caption + val mkTR : (context * attrs * HTML.table_cell list) -> HTML.tr + val mkTH : (context * attrs * HTML.block) -> HTML.table_cell + val mkTD : (context * attrs * HTML.block) -> HTML.table_cell + val mkA : (context * attrs * HTML.text) -> HTML.text + val mkIMG : (context * attrs) -> HTML.text + val mkAPPLET : (context * attrs * HTML.text) -> HTML.text + val mkPARAM : (context * attrs) -> HTML.text + val mkFONT : (context * attrs * HTML.text) -> HTML.text + val mkBASEFONT : (context * attrs * HTML.text) -> HTML.text + val mkBR : (context * attrs) -> HTML.text + val mkMAP : (context * attrs * HTML.area list) -> HTML.text + val mkINPUT : (context * attrs) -> HTML.text + val mkSELECT : (context * attrs * HTML.select_option list) -> HTML.text + val mkTEXTAREA : (context * attrs * HTML.pcdata) -> HTML.text + val mkAREA : (context * attrs) -> HTML.area + val mkOPTION : (context * attrs * HTML.pcdata) -> HTML.select_option + + end + diff --git a/smlnj-lib/HTML/html-defaults.sml b/smlnj-lib/HTML/html-defaults.sml new file mode 100644 index 0000000..35709ed --- /dev/null +++ b/smlnj-lib/HTML/html-defaults.sml @@ -0,0 +1,24 @@ +(* html-defaults.sml + * + * COPYRIGHT (c) 1996 AT&T Research. + * + * Some HTML attributes have default values specified by the DTD; this + * file defines values for these. + *) + +structure HTMLDefaults = + struct + + val br_clear = HTML.TextFlowCtl.none + val area_shape = HTML.Shape.rect + val form_method = HTML.HttpMethod.get + val form_enctype = "application/x-www-form-urlencoded" + val input_type = HTML.InputType.text + val input_align = HTML.IAlign.top + val th_rowspan = 1 + val th_colspan = 1 + val td_rowspan = 1 + val td_colspan = 1 + + end; + diff --git a/smlnj-lib/HTML/html-elements-fn.sml b/smlnj-lib/HTML/html-elements-fn.sml new file mode 100644 index 0000000..9495983 --- /dev/null +++ b/smlnj-lib/HTML/html-elements-fn.sml @@ -0,0 +1,249 @@ +(* html-elements-fn.sml + * + * COPYRIGHT (c) 1996 AT&T REsearch. + * + * This module builds element tags for the lexer. + *) + +functor HTMLElementsFn ( + structure Tokens : HTML_TOKENS + structure Err : HTML_ERROR + structure HTMLAttrs : HTML_ATTRS + ) : sig + + structure T : HTML_TOKENS + + type pos = int + + val startTag : string option + -> (string * pos * pos) -> (T.svalue, pos) T.token option + val endTag : string option + -> (string * pos * pos) -> (T.svalue, pos) T.token option + + end = struct + + structure T = Tokens + structure A = HTMLAttrs + + type pos = int + + datatype start_tag + = WAttrs of ((A.attrs * pos * pos) -> (T.svalue, pos) T.token) + | WOAttrs of ((pos * pos) -> (T.svalue, pos) T.token) + datatype end_tag + = End of ((pos * pos) -> (T.svalue, pos) T.token) + | Empty + + val tokenData = [ + ("A", WAttrs T.START_A, End T.END_A), + ("ADDRESS", WOAttrs T.START_ADDRESS, End T.END_ADDRESS), + ("APPLET", WAttrs T.START_APPLET, End T.END_APPLET), + ("AREA", WAttrs T.TAG_AREA, Empty), + ("B", WOAttrs T.START_B, End T.END_B), + ("BASE", WAttrs T.TAG_BASE, Empty), + ("BASEFONT", WAttrs T.START_BASEFONT, End T.END_BASEFONT), + ("BIG", WOAttrs T.START_BIG, End T.END_BIG), + ("BLOCKQUOTE", WOAttrs T.START_BLOCKQUOTE, End T.END_BLOCKQUOTE), + ("BODY", WAttrs T.START_BODY, End T.END_BODY), + ("BR", WAttrs T.TAG_BR, Empty), + ("CAPTION", WAttrs T.START_CAPTION, End T.END_CAPTION), + ("CENTER", WOAttrs T.START_CENTER, End T.END_CENTER), + ("CITE", WOAttrs T.START_CITE, End T.END_CITE), + ("CODE", WOAttrs T.START_CODE, End T.END_CODE), + ("DD", WOAttrs T.START_DD, End T.END_DD), + ("DFN", WOAttrs T.START_DFN, End T.END_DFN), + ("DIR", WAttrs T.START_DIR, End T.END_DIR), + ("DIV", WAttrs T.START_DIV, End T.END_DIV), + ("DL", WAttrs T.START_DL, End T.END_DL), + ("DT", WOAttrs T.START_DT, End T.END_DT), + ("EM", WOAttrs T.START_EM, End T.END_EM), + ("FONT", WAttrs T.START_FONT, End T.END_FONT), + ("FORM", WAttrs T.START_FORM, End T.END_FORM), + ("H1", WAttrs T.START_H1, End T.END_H1), + ("H2", WAttrs T.START_H2, End T.END_H2), + ("H3", WAttrs T.START_H3, End T.END_H3), + ("H4", WAttrs T.START_H4, End T.END_H4), + ("H5", WAttrs T.START_H5, End T.END_H5), + ("H6", WAttrs T.START_H6, End T.END_H6), + ("HEAD", WOAttrs T.START_HEAD, End T.END_HEAD), + ("HR", WAttrs T.TAG_HR, Empty), + ("HTML", WOAttrs T.START_HTML, End T.END_HTML), + ("I", WOAttrs T.START_I, End T.END_I), + ("IMG", WAttrs T.TAG_IMG, Empty), + ("INPUT", WAttrs T.TAG_INPUT, Empty), + ("ISINDEX", WAttrs T.TAG_ISINDEX, Empty), + ("KBD", WOAttrs T.START_KBD, End T.END_KBD), + ("LI", WAttrs T.START_LI, End T.END_LI), + ("LINK", WAttrs T.TAG_LINK, Empty), + ("MAP", WAttrs T.START_MAP, End T.END_MAP), + ("MENU", WAttrs T.START_MENU, End T.END_MENU), + ("META", WAttrs T.TAG_META, Empty), + ("OL", WAttrs T.START_OL, End T.END_OL), + ("OPTION", WAttrs T.START_OPTION, End T.END_OPTION), + ("P", WAttrs T.START_P, End T.END_P), + ("PARAM", WAttrs T.TAG_PARAM, Empty), + ("PRE", WAttrs T.START_PRE, End T.END_PRE), + ("SAMP", WOAttrs T.START_SAMP, End T.END_SAMP), + ("SCRIPT", WOAttrs T.START_SCRIPT, End T.END_SCRIPT), + ("SELECT", WAttrs T.START_SELECT, End T.END_SELECT), + ("SMALL", WOAttrs T.START_SMALL, End T.END_SMALL), + ("STRIKE", WOAttrs T.START_STRIKE, End T.END_STRIKE), + ("STRONG", WOAttrs T.START_STRONG, End T.END_STRONG), + ("STYLE", WOAttrs T.START_STYLE, End T.END_STYLE), + ("SUB", WOAttrs T.START_SUB, End T.END_SUB), + ("SUP", WOAttrs T.START_SUP, End T.END_SUP), + ("TABLE", WAttrs T.START_TABLE, End T.END_TABLE), + ("TD", WAttrs T.START_TD, End T.END_TD), + ("TEXTAREA", WAttrs T.START_TEXTAREA, End T.END_TEXTAREA), + ("TH", WAttrs T.START_TH, End T.END_TH), + ("TITLE", WOAttrs T.START_TITLE, End T.END_TITLE), + ("TR", WAttrs T.START_TR, End T.END_TR), + ("TT", WOAttrs T.START_TT, End T.END_TT), + ("U", WOAttrs T.START_U, End T.END_U), + ("UL", WAttrs T.START_UL, End T.END_UL), + ("VAR", WOAttrs T.START_VAR, End T.END_VAR) + ] + + structure HTbl = HashTableFn (struct + type hash_key = string + val hashVal = HashString.hashString + val sameKey = (op = : (string * string) -> bool) + end) + + val elemTbl = let + val tbl = HTbl.mkTable (length tokenData, Fail "HTMLElements") + fun ins (tag, startTok, endTok) = + HTbl.insert tbl (tag, {startT=startTok, endT=endTok}) + in + List.app ins tokenData; tbl + end + + structure SS = Substring + + fun canonName name = SS.translate (String.str o Char.toUpper) name + + fun find name = (HTbl.find elemTbl (canonName name)) + + val skipWS = SS.dropl Char.isSpace + + fun scanStr (ctx, quoteChar, ss) = let + val (str, rest) = SS.splitl (fn c => (c <> quoteChar)) ss + in + if (SS.isEmpty rest) + then ( + Err.lexError ctx "missing close quote for string"; + (A.STRING(SS.string str), rest)) + else (A.STRING(SS.string str), SS.triml 1 rest) + end + + (* scan an attribute value from a substring, returning the value, and + * the rest of the substring. Attribute values have one of the following + * forms: + * 1) a name token (a sequence of letters, digits, periods, or hyphens). + * 2) a string literal enclosed in "" + * 3) a string literal enclosed in '' + *) + fun scanAttrVal (ctx, attrName, ss) = let + fun isNameChar (#"." | #"-") = true + | isNameChar c = (Char.isAlphaNum c) + in + case SS.getc ss + of NONE => (A.IMPLICIT, ss) + | (SOME(#"\"", rest)) => scanStr (ctx, #"\"", rest) + | (SOME(#"'", rest)) => scanStr (ctx, #"'", rest) + | (SOME(c, _)) => let + (** + * Unquoted attributes should be Names, but this is often not + * the case, so we terminate them on whitespace or ">". + *) + val notNameChar = ref false + fun isAttrChar c = + if ((Char.isSpace c) orelse (c = #">")) + then false + else ( + if isNameChar c then () else notNameChar := true; + true) + val (value, rest) = SS.splitl isAttrChar ss + in + if (SS.isEmpty value) + then ( + Err.badAttrVal ctx (SS.string attrName, ""); + (A.IMPLICIT, ss)) + else if (! notNameChar) + then ( + Err.unquotedAttrVal ctx (SS.string attrName); + (A.STRING(SS.string value), rest)) + else (A.NAME(SS.string value), rest) + end + (* end case *) + end + + fun scanStartTag (ctx, ss) = let + val (name, rest) = SS.splitl (not o Char.isSpace) ss + fun scanAttrs (rest, attrs) = let + val rest = skipWS rest + in + case SS.getc rest + of NONE => (name, List.rev attrs) + | (SOME(#"\"", rest)) => ( + Err.lexError ctx "bogus text in element"; + scanAttrs (#2(scanStr (ctx, #"\"", rest)), attrs)) + | (SOME(#"'", rest)) => ( + Err.lexError ctx "bogus text in element"; + scanAttrs (#2(scanStr (ctx, #"'", rest)), attrs)) + | (SOME(c, rest')) => + if Char.isAlpha c + then let + val (aName, rest) = SS.splitl Char.isAlphaNum rest + val rest = skipWS rest + in + case (SS.getc rest) + of (SOME(#"=", rest)) => let + (* get the attribute value *) + val (aVal, rest) = + scanAttrVal (ctx, aName, skipWS rest) + in + scanAttrs (rest, (canonName aName, aVal)::attrs) + end + | _ => scanAttrs (rest, + (canonName aName, A.IMPLICIT)::attrs) + (* end case *) + end + else ( + Err.lexError ctx "bogus character in element"; + scanAttrs (rest', attrs)) + (* end case *) + end + in + scanAttrs(rest, []) + end + + fun startTag file (tag, p1, p2) = let + val ctx = {file=file, line=p1} + val tag' = SS.triml 1 (SS.trimr 1 (SS.full tag)) + val (name, attrs) = scanStartTag (ctx, tag') + in + case (find name, attrs) + of (NONE, _) => (Err.badStartTag ctx (SS.string name); NONE) + | (SOME{startT=WOAttrs _, ...}, _::_) => ( + List.app (Err.unknownAttr ctx o #1) attrs; NONE) + | (SOME{startT=WOAttrs tag, ...}, []) => + SOME(tag (p1, p2)) + | (SOME{startT=WAttrs tag, ...}, attrs) => + SOME(tag (attrs, p1, p2)) + (* end case *) + end + + fun endTag file (tag, p1, p2) = let + val ctx = {file=file, line=p1} + val name = SS.triml 2 (SS.trimr 1 (SS.full tag)) + in + case (find name) + of NONE => (Err.badEndTag ctx (SS.string name); NONE) + | (SOME{endT=Empty, ...}) => (Err.badEndTag ctx (SS.string name); NONE) + | (SOME{endT=End endTok, ...}) => SOME(endTok (p1, p2)) + (* end case *) + end + + end + diff --git a/smlnj-lib/HTML/html-error-sig.sml b/smlnj-lib/HTML/html-error-sig.sml new file mode 100644 index 0000000..3db0258 --- /dev/null +++ b/smlnj-lib/HTML/html-error-sig.sml @@ -0,0 +1,41 @@ +(* html-error-sig.sml + * + * COPYRIGHT (c) 1996 AT&T Research. + * + * This is the interface of the error functions supplied to the lexer + * (and transitively, to HTMLElemnts). + *) + +signature HTML_ERROR = + sig + + type context = {file : string option, line : int} + + val badStartTag : context -> string -> unit + (* called on unrecognized start tags; the string is the tag name *) + val badEndTag : context -> string -> unit + (* called on unrecognized end tags, or end tags for empty elements; + * the string is the tag name. + *) + val badAttrVal : context -> (string * string) -> unit + (* called on ill-formed attribute values; the first string is the + * attribute name, and the second is the value. + *) + val lexError : context -> string -> unit + (* called on other lexical errors; the string is an error message. *) + val syntaxError : context -> string -> unit + (* called on syntax errors; the string is an error message. *) + val missingAttrVal : context -> string -> unit + (* called when an attribute name is given without a value *) + val missingAttr : context -> string -> unit + (* called on a missing required attribute; the string is the attribute + * name. + *) + val unknownAttr : context -> string -> unit + (* called on unknown attributes; the string is the attribute name. *) + val unquotedAttrVal : context -> string -> unit + (* called when the attribute value should have been quoted, but wasn't; + * the string is the attribute name. + *) + + end diff --git a/smlnj-lib/HTML/html-gram b/smlnj-lib/HTML/html-gram new file mode 100644 index 0000000..e629564 --- /dev/null +++ b/smlnj-lib/HTML/html-gram @@ -0,0 +1,693 @@ +(* html-gram + * + * COPYRIGHT (c) 1996 AT&T Research. + * + * This grammar parses HTML 3.2. Note that it does not enforce exclusions + * (for the content of FORM, PRE, etc). Exclusions should be enforced as + * a second pass over the parse tree. + *) + +fun textList [text] = text + | textList l = HTML.TextList l + +fun blockList [blk] = blk + | blockList l = HTML.BlockList l + +fun textBlock l = HTML.TextBlock(textList l) + +(* The elements of a definition list (<DL>) are tags (<DT>) and items (<DD>). + * To avoid shift/reduce problems, we parse them and then group them. + *) +datatype deflist_item + = DL_tag of HTML.text + | DL_item of HTML.block + +fun groupDefListContents [] = [] + | groupDefListContents (h :: t) = let + fun gdlc (DL_tag tag, []) = ({dt=[tag], dd=HTML.BlockList[]}, []) + | gdlc (DL_tag tag, h :: t) = let + val ({dt, dd}, r) = gdlc (h, t) + in + ({dt = tag :: dt, dd = dd}, r) + end + | gdlc (DL_item blk, r) = ({dt=[],dd=blk}, groupDefListContents r) + in + op :: (gdlc (h, t)) + end + +(* A list of Text, paragraphs and blocks requires grouping the Text items and + * making an implicit paragraph. We cannot directly use TextList because of + * conflicts. + *) +datatype blklist_item + = BL_text of HTML.text list + | BL_block of HTML.block list + +fun consText (txt, BL_text tl :: r) = BL_text(txt::tl) :: r + | consText (txt, l) = BL_text[txt] :: l + +fun consBlock (blk, BL_block bl :: r) = BL_block(blk::bl) :: r + | consBlock (blk, l) = BL_block[blk] :: l + +fun mkBlock blks = let + fun f (BL_text tl) = textBlock tl + | f (BL_block bl) = blockList bl + in + blockList(List.map f blks) + end + +fun mkBody blks = HTML.BODY{ + background = NONE, + bgcolor = NONE, + text = NONE, + link = NONE, + vlink = NONE, + alink = NONE, + content = mkBlock blks + } + +%% + +(* %pure *) +%verbose + +%nonterm Document of HTML.html + | StartHTML of HTML.cdata option + | EndHTML + | Head of HTML.head_content list + | StartHEAD + | EndHEAD + | HeadContents of HTML.head_content list + | HeadElements of HTML.head_content list + | HeadElement of HTML.head_content + | Body of HTML.body + | StartBODY + | EndBODY + | BodyContent of HTML.block + | BodyContent0 of HTML.body + | BodyContent1 of blklist_item list + | BodyContent2 of blklist_item list + | BodyElement of HTML.block + | AddressContent1 of blklist_item list + | AddressContent2 of blklist_item list + | BlockWOIndex of HTML.block + | Block of HTML.block + | Paragraph of HTML.block + | List of HTML.block + | ListItemList of HTML.list_item list + | ListItem of HTML.list_item + | DLItemList of deflist_item list + | DLItem of deflist_item + | Flow1 of blklist_item list + | Flow2 of blklist_item list + | EndLI + | EndDT + | EndDD + | Preformatted of HTML.block + | optCaption of HTML.caption option + | TableRowList of HTML.tr list + | TableRow of HTML.tr + | TableCellList of HTML.table_cell list + | TableCell of HTML.table_cell + | TextList of HTML.text + | TextList' of HTML.text list + | TextWOScript of HTML.text + | Text of HTML.text + | Font of HTML.text + | Phrase of HTML.text + | Special of HTML.text + | AreaList of HTML.area list + | Form of HTML.text + | OptionList of HTML.select_option list + | EndOPTION + | PCData of HTML.pcdata + | PCDataList of HTML.pcdata list + | PCDataElem of HTML.pcdata + +%term EOF + (* tags in alphabetical order *) + | START_A of HTMLAttrVals.attrs | END_A + | START_ADDRESS | END_ADDRESS + | START_APPLET of HTMLAttrVals.attrs | END_APPLET + | TAG_AREA of HTMLAttrVals.attrs + | START_B | END_B + | TAG_BASE of HTMLAttrVals.attrs + | START_BIG | END_BIG + | START_BLOCKQUOTE | END_BLOCKQUOTE + | START_BODY of HTMLAttrVals.attrs | END_BODY + | TAG_BR of HTMLAttrVals.attrs + | START_CAPTION of HTMLAttrVals.attrs | END_CAPTION + | START_CENTER | END_CENTER + | START_CITE | END_CITE + | START_CODE | END_CODE + | START_DD | END_DD + | START_DFN | END_DFN + | START_DIR of HTMLAttrVals.attrs | END_DIR + | START_DIV of HTMLAttrVals.attrs | END_DIV + | START_DL of HTMLAttrVals.attrs | END_DL + | START_DT | END_DT + | START_EM | END_EM + | START_FONT of HTMLAttrVals.attrs | END_FONT + | START_BASEFONT of HTMLAttrVals.attrs | END_BASEFONT + | START_FORM of HTMLAttrVals.attrs | END_FORM + | START_H1 of HTMLAttrVals.attrs | END_H1 + | START_H2 of HTMLAttrVals.attrs | END_H2 + | START_H3 of HTMLAttrVals.attrs | END_H3 + | START_H4 of HTMLAttrVals.attrs | END_H4 + | START_H5 of HTMLAttrVals.attrs | END_H5 + | START_H6 of HTMLAttrVals.attrs | END_H6 + | START_HEAD | END_HEAD + | TAG_HR of HTMLAttrVals.attrs + | START_HTML | END_HTML + | START_I | END_I + | TAG_IMG of HTMLAttrVals.attrs + | TAG_INPUT of HTMLAttrVals.attrs + | TAG_ISINDEX of HTMLAttrVals.attrs + | START_KBD | END_KBD + | START_LI of HTMLAttrVals.attrs | END_LI + | TAG_LINK of HTMLAttrVals.attrs + | START_MAP of HTMLAttrVals.attrs | END_MAP + | START_MENU of HTMLAttrVals.attrs | END_MENU + | TAG_META of HTMLAttrVals.attrs + | START_OL of HTMLAttrVals.attrs | END_OL + | START_OPTION of HTMLAttrVals.attrs | END_OPTION + | START_P of HTMLAttrVals.attrs | END_P + | TAG_PARAM of HTMLAttrVals.attrs + | START_PRE of HTMLAttrVals.attrs | END_PRE + | START_SAMP | END_SAMP + | START_SCRIPT | END_SCRIPT + | START_SELECT of HTMLAttrVals.attrs | END_SELECT + | START_SMALL | END_SMALL + | START_STRIKE | END_STRIKE + | START_STRONG | END_STRONG + | START_STYLE | END_STYLE + | START_SUB | END_SUB + | START_SUP | END_SUP + | START_TABLE of HTMLAttrVals.attrs | END_TABLE + | START_TD of HTMLAttrVals.attrs | END_TD + | START_TEXTAREA of HTMLAttrVals.attrs | END_TEXTAREA + | START_TH of HTMLAttrVals.attrs | END_TH + | START_TITLE | END_TITLE + | START_TR of HTMLAttrVals.attrs | END_TR + | START_TT | END_TT + | START_U | END_U + | START_UL of HTMLAttrVals.attrs | END_UL + | START_VAR | END_VAR + (* raw text data *) + | PCDATA of string + | CHAR_REF of string (* &#dd; *) + | ENTITY_REF of string (* &#name; *) + +%pure +%pos int +%name HTML +%start Document + +%header ( + functor HTMLLrValsFn ( + structure Token : TOKEN + structure HTMLAttrs : HTML_ATTRS)) + +%arg (ctx) : int -> HTMLAttrs.context + +%eop EOF +%noshift EOF (* avoids infinite loop in error recovery *) + +(** Some error-correction support **) +%value START_A ([]) +%value START_APPLET ([ + ("CODE", HTMLAttrs.NAME ""), + ("WIDTH", HTMLAttrs.NAME ""), + ("HEIGHT", HTMLAttrs.NAME "") + ]) +%value TAG_AREA ([("ALT", HTMLAttrs.NAME "")]) +%value TAG_BASE ([("URL", HTMLAttrs.NAME "")]) +%value START_BODY ([]) +%value TAG_BR ([]) +%value START_CAPTION ([]) +%value START_DIR ([]) +%value START_DIV ([]) +%value START_DL ([]) +%value START_FONT ([]) +%value START_BASEFONT ([]) +%value START_FORM ([]) +%value START_H1 ([]) +%value START_H2 ([]) +%value START_H3 ([]) +%value START_H4 ([]) +%value START_H5 ([]) +%value START_H6 ([]) +%value TAG_HR ([]) +%value TAG_IMG ([("SRC", HTMLAttrs.NAME "")]) +%value TAG_INPUT ([]) +%value TAG_ISINDEX ([]) +%value TAG_LINK ([]) +%value START_MAP ([]) +%value START_MENU ([]) +%value TAG_META ([("CONTENT", HTMLAttrs.NAME "")]) +%value START_OL ([]) +%value START_OPTION ([]) +%value START_P ([]) +%value TAG_PARAM ([("NAME", HTMLAttrs.NAME "")]) +%value START_PRE ([]) +%value START_SELECT ([("NAME", HTMLAttrs.NAME "")]) +%value START_TABLE ([]) +%value START_TD ([]) +%value START_TEXTAREA ([ + ("NAME", HTMLAttrs.NAME ""), + ("ROWS", HTMLAttrs.NAME "0"), + ("COLS", HTMLAttrs.NAME "0") + ]) +%value START_TH ([]) +%value START_TR ([]) +%value START_UL ([]) + +%% + +Document + : StartHTML Head Body EndHTML + (HTML.HTML{version=StartHTML, head=Head, body=Body}) + +StartHTML + : (* empty *) (NONE) + | START_HTML (NONE) + +EndHTML + : (* empty *) () + | END_HTML () + + +(*** Head markup ***) + +Head + : StartHEAD HeadContents EndHEAD + (HeadContents) + +StartHEAD + : (* empty *) () + | START_HEAD () +EndHEAD + : (* empty *) () + | END_HEAD () + +HeadContents + : HeadElements START_TITLE PCData END_TITLE HeadElements + (HeadElements1 @ (HTML.Head_TITLE PCData :: HeadElements2)) + +HeadElements + : (* empty *) + ([]) + | HeadElement HeadElements + (HeadElement :: HeadElements) + +HeadElement + : TAG_META + (HTMLAttrs.mkMETA(ctx TAG_METAleft, TAG_META)) + | TAG_LINK + (HTMLAttrs.mkLINK(ctx TAG_LINKleft, TAG_LINK)) + | TAG_ISINDEX + (let val stuff = + HTMLAttrs.mkISINDEX (ctx TAG_ISINDEXleft, TAG_ISINDEX) + in HTML.Head_ISINDEX stuff end + ) + | TAG_BASE + (HTMLAttrs.mkBASE(ctx TAG_BASEleft, TAG_BASE)) + | START_STYLE PCData END_STYLE + (HTML.Head_STYLE(PCData)) + | START_SCRIPT PCData END_SCRIPT + (HTML.Head_SCRIPT(PCData)) + + +(*** Body content ***) + +Body + : BodyContent0 EndBODY + (BodyContent0) + +EndBODY + : (* empty *) () + | END_BODY () + +BodyContent + : BodyContent1 + (mkBlock BodyContent1) + +BodyContent0 + : START_BODY BodyContent + (HTMLAttrs.mkBODY(ctx START_BODYleft, START_BODY, BodyContent)) + | TextWOScript BodyContent1 + (mkBody(consText(TextWOScript, BodyContent1))) + | BodyElement BodyContent1 + (mkBody(consBlock(BodyElement, BodyContent1))) + | BlockWOIndex BodyContent1 + (mkBody(consBlock(BlockWOIndex, BodyContent1))) + | Paragraph END_P BodyContent1 + (mkBody(consBlock(Paragraph, BodyContent1))) + | Paragraph BodyContent2 + (mkBody(consBlock(Paragraph, BodyContent2))) + +BodyContent1 + : (* empty *) + ([]) + | Text BodyContent1 + (consText(Text, BodyContent1)) + | BodyElement BodyContent1 + (consBlock(BodyElement, BodyContent1)) + | Block BodyContent1 + (consBlock(Block, BodyContent1)) + | Paragraph END_P BodyContent1 + (consBlock(Paragraph, BodyContent1)) + | Paragraph BodyContent2 + (consBlock(Paragraph, BodyContent2)) + +BodyContent2 + : (* empty *) + ([]) + | BodyElement BodyContent1 + (consBlock(BodyElement, BodyContent1)) + | Block BodyContent1 + (consBlock(Block, BodyContent1)) + | Paragraph END_P BodyContent1 + (consBlock(Paragraph, BodyContent1)) + | Paragraph BodyContent2 + (consBlock(Paragraph, BodyContent2)) + +BodyElement + : START_H1 TextList END_H1 + (HTMLAttrs.mkHn(1, ctx START_H1left, START_H1, TextList)) + | START_H2 TextList END_H2 + (HTMLAttrs.mkHn(2, ctx START_H2left, START_H2, TextList)) + | START_H3 TextList END_H3 + (HTMLAttrs.mkHn(3, ctx START_H3left, START_H3, TextList)) + | START_H4 TextList END_H4 + (HTMLAttrs.mkHn(4, ctx START_H4left, START_H4, TextList)) + | START_H5 TextList END_H5 + (HTMLAttrs.mkHn(5, ctx START_H5left, START_H5, TextList)) + | START_H6 TextList END_H6 + (HTMLAttrs.mkHn(6, ctx START_H6left, START_H6, TextList)) + | START_ADDRESS AddressContent1 END_ADDRESS + (HTML.ADDRESS(mkBlock AddressContent1)) + +AddressContent1 + : (* empty *) + ([]) + | Text AddressContent1 + (consText(Text, AddressContent1)) + | Paragraph END_P AddressContent1 + (consBlock(Paragraph, AddressContent1)) + | Paragraph AddressContent2 + (consBlock(Paragraph, AddressContent2)) + +AddressContent2 + : (* empty *) + ([]) + | Paragraph END_P AddressContent1 + (consBlock(Paragraph, AddressContent1)) + | Paragraph AddressContent2 + (consBlock(Paragraph, AddressContent2)) + +(*** Block ***) + +BlockWOIndex + : List + (List) + | Preformatted + (Preformatted) + | START_DIV BodyContent END_DIV + (HTMLAttrs.mkDIV(ctx START_DIVleft, START_DIV, BodyContent)) + | START_CENTER BodyContent END_CENTER + (HTML.CENTER BodyContent) + | START_BLOCKQUOTE BodyContent END_BLOCKQUOTE + (HTML.BLOCKQUOTE BodyContent) + | START_FORM BodyContent END_FORM + (HTMLAttrs.mkFORM(ctx START_FORMleft, START_FORM, BodyContent)) + | TAG_HR + (HTMLAttrs.mkHR(ctx TAG_HRleft, TAG_HR)) + | START_TABLE optCaption TableRowList END_TABLE + (HTMLAttrs.mkTABLE( + ctx START_TABLEleft, START_TABLE, + {caption = optCaption, body = TableRowList}) + ) + +Block + : BlockWOIndex + (BlockWOIndex) + | TAG_ISINDEX + (let val stuff = + HTMLAttrs.mkISINDEX (ctx TAG_ISINDEXleft, TAG_ISINDEX) + in HTML.ISINDEX stuff end + ) + +Paragraph + : START_P TextList + (HTMLAttrs.mkP(ctx START_Pleft, START_P, TextList)) + +List + : START_UL ListItemList END_UL + (HTMLAttrs.mkUL(ctx START_ULleft, START_UL, ListItemList)) + | START_OL ListItemList END_OL + (HTMLAttrs.mkOL(ctx START_OLleft, START_OL, ListItemList)) + | START_DIR ListItemList END_DIR + (HTMLAttrs.mkDIR(ctx START_DIRleft, START_DIR, ListItemList)) + | START_MENU ListItemList END_MENU + (HTMLAttrs.mkMENU(ctx START_MENUleft, START_MENU, ListItemList)) + | START_DL DLItemList END_DL + (HTMLAttrs.mkDL( + ctx START_DLleft, START_DL, + groupDefListContents DLItemList) + ) + +ListItemList + : (* empty *) + ([]) + | ListItem ListItemList + (ListItem :: ListItemList) + +ListItem + : START_LI Flow1 EndLI + (HTMLAttrs.mkLI(ctx START_LIleft, START_LI, mkBlock Flow1)) + +DLItemList + : (* empty *) + ([]) + | DLItem DLItemList + (DLItem :: DLItemList) + +DLItem + : START_DT TextList EndDT + (DL_tag TextList) + | START_DD Flow1 EndDD + (DL_item(mkBlock Flow1)) + +Flow1 + : (* empty *) + ([]) + | Text Flow1 + (consText(Text, Flow1)) + | Block Flow1 + (consBlock(Block, Flow1)) + | Paragraph END_P Flow1 + (consBlock(Paragraph, Flow1)) + | Paragraph Flow2 + (consBlock(Paragraph, Flow2)) + +Flow2 + : (* empty *) + ([]) + | Block Flow1 + (consBlock(Block, Flow1)) + | Paragraph END_P Flow1 + (consBlock(Paragraph, Flow1)) + | Paragraph Flow2 + (consBlock(Paragraph, Flow2)) + +EndLI + : (* empty *) () + | END_LI () +EndDT + : (* empty *) () + | END_DT () +EndDD + : (* empty *) () + | END_DD () + +Preformatted + : START_PRE TextList END_PRE + (HTMLAttrs.mkPRE(ctx START_PREleft, START_PRE, TextList)) + +(*** Tables ***) + +optCaption + : (* empty *) + (NONE) + | START_CAPTION TextList END_CAPTION + (SOME(HTMLAttrs.mkCAPTION( + ctx START_CAPTIONleft, START_CAPTION, TextList))) + +TableRowList + : TableRow + ([TableRow]) + | TableRow TableRowList + (TableRow :: TableRowList) + +TableRow + : START_TR TableCellList + (HTMLAttrs.mkTR(ctx START_TRleft, START_TR, TableCellList)) + | START_TR TableCellList END_TR + (HTMLAttrs.mkTR(ctx START_TRleft, START_TR, TableCellList)) + +TableCellList + : TableCell + ([TableCell]) + | TableCell TableCellList + (TableCell :: TableCellList) + +TableCell + : START_TH BodyContent END_TH + (HTMLAttrs.mkTH(ctx START_THleft, START_TH, BodyContent)) + | START_TH BodyContent + (HTMLAttrs.mkTH(ctx START_THleft, START_TH, BodyContent)) + | START_TD BodyContent END_TD + (HTMLAttrs.mkTD(ctx START_TDleft, START_TD, BodyContent)) + | START_TD BodyContent + (HTMLAttrs.mkTD(ctx START_TDleft, START_TD, BodyContent)) + +(*** Text ***) + +TextList + : TextList' + (textList TextList') + +TextList' + : (* empty *) + ([]) + | Text TextList' + (Text :: TextList') + +TextWOScript + : PCDataElem + (HTML.PCDATA PCDataElem) + | Font + (Font) + | Phrase + (Phrase) + | Special + (Special) + | Form + (Form) + +Text + : TextWOScript + (TextWOScript) + | START_SCRIPT PCData END_SCRIPT + (HTML.SCRIPT PCData) + +Font + : START_TT TextList END_TT + (HTML.TT(TextList)) + | START_I TextList END_I + (HTML.I(TextList)) + | START_B TextList END_B + (HTML.B(TextList)) + | START_U TextList END_U + (HTML.U(TextList)) + | START_STRIKE TextList END_STRIKE + (HTML.STRIKE(TextList)) + | START_BIG TextList END_BIG + (HTML.BIG(TextList)) + | START_SMALL TextList END_SMALL + (HTML.SMALL(TextList)) + | START_SUB TextList END_SUB + (HTML.SUB(TextList)) + | START_SUP TextList END_SUP + (HTML.SUP(TextList)) +Phrase + : START_EM TextList END_EM + (HTML.EM(TextList)) + | START_STRONG TextList END_STRONG + (HTML.STRONG(TextList)) + | START_CODE TextList END_CODE + (HTML.CODE(TextList)) + | START_DFN TextList END_DFN + (HTML.DFN(TextList)) + | START_SAMP TextList END_SAMP + (HTML.SAMP(TextList)) + | START_KBD TextList END_KBD + (HTML.KBD(TextList)) + | START_VAR TextList END_VAR + (HTML.VAR(TextList)) + | START_CITE TextList END_CITE + (HTML.CITE(TextList)) + +Special + : START_A TextList END_A + (HTMLAttrs.mkA(ctx START_Aleft, START_A, TextList)) + | TAG_IMG + (HTMLAttrs.mkIMG(ctx TAG_IMGleft, TAG_IMG)) + | START_APPLET TextList END_APPLET + (HTMLAttrs.mkAPPLET(ctx START_APPLETleft, START_APPLET, TextList)) + | START_FONT TextList END_FONT + (HTMLAttrs.mkFONT(ctx START_FONTleft, START_FONT, TextList)) + | START_BASEFONT TextList END_BASEFONT + (HTMLAttrs.mkBASEFONT( + ctx START_BASEFONTleft, START_BASEFONT, TextList) + ) + | TAG_BR + (HTMLAttrs.mkBR(ctx TAG_BRleft, TAG_BR)) + | START_MAP AreaList END_MAP + (HTMLAttrs.mkMAP(ctx START_MAPleft, START_MAP, AreaList)) + | TAG_PARAM + (HTMLAttrs.mkPARAM(ctx TAG_PARAMleft, TAG_PARAM)) + +AreaList + : (* empty *) + ([]) + | TAG_AREA AreaList + (HTMLAttrs.mkAREA(ctx TAG_AREAleft, TAG_AREA) :: AreaList) + +Form + : TAG_INPUT + (HTMLAttrs.mkINPUT(ctx TAG_INPUTleft, TAG_INPUT)) + | START_SELECT OptionList END_SELECT + (HTMLAttrs.mkSELECT(ctx START_SELECTleft, START_SELECT, OptionList)) + | START_TEXTAREA PCData END_TEXTAREA + (HTMLAttrs.mkTEXTAREA( + ctx START_TEXTAREAleft, START_TEXTAREA, + PCData) + ) + +OptionList + : (* empty *) + ([]) + | START_OPTION PCData EndOPTION OptionList + (HTMLAttrs.mkOPTION(ctx START_OPTIONleft, START_OPTION, PCData) + :: OptionList + ) + +EndOPTION + : (* empty *) + () + | END_OPTION + () + + +(*** PCDATA list ***) + +PCData + : PCDataList + (concat PCDataList) + +PCDataList + : (* empty *) + ([]) + | PCDataElem PCDataList + (PCDataElem :: PCDataList) + +PCDataElem + : PCDATA + (PCDATA) + | CHAR_REF + (CHAR_REF) + | ENTITY_REF + (ENTITY_REF) + diff --git a/smlnj-lib/HTML/html-gram.sig b/smlnj-lib/HTML/html-gram.sig new file mode 100644 index 0000000..c61b473 --- /dev/null +++ b/smlnj-lib/HTML/html-gram.sig @@ -0,0 +1,140 @@ +signature HTML_TOKENS = +sig +type ('a,'b) token +type svalue +val ENTITY_REF: (string) * 'a * 'a -> (svalue,'a) token +val CHAR_REF: (string) * 'a * 'a -> (svalue,'a) token +val PCDATA: (string) * 'a * 'a -> (svalue,'a) token +val END_VAR: 'a * 'a -> (svalue,'a) token +val START_VAR: 'a * 'a -> (svalue,'a) token +val END_UL: 'a * 'a -> (svalue,'a) token +val START_UL: (HTMLAttrVals.attrs) * 'a * 'a -> (svalue,'a) token +val END_U: 'a * 'a -> (svalue,'a) token +val START_U: 'a * 'a -> (svalue,'a) token +val END_TT: 'a * 'a -> (svalue,'a) token +val START_TT: 'a * 'a -> (svalue,'a) token +val END_TR: 'a * 'a -> (svalue,'a) token +val START_TR: (HTMLAttrVals.attrs) * 'a * 'a -> (svalue,'a) token +val END_TITLE: 'a * 'a -> (svalue,'a) token +val START_TITLE: 'a * 'a -> (svalue,'a) token +val END_TH: 'a * 'a -> (svalue,'a) token +val START_TH: (HTMLAttrVals.attrs) * 'a * 'a -> (svalue,'a) token +val END_TEXTAREA: 'a * 'a -> (svalue,'a) token +val START_TEXTAREA: (HTMLAttrVals.attrs) * 'a * 'a -> (svalue,'a) token +val END_TD: 'a * 'a -> (svalue,'a) token +val START_TD: (HTMLAttrVals.attrs) * 'a * 'a -> (svalue,'a) token +val END_TABLE: 'a * 'a -> (svalue,'a) token +val START_TABLE: (HTMLAttrVals.attrs) * 'a * 'a -> (svalue,'a) token +val END_SUP: 'a * 'a -> (svalue,'a) token +val START_SUP: 'a * 'a -> (svalue,'a) token +val END_SUB: 'a * 'a -> (svalue,'a) token +val START_SUB: 'a * 'a -> (svalue,'a) token +val END_STYLE: 'a * 'a -> (svalue,'a) token +val START_STYLE: 'a * 'a -> (svalue,'a) token +val END_STRONG: 'a * 'a -> (svalue,'a) token +val START_STRONG: 'a * 'a -> (svalue,'a) token +val END_STRIKE: 'a * 'a -> (svalue,'a) token +val START_STRIKE: 'a * 'a -> (svalue,'a) token +val END_SMALL: 'a * 'a -> (svalue,'a) token +val START_SMALL: 'a * 'a -> (svalue,'a) token +val END_SELECT: 'a * 'a -> (svalue,'a) token +val START_SELECT: (HTMLAttrVals.attrs) * 'a * 'a -> (svalue,'a) token +val END_SCRIPT: 'a * 'a -> (svalue,'a) token +val START_SCRIPT: 'a * 'a -> (svalue,'a) token +val END_SAMP: 'a * 'a -> (svalue,'a) token +val START_SAMP: 'a * 'a -> (svalue,'a) token +val END_PRE: 'a * 'a -> (svalue,'a) token +val START_PRE: (HTMLAttrVals.attrs) * 'a * 'a -> (svalue,'a) token +val TAG_PARAM: (HTMLAttrVals.attrs) * 'a * 'a -> (svalue,'a) token +val END_P: 'a * 'a -> (svalue,'a) token +val START_P: (HTMLAttrVals.attrs) * 'a * 'a -> (svalue,'a) token +val END_OPTION: 'a * 'a -> (svalue,'a) token +val START_OPTION: (HTMLAttrVals.attrs) * 'a * 'a -> (svalue,'a) token +val END_OL: 'a * 'a -> (svalue,'a) token +val START_OL: (HTMLAttrVals.attrs) * 'a * 'a -> (svalue,'a) token +val TAG_META: (HTMLAttrVals.attrs) * 'a * 'a -> (svalue,'a) token +val END_MENU: 'a * 'a -> (svalue,'a) token +val START_MENU: (HTMLAttrVals.attrs) * 'a * 'a -> (svalue,'a) token +val END_MAP: 'a * 'a -> (svalue,'a) token +val START_MAP: (HTMLAttrVals.attrs) * 'a * 'a -> (svalue,'a) token +val TAG_LINK: (HTMLAttrVals.attrs) * 'a * 'a -> (svalue,'a) token +val END_LI: 'a * 'a -> (svalue,'a) token +val START_LI: (HTMLAttrVals.attrs) * 'a * 'a -> (svalue,'a) token +val END_KBD: 'a * 'a -> (svalue,'a) token +val START_KBD: 'a * 'a -> (svalue,'a) token +val TAG_ISINDEX: (HTMLAttrVals.attrs) * 'a * 'a -> (svalue,'a) token +val TAG_INPUT: (HTMLAttrVals.attrs) * 'a * 'a -> (svalue,'a) token +val TAG_IMG: (HTMLAttrVals.attrs) * 'a * 'a -> (svalue,'a) token +val END_I: 'a * 'a -> (svalue,'a) token +val START_I: 'a * 'a -> (svalue,'a) token +val END_HTML: 'a * 'a -> (svalue,'a) token +val START_HTML: 'a * 'a -> (svalue,'a) token +val TAG_HR: (HTMLAttrVals.attrs) * 'a * 'a -> (svalue,'a) token +val END_HEAD: 'a * 'a -> (svalue,'a) token +val START_HEAD: 'a * 'a -> (svalue,'a) token +val END_H6: 'a * 'a -> (svalue,'a) token +val START_H6: (HTMLAttrVals.attrs) * 'a * 'a -> (svalue,'a) token +val END_H5: 'a * 'a -> (svalue,'a) token +val START_H5: (HTMLAttrVals.attrs) * 'a * 'a -> (svalue,'a) token +val END_H4: 'a * 'a -> (svalue,'a) token +val START_H4: (HTMLAttrVals.attrs) * 'a * 'a -> (svalue,'a) token +val END_H3: 'a * 'a -> (svalue,'a) token +val START_H3: (HTMLAttrVals.attrs) * 'a * 'a -> (svalue,'a) token +val END_H2: 'a * 'a -> (svalue,'a) token +val START_H2: (HTMLAttrVals.attrs) * 'a * 'a -> (svalue,'a) token +val END_H1: 'a * 'a -> (svalue,'a) token +val START_H1: (HTMLAttrVals.attrs) * 'a * 'a -> (svalue,'a) token +val END_FORM: 'a * 'a -> (svalue,'a) token +val START_FORM: (HTMLAttrVals.attrs) * 'a * 'a -> (svalue,'a) token +val END_BASEFONT: 'a * 'a -> (svalue,'a) token +val START_BASEFONT: (HTMLAttrVals.attrs) * 'a * 'a -> (svalue,'a) token +val END_FONT: 'a * 'a -> (svalue,'a) token +val START_FONT: (HTMLAttrVals.attrs) * 'a * 'a -> (svalue,'a) token +val END_EM: 'a * 'a -> (svalue,'a) token +val START_EM: 'a * 'a -> (svalue,'a) token +val END_DT: 'a * 'a -> (svalue,'a) token +val START_DT: 'a * 'a -> (svalue,'a) token +val END_DL: 'a * 'a -> (svalue,'a) token +val START_DL: (HTMLAttrVals.attrs) * 'a * 'a -> (svalue,'a) token +val END_DIV: 'a * 'a -> (svalue,'a) token +val START_DIV: (HTMLAttrVals.attrs) * 'a * 'a -> (svalue,'a) token +val END_DIR: 'a * 'a -> (svalue,'a) token +val START_DIR: (HTMLAttrVals.attrs) * 'a * 'a -> (svalue,'a) token +val END_DFN: 'a * 'a -> (svalue,'a) token +val START_DFN: 'a * 'a -> (svalue,'a) token +val END_DD: 'a * 'a -> (svalue,'a) token +val START_DD: 'a * 'a -> (svalue,'a) token +val END_CODE: 'a * 'a -> (svalue,'a) token +val START_CODE: 'a * 'a -> (svalue,'a) token +val END_CITE: 'a * 'a -> (svalue,'a) token +val START_CITE: 'a * 'a -> (svalue,'a) token +val END_CENTER: 'a * 'a -> (svalue,'a) token +val START_CENTER: 'a * 'a -> (svalue,'a) token +val END_CAPTION: 'a * 'a -> (svalue,'a) token +val START_CAPTION: (HTMLAttrVals.attrs) * 'a * 'a -> (svalue,'a) token +val TAG_BR: (HTMLAttrVals.attrs) * 'a * 'a -> (svalue,'a) token +val END_BODY: 'a * 'a -> (svalue,'a) token +val START_BODY: (HTMLAttrVals.attrs) * 'a * 'a -> (svalue,'a) token +val END_BLOCKQUOTE: 'a * 'a -> (svalue,'a) token +val START_BLOCKQUOTE: 'a * 'a -> (svalue,'a) token +val END_BIG: 'a * 'a -> (svalue,'a) token +val START_BIG: 'a * 'a -> (svalue,'a) token +val TAG_BASE: (HTMLAttrVals.attrs) * 'a * 'a -> (svalue,'a) token +val END_B: 'a * 'a -> (svalue,'a) token +val START_B: 'a * 'a -> (svalue,'a) token +val TAG_AREA: (HTMLAttrVals.attrs) * 'a * 'a -> (svalue,'a) token +val END_APPLET: 'a * 'a -> (svalue,'a) token +val START_APPLET: (HTMLAttrVals.attrs) * 'a * 'a -> (svalue,'a) token +val END_ADDRESS: 'a * 'a -> (svalue,'a) token +val START_ADDRESS: 'a * 'a -> (svalue,'a) token +val END_A: 'a * 'a -> (svalue,'a) token +val START_A: (HTMLAttrVals.attrs) * 'a * 'a -> (svalue,'a) token +val EOF: 'a * 'a -> (svalue,'a) token +end +signature HTML_LRVALS= +sig +structure Tokens : HTML_TOKENS +structure ParserData:PARSER_DATA +sharing type ParserData.Token.token = Tokens.token +sharing type ParserData.svalue = Tokens.svalue +end diff --git a/smlnj-lib/HTML/html-gram.sml b/smlnj-lib/HTML/html-gram.sml new file mode 100644 index 0000000..5d220fb --- /dev/null +++ b/smlnj-lib/HTML/html-gram.sml @@ -0,0 +1,2477 @@ + + + functor HTMLLrValsFn ( + structure Token : TOKEN + structure HTMLAttrs : HTML_ATTRS) = +struct +structure ParserData= +struct +structure Header = +struct +(* html-gram + * + * COPYRIGHT (c) 1996 AT&T Research. + * + * This grammar parses HTML 3.2. Note that it does not enforce exclusions + * (for the content of FORM, PRE, etc). Exclusions should be enforced as + * a second pass over the parse tree. + *) + +fun textList [text] = text + | textList l = HTML.TextList l + +fun blockList [blk] = blk + | blockList l = HTML.BlockList l + +fun textBlock l = HTML.TextBlock(textList l) + +(* The elements of a definition list (<DL>) are tags (<DT>) and items (<DD>). + * To avoid shift/reduce problems, we parse them and then group them. + *) +datatype deflist_item + = DL_tag of HTML.text + | DL_item of HTML.block + +fun groupDefListContents [] = [] + | groupDefListContents (h :: t) = let + fun gdlc (DL_tag tag, []) = ({dt=[tag], dd=HTML.BlockList[]}, []) + | gdlc (DL_tag tag, h :: t) = let + val ({dt, dd}, r) = gdlc (h, t) + in + ({dt = tag :: dt, dd = dd}, r) + end + | gdlc (DL_item blk, r) = ({dt=[],dd=blk}, groupDefListContents r) + in + op :: (gdlc (h, t)) + end + +(* A list of Text, paragraphs and blocks requires grouping the Text items and + * making an implicit paragraph. We cannot directly use TextList because of + * conflicts. + *) +datatype blklist_item + = BL_text of HTML.text list + | BL_block of HTML.block list + +fun consText (txt, BL_text tl :: r) = BL_text(txt::tl) :: r + | consText (txt, l) = BL_text[txt] :: l + +fun consBlock (blk, BL_block bl :: r) = BL_block(blk::bl) :: r + | consBlock (blk, l) = BL_block[blk] :: l + +fun mkBlock blks = let + fun f (BL_text tl) = textBlock tl + | f (BL_block bl) = blockList bl + in + blockList(List.map f blks) + end + +fun mkBody blks = HTML.BODY{ + background = NONE, + bgcolor = NONE, + text = NONE, + link = NONE, + vlink = NONE, + alink = NONE, + content = mkBlock blks + } + + +end +structure LrTable = Token.LrTable +structure Token = Token +local open LrTable in +val table=let val actionRows = +"\ +\\001\000\001\000\000\000\000\000\ +\\001\000\002\000\081\000\004\000\080\000\006\000\079\000\009\000\078\000\ +\\012\000\077\000\014\000\076\000\016\000\075\000\018\000\074\000\ +\\021\000\073\000\023\000\072\000\025\000\071\000\029\000\070\000\ +\\031\000\069\000\033\000\068\000\035\000\067\000\039\000\066\000\ +\\041\000\065\000\043\000\064\000\045\000\063\000\047\000\062\000\ +\\049\000\061\000\051\000\060\000\053\000\059\000\055\000\058\000\ +\\057\000\057\000\061\000\056\000\064\000\055\000\066\000\054\000\ +\\067\000\053\000\069\000\052\000\074\000\051\000\076\000\050\000\ +\\079\000\049\000\083\000\048\000\085\000\047\000\086\000\046\000\ +\\088\000\045\000\092\000\044\000\094\000\043\000\096\000\042\000\ +\\098\000\041\000\102\000\040\000\104\000\039\000\106\000\038\000\ +\\110\000\037\000\118\000\036\000\120\000\035\000\122\000\034\000\ +\\124\000\033\000\126\000\032\000\127\000\031\000\128\000\030\000\000\000\ +\\001\000\003\000\241\000\000\000\ +\\001\000\005\000\240\000\000\000\ +\\001\000\007\000\235\000\000\000\ +\\001\000\010\000\234\000\000\000\ +\\001\000\013\000\233\000\000\000\ +\\001\000\015\000\232\000\000\000\ +\\001\000\020\000\004\001\000\000\ +\\001\000\022\000\231\000\000\000\ +\\001\000\024\000\230\000\000\000\ +\\001\000\026\000\229\000\000\000\ +\\001\000\030\000\228\000\000\000\ +\\001\000\032\000\227\000\000\000\ +\\001\000\034\000\226\000\000\000\ +\\001\000\036\000\223\000\000\000\ +\\001\000\040\000\221\000\000\000\ +\\001\000\042\000\220\000\000\000\ +\\001\000\044\000\219\000\000\000\ +\\001\000\046\000\218\000\000\000\ +\\001\000\048\000\217\000\000\000\ +\\001\000\050\000\216\000\000\000\ +\\001\000\052\000\215\000\000\000\ +\\001\000\054\000\214\000\000\000\ +\\001\000\056\000\213\000\000\000\ +\\001\000\058\000\212\000\000\000\ +\\001\000\065\000\211\000\000\000\ +\\001\000\070\000\210\000\000\000\ +\\001\000\075\000\208\000\000\000\ +\\001\000\077\000\207\000\000\000\ +\\001\000\080\000\206\000\000\000\ +\\001\000\087\000\205\000\000\000\ +\\001\000\089\000\204\000\000\000\ +\\001\000\091\000\170\000\000\000\ +\\001\000\091\000\244\000\000\000\ +\\001\000\093\000\202\000\000\000\ +\\001\000\095\000\201\000\000\000\ +\\001\000\097\000\200\000\000\000\ +\\001\000\099\000\199\000\000\000\ +\\001\000\101\000\169\000\000\000\ +\\001\000\103\000\198\000\000\000\ +\\001\000\105\000\197\000\000\000\ +\\001\000\107\000\255\000\000\000\ +\\001\000\108\000\003\001\112\000\002\001\000\000\ +\\001\000\111\000\192\000\000\000\ +\\001\000\114\000\083\000\000\000\ +\\001\000\115\000\242\000\000\000\ +\\001\000\116\000\195\000\000\000\ +\\001\000\119\000\191\000\000\000\ +\\001\000\121\000\190\000\000\000\ +\\001\000\123\000\185\000\000\000\ +\\001\000\125\000\183\000\000\000\ +\\029\001\000\000\ +\\030\001\062\000\004\000\000\000\ +\\031\001\000\000\ +\\032\001\063\000\109\000\000\000\ +\\033\001\000\000\ +\\034\001\000\000\ +\\035\001\059\000\007\000\000\000\ +\\036\001\000\000\ +\\037\001\060\000\085\000\000\000\ +\\038\001\000\000\ +\\039\001\000\000\ +\\040\001\011\000\016\000\068\000\015\000\073\000\014\000\078\000\013\000\ +\\090\000\012\000\100\000\011\000\000\000\ +\\041\001\000\000\ +\\042\001\000\000\ +\\043\001\000\000\ +\\044\001\000\000\ +\\045\001\000\000\ +\\046\001\000\000\ +\\047\001\000\000\ +\\048\001\000\000\ +\\049\001\017\000\107\000\000\000\ +\\050\001\000\000\ +\\051\001\000\000\ +\\052\001\000\000\ +\\053\001\000\000\ +\\054\001\000\000\ +\\055\001\000\000\ +\\056\001\000\000\ +\\057\001\000\000\ +\\058\001\002\000\081\000\004\000\080\000\006\000\079\000\009\000\078\000\ +\\012\000\077\000\014\000\076\000\018\000\074\000\021\000\073\000\ +\\023\000\072\000\025\000\071\000\029\000\070\000\031\000\069\000\ +\\033\000\068\000\035\000\067\000\039\000\066\000\041\000\065\000\ +\\043\000\064\000\045\000\063\000\047\000\062\000\049\000\061\000\ +\\051\000\060\000\053\000\059\000\055\000\058\000\057\000\057\000\ +\\061\000\056\000\064\000\055\000\066\000\054\000\067\000\053\000\ +\\068\000\098\000\069\000\052\000\074\000\051\000\076\000\050\000\ +\\079\000\049\000\083\000\048\000\085\000\047\000\086\000\046\000\ +\\088\000\045\000\090\000\097\000\092\000\044\000\094\000\043\000\ +\\096\000\042\000\098\000\041\000\102\000\040\000\104\000\039\000\ +\\106\000\038\000\110\000\037\000\118\000\036\000\120\000\035\000\ +\\122\000\034\000\124\000\033\000\126\000\032\000\127\000\031\000\ +\\128\000\030\000\000\000\ +\\059\001\000\000\ +\\060\001\000\000\ +\\061\001\000\000\ +\\062\001\000\000\ +\\063\001\000\000\ +\\064\001\004\000\080\000\014\000\076\000\021\000\073\000\031\000\069\000\ +\\033\000\068\000\035\000\067\000\045\000\063\000\047\000\062\000\ +\\049\000\061\000\051\000\060\000\053\000\059\000\055\000\058\000\ +\\057\000\057\000\061\000\056\000\068\000\098\000\076\000\050\000\ +\\079\000\049\000\083\000\048\000\084\000\103\000\086\000\046\000\ +\\106\000\038\000\122\000\034\000\000\000\ +\\064\001\004\000\080\000\014\000\076\000\021\000\073\000\031\000\069\000\ +\\033\000\068\000\035\000\067\000\045\000\063\000\047\000\062\000\ +\\049\000\061\000\051\000\060\000\053\000\059\000\055\000\058\000\ +\\057\000\057\000\061\000\056\000\068\000\098\000\076\000\050\000\ +\\079\000\049\000\083\000\048\000\084\000\173\000\086\000\046\000\ +\\106\000\038\000\122\000\034\000\000\000\ +\\064\001\004\000\080\000\014\000\076\000\021\000\073\000\031\000\069\000\ +\\033\000\068\000\035\000\067\000\045\000\063\000\047\000\062\000\ +\\049\000\061\000\051\000\060\000\053\000\059\000\055\000\058\000\ +\\057\000\057\000\061\000\056\000\068\000\098\000\076\000\050\000\ +\\079\000\049\000\083\000\048\000\084\000\178\000\086\000\046\000\ +\\106\000\038\000\122\000\034\000\000\000\ +\\065\001\000\000\ +\\066\001\000\000\ +\\067\001\000\000\ +\\068\001\000\000\ +\\069\001\000\000\ +\\070\001\000\000\ +\\071\001\000\000\ +\\072\001\000\000\ +\\073\001\000\000\ +\\074\001\000\000\ +\\075\001\000\000\ +\\076\001\002\000\081\000\006\000\079\000\009\000\078\000\012\000\077\000\ +\\018\000\074\000\023\000\072\000\025\000\071\000\029\000\070\000\ +\\039\000\066\000\041\000\065\000\043\000\064\000\064\000\055\000\ +\\066\000\054\000\067\000\053\000\069\000\052\000\074\000\051\000\ +\\083\000\048\000\085\000\047\000\088\000\045\000\090\000\097\000\ +\\092\000\044\000\094\000\043\000\096\000\042\000\098\000\041\000\ +\\102\000\040\000\104\000\039\000\110\000\037\000\118\000\036\000\ +\\120\000\035\000\124\000\033\000\126\000\032\000\127\000\031\000\ +\\128\000\030\000\000\000\ +\\077\001\000\000\ +\\078\001\000\000\ +\\079\001\000\000\ +\\080\001\083\000\048\000\084\000\239\000\000\000\ +\\080\001\083\000\048\000\084\000\012\001\000\000\ +\\081\001\000\000\ +\\082\001\000\000\ +\\083\001\000\000\ +\\084\001\000\000\ +\\085\001\000\000\ +\\086\001\000\000\ +\\087\001\000\000\ +\\088\001\000\000\ +\\089\001\000\000\ +\\090\001\000\000\ +\\091\001\000\000\ +\\092\001\000\000\ +\\093\001\000\000\ +\\094\001\000\000\ +\\095\001\000\000\ +\\096\001\000\000\ +\\097\001\000\000\ +\\098\001\000\000\ +\\099\001\071\000\115\000\000\000\ +\\100\001\000\000\ +\\101\001\000\000\ +\\102\001\027\000\151\000\037\000\150\000\000\000\ +\\103\001\000\000\ +\\104\001\000\000\ +\\105\001\000\000\ +\\106\001\002\000\081\000\006\000\079\000\009\000\078\000\012\000\077\000\ +\\014\000\076\000\018\000\074\000\021\000\073\000\023\000\072\000\ +\\025\000\071\000\029\000\070\000\031\000\069\000\033\000\068\000\ +\\035\000\067\000\039\000\066\000\041\000\065\000\043\000\064\000\ +\\045\000\063\000\061\000\056\000\064\000\055\000\066\000\054\000\ +\\067\000\053\000\068\000\098\000\069\000\052\000\074\000\051\000\ +\\076\000\050\000\079\000\049\000\083\000\048\000\085\000\047\000\ +\\086\000\046\000\088\000\045\000\090\000\097\000\092\000\044\000\ +\\094\000\043\000\096\000\042\000\098\000\041\000\102\000\040\000\ +\\104\000\039\000\106\000\038\000\110\000\037\000\118\000\036\000\ +\\120\000\035\000\122\000\034\000\124\000\033\000\126\000\032\000\ +\\127\000\031\000\128\000\030\000\000\000\ +\\107\001\000\000\ +\\108\001\000\000\ +\\109\001\000\000\ +\\110\001\000\000\ +\\111\001\014\000\076\000\021\000\073\000\031\000\069\000\033\000\068\000\ +\\035\000\067\000\045\000\063\000\061\000\056\000\068\000\098\000\ +\\076\000\050\000\079\000\049\000\083\000\048\000\084\000\252\000\ +\\086\000\046\000\106\000\038\000\122\000\034\000\000\000\ +\\111\001\014\000\076\000\021\000\073\000\031\000\069\000\033\000\068\000\ +\\035\000\067\000\045\000\063\000\061\000\056\000\068\000\098\000\ +\\076\000\050\000\079\000\049\000\083\000\048\000\084\000\016\001\ +\\086\000\046\000\106\000\038\000\122\000\034\000\000\000\ +\\112\001\000\000\ +\\113\001\000\000\ +\\114\001\000\000\ +\\115\001\072\000\248\000\000\000\ +\\116\001\000\000\ +\\117\001\038\000\008\001\000\000\ +\\118\001\000\000\ +\\119\001\028\000\010\001\000\000\ +\\120\001\000\000\ +\\121\001\000\000\ +\\122\001\019\000\120\000\000\000\ +\\123\001\000\000\ +\\124\001\116\000\195\000\000\000\ +\\125\001\000\000\ +\\126\001\117\000\020\001\000\000\ +\\127\001\000\000\ +\\128\001\108\000\003\001\112\000\002\001\000\000\ +\\129\001\000\000\ +\\130\001\000\000\ +\\131\001\113\000\026\001\000\000\ +\\132\001\000\000\ +\\133\001\109\000\027\001\000\000\ +\\134\001\000\000\ +\\135\001\002\000\081\000\006\000\079\000\009\000\078\000\012\000\077\000\ +\\018\000\074\000\023\000\072\000\025\000\071\000\029\000\070\000\ +\\039\000\066\000\041\000\065\000\043\000\064\000\064\000\055\000\ +\\066\000\054\000\067\000\053\000\069\000\052\000\074\000\051\000\ +\\085\000\047\000\088\000\045\000\090\000\097\000\092\000\044\000\ +\\094\000\043\000\096\000\042\000\098\000\041\000\102\000\040\000\ +\\104\000\039\000\110\000\037\000\118\000\036\000\120\000\035\000\ +\\124\000\033\000\126\000\032\000\127\000\031\000\128\000\030\000\000\000\ +\\136\001\000\000\ +\\137\001\000\000\ +\\138\001\000\000\ +\\139\001\000\000\ +\\140\001\000\000\ +\\141\001\000\000\ +\\142\001\000\000\ +\\143\001\000\000\ +\\144\001\000\000\ +\\145\001\000\000\ +\\146\001\000\000\ +\\147\001\000\000\ +\\148\001\000\000\ +\\149\001\000\000\ +\\150\001\000\000\ +\\151\001\000\000\ +\\152\001\000\000\ +\\153\001\000\000\ +\\154\001\000\000\ +\\155\001\000\000\ +\\156\001\000\000\ +\\157\001\000\000\ +\\158\001\000\000\ +\\159\001\000\000\ +\\160\001\000\000\ +\\161\001\000\000\ +\\162\001\000\000\ +\\163\001\000\000\ +\\164\001\000\000\ +\\165\001\000\000\ +\\166\001\000\000\ +\\167\001\000\000\ +\\168\001\000\000\ +\\169\001\008\000\134\000\000\000\ +\\170\001\000\000\ +\\171\001\000\000\ +\\172\001\000\000\ +\\173\001\000\000\ +\\174\001\081\000\127\000\000\000\ +\\175\001\000\000\ +\\176\001\082\000\006\001\000\000\ +\\177\001\000\000\ +\\178\001\000\000\ +\\179\001\126\000\032\000\127\000\031\000\128\000\030\000\000\000\ +\\180\001\000\000\ +\\181\001\000\000\ +\\182\001\000\000\ +\\183\001\000\000\ +\" +val actionRowNumbers = +"\053\000\058\000\054\000\063\000\ +\\001\000\059\000\063\000\045\000\ +\\060\000\206\000\206\000\065\000\ +\\066\000\067\000\068\000\164\000\ +\\168\000\167\000\166\000\165\000\ +\\081\000\110\000\109\000\087\000\ +\\081\000\081\000\072\000\055\000\ +\\210\000\209\000\208\000\162\000\ +\\125\000\162\000\162\000\206\000\ +\\149\000\162\000\162\000\162\000\ +\\162\000\162\000\201\000\162\000\ +\\162\000\195\000\162\000\125\000\ +\\125\000\196\000\162\000\198\000\ +\\189\000\162\000\115\000\162\000\ +\\162\000\162\000\162\000\162\000\ +\\162\000\081\000\162\000\162\000\ +\\162\000\128\000\081\000\125\000\ +\\162\000\162\000\162\000\081\000\ +\\193\000\081\000\081\000\162\000\ +\\162\000\162\000\101\000\162\000\ +\\064\000\206\000\057\000\061\000\ +\\206\000\205\000\039\000\033\000\ +\\081\000\169\000\088\000\081\000\ +\\117\000\081\000\076\000\206\000\ +\\118\000\089\000\081\000\081\000\ +\\080\000\081\000\078\000\077\000\ +\\071\000\073\000\052\000\056\000\ +\\162\000\161\000\051\000\125\000\ +\\050\000\132\000\049\000\048\000\ +\\044\000\047\000\162\000\041\000\ +\\040\000\038\000\037\000\036\000\ +\\035\000\206\000\032\000\031\000\ +\\119\000\030\000\029\000\028\000\ +\\196\000\027\000\026\000\025\000\ +\\024\000\023\000\022\000\021\000\ +\\020\000\074\000\019\000\018\000\ +\\017\000\016\000\128\000\015\000\ +\\162\000\132\000\014\000\013\000\ +\\012\000\011\000\010\000\009\000\ +\\075\000\007\000\006\000\005\000\ +\\004\000\101\000\105\000\003\000\ +\\002\000\046\000\207\000\069\000\ +\\070\000\082\000\086\000\081\000\ +\\084\000\083\000\034\000\093\000\ +\\081\000\091\000\090\000\079\000\ +\\163\000\186\000\126\000\120\000\ +\\132\000\142\000\137\000\132\000\ +\\174\000\171\000\200\000\151\000\ +\\042\000\043\000\008\000\179\000\ +\\178\000\181\000\175\000\177\000\ +\\199\000\203\000\184\000\148\000\ +\\121\000\123\000\194\000\197\000\ +\\185\000\172\000\099\000\098\000\ +\\097\000\096\000\095\000\094\000\ +\\114\000\192\000\191\000\180\000\ +\\129\000\124\000\144\000\146\000\ +\\111\000\122\000\183\000\182\000\ +\\187\000\112\000\113\000\176\000\ +\\173\000\190\000\102\000\106\000\ +\\104\000\101\000\100\000\188\000\ +\\063\000\085\000\170\000\092\000\ +\\133\000\127\000\143\000\136\000\ +\\138\000\132\000\132\000\134\000\ +\\152\000\116\000\155\000\153\000\ +\\081\000\081\000\150\000\201\000\ +\\204\000\130\000\145\000\131\000\ +\\147\000\108\000\101\000\103\000\ +\\062\000\141\000\132\000\139\000\ +\\135\000\156\000\154\000\158\000\ +\\160\000\202\000\107\000\140\000\ +\\157\000\159\000\000\000" +val gotoT = +"\ +\\001\000\026\001\002\000\001\000\000\000\ +\\004\000\004\000\005\000\003\000\000\000\ +\\000\000\ +\\007\000\008\000\008\000\007\000\009\000\006\000\000\000\ +\\010\000\027\000\014\000\026\000\017\000\025\000\020\000\024\000\ +\\022\000\023\000\023\000\022\000\033\000\021\000\041\000\020\000\ +\\043\000\019\000\044\000\018\000\045\000\017\000\047\000\016\000\ +\\052\000\015\000\000\000\ +\\000\000\ +\\008\000\080\000\009\000\006\000\000\000\ +\\000\000\ +\\006\000\082\000\000\000\ +\\050\000\086\000\051\000\085\000\052\000\084\000\000\000\ +\\050\000\087\000\051\000\085\000\052\000\084\000\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\015\000\094\000\017\000\093\000\020\000\092\000\021\000\091\000\ +\\022\000\090\000\023\000\022\000\033\000\021\000\041\000\089\000\ +\\042\000\088\000\043\000\019\000\044\000\018\000\045\000\017\000\ +\\047\000\016\000\052\000\015\000\000\000\ +\\000\000\ +\\000\000\ +\\016\000\100\000\017\000\099\000\020\000\092\000\021\000\098\000\ +\\022\000\097\000\023\000\022\000\033\000\021\000\000\000\ +\\015\000\102\000\017\000\093\000\020\000\092\000\021\000\091\000\ +\\022\000\090\000\023\000\022\000\033\000\021\000\041\000\089\000\ +\\042\000\088\000\043\000\019\000\044\000\018\000\045\000\017\000\ +\\047\000\016\000\052\000\015\000\000\000\ +\\015\000\103\000\017\000\093\000\020\000\092\000\021\000\091\000\ +\\022\000\090\000\023\000\022\000\033\000\021\000\041\000\089\000\ +\\042\000\088\000\043\000\019\000\044\000\018\000\045\000\017\000\ +\\047\000\016\000\052\000\015\000\000\000\ +\\012\000\104\000\000\000\ +\\003\000\106\000\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\039\000\110\000\040\000\109\000\041\000\089\000\042\000\108\000\ +\\043\000\019\000\044\000\018\000\045\000\017\000\047\000\016\000\ +\\052\000\015\000\000\000\ +\\024\000\112\000\025\000\111\000\000\000\ +\\039\000\114\000\040\000\109\000\041\000\089\000\042\000\108\000\ +\\043\000\019\000\044\000\018\000\045\000\017\000\047\000\016\000\ +\\052\000\015\000\000\000\ +\\039\000\115\000\040\000\109\000\041\000\089\000\042\000\108\000\ +\\043\000\019\000\044\000\018\000\045\000\017\000\047\000\016\000\ +\\052\000\015\000\000\000\ +\\050\000\116\000\051\000\085\000\052\000\084\000\000\000\ +\\034\000\117\000\000\000\ +\\039\000\119\000\040\000\109\000\041\000\089\000\042\000\108\000\ +\\043\000\019\000\044\000\018\000\045\000\017\000\047\000\016\000\ +\\052\000\015\000\000\000\ +\\039\000\120\000\040\000\109\000\041\000\089\000\042\000\108\000\ +\\043\000\019\000\044\000\018\000\045\000\017\000\047\000\016\000\ +\\052\000\015\000\000\000\ +\\039\000\121\000\040\000\109\000\041\000\089\000\042\000\108\000\ +\\043\000\019\000\044\000\018\000\045\000\017\000\047\000\016\000\ +\\052\000\015\000\000\000\ +\\039\000\122\000\040\000\109\000\041\000\089\000\042\000\108\000\ +\\043\000\019\000\044\000\018\000\045\000\017\000\047\000\016\000\ +\\052\000\015\000\000\000\ +\\039\000\123\000\040\000\109\000\041\000\089\000\042\000\108\000\ +\\043\000\019\000\044\000\018\000\045\000\017\000\047\000\016\000\ +\\052\000\015\000\000\000\ +\\048\000\124\000\000\000\ +\\039\000\126\000\040\000\109\000\041\000\089\000\042\000\108\000\ +\\043\000\019\000\044\000\018\000\045\000\017\000\047\000\016\000\ +\\052\000\015\000\000\000\ +\\039\000\127\000\040\000\109\000\041\000\089\000\042\000\108\000\ +\\043\000\019\000\044\000\018\000\045\000\017\000\047\000\016\000\ +\\052\000\015\000\000\000\ +\\000\000\ +\\039\000\128\000\040\000\109\000\041\000\089\000\042\000\108\000\ +\\043\000\019\000\044\000\018\000\045\000\017\000\047\000\016\000\ +\\052\000\015\000\000\000\ +\\024\000\129\000\025\000\111\000\000\000\ +\\024\000\130\000\025\000\111\000\000\000\ +\\046\000\131\000\000\000\ +\\039\000\133\000\040\000\109\000\041\000\089\000\042\000\108\000\ +\\043\000\019\000\044\000\018\000\045\000\017\000\047\000\016\000\ +\\052\000\015\000\000\000\ +\\000\000\ +\\000\000\ +\\039\000\134\000\040\000\109\000\041\000\089\000\042\000\108\000\ +\\043\000\019\000\044\000\018\000\045\000\017\000\047\000\016\000\ +\\052\000\015\000\000\000\ +\\000\000\ +\\039\000\135\000\040\000\109\000\041\000\089\000\042\000\108\000\ +\\043\000\019\000\044\000\018\000\045\000\017\000\047\000\016\000\ +\\052\000\015\000\000\000\ +\\039\000\136\000\040\000\109\000\041\000\089\000\042\000\108\000\ +\\043\000\019\000\044\000\018\000\045\000\017\000\047\000\016\000\ +\\052\000\015\000\000\000\ +\\039\000\137\000\040\000\109\000\041\000\089\000\042\000\108\000\ +\\043\000\019\000\044\000\018\000\045\000\017\000\047\000\016\000\ +\\052\000\015\000\000\000\ +\\039\000\138\000\040\000\109\000\041\000\089\000\042\000\108\000\ +\\043\000\019\000\044\000\018\000\045\000\017\000\047\000\016\000\ +\\052\000\015\000\000\000\ +\\039\000\139\000\040\000\109\000\041\000\089\000\042\000\108\000\ +\\043\000\019\000\044\000\018\000\045\000\017\000\047\000\016\000\ +\\052\000\015\000\000\000\ +\\039\000\140\000\040\000\109\000\041\000\089\000\042\000\108\000\ +\\043\000\019\000\044\000\018\000\045\000\017\000\047\000\016\000\ +\\052\000\015\000\000\000\ +\\013\000\142\000\015\000\141\000\017\000\093\000\020\000\092\000\ +\\021\000\091\000\022\000\090\000\023\000\022\000\033\000\021\000\ +\\041\000\089\000\042\000\088\000\043\000\019\000\044\000\018\000\ +\\045\000\017\000\047\000\016\000\052\000\015\000\000\000\ +\\039\000\143\000\040\000\109\000\041\000\089\000\042\000\108\000\ +\\043\000\019\000\044\000\018\000\045\000\017\000\047\000\016\000\ +\\052\000\015\000\000\000\ +\\039\000\144\000\040\000\109\000\041\000\089\000\042\000\108\000\ +\\043\000\019\000\044\000\018\000\045\000\017\000\047\000\016\000\ +\\052\000\015\000\000\000\ +\\039\000\145\000\040\000\109\000\041\000\089\000\042\000\108\000\ +\\043\000\019\000\044\000\018\000\045\000\017\000\047\000\016\000\ +\\052\000\015\000\000\000\ +\\026\000\147\000\027\000\146\000\000\000\ +\\013\000\150\000\015\000\141\000\017\000\093\000\020\000\092\000\ +\\021\000\091\000\022\000\090\000\023\000\022\000\033\000\021\000\ +\\041\000\089\000\042\000\088\000\043\000\019\000\044\000\018\000\ +\\045\000\017\000\047\000\016\000\052\000\015\000\000\000\ +\\024\000\151\000\025\000\111\000\000\000\ +\\039\000\152\000\040\000\109\000\041\000\089\000\042\000\108\000\ +\\043\000\019\000\044\000\018\000\045\000\017\000\047\000\016\000\ +\\052\000\015\000\000\000\ +\\039\000\153\000\040\000\109\000\041\000\089\000\042\000\108\000\ +\\043\000\019\000\044\000\018\000\045\000\017\000\047\000\016\000\ +\\052\000\015\000\000\000\ +\\039\000\154\000\040\000\109\000\041\000\089\000\042\000\108\000\ +\\043\000\019\000\044\000\018\000\045\000\017\000\047\000\016\000\ +\\052\000\015\000\000\000\ +\\013\000\155\000\015\000\141\000\017\000\093\000\020\000\092\000\ +\\021\000\091\000\022\000\090\000\023\000\022\000\033\000\021\000\ +\\041\000\089\000\042\000\088\000\043\000\019\000\044\000\018\000\ +\\045\000\017\000\047\000\016\000\052\000\015\000\000\000\ +\\000\000\ +\\013\000\156\000\015\000\141\000\017\000\093\000\020\000\092\000\ +\\021\000\091\000\022\000\090\000\023\000\022\000\033\000\021\000\ +\\041\000\089\000\042\000\088\000\043\000\019\000\044\000\018\000\ +\\045\000\017\000\047\000\016\000\052\000\015\000\000\000\ +\\013\000\157\000\015\000\141\000\017\000\093\000\020\000\092\000\ +\\021\000\091\000\022\000\090\000\023\000\022\000\033\000\021\000\ +\\041\000\089\000\042\000\088\000\043\000\019\000\044\000\018\000\ +\\045\000\017\000\047\000\016\000\052\000\015\000\000\000\ +\\039\000\158\000\040\000\109\000\041\000\089\000\042\000\108\000\ +\\043\000\019\000\044\000\018\000\045\000\017\000\047\000\016\000\ +\\052\000\015\000\000\000\ +\\039\000\159\000\040\000\109\000\041\000\089\000\042\000\108\000\ +\\043\000\019\000\044\000\018\000\045\000\017\000\047\000\016\000\ +\\052\000\015\000\000\000\ +\\039\000\160\000\040\000\109\000\041\000\089\000\042\000\108\000\ +\\043\000\019\000\044\000\018\000\045\000\017\000\047\000\016\000\ +\\052\000\015\000\000\000\ +\\018\000\163\000\022\000\162\000\041\000\089\000\042\000\161\000\ +\\043\000\019\000\044\000\018\000\045\000\017\000\047\000\016\000\ +\\052\000\015\000\000\000\ +\\039\000\164\000\040\000\109\000\041\000\089\000\042\000\108\000\ +\\043\000\019\000\044\000\018\000\045\000\017\000\047\000\016\000\ +\\052\000\015\000\000\000\ +\\000\000\ +\\050\000\165\000\051\000\085\000\052\000\084\000\000\000\ +\\000\000\ +\\000\000\ +\\051\000\166\000\052\000\084\000\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\015\000\169\000\017\000\093\000\020\000\092\000\021\000\091\000\ +\\022\000\090\000\023\000\022\000\033\000\021\000\041\000\089\000\ +\\042\000\088\000\043\000\019\000\044\000\018\000\045\000\017\000\ +\\047\000\016\000\052\000\015\000\000\000\ +\\000\000\ +\\016\000\170\000\017\000\099\000\020\000\092\000\021\000\098\000\ +\\022\000\097\000\023\000\022\000\033\000\021\000\000\000\ +\\015\000\172\000\017\000\093\000\020\000\092\000\021\000\091\000\ +\\022\000\090\000\023\000\022\000\033\000\021\000\041\000\089\000\ +\\042\000\088\000\043\000\019\000\044\000\018\000\045\000\017\000\ +\\047\000\016\000\052\000\015\000\000\000\ +\\000\000\ +\\015\000\173\000\017\000\093\000\020\000\092\000\021\000\091\000\ +\\022\000\090\000\023\000\022\000\033\000\021\000\041\000\089\000\ +\\042\000\088\000\043\000\019\000\044\000\018\000\045\000\017\000\ +\\047\000\016\000\052\000\015\000\000\000\ +\\000\000\ +\\050\000\174\000\051\000\085\000\052\000\084\000\000\000\ +\\000\000\ +\\016\000\175\000\017\000\099\000\020\000\092\000\021\000\098\000\ +\\022\000\097\000\023\000\022\000\033\000\021\000\000\000\ +\\015\000\177\000\017\000\093\000\020\000\092\000\021\000\091\000\ +\\022\000\090\000\023\000\022\000\033\000\021\000\041\000\089\000\ +\\042\000\088\000\043\000\019\000\044\000\018\000\045\000\017\000\ +\\047\000\016\000\052\000\015\000\000\000\ +\\015\000\178\000\017\000\093\000\020\000\092\000\021\000\091\000\ +\\022\000\090\000\023\000\022\000\033\000\021\000\041\000\089\000\ +\\042\000\088\000\043\000\019\000\044\000\018\000\045\000\017\000\ +\\047\000\016\000\052\000\015\000\000\000\ +\\000\000\ +\\015\000\179\000\017\000\093\000\020\000\092\000\021\000\091\000\ +\\022\000\090\000\023\000\022\000\033\000\021\000\041\000\089\000\ +\\042\000\088\000\043\000\019\000\044\000\018\000\045\000\017\000\ +\\047\000\016\000\052\000\015\000\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\040\000\180\000\041\000\089\000\042\000\108\000\043\000\019\000\ +\\044\000\018\000\045\000\017\000\047\000\016\000\052\000\015\000\000\000\ +\\000\000\ +\\000\000\ +\\024\000\182\000\025\000\111\000\000\000\ +\\000\000\ +\\020\000\092\000\021\000\187\000\022\000\186\000\023\000\022\000\ +\\028\000\185\000\033\000\021\000\041\000\089\000\042\000\184\000\ +\\043\000\019\000\044\000\018\000\045\000\017\000\047\000\016\000\ +\\052\000\015\000\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\035\000\192\000\036\000\191\000\000\000\ +\\039\000\194\000\040\000\109\000\041\000\089\000\042\000\108\000\ +\\043\000\019\000\044\000\018\000\045\000\017\000\047\000\016\000\ +\\052\000\015\000\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\050\000\201\000\051\000\085\000\052\000\084\000\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\046\000\207\000\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\026\000\220\000\027\000\146\000\000\000\ +\\000\000\ +\\039\000\222\000\040\000\109\000\041\000\089\000\042\000\108\000\ +\\043\000\019\000\044\000\018\000\045\000\017\000\047\000\016\000\ +\\052\000\015\000\000\000\ +\\020\000\092\000\021\000\187\000\022\000\186\000\023\000\022\000\ +\\028\000\223\000\033\000\021\000\041\000\089\000\042\000\184\000\ +\\043\000\019\000\044\000\018\000\045\000\017\000\047\000\016\000\ +\\052\000\015\000\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\018\000\234\000\022\000\162\000\041\000\089\000\042\000\161\000\ +\\043\000\019\000\044\000\018\000\045\000\017\000\047\000\016\000\ +\\052\000\015\000\000\000\ +\\019\000\236\000\022\000\235\000\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\015\000\241\000\017\000\093\000\020\000\092\000\021\000\091\000\ +\\022\000\090\000\023\000\022\000\033\000\021\000\041\000\089\000\ +\\042\000\088\000\043\000\019\000\044\000\018\000\045\000\017\000\ +\\047\000\016\000\052\000\015\000\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\015\000\243\000\017\000\093\000\020\000\092\000\021\000\091\000\ +\\022\000\090\000\023\000\022\000\033\000\021\000\041\000\089\000\ +\\042\000\088\000\043\000\019\000\044\000\018\000\045\000\017\000\ +\\047\000\016\000\052\000\015\000\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\020\000\092\000\021\000\187\000\022\000\186\000\023\000\022\000\ +\\028\000\244\000\033\000\021\000\041\000\089\000\042\000\184\000\ +\\043\000\019\000\044\000\018\000\045\000\017\000\047\000\016\000\ +\\052\000\015\000\000\000\ +\\030\000\245\000\000\000\ +\\020\000\092\000\021\000\249\000\022\000\248\000\023\000\022\000\ +\\029\000\247\000\033\000\021\000\000\000\ +\\020\000\092\000\021\000\187\000\022\000\186\000\023\000\022\000\ +\\028\000\251\000\033\000\021\000\041\000\089\000\042\000\184\000\ +\\043\000\019\000\044\000\018\000\045\000\017\000\047\000\016\000\ +\\052\000\015\000\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\035\000\252\000\036\000\191\000\000\000\ +\\000\000\ +\\037\000\255\000\038\000\254\000\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\049\000\003\001\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\031\000\005\001\000\000\ +\\032\000\007\001\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\019\000\009\001\022\000\235\000\000\000\ +\\000\000\ +\\018\000\011\001\022\000\162\000\041\000\089\000\042\000\161\000\ +\\043\000\019\000\044\000\018\000\045\000\017\000\047\000\016\000\ +\\052\000\015\000\000\000\ +\\000\000\ +\\000\000\ +\\008\000\012\001\009\000\006\000\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\020\000\092\000\021\000\249\000\022\000\248\000\023\000\022\000\ +\\029\000\013\001\033\000\021\000\000\000\ +\\020\000\092\000\021\000\187\000\022\000\186\000\023\000\022\000\ +\\028\000\015\001\033\000\021\000\041\000\089\000\042\000\184\000\ +\\043\000\019\000\044\000\018\000\045\000\017\000\047\000\016\000\ +\\052\000\015\000\000\000\ +\\020\000\092\000\021\000\187\000\022\000\186\000\023\000\022\000\ +\\028\000\016\001\033\000\021\000\041\000\089\000\042\000\184\000\ +\\043\000\019\000\044\000\018\000\045\000\017\000\047\000\016\000\ +\\052\000\015\000\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\037\000\017\001\038\000\254\000\000\000\ +\\000\000\ +\\013\000\019\001\015\000\141\000\017\000\093\000\020\000\092\000\ +\\021\000\091\000\022\000\090\000\023\000\022\000\033\000\021\000\ +\\041\000\089\000\042\000\088\000\043\000\019\000\044\000\018\000\ +\\045\000\017\000\047\000\016\000\052\000\015\000\000\000\ +\\013\000\020\001\015\000\141\000\017\000\093\000\020\000\092\000\ +\\021\000\091\000\022\000\090\000\023\000\022\000\033\000\021\000\ +\\041\000\089\000\042\000\088\000\043\000\019\000\044\000\018\000\ +\\045\000\017\000\047\000\016\000\052\000\015\000\000\000\ +\\000\000\ +\\048\000\021\001\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\018\000\022\001\022\000\162\000\041\000\089\000\042\000\161\000\ +\\043\000\019\000\044\000\018\000\045\000\017\000\047\000\016\000\ +\\052\000\015\000\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\020\000\092\000\021\000\187\000\022\000\186\000\023\000\022\000\ +\\028\000\023\001\033\000\021\000\041\000\089\000\042\000\184\000\ +\\043\000\019\000\044\000\018\000\045\000\017\000\047\000\016\000\ +\\052\000\015\000\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\\000\000\ +\" +val numstates = 283 +val numrules = 155 +val s = ref "" and index = ref 0 +val string_to_int = fn () => +let val i = !index +in index := i+2; Char.ord(String.sub(!s,i)) + Char.ord(String.sub(!s,i+1)) * 256 +end +val string_to_list = fn s' => + let val len = String.size s' + fun f () = + if !index < len then string_to_int() :: f() + else nil + in index := 0; s := s'; f () + end +val string_to_pairlist = fn (conv_key,conv_entry) => + let fun f () = + case string_to_int() + of 0 => EMPTY + | n => PAIR(conv_key (n-1),conv_entry (string_to_int()),f()) + in f + end +val string_to_pairlist_default = fn (conv_key,conv_entry) => + let val conv_row = string_to_pairlist(conv_key,conv_entry) + in fn () => + let val default = conv_entry(string_to_int()) + val row = conv_row() + in (row,default) + end + end +val string_to_table = fn (convert_row,s') => + let val len = String.size s' + fun f ()= + if !index < len then convert_row() :: f() + else nil + in (s := s'; index := 0; f ()) + end +local + val memo = Array.array(numstates+numrules,ERROR) + val _ =let fun g i=(Array.update(memo,i,REDUCE(i-numstates)); g(i+1)) + fun f i = + if i=numstates then g i + else (Array.update(memo,i,SHIFT (STATE i)); f (i+1)) + in f 0 handle General.Subscript => () + end +in +val entry_to_action = fn 0 => ACCEPT | 1 => ERROR | j => Array.sub(memo,(j-2)) +end +val gotoT=Array.fromList(string_to_table(string_to_pairlist(NT,STATE),gotoT)) +val actionRows=string_to_table(string_to_pairlist_default(T,entry_to_action),actionRows) +val actionRowNumbers = string_to_list actionRowNumbers +val actionT = let val actionRowLookUp= +let val a=Array.fromList(actionRows) in fn i=>Array.sub(a,i) end +in Array.fromList(List.map actionRowLookUp actionRowNumbers) +end +in LrTable.mkLrTable {actions=actionT,gotos=gotoT,numRules=numrules, +numStates=numstates,initialState=STATE 0} +end +end +local open Header in +type pos = int +type arg = int -> HTMLAttrs.context +structure MlyValue = +struct +datatype svalue = VOID | ntVOID of unit | ENTITY_REF of (string) + | CHAR_REF of (string) | PCDATA of (string) + | START_UL of (HTMLAttrVals.attrs) + | START_TR of (HTMLAttrVals.attrs) + | START_TH of (HTMLAttrVals.attrs) + | START_TEXTAREA of (HTMLAttrVals.attrs) + | START_TD of (HTMLAttrVals.attrs) + | START_TABLE of (HTMLAttrVals.attrs) + | START_SELECT of (HTMLAttrVals.attrs) + | START_PRE of (HTMLAttrVals.attrs) + | TAG_PARAM of (HTMLAttrVals.attrs) + | START_P of (HTMLAttrVals.attrs) + | START_OPTION of (HTMLAttrVals.attrs) + | START_OL of (HTMLAttrVals.attrs) + | TAG_META of (HTMLAttrVals.attrs) + | START_MENU of (HTMLAttrVals.attrs) + | START_MAP of (HTMLAttrVals.attrs) + | TAG_LINK of (HTMLAttrVals.attrs) + | START_LI of (HTMLAttrVals.attrs) + | TAG_ISINDEX of (HTMLAttrVals.attrs) + | TAG_INPUT of (HTMLAttrVals.attrs) + | TAG_IMG of (HTMLAttrVals.attrs) | TAG_HR of (HTMLAttrVals.attrs) + | START_H6 of (HTMLAttrVals.attrs) + | START_H5 of (HTMLAttrVals.attrs) + | START_H4 of (HTMLAttrVals.attrs) + | START_H3 of (HTMLAttrVals.attrs) + | START_H2 of (HTMLAttrVals.attrs) + | START_H1 of (HTMLAttrVals.attrs) + | START_FORM of (HTMLAttrVals.attrs) + | START_BASEFONT of (HTMLAttrVals.attrs) + | START_FONT of (HTMLAttrVals.attrs) + | START_DL of (HTMLAttrVals.attrs) + | START_DIV of (HTMLAttrVals.attrs) + | START_DIR of (HTMLAttrVals.attrs) + | START_CAPTION of (HTMLAttrVals.attrs) + | TAG_BR of (HTMLAttrVals.attrs) + | START_BODY of (HTMLAttrVals.attrs) + | TAG_BASE of (HTMLAttrVals.attrs) + | TAG_AREA of (HTMLAttrVals.attrs) + | START_APPLET of (HTMLAttrVals.attrs) + | START_A of (HTMLAttrVals.attrs) | PCDataElem of (HTML.pcdata) + | PCDataList of (HTML.pcdata list) | PCData of (HTML.pcdata) + | OptionList of (HTML.select_option list) | Form of (HTML.text) + | AreaList of (HTML.area list) | Special of (HTML.text) + | Phrase of (HTML.text) | Font of (HTML.text) + | Text of (HTML.text) | TextWOScript of (HTML.text) + | TextList' of (HTML.text list) | TextList of (HTML.text) + | TableCell of (HTML.table_cell) + | TableCellList of (HTML.table_cell list) | TableRow of (HTML.tr) + | TableRowList of (HTML.tr list) + | optCaption of (HTML.caption option) + | Preformatted of (HTML.block) | Flow2 of (blklist_item list) + | Flow1 of (blklist_item list) | DLItem of (deflist_item) + | DLItemList of (deflist_item list) | ListItem of (HTML.list_item) + | ListItemList of (HTML.list_item list) | List of (HTML.block) + | Paragraph of (HTML.block) | Block of (HTML.block) + | BlockWOIndex of (HTML.block) + | AddressContent2 of (blklist_item list) + | AddressContent1 of (blklist_item list) + | BodyElement of (HTML.block) | BodyContent2 of (blklist_item list) + | BodyContent1 of (blklist_item list) | BodyContent0 of (HTML.body) + | BodyContent of (HTML.block) | Body of (HTML.body) + | HeadElement of (HTML.head_content) + | HeadElements of (HTML.head_content list) + | HeadContents of (HTML.head_content list) + | Head of (HTML.head_content list) + | StartHTML of (HTML.cdata option) | Document of (HTML.html) +end +type svalue = MlyValue.svalue +type result = HTML.html +end +structure EC= +struct +open LrTable +infix 5 $$ +fun x $$ y = y::x +val is_keyword = +fn _ => false +val preferred_change : (term list * term list) list = +nil +val noShift = +fn (T 0) => true | _ => false +val showTerminal = +fn (T 0) => "EOF" + | (T 1) => "START_A" + | (T 2) => "END_A" + | (T 3) => "START_ADDRESS" + | (T 4) => "END_ADDRESS" + | (T 5) => "START_APPLET" + | (T 6) => "END_APPLET" + | (T 7) => "TAG_AREA" + | (T 8) => "START_B" + | (T 9) => "END_B" + | (T 10) => "TAG_BASE" + | (T 11) => "START_BIG" + | (T 12) => "END_BIG" + | (T 13) => "START_BLOCKQUOTE" + | (T 14) => "END_BLOCKQUOTE" + | (T 15) => "START_BODY" + | (T 16) => "END_BODY" + | (T 17) => "TAG_BR" + | (T 18) => "START_CAPTION" + | (T 19) => "END_CAPTION" + | (T 20) => "START_CENTER" + | (T 21) => "END_CENTER" + | (T 22) => "START_CITE" + | (T 23) => "END_CITE" + | (T 24) => "START_CODE" + | (T 25) => "END_CODE" + | (T 26) => "START_DD" + | (T 27) => "END_DD" + | (T 28) => "START_DFN" + | (T 29) => "END_DFN" + | (T 30) => "START_DIR" + | (T 31) => "END_DIR" + | (T 32) => "START_DIV" + | (T 33) => "END_DIV" + | (T 34) => "START_DL" + | (T 35) => "END_DL" + | (T 36) => "START_DT" + | (T 37) => "END_DT" + | (T 38) => "START_EM" + | (T 39) => "END_EM" + | (T 40) => "START_FONT" + | (T 41) => "END_FONT" + | (T 42) => "START_BASEFONT" + | (T 43) => "END_BASEFONT" + | (T 44) => "START_FORM" + | (T 45) => "END_FORM" + | (T 46) => "START_H1" + | (T 47) => "END_H1" + | (T 48) => "START_H2" + | (T 49) => "END_H2" + | (T 50) => "START_H3" + | (T 51) => "END_H3" + | (T 52) => "START_H4" + | (T 53) => "END_H4" + | (T 54) => "START_H5" + | (T 55) => "END_H5" + | (T 56) => "START_H6" + | (T 57) => "END_H6" + | (T 58) => "START_HEAD" + | (T 59) => "END_HEAD" + | (T 60) => "TAG_HR" + | (T 61) => "START_HTML" + | (T 62) => "END_HTML" + | (T 63) => "START_I" + | (T 64) => "END_I" + | (T 65) => "TAG_IMG" + | (T 66) => "TAG_INPUT" + | (T 67) => "TAG_ISINDEX" + | (T 68) => "START_KBD" + | (T 69) => "END_KBD" + | (T 70) => "START_LI" + | (T 71) => "END_LI" + | (T 72) => "TAG_LINK" + | (T 73) => "START_MAP" + | (T 74) => "END_MAP" + | (T 75) => "START_MENU" + | (T 76) => "END_MENU" + | (T 77) => "TAG_META" + | (T 78) => "START_OL" + | (T 79) => "END_OL" + | (T 80) => "START_OPTION" + | (T 81) => "END_OPTION" + | (T 82) => "START_P" + | (T 83) => "END_P" + | (T 84) => "TAG_PARAM" + | (T 85) => "START_PRE" + | (T 86) => "END_PRE" + | (T 87) => "START_SAMP" + | (T 88) => "END_SAMP" + | (T 89) => "START_SCRIPT" + | (T 90) => "END_SCRIPT" + | (T 91) => "START_SELECT" + | (T 92) => "END_SELECT" + | (T 93) => "START_SMALL" + | (T 94) => "END_SMALL" + | (T 95) => "START_STRIKE" + | (T 96) => "END_STRIKE" + | (T 97) => "START_STRONG" + | (T 98) => "END_STRONG" + | (T 99) => "START_STYLE" + | (T 100) => "END_STYLE" + | (T 101) => "START_SUB" + | (T 102) => "END_SUB" + | (T 103) => "START_SUP" + | (T 104) => "END_SUP" + | (T 105) => "START_TABLE" + | (T 106) => "END_TABLE" + | (T 107) => "START_TD" + | (T 108) => "END_TD" + | (T 109) => "START_TEXTAREA" + | (T 110) => "END_TEXTAREA" + | (T 111) => "START_TH" + | (T 112) => "END_TH" + | (T 113) => "START_TITLE" + | (T 114) => "END_TITLE" + | (T 115) => "START_TR" + | (T 116) => "END_TR" + | (T 117) => "START_TT" + | (T 118) => "END_TT" + | (T 119) => "START_U" + | (T 120) => "END_U" + | (T 121) => "START_UL" + | (T 122) => "END_UL" + | (T 123) => "START_VAR" + | (T 124) => "END_VAR" + | (T 125) => "PCDATA" + | (T 126) => "CHAR_REF" + | (T 127) => "ENTITY_REF" + | _ => "bogus-term" +local open Header in +val errtermvalue= +fn (T 1) => MlyValue.START_A(([])) | +(T 5) => MlyValue.START_APPLET(( +[ + ("CODE", HTMLAttrs.NAME ""), + ("WIDTH", HTMLAttrs.NAME ""), + ("HEIGHT", HTMLAttrs.NAME "") + ] +)) | +(T 7) => MlyValue.TAG_AREA(([("ALT", HTMLAttrs.NAME "")])) | +(T 10) => MlyValue.TAG_BASE(([("URL", HTMLAttrs.NAME "")])) | +(T 15) => MlyValue.START_BODY(([])) | +(T 17) => MlyValue.TAG_BR(([])) | +(T 18) => MlyValue.START_CAPTION(([])) | +(T 30) => MlyValue.START_DIR(([])) | +(T 32) => MlyValue.START_DIV(([])) | +(T 34) => MlyValue.START_DL(([])) | +(T 40) => MlyValue.START_FONT(([])) | +(T 42) => MlyValue.START_BASEFONT(([])) | +(T 44) => MlyValue.START_FORM(([])) | +(T 46) => MlyValue.START_H1(([])) | +(T 48) => MlyValue.START_H2(([])) | +(T 50) => MlyValue.START_H3(([])) | +(T 52) => MlyValue.START_H4(([])) | +(T 54) => MlyValue.START_H5(([])) | +(T 56) => MlyValue.START_H6(([])) | +(T 60) => MlyValue.TAG_HR(([])) | +(T 65) => MlyValue.TAG_IMG(([("SRC", HTMLAttrs.NAME "")])) | +(T 66) => MlyValue.TAG_INPUT(([])) | +(T 67) => MlyValue.TAG_ISINDEX(([])) | +(T 72) => MlyValue.TAG_LINK(([])) | +(T 73) => MlyValue.START_MAP(([])) | +(T 75) => MlyValue.START_MENU(([])) | +(T 77) => MlyValue.TAG_META(([("CONTENT", HTMLAttrs.NAME "")])) | +(T 78) => MlyValue.START_OL(([])) | +(T 80) => MlyValue.START_OPTION(([])) | +(T 82) => MlyValue.START_P(([])) | +(T 84) => MlyValue.TAG_PARAM(([("NAME", HTMLAttrs.NAME "")])) | +(T 85) => MlyValue.START_PRE(([])) | +(T 91) => MlyValue.START_SELECT(([("NAME", HTMLAttrs.NAME "")])) | +(T 105) => MlyValue.START_TABLE(([])) | +(T 107) => MlyValue.START_TD(([])) | +(T 109) => MlyValue.START_TEXTAREA(( +[ + ("NAME", HTMLAttrs.NAME ""), + ("ROWS", HTMLAttrs.NAME "0"), + ("COLS", HTMLAttrs.NAME "0") + ] +)) | +(T 111) => MlyValue.START_TH(([])) | +(T 115) => MlyValue.START_TR(([])) | +(T 121) => MlyValue.START_UL(([])) | +_ => MlyValue.VOID +end +val terms : term list = nil + $$ (T 124) $$ (T 123) $$ (T 122) $$ (T 120) $$ (T 119) $$ (T 118) $$ +(T 117) $$ (T 116) $$ (T 114) $$ (T 113) $$ (T 112) $$ (T 110) $$ (T +108) $$ (T 106) $$ (T 104) $$ (T 103) $$ (T 102) $$ (T 101) $$ (T 100) + $$ (T 99) $$ (T 98) $$ (T 97) $$ (T 96) $$ (T 95) $$ (T 94) $$ (T 93) + $$ (T 92) $$ (T 90) $$ (T 89) $$ (T 88) $$ (T 87) $$ (T 86) $$ (T 83) + $$ (T 81) $$ (T 79) $$ (T 76) $$ (T 74) $$ (T 71) $$ (T 69) $$ (T 68) + $$ (T 64) $$ (T 63) $$ (T 62) $$ (T 61) $$ (T 59) $$ (T 58) $$ (T 57) + $$ (T 55) $$ (T 53) $$ (T 51) $$ (T 49) $$ (T 47) $$ (T 45) $$ (T 43) + $$ (T 41) $$ (T 39) $$ (T 38) $$ (T 37) $$ (T 36) $$ (T 35) $$ (T 33) + $$ (T 31) $$ (T 29) $$ (T 28) $$ (T 27) $$ (T 26) $$ (T 25) $$ (T 24) + $$ (T 23) $$ (T 22) $$ (T 21) $$ (T 20) $$ (T 19) $$ (T 16) $$ (T 14) + $$ (T 13) $$ (T 12) $$ (T 11) $$ (T 9) $$ (T 8) $$ (T 6) $$ (T 4) $$ +(T 3) $$ (T 2) $$ (T 0)end +structure Actions = +struct +exception mlyAction of int +local open Header in +val actions = +fn (i392,defaultPos,stack, + (ctx):arg) => +case (i392,stack) +of ( 0, ( ( _, ( _, _, EndHTML1right)) :: ( _, ( MlyValue.Body Body, + _, _)) :: ( _, ( MlyValue.Head Head, _, _)) :: ( _, ( +MlyValue.StartHTML StartHTML, StartHTML1left, _)) :: rest671)) => let + val result = MlyValue.Document ( +HTML.HTML{version=StartHTML, head=Head, body=Body}) + in ( LrTable.NT 0, ( result, StartHTML1left, EndHTML1right), rest671) + +end +| ( 1, ( rest671)) => let val result = MlyValue.StartHTML (NONE) + in ( LrTable.NT 1, ( result, defaultPos, defaultPos), rest671) +end +| ( 2, ( ( _, ( _, START_HTML1left, START_HTML1right)) :: rest671)) + => let val result = MlyValue.StartHTML (NONE) + in ( LrTable.NT 1, ( result, START_HTML1left, START_HTML1right), +rest671) +end +| ( 3, ( rest671)) => let val result = MlyValue.ntVOID () + in ( LrTable.NT 2, ( result, defaultPos, defaultPos), rest671) +end +| ( 4, ( ( _, ( _, END_HTML1left, END_HTML1right)) :: rest671)) => + let val result = MlyValue.ntVOID () + in ( LrTable.NT 2, ( result, END_HTML1left, END_HTML1right), rest671) + +end +| ( 5, ( ( _, ( _, _, EndHEAD1right)) :: ( _, ( MlyValue.HeadContents + HeadContents, _, _)) :: ( _, ( _, StartHEAD1left, _)) :: rest671)) => + let val result = MlyValue.Head (HeadContents) + in ( LrTable.NT 3, ( result, StartHEAD1left, EndHEAD1right), rest671) + +end +| ( 6, ( rest671)) => let val result = MlyValue.ntVOID () + in ( LrTable.NT 4, ( result, defaultPos, defaultPos), rest671) +end +| ( 7, ( ( _, ( _, START_HEAD1left, START_HEAD1right)) :: rest671)) + => let val result = MlyValue.ntVOID () + in ( LrTable.NT 4, ( result, START_HEAD1left, START_HEAD1right), +rest671) +end +| ( 8, ( rest671)) => let val result = MlyValue.ntVOID () + in ( LrTable.NT 5, ( result, defaultPos, defaultPos), rest671) +end +| ( 9, ( ( _, ( _, END_HEAD1left, END_HEAD1right)) :: rest671)) => + let val result = MlyValue.ntVOID () + in ( LrTable.NT 5, ( result, END_HEAD1left, END_HEAD1right), rest671) + +end +| ( 10, ( ( _, ( MlyValue.HeadElements HeadElements2, _, +HeadElements2right)) :: _ :: ( _, ( MlyValue.PCData PCData, _, _)) :: + _ :: ( _, ( MlyValue.HeadElements HeadElements1, HeadElements1left, _ +)) :: rest671)) => let val result = MlyValue.HeadContents ( +HeadElements1 @ (HTML.Head_TITLE PCData :: HeadElements2)) + in ( LrTable.NT 6, ( result, HeadElements1left, HeadElements2right), +rest671) +end +| ( 11, ( rest671)) => let val result = MlyValue.HeadElements ([]) + in ( LrTable.NT 7, ( result, defaultPos, defaultPos), rest671) +end +| ( 12, ( ( _, ( MlyValue.HeadElements HeadElements, _, +HeadElements1right)) :: ( _, ( MlyValue.HeadElement HeadElement, +HeadElement1left, _)) :: rest671)) => let val result = +MlyValue.HeadElements (HeadElement :: HeadElements) + in ( LrTable.NT 7, ( result, HeadElement1left, HeadElements1right), +rest671) +end +| ( 13, ( ( _, ( MlyValue.TAG_META TAG_META, (TAG_METAleft as +TAG_META1left), TAG_META1right)) :: rest671)) => let val result = +MlyValue.HeadElement (HTMLAttrs.mkMETA(ctx TAG_METAleft, TAG_META)) + in ( LrTable.NT 8, ( result, TAG_META1left, TAG_META1right), rest671) + +end +| ( 14, ( ( _, ( MlyValue.TAG_LINK TAG_LINK, (TAG_LINKleft as +TAG_LINK1left), TAG_LINK1right)) :: rest671)) => let val result = +MlyValue.HeadElement (HTMLAttrs.mkLINK(ctx TAG_LINKleft, TAG_LINK)) + in ( LrTable.NT 8, ( result, TAG_LINK1left, TAG_LINK1right), rest671) + +end +| ( 15, ( ( _, ( MlyValue.TAG_ISINDEX TAG_ISINDEX, (TAG_ISINDEXleft + as TAG_ISINDEX1left), TAG_ISINDEX1right)) :: rest671)) => let val +result = MlyValue.HeadElement ( +let val stuff = + HTMLAttrs.mkISINDEX (ctx TAG_ISINDEXleft, TAG_ISINDEX) + in HTML.Head_ISINDEX stuff end + +) + in ( LrTable.NT 8, ( result, TAG_ISINDEX1left, TAG_ISINDEX1right), +rest671) +end +| ( 16, ( ( _, ( MlyValue.TAG_BASE TAG_BASE, (TAG_BASEleft as +TAG_BASE1left), TAG_BASE1right)) :: rest671)) => let val result = +MlyValue.HeadElement (HTMLAttrs.mkBASE(ctx TAG_BASEleft, TAG_BASE)) + in ( LrTable.NT 8, ( result, TAG_BASE1left, TAG_BASE1right), rest671) + +end +| ( 17, ( ( _, ( _, _, END_STYLE1right)) :: ( _, ( MlyValue.PCData +PCData, _, _)) :: ( _, ( _, START_STYLE1left, _)) :: rest671)) => let + val result = MlyValue.HeadElement (HTML.Head_STYLE(PCData)) + in ( LrTable.NT 8, ( result, START_STYLE1left, END_STYLE1right), +rest671) +end +| ( 18, ( ( _, ( _, _, END_SCRIPT1right)) :: ( _, ( MlyValue.PCData +PCData, _, _)) :: ( _, ( _, START_SCRIPT1left, _)) :: rest671)) => let + val result = MlyValue.HeadElement (HTML.Head_SCRIPT(PCData)) + in ( LrTable.NT 8, ( result, START_SCRIPT1left, END_SCRIPT1right), +rest671) +end +| ( 19, ( ( _, ( _, _, EndBODY1right)) :: ( _, ( +MlyValue.BodyContent0 BodyContent0, BodyContent01left, _)) :: rest671) +) => let val result = MlyValue.Body (BodyContent0) + in ( LrTable.NT 9, ( result, BodyContent01left, EndBODY1right), +rest671) +end +| ( 20, ( rest671)) => let val result = MlyValue.ntVOID () + in ( LrTable.NT 11, ( result, defaultPos, defaultPos), rest671) +end +| ( 21, ( ( _, ( _, END_BODY1left, END_BODY1right)) :: rest671)) => + let val result = MlyValue.ntVOID () + in ( LrTable.NT 11, ( result, END_BODY1left, END_BODY1right), rest671 +) +end +| ( 22, ( ( _, ( MlyValue.BodyContent1 BodyContent1, +BodyContent11left, BodyContent11right)) :: rest671)) => let val +result = MlyValue.BodyContent (mkBlock BodyContent1) + in ( LrTable.NT 12, ( result, BodyContent11left, BodyContent11right), + rest671) +end +| ( 23, ( ( _, ( MlyValue.BodyContent BodyContent, _, +BodyContent1right)) :: ( _, ( MlyValue.START_BODY START_BODY, ( +START_BODYleft as START_BODY1left), _)) :: rest671)) => let val +result = MlyValue.BodyContent0 ( +HTMLAttrs.mkBODY(ctx START_BODYleft, START_BODY, BodyContent)) + in ( LrTable.NT 13, ( result, START_BODY1left, BodyContent1right), +rest671) +end +| ( 24, ( ( _, ( MlyValue.BodyContent1 BodyContent1, _, +BodyContent11right)) :: ( _, ( MlyValue.TextWOScript TextWOScript, +TextWOScript1left, _)) :: rest671)) => let val result = +MlyValue.BodyContent0 (mkBody(consText(TextWOScript, BodyContent1))) + in ( LrTable.NT 13, ( result, TextWOScript1left, BodyContent11right), + rest671) +end +| ( 25, ( ( _, ( MlyValue.BodyContent1 BodyContent1, _, +BodyContent11right)) :: ( _, ( MlyValue.BodyElement BodyElement, +BodyElement1left, _)) :: rest671)) => let val result = +MlyValue.BodyContent0 (mkBody(consBlock(BodyElement, BodyContent1))) + in ( LrTable.NT 13, ( result, BodyElement1left, BodyContent11right), +rest671) +end +| ( 26, ( ( _, ( MlyValue.BodyContent1 BodyContent1, _, +BodyContent11right)) :: ( _, ( MlyValue.BlockWOIndex BlockWOIndex, +BlockWOIndex1left, _)) :: rest671)) => let val result = +MlyValue.BodyContent0 (mkBody(consBlock(BlockWOIndex, BodyContent1))) + in ( LrTable.NT 13, ( result, BlockWOIndex1left, BodyContent11right), + rest671) +end +| ( 27, ( ( _, ( MlyValue.BodyContent1 BodyContent1, _, +BodyContent11right)) :: _ :: ( _, ( MlyValue.Paragraph Paragraph, +Paragraph1left, _)) :: rest671)) => let val result = +MlyValue.BodyContent0 (mkBody(consBlock(Paragraph, BodyContent1))) + in ( LrTable.NT 13, ( result, Paragraph1left, BodyContent11right), +rest671) +end +| ( 28, ( ( _, ( MlyValue.BodyContent2 BodyContent2, _, +BodyContent21right)) :: ( _, ( MlyValue.Paragraph Paragraph, +Paragraph1left, _)) :: rest671)) => let val result = +MlyValue.BodyContent0 (mkBody(consBlock(Paragraph, BodyContent2))) + in ( LrTable.NT 13, ( result, Paragraph1left, BodyContent21right), +rest671) +end +| ( 29, ( rest671)) => let val result = MlyValue.BodyContent1 ([]) + in ( LrTable.NT 14, ( result, defaultPos, defaultPos), rest671) +end +| ( 30, ( ( _, ( MlyValue.BodyContent1 BodyContent1, _, +BodyContent11right)) :: ( _, ( MlyValue.Text Text, Text1left, _)) :: +rest671)) => let val result = MlyValue.BodyContent1 ( +consText(Text, BodyContent1)) + in ( LrTable.NT 14, ( result, Text1left, BodyContent11right), rest671 +) +end +| ( 31, ( ( _, ( MlyValue.BodyContent1 BodyContent1, _, +BodyContent11right)) :: ( _, ( MlyValue.BodyElement BodyElement, +BodyElement1left, _)) :: rest671)) => let val result = +MlyValue.BodyContent1 (consBlock(BodyElement, BodyContent1)) + in ( LrTable.NT 14, ( result, BodyElement1left, BodyContent11right), +rest671) +end +| ( 32, ( ( _, ( MlyValue.BodyContent1 BodyContent1, _, +BodyContent11right)) :: ( _, ( MlyValue.Block Block, Block1left, _)) + :: rest671)) => let val result = MlyValue.BodyContent1 ( +consBlock(Block, BodyContent1)) + in ( LrTable.NT 14, ( result, Block1left, BodyContent11right), +rest671) +end +| ( 33, ( ( _, ( MlyValue.BodyContent1 BodyContent1, _, +BodyContent11right)) :: _ :: ( _, ( MlyValue.Paragraph Paragraph, +Paragraph1left, _)) :: rest671)) => let val result = +MlyValue.BodyContent1 (consBlock(Paragraph, BodyContent1)) + in ( LrTable.NT 14, ( result, Paragraph1left, BodyContent11right), +rest671) +end +| ( 34, ( ( _, ( MlyValue.BodyContent2 BodyContent2, _, +BodyContent21right)) :: ( _, ( MlyValue.Paragraph Paragraph, +Paragraph1left, _)) :: rest671)) => let val result = +MlyValue.BodyContent1 (consBlock(Paragraph, BodyContent2)) + in ( LrTable.NT 14, ( result, Paragraph1left, BodyContent21right), +rest671) +end +| ( 35, ( rest671)) => let val result = MlyValue.BodyContent2 ([]) + in ( LrTable.NT 15, ( result, defaultPos, defaultPos), rest671) +end +| ( 36, ( ( _, ( MlyValue.BodyContent1 BodyContent1, _, +BodyContent11right)) :: ( _, ( MlyValue.BodyElement BodyElement, +BodyElement1left, _)) :: rest671)) => let val result = +MlyValue.BodyContent2 (consBlock(BodyElement, BodyContent1)) + in ( LrTable.NT 15, ( result, BodyElement1left, BodyContent11right), +rest671) +end +| ( 37, ( ( _, ( MlyValue.BodyContent1 BodyContent1, _, +BodyContent11right)) :: ( _, ( MlyValue.Block Block, Block1left, _)) + :: rest671)) => let val result = MlyValue.BodyContent2 ( +consBlock(Block, BodyContent1)) + in ( LrTable.NT 15, ( result, Block1left, BodyContent11right), +rest671) +end +| ( 38, ( ( _, ( MlyValue.BodyContent1 BodyContent1, _, +BodyContent11right)) :: _ :: ( _, ( MlyValue.Paragraph Paragraph, +Paragraph1left, _)) :: rest671)) => let val result = +MlyValue.BodyContent2 (consBlock(Paragraph, BodyContent1)) + in ( LrTable.NT 15, ( result, Paragraph1left, BodyContent11right), +rest671) +end +| ( 39, ( ( _, ( MlyValue.BodyContent2 BodyContent2, _, +BodyContent21right)) :: ( _, ( MlyValue.Paragraph Paragraph, +Paragraph1left, _)) :: rest671)) => let val result = +MlyValue.BodyContent2 (consBlock(Paragraph, BodyContent2)) + in ( LrTable.NT 15, ( result, Paragraph1left, BodyContent21right), +rest671) +end +| ( 40, ( ( _, ( _, _, END_H11right)) :: ( _, ( MlyValue.TextList +TextList, _, _)) :: ( _, ( MlyValue.START_H1 START_H1, (START_H1left + as START_H11left), _)) :: rest671)) => let val result = +MlyValue.BodyElement ( +HTMLAttrs.mkHn(1, ctx START_H1left, START_H1, TextList)) + in ( LrTable.NT 16, ( result, START_H11left, END_H11right), rest671) + +end +| ( 41, ( ( _, ( _, _, END_H21right)) :: ( _, ( MlyValue.TextList +TextList, _, _)) :: ( _, ( MlyValue.START_H2 START_H2, (START_H2left + as START_H21left), _)) :: rest671)) => let val result = +MlyValue.BodyElement ( +HTMLAttrs.mkHn(2, ctx START_H2left, START_H2, TextList)) + in ( LrTable.NT 16, ( result, START_H21left, END_H21right), rest671) + +end +| ( 42, ( ( _, ( _, _, END_H31right)) :: ( _, ( MlyValue.TextList +TextList, _, _)) :: ( _, ( MlyValue.START_H3 START_H3, (START_H3left + as START_H31left), _)) :: rest671)) => let val result = +MlyValue.BodyElement ( +HTMLAttrs.mkHn(3, ctx START_H3left, START_H3, TextList)) + in ( LrTable.NT 16, ( result, START_H31left, END_H31right), rest671) + +end +| ( 43, ( ( _, ( _, _, END_H41right)) :: ( _, ( MlyValue.TextList +TextList, _, _)) :: ( _, ( MlyValue.START_H4 START_H4, (START_H4left + as START_H41left), _)) :: rest671)) => let val result = +MlyValue.BodyElement ( +HTMLAttrs.mkHn(4, ctx START_H4left, START_H4, TextList)) + in ( LrTable.NT 16, ( result, START_H41left, END_H41right), rest671) + +end +| ( 44, ( ( _, ( _, _, END_H51right)) :: ( _, ( MlyValue.TextList +TextList, _, _)) :: ( _, ( MlyValue.START_H5 START_H5, (START_H5left + as START_H51left), _)) :: rest671)) => let val result = +MlyValue.BodyElement ( +HTMLAttrs.mkHn(5, ctx START_H5left, START_H5, TextList)) + in ( LrTable.NT 16, ( result, START_H51left, END_H51right), rest671) + +end +| ( 45, ( ( _, ( _, _, END_H61right)) :: ( _, ( MlyValue.TextList +TextList, _, _)) :: ( _, ( MlyValue.START_H6 START_H6, (START_H6left + as START_H61left), _)) :: rest671)) => let val result = +MlyValue.BodyElement ( +HTMLAttrs.mkHn(6, ctx START_H6left, START_H6, TextList)) + in ( LrTable.NT 16, ( result, START_H61left, END_H61right), rest671) + +end +| ( 46, ( ( _, ( _, _, END_ADDRESS1right)) :: ( _, ( +MlyValue.AddressContent1 AddressContent1, _, _)) :: ( _, ( _, +START_ADDRESS1left, _)) :: rest671)) => let val result = +MlyValue.BodyElement (HTML.ADDRESS(mkBlock AddressContent1)) + in ( LrTable.NT 16, ( result, START_ADDRESS1left, END_ADDRESS1right), + rest671) +end +| ( 47, ( rest671)) => let val result = MlyValue.AddressContent1 ([] +) + in ( LrTable.NT 17, ( result, defaultPos, defaultPos), rest671) +end +| ( 48, ( ( _, ( MlyValue.AddressContent1 AddressContent1, _, +AddressContent11right)) :: ( _, ( MlyValue.Text Text, Text1left, _)) + :: rest671)) => let val result = MlyValue.AddressContent1 ( +consText(Text, AddressContent1)) + in ( LrTable.NT 17, ( result, Text1left, AddressContent11right), +rest671) +end +| ( 49, ( ( _, ( MlyValue.AddressContent1 AddressContent1, _, +AddressContent11right)) :: _ :: ( _, ( MlyValue.Paragraph Paragraph, +Paragraph1left, _)) :: rest671)) => let val result = +MlyValue.AddressContent1 (consBlock(Paragraph, AddressContent1)) + in ( LrTable.NT 17, ( result, Paragraph1left, AddressContent11right), + rest671) +end +| ( 50, ( ( _, ( MlyValue.AddressContent2 AddressContent2, _, +AddressContent21right)) :: ( _, ( MlyValue.Paragraph Paragraph, +Paragraph1left, _)) :: rest671)) => let val result = +MlyValue.AddressContent1 (consBlock(Paragraph, AddressContent2)) + in ( LrTable.NT 17, ( result, Paragraph1left, AddressContent21right), + rest671) +end +| ( 51, ( rest671)) => let val result = MlyValue.AddressContent2 ([] +) + in ( LrTable.NT 18, ( result, defaultPos, defaultPos), rest671) +end +| ( 52, ( ( _, ( MlyValue.AddressContent1 AddressContent1, _, +AddressContent11right)) :: _ :: ( _, ( MlyValue.Paragraph Paragraph, +Paragraph1left, _)) :: rest671)) => let val result = +MlyValue.AddressContent2 (consBlock(Paragraph, AddressContent1)) + in ( LrTable.NT 18, ( result, Paragraph1left, AddressContent11right), + rest671) +end +| ( 53, ( ( _, ( MlyValue.AddressContent2 AddressContent2, _, +AddressContent21right)) :: ( _, ( MlyValue.Paragraph Paragraph, +Paragraph1left, _)) :: rest671)) => let val result = +MlyValue.AddressContent2 (consBlock(Paragraph, AddressContent2)) + in ( LrTable.NT 18, ( result, Paragraph1left, AddressContent21right), + rest671) +end +| ( 54, ( ( _, ( MlyValue.List List, List1left, List1right)) :: +rest671)) => let val result = MlyValue.BlockWOIndex (List) + in ( LrTable.NT 19, ( result, List1left, List1right), rest671) +end +| ( 55, ( ( _, ( MlyValue.Preformatted Preformatted, +Preformatted1left, Preformatted1right)) :: rest671)) => let val +result = MlyValue.BlockWOIndex (Preformatted) + in ( LrTable.NT 19, ( result, Preformatted1left, Preformatted1right), + rest671) +end +| ( 56, ( ( _, ( _, _, END_DIV1right)) :: ( _, ( MlyValue.BodyContent + BodyContent, _, _)) :: ( _, ( MlyValue.START_DIV START_DIV, ( +START_DIVleft as START_DIV1left), _)) :: rest671)) => let val result + = MlyValue.BlockWOIndex ( +HTMLAttrs.mkDIV(ctx START_DIVleft, START_DIV, BodyContent)) + in ( LrTable.NT 19, ( result, START_DIV1left, END_DIV1right), rest671 +) +end +| ( 57, ( ( _, ( _, _, END_CENTER1right)) :: ( _, ( +MlyValue.BodyContent BodyContent, _, _)) :: ( _, ( _, +START_CENTER1left, _)) :: rest671)) => let val result = +MlyValue.BlockWOIndex (HTML.CENTER BodyContent) + in ( LrTable.NT 19, ( result, START_CENTER1left, END_CENTER1right), +rest671) +end +| ( 58, ( ( _, ( _, _, END_BLOCKQUOTE1right)) :: ( _, ( +MlyValue.BodyContent BodyContent, _, _)) :: ( _, ( _, +START_BLOCKQUOTE1left, _)) :: rest671)) => let val result = +MlyValue.BlockWOIndex (HTML.BLOCKQUOTE BodyContent) + in ( LrTable.NT 19, ( result, START_BLOCKQUOTE1left, +END_BLOCKQUOTE1right), rest671) +end +| ( 59, ( ( _, ( _, _, END_FORM1right)) :: ( _, ( +MlyValue.BodyContent BodyContent, _, _)) :: ( _, ( MlyValue.START_FORM + START_FORM, (START_FORMleft as START_FORM1left), _)) :: rest671)) => + let val result = MlyValue.BlockWOIndex ( +HTMLAttrs.mkFORM(ctx START_FORMleft, START_FORM, BodyContent)) + in ( LrTable.NT 19, ( result, START_FORM1left, END_FORM1right), +rest671) +end +| ( 60, ( ( _, ( MlyValue.TAG_HR TAG_HR, (TAG_HRleft as TAG_HR1left), + TAG_HR1right)) :: rest671)) => let val result = +MlyValue.BlockWOIndex (HTMLAttrs.mkHR(ctx TAG_HRleft, TAG_HR)) + in ( LrTable.NT 19, ( result, TAG_HR1left, TAG_HR1right), rest671) + +end +| ( 61, ( ( _, ( _, _, END_TABLE1right)) :: ( _, ( +MlyValue.TableRowList TableRowList, _, _)) :: ( _, ( +MlyValue.optCaption optCaption, _, _)) :: ( _, ( MlyValue.START_TABLE +START_TABLE, (START_TABLEleft as START_TABLE1left), _)) :: rest671)) + => let val result = MlyValue.BlockWOIndex ( +HTMLAttrs.mkTABLE( + ctx START_TABLEleft, START_TABLE, + {caption = optCaption, body = TableRowList}) + +) + in ( LrTable.NT 19, ( result, START_TABLE1left, END_TABLE1right), +rest671) +end +| ( 62, ( ( _, ( MlyValue.BlockWOIndex BlockWOIndex, +BlockWOIndex1left, BlockWOIndex1right)) :: rest671)) => let val +result = MlyValue.Block (BlockWOIndex) + in ( LrTable.NT 20, ( result, BlockWOIndex1left, BlockWOIndex1right), + rest671) +end +| ( 63, ( ( _, ( MlyValue.TAG_ISINDEX TAG_ISINDEX, (TAG_ISINDEXleft + as TAG_ISINDEX1left), TAG_ISINDEX1right)) :: rest671)) => let val +result = MlyValue.Block ( +let val stuff = + HTMLAttrs.mkISINDEX (ctx TAG_ISINDEXleft, TAG_ISINDEX) + in HTML.ISINDEX stuff end + +) + in ( LrTable.NT 20, ( result, TAG_ISINDEX1left, TAG_ISINDEX1right), +rest671) +end +| ( 64, ( ( _, ( MlyValue.TextList TextList, _, TextList1right)) :: ( + _, ( MlyValue.START_P START_P, (START_Pleft as START_P1left), _)) :: +rest671)) => let val result = MlyValue.Paragraph ( +HTMLAttrs.mkP(ctx START_Pleft, START_P, TextList)) + in ( LrTable.NT 21, ( result, START_P1left, TextList1right), rest671) + +end +| ( 65, ( ( _, ( _, _, END_UL1right)) :: ( _, ( MlyValue.ListItemList + ListItemList, _, _)) :: ( _, ( MlyValue.START_UL START_UL, ( +START_ULleft as START_UL1left), _)) :: rest671)) => let val result = +MlyValue.List ( +HTMLAttrs.mkUL(ctx START_ULleft, START_UL, ListItemList)) + in ( LrTable.NT 22, ( result, START_UL1left, END_UL1right), rest671) + +end +| ( 66, ( ( _, ( _, _, END_OL1right)) :: ( _, ( MlyValue.ListItemList + ListItemList, _, _)) :: ( _, ( MlyValue.START_OL START_OL, ( +START_OLleft as START_OL1left), _)) :: rest671)) => let val result = +MlyValue.List ( +HTMLAttrs.mkOL(ctx START_OLleft, START_OL, ListItemList)) + in ( LrTable.NT 22, ( result, START_OL1left, END_OL1right), rest671) + +end +| ( 67, ( ( _, ( _, _, END_DIR1right)) :: ( _, ( +MlyValue.ListItemList ListItemList, _, _)) :: ( _, ( +MlyValue.START_DIR START_DIR, (START_DIRleft as START_DIR1left), _)) + :: rest671)) => let val result = MlyValue.List ( +HTMLAttrs.mkDIR(ctx START_DIRleft, START_DIR, ListItemList)) + in ( LrTable.NT 22, ( result, START_DIR1left, END_DIR1right), rest671 +) +end +| ( 68, ( ( _, ( _, _, END_MENU1right)) :: ( _, ( +MlyValue.ListItemList ListItemList, _, _)) :: ( _, ( +MlyValue.START_MENU START_MENU, (START_MENUleft as START_MENU1left), _ +)) :: rest671)) => let val result = MlyValue.List ( +HTMLAttrs.mkMENU(ctx START_MENUleft, START_MENU, ListItemList)) + in ( LrTable.NT 22, ( result, START_MENU1left, END_MENU1right), +rest671) +end +| ( 69, ( ( _, ( _, _, END_DL1right)) :: ( _, ( MlyValue.DLItemList +DLItemList, _, _)) :: ( _, ( MlyValue.START_DL START_DL, (START_DLleft + as START_DL1left), _)) :: rest671)) => let val result = +MlyValue.List ( +HTMLAttrs.mkDL( + ctx START_DLleft, START_DL, + groupDefListContents DLItemList) + +) + in ( LrTable.NT 22, ( result, START_DL1left, END_DL1right), rest671) + +end +| ( 70, ( rest671)) => let val result = MlyValue.ListItemList ([]) + in ( LrTable.NT 23, ( result, defaultPos, defaultPos), rest671) +end +| ( 71, ( ( _, ( MlyValue.ListItemList ListItemList, _, +ListItemList1right)) :: ( _, ( MlyValue.ListItem ListItem, +ListItem1left, _)) :: rest671)) => let val result = +MlyValue.ListItemList (ListItem :: ListItemList) + in ( LrTable.NT 23, ( result, ListItem1left, ListItemList1right), +rest671) +end +| ( 72, ( ( _, ( _, _, EndLI1right)) :: ( _, ( MlyValue.Flow1 Flow1, + _, _)) :: ( _, ( MlyValue.START_LI START_LI, (START_LIleft as +START_LI1left), _)) :: rest671)) => let val result = +MlyValue.ListItem ( +HTMLAttrs.mkLI(ctx START_LIleft, START_LI, mkBlock Flow1)) + in ( LrTable.NT 24, ( result, START_LI1left, EndLI1right), rest671) + +end +| ( 73, ( rest671)) => let val result = MlyValue.DLItemList ([]) + in ( LrTable.NT 25, ( result, defaultPos, defaultPos), rest671) +end +| ( 74, ( ( _, ( MlyValue.DLItemList DLItemList, _, DLItemList1right) +) :: ( _, ( MlyValue.DLItem DLItem, DLItem1left, _)) :: rest671)) => + let val result = MlyValue.DLItemList (DLItem :: DLItemList) + in ( LrTable.NT 25, ( result, DLItem1left, DLItemList1right), rest671 +) +end +| ( 75, ( ( _, ( _, _, EndDT1right)) :: ( _, ( MlyValue.TextList +TextList, _, _)) :: ( _, ( _, START_DT1left, _)) :: rest671)) => let + val result = MlyValue.DLItem (DL_tag TextList) + in ( LrTable.NT 26, ( result, START_DT1left, EndDT1right), rest671) + +end +| ( 76, ( ( _, ( _, _, EndDD1right)) :: ( _, ( MlyValue.Flow1 Flow1, + _, _)) :: ( _, ( _, START_DD1left, _)) :: rest671)) => let val +result = MlyValue.DLItem (DL_item(mkBlock Flow1)) + in ( LrTable.NT 26, ( result, START_DD1left, EndDD1right), rest671) + +end +| ( 77, ( rest671)) => let val result = MlyValue.Flow1 ([]) + in ( LrTable.NT 27, ( result, defaultPos, defaultPos), rest671) +end +| ( 78, ( ( _, ( MlyValue.Flow1 Flow1, _, Flow11right)) :: ( _, ( +MlyValue.Text Text, Text1left, _)) :: rest671)) => let val result = +MlyValue.Flow1 (consText(Text, Flow1)) + in ( LrTable.NT 27, ( result, Text1left, Flow11right), rest671) +end +| ( 79, ( ( _, ( MlyValue.Flow1 Flow1, _, Flow11right)) :: ( _, ( +MlyValue.Block Block, Block1left, _)) :: rest671)) => let val result + = MlyValue.Flow1 (consBlock(Block, Flow1)) + in ( LrTable.NT 27, ( result, Block1left, Flow11right), rest671) +end +| ( 80, ( ( _, ( MlyValue.Flow1 Flow1, _, Flow11right)) :: _ :: ( _, +( MlyValue.Paragraph Paragraph, Paragraph1left, _)) :: rest671)) => + let val result = MlyValue.Flow1 (consBlock(Paragraph, Flow1)) + in ( LrTable.NT 27, ( result, Paragraph1left, Flow11right), rest671) + +end +| ( 81, ( ( _, ( MlyValue.Flow2 Flow2, _, Flow21right)) :: ( _, ( +MlyValue.Paragraph Paragraph, Paragraph1left, _)) :: rest671)) => let + val result = MlyValue.Flow1 (consBlock(Paragraph, Flow2)) + in ( LrTable.NT 27, ( result, Paragraph1left, Flow21right), rest671) + +end +| ( 82, ( rest671)) => let val result = MlyValue.Flow2 ([]) + in ( LrTable.NT 28, ( result, defaultPos, defaultPos), rest671) +end +| ( 83, ( ( _, ( MlyValue.Flow1 Flow1, _, Flow11right)) :: ( _, ( +MlyValue.Block Block, Block1left, _)) :: rest671)) => let val result + = MlyValue.Flow2 (consBlock(Block, Flow1)) + in ( LrTable.NT 28, ( result, Block1left, Flow11right), rest671) +end +| ( 84, ( ( _, ( MlyValue.Flow1 Flow1, _, Flow11right)) :: _ :: ( _, +( MlyValue.Paragraph Paragraph, Paragraph1left, _)) :: rest671)) => + let val result = MlyValue.Flow2 (consBlock(Paragraph, Flow1)) + in ( LrTable.NT 28, ( result, Paragraph1left, Flow11right), rest671) + +end +| ( 85, ( ( _, ( MlyValue.Flow2 Flow2, _, Flow21right)) :: ( _, ( +MlyValue.Paragraph Paragraph, Paragraph1left, _)) :: rest671)) => let + val result = MlyValue.Flow2 (consBlock(Paragraph, Flow2)) + in ( LrTable.NT 28, ( result, Paragraph1left, Flow21right), rest671) + +end +| ( 86, ( rest671)) => let val result = MlyValue.ntVOID () + in ( LrTable.NT 29, ( result, defaultPos, defaultPos), rest671) +end +| ( 87, ( ( _, ( _, END_LI1left, END_LI1right)) :: rest671)) => let + val result = MlyValue.ntVOID () + in ( LrTable.NT 29, ( result, END_LI1left, END_LI1right), rest671) + +end +| ( 88, ( rest671)) => let val result = MlyValue.ntVOID () + in ( LrTable.NT 30, ( result, defaultPos, defaultPos), rest671) +end +| ( 89, ( ( _, ( _, END_DT1left, END_DT1right)) :: rest671)) => let + val result = MlyValue.ntVOID () + in ( LrTable.NT 30, ( result, END_DT1left, END_DT1right), rest671) + +end +| ( 90, ( rest671)) => let val result = MlyValue.ntVOID () + in ( LrTable.NT 31, ( result, defaultPos, defaultPos), rest671) +end +| ( 91, ( ( _, ( _, END_DD1left, END_DD1right)) :: rest671)) => let + val result = MlyValue.ntVOID () + in ( LrTable.NT 31, ( result, END_DD1left, END_DD1right), rest671) + +end +| ( 92, ( ( _, ( _, _, END_PRE1right)) :: ( _, ( MlyValue.TextList +TextList, _, _)) :: ( _, ( MlyValue.START_PRE START_PRE, ( +START_PREleft as START_PRE1left), _)) :: rest671)) => let val result + = MlyValue.Preformatted ( +HTMLAttrs.mkPRE(ctx START_PREleft, START_PRE, TextList)) + in ( LrTable.NT 32, ( result, START_PRE1left, END_PRE1right), rest671 +) +end +| ( 93, ( rest671)) => let val result = MlyValue.optCaption (NONE) + in ( LrTable.NT 33, ( result, defaultPos, defaultPos), rest671) +end +| ( 94, ( ( _, ( _, _, END_CAPTION1right)) :: ( _, ( +MlyValue.TextList TextList, _, _)) :: ( _, ( MlyValue.START_CAPTION +START_CAPTION, (START_CAPTIONleft as START_CAPTION1left), _)) :: +rest671)) => let val result = MlyValue.optCaption ( +SOME(HTMLAttrs.mkCAPTION( + ctx START_CAPTIONleft, START_CAPTION, TextList)) +) + in ( LrTable.NT 33, ( result, START_CAPTION1left, END_CAPTION1right), + rest671) +end +| ( 95, ( ( _, ( MlyValue.TableRow TableRow, TableRow1left, +TableRow1right)) :: rest671)) => let val result = +MlyValue.TableRowList ([TableRow]) + in ( LrTable.NT 34, ( result, TableRow1left, TableRow1right), rest671 +) +end +| ( 96, ( ( _, ( MlyValue.TableRowList TableRowList, _, +TableRowList1right)) :: ( _, ( MlyValue.TableRow TableRow, +TableRow1left, _)) :: rest671)) => let val result = +MlyValue.TableRowList (TableRow :: TableRowList) + in ( LrTable.NT 34, ( result, TableRow1left, TableRowList1right), +rest671) +end +| ( 97, ( ( _, ( MlyValue.TableCellList TableCellList, _, +TableCellList1right)) :: ( _, ( MlyValue.START_TR START_TR, ( +START_TRleft as START_TR1left), _)) :: rest671)) => let val result = +MlyValue.TableRow ( +HTMLAttrs.mkTR(ctx START_TRleft, START_TR, TableCellList)) + in ( LrTable.NT 35, ( result, START_TR1left, TableCellList1right), +rest671) +end +| ( 98, ( ( _, ( _, _, END_TR1right)) :: ( _, ( +MlyValue.TableCellList TableCellList, _, _)) :: ( _, ( +MlyValue.START_TR START_TR, (START_TRleft as START_TR1left), _)) :: +rest671)) => let val result = MlyValue.TableRow ( +HTMLAttrs.mkTR(ctx START_TRleft, START_TR, TableCellList)) + in ( LrTable.NT 35, ( result, START_TR1left, END_TR1right), rest671) + +end +| ( 99, ( ( _, ( MlyValue.TableCell TableCell, TableCell1left, +TableCell1right)) :: rest671)) => let val result = +MlyValue.TableCellList ([TableCell]) + in ( LrTable.NT 36, ( result, TableCell1left, TableCell1right), +rest671) +end +| ( 100, ( ( _, ( MlyValue.TableCellList TableCellList, _, +TableCellList1right)) :: ( _, ( MlyValue.TableCell TableCell, +TableCell1left, _)) :: rest671)) => let val result = +MlyValue.TableCellList (TableCell :: TableCellList) + in ( LrTable.NT 36, ( result, TableCell1left, TableCellList1right), +rest671) +end +| ( 101, ( ( _, ( _, _, END_TH1right)) :: ( _, ( MlyValue.BodyContent + BodyContent, _, _)) :: ( _, ( MlyValue.START_TH START_TH, ( +START_THleft as START_TH1left), _)) :: rest671)) => let val result = +MlyValue.TableCell ( +HTMLAttrs.mkTH(ctx START_THleft, START_TH, BodyContent)) + in ( LrTable.NT 37, ( result, START_TH1left, END_TH1right), rest671) + +end +| ( 102, ( ( _, ( MlyValue.BodyContent BodyContent, _, +BodyContent1right)) :: ( _, ( MlyValue.START_TH START_TH, ( +START_THleft as START_TH1left), _)) :: rest671)) => let val result = +MlyValue.TableCell ( +HTMLAttrs.mkTH(ctx START_THleft, START_TH, BodyContent)) + in ( LrTable.NT 37, ( result, START_TH1left, BodyContent1right), +rest671) +end +| ( 103, ( ( _, ( _, _, END_TD1right)) :: ( _, ( MlyValue.BodyContent + BodyContent, _, _)) :: ( _, ( MlyValue.START_TD START_TD, ( +START_TDleft as START_TD1left), _)) :: rest671)) => let val result = +MlyValue.TableCell ( +HTMLAttrs.mkTD(ctx START_TDleft, START_TD, BodyContent)) + in ( LrTable.NT 37, ( result, START_TD1left, END_TD1right), rest671) + +end +| ( 104, ( ( _, ( MlyValue.BodyContent BodyContent, _, +BodyContent1right)) :: ( _, ( MlyValue.START_TD START_TD, ( +START_TDleft as START_TD1left), _)) :: rest671)) => let val result = +MlyValue.TableCell ( +HTMLAttrs.mkTD(ctx START_TDleft, START_TD, BodyContent)) + in ( LrTable.NT 37, ( result, START_TD1left, BodyContent1right), +rest671) +end +| ( 105, ( ( _, ( MlyValue.TextList' TextList', TextList'1left, +TextList'1right)) :: rest671)) => let val result = MlyValue.TextList + (textList TextList') + in ( LrTable.NT 38, ( result, TextList'1left, TextList'1right), +rest671) +end +| ( 106, ( rest671)) => let val result = MlyValue.TextList' ([]) + in ( LrTable.NT 39, ( result, defaultPos, defaultPos), rest671) +end +| ( 107, ( ( _, ( MlyValue.TextList' TextList', _, TextList'1right)) + :: ( _, ( MlyValue.Text Text, Text1left, _)) :: rest671)) => let val + result = MlyValue.TextList' (Text :: TextList') + in ( LrTable.NT 39, ( result, Text1left, TextList'1right), rest671) + +end +| ( 108, ( ( _, ( MlyValue.PCDataElem PCDataElem, PCDataElem1left, +PCDataElem1right)) :: rest671)) => let val result = +MlyValue.TextWOScript (HTML.PCDATA PCDataElem) + in ( LrTable.NT 40, ( result, PCDataElem1left, PCDataElem1right), +rest671) +end +| ( 109, ( ( _, ( MlyValue.Font Font, Font1left, Font1right)) :: +rest671)) => let val result = MlyValue.TextWOScript (Font) + in ( LrTable.NT 40, ( result, Font1left, Font1right), rest671) +end +| ( 110, ( ( _, ( MlyValue.Phrase Phrase, Phrase1left, Phrase1right)) + :: rest671)) => let val result = MlyValue.TextWOScript (Phrase) + in ( LrTable.NT 40, ( result, Phrase1left, Phrase1right), rest671) + +end +| ( 111, ( ( _, ( MlyValue.Special Special, Special1left, +Special1right)) :: rest671)) => let val result = +MlyValue.TextWOScript (Special) + in ( LrTable.NT 40, ( result, Special1left, Special1right), rest671) + +end +| ( 112, ( ( _, ( MlyValue.Form Form, Form1left, Form1right)) :: +rest671)) => let val result = MlyValue.TextWOScript (Form) + in ( LrTable.NT 40, ( result, Form1left, Form1right), rest671) +end +| ( 113, ( ( _, ( MlyValue.TextWOScript TextWOScript, +TextWOScript1left, TextWOScript1right)) :: rest671)) => let val +result = MlyValue.Text (TextWOScript) + in ( LrTable.NT 41, ( result, TextWOScript1left, TextWOScript1right), + rest671) +end +| ( 114, ( ( _, ( _, _, END_SCRIPT1right)) :: ( _, ( MlyValue.PCData +PCData, _, _)) :: ( _, ( _, START_SCRIPT1left, _)) :: rest671)) => let + val result = MlyValue.Text (HTML.SCRIPT PCData) + in ( LrTable.NT 41, ( result, START_SCRIPT1left, END_SCRIPT1right), +rest671) +end +| ( 115, ( ( _, ( _, _, END_TT1right)) :: ( _, ( MlyValue.TextList +TextList, _, _)) :: ( _, ( _, START_TT1left, _)) :: rest671)) => let + val result = MlyValue.Font (HTML.TT(TextList)) + in ( LrTable.NT 42, ( result, START_TT1left, END_TT1right), rest671) + +end +| ( 116, ( ( _, ( _, _, END_I1right)) :: ( _, ( MlyValue.TextList +TextList, _, _)) :: ( _, ( _, START_I1left, _)) :: rest671)) => let + val result = MlyValue.Font (HTML.I(TextList)) + in ( LrTable.NT 42, ( result, START_I1left, END_I1right), rest671) + +end +| ( 117, ( ( _, ( _, _, END_B1right)) :: ( _, ( MlyValue.TextList +TextList, _, _)) :: ( _, ( _, START_B1left, _)) :: rest671)) => let + val result = MlyValue.Font (HTML.B(TextList)) + in ( LrTable.NT 42, ( result, START_B1left, END_B1right), rest671) + +end +| ( 118, ( ( _, ( _, _, END_U1right)) :: ( _, ( MlyValue.TextList +TextList, _, _)) :: ( _, ( _, START_U1left, _)) :: rest671)) => let + val result = MlyValue.Font (HTML.U(TextList)) + in ( LrTable.NT 42, ( result, START_U1left, END_U1right), rest671) + +end +| ( 119, ( ( _, ( _, _, END_STRIKE1right)) :: ( _, ( +MlyValue.TextList TextList, _, _)) :: ( _, ( _, START_STRIKE1left, _)) + :: rest671)) => let val result = MlyValue.Font ( +HTML.STRIKE(TextList)) + in ( LrTable.NT 42, ( result, START_STRIKE1left, END_STRIKE1right), +rest671) +end +| ( 120, ( ( _, ( _, _, END_BIG1right)) :: ( _, ( MlyValue.TextList +TextList, _, _)) :: ( _, ( _, START_BIG1left, _)) :: rest671)) => let + val result = MlyValue.Font (HTML.BIG(TextList)) + in ( LrTable.NT 42, ( result, START_BIG1left, END_BIG1right), rest671 +) +end +| ( 121, ( ( _, ( _, _, END_SMALL1right)) :: ( _, ( MlyValue.TextList + TextList, _, _)) :: ( _, ( _, START_SMALL1left, _)) :: rest671)) => + let val result = MlyValue.Font (HTML.SMALL(TextList)) + in ( LrTable.NT 42, ( result, START_SMALL1left, END_SMALL1right), +rest671) +end +| ( 122, ( ( _, ( _, _, END_SUB1right)) :: ( _, ( MlyValue.TextList +TextList, _, _)) :: ( _, ( _, START_SUB1left, _)) :: rest671)) => let + val result = MlyValue.Font (HTML.SUB(TextList)) + in ( LrTable.NT 42, ( result, START_SUB1left, END_SUB1right), rest671 +) +end +| ( 123, ( ( _, ( _, _, END_SUP1right)) :: ( _, ( MlyValue.TextList +TextList, _, _)) :: ( _, ( _, START_SUP1left, _)) :: rest671)) => let + val result = MlyValue.Font (HTML.SUP(TextList)) + in ( LrTable.NT 42, ( result, START_SUP1left, END_SUP1right), rest671 +) +end +| ( 124, ( ( _, ( _, _, END_EM1right)) :: ( _, ( MlyValue.TextList +TextList, _, _)) :: ( _, ( _, START_EM1left, _)) :: rest671)) => let + val result = MlyValue.Phrase (HTML.EM(TextList)) + in ( LrTable.NT 43, ( result, START_EM1left, END_EM1right), rest671) + +end +| ( 125, ( ( _, ( _, _, END_STRONG1right)) :: ( _, ( +MlyValue.TextList TextList, _, _)) :: ( _, ( _, START_STRONG1left, _)) + :: rest671)) => let val result = MlyValue.Phrase ( +HTML.STRONG(TextList)) + in ( LrTable.NT 43, ( result, START_STRONG1left, END_STRONG1right), +rest671) +end +| ( 126, ( ( _, ( _, _, END_CODE1right)) :: ( _, ( MlyValue.TextList +TextList, _, _)) :: ( _, ( _, START_CODE1left, _)) :: rest671)) => let + val result = MlyValue.Phrase (HTML.CODE(TextList)) + in ( LrTable.NT 43, ( result, START_CODE1left, END_CODE1right), +rest671) +end +| ( 127, ( ( _, ( _, _, END_DFN1right)) :: ( _, ( MlyValue.TextList +TextList, _, _)) :: ( _, ( _, START_DFN1left, _)) :: rest671)) => let + val result = MlyValue.Phrase (HTML.DFN(TextList)) + in ( LrTable.NT 43, ( result, START_DFN1left, END_DFN1right), rest671 +) +end +| ( 128, ( ( _, ( _, _, END_SAMP1right)) :: ( _, ( MlyValue.TextList +TextList, _, _)) :: ( _, ( _, START_SAMP1left, _)) :: rest671)) => let + val result = MlyValue.Phrase (HTML.SAMP(TextList)) + in ( LrTable.NT 43, ( result, START_SAMP1left, END_SAMP1right), +rest671) +end +| ( 129, ( ( _, ( _, _, END_KBD1right)) :: ( _, ( MlyValue.TextList +TextList, _, _)) :: ( _, ( _, START_KBD1left, _)) :: rest671)) => let + val result = MlyValue.Phrase (HTML.KBD(TextList)) + in ( LrTable.NT 43, ( result, START_KBD1left, END_KBD1right), rest671 +) +end +| ( 130, ( ( _, ( _, _, END_VAR1right)) :: ( _, ( MlyValue.TextList +TextList, _, _)) :: ( _, ( _, START_VAR1left, _)) :: rest671)) => let + val result = MlyValue.Phrase (HTML.VAR(TextList)) + in ( LrTable.NT 43, ( result, START_VAR1left, END_VAR1right), rest671 +) +end +| ( 131, ( ( _, ( _, _, END_CITE1right)) :: ( _, ( MlyValue.TextList +TextList, _, _)) :: ( _, ( _, START_CITE1left, _)) :: rest671)) => let + val result = MlyValue.Phrase (HTML.CITE(TextList)) + in ( LrTable.NT 43, ( result, START_CITE1left, END_CITE1right), +rest671) +end +| ( 132, ( ( _, ( _, _, END_A1right)) :: ( _, ( MlyValue.TextList +TextList, _, _)) :: ( _, ( MlyValue.START_A START_A, (START_Aleft as +START_A1left), _)) :: rest671)) => let val result = MlyValue.Special + (HTMLAttrs.mkA(ctx START_Aleft, START_A, TextList)) + in ( LrTable.NT 44, ( result, START_A1left, END_A1right), rest671) + +end +| ( 133, ( ( _, ( MlyValue.TAG_IMG TAG_IMG, (TAG_IMGleft as +TAG_IMG1left), TAG_IMG1right)) :: rest671)) => let val result = +MlyValue.Special (HTMLAttrs.mkIMG(ctx TAG_IMGleft, TAG_IMG)) + in ( LrTable.NT 44, ( result, TAG_IMG1left, TAG_IMG1right), rest671) + +end +| ( 134, ( ( _, ( _, _, END_APPLET1right)) :: ( _, ( +MlyValue.TextList TextList, _, _)) :: ( _, ( MlyValue.START_APPLET +START_APPLET, (START_APPLETleft as START_APPLET1left), _)) :: rest671) +) => let val result = MlyValue.Special ( +HTMLAttrs.mkAPPLET(ctx START_APPLETleft, START_APPLET, TextList)) + in ( LrTable.NT 44, ( result, START_APPLET1left, END_APPLET1right), +rest671) +end +| ( 135, ( ( _, ( _, _, END_FONT1right)) :: ( _, ( MlyValue.TextList +TextList, _, _)) :: ( _, ( MlyValue.START_FONT START_FONT, ( +START_FONTleft as START_FONT1left), _)) :: rest671)) => let val +result = MlyValue.Special ( +HTMLAttrs.mkFONT(ctx START_FONTleft, START_FONT, TextList)) + in ( LrTable.NT 44, ( result, START_FONT1left, END_FONT1right), +rest671) +end +| ( 136, ( ( _, ( _, _, END_BASEFONT1right)) :: ( _, ( +MlyValue.TextList TextList, _, _)) :: ( _, ( MlyValue.START_BASEFONT +START_BASEFONT, (START_BASEFONTleft as START_BASEFONT1left), _)) :: +rest671)) => let val result = MlyValue.Special ( +HTMLAttrs.mkBASEFONT( + ctx START_BASEFONTleft, START_BASEFONT, TextList) + +) + in ( LrTable.NT 44, ( result, START_BASEFONT1left, END_BASEFONT1right +), rest671) +end +| ( 137, ( ( _, ( MlyValue.TAG_BR TAG_BR, (TAG_BRleft as TAG_BR1left) +, TAG_BR1right)) :: rest671)) => let val result = MlyValue.Special ( +HTMLAttrs.mkBR(ctx TAG_BRleft, TAG_BR)) + in ( LrTable.NT 44, ( result, TAG_BR1left, TAG_BR1right), rest671) + +end +| ( 138, ( ( _, ( _, _, END_MAP1right)) :: ( _, ( MlyValue.AreaList +AreaList, _, _)) :: ( _, ( MlyValue.START_MAP START_MAP, ( +START_MAPleft as START_MAP1left), _)) :: rest671)) => let val result + = MlyValue.Special ( +HTMLAttrs.mkMAP(ctx START_MAPleft, START_MAP, AreaList)) + in ( LrTable.NT 44, ( result, START_MAP1left, END_MAP1right), rest671 +) +end +| ( 139, ( ( _, ( MlyValue.TAG_PARAM TAG_PARAM, (TAG_PARAMleft as +TAG_PARAM1left), TAG_PARAM1right)) :: rest671)) => let val result = +MlyValue.Special (HTMLAttrs.mkPARAM(ctx TAG_PARAMleft, TAG_PARAM)) + in ( LrTable.NT 44, ( result, TAG_PARAM1left, TAG_PARAM1right), +rest671) +end +| ( 140, ( rest671)) => let val result = MlyValue.AreaList ([]) + in ( LrTable.NT 45, ( result, defaultPos, defaultPos), rest671) +end +| ( 141, ( ( _, ( MlyValue.AreaList AreaList, _, AreaList1right)) :: +( _, ( MlyValue.TAG_AREA TAG_AREA, (TAG_AREAleft as TAG_AREA1left), _) +) :: rest671)) => let val result = MlyValue.AreaList ( +HTMLAttrs.mkAREA(ctx TAG_AREAleft, TAG_AREA) :: AreaList) + in ( LrTable.NT 45, ( result, TAG_AREA1left, AreaList1right), rest671 +) +end +| ( 142, ( ( _, ( MlyValue.TAG_INPUT TAG_INPUT, (TAG_INPUTleft as +TAG_INPUT1left), TAG_INPUT1right)) :: rest671)) => let val result = +MlyValue.Form (HTMLAttrs.mkINPUT(ctx TAG_INPUTleft, TAG_INPUT)) + in ( LrTable.NT 46, ( result, TAG_INPUT1left, TAG_INPUT1right), +rest671) +end +| ( 143, ( ( _, ( _, _, END_SELECT1right)) :: ( _, ( +MlyValue.OptionList OptionList, _, _)) :: ( _, ( MlyValue.START_SELECT + START_SELECT, (START_SELECTleft as START_SELECT1left), _)) :: rest671 +)) => let val result = MlyValue.Form ( +HTMLAttrs.mkSELECT(ctx START_SELECTleft, START_SELECT, OptionList)) + in ( LrTable.NT 46, ( result, START_SELECT1left, END_SELECT1right), +rest671) +end +| ( 144, ( ( _, ( _, _, END_TEXTAREA1right)) :: ( _, ( +MlyValue.PCData PCData, _, _)) :: ( _, ( MlyValue.START_TEXTAREA +START_TEXTAREA, (START_TEXTAREAleft as START_TEXTAREA1left), _)) :: +rest671)) => let val result = MlyValue.Form ( +HTMLAttrs.mkTEXTAREA( + ctx START_TEXTAREAleft, START_TEXTAREA, + PCData) + +) + in ( LrTable.NT 46, ( result, START_TEXTAREA1left, END_TEXTAREA1right +), rest671) +end +| ( 145, ( rest671)) => let val result = MlyValue.OptionList ([]) + in ( LrTable.NT 47, ( result, defaultPos, defaultPos), rest671) +end +| ( 146, ( ( _, ( MlyValue.OptionList OptionList, _, OptionList1right +)) :: _ :: ( _, ( MlyValue.PCData PCData, _, _)) :: ( _, ( +MlyValue.START_OPTION START_OPTION, (START_OPTIONleft as +START_OPTION1left), _)) :: rest671)) => let val result = +MlyValue.OptionList ( +HTMLAttrs.mkOPTION(ctx START_OPTIONleft, START_OPTION, PCData) + :: OptionList + +) + in ( LrTable.NT 47, ( result, START_OPTION1left, OptionList1right), +rest671) +end +| ( 147, ( rest671)) => let val result = MlyValue.ntVOID () + in ( LrTable.NT 48, ( result, defaultPos, defaultPos), rest671) +end +| ( 148, ( ( _, ( _, END_OPTION1left, END_OPTION1right)) :: rest671)) + => let val result = MlyValue.ntVOID () + in ( LrTable.NT 48, ( result, END_OPTION1left, END_OPTION1right), +rest671) +end +| ( 149, ( ( _, ( MlyValue.PCDataList PCDataList, PCDataList1left, +PCDataList1right)) :: rest671)) => let val result = MlyValue.PCData ( +concat PCDataList) + in ( LrTable.NT 49, ( result, PCDataList1left, PCDataList1right), +rest671) +end +| ( 150, ( rest671)) => let val result = MlyValue.PCDataList ([]) + in ( LrTable.NT 50, ( result, defaultPos, defaultPos), rest671) +end +| ( 151, ( ( _, ( MlyValue.PCDataList PCDataList, _, PCDataList1right +)) :: ( _, ( MlyValue.PCDataElem PCDataElem, PCDataElem1left, _)) :: +rest671)) => let val result = MlyValue.PCDataList ( +PCDataElem :: PCDataList) + in ( LrTable.NT 50, ( result, PCDataElem1left, PCDataList1right), +rest671) +end +| ( 152, ( ( _, ( MlyValue.PCDATA PCDATA, PCDATA1left, PCDATA1right)) + :: rest671)) => let val result = MlyValue.PCDataElem (PCDATA) + in ( LrTable.NT 51, ( result, PCDATA1left, PCDATA1right), rest671) + +end +| ( 153, ( ( _, ( MlyValue.CHAR_REF CHAR_REF, CHAR_REF1left, +CHAR_REF1right)) :: rest671)) => let val result = MlyValue.PCDataElem + (CHAR_REF) + in ( LrTable.NT 51, ( result, CHAR_REF1left, CHAR_REF1right), rest671 +) +end +| ( 154, ( ( _, ( MlyValue.ENTITY_REF ENTITY_REF, ENTITY_REF1left, +ENTITY_REF1right)) :: rest671)) => let val result = +MlyValue.PCDataElem (ENTITY_REF) + in ( LrTable.NT 51, ( result, ENTITY_REF1left, ENTITY_REF1right), +rest671) +end +| _ => raise (mlyAction i392) +end +val void = MlyValue.VOID +val extract = fn a => (fn MlyValue.Document x => x +| _ => let exception ParseInternal + in raise ParseInternal end) a +end +end +structure Tokens : HTML_TOKENS = +struct +type svalue = ParserData.svalue +type ('a,'b) token = ('a,'b) Token.token +fun EOF (p1,p2) = Token.TOKEN (ParserData.LrTable.T 0,( +ParserData.MlyValue.VOID,p1,p2)) +fun START_A (i,p1,p2) = Token.TOKEN (ParserData.LrTable.T 1,( +ParserData.MlyValue.START_A i,p1,p2)) +fun END_A (p1,p2) = Token.TOKEN (ParserData.LrTable.T 2,( +ParserData.MlyValue.VOID,p1,p2)) +fun START_ADDRESS (p1,p2) = Token.TOKEN (ParserData.LrTable.T 3,( +ParserData.MlyValue.VOID,p1,p2)) +fun END_ADDRESS (p1,p2) = Token.TOKEN (ParserData.LrTable.T 4,( +ParserData.MlyValue.VOID,p1,p2)) +fun START_APPLET (i,p1,p2) = Token.TOKEN (ParserData.LrTable.T 5,( +ParserData.MlyValue.START_APPLET i,p1,p2)) +fun END_APPLET (p1,p2) = Token.TOKEN (ParserData.LrTable.T 6,( +ParserData.MlyValue.VOID,p1,p2)) +fun TAG_AREA (i,p1,p2) = Token.TOKEN (ParserData.LrTable.T 7,( +ParserData.MlyValue.TAG_AREA i,p1,p2)) +fun START_B (p1,p2) = Token.TOKEN (ParserData.LrTable.T 8,( +ParserData.MlyValue.VOID,p1,p2)) +fun END_B (p1,p2) = Token.TOKEN (ParserData.LrTable.T 9,( +ParserData.MlyValue.VOID,p1,p2)) +fun TAG_BASE (i,p1,p2) = Token.TOKEN (ParserData.LrTable.T 10,( +ParserData.MlyValue.TAG_BASE i,p1,p2)) +fun START_BIG (p1,p2) = Token.TOKEN (ParserData.LrTable.T 11,( +ParserData.MlyValue.VOID,p1,p2)) +fun END_BIG (p1,p2) = Token.TOKEN (ParserData.LrTable.T 12,( +ParserData.MlyValue.VOID,p1,p2)) +fun START_BLOCKQUOTE (p1,p2) = Token.TOKEN (ParserData.LrTable.T 13,( +ParserData.MlyValue.VOID,p1,p2)) +fun END_BLOCKQUOTE (p1,p2) = Token.TOKEN (ParserData.LrTable.T 14,( +ParserData.MlyValue.VOID,p1,p2)) +fun START_BODY (i,p1,p2) = Token.TOKEN (ParserData.LrTable.T 15,( +ParserData.MlyValue.START_BODY i,p1,p2)) +fun END_BODY (p1,p2) = Token.TOKEN (ParserData.LrTable.T 16,( +ParserData.MlyValue.VOID,p1,p2)) +fun TAG_BR (i,p1,p2) = Token.TOKEN (ParserData.LrTable.T 17,( +ParserData.MlyValue.TAG_BR i,p1,p2)) +fun START_CAPTION (i,p1,p2) = Token.TOKEN (ParserData.LrTable.T 18,( +ParserData.MlyValue.START_CAPTION i,p1,p2)) +fun END_CAPTION (p1,p2) = Token.TOKEN (ParserData.LrTable.T 19,( +ParserData.MlyValue.VOID,p1,p2)) +fun START_CENTER (p1,p2) = Token.TOKEN (ParserData.LrTable.T 20,( +ParserData.MlyValue.VOID,p1,p2)) +fun END_CENTER (p1,p2) = Token.TOKEN (ParserData.LrTable.T 21,( +ParserData.MlyValue.VOID,p1,p2)) +fun START_CITE (p1,p2) = Token.TOKEN (ParserData.LrTable.T 22,( +ParserData.MlyValue.VOID,p1,p2)) +fun END_CITE (p1,p2) = Token.TOKEN (ParserData.LrTable.T 23,( +ParserData.MlyValue.VOID,p1,p2)) +fun START_CODE (p1,p2) = Token.TOKEN (ParserData.LrTable.T 24,( +ParserData.MlyValue.VOID,p1,p2)) +fun END_CODE (p1,p2) = Token.TOKEN (ParserData.LrTable.T 25,( +ParserData.MlyValue.VOID,p1,p2)) +fun START_DD (p1,p2) = Token.TOKEN (ParserData.LrTable.T 26,( +ParserData.MlyValue.VOID,p1,p2)) +fun END_DD (p1,p2) = Token.TOKEN (ParserData.LrTable.T 27,( +ParserData.MlyValue.VOID,p1,p2)) +fun START_DFN (p1,p2) = Token.TOKEN (ParserData.LrTable.T 28,( +ParserData.MlyValue.VOID,p1,p2)) +fun END_DFN (p1,p2) = Token.TOKEN (ParserData.LrTable.T 29,( +ParserData.MlyValue.VOID,p1,p2)) +fun START_DIR (i,p1,p2) = Token.TOKEN (ParserData.LrTable.T 30,( +ParserData.MlyValue.START_DIR i,p1,p2)) +fun END_DIR (p1,p2) = Token.TOKEN (ParserData.LrTable.T 31,( +ParserData.MlyValue.VOID,p1,p2)) +fun START_DIV (i,p1,p2) = Token.TOKEN (ParserData.LrTable.T 32,( +ParserData.MlyValue.START_DIV i,p1,p2)) +fun END_DIV (p1,p2) = Token.TOKEN (ParserData.LrTable.T 33,( +ParserData.MlyValue.VOID,p1,p2)) +fun START_DL (i,p1,p2) = Token.TOKEN (ParserData.LrTable.T 34,( +ParserData.MlyValue.START_DL i,p1,p2)) +fun END_DL (p1,p2) = Token.TOKEN (ParserData.LrTable.T 35,( +ParserData.MlyValue.VOID,p1,p2)) +fun START_DT (p1,p2) = Token.TOKEN (ParserData.LrTable.T 36,( +ParserData.MlyValue.VOID,p1,p2)) +fun END_DT (p1,p2) = Token.TOKEN (ParserData.LrTable.T 37,( +ParserData.MlyValue.VOID,p1,p2)) +fun START_EM (p1,p2) = Token.TOKEN (ParserData.LrTable.T 38,( +ParserData.MlyValue.VOID,p1,p2)) +fun END_EM (p1,p2) = Token.TOKEN (ParserData.LrTable.T 39,( +ParserData.MlyValue.VOID,p1,p2)) +fun START_FONT (i,p1,p2) = Token.TOKEN (ParserData.LrTable.T 40,( +ParserData.MlyValue.START_FONT i,p1,p2)) +fun END_FONT (p1,p2) = Token.TOKEN (ParserData.LrTable.T 41,( +ParserData.MlyValue.VOID,p1,p2)) +fun START_BASEFONT (i,p1,p2) = Token.TOKEN (ParserData.LrTable.T 42,( +ParserData.MlyValue.START_BASEFONT i,p1,p2)) +fun END_BASEFONT (p1,p2) = Token.TOKEN (ParserData.LrTable.T 43,( +ParserData.MlyValue.VOID,p1,p2)) +fun START_FORM (i,p1,p2) = Token.TOKEN (ParserData.LrTable.T 44,( +ParserData.MlyValue.START_FORM i,p1,p2)) +fun END_FORM (p1,p2) = Token.TOKEN (ParserData.LrTable.T 45,( +ParserData.MlyValue.VOID,p1,p2)) +fun START_H1 (i,p1,p2) = Token.TOKEN (ParserData.LrTable.T 46,( +ParserData.MlyValue.START_H1 i,p1,p2)) +fun END_H1 (p1,p2) = Token.TOKEN (ParserData.LrTable.T 47,( +ParserData.MlyValue.VOID,p1,p2)) +fun START_H2 (i,p1,p2) = Token.TOKEN (ParserData.LrTable.T 48,( +ParserData.MlyValue.START_H2 i,p1,p2)) +fun END_H2 (p1,p2) = Token.TOKEN (ParserData.LrTable.T 49,( +ParserData.MlyValue.VOID,p1,p2)) +fun START_H3 (i,p1,p2) = Token.TOKEN (ParserData.LrTable.T 50,( +ParserData.MlyValue.START_H3 i,p1,p2)) +fun END_H3 (p1,p2) = Token.TOKEN (ParserData.LrTable.T 51,( +ParserData.MlyValue.VOID,p1,p2)) +fun START_H4 (i,p1,p2) = Token.TOKEN (ParserData.LrTable.T 52,( +ParserData.MlyValue.START_H4 i,p1,p2)) +fun END_H4 (p1,p2) = Token.TOKEN (ParserData.LrTable.T 53,( +ParserData.MlyValue.VOID,p1,p2)) +fun START_H5 (i,p1,p2) = Token.TOKEN (ParserData.LrTable.T 54,( +ParserData.MlyValue.START_H5 i,p1,p2)) +fun END_H5 (p1,p2) = Token.TOKEN (ParserData.LrTable.T 55,( +ParserData.MlyValue.VOID,p1,p2)) +fun START_H6 (i,p1,p2) = Token.TOKEN (ParserData.LrTable.T 56,( +ParserData.MlyValue.START_H6 i,p1,p2)) +fun END_H6 (p1,p2) = Token.TOKEN (ParserData.LrTable.T 57,( +ParserData.MlyValue.VOID,p1,p2)) +fun START_HEAD (p1,p2) = Token.TOKEN (ParserData.LrTable.T 58,( +ParserData.MlyValue.VOID,p1,p2)) +fun END_HEAD (p1,p2) = Token.TOKEN (ParserData.LrTable.T 59,( +ParserData.MlyValue.VOID,p1,p2)) +fun TAG_HR (i,p1,p2) = Token.TOKEN (ParserData.LrTable.T 60,( +ParserData.MlyValue.TAG_HR i,p1,p2)) +fun START_HTML (p1,p2) = Token.TOKEN (ParserData.LrTable.T 61,( +ParserData.MlyValue.VOID,p1,p2)) +fun END_HTML (p1,p2) = Token.TOKEN (ParserData.LrTable.T 62,( +ParserData.MlyValue.VOID,p1,p2)) +fun START_I (p1,p2) = Token.TOKEN (ParserData.LrTable.T 63,( +ParserData.MlyValue.VOID,p1,p2)) +fun END_I (p1,p2) = Token.TOKEN (ParserData.LrTable.T 64,( +ParserData.MlyValue.VOID,p1,p2)) +fun TAG_IMG (i,p1,p2) = Token.TOKEN (ParserData.LrTable.T 65,( +ParserData.MlyValue.TAG_IMG i,p1,p2)) +fun TAG_INPUT (i,p1,p2) = Token.TOKEN (ParserData.LrTable.T 66,( +ParserData.MlyValue.TAG_INPUT i,p1,p2)) +fun TAG_ISINDEX (i,p1,p2) = Token.TOKEN (ParserData.LrTable.T 67,( +ParserData.MlyValue.TAG_ISINDEX i,p1,p2)) +fun START_KBD (p1,p2) = Token.TOKEN (ParserData.LrTable.T 68,( +ParserData.MlyValue.VOID,p1,p2)) +fun END_KBD (p1,p2) = Token.TOKEN (ParserData.LrTable.T 69,( +ParserData.MlyValue.VOID,p1,p2)) +fun START_LI (i,p1,p2) = Token.TOKEN (ParserData.LrTable.T 70,( +ParserData.MlyValue.START_LI i,p1,p2)) +fun END_LI (p1,p2) = Token.TOKEN (ParserData.LrTable.T 71,( +ParserData.MlyValue.VOID,p1,p2)) +fun TAG_LINK (i,p1,p2) = Token.TOKEN (ParserData.LrTable.T 72,( +ParserData.MlyValue.TAG_LINK i,p1,p2)) +fun START_MAP (i,p1,p2) = Token.TOKEN (ParserData.LrTable.T 73,( +ParserData.MlyValue.START_MAP i,p1,p2)) +fun END_MAP (p1,p2) = Token.TOKEN (ParserData.LrTable.T 74,( +ParserData.MlyValue.VOID,p1,p2)) +fun START_MENU (i,p1,p2) = Token.TOKEN (ParserData.LrTable.T 75,( +ParserData.MlyValue.START_MENU i,p1,p2)) +fun END_MENU (p1,p2) = Token.TOKEN (ParserData.LrTable.T 76,( +ParserData.MlyValue.VOID,p1,p2)) +fun TAG_META (i,p1,p2) = Token.TOKEN (ParserData.LrTable.T 77,( +ParserData.MlyValue.TAG_META i,p1,p2)) +fun START_OL (i,p1,p2) = Token.TOKEN (ParserData.LrTable.T 78,( +ParserData.MlyValue.START_OL i,p1,p2)) +fun END_OL (p1,p2) = Token.TOKEN (ParserData.LrTable.T 79,( +ParserData.MlyValue.VOID,p1,p2)) +fun START_OPTION (i,p1,p2) = Token.TOKEN (ParserData.LrTable.T 80,( +ParserData.MlyValue.START_OPTION i,p1,p2)) +fun END_OPTION (p1,p2) = Token.TOKEN (ParserData.LrTable.T 81,( +ParserData.MlyValue.VOID,p1,p2)) +fun START_P (i,p1,p2) = Token.TOKEN (ParserData.LrTable.T 82,( +ParserData.MlyValue.START_P i,p1,p2)) +fun END_P (p1,p2) = Token.TOKEN (ParserData.LrTable.T 83,( +ParserData.MlyValue.VOID,p1,p2)) +fun TAG_PARAM (i,p1,p2) = Token.TOKEN (ParserData.LrTable.T 84,( +ParserData.MlyValue.TAG_PARAM i,p1,p2)) +fun START_PRE (i,p1,p2) = Token.TOKEN (ParserData.LrTable.T 85,( +ParserData.MlyValue.START_PRE i,p1,p2)) +fun END_PRE (p1,p2) = Token.TOKEN (ParserData.LrTable.T 86,( +ParserData.MlyValue.VOID,p1,p2)) +fun START_SAMP (p1,p2) = Token.TOKEN (ParserData.LrTable.T 87,( +ParserData.MlyValue.VOID,p1,p2)) +fun END_SAMP (p1,p2) = Token.TOKEN (ParserData.LrTable.T 88,( +ParserData.MlyValue.VOID,p1,p2)) +fun START_SCRIPT (p1,p2) = Token.TOKEN (ParserData.LrTable.T 89,( +ParserData.MlyValue.VOID,p1,p2)) +fun END_SCRIPT (p1,p2) = Token.TOKEN (ParserData.LrTable.T 90,( +ParserData.MlyValue.VOID,p1,p2)) +fun START_SELECT (i,p1,p2) = Token.TOKEN (ParserData.LrTable.T 91,( +ParserData.MlyValue.START_SELECT i,p1,p2)) +fun END_SELECT (p1,p2) = Token.TOKEN (ParserData.LrTable.T 92,( +ParserData.MlyValue.VOID,p1,p2)) +fun START_SMALL (p1,p2) = Token.TOKEN (ParserData.LrTable.T 93,( +ParserData.MlyValue.VOID,p1,p2)) +fun END_SMALL (p1,p2) = Token.TOKEN (ParserData.LrTable.T 94,( +ParserData.MlyValue.VOID,p1,p2)) +fun START_STRIKE (p1,p2) = Token.TOKEN (ParserData.LrTable.T 95,( +ParserData.MlyValue.VOID,p1,p2)) +fun END_STRIKE (p1,p2) = Token.TOKEN (ParserData.LrTable.T 96,( +ParserData.MlyValue.VOID,p1,p2)) +fun START_STRONG (p1,p2) = Token.TOKEN (ParserData.LrTable.T 97,( +ParserData.MlyValue.VOID,p1,p2)) +fun END_STRONG (p1,p2) = Token.TOKEN (ParserData.LrTable.T 98,( +ParserData.MlyValue.VOID,p1,p2)) +fun START_STYLE (p1,p2) = Token.TOKEN (ParserData.LrTable.T 99,( +ParserData.MlyValue.VOID,p1,p2)) +fun END_STYLE (p1,p2) = Token.TOKEN (ParserData.LrTable.T 100,( +ParserData.MlyValue.VOID,p1,p2)) +fun START_SUB (p1,p2) = Token.TOKEN (ParserData.LrTable.T 101,( +ParserData.MlyValue.VOID,p1,p2)) +fun END_SUB (p1,p2) = Token.TOKEN (ParserData.LrTable.T 102,( +ParserData.MlyValue.VOID,p1,p2)) +fun START_SUP (p1,p2) = Token.TOKEN (ParserData.LrTable.T 103,( +ParserData.MlyValue.VOID,p1,p2)) +fun END_SUP (p1,p2) = Token.TOKEN (ParserData.LrTable.T 104,( +ParserData.MlyValue.VOID,p1,p2)) +fun START_TABLE (i,p1,p2) = Token.TOKEN (ParserData.LrTable.T 105,( +ParserData.MlyValue.START_TABLE i,p1,p2)) +fun END_TABLE (p1,p2) = Token.TOKEN (ParserData.LrTable.T 106,( +ParserData.MlyValue.VOID,p1,p2)) +fun START_TD (i,p1,p2) = Token.TOKEN (ParserData.LrTable.T 107,( +ParserData.MlyValue.START_TD i,p1,p2)) +fun END_TD (p1,p2) = Token.TOKEN (ParserData.LrTable.T 108,( +ParserData.MlyValue.VOID,p1,p2)) +fun START_TEXTAREA (i,p1,p2) = Token.TOKEN (ParserData.LrTable.T 109,( +ParserData.MlyValue.START_TEXTAREA i,p1,p2)) +fun END_TEXTAREA (p1,p2) = Token.TOKEN (ParserData.LrTable.T 110,( +ParserData.MlyValue.VOID,p1,p2)) +fun START_TH (i,p1,p2) = Token.TOKEN (ParserData.LrTable.T 111,( +ParserData.MlyValue.START_TH i,p1,p2)) +fun END_TH (p1,p2) = Token.TOKEN (ParserData.LrTable.T 112,( +ParserData.MlyValue.VOID,p1,p2)) +fun START_TITLE (p1,p2) = Token.TOKEN (ParserData.LrTable.T 113,( +ParserData.MlyValue.VOID,p1,p2)) +fun END_TITLE (p1,p2) = Token.TOKEN (ParserData.LrTable.T 114,( +ParserData.MlyValue.VOID,p1,p2)) +fun START_TR (i,p1,p2) = Token.TOKEN (ParserData.LrTable.T 115,( +ParserData.MlyValue.START_TR i,p1,p2)) +fun END_TR (p1,p2) = Token.TOKEN (ParserData.LrTable.T 116,( +ParserData.MlyValue.VOID,p1,p2)) +fun START_TT (p1,p2) = Token.TOKEN (ParserData.LrTable.T 117,( +ParserData.MlyValue.VOID,p1,p2)) +fun END_TT (p1,p2) = Token.TOKEN (ParserData.LrTable.T 118,( +ParserData.MlyValue.VOID,p1,p2)) +fun START_U (p1,p2) = Token.TOKEN (ParserData.LrTable.T 119,( +ParserData.MlyValue.VOID,p1,p2)) +fun END_U (p1,p2) = Token.TOKEN (ParserData.LrTable.T 120,( +ParserData.MlyValue.VOID,p1,p2)) +fun START_UL (i,p1,p2) = Token.TOKEN (ParserData.LrTable.T 121,( +ParserData.MlyValue.START_UL i,p1,p2)) +fun END_UL (p1,p2) = Token.TOKEN (ParserData.LrTable.T 122,( +ParserData.MlyValue.VOID,p1,p2)) +fun START_VAR (p1,p2) = Token.TOKEN (ParserData.LrTable.T 123,( +ParserData.MlyValue.VOID,p1,p2)) +fun END_VAR (p1,p2) = Token.TOKEN (ParserData.LrTable.T 124,( +ParserData.MlyValue.VOID,p1,p2)) +fun PCDATA (i,p1,p2) = Token.TOKEN (ParserData.LrTable.T 125,( +ParserData.MlyValue.PCDATA i,p1,p2)) +fun CHAR_REF (i,p1,p2) = Token.TOKEN (ParserData.LrTable.T 126,( +ParserData.MlyValue.CHAR_REF i,p1,p2)) +fun ENTITY_REF (i,p1,p2) = Token.TOKEN (ParserData.LrTable.T 127,( +ParserData.MlyValue.ENTITY_REF i,p1,p2)) +end +end diff --git a/smlnj-lib/HTML/html-lex b/smlnj-lib/HTML/html-lex new file mode 100644 index 0000000..c7ac928 --- /dev/null +++ b/smlnj-lib/HTML/html-lex @@ -0,0 +1,131 @@ +(* html-lex + * + * COPYRIGHT (c) 1995 AT&T Bell Laboratories. + * COPYRIGHT (c) 1996 AT&T Research. + * + * A scanner for HTML. + * + * TODO: + * Recognize the DOCTYPE element + * <!DOCTYPE HTML PUBLIC "..."> + * Clean-up the scanning of start tags (do we need Err?). + * Whitespace in PRE elements should be preserved, but how? + *) + +structure T = Tokens +structure Elems = HTMLElementsFn ( + structure Tokens = Tokens + structure Err = Err + structure HTMLAttrs = HTMLAttrs) + +type pos = int +type svalue = T.svalue +type arg = (((string * int * int) -> unit) * string option) +type ('a, 'b) token = ('a, 'b) T.token +type lexresult= (svalue, pos) token + +fun eof _ = Tokens.EOF(0, 0) + +(* a buffer for collecting a string piecewise *) +val buffer = ref ([] : string list) +fun addStr s = (buffer := s :: !buffer) +fun getStr () = (String.concat(List.rev(! buffer)) before (buffer := [])) + +%% + +%s COM1 COM2 STAG; + +%header (functor HTMLLexFn ( + structure Tokens : HTML_TOKENS + structure Err : HTML_ERROR + structure HTMLAttrs : HTML_ATTRS)); + +%arg (errorFn, file); + +%full +%count + +alpha=[A-Za-z]; +digit=[0-9]; +namechar=[-A-Za-z0-9.]; +tag=({alpha}{namechar}*); +ws = [\ \t]; + +%% + +<INITIAL>"<"{tag} + => (addStr yytext; YYBEGIN STAG; continue()); +<STAG>">" + => (addStr yytext; + YYBEGIN INITIAL; + case Elems.startTag file (getStr(), !yylineno, !yylineno) + of NONE => continue() + | (SOME tag) => tag + (* end case *)); +<STAG>\n + => (addStr " "; continue()); +<STAG>{ws}+ + => (addStr yytext; continue()); +<STAG>{namechar}+ + => (addStr yytext; continue()); +<STAG>"=" + => (addStr yytext; continue()); +<STAG>"\""[^\"\n]*"\"" + => (addStr yytext; continue()); +<STAG>"'"[^'\n]*"'" + => (addStr yytext; continue()); +<STAG>. + => (addStr yytext; continue()); + +<INITIAL>"</"{tag}{ws}*">" + => (case Elems.endTag file (yytext, !yylineno, !yylineno) + of NONE => continue() + | (SOME tag) => tag + (* end case *)); + +<INITIAL>"<!--" + => (YYBEGIN COM1; continue()); +<COM1>"--" + => (YYBEGIN COM2; continue()); +<COM1>\n + => (continue()); +<COM1>. + => (continue()); +<COM2>"--" + => (YYBEGIN COM1; continue()); +<COM2>">" + => (YYBEGIN INITIAL; continue()); +<COM2>\n + => (continue()); +<COM2>{ws} + => (continue()); +<COM2>. + => (errorFn("bad comment syntax", !yylineno, !yylineno+1); + YYBEGIN INITIAL; + continue()); + +<INITIAL>"&#"[A-Za-z]+";" + => ( +(** At some point, we should support &#SPACE; and &#TAB; **) + continue()); + +<INITIAL>"&#"[0-9]+";" + => (T.CHAR_REF(yytext, !yylineno, !yylineno)); + +<INITIAL>"&"{tag}";" + => (T.ENTITY_REF(yytext, !yylineno, !yylineno)); + +<INITIAL>"\n" + => (continue()); +<INITIAL>{ws} + => (continue()); + +<INITIAL>[^<]+ + => (T.PCDATA(yytext, !yylineno, !yylineno)); +<INITIAL>. + => (errorFn(concat[ + "bogus character #\"", Char.toString(String.sub(yytext, 0)), + "\" in PCDATA\n" + ], !yylineno, !yylineno+1); + continue()); + diff --git a/smlnj-lib/HTML/html-lex.sml b/smlnj-lib/HTML/html-lex.sml new file mode 100644 index 0000000..949565a --- /dev/null +++ b/smlnj-lib/HTML/html-lex.sml @@ -0,0 +1,995 @@ +functor HTMLLexFn ( + structure Tokens : HTML_TOKENS + structure Err : HTML_ERROR + structure HTMLAttrs : HTML_ATTRS) = struct + + structure yyInput : sig + + type stream + val mkStream : (int -> string) -> stream + val fromStream : TextIO.StreamIO.instream -> stream + val getc : stream -> (Char.char * stream) option + val getpos : stream -> int + val getlineNo : stream -> int + val subtract : stream * stream -> string + val eof : stream -> bool + val lastWasNL : stream -> bool + + end = struct + + structure TIO = TextIO + structure TSIO = TIO.StreamIO + structure TPIO = TextPrimIO + + datatype stream = Stream of { + strm : TSIO.instream, + id : int, (* track which streams originated + * from the same stream *) + pos : int, + lineNo : int, + lastWasNL : bool + } + + local + val next = ref 0 + in + fun nextId() = !next before (next := !next + 1) + end + + val initPos = 2 (* ml-lex bug compatibility *) + + fun mkStream inputN = let + val strm = TSIO.mkInstream + (TPIO.RD { + name = "lexgen", + chunkSize = 4096, + readVec = SOME inputN, + readArr = NONE, + readVecNB = NONE, + readArrNB = NONE, + block = NONE, + canInput = NONE, + avail = (fn () => NONE), + getPos = NONE, + setPos = NONE, + endPos = NONE, + verifyPos = NONE, + close = (fn () => ()), + ioDesc = NONE + }, "") + in + Stream {strm = strm, id = nextId(), pos = initPos, lineNo = 1, + lastWasNL = true} + end + + fun fromStream strm = Stream { + strm = strm, id = nextId(), pos = initPos, lineNo = 1, lastWasNL = true + } + + fun getc (Stream {strm, pos, id, lineNo, ...}) = (case TSIO.input1 strm + of NONE => NONE + | SOME (c, strm') => + SOME (c, Stream { + strm = strm', + pos = pos+1, + id = id, + lineNo = lineNo + + (if c = #"\n" then 1 else 0), + lastWasNL = (c = #"\n") + }) + (* end case*)) + + fun getpos (Stream {pos, ...}) = pos + + fun getlineNo (Stream {lineNo, ...}) = lineNo + + fun subtract (new, old) = let + val Stream {strm = strm, pos = oldPos, id = oldId, ...} = old + val Stream {pos = newPos, id = newId, ...} = new + val (diff, _) = if newId = oldId andalso newPos >= oldPos + then TSIO.inputN (strm, newPos - oldPos) + else raise Fail + "BUG: yyInput: attempted to subtract incompatible streams" + in + diff + end + + fun eof s = not (isSome (getc s)) + + fun lastWasNL (Stream {lastWasNL, ...}) = lastWasNL + + end + + datatype yystart_state = +COM2 | COM1 | STAG | INITIAL + structure UserDeclarations = + struct + +(* html-lex + * + * COPYRIGHT (c) 1995 AT&T Bell Laboratories. + * COPYRIGHT (c) 1996 AT&T Research. + * + * A scanner for HTML. + * + * TODO: + * Recognize the DOCTYPE element + * <!DOCTYPE HTML PUBLIC "..."> + * Clean-up the scanning of start tags (do we need Err?). + * Whitespace in PRE elements should be preserved, but how? + *) + +structure T = Tokens +structure Elems = HTMLElementsFn ( + structure Tokens = Tokens + structure Err = Err + structure HTMLAttrs = HTMLAttrs) + +type pos = int +type svalue = T.svalue +type arg = (((string * int * int) -> unit) * string option) +type ('a, 'b) token = ('a, 'b) T.token +type lexresult= (svalue, pos) token + +fun eof _ = Tokens.EOF(0, 0) + +(* a buffer for collecting a string piecewise *) +val buffer = ref ([] : string list) +fun addStr s = (buffer := s :: !buffer) +fun getStr () = (String.concat(List.rev(! buffer)) before (buffer := [])) + + + + end + + datatype yymatch + = yyNO_MATCH + | yyMATCH of yyInput.stream * action * yymatch + withtype action = yyInput.stream * yymatch -> UserDeclarations.lexresult + + local + + val yytable = +Vector.fromList [] + fun mk yyins = let + (* current start state *) + val yyss = ref INITIAL + fun YYBEGIN ss = (yyss := ss) + (* current input stream *) + val yystrm = ref yyins + (* get one char of input *) + val yygetc = yyInput.getc + (* create yytext *) + fun yymktext(strm) = yyInput.subtract (strm, !yystrm) + open UserDeclarations + fun lex +(yyarg as (errorFn, file)) () = let + fun continue() = let + val yylastwasn = yyInput.lastWasNL (!yystrm) + fun yystuck (yyNO_MATCH) = raise Fail "stuck state" + | yystuck (yyMATCH (strm, action, old)) = + action (strm, old) + val yypos = yyInput.getpos (!yystrm) + val yygetlineNo = yyInput.getlineNo + fun yyactsToMatches (strm, [], oldMatches) = oldMatches + | yyactsToMatches (strm, act::acts, oldMatches) = + yyMATCH (strm, act, yyactsToMatches (strm, acts, oldMatches)) + fun yygo actTable = + (fn (~1, _, oldMatches) => yystuck oldMatches + | (curState, strm, oldMatches) => let + val (transitions, finals') = Vector.sub (yytable, curState) + val finals = List.map (fn i => Vector.sub (actTable, i)) finals' + fun tryfinal() = + yystuck (yyactsToMatches (strm, finals, oldMatches)) + fun find (c, []) = NONE + | find (c, (c1, c2, s)::ts) = + if c1 <= c andalso c <= c2 then SOME s + else find (c, ts) + in case yygetc strm + of SOME(c, strm') => + (case find (c, transitions) + of NONE => tryfinal() + | SOME n => + yygo actTable + (n, strm', + yyactsToMatches (strm, finals, oldMatches))) + | NONE => tryfinal() + end) + in +let +fun yyAction0 (strm, lastMatch : yymatch) = let + val yytext = yymktext(strm) + in + yystrm := strm; (addStr yytext; YYBEGIN STAG; continue()) + end +fun yyAction1 (strm, lastMatch : yymatch) = let + val yylineno = ref(yygetlineNo(!(yystrm))) + val yytext = yymktext(strm) + in + yystrm := strm; + (addStr yytext; + YYBEGIN INITIAL; + case Elems.startTag file (getStr(), !yylineno, !yylineno) + of NONE => continue() + | (SOME tag) => tag + (* end case *)) + end +fun yyAction2 (strm, lastMatch : yymatch) = (yystrm := strm; + (addStr " "; continue())) +fun yyAction3 (strm, lastMatch : yymatch) = let + val yytext = yymktext(strm) + in + yystrm := strm; (addStr yytext; continue()) + end +fun yyAction4 (strm, lastMatch : yymatch) = let + val yytext = yymktext(strm) + in + yystrm := strm; (addStr yytext; continue()) + end +fun yyAction5 (strm, lastMatch : yymatch) = let + val yytext = yymktext(strm) + in + yystrm := strm; (addStr yytext; continue()) + end +fun yyAction6 (strm, lastMatch : yymatch) = let + val yytext = yymktext(strm) + in + yystrm := strm; (addStr yytext; continue()) + end +fun yyAction7 (strm, lastMatch : yymatch) = let + val yytext = yymktext(strm) + in + yystrm := strm; (addStr yytext; continue()) + end +fun yyAction8 (strm, lastMatch : yymatch) = let + val yytext = yymktext(strm) + in + yystrm := strm; (addStr yytext; continue()) + end +fun yyAction9 (strm, lastMatch : yymatch) = let + val yylineno = ref(yygetlineNo(!(yystrm))) + val yytext = yymktext(strm) + in + yystrm := strm; + (case Elems.endTag file (yytext, !yylineno, !yylineno) + of NONE => continue() + | (SOME tag) => tag + (* end case *)) + end +fun yyAction10 (strm, lastMatch : yymatch) = (yystrm := strm; + (YYBEGIN COM1; continue())) +fun yyAction11 (strm, lastMatch : yymatch) = (yystrm := strm; + (YYBEGIN COM2; continue())) +fun yyAction12 (strm, lastMatch : yymatch) = (yystrm := strm; (continue())) +fun yyAction13 (strm, lastMatch : yymatch) = (yystrm := strm; (continue())) +fun yyAction14 (strm, lastMatch : yymatch) = (yystrm := strm; + (YYBEGIN COM1; continue())) +fun yyAction15 (strm, lastMatch : yymatch) = (yystrm := strm; + (YYBEGIN INITIAL; continue())) +fun yyAction16 (strm, lastMatch : yymatch) = (yystrm := strm; (continue())) +fun yyAction17 (strm, lastMatch : yymatch) = (yystrm := strm; (continue())) +fun yyAction18 (strm, lastMatch : yymatch) = let + val yylineno = ref(yygetlineNo(!(yystrm))) + in + yystrm := strm; + (errorFn("bad comment syntax", !yylineno, !yylineno+1); + YYBEGIN INITIAL; + continue()) + end +fun yyAction19 (strm, lastMatch : yymatch) = (yystrm := strm; + ( +(** At some point, we should support &#SPACE; and &#TAB; **) + continue())) +fun yyAction20 (strm, lastMatch : yymatch) = let + val yylineno = ref(yygetlineNo(!(yystrm))) + val yytext = yymktext(strm) + in + yystrm := strm; (T.CHAR_REF(yytext, !yylineno, !yylineno)) + end +fun yyAction21 (strm, lastMatch : yymatch) = let + val yylineno = ref(yygetlineNo(!(yystrm))) + val yytext = yymktext(strm) + in + yystrm := strm; (T.ENTITY_REF(yytext, !yylineno, !yylineno)) + end +fun yyAction22 (strm, lastMatch : yymatch) = (yystrm := strm; (continue())) +fun yyAction23 (strm, lastMatch : yymatch) = (yystrm := strm; (continue())) +fun yyAction24 (strm, lastMatch : yymatch) = let + val yylineno = ref(yygetlineNo(!(yystrm))) + val yytext = yymktext(strm) + in + yystrm := strm; (T.PCDATA(yytext, !yylineno, !yylineno)) + end +fun yyAction25 (strm, lastMatch : yymatch) = let + val yylineno = ref(yygetlineNo(!(yystrm))) + val yytext = yymktext(strm) + in + yystrm := strm; + (errorFn(concat[ + "bogus character #\"", Char.toString(String.sub(yytext, 0)), + "\" in PCDATA\n" + ], !yylineno, !yylineno+1); + continue()) + end +fun yyQ35 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction0(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #":" + then yyAction0(strm, yyNO_MATCH) + else if inp < #":" + then if inp = #"/" + then yyAction0(strm, yyNO_MATCH) + else if inp < #"/" + then if inp <= #"," + then yyAction0(strm, yyNO_MATCH) + else yyQ35(strm', yyMATCH(strm, yyAction0, yyNO_MATCH)) + else yyQ35(strm', yyMATCH(strm, yyAction0, yyNO_MATCH)) + else if inp = #"[" + then yyAction0(strm, yyNO_MATCH) + else if inp < #"[" + then if inp <= #"@" + then yyAction0(strm, yyNO_MATCH) + else yyQ35(strm', yyMATCH(strm, yyAction0, yyNO_MATCH)) + else if inp = #"a" + then yyQ35(strm', yyMATCH(strm, yyAction0, yyNO_MATCH)) + else if inp < #"a" + then yyAction0(strm, yyNO_MATCH) + else if inp <= #"z" + then yyQ35(strm', yyMATCH(strm, yyAction0, yyNO_MATCH)) + else yyAction0(strm, yyNO_MATCH) + (* end case *)) +fun yyQ38 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction9(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction9(strm, yyNO_MATCH) + (* end case *)) +fun yyQ37 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = #" " + then yyQ37(strm', lastMatch) + else if inp < #" " + then if inp = #"\t" + then yyQ37(strm', lastMatch) + else yystuck(lastMatch) + else if inp = #">" + then yyQ38(strm', lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ36 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = #"0" + then yyQ36(strm', lastMatch) + else if inp < #"0" + then if inp = #" " + then yyQ37(strm', lastMatch) + else if inp < #" " + then if inp = #"\t" + then yyQ37(strm', lastMatch) + else yystuck(lastMatch) + else if inp = #"-" + then yyQ36(strm', lastMatch) + else if inp < #"-" + then yystuck(lastMatch) + else if inp = #"/" + then yystuck(lastMatch) + else yyQ36(strm', lastMatch) + else if inp = #"A" + then yyQ36(strm', lastMatch) + else if inp < #"A" + then if inp = #">" + then yyQ38(strm', lastMatch) + else if inp < #">" + then if inp <= #"9" + then yyQ36(strm', lastMatch) + else yystuck(lastMatch) + else yystuck(lastMatch) + else if inp = #"a" + then yyQ36(strm', lastMatch) + else if inp < #"a" + then if inp <= #"Z" + then yyQ36(strm', lastMatch) + else yystuck(lastMatch) + else if inp <= #"z" + then yyQ36(strm', lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ34 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = #"[" + then yystuck(lastMatch) + else if inp < #"[" + then if inp <= #"@" + then yystuck(lastMatch) + else yyQ36(strm', lastMatch) + else if inp = #"a" + then yyQ36(strm', lastMatch) + else if inp < #"a" + then yystuck(lastMatch) + else if inp <= #"z" + then yyQ36(strm', lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ40 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction10(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction10(strm, yyNO_MATCH) + (* end case *)) +fun yyQ39 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = #"-" + then yyQ40(strm', lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ33 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = #"-" + then yyQ39(strm', lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ32 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction25(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"0" + then yyAction25(strm, yyNO_MATCH) + else if inp < #"0" + then if inp = #"\"" + then yyAction25(strm, yyNO_MATCH) + else if inp < #"\"" + then if inp = #"!" + then yyQ33(strm', yyMATCH(strm, yyAction25, yyNO_MATCH)) + else yyAction25(strm, yyNO_MATCH) + else if inp = #"/" + then yyQ34(strm', yyMATCH(strm, yyAction25, yyNO_MATCH)) + else yyAction25(strm, yyNO_MATCH) + else if inp = #"[" + then yyAction25(strm, yyNO_MATCH) + else if inp < #"[" + then if inp <= #"@" + then yyAction25(strm, yyNO_MATCH) + else yyQ35(strm', yyMATCH(strm, yyAction25, yyNO_MATCH)) + else if inp = #"a" + then yyQ35(strm', yyMATCH(strm, yyAction25, yyNO_MATCH)) + else if inp < #"a" + then yyAction25(strm, yyNO_MATCH) + else if inp <= #"z" + then yyQ35(strm', yyMATCH(strm, yyAction25, yyNO_MATCH)) + else yyAction25(strm, yyNO_MATCH) + (* end case *)) +fun yyQ41 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction24(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"<" + then yyAction24(strm, yyNO_MATCH) + else yyQ41(strm', yyMATCH(strm, yyAction24, yyNO_MATCH)) + (* end case *)) +fun yyQ44 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction21(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"<" + then yyAction21(strm, yyNO_MATCH) + else yyQ41(strm', yyMATCH(strm, yyAction21, yyNO_MATCH)) + (* end case *)) +fun yyQ43 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction24(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"<" + then yyAction24(strm, yyNO_MATCH) + else if inp < #"<" + then if inp = #"0" + then yyQ43(strm', yyMATCH(strm, yyAction24, yyNO_MATCH)) + else if inp < #"0" + then if inp = #"-" + then yyQ43(strm', yyMATCH(strm, yyAction24, yyNO_MATCH)) + else if inp < #"-" + then yyQ41(strm', yyMATCH(strm, yyAction24, yyNO_MATCH)) + else if inp = #"/" + then yyQ41(strm', yyMATCH(strm, yyAction24, yyNO_MATCH)) + else yyQ43(strm', yyMATCH(strm, yyAction24, yyNO_MATCH)) + else if inp = #":" + then yyQ41(strm', yyMATCH(strm, yyAction24, yyNO_MATCH)) + else if inp = #";" + then yyQ44(strm', yyMATCH(strm, yyAction24, yyNO_MATCH)) + else yyQ43(strm', yyMATCH(strm, yyAction24, yyNO_MATCH)) + else if inp = #"[" + then yyQ41(strm', yyMATCH(strm, yyAction24, yyNO_MATCH)) + else if inp < #"[" + then if inp <= #"@" + then yyQ41(strm', yyMATCH(strm, yyAction24, yyNO_MATCH)) + else yyQ43(strm', yyMATCH(strm, yyAction24, yyNO_MATCH)) + else if inp = #"a" + then yyQ43(strm', yyMATCH(strm, yyAction24, yyNO_MATCH)) + else if inp < #"a" + then yyQ41(strm', yyMATCH(strm, yyAction24, yyNO_MATCH)) + else if inp <= #"z" + then yyQ43(strm', yyMATCH(strm, yyAction24, yyNO_MATCH)) + else yyQ41(strm', yyMATCH(strm, yyAction24, yyNO_MATCH)) + (* end case *)) +fun yyQ47 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction19(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"<" + then yyAction19(strm, yyNO_MATCH) + else yyQ41(strm', yyMATCH(strm, yyAction19, yyNO_MATCH)) + (* end case *)) +fun yyQ46 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction24(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"A" + then yyQ46(strm', yyMATCH(strm, yyAction24, yyNO_MATCH)) + else if inp < #"A" + then if inp = #"<" + then yyAction24(strm, yyNO_MATCH) + else if inp < #"<" + then if inp = #";" + then yyQ47(strm', yyMATCH(strm, yyAction24, yyNO_MATCH)) + else yyQ41(strm', yyMATCH(strm, yyAction24, yyNO_MATCH)) + else yyQ41(strm', yyMATCH(strm, yyAction24, yyNO_MATCH)) + else if inp = #"a" + then yyQ46(strm', yyMATCH(strm, yyAction24, yyNO_MATCH)) + else if inp < #"a" + then if inp <= #"Z" + then yyQ46(strm', yyMATCH(strm, yyAction24, yyNO_MATCH)) + else yyQ41(strm', yyMATCH(strm, yyAction24, yyNO_MATCH)) + else if inp <= #"z" + then yyQ46(strm', yyMATCH(strm, yyAction24, yyNO_MATCH)) + else yyQ41(strm', yyMATCH(strm, yyAction24, yyNO_MATCH)) + (* end case *)) +fun yyQ48 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction20(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"<" + then yyAction20(strm, yyNO_MATCH) + else yyQ41(strm', yyMATCH(strm, yyAction20, yyNO_MATCH)) + (* end case *)) +fun yyQ45 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction24(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #";" + then yyQ48(strm', yyMATCH(strm, yyAction24, yyNO_MATCH)) + else if inp < #";" + then if inp = #"0" + then yyQ45(strm', yyMATCH(strm, yyAction24, yyNO_MATCH)) + else if inp < #"0" + then yyQ41(strm', yyMATCH(strm, yyAction24, yyNO_MATCH)) + else if inp = #":" + then yyQ41(strm', yyMATCH(strm, yyAction24, yyNO_MATCH)) + else yyQ45(strm', yyMATCH(strm, yyAction24, yyNO_MATCH)) + else if inp = #"<" + then yyAction24(strm, yyNO_MATCH) + else yyQ41(strm', yyMATCH(strm, yyAction24, yyNO_MATCH)) + (* end case *)) +fun yyQ42 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction24(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"=" + then yyQ41(strm', yyMATCH(strm, yyAction24, yyNO_MATCH)) + else if inp < #"=" + then if inp = #":" + then yyQ41(strm', yyMATCH(strm, yyAction24, yyNO_MATCH)) + else if inp < #":" + then if inp <= #"/" + then yyQ41(strm', yyMATCH(strm, yyAction24, yyNO_MATCH)) + else yyQ45(strm', yyMATCH(strm, yyAction24, yyNO_MATCH)) + else if inp = #"<" + then yyAction24(strm, yyNO_MATCH) + else yyQ41(strm', yyMATCH(strm, yyAction24, yyNO_MATCH)) + else if inp = #"[" + then yyQ41(strm', yyMATCH(strm, yyAction24, yyNO_MATCH)) + else if inp < #"[" + then if inp <= #"@" + then yyQ41(strm', yyMATCH(strm, yyAction24, yyNO_MATCH)) + else yyQ46(strm', yyMATCH(strm, yyAction24, yyNO_MATCH)) + else if inp = #"a" + then yyQ46(strm', yyMATCH(strm, yyAction24, yyNO_MATCH)) + else if inp < #"a" + then yyQ41(strm', yyMATCH(strm, yyAction24, yyNO_MATCH)) + else if inp <= #"z" + then yyQ46(strm', yyMATCH(strm, yyAction24, yyNO_MATCH)) + else yyQ41(strm', yyMATCH(strm, yyAction24, yyNO_MATCH)) + (* end case *)) +fun yyQ31 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction24(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"=" + then yyQ41(strm', yyMATCH(strm, yyAction24, yyNO_MATCH)) + else if inp < #"=" + then if inp = #"$" + then yyQ41(strm', yyMATCH(strm, yyAction24, yyNO_MATCH)) + else if inp < #"$" + then if inp = #"#" + then yyQ42(strm', yyMATCH(strm, yyAction24, yyNO_MATCH)) + else yyQ41(strm', yyMATCH(strm, yyAction24, yyNO_MATCH)) + else if inp = #"<" + then yyAction24(strm, yyNO_MATCH) + else yyQ41(strm', yyMATCH(strm, yyAction24, yyNO_MATCH)) + else if inp = #"[" + then yyQ41(strm', yyMATCH(strm, yyAction24, yyNO_MATCH)) + else if inp < #"[" + then if inp <= #"@" + then yyQ41(strm', yyMATCH(strm, yyAction24, yyNO_MATCH)) + else yyQ43(strm', yyMATCH(strm, yyAction24, yyNO_MATCH)) + else if inp = #"a" + then yyQ43(strm', yyMATCH(strm, yyAction24, yyNO_MATCH)) + else if inp < #"a" + then yyQ41(strm', yyMATCH(strm, yyAction24, yyNO_MATCH)) + else if inp <= #"z" + then yyQ43(strm', yyMATCH(strm, yyAction24, yyNO_MATCH)) + else yyQ41(strm', yyMATCH(strm, yyAction24, yyNO_MATCH)) + (* end case *)) +fun yyQ30 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction22(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"<" + then yyAction22(strm, yyNO_MATCH) + else yyQ41(strm', yyMATCH(strm, yyAction22, yyNO_MATCH)) + (* end case *)) +fun yyQ29 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction23(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"<" + then yyAction23(strm, yyNO_MATCH) + else yyQ41(strm', yyMATCH(strm, yyAction23, yyNO_MATCH)) + (* end case *)) +fun yyQ28 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction24(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"<" + then yyAction24(strm, yyNO_MATCH) + else yyQ41(strm', yyMATCH(strm, yyAction24, yyNO_MATCH)) + (* end case *)) +fun yyQ3 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => + if yyInput.eof(!(yystrm)) + then UserDeclarations.eof(yyarg) + else yystuck(lastMatch) + | SOME(inp, strm') => + if inp = #"!" + then yyQ28(strm', lastMatch) + else if inp < #"!" + then if inp = #"\n" + then yyQ30(strm', lastMatch) + else if inp < #"\n" + then if inp = #"\t" + then yyQ29(strm', lastMatch) + else yyQ28(strm', lastMatch) + else if inp = #" " + then yyQ29(strm', lastMatch) + else yyQ28(strm', lastMatch) + else if inp = #"'" + then yyQ28(strm', lastMatch) + else if inp < #"'" + then if inp = #"&" + then yyQ31(strm', lastMatch) + else yyQ28(strm', lastMatch) + else if inp = #"<" + then yyQ32(strm', lastMatch) + else yyQ28(strm', lastMatch) + (* end case *)) +fun yyQ21 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction1(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction1(strm, yyNO_MATCH) + (* end case *)) +fun yyQ20 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction5(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction5(strm, yyNO_MATCH) + (* end case *)) +fun yyQ22 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction4(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #":" + then yyAction4(strm, yyNO_MATCH) + else if inp < #":" + then if inp = #"/" + then yyAction4(strm, yyNO_MATCH) + else if inp < #"/" + then if inp <= #"," + then yyAction4(strm, yyNO_MATCH) + else yyQ22(strm', yyMATCH(strm, yyAction4, yyNO_MATCH)) + else yyQ22(strm', yyMATCH(strm, yyAction4, yyNO_MATCH)) + else if inp = #"[" + then yyAction4(strm, yyNO_MATCH) + else if inp < #"[" + then if inp <= #"@" + then yyAction4(strm, yyNO_MATCH) + else yyQ22(strm', yyMATCH(strm, yyAction4, yyNO_MATCH)) + else if inp = #"a" + then yyQ22(strm', yyMATCH(strm, yyAction4, yyNO_MATCH)) + else if inp < #"a" + then yyAction4(strm, yyNO_MATCH) + else if inp <= #"z" + then yyQ22(strm', yyMATCH(strm, yyAction4, yyNO_MATCH)) + else yyAction4(strm, yyNO_MATCH) + (* end case *)) +fun yyQ19 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction4(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #":" + then yyAction4(strm, yyNO_MATCH) + else if inp < #":" + then if inp = #"/" + then yyAction4(strm, yyNO_MATCH) + else if inp < #"/" + then if inp <= #"," + then yyAction4(strm, yyNO_MATCH) + else yyQ22(strm', yyMATCH(strm, yyAction4, yyNO_MATCH)) + else yyQ22(strm', yyMATCH(strm, yyAction4, yyNO_MATCH)) + else if inp = #"[" + then yyAction4(strm, yyNO_MATCH) + else if inp < #"[" + then if inp <= #"@" + then yyAction4(strm, yyNO_MATCH) + else yyQ22(strm', yyMATCH(strm, yyAction4, yyNO_MATCH)) + else if inp = #"a" + then yyQ22(strm', yyMATCH(strm, yyAction4, yyNO_MATCH)) + else if inp < #"a" + then yyAction4(strm, yyNO_MATCH) + else if inp <= #"z" + then yyQ22(strm', yyMATCH(strm, yyAction4, yyNO_MATCH)) + else yyAction4(strm, yyNO_MATCH) + (* end case *)) +fun yyQ24 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction7(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction7(strm, yyNO_MATCH) + (* end case *)) +fun yyQ23 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = #"\v" + then yyQ23(strm', lastMatch) + else if inp < #"\v" + then if inp = #"\n" + then yystuck(lastMatch) + else yyQ23(strm', lastMatch) + else if inp = #"'" + then yyQ24(strm', lastMatch) + else yyQ23(strm', lastMatch) + (* end case *)) +fun yyQ18 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction8(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"\v" + then yyQ23(strm', yyMATCH(strm, yyAction8, yyNO_MATCH)) + else if inp < #"\v" + then if inp = #"\n" + then yyAction8(strm, yyNO_MATCH) + else yyQ23(strm', yyMATCH(strm, yyAction8, yyNO_MATCH)) + else if inp = #"'" + then yyQ24(strm', yyMATCH(strm, yyAction8, yyNO_MATCH)) + else yyQ23(strm', yyMATCH(strm, yyAction8, yyNO_MATCH)) + (* end case *)) +fun yyQ26 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction6(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction6(strm, yyNO_MATCH) + (* end case *)) +fun yyQ25 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = #"\v" + then yyQ25(strm', lastMatch) + else if inp < #"\v" + then if inp = #"\n" + then yystuck(lastMatch) + else yyQ25(strm', lastMatch) + else if inp = #"\"" + then yyQ26(strm', lastMatch) + else yyQ25(strm', lastMatch) + (* end case *)) +fun yyQ17 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction8(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"\v" + then yyQ25(strm', yyMATCH(strm, yyAction8, yyNO_MATCH)) + else if inp < #"\v" + then if inp = #"\n" + then yyAction8(strm, yyNO_MATCH) + else yyQ25(strm', yyMATCH(strm, yyAction8, yyNO_MATCH)) + else if inp = #"\"" + then yyQ26(strm', yyMATCH(strm, yyAction8, yyNO_MATCH)) + else yyQ25(strm', yyMATCH(strm, yyAction8, yyNO_MATCH)) + (* end case *)) +fun yyQ16 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction2(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction2(strm, yyNO_MATCH) + (* end case *)) +fun yyQ27 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction3(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"\n" + then yyAction3(strm, yyNO_MATCH) + else if inp < #"\n" + then if inp = #"\t" + then yyQ27(strm', yyMATCH(strm, yyAction3, yyNO_MATCH)) + else yyAction3(strm, yyNO_MATCH) + else if inp = #" " + then yyQ27(strm', yyMATCH(strm, yyAction3, yyNO_MATCH)) + else yyAction3(strm, yyNO_MATCH) + (* end case *)) +fun yyQ15 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction3(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"\n" + then yyAction3(strm, yyNO_MATCH) + else if inp < #"\n" + then if inp = #"\t" + then yyQ27(strm', yyMATCH(strm, yyAction3, yyNO_MATCH)) + else yyAction3(strm, yyNO_MATCH) + else if inp = #" " + then yyQ27(strm', yyMATCH(strm, yyAction3, yyNO_MATCH)) + else yyAction3(strm, yyNO_MATCH) + (* end case *)) +fun yyQ14 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction8(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction8(strm, yyNO_MATCH) + (* end case *)) +fun yyQ2 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => + if yyInput.eof(!(yystrm)) + then UserDeclarations.eof(yyarg) + else yystuck(lastMatch) + | SOME(inp, strm') => + if inp = #"-" + then yyQ19(strm', lastMatch) + else if inp < #"-" + then if inp = #"!" + then yyQ14(strm', lastMatch) + else if inp < #"!" + then if inp = #"\n" + then yyQ16(strm', lastMatch) + else if inp < #"\n" + then if inp = #"\t" + then yyQ15(strm', lastMatch) + else yyQ14(strm', lastMatch) + else if inp = #" " + then yyQ15(strm', lastMatch) + else yyQ14(strm', lastMatch) + else if inp = #"'" + then yyQ18(strm', lastMatch) + else if inp < #"'" + then if inp = #"\"" + then yyQ17(strm', lastMatch) + else yyQ14(strm', lastMatch) + else yyQ14(strm', lastMatch) + else if inp = #">" + then yyQ21(strm', lastMatch) + else if inp < #">" + then if inp = #"0" + then yyQ19(strm', lastMatch) + else if inp < #"0" + then if inp = #"/" + then yyQ14(strm', lastMatch) + else yyQ19(strm', lastMatch) + else if inp = #":" + then yyQ14(strm', lastMatch) + else if inp < #":" + then yyQ19(strm', lastMatch) + else if inp = #"=" + then yyQ20(strm', lastMatch) + else yyQ14(strm', lastMatch) + else if inp = #"[" + then yyQ14(strm', lastMatch) + else if inp < #"[" + then if inp <= #"@" + then yyQ14(strm', lastMatch) + else yyQ19(strm', lastMatch) + else if inp = #"a" + then yyQ19(strm', lastMatch) + else if inp < #"a" + then yyQ14(strm', lastMatch) + else if inp <= #"z" + then yyQ19(strm', lastMatch) + else yyQ14(strm', lastMatch) + (* end case *)) +fun yyQ13 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction11(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction11(strm, yyNO_MATCH) + (* end case *)) +fun yyQ12 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction13(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"-" + then yyQ13(strm', yyMATCH(strm, yyAction13, yyNO_MATCH)) + else yyAction13(strm, yyNO_MATCH) + (* end case *)) +fun yyQ11 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction12(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction12(strm, yyNO_MATCH) + (* end case *)) +fun yyQ10 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction13(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction13(strm, yyNO_MATCH) + (* end case *)) +fun yyQ1 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => + if yyInput.eof(!(yystrm)) + then UserDeclarations.eof(yyarg) + else yystuck(lastMatch) + | SOME(inp, strm') => + if inp = #"\v" + then yyQ10(strm', lastMatch) + else if inp < #"\v" + then if inp = #"\n" + then yyQ11(strm', lastMatch) + else yyQ10(strm', lastMatch) + else if inp = #"-" + then yyQ12(strm', lastMatch) + else yyQ10(strm', lastMatch) + (* end case *)) +fun yyQ8 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction15(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction15(strm, yyNO_MATCH) + (* end case *)) +fun yyQ9 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction14(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction14(strm, yyNO_MATCH) + (* end case *)) +fun yyQ7 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction18(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = #"-" + then yyQ9(strm', yyMATCH(strm, yyAction18, yyNO_MATCH)) + else yyAction18(strm, yyNO_MATCH) + (* end case *)) +fun yyQ6 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction16(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction16(strm, yyNO_MATCH) + (* end case *)) +fun yyQ5 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction17(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction17(strm, yyNO_MATCH) + (* end case *)) +fun yyQ4 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction18(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction18(strm, yyNO_MATCH) + (* end case *)) +fun yyQ0 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => + if yyInput.eof(!(yystrm)) + then UserDeclarations.eof(yyarg) + else yystuck(lastMatch) + | SOME(inp, strm') => + if inp = #"!" + then yyQ4(strm', lastMatch) + else if inp < #"!" + then if inp = #"\n" + then yyQ6(strm', lastMatch) + else if inp < #"\n" + then if inp = #"\t" + then yyQ5(strm', lastMatch) + else yyQ4(strm', lastMatch) + else if inp = #" " + then yyQ5(strm', lastMatch) + else yyQ4(strm', lastMatch) + else if inp = #"." + then yyQ4(strm', lastMatch) + else if inp < #"." + then if inp = #"-" + then yyQ7(strm', lastMatch) + else yyQ4(strm', lastMatch) + else if inp = #">" + then yyQ8(strm', lastMatch) + else yyQ4(strm', lastMatch) + (* end case *)) +in + (case (!(yyss)) + of COM2 => yyQ0(!(yystrm), yyNO_MATCH) + | COM1 => yyQ1(!(yystrm), yyNO_MATCH) + | STAG => yyQ2(!(yystrm), yyNO_MATCH) + | INITIAL => yyQ3(!(yystrm), yyNO_MATCH) + (* end case *)) +end + end + in + continue() + handle IO.Io{cause, ...} => raise cause + end + in + lex + end + in + fun makeLexer yyinputN = mk (yyInput.mkStream yyinputN) + end + + end diff --git a/smlnj-lib/HTML/html-lib.cm b/smlnj-lib/HTML/html-lib.cm new file mode 100644 index 0000000..8968fa1 --- /dev/null +++ b/smlnj-lib/HTML/html-lib.cm @@ -0,0 +1,47 @@ +(* html-lib.cm + * + * COPYRIGHT (c) 1996 AT&T Research. + * + * Sources file for HTML parsing/pretty-printing library; part of the SML/NJ + * Library suite. + *) + +Library + signature HTML_ERROR + signature HTML + structure HTML + structure MakeHTML + structure PrHTML + structure HTMLDefaults + functor HTMLParserFn +is +#if defined(NEW_CM) + $/basis.cm + $/smlnj-lib.cm + $/ml-yacc-lib.cm +#else + ../Util/smlnj-lib.cm + ml-yacc-lib.cm +#endif + +#if defined(NO_PLUGINS) + html-lex.sml + html-gram.sig + html-gram.sml +#else + html-lex : MLLex + html-gram : MLYacc +#endif + + html-error-sig.sml + html-sig.sml + html.sml + html-attr-vals.sml + html-attrs-sig.sml + html-attrs-fn.sml + html-elements-fn.sml + html-parser-fn.sml + check-html-fn.sml + html-defaults.sml + make-html.sml + pr-html.sml diff --git a/smlnj-lib/HTML/html-parser-fn.sml b/smlnj-lib/HTML/html-parser-fn.sml new file mode 100644 index 0000000..126b5ec --- /dev/null +++ b/smlnj-lib/HTML/html-parser-fn.sml @@ -0,0 +1,53 @@ +(* html-parser-fn.sml + * + * COPYRIGHT (c) 1996 AT&T REsearch. + * + * This glues the lexer and parser together. + *) + +functor HTMLParserFn (Err : HTML_ERROR) : sig + + val parseFile : string -> HTML.html + + end = struct + + structure TIO = TextIO + + structure HTMLAttrs = HTMLAttrsFn(Err) + structure HTMLLrVals = HTMLLrValsFn( + structure Token = LrParser.Token + structure HTMLAttrs = HTMLAttrs) + structure Lex = HTMLLexFn( + structure Err = Err + structure Tokens = HTMLLrVals.Tokens + structure HTMLAttrs = HTMLAttrs) + structure Parser = JoinWithArg( + structure Lex= Lex + structure LrParser = LrParser + structure ParserData = HTMLLrVals.ParserData) + structure CheckHTML = CheckHTMLFn(Err) + + fun parseFile fname = let + (* build a context to hand to the HTMLAttrs build functions. *) + fun ctx lnum = {file = SOME fname, line=lnum} + fun lexError (msg, lnum, _) = + Err.lexError {file = SOME fname, line = lnum} msg + fun syntaxError (msg, lnum, _) = + Err.syntaxError {file = SOME fname, line = lnum} msg + val inStrm = TIO.openIn fname + fun close () = TIO.closeIn inStrm + val lexer = Parser.makeLexer (fn n => TIO.inputN(inStrm, n)) + (lexError, SOME fname) + val (result, _) = Parser.parse ( + 15, (* lookahead *) + lexer, + syntaxError, + ctx) + in + CheckHTML.check (ctx 0) result + handle ex => (close(); raise ex) + close(); + result + end + + end; diff --git a/smlnj-lib/HTML/html-sig.sml b/smlnj-lib/HTML/html-sig.sml new file mode 100644 index 0000000..90e3819 --- /dev/null +++ b/smlnj-lib/HTML/html-sig.sml @@ -0,0 +1,374 @@ +(* html-sig.sml + * + * COPYRIGHT (c) 1995 AT&T Bell Laboratories. + * COPYRIGHT (c) 1996 AT&T Research. + * + * This file defines the abstract syntax of HTML documents. The + * AST follows the HTML 3.2 Proposed Standard. + *) + +signature HTML = + sig + + val htmlVersion : string (* = "3.2" *) + + (* the HTML data representations (these are all string) *) + type pcdata = string + type cdata = string + type url = string + type pixels = cdata + type name = string + type id = string + + (* the different types of HTTP methods *) + structure HttpMethod : sig + eqtype method + val get : method + val put : method + val toString : method -> string + val fromString : string -> method option + end + + (* the different types of INPUT elements *) + structure InputType : sig + eqtype ty + val text : ty + val password : ty + val checkbox : ty + val radio : ty + val submit : ty + val reset : ty + val file : ty + val hidden : ty + val image : ty + val toString : ty -> string + val fromString : string -> ty option + end + + (* alignment attributes for IMG, APPLET and INPUT elements *) + structure IAlign : sig + eqtype align + val top : align + val middle : align + val bottom : align + val left : align + val right : align + val toString : align -> string + val fromString : string -> align option + end + + structure HAlign : sig + eqtype align + val left : align + val center : align + val right : align + val toString : align -> string + val fromString : string -> align option + end + + structure CellVAlign : sig + eqtype align + val top : align + val middle : align + val bottom : align + val baseline : align + val toString : align -> string + val fromString : string -> align option + end + + structure CaptionAlign : sig + eqtype align + val top : align + val bottom : align + val toString : align -> string + val fromString : string -> align option + end + + structure ULStyle : sig + eqtype style + val disc : style + val square : style + val circle : style + val toString : style -> string + val fromString : string -> style option + end + + structure Shape : sig + eqtype shape + val rect : shape + val circle : shape + val poly : shape + val default : shape + val toString : shape -> string + val fromString : string -> shape option + end + + structure TextFlowCtl : sig + eqtype control + val left : control + val right : control + val all : control + val none : control + val toString : control -> string + val fromString : string -> control option + end + + datatype html = HTML of { + version : cdata option, + head : head_content list, + body : body + } + + and head_content + = Head_TITLE of pcdata + | Head_ISINDEX of {prompt : cdata option} + | Head_BASE of {href : url} + | Head_META of { + httpEquiv : name option, + name : name option, + content : cdata + } + | Head_LINK of { + id : id option, + href : url option, + rel : cdata option, + rev : cdata option, + title : cdata option + } + (* SCRIPT/STYLE elements are placeholders for the next version of HTML *) + | Head_SCRIPT of pcdata + | Head_STYLE of pcdata + + and body = BODY of { + background : url option, + bgcolor : cdata option, + text : cdata option, + link : cdata option, + vlink : cdata option, + alink : cdata option, + content : block + } + + and block + = BlockList of block list + | TextBlock of text + | Hn of { + n : int, + align : HAlign.align option, + content : text + } + (* NOTE: the content of an ADDRESS element is really (text | P)* *) + | ADDRESS of block + | P of { + align : HAlign.align option, + content : text + } + | UL of { + ty : ULStyle.style option, + compact : bool, + content : list_item list + } + | OL of { + ty : cdata option, + start : int option, + compact : bool, + content : list_item list + } + | DIR of { + compact : bool, + content : list_item list + } + | MENU of { + compact : bool, + content : list_item list + } + | DL of { + compact : bool, + content : {dt : text list, dd : block} list + } + | PRE of { + width : int option, + content : text + } + | DIV of { + align : HAlign.align, + content : block + } + | CENTER of block + | BLOCKQUOTE of block + | FORM of { + action : url option, + method : HttpMethod.method, + enctype : cdata option, + content : block (* -(FORM) *) + } + | ISINDEX of {prompt : cdata option} + | HR of { + align : HAlign.align option, + noshade : bool, + size : pixels option, + width : cdata option + } + | TABLE of { + align : HAlign.align option, + width : cdata option, + border : pixels option, + cellspacing : pixels option, + cellpadding : pixels option, + caption : caption option, + content : tr list + } + + and list_item = LI of { + ty : cdata option, + value : int option, + content : block + } + + (** table content **) + and caption = CAPTION of { + align : CaptionAlign.align option, + content : text + } + and tr = TR of { + align : HAlign.align option, + valign : CellVAlign.align option, + content : table_cell list + } + and table_cell + = TH of { + nowrap : bool, + rowspan : int option, + colspan : int option, + align : HAlign.align option, + valign : CellVAlign.align option, + width : pixels option, + height : pixels option, + content : block + } + | TD of { + nowrap : bool, + rowspan : int option, + colspan : int option, + align : HAlign.align option, + valign : CellVAlign.align option, + width : pixels option, + height : pixels option, + content : block + } + + (** Text **) + and text + = TextList of text list + | PCDATA of pcdata + | TT of text + | I of text + | B of text + | U of text + | STRIKE of text + | BIG of text + | SMALL of text + | SUB of text + | SUP of text + | EM of text + | STRONG of text + | DFN of text + | CODE of text + | SAMP of text + | KBD of text + | VAR of text + | CITE of text + | A of { + name : cdata option, + href : url option, + rel : cdata option, + rev : cdata option, + title : cdata option, + content : text (* -(A) *) + } + | IMG of { + src : url, + alt : cdata option, + align : IAlign.align option, + height : pixels option, + width : pixels option, + border : pixels option, + hspace : pixels option, + vspace : pixels option, + usemap : url option, + ismap : bool + } + | APPLET of { + codebase : url option, + code : cdata, + name : cdata option, + alt : cdata option, + align : IAlign.align option, + height : pixels option, + width : pixels option, + hspace : pixels option, + vspace : pixels option, + content : text + } + | PARAM of { (* applet parameter *) + name : name, + value : cdata option + } + | FONT of { + size : cdata option, + color : cdata option, + content : text + } + | BASEFONT of { + size : cdata option, + content : text + } + | BR of { + clear : TextFlowCtl.control option + } + | MAP of { + name : cdata option, + content : area list + } + | INPUT of { + ty : InputType.ty option, + name : cdata option, + value : cdata option, + checked : bool, + size : cdata option, + maxlength : int option, + src : url option, + align : IAlign.align option + } + | SELECT of { + name : cdata, + size : int option, + content : select_option list + } + | TEXTAREA of { + name : cdata, + rows : int, + cols : int, + content : pcdata + } + (* SCRIPT elements are placeholders for the next version of HTML *) + | SCRIPT of pcdata + + (* map areas *) + and area = AREA of { + shape : Shape.shape option, + coords : cdata option, + href : url option, + nohref : bool, + alt : cdata + } + + (* SELECT options *) + and select_option = OPTION of { + selected : bool, + value : cdata option, + content : pcdata + } + + end (* signature HTML *) + diff --git a/smlnj-lib/HTML/html.sml b/smlnj-lib/HTML/html.sml new file mode 100644 index 0000000..e8b2a25 --- /dev/null +++ b/smlnj-lib/HTML/html.sml @@ -0,0 +1,395 @@ +(* html.sml + * + * COPYRIGHT (c) 1995 AT&T Bell Laboratories. + * COPYRIGHT (c) 1996 AT&T Research. + * + * This file defines the abstract syntax of HTML documents. The + * AST follows the HTML 3.2 Proposed Standard. + *) + +structure HTML : HTML = + struct + + val htmlVersion = "3.2 Final" + + type pcdata = string + type cdata = string + type url = string + type pixels = cdata + type name = string + type id = string + + fun match sl s = let + val cmp = String.collate + (fn (c1, c2) => Char.compare(Char.toUpper c1, c2)) + fun eq (ref s') = (cmp(s, s') = General.EQUAL) + in + List.find eq sl + end + + (* the different types of HTTP methods *) + structure HttpMethod = + struct + type method = string ref + val get = ref "GET" + val put = ref "PUT" + fun toString (ref s) = s + val fromString = match [get, put] + end + + (* the different types of INPUT elements *) + structure InputType = + struct + type ty = string ref + val text = ref "TEXT" + val password = ref "PASSWORD" + val checkbox = ref "CHECKBOX" + val radio = ref "RADIO" + val submit = ref "SUBMIT" + val reset = ref "RESET" + val file = ref "FILE" + val hidden = ref "HIDDEN" + val image = ref "IMAGE" + fun toString (ref s) = s + val fromString = match [ + text, password, checkbox, + radio, submit, reset, + file, hidden, image + ] + end + + (* alignment attributes for IMG, APPLET and INPUT elements *) + structure IAlign = + struct + type align = string ref + val top = ref "TOP" + val middle = ref "MIDDLE" + val bottom = ref "BOTTOM" + val left = ref "LEFT" + val right = ref "RIGHT" + fun toString (ref s) = s + val fromString = match [top, middle, bottom, left, right] + end + + structure HAlign = + struct + type align = string ref + val left = ref "LEFT" + val center = ref "CENTER" + val right = ref "RIGHT" + fun toString (ref s) = s + val fromString = match [left, center, right] + end + + structure CellVAlign = + struct + type align = string ref + val top = ref "TOP" + val middle = ref "MIDDLE" + val bottom = ref "BOTTOM" + val baseline = ref "BASELINE" + fun toString (ref s) = s + val fromString = match [top, middle, bottom, baseline] + end + + structure CaptionAlign = + struct + type align = string ref + val top = ref "TOP" + val bottom = ref "BOTTOM" + val left = ref "LEFT" + val right = ref "RIGHT" + fun toString (ref s) = s + val fromString = match [top, bottom, left, right] + end + + structure ULStyle = + struct + type style = string ref + val disc = ref "DISC" + val square = ref "SQUARE" + val circle = ref "CIRCLE" + fun toString (ref s) = s + val fromString = match [disc, square, circle] + end + + structure Shape = + struct + type shape = string ref + val rect = ref "RECT" + val circle = ref "CIRCLE" + val poly = ref "POLY" + val default = ref "DEFAULT" + fun toString (ref s) = s + val fromString = match [rect, circle, poly, default] + end + + structure TextFlowCtl = + struct + type control = string ref + val left = ref "LEFT" + val right = ref "RIGHT" + val all = ref "ALL" + val none = ref "NONE" + fun toString (ref s) = s + val fromString = match [left, right, all, none] + end + + datatype html = HTML of { + version : cdata option, + head : head_content list, + body : body + } + + and head_content + = Head_TITLE of pcdata + | Head_ISINDEX of {prompt : cdata option} + | Head_BASE of {href : url} + | Head_META of { + httpEquiv : name option, + name : name option, + content : cdata + } + | Head_LINK of { + id : id option, + href : url option, + rel : cdata option, + rev : cdata option, + title : cdata option + } + (* SCRIPT/STYLE elements are placeholders for the next version of HTML *) + | Head_SCRIPT of pcdata + | Head_STYLE of pcdata + + and body = BODY of { + background : url option, + bgcolor : cdata option, + text : cdata option, + link : cdata option, + vlink : cdata option, + alink : cdata option, + content : block + } + + and block + = BlockList of block list + | TextBlock of text + | Hn of { + n : int, + align : HAlign.align option, + content : text + } + | ADDRESS of block + | P of { + align : HAlign.align option, + content : text + } + | UL of { + ty : ULStyle.style option, + compact : bool, + content : list_item list + } + | OL of { + ty : cdata option, + start : int option, + compact : bool, + content : list_item list + } + | DIR of { + compact : bool, + content : list_item list + } + | MENU of { + compact : bool, + content : list_item list + } + | DL of { + compact : bool, + content : {dt : text list, dd : block} list + } + | PRE of { + width : int option, + content : text + } + | DIV of { + align : HAlign.align, + content : block + } + | CENTER of block + | BLOCKQUOTE of block + | FORM of { + action : url option, + method : HttpMethod.method, + enctype : cdata option, + content : block (* -(FORM) *) + } + | ISINDEX of {prompt : cdata option} + | HR of { + align : HAlign.align option, + noshade : bool, + size : pixels option, + width : cdata option + } + | TABLE of { + align : HAlign.align option, + width : cdata option, + border : pixels option, + cellspacing : pixels option, + cellpadding : pixels option, + caption : caption option, + content : tr list + } + + and list_item = LI of { + ty : cdata option, + value : int option, + content : block + } + + (** table content **) + and caption = CAPTION of { + align : CaptionAlign.align option, + content : text + } + and tr = TR of { + align : HAlign.align option, + valign : CellVAlign.align option, + content : table_cell list + } + and table_cell + = TH of { + nowrap : bool, + rowspan : int option, + colspan : int option, + align : HAlign.align option, + valign : CellVAlign.align option, + width : pixels option, + height : pixels option, + content : block + } + | TD of { + nowrap : bool, + rowspan : int option, + colspan : int option, + align : HAlign.align option, + valign : CellVAlign.align option, + width : pixels option, + height : pixels option, + content : block + } + + (** Text **) + and text + = TextList of text list + | PCDATA of pcdata + | TT of text + | I of text + | B of text + | U of text + | STRIKE of text + | BIG of text + | SMALL of text + | SUB of text + | SUP of text + | EM of text + | STRONG of text + | DFN of text + | CODE of text + | SAMP of text + | KBD of text + | VAR of text + | CITE of text + | A of { + name : cdata option, + href : url option, + rel : cdata option, + rev : cdata option, + title : cdata option, + content : text (* -(A) *) + } + | IMG of { + src : url, + alt : cdata option, + align : IAlign.align option, + height : pixels option, + width : pixels option, + border : pixels option, + hspace : pixels option, + vspace : pixels option, + usemap : url option, + ismap : bool + } + | APPLET of { + codebase : url option, + code : cdata, + name : cdata option, + alt : cdata option, + align : IAlign.align option, + height : pixels option, + width : pixels option, + hspace : pixels option, + vspace : pixels option, + content : text + } + | PARAM of { (* applet parameter *) + name : name, + value : cdata option + } + | FONT of { + size : cdata option, + color : cdata option, + content : text + } + | BASEFONT of { + size : cdata option, + content : text + } + | BR of { + clear : TextFlowCtl.control option + } + | MAP of { + name : cdata option, + content : area list + } + | INPUT of { + ty : InputType.ty option, + name : cdata option, + value : cdata option, + checked : bool, + size : cdata option, + maxlength : int option, + src : url option, + align : IAlign.align option + } + | SELECT of { + name : cdata, + size : int option, + content : select_option list + } + | TEXTAREA of { + name : cdata, + rows : int, + cols : int, + content : pcdata + } + (* SCRIPT elements are placeholders for the next version of HTML *) + | SCRIPT of pcdata + + (* map areas *) + and area = AREA of { + shape : Shape.shape option, + coords : cdata option, + href : url option, + nohref : bool, + alt : cdata + } + + (* SELECT options *) + and select_option = OPTION of { + selected : bool, + value : cdata option, + content : pcdata + } + + end (* signature HTML *) + diff --git a/smlnj-lib/HTML/make-html.sml b/smlnj-lib/HTML/make-html.sml new file mode 100644 index 0000000..f7f1423 --- /dev/null +++ b/smlnj-lib/HTML/make-html.sml @@ -0,0 +1,124 @@ +(* make-html.sml + * + * COPYRIGHT (c) 1996 AT&T Research. + * + * This is a collection of constructors for building some of the common + * kinds of HTML elements. + *) + +structure MakeHTML : sig + + val blockList : HTML.block list -> HTML.block + val textList : HTML.text list -> HTML.text + + val mkH : (int * HTML.pcdata) -> HTML.block + + val mkP : HTML.text -> HTML.block + val mkUL : HTML.list_item list -> HTML.block + val mkOL : HTML.list_item list -> HTML.block + val mkDL : {dt : HTML.text list, dd : HTML.block} list -> HTML.block + val HR : HTML.block + val BR : HTML.text + + val mkLI : HTML.block -> HTML.list_item + + val mkA_HREF : {href : HTML.url, content : HTML.text} -> HTML.text + val mkA_NAME : {name : HTML.cdata, content : HTML.text} -> HTML.text + + val mkTR : HTML.table_cell list -> HTML.tr + val mkTH : HTML.block -> HTML.table_cell + val mkTH_COLSPAN : {colspan : int, content : HTML.block} -> HTML.table_cell + val mkTD : HTML.block -> HTML.table_cell + val mkTD_COLSPAN : {colspan : int, content : HTML.block} -> HTML.table_cell + + end = struct + + fun blockList [b] = b + | blockList bl = HTML.BlockList bl + + fun textList [t] = t + | textList tl = HTML.TextList tl + + fun mkH (n, hdr) = HTML.Hn{n = n, align=NONE, content=HTML.PCDATA hdr} + + fun mkP content = HTML.P{align=NONE, content=content} + + fun mkUL items = HTML.UL{compact=false, ty=NONE, content=items} + + fun mkOL items = HTML.OL{compact=false, ty=NONE, start = NONE, content=items} + + fun mkDL items = HTML.DL{compact=false, content=items} + + val HR = HTML.HR{align=NONE, noshade=false, size=NONE, width=NONE} + + val BR = HTML.BR{clear = NONE} + + fun mkLI blk = HTML.LI{ty=NONE, value=NONE, content=blk} + + fun mkA_HREF {href, content} = HTML.A{ + href = SOME href, + title = NONE, + name = NONE, + rel = NONE, + rev = NONE, + content = content + } + + fun mkA_NAME {name, content} = HTML.A{ + href = NONE, + title = NONE, + name = SOME name, + rel = NONE, + rev = NONE, + content = content + } + + fun mkTR content = HTML.TR{ + align = NONE, + valign = NONE, + content = content + } + + fun mkTH content = HTML.TH{ + nowrap = false, + rowspan = NONE, + colspan = NONE, + align = NONE, + valign = NONE, + width = NONE, + height = NONE, + content = content + } + fun mkTH_COLSPAN {colspan, content} = HTML.TH{ + nowrap = false, + rowspan = NONE, + colspan = SOME colspan, + align = NONE, + valign = NONE, + width = NONE, + height = NONE, + content = content + } + + fun mkTD content = HTML.TD{ + nowrap = false, + rowspan = NONE, + colspan = NONE, + align = NONE, + valign = NONE, + width = NONE, + height = NONE, + content = content + } + fun mkTD_COLSPAN {colspan, content} = HTML.TD{ + nowrap = false, + rowspan = NONE, + colspan = SOME colspan, + align = NONE, + valign = NONE, + width = NONE, + height = NONE, + content = content + } + + end; diff --git a/smlnj-lib/HTML/pr-html.sml b/smlnj-lib/HTML/pr-html.sml new file mode 100644 index 0000000..af39d68 --- /dev/null +++ b/smlnj-lib/HTML/pr-html.sml @@ -0,0 +1,480 @@ +(* pr-html.sml + * + * COPYRIGHT (c) 1996 AT&T REsearch. + * + * Pretty-print an HTML tree. + *) + +structure PrHTML : sig + + val prHTML : { + putc : char -> unit, + puts : string -> unit + } -> HTML.html -> unit + + end = struct + + structure H = HTML + structure F = Format + + datatype outstream = OS of { + putc : char -> unit, + puts : string -> unit + } + + fun putc (OS{putc, ...}, c) = putc c + fun puts (OS{puts, ...}, s) = puts s + + datatype attr_data + = IMPLICIT of bool + | CDATA of string option + | NAME of string option + | NUMBER of int option + + local + fun name toString NONE = NAME NONE + | name toString (SOME x) = NAME(SOME(toString x)) + in + val httpMethod = name HTML.HttpMethod.toString + val inputType = name HTML.InputType.toString + val iAlign = name HTML.IAlign.toString + val hAlign = name HTML.HAlign.toString + val cellVAlign = name HTML.CellVAlign.toString + val captionAlign = name HTML.CaptionAlign.toString + val ulStyle = name HTML.ULStyle.toString + val shape = name HTML.Shape.toString + val textFlowCtl = name HTML.TextFlowCtl.toString + end (* local *) + + fun fmtTag (tag, []) = concat["<", tag, ">"] + | fmtTag (tag, attrs) = let + fun fmtAttr (attrName, IMPLICIT true) = SOME attrName + | fmtAttr (attrName, CDATA(SOME s)) = + SOME(F.format "%s=\"%s\"" [F.STR attrName, F.STR s]) + | fmtAttr (attrName, NAME(SOME s)) = + SOME(F.format "%s=%s" [F.STR attrName, F.STR s]) + | fmtAttr (attrName, NUMBER(SOME n)) = + SOME(F.format "%s=%d" [F.STR attrName, F.INT n]) + | fmtAttr _ = NONE + val attrs = List.mapPartial fmtAttr attrs + in + ListFormat.fmt { + init = "<", + sep = " ", + final = ">", + fmt = fn x => x + } (tag :: attrs) + end + + fun fmtEndTag tag = concat["</", tag, ">"] + + fun prTag (OS{puts, ...}, tag, attrs) = puts(fmtTag (tag, attrs)) + fun prEndTag (OS{puts, ...}, tag) = puts(fmtEndTag tag) + fun newLine (OS{putc, ...}) = putc #"\n" + fun space (OS{putc, ...}) = putc #" " + + (** NOTE: once we are doing linebreaks for text, this becomes + ** important. + **) + fun setPre (_, _) = () + + fun prBlock (strm, blk : HTML.block) = (case blk + of (HTML.BlockList bl) => + List.app (fn b => prBlock (strm, b)) bl + | (HTML.TextBlock txt) => (prText (strm, txt); newLine strm) + | (HTML.Hn{n, align, content}) => let + val tag = "H" ^ Int.toString n + in + prTag (strm, tag, [("align", hAlign align)]); + prText (strm, content); + prEndTag (strm, tag); + newLine strm + end + | (HTML.ADDRESS blk) => ( + prTag (strm, "ADDRESS", []); + newLine strm; + prBlock (strm, blk); + prEndTag (strm, "ADDRESS"); + newLine strm) + | (HTML.P{align, content}) => ( + prTag (strm, "P", [("ALIGN", hAlign align)]); + newLine strm; + prText (strm, content); + newLine strm) + | (HTML.UL{ty, compact, content}) => ( + prTag (strm, "UL", [ + ("TYPE", ulStyle ty), + ("COMPACT", IMPLICIT compact) + ]); + newLine strm; + prListItems (strm, content); + prEndTag (strm, "UL"); + newLine strm) + | (HTML.OL{ty, start, compact, content}) => ( + prTag (strm, "OL", [ + ("TYPE", CDATA ty), + ("START", NUMBER start), + ("COMPACT", IMPLICIT compact) + ]); + newLine strm; + prListItems (strm, content); + prEndTag (strm, "OL"); + newLine strm) + | (HTML.DIR{compact, content}) => ( + prTag (strm, "DIR", [("COMPACT", IMPLICIT compact)]); + newLine strm; + prListItems (strm, content); + prEndTag (strm, "DIR"); + newLine strm) + | (HTML.MENU{compact, content}) => ( + prTag (strm, "MENU", [("COMPACT", IMPLICIT compact)]); + newLine strm; + prListItems (strm, content); + prEndTag (strm, "MENU"); + newLine strm) + | (HTML.DL{compact, content}) => ( + prTag (strm, "DL", [("COMPACT", IMPLICIT compact)]); + newLine strm; + prDLItems (strm, content); + prEndTag (strm, "DL"); + newLine strm) + | (HTML.PRE{width, content}) => ( + prTag (strm, "PRE", [("WIDTH", NUMBER width)]); + newLine strm; + setPre (strm, true); + prText (strm, content); + setPre (strm, false); + newLine strm; + prEndTag (strm, "PRE"); + newLine strm) + | (HTML.DIV{align, content}) => ( + prTag (strm, "DIV", [("ALIGN", hAlign(SOME align))]); + newLine strm; + prBlock (strm, content); + prEndTag (strm, "DIV"); + newLine strm) + | (HTML.CENTER bl) => ( + prTag (strm, "CENTER", []); + newLine strm; + prBlock (strm, bl); + prEndTag (strm, "CENTER"); + newLine strm) + | (HTML.BLOCKQUOTE bl) => ( + prTag (strm, "BLOCKQUOTE", []); + newLine strm; + prBlock (strm, bl); + prEndTag (strm, "BLOCKQUOTE"); + newLine strm) + | (HTML.FORM{action, method, enctype, content}) => ( + prTag (strm, "FORM", [ + ("ACTION", CDATA action), + ("METHOD", httpMethod(SOME method)), + ("ENCTYPE", CDATA enctype) + ]); + newLine strm; + prBlock (strm, content); + prEndTag (strm, "FORM"); + newLine strm) + | (HTML.ISINDEX{prompt}) => ( + prTag (strm, "ISINDEX", [("PROMPT", CDATA prompt)]); + newLine strm) + | (HTML.HR{align, noshade, size, width}) => ( + prTag (strm, "HR", [ + ("ALIGN", hAlign align), + ("NOSHADE", IMPLICIT noshade), + ("SIZE", CDATA size), + ("WIDTH", CDATA width) + ]); + newLine strm) + | (HTML.TABLE{ + align, width, border, cellspacing, cellpadding, + caption, content + }) => ( + prTag (strm, "TABLE", [ + ("ALIGN", hAlign align), + ("WIDTH", CDATA width), + ("BORDER", CDATA border), + ("CELLSPACING", CDATA cellspacing), + ("CELLPADDING", CDATA cellpadding) + ]); + newLine strm; + prCaption (strm, caption); + prTableRows (strm, content); + prEndTag (strm, "TABLE"); + newLine strm) + (* end case *)) + + and prListItems (strm, items) = let + fun prItem (HTML.LI{ty, value, content}) = ( + prTag (strm, "LI", [("TYPE", CDATA ty), ("VALUE", NUMBER value)]); + newLine strm; + prBlock (strm, content)) + in + List.app prItem items + end + + and prDLItems (strm, items) = let + fun prDT txt = ( + prTag (strm, "DT", []); + space strm; + prText (strm, txt); + newLine strm) + fun prDD blk = ( + prTag (strm, "DD", []); + newLine strm; + prBlock (strm, blk)) + fun prItem ({dt, dd}) = (List.app prDT dt; prDD dd) + in + List.app prItem items + end + + and prCaption (strm, NONE) = () + | prCaption (strm, SOME(HTML.CAPTION{align, content})) = ( + prTag (strm, "CAPTION", [("ALIGN", captionAlign align)]); + newLine strm; + prText (strm, content); + prEndTag (strm, "CAPTION"); + newLine strm) + + and prTableRows (strm, rows) = let + fun prTR (HTML.TR{align, valign, content}) = ( + prTag (strm, "TR", [ + ("ALIGN", hAlign align), + ("VALIGN", cellVAlign valign) + ]); + newLine strm; + List.app (prTableCell strm) content) + in + List.app prTR rows + end + + and prTableCell strm cell = let + fun prCell (tag, { + nowrap, rowspan, colspan , align, valign, width, height, + content + }) = ( + prTag (strm, tag, [ + ("NOWRAP", IMPLICIT nowrap), + ("ROWSPAN", NUMBER rowspan), + ("COLSPAN", NUMBER colspan), + ("ALIGN", hAlign align), + ("VALIGN", cellVAlign valign), + ("WIDTH", CDATA width), + ("HEIGHT", CDATA height) + ]); + newLine strm; + prBlock (strm, content)) + in + case cell + of (HTML.TH stuff) => prCell ("TH", stuff) + | (HTML.TD stuff) => prCell ("TD", stuff) + (* end case *) + end + + and prEmph (strm, tag, text) = ( + prTag (strm, tag, []); + prText (strm, text); + prEndTag (strm, tag)) + + and prText (strm, text) = (case text + of (HTML.TextList tl) => + List.app (fn txt => prText(strm, txt)) tl + | (HTML.PCDATA pcdata) => prPCData(strm, pcdata) + | (HTML.TT txt) => prEmph (strm, "TT", txt) + | (HTML.I txt) => prEmph (strm, "I", txt) + | (HTML.B txt) => prEmph (strm, "B", txt) + | (HTML.U txt) => prEmph (strm, "U", txt) + | (HTML.STRIKE txt) => prEmph (strm, "STRIKE", txt) + | (HTML.BIG txt) => prEmph (strm, "BIG", txt) + | (HTML.SMALL txt) => prEmph (strm, "SMALL", txt) + | (HTML.SUB txt) => prEmph (strm, "SUB", txt) + | (HTML.SUP txt) => prEmph (strm, "SUP", txt) + | (HTML.EM txt) => prEmph (strm, "EM", txt) + | (HTML.STRONG txt) => prEmph (strm, "STRONG", txt) + | (HTML.DFN txt) => prEmph (strm, "DFN", txt) + | (HTML.CODE txt) => prEmph (strm, "CODE", txt) + | (HTML.SAMP txt) => prEmph (strm, "SAMP", txt) + | (HTML.KBD txt) => prEmph (strm, "KBD", txt) + | (HTML.VAR txt) => prEmph (strm, "VAR", txt) + | (HTML.CITE txt) => prEmph (strm, "CITE", txt) + | (HTML.A{name, href, rel, rev, title, content}) => ( + prTag (strm, "A", [ + ("NAME", CDATA name), + ("HREF", CDATA href), + ("REL", CDATA rel), + ("REV", CDATA rev), + ("TITLE", CDATA title) + ]); + prText (strm, content); + prEndTag (strm, "A")) + | (HTML.IMG{ + src, alt, align, height, width, border, + hspace, vspace, usemap, ismap + }) => prTag (strm, "IMG", [ + ("SRC", CDATA(SOME src)), + ("ALT", CDATA alt), + ("ALIGN", iAlign align), + ("HEIGHT", CDATA height), + ("WIDTH", CDATA width), + ("BORDER", CDATA border), + ("HSPACE", CDATA hspace), + ("VSPACE", CDATA vspace), + ("USEMAP", CDATA usemap), + ("ISMAP", IMPLICIT ismap) + ]) + | (HTML.APPLET{ + codebase, code, name, alt, align, height, width, + hspace, vspace, content + }) => ( + prTag (strm, "APPLET", [ + ("CODEBASE", CDATA codebase), + ("CODE", CDATA(SOME code)), + ("NAME", CDATA name), + ("ALT", CDATA alt), + ("ALIGN", iAlign align), + ("HEIGHT", CDATA height), + ("WIDTH", CDATA width), + ("HSPACE", CDATA hspace), + ("VSPACE", CDATA vspace) + ]); + prText (strm, content); + prEndTag (strm, "APPLET")) + | (HTML.PARAM{name, value}) => + prTag (strm, "PARAM", [ + ("NAME", NAME(SOME name)), + ("VALUE", CDATA value) + ]) + | (HTML.FONT{size, color, content}) => ( + prTag (strm, "FONT", [ + ("SIZE", CDATA size), + ("COLOR", CDATA color) + ]); + prText (strm, content); + prEndTag (strm, "FONT")) + | (HTML.BASEFONT{size, content}) => ( + prTag (strm, "BASEFONT", [("SIZE", CDATA size)]); + prText (strm, content); + prEndTag (strm, "BASEFONT")) + | (HTML.BR{clear}) => ( + prTag (strm, "BR", [("CLEAR", textFlowCtl clear)]); + newLine strm) + | (HTML.MAP{name, content}) => ( + prTag (strm, "MAP", [("NAME", CDATA name)]); + List.app (prArea strm) content; + prEndTag (strm, "MAP")) + | (HTML.INPUT{ + ty, name, value, checked, size, maxlength, src, align + }) => prTag (strm, "INPUT", [ + ("TYPE", inputType ty), + ("NAME", NAME name), + ("VALUE", CDATA value), + ("CHECKED", IMPLICIT checked), + ("SIZE", CDATA size), + ("MAXLENGTH", NUMBER maxlength), + ("SRC", CDATA src), + ("ALIGN", iAlign align) + ]) + | (HTML.SELECT{name, size, content}) => ( + prTag (strm, "SELECT", [ + ("NAME", NAME(SOME name)), + ("SIZE", NUMBER size) + ]); + List.app (prOption strm) content; + prEndTag (strm, "SELECT")) + | (HTML.TEXTAREA{name, rows, cols, content}) => ( + prTag (strm, "TEXTAREA", [ + ("NAME", NAME(SOME name)), + ("ROWS", NUMBER(SOME rows)), + ("COLS", NUMBER(SOME cols)) + ]); + prPCData (strm, content); + prEndTag (strm, "TEXTAREA")) + (* SCRIPT elements are placeholders for the next version of HTML *) + | (HTML.SCRIPT pcdata) => () + (* end case *)) + + and prArea strm (HTML.AREA{shape=s, coords, href, nohref, alt}) = + prTag (strm, "AREA", [ + ("SHAPE", shape s), + ("COORDS", CDATA coords), + ("HREF", CDATA href), + ("nohref", IMPLICIT nohref), + ("ALT", CDATA(SOME alt)) + ]) + + and prOption strm (HTML.OPTION{selected, value, content}) = ( + prTag (strm, "OPTION", [ + ("SELECTED", IMPLICIT selected), + ("VALUE", CDATA value) + ]); + prPCData (strm, content)) + + and prPCData (strm, data) = puts (strm, data) + + fun prBody (strm, HTML.BODY{ + background, bgcolor, text, link, vlink, alink, content + }) = ( + prTag (strm, "BODY", [ + ("BACKGROUND", CDATA background), + ("BGCOLOR", CDATA bgcolor), + ("TEXT", CDATA text), + ("LINK", CDATA link), + ("VLINK", CDATA vlink), + ("ALINK", CDATA alink) + ]); + prBlock (strm, content); + prEndTag (strm, "BODY")) + + fun prHeadElement strm elem = (case elem + of (HTML.Head_TITLE pcdata) => ( + prTag (strm, "TITLE", []); + prPCData(strm, pcdata); + prEndTag (strm, "TITLE"); + newLine strm) + | (HTML.Head_ISINDEX{prompt}) => ( + prTag (strm, "ISINDEX", [("PROMPT", CDATA prompt)]); + newLine strm) + | (HTML.Head_BASE{href}) => ( + prTag (strm, "BASE", [("HREF", CDATA(SOME href))]); + newLine strm) + | (HTML.Head_META{httpEquiv, name, content}) => ( + prTag (strm, "META", [ + ("HTTP-EQUIV", NAME httpEquiv), + ("NAME", NAME name), + ("CONTENT", CDATA(SOME content)) + ]); + newLine strm) + | (HTML.Head_LINK{id, href, rel, rev, title}) => ( + prTag (strm, "LINK", [ + ("ID", NAME id), + ("HREF", CDATA href), + ("REL", CDATA rel), + ("REV", CDATA rev), + ("TITLE", CDATA title) + ]); + newLine strm) + (* SCRIPT/STYLE elements are placeholders for the next version of HTML *) + | (HTML.Head_SCRIPT pcdata) => () + | (HTML.Head_STYLE pcdata) => () + (* end case *)) + + fun prHTML {putc, puts} html = let + val strm = OS{putc=putc, puts=puts} + val HTML.HTML{head, body, version} = html + in + case version + of NONE => () + | (SOME v) => puts (F.format + "<!DOCTYPE HTML PUBLIC \"-//IETF//DTD HTML %s//EN\">\n" + [F.STR v]) + (* end case *); + puts "<HTML>\n"; + puts "<HEAD>\n"; + List.app (prHeadElement strm) head; + puts "</HEAD>\n"; + prBody (strm, body); + puts "</HTML>\n" + end + + end + diff --git a/smlnj-lib/HTML/test-parser.sml b/smlnj-lib/HTML/test-parser.sml new file mode 100644 index 0000000..e3625c1 --- /dev/null +++ b/smlnj-lib/HTML/test-parser.sml @@ -0,0 +1,68 @@ +(* test-parser.sml + * + * COPYRIGHT (c) 1996 AT&T REsearch. + * + * This is a simple test driver for the HTML parser. + *) + +structure Main : sig + + val doit : string -> HTML.html option + val main : (string * string list) -> OS.Process.status + + end = struct + + structure Err = + struct + type context = {file : string option, line : int} + + structure F = Format + + fun prf ({file, line}, fmt, args) = ( + case file + of NONE => TextIO.output ( + TextIO.stdErr, + F.format "line %3d: " [F.INT line]) + | (SOME fname) => TextIO.output ( + TextIO.stdErr, + F.format "%s[%d]: " [F.STR fname, F.INT line]) + (* end case *); + TextIO.output(TextIO.stdErr, F.format fmt args); + TextIO.output1(TextIO.stdErr, #"\n")) + + fun badStartTag ctx tagName = + prf (ctx, "unrecognized start tag \"%s\"",[F.STR tagName]) + + fun badEndTag ctx tagName = + prf (ctx, "unrecognized end tag \"%s\"",[F.STR tagName]) + + fun badAttrVal ctx (attrName, attrVal) = + prf (ctx, "bad value \"%s\" for attribute \"%s\"", + [F.STR attrVal, F.STR attrName]) + + fun lexError ctx msg = prf (ctx, "%s", [F.STR msg]) + + fun syntaxError ctx msg = prf (ctx, "%s", [F.STR msg]) + + fun missingAttrVal ctx attrName = + prf (ctx, "missing value for \"%s\" attribute", [F.STR attrName]) + + fun missingAttr ctx attrName = + prf (ctx, "missing \"%s\" attribute", [F.STR attrName]) + + fun unknownAttr ctx attrName = + prf (ctx, "unknown attribute \"%s\"", [F.STR attrName]) + + fun unquotedAttrVal ctx attrName = + prf (ctx, "attribute value for \"%s\" should be quoted", + [F.STR attrName]) + + end + + structure P = HTMLParserFn(Err); + + fun doit fname = SOME(P.parseFile fname) (* handle _ => NONE *) + + fun main (_, files) = (List.app (ignore o doit) files; OS.Process.success) + + end; diff --git a/smlnj-lib/HTML4/.cm/GUID/html4-attr.g.sml b/smlnj-lib/HTML4/.cm/GUID/html4-attr.g.sml new file mode 100644 index 0000000..df3c53b --- /dev/null +++ b/smlnj-lib/HTML4/.cm/GUID/html4-attr.g.sml @@ -0,0 +1 @@ +guid-$/(html4-lib.cm):html4-attr.g.sml-1714016086.966 diff --git a/smlnj-lib/HTML4/.cm/GUID/html4-attr.l.sml b/smlnj-lib/HTML4/.cm/GUID/html4-attr.l.sml new file mode 100644 index 0000000..aeefeca --- /dev/null +++ b/smlnj-lib/HTML4/.cm/GUID/html4-attr.l.sml @@ -0,0 +1 @@ +guid-$/(html4-lib.cm):html4-attr.l.sml-1714016087.024 diff --git a/smlnj-lib/HTML4/.cm/GUID/html4-attrs.sml b/smlnj-lib/HTML4/.cm/GUID/html4-attrs.sml new file mode 100644 index 0000000..75a26ac --- /dev/null +++ b/smlnj-lib/HTML4/.cm/GUID/html4-attrs.sml @@ -0,0 +1 @@ +guid-$/(html4-lib.cm):html4-attrs.sml-1714016091.180 diff --git a/smlnj-lib/HTML4/.cm/GUID/html4-entities.sml b/smlnj-lib/HTML4/.cm/GUID/html4-entities.sml new file mode 100644 index 0000000..848f275 --- /dev/null +++ b/smlnj-lib/HTML4/.cm/GUID/html4-entities.sml @@ -0,0 +1 @@ +guid-$/(html4-lib.cm):html4-entities.sml-1714016091.173 diff --git a/smlnj-lib/HTML4/.cm/GUID/html4-parser.sml b/smlnj-lib/HTML4/.cm/GUID/html4-parser.sml new file mode 100644 index 0000000..b20c99f --- /dev/null +++ b/smlnj-lib/HTML4/.cm/GUID/html4-parser.sml @@ -0,0 +1 @@ +guid-$/(html4-lib.cm):html4-parser.sml-1714016090.077 diff --git a/smlnj-lib/HTML4/.cm/GUID/html4-print.sml b/smlnj-lib/HTML4/.cm/GUID/html4-print.sml new file mode 100644 index 0000000..b5a2448 --- /dev/null +++ b/smlnj-lib/HTML4/.cm/GUID/html4-print.sml @@ -0,0 +1 @@ +guid-$/(html4-lib.cm):html4-print.sml-1714016091.071 diff --git a/smlnj-lib/HTML4/.cm/GUID/html4-token-utils.sml b/smlnj-lib/HTML4/.cm/GUID/html4-token-utils.sml new file mode 100644 index 0000000..867caba --- /dev/null +++ b/smlnj-lib/HTML4/.cm/GUID/html4-token-utils.sml @@ -0,0 +1 @@ +guid-$/(html4-lib.cm):html4-token-utils.sml-1714016089.612 diff --git a/smlnj-lib/HTML4/.cm/GUID/html4-utils.sml b/smlnj-lib/HTML4/.cm/GUID/html4-utils.sml new file mode 100644 index 0000000..95b943e --- /dev/null +++ b/smlnj-lib/HTML4/.cm/GUID/html4-utils.sml @@ -0,0 +1 @@ +guid-$/(html4-lib.cm):html4-utils.sml-1714016087.098 diff --git a/smlnj-lib/HTML4/.cm/GUID/html4.g.sml b/smlnj-lib/HTML4/.cm/GUID/html4.g.sml new file mode 100644 index 0000000..1d29fa1 --- /dev/null +++ b/smlnj-lib/HTML4/.cm/GUID/html4.g.sml @@ -0,0 +1 @@ +guid-$/(html4-lib.cm):html4.g.sml-1714016087.171 diff --git a/smlnj-lib/HTML4/.cm/GUID/html4.l.sml b/smlnj-lib/HTML4/.cm/GUID/html4.l.sml new file mode 100644 index 0000000..6692125 --- /dev/null +++ b/smlnj-lib/HTML4/.cm/GUID/html4.l.sml @@ -0,0 +1 @@ +guid-$/(html4-lib.cm):html4.l.sml-1714016089.907 diff --git a/smlnj-lib/HTML4/.cm/GUID/html4.sig b/smlnj-lib/HTML4/.cm/GUID/html4.sig new file mode 100644 index 0000000..719162b --- /dev/null +++ b/smlnj-lib/HTML4/.cm/GUID/html4.sig @@ -0,0 +1 @@ +guid-$/(html4-lib.cm):html4.sig-1714016089.880 diff --git a/smlnj-lib/HTML4/.cm/GUID/html4.sml b/smlnj-lib/HTML4/.cm/GUID/html4.sml new file mode 100644 index 0000000..73ef203 --- /dev/null +++ b/smlnj-lib/HTML4/.cm/GUID/html4.sml @@ -0,0 +1 @@ +guid-$/(html4-lib.cm):html4.sml-1714016089.896 diff --git a/smlnj-lib/HTML4/.cm/SKEL/html4-attr.g.sml b/smlnj-lib/HTML4/.cm/SKEL/html4-attr.g.sml new file mode 100644 index 0000000..d061df6 --- /dev/null +++ b/smlnj-lib/HTML4/.cm/SKEL/html4-attr.g.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d3f1Atom"aHTML4AttrTokens"0ae"HTML4AttrParseFn"i2aLex"gp1c"ANTLR_LEXER"f47d"List"d"String"h1bd4aTok"gp1 ad"UserCode"ad"Err"jh2a*gp1*a7gp17gp1e"AntlrErrHandler"ad"EBNF"f0 \ No newline at end of file diff --git a/smlnj-lib/HTML4/.cm/SKEL/html4-attr.l.sml b/smlnj-lib/HTML4/.cm/SKEL/html4-attr.l.sml new file mode 100644 index 0000000..90e9edf --- /dev/null +++ b/smlnj-lib/HTML4/.cm/SKEL/html4-attr.l.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f7ULexBuffer"AntlrStreamPos"Cd"UTF8"d"String"d"Substring"d"TextIO"Vector"Nad"HTML4AttrLexer"h1bd2aUserDeclarations"h1egp1d"HTML4AttrTokens"bd2egp1*f6C*d"List"d"Atom"Nf0f3* \ No newline at end of file diff --git a/smlnj-lib/HTML4/.cm/SKEL/html4-attrs.sml b/smlnj-lib/HTML4/.cm/SKEL/html4-attrs.sml new file mode 100644 index 0000000..59f6cec --- /dev/null +++ b/smlnj-lib/HTML4/.cm/SKEL/html4-attrs.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f1d"Atom"ad"HTML4Attrs"h0 \ No newline at end of file diff --git a/smlnj-lib/HTML4/.cm/SKEL/html4-entities.sml b/smlnj-lib/HTML4/.cm/SKEL/html4-entities.sml new file mode 100644 index 0000000..7ff9b6e --- /dev/null +++ b/smlnj-lib/HTML4/.cm/SKEL/html4-entities.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f2d"Atom"d"HTML4"ad"HTML4Entities"h0 \ No newline at end of file diff --git a/smlnj-lib/HTML4/.cm/SKEL/html4-parser.sml b/smlnj-lib/HTML4/.cm/SKEL/html4-parser.sml new file mode 100644 index 0000000..c70eadf --- /dev/null +++ b/smlnj-lib/HTML4/.cm/SKEL/html4-parser.sml @@ -0,0 +1,2 @@ +Skeleton 5 +ad"HTML4Parser"h7aH4"gp1d"HTML4"aH4U"gp1d"HTML4Utils"CaH4T"gp1d"HTML4Tokens"aH4TU"gp1d"HTML4TokenUtils"aAtomMap"gp20baTheParser"jgp1HTML4Lexer"gp1e"HTML4ParseFn"egp1f8d"AntlrStreamPos"Cd"Char"0d"CharVector"d"Option"#C;d"Atom"d"String"d"TextIO"NN \ No newline at end of file diff --git a/smlnj-lib/HTML4/.cm/SKEL/html4-print.sml b/smlnj-lib/HTML4/.cm/SKEL/html4-print.sml new file mode 100644 index 0000000..4c1d7b4 --- /dev/null +++ b/smlnj-lib/HTML4/.cm/SKEL/html4-print.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f5Cd"List"d"Atom"d"String"HTML4"d"IntInf"Nad"HTML4Print"jh2ad"H"gp1ad"F"gp1d"Format"h0 \ No newline at end of file diff --git a/smlnj-lib/HTML4/.cm/SKEL/html4-token-utils.sml b/smlnj-lib/HTML4/.cm/SKEL/html4-token-utils.sml new file mode 100644 index 0000000..050f503 --- /dev/null +++ b/smlnj-lib/HTML4/.cm/SKEL/html4-token-utils.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f4d"AntlrStreamPos"String"d"TextIO"HTML4AttrLexer"ad"HTML4TokenUtils"h5CaH4U"gp1HTML4Utils"ad"HTML4AttrParser"jgp1'gp1e"HTML4AttrParseFn"egp1d"HTML4Tokens"f8d"Char"d"CharVectorSlice"Cd"Atom"d"Substring"d"IntInf"Nad"AtomMap"jjh0gp1e"RedBlackMapFn"gp1c"ORD_MAP"N \ No newline at end of file diff --git a/smlnj-lib/HTML4/.cm/SKEL/html4-utils.sml b/smlnj-lib/HTML4/.cm/SKEL/html4-utils.sml new file mode 100644 index 0000000..c38ebbd --- /dev/null +++ b/smlnj-lib/HTML4/.cm/SKEL/html4-utils.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f2d"Atom"d"String"ad"HTML4Utils"h0 \ No newline at end of file diff --git a/smlnj-lib/HTML4/.cm/SKEL/html4.g.sml b/smlnj-lib/HTML4/.cm/SKEL/html4.g.sml new file mode 100644 index 0000000..e8dfcc9 --- /dev/null +++ b/smlnj-lib/HTML4/.cm/SKEL/html4.g.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d3f3HTML4Utils"Atom"d"IntInf"aHTML4Tokens"0ae"HTML4ParseFn"i2aLex"gp1c"ANTLR_LEXER"f2d"List"h1bd4aTok"gp1"ad"UserCode"h2egp1f4".ad"Err"jh2a.gp1.agp1gp1e"AntlrErrHandler"ad"EBNF"0f0 \ No newline at end of file diff --git a/smlnj-lib/HTML4/.cm/SKEL/html4.l.sml b/smlnj-lib/HTML4/.cm/SKEL/html4.l.sml new file mode 100644 index 0000000..d1adefd --- /dev/null +++ b/smlnj-lib/HTML4/.cm/SKEL/html4.l.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f7ULexBuffer"AntlrStreamPos"Cd"UTF8"String"Substring"d"TextIO"Vector"Nad"HTML4Lexer"h1bd2aUserDeclarations"h2egp1d"HTML4TokenUtils"f5Cd"StringCvt"List"*3d"IntInf"Nbd2egp1&f7C& d"Atom"3Nf0f3& \ No newline at end of file diff --git a/smlnj-lib/HTML4/.cm/SKEL/html4.sig b/smlnj-lib/HTML4/.cm/SKEL/html4.sig new file mode 100644 index 0000000..079bda9 --- /dev/null +++ b/smlnj-lib/HTML4/.cm/SKEL/html4.sig @@ -0,0 +1,2 @@ +Skeleton 5 +d2f2d"Atom"d"IntInf"ac"HTML4"h0 \ No newline at end of file diff --git a/smlnj-lib/HTML4/.cm/SKEL/html4.sml b/smlnj-lib/HTML4/.cm/SKEL/html4.sml new file mode 100644 index 0000000..e9d6801 --- /dev/null +++ b/smlnj-lib/HTML4/.cm/SKEL/html4.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f2d"Atom"d"IntInf"adHTML4"jh0gp1c \ No newline at end of file diff --git a/smlnj-lib/HTML4/.cm/amd64-unix/html4-attr.g.sml b/smlnj-lib/HTML4/.cm/amd64-unix/html4-attr.g.sml new file mode 100644 index 0000000..a57ecbc Binary files /dev/null and b/smlnj-lib/HTML4/.cm/amd64-unix/html4-attr.g.sml differ diff --git a/smlnj-lib/HTML4/.cm/amd64-unix/html4-attr.l.sml b/smlnj-lib/HTML4/.cm/amd64-unix/html4-attr.l.sml new file mode 100644 index 0000000..35fa52b Binary files /dev/null and b/smlnj-lib/HTML4/.cm/amd64-unix/html4-attr.l.sml differ diff --git a/smlnj-lib/HTML4/.cm/amd64-unix/html4-attrs.sml b/smlnj-lib/HTML4/.cm/amd64-unix/html4-attrs.sml new file mode 100644 index 0000000..d4f8994 Binary files /dev/null and b/smlnj-lib/HTML4/.cm/amd64-unix/html4-attrs.sml differ diff --git a/smlnj-lib/HTML4/.cm/amd64-unix/html4-entities.sml b/smlnj-lib/HTML4/.cm/amd64-unix/html4-entities.sml new file mode 100644 index 0000000..b3d3f84 Binary files /dev/null and b/smlnj-lib/HTML4/.cm/amd64-unix/html4-entities.sml differ diff --git a/smlnj-lib/HTML4/.cm/amd64-unix/html4-parser.sml b/smlnj-lib/HTML4/.cm/amd64-unix/html4-parser.sml new file mode 100644 index 0000000..71f0ac3 Binary files /dev/null and b/smlnj-lib/HTML4/.cm/amd64-unix/html4-parser.sml differ diff --git a/smlnj-lib/HTML4/.cm/amd64-unix/html4-print.sml b/smlnj-lib/HTML4/.cm/amd64-unix/html4-print.sml new file mode 100644 index 0000000..cb79578 Binary files /dev/null and b/smlnj-lib/HTML4/.cm/amd64-unix/html4-print.sml differ diff --git a/smlnj-lib/HTML4/.cm/amd64-unix/html4-token-utils.sml b/smlnj-lib/HTML4/.cm/amd64-unix/html4-token-utils.sml new file mode 100644 index 0000000..44be9b3 Binary files /dev/null and b/smlnj-lib/HTML4/.cm/amd64-unix/html4-token-utils.sml differ diff --git a/smlnj-lib/HTML4/.cm/amd64-unix/html4-utils.sml b/smlnj-lib/HTML4/.cm/amd64-unix/html4-utils.sml new file mode 100644 index 0000000..31e78fe Binary files /dev/null and b/smlnj-lib/HTML4/.cm/amd64-unix/html4-utils.sml differ diff --git a/smlnj-lib/HTML4/.cm/amd64-unix/html4.g.sml b/smlnj-lib/HTML4/.cm/amd64-unix/html4.g.sml new file mode 100644 index 0000000..6a1e3f5 Binary files /dev/null and b/smlnj-lib/HTML4/.cm/amd64-unix/html4.g.sml differ diff --git a/smlnj-lib/HTML4/.cm/amd64-unix/html4.l.sml b/smlnj-lib/HTML4/.cm/amd64-unix/html4.l.sml new file mode 100644 index 0000000..79e5d02 Binary files /dev/null and b/smlnj-lib/HTML4/.cm/amd64-unix/html4.l.sml differ diff --git a/smlnj-lib/HTML4/.cm/amd64-unix/html4.sig b/smlnj-lib/HTML4/.cm/amd64-unix/html4.sig new file mode 100644 index 0000000..86cf733 Binary files /dev/null and b/smlnj-lib/HTML4/.cm/amd64-unix/html4.sig differ diff --git a/smlnj-lib/HTML4/.cm/amd64-unix/html4.sml b/smlnj-lib/HTML4/.cm/amd64-unix/html4.sml new file mode 100644 index 0000000..9a47acd Binary files /dev/null and b/smlnj-lib/HTML4/.cm/amd64-unix/html4.sml differ diff --git a/smlnj-lib/HTML4/README b/smlnj-lib/HTML4/README new file mode 100644 index 0000000..9fa8afd --- /dev/null +++ b/smlnj-lib/HTML4/README @@ -0,0 +1,38 @@ +______________________________________________________________________ +README +______________________________________________________________________ + +This is the README for a new HTML 4.01 library for SML/NJ. + +____________________________________________________________ +Build targets +____________________________________________________________ + +The primary target is in html4-lib.cm. This will build everything +needed to provide the HTML4 structure. + +There are several test targets coded in CM files. + +The first one tests the lexer, accepting a list of files as an +argument, and outputing a token per line. For example: + +$ ml-build html4-lex-test.cm +$ sml @SMLload=html4-lex-test.platform tests/test001.html + +The next one tests the parser, accepting a list of files as input, and +outputing the parse tree (TODO). For example: + +$ ml-build html4-test.cm +$ sml @SMLload=html4-test.platform tests/test001.html + +____________________________________________________________ +Helper script +____________________________________________________________ + +We've included a helper script, which we used to create the lexical +token constructors. This script, helper.py, processes a table of +elements, published in the HTML 4.01 specification. + +______________________________________________________________________ +End of README +______________________________________________________________________ diff --git a/smlnj-lib/HTML4/TODO b/smlnj-lib/HTML4/TODO new file mode 100644 index 0000000..98474f8 --- /dev/null +++ b/smlnj-lib/HTML4/TODO @@ -0,0 +1,8 @@ +TODO list for the HTML4 library: + + - fix bugs in conformance tests (these are parsing issues) + - streamline parser; we should be able to fold the processing of attributes (html4-attr.{lg}) + into the scaner/parser for elements. + - the printer should be "PRE" aware. + - add the rest of the standard HTML4 entites to html4-entities.sml + diff --git a/smlnj-lib/HTML4/helper.py b/smlnj-lib/HTML4/helper.py new file mode 100755 index 0000000..30ce150 --- /dev/null +++ b/smlnj-lib/HTML4/helper.py @@ -0,0 +1,108 @@ +#! /usr/bin/env python +# ______________________________________________________________________ +"""helper.py + +Quick helper script for some minor code generation tasks. Requires +the BeautifulSoup library. +""" +# ______________________________________________________________________ +# Module imports + +from BeautifulSoup import BeautifulSoup + +# ______________________________________________________________________ +# Function definitions + +def element_data_to_tuple_str ((tag_name, start_tag_data, end_tag_data, + empty_data, depr_data, dtd_data, desc_data)): + otag_ctor = u"START%s" % tag_name + if end_tag_data == u"F": + etag_ctor = u"NONE" + else: + etag_ctor = u"SOME END%s" % tag_name + return u'("%s", %s, %s)' % (tag_name, otag_ctor, etag_ctor) + +# ______________________________________________________________________ + +def element_data_to_string_fun ((tag_name, start_tag_data, end_tag_data, + empty_data, depr_data, dtd_data, desc_data)): + print (u'| tokToString (START%s payload) = "START%s " ^ (payloadToString ' + 'payload)' % (tag_name, tag_name)) + if end_tag_data != u"F": + print u'| tokToString END%s = "END%s"' % (tag_name, tag_name) + +# ______________________________________________________________________ + +def element_data_to_production ((tag_name, start_tag_data, end_tag_data, + empty_data, depr_data, dtd_data, desc_data)): + end_str = u";" + if end_tag_data != u"F": + end_str = u"END%s ;" % tag_name + if empty_data != u"E": + end_str = u"XXX %s" % end_str + print u"%s : START%s %s" % (tag_name.lower(), tag_name, end_str) + print + +# ______________________________________________________________________ +# Main routine + +def main (): + elements_doc = BeautifulSoup(open("tests/elements.html").read(), + convertEntities = BeautifulSoup.HTML_ENTITIES) + # Skip the header row. + crnt_element_row = elements_doc.table.tr.findNextSibling("tr") + strict_list = [] + loose_list = [] + frameset_list = [] + while crnt_element_row is not None: + cols = crnt_element_row.findAll("td") + assert len(cols) == 7 + element_data = tuple([cols[0].a.string.strip()] + + [table_entry.string.strip() + for table_entry in cols[1:]]) + (tag_name, start_tag_data, end_tag_data, empty_data, depr_data, + dtd_data, desc_data) = element_data + print u"| START%s of token_payload" % tag_name + if end_tag_data != u"F": + print u"| END%s" % tag_name + else: + print u"(* No END tag for %s element. *)" % tag_name + if dtd_data == u'': + strict_list.append(element_data) + elif dtd_data == u'F': + frameset_list.append(element_data) + else: + assert dtd_data == u'L' + loose_list.append(element_data) + crnt_element_row = crnt_element_row.findNextSibling("tr") + element_data_list = strict_list + loose_list + frameset_list + element_data_list.sort() + print + print "val strict_tuple_list = [" + print u",\n".join((element_data_to_tuple_str(elem_data) + for elem_data in strict_list)) + print "]" + print + print "val loose_tuple_list = [" + print u",\n".join((element_data_to_tuple_str(elem_data) + for elem_data in loose_list)) + print "]" + print + print "val frameset_tuple_list = [" + print u",\n".join((element_data_to_tuple_str(elem_data) + for elem_data in frameset_list)) + print "]" + print + for element_data in element_data_list: + element_data_to_string_fun(element_data) + print + for element_data in element_data_list: + element_data_to_production(element_data) + +# ______________________________________________________________________ + +if __name__ == "__main__": + main() + +# ______________________________________________________________________ +# End of helper.py diff --git a/smlnj-lib/HTML4/html4-attr.g b/smlnj-lib/HTML4/html4-attr.g new file mode 100644 index 0000000..173ad5d --- /dev/null +++ b/smlnj-lib/HTML4/html4-attr.g @@ -0,0 +1,39 @@ +(* html4-attr.g + * + * COPYRIGHT (c) 2014 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * Grammar for parsing HTML 4 attributes. + * + * FIXME: we could probably just do this in the scanner!!! + *) + +%name HTML4Attr; + +%tokens : NAME of Atom.atom + | EQUALS ("=") + | STRINGLIT of string + | DOT (".") + | NUMBER of string +; + +%start attrs; + +attrs : attr* + ; + +attr : NAME (EQUALS attr_value => (attr_value))? + => ((NAME, SR)) + ; + +attr_value : STRINGLIT + | NAME (DOT NAME => (NAME))* + => ((Atom.toString NAME) ^ (String.concatWith "." + (map Atom.toString SR))) + | NUMBER (DOT NUMBER => (NUMBER))* + => (NUMBER ^ (String.concatWith "." SR)) + ; + +(* ______________________________________________________________________ + End of html4-attr.g + ______________________________________________________________________ *) diff --git a/smlnj-lib/HTML4/html4-attr.g.sml b/smlnj-lib/HTML4/html4-attr.g.sml new file mode 100644 index 0000000..f99807d --- /dev/null +++ b/smlnj-lib/HTML4/html4-attr.g.sml @@ -0,0 +1,241 @@ +structure HTML4AttrTokens = + struct + datatype token + = NAME of Atom.atom + | EQUALS + | STRINGLIT of string + | DOT + | NUMBER of string + | EOF + val allToks = [ + EQUALS, DOT, EOF + ] + fun toString tok = +(case (tok) + of (NAME(_)) => "NAME" + | (EQUALS) => "=" + | (STRINGLIT(_)) => "STRINGLIT" + | (DOT) => "." + | (NUMBER(_)) => "NUMBER" + | (EOF) => "EOF" +(* end case *)) + fun isKW tok = +(case (tok) + of (NAME(_)) => false + | (EQUALS) => false + | (STRINGLIT(_)) => false + | (DOT) => false + | (NUMBER(_)) => false + | (EOF) => false +(* end case *)) + fun isEOF EOF = true + | isEOF _ = false + end (* HTML4AttrTokens *) + +functor HTML4AttrParseFn (Lex : ANTLR_LEXER) = struct + + local + structure Tok = +HTML4AttrTokens + structure UserCode = + struct + +fun attr_PROD_1_SUBRULE_1_PROD_1_ACT (attr_value, EQUALS, NAME, attr_value_SPAN : (Lex.pos * Lex.pos), EQUALS_SPAN : (Lex.pos * Lex.pos), NAME_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)) = + (attr_value) +fun attr_PROD_1_ACT (SR, NAME, SR_SPAN : (Lex.pos * Lex.pos), NAME_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)) = + ((NAME, SR)) +fun attr_value_PROD_2_SUBRULE_1_PROD_1_ACT (NAME, DOT, NAME_SPAN : (Lex.pos * Lex.pos), DOT_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)) = + (NAME) +fun attr_value_PROD_2_ACT (SR, NAME, SR_SPAN : (Lex.pos * Lex.pos), NAME_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)) = + ((Atom.toString NAME) ^ (String.concatWith "." + (map Atom.toString SR))) +fun attr_value_PROD_3_SUBRULE_1_PROD_1_ACT (NUMBER, DOT, NUMBER_SPAN : (Lex.pos * Lex.pos), DOT_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)) = + (NUMBER) +fun attr_value_PROD_3_ACT (SR, NUMBER, SR_SPAN : (Lex.pos * Lex.pos), NUMBER_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)) = + (NUMBER ^ (String.concatWith "." SR)) + end (* UserCode *) + + structure Err = AntlrErrHandler( + structure Tok = Tok + structure Lex = Lex) + +(* replace functor with inline structure for better optimization + structure EBNF = AntlrEBNF( + struct + type strm = Err.wstream + val getSpan = Err.getSpan + end) +*) + structure EBNF = + struct + fun optional (pred, parse, strm) = + if pred strm + then let + val (y, span, strm') = parse strm + in + (SOME y, span, strm') + end + else (NONE, Err.getSpan strm, strm) + + fun closure (pred, parse, strm) = let + fun iter (strm, (left, right), ys) = + if pred strm + then let + val (y, (_, right'), strm') = parse strm + in iter (strm', (left, right'), y::ys) + end + else (List.rev ys, (left, right), strm) + in + iter (strm, Err.getSpan strm, []) + end + + fun posclos (pred, parse, strm) = let + val (y, (left, _), strm') = parse strm + val (ys, (_, right), strm'') = closure (pred, parse, strm') + in + (y::ys, (left, right), strm'') + end + end + + fun mk lexFn = let +fun getS() = {} +fun putS{} = () +fun unwrap (ret, strm, repairs) = (ret, strm, repairs) + val (eh, lex) = Err.mkErrHandler {get = getS, put = putS} + fun fail() = Err.failure eh + fun tryProds (strm, prods) = let + fun try [] = fail() + | try (prod :: prods) = + (Err.whileDisabled eh (fn() => prod strm)) + handle Err.ParseError => try (prods) + in try prods end +fun matchNAME strm = (case (lex(strm)) + of (Tok.NAME(x), span, strm') => (x, span, strm') + | _ => fail() +(* end case *)) +fun matchEQUALS strm = (case (lex(strm)) + of (Tok.EQUALS, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchSTRINGLIT strm = (case (lex(strm)) + of (Tok.STRINGLIT(x), span, strm') => (x, span, strm') + | _ => fail() +(* end case *)) +fun matchDOT strm = (case (lex(strm)) + of (Tok.DOT, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) +fun matchNUMBER strm = (case (lex(strm)) + of (Tok.NUMBER(x), span, strm') => (x, span, strm') + | _ => fail() +(* end case *)) +fun matchEOF strm = (case (lex(strm)) + of (Tok.EOF, span, strm') => ((), span, strm') + | _ => fail() +(* end case *)) + +val (attrs_NT) = +let +fun attr_value_NT (strm) = let + fun attr_value_PROD_1 (strm) = let + val (STRINGLIT_RES, STRINGLIT_SPAN, strm') = matchSTRINGLIT(strm) + val FULL_SPAN = (#1(STRINGLIT_SPAN), #2(STRINGLIT_SPAN)) + in + ((STRINGLIT_RES), FULL_SPAN, strm') + end + fun attr_value_PROD_2 (strm) = let + val (NAME_RES, NAME_SPAN, strm') = matchNAME(strm) + fun attr_value_PROD_2_SUBRULE_1_NT (strm) = let + val (DOT_RES, DOT_SPAN, strm') = matchDOT(strm) + val (NAME_RES, NAME_SPAN, strm') = matchNAME(strm') + val FULL_SPAN = (#1(DOT_SPAN), #2(NAME_SPAN)) + in + (UserCode.attr_value_PROD_2_SUBRULE_1_PROD_1_ACT (NAME_RES, DOT_RES, NAME_SPAN : (Lex.pos * Lex.pos), DOT_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)), + FULL_SPAN, strm') + end + fun attr_value_PROD_2_SUBRULE_1_PRED (strm) = (case (lex(strm)) + of (Tok.DOT, _, strm') => true + | _ => false + (* end case *)) + val (SR_RES, SR_SPAN, strm') = EBNF.closure(attr_value_PROD_2_SUBRULE_1_PRED, attr_value_PROD_2_SUBRULE_1_NT, strm') + val FULL_SPAN = (#1(NAME_SPAN), #2(SR_SPAN)) + in + (UserCode.attr_value_PROD_2_ACT (SR_RES, NAME_RES, SR_SPAN : (Lex.pos * Lex.pos), NAME_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)), + FULL_SPAN, strm') + end + fun attr_value_PROD_3 (strm) = let + val (NUMBER_RES, NUMBER_SPAN, strm') = matchNUMBER(strm) + fun attr_value_PROD_3_SUBRULE_1_NT (strm) = let + val (DOT_RES, DOT_SPAN, strm') = matchDOT(strm) + val (NUMBER_RES, NUMBER_SPAN, strm') = matchNUMBER(strm') + val FULL_SPAN = (#1(DOT_SPAN), #2(NUMBER_SPAN)) + in + (UserCode.attr_value_PROD_3_SUBRULE_1_PROD_1_ACT (NUMBER_RES, DOT_RES, NUMBER_SPAN : (Lex.pos * Lex.pos), DOT_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)), + FULL_SPAN, strm') + end + fun attr_value_PROD_3_SUBRULE_1_PRED (strm) = (case (lex(strm)) + of (Tok.DOT, _, strm') => true + | _ => false + (* end case *)) + val (SR_RES, SR_SPAN, strm') = EBNF.closure(attr_value_PROD_3_SUBRULE_1_PRED, attr_value_PROD_3_SUBRULE_1_NT, strm') + val FULL_SPAN = (#1(NUMBER_SPAN), #2(SR_SPAN)) + in + (UserCode.attr_value_PROD_3_ACT (SR_RES, NUMBER_RES, SR_SPAN : (Lex.pos * Lex.pos), NUMBER_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)), + FULL_SPAN, strm') + end + in + (case (lex(strm)) + of (Tok.NUMBER(_), _, strm') => attr_value_PROD_3(strm) + | (Tok.STRINGLIT(_), _, strm') => attr_value_PROD_1(strm) + | (Tok.NAME(_), _, strm') => attr_value_PROD_2(strm) + | _ => fail() + (* end case *)) + end +fun attr_NT (strm) = let + val (NAME_RES, NAME_SPAN, strm') = matchNAME(strm) + fun attr_PROD_1_SUBRULE_1_NT (strm) = let + val (EQUALS_RES, EQUALS_SPAN, strm') = matchEQUALS(strm) + val (attr_value_RES, attr_value_SPAN, strm') = attr_value_NT(strm') + val FULL_SPAN = (#1(EQUALS_SPAN), #2(attr_value_SPAN)) + in + (UserCode.attr_PROD_1_SUBRULE_1_PROD_1_ACT (attr_value_RES, EQUALS_RES, NAME_RES, attr_value_SPAN : (Lex.pos * Lex.pos), EQUALS_SPAN : (Lex.pos * Lex.pos), NAME_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)), + FULL_SPAN, strm') + end + fun attr_PROD_1_SUBRULE_1_PRED (strm) = (case (lex(strm)) + of (Tok.EQUALS, _, strm') => true + | _ => false + (* end case *)) + val (SR_RES, SR_SPAN, strm') = EBNF.optional(attr_PROD_1_SUBRULE_1_PRED, attr_PROD_1_SUBRULE_1_NT, strm') + val FULL_SPAN = (#1(NAME_SPAN), #2(SR_SPAN)) + in + (UserCode.attr_PROD_1_ACT (SR_RES, NAME_RES, SR_SPAN : (Lex.pos * Lex.pos), NAME_SPAN : (Lex.pos * Lex.pos), FULL_SPAN : (Lex.pos * Lex.pos)), + FULL_SPAN, strm') + end +fun attrs_NT (strm) = let + fun attrs_PROD_1_SUBRULE_1_NT (strm) = let + val (attr_RES, attr_SPAN, strm') = attr_NT(strm) + val FULL_SPAN = (#1(attr_SPAN), #2(attr_SPAN)) + in + ((attr_RES), FULL_SPAN, strm') + end + fun attrs_PROD_1_SUBRULE_1_PRED (strm) = (case (lex(strm)) + of (Tok.NAME(_), _, strm') => true + | _ => false + (* end case *)) + val (attr_RES, attr_SPAN, strm') = EBNF.closure(attrs_PROD_1_SUBRULE_1_PRED, attrs_PROD_1_SUBRULE_1_NT, strm) + val FULL_SPAN = (#1(attr_SPAN), #2(attr_SPAN)) + in + ((attr_RES), FULL_SPAN, strm') + end +in + (attrs_NT) +end +val attrs_NT = fn s => unwrap (Err.launch (eh, lexFn, attrs_NT , true) s) + +in (attrs_NT) end + in +fun parse lexFn s = let val (attrs_NT) = mk lexFn in attrs_NT s end + + end + +end diff --git a/smlnj-lib/HTML4/html4-attr.l b/smlnj-lib/HTML4/html4-attr.l new file mode 100644 index 0000000..3a5e06f --- /dev/null +++ b/smlnj-lib/HTML4/html4-attr.l @@ -0,0 +1,36 @@ +(* html4-attr.l + * + * COPYRIGHT (c) 2014 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * Lexer for parsing HTML 4 attributes. + *) + +%name HTML4AttrLexer; + +%defs ( +open HTML4AttrTokens + +fun eof() = EOF +type lex_result = token +); + +%let alpha=[A-Za-z]; +%let digit=[0-9]; +%let ws=[\ \t\r\n]; + +(* some XHTML attributes have ":" in their name *) +{alpha}({alpha}|{digit}|[-.:])* => (NAME (Atom.atom yytext)); +{digit}+ => (NUMBER yytext); +"=" => (EQUALS); +"." => (DOT); +[\"][^\"]*[\"] => (STRINGLIT yytext); +[\'][^\']*[\'] => (STRINGLIT yytext); + +{ws}+ => (continue()); + +. => ((* error; invalid character *) continue()); + +(* ______________________________________________________________________ + End of html4-attr.l + ______________________________________________________________________ *) \ No newline at end of file diff --git a/smlnj-lib/HTML4/html4-attr.l.sml b/smlnj-lib/HTML4/html4-attr.l.sml new file mode 100644 index 0000000..4d665e3 --- /dev/null +++ b/smlnj-lib/HTML4/html4-attr.l.sml @@ -0,0 +1,412 @@ +structure HTML4AttrLexer = struct + + datatype yystart_state = +INITIAL + local + + structure UserDeclarations = + struct + + +open HTML4AttrTokens + +fun eof() = EOF +type lex_result = token + + end + + datatype yymatch + = yyNO_MATCH + | yyMATCH of ULexBuffer.stream * action * yymatch + withtype action = ULexBuffer.stream * yymatch -> UserDeclarations.lex_result + + val yytable : ((UTF8.wchar * UTF8.wchar * int) list * int list) Vector.vector = +Vector.fromList [] + fun yystreamify' p input = ULexBuffer.mkStream (p, input) + + fun yystreamifyReader' p readFn strm = let + val s = ref strm + fun iter(strm, n, accum) = + if n > 1024 then (String.implode (rev accum), strm) + else (case readFn strm + of NONE => (String.implode (rev accum), strm) + | SOME(c, strm') => iter (strm', n+1, c::accum)) + fun input() = let + val (data, strm) = iter(!s, 0, []) + in + s := strm; + data + end + in + yystreamify' p input + end + + fun yystreamifyInstream' p strm = yystreamify' p (fn ()=>TextIO.input strm) + + fun innerLex +(yystrm_, yyss_, yysm) = let + (* current start state *) + val yyss = ref yyss_ + fun YYBEGIN ss = (yyss := ss) + (* current input stream *) + val yystrm = ref yystrm_ + fun yysetStrm strm = yystrm := strm + fun yygetPos() = ULexBuffer.getpos (!yystrm) + fun yystreamify input = yystreamify' (yygetPos()) input + fun yystreamifyReader readFn strm = yystreamifyReader' (yygetPos()) readFn strm + fun yystreamifyInstream strm = yystreamifyInstream' (yygetPos()) strm + (* start position of token -- can be updated via skip() *) + val yystartPos = ref (yygetPos()) + (* get one char of input *) + fun yygetc strm = (case ULexBuffer.getu strm + of (SOME (0w10, s')) => + (AntlrStreamPos.markNewLine yysm (ULexBuffer.getpos strm); + SOME (0w10, s')) + | x => x) + fun yygetList getc strm = let + val get1 = UTF8.getu getc + fun iter (strm, accum) = + (case get1 strm + of NONE => rev accum + | SOME (w, strm') => iter (strm', w::accum) + (* end case *)) + in + iter (strm, []) + end + (* create yytext *) + fun yymksubstr(strm) = ULexBuffer.subtract (strm, !yystrm) + fun yymktext(strm) = Substring.string (yymksubstr strm) + fun yymkunicode(strm) = yygetList Substring.getc (yymksubstr strm) + open UserDeclarations + fun lex () = let + fun yystuck (yyNO_MATCH) = raise Fail "lexer reached a stuck state" + | yystuck (yyMATCH (strm, action, old)) = + action (strm, old) + val yypos = yygetPos() + fun yygetlineNo strm = AntlrStreamPos.lineNo yysm (ULexBuffer.getpos strm) + fun yygetcolNo strm = AntlrStreamPos.colNo yysm (ULexBuffer.getpos strm) + fun yyactsToMatches (strm, [], oldMatches) = oldMatches + | yyactsToMatches (strm, act::acts, oldMatches) = + yyMATCH (strm, act, yyactsToMatches (strm, acts, oldMatches)) + fun yygo actTable = + (fn (~1, _, oldMatches) => yystuck oldMatches + | (curState, strm, oldMatches) => let + val (transitions, finals') = Vector.sub (yytable, curState) + val finals = List.map (fn i => Vector.sub (actTable, i)) finals' + fun tryfinal() = + yystuck (yyactsToMatches (strm, finals, oldMatches)) + fun find (c, []) = NONE + | find (c, (c1, c2, s)::ts) = + if c1 <= c andalso c <= c2 then SOME s + else find (c, ts) + in case yygetc strm + of SOME(c, strm') => + (case find (c, transitions) + of NONE => tryfinal() + | SOME n => + yygo actTable + (n, strm', + yyactsToMatches (strm, finals, oldMatches))) + | NONE => tryfinal() + end) + val yylastwasnref = ref (ULexBuffer.lastWasNL (!yystrm)) + fun continue() = let val yylastwasn = !yylastwasnref in +let +fun yyAction0 (strm, lastMatch : yymatch) = let + val yytext = yymktext(strm) + in + yystrm := strm; NAME (Atom.atom yytext) + end +fun yyAction1 (strm, lastMatch : yymatch) = let + val yytext = yymktext(strm) + in + yystrm := strm; NUMBER yytext + end +fun yyAction2 (strm, lastMatch : yymatch) = (yystrm := strm; EQUALS) +fun yyAction3 (strm, lastMatch : yymatch) = (yystrm := strm; DOT) +fun yyAction4 (strm, lastMatch : yymatch) = let + val yytext = yymktext(strm) + in + yystrm := strm; STRINGLIT yytext + end +fun yyAction5 (strm, lastMatch : yymatch) = let + val yytext = yymktext(strm) + in + yystrm := strm; STRINGLIT yytext + end +fun yyAction6 (strm, lastMatch : yymatch) = (yystrm := strm; continue()) +fun yyAction7 (strm, lastMatch : yymatch) = (yystrm := strm; + (* error; invalid character *) continue()) +fun yyQ9 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction0(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = 0wx3B + then yyAction0(strm, yyNO_MATCH) + else if inp < 0wx3B + then if inp = 0wx2F + then yyAction0(strm, yyNO_MATCH) + else if inp < 0wx2F + then if inp <= 0wx2C + then yyAction0(strm, yyNO_MATCH) + else yyQ9(strm', yyMATCH(strm, yyAction0, yyNO_MATCH)) + else yyQ9(strm', yyMATCH(strm, yyAction0, yyNO_MATCH)) + else if inp = 0wx5B + then yyAction0(strm, yyNO_MATCH) + else if inp < 0wx5B + then if inp <= 0wx40 + then yyAction0(strm, yyNO_MATCH) + else yyQ9(strm', yyMATCH(strm, yyAction0, yyNO_MATCH)) + else if inp = 0wx61 + then yyQ9(strm', yyMATCH(strm, yyAction0, yyNO_MATCH)) + else if inp < 0wx61 + then yyAction0(strm, yyNO_MATCH) + else if inp <= 0wx7A + then yyQ9(strm', yyMATCH(strm, yyAction0, yyNO_MATCH)) + else yyAction0(strm, yyNO_MATCH) + (* end case *)) +fun yyQ8 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction0(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = 0wx3B + then yyAction0(strm, yyNO_MATCH) + else if inp < 0wx3B + then if inp = 0wx2F + then yyAction0(strm, yyNO_MATCH) + else if inp < 0wx2F + then if inp <= 0wx2C + then yyAction0(strm, yyNO_MATCH) + else yyQ9(strm', yyMATCH(strm, yyAction0, yyNO_MATCH)) + else yyQ9(strm', yyMATCH(strm, yyAction0, yyNO_MATCH)) + else if inp = 0wx5B + then yyAction0(strm, yyNO_MATCH) + else if inp < 0wx5B + then if inp <= 0wx40 + then yyAction0(strm, yyNO_MATCH) + else yyQ9(strm', yyMATCH(strm, yyAction0, yyNO_MATCH)) + else if inp = 0wx61 + then yyQ9(strm', yyMATCH(strm, yyAction0, yyNO_MATCH)) + else if inp < 0wx61 + then yyAction0(strm, yyNO_MATCH) + else if inp <= 0wx7A + then yyQ9(strm', yyMATCH(strm, yyAction0, yyNO_MATCH)) + else yyAction0(strm, yyNO_MATCH) + (* end case *)) +fun yyQ7 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction2(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction2(strm, yyNO_MATCH) + (* end case *)) +fun yyQ10 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction1(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = 0wx30 + then yyQ10(strm', yyMATCH(strm, yyAction1, yyNO_MATCH)) + else if inp < 0wx30 + then yyAction1(strm, yyNO_MATCH) + else if inp <= 0wx39 + then yyQ10(strm', yyMATCH(strm, yyAction1, yyNO_MATCH)) + else yyAction1(strm, yyNO_MATCH) + (* end case *)) +fun yyQ6 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction1(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = 0wx30 + then yyQ10(strm', yyMATCH(strm, yyAction1, yyNO_MATCH)) + else if inp < 0wx30 + then yyAction1(strm, yyNO_MATCH) + else if inp <= 0wx39 + then yyQ10(strm', yyMATCH(strm, yyAction1, yyNO_MATCH)) + else yyAction1(strm, yyNO_MATCH) + (* end case *)) +fun yyQ5 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction3(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction3(strm, yyNO_MATCH) + (* end case *)) +fun yyQ12 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction5(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction5(strm, yyNO_MATCH) + (* end case *)) +fun yyQ11 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = 0wx27 + then yyQ12(strm', lastMatch) + else yyQ11(strm', lastMatch) + (* end case *)) +fun yyQ4 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction7(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = 0wx27 + then yyQ12(strm', yyMATCH(strm, yyAction7, yyNO_MATCH)) + else yyQ11(strm', yyMATCH(strm, yyAction7, yyNO_MATCH)) + (* end case *)) +fun yyQ14 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction4(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction4(strm, yyNO_MATCH) + (* end case *)) +fun yyQ13 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = 0wx22 + then yyQ14(strm', lastMatch) + else yyQ13(strm', lastMatch) + (* end case *)) +fun yyQ3 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction7(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = 0wx22 + then yyQ14(strm', yyMATCH(strm, yyAction7, yyNO_MATCH)) + else yyQ13(strm', yyMATCH(strm, yyAction7, yyNO_MATCH)) + (* end case *)) +fun yyQ15 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction6(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = 0wxD + then yyQ15(strm', yyMATCH(strm, yyAction6, yyNO_MATCH)) + else if inp < 0wxD + then if inp = 0wx9 + then yyQ15(strm', yyMATCH(strm, yyAction6, yyNO_MATCH)) + else if inp < 0wx9 + then yyAction6(strm, yyNO_MATCH) + else if inp <= 0wxA + then yyQ15(strm', yyMATCH(strm, yyAction6, yyNO_MATCH)) + else yyAction6(strm, yyNO_MATCH) + else if inp = 0wx20 + then yyQ15(strm', yyMATCH(strm, yyAction6, yyNO_MATCH)) + else yyAction6(strm, yyNO_MATCH) + (* end case *)) +fun yyQ2 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction6(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = 0wxD + then yyQ15(strm', yyMATCH(strm, yyAction6, yyNO_MATCH)) + else if inp < 0wxD + then if inp = 0wx9 + then yyQ15(strm', yyMATCH(strm, yyAction6, yyNO_MATCH)) + else if inp < 0wx9 + then yyAction6(strm, yyNO_MATCH) + else if inp <= 0wxA + then yyQ15(strm', yyMATCH(strm, yyAction6, yyNO_MATCH)) + else yyAction6(strm, yyNO_MATCH) + else if inp = 0wx20 + then yyQ15(strm', yyMATCH(strm, yyAction6, yyNO_MATCH)) + else yyAction6(strm, yyNO_MATCH) + (* end case *)) +fun yyQ1 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction7(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction7(strm, yyNO_MATCH) + (* end case *)) +fun yyQ0 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => + if ULexBuffer.eof(!(yystrm)) + then let + val yycolno = ref(yygetcolNo(!(yystrm))) + val yylineno = ref(yygetlineNo(!(yystrm))) + in + (case (!(yyss)) + of _ => (UserDeclarations.eof()) + (* end case *)) + end + else yystuck(lastMatch) + | SOME(inp, strm') => + if inp = 0wx28 + then yyQ1(strm', lastMatch) + else if inp < 0wx28 + then if inp = 0wx20 + then yyQ2(strm', lastMatch) + else if inp < 0wx20 + then if inp = 0wxB + then yyQ1(strm', lastMatch) + else if inp < 0wxB + then if inp <= 0wx8 + then yyQ1(strm', lastMatch) + else yyQ2(strm', lastMatch) + else if inp = 0wxD + then yyQ2(strm', lastMatch) + else yyQ1(strm', lastMatch) + else if inp = 0wx23 + then yyQ1(strm', lastMatch) + else if inp < 0wx23 + then if inp = 0wx21 + then yyQ1(strm', lastMatch) + else yyQ3(strm', lastMatch) + else if inp = 0wx27 + then yyQ4(strm', lastMatch) + else yyQ1(strm', lastMatch) + else if inp = 0wx3D + then yyQ7(strm', lastMatch) + else if inp < 0wx3D + then if inp = 0wx2F + then yyQ1(strm', lastMatch) + else if inp < 0wx2F + then if inp = 0wx2E + then yyQ5(strm', lastMatch) + else yyQ1(strm', lastMatch) + else if inp <= 0wx39 + then yyQ6(strm', lastMatch) + else yyQ1(strm', lastMatch) + else if inp = 0wx5B + then yyQ1(strm', lastMatch) + else if inp < 0wx5B + then if inp <= 0wx40 + then yyQ1(strm', lastMatch) + else yyQ8(strm', lastMatch) + else if inp = 0wx61 + then yyQ8(strm', lastMatch) + else if inp < 0wx61 + then yyQ1(strm', lastMatch) + else if inp <= 0wx7A + then yyQ8(strm', lastMatch) + else yyQ1(strm', lastMatch) + (* end case *)) +in + (case (!(yyss)) + of INITIAL => yyQ0(!(yystrm), yyNO_MATCH) + (* end case *)) +end +end + and skip() = (yystartPos := yygetPos(); + yylastwasnref := ULexBuffer.lastWasNL (!yystrm); + continue()) + in (continue(), (!yystartPos, yygetPos()-1), !yystrm, !yyss) end + in + lex() + end + in + type pos = AntlrStreamPos.pos + type span = AntlrStreamPos.span + type tok = UserDeclarations.lex_result + + datatype prestrm = STRM of ULexBuffer.stream * + (yystart_state * tok * span * prestrm * yystart_state) option ref + type strm = (prestrm * yystart_state) + + fun lex sm +(STRM (yystrm, memo), ss) = (case !memo + of NONE => let + val (tok, span, yystrm', ss') = innerLex +(yystrm, ss, sm) + val strm' = STRM (yystrm', ref NONE); + in + memo := SOME (ss, tok, span, strm', ss'); + (tok, span, (strm', ss')) + end + | SOME (ss', tok, span, strm', ss'') => + if ss = ss' then + (tok, span, (strm', ss'')) + else ( + memo := NONE; + lex sm +(STRM (yystrm, memo), ss)) + (* end case *)) + + fun streamify input = (STRM (yystreamify' 0 input, ref NONE), INITIAL) + fun streamifyReader readFn strm = (STRM (yystreamifyReader' 0 readFn strm, ref NONE), + INITIAL) + fun streamifyInstream strm = (STRM (yystreamifyInstream' 0 strm, ref NONE), + INITIAL) + + fun getPos (STRM (strm, _), _) = ULexBuffer.getpos strm + + end +end + diff --git a/smlnj-lib/HTML4/html4-attrs.sml b/smlnj-lib/HTML4/html4-attrs.sml new file mode 100644 index 0000000..49de52c --- /dev/null +++ b/smlnj-lib/HTML4/html4-attrs.sml @@ -0,0 +1,274 @@ +(* html4-attrs.sml + * + * COPYRIGHT (c) 2014 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * Helper functions for creating attribute-value pairs to include in + * HTML tags. We omit the deprecated attributes (commented out below), + * but include both the strict, loose, and frameset attributes. + * + * The source of the attribute list comes from + * + * http://www.w3.org/TR/html4/index/attributes.html + *) + +structure HTML4Attrs = + struct + + local + val a_abbr = Atom.atom "abbr" + val a_accept_charset = Atom.atom "accept-charset" + val a_accept = Atom.atom "accept" + val a_action = Atom.atom "action" + val a_align = Atom.atom "align" + val a_alink = Atom.atom "alink" + val a_alt = Atom.atom "alt" + val a_archive = Atom.atom "archive" + val a_axis = Atom.atom "axis" + val a_background = Atom.atom "background" + val a_bgcolor = Atom.atom "bgcolor" + val a_border = Atom.atom "border" + val a_cellpadding = Atom.atom "cellpadding" + val a_cellspacing = Atom.atom "cellspacing" + val a_char = Atom.atom "char" + val a_charoff = Atom.atom "charoff" + val a_charset = Atom.atom "charset" + val a_checked = Atom.atom "checked" + val a_cite = Atom.atom "cite" + val a_class = Atom.atom "class" + val a_classid = Atom.atom "classid" + val a_clear = Atom.atom "clear" + val a_code = Atom.atom "code" + val a_codebase = Atom.atom "codebase" + val a_codetype = Atom.atom "codetype" + val a_color = Atom.atom "color" + val a_cols = Atom.atom "cols" + val a_colspan = Atom.atom "colspan" + val a_compact = Atom.atom "compact" + val a_content = Atom.atom "content" + val a_coords = Atom.atom "coords" + val a_data = Atom.atom "data" + val a_datetime = Atom.atom "datetime" + val a_declare = Atom.atom "declare" + val a_defer = Atom.atom "defer" + val a_dir = Atom.atom "dir" + val a_disabled = Atom.atom "disabled" + val a_enctype = Atom.atom "enctype" + val a_face = Atom.atom "face" + val a_for = Atom.atom "for" + val a_frame = Atom.atom "frame" + val a_frameborder = Atom.atom "frameborder" + val a_headers = Atom.atom "headers" + val a_height = Atom.atom "height" + val a_href = Atom.atom "href" + val a_hreflang = Atom.atom "hreflang" + val a_hspace = Atom.atom "hspace" + val a_http_equiv = Atom.atom "http-equiv" + val a_id = Atom.atom "id" + val a_ismap = Atom.atom "ismap" + val a_label = Atom.atom "label" + val a_lang = Atom.atom "lang" + val a_language = Atom.atom "language" + val a_link = Atom.atom "link" + val a_longdesc = Atom.atom "longdesc" + val a_marginheight = Atom.atom "marginheight" + val a_marginwidth = Atom.atom "marginwidth" + val a_maxlength = Atom.atom "maxlength" + val a_media = Atom.atom "media" + val a_method = Atom.atom "method" + val a_multiple = Atom.atom "multiple" + val a_name = Atom.atom "name" + val a_nohref = Atom.atom "nohref" + val a_noresize = Atom.atom "noresize" + val a_noshade = Atom.atom "noshade" + val a_nowrap = Atom.atom "nowrap" + val a_object = Atom.atom "object" + val a_onblur = Atom.atom "onblur" + val a_onchange = Atom.atom "onchange" + val a_onclick = Atom.atom "onclick" + val a_ondblclick = Atom.atom "ondblclick" + val a_onfocus = Atom.atom "onfocus" + val a_onkeydown = Atom.atom "onkeydown" + val a_onkeypress = Atom.atom "onkeypress" + val a_onkeyup = Atom.atom "onkeyup" + val a_onload = Atom.atom "onload" + val a_onmousedown = Atom.atom "onmousedown" + val a_onmousemove = Atom.atom "onmousemove" + val a_onmouseout = Atom.atom "onmouseout" + val a_onmouseover = Atom.atom "onmouseover" + val a_onmouseup = Atom.atom "onmouseup" + val a_onreset = Atom.atom "onreset" + val a_onselect = Atom.atom "onselect" + val a_onsubmit = Atom.atom "onsubmit" + val a_onunload = Atom.atom "onunload" + val a_profile = Atom.atom "profile" + val a_prompt = Atom.atom "prompt" + val a_readonly = Atom.atom "readonly" + val a_rel = Atom.atom "rel" + val a_rev = Atom.atom "rev" + val a_rows = Atom.atom "rows" + val a_rowspan = Atom.atom "rowspan" + val a_rules = Atom.atom "rules" + val a_scheme = Atom.atom "scheme" + val a_scope = Atom.atom "scope" + val a_scrolling = Atom.atom "scrolling" + val a_selected = Atom.atom "selected" + val a_shape = Atom.atom "shape" + val a_size = Atom.atom "size" + val a_span = Atom.atom "span" + val a_src = Atom.atom "src" + val a_standby = Atom.atom "standby" + val a_start = Atom.atom "start" + val a_style = Atom.atom "style" + val a_summary = Atom.atom "summary" + val a_tabindex = Atom.atom "tabindex" + val a_target = Atom.atom "target" + val a_text = Atom.atom "text" + val a_title = Atom.atom "title" + val a_type = Atom.atom "type" + val a_usemap = Atom.atom "usemap" + val a_valign = Atom.atom "valign" + val a_value = Atom.atom "value" + val a_valuetype = Atom.atom "valuetype" + val a_version = Atom.atom "version" + val a_vlink = Atom.atom "vlink" + val a_vspace = Atom.atom "vspace" + val a_width = Atom.atom "width" + in + fun abbr (v : string) = (a_abbr, SOME v) + fun accept_charset (v : string) = (a_accept_charset, SOME v) + fun accept (v : string) = (a_accept, SOME v) + fun action (v : string) = (a_action, SOME v) + fun align (v : string) = (a_align, SOME v) + fun alink (v : string) = (a_alink, SOME v) + fun alt (v : string) = (a_alt, SOME v) + fun archive (v : string) = (a_archive, SOME v) + fun axis (v : string) = (a_axis, SOME v) +(* DEPRECATED + fun background (v : string) = (a_background, SOME v) (* deprecated *) + fun bgcolor (v : string) = (a_bgcolor, SOME v) (* deprecated *) +*) + fun border (v : string) = (a_border, SOME v) + fun cellpadding (v : string) = (a_cellpadding, SOME v) + fun cellspacing (v : string) = (a_cellspacing, SOME v) + fun char (v : string) = (a_char, SOME v) + fun charoff (v : string) = (a_charoff, SOME v) + fun charset (v : string) = (a_charset, SOME v) + fun checked (v : string) = (a_checked, SOME v) + fun cite (v : string) = (a_cite, SOME v) + fun class (v : string) = (a_class, SOME v) + fun classid (v : string) = (a_classid, SOME v) + fun clear (v : string) = (a_clear, SOME v) + fun code (v : string) = (a_code, SOME v) + fun codebase (v : string) = (a_codebase, SOME v) + fun codetype (v : string) = (a_codetype, SOME v) + fun color (v : string) = (a_color, SOME v) + fun cols (v : string) = (a_cols, SOME v) + fun colspan (v : string) = (a_colspan, SOME v) +(* DEPRECATED + val compact = (a_compact, NONE) (* deprecated *) +*) + fun content (v : string) = (a_content, SOME v) + fun coords (v : string) = (a_coords, SOME v) + fun data (v : string) = (a_data, SOME v) + fun datetime (v : string) = (a_datetime, SOME v) + val declare = (a_declare, NONE) + val defer = (a_defer, NONE) + fun dir (v : string) = (a_dir, SOME v) + val disabled = (a_disabled, NONE) + fun enctype (v : string) = (a_enctype, SOME v) +(* DEPRECATED + fun face (v : string) = (a_face, SOME v) (* deprecated *) +*) + fun for (v : string) = (a_for, SOME v) + fun frame (v : string) = (a_frame, SOME v) + fun frameborder (v : string) = (a_frameborder, SOME v) + fun headers (v : string) = (a_headers, SOME v) + fun height (v : string) = (a_height, SOME v) + fun href (v : string) = (a_href, SOME v) + fun hreflang (v : string) = (a_hreflang, SOME v) + fun hspace (v : string) = (a_hspace, SOME v) + fun http_equiv (v : string) = (a_http_equiv, SOME v) + fun id (v : string) = (a_id, SOME v) + fun ismap (v : string) = (a_ismap, SOME v) + fun label (v : string) = (a_label, SOME v) + fun lang (v : string) = (a_lang, SOME v) +(* DEPRECATED + fun language (v : string) = (a_language, SOME v) (* deprecated *) + fun link (v : string) = (a_link, SOME v) (* deprecated *) +*) + fun longdesc (v : string) = (a_longdesc, SOME v) + fun marginheight (v : string) = (a_marginheight, SOME v) + fun marginwidth (v : string) = (a_marginwidth, SOME v) + fun maxlength (v : string) = (a_maxlength, SOME v) + fun media (v : string) = (a_media, SOME v) + fun method (v : string) = (a_method, SOME v) + fun multiple (v : string) = (a_multiple, SOME v) + fun name (v : string) = (a_name, SOME v) + val nohref = (a_nohref, NONE) + val noresize = (a_noresize, NONE) +(* DEPRECATED + val noshade = (a_noshade, NONE) (* deprecated *) + val nowrap = (a_nowrap, NONE) (* deprecated *) + fun object (v : string) = (a_object, SOME v) (* deprecated *) +*) + fun onblur (v : string) = (a_onblur, SOME v) + fun onchange (v : string) = (a_onchange, SOME v) + fun onclick (v : string) = (a_onclick, SOME v) + fun ondblclick (v : string) = (a_ondblclick, SOME v) + fun onfocus (v : string) = (a_onfocus, SOME v) + fun onkeydown (v : string) = (a_onkeydown, SOME v) + fun onkeypress (v : string) = (a_onkeypress, SOME v) + fun onkeyup (v : string) = (a_onkeyup, SOME v) + fun onload (v : string) = (a_onload, SOME v) + fun onmousedown (v : string) = (a_onmousedown, SOME v) + fun onmousemove (v : string) = (a_onmousemove, SOME v) + fun onmouseout (v : string) = (a_onmouseout, SOME v) + fun onmouseover (v : string) = (a_onmouseover, SOME v) + fun onmouseup (v : string) = (a_onmouseup, SOME v) + fun onreset (v : string) = (a_onreset, SOME v) + fun onselect (v : string) = (a_onselect, SOME v) + fun onsubmit (v : string) = (a_onsubmit, SOME v) + fun onunload (v : string) = (a_onunload, SOME v) + fun profile (v : string) = (a_profile, SOME v) +(* DEPRECATED + fun prompt (v : string) = (a_prompt, SOME v) (* deprecated *) +*) + val readonly = (a_readonly, NONE) + fun rel (v : string) = (a_rel, SOME v) + fun rev (v : string) = (a_rev, SOME v) + fun rows (v : string) = (a_rows, SOME v) + fun rowspan (v : string) = (a_rowspan, SOME v) + fun rules (v : string) = (a_rules, SOME v) + fun scheme (v : string) = (a_scheme, SOME v) + fun scope (v : string) = (a_scope, SOME v) + fun scrolling (v : string) = (a_scrolling, SOME v) + val selected = (a_selected, NONE) + fun shape (v : string) = (a_shape, SOME v) + fun size (v : string) = (a_size, SOME v) + fun span (v : string) = (a_span, SOME v) + fun src (v : string) = (a_src, SOME v) + fun standby (v : string) = (a_standby, SOME v) +(* DEPRECATED + fun start (v : string) = (a_start, SOME v) (* deprecated *) +*) + fun style (v : string) = (a_style, SOME v) + fun summary (v : string) = (a_summary, SOME v) + fun tabindex (v : string) = (a_tabindex, SOME v) + fun target (v : string) = (a_target, SOME v) + fun text (v : string) = (a_text, SOME v) + fun title (v : string) = (a_title, SOME v) + fun type' (v : string) = (a_type, SOME v) + fun usemap (v : string) = (a_usemap, SOME v) + fun valign (v : string) = (a_valign, SOME v) + fun value (v : string) = (a_value, SOME v) + fun valuetype (v : string) = (a_valuetype, SOME v) + fun version (v : string) = (a_version, SOME v) +(* DEPRECATED + fun vlink (v : string) = (a_vlink, SOME v) (* deprecated *) +*) + fun vspace (v : string) = (a_vspace, SOME v) + fun width (v : string) = (a_width, SOME v) + end (* local *) + + end (* HTML4Attrs *) diff --git a/smlnj-lib/HTML4/html4-entities.sml b/smlnj-lib/HTML4/html4-entities.sml new file mode 100644 index 0000000..9279117 --- /dev/null +++ b/smlnj-lib/HTML4/html4-entities.sml @@ -0,0 +1,25 @@ +(* html4-entities.sml + * + * COPYRIGHT (c) 2014 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + *) + +structure HTML4Entities = + struct + + val nbsp = HTML4.ENTITY(Atom.atom "nbsp") + val lt = HTML4.ENTITY(Atom.atom "lt") + val gt = HTML4.ENTITY(Atom.atom "gt") + val amp = HTML4.ENTITY(Atom.atom "amp") + val quot = HTML4.ENTITY(Atom.atom "quot") + val cent = HTML4.ENTITY(Atom.atom "cent") + val pound = HTML4.ENTITY(Atom.atom "pound") + val yen = HTML4.ENTITY(Atom.atom "yen") + val euro = HTML4.ENTITY(Atom.atom "euro") + val copy = HTML4.ENTITY(Atom.atom "copy") + val reg = HTML4.ENTITY(Atom.atom "reg") + +(* TODO: add the full set of HTML entities *) + + end + diff --git a/smlnj-lib/HTML4/html4-lex-test-toks.sml b/smlnj-lib/HTML4/html4-lex-test-toks.sml new file mode 100644 index 0000000..5886bca --- /dev/null +++ b/smlnj-lib/HTML4/html4-lex-test-toks.sml @@ -0,0 +1,484 @@ +(* html4-lex-test-toks.sml + * + * COPYRIGHT (c) 2014 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + *) + +structure HTML4Tokens = struct + +datatype token = EOF + | OPENTAG of Atom.atom * HTML4Utils.tag_payload + | CLOSETAG of Atom.atom + | COMMENT of string + | PCDATA of string + | DOCTYPE of string + | CHAR_REF of IntInf.int + | ENTITY_REF of Atom.atom + | XML_PROCESSING of string + (* HTML 4 element tokens. *) + | STARTA of HTML4Utils.tag_payload + | ENDA + | STARTABBR of HTML4Utils.tag_payload + | ENDABBR + | STARTACRONYM of HTML4Utils.tag_payload + | ENDACRONYM + | STARTADDRESS of HTML4Utils.tag_payload + | ENDADDRESS + | STARTAPPLET of HTML4Utils.tag_payload + | ENDAPPLET + | STARTAREA of HTML4Utils.tag_payload + (* No END tag for AREA element. *) + | STARTB of HTML4Utils.tag_payload + | ENDB + | STARTBASE of HTML4Utils.tag_payload + (* No END tag for BASE element. *) + | STARTBASEFONT of HTML4Utils.tag_payload + (* No END tag for BASEFONT element. *) + | STARTBDO of HTML4Utils.tag_payload + | ENDBDO + | STARTBIG of HTML4Utils.tag_payload + | ENDBIG + | STARTBLOCKQUOTE of HTML4Utils.tag_payload + | ENDBLOCKQUOTE + | STARTBODY of HTML4Utils.tag_payload + | ENDBODY + | STARTBR of HTML4Utils.tag_payload + (* No END tag for BR element. *) + | STARTBUTTON of HTML4Utils.tag_payload + | ENDBUTTON + | STARTCAPTION of HTML4Utils.tag_payload + | ENDCAPTION + | STARTCENTER of HTML4Utils.tag_payload + | ENDCENTER + | STARTCITE of HTML4Utils.tag_payload + | ENDCITE + | STARTCODE of HTML4Utils.tag_payload + | ENDCODE + | STARTCOL of HTML4Utils.tag_payload + (* No END tag for COL element. *) + | STARTCOLGROUP of HTML4Utils.tag_payload + | ENDCOLGROUP + | STARTDD of HTML4Utils.tag_payload + | ENDDD + | STARTDEL of HTML4Utils.tag_payload + | ENDDEL + | STARTDFN of HTML4Utils.tag_payload + | ENDDFN + | STARTDIR of HTML4Utils.tag_payload + | ENDDIR + | STARTDIV of HTML4Utils.tag_payload + | ENDDIV + | STARTDL of HTML4Utils.tag_payload + | ENDDL + | STARTDT of HTML4Utils.tag_payload + | ENDDT + | STARTEM of HTML4Utils.tag_payload + | ENDEM + | STARTFIELDSET of HTML4Utils.tag_payload + | ENDFIELDSET + | STARTFONT of HTML4Utils.tag_payload + | ENDFONT + | STARTFORM of HTML4Utils.tag_payload + | ENDFORM + | STARTFRAME of HTML4Utils.tag_payload + (* No END tag for FRAME element. *) + | STARTFRAMESET of HTML4Utils.tag_payload + | ENDFRAMESET + | STARTH1 of HTML4Utils.tag_payload + | ENDH1 + | STARTH2 of HTML4Utils.tag_payload + | ENDH2 + | STARTH3 of HTML4Utils.tag_payload + | ENDH3 + | STARTH4 of HTML4Utils.tag_payload + | ENDH4 + | STARTH5 of HTML4Utils.tag_payload + | ENDH5 + | STARTH6 of HTML4Utils.tag_payload + | ENDH6 + | STARTHEAD of HTML4Utils.tag_payload + | ENDHEAD + | STARTHR of HTML4Utils.tag_payload + (* No END tag for HR element. *) + | STARTHTML of HTML4Utils.tag_payload + | ENDHTML + | STARTI of HTML4Utils.tag_payload + | ENDI + | STARTIFRAME of HTML4Utils.tag_payload + | ENDIFRAME + | STARTIMG of HTML4Utils.tag_payload + (* No END tag for IMG element. *) + | STARTINPUT of HTML4Utils.tag_payload + (* No END tag for INPUT element. *) + | STARTINS of HTML4Utils.tag_payload + | ENDINS + | STARTISINDEX of HTML4Utils.tag_payload + (* No END tag for ISINDEX element. *) + | STARTKBD of HTML4Utils.tag_payload + | ENDKBD + | STARTLABEL of HTML4Utils.tag_payload + | ENDLABEL + | STARTLEGEND of HTML4Utils.tag_payload + | ENDLEGEND + | STARTLI of HTML4Utils.tag_payload + | ENDLI + | STARTLINK of HTML4Utils.tag_payload + (* No END tag for LINK element. *) + | STARTMAP of HTML4Utils.tag_payload + | ENDMAP + | STARTMENU of HTML4Utils.tag_payload + | ENDMENU + | STARTMETA of HTML4Utils.tag_payload + (* No END tag for META element. *) + | STARTNOFRAMES of HTML4Utils.tag_payload + | ENDNOFRAMES + | STARTNOSCRIPT of HTML4Utils.tag_payload + | ENDNOSCRIPT + | STARTOBJECT of HTML4Utils.tag_payload + | ENDOBJECT + | STARTOL of HTML4Utils.tag_payload + | ENDOL + | STARTOPTGROUP of HTML4Utils.tag_payload + | ENDOPTGROUP + | STARTOPTION of HTML4Utils.tag_payload + | ENDOPTION + | STARTP of HTML4Utils.tag_payload + | ENDP + | STARTPARAM of HTML4Utils.tag_payload + (* No END tag for PARAM element. *) + | STARTPRE of HTML4Utils.tag_payload + | ENDPRE + | STARTQ of HTML4Utils.tag_payload + | ENDQ + | STARTS of HTML4Utils.tag_payload + | ENDS + | STARTSAMP of HTML4Utils.tag_payload + | ENDSAMP + | STARTSCRIPT of HTML4Utils.tag_payload + | ENDSCRIPT + | STARTSELECT of HTML4Utils.tag_payload + | ENDSELECT + | STARTSMALL of HTML4Utils.tag_payload + | ENDSMALL + | STARTSPAN of HTML4Utils.tag_payload + | ENDSPAN + | STARTSTRIKE of HTML4Utils.tag_payload + | ENDSTRIKE + | STARTSTRONG of HTML4Utils.tag_payload + | ENDSTRONG + | STARTSTYLE of HTML4Utils.tag_payload + | ENDSTYLE + | STARTSUB of HTML4Utils.tag_payload + | ENDSUB + | STARTSUP of HTML4Utils.tag_payload + | ENDSUP + | STARTTABLE of HTML4Utils.tag_payload + | ENDTABLE + | STARTTBODY of HTML4Utils.tag_payload + | ENDTBODY + | STARTTD of HTML4Utils.tag_payload + | ENDTD + | STARTTEXTAREA of HTML4Utils.tag_payload + | ENDTEXTAREA + | STARTTFOOT of HTML4Utils.tag_payload + | ENDTFOOT + | STARTTH of HTML4Utils.tag_payload + | ENDTH + | STARTTHEAD of HTML4Utils.tag_payload + | ENDTHEAD + | STARTTITLE of HTML4Utils.tag_payload + | ENDTITLE + | STARTTR of HTML4Utils.tag_payload + | ENDTR + | STARTTT of HTML4Utils.tag_payload + | ENDTT + | STARTU of HTML4Utils.tag_payload + | ENDU + | STARTUL of HTML4Utils.tag_payload + | ENDUL + | STARTVAR of HTML4Utils.tag_payload + | ENDVAR + +fun tokToString EOF = "EOF" + | tokToString (OPENTAG (tagname, tagdata)) = + String.concat ["OPENTAG ", Atom.toString tagname, " ", + HTML4Utils.payloadToStr tagdata] + | tokToString (CLOSETAG tagname) = "CLOSETAG " ^ (Atom.toString tagname) + | tokToString (DOCTYPE docdata) = "DOCTYPE " ^ docdata + | tokToString (PCDATA pcdata) = ("PCDATA \"" ^ (String.toString pcdata) + ^ "\"") + | tokToString (COMMENT comment) = "COMMENT " ^ comment + | tokToString (CHAR_REF refint) = "CHAR REF " ^ (IntInf.toString refint) + | tokToString (ENTITY_REF refatom) = "ENTITY REF " ^ (Atom.toString refatom) + | tokToString (XML_PROCESSING directive) = "XML DIRECTIVE " ^ directive + + (* Automatically generated via helper.py: *) + | tokToString (STARTA payload) = + "STARTA " ^ (HTML4Utils.payloadToStr payload) + | tokToString ENDA = "ENDA" + | tokToString (STARTABBR payload) = + "STARTABBR " ^ (HTML4Utils.payloadToStr payload) + | tokToString ENDABBR = "ENDABBR" + | tokToString (STARTACRONYM payload) = + "STARTACRONYM " ^ (HTML4Utils.payloadToStr payload) + | tokToString ENDACRONYM = "ENDACRONYM" + | tokToString (STARTADDRESS payload) = + "STARTADDRESS " ^ (HTML4Utils.payloadToStr payload) + | tokToString ENDADDRESS = "ENDADDRESS" + | tokToString (STARTAREA payload) = + "STARTAREA " ^ (HTML4Utils.payloadToStr payload) + | tokToString (STARTB payload) = + "STARTB " ^ (HTML4Utils.payloadToStr payload) + | tokToString ENDB = "ENDB" + | tokToString (STARTBASE payload) = + "STARTBASE " ^ (HTML4Utils.payloadToStr payload) + | tokToString (STARTBDO payload) = + "STARTBDO " ^ (HTML4Utils.payloadToStr payload) + | tokToString ENDBDO = "ENDBDO" + | tokToString (STARTBIG payload) = + "STARTBIG " ^ (HTML4Utils.payloadToStr payload) + | tokToString ENDBIG = "ENDBIG" + | tokToString (STARTBLOCKQUOTE payload) = + "STARTBLOCKQUOTE " ^ (HTML4Utils.payloadToStr payload) + | tokToString ENDBLOCKQUOTE = "ENDBLOCKQUOTE" + | tokToString (STARTBODY payload) = + "STARTBODY " ^ (HTML4Utils.payloadToStr payload) + | tokToString ENDBODY = "ENDBODY" + | tokToString (STARTBR payload) = + "STARTBR " ^ (HTML4Utils.payloadToStr payload) + | tokToString (STARTBUTTON payload) = + "STARTBUTTON " ^ (HTML4Utils.payloadToStr payload) + | tokToString ENDBUTTON = "ENDBUTTON" + | tokToString (STARTCAPTION payload) = + "STARTCAPTION " ^ (HTML4Utils.payloadToStr payload) + | tokToString ENDCAPTION = "ENDCAPTION" + | tokToString (STARTCITE payload) = + "STARTCITE " ^ (HTML4Utils.payloadToStr payload) + | tokToString ENDCITE = "ENDCITE" + | tokToString (STARTCODE payload) = + "STARTCODE " ^ (HTML4Utils.payloadToStr payload) + | tokToString ENDCODE = "ENDCODE" + | tokToString (STARTCOL payload) = + "STARTCOL " ^ (HTML4Utils.payloadToStr payload) + | tokToString (STARTCOLGROUP payload) = + "STARTCOLGROUP " ^ (HTML4Utils.payloadToStr payload) + | tokToString ENDCOLGROUP = "ENDCOLGROUP" + | tokToString (STARTDD payload) = + "STARTDD " ^ (HTML4Utils.payloadToStr payload) + | tokToString ENDDD = "ENDDD" + | tokToString (STARTDEL payload) = + "STARTDEL " ^ (HTML4Utils.payloadToStr payload) + | tokToString ENDDEL = "ENDDEL" + | tokToString (STARTDFN payload) = + "STARTDFN " ^ (HTML4Utils.payloadToStr payload) + | tokToString ENDDFN = "ENDDFN" + | tokToString (STARTDIV payload) = + "STARTDIV " ^ (HTML4Utils.payloadToStr payload) + | tokToString ENDDIV = "ENDDIV" + | tokToString (STARTDL payload) = + "STARTDL " ^ (HTML4Utils.payloadToStr payload) + | tokToString ENDDL = "ENDDL" + | tokToString (STARTDT payload) = + "STARTDT " ^ (HTML4Utils.payloadToStr payload) + | tokToString ENDDT = "ENDDT" + | tokToString (STARTEM payload) = + "STARTEM " ^ (HTML4Utils.payloadToStr payload) + | tokToString ENDEM = "ENDEM" + | tokToString (STARTFIELDSET payload) = + "STARTFIELDSET " ^ (HTML4Utils.payloadToStr payload) + | tokToString ENDFIELDSET = "ENDFIELDSET" + | tokToString (STARTFORM payload) = + "STARTFORM " ^ (HTML4Utils.payloadToStr payload) + | tokToString ENDFORM = "ENDFORM" + | tokToString (STARTH1 payload) = + "STARTH1 " ^ (HTML4Utils.payloadToStr payload) + | tokToString ENDH1 = "ENDH1" + | tokToString (STARTH2 payload) = + "STARTH2 " ^ (HTML4Utils.payloadToStr payload) + | tokToString ENDH2 = "ENDH2" + | tokToString (STARTH3 payload) = + "STARTH3 " ^ (HTML4Utils.payloadToStr payload) + | tokToString ENDH3 = "ENDH3" + | tokToString (STARTH4 payload) = + "STARTH4 " ^ (HTML4Utils.payloadToStr payload) + | tokToString ENDH4 = "ENDH4" + | tokToString (STARTH5 payload) = + "STARTH5 " ^ (HTML4Utils.payloadToStr payload) + | tokToString ENDH5 = "ENDH5" + | tokToString (STARTH6 payload) = + "STARTH6 " ^ (HTML4Utils.payloadToStr payload) + | tokToString ENDH6 = "ENDH6" + | tokToString (STARTHEAD payload) = + "STARTHEAD " ^ (HTML4Utils.payloadToStr payload) + | tokToString ENDHEAD = "ENDHEAD" + | tokToString (STARTHR payload) = + "STARTHR " ^ (HTML4Utils.payloadToStr payload) + | tokToString (STARTHTML payload) = + "STARTHTML " ^ (HTML4Utils.payloadToStr payload) + | tokToString ENDHTML = "ENDHTML" + | tokToString (STARTI payload) = + "STARTI " ^ (HTML4Utils.payloadToStr payload) + | tokToString ENDI = "ENDI" + | tokToString (STARTIMG payload) = + "STARTIMG " ^ (HTML4Utils.payloadToStr payload) + | tokToString (STARTINPUT payload) = + "STARTINPUT " ^ (HTML4Utils.payloadToStr payload) + | tokToString (STARTINS payload) = + "STARTINS " ^ (HTML4Utils.payloadToStr payload) + | tokToString ENDINS = "ENDINS" + | tokToString (STARTKBD payload) = + "STARTKBD " ^ (HTML4Utils.payloadToStr payload) + | tokToString ENDKBD = "ENDKBD" + | tokToString (STARTLABEL payload) = + "STARTLABEL " ^ (HTML4Utils.payloadToStr payload) + | tokToString ENDLABEL = "ENDLABEL" + | tokToString (STARTLEGEND payload) = + "STARTLEGEND " ^ (HTML4Utils.payloadToStr payload) + | tokToString ENDLEGEND = "ENDLEGEND" + | tokToString (STARTLI payload) = + "STARTLI " ^ (HTML4Utils.payloadToStr payload) + | tokToString ENDLI = "ENDLI" + | tokToString (STARTLINK payload) = + "STARTLINK " ^ (HTML4Utils.payloadToStr payload) + | tokToString (STARTMAP payload) = + "STARTMAP " ^ (HTML4Utils.payloadToStr payload) + | tokToString ENDMAP = "ENDMAP" + | tokToString (STARTMETA payload) = + "STARTMETA " ^ (HTML4Utils.payloadToStr payload) + | tokToString (STARTNOSCRIPT payload) = + "STARTNOSCRIPT " ^ (HTML4Utils.payloadToStr payload) + | tokToString ENDNOSCRIPT = "ENDNOSCRIPT" + | tokToString (STARTOBJECT payload) = + "STARTOBJECT " ^ (HTML4Utils.payloadToStr payload) + | tokToString ENDOBJECT = "ENDOBJECT" + | tokToString (STARTOL payload) = + "STARTOL " ^ (HTML4Utils.payloadToStr payload) + | tokToString ENDOL = "ENDOL" + | tokToString (STARTOPTGROUP payload) = + "STARTOPTGROUP " ^ (HTML4Utils.payloadToStr payload) + | tokToString ENDOPTGROUP = "ENDOPTGROUP" + | tokToString (STARTOPTION payload) = + "STARTOPTION " ^ (HTML4Utils.payloadToStr payload) + | tokToString ENDOPTION = "ENDOPTION" + | tokToString (STARTP payload) = + "STARTP " ^ (HTML4Utils.payloadToStr payload) + | tokToString ENDP = "ENDP" + | tokToString (STARTPARAM payload) = + "STARTPARAM " ^ (HTML4Utils.payloadToStr payload) + | tokToString (STARTPRE payload) = + "STARTPRE " ^ (HTML4Utils.payloadToStr payload) + | tokToString ENDPRE = "ENDPRE" + | tokToString (STARTQ payload) = + "STARTQ " ^ (HTML4Utils.payloadToStr payload) + | tokToString ENDQ = "ENDQ" + | tokToString (STARTSAMP payload) = + "STARTSAMP " ^ (HTML4Utils.payloadToStr payload) + | tokToString ENDSAMP = "ENDSAMP" + | tokToString (STARTSCRIPT payload) = + "STARTSCRIPT " ^ (HTML4Utils.payloadToStr payload) + | tokToString ENDSCRIPT = "ENDSCRIPT" + | tokToString (STARTSELECT payload) = + "STARTSELECT " ^ (HTML4Utils.payloadToStr payload) + | tokToString ENDSELECT = "ENDSELECT" + | tokToString (STARTSMALL payload) = + "STARTSMALL " ^ (HTML4Utils.payloadToStr payload) + | tokToString ENDSMALL = "ENDSMALL" + | tokToString (STARTSPAN payload) = + "STARTSPAN " ^ (HTML4Utils.payloadToStr payload) + | tokToString ENDSPAN = "ENDSPAN" + | tokToString (STARTSTRONG payload) = + "STARTSTRONG " ^ (HTML4Utils.payloadToStr payload) + | tokToString ENDSTRONG = "ENDSTRONG" + | tokToString (STARTSTYLE payload) = + "STARTSTYLE " ^ (HTML4Utils.payloadToStr payload) + | tokToString ENDSTYLE = "ENDSTYLE" + | tokToString (STARTSUB payload) = + "STARTSUB " ^ (HTML4Utils.payloadToStr payload) + | tokToString ENDSUB = "ENDSUB" + | tokToString (STARTSUP payload) = + "STARTSUP " ^ (HTML4Utils.payloadToStr payload) + | tokToString ENDSUP = "ENDSUP" + | tokToString (STARTTABLE payload) = + "STARTTABLE " ^ (HTML4Utils.payloadToStr payload) + | tokToString ENDTABLE = "ENDTABLE" + | tokToString (STARTTBODY payload) = + "STARTTBODY " ^ (HTML4Utils.payloadToStr payload) + | tokToString ENDTBODY = "ENDTBODY" + | tokToString (STARTTD payload) = + "STARTTD " ^ (HTML4Utils.payloadToStr payload) + | tokToString ENDTD = "ENDTD" + | tokToString (STARTTEXTAREA payload) = + "STARTTEXTAREA " ^ (HTML4Utils.payloadToStr payload) + | tokToString ENDTEXTAREA = "ENDTEXTAREA" + | tokToString (STARTTFOOT payload) = + "STARTTFOOT " ^ (HTML4Utils.payloadToStr payload) + | tokToString ENDTFOOT = "ENDTFOOT" + | tokToString (STARTTH payload) = + "STARTTH " ^ (HTML4Utils.payloadToStr payload) + | tokToString ENDTH = "ENDTH" + | tokToString (STARTTHEAD payload) = + "STARTTHEAD " ^ (HTML4Utils.payloadToStr payload) + | tokToString ENDTHEAD = "ENDTHEAD" + | tokToString (STARTTITLE payload) = + "STARTTITLE " ^ (HTML4Utils.payloadToStr payload) + | tokToString ENDTITLE = "ENDTITLE" + | tokToString (STARTTR payload) = + "STARTTR " ^ (HTML4Utils.payloadToStr payload) + | tokToString ENDTR = "ENDTR" + | tokToString (STARTTT payload) = + "STARTTT " ^ (HTML4Utils.payloadToStr payload) + | tokToString ENDTT = "ENDTT" + | tokToString (STARTUL payload) = + "STARTUL " ^ (HTML4Utils.payloadToStr payload) + | tokToString ENDUL = "ENDUL" + | tokToString (STARTVAR payload) = + "STARTVAR " ^ (HTML4Utils.payloadToStr payload) + | tokToString ENDVAR = "ENDVAR" + | tokToString (STARTAPPLET payload) = + "STARTAPPLET " ^ (HTML4Utils.payloadToStr payload) + | tokToString ENDAPPLET = "ENDAPPLET" + | tokToString (STARTBASEFONT payload) = + "STARTBASEFONT " ^ (HTML4Utils.payloadToStr payload) + | tokToString (STARTCENTER payload) = + "STARTCENTER " ^ (HTML4Utils.payloadToStr payload) + | tokToString ENDCENTER = "ENDCENTER" + | tokToString (STARTDIR payload) = + "STARTDIR " ^ (HTML4Utils.payloadToStr payload) + | tokToString ENDDIR = "ENDDIR" + | tokToString (STARTFONT payload) = + "STARTFONT " ^ (HTML4Utils.payloadToStr payload) + | tokToString ENDFONT = "ENDFONT" + | tokToString (STARTIFRAME payload) = + "STARTIFRAME " ^ (HTML4Utils.payloadToStr payload) + | tokToString ENDIFRAME = "ENDIFRAME" + | tokToString (STARTISINDEX payload) = + "STARTISINDEX " ^ (HTML4Utils.payloadToStr payload) + | tokToString (STARTMENU payload) = + "STARTMENU " ^ (HTML4Utils.payloadToStr payload) + | tokToString ENDMENU = "ENDMENU" + | tokToString (STARTS payload) = + "STARTS " ^ (HTML4Utils.payloadToStr payload) + | tokToString ENDS = "ENDS" + | tokToString (STARTSTRIKE payload) = + "STARTSTRIKE " ^ (HTML4Utils.payloadToStr payload) + | tokToString ENDSTRIKE = "ENDSTRIKE" + | tokToString (STARTU payload) = + "STARTU " ^ (HTML4Utils.payloadToStr payload) + | tokToString ENDU = "ENDU" + | tokToString (STARTFRAME payload) = + "STARTFRAME " ^ (HTML4Utils.payloadToStr payload) + | tokToString (STARTFRAMESET payload) = + "STARTFRAMESET " ^ (HTML4Utils.payloadToStr payload) + | tokToString ENDFRAMESET = "ENDFRAMESET" + | tokToString (STARTNOFRAMES payload) = + "STARTNOFRAMES " ^ (HTML4Utils.payloadToStr payload) + | tokToString ENDNOFRAMES = "ENDNOFRAMES" + + (* Should cause a "match redundant" error if code is all in synch: *) + (* | tokToString _ = "???" *) + +end + +(* ______________________________________________________________________ + End of html4-lex-test-toks.sml + ______________________________________________________________________ *) diff --git a/smlnj-lib/HTML4/html4-lex-test.cm b/smlnj-lib/HTML4/html4-lex-test.cm new file mode 100644 index 0000000..a915ada --- /dev/null +++ b/smlnj-lib/HTML4/html4-lex-test.cm @@ -0,0 +1,22 @@ +(* html4-lex-test.cm + * + * COPYRIGHT (c) 2014 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + *) + +Group is + $/basis.cm + $/smlnj-lib.cm + $/ml-lpt-lib.cm + + html4-utils.sml + html4-lex-test-toks.sml + html4-attr.l : ml-ulex + html4-attr.g : ml-antlr + html4-token-utils.sml + html4.l : ml-ulex + html4-lex-test.sml + +(* ______________________________________________________________________ + End of html4-lex-test.cm + ______________________________________________________________________ *) diff --git a/smlnj-lib/HTML4/html4-lex-test.sml b/smlnj-lib/HTML4/html4-lex-test.sml new file mode 100644 index 0000000..8e74dcc --- /dev/null +++ b/smlnj-lib/HTML4/html4-lex-test.sml @@ -0,0 +1,44 @@ +(* html4-lex-test.sml + * + * COPYRIGHT (c) 2014 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + *) + +structure Test = struct + +open HTML4Tokens + +fun handle_tok tok = print ((tokToString tok) ^ "\n") + +fun handle_toks (source_map, lex_stream) = + let val (tok, span, lex_stream') = HTML4Lexer.lex source_map lex_stream + handle ex => + (print ("Exception at " ^ (Int.toString (HTML4Lexer.getPos + lex_stream)) ^ + "\n"); raise ex) + in + handle_tok tok; + (* XXX Getting some weird equality type complaint if I use the + equality operator here... *) + case tok of EOF => () | _ => handle_toks(source_map, lex_stream') + end + +fun handle_file file_name = + let + val source_map = AntlrStreamPos.mkSourcemap () + val in_strm = TextIO.openIn file_name + val lex_strm = HTML4Lexer.streamifyInstream in_strm + val _ = handle_toks(source_map, lex_strm) + handle ex => (TextIO.closeIn in_strm; + raise ex) + in + TextIO.closeIn in_strm + end + +fun main (_, args) = (List.app handle_file args; OS.Process.success) + +end + +(* ______________________________________________________________________ + End of html4-lex-test.sml + ______________________________________________________________________ *) diff --git a/smlnj-lib/HTML4/html4-lib.cm b/smlnj-lib/HTML4/html4-lib.cm new file mode 100644 index 0000000..7736208 --- /dev/null +++ b/smlnj-lib/HTML4/html4-lib.cm @@ -0,0 +1,49 @@ +(* html4-lib.cm + * + * COPYRIGHT (c) 2014 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * CM file for the HTML4 library. + *) + +Library + + signature HTML4 + structure HTML4 + structure HTML4Attrs + structure HTML4Entities + structure HTML4Parser + structure HTML4Tokens + structure HTML4TokenUtils + structure HTML4Utils + structure HTML4Print + +is + $/basis.cm + $/smlnj-lib.cm + $/ml-lpt-lib.cm + +#if defined(NO_PLUGINS) + html4.g.sml + html4.l.sml + html4-attr.g.sml + html4-attr.l.sml +#else + html4.g : ml-antlr + html4.l : ml-ulex + html4-attr.g : ml-antlr + html4-attr.l : ml-ulex +#endif + + html4.sig + html4.sml + html4-attrs.sml + html4-entities.sml + html4-parser.sml + html4-print.sml + html4-token-utils.sml + html4-utils.sml + +(* ______________________________________________________________________ + End of html4-lib.cm + ______________________________________________________________________ *) diff --git a/smlnj-lib/HTML4/html4-parser.sml b/smlnj-lib/HTML4/html4-parser.sml new file mode 100644 index 0000000..7e7a36b --- /dev/null +++ b/smlnj-lib/HTML4/html4-parser.sml @@ -0,0 +1,707 @@ +(* html4-parser.sml + * + * COPYRIGHT (c) 2014 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * Defines the HTML4Parser structure, which defunctorizes the + * automatically generated parser, defines an additional set of + * utilities for working with the parser. + *) + +structure HTML4Parser = struct + +structure H4 = HTML4 +structure H4U = HTML4Utils +structure H4T = HTML4Tokens +structure H4TU = HTML4TokenUtils +structure AtomMap = H4TU.AtomMap + +local + structure TheParser = HTML4ParseFn(HTML4Lexer) +in +open TheParser +end + +fun parseStream inStream = + let + val sourceMap = AntlrStreamPos.mkSourcemap () + val lex = HTML4Lexer.lex sourceMap + val stream = HTML4Lexer.streamifyInstream inStream + val (result, _, _) = parse lex stream + in + result + end + +exception IllFormedHTMLParseStream of + H4T.token H4U.parsevisitation H4U.stream * string option + +val tokVisitationToString = H4U.visitationToString H4T.toString + +val strVisitationToString = H4U.visitationToString (fn x : string => x) + +val visitationSimilar = let + fun tokSimilarToString (tok1, tokStr2) = (H4T.toString tok1) = tokStr2 +in H4U.visitationSame tokSimilarToString end + +fun expect expectedVisit pstrm = + let val pstrmHd = H4U.stream_hd pstrm + handle _ => H4U.VisitT H4T.EOF + fun expectationError () = + let val msg = String.concat + ["Expected ", + strVisitationToString expectedVisit, ", got ", + tokVisitationToString pstrmHd, " instead."] + in IllFormedHTMLParseStream(pstrm, SOME msg) end + in + if visitationSimilar(pstrmHd, expectedVisit) then H4U.stream_tl pstrm + else raise (expectationError()) + end + +fun expectEnterNT nt = expect (H4U.EnterNT (Atom.atom nt)) + +fun expectExitNT nt = expect (H4U.ExitNT (Atom.atom nt)) + +fun expectVisitT tokStr = expect (H4U.VisitT tokStr) + +fun expectEnterNTInDomain ntMap pstrm = let + val pstrmHd = H4U.stream_hd pstrm + handle _ => H4U.VisitT H4T.EOF + fun expectationError () = let + val expectedNTs = String.concatWith ", " (map Atom.toString (AtomMap.listKeys ntMap)) + val msg = String.concat [ + "Expected entry of one of ", expectedNTs, "; got ", + tokVisitationToString pstrmHd, " instead." + ] + in + IllFormedHTMLParseStream(pstrm, SOME msg) + end + in + case pstrmHd + of H4U.EnterNT ntAtom => + if AtomMap.inDomain (ntMap, ntAtom) + then AtomMap.lookup (ntMap, ntAtom) + else raise (expectationError ()) + | _ => raise (expectationError ()) + (* end case *) + end + +fun optional optVisit (strm as H4U.StreamCons(strmHd, _)) = + if visitationSimilar(strmHd, optVisit) + then (H4U.stream_tl strm, SOME strmHd) + else (strm, NONE) + | optional _ _ = (H4U.StreamNil, NONE) + +fun optVisitTok tokName strm = + case optional (H4U.VisitT tokName) strm + of (strm', SOME (H4U.VisitT tok)) => (strm', SOME tok) + | _ => (strm, NONE) + +fun isEnterNT nt pstrm = (expectEnterNT nt pstrm; true) + handle IllFormedHTMLParseStream _ => false + +fun isExitNT nt pstrm = (expectExitNT nt pstrm; true) + handle IllFormedHTMLParseStream _ => false + +fun isVisitT tokStr pstrm = (expectVisitT tokStr pstrm; true) + handle IllFormedHTMLParseStream _ => false + +fun isEither (is1, is2) pstrm = (is1 pstrm) orelse (is2 pstrm) + +fun streamSkipUntil _ H4U.StreamNil = H4U.StreamNil + | streamSkipUntil pred (strm as H4U.StreamCons (strmHd, _)) = + if pred strmHd then strm else streamSkipUntil pred (H4U.stream_tl strm) + +fun streamSkipWhile pred = streamSkipUntil (fn strmHd => not (pred strmHd)) + +fun streamConsumeUntil consumer until strm = + let fun streamConsumeUntil' strm' acc = + if until strm' then (strm', rev acc) + else let val (strm'', consumerVal) = consumer strm' + in streamConsumeUntil' strm'' (consumerVal :: acc) end + in streamConsumeUntil' strm [] end + +fun tokIsSpace (H4T.PCDATA pcstr) = CharVector.all Char.isSpace pcstr + | tokIsSpace _ = false + +fun tokIsComment (H4T.COMMENT _) = true + | tokIsComment _ = false + +fun visitationIsSpace (H4U.VisitT tok) = tokIsSpace tok + | visitationIsSpace _ = false + +(* XXX I don't like the solution of skipping both whitespace and +comments, but I don't know how to munge CDATA and comments into block +elements, given the current HTML 4 data structure (I could add these, +but it would break the "purity" of the existing data type). *) + +fun visitationIsSpaceOrComment (H4U.VisitT tok) = (tokIsSpace tok) orelse + (tokIsComment tok) + | visitationIsSpaceOrComment _ = false + +val skipWhitespace = streamSkipWhile visitationIsSpace + +val skipWhitespaceOrComment = streamSkipWhile visitationIsSpaceOrComment + +fun tokIsCdata (H4T.PCDATA _) = true + | tokIsCdata (H4T.ENTITY_REF _) = true + | tokIsCdata (H4T.CHAR_REF _) = true + | tokIsCdata (H4T.COMMENT _) = true + | tokIsCdata _ = false + +fun isNotCdata (H4U.StreamCons(H4U.VisitT tok, _)) = not (tokIsCdata tok) + | isNotCdata _ = true + +exception InvalidToken of H4T.token + +fun tokToCdata (H4T.PCDATA str) = H4.PCDATA str + | tokToCdata (H4T.ENTITY_REF ent) = H4.ENTITY ent + | tokToCdata (H4T.CHAR_REF chr) = H4.CHAR chr + | tokToCdata (H4T.COMMENT cmt) = H4.COMMENT cmt + | tokToCdata tok = raise (InvalidToken tok) + +(*+DEBUG*) +fun tokToString (H4T.DOCTYPE doctypeStr) = doctypeStr + | tokToString (H4T.PCDATA dataStr) = ("PCDATA \"" ^ (String.toString dataStr) + ^ "\"") + | tokToString (H4T.COMMENT commentStr) = commentStr + | tokToString tok = H4TU.tokToString tok + +fun printVisitationStream strm = + print ((H4U.visitationToString tokToString (H4U.stream_hd strm)) ^ "\n") + +fun printIllFormedErr (IllFormedHTMLParseStream (strm, msgOpt)) = ( + print "Error in parse stream at: "; + printVisitationStream strm; + (case msgOpt of SOME msg => print(concat["Message: ", msg, "\n"]) | _ => ())) + | printIllFormedErr exn = raise exn +(*-DEBUG*) + +fun getAttrsFromStream (H4U.StreamCons (H4U.VisitT tok, _)) = + (case H4TU.tokGetAttrs tok + of SOME attrs => attrs + | NONE => []) + | getAttrsFromStream _ = [] + +fun html0aryFromParseStream tag ctor pstrm = + let val pstrm1 = expectEnterNT tag pstrm + val pstrm2 = expectVisitT ("START" ^ tag) pstrm1 + val attrs = getAttrsFromStream pstrm1 + val pstrm3 = expectExitNT tag (skipWhitespaceOrComment pstrm2) + in + (pstrm3, SOME (ctor attrs)) + end + +fun listOfOptsToList lst = map Option.valOf lst +(*DEBUG*) handle ex => raise ex + +fun htmlNaryFromParseStream tag ctor childFromParseStream pstrm0 = + let val endTag = "END" ^ tag + val pstrm1 = expectEnterNT tag pstrm0 + val pstrm2 = expectVisitT ("START" ^ tag) pstrm1 + val attrs = getAttrsFromStream pstrm1 + val (pstrm3, children) = + streamConsumeUntil childFromParseStream + (isEither (isVisitT endTag, isExitNT tag)) + (skipWhitespaceOrComment pstrm2) + val (pstrm4, _) = optVisitTok endTag pstrm3 + val pstrm5 = expectExitNT tag (skipWhitespaceOrComment pstrm4) + in (pstrm5, SOME (ctor (attrs, listOfOptsToList children))) end + +type parseVisitStream = H4T.token H4U.parsevisitation H4U.stream + +(* FIXME: might as well use AtomTable.hash_table for these, since we are initializing them later *) +val headContentNTMap : (parseVisitStream -> parseVisitStream * H4.head_content option) AtomMap.map ref = + ref AtomMap.empty + +val blockNTMap : (parseVisitStream -> parseVisitStream * H4.block option) AtomMap.map ref = + ref AtomMap.empty + +val inlineNTMap : (parseVisitStream -> parseVisitStream * H4.inline option) AtomMap.map ref = + ref AtomMap.empty + +val tableDataNTMap : (parseVisitStream -> parseVisitStream * H4.table_data option) AtomMap.map ref = + ref AtomMap.empty + +fun cvtBlock ctor (SOME block) = SOME (ctor block) + | cvtBlock _ NONE = NONE + +fun cvtInline ctor (SOME inline) = SOME (ctor inline) + | cvtInline _ NONE = NONE + +fun cvtFlow ctor (SOME flow) = SOME (ctor flow) + | cvtFlow _ _ = NONE + +fun cvtOption ctor (SOME htmlopt) = SOME (ctor htmlopt) + | cvtOption _ _ = NONE + +fun cvtParam ctor (SOME param) = SOME (ctor param) + | cvtParam _ _ = NONE + +fun cvtFrameset ctor (SOME frameset) = SOME (ctor frameset) + | cvtFrameset _ NONE = NONE + +fun cvtScript ctor (SOME script) = SOME (ctor script) + | cvtScript _ _ = NONE + +fun cdataFromParseStream pstrm = + if isNotCdata pstrm + then raise (IllFormedHTMLParseStream(pstrm, + SOME "Expected character data.")) + else + let val pstrmHd = H4U.stream_hd pstrm + val pstrmTl = H4U.stream_tl pstrm + in case pstrmHd + of H4U.VisitT tok => (pstrmTl, SOME (tokToCdata tok)) + | _ => (pstrmTl, NONE) + end + +fun htmlFromParseStream pstrm0 = + let val pstrm1 = + (skipWhitespaceOrComment o (expectEnterNT "DOCUMENT")) pstrm0 + val (pstrm2, doctypeTokOpt) = optVisitTok "DOCTYPE" pstrm1 + val theVersion = (case doctypeTokOpt + of SOME (H4T.DOCTYPE doctype) => SOME doctype + | _ => NONE) + val (pstrm3, starthtmlTokOpt) = + optVisitTok "STARTHTML" (skipWhitespaceOrComment pstrm2) + val (pstrm4, headDataListOpt) = headFromParseStream + (skipWhitespaceOrComment pstrm3) + in if not (isSome headDataListOpt) + then (pstrm4, NONE) + else (case bodyOrFramesetFromParseStream pstrm4 + of (pstrm5, SOME content) => let + val (pstrm6, _) = optVisitTok "ENDHTML" pstrm5 + val pstrm7 = (skipWhitespaceOrComment o + (expectExitNT "DOCUMENT") o + skipWhitespaceOrComment) pstrm6 + in ( + pstrm7, + SOME (H4.HTML{ + version = theVersion, + head = [], + content = content + }) + ) end + | (pstrm5, NONE) => (pstrm5, NONE) + (* end case *)) + end +and headFromParseStream pstrm0 = + let val pstrm1 = (skipWhitespaceOrComment o (expectEnterNT "HEAD")) pstrm0 + val (pstrm2, startheadTokOpt) = optVisitTok "STARTHEAD" pstrm1 + val (pstrm3, children) = + streamConsumeUntil headContentFromParseStream + (isEither(isExitNT "HEAD", isVisitT "ENDHEAD")) + (skipWhitespaceOrComment pstrm2) + val (pstrm4, _) = optVisitTok "ENDHEAD" pstrm3 + val pstrm5 = expectExitNT "HEAD" (skipWhitespaceOrComment pstrm4) + in (pstrm5, SOME (listOfOptsToList children)) end +and headContentFromParseStream pstrm = + let val ntFunc = expectEnterNTInDomain (!headContentNTMap) pstrm + val (pstrm', resultOpt) = ntFunc pstrm + in (skipWhitespaceOrComment pstrm', resultOpt) end +and bodyOrFramesetFromParseStream pstrm = + let fun cvtBody (SOME body) = SOME (H4.BodyOrFrameset_BODY body) + | cvtBody _ = NONE + in + if isEnterNT "BODY" pstrm + then let val (pstrm', bodyOpt) = bodyFromParseStream pstrm + in (pstrm', cvtBody bodyOpt) end + else let val (pstrm', framesetOpt) = framesetFromParseStream pstrm + in (pstrm', + cvtFrameset H4.BodyOrFrameset_FRAMESET framesetOpt) end + end +and bodyFromParseStream pstrm0 = + let val pstrm1 = expectEnterNT "BODY" pstrm0 + val (pstrm2, startbodyTokOpt) = optVisitTok "STARTBODY" pstrm1 + val attrs = (case startbodyTokOpt + of SOME startbody => (case H4TU.tokGetAttrs startbody + of SOME attrs => attrs + | NONE => [] + (* end case *)) + | NONE => [] + (* end case *)) + val (pstrm3, children) = + streamConsumeUntil blockOrScriptFromParseStream + (isEither(isExitNT "BODY", isVisitT "ENDBODY")) + (skipWhitespaceOrComment pstrm2) + val (pstrm4, _) = optVisitTok "ENDBODY" pstrm3 + val pstrm5 = expectExitNT "BODY" (skipWhitespaceOrComment pstrm4) + in (pstrm5, SOME (H4.BODY (attrs, listOfOptsToList children))) end +and framesetFromParseStream pstrm0 = + let val pstrm1 = expectEnterNT "FRAMESET" pstrm0 + val pstrm2 = expectVisitT "STARTFRAMESET" pstrm1 + val attrs = getAttrsFromStream pstrm1 + val (pstrm3, children) = + streamConsumeUntil framesetOrFrameFromParseStream + (isEither(isVisitT "ENDFRAMESET", + isEnterNT "NOFRAMES")) + (skipWhitespaceOrComment pstrm2) + val (pstrm4, noframesOpt) = + if isEnterNT "NOFRAMES" pstrm3 then + let val (pstrm4', noframesOpt') = + noFramesFromParseStream pstrm3 + in (skipWhitespaceOrComment pstrm4', noframesOpt') end + else (pstrm3, NONE) + val pstrm5 = expectVisitT "ENDFRAMESET" pstrm4 + val pstrm6 = expectExitNT "FRAMESET" (skipWhitespaceOrComment pstrm5) + in + (pstrm6, SOME (H4.FRAMESET (attrs, listOfOptsToList children, + noframesOpt))) + end +and framesetOrFrameFromParseStream pstrm0 = + let val pstrm1 = skipWhitespaceOrComment pstrm0 + val (pstrm2, result) = + if isEnterNT "FRAMESET" pstrm1 + then let val (pstrm', result') = framesetFromParseStream pstrm1 + in (pstrm', + cvtFrameset H4.FramesetOrFrame_FRAMESET result') end + else html0aryFromParseStream "FRAME" H4.FRAME pstrm1 + in (skipWhitespaceOrComment pstrm2, result) end +and noFramesFromParseStream pstrm0 = + let val pstrm1 = expectEnterNT "NOFRAMES" pstrm0 + val pstrm2 = expectVisitT "STARTNOFRAMES" pstrm1 + val attrs = getAttrsFromStream pstrm1 + val (pstrm3, bodyOpt) = bodyFromParseStream pstrm2 + val pstrm4 = expectVisitT "ENDNOFRAMES" pstrm3 + val pstrm5 = expectExitNT "NOFRAMES" pstrm4 + in (pstrm5, SOME (H4.NOFRAMES (attrs, valOf bodyOpt))) +(* DEBUG *)handle ex => raise ex +end +and flowFromParseStream pstrm = + let val pstrmHd = H4U.stream_hd pstrm + fun procInline pstrm = + let val (pstrm', result') = inlineFromParseStream pstrm + in (pstrm', cvtInline H4.Flow_INLINE result') end + in case pstrmHd + of H4U.EnterNT ntAtom => + if AtomMap.inDomain (!blockNTMap, ntAtom) + then let val (pstrm', result') = blockFromParseStream pstrm + in (pstrm', cvtBlock H4.Flow_BLOCK result') end + else procInline pstrm + | _ => procInline pstrm + end +and blockFromParseStream pstrm = + (expectEnterNTInDomain (!blockNTMap) pstrm) pstrm +and inlineFromParseStream pstrm = + let val pstrmHd = H4U.stream_hd pstrm + in case pstrmHd + of H4U.VisitT tok => + let val (pstrm', cdataOptList) = + streamConsumeUntil cdataFromParseStream isNotCdata pstrm + in (pstrm', SOME (H4.CDATA (listOfOptsToList cdataOptList))) end + | _ => (expectEnterNTInDomain (!inlineNTMap) pstrm) pstrm + end +and listItemFromParseStream pstrm = + htmlNaryFromParseStream "LI" H4.LI flowFromParseStream pstrm +and scriptFromParseStream pstrm = + htmlNaryFromParseStream "SCRIPT" H4.SCRIPT cdataFromParseStream pstrm +and paramFromParseStream pstrm = + html0aryFromParseStream "PARAM" H4.PARAM pstrm +and legendFromParseStream pstrm = + htmlNaryFromParseStream "LEGEND" H4.LEGEND inlineFromParseStream pstrm +and defTermOrDescFromParseStream pstrm = + if isEnterNT "DT" pstrm + then htmlNaryFromParseStream "DT" H4.DT inlineFromParseStream pstrm + else htmlNaryFromParseStream "DD" H4.DD flowFromParseStream pstrm +and tableDataFromParseStream pstrm = + (expectEnterNTInDomain (!tableDataNTMap) pstrm) pstrm +and trFromParseStream pstrm = + htmlNaryFromParseStream "TR" H4.TR thOrTdFromParseStream pstrm +and thOrTdFromParseStream pstrm = + if isEnterNT "TH" pstrm + then htmlNaryFromParseStream "TH" H4.TH flowFromParseStream pstrm + else htmlNaryFromParseStream "TD" H4.TD flowFromParseStream pstrm +and optgroupOrOptionFromParseStream pstrm = + if isEnterNT "OPTGROUP" pstrm + then let + fun parseOpt pstrm = (case htmlOptionFromParseStream pstrm + of (pstrm', SOME(H4.OPTION stuff)) => (pstrm', SOME stuff) + | (pstrm', _) => (pstrm', NONE) + (* end case *)) + in htmlNaryFromParseStream "OPTGROUP" H4.OPTGROUP parseOpt pstrm end + else htmlOptionFromParseStream pstrm +and htmlOptionFromParseStream pstrm = + htmlNaryFromParseStream "OPTION" H4.OPTION cdataFromParseStream pstrm +and flowOrParamFromParseStream pstrm = + if isEnterNT "PARAM" pstrm + then let val (pstrm', paramOpt) = paramFromParseStream pstrm + in (pstrm', cvtParam H4.FlowOrParam_PARAM paramOpt) end + else let val (pstrm', flowOpt) = flowFromParseStream pstrm + in (pstrm', cvtFlow H4.FlowOrParam_FLOW flowOpt) end +and blockOrScriptFromParseStream pstrm = + if isEnterNT "SCRIPT" pstrm + then let val (pstrm', scriptOpt) = scriptFromParseStream pstrm + in (skipWhitespaceOrComment pstrm', + cvtScript H4.BlockOrScript_SCRIPT scriptOpt) end + else let val (pstrm', blockOpt) = blockFromParseStream pstrm + in (skipWhitespaceOrComment pstrm', + cvtBlock H4.BlockOrScript_BLOCK blockOpt) end +and blockOrAreaFromParseStream pstrm = + if isEnterNT "AREA" pstrm + then html0aryFromParseStream "AREA" H4.AREA pstrm + else let val (pstrm', blockOpt) = blockFromParseStream pstrm + in (pstrm', cvtBlock H4.BlockOrArea_BLOCK blockOpt) end +and headObjectFromParseStream pstrm = + htmlNaryFromParseStream "OBJECT" H4.Head_OBJECT flowOrParamFromParseStream + pstrm +and headScriptFromParseStream pstrm = + let val (pstrm', scriptOpt) = scriptFromParseStream pstrm + in (pstrm', cvtScript H4.Head_SCRIPT scriptOpt) end + +val titleFromParseStream = + htmlNaryFromParseStream "TITLE" H4.Head_TITLE cdataFromParseStream +val baseFromParseStream = html0aryFromParseStream "BASE" H4.Head_BASE +val metaFromParseStream = html0aryFromParseStream "META" H4.Head_META +val linkFromParseStream = html0aryFromParseStream "LINK" H4.Head_LINK +val pFromParseStream = htmlNaryFromParseStream "P" H4.P inlineFromParseStream +val h1FromParseStream = + htmlNaryFromParseStream "H1" H4.H1 inlineFromParseStream +val h2FromParseStream = + htmlNaryFromParseStream "H2" H4.H2 inlineFromParseStream +val h3FromParseStream = + htmlNaryFromParseStream "H3" H4.H3 inlineFromParseStream +val h4FromParseStream = + htmlNaryFromParseStream "H4" H4.H4 inlineFromParseStream +val h5FromParseStream = + htmlNaryFromParseStream "H5" H4.H5 inlineFromParseStream +val h6FromParseStream = + htmlNaryFromParseStream "H6" H4.H6 inlineFromParseStream +val ulFromParseStream = + htmlNaryFromParseStream "UL" H4.UL listItemFromParseStream +val olFromParseStream = + htmlNaryFromParseStream "OL" H4.OL listItemFromParseStream +val dirFromParseStream = + htmlNaryFromParseStream "DIR" H4.DIR listItemFromParseStream +val menuFromParseStream = + htmlNaryFromParseStream "MENU" H4.MENU listItemFromParseStream +val preFromParseStream = + (* XXX This will not properly track whitespace currently. *) + htmlNaryFromParseStream "PRE" H4.PRE inlineFromParseStream +val dlFromParseStream = + htmlNaryFromParseStream "DL" H4.DL defTermOrDescFromParseStream +val divFromParseStream = + htmlNaryFromParseStream "DIV" H4.DIV flowFromParseStream +val noscriptFromParseStream = + htmlNaryFromParseStream "NOSCRIPT" H4.NOSCRIPT blockFromParseStream +val blockquoteFromParseStream = + htmlNaryFromParseStream "BLOCKQUOTE" H4.BLOCKQUOTE + blockOrScriptFromParseStream +val formFromParseStream = + htmlNaryFromParseStream "FORM" H4.FORM blockOrScriptFromParseStream +val hrFromParseStream = html0aryFromParseStream "HR" H4.HR +val tableFromParseStream = + htmlNaryFromParseStream "TABLE" H4.TABLE tableDataFromParseStream +fun fieldsetFromParseStream pstrm0 = + let val pstrm1 = expectEnterNT "FIELDSET" pstrm0 + val pstrm2 = expectVisitT "STARTFIELDSET" pstrm1 + val attrs = getAttrsFromStream pstrm1 + val (pstrm3, legendOpt) = + legendFromParseStream (skipWhitespaceOrComment pstrm2) + val (pstrm4, flows) = + streamConsumeUntil flowFromParseStream (isVisitT "ENDFIELDSET") + pstrm3 + val pstrm5 = expectVisitT "ENDFIELDSET" pstrm4 + val pstrm6 = expectExitNT "FIELDSET" pstrm5 + in (pstrm5, SOME (H4.FIELDSET (attrs, legendOpt, + listOfOptsToList flows))) end +val addressFromParseStream = + htmlNaryFromParseStream "ADDRESS" H4.ADDRESS inlineFromParseStream +val centerFromParseStream = + htmlNaryFromParseStream "CENTER" H4.CENTER flowFromParseStream +val isindexFromParseStream = html0aryFromParseStream "ISINDEX" H4.ISINDEX +val ttFromParseStream = + htmlNaryFromParseStream "TT" H4.TT inlineFromParseStream +val iFromParseStream = + htmlNaryFromParseStream "I" H4.I inlineFromParseStream +val bFromParseStream = + htmlNaryFromParseStream "B" H4.B inlineFromParseStream +val bigFromParseStream = + htmlNaryFromParseStream "BIG" H4.BIG inlineFromParseStream +val smallFromParseStream = + htmlNaryFromParseStream "SMALL" H4.SMALL inlineFromParseStream +val uFromParseStream = + htmlNaryFromParseStream "U" H4.U inlineFromParseStream +val sFromParseStream = + htmlNaryFromParseStream "S" H4.S inlineFromParseStream +val strikeFromParseStream = + htmlNaryFromParseStream "STRIKE" H4.STRIKE inlineFromParseStream +val emFromParseStream = + htmlNaryFromParseStream "EM" H4.EM inlineFromParseStream +val strongFromParseStream = + htmlNaryFromParseStream "STRONG" H4.STRONG inlineFromParseStream +val dfnFromParseStream = + htmlNaryFromParseStream "DFN" H4.DFN inlineFromParseStream +val codeFromParseStream = + htmlNaryFromParseStream "CODE" H4.CODE inlineFromParseStream +val sampFromParseStream = + htmlNaryFromParseStream "SAMP" H4.SAMP inlineFromParseStream +val kbdFromParseStream = + htmlNaryFromParseStream "KBD" H4.KBD inlineFromParseStream +val varFromParseStream = + htmlNaryFromParseStream "VAR" H4.VAR inlineFromParseStream +val citeFromParseStream = + htmlNaryFromParseStream "CITE" H4.CITE inlineFromParseStream +val abbrFromParseStream = + htmlNaryFromParseStream "ABBR" H4.ABBR inlineFromParseStream +val acronymFromParseStream = + htmlNaryFromParseStream "ACRONYM" H4.ACRONYM inlineFromParseStream +val aFromParseStream = + htmlNaryFromParseStream "A" H4.A inlineFromParseStream +val imgFromParseStream = + html0aryFromParseStream "IMG" H4.IMG +val objectFromParseStream = + htmlNaryFromParseStream "OBJECT" H4.OBJECT flowOrParamFromParseStream +val brFromParseStream = + html0aryFromParseStream "BR" H4.BR +fun inlineScriptFromParseStream pstrm = + let val (pstrm', scriptOpt) = scriptFromParseStream pstrm + in (pstrm', cvtScript H4.Inline_SCRIPT scriptOpt) end +val mapFromParseStream = + htmlNaryFromParseStream "MAP" H4.MAP blockOrAreaFromParseStream +val qFromParseStream = + htmlNaryFromParseStream "Q" H4.Q inlineFromParseStream +val subFromParseStream = + htmlNaryFromParseStream "SUB" H4.SUB inlineFromParseStream +val supFromParseStream = + htmlNaryFromParseStream "SUP" H4.SUP inlineFromParseStream +val spanFromParseStream = + htmlNaryFromParseStream "SPAN" H4.SPAN inlineFromParseStream +val bdoFromParseStream = + htmlNaryFromParseStream "BDO" H4.BDO inlineFromParseStream +val appletFromParseStream = + htmlNaryFromParseStream "APPLET" H4.APPLET flowOrParamFromParseStream +val basefontFromParseStream = + html0aryFromParseStream "BASEFONT" H4.BASEFONT +val fontFromParseStream = + htmlNaryFromParseStream "FONT" H4.FONT inlineFromParseStream +val iframeFromParseStream = + htmlNaryFromParseStream "IFRAME" H4.IFRAME flowFromParseStream +val inputFromParseStream = + html0aryFromParseStream "INPUT" H4.INPUT +val selectFromParseStream = + htmlNaryFromParseStream "SELECT" H4.SELECT optgroupOrOptionFromParseStream +val textareaFromParseStream = + htmlNaryFromParseStream "TEXTAREA" H4.TEXTAREA cdataFromParseStream +val labelFromParseStream = + htmlNaryFromParseStream "LABEL" H4.LABEL inlineFromParseStream +val buttonFromParseStream = + htmlNaryFromParseStream "BUTTON" H4.BUTTON flowFromParseStream +val captionFromParseStream = + htmlNaryFromParseStream "CAPTION" H4.CAPTION inlineFromParseStream +val colFromParseStream = + html0aryFromParseStream "COL" H4.COL +val colgroupFromParseStream = + let fun consumeCol pstrm = + let val (pstrm', colOptVal) = colFromParseStream pstrm + fun cvtCol (SOME (H4.COL attrs)) = SOME attrs + | cvtCol _ = NONE + in (skipWhitespaceOrComment pstrm', cvtCol colOptVal) end + in htmlNaryFromParseStream "COLGROUP" H4.COLGROUP consumeCol end +val theadFromParseStream = + htmlNaryFromParseStream "THEAD" H4.THEAD trFromParseStream +val tfootFromParseStream = + htmlNaryFromParseStream "TFOOT" H4.TFOOT trFromParseStream +val tbodyFromParseStream = + htmlNaryFromParseStream "TBODY" H4.TBODY trFromParseStream + +val _ = + (headContentNTMap + := (foldl AtomMap.insert' AtomMap.empty + [ (Atom.atom "TITLE", titleFromParseStream), + (Atom.atom "BASE", baseFromParseStream), + (Atom.atom "SCRIPT", headScriptFromParseStream), + (Atom.atom "META", metaFromParseStream), + (Atom.atom "LINK", linkFromParseStream), + (Atom.atom "OBJECT", headObjectFromParseStream)]), + blockNTMap + := (foldl AtomMap.insert' AtomMap.empty + [ (Atom.atom "P", pFromParseStream), + (Atom.atom "H1", h1FromParseStream), + (Atom.atom "H2", h2FromParseStream), + (Atom.atom "H3", h3FromParseStream), + (Atom.atom "H4", h4FromParseStream), + (Atom.atom "H5", h5FromParseStream), + (Atom.atom "H6", h6FromParseStream), + (Atom.atom "UL", ulFromParseStream), + (Atom.atom "OL", olFromParseStream), + (Atom.atom "DIR", dirFromParseStream), + (Atom.atom "MENU", menuFromParseStream), + (Atom.atom "PRE", preFromParseStream), + (Atom.atom "DL", dlFromParseStream), + (Atom.atom "DIV", divFromParseStream), + (Atom.atom "NOSCRIPT", noscriptFromParseStream), + (Atom.atom "BLOCKQUOTE", blockquoteFromParseStream), + (Atom.atom "FORM", formFromParseStream), + (Atom.atom "HR", hrFromParseStream), + (Atom.atom "TABLE", tableFromParseStream), + (Atom.atom "FIELDSET", fieldsetFromParseStream), + (Atom.atom "ADDRESS", addressFromParseStream), + (Atom.atom "ISINDEX", isindexFromParseStream), + (Atom.atom "CENTER", centerFromParseStream)]), + inlineNTMap + := (foldl AtomMap.insert' AtomMap.empty + [ (Atom.atom "TT", ttFromParseStream), + (Atom.atom "I", iFromParseStream), + (Atom.atom "B", bFromParseStream), + (Atom.atom "BIG", bigFromParseStream), + (Atom.atom "SMALL", smallFromParseStream), + (Atom.atom "U", uFromParseStream), + (Atom.atom "S", sFromParseStream), + (Atom.atom "STRIKE", strikeFromParseStream), + (Atom.atom "EM", emFromParseStream), + (Atom.atom "STRONG", strongFromParseStream), + (Atom.atom "DFN", dfnFromParseStream), + (Atom.atom "CODE", codeFromParseStream), + (Atom.atom "SAMP", sampFromParseStream), + (Atom.atom "KBD", kbdFromParseStream), + (Atom.atom "VAR", varFromParseStream), + (Atom.atom "CITE", citeFromParseStream), + (Atom.atom "ABBR", abbrFromParseStream), + (Atom.atom "ACRONYM", acronymFromParseStream), + (Atom.atom "A", aFromParseStream), + (Atom.atom "IMG", imgFromParseStream), + (Atom.atom "OBJECT", objectFromParseStream), + (Atom.atom "BR", brFromParseStream), + (Atom.atom "SCRIPT", inlineScriptFromParseStream), + (Atom.atom "MAP", mapFromParseStream), + (Atom.atom "Q", qFromParseStream), + (Atom.atom "SUB", subFromParseStream), + (Atom.atom "SUP", supFromParseStream), + (Atom.atom "SPAN", spanFromParseStream), + (Atom.atom "BDO", bdoFromParseStream), + (Atom.atom "APPLET", appletFromParseStream), + (Atom.atom "BASEFONT", basefontFromParseStream), + (Atom.atom "FONT", fontFromParseStream), + (Atom.atom "IFRAME", iframeFromParseStream), + (Atom.atom "INPUT", inputFromParseStream), + (Atom.atom "SELECT", selectFromParseStream), + (Atom.atom "TEXTAREA", textareaFromParseStream), + (Atom.atom "LABEL", labelFromParseStream), + (Atom.atom "BUTTON", buttonFromParseStream)]), + tableDataNTMap + := (foldl AtomMap.insert' AtomMap.empty + [ (Atom.atom "CAPTION", captionFromParseStream), + (Atom.atom "COL", colFromParseStream), + (Atom.atom "COLGROUP", colgroupFromParseStream), + (Atom.atom "THEAD", theadFromParseStream), + (Atom.atom "TFOOT", tfootFromParseStream), + (Atom.atom "TBODY", tbodyFromParseStream)]) + ) + +fun fromParseTree pt = + let val (_, result) = + htmlFromParseStream (H4U.parsetreeToVisitationStream pt) + in result end + +fun fromString str = let + val pt_opt = parseStream (TextIO.openString str) +in case pt_opt + of NONE => NONE + | SOME pt => fromParseTree pt +end + +end (* HTML4ParserUtils *) + +(* ______________________________________________________________________ + End of html4-parser.sml + ______________________________________________________________________ *) diff --git a/smlnj-lib/HTML4/html4-print.sml b/smlnj-lib/HTML4/html4-print.sml new file mode 100644 index 0000000..a33468e --- /dev/null +++ b/smlnj-lib/HTML4/html4-print.sml @@ -0,0 +1,334 @@ +(* html4-print.sml + * + * COPYRIGHT (c) 2022 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * To print to an output stream (i.e., TextIO.outstream): + * + * fun output outS = HTML4Print.prHTML { + * putc = fn c => TextIO.output1 (outS, c), + * puts = fn s => TextIO.output (outS, s) + * } + * + * To generate a string, use a character buffer: + * + * fun toString html = let + * val buf = CharBuffer.new 1024 + * in + * HTML4Print.prHTML { + * putc = fn c => CharBuffer.add1 (buf, c), + * puts = fn s => CharBuffer.addVec (buf, s) + * } html; + * CharBuffer.contents buf + * end + *) + +structure HTML4Print : sig + + val prHTML : { + putc : char -> unit, + puts : string -> unit + } -> HTML4.html -> unit + + val prBODY : { + putc : char -> unit, + puts : string -> unit + } -> HTML4.body -> unit + + end = struct + + structure H = HTML4 + structure F = Format + + datatype outstream = OS of { + putc : char -> unit, + puts : string -> unit + } + + fun putc (OS{putc, ...}, c) = putc c + fun puts (OS{puts, ...}, s) = puts s + + (* format an open tag *) + fun fmtTag (tag, attrs) = let + fun fmtAttr ((attrName, NONE), l) = " " :: Atom.toString attrName :: l + | fmtAttr ((attrName, SOME s), l) = " " :: Atom.toString attrName :: "=\"" :: s :: "\"" :: l + in + String.concat("<" :: tag :: List.foldr fmtAttr [">"] attrs) + end + + (* format an tag with no content tag *) + fun fmtEmptyTag (tag, attrs) = let + fun fmtAttr ((attrName, NONE), l) = " " :: Atom.toString attrName :: l + | fmtAttr ((attrName, SOME s), l) = " " :: Atom.toString attrName :: "=\"" :: s :: "\"" :: l + in + String.concat("<" :: tag :: List.foldr fmtAttr ["/>"] attrs) + end + + fun fmtEndTag tag = concat["</", tag, ">"] + + fun prTag (OS{puts, ...}, tag, attrs) = puts(fmtTag (tag, attrs)) + fun prEmptyTag (OS{puts, ...}, tag, attrs) = puts(fmtEmptyTag (tag, attrs)) + fun prEndTag (OS{puts, ...}, tag) = puts(fmtEndTag tag) + fun newline (OS{putc, ...}) = putc #"\n" + fun space (OS{putc, ...}) = putc #" " + + (* the various HTML4 headers *) + val strictHdr = "<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML 4.01//EN\" \"http://www.w3.org/TR/html4/strict.dtd\">" + val looseHdr = "<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\" \"http://www.w3.org/TR/html4/loose.dtd\">" + val framesetHdr = "<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML 4.01 Frameset//EN\" \"http://www.w3.org/TR/html4/frameset.dtd\">" + val xhtmlHdr = "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.1//EN\" \"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd\">" + + fun isStrict _ = true (* FIXME: should check content to see if there are uses of Loose elements *) + + fun prCDATA (outS, txt) = let + fun pr (H.CHAR chNum) = puts (outS, concat["&#", (IntInf.toString chNum), ";"]) + | pr (H.COMMENT com) = puts (outS, concat["<!-- ", com, " -->"]) + | pr (H.ENTITY ent) = puts (outS, concat["&", Atom.toString ent, ";"]) + | pr (H.PCDATA s) = puts (outS, s) + in + List.app pr txt + end + + fun prScript (outS, H.SCRIPT(attrs, content)) = ( + prTag (outS, "STYLE", attrs); prCDATA (outS, content); prEndTag (outS, "STYLE")) + + fun prParam (outS, H.PARAM attrs) = prEmptyTag (outS, "PARAM", attrs) + + (* because an OBJECT element can appear in the HEAD and it contains a flow, we introduce + * a union type for the argument to the body printing code + *) + datatype body_or_flow = Body of H.body | Flow of H.flow + + fun prBodyOrFlow (outS, element) = let + fun prFlowList (nl, []) = nl + | prFlowList (nl, H.Flow_BLOCK blk :: r) = ( + if nl then () else newline outS; + prBlock blk; + prFlowList (true, r)) + | prFlowList (_, H.Flow_INLINE txt :: t) = ( + prInline txt; + prFlowList (false, t)) + and prFlowListElem (inline, tag, attrs, content) = ( + if inline + andalso not (List.exists (fn (H.Flow_BLOCK _) => true | _ => false) content) + then ( + prTag (outS, tag, attrs); + ignore (prFlowList (false, content)); + prEndTag (outS, tag)) + else ( + prTag (outS, tag, attrs); + if prFlowList (true, content) then () else newline outS; + prEndTag (outS, tag))) + and prBlock elem = ( + case elem + of H.P(attrs, content) => prInlineElem("P", attrs, content) + | H.H1(attrs, content) => prInlineElem("H1", attrs, content) + | H.H2(attrs, content) => prInlineElem("H2", attrs, content) + | H.H3(attrs, content) => prInlineElem("H3", attrs, content) + | H.H4(attrs, content) => prInlineElem("H4", attrs, content) + | H.H5(attrs, content) => prInlineElem("H5", attrs, content) + | H.H6(attrs, content) => prInlineElem("H6", attrs, content) + | H.UL(attrs, content) => prListElem("UL", attrs, content) + | H.OL(attrs, content) => prListElem("OL", attrs, content) + | H.DIR(attrs, content) => prListElem("DIR", attrs, content) + | H.MENU(attrs, content) => prListElem("MENU", attrs, content) + | H.PRE(attrs, content) => prInlineElem("PRE", attrs, content) + | H.DL(attrs, content) => let + fun prItem (H.DT(attrs, content)) = ( + prInlineElem ("DT", attrs, content); newline outS) + | prItem (H.DD(attrs, content)) = ( + prFlowListElem (false, "DD", attrs, content); newline outS) + in + prTag (outS, "DL", attrs); newline outS; + List.app prItem content; + prEndTag (outS, "DL") + end + | H.DIV(attrs, content) => prFlowListElem (false, "DIV", attrs, content) + | H.NOSCRIPT(attrs, content) => ( + prTag (outS, "NOSCRIPT", attrs); + List.app prBlock content; + prEndTag (outS, "NOSCRIPT")) + | H.BLOCKQUOTE(attrs, content) => ( + prTag (outS, "BLOCKQUOTE", attrs); newline outS; + List.app prBlockOrScript content; + prEndTag (outS, "BLOCKQUOTE")) + | H.FORM(attrs, content) => ( + prTag (outS, "FORM", attrs); newline outS; + List.app prBlockOrScript content; + prEndTag (outS, "FORM")) + | H.HR attrs => prEmptyTag(outS, "HR", attrs) + | H.TABLE(attrs, content) => let + fun prCOL attrs = prTag (outS, "COL", attrs) + fun prData (H.CAPTION(attrs, content)) = ( + prInlineElem("CAPTION", attrs, content); newline outS) + | prData (H.COL attrs) = (prCOL attrs; newline outS) + | prData (H.COLGROUP(attrs, content)) = ( + prTag (outS, "COLGROUP", attrs); + List.app prCOL content; + prEndTag (outS, "COLGROUP"); newline outS) + | prData (H.THEAD(attrs, content)) = prTableElem ("THEAD", attrs, content) + | prData (H.TFOOT(attrs, content)) = prTableElem ("TFOOT", attrs, content) + | prData (H.TBODY(attrs, content)) = prTableElem ("TBODY", attrs, content) + and prTableElem (tag, attrs, content) = let + fun prRow (H.TR(attrs, content)) = let + fun prCell (H.TH(attrs, content)) = + prFlowListElem (true, "TH", attrs, content) + | prCell (H.TD(attrs, content)) = + prFlowListElem (true, "TD", attrs, content) + in + prTag (outS, "TR", attrs); newline outS; + List.app prCell content; newline outS; + prEndTag (outS, "TR"); newline outS + end + in + prTag (outS, "TR", attrs); newline outS; + List.app prRow content; + prEndTag (outS, "TR"); newline outS + end + in + prTag (outS, "TABLE", attrs); newline outS; + List.app prData content; + prEndTag (outS, "TABLE") + end + | H.FIELDSET(attrs, legend, content) => ( + prTag (outS, "FIELDSET", attrs); newline outS; + case legend + of SOME(H.LEGEND(attrs, content)) => ( + prInlineElem("LEGEND", attrs, content); newline outS) + | NONE => () + (* end case *); + if prFlowList (false, content) then () else newline outS; + prEndTag (outS, "FIELDSET")) + | H.ADDRESS(attrs, content) => prInlineElem("ADDRESS", attrs, content) + | H.CENTER(attrs, content) => prFlowListElem (false, "CENTER", attrs, content) + | H.ISINDEX attrs => prEmptyTag(outS, "ISINDEX", attrs) + (* end case *); + newline outS) + and prListElem (tag, attrs, content) = let + fun prItem (H.LI(attrs, content)) = ( + prFlowListElem (true, "LI", attrs, content); + newline outS) + in + prTag (outS, tag, attrs); newline outS; + List.app prItem content; + prEndTag (outS, tag); newline outS + end + and prInline elem = (case elem + of H.TT(attrs, content) => prInlineElem("TT", attrs, content) + | H.I(attrs, content) => prInlineElem("I", attrs, content) + | H.B(attrs, content) => prInlineElem("B", attrs, content) + | H.BIG(attrs, content) => prInlineElem("BIG", attrs, content) + | H.SMALL(attrs, content) => prInlineElem("SMALL", attrs, content) + | H.U(attrs, content) => prInlineElem("U", attrs, content) + | H.S(attrs, content) => prInlineElem("S", attrs, content) + | H.STRIKE(attrs, content) => prInlineElem("STRIKE", attrs, content) + | H.EM(attrs, content) => prInlineElem("EM", attrs, content) + | H.STRONG(attrs, content) => prInlineElem("STRONG", attrs, content) + | H.DFN(attrs, content) => prInlineElem("DFN", attrs, content) + | H.CODE(attrs, content) => prInlineElem("CODE", attrs, content) + | H.SAMP(attrs, content) => prInlineElem("SAMP", attrs, content) + | H.KBD(attrs, content) => prInlineElem("KBD", attrs, content) + | H.VAR(attrs, content) => prInlineElem("VAR", attrs, content) + | H.CITE(attrs, content) => prInlineElem("CITE", attrs, content) + | H.ABBR(attrs, content) => prInlineElem("ABBR", attrs, content) + | H.ACRONYM(attrs, content) => prInlineElem("ACRONYM", attrs, content) + | H.A(attrs, content) => prInlineElem("A", attrs, content) + | H.IMG attrs => prEmptyTag(outS, "IMG", attrs) + | H.OBJECT(attrs, content) => ( + prTag (outS, "OBJECT", attrs); newline outS; + prFlowOrParamList content; + prEndTag (outS, "OBJECT"); newline outS) + | H.BR attrs => prEmptyTag(outS, "BR", attrs) + | H.Inline_SCRIPT script => (newline outS; prScript (outS, script)) + | H.MAP(attrs, content) => raise Fail "FIXME" + | H.Q(attrs, content) => prInlineElem("Q", attrs, content) + | H.SUB(attrs, content) => prInlineElem("SUB", attrs, content) + | H.SUP(attrs, content) => prInlineElem("SUP", attrs, content) + | H.SPAN(attrs, content) => prInlineElem("SPAN", attrs, content) + | H.BDO(attrs, content) => prInlineElem("BDO", attrs, content) + | H.APPLET(attrs, content) => ( + prTag (outS, "APPLET", attrs); newline outS; + prFlowOrParamList content; + prEndTag (outS, "APPLET"); newline outS) + | H.BASEFONT attrs => prEmptyTag(outS, "BASEFONT", attrs) + | H.FONT(attrs, content) => prInlineElem("FONT", attrs, content) + | H.IFRAME(attrs, content) => prFlowListElem (true, "IFRAME", attrs, content) + | H.INPUT attrs => prEmptyTag(outS, "INPUT", attrs) + | H.SELECT(attrs, content) => raise Fail "FIXME" + | H.TEXTAREA(attrs, content) => ( + prTag (outS, "TEXTAREA", attrs); + prCDATA (outS, content); + prEndTag (outS, "TEXTAREA")) + | H.LABEL(attrs, content) => prInlineElem("LABEL", attrs, content) + | H.BUTTON(attrs, content) => prFlowListElem (true, "BUTTON", attrs, content) + | H.CDATA txt => prCDATA (outS, txt) + (* end case *)) + and prInlineElem (tag, attrs, content) = ( + prTag (outS, tag, attrs); + List.app prInline content; + prEndTag (outS, tag)) + and prBlockOrScript (H.BlockOrScript_BLOCK blk) = prBlock blk + | prBlockOrScript (H.BlockOrScript_SCRIPT script) = (prScript (outS, script); newline outS) + and prFlowOrParamList content = let + fun pr (H.FlowOrParam_FLOW(H.Flow_BLOCK blk), nl) = ( + if nl then () else newline outS; prBlock blk; true) + | pr (H.FlowOrParam_FLOW(H.Flow_INLINE txt), _) = (prInline txt; false) + | pr (H.FlowOrParam_PARAM param, _) = (prParam (outS, param); false) + in + (* This function is always called after a newline *) + ignore (List.foldl pr true content) + end + in + case element + of Body(H.BODY(attrs, content)) => ( + prTag (outS, "BODY", attrs); newline outS; + List.app prBlockOrScript content; + prEndTag (outS, "BODY"); newline outS) + | Flow(H.Flow_BLOCK blk) => prBlock blk + | Flow(H.Flow_INLINE txt) => prInline txt + (* end case *) + end (* prBodyOrFlow *) + + fun prHTML outS (H.HTML{version, head, content}) = let + val outS = OS outS + fun prHeadContent (H.Head_TITLE(attrs, txt)) = ( + prTag (outS, "TITLE", attrs); newline outS; + prCDATA (outS, txt); newline outS; + prEndTag (outS, "TITLE"); newline outS) + | prHeadContent (H.Head_BASE attrs) = ( + prTag (outS, "TITLE", attrs); newline outS) + | prHeadContent (H.Head_SCRIPT script) = ( + prScript (outS, script); newline outS) + | prHeadContent (H.Head_STYLE(attrs, content)) = ( + prTag (outS, "STYLE", attrs); newline outS; + prCDATA (outS, content); newline outS; + prEndTag (outS, "STYLE"); newline outS) + | prHeadContent (H.Head_META attrs) = ( + prTag (outS, "META", attrs); newline outS) + | prHeadContent (H.Head_LINK attrs) = ( + prTag (outS, "LINK", attrs); newline outS) + | prHeadContent (H.Head_OBJECT(attrs, content)) = raise Fail "FIXME" + and prBodyOrFrameset (H.BodyOrFrameset_BODY body) = prBodyOrFlow (outS, Body body) + | prBodyOrFrameset (H.BodyOrFrameset_FRAMESET frameset) = prFrameset frameset + and prFrameset (H.FRAMESET(attrs, content, noframes)) = raise Fail "FIXME" + in + case (version, content) + of (SOME vers, _) => puts (outS, vers) + | (NONE, H.BodyOrFrameset_BODY(H.BODY(attrs, children))) => + if isStrict children + then puts (outS, strictHdr) + else puts (outS, looseHdr) + | (NONE, H.BodyOrFrameset_FRAMESET _) => puts (outS, framesetHdr) + (* end case *); + newline outS; + puts (outS, "<HTML>\n"); + puts (outS, "<HEAD>\n"); + List.app prHeadContent head; + puts (outS, "</HEAD>\n"); + prBodyOrFrameset content; + puts (outS, "</HTML>\n") + end + + fun prBODY outS body = prBodyOrFlow (OS outS, Body body) + + end diff --git a/smlnj-lib/HTML4/html4-printer.sml b/smlnj-lib/HTML4/html4-printer.sml new file mode 100644 index 0000000..2b146b9 --- /dev/null +++ b/smlnj-lib/HTML4/html4-printer.sml @@ -0,0 +1,406 @@ +(* html4-printer.sml + * + * COPYRIGHT (c) 2014 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + *) + +structure HTML4Printer : sig + + val toString : HTML4.html -> string + +end = struct + +structure H4 = HTML4 + +structure PP = PrettyPrint + +exception NotImplemented + +structure HTML4TagStrings = struct + (* Strict *) + val A = "A" + val ABBR = "ABBR" + val ACRONYM = "ACRONYM" + val ADDRESS = "ADDRESS" + val AREA = "AREA" + val B = "B" + val BASE = "BASE" + val BDO = "BDO" + val BIG = "BIG" + val BLOCKQUOTE = "BLOCKQUOTE" + val BODY = "BODY" + val BR = "BR" + val BUTTON = "BUTTON" + val CAPTION = "CAPTION" + val CITE = "CITE" + val CODE = "CODE" + val COL = "COL" + val COLGROUP = "COLGROUP" + val DD = "DD" + val DEL = "DEL" + val DFN = "DFN" + val DIV = "DIV" + val DL = "DL" + val DT = "DT" + val EM = "EM" + val FIELDSET = "FIELDSET" + val FORM = "FORM" + val H1 = "H1" + val H2 = "H2" + val H3 = "H3" + val H4 = "H4" + val H5 = "H5" + val H6 = "H6" + val HEAD = "HEAD" + val HR = "HR" + val HTML = "HTML" + val I = "I" + val IMG = "IMG" + val INPUT = "INPUT" + val INS = "INS" + val KBD = "KBD" + val LABEL = "LABEL" + val LEGEND = "LEGEND" + val LI = "LI" + val LINK = "LINK" + val MAP = "MAP" + val META = "META" + val NOSCRIPT = "NOSCRIPT" + val OBJECT = "OBJECT" + val OL = "OL" + val OPTGROUP = "OPTGROUP" + val OPTION = "OPTION" + val P = "P" + val PARAM = "PARAM" + val PRE = "PRE" + val Q = "Q" + val SAMP = "SAMP" + val SCRIPT = "SCRIPT" + val SELECT = "SELECT" + val SMALL = "SMALL" + val SPAN = "SPAN" + val STRONG = "STRONG" + val STYLE = "STYLE" + val SUB = "SUB" + val SUP = "SUP" + val TABLE = "TABLE" + val TBODY = "TBODY" + val TD = "TD" + val TEXTAREA = "TEXTAREA" + val TFOOT = "TFOOT" + val TH = "TH" + val THEAD = "THEAD" + val TITLE = "TITLE" + val TR = "TR" + val TT = "TT" + val UL = "UL" + val VAR = "VAR" + (* Frameset *) + val FRAME = "FRAME" + val FRAMESET = "FRAMESET" + val NOFRAMES = "NOFRAMES" + (* Loose *) + val APPLET = "APPLET" + val BASEFONT = "BASEFONT" + val CENTER = "CENTER" + val DIR = "DIR" + val FONT = "FONT" + val IFRAME = "IFRAME" + val ISINDEX = "ISINDEX" + val MENU = "MENU" + val S = "S" + val STRIKE = "STRIKE" + val U = "U" +end + +structure S = HTML4TagStrings + +val strictStr = "<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML 4.01//EN\" \"http://www.w3.org/TR/html4/strict.dtd\">" + +val looseStr = "<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\" \"http://www.w3.org/TR/html4/loose.dtd\">" + +val framesetStr = "<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML 4.01 Frameset//EN\" \"http://www.w3.org/TR/html4/frameset.dtd\">" + +val xhtmlStr = "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.1//EN\" \"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd\">" + +(* ____________________________________________________________ *) + +fun isStrict _ = true + +fun getVersionStr (doc as H4.HTML {head, content, ...}) = + case content of + H4.BodyOrFrameset_BODY (H4.BODY (attrs, children)) => + if isStrict children then strictStr else looseStr + | H4.BodyOrFrameset_FRAMESET _ => framesetStr + +(* ____________________________________________________________ *) + +fun ppOpenTag ppstrm (tag, attributes) = + PP.string ppstrm (String.concat + ["<", tag, + case attributes of + attrs as _::_ => " " ^ (HTML4Utils.attrsToStr attrs) + | [] => "", + ">"]) + +fun ppCloseTag ppstrm tag = + PP.string ppstrm (String.concat ["</", tag, ">"]) + +fun ppChildren _ _ [] = () + | ppChildren ppstrm ppChild children = ( + PP.openHVBox ppstrm (PP.Abs 2); + PP.newline ppstrm; + app (ppChild ppstrm) children; + PP.closeBox ppstrm; + PP.newline ppstrm + ) + +fun ppTagAndChildren ppstrm ppChild tag (attrs, children) = ( + ppOpenTag ppstrm (tag, attrs); + ppChildren ppstrm ppChild children; + ppCloseTag ppstrm tag + ) + +(* ____________________________________________________________ *) + +local + open H4 +in + fun ppCol ppstrm attrs = ppOpenTag ppstrm (S.COL, attrs) + + fun ppCdata ppstrm (CHAR chNum) = + PP.string ppstrm ("&#" ^ (IntInf.toString chNum) ^ ";") + | ppCdata ppstrm (COMMENT comment_string) = + PP.string ppstrm comment_string + | ppCdata ppstrm (ENTITY ent) = + PP.string ppstrm ("&" ^ (Atom.toString ent) ^ ";") + | ppCdata ppstrm (PCDATA string_data) = PP.string ppstrm string_data + and ppHtml ppstrm (doc as HTML {version, head, content}) = ( + PP.string ppstrm (case version of + SOME doctype_str => doctype_str + | NONE => getVersionStr doc); + PP.newline ppstrm; + ppOpenTag ppstrm (S.HTML, []); + PP.openHVBox ppstrm (PP.Abs 2); + PP.newline ppstrm; + ppOpenTag ppstrm (S.HEAD, []); + ppChildren ppstrm ppHead_content head; + ppCloseTag ppstrm S.HEAD; + PP.newline ppstrm; + ppBody_or_frameset ppstrm content; + PP.closeBox ppstrm; + PP.newline ppstrm; + ppCloseTag ppstrm S.HTML; + PP.newline ppstrm + ) + and ppHead_content ppstrm (Head_BASE attrs) = + ppOpenTag ppstrm (S.BASE, attrs) + | ppHead_content ppstrm (Head_LINK attrs) = + ppOpenTag ppstrm (S.LINK, attrs) + | ppHead_content ppstrm (Head_META attrs) = + ppOpenTag ppstrm (S.META, attrs) + | ppHead_content ppstrm (Head_OBJECT contents) = + ppTagAndChildren ppstrm ppFlow_or_param S.OBJECT contents + | ppHead_content ppstrm (Head_SCRIPT child) = ppScript ppstrm child + | ppHead_content ppstrm (Head_STYLE contents) = + ppTagAndChildren ppstrm ppCdata S.STYLE contents + | ppHead_content ppstrm (Head_TITLE contents) = + ppTagAndChildren ppstrm ppCdata S.TITLE contents + and ppBody_or_frameset ppstrm (BodyOrFrameset_BODY body) = + ppBody ppstrm body + | ppBody_or_frameset ppstrm (BodyOrFrameset_FRAMESET frameset) = + ppFrameset ppstrm frameset + and ppBody ppstrm (BODY content) = + ppTagAndChildren ppstrm ppBlock_or_script S.BODY content + and ppFrameset ppstrm (FRAMESET (attrs, children, noframesOpt)) = ( + ppOpenTag ppstrm (S.FRAMESET, attrs); + ppChildren ppstrm ppFrameset_or_frame children; + case noframesOpt of + SOME noframes => (PP.newline ppstrm; + ppNoframes ppstrm noframes) + | _ => (); + ppCloseTag ppstrm S.FRAMESET + ) + and ppFrameset_or_frame ppstrm (FRAME attrs) = + ppOpenTag ppstrm (S.FRAME, attrs) + | ppFrameset_or_frame ppstrm (FramesetOrFrame_FRAMESET frameset) = + ppFrameset ppstrm frameset + and ppNoframes ppstrm (NOFRAMES (attrs, body)) = + (ppOpenTag ppstrm (S.NOFRAMES, attrs); + ppBody ppstrm body; + ppCloseTag ppstrm S.NOFRAMES) + and ppFlow ppstrm (Flow_BLOCK block) = ppBlock ppstrm block + | ppFlow ppstrm (Flow_INLINE inline) = ppInline ppstrm inline + and ppBlock ppstrm (ADDRESS content) = + ppTagAndChildren ppstrm ppInline S.ADDRESS content + | ppBlock ppstrm (BLOCKQUOTE content) = + ppTagAndChildren ppstrm ppBlock_or_script S.BLOCKQUOTE content + | ppBlock ppstrm (CENTER content) = + ppTagAndChildren ppstrm ppFlow S.CENTER content + | ppBlock ppstrm (DIR content) = + ppTagAndChildren ppstrm ppList_item S.DIR content + | ppBlock ppstrm (DIV content) = + ppTagAndChildren ppstrm ppFlow S.DIV content + | ppBlock ppstrm (DL content) = + ppTagAndChildren ppstrm ppDef_term_or_desc S.DL content + | ppBlock ppstrm (FIELDSET (attrs, legend_opt, children)) = ( + ppOpenTag ppstrm (S.FIELDSET, attrs); + case legend_opt of SOME legend => ppLegend ppstrm legend | NONE => (); + ppChildren ppstrm ppFlow children; + ppCloseTag ppstrm S.FIELDSET + ) + | ppBlock ppstrm (FORM content) = + ppTagAndChildren ppstrm ppBlock_or_script S.FORM content + | ppBlock ppstrm (H1 content) = + ppTagAndChildren ppstrm ppInline S.H1 content + | ppBlock ppstrm (H2 content) = + ppTagAndChildren ppstrm ppInline S.H2 content + | ppBlock ppstrm (H3 content) = + ppTagAndChildren ppstrm ppInline S.H3 content + | ppBlock ppstrm (H4 content) = + ppTagAndChildren ppstrm ppInline S.H4 content + | ppBlock ppstrm (H5 content) = + ppTagAndChildren ppstrm ppInline S.H5 content + | ppBlock ppstrm (H6 content) = + ppTagAndChildren ppstrm ppInline S.H6 content + | ppBlock ppstrm (HR attrs) = ppOpenTag ppstrm (S.HR, attrs) + | ppBlock ppstrm (ISINDEX attrs) = ppOpenTag ppstrm (S.ISINDEX, attrs) + | ppBlock ppstrm (MENU content) = + ppTagAndChildren ppstrm ppList_item S.MENU content + | ppBlock ppstrm (NOSCRIPT content) = + ppTagAndChildren ppstrm ppBlock S.NOSCRIPT content + | ppBlock ppstrm (OL content) = + ppTagAndChildren ppstrm ppList_item S.OL content + | ppBlock ppstrm (P content) = + ppTagAndChildren ppstrm ppInline S.P content + | ppBlock ppstrm (PRE content) = + ppTagAndChildren ppstrm ppInline S.PRE content + | ppBlock ppstrm (TABLE content) = + ppTagAndChildren ppstrm ppTable_data S.TABLE content + | ppBlock ppstrm (UL content) = + ppTagAndChildren ppstrm ppList_item S.UL content + and ppInline ppstrm (A content) = + ppTagAndChildren ppstrm ppInline S.A content + | ppInline ppstrm (ABBR content) = + ppTagAndChildren ppstrm ppInline S.ABBR content + | ppInline ppstrm (ACRONYM content) = + ppTagAndChildren ppstrm ppInline S.ACRONYM content + | ppInline ppstrm (APPLET content) = + ppTagAndChildren ppstrm ppFlow_or_param S.APPLET content + | ppInline ppstrm (B content) = + ppTagAndChildren ppstrm ppInline S.B content + | ppInline ppstrm (BASEFONT attrs) = ppOpenTag ppstrm (S.BASEFONT, attrs) + | ppInline ppstrm (BDO content) = + ppTagAndChildren ppstrm ppInline S.BDO content + | ppInline ppstrm (BIG content) = + ppTagAndChildren ppstrm ppInline S.BIG content + | ppInline ppstrm (BR attrs) = ppOpenTag ppstrm (S.BR, attrs) + | ppInline ppstrm (BUTTON content) = + ppTagAndChildren ppstrm ppFlow S.BUTTON content + | ppInline ppstrm (CDATA children) = app (ppCdata ppstrm) children + | ppInline ppstrm (CITE content) = + ppTagAndChildren ppstrm ppInline S.CITE content + | ppInline ppstrm (CODE content) = + ppTagAndChildren ppstrm ppInline S.CODE content + | ppInline ppstrm (DFN content) = + ppTagAndChildren ppstrm ppInline S.DFN content + | ppInline ppstrm (EM content) = + ppTagAndChildren ppstrm ppInline S.EM content + | ppInline ppstrm (FONT content) = + ppTagAndChildren ppstrm ppInline S.FONT content + | ppInline ppstrm (I content) = + ppTagAndChildren ppstrm ppInline S.I content + | ppInline ppstrm (IFRAME content) = + ppTagAndChildren ppstrm ppFlow S.IFRAME content + | ppInline ppstrm (IMG attrs) = ppOpenTag ppstrm (S.IMG, attrs) + | ppInline ppstrm (INPUT attrs) = ppOpenTag ppstrm (S.INPUT, attrs) + | ppInline ppstrm (Inline_SCRIPT script) = ppScript ppstrm script + | ppInline ppstrm (KBD content) = + ppTagAndChildren ppstrm ppInline S.KBD content + | ppInline ppstrm (LABEL content) = + ppTagAndChildren ppstrm ppInline S.LABEL content + | ppInline ppstrm (MAP content) = + ppTagAndChildren ppstrm ppBlock_or_area S.MAP content + | ppInline ppstrm (OBJECT content) = + ppTagAndChildren ppstrm ppFlow_or_param S.OBJECT content + | ppInline ppstrm (Q content) = + ppTagAndChildren ppstrm ppInline S.Q content + | ppInline ppstrm (S content) = + ppTagAndChildren ppstrm ppInline S.S content + | ppInline ppstrm (SAMP content) = + ppTagAndChildren ppstrm ppInline S.SAMP content + | ppInline ppstrm (SELECT content) = + ppTagAndChildren ppstrm ppOptgroup_or_option S.SELECT content + | ppInline ppstrm (SMALL content) = + ppTagAndChildren ppstrm ppInline S.SMALL content + | ppInline ppstrm (SPAN content) = + ppTagAndChildren ppstrm ppInline S.SPAN content + | ppInline ppstrm (STRIKE content) = + ppTagAndChildren ppstrm ppInline S.STRIKE content + | ppInline ppstrm (STRONG content) = + ppTagAndChildren ppstrm ppInline S.STRONG content + | ppInline ppstrm (SUB content) = + ppTagAndChildren ppstrm ppInline S.SUB content + | ppInline ppstrm (SUP content) = + ppTagAndChildren ppstrm ppInline S.SUP content + | ppInline ppstrm (TEXTAREA content) = + ppTagAndChildren ppstrm ppCdata S.TEXTAREA content + | ppInline ppstrm (TT content) = + ppTagAndChildren ppstrm ppInline S.TT content + | ppInline ppstrm (U content) = + ppTagAndChildren ppstrm ppInline S.U content + | ppInline ppstrm (VAR content) = + ppTagAndChildren ppstrm ppInline S.VAR content + and ppList_item ppstrm (LI content) = + ppTagAndChildren ppstrm ppFlow S.LI content + and ppScript ppstrm (SCRIPT content) = + ppTagAndChildren ppstrm ppCdata S.SCRIPT content + and ppParam ppstrm (PARAM attrs) = ppOpenTag ppstrm (S.PARAM, attrs) + and ppLegend ppstrm (LEGEND content) = + ppTagAndChildren ppstrm ppInline S.LEGEND content + and ppDef_term_or_desc ppstrm (DD content) = + ppTagAndChildren ppstrm ppFlow S.DD content + | ppDef_term_or_desc ppstrm (DT content) = + ppTagAndChildren ppstrm ppInline S.DT content + and ppTable_data ppstrm (CAPTION content) = + ppTagAndChildren ppstrm ppInline S.CAPTION content + | ppTable_data ppstrm (COL col) = ppCol ppstrm col + | ppTable_data ppstrm (COLGROUP content) = + ppTagAndChildren ppstrm ppCol S.COLGROUP content + | ppTable_data ppstrm (TBODY content) = + ppTagAndChildren ppstrm ppTr S.TBODY content + | ppTable_data ppstrm (TFOOT content) = + ppTagAndChildren ppstrm ppTr S.TFOOT content + | ppTable_data ppstrm (THEAD content) = + ppTagAndChildren ppstrm ppTr S.THEAD content + and ppTr ppstrm (TR content) = + ppTagAndChildren ppstrm ppTh_or_td S.TR content + and ppTh_or_td ppstrm (TD content) = + ppTagAndChildren ppstrm ppFlow S.TD content + | ppTh_or_td ppstrm (TH content) = + ppTagAndChildren ppstrm ppFlow S.TH content + and ppOptgroup_or_option ppstrm (OPTGROUP content) = + ppTagAndChildren ppstrm + (fn pstrm => fn opt => ppTagAndChildren ppstrm ppCdata S.OPTION opt) + S.OPTGROUP content + | ppOptgroup_or_option ppstrm (OPTION content) = + ppTagAndChildren ppstrm ppCdata S.OPTION content + and ppFlow_or_param ppstrm (FlowOrParam_FLOW flow) = ppFlow ppstrm flow + | ppFlow_or_param ppstrm (FlowOrParam_PARAM param) = ppParam ppstrm param + and ppBlock_or_script ppstrm (BlockOrScript_BLOCK block) = + ppBlock ppstrm block + | ppBlock_or_script ppstrm (BlockOrScript_SCRIPT script) = + ppScript ppstrm script + and ppBlock_or_area ppstrm (AREA attrs) = ppOpenTag ppstrm (S.AREA, attrs) + | ppBlock_or_area ppstrm (BlockOrArea_BLOCK block) = ppBlock ppstrm block +end + +(* ____________________________________________________________ *) + +val toString = PP.pp_to_string 80 ppHtml + +end (* HTML4Printer *) + +(* ______________________________________________________________________ + End of html4-printer.sml + ______________________________________________________________________ *) diff --git a/smlnj-lib/HTML4/html4-test.cm b/smlnj-lib/HTML4/html4-test.cm new file mode 100644 index 0000000..db70914 --- /dev/null +++ b/smlnj-lib/HTML4/html4-test.cm @@ -0,0 +1,17 @@ +(* html4-test.cm + * + * COPYRIGHT (c) 2014 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + *) + +Group is + $/basis.cm + $/smlnj-lib.cm + $/ml-lpt-lib.cm + + html4-lib.cm + html4-test.sml + +(* ______________________________________________________________________ + End of html4-test.cm + ______________________________________________________________________ *) diff --git a/smlnj-lib/HTML4/html4-test.sml b/smlnj-lib/HTML4/html4-test.sml new file mode 100644 index 0000000..271d068 --- /dev/null +++ b/smlnj-lib/HTML4/html4-test.sml @@ -0,0 +1,74 @@ +(* html4-test.sml + * + * COPYRIGHT (c) 2014 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + *) + +structure Test = + struct + + val parsetreeStreamToString = + HTML4Utils.mkParsetreeStreamToString HTML4Tokens.toString + + fun handleFile outS fileName = let + val inStream = TextIO.openIn fileName + val concrete_pt_opt = HTML4Parser.parseStream inStream + val pt_visit_strm = (case concrete_pt_opt + of SOME concrete_pt => HTML4Utils.parsetreeToVisitationStream concrete_pt + | NONE => HTML4Utils.StreamNil + (* end case *)) + val (_, htmlOpt) = HTML4Parser.htmlFromParseStream pt_visit_strm + handle HTML4Parser.IllFormedHTMLParseStream (strm, SOME msg) => + (HTML4Parser.printVisitationStream strm; + print (msg ^ "\n\n"); (strm, NONE)) + in + TextIO.closeIn inStream; + TextIO.output(outS, concat["<!-- ******************** start ", fileName, " ******************** -->\n"]); + case htmlOpt + of SOME html => HTML4Print.prHTML { + putc = fn c => TextIO.output1(outS, c), + puts = fn s => TextIO.output(outS, s) + } html + | NONE => TextIO.output (outS, parsetreeStreamToString pt_visit_strm) + (* end case *); + TextIO.output(outS, concat["<!-- ******************** end ", fileName, " ******************** -->\n"]) + end + + fun main (_, args) = let + val outS = TextIO.openOut "html4-test.out" + in + (List.app (handleFile outS) args; TextIO.closeOut outS; OS.Process.success) + handle ex => ( + TextIO.closeOut outS; + print(concat["uncaught exception: ", exnMessage ex, "\n"]); + List.app (fn s => print(concat[" ", s, "\n"])) (SMLofNJ.exnHistory ex); + OS.Process.failure) + end + + val tests = [ + "tests/abbr.html", + "tests/dir.html", + "tests/edit.html", + "tests/elements.html", + "tests/entities.html", + "tests/forms.html", + "tests/framebody.html", + "tests/frames.html", + "tests/id.html", + "tests/objaudio.html", + "tests/objects.html", + "tests/objvideo.html", + "tests/quote.html", + "tests/scripts.html", + "tests/spchars.html", + "tests/tables.html", + "tests/template.html", + "tests/test001.html", + "tests/test002.html" + ] + + end + +(* ______________________________________________________________________ + End of html4-test.sml + ______________________________________________________________________ *) diff --git a/smlnj-lib/HTML4/html4-token-utils.sml b/smlnj-lib/HTML4/html4-token-utils.sml new file mode 100644 index 0000000..1012b4e --- /dev/null +++ b/smlnj-lib/HTML4/html4-token-utils.sml @@ -0,0 +1,588 @@ +(* html4-token-utils.sml + * + * COPYRIGHT (c) 2014 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * A set of utilities used for working with tokens used in the HTML 4 + * parser. + *) + +structure HTML4TokenUtils = struct + +structure H4U = HTML4Utils + +(* ____________________________________________________________ *) +(* Attribute handling *) +(* XXX Is this too heavyweight? It certainly gives us some + flexibility in the future. *) + +structure HTML4AttrParser = HTML4AttrParseFn(HTML4AttrLexer) + +fun parseAttrsFromStream inStream = let + val sourceMap = AntlrStreamPos.mkSourcemap () + val lex = HTML4AttrLexer.lex sourceMap + val stream = HTML4AttrLexer.streamifyInstream inStream + in + case HTML4AttrParser.parse lex stream + of (SOME result, _, _) => result + | _ => [] + (* end case *) + end + +fun parseAttrs inStr = parseAttrsFromStream (TextIO.openString inStr) +(*DEBUG*)handle ex => (print(concat["parseAttrs: \"", String.toString inStr, "\"\n"]); raise ex) + +(* ____________________________________________________________ *) +open HTML4Tokens + +val strict_tuple_list = [ + ("A", STARTA, SOME ENDA), + ("ABBR", STARTABBR, SOME ENDABBR), + ("ACRONYM", STARTACRONYM, SOME ENDACRONYM), + ("ADDRESS", STARTADDRESS, SOME ENDADDRESS), + ("AREA", STARTAREA, NONE), + ("B", STARTB, SOME ENDB), + ("BASE", STARTBASE, NONE), + ("BDO", STARTBDO, SOME ENDBDO), + ("BIG", STARTBIG, SOME ENDBIG), + ("BLOCKQUOTE", STARTBLOCKQUOTE, SOME ENDBLOCKQUOTE), + ("BODY", STARTBODY, SOME ENDBODY), + ("BR", STARTBR, NONE), + ("BUTTON", STARTBUTTON, SOME ENDBUTTON), + ("CAPTION", STARTCAPTION, SOME ENDCAPTION), + ("CITE", STARTCITE, SOME ENDCITE), + ("CODE", STARTCODE, SOME ENDCODE), + ("COL", STARTCOL, NONE), + ("COLGROUP", STARTCOLGROUP, SOME ENDCOLGROUP), + ("DD", STARTDD, SOME ENDDD), + ("DEL", STARTDEL, SOME ENDDEL), + ("DFN", STARTDFN, SOME ENDDFN), + ("DIV", STARTDIV, SOME ENDDIV), + ("DL", STARTDL, SOME ENDDL), + ("DT", STARTDT, SOME ENDDT), + ("EM", STARTEM, SOME ENDEM), + ("FIELDSET", STARTFIELDSET, SOME ENDFIELDSET), + ("FORM", STARTFORM, SOME ENDFORM), + ("H1", STARTH1, SOME ENDH1), + ("H2", STARTH2, SOME ENDH2), + ("H3", STARTH3, SOME ENDH3), + ("H4", STARTH4, SOME ENDH4), + ("H5", STARTH5, SOME ENDH5), + ("H6", STARTH6, SOME ENDH6), + ("HEAD", STARTHEAD, SOME ENDHEAD), + ("HR", STARTHR, NONE), + ("HTML", STARTHTML, SOME ENDHTML), + ("I", STARTI, SOME ENDI), + ("IMG", STARTIMG, NONE), + ("INPUT", STARTINPUT, NONE), + ("INS", STARTINS, SOME ENDINS), + ("KBD", STARTKBD, SOME ENDKBD), + ("LABEL", STARTLABEL, SOME ENDLABEL), + ("LEGEND", STARTLEGEND, SOME ENDLEGEND), + ("LI", STARTLI, SOME ENDLI), + ("LINK", STARTLINK, NONE), + ("MAP", STARTMAP, SOME ENDMAP), + ("META", STARTMETA, NONE), + ("NOSCRIPT", STARTNOSCRIPT, SOME ENDNOSCRIPT), + ("OBJECT", STARTOBJECT, SOME ENDOBJECT), + ("OL", STARTOL, SOME ENDOL), + ("OPTGROUP", STARTOPTGROUP, SOME ENDOPTGROUP), + ("OPTION", STARTOPTION, SOME ENDOPTION), + ("P", STARTP, SOME ENDP), + ("PARAM", STARTPARAM, NONE), + ("PRE", STARTPRE, SOME ENDPRE), + ("Q", STARTQ, SOME ENDQ), + ("SAMP", STARTSAMP, SOME ENDSAMP), + ("SCRIPT", STARTSCRIPT, SOME ENDSCRIPT), + ("SELECT", STARTSELECT, SOME ENDSELECT), + ("SMALL", STARTSMALL, SOME ENDSMALL), + ("SPAN", STARTSPAN, SOME ENDSPAN), + ("STRONG", STARTSTRONG, SOME ENDSTRONG), + ("STYLE", STARTSTYLE, SOME ENDSTYLE), + ("SUB", STARTSUB, SOME ENDSUB), + ("SUP", STARTSUP, SOME ENDSUP), + ("TABLE", STARTTABLE, SOME ENDTABLE), + ("TBODY", STARTTBODY, SOME ENDTBODY), + ("TD", STARTTD, SOME ENDTD), + ("TEXTAREA", STARTTEXTAREA, SOME ENDTEXTAREA), + ("TFOOT", STARTTFOOT, SOME ENDTFOOT), + ("TH", STARTTH, SOME ENDTH), + ("THEAD", STARTTHEAD, SOME ENDTHEAD), + ("TITLE", STARTTITLE, SOME ENDTITLE), + ("TR", STARTTR, SOME ENDTR), + ("TT", STARTTT, SOME ENDTT), + ("UL", STARTUL, SOME ENDUL), + ("VAR", STARTVAR, SOME ENDVAR) +] + +val loose_tuple_list = [ + ("APPLET", STARTAPPLET, SOME ENDAPPLET), + ("BASEFONT", STARTBASEFONT, NONE), + ("CENTER", STARTCENTER, SOME ENDCENTER), + ("DIR", STARTDIR, SOME ENDDIR), + ("FONT", STARTFONT, SOME ENDFONT), + ("IFRAME", STARTIFRAME, SOME ENDIFRAME), + ("ISINDEX", STARTISINDEX, NONE), + ("MENU", STARTMENU, SOME ENDMENU), + ("S", STARTS, SOME ENDS), + ("STRIKE", STARTSTRIKE, SOME ENDSTRIKE), + ("U", STARTU, SOME ENDU) +] + +val frameset_tuple_list = [ + ("FRAME", STARTFRAME, NONE), + ("FRAMESET", STARTFRAMESET, SOME ENDFRAMESET), + ("NOFRAMES", STARTNOFRAMES, SOME ENDNOFRAMES) +] + +val endTagNameTest = Char.notContains " \t\r\n>" + +fun splitTagStart inStr = + Substring.splitl endTagNameTest (Substring.full inStr) + +fun extractTag str = let + val (tagNameChs, _) = splitTagStart str + val tagNameChs = (case CharVectorSlice.getItem tagNameChs + of SOME(#"<", r) => (case CharVectorSlice.getItem r + of SOME(#"/", r) => r + | _ => r + (* end case *)) + | _ => tagNameChs + (* end case *)) + in + Atom.atom (CharVectorSlice.map Char.toUpper tagNameChs) + end + +fun extractAttrs str = + let + val (_, tagRest) = splitTagStart str + val (tagRest', _) = Substring.splitr (fn c => c = #">") tagRest + in + parseAttrs (Substring.string tagRest') + end + +structure AtomMap : ORD_MAP = RedBlackMapFn(struct + type ord_key = Atom.atom + val compare = Atom.compare + end) + +fun element_tuple_to_ctor_maps ((tag_name, open_ctor, close_ctor_opt), + (open_map, close_map)) = + let val tag_atom = Atom.atom tag_name + val open_map' = AtomMap.insert(open_map, tag_atom, open_ctor) + val close_map' = case close_ctor_opt of + NONE => close_map + | SOME close_tok => AtomMap.insert( + close_map, tag_atom, close_tok) + in (open_map', close_map') end + +val (strict_open_map, strict_close_map) = + foldl element_tuple_to_ctor_maps (AtomMap.empty, AtomMap.empty) + strict_tuple_list + +val (loose_open_map, loose_close_map) = + foldl element_tuple_to_ctor_maps (strict_open_map, strict_close_map) + loose_tuple_list + +val (frameset_open_map, frameset_close_map) = + foldl element_tuple_to_ctor_maps (strict_open_map, strict_close_map) + frameset_tuple_list + +val open_map_ref = ref strict_open_map + +val close_map_ref = ref strict_close_map + +fun mkOpenTag payloadStr = + let val tag_atom = extractTag payloadStr + in case AtomMap.find(!open_map_ref, tag_atom) of + NONE => OPENTAG (tag_atom, (payloadStr, extractAttrs payloadStr)) + | SOME ctor => ctor (payloadStr, extractAttrs payloadStr) + end + +fun mkCloseTag payloadStr = + let val tag_atom = extractTag payloadStr + in case AtomMap.find(!close_map_ref, tag_atom) of + NONE => CLOSETAG tag_atom + | SOME tok => tok + end + +(* ____________________________________________________________ *) +(* Alternative tokToString useful for printing. + *) + +fun payloadToString (_, []) = "" + | payloadToString (_, attrs as (_ :: _)) = + " " ^ (HTML4Utils.attrsToStr attrs) + +fun tokToString EOF = "EOF" + | tokToString (OPENTAG (tagname, tagdata)) = + String.concat ["OPENTAG ", Atom.toString tagname, " ", + payloadToString tagdata] + | tokToString (CLOSETAG tagname) = "CLOSETAG " ^ (Atom.toString tagname) + | tokToString (DOCTYPE docdata) = docdata + | tokToString (PCDATA pcdata) = pcdata + | tokToString (COMMENT comment) = comment + | tokToString (CHAR_REF refint) = "&#" ^ (IntInf.toString refint) ^ ";" + | tokToString (ENTITY_REF refatom) = "&" ^ (Atom.toString refatom) ^ ";" + | tokToString (XML_PROCESSING directive) = "XML DIRECTIVE " ^ directive + | tokToString (STARTA payload) = + "<A" ^ (payloadToString payload) ^ ">" + | tokToString ENDA = "</A>" + | tokToString (STARTABBR payload) = + "<ABBR" ^ (payloadToString payload) ^ ">" + | tokToString ENDABBR = "</ABBR>" + | tokToString (STARTACRONYM payload) = + "<ACRONYM" ^ (payloadToString payload) ^ ">" + | tokToString ENDACRONYM = "</ACRONYM>" + | tokToString (STARTADDRESS payload) = + "<ADDRESS" ^ (payloadToString payload) ^ ">" + | tokToString ENDADDRESS = "</ADDRESS>" + | tokToString (STARTAREA payload) = + "<AREA" ^ (payloadToString payload) ^ ">" + | tokToString (STARTB payload) = + "<B" ^ (payloadToString payload) ^ ">" + | tokToString ENDB = "</B>" + | tokToString (STARTBASE payload) = + "<BASE" ^ (payloadToString payload) ^ ">" + | tokToString (STARTBDO payload) = + "<BDO" ^ (payloadToString payload) ^ ">" + | tokToString ENDBDO = "</BDO>" + | tokToString (STARTBIG payload) = + "<BIG" ^ (payloadToString payload) ^ ">" + | tokToString ENDBIG = "</BIG>" + | tokToString (STARTBLOCKQUOTE payload) = + "<BLOCKQUOTE" ^ (payloadToString payload) ^ ">" + | tokToString ENDBLOCKQUOTE = "</BLOCKQUOTE>" + | tokToString (STARTBODY payload) = + "<BODY" ^ (payloadToString payload) ^ ">" + | tokToString ENDBODY = "</BODY>" + | tokToString (STARTBR payload) = + "<BR" ^ (payloadToString payload) ^ ">" + | tokToString (STARTBUTTON payload) = + "<BUTTON" ^ (payloadToString payload) ^ ">" + | tokToString ENDBUTTON = "</BUTTON>" + | tokToString (STARTCAPTION payload) = + "<CAPTION" ^ (payloadToString payload) ^ ">" + | tokToString ENDCAPTION = "</CAPTION>" + | tokToString (STARTCITE payload) = + "<CITE" ^ (payloadToString payload) ^ ">" + | tokToString ENDCITE = "</CITE>" + | tokToString (STARTCODE payload) = + "<CODE" ^ (payloadToString payload) ^ ">" + | tokToString ENDCODE = "</CODE>" + | tokToString (STARTCOL payload) = + "<COL" ^ (payloadToString payload) ^ ">" + | tokToString (STARTCOLGROUP payload) = + "<COLGROUP" ^ (payloadToString payload) ^ ">" + | tokToString ENDCOLGROUP = "</COLGROUP>" + | tokToString (STARTDD payload) = + "<DD" ^ (payloadToString payload) ^ ">" + | tokToString ENDDD = "</DD>" + | tokToString (STARTDEL payload) = + "<DEL" ^ (payloadToString payload) ^ ">" + | tokToString ENDDEL = "</DEL>" + | tokToString (STARTDFN payload) = + "<DFN" ^ (payloadToString payload) ^ ">" + | tokToString ENDDFN = "</DFN>" + | tokToString (STARTDIV payload) = + "<DIV" ^ (payloadToString payload) ^ ">" + | tokToString ENDDIV = "</DIV>" + | tokToString (STARTDL payload) = + "<DL" ^ (payloadToString payload) ^ ">" + | tokToString ENDDL = "</DL>" + | tokToString (STARTDT payload) = + "<DT" ^ (payloadToString payload) ^ ">" + | tokToString ENDDT = "</DT>" + | tokToString (STARTEM payload) = + "<EM" ^ (payloadToString payload) ^ ">" + | tokToString ENDEM = "</EM>" + | tokToString (STARTFIELDSET payload) = + "<FIELDSET" ^ (payloadToString payload) ^ ">" + | tokToString ENDFIELDSET = "</FIELDSET>" + | tokToString (STARTFORM payload) = + "<FORM" ^ (payloadToString payload) ^ ">" + | tokToString ENDFORM = "</FORM>" + | tokToString (STARTH1 payload) = + "<H1" ^ (payloadToString payload) ^ ">" + | tokToString ENDH1 = "</H1>" + | tokToString (STARTH2 payload) = + "<H2" ^ (payloadToString payload) ^ ">" + | tokToString ENDH2 = "</H2>" + | tokToString (STARTH3 payload) = + "<H3" ^ (payloadToString payload) ^ ">" + | tokToString ENDH3 = "</H3>" + | tokToString (STARTH4 payload) = + "<H4" ^ (payloadToString payload) ^ ">" + | tokToString ENDH4 = "</H4>" + | tokToString (STARTH5 payload) = + "<H5" ^ (payloadToString payload) ^ ">" + | tokToString ENDH5 = "</H5>" + | tokToString (STARTH6 payload) = + "<H6" ^ (payloadToString payload) ^ ">" + | tokToString ENDH6 = "</H6>" + | tokToString (STARTHEAD payload) = + "<HEAD" ^ (payloadToString payload) ^ ">" + | tokToString ENDHEAD = "</HEAD>" + | tokToString (STARTHR payload) = + "<HR" ^ (payloadToString payload) ^ ">" + | tokToString (STARTHTML payload) = + "<HTML" ^ (payloadToString payload) ^ ">" + | tokToString ENDHTML = "</HTML>" + | tokToString (STARTI payload) = + "<I" ^ (payloadToString payload) ^ ">" + | tokToString ENDI = "</I>" + | tokToString (STARTIMG payload) = + "<IMG" ^ (payloadToString payload) ^ ">" + | tokToString (STARTINPUT payload) = + "<INPUT" ^ (payloadToString payload) ^ ">" + | tokToString (STARTINS payload) = + "<INS" ^ (payloadToString payload) ^ ">" + | tokToString ENDINS = "</INS>" + | tokToString (STARTKBD payload) = + "<KBD" ^ (payloadToString payload) ^ ">" + | tokToString ENDKBD = "</KBD>" + | tokToString (STARTLABEL payload) = + "<LABEL" ^ (payloadToString payload) ^ ">" + | tokToString ENDLABEL = "</LABEL>" + | tokToString (STARTLEGEND payload) = + "<LEGEND" ^ (payloadToString payload) ^ ">" + | tokToString ENDLEGEND = "</LEGEND>" + | tokToString (STARTLI payload) = + "<LI" ^ (payloadToString payload) ^ ">" + | tokToString ENDLI = "</LI>" + | tokToString (STARTLINK payload) = + "<LINK" ^ (payloadToString payload) ^ ">" + | tokToString (STARTMAP payload) = + "<MAP" ^ (payloadToString payload) ^ ">" + | tokToString ENDMAP = "</MAP>" + | tokToString (STARTMETA payload) = + "<META" ^ (payloadToString payload) ^ ">" + | tokToString (STARTNOSCRIPT payload) = + "<NOSCRIPT" ^ (payloadToString payload) ^ ">" + | tokToString ENDNOSCRIPT = "</NOSCRIPT>" + | tokToString (STARTOBJECT payload) = + "<OBJECT" ^ (payloadToString payload) ^ ">" + | tokToString ENDOBJECT = "</OBJECT>" + | tokToString (STARTOL payload) = + "<OL" ^ (payloadToString payload) ^ ">" + | tokToString ENDOL = "</OL>" + | tokToString (STARTOPTGROUP payload) = + "<OPTGROUP" ^ (payloadToString payload) ^ ">" + | tokToString ENDOPTGROUP = "</OPTGROUP>" + | tokToString (STARTOPTION payload) = + "<OPTION" ^ (payloadToString payload) ^ ">" + | tokToString ENDOPTION = "</OPTION>" + | tokToString (STARTP payload) = + "<P" ^ (payloadToString payload) ^ ">" + | tokToString ENDP = "</P>" + | tokToString (STARTPARAM payload) = + "<PARAM" ^ (payloadToString payload) ^ ">" + | tokToString (STARTPRE payload) = + "<PRE" ^ (payloadToString payload) ^ ">" + | tokToString ENDPRE = "</PRE>" + | tokToString (STARTQ payload) = + "<Q" ^ (payloadToString payload) ^ ">" + | tokToString ENDQ = "</Q>" + | tokToString (STARTSAMP payload) = + "<SAMP" ^ (payloadToString payload) ^ ">" + | tokToString ENDSAMP = "</SAMP>" + | tokToString (STARTSCRIPT payload) = + "<SCRIPT" ^ (payloadToString payload) ^ ">" + | tokToString ENDSCRIPT = "</SCRIPT>" + | tokToString (STARTSELECT payload) = + "<SELECT" ^ (payloadToString payload) ^ ">" + | tokToString ENDSELECT = "</SELECT>" + | tokToString (STARTSMALL payload) = + "<SMALL" ^ (payloadToString payload) ^ ">" + | tokToString ENDSMALL = "</SMALL>" + | tokToString (STARTSPAN payload) = + "<SPAN" ^ (payloadToString payload) ^ ">" + | tokToString ENDSPAN = "</SPAN>" + | tokToString (STARTSTRONG payload) = + "<STRONG" ^ (payloadToString payload) ^ ">" + | tokToString ENDSTRONG = "</STRONG>" + | tokToString (STARTSTYLE payload) = + "<STYLE" ^ (payloadToString payload) ^ ">" + | tokToString ENDSTYLE = "</STYLE>" + | tokToString (STARTSUB payload) = + "<SUB" ^ (payloadToString payload) ^ ">" + | tokToString ENDSUB = "</SUB>" + | tokToString (STARTSUP payload) = + "<SUP" ^ (payloadToString payload) ^ ">" + | tokToString ENDSUP = "</SUP>" + | tokToString (STARTTABLE payload) = + "<TABLE" ^ (payloadToString payload) ^ ">" + | tokToString ENDTABLE = "</TABLE>" + | tokToString (STARTTBODY payload) = + "<TBODY" ^ (payloadToString payload) ^ ">" + | tokToString ENDTBODY = "</TBODY>" + | tokToString (STARTTD payload) = + "<TD" ^ (payloadToString payload) ^ ">" + | tokToString ENDTD = "</TD>" + | tokToString (STARTTEXTAREA payload) = + "<TEXTAREA" ^ (payloadToString payload) ^ ">" + | tokToString ENDTEXTAREA = "</TEXTAREA>" + | tokToString (STARTTFOOT payload) = + "<TFOOT" ^ (payloadToString payload) ^ ">" + | tokToString ENDTFOOT = "</TFOOT>" + | tokToString (STARTTH payload) = + "<TH" ^ (payloadToString payload) ^ ">" + | tokToString ENDTH = "</TH>" + | tokToString (STARTTHEAD payload) = + "<THEAD" ^ (payloadToString payload) ^ ">" + | tokToString ENDTHEAD = "</THEAD>" + | tokToString (STARTTITLE payload) = + "<TITLE" ^ (payloadToString payload) ^ ">" + | tokToString ENDTITLE = "" + | tokToString (STARTTR payload) = + "" + | tokToString ENDTR = "
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    NameStart TagEnd TagEmptyDepr.DTDDescription
    A     anchor
    ABBR     abbreviated form (e.g., WWW, HTTP, +etc.)
    ACRONYM      
    ADDRESS     information on author
    APPLET   DLJava applet
    AREA FE  client-side image map area
    B     bold text style
    BASE FE  document base URI
    +BASEFONT FEDLbase font size
    BDO     I18N BiDi over-ride
    BIG     large text style
    +BLOCKQUOTE     long quotation
    BODYOO   document body
    BR FE  forced line break
    BUTTON     push button
    CAPTION     table caption
    CENTER   DLshorthand for DIV align=center
    CITE     citation
    CODE     computer code fragment
    COL FE  table column
    +COLGROUP O   table column group
    DD O   definition description
    DEL     deleted text
    DFN     instance definition
    DIR   DLdirectory list
    DIV     generic language/style container
    DL     definition list
    DT O   definition term
    EM     emphasis
    +FIELDSET     form control group
    FONT   DLlocal change to font
    FORM     interactive form
    FRAME FE Fsubwindow
    +FRAMESET    Fwindow subdivision
    H1     heading
    H2     heading
    H3     heading
    H4     heading
    H5     heading
    H6     heading
    HEADOO   document head
    HR FE  horizontal rule
    HTMLOO   document root element
    I     italic text style
    IFRAME    Linline subwindow
    IMG FE  Embedded image
    INPUT FE  form control
    INS     inserted text
    ISINDEX FEDLsingle line prompt
    KBD     text to be entered by the user
    LABEL     form field label text
    LEGEND     fieldset legend
    LI O   list item
    LINK FE  a media-independent link
    MAP     client-side image map
    MENU   DLmenu list
    META FE  generic metainformation
    +NOFRAMES    Falternate content container for non +frame-based rendering
    +NOSCRIPT     alternate content container for non +script-based rendering
    OBJECT     generic embedded object
    OL     ordered list
    +OPTGROUP     option group
    OPTION O   selectable choice
    P O   paragraph
    PARAM FE  named property value
    PRE     preformatted text
    Q     short inline quotation
    S   DLstrike-through text style
    SAMP     sample program output, scripts, +etc.
    SCRIPT     script statements
    SELECT     option selector
    SMALL     small text style
    SPAN     generic language/style container
    STRIKE   DLstrike-through text
    STRONG     strong emphasis
    STYLE     style info
    SUB     subscript
    SUP     superscript
    TABLE      
    TBODYOO   table body
    TD O   table data cell
    +TEXTAREA     multi-line text field
    TFOOT O   table footer
    TH O   table header cell
    THEAD O   table header
    TITLE     document title
    TR O   table row
    TT     teletype or monospaced text style
    U   DLunderlined text style
    UL     unordered list
    VAR     instance of a variable or program +argument
    + + + + + diff --git a/smlnj-lib/HTML4/tests/entities.html b/smlnj-lib/HTML4/tests/entities.html new file mode 100644 index 0000000..a9ad52d --- /dev/null +++ b/smlnj-lib/HTML4/tests/entities.html @@ -0,0 +1,1224 @@ + + + + + Characters [Robin’s HTML 4.0 Conformance Test] + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    Characters (§24)

    + + + + + + + + + + + + + + + + + + + + + + + + +
    Color Key
    HTML 1.0 entity
    HTML 2.0 entities
    HTML 4.0 entities
    + in Adobe Symbol font
    HTML 4.0 entities
    XHTML 1.0 entities
    + +
    +

    These are the character entities from all of the HTML specifications.

    + +

    HTML 4.0 defines many new character entities, primarily glyphs from the widely available Abobe font Symbol and punctuation from the Win1252 (Windows: Western) character set.

    + +

    HTML 4.0 also introduces hexadecimal numeric entity references, and specifies that numeric entities be treated as Unicode glyphs, which includes just about every typographical character known to typesetters.

    + +

    To test your Web browser’s support for the full range of Unicode characters, I recommend the test suite in Alan Wood’s Unicode Resources.

    + +

    You may also view this table with MathML entities.

    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    Character Entities
    Name
    Decimal
    Hex
    Named
    Basic Latin (ASCII)
    quotation mark
    #34"
    #x22"
    quot"
    ampersand
    #38&
    #x26&
    amp&
    apostrophe
    #39'
    #x27'
    apos'
    less-than sign
    #60<
    #x3c<
    lt<
    greater-than sign
    #62>
    #x3e>
    gt>
    Latin-1 Supplement (ISO 8859-1)
    no-break space
    #160 
    #xa0 
    nbsp 
    inverted exclamation mark
    #161¡
    #xa1¡
    iexcl¡
    cent sign
    #162¢
    #xa2¢
    cent¢
    pound sign
    #163£
    #xa3£
    pound£
    currency sign
    #164¤
    #xa4¤
    curren¤
    yen sign
    #165¥
    #xa5¥
    yen¥
    broken bar
    #166¦
    #xa6¦
    brvbar¦
    section sign
    #167§
    #xa7§
    sect§
    diaeresis
    #168¨
    #xa8¨
    uml¨
    copyright sign
    #169©
    #xa9©
    copy©
    feminine ordinal indicator
    #170ª
    #xaaª
    ordfª
    left-pointing double angle quotation mark
    #171«
    #xab«
    laquo«
    not sign
    #172¬
    #xac¬
    not¬
    soft hyphen
    #173#xadshy
    registered sign
    #174®
    #xae®
    reg®
    macron
    #175¯
    #xaf¯
    macr¯
    degree sign
    #176°
    #xb0°
    deg°
    plus-minus sign
    #177±
    #xb1±
    plusmn±
    superscript two
    #178²
    #xb2²
    sup2²
    superscript three
    #179³
    #xb3³
    sup3³
    acute accent
    #180´
    #xb4´
    acute´
    micro sign
    #181µ
    #xb5µ
    microµ
    pilcrow sign
    #182
    #xb6
    para
    middle dot
    #183·
    #xb7·
    middot·
    cedilla
    #184¸
    #xb8¸
    cedil¸
    superscript one
    #185¹
    #xb9¹
    sup1¹
    masculine ordinal indicator
    #186º
    #xbaº
    ordmº
    right-pointing double angle quotation mark
    #187»
    #xbb»
    raquo»
    vulgar fraction one quarter
    #188¼
    #xbc¼
    frac14¼
    vulgar fraction one half
    #189½
    #xbd½
    frac12½
    vulgar fraction three quarters
    #190¾
    #xbe¾
    frac34¾
    inverted question mark
    #191¿
    #xbf¿
    iquest¿
    Latin capital letter A with grave
    #192À
    #xc0À
    AgraveÀ
    Latin capital letter A with acute
    #193Á
    #xc1Á
    AacuteÁ
    Latin capital letter A with circumflex
    #194Â
    #xc2Â
    AcircÂ
    Latin capital letter A with tilde
    #195Ã
    #xc3Ã
    AtildeÃ
    Latin capital letter A with diaeresis
    #196Ä
    #xc4Ä
    AumlÄ
    Latin capital letter A with ring above
    #197Å
    #xc5Å
    AringÅ
    Latin capital letter AE
    #198Æ
    #xc6Æ
    AEligÆ
    Latin capital letter C with cedilla
    #199Ç
    #xc7Ç
    CcedilÇ
    Latin capital letter E with grave
    #200È
    #xc8È
    EgraveÈ
    Latin capital letter E with acute
    #201É
    #xc9É
    EacuteÉ
    Latin capital letter E with circumflex
    #202Ê
    #xcaÊ
    EcircÊ
    Latin capital letter E with diaeresis
    #203Ë
    #xcbË
    EumlË
    Latin capital letter I with grave
    #204Ì
    #xccÌ
    IgraveÌ
    Latin capital letter I with acute
    #205Í
    #xcdÍ
    IacuteÍ
    Latin capital letter I with circumflex
    #206Î
    #xceÎ
    IcircÎ
    Latin capital letter I with diaeresis
    #207Ï
    #xcfÏ
    IumlÏ
    Latin capital letter ETH
    #208Ð
    #xd0Ð
    ETHÐ
    Latin capital letter N with tilde
    #209Ñ
    #xd1Ñ
    NtildeÑ
    Latin capital letter O with grave
    #210Ò
    #xd2Ò
    OgraveÒ
    Latin capital letter O with acute
    #211Ó
    #xd3Ó
    OacuteÓ
    Latin capital letter O with circumflex
    #212Ô
    #xd4Ô
    OcircÔ
    Latin capital letter O with tilde
    #213Õ
    #xd5Õ
    OtildeÕ
    Latin capital letter O with diaeresis
    #214Ö
    #xd6Ö
    OumlÖ
    multiplication sign
    #215×
    #xd7×
    times×
    Latin capital letter O with stroke
    #216Ø
    #xd8Ø
    OslashØ
    Latin capital letter U with grave
    #217Ù
    #xd9Ù
    UgraveÙ
    Latin capital letter U with acute
    #218Ú
    #xdaÚ
    UacuteÚ
    Latin capital letter U with circumflex
    #219Û
    #xdbÛ
    UcircÛ
    Latin capital letter U with diaeresis
    #220Ü
    #xdcÜ
    UumlÜ
    Latin capital letter Y with acute
    #221Ý
    #xddÝ
    YacuteÝ
    Latin capital letter Thorn
    #222Þ
    #xdeÞ
    THORNÞ
    Latin small letter sharp s
    #223ß
    #xdfß
    szligß
    Latin small letter a with grave
    #224à
    #xe0à
    agraveà
    Latin small letter a with acute
    #225á
    #xe1á
    aacuteá
    Latin small letter a with circumflex
    #226â
    #xe2â
    acircâ
    Latin small letter a with tilde
    #227ã
    #xe3ã
    atildeã
    Latin small letter a with diaeresis
    #228ä
    #xe4ä
    aumlä
    Latin small letter a with ring above
    #229å
    #xe5å
    aringå
    Latin small letter ae
    #230æ
    #xe6æ
    aeligæ
    Latin small letter c with cedilla
    #231ç
    #xe7ç
    ccedilç
    Latin small letter e with grave
    #232è
    #xe8è
    egraveè
    Latin small letter e with acute
    #233é
    #xe9é
    eacuteé
    Latin small letter e with circumflex
    #234ê
    #xeaê
    ecircê
    Latin small letter e with diaeresis
    #235ë
    #xebë
    eumlë
    Latin small letter i with grave
    #236ì
    #xecì
    igraveì
    Latin small letter i with acute
    #237í
    #xedí
    iacuteí
    Latin small letter i with circumflex
    #238î
    #xeeî
    icircî
    Latin small letter i with diaeresis
    #239ï
    #xefï
    iumlï
    Latin small letter eth
    #240ð
    #xf0ð
    ethð
    Latin small letter n with tilde
    #241ñ
    #xf1ñ
    ntildeñ
    Latin small letter o with grave
    #242ò
    #xf2ò
    ograveò
    Latin small letter o with acute
    #243ó
    #xf3ó
    oacuteó
    Latin small letter o with circumflex
    #244ô
    #xf4ô
    ocircô
    Latin small letter o with tilde
    #245õ
    #xf5õ
    otildeõ
    Latin small letter o with diaeresis
    #246ö
    #xf6ö
    oumlö
    division sign
    #247÷
    #xf7÷
    divide÷
    Latin small letter o with stroke
    #248ø
    #xf8ø
    oslashø
    Latin small letter u with grave
    #249ù
    #xf9ù
    ugraveù
    Latin small letter u with acute
    #250ú
    #xfaú
    uacuteú
    Latin small letter u with circumflex
    #251û
    #xfbû
    ucircû
    Latin small letter u with diaeresis
    #252ü
    #xfcü
    uumlü
    Latin small letter y with acute
    #253ý
    #xfdý
    yacuteý
    Latin small letter thorn
    #254þ
    #xfeþ
    thornþ
    Latin small letter y with diaeresis
    #255ÿ
    #xffÿ
    yumlÿ
    Latin Extended-A
    Latin capital ligature OE
    #338Œ
    #x152Œ
    OEligŒ
    Latin small ligature oe
    #339œ
    #x153œ
    oeligœ
    Latin capital letter S with caron
    #352Š
    #x160Š
    ScaronŠ
    Latin small letter s with caron
    #353š
    #x161š
    scaronš
    Latin capital letter Y with diaeresis
    #376Ÿ
    #x178Ÿ
    YumlŸ
    Latin Extended-B
    Latin small letter f with hook
    #402ƒ
    #x192ƒ
    fnofƒ
    Spacing Modifier Letters
    modifier letter circumflex accent
    #710ˆ
    #x2c6ˆ
    circˆ
    small tilde
    #732˜
    #x2dc˜
    tilde˜
    Greek and Coptic
    Greek capital letter Alpha
    #913Α
    #x391Α
    AlphaΑ
    Greek capital letter Beta
    #914Β
    #x392Β
    BetaΒ
    Greek capital letter Gamma
    #915Γ
    #x393Γ
    GammaΓ
    Greek capital letter Delta
    #916Δ
    #x394Δ
    DeltaΔ
    Greek capital letter Epsilon
    #917Ε
    #x395Ε
    EpsilonΕ
    Greek capital letter Zeta
    #918Ζ
    #x396Ζ
    ZetaΖ
    Greek capital letter Eta
    #919Η
    #x397Η
    EtaΗ
    Greek capital letter Theta
    #920Θ
    #x398Θ
    ThetaΘ
    Greek capital letter Iota
    #921Ι
    #x399Ι
    IotaΙ
    Greek capital letter Kappa
    #922Κ
    #x39aΚ
    KappaΚ
    Greek capital letter Lamda
    #923Λ
    #x39bΛ
    LambdaΛ
    Greek capital letter Mu
    #924Μ
    #x39cΜ
    MuΜ
    Greek capital letter Nu
    #925Ν
    #x39dΝ
    NuΝ
    Greek capital letter Xi
    #926Ξ
    #x39eΞ
    XiΞ
    Greek capital letter Omicron
    #927Ο
    #x39fΟ
    OmicronΟ
    Greek capital letter Pi
    #928Π
    #x3a0Π
    PiΠ
    Greek capital letter Rho
    #929Ρ
    #x3a1Ρ
    RhoΡ
    Greek capital letter Sigma
    #931Σ
    #x3a3Σ
    SigmaΣ
    Greek capital letter Tau
    #932Τ
    #x3a4Τ
    TauΤ
    Greek capital letter Upsilon
    #933Υ
    #x3a5Υ
    UpsilonΥ
    Greek capital letter Phi
    #934Φ
    #x3a6Φ
    PhiΦ
    Greek capital letter Chi
    #935Χ
    #x3a7Χ
    ChiΧ
    Greek capital letter Psi
    #936Ψ
    #x3a8Ψ
    PsiΨ
    Greek capital letter Omega
    #937Ω
    #x3a9Ω
    OmegaΩ
    Greek small letter alpha
    #945α
    #x3b1α
    alphaα
    Greek small letter beta
    #946β
    #x3b2β
    betaβ
    Greek small letter gamma
    #947γ
    #x3b3γ
    gammaγ
    Greek small letter delta
    #948δ
    #x3b4δ
    deltaδ
    Greek small letter epsilon
    #949ε
    #x3b5ε
    epsilonε
    Greek small letter zeta
    #950ζ
    #x3b6ζ
    zetaζ
    Greek small letter eta
    #951η
    #x3b7η
    etaη
    Greek small letter theta
    #952θ
    #x3b8θ
    thetaθ
    Greek small letter iota
    #953ι
    #x3b9ι
    iotaι
    Greek small letter kappa
    #954κ
    #x3baκ
    kappaκ
    Greek small letter lamda
    #955λ
    #x3bbλ
    lambdaλ
    Greek small letter mu
    #956μ
    #x3bcμ
    muμ
    Greek small letter nu
    #957ν
    #x3bdν
    nuν
    Greek small letter xi
    #958ξ
    #x3beξ
    xiξ
    Greek small letter omicron
    #959ο
    #x3bfο
    omicronο
    Greek small letter pi
    #960π
    #x3c0π
    piπ
    Greek small letter rho
    #961ρ
    #x3c1ρ
    rhoρ
    Greek small letter final sigma
    #962ς
    #x3c2ς
    sigmafς
    Greek small letter sigma
    #963σ
    #x3c3σ
    sigmaσ
    Greek small letter tau
    #964τ
    #x3c4τ
    tauτ
    Greek small letter upsilon
    #965υ
    #x3c5υ
    upsilonυ
    Greek small letter phi
    #966φ
    #x3c6φ
    phiφ
    Greek small letter chi
    #967χ
    #x3c7χ
    chiχ
    Greek small letter psi
    #968ψ
    #x3c8ψ
    psiψ
    Greek small letter omega
    #969ω
    #x3c9ω
    omegaω
    Greek theta symbol
    #977ϑ
    #x3d1ϑ
    thetasymϑ
    Greek upsilon with hook symbol
    #978ϒ
    #x3d2ϒ
    upsihϒ
    Greek pi symbol
    #982ϖ
    #x3d6ϖ
    pivϖ
    General Punctuation
    en space
    #8194
    #x2002
    ensp
    em space
    #8195
    #x2003
    emsp
    thin space
    #8201
    #x2009
    thinsp
    zero width non-joiner
    #8204#x200czwnj
    zero width joiner
    #8205#x200dzwj
    left-to-right mark
    #8206#x200elrm
    right-to-left mark
    #8207#x200frlm
    en dash
    #8211
    #x2013
    ndash
    em dash
    #8212
    #x2014
    mdash
    left single quotation mark
    #8216
    #x2018
    lsquo
    right single quotation mark
    #8217
    #x2019
    rsquo
    single low-9 quotation mark
    #8218
    #x201a
    sbquo
    left double quotation mark
    #8220
    #x201c
    ldquo
    right double quotation mark
    #8221
    #x201d
    rdquo
    double low-9 quotation mark
    #8222
    #x201e
    bdquo
    dagger
    #8224
    #x2020
    dagger
    double dagger
    #8225
    #x2021
    Dagger
    bullet
    #8226
    #x2022
    bull
    horizontal ellipsis
    #8230
    #x2026
    hellip
    per mille sign
    #8240
    #x2030
    permil
    prime
    #8242
    #x2032
    prime
    double prime
    #8243
    #x2033
    Prime
    single left-pointing angle quotation mark
    #8249
    #x2039
    lsaquo
    single right-pointing angle quotation mark
    #8250
    #x203a
    rsaquo
    overline
    #8254
    #x203e
    oline
    fraction slash
    #8260
    #x2044
    frasl
    Currency Symbols
    euro sign
    #8364
    #x20ac
    euro
    Letterlike Symbols
    black-letter capital I
    #8465
    #x2111
    image
    script capital P
    #8472
    #x2118
    weierp
    black-letter capital R
    #8476
    #x211c
    real
    trade mark sign
    #8482
    #x2122
    trade
    alef symbol
    #8501
    #x2135
    alefsym
    Arrows
    leftwards arrow
    #8592
    #x2190
    larr
    upwards arrow
    #8593
    #x2191
    uarr
    rightwards arrow
    #8594
    #x2192
    rarr
    downwards arrow
    #8595
    #x2193
    darr
    left right arrow
    #8596
    #x2194
    harr
    downwards arrow with corner leftwards
    #8629
    #x21b5
    crarr
    leftwards double arrow
    #8656
    #x21d0
    lArr
    upwards double arrow
    #8657
    #x21d1
    uArr
    rightwards double arrow
    #8658
    #x21d2
    rArr
    downwards double arrow
    #8659
    #x21d3
    dArr
    left right double arrow
    #8660
    #x21d4
    hArr
    Mathematical Operators
    for all
    #8704
    #x2200
    forall
    partial differential
    #8706
    #x2202
    part
    there exists
    #8707
    #x2203
    exist
    empty set
    #8709
    #x2205
    empty
    nabla
    #8711
    #x2207
    nabla
    element of
    #8712
    #x2208
    isin
    not an element of
    #8713
    #x2209
    notin
    contains as member
    #8715
    #x220b
    ni
    n-ary product
    #8719
    #x220f
    prod
    n-ary summation
    #8721
    #x2211
    sum
    minus sign
    #8722
    #x2212
    minus
    asterisk operator
    #8727
    #x2217
    lowast
    square root
    #8730
    #x221a
    radic
    proportional to
    #8733
    #x221d
    prop
    infinity
    #8734
    #x221e
    infin
    angle
    #8736
    #x2220
    ang
    logical and
    #8743
    #x2227
    and
    logical or
    #8744
    #x2228
    or
    intersection
    #8745
    #x2229
    cap
    union
    #8746
    #x222a
    cup
    integral
    #8747
    #x222b
    int
    therefore
    #8756
    #x2234
    there4
    tilde operator
    #8764
    #x223c
    sim
    approximately equal to
    #8773
    #x2245
    cong
    almost equal to
    #8776
    #x2248
    asymp
    not equal to
    #8800
    #x2260
    ne
    identical to
    #8801
    #x2261
    equiv
    less-than or equal to
    #8804
    #x2264
    le
    greater-than or equal to
    #8805
    #x2265
    ge
    subset of
    #8834
    #x2282
    sub
    superset of
    #8835
    #x2283
    sup
    not a subset of
    #8836
    #x2284
    nsub
    subset of or equal to
    #8838
    #x2286
    sube
    superset of or equal to
    #8839
    #x2287
    supe
    circled plus
    #8853
    #x2295
    oplus
    circled times
    #8855
    #x2297
    otimes
    up tack
    #8869
    #x22a5
    perp
    dot operator
    #8901
    #x22c5
    sdot
    Miscellaneous Technical
    left ceiling
    #8968
    #x2308
    lceil
    right ceiling
    #8969
    #x2309
    rceil
    left floor
    #8970
    #x230a
    lfloor
    right floor
    #8971
    #x230b
    rfloor
    left-pointing angle bracket
    #9001
    #x2329
    lang
    right-pointing angle bracket
    #9002
    #x232a
    rang
    Geometric Shapes
    lozenge
    #9674
    #x25ca
    loz
    Miscellaneous Symbols
    black spade suit
    #9824
    #x2660
    spades
    black club suit
    #9827
    #x2663
    clubs
    black heart suit
    #9829
    #x2665
    hearts
    black diamond suit
    #9830
    #x2666
    diams
    + +
    +

    Your Browser

    + +

    Your Web browser identified itself as Wget/1.11.4 when it requested this page.

    + + + +
    + + + + + diff --git a/smlnj-lib/HTML4/tests/forms.html b/smlnj-lib/HTML4/tests/forms.html new file mode 100644 index 0000000..bdde919 --- /dev/null +++ b/smlnj-lib/HTML4/tests/forms.html @@ -0,0 +1,624 @@ + + + Forms [Robin’s HTML 4.0 Conformance Test] + + + + + + + + + + + + + + + + +

    Forms (§17)

    + +
    +

    Fieldsets and tab indexes

    + +
    +
      +
    • <fieldset> groups related form controls into sections. +
    • <legend> assigns a caption to a <fieldset>. +
    + +
      +
    • tabindex lets you set the order in which hyperlinks and form controls gain focus. +
    + +

    XHTML 2.0: The W3C is discussing replacing tabindex in XHTML 2.0 with common nextfocus and prevfocus attributes, which contain an id reference of the next element to be activated.

    +
    + +
    +

    Example:

    +

    The controls are numbered according to their tab order. Tabbing forward from "1:" should take you to "2:", and so forth.

    + + +
    + <form action="#test">
    +
    + <fieldset>
    +
    + <legend>Tab Index</legend>
    + <p>The focus should move through the following controls in numerical order.</p>
    + <table>
    +
    + <tr>
    +
    + <td>1: <td><input tabindex=1 value="One" size=6>
    + <td>3: <td><input tabindex=3 value="Three" size=6>
    + <td>7: <td><input tabindex=7 value="Seven" size=6>
    +
    + <tr>
    +
    + <td>2: <td><input tabindex=2 value="Two" size=6>
    + <td>4: <td><input tabindex=4 value="Four" size=6>
    + <td>6: <td><input tabindex=6 value="Six" size=6>
    +
    +
    + </table>
    + <table>
    +
    + <tr>
    +
    + <td>5:
    + <td>
    +
    + <select tabindex=5>
    +
    + <option selected>Five
    +
    + </select>
    +
    +
    + <tr>
    +
    + <td>9:
    + <td><textarea rows=2 cols=3 tabindex=9>Nine</textarea>
    +
    +
    + </table>
    + 8: <button type="button" tabindex=8>Eight</button>
    +
    + </fieldset>
    +
    + </form>
    +
    +

    Your Web browser renders it like this:

    +
    +
    +
    + Tab Index +

    The focus should move through the following controls in numerical order.

    + + + +
    1: + 3: + 7: +
    2: + 4: + 6: +
    + + + +
    5: + + +
    9: + +
    + 8: +
    +
    +
    +
    +
    + +
    +

    Labels and access keys

    + +
    +

    <label> associates its contents with a form control. In a Web browser, selecting a label could give its corresponding control focus.

    +

    The accesskey attribute assigns a key to give focus to an element.

    + +

    XHTML 2.0: The W3C is discussing replacing accesskey in XHTML 2.0 with a common access attribute to assign an access name that will activate an element. accesskey is too associated with particular user interfaces. Instead of keystrokes, access associate words with elements which users may select them by other means, such as selecting them from a menu or speaking an access name into a microphone.

    +
    + +
    +

    Example:

    +

    Selecting (ex. clicking on) the following labels should activate the corresponding field. The highlighted letters are designated as shortcut keys for the field, which may be supported in different ways by different Web browsers.

    +

    For example, in Internet Explorer 4.0— and Netscape 6.0— for Windows, Alt‐U will select the “User name:” field.

    + + +
    + <form action="#test">
    +
    + <fieldset>
    +
    + <legend accesskey="L"><strong>L</strong>abels and Access Keys</legend>
    + <p>Selecting the following labels should activate the corresponding form control. You may also be able to use the highlighted letters to select a form control.</p>
    +
    + <label accesskey="U"><strong>U</strong>ser name: <input type="text" name="uname" size=40></label><br>
    + <label for="email"><strong>E</strong>&#8208;mail: <input id="email" accesskey="E" type="text" name="email" size=40></label><br>
    +
    + <p>Sex:
    + <label accesskey="M"><input type="radio" name="sex" value="male"><strong>M</strong>ale</label><br>
    +
    + <input id="female" accesskey="F" type="radio" name="sex" value="female"><label for="female"><strong>F</strong>emale</label></p>
    +
    + <p><label accesskey="G"><strong>G</strong>ender:
    + <select size=2>
    +
    + <option>Male
    + <option>Female
    +
    + </select></label></p>
    +
    + <p><label for="honorific" accesskey="H"><strong>H</strong>onorific:</label>
    + <select id="honorific" size=3>
    +
    + <option>Mr.
    + <option>Ms.
    + <option>Miss
    + <option>Mrs.
    + <option>Dr.
    +
    + </select></p>
    +
    + <p>Where did you hear about us? (check all that apply)<br>
    + <label accesskey="S"><input type="checkbox" name="where" value="search"><strong>S</strong>earch engine</label>
    +
    + <label><input type="checkbox" accesskey="O" name="where" value="other"><strong>O</strong>ther</label></p>
    +
    + <p><label accesskey="W">If other, <strong>w</strong>here?<br>
    + <textarea rows=2 cols=40></textarea></label></p>
    +
    + <p><label><strong>C</strong>omments:<br>
    + <textarea rows=2 cols=40 accesskey="C"></textarea></label></p>
    +
    + </fieldset>
    +
    + </form>
    +
    +

    Your Web browser renders it like this:

    +
    +
    +
    + Labels and Access Keys +

    Selecting the following labels should activate the corresponding form control. You may also be able to use the highlighted letters to select a form control.

    + +
    +
    + +

    Sex: + + +

    + +

    + +

    +

    + +

    Where did you hear about us? (check all that apply)
    + + +

    + +

    + +

    +
    +
    +
    + +

    Related Mozilla bug reports: Accesskey-XUL, Legend-Accesskey, Fieldset-Eats-Spaces, Checkbox-Accesskey.

    +

    Related Internet Explorer bug reports: Channel9 Wiki: Internet Explorer Standards Support.

    +
    +
    + +
    +

    Buttons

    + +
    +

    <button> allows push button controls to contain any sort of markup, not just simple text.

    +
    + +
    +

    Example:

    +

    This form has buttons of each type. The change message buttons change the message field when selected, if your Web browser supports ECMAScript. If you type over the text in the message field, Reset should restore the message to its default. Submitting the form should reload this page.

    + + +
    + <form action="#test">
    +
    + <fieldset>
    +
    + <legend>Buttons</legend>
    + <p>Message: <input id="message" type="text" name="message" size="40" value="Change this then try Reset."></p>
    + <p>Change message:
    + <button type="button" onclick="document .getElementById('message') .setAttribute('value', 'Take it easy!')">Get Some R&amp;R</button>
    + <button type="button" onclick="document .getElementById('message') .setAttribute('value', 'Have a nice day!')">Smile <img src="/images/smiley/smile.png" alt=":-)"></button>
    + <button type="button" onclick="document .getElementById('message') .setAttribute('value', 'Lather, rinse, repeat')">
    +
    + <ul>
    +
    + <li>Lather
    + <li>Rinse
    + <li>Repeat
    +
    + </ul>
    +
    + </button></p>
    + <p><button type="submit">Submit</button>
    + <button type="reset">Reset</button></p>
    +
    + </fieldset>
    +
    + </form>
    +
    +

    Your Web browser renders it like this:

    +
    +
    +
    + Buttons +

    Message:

    +

    Change message: + + +

    +

    +

    +
    +
    +
    +
    +
    + +
    +

    Option groups

    + +
    +

    <optgroup> groups into sections the <option> elements in a <select> control. A Web browser could add dividers or headings to the drop‐down list, or collapse them into cascading drop‐down menus.

    +

    The label attribute on the <optgroup> and <option> elements provides a name to use within grouped selection menus.

    +
    + +
    +

    Example:

    + +
    + <form action="#test">
    +
    + <fieldset>
    +
    + <legend>Option Groups</legend>
    + <p>Browser of Choice:
    + <select name="agent">
    +
    + <optgroup label="Microsoft Internet Explorer">
    +
    + <option label="5.x6.x" value="ie5">Microsoft Internet Explorer 5.x&ndash;6.x
    + <option label="4.x" value="ie4">Microsoft Internet Explorer 4.x
    + <option label="3.0 or lower" value="ie3">Microsoft Internet Explorer 3.0 or lower
    +
    + </optgroup>
    + <optgroup label="Mozilla">
    +
    + <option label="Firefox" value="fox">Mozilla Firefox
    + <option value="moz" selected>Mozilla
    +
    + </optgroup>
    + <optgroup label="Netscape">
    +
    + <option label="6.x" value="n6">Netscape 6.x
    + <option label="4.x" value="n4">Netscape 4.x
    + <option label="3.x or lower" value="n3">Netscape 3.x or lower
    +
    + </optgroup>
    + <option value="op">Opera
    + <option value="safari">Safari
    + <option value="omniweb">OmniWeb
    + <option value="icab">iCab
    + <option value="lynx">Lynx
    + <option value="other">Other
    +
    + </select></p>
    +
    + </fieldset>
    +
    + </form>
    +
    +

    Your Web browser renders it like this:

    +
    +
    +
    + Option Groups +

    Browser of Choice: +

    +
    +
    +
    + +

    Related Mozilla bug reports: Option-Label.

    +

    Related Internet Explorer bug reports: Channel9 Wiki: Internet Explorer Standards Support.

    +
    + +
    +

    Example:

    +

    The previous example again, but with size="10" set on the <select> element.

    +

    Your Web browser renders it like this:

    +
    +
    +
    + Option Groups +

    Browser of Choice: +

    +
    +
    +
    + +

    Related Mozilla bug reports: Option-Label.

    +

    Related Internet Explorer bug reports: Channel9 Wiki: Internet Explorer Standards Support.

    +
    +
    + +
    +

    Disabled and Read‐only

    + +
    +
      +
    • disabled prevents a form control from receiving focus or accepting input. Disabled controls do not send data when the form is submitted. +
    • readonly prohibits changes to a form control. Read‐only controls still send data when the form is submitted. +
    +
    + +
    +

    Though HTML 4.0’s definition of readonly implies that checkboxes and radio buttons may be set read‐only (and no errata has changed the definition), readonly is meant only for text or password.

    +

    In a working draft of the Web Forms 2.0 standard, WHAT Working Group clarifies, Specifically, it does not apply to radio buttons, check boxes, file upload fields, select elements, or any of the button types; the interface concept of readonly values does not apply to button‐like interfaces. Also, the Document Object Model level 2 standard describes the associated readOnly method as Relevant only when type has the value “text” or “password”.

    +

    (Opera 5.x supported read‐only checkboxes and radio buttons, but removed support for them in version 6.0.)

    +
    + +
    +

    Example:

    + +
    + <form action="#test">
    +
    + <fieldset>
    +
    + <legend>Disabled and Read&#8208;Only</legend>

    + <fieldset>
    +
    + <legend>Normal</legend>
    + <p>These controls should be fully functional.</p>
    + <input type="text" value="Changeable" size=14>
    + <input type="checkbox" checked>
    + <input type="radio" name="yn1">Yes <input type="radio" name="yn1" checked>No
    + <br>
    + <textarea rows=3 cols=20>Can edit this.</textarea>
    + <select size=3>
    +
    + <option>One
    + <option selected>Two
    + <option>Three
    +
    + </select>
    +
    + </fieldset>

    + <fieldset>
    +
    + <legend>Disabled</legend>
    + <p>You should not be able to select these controls.</p>
    + <input type="text" value="Unselectable" size=14 disabled>
    + <input type="checkbox" checked disabled>
    + <input type="radio" name="yn3" disabled>Yes <input type="radio" name="yn3" checked disabled>No
    + <br>
    + <textarea rows=3 cols=20 disabled>Can't touch this.</textarea>
    + <select size=3 disabled>
    +
    + <option>One
    + <option selected>Two
    + <option>Three
    +
    + </select>
    + <input type="reset" disabled>
    + <button type="reset" disabled>Undo changes</button>
    + <br>
    + <p>You should be able to select only option A or B.</p>
    + <select size=7>
    +
    + <optgroup label="Pass">
    +
    + <option>A
    + <option>B
    + <option disabled>C (unavailable)
    +
    + </optgroup>
    + <optgroup label="Fail (unavailable)" disabled>
    +
    + <option>D
    + <option>F
    +
    + </optgroup>
    +
    + </select>
    +
    + </fieldset>

    + <fieldset>
    +
    + <legend>Read&#8208;Only</legend>
    + <p>You should be able to select but not change these controls.</p>
    + <input type="text" value="Unchangeable" size=14 readonly>
    + <br>
    + <textarea rows=3 cols=20 readonly>Can't edit this.</textarea>
    +
    + </fieldset>
    +
    + </fieldset>
    +
    + </form>
    +
    +

    Your Web browser renders it like this:

    +
    +
    +
    + Disabled and Read‐Only + +
    + Normal +

    These controls should be fully functional.

    + + + Yes No +
    + + +
    + +
    + Disabled +

    You should not be able to select these controls.

    + + + Yes No +
    + + + + + +

    You should be able to select only option A or B.

    + +
    + +
    + Read‐Only +

    You should be able to select but not change these controls.

    + +
    + +
    +
    +
    +
    +
    +
    + +
    +

    Your Browser

    + +

    Your Web browser identified itself as Wget/1.11.4 when it requested this page.

    + + + +
    + + + diff --git a/smlnj-lib/HTML4/tests/framebody.html b/smlnj-lib/HTML4/tests/framebody.html new file mode 100644 index 0000000..0cf9073 --- /dev/null +++ b/smlnj-lib/HTML4/tests/framebody.html @@ -0,0 +1,87 @@ + + + Frames (subframe) [Robin’s HTML 4.0 Conformance Test] + + + + + + + + + + + + + + + + +

    Frames (§16)

    + +
    +

    This whole page is one large frame. If you disable frames in your Web browser, you should see the contents of the <frameset> element’s <noframes> element.

    +

    i‐Bench’s frame tests can do a more thorough check of your Web browser’s support for frames, including floating frames.

    + +

    XHTML 1.1: The W3C phased out frames in XHTML 1.1. Frames are destined to be replaced by more powerful markup like XLink.

    +

    Jakob Neilsen has written about why frames are fundamentally flawed and their use should be avoided.

    +
    + +
    +

    Inline frames (§16.5)

    + +
    +

    The <iframe> element generates inline frames, like a simplified <object> element.

    +
    + +
    +

    Example:

    + +
    + <iframe src="test.txt" longdesc="longdesc.txt">
    +
    + <p>An internal frame will replace this text if your Web browser supports it.</p>
    +
    + </iframe> +
    +

    Your Web browser renders it like this:

    +
    + +
    + +

    Related Mozilla bug reports: Frame-Longdesc.

    +
    +
    + +
    +

    Your Browser

    + +

    Your Web browser identified itself as Wget/1.11.4 when it requested this page.

    + + + +
    + + + diff --git a/smlnj-lib/HTML4/tests/frames.html b/smlnj-lib/HTML4/tests/frames.html new file mode 100644 index 0000000..b7329f2 --- /dev/null +++ b/smlnj-lib/HTML4/tests/frames.html @@ -0,0 +1,62 @@ + + + Frames [Robin’s HTML 4.0 Conformance Test] + + + + + + + + + + + + + + + + + <body> + <div id="navbar"> + <ul class="nl" id="breadcrumbs"><li><a href="./" rel="first">Robin&rsquo;s HTML 4.0 Test</a><ul><li>Frames</li></ul></li></ul> + <ul class="nl"> + <li><a href="./" rel="first" accesskey="1" title="First: Robin&rsquo;s HTML 4.0 Test">&#8676;<strong>1</strong>st</a></li> + <li><a href="forms" rel="prev" accesskey="P" title="Prior: Forms">&#8592;<strong>P</strong>rior</a></li> + <li><a href="objects" rel="next" accesskey="N" title="Next: Objects"><strong>N</strong>ext&#8594;</a></li> + </ul> + </div> + + <h1>Frames (<a class="specsect" href="http://www.w3.org/TR/html4/present/frames.html" title="HTML 4.0 Specification: Frames">&sect;16</a>)</h1> + + <div class="bodytext"> + <p>Your Web browser is displaying the contents of this document&rsquo;s <a class="specsect" href="http://www.w3.org/TR/html4/present/frames.html#edef-noframes" title="HTML 4.0 Specification: Frames"><code class="element">&lt;noframes></code></a> element, indicating that it does not support frames.</p> + <p>If you have disabled frames, try viewing this page with frames enabled.</p> + </div> + + <div id="browserver"> + <h2>Your Browser</h2> + + <p>Your Web browser identified itself as <strong><code id="useragent">Wget/1.11.4</code></strong> when it requested this page.</p> + + <script type="text/javascript" src="browserid.js"></script> + <noscript><p>Your Web browser did not run a script to reveal how it identifies itself to scripts.</p></noscript> + </div> + + <div id="footer"> + <div id="buttons"> + <a href="http://www.mozilla.com/firefox/" title="Mozilla Firefox"><object type="image/png" width="80" height="15" id="firefox_button" data="/images/buttons/firefox.png">Get Firefox</object></a> + <a href="http://my.opera.com/rlionheart/affiliate/" title="Opera"><object type="image/png" width="80" height="15" id="opera_button" data="/images/buttons/opera.png">Get Opera</object></a> + <a rel="license" href="http://creativecommons.org/licenses/by-sa/2.5/" title="Creative Commons Deed"><object type="image/png" width="80" height="15" id="cc_button" data="/images/buttons/cc.png">Some rights reserved.</object></a> + <a href="http://www.htmlhelp.com/cgi-bin/validate.cgi?url=referer" title="WDG HTML Validation Service"><object type="image/png" width="80" height="15" id="html401_button" data="/images/buttons/html401.png">Valid HTML 4.01 Frameset</object></a> + <a href="http://jigsaw.w3.org/css-validator/check/referer" title="W3C CSS Validation Service"><object type="image/png" width="80" height="15" id="css_button" data="/images/buttons/css.png">Valid CSS</object></a> + </div> + <p><a rel="copyright" href="/copyright" class="local">&copy;</a> 1998 <span class="vcard"><a rel="home" href="/" class="local fn">Robin Lionheart</a> (<a rev="made" href="mailto&#58;lionheart&#64;robinlionheart&#46;com" class="email local">lionheart&#64;robin&shy;lionheart&#46;com</a>) [<a href="/robin/pubkey" class="local">public key</a>]</span></p> + <div id="dehanced"> + <p>Best read with a browser that supports <abbr title="Extensible Hypertext Markup Language">XHTML</abbr>, <abbr title="Cascading Style Sheets level 2">CSS 2</abbr>, and <abbr title="Portable Network Graphics">PNG</abbr> images. I recommend <a href="http://www.mozilla.org/products/firefox/">Firefox</a>.</p> + <p>Degraded to fit your browser (JavaScript MIME type).</p> + </div> </div> + + + + diff --git a/smlnj-lib/HTML4/tests/id.html b/smlnj-lib/HTML4/tests/id.html new file mode 100644 index 0000000..504ea83 --- /dev/null +++ b/smlnj-lib/HTML4/tests/id.html @@ -0,0 +1,153 @@ + + + Common Attributes [Robin’s HTML 4.0 Conformance Test] + + + + + + + + + + + + + + + + + +

    Common Attributes

    + +
    +

    Identifiers as anchors (§7.5.2)

    + +
    +

    The id attribute assigns a unique name to an element. Similar to the name attribute of the <a> element, but it allows any element in the document body to be the destination of a link.

    +
    + +
    +

    Example:

    +

    The footnotes section of the Results page is in a division with the identifier footnotes:

    +
    + <div class="section bodytext" id="footnotes"> +
    + +

    This code is a hyperlink to that section:

    +
    + <a href="results.xhtml#footnotes">Footnotes</a> are listed beneath the results table. +
    + +

    This link should take you to that section:

    +
    + Footnotes are listed beneath the results table. +
    +
    +
    + +
    +

    Identifiers as style sheet selectors

    + +
    +

    Style sheet rules can be assigned to specific id values (unlike the obsolete name attribute).

    +
    + +
    +

    Example:

    +

    If your Web browser supports style sheets, the following should produce a centered and italicized paragraph.

    + +

    This page includes a style rule for the poem identifier:

    +
    + #poem { font-style: italic; text-align: center; } +
    + +

    Let’s use that identifier for a poem by Nancy Stark:

    +
    + <p id="poem">Higgledy&#8208;piggledy<br>
    +Nic&rsquo;laus Copernicus<br>
    +Looked at the Universe<br>
    +Spoke to the throng<br>
    +Give up your Ptolemy<br>
    +Rise up and follow me,<br>
    +Heliocentrically<br>
    +Ptolemy&rsquo;s wrong</p> +
    + +

    Your Web browser renders it like this:

    +
    +

    Higgledy‐piggledy
    + Nic’laus Copernicus
    + Looked at the Universe
    + Spoke to the throng
    + Give up your Ptolemy
    + Rise up and follow me,
    + Heliocentrically
    + Ptolemy’s wrong

    +
    +
    +
    + + + +
    +

    Classes (§7.5.2)

    + +
    +

    The class attribute assigns class names to an element. Classes are used as style sheet selectors.

    +
    + +
    +

    Example:

    +

    If your Web browser supports style sheets, the following should produce a paragraph with an underlined, blue phrase.

    + +

    This page includes a style rule for elements belonging to both the fake and link classes:

    +
    + .fake.link { text-decoration: underline; color: blue; }
    +
    + +

    This code has a <span> with two classes, fake and link:

    +
    + Hyperlinks traditionally appear <span class="fake link">underlined and blue</span> to distinguish them from body text. +
    + +

    Your Web browser renders it like this:

    +
    + Hyperlinks traditionally appear underlined and blue to distinguish them from body text. +
    +
    +
    + +
    +

    Your Browser

    + +

    Your Web browser identified itself as Wget/1.11.4 when it requested this page.

    + + + +
    + + + diff --git a/smlnj-lib/HTML4/tests/objaudio.html b/smlnj-lib/HTML4/tests/objaudio.html new file mode 100644 index 0000000..741f940 --- /dev/null +++ b/smlnj-lib/HTML4/tests/objaudio.html @@ -0,0 +1,118 @@ + + + Objects: Audio [Robin’s HTML 4.0 Conformance Test] + + + + + + + + + + + + + + + + +

    Objects: Audio (§13.3)

    + +
    +
    +

    <object> could also be used for audio clips. Having a visual rendering is not a requirement, since not all Web browsers are visual. As images are to graphical Web browsers, audio clips are to speechreaders.

    +

    Your Web browser may require you to download and install plugins to play the following.

    +
    +
    + +
    +

    MIDI

    + +
    +

    Many plugins support MIDI using the generally accepted content type audio/x-midi.

    + +

    Some websites incorrectly use the invalid content type audio/midi, which has not yet been registered. However, a draft registration is under development.

    +
    +
    +

    Example:

    +

    If your Web browser cannot play this classical music, it should present a link to download it.

    + + +
    + <object data="/audio/pachelbel.mid" type="audio/x-midi">
    +
    + <p>Listen to <a href="/audio/pachelbel.mid" type="audio/x-midi">Pachelbel&rsquo;s Canon in D</a> (MIDI</acronym>, 10 <abbr title="kibibytes">KiB</abbr>).</p>
    +
    + </object> +
    +

    Your Web browser renders it like this:

    +
    + +

    Listen to Pachelbel’s Canon in D (MIDI, 10 KiB).

    +
    +
    +
    +
    + +
    +

    Ogg Vorbis

    + +
    +

    Ogg Vorbis is an open, cross‐platform, patent‐free audio format.

    + +

    Ogg Vorbis uses the Ogg container file format, which may contain other multimedia, like Ogg Theora video.

    +
    + +
    +

    Example:

    +

    If your Web browser cannot play this sample of a song by Piero Umiliani, it should present a link to download it.

    + + +
    + <object data="/audio/mahnamahna.ogg" type="application/ogg">
    +
    + <p>Listen to <a href="/audio/mahnamahna.ogg" type="application/ogg">Mah Na&rsquo; Mah Na&rsquo;</a> (Ogg Vorbis, 864 <abbr title="kibibytes">KiB</abbr>).</p>
    +
    + </object> +
    +

    Your Web browser renders it like this:

    +
    + +

    Listen to a sample of Mah Na’ Mah Na’ (Ogg Vorbis, 864 KiB).

    +
    +
    +
    +
    + +
    +

    Your Browser

    + +

    Your Web browser identified itself as Wget/1.11.4 when it requested this page.

    + + + +
    + + + + diff --git a/smlnj-lib/HTML4/tests/objects.html b/smlnj-lib/HTML4/tests/objects.html new file mode 100644 index 0000000..6101b91 --- /dev/null +++ b/smlnj-lib/HTML4/tests/objects.html @@ -0,0 +1,406 @@ + + + Objects [Robin’s HTML 4.0 Conformance Test] + + + + + + + + + + + + + + + +

    Objects (§13.3)

    + +
    +
    +

    <object> embeds objects of any type: images, movies, sounds, applets, or anything else you can send.

    +

    The all‐purpose <object> was potentially the most important addition in HTML 4.0. However, this ambitous but flawed element is terribly overloaded. For example, it has 17 element‐specific attributes with varied meanings and uses. Some of it is baggage inherited from Internet Explorer’s earlier, proprietary version of <object>.

    + +

    XHTML 2.0: The W3C is considering removing <img> and <applet> in XHTML 2.0 in favor of a somewhat scaled back <object>, which addresses a couple of Jukka Korpela’s criticisms of it.

    + +

    For more thorough testing of <object>, I recommend Antti Näyhä’s OBJECT test suite. That suite inspired some of the tests below.

    +
    +
    + +
    +

    Content types

    + +
    +

    The optional type attribute provides the content type of the object. A Web browser can use this information to avoid downloading content types that it does not support.

    +
    + +
    +

    Example:

    +

    Your Web browser should not try to render the image/x-unsupported image. It should render the alternate markup and not a broken image placeholder.

    + + +
    + <object data="data:image/x-unsupported,TEST" type="image/x-unsupported" width=100 height=100>
    +
    + <p>As expected, I could not render the <code>image/x-unsupported</code> image.</p>
    +
    + </object> +
    +

    Your Web browser renders it like this:

    +
    + +

    As expected, I could not render the image/x-unsupported image.

    +
    +
    +
    +
    + +
    +

    Alternate markup (§13.3.1)

    + +
    +

    If an object is not rendered (ex. to ignore an unsupported content type, or to reject potentially dangerous ActiveX controls), the contents of the <object> element are rendered instead.

    +

    An image marked up <img> can have alternate text, but an image marked up with <object> can have alternate markup.

    +
    + +
    +

    Example:

    +

    The <object> element allows more versatile replacements for images.

    + + +
    + <p>Suppose we want to use an image as a fancy horizontal divider:</p>
    +
    + <div align="center"><img src="/images/test/hr-grapevine.png" height=24 width=507 alt="~~~~~~~~~~~~~~~~~~ * ~~~~~~~~~~~~~~~~~~~"><div>
    +
    + <p>With <code>&lt;img&gt;</code>, the most appropriate alternate text for a horizontal divider is generally a series of dashes:</p>
    +
    + <div align="center"><img src="data:image/x-unsupported,TEST" alt="----------------------------------------"></div>
    +
    + <p>But with <code>&lt;object&gt;</code>, a horizontal rule can be alternate markup:</p>
    +
    + <div align="center"><object data="data:image/x-unsupported,TEST" type="image/x-unsupported">
    +
    + <hr width=507>
    +
    + </object></div>
    +
    +

    Your Web browser renders it like this:

    +
    +

    Suppose we want to use an image of a fancy horizontal divider:

    + +
    ~~~~~~~~~~~~~~~~~~ * ~~~~~~~~~~~~~~~~~~~
    + +

    With <img>, the most appropriate alternate text for a horizontal divider is generally a series of dashes:

    + +
    ----------------------------------------
    + +

    But with <object>, a horizontal rule can be alternate markup:

    + +
    +
    +
    +
    +
    +
    + +
    +

    Nested objects

    + +
    +

    You can even use another object as alternate markup. By nesting <object> elements inside each other, you can provide several alternate content types. If a Web browser doesn’t support the primary content type, it can gracefully switch to an alternate one.

    +

    The object need not be of a similar type. For example, an embedded video clip could have a static image as an alternate.

    +
    + +
    +

    Example:

    +

    Here is a picture of my cat in TIFF, PNG and JPEG formats.

    +

    If your Web browser supports any of these image formats (every graphical browser I’ve used supports JPEG), it should render one and only one of the images below. The image should be rendered inline, not as a frame with scrollbars.

    +

    If your Web browser can not render any of them, or if it has images disabled, it should render the alternate markup and not a broken image placeholder.

    +

    Few Web browsers support TIFF images. If yours does not, watch any status messages as your Web browser loads the page; it should not even try to download the TIFF version of the image.

    + + +
    + <object data="data:image/x-unsupported,TEST" type="image/x-unsupported" width=100 height=100>
    +
    + <object data="/images/test/test.tiff" type="image/tiff" width=100 height=100>
    +
    + <object data="/images/test/test.png" type="image/png" width=100 height=100>
    +
    + <object data="/images/test/test.jpeg" type="image/jpeg" width=100 height=100>
    +
    + <p>My cat <a href="/robin/petpix">Velcro</a> enjoys a good roll in the dirt.</p>
    +
    + </object>
    +
    + </object>
    +
    + </object>
    +
    + </object> +
    +

    Your Web browser renders it like this:

    +
    + + + + +

    My cat Velcro enjoys a good roll in the dirt.

    +
    +
    +
    +
    +
    + +

    Related Internet Explorer bug reports: Channel9 Wiki: Internet Explorer Standards Support.

    +
    +
    + +
    +

    Scaling (§13.7.1)

    + +
    +

    The width and height attributes tell a Web browser how much space to reserve for an object.

    +

    Images should be scaled (squashed or stretched) to the specified width and/or height, overriding their natural dimensions.

    +
    + +
    +

    Example:

    +

    If your Web browser renders this image, it should be taller and narrower than normal.

    + +
    + <object data="/images/test/test.png" type="image/png" width=75 height=150>
    +
    + <p>My cat <a href="/robin/petpix" title="Pet Pix">Velcro</a> enjoys a good roll in the dirt.</p>
    +
    + </object> +
    +

    Your Web browser renders it like this:

    +
    + +

    My cat Velcro enjoys a good roll in the dirt.

    +
    +
    + +

    Related Internet Explorer bug reports: Channel9 Wiki: Internet Explorer Standards Support.

    +
    + +
    +

    Example:

    +

    Let’s complicate it by trying some nested images of different scales.

    +

    If your Web browser renders the PNG image, it should be stretched taller and narrower than normal. If it renders the JPEG image, it should be squashed shorter and wider than normal.

    + +
    + <object data="data:image/x-unsupported,TEST" type="image/x-unsupported" width=30 height=30>
    +
    + <object data="/images/test/test.png" type="image/png" width=75 height=150>
    +
    + <object data="/images/test/test.jpeg" type="image/jpeg" width=150 height=75>
    +
    + <p>My cat <a href="/robin/petpix" title="Pet Pix">Velcro</a> enjoys a good roll in the dirt.</p>
    +
    + </object> +
    + </object> +
    + </object> +
    +

    Your Web browser renders it like this:

    +
    + + + +

    My cat Velcro enjoys a good roll in the dirt.

    +
    +
    +
    +
    + +

    Related Internet Explorer bug reports: Channel9 Wiki: Internet Explorer Standards Support.

    +
    +
    + +
    +

    Image maps (§13.6)

    + +
    +

    Image maps link regions of an image to other webpages. Using <object>, you can define those regions in the links within the alternate markup. Not only does this simplify the source code, it also ensures that every link in the image map can be accessed by Web browsers that do not load the image.

    +
    + +
    +

    Example:

    +

    In a graphical Web browser, selecting the image of the cat (and not the text identifying the image format) should bring you to a page of cat pictures.

    + +
    + <object data="/images/test/test.jpeg" type="image/jpeg" width=100 height=100 usemap="#catmap">
    +
    + <map name="catmap">
    +
    + <p>My cat <a href="/robin/petpix" title="Pet Pix" shape="rect" coords="45,15,200,230">Velcro</a> enjoys a good roll in the dirt.</p>
    +
    + </map>
    +
    + </object> +
    +

    Your Web browser renders it like this:

    +
    + + +

    My cat Velcro enjoys a good roll in the dirt.

    +
    +
    +
    + +

    Related Internet Explorer bug reports: Channel9 Wiki: Internet Explorer Standards Support.

    +
    + +
    +

    Example:

    +

    Image maps can have multiple, overlapping regions of different shapes.

    +

    The overlapped regions should be mapped to the shape in the foreground.

    + +
    + <object data="/images/test/maptest.svg" type="image/svg+xml" width=200 height=200 usemap="#shapes">
    +
    + <object data="/images/test/maptest.png" type="image/png" width=200 height=200 usemap="#shapes">
    +
    + <object data="/images/test/maptest.jpeg" type="image/jpeg" width=200 height=200 usemap="#shapes">
    +
    + <map name="shapes">
    +
    + <ul>
    +
    + <li><a href="#triangle" title="green triangle" shape="poly" coords="100,10, 58,94, 142,94, 100,10">green triangle</a>
    + <li><a href="#cross" title="blue cross" shape="poly" coords="80,108, 60,128, 80,148, 60,168, 80,188, 100,168, 120,188, 140,168, 120,148, 140,128, 120,108, 100,128, 80,108">blue cross</a>
    + <li><a href="#square" title="magenta square" shape="rect" coords="10,58, 94,142">magenta square</a>
    + <li><a href="#circle" title="red circle" shape="circle" coords="148,100, 42">red circle</a>
    +
    + </ul>
    +
    + </map>
    +
    + </object> +
    + </object> +
    + </object> +
    +

    Your Web browser renders it like this:

    +
    + + + + + + + + + +
    + +

    Related Mozilla bug reports: Bearded-Imagemap.

    +

    Related Internet Explorer bug reports: Channel9 Wiki: Internet Explorer Standards Support.

    +
    +
    + +
    +

    Codebase

    + +
    +

    The codebase attribute sets the base location from which to resolve a relative URL in the data and other attributes. The codebase attribute is used mainly for applets that require loading several files.

    + +

    XHTML 2.0: In XHTML 2.0, the W3C is considering replacing the codebase attribute with a more general xml:base attribute.

    +
    + +
    +

    Example:

    +

    If your Web browser can display JPEG images and implements codebase correctly, the image below should be rendered.

    + + +
    + <object data="test.jpeg" codebase="/images/test/" type="image/jpeg" width=100 height=100>
    +
    + <p>My cat <a href="/robin/petpix" title="Pet Pix">Velcro</a> enjoys a good roll in the dirt.</p>
    +
    + </object> +
    +

    Your Web browser renders it like this:

    +
    + +

    My cat Velcro enjoys a good roll in the dirt.

    +
    +
    + +

    Related Mozilla bug reports: Object-Codebase.

    +

    Related Internet Explorer bug reports: Channel9 Wiki: Internet Explorer Standards Support.

    +
    +
    + +
    +

    Embedded pages (§13.5)

    + +
    +

    Though this page concentrates on images, <object> may be used with audio clips, video clips, applets, and other kinds of objects.

    +
    + +
    +

    Example:

    +

    You can even use it with text files and HTML, to embed one page in another like a floating frame.

    + + +
    + <object data="test.txt" type="text/plain" standby="Loading...">
    +
    + <p>If your Web browser had supported it, <a href="test.txt">a simple text file</a> would be embedded here.</p>
    +
    + </object> +
    +

    Your Web browser renders it like this:

    +
    + +

    If your Web browser had supported it, a simple text file would be embedded here.

    +
    +
    + +

    Related Internet Explorer bug reports: Channel9 Wiki: Internet Explorer Standards Support.

    +
    +
    + +
    +

    Your Browser

    + +

    Your Web browser identified itself as Wget/1.11.4 when it requested this page.

    + + + +
    + + + + diff --git a/smlnj-lib/HTML4/tests/objvideo.html b/smlnj-lib/HTML4/tests/objvideo.html new file mode 100644 index 0000000..5b2d10a --- /dev/null +++ b/smlnj-lib/HTML4/tests/objvideo.html @@ -0,0 +1,221 @@ + + + Objects: Video [Robin’s HTML 4.0 Conformance Test] + + + + + + + + + + + + + + + + +

    Objects: Video (§13.3)

    + +
    +
    +

    <object> may be used to embed video clips as well as still images.

    +

    The <param> element defines a parameter to pass information to an object when it runs.

    +

    Your Web browser may require you to download and install plugins to play the following.

    +
    +
    + +
    +

    QuickTime

    + +
    +

    QuickTime is a proprietary streaming video format frequently used on the Web. The official content type for QuickTime is video/quicktime.

    +

    Apple provides free plugins to play QuickTime for MacOS and Windows operating systems.

    +
    + +
    +

    Example:

    +

    If your Web browser supports QuickTime, the following may play an “uncommercial”. Otherwise, it should render an image of three still frames from the video clip and a transcript of its message.

    +

    Setting an autoplay parameter to false tells the QuickTime plugin not to play the movie automatically as soon as it loads.

    +

    Though this movie is 320×240, the dimensions 320×256 are used because the plugin controls take up an additional 16 pixels.

    + + +
    + <object data="/video/gdp.mov" type="video/quicktime" width="320" height="256" standby="Loading&hellip;">
    +
    + <param name="autoplay" value="false">
    + <object data="/video/un-gdp.jpg" type="image/jpeg" width="420" height="130"></object>

    + + <blockquote>
    +
    + <p><q>For years, economists have defined the economic health of a country by its Gross Domestic Product. Trouble is, every time a forest falls, the <abbr>GDP</abbr> goes up. With every oil spill, the <abbr>GDP</abbr> goes up. Every time a cancer patient is diagnosed, the <abbr>GDP</abbr> goes up. Is this how we measure economic <em>progress</em>? Economists &hellip; must learn to subtract.</q></p>
    +
    + </blockquote>
    +
    + </object> +
    +

    Your Web browser renders it like this:

    +
    + + + + +
    +

    For years, economists have defined the economic health of a country by its Gross Domestic Product. Trouble is, every time a forest falls, the GDP goes up. With every oil spill, the GDP goes up. Every time a cancer patient is diagnosed, the GDP goes up. Is this how we measure economic progress? Economists … must learn to subtract.

    +
    +
    +
    +

    This message was brought to you by Adbusters.

    +
    +
    + +
    +

    Windows Media Player

    + +
    +

    Microsoft also has a proprietary streaming video format for its Windows Media Player. Microsoft recommends using the content type video/x-ms-wmv for Windows Media Player files that contain audio and video.

    +
    + +
    +

    Example:

    +

    Here is an embedded Windows Media Player video clip. You may have to download and install a plugin before you can play the movie.

    +

    If your Web browser supports Windows Media, the following may play a clip from Face the Nation. Otherwise, it should render a still image and a transcript of the clip.

    +

    Setting an autostart parameter to 0 tells the Windows Media Player plugin not to play the movie automatically as soon as it loads. Different plugins use different parameter names.

    +

    Though this movie is 320×240, the dimensions 320×285 are used because the plugin controls take up an additional 45 pixels.

    + + +
    + <object data="/video/rumsfelddeny4.wmv" type="video/x-ms-wmv" width="320" height="285" standby="Loading&hellip;">
    +
    + <param name="autostart" value="0">
    + <object data="/video/rumsfeld-caught-lying.jpg" type="image/jpeg" width="250" height="187"></object>

    + <blockquote cite="http://www.cbsnews.com/htdocs/pdf/face_031404.pdf" title="Face the Nation (14 March 2004)">
    +
    + <p><em>Schieffer:</em> Well, let me just ask you this. If they did not have these weapons of mass destruction, though, granted all of that is true, why then did they pose an immediate threat to us, to this country?</p>

    + + <p><em>Rumsfeld:</em> Well, you&rsquo;re the&mdash;you and a few other critics are the only people I've heard use the phrase <q>immediate threat</q>. I didn&rsquo;t. The president didn&rsquo;t. And it&rsquo;s become kind of folklore that that&rsquo;s&mdash;that&rsquo;s what&rsquo;s happened. The president went&mdash;</p>

    + + <p><em>Schieffer:</em> You&rsquo;re saying that nobody in the administration said that.</p>

    + + <p><em>Rumsfeld:</em> I&mdash;I can&rsquo;t speak for nobody&mdash;everybody in the administration and say nobody said that.</p>

    + + <p><em>Schieffer:</em> Vice president didn&rsquo;t say that? The&mdash;</p>

    + + <p><em>Rumsfeld:</em> Not&mdash;if&mdash;if you have any citations, I&rsquo;d like to see &rsquo;em.</p>

    + + <p><em>Friedman:</em> We have one here. It says <q>some have argued that the nu</q>&mdash;this is you speaking&mdash;<q>that the nuclear threat from Iraq is not imminent, that Saddam is at least five to seven years away from having nuclear weapons. I would not be so certain.</q></p>

    + + <p><em>Rumsfeld:</em> And&mdash;and&mdash;</p>

    + + <p><em>Friedman:</em> It was close to imminent.</p>

    + + <p><em>Rumsfeld:</em> Well, I&rsquo;ve&mdash;I&rsquo;ve tried to be precise, and I&rsquo;ve tried to be accurate. I&rsquo;m s&mdash;suppose I&rsquo;ve&mdash;</p>

    + + <p><em>Friedman:</em> <q>No terrorist state poses a greater or more <em>immediate threat</em> to the security of our people and the stability of the world and the regime of Saddam Hussein in Iraq.</q></p>

    + + <p><em>Rumsfeld:</em> Mm&#8208;hmm. It&mdash;my view of&mdash;of the situation was that he&mdash;he had&mdash;we&mdash;we believe, the best intelligence that we had and other countries had and that&mdash;that we believed and we still do not know&mdash;we will know.</p>
    +
    + </blockquote>
    +
    + </object> +
    +

    Your Web browser renders it like this:

    +
    + + + +
    +

    Schieffer: Well, let me just ask you this. If they did not have these weapons of mass destruction, though, granted all of that is true, why then did they pose an immediate threat to us, to this country?

    + +

    Rumsfeld: Well, you’re the—you and a few other critics are the only people I've heard use the phrase immediate threat. I didn’t. The president didn’t. And it’s become kind of folklore that that’s—that’s what’s happened. The president went—

    + +

    Schieffer: You’re saying that nobody in the administration said that.

    + +

    Rumsfeld: I—I can’t speak for nobody—everybody in the administration and say nobody said that.

    + +

    Schieffer: Vice president didn’t say that? The—

    + +

    Rumsfeld: Not—if—if you have any citations, I’d like to see ’em.

    + +

    Friedman: We have one here. It says some have argued that the nu—this is you speaking—that the nuclear threat from Iraq is not imminent, that Saddam is at least five to seven years away from having nuclear weapons. I would not be so certain.

    + +

    Rumsfeld: And—and—

    + +

    Friedman: It was close to imminent.

    + +

    Rumsfeld: Well, I’ve—I’ve tried to be precise, and I’ve tried to be accurate. I’m s—suppose I’ve—

    + +

    Friedman: No terrorist state poses a greater or more immediate threat to the security of our people and the stability of the world and the regime of Saddam Hussein in Iraq.

    + +

    Rumsfeld: Mm‐hmm. It—my view of—of the situation was that he—he had—we—we believe, the best intelligence that we had and other countries had and that—that we believed and we still do not know—we will know.

    +
    +
    +
    + +
    +
    + +
    +

    Matroska

    + +
    +

    Matroska is an open, cross‐platform, patent‐free multimedia file format. It supports advanced media features like chapters, menus, and multiple video, audio, and subtitle tracks.

    +
    + +
    +

    Example:

    +

    This embedded Matroska video clip has 16 subtitle tracks.

    +

    If your Web browser can’t play this clip from Tokyo Mew Mew (東京ミュウミュウ), it should render a still image and present a link to download it.

    + + +
    + <object data="/video/mewmew-vorbis-ssa.mkv" type="application/x-matroska" width="320" height="240" standby="Loading&hellip;">
    +
    + <object data="/video/mkvtmm-a.jpg" type="image/jpeg" width="320" height="240"></object>

    + <p>Download a <a href="/video/mewmew-vorbis-ssa.mkv" type="application/x-matroska">Matroska demo</a> (7.8 <abbr title="mebibytes">MiB</abbr>).</p> +
    + </object> +
    +

    Your Web browser renders it like this:

    +
    + + +

    Download a Matroska demo (7.8 MiB).

    +
    +
    + +
    +
    +
    +

    Your Browser

    + +

    Your Web browser identified itself as Wget/1.11.4 when it requested this page.

    + + + +
    + + + + diff --git a/smlnj-lib/HTML4/tests/quote.html b/smlnj-lib/HTML4/tests/quote.html new file mode 100644 index 0000000..3328860 --- /dev/null +++ b/smlnj-lib/HTML4/tests/quote.html @@ -0,0 +1,184 @@ + + + Quotations [Robin’s HTML 4.0 Conformance Test] + + + + + + + + + + + + + + + + + +

    Quotations (§9.2.2)

    + +
    +

    Quote marks

    + +
    +

    The <q> element provides a language‐independent way of specifying quotations.

    + +

    A visual Web browser must produce quotation marks at the beginning and end of the <q> element. Depending on its capabilities, it could use curly left and right quotes instead of straight ASCII quote marks. It could also use different quote marks appropriate to the text’s language (for example, guillemets in French documents).

    + +

    Authors can recommend which quote marks <q> should use through style sheets. In CSS level 2, the quotes style description specifies which characters to use.

    + +

    XHTML 2.0: The W3C is considering replacing <q> with a <quote> element in XHTML 2.0. The difference between them is that <quote> would not generate quotation marks like <q> does. Web browsers that do not support <quote> would still render the quotation marks in the content.

    +
    + +
    +

    Example:

    +

    This document has style rules specifying language‐dependent renderings for quotation marks, to various depths:

    +
    + :lang(en-gb) { quotes: "\2018" "\2019" "\201C" "\201D" }
    + :lang(en-us),
    + :lang(es) { quotes: "\201C" "\201D" "\2018" "\2019" }
    + :lang(fr) { quotes: "\2009" "\2009" "\201C" "\201D" "\2018" "\2019" }
    + :lang(de) { quotes: "\201E" "\201C" "\201A" "\2018" }
    + :lang(it) { quotes: "" "" "\2039" "\203A" }
    + :lang(no) { quotes: "" "" "\2018" "\2019" }
    +
    + +

    These paragraphs declare the language of their contents. Each should invoke the style rule for that language and set the quotation marks appropriately:

    + +
    + <p><q>What is this thing called <q>love</q>?</q></p>
    + <p lang="es"><q>Cul es esta cosa llamada <q>amor</q>?</q></p>
    + <p lang="fr"><q>Qu&rsquo;est&#8208;ce que c&rsquo;est que <q>l&rsquo;amour</q>?</q></p>
    + <p lang="de"><q>Was ist dies Ding, das man <q>Liebe</q> nennt?</q></p>
    + <p lang="it"><q>Che cosa questa cosa chiamata <q>amore</q>?</q></p> +
    +

    It should be rendered similar to this:

    +
    “What is this thing called ‘love’?”
    +
    +“Cul es esta cosa llamada ‘amor’?”
    +
    +« Qu’est‐ce que c’est que “l’amour”? »
    +
    +„Was ist dies Ding, das man ‚Liebe‘ nennt?“
    +
    +«Che cosa  questa cosa chiamata ‹amore›?»
    +
    +

    Your Web browser renders it like this:

    +
    +

    What is this thing called love?

    +

    Cul es esta cosa llamada amor?

    +

    Qu’est‐ce que c’est que l’amour?

    +

    Was ist dies Ding, das man Liebe nennt?

    +

    Che cosa questa cosa chiamata amore?

    +
    + +

    Feel free to send me translations and the quote conventions for other languages, to add to the above example.

    + +

    Related Mozilla bug reports: Nested-Quotes, Default-Quotes.

    +

    Related Konqueror bug reports: #29576.

    +

    Related Internet Explorer bug reports: Channel9 Wiki: Internet Explorer Standards Support.

    +
    +
    + +
    +

    Source citations

    + +
    +

    The cite attribute of the <q> and <blockquote> elements is intended to give information about the source from which the quotation was borrowed, according to the standard. It could be used to link to a quotation’s source or to other information about it.

    +

    Some Web browsers allow you to access the source of the quotation by clicking on the quotation or selecting an option from a context menu.

    +
    + +
    +

    Example:

    +

    This block quotation cites the URL of its source:

    +
    + <blockquote cite="//www.geocities.com/~rosemarylake/stories/ft-frogmanor.html" title="&ldquo;The Well at the World&rsquo;s End&rdquo; by Rosemary Lake">
    +
    + <p>One day when Marianne had finished scrubbing the kitchen floor and was putting cakes and fruit on a plate to take to old Nurse, her Uncle threw a sieve at her and commanded: <q>Here, go and fill this with water from the Well at the World&rsquo;s End.</q> For he was sure there was no such place, and if there were, no one could fill a sieve with water anyway.</p>
    +
    + </blockquote> +
    +

    You should be able to navigate to the source of this quotation:

    +
    +
    +

    One day when Marianne had finished scrubbing the kitchen floor and was putting cakes and fruit on a plate to take to old Nurse, her Uncle threw a sieve at her and commanded: Here, go and fill this with water from the Well at the World’s End. For he was sure there was no such place, and if there were, no one could fill a sieve with water anyway.

    +
    +
    +
    + +
    +

    Example:

    +

    This block quotation is about quotation marks:

    +
    + <blockquote cite="//www.suck.com/daily/2000/03/20/" title="suck: The Jawbone of a Scare Quote (20 Mar 2000)">
    +
    + <p>The scare quotes let you know the jury&rsquo;s still out&mdash;the euphemism may become reality if it&rsquo;s tenacious enough to get the nod from the dictionary someday or to shed the scare quotes in the paper. A reader caught <cite>The New York Times</cite> at this practice in its 5 May 1993 edition and used Orwell&rsquo;s &ldquo;Politics and the English Language&rdquo; as the bag of oranges for the beatdown. The Gray Lady had suddenly dropped the scare quotes around the obvious doublespeak &ldquo;ethnic cleansing,&rdquo; thereby legitimizing a term for genocide favored by killers looking to cast their murderous policies in a less sinister light. The upshot is that today we all know what ethnic cleansing is, and &ldquo;genocide&rdquo; somehow seems a PC fallback&#8208;word for whiners. The terms have changed places and the legit description now reads as suspect.</p>
    + + +
    + </blockquote> +
    +

    You should be able to navigate to the source of this quotation:

    +
    +
    + +

    The scare quotes let you know the jury’s still out—the euphemism may become reality if it’s tenacious enough to get the nod from the dictionary someday or to shed the scare quotes in the paper. A reader caught The New York Times at this practice in its 5 May 1993 edition and used Orwell’s “Politics and the English Language” as the bag of oranges for the beatdown. The Gray Lady had suddenly dropped the scare quotes around the obvious doublespeak ethnic cleansing, thereby legitimizing a term for genocide favored by killers looking to cast their murderous policies in a less sinister light. The upshot is that today we all know what ethnic cleansing is, and genocide somehow seems a PC fallback‐word for whiners. The terms have changed places and the legit description now reads as suspect.

    +
    +
    +
    + +
    +

    Example:

    +

    A quotation within a quotation, both with source citations.

    +
    + <blockquote cite="//www.wired.com/wired/archive/4.01/white.paper_pr.html" title="Wired: The Copyright Grab (Jan 1996)">
    +
    + <p>Universal and Disney once sued Sony to stop distribution of its videotape machines, arguing that private noncommercial copying of their motion pictures by purchasers of Betamax machines was no more excusable than the theft of a necklace because the thief intended to wear it only at home for noncommercial purposes. The Supreme Court pointed out that the person who steals a necklace deprives its owner of possession and use of the item, whereas the copying of programs off the air <q cite="//cyber.law.harvard.edu/metaschool/Fisher/integrity/Links/Cases/sony.html" title="Sony v. Universal Studios">does not even remotely entail comparable consequences for the copyright owner.</q> The Court held that it was fair use for consumers to copy programs off the air for time&#8208;shifting purposes.</p>
    +
    + </blockquote> +
    +

    You should be able to navigate to the source of both quotations:

    +
    +
    +

    Universal and Disney once sued Sony to stop distribution of its videotape machines, arguing that private noncommercial copying of their motion pictures by purchasers of Betamax machines was no more excusable than the theft of a necklace because the thief intended to wear it only at home for noncommercial purposes. The Supreme Court pointed out that the person who steals a necklace deprives its owner of possession and use of the item, whereas the copying of programs off the air does not even remotely entail comparable consequences for the copyright owner. The Court held that it was fair use for consumers to copy programs off the air for time‐shifting purposes.

    +
    +
    + +

    Related Mozilla bug reports: Metadata.

    +

    Related Internet Explorer bug reports: Channel9 Wiki: Internet Explorer Standards Support.

    +
    +
    + +
    +

    Your Browser

    + +

    Your Web browser identified itself as Wget/1.11.4 when it requested this page.

    + + + +
    + + + diff --git a/smlnj-lib/HTML4/tests/scripts.html b/smlnj-lib/HTML4/tests/scripts.html new file mode 100644 index 0000000..6addaec --- /dev/null +++ b/smlnj-lib/HTML4/tests/scripts.html @@ -0,0 +1,763 @@ + + + Scripts [Robin’s HTML 4.0 Conformance Test] + + + + + + + + + + + + + + + + + +

    Scripts (§18)

    + +
    +

    Noscript nitpicking (§18.3.1)

    + +
    +

    An HTML 4.0 compliant Web browser that cannot render scripts must render contents of a <noscript> element. Compliant Web browsers that can render scripts should render contents of a <noscript> element when scripting is disabled.

    +

    The standard also says Web browsers should render <noscript> after a script it could not render earlier in a document. This feature is of questionable desirability, and is rarely observed in practice.

    +
    + +
    +

    Example:

    + +
    + <script type="application/x-fakescript"><!--
    +
    + // There is no such thing as FakeScript.
    + alert("I should not be running a FakeScript script!");
    + document.writeln('<p><em>This Web browser executed a FakeScript script.<\/em><\/p>');
    + // -->
    +
    + </script>
    + <noscript>
    +
    + <p>Technically, your Web browser should render this line whether or not scripting is enabled.</p>
    +
    + </noscript> +
    + +

    Your Web browser renders it like this:

    +
    + + +
    +
    +
    + + + + + + + + +
    +

    Content types

    + +
    +

    The type attribute specifies the content type of a script (that is, what language the script is in).

    + +

    On 27 June 2005, the IETF has at last registered content types for JavaScript, application/javascript, application/ecmascript, text/javascript (marked obsolete), and text/ecmascript (marked obsolete). Examples in the HTML 4.0 specification incorrectly used then‐unregistered content type text/javascript for JavaScript instead of well‐established though unofficial content type application/x-javascript. Using a subtype of text for code to be executed instead of text for humans to read was a mistake and can cause problems. Examples also incorrectly use text/vbscript (instead of application/x-vbscript) and text/tcl (instead of application/x-tcl).

    +

    The type attribute replaces the ill‐defined language attribute used by older browsers. HTML 4.0 simultaneously introduces language for compatibility, and deprecates it in favor of type.

    +
    + +
    +

    Example:

    + +
    + <div id="validtypes">
    +
    + <p>This Web browser executes scripts with these valid content types:</p>
    +
    + <script type="application/javascript"><!--
    +
    + // Correct but rarely supported type for JavaScript
    + + if (document.createTextNode) {
    +
    + // the standards-compliant DOM way
    + var newPara = document.createElement("p");

    + var textNode = document.createTextNode("\u2022 ");
    + newPara.appendChild(textNode);

    + var codeNode = document.createElement("code");
    + newPara.appendChild(codeNode);

    + textNode = document.createTextNode("application/javascript");
    + codeNode.appendChild(textNode);

    + var div = document.getElementById("validtypes");
    + div.appendChild(newPara);
    +
    + } else {
    +
    + // the old DHTML way
    + document.writeln("<p>&bull; <code>application/javascript<\/code> <i>(without the <abbr>DOM<\/abbr>)<\/i><\/p>");
    +
    + }
    + // -->
    +
    + </script>
    + <script type="application/ecmascript"><!--
    +
    + // Correct but rarely supported type for ECMAscript
    + // Proposed by ECMAScript standard
    + + var newPara = document.createElement("p");

    + var textNode = document.createTextNode("\u2022 ");
    + newPara.appendChild(textNode);

    + var codeNode = document.createElement("code");
    + newPara.appendChild(codeNode);

    + textNode = document.createTextNode("application/ecmascript");
    + codeNode.appendChild(textNode);

    + var div = document.getElementById("validtypes");
    + div.appendChild(newPara);
    + + // -->
    +
    + </script>
    + <script type="application/x-vbscript"><!--
    +
    + ' Correct unregistered type for VBScript
    + option explicit
    + dim newPara, textNode, codeNode, div

    + set newPara = document.createElement("p")
    + set textNode = document.createTextNode(ChrW(&H2022) + " ")
    + newPara.appendChild(textNode)

    + set codeNode = document.createElement("code")
    + newPara.appendChild(codeNode)

    + set textNode = document.createTextNode("application/x-vbscript")
    + codeNode.appendChild(textNode)

    + set div = document.getElementById("validtypes")
    + div.appendChild(newPara)
    + ' -->
    +
    + </script>
    + <script type="application/x-tcl"><!--
    +
    + # Correct unregistered media type for Tcl.
    + # I know of no Web browsers that run Tcl.
    + document writeln "<p>&bull; <i><code>text/tcl<\/code><\/i><\/p>"
    + # -->
    +
    + </script>
    + <script type="application/x-perlscript"><!--
    +
    + # Correct unregistered media type for PerlScript
    + # I know of no Web browsers that run PerlScript,
    + # but some Web servers can generate page content with it.
    + $window->document->writeln("<p>&bull; <code>application/x-perlscript<\/code><\/p>");
    + # -->
    +
    + </script>
    + <script type="application/x-cobolscript"><!--
    +
    + * You youngsters are so spoiled.
    + * Back in the day, we wrote Web pages on punched cards...
    +  IDENTIFICATION DIVISION.
    +  PROGRAM-ID. Test_COBOLScript.
    +  DATA DIVISION.
    +  WORKING-STORAGE DIVISION.
    +  PROCEDURE DIVISION.
    +
    +  INVOKE HOST "document.write" USING BY VALUE "<p>&bull; <code>application/x-cobolscript<".
    +  INVOKE HOST "document.write" USING BY VALUE "/code><".
    +  INVOKE HOST "document.write" USING BY VALUE "/p>"
    +
    +  END PROGRAM.
    + * -->
    +
    + </script>
    +
    + </div> +
    + <div id="obsoletetypes">
    +
    + <p>This Web browser executes scripts with these valid, obsolete content types:</p>
    +
    + <script type="text/javascript"><!--
    +
    + // Historically misused for JavaScript
    + // Registered as obsolete in 2005
    + if (document.createTextNode) {
    +
    + // the standards-compliant DOM way
    + var newPara = document.createElement("p");

    + var textNode = document.createTextNode("\u2022 ");
    + newPara.appendChild(textNode);

    + var codeNode = document.createElement("code");
    + newPara.appendChild(codeNode);

    + textNode = document.createTextNode("text/javascript");
    + codeNode.appendChild(textNode);

    + var div = document.getElementById("obsoletetypes");
    + div.appendChild(newPara);
    +
    + } else {
    +
    + // the old DHTML way
    + document.writeln("<p>&bull; <code>text/javascript<\/code> <i>(without the <abbr>DOM<\/abbr>)<\/i><\/p>");
    +
    + }
    + // -->
    +
    + </script>
    + <script type="text/ecmascript"><!--
    +
    + // Poorly-chosen default for scripts in SVG
    + // Registered as obsolete in 2005
    + + var newPara = document.createElement("p");

    + var textNode = document.createTextNode("\u2022 ");
    + newPara.appendChild(textNode);

    + var codeNode = document.createElement("code");
    + newPara.appendChild(codeNode);

    + textNode = document.createTextNode("text/ecmascript");
    + codeNode.appendChild(textNode);

    + var div = document.getElementById("obsoletetypes");
    + div.appendChild(newPara);
    + + // -->
    +
    + </script>
    + <script type="application/x-javascript"><!--
    +
    + // Widely-accepted unregistered type for JavaScript
    + // Used since the days of Netscape 3.x
    + if (document.createTextNode) {
    +
    + // the standards-compliant DOM way
    + var newPara = document.createElement("p");

    + var textNode = document.createTextNode("\u2022 ");
    + newPara.appendChild(textNode);

    + var codeNode = document.createElement("code");
    + newPara.appendChild(codeNode);

    + textNode = document.createTextNode("application/x-javascript");
    + codeNode.appendChild(textNode);

    + var div = document.getElementById("obsoletetypes");
    + div.appendChild(newPara);
    +
    + } else {
    +
    + // the old DHTML way
    + document.writeln("<p>&bull; <code>application/x-javascript<\/code> <i>(without the <abbr>DOM<\/abbr>)<\/i><\/p>");
    +
    + }
    + // -->
    +
    + </script>
    + <script type="application/x-ecmascript"><!--
    +
    + // Recommended by ECMAScript standard
    + // To be replaced by application/ecmascript
    + + var newPara = document.createElement("p");

    + var textNode = document.createTextNode("\u2022 ");
    + newPara.appendChild(textNode);

    + var codeNode = document.createElement("code");
    + newPara.appendChild(codeNode);

    + textNode = document.createTextNode("application/x-ecmascript");
    + codeNode.appendChild(textNode);

    + var div = document.getElementById("obsoletetypes");
    + div.appendChild(newPara);
    + + // -->
    +
    + </script>
    +
    + </div> +
    + <div id="invalidtypes">
    +
    + <p>This Web browser executes scripts with these unregistered content types:</p>
    +
    + <script type="text/fakescript"><!--
    +
    + alert("I should not be running a text/fakescript script!");
    + document.writeln('<p>&bull; <em>text/fakescript<\/em><\/p>');
    + // -->
    +
    + </script>
    + <script type="text/jscript"><!--
    +
    + // Microsoft's proprietary version
    + + if (document.createTextNode) {
    +
    + // the standards-compliant DOM way
    + var newPara = document.createElement("p");

    + var textNode = document.createTextNode("\u2022 ");
    + newPara.appendChild(textNode);

    + var codeNode = document.createElement("code");
    + newPara.appendChild(codeNode);

    + textNode = document.createTextNode("text/jscript");
    + codeNode.appendChild(textNode);

    + var div = document.getElementById("invalidtypes");
    + div.appendChild(newPara);
    +
    + } else {
    +
    + // the old DHTML way
    + document.writeln("<p>&bull; <code>text/jscript<\/code> <i>(without the <abbr>DOM<\/abbr>)<\/i><\/p>");
    +
    + }
    + // -->
    +
    + </script>
    + <script type="text/livescript"><!--
    +
    + // iCab's alias
    + + if (document.createTextNode) {
    +
    + // the standards-compliant DOM way
    + var newPara = document.createElement("p");

    + var textNode = document.createTextNode("\u2022 ");
    + newPara.appendChild(textNode);

    + var codeNode = document.createElement("code");
    + newPara.appendChild(codeNode);

    + textNode = document.createTextNode("text/livescript");
    + codeNode.appendChild(textNode);

    + var div = document.getElementById("invalidtypes");
    + div.appendChild(newPara);
    +
    + } else {
    +
    + // the old DHTML way
    + document.writeln("<p>&bull; <code>text/livescript<\/code> <i>(without the <abbr>DOM<\/abbr>)<\/i><\/p>");
    +
    + }
    + // -->
    +
    + </script>
    + <script type="text/vbscript"><!--
    +
    + + option explicit
    + dim newPara, textNode, codeNode, div

    + set newPara = document.createElement("p")
    + set textNode = document.createTextNode(ChrW(&H2022) + " ")
    + newPara.appendChild(textNode)

    + set codeNode = document.createElement("code")
    + newPara.appendChild(codeNode)

    + set textNode = document.createTextNode("text/vbscript")
    + codeNode.appendChild(textNode)

    + set div = document.getElementById("invalidtypes")
    + div.appendChild(newPara)
    + ' -->
    +
    + </script>
    + <script type="text/vbs"><!--
    +
    + + option explicit
    + dim newPara, textNode, codeNode, div

    + set newPara = document.createElement("p")
    + set textNode = document.createTextNode(ChrW(&H2022) + " ")
    + newPara.appendChild(textNode)

    + set codeNode = document.createElement("code")
    + newPara.appendChild(codeNode)

    + set textNode = document.createTextNode("text/vbs")
    + codeNode.appendChild(textNode)

    + set div = document.getElementById("invalidtypes")
    + div.appendChild(newPara)
    + ' -->
    +
    + </script>
    + <script type="text/tcl"><!--
    +
    + # I know of no Web browsers that run Tcl.
    + # The standard mentions text/tcl as an example value
    + # for the Content-Script-Type header.
    + document writeln "<p>&bull; <i><code>text/tcl<\/code><\/i><\/p>"
    + # -->
    +
    + </script>
    +
    + </div> +
    + +

    Your Web browser renders it like this:

    +
    +
    +

    This Web browser executes scripts with these valid content types:

    + + + + + + + +
    +
    +

    This Web browser executes scripts with these valid, obsolete content types:

    + + + + + +
    +
    +

    This Web browser executes scripts with these unregistered content types:

    + + + + + + + + +
    +
    +

    Related Mozilla bug reports: text-ecmascript.

    +
    +
    + +
    +

    Deferring execution

    + +
    +

    The defer attribute marks scripts that may be executed after the rest of a page has finished rendering. It is used for scripts that do not generate document content (using methods like document.writeln() functions in the scripts above).

    + +

    Deferring document.writeln() functions until a page is fully loaded would cause generated content to be appended to the end of the page. However, one could defer a script that used DOM methods to insert content within a particular element in a document.

    +
    + +
    +

    Example:

    + +
    + <script type="text/javascript"><!--
    +
    + var defer_support = "used";
    + // -->
    +
    + </script>
    + <script type="text/javascript" defer><!--
    +
    + // may be executed after the following script
    + var defer_support = "ignored";
    + // -->
    +
    + </script>
    + <script type="text/javascript"><!--
    +
    + document.writeln("<p>The <code>defer<\/code> attribute was ", defer_support, ".<\/p>");
    + // -->
    +
    + </script>
    +
    + +

    Your Web browser renders it like this:

    +
    + + + +
    + +

    Related Mozilla bug reports: Defer.

    +

    Related Internet Explorer bug reports: Channel9 Wiki: Internet Explorer Standards Support.

    +
    +
    + +
    +

    Your Browser

    + +

    Your Web browser identified itself as Wget/1.11.4 when it requested this page.

    + + + +
    + + + + + diff --git a/smlnj-lib/HTML4/tests/smldec-to-html.cm b/smlnj-lib/HTML4/tests/smldec-to-html.cm new file mode 100644 index 0000000..32a5cc1 --- /dev/null +++ b/smlnj-lib/HTML4/tests/smldec-to-html.cm @@ -0,0 +1,17 @@ +(* ______________________________________________________________________ + smldec-to-html.cm + ______________________________________________________________________ *) + +Group is + $/basis.cm + $/smlnj-lib.cm + $smlnj/compiler/current.cm + $smlnj/viscomp/core.cm + $smlnj/viscomp/basics.cm + ../html4-lib.cm + + smldec-to-html.sml + +(* ______________________________________________________________________ + End of smldec-to-html.cm + ______________________________________________________________________ *) diff --git a/smlnj-lib/HTML4/tests/smldec-to-html.sml b/smlnj-lib/HTML4/tests/smldec-to-html.sml new file mode 100644 index 0000000..ceca189 --- /dev/null +++ b/smlnj-lib/HTML4/tests/smldec-to-html.sml @@ -0,0 +1,412 @@ +(* ______________________________________________________________________ + smldec-to-html.sml + ______________________________________________________________________ *) + +structure Test = struct + +structure H4U = HTML4Utils + +structure H4T = HTML4Tokens + +structure H4TU = HTML4TokenUtils + +structure H4P = HTML4Parser + +(* ____________________________________________________________ *) +(* Most of the following set of functions were automatically + generated, with additional pattern matching and recursion added in an + ad hoc fashion to arrive at a string H4U.parsetree that approximately + shows a structure and the functions it defines. + *) + +local + open Ast +in + fun handleFixitem (handleItem : 'a -> string H4U.parsetree) + ({item, fixity, region} : 'a fixitem) = + H4U.Nd(Atom.atom "fixity", + [H4U.Lf (case fixity of SOME sym => Symbol.name sym + | NONE => "NONE"), + handleItem item]) + + fun handleSigConst _ NoSig = H4U.Nd(Atom.atom "NoSig", nil) + | handleSigConst handleElem (Opaque elem) = + H4U.Nd(Atom.atom "Opaque", [handleElem elem]) + | handleSigConst handleElem (Transparent elem) = + H4U.Nd(Atom.atom "Transparent", [handleElem elem]) + and handleExp (AndalsoExp _) = H4U.Nd(Atom.atom "AndalsoExp", nil) + | handleExp (AppExp _) = H4U.Nd(Atom.atom "AppExp", nil) + | handleExp (CaseExp _) = H4U.Nd(Atom.atom "CaseExp", nil) + | handleExp (CharExp _) = H4U.Nd(Atom.atom "CharExp", nil) + | handleExp (ConstraintExp _) = H4U.Nd(Atom.atom "ConstraintExp", nil) + | handleExp (FlatAppExp exps) = + H4U.Nd(Atom.atom "FlatAppExp", map (handleFixitem handleExp) exps) + | handleExp (FnExp _) = H4U.Nd(Atom.atom "FnExp", nil) + | handleExp (HandleExp _) = H4U.Nd(Atom.atom "HandleExp", nil) + | handleExp (IfExp _) = H4U.Nd(Atom.atom "IfExp", nil) + | handleExp (IntExp _) = H4U.Nd(Atom.atom "IntExp", nil) + | handleExp (LetExp _) = H4U.Nd(Atom.atom "LetExp", nil) + | handleExp (ListExp _) = H4U.Nd(Atom.atom "ListExp", nil) + | handleExp (MarkExp (theexp, _)) = H4U.Nd(Atom.atom "MarkExp", + [handleExp theexp]) + | handleExp (OrelseExp _) = H4U.Nd(Atom.atom "OrelseExp", nil) + | handleExp (RaiseExp _) = H4U.Nd(Atom.atom "RaiseExp", nil) + | handleExp (RealExp _) = H4U.Nd(Atom.atom "RealExp", nil) + | handleExp (RecordExp _) = H4U.Nd(Atom.atom "RecordExp", nil) + | handleExp (SelectorExp _) = H4U.Nd(Atom.atom "SelectorExp", nil) + | handleExp (SeqExp _) = H4U.Nd(Atom.atom "SeqExp", nil) + | handleExp (StringExp _) = H4U.Nd(Atom.atom "StringExp", nil) + | handleExp (TupleExp _) = H4U.Nd(Atom.atom "TupleExp", nil) + | handleExp (VarExp _) = H4U.Nd(Atom.atom "VarExp", nil) + | handleExp (VectorExp _) = H4U.Nd(Atom.atom "VectorExp", nil) + | handleExp (WhileExp _) = H4U.Nd(Atom.atom "WhileExp", nil) + | handleExp (WordExp _) = H4U.Nd(Atom.atom "WordExp", nil) + and handleRule (Rule _) = H4U.Nd(Atom.atom "Rule", nil) + and handlePat (AppPat _) = H4U.Nd(Atom.atom "AppPat", nil) + | handlePat (CharPat _) = H4U.Nd(Atom.atom "CharPat", nil) + | handlePat (ConstraintPat _) = H4U.Nd(Atom.atom "ConstraintPat", nil) + | handlePat (FlatAppPat _) = H4U.Nd(Atom.atom "FlatAppPat", nil) + | handlePat (IntPat _) = H4U.Nd(Atom.atom "IntPat", nil) + | handlePat (LayeredPat _) = H4U.Nd(Atom.atom "LayeredPat", nil) + | handlePat (ListPat _) = H4U.Nd(Atom.atom "ListPat", nil) + | handlePat (MarkPat _) = H4U.Nd(Atom.atom "MarkPat", nil) + | handlePat (OrPat _) = H4U.Nd(Atom.atom "OrPat", nil) + | handlePat (RecordPat _) = H4U.Nd(Atom.atom "RecordPat", nil) + | handlePat (StringPat _) = H4U.Nd(Atom.atom "StringPat", nil) + | handlePat (TuplePat _) = H4U.Nd(Atom.atom "TuplePat", nil) + | handlePat (VarPat _) = H4U.Nd(Atom.atom "VarPat", nil) + | handlePat (VectorPat _) = H4U.Nd(Atom.atom "VectorPat", nil) + | handlePat WildPat = H4U.Nd(Atom.atom "WildPat", nil) + | handlePat (WordPat _) = H4U.Nd(Atom.atom "WordPat", nil) + and handleStrexp (AppStr _) = H4U.Nd(Atom.atom "AppStr", nil) + | handleStrexp (AppStrI _) = H4U.Nd(Atom.atom "AppStrI", nil) + | handleStrexp (BaseStr thedec) = H4U.Nd(Atom.atom "BaseStr", + [handleDec thedec]) + | handleStrexp (ConstrainedStr _) = H4U.Nd(Atom.atom "ConstrainedStr", + nil) + | handleStrexp (LetStr _) = H4U.Nd(Atom.atom "LetStr", nil) + | handleStrexp (MarkStr (thestr, _)) = H4U.Nd(Atom.atom "MarkStr", + [handleStrexp thestr]) + | handleStrexp (VarStr _) = H4U.Nd(Atom.atom "VarStr", nil) + and handleFctexp (AppFct _) = H4U.Nd(Atom.atom "AppFct", nil) + | handleFctexp (BaseFct _) = H4U.Nd(Atom.atom "BaseFct", nil) + | handleFctexp (LetFct _) = H4U.Nd(Atom.atom "LetFct", nil) + | handleFctexp (MarkFct _) = H4U.Nd(Atom.atom "MarkFct", nil) + | handleFctexp (VarFct _) = H4U.Nd(Atom.atom "VarFct", nil) + and handleWherespec (WhStruct _) = H4U.Nd(Atom.atom "WhStruct", nil) + | handleWherespec (WhType _) = H4U.Nd(Atom.atom "WhType", nil) + and handleSigexp (AugSig _) = H4U.Nd(Atom.atom "AugSig", nil) + | handleSigexp (BaseSig _) = H4U.Nd(Atom.atom "BaseSig", nil) + | handleSigexp (MarkSig _) = H4U.Nd(Atom.atom "MarkSig", nil) + | handleSigexp (VarSig _) = H4U.Nd(Atom.atom "VarSig", nil) + and handleFsigexp (BaseFsig _) = H4U.Nd(Atom.atom "BaseFsig", nil) + | handleFsigexp (MarkFsig _) = H4U.Nd(Atom.atom "MarkFsig", nil) + | handleFsigexp (VarFsig _) = H4U.Nd(Atom.atom "VarFsig", nil) + and handleSpec (DataSpec _) = H4U.Nd(Atom.atom "DataSpec", nil) + | handleSpec (ExceSpec _) = H4U.Nd(Atom.atom "ExceSpec", nil) + | handleSpec (FctSpec _) = H4U.Nd(Atom.atom "FctSpec", nil) + | handleSpec (IncludeSpec _) = H4U.Nd(Atom.atom "IncludeSpec", nil) + | handleSpec (MarkSpec _) = H4U.Nd(Atom.atom "MarkSpec", nil) + | handleSpec (ShareStrSpec _) = H4U.Nd(Atom.atom "ShareStrSpec", nil) + | handleSpec (ShareTycSpec _) = H4U.Nd(Atom.atom "ShareTycSpec", nil) + | handleSpec (StrSpec _) = H4U.Nd(Atom.atom "StrSpec", nil) + | handleSpec (TycSpec _) = H4U.Nd(Atom.atom "TycSpec", nil) + | handleSpec (ValSpec _) = H4U.Nd(Atom.atom "ValSpec", nil) + and handleDec (AbsDec _) = H4U.Nd(Atom.atom "AbsDec", nil) + | handleDec (AbstypeDec _) = H4U.Nd(Atom.atom "AbstypeDec", nil) + | handleDec (DatatypeDec _) = H4U.Nd(Atom.atom "DatatypeDec", nil) + | handleDec (ExceptionDec _) = H4U.Nd(Atom.atom "ExceptionDec", nil) + | handleDec (FctDec _) = H4U.Nd(Atom.atom "FctDec", nil) + | handleDec (FixDec _) = H4U.Nd(Atom.atom "FixDec", nil) + | handleDec (FsigDec _) = H4U.Nd(Atom.atom "FsigDec", nil) + | handleDec (FunDec (fbs, tyvars)) = + H4U.Nd(Atom.atom "FunDec", [H4U.Nd(Atom.atom "fbs", map handleFb fbs), + H4U.Nd(Atom.atom "tyvars", + map handleTyvar tyvars)]) + | handleDec (LocalDec (dec1, dec2)) = + H4U.Nd(Atom.atom "LocalDec", [handleDec dec1, handleDec dec2]) + | handleDec (MarkDec (thedec, _)) = H4U.Nd(Atom.atom "MarkDec", + [handleDec thedec]) + | handleDec (OpenDec _) = H4U.Nd(Atom.atom "OpenDec", nil) + | handleDec (OvldDec _) = H4U.Nd(Atom.atom "OvldDec", nil) + | handleDec (SeqDec decs) = H4U.Nd(Atom.atom "SeqDec", + map handleDec decs) + | handleDec (SigDec sigbs) = H4U.Nd(Atom.atom "SigDec", + map handleSigb sigbs) + | handleDec (StrDec strbs) = H4U.Nd(Atom.atom "StrDec", + map handleStrb strbs) + | handleDec (TypeDec _) = H4U.Nd(Atom.atom "TypeDec", nil) + | handleDec (ValDec _) = H4U.Nd(Atom.atom "ValDec", nil) + | handleDec (ValrecDec _) = H4U.Nd(Atom.atom "ValrecDec", nil) + and handleVb (MarkVb _) = H4U.Nd(Atom.atom "MarkVb", nil) + | handleVb (Vb _) = H4U.Nd(Atom.atom "Vb", nil) + and handleRvb (MarkRvb _) = H4U.Nd(Atom.atom "MarkRvb", nil) + | handleRvb (Rvb _) = H4U.Nd(Atom.atom "Rvb", nil) + and handleFb (Fb (clauses, flag)) = + H4U.Nd(Atom.atom "Fb", (map handleClause clauses) @ + [if flag then H4U.Lf "true" + else H4U.Lf "false"]) + | handleFb (MarkFb (thefb, _)) = H4U.Nd(Atom.atom "MarkFb", + [handleFb thefb]) + and handleClause (Clause {exp, pats, resultty}) = + H4U.Nd(Atom.atom "Clause", [ + H4U.Nd (Atom.atom "pats", map (handleFixitem handlePat) pats), + H4U.Nd (Atom.atom "exp", [handleExp exp]), + H4U.Nd (Atom.atom "resultty", [ + case resultty of SOME tyast => handleTy tyast + | NONE => H4U.Lf "NONE"]) + ]) + and handleTb (MarkTb _) = H4U.Nd(Atom.atom "MarkTb", nil) + | handleTb (Tb _) = H4U.Nd(Atom.atom "Tb", nil) + and handleDb (Db _) = H4U.Nd(Atom.atom "Db", nil) + | handleDb (MarkDb _) = H4U.Nd(Atom.atom "MarkDb", nil) + and handleDbrhs (Constrs _) = H4U.Nd(Atom.atom "Constrs", nil) + | handleDbrhs (Repl _) = H4U.Nd(Atom.atom "Repl", nil) + and handleEb (EbDef _) = H4U.Nd(Atom.atom "EbDef", nil) + | handleEb (EbGen _) = H4U.Nd(Atom.atom "EbGen", nil) + | handleEb (MarkEb _) = H4U.Nd(Atom.atom "MarkEb", nil) + and handleStrb (MarkStrb (thestrb, _)) = H4U.Nd(Atom.atom "MarkStrb", + [handleStrb thestrb]) + | handleStrb (Strb {name, constraint, def}) = + H4U.Nd(Atom.atom "Strb", + [H4U.Nd(Atom.atom "name", [H4U.Lf (Symbol.name name)]), + H4U.Nd(Atom.atom "constraint", [handleSigConst handleSigexp + constraint]), + H4U.Nd(Atom.atom "def", [handleStrexp def])]) + and handleFctb (Fctb _) = H4U.Nd(Atom.atom "Fctb", nil) + | handleFctb (MarkFctb _) = H4U.Nd(Atom.atom "MarkFctb", nil) + and handleSigb (MarkSigb _) = H4U.Nd(Atom.atom "MarkSigb", nil) + | handleSigb (Sigb _) = H4U.Nd(Atom.atom "Sigb", nil) + and handleFsigb (Fsigb _) = H4U.Nd(Atom.atom "Fsigb", nil) + | handleFsigb (MarkFsigb _) = H4U.Nd(Atom.atom "MarkFsigb", nil) + and handleTyvar (MarkTyv (thetyv, _)) = H4U.Nd(Atom.atom "MarkTyv", + [handleTyvar thetyv]) + | handleTyvar (Tyv _) = H4U.Nd(Atom.atom "Tyv", nil) + and handleTy (ConTy _) = H4U.Nd(Atom.atom "ConTy", nil) + | handleTy (MarkTy _) = H4U.Nd(Atom.atom "MarkTy", nil) + | handleTy (RecordTy _) = H4U.Nd(Atom.atom "RecordTy", nil) + | handleTy (TupleTy _) = H4U.Nd(Atom.atom "TupleTy", nil) + | handleTy (VarTy _) = H4U.Nd(Atom.atom "VarTy", nil) +end + +(* ____________________________________________________________ *) + +val tokIsSpace = H4P.tokIsSpace + +fun filterSpaceFromParseStream strm = + let fun pred (H4U.VisitT tok) = not (tokIsSpace tok) + | pred _ = true + in H4U.stream_filter pred strm end + +fun tokIsOpenTag tok = String.isPrefix "START" (HTML4Tokens.toString tok) + +fun tokIsCloseTag tok = String.isPrefix "END" (HTML4Tokens.toString tok) + +(* ____________________________________________________________ *) + +val templateStream = + let val instrm = TextIO.openIn "template.html" + val template_pt_opt = HTML4Parser.parseStream instrm + in + TextIO.closeIn instrm; + case template_pt_opt of + SOME (H4U.Nd(_, children)) => + H4U.stream_concatl (map H4U.parsetreeToVisitationStream children) + | _ => H4U.StreamNil + end handle ex => H4U.StreamNil + +(* ____________________________________________________________ *) + +exception IllFormedHTMLParseStream of H4T.token H4U.parsevisitation H4U.stream + +fun outputHTMLParseStream (istrm, ostrm) = + let fun visit (H4U.EnterNT _, indent) = indent ^ " " + | visit (H4U.ExitNT _, indent) = String.extract(indent, 1, NONE) + | visit (H4U.VisitT tok, indent) = + (TextIO.output(ostrm, + String.concat [indent, H4TU.tokToString tok, "\n"]); + indent) + val _ = H4U.stream_foldl visit "" istrm + in () end + +structure PP = PrettyPrint + +fun ppHTMLParseStream ppstrm istrm = + let fun visit (H4U.EnterNT _) = + PP.openHVBox ppstrm (PP.Rel 2) + | visit (H4U.ExitNT _) = + PP.closeBox ppstrm + | visit (H4U.VisitT tok) = + (PP.string ppstrm (H4TU.tokToString tok); + PP.cut ppstrm) + val _ = H4U.stream_app visit istrm + in () end + +(* __________________________________________________ *) + +(* The following was an attempt at a fancier pretty printer, but it +was not meant to be. *) + +fun ppHTMLParseStream' ppstrm istrm = + let exception BadStream + fun do_closes 0 = () + | do_closes n = (PP.closeBox ppstrm; do_closes (n - 1)) + fun visit (H4U.EnterNT _, (opens, openstk)) = + (PP.openHVBox ppstrm (PP.Rel 1); (1, opens::openstk)) + | visit (H4U.ExitNT _, (opens, opens'::openstk)) = + (do_closes opens; (opens', openstk)) + | visit (H4U.ExitNT _, (_, [])) = raise BadStream + | visit (H4U.VisitT tok, (opens, openstk)) = + let val opens' = ref opens + in + if tokIsCloseTag tok then ( + PP.closeBox ppstrm; + PP.newline ppstrm; + opens' := (!opens') - 1) + else (); + PP.string ppstrm (H4TU.tokToString tok); + if tokIsOpenTag tok then ( + PP.newline ppstrm; + PP.openHVBox ppstrm (PP.Rel 1); + opens' := (!opens') + 1) + else PP.space ppstrm 1; + (!opens', openstk) + end + val _ = H4U.stream_foldl visit (0,[]) istrm + handle BadStream => raise IllFormedHTMLParseStream istrm + in () end + +(* ____________________________________________________________ *) + +exception NotPossible + +structure CommentMap = ListMapFn(struct + type ord_key = String.string + val compare = String.compare + end) + +fun commentFilter commentMap = + let fun guard (HTML4Tokens.COMMENT comStr) = + CommentMap.inDomain(commentMap, comStr) + | guard _ = false + fun mapper (HTML4Tokens.COMMENT comStr) = + CommentMap.lookup (commentMap, comStr) + | mapper _ = raise NotPossible + in H4U.parsetreeStreamMapTStream(guard, mapper) end + +(* ____________________________________________________________ *) + +fun parseFile filename = + let val stream = TextIO.openIn filename + val source = Source.newSource(filename, 1, stream, false, + ErrorMsg.defaultConsumer()) + val result = SmlFile.parse source + in Source.closeSource source; result end + +(* ____________________________________________________________ *) + +val aEm = Atom.atom "em" +val aUl = Atom.atom "ul" +val aLi = Atom.atom "li" + +(* Here is a "simple" "little" example of many to many stream transduction. *) + +fun scrubEmptyULs (orig as H4U.StreamCons(orig_enter as H4U.EnterNT ntAtom, + tl_thunk)) = + if Atom.same(aUl, ntAtom) then let + val thunk_val = tl_thunk () + in case thunk_val of + H4U.StreamCons(orig_start as H4U.VisitT (H4T.STARTUL _), + tl_thunk') => + let val thunk_val' = tl_thunk' () + in case thunk_val' of + H4U.StreamCons(H4U.VisitT H4T.ENDUL, tl_thunk'') => + let val thunk_val'' = tl_thunk'' () + in case thunk_val'' of + H4U.StreamCons(H4U.ExitNT ntAtom, tl_thunk''') => + if Atom.same(aUl, ntAtom) + then scrubEmptyULs (tl_thunk'''()) + else raise IllFormedHTMLParseStream orig + | _ => raise IllFormedHTMLParseStream orig + end + | _ => let + fun new_thunk' () = scrubEmptyULs thunk_val' + fun new_thunk () = H4U.StreamCons(orig_start, + new_thunk') + in H4U.StreamCons(orig_enter, new_thunk) end + end + | _ => raise IllFormedHTMLParseStream orig + end + else H4U.StreamCons(orig_enter, fn () => scrubEmptyULs (tl_thunk ())) + | scrubEmptyULs (H4U.StreamCons (orig, tl_thunk)) = + H4U.StreamCons(orig, fn () => scrubEmptyULs (tl_thunk ())) + | scrubEmptyULs (orig as H4U.StreamNil) = orig + +(* ____________________________________________________________ *) + +fun handleFile filename = + let val intree = parseFile filename + val decStrm = H4U.parsetreeToVisitationStream (handleDec intree) + fun ptStrmToHTMLPtStrm _ (H4U.EnterNT ntAtom) = + H4U.stream_fromList [ + H4U.EnterNT aLi, + H4U.VisitT (H4T.STARTLI("
  • ", [])), + H4U.EnterNT aEm, + H4U.VisitT (H4T.STARTEM("", [])), + H4U.VisitT (H4T.PCDATA (Atom.toString ntAtom)), + H4U.VisitT H4T.ENDEM, + H4U.ExitNT aEm, + H4U.EnterNT aUl, + H4U.VisitT (H4T.STARTUL("
      ", [])) + ] + | ptStrmToHTMLPtStrm _ (H4U.ExitNT ntAtom) = + H4U.stream_fromList [ + H4U.VisitT H4T.ENDUL, + H4U.ExitNT aUl, + H4U.VisitT H4T.ENDLI, + H4U.ExitNT aLi + ] + | ptStrmToHTMLPtStrm tokToString (H4U.VisitT tok) = + H4U.stream_fromList [ + H4U.EnterNT aLi, + H4U.VisitT (H4T.STARTLI("
    • ", [])), + H4U.VisitT (H4T.PCDATA (tokToString tok)), + H4U.VisitT H4T.ENDLI, + H4U.ExitNT aLi + ] + val decHTMLStrm = + H4U.stream_concatl [ + H4U.stream_fromList [H4U.EnterNT aUl, + H4U.VisitT (H4T.STARTUL (" x)) + decStrm), + H4U.stream_fromList [H4U.VisitT H4T.ENDUL, + H4U.ExitNT aUl]] + val commentMap = + foldl CommentMap.insert' CommentMap.empty + [("", + H4U.stream_singleton (H4U.VisitT (HTML4Tokens.PCDATA + filename))), + ("", + H4U.stream_singleton (H4U.VisitT (HTML4Tokens.PCDATA + filename))), + ("", decHTMLStrm) + ] + val filterTemplate = + (commentFilter commentMap) o filterSpaceFromParseStream + val outstream = TextIO.openOut (filename ^ ".html") + val _ = outputHTMLParseStream(filterTemplate templateStream, outstream) + in TextIO.closeOut outstream end + +(* ____________________________________________________________ + Main routine. + *) + +fun main (_, args) = (List.app handleFile args; OS.Process.success) + handle ex => OS.Process.failure + +end + +(* ______________________________________________________________________ + End of smldec-to-html.sml + ______________________________________________________________________ *) diff --git a/smlnj-lib/HTML4/tests/spchars.html b/smlnj-lib/HTML4/tests/spchars.html new file mode 100644 index 0000000..9c412af --- /dev/null +++ b/smlnj-lib/HTML4/tests/spchars.html @@ -0,0 +1,249 @@ + + + Special Characters [Robin’s HTML 4.0 Conformance Test] + + + + + + + + + + + + + + + + + +

      Special Characters

      + +
      +

      Soft hyphens (§9.3.3)

      + +
      +

      A soft hyphen (&shy;) indicates where an optional word break may occur. When a soft hyphen breaks a word between one line and the next, a hyphen character is displayed at the end of the first line. When a soft hyphen does not break a word between lines, the hyphen must not be displayed.

      + +

      Soft hyphens are vital for text that must be displayed on a tiny screen or in a narrow frame. Web browsers have no excuse for rendering them incorrectly, when they can be minimally compliant by ignoring them completely.

      + +

      Although technically, the &shy; character entity was defined in HTML 3.2, I’ll treat soft hyphens as a new feature of HTML 4.0. Until HTML 4.0 explicitly spelled out how they should work, soft hyphens had ambiguous semantics and a history of contradictory interpretations.

      +

      In addition to the soft hyphen, there is also a hard hyphen (&#8208; or &#x2010;) which always renders, and a nonbreaking hyphen character (&#8209; or &#x2011;), for hyphens that do not break words across lines.

      +
      + +
      +

      Example:

      +

      For the following test, you may have to resize your screen or window so that a hyphenated word could be broken at the end of a line. (Widths of 80 columns and 640 pixels worked in local testing.)

      + + +
      + The 1992 <cite>Guinness Book of World Records</cite> calls the 29&#8208;letter

      + <em>floc&shy;ci&shy;nau&shy;ci&shy;
      + ni&shy;hil&shy;i&shy;pil&shy;i&shy;
      + fi&shy;ca&shy;tion</em>

      + <q>the longest real word in <cite>Oxford English Dictionary</cite></q>, dismissing the 45&#8208;letter

      + <em>pneu&shy;mo&shy;no&shy;ul&shy;tra&shy;
      + mi&shy;cro&shy;scop&shy;ic&shy;
      + sili&shy;co&shy;vol&shy;ca&shy;no&shy;
      + co&shy;ni&shy;o&shy;sis</em>

      + as <q>the longest made&#8209;up word in the <cite>Oxford English Dictionary</cite></q>. +
      +

      Line breaks have been added to the above source for readability. The sample below consists of a single line.

      + +

      Your Web browser renders it like this:

      +
      + The 1992 Guinness Book of World Records calls the 29‐letter floc­ci­nau­ci­ni­hil­i­pil­i­fi­ca­tion the longest real word in the Oxford English Dictionary, dismissing the 45‐letter pneu­mo­no­ultra­mi­cro­scop­ic­sili­co­vol­ca­no­co­ni­o­sis as the longest made‑up word in the Oxford English Dictionary. +
      + +

      Related Mozilla bug reports: shy.

      +

      Related Konqueror bug reports: #33798, #33855.

      +
      +
      + +
      +

      En spaces, em spaces, and thin spaces (§24)

      + +
      +

      HTML 4.0 has named entities for three fixed‐width spaces: the en space, the em space, and the thin space. Unlike ordinary spaces, which may vary in width when text is justified, the en, em, and thin spaces should not change in width.

      +

      The fixed‐width spaces are not white space characters, so two of them in sequence should not collapse into a single space. They should not be replaced by line breaks at the end of the line, though line breaks may occur immediately after them.

      +
      + +
      +

      Example:

      + +
      + <dl>
      +
      + <dt>space
      + <dd>The width of a space varies with the display font.
      + <dt>&amp;thinsp;
      + <dd>MathML&thinsp;defines&thinsp;thin&thinsp;spaces
      + &thinsp;as&thinsp;spaces&thinsp;of&thinsp;width
      + &thinsp;3&frasl;18&thinsp;as&thinsp;wide&thinsp;as
      + &thinsp;an&thinsp;em&thinsp;space.
      + <dt>&amp;ensp;
      + <dd>En&ensp;spaces&ensp;are&ensp;&frac12;&ensp;
      + as&ensp;wide&ensp;as&ensp;
      + an&ensp;em&ensp;space.
      + <dt>&amp;emsp;
      + <dd>The&emsp;width&emsp;of&emsp;an&emsp;em&emsp;
      + space&emsp;is&emsp;traditionally&emsp;
      + equal&emsp;to&emsp;the&emsp;point&emsp;size.
      +
      + </dl> +
      +

      Line breaks have been added above for readability.

      + +

      Your Web browser renders it like this:

      +
      +
      +
      space +
      The width of a space varies with the display font. +
      &thinsp; +
      MathML defines thin spaces as spaces 3⁄18 as wide as an em space. +
      &ensp; +
      En spaces are ½ as wide as an em space. +
      &emsp; +
      The width of an em space is traditionally equal to the point size. +
      +
      +
      +
      + +
      +

      Zero‐width spaces (§9.1)

      + +
      +

      Long lines usually wrap at spaces between words, but in languages without spaces between words (like Thai), sentences may appear as if they were one continuous word.

      +

      Zero‐width spaces put “invisible spaces” between words where they can wrap to the next line.[1]. Zero‐width spaces divide long sequences of characters into smaller units that may wrap from one line to the next.

      +

      HTML 4.0 lacks a character entity name like &zws;, so we must use a numeric reference like &#8203; or &#x200B;. (MathML uses the entity name &ZeroWidthSpace; for this character.)

      +

      Zero‐width spaces function similarly to the proprietary <wbr> word break element in early versions of Netscape.

      +

      [1] Invisible in theory, anyway. Some Web browsers display zero‐width spaces as a visible unknown‐character glyph, which is technically not incorrect. Perhaps a future version of the standard will mandate how zero‐width spaces should be rendered, as HTML 4.0 does with soft hyphens.

      +
      + +
      +

      Example:

      +

      The following sentence contains a very long number, in which I’ve helpfully included zero‐width spaces every 5 digits. In a visual Web browser that doesn’t treat zero‐width spaces as white space, this page will probably scroll horizontally.

      + + +
      + &pi;=3.14159&#8203;26535&#8203;89793&#8203;
      + 23846&#8203;26433&#8203;83279&#8203;50288&#8203;
      + 41971&#8203;69399&#8203;37510&#8203;58209&#8203;
      + 74944&#8203;59230&#8203;78164&#8203;06286&#8203;
      + 20899&#8203;86280&#8203;34825&#8203;34211&#8203;
      + 70679&hellip; +
      +

      Line breaks have been added to the above source for readability. The sample below consists of a single line.

      + +

      Your Web browser renders it like this:

      +
      + π=3.14159​26535​89793​23846​26433​83279​50288​41971​69399​37510​58209​74944​59230​78164​06286​20899​86280​34825​34211​70679… +
      + +

      Related Mozilla bug reports: zws.

      +

      Related Konqueror bug reports: #29575.

      +
      +
      + +
      +

      Joining Controls (§8.2.5)

      + +
      +

      In Arabic scripts, individual characters join with following ones. However, sometimes Web browsers must be informed to join characters that normally do not, or not to join characters that normally do.

      +

      The &zwnj; entity prevents joining where joining would occur, but should not. The &zwj; entity forces joining when it would not occur, but should.

      +
      + +
      +

      Example:

      +

      Here is an example of &zwj; and &zwnj; being used with Devanagari characters.

      + +
      + <p>&#2325;&#2381; + &#2340; = &#2325;&#2381;&#2340; (a glyph of kta)</p>
      + <p>&#2325;&#2381; + &amp;zwj; + &#2340; = &#2325;&#2381;&zwj;&#2340; (half&#8208;ka and ta)</p>
      + <p>&#2325;&#2381; + &amp;zwnj; + &#2340; = &#2325;&#2381;&zwnj;&#2340; (ka&#8208;halant and ta)</p> +
      +

      Your Web browser renders it like this:

      +
      +

      क् + त = क्त (a glyph of kta)

      +

      क् + &zwj; + त = क्‍त (half‐ka and ta)

      +

      क् + &zwnj; + त = क्‌त (ka‐halant and ta)

      +
      + +

      Related Mozilla bug reports: #202352.

      +
      +
      + +
      +

      Ligatures

      + +
      +

      In English‐language text, &zwj; may be used to form ligatures.

      +

      The MacOS character sets contains ligatures for “fi” and “fl”. Many fonts developed for both MacOS and Microsoft Windows contain the five main f‐ligatures.

      + + + + + + + + + + + + +
      LigatureEntityCharacter
      ff&#64256;
      fi&#64257;
      fl&#64258;
      ffi&#64259;
      ffl&#64260;
      st&#64262;
      + +

      However, using these character numbers with a typeface that does not support them could result in unknown‐character glyphs. Instead, you could request these ligatures with zero‐width joiners, allowing Web browsers that cannot generate them to gracefully degrade to unjoined characters.

      +

      Zero‐width joiners may also request ligatures without official characters in Unicode. Germanic typefaces sometimes have traditional ligatures for “ch”, “ck”, and “tz”. Adobe makes some fancy OpenType fonts with ligatures for “fj”, “ffj”, “Th”, “ct”, and “sp”.

      + +

      Using &zwj; to form ligatures in Latin text is controversial. Some think &zwj; should be used for ligatures only in contexts that absolutely require them, and consider it an abuse to request ligatures when unjoined letters convey the same meaning.

      +
      + +
      +

      Example:

      +

      The following text requests the five main f‐ligatures. Your Web browser should either join the letters, or gracefully degrade by rendering them as separate letters. The zero‐width joiner itself should always be invisible.

      + + +
      + The f&zwj;lower in the f&zwj;ile made the of&zwj;f&zwj;ice staf&zwj;f snif&zwj;f&zwj;le. +
      +

      Your Web browser renders it like this:

      +
      + The f‍lower in the f‍ile made the of‍fice staf‍f snif‍f‍le. +
      + +

      Related Mozilla bug reports: Ligatures.

      +
      +
      +
      +

      Your Browser

      + +

      Your Web browser identified itself as Wget/1.11.4 when it requested this page.

      + + + +
      + + + diff --git a/smlnj-lib/HTML4/tests/tables.html b/smlnj-lib/HTML4/tests/tables.html new file mode 100644 index 0000000..06ef456 --- /dev/null +++ b/smlnj-lib/HTML4/tests/tables.html @@ -0,0 +1,977 @@ + + + Tables [Robin’s HTML 4.0 Conformance Test] + + + + + + + + + + + + + + + + + + + +

      Tables (§11)

      + + + + +
      +

      Row and column groups

      + +
      +
        +
      • <thead>, <tfoot>, and <tbody> group table rows into a header, a footer, and one or more body sections.

        +

        At this writing, no Web browsers to my knowledge put this feature to either of the interesting uses suggested by the official specification: This division enables user agents to support scrolling of table bodies independently of the table head and foot. When long tables are printed, the table head and foot information may be repeated on each page that contains table data.

        +

        Related Mozilla bug reports: thead-UI.

        +
      • <colgroup> groups columns together into divisions. (Rows are grouped with <tbody>.) +
      • <col> applies its parameters to a table column. +
      + +
        +
      • rules declares where borders are drawn between cells. +
      + +

      XHTML 2.0: The W3C is considering eliminating the presentational rules attribute in XHTML 2.0.

      +
      + +
      +

      Example:

      +

      The rules attribute can take these five values.

      + +

      Here is a sample table of filtering features in major Web browsers. It should have no border and no rules:

      +
      + + + + + + + + + + + +
      Filtering Features
      Web Browser + Cookies + Images + Pop‐Up Windows +
      external server + by domain + throw away on exit + external server + by domain + by URL + all + by domain +
      Internet Explorer YesYesNo NoNoNo NoNo +
      Mozilla YesYesYes YesYesNo YesYes +
      Opera YesYesYes NoNoNo YesNo +
      iCab NoNoYes YesYesYes NoNo +
      +
      + +

      This table has the default value of rules="none". It should have no rules:

      +
      + + + + + + + + + + + +
      Filtering Features
      Web Browser + Cookies + Images + Pop‐Up Windows +
      external server + by domain + throw away on exit + external server + by domain + by URL + all + by domain +
      Internet Explorer YesYesNo NoNoNo NoNo +
      Mozilla YesYesYes YesYesNo YesYes +
      Opera YesYesYes NoNoNo YesNo +
      iCab NoNoYes YesYesYes NoNo +
      +
      + +

      This table has rules="rows". It should have rules between rows only:

      +
      + + + + + + + + + + + +
      Filtering Features
      Web Browser + Cookies + Images + Pop‐Up Windows +
      external server + by domain + throw away on exit + external server + by domain + by URL + all + by domain +
      Internet Explorer YesYesNo NoNoNo NoNo +
      Mozilla YesYesYes YesYesNo YesYes +
      Opera YesYesYes NoNoNo YesNo +
      iCab NoNoYes YesYesYes NoNo +
      +
      + +

      This table has rules="cols". It should have rules between columns only:

      +
      + + + + + + + + + + + +
      Filtering Features
      Web Browser + Cookies + Images + Pop‐Up Windows +
      external server + by domain + throw away on exit + external server + by domain + by URL + all + by domain +
      Internet Explorer YesYesNo NoNoNo NoNo +
      Mozilla YesYesYes YesYesNo YesYes +
      Opera YesYesYes NoNoNo YesNo +
      iCab NoNoYes YesYesYes NoNo +
      +
      + +

      This table has rules="all". It should have rules between both rows and columns:

      +
      + + + + + + + + + + + +
      Filtering Features
      Web Browser + Cookies + Images + Pop‐Up Windows +
      external server + by domain + throw away on exit + external server + by domain + by URL + all + by domain +
      Internet Explorer YesYesNo NoNoNo NoNo +
      Mozilla YesYesYes YesYesNo YesYes +
      Opera YesYesYes NoNoNo YesNo +
      iCab NoNoYes YesYesYes NoNo +
      +
      + +

      This table has rules="groups". It should only have rules between the row groups (between the header and body) and between the column groups:

      +
      + + + + + + + + + + + +
      Filtering Features
      Web Browser + Cookies + Images + Pop‐Up Windows +
      external server + by domain + throw away on exit + external server + by domain + by URL + all + by domain +
      Internet Explorer YesYesNo NoNoNo NoNo +
      Mozilla YesYesYes YesYesNo YesYes +
      Opera YesYesYes NoNoNo YesNo +
      iCab NoNoYes YesYesYes NoNo +
      +
      + +

      Related Mozilla bug reports: Table-Rules.

      +

      Related Konqueror bug reports: #47412.

      +

      Related Internet Explorer bug reports: Channel9 Wiki: Internet Explorer Standards Support.

      +
      + +
      +
        +
      • frame declares on which sides borders are drawn around the table. +
      + +

      XHTML 2.0: Like rules, frame too may be on the chopping block.

      +
      + +
      +

      Example:

      +

      The code attribute can take these values.

      + +

      This table has the default value of frame="void". There should be no border.

      +
      + + + + + + + + + + + +
      Filtering Features
      Web Browser + Cookies + Images + Pop‐Up Windows +
      external server + by domain + throw away on exit + external server + by domain + by URL + all + by domain +
      Internet Explorer YesYesNo NoNoNo NoNo +
      Mozilla YesYesYes YesYesNo YesYes +
      Opera YesYesYes NoNoNo YesNo +
      iCab NoNoYes YesYesYes NoNo +
      +
      + +

      This table has frame="box". There should be a border on all four sides of the table:

      +
      + + + + + + + + + + + +
      Filtering Features
      Web Browser + Cookies + Images + Pop‐Up Windows +
      external server + by domain + throw away on exit + external server + by domain + by URL + all + by domain +
      Internet Explorer YesYesNo NoNoNo NoNo +
      Mozilla YesYesYes YesYesNo YesYes +
      Opera YesYesYes NoNoNo YesNo +
      iCab NoNoYes YesYesYes NoNo +
      +
      + +

      This table has frame="hsides". There should be a border on the top and bottom sides of the table only:

      +
      + + + + + + + + + + + +
      Filtering Features
      Web Browser + Cookies + Images + Pop‐Up Windows +
      external server + by domain + throw away on exit + external server + by domain + by URL + all + by domain +
      Internet Explorer YesYesNo NoNoNo NoNo +
      Mozilla YesYesYes YesYesNo YesYes +
      Opera YesYesYes NoNoNo YesNo +
      iCab NoNoYes YesYesYes NoNo +
      +
      + +

      This table has frame="vsides". There should be a border on the left and right sides of the table only:

      +
      + + + + + + + + + + + +
      Filtering Features
      Web Browser + Cookies + Images + Pop‐Up Windows +
      external server + by domain + throw away on exit + external server + by domain + by URL + all + by domain +
      Internet Explorer YesYesNo NoNoNo NoNo +
      Mozilla YesYesYes YesYesNo YesYes +
      Opera YesYesYes NoNoNo YesNo +
      iCab NoNoYes YesYesYes NoNo +
      +
      + +

      This table has frame="above". There should be a border at the top of the table only:

      +
      + + + + + + + + + + + +
      Filtering Features
      Web Browser + Cookies + Images + Pop‐Up Windows +
      external server + by domain + throw away on exit + external server + by domain + by URL + all + by domain +
      Internet Explorer YesYesNo NoNoNo NoNo +
      Mozilla YesYesYes YesYesNo YesYes +
      Opera YesYesYes NoNoNo YesNo +
      iCab NoNoYes YesYesYes NoNo +
      +
      + +

      This table has frame="below". There should be a border at the bottom of the table only:

      +
      + + + + + + + + + + + +
      Filtering Features
      Web Browser + Cookies + Images + Pop‐Up Windows +
      external server + by domain + throw away on exit + external server + by domain + by URL + all + by domain +
      Internet Explorer YesYesNo NoNoNo NoNo +
      Mozilla YesYesYes YesYesNo YesYes +
      Opera YesYesYes NoNoNo YesNo +
      iCab NoNoYes YesYesYes NoNo +
      +
      + +

      This table has frame="lhs". There should be a border at the left of the table only:

      +
      + + + + + + + + + + + +
      Filtering Features
      Web Browser + Cookies + Images + Pop‐Up Windows +
      external server + by domain + throw away on exit + external server + by domain + by URL + all + by domain +
      Internet Explorer YesYesNo NoNoNo NoNo +
      Mozilla YesYesYes YesYesNo YesYes +
      Opera YesYesYes NoNoNo YesNo +
      iCab NoNoYes YesYesYes NoNo +
      +
      + +

      This table has frame="rhs". There should be a border at the right of the table only:

      +
      + + + + + + + + + + + +
      Filtering Features
      Web Browser + Cookies + Images + Pop‐Up Windows +
      external server + by domain + throw away on exit + external server + by domain + by URL + all + by domain +
      Internet Explorer YesYesNo NoNoNo NoNo +
      Mozilla YesYesYes YesYesNo YesYes +
      Opera YesYesYes NoNoNo YesNo +
      iCab NoNoYes YesYesYes NoNo +
      +
      + +

      Related Mozilla bug reports: Table-Rules.

      +

      Related Konqueror bug reports: #47412.

      +

      Related Internet Explorer bug reports: Channel9 Wiki: Internet Explorer Standards Support.

      +
      +
      + +
      +

      Alignment and style

      + +
      +

      Columns and column groups can be used to apply alignment or style properties to every cell in a column.

      +
        +
      • char specifies a character to align a column around when character alignment is used. +
      +
      + +
      +

      Example:

      + +
      + <table frame="void" rules="groups">
      +
      + <caption>Grocery Bill</caption>
      + <colgroup>
      +
      + <col align="left">
      + <col align="char" char=".">
      + <col align="right">
      +
      + <colgroup>
      +
      + <col align="char" char=".">
      +
      + <thead>
      +
      + <tr>
      +
      + <th>Item
      + <th align="right">Unit Cost
      + <th>Qty.
      + <th align="right">Price +
      +
      + <tfoot>
      +
      + <tr>
      +
      + <th colspan=3 align="right">Total
      + <td>$11.97
      +
      +
      + <tbody>
      +
      + <tr><td>Cookies <td>2.49 <td>2 <td>4.98
      + <tr><td>Soda <td>.99 <td>1 <td>.99
      + <tr><td>Ice Cream <td>6 <td>1 <td>6.
      +
      +
      + </table>
      +
      + +

      The decimal points should be lined up like this:

      +
                Grocery Bill
      +
      +Item         Unit   Qty. |  Price
      +-------------------------+-------
      +Cookies      2.49      1 |   4.98
      +Soda          .99      1 |    .99
      +Ice Cream    6         1 |   6.  
      +-------------------------+-------
      +                   Total | $11.97
      +

      Your Web browser renders it like this:

      +
      + + + + + + + + + + + + + +
      Grocery Bill
      Item + Unit Cost + Qty. + Price +
      Total + $11.97 +
      Cookies 2.49 2 4.98 +
      Soda .99 1 .99 +
      Ice Cream 6 1 6. +
      +
      + +

      Related Mozilla bug reports: Column, Character-Alignment, Table-Rules.

      +

      Related Konqueror bug reports: #29577.

      +
      + +
      +

      You can also set styles on columns and column groups, but according to the CSS2 standard, only the border, background, width, and visibility descriptions apply to columns and column groups.

      +
      + +
      +

      Example:

      +

      Let’s add a little style to the previous example.

      + + +
      + <table frame="void" rules="groups">
      +
      + <caption>Grocery Bill</caption>
      + <colgroup style="background-color: #acc">
      +
      + <col align="left">
      + <col align="char" char="." style="border-left-style: dotted">
      + <col align="right">
      +
      + <colgroup>
      +
      + <col align="char" char="." style="background-color: #aca">
      +
      + <thead>
      +
      + <tr>
      +
      + <th>Item
      + <th align="right">Unit Cost
      + <th>Qty.
      + <th align="right">Price +
      +
      + <tfoot>
      +
      + <tr>
      +
      + <th colspan=3 align="right">Total
      + <td>$11.97
      +
      +
      + <tbody>
      +
      + <tr><td>Cookies <td>2.49 <td>2 <td>4.98
      + <tr><td>Soda <td>.99 <td>1 <td>.99
      + <tr><td>Ice Cream <td>6 <td>1 <td>6.
      +
      +
      + </table>
      +
      + +

      It should be rendered similar to this:

      +
                Grocery Bill
      +
      +Item       : Unit   Qty. |  Price
      +-------------------------+-------
      +Cookies    : 2.49      1 |   4.98
      +Soda       :  .99      1 |    .99
      +Ice Cream  : 6         1 |   6.  
      +-------------------------+-------
      +                   Total | $11.97
      +

      Your Web browser renders it like this:

      +
      + + + + + + + + + + + + + +
      Grocery Bill
      Item + Unit Cost + Qty. + Price +
      Total + $11.97 +
      Cookies 2.49 2 4.98 +
      Soda .99 1 .99 +
      Ice Cream 6 1 6. +
      +
      + +

      Related Mozilla bug reports: Column, Character-Alignment, Table-Rules.

      +

      Related Konqueror bug reports: #29577.

      +
      +
      + +
      +

      Accessibility for nonvisual Web browsers (§11.4)

      + +
      +
        +
      • summary provides a long description of table contents. +
      • scope specifies whether a header cell provides information for a row, column, row group, or column group. +
      • headers lists which table cells provide header information for that cell. +
      + +

      XHTML 2.0: The W3C is considering changing summary from an attribute to a <summary> element within <table>, so that it can contain markup.

      +
      + +
      +

      Example:

      +

      Instead of colors, let’s add some attributes to the previous table to help speech synthesizers read it aloud.

      + + +
      + <table frame="void" rules="groups" summary="The total for cookies, soda, and ice cream is $11.97.">
      +
      + <caption>Grocery Bill</caption>
      + <colgroup>
      +
      + <col align="left">
      + <col align="char" char=".">
      + <col align="right">
      +
      + <colgroup>
      +
      + <col align="char" char=".">
      +
      + <thead>
      +
      + <tr>
      +
      + <th scope="col">Item
      + <th scope="col" align="right">Unit Cost
      + <th scope="col">Qty.
      + <th scope="col" align="right">Price +
      +
      + <tfoot>
      +
      + <tr>
      +
      + <th id="total_header" colspan=3 align="right">Total
      + <td headers="total_header">$11.97
      +
      +
      + <tbody>
      +
      + <tr><td>Cookies <td>2.49 <td>2 <td>4.98
      + <tr><td>Soda <td>.99 <td>1 <td>.99
      + <tr><td>Ice Cream <td>6 <td>1 <td>6.
      +
      +
      + </table>
      +
      + +

      A speech synthesizer could read this table aloud like:

      +
      Caption: Grocery Bill
      +Summary: The total for cookies, soda, and ice cream is $11.97.
      +Item: Cookies    Unit Cost: 2.49  Qty.: 2  Price:   4.98
      +Item: Soda       Unit Cost:  .99  Qty.: 1  Price:    .99
      +Item: Ice Cream  Unit Cost: 6     Qty.: 1  Price:   6.
      +                                           Total: $11.97
      +

      Your Web browser renders it like this:

      +
      + + + + + + + + + + + + + +
      Grocery Bill
      Item + Unit Cost + Qty. + Price +
      Total + $11.97 +
      Cookies 2.49 2 4.98 +
      Soda .99 1 .99 +
      Ice Cream 6 1 6. +
      +
      + +

      Related Internet Explorer bug reports: Channel9 Wiki: Internet Explorer Standards Support.

      +
      + +
      +
        +
      • abbr provides an abbreviated form of a cell’s content. Useful for long headers that speech synthesizers may repeat frequently. +
      • axis declares categories for cells. In effect, it provides header information which does not exist as actual headers in the table. +
      +
        +
      • span specifies the number of columns in a <colgroup>. +
      • width specifies the width of a column or each column in a <colgroup>. +
      +
      + +
      +

      Example:

      + +

      Each column in the following table should be the same width except for the first, which should be twice as wide as the others.

      + +
      + <table rules="groups">
      + <caption>Filtering Features</caption>
      + <colgroup>
      + <col width="2*" align="right">
      + <colgroup span=3 width="*" align="center">
      + <colgroup span=3 width="*" align="center">
      + <colgroup span=2 width="*" align="center">
      + <thead>
      +
      + <tr>
      +
      + <th rowspan=2 scope="col" abbr="Browser">Web Browser
      + <th colspan=3 scope="colgroup" axis="Feature">Cookies
      + <th colspan=3 scope="colgroup" axis="Feature">Images
      + <th colspan=2 scope="colgroup" axis="Feature">Pop&#8208;Up Windows
      +
      + <tr>
      +
      + <th scope="col">external server
      + <th scope="col">by domain
      + <th scope="col">throw away on exit
      + <th scope="col">external server
      + <th scope="col">by domain
      + <th scope="col">by URL
      + <th scope="col">all
      + <th scope="col">by domain
      +
      +
      + <tbody>
      +
      + <tr><th scope="row">Internet Explorer <td>Yes<td>Yes<td>No <td>No<td>No<td>No <td>No<td>No
      + <tr><th scope="row">Mozilla <td>Yes<td>Yes<td>Yes <td>Yes<td>Yes<td>No <td>Yes<td>Yes
      + <tr><th scope="row">Opera <td>Yes<td>Yes<td>Yes <td>No<td>No<td>No <td>Yes<td>No
      + <tr><th scope="row">iCab <td>No<td>No<td>Yes <td>Yes<td>Yes<td>Yes <td>No<td>No
      +
      + </table>
      +
      +

      A speech synthesizer could read this table aloud as:

      +
      Caption: Filtering Features
      +Browser: Internet Explorer
      +	Cookies:
      +		external server: Yes
      +		by domain: Yes
      +		throw away on exit: No
      +	Images:
      +		external server: No
      +		by domain: No
      +		throw away on exit: No
      +	Pop‐Up Windows:
      +		all: No
      +		by domain: No
      +Browser: Mozilla
      +	Cookies:
      +		external server: Yes
      +		by domain: Yes
      +		throw away on exit: Yes
      +	Images:
      +		external server: Yes
      +		by domain: Yes
      +		throw away on exit: No
      +	Pop‐Ups:
      +		all: Yes
      +		by domain: Yes
      +Browser: Opera
      +	Cookies:
      +		external server: Yes
      +		by domain: Yes
      +		throw away on exit: Yes
      +	Images:
      +		external server: No
      +		by domain: No
      +		throw away on exit: No
      +	Pop‐Ups:
      +		all: Yes
      +		by domain: No
      +Browser: iCab
      +	Cookies:
      +		external server: No
      +		by domain: No
      +		throw away on exit: Yes
      +	Images:
      +		external server: Yes
      +		by domain: Yes
      +		throw away on exit: Yes
      +	Pop‐Ups:
      +		all: No
      +		by domain: No
      +
      +

      Your Web browser renders it like this:

      +
      + + + + + + + + + + + +
      Filtering Features
      Web Browser + Cookies + Images + Pop‐Up Windows +
      external server + by domain + throw away on exit + external server + by domain + by URL + all + by domain +
      Internet Explorer YesYesNo NoNoNo NoNo +
      Mozilla YesYesYes YesYesNo YesYes +
      Opera YesYesYes NoNoNo YesNo +
      iCab NoNoYes YesYesYes NoNo +
      +
      +
      +
      + +
      +

      Your Browser

      + +

      Your Web browser identified itself as Wget/1.11.4 when it requested this page.

      + + + +
      + + + diff --git a/smlnj-lib/HTML4/tests/template.html b/smlnj-lib/HTML4/tests/template.html new file mode 100644 index 0000000..13be5bc --- /dev/null +++ b/smlnj-lib/HTML4/tests/template.html @@ -0,0 +1,15 @@ + + + + + <!--title--> + + + +

      Abstract syntax tree for

      +
      + +

      + + + diff --git a/smlnj-lib/HTML4/tests/test001.html b/smlnj-lib/HTML4/tests/test001.html new file mode 100644 index 0000000..b338f64 --- /dev/null +++ b/smlnj-lib/HTML4/tests/test001.html @@ -0,0 +1,23 @@ + + + + + Test Page for HTML 4 Library + + + +

      Test Page for HTML 4 Library

      + + + +
      +
      joe@example.com
      + +

      + Valid HTML 4.01 Strict +

      + + + diff --git a/smlnj-lib/HTML4/tests/test002.html b/smlnj-lib/HTML4/tests/test002.html new file mode 100644 index 0000000..3add548 --- /dev/null +++ b/smlnj-lib/HTML4/tests/test002.html @@ -0,0 +1,11 @@ + + +Test 002 + + +

      This is a test & stuff.

      +

      This is a test & stuff.

      +

      This is a test && stuff.

      +

      This is a test && stuff.

      + + diff --git a/smlnj-lib/HashCons/.cm/GUID/hash-cons-atom.sml b/smlnj-lib/HashCons/.cm/GUID/hash-cons-atom.sml new file mode 100644 index 0000000..beb9482 --- /dev/null +++ b/smlnj-lib/HashCons/.cm/GUID/hash-cons-atom.sml @@ -0,0 +1 @@ +guid-$/(hash-cons-lib.cm):hash-cons-atom.sml-1714016082.929 diff --git a/smlnj-lib/HashCons/.cm/GUID/hash-cons-bool.sml b/smlnj-lib/HashCons/.cm/GUID/hash-cons-bool.sml new file mode 100644 index 0000000..3e13394 --- /dev/null +++ b/smlnj-lib/HashCons/.cm/GUID/hash-cons-bool.sml @@ -0,0 +1 @@ +guid-$/(hash-cons-lib.cm):hash-cons-bool.sml-1714016083.076 diff --git a/smlnj-lib/HashCons/.cm/GUID/hash-cons-ground-fn.sml b/smlnj-lib/HashCons/.cm/GUID/hash-cons-ground-fn.sml new file mode 100644 index 0000000..1ed9e53 --- /dev/null +++ b/smlnj-lib/HashCons/.cm/GUID/hash-cons-ground-fn.sml @@ -0,0 +1 @@ +guid-$/(hash-cons-lib.cm):hash-cons-ground-fn.sml-1714016082.915 diff --git a/smlnj-lib/HashCons/.cm/GUID/hash-cons-int.sml b/smlnj-lib/HashCons/.cm/GUID/hash-cons-int.sml new file mode 100644 index 0000000..802c20e --- /dev/null +++ b/smlnj-lib/HashCons/.cm/GUID/hash-cons-int.sml @@ -0,0 +1 @@ +guid-$/(hash-cons-lib.cm):hash-cons-int.sml-1714016082.932 diff --git a/smlnj-lib/HashCons/.cm/GUID/hash-cons-map-sig.sml b/smlnj-lib/HashCons/.cm/GUID/hash-cons-map-sig.sml new file mode 100644 index 0000000..81e57ff --- /dev/null +++ b/smlnj-lib/HashCons/.cm/GUID/hash-cons-map-sig.sml @@ -0,0 +1 @@ +guid-$/(hash-cons-lib.cm):hash-cons-map-sig.sml-1714016082.938 diff --git a/smlnj-lib/HashCons/.cm/GUID/hash-cons-map.sml b/smlnj-lib/HashCons/.cm/GUID/hash-cons-map.sml new file mode 100644 index 0000000..6328f0b --- /dev/null +++ b/smlnj-lib/HashCons/.cm/GUID/hash-cons-map.sml @@ -0,0 +1 @@ +guid-$/(hash-cons-lib.cm):hash-cons-map.sml-1714016082.945 diff --git a/smlnj-lib/HashCons/.cm/GUID/hash-cons-set-sig.sml b/smlnj-lib/HashCons/.cm/GUID/hash-cons-set-sig.sml new file mode 100644 index 0000000..f0ea866 --- /dev/null +++ b/smlnj-lib/HashCons/.cm/GUID/hash-cons-set-sig.sml @@ -0,0 +1 @@ +guid-$/(hash-cons-lib.cm):hash-cons-set-sig.sml-1714016082.925 diff --git a/smlnj-lib/HashCons/.cm/GUID/hash-cons-set.sml b/smlnj-lib/HashCons/.cm/GUID/hash-cons-set.sml new file mode 100644 index 0000000..eb9beb8 --- /dev/null +++ b/smlnj-lib/HashCons/.cm/GUID/hash-cons-set.sml @@ -0,0 +1 @@ +guid-$/(hash-cons-lib.cm):hash-cons-set.sml-1714016083.080 diff --git a/smlnj-lib/HashCons/.cm/GUID/hash-cons-sig.sml b/smlnj-lib/HashCons/.cm/GUID/hash-cons-sig.sml new file mode 100644 index 0000000..1f66ebf --- /dev/null +++ b/smlnj-lib/HashCons/.cm/GUID/hash-cons-sig.sml @@ -0,0 +1 @@ +guid-$/(hash-cons-lib.cm):hash-cons-sig.sml-1714016082.850 diff --git a/smlnj-lib/HashCons/.cm/GUID/hash-cons-string.sml b/smlnj-lib/HashCons/.cm/GUID/hash-cons-string.sml new file mode 100644 index 0000000..31d0792 --- /dev/null +++ b/smlnj-lib/HashCons/.cm/GUID/hash-cons-string.sml @@ -0,0 +1 @@ +guid-$/(hash-cons-lib.cm):hash-cons-string.sml-1714016082.921 diff --git a/smlnj-lib/HashCons/.cm/GUID/hash-cons-word.sml b/smlnj-lib/HashCons/.cm/GUID/hash-cons-word.sml new file mode 100644 index 0000000..b03c125 --- /dev/null +++ b/smlnj-lib/HashCons/.cm/GUID/hash-cons-word.sml @@ -0,0 +1 @@ +guid-$/(hash-cons-lib.cm):hash-cons-word.sml-1714016082.935 diff --git a/smlnj-lib/HashCons/.cm/GUID/hash-cons.sml b/smlnj-lib/HashCons/.cm/GUID/hash-cons.sml new file mode 100644 index 0000000..245316d --- /dev/null +++ b/smlnj-lib/HashCons/.cm/GUID/hash-cons.sml @@ -0,0 +1 @@ +guid-$/(hash-cons-lib.cm):hash-cons.sml-1714016082.856 diff --git a/smlnj-lib/HashCons/.cm/SKEL/hash-cons-atom.sml b/smlnj-lib/HashCons/.cm/SKEL/hash-cons-atom.sml new file mode 100644 index 0000000..ae02c6a --- /dev/null +++ b/smlnj-lib/HashCons/.cm/SKEL/hash-cons-atom.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f1d"Atom"ad"HashConsAtom"jh0gp1e"HashConsGroundFn" \ No newline at end of file diff --git a/smlnj-lib/HashCons/.cm/SKEL/hash-cons-bool.sml b/smlnj-lib/HashCons/.cm/SKEL/hash-cons-bool.sml new file mode 100644 index 0000000..0b30d14 --- /dev/null +++ b/smlnj-lib/HashCons/.cm/SKEL/hash-cons-bool.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f1d"HashCons"ad"HashConsBool"j0 \ No newline at end of file diff --git a/smlnj-lib/HashCons/.cm/SKEL/hash-cons-ground-fn.sml b/smlnj-lib/HashCons/.cm/SKEL/hash-cons-ground-fn.sml new file mode 100644 index 0000000..729b513 --- /dev/null +++ b/smlnj-lib/HashCons/.cm/SKEL/hash-cons-ground-fn.sml @@ -0,0 +1,2 @@ +Skeleton 5 +ae"HashConsGroundFn"i2aT"gp1c"HASH_KEY"f2HashCons"jh1ad"HC"gp1-h0 \ No newline at end of file diff --git a/smlnj-lib/HashCons/.cm/SKEL/hash-cons-int.sml b/smlnj-lib/HashCons/.cm/SKEL/hash-cons-int.sml new file mode 100644 index 0000000..688d3a9 --- /dev/null +++ b/smlnj-lib/HashCons/.cm/SKEL/hash-cons-int.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f2d"Word"d"HashCons"ad"HashConsInt"j0& \ No newline at end of file diff --git a/smlnj-lib/HashCons/.cm/SKEL/hash-cons-map-sig.sml b/smlnj-lib/HashCons/.cm/SKEL/hash-cons-map-sig.sml new file mode 100644 index 0000000..e546494 --- /dev/null +++ b/smlnj-lib/HashCons/.cm/SKEL/hash-cons-map-sig.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f1d"HashCons"ac"HASH_CONS_MAP"h0 \ No newline at end of file diff --git a/smlnj-lib/HashCons/.cm/SKEL/hash-cons-map.sml b/smlnj-lib/HashCons/.cm/SKEL/hash-cons-map.sml new file mode 100644 index 0000000..a86f79f --- /dev/null +++ b/smlnj-lib/HashCons/.cm/SKEL/hash-cons-map.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f1d"Option"ad"HashConsMap"jh2ad"HC"gp1d"HashCons"ad"Map"gp1d"WordRedBlackMap"gp1c"HASH_CONS_MAP" \ No newline at end of file diff --git a/smlnj-lib/HashCons/.cm/SKEL/hash-cons-set-sig.sml b/smlnj-lib/HashCons/.cm/SKEL/hash-cons-set-sig.sml new file mode 100644 index 0000000..afd5a4c --- /dev/null +++ b/smlnj-lib/HashCons/.cm/SKEL/hash-cons-set-sig.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f1d"HashCons"ac"HASH_CONS_SET"h0 \ No newline at end of file diff --git a/smlnj-lib/HashCons/.cm/SKEL/hash-cons-set.sml b/smlnj-lib/HashCons/.cm/SKEL/hash-cons-set.sml new file mode 100644 index 0000000..5c022cf --- /dev/null +++ b/smlnj-lib/HashCons/.cm/SKEL/hash-cons-set.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f3d"Word"d"List"d"LibBase"ad"HashConsSet"jh1ad"HC"gp1d"HashCons"gp1c"HASH_CONS_SET" \ No newline at end of file diff --git a/smlnj-lib/HashCons/.cm/SKEL/hash-cons-sig.sml b/smlnj-lib/HashCons/.cm/SKEL/hash-cons-sig.sml new file mode 100644 index 0000000..890a989 --- /dev/null +++ b/smlnj-lib/HashCons/.cm/SKEL/hash-cons-sig.sml @@ -0,0 +1,2 @@ +Skeleton 5 +ac"HASH_CONS"h0 \ No newline at end of file diff --git a/smlnj-lib/HashCons/.cm/SKEL/hash-cons-string.sml b/smlnj-lib/HashCons/.cm/SKEL/hash-cons-string.sml new file mode 100644 index 0000000..43e580d --- /dev/null +++ b/smlnj-lib/HashCons/.cm/SKEL/hash-cons-string.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f1d"HashString"ad"HashConsString"jh0gp1e"HashConsGroundFn" \ No newline at end of file diff --git a/smlnj-lib/HashCons/.cm/SKEL/hash-cons-word.sml b/smlnj-lib/HashCons/.cm/SKEL/hash-cons-word.sml new file mode 100644 index 0000000..ad364be --- /dev/null +++ b/smlnj-lib/HashCons/.cm/SKEL/hash-cons-word.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f1d"HashCons"ad"HashConsWord"j0 \ No newline at end of file diff --git a/smlnj-lib/HashCons/.cm/SKEL/hash-cons.sml b/smlnj-lib/HashCons/.cm/SKEL/hash-cons.sml new file mode 100644 index 0000000..48ffddc --- /dev/null +++ b/smlnj-lib/HashCons/.cm/SKEL/hash-cons.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f4d"Word"d"PrimeSizes"d"List"d"Array"ad"HashCons"jh0gp1c"HASH_CONS" \ No newline at end of file diff --git a/smlnj-lib/HashCons/.cm/amd64-unix/hash-cons-atom.sml b/smlnj-lib/HashCons/.cm/amd64-unix/hash-cons-atom.sml new file mode 100644 index 0000000..7017db8 Binary files /dev/null and b/smlnj-lib/HashCons/.cm/amd64-unix/hash-cons-atom.sml differ diff --git a/smlnj-lib/HashCons/.cm/amd64-unix/hash-cons-bool.sml b/smlnj-lib/HashCons/.cm/amd64-unix/hash-cons-bool.sml new file mode 100644 index 0000000..ecbf783 Binary files /dev/null and b/smlnj-lib/HashCons/.cm/amd64-unix/hash-cons-bool.sml differ diff --git a/smlnj-lib/HashCons/.cm/amd64-unix/hash-cons-ground-fn.sml b/smlnj-lib/HashCons/.cm/amd64-unix/hash-cons-ground-fn.sml new file mode 100644 index 0000000..948d128 Binary files /dev/null and b/smlnj-lib/HashCons/.cm/amd64-unix/hash-cons-ground-fn.sml differ diff --git a/smlnj-lib/HashCons/.cm/amd64-unix/hash-cons-int.sml b/smlnj-lib/HashCons/.cm/amd64-unix/hash-cons-int.sml new file mode 100644 index 0000000..f4d661d Binary files /dev/null and b/smlnj-lib/HashCons/.cm/amd64-unix/hash-cons-int.sml differ diff --git a/smlnj-lib/HashCons/.cm/amd64-unix/hash-cons-map-sig.sml b/smlnj-lib/HashCons/.cm/amd64-unix/hash-cons-map-sig.sml new file mode 100644 index 0000000..7269a0b Binary files /dev/null and b/smlnj-lib/HashCons/.cm/amd64-unix/hash-cons-map-sig.sml differ diff --git a/smlnj-lib/HashCons/.cm/amd64-unix/hash-cons-map.sml b/smlnj-lib/HashCons/.cm/amd64-unix/hash-cons-map.sml new file mode 100644 index 0000000..5cbae33 Binary files /dev/null and b/smlnj-lib/HashCons/.cm/amd64-unix/hash-cons-map.sml differ diff --git a/smlnj-lib/HashCons/.cm/amd64-unix/hash-cons-set-sig.sml b/smlnj-lib/HashCons/.cm/amd64-unix/hash-cons-set-sig.sml new file mode 100644 index 0000000..d8b1162 Binary files /dev/null and b/smlnj-lib/HashCons/.cm/amd64-unix/hash-cons-set-sig.sml differ diff --git a/smlnj-lib/HashCons/.cm/amd64-unix/hash-cons-set.sml b/smlnj-lib/HashCons/.cm/amd64-unix/hash-cons-set.sml new file mode 100644 index 0000000..1b45d67 Binary files /dev/null and b/smlnj-lib/HashCons/.cm/amd64-unix/hash-cons-set.sml differ diff --git a/smlnj-lib/HashCons/.cm/amd64-unix/hash-cons-sig.sml b/smlnj-lib/HashCons/.cm/amd64-unix/hash-cons-sig.sml new file mode 100644 index 0000000..a95ffd3 Binary files /dev/null and b/smlnj-lib/HashCons/.cm/amd64-unix/hash-cons-sig.sml differ diff --git a/smlnj-lib/HashCons/.cm/amd64-unix/hash-cons-string.sml b/smlnj-lib/HashCons/.cm/amd64-unix/hash-cons-string.sml new file mode 100644 index 0000000..b678536 Binary files /dev/null and b/smlnj-lib/HashCons/.cm/amd64-unix/hash-cons-string.sml differ diff --git a/smlnj-lib/HashCons/.cm/amd64-unix/hash-cons-word.sml b/smlnj-lib/HashCons/.cm/amd64-unix/hash-cons-word.sml new file mode 100644 index 0000000..7756169 Binary files /dev/null and b/smlnj-lib/HashCons/.cm/amd64-unix/hash-cons-word.sml differ diff --git a/smlnj-lib/HashCons/.cm/amd64-unix/hash-cons.sml b/smlnj-lib/HashCons/.cm/amd64-unix/hash-cons.sml new file mode 100644 index 0000000..09ce9f5 Binary files /dev/null and b/smlnj-lib/HashCons/.cm/amd64-unix/hash-cons.sml differ diff --git a/smlnj-lib/HashCons/README b/smlnj-lib/HashCons/README new file mode 100644 index 0000000..22107af --- /dev/null +++ b/smlnj-lib/HashCons/README @@ -0,0 +1,45 @@ +This directory contains a library supporting the implementation of +hash-consing of data structures. + +To use this library, you need to use a two-level definition of your +data structures. For example, we might define a hash-cons representation +of lambda terms as follows: + + structure HC = HashCons + type var = HashConsString.obj + datatype term_node + = VAR of var + | LAM of (var * term) + | APP of (term * term) + withtype term = term_node HC.obj + +And you need to define an equality function on your terms (this function +can use the hash-cons identity on subterms). For example, here is the +code for our lambda terms: + + fun eq (APP(t11, t12), APP(t21, t22)) = + HC.same(t11, t21) andalso HC.same(t12, t22) + | eq (LAM(x, t1), LAM(y, t2)) = HC.same(x, y) andalso HC.same(t1, t2) + | eq (VAR x, VAR y) = HC.same(x, y) + | eq _ = false + +With the equality function, we can create a hash-cons table: + + val tbl = HC.new {eq = eq} + +And then we can then define constructor functions: + + val mkAPP = HC.cons2 tbl (0wx1, APP) + val mkLAM = HC.cons2 tbl (0wx3, LAM) + val mkVAR = HC.cons1 tbl (0wx7, VAR) + val var = HW.mk + +Note that we pick successive prime numbers for the constructor hash codes. +Using these constructors, we can construct the representation of the +identity function (\x.x) as follows: + + mkLAM(var "x", mkVAR(var "x")) + +In addition to term construction, this library also supports finite sets +and maps using the unique hash-cons codes as keys. + diff --git a/smlnj-lib/HashCons/hash-cons-atom.sml b/smlnj-lib/HashCons/hash-cons-atom.sml new file mode 100644 index 0000000..d33b39e --- /dev/null +++ b/smlnj-lib/HashCons/hash-cons-atom.sml @@ -0,0 +1,12 @@ +(* hash-cons-atom.sml + * + * COPYRIGHT (c) 2011 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + *) + +structure HashConsAtom = HashConsGroundFn ( + struct + type hash_key = Atom.atom + val sameKey = Atom.same + val hashVal = Atom.hash + end) diff --git a/smlnj-lib/HashCons/hash-cons-bool.sml b/smlnj-lib/HashCons/hash-cons-bool.sml new file mode 100644 index 0000000..05dd10d --- /dev/null +++ b/smlnj-lib/HashCons/hash-cons-bool.sml @@ -0,0 +1,31 @@ +(* hash-cons-bool.sml + * + * Implementation of hash-consed booleans. + * + * COPYRIGHT (c) 2023 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + *) + +structure HashConsBool : sig + + type hash_key = bool + type obj = hash_key HashCons.obj + + val mk : hash_key -> obj + + (* the hash-consed boolean values *) + val hcFalse : obj + val hcTrue : obj + + end = struct + + type hash_key = bool + type obj = hash_key HashCons.obj + + val hcFalse = {nd = false, tag = 0w0, hash = 0w17} + val hcTrue = {nd = true, tag = 0w1, hash = 0w13} + + fun mk false = hcFalse + | mk true = hcTrue + + end diff --git a/smlnj-lib/HashCons/hash-cons-ground-fn.sml b/smlnj-lib/HashCons/hash-cons-ground-fn.sml new file mode 100644 index 0000000..1a47fdb --- /dev/null +++ b/smlnj-lib/HashCons/hash-cons-ground-fn.sml @@ -0,0 +1,29 @@ +(* hash-cons-ground-fn.sml + * + * Functor for defining hashed-cons representation of ground terms. + * + * COPYRIGHT (c) 2011 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + *) + +functor HashConsGroundFn (T : HASH_KEY) : sig + + type hash_key = T.hash_key + type obj = hash_key HashCons.obj + + val mk : hash_key -> obj + + end = struct + + structure HC = HashCons + + type hash_key = T.hash_key + type obj = hash_key HC.obj + + val tbl = HC.new {eq = T.sameKey} + + val cons = HC.cons0 tbl + + fun mk term = cons(T.hashVal term, term) + + end diff --git a/smlnj-lib/HashCons/hash-cons-int.sml b/smlnj-lib/HashCons/hash-cons-int.sml new file mode 100644 index 0000000..adc11fa --- /dev/null +++ b/smlnj-lib/HashCons/hash-cons-int.sml @@ -0,0 +1,24 @@ +(* hash-cons-int.sml + * + * Hash-cons wrapper for `int` values; this directly uses the value + * as its representation w/o a tabke. + * + * COPYRIGHT (c) 2023 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + *) + +structure HashConsInt : sig + + type hash_key = int + type obj = hash_key HashCons.obj + + val mk : hash_key -> obj + + end = struct + + type hash_key = int + type obj = hash_key HashCons.obj + + fun mk n = {nd = n, tag = Word.fromInt n, hash = Word.fromInt n} + + end diff --git a/smlnj-lib/HashCons/hash-cons-lib.cm b/smlnj-lib/HashCons/hash-cons-lib.cm new file mode 100644 index 0000000..f2f9bae --- /dev/null +++ b/smlnj-lib/HashCons/hash-cons-lib.cm @@ -0,0 +1,48 @@ +(* hash-cons-lib.cm + * + * This library supports the implementation of hash-consed of data structures. + * + * COPYRIGHT (c) 2023 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + *) + +Library + + signature HASH_CONS + signature HASH_CONS_MAP + signature HASH_CONS_SET + + structure HashCons + structure HashConsMap + structure HashConsSet + + (* standard base types *) + structure HashConsAtom + structure HashConsBool + structure HashConsInt + structure HashConsString + structure HashConsWord + + functor HashConsGroundFn + +is + +#if defined(NEW_CM) + $/basis.cm + $/smlnj-lib.cm +#else + ../Util/smlnj-lib.cm +#endif + + hash-cons-atom.sml + hash-cons-bool.sml + hash-cons-ground-fn.sml + hash-cons-int.sml + hash-cons-map-sig.sml + hash-cons-map.sml + hash-cons-sig.sml + hash-cons.sml + hash-cons-set-sig.sml + hash-cons-set.sml + hash-cons-string.sml + hash-cons-word.sml diff --git a/smlnj-lib/HashCons/hash-cons-map-sig.sml b/smlnj-lib/HashCons/hash-cons-map-sig.sml new file mode 100644 index 0000000..fb9de72 --- /dev/null +++ b/smlnj-lib/HashCons/hash-cons-map-sig.sml @@ -0,0 +1,137 @@ +(* hash-cons-map-sig.sml + * + * COPYRIGHT (c) 2011 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + *) + +signature HASH_CONS_MAP = + sig + + type 'a obj = 'a HashCons.obj + + type ('a, 'b) map + + val empty : ('a, 'b) map + (* The empty map *) + + val singleton : ('a obj * 'b) -> ('a, 'b) map + (* return the specified singleton map *) + + val insert : ('a, 'b) map * 'a obj * 'b -> ('a, 'b) map + val insert' : (('a obj * 'b) * ('a, 'b) map) -> ('a, 'b) map + (* Insert an item. *) + + val insertWith : (('b * 'b) -> 'b) + -> ('a, 'b) map * 'a obj * 'b -> ('a, 'b) map + (* Insert an item with a combining function to resolve collisions. + * The first argument to the combining function is the existing value, + * and the second argument is the value being inserted into the map. + *) + val insertWithi : (('a obj * 'b * 'b) -> 'b) + -> ('a, 'b) map * 'a obj * 'b -> ('a, 'b) map + (* Like insertWith, except that the combining function also takes the + * key as an argument. + *) + + val find : ('a, 'b) map * 'a obj -> 'b option + (* Look for an item, return NONE if the item doesn't exist *) + + val lookup : ('a, 'b) map * 'a obj -> 'b + (* look for an item, raise the NotFound exception if it doesn't exist *) + + val inDomain : (('a, 'b) map * 'a obj) -> bool + (* return true, if the key is in the domain of the map *) + + val remove : ('a, 'b) map * 'a obj -> ('a, 'b) map * 'b + (* Remove an item, returning new map and value removed. + * Raises LibBase.NotFound if not found. + *) + + val isEmpty : ('a, 'b) map -> bool + (* Return true if and only if the map is empty *) + + val numItems : ('a, 'b) map -> int + (* Return the number of items in the map *) + + val listItems : ('a, 'b) map -> 'b list + val listItemsi : ('a, 'b) map -> ('a obj * 'b) list + (* Return an ordered list of the items (and their keys) in the map. *) + + val listKeys : ('a, 'b) map -> 'a obj list + (* return an ordered list of the keys in the map. *) + + val collate : ('b * 'b -> order) -> (('a, 'b) map * ('a, 'b) map) -> order + (* given an ordering on the map's range, return an ordering + * on the map. + *) + + val unionWith : ('b * 'b -> 'b) -> (('a, 'b) map * ('a, 'b) map) + -> ('a, 'b) map + val unionWithi : ('a obj * 'b * 'b -> 'b) -> (('a, 'b) map * ('a, 'b) map) + -> ('a, 'b) map + (* return a map whose domain is the union of the domains of the two input + * maps, using the supplied function to define the map on elements that + * are in both domains. + *) + + val intersectWith : ('b * 'c -> 'd) -> (('a, 'b) map * ('a, 'c) map) + -> ('a, 'd) map + val intersectWithi : ('a obj * 'b * 'c -> 'd) -> (('a, 'b) map * ('a, 'c) map) + -> ('a, 'd) map + (* return a map whose domain is the intersection of the domains of the + * two input maps, using the supplied function to define the range. + *) + + val mergeWith : ('b option * 'c option -> 'd option) + -> (('a, 'b) map * ('a, 'c) map) -> ('a, 'd) map + val mergeWithi : ('a obj * 'b option * 'c option -> 'd option) + -> (('a, 'b) map * ('a, 'c) map) -> ('a, 'd) map + (* merge two maps using the given function to control the merge. For + * each key k in the union of the two maps domains, the function + * is applied to the image of the key under the map. If the function + * returns SOME y, then (k, y) is added to the resulting map. + *) + + val app : ('b -> unit) -> ('a, 'b) map -> unit + val appi : (('a obj * 'b) -> unit) -> ('a, 'b) map -> unit + (* Apply a function to the entries of the map. *) + + val map : ('b -> 'c) -> ('a, 'b) map -> ('a, 'c) map + val mapi : ('a obj * 'b -> 'c) -> ('a, 'b) map -> ('a, 'c) map + (* Create a new map by applying a map function to the + * name/value pairs in the map. + *) + + val fold : ('b * 'c -> 'c) -> 'c -> ('a, 'b) map -> 'c + val foldi : ('a obj * 'b * 'c -> 'c) -> 'c -> ('a, 'b) map -> 'c + (* Apply a folding function to the entries of the map *) + + val foldl : ('b * 'c -> 'c) -> 'c -> ('a, 'b) map -> 'c + val foldli : ('a obj * 'b * 'c -> 'c) -> 'c -> ('a, 'b) map -> 'c + val foldr : ('b * 'c -> 'c) -> 'c -> ('a, 'b) map -> 'c + val foldri : ('a obj * 'b * 'c -> 'c) -> 'c -> ('a, 'b) map -> 'c + (* these functions are DEPRECATED *) + + val filter : ('b -> bool) -> ('a, 'b) map -> ('a, 'b) map + val filteri : ('a obj * 'b -> bool) -> ('a, 'b) map -> ('a, 'b) map + (* Filter out those elements of the map that do not satisfy the + * predicate. + *) + + val mapPartial : ('b -> 'c option) -> ('a, 'b) map -> ('a, 'c) map + val mapPartiali : ('a obj * 'b -> 'c option) -> ('a, 'b) map -> ('a, 'c) map + (* map a partial function over the elements of a map. *) + + val exists : ('b -> bool) -> ('a, 'b) map -> bool + val existsi : ('a obj * 'b -> bool) -> ('a, 'b) map -> bool + (* check the elements of a map with a predicate and return true if + * any element satisfies the predicate. Return false otherwise. + *) + + val all : ('b -> bool) -> ('a, 'b) map -> bool + val alli : ('a obj * 'b -> bool) -> ('a, 'b) map -> bool + (* check the elements of a map with a predicate and return true if + * they all satisfy the predicate. Return false otherwise. + *) + + end diff --git a/smlnj-lib/HashCons/hash-cons-map.sml b/smlnj-lib/HashCons/hash-cons-map.sml new file mode 100644 index 0000000..835ee1f --- /dev/null +++ b/smlnj-lib/HashCons/hash-cons-map.sml @@ -0,0 +1,111 @@ +(* hash-cons-map.sml + * + * COPYRIGHT (c) 2011 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * This is an implementation of the HASH_CONS_MAP signature that is built + * on top of the WordRedBlackMap structure. Eventually, it will be replaced + * by an implmementation that uses Patricia trees. + *) + +structure HashConsMap : HASH_CONS_MAP = + struct + + structure HC = HashCons + structure Map = WordRedBlackMap + + type 'a obj = 'a HC.obj + + type ('a, 'b) map = ('a obj * 'b) Map.map + + fun lift2 f ((_, a), (_, b)) = f(a, b) + fun lift2i f (_, (k, a), (_, b)) = f(k, a, b) + + val empty = Map.empty + fun singleton (obj, v) = Map.singleton (HC.tag obj, (obj, v)) + fun insert (m, obj, v) = Map.insert(m, HC.tag obj, (obj, v)) + fun insert' (p as (obj, v), m) = Map.insert(m, HC.tag obj, p) + + fun insertWith merge (m, obj, v) = let + val tag = HC.tag obj + in + case Map.find(m, tag) + of NONE => Map.insert(m, tag, (obj, v)) + | SOME(_, v') => Map.insert(m, tag, (obj, merge(v', v))) + (* end case *) + end + fun insertWithi merge (m, obj, v) = let + val tag = HC.tag obj + in + case Map.find(m, tag) + of NONE => Map.insert(m, tag, (obj, v)) + | SOME(_, v') => Map.insert(m, tag, (obj, merge(obj, v', v))) + (* end case *) + end + + fun find (map : ('a, 'b) map, obj) = Option.map #2 (Map.find(map, HC.tag obj)) + fun lookup (map : ('a, 'b) map, obj) = #2 (Map.lookup(map, HC.tag obj)) + fun inDomain (map, obj) = Map.inDomain (map, HC.tag obj) + fun remove (map, obj) = let + val (map, (_, v)) = Map.remove (map, HC.tag obj) + in + (map, v) + end + val isEmpty = Map.isEmpty + val numItems = Map.numItems + fun listItems map = Map.foldr (fn ((_, v), vs) => v::vs) [] map + val listItemsi = Map.listItems + fun listKeys map = Map.foldr (fn ((k, _), ks) => k::ks) [] map + fun collate cmp = Map.collate (lift2 cmp) + fun unionWith merge = + Map.unionWith (fn ((k, a), (_, b)) => (k, merge(a, b))) + fun unionWithi merge = + Map.unionWithi (lift2i (fn (k, a, b) => (k, merge(k, a, b)))) + fun intersectWith join = + Map.intersectWith (fn ((k, a), (_, b)) => (k, join(a, b))) + fun intersectWithi join = + Map.intersectWithi (lift2i (fn (k, a, b) => (k, join(k, a, b)))) + fun mergeWith join = let + fun result (k, SOME c) = SOME(k, c) + | result (_, NONE) = NONE + fun join' (SOME(k, a), SOME(_, b)) = result (k, join(SOME a, SOME b)) + | join' (SOME(k, a), NONE) = result (k, join(SOME a, NONE)) + | join' (NONE, SOME(k, b)) = result (k, join(NONE, SOME b)) + | join' (NONE, NONE) = raise Fail "impossible" + in + Map.mergeWith join' + end + fun mergeWithi join = let + fun result (k, SOME c) = SOME(k, c) + | result (_, NONE) = NONE + fun join' (SOME(k, a), SOME(_, b)) = result (k, join(k, SOME a, SOME b)) + | join' (SOME(k, a), NONE) = result (k, join(k, SOME a, NONE)) + | join' (NONE, SOME(k, b)) = result (k, join(k, NONE, SOME b)) + | join' (NONE, NONE) = raise Fail "impossible" + in + Map.mergeWith join' + end + fun app f = Map.app (fn (_, v) => f v) + val appi = Map.app + fun map f = Map.map (fn (k, v) => (k, f v)) + fun mapi f = Map.map (fn (k, v) => (k, f(k, v))) + fun fold f = Map.foldl (fn ((_, x), acc) => f(x, acc)) + fun foldi f = Map.foldl (fn ((k, x), acc) => f(k, x, acc)) + val foldl = fold (* DEPRECATED *) + val foldli = foldi (* DEPRECATED *) + val foldr = fold (* DEPRECATED *) + val foldri = foldi (* DEPRECATED *) + fun filter pred = Map.filter (fn (_, x) => pred x) + val filteri = Map.filter + fun mapPartial f = + Map.mapPartial + (fn (k, v) => case f v of SOME v => SOME(k, v) | NONE => NONE) + fun mapPartiali f = + Map.mapPartial + (fn (k, v) => case f(k, v) of SOME v => SOME(k, v) | NONE => NONE) + fun exists f = Map.exists (fn (k, v) => f v) + fun existsi f = Map.exists f + fun all f = Map.all (fn (k, v) => f v) + fun alli f = Map.all f + + end diff --git a/smlnj-lib/HashCons/hash-cons-set-sig.sml b/smlnj-lib/HashCons/hash-cons-set-sig.sml new file mode 100644 index 0000000..eaa4a0e --- /dev/null +++ b/smlnj-lib/HashCons/hash-cons-set-sig.sml @@ -0,0 +1,126 @@ +(* hash-cons-set-sig.sml + * + * COPYRIGHT (c) 2011 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * Finnite sets of hash-consed objects. + *) + + +signature HASH_CONS_SET = + sig + + type 'a obj = 'a HashCons.obj + + type 'a set + + val empty : 'a set + (* The empty set *) + + val singleton : 'a obj -> 'a set + (* Create a singleton set *) + + val fromList : 'a obj list -> 'a set + (* create a set from a list of items *) + + val add : 'a set * 'a obj -> 'a set + val add' : ('a obj * 'a set) -> 'a set + (* Insert an 'a obj. *) + + val addList : 'a set * 'a obj list -> 'a set + (* Insert items from list. *) + + val subtract : 'a set * 'a obj -> 'a set + val subtract' : ('a obj * 'a set) -> 'a set + (* Subtract an 'a obj from a set; has no effect if the 'a obj is not in the set *) + + val subtractList : 'a set * 'a obj list -> 'a set + (* Subtract a list of items from the set. *) + + val delete : 'a set * 'a obj -> 'a set + (* Remove an 'a obj. Raise NotFound if not found. *) + + val member : 'a set * 'a obj -> bool + (* Return true if and only if 'a obj is an element in the set *) + + val isEmpty : 'a set -> bool + (* Return true if and only if the set is empty *) + + val equal : ('a set * 'a set) -> bool + (* Return true if and only if the two sets are equal *) + + val compare : ('a set * 'a set) -> order + (* does a lexical comparison of two sets *) + + val isSubset : ('a set * 'a set) -> bool + (* Return true if and only if the first set is a subset of the second *) + + val disjoint : 'a set * 'a set -> bool + (* are the two sets disjoint? *) + + val numItems : 'a set -> int + (* Return the number of items in the table *) + + val toList : 'a set -> 'a obj list + (* Return an ordered list of the items in the set *) + + val listItems : 'a set -> 'a obj list + (* Return an ordered list of the items in the set. This function is + * deprecated in favor of `toList` + *) + + val union : 'a set * 'a set -> 'a set + (* Union *) + + val intersection : 'a set * 'a set -> 'a set + (* Intersection *) + + val difference : 'a set * 'a set -> 'a set + (* Difference *) + + val map : ('a obj -> 'b obj) -> 'a set -> 'b set + (* Create a new set by applying a function to the elements + * of the set. + *) + + val mapPartial : ('a obj -> 'b obj option) -> 'a set -> 'b set + (* Create a new set by applying a partial function to the elements + * of the set. + *) + + val app : ('a obj -> unit) -> 'a set -> unit + (* Apply a function to the entries of the set *) + + val fold : ('a obj * 'b -> 'b) -> 'b -> 'a set -> 'b + (* Apply a folding function to the entries of the set *) + + val foldl : ('a obj * 'b -> 'b) -> 'b -> 'a set -> 'b + val foldr : ('a obj * 'b -> 'b) -> 'b -> 'a set -> 'b + (* these functions are DEPRECATED *) + + val partition : ('a obj -> bool) -> 'a set -> ('a set * 'a set) + (* partition a set into two based using the given predicate. Returns two + * sets, where the first contains those elements for which the predicate is + * true and the second contains those elements for which the predicate is + * false. + *) + + val filter : ('a obj -> bool) -> 'a set -> 'a set + (* filter a set by the given predicate returning only those elements for + * which the predicate is true. + *) + + val all : ('a obj -> bool) -> 'a set -> bool + (* check the elements of a set with a predicate and return true if + * they all satisfy the predicate. Return false otherwise. + *) + + val exists : ('a obj -> bool) -> 'a set -> bool + (* check the elements of a set with a predicate and return true if + * any element satisfies the predicate. Return false otherwise. + *) + + val find : ('a obj -> bool) -> 'a set -> 'a obj option + (* find an element in the set for which the predicate is true *) + + end diff --git a/smlnj-lib/HashCons/hash-cons-set.sml b/smlnj-lib/HashCons/hash-cons-set.sml new file mode 100644 index 0000000..bab9426 --- /dev/null +++ b/smlnj-lib/HashCons/hash-cons-set.sml @@ -0,0 +1,490 @@ +(* hash-cons-set.sml + * + * This is an implementation of the HASH_CONS_SET signature that uses Red-Black + * trees. Eventually, it should be replaced by an implmementation that uses + * Patricia trees. + * + * COPYRIGHT (c) 2011 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + *) + +structure HashConsSet : HASH_CONS_SET = + struct + + structure HC = HashCons + + datatype color = R | B + + type 'a obj = 'a HC.obj + + datatype 'a tree + = E + | T of (color * 'a tree * 'a obj * 'a tree) + + datatype 'a set = SET of (int * 'a tree) + +(* NOTE: we should use the Word.< and = operations instead of Word.compare *) + fun cmpObj (a : 'a obj, b : 'a obj) = Word.compare(#tag a, #tag b) + + fun isEmpty (SET(_, E)) = true + | isEmpty _ = false + + val empty = SET(0, E) + + fun singleton x = SET(1, T(B, E, x, E)) + + fun add (SET(nItems, m), x) = let + val nItems' = ref nItems + fun ins E = (nItems' := nItems+1; T(R, E, x, E)) + | ins (s as T(color, a, y, b)) = (case cmpObj(x, y) + of LESS => (case a + of T(R, c, z, d) => (case cmpObj(x, z) + of LESS => (case ins c + of T(R, e, w, f) => T(R, T(B,e,w,f), z, T(B,d,y,b)) + | c => T(B, T(R,c,z,d), y, b) + (* end case *)) + | EQUAL => T(color, T(R, c, x, d), y, b) + | GREATER => (case ins d + of T(R, e, w, f) => T(R, T(B,c,z,e), w, T(B,f,y,b)) + | d => T(B, T(R,c,z,d), y, b) + (* end case *)) + (* end case *)) + | _ => T(B, ins a, y, b) + (* end case *)) + | EQUAL => T(color, a, x, b) + | GREATER => (case b + of T(R, c, z, d) => (case cmpObj(x, z) + of LESS => (case ins c + of T(R, e, w, f) => T(R, T(B,a,y,e), w, T(B,f,z,d)) + | c => T(B, a, y, T(R,c,z,d)) + (* end case *)) + | EQUAL => T(color, a, y, T(R, c, x, d)) + | GREATER => (case ins d + of T(R, e, w, f) => T(R, T(B,a,y,c), z, T(B,e,w,f)) + | d => T(B, a, y, T(R,c,z,d)) + (* end case *)) + (* end case *)) + | _ => T(B, a, y, ins b) + (* end case *)) + (* end case *)) + val T(_, a, y, b) = ins m + in + SET(!nItems', T(B, a, y, b)) + end + fun add' (x, m) = add (m, x) + + fun addList (s, []) = s + | addList (s, x::r) = addList(add(s, x), r) + + (* Remove an item. Raises LibBase.NotFound if not found. *) + local + datatype 'a zipper + = TOP + | LEFT of (color * 'a obj * 'a tree * 'a zipper) + | RIGHT of (color * 'a tree * 'a obj * 'a zipper) + in + fun delete (SET(nItems, t), k) = let + (* zip the zipper *) + fun zip (TOP, t) = t + | zip (LEFT(color, x, b, p), a) = zip(p, T(color, a, x, b)) + | zip (RIGHT(color, a, x, p), b) = zip(p, T(color, a, x, b)) + (* zip the zipper while resolving a black deficit *) + fun fixupZip (TOP, t) = (true, t) + (* case 1 from CLR *) + | fixupZip (LEFT(B, x, T(R, a, y, b), p), t) = (case a + of T(_, T(R, a11, w, a12), z, a2) => (* case 1L ==> case 3L ==> case 4L *) + (false, zip (p, T(B, T(R, T(B, t, x, a11), w, T(B, a12, z, a2)), y, b))) + | T(_, a1, z, T(R, a21, w, t22)) => (* case 1L ==> case 4L *) + (false, zip (p, T(B, T(R, T(B, t, x, a1), z, T(B, a21, w, t22)), y, b))) + | T(_, a1, z, a2) => (* case 1L ==> case 2L; rotate + recolor fixes deficit *) + (false, zip (p, T(B, T(B, t, x, T(R, a1, z, a2)), y, b))) + | _ => fixupZip (LEFT(R, x, a, LEFT(B, y, b, p)), t) + (* end case *)) + | fixupZip (RIGHT(B, T(R, a, x, b), y, p), t) = (case b + of T(_, b1, z, T(R, b21, w, b22)) => (* case 1R ==> case 3R ==> case 4R *) + (false, zip (p, T(B, a, x, T(R, T(B, b1, z, b21), w, T(B, b22, y, t))))) + | T(_, T(R, b11, w, b12), z, b2) => (* case 1R ==> case 4R *) + (false, zip (p, T(B, a, x, T(R, T(B, b11, w, b12), z, T(B, b2, y, t))))) + | T(_, b1, z, b2) => (* case 1L ==> case 2L; rotate + recolor fixes deficit *) + (false, zip (p, T(B, a, x, T(B, T(R, b1, z, b2), y, t)))) + | _ => fixupZip (RIGHT(R, b, y, RIGHT(B, a, x, p)), t) + (* end case *)) + (* case 3 from CLR *) + | fixupZip (LEFT(color, x, T(B, T(R, a1, y, a2), z, b), p), t) = + (* case 3L ==> case 4L *) + (false, zip (p, T(color, T(B, t, x, a1), y, T(B, a2, z, b)))) + | fixupZip (RIGHT(color, T(B, a, x, T(R, b1, y, b2)), z, p), t) = + (* case 3R ==> case 4R; rotate, recolor, plus rotate fixes deficit *) + (false, zip (p, T(color, T(B, a, x, b1), y, T(B, b2, z, t)))) + (* case 4 from CLR *) + | fixupZip (LEFT(color, x, T(B, a, y, T(R, b1, z, b2)), p), t) = + (false, zip (p, T(color, T(B, t, x, a), y, T(B, b1, z, b2)))) + | fixupZip (RIGHT(color, T(B, T(R, a1, z, a2), x, b), y, p), t) = + (false, zip (p, T(color, T(B, a1, z, a2), x, T(B, b, y, t)))) + (* case 2 from CLR; note that "a" and "b" are guaranteed to be black, since we did + * not match cases 3 or 4. + *) + | fixupZip (LEFT(R, x, T(B, a, y, b), p), t) = + (false, zip (p, T(B, t, x, T(R, a, y, b)))) + | fixupZip (LEFT(B, x, T(B, a, y, b), p), t) = + fixupZip (p, T(B, t, x, T(R, a, y, b))) + | fixupZip (RIGHT(R, T(B, a, x, b), y, p), t) = + (false, zip (p, T(B, T(R, a, x, b), y, t))) + | fixupZip (RIGHT(B, T(B, a, x, b), y, p), t) = + fixupZip (p, T(B, T(R, a, x, b), y, t)) + (* push deficit up the tree by recoloring a black node as red *) + | fixupZip (LEFT(_, y, E, p), t) = fixupZip (p, T(R, t, y, E)) + | fixupZip (RIGHT(_, E, y, p), t) = fixupZip (p, T(R, E, y, t)) + (* impossible cases that violate the red invariant *) + | fixupZip _ = raise Fail "Red invariant violation" + (* delete the minimum value from a non-empty tree, returning a triple + * (elem, bd, tr), where elem is the minimum element, tr is the residual + * tree with elem removed, and bd is true if tr has a black-depth that is + * less than the original tree. + *) + fun delMin (T(R, E, y, b), p) = + (* replace the node by its right subtree (which must be E) *) + (y, false, zip(p, b)) + | delMin (T(B, E, y, T(R, a', y', b')), p) = + (* replace the node with its right child, while recoloring the child black to + * preserve the black invariant. + *) + (y, false, zip (p, T(B, a', y', b'))) + | delMin (T(B, E, y, E), p) = let + (* delete the node, which reduces the black-depth by one, so we attempt to fix + * the deficit on the path back. + *) + val (blkDeficit, t) = fixupZip (p, E) + in + (y, blkDeficit, t) + end + | delMin (T(color, a, y, b), z) = delMin(a, LEFT(color, y, b, z)) + | delMin (E, _) = raise Match + fun del (E, z) = raise LibBase.NotFound + | del (T(color, a, y, b), p) = (case cmpObj(k, y) + of LESS => del (a, LEFT(color, y, b, p)) + | EQUAL => (case (color, a, b) + of (R, E, E) => zip(p, E) + | (B, E, E) => #2 (fixupZip (p, E)) + | (_, T(_, a', y', b'), E) => + (* node is black and left child is red; we replace the node with its + * left child recolored to black. + *) + zip(p, T(B, a', y', b')) + | (_, E, T(_, a', y', b')) => + (* node is black and right child is red; we replace the node with its + * right child recolored to black. + *) + zip(p, T(B, a', y', b')) + | _ => let + val (minSucc, blkDeficit, b) = delMin (b, TOP) + in + if blkDeficit + then #2 (fixupZip (RIGHT(color, a, minSucc, p), b)) + else zip (p, T(color, a, minSucc, b)) + end + (* end case *)) + | GREATER => del (b, RIGHT(color, a, y, p)) + (* end case *)) + in + case del(t, TOP) + of T(R, a, x, b) => SET(nItems-1, T(B, a, x, b)) + | t => SET(nItems-1, t) + (* end case *) + end + end (* local *) + + (* Return true if and only if item is an element in the set *) + fun member (SET(_, t), k) = let + fun find' E = false + | find' (T(_, a, y, b)) = (case cmpObj(k, y) + of LESS => find' a + | EQUAL => true + | GREATER => find' b + (* end case *)) + in + find' t + end + + (* Return the number of items in the map *) + fun numItems (SET(n, _)) = n + + fun fold f = let + fun foldf (E, accum) = accum + | foldf (T(_, a, x, b), accum) = + foldf(b, f(x, foldf(a, accum))) + in + fn init => fn (SET(_, m)) => foldf(m, init) + end + + val foldl = fold (* DEPRECATED *) + val foldr = fold (* DEPRECATED *) + + (* return an ordered list of the items in the set. *) + fun toList s = foldr (fn (x, l) => x::l) [] s + + (* functions for walking the tree while keeping a stack of parents + * to be visited. + *) + fun next ((t as T(_, _, _, b))::rest) = (t, left(b, rest)) + | next _ = (E, []) + and left (E, rest) = rest + | left (t as T(_, a, _, _), rest) = left(a, t::rest) + fun start m = left(m, []) + + (* Return true if and only if the two sets are equal *) + fun equal (SET(_, s1), SET(_, s2)) = let + fun cmp (t1, t2) = (case (next t1, next t2) + of ((E, _), (E, _)) => true + | ((E, _), _) => false + | (_, (E, _)) => false + | ((T(_, _, x, _), r1), (T(_, _, y, _), r2)) => ( + case cmpObj(x, y) + of EQUAL => cmp (r1, r2) + | _ => false + (* end case *)) + (* end case *)) + in + cmp (start s1, start s2) + end + + (* Return the lexical order of two sets *) + fun compare (SET(_, s1), SET(_, s2)) = let + fun cmp (t1, t2) = (case (next t1, next t2) + of ((E, _), (E, _)) => EQUAL + | ((E, _), _) => LESS + | (_, (E, _)) => GREATER + | ((T(_, _, x, _), r1), (T(_, _, y, _), r2)) => ( + case cmpObj(x, y) + of EQUAL => cmp (r1, r2) + | order => order + (* end case *)) + (* end case *)) + in + cmp (start s1, start s2) + end + + (* Return true if and only if the first set is a subset of the second *) + fun isSubset (SET(_, s1), SET(_, s2)) = let + fun cmp (t1, t2) = (case (next t1, next t2) + of ((E, _), (E, _)) => true + | ((E, _), _) => true + | (_, (E, _)) => false + | ((T(_, _, x, _), r1), (T(_, _, y, _), r2)) => ( + case cmpObj(x, y) + of LESS => false + | EQUAL => cmp (r1, r2) + | GREATER => cmp (t1, r2) + (* end case *)) + (* end case *)) + in + cmp (start s1, start s2) + end + + (* Return true if the two sets are disjoint *) + fun disjoint (SET(0, _), _) = true + | disjoint (_, SET(0, _)) = true + | disjoint (SET(_, s1), SET(_, s2)) = let + fun walk ((E, _), _) = true + | walk (_, (E, _)) = true + | walk (t1 as (T(_, _, x, _), r1), t2 as (T(_, _, y, _), r2)) = ( + case cmpObj(x, y) + of LESS => walk (next r1, t2) + | EQUAL => false + | GREATER => walk (t1, next r2) + (* end case *)) + in + walk (next (start s1), next (start s2)) + end + + (* support for constructing red-black trees in linear time from increasing + * ordered sequences (based on a description by R. Hinze). Note that the + * elements in the digits are ordered with the largest on the left, whereas + * the elements of the trees are ordered with the largest on the right. + *) + datatype 'a digit + = ZERO + | ONE of ('a obj * 'a tree * 'a digit) + | TWO of ('a obj * 'a tree * 'a obj * 'a tree * 'a digit) + (* add an item that is guaranteed to be larger than any in l *) + fun addItem (a, l) = let + fun incr (a, t, ZERO) = ONE(a, t, ZERO) + | incr (a1, t1, ONE(a2, t2, r)) = TWO(a1, t1, a2, t2, r) + | incr (a1, t1, TWO(a2, t2, a3, t3, r)) = + ONE(a1, t1, incr(a2, T(B, t3, a3, t2), r)) + in + incr(a, E, l) + end + (* link the digits into a tree *) + fun linkAll t = let + fun link (t, ZERO) = t + | link (t1, ONE(a, t2, r)) = link(T(B, t2, a, t1), r) + | link (t, TWO(a1, t1, a2, t2, r)) = + link(T(B, T(R, t2, a2, t1), a1, t), r) + in + link (E, t) + end + + (* create a set from a list of items; this function works in linear time if the list + * is in increasing order. + *) + fun fromList [] = empty + | fromList (first::rest) = let + fun add (prev, x::xs, n, accum) = (case cmpObj(prev, x) + of LESS => add(x, xs, n+1, addItem(x, accum)) + | _ => (* list not in order, so fall back to addList code *) + addList(SET(n, linkAll accum), x::xs) + (* end case *)) + | add (_, [], n, accum) = SET(n, linkAll accum) + in + add (first, rest, 1, addItem(first, ZERO)) + end + + (* return the union of the two sets *) + fun union (SET(_, s1), SET(_, s2)) = let + fun ins ((E, _), n, result) = (n, result) + | ins ((T(_, _, x, _), r), n, result) = + ins(next r, n+1, addItem(x, result)) + fun union' (t1, t2, n, result) = (case (next t1, next t2) + of ((E, _), (E, _)) => (n, result) + | ((E, _), t2) => ins(t2, n, result) + | (t1, (E, _)) => ins(t1, n, result) + | ((T(_, _, x, _), r1), (T(_, _, y, _), r2)) => ( + case cmpObj(x, y) + of LESS => union' (r1, t2, n+1, addItem(x, result)) + | EQUAL => union' (r1, r2, n+1, addItem(x, result)) + | GREATER => union' (t1, r2, n+1, addItem(y, result)) + (* end case *)) + (* end case *)) + val (n, result) = union' (start s1, start s2, 0, ZERO) + in + SET(n, linkAll result) + end + + (* return the intersection of the two sets *) + fun intersection (SET(_, s1), SET(_, s2)) = let + fun intersect (t1, t2, n, result) = (case (next t1, next t2) + of ((T(_, _, x, _), r1), (T(_, _, y, _), r2)) => ( + case cmpObj(x, y) + of LESS => intersect (r1, t2, n, result) + | EQUAL => intersect (r1, r2, n+1, addItem(x, result)) + | GREATER => intersect (t1, r2, n, result) + (* end case *)) + | _ => (n, result) + (* end case *)) + val (n, result) = intersect (start s1, start s2, 0, ZERO) + in + SET(n, linkAll result) + end + + (* return the set difference *) + fun difference (SET(_, s1), SET(_, s2)) = let + fun ins ((E, _), n, result) = (n, result) + | ins ((T(_, _, x, _), r), n, result) = + ins(next r, n+1, addItem(x, result)) + fun diff (t1, t2, n, result) = (case (next t1, next t2) + of ((E, _), _) => (n, result) + | (t1, (E, _)) => ins(t1, n, result) + | ((T(_, _, x, _), r1), (T(_, _, y, _), r2)) => ( + case cmpObj(x, y) + of LESS => diff (r1, t2, n+1, addItem(x, result)) + | EQUAL => diff (r1, r2, n, result) + | GREATER => diff (t1, r2, n, result) + (* end case *)) + (* end case *)) + val (n, result) = diff (start s1, start s2, 0, ZERO) + in + SET(n, linkAll result) + end + + fun subtract (s, item) = difference (s, singleton item) + fun subtract' (item, s) = subtract (s, item) + + fun subtractList (l, items) = let + val items' = List.foldl (fn (x, set) => add(set, x)) (SET(0, E)) items + in + difference (l, items') + end + + fun app f = let + fun appf E = () + | appf (T(_, a, x, b)) = (appf a; f x; appf b) + in + fn (SET(_, m)) => appf m + end + + fun map f = let + fun addf (x, m) = add(m, f x) + in + foldl addf empty + end + + fun mapPartial f (SET(_, m)) = let + fun mapf (E, acc) = acc + | mapf (T(_, a, x, b), acc) = (case f x + of SOME y => mapf (b, mapf (a, add (acc, y))) + | NONE => mapf (b, mapf (a, acc)) + (* end case *)) + in + mapf (m, empty) + end + + (* Filter out those elements of the set that do not satisfy the + * predicate. The filtering is done in increasing map order. + *) + fun filter pred (SET(_, t)) = let + fun walk (E, n, result) = (n, result) + | walk (T(_, a, x, b), n, result) = let + val (n, result) = walk(a, n, result) + in + if (pred x) + then walk(b, n+1, addItem(x, result)) + else walk(b, n, result) + end + val (n, result) = walk (t, 0, ZERO) + in + SET(n, linkAll result) + end + + fun partition pred (SET(_, t)) = let + fun walk (E, n1, result1, n2, result2) = (n1, result1, n2, result2) + | walk (T(_, a, x, b), n1, result1, n2, result2) = let + val (n1, result1, n2, result2) = walk(a, n1, result1, n2, result2) + in + if (pred x) + then walk(b, n1+1, addItem(x, result1), n2, result2) + else walk(b, n1, result1, n2+1, addItem(x, result2)) + end + val (n1, result1, n2, result2) = walk (t, 0, ZERO, 0, ZERO) + in + (SET(n1, linkAll result1), SET(n2, linkAll result2)) + end + + fun exists pred = let + fun test E = false + | test (T(_, a, x, b)) = test a orelse pred x orelse test b + in + fn (SET(_, t)) => test t + end + + fun all pred = let + fun test E = true + | test (T(_, a, x, b)) = test a andalso pred x andalso test b + in + fn (SET(_, t)) => test t + end + + fun find pred = let + fun test E = NONE + | test (T(_, a, x, b)) = (case test a + of NONE => if pred x then SOME x else test b + | someItem => someItem + (* end case *)) + in + fn (SET(_, t)) => test t + end + + (* deprecated *) + val listItems = toList + + end diff --git a/smlnj-lib/HashCons/hash-cons-sig.sml b/smlnj-lib/HashCons/hash-cons-sig.sml new file mode 100644 index 0000000..a0c8cbc --- /dev/null +++ b/smlnj-lib/HashCons/hash-cons-sig.sml @@ -0,0 +1,76 @@ +(* hash-cons-sig.sml + * + * COPYRIGHT (c) 2011 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * TODO: better support for nodes that mix lists and non-lists as args. + * (perhaps a hashed-cons list rep)? + *) + +signature HASH_CONS = + sig + + (* hash table for consing *) + type 'a tbl + + (* create a new hash-cons table using the given equality function *) + val new : {eq : 'a * 'a -> bool} -> 'a tbl + + (* clear a table of all elements *) + val clear : 'a tbl -> unit + + (* a hashed-cons object *) + type 'a obj = { + nd : 'a, (* the underlying representation *) + tag : word, (* a tag that is unique for the object (for the object's table) *) + hash : word (* a hash of the object (used to index the table) *) + } + + (* projections *) + val node : 'a obj -> 'a + val tag : 'a obj -> word + + (* comparisons *) + val same : ('a obj * 'a obj) -> bool + val compare : ('a obj * 'a obj) -> order + + (* constructors for nodes formed from tuples of children *) + val cons0 : 'a tbl -> (word * 'a) -> 'a obj + val cons1 : 'a tbl -> (word * ('b obj -> 'a)) + -> 'b obj -> 'a obj + val cons2 : 'a tbl -> (word * ('b obj * 'c obj -> 'a)) + -> 'b obj * 'c obj -> 'a obj + val cons3 : 'a tbl -> (word * ('b obj * 'c obj * 'd obj -> 'a)) + -> 'b obj * 'c obj * 'd obj -> 'a obj + val cons4 : 'a tbl -> (word * ('b obj * 'c obj * 'd obj * 'e obj -> 'a)) + -> 'b obj * 'c obj * 'd obj * 'e obj -> 'a obj + val cons5 : 'a tbl -> (word * ('b obj * 'c obj * 'd obj * 'e obj * 'f obj -> 'a)) + -> 'b obj * 'c obj * 'd obj * 'e obj * 'f obj -> 'a obj + + (* constructor for nodes formed from a list of children *) + val consList : 'a tbl -> (word * ('b obj list -> 'a)) -> 'b obj list -> 'a obj + + (* constructors for nodes formed from records of children; the arguments include + * a node constructor from a tuple of children and a projection from the record + * type to a tuple type. + *) + val consR1 : 'a tbl -> (word * ('b obj -> 'a) * ('r -> 'b obj)) + -> 'r -> 'a obj + val consR2 : 'a tbl + -> (word * ('b obj * 'c obj -> 'a) * ('r -> 'b obj * 'c obj)) + -> 'r -> 'a obj + val consR3 : 'a tbl + -> (word * ('b obj * 'c obj * 'd obj -> 'a) + * ('r -> 'b obj * 'c obj * 'd obj)) + -> 'r -> 'a obj + val consR4 : 'a tbl + -> (word * ('b obj * 'c obj * 'd obj * 'e obj -> 'a) + * ('r -> 'b obj * 'c obj * 'd obj * 'e obj)) + -> 'r -> 'a obj + val consR5 : 'a tbl + -> (word * ('b obj * 'c obj * 'd obj * 'e obj * 'f obj -> 'a) + * ('r -> 'b obj * 'c obj * 'd obj * 'e obj * 'f obj)) + -> 'r -> 'a obj + + end + diff --git a/smlnj-lib/HashCons/hash-cons-string.sml b/smlnj-lib/HashCons/hash-cons-string.sml new file mode 100644 index 0000000..5015d3c --- /dev/null +++ b/smlnj-lib/HashCons/hash-cons-string.sml @@ -0,0 +1,12 @@ +(* hash-cons-string.sml + * + * COPYRIGHT (c) 2011 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + *) + +structure HashConsString = HashConsGroundFn ( + struct + type hash_key = string + val sameKey = (op = : string * string -> bool) + val hashVal = HashString.hashString + end) diff --git a/smlnj-lib/HashCons/hash-cons-word.sml b/smlnj-lib/HashCons/hash-cons-word.sml new file mode 100644 index 0000000..9d3ccef --- /dev/null +++ b/smlnj-lib/HashCons/hash-cons-word.sml @@ -0,0 +1,24 @@ +(* hash-cons-word.sml + * + * Hash-cons wrapper for `word` values; this directly uses the value + * as its representation w/o a tabke. + * + * COPYRIGHT (c) 2023 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + *) + +structure HashConsWord : sig + + type hash_key = word + type obj = hash_key HashCons.obj + + val mk : hash_key -> obj + + end = struct + + type hash_key = word + type obj = hash_key HashCons.obj + + fun mk w = {nd = w, tag = w, hash = w} + + end diff --git a/smlnj-lib/HashCons/hash-cons.sml b/smlnj-lib/HashCons/hash-cons.sml new file mode 100644 index 0000000..3a4d54f --- /dev/null +++ b/smlnj-lib/HashCons/hash-cons.sml @@ -0,0 +1,91 @@ +(* hash-cons.sml + * + * COPYRIGHT (c) 2011 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + *) + +structure HashCons :> HASH_CONS = + struct + + type 'a obj = {nd : 'a, tag : word, hash : word} + + datatype 'a tbl = Tbl of { + eq : 'a * 'a -> bool, + nextTag : word ref, + tbl : 'a obj list Array.array ref + } + + val tblSz = PrimeSizes.pick 64 + + fun new {eq} = Tbl{ + eq = eq, + nextTag = ref 0w0, + tbl = ref(Array.array(tblSz, [])) + } + + fun clear (Tbl{nextTag, tbl, ...}) = ( + nextTag := 0w0; + Array.modify (fn _ => []) (!tbl)) + + fun insert (Tbl{eq, nextTag, tbl}, h, term) = let + val tbl' = !tbl + val i = Word.toIntX(Word.mod(h, Word.fromInt(Array.length tbl'))) + val bucket = Array.sub(tbl', i) + fun find [] = let + val id = !nextTag + val obj = {nd = term, hash = h, tag = id} + in + nextTag := id + 0w1; + Array.update(tbl', i, obj::bucket); +(* check for table resize *) + obj + end + | find ((obj as {nd, hash, ...})::r) = + if (h = hash) andalso eq(term, nd) + then obj + else find r + in + find bucket + end + + fun node {nd, tag, hash} = nd + fun tag {nd, tag, hash} = tag + + fun same (a : 'a obj, b : 'a obj) = (#tag a = #tag b) + fun compare (a : 'a obj, b : 'a obj) = Word.compare(#tag a, #tag b) + + fun <+ (a, b) = Word.<<(a, 0w1) + b + infix <+ + + fun cons0 tbl (id, c) = insert (tbl, id, c) + + fun cons1 tbl (id, cf) (b : 'b obj) = + insert (tbl, id <+ (#tag b), cf b) + + fun cons2 tbl (id, cf) (b : 'b obj, c : 'c obj) = + insert (tbl, id <+ (#tag b) <+ (#tag c), cf(b, c)) + + fun cons3 tbl (id, cf) (b : 'b obj, c : 'c obj, d : 'd obj) = + insert (tbl, id <+ (#tag b) <+ (#tag c) <+ (#tag d), cf(b, c, d)) + + fun cons4 tbl (id, cf) (b : 'b obj, c : 'c obj, d : 'd obj, e : 'e obj) = + insert (tbl, id <+ (#tag b) <+ (#tag c) <+ (#tag d) <+ (#tag e), + cf(b, c, d, e)) + + fun cons5 tbl (id, cf) + (b : 'b obj, c : 'c obj, d : 'd obj, e : 'e obj, f : 'f obj) = + insert (tbl, + id <+ (#tag b) <+ (#tag c) <+ (#tag d) <+ (#tag e) <+ (#tag f), + cf(b, c, d, e, f)) + + fun consList tbl (id, cf) (l : 'b obj list) = + insert (tbl, List.foldl (fn ({tag, ...}, sum) => sum <+ tag) id l, cf l) + + (* consing for records *) + fun consR1 tbl (id, inj, prj) r = cons1 tbl (id, inj) (prj r) + fun consR2 tbl (id, inj, prj) r = cons2 tbl (id, inj) (prj r) + fun consR3 tbl (id, inj, prj) r = cons3 tbl (id, inj) (prj r) + fun consR4 tbl (id, inj, prj) r = cons4 tbl (id, inj) (prj r) + fun consR5 tbl (id, inj, prj) r = cons5 tbl (id, inj) (prj r) + + end diff --git a/smlnj-lib/INet/.cm/GUID/sock-util-sig.sml b/smlnj-lib/INet/.cm/GUID/sock-util-sig.sml new file mode 100644 index 0000000..67db6bc --- /dev/null +++ b/smlnj-lib/INet/.cm/GUID/sock-util-sig.sml @@ -0,0 +1 @@ +guid-$/(inet-lib.cm):sock-util-sig.sml-1714016091.431 diff --git a/smlnj-lib/INet/.cm/GUID/sock-util.sml b/smlnj-lib/INet/.cm/GUID/sock-util.sml new file mode 100644 index 0000000..d53c4c4 --- /dev/null +++ b/smlnj-lib/INet/.cm/GUID/sock-util.sml @@ -0,0 +1 @@ +guid-$/(inet-lib.cm):sock-util.sml-1714016091.437 diff --git a/smlnj-lib/INet/.cm/GUID/unix-sock-util.sml b/smlnj-lib/INet/.cm/GUID/unix-sock-util.sml new file mode 100644 index 0000000..03e5e6b --- /dev/null +++ b/smlnj-lib/INet/.cm/GUID/unix-sock-util.sml @@ -0,0 +1 @@ +guid-$/(inet-lib.cm):unix-sock-util.sml-1714016091.485 diff --git a/smlnj-lib/INet/.cm/SKEL/sock-util-sig.sml b/smlnj-lib/INet/.cm/SKEL/sock-util-sig.sml new file mode 100644 index 0000000..69cb72c --- /dev/null +++ b/smlnj-lib/INet/.cm/SKEL/sock-util-sig.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f6d"StringCvt"Cd"NetHostDB"d"INetSock"d"Word8Vector"d"Word8Array"d"Socket"Nac"SOCK_UTIL"h0 \ No newline at end of file diff --git a/smlnj-lib/INet/.cm/SKEL/sock-util.sml b/smlnj-lib/INet/.cm/SKEL/sock-util.sml new file mode 100644 index 0000000..6406c3a --- /dev/null +++ b/smlnj-lib/INet/.cm/SKEL/sock-util.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f7d"OS"d"StringCvt"Cd"Byte"d"Word8VectorSlice"d"Word8ArraySlice"d"NetHostDB"d"Int"Cd"NetServDB"d"INetSock"d"Word8Vector"d"Word8Array"d"Socket"Nad"SockUtil"jh2ad"C"gp1d"Char"ad"PC"gp1d"ParserComb"gp1c"SOCK_UTIL" \ No newline at end of file diff --git a/smlnj-lib/INet/.cm/SKEL/unix-sock-util.sml b/smlnj-lib/INet/.cm/SKEL/unix-sock-util.sml new file mode 100644 index 0000000..7f832c3 --- /dev/null +++ b/smlnj-lib/INet/.cm/SKEL/unix-sock-util.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2aUNIX_SOCK_UTIL"h2egp1c"SOCK_UTIL"f1UnixSock"ad"UnixSockUtil"jh2egp1d"SockUtil"f2(d"Socket"gp1 \ No newline at end of file diff --git a/smlnj-lib/INet/.cm/amd64-unix/sock-util-sig.sml b/smlnj-lib/INet/.cm/amd64-unix/sock-util-sig.sml new file mode 100644 index 0000000..9c33fdb Binary files /dev/null and b/smlnj-lib/INet/.cm/amd64-unix/sock-util-sig.sml differ diff --git a/smlnj-lib/INet/.cm/amd64-unix/sock-util.sml b/smlnj-lib/INet/.cm/amd64-unix/sock-util.sml new file mode 100644 index 0000000..f7a48ca Binary files /dev/null and b/smlnj-lib/INet/.cm/amd64-unix/sock-util.sml differ diff --git a/smlnj-lib/INet/.cm/amd64-unix/unix-sock-util.sml b/smlnj-lib/INet/.cm/amd64-unix/unix-sock-util.sml new file mode 100644 index 0000000..87badf8 Binary files /dev/null and b/smlnj-lib/INet/.cm/amd64-unix/unix-sock-util.sml differ diff --git a/smlnj-lib/INet/inet-lib.cm b/smlnj-lib/INet/inet-lib.cm new file mode 100644 index 0000000..050dad2 --- /dev/null +++ b/smlnj-lib/INet/inet-lib.cm @@ -0,0 +1,31 @@ +(* inet-lib.cm + * + * COPYRIGHT (c) 1999 Bell Labs, Lucent Technologies. + * + * Sources file for INet library; part of the SML/NJ library suite. + *) + +Library + signature SOCK_UTIL + structure SockUtil + +#if defined(OPSYS_UNIX) + signature UNIX_SOCK_UTIL + structure UnixSockUtil +#endif + +is +#if defined(NEW_CM) + $/basis.cm + $/smlnj-lib.cm +#else + ../Util/smlnj-lib.cm +#endif + + sock-util-sig.sml + sock-util.sml + +#if defined(OPSYS_UNIX) + unix-sock-util.sml +#endif + diff --git a/smlnj-lib/INet/sock-util-sig.sml b/smlnj-lib/INet/sock-util-sig.sml new file mode 100644 index 0000000..f053367 --- /dev/null +++ b/smlnj-lib/INet/sock-util-sig.sml @@ -0,0 +1,50 @@ +(* sock-util-sig.sml + * + * COPYRIGHT (c) 2020 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * Various utility functions for programming with sockets. + *) + +signature SOCK_UTIL = + sig + + datatype port = PortNumber of int | ServName of string + (* a port can be identified by number, or by the name of a service *) + + datatype hostname = HostName of string | HostAddr of NetHostDB.in_addr + + val scanAddr : (char, 'a) StringCvt.reader + -> ({host : hostname, port : port option}, 'a) StringCvt.reader + (* scan an address, which has the form + * addr [ ":" port ] + * where the addr may either be numeric or symbolic host name and the + * port is either a service name or a decimal number. Legal host names + * must begin with a letter, and may contain any alphanumeric character, + * the minus sign (-) and period (.), where the period is used as a + * domain separator. + *) + val addrFromString : string -> {host : hostname, port : port option} option + + exception BadAddr of string + + val resolveAddr : {host : hostname, port : port option} + -> {host : string, addr : NetHostDB.in_addr, port : int option} + (* Given a hostname and optional port, resolve them in the host + * and service database. If either the host or service name is not + * found, then BadAddr is raised. + *) + + type 'a stream_sock = ('a, Socket.active Socket.stream) Socket.sock + + val connectINetStrm : {addr : NetHostDB.in_addr, port : int} + -> INetSock.inet stream_sock + (* establish a client-side connection to an INET domain stream socket *) + + val recvVec : ('a stream_sock * int) -> Word8Vector.vector + val recvStr : ('a stream_sock * int) -> string + val sendVec : ('a stream_sock * Word8Vector.vector) -> unit + val sendStr : ('a stream_sock * string) -> unit + val sendArr : ('a stream_sock * Word8Array.array) -> unit + + end; diff --git a/smlnj-lib/INet/sock-util.sml b/smlnj-lib/INet/sock-util.sml new file mode 100644 index 0000000..0eb1dce --- /dev/null +++ b/smlnj-lib/INet/sock-util.sml @@ -0,0 +1,138 @@ +(* sock-util.sml + * + * COPYRIGHT (c) 2020 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * Various utility functions for programming with sockets. + *) + +structure SockUtil : SOCK_UTIL = + struct + + structure C = Char + structure PC = ParserComb + + datatype port = PortNumber of int | ServName of string + (* a port can be identified by number, or by the name of a service *) + + datatype hostname = HostName of string | HostAddr of NetHostDB.in_addr + +(** This belongs in an Option structure **) + fun filterPartial pred NONE = NONE + | filterPartial pred (SOME x) = if (pred x) then SOME x else NONE + + fun scanName getc strm = let + fun isNameChr (#".", _) = true + | isNameChr (#"-", _) = true + | isNameChr (c, _) = C.isAlphaNum c + fun getName (strm, cl) = (case filterPartial isNameChr (getc strm) + of SOME(c, strm') => getName(strm', c::cl) + | NONE => SOME(implode(rev cl), strm) + (* end case *)) + in + case (filterPartial (C.isAlpha o #1) (getc strm)) + of SOME(c, strm) => getName(strm, [c]) + | NONE => NONE + (* end case *) + end + + (* scan an address, which has the form + * addr [ ":" port ] + * where the addr may either be numeric or symbolic host name and the + * port is either a service name or a decimal number. Legal host names + * must begin with a letter, and may contain any alphanumeric character, + * the minus sign (-) and period (.), where the period is used as a + * domain separator. + *) + fun scanAddr getc strm = + PC.seqWith (fn (host, port) => {host=host, port=port}) ( + PC.or ( + PC.wrap (scanName, HostName), + PC.wrap (NetHostDB.scan, HostAddr)), + PC.option ( + PC.seqWith #2 ( + PC.eatChar (fn c => (c = #":")), + PC.or ( + PC.wrap (scanName, ServName), + PC.wrap (Int.scan StringCvt.DEC, PortNumber))))) getc strm + + val addrFromString = StringCvt.scanString scanAddr + + exception BadAddr of string + + fun resolveAddr {host, port} = let + fun err (a, b) = raise BadAddr(concat[a, " \"", b, "\" not found"]) + val (name, addr) = (case host + of HostName s => (case NetHostDB.getByName s + of NONE => err ("hostname", s) + | (SOME entry) => (s, NetHostDB.addr entry) + (* end case *)) + | HostAddr addr => (case NetHostDB.getByAddr addr + of NONE => err ("host address", NetHostDB.toString addr) + | (SOME entry) => (NetHostDB.name entry, addr) + (* end case *)) + (* end case *)) + val port = (case port + of (SOME(PortNumber n)) => SOME n + | (SOME(ServName s)) => (case NetServDB.getByName(s, NONE) + of (SOME entry) => SOME(NetServDB.port entry) + | NONE => err("service", s) + (* end case *)) + | NONE => NONE + (* end case *)) + in + {host = name, addr = addr, port = port} + end + + type 'a stream_sock = ('a, Socket.active Socket.stream) Socket.sock + + (* establish a client-side connection to a INET domain stream socket *) + fun connectINetStrm {addr, port} = let + val sock = INetSock.TCP.socket () + in + Socket.connect (sock, INetSock.toAddr(addr, port)); + sock + end + +(** If the server closes the connection, do we get 0 bytes or an error??? **) + (* read exactly n bytes from a stream socket *) + fun recvVec (sock, n) = let + fun get (0, data) = Word8Vector.concat(rev data) + | get (n, data) = let + val v = Socket.recvVec (sock, n) + in + if (Word8Vector.length v = 0) + then raise OS.SysErr("closed socket", NONE) + else get (n - Word8Vector.length v, v::data) + end + in + if (n < 0) then raise Size else get (n, []) + end + + fun recvStr arg = Byte.bytesToString (recvVec arg) + + (* send the complete contents of a vector *) + fun sendVec (sock, vec) = let + val len = Word8Vector.length vec + fun send i = Socket.sendVec (sock, Word8VectorSlice.slice (vec, i, NONE)) + fun put i = if (i < len) + then put(i + send i) + else () + in + put 0 + end + + fun sendStr (sock, str) = sendVec (sock, Byte.stringToBytes str) + + (* send the complete contents of an array *) + fun sendArr (sock, arr) = let + val len = Word8Array.length arr + fun send i = Socket.sendArr (sock, Word8ArraySlice.slice (arr, i, NONE)) + fun put i = if (i < len) + then put(i + send i) + else () + in + put 0 + end + + end; diff --git a/smlnj-lib/INet/unix-sock-util.sml b/smlnj-lib/INet/unix-sock-util.sml new file mode 100644 index 0000000..b889a9e --- /dev/null +++ b/smlnj-lib/INet/unix-sock-util.sml @@ -0,0 +1,33 @@ +(* unix-sock-util.sml + * + * COPYRIGHT (c) 2020 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * Bind SockUtil structure on Unix systems + *) + +signature UNIX_SOCK_UTIL = + sig + + include SOCK_UTIL + + val connectUnixStrm : string -> UnixSock.unix stream_sock + (* establish a client-side connection to a Unix-domain stream socket *) + + end + + +structure UnixSockUtil : UNIX_SOCK_UTIL = + struct + + open SockUtil + + (* establish a client-side connection to a Unix-domain stream socket *) + fun connectUnixStrm path = let + val sock = UnixSock.Strm.socket () + in + Socket.connect (sock, UnixSock.toAddr path); + sock + end + + end diff --git a/smlnj-lib/JSON/.cm/GUID/errors.sml b/smlnj-lib/JSON/.cm/GUID/errors.sml new file mode 100644 index 0000000..309be5d --- /dev/null +++ b/smlnj-lib/JSON/.cm/GUID/errors.sml @@ -0,0 +1 @@ +guid-$/(json-lib.cm):errors.sml-1714016091.869 diff --git a/smlnj-lib/JSON/.cm/GUID/json-buffer-printer.sml b/smlnj-lib/JSON/.cm/GUID/json-buffer-printer.sml new file mode 100644 index 0000000..74a8649 --- /dev/null +++ b/smlnj-lib/JSON/.cm/GUID/json-buffer-printer.sml @@ -0,0 +1 @@ +guid-$/(json-lib.cm):json-buffer-printer.sml-1714016091.654 diff --git a/smlnj-lib/JSON/.cm/GUID/json-decode.sml b/smlnj-lib/JSON/.cm/GUID/json-decode.sml new file mode 100644 index 0000000..9621426 --- /dev/null +++ b/smlnj-lib/JSON/.cm/GUID/json-decode.sml @@ -0,0 +1 @@ +guid-$/(json-lib.cm):json-decode.sml-1714016092.056 diff --git a/smlnj-lib/JSON/.cm/GUID/json-parser.sml b/smlnj-lib/JSON/.cm/GUID/json-parser.sml new file mode 100644 index 0000000..b57d1f9 --- /dev/null +++ b/smlnj-lib/JSON/.cm/GUID/json-parser.sml @@ -0,0 +1 @@ +guid-$/(json-lib.cm):json-parser.sml-1714016091.933 diff --git a/smlnj-lib/JSON/.cm/GUID/json-printer.sml b/smlnj-lib/JSON/.cm/GUID/json-printer.sml new file mode 100644 index 0000000..421e6e6 --- /dev/null +++ b/smlnj-lib/JSON/.cm/GUID/json-printer.sml @@ -0,0 +1 @@ +guid-$/(json-lib.cm):json-printer.sml-1714016092.152 diff --git a/smlnj-lib/JSON/.cm/GUID/json-source.sml b/smlnj-lib/JSON/.cm/GUID/json-source.sml new file mode 100644 index 0000000..bea00c7 --- /dev/null +++ b/smlnj-lib/JSON/.cm/GUID/json-source.sml @@ -0,0 +1 @@ +guid-$/(json-lib.cm):json-source.sml-1714016091.660 diff --git a/smlnj-lib/JSON/.cm/GUID/json-stream-output.fun b/smlnj-lib/JSON/.cm/GUID/json-stream-output.fun new file mode 100644 index 0000000..29f59cf --- /dev/null +++ b/smlnj-lib/JSON/.cm/GUID/json-stream-output.fun @@ -0,0 +1 @@ +guid-$/(json-lib.cm):json-stream-output.fun-1714016091.592 diff --git a/smlnj-lib/JSON/.cm/GUID/json-stream-output.sig b/smlnj-lib/JSON/.cm/GUID/json-stream-output.sig new file mode 100644 index 0000000..4b2704b --- /dev/null +++ b/smlnj-lib/JSON/.cm/GUID/json-stream-output.sig @@ -0,0 +1 @@ +guid-$/(json-lib.cm):json-stream-output.sig-1714016091.588 diff --git a/smlnj-lib/JSON/.cm/GUID/json-stream-parser.sml b/smlnj-lib/JSON/.cm/GUID/json-stream-parser.sml new file mode 100644 index 0000000..f12b6bd --- /dev/null +++ b/smlnj-lib/JSON/.cm/GUID/json-stream-parser.sml @@ -0,0 +1 @@ +guid-$/(json-lib.cm):json-stream-parser.sml-1714016091.672 diff --git a/smlnj-lib/JSON/.cm/GUID/json-stream-printer.sml b/smlnj-lib/JSON/.cm/GUID/json-stream-printer.sml new file mode 100644 index 0000000..b81bfd3 --- /dev/null +++ b/smlnj-lib/JSON/.cm/GUID/json-stream-printer.sml @@ -0,0 +1 @@ +guid-$/(json-lib.cm):json-stream-printer.sml-1714016091.862 diff --git a/smlnj-lib/JSON/.cm/GUID/json-util.sml b/smlnj-lib/JSON/.cm/GUID/json-util.sml new file mode 100644 index 0000000..d78eb80 --- /dev/null +++ b/smlnj-lib/JSON/.cm/GUID/json-util.sml @@ -0,0 +1 @@ +guid-$/(json-lib.cm):json-util.sml-1714016091.881 diff --git a/smlnj-lib/JSON/.cm/GUID/json.sml b/smlnj-lib/JSON/.cm/GUID/json.sml new file mode 100644 index 0000000..5e0c8fe --- /dev/null +++ b/smlnj-lib/JSON/.cm/GUID/json.sml @@ -0,0 +1 @@ +guid-$/(json-lib.cm):json.sml-1714016091.584 diff --git a/smlnj-lib/JSON/.cm/SKEL/errors.sml b/smlnj-lib/JSON/.cm/SKEL/errors.sml new file mode 100644 index 0000000..6ddb138 --- /dev/null +++ b/smlnj-lib/JSON/.cm/SKEL/errors.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f4d"JSON"d"Int"d"General"d"String"ad"Errors"h0 \ No newline at end of file diff --git a/smlnj-lib/JSON/.cm/SKEL/json-buffer-printer.sml b/smlnj-lib/JSON/.cm/SKEL/json-buffer-printer.sml new file mode 100644 index 0000000..d200957 --- /dev/null +++ b/smlnj-lib/JSON/.cm/SKEL/json-buffer-printer.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f1d"CharBuffer"ad"JSONBufferPrinter"jjh0gp1e"JSONStreamOutputFn"gp1c"JSON_STREAM_OUTPUT" \ No newline at end of file diff --git a/smlnj-lib/JSON/.cm/SKEL/json-decode.sml b/smlnj-lib/JSON/.cm/SKEL/json-decode.sml new file mode 100644 index 0000000..2962535 --- /dev/null +++ b/smlnj-lib/JSON/.cm/SKEL/json-decode.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f4d"Real64"JSON"JSONUtil"d"IntInf"ad"JSONDecode"jh3aU"gp1egp1d"Errors"f7d"Fn":C d"List"d"Int"d"JSONParser"d"Real"Nh0 \ No newline at end of file diff --git a/smlnj-lib/JSON/.cm/SKEL/json-parser.sml b/smlnj-lib/JSON/.cm/SKEL/json-parser.sml new file mode 100644 index 0000000..0320f31 --- /dev/null +++ b/smlnj-lib/JSON/.cm/SKEL/json-parser.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f9d"Unsafe"d"Char"d"IEEEReal"d"JSON"Cd"List"d"Real"d"TextIO"d"JSONSource"d"IntInf"Nad"JSONParser"jh1ad"W"gp1d"Word"h0 \ No newline at end of file diff --git a/smlnj-lib/JSON/.cm/SKEL/json-printer.sml b/smlnj-lib/JSON/.cm/SKEL/json-printer.sml new file mode 100644 index 0000000..b28fc82 --- /dev/null +++ b/smlnj-lib/JSON/.cm/SKEL/json-printer.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f2JSON"d"TextIO"ad"JSONPrinter"jh2ad"J"gp1ad"JSP"gp1d"JSONStreamPrinter"h0 \ No newline at end of file diff --git a/smlnj-lib/JSON/.cm/SKEL/json-source.sml b/smlnj-lib/JSON/.cm/SKEL/json-source.sml new file mode 100644 index 0000000..8fd3b8a --- /dev/null +++ b/smlnj-lib/JSON/.cm/SKEL/json-source.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f2d"Int"d"TextIO"ad"JSONSource"h0 \ No newline at end of file diff --git a/smlnj-lib/JSON/.cm/SKEL/json-stream-output.fun b/smlnj-lib/JSON/.cm/SKEL/json-stream-output.fun new file mode 100644 index 0000000..0ece5d2 --- /dev/null +++ b/smlnj-lib/JSON/.cm/SKEL/json-stream-output.fun @@ -0,0 +1,2 @@ +Skeleton 5 +d2aTEXT_OUTPUT_STREAM"h0ae"JSONStreamOutputFn"i2aOut"gp1f8d"StringCvt"d"UTF8"3Cd"CharVector"d"JSON"d"List"d"String"d"Real"Njh1ad"F"gp1d"Format"gp1c"JSON_STREAM_OUTPUT" \ No newline at end of file diff --git a/smlnj-lib/JSON/.cm/SKEL/json-stream-output.sig b/smlnj-lib/JSON/.cm/SKEL/json-stream-output.sig new file mode 100644 index 0000000..4f01cb4 --- /dev/null +++ b/smlnj-lib/JSON/.cm/SKEL/json-stream-output.sig @@ -0,0 +1,2 @@ +Skeleton 5 +d2f2d"JSON"d"IntInf"ac"JSON_STREAM_OUTPUT"h0 \ No newline at end of file diff --git a/smlnj-lib/JSON/.cm/SKEL/json-stream-parser.sml b/smlnj-lib/JSON/.cm/SKEL/json-stream-parser.sml new file mode 100644 index 0000000..353ab95 --- /dev/null +++ b/smlnj-lib/JSON/.cm/SKEL/json-stream-parser.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f8d"Unsafe"d"Char"d"IEEEReal"Cd"List"d"Real"d"TextIO"d"JSONSource"d"IntInf"Nad"JSONStreamParser"jh1ad"W"gp1d"Word"h0 \ No newline at end of file diff --git a/smlnj-lib/JSON/.cm/SKEL/json-stream-printer.sml b/smlnj-lib/JSON/.cm/SKEL/json-stream-printer.sml new file mode 100644 index 0000000..9ba3977 --- /dev/null +++ b/smlnj-lib/JSON/.cm/SKEL/json-stream-printer.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f1d"TextIO"ad"JSONStreamPrinter"jjh0gp1e"JSONStreamOutputFn"gp1c"JSON_STREAM_OUTPUT" \ No newline at end of file diff --git a/smlnj-lib/JSON/.cm/SKEL/json-util.sml b/smlnj-lib/JSON/.cm/SKEL/json-util.sml new file mode 100644 index 0000000..df05763 --- /dev/null +++ b/smlnj-lib/JSON/.cm/SKEL/json-util.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f4JSON"Int"Real"d"IntInf"ad"JSONUtil"jh3aJ"gp1egp1d"Errors"f61Cd"List" d"Vector"Nh0 \ No newline at end of file diff --git a/smlnj-lib/JSON/.cm/SKEL/json.sml b/smlnj-lib/JSON/.cm/SKEL/json.sml new file mode 100644 index 0000000..3db9093 --- /dev/null +++ b/smlnj-lib/JSON/.cm/SKEL/json.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f1d"IntInf"ad"JSON"h0 \ No newline at end of file diff --git a/smlnj-lib/JSON/.cm/amd64-unix/errors.sml b/smlnj-lib/JSON/.cm/amd64-unix/errors.sml new file mode 100644 index 0000000..48c1b67 Binary files /dev/null and b/smlnj-lib/JSON/.cm/amd64-unix/errors.sml differ diff --git a/smlnj-lib/JSON/.cm/amd64-unix/json-buffer-printer.sml b/smlnj-lib/JSON/.cm/amd64-unix/json-buffer-printer.sml new file mode 100644 index 0000000..fba580a Binary files /dev/null and b/smlnj-lib/JSON/.cm/amd64-unix/json-buffer-printer.sml differ diff --git a/smlnj-lib/JSON/.cm/amd64-unix/json-decode.sml b/smlnj-lib/JSON/.cm/amd64-unix/json-decode.sml new file mode 100644 index 0000000..8738e34 Binary files /dev/null and b/smlnj-lib/JSON/.cm/amd64-unix/json-decode.sml differ diff --git a/smlnj-lib/JSON/.cm/amd64-unix/json-parser.sml b/smlnj-lib/JSON/.cm/amd64-unix/json-parser.sml new file mode 100644 index 0000000..462df31 Binary files /dev/null and b/smlnj-lib/JSON/.cm/amd64-unix/json-parser.sml differ diff --git a/smlnj-lib/JSON/.cm/amd64-unix/json-printer.sml b/smlnj-lib/JSON/.cm/amd64-unix/json-printer.sml new file mode 100644 index 0000000..ace5c54 Binary files /dev/null and b/smlnj-lib/JSON/.cm/amd64-unix/json-printer.sml differ diff --git a/smlnj-lib/JSON/.cm/amd64-unix/json-source.sml b/smlnj-lib/JSON/.cm/amd64-unix/json-source.sml new file mode 100644 index 0000000..030edc2 Binary files /dev/null and b/smlnj-lib/JSON/.cm/amd64-unix/json-source.sml differ diff --git a/smlnj-lib/JSON/.cm/amd64-unix/json-stream-output.fun b/smlnj-lib/JSON/.cm/amd64-unix/json-stream-output.fun new file mode 100644 index 0000000..b82eeb1 Binary files /dev/null and b/smlnj-lib/JSON/.cm/amd64-unix/json-stream-output.fun differ diff --git a/smlnj-lib/JSON/.cm/amd64-unix/json-stream-output.sig b/smlnj-lib/JSON/.cm/amd64-unix/json-stream-output.sig new file mode 100644 index 0000000..a24b0b3 Binary files /dev/null and b/smlnj-lib/JSON/.cm/amd64-unix/json-stream-output.sig differ diff --git a/smlnj-lib/JSON/.cm/amd64-unix/json-stream-parser.sml b/smlnj-lib/JSON/.cm/amd64-unix/json-stream-parser.sml new file mode 100644 index 0000000..cc0f02f Binary files /dev/null and b/smlnj-lib/JSON/.cm/amd64-unix/json-stream-parser.sml differ diff --git a/smlnj-lib/JSON/.cm/amd64-unix/json-stream-printer.sml b/smlnj-lib/JSON/.cm/amd64-unix/json-stream-printer.sml new file mode 100644 index 0000000..20bf252 Binary files /dev/null and b/smlnj-lib/JSON/.cm/amd64-unix/json-stream-printer.sml differ diff --git a/smlnj-lib/JSON/.cm/amd64-unix/json-util.sml b/smlnj-lib/JSON/.cm/amd64-unix/json-util.sml new file mode 100644 index 0000000..c43ddcc Binary files /dev/null and b/smlnj-lib/JSON/.cm/amd64-unix/json-util.sml differ diff --git a/smlnj-lib/JSON/.cm/amd64-unix/json.sml b/smlnj-lib/JSON/.cm/amd64-unix/json.sml new file mode 100644 index 0000000..f166c0e Binary files /dev/null and b/smlnj-lib/JSON/.cm/amd64-unix/json.sml differ diff --git a/smlnj-lib/JSON/README b/smlnj-lib/JSON/README new file mode 100644 index 0000000..d0f085e --- /dev/null +++ b/smlnj-lib/JSON/README @@ -0,0 +1,14 @@ +This library supports the reading and writing of structured data using +the "JavaScript Object Notation" (JSON). This format is specified by +RFC 426 (http://tools.ietf.org/html/rfc4627). + +There are two levels of I/O supported. The "stream" level supports +event-based parsing (e.g., like a SAX parser for XML) and output at +the same level. Use this mode to extract small amounts of information +from large files or when you want to directly build your own representation +of the file. The "file" level supports a "DOM-style" approach that reads/writes +trees (see json.sml for the representation). + +TODO: + add support for UBJSON (http://ubjson.org) or possibly one of the + other binary JSON representations diff --git a/smlnj-lib/JSON/errors.sml b/smlnj-lib/JSON/errors.sml new file mode 100644 index 0000000..1f1589d --- /dev/null +++ b/smlnj-lib/JSON/errors.sml @@ -0,0 +1,75 @@ +(* errors.sml + * + * COPYRIGHT (c) 2024 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * The error exceptions used in JSONDecode and JSONUtil. + *) + +structure Errors = + struct + + (* exceptions used as errors *) + exception Failure of string * JSON.value + exception NotNull of JSON.value + exception NotBool of JSON.value + exception NotInt of JSON.value + exception NotNumber of JSON.value + exception NotString of JSON.value + exception NotObject of JSON.value + exception FieldNotFound of JSON.value * string + exception NotArray of JSON.value + exception ArrayBounds of JSON.value * int + exception ElemNotFound of JSON.value + + (* map the above exceptions to a message string; we use `General.exnMessage` + * for other exceptions. + *) + fun exnMessage exn = let + fun v2s (JSON.ARRAY _) = "array" + | v2s (JSON.BOOL false) = "'false'" + | v2s (JSON.BOOL true) = "'true'" + | v2s (JSON.FLOAT _) = "number" + | v2s (JSON.INT _) = "number" + | v2s JSON.NULL = "'null'" + | v2s (JSON.OBJECT _) = "object" + | v2s (JSON.STRING _) = "string" + in + case exn + of Failure(msg, v) => String.concat["Failure: ", msg] + | NotNull v => String.concat[ + "expected 'null', but found ", v2s v + ] + | NotBool v => String.concat[ + "expected boolean, but found ", v2s v + ] + | NotInt(JSON.FLOAT _) => "expected integer, but found floating-point number" + | NotInt v => String.concat[ + "expected integer, but found ", v2s v + ] + | NotNumber v => String.concat[ + "expected number, but found ", v2s v + ] + | NotString v => String.concat[ + "expected string, but found ", v2s v + ] + | NotObject v => String.concat[ + "expected object, but found ", v2s v + ] + | FieldNotFound(v, fld) => String.concat[ + "no definition for field \"", fld, "\" in object" + ] + | NotArray v => String.concat[ + "expected array, but found ", v2s v + ] + | ArrayBounds(_, i) => String.concat[ + "index ", Int.toString i, " out of bounds for array" + ] + | ElemNotFound v => String.concat[ + "no matching element found in ", v2s v + ] + | _ => General.exnMessage exn + (* end case *) + end + + end diff --git a/smlnj-lib/JSON/json-buffer-printer.sml b/smlnj-lib/JSON/json-buffer-printer.sml new file mode 100644 index 0000000..29c3ad6 --- /dev/null +++ b/smlnj-lib/JSON/json-buffer-printer.sml @@ -0,0 +1,17 @@ +(* json-buffer-printer.sml + * + * COPYRIGHT (c) 2021 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * Print JSON to a `CharBuffer.buf` + *) + +structure JSONBufferPrinter + : JSON_STREAM_OUTPUT where type outstream = CharBuffer.buf + = JSONStreamOutputFn ( + struct + type outstream = CharBuffer.buf + val output1 = CharBuffer.add1 + val output = CharBuffer.addVec + val outputSlice = CharBuffer.addSlice + end) diff --git a/smlnj-lib/JSON/json-decode.sml b/smlnj-lib/JSON/json-decode.sml new file mode 100644 index 0000000..aafa74a --- /dev/null +++ b/smlnj-lib/JSON/json-decode.sml @@ -0,0 +1,273 @@ +(* json-decode.sml + * + * COPYRIGHT (c) 2023 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + *) + +structure JSONDecode :> sig + + (* exceptions used as errors; note that most of these come from the + * JSONUtil module. + *) + exception Failure of string * JSON.value + exception NotNull of JSON.value + exception NotBool of JSON.value + exception NotInt of JSON.value + exception NotNumber of JSON.value + exception NotString of JSON.value + exception NotObject of JSON.value + exception FieldNotFound of JSON.value * string + exception NotArray of JSON.value + exception ArrayBounds of JSON.value * int + exception ElemNotFound of JSON.value + + val exnMessage : exn -> string + + type 'a decoder + + val decode : 'a decoder -> JSON.value -> 'a + val decodeString : 'a decoder -> string -> 'a + val decodeFile : 'a decoder -> string -> 'a + + val bool : bool decoder + val int : int decoder + val intInf : IntInf.int decoder + val number : Real64.real decoder + val string : string decoder + + val null : 'a -> 'a decoder + + (* returns the raw JSON value without further decoding *) + val raw : JSON.value decoder + + (* returns a decoder that maps the JSON `null` value to `NONE` and otherwise + * returns `SOME v`, where `v` is the result of decoding the value using + * the supplied decoder. + *) + val nullable : 'a decoder -> 'a option decoder + + (* returns a decoder that attempts to decode a value and returns `NONE` + * on failure (instead of an error result). + *) + val try : 'a decoder -> 'a option decoder + + (* sequence decoders using "continuation-passing" style; for example + * + * seq (field "x" number) + * (succeed (fn x => x*x)) + *) + val seq : 'a decoder -> ('a -> 'b) decoder -> 'b decoder + + (* `field key d` returns a decoder that decodes the specified object field + * using the decoder `d`. + *) + val field : string -> 'a decoder -> 'a decoder + + (* decode a required field *) + val reqField : string -> 'a decoder -> ('a -> 'b) decoder -> 'b decoder + + (* decode an optional field *) + val optField : string -> 'a decoder -> ('a option -> 'b) decoder -> 'b decoder + + (* decode an optional field that has a default value *) + val dfltField : string -> 'a decoder -> 'a -> ('a -> 'b) decoder -> 'b decoder + + (* decodes a JSON ARRAY into a list of values *) + val array : 'a decoder -> 'a list decoder + + (* `sub i d` returns a decoder that when given a JSON array, decodes the i'th + * array element. + *) + val sub : int -> 'a decoder -> 'a decoder + + (* returns a decoder that decodes the value at the location specified by + * the path. + *) + val at : JSONUtil.path -> 'a decoder -> 'a decoder + + (* `succeed v` returns a decoder that always yields `v` for any JSON input *) + val succeed : 'a -> 'a decoder + + (* `fail msg` returns a decoder that raises `Failure(msg, jv)` for + * any JSON input `jv`. + *) + val fail : string -> 'a decoder + + val andThen : ('a -> 'b decoder) -> 'a decoder -> 'b decoder + + (* `orElse (d1, d2)` returns a decoder that first trys `d1` and returns its + * result if it succeeds. If `d1` fails, then it returns the result of trying + * `d2`. + *) + val orElse : 'a decoder * 'a decoder -> 'a decoder + + (* `choose [d1, ..., dn]` is equivalent to + * `orElse(d1, orElse(d2, ..., orElse(dn, fail "no choice") ... ))` + *) + val choose : 'a decoder list -> 'a decoder + + val map : ('a -> 'b) -> 'a decoder -> 'b decoder + val map2 : ('a * 'b -> 'res) + -> ('a decoder * 'b decoder) + -> 'res decoder + val map3 : ('a * 'b * 'c -> 'res) + -> ('a decoder * 'b decoder * 'c decoder) + -> 'res decoder + val map4 : ('a * 'b * 'c * 'd -> 'res) + -> ('a decoder * 'b decoder * 'c decoder * 'd decoder) + -> 'res decoder + + (* versions of the map combinators that just apply the identity to the tuple *) + val tuple2 : ('a decoder * 'b decoder) -> ('a * 'b) decoder + val tuple3 : ('a decoder * 'b decoder * 'c decoder) -> ('a * 'b * 'c) decoder + val tuple4 : ('a decoder * 'b decoder * 'c decoder * 'd decoder) + -> ('a * 'b * 'c * 'd) decoder + + (* a delay combinator for defining recursive decoders *) + val delay : (unit -> 'a decoder) -> 'a decoder + + end = struct + + structure U = JSONUtil + + (* import the error exceptions and exnMessage *) + open Errors + + datatype value = datatype JSON.value + + datatype 'a decoder = D of value -> 'a + + fun decode (D d) jv = d jv + + fun decodeString decoder s = + (decode decoder (JSONParser.parse(JSONParser.openString s))) + + fun decodeFile decoder fname = + (decode decoder (JSONParser.parseFile fname)) + + fun asBool (BOOL b) = b + | asBool v = raise NotBool v + val bool = D asBool + + fun asInt jv = (case jv + of INT n => Int.fromLarge n + | _ => raise NotInt jv + (* end case *)) + val int = D asInt + + fun asIntInf (INT n) = n + | asIntInf v = raise NotInt v + val intInf = D asIntInf + + fun asNumber (INT n) = Real.fromLargeInt n + | asNumber (FLOAT f) = f + | asNumber v = raise NotNumber v + val number = D asNumber + + fun asString (STRING s) = s + | asString v = raise NotString v + val string = D asString + + fun null dflt = D(fn NULL => dflt | jv => raise NotNull jv) + + val raw = D(fn jv => jv) + + fun nullable (D decoder) = let + fun decoder' NULL = NONE + | decoder' jv = SOME(decoder jv) + in + D decoder' + end + + fun array (D elemDecoder) = let + fun decodeList ([], elems) = List.rev elems + | decodeList (jv::jvs, elems) = decodeList(jvs, elemDecoder jv :: elems) + fun decoder (ARRAY elems) = decodeList (elems, []) + | decoder jv = raise NotArray jv + in + D decoder + end + + fun try (D d) = D(fn jv => (SOME(d jv) handle _ => NONE)) + + fun seq (D d1) (D d2) = D(fn jv => let + val v = d1 jv + val k = d2 jv + in + k v + end) + + fun field key valueDecoder = D(fn jv => (case jv + of OBJECT fields => (case List.find (fn (l, v) => (l = key)) fields + of SOME(_, v) => decode valueDecoder v + | _ => raise FieldNotFound( + jv, + concat["no definition for field \"", key, "\""]) + (* end case *)) + | _ => raise NotObject jv + (* end case *))) + + fun reqField key valueDecoder k = seq (field key valueDecoder) k + + fun optField key (D valueDecoder) (D objDecoder) = let + fun objDecoder' optFld jv = (objDecoder jv) optFld + fun decoder jv = (case U.findField jv key + of SOME NULL => objDecoder' NONE jv + | SOME jv' => objDecoder' (SOME(valueDecoder jv')) jv + | NONE => objDecoder' NONE jv + (* end case *)) + in + D decoder + end + + fun dfltField key (D valueDecoder) dfltVal (D objDecoder) = let + fun objDecoder' fld jv = (objDecoder jv) fld + fun decoder jv = (case U.findField jv key + of SOME NULL => objDecoder' dfltVal jv + | SOME jv' => objDecoder' (valueDecoder jv') jv + | NONE => objDecoder' dfltVal jv + (* end case *)) + in + D decoder + end + + fun sub i (D d) = D(fn jv => (case jv + of jv as ARRAY arr => let + fun get (0, item::_) = d item + | get (_, []) = raise ArrayBounds(jv, i) + | get (i, _::r) = get (i-1, r) + in + if (i < 0) then raise ArrayBounds(jv, i) else get (i, arr) + end + | _ => raise NotArray jv + (* end case *))) + + fun at path (D d) = D(fn jv => d (U.get(jv, path))) + + fun succeed x = D(fn _ => x) + + fun fail msg = D(fn jv => raise Failure(msg, jv)) + + fun andThen f (D d) = D(fn jv => decode (f (d jv)) jv) + + fun orElse (D d1, D d2) = D(fn jv => (d1 jv handle _ => d2 jv)) + + (* `choose [d1, ..., dn]` is equivalent to + * `orElse(d1, orElse(d2, ..., orElse(dn, fail "no choice") ... ))` + *) + fun choose [] = fail "no choice" + | choose (d::ds) = orElse(d, choose ds) + + fun map f (D decoder) = D(fn jv => f (decoder jv)) + + fun map2 f (D d1, D d2) = D(fn jv => f(d1 jv, d2 jv)) + fun map3 f (D d1, D d2, D d3) = D(fn jv => f(d1 jv, d2 jv, d3 jv)) + fun map4 f (D d1, D d2, D d3, D d4) = D(fn jv => f(d1 jv, d2 jv, d3 jv, d4 jv)) + + fun tuple2 (d1, d2) = map2 Fn.id (d1, d2) + fun tuple3 (d1, d2, d3) = map3 Fn.id (d1, d2, d3) + fun tuple4 (d1, d2, d3, d4) = map4 Fn.id (d1, d2, d3, d4) + + fun delay dd = andThen dd (succeed ()) + + end diff --git a/smlnj-lib/JSON/json-lib.cm b/smlnj-lib/JSON/json-lib.cm new file mode 100644 index 0000000..f207d70 --- /dev/null +++ b/smlnj-lib/JSON/json-lib.cm @@ -0,0 +1,38 @@ +(* json-lib.cm + * + * COPYRIGHT (c) 2021 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + *) + +Library + +(* DOM-style API (tree based) *) + structure JSON + structure JSONParser + structure JSONPrinter + structure JSONDecode + structure JSONUtil + +(* SAX-style API (event based) *) + signature JSON_STREAM_OUTPUT + structure JSONStreamParser + structure JSONBufferPrinter + structure JSONStreamPrinter + +is + + $/basis.cm + $/smlnj-lib.cm + + errors.sml + json.sml + json-buffer-printer.sml + json-decode.sml + json-parser.sml + json-printer.sml + json-source.sml + json-stream-output.sig + json-stream-output.fun + json-stream-parser.sml + json-stream-printer.sml + json-util.sml diff --git a/smlnj-lib/JSON/json-lib.mlb b/smlnj-lib/JSON/json-lib.mlb new file mode 100644 index 0000000..afbd211 --- /dev/null +++ b/smlnj-lib/JSON/json-lib.mlb @@ -0,0 +1,57 @@ +(* json-lib.mlb + * + * COPYRIGHT (c) 2021 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * An MLB file for the JSON library, so that it can be used by MLton programs. + *) + +local + + $(SML_LIB)/basis/basis.mlb + $(SML_LIB)/smlnj-lib/Util/smlnj-lib.mlb + $(SML_LIB)/mllpt-lib/ml-lpt-lib.mlb + + ann + "nonexhaustiveMatch warn" "redundantMatch warn" + "sequenceNonUnit ignore" + "warnUnused false" "forceUsed" + in + + json-tokens.sml + json.lex.sml + json.sml + json-source.sml + json-stream-parser.sml + json-parser.sml + json-stream-output.sig + json-stream-output.fun +(* NOTE: MLton does not support the CharBuffer Basis Library extension + json-buffer-printer.sml +*) + json-stream-printer.sml + json-printer.sml + errors.sml + json-util.sml + json-decode.sml + + end + +in + +(* DOM-style API (tree based) *) + structure JSON + structure JSONParser + structure JSONPrinter + structure JSONDecode + structure JSONUtil + +(* SAX-style API (event based) *) + signature JSON_STREAM_OUTPUT + structure JSONStreamParser +(* NOTE: MLton does not support the CharBuffer Basis Library extension + structure JSONBufferPrinter +*) + structure JSONStreamPrinter + +end diff --git a/smlnj-lib/JSON/json-parser.sml b/smlnj-lib/JSON/json-parser.sml new file mode 100644 index 0000000..8392790 --- /dev/null +++ b/smlnj-lib/JSON/json-parser.sml @@ -0,0 +1,585 @@ +(* json-parser.sml + * + * COPYRIGHT (c) 2008 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + *) + +structure JSONParser :> sig + + (* abstract type of JSON input *) + type source = JSONSource.source + + (* open a text input stream as a source *) + val openStream : TextIO.instream -> source + + (* open a text file as a source *) + val openFile : string -> source + + (* open a string as a source *) + val openString : string -> source + + (* close a source *) + val close : source -> unit + + val parse : source -> JSON.value + + val parseFile : string -> JSON.value + + end = struct + + structure W = Word + + datatype error_code = datatype JSONSource.error_code + + type source = JSONSource.source + + val openStream = JSONSource.openStream + val openFile = JSONSource.openFile + val openString = JSONSource.openString + val close = JSONSource.close + + (* fast (no overflow checking) increment/decrement operations *) + fun inc n = W.toIntX(W.fromInt n + 0w1) + fun dec n = W.toIntX(W.fromInt n - 0w1) + + (* local copy of list reverse that the compiler can inline *) + fun reverse xs = let + fun rev' ([], ys) = ys + | rev' (x::xs, ys) = rev' (xs, x::ys) + in + rev' (xs, []) + end + + (* make a string from a list of characters in reverse order; the first argument + * is the number of characters, which must be equal or greater than the length + * of the input list + *) + fun mkString (_, []) = "" + | mkString (n, cs) = let + val s = Unsafe.CharVector.create n + fun init (_, []) = s + | init (i, c::cs) = ( + Unsafe.CharVector.update(s, i, c); + init (dec i, cs)) + in + init (dec n, cs) + end + + fun next (src as (strm, nLines)) = (case TextIO.StreamIO.input1 strm + of SOME(#"\n", strm') => (#"\n", (strm', nLines+1)) + | SOME(c, strm') => (c, (strm', nLines)) + | NONE => (#"\000", src) + (* end case *)) + + (* skip white space *) + fun skipWS (src as (strm, nLines)) = (case TextIO.StreamIO.input1 strm + of SOME(#" ", strm') => skipWS (strm', nLines) + | SOME(#"\t", strm') => skipWS (strm', nLines) + | SOME(#"\r", strm') => skipWS (strm', nLines) + | SOME(#"\n", strm') => skipWS (strm', nLines+1) + | SOME(c, strm') => (c, (strm', nLines)) + | NONE => (#"\000", src) + (* end case *)) + + fun parse source = let + fun error' (src, ec) = let + val msg = JSONSource.errorMsg (src, ec) + in + raise Fail msg + end + fun matchC (src, c) = let + val (c', src') = next src + in + if (c = c') + then src' + else error' (src, InvalidLiteral) + end + (* parse a JSON value *) + fun parseValue src = (case skipWS src + of (#"[", src) => parseArray src + | (#"{", src) => parseObject src + | (#"-", src) => (case next src + of (#"0", src') => scanNumber(src', true, 0) + | (#"1", src') => scanNumber(src', true, 1) + | (#"2", src') => scanNumber(src', true, 2) + | (#"3", src') => scanNumber(src', true, 3) + | (#"4", src') => scanNumber(src', true, 4) + | (#"5", src') => scanNumber(src', true, 5) + | (#"6", src') => scanNumber(src', true, 6) + | (#"7", src') => scanNumber(src', true, 7) + | (#"8", src') => scanNumber(src', true, 8) + | (#"9", src') => scanNumber(src', true, 9) + | _ => error' (src, InvalidNumber) + (* end case *)) + | (#"0", src') => scanNumber(src', false, 0) + | (#"1", src') => scanNumber(src', false, 1) + | (#"2", src') => scanNumber(src', false, 2) + | (#"3", src') => scanNumber(src', false, 3) + | (#"4", src') => scanNumber(src', false, 4) + | (#"5", src') => scanNumber(src', false, 5) + | (#"6", src') => scanNumber(src', false, 6) + | (#"7", src') => scanNumber(src', false, 7) + | (#"8", src') => scanNumber(src', false, 8) + | (#"9", src') => scanNumber(src', false, 9) + | (#"\"", src) => scanStringValue src + | (#"f", src) => let (* match "a" "l" "s" "e" *) + val src = matchC (src, #"a") + val src = matchC (src, #"l") + val src = matchC (src, #"s") + val src = matchC (src, #"e") + in + (JSON.BOOL false, src) + end + | (#"n", src) => let (* match "u" "l" "l" *) + val src = matchC (src, #"u") + val src = matchC (src, #"l") + val src = matchC (src, #"l") + in + (JSON.NULL, src) + end + | (#"t", src) => let (* match "r" "u" "e" *) + val src = matchC (src, #"r") + val src = matchC (src, #"u") + val src = matchC (src, #"e") + in + (JSON.BOOL true, src) + end +(* + | (#"/", src) => if comments + then parseValue (skipComment src) + else error' (src, CommentsNotAllowed) +*) + (* currently, comments are always allowed *) + | (#"/", src) => parseValue (skipComment src) + | _ => error' (src, InvalidCharacter) + (* end case *)) + (* parse a JSON array assuming that the '[' has been consumed *) + and parseArray src = let + (* loop to scan one or more items *) + fun lp (src, items) = let + val (item, src) = parseValue src + val items = item::items + in + case skipWS src + of (#",", src) => lp (src, items) + | (#"]", src) => (JSON.ARRAY(reverse items), src) + | _ => error' (src, InvalidArray) + (* end case *) + end + in + case skipWS src + of (#"]", src) => (JSON.ARRAY[], src) + | _ => lp (src, []) + (* end case *) + end + (* parse a JSON object assuming that the '[' has been consumed *) + and parseObject src = let + (* loop to scan one or more key-value pairs *) + fun lp (src, items) = (case skipWS src + of (#"\"", src) => let + val (key, src) = scanString src + in + case skipWS src + of (#":", src) => let + val (v, src) = parseValue src + val items = (key, v)::items + in + case skipWS src + of (#",", src) => lp (src, items) + | (#"}", src) => (JSON.OBJECT(reverse items), src) + | _ => error' (src, InvalidObject) + (* end case *) + end + | _ => error' (src, ExpectedColon) + (* end case *) + end + | _ => error' (src, ExpectedKey) + (* end case *)) + in + case skipWS src + of (#"}", src) => (JSON.OBJECT[], src) + | _ => lp (src, []) + (* end case *) + end + (* scan a string value assuming that the first quote has been consumed *) + and scanString start = let + fun c2w c = W.fromInt(ord c) + fun w2c w = Char.chr(W.toInt w) + fun scan (src, n, cs) = (case next src + of (#"\000", _) => error' (start, UnclosedString) + | (#"\"", src) => (mkString(n, cs), src) + | (#"\\", src) => scanEscape (src, n, cs) + | (c, src) => if (#" " <= c) andalso (c < #"\127") + (* printable ASCII character *) + then scan (src, inc n, c::cs) + (* either non-printable ASCII or UTF-8 byte sequence *) + else scanUTF8 (src, c, c2w c, n, cs) + (* end case *)) + and scanEscape (src, n, cs) = let + fun return (src, c) = scan (src, inc n, c::cs) + in + case next src + of (#"\"", src) => return (src, #"\"") + | (#"\\", src) => return (src, #"\\") + | (#"/", src) => return (src, #"/") + | (#"b", src) => return (src, #"\008") (* backspace *) + | (#"f", src) => return (src, #"\012") (* form feed *) + | (#"n", src) => return (src, #"\010") (* line feed *) + | (#"r", src) => return (src, #"\013") (* carriage return *) + | (#"t", src) => return (src, #"\009") (* tab *) + | (#"u", src) => scanUnicodeEscape (src, n, cs) + | _ => error' (src, InvalidEscape) + (* end case *) + end + (* scan a Unicode escape sequence; we have already consumed the "\u" + * prefix, so we just need to parse the four hex digits followed by + * a possible second escape sequence for a surrogate pair. The result + * is encoded as a UTF-8 byte sequence. + *) + and scanUnicodeEscape (src, n, cs) = let + fun getDigit src = (case next src + of (#"0", src) => (0w0, src) + | (#"1", src) => (0w1, src) + | (#"2", src) => (0w2, src) + | (#"3", src) => (0w3, src) + | (#"4", src) => (0w4, src) + | (#"5", src) => (0w5, src) + | (#"6", src) => (0w6, src) + | (#"7", src) => (0w7, src) + | (#"8", src) => (0w8, src) + | (#"9", src) => (0w9, src) + | (#"a", src) => (0w10, src) + | (#"A", src) => (0w10, src) + | (#"b", src) => (0w11, src) + | (#"B", src) => (0w11, src) + | (#"c", src) => (0w12, src) + | (#"C", src) => (0w12, src) + | (#"d", src) => (0w13, src) + | (#"D", src) => (0w13, src) + | (#"e", src) => (0w14, src) + | (#"E", src) => (0w14, src) + | (#"f", src) => (0w15, src) + | (#"F", src) => (0w15, src) + | _ => error' (src, InvalidUnicodeEscape) + (* end case *)) + fun getDigits src = let + (* get four digits *) + val (d0, src) = getDigit src + val (d1, src) = getDigit src + val (d2, src) = getDigit src + val (d3, src) = getDigit src + val n = W.<<(d0, 0w24) + + W.<<(d1, 0w16) + + W.<<(d2, 0w8) + + d3 + in + (n, src) + end + val (u0, src) = getDigits src + (* get the second 16-bit code point of a surrogate pair *) + fun scanLowSurrogate src = ( + (* match "\uxxxx" *) + case next src + of (#"\\", src) => (case next src + of (#"u", src) => let + val (u1, src) = getDigits src + in + if (u1 < 0wxDC00) orelse (0wxDFFF < u1) + then error' (src, InvalidUnicodeSurrogatePair) + (* convert pair to a Unicode code point + * and then to UTF-8 bytes. + *) + else toUTF8 (src, + 0wx10000 + + W.<<(u0 - 0wxD800, 0w10) + + (u1 - 0wxDC00)) + end + | _ => error' (src, InvalidUnicodeSurrogatePair) + (* end case *)) + | _ => error' (src, InvalidUnicodeSurrogatePair) + (* end case *)) + (* convert a word to a UTF-8 sequence *) + and toUTF8 (src, w) = if (w <= 0wx7f) + then scan (src, inc n, w2c w :: cs) + else if (w <= 0wx7ff) + then scan (src, + n+2, + w2c(W.orb(0wxc0, W.>>(w, 0w6))) + :: w2c(W.orb(0wx80, W.andb(w, 0wx3f))) + :: cs) + else if (w <= 0wxffff) + then scan (src, + n+3, + w2c(W.orb(0wxe0, W.>>(w, 0w12))) + :: w2c(W.orb(0wx80, W.andb(W.>>(w, 0w6), 0wx3f))) + :: w2c(W.orb(0wx80, W.andb(w, 0wx3f))) + :: cs) + else if (w <= 0wx10ffff) + then scan (src, + n+4, + w2c(W.orb(0wxf0, W.>>(w, 0w18))) + :: w2c(W.orb(0wx80, W.andb(W.>>(w, 0w12), 0wx3f))) + :: w2c(W.orb(0wx80, W.andb(W.>>(w, 0w6), 0wx3f))) + :: w2c(W.orb(0wx80, W.andb(w, 0wx3f))) + :: cs) + else error' (src, InvalidUnicodeEscape) + in + if (u0 < 0wxD800) + then toUTF8 (src, u0) + else if (u0 <= 0wxDBFF) + then scanLowSurrogate src + else error' (src, InvalidUnicodeEscape) + end (* scanUnicodeEscape *) + (* a simple state machine for scanning a valid UTF-8 byte sequence. See + * https://unicode.org/mail-arch/unicode-ml/y2003-m02/att-0467/01-The_Algorithm_to_Valide_an_UTF-8_String + * for a description of the state machine. + *) + and scanUTF8 (src, chr0, byte0, n, cs) = let + fun getByte src = (case next src + of (#"\000", _) => error' (src, IncompleteUTF8) + | (c, src') => (c2w c, c, src') + (* end case *)) + fun inRange (minB : word, b, maxB) = ((b - minB) <= maxB - minB) + (* handles last byte for all multi-byte sequences *) + fun stateA (src, n, chrs) = let + val (b, c, src) = getByte src + in + if inRange(0wx80, b, 0wxbf) + then scan (src, inc n, c::chrs) + else error' (src, InvalidUTF8) + end + (* handles second/third byte for three/four-byte sequences *) + and stateB (src, n, chrs) = let + val (b, c, src) = getByte src + in + if inRange(0wx80, b, 0wxbf) + then stateA (src, inc n, c::chrs) + else error' (src, InvalidUTF8) + end + (* byte0 = 0b1110_0000 (3-byte sequence) *) + and stateC (src, n, chrs) = let + val (b, c, src) = getByte src + in + if inRange(0wxa0, b, 0wxbf) + then stateA (src, inc n, c::chrs) + else error' (src, InvalidUTF8) + end + (* byte0 = 0b1110_1101 (3-byte sequence) *) + and stateD (src, n, chrs) = let + val (b, c, src) = getByte src + in + if inRange(0wx80, b, 0wx9f) + then stateA (src, inc n, c::chrs) + else error' (src, InvalidUTF8) + end + (* byte0 = 0b1111_0001 .. 0b1111_0011 (4-byte sequence) *) + and stateE (src, n, chrs) = let + val (b, c, src) = getByte src + in + if inRange(0wx80, b, 0wxbf) + then stateB (src, inc n, c::chrs) + else error' (src, InvalidUTF8) + end + (* byte0 = 0b1111_0000 (4-byte sequence) *) + and stateF (src, n, chrs) = let + val (b, c, src) = getByte src + in + if inRange(0wx90, b, 0wxbf) + then stateB (src, inc n, c::chrs) + else error' (src, InvalidUTF8) + end + (* byte0 = 0b1111_1000 (4-byte sequence) *) + and stateG (src, n, chrs) = let + val (b, c, src) = getByte src + in + if inRange(0wx80, b, 0wx8f) + then stateB (src, inc n, c::chrs) + else error' (src, InvalidUTF8) + end + in + if (byte0 <= 0wx7f) + (* this case only occurs for non-printing ASCII characters *) + then error' (src, NonPrintingASCII) + else if inRange(0wxc2, byte0, 0wxdf) + then stateA (src, n, cs) + else if inRange(0wxe1, byte0, 0wxec) + orelse inRange(0wxee, byte0, 0wxef) + then stateB (src, n, cs) + else if (byte0 = 0wxe0) + then stateC (src, n, cs) + else if (byte0 = 0wxed) + then stateD (src, n, cs) + else if inRange(0wxf1, byte0, 0wxf3) + then stateE (src, n, cs) + else if (byte0 = 0wxf0) + then stateF (src, n, cs) + else if (byte0 = 0wxf4) + then stateG (src, n, cs) + else error' (src, InvalidUTF8) + end (* scanUTF8 *) + in + scan (start, 0, []) + end (* scanString *) + and scanStringValue src = let + val (s, src) = scanString src + in + (JSON.STRING s, src) + end + (* scan an integer or floating-point number. If the number of digits + * for an integer literal exceeds the `maxDigits` limit, then we signal + * a `NumberTooLarge` error. + *) + and scanNumber (src, isNeg, firstDigit) = let + (* make a JSON `FLOAT` value from pieces. The lists of digits + * are in reverse order and are in the range [0..9]. + *) + fun mkFloat (sign, whole, frac, exp, src) = let + val f = valOf(Real.fromDecimal { + class = IEEEReal.NORMAL, + sign = sign, + digits = List.revAppend(whole, reverse frac), + exp = exp + List.length whole + }) handle Overflow => if sign then Real.negInf else Real.posInf + in + if Real.isFinite f + then (JSON.FLOAT f, src) + else error' (src, NumberTooLarge) + end + (* scan an integer or the whole part of a float *) + fun scanWhole (src, digits) = (case next src + of (#"0", src) => scanWhole (src, 0::digits) + | (#"1", src) => scanWhole (src, 1::digits) + | (#"2", src) => scanWhole (src, 2::digits) + | (#"3", src) => scanWhole (src, 3::digits) + | (#"4", src) => scanWhole (src, 4::digits) + | (#"5", src) => scanWhole (src, 5::digits) + | (#"6", src) => scanWhole (src, 6::digits) + | (#"7", src) => scanWhole (src, 7::digits) + | (#"8", src) => scanWhole (src, 8::digits) + | (#"9", src) => scanWhole (src, 9::digits) + | (#".", src) => scanFrac (src, digits) + | (#"e", src) => scanExp (src, digits, []) + | (#"E", src) => scanExp (src, digits, []) + | _ => let + fun cvt ([], _, n) = if isNeg + then (JSON.INT(~n), src) + else (JSON.INT n, src) + | cvt (d::ds, k, n) = + cvt (ds, inc k, 10*n + IntInf.fromInt d) + in + cvt (reverse digits, 0, 0) + end + (* end case *)) + (* scan the fractional part of a real; the '.' has already been + * consumed. + *) + and scanFrac (src, wDigits) = let + fun scanF (src, fDigits) = (case next src + of (#"0", src) => scanF (src, 0::fDigits) + | (#"1", src) => scanF (src, 1::fDigits) + | (#"2", src) => scanF (src, 2::fDigits) + | (#"3", src) => scanF (src, 3::fDigits) + | (#"4", src) => scanF (src, 4::fDigits) + | (#"5", src) => scanF (src, 5::fDigits) + | (#"6", src) => scanF (src, 6::fDigits) + | (#"7", src) => scanF (src, 7::fDigits) + | (#"8", src) => scanF (src, 8::fDigits) + | (#"9", src) => scanF (src, 9::fDigits) + | (#"e", src) => scanExp (src, wDigits, fDigits) + | (#"E", src) => scanExp (src, wDigits, fDigits) + | _ => mkFloat (isNeg, wDigits, fDigits, 0, src) + (* end case *)) + in + scanF (src, []) + end + (* scan the exponent part of a real; the "e"/"E" has already been + * consumed. + *) + and scanExp (src, whole, frac) = let + val (expSign, exp, seenDigit, src) = (case next src + of (#"-", src) => (~1, 0, false, src) + | (#"+", src) => (1, 0, false, src) + | (#"0", src) => (1, 0, true, src) + | (#"1", src) => (1, 1, true, src) + | (#"2", src) => (1, 2, true, src) + | (#"3", src) => (1, 3, true, src) + | (#"4", src) => (1, 4, true, src) + | (#"5", src) => (1, 5, true, src) + | (#"6", src) => (1, 6, true, src) + | (#"7", src) => (1, 7, true, src) + | (#"8", src) => (1, 8, true, src) + | (#"9", src) => (1, 9, true, src) + | _ => error' (src, InvalidNumber) + (* end case *)) + fun scanE (src, seenDigit, exp) = (case next src + of (#"0", src) => scanE (src, true, 10 * exp) + | (#"1", src) => scanE (src, true, 10 * exp + 1) + | (#"2", src) => scanE (src, true, 10 * exp + 2) + | (#"3", src) => scanE (src, true, 10 * exp + 3) + | (#"4", src) => scanE (src, true, 10 * exp + 4) + | (#"5", src) => scanE (src, true, 10 * exp + 5) + | (#"6", src) => scanE (src, true, 10 * exp + 6) + | (#"7", src) => scanE (src, true, 10 * exp + 7) + | (#"8", src) => scanE (src, true, 10 * exp + 8) + | (#"9", src) => scanE (src, true, 10 * exp + 9) + | _ => if seenDigit + then mkFloat (isNeg, whole, frac, expSign * exp, src) + else error' (src, InvalidNumber) + (* end case *)) + in + scanE (src, seenDigit, exp) + handle Overflow => error' (src, NumberTooLarge) + end + in + if (firstDigit = 0) + then (case next src + of (#".", src) => scanFrac(src, []) + | (#"e", src) => scanExp(src, [], []) + | (#"E", src) => scanExp(src, [], []) + | _ => (JSON.INT 0, src) + (* end case *)) + else scanWhole (src, [firstDigit]) + end (* scanNumber *) + (* skip over a C-style comment; the initial '/' has been consumed *) + and skipComment src = let + fun skip src = (case next src + of (#"*", src) => let + (* look for "/" (possibly preceded by stars) *) + fun lp src = (case next src + of (#"/", src) => src + | (#"*", src) => lp src + | (#"\000", src) => error' (src, UnclosedComment) + | (_, src) => skip src + (* end case *)) + in + lp src + end + | (#"\000", src) => error' (src, UnclosedComment) + | (_, src) => skip src + (* end case *)) + in + case next src + of (#"*", src) => skip src + | _ => error' (src, InvalidCharacter) + (* end case *) + end + val src = (case !source + of SOME src => src + | NONE => raise Fail "closed JSON source" + (* end case *)) + val (jv, src) = parseValue src + in + source := SOME src; + jv + end (* parse *) + + fun parseFile fileName = let + val inStrm = openFile fileName + val v = parse inStrm + handle ex => (close inStrm; raise ex) + in + close inStrm; + v + end + + end diff --git a/smlnj-lib/JSON/json-printer.sml b/smlnj-lib/JSON/json-printer.sml new file mode 100644 index 0000000..391fd9d --- /dev/null +++ b/smlnj-lib/JSON/json-printer.sml @@ -0,0 +1,25 @@ +(* json-printer.sml + * + * COPYRIGHT (c) 2008 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * A printer for JSON values. + *) + +structure JSONPrinter : sig + + val print : TextIO.outstream * JSON.value -> unit + val print' : {strm : TextIO.outstream, pretty : bool} -> JSON.value -> unit + + end = struct + + structure J = JSON + structure JSP = JSONStreamPrinter + + fun printWith printer v = (JSP.value(printer, v); JSP.close printer) + + fun print (strm, v) = printWith (JSP.new strm) v + + fun print' {strm, pretty} = printWith (JSP.new' {strm=strm, pretty=pretty}) + + end diff --git a/smlnj-lib/JSON/json-source.sml b/smlnj-lib/JSON/json-source.sml new file mode 100644 index 0000000..3c6b129 --- /dev/null +++ b/smlnj-lib/JSON/json-source.sml @@ -0,0 +1,78 @@ +(* json-source.sml + * + * COPYRIGHT (c) 2020 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * JSON input sources. Note that this module is internal the the library. + *) + +structure JSONSource = + struct + + (* the state of a source is a functional input stream and a count + * of lines (starting at 1) + *) + type state = (TextIO.StreamIO.instream * int) + + type source = state option ref + + (* open a text input stream as a source *) + fun openStream inS = ref(SOME(TextIO.getInstream inS, 1)) + + (* open a text file as a source *) + fun openFile file = openStream (TextIO.openIn file) + + (* open a string as a source *) + fun openString s = openStream (TextIO.openString s) + + (* close a source *) + fun close (source as ref(SOME(inS, _))) = ( + TextIO.StreamIO.closeIn inS; + source := NONE) + | close _ = () + + (* syntax-error codes *) + datatype error_code + = InvalidCharacter + | InvalidLiteral + | NumberTooLarge + | InvalidNumber + | InvalidArray + | InvalidObject + | ExpectedKey + | ExpectedColon + | CommentsNotAllowed + | UnclosedComment + | UnclosedString + | InvalidEscape + | InvalidUTF8 + | IncompleteUTF8 + | InvalidUnicodeSurrogatePair + | InvalidUnicodeEscape + | NonPrintingASCII + + fun errorMsg (src : state, ec) = let + val msg = (case ec + of InvalidCharacter => "invalid character" + | InvalidLiteral => "invalid literal identifier" + | NumberTooLarge => "number exceeds maximum number of digits" + | InvalidNumber => "invalid number syntax" + | InvalidArray => "invalid array syntax; expected ',' or ']'" + | InvalidObject => "invalid object syntax; expected ',' or '}'" + | ExpectedKey => "invalid object syntax; expected key" + | ExpectedColon => "invalid object syntax; expected ':'" + | CommentsNotAllowed => "JSON comments not allowed" + | UnclosedComment => "unclosed comment" + | UnclosedString => "unclosed string" + | InvalidEscape => "invalid escape sequence" + | InvalidUTF8 => "invalid UTF-8" + | IncompleteUTF8 => "incomplete UTF-8" + | InvalidUnicodeSurrogatePair => "invalid Unicode surrogate pair" + | InvalidUnicodeEscape => "invalid Unicode escape sequence" + | NonPrintingASCII => "non-printing ASCII character" + (* end case *)) + in + concat("Error at line " :: Int.toString(#2 src) :: ": " :: [msg]) + end + + end diff --git a/smlnj-lib/JSON/json-stream-output.fun b/smlnj-lib/JSON/json-stream-output.fun new file mode 100644 index 0000000..6f3f54b --- /dev/null +++ b/smlnj-lib/JSON/json-stream-output.fun @@ -0,0 +1,221 @@ +(* json-stream-output.fun + * + * COPYRIGHT (c) 2021 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * A functor that implements serialization of JSON values using an + * abstract output stream. + *) + +(* TODO: move this signature to the utility library and add implementations + * on top of streams and buffers. + *) +signature TEXT_OUTPUT_STREAM = + sig + + type outstream + + val output1 : outstream * char -> unit + val output : outstream * string -> unit + val outputSlice : outstream * substring -> unit + + end + +functor JSONStreamOutputFn (Out : TEXT_OUTPUT_STREAM) : JSON_STREAM_OUTPUT + where type outstream = Out.outstream + = struct + + structure F = Format + + type outstream = Out.outstream + + datatype printer = P of { + strm : outstream, + indent : int ref, + ctx : context ref, + pretty : bool + } + + (* the context is used to keep track of the printing state for indentation + * and punctuation, etc. + *) + and context + = CLOSED (* closed printer *) + | TOP (* top-most context *) + | FIRST of context (* first element of object or array; the argument *) + (* must be one of OBJECT or ARRAY. *) + | OBJECT of context (* in an object (after the first element) *) + | ARRAY of context (* in an array (after the first element) *) + | KEY of context (* after the key of a object field *) + + fun new' {strm, pretty} = P{ + strm = strm, + indent = ref 0, + ctx = ref TOP, + pretty = pretty + } + + fun new strm = new' {strm = strm, pretty = false} + + fun close (P{ctx, strm, ...}) = (case !ctx + of CLOSED => () + | TOP => (Out.output(strm, "\n"); ctx := CLOSED) + | _ => raise Fail "premature close" + (* end case *)) + + fun pr (P{strm, ...}, s) = Out.output(strm, s) + + fun indent (P{pretty = false, ...}, _) = () + | indent (P{strm, indent, ...}, offset) = let + val tenSpaces = " " + fun prIndent n = if (n <= 10) + then Out.output(strm, String.extract(tenSpaces, 10-n, NONE)) + else (Out.output(strm, tenSpaces); prIndent(n-10)) + in + prIndent ((!indent+offset) * 2) + end + + fun incIndent (P{indent, ...}, n) = indent := !indent + n; + fun decIndent (P{indent, ...}, n) = indent := !indent - n; + + fun nl (P{pretty = false, ...}) = () + | nl (P{strm, ...}) = Out.output(strm, "\n") + + fun comma (P{strm, pretty = false, ...}) = Out.output(strm, ",") + | comma (p as P{strm, ...}) = ( + Out.output(strm, ",\n"); indent(p, 0)) + + fun optComma (p as P{ctx, pretty, ...}) = (case !ctx + of FIRST ctx' => (indent(p, 0); ctx := ctx') + | OBJECT _ => comma p + | ARRAY _ => comma p + | KEY ctx' => ( + pr (p, if pretty then " : " else ":"); + ctx := ctx') + | _ => () + (* end case *)) + + (* print a value, which may be proceeded by a comma if it is in a sequence *) + fun prVal (P{ctx = ref CLOSED, ...}, _) = raise Fail "closed printer" + | prVal (p, v) = (optComma p; pr(p, v)) + + fun null p = prVal (p, "null") + fun boolean (p, false) = prVal (p, "false") + | boolean (p, true) = prVal (p, "true") + fun int (p, n) = prVal (p, F.format "%d" [F.INT n]) + fun integer (p, n) = prVal (p, F.format "%d" [F.LINT n]) + fun float (p, f) = let + (* print with 17 digits of precision, which is sufficient for any + * double-precision IEEE float. We first convert to a string using + * SML syntax and then replace any "~" characters with "-". + *) + val s = Real.fmt (StringCvt.GEN(SOME 17)) f + in + if CharVector.exists (fn #"~" => true | _ => false) s + then prVal (p, String.map (fn #"~" => #"-" | c => c) s) + else prVal (p, s) + end + fun string (p, s) = let + fun getChar i = if (i < size s) then SOME(String.sub(s, i), i+1) else NONE + val getWChar = UTF8.getu getChar + fun tr (i, chrs) = (case getWChar i + of SOME(wchr, i) => if (wchr <= 0w126) + then let + val c = (case UTF8.toAscii wchr + of #"\"" => "\\\"" + | #"\\" => "\\\\" + | #"\b" => "\\b" + | #"\f" => "\\f" + | #"\n" => "\\n" + | #"\r" => "\\r" + | #"\t" => "\\t" + | c => if (wchr < 0w32) + then F.format "\\u%04x" [F.WORD wchr] + else str c + (* end case *)) + in + tr (i, c :: chrs) + end + else tr(i, F.format "\\u%04x" [F.WORD wchr] :: chrs) + | NONE => String.concat(List.rev chrs) + (* end case *)) + in + prVal (p, F.format "\"%s\"" [F.STR(tr (0, []))]) + end + + fun beginObject (p as P{ctx, ...}) = (case !ctx + of CLOSED => raise Fail "closed printer" + | _ => ( + optComma p; + pr (p, "{"); incIndent(p, 2); nl p; + ctx := FIRST(OBJECT(!ctx))) + (* end case *)) + + fun objectKey (p as P{ctx, ...}, field) = (case !ctx + of CLOSED => raise Fail "closed printer" + | KEY _ => raise Fail(concat[ + "objectKey \"", field, "\" where value was expected" + ]) + | _ => ( + string (p, field); + ctx := KEY(!ctx)) + (* end case *)) + + fun endObject (p as P{ctx, ...}) = let + fun prEnd ctx' = ( + ctx := ctx'; + indent(p, ~1); pr(p, "}"); decIndent (p, 2)) + in + case !ctx + of CLOSED => raise Fail "closed printer" + | OBJECT ctx' => (nl p; prEnd ctx') + | FIRST(OBJECT ctx') => prEnd ctx' + | KEY _ => raise Fail "expecting value after key" + | _ => raise Fail "endObject not in object context" + (* end case *) + end + + fun beginArray (p as P{ctx, ...}) = (case !ctx + of CLOSED => raise Fail "closed printer" + | _ => ( + optComma p; + pr (p, "["); incIndent(p, 2); nl p; + ctx := FIRST(ARRAY(!ctx))) + (* end case *)) + + fun endArray (p as P{ctx, ...}) = let + fun prEnd ctx' = ( + ctx := ctx'; + indent(p, ~1); pr(p, "]"); decIndent (p, 2)) + in + case !ctx + of CLOSED => raise Fail "closed printer" + | ARRAY ctx' => (nl p; prEnd ctx') + | FIRST(ARRAY ctx') => prEnd ctx' + | _ => raise Fail "endArray not in array context" + (* end case *) + end + + (* embed a JSON value into the output *) + fun value (printer, v) = let + fun pr (JSON.OBJECT fields) = let + fun prField (key, v) = (objectKey(printer, key); pr v) + in + beginObject printer; + List.app prField fields; + endObject printer + end + | pr (JSON.ARRAY vs) = ( + beginArray printer; + List.app pr vs; + endArray printer) + | pr JSON.NULL = null printer + | pr (JSON.BOOL b) = boolean (printer, b) + | pr (JSON.INT n) = integer (printer, n) + | pr (JSON.FLOAT f) = float (printer, f) + | pr (JSON.STRING s) = string (printer, s) + in + pr v + end + + end diff --git a/smlnj-lib/JSON/json-stream-output.sig b/smlnj-lib/JSON/json-stream-output.sig new file mode 100644 index 0000000..e97cefd --- /dev/null +++ b/smlnj-lib/JSON/json-stream-output.sig @@ -0,0 +1,59 @@ +(* json-stream-output.sig + * + * COPYRIGHT (c) 2021 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + *) + +signature JSON_STREAM_OUTPUT = + sig + + type outstream + + (* a `printer` packages up the output state needed to match + * braces and bracket, and indentation (when pretty printing). + *) + type printer + + (* print a `null` value *) + val null : printer -> unit + (* print a boolean value *) + val boolean : printer * bool -> unit + (* print an integer numeric value *) + val int : printer * int -> unit + (* print an integer numeric value *) + val integer : printer * IntInf.int -> unit + (* print a floating-point numeric value *) + val float : printer * real -> unit + (* print a UTF8 string value; any necessary escape sequences will be added *) + val string : printer * string -> unit + + (* begin a JSON object; this function also prints "{" *) + val beginObject : printer -> unit + (* print the key field for a JSON object *) + val objectKey : printer * string -> unit + (* end a JSON object; this function also prints "}" *) + val endObject : printer -> unit + (* begin a JSON object; this function also prints "{" *) + val beginArray : printer -> unit + (* end a JSON object; this function also prints "]" *) + val endArray : printer -> unit + + (* create a new printer; `new outS` is equivalent to the expression + * `new' {strm = outS, pretty=false}` + *) + val new : outstream -> printer + (* create a new printer with the pretty printing mode specified. If set to `true`, + * then newlines and indentation will be used to make the output human readable. + * Otherwise, the output will have no excess whitespace. + *) + val new' : {strm : outstream, pretty : bool} -> printer + (* close the printer; this function checks that there are no pending open object/ + * array values and raises `Fail` if there are. + * Note that it **does not** close the underlying output stream. + *) + val close : printer -> unit + + (* embed the given value into the output *) + val value : printer * JSON.value -> unit + + end diff --git a/smlnj-lib/JSON/json-stream-parser.sml b/smlnj-lib/JSON/json-stream-parser.sml new file mode 100644 index 0000000..ab70132 --- /dev/null +++ b/smlnj-lib/JSON/json-stream-parser.sml @@ -0,0 +1,620 @@ +(* json-stream-parser.sml + * + * COPYRIGHT (c) 2024 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + *) + +structure JSONStreamParser :> sig + + (* abstract type of JSON input *) + type source = JSONSource.source + + (* open a text input stream as a source *) + val openStream : TextIO.instream -> source + + (* open a text file as a source *) + val openFile : string -> source + + (* open a string as a source *) + val openString : string -> source + + (* close a source *) + val close : source -> unit + + (* callback functions for the different parsing events *) + type 'ctx callbacks = { + null : 'ctx -> 'ctx, + boolean : 'ctx * bool -> 'ctx, + integer : 'ctx * IntInf.int -> 'ctx, + float : 'ctx * real -> 'ctx, + string : 'ctx * string -> 'ctx, + startObject : 'ctx -> 'ctx, + objectKey : 'ctx * string -> 'ctx, + endObject : 'ctx -> 'ctx, + startArray : 'ctx -> 'ctx, + endArray : 'ctx -> 'ctx, + error : 'ctx * string -> unit + } + + val parse : 'ctx callbacks -> (source * 'ctx) -> 'ctx + + val parseFile : 'ctx callbacks -> (string * 'ctx) -> 'ctx + + end = struct + + structure W = Word + + datatype error_code = datatype JSONSource.error_code + + type source = JSONSource.source + + val openStream = JSONSource.openStream + val openFile = JSONSource.openFile + val openString = JSONSource.openString + val close = JSONSource.close + + (* callback functions for the different parsing events *) + type 'ctx callbacks = { + null : 'ctx -> 'ctx, + boolean : 'ctx * bool -> 'ctx, + integer : 'ctx * IntInf.int -> 'ctx, + float : 'ctx * real -> 'ctx, + string : 'ctx * string -> 'ctx, + startObject : 'ctx -> 'ctx, + objectKey : 'ctx * string -> 'ctx, + endObject : 'ctx -> 'ctx, + startArray : 'ctx -> 'ctx, + endArray : 'ctx -> 'ctx, + error : 'ctx * string -> unit + } + + fun error (cb : 'a callbacks, ctx, msg) = ( + #error cb (ctx, msg); + raise Fail "error") + + (* fast (no overflow checking) increment/decrement operations *) + fun inc n = W.toIntX(W.fromInt n + 0w1) + fun dec n = W.toIntX(W.fromInt n - 0w1) + + (* local copy of list reverse that the compiler can inline *) + fun reverse xs = let + fun rev' ([], ys) = ys + | rev' (x::xs, ys) = rev' (xs, x::ys) + in + rev' (xs, []) + end + + (* make a string from a list of characters in reverse order; the first argument + * is the number of characters, which must be equal or greater than the length + * of the input list + *) + fun mkString (_, []) = "" + | mkString (n, cs) = let + val s = Unsafe.CharVector.create n + fun init (_, []) = s + | init (i, c::cs) = ( + Unsafe.CharVector.update(s, i, c); + init (dec i, cs)) + in + init (dec n, cs) + end + + fun next (src as (strm, nLines)) = (case TextIO.StreamIO.input1 strm + of SOME(#"\n", strm') => (#"\n", (strm', nLines+1)) + | SOME(c, strm') => (c, (strm', nLines)) + | NONE => (#"\000", src) + (* end case *)) + + (* skip white space *) + fun skipWS (src as (strm, nLines)) = (case TextIO.StreamIO.input1 strm + of SOME(#" ", strm') => skipWS (strm', nLines) + | SOME(#"\t", strm') => skipWS (strm', nLines) + | SOME(#"\r", strm') => skipWS (strm', nLines) + | SOME(#"\n", strm') => skipWS (strm', nLines+1) + | SOME(c, strm') => (c, (strm', nLines)) + | NONE => (#"\000", src) + (* end case *)) + + fun parse (cb : 'a callbacks) (source : JSONSource.source, ctx) = let + fun error' (ctx, src, ec) = let + val msg = JSONSource.errorMsg (src, ec) + in + #error cb (ctx, msg); + raise Fail msg + end + fun matchC (src, c) = let + val (c', src') = next src + in + if (c = c') + then src' + else error' (ctx, src, InvalidLiteral) + end + (* parse a JSON value *) + fun parseValue (ctx, src : JSONSource.state) = (case skipWS src + of (#"[", src) => parseArray (ctx, src) + | (#"{", src) => parseObject (ctx, src) + | (#"-", src) => (case next src + of (#"0", src) => scanNumber (ctx, src, true, 0) + | (#"1", src) => scanNumber (ctx, src, true, 1) + | (#"2", src) => scanNumber (ctx, src, true, 2) + | (#"3", src) => scanNumber (ctx, src, true, 3) + | (#"4", src) => scanNumber (ctx, src, true, 4) + | (#"5", src) => scanNumber (ctx, src, true, 5) + | (#"6", src) => scanNumber (ctx, src, true, 6) + | (#"7", src) => scanNumber (ctx, src, true, 7) + | (#"8", src) => scanNumber (ctx, src, true, 8) + | (#"9", src) => scanNumber (ctx, src, true, 9) + | _ => error' (ctx, src, InvalidCharacter) + (* end case *)) + | (#"0", src) => scanNumber (ctx, src, false, 0) + | (#"1", src) => scanNumber (ctx, src, false, 1) + | (#"2", src) => scanNumber (ctx, src, false, 2) + | (#"3", src) => scanNumber (ctx, src, false, 3) + | (#"4", src) => scanNumber (ctx, src, false, 4) + | (#"5", src) => scanNumber (ctx, src, false, 5) + | (#"6", src) => scanNumber (ctx, src, false, 6) + | (#"7", src) => scanNumber (ctx, src, false, 7) + | (#"8", src) => scanNumber (ctx, src, false, 8) + | (#"9", src) => scanNumber (ctx, src, false, 9) + | (#"\"", src) => scanStringValue (ctx, src) + | (#"f", src) => let (* match "a" "l" "s" "e" *) + val src = matchC (src, #"a") + val src = matchC (src, #"l") + val src = matchC (src, #"s") + val src = matchC (src, #"e") + in + (#boolean cb (ctx, false), src) + end + | (#"n", src) => let (* match "u" "l" "l" *) + val src = matchC (src, #"u") + val src = matchC (src, #"l") + val src = matchC (src, #"l") + in + (#null cb ctx, src) + end + | (#"t", src) => let (* match "r" "u" "e" *) + val src = matchC (src, #"r") + val src = matchC (src, #"u") + val src = matchC (src, #"e") + in + (#boolean cb (ctx, true), src) + end +(* + | (#"/", src) => if comments + then parseValue (ctx, skipComment src) + else error' (ctx, src, CommentsNotAllowed) +*) + (* currently, comments are always allowed *) + | (#"/", src) => parseValue (ctx, skipComment src) + | _ => error' (ctx, src, InvalidCharacter) + (* end case *)) + (* parse a JSON array assuming that the '[' has been consumed *) + and parseArray (ctx, src) = let + (* loop to scan one or more items *) + fun lp (ctx, src) = let + val (ctx, src) = parseValue (ctx, src) + in + case skipWS src + of (#",", src) => lp (ctx, src) + | (#"]", src) => (#endArray cb ctx, src) + | _ => error' (ctx, src, InvalidArray) + (* end case *) + end + val ctx = #startArray cb ctx + in + case skipWS src + of (#"]", src) => (#endArray cb ctx, src) + | _ => lp (ctx, src) + (* end case *) + end + (* parse a JSON object assuming that the '{' has been consumed *) + and parseObject (ctx, src) = let + (* loop to scan one or more key-value pairs *) + fun lp (ctx, src) = (case skipWS src + of (#"\"", src) => let + val (key, src) = scanString src + val ctx = #objectKey cb (ctx, key) + in + case skipWS src + of (#":", src) => let + val (ctx, src) = parseValue (ctx, src) + in + case skipWS src + of (#",", src) => lp (ctx, src) + | (#"}", src) => (#endObject cb ctx, src) + | _ => error' (ctx, src, InvalidObject) + (* end case *) + end + | _ => error' (ctx, src, ExpectedColon) + (* end case *) + end + | _ => error' (ctx, src, ExpectedKey) + (* end case *)) + val ctx = #startObject cb ctx + in + case skipWS src + of (#"}", src) => (#endObject cb ctx, src) + | _ => lp (ctx, src) + (* end case *) + end + (* scan a string assuming that the first quote has been consumed *) + and scanString (start : JSONSource.state) = let + fun c2w c = W.fromInt(ord c) + fun w2c w = Char.chr(W.toInt w) + fun scan (src, n, cs) = (case next src + of (#"\000", _) => error' (ctx, start, UnclosedString) + | (#"\"", src) => (mkString(n, cs), src) + | (#"\\", src) => scanEscape (src, n, cs) + | (c, src) => if (#" " <= c) andalso (c < #"\127") + (* printable ASCII character *) + then scan (src, inc n, c::cs) + (* either non-printable ASCII or UTF-8 byte sequence *) + else scanUTF8 (src, c, c2w c, n, cs) + (* end case *)) + and scanEscape (src, n, cs) = let + fun return (src, c) = scan (src, inc n, c::cs) + in + case next src + of (#"\"", src) => return (src, #"\"") + | (#"\\", src) => return (src, #"\\") + | (#"/", src) => return (src, #"/") + | (#"b", src) => return (src, #"\008") (* backspace *) + | (#"f", src) => return (src, #"\012") (* form feed *) + | (#"n", src) => return (src, #"\010") (* line feed *) + | (#"r", src) => return (src, #"\013") (* carriage return *) + | (#"t", src) => return (src, #"\009") (* tab *) + | (#"u", src) => scanUnicodeEscape (src, n, cs) + | _ => error' (ctx, src, InvalidEscape) + (* end case *) + end + (* scan a Unicode escape sequence; we have already consumed the "\u" + * prefix, so we just need to parse the four hex digits followed by + * a possible second escape sequence for a surrogate pair. The result + * is encoded as a UTF-8 byte sequence. + *) + and scanUnicodeEscape (src, n, cs) = let + fun getDigit src = (case next src + of (#"0", src) => (0w0, src) + | (#"1", src) => (0w1, src) + | (#"2", src) => (0w2, src) + | (#"3", src) => (0w3, src) + | (#"4", src) => (0w4, src) + | (#"5", src) => (0w5, src) + | (#"6", src) => (0w6, src) + | (#"7", src) => (0w7, src) + | (#"8", src) => (0w8, src) + | (#"9", src) => (0w9, src) + | (#"a", src) => (0w10, src) + | (#"A", src) => (0w10, src) + | (#"b", src) => (0w11, src) + | (#"B", src) => (0w11, src) + | (#"c", src) => (0w12, src) + | (#"C", src) => (0w12, src) + | (#"d", src) => (0w13, src) + | (#"D", src) => (0w13, src) + | (#"e", src) => (0w14, src) + | (#"E", src) => (0w14, src) + | (#"f", src) => (0w15, src) + | (#"F", src) => (0w15, src) + | _ => error' (ctx, src, InvalidUnicodeEscape) + (* end case *)) + fun getDigits src = let + (* get four digits *) + val (d0, src) = getDigit src + val (d1, src) = getDigit src + val (d2, src) = getDigit src + val (d3, src) = getDigit src + val n = W.<<(d0, 0w24) + + W.<<(d1, 0w16) + + W.<<(d2, 0w8) + + d3 + in + (n, src) + end + val (u0, src) = getDigits src + (* get the second 16-bit code point of a surrogate pair *) + fun scanLowSurrogate src = ( + (* match "\uxxxx" *) + case next src + of (#"\\", src) => (case next src + of (#"u", src) => let + val (u1, src) = getDigits src + in + if (u1 < 0wxDC00) orelse (0wxDFFF < u1) + then error' (ctx, src, InvalidUnicodeSurrogatePair) + (* convert pair to a Unicode code point + * and then to UTF-8 bytes. + *) + else toUTF8 (src, + 0wx10000 + + W.<<(u0 - 0wxD800, 0w10) + + (u1 - 0wxDC00)) + end + | _ => error' (ctx, src, InvalidUnicodeSurrogatePair) + (* end case *)) + | _ => error' (ctx, src, InvalidUnicodeSurrogatePair) + (* end case *)) + (* convert a word to a UTF-8 sequence *) + and toUTF8 (src, w) = if (w <= 0wx7f) + then scan (src, inc n, w2c w :: cs) + else if (w <= 0wx7ff) + then scan (src, + n+2, + w2c(W.orb(0wxc0, W.>>(w, 0w6))) + :: w2c(W.orb(0wx80, W.andb(w, 0wx3f))) + :: cs) + else if (w <= 0wxffff) + then scan (src, + n+3, + w2c(W.orb(0wxe0, W.>>(w, 0w12))) + :: w2c(W.orb(0wx80, W.andb(W.>>(w, 0w6), 0wx3f))) + :: w2c(W.orb(0wx80, W.andb(w, 0wx3f))) + :: cs) + else if (w <= 0wx10ffff) + then scan (src, + n+4, + w2c(W.orb(0wxf0, W.>>(w, 0w18))) + :: w2c(W.orb(0wx80, W.andb(W.>>(w, 0w12), 0wx3f))) + :: w2c(W.orb(0wx80, W.andb(W.>>(w, 0w6), 0wx3f))) + :: w2c(W.orb(0wx80, W.andb(w, 0wx3f))) + :: cs) + else error' (ctx, src, InvalidUnicodeEscape) + in + if (u0 < 0wxD800) + then toUTF8 (src, u0) + else if (u0 <= 0wxDBFF) + then scanLowSurrogate src + else error' (ctx, src, InvalidUnicodeEscape) + end (* scanUnicodeEscape *) + (* a simple state machine for getting a valid UTF-8 byte sequence. See + * https://unicode.org/mail-arch/unicode-ml/y2003-m02/att-0467/01-The_Algorithm_to_Valide_an_UTF-8_String + * for a description of the state machine. + *) + and scanUTF8 (src, chr0, byte0, n, cs) = let + fun getByte src = (case next src + of (#"\000", _) => error' (ctx, src, IncompleteUTF8) + | (c, src') => (c2w c, c, src') + (* end case *)) + fun inRange (minB : word, b, maxB) = ((b - minB) <= maxB - minB) + (* handles last byte for all multi-byte sequences *) + fun stateA (src, n, chrs) = let + val (b, c, src) = getByte src + in + if inRange(0wx80, b, 0wxbf) + then scan (src, inc n, c::chrs) + else error' (ctx, src, InvalidUTF8) + end + (* handles second/third byte for three/four-byte sequences *) + and stateB (src, n, chrs) = let + val (b, c, src) = getByte src + in + if inRange(0wx80, b, 0wxbf) + then stateA (src, inc n, c::chrs) + else error' (ctx, src, InvalidUTF8) + end + (* byte0 = 0b1110_0000 (3-byte sequence) *) + and stateC (src, n, chrs) = let + val (b, c, src) = getByte src + in + if inRange(0wxa0, b, 0wxbf) + then stateA (src, inc n, c::chrs) + else error' (ctx, src, InvalidUTF8) + end + (* byte0 = 0b1110_1101 (3-byte sequence) *) + and stateD (src, n, chrs) = let + val (b, c, src) = getByte src + in + if inRange(0wx80, b, 0wx9f) + then stateA (src, inc n, c::chrs) + else error' (ctx, src, InvalidUTF8) + end + (* byte0 = 0b1111_0001 .. 0b1111_0011 (4-byte sequence) *) + and stateE (src, n, chrs) = let + val (b, c, src) = getByte src + in + if inRange(0wx80, b, 0wxbf) + then stateB (src, inc n, c::chrs) + else error' (ctx, src, InvalidUTF8) + end + (* byte0 = 0b1111_0000 (4-byte sequence) *) + and stateF (src, n, chrs) = let + val (b, c, src) = getByte src + in + if inRange(0wx90, b, 0wxbf) + then stateB (src, inc n, c::chrs) + else error' (ctx, src, InvalidUTF8) + end + (* byte0 = 0b1111_1000 (4-byte sequence) *) + and stateG (src, n, chrs) = let + val (b, c, src) = getByte src + in + if inRange(0wx80, b, 0wx8f) + then stateB (src, inc n, c::chrs) + else error' (ctx, src, InvalidUTF8) + end + in + if (byte0 <= 0wx7f) + (* this case only occurs for non-printing ASCII characters *) + then error' (ctx, src, NonPrintingASCII) + else if inRange(0wxc2, byte0, 0wxdf) + then stateA (src, n, cs) + else if inRange(0wxe1, byte0, 0wxec) + orelse inRange(0wxee, byte0, 0wxef) + then stateB (src, n, cs) + else if (byte0 = 0wxe0) + then stateC (src, n, cs) + else if (byte0 = 0wxed) + then stateD (src, n, cs) + else if inRange(0wxf1, byte0, 0wxf3) + then stateE (src, n, cs) + else if (byte0 = 0wxf0) + then stateF (src, n, cs) + else if (byte0 = 0wxf4) + then stateG (src, n, cs) + else error' (ctx, src, InvalidUTF8) + end (* scanUTF8 *) + in + scan (start, 0, []) + end (* scanString *) + (* scan a string value assuming that the first quote has been consumed *) + and scanStringValue (ctx, src) = let + val (s, src) = scanString src + in + (#string cb (ctx, s), src) + end + (* scan an integer or floating-point number. *) + and scanNumber (ctx, startSrc : JSONSource.state, isNeg, first) = let + fun mkFloat (sign, whole, frac, exp, src) = let + val f = valOf(Real.fromDecimal { + class = IEEEReal.NORMAL, + sign = sign, + digits = List.revAppend(whole, reverse frac), + exp = exp + List.length whole + }) handle Overflow => if sign then Real.negInf else Real.posInf + in + if Real.isFinite f + then (#float cb (ctx, f), src) + else error' (ctx, startSrc, NumberTooLarge) + end + (* scan an integer or the whole part of a float *) + fun scanWhole (src, n, digits) = (case next src + of (#"0", src) => scanWhole (src, inc n, 0::digits) + | (#"1", src) => scanWhole (src, inc n, 1::digits) + | (#"2", src) => scanWhole (src, inc n, 2::digits) + | (#"3", src) => scanWhole (src, inc n, 3::digits) + | (#"4", src) => scanWhole (src, inc n, 4::digits) + | (#"5", src) => scanWhole (src, inc n, 5::digits) + | (#"6", src) => scanWhole (src, inc n, 6::digits) + | (#"7", src) => scanWhole (src, inc n, 7::digits) + | (#"8", src) => scanWhole (src, inc n, 8::digits) + | (#"9", src) => scanWhole (src, inc n, 9::digits) + | (#".", src) => scanFrac (src, digits) + | (#"e", src) => scanExp (src, digits, []) + | (#"E", src) => scanExp (src, digits, []) + | _ => let + fun cvt ([], k) = + (#integer cb (ctx, if isNeg then ~k else k), src) + | cvt (d::ds, k) = + cvt (ds, 10*k + IntInf.fromInt d) + in + cvt (reverse digits, 0) + end + (* end case *)) + (* scan the fractional part of a real; the '.' has already been + * consumed. + *) + and scanFrac (src, wDigits) = let + fun scanF (src, fDigits) = (case next src + of (#"0", src) => scanF (src, 0::fDigits) + | (#"1", src) => scanF (src, 1::fDigits) + | (#"2", src) => scanF (src, 2::fDigits) + | (#"3", src) => scanF (src, 3::fDigits) + | (#"4", src) => scanF (src, 4::fDigits) + | (#"5", src) => scanF (src, 5::fDigits) + | (#"6", src) => scanF (src, 6::fDigits) + | (#"7", src) => scanF (src, 7::fDigits) + | (#"8", src) => scanF (src, 8::fDigits) + | (#"9", src) => scanF (src, 9::fDigits) + | (#"e", src) => scanExp (src, wDigits, fDigits) + | (#"E", src) => scanExp (src, wDigits, fDigits) + | _ => mkFloat (isNeg, wDigits, fDigits, 0, src) + (* end case *)) + in + scanF (src, []) + end + (* scan the exponent part of a real; the "e"/"E" has already been + * consumed. + *) + and scanExp (src, whole, frac) = let + val (expSign, exp, seenDigit, src) = (case next src + of (#"-", src) => (~1, 0, false, src) + | (#"+", src) => (1, 0, false, src) + | (#"0", src) => (1, 0, true, src) + | (#"1", src) => (1, 1, true, src) + | (#"2", src) => (1, 2, true, src) + | (#"3", src) => (1, 3, true, src) + | (#"4", src) => (1, 4, true, src) + | (#"5", src) => (1, 5, true, src) + | (#"6", src) => (1, 6, true, src) + | (#"7", src) => (1, 7, true, src) + | (#"8", src) => (1, 8, true, src) + | (#"9", src) => (1, 9, true, src) + | _ => error' (ctx, startSrc, InvalidNumber) + (* end case *)) + fun scanE (src, seenDigit, exp) = (case next src + of (#"0", src) => scanE (src, true, 10 * exp) + | (#"1", src) => scanE (src, true, 10 * exp + 1) + | (#"2", src) => scanE (src, true, 10 * exp + 2) + | (#"3", src) => scanE (src, true, 10 * exp + 3) + | (#"4", src) => scanE (src, true, 10 * exp + 4) + | (#"5", src) => scanE (src, true, 10 * exp + 5) + | (#"6", src) => scanE (src, true, 10 * exp + 6) + | (#"7", src) => scanE (src, true, 10 * exp + 7) + | (#"8", src) => scanE (src, true, 10 * exp + 8) + | (#"9", src) => scanE (src, true, 10 * exp + 9) + | _ => if seenDigit + then mkFloat (isNeg, whole, frac, expSign * exp, src) + else error' (ctx, startSrc, InvalidNumber) + (* end case *)) + in + scanE (src, seenDigit, exp) + handle Overflow => error' (ctx, startSrc, NumberTooLarge) + end + in + if (first = 0) + then (case next startSrc + of (#".", src) => scanFrac(src, []) + | (#"e", src) => scanExp(src, [], []) + | (#"E", src) => scanExp(src, [], []) + | _ => (#integer cb (ctx, 0), startSrc) + (* end case *)) + else scanWhole (startSrc, 1, [first]) + end + (* skip over a C-style comment; the initial '/' has been consumed *) + and skipComment (src : JSONSource.state) = let + fun skip src = (case next src + of (#"*", src) => let + (* look for "/" (possibly preceded by stars) *) + fun lp src = (case next src + of (#"/", src) => src + | (#"*", src) => lp src + | (#"\000", src) => error' (ctx, src, UnclosedComment) + | (_, src) => skip src + (* end case *)) + in + lp src + end + | (#"\000", src) => error' (ctx, src, UnclosedComment) + | (_, src) => skip src + (* end case *)) + in + case next src + of (#"*", src) => skip src + | _ => error' (ctx, src, InvalidCharacter) + (* end case *) + end + val src = (case !source + of SOME src => src + | NONE => raise Fail "closed JSON source" + (* end case *)) + val (ctx, src) = parseValue (ctx, src) + in + source := SOME src; + ctx + end + + fun parseFile cb = let + val parse = parse cb + fun parser (fileName, ctx) = let + val source = openFile fileName + val ctx = parse (source, ctx) + handle ex => (close source; raise ex) + in + close source; + ctx + end + in + parser + end + + end diff --git a/smlnj-lib/JSON/json-stream-printer.sml b/smlnj-lib/JSON/json-stream-printer.sml new file mode 100644 index 0000000..caaf249 --- /dev/null +++ b/smlnj-lib/JSON/json-stream-printer.sml @@ -0,0 +1,17 @@ +(* json-stream-printer.sml + * + * Support for printing to `TextIO` output streams. + * + * COPYRIGHT (c) 2021 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + *) + +structure JSONStreamPrinter + : JSON_STREAM_OUTPUT where type outstream = TextIO.outstream + = JSONStreamOutputFn ( + struct + type outstream = TextIO.outstream + val output1 = TextIO.output1 + val output = TextIO.output + val outputSlice = TextIO.outputSubstr + end) diff --git a/smlnj-lib/JSON/json-util.sml b/smlnj-lib/JSON/json-util.sml new file mode 100644 index 0000000..ee501e6 --- /dev/null +++ b/smlnj-lib/JSON/json-util.sml @@ -0,0 +1,277 @@ +(* json-util.sml + * + * COPYRIGHT (c) 2017 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * Utility functions for processing the JSON in-memory representation. + *) + +structure JSONUtil : sig + + (* exceptions for conversion functions *) + exception NotBool of JSON.value + exception NotInt of JSON.value + exception NotNumber of JSON.value + exception NotString of JSON.value + + (* exception that is raised when trying to process a non-object value as an object *) + exception NotObject of JSON.value + + (* exception that is raised when the given field is not found in an object *) + exception FieldNotFound of JSON.value * string + + (* exception that is raised when trying to process a non-array value as an array *) + exception NotArray of JSON.value + + (* exception that is raised when access to an array value is out of bounds *) + exception ArrayBounds of JSON.value * int + + (* exception that is raised when a `FIND` edge does not match any array element *) + exception ElemNotFound of JSON.value + + (* map the above exceptions to a message string; we use General.exnMessage + * for other exceptions. + *) + val exnMessage : exn -> string + + (* conversion functions for atomic values. These raise the corresponding + * "NotXXX" exceptions when their argument has the wrong shape. Also note + * that asNumber will accept both integers and floats and asInt may raise + * Overflow if the number is too large. + *) + val asBool : JSON.value -> bool + val asInt : JSON.value -> Int.int + val asIntInf : JSON.value -> IntInf.int + val asNumber : JSON.value -> Real.real + val asString : JSON.value -> string + + (* find a field in an object; this function raises the NotObject exception when + * the supplied value is not an object. + *) + val findField : JSON.value -> string -> JSON.value option + + (* lookup a field in an object; this function raises the NotObject exception when + * the supplied value is not an object and raises FieldNotFound if the value is + * an object, but does not have the specified field. + *) + val lookupField : JSON.value -> string -> JSON.value + + (* does a JSON object have a given field? This function returns false if called + * on a non-object value. + *) + val hasField : string -> JSON.value -> bool + + (* does the specified field of an object satisfy the given predicate? This function + * returns false if called on a non-object value. + *) + val testField : string -> (JSON.value -> bool) -> JSON.value -> bool + + (* convert a JSON array to an SML vector *) + val asArray : JSON.value -> JSON.value vector + + (* map a conversion function over a JSON array to produce a list; this function + * raises the NotArray exception if the second argument is not an array. + *) + val arrayMap : (JSON.value -> 'a) -> JSON.value -> 'a list + + (* path specification for indexing into JSON values *) + datatype edge + = SEL of string (* select field of object *) + | SUB of int (* index into array component *) + | FIND of JSON.value -> bool + (* first array component that satisfies the predicate *) + + type path = edge list + + (* `get (jv, path)` returns the component of `jv` named by `path`. It raises + * the NotObject, NotArray, FieldNotFound, and ArrayBounds exceptions if there + * is an inconsistency between the path and the structure of `jv`. + *) + val get : JSON.value * path -> JSON.value + + (* `replace (jv, path, v)` replaces the component of `jv` named by `path` + * with the value `v`. + *) + val replace : JSON.value * path * JSON.value -> JSON.value + + (* `insert (jv, path, lab, v)` inserts `lab : v` into the object named by `path` + * in `jv` + *) + val insert : JSON.value * path * string * JSON.value -> JSON.value + + (* `append (jv, path, vs)` appends the list of values `vs` onto the array named by `path` + * in `jv` + *) + val append : JSON.value * path * JSON.value list -> JSON.value + + end = struct + + structure J = JSON + + (* import the error exceptions and exnMessage *) + open Errors + + fun asBool (J.BOOL b) = b + | asBool v = raise NotBool v + + fun asInt (J.INT n) = Int.fromLarge n + | asInt v = raise NotInt v + + fun asIntInf (J.INT n) = n + | asIntInf v = raise NotInt v + + fun asNumber (J.INT n) = Real.fromLargeInt n + | asNumber (J.FLOAT f) = f + | asNumber v = raise NotNumber v + + fun asString (J.STRING s) = s + | asString v = raise NotString v + + fun findField (J.OBJECT fields) = let + fun find lab = (case List.find (fn (l, v) => (l = lab)) fields + of NONE => NONE + | SOME(_, v) => SOME v + (* end case *)) + in + find + end + | findField v = raise NotObject v + + fun lookupField (v as J.OBJECT fields) = let + fun find lab = (case List.find (fn (l, v) => (l = lab)) fields + of NONE => raise FieldNotFound(v, concat["no definition for field \"", lab, "\""]) + | SOME(_, v) => v + (* end case *)) + in + find + end + | lookupField v = raise NotObject v + + fun hasField lab (J.OBJECT fields) = List.exists (fn (lab', _) => lab = lab') fields + | hasField _ _ = false + + fun testField lab pred (J.OBJECT fields) = ( + case List.find (fn (lab', _) => lab = lab') fields + of SOME(_, v) => pred v + | NONE => false + (* end case *)) + | testField _ _ _ = false + + fun asArray (J.ARRAY vs) = Vector.fromList vs + | asArray v = raise NotArray v + + fun arrayMap f (J.ARRAY vs) = List.map f vs + | arrayMap f v = raise NotArray v + + (* path specification for indexing into JSON values *) + datatype edge + = SEL of string (* select field of object *) + | SUB of int (* index into array component *) + | FIND of JSON.value -> bool + (* first array component that satisfies the predicate *) + + type path = edge list + + fun get (v, []) = v + | get (v as J.OBJECT fields, SEL lab :: rest) = + (case List.find (fn (l, v) => (l = lab)) fields + of NONE => raise FieldNotFound(v, lab) + | SOME(_, v) => get (v, rest) + (* end case *)) + | get (v, SEL _ :: _) = raise NotObject v + | get (v as J.ARRAY vs, SUB i :: rest) = let + fun nth ([], _) = raise ArrayBounds(v, i) + | nth (elem::_, 0) = elem + | nth (_::r, i) = nth(r, i-1) + in + if (i < 0) + then raise ArrayBounds(v, i) + else get (nth(vs, i), rest) + end + | get (v, SUB _ :: _) = raise (NotArray v) + | get (v as J.ARRAY vs, FIND pred :: rest) = (case List.find pred vs + of NONE => raise ElemNotFound v + | SOME v => get (v, rest) + (* end case *)) + | get (v, FIND _ :: _) = raise (NotArray v) + + (* top-down zipper to support functional editing *) + datatype zipper + = ZNIL + | ZOBJ of { + prefix : (string * J.value) list, + label : string, + child : zipper, + suffix : (string * J.value) list + } + | ZARR of { + prefix : J.value list, + child : zipper, + suffix : J.value list + } + + (* follow a path into a JSON value while constructing a zipper *) + fun unzip (v, []) = (ZNIL, v) + | unzip (v as J.OBJECT fields, SEL lab :: rest) = let + fun find (_, []) = raise FieldNotFound(v, lab) + | find (pre, (l, v)::flds) = if (l = lab) + then let + val (zipper, v) = unzip (v, rest) + in + (ZOBJ{prefix=pre, label=lab, suffix=flds, child=zipper}, v) + end + else find ((l, v)::pre, flds) + in + find ([], fields) + end + | unzip (v, SEL _ :: _) = raise NotObject v + | unzip (v as J.ARRAY vs, SUB i :: rest) = let + fun sub (_, [], _) = raise ArrayBounds(v, i) + | sub (prefix, v::vs, 0) = let + val (zipper, v) = unzip (v, rest) + in + (ZARR{prefix = prefix, child = zipper, suffix = vs}, v) + end + | sub (prefix, v::vs, i) = sub (v::prefix, vs, i-1) + in + sub ([], vs, i) + end + | unzip (v, SUB _ :: _) = raise NotArray v + | unzip (v as J.ARRAY vs, FIND pred :: rest) = let + fun find (_, []) = raise ElemNotFound v + | find (prefix, v::vs) = if pred v + then let + val (zipper, v) = unzip (v, rest) + in + (ZARR{prefix = prefix, child = zipper, suffix = vs}, v) + end + else find (v::prefix, vs) + in + find ([], vs) + end + | unzip (v, FIND _ :: _) = raise (NotArray v) + + (* zip up a zipper *) + fun zip (zipper, v) = let + fun zip' ZNIL = v + | zip' (ZOBJ{prefix, label, suffix, child}) = + J.OBJECT(List.revAppend(prefix, (label, zip' child)::suffix)) + | zip' (ZARR{prefix, child, suffix}) = + J.ARRAY(List.revAppend(prefix, zip' child :: suffix)) + in + zip' zipper + end + + fun replace (jv, path, v) = zip (#1 (unzip (jv, path)), v) + + fun insert (jv, path, label, v) = (case unzip (jv, path) + of (zipper, J.OBJECT fields) => zip (zipper, J.OBJECT((label, v)::fields)) + | (_, v) => raise NotObject v + (* end case *)) + + fun append (jv, path, vs) = (case unzip (jv, path) + of (zipper, J.ARRAY jvs) => zip (zipper, J.ARRAY(jvs @ vs)) + | (_, v) => raise NotArray v + (* end case *)) + + end diff --git a/smlnj-lib/JSON/json.sml b/smlnj-lib/JSON/json.sml new file mode 100644 index 0000000..a0cdd79 --- /dev/null +++ b/smlnj-lib/JSON/json.sml @@ -0,0 +1,22 @@ +(* json.sml + * + * COPYRIGHT (c) 2008 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * This is the tree representation of a JSON data as produced/consumed + * by the tree parser. + *) + +structure JSON = + struct + + datatype value + = OBJECT of (string * value) list + | ARRAY of value list + | NULL + | BOOL of bool + | INT of IntInf.int + | FLOAT of real + | STRING of string (* note that string is assumed to be UTF-8 *) + + end diff --git a/smlnj-lib/LICENSE b/smlnj-lib/LICENSE new file mode 100644 index 0000000..dc6853a --- /dev/null +++ b/smlnj-lib/LICENSE @@ -0,0 +1,20 @@ +STANDARD ML OF NEW JERSEY COPYRIGHT NOTICE, LICENSE AND DISCLAIMER. + +Copyright (c) 1989-2002 by Lucent Technologies + +Permission to use, copy, modify, and distribute this software and its +documentation for any purpose and without fee is hereby granted, +provided that the above copyright notice appear in all copies and that +both the copyright notice and this permission notice and warranty +disclaimer appear in supporting documentation, and that the name of +Lucent Technologies, Bell Labs or any Lucent entity not be used in +advertising or publicity pertaining to distribution of the software +without specific, written prior permission. + +Lucent disclaims all warranties with regard to this software, +including all implied warranties of merchantability and fitness. In no +event shall Lucent be liable for any special, indirect or +consequential damages or any damages whatsoever resulting from loss of +use, data or profits, whether in an action of contract, negligence or +other tortious action, arising out of or in connection with the use +or performance of this software. diff --git a/smlnj-lib/PORTING b/smlnj-lib/PORTING new file mode 100644 index 0000000..bd6c052 --- /dev/null +++ b/smlnj-lib/PORTING @@ -0,0 +1,174 @@ +Here is a brief description of the changes since the 0.3beta release of +the SML/NJ library in 1994. Modules are listed in alphabetical order. + +-------------------------------------------------------------------- +Summary +------- + - eliminate weak types in signatures. + + - name changes to track SML Basis conventions. + - change names, argument order of iteration combinators + - use MONO_XXX/XXX instead of XXX/POLY_XXX for signatures. + + - consolidate signatures + - e.g., use ORD_MAP for the Int*Map structures. + + - Name structure is renamed Atom. + - removed name_tbl type and operations and define free-standing + structures AtomBinaryMap, AtomBinarySet and AtomTable. + + - split Format structure into Format and Scan + + - moved Unix specific modules to Unix library (use unix-lib.cm + in sources file to access). + + - remove signatures and structures that are redundant with the + SML Basis Library. + +-------------------------------------------------------------------- +Detailed Changes +---------------- + +functor ArrayQSort (...) : ARRAY_SORT + ==> ArrayQSortFn (...) : MONO_ARRAY_SORT + +structure BigInt : BIGINT + ==> IntInf : INT_INF + The IntInf structure mostly matches that defined by SML'97. + +functor BinaryDict(...) : DICT + ==> BinaryMapFn(...) : ORD_MAP + +functor BinarySet(...) + ==> BinarySetFn(...) + +structure CType : CTYPE + Deleted. Use operations from Char structure in basis. + +signature DICT + ==> ORD_MAP + Also, various function names changed to track the SML'97 naming + conventions: + val find : ... + Deleted. + val peek : ... + ==> find : ... + val listItems : ... + ==> listItemsi : ... + val app : ... + ==> appi : ... + val revapp : ... + Deleted. + val map : ... + ==> mapi : ... + val fold : ... + ==> foldr + val revfold : ... + ==> foldl + val transform : ... + ==> map : ... + Also, the ORD_MAP interface provides the following new operations: + insert', listItems, collate, unionWith, unionWithi, intersectWith, + intersectWithi, app, foldl, foldr, filter, filteri, mapPartial, + and mapPartiali. + +structure Finalizer : FINALIZER + Deleted. No replacement as of yet. + +structure Format : FORMAT + The scanning functions were moved to a new module (Scan : SCAN). + Also, the fmt_item datatype has changed. + +functor HashTable (...) : HASH_TABLE + ==> HashTableFn (...) : MONO_HASH_TABLE + +structure IntMap : INTMAP + ==> IntBinaryMap : ORD_MAP + also IntListMap : ORD_MAP + +structure IntSet : INTSET + ==> IntBinarySet : ORD_MAP + also IntListSet : ORD_MAP + +structure LibBase + Uses of the LibBase.BadArg exception have been replaced by + the Fail exception from the SML'97 basis. The function + LibBase.failure replaces LibBase.badArg. + Uses of the type LibBase.relation have been replaced by the + order type from the SML'97 basis. + The type of the version value has changed, and the value + versionName is now called banner. + +structure ListUtil : LIST_UTIL + Deleted. Use the operations from List and ListPair in the + SML'97 basis. + +structure Name : NAME + ==> Atom : ATOM + and AtomTable : ATOM_TABLE + +structure MakeString : MAKESTRING + Deleted. In SML'97, basic types provide their own conversion + functions. + +signature ORD_SET + Many function names changed to track the SML'97 naming conventions: + val find : ... + Deleted + val peek : ... + ==> find : ... + val fold : ... + ==> foldr : ... + val revfold : ... + ==> foldl : ... + Also, the ORD_SET interface provides the following new operations: + add', compare, and filter. + +structure PolyHashTable : POLY_HASH_TABLE + ==> HashTable : HASH_TABLE + +structure PolyArrayQSort : POLY_ARRAY_SORT + ==> ArrayQSort : ARRAY_SORT + +structure Rand : RAND + ==> Random : RANDOM + +structure Random : RANDOM + ==> Rand : RAND + +functor SplayDict (...) : DICT + ==> SplayMapFn (...) : ORD_MAP + +functor SplaySet (...) + ==> SplaySetFn (...) + +structure StringUtil : STRING_UTIL + Deleted. Use operations from String and Substring structures. + +structure StringCvt : STRING_CVT + Deleted. In SML'97, basic types provide their own conversion + functions. + +structure UnixPath : UNIX_PATH + ==> structure PathUtil : PATH_UTIL + Many of the operations defined in the UnixPath structure are + now part of the OS.FileSys and OS.Path structures in the SML'97 + basis. The remaining operations are findFile and findFiles. + In addition, two new functions are provided: existsFile and + allFiles. + +-------------------------------------------------------------------- +New modules +----------- +KeywordFn +Hash2TableFn +structure ParserComb : PARSER_COMB +structure SimpleURef : UREF +structure URef : UREF + +-------------------------------------------------------------------- +New Libraries +------------- +HTML library +Reactive library +Regular expression library diff --git a/smlnj-lib/PP/README b/smlnj-lib/PP/README new file mode 100644 index 0000000..718107a --- /dev/null +++ b/smlnj-lib/PP/README @@ -0,0 +1,35 @@ +README: PP -- the pretty printer library +*** under construction *** + +This is a library for pretty printing. It provides a much richer +pretty printing model than the Oppen-style pretty printer provided +by the compiler. + +Its implementation is based on the FORMAT library by Pierre Weis +(Copyright INRIA 1996), which was written in CAML, but there are +a number of differences: + + 1) this pretty printer does not support tabulation boxes yet. + 2) this pretty-printer is functorized over abstract output + devices and input tokens. + 3) this pretty-printer supports styled text. + 4) this pretty-printer supports both absolute and relative indentation + modes (the former is necessary for block-structured language + indentation). + 5) this pretty-printer supports both imperative pretty-printer streams + and declarative pretty-printing layouts, which can be intermixed. + +The main signatures and implementation can be found in the src directory. +The devices directory has some sample output devices (including one for +generating layouts in HTML). Someday, the examples directory will have +some example pretty-printers; for now, you can look in the tests directory. + +For reference, the current OCaml implementation is available at + + https://github.com/ocaml/ocaml/blob/trunk/stdlib/format.mli + https://github.com/ocaml/ocaml/blob/trunk/stdlib/format.ml + +A short tutorial on the OCaml FORMAT pretty pringing library can be +found at + + https://ocaml.org/learn/tutorials/format.html diff --git a/smlnj-lib/PP/devices/.cm/GUID/html3-dev.sml b/smlnj-lib/PP/devices/.cm/GUID/html3-dev.sml new file mode 100644 index 0000000..3e17302 --- /dev/null +++ b/smlnj-lib/PP/devices/.cm/GUID/html3-dev.sml @@ -0,0 +1 @@ +guid-$/(pp-extras-lib.cm):devices/html3-dev.sml-1714016092.189 diff --git a/smlnj-lib/PP/devices/.cm/SKEL/html3-dev.sml b/smlnj-lib/PP/devices/.cm/SKEL/html3-dev.sml new file mode 100644 index 0000000..5c6d68f --- /dev/null +++ b/smlnj-lib/PP/devices/.cm/SKEL/html3-dev.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f3d"List"HTML"d"String"ad"HTML3Dev"jh0h2egp1c"PP_DEVICE"f1 \ No newline at end of file diff --git a/smlnj-lib/PP/devices/.cm/amd64-unix/html3-dev.sml b/smlnj-lib/PP/devices/.cm/amd64-unix/html3-dev.sml new file mode 100644 index 0000000..6bf5d3f Binary files /dev/null and b/smlnj-lib/PP/devices/.cm/amd64-unix/html3-dev.sml differ diff --git a/smlnj-lib/PP/devices/ansi-term-dev.sml b/smlnj-lib/PP/devices/ansi-term-dev.sml new file mode 100644 index 0000000..3a46349 --- /dev/null +++ b/smlnj-lib/PP/devices/ansi-term-dev.sml @@ -0,0 +1,201 @@ +(* ansi-term-dev.sml + * + * COPYRIGHT (c) 2020 John Reppy (http://www.cs.uchicago.edu/~jhr) + * All rights reserved. + * + * A pretty-printing device for text output to ANSI terminals. This device + * supports the standard ANSI output attributes. + *) + +structure ANSITermDev : sig + + include PP_DEVICE + where type style = ANSITerm.style list + + (* create an output device; if the underlying stream is connected to a TTY, + * then styled output is enabled, otherwise it will be disabled. + *) + val openDev : {dst : TextIO.outstream, wid : int} -> device + + (* enable/disable/query styled output. + * + * styleMode (dev, NONE) -- query current mode + * styleMode (dev, SOME true) -- enable styled output + * styleMode (dev, SOME false) -- disable styled output + * + * This function returns the previous state of the device. + * NOTE: this function raises Fail if called while a style is active. + *) + val styleMode : (device * bool option) -> bool + + end = struct + + structure A = ANSITerm + + type state = { + fg : A.color option, (* NONE is default color for terminal *) + bg : A.color option, (* NONE is default color for terminal *) + bold : bool, + blink : bool, + ul : bool, + rev : bool, + invis : bool + } + + val initState = { + fg=NONE, bg=NONE, + bold=false, blink=false, ul=false, rev=false, invis=false + } + + (* compute the commands to transition from one state to another *) + fun transition (s1 : state, s2 : state) = let + (* compute the commands to set the foreground color *) + val mv = (case (#fg s1, #fg s2) + of (SOME c1, SOME c2) => if c1 = c2 then [] else [A.FG c2] + | (_, SOME c) => [A.FG c] + | (_, NONE) => [A.FG A.Default] + (* end case *)) + (* compute the commands to set the background color *) + val mv = (case (#bg s1, #bg s2) + of (SOME c1, SOME c2) => if c1 = c2 then mv else A.FG c2 :: mv + | (_, SOME c) => A.BG c :: mv + | (_, NONE) => A.FG A.Default :: mv + (* end case *)) + (* compute the commands to set the other display attributes *) + fun add (proj, cmd, off, mv) = (case (proj s1, proj s2) + of (false, true) => cmd::mv + | (true, false) => off::mv + | _ => mv + (* end case *)) + val mv = add (#bold, A.BF, A.NORMAL, mv) + val mv = add (#blink, A.BLINK, A.BLINK_OFF, mv) + val mv = add (#ul, A.UL, A.UL_OFF, mv) + val mv = add (#rev, A.REV, A.REV_OFF, mv) + val mv = add (#invis, A.INVIS, A.INVIS_OFF, mv) + in + if null mv then "" else A.toString mv + end + + (* apply a command to a state *) + fun updateState1 (cmd, style as {fg, bg, bold, blink, ul, rev, invis}) = ( + case cmd + of A.FG c => + {fg=SOME c, bg=bg, bold=bold, blink=blink, ul=ul, rev=rev, invis=invis} + | A.BG c => + {fg=fg, bg=SOME c, bold=bold, blink=blink, ul=ul, rev=rev, invis=invis} + | A.BF => + {fg=fg, bg=bg, bold=true, blink=blink, ul=ul, rev=rev, invis=invis} + | A.BLINK => + {fg=fg, bg=bg, bold=bold, blink=true, ul=ul, rev=rev, invis=invis} + | A.UL => + {fg=fg, bg=bg, bold=bold, blink=blink, ul=true, rev=rev, invis=invis} + | A.REV => + {fg=fg, bg=bg, bold=bold, blink=blink, ul=ul, rev=true, invis=invis} + | A.INVIS => + {fg=fg, bg=bg, bold=bold, blink=blink, ul=ul, rev=rev, invis=true} +(* TODO: add support for A.DIM *) + | _ => style + (* end case *)) + + (* apply a sequence of commands to a state *) + fun updateState (cmds, st) = List.foldl updateState1 st cmds + + type style = A.style list + + datatype device = DEV of { + mode : bool ref, + dst : TextIO.outstream, + wid : int option ref, + stk : state list ref + } + + fun top [] = initState + | top (st::r) = st + + fun sameStyle (s1 : style, s2) = (s1 = s2) + + fun pushStyle (DEV{mode, dst, wid, stk}, sty) = + if (! mode) + then let + val curSt = top (!stk) + val newSt = updateState (sty, curSt) + in + TextIO.output (dst, transition(curSt, newSt)); + stk := newSt :: !stk + end + else () + + fun popStyle (DEV{mode, dst, wid, stk}) = + if (! mode) + then (case !stk + of [] => () + | curSt::r => let + val newSt = top r + in + TextIO.output (dst, transition(curSt, newSt)); + stk := r + end + (* end case *)) + else () + + fun defaultStyle _ = [] + + (* return true if an outstream is a TTY *) + fun isTTY outS = let + val (TextPrimIO.WR{ioDesc, ...}, _) = + TextIO.StreamIO.getWriter(TextIO.getOutstream outS) + in + case ioDesc + of SOME iod => (OS.IO.kind iod = OS.IO.Kind.tty) + | _ => false + end + + fun openDev {dst, wid} = DEV{ + dst = dst, wid = ref(SOME wid), mode = ref(isTTY dst), stk = ref[] + } + + (* maximum printing depth (in terms of boxes) *) + fun maxDepth _ = NONE + fun setMaxDepth _ = () + + fun ellipses _ = ("", 0) + fun setEllipses _ = () + fun setEllipsesWithSz _ = () + + (* the width of the device *) + fun lineWidth (DEV{wid, ...}) = !wid + fun setLineWidth (DEV{wid, ...}, w) = wid := w + + (* the suggested maximum width of indentation; `NONE` is interpreted as no limit. *) + fun maxIndent _ = NONE + fun setMaxIndent _ = () + + (* the suggested maximum width of text on a line *) + fun textWidth _ = NONE + fun setTextWidth _ = () + + (* output some number of spaces to the device *) + fun space (DEV{dst, ...}, n) = TextIO.output (dst, StringCvt.padLeft #" " n "") + + (* output an indentation of the given width to the device *) + val indent = space + + (* output a new-line to the device *) + fun newline (DEV{dst, ...}) = TextIO.output1 (dst, #"\n") + + (* output a string/character in the current style to the device *) + fun string (DEV{dst, ...}, s) = TextIO.output (dst, s) + fun char (DEV{dst, ...}, c) = TextIO.output1 (dst, c) + + (* if the device is buffered, then flush any buffered output *) + fun flush (DEV{dst, ...}) = TextIO.flushOut dst + + (* enable styled output by passing true to this function. It returns + * the previous state of the device. + *) + fun styleMode (DEV{stk = ref(_::_), ...}, _) = + raise Fail "attempt to change mode inside scope of style" + | styleMode (DEV{mode, ...}, NONE) = !mode + | styleMode (DEV{mode as ref m, ...}, SOME flg) = (mode := flg; m) + + end diff --git a/smlnj-lib/PP/devices/ansi-term-pp.sml b/smlnj-lib/PP/devices/ansi-term-pp.sml new file mode 100644 index 0000000..025f60e --- /dev/null +++ b/smlnj-lib/PP/devices/ansi-term-pp.sml @@ -0,0 +1,42 @@ +(* ansi-term-pp.sml + * + * COPYRIGHT (c) 2005 John Reppy (http://www.cs.uchicago.edu/~jhr) + * All rights reserved. + *) + +structure ANSITermPP : sig + + structure Tok : sig + include PP_TOKEN + where type style = ANSITermDev.style + val token : (ANSITermDev.style * string) -> token + end + + include PP_STREAM + where type device = ANSITermDev.device + where type style = ANSITermDev.style + where type token = Tok.token + + val openOut : {dst : TextIO.outstream, wid : int} -> stream + + end = struct + + structure Tok = + struct + type style = ANSITermDev.style + datatype token = Tok of (style * string) + fun string (Tok(sty, s)) = s + fun style (Tok(sty, s)) = sty + fun size (Tok(sty, s)) = String.size s + val token = Tok + end + + structure PP = PPStreamFn ( + structure Token = Tok + structure Device = ANSITermDev) + + open PP + + fun openOut arg = openStream(ANSITermDev.openDev arg) + + end diff --git a/smlnj-lib/PP/devices/char-buffer-dev.sml b/smlnj-lib/PP/devices/char-buffer-dev.sml new file mode 100644 index 0000000..07e66d3 --- /dev/null +++ b/smlnj-lib/PP/devices/char-buffer-dev.sml @@ -0,0 +1,44 @@ +(* char-buffer-dev.sml + * + * COPYRIGHT (c) 2020 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * A pretty printer that puts its output in a CharBuffer.buf object. There + * are no styles and tokens are strings. + *) + +structure CharBufferDev : sig + + include PP_DEVICE + + val openDev : {dst : CharBuffer.buf, wid : int} -> device + + end = struct + + structure DevOps = struct + type t = CharBuffer.buf + (* no style support *) + type style = unit + fun sameStyle _ = true + fun pushStyle _ = () + fun popStyle _ = () + fun defaultStyle _ = () + (* output some number of spaces to the device *) + fun space (dst, n) = CharBuffer.addVec (dst, StringCvt.padLeft #" " n "") + val indent = space + (* output a new-line to the device *) + fun newline dst = CharBuffer.add1 (dst, #"\n") + (* output a string/character in the current style to the device *) + fun string (dst, s) = CharBuffer.addVec (dst, s) + fun char (dst, c) = CharBuffer.add1 (dst, c) + (* nothing to flush *) + fun flush dst = () + end + + structure Device = DefaultDeviceFn (DevOps) + + open Device + + fun openDev {dst, wid} = Device.newWithWidth (dst, wid) + + end diff --git a/smlnj-lib/PP/devices/char-buffer-pp.sml b/smlnj-lib/PP/devices/char-buffer-pp.sml new file mode 100644 index 0000000..8e2d9fc --- /dev/null +++ b/smlnj-lib/PP/devices/char-buffer-pp.sml @@ -0,0 +1,35 @@ +(* char-buffer-pp.sml + * + * COPYRIGHT (c) 2020 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * A pretty printer that puts its output in a CharBuffer.buf object. There + * are no styles and tokens are strings. You can use this module to pretty-print + * into a string as follows: + * + * val buf = CharBuffer.new 1024 + * val ppStrm = CharBufferPP.openBuf {dst = buf, wid = 80} + * .... pretty printing .... + * val result = CharBuffer.contents buf + *) + +structure CharBufferPP : sig + + include PP_STREAM + where type token = string + + val openBuf : {dst : CharBuffer.buf, wid : int} -> stream + + end = struct + + structure Device = CharBufferDev + + structure PP = PPStreamFn ( + structure Token = StringToken + structure Device = Device) + + open PP + + fun openBuf arg = PP.openStream (Device.openDev arg) + + end diff --git a/smlnj-lib/PP/devices/default-device-fn.sml b/smlnj-lib/PP/devices/default-device-fn.sml new file mode 100644 index 0000000..d3f0426 --- /dev/null +++ b/smlnj-lib/PP/devices/default-device-fn.sml @@ -0,0 +1,122 @@ +(* default-device-fn.sml + * + * COPYRIGHT (c) 2020 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * A functor that implements the device properties for a device + *) + +signature DEVICE_OPS = + sig + + type t + type style + + (* style operations *) + val sameStyle : (style * style) -> bool + val pushStyle : (t * style) -> unit + val popStyle : t -> unit + val defaultStyle : t -> style + + (* Output operations *) + val indent : (t * int) -> unit + val space : (t * int) -> unit + val newline : t -> unit + val string : (t * string) -> unit + val char : (t * char) -> unit + val flush : t -> unit + + end + +functor DefaultDeviceFn (D : DEVICE_OPS) : sig + + include PP_DEVICE + + structure DevOps : DEVICE_OPS + + (* create a new device with default properties *) + val new : DevOps.t -> device + + (* create a new device with the specified line width *) + val newWithWidth : DevOps.t * int -> device + + (* create a new device with the specified properties *) + val newWithProps : { + ops : DevOps.t, + maxDepth : int option, + ellipses : (string * int), + lineW : int option, + textW : int option, + maxIndent : int option + } -> device + + end = struct + + structure DevOps = D + + datatype device = Dev of { + ops : D.t, + depthLimit : int option ref, + ellipses : (string * int) ref, + lineWid : int option ref, + textWid : int option ref, + indentLimit : int option ref + } + + type style = D.style + + fun newWithProps {ops, maxDepth, ellipses, lineW, textW, maxIndent} = Dev{ + ops = ops, + depthLimit = ref maxDepth, + ellipses = ref ellipses, + lineWid = ref lineW, + textWid = ref textW, + indentLimit = ref maxIndent + } + + fun newWithWidth (ops, w) = newWithProps { + ops = ops, + maxDepth = NONE, + ellipses = ("...", 3), + lineW = SOME w, + textW = NONE, + maxIndent = NONE + } + + fun new ops = newWithProps { + ops = ops, + maxDepth = NONE, + ellipses = ("...", 3), + lineW = NONE, + textW = NONE, + maxIndent = NONE + } + + (* style operations *) + val sameStyle = D.sameStyle + fun pushStyle (Dev{ops, ...}, sty) = D.pushStyle (ops, sty) + fun popStyle (Dev{ops, ...}) = D.popStyle ops + fun defaultStyle (Dev{ops, ...}) = D.defaultStyle ops + + (* Output operations *) + fun indent (Dev{ops, ...}, n) = D.indent (ops, n) + fun space (Dev{ops, ...}, n) = D.space (ops, n) + fun newline (Dev{ops, ...}) = D.newline ops + fun string (Dev{ops, ...}, s) = D.string (ops, s) + fun char (Dev{ops, ...}, c) = D.char (ops, c) + fun flush (Dev{ops, ...}) = D.flush ops + + (* device properties *) + fun maxDepth (Dev{depthLimit, ...}) = !depthLimit + fun setMaxDepth (Dev{depthLimit, ...}, d) = depthLimit := d + fun ellipses (Dev{ellipses, ...}) = !ellipses + fun setEllipses (Dev{ellipses, ...}, s) = ellipses := (s, String.size s) + fun setEllipsesWithSz (Dev{ellipses, ...}, s, sz) = ellipses := (s, sz) + fun lineWidth (Dev{lineWid, ...}) = !lineWid + fun setLineWidth (Dev{lineWid, ...}, w) = lineWid := w + fun maxIndent (Dev{indentLimit, ...}) = !indentLimit + fun setMaxIndent (Dev{indentLimit, ...}, n) = indentLimit := n + fun textWidth (Dev{textWid, ...}) = !textWid + fun setTextWidth (Dev{textWid, ...}, n) = textWid := n + + end (* functor DefaultDeviceFn *) diff --git a/smlnj-lib/PP/devices/html3-dev.sml b/smlnj-lib/PP/devices/html3-dev.sml new file mode 100644 index 0000000..291afb6 --- /dev/null +++ b/smlnj-lib/PP/devices/html3-dev.sml @@ -0,0 +1,210 @@ +(* html3-dev.sml + * + * COPYRIGHT (c) 2020 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * A pretty printing device that uses HTML (Version 3.2) markup to control layout. + *) + +structure HTML3Dev : sig + + include PP_DEVICE + + (* combine two styles into one *) + val combineStyle : (style * style) -> style + + (* unstyled text *) + val styleNONE : style + + (* standard HTML text styles *) + val styleTT : style + val styleI : style + val styleB : style + val styleU : style + val styleSTRIKE : style + val styleEM : style + val styleSTRONG : style + val styleDFN : style + val styleCODE : style + val styleSAMP : style + val styleKBD : style + val styleVAR : style + val styleCITE : style + + (* color text (using FONT element) *) + val color : string -> style + + (* hyper-text links and anchors *) + val link : string -> style + val anchor : string -> style + val linkAnchor : {name : string, href : string} -> style + + val openDev : {wid : int, textWid : int option} -> device + val done : device -> HTML.text + + end = struct + + datatype style + = NOEMPH + | TT | I | B | U | STRIKE | EM + | STRONG | DFN | CODE | SAMP | KBD + | VAR | CITE + | COLOR of string + | A of {href : string option, name : string option} + | STYS of style list + + datatype device = DEV of { + lineWid : int option ref, + textWid : int option ref, + emphStk : (HTML.text list * style) list ref, + txt : HTML.text list ref + } + + (* return the current emphasis *) + fun curEmph (DEV{emphStk, ...}) = (case !emphStk + of [] => NOEMPH + | ((_, em)::r) => em + (* end case *)) + + (* add PCDATA to the text list *) + fun pcdata (DEV{txt, ...}, s) = txt := HTML.PCDATA s :: !txt + + (* replace the sequence of PCDATA elements at the head of the + * txt list with its concatenation. + *) + fun concatTxt (DEV{txt, ...}) = let + fun f ([], []) = [] + | f (HTML.PCDATA s :: r, l) = f (r, s::l) + | f (r, l) = HTML.PCDATA(String.concat l) :: r + in + f (!txt, []) + end + + (* are two styles the same? *) + fun sameStyle (s1 : style, s2) = (s1 = s2) + + fun wrapStyle (sty, [], tl') = tl' + | wrapStyle (sty, tl, tl') = let + fun wrap (NOEMPH, t) = t + | wrap (TT, t) = HTML.TT t + | wrap (I, t) = HTML.I t + | wrap (B, t) = HTML.B t + | wrap (U, t) = HTML.U t + | wrap (STRIKE, t) = HTML.STRIKE t + | wrap (EM, t) = HTML.EM t + | wrap (STRONG, t) = HTML.STRONG t + | wrap (DFN, t) = HTML.DFN t + | wrap (CODE, t) = HTML.CODE t + | wrap (SAMP, t) = HTML.SAMP t + | wrap (KBD, t) = HTML.KBD t + | wrap (VAR, t) = HTML.VAR t + | wrap (CITE, t) = HTML.CITE t + | wrap (COLOR c, t) = HTML.FONT{color=SOME c, size=NONE, content=t} + | wrap (A{name, href}, t) = HTML.A{ + name = name, href = href, + rel = NONE, rev = NONE, title = NONE, + content = t + } + | wrap (STYS l, t) = List.foldr wrap t l + val t = (case tl of [t] => t | _ => HTML.TextList(List.rev tl)) + in + wrap(sty, t) :: tl' + end + + (* push/pop a style from the devices style stack. A pop on an + * empty style stack is a nop. + *) + fun pushStyle (dev as DEV{emphStk, txt, ...}, sty) = ( + emphStk := (concatTxt dev, sty) :: !emphStk; + txt := []) + fun popStyle (DEV{emphStk as ref[], ...}) = () + | popStyle (dev as DEV{emphStk as ref ((tl, sty) :: r), txt, ...}) = ( + txt := wrapStyle (sty, concatTxt dev, tl); + emphStk := r) + + (* the default style for the device (this is the current style, + * if the style stack is empty). + *) + fun defaultStyle _ = NOEMPH + + (* maximum printing depth (in terms of boxes) *) + fun maxDepth _ = NONE + fun setMaxDepth _ = () +(* DEPRECATED *) + val depth = maxDepth + + (* the sized string to print in place of boxes when the maximum depth is reached. *) + fun ellipses _ = ("", 0) + fun setEllipses _ = () + fun setEllipsesWithSz _ = () + + (* the width of the device *) + fun lineWidth (DEV{lineWid, ...}) = !lineWid + fun setLineWidth (DEV{lineWid, ...}, w) = lineWid := w + + (* the suggested maximum width of indentation; `NONE` is interpreted as no limit. *) + fun maxIndent _ = NONE + fun setMaxIndent _ = () + + (* the suggested maximum width of text on a line *) + fun textWidth (DEV{textWid, ...}) = !textWid + fun setTextWidth (DEV{textWid, ...}, w) = textWid := w + + (* output some number of spaces to the device *) + fun space (dev, n) = + pcdata(dev, concat(List.tabulate (n, fn _ => " "))) + + (* output an indentation of the given width to the device *) + val indent = space + + (* output a new-line to the device *) + fun newline (dev as DEV{txt, ...}) = + txt := HTML.BR{clear=NONE} :: (concatTxt dev) + + (* output a string/character in the current style to the device *) + val string = pcdata + fun char (dev, c) = pcdata(dev, str c) + + (* flush is a nop for us *) + fun flush _ = () + + fun combineStyle (NOEMPH, sty) = sty + | combineStyle (sty, NOEMPH) = sty + | combineStyle (STYS l1, STYS l2) = STYS(l1 @ l2) + | combineStyle (sty, STYS l) = STYS(sty::l) + | combineStyle (sty1, sty2) = STYS[sty1, sty2] + + val styleNONE = NOEMPH + val styleTT = TT + val styleI = I + val styleB = B + val styleU = U + val styleSTRIKE = STRIKE + val styleEM = EM + val styleSTRONG = STRONG + val styleDFN = DFN + val styleCODE = CODE + val styleSAMP = SAMP + val styleKBD = KBD + val styleVAR = VAR + val styleCITE = CITE + val color = COLOR + fun link s = A{href=SOME s, name=NONE} + fun anchor s = A{href=NONE, name=SOME s} + fun linkAnchor {name, href} = A{href=SOME href, name = SOME name} + + fun openDev {wid, textWid} = DEV{ + txt = ref [], + emphStk = ref [], + lineWid = ref (SOME wid), + textWid = ref textWid + } + + fun done (dev as DEV{emphStk = ref [], txt, ...}) = (case (concatTxt dev) + of [t] => (txt := []; t) + | l => (txt := []; HTML.TextList(List.rev l)) + (* end case *)) + | done _ = raise Fail "device is not done yet" + + end; (* HTMLDev *) + diff --git a/smlnj-lib/PP/devices/simple-textio-dev.sml b/smlnj-lib/PP/devices/simple-textio-dev.sml new file mode 100644 index 0000000..1e1741f --- /dev/null +++ b/smlnj-lib/PP/devices/simple-textio-dev.sml @@ -0,0 +1,43 @@ +(* simple-textio-dev.sml + * + * COPYRIGHT (c) 2018 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * A simple (no styles) pretty-printing device for output to TextIO outstreams. + *) + +structure SimpleTextIODev : sig + + include PP_DEVICE + + val openDev : {dst : TextIO.outstream, wid : int} -> device + + end = struct + + structure DevOps = struct + type t = TextIO.outstream + (* no style support *) + type style = unit + fun sameStyle _ = true + fun pushStyle _ = () + fun popStyle _ = () + fun defaultStyle _ = () + (* output some number of spaces to the device *) + fun space (dst, n) = TextIO.output (dst, StringCvt.padLeft #" " n "") + val indent = space + (* output a new-line to the device *) + fun newline dst = TextIO.output1 (dst, #"\n") + (* output a string/character in the current style to the device *) + fun string (dst, s) = TextIO.output (dst, s) + fun char (dst, c) = TextIO.output1 (dst, c) + (* flush output stream *) + fun flush dst = TextIO.flushOut dst + end + + structure Device = DefaultDeviceFn (DevOps) + + open Device + + fun openDev {dst, wid} = Device.newWithWidth (dst, wid) + + end; diff --git a/smlnj-lib/PP/devices/sources.cm b/smlnj-lib/PP/devices/sources.cm new file mode 100644 index 0000000..c6b3b75 --- /dev/null +++ b/smlnj-lib/PP/devices/sources.cm @@ -0,0 +1,34 @@ +(* sources.cm + * + * COPYRIGHT (c) 2018 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * Sources file for Pretty printer device library. + *) + +Group + + structure ANSITermDev + structure ANSITermPP + structure CharBufferDev + structure CharBufferPP + structure SimpleTextIODev + structure StringToken + structure TextIOPP + structure TextPP + +is + + $/basis.cm + $/smlnj-lib.cm + ../src/sources.cm + + ansi-term-dev.sml + ansi-term-pp.sml + char-buffer-dev.sml + char-buffer-pp.sml + default-device-fn.sml + simple-textio-dev.sml + string-token.sml + textio-pp.sml + text-pp.sml diff --git a/smlnj-lib/PP/devices/string-token.sml b/smlnj-lib/PP/devices/string-token.sml new file mode 100644 index 0000000..37a5415 --- /dev/null +++ b/smlnj-lib/PP/devices/string-token.sml @@ -0,0 +1,16 @@ +(* string-token.sml + * + * COPYRIGHT (c) 2018 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * A trivial implementation of tokens as strings w/o style information. + *) + +structure StringToken : PP_TOKEN = + struct + type style = unit + type token = string + fun string s = s + fun style _ = () + fun size s = String.size s + end diff --git a/smlnj-lib/PP/devices/text-pp.sml b/smlnj-lib/PP/devices/text-pp.sml new file mode 100644 index 0000000..cf4a142 --- /dev/null +++ b/smlnj-lib/PP/devices/text-pp.sml @@ -0,0 +1,74 @@ +(* text-pp.sml + * + * COPYRIGHT (c) 2020 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * A pretty printer that generates plain text; either to a TextIO.outstream + * or to a CharBuffer.buf object. It essentially unifies the behavior of + * the TextIOPP and CharBufferPP structures. + *) + +structure TextPP : sig + + include PP_STREAM + where type token = string + + val openOutstream : {dst : TextIO.outstream, wid : int} -> stream + + val openBuffer : {dst : CharBuffer.buf, wid : int} -> stream + + end = struct + + structure DevOps = struct + datatype t = OPS of { + add1 : char -> unit, + addVec : string -> unit, + flush : unit -> unit + } + (* no style support *) + type style = unit + fun sameStyle _ = true + fun pushStyle _ = () + fun popStyle _ = () + fun defaultStyle _ = () + (* output some number of spaces to the device *) + fun space (OPS{addVec, ...}, n) = addVec (StringCvt.padLeft #" " n "") + val indent = space + (* output a new-line to the device *) + fun newline (OPS{add1, ...}) = add1 #"\n" + (* output a string/character in the current style to the device *) + fun string (OPS{addVec, ...}, s) = addVec s + fun char (OPS{add1, ...}, c) = add1 c + (* flush output *) + fun flush (OPS{flush, ...}) = flush() + end + + structure Device = DefaultDeviceFn (DevOps) + + structure PP = PPStreamFn ( + structure Token = StringToken + structure Device = Device) + + open PP + + fun openOutstream {dst, wid} = let + val dev = Device.newWithWidth (DevOps.OPS{ + add1 = fn c => TextIO.output1 (dst, c), + addVec = fn c => TextIO.output (dst, c), + flush = fn () => TextIO.flushOut dst + }, wid) + in + PP.openStream dev + end + + fun openBuffer {dst, wid} = let + val dev = Device.newWithWidth (DevOps.OPS{ + add1 = fn c => CharBuffer.add1 (dst, c), + addVec = fn c => CharBuffer.addVec (dst, c), + flush = fn () => () + }, wid) + in + PP.openStream dev + end + + end diff --git a/smlnj-lib/PP/devices/textio-pp.sml b/smlnj-lib/PP/devices/textio-pp.sml new file mode 100644 index 0000000..9b02ffc --- /dev/null +++ b/smlnj-lib/PP/devices/textio-pp.sml @@ -0,0 +1,28 @@ +(* textio-pp.sml + * + * COPYRIGHT (c) 2018 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * A pretty printer with TextIO output; there are no styles and + * tokens are atoms. + *) + +structure TextIOPP : sig + + include PP_STREAM + where type token = string + + val openOut : {dst : TextIO.outstream, wid : int} -> stream + + end = struct + + structure PP = PPStreamFn ( + structure Token = StringToken + structure Device = SimpleTextIODev) + + open PP + + fun openOut arg = openStream(SimpleTextIODev.openDev arg) + + end; + diff --git a/smlnj-lib/PP/examples/old-pp.sml b/smlnj-lib/PP/examples/old-pp.sml new file mode 100644 index 0000000..4367478 --- /dev/null +++ b/smlnj-lib/PP/examples/old-pp.sml @@ -0,0 +1,118 @@ +(* old-pp.sml + * + * COPYRIGHT (c) 2020 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * An implementation of the SML/NJ's old PP interface on top of the PP library. + *) + +signature OLD_PRETTYPRINT = + sig + type ppstream + type ppconsumer = { + consumer : string -> unit, + linewidth : int, + flush : unit -> unit + } + + datatype break_style = CONSISTENT | INCONSISTENT + + exception PP_FAIL of string + + val mk_ppstream : ppconsumer -> ppstream + val dest_ppstream : ppstream -> ppconsumer + val add_break : ppstream -> int * int -> unit + val add_newline : ppstream -> unit + val add_string : ppstream -> string -> unit + val begin_block : ppstream -> break_style -> int -> unit + val end_block : ppstream -> unit + val clear_ppstream : ppstream -> unit + val flush_ppstream : ppstream -> unit + val with_pp : ppconsumer -> (ppstream -> unit) -> unit + val pp_to_string : int -> (ppstream -> 'a -> unit) -> 'a -> string + + end; + +structure OldPrettyPrint :> OLD_PRETTYPRINT = + struct + + type ppconsumer = { + consumer : string -> unit, + linewidth : int, + flush : unit -> unit + } + + structure Dev = + struct + type device = ppconsumer + type style = unit + fun sameStyle _ = true + fun pushStyle _ = () + fun popStyle _ = () + fun defaultStyle _ = () + fun maxDepth _ = NONE + fun setMaxDepth _ = () + fun ellipses _ = ("", 0) + fun setEllipses _ = () + fun setEllipsesWithSz _ = () + fun lineWidth {consumer, linewidth, flush} = SOME linewidth + fun setLineWidth _ = () + fun maxIndent _ = NONE + fun setMaxIndent _ = () + fun textWidth _ = NONE + fun setTextWidth _ = () + fun space ({consumer, linewidth, flush}, n) = + consumer (StringCvt.padLeft #" " n "") + val indent = space + fun newline {consumer, linewidth, flush} = consumer "\n" + fun string ({consumer, linewidth, flush}, s) = consumer s + fun char ({consumer, linewidth, flush}, c) = consumer(str c) + fun flush {consumer, linewidth, flush} = flush() + end + + structure PP = PPStreamFn(structure Token = StringToken structure Device = Dev) + + datatype ppstream = STRM of { + consumer : ppconsumer, + strm : PP.stream + } + + datatype break_style = CONSISTENT | INCONSISTENT + + exception PP_FAIL of string + + fun mk_ppstream ppc = STRM{ + consumer = ppc, + strm = PP.openStream ppc + } + fun dest_ppstream (STRM{consumer, ...}) = consumer + fun add_break (STRM{strm, ...}) (nsp, offset) = + PP.break strm {nsp=nsp, offset=offset} + fun add_newline (STRM{strm, ...}) = PP.newline strm + fun add_string (STRM{strm, ...}) s = PP.string strm s + fun begin_block (STRM{strm, ...}) CONSISTENT indent = + PP.openHVBox strm (PP.Rel indent) + | begin_block (STRM{strm, ...}) INCONSISTENT indent = + PP.openHOVBox strm (PP.Rel indent) + fun end_block (STRM{strm, ...}) = PP.closeBox strm + fun clear_ppstream(STRM{strm, ...}) = + raise Fail "clear_ppstream not implemented" + fun flush_ppstream (STRM{strm, ...}) = PP.flushStream strm + fun with_pp ppc f = let + val (ppStrm as (STRM{strm, ...})) = mk_ppstream ppc + in + f ppStrm; + PP.closeStream strm + end + fun pp_to_string wid ppFn obj = let + val l = ref ([] : string list) + fun attach s = l := s :: !l + in + with_pp { + consumer = attach, linewidth = wid, flush = fn()=>() + } (fn ppStrm => ppFn ppStrm obj); + String.concat(List.rev(!l)) + end + + end; + diff --git a/smlnj-lib/PP/pp-extras-lib.cm b/smlnj-lib/PP/pp-extras-lib.cm new file mode 100644 index 0000000..de7f903 --- /dev/null +++ b/smlnj-lib/PP/pp-extras-lib.cm @@ -0,0 +1,38 @@ +(* pp-extras-lib.cm + * + * COPYRIGHT (c) 2020 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * CM file for compiling the PP Library with the extra devices. + *) + +Library + + signature PP_DESC + signature PP_DEVICE + signature PP_TOKEN + signature PP_STREAM + + functor PPStreamFn + functor PPDescFn + functor PPDebugFn + +(* devices *) + structure TextIOPP + structure TextPP + structure SimpleTextIODev + structure CharBufferPP + structure ANSITermDev + structure ANSITermPP + structure StringToken + +(* extras *) + structure HTML3Dev + +is + + $/basis.cm + $/html-lib.cm + $/pp-lib.cm + + devices/html3-dev.sml diff --git a/smlnj-lib/PP/pp-lib.cm b/smlnj-lib/PP/pp-lib.cm new file mode 100644 index 0000000..2332fc1 --- /dev/null +++ b/smlnj-lib/PP/pp-lib.cm @@ -0,0 +1,31 @@ +(* pp-lib.cm + * + * COPYRIGHT (c) 2020 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * The main sources file for the PP library. + *) + +Library + + signature PP_DESC + signature PP_DEVICE + signature PP_TOKEN + signature PP_STREAM + + functor PPStreamFn + functor PPDescFn + functor PPDebugFn + +(* devices *) + structure TextIOPP + structure TextPP + structure SimpleTextIODev + structure CharBufferPP + structure ANSITermDev + structure ANSITermPP + structure StringToken + +is + src/sources.cm + devices/sources.cm diff --git a/smlnj-lib/PP/src/FORMAT b/smlnj-lib/PP/src/FORMAT new file mode 100644 index 0000000..5abe351 --- /dev/null +++ b/smlnj-lib/PP/src/FORMAT @@ -0,0 +1,18 @@ +%h{ -- open hbox +%v{ -- open vbox +%hov{ -- open hov box +%hv{ -- open hv box +%b{ -- open box +%} -- close box + +%t -- token +%d -- integer +%s -- string +%c -- character +%b -- boolean + + -- space n +\n -- newline +%, -- cut +%; -- break +%_ -- non-breakable space diff --git a/smlnj-lib/PP/src/pp-debug-fn.sml b/smlnj-lib/PP/src/pp-debug-fn.sml new file mode 100644 index 0000000..635be9e --- /dev/null +++ b/smlnj-lib/PP/src/pp-debug-fn.sml @@ -0,0 +1,63 @@ +(* pp-debug-fn.sml + * + * COPYRIGHT (c) 2017 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * A wrapper for the PPStreamFn, which dumps the current PP state prior + * to each operation. + *) + +functor PPDebugFn (PP : sig + include PP_STREAM + val dump : (TextIO.outstream * stream) -> unit + end) : sig + include PP_STREAM + val debugStrm : TextIO.outstream ref + end = struct + + type device = PP.device + type stream = PP.stream + type token = PP.token + type style = PP.style + datatype indent = datatype PP.indent + + val debugStrm = ref TextIO.stdErr + + fun debug name f strm arg = ( + TextIO.output(!debugStrm, concat["*** ", name, ": "]); + PP.dump (!debugStrm, strm); + TextIO.flushOut(!debugStrm); + f strm arg) + fun debug' name f strm = ( + TextIO.output(!debugStrm, concat["*** ", name, ": "]); + PP.dump (!debugStrm, strm); + TextIO.flushOut(!debugStrm); + f strm) + + val openStream = PP.openStream + val flushStream = debug' "flushStream" PP.flushStream + val closeStream = debug' "closeStream" PP.closeStream + val getDevice = PP.getDevice + + val openHBox = debug' "openHBox" PP.openHBox + val openVBox = debug "openVBox" PP.openVBox + val openHVBox = debug "openHVBox" PP.openHVBox + val openHOVBox = debug "openHOVBox" PP.openHOVBox + val openBox = debug "openBox" PP.openBox + val closeBox = debug' "closeBox" PP.closeBox + + val token = debug "token" PP.token + val string = debug "string" PP.string + + val pushStyle = PP.pushStyle + val popStyle = PP.popStyle + + val break = debug "break" PP.break + val space = debug "space" PP.space + val cut = debug' "cut" PP.cut + val newline = debug' "newline" PP.newline + val nbSpace = debug "nbSpace" PP.nbSpace + val control = debug "control" PP.control + + end; + diff --git a/smlnj-lib/PP/src/pp-desc-fn.sml b/smlnj-lib/PP/src/pp-desc-fn.sml new file mode 100644 index 0000000..35ecc78 --- /dev/null +++ b/smlnj-lib/PP/src/pp-desc-fn.sml @@ -0,0 +1,72 @@ +(* pp-desc-fn.sml + * + * COPYRIGHT (c) 2017 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * This functor implements a declarative way to specify pretty-printing + * (see pp-desc-sig.sml). + *) + +functor PPDescFn (S : PP_STREAM) : PP_DESC = + struct + + structure PPS = S + + type token = PPS.token + type style = PPS.style + type indent = PPS.indent + + (* The pp_desc type is a concrete representation of a PP layout. *) + datatype pp_desc + = HBox of pp_desc list + | VBox of (indent * pp_desc list) + | HVBox of (indent * pp_desc list) + | HOVBox of (indent * pp_desc list) + | Box of (indent * pp_desc list) + | Token of token + | String of string + | Style of (style * pp_desc list) + | Break of {nsp : int, offset : int} + | NewLine + | NBSpace of int + | Control of (PPS.device -> unit) + + (* pretty print a description *) + fun description (strm, ppd) = let + fun pp (HBox l) = (PPS.openHBox strm; ppList l; PPS.closeBox strm) + | pp (VBox(i, l)) = (PPS.openVBox strm i; ppList l; PPS.closeBox strm) + | pp (HVBox(i, l)) = (PPS.openHVBox strm i; ppList l; PPS.closeBox strm) + | pp (HOVBox(i, l)) = (PPS.openHOVBox strm i; ppList l; PPS.closeBox strm) + | pp (Box(i, l)) = (PPS.openBox strm i; ppList l; PPS.closeBox strm) + | pp (Token tok) = PPS.token strm tok + | pp (String s) = PPS.string strm s + | pp (Style(sty, l)) = ( + PPS.pushStyle(strm, sty); ppList l; PPS.popStyle strm) + | pp (Break brk) = PPS.break strm brk + | pp NewLine = PPS.newline strm + | pp (NBSpace n) = PPS.nbSpace strm n + | pp (Control ctlFn) = PPS.control strm ctlFn + and ppList [] = () + | ppList (item::r) = (pp item; ppList r) + in + pp ppd + end + + (* exported PP description constructors *) + val hBox = HBox + val vBox = VBox + val hvBox = HVBox + val hovBox = HOVBox + val box = Box + val token = Token + val string = String + val style = Style + val break = Break + fun space n = Break{nsp = n, offset = 0} + val cut = Break{nsp = 0, offset = 0} + val newline = NewLine + val nbSpace = NBSpace + val control = Control + + end; + diff --git a/smlnj-lib/PP/src/pp-desc-sig.sml b/smlnj-lib/PP/src/pp-desc-sig.sml new file mode 100644 index 0000000..b3dd661 --- /dev/null +++ b/smlnj-lib/PP/src/pp-desc-sig.sml @@ -0,0 +1,37 @@ +(* pp-desc-sig.sml + * + * COPYRIGHT (c) 2017 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * This interface provides a declarative way to specify pretty-printing. + *) + +signature PP_DESC = + sig + structure PPS : PP_STREAM + + type pp_desc + + val hBox : pp_desc list -> pp_desc + val vBox : (PPS.indent * pp_desc list) -> pp_desc + val hvBox : (PPS.indent * pp_desc list) -> pp_desc + val hovBox : (PPS.indent * pp_desc list) -> pp_desc + val box : (PPS.indent * pp_desc list) -> pp_desc + + val token : PPS.token -> pp_desc + val string : string -> pp_desc + + val style : (PPS.style * pp_desc list) -> pp_desc + + val break : {nsp : int, offset : int} -> pp_desc + val space : int -> pp_desc + val cut : pp_desc + val newline : pp_desc + val nbSpace : int -> pp_desc + + val control : (PPS.device -> unit) -> pp_desc + + val description : PPS.stream * pp_desc -> unit + + end + diff --git a/smlnj-lib/PP/src/pp-device-sig.sml b/smlnj-lib/PP/src/pp-device-sig.sml new file mode 100644 index 0000000..514a096 --- /dev/null +++ b/smlnj-lib/PP/src/pp-device-sig.sml @@ -0,0 +1,93 @@ +(* pp-device-sig.sml + * + * COPYRIGHT (c) 2020 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * A pretty-printer device is an abstraction of an output stream. + *) + +signature PP_DEVICE = + sig + + type device + (* a device is an abstraction of an output stream. *) + + type style + (* an abstraction of font and color information. For devices that + * support styled text, they should maintain a stack of styles, with + * the top of stack being the "current" style. Implementers of this + * signature should extend it with functions for creating style values. + *) + + + (***** Style operations *****) + + val sameStyle : (style * style) -> bool + (* are two styles the same? *) + + val pushStyle : (device * style) -> unit + val popStyle : device -> unit + (* push/pop a style from the devices style stack. A pop on an + * empty style stack is a nop. + *) + + val defaultStyle : device -> style + (* the default style for the device (this is the current style, + * if the style stack is empty). + *) + + + (***** Device properties ***** + ** + ** Note that the pretty-printer stream may cache these values, so that + ** changing them mid-flight may not affect existing pretty-printing + ** streams. Devices may also not support various features, in which + ** case, the `set` functions are no-ops. + **) + + (* the maximum printing depth (in number of open boxes); `NONE` is + * interpreted as no limit. + *) + val maxDepth : device -> int option + val setMaxDepth : device * int option -> unit + + (* the sized string to print in place of boxes when the maximum depth is reached. *) + val ellipses : device -> string * int + val setEllipses : device * string -> unit + val setEllipsesWithSz : device * string * int -> unit + + (* the width of the line for the device; `NONE` is infinite *) + val lineWidth : device -> int option + val setLineWidth : device * int option -> unit + + (* the suggested maximum width of indentation; `NONE` is interpreted as no limit. *) + val maxIndent : device -> int option + val setMaxIndent : device * int option -> unit + + (* the suggested maximum width of text on a line (i.e., not counting indentation). + * `NONE` is interpreted as no limit. + * NOTE: the pretty printer currently ignores this value. + *) + val textWidth : device -> int option + val setTextWidth : device * int option -> unit + + + (***** Output operations *****) + + val indent : (device * int) -> unit + (* output an indentation of the given width to the device *) + + val space : (device * int) -> unit + (* output some number of spaces to the device *) + + val newline : device -> unit + (* output a new-line to the device *) + + val string : (device * string) -> unit + val char : (device * char) -> unit + (* output a string/character in the current style to the device *) + + val flush : device -> unit + (* if the device is buffered, then flush any buffered output *) + + end; diff --git a/smlnj-lib/PP/src/pp-stream-fn.sml b/smlnj-lib/PP/src/pp-stream-fn.sml new file mode 100644 index 0000000..59a7afd --- /dev/null +++ b/smlnj-lib/PP/src/pp-stream-fn.sml @@ -0,0 +1,569 @@ +(* pp-stream-fn.sml + * + * COPYRIGHT (c) 2017 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * The implementation of PP streams, where all the action is. + *) + +functor PPStreamFn ( + structure Token : PP_TOKEN + structure Device : PP_DEVICE + sharing type Token.style = Device.style +(** + ) : PP_STREAM = +**) + ) : sig + include PP_STREAM + val dump : (TextIO.outstream * stream) -> unit + end = struct + + structure D = Device + structure T = Token + structure Q = Queue + + (* imperative stacks *) + structure Stk :> sig + type 'a t + val new : unit -> 'a t (* create a new stack *) + val clear : 'a t -> unit (* reset the stack to empty *) + val push : 'a t * 'a -> unit (* push an item *) + val pop : 'a t -> 'a option (* pop an item (`NONE` on empty) *) + val top : 'a t -> 'a option (* top of stack or `NONE` *) + val discard : 'a t -> unit (* discard top element (or nop on empty) *) + val toList : 'a t -> 'a list (* list of items; top first *) + end = struct + type 'a t = 'a list ref + fun new () : 'a t = ref[] + fun clear (stk : 'a t) = stk := [] + fun push (stk : 'a t, x) = stk := x :: !stk + fun pop (stk : 'a t) = (case !stk + of [] => NONE + | x::r => (stk := r; SOME x) + (* end case *)) + fun top (stk : 'a t) = (case !stk of [] => NONE | x::_ => SOME x) + fun discard (stk : 'a t) = (case !stk of [] => () | _::r => stk := r) + fun toList (stk : 'a t) = !stk + end + + type device = D.device + type token = T.token + type style = T.style + + datatype indent + = Abs of int (* indent relative to outer indentation *) + | Rel of int (* indent relative to start of box *) + + (**** DATA STRUCTURES ****) + + (* tokens represent pending pretty-printing operations in the queue *) + datatype pp_token + = TEXT of string (* raw text. This includes tokens. The *) + (* width and style information is taken *) + (* care of when they are inserted in *) + (* queue. *) + | NBSP of int (* some number of non-breakable spaces *) + | BREAK of { (* a potential line break *) + nsp : int, (* width of whitespace used if there is no break *) + offset : int (* indentation offset of next line if there is a break *) + } + | BEGIN of { (* the beginning of a box *) + indent : indent, (* the box's indentation mode and width *) + ty : box_ty (* the type of box *) + } + | END (* the end of a box *) + | PUSH_STYLE of style (* push a style on the style stack *) + | POP_STYLE (* pop a style off of the style stack *) + | NL (* hard newline *) + | IF_NL (* [unimplemented] *) + | CTL of (device -> unit) (* device control operation *) + + (* the types of boxes *) + and box_ty + = HBOX (* horizontal box: breaks map to spaces *) + | VBOX (* vertical box: break map to newlines *) + | HVBOX (* horizontal/vertical box: like an HBOX if the stuff fits, + * otherwise like a VBOX. + *) + | HOVBOX (* packing box: breaks are converted to spaces when they + * fix; otherwise line breaks are introduced. + *) + | BOX (* structural box: like a packing box, but breaks are mapped + * to newlines when the result of doing so would move the + * indent to the left. + *) + | FITS (* internal marker for boxes that have been determined to + * fit on the current line. + *) + + type pp_queue_elem = { (* elements of the PP queue *) + tok : pp_token, (* the element *) + sz : int ref, (* size of block (set when known) *) + len : int (* the display length of the token for strings and breaks; + * all other tokens have len = 0. + *) + } + + datatype stream = PP of { + dev : device, (* the underlying device *) + closed : bool ref, (* set to true, when the stream is *) + (* closed *) + width : int, (* the width of the device *) + maxIndent : int, (* the maximum indentation allowed *) + maxDepth : int, (* maximum nesting depth of open boxes *) + spaceLeft : int ref, (* space left on current line *) + curIndent : int ref, (* current indentation *) + curDepth : int ref, (* current nesting level of boxes. *) + leftTot : int ref, (* total width of tokens already printed *) + rightTot : int ref, (* total width of tokens ever inserted *) + (* into the queue. *) + newline : bool ref, (* `true` when we are at the start of a new line *) + queue : pp_queue_elem Q.queue, (* the queue of pending tokens *) + fmtStk : (box_ty * int) Stk.t, (* active blocks. The int is the indentation *) + (* of the block *) + scanStk + : (int * pp_queue_elem) Stk.t, + styleStk : style Stk.t + } + + (**** DEBUGGING FUNCTIONS ****) + structure F = Format + fun boxTypeToString HBOX = "HBOX" + | boxTypeToString VBOX = "VBOX" + | boxTypeToString HVBOX = "HVBOX" + | boxTypeToString HOVBOX = "HOVBOX" + | boxTypeToString BOX = "BOX" + | boxTypeToString FITS = "FITS" + fun indentToString (Abs n) = concat["Abs ", Int.toString n] + | indentToString (Rel n) = concat["Rel ", Int.toString n] + fun tokToString (TEXT s) = concat["TEXT \"", String.toString s, "\""] + | tokToString (NBSP n) = concat["NBSP ", Int.toString n] + | tokToString (BREAK{nsp, offset}) = + F.format "BREAK{nsp=%d, offset=%d}" [F.INT nsp, F.INT offset] + | tokToString (BEGIN{indent, ty}) = F.format "BEGIN{indent=%s, ty=%s}" [ + F.STR(indentToString indent), F.STR(boxTypeToString ty) + ] + | tokToString END = "END" + | tokToString (PUSH_STYLE _) = "PUSH_STYLE _" + | tokToString POP_STYLE = "POP_STYLE" + | tokToString NL = "NL" + | tokToString IF_NL = "IF_NL" + | tokToString (CTL f) = "CTL _" + fun qelemToString {tok, sz, len} = F.format "{tok=%s, sz=%d, len=%d}" [ + F.STR(tokToString tok), F.INT(!sz), F.INT len + ] + fun scanElemToString (n, elem) = + F.format "(%d, %s)" [F.INT n, F.STR(qelemToString elem)] + fun fmtElemToString (ty, n) = + F.format "(%s, %d)" [F.STR(boxTypeToString ty), F.INT n] + fun dump (outStrm, PP pp) = let + fun pr s = TextIO.output(outStrm, s) + fun prf (fmt, items) = pr(F.format fmt items) + fun prl fmtElem [] = pr "[]" + | prl fmtElem l = pr(ListFormat.fmt { + init = "[\n ", final = "]", sep = "\n ", fmt = fmtElem + } l) + in + pr ("BEGIN\n"); + prf (" width = %3d, spaceLeft = %3d\n", [ + F.INT(#width pp), F.INT(!(#spaceLeft pp)) + ]); + prf (" curIndent = %3d, curDepth = %3d\n", [ + F.INT(!(#curIndent pp)), F.INT(!(#curDepth pp)) + ]); + prf (" leftTot = %3d, rightTot = %3d\n", [ + F.INT(!(#leftTot pp)), F.INT(!(#rightTot pp)) + ]); + pr " queue = "; prl qelemToString (Q.contents(#queue pp)); pr "\n"; + pr " fmtStk = "; prl fmtElemToString (Stk.toList(#fmtStk pp)); pr "\n"; + pr " scanStk = "; prl scanElemToString (Stk.toList(#scanStk pp)); pr "\n"; + pr ("END\n") + end + + (**** UTILITY FUNCTIONS ****) + + (* use as a limit value for when the device does not specify a limit *) + val infinity = (case Int.maxInt of SOME n => n-1 | _ => 1000000) + + (* output text to the device; note that the size is specified separately, + * since it might be different from the actual string length (e.g., UTF8 + * multibyte characters) + *) + fun output (_, "", 0) = () + | output (PP{dev, spaceLeft, newline, ...}, s, sz) = ( + spaceLeft := !spaceLeft - sz; + newline := false; + D.string(dev, s)) + + (* output a newline to the device *) + fun outputNL (PP{dev, newline, ...}) = ( + newline := true; + D.newline dev) + +(* TODO: add `indent` function to device API *) + (* output indentation to the device *) + fun outputIndent (_, 0) = () + | outputIndent (PP{dev, ...}, n) = D.space (dev, n) + + (* output non-indent spaces to the device *) + fun blanks (_, 0) = () + | blanks (PP{dev, ...}, n) = D.space (dev, n) + + (* add a token to the pretty-printer queue *) + fun enqueueTok (PP{rightTot, queue, ...}, tok) = ( + rightTot := !rightTot + #len tok; + Q.enqueue(queue, tok)) + + (* format a break as a newline; indenting the new line. + * strm -- PP stream + * offset -- the extra indent amount supplied by the break + * wid -- the remaining line width at the opening of the + * innermost enclosing box. + *) + fun breakNewLine (strm, offset, wid) = let + val PP{width, maxIndent, curIndent, spaceLeft, ...} = strm + (* limit indentation to maximum amount *) + val indent = Int.min(maxIndent, (width - wid) + offset) + in + outputNL strm; + curIndent := indent; + spaceLeft := width - indent; + outputIndent (strm, indent) + end + + (* format a break as spaces. + * strm -- PP stream + * nsp -- number of spaces to output. + *) + fun breakSameLine (strm as PP{spaceLeft, ...}, nsp) = ( + spaceLeft := !spaceLeft - nsp; + blanks (strm, nsp)) + + (* force a line break when opening a box would make the indentation larger than + * the limit. + *) + fun forceLineBreak (strm as PP{fmtStk, spaceLeft, ...}) = (case Stk.top fmtStk + of SOME(ty, wid) => if (wid > !spaceLeft) + then (case ty + of (FITS | HBOX) => () + | _ => breakNewLine (strm, 0, wid) + (* end case *)) + else () + | NONE => outputNL strm + (* end case *)) + + (* skip a token *) + fun skip (PP{queue, leftTot, spaceLeft, ...}) = (case Q.next queue + of NONE => () + | SOME{tok, sz, len} => ( + leftTot := !leftTot - len; + spaceLeft := !spaceLeft + !sz) + (* end case *)) + + (* return the current style of the PP stream *) + fun currentStyle (PP{styleStk, dev, ...}) = (case Stk.top styleStk + of NONE => D.defaultStyle dev + | SOME sty => sty + (* end case *)) + + (**** FORMATTING ****) + + (* `format (strm, sz, tok)` formats a PP token that has the specified size *) + fun format (strm, sz, tok) = ( + case tok + of (TEXT s) => output (strm, s, sz) + | (NBSP n) => let + val PP{spaceLeft, ...} = strm + in + spaceLeft := !spaceLeft - sz; + blanks (strm, n) + end + | (BREAK{nsp, offset}) => let + val PP{fmtStk, spaceLeft, width, curIndent, newline, ...} = strm + in + case Stk.top fmtStk + of SOME(HBOX, wid) => breakSameLine (strm, nsp) + | SOME(VBOX, wid) => breakNewLine (strm, offset, wid) + | SOME(HVBOX, wid) => breakNewLine (strm, offset, wid) + | SOME(HOVBOX, wid) => if (sz > !spaceLeft) + then breakNewLine (strm, offset, wid) + else breakSameLine (strm, nsp) + | SOME(BOX, wid) => + if !newline + then breakSameLine (strm, nsp) + else if (sz > !spaceLeft) + then breakNewLine (strm, offset, wid) + else if (!curIndent > (width - wid) + offset) + then breakNewLine (strm, offset, wid) + else breakSameLine (strm, nsp) + | SOME(FITS, wid) => breakSameLine (strm, nsp) + | NONE => () (* no open box *) + (* end case *) + end + | (BEGIN{indent, ty}) => let + val PP{maxIndent, curIndent, spaceLeft, width, fmtStk, ...} = strm + val _ = if (width - !spaceLeft) > maxIndent + then forceLineBreak strm + else () + val spaceLeft' = !spaceLeft + (* compute offset from right margin of this block's indent *) + val offset = (case indent + of (Rel off) => spaceLeft' - off + | (Abs off) => (case Stk.top fmtStk + of SOME(_, wid) => wid - off + | NONE => width - (!curIndent + off) + (* end case *)) + (* end case *)) + val ty' = (case ty + of VBOX => VBOX + | _ => if (sz > spaceLeft') then ty else FITS + (* end case *)) + in + Stk.push (fmtStk, (ty', offset)) + end + | END => let + val PP{fmtStk, ...} = strm + in + Stk.discard fmtStk + end + | (PUSH_STYLE sty) => let + val PP{dev, ...} = strm + in + D.pushStyle (dev, sty) + end + | POP_STYLE => let + val PP{dev, ...} = strm + in + D.popStyle dev + end + | NL => let + val PP{fmtStk, ...} = strm + in + case Stk.top fmtStk + of SOME(_, wid) => breakNewLine (strm, 0, wid) + | NONE => outputNL strm + (* end case *) + end + | IF_NL => let + val PP{newline, ...} = strm + in +(* NOTE: the Ocaml version tests if !curIndent = width - !spaceLeft, but the newline + * flag should be true in that case. + *) + if !newline then () else skip strm + end + | (CTL ctlFn) => let + val PP{dev, ...} = strm + in + ctlFn dev + end + (* end case *)) + + fun advanceLeft strm = let + val PP{spaceLeft, leftTot, rightTot, queue, ...} = strm + fun advance () = (case Q.peek queue + of (SOME{tok, sz=ref sz, len}) => + if ((sz >= 0) orelse (!rightTot - !leftTot >= !spaceLeft)) + then ( + ignore(Q.dequeue queue); + format (strm, if sz < 0 then infinity else sz, tok); + leftTot := len + !leftTot; + advance()) + else () + | NONE => () + (* end case *)) + in + advance () + end + + fun enqueueAndAdvance (strm, tok) = ( + enqueueTok (strm, tok); + advanceLeft strm) + + fun enqueueTokenWithLen (strm, tok, len) = + enqueueAndAdvance (strm, {sz = ref len, len = len, tok = tok}) + + fun enqueueStringWithLen (strm, s, len) = + enqueueTokenWithLen (strm, TEXT s, len) + + fun enqueueToken (strm, tok) = enqueueTokenWithLen (strm, tok, 0) + + (* the scan stack always has this element on its bottom *) + val scanStkBot = (~1, {sz = ref ~1, tok = TEXT "", len = 0}) + + (* clear the scan stack *) + fun clearScanStk (PP{scanStk, ...}) = ( + Stk.clear scanStk; + Stk.push(scanStk, scanStkBot)) + + (* Set the size of the element on the top of the scan stack. The isBreak + * flag is set to true for breaks and false for boxes. + *) + fun setSize (strm as PP{leftTot, rightTot, scanStk, ...}, isBreak) = ( + case Stk.top scanStk + of NONE => raise Fail "PPStreamFn:setSize: impossible: scanStk is empty" + | SOME(leftTot', elem) => + (* check for obsolete elements *) + if (leftTot' < !leftTot) + then clearScanStk strm + else (case (elem, isBreak) + of ({sz, tok=BREAK _, ...}, true) => ( + sz := !sz + !rightTot; + Stk.discard scanStk) + | ({sz, tok=BEGIN _, ...}, false) => ( + sz := !sz + !rightTot; + Stk.discard scanStk) + | _ => () + (* end case *)) + (* end case *)) + + fun pushScanElem (strm as PP{scanStk, rightTot, ...}, setSz, tok) = ( + enqueueTok (strm, tok); + if setSz then setSize (strm, true) else (); + Stk.push (scanStk, (!rightTot, tok))) + + (* Open a new box *) + fun ppOpenBox (strm, indent, boxTy) = let + val PP{dev, rightTot, maxDepth, curDepth, ...} = strm + in + curDepth := !curDepth + 1; + if (!curDepth < maxDepth) + then pushScanElem (strm, false, { + sz = ref(~(!rightTot)), + tok = BEGIN{indent=indent, ty=boxTy}, + len = 0 + }) + else if (!curDepth = maxDepth) + then let + val (s, len) = D.ellipses dev + in + enqueueStringWithLen (strm, s, len) + end + else () + end + + (* the root box, which is always open *) + fun openSysBox strm = ppOpenBox (strm, Rel 0, HOVBOX) + + (* close a box *) + fun ppCloseBox (strm as PP{maxDepth, curDepth as ref depth, ...}) = + if (depth <= 1) + then raise Fail "unmatched close box" + else if (depth < maxDepth) + then ( + enqueueTok (strm, {sz = ref 0, tok = END, len = 0}); + setSize (strm, true); + setSize (strm, false); + curDepth := depth-1) + else curDepth := depth-1 + + fun ppBreak (strm as PP{rightTot, ...}, arg) = ( + pushScanElem (strm, true, { + sz = ref(~(!rightTot)), tok = BREAK arg, len = #nsp arg + })) + + fun ppInit (strm as PP pp) = ( + Q.clear(#queue pp); + clearScanStk strm; + #spaceLeft pp := #width pp; + #curIndent pp := 0; + #curDepth pp := 0; + #leftTot pp := 1; + #rightTot pp := 1; + #newline pp := true; + Stk.clear (#fmtStk pp); + Stk.clear (#styleStk pp); + openSysBox strm) + + fun ppNewline strm = + enqueueAndAdvance (strm, {sz = ref 0, tok = NL, len = 0}) + + fun ppFlush (strm as PP{dev, curDepth, rightTot, ...}, withNL) = let + fun closeBoxes () = if (!curDepth > 1) + then (ppCloseBox strm; closeBoxes()) + else () + in + closeBoxes (); + rightTot := infinity; + advanceLeft strm; + if withNL then outputNL strm else (); + D.flush dev; + ppInit strm + end + + (**** USER FUNCTIONS ****) + fun openStream d = let + fun limit optInt = Option.getOpt(optInt, infinity) + val width = limit(D.lineWidth d) + val maxIndent = Int.min(limit(D.maxIndent d), width-1) + val maxDepth = Int.max(limit(D.maxIndent d), 2) + val strm = PP{ + dev = d, + closed = ref false, + width = width, + maxIndent = maxIndent, + maxDepth = maxDepth, + spaceLeft = ref 0, + curIndent = ref 0, + curDepth = ref 0, + leftTot = ref 1, (* why 1 ? *) + rightTot = ref 1, (* why 1 ? *) + newline = ref true, + queue = Q.mkQueue(), + fmtStk = Stk.new(), + scanStk = Stk.new(), + styleStk = Stk.new() + } + in + if (width < 0) orelse (maxIndent < 0) orelse (maxDepth < 0) + orelse (width < maxIndent) + then raise Size + else (); + ppInit strm; + strm + end + + fun flushStream strm = ppFlush(strm, false) + fun closeStream (strm as PP{closed, ...}) = (flushStream strm; closed := true) + fun getDevice (PP{dev, ...}) = dev + + fun openHBox strm = ppOpenBox (strm, Abs 0, HBOX) + fun openVBox strm indent = ppOpenBox (strm, indent, VBOX) + fun openHVBox strm indent = ppOpenBox (strm, indent, HVBOX) + fun openHOVBox strm indent = ppOpenBox (strm, indent, HOVBOX) + fun openBox strm indent = ppOpenBox (strm, indent, BOX) + fun closeBox strm = ppCloseBox strm + + fun token (strm as PP{dev, ...}) t = let + val tokStyle = T.style t + in + if (D.sameStyle(currentStyle strm, tokStyle)) + then enqueueStringWithLen (strm, T.string t, T.size t) + else ( + enqueueToken (strm, PUSH_STYLE tokStyle); + enqueueStringWithLen (strm, T.string t, T.size t); + enqueueToken (strm, POP_STYLE)) + end + fun string strm s = enqueueStringWithLen(strm, s, size s) + + fun pushStyle (strm as PP{styleStk, ...}, sty) = ( + if (D.sameStyle(currentStyle strm, sty)) + then () + else enqueueToken (strm, PUSH_STYLE sty); + Stk.push (styleStk, sty)) + fun popStyle (strm as PP{styleStk, ...}) = (case Stk.pop styleStk + of NONE => raise Fail "PP: unmatched popStyle" + | SOME sty => if (D.sameStyle(currentStyle strm, sty)) + then () + else enqueueToken (strm, POP_STYLE) + (* end case *)) + + fun break strm arg = ppBreak (strm, arg) + fun space strm n = break strm {nsp=n, offset=0} + fun cut strm = break strm {nsp=0, offset=0} + fun newline strm = ppNewline strm + fun nbSpace strm n = enqueueTokenWithLen (strm, NBSP n, n) + + fun control strm ctlFn = enqueueToken (strm, CTL ctlFn) + + end diff --git a/smlnj-lib/PP/src/pp-stream-sig.sml b/smlnj-lib/PP/src/pp-stream-sig.sml new file mode 100644 index 0000000..329e414 --- /dev/null +++ b/smlnj-lib/PP/src/pp-stream-sig.sml @@ -0,0 +1,81 @@ +(* pp-stream-sig.sml + * + * COPYRIGHT (c) 2017 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * This interface provides a output stream interface to pretty printing. + *) + +signature PP_STREAM = + sig + type device + type stream + + type token + (* tokens are an abstraction of strings (allowing for different + * widths and style information). + *) + type style + + datatype indent + = Abs of int (* indent relative to outer indentation *) + | Rel of int (* indent relative to start of box *) + + val openStream : device -> stream + val flushStream : stream -> unit + val closeStream : stream -> unit + val getDevice : stream -> device + + (* open an horizontal box (HBox); breaks and spaces are mapped to spaces, + * even if the line width is exceeded. + *) + val openHBox : stream -> unit + + (* open a vertical box (VBox); breaks and spaces are mapped to newlines *) + val openVBox : stream -> indent -> unit + + (* open a horizontal/vertical box; this box behaves like an HBox, unless the + * contents does not fit on the line (or there is an explicit newline), in + * which case it behaves like a VBox. + *) + val openHVBox : stream -> indent -> unit + + (* open a horizontal or vertical box; the contents are layed out in horizontal + * mode until the line is full, at which point a line break is introduced and + * a new line is started. This box is essentially a paragraph box. + *) + val openHOVBox : stream -> indent -> unit + + (* similar to the `openHOVBox` function, except that breaks split the current + * line if splitting would move to the left. + *) + val openBox : stream -> indent -> unit + + (* close the most recently opened box. Note that every openBox call must be + * matched by a closeBox. + *) + val closeBox : stream -> unit + + val token : stream -> token -> unit + val string : stream -> string -> unit + + val pushStyle : (stream * style) -> unit + val popStyle : stream -> unit + + (* `break strm {nsp, offset}` expands to either `nsp` spaces or + * a line break with the specified indentation `offset`. + *) + val break : stream -> {nsp : int, offset : int} -> unit + (* `space strm n` is equivalent to `break strm {nsp=n, offset=0}` *) + val space : stream -> int -> unit + (* `cut strm` is equivalent to `break{nsp=0, offset=0}` *) + val cut : stream -> unit + (* break the current line *) + val newline : stream -> unit + (* emits a nonbreakable space *) + val nbSpace : stream -> int -> unit + + val control : stream -> (device -> unit) -> unit + + end + diff --git a/smlnj-lib/PP/src/pp-token-sig.sml b/smlnj-lib/PP/src/pp-token-sig.sml new file mode 100644 index 0000000..b0bba1f --- /dev/null +++ b/smlnj-lib/PP/src/pp-token-sig.sml @@ -0,0 +1,24 @@ +(* pp-token-sig.sml + * + * COPYRIGHT (c) 2017 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * User-defined pretty-printer tokens. Tokens pair text with style information + * for convinence. For example, one could define tokens for the keywords of + * a language. + *) + +signature PP_TOKEN = + sig + type token + type style + + (* returns the text of the token *) + val string : token -> string + (* returns the associated style of the token *) + val style : token -> style + (* returns the size of the token's text *) + val size : token -> int + + end; + diff --git a/smlnj-lib/PP/src/sources.cm b/smlnj-lib/PP/src/sources.cm new file mode 100644 index 0000000..ece5bcf --- /dev/null +++ b/smlnj-lib/PP/src/sources.cm @@ -0,0 +1,35 @@ +(* sources.cm + * + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * Sources file for Pretty Printing Library. + *) + +Group + + signature PP_DESC + signature PP_DEVICE + signature PP_TOKEN + signature PP_STREAM + + functor PPStreamFn + functor PPDescFn + functor PPDebugFn + +is +#if defined(NEW_CM) + $/basis.cm + $/smlnj-lib.cm +#else + ../../Util/smlnj-lib.cm +#endif + + pp-device-sig.sml + pp-token-sig.sml + pp-desc-fn.sml + pp-stream-sig.sml + pp-stream-fn.sml + pp-debug-fn.sml + pp-desc-sig.sml + diff --git a/smlnj-lib/PP/tests/base.sml b/smlnj-lib/PP/tests/base.sml new file mode 100644 index 0000000..b74735f --- /dev/null +++ b/smlnj-lib/PP/tests/base.sml @@ -0,0 +1,37 @@ +(* base.sml + * + * COPYRIGHT (c) 2013 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * Some common code for testing. + *) + +CM.make "sources.cm"; (* to compile and load the PP library *) + +structure TextToken = + struct + type token = string + type style = unit + fun string t = t + fun style t = () + fun size t = String.size t + end; + +structure PP = PPDebugFn(PPStreamFn( + structure Token = TextToken + structure Device = SimpleTextIODev)); + +fun withPP (name, wid) ppFn = let + val saveStrm = !PP.debugStrm + val _ = PP.debugStrm := TextIO.openAppend("out") + val ppStrm = + PP.openStream(SimpleTextIODev.openDev{dst=TextIO.stdOut, wid=wid}) + in + print(concat[name, ": width = ", Int.toString wid, "\n"]); + ppFn ppStrm; + PP.closeStream ppStrm; + print "\n"; + TextIO.closeOut (!PP.debugStrm); + PP.debugStrm := saveStrm + end; + diff --git a/smlnj-lib/PP/tests/sources.cm b/smlnj-lib/PP/tests/sources.cm new file mode 100644 index 0000000..23c68ab --- /dev/null +++ b/smlnj-lib/PP/tests/sources.cm @@ -0,0 +1,24 @@ +(* sources.cm + * + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + *) + +Library + functor PPStreamFn + functor PPDebugFn + structure SimpleTextIODev + structure HTMLDev + + structure HTML + structure PrHTML +is +#if defined(NEW_CM) + $/basis.cm + $/html-lib.cm + $/pp-lib.cm +#else + ../../HTML/html-lib.cm + ../src/sources.cm + ../devices/sources.cm +#endif diff --git a/smlnj-lib/PP/tests/test-html.sml b/smlnj-lib/PP/tests/test-html.sml new file mode 100644 index 0000000..a592bb5 --- /dev/null +++ b/smlnj-lib/PP/tests/test-html.sml @@ -0,0 +1,196 @@ +(* test.sml + * + * COPYRIGHT (c) 1997 Bell Labs, Lucent Technologies. + *) + +CM.make(); (* to compile and load the PP library *) + +structure TextToken = + struct + type token = string + type style = HTMLDev.style + fun string t = t + fun style t = HTMLDev.styleTT + fun size t = String.size t + end; + +structure PP = PPDebugFn(PPStreamFn( + structure Token = TextToken + structure Device = HTMLDev)); + +fun kw strm s = ( + PP.pushStyle(strm, HTMLDev.styleB); + PP.string strm s; + PP.popStyle strm) + +fun withPP (name, wid) ppFn = let + val saveStrm = !PP.debugStrm + val _ = PP.debugStrm := TextIO.openAppend("out") + val ppDev = HTMLDev.openDev{wid=wid, textWid=NONE} + val ppStrm = PP.openStream ppDev + in + PP.pushStyle(ppStrm, HTMLDev.styleTT); + ppFn ppStrm; + PP.popStyle ppStrm; + PP.closeStream ppStrm; + PrHTML.prHTML { + putc = fn c => TextIO.output1 (TextIO.stdOut, c), + puts = fn s => TextIO.output (TextIO.stdOut, s) + } (HTML.HTML{ + version = NONE, + head = [HTML.Head_TITLE name], + body = HTML.BODY{ + background = NONE, bgcolor = NONE, text = NONE, + link = NONE, vlink = NONE, alink = NONE, + content = HTML.TextBlock(HTMLDev.done ppDev) + } + }); + TextIO.closeOut (!PP.debugStrm); + PP.debugStrm := saveStrm + end; + +local + fun repeat c n = StringCvt.padLeft c n "" + fun simple1 (name, w, n, openBox) () = + withPP (name, w) (fn strm => ( + openBox strm (PP.Rel 0); + PP.string strm (repeat #"x" n); + PP.cut strm; + PP.string strm (repeat #"y" n); + PP.cut strm; + PP.string strm (repeat #"z" n); + PP.closeBox strm)) + fun simple2 (name, w, n, openBox1, openBox2) () = + withPP (name, w) (fn strm => ( + openBox1 strm (PP.Rel 0); + PP.string strm (repeat #"v" n); + PP.cut strm; + openBox2 strm (PP.Abs 2); + PP.string strm (repeat #"w" n); + PP.cut strm; + PP.string strm (repeat #"x" n); + PP.cut strm; + PP.string strm (repeat #"y" n); + PP.closeBox strm; + PP.cut strm; + PP.string strm (repeat #"z" n); + PP.closeBox strm)) +fun openHBox strm _ = PP.openHBox strm +in +val t01a = simple1 ("Test 01a [hbox]", 10, 2, openHBox) +val t01b = simple1 ("Test 01b [hbox]", 10, 3, openHBox) +val t02a = simple1 ("Test 02a [vbox]", 10, 2, PP.openVBox) +val t02b = simple1 ("Test 02b [vbox]", 10, 3, PP.openVBox) +val t03a = simple1 ("Test 03a [hvbox]", 10, 2, PP.openHVBox) +val t03b = simple1 ("Test 03b [hvbox]", 10, 4, PP.openHVBox) +val t04a = simple1 ("Test 04a [hovbox]", 10, 2, PP.openHOVBox) +val t04b = simple1 ("Test 04b [hovbox]", 10, 4, PP.openHOVBox) +val t05a = simple1 ("Test 05a [box]", 10, 2, PP.openBox) +val t05b = simple1 ("Test 05b [box]", 10, 4, PP.openBox) + +val t11a = simple2 ("Test 11a [hbox/hbox]", 10, 2, openHBox, openHBox) +val t11b = simple2 ("Test 11b [hbox/hbox]", 10, 3, openHBox, openHBox) +val t11c = simple2 ("Test 11c [hbox/hbox]", 10, 4, openHBox, openHBox) +val t12a = simple2 ("Test 12a [hbox/vbox]", 10, 2, openHBox, PP.openVBox) +val t12b = simple2 ("Test 12b [hbox/vbox]", 10, 3, openHBox, PP.openVBox) +val t12c = simple2 ("Test 12c [hbox/vbox]", 10, 4, openHBox, PP.openVBox) +val t13a = simple2 ("Test 13a [hbox/hvbox]", 10, 2, openHBox, PP.openHVBox) +val t13b = simple2 ("Test 13b [hbox/hvbox]", 10, 3, openHBox, PP.openHVBox) +val t13c = simple2 ("Test 13c [hbox/hvbox]", 10, 4, openHBox, PP.openHVBox) +val t14a = simple2 ("Test 14a [hbox/hovbox]", 10, 2, openHBox, PP.openHOVBox) +val t14b = simple2 ("Test 14b [hbox/hovbox]", 10, 3, openHBox, PP.openHOVBox) +val t14c = simple2 ("Test 14c [hbox/hovbox]", 10, 4, openHBox, PP.openHOVBox) +val t15a = simple2 ("Test 15a [hbox/box]", 10, 2, openHBox, PP.openBox) +val t15b = simple2 ("Test 15b [hbox/box]", 10, 3, openHBox, PP.openBox) +val t15c = simple2 ("Test 15c [hbox/box]", 10, 4, openHBox, PP.openBox) +val t16a = simple2 ("Test 16a [vbox/hbox]", 10, 2, PP.openVBox, openHBox) +val t16b = simple2 ("Test 16b [vbox/hbox]", 10, 3, PP.openVBox, openHBox) +val t16c = simple2 ("Test 16c [vbox/hbox]", 10, 4, PP.openVBox, openHBox) +val t17a = simple2 ("Test 17a [vbox/vbox]", 10, 2, PP.openVBox, PP.openVBox) +val t17b = simple2 ("Test 17b [vbox/vbox]", 10, 3, PP.openVBox, PP.openVBox) +val t17c = simple2 ("Test 17c [vbox/vbox]", 10, 4, PP.openVBox, PP.openVBox) +val t18a = simple2 ("Test 18a [vbox/hvbox]", 10, 2, PP.openVBox, PP.openHVBox) +val t18b = simple2 ("Test 18b [vbox/hvbox]", 10, 3, PP.openVBox, PP.openHVBox) +val t18c = simple2 ("Test 18c [vbox/hvbox]", 10, 4, PP.openVBox, PP.openHVBox) +val t19a = simple2 ("Test 19a [vbox/hovbox]", 10, 2, PP.openVBox, PP.openHOVBox) +val t19b = simple2 ("Test 19b [vbox/hovbox]", 10, 3, PP.openVBox, PP.openHOVBox) +val t19c = simple2 ("Test 19c [vbox/hovbox]", 10, 4, PP.openVBox, PP.openHOVBox) +val t20a = simple2 ("Test 20a [vbox/box]", 10, 2, PP.openVBox, PP.openBox) +val t20b = simple2 ("Test 20b [vbox/box]", 10, 3, PP.openVBox, PP.openBox) +val t20c = simple2 ("Test 20c [vbox/box]", 10, 4, PP.openVBox, PP.openBox) +val t21a = simple2 ("Test 21a [hvbox/hbox]", 10, 2, PP.openHVBox, openHBox) +val t21b = simple2 ("Test 21b [hvbox/hbox]", 10, 3, PP.openHVBox, openHBox) +val t21c = simple2 ("Test 21c [hvbox/hbox]", 10, 4, PP.openHVBox, openHBox) +val t22a = simple2 ("Test 22a [hvbox/vbox]", 10, 2, PP.openHVBox, PP.openVBox) +val t22b = simple2 ("Test 22b [hvbox/vbox]", 10, 3, PP.openHVBox, PP.openVBox) +val t22c = simple2 ("Test 22c [hvbox/vbox]", 10, 4, PP.openHVBox, PP.openVBox) +val t23a = simple2 ("Test 23a [hvbox/hvbox]", 10, 2, PP.openHVBox, PP.openHVBox) +val t23b = simple2 ("Test 23b [hvbox/hvbox]", 10, 3, PP.openHVBox, PP.openHVBox) +val t23c = simple2 ("Test 23c [hvbox/hvbox]", 10, 4, PP.openHVBox, PP.openHVBox) +val t24a = simple2 ("Test 24a [hvbox/hovbox]", 10, 2, PP.openHVBox, PP.openHOVBox) +val t24b = simple2 ("Test 24b [hvbox/hovbox]", 10, 3, PP.openHVBox, PP.openHOVBox) +val t24c = simple2 ("Test 24c [hvbox/hovbox]", 10, 4, PP.openHVBox, PP.openHOVBox) +val t25a = simple2 ("Test 25a [hvbox/box]", 10, 2, PP.openHVBox, PP.openBox) +val t25b = simple2 ("Test 25b [hvbox/box]", 10, 3, PP.openHVBox, PP.openBox) +val t25c = simple2 ("Test 25c [hvbox/box]", 10, 4, PP.openHVBox, PP.openBox) +val t26a = simple2 ("Test 26a [hovbox/hbox]", 10, 2, PP.openHOVBox, openHBox) +val t26b = simple2 ("Test 26b [hovbox/hbox]", 10, 3, PP.openHOVBox, openHBox) +val t26c = simple2 ("Test 26c [hovbox/hbox]", 10, 4, PP.openHOVBox, openHBox) +val t27a = simple2 ("Test 27a [hovbox/vbox]", 10, 2, PP.openHOVBox, PP.openVBox) +val t27b = simple2 ("Test 27b [hovbox/vbox]", 10, 3, PP.openHOVBox, PP.openVBox) +val t27c = simple2 ("Test 27c [hovbox/vbox]", 10, 4, PP.openHOVBox, PP.openVBox) +val t28a = simple2 ("Test 28a [hovbox/hvbox]", 10, 2, PP.openHOVBox, PP.openHVBox) +val t28b = simple2 ("Test 28b [hovbox/hvbox]", 10, 3, PP.openHOVBox, PP.openHVBox) +val t28c = simple2 ("Test 28c [hovbox/hvbox]", 10, 4, PP.openHOVBox, PP.openHVBox) +val t29a = simple2 ("Test 29a [hovbox/hovbox]", 10, 2, PP.openHOVBox, PP.openHOVBox) +val t29b = simple2 ("Test 29b [hovbox/hovbox]", 10, 3, PP.openHOVBox, PP.openHOVBox) +val t29c = simple2 ("Test 29c [hovbox/hovbox]", 10, 4, PP.openHOVBox, PP.openHOVBox) +val t30a = simple2 ("Test 30a [hovbox/box]", 10, 2, PP.openHOVBox, PP.openBox) +val t30b = simple2 ("Test 30b [hovbox/box]", 10, 3, PP.openHOVBox, PP.openBox) +val t30c = simple2 ("Test 30c [hovbox/box]", 10, 4, PP.openHOVBox, PP.openBox) +val t31a = simple2 ("Test 31a [box/hbox]", 10, 2, PP.openBox, openHBox) +val t31b = simple2 ("Test 31b [box/hbox]", 10, 3, PP.openBox, openHBox) +val t31c = simple2 ("Test 31c [box/hbox]", 10, 4, PP.openBox, openHBox) +val t32a = simple2 ("Test 32a [box/vbox]", 10, 2, PP.openBox, PP.openVBox) +val t32b = simple2 ("Test 32b [box/vbox]", 10, 3, PP.openBox, PP.openVBox) +val t32c = simple2 ("Test 32c [box/vbox]", 10, 4, PP.openBox, PP.openVBox) +val t33a = simple2 ("Test 33a [box/hvbox]", 10, 2, PP.openBox, PP.openHVBox) +val t33b = simple2 ("Test 33b [box/hvbox]", 10, 3, PP.openBox, PP.openHVBox) +val t33c = simple2 ("Test 33c [box/hvbox]", 10, 4, PP.openBox, PP.openHVBox) +val t34a = simple2 ("Test 34a [box/hovbox]", 10, 2, PP.openBox, PP.openHOVBox) +val t34b = simple2 ("Test 34b [box/hovbox]", 10, 3, PP.openBox, PP.openHOVBox) +val t34c = simple2 ("Test 34c [box/hovbox]", 10, 4, PP.openBox, PP.openHOVBox) +val t35a = simple2 ("Test 35a [box/box]", 10, 2, PP.openBox, PP.openBox) +val t35b = simple2 ("Test 35b [box/box]", 10, 3, PP.openBox, PP.openBox) +val t35c = simple2 ("Test 35c [box/box]", 10, 4, PP.openBox, PP.openBox) +end + +fun t40 () = withPP ("Test 20 [C code]", 20) (fn strm => ( + PP.openHBox strm; + kw strm "if"; + PP.space strm 1; + PP.string strm "(x < y)"; + PP.space strm 1; + PP.string strm "{"; + PP.openHVBox strm (PP.Abs 4); + PP.space strm 1; + PP.string strm "stmt1;"; PP.space strm 1; + PP.openHBox strm; + kw strm "if"; + PP.space strm 1; + PP.string strm "(w < z)"; + PP.space strm 1; + PP.string strm "{"; + PP.openHVBox strm (PP.Abs 4); + PP.space strm 1; PP.string strm "stmt2;"; + PP.space strm 1; PP.string strm "stmt3;"; + PP.space strm 1; PP.string strm "stmt4;"; + PP.closeBox strm; PP.newline strm; + PP.string strm "}"; + PP.closeBox strm; + PP.space strm 1; PP.string strm "stmt5;"; + PP.space strm 1; PP.string strm "stmt6;"; + PP.closeBox strm; PP.newline strm; + PP.string strm "}"; + PP.closeBox strm)); + diff --git a/smlnj-lib/PP/tests/test.sml b/smlnj-lib/PP/tests/test.sml new file mode 100644 index 0000000..90c3dc0 --- /dev/null +++ b/smlnj-lib/PP/tests/test.sml @@ -0,0 +1,212 @@ +(* test.sml + * + * COPYRIGHT (c) 1997 Bell Labs, Lucent Technologies. + *) + +use "base.sml"; + +local + fun repeat c n = StringCvt.padLeft c n "" + fun simple1 (name, w, n, openBox) () = + withPP (name, w) (fn strm => ( + openBox strm (PP.Rel 0); + PP.string strm (repeat #"x" n); + PP.cut strm; + PP.string strm (repeat #"y" n); + PP.cut strm; + PP.string strm (repeat #"z" n); + PP.closeBox strm)) +(** + fun simple1 (name, w, n, openBox) () = let + val outS = TextIO.openAppend "out" + fun dump (lab, strm) = (TextIO.output(outS, lab^": "); PP.dump(outS, strm)) + in + TextIO.output(outS, concat["***** ", name, " *****\n"]); + withPP (name, w) (fn strm => ( + dump ("1", strm); + openBox strm (PP.Rel 0); + dump ("2", strm); + PP.string strm (repeat #"x" n); + dump ("3", strm); + PP.cut strm; + dump ("4", strm); + PP.string strm (repeat #"y" n); + dump ("5", strm); + PP.cut strm; + dump ("6", strm); + PP.string strm (repeat #"z" n); + dump ("7", strm); + PP.closeBox strm; + dump ("8", strm))); + TextIO.closeOut outS + end +**) + fun simple2 (name, w, n, openBox1, openBox2) () = + withPP (name, w) (fn strm => ( + openBox1 strm (PP.Rel 0); + PP.string strm (repeat #"v" n); + PP.cut strm; + openBox2 strm (PP.Abs 2); + PP.string strm (repeat #"w" n); + PP.cut strm; + PP.string strm (repeat #"x" n); + PP.cut strm; + PP.string strm (repeat #"y" n); + PP.closeBox strm; + PP.cut strm; + PP.string strm (repeat #"z" n); + PP.closeBox strm)) +fun openHBox strm _ = PP.openHBox strm +in +val t01a = simple1 ("Test 01a [hbox]", 10, 2, openHBox) +val t01b = simple1 ("Test 01b [hbox]", 10, 3, openHBox) +val t02a = simple1 ("Test 02a [vbox]", 10, 2, PP.openVBox) +val t02b = simple1 ("Test 02b [vbox]", 10, 3, PP.openVBox) +val t03a = simple1 ("Test 03a [hvbox]", 10, 2, PP.openHVBox) +val t03b = simple1 ("Test 03b [hvbox]", 10, 4, PP.openHVBox) +val t04a = simple1 ("Test 04a [hovbox]", 10, 2, PP.openHOVBox) +val t04b = simple1 ("Test 04b [hovbox]", 10, 4, PP.openHOVBox) +val t05a = simple1 ("Test 05a [box]", 10, 2, PP.openBox) +val t05b = simple1 ("Test 05b [box]", 10, 4, PP.openBox) + +val t11a = simple2 ("Test 11a [hbox/hbox]", 10, 2, openHBox, openHBox) +val t11b = simple2 ("Test 11b [hbox/hbox]", 10, 3, openHBox, openHBox) +val t11c = simple2 ("Test 11c [hbox/hbox]", 10, 4, openHBox, openHBox) +val t12a = simple2 ("Test 12a [hbox/vbox]", 10, 2, openHBox, PP.openVBox) +val t12b = simple2 ("Test 12b [hbox/vbox]", 10, 3, openHBox, PP.openVBox) +val t12c = simple2 ("Test 12c [hbox/vbox]", 10, 4, openHBox, PP.openVBox) +val t13a = simple2 ("Test 13a [hbox/hvbox]", 10, 2, openHBox, PP.openHVBox) +val t13b = simple2 ("Test 13b [hbox/hvbox]", 10, 3, openHBox, PP.openHVBox) +val t13c = simple2 ("Test 13c [hbox/hvbox]", 10, 4, openHBox, PP.openHVBox) +val t14a = simple2 ("Test 14a [hbox/hovbox]", 10, 2, openHBox, PP.openHOVBox) +val t14b = simple2 ("Test 14b [hbox/hovbox]", 10, 3, openHBox, PP.openHOVBox) +val t14c = simple2 ("Test 14c [hbox/hovbox]", 10, 4, openHBox, PP.openHOVBox) +val t15a = simple2 ("Test 15a [hbox/box]", 10, 2, openHBox, PP.openBox) +val t15b = simple2 ("Test 15b [hbox/box]", 10, 3, openHBox, PP.openBox) +val t15c = simple2 ("Test 15c [hbox/box]", 10, 4, openHBox, PP.openBox) +val t16a = simple2 ("Test 16a [vbox/hbox]", 10, 2, PP.openVBox, openHBox) +val t16b = simple2 ("Test 16b [vbox/hbox]", 10, 3, PP.openVBox, openHBox) +val t16c = simple2 ("Test 16c [vbox/hbox]", 10, 4, PP.openVBox, openHBox) +val t17a = simple2 ("Test 17a [vbox/vbox]", 10, 2, PP.openVBox, PP.openVBox) +val t17b = simple2 ("Test 17b [vbox/vbox]", 10, 3, PP.openVBox, PP.openVBox) +val t17c = simple2 ("Test 17c [vbox/vbox]", 10, 4, PP.openVBox, PP.openVBox) +val t18a = simple2 ("Test 18a [vbox/hvbox]", 10, 2, PP.openVBox, PP.openHVBox) +val t18b = simple2 ("Test 18b [vbox/hvbox]", 10, 3, PP.openVBox, PP.openHVBox) +val t18c = simple2 ("Test 18c [vbox/hvbox]", 10, 4, PP.openVBox, PP.openHVBox) +val t19a = simple2 ("Test 19a [vbox/hovbox]", 10, 2, PP.openVBox, PP.openHOVBox) +val t19b = simple2 ("Test 19b [vbox/hovbox]", 10, 3, PP.openVBox, PP.openHOVBox) +val t19c = simple2 ("Test 19c [vbox/hovbox]", 10, 4, PP.openVBox, PP.openHOVBox) +val t20a = simple2 ("Test 20a [vbox/box]", 10, 2, PP.openVBox, PP.openBox) +val t20b = simple2 ("Test 20b [vbox/box]", 10, 3, PP.openVBox, PP.openBox) +val t20c = simple2 ("Test 20c [vbox/box]", 10, 4, PP.openVBox, PP.openBox) +val t21a = simple2 ("Test 21a [hvbox/hbox]", 10, 2, PP.openHVBox, openHBox) +val t21b = simple2 ("Test 21b [hvbox/hbox]", 10, 3, PP.openHVBox, openHBox) +val t21c = simple2 ("Test 21c [hvbox/hbox]", 10, 4, PP.openHVBox, openHBox) +val t22a = simple2 ("Test 22a [hvbox/vbox]", 10, 2, PP.openHVBox, PP.openVBox) +val t22b = simple2 ("Test 22b [hvbox/vbox]", 10, 3, PP.openHVBox, PP.openVBox) +val t22c = simple2 ("Test 22c [hvbox/vbox]", 10, 4, PP.openHVBox, PP.openVBox) +val t23a = simple2 ("Test 23a [hvbox/hvbox]", 10, 2, PP.openHVBox, PP.openHVBox) +val t23b = simple2 ("Test 23b [hvbox/hvbox]", 10, 3, PP.openHVBox, PP.openHVBox) +val t23c = simple2 ("Test 23c [hvbox/hvbox]", 10, 4, PP.openHVBox, PP.openHVBox) +val t24a = simple2 ("Test 24a [hvbox/hovbox]", 10, 2, PP.openHVBox, PP.openHOVBox) +val t24b = simple2 ("Test 24b [hvbox/hovbox]", 10, 3, PP.openHVBox, PP.openHOVBox) +val t24c = simple2 ("Test 24c [hvbox/hovbox]", 10, 4, PP.openHVBox, PP.openHOVBox) +val t25a = simple2 ("Test 25a [hvbox/box]", 10, 2, PP.openHVBox, PP.openBox) +val t25b = simple2 ("Test 25b [hvbox/box]", 10, 3, PP.openHVBox, PP.openBox) +val t25c = simple2 ("Test 25c [hvbox/box]", 10, 4, PP.openHVBox, PP.openBox) +val t26a = simple2 ("Test 26a [hovbox/hbox]", 10, 2, PP.openHOVBox, openHBox) +val t26b = simple2 ("Test 26b [hovbox/hbox]", 10, 3, PP.openHOVBox, openHBox) +val t26c = simple2 ("Test 26c [hovbox/hbox]", 10, 4, PP.openHOVBox, openHBox) +val t27a = simple2 ("Test 27a [hovbox/vbox]", 10, 2, PP.openHOVBox, PP.openVBox) +val t27b = simple2 ("Test 27b [hovbox/vbox]", 10, 3, PP.openHOVBox, PP.openVBox) +val t27c = simple2 ("Test 27c [hovbox/vbox]", 10, 4, PP.openHOVBox, PP.openVBox) +val t28a = simple2 ("Test 28a [hovbox/hvbox]", 10, 2, PP.openHOVBox, PP.openHVBox) +val t28b = simple2 ("Test 28b [hovbox/hvbox]", 10, 3, PP.openHOVBox, PP.openHVBox) +val t28c = simple2 ("Test 28c [hovbox/hvbox]", 10, 4, PP.openHOVBox, PP.openHVBox) +val t29a = simple2 ("Test 29a [hovbox/hovbox]", 10, 2, PP.openHOVBox, PP.openHOVBox) +val t29b = simple2 ("Test 29b [hovbox/hovbox]", 10, 3, PP.openHOVBox, PP.openHOVBox) +val t29c = simple2 ("Test 29c [hovbox/hovbox]", 10, 4, PP.openHOVBox, PP.openHOVBox) +val t30a = simple2 ("Test 30a [hovbox/box]", 10, 2, PP.openHOVBox, PP.openBox) +val t30b = simple2 ("Test 30b [hovbox/box]", 10, 3, PP.openHOVBox, PP.openBox) +val t30c = simple2 ("Test 30c [hovbox/box]", 10, 4, PP.openHOVBox, PP.openBox) +val t31a = simple2 ("Test 31a [box/hbox]", 10, 2, PP.openBox, openHBox) +val t31b = simple2 ("Test 31b [box/hbox]", 10, 3, PP.openBox, openHBox) +val t31c = simple2 ("Test 31c [box/hbox]", 10, 4, PP.openBox, openHBox) +val t32a = simple2 ("Test 32a [box/vbox]", 10, 2, PP.openBox, PP.openVBox) +val t32b = simple2 ("Test 32b [box/vbox]", 10, 3, PP.openBox, PP.openVBox) +val t32c = simple2 ("Test 32c [box/vbox]", 10, 4, PP.openBox, PP.openVBox) +val t33a = simple2 ("Test 33a [box/hvbox]", 10, 2, PP.openBox, PP.openHVBox) +val t33b = simple2 ("Test 33b [box/hvbox]", 10, 3, PP.openBox, PP.openHVBox) +val t33c = simple2 ("Test 33c [box/hvbox]", 10, 4, PP.openBox, PP.openHVBox) +val t34a = simple2 ("Test 34a [box/hovbox]", 10, 2, PP.openBox, PP.openHOVBox) +val t34b = simple2 ("Test 34b [box/hovbox]", 10, 3, PP.openBox, PP.openHOVBox) +val t34c = simple2 ("Test 34c [box/hovbox]", 10, 4, PP.openBox, PP.openHOVBox) +val t35a = simple2 ("Test 35a [box/box]", 10, 2, PP.openBox, PP.openBox) +val t35b = simple2 ("Test 35b [box/box]", 10, 3, PP.openBox, PP.openBox) +val t35c = simple2 ("Test 35c [box/box]", 10, 4, PP.openBox, PP.openBox) +end + +fun t40 () = withPP ("Test 20 [C code]", 20) (fn strm => ( + PP.openHBox strm; + PP.string strm "if"; + PP.space strm 1; + PP.string strm "(x < y)"; + PP.space strm 1; + PP.string strm "{"; + PP.openHVBox strm (PP.Abs 4); + PP.space strm 1; + PP.string strm "stmt1;"; PP.space strm 1; + PP.openHBox strm; + PP.string strm "if"; + PP.space strm 1; + PP.string strm "(w < z)"; + PP.space strm 1; + PP.string strm "{"; + PP.openHVBox strm (PP.Abs 4); + PP.space strm 1; PP.string strm "stmt2;"; + PP.space strm 1; PP.string strm "stmt3;"; + PP.space strm 1; PP.string strm "stmt4;"; + PP.closeBox strm; PP.newline strm; + PP.string strm "}"; + PP.closeBox strm; + PP.space strm 1; PP.string strm "stmt5;"; + PP.space strm 1; PP.string strm "stmt6;"; + PP.closeBox strm; PP.newline strm; + PP.string strm "}"; + PP.closeBox strm)); + +(* a test of VBox *) +fun t50 () = withPP ("Test 40 [vbox]", 20) (fn strm => let + fun pp l = let + fun pp' [] = () + | pp' [s] = PP.string strm s + | pp' (s::r) = (PP.string strm s; PP.space strm 1; pp' r) + in + PP.openHBox strm; pp' l; PP.closeBox strm + end + in + PP.openVBox strm (PP.Abs 0); + pp ["0:", "line", "1"]; PP.newline strm; + pp ["0:", "line", "2"]; PP.newline strm; + PP.openVBox strm (PP.Abs 2); + pp ["2:", "line", "3"]; PP.newline strm; + pp ["2:", "line", "4"]; + PP.closeBox strm; + PP.newline strm; + PP.openVBox strm (PP.Abs 2); + pp ["2:", "line", "5"]; PP.newline strm; + pp ["2:", "line", "6"]; + PP.closeBox strm; + PP.newline strm; + pp ["0:", "line", "7"]; PP.newline strm; + pp ["0:", "line", "8"]; PP.newline strm; + PP.openVBox strm (PP.Abs 4); + pp ["4:", "line", "9"]; PP.newline strm; + pp ["4:", "line", "10"]; + PP.closeBox strm; + PP.newline strm; + pp ["0:", "line", "11"]; PP.newline strm; + pp ["0:", "line", "12"]; PP.newline strm; + PP.closeBox strm + end) + diff --git a/smlnj-lib/PP/tests/typp.sml b/smlnj-lib/PP/tests/typp.sml new file mode 100644 index 0000000..c3274fe --- /dev/null +++ b/smlnj-lib/PP/tests/typp.sml @@ -0,0 +1,96 @@ +(* typp.sml + * + * COPYRIGHT (c) 1997 Bell Labs, Lucent Technologies. + * + * A pretty-printer for ML type expressions. + *) + +use "base.sml"; + +datatype ty + = VarTy of string + | BaseTy of (ty list * string) + | FnTy of (ty * ty) + | TupleTy of ty list + | RecordTy of (string * ty) list + +fun ppTy (strm, ty) = let + fun ppComma () = (PP.string strm ","; PP.space strm 1) + fun ppStar () = (PP.space strm 1; PP.string strm "*"; PP.nbSpace strm 1) + fun pp (VarTy s) = PP.string strm s + | pp (BaseTy([], s) = PP.string strm s + | pp (BaseTy([ty], s) = + | pp (BaseTy(l, s) = + | pp (FnTy(ty1, ty2)) = + | pp (TupleTy []) = PP.string strm "()" + | pp (TupleTy [ty]) = pp ty + | pp (TupleTy l) = + | pp (RecordTy []) = PP.string strm "{}" + | pp (RecordTy l) = let + fun ppElem (lab, ty) = ( + PP.openHVBox strm (PP.Abs 2); + PP.string lab; + PP.space strm 1; + PP.string strm ":"; + PP.nbSpace strm; + pp ty + PP.closeBox()) + in + PP.openHBox strm; + PP.string strm "{"; + PP.openHVBox (strm, PP.Abs 4); + ppl (ppElem, ppComma) l; + PP.break strm {nsp=0, offset=2}; + PP.closeBox strm; + PP.string strm "}"; + PP.closeBox strm + end + and ppParenTy ty = + and ppl (ppElem, ppSep) l = let + fun ppl' [] = () + | ppl' [ty] = ppElem ty + | ppl' (ty::r) = (ppElem ty; ppSep(); ppl' r) + in + ppl' l + end + in + PP.openHOVBox (strm, PP.Abs 2); + pp ty; + PP.closeBox strm + end; + +local + val stringTy = BaseTy([], "string") + val intTy = BaseTy([], "int") + val boolTy = BaseTy([], "bool") + val unitTy = BaseTy([], "unit") + val posTy = BaseTy([], "pos") + fun optionTy arg = BaseTy([arg], "option") + val vecBufTy = RecordTy [ + ("buf", BaseTy([], "vector")), + ("i", intTy), + ("sz", optionTy intTy) + ] + val arrBufTy = RecordTy [ + ("buf", BaseTy([], "array")), + ("i", intTy), + ("sz", optionTy intTy) + ] +in +val wrTy = RecordTy of [ + ("name", stringTy), + ("chunkSize", intTy), + ("writeVec", optionTy(FnTy(vecBufTy, intTy))), + ("writeArr", optionTy(FnTy(arrBufTy, intTy))), + ("writeVecNB", optionTy(FnTy(vecBufTy, optionTy intTy))), + ("writeArrNB", optionTy(FnTy(arrBufTy, optionTy intTy))), + ("block", optionTy(FnTy(unitTy, unitTy)), + ("canOutput", optionTy(FnTy(unitTy, boolTy)), + ("getPos", optionTy(FnTy(unitTy, posTy))), + ("setPos", optionTy(FnTy(posTy, unitTy))), + ("endPos", optionTy(FnTy(unitTy, posTy))), + ("verifyPos", optionTy(FnTy(unitTy, posTy))), + ("close", optionTy(FnTy(unitTy, unitTy))), + ("ioDesc", optionTy(BaseTy([], "OS.IO.iodesc"))) + ] +end; diff --git a/smlnj-lib/README b/smlnj-lib/README new file mode 100644 index 0000000..8229c34 --- /dev/null +++ b/smlnj-lib/README @@ -0,0 +1,36 @@ +This is the SML/NJ Library. It is free software distributed under the SML/NJ +system's license (see the LICENSE file for details). + +The library is organized as a collection of CM libraries. Here is a roadmap of +the library structure: + + Directory Sources File Description + ------------------------------------------------- + Util smlnj-lib.cm This is the general utility library. + + Controls controls-lib.cm This is a library of support code + for managing application controls. + + HashCons hash-cons-lib.cm This is a library supporting hash-consing + of data structures and efficient sets and + maps using hash-consed keys. + + HTML html-lib.cm This library provides parsing and pretty + printing of HTML (Version 3.2). + + INet inet-lib.cm Networking utilities (for both Unix + and Windows). + + PP pp-lib.cm Pretty-printing library. + + Reactive reactive-lib.cm A low-level reactive scripting library. + + RegExp regexp-lib.cm Regular-expression library. + + Unix unix-lib.cm Unix specific utilities. + +The first column is the source directory, the second gives the library sources +file alias (to be used in your application's sources.cm file), and the third +column gives a brief description. Look at the Doc dicrectory and at the README +files in the individual subdirectories for more documentation. + diff --git a/smlnj-lib/Reactive/.cm/GUID/instruction.sml b/smlnj-lib/Reactive/.cm/GUID/instruction.sml new file mode 100644 index 0000000..81b1daf --- /dev/null +++ b/smlnj-lib/Reactive/.cm/GUID/instruction.sml @@ -0,0 +1 @@ +guid-$/(reactive-lib.cm):instruction.sml-1714016092.874 diff --git a/smlnj-lib/Reactive/.cm/GUID/machine.sml b/smlnj-lib/Reactive/.cm/GUID/machine.sml new file mode 100644 index 0000000..90ca852 --- /dev/null +++ b/smlnj-lib/Reactive/.cm/GUID/machine.sml @@ -0,0 +1 @@ +guid-$/(reactive-lib.cm):machine.sml-1714016092.879 diff --git a/smlnj-lib/Reactive/.cm/GUID/reactive-sig.sml b/smlnj-lib/Reactive/.cm/GUID/reactive-sig.sml new file mode 100644 index 0000000..4b6e72f --- /dev/null +++ b/smlnj-lib/Reactive/.cm/GUID/reactive-sig.sml @@ -0,0 +1 @@ +guid-$/(reactive-lib.cm):reactive-sig.sml-1714016092.970 diff --git a/smlnj-lib/Reactive/.cm/GUID/reactive.sml b/smlnj-lib/Reactive/.cm/GUID/reactive.sml new file mode 100644 index 0000000..416fa09 --- /dev/null +++ b/smlnj-lib/Reactive/.cm/GUID/reactive.sml @@ -0,0 +1 @@ +guid-$/(reactive-lib.cm):reactive.sml-1714016092.975 diff --git a/smlnj-lib/Reactive/.cm/SKEL/instruction.sml b/smlnj-lib/Reactive/.cm/SKEL/instruction.sml new file mode 100644 index 0000000..0eda3f7 --- /dev/null +++ b/smlnj-lib/Reactive/.cm/SKEL/instruction.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f1d"Atom"ad"Instruction"h0 \ No newline at end of file diff --git a/smlnj-lib/Reactive/.cm/SKEL/machine.sml b/smlnj-lib/Reactive/.cm/SKEL/machine.sml new file mode 100644 index 0000000..d75a36b --- /dev/null +++ b/smlnj-lib/Reactive/.cm/SKEL/machine.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f2d"List"d"Atom"ad"Machine"jh1ad"I"gp1d"Instruction"h0 \ No newline at end of file diff --git a/smlnj-lib/Reactive/.cm/SKEL/reactive-sig.sml b/smlnj-lib/Reactive/.cm/SKEL/reactive-sig.sml new file mode 100644 index 0000000..b31c8bc --- /dev/null +++ b/smlnj-lib/Reactive/.cm/SKEL/reactive-sig.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f1d"Atom"ac"REACTIVE"h0 \ No newline at end of file diff --git a/smlnj-lib/Reactive/.cm/SKEL/reactive.sml b/smlnj-lib/Reactive/.cm/SKEL/reactive.sml new file mode 100644 index 0000000..08e6db4 --- /dev/null +++ b/smlnj-lib/Reactive/.cm/SKEL/reactive.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f2Instruction"d"List"ad"Reactive"jh3ad"I"gp1ad"M"gp1d"Machine"ad"AMap"gp1d"AtomBinaryMap"gp1c"REACTIVE" \ No newline at end of file diff --git a/smlnj-lib/Reactive/.cm/amd64-unix/instruction.sml b/smlnj-lib/Reactive/.cm/amd64-unix/instruction.sml new file mode 100644 index 0000000..b1e0f35 Binary files /dev/null and b/smlnj-lib/Reactive/.cm/amd64-unix/instruction.sml differ diff --git a/smlnj-lib/Reactive/.cm/amd64-unix/machine.sml b/smlnj-lib/Reactive/.cm/amd64-unix/machine.sml new file mode 100644 index 0000000..2a1ff56 Binary files /dev/null and b/smlnj-lib/Reactive/.cm/amd64-unix/machine.sml differ diff --git a/smlnj-lib/Reactive/.cm/amd64-unix/reactive-sig.sml b/smlnj-lib/Reactive/.cm/amd64-unix/reactive-sig.sml new file mode 100644 index 0000000..6b76e95 Binary files /dev/null and b/smlnj-lib/Reactive/.cm/amd64-unix/reactive-sig.sml differ diff --git a/smlnj-lib/Reactive/.cm/amd64-unix/reactive.sml b/smlnj-lib/Reactive/.cm/amd64-unix/reactive.sml new file mode 100644 index 0000000..b9d6186 Binary files /dev/null and b/smlnj-lib/Reactive/.cm/amd64-unix/reactive.sml differ diff --git a/smlnj-lib/Reactive/README b/smlnj-lib/Reactive/README new file mode 100644 index 0000000..8bc1802 --- /dev/null +++ b/smlnj-lib/Reactive/README @@ -0,0 +1,79 @@ +This is an implementation of a low-level reactive engine (or toolkit), +which is mostly translated from SugarCubes (a Java toolkit). The main +difference (aside from the implementation language) is that we support +preemption of actions (as in Berry's Communicating Reactive Processes +model). This library is meant to be the target of higher-level reactive +models. + +Note also that we use the term "signal" (al la Esterel) instead of "event". + +Information about SugarCubes can be found at + + http://www-sop.inria.fr/meije/rc/SugarCubes/index.html + +A reactive script is written as an abstract syntax tree of type +Machine.instruction, which is defined in the Instruction module. +This type is parameterized over a context type, which is the +type of the runtime environment. + +Reactive scripting language: + + i1 || i2 interleave the activation of i1 and i2 until both terminate or one + suspends. + + i1 & i2 activate in sequence + + nothing do nothing + + stop stop activation; is terminated for all future activations + + suspend suspends activation + + action(act) An atomic action; apply to the machine. + + exec(exe) Activation causes exe to be applied to the machine, which returns two + functions + + stop : unit -> unit + done : unit -> bool + + ifThenElse(pred, i1, i2) + Conditional. Evaluates predicate applied to the machine and then + activates either e1 (if the predicate is true) or e2. + + repeat(n, i) Activate the script i n times. It terminates when either the loop + has been repeated n times or i terminates. + + loop Loop forever. + + close : instruction -> instruction + + signal(sig, i) + Binds the signal sig in the enclosed script i. + + rebind(sig1, sig2, i) + Binds sig2 to the signal sig1 in the enclosed script i. + + when(cfg, i1, i2) + + trap(cfg, i) + + trapWith(cfg, i1, i2) + + emit(sig) Generate the named signal this instance. + + await(cfg) Wait for the configuration to hold. + +Signal configurations have the following forms: + + posConfig sig + Holds if sig is present this instance. + + negConfig sig + Holds if sig is not present this instance. + + orConfig(cfg1, cfg2) + Holds if either cfg1 or cfg2 holds. + + andConfig(cfg1, cfg2) + Holds if neither cfg1 or cfg2 holds. diff --git a/smlnj-lib/Reactive/instruction.sml b/smlnj-lib/Reactive/instruction.sml new file mode 100644 index 0000000..230e769 --- /dev/null +++ b/smlnj-lib/Reactive/instruction.sml @@ -0,0 +1,39 @@ +(* instruction.sml + * + * COPYRIGHT (c) 2020 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * An AST representation of reactive scripts. + *) + +structure Instruction = + struct + + datatype 'a config + = posConfig of 'a + | negConfig of 'a + | orConfig of ('a config * 'a config) + | andConfig of ('a config * 'a config) + + type signal = Atom.atom + + datatype 'ctxt instr + = || of ('ctxt instr * 'ctxt instr) (* merge *) + | & of ('ctxt instr * 'ctxt instr) (* sequencing *) + | nothing (* nop *) + | stop (* stop execution *) + | suspend (* suspend execution *) + | action of 'ctxt -> unit (* an atomic action *) + | exec of 'ctxt -> {stop : unit -> unit, done : unit -> bool} + | ifThenElse of (('ctxt -> bool) * 'ctxt instr * 'ctxt instr) + | repeat of (int * 'ctxt instr) (* repeat loop *) + | loop of 'ctxt instr (* infinite loop *) + | close of 'ctxt instr + | signal of (signal * 'ctxt instr) (* define a signal *) + | rebind of (signal * signal * 'ctxt instr) (* rename a signal *) + | when of (signal config * 'ctxt instr * 'ctxt instr) + | trapWith of (signal config * 'ctxt instr * 'ctxt instr) + | emit of signal (* generate a signal *) + | await of signal config (* wait for a signal *) + + end; diff --git a/smlnj-lib/Reactive/machine.sml b/smlnj-lib/Reactive/machine.sml new file mode 100644 index 0000000..b9239e5 --- /dev/null +++ b/smlnj-lib/Reactive/machine.sml @@ -0,0 +1,589 @@ +(* machine.sml + * + * COPYRIGHT (c) 2020 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * This is an implementation of the reactive interpreter instructions, + * and functions to generate them. + *) + +structure Machine : sig + + (* activation return codes *) + datatype state + = TERM (* execution of the instruction is complete; activation + * at future instances has no effect. + *) + | STOP (* execution is stopped in this instant, but could + * progress in the next instant. + *) + | SUSP (* exeuction is suspended and must be resumed during this + * instant. + *) + + type in_signal + type out_signal + + type instant = int + type signal_state = instant ref + + datatype signal = SIG of { + name : Atom.atom, + id : int, + state : signal_state + } + + datatype machine = M of { + now : instant ref, + moveFlg : bool ref, + endOfInstant : bool ref, + prog : code, + signals : signal list, + inputs : signal list, + outputs : signal list + } + + and code = C of { + isTerm : unit -> bool, + terminate : unit -> unit, + reset : unit -> unit, + preempt : machine -> unit, + activation : machine -> state + } + + val runMachine : machine -> bool + val resetMachine : machine -> unit + val inputsOf : machine -> in_signal list + val outputsOf : machine -> out_signal list + + val inputSignal : in_signal -> Atom.atom + val outputSignal : out_signal -> Atom.atom + val setInSignal : (in_signal * bool) -> unit + val getInSignal : in_signal -> bool + val getOutSignal : out_signal -> bool + + type config + + val || : (code * code) -> code + val & : (code * code) -> code + val nothing : code + val stop : unit -> code + val suspend : unit -> code + val action : (machine -> unit) -> code + val exec : (machine -> {stop : unit -> unit, done : unit -> bool}) -> code + val ifThenElse : ((machine -> bool) * code * code) -> code + val repeat : (int * code) -> code + val loop : code -> code + val close : code -> code + val emit : signal -> code + val await : config -> code + val when : (config * code * code) -> code + val trapWith : (config * code * code) -> code + + end = struct + + structure I = Instruction (* for the config type *) + + datatype state + = TERM + | STOP + | SUSP + + type instant = int + type signal_state = instant ref + + datatype signal = SIG of { + name : Atom.atom, + id : int, + state : signal_state + } + + type config = signal I.config + + datatype machine = M of { + now : instant ref, + moveFlg : bool ref, + endOfInstant : bool ref, + prog : code, + signals : signal list, + inputs : signal list, + outputs : signal list + } + + and code = C of { + isTerm : unit -> bool, + terminate : unit -> unit, + reset : unit -> unit, + preempt : machine -> unit, + activation : machine -> state + } + + + fun now (M{now=t, ...}) = !t + fun newMove (M{moveFlg, ...}) = moveFlg := true + fun isEndOfInstant (M{endOfInstant, ...}) = !endOfInstant + + datatype presence = PRESENT | ABSENT | UNKNOWN + + fun presence (m, SIG{state, ...}) = let + val ts = !state + val now = now m + in + if (now = ts) then PRESENT + else if ((now = ~ts) orelse (isEndOfInstant m)) then ABSENT + else UNKNOWN + end + + fun present (m, SIG{state, ...}) = (now m = !state) + fun prePresent (m, SIG{state, ...}) = (now m = !state + 1) + fun absent (m, SIG{state, ...}) = (now m = ~(!state)) + fun emitSig (m, SIG{state, ...}) = state := now m + fun emitNot (m, SIG{state, ...}) = state := ~(now m) + + datatype in_signal = IN of (machine * signal) + datatype out_signal = OUT of (machine * signal) + + fun inputSignal (IN(_, SIG{name, ...})) = name + fun outputSignal (OUT(_, SIG{name, ...})) = name + fun setInSignal (IN(m, s), false) = emitNot(m, s) + | setInSignal (IN(m, s), true) = emitSig(m, s) + fun getInSignal (IN(m, s)) = present(m, s) + fun getOutSignal (OUT(m, s)) = prePresent(m, s) + + fun terminate (C{terminate=f, ...}) = f() + fun isTerm (C{isTerm=f, ...}) = f() + fun reset (C{reset=f, ...}) = f() + fun preemption (C{preempt=f, ...}, m) = f m + fun activation (C{activation=f, ...}, m) = f m + + fun activate (i, m) = if (isTerm i) + then TERM + else (case activation(i, m) + of TERM => (terminate i; TERM) + | res => res + (* end case *)) + + fun preempt (i, m) = if (isTerm i) + then () + else (preemption(i, m); terminate i) + + (* default instruction methods *) + fun isTermMeth termFlg () = !termFlg + fun terminateMeth termFlg () = termFlg := true + fun resetMeth termFlg () = termFlg := false + + fun || (i1, i2) = let + val termFlg = ref false + val leftSts = ref SUSP + val rightSts = ref SUSP + fun resetMeth () = ( + termFlg := false; leftSts := SUSP; rightSts := SUSP; + reset i1; reset i2) + fun preemptMeth m = (preempt(i1, m); preempt(i2, m)) + fun activationMeth m = ( + if (!leftSts = SUSP) then leftSts := activate(i1, m) else (); + if (!rightSts = SUSP) then rightSts := activate(i2, m) else (); + case (!leftSts, !rightSts) + of (TERM, TERM) => TERM + | (SUSP, _) => SUSP + | (_, SUSP) => SUSP + | _ => (leftSts := SUSP; rightSts := SUSP; STOP) + (* end case *)) + in + C{ isTerm = isTermMeth termFlg, + terminate = terminateMeth termFlg, + reset = resetMeth, + preempt = preemptMeth, + activation = activationMeth + } + end + + fun & (i1, i2) = let + val termFlg = ref false + fun resetMeth () = (termFlg := false; reset i1; reset i2) + fun preemptMeth m = (preempt(i1, m); preempt(i2, m)) + fun activationMeth m = + if (isTerm i1) + then activate(i2, m) + else (case activate(i1, m) + of TERM => activate(i2, m) + | res => res + (* end case *)) + in + C{ isTerm = isTermMeth termFlg, + terminate = terminateMeth termFlg, + reset = resetMeth, + preempt = preemptMeth, + activation = activationMeth + } + end + + val nothing = C{ + isTerm = fn () => true, + terminate = fn () => (), + reset = fn () => (), + preempt = fn _ => (), + activation = fn _ => TERM + } + + fun stop () = let + val termFlg = ref false + in + C{ isTerm = isTermMeth termFlg, + terminate = terminateMeth termFlg, + reset = resetMeth termFlg, + preempt = fn _ => (), + activation = fn _ => (termFlg := true; STOP) + } + end + + fun suspend () = let + val termFlg = ref false + in + C{ isTerm = isTermMeth termFlg, + terminate = terminateMeth termFlg, + reset = resetMeth termFlg, + preempt = fn _ => (), + activation = fn _ => (termFlg := true; SUSP) + } + end + + fun action f = let + val termFlg = ref false + in + C{ isTerm = isTermMeth termFlg, + terminate = terminateMeth termFlg, + reset = resetMeth termFlg, + preempt = fn _ => (), + activation = fn m => (f m; TERM) + } + end + + fun exec f = let + val termFlg = ref false + val ops = ref(NONE : {stop : unit -> unit, done : unit -> bool} option) +(** NOTE: what if a reset occurs while we are running? We would need to change + ** the type of resetMeth to take a machine parameter. + **) + fun resetMeth () = (termFlg := false) + fun preemptMeth m = (case !ops + of NONE => () + | SOME{stop, ...} => (ops := NONE; stop()) + (* end case *)) + fun activationMeth m = (case !ops + of SOME{done, ...} => if done () + then (ops := NONE; TERM) + else STOP + | NONE => (ops := SOME(f m); SUSP) + (* end case *)) + in + C{ isTerm = isTermMeth termFlg, + terminate = terminateMeth termFlg, + reset = resetMeth, + preempt = preemptMeth, + activation = activationMeth + } + end + + fun ifThenElse (pred, i1, i2) = let + val termFlg = ref false + val cond = ref NONE + fun resetMeth () = ( + termFlg := false; + case !cond + of (SOME true) => reset i1 + | (SOME false) => reset i2 + | NONE => () + (* end case *); + cond := NONE) + fun preemptMeth m = (case !cond + of (SOME true) => preempt(i1, m) + | (SOME false) => preempt(i2, m) + | NONE => () + (* end case *)) + fun activationMeth m = (case !cond + of (SOME true) => activate(i1, m) + | (SOME false) => activate(i2, m) + | NONE => let + val b = pred m + in + cond := SOME b; + if b then activate(i1, m) else activate(i2, m) + end + (* end case *)) + in + C{ isTerm = isTermMeth termFlg, + terminate = terminateMeth termFlg, + reset = resetMeth, + preempt = preemptMeth, + activation = activationMeth + } + end + + fun repeat (n, i) = let + val termFlg = ref false + val counter = ref n + fun resetMeth () = (termFlg := false; counter := n) + fun preemptMeth m = preempt(i, m) + fun activationMeth m = + if (!counter > 0) + then (case activate(i, m) + of TERM => ( + counter := !counter-1; reset i; + activationMeth m) + | res => res + (* end case *)) + else TERM + in + C{ isTerm = isTermMeth termFlg, + terminate = terminateMeth termFlg, + reset = resetMeth, + preempt = preemptMeth, + activation = activationMeth + } + end + + fun loop i = let + val termFlg = ref false + val endReached = ref false + fun resetMeth () = (termFlg := false; reset i; endReached := false) + fun preemptMeth m = preempt (i, m) + fun activationMeth m = (case activate(i, m) + of TERM => if (!endReached) + then ( +(* say(m, "instantaneous loop detected\n"); *) + STOP) + else (endReached := true; reset i; activationMeth m) + | STOP => (endReached := false; STOP) + | SUSP => SUSP + (* end case *)) + in + C{ isTerm = isTermMeth termFlg, + terminate = terminateMeth termFlg, + reset = resetMeth, + preempt = preemptMeth, + activation = activationMeth + } + end + + fun close i = let + val termFlg = ref false + fun activationMeth m = (case activate(i, m) + of SUSP => activationMeth m + | res => res + (* end case *)) + in + C{ isTerm = isTermMeth termFlg, + terminate = terminateMeth termFlg, + reset = resetMeth termFlg, + preempt = fn _ => (), + activation = activationMeth + } + end + + (** Configuration evaluation **) + fun fixed (m, c) = let + fun fix (I.posConfig id) = (presence(m, id) <> UNKNOWN) + | fix (I.negConfig id) = (presence(m, id) <> UNKNOWN) + | fix (I.orConfig(c1, c2)) = let + val b1 = fix c1 and b2 = fix c2 + in + (b1 andalso evaluate(m, c1)) orelse + (b2 andalso evaluate(m, c2)) orelse + (b1 andalso b2) + end + | fix (I.andConfig(c1, c2)) = let + val b1 = fix c1 and b2 = fix c2 + in + (b1 andalso not(evaluate(m, c1))) orelse + (b2 andalso not(evaluate(m, c2))) orelse + (b1 andalso b2) + end + in + fix c + end + + and evaluate (m, c) = let + fun eval (I.posConfig id) = present(m, id) + | eval (I.negConfig id) = not(present(m, id)) + | eval (I.orConfig(c1, c2)) = eval c1 orelse eval c2 + | eval (I.andConfig(c1, c2)) = eval c1 andalso eval c2 + in + eval c + end + + (* evaluate the signal configuration `c` returning `SOME b` if + * the signals are in known state and `b` is the result of the + * configuration. Otherwise, return `NONE` if one or more signals + * are in unknown state. + *) + fun fixedEval (m, c) = let + fun f (I.posConfig id) = (case presence(m, id) + of UNKNOWN => NONE + | PRESENT => SOME true + | ABSENT => SOME false + (* end case *)) + | f (I.negConfig id) = (case presence(m, id) + of UNKNOWN => NONE + | PRESENT => SOME false + | ABSENT => SOME true + (* end case *)) + | f (I.andConfig(c1, c2)) = (case (f c1, f c2) + of (SOME false, _) => SOME false + | (_, SOME false) => SOME false + | (SOME true, SOME true) => SOME true + | _ => NONE + (* end case *)) + | f (I.orConfig(c1, c2)) = (case (f c1, f c2) + of (SOME true, _) => SOME true + | (_, SOME true) => SOME true + | (SOME false, SOME false) => SOME false + | _ => NONE + (* end case *)) + in + f c + end + + fun emit signal = let + val termFlg = ref false + fun activationMeth m = ( + newMove m; + emitSig(m, signal); + TERM) + in + C{ isTerm = isTermMeth termFlg, + terminate = terminateMeth termFlg, + reset = resetMeth termFlg, + preempt = fn _ => (), + activation = activationMeth + } + end + + fun await c = let + val termFlg = ref false + fun activationMeth m = (case fixedEval(m, c) + of NONE => SUSP + | (SOME false) => STOP + | (SOME true) => if (isEndOfInstant m) then STOP else TERM + (* end case *)) + in + C{ isTerm = isTermMeth termFlg, + terminate = terminateMeth termFlg, + reset = resetMeth termFlg, + preempt = fn _ => (), + activation = activationMeth + } + end + + fun when (c, i1, i2) = let + val termFlg = ref false + val value = ref NONE + fun resetMeth m = ( + termFlg := false; + reset i1; reset i2; + value := NONE) + fun preemptMeth m = (preempt(i1, m); preempt(i2, m)) + fun activationMeth m = (case !value + of NONE => (case fixedEval(m, c) + of NONE => SUSP + | (SOME v) => ( + value := SOME v; + if (isEndOfInstant m) + then STOP + else if v + then activate(i1, m) + else activate(i2, m)) + (* end case *)) + | (SOME true) => activate(i1, m) + | (SOME false) => activate(i2, m) + (* end case *)) + in + C{ isTerm = isTermMeth termFlg, + terminate = terminateMeth termFlg, + reset = resetMeth, + preempt = preemptMeth, + activation = activationMeth + } + end + + fun trapWith (c, i1, i2) = let + val termFlg = ref false + val activeHandle = ref false + val resumeBody = ref true + fun resetMeth m = ( + termFlg := false; + reset i1; reset i2; + activeHandle := false; + resumeBody := true) + fun preemptMeth m = if (! activeHandle) + then preempt(i2, m) + else preempt(i1, m) + fun activationMeth m = + if (! activeHandle) + then activate(i2, m) + else let + fun chkConfig () = (case fixedEval(m, c) + of NONE => SUSP + | (SOME true) => ( (* actual preemption *) + preempt(i1, m); + activeHandle := true; + if (isEndOfInstant m) + then STOP + else activate(i2, m)) + | (SOME false) => ( + resumeBody := true; + STOP) + (* end case *)) + in + if (! resumeBody) + then (case activate(i1, m) + of STOP => (resumeBody := false; chkConfig()) + | res => res + (* end case *)) + else chkConfig() + end + in + C{ isTerm = isTermMeth termFlg, + terminate = terminateMeth termFlg, + reset = resetMeth, + preempt = preemptMeth, + activation = activationMeth + } + end + + (* run a machine to a stable state; return true if that is a terminal state *) + fun runMachine (m as M{now, moveFlg, endOfInstant, prog, ...}) = let + fun untilStop () = (case activate(prog, m) + of SUSP => ( + if !moveFlg + then moveFlg := false + else endOfInstant := true; + untilStop ()) + | STOP => false + | TERM => true + (* end case *)) + in + endOfInstant := false; + moveFlg := false; + untilStop () before now := !now+1 + end + + (* reset a machine back to its initial state *) + fun resetMachine (M{ + now, moveFlg, endOfInstant, prog, signals, inputs, outputs + }) = let + fun resetSig (SIG{state, ...}) = state := 0 + in + now := 2; + moveFlg := false; + endOfInstant := false; + reset prog; + List.app resetSig signals; + List.app resetSig inputs; + List.app resetSig outputs + end + + fun inputsOf (m as M{inputs, ...}) = List.map (fn s => IN(m, s)) inputs + fun outputsOf (m as M{outputs, ...}) = List.map (fn s => OUT(m, s)) outputs + + end; diff --git a/smlnj-lib/Reactive/reactive-lib.cm b/smlnj-lib/Reactive/reactive-lib.cm new file mode 100644 index 0000000..d27194b --- /dev/null +++ b/smlnj-lib/Reactive/reactive-lib.cm @@ -0,0 +1,24 @@ +(* reactive-lib.cm + * + * COPYRIGHT (c) 2020 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * Sources file for reactive engine library. + *) + +Library + signature REACTIVE + structure Reactive +is +#if defined(NEW_CM) + $/basis.cm + $/smlnj-lib.cm +#else + ../Util/smlnj-lib.cm +#endif + + instruction.sml + machine.sml + reactive-sig.sml + reactive.sml + diff --git a/smlnj-lib/Reactive/reactive-sig.sml b/smlnj-lib/Reactive/reactive-sig.sml new file mode 100644 index 0000000..d4cb3d6 --- /dev/null +++ b/smlnj-lib/Reactive/reactive-sig.sml @@ -0,0 +1,72 @@ +(* reactive-sig.sml + * + * COPYRIGHT (c) 2020 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * A simple ractive engine modelled after RC and SugarCubes. + *) + +signature REACTIVE = + sig + + type machine + type instruction + type signal = Atom.atom + type config + type in_signal + type out_signal + + val machine : { + inputs : signal list, + outputs : signal list, + body : instruction + } -> machine + + val run : machine -> bool + (* run the machine one instant. Returns true, if the machine ends in + * a terminal state. + *) + val reset : machine -> unit + (* reset a machine to its initial state. *) + + val inputsOf : machine -> in_signal list + val outputsOf : machine -> out_signal list + + val inputSignal : in_signal -> signal + val outputSignal : out_signal -> signal + + val setInSignal : (in_signal * bool) -> unit + val getInSignal : in_signal -> bool + val getOutSignal : out_signal -> bool + + val || : (instruction * instruction) -> instruction + val & : (instruction * instruction) -> instruction + + val nothing : instruction + val stop : instruction + val suspend : instruction + + val action : (machine -> unit) -> instruction + val exec : (machine -> {stop : unit -> unit, done : unit -> bool}) + -> instruction + + val ifThenElse : ((machine -> bool) * instruction * instruction) -> instruction + val repeat : (int * instruction) -> instruction + val loop : instruction -> instruction + val close : instruction -> instruction + + val signal : (signal * instruction) -> instruction + val rebind : (signal * signal * instruction) -> instruction + val when : (config * instruction * instruction) -> instruction + val trap : (config * instruction) -> instruction + val trapWith : (config * instruction * instruction) -> instruction + val emit : signal -> instruction + val await : config -> instruction + + (* signal configurations *) + val posConfig : signal -> config + val negConfig : signal -> config + val orConfig : (config * config) -> config + val andConfig : (config * config) -> config + + end diff --git a/smlnj-lib/Reactive/reactive.sml b/smlnj-lib/Reactive/reactive.sml new file mode 100644 index 0000000..f34985e --- /dev/null +++ b/smlnj-lib/Reactive/reactive.sml @@ -0,0 +1,127 @@ +(* reactive.sml + * + * COPYRIGHT (c) 2020 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * A simple ractive engine modelled after RC and SugarCubes. + *) + +structure Reactive : REACTIVE = + struct + + structure I = Instruction + structure M = Machine + + type machine = M.machine + type instruction = machine Instruction.instr + type signal = I.signal + type config = I.signal I.config + type in_signal = M.in_signal + type out_signal = M.out_signal + + (* used to bind internal signal names *) + structure AMap = AtomBinaryMap + + exception UnboundSignal of I.signal + + fun machine {inputs, outputs, body} = let + val nextId = ref 0 + val sigList = ref [] + fun newSignal s = let + val id = !nextId + val s' = M.SIG{name=s, id=id, state = ref 0} + in + nextId := id+1; + sigList := s' :: !sigList; + s' + end + fun bindSig (env, s) = (case AMap.find (env, s) + of NONE => raise UnboundSignal s + | (SOME s') => s' + (* end case *)) + fun trans (instr, env) = (case instr + of I.||(i1, i2) => M.||(trans(i1, env), trans(i2, env)) + | I.&(i1, i2) => M.&(trans(i1, env), trans(i2, env)) + | I.nothing => M.nothing + | I.stop => M.stop() + | I.suspend => M.suspend() + | I.action act => M.action act + | I.exec f => M.exec f + | I.ifThenElse(pred, i1, i2) => + M.ifThenElse(pred, trans(i1, env), trans(i2, env)) + | I.repeat(cnt, i) => M.repeat(cnt, trans(i, env)) + | I.loop i => M.loop(trans(i, env)) + | I.close i => M.close(trans(i, env)) + | I.signal(s, i) => trans(i, AMap.insert(env, s, newSignal s)) + | I.rebind(s1, s2, i) => + trans(i, AMap.insert(env, s2, bindSig(env, s1))) + | I.emit s => M.emit(bindSig(env, s)) + | I.await cfg => M.await(transConfig(cfg, env)) + | I.when(cfg, i1, i2) => + M.when(transConfig(cfg, env), trans(i1, env), trans(i2, env)) + | I.trapWith(cfg, i1, i2) => + M.trapWith(transConfig(cfg, env), trans(i1, env), trans(i2, env)) + (* end case *)) + and transConfig (cfg, env) = let + fun transCfg (I.posConfig s) = I.posConfig(bindSig(env, s)) + | transCfg (I.negConfig s) = I.negConfig(bindSig(env, s)) + | transCfg (I.orConfig(cfg1, cfg2)) = + I.orConfig(transCfg cfg1, transCfg cfg2) + | transCfg (I.andConfig(cfg1, cfg2)) = + I.andConfig(transCfg cfg1, transCfg cfg2) + in + transCfg cfg + end + val inputs' = List.map newSignal inputs + val outputs' = List.map newSignal outputs + fun ins (s as M.SIG{name, ...}, env) = AMap.insert(env, name, s) + val initialEnv = + List.foldl ins (List.foldl ins AMap.empty inputs') outputs' + val body' = trans (body, initialEnv) + in + M.M{ + now = ref 0, + moveFlg = ref false, + endOfInstant = ref false, + prog = body', + signals = !sigList, + inputs = inputs', + outputs = outputs' + } + end + + val run = M.runMachine + val reset = M.resetMachine + val inputsOf = M.inputsOf + val outputsOf = M.outputsOf + val inputSignal = M.inputSignal + val outputSignal = M.outputSignal + val setInSignal = M.setInSignal + val getInSignal = M.getInSignal + val getOutSignal = M.getOutSignal + + val posConfig = I.posConfig + val negConfig = I.negConfig + val orConfig = I.orConfig + val andConfig = I.andConfig + + val || = I.|| + val & = I.& + val nothing = I.nothing + val stop = I.stop + val suspend = I.suspend + val action = I.action + val exec = I.exec + val ifThenElse = I.ifThenElse + val repeat = I.repeat + val loop = I.loop + val close = I.close + val signal = I.signal + val rebind = I.rebind + val when = I.when + val trapWith = I.trapWith + fun trap (c, i) = I.trapWith(c, i, I.nothing) + val emit = I.emit + val await = I.await + + end diff --git a/smlnj-lib/RegExp/BackEnd/.cm/GUID/bt-engine.sml b/smlnj-lib/RegExp/BackEnd/.cm/GUID/bt-engine.sml new file mode 100644 index 0000000..b2623a3 --- /dev/null +++ b/smlnj-lib/RegExp/BackEnd/.cm/GUID/bt-engine.sml @@ -0,0 +1 @@ +guid-$/(regexp-lib.cm):BackEnd/bt-engine.sml-1714016092.375 diff --git a/smlnj-lib/RegExp/BackEnd/.cm/GUID/dfa-engine.sml b/smlnj-lib/RegExp/BackEnd/.cm/GUID/dfa-engine.sml new file mode 100644 index 0000000..adafe60 --- /dev/null +++ b/smlnj-lib/RegExp/BackEnd/.cm/GUID/dfa-engine.sml @@ -0,0 +1 @@ +guid-$/(regexp-lib.cm):BackEnd/dfa-engine.sml-1714016092.742 diff --git a/smlnj-lib/RegExp/BackEnd/.cm/GUID/engine-sig.sml b/smlnj-lib/RegExp/BackEnd/.cm/GUID/engine-sig.sml new file mode 100644 index 0000000..e0bf158 --- /dev/null +++ b/smlnj-lib/RegExp/BackEnd/.cm/GUID/engine-sig.sml @@ -0,0 +1 @@ +guid-$/(regexp-lib.cm):BackEnd/engine-sig.sml-1714016092.372 diff --git a/smlnj-lib/RegExp/BackEnd/.cm/GUID/fsm.sml b/smlnj-lib/RegExp/BackEnd/.cm/GUID/fsm.sml new file mode 100644 index 0000000..2c2d7ac --- /dev/null +++ b/smlnj-lib/RegExp/BackEnd/.cm/GUID/fsm.sml @@ -0,0 +1 @@ +guid-$/(regexp-lib.cm):BackEnd/fsm.sml-1714016092.562 diff --git a/smlnj-lib/RegExp/BackEnd/.cm/GUID/thompson-engine.sml b/smlnj-lib/RegExp/BackEnd/.cm/GUID/thompson-engine.sml new file mode 100644 index 0000000..ae970f6 --- /dev/null +++ b/smlnj-lib/RegExp/BackEnd/.cm/GUID/thompson-engine.sml @@ -0,0 +1 @@ +guid-$/(regexp-lib.cm):BackEnd/thompson-engine.sml-1714016092.490 diff --git a/smlnj-lib/RegExp/BackEnd/.cm/SKEL/bt-engine.sml b/smlnj-lib/RegExp/BackEnd/.cm/SKEL/bt-engine.sml new file mode 100644 index 0000000..77ccfae --- /dev/null +++ b/smlnj-lib/RegExp/BackEnd/.cm/SKEL/bt-engine.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f3d"List"MatchTree"d"Option"ad"BackTrackEngine"jh2ad"S"gp1d"RegExpSyntax"ad"M"gp1 gp1c"REGEXP_ENGINE" \ No newline at end of file diff --git a/smlnj-lib/RegExp/BackEnd/.cm/SKEL/dfa-engine.sml b/smlnj-lib/RegExp/BackEnd/.cm/SKEL/dfa-engine.sml new file mode 100644 index 0000000..0770348 --- /dev/null +++ b/smlnj-lib/RegExp/BackEnd/.cm/SKEL/dfa-engine.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f3d"RegExpSyntax"MatchTree"d"Vector"ad"DfaEngine"jh2ad"D"gp1d"Dfa"ad"M"gp1gp1c"REGEXP_ENGINE" \ No newline at end of file diff --git a/smlnj-lib/RegExp/BackEnd/.cm/SKEL/engine-sig.sml b/smlnj-lib/RegExp/BackEnd/.cm/SKEL/engine-sig.sml new file mode 100644 index 0000000..ff80430 --- /dev/null +++ b/smlnj-lib/RegExp/BackEnd/.cm/SKEL/engine-sig.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f3d"StringCvt"d"RegExpSyntax"d"MatchTree"ac"REGEXP_ENGINE"h0 \ No newline at end of file diff --git a/smlnj-lib/RegExp/BackEnd/.cm/SKEL/fsm.sml b/smlnj-lib/RegExp/BackEnd/.cm/SKEL/fsm.sml new file mode 100644 index 0000000..b6f7a31 --- /dev/null +++ b/smlnj-lib/RegExp/BackEnd/.cm/SKEL/fsm.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d5Cf6d"ListPair"CRegExpSyntax"d"Char"d"List"d"Int"d"TextIO"NaNFA"h1aIntSet"gp1c"ORD_SET"aNfa"jh9aS"gp1a0ListSetFn"Int2Set"6MoveSet"6CaCharSet"gp2)&I"I2" M"C"&Ngp1?aDFA"7ad"Dfa"jh9aN"gp1agp2aIntSetSet"6Ca&6ad"IS"gp19Cad"A2"gp1d"Array2"ad"A"gp1d"Array"ad"Map"j7gp1e"ListMapFn"Ngp1-N \ No newline at end of file diff --git a/smlnj-lib/RegExp/BackEnd/.cm/SKEL/thompson-engine.sml b/smlnj-lib/RegExp/BackEnd/.cm/SKEL/thompson-engine.sml new file mode 100644 index 0000000..997bac3 --- /dev/null +++ b/smlnj-lib/RegExp/BackEnd/.cm/SKEL/thompson-engine.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f5Cd"StringCvt"d"List"MatchTree"d"Array"d"Vector"Nad"ThompsonEngine"jh3aRE"gp1d"RegExpSyntax"ad"CSet"gp2 d"CharSet"ad"M"gp1gp1c"REGEXP_ENGINE" \ No newline at end of file diff --git a/smlnj-lib/RegExp/BackEnd/.cm/amd64-unix/bt-engine.sml b/smlnj-lib/RegExp/BackEnd/.cm/amd64-unix/bt-engine.sml new file mode 100644 index 0000000..8b56a0d Binary files /dev/null and b/smlnj-lib/RegExp/BackEnd/.cm/amd64-unix/bt-engine.sml differ diff --git a/smlnj-lib/RegExp/BackEnd/.cm/amd64-unix/dfa-engine.sml b/smlnj-lib/RegExp/BackEnd/.cm/amd64-unix/dfa-engine.sml new file mode 100644 index 0000000..dfd775e Binary files /dev/null and b/smlnj-lib/RegExp/BackEnd/.cm/amd64-unix/dfa-engine.sml differ diff --git a/smlnj-lib/RegExp/BackEnd/.cm/amd64-unix/engine-sig.sml b/smlnj-lib/RegExp/BackEnd/.cm/amd64-unix/engine-sig.sml new file mode 100644 index 0000000..3c6f779 Binary files /dev/null and b/smlnj-lib/RegExp/BackEnd/.cm/amd64-unix/engine-sig.sml differ diff --git a/smlnj-lib/RegExp/BackEnd/.cm/amd64-unix/fsm.sml b/smlnj-lib/RegExp/BackEnd/.cm/amd64-unix/fsm.sml new file mode 100644 index 0000000..654059c Binary files /dev/null and b/smlnj-lib/RegExp/BackEnd/.cm/amd64-unix/fsm.sml differ diff --git a/smlnj-lib/RegExp/BackEnd/.cm/amd64-unix/thompson-engine.sml b/smlnj-lib/RegExp/BackEnd/.cm/amd64-unix/thompson-engine.sml new file mode 100644 index 0000000..efd7ca5 Binary files /dev/null and b/smlnj-lib/RegExp/BackEnd/.cm/amd64-unix/thompson-engine.sml differ diff --git a/smlnj-lib/RegExp/BackEnd/bt-engine.sml b/smlnj-lib/RegExp/BackEnd/bt-engine.sml new file mode 100644 index 0000000..cf48c21 --- /dev/null +++ b/smlnj-lib/RegExp/BackEnd/bt-engine.sml @@ -0,0 +1,230 @@ +(* bt-engine.sml + * + * COPYRIGHT (c) 2008 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * Implements a regular expressions matcher based on a backtracking search. + *) + +structure BackTrackEngine : REGEXP_ENGINE = + struct + + exception Error + + structure S = RegExpSyntax + structure M = MatchTree + + type regexp = S.syntax + + (* a match specifies the position (as a stream) and the length of the match *) + type 'a match = {pos : 'a, len : int} MatchTree.match_tree + + fun compile r = r + + fun scan (regexp,getc,pos,stream) = + let fun getc' (s) = (case (getc (s)) + of SOME v => v + | NONE => raise Subscript) + (* This function gets an empty match structure, for when the appropriate + * alternative is not followed at all + *) + fun getMatchStructure (S.Group e) = (*[M.Match (NONE,getMatchStructure e)]*) getMatchStructure e + | getMatchStructure (S.Alt l) = List.concat (map getMatchStructure l) + | getMatchStructure (S.Concat l) = List.concat (map getMatchStructure l) + | getMatchStructure (S.Interval (e,_,_)) = getMatchStructure e + | getMatchStructure (_) = [] + (* Walk a regular expression in continuation-passing style + * The continuation is simply a list of all that is left to do + * Continuations only seem to arise when concatenation is considered + * + * Walk returns the boolean status of the beast, and a match_tree + * containing the match information. + * Also: the last position scanned and the remainder stream + * MODIFICATION: walk returns a list of matches + * (because we need to extract the longest match) + *) + fun max [] sel = raise Error + | max (x::xs) sel = + let fun max' [] curr currSel = curr + | max' (x::xs) curr currSel = let val xSel = sel(x) + in if (xSel>currSel) + then max' xs x xSel + else max' xs curr currSel + end + in + max' xs x (sel x) + end + fun longest l = max l (#3: 'a * 'b * int * 'c -> int) + fun optMinus1 (SOME i) = SOME (i-1) + | optMinus1 NONE = NONE + fun walk (S.Group e,cont,p,inits) = + (case walk (e,[],p,inits) + of [] => [(false,[],p,inits)] + | ((b,matches,last,s)::ls) => + let fun loop [] cLast 1 cCont cList = + let val [(b,matches,last,s)] = cList + val [(b',matches',last',s')] = cCont + in + [(b', (M.Match ({pos=inits, len=last-p}, + matches))::matches',last',s')] + end + | loop [] cLast n cCont cList = raise Error + | loop ((b,matches,last,s)::es) cLen cNum cCont cList = + let val v as (_,_,last',_) = longest (walk (S.Concat [], cont,last,s)) + in + if (last' > cLen) + then loop es last' 1 [v] [(b,matches,last,s)] + else if (last' = cLen) + then loop es cLen (cNum+1) (v::cCont) + ((b,matches,last,s)::cList) + else loop es cLen cNum cCont cList + end + in + loop ls last 1 [longest(walk (S.Concat [],cont,last,s))] + [(b,matches,last,s)] + end) + | walk (S.Alt [],[],p,inits) = [(true,[],p,inits)] + | walk (S.Alt [], (c::cs),p,inits) = walk (c,cs,p,inits) + | walk (S.Alt l, cont,p,inits) = + let fun loop [] = [] + | loop (e::es) = let val g = longest (walk (e,cont,p,inits)) + in + if (#1 g) + then g::(loop es) + else loop es + end + in + loop l + end + | walk (S.Concat [],[],p,inits) = [(true,[],p,inits)] + | walk (S.Concat [], (c::cs),p,inits) = walk (c,cs,p,inits) + | walk (S.Concat (e::es),cont,p,inits) = walk (e,(es@cont),p,inits) + | walk (S.Interval (e,0,SOME 0),[],p,inits) = [(true,[],p,inits)] + | walk (S.Interval (e,0,SOME 0),(c::cs),p,inits) = walk (c,cs,p,inits) + | walk (S.Interval(e,0,k), cont, p, inits) = + let val (b',matches',last',s') = longest (walk (S.Concat [],cont,p,inits)) + val (b,matches,last,s) = longest (walk (S.Interval (e,1,k),cont,p,inits)) + in + if ((b andalso b' andalso last >= last') orelse (b andalso (not b'))) + then [(b,matches,last,s)] + else if ((b' andalso b andalso last' > last) orelse (b' andalso (not b))) +(* FIXME: getMatchStructure isn't really doing anything so we need to fix this code. But how? *) + then [(b',(getMatchStructure e)@matches',last',s')] + else [(false,[],p,inits)] + end + | walk (S.Interval (e,1,SOME 1),cont,p,inits) = walk (e,cont,p,inits) + | walk (S.Interval (e,1,k),cont,p,inits) = + let val (b',matches',last',s') = longest (walk (e,[],p,inits)) (* need to match 1 *) + in + if (not b') + then [(false, [], p, inits)] + else let val (b,matches,last,s) = longest (walk (S.Interval (e,1,optMinus1 k), + cont,last',s')) + val (b'',matches'',last'',s'') = longest (walk (S.Concat [], + cont,last',s')) + in + if (b andalso b'' andalso last'' >= last) + then [(b'',matches'@matches'',last'',s'')] + else if (b) + then [(b,matches,last,s)] + else [(b'',matches'@matches'',last'',s'')] + end + end + | walk (S.Interval(e,n1,k), cont, p, inits) = + walk (S.Concat [e,S.Interval (e,n1-1,optMinus1 k)],cont,p,inits) + | walk (S.MatchSet set,[],p,inits) = + if (S.CharSet.isEmpty set) + then [(true,[],p,inits)] + else + (case (getc (inits)) + of SOME (chr,s) => + let val b = S.CharSet.member (set,chr) + in + [(b,[],p+(if b then 1 else 0),(if b then s else inits))] + end + | NONE => [(false,[],p,inits)]) + | walk (S.MatchSet set,(c::cs),p,inits) = + if (S.CharSet.isEmpty set) + then walk (c,cs,p,inits) + else (case (getc (inits)) + of SOME (chr,s) => + if (S.CharSet.member (set,chr)) + then walk (c,cs,(p+1),s) + else [(false,[],p,inits)] + | NONE => [(false,[],p,inits)]) + | walk (S.NonmatchSet set,[],p,inits) = + (case (getc (inits)) + of SOME (chr,s) => + let val b = not (S.CharSet.member (set,chr)) + in + [(b, [], p+(if b then 1 else 0),(if b then s else inits))] + end + | NONE => [(false,[],p,inits)]) + | walk (S.NonmatchSet set,(c::cs),p,inits) = + (case (getc (inits)) + of SOME (chr,s) => if (S.CharSet.member (set,chr)) + then [(false,[],p,inits)] + else walk (c,cs,(p+1),s) + | NONE => [(false,[],p,inits)]) + | walk (S.Char ch,[],p,inits) = + (case (getc (inits)) + of SOME (chr,s) => + let val b = (chr = ch) + in + [(b, [],p+(if b then 1 else 0),(if b then s else inits))] + end + | NONE => [(false,[],p,inits)]) + | walk (S.Char ch,(c::cs),p,inits) = + (case (getc (inits)) + of SOME (chr,s) => if (chr = ch) + then walk (c,cs,(p+1),s) + else [(false,[],p,inits)] + | NONE => [(false,[],p,inits)]) + | walk (S.Begin,[],p,inits) = [(p=0,[],p,inits)] + | walk (S.Begin,(c::cs),p,inits) = if (p=0) + then walk (c,cs,p,inits) + else [(false,[],p,inits)] + | walk (S.End,[],p,inits) = [(not (Option.isSome (getc (inits))),[],p,inits)] + | walk (S.End,(c::cs),p,inits) = if (Option.isSome (getc (inits))) + then [(false,[],p,inits)] + else walk (c,cs,p,inits) + val l = walk (regexp,[],pos,stream) handle Subscript => [(false,[],pos,stream)] + val v as (result,matches,last,s') = longest l handle _ => (false,[],pos,stream) + in + if result then SOME(M.Match ({pos=stream,len=last-pos}, matches),s') + else NONE + end + + fun prefix regexp getc stream = scan (regexp,getc,0,stream) + + fun find regexp getc stream = + let fun loop (p,s) = (case (scan (regexp,getc,p,s)) + of NONE => (case (getc (s)) + of SOME (_,s') => loop (p+1,s') + | NONE => NONE) + | SOME v => SOME v) + in + loop (0,stream) + end + + fun match [] getc stream = NONE + | match l getc stream = + let val m = map (fn (r,f) => (scan (r, getc,0,stream), f)) l + (* find the longest SOME *) + fun loop ([],max,len) = max + | loop ((NONE,_)::xs,max,maxlen) = loop (xs,max,maxlen) + | loop ((SOME(m,cs),f)::xs,max,maxlen) = + let val {len, pos} = MatchTree.root m + in + if (len>maxlen) + then loop (xs,(SOME(m,cs),f),len) + else loop (xs,max,maxlen) + end + val (max,f) = loop (tl(m),hd(m),~1) + in + case max + of NONE => NONE + | SOME (m,cs) => SOME (f m,cs) + end + + end diff --git a/smlnj-lib/RegExp/BackEnd/dfa-engine.sml b/smlnj-lib/RegExp/BackEnd/dfa-engine.sml new file mode 100644 index 0000000..1e52316 --- /dev/null +++ b/smlnj-lib/RegExp/BackEnd/dfa-engine.sml @@ -0,0 +1,81 @@ +(* dfa-engine.sml + * + * COPYRIGHT (c) 2008 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * Implements a matcher engine based on deterministic finite + * automata. + *) + +structure DfaEngine : REGEXP_ENGINE = + struct + + structure D = Dfa + structure M = MatchTree + + type regexp = D.dfa + + (* a match specifies the position (as a stream) and the length of the match *) + type 'a match = {pos : 'a, len : int} MatchTree.match_tree + + fun compile r = D.build r handle _ => raise RegExpSyntax.CannotCompile + + (* scan looks at a stream and attempts to match the dfa. + * it returns `NONE` if it fails + * it returns `SOME(pattern#, Match, rest of stream)` upon success + *) + fun scan (regexp, getc, p, stream) = let + val move = D.move regexp + val accepting = D.accepting regexp + fun loop (state, p, inits, lastAccepting) = (case getc inits + of NONE => lastAccepting + | SOME(c, s') => (case move (state, c) + of NONE => lastAccepting + | SOME new => (case accepting new + of SOME n => loop (new, p+1, s', SOME(p+1,s',n)) + | NONE => loop (new, p+1, s', lastAccepting) + (* end case *)) + (* end case *)) + (* end case *)) + fun try0 stream = (case (accepting 0) + of (SOME n) => SOME(n, M.Match({pos=stream,len=0},[]), stream) + | NONE => NONE + (* end case *)) + in + case getc stream + of NONE => try0 stream + | SOME(c, s') => (case loop (0, p, stream, NONE) + of NONE => try0 stream + | SOME(last, cs, n) => + SOME(n, M.Match({pos=stream,len=last-p},[]), cs) + (* end case *)) + (* end case *) + end + + fun prefix regexp getc stream = ( + case scan (regexp,getc,0,stream) + of NONE => NONE + | SOME (n,m,cs) => SOME (m,cs) + (* end case *)) + + fun find regexp getc stream = + let fun loop (p,s) = (case (scan (regexp,getc,p,s)) + of NONE => (case (getc (s)) + of SOME (_,s') => loop (p+1,s') + | NONE => NONE) + | SOME (n,m,cs) => SOME (m,cs)) + in + loop (0,stream) + end + + fun match [] = (fn getc => fn stream => NONE) + | match l = + let val dfa = D.buildPattern (map #1 l) + val a = Vector.fromList (map (fn (a,b) => b) l) + in + fn getc => fn stream => case (scan (dfa,getc,0,stream)) + of NONE => NONE + | SOME(n,m,cs) => SOME((Vector.sub (a,n)) m,cs) + end + + end diff --git a/smlnj-lib/RegExp/BackEnd/engine-sig.sml b/smlnj-lib/RegExp/BackEnd/engine-sig.sml new file mode 100644 index 0000000..4cde4d8 --- /dev/null +++ b/smlnj-lib/RegExp/BackEnd/engine-sig.sml @@ -0,0 +1,43 @@ +(* engine-sig.sml + * + * COPYRIGHT (c) 2008 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + *) + +signature REGEXP_ENGINE = + sig + + type regexp + (* the type of a compiled regular expression + *) + + (* a match specifies the position (as a stream) and the length of the match *) + type 'a match = {pos : 'a, len : int} MatchTree.match_tree + + val compile : RegExpSyntax.syntax -> regexp + (* compile a regular expression from the abstract syntax + *) + + val find : regexp -> (char,'a) StringCvt.reader -> ('a match, 'a) StringCvt.reader + (* scan the stream for the first occurence of the regular expression. The call + * + * find re getc strm + * + * returns NONE if the end of stream is reached without a match. Otherwise it + * returns SOME(match, strm'), where match is the match-tree for the match and + * strm' is the stream following the match. + *) + + val prefix : regexp ->(char,'a) StringCvt.reader -> ('a match, 'a) StringCvt.reader + (* attempt to match the stream at the current position with the + * regular expression + *) + + val match : (RegExpSyntax.syntax * ('a match -> 'b)) list + -> (char,'a) StringCvt.reader -> ('b, 'a) StringCvt.reader + (* attempt to the match the stream at the current position with one of + * the abstract syntax representations of regular expressions and trigger + * the corresponding action + *) + + end diff --git a/smlnj-lib/RegExp/BackEnd/fsm.sml b/smlnj-lib/RegExp/BackEnd/fsm.sml new file mode 100644 index 0000000..5d75729 --- /dev/null +++ b/smlnj-lib/RegExp/BackEnd/fsm.sml @@ -0,0 +1,504 @@ +(* fsm.sml + * + * COPYRIGHT (c) 2008 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * Non-deterministic and deterministic finite-state machines. + *) + +signature NFA = + sig + + exception SyntaxNotHandled + + structure IntSet : ORD_SET where type Key.ord_key = int + + type nfa + + val build : RegExpSyntax.syntax * int -> nfa + val buildPattern : RegExpSyntax.syntax list -> nfa + val start : nfa -> IntSet.set + val move : nfa -> int * char -> IntSet.set + val chars : nfa -> int -> char list + val accepting : nfa -> int -> int option + + val print : nfa -> unit + end + +structure Nfa : NFA = + struct + + exception SyntaxNotHandled + + datatype move = Move of int * char option * int + + fun compareCharOption (NONE, NONE) = EQUAL + | compareCharOption (NONE, SOME c) = LESS + | compareCharOption (SOME c, NONE) = GREATER + | compareCharOption (SOME c, SOME c') = Char.compare (c,c') + + structure S = RegExpSyntax + structure IntSet = ListSetFn ( + struct + type ord_key = int + val compare = Int.compare + end) + structure Int2Set = ListSetFn ( + struct + type ord_key = int * int + fun compare ((i1,i2), (j1,j2)) = (case Int.compare (i1,j1) + of EQUAL => Int.compare (i2,j2) + | v => v + (* end case *)) + end) + structure MoveSet = ListSetFn ( + struct + type ord_key = move + fun compare (Move (i,c,j),Move (i',c',j')) = (case (Int.compare (i,i')) + of EQUAL => (case (compareCharOption (c, c')) + of EQUAL => Int.compare (j,j') + | v => v) + | v => v) + end) + structure CharSet = S.CharSet + + structure I = IntSet + structure I2 = Int2Set + structure M = MoveSet + structure C = CharSet + + (* create sets from lists *) + fun iList l = I.addList (I.empty,l) + fun mList l = M.addList (M.empty,l) + + datatype nfa = Nfa of {states : I.set, + moves : M.set, + accepting : I2.set} + + fun print (Nfa {states,moves,accepting}) = + let val pr = TextIO.print + val prI = TextIO.print o Int.toString + val prI2 = TextIO.print o (fn (i1,i2) => (Int.toString i1)) + val prC = TextIO.print o Char.toString + in + pr ("States: 0 -> "); + prI (I.numItems (states)-1); + pr "\nAccepting:"; + I2.app (fn k => (pr " "; prI2 k)) accepting; + pr "\nMoves\n"; + M.app (fn (Move (i,NONE,d)) => (pr " "; + prI i; + pr " --@--> "; + prI d; + pr "\n") + | (Move (i,SOME c,d)) => (pr " "; + prI i; + pr " --"; + prC c; + pr "--> "; + prI d; + pr "\n")) moves + end + + fun nullAccept n = Nfa {states=iList [0,1], moves=M.add (M.empty, Move (0,NONE,1)), + accepting=I2.singleton (1,n)} + fun nullRefuse n = Nfa {states=iList [0,1], moves=M.empty, + accepting=I2.singleton (1,n)} + + fun renumber n st = n + st + fun renumberMove n (Move (s,c,s')) = Move (renumber n s, c, renumber n s') + fun renumberAcc n (st,n') = (n+st,n') + + fun build' n (S.Group e) = build' n e + | build' n (S.Alt l) = + foldr (fn (Nfa {states=s1, + moves=m1,...}, + Nfa {states=s2, + moves=m2,...}) => + let val k1 = I.numItems s1 + val k2 = I.numItems s2 + val s1' = I.map (renumber 1) s1 + val s2' = I.map (renumber (k1+1)) s2 + val m1' = M.map (renumberMove 1) m1 + val m2' = M.map (renumberMove (k1+1)) m2 + in + Nfa {states=I.addList (I.union (s1',s2'), + [0,k1+k2+1]), + moves=M.addList (M.union (m1',m2'), + [Move (0,NONE,1), + Move (0,NONE,k1+1), + Move (k1,NONE,k1+k2+1), + Move (k1+k2,NONE,k1+k2+1)]), + accepting=I2.singleton (k1+k2+1,n)} + end) + (nullRefuse n) (map (build' n) l) + | build' n (S.Concat l) = + foldr (fn (Nfa {states=s1,moves=m1,...}, + Nfa {states=s2,moves=m2,accepting}) => + let val k = I.numItems s1 - 1 + val s2' = I.map (renumber k) s2 + val m2' = M.map (renumberMove k) m2 + val accepting' = I2.map (renumberAcc k) accepting + in + Nfa {states=I.union (s1,s2'), + moves=M.union (m1,m2'), + accepting=accepting'} + end) + (nullAccept n) (map (build' n) l) + | build' n (S.Interval(e, 0, SOME 1)) = build' n (S.Alt [S.Concat [], e]) + | build' n (S.Interval(e, 0, NONE)) = + build' n (S.Alt [S.Concat [], S.posClosure e]) + | build' n (S.Interval(e, 1, NONE)) = let + val (Nfa {states,moves,...}) = build' n e + val m = I.numItems states + in + Nfa { + states=I.add (states,m), + moves=M.addList (moves, [Move (m-1,NONE,m), Move (m-1,NONE,0)]), + accepting=I2.singleton (m,n) + } + end + | build' n (S.Interval(e, n1, n2)) = raise SyntaxNotHandled + | build' n (S.MatchSet s) = + if (S.CharSet.isEmpty s) then nullAccept (n) + else + let val moves = S.CharSet.foldl (fn (c,moveSet) => M.add (moveSet,Move (0,SOME c,1))) + M.empty s + in + Nfa {states=iList [0,1], + moves=moves, + accepting=I2.singleton (1,n)} + end + | build' n (S.NonmatchSet s) = + let val moves = S.CharSet.foldl (fn (c,moveSet) => M.add (moveSet,Move (0,SOME c,1))) + M.empty (S.CharSet.difference (S.allChars,s)) + in + Nfa {states=iList [0,1], + moves=moves, + accepting=I2.singleton (1,n)} + end + | build' n (S.Char c) = Nfa {states=iList [0,1], + moves=M.singleton (Move (0,SOME c,1)), + accepting=I2.singleton (1,n)} + | build' n (S.Begin) = raise SyntaxNotHandled + | build' n (S.End) = raise SyntaxNotHandled + + + fun build (r,n) = let val (Nfa {states,moves,accepting}) = build' n r + (* Clean up the nfa to remove epsilon moves. + * A simple way to do this: + * 1. states={0}, moves={} + * 2. for every s in states, + * 3. compute closure(s) + * 4. for any move (i,c,o) with i in closure (s) + * 5. add move (0,c,o) to moves + * 6. add state o to states + * 7. repeat until no modifications to states and moves + *) + in + Nfa {states=states, moves=moves, accepting=accepting} + end + + fun buildPattern rs = + let fun loop ([],_) = [] + | loop (r::rs,n) = (build (r,n))::(loop (rs,n+1)) + val rs' = loop (rs,0) + val renums = foldr (fn (Nfa {states,...},acc) => 1::(map (fn k=>k+I.numItems states) + acc)) [] rs' + val news = ListPair.map (fn (Nfa {states,moves,accepting},renum) => + let val newStates=I.map (renumber renum) states + val newMoves=M.map (renumberMove renum) moves + val newAcc=I2.map (renumberAcc renum) accepting + in + Nfa{states=newStates, + moves=newMoves, + accepting=newAcc} + end) (rs',renums) + val (states,moves,accepting) = foldl (fn (Nfa{states,moves,accepting},(accS,accM,accA))=> + (I.union (states,accS), + M.union (moves,accM), + I2.union (accepting,accA))) + (I.singleton 0, + M.addList (M.empty, + map (fn k => Move (0,NONE,k)) renums), + I2.empty) news + in + Nfa {states=states,moves=moves,accepting=accepting} + + end + + fun accepting (Nfa {accepting,...}) state = + let val item = I2.find (fn (i,_) => (i=state)) accepting + in + case item + of NONE => NONE + | SOME (s,n) => SOME (n) + end + + (* Compute possible next states from orig with character c *) + fun oneMove (Nfa {moves,...}) (orig,char) = + M.foldr (fn (Move (_,NONE,_),set) => set + | (Move (or,SOME c,d),set) => + if (c=char) andalso (or=orig) + then I.add (set,d) + else set) + I.empty moves + + fun closure (Nfa {moves,...}) origSet = + let fun addState (Move (orig,NONE,dest),(b,states)) = + if (I.member (states,orig) andalso + not (I.member (states,dest))) + then (true,I.add (states,dest)) + else (b,states) + | addState (_,bs) = bs + fun loop (states) = + let val (modified,new) = M.foldr addState + (false,states) moves + in + if modified + then loop (new) + else new + end + in + loop (origSet) + end + + fun move nfa = + let val closure = closure nfa + val oneMove = oneMove nfa + in + closure o oneMove + end + + fun start nfa = closure nfa (I.singleton 0) + + fun chars (Nfa{moves,...}) state = let + fun f (Move(s1, SOME c, s2), s) = + if (s1 = state) then C.add(s, c) else s + | f (_, s) = s + in + C.listItems (M.foldl f C.empty moves) + end + + end + +signature DFA = + sig + + exception SyntaxNotHandled + + type dfa + + val build : RegExpSyntax.syntax -> dfa + val buildPattern : RegExpSyntax.syntax list -> dfa + val move : dfa -> int * char -> int option + val accepting : dfa -> int -> int option + val canStart : dfa -> char -> bool + + end + +structure Dfa : DFA = + struct + + exception SyntaxNotHandled + + datatype move = Move of int * char option * int + + fun compareCharOption (NONE,NONE) = EQUAL + | compareCharOption (NONE,SOME (c)) = LESS + | compareCharOption (SOME(c),NONE) = GREATER + | compareCharOption (SOME(c),SOME(c')) = Char.compare (c,c') + + structure N = Nfa + structure IntSet = N.IntSet + structure IntSetSet = + ListSetFn (struct + type ord_key = IntSet.set + val compare = IntSet.compare + end) + structure Int2Set = + ListSetFn (struct + type ord_key = int * int + fun compare ((i1,i2),(j1,j2)) = + case (Int.compare (i1,j1)) + of EQUAL => Int.compare (i2,j2) + | v => v + end) + structure MoveSet = + ListSetFn (struct + type ord_key = move + fun compare (Move (i,c,j),Move (i',c',j')) = + (case (Int.compare (i,i')) + of EQUAL => + (case (compareCharOption (c,c')) + of EQUAL => Int.compare (j,j') + | v => v) + | v => v) + end) + structure CharSet = + ListSetFn (struct + type ord_key = char + val compare = Char.compare + end) + + structure IS = IntSetSet + structure I = IntSet + structure I2 = Int2Set + structure M = MoveSet + structure C = CharSet + structure A2 = Array2 + structure A = Array + structure Map = ListMapFn (struct + type ord_key = IntSet.set + val compare = IntSet.compare + end) + + (* create sets from lists *) + fun iList l = I.addList (I.empty,l) + fun mList l = M.addList (M.empty,l) + + datatype dfa = Dfa of {states : I.set, + moves : M.set, + accepting : I2.set, + table : int option A2.array, + accTable : (int option) A.array, + startTable : bool A.array} + + fun print (Dfa {states,moves,accepting,...}) = + let val pr = TextIO.print + val prI = TextIO.print o Int.toString + val prI2 = TextIO.print o (fn (i1,i2) => Int.toString i1) + val prC = TextIO.print o Char.toString + in + pr ("States: 0 -> "); + prI (I.numItems (states)-1); + pr "\nAccepting:"; + I2.app (fn k => (pr " "; prI2 k)) accepting; + pr "\nMoves\n"; + M.app (fn (Move (i,NONE,d)) => (pr " "; + prI i; + pr " --@--> "; + prI d; + pr "\n") + | (Move (i,SOME c,d)) => (pr " "; + prI i; + pr " --"; + prC c; + pr "--> "; + prI d; + pr "\n")) moves + end + + + fun move' moves (i,c) = + (case (M.find (fn (Move (s1,SOME c',s2)) => + (s1=i andalso c=c')) + moves) + of NONE => NONE + | SOME (Move (s1,SOME c',s2)) => SOME s2) +(* fun move (Dfa {moves,...}) (i,c) = move' moves (i,c) *) + fun move (Dfa {table,...}) (i,c) = A2.sub (table,i,ord(c)-ord(Char.minChar)) + + fun accepting' accepting i = I2.foldr (fn ((s,n),NONE) => if (s=i) + then SOME(n) + else NONE + | ((s,n),SOME(n')) => if (s=i) + then SOME(n) + else SOME(n')) + NONE accepting +(* fun accepting (Dfa {accepting,...}) i = accepting' accepting i *) + fun accepting (Dfa {accTable,...}) i = A.sub (accTable,i) + + fun canStart (Dfa {startTable,...}) c = A.sub (startTable,ord(c)) + + fun build' nfa = + let val move = N.move nfa + val accepting = N.accepting nfa + val start = N.start nfa + val chars = N.chars nfa + fun getAllChars (ps) = + I.foldl + (fn (s,cs) => C.addList (cs,chars s)) + C.empty ps + val initChars = getAllChars (start) + fun getAllStates (ps,c) = + I.foldl + (fn (s,ss) => I.union (ss,move (s,c))) + I.empty ps + fun loop ([],set,moves) = (set,moves) + | loop (x::xs,set,moves) = + let val cl = getAllChars (x) + val (nstack,sdu,ml) = + C.foldl + (fn (c,(ns,sd,ml)) => + let val u = getAllStates (x,c) + in + if (not (IS.member (set,u)) + andalso (not (IS.member (sd,u)))) + then (u::ns, + IS.add (sd,u), + (x,c,u)::ml) + else (ns,sd,(x,c,u)::ml) + end) ([],IS.empty,[]) cl + in + loop (nstack@xs,IS.union(set,sdu),ml@moves) + end + val (sSet,mList) = loop ([start],IS.singleton (start), []) + val num = ref 1 + fun new () = let val n = !num + in + num := n+1 ; n + end + val sMap = Map.insert (Map.empty, start, 0) + val sSet' = IS.delete (sSet,start) + val sMap = IS.foldl (fn (is,map) => Map.insert (map,is,new ())) + sMap sSet' + val states = I.addList (I.empty,List.tabulate(!num,fn x => x)) + val moves = M.addList (M.empty, + map (fn (is1,c,is2) => + Move (valOf (Map.find (sMap,is1)), + SOME c, + valOf (Map.find (sMap,is2)))) + mList) + (* Given a set of accepting states, look for a given state, + * with the minimal corresponding pattern number + *) + fun minPattern accSet = let val l = map (valOf o accepting) (I.listItems accSet) + fun loop ([],min) = min + | loop (n::ns,min) = + if (n + let val items = I.filter (fn k => + case (accepting k) + of SOME _ => true + | NONE => false) is + in + if (I.isEmpty items) + then cis + else + I2.add (cis,(valOf (Map.find (sMap,is)), + minPattern items)) + end) I2.empty sSet + val table = A2.tabulate A2.RowMajor (!num, + ord(Char.maxChar)-ord(Char.minChar)+1, + fn (s,c) => move' moves (s,chr(c+ord(Char.minChar)))) + val accTable = A.tabulate (!num, + fn (s) => accepting' accept s) + val startTable = A.tabulate (ord(Char.maxChar)- + ord(Char.minChar)+1, + fn (c) => C.member (initChars, + chr(c+ord(Char.minChar)))) + in + Dfa {states=states,moves=moves,accepting=accept, + table=table,accTable=accTable,startTable=startTable} + end + + fun build r = build' (N.build (r,0)) + + fun buildPattern rs = build' (N.buildPattern rs) + + end diff --git a/smlnj-lib/RegExp/BackEnd/thompson-engine.sml b/smlnj-lib/RegExp/BackEnd/thompson-engine.sml new file mode 100644 index 0000000..fe7953b --- /dev/null +++ b/smlnj-lib/RegExp/BackEnd/thompson-engine.sml @@ -0,0 +1,417 @@ +(* thompson-engine.sml + * + * COPYRIGHT (c) 2008 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * This is an implementation of Ken Thompson's RE matchine algorithm from + * CACM (1968). It is based on the description of the algorithm by Russ + * Cox at http://swtch.com/~rsc/regexp/regexp1.html. + *) + +structure ThompsonEngine : REGEXP_ENGINE = + struct + + structure RE = RegExpSyntax + structure CSet = RE.CharSet + structure M = MatchTree + + (* a match specifies the position (as a stream) and the length of the match *) + type 'a match = {pos : 'a, len : int} MatchTree.match_tree + + (* intermediate representation of states *) + datatype state_kind + = CHR' of (char * state' ref) + | CSET' of (CSet.set * state' ref) + | NCSET' of (CSet.set * state' ref) + | SPLIT' of (state' ref * state' ref) + | BOL' of state' ref (* assert beginning of line *) + | EOL' of state' ref (* assert end of line *) + | FINAL' + + withtype state' = {id : int, kind : state_kind} + + (* the intermediate representation of an NFA *) + type frag = {start : state', out : state' ref list} + + (* return the ID of a state *) + fun idOf {id, kind} = id + + val final = {id = 0, kind = FINAL'} + + (* interpreter representation of states *) + datatype state + = CHR of (char * int) + | CSET of (CSet.set * int) + | NCSET of (CSet.set * int) + | SPLIT of (int * int) + | BOL of int (* assert beginning of line *) + | EOL of int (* assert end of line *) + | FINAL + + fun cvtState {id, kind} = (case kind + of CHR'(c, out) => CHR(c, idOf(!out)) + | CSET'(cset, out) => CSET(cset, idOf(!out)) + | NCSET'(cset, out) => NCSET(cset, idOf(!out)) + | SPLIT'(out1, out2) => SPLIT(idOf(!out1), idOf(!out2)) + | BOL' out => BOL(idOf(!out)) + | EOL' out => EOL(idOf(!out)) + | FINAL' => FINAL + (* end case *)) + + (* the representation of the NFA. The `start` field is the start state. + * The `state` vector maps state indices to states. By convention, + * state 0 is the accepting state. + *) + datatype regexp = RE of {start : int, states : state vector} + + fun compile re = let + (* the list of states; state 0 is always the accepting state *) + val nStates = ref 1 + val states = ref [final] + (* create new states *) + fun new kind = let + val id = !nStates + val s = {id = id, kind = kind} + in + states := s :: !states; + nStates := id+1; + s + end + fun newChr (c, out) = new (CHR'(c, out)) + fun newCset (cset, out) = new (CSET'(cset, out)) + fun newNcset (cset, out) = new (NCSET'(cset, out)) + fun newSplit (out1, out2) = new (SPLIT'(out1, out2)) + fun newBOL out = new (BOL' out) + fun newEOL out = new (EOL' out) + (* update the outputs of a fragment *) + fun setOuts (f : frag, s : state') = List.app (fn r => r := s) (#out f) + (* compile an RE *) + fun reComp re = (case re + of RE.Group re => reComp re + | RE.Alt[] => raise RE.CannotCompile + | RE.Alt[re] => reComp re + | RE.Alt(re::rest) => let + val f1 = reComp re + val f2 = reComp (RE.Alt rest) + val s = newSplit(ref(#start f1), ref(#start f2)) + in + {start = s, out = #out f1 @ #out f2} + end + | RE.Concat[] => raise RE.CannotCompile + | RE.Concat[re] => reComp re + | RE.Concat(re::rest) => cat (re, RE.Concat rest) + | RE.Interval(re, 0, SOME 1) => option re + | RE.Interval(re, 0, NONE) => closure re + | RE.Interval(re, 1, NONE) => posClosure re + | RE.Interval(re, min, optMax) => let + (* the suffix matches instances of `re` after the first `min` + * iterations. It is either `re*` (when `optMax` is `NONE`) + * or a sequence of `m - min` SPLITs, where one edge goes to + * out and the other goes to next SPLIT in the sequence + * (when `optMax` is `SOME m`). + *) + val suffix : frag = (case optMax + of NONE => closure re + | SOME m => let + val out = ref final + fun mkSuffix 0 = {start=final, out=[out]} + | mkSuffix i = let + val f = reComp re + val f' = mkSuffix(i-1) + val s = newSplit(out, ref(#start f)) + in + setOuts (f, #start f'); + {start = s, out = [out]} + end + in + if (m <= min) then raise RE.CannotCompile else (); + mkSuffix (m - min) + end + (* end case *)) + (* the prefix is `min` iterations of `re` *) + fun mkPrefix 0 = suffix + | mkPrefix i = let + val f = reComp re + val f' = mkPrefix (i-1) + in + setOuts (f, #start f'); + {start = #start f, out = #out f'} + end + in + mkPrefix min + end + | RE.MatchSet cset => let + val out = ref final + in + {start = newCset(cset, out), out = [out]} + end + | RE.NonmatchSet cset => let + val out = ref final + in + {start = newNcset(cset, out), out = [out]} + end + | RE.Char c => let + val out = ref final + in + {start = newChr(c, out), out = [out]} + end + | RE.Begin => let + val out = ref final + in + {start = newBOL out, out = [out]} + end + | RE.End => let + val out = ref final + in + {start = newEOL out, out = [out]} + end + (* end case *)) + (* compile re1 . re2 *) + and cat (re1, re2) = let + val f1 = reComp re1 + val f2 = reComp re2 + in + setOuts (f1, #start f2); + {start = #start f1, out = #out f2} + end + (* compile re? *) + and option re = let + val f = reComp re + val out = ref final + val s = newSplit(ref(#start f), out) + in + {start = s, out = out :: #out f} + end + (* compile re* *) + and closure re = let + val f = reComp re + val out = ref final + val s = newSplit(ref(#start f), out) + in + setOuts (f, s); + {start = s, out = [out]} + end + (* compile re+ *) + and posClosure re = let + val f = reComp re + val out = ref final + val s = newSplit(ref(#start f), out) + in + setOuts (f, s); + {start = #start f, out = [out]} + end + (* generate the intermediate state representation *) + val frag = reComp re + val _ = setOuts (frag, final) + (* convert the states to the final representation; note that we reverse the list + * so that the states are now in increasing order. + *) + val states = List.foldl (fn (s, l) => cvtState s :: l) [] (!states) + in + RE{ start = idOf(#start frag), states = Vector.fromList states } + end + +(* +DEBUG ** + fun stateToString (CHR(c, out)) = + concat["CHR (#\"", Char.toString c, "\", ", Int.toString out, ")"] + | stateToString (CSET(cs, out)) = concat["CSET (-, ", Int.toString out, ")"] + | stateToString (NCSET(cs, out)) = concat["NCSET (-, ", Int.toString out, ")"] + | stateToString (SPLIT(out1, out2)) = + concat["SPLIT (", Int.toString out1, ", ", Int.toString out2, ")"] + | stateToString (BOL out) = concat["BOL ", Int.toString out] + | stateToString (EOL out) = concat["EOL ", Int.toString out] + | stateToString FINAL = "FINAL" + fun dump (RE{start, states}) = let + fun prState st = print(stateToString st) + in + print(concat["start = ", Int.toString start, "\n"]); + Vector.appi + (fn (i, st) => (print(Int.toString i ^ ": "); prState st; print "\n")) + states + end +** -DEBUG *) + + (* is a stream at the end of line? *) + fun isEOL NONE = true + | isEOL (SOME(#"\n", _)) = true + | isEOL _ = false + + (* scan the stream for the first occurrence of the regular expression *) + fun scan (RE{start, states}, getc : (char,'a) StringCvt.reader) = let +(* DEBUG val _ = dump (RE{start=start, states=states}) *) + (* to make elimination of duplicates in a state set cheap, we map state IDs + * to a stamp of the last set that they were added to. + *) + val stamp = ref 0w1 + fun incr () = let val s = !stamp + 0w1 in stamp := s; s end + val lastStamp = Array.array(Vector.length states, 0w0) + (* conditionally add the epsilon closure of the state `id` to `stateList` *) + fun addState (isFirst, strm, stamp', stateList, id) = let + fun add (stateList, id) = + if (Array.sub(lastStamp, id) = stamp') + (* the state is already in the list *) + then stateList + else ( + Array.update(lastStamp, id, stamp'); + case Vector.sub(states, id) + of SPLIT(out1, out2) => add (add (stateList, out1), out2) + | BOL out => if isFirst + then add(stateList, out) + else stateList + | EOL out => if isEOL (getc strm) + then add(stateList, out) + else stateList + | state => state :: stateList + (* end case *)) + in + add (stateList, id) + end + (* get the list of start states by performing epsilon moves *) + fun startStates strm = let + val stamp' = incr() + in + addState (true, strm, stamp', [], start) + end + (* is the accepting state in the current set of states? *) + fun isMatch stamp = (Array.sub(lastStamp, 0) = stamp) + (* attempt to match the RE; the parameters are + * - isFirst true if the current stream position is the start of + * a line (or the input) + * - strm the initial stream to scan + *) + fun find' (isFirst, strm) = let + (* scanning the input; the parameters are + * - isFirst true if the current position is the start of + * a line (or the input) + * - n the number of characters matched so far + * - strm the current position of the input stream + * - nfaStates the current list of NFA states (guaranteed to be + * non-empty) + * - lastMatch if we were previously in a accepting state during + * this scan, then `lastMatch` is the value + * `SOME(startPos, k, strm')`, where `k` is the length + * of the match and `strm'` is stream position + * immediately following the match. + *) + fun scan (isFirst, n, strm, nfaStates, lastMatch) = (case getc strm + of NONE => lastMatch + | SOME(c, strm') => let + (* bump the stamp counter *) + val stamp' = incr() + (* compute the next set of NFA states by seeing if there + * is a transition labeled with the character `c` + *) + fun test ([], nextStates) = nextStates + | test (s::r, nextStates) = let + fun continue nextStates = test(r, nextStates) + fun add out = + continue( + addState ( + isFirst, strm', stamp', nextStates, out)) + in + case s + of CHR(c', out) => if (c = c') + then add out + else continue nextStates + | CSET(cset, out) => if CSet.member(cset, c) + then add out + else continue nextStates + | NCSET(cset, out) => if CSet.member(cset, c) + then continue nextStates + else add out + | _ => continue nextStates + (* end case *) + end + val next = test (nfaStates, []) + in +(* +DEBUG ** +print(concat[ +"{", String.concatWithMap "," stateToString nfaStates, "} -- ", +"#\"", Char.toString c, "\" --> {", +String.concatWithMap "," stateToString next, "}\n"]); +let val stamps = Array.foldri (fn (i, s, stmps) => (i, s)::stmps) [] lastStamp +fun w2s w = Int.toString(Word.toIntX w) +fun stamp2s (i, s) = concat[Int.toString i, ":", w2s s] +in +print(concat[ +" stamps [", String.concatWithMap "," stamp2s stamps, "] @ ", w2s stamp', "\n"]); +case lastMatch + of NONE => print " lastMatch = NONE\n" + | SOME(n, _) => print(concat[" lastMatch = SOME(", Int.toString n, ", -)\n"]) +(* end case *) +end; +** -DEBUG *) + case next + of [] => lastMatch + | _ => let + val n = n+1 + val isFirst = (c = #"\n") + val lastMatch = if isMatch stamp' + then SOME(n, strm') + else lastMatch + in + scan (isFirst, n, strm', next, lastMatch) + end + (* end case *) + end + (* end case *)) + val nfaStart = startStates strm + val lastMatch = if isMatch(!stamp) + then SOME(0, strm) + else NONE + in + scan (isFirst, 0, strm, nfaStart, lastMatch) + end (* find' *) + in + fn (isFirst, strm) => (case find'(isFirst, strm) + of SOME(n, strm') => SOME(M.Match({pos=strm, len=n}, []), strm') + | NONE => NONE + (* end case *)) + end + + fun find re getc strm = let + val scan = scan (re, getc) +(* TODO: this is potentially expensive backtracking at the top level; is we had +* support for groups, then we could modify the state machine to match ".*|(re)", +* which would avoid backtracking. +*) + fun loop (isFirst, s) = (case (scan (isFirst, s)) + of NONE => (case (getc s) + of SOME(#"\n", s') => loop (true, s') + | SOME(_, s') => loop (false, s') + | NONE => NONE + (* end case *)) + | someMatch => someMatch + (* end case *)) + in + loop (true, strm) + end + + fun prefix re getc strm = scan (re, getc) (true, strm) + + fun match [] = (fn getc => fn strm => NONE) + | match l = let + (* compile the REs *) + val l = List.map (fn (re, act) => (compile re, act)) l + fun match' getc strm = let + (* find the longest SOME *) + fun loop ([], max, _) = max + | loop ((re, act)::r, max, maxLen) = ( + case scan(re, getc) (true, strm) + of NONE => loop (r, max, maxLen) + | SOME(m as MatchTree.Match({len, ...}, _), cs) => + if (len > maxLen) + then loop (r, SOME(m, act, cs), len) + else loop (r, max, maxLen) + (* end case *)) + in + case loop (l, NONE, ~1) + of NONE => NONE + | SOME(m, act, cs) => SOME(act m, cs) + (* end case *) + end + in + match' + end + + end diff --git a/smlnj-lib/RegExp/FrontEnd/.cm/GUID/awk-syntax.sml b/smlnj-lib/RegExp/FrontEnd/.cm/GUID/awk-syntax.sml new file mode 100644 index 0000000..a312234 --- /dev/null +++ b/smlnj-lib/RegExp/FrontEnd/.cm/GUID/awk-syntax.sml @@ -0,0 +1 @@ +guid-$/(regexp-lib.cm):FrontEnd/awk-syntax.sml-1714016092.770 diff --git a/smlnj-lib/RegExp/FrontEnd/.cm/GUID/parser-sig.sml b/smlnj-lib/RegExp/FrontEnd/.cm/GUID/parser-sig.sml new file mode 100644 index 0000000..0dde61e --- /dev/null +++ b/smlnj-lib/RegExp/FrontEnd/.cm/GUID/parser-sig.sml @@ -0,0 +1 @@ +guid-$/(regexp-lib.cm):FrontEnd/parser-sig.sml-1714016092.465 diff --git a/smlnj-lib/RegExp/FrontEnd/.cm/GUID/syntax-sig.sml b/smlnj-lib/RegExp/FrontEnd/.cm/GUID/syntax-sig.sml new file mode 100644 index 0000000..6ec3df6 --- /dev/null +++ b/smlnj-lib/RegExp/FrontEnd/.cm/GUID/syntax-sig.sml @@ -0,0 +1 @@ +guid-$/(regexp-lib.cm):FrontEnd/syntax-sig.sml-1714016092.341 diff --git a/smlnj-lib/RegExp/FrontEnd/.cm/GUID/syntax.sml b/smlnj-lib/RegExp/FrontEnd/.cm/GUID/syntax.sml new file mode 100644 index 0000000..04b2c9b --- /dev/null +++ b/smlnj-lib/RegExp/FrontEnd/.cm/GUID/syntax.sml @@ -0,0 +1 @@ +guid-$/(regexp-lib.cm):FrontEnd/syntax.sml-1714016092.352 diff --git a/smlnj-lib/RegExp/FrontEnd/.cm/SKEL/awk-syntax.sml b/smlnj-lib/RegExp/FrontEnd/.cm/SKEL/awk-syntax.sml new file mode 100644 index 0000000..440920d --- /dev/null +++ b/smlnj-lib/RegExp/FrontEnd/.cm/SKEL/awk-syntax.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f2d"List"d"Int"ad"AwkSyntax"jh4ad"R"gp1d"RegExpSyntax"ad"SC"gp1d"StringCvt"ad"W8"gp1d"Word8"ad"C"gp1d"Char"gp1c"REGEXP_PARSER" \ No newline at end of file diff --git a/smlnj-lib/RegExp/FrontEnd/.cm/SKEL/parser-sig.sml b/smlnj-lib/RegExp/FrontEnd/.cm/SKEL/parser-sig.sml new file mode 100644 index 0000000..67f5245 --- /dev/null +++ b/smlnj-lib/RegExp/FrontEnd/.cm/SKEL/parser-sig.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f2d"StringCvt"d"RegExpSyntax"ac"REGEXP_PARSER"h0 \ No newline at end of file diff --git a/smlnj-lib/RegExp/FrontEnd/.cm/SKEL/syntax-sig.sml b/smlnj-lib/RegExp/FrontEnd/.cm/SKEL/syntax-sig.sml new file mode 100644 index 0000000..769e3a7 --- /dev/null +++ b/smlnj-lib/RegExp/FrontEnd/.cm/SKEL/syntax-sig.sml @@ -0,0 +1,2 @@ +Skeleton 5 +ac"REGEXP_SYNTAX"h1ad"CharSet"gp1c"ORD_SET" \ No newline at end of file diff --git a/smlnj-lib/RegExp/FrontEnd/.cm/SKEL/syntax.sml b/smlnj-lib/RegExp/FrontEnd/.cm/SKEL/syntax.sml new file mode 100644 index 0000000..ce99f04 --- /dev/null +++ b/smlnj-lib/RegExp/FrontEnd/.cm/SKEL/syntax.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f2d"Char"d"List"ad"RegExpSyntax"jh1ad"CharSet"jh0gp1e"ListSetFn"gp1c"REGEXP_SYNTAX" \ No newline at end of file diff --git a/smlnj-lib/RegExp/FrontEnd/.cm/amd64-unix/awk-syntax.sml b/smlnj-lib/RegExp/FrontEnd/.cm/amd64-unix/awk-syntax.sml new file mode 100644 index 0000000..7e5c08b Binary files /dev/null and b/smlnj-lib/RegExp/FrontEnd/.cm/amd64-unix/awk-syntax.sml differ diff --git a/smlnj-lib/RegExp/FrontEnd/.cm/amd64-unix/parser-sig.sml b/smlnj-lib/RegExp/FrontEnd/.cm/amd64-unix/parser-sig.sml new file mode 100644 index 0000000..902f6f5 Binary files /dev/null and b/smlnj-lib/RegExp/FrontEnd/.cm/amd64-unix/parser-sig.sml differ diff --git a/smlnj-lib/RegExp/FrontEnd/.cm/amd64-unix/syntax-sig.sml b/smlnj-lib/RegExp/FrontEnd/.cm/amd64-unix/syntax-sig.sml new file mode 100644 index 0000000..0ecf8ee Binary files /dev/null and b/smlnj-lib/RegExp/FrontEnd/.cm/amd64-unix/syntax-sig.sml differ diff --git a/smlnj-lib/RegExp/FrontEnd/.cm/amd64-unix/syntax.sml b/smlnj-lib/RegExp/FrontEnd/.cm/amd64-unix/syntax.sml new file mode 100644 index 0000000..d44cb9d Binary files /dev/null and b/smlnj-lib/RegExp/FrontEnd/.cm/amd64-unix/syntax.sml differ diff --git a/smlnj-lib/RegExp/FrontEnd/awk-syntax.sml b/smlnj-lib/RegExp/FrontEnd/awk-syntax.sml new file mode 100644 index 0000000..326dbe5 --- /dev/null +++ b/smlnj-lib/RegExp/FrontEnd/awk-syntax.sml @@ -0,0 +1,254 @@ +(* awk-syntax.sml + * + * COPYRIGHT (c) 2008 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * This module implements the AWK syntax for regular expressions. The + * syntax is defined on pp. 28-30 of "The AWK Programming Language," + * by Aho, Kernighan and Weinberger. We have extended it with interval + * syntax, which was added as part of the POSIX standard. + * + * The meta characters are: + * "\" "^" "$" "." "[" "]" "|" "(" ")" "*" "+" "?" + * + * Atomic REs: + * c matches the character c (for non-metacharacters c) + * "^" matches the empty string at the beginning of a line + * "$" matches the empty string at the end of a line + * "." matches any single character (except \000 and \n) + * + * Escape sequences: + * "\b" matches backspace + * "\f" matches formfeed + * "\n" matches newline (linefeed) + * "\r" matches carriage return + * "\t" matches tab + * "\"ddd matches the character with octal code ddd. + * "\"c matches the character c (e.g., \\ for \, \" for ") + * "\x"dd matches the character with hex code dd. + * + * Character classes: + * [...] matches any character in "..." + * [^...] a complemented character list, which matches any character not + * in the list "..." + * + * Compound regular expressions, where A and B are REs: + * A|B matches A or B + * AB matches A followed by B + * A? matches zero or one As + * A* matches zero or more As + * A+ matches one or more As + * A{n} matches n copies of A + * A{n,} matches n or more copies of A + * A{n,m} matches from n to m copies of A + * (A) matches A + *) + +structure AwkSyntax : REGEXP_PARSER = + struct + + structure R = RegExpSyntax + + structure SC = StringCvt + structure W8 = Word8 + structure C = Char + + val isMeta = C.contains "\\^$.[]|()*+?" + + exception Error + + val dotMatch = R.NonmatchSet (R.CharSet.addList (R.CharSet.empty,explode "\000\n")) + + fun scan getc cs = let + fun getc' cs = (case (getc cs) + of NONE => raise Error + | (SOME arg) => arg) + (* end case *) + fun isOctDigit c = (#"0" <= c) andalso (c <= #"7") + fun returnVal (v, cl, cs) = let + val (n, _) = valOf (Int.scan v List.getItem cl) + in + (C.chr n, cs) handle _ => raise Error + (* SC.scanString (Int.scan SC.OCT) (implode [c1,c2,c3]) *) + end + fun getDecimal cs = let + fun lp (cs, digits) = (case getc cs + of NONE => (cs, List.rev digits) + | SOME(c, cs') => + if (C.isDigit c) then lp(cs', c::digits) + else (cs, List.rev digits) + (* end case *)) + in + case lp (cs, []) + of (_, []) => raise Error (* no digits *) + | (cs, digits) => let + val SOME(n, _) = (Int.scan SC.DEC List.getItem digits) + in + (n, cs) + end + (* end case *) + end + fun getHexChar (c,cs) = (case (getc cs) + of NONE => returnVal (SC.HEX,[c],cs) + | SOME (c',cs') => + if not (C.isHexDigit c') then returnVal (SC.HEX,[c],cs) + else returnVal (SC.HEX,[c,c'],cs') + (* end case *)) + fun getOctalChar (c,cs) = (case (getc cs) + of NONE => returnVal (SC.OCT,[c],cs) + | SOME(c',cs') => + if not (isOctDigit c') then returnVal (SC.OCT,[c],cs) + else (case (getc cs') + of NONE => returnVal (SC.OCT,[c,c'],cs') + | SOME (c'',cs'') => + if not (isOctDigit c'') then returnVal (SC.OCT,[c,c'],cs') + else returnVal (SC.OCT,[c,c',c''],cs''))) + fun getEscapeChar cs = (case (getc' cs) + of (#"b", cs) => (#"\008", cs) + | (#"f", cs) => (#"\012", cs) + | (#"n", cs) => (#"\n", cs) + | (#"r", cs) => (#"\013", cs) + | (#"t", cs) => (#"\t", cs) + | (#"x", cs) => let val (c1,cs) = getc' cs + in + if (C.isHexDigit c1) then getHexChar (c1,cs) else raise Error + end + | (c1, cs) => + if (isOctDigit c1) then getOctalChar (c1,cs) else (c1, cs)) + fun scanAlt (stk, cs) = let + val (re, cs') = scanSeq([], cs) + in + case (stk, getc cs') + of ([], NONE) => (re, cs') + | (_, SOME(#"|", cs'')) => scanAlt(re::stk, cs'') + | _ => (R.Alt(rev(re::stk)), cs') + (* end case *) + end + and scanSeq (stk, cs) = let + fun continue (re, cs') = scanSeq(re::stk, cs') + fun done () = (R.Concat(rev stk), cs) + in + case (stk, getc cs) + of ([], NONE) => raise Error + | ([re], NONE) => (re, cs) + | (_, NONE) => done() + | (re::r, SOME(#"{", cs')) => let + val (n, m, cs'') = scanInterval cs' + in + scanSeq (R.Interval(re, n, m)::r, cs'') + end + | (re::r, SOME(#"?", cs')) => scanSeq (R.optional re :: r, cs') + | (re::r, SOME(#"*", cs')) => scanSeq (R.closure re :: r, cs') + | (re::r, SOME(#"+", cs')) => scanSeq (R.posClosure re :: r, cs') + | (_, SOME(#"|", _)) => done() + | (_, SOME(#")", _)) => done() + | (_, SOME(#"(", cs')) => continue(scanGrp cs') + | (_, SOME(#".", cs')) => continue(dotMatch, cs') + | (_, SOME(#"^", cs')) => continue(R.Begin, cs') + | (_, SOME(#"$",cs')) => continue(R.End, cs') + | (_, SOME(#"[", cs')) => continue(scanClass cs') + | (_, SOME(#"\\", cs')) => continue(scanEscape cs') + | (_, SOME(c, cs')) => if (isMeta c) + then raise Error + else scanSeq((R.Char c)::stk, cs') + (* end case *) + end + (* scan the tail of "{n}", "{n,}", or "{n,m"}", where the leading "{" has already + * been scanned. + *) + and scanInterval cs = let + val (n, cs) = getDecimal cs + in + case getc cs + of SOME(#",", cs) => (case getc cs + of SOME(#"}", cs) => (n, NONE, cs) + | _ => let + val (m, cs) = getDecimal cs + in + case getc cs + of SOME(#"}", cs) => (n, SOME m, cs) + | _ => raise Error + (* end case *) + end + (* end case *)) + | SOME(#"}", cs) => (n, SOME n, cs) + | _ => raise Error + (* end case *) + end + and scanGrp cs = let + val (re, cs') = scanAlt ([], cs) + in + case (getc' cs') + of (#")", cs'') => (R.Group re, cs'') + | _ => raise Error + (* end case *) + end + and scanClass cs = let + fun scanClass' cs = let + fun scanRange1 (set, cs) = (case (getc' cs) + of (#"]", cs) => (set,cs) + | (#"\\", cs) => let + val (c, cs) = getEscapeChar cs + in + scanRange2 (set, c, cs) + end + | (c, cs) => scanRange2 (set, c, cs) + (* end case *)) + and scanRange2 (set, c, cs) = (case (getc' cs) + of (#"]", cs) => (R.CharSet.add(set, c), cs) + | (#"\\", cs) => let + val (c', cs) = getEscapeChar cs + in + scanRange2 (R.CharSet.add(set, c), c', cs) + end + | (#"-", cs) => scanRange3 (set, c, cs) + | (c', cs) => + scanRange2 (R.CharSet.add(set, c), c', cs) + (* end case *)) + and scanRange3 (set, minC, cs) = (case (getc' cs) + of (#"]", cs) => + (R.CharSet.add(R.CharSet.add(set, minC), #"-"), cs) + | (#"\\", cs) => let + val (c, cs) = getEscapeChar cs + in + chkRange(set, minC, c, cs) + end + | (c, cs) => chkRange(set, minC, c, cs) + (* end case *)) + and chkRange (set, minC, maxC, cs) = + if (minC > maxC) + then scanRange1 (set,cs ) (*raise Error *) (* as per bwk test suite *) + else scanRange1 (R.addRange (set,minC,maxC),cs) (*R.CharSet.addList (set,List.tabulate (ord(maxC)-ord(minC)+1,fn v => chr (v+ord(minC)))), cs) *) + in + case (getc' cs) + of (#"-", cs) => + scanRange1(R.CharSet.add(R.CharSet.empty, #"-"), cs) + | (#"]", cs) => + scanRange2(R.CharSet.empty, #"]", cs) (* as per bwk test suite *) + | _ => scanRange1(R.CharSet.empty, cs) + (* end case *) + end + in + case (getc' cs) + of (#"^", cs) => let + val (set, cs) = scanClass' cs + in + (R.NonmatchSet set, cs) + end + | _ => let + val (set, cs) = scanClass' cs + in + (R.MatchSet set, cs) + end + (* end case *) + end + and scanEscape cs = let val (c, cs) = getEscapeChar cs + in + (R.Char c, cs) + end + in + SOME(scanAlt([], cs)) handle Error => NONE + end + + + end (* AWK_RE_Syntax *) diff --git a/smlnj-lib/RegExp/FrontEnd/parser-sig.sml b/smlnj-lib/RegExp/FrontEnd/parser-sig.sml new file mode 100644 index 0000000..bae0700 --- /dev/null +++ b/smlnj-lib/RegExp/FrontEnd/parser-sig.sml @@ -0,0 +1,17 @@ +(* parser-sig.sml + * + * COPYRIGHT (c) 2020 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + *) + +signature REGEXP_PARSER = + sig + + val scan : (char, 'a) StringCvt.reader + -> (RegExpSyntax.syntax, 'a) StringCvt.reader + (* read an external representation of a regular expression + * from the stream and return an abstract syntax representation + *) + + end + diff --git a/smlnj-lib/RegExp/FrontEnd/syntax-sig.sml b/smlnj-lib/RegExp/FrontEnd/syntax-sig.sml new file mode 100644 index 0000000..5f014a1 --- /dev/null +++ b/smlnj-lib/RegExp/FrontEnd/syntax-sig.sml @@ -0,0 +1,60 @@ +(* syntax-sig.sml + * + * COPYRIGHT (c) 2020 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * This is the abstract syntax tree used to represent regular expressions. + * It serves as the glue between different front-ends (implementing + * different RE specification languages), and different back-ends (implementing + * different compilation/searching algorithms). + *) + +signature REGEXP_SYNTAX = + sig + + exception CannotCompile + + structure CharSet : ORD_SET where type Key.ord_key = char + + datatype syntax + = Group of syntax + | Alt of syntax list + | Concat of syntax list + | Interval of (syntax * int * int option) + (* iteration closure: the first integer is the minimum + * number of matches and the second is the maximum, with + * `NONE` meaning infinity. + *) + | MatchSet of CharSet.set + | NonmatchSet of CharSet.set + | Char of char + | Begin (* Matches beginning of stream *) + | End (* Matches end of stream *) + + (* shorthand for standard RE forms *) + val optional : syntax -> syntax (* == Interval(re, 0, SOME 1) *) + val closure : syntax -> syntax (* == Interval(re, 0, NONE) *) + val posClosure : syntax -> syntax (* == Interval(re, 1, NONE) *) + + val fromRange : char * char -> CharSet.set + val addRange : CharSet.set * char * char -> CharSet.set + + val allChars : CharSet.set + + (* POSIX character sets (plus a couple) *) + val alnum : CharSet.set (* letters and digits *) + val alpha : CharSet.set (* letters *) + val ascii : CharSet.set (* 0 <= ord c <= 127 *) + val blank : CharSet.set (* #"\t" and space *) + val cntl : CharSet.set (* non-printable characters *) + val digit : CharSet.set (* decimal digits *) + val graph : CharSet.set (* visible characters (does not include space) *) + val lower : CharSet.set (* lower-case letters *) + val print : CharSet.set (* printable characters (includes space) *) + val punct : CharSet.set (* visible characters other than letters and digits *) + val space : CharSet.set (* #"\t", #"\r", #"\n", #"\v", #"\f", and space *) + val upper : CharSet.set (* upper-case letters *) + val word : CharSet.set (* letters, digit, and underscore *) + val xdigit : CharSet.set (* hexadecimal digits *) + + end; diff --git a/smlnj-lib/RegExp/FrontEnd/syntax.sml b/smlnj-lib/RegExp/FrontEnd/syntax.sml new file mode 100644 index 0000000..2a1bf19 --- /dev/null +++ b/smlnj-lib/RegExp/FrontEnd/syntax.sml @@ -0,0 +1,66 @@ +(* syntax.sml + * + * COPYRIGHT (c) 2008 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * This is the abstract syntax tree used to represent regular expressions. + * It serves as the glue between different front-ends (implementing + * different RE specification languages), and different back-ends (implementing + * different compilation/searching algorithms). + *) + +structure RegExpSyntax : REGEXP_SYNTAX = + struct + + exception CannotCompile + + structure CharSet = ListSetFn ( + struct type ord_key = char + val compare = Char.compare end) + + datatype syntax + = Group of syntax + | Alt of syntax list + | Concat of syntax list + | Interval of (syntax * int * int option) + | MatchSet of CharSet.set + | NonmatchSet of CharSet.set + | Char of char + | Begin + | End + + fun optional re = Interval(re, 0, SOME 1) + fun closure re = Interval(re, 0, NONE) + fun posClosure re = Interval(re, 1, NONE) + + fun addRange (s, minC ,maxC) = let + val fst = ord minC + val lst = ord maxC + in + CharSet.addList (s, List.tabulate (lst - fst + 1, fn v => chr(v + fst))) + end + fun fromRange (minC, maxC) = addRange (CharSet.empty, minC, maxC) + + val allChars = fromRange (Char.minChar, Char.maxChar) + + val digit = fromRange (#"0", #"9") + val lower = fromRange (#"a", #"z") + val upper = fromRange (#"A", #"Z") + val alpha = CharSet.union(lower, upper) + val alnum = CharSet.union(alpha, digit) + val ascii = fromRange (#"\000", #"\127") + val blank = CharSet.fromList [#" ", #"\t"] + val cntl = addRange(CharSet.singleton #"\127", #"\000", #"\031") + val graph = fromRange (#"\033", #"\126") + val print = CharSet.add (graph, #" ") + val punct = CharSet.fromList [ + #"]", #"[", #"!", #"\"", #"#", #"$", #"%", #"&", + #"'", #"(", #")", #"*", #"+", #",", #".", #"/", + #":", #";", #"<", #"=", #">", #"?", #"@", #"\\", + #"^", #"_", #"`", #"{", #"|", #"}", #"~", #"-" + ] + val space = CharSet.fromList [#"\t", #"\r", #"\n", #"\v", #"\f", #" "] + val word = CharSet.add(alnum, #"_") + val xdigit = addRange (addRange (digit, #"a", #"f"), #"A", #"F") + + end diff --git a/smlnj-lib/RegExp/Glue/.cm/GUID/match-tree.sml b/smlnj-lib/RegExp/Glue/.cm/GUID/match-tree.sml new file mode 100644 index 0000000..4e3880e --- /dev/null +++ b/smlnj-lib/RegExp/Glue/.cm/GUID/match-tree.sml @@ -0,0 +1 @@ +guid-$/(regexp-lib.cm):Glue/match-tree.sml-1714016092.310 diff --git a/smlnj-lib/RegExp/Glue/.cm/GUID/regexp-fn.sml b/smlnj-lib/RegExp/Glue/.cm/GUID/regexp-fn.sml new file mode 100644 index 0000000..db3b1e9 --- /dev/null +++ b/smlnj-lib/RegExp/Glue/.cm/GUID/regexp-fn.sml @@ -0,0 +1 @@ +guid-$/(regexp-lib.cm):Glue/regexp-fn.sml-1714016092.472 diff --git a/smlnj-lib/RegExp/Glue/.cm/GUID/regexp-sig.sml b/smlnj-lib/RegExp/Glue/.cm/GUID/regexp-sig.sml new file mode 100644 index 0000000..23c4d1b --- /dev/null +++ b/smlnj-lib/RegExp/Glue/.cm/GUID/regexp-sig.sml @@ -0,0 +1 @@ +guid-$/(regexp-lib.cm):Glue/regexp-sig.sml-1714016092.468 diff --git a/smlnj-lib/RegExp/Glue/.cm/SKEL/match-tree.sml b/smlnj-lib/RegExp/Glue/.cm/SKEL/match-tree.sml new file mode 100644 index 0000000..6c73230 --- /dev/null +++ b/smlnj-lib/RegExp/Glue/.cm/SKEL/match-tree.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d3f2d"List"d"Either"aMATCH_TREE"0ad"MatchTree"j"gp1 \ No newline at end of file diff --git a/smlnj-lib/RegExp/Glue/.cm/SKEL/regexp-fn.sml b/smlnj-lib/RegExp/Glue/.cm/SKEL/regexp-fn.sml new file mode 100644 index 0000000..1b84402 --- /dev/null +++ b/smlnj-lib/RegExp/Glue/.cm/SKEL/regexp-fn.sml @@ -0,0 +1,2 @@ +Skeleton 5 +ae"RegExpFn"i3aP"gp1c"REGEXP_PARSER"aE"gp1c"REGEXP_ENGINE"f4d"StringCvt"'MatchTree"jh1ad"M"gp1gp1c"REGEXP" \ No newline at end of file diff --git a/smlnj-lib/RegExp/Glue/.cm/SKEL/regexp-sig.sml b/smlnj-lib/RegExp/Glue/.cm/SKEL/regexp-sig.sml new file mode 100644 index 0000000..413f8d6 --- /dev/null +++ b/smlnj-lib/RegExp/Glue/.cm/SKEL/regexp-sig.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f2d"StringCvt"d"MatchTree"ac"REGEXP"h0 \ No newline at end of file diff --git a/smlnj-lib/RegExp/Glue/.cm/amd64-unix/match-tree.sml b/smlnj-lib/RegExp/Glue/.cm/amd64-unix/match-tree.sml new file mode 100644 index 0000000..9289270 Binary files /dev/null and b/smlnj-lib/RegExp/Glue/.cm/amd64-unix/match-tree.sml differ diff --git a/smlnj-lib/RegExp/Glue/.cm/amd64-unix/regexp-fn.sml b/smlnj-lib/RegExp/Glue/.cm/amd64-unix/regexp-fn.sml new file mode 100644 index 0000000..d378982 Binary files /dev/null and b/smlnj-lib/RegExp/Glue/.cm/amd64-unix/regexp-fn.sml differ diff --git a/smlnj-lib/RegExp/Glue/.cm/amd64-unix/regexp-sig.sml b/smlnj-lib/RegExp/Glue/.cm/amd64-unix/regexp-sig.sml new file mode 100644 index 0000000..9144812 Binary files /dev/null and b/smlnj-lib/RegExp/Glue/.cm/amd64-unix/regexp-sig.sml differ diff --git a/smlnj-lib/RegExp/Glue/match-tree.sml b/smlnj-lib/RegExp/Glue/match-tree.sml new file mode 100644 index 0000000..080971c --- /dev/null +++ b/smlnj-lib/RegExp/Glue/match-tree.sml @@ -0,0 +1,116 @@ +(* match-tree.sml + * + * COPYRIGHT (c) 2008 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * Match trees are used to represent the results of matching regular + * expressions. + *) + +signature MATCH_TREE = + sig + + (* a match tree is used to represent the results of a nested + * grouping of regular expressions. + *) + datatype 'a match_tree = Match of 'a * 'a match_tree list + + val root : 'a match_tree -> 'a + (* return the root (outermost) match in the tree *) + val nth : ('a match_tree * int) -> 'a (* raises Subscript *) + (* return the nth match in the tree; matches are labeled in pre-order + * starting at 0. + *) + val map : ('a -> 'b) -> 'a match_tree -> 'b match_tree + (* map a function over the tree (in preorder) *) + val app : ('a -> unit) -> 'a match_tree -> unit + (* apply a given function over ever element of the tree (in preorder) *) + val foldl : ('a * 'b -> 'b) -> 'b -> 'a match_tree -> 'b + (* fold in left-to-right pre-order *) + val foldr : ('a * 'b -> 'b) -> 'b -> 'a match_tree -> 'b + (* fold in right-to-left post-order *) + val find : ('a -> bool) -> 'a match_tree -> 'a option + (* find the first match that satisfies the predicate (or NONE) *) + val num : 'a match_tree -> int + (* return the number of submatches included in the match tree *) + + end; + +structure MatchTree :> MATCH_TREE = + struct + + datatype 'a match_tree = Match of 'a * 'a match_tree list + + fun num m = let + fun count ([], n) = n + | count (Match(_, l) :: ms, n) = count (ms, count (l, n + 1)) + in + count([m], ~1) + end + + (* return the root (outermost) match in the tree *) + fun root (Match(x,_)) = x + + (* return the nth match in the tree; matches are labeled in pre-order + * starting at 0. + *) + fun nth (t, n) = let + datatype sum = datatype Either.either + fun walk (0, Match (x, _)) = INR x + | walk (i, Match (_, children)) = let + fun walkList (i, []) = INL i + | walkList (i, m::r) = (case walk(i, m) + of (INL j) => walkList (j, r) + | result => result + (* end case *)) + in + walkList (i-1, children) + end + in + case walk(n, t) + of (INR x) => x + | (INL _) => raise Subscript + (* end case *) + end + + (* map a function over the tree (in preorder) *) + fun map f = let + fun mapf (Match (x, children)) = Match(f x, mapl children) + and mapl [] = [] + | mapl (x::r) = (mapf x) :: (mapl r) + in + mapf + end + + fun app f (Match (c,children)) = (f c; List.app (app f) children) + + (* fold in left-to-right pre-order *) + fun foldl f init mt = let + fun foldf (Match(x, kids), acc) = List.foldl foldf (f (x, acc)) kids + in + foldf (mt, init) + end + + (* fold in right-to-left post-order *) + fun foldr f init mt = let + fun foldf (Match(x, kids), acc) = f (x, List.foldr foldf acc kids) + in + foldf (mt, init) + end + + (* find the first match that satisfies the predicate *) + fun find pred = let + fun findP (Match (x, children)) = + if (pred x) + then SOME x + else findList children + and findList [] = NONE + | findList (m::r) = (case (findP m) + of NONE => findList r + | result => result + (* end case *)) + in + findP + end + + end (* MatchTree *) diff --git a/smlnj-lib/RegExp/Glue/regexp-fn.sml b/smlnj-lib/RegExp/Glue/regexp-fn.sml new file mode 100644 index 0000000..dfb3bd9 --- /dev/null +++ b/smlnj-lib/RegExp/Glue/regexp-fn.sml @@ -0,0 +1,47 @@ +(* regexp-fn.sml + * + * COPYRIGHT (c) 2008 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * Functor that implements a regular expressions matcher by combining + * a surface syntax and a matching engine. + *) + +functor RegExpFn ( + structure P : REGEXP_PARSER + structure E : REGEXP_ENGINE + ) :> REGEXP where type regexp = E.regexp = struct + + structure M = MatchTree + + type regexp = E.regexp + + (* a match specifies the position (as a stream) and the length of the match *) + type 'a match = {pos : 'a, len : int} MatchTree.match_tree + + exception CannotParse + + fun compile reader s = (case (P.scan reader s) + of SOME (syntax, s') => SOME(E.compile syntax, s') + | NONE => NONE + (* end case *)) + + fun compileString str = (case (StringCvt.scanString P.scan str) + of SOME syntax => E.compile syntax + | NONE => raise CannotParse + (* end case *)) + + val prefix = E.prefix + val find = E.find + + fun match l = let + fun parse (s, f) = (case (StringCvt.scanString P.scan s) + of SOME r => (r, f) + | NONE => raise CannotParse + (* end case *)) + val m = E.match (map parse l) + in + fn getc => fn stream => m getc stream + end + + end diff --git a/smlnj-lib/RegExp/Glue/regexp-sig.sml b/smlnj-lib/RegExp/Glue/regexp-sig.sml new file mode 100644 index 0000000..a805b56 --- /dev/null +++ b/smlnj-lib/RegExp/Glue/regexp-sig.sml @@ -0,0 +1,46 @@ +(* regexp-sig.sml + * + * COPYRIGHT (c) 2020 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * Main signature for regular expressions. + *) + +signature REGEXP = + sig + + type regexp + (* the type of a compiled regular expression + *) + + (* a match specifies the position (as a stream) and the length of the match *) + type 'a match = {pos : 'a, len : int} MatchTree.match_tree + + exception CannotParse + (* raised by compileString and match on syntax errors *) + + val compile : (char,'a) StringCvt.reader -> (regexp, 'a) StringCvt.reader + (* read an external representation of a regular expression from a stream + *) + + val compileString : string -> regexp + (* read an external representation of a regular expression from a string + *) + + val find : regexp -> (char,'a) StringCvt.reader -> ('a match, 'a) StringCvt.reader + (* scan the stream for the first occurence of the regular expression + *) + + val prefix : regexp -> (char,'a) StringCvt.reader -> ('a match, 'a) StringCvt.reader + (* attempt to match the stream at the current position with the + * regular expression + *) + + val match : (string * ('a match -> 'b)) list + -> (char,'a) StringCvt.reader -> ('b, 'a) StringCvt.reader + (* attempt to match the stream at the current position with one + * of the external representations of regular expressions and trigger + * the corresponding action + *) + + end diff --git a/smlnj-lib/RegExp/README b/smlnj-lib/RegExp/README new file mode 100644 index 0000000..fd6c861 --- /dev/null +++ b/smlnj-lib/RegExp/README @@ -0,0 +1,88 @@ +This is a regular expressions library. It is based on a decoupling +of the surface syntax used to specify regular expressions and the +backend engine that implements the matcher. An abstract syntax is used +to communicate between the front end and the back end of the system, + + + USING REGULAR EXPRESSIONS + +Given a structure S1 describing a surface syntax and a structure S2 +describing a matching engine, a regular expression package can be +defined by applying the functor: + + RegExpFn (structure P=S1 structure E=S2) : REGEXP + +To match a regular expression, one first needs to compile a +representation in the surface syntax. The type of a compiled regular +expression is given in the REGEXP signature as: + + type regexp + +Two functions are provided in +the REGEXP signature: + + val compile : (char,'a) StringCvt.reader -> (regexp, 'a) StringCvt.reader + val compileString : string -> regexp + +The function compile is a regexp reader, while compileString is +specialized to strings. + +Once a regular expression has been compiled, three functions are +provided to perform the matching: + + val find : + val prefix : [[ See types in Glue/regexp-sig.sml ]] + val match : + +The function find returns a reader that searches a stream and attempts +to match the given regular expression. The function prefix returns a +reader that attempts to match the regular expression at the current +position in the stream. The function match takes a list of regular +expressions and functions and returns a reader that attempts to match +one of the regular expressions at the current position in the +stream. The function corresponding to the matched regular expression +is invoked on the matching information. + + + MATCHING INFORMATION + +Once a match is found, it is returned as a match_tree datatype +(defined in Glue/match-tree.sml). This is a hierarchical structure +describing the matches of the various subexpressions appearing in the +matched regular expression. A match for an expression is a record +containing the position of the match and its length. The root of the +structure always described the outermost match (the whole string +matched by the regular expression). See the file Glue/match-tree.sml +for more details. + + + ROADMAP + +FrontEnd/ Implementation of various surface syntaxes +BackEnd/ Implementation of various matching engines +Glue/ Glue code +Tests/ Testing code + + + FRONT ENDS + +A single surface syntax is currently implemented, providing an AWK +syntax to describe regular expressions. See the file +FrontEnd/awk-syntax.sml for a description of the actual syntax. + + + BACK ENDS + +Three matching engines are implemented: +1) A backtracking engine (BackEnd/bt-engine.sml), that provides full + submatching information. Slow, low memory footprint, low startup + cost. +2) An automaton-based engine (BackEnd/dfa-engine.sml), that provides + only top-level matching information (the string matched). Fast, + but memory-intensive and high startup cost (the cost of + constructing the automaton in the first place) +3) An implementation of Ken Thompson's RE matching algorithm. This + essentially simulates the NFA using sets of states. It provides + very fast RE construction time and reasonable scanning time. + It currently does not implement groups or begin/end markers. + diff --git a/smlnj-lib/RegExp/Tests/engines.sml b/smlnj-lib/RegExp/Tests/engines.sml new file mode 100644 index 0000000..903cc2e --- /dev/null +++ b/smlnj-lib/RegExp/Tests/engines.sml @@ -0,0 +1,23 @@ +(* engines.sml + * + * COPYRIGHT (c) 2008 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + *) + +structure ThompsonRE = TestFn ( + val engineName = "Thompson's engine" + structure RE = RegExpFn( + structure P = AwkSyntax + structure E = ThompsonEngine)) + +structure DfaRE = TestFn ( + val engineName = "DFA engine" + structure RE = RegExpFn( + structure P = AwkSyntax + structure E = DfaEngine)) + +structure BackTrackRE = TestFn ( + val engineName = "Back-tracking engine" + structure RE = RegExpFn( + structure P = AwkSyntax + structure E = BackTrackEngine)) diff --git a/smlnj-lib/RegExp/Tests/run.sh b/smlnj-lib/RegExp/Tests/run.sh new file mode 100755 index 0000000..b5702d9 --- /dev/null +++ b/smlnj-lib/RegExp/Tests/run.sh @@ -0,0 +1,11 @@ +#!/bin/sh +# +# Run the RE tests +# + +sml sources.cm < NoMatch + | SOME(M.Match({pos, len}, _), n) => + if (pos + len <> n) + then raise Fail "invalid next position" + else Match{pos=pos, len=len, next=n} + (* end case *)) + handle ex => Error ex) + + fun test (name, re, data) = let + val _ = print(concat[" ", name, ": "]) + val re = RE.compileString re handle ex => (print "compile failed\n"; raise ex) + in + case find (re, getc data) + of NoMatch => print "match failed\n" + | Error exn => print(concat["Error: ", General.exnMessage exn, "\n"]) + | Match{pos, len, next} => + print(concat[ + "match at ", Int.toString pos, " = \"", + String.toString(String.substring(data, pos, len)), "\"; next = ", + Int.toString next, "\n" + ]) + (* end case *) + end + handle _ => () + + fun doTests () = ( + print(concat[" testing ", engineName, "\n"]); + test ("01", "[0-9]+", "abc123xyz"); + test ("02", "^[0-9]+", "abc123def\n987xyz"); + test ("03", "[0-9]+$", "abc123def\n987xyz456"); + test ("04", "[0-9]+$", "987xyz456\nabc123"); + test ("05", "^$", ""); + test ("06", ".", "a"); + test ("07", "^foo$", "foo"); + test ("08", "^...$", "foo"); + test ("09", "^.*$", "foo"); + test ("10", "^.*foo@bar\\.com$", "foo@bar.com"); + test ("11", "(abc)","abc"); + test ("12", "\\(abc\\)","(abc)"); + test ("13", "(abc){2,4}$", "abcabc"); + test ("14", "(abc){2,4}$", "abcabcabc"); + test ("15", "(abc){2,4}$", "abcabcabcabc"); + test ("16", "[true]+", "truexxx"); + test ("17", "true", "truexxx")) + + end diff --git a/smlnj-lib/RegExp/regexp-lib.cm b/smlnj-lib/RegExp/regexp-lib.cm new file mode 100644 index 0000000..9478431 --- /dev/null +++ b/smlnj-lib/RegExp/regexp-lib.cm @@ -0,0 +1,55 @@ +(* regexp-lib.cm + * + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + *) + +Library + +signature REGEXP_SYNTAX +signature REGEXP_PARSER +signature MATCH_TREE +signature REGEXP_ENGINE +signature REGEXP + +structure RegExpSyntax +structure MatchTree +structure AwkSyntax +structure BackTrackEngine +structure DfaEngine +structure ThompsonEngine + +functor RegExpFn + +is +#if defined(NEW_CM) + $/basis.cm + $/smlnj-lib.cm +#else + ../Util/smlnj-lib.cm +#endif + +(* utility functions *) + Glue/match-tree.sml + BackEnd/fsm.sml + +(* internal glue language *) + FrontEnd/syntax-sig.sml + FrontEnd/syntax.sml + +(* front/back-ends signatures *) + FrontEnd/parser-sig.sml + BackEnd/engine-sig.sml + +(* Frontends *) + FrontEnd/awk-syntax.sml + +(* Engines *) + BackEnd/bt-engine.sml + BackEnd/dfa-engine.sml + BackEnd/thompson-engine.sml + +(* Glue functor *) + Glue/regexp-sig.sml + Glue/regexp-fn.sml + diff --git a/smlnj-lib/SExp/.cm/GUID/sexp-parser.sml b/smlnj-lib/SExp/.cm/GUID/sexp-parser.sml new file mode 100644 index 0000000..7335f9e --- /dev/null +++ b/smlnj-lib/SExp/.cm/GUID/sexp-parser.sml @@ -0,0 +1 @@ +guid-$/(sexp-lib.cm):sexp-parser.sml-1714016093.648 diff --git a/smlnj-lib/SExp/.cm/GUID/sexp-pp.sml b/smlnj-lib/SExp/.cm/GUID/sexp-pp.sml new file mode 100644 index 0000000..c251f40 --- /dev/null +++ b/smlnj-lib/SExp/.cm/GUID/sexp-pp.sml @@ -0,0 +1 @@ +guid-$/(sexp-lib.cm):sexp-pp.sml-1714016093.693 diff --git a/smlnj-lib/SExp/.cm/GUID/sexp-printer.sml b/smlnj-lib/SExp/.cm/GUID/sexp-printer.sml new file mode 100644 index 0000000..4e41515 --- /dev/null +++ b/smlnj-lib/SExp/.cm/GUID/sexp-printer.sml @@ -0,0 +1 @@ +guid-$/(sexp-lib.cm):sexp-printer.sml-1714016093.682 diff --git a/smlnj-lib/SExp/.cm/GUID/sexp-string-util.sml b/smlnj-lib/SExp/.cm/GUID/sexp-string-util.sml new file mode 100644 index 0000000..7b6d246 --- /dev/null +++ b/smlnj-lib/SExp/.cm/GUID/sexp-string-util.sml @@ -0,0 +1 @@ +guid-$/(sexp-lib.cm):sexp-string-util.sml-1714016093.673 diff --git a/smlnj-lib/SExp/.cm/GUID/sexp-tokens.sml b/smlnj-lib/SExp/.cm/GUID/sexp-tokens.sml new file mode 100644 index 0000000..85ee98c --- /dev/null +++ b/smlnj-lib/SExp/.cm/GUID/sexp-tokens.sml @@ -0,0 +1 @@ +guid-$/(sexp-lib.cm):sexp-tokens.sml-1714016093.123 diff --git a/smlnj-lib/SExp/.cm/GUID/sexp.lex.sml b/smlnj-lib/SExp/.cm/GUID/sexp.lex.sml new file mode 100644 index 0000000..554563d --- /dev/null +++ b/smlnj-lib/SExp/.cm/GUID/sexp.lex.sml @@ -0,0 +1 @@ +guid-$/(sexp-lib.cm):sexp.lex.sml-1714016093.134 diff --git a/smlnj-lib/SExp/.cm/GUID/sexp.sml b/smlnj-lib/SExp/.cm/GUID/sexp.sml new file mode 100644 index 0000000..93af31c --- /dev/null +++ b/smlnj-lib/SExp/.cm/GUID/sexp.sml @@ -0,0 +1 @@ +guid-$/(sexp-lib.cm):sexp.sml-1714016093.105 diff --git a/smlnj-lib/SExp/.cm/SKEL/sexp-parser.sml b/smlnj-lib/SExp/.cm/SKEL/sexp-parser.sml new file mode 100644 index 0000000..f91995f --- /dev/null +++ b/smlnj-lib/SExp/.cm/SKEL/sexp-parser.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f5Cd"AntlrStreamPos"d"List"SExp"d"Atom"d"TextIO"Nad"SExpParser"jh3ad"Lex"gp1d"SExpLexer"ad"T"gp1d"SExpTokens"ad"S"gp1h0 \ No newline at end of file diff --git a/smlnj-lib/SExp/.cm/SKEL/sexp-pp.sml b/smlnj-lib/SExp/.cm/SKEL/sexp-pp.sml new file mode 100644 index 0000000..79c569b --- /dev/null +++ b/smlnj-lib/SExp/.cm/SKEL/sexp-pp.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f5CTextIOPP"d"List"SExp"d"SExpStringUtil"d"Atom"Nad"SExpPP"jh3ad"S"gp1ad"PP"gp1ad"F"gp1d"Format"h0 \ No newline at end of file diff --git a/smlnj-lib/SExp/.cm/SKEL/sexp-printer.sml b/smlnj-lib/SExp/.cm/SKEL/sexp-printer.sml new file mode 100644 index 0000000..967ef56 --- /dev/null +++ b/smlnj-lib/SExp/.cm/SKEL/sexp-printer.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f5Cd"List"SExp"d"SExpStringUtil"d"Atom"d"TextIO"Nad"SExpPrinter"jh2ad"S"gp1 ad"F"gp1d"Format"h0 \ No newline at end of file diff --git a/smlnj-lib/SExp/.cm/SKEL/sexp-string-util.sml b/smlnj-lib/SExp/.cm/SKEL/sexp-string-util.sml new file mode 100644 index 0000000..76a5900 --- /dev/null +++ b/smlnj-lib/SExp/.cm/SKEL/sexp-string-util.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f4d"StringCvt"d"Char"d"Int"d"String"ad"SExpStringUtil"j09 \ No newline at end of file diff --git a/smlnj-lib/SExp/.cm/SKEL/sexp-tokens.sml b/smlnj-lib/SExp/.cm/SKEL/sexp-tokens.sml new file mode 100644 index 0000000..317ead8 --- /dev/null +++ b/smlnj-lib/SExp/.cm/SKEL/sexp-tokens.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f5Cd"UTF8"d"List"d"String"d"Real"d"IntInf"Nad"SExpTokens"h0 \ No newline at end of file diff --git a/smlnj-lib/SExp/.cm/SKEL/sexp.lex.sml b/smlnj-lib/SExp/.cm/SKEL/sexp.lex.sml new file mode 100644 index 0000000..1d3368e --- /dev/null +++ b/smlnj-lib/SExp/.cm/SKEL/sexp.lex.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f7ULexBuffer"StringCvt"CAntlrStreamPos"d"UTF8"d"LargeReal"List"d"Int"CString"d"Substring"d"TextIO"Vector"IntInf"Nad"SExpLexer"h1bd2aUserDeclarations"h1aT"gp1d"SExpTokens"bd2egp1f9C*.7Nf0f3 \ No newline at end of file diff --git a/smlnj-lib/SExp/.cm/SKEL/sexp.sml b/smlnj-lib/SExp/.cm/SKEL/sexp.sml new file mode 100644 index 0000000..11783ae --- /dev/null +++ b/smlnj-lib/SExp/.cm/SKEL/sexp.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f6d"ListPair"Cd"List"d"Atom"d"String"d"Real"d"IntInf"Nad"SExp"h0 \ No newline at end of file diff --git a/smlnj-lib/SExp/.cm/amd64-unix/sexp-parser.sml b/smlnj-lib/SExp/.cm/amd64-unix/sexp-parser.sml new file mode 100644 index 0000000..5bbd74a Binary files /dev/null and b/smlnj-lib/SExp/.cm/amd64-unix/sexp-parser.sml differ diff --git a/smlnj-lib/SExp/.cm/amd64-unix/sexp-pp.sml b/smlnj-lib/SExp/.cm/amd64-unix/sexp-pp.sml new file mode 100644 index 0000000..be199f9 Binary files /dev/null and b/smlnj-lib/SExp/.cm/amd64-unix/sexp-pp.sml differ diff --git a/smlnj-lib/SExp/.cm/amd64-unix/sexp-printer.sml b/smlnj-lib/SExp/.cm/amd64-unix/sexp-printer.sml new file mode 100644 index 0000000..7d83208 Binary files /dev/null and b/smlnj-lib/SExp/.cm/amd64-unix/sexp-printer.sml differ diff --git a/smlnj-lib/SExp/.cm/amd64-unix/sexp-string-util.sml b/smlnj-lib/SExp/.cm/amd64-unix/sexp-string-util.sml new file mode 100644 index 0000000..988e144 Binary files /dev/null and b/smlnj-lib/SExp/.cm/amd64-unix/sexp-string-util.sml differ diff --git a/smlnj-lib/SExp/.cm/amd64-unix/sexp-tokens.sml b/smlnj-lib/SExp/.cm/amd64-unix/sexp-tokens.sml new file mode 100644 index 0000000..2dd966a Binary files /dev/null and b/smlnj-lib/SExp/.cm/amd64-unix/sexp-tokens.sml differ diff --git a/smlnj-lib/SExp/.cm/amd64-unix/sexp.lex.sml b/smlnj-lib/SExp/.cm/amd64-unix/sexp.lex.sml new file mode 100644 index 0000000..532d1b5 Binary files /dev/null and b/smlnj-lib/SExp/.cm/amd64-unix/sexp.lex.sml differ diff --git a/smlnj-lib/SExp/.cm/amd64-unix/sexp.sml b/smlnj-lib/SExp/.cm/amd64-unix/sexp.sml new file mode 100644 index 0000000..497203c Binary files /dev/null and b/smlnj-lib/SExp/.cm/amd64-unix/sexp.sml differ diff --git a/smlnj-lib/SExp/README b/smlnj-lib/SExp/README new file mode 100644 index 0000000..c4e6b87 --- /dev/null +++ b/smlnj-lib/SExp/README @@ -0,0 +1,47 @@ +The *SExp Library* supports the reading and writing of structured data using +the S-expression syntax. It is a work in progress, and does not fully +conform with any formal S-exp specification. + +- End-of-line comments begin with a semicolon character + +- An S-Expression is either an atomic token (boolean, number, symbol, or + string), a quoted expression, or a list of S-Expressions enclosed in brackets. + +- Quoted expressions are formed by the single-quote character (``'``) followed + by an expression. + +- Lists are delimited by matched pairs of `()` `[]` or `{}`, nested freely. + +- List items are separated with whitespace (space, tab, newlines, or carriage + returns). + +- Symbols (or _identifiers_) begin with an initial character followed by + zero or more _subsequent_ characters, where an initial character is + either a letter or one of the characters `-+.@!$%&*/:<=>?^_~` and + a subsequent character is either an initial character, a decimal digit, + or the character `#`. + +- Booleans are represented by the literals `#f` (false) and `#t` (true). + +- Numbers are either signed integers or floating-point numbers; the + sign (if present) is one of "'+'," "`-`," or "`~`". + +- Integers may be specified in decimal without any prefix, or in hexadecimal + with the prefix "0x". In hex, the value is assumed to be unsigned, so -255 + should be written "-0xff" rather than "0x-ff". + +- The format of a floating point number is described by the following + regular expression: ? + '.' + ([eE] ? +) + Notably, "`1.`" and "`.1`" are invalid and "`1`" is an + integer --- floats must have a dot with digits + on both sides. + +- Strings are enclosed in double quotes (``"``) and mostly follow the Scheme + syntax as described in https://www.scheme.com/tspl4/grammar.html#./grammar:strings ++ + The difference between symbols and strings is that symbols are represented as + Atom.atom types, so equality comparisons are more efficient. + +The original version of the library was written by Damon Wang +at the University of Chicago. It has since been modified and +maintained by John Reppy. diff --git a/smlnj-lib/SExp/sexp-lib.cm b/smlnj-lib/SExp/sexp-lib.cm new file mode 100644 index 0000000..a516ae1 --- /dev/null +++ b/smlnj-lib/SExp/sexp-lib.cm @@ -0,0 +1,34 @@ +(* sexp-lib.cm + * + * COPYRIGHT (c) 2022 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * Author: Damon Wang (with modifications by John Reppy) + *) + +Library + + structure SExp + structure SExpParser + structure SExpPP + structure SExpPrinter + +is + + $/basis.cm + $/smlnj-lib.cm + $/pp-lib.cm + $/ml-lpt-lib.cm + +#if defined(NO_PLUGINS) + sexp.lex.sml +#else + sexp.lex : ml-ulex +#endif + + sexp.sml + sexp-parser.sml + sexp-pp.sml + sexp-printer.sml + sexp-string-util.sml + sexp-tokens.sml diff --git a/smlnj-lib/SExp/sexp-lib.mlb b/smlnj-lib/SExp/sexp-lib.mlb new file mode 100644 index 0000000..dcf8e53 --- /dev/null +++ b/smlnj-lib/SExp/sexp-lib.mlb @@ -0,0 +1,50 @@ +(* sexp-lib.mlb + * + * COPYRIGHT (c) 2022 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * Author: Damon Wang (with modifications by John Reppy) + * + * An MLB file for the JSON library, so that it can be used by MLton programs. + *) + +local + + $(SML_LIB)/basis/basis.mlb + $(SML_LIB)/smlnj-lib/Util/smlnj-lib.mlb + $(SML_LIB)/smlnj-lib/PP/pp-lib.mlb + $(SML_LIB)/mllpt-lib/mllpt-lib.mlb + + ann + "nonexhaustiveMatch warn" "redundantMatch warn" + "sequenceNonUnit ignore" + "warnUnused false" "forceUsed" + in + + sexp-tokens.sml + sexp.lex.sml + sexp.sml +(* sexp-stream-parser.sml *) + sexp-parser.sml + sexp-string-util.sml +(* sexp-stream-printer.sml *) + sexp-pp.sml + sexp-printer.sml + + end + +in + +(* DOM-style API (tree based) *) + structure SExp + structure SExpParser + structure SExpPP + structure SExpPrinter + +(* SAX-style API (event based) *) +(* TODO + structure SExpStreamParser + structure SExpStreamPrinter +*) + +end diff --git a/smlnj-lib/SExp/sexp-parser.sml b/smlnj-lib/SExp/sexp-parser.sml new file mode 100644 index 0000000..ff7e71e --- /dev/null +++ b/smlnj-lib/SExp/sexp-parser.sml @@ -0,0 +1,106 @@ +(* sexp-parser.sml + * + * COPYRIGHT (c) 2022 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * Author: Damon Wang (with modifications by John Reppy) + *) + +structure SExpParser : sig + + val parse : TextIO.instream -> SExp.value list + + val parseFile : string -> SExp.value list + + end = struct + + structure Lex = SExpLexer + structure T = SExpTokens + structure S = SExp + + fun parse' (srcMap, inStrm) = let + fun error (pos, msg, tok) = raise Fail(concat[ + "error ", AntlrStreamPos.spanToString srcMap pos, ": ", + msg, ", found '", T.toString tok, "'" + ]) +(* TODO: add support for lexer errors *) + (* the lexer *) + val lexer = Lex.lex srcMap + (* returns (tok, pos, nextStrm, strm) where the difference between + * nextStrm and strm is that tok is the next token in strm + * whereas the token _after_ tok is the next up in nextStrm. + * + * In other words, strm has had all leading whitespace consumed. + *) + fun lexNWS (strm : Lex.strm) = (case lexer strm + of (T.WHITE, _, strm) => lexNWS strm + | (tok, pos, nextStrm) => (tok, pos, nextStrm, strm) + (* end case *)) + fun parseValue (strm : Lex.strm) = let + val (tok, pos, strm) = lexer strm + in + case tok + of T.DELIM(delim, T.OPEN) => parseList (delim, strm) + | T.QUOTE => let val (strm', value) = parseValue strm + in + (strm, S.QUOTE value) + end + | T.KW_true => (strm, S.BOOL true) + | T.KW_false => (strm, S.BOOL false) + | T.INT n => (strm, S.INT n) + | T.FLOAT f => (strm, S.FLOAT f) + | T.STRING s => (strm, S.STRING s) + | T.SYMBOL s => (strm, S.SYMBOL (Atom.atom s)) + | _ => error (pos, "parsing value", tok) + (* end case *) + end + (* parse a list, where delim is the type of delimiter enclosing the list *) + and parseList (delim : T.delim_type, strm : Lex.strm) = let + fun matchDelim (T.DELIM(dType, T.CLOSE)) = (dType = delim) + | matchDelim tok = false + val (tok, _, nextStrm, strm) = lexNWS strm + in + if matchDelim tok + then (nextStrm, S.LIST []) + else let + fun loop (strm, items) = let +(* FIXME: better error reporting for lists; unclosed vs mismatched delims *) + val (strm, v) = parseValue strm + (* expect either a separator (whitespace) or a delimiter *) + val (tok, pos, nextStrm, strm) = lexNWS strm + in + if matchDelim tok + then (nextStrm, v::items) + else loop(strm, v::items) + end + val (strm, items) = loop (strm, []) + in + (strm, S.LIST (List.rev items)) + end + end + (* parse top-level s-expressions until EOF *) + fun parseSExps (strm, sexps) = (case lexer strm + of (T.WHITE, _, strm) => parseSExps (strm, sexps) + | (T.EOF, _, _) => List.rev sexps + | _ => let + val (strm, sexp) = parseValue strm + in + parseSExps (strm, sexp::sexps) + end + (* end case *)) + in + parseSExps (Lex.streamifyInstream inStrm, []) + end + + fun parse inStrm = parse' (AntlrStreamPos.mkSourcemap (), inStrm) + + fun parseFile fileName = let + val inStrm = TextIO.openIn fileName + val v = parse' (AntlrStreamPos.mkSourcemap' fileName, inStrm) + handle ex => (TextIO.closeIn inStrm; raise ex) + in + TextIO.closeIn inStrm; + v + end + + end diff --git a/smlnj-lib/SExp/sexp-pp.sml b/smlnj-lib/SExp/sexp-pp.sml new file mode 100644 index 0000000..89484ec --- /dev/null +++ b/smlnj-lib/SExp/sexp-pp.sml @@ -0,0 +1,50 @@ +(* sexp-pp.sml + * + * COPYRIGHT (c) 2022 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * A pretty printer for SExp values. + *) + +structure SExpPP : sig + + val output : TextIOPP.stream * SExp.value -> unit + + end = struct + + structure S = SExp + structure PP = TextIOPP + structure F = Format + + fun output (strm, sexp) = let + val str = PP.string strm + fun sp () = PP.space strm 1 + fun ppList [] = str "()" + | ppList [v] = ( + PP.openHBox strm; + str "("; ppVal v; str ")"; + PP.closeBox strm) + | ppList (v1::v2::vr) = ( + PP.openHBox strm; + str "("; ppVal v1; sp(); + PP.openHOVBox strm (PP.Rel 0); + ppVal v2; + List.app (fn v => (sp(); ppVal v)) vr; + str ")"; + PP.closeBox strm; + PP.closeBox strm) + and ppVal (S.SYMBOL value) = str (Atom.toString value) + | ppVal (S.BOOL value) = str (if value then "#t" else "#f") + | ppVal (S.INT value) = str (F.format "%d" [F.LINT value]) + | ppVal (S.FLOAT value) = str (F.format "%g" [F.REAL value]) + | ppVal (S.STRING value) = str (SExpStringUtil.toString value) + | ppVal (S.QUOTE value) = (str "'"; ppVal value) + | ppVal (S.LIST values) = ppList values + in + PP.openVBox strm (PP.Abs 0); + ppVal sexp; + PP.newline strm; + PP.closeBox strm + end + + end diff --git a/smlnj-lib/SExp/sexp-printer.sml b/smlnj-lib/SExp/sexp-printer.sml new file mode 100644 index 0000000..fc99e5e --- /dev/null +++ b/smlnj-lib/SExp/sexp-printer.sml @@ -0,0 +1,38 @@ +(* sexp-printer.sml + * + * COPYRIGHT (c) 2022 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * Author: Damon Wang (with modifications by John Reppy) + * + * A printer for SExp values. This printer does not introduce any line + * breaks. For output that is more readable, use the SExpPP module. + *) + +structure SExpPrinter : sig + + val print : TextIO.outstream * SExp.value -> unit + + end = struct + + structure S = SExp + structure F = Format + + fun print (strm, sexp) = let + fun pr s = TextIO.output(strm, s) + fun prList [] = pr ("()") + | prList [v] = (pr "("; prVal v; pr ")") + | prList (v::vs) = ( + pr "("; prVal v; List.app (fn v => (pr " "; prVal v)) vs; pr ")") + and prVal (S.SYMBOL value) = pr (Atom.toString value) + | prVal (S.BOOL value) = pr (if value then "#t" else "#f") + | prVal (S.INT value) = pr (F.format "%d" [F.LINT value]) + | prVal (S.FLOAT value) = pr (F.format "%g" [F.REAL value]) + | prVal (S.STRING value) = pr (SExpStringUtil.toString value) + | prVal (S.QUOTE value) = (pr "'"; prVal value) + | prVal (S.LIST values) = prList values + in + prVal sexp + end + + end diff --git a/smlnj-lib/SExp/sexp-string-util.sml b/smlnj-lib/SExp/sexp-string-util.sml new file mode 100644 index 0000000..62a5105 --- /dev/null +++ b/smlnj-lib/SExp/sexp-string-util.sml @@ -0,0 +1,47 @@ +(* sexp-string-util.sml + * + * COPYRIGHT (c) 2022 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * Common code for rendering string values according to the Scheme syntax. + * See https://www.scheme.com/tspl4/grammar.html#./grammar:strings. + *) + +structure SExpStringUtil : sig + + (* map a S-Expression STRING value to its printable representation *) + val toString : string -> string + + end = struct + + (* map a character to its hexadecimal escape sequence *) + local + val i2x = Int.fmt StringCvt.HEX + val zeroPad = StringCvt.padLeft #"0" 2 + in + fun hexEscape c = String.concat ["\\x", zeroPad (i2x (Char.ord c)), ";"] + end (* local *) + + (* translate a character to its representation in a string. *) + fun trChar #"\"" = "\\\"" + | trChar #"\\" = "\\\\" + | trChar c = if Char.isGraph c + then String.str c + else if (c < #" ") + then (case c + of #"\a" => "\\a" + | #"\b" => "\\b" + | #"\f" => "\\f" + | #"\n" => "\\n" + | #"\r" => "\\r" + | #"\t" => "\\t" + | #"\v" => "\\v" + | _ => hexEscape c + (* end case *)) + else hexEscape c + + val trString = String.translate trChar + + fun toString s = String.concat["\"", trString s, "\""] + + end diff --git a/smlnj-lib/SExp/sexp-tests.cm b/smlnj-lib/SExp/sexp-tests.cm new file mode 100644 index 0000000..6647cf2 --- /dev/null +++ b/smlnj-lib/SExp/sexp-tests.cm @@ -0,0 +1,27 @@ +(* sexp-tests.cm + * + * COPYRIGHT (c) 2011 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * Author: Damon Wang (with modifications by John Reppy) + *) + +Library + + structure SExp + structure SExpParser + structure SExpPrinter + structure SExpTests + structure TestPP + +is + + $/basis.cm + $/smlnj-lib.cm + $/pp-lib.cm + $/ml-lpt-lib.cm + + sexp-lib.cm + + sexp-tests.sml + test-pp.sml diff --git a/smlnj-lib/SExp/sexp-tests.sml b/smlnj-lib/SExp/sexp-tests.sml new file mode 100644 index 0000000..5ac15c3 --- /dev/null +++ b/smlnj-lib/SExp/sexp-tests.sml @@ -0,0 +1,254 @@ +(* sexp-tests.sml + * + * COPYRIGHT (c) 2011 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * Author: Damon Wang (with modifications by John Reppy) + * + * Some test cases for the SExp library 11 May 2011 by Damon Wang + *) + +structure TEST : sig + + exception ERROR of string option (* an error message *) + + datatype outcome = PASS of string (* test name *) + | FAIL of (string * exn) (* test name, FAIL instance *) + | PARTIAL of (string * outcome list) (* name, results *) + + datatype testcase = CASE of { name : string, test : unit -> unit } + | SUITE of { name : string, tests : testcase list } + + val run : testcase -> outcome + val count : outcome -> (int * int) (* number passed, number run *) + val summary : outcome -> string + + (* args: (msg, cond) where + * msg is a description to be printed on failure + * cond is a boolean---false means failure *) + val assert : (string * bool) -> unit + (* args: (cond) *) + val assert' : bool -> unit + + (* args: (msg, name, func) where + * msg is a description to be printed on failure + * name is the exnName of the exception to expect + * func is the function which should raise the exception *) + val throws : (string * string * (unit -> unit)) -> unit + (* args: (name, func) *) + val throws' : (string * (unit -> unit )) -> unit + +end = struct + + exception ERROR of string option (* an error message *) + + datatype outcome = PASS of string (* test name *) + | FAIL of (string * exn) (* test name, FAIL instance *) + | PARTIAL of (string * outcome list) (* name, results *) + + datatype testcase = CASE of { name : string, test : unit -> unit } + | SUITE of { name : string, tests : testcase list } + + fun run (SUITE {name, tests}) = PARTIAL(name, List.map run tests) + | run (CASE {name, test}) = (test(); PASS(name)) handle e => FAIL(name, e) + + fun addVec ((a, b), (c, d)) = (a + c, b + d) + + fun count (PASS _) = (1, 1) + | count (FAIL _) = (0, 1) + | count (PARTIAL (_, results)) = let + in + List.foldl addVec (0,0) (List.map count results) + end + + fun assert (msg, cond) = if not cond then raise ERROR (SOME msg) else () + + fun assert' cond = if not cond then raise ERROR NONE else () + + fun throws (msg, name, func) = let + fun wrongExn e = raise ERROR (SOME (String.concat [msg, + "---expected exception '", name, "' but got '", exnName e, "' with msg\n", + exnMessage e])) + fun noExn () = raise ERROR (SOME (String.concat [msg, + "---expected exception '", name, "' but got nothing"])) + in + if (func (); true) handle e => + if exnName e = name + then false + else (wrongExn e; false) + then noExn () + else () + end + + fun throws' (name, func) = throws ("", name, func) + + local + fun splitMsg msg = String.fields (fn c => EQUAL = Char.compare (#"\n", c)) msg + fun indent lines = List.map (fn s => "\t" ^ s) lines + fun summary' (PASS name) : (string list * (int * int)) = + ([ String.concat [" ", name, "\n"] ], (1, 1)) + | summary' (FAIL (name, ERROR msg)) = + (List.concat [ [ String.concat ["FAIL ", name ] ], + (case msg + of SOME msg => indent + (List.concat [["\n"], (splitMsg msg), ["\n"] ]) + | NONE => ["\n"]) ], + (0, 1)) + | summary' (FAIL (name, e)) = + (List.concat [ [ String.concat ["FAIL ", name, " with external error ", + exnName e, "\n" ] ], + indent (splitMsg (exnMessage e)), + [ "\n"] ], + (0, 1)) + | summary' (PARTIAL (name, results)) = let + val (lines, counts) = ListPair.unzip (List.map summary' results) + val indented = indent (List.concat lines) + val (n_passed, n_run) = List.foldl addVec (0, 0) counts + in + (String.concat [ "[ ", Int.toString n_passed, " / ", Int.toString n_run, + " ] ", name, "\n"] :: indented, (n_passed, n_run)) + end + in + fun summary results = String.concat (#1 (summary' results)) + end + +end + +structure SExpTests : sig + val run : unit -> unit +end = struct + + structure P = SExpParser + structure S = SExp + + val assert = TEST.assert + val assert' = TEST.assert' + val throws' = TEST.throws' + + fun pS str = hd (P.parse (TextIO.openString str)) + + val tests = TEST.SUITE{name="parsing", tests=[ + + TEST.SUITE{name="bool", tests=[ + TEST.CASE{name="true", test=fn () => + assert' (S.same(pS "#t", S.BOOL true)) }, + TEST.CASE{name="false", test=fn () => + assert' (S.same(pS "#f", S.BOOL false))} ] }, + + TEST.SUITE{name="int", tests=[ + TEST.CASE{name="negative", test=fn () => + assert' (S.same(pS "-1", S.INT ~1)) }, + TEST.CASE{name="zero", test=fn () => + assert' (S.same(pS "0", S.INT 0)) }, + TEST.CASE{name="positive", test=fn () => + assert' (S.same(pS "1", S.INT 1)) }, + TEST.CASE{name="32-bit signed max", test=fn () => + assert' (S.same(pS "2147483647", S.INT 2147483647)) }, + TEST.CASE{name="32-bit signed min", test=fn () => + assert' (S.same(pS "-2147483648", S.INT ~2147483648)) }, + TEST.CASE{name="bigger than 32-bit", test=fn () => + assert' (S.same(pS "3147483647", S.INT 3147483647)) }, + TEST.CASE{name="leading plus", test=fn () => + assert' (S.same(pS "+1", S.INT 1)) }, + TEST.CASE{name="hex", test=fn () => + assert' (S.same(pS "0xdeadbeef", S.INT 0xdeadbeef)) }, + TEST.CASE{name="positive hex", test=fn () => + assert' (S.same(pS "+0xdeadbeef", S.INT 0xdeadbeef)) }, + TEST.CASE{name="negative hex", test=fn () => + assert' (S.same(pS "-0xdeadbeef", S.INT ~0xdeadbeef)) } + ]}, + + TEST.SUITE{name="float", tests=[ + TEST.CASE{name="decimal", test=fn () => + assert' (S.same(pS "1.0", S.FLOAT 1.0))}, + TEST.CASE{name="exponent", test=fn () => + assert' (S.same(pS "1e2", S.FLOAT 100.0))}, + TEST.CASE{name="decimal and exponent", test=fn () => + assert' (S.same(pS "1.2e2", S.FLOAT 120.0))}, + TEST.CASE{name="negative", test=fn () => + assert' (S.same(pS "-1.0", S.FLOAT ~1.0))}, + TEST.CASE{name="negative exponent", test=fn () => + assert' (S.same(pS "1.0e-2", S.FLOAT 0.01))}, + TEST.CASE{name="zero first digit", test=fn () => + assert' (S.same(pS "0.1", S.FLOAT 0.1))} ]}, + + TEST.SUITE{name="string", tests=[ + TEST.CASE{name="empty", test=fn () => + assert' (S.same(pS "\"\"", S.STRING "")) }, + TEST.CASE{name="characters", test=fn () => + assert' (S.same(pS "\"foo\"", S.STRING "foo")) }, + TEST.CASE{name="escapes", test=fn () => + assert' (S.same(pS "\" \\\\ \\\" \\/ \\b \\f \\n \\r \\t \"", + S.STRING " \\ \" / \b \f \n \r \t ")) } ]}, + + TEST.SUITE{name="comments", tests=[ + TEST.CASE{name="empty", test=fn () => + assert' (S.same(pS "/* */1", S.INT 1))}, + TEST.CASE{name="with text", test=fn () => + assert' (S.same(pS "/* 1 */1", S.INT 1))}, + TEST.CASE{name="multiline allowed", test=fn () => + assert' (S.same(pS "/* 1 \n 1 */1", S.INT 1))}, + TEST.CASE{name="cannot be nested", test=fn ()=> + throws' ("Fail", fn () => (pS "/* 0 /* 1 */ 2 */ 3"; ())) } + ]}, + + TEST.SUITE{name="lists", tests=[ + TEST.CASE{name="empty", test=fn () => + assert' (S.same(pS "()", S.LIST []))}, + TEST.CASE{name="empty with spaces", test=fn () => + assert' (S.same(pS "( )", S.LIST []))}, + TEST.CASE{name="one int", test=fn () => + assert' (S.same(pS "(1)", S.LIST [ (S.INT 1) ]))}, + TEST.CASE{name="one int with space", test=fn () => + assert' (S.same(pS "( 1)", S.LIST [ (S.INT 1) ]))}, + TEST.CASE{name="one int with spaces", test=fn () => + assert' (S.same(pS "( 1 )", S.LIST [ (S.INT 1) ]))}, + TEST.CASE{name="with empty list ", test=fn () => + assert' (S.same(pS "( () )", S.LIST [ (S.LIST []) ]))}, + TEST.CASE{name="nested", test=fn () => + assert' (S.same(pS "( ( 1 ) )", S.LIST [ S.LIST [ S.INT 1 ] ]))}, + TEST.CASE{name="two elements", test=fn () => + assert' (S.same(pS "( 1 2 )", S.LIST [ S.INT 1, S.INT 2]))}, + TEST.CASE{name="three elements", test=fn () => + assert' (S.same(pS "( 1 2 3 )", S.LIST [ S.INT 1, S.INT 2]))}, + TEST.CASE{name="mixed elements", test=fn () => + assert' (S.same(pS "( 1 2.5 \"foo\" (2))", + S.LIST [ S.INT 1, S.FLOAT 2.5, S.STRING "foo", S.LIST [ S.INT 2 ] ]))}, + TEST.CASE{name="brackets", test=fn () => + assert' (S.same(pS "[ 1 2 3 ]", S.LIST [ S.INT 1, S.INT 2]))}, + TEST.CASE{name="braces", test=fn () => + assert' (S.same(pS "{ 1 2 3 }", S.LIST [ S.INT 1, S.INT 2]))}, + TEST.CASE{name="mixed delimiters", test=fn () => + assert' (S.same(pS "{ [ ( ) ] }", S.LIST [ S.LIST [ S.LIST [] ] ]))}, + TEST.CASE{name="delimiters must match", test=fn () => + throws' ("Fail", fn () => (pS "(]"; ())) }, + TEST.CASE{name="alternative separaters", test=fn () => + assert' (S.same(pS "(1;2,3)", S.LIST [ S.INT 1, S.INT 2, S.INT 3])) } + ]}, + + TEST.SUITE{name="symbols", tests=[ + TEST.CASE{name="characters", test=fn () => + assert' (S.same(pS "ab", S.SYMBOL (Atom.atom "ab")))}, + TEST.CASE{name="trailing digits", test=fn () => + assert' (S.same(pS "x0", S.SYMBOL (Atom.atom "x0")))}, + TEST.CASE{name="quoted", test=fn ()=> + assert' (S.same(pS "'0", S.SYMBOL (Atom.atom "0")))} + ]}, + + TEST.SUITE{name="bugs", tests=[ + TEST.CASE{name="bug01", test= fn () => + assert' (S.same( + pS "(set pi 3.141592653589793 :documentation \"The value of $\\pi$.\")", + S.LIST[ + S.SYMBOL(Atom.atom "set"), S.SYMBOL(Atom.atom "pi"), + S.FLOAT 3.141592653589793, S.SYMBOL(Atom.atom ":documentation"), + S.STRING "The value of $\\pi$." + ])) + } + ]} + ]} + + fun run () = TextIO.print (TEST.summary (TEST.run tests)) +end + +(* SExpTests.run (); *) diff --git a/smlnj-lib/SExp/sexp-tokens.sml b/smlnj-lib/SExp/sexp-tokens.sml new file mode 100644 index 0000000..a028aef --- /dev/null +++ b/smlnj-lib/SExp/sexp-tokens.sml @@ -0,0 +1,55 @@ +(* sexp-tokens.sml + * + * COPYRIGHT (c) 2011 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * Author: Damon Wang (with modifications by John Reppy) +s * + * The tokens returned by the SExp lexer. + *) + +structure SExpTokens = + struct + + datatype delim_type = PAREN | BRACKET | BRACE + datatype delim_open = OPEN | CLOSE + + datatype token + = EOF (* end-of-file *) + | DELIM of (delim_type * delim_open) + | QUOTE (* "'" *) + | KW_true (* "#t" *) + | KW_false (* "#f" *) + | INT of IntInf.int + | FLOAT of real + | STRING of string + | WHITE (* whitespace, which separates list items *) + | SYMBOL of string + (* TODO: add a HEX constructor for encoding non-printable characters in + * some human-readable way *) + + fun toString EOF = "" + | toString (DELIM(PAREN, OPEN)) = "(" + | toString (DELIM(PAREN, CLOSE)) = ")" + | toString (DELIM(BRACKET, OPEN)) = "[" + | toString (DELIM(BRACKET, CLOSE)) = "]" + | toString (DELIM(BRACE, OPEN)) = "{" + | toString (DELIM(BRACE, CLOSE)) = "}" + | toString QUOTE = "'" + | toString KW_true = "#t" + | toString KW_false = "#f" + | toString (INT i) = + if (i < 0) then "-" ^ IntInf.toString(~i) + else IntInf.toString i + | toString (FLOAT f) = + if (f < 0.0) then "-" ^ Real.toString(~f) + else Real.toString f + | toString (STRING s) = let + fun f (wchr, l) = UTF8.toString wchr :: l + in + String.concat("\"" :: (List.foldr f ["\""] (UTF8.explode s))) + end + | toString (SYMBOL str) = str + | toString WHITE = " " + + end diff --git a/smlnj-lib/SExp/sexp.lex b/smlnj-lib/SExp/sexp.lex new file mode 100644 index 0000000..ea28f79 --- /dev/null +++ b/smlnj-lib/SExp/sexp.lex @@ -0,0 +1,116 @@ +(* sexp.lex + * + * COPYRIGHT (c) 2022 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * Author: Damon Wang (with modifications by John Reppy) + * + * Lexer for Sexp files. + * + * TODO: + * EOF rules for strings + * error messages for unknown characters + *) + +%name SExpLexer; + +%defs ( + structure T = SExpTokens + type lex_result = T.token + fun eof () = T.EOF + fun int s = T.INT(valOf(IntInf.fromString s)) + fun float s = T.FLOAT(valOf(LargeReal.fromString s)) +(* support for incremental construction of strings *) + val sbuf : string list ref = ref [] + fun addStr s = (sbuf := s :: !sbuf) + fun addHexEsc lit = let + (* trim the "\x" prefix and ";" suffix *) + val digits = Substring.trimr 1 (Substring.triml 2 lit) + val SOME(d, _) = Int.scan StringCvt.HEX Substring.getc digits + in + addStr(String.str(chr d)) + end + fun finishString () = (T.STRING(String.concat(List.rev(!sbuf))) before sbuf := []) +); + +%let digit1_9 = [1-9]; +%let digit = [0-9]; +%let digits = {digit}+; +%let int = [+-]?({digit} | {digit1_9}{digits}+); +%let frac = "."{digits}; +%let exp = [eE][+-]?{digits}; +%let xdigit = {digit}|[a-fA-F]; +%let alpha = ([a-zA-Z]); +%let initial = {alpha} | "^" | [-+.@!$%&*/:<=>?_~]; +%let subsequent = {initial} | {digit}; +%let ident = {initial} {subsequent}*; +%let interlnws = " "|"\t"; (* intraline whitespace *) +%let eol = "\n"|"\r\n"|"\r"; + +%states S; + +[ \t\n\r]+ => ( T.WHITE ); +";"[^\n\r]*[\n\r]+ => ( skip() (* comment *)); + +{ident} => ( T.SYMBOL (yytext) ); + +"'" => ( T.QUOTE ); +"(" => ( T.DELIM (T.PAREN, T.OPEN) ); +")" => ( T.DELIM (T.PAREN, T.CLOSE) ); +"[" => ( T.DELIM (T.BRACKET, T.OPEN) ); +"]" => ( T.DELIM (T.BRACKET, T.CLOSE) ); +"{" => ( T.DELIM (T.BRACE, T.OPEN) ); +"}" => ( T.DELIM (T.BRACE, T.CLOSE) ); +"#t" => ( T.KW_true ); +"#f" => ( T.KW_false ); + +(* takes a string of form "0xdeadbeef", strips the leading "0x", and returns + * an IntInf with hex value deadbeef. Note that the hex value is unsigned; to + * get negatives, write "-0xdeadbeef". This means that the string from C's + * `printf("%x", -1)` will be parsed as INT_MAX. TODO is this a good idea? + *) +[+-]?"0x"{xdigit}+ => ( + let + (* TODO Doesn't StringCvt.HEX handle stripping the "0x" prefix? *) + val digits = if String.isPrefix "+" yytext (* "+0xdeadbeef" *) + then String.extract(yytext, 3, NONE) + else if String.isPrefix "-" yytext (* "-0xdeadbeef" *) + then "-" ^ String.extract(yytext, 3, NONE) + else String.extract(yytext, 2, NONE) (* "0xdeadbeef" *) + val SOME(value) = StringCvt.scanString (IntInf.scan StringCvt.HEX) digits + in + T.INT(value) + end); + +{int} => ( T.INT(valOf(IntInf.fromString yytext)) ); + +{int}{frac} => ( float yytext ); +{int}{exp} => ( float yytext ); +{int}{frac}{exp} => ( float yytext ); + +(* string values follow the syntax of Scheme as described in + * + * https://www.scheme.com/tspl4/grammar.html#./grammar:strings + *) +"\"" => ( YYBEGIN S; continue() ); + +"\\\\" => ( addStr "\\"; continue() ); +"\\\"" => ( addStr "\""; continue() ); +"\\a" => ( addStr "\a"; continue() ); +"\\b" => ( addStr "\b"; continue() ); +"\\f" => ( addStr "\f"; continue() ); +"\\n" => ( addStr "\n"; continue() ); +"\\r" => ( addStr "\r"; continue() ); +"\\t" => ( addStr "\t"; continue() ); +"\\v" => ( addStr "\v"; continue() ); +"\\x"{xdigit}";" => ( addHexEsc yysubstr; continue() ); +"\\x"{xdigit}{2}";" => ( addHexEsc yysubstr; continue() ); +[^\\"]+ => ( addStr yytext; continue() ); +"\\"{interlnws}*{eol}{interlnws}* + => ( continue() ); +"\"" => ( YYBEGIN INITIAL; finishString() ); +(* FIXME: add some error reporting *) +. => ( continue() ); + +(* FIXME: add some error reporting *) +. => ( skip() ); diff --git a/smlnj-lib/SExp/sexp.lex.sml b/smlnj-lib/SExp/sexp.lex.sml new file mode 100644 index 0000000..e45a6e8 --- /dev/null +++ b/smlnj-lib/SExp/sexp.lex.sml @@ -0,0 +1,1799 @@ +structure SExpLexer = struct + + datatype yystart_state = +S | INITIAL + local + + structure UserDeclarations = + struct + + + structure T = SExpTokens + type lex_result = T.token + fun eof () = T.EOF + fun int s = T.INT(valOf(IntInf.fromString s)) + fun float s = T.FLOAT(valOf(LargeReal.fromString s)) +(* support for incremental construction of strings *) + val sbuf : string list ref = ref [] + fun addStr s = (sbuf := s :: !sbuf) + fun addHexEsc lit = let + (* trim the "\x" prefix and ";" suffix *) + val digits = Substring.trimr 1 (Substring.triml 2 lit) + val SOME(d, _) = Int.scan StringCvt.HEX Substring.getc digits + in + addStr(String.str(chr d)) + end + fun finishString () = (T.STRING(String.concat(List.rev(!sbuf))) before sbuf := []) + + end + + datatype yymatch + = yyNO_MATCH + | yyMATCH of ULexBuffer.stream * action * yymatch + withtype action = ULexBuffer.stream * yymatch -> UserDeclarations.lex_result + + val yytable : ((UTF8.wchar * UTF8.wchar * int) list * int list) Vector.vector = +Vector.fromList [] + fun yystreamify' p input = ULexBuffer.mkStream (p, input) + + fun yystreamifyReader' p readFn strm = let + val s = ref strm + fun iter(strm, n, accum) = + if n > 1024 then (String.implode (rev accum), strm) + else (case readFn strm + of NONE => (String.implode (rev accum), strm) + | SOME(c, strm') => iter (strm', n+1, c::accum)) + fun input() = let + val (data, strm) = iter(!s, 0, []) + in + s := strm; + data + end + in + yystreamify' p input + end + + fun yystreamifyInstream' p strm = yystreamify' p (fn ()=>TextIO.input strm) + + fun innerLex +(yystrm_, yyss_, yysm) = let + (* current start state *) + val yyss = ref yyss_ + fun YYBEGIN ss = (yyss := ss) + (* current input stream *) + val yystrm = ref yystrm_ + fun yysetStrm strm = yystrm := strm + fun yygetPos() = ULexBuffer.getpos (!yystrm) + fun yystreamify input = yystreamify' (yygetPos()) input + fun yystreamifyReader readFn strm = yystreamifyReader' (yygetPos()) readFn strm + fun yystreamifyInstream strm = yystreamifyInstream' (yygetPos()) strm + (* start position of token -- can be updated via skip() *) + val yystartPos = ref (yygetPos()) + (* get one char of input *) + fun yygetc strm = (case ULexBuffer.getu strm + of (SOME (0w10, s')) => + (AntlrStreamPos.markNewLine yysm (ULexBuffer.getpos strm); + SOME (0w10, s')) + | x => x) + fun yygetList getc strm = let + val get1 = UTF8.getu getc + fun iter (strm, accum) = + (case get1 strm + of NONE => rev accum + | SOME (w, strm') => iter (strm', w::accum) + (* end case *)) + in + iter (strm, []) + end + (* create yytext *) + fun yymksubstr(strm) = ULexBuffer.subtract (strm, !yystrm) + fun yymktext(strm) = Substring.string (yymksubstr strm) + fun yymkunicode(strm) = yygetList Substring.getc (yymksubstr strm) + open UserDeclarations + fun lex () = let + fun yystuck (yyNO_MATCH) = raise Fail "lexer reached a stuck state" + | yystuck (yyMATCH (strm, action, old)) = + action (strm, old) + val yypos = yygetPos() + fun yygetlineNo strm = AntlrStreamPos.lineNo yysm (ULexBuffer.getpos strm) + fun yygetcolNo strm = AntlrStreamPos.colNo yysm (ULexBuffer.getpos strm) + fun yyactsToMatches (strm, [], oldMatches) = oldMatches + | yyactsToMatches (strm, act::acts, oldMatches) = + yyMATCH (strm, act, yyactsToMatches (strm, acts, oldMatches)) + fun yygo actTable = + (fn (~1, _, oldMatches) => yystuck oldMatches + | (curState, strm, oldMatches) => let + val (transitions, finals') = Vector.sub (yytable, curState) + val finals = List.map (fn i => Vector.sub (actTable, i)) finals' + fun tryfinal() = + yystuck (yyactsToMatches (strm, finals, oldMatches)) + fun find (c, []) = NONE + | find (c, (c1, c2, s)::ts) = + if c1 <= c andalso c <= c2 then SOME s + else find (c, ts) + in case yygetc strm + of SOME(c, strm') => + (case find (c, transitions) + of NONE => tryfinal() + | SOME n => + yygo actTable + (n, strm', + yyactsToMatches (strm, finals, oldMatches))) + | NONE => tryfinal() + end) + val yylastwasnref = ref (ULexBuffer.lastWasNL (!yystrm)) + fun continue() = let val yylastwasn = !yylastwasnref in +let +fun yyAction0 (strm, lastMatch : yymatch) = (yystrm := strm; T.WHITE ) +fun yyAction1 (strm, lastMatch : yymatch) = (yystrm := strm; + skip() (* comment *)) +fun yyAction2 (strm, lastMatch : yymatch) = let + val yytext = yymktext(strm) + in + yystrm := strm; T.SYMBOL (yytext) + end +fun yyAction3 (strm, lastMatch : yymatch) = (yystrm := strm; T.QUOTE ) +fun yyAction4 (strm, lastMatch : yymatch) = (yystrm := strm; + T.DELIM (T.PAREN, T.OPEN) ) +fun yyAction5 (strm, lastMatch : yymatch) = (yystrm := strm; + T.DELIM (T.PAREN, T.CLOSE) ) +fun yyAction6 (strm, lastMatch : yymatch) = (yystrm := strm; + T.DELIM (T.BRACKET, T.OPEN) ) +fun yyAction7 (strm, lastMatch : yymatch) = (yystrm := strm; + T.DELIM (T.BRACKET, T.CLOSE) ) +fun yyAction8 (strm, lastMatch : yymatch) = (yystrm := strm; + T.DELIM (T.BRACE, T.OPEN) ) +fun yyAction9 (strm, lastMatch : yymatch) = (yystrm := strm; + T.DELIM (T.BRACE, T.CLOSE) ) +fun yyAction10 (strm, lastMatch : yymatch) = (yystrm := strm; T.KW_true ) +fun yyAction11 (strm, lastMatch : yymatch) = (yystrm := strm; T.KW_false ) +fun yyAction12 (strm, lastMatch : yymatch) = let + val yytext = yymktext(strm) + in + yystrm := strm; + + let + (* TODO Doesn't StringCvt.HEX handle stripping the "0x" prefix? *) + val digits = if String.isPrefix "+" yytext (* "+0xdeadbeef" *) + then String.extract(yytext, 3, NONE) + else if String.isPrefix "-" yytext (* "-0xdeadbeef" *) + then "-" ^ String.extract(yytext, 3, NONE) + else String.extract(yytext, 2, NONE) (* "0xdeadbeef" *) + val SOME(value) = StringCvt.scanString (IntInf.scan StringCvt.HEX) digits + in + T.INT(value) + end + end +fun yyAction13 (strm, lastMatch : yymatch) = let + val yytext = yymktext(strm) + in + yystrm := strm; T.INT(valOf(IntInf.fromString yytext)) + end +fun yyAction14 (strm, lastMatch : yymatch) = let + val yytext = yymktext(strm) + in + yystrm := strm; float yytext + end +fun yyAction15 (strm, lastMatch : yymatch) = let + val yytext = yymktext(strm) + in + yystrm := strm; float yytext + end +fun yyAction16 (strm, lastMatch : yymatch) = let + val yytext = yymktext(strm) + in + yystrm := strm; float yytext + end +fun yyAction17 (strm, lastMatch : yymatch) = (yystrm := strm; + YYBEGIN S; continue() ) +fun yyAction18 (strm, lastMatch : yymatch) = (yystrm := strm; + addStr "\\"; continue() ) +fun yyAction19 (strm, lastMatch : yymatch) = (yystrm := strm; + addStr "\""; continue() ) +fun yyAction20 (strm, lastMatch : yymatch) = (yystrm := strm; + addStr "\a"; continue() ) +fun yyAction21 (strm, lastMatch : yymatch) = (yystrm := strm; + addStr "\b"; continue() ) +fun yyAction22 (strm, lastMatch : yymatch) = (yystrm := strm; + addStr "\f"; continue() ) +fun yyAction23 (strm, lastMatch : yymatch) = (yystrm := strm; + addStr "\n"; continue() ) +fun yyAction24 (strm, lastMatch : yymatch) = (yystrm := strm; + addStr "\r"; continue() ) +fun yyAction25 (strm, lastMatch : yymatch) = (yystrm := strm; + addStr "\t"; continue() ) +fun yyAction26 (strm, lastMatch : yymatch) = (yystrm := strm; + addStr "\v"; continue() ) +fun yyAction27 (strm, lastMatch : yymatch) = let + val yysubstr = yymksubstr(strm) + in + yystrm := strm; addHexEsc yysubstr; continue() + end +fun yyAction28 (strm, lastMatch : yymatch) = let + val yysubstr = yymksubstr(strm) + in + yystrm := strm; addHexEsc yysubstr; continue() + end +fun yyAction29 (strm, lastMatch : yymatch) = let + val yytext = yymktext(strm) + in + yystrm := strm; addStr yytext; continue() + end +fun yyAction30 (strm, lastMatch : yymatch) = (yystrm := strm; continue() ) +fun yyAction31 (strm, lastMatch : yymatch) = (yystrm := strm; + YYBEGIN INITIAL; finishString() ) +fun yyAction32 (strm, lastMatch : yymatch) = (yystrm := strm; continue() ) +fun yyAction33 (strm, lastMatch : yymatch) = (yystrm := strm; skip() ) +fun yyQ38 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction9(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction9(strm, yyNO_MATCH) + (* end case *)) +fun yyQ37 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction8(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction8(strm, yyNO_MATCH) + (* end case *)) +fun yyQ36 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction7(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction7(strm, yyNO_MATCH) + (* end case *)) +fun yyQ35 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction6(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction6(strm, yyNO_MATCH) + (* end case *)) +fun yyQ40 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction1(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = 0wxB + then yyAction1(strm, yyNO_MATCH) + else if inp < 0wxB + then if inp = 0wxA + then yyQ40(strm', yyMATCH(strm, yyAction1, yyNO_MATCH)) + else yyAction1(strm, yyNO_MATCH) + else if inp = 0wxD + then yyQ40(strm', yyMATCH(strm, yyAction1, yyNO_MATCH)) + else yyAction1(strm, yyNO_MATCH) + (* end case *)) +fun yyQ39 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = 0wxB + then yyQ39(strm', lastMatch) + else if inp < 0wxB + then if inp = 0wxA + then yyQ40(strm', lastMatch) + else yyQ39(strm', lastMatch) + else if inp = 0wxD + then yyQ40(strm', lastMatch) + else yyQ39(strm', lastMatch) + (* end case *)) +fun yyQ34 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction33(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = 0wxB + then yyQ39(strm', yyMATCH(strm, yyAction33, yyNO_MATCH)) + else if inp < 0wxB + then if inp = 0wxA + then yyQ40(strm', yyMATCH(strm, yyAction33, yyNO_MATCH)) + else yyQ39(strm', yyMATCH(strm, yyAction33, yyNO_MATCH)) + else if inp = 0wxD + then yyQ40(strm', yyMATCH(strm, yyAction33, yyNO_MATCH)) + else yyQ39(strm', yyMATCH(strm, yyAction33, yyNO_MATCH)) + (* end case *)) +fun yyQ45 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction15(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = 0wx30 + then yyQ45(strm', yyMATCH(strm, yyAction15, yyNO_MATCH)) + else if inp < 0wx30 + then yyAction15(strm, yyNO_MATCH) + else if inp <= 0wx39 + then yyQ45(strm', yyMATCH(strm, yyAction15, yyNO_MATCH)) + else yyAction15(strm, yyNO_MATCH) + (* end case *)) +fun yyQ44 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = 0wx30 + then yyQ45(strm', lastMatch) + else if inp < 0wx30 + then yystuck(lastMatch) + else if inp <= 0wx39 + then yyQ45(strm', lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ43 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = 0wx2D + then yyQ44(strm', lastMatch) + else if inp < 0wx2D + then if inp = 0wx2B + then yyQ44(strm', lastMatch) + else yystuck(lastMatch) + else if inp = 0wx30 + then yyQ45(strm', lastMatch) + else if inp < 0wx30 + then yystuck(lastMatch) + else if inp <= 0wx39 + then yyQ45(strm', lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ49 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction16(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = 0wx30 + then yyQ49(strm', yyMATCH(strm, yyAction16, yyNO_MATCH)) + else if inp < 0wx30 + then yyAction16(strm, yyNO_MATCH) + else if inp <= 0wx39 + then yyQ49(strm', yyMATCH(strm, yyAction16, yyNO_MATCH)) + else yyAction16(strm, yyNO_MATCH) + (* end case *)) +fun yyQ48 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = 0wx30 + then yyQ49(strm', lastMatch) + else if inp < 0wx30 + then yystuck(lastMatch) + else if inp <= 0wx39 + then yyQ49(strm', lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ47 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = 0wx2D + then yyQ48(strm', lastMatch) + else if inp < 0wx2D + then if inp = 0wx2B + then yyQ48(strm', lastMatch) + else yystuck(lastMatch) + else if inp = 0wx30 + then yyQ49(strm', lastMatch) + else if inp < 0wx30 + then yystuck(lastMatch) + else if inp <= 0wx39 + then yyQ49(strm', lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ46 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction14(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = 0wx45 + then yyQ47(strm', yyMATCH(strm, yyAction14, yyNO_MATCH)) + else if inp < 0wx45 + then if inp = 0wx30 + then yyQ46(strm', yyMATCH(strm, yyAction14, yyNO_MATCH)) + else if inp < 0wx30 + then yyAction14(strm, yyNO_MATCH) + else if inp <= 0wx39 + then yyQ46(strm', yyMATCH(strm, yyAction14, yyNO_MATCH)) + else yyAction14(strm, yyNO_MATCH) + else if inp = 0wx65 + then yyQ47(strm', yyMATCH(strm, yyAction14, yyNO_MATCH)) + else yyAction14(strm, yyNO_MATCH) + (* end case *)) +fun yyQ41 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = 0wx30 + then yyQ46(strm', lastMatch) + else if inp < 0wx30 + then yystuck(lastMatch) + else if inp <= 0wx39 + then yyQ46(strm', lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ42 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction13(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = 0wx3A + then yyAction13(strm, yyNO_MATCH) + else if inp < 0wx3A + then if inp = 0wx2F + then yyAction13(strm, yyNO_MATCH) + else if inp < 0wx2F + then if inp = 0wx2E + then yyQ41(strm', yyMATCH(strm, yyAction13, yyNO_MATCH)) + else yyAction13(strm, yyNO_MATCH) + else yyQ42(strm', yyMATCH(strm, yyAction13, yyNO_MATCH)) + else if inp = 0wx46 + then yyAction13(strm, yyNO_MATCH) + else if inp < 0wx46 + then if inp = 0wx45 + then yyQ43(strm', yyMATCH(strm, yyAction13, yyNO_MATCH)) + else yyAction13(strm, yyNO_MATCH) + else if inp = 0wx65 + then yyQ43(strm', yyMATCH(strm, yyAction13, yyNO_MATCH)) + else yyAction13(strm, yyNO_MATCH) + (* end case *)) +fun yyQ33 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction13(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = 0wx3A + then yyAction13(strm, yyNO_MATCH) + else if inp < 0wx3A + then if inp = 0wx2F + then yyAction13(strm, yyNO_MATCH) + else if inp < 0wx2F + then if inp = 0wx2E + then yyQ41(strm', yyMATCH(strm, yyAction13, yyNO_MATCH)) + else yyAction13(strm, yyNO_MATCH) + else yyQ42(strm', yyMATCH(strm, yyAction13, yyNO_MATCH)) + else if inp = 0wx46 + then yyAction13(strm, yyNO_MATCH) + else if inp < 0wx46 + then if inp = 0wx45 + then yyQ43(strm', yyMATCH(strm, yyAction13, yyNO_MATCH)) + else yyAction13(strm, yyNO_MATCH) + else if inp = 0wx65 + then yyQ43(strm', yyMATCH(strm, yyAction13, yyNO_MATCH)) + else yyAction13(strm, yyNO_MATCH) + (* end case *)) +fun yyQ51 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction12(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = 0wx41 + then yyQ51(strm', yyMATCH(strm, yyAction12, yyNO_MATCH)) + else if inp < 0wx41 + then if inp = 0wx30 + then yyQ51(strm', yyMATCH(strm, yyAction12, yyNO_MATCH)) + else if inp < 0wx30 + then yyAction12(strm, yyNO_MATCH) + else if inp <= 0wx39 + then yyQ51(strm', yyMATCH(strm, yyAction12, yyNO_MATCH)) + else yyAction12(strm, yyNO_MATCH) + else if inp = 0wx61 + then yyQ51(strm', yyMATCH(strm, yyAction12, yyNO_MATCH)) + else if inp < 0wx61 + then if inp <= 0wx46 + then yyQ51(strm', yyMATCH(strm, yyAction12, yyNO_MATCH)) + else yyAction12(strm, yyNO_MATCH) + else if inp <= 0wx66 + then yyQ51(strm', yyMATCH(strm, yyAction12, yyNO_MATCH)) + else yyAction12(strm, yyNO_MATCH) + (* end case *)) +fun yyQ50 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = 0wx41 + then yyQ51(strm', lastMatch) + else if inp < 0wx41 + then if inp = 0wx30 + then yyQ51(strm', lastMatch) + else if inp < 0wx30 + then yystuck(lastMatch) + else if inp <= 0wx39 + then yyQ51(strm', lastMatch) + else yystuck(lastMatch) + else if inp = 0wx61 + then yyQ51(strm', lastMatch) + else if inp < 0wx61 + then if inp <= 0wx46 + then yyQ51(strm', lastMatch) + else yystuck(lastMatch) + else if inp <= 0wx66 + then yyQ51(strm', lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ32 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction13(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = 0wx46 + then yyAction13(strm, yyNO_MATCH) + else if inp < 0wx46 + then if inp = 0wx2F + then yyAction13(strm, yyNO_MATCH) + else if inp < 0wx2F + then if inp = 0wx2E + then yyQ41(strm', yyMATCH(strm, yyAction13, yyNO_MATCH)) + else yyAction13(strm, yyNO_MATCH) + else if inp = 0wx45 + then yyQ43(strm', yyMATCH(strm, yyAction13, yyNO_MATCH)) + else yyAction13(strm, yyNO_MATCH) + else if inp = 0wx66 + then yyAction13(strm, yyNO_MATCH) + else if inp < 0wx66 + then if inp = 0wx65 + then yyQ43(strm', yyMATCH(strm, yyAction13, yyNO_MATCH)) + else yyAction13(strm, yyNO_MATCH) + else if inp = 0wx78 + then yyQ50(strm', yyMATCH(strm, yyAction13, yyNO_MATCH)) + else yyAction13(strm, yyNO_MATCH) + (* end case *)) +fun yyQ52 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction2(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = 0wx3B + then yyAction2(strm, yyNO_MATCH) + else if inp < 0wx3B + then if inp = 0wx27 + then yyAction2(strm, yyNO_MATCH) + else if inp < 0wx27 + then if inp = 0wx22 + then yyAction2(strm, yyNO_MATCH) + else if inp < 0wx22 + then if inp = 0wx21 + then yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else yyAction2(strm, yyNO_MATCH) + else if inp <= 0wx23 + then yyAction2(strm, yyNO_MATCH) + else yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp = 0wx2C + then yyAction2(strm, yyNO_MATCH) + else if inp < 0wx2C + then if inp <= 0wx29 + then yyAction2(strm, yyNO_MATCH) + else yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp = 0wx61 + then yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp < 0wx61 + then if inp = 0wx5E + then yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp < 0wx5E + then if inp <= 0wx5A + then yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else yyAction2(strm, yyNO_MATCH) + else if inp = 0wx60 + then yyAction2(strm, yyNO_MATCH) + else yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp = 0wx7E + then yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp < 0wx7E + then if inp <= 0wx7A + then yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else yyAction2(strm, yyNO_MATCH) + else yyAction2(strm, yyNO_MATCH) + (* end case *)) +fun yyQ59 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction2(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = 0wx3A + then yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp < 0wx3A + then if inp = 0wx27 + then yyAction2(strm, yyNO_MATCH) + else if inp < 0wx27 + then if inp = 0wx22 + then yyAction2(strm, yyNO_MATCH) + else if inp < 0wx22 + then if inp = 0wx21 + then yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else yyAction2(strm, yyNO_MATCH) + else if inp <= 0wx23 + then yyAction2(strm, yyNO_MATCH) + else yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp = 0wx2C + then yyAction2(strm, yyNO_MATCH) + else if inp < 0wx2C + then if inp <= 0wx29 + then yyAction2(strm, yyNO_MATCH) + else yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp <= 0wx2F + then yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else yyQ59(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp = 0wx60 + then yyAction2(strm, yyNO_MATCH) + else if inp < 0wx60 + then if inp = 0wx5B + then yyAction2(strm, yyNO_MATCH) + else if inp < 0wx5B + then if inp = 0wx3B + then yyAction2(strm, yyNO_MATCH) + else yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp <= 0wx5D + then yyAction2(strm, yyNO_MATCH) + else yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp = 0wx7E + then yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp < 0wx7E + then if inp <= 0wx7A + then yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else yyAction2(strm, yyNO_MATCH) + else yyAction2(strm, yyNO_MATCH) + (* end case *)) +fun yyQ58 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction2(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = 0wx3A + then yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp < 0wx3A + then if inp = 0wx27 + then yyAction2(strm, yyNO_MATCH) + else if inp < 0wx27 + then if inp = 0wx22 + then yyAction2(strm, yyNO_MATCH) + else if inp < 0wx22 + then if inp = 0wx21 + then yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else yyAction2(strm, yyNO_MATCH) + else if inp <= 0wx23 + then yyAction2(strm, yyNO_MATCH) + else yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp = 0wx2C + then yyAction2(strm, yyNO_MATCH) + else if inp < 0wx2C + then if inp <= 0wx29 + then yyAction2(strm, yyNO_MATCH) + else yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp <= 0wx2F + then yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else yyQ59(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp = 0wx60 + then yyAction2(strm, yyNO_MATCH) + else if inp < 0wx60 + then if inp = 0wx5B + then yyAction2(strm, yyNO_MATCH) + else if inp < 0wx5B + then if inp = 0wx3B + then yyAction2(strm, yyNO_MATCH) + else yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp <= 0wx5D + then yyAction2(strm, yyNO_MATCH) + else yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp = 0wx7E + then yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp < 0wx7E + then if inp <= 0wx7A + then yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else yyAction2(strm, yyNO_MATCH) + else yyAction2(strm, yyNO_MATCH) + (* end case *)) +fun yyQ57 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction2(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = 0wx30 + then yyQ59(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp < 0wx30 + then if inp = 0wx2A + then yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp < 0wx2A + then if inp = 0wx22 + then yyAction2(strm, yyNO_MATCH) + else if inp < 0wx22 + then if inp = 0wx21 + then yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else yyAction2(strm, yyNO_MATCH) + else if inp = 0wx24 + then yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp < 0wx24 + then yyAction2(strm, yyNO_MATCH) + else if inp <= 0wx26 + then yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else yyAction2(strm, yyNO_MATCH) + else if inp = 0wx2D + then yyQ58(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp < 0wx2D + then if inp = 0wx2B + then yyQ58(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else yyAction2(strm, yyNO_MATCH) + else yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp = 0wx5E + then yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp < 0wx5E + then if inp = 0wx3B + then yyAction2(strm, yyNO_MATCH) + else if inp < 0wx3B + then if inp = 0wx3A + then yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else yyQ59(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp <= 0wx5A + then yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else yyAction2(strm, yyNO_MATCH) + else if inp = 0wx7B + then yyAction2(strm, yyNO_MATCH) + else if inp < 0wx7B + then if inp = 0wx60 + then yyAction2(strm, yyNO_MATCH) + else yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp = 0wx7E + then yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else yyAction2(strm, yyNO_MATCH) + (* end case *)) +fun yyQ63 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction2(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = 0wx3A + then yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp < 0wx3A + then if inp = 0wx27 + then yyAction2(strm, yyNO_MATCH) + else if inp < 0wx27 + then if inp = 0wx22 + then yyAction2(strm, yyNO_MATCH) + else if inp < 0wx22 + then if inp = 0wx21 + then yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else yyAction2(strm, yyNO_MATCH) + else if inp <= 0wx23 + then yyAction2(strm, yyNO_MATCH) + else yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp = 0wx2C + then yyAction2(strm, yyNO_MATCH) + else if inp < 0wx2C + then if inp <= 0wx29 + then yyAction2(strm, yyNO_MATCH) + else yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp <= 0wx2F + then yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else yyQ63(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp = 0wx60 + then yyAction2(strm, yyNO_MATCH) + else if inp < 0wx60 + then if inp = 0wx5B + then yyAction2(strm, yyNO_MATCH) + else if inp < 0wx5B + then if inp = 0wx3B + then yyAction2(strm, yyNO_MATCH) + else yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp <= 0wx5D + then yyAction2(strm, yyNO_MATCH) + else yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp = 0wx7E + then yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp < 0wx7E + then if inp <= 0wx7A + then yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else yyAction2(strm, yyNO_MATCH) + else yyAction2(strm, yyNO_MATCH) + (* end case *)) +fun yyQ62 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction2(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = 0wx3A + then yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp < 0wx3A + then if inp = 0wx27 + then yyAction2(strm, yyNO_MATCH) + else if inp < 0wx27 + then if inp = 0wx22 + then yyAction2(strm, yyNO_MATCH) + else if inp < 0wx22 + then if inp = 0wx21 + then yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else yyAction2(strm, yyNO_MATCH) + else if inp <= 0wx23 + then yyAction2(strm, yyNO_MATCH) + else yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp = 0wx2C + then yyAction2(strm, yyNO_MATCH) + else if inp < 0wx2C + then if inp <= 0wx29 + then yyAction2(strm, yyNO_MATCH) + else yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp <= 0wx2F + then yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else yyQ63(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp = 0wx60 + then yyAction2(strm, yyNO_MATCH) + else if inp < 0wx60 + then if inp = 0wx5B + then yyAction2(strm, yyNO_MATCH) + else if inp < 0wx5B + then if inp = 0wx3B + then yyAction2(strm, yyNO_MATCH) + else yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp <= 0wx5D + then yyAction2(strm, yyNO_MATCH) + else yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp = 0wx7E + then yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp < 0wx7E + then if inp <= 0wx7A + then yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else yyAction2(strm, yyNO_MATCH) + else yyAction2(strm, yyNO_MATCH) + (* end case *)) +fun yyQ61 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction2(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = 0wx30 + then yyQ63(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp < 0wx30 + then if inp = 0wx2A + then yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp < 0wx2A + then if inp = 0wx22 + then yyAction2(strm, yyNO_MATCH) + else if inp < 0wx22 + then if inp = 0wx21 + then yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else yyAction2(strm, yyNO_MATCH) + else if inp = 0wx24 + then yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp < 0wx24 + then yyAction2(strm, yyNO_MATCH) + else if inp <= 0wx26 + then yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else yyAction2(strm, yyNO_MATCH) + else if inp = 0wx2D + then yyQ62(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp < 0wx2D + then if inp = 0wx2B + then yyQ62(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else yyAction2(strm, yyNO_MATCH) + else yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp = 0wx5E + then yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp < 0wx5E + then if inp = 0wx3B + then yyAction2(strm, yyNO_MATCH) + else if inp < 0wx3B + then if inp = 0wx3A + then yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else yyQ63(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp <= 0wx5A + then yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else yyAction2(strm, yyNO_MATCH) + else if inp = 0wx7B + then yyAction2(strm, yyNO_MATCH) + else if inp < 0wx7B + then if inp = 0wx60 + then yyAction2(strm, yyNO_MATCH) + else yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp = 0wx7E + then yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else yyAction2(strm, yyNO_MATCH) + (* end case *)) +fun yyQ60 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction2(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = 0wx3C + then yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp < 0wx3C + then if inp = 0wx2A + then yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp < 0wx2A + then if inp = 0wx22 + then yyAction2(strm, yyNO_MATCH) + else if inp < 0wx22 + then if inp = 0wx21 + then yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else yyAction2(strm, yyNO_MATCH) + else if inp = 0wx24 + then yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp < 0wx24 + then yyAction2(strm, yyNO_MATCH) + else if inp <= 0wx26 + then yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else yyAction2(strm, yyNO_MATCH) + else if inp = 0wx30 + then yyQ60(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp < 0wx30 + then if inp = 0wx2C + then yyAction2(strm, yyNO_MATCH) + else yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp = 0wx3A + then yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp = 0wx3B + then yyAction2(strm, yyNO_MATCH) + else yyQ60(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp = 0wx61 + then yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp < 0wx61 + then if inp = 0wx5B + then yyAction2(strm, yyNO_MATCH) + else if inp < 0wx5B + then if inp = 0wx45 + then yyQ61(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp = 0wx5E + then yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp < 0wx5E + then yyAction2(strm, yyNO_MATCH) + else if inp = 0wx60 + then yyAction2(strm, yyNO_MATCH) + else yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp = 0wx7B + then yyAction2(strm, yyNO_MATCH) + else if inp < 0wx7B + then if inp = 0wx65 + then yyQ61(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp = 0wx7E + then yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else yyAction2(strm, yyNO_MATCH) + (* end case *)) +fun yyQ55 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction2(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = 0wx3A + then yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp < 0wx3A + then if inp = 0wx27 + then yyAction2(strm, yyNO_MATCH) + else if inp < 0wx27 + then if inp = 0wx22 + then yyAction2(strm, yyNO_MATCH) + else if inp < 0wx22 + then if inp = 0wx21 + then yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else yyAction2(strm, yyNO_MATCH) + else if inp <= 0wx23 + then yyAction2(strm, yyNO_MATCH) + else yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp = 0wx2C + then yyAction2(strm, yyNO_MATCH) + else if inp < 0wx2C + then if inp <= 0wx29 + then yyAction2(strm, yyNO_MATCH) + else yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp <= 0wx2F + then yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else yyQ60(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp = 0wx60 + then yyAction2(strm, yyNO_MATCH) + else if inp < 0wx60 + then if inp = 0wx5B + then yyAction2(strm, yyNO_MATCH) + else if inp < 0wx5B + then if inp = 0wx3B + then yyAction2(strm, yyNO_MATCH) + else yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp <= 0wx5D + then yyAction2(strm, yyNO_MATCH) + else yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp = 0wx7E + then yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp < 0wx7E + then if inp <= 0wx7A + then yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else yyAction2(strm, yyNO_MATCH) + else yyAction2(strm, yyNO_MATCH) + (* end case *)) +fun yyQ56 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction2(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = 0wx3B + then yyAction2(strm, yyNO_MATCH) + else if inp < 0wx3B + then if inp = 0wx2C + then yyAction2(strm, yyNO_MATCH) + else if inp < 0wx2C + then if inp = 0wx24 + then yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp < 0wx24 + then if inp = 0wx21 + then yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else yyAction2(strm, yyNO_MATCH) + else if inp = 0wx27 + then yyAction2(strm, yyNO_MATCH) + else if inp < 0wx27 + then yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp <= 0wx29 + then yyAction2(strm, yyNO_MATCH) + else yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp = 0wx2F + then yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp < 0wx2F + then if inp = 0wx2D + then yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else yyQ55(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp = 0wx3A + then yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else yyQ56(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp = 0wx61 + then yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp < 0wx61 + then if inp = 0wx5B + then yyAction2(strm, yyNO_MATCH) + else if inp < 0wx5B + then if inp = 0wx45 + then yyQ57(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp = 0wx5E + then yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp < 0wx5E + then yyAction2(strm, yyNO_MATCH) + else if inp = 0wx60 + then yyAction2(strm, yyNO_MATCH) + else yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp = 0wx7B + then yyAction2(strm, yyNO_MATCH) + else if inp < 0wx7B + then if inp = 0wx65 + then yyQ57(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp = 0wx7E + then yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else yyAction2(strm, yyNO_MATCH) + (* end case *)) +fun yyQ54 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction2(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = 0wx3B + then yyAction2(strm, yyNO_MATCH) + else if inp < 0wx3B + then if inp = 0wx2C + then yyAction2(strm, yyNO_MATCH) + else if inp < 0wx2C + then if inp = 0wx24 + then yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp < 0wx24 + then if inp = 0wx21 + then yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else yyAction2(strm, yyNO_MATCH) + else if inp = 0wx27 + then yyAction2(strm, yyNO_MATCH) + else if inp < 0wx27 + then yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp <= 0wx29 + then yyAction2(strm, yyNO_MATCH) + else yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp = 0wx2F + then yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp < 0wx2F + then if inp = 0wx2D + then yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else yyQ55(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp = 0wx3A + then yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else yyQ56(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp = 0wx61 + then yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp < 0wx61 + then if inp = 0wx5B + then yyAction2(strm, yyNO_MATCH) + else if inp < 0wx5B + then if inp = 0wx45 + then yyQ57(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp = 0wx5E + then yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp < 0wx5E + then yyAction2(strm, yyNO_MATCH) + else if inp = 0wx60 + then yyAction2(strm, yyNO_MATCH) + else yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp = 0wx7B + then yyAction2(strm, yyNO_MATCH) + else if inp < 0wx7B + then if inp = 0wx65 + then yyQ57(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp = 0wx7E + then yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else yyAction2(strm, yyNO_MATCH) + (* end case *)) +fun yyQ65 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction2(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = 0wx3C + then yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp < 0wx3C + then if inp = 0wx2A + then yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp < 0wx2A + then if inp = 0wx22 + then yyAction2(strm, yyNO_MATCH) + else if inp < 0wx22 + then if inp = 0wx21 + then yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else yyAction2(strm, yyNO_MATCH) + else if inp = 0wx24 + then yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp < 0wx24 + then yyAction2(strm, yyNO_MATCH) + else if inp <= 0wx26 + then yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else yyAction2(strm, yyNO_MATCH) + else if inp = 0wx30 + then yyQ65(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp < 0wx30 + then if inp = 0wx2C + then yyAction2(strm, yyNO_MATCH) + else yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp = 0wx3A + then yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp = 0wx3B + then yyAction2(strm, yyNO_MATCH) + else yyQ65(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp = 0wx60 + then yyAction2(strm, yyNO_MATCH) + else if inp < 0wx60 + then if inp = 0wx47 + then yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp < 0wx47 + then if inp <= 0wx40 + then yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else yyQ65(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp = 0wx5B + then yyAction2(strm, yyNO_MATCH) + else if inp < 0wx5B + then yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp <= 0wx5D + then yyAction2(strm, yyNO_MATCH) + else yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp = 0wx7B + then yyAction2(strm, yyNO_MATCH) + else if inp < 0wx7B + then if inp <= 0wx66 + then yyQ65(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp = 0wx7E + then yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else yyAction2(strm, yyNO_MATCH) + (* end case *)) +fun yyQ64 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction2(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = 0wx3C + then yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp < 0wx3C + then if inp = 0wx2A + then yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp < 0wx2A + then if inp = 0wx22 + then yyAction2(strm, yyNO_MATCH) + else if inp < 0wx22 + then if inp = 0wx21 + then yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else yyAction2(strm, yyNO_MATCH) + else if inp = 0wx24 + then yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp < 0wx24 + then yyAction2(strm, yyNO_MATCH) + else if inp <= 0wx26 + then yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else yyAction2(strm, yyNO_MATCH) + else if inp = 0wx30 + then yyQ65(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp < 0wx30 + then if inp = 0wx2C + then yyAction2(strm, yyNO_MATCH) + else yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp = 0wx3A + then yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp = 0wx3B + then yyAction2(strm, yyNO_MATCH) + else yyQ65(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp = 0wx60 + then yyAction2(strm, yyNO_MATCH) + else if inp < 0wx60 + then if inp = 0wx47 + then yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp < 0wx47 + then if inp <= 0wx40 + then yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else yyQ65(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp = 0wx5B + then yyAction2(strm, yyNO_MATCH) + else if inp < 0wx5B + then yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp <= 0wx5D + then yyAction2(strm, yyNO_MATCH) + else yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp = 0wx7B + then yyAction2(strm, yyNO_MATCH) + else if inp < 0wx7B + then if inp <= 0wx66 + then yyQ65(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp = 0wx7E + then yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else yyAction2(strm, yyNO_MATCH) + (* end case *)) +fun yyQ53 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction2(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = 0wx45 + then yyQ57(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp < 0wx45 + then if inp = 0wx2C + then yyAction2(strm, yyNO_MATCH) + else if inp < 0wx2C + then if inp = 0wx24 + then yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp < 0wx24 + then if inp = 0wx21 + then yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else yyAction2(strm, yyNO_MATCH) + else if inp = 0wx27 + then yyAction2(strm, yyNO_MATCH) + else if inp < 0wx27 + then yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp <= 0wx29 + then yyAction2(strm, yyNO_MATCH) + else yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp = 0wx2F + then yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp < 0wx2F + then if inp = 0wx2D + then yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else yyQ55(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp = 0wx3B + then yyAction2(strm, yyNO_MATCH) + else yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp = 0wx66 + then yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp < 0wx66 + then if inp = 0wx60 + then yyAction2(strm, yyNO_MATCH) + else if inp < 0wx60 + then if inp = 0wx5B + then yyAction2(strm, yyNO_MATCH) + else if inp < 0wx5B + then yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp <= 0wx5D + then yyAction2(strm, yyNO_MATCH) + else yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp = 0wx65 + then yyQ57(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp = 0wx7B + then yyAction2(strm, yyNO_MATCH) + else if inp < 0wx7B + then if inp = 0wx78 + then yyQ64(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp = 0wx7E + then yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else yyAction2(strm, yyNO_MATCH) + (* end case *)) +fun yyQ31 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction2(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = 0wx3A + then yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp < 0wx3A + then if inp = 0wx2A + then yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp < 0wx2A + then if inp = 0wx22 + then yyAction2(strm, yyNO_MATCH) + else if inp < 0wx22 + then if inp = 0wx21 + then yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else yyAction2(strm, yyNO_MATCH) + else if inp = 0wx24 + then yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp < 0wx24 + then yyAction2(strm, yyNO_MATCH) + else if inp <= 0wx26 + then yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else yyAction2(strm, yyNO_MATCH) + else if inp = 0wx2D + then yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp < 0wx2D + then if inp = 0wx2C + then yyAction2(strm, yyNO_MATCH) + else yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp = 0wx30 + then yyQ53(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp <= 0wx2F + then yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else yyQ54(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp = 0wx60 + then yyAction2(strm, yyNO_MATCH) + else if inp < 0wx60 + then if inp = 0wx5B + then yyAction2(strm, yyNO_MATCH) + else if inp < 0wx5B + then if inp = 0wx3B + then yyAction2(strm, yyNO_MATCH) + else yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp <= 0wx5D + then yyAction2(strm, yyNO_MATCH) + else yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp = 0wx7E + then yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp < 0wx7E + then if inp <= 0wx7A + then yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else yyAction2(strm, yyNO_MATCH) + else yyAction2(strm, yyNO_MATCH) + (* end case *)) +fun yyQ30 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction5(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction5(strm, yyNO_MATCH) + (* end case *)) +fun yyQ29 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction4(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction4(strm, yyNO_MATCH) + (* end case *)) +fun yyQ28 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction3(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction3(strm, yyNO_MATCH) + (* end case *)) +fun yyQ67 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction10(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction10(strm, yyNO_MATCH) + (* end case *)) +fun yyQ66 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction11(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction11(strm, yyNO_MATCH) + (* end case *)) +fun yyQ27 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction33(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = 0wx67 + then yyAction33(strm, yyNO_MATCH) + else if inp < 0wx67 + then if inp = 0wx66 + then yyQ66(strm', yyMATCH(strm, yyAction33, yyNO_MATCH)) + else yyAction33(strm, yyNO_MATCH) + else if inp = 0wx74 + then yyQ67(strm', yyMATCH(strm, yyAction33, yyNO_MATCH)) + else yyAction33(strm, yyNO_MATCH) + (* end case *)) +fun yyQ26 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction17(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction17(strm, yyNO_MATCH) + (* end case *)) +fun yyQ25 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction2(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = 0wx3B + then yyAction2(strm, yyNO_MATCH) + else if inp < 0wx3B + then if inp = 0wx27 + then yyAction2(strm, yyNO_MATCH) + else if inp < 0wx27 + then if inp = 0wx22 + then yyAction2(strm, yyNO_MATCH) + else if inp < 0wx22 + then if inp = 0wx21 + then yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else yyAction2(strm, yyNO_MATCH) + else if inp <= 0wx23 + then yyAction2(strm, yyNO_MATCH) + else yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp = 0wx2C + then yyAction2(strm, yyNO_MATCH) + else if inp < 0wx2C + then if inp <= 0wx29 + then yyAction2(strm, yyNO_MATCH) + else yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp = 0wx61 + then yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp < 0wx61 + then if inp = 0wx5E + then yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp < 0wx5E + then if inp <= 0wx5A + then yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else yyAction2(strm, yyNO_MATCH) + else if inp = 0wx60 + then yyAction2(strm, yyNO_MATCH) + else yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp = 0wx7E + then yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else if inp < 0wx7E + then if inp <= 0wx7A + then yyQ52(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else yyAction2(strm, yyNO_MATCH) + else yyAction2(strm, yyNO_MATCH) + (* end case *)) +fun yyQ68 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction0(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = 0wxD + then yyQ68(strm', yyMATCH(strm, yyAction0, yyNO_MATCH)) + else if inp < 0wxD + then if inp = 0wx9 + then yyQ68(strm', yyMATCH(strm, yyAction0, yyNO_MATCH)) + else if inp < 0wx9 + then yyAction0(strm, yyNO_MATCH) + else if inp <= 0wxA + then yyQ68(strm', yyMATCH(strm, yyAction0, yyNO_MATCH)) + else yyAction0(strm, yyNO_MATCH) + else if inp = 0wx20 + then yyQ68(strm', yyMATCH(strm, yyAction0, yyNO_MATCH)) + else yyAction0(strm, yyNO_MATCH) + (* end case *)) +fun yyQ24 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction0(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = 0wxD + then yyQ68(strm', yyMATCH(strm, yyAction0, yyNO_MATCH)) + else if inp < 0wxD + then if inp = 0wx9 + then yyQ68(strm', yyMATCH(strm, yyAction0, yyNO_MATCH)) + else if inp < 0wx9 + then yyAction0(strm, yyNO_MATCH) + else if inp <= 0wxA + then yyQ68(strm', yyMATCH(strm, yyAction0, yyNO_MATCH)) + else yyAction0(strm, yyNO_MATCH) + else if inp = 0wx20 + then yyQ68(strm', yyMATCH(strm, yyAction0, yyNO_MATCH)) + else yyAction0(strm, yyNO_MATCH) + (* end case *)) +fun yyQ23 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction33(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction33(strm, yyNO_MATCH) + (* end case *)) +fun yyQ1 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => + if ULexBuffer.eof(!(yystrm)) + then let + val yycolno = ref(yygetcolNo(!(yystrm))) + val yylineno = ref(yygetlineNo(!(yystrm))) + in + (case (!(yyss)) + of _ => (UserDeclarations.eof()) + (* end case *)) + end + else yystuck(lastMatch) + | SOME(inp, strm') => + if inp = 0wx2E + then yyQ25(strm', lastMatch) + else if inp < 0wx2E + then if inp = 0wx23 + then yyQ27(strm', lastMatch) + else if inp < 0wx23 + then if inp = 0wxE + then yyQ23(strm', lastMatch) + else if inp < 0wxE + then if inp = 0wxB + then yyQ23(strm', lastMatch) + else if inp < 0wxB + then if inp <= 0wx8 + then yyQ23(strm', lastMatch) + else yyQ24(strm', lastMatch) + else if inp = 0wxD + then yyQ24(strm', lastMatch) + else yyQ23(strm', lastMatch) + else if inp = 0wx21 + then yyQ25(strm', lastMatch) + else if inp < 0wx21 + then if inp = 0wx20 + then yyQ24(strm', lastMatch) + else yyQ23(strm', lastMatch) + else yyQ26(strm', lastMatch) + else if inp = 0wx2A + then yyQ25(strm', lastMatch) + else if inp < 0wx2A + then if inp = 0wx28 + then yyQ29(strm', lastMatch) + else if inp < 0wx28 + then if inp = 0wx27 + then yyQ28(strm', lastMatch) + else yyQ25(strm', lastMatch) + else yyQ30(strm', lastMatch) + else if inp = 0wx2C + then yyQ23(strm', lastMatch) + else yyQ31(strm', lastMatch) + else if inp = 0wx5D + then yyQ36(strm', lastMatch) + else if inp < 0wx5D + then if inp = 0wx3B + then yyQ34(strm', lastMatch) + else if inp < 0wx3B + then if inp = 0wx31 + then yyQ33(strm', lastMatch) + else if inp < 0wx31 + then if inp = 0wx30 + then yyQ32(strm', lastMatch) + else yyQ25(strm', lastMatch) + else if inp = 0wx3A + then yyQ25(strm', lastMatch) + else yyQ33(strm', lastMatch) + else if inp = 0wx5B + then yyQ35(strm', lastMatch) + else if inp = 0wx5C + then yyQ23(strm', lastMatch) + else yyQ25(strm', lastMatch) + else if inp = 0wx7C + then yyQ23(strm', lastMatch) + else if inp < 0wx7C + then if inp = 0wx61 + then yyQ25(strm', lastMatch) + else if inp < 0wx61 + then if inp = 0wx60 + then yyQ23(strm', lastMatch) + else yyQ25(strm', lastMatch) + else if inp = 0wx7B + then yyQ37(strm', lastMatch) + else yyQ25(strm', lastMatch) + else if inp = 0wx7E + then yyQ25(strm', lastMatch) + else if inp = 0wx7D + then yyQ38(strm', lastMatch) + else yyQ23(strm', lastMatch) + (* end case *)) +fun yyQ20 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction27(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction27(strm, yyNO_MATCH) + (* end case *)) +fun yyQ21 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction28(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction28(strm, yyNO_MATCH) + (* end case *)) +fun yyQ19 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = 0wx3B + then yyQ21(strm', lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ18 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = 0wx3C + then yystuck(lastMatch) + else if inp < 0wx3C + then if inp = 0wx3A + then yystuck(lastMatch) + else if inp < 0wx3A + then if inp <= 0wx2F + then yystuck(lastMatch) + else yyQ19(strm', lastMatch) + else yyQ20(strm', lastMatch) + else if inp = 0wx47 + then yystuck(lastMatch) + else if inp < 0wx47 + then if inp <= 0wx40 + then yystuck(lastMatch) + else yyQ19(strm', lastMatch) + else if inp = 0wx61 + then yyQ19(strm', lastMatch) + else if inp < 0wx61 + then yystuck(lastMatch) + else if inp <= 0wx66 + then yyQ19(strm', lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ17 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = 0wx41 + then yyQ18(strm', lastMatch) + else if inp < 0wx41 + then if inp = 0wx30 + then yyQ18(strm', lastMatch) + else if inp < 0wx30 + then yystuck(lastMatch) + else if inp <= 0wx39 + then yyQ18(strm', lastMatch) + else yystuck(lastMatch) + else if inp = 0wx61 + then yyQ18(strm', lastMatch) + else if inp < 0wx61 + then if inp <= 0wx46 + then yyQ18(strm', lastMatch) + else yystuck(lastMatch) + else if inp <= 0wx66 + then yyQ18(strm', lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ16 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction26(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction26(strm, yyNO_MATCH) + (* end case *)) +fun yyQ15 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction25(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction25(strm, yyNO_MATCH) + (* end case *)) +fun yyQ14 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction24(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction24(strm, yyNO_MATCH) + (* end case *)) +fun yyQ13 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction23(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction23(strm, yyNO_MATCH) + (* end case *)) +fun yyQ12 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction22(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction22(strm, yyNO_MATCH) + (* end case *)) +fun yyQ11 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction21(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction21(strm, yyNO_MATCH) + (* end case *)) +fun yyQ10 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction20(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction20(strm, yyNO_MATCH) + (* end case *)) +fun yyQ9 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction18(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction18(strm, yyNO_MATCH) + (* end case *)) +fun yyQ8 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction19(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction19(strm, yyNO_MATCH) + (* end case *)) +fun yyQ6 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction30(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = 0wxA + then yyAction30(strm, yyNO_MATCH) + else if inp < 0wxA + then if inp = 0wx9 + then yyQ6(strm', yyMATCH(strm, yyAction30, yyNO_MATCH)) + else yyAction30(strm, yyNO_MATCH) + else if inp = 0wx20 + then yyQ6(strm', yyMATCH(strm, yyAction30, yyNO_MATCH)) + else yyAction30(strm, yyNO_MATCH) + (* end case *)) +fun yyQ7 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction30(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = 0wxB + then yyAction30(strm, yyNO_MATCH) + else if inp < 0wxB + then if inp <= 0wx8 + then yyAction30(strm, yyNO_MATCH) + else yyQ6(strm', yyMATCH(strm, yyAction30, yyNO_MATCH)) + else if inp = 0wx20 + then yyQ6(strm', yyMATCH(strm, yyAction30, yyNO_MATCH)) + else yyAction30(strm, yyNO_MATCH) + (* end case *)) +fun yyQ5 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = 0wxD + then yyQ7(strm', lastMatch) + else if inp < 0wxD + then if inp = 0wxA + then yyQ6(strm', lastMatch) + else if inp < 0wxA + then if inp = 0wx9 + then yyQ5(strm', lastMatch) + else yystuck(lastMatch) + else yystuck(lastMatch) + else if inp = 0wx20 + then yyQ5(strm', lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ4 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction32(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = 0wx62 + then yyQ11(strm', yyMATCH(strm, yyAction32, yyNO_MATCH)) + else if inp < 0wx62 + then if inp = 0wx20 + then yyQ5(strm', yyMATCH(strm, yyAction32, yyNO_MATCH)) + else if inp < 0wx20 + then if inp = 0wxB + then yyAction32(strm, yyNO_MATCH) + else if inp < 0wxB + then if inp = 0wx9 + then yyQ5(strm', yyMATCH(strm, yyAction32, yyNO_MATCH)) + else if inp = 0wxA + then yyQ6(strm', yyMATCH(strm, yyAction32, yyNO_MATCH)) + else yyAction32(strm, yyNO_MATCH) + else if inp = 0wxD + then yyQ7(strm', yyMATCH(strm, yyAction32, yyNO_MATCH)) + else yyAction32(strm, yyNO_MATCH) + else if inp = 0wx5C + then yyQ9(strm', yyMATCH(strm, yyAction32, yyNO_MATCH)) + else if inp < 0wx5C + then if inp = 0wx22 + then yyQ8(strm', yyMATCH(strm, yyAction32, yyNO_MATCH)) + else yyAction32(strm, yyNO_MATCH) + else if inp = 0wx61 + then yyQ10(strm', yyMATCH(strm, yyAction32, yyNO_MATCH)) + else yyAction32(strm, yyNO_MATCH) + else if inp = 0wx73 + then yyAction32(strm, yyNO_MATCH) + else if inp < 0wx73 + then if inp = 0wx6E + then yyQ13(strm', yyMATCH(strm, yyAction32, yyNO_MATCH)) + else if inp < 0wx6E + then if inp = 0wx66 + then yyQ12(strm', yyMATCH(strm, yyAction32, yyNO_MATCH)) + else yyAction32(strm, yyNO_MATCH) + else if inp = 0wx72 + then yyQ14(strm', yyMATCH(strm, yyAction32, yyNO_MATCH)) + else yyAction32(strm, yyNO_MATCH) + else if inp = 0wx77 + then yyAction32(strm, yyNO_MATCH) + else if inp < 0wx77 + then if inp = 0wx75 + then yyAction32(strm, yyNO_MATCH) + else if inp = 0wx74 + then yyQ15(strm', yyMATCH(strm, yyAction32, yyNO_MATCH)) + else yyQ16(strm', yyMATCH(strm, yyAction32, yyNO_MATCH)) + else if inp = 0wx78 + then yyQ17(strm', yyMATCH(strm, yyAction32, yyNO_MATCH)) + else yyAction32(strm, yyNO_MATCH) + (* end case *)) +fun yyQ3 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction31(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction31(strm, yyNO_MATCH) + (* end case *)) +fun yyQ22 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction29(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = 0wx23 + then yyQ22(strm', yyMATCH(strm, yyAction29, yyNO_MATCH)) + else if inp < 0wx23 + then if inp = 0wx22 + then yyAction29(strm, yyNO_MATCH) + else yyQ22(strm', yyMATCH(strm, yyAction29, yyNO_MATCH)) + else if inp = 0wx5C + then yyAction29(strm, yyNO_MATCH) + else yyQ22(strm', yyMATCH(strm, yyAction29, yyNO_MATCH)) + (* end case *)) +fun yyQ2 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction29(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = 0wx23 + then yyQ22(strm', yyMATCH(strm, yyAction29, yyNO_MATCH)) + else if inp < 0wx23 + then if inp = 0wx22 + then yyAction29(strm, yyNO_MATCH) + else yyQ22(strm', yyMATCH(strm, yyAction29, yyNO_MATCH)) + else if inp = 0wx5C + then yyAction29(strm, yyNO_MATCH) + else yyQ22(strm', yyMATCH(strm, yyAction29, yyNO_MATCH)) + (* end case *)) +fun yyQ0 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => + if ULexBuffer.eof(!(yystrm)) + then let + val yycolno = ref(yygetcolNo(!(yystrm))) + val yylineno = ref(yygetlineNo(!(yystrm))) + in + (case (!(yyss)) + of _ => (UserDeclarations.eof()) + (* end case *)) + end + else yystuck(lastMatch) + | SOME(inp, strm') => + if inp = 0wx23 + then yyQ2(strm', lastMatch) + else if inp < 0wx23 + then if inp = 0wx22 + then yyQ3(strm', lastMatch) + else yyQ2(strm', lastMatch) + else if inp = 0wx5C + then yyQ4(strm', lastMatch) + else yyQ2(strm', lastMatch) + (* end case *)) +in + (case (!(yyss)) + of S => yyQ0(!(yystrm), yyNO_MATCH) + | INITIAL => yyQ1(!(yystrm), yyNO_MATCH) + (* end case *)) +end +end + and skip() = (yystartPos := yygetPos(); + yylastwasnref := ULexBuffer.lastWasNL (!yystrm); + continue()) + in (continue(), (!yystartPos, yygetPos()-1), !yystrm, !yyss) end + in + lex() + end + in + type pos = AntlrStreamPos.pos + type span = AntlrStreamPos.span + type tok = UserDeclarations.lex_result + + datatype prestrm = STRM of ULexBuffer.stream * + (yystart_state * tok * span * prestrm * yystart_state) option ref + type strm = (prestrm * yystart_state) + + fun lex sm +(STRM (yystrm, memo), ss) = (case !memo + of NONE => let + val (tok, span, yystrm', ss') = innerLex +(yystrm, ss, sm) + val strm' = STRM (yystrm', ref NONE); + in + memo := SOME (ss, tok, span, strm', ss'); + (tok, span, (strm', ss')) + end + | SOME (ss', tok, span, strm', ss'') => + if ss = ss' then + (tok, span, (strm', ss'')) + else ( + memo := NONE; + lex sm +(STRM (yystrm, memo), ss)) + (* end case *)) + + fun streamify input = (STRM (yystreamify' 0 input, ref NONE), INITIAL) + fun streamifyReader readFn strm = (STRM (yystreamifyReader' 0 readFn strm, ref NONE), + INITIAL) + fun streamifyInstream strm = (STRM (yystreamifyInstream' 0 strm, ref NONE), + INITIAL) + + fun getPos (STRM (strm, _), _) = ULexBuffer.getpos strm + + end +end + diff --git a/smlnj-lib/SExp/sexp.sml b/smlnj-lib/SExp/sexp.sml new file mode 100644 index 0000000..d02d510 --- /dev/null +++ b/smlnj-lib/SExp/sexp.sml @@ -0,0 +1,58 @@ +(* sexp.sml + * + * COPYRIGHT (c) 2011 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * Author: Damon Wang (with modifications by John Reppy) + * + * This is the tree representation of a SExp data as produced/consumed + * by the tree parser. + *) + +structure SExp = + struct + + datatype value + = SYMBOL of Atom.atom + | BOOL of bool + | INT of IntInf.int + | FLOAT of real + | STRING of string + | QUOTE of value + | LIST of value list + + fun same (SYMBOL a, SYMBOL b) = Atom.same (a, b) + | same (BOOL a, BOOL b) = (a = b) + | same (INT a, INT b) = (a = b) + | same (FLOAT a, FLOAT b) = Real.==(a, b) + | same (STRING a, STRING b) = (a = b) + | same (QUOTE a, QUOTE b) = same(a, b) + | same (LIST a, LIST b) = ListPair.allEq same (a, b) + | same _ = false + + fun compare (a, b) = (case (a, b) + of (SYMBOL a, SYMBOL b) => Atom.compare(a, b) + | (SYMBOL _, _) => LESS + | (_, SYMBOL _) => GREATER + | (BOOL a, BOOL b) => + if (a = b) then EQUAL + else if a then LESS + else GREATER + | (BOOL _, _) => GREATER + | (_, BOOL _) => LESS + | (INT a, INT b) => IntInf.compare (a, b) + | (INT _, _) => LESS + | (_, INT _) => GREATER + | (FLOAT a, FLOAT b) => Real.compare(a, b) + | (FLOAT _, _) => LESS + | (_, FLOAT _) => GREATER + | (STRING a, STRING b) => String.compare(a, b) + | (STRING _, _) => LESS + | (_, STRING _) => GREATER + | (QUOTE a, QUOTE b) => compare(a, b) + | (QUOTE _, _) => LESS + | (_, QUOTE _) => GREATER + | (LIST a, LIST b) => List.collate compare (a, b) + (* end case *)) + + end diff --git a/smlnj-lib/SExp/test-pp.sml b/smlnj-lib/SExp/test-pp.sml new file mode 100644 index 0000000..9419b65 --- /dev/null +++ b/smlnj-lib/SExp/test-pp.sml @@ -0,0 +1,35 @@ +(* test-pp.sml + * + * COPYRIGHT (c) 2018 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * Test the pretty printing of S-Expressions. + *) + +structure TestPP = + struct + + local + structure S = SExp + structure PP = TextIOPP + + fun pr wid sexp = let + val ppStrm = PP.openOut{dst = TextIO.stdOut, wid=wid} + in + SExpPP.output (ppStrm, sexp); + PP.closeStream ppStrm + end + + fun list items = S.LIST(S.SYMBOL(Atom.atom "list") :: items) + in + (* a large list *) + fun prList wid = pr wid (list (List.tabulate(100, fn i => S.INT(IntInf.fromInt i)))) + (* list of lists *) + fun prListOfLists wid = let + fun mkList n = list (List.tabulate(n, fn i => S.INT(IntInf.fromInt i))) + in + pr wid (list (List.tabulate(50, mkList))) + end + end (* local *) + + end diff --git a/smlnj-lib/TODO b/smlnj-lib/TODO new file mode 100644 index 0000000..f765c5d --- /dev/null +++ b/smlnj-lib/TODO @@ -0,0 +1,44 @@ +Utility library +--------------- + Polymorphic version of dynamic arrays. + New hash table implementation/API. + +HTML Library +------------ + + Rewrite parser as recursive descent (the ML-Yacc version doesn't handle + whitespace in the HEAD element very well). + +PP Library +---------- + + Add support for tabular layout. + +RegExp library +-------------- + + Add support for Interval REs to the DFA and Thompson engines + + Add support for "$" to the Thompson engine + + Add support for POSIX character classes to the AWK syntax. These + are + [:alnum:] Alphanumeric characters. + [:alpha:] Alphabetic characters. + [:blank:] Space and TAB characters. + [:cntrl:] Control characters. + [:digit:] Decimal digits. + [:graph:] Characters that are both printable and visible. + (A space is printable but not visible, whereas an ‘a’ is both.) + [:lower:] Lowercase alphabetic characters. + [:print:] Printable characters (characters that are not control characters). + [:punct:] Punctuation characters (characters that are not letters, + digits, control characters, or space characters). + [:space:] Space characters (such as space, TAB, and formfeed, etc). + [:upper:] Uppercase alphabetic characters. + [:xdigit:] Hexadecimal digits. + +New libraries +------------- + + CGI scripting diff --git a/smlnj-lib/UUID/.cm/GUID/gen-uuid.sml b/smlnj-lib/UUID/.cm/GUID/gen-uuid.sml new file mode 100644 index 0000000..7586388 --- /dev/null +++ b/smlnj-lib/UUID/.cm/GUID/gen-uuid.sml @@ -0,0 +1 @@ +guid-$/(uuid-lib.cm):gen-uuid.sml-1714016093.754 diff --git a/smlnj-lib/UUID/.cm/GUID/uuid.sml b/smlnj-lib/UUID/.cm/GUID/uuid.sml new file mode 100644 index 0000000..4ded655 --- /dev/null +++ b/smlnj-lib/UUID/.cm/GUID/uuid.sml @@ -0,0 +1 @@ +guid-$/(uuid-lib.cm):uuid.sml-1714016093.724 diff --git a/smlnj-lib/UUID/.cm/SKEL/gen-uuid.sml b/smlnj-lib/UUID/.cm/SKEL/gen-uuid.sml new file mode 100644 index 0000000..c20e8ca --- /dev/null +++ b/smlnj-lib/UUID/.cm/SKEL/gen-uuid.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f7d"Word8"d"Random"Cd"LargeWord"d"UUID"d"Int"d"Time"d"IntInf"Nad"GenUUID"jh1ad"W8V"gp1d"Word8Vector"h0 \ No newline at end of file diff --git a/smlnj-lib/UUID/.cm/SKEL/uuid.sml b/smlnj-lib/UUID/.cm/SKEL/uuid.sml new file mode 100644 index 0000000..bc1bcb4 --- /dev/null +++ b/smlnj-lib/UUID/.cm/SKEL/uuid.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f7d"StringCvt"d"Word64"Cd"Word8"d"Word"d"List"d"String"Word8Vector"Nad"UUID"jh3ad"W8V"gp19ad"W8VS"gp1d"Word8VectorSlice"ad"SS"gp1d"Substring"h0 \ No newline at end of file diff --git a/smlnj-lib/UUID/.cm/amd64-unix/gen-uuid.sml b/smlnj-lib/UUID/.cm/amd64-unix/gen-uuid.sml new file mode 100644 index 0000000..7a15e11 Binary files /dev/null and b/smlnj-lib/UUID/.cm/amd64-unix/gen-uuid.sml differ diff --git a/smlnj-lib/UUID/.cm/amd64-unix/uuid.sml b/smlnj-lib/UUID/.cm/amd64-unix/uuid.sml new file mode 100644 index 0000000..aecb027 Binary files /dev/null and b/smlnj-lib/UUID/.cm/amd64-unix/uuid.sml differ diff --git a/smlnj-lib/UUID/README b/smlnj-lib/UUID/README new file mode 100644 index 0000000..636e866 --- /dev/null +++ b/smlnj-lib/UUID/README @@ -0,0 +1,8 @@ +This is a library for generating "Universally Unique IDentifiers" (UUIDs). +It currently only supports generating Variant 1, Type 4 UUIDs, which +are random bit strings (these are the UUIDs generated by the `NewGuid` +function on Microsoft Windows). + +See https://en.wikipedia.org/wiki/Universally_unique_identifier for +more information about GUIDs. + diff --git a/smlnj-lib/UUID/gen-uuid.sml b/smlnj-lib/UUID/gen-uuid.sml new file mode 100644 index 0000000..5c1a60b --- /dev/null +++ b/smlnj-lib/UUID/gen-uuid.sml @@ -0,0 +1,67 @@ +(* gen-uuid.sml + * + * COPYRIGHT (c) 2020 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + *) + +structure GenUUID : sig + + (* generate a new Variant 1, Type 4 UUID. *) + val new : unit -> UUID.t + + end = struct + + structure W8V = Word8Vector + + (* maximum `int` value plus 1 *) + val maxInt = IntInf.fromInt (valOf Int.maxInt) + 1 + + (* get the current time as (seconds, useconds) *) + fun getTime () = IntInf.divMod (Time.toMicroseconds(Time.now()), 1000000) + + fun seedRand () = let + val (secs, usecs) = getTime () + (* initial random seed *) + val r = Random.rand (Int.fromLarge(secs mod maxInt), Int.fromLarge usecs) + (* run the random number generator a few steps *) + val r = let + val n = let val (s, us) = getTime() + in + IntInf.andb(IntInf.xorb(secs, usecs), 0x1F) + end + fun lp 0 = r + | lp i = (ignore (Random.randInt r); lp (i-1)) + in + lp n + end + in + r + end + + fun randByte r () = let + val w = LargeWord.fromInt(Random.randInt r) + in + Word8.fromLargeWord(LargeWord.>>(w, 0w7)) + end + + (* generate a Variant 1, Type 4 UUID. *) + fun new () = let + val randByte = randByte (seedRand()) + fun gen 6 = let + (* byte 6 has the version (0b0100) in the upper 4 bits *) + val b = randByte() + in + Word8.orb(0wx40, Word8.andb(b, 0wxF)) + end + | gen 8 = let + (* byte 8 has the variant (0b10) in the upper 2 bits *) + val b = randByte() + in + Word8.orb(0wx80, Word8.andb(b, 0wx3F)) + end + | gen _ = randByte() + in + UUID.fromBytes (W8V.tabulate(16, gen)) + end + + end diff --git a/smlnj-lib/UUID/uuid-lib.cm b/smlnj-lib/UUID/uuid-lib.cm new file mode 100644 index 0000000..442aab0 --- /dev/null +++ b/smlnj-lib/UUID/uuid-lib.cm @@ -0,0 +1,20 @@ +(* uuid-lib.cm + * + * COPYRIGHT (c) 2020 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * Sources file for UUID library. + *) + +Library + + structure UUID + structure GenUUID + +is + + $/basis.cm + $/smlnj-lib.cm + + gen-uuid.sml + uuid.sml diff --git a/smlnj-lib/UUID/uuid.sml b/smlnj-lib/UUID/uuid.sml new file mode 100644 index 0000000..d28f449 --- /dev/null +++ b/smlnj-lib/UUID/uuid.sml @@ -0,0 +1,132 @@ +(* uuid.sml + * + * COPYRIGHT (c) 2020 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * An implementation of Universally Unique IDs (UUIDs). To generate + * UUIDs, use the GenUUID structure. + *) + +structure UUID :> sig + + type t + + (* the all-zeros UUID *) + val null : t + + (* compare two UUIDs *) + val compare : t * t -> order + + (* are two UUIDs the same *) + val same : t * t -> bool + + (* hash a UUID; we use the 64-bit FNV-1a hash function for this purpose *) + val hash : t -> word + + (* format the UUID as "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" where each "x" is + * a lower-case hexadecimal digit. + *) + val toString : t -> string + + (* convert a string of the form produced by `toString` to a UUID value; leading whitespace + * is ignored. + *) + val fromString : string -> t option + + (* return the UUID as a big-endian sequence of bytes *) + val toBytes : t -> Word8Vector.vector + + (* convert a 16-element byte vector to a UUID. The Size exception is raised + * if the length of the vector is not exactly 16 bytes. Otherwise, there is + * no validity chechking of the UUID (i.e., the variant and type are not checked). + *) + val fromBytes : Word8Vector.vector -> t + + end = struct + + structure W8V = Word8Vector + structure W8VS = Word8VectorSlice + structure SS = Substring + + type t = W8V.vector + + val null : t = W8V.tabulate(16, fn _ => 0w0) + + (* compare two UUIDs *) + val compare = W8V.collate Word8.compare + + fun same (uuid1 : t, uuid2 : t) = (uuid1 = uuid2) + +(* NOTE: For 110.98, we can switch to using the new FNVHash module in the Util library *) + (* hash a UUID; we use the 64-bit FNV-1a hash function for this purpose *) + fun hash uuid = let + val offsetBasis : Word64.word = 0wxcbf29ce484222325 + val prime : Word64.word = 0wx00000100000001B3 + fun hashByte (b, h) = + Word64.xorb (Word64.fromLargeWord(Word8.toLargeWord b), h) * prime + in + Word.fromLarge(W8V.foldl hashByte offsetBasis uuid) + end + + fun toString (uuid : t) = let + fun n2c b = String.sub("0123456789abcdef", Word8.toInt b) + fun b2list (b, l) = n2c(Word8.>>(b, 0w4)) :: n2c(Word8.andb(b, 0wxf)) :: l + fun slice2list (start, len, l) = + W8VS.foldr b2list l (W8VS.slice(uuid, start, SOME len)) + val chrs = slice2list (10, 6, []) + val chrs = slice2list (8, 2, #"-" :: chrs) + val chrs = slice2list (6, 2, #"-" :: chrs) + val chrs = slice2list (4, 2, #"-" :: chrs) + val chrs = slice2list (0, 4, #"-" :: chrs) + in + String.implode chrs + end + + local + (* the lengths of the fields *) + val fieldLens = [8, 4, 4, 4, 12] + val scan8 = Word8.scan StringCvt.HEX SS.getc + (* converts a list of fields to a list of bytes. If there is the wrong number of + * fields, or an incorrect length field, or a invalid digit, then `NONE` is + * returned. + *) + fun fields2bytes flds = let + fun toBytes ([], [], bytes) = SOME(List.rev bytes) + | toBytes (fld::flds, len::lens, bytes) = if SS.size fld <> len + then NONE + else let + fun lp (ss, bytes) = if SS.isEmpty ss + then toBytes (flds, lens, bytes) + else let + val (b, rest) = SS.splitAt (ss, 2) + in + case scan8 b + of SOME(b, _) => lp (rest, b::bytes) + | NONE => NONE + end + in + lp (fld, bytes) + end + | toBytes _ = NONE + in + toBytes (flds, fieldLens, []) + end + val splitFields = SS.fields (fn #"-" => true | _ => false) + in + fun fromString s = let + val ss = StringCvt.skipWS SS.getc (SS.full s) + in + (* the length of the UUID should be 36 characters (32 digits plus four "-"s) *) + if (SS.size ss >= 36) + then (case fields2bytes(splitFields (SS.slice(ss, 0, SOME 36))) + of SOME bytes => SOME(W8V.fromList bytes) + | _ => NONE + (* end case *)) + else NONE + end + end (* local *) + + fun toBytes uuid = uuid + fun fromBytes v = if (W8V.length v <> 16) then raise Size else v + + end diff --git a/smlnj-lib/Unix/.cm/GUID/unix-env-sig.sml b/smlnj-lib/Unix/.cm/GUID/unix-env-sig.sml new file mode 100644 index 0000000..fa344f3 --- /dev/null +++ b/smlnj-lib/Unix/.cm/GUID/unix-env-sig.sml @@ -0,0 +1 @@ +guid-$/(unix-lib.cm):unix-env-sig.sml-1714016082.761 diff --git a/smlnj-lib/Unix/.cm/GUID/unix-env.sml b/smlnj-lib/Unix/.cm/GUID/unix-env.sml new file mode 100644 index 0000000..4c44724 --- /dev/null +++ b/smlnj-lib/Unix/.cm/GUID/unix-env.sml @@ -0,0 +1 @@ +guid-$/(unix-lib.cm):unix-env.sml-1714016082.763 diff --git a/smlnj-lib/Unix/.cm/GUID/unix-path-sig.sml b/smlnj-lib/Unix/.cm/GUID/unix-path-sig.sml new file mode 100644 index 0000000..43c91a2 --- /dev/null +++ b/smlnj-lib/Unix/.cm/GUID/unix-path-sig.sml @@ -0,0 +1 @@ +guid-$/(unix-lib.cm):unix-path-sig.sml-1714016082.779 diff --git a/smlnj-lib/Unix/.cm/GUID/unix-path.sml b/smlnj-lib/Unix/.cm/GUID/unix-path.sml new file mode 100644 index 0000000..35556ca --- /dev/null +++ b/smlnj-lib/Unix/.cm/GUID/unix-path.sml @@ -0,0 +1 @@ +guid-$/(unix-lib.cm):unix-path.sml-1714016082.783 diff --git a/smlnj-lib/Unix/.cm/SKEL/unix-env-sig.sml b/smlnj-lib/Unix/.cm/SKEL/unix-env-sig.sml new file mode 100644 index 0000000..023c0bd --- /dev/null +++ b/smlnj-lib/Unix/.cm/SKEL/unix-env-sig.sml @@ -0,0 +1,2 @@ +Skeleton 5 +ac"UNIX_ENV"h0 \ No newline at end of file diff --git a/smlnj-lib/Unix/.cm/SKEL/unix-env.sml b/smlnj-lib/Unix/.cm/SKEL/unix-env.sml new file mode 100644 index 0000000..358f6ee --- /dev/null +++ b/smlnj-lib/Unix/.cm/SKEL/unix-env.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f1d"Posix"ad"UnixEnv"jh1ad"SS"gp1d"Substring"gp1c"UNIX_ENV" \ No newline at end of file diff --git a/smlnj-lib/Unix/.cm/SKEL/unix-path-sig.sml b/smlnj-lib/Unix/.cm/SKEL/unix-path-sig.sml new file mode 100644 index 0000000..2c090b8 --- /dev/null +++ b/smlnj-lib/Unix/.cm/SKEL/unix-path-sig.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f1d"OS"ac"UNIX_PATH"h0 \ No newline at end of file diff --git a/smlnj-lib/Unix/.cm/SKEL/unix-path.sml b/smlnj-lib/Unix/.cm/SKEL/unix-path.sml new file mode 100644 index 0000000..63523ae --- /dev/null +++ b/smlnj-lib/Unix/.cm/SKEL/unix-path.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f4d"OS"d"UnixEnv"d"String"Posix"ad"UnixPath"jh1baST"gp3d"FileSys"5f1d"PathUtil"gp1c"UNIX_PATH" \ No newline at end of file diff --git a/smlnj-lib/Unix/.cm/amd64-unix/unix-env-sig.sml b/smlnj-lib/Unix/.cm/amd64-unix/unix-env-sig.sml new file mode 100644 index 0000000..3594cbe Binary files /dev/null and b/smlnj-lib/Unix/.cm/amd64-unix/unix-env-sig.sml differ diff --git a/smlnj-lib/Unix/.cm/amd64-unix/unix-env.sml b/smlnj-lib/Unix/.cm/amd64-unix/unix-env.sml new file mode 100644 index 0000000..50a137c Binary files /dev/null and b/smlnj-lib/Unix/.cm/amd64-unix/unix-env.sml differ diff --git a/smlnj-lib/Unix/.cm/amd64-unix/unix-path-sig.sml b/smlnj-lib/Unix/.cm/amd64-unix/unix-path-sig.sml new file mode 100644 index 0000000..4d7a886 Binary files /dev/null and b/smlnj-lib/Unix/.cm/amd64-unix/unix-path-sig.sml differ diff --git a/smlnj-lib/Unix/.cm/amd64-unix/unix-path.sml b/smlnj-lib/Unix/.cm/amd64-unix/unix-path.sml new file mode 100644 index 0000000..0f8cf19 Binary files /dev/null and b/smlnj-lib/Unix/.cm/amd64-unix/unix-path.sml differ diff --git a/smlnj-lib/Unix/unix-env-sig.sml b/smlnj-lib/Unix/unix-env-sig.sml new file mode 100644 index 0000000..70e790c --- /dev/null +++ b/smlnj-lib/Unix/unix-env-sig.sml @@ -0,0 +1,40 @@ +(* unix-env-sig.sml + * + * COPYRIGHT (c) 2007 The Fellowship of SML/NJ (http://smlnj.org) + * All rights reserved. + * + * A UNIX environment is a list of strings of the form "name=value", where + * the "=" character does not appear in name. + * NOTE: binding the user's environment as an ML value and then exporting the + * ML image can result in incorrect behavior, since the environment bound in the + * heap image may differ from the user's environment when the exported image + * is used. + *) + +signature UNIX_ENV = + sig + + val getFromEnv : (string * string list) -> string option + (* return the value, if any, bound to the name. *) + + val getValue : {name : string, default : string, env : string list} -> string + (* return the value bound to the name, or a default value *) + + val removeFromEnv : (string * string list) -> string list + (* remove a binding from an environment *) + + val addToEnv : (string * string list) -> string list + (* add a binding to an environment, replacing an existing binding + * if necessary. + *) + + val environ : unit -> string list + (* return the user's environment *) + + val getEnv : string -> string option + (* return the binding of an environment variable in the + * user's environment. + *) + + end; (* UNIX_ENV *) + diff --git a/smlnj-lib/Unix/unix-env.sml b/smlnj-lib/Unix/unix-env.sml new file mode 100644 index 0000000..b65a4de --- /dev/null +++ b/smlnj-lib/Unix/unix-env.sml @@ -0,0 +1,85 @@ +(* unix-env.sml + * + * COPYRIGHT (c) 1993 by AT&T Bell Laboratories. See COPYRIGHT file for details. + * + * A UNIX environment is a list of strings of the form "name=value", where + * the "=" character does not appear in name. + * NOTE: binding the user's environment as an ML value and then exporting the + * ML image can result in incorrect behavior, since the environment bound in the + * heap image may differ from the user's environment when the exported image + * is used. + *) + +structure UnixEnv : UNIX_ENV = + struct + + structure SS = Substring + + local + fun notEqual #"=" = false | notEqual _ = true + val split = SS.splitl notEqual + in + fun splitBinding s = let + val (a, b) = split(SS.full s) + in + if SS.isEmpty b + then (s, "") + else (SS.string a, SS.string(SS.triml 1 b)) + end + end + + (* return the value, if any, bound to the name. *) + fun getFromEnv (name, env) = let + fun look [] = NONE + | look (s::r) = let + val (n, v) = splitBinding s + in + if (n = name) then (SOME v) else look r + end + in + look env + end + + (* return the value bound to the name, or a default value *) + fun getValue {name, default, env} = (case getFromEnv(name, env) + of (SOME s) => s + | NONE => default + (* end case *)) + + (* remove a binding from an environment *) + fun removeFromEnv (name, env) = let + fun look [] = [] + | look (s::r) = let + val (n, v) = splitBinding s + in + if (n = name) then r else (s :: look r) + end + in + look env + end + + (* add a binding to an environment, replacing an existing binding + * if necessary. + *) + fun addToEnv (nameValue, env) = let + val (name, _) = splitBinding nameValue + fun look [] = [nameValue] + | look (s::r) = let + val (n, v) = splitBinding s + in + if (n = name) then r else (s :: look r) + end + in + look env + end + + (* return the user's environment *) + val environ = Posix.ProcEnv.environ + + (* return the binding of an environment variable in the + * user's environment. + *) + fun getEnv name = getFromEnv(name, environ()) + + end; (* UnixEnv *) + diff --git a/smlnj-lib/Unix/unix-lib.cm b/smlnj-lib/Unix/unix-lib.cm new file mode 100644 index 0000000..cb6ceaf --- /dev/null +++ b/smlnj-lib/Unix/unix-lib.cm @@ -0,0 +1,27 @@ +(* unix-lib.cm + * + * COPYRIGHT (c) 2020 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * Sources file for Unix utility library; part of the SML/NJ library suite. + *) + +Library + signature UNIX_ENV + signature UNIX_PATH + + structure UnixEnv + structure UnixPath + +is +#if defined(NEW_CM) + $/basis.cm + $/smlnj-lib.cm +#else + ../Util/smlnj-lib.cm +#endif + + unix-env-sig.sml + unix-env.sml + unix-path-sig.sml + unix-path.sml diff --git a/smlnj-lib/Unix/unix-path-sig.sml b/smlnj-lib/Unix/unix-path-sig.sml new file mode 100644 index 0000000..abd746f --- /dev/null +++ b/smlnj-lib/Unix/unix-path-sig.sml @@ -0,0 +1,44 @@ +(* unix-path-sig.sml + * + * COPYRIGHT (c) 2007 The Fellowship of SML/NJ (http://smlnj.org) + * All rights reserved. + * + * Note that this module is largely superseded by the `PathUtil` module + * in the *Util Library*. + *) + +signature UNIX_PATH = + sig + + type path_list = string list + + (* get the user's PATH environment variable. *) + val getPath : unit -> path_list + + datatype access_mode = datatype OS.FileSys.access_mode + datatype file_type = F_REGULAR | F_DIR | F_SYMLINK | F_SOCK | F_CHR | F_BLK + + (* findFile (paths, mode) name + * returns the p/name, where p is the first path in paths such that p/name + * has the given access modes. + *) + val findFile : (path_list * access_mode list) -> string -> string option + + (* findFiles (paths, mode) name + * returns a list of p/name, where p is in paths and p/name has the given access modes. + *) + val findFiles : (path_list * access_mode list) -> string -> string list + + (* findFileOfType (paths, ftype, mode) name + * returns the p/name, where p is the first path in paths such that p/name + * has the given file type and access mode. + *) + val findFileOfType : (path_list * file_type * access_mode list) -> string -> string option + + (* findFileOfTypes (paths, ftype, mode) name + * returns a list of p/name, where p is in paths and p/name has the given file type + * and access mode. + *) + val findFilesOfType : (path_list * file_type * access_mode list) -> string -> string list + + end (* UNIX_PATH *) diff --git a/smlnj-lib/Unix/unix-path.sml b/smlnj-lib/Unix/unix-path.sml new file mode 100644 index 0000000..bc0ce20 --- /dev/null +++ b/smlnj-lib/Unix/unix-path.sml @@ -0,0 +1,59 @@ +(* unix-path.sml + * + * COPYRIGHT (c) 2007 The Fellowship of SML/NJ (http://smlnj.org) + * All rights reserved. + * + * Note that this module is largely superseded by the `PathUtil` module + * in the *Util Library*. + *) + +structure UnixPath : UNIX_PATH = + struct + + datatype access_mode = datatype OS.FileSys.access_mode + + datatype file_type = F_REGULAR | F_DIR | F_SYMLINK | F_SOCK | F_CHR | F_BLK + + (** Path lists **) + + type path_list = string list + + fun getPath () = let + val path = (case (UnixEnv.getEnv "PATH") of (SOME p) => p | _ => "") + in + String.fields (fn #":" => true | _ => false) path + end (* getPath *) + + local + + structure ST = Posix.FileSys.ST + fun isFileTy (path, ty) = let + val st = Posix.FileSys.stat path + in + case ty + of F_REGULAR => ST.isReg st + | F_DIR => ST.isDir st + | F_SYMLINK => ST.isLink st + | F_SOCK => ST.isSock st + | F_CHR => ST.isChr st + | F_BLK => ST.isBlk st + (* end case *) + end + fun access mode pathname = (OS.FileSys.access(pathname, mode)) + fun accessAndType (mode, ftype) pathname = ( + OS.FileSys.access(pathname, mode) + andalso isFileTy(pathname, ftype)) + handle _ => false + + in + + fun findFile (pl, mode) = PathUtil.existsFile (access mode) pl + fun findFiles (pl, mode) = PathUtil.allFiles (access mode) pl + fun findFileOfType (pl, ftype, mode) = + PathUtil.existsFile (accessAndType(mode, ftype)) pl + fun findFilesOfType (pl, ftype, mode) = + PathUtil.allFiles (accessAndType(mode, ftype)) pl + + end (* local *) + + end (* UnixPath *) diff --git a/smlnj-lib/Util/NEW-UTIL-NOTES b/smlnj-lib/Util/NEW-UTIL-NOTES new file mode 100644 index 0000000..d758b09 --- /dev/null +++ b/smlnj-lib/Util/NEW-UTIL-NOTES @@ -0,0 +1,20 @@ +The **Util Library** has code that is over 30 years old and many of the interfaces +could do with a redesign. Since such a change would break lots of existing code, +we will instead create a new library called the **SML/NJ Util Library** with +CM file `smlnj-util-lib.cm`. This file contains notes about things that should be +fixed/changed in that redesign. + +The HASH_KEY and ORD_KEY signatures should be changed to match newer conventions. + +The MONO_ARRAY_SORT should have a fixed comparison function and should also +contain the Binary Search function from the `BSearchFn` functor. + +Eliminate currying from the hash-table modules + +Eliminate the SplayTree and BinaryTree implementations of maps and sets; the +RedBlackTree implementation is more performant across the board. We'll keep +the sorted-list based versions too for a lightweight implementation. + +The logical bit-sequence operations in BitArray need rethinking. Specifically +w.r.t. padding lengths. + diff --git a/smlnj-lib/Util/README b/smlnj-lib/Util/README new file mode 100644 index 0000000..bd6a2a3 --- /dev/null +++ b/smlnj-lib/Util/README @@ -0,0 +1,15 @@ +This is the utility library of the SML/NJ Library. The services provided +can be grouped as follows: + + - Data structures + - Hash tables + - Finite maps of ordered keys + - Finite sets of ordered elements + - Queues + - Arrays + + - Searching and sorting + + - String conversions + + - Some miscellaneous utility modules. diff --git a/smlnj-lib/Util/TODO b/smlnj-lib/Util/TODO new file mode 100644 index 0000000..3b43c60 --- /dev/null +++ b/smlnj-lib/Util/TODO @@ -0,0 +1,5 @@ +Improve implementation of unionWith/intersectWith in BinaryMapFn, IntBinaryMap, +and SplayMapFn. + +New iterator module + diff --git a/smlnj-lib/Util/Target32Bit/native.sml b/smlnj-lib/Util/Target32Bit/native.sml new file mode 100644 index 0000000..1d47ec0 --- /dev/null +++ b/smlnj-lib/Util/Target32Bit/native.sml @@ -0,0 +1,12 @@ +(* target32-native.sml + * + * COPYRIGHT (c) 2022 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * Aliases for the native-size word and integer structures on 32-bit + * platforms. These are defined so that we can refer to them in + * signatures. + *) + +structure NativeInt = Int32 +structure NativeWord = Word32 diff --git a/smlnj-lib/Util/Target32Bit/prime-sizes.sml b/smlnj-lib/Util/Target32Bit/prime-sizes.sml new file mode 100644 index 0000000..feffbdb --- /dev/null +++ b/smlnj-lib/Util/Target32Bit/prime-sizes.sml @@ -0,0 +1,40 @@ +(* target32-prim-sizes.sml + * + * COPYRIGHT (c) 2020 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * A list of prime numbers for sizing hash tables, etc. on 32-bit targets. + *) + +structure PrimeSizes : sig + + val pick : int -> int + + end = struct + + (* This is a sequence of prime numbers; each number is approx. sqrt(2) + * larger than the previous one in the series. The list is organized + * into sublists to make searches faster. + *) + val primes = [ + (47, [11, 13, 17, 23, 37, 47]), + (367, [67, 97, 131, 191, 257, 367]), + (2897, [521, 727, 1031, 1451, 2053, 2897]), + (23173, [4099, 5801, 8209, 11587, 16411, 23173]), + (185369, [32771, 46349, 65537, 92683, 131101, 185369]), + (1482919, [262147, 370759, 524309, 741457, 1048583, 1482919]), + (2097169, [2097169]) + ] + + fun pick i = let + fun f [] = raise Fail "PrimeSizes.pick: out of sequences" + | f [(p, _)] = p + | f ((hi, l)::r) = if (i < hi) then g l else f r + and g [] = raise Fail "PrimeSizes.pick: out of primes in sequence" + | g [p] = p + | g (p::r) = if (i < p) then p else g r + in + f primes + end + + end diff --git a/smlnj-lib/Util/Target32Bit/random.sml b/smlnj-lib/Util/Target32Bit/random.sml new file mode 100644 index 0000000..9fd154c --- /dev/null +++ b/smlnj-lib/Util/Target32Bit/random.sml @@ -0,0 +1,286 @@ +(* random.sml + * + * Stateful pseudo-random generation using the 32-bit Mersenne Twister + * algorithm. + * + * COPYRIGHT (c) 2023 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * This code is derived from the 32-bit C version that can be found at + * + * http://www.math.sci.hiroshima-u.ac.jp/m-mat/MT/emt.html + * + * That code is covered by the following Copyright and license: + * + * Copyright (C) 2004, 2014, Makoto Matsumoto and Takuji Nishimura, + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. The names of its contributors may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + *) + +structure Random :> RANDOM = + struct + + structure W32 = Word32 + structure W32A : MONO_ARRAY = struct + open Array + type array = W32.word array + type elem = W32.word + type vector = W32.word vector + end + + val && = W32.andb + val || = W32.orb + val ^^ = W32.xorb + val >> = W32.>> + val << = W32.<< + + infix 0 << >> + infix 1 || ^^ + infix 2 && + + val kN = 624 + val kM = 397 + val kMatrixA : W32.word = 0wx9908b0df + val kUMask : W32.word = 0wx80000000 (* most significant w-r bits *) + val kLMask : W32.word = 0wx7FFFFFFF (* least significant r bits *) + + datatype rand = RandState of { + mt : W32A.array, + mti : int ref + } + + fun error (f, msg) = LibBase.failure {module="MTRandom", func=f, msg=msg} + + val bytesPerWord = 4 + val magic = Byte.stringToBytes "MT32" + val bufLen = 4 + 2 + kN * bytesPerWord (* magic + mti + words *) + + fun w32ToByte w = Word8.fromLarge(W32.toLarge w) + fun byteToW32 b = W32.fromLarge(Word8.toLarge b) + + fun toBytes (RandState{mti, mt}) = let + val buf = Word8Buffer.new bufLen + fun w32ToByte w = Word8.fromLarge(W32.toLarge w) + val mti' = W32.fromInt(!mti) + (* add a 32-bit word to the buffer in little-endian order *) + fun addW32 w = let + fun lp (i, w) = if (i < bytesPerWord) + then ( + Word8Buffer.add1 (buf, w32ToByte(w && 0wxFF)); + lp (i+1, w >> 0w8)) + else () + in + lp (0, w) + end + in + (* add the magic tag to the front *) + Word8Buffer.addVec (buf, magic); + (* then the `mti` value as two bytes in little-endian order*) + Word8Buffer.add1 (buf, w32ToByte(mti' && 0wxFF)); + Word8Buffer.add1 (buf, w32ToByte(mti' >> 0w8)); + (* add the words in the buffer *) + W32A.app addW32 mt; + (* extract the result *) + Word8Buffer.contents buf + end + + fun fromBytes vec = if (Word8Vector.length vec <> bufLen) + then error ("fromBytes", "wrong number of bytes") + else let + val SOME(magic', rest) = Word8VectorSlice.getVec(Word8VectorSlice.full vec, 4) + fun get i = byteToW32(Word8VectorSlice.sub(rest, i)) + val arr = W32A.array(kN, 0w0) + in + if (magic' <> magic) + then error ("fromBytes", "invalid tag") + else let + val mti = W32.toInt(get 0 || (get 1 << 0w8)) + fun getLp (i, bi) = if (i < kN) + then let + fun getW32 (j, w) = if (0 <= j) + then getW32 (j-1, (w << 0w8) || get (bi + j)) + else w + in + W32A.update (arr, i, getW32(bytesPerWord-1, 0w0)); + getLp (i+1, bi+4) + end + else () + in + if (mti <= kN) + then ( + getLp (0, 2); + RandState{mti = ref mti, mt = arr}) + else error ("fromBytes", "invalid index") + end + end + + (* use Base64 for string encoding *) + val toString = Base64.encode o toBytes + fun fromString s = ((fromBytes (Base64.decode s)) + handle Base64.Incomplete => error ("fromString", "incomplete string") + | Base64.Invalid _ => error ("fromString", "invalid string") + | ex => raise ex) + + fun init seed = let + val arr = W32A.array(kN, 0w0) + fun lp (i, prev) = if (i < kN) + then let + val next = 0w1812433253 * (prev ^^ (prev >> 0w30)) + + W32.fromInt i + in + W32A.update(arr, i, next); + lp (i+1, next) + end + else () + in + W32A.update(arr, 0, seed); + lp (1, seed); + RandState{mt = arr, mti = ref kN} + end + + fun fromList ws = let + val rs as RandState{mt, mti} = init 0w19650218 + fun mtAt i = W32A.sub(mt, i) + (* process `kN` elements taken from repeated `ws` *) + fun lp1 (_, i, _, 0) = i + | lp1 ([], i, j, k) = (* restart iteration over `ws` *) + lp1 (ws, i, 0, k) + | lp1 (w::wr, i, j, k) = let + val mt_im1 = mtAt(i-1) + val x = (mtAt i ^^ ((mt_im1 ^^ (mt_im1 >> 0w30)) * 0w1664525)) + + w + W32.fromInt j + val _ = W32A.update(mt, i, x) + val i = if (i >= kN-1) + then (W32A.update(mt, 0, mtAt(kN-1)); 1) + else i+1 + in + lp1 (wr, i, j+1, k-1) + end + val i = if null ws + then 1 + else lp1 (ws, 1, 0, Int.max(kN, List.length ws)) + (* another pass over the array *) + fun lp2 (i, 0) = () + | lp2 (i, k) = let + val mt_im1 = mtAt(i-1) + val x = (mtAt i ^^ ((mt_im1 ^^ (mt_im1 >> 0w30)) * 0w1566083941)) + - W32.fromInt i + val _ = W32A.update(mt, i, x) + val i = if (i >= kN-1) + then (W32A.update(mt, 0, mtAt(kN-1)); 1) + else i+1 + in + lp2 (i, k-1) + end + val _ = lp2 (i, kN-1) + in + W32A.update(mt, 0, 0wx80000000); (* MSB is 1; assuring non-zero initial array *) + rs + end + + fun randNativeWord (RandState{mt, mti}) = let + fun mtAt i = W32A.sub(mt, i) + fun mag01Xor (w, x) = if ((x && 0w1) = 0w0) + then w + else w ^^ kMatrixA + (* for-loop combinator *) + fun for (init, bnd) body = let + fun lp i = if (i < bnd) then (body i; lp(i+1)) else i + in + lp init + end + val _ = if (!mti >= kN) + then let (* generate fresh words for array *) + fun update offset i = let + val x = (mtAt i && kUMask) || (mtAt(i+1) && kLMask) + val y = mag01Xor(mtAt(i+offset) ^^ (x >> 0w1), x) + in + W32A.update(mt, i, y) + end + val i = for (0, kN-kM) (update kM) + val _ = for (i, kN-1) (update (kM-kN)) + val x = (mtAt(kN-1) && kUMask) || (mtAt 0 && kLMask) + val y = mag01Xor(mtAt(kM-1) ^^ (x >> 0w1), x) + in + W32A.update(mt, kN-1, y); + mti := 0 + end + else () + val y = mtAt(!mti) + val _ = (mti := !mti + 1) + (* Tempering *) + val y = y ^^ (y >> 0w11) + val y = y ^^ ((y << 0w7) && 0wx9d2c5680) + val y = y ^^ ((y << 0w15) && 0wxefc60000) + val y = y ^^ (y >> 0w18) + in + y + end + + fun rand (a, b) = fromList [W32.fromInt a, W32.fromInt b] + + fun randNativeInt rs = Int32.fromLarge(W32.toLargeIntX(W32.>>(randNativeWord rs, 0w1))) + + (***** old Random functions *****) + + fun randInt rs = let + val w = randNativeWord rs + in + W32.toIntX(W32.~>>(w, 0w1)) + end + + fun randNat rs = let + val w = randNativeWord rs + in + W32.toIntX(W32.>>(w, 0w2)) + end + + fun randWord rs = let + val w = randNativeWord rs + in + Word.fromLarge(W32.toLarge(W32.>>(w, 0w1))) + end + + fun randReal rs = let + val w = randNativeWord rs + val r = Real.fromLargeInt(W32.toLargeInt w) + in + r * (1.0/4294967296.0) + end + + fun randRange (i, j) = if j < i + then error ("randRange", "hi < lo") + else let + (* use IntInf arithmetic to avoid overflow *) + val n = W32.fromLargeInt(IntInf.fromInt j - IntInf.fromInt i) + in + fn rs => i + W32.toInt(W32.mod(randNativeWord rs, n)) + end + + end diff --git a/smlnj-lib/Util/Target64Bit/native.sml b/smlnj-lib/Util/Target64Bit/native.sml new file mode 100644 index 0000000..25dd54c --- /dev/null +++ b/smlnj-lib/Util/Target64Bit/native.sml @@ -0,0 +1,12 @@ +(* target64-native.sml + * + * COPYRIGHT (c) 2022 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * Aliases for the native-size word and integer structures on 64-bit + * platforms. These are defined so that we can refer to them in + * signatures. + *) + +structure NativeInt = Int64 +structure NativeWord = Word64 diff --git a/smlnj-lib/Util/Target64Bit/prime-sizes.sml b/smlnj-lib/Util/Target64Bit/prime-sizes.sml new file mode 100644 index 0000000..f667199 --- /dev/null +++ b/smlnj-lib/Util/Target64Bit/prime-sizes.sml @@ -0,0 +1,40 @@ +(* target64-prim-sizes.sml + * + * COPYRIGHT (c) 2020 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * A list of prime numbers for sizing hash tables, etc. on 64-bit targets. + *) + +structure PrimeSizes : sig + + val pick : int -> int + + end = struct + + (* This is a sequence of prime numbers; each number is approx. sqrt(2) + * larger than the previous one in the series. The list is organized + * into sublists to make searches faster. + *) + val primes = [ + (47, [11, 13, 17, 23, 37, 47]), + (367, [67, 97, 131, 191, 257, 367]), + (2897, [521, 727, 1031, 1451, 2053, 2897]), + (23173, [4099, 5801, 8209, 11587, 16411, 23173]), + (185369, [32771, 46349, 65537, 92683, 131101, 185369]), + (1482919, [262147, 370759, 524309, 741457, 1048583, 1482919]), + (2097169, [2097169]) + ] + + fun pick i = let + fun f [] = raise Fail "PrimeSizes.pick: out of sequences" + | f [(p, _)] = p + | f ((hi, l)::r) = if (i < hi) then g l else f r + and g [] = raise Fail "PrimeSizes.pick: out of primes in sequence" + | g [p] = p + | g (p::r) = if (i < p) then p else g r + in + f primes + end + + end diff --git a/smlnj-lib/Util/Target64Bit/random.sml b/smlnj-lib/Util/Target64Bit/random.sml new file mode 100644 index 0000000..362fdce --- /dev/null +++ b/smlnj-lib/Util/Target64Bit/random.sml @@ -0,0 +1,294 @@ +(* random.sml + * + * Stateful pseudo-random generation using the 64-bit Mersenne Twister + * algorithm. + * + * COPYRIGHT (c) 2023 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * This code is derived from the 64-bit C version that can be found at + * + * http://www.math.sci.hiroshima-u.ac.jp/m-mat/MT/emt.html + * + * That code is covered by the following Copyright and license: + * + * Copyright (C) 2004, 2014, Makoto Matsumoto and Takuji Nishimura, + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. The names of its contributors may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + *) + +structure Random :> RANDOM = + struct + + structure W64 = Word64 + + (* the following structure should be replaced with a packed + * implementation once it is available. + *) + structure W64A : MONO_ARRAY = struct + open Array + type array = W64.word array + type elem = W64.word + type vector = W64.word vector + end + + val && = W64.andb + val || = W64.orb + val ^^ = W64.xorb + val >> = W64.>> + val << = W64.<< + + infix 0 << >> + infix 1 || ^^ + infix 2 && + + val kNN = 312 + val kMM = 156 + val kMatrixA : W64.word = 0wxB5026F5AA96619E9 + val kUMask : W64.word = 0wxFFFFFFFF80000000 (* Most significant 33 bits *) + val kLMask : W64.word = 0wx7FFFFFFF (* Least significant 31 bits *) + + datatype rand = RandState of { + mt : W64A.array, + mti : int ref + } + + fun error (f, msg) = LibBase.failure {module="MTRandom", func=f, msg=msg} + + val bytesPerWord = 8 + val magic = Byte.stringToBytes "MT64" + val bufLen = 4 + 2 + kNN * bytesPerWord (* magic + mti + words *) + + fun toBytes (RandState{mti, mt}) = let + val buf = Word8Buffer.new bufLen + fun w64ToByte w = Word8.fromLarge w + val mti' = W64.fromInt(!mti) + (* add a 64-bit word to the buffer in little-endian order *) + fun addW64 w = let + fun lp (i, w) = if (i < bytesPerWord) + then ( + Word8Buffer.add1 (buf, w64ToByte(w && 0wxFF)); + lp (i+1, w >> 0w8)) + else () + in + lp (0, w) + end + in + (* add the magic tag to the front *) + Word8Buffer.addVec (buf, magic); + (* then the `mti` value as two bytes in little-endian order*) + Word8Buffer.add1 (buf, w64ToByte(mti' && 0wxFF)); + Word8Buffer.add1 (buf, w64ToByte(mti' >> 0w8)); + (* add the words in the buffer *) + W64A.app addW64 mt; + (* extract the result *) + Word8Buffer.contents buf + end + + fun fromBytes vec = if (Word8Vector.length vec <> bufLen) + then error ("fromBytes", "wrong number of bytes") + else let + val SOME(magic', rest) = Word8VectorSlice.getVec(Word8VectorSlice.full vec, 4) + fun get i = Word8.toLarge(Word8VectorSlice.sub(rest, i)) + val arr = W64A.array(kNN, 0w0) + in + if (magic' <> magic) + then error ("fromBytes", "invalid tag") + else let + val mti = W64.toInt(get 0 || (get 1 << 0w8)) + fun getLp (i, bi) = if (i < kNN) + then let + fun getW64 (j, w) = if (0 <= j) + then getW64 (j-1, (w << 0w8) || get (bi + j)) + else w + in + W64A.update (arr, i, getW64(bytesPerWord-1, 0w0)); + getLp (i+1, bi+8) + end + else () + in + if (mti <= kNN) + then ( + getLp (0, 2); + RandState{mti = ref mti, mt = arr}) + else error ("fromBytes", "invalid index") + end + end + + (* use Base64 for string encoding *) + val toString = Base64.encode o toBytes + fun fromString s = ((fromBytes (Base64.decode s)) + handle Base64.Incomplete => error ("fromString", "incomplete string") + | Base64.Invalid _ => error ("fromString", "invalid string") + | ex => raise ex) + + fun copy {src, dst} = let + val RandState{mti=srcMTI, mt=srcMT} = src + val RandState{mti=dstMTI, mt=dstMT} = dst + in + dstMTI := !srcMTI; + W64A.copy{src = srcMT, dst = dstMT, di = 0} + end + + fun init seed = let + val arr = W64A.array(kNN, 0w0) + fun lp (i, prev) = if (i < kNN) + then let + val next = 0w6364136223846793005 * (prev ^^ (prev >> 0w62)) + + W64.fromInt i + in + W64A.update(arr, i, next); + lp (i+1, next) + end + else () + in + W64A.update(arr, 0, seed); + lp (1, seed); + RandState{mt = arr, mti = ref kNN} + end + + fun fromList ws = let + val rs as RandState{mt, mti} = init 0w19650218 + fun mtAt i = W64A.sub(mt, i) + (* process `kNN` elements taken from repeated `ws` *) + fun lp1 (_, i, _, 0) = i + | lp1 ([], i, j, k) = (* restart iteration over `ws` *) + lp1 (ws, i, 0, k) + | lp1 (w::wr, i, j, k) = let + val mt_im1 = mtAt(i-1) + val x = (mtAt i ^^ ((mt_im1 ^^ (mt_im1 >> 0w62)) * 0w3935559000370003845)) + + w + W64.fromInt j + val _ = W64A.update(mt, i, x) + val i = if (i >= kNN-1) + then (W64A.update(mt, 0, mtAt(kNN-1)); 1) + else i+1 + in + lp1 (wr, i, j+1, k-1) + end + val i = if null ws + then 1 + else lp1 (ws, 1, 0, Int.max(kNN, List.length ws)) + (* another pass over the array *) + fun lp2 (i, 0) = () + | lp2 (i, k) = let + val mt_im1 = mtAt(i-1) + val x = (mtAt i ^^ ((mt_im1 ^^ (mt_im1 >> 0w62)) * 0w2862933555777941757)) + - W64.fromInt i + val _ = W64A.update(mt, i, x) + val i = if (i >= kNN-1) + then (W64A.update(mt, 0, mtAt(kNN-1)); 1) + else i+1 + in + lp2 (i, k-1) + end + val _ = lp2 (i, kNN-1) + in + W64A.update(mt, 0, 0wx8000000000000000); (* MSB is 1; assuring non-zero initial array *) + rs + end + + fun rand (a, b) = fromList [W64.fromInt a, W64.fromInt b] + + fun randNativeWord (RandState{mt, mti}) = let + fun mtAt i = W64A.sub(mt, i) + fun mag01Xor (w, x) = if ((x && 0w1) = 0w0) + then w + else w ^^ kMatrixA + (* for-loop combinator *) + fun for (init, bnd) body = let + fun lp i = if (i < bnd) then (body i; lp(i+1)) else i + in + lp init + end + val _ = if (!mti >= kNN) + then let (* generate fresh words for array *) + fun update offset i = let + val x = (mtAt i && kUMask) || (mtAt(i+1) && kLMask) + val y = mag01Xor(mtAt(i+offset) ^^ (x >> 0w1), x) + in + W64A.update(mt, i, y) + end + val i = for (0, kNN-kMM) (update kMM) + val _ = for (i, kNN-1) (update (kMM-kNN)) + val x = (mtAt(kNN-1) && kUMask) || (mtAt 0 && kLMask) + val y = mag01Xor(mtAt(kMM-1) ^^ (x >> 0w1), x) + in + W64A.update(mt, kNN-1, y); + mti := 0 + end + else () + val x = mtAt(!mti) + val _ = (mti := !mti + 1) + val x = x ^^ ((x >> 0w29) && 0wx5555555555555555) + val x = x ^^ ((x << 0w17) && 0wx71D67FFFEDA60000) + val x = x ^^ ((x << 0w37) && 0wxFFF7EEE000000000) + val x = x ^^ (x >> 0w43) + in + x + end + + fun randNativeInt rs = Int64.fromLarge(W64.toLargeIntX(W64.>>(randNativeWord rs, 0w1))) + + (***** old Random functions *****) + + fun randInt rs = let + val w = randNativeWord rs + in + W64.toIntX(W64.~>>(w, 0w1)) + end + + fun randNat rs = let + val w = randNativeWord rs + in + W64.toIntX(W64.>>(w, 0w2)) + end + + fun randWord rs = let + val w = randNativeWord rs + in + Word.fromLargeWord(W64.>>(w, 0w1)) + end + + fun randReal rs = let + val w = randNativeWord rs + val r = Real.fromLargeInt(W64.toLargeIntX(W64.>>(w, 0w11))) + in + r * (1.0/9007199254740992.0) + end + + fun randRange (i, j) = if j < i + then error ("randRange", "hi < lo") + else let + (* use IntInf arithmetic to avoid overflow *) + val n = W64.fromLargeInt(IntInf.fromInt j - IntInf.fromInt i) + in + fn rs => i + W64.toInt(W64.mod(randNativeWord rs, n)) + end + + end diff --git a/smlnj-lib/Util/ansi-term.sml b/smlnj-lib/Util/ansi-term.sml new file mode 100644 index 0000000..88cbf95 --- /dev/null +++ b/smlnj-lib/Util/ansi-term.sml @@ -0,0 +1,97 @@ +(* ansi-term.sml + * + * COPYRIGHT (c) 2020 John Reppy (http://www.cs.uchicago.edu/~jhr) + * All rights reserved. + * + * Support for ANSI terminal control codes. Currently, this support + * is just for display attributes. + *) + +structure ANSITerm : sig + + datatype color + = Black | Red | Green | Yellow | Blue | Magenta | Cyan | White | Default + + datatype style + = FG of color (* foreground color *) + | BG of color (* background color *) + | BF (* bold/bright *) + | DIM (* dim *) + | NORMAL (* normal intensity/brightness *) + | UL (* underline *) + | UL_OFF (* underline off *) + | BLINK (* blinking text *) + | BLINK_OFF (* blinking off *) + | REV (* reverse video *) + | REV_OFF (* reverse video off *) + | INVIS (* invisible *) + | INVIS_OFF (* invisible off *) + | RESET + + (* return the command string for the given styles; the empty list is "normal" *) + val toString : style list -> string + + (* output commands to set the given styles; the empty list is "normal" *) + val setStyle : (TextIO.outstream * style list) -> unit + + end = struct + + datatype color + = Black | Red | Green | Yellow | Blue | Magenta | Cyan | White | Default + + datatype style + = FG of color (* foreground color *) + | BG of color (* background color *) + | BF (* bold/bright *) + | DIM (* dim *) + | NORMAL (* normal intensity/brightness *) + | UL (* underline *) + | UL_OFF (* underline off *) + | BLINK (* blinking text *) + | BLINK_OFF (* blinking off *) + | REV (* reverse video *) + | REV_OFF (* reverse video off *) + | INVIS (* invisible *) + | INVIS_OFF (* invisible off *) + | RESET + + (* basic color codes *) + fun colorToCmd Black = 0 + | colorToCmd Red = 1 + | colorToCmd Green = 2 + | colorToCmd Yellow = 3 + | colorToCmd Blue = 4 + | colorToCmd Magenta = 5 + | colorToCmd Cyan = 6 + | colorToCmd White = 7 + | colorToCmd Default = 9 + + (* convert style to integer command *) + fun styleToCmd (FG c) = 30 + colorToCmd c + | styleToCmd (BG c) = 40 + colorToCmd c + | styleToCmd BF = 1 + | styleToCmd DIM = 2 + | styleToCmd NORMAL = 22 + | styleToCmd UL = 4 + | styleToCmd UL_OFF = 24 + | styleToCmd BLINK = 5 + | styleToCmd BLINK_OFF = 25 + | styleToCmd REV = 7 + | styleToCmd REV_OFF = 27 + | styleToCmd INVIS = 8 + | styleToCmd INVIS_OFF = 28 + | styleToCmd RESET = 0 + + fun cmdStr [] = "" + | cmdStr (cmd :: r) = let + fun f (cmd, l) = ";" :: Int.toString cmd :: l + in + concat ("\027[" :: Int.toString cmd :: List.foldr f ["m"] r) + end + + fun toString [] = cmdStr[0] + | toString stys = cmdStr(List.map styleToCmd stys) + + fun setStyle (outStrm, stys) = TextIO.output(outStrm, toString stys) + + end diff --git a/smlnj-lib/Util/array-qsort-fn.sml b/smlnj-lib/Util/array-qsort-fn.sml new file mode 100644 index 0000000..590143d --- /dev/null +++ b/smlnj-lib/Util/array-qsort-fn.sml @@ -0,0 +1,163 @@ +(* array-qsort-fn.sml + * + * COPYRIGHT (c) 1993 by AT&T Bell Laboratories. See COPYRIGHT file for details. + * + * Functor for in-place sorting of abstract arrays. + * Uses an engineered version of quicksort due to + * Bentley and McIlroy. + * + *) + +functor ArrayQSortFn (A : MONO_ARRAY) : MONO_ARRAY_SORT = + struct + + structure A = A + + fun isort (array, start, n, cmp) = let + fun item i = A.sub(array,i) + fun swap (i,j) = let + val tmp = A.sub(array,i) + in A.update(array,i,A.sub(array,j)); A.update(array,j,tmp) end + fun vecswap (i,j,0) = () + | vecswap (i,j,n) = (swap(i,j);vecswap(i+1,j+1,n-1)) + fun insertSort (start, n) = let + val limit = start+n + fun outer i = + if i >= limit then () + else let + fun inner j = + if j = start then outer(i+1) + else let + val j' = j - 1 + in + if cmp(item j',item j) = GREATER + then (swap(j,j'); inner j') + else outer(i+1) + end + in inner i end + in + outer (start+1) + end + in insertSort (start, n); array end + + fun sortRange (array, start, n, cmp) = let + fun item i = A.sub(array,i) + fun swap (i,j) = let + val tmp = A.sub(array,i) + in A.update(array,i,A.sub(array,j)); A.update(array,j,tmp) end + fun vecswap (i,j,0) = () + | vecswap (i,j,n) = (swap(i,j);vecswap(i+1,j+1,n-1)) + fun insertSort (start, n) = let + val limit = start+n + fun outer i = + if i >= limit then () + else let + fun inner j = + if j = start then outer(i+1) + else let + val j' = j - 1 + in + if cmp(item j',item j) = GREATER + then (swap(j,j'); inner j') + else outer(i+1) + end + in inner i end + in + outer (start+1) + end + + fun med3(a,b,c) = let + val a' = item a and b' = item b and c' = item c + in + case (cmp(a', b'),cmp(b', c')) + of (LESS, LESS) => b + | (LESS, _) => ( + case cmp(a', c') of LESS => c | _ => a) + | (_, GREATER) => b + | _ => (case cmp(a', c') of LESS => a | _ => c) + (* end case *) + end + + fun getPivot (a,n) = + if n <= 7 then a + n div 2 + else let + val p1 = a + val pm = a + n div 2 + val pn = a + n - 1 + in + if n <= 40 then med3(p1,pm,pn) + else let + val d = n div 8 + val p1 = med3(p1,p1+d,p1+2*d) + val pm = med3(pm-d,pm,pm+d) + val pn = med3(pn-2*d,pn-d,pn) + in + med3(p1,pm,pn) + end + end + + fun quickSort (arg as (a, n)) = let + fun bottom limit = let + fun loop (arg as (pa,pb)) = + if pb > limit then arg + else case cmp(item pb,item a) of + GREATER => arg + | LESS => loop (pa,pb+1) + | _ => (swap arg; loop (pa+1,pb+1)) + in loop end + + fun top limit = let + fun loop (arg as (pc,pd)) = + if limit > pc then arg + else case cmp(item pc,item a) of + LESS => arg + | GREATER => loop (pc-1,pd) + | _ => (swap arg; loop (pc-1,pd-1)) + in loop end + + fun split (pa,pb,pc,pd) = let + val (pa,pb) = bottom pc (pa,pb) + val (pc,pd) = top pb (pc,pd) + in + if pb > pc then (pa,pb,pc,pd) + else (swap(pb,pc); split(pa,pb+1,pc-1,pd)) + end + + val pm = getPivot arg + val _ = swap(a,pm) + val pa = a + 1 + val pc = a + (n-1) + val (pa,pb,pc,pd) = split(pa,pa,pc,pc) + val pn = a + n + val r = Int.min(pa - a, pb - pa) + val _ = vecswap(a, pb-r, r) + val r = Int.min(pd - pc, pn - pd - 1) + val _ = vecswap(pb, pn-r, r) + val n' = pb - pa + val _ = if n' > 1 then sort(a,n') else () + val n' = pd - pc + val _ = if n' > 1 then sort(pn-n',n') else () + in () end + + and sort (arg as (_, n)) = if n < 7 then insertSort arg + else quickSort arg + in sort (start,n) end + + fun sort cmp array = sortRange(array,0,A.length array, cmp) + + fun sorted cmp array = let + val len = A.length array + fun s (v,i) = let + val v' = A.sub(array,i) + in + case cmp(v,v') of + GREATER => false + | _ => if i+1 = len then true else s(v',i+1) + end + in + if len = 0 orelse len = 1 then true + else s(A.sub(array,0),1) + end + + end (* ArraySortFn *) + diff --git a/smlnj-lib/Util/array-qsort.sml b/smlnj-lib/Util/array-qsort.sml new file mode 100644 index 0000000..0c24eab --- /dev/null +++ b/smlnj-lib/Util/array-qsort.sml @@ -0,0 +1,168 @@ +(* array-qsort.sml + * + * COPYRIGHT (c) 1993 by AT&T Bell Laboratories. See COPYRIGHT file for details. + * + * Structure for in-place sorting of polymorphic arrays. + * Uses an engineered version of quicksort due to + * Bentley and McIlroy. + * + *) + +structure ArrayQSort : ARRAY_SORT = + struct + + structure A = Array + + type 'a array = 'a A.array + + val sub = Unsafe.Array.sub + val update = Unsafe.Array.update + + fun isort (array, start, n, cmp) = let + fun item i = sub(array,i) + fun swap (i,j) = let + val tmp = sub(array,i) + in update(array,i,sub(array,j)); update(array,j,tmp) end + fun vecswap (i,j,0) = () + | vecswap (i,j,n) = (swap(i,j);vecswap(i+1,j+1,n-1)) + fun insertSort (start, n) = let + val limit = start+n + fun outer i = + if i >= limit then () + else let + fun inner j = + if j = start then outer(i+1) + else let + val j' = j - 1 + in + if cmp(item j',item j) = GREATER + then (swap(j,j'); inner j') + else outer(i+1) + end + in inner i end + in + outer (start+1) + end + in insertSort (start, n); array end + + fun sortRange (array, start, n, cmp) = let + fun item i = sub(array,i) + fun swap (i,j) = let + val tmp = sub(array,i) + in update(array,i,sub(array,j)); update(array,j,tmp) end + fun vecswap (i,j,0) = () + | vecswap (i,j,n) = (swap(i,j);vecswap(i+1,j+1,n-1)) + fun insertSort (start, n) = let + val limit = start+n + fun outer i = + if i >= limit then () + else let + fun inner j = + if j = start then outer(i+1) + else let + val j' = j - 1 + in + if cmp(item j',item j) = GREATER + then (swap(j,j'); inner j') + else outer(i+1) + end + in inner i end + in + outer (start+1) + end + + fun med3(a,b,c) = let + val a' = item a and b' = item b and c' = item c + in + case (cmp(a', b'),cmp(b', c')) + of (LESS, LESS) => b + | (LESS, _) => ( + case cmp(a', c') of LESS => c | _ => a) + | (_, GREATER) => b + | _ => (case cmp(a', c') of LESS => a | _ => c) + (* end case *) + end + + fun getPivot (a,n) = + if n <= 7 then a + n div 2 + else let + val p1 = a + val pm = a + n div 2 + val pn = a + n - 1 + in + if n <= 40 then med3(p1,pm,pn) + else let + val d = n div 8 + val p1 = med3(p1,p1+d,p1+2*d) + val pm = med3(pm-d,pm,pm+d) + val pn = med3(pn-2*d,pn-d,pn) + in + med3(p1,pm,pn) + end + end + + fun quickSort (arg as (a, n)) = let + fun bottom limit = let + fun loop (arg as (pa,pb)) = + if pb > limit then arg + else case cmp(item pb,item a) of + GREATER => arg + | LESS => loop (pa,pb+1) + | _ => (swap arg; loop (pa+1,pb+1)) + in loop end + + fun top limit = let + fun loop (arg as (pc,pd)) = + if limit > pc then arg + else case cmp(item pc,item a) of + LESS => arg + | GREATER => loop (pc-1,pd) + | _ => (swap arg; loop (pc-1,pd-1)) + in loop end + + fun split (pa,pb,pc,pd) = let + val (pa,pb) = bottom pc (pa,pb) + val (pc,pd) = top pb (pc,pd) + in + if pb > pc then (pa,pb,pc,pd) + else (swap(pb,pc); split(pa,pb+1,pc-1,pd)) + end + + val pm = getPivot arg + val _ = swap(a,pm) + val pa = a + 1 + val pc = a + (n-1) + val (pa,pb,pc,pd) = split(pa,pa,pc,pc) + val pn = a + n + val r = Int.min(pa - a, pb - pa) + val _ = vecswap(a, pb-r, r) + val r = Int.min(pd - pc, pn - pd - 1) + val _ = vecswap(pb, pn-r, r) + val n' = pb - pa + val _ = if n' > 1 then sort(a,n') else () + val n' = pd - pc + val _ = if n' > 1 then sort(pn-n',n') else () + in () end + + and sort (arg as (_, n)) = if n < 7 then insertSort arg + else quickSort arg + in sort (start,n) end + + fun sort cmp array = sortRange(array, 0, A.length array, cmp) + + fun sorted cmp array = let + val len = A.length array + fun chk (v, i) = let + val v' = sub(array,i) + in + case cmp(v,v') + of GREATER => false + | _ => if i+1 = len then true else chk(v',i+1) + (* end case *) + end + in + len <= 1 orelse chk(sub(array,0), 1) + end + + end (* ArraySort *) + diff --git a/smlnj-lib/Util/array-sort-sig.sml b/smlnj-lib/Util/array-sort-sig.sml new file mode 100644 index 0000000..3b12b61 --- /dev/null +++ b/smlnj-lib/Util/array-sort-sig.sml @@ -0,0 +1,16 @@ +(* array-sort-sig.sml + * + * COPYRIGHT (c) 2020 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * Signature for in-place sorting of polymorphic arrays + *) + +signature ARRAY_SORT = + sig + + val sort : ('a * 'a -> order) -> 'a array -> unit + val sorted : ('a * 'a -> order) -> 'a array -> bool + + end (* ARRAY_SORT *) + diff --git a/smlnj-lib/Util/atom-binary-map.sml b/smlnj-lib/Util/atom-binary-map.sml new file mode 100644 index 0000000..9c84655 --- /dev/null +++ b/smlnj-lib/Util/atom-binary-map.sml @@ -0,0 +1,13 @@ +(* atom-binary-map.sml + * + * COPYRIGHT (c) 1997 Bell Labs, Lucent Technologies. + * + * Functional finite maps with atom keys. + *) + +structure AtomBinaryMap = + BinaryMapFn ( + struct + type ord_key = Atom.atom + val compare = Atom.compare + end) diff --git a/smlnj-lib/Util/atom-binary-set.sml b/smlnj-lib/Util/atom-binary-set.sml new file mode 100644 index 0000000..f5619ff --- /dev/null +++ b/smlnj-lib/Util/atom-binary-set.sml @@ -0,0 +1,13 @@ +(* atom-binary-map.sml + * + * COPYRIGHT (c) 1997 Bell Labs, Lucent Technologies. + * + * Functional sets of atoms. + *) + +structure AtomBinarySet = + BinarySetFn ( + struct + type ord_key = Atom.atom + val compare = Atom.compare + end) diff --git a/smlnj-lib/Util/atom-map.sml b/smlnj-lib/Util/atom-map.sml new file mode 100644 index 0000000..0e9916f --- /dev/null +++ b/smlnj-lib/Util/atom-map.sml @@ -0,0 +1,8 @@ +(* atom-map.sml + * + * COPYRIGHT (c) 1999 Bell Labs, Lucent Technologies. + * + * Functional finite maps with atom keys. + *) + +structure AtomMap = AtomRedBlackMap diff --git a/smlnj-lib/Util/atom-redblack-map.sml b/smlnj-lib/Util/atom-redblack-map.sml new file mode 100644 index 0000000..83bed33 --- /dev/null +++ b/smlnj-lib/Util/atom-redblack-map.sml @@ -0,0 +1,13 @@ +(* atom-redblack-map.sml + * + * COPYRIGHT (c) 1999 Bell Labs, Lucent Technologies. + * + * Functional finite maps with atom keys. + *) + +structure AtomRedBlackMap = + RedBlackMapFn ( + struct + type ord_key = Atom.atom + val compare = Atom.compare + end) diff --git a/smlnj-lib/Util/atom-redblack-set.sml b/smlnj-lib/Util/atom-redblack-set.sml new file mode 100644 index 0000000..f1a9967 --- /dev/null +++ b/smlnj-lib/Util/atom-redblack-set.sml @@ -0,0 +1,13 @@ +(* atom-redblack-set.sml + * + * COPYRIGHT (c) 1999 Bell Labs, Lucent Technologies. + * + * Functional sets of atoms. + *) + +structure AtomRedBlackSet = + RedBlackSetFn ( + struct + type ord_key = Atom.atom + val compare = Atom.compare + end) diff --git a/smlnj-lib/Util/atom-set.sml b/smlnj-lib/Util/atom-set.sml new file mode 100644 index 0000000..5063f90 --- /dev/null +++ b/smlnj-lib/Util/atom-set.sml @@ -0,0 +1,8 @@ +(* atom-map.sml + * + * COPYRIGHT (c) 1999 Bell Labs, Lucent Technologies. + * + * Functional sets of atoms. + *) + +structure AtomSet = AtomRedBlackSet diff --git a/smlnj-lib/Util/atom-sig.sml b/smlnj-lib/Util/atom-sig.sml new file mode 100644 index 0000000..ab7efe0 --- /dev/null +++ b/smlnj-lib/Util/atom-sig.sml @@ -0,0 +1,45 @@ +(* atom-sig.sml + * + * COPYRIGHT (c) 2020 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * COPYRIGHT (c) 1996 by AT&T Research + * + * AUTHOR: John Reppy + * AT&T Bell Laboratories + * Murray Hill, NJ 07974 + * jhr@research.att.com + * + * TODO: add a gensym operation? + *) + +signature ATOM = + sig + + type atom + (* Atoms are hashed strings that support fast equality testing. *) + + val atom : string -> atom + val atom' : substring -> atom + (* Map a string/substring to the corresponding unique atom. *) + + val toString : atom -> string + (* return the string representation of the atom *) + + val same : (atom * atom) -> bool + val sameAtom : (atom * atom) -> bool + (* return true if the atoms are the same; we provide "sameAtom" for + * backward compatibility. + *) + + val compare : (atom * atom) -> order + (* compare two atoms for their relative order; note that this is + * not lexical order! + *) + val lexCompare : (atom * atom) -> order + (* compare two atoms for their lexical order *) + + val hash : atom -> word + (* return a hash key for the atom *) + + end (* signature ATOM *) diff --git a/smlnj-lib/Util/atom-table.sml b/smlnj-lib/Util/atom-table.sml new file mode 100644 index 0000000..ce43550 --- /dev/null +++ b/smlnj-lib/Util/atom-table.sml @@ -0,0 +1,13 @@ +(* atom-table.sml + * + * COPYRIGHT (c) 1996 AT&T Research. + * + * Hash tables of atoms. + *) + +structure AtomTable = HashTableFn (struct + type hash_key = Atom.atom + val hashVal = Atom.hash + val sameKey = Atom.same + end); + diff --git a/smlnj-lib/Util/atom.sml b/smlnj-lib/Util/atom.sml new file mode 100644 index 0000000..9b5b55d --- /dev/null +++ b/smlnj-lib/Util/atom.sml @@ -0,0 +1,102 @@ +(* atom.sml + * + * COPYRIGHT (c) 1996 by AT&T Research + * + * AUTHOR: John Reppy + * AT&T Bell Laboratories + * Murray Hill, NJ 07974 + * jhr@research.att.com + * + * TODO: add a gensym operation? + *) + +structure Atom :> ATOM = + struct + + (* Atoms are hashed strings that support fast equality testing. *) + datatype atom = ATOM of { + hash : word, + id : string + } + + (* return the string representation of the atom *) + fun toString (ATOM{id, ...}) = id + + (* return a hash key for the atom *) + fun hash (ATOM{hash, ...}) = hash + + (* return true if the atoms are the same *) + fun same (ATOM{hash=h1, id=id1}, ATOM{hash=h2, id=id2}) = + (h1 = h2) andalso (id1 = id2) + + (* for backward compatibility *) + val sameAtom = same + + (* compare two names for their relative order; note that this is + * not lexical order! + *) + fun compare (ATOM{hash=h1, id=id1}, ATOM{hash=h2, id=id2}) = + if h1 = h2 then String.compare (id1, id2) + else if h1 < h2 then LESS + else GREATER + + (* compare two atoms for their lexical order *) + fun lexCompare (ATOM{id=id1, ...}, ATOM{id=id2, ...}) = String.compare(id1, id2) + + (* the unique name hash table *) + val tableSz = 64 + val table = ref(Array.array(tableSz, [] : atom list)) + val numItems = ref 0 + + infix % + fun h % m = Word.toIntX (Word.andb (h, m)) + + (* Map a string or substring s to the corresponding unique atom. *) + fun atom0 (toString, hashString, sameString) s = let + val h = hashString s + val tbl = !table + val sz = Array.length tbl + val indx = h % (Word.fromInt sz - 0w1) + fun look ((a as ATOM{hash, id}) :: rest) = + if (hash = h) andalso sameString(s, id) + then a + else look rest + | look [] = let + fun new (tbl, indx) = let + val a = ATOM {hash = h, id = toString s} + in + Array.update (tbl, indx, a :: Array.sub (tbl, indx)); + a + end + in + if !numItems < sz + then new (tbl, indx) + else let + val newSz = sz + sz + val newMask = Word.fromInt newSz - 0w1 + val newTbl = Array.array (newSz, []) + fun ins (item as ATOM{hash, ...}) = let + val indx = hash % newMask + in + Array.update (newTbl, indx, item :: Array.sub (newTbl, indx)) + end + in + Array.app (app ins) tbl; + table := newTbl; + new (newTbl, h % newMask) + end + end + in + look (Array.sub (tbl, indx)) + end + + (* instantiate atom0 for the string case *) + val atom = atom0 (fn s => s, FNVHash.hashString, op = ) + + (* instantiate atom0 for the substring case *) + val atom' = atom0 ( + Substring.string, + FNVHash.hashSubstring, + fn (ss, s) => (Substring.compare(ss, Substring.full s) = EQUAL)) + + end (* structure Atom *) diff --git a/smlnj-lib/Util/base64-sig.sml b/smlnj-lib/Util/base64-sig.sml new file mode 100644 index 0000000..700ae3c --- /dev/null +++ b/smlnj-lib/Util/base64-sig.sml @@ -0,0 +1,38 @@ +(* base64-sig.sml + * + * COPYRIGHT (c) 2012 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * Support for Base64 encoding/decoding as specified by RFC 4648. + * + * http://www.ietf.org/rfc/rfc4648.txt + *) + +signature BASE64 = + sig + + (* return true if a character is in the base64 alphabet *) + val isBase64 : char -> bool + + val encode : Word8Vector.vector -> string + val encodeSlice : Word8VectorSlice.slice -> string + + (* raised if a Base64 string does not end in a complete encoding quantum (i.e., 4 + * characters including padding characters). + *) + exception Incomplete + + (* raised if an invalid Base64 character is encountered during decode. The int + * is the position of the character and the char is the invalid character. + *) + exception Invalid of (int * char) + + (* decode functions that ignore whitespace *) + val decode : string -> Word8Vector.vector + val decodeSlice : substring -> Word8Vector.vector + + (* strict decode functions that only accept the base64 characters *) + val decodeStrict : string -> Word8Vector.vector + val decodeSliceStrict : substring -> Word8Vector.vector + + end diff --git a/smlnj-lib/Util/base64.sml b/smlnj-lib/Util/base64.sml new file mode 100644 index 0000000..2432737 --- /dev/null +++ b/smlnj-lib/Util/base64.sml @@ -0,0 +1,246 @@ +(* base64.sml + * + * COPYRIGHT (c) 2012 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * Support for Base64 encoding/decoding as specified by RFC 4648. + * + * http://www.ietf.org/rfc/rfc4648.txt + *) + +structure Base64 : BASE64 = + struct + + structure W8 = Word8 + structure W8V = Word8Vector + structure W8A = Word8Array + structure UCV = Unsafe.CharVector + structure UW8V = Unsafe.Word8Vector + + (* encoding table *) + val encTbl = "\ + \ABCDEFGHIJKLMNOPQRSTUVWXYZ\ + \abcdefghijklmnopqrstuvwxyz\ + \0123456789+/\ + \" + val padChar = #"=" + fun incByte b = UCV.sub(encTbl, Word8.toIntX b) + + (* return true if a character is in the base64 alphabet *) + val isBase64 = Char.contains encTbl + + (* encode a triple of bytes into four base-64 characters *) + fun encode3 (b1, b2, b3) = let + val c1 = W8.>>(b1, 0w2) + val c2 = W8.orb(W8.<<(W8.andb(b1, 0wx3), 0w4), W8.>>(b2, 0w4)) + val c3 = W8.orb(W8.<<(W8.andb(0wxF, b2), 0w2), W8.>>(b3, 0w6)) + val c4 = W8.andb(0wx3f, b3) + in + (incByte c1, incByte c2, incByte c3, incByte c4) + end + + (* encode a pair of bytes into three base-64 characters plus a padding character *) + fun encode2 (b1, b2) = let + val c1 = W8.>>(b1, 0w2) + val c2 = W8.orb(W8.<<(W8.andb(b1, 0wx3), 0w4), W8.>>(b2, 0w4)) + val c3 = W8.<<(W8.andb(0wxF, b2), 0w2) + in + (incByte c1, incByte c2, incByte c3, padChar) + end + + (* encode a byte into two base-64 characters plus two padding characters *) + fun encode1 b1 = let + val c1 = W8.>>(b1, 0w2) + val c2 = W8.<<(W8.andb(b1, 0wx3), 0w4) + in + (incByte c1, incByte c2, padChar, padChar) + end + + local + fun encode64 (vec, start, len) = let + val outLen = 4 * Int.quot(len + 2, 3) + val outBuf = Unsafe.CharVector.create outLen + val nTriples = Int.quot(len, 3) + val extra = Int.rem(len, 3) + fun insBuf (i, (c1, c2, c3, c4)) = let + val idx = 4*i + in + UCV.update(outBuf, idx, c1); + UCV.update(outBuf, idx+1, c2); + UCV.update(outBuf, idx+2, c3); + UCV.update(outBuf, idx+3, c4) + end + fun loop (i, idx) = if (i < nTriples) + then ( + insBuf(i, encode3(UW8V.sub(vec, idx), UW8V.sub(vec, idx+1), UW8V.sub(vec, idx+2))); + loop (i+1, idx+3)) + else (case extra + of 1 => insBuf(i, encode1(UW8V.sub(vec, idx))) + | 2 => insBuf(i, encode2(UW8V.sub(vec, idx), UW8V.sub(vec, idx+1))) + | _ => () + (* end case *)) + in + loop (0, start); + outBuf + end + in + + fun encode vec = encode64 (vec, 0, W8V.length vec) + + fun encodeSlice slice = encode64 (Word8VectorSlice.base slice) + + end (* local *) + + (* raised if a Base64 string does not end in a complete encoding quantum (i.e., 4 + * characters including padding characters). + *) + exception Incomplete + + (* raised if an invalid Base64 character is encountered during decode. The int + * is the position of the character and the char is the invalid character. + *) + exception Invalid of (int * char) + + (* decoding tags *) + val errCode : W8.word = 0w255 + val spCode : W8.word = 0w65 + val padCode : W8.word = 0w66 + val decTbl = let + val tbl = W8A.array(256, errCode) + fun ins (w, c) = W8A.update(tbl, Char.ord c, w) + in + (* add space codes *) + ins(spCode, #"\t"); + ins(spCode, #"\n"); + ins(spCode, #"\r"); + ins(spCode, #" "); + (* add decoding codes *) + CharVector.appi (fn (i, c) => ins(Word8.fromInt i, c)) encTbl; + (* convert to vector *) + W8V.tabulate (256, fn i => W8A.sub(tbl, i)) + end + val strictDecTbl = let + val tbl = W8A.array(256, errCode) + fun ins (w, c) = W8A.update(tbl, Char.ord c, w) + in + (* add decoding codes *) + CharVector.appi (fn (i, c) => ins(Word8.fromInt i, c)) encTbl; + (* convert to vector *) + W8V.tabulate (256, fn i => W8A.sub(tbl, i)) + end + + fun decode64 decTbl (s, start, len) = let + fun decodeChr c = W8V.sub(decTbl, Char.ord c) + fun getc i = if (i < len) + then let + val c = String.sub(s, start+i) + val b = decodeChr c + in + if (b = errCode) then raise Invalid(i, c) + else if (b = spCode) then getc (i+1) + else (b, i+1) + end + else raise Incomplete + (* first we deal with possible padding. There are three possible situations: + * 1. the final quantum is 24 bits, so there is no padding + * 2. the final quantum is 16 bits, so there are three code characters and + * one pad character. + * 3. the final quantum is 8 bits, so there are two code characters and + * two pad characters. + *) + val (lastQ, len, tailLen) = let + fun getTail (i, n, chrs) = if (i < 0) + then raise Incomplete + else if (n < 4) + then (case String.sub(s, start+i) + of #"=" => getTail (i-1, n+1, (#"=", i)::chrs) + | c => let + val b = decodeChr c + in + if (b = spCode) + then getTail (i-1, n, chrs) (* skip whitespace *) + else if (b = errCode) + then raise Invalid(i, c) + else getTail (i-1, n+1, (c, i)::chrs) + end + (* end case *)) + else (i, chrs) + fun cvt (c, i) = let + val b = decodeChr c + in + if (b < 0w64) then b else raise Invalid(i, c) + end + in + case getTail (len-1, 0, []) + of (len, [ci0, ci1, (#"=", _), (#"=", _)]) => let + val c0 = cvt ci0 + val c1 = cvt ci1 + val b0 = W8.orb(W8.<<(c0, 0w2), W8.>>(c1, 0w4)) + in + ([b0], len, 1) + end + | (len, [ci0, ci1, ci2, (#"=", _)]) => let + val c0 = cvt ci0 + val c1 = cvt ci1 + val c2 = cvt ci2 + val b0 = W8.orb(W8.<<(c0, 0w2), W8.>>(c1, 0w4)) + val b1 = W8.orb(W8.<<(c1, 0w4), W8.>>(c2, 0w2)) + in + ([b0, b1], len, 2) + end + | (_, [_, _, _, _]) => ([], len, 0) (* fallback to regular path below *) + | (_, []) => ([], len, 0) + | _ => raise Incomplete + (* end case *) + end + (* compute upper bound on number of output bytes *) + val nBytes = 3 * Word.toIntX(Word.>>(Word.fromInt len + 0w3, 0w2)) + tailLen + val buffer = W8A.array(nBytes, 0w0) + fun cvt (inIdx, outIdx) = if (inIdx < len) + then let + val (c0, i) = getc inIdx + val (c1, i) = getc i + val (c2, i) = getc i + val (c3, i) = getc i + val b0 = W8.orb(W8.<<(c0, 0w2), W8.>>(c1, 0w4)) + val b1 = W8.orb(W8.<<(c1, 0w4), W8.>>(c2, 0w2)) + val b2 = W8.orb(W8.<<(c2, 0w6), c3) + in + W8A.update(buffer, outIdx, b0); + W8A.update(buffer, outIdx+1, b1); + W8A.update(buffer, outIdx+2, b2); + cvt (i, outIdx+3) + end + else outIdx + val outLen = cvt (0, 0) (*handle Subscript => raise Incomplete*) + (* deal with the last quantum *) + val outLen = (case lastQ + of [b0, b1] => ( + W8A.update(buffer, outLen, b0); + W8A.update(buffer, outLen+1, b1); + outLen+2) + | [b0] => ( + W8A.update(buffer, outLen, b0); + outLen+1) + | _ => outLen + (* end case *)) + in + Word8ArraySlice.vector(Word8ArraySlice.slice(buffer, 0, SOME outLen)) + end + + fun decode s = decode64 decTbl (s, 0, size s) + fun decodeSlice ss = decode64 decTbl (Substring.base ss) + + fun decodeStrict s = decode64 strictDecTbl (s, 0, size s) + fun decodeSliceStrict ss = decode64 strictDecTbl (Substring.base ss) + + end + +(* simple test code + +val v = Word8Vector.tabulate(256, fn i => Word8.fromInt i); +val enc = Base64.encode v +val v' = Base64.decode enc +val ok = (v = v') + +*) diff --git a/smlnj-lib/Util/binary-map-fn.sml b/smlnj-lib/Util/binary-map-fn.sml new file mode 100644 index 0000000..6cee1ba --- /dev/null +++ b/smlnj-lib/Util/binary-map-fn.sml @@ -0,0 +1,535 @@ +(* binary-map-fn.sml + * + * COPYRIGHT (c) 2012 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * This code was adapted from Stephen Adams' binary tree implementation + * of applicative integer sets. + * + * Copyright 1992 Stephen Adams. + * + * This software may be used freely provided that: + * 1. This copyright notice is attached to any copy, derived work, + * or work including all or part of this software. + * 2. Any derived work must contain a prominent notice stating that + * it has been altered from the original. + * + * + * Name(s): Stephen Adams. + * Department, Institution: Electronics & Computer Science, + * University of Southampton + * Address: Electronics & Computer Science + * University of Southampton + * Southampton SO9 5NH + * Great Britian + * E-mail: sra@ecs.soton.ac.uk + * + * Comments: + * + * 1. The implementation is based on Binary search trees of Bounded + * Balance, similar to Nievergelt & Reingold, SIAM J. Computing + * 2(1), March 1973. The main advantage of these trees is that + * they keep the size of the tree in the node, giving a constant + * time size operation. + * + * 2. The bounded balance criterion is simpler than N&R's alpha. + * Simply, one subtree must not have more than `weight' times as + * many elements as the opposite subtree. Rebalancing is + * guaranteed to reinstate the criterion for weight>2.23, but + * the occasional incorrect behaviour for weight=2 is not + * detrimental to performance. + * + *) + +functor BinaryMapFn (K : ORD_KEY) :> ORD_MAP where type Key.ord_key = K.ord_key = + struct + + structure Key = K + + (* + ** val weight = 3 + ** fun wt i = weight * i + *) + fun wt (i : int) = i + i + i + + datatype 'a map + = E + | T of { + key : K.ord_key, + value : 'a, + cnt : int, + left : 'a map, + right : 'a map + } + + val empty = E + + fun isEmpty E = true + | isEmpty _ = false + + fun numItems E = 0 + | numItems (T{cnt,...}) = cnt + + (* return the first item in the map (or NONE if it is empty) *) + fun first E = NONE + | first (T{value, left=E, ...}) = SOME value + | first (T{left, ...}) = first left + + (* return the first item in the map and its key (or NONE if it is empty) *) + fun firsti E = NONE + | firsti (T{key, value, left=E, ...}) = SOME(key, value) + | firsti (T{left, ...}) = firsti left + +local + fun N(k,v,E,E) = T{key=k,value=v,cnt=1,left=E,right=E} + | N(k,v,E,r as T n) = T{key=k,value=v,cnt=1+(#cnt n),left=E,right=r} + | N(k,v,l as T n,E) = T{key=k,value=v,cnt=1+(#cnt n),left=l,right=E} + | N(k,v,l as T n,r as T n') = + T{key=k,value=v,cnt=1+(#cnt n)+(#cnt n'),left=l,right=r} + + fun single_L (a,av,x,T{key=b,value=bv,left=y,right=z,...}) = + N(b,bv,N(a,av,x,y),z) + | single_L _ = raise Match + fun single_R (b,bv,T{key=a,value=av,left=x,right=y,...},z) = + N(a,av,x,N(b,bv,y,z)) + | single_R _ = raise Match + fun double_L (a,av,w,T{key=c,value=cv,left=T{key=b,value=bv,left=x,right=y,...},right=z,...}) = + N(b,bv,N(a,av,w,x),N(c,cv,y,z)) + | double_L _ = raise Match + fun double_R (c,cv,T{key=a,value=av,left=w,right=T{key=b,value=bv,left=x,right=y,...},...},z) = + N(b,bv,N(a,av,w,x),N(c,cv,y,z)) + | double_R _ = raise Match + + fun T' (k,v,E,E) = T{key=k,value=v,cnt=1,left=E,right=E} + | T' (k,v,E,r as T{right=E,left=E,...}) = + T{key=k,value=v,cnt=2,left=E,right=r} + | T' (k,v,l as T{right=E,left=E,...},E) = + T{key=k,value=v,cnt=2,left=l,right=E} + + | T' (p as (_,_,E,T{left=T _,right=E,...})) = double_L p + | T' (p as (_,_,T{left=E,right=T _,...},E)) = double_R p + + (* these cases almost never happen with small weight*) + | T' (p as (_,_,E,T{left=T{cnt=ln,...},right=T{cnt=rn,...},...})) = + if ln < rn then single_L p else double_L p + | T' (p as (_,_,T{left=T{cnt=ln,...},right=T{cnt=rn,...},...},E)) = + if ln > rn then single_R p else double_R p + + | T' (p as (_,_,E,T{left=E,...})) = single_L p + | T' (p as (_,_,T{right=E,...},E)) = single_R p + + | T' (p as (k,v,l as T{cnt=ln,left=ll,right=lr,...}, + r as T{cnt=rn,left=rl,right=rr,...})) = + if rn >= wt ln then (*right is too big*) + let val rln = numItems rl + val rrn = numItems rr + in + if rln < rrn then single_L p else double_L p + end + + else if ln >= wt rn then (*left is too big*) + let val lln = numItems ll + val lrn = numItems lr + in + if lrn < lln then single_R p else double_R p + end + + else T{key=k,value=v,cnt=ln+rn+1,left=l,right=r} + + local + fun min (T{left=E,key,value,...}) = (key,value) + | min (T{left,...}) = min left + | min _ = raise Match + + fun delmin (T{left=E,right,...}) = right + | delmin (T{key,value,left,right,...}) = T'(key,value,delmin left,right) + | delmin _ = raise Match + in + fun delete' (E,r) = r + | delete' (l,E) = l + | delete' (l,r) = let val (mink,minv) = min r in + T'(mink,minv,l,delmin r) + end + end +in + fun mkDict () = E + + fun singleton (x,v) = T{key=x,value=v,cnt=1,left=E,right=E} + + fun insert (E,x,v) = T{key=x,value=v,cnt=1,left=E,right=E} + | insert (T(set as {key,left,right,value,...}),x,v) = + case K.compare (key,x) of + GREATER => T'(key,value,insert(left,x,v),right) + | LESS => T'(key,value,left,insert(right,x,v)) + | _ => T{key=x,value=v,left=left,right=right,cnt= #cnt set} + fun insert' ((k, x), m) = insert(m, k, x) + + fun insertWithi comb (m, x, v) = let + fun insert E = T{key=x,value=v,cnt=1,left=E,right=E} + | insert (T{key,left,right,value,cnt}) = ( + case K.compare (key,x) + of GREATER => T'(key, value, insert left, right) + | LESS => T'(key, value, left, insert right) + | EQUAL => let + val v' = comb(x, value, v) + in + T{key=x,value=v',left=left,right=right,cnt=cnt} + end + (* end case *)) + in + insert m + end + fun insertWith comb = insertWithi (fn (_, x1, x2) => comb(x1, x2)) + + fun inDomain (set, x) = let + fun mem E = false + | mem (T(n as {key,left,right,...})) = (case K.compare (x,key) + of GREATER => mem right + | EQUAL => true + | LESS => mem left + (* end case *)) + in + mem set + end + + fun find (set, x) = let + fun mem E = NONE + | mem (T(n as {key,left,right,...})) = (case K.compare (x,key) + of GREATER => mem right + | EQUAL => SOME(#value n) + | LESS => mem left + (* end case *)) + in + mem set + end + + fun lookup (set, x) = let + fun mem E = raise LibBase.NotFound + | mem (T(n as {key,left,right,...})) = (case K.compare (x,key) + of GREATER => mem right + | EQUAL => #value n + | LESS => mem left + (* end case *)) + in + mem set + end + + fun remove (E,x) = raise LibBase.NotFound + | remove (set as T{key,left,right,value,...},x) = ( + case K.compare (key,x) + of GREATER => let + val (left', v) = remove(left, x) + in + (T'(key, value, left', right), v) + end + | LESS => let + val (right', v) = remove (right, x) + in + (T'(key, value, left, right'), v) + end + | _ => (delete'(left,right),value) + (* end case *)) + + fun findAndRemove arg = SOME(remove arg) handle LibBase.NotFound => NONE + + fun listItems d = let + fun d2l (E, l) = l + | d2l (T{key,value,left,right,...}, l) = + d2l(left, value::(d2l(right,l))) + in + d2l (d,[]) + end + + fun listItemsi d = let + fun d2l (E, l) = l + | d2l (T{key,value,left,right,...}, l) = + d2l(left, (key,value)::(d2l(right,l))) + in + d2l (d,[]) + end + + fun listKeys d = let + fun d2l (E, l) = l + | d2l (T{key,left,right,...}, l) = d2l(left, key::(d2l(right,l))) + in + d2l (d,[]) + end + + local + fun next ((t as T{right, ...})::rest) = (t, left(right, rest)) + | next _ = (E, []) + and left (E, rest) = rest + | left (t as T{left=l, ...}, rest) = left(l, t::rest) + fun start m = left(m, []) + in + fun equiv rngEq (m1, m2) = let + fun cmp (t1, t2) = (case (next t1, next t2) + of ((E, _), (E, _)) => true + | ((E, _), _) => false + | (_, (E, _)) => false + | ((T{key=xk, value=x, ...}, r1), (T{key=yk, value=y, ...}, r2)) => ( + case Key.compare(xk, yk) + of EQUAL => rngEq(x, y) andalso cmp (r1, r2) + | _ => false + (* end case *)) + (* end case *)) + in + cmp (start m1, start m2) + end + fun collate rngCmp (m1, m2) = let + fun cmp (t1, t2) = (case (next t1, next t2) + of ((E, _), (E, _)) => EQUAL + | ((E, _), _) => LESS + | (_, (E, _)) => GREATER + | ((T{key=xk, value=x, ...}, r1), (T{key=yk, value=y, ...}, r2)) => ( + case Key.compare(xk, yk) + of EQUAL => (case rngCmp(x, y) + of EQUAL => cmp (r1, r2) + | order => order + (* end case *)) + | order => order + (* end case *)) + (* end case *)) + in + cmp (start m1, start m2) + end + fun extends rngEx (m1, m2) = let + (* does t1 extend t2? *) + fun cmp (t1, t2) = (case (next t1, next t2) + of ((E, _), (E, _)) => true + | (_, (E, _)) => true + | ((E, _), _) => false + | ((T{key=xk, value=x, ...}, r1), (T{key=yk, value=y, ...}, r2)) => ( + case Key.compare(xk, yk) + of LESS => cmp (r1, t2) + | EQUAL => rngEx(x, y) andalso cmp (r1, r2) + | GREATER => false + (* end case *)) + (* end case *)) + in + cmp (start m1, start m2) + end + end (* local *) + + fun appi f d = let + fun app' E = () + | app' (T{key,value,left,right,...}) = ( + app' left; f(key, value); app' right) + in + app' d + end + fun app f d = let + fun app' E = () + | app' (T{value,left,right,...}) = ( + app' left; f value; app' right) + in + app' d + end + + fun mapi f d = let + fun map' E = E + | map' (T{key,value,left,right,cnt}) = let + val left' = map' left + val value' = f(key, value) + val right' = map' right + in + T{cnt=cnt, key=key, value=value', left = left', right = right'} + end + in + map' d + end + fun map f d = mapi (fn (_, x) => f x) d + + fun foldli f init d = let + fun fold (E, v) = v + | fold (T{key,value,left,right,...}, v) = + fold (right, f(key, value, fold(left, v))) + in + fold (d, init) + end + fun foldl f init d = foldli (fn (_, v, accum) => f (v, accum)) init d + + fun foldri f init d = let + fun fold (E,v) = v + | fold (T{key,value,left,right,...},v) = + fold (left, f(key, value, fold(right, v))) + in + fold (d, init) + end + fun foldr f init d = foldri (fn (_, v, accum) => f (v, accum)) init d + +(** To be implemented ** + val filter : ('a -> bool) -> 'a map -> 'a map + val filteri : (Key.ord_key * 'a -> bool) -> 'a map -> 'a map +**) + + end (* local *) + +(* the following are generic implementations of the unionWith, intersectWith, + * and mergeWith operetions. These should be specialized for the internal + * representations at some point. + *) + fun unionWith f (m1, m2) = let + fun ins f (key, x, m) = (case find(m, key) + of NONE => insert(m, key, x) + | (SOME x') => insert(m, key, f(x, x')) + (* end case *)) + in + if (numItems m1 > numItems m2) + then foldli (ins (fn (a, b) => f (b, a))) m1 m2 + else foldli (ins f) m2 m1 + end + fun unionWithi f (m1, m2) = let + fun ins f (key, x, m) = (case find(m, key) + of NONE => insert(m, key, x) + | (SOME x') => insert(m, key, f(key, x, x')) + (* end case *)) + in + if (numItems m1 > numItems m2) + then foldli (ins (fn (k, a, b) => f (k, b, a))) m1 m2 + else foldli (ins f) m2 m1 + end + + fun intersectWith f (m1, m2) = let + (* iterate over the elements of m1, checking for membership in m2 *) + fun intersect f (m1, m2) = let + fun ins (key, x, m) = (case find(m2, key) + of NONE => m + | (SOME x') => insert(m, key, f(x, x')) + (* end case *)) + in + foldli ins empty m1 + end + in + if (numItems m1 > numItems m2) + then intersect f (m1, m2) + else intersect (fn (a, b) => f(b, a)) (m2, m1) + end + fun intersectWithi f (m1, m2) = let + (* iterate over the elements of m1, checking for membership in m2 *) + fun intersect f (m1, m2) = let + fun ins (key, x, m) = (case find(m2, key) + of NONE => m + | (SOME x') => insert(m, key, f(key, x, x')) + (* end case *)) + in + foldli ins empty m1 + end + in + if (numItems m1 > numItems m2) + then intersect f (m1, m2) + else intersect (fn (k, a, b) => f(k, b, a)) (m2, m1) + end + + fun mergeWith f (m1, m2) = let + fun merge ([], [], m) = m + | merge ((k1, x1)::r1, [], m) = mergef (k1, SOME x1, NONE, r1, [], m) + | merge ([], (k2, x2)::r2, m) = mergef (k2, NONE, SOME x2, [], r2, m) + | merge (m1 as ((k1, x1)::r1), m2 as ((k2, x2)::r2), m) = ( + case Key.compare (k1, k2) + of LESS => mergef (k1, SOME x1, NONE, r1, m2, m) + | EQUAL => mergef (k1, SOME x1, SOME x2, r1, r2, m) + | GREATER => mergef (k2, NONE, SOME x2, m1, r2, m) + (* end case *)) + and mergef (k, x1, x2, r1, r2, m) = (case f (x1, x2) + of NONE => merge (r1, r2, m) + | SOME y => merge (r1, r2, insert(m, k, y)) + (* end case *)) + in + merge (listItemsi m1, listItemsi m2, empty) + end + fun mergeWithi f (m1, m2) = let + fun merge ([], [], m) = m + | merge ((k1, x1)::r1, [], m) = mergef (k1, SOME x1, NONE, r1, [], m) + | merge ([], (k2, x2)::r2, m) = mergef (k2, NONE, SOME x2, [], r2, m) + | merge (m1 as ((k1, x1)::r1), m2 as ((k2, x2)::r2), m) = ( + case Key.compare (k1, k2) + of LESS => mergef (k1, SOME x1, NONE, r1, m2, m) + | EQUAL => mergef (k1, SOME x1, SOME x2, r1, r2, m) + | GREATER => mergef (k2, NONE, SOME x2, m1, r2, m) + (* end case *)) + and mergef (k, x1, x2, r1, r2, m) = (case f (k, x1, x2) + of NONE => merge (r1, r2, m) + | SOME y => merge (r1, r2, insert(m, k, y)) + (* end case *)) + in + merge (listItemsi m1, listItemsi m2, empty) + end + + (* this is a generic implementation of filter. It should + * be specialized to the data-structure at some point. + *) + fun filter predFn m = let + fun f (key, item, m) = if predFn item + then insert(m, key, item) + else m + in + foldli f empty m + end + fun filteri predFn m = let + fun f (key, item, m) = if predFn(key, item) + then insert(m, key, item) + else m + in + foldli f empty m + end + + (* this is a generic implementation of mapPartial. It should + * be specialized to the data-structure at some point. + *) + fun mapPartial f m = let + fun g (key, item, m) = (case f item + of NONE => m + | (SOME item') => insert(m, key, item') + (* end case *)) + in + foldli g empty m + end + fun mapPartiali f m = let + fun g (key, item, m) = (case f(key, item) + of NONE => m + | (SOME item') => insert(m, key, item') + (* end case *)) + in + foldli g empty m + end + + (* check the elements of a map with a predicate and return true if + * any element satisfies the predicate. Return false otherwise. + * Elements are checked in key order. + *) + fun exists pred = let + fun exists' E = false + | exists' (T{value, left, right, ...}) = + exists' left orelse pred value orelse exists' right + in + exists' + end + fun existsi pred = let + fun exists' E = false + | exists' (T{key, value, left, right, ...}) = + exists' left orelse pred(key, value) orelse exists' right + in + exists' + end + + (* check the elements of a map with a predicate and return true if + * they all satisfy the predicate. Return false otherwise. Elements + * are checked in key order. + *) + fun all pred = let + fun all' E = true + | all' (T{value, left, right, ...}) = + all' left andalso pred value andalso all' right + in + all' + end + fun alli pred = let + fun all' E = true + | all' (T{key, value, left, right, ...}) = + all' left andalso pred(key, value) andalso all' right + in + all' + end + + end (* functor BinaryMapFn *) diff --git a/smlnj-lib/Util/binary-set-fn.sml b/smlnj-lib/Util/binary-set-fn.sml new file mode 100644 index 0000000..f412367 --- /dev/null +++ b/smlnj-lib/Util/binary-set-fn.sml @@ -0,0 +1,474 @@ +(* binary-set-fn.sml + * + * COPYRIGHT (c) 2020 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * This code was adapted from Stephen Adams' binary tree implementation + * of applicative integer sets. + * + * Copyright 1992 Stephen Adams. + * + * This software may be used freely provided that: + * 1. This copyright notice is attached to any copy, derived work, + * or work including all or part of this software. + * 2. Any derived work must contain a prominent notice stating that + * it has been altered from the original. + * + * Name(s): Stephen Adams. + * Department, Institution: Electronics & Computer Science, + * University of Southampton + * Address: Electronics & Computer Science + * University of Southampton + * Southampton SO9 5NH + * Great Britian + * E-mail: sra@ecs.soton.ac.uk + * + * Comments: + * + * 1. The implementation is based on Binary search trees of Bounded + * Balance, similar to Nievergelt & Reingold, SIAM J. Computing + * 2(1), March 1973. The main advantage of these trees is that + * they keep the size of the tree in the node, giving a constant + * time size operation. + * + * 2. The bounded balance criterion is simpler than N&R's alpha. + * Simply, one subtree must not have more than `weight' times as + * many elements as the opposite subtree. Rebalancing is + * guaranteed to reinstate the criterion for weight>2.23, but + * the occasional incorrect behaviour for weight=2 is not + * detrimental to performance. + * + * 3. There are two implementations of union. The default, + * hedge_union, is much more complex and usually 20% faster. I + * am not sure that the performance increase warrants the + * complexity (and time it took to write), but I am leaving it + * in for the competition. It is derived from the original + * union by replacing the split_lt(gt) operations with a lazy + * version. The `obvious' version is called old_union. + * + * 4. Most time is spent in T', the rebalancing constructor. If my + * understanding of the output of * in the sml batch + * compiler is correct then the code produced by NJSML 0.75 + * (sparc) for the final case is very disappointing. Most + * invocations fall through to this case and most of these cases + * fall to the else part, i.e. the plain contructor, + * T(v,ln+rn+1,l,r). The poor code allocates a 16 word vector + * and saves lots of registers into it. In the common case it + * then retrieves a few of the registers and allocates the 5 + * word T node. The values that it retrieves were live in + * registers before the massive save. + * + * Modified to functor to support general ordered values + *) + +functor BinarySetFn (K : ORD_KEY) :> ORD_SET where type Key.ord_key = K.ord_key = + struct + + structure Key = K + + type item = K.ord_key + + datatype set + = E + | T of { + elt : item, + cnt : int, + left : set, + right : set + } + + fun numItems E = 0 + | numItems (T{cnt,...}) = cnt + + fun isEmpty E = true + | isEmpty _ = false + + fun minItem E = raise Empty + | minItem (T{elt, left=E, ...}) = elt + | minItem (T{left, ...}) = minItem left + + fun maxItem E = raise Empty + | maxItem (T{elt, right=E, ...}) = elt + | maxItem (T{right, ...}) = maxItem right + + fun mkT(v,n,l,r) = T{elt=v,cnt=n,left=l,right=r} + + (* N(v,l,r) = T(v,1+numItems(l)+numItems(r),l,r) *) + fun N(v,E,E) = mkT(v,1,E,E) + | N(v,E,r as T{cnt=n,...}) = mkT(v,n+1,E,r) + | N(v,l as T{cnt=n,...}, E) = mkT(v,n+1,l,E) + | N(v,l as T{cnt=n,...}, r as T{cnt=m,...}) = mkT(v,n+m+1,l,r) + + fun single_L (a,x,T{elt=b,left=y,right=z,...}) = N(b,N(a,x,y),z) + | single_L _ = raise Match + fun single_R (b,T{elt=a,left=x,right=y,...},z) = N(a,x,N(b,y,z)) + | single_R _ = raise Match + fun double_L (a,w,T{elt=c,left=T{elt=b,left=x,right=y,...},right=z,...}) = + N(b,N(a,w,x),N(c,y,z)) + | double_L _ = raise Match + fun double_R (c,T{elt=a,left=w,right=T{elt=b,left=x,right=y,...},...},z) = + N(b,N(a,w,x),N(c,y,z)) + | double_R _ = raise Match + + (* + ** val weight = 3 + ** fun wt i = weight * i + *) + fun wt (i : int) = i + i + i + + fun T' (v,E,E) = mkT(v,1,E,E) + | T' (v,E,r as T{left=E,right=E,...}) = mkT(v,2,E,r) + | T' (v,l as T{left=E,right=E,...},E) = mkT(v,2,l,E) + + | T' (p as (_,E,T{left=T _,right=E,...})) = double_L p + | T' (p as (_,T{left=E,right=T _,...},E)) = double_R p + + (* these cases almost never happen with small weight*) + | T' (p as (_,E,T{left=T{cnt=ln,...},right=T{cnt=rn,...},...})) = + if lnrn then single_R p else double_R p + + | T' (p as (_,E,T{left=E,...})) = single_L p + | T' (p as (_,T{right=E,...},E)) = single_R p + + | T' (p as (v,l as T{elt=lv,cnt=ln,left=ll,right=lr}, + r as T{elt=rv,cnt=rn,left=rl,right=rr})) = + if rn >= wt ln (*right is too big*) + then + let val rln = numItems rl + val rrn = numItems rr + in + if rln < rrn then single_L p else double_L p + end + else if ln >= wt rn (*left is too big*) + then + let val lln = numItems ll + val lrn = numItems lr + in + if lrn < lln then single_R p else double_R p + end + else mkT(v,ln+rn+1,l,r) + + fun add (E,x) = mkT(x,1,E,E) + | add (set as T{elt=v,left=l,right=r,cnt},x) = + case K.compare(x,v) of + LESS => T'(v,add(l,x),r) + | GREATER => T'(v,l,add(r,x)) + | EQUAL => mkT(x,cnt,l,r) + fun add' (s, x) = add(x, s) + + fun concat3 (E,v,r) = add(r,v) + | concat3 (l,v,E) = add(l,v) + | concat3 (l as T{elt=v1,cnt=n1,left=l1,right=r1}, v, + r as T{elt=v2,cnt=n2,left=l2,right=r2}) = + if wt n1 < n2 then T'(v2,concat3(l,v,l2),r2) + else if wt n2 < n1 then T'(v1,l1,concat3(r1,v,r)) + else N(v,l,r) + + fun split_lt (E,x) = E + | split_lt (T{elt=v,left=l,right=r,...},x) = + case K.compare(v,x) of + GREATER => split_lt(l,x) + | LESS => concat3(l,v,split_lt(r,x)) + | _ => l + + fun split_gt (E,x) = E + | split_gt (T{elt=v,left=l,right=r,...},x) = + case K.compare(v,x) of + LESS => split_gt(r,x) + | GREATER => concat3(split_gt(l,x),v,r) + | _ => r + + fun min (T{elt=v,left=E,...}) = v + | min (T{left=l,...}) = min l + | min _ = raise Match + + fun delmin (T{left=E,right=r,...}) = r + | delmin (T{elt=v,left=l,right=r,...}) = T'(v,delmin l,r) + | delmin _ = raise Match + + fun delete' (E,r) = r + | delete' (l,E) = l + | delete' (l,r) = T'(min r,l,delmin r) + + fun concat (E, s) = s + | concat (s, E) = s + | concat (t1 as T{elt=v1,cnt=n1,left=l1,right=r1}, + t2 as T{elt=v2,cnt=n2,left=l2,right=r2}) = + if wt n1 < n2 then T'(v2,concat(t1,l2),r2) + else if wt n2 < n1 then T'(v1,l1,concat(r1,t2)) + else T'(min t2,t1, delmin t2) + + + local + fun trim (lo,hi,E) = E + | trim (lo,hi,s as T{elt=v,left=l,right=r,...}) = + if K.compare(v,lo) = GREATER + then if K.compare(v,hi) = LESS then s else trim(lo,hi,l) + else trim(lo,hi,r) + + fun uni_bd (s,E,_,_) = s + | uni_bd (E,T{elt=v,left=l,right=r,...},lo,hi) = + concat3(split_gt(l,lo),v,split_lt(r,hi)) + | uni_bd (T{elt=v,left=l1,right=r1,...}, + s2 as T{elt=v2,left=l2,right=r2,...},lo,hi) = + concat3(uni_bd(l1,trim(lo,v,s2),lo,v), + v, + uni_bd(r1,trim(v,hi,s2),v,hi)) + (* inv: lo < v < hi *) + + (* all the other versions of uni and trim are + * specializations of the above two functions with + * lo=-infinity and/or hi=+infinity + *) + + fun trim_lo (_, E) = E + | trim_lo (lo,s as T{elt=v,right=r,...}) = + case K.compare(v,lo) of + GREATER => s + | _ => trim_lo(lo,r) + + fun trim_hi (_, E) = E + | trim_hi (hi,s as T{elt=v,left=l,...}) = + case K.compare(v,hi) of + LESS => s + | _ => trim_hi(hi,l) + + fun uni_hi (s,E,_) = s + | uni_hi (E,T{elt=v,left=l,right=r,...},hi) = + concat3(l,v,split_lt(r,hi)) + | uni_hi (T{elt=v,left=l1,right=r1,...}, + s2 as T{elt=v2,left=l2,right=r2,...},hi) = + concat3(uni_hi(l1,trim_hi(v,s2),v),v,uni_bd(r1,trim(v,hi,s2),v,hi)) + + fun uni_lo (s,E,_) = s + | uni_lo (E,T{elt=v,left=l,right=r,...},lo) = + concat3(split_gt(l,lo),v,r) + | uni_lo (T{elt=v,left=l1,right=r1,...}, + s2 as T{elt=v2,left=l2,right=r2,...},lo) = + concat3(uni_bd(l1,trim(lo,v,s2),lo,v),v,uni_lo(r1,trim_lo(v,s2),v)) + + fun uni (s,E) = s + | uni (E,s) = s + | uni (T{elt=v,left=l1,right=r1,...}, + s2 as T{elt=v2,left=l2,right=r2,...}) = + concat3(uni_hi(l1,trim_hi(v,s2),v), v, uni_lo(r1,trim_lo(v,s2),v)) + + in + val hedge_union = uni + end + + (* The old_union version is about 20% slower than + * hedge_union in most cases + *) + fun old_union (E,s2) = s2 + | old_union (s1,E) = s1 + | old_union (T{elt=v,left=l,right=r,...},s2) = + let val l2 = split_lt(s2,v) + val r2 = split_gt(s2,v) + in + concat3(old_union(l,l2),v,old_union(r,r2)) + end + + val empty = E + fun singleton x = T{elt=x,cnt=1,left=E,right=E} + + fun addList (s,l) = List.foldl (fn (i,s) => add(s,i)) s l + + fun fromList l = addList (E, l) + + val add = add + + fun member (set, x) = let + fun pk E = false + | pk (T{elt=v, left=l, right=r, ...}) = ( + case K.compare(x,v) + of LESS => pk l + | EQUAL => true + | GREATER => pk r + (* end case *)) + in + pk set + end + + local + (* true if every item in t is in t' *) + fun treeIn (t,t') = let + fun isIn E = true + | isIn (T{elt,left=E,right=E,...}) = member(t',elt) + | isIn (T{elt,left,right=E,...}) = + member(t',elt) andalso isIn left + | isIn (T{elt,left=E,right,...}) = + member(t',elt) andalso isIn right + | isIn (T{elt,left,right,...}) = + member(t',elt) andalso isIn left andalso isIn right + in + isIn t + end + in + fun isSubset (E,_) = true + | isSubset (_,E) = false + | isSubset (t as T{cnt=n,...},t' as T{cnt=n',...}) = + (n<=n') andalso treeIn (t,t') + + fun equal (E,E) = true + | equal (t as T{cnt=n,...},t' as T{cnt=n',...}) = + (n=n') andalso treeIn (t,t') + | equal _ = false + end + + local + fun next ((t as T{right, ...})::rest) = (t, left(right, rest)) + | next _ = (E, []) + and left (E, rest) = rest + | left (t as T{left=l, ...}, rest) = left(l, t::rest) + in + fun compare (s1, s2) = let + fun cmp (t1, t2) = (case (next t1, next t2) + of ((E, _), (E, _)) => EQUAL + | ((E, _), _) => LESS + | (_, (E, _)) => GREATER + | ((T{elt=e1, ...}, r1), (T{elt=e2, ...}, r2)) => ( + case Key.compare(e1, e2) + of EQUAL => cmp (r1, r2) + | order => order + (* end case *)) + (* end case *)) + in + cmp (left(s1, []), left(s2, [])) + end + + fun disjoint (s1, s2) = let + fun walk (t1, t2) = (case (next t1, next t2) + of ((E, _), _) => true + | (_, (E, _)) => true + | ((T{elt=e1, ...}, r1), (T{elt=e2, ...}, r2)) => ( + case Key.compare(e1, e2) + of LESS => walk (r1, t2) + | EQUAL => false + | GREATER => walk (t1, r2) + (* end case *)) + (* end case *)) + in + walk (left(s1, []), left(s2, [])) + end + end + + fun delete (E,x) = raise LibBase.NotFound + | delete (set as T{elt=v,left=l,right=r,...},x) = + case K.compare(x,v) of + LESS => T'(v,delete(l,x),r) + | GREATER => T'(v,l,delete(r,x)) + | _ => delete'(l,r) + + val union = hedge_union + + fun intersection (E, _) = E + | intersection (_, E) = E + | intersection (s, T{elt=v,left=l,right=r,...}) = let + val l2 = split_lt(s,v) + val r2 = split_gt(s,v) + in + if member(s,v) + then concat3(intersection(l2,l),v,intersection(r2,r)) + else concat(intersection(l2,l),intersection(r2,r)) + end + + fun difference (E,s) = E + | difference (s,E) = s + | difference (s, T{elt=v,left=l,right=r,...}) = + let val l2 = split_lt(s,v) + val r2 = split_gt(s,v) + in + concat(difference(l2,l),difference(r2,r)) + end + + fun subtract (s, item) = difference (s, singleton item) + fun subtract' (item, s) = subtract (s, item) + + fun subtractList (l, items) = let + val items' = List.foldl (fn (x, set) => add(set, x)) E items + in + difference (l, items') + end + + fun map f set = let + fun map'(acc, E) = acc + | map'(acc, T{elt,left,right,...}) = + map' (add (map' (acc, left), f elt), right) + in + map' (E, set) + end + + fun mapPartial f set = let + fun map' (acc, E) = acc + | map' (acc, T{elt, left, right, ...}) = let + val acc = map' (acc, left) + in + case f elt + of NONE => map' (acc, right) + | SOME elt' => map' (add (acc, elt'), right) + (* end case *) + end + in + map' (E, set) + end + + fun app apf = + let fun apply E = () + | apply (T{elt,left,right,...}) = + (apply left;apf elt; apply right) + in + apply + end + + fun foldl f b set = let + fun foldf (E, b) = b + | foldf (T{elt,left,right,...}, b) = + foldf (right, f(elt, foldf (left, b))) + in + foldf (set, b) + end + + fun foldr f b set = let + fun foldf (E, b) = b + | foldf (T{elt,left,right,...}, b) = + foldf (left, f(elt, foldf (right, b))) + in + foldf (set, b) + end + + fun toList set = foldr (op::) [] set + + fun filter pred set = + foldl (fn (item, s) => if (pred item) then add(s, item) else s) + empty set + + fun partition pred set = + foldl + (fn (item, (s1, s2)) => + if (pred item) then (add(s1, item), s2) else (s1, add(s2, item)) + ) + (empty, empty) set + + fun exists p E = false + | exists p (T{elt, left, right,...}) = + (exists p left) orelse (p elt) orelse (exists p right) + + fun all p E = true + | all p (T{elt, left, right,...}) = + (all p left) andalso (p elt) andalso (all p right) + + fun find p E = NONE + | find p (T{elt,left,right,...}) = (case find p left + of NONE => if (p elt) + then SOME elt + else find p right + | a => a + (* end case *)) + + (* deprecated *) + val listItems = toList + + end (* BinarySetFn *) diff --git a/smlnj-lib/Util/bit-array-sig.sml b/smlnj-lib/Util/bit-array-sig.sml new file mode 100644 index 0000000..ee4466a --- /dev/null +++ b/smlnj-lib/Util/bit-array-sig.sml @@ -0,0 +1,116 @@ +(* bit-array-sig.sml + * + * COPYRIGHT (c) 2020 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * Signature for mutable bit array. The model here treats bit array as an + * array of bools. + *) + +signature BIT_ARRAY = + sig + + include MONO_ARRAY + + val fromString : string -> array option + (* The string argument gives a hexadecimal + * representation of the bits set in the + * array. Characters 0-9, a-f and A-F are + * allowed. For example, + * fromString "1af8" = 0001101011111000 + * (by convention, 0 corresponds to false and 1 corresponds + * to true, bit 0 appears on the right, + * and indices increase to the left) + * The length of the array will be 4*(size string). + * Returns NONE if a non-hexadecimal character + * appears in the string. + *) + + val bits : (int * int list) -> array + (* Create array of the given length with the indices of its set bits + * given by the list argument. + * Raises Subscript if a list item is < 0 or >= length. + *) + + val getBits : array -> int list + (* Returns list of bits set in bit array, in increasing + * order of indices. + *) + + val toString : array -> string + (* Inverse of stringToBits. + * The bit array is zero-padded to the next + * length that is a multiple of 4. + *) + + val isZero : array -> bool + (* Returns true if and only if no bits are set. *) + + val extend0 : (array * int) -> array + val extend1 : (array * int) -> array + (* Extend bit array by 0's or 1's to given length. + * If bit array is already >= argument length, return a copy + * of the bit array. + * Raises Size if length < 0. + *) + + val eqBits : (array * array) -> bool + (* true if set bits are identical *) + val equal : (array * array) -> bool + (* true if same length and same set bits *) + + val andb : (array * array * int) -> array + val orb : (array * array * int) -> array + val xorb : (array * array * int) -> array + (* Create new array of the given length + * by logically combining bits of original + * array using and, or and xor, respectively. + * If necessary, the array are + * implicitly extended by 0 to be the same length + * as the new array. + *) + + val notb : array -> array + (* Create new array with all bits of original + * array inverted. + *) + + val >> : (array * word) -> array + val << : (array * word) -> array + (* shift operations *) + + val lshift : (array * int) -> array (* DEPRECATED *) + (* lshift(ba,n) creates a new array by + * inserting n 0's on the right of ba. + * The new array has length n + length ba. + *) + + val rshift : (array * int) -> array (* DEPRECATED *) + (* rshift(ba,n) creates a new array of + * of length max(0,length ba - n) consisting + * of bits n,n+1,...,length ba - 1 of ba. + * If n >= length ba, the new array has length 0. + *) + + (* mutable operations for array *) + + val setBit : (array * int) -> unit + val clrBit : (array * int) -> unit + (* Update value at given index to new value. + * Raises Subscript if index < 0 or >= length. + * setBit(ba,i) = update(ba,i,true) + * clrBit(ba,i) = update(ba,i,false) + *) + + val union : array -> array -> unit + val intersection : array -> array -> unit + (* Or (and) second bitarray into the first. Second is + * implicitly truncated or extended by 0's to match + * the length of the first. + *) + + val complement : array -> unit + (* Invert all bits. *) + + end (* BIT_ARRAY *) + where type elem = bool diff --git a/smlnj-lib/Util/bit-array.sml b/smlnj-lib/Util/bit-array.sml new file mode 100644 index 0000000..a034760 --- /dev/null +++ b/smlnj-lib/Util/bit-array.sml @@ -0,0 +1,801 @@ +(* bit-array.sml + * + * COPYRIGHT (c) 2020 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + *) + +structure BitArray :> BIT_ARRAY = + struct + + structure Vector = + struct + local + structure W8A = Word8Array + structure W8V = Word8Vector + val sub = W8A.sub + val << = Word8.<< + val >> = Word8.>> + val ++ = Word8.orb + val & = Word8.andb + infix sub << >> ++ & + + fun badArg(f,msg) = LibBase.failure{module="BitArray",func=f,msg=msg} + + val hexs = Byte.stringToBytes "0123456789abcdef" + val lomask = W8V.fromList [0wx00, 0wx01, 0wx03, 0wx07, + 0wx0f, 0wx1f, 0wx3f, 0wx7f, 0wxff] + val himask = W8V.fromList [0wxff, 0wxfe, 0wxfc, 0wxf8, + 0wxf0, 0wxe0, 0wxc0, 0wx80, 0wx00] + fun hibits i = W8V.sub(himask,i) + fun lobits i = W8V.sub(lomask,i) + fun wmask7 i = Word.andb(Word.fromInt i, 0w7) + val mask7 = Word.toIntX o wmask7 + + (* return the number of bytes needed to represent the given number of bits *) + fun sizeOf n = ((n + 7) div 8) + + (* return the index of byte that holds bit i *) + fun byteOf i = (i div 8) + + (* return the mask for bit i in a byte *) + fun bit i : Word8.word = (0w1 << Word.andb(Word.fromInt i, 0w7)) + + in + + (* A bitvector is stored in a Word8Array.array. + * Bit n is stored in bit (n mod 8) of word (n div 8). + * We maintain the invariant that all bits >= nbits are 0. + *) + type elem = bool + datatype vector = BA of {nbits : int, bits : W8A.array} + + val maxLen = 8*Word8Array.maxLen + handle Overflow => Word8Array.maxLen (* would valOf Int.maxInt be better? *) + + fun array (0, init) = BA{nbits=0,bits=W8A.array(0, 0w0)} + | array (len, false) = BA{nbits=len,bits=W8A.array(sizeOf len, 0w0)} + | array (len, true) = let + val sz = sizeOf len + val bits = W8A.array (sz, 0wxff) + in + case len mod 8 of 0 => () | idx => W8A.update(bits,sz-1,lobits idx); + BA{nbits = len, bits = bits} + end + + val char0 = Byte.charToByte #"0" + val char9 = Byte.charToByte #"9" + val charA = Byte.charToByte #"A" + val charF = Byte.charToByte #"F" + val chara = Byte.charToByte #"a" + val charf = Byte.charToByte #"f" + fun fromString s = let + val len = 4*(size s) (* 4 bits per hex digit *) + val (bv as BA{bits, ...}) = array (len, false) + fun nibble x = let + val d = Byte.charToByte x + in + if (char0 <= d) andalso (d <= char9) + then d - char0 + else if (charA <= d) andalso (d <= charF) + then d - charA + 0w10 + else if (chara <= d) andalso (d <= charf) + then d - chara + 0w10 + else raise Domain + end + fun init ([], _) = SOME bv + | init ([x], i) = (W8A.update(bits, i, nibble x); SOME bv) + | init (x1::x2::r, i) = ( + W8A.update(bits, i, ((nibble x2) << 0w4) ++ (nibble x1)); + init (r, i+1)) + in + init (rev(explode s), 0) + end + handle _ => NONE + + fun toString (BA{nbits=0,...}) = "" + | toString (BA{nbits,bits}) = let + val len = (nbits + 3) div 4 + val buf = W8A.array (len, 0w0) + fun put (i,j) = let + val v = bits sub i + in + W8A.update (buf, j, W8V.sub(hexs, Word8.toInt(v & 0wx0f))); + W8A.update (buf, j-1, W8V.sub(hexs, Word8.toInt(v >> 0w4))); + put (i+1, j-2) + end + in + (put(0,len-1)) handle _ => (); + Byte.bytesToString (W8A.vector buf) + end + + fun bits (len,l) = let + val (bv as BA{bits, ...}) = array (len, false) + fun init i = let + val idx = byteOf i + val b = 0w1 << Word.andb(Word.fromInt i, 0w7) + in + W8A.update (bits, idx, ((bits sub idx) ++ b)) + end + in + List.app init l; + bv + end + + fun fromList [] = array (0, false) + | fromList l = let + val len = length l + val ba as BA{bits,...} = array (len, false) + fun getbyte ([],_,b) = ([],b) + | getbyte (l,0w0,b) = (l,b) + | getbyte (false::r,bit,b) = getbyte(r,bit << 0w1,b) + | getbyte (true::r,bit,b) = getbyte(r,bit << 0w1,b ++ bit) + fun fill ([],_) = () + | fill (l,idx) = let + val (l',byte) = getbyte (l,0w1,0w0) + in + if byte <> 0w0 then W8A.update(bits,idx,byte) else (); + fill (l',idx+1) + end + in + fill (l,0); + ba + end + + fun tabulate (len, genf) = let + val ba as BA{bits,...} = array (len, false) + fun getbyte (cnt,0w0,b) = (cnt,b) + | getbyte (cnt,bit,b) = + if cnt = len then (cnt,b) + else case genf cnt of + false => getbyte(cnt+1,bit << 0w1,b) + | true => getbyte(cnt+1,bit << 0w1,b ++ bit) + fun fill (cnt,idx) = + if cnt = len then () + else let + val (cnt',byte) = getbyte (cnt,0w1,0w0) + in + if byte <> 0w0 then W8A.update(bits,idx,byte) else (); + fill (cnt',idx+1) + end + in + fill (0,0); + ba + end + + fun getBits (BA{nbits = 0, ...}) = [] + | getBits (BA{nbits, bits}) = let + fun extractBits (_, 0w0, l) = l + | extractBits (bit, data, l) = let + val l' = if (data & 0wx80) = 0w0 then l else bit :: l + in + extractBits (bit-1, data<<0w1, l') + end + fun extract (~1, _, l) = l + | extract (i, bitnum, l) = + extract (i-1,bitnum-8,extractBits (bitnum, bits sub i, l)) + val maxbit = nbits-1 + val hiByte = byteOf maxbit + val delta = Word.andb(Word.fromInt maxbit, 0w7) + in + extract (hiByte-1, maxbit - (Word.toIntX delta) - 1, + extractBits(maxbit,(bits sub hiByte) << (0w7-delta),[])) + end + + fun bitOf (BA{nbits, bits}, i) = + if i >= nbits + then raise Subscript + else ((W8A.sub (bits,byteOf i)) & (bit i)) <> 0w0 + + fun isZero (BA{bits,...}) = let + fun isz i = (bits sub i) = 0w0 andalso (isz (i+1)) + in + isz 0 + end handle _ => true + + fun copybits (bits, newbits) = let + fun cpy i = (W8A.update(newbits, i, bits sub i); cpy(i+1)) + in + (cpy 0) handle _ => () + end + + fun mkCopy (BA{nbits, bits}) = let + val ba as BA{bits=newbits,...} = array (nbits, false) + in + copybits(bits, newbits); + ba + end + + fun eqBits arg = let + fun order (arg as (ba as BA{nbits,...},ba' as BA{nbits=nbits',...})) = + if nbits >= nbits' + then arg + else (ba',ba) + val (BA{nbits,bits}, BA{bits=bits',nbits=nbits'}) = order arg + val minlen = W8A.length bits' + fun iszero i = (bits sub i) = 0w0 andalso (iszero (i+1)) + fun cmp i = + if i = minlen + then iszero i + else (bits sub i) = (bits' sub i) andalso cmp(i+1) + in + (cmp 0) handle _ => true + end + fun equal (arg as (BA{nbits,...},BA{nbits=nbits',...})) = + nbits = nbits' andalso eqBits arg + + fun extend0 (ba as BA{nbits, bits}, n) = + if (nbits >= n) + then mkCopy ba + else let + val newbits = W8A.array(sizeOf n, 0w0) + fun cpy i = (W8A.update(newbits, i, bits sub i); cpy(i+1)) + in + (cpy 0) handle _ => (); + BA{nbits=n, bits=newbits} + end + + fun extend1 (ba as BA{nbits, bits}, n) = + if (nbits >= n) + then mkCopy ba + else let + val len = sizeOf n + val newbits = W8A.array(len, 0wxff) + val nbytes = byteOf nbits + val left = mask7 nbits + fun last () = (case mask7 n + of 0 => () + | lft => W8A.update(newbits, len-1, (newbits sub (len-1)) & (lobits lft)) + (* end case *)) + fun adjust j = ( + if left = 0 + then () + else W8A.update(newbits, j, (bits sub j) ++ (hibits left)); + last ()) + fun cpy i = + if i = nbytes + then adjust i + else (W8A.update(newbits, i, bits sub i); cpy(i+1)) + in + cpy 0; + BA{nbits=n, bits=newbits} + end + + fun fit(lb,rb,rbits) = (rb & (lobits rbits)) ++ (lb & (hibits rbits)) + + fun simpleCopy (src,dest,lastbyte,len) arg = let + fun last (s,d) = (case mask7 len + of 0 => W8A.update(dest,d,src sub s) + | lft => W8A.update(dest,d,fit(dest sub d,src sub s,lft)) + (* end case *)) + fun cpy (arg as (s,d)) = + if d = lastbyte + then last arg + else (W8A.update(dest,d,src sub s);cpy(s+1,d+1)) + in + cpy arg + end + + (* rightblt copies bits [shft,shft+len-1] of src to + * bits [0,len-1] in target. + * Assume all parameters and lengths are okay. + *) + fun rightblt (src,dest,shft,len) = let + val byte = byteOf shft and bitshift = wmask7 shft + fun copy lastbyte = let + val lshift = 0w8 - bitshift + fun finish (sb,s,d) = let + val left = mask7 (len - 1) + 1 + in + if Word.fromInt left <= lshift (* enough bits in sb *) + then W8A.update(dest,d,fit(dest sub d,sb >> bitshift,left)) + else let + val sb' = (sb >> bitshift) ++ ((src sub s) << lshift) + in + W8A.update(dest,d,fit(dest sub d,sb',left)) + end + end + fun loop (arg as (sb, s, d)) = + if d = lastbyte then finish arg + else let + val sb' = src sub s + in + W8A.update(dest,d,(sb >> bitshift) ++ ((sb' << lshift) & 0wxFF)); + loop(sb',s+1,d+1) + end + in + loop (src sub byte, byte+1, 0) + end + in + if bitshift = 0w0 then simpleCopy (src,dest,byteOf(len-1),len) (byte,0) + else copy (byteOf (len-1)) + end + + (* leftblt copies bits [0,len-1] of src to + * bits [shft,shft+len-1] in target. + * Assume all parameters and lengths are okay. + *) + fun leftblt (_,_,_,0) = () + | leftblt (src,dest,shft,len) = let + val byte = byteOf shft and bitshift = wmask7 shft + val lastbyte = byteOf (shft+len-1) + fun sliceCopy (s,d,len) = let + val mask = (lobits len) << bitshift + val sb = ((src sub s) << bitshift) & mask + val db = (dest sub d) & (Word8.notb mask) + in + W8A.update(dest,d,sb ++ db) + end + fun copy () = let + val sb = src sub 0 + val rshift = 0w8 - bitshift + fun finish (sb,s,d) = let + val left = mask7(shft + len - 1) + 1 + in + if Word.fromInt left <= bitshift (* enough bits in sb *) + then W8A.update(dest,d,fit(dest sub d,sb >> rshift,left)) + else let + val sb' = (sb >> rshift) ++ ((src sub s) << bitshift) + in + W8A.update(dest,d,fit(dest sub d,sb',left)) + end + end + fun loop (arg as (sb, s, d)) = + if d = lastbyte then finish arg + else let + val sb' = src sub s + in + W8A.update(dest,d,(sb >> rshift) ++ ((sb' << bitshift) & 0wxFF)); + loop(sb',s+1,d+1) + end + in + W8A.update(dest,byte,fit(sb << bitshift, dest sub byte, Word.toIntX bitshift)); + loop (sb, 1, byte+1) + end + in + if bitshift = 0w0 then simpleCopy (src,dest,lastbyte,len) (0,byte) + else if lastbyte = byte then sliceCopy (0,byte,len) + else copy () + end + + fun lshift_w (ba, 0w0) = mkCopy ba + | lshift_w (BA{nbits, bits}, n) = let + val shft = Word.toIntX n + val newlen = nbits + shft + val newbits = W8A.array(sizeOf newlen, 0w0) + in + leftblt(bits,newbits,shft,nbits); + BA{nbits=newlen,bits=newbits} + end + + fun lshift (ba, shft) = if shft < 0 + then badArg("lshift", "negative shift") + else lshift_w(ba, Word.fromInt shft) + + fun op @ (BA{nbits,bits},BA{nbits=nbits',bits=bits'}) = let + val newlen = nbits + nbits' + val newbits = W8A.array(sizeOf newlen, 0w0) + in + copybits(bits',newbits); + leftblt(bits,newbits,nbits',nbits); + BA{nbits=newlen,bits=newbits} + end + + fun concat [] = array (0, false) + | concat [ba] = mkCopy ba + | concat (l as (BA{bits,nbits}::tl)) = let + val newlen = (foldl (fn (BA{nbits,...},a) => a+nbits) 0 l) + handle Overflow => raise Size + val newbits = W8A.array(sizeOf newlen,0w0) + fun cpy (BA{bits,nbits}, shft) = ( + leftblt(bits,newbits,shft,nbits); + shft+nbits + ) + in + copybits(bits,newbits); + foldl cpy nbits tl; + BA{nbits=newlen,bits=newbits} + end + + fun slice (ba as BA{nbits,bits},sbit,0) = array (0, false) + | slice (ba as BA{nbits,bits},sbit,len) = let + val newbits = W8A.array(sizeOf len,0w0) + in + rightblt(bits,newbits,sbit,len); + BA{nbits=len,bits=newbits} + end + + fun extract (ba as BA{nbits,bits},sbit,SOME len) = + if sbit < 0 orelse len < 0 orelse sbit > nbits - len + then raise Subscript + else slice (ba,sbit,len) + | extract (ba as BA{nbits,bits},sbit,NONE) = + if sbit < 0 orelse sbit > nbits + then raise Subscript + else slice (ba,sbit,nbits-sbit) + + fun rshift_w (ba, 0w0) = mkCopy ba + | rshift_w (BA{nbits, bits}, n) = let + val shft = Word.toIntX n + in + if shft >= nbits + then array (0, false) + else let + val newlen = nbits - shft + val newbits = W8A.array(sizeOf newlen,0w0) + in + rightblt (bits, newbits, shft, newlen); + BA{nbits=newlen, bits=newbits} + end + end + + fun rshift (ba, shft) = if shft < 0 + then badArg("rshift", "negative shift") + else rshift_w(ba, Word.fromInt shft) + + fun trim (tgt,len) = + case mask7 len of + 0 => () + | lft => let + val n = (W8A.length tgt) - 1 + in + W8A.update(tgt, n, (tgt sub n) & (lobits lft)) + end + + fun andBlend (BA{nbits,bits},BA{bits=bits',nbits=nbits'},tgt,len) = let + fun copy i = (W8A.update(tgt,i,(bits sub i)&(bits' sub i));copy(i+1)) + in + (copy 0) handle _ => (); + trim (tgt,len) + end + + fun orBlend f (ba,ba',tgt,len) = let + fun order (arg as (ba as BA{nbits,...},ba' as BA{nbits=nbits',...})) = + if nbits >= nbits' + then arg + else (ba',ba) + val (BA{nbits,bits},BA{bits=bits',nbits=nbits'}) = order (ba,ba') + val bnd = W8A.length bits' (* number of bytes in smaller array *) + fun copy2 i = (W8A.update(tgt,i,bits sub i);copy2(i+1)) + fun copy1 i = + if i = bnd + then copy2 bnd + else (W8A.update(tgt,i,f(bits sub i, bits' sub i));copy1(i+1)) + in + (copy1 0) handle _ => (); + trim (tgt,len) + end + + fun newblend blendf (ba,ba',len) = let + val nb as BA{bits,...} = array (len, false) + in + blendf(ba,ba',bits,len); + nb + end + + val orb = newblend (orBlend Word8.orb) + val xorb = newblend (orBlend Word8.xorb) + val andb = newblend andBlend + + fun union ba ba' = let + val BA{bits,nbits} = ba + val BA{bits=bits',nbits=nbits'} = ba' + val nbytes = W8A.length bits + val nbytes' = W8A.length bits' + fun copy bnd = let + fun loop i = + if i = bnd + then () + else (W8A.update(bits,i,(bits sub i) ++ (bits' sub i));loop(i+1)) + in + loop 0 + end + in + if nbytes <= nbytes' + then (copy nbytes; trim (bits,nbits)) + else copy nbytes' + end + + fun intersection ba ba' = let + val BA{bits,nbits} = ba + val BA{bits=bits',nbits=nbits'} = ba' + val nbytes = W8A.length bits + val nbytes' = W8A.length bits' + fun zeroFrom(b,j) = let + fun loop i = (W8A.update(b,i,0w0);loop(i+1)) + in + (loop j) handle _ => () + end + in + if nbytes <= nbytes' + then andBlend(ba,ba',bits,nbytes * 8) + else ( + andBlend(ba,ba',bits,nbytes' * 8); + zeroFrom (bits,nbytes') + ) + end + + fun flip (nbits, src, tgt) = let + val nbytes = byteOf nbits and left = mask7 nbits + fun last j = + W8A.update(tgt,j,(Word8.notb(src sub j)) & (lobits left)) + fun flp i = + if i = nbytes + then if left = 0 then () else last i + else (W8A.update(tgt,i,Word8.notb(src sub i) & 0wxff); flp(i+1)) + in + flp 0 + end + + fun notb (BA{nbits, bits}) = let + val ba as BA{bits=newbits,...} = array (nbits, false) + in + flip (nbits,bits,newbits); + ba + end + + fun setBit (BA{nbits, bits}, i) = let + val j = byteOf i and b = bit i + in + if i >= nbits + then raise Subscript + else W8A.update (bits, j, ((bits sub j) ++ b)) + end + + fun clrBit (BA{nbits, bits}, i) = let + val j = byteOf i and b = Word8.notb(bit i) + in + if i >= nbits + then raise Subscript + else W8A.update (bits, j, ((bits sub j) & b)) + end + + fun complement (BA{bits,nbits}) = flip(nbits, bits, bits) + + fun update (ba,i,true) = setBit (ba,i) + | update (ba,i,_) = clrBit (ba,i) + + fun op sub arg = bitOf arg + + fun length (BA{nbits, ...}) = nbits + + fun app f (BA{nbits=0,bits}) = () + | app f (BA{nbits,bits}) = let + val last = byteOf (nbits-1) + fun loop (0,_) = () + | loop (n,byte) = (f ((byte&0w1) = 0w1); loop (n-1,byte >> 0w1)) + fun f' (i,byte) = + if i < last then loop (8,byte) + else loop (mask7 (nbits - 1) + 1, byte) + in + W8A.appi f' bits + end + + (* FIX: Reimplement using W8A.foldi *) + fun foldl f a (BA{nbits,bits}) = let + fun loop (i,a) = + if i = nbits then a + else let + val b = ((W8A.sub (bits,byteOf i)) & (bit i)) <> 0w0 + in + loop (i+1, f(b,a)) + end + in + loop (0,a) + end + + (* FIX: Reimplement using W8A.foldr *) + fun foldr f a (BA{nbits,bits}) = let + fun loop (~1,a) = a + | loop (i,a) = let + val b = ((W8A.sub (bits,byteOf i)) & (bit i)) <> 0w0 + in + loop (i-1, f(b,a)) + end + in + loop (nbits-1,a) + end + + fun valid (nbits,sbit,NONE) = + if sbit < 0 orelse sbit > nbits + then raise Subscript + else nbits - sbit + | valid (nbits,sbit,SOME len) = + if sbit < 0 orelse len < 0 orelse sbit > nbits - len + then raise Subscript + else len + + (* FIX: Reimplement using W8A.appi *) + fun appi' f (BA{nbits=0,bits},_,_) = () + | appi' f (BA{nbits,bits},sbit,l) = let + val len = valid (nbits, sbit, l) + fun loop (_, 0) = () + | loop (i, n) = let + val b = ((W8A.sub (bits,byteOf i)) & (bit i)) <> 0w0 + in + f(i,b); + loop (i+1,n-1) + end + in + loop (sbit,len) + end + + (* FIX: Reimplement using W8A.foldi *) + fun foldli' f a (BA{nbits,bits},sbit,l) = let + val len = valid (nbits, sbit, l) + val last = sbit+len + fun loop (i,a) = + if i = last then a + else let + val b = ((W8A.sub (bits,byteOf i)) & (bit i)) <> 0w0 + in + loop (i+1, f(i,b,a)) + end + in + loop (sbit,a) + end + + (* FIX: Reimplement using W8A.foldr *) + fun foldri' f a (BA{nbits,bits},sbit,l) = let + val len = valid (nbits, sbit, l) + fun loop (i,a) = + if i < sbit then a + else let + val b = ((W8A.sub (bits,byteOf i)) & (bit i)) <> 0w0 + in + loop (i-1, f(i,b,a)) + end + in + loop (sbit+len-1,a) + end + + (* FIX: Reimplement using general-purpose copy *) + fun copy' {src = src as BA{nbits,bits},si,len,dst,di} = let + val l = valid (nbits, si, len) + val BA{nbits=nbits',bits=bits'} = dst + val _ = if di < 0 orelse nbits' - di < l then raise Subscript + else () + val last = si + l + fun loop (si,di) = + if si = last then () + else ( + if bitOf (src, si) then setBit(dst,di) else clrBit(dst,di); + loop (si+1,di+1) + ) + in + loop (si,di) + end + + fun modify f (BA{nbits=0,bits}) = () + | modify f (BA{nbits,bits}) = let + val last = byteOf (nbits-1) + fun loop (0,_,a,_) = a + | loop (n,byte,a,mask) = + if f ((byte&mask) = mask) + then loop (n-1,byte, a&mask, mask << 0w1) + else loop (n-1,byte, a, mask << 0w1) + fun f' (i,byte) = + if i < last then loop (8,byte,0w0,0w1) + else loop (mask7 (nbits - 1) + 1, byte,0w0,0w1) + in + W8A.modifyi f' bits + end + + (* FIX: Reimplement using W8A.modifyi *) + fun modifyi' f (BA{nbits=0,bits},sbit,l) = () + | modifyi' f (BA{nbits,bits},sbit,l) = let + val len = valid (nbits, sbit, l) + val last = sbit+len + fun loop i = + if i = last then () + else let + val index = byteOf i + val biti = bit i + val byte = W8A.sub (bits,index) + val b = (byte & biti) <> 0w0 + val b' = f(i,b) + in + if b = b' then () + else if b' then W8A.update(bits,index,byte ++ biti) + else W8A.update(bits,index,byte & (Word8.notb biti)); + loop (i+1) + end + in + loop sbit + end + + (* additional operation from Basis Library proposal 2015-003 *) + fun toList (BA{nbits, bits}) = let + fun loop (~1, a) = a + | loop (i, a) = let + val b = ((Word8Array.sub (bits,byteOf i)) & (bit i)) <> 0w0 + in + loop (i-1, b :: a) + end + in + loop (nbits-1, []) + end + + end (* local *) + + val << = lshift_w + val >> = rshift_w + end (* structure Vector *) + + open Vector + type array = vector + + fun vector a = a + + fun copy { src, dst, di } = copy' { src = src, dst = dst, di = di, + si = 0, len = NONE } + + val copyVec = copy + + fun appi f a = appi' f (a, 0, NONE) + fun modifyi f a = modifyi' f (a, 0, NONE) + fun foldli f init a = foldli' f init (a, 0, NONE) + fun foldri f init a = foldri' f init (a, 0, NONE) + + (* These are slow, pedestrian implementations.... *) + fun findi p a = let + val len = length a + fun fnd i = + if i >= len then NONE + else let val x = sub (a, i) + in + if p (i, x) then SOME (i, x) else fnd (i + 1) + end + in + fnd 0 + end + + fun find p a = let + val len = length a + fun fnd i = + if i >= len then NONE + else let val x = sub (a, i) + in + if p x then SOME x else fnd (i + 1) + end + in + fnd 0 + end + + fun exists p a = let + val len = length a + fun ex i = i < len andalso (p (sub (a, i)) orelse ex (i + 1)) + in + ex 0 + end + + fun all p a = let + val len = length a + fun al i = i >= len orelse (p (sub (a, i)) andalso al (i + 1)) + in + al 0 + end + + fun collate c (a1, a2) = let + val l1 = length a1 + val l2 = length a2 + val l12 = Int.min (l1, l2) + fun col i = + if i >= l12 then Int.compare (l1, l2) + else case c (sub (a1, i), sub (a2, i)) of + EQUAL => col (i + 1) + | unequal => unequal + in + col 0 + end + + (* additional operations from Basis Library proposal 2015-003 *) + fun fromVector (Vector.BA{nbits, bits}) = let + val newBits = Unsafe.Word8Array.create(Word8Array.length bits) + in + Word8Array.copy{di = 0, dst = newBits, src = bits}; + BA{nbits = nbits, bits = newBits} + end + + val toVector = fromVector + + end (* structure BitArray *) diff --git a/smlnj-lib/Util/bsearch-fn.sml b/smlnj-lib/Util/bsearch-fn.sml new file mode 100644 index 0000000..9f046dd --- /dev/null +++ b/smlnj-lib/Util/bsearch-fn.sml @@ -0,0 +1,45 @@ +(* bsearch-fn.sml + * + * COPYRIGHT (c) 2020 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * Binary searching on sorted monomorphic arrays. + *) + +functor BSearchFn (A : MONO_ARRAY) : sig + + structure A : MONO_ARRAY + + val bsearch : (('a * A.elem) -> order) + -> ('a * A.array) -> (int * A.elem) option + (* binary search on ordered monomorphic arrays. The comparison function + * cmp embeds a projection function from the element type to the key + * type. + *) + + end = struct + + structure A = A + + (* binary search on ordered monomorphic arrays. The comparison function + * cmp embeds a projection function from the element type to the key + * type. + *) + fun bsearch cmp (key, arr) = let + fun look (lo, hi) = + if hi >= lo then let + val m = lo + (hi - lo) div 2 + val x = A.sub(arr, m) + in + case cmp(key, x) + of LESS => look(lo, m-1) + | EQUAL => (SOME(m, x)) + | GREATER => look(m+1, hi) + (* end case *) + end + else NONE + in + look (0, A.length arr - 1) + end + + end; (* BSearch *) diff --git a/smlnj-lib/Util/char-map-sig.sml b/smlnj-lib/Util/char-map-sig.sml new file mode 100644 index 0000000..62dff66 --- /dev/null +++ b/smlnj-lib/Util/char-map-sig.sml @@ -0,0 +1,26 @@ +(* char-map-sig.sml + * + * COPYRIGHT (c) 2020 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * Fast, read-only, maps from characters to values. + *) + +signature CHAR_MAP = + sig + + type 'a char_map + (* a finite map from characters to 'a *) + + val mkCharMap : {default : 'a, bindings : (string * 'a) list} -> 'a char_map + (* make a character map which maps the bound characters to their + * bindings and maps everything else to the default value. + *) + + val mapChr : 'a char_map -> char -> 'a + (* map the given character *) + val mapStrChr : 'a char_map -> (string * int) -> 'a + (* (mapStrChr c (s, i)) is equivalent to (mapChr c (String.sub(s, i))) *) + + end (* CHAR_MAP *) + diff --git a/smlnj-lib/Util/char-map.sml b/smlnj-lib/Util/char-map.sml new file mode 100644 index 0000000..eab4c94 --- /dev/null +++ b/smlnj-lib/Util/char-map.sml @@ -0,0 +1,40 @@ +(* char-map.sml + * + * COPYRIGHT (c) 2020 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * Fast, read-only, maps from characters to values. + *) + +structure CharMap :> CHAR_MAP = + struct + + (* we can use unchecked array operations, since the indices are always + * in range. + *) + val sub = Unsafe.Array.sub + val update = Unsafe.Array.update + + (* a finite map from characters to 'a *) + type 'a char_map = 'a Array.array + + (* make a character map which maps the bound characters to their + * bindings and maps everything else to the default value. + *) + fun mkCharMap {default, bindings} = let + (* this array maps characters to indices in the valMap *) + val arr = Array.array (Char.maxOrd, default) + fun doBinding (s, v) = + CharVector.app (fn c => update(arr, Char.ord c, v)) s + in + List.map doBinding bindings; + arr + end + + (* map the given character ordinal *) + fun mapChr cm i = sub(cm, Char.ord i) + + (* (mapStrChr c (s, i)) is equivalent to (mapChr c (String.sub(s, i))) *) + fun mapStrChr cm (s, i) = sub(cm, Char.ord(String.sub(s, i))) + + end (* CharMap *) diff --git a/smlnj-lib/Util/dynamic-array-fn.sml b/smlnj-lib/Util/dynamic-array-fn.sml new file mode 100644 index 0000000..a40e35e --- /dev/null +++ b/smlnj-lib/Util/dynamic-array-fn.sml @@ -0,0 +1,124 @@ +(* dynamic-array-fn.sml + * + * COPYRIGHT (c) 2023 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * Monomorphic arrays of unbounded length + * + * TODO: add the missing operations that the DynamicArray structure + * provides. Adding these will require including the MONO_ARRAY_SLICE + * structure as a functor parameter. + *) + +functor DynamicArrayFn (A : MONO_ARRAY) : MONO_DYNAMIC_ARRAY = + struct + + type elem = A.elem + + (* BLOCK(arr, dflt, bnd): + * arr - current data store; is at least !bnd+1 elements with arr[0..bnd] + * being the range of "defined" entries. + * dflt - default value + * bnd - values at indices above !bnd are default for reading + *) + datatype array = BLOCK of A.array ref * elem * int ref + + exception Subscript = General.Subscript + exception Size = General.Size + + fun array (sz, dflt) = BLOCK(ref (A.array (sz, dflt)), dflt, ref (~1)) + + (* fromList (l, v) creates an array using the list of values l + * plus the default value v. + *) + fun fromList (initList, dflt) = let + val arr = A.fromList initList + in + BLOCK(ref arr, dflt, ref(A.length arr - 1)) + end + + fun toList (BLOCK(ref arr, _, bnd)) = let + val len = !bnd + 1 + in + List.tabulate (len, fn i => A.sub(arr, i)) + end + + (* tabulate (sz, fill, dflt) acts like Array.tabulate, plus + * stores default value dflt. Raises Size if sz < 0. + *) + fun tabulate (sz, fillFn, dflt) = + BLOCK(ref(A.tabulate(sz, fillFn)), dflt, ref(sz-1)) + + (* create a new dynamic array with the elements from arr[lo..hi] *) + fun subArray' (BLOCK(arr, dflt, bnd), lo, hi) = let + val arrval = !arr + val bnd = !bnd + (* copy the [lo..top] elements from the source array, where top is + * the minimum of `bnd` and `hi`. + *) + fun make top = BLOCK( + ref(A.tabulate(top-lo+1, fn i => A.sub(arrval, i+lo))), + dflt, + ref(top-lo)) + in + if hi <= bnd + (* the new array is a slice of the defined range *) + then make hi + else if (lo <= bnd) + (* the new array includes both the "defined" and default ranges *) + then make bnd + (* the new array only includes the default range *) + else array(0, dflt) + end + + (* check that the specified bounds are valid and then call subArray' to do + * the actual work. + *) + fun subArray (da, lo, hi) = + if (lo < 0) orelse (hi < lo-1) + then raise Size + else subArray' (da, lo, hi) + + fun default (BLOCK(_, dflt, _)) = dflt + + fun sub (BLOCK(arr, dflt, _), idx) = (A.sub(!arr, idx)) + handle Subscript => if idx < 0 then raise Subscript else dflt + + fun bound (BLOCK(_, _, bnd)) = (!bnd) + + fun expand (arr, oldlen, newlen, dflt) = let + fun fillfn i = if i < oldlen then A.sub(arr,i) else dflt + in + A.tabulate(newlen, fillfn) + end + + fun update (BLOCK(arr, dflt, bnd), idx, v) = let + val len = A.length (!arr) + in + if idx >= len + then arr := expand(!arr, len, Int.max(len+len, idx+1), dflt) + else (); + A.update(!arr, idx, v); + if idx > !bnd then bnd := idx else () + end + + fun truncate (a as BLOCK(arr, dflt, bndref), sz) = let + val bnd = !bndref + val newbnd = sz - 1 + val arr_val = !arr + val array_sz = A.length arr_val + fun fillDflt (i, stop) = + if i = stop then () + else (A.update(arr_val, i, dflt); fillDflt(i-1, stop)) + in + if newbnd < 0 then (bndref := ~1; arr := A.array(0, dflt)) + else if newbnd >= bnd then () + else if 3 * sz < array_sz then let + val BLOCK(arr', _, bnd') = subArray'(a, 0, newbnd) + in + (bndref := !bnd'; arr := !arr') + end + else (bndref := newbnd; fillDflt(bnd, newbnd)) + end + + end (* DynamicArrayFn *) diff --git a/smlnj-lib/Util/dynamic-array-sig.sml b/smlnj-lib/Util/dynamic-array-sig.sml new file mode 100644 index 0000000..f644963 --- /dev/null +++ b/smlnj-lib/Util/dynamic-array-sig.sml @@ -0,0 +1,95 @@ +(* dynamic-array-sig.sml + * + * COPYRIGHT (c) 2023 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * Signature for unbounded polymorphic arrays. + *) + +signature DYNAMIC_ARRAY = + sig + type 'a array + + val array : (int * 'a) -> 'a array + (* array (sz, e) creates an unbounded array all of whose elements + * are initialized to e. sz (>= 0) is used as a + * hint of the potential range of indices. Raises Size if a + * negative hint is given. + *) + + val subArray : ('a array * int * int) -> 'a array + (* `subArray (a, lo, hi)` creates a new array with the same default + * as `a`, and whose values in the range [0,hi-lo] are equal to + * the values in `a` in the range [lo, hi]. + * Raises Size if lo < 0 or hi < lo-1. + *) + + val fromList : 'a list * 'a -> 'a array + (* fromList (l, v) creates an array using the list of values l + * plus the default value v. + *) + + val fromVector : 'a Vector.vector * 'a -> 'a array + (* fromVector (vec, dflt) creates an array using the vector of values vec + * plus the default value dflt. + *) + + val toList : 'a array -> 'a list + (* return the array's contents as a list *) + + val toVector : 'a array -> 'a vector + (* return the array's contents as a vector *) + + val tabulate: (int * (int -> 'a) * 'a) -> 'a array + (* tabulate (sz, fill, dflt) acts like Array.tabulate, plus + * stores default value dflt. Raises Size if sz < 0. + *) + + val default : 'a array -> 'a + (* default returns array's default value *) + + val sub : ('a array * int) -> 'a + (* sub (a,idx) returns value of the array at index idx. + * If that value has not been set by update, it returns the default value. + * Raises Subscript if idx < 0 + *) + + val update : ('a array * int * 'a) -> unit + (* update (a,idx,v) sets the value at index idx of the array to v. + * Raises Subscript if idx < 0 + *) + + val bound : 'a array -> int + (* bound returns an upper bound on the index of values that have been + * changed; i.e., values at indices above the bound are the default. + *) + + val truncate : ('a array * int) -> unit + (* truncate (a,sz) makes every entry with index > sz the default value *) + + (* standard array iterators *) + val appi : (int * 'a -> unit) -> 'a array -> unit + val app : ('a -> unit) -> 'a array -> unit + val modifyi : (int * 'a -> 'a) -> 'a array -> unit + val modify : ('a -> 'a) -> 'a array -> unit + val foldli : (int * 'a * 'b -> 'b) -> 'b -> 'a array -> 'b + val foldri : (int * 'a * 'b -> 'b) -> 'b -> 'a array -> 'b + val foldl : ('a * 'b -> 'b) -> 'b -> 'a array -> 'b + val foldr : ('a * 'b -> 'b) -> 'b -> 'a array -> 'b + val findi : (int * 'a -> bool) -> 'a array -> (int * 'a) option + val find : ('a -> bool) -> 'a array -> 'a option + val exists : ('a -> bool) -> 'a array -> bool + val all : ('a -> bool) -> 'a array -> bool + val collate : ('a * 'a -> order) -> 'a array * 'a array -> order + +(* TODO + val copy : {di:int, dst:'a array, src:'a array} -> unit + val copyVec : {di:int, dst:'a array, src:'a vector} -> unit +*) + + val vector : 'a array -> 'a vector + (* return the array's contents as a vector. + * Note: this function is DEPRECATED in favor of toVector. + *) + + end (* DYNAMIC_ARRAY *) diff --git a/smlnj-lib/Util/dynamic-array.sml b/smlnj-lib/Util/dynamic-array.sml new file mode 100644 index 0000000..aea3e85 --- /dev/null +++ b/smlnj-lib/Util/dynamic-array.sml @@ -0,0 +1,157 @@ +(* dynamic-array.sml + * + * COPYRIGHT (c) 2023 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * Polymorhic arrays of unbounded length + *) + +structure DynamicArray :> DYNAMIC_ARRAY = + struct + + structure A = Array + + (* BLOCK(arr, dflt, bnd): + * arr - current data store; is at least !bnd+1 elements with arr[0..bnd] + * being the range of "defined" entries. + * dflt - default value + * bnd - values at indices above !bnd are default for reading + *) + datatype 'a array = BLOCK of ('a A.array ref * 'a * int ref) + + exception Subscript = General.Subscript + exception Size = General.Size + + fun array (sz, dflt) = BLOCK(ref(A.array(sz, dflt)), dflt, ref(~1)) + + (* fromList (l, v) creates an array using the list of values l + * plus the default value v. + *) + fun fromList (initList, dflt) = let + val arr = A.fromList initList + in + BLOCK(ref arr, dflt, ref(A.length arr - 1)) + end + + fun toList (BLOCK(ref arr, _, bnd)) = let + val len = !bnd + 1 + in + List.tabulate (len, fn i => A.sub(arr, i)) + end + + fun fromVector (vec, dflt) = let + val arr = A.fromVector vec + in + BLOCK(ref arr, dflt, ref(Vector.length vec - 1)) + end + + fun toVector (BLOCK(ref arr, _, bnd)) = let + val len = !bnd + 1 + in + ArraySlice.vector (ArraySlice.slice(arr, 0, SOME len)) + end + + (* tabulate (sz, fill, dflt) acts like Array.tabulate, plus + * stores default value dflt. Raises Size if sz < 0. + *) + fun tabulate (sz, fillFn, dflt) = + BLOCK(ref(A.tabulate(sz, fillFn)), dflt, ref(sz-1)) + + (* create a new dynamic array with the elements from arr[lo..hi] *) + fun subArray' (BLOCK(arr, dflt, bnd), lo, hi) = let + val arrval = !arr + val bnd = !bnd + (* copy the [lo..top] elements from the source array, where top is + * the minimum of `bnd` and `hi`. + *) + fun make top = BLOCK( + ref(A.tabulate(top-lo+1, fn i => A.sub(arrval, i+lo))), + dflt, + ref(top-lo)) + in + if hi <= bnd + (* the new array is a slice of the defined range *) + then make hi + else if (lo <= bnd) + (* the new array includes both the "defined" and default ranges *) + then make bnd + (* the new array only includes the default range *) + else array(0, dflt) + end + + (* check that the specified bounds are valid and then call subArray' to do + * the actual work. + *) + fun subArray (da, lo, hi) = + if (lo < 0) orelse (hi < lo-1) + then raise Size + else subArray' (da, lo, hi) + + fun default (BLOCK(_, dflt, _)) = dflt + + fun sub (BLOCK(arr, dflt, _), idx) = (A.sub(!arr, idx)) + handle Subscript => if idx < 0 then raise Subscript else dflt + + fun bound (BLOCK(_, _, bnd)) = (!bnd) + + fun expand (arr, oldlen, newlen, dflt) = let + fun fillfn i = if i < oldlen then A.sub(arr,i) else dflt + in + A.tabulate(newlen, fillfn) + end + + fun update (BLOCK(arr, dflt, bnd), idx, v) = let + val len = A.length (!arr) + in + if idx >= len + then arr := expand(!arr, len, Int.max(len+len, idx+1), dflt) + else (); + A.update(!arr, idx, v); + if idx > !bnd then bnd := idx else () + end + + fun truncate (a as BLOCK(arr, dflt, bndref), sz) = let + val bnd = !bndref + val newbnd = sz - 1 + val arr_val = !arr + val array_sz = A.length arr_val + fun fillDflt (i, stop) = + if i = stop then () + else (A.update(arr_val, i, dflt); fillDflt(i-1, stop)) + in + if newbnd < 0 then (bndref := ~1; arr := A.array(0, dflt)) + else if newbnd >= bnd then () + else if 3 * sz < array_sz then let + val BLOCK(arr', _, bnd') = subArray'(a, 0, newbnd) + in + (bndref := !bnd'; arr := !arr') + end + else (bndref := newbnd; fillDflt(bnd, newbnd)) + end + + (* get the array slice that covers the defined portion of the array *) + fun slice (BLOCK(arr, _, bnd)) = + ArraySlice.slice(!arr, 0, SOME(!bnd + 1)) + + (* we implement the iterators by using the array slice operations *) + fun vector arr = ArraySlice.vector (slice arr) + fun appi f arr = ArraySlice.appi f (slice arr) + fun app f arr = ArraySlice.app f (slice arr) + fun modifyi f arr = ArraySlice.modifyi f (slice arr) + fun modify f arr = ArraySlice.modify f (slice arr) + fun foldli f init arr = ArraySlice.foldli f init (slice arr) + fun foldri f init arr = ArraySlice.foldri f init (slice arr) + fun foldl f init arr = ArraySlice.foldl f init (slice arr) + fun foldr f init arr = ArraySlice.foldr f init (slice arr) + fun findi pred arr = ArraySlice.findi pred (slice arr) + fun find pred arr = ArraySlice.find pred (slice arr) + fun exists pred arr = ArraySlice.exists pred (slice arr) + fun all pred arr = ArraySlice.all pred (slice arr) + fun collate cmp (arr1, arr2) = ArraySlice.collate cmp (slice arr1, slice arr2) + +(* TODO + val copy : {di:int, dst:'a array, src:'a array} -> unit + val copyVec : {di:int, dst:'a array, src:'a vector} -> unit +*) + + end (* DynamicArrayFn *) diff --git a/smlnj-lib/Util/edit-distance.sml b/smlnj-lib/Util/edit-distance.sml new file mode 100644 index 0000000..b6030cd --- /dev/null +++ b/smlnj-lib/Util/edit-distance.sml @@ -0,0 +1,116 @@ +(* edit-distance.sml + * + * Compute the "optimal string alignment" (or Levenshtein) distance. We + * allow four kinds of edits: deletion, insertion, replacement, and + * transposition of adjacent characters. + * + * The implementation is based on the pseudocode for "OSA" distance at + * + * https://en.wikipedia.org/wiki/Damerau–Levenshtein_distance + * + * COPYRIGHT (c) 2023 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + *) + +structure EditDistance : sig + + val distance : string * string -> int + + end = struct + +(* safe version + val stringSub = String.sub + val arraySub = Array.sub + val arrayUpdate = Array.update +*) + (* unsafe string operations *) + val stringSub = Unsafe.CharVector.sub + val arraySub = Unsafe.Array.sub + val arrayUpdate = Unsafe.Array.update + + fun min3 (a : int, b, c) = if (a <= b) + then if (a <= c) then a else c + else if (b <= c) then b else c + + fun distance ("", b) = size b + | distance (a, "") = size a + | distance (a, b) = let + val na = size a + val nb = size b + (* [0..na]x[0..nb] array of costs *) + val d = Array.array((na+1)*(nb+1), ~1) + fun get (i, j) = arraySub(d, i*nb + j) + fun set (i, j, k) = arrayUpdate(d, i*nb + j, k) + (* compute min cost for position [i,j], where `cost` is the cost + * of replacement at position [i,j]. Transposition cost is handled + * separately. + *) + fun editCost (i, j, cost) = min3( + get(i-1, j) + 1, (* deletion cost *) + get(i, j-1) + 1, (* insertion cost *) + get(i-1, j-1) + cost) (* substitution cost *) + (* initialization for d[-, 0] and d[0, -] *) + val _ = let + fun init1 i = if (i <= na) then (set(i, 0, i); init1(i+1)) else () + fun init2 j = if (j <= nb) then (set(0, j, j); init2(j+1)) else () + in + init1 0; init2 0 + end + (* for the first character position in either string, transposition is + * not an option, so we handle those separately. + *) + val a1 = stringSub(a, 0) + val b1 = stringSub(b, 0) + val _ = let + fun update (i, j, cost) = set (i, j, editCost (i, j, cost)) + (* loop for i=1 *) + fun lp1 j = if (j <= nb) + then ( + if (a1 = stringSub(b, j-1)) + then update (1, j, 0) + else update (1, j, 1); + lp1 (j+1)) + else () + (* loop for j=1 *) + fun lp2 i = if (i <= na) + then ( + if (stringSub(a, i-1) = b1) + then update (i, 1, 0) + else update (i, 1, 1); + lp2 (i+1)) + else () + in + lp1 1; lp2 1 + end + (* loop for i = 2..na *) + fun lpi (i, aim1) = if (i <= na) + then let + val ai = stringSub(a, i-1) + (* loop for j = 2..nb *) + fun lpj (j, bjm1) = if (j <= nb) + then let + val bj = stringSub(b, j-1) + fun body cost = let + val dij = editCost(i, j, cost) + in + (* check for transposition *) + if (aim1 = bj) andalso (bjm1 = ai) + then set(i, j, Int.min(dij, get(i-2, j-2))) + else set(i, j, dij) + end + in + if (ai = bj) then body 0 else body 1; + lpj (j+1, bj) + end + else () + in + lpj (2, b1); lpi (i+1, ai) + end + else () + in + lpi (2, a1); + (* get the distance *) + get(na, nb) + end + + end diff --git a/smlnj-lib/Util/fifo-sig.sml b/smlnj-lib/Util/fifo-sig.sml new file mode 100644 index 0000000..dd34c5e --- /dev/null +++ b/smlnj-lib/Util/fifo-sig.sml @@ -0,0 +1,30 @@ +(* fifo-sig.sml + * + * COPYRIGHT (c) 1993 by AT&T Bell Laboratories. See COPYRIGHT file for details. + * + * Applicative fifos + * + *) + +signature FIFO = + sig + type 'a fifo + + exception Dequeue + + val empty : 'a fifo + val isEmpty : 'a fifo -> bool + val enqueue : 'a fifo * 'a -> 'a fifo + val dequeue : 'a fifo -> 'a fifo * 'a + val next : 'a fifo -> ('a * 'a fifo) option + val delete : ('a fifo * ('a -> bool)) -> 'a fifo + val head : 'a fifo -> 'a + val peek : 'a fifo -> 'a option + val length : 'a fifo -> int + val contents : 'a fifo -> 'a list + val app : ('a -> unit) -> 'a fifo -> unit + val map : ('a -> 'b) -> 'a fifo -> 'b fifo + val foldl : ('a * 'b -> 'b) -> 'b -> 'a fifo -> 'b + val foldr : ('a * 'b -> 'b) -> 'b -> 'a fifo -> 'b + + end (* FIFO *) diff --git a/smlnj-lib/Util/fifo.sml b/smlnj-lib/Util/fifo.sml new file mode 100644 index 0000000..c565264 --- /dev/null +++ b/smlnj-lib/Util/fifo.sml @@ -0,0 +1,61 @@ +(* fifo.sml + * + * COPYRIGHT (c) 1993 by AT&T Bell Laboratories. See COPYRIGHT file for details. + * + * Applicative fifos + * + *) + +structure Fifo : FIFO = + struct + datatype 'a fifo = Q of {front: 'a list, rear: 'a list} + + exception Dequeue + + val empty = Q{front=[],rear=[]} + + fun isEmpty (Q{front=[],rear=[]}) = true + | isEmpty _ = false + + fun enqueue (Q{front,rear},x) = Q{front=front,rear=(x::rear)} + + fun dequeue (Q{front=(hd::tl),rear}) = (Q{front=tl,rear=rear},hd) + | dequeue (Q{rear=[],...}) = raise Dequeue + | dequeue (Q{rear,...}) = dequeue(Q{front=rev rear,rear=[]}) + + fun next (Q{front=(hd::tl),rear}) = SOME(hd, Q{front=tl,rear=rear}) + | next (Q{rear=[],...}) = NONE + | next (Q{rear,...}) = next(Q{front=rev rear,rear=[]}) + + fun delete (Q{front, rear}, pred) = let + fun doFront [] = {front = doRear(rev rear), rear = []} + | doFront (x::r) = if (pred x) + then {front = r, rear = rear} + else let val {front, rear} = doFront r + in {front = x :: front, rear = rear} end + and doRear [] = [] + | doRear (x::r) = if (pred x) then r else x :: (doRear r) + in + Q(doFront front) + end + + fun peek (Q{front=(hd::_), ...}) = SOME hd + | peek (Q{rear=[], ...}) = NONE + | peek (Q{rear, ...}) = SOME(hd(rev rear)) + + fun head (Q{front=(hd::_),...}) = hd + | head (Q{rear=[],...}) = raise Dequeue + | head (Q{rear,...}) = hd(rev rear) + + fun length (Q {rear,front}) = (List.length rear) + (List.length front) + + fun contents (Q {rear, front}) = (front @ (rev rear)) + + fun app f (Q{front,rear}) = (List.app f front; List.app f (List.rev rear)) + fun map f (Q{front,rear}) = + Q{front = List.map f front, rear = rev(List.map f(rev rear))} + fun foldl f b (Q{front,rear}) = List.foldr f (List.foldl f b front) rear + fun foldr f b (Q{front,rear}) = List.foldr f (List.foldl f b rear) front + + end + diff --git a/smlnj-lib/Util/fmt-fields.sml b/smlnj-lib/Util/fmt-fields.sml new file mode 100644 index 0000000..4c54648 --- /dev/null +++ b/smlnj-lib/Util/fmt-fields.sml @@ -0,0 +1,243 @@ +(* fmt-fields.sml + * + * COPYRIGHT (c) 2020 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * This module defines types and routines that are common to both + * the Format and Scan structures. + *) + +structure FmtFields : sig + + (* precompiled format specifiers *) + datatype sign + = DfltSign (* default: put a sign on negative numbers *) + | AlwaysSign (* "+" always has sign (+ or -) *) + | BlankSign (* " " put a blank in the sign field for positive numbers *) + + datatype neg_sign + = MinusSign (* default: use "-" for negative numbers *) + | TildeSign (* "~" use "~" for negative numbers *) + + type field_flags = { + sign : sign, + neg_char : neg_sign, + zero_pad : bool, + base : bool, + large : bool + } + + datatype field_wid = NoPad | Wid of int + + datatype real_format + = F_Format (* "%f" *) + | E_Format of bool (* "%e" or "%E" *) + | G_Format of bool (* "%g" or "%G" *) + + datatype field_type + = OctalField + | IntField + | HexField + | CapHexField + | CharField + | BoolField + | StrField of int option + | RealField of {prec : int, format : real_format} + + datatype fmt_spec + = Raw of substring + | CharSet of char -> bool + | Field of (field_flags * field_wid * field_type) + + datatype fmt_item + = ATOM of Atom.atom + | LINT of LargeInt.int + | INT of Int.int + | LWORD of LargeWord.word + | WORD of Word.word + | WORD8 of Word8.word + | BOOL of bool + | CHR of char + | STR of string + | REAL of Real.real + | LREAL of LargeReal.real + | LEFT of (int * fmt_item) (* left justify in field of given width *) + | RIGHT of (int * fmt_item) (* right justify in field of given width *) + + exception BadFormat (* bad format string *) + + val scanFieldSpec : substring -> (fmt_spec * substring) + val scanField : substring -> (fmt_spec * substring) + + end = struct + + structure SS = Substring + structure SC = StringCvt + + (* precompiled format specifiers *) + datatype sign + = DfltSign (* default: put a sign on negative numbers *) + | AlwaysSign (* "+" always has sign (+ or -) *) + | BlankSign (* " " put a blank in the sign field for positive numbers *) + + datatype neg_sign + = MinusSign (* default: use "-" for negative numbers *) + | TildeSign (* "~" use "~" for negative numbers *) + + type field_flags = { + sign : sign, + neg_char : neg_sign, + zero_pad : bool, + base : bool, + large : bool + } + + datatype field_wid = NoPad | Wid of int + + datatype real_format + = F_Format (* "%f" *) + | E_Format of bool (* "%e" or "%E" *) + | G_Format of bool (* "%g" or "%G" *) + + datatype field_type + = OctalField + | IntField + | HexField + | CapHexField + | CharField + | BoolField + | StrField of int option + | RealField of {prec : int, format : real_format} + + datatype fmt_spec + = Raw of substring + | CharSet of char -> bool + | Field of (field_flags * field_wid * field_type) + + datatype fmt_item + = ATOM of Atom.atom + | LINT of LargeInt.int + | INT of Int.int + | LWORD of LargeWord.word + | WORD of Word.word + | WORD8 of Word8.word + | BOOL of bool + | CHR of char + | STR of string + | REAL of Real.real + | LREAL of LargeReal.real + | LEFT of (int * fmt_item) (* left justify in field of given width *) + | RIGHT of (int * fmt_item) (* right justify in field of given width *) + + exception BadFormat (* bad format string *) + + (* string to int conversions *) + val decToInt : (char, substring) SC.reader -> (Int.int, substring) SC.reader + = Int.scan SC.DEC + + (* scan a field specification. Assume that the previous character in the + * base string was "%" and that the first character in the substring fmtStr + * is not "%". + *) + fun scanFieldSpec fmtStr = let + val (fmtStr, flags) = let + fun doFlags (ss, flags : field_flags) = ( + case (SS.getc ss, flags) + of (SOME(#" ", _), {sign=AlwaysSign, ...}) => + raise BadFormat + | (SOME(#" ", ss'), _) => + doFlags (ss', { + sign = BlankSign, neg_char = #neg_char flags, + zero_pad = #zero_pad flags, base = #base flags, + large = #large flags + }) + | (SOME(#"+", _), {sign=BlankSign, ...}) => + raise BadFormat + | (SOME(#"+", ss'), _) => + doFlags (ss', { + sign = AlwaysSign, neg_char = #neg_char flags, + zero_pad = #zero_pad flags, base = #base flags, + large = #large flags + }) + | (SOME(#"~", ss'), _) => + doFlags (ss', { + sign = #sign flags, neg_char = TildeSign, + zero_pad = #zero_pad flags, base = #base flags, + large = #large flags + }) + | (SOME(#"-", _), {neg_char = TildeSign, ...}) => + raise BadFormat + | (SOME(#"-", ss'), _) => + doFlags (ss', { + sign = #sign flags, neg_char = MinusSign, + zero_pad = #zero_pad flags, base = #base flags, + large = #large flags + }) + | (SOME(#"#", ss'), _) => + doFlags (ss', { + sign = #sign flags, neg_char = #neg_char flags, + zero_pad = #zero_pad flags, base = true, + large = #large flags + }) + | (SOME(#"0", ss'), _) => + doFlags (ss', { + sign = #sign flags, neg_char = #neg_char flags, + zero_pad = true, base = #base flags, + large = #large flags + }) + | _ => (ss, flags) + (* end case *)) + in + doFlags (fmtStr, { + sign = DfltSign, neg_char = MinusSign, + zero_pad = false, base = false, large = false + }) + end + val (wid, fmtStr) = if (Char.isDigit(valOf(SS.first fmtStr))) + then let + val (n, fmtStr) = valOf (decToInt SS.getc fmtStr) + in (Wid n, fmtStr) end + else (NoPad, fmtStr) + val (ty, fmtStr) = (case SS.getc fmtStr + of (SOME(#"d", ss)) => (IntField, ss) + | (SOME(#"X", ss)) => (CapHexField, ss) + | (SOME(#"x", ss)) => (HexField, ss) + | (SOME(#"o", ss)) => (OctalField, ss) + | (SOME(#"c", ss)) => (CharField, ss) + | (SOME(#"s", ss)) => (StrField NONE, ss) + | (SOME(#"b", ss)) => (BoolField, ss) + | (SOME(#".", ss)) => let +(* NOTE: "." ought to be allowed for d,X,x,o and s formats as it is in ANSI C *) + val (n, ss) = (case decToInt SS.getc ss + of SOME(n, ss') => (n, ss') + | NONE => (0, ss) + (* end case *)) + fun real (format, ss) = (RealField{prec = n, format = format}, ss) + in + case SS.getc ss + of (SOME(#"E" , ss))=> real(E_Format true, ss) + | (SOME(#"e" , ss))=> real(E_Format false, ss) + | (SOME(#"f" , ss))=> real(F_Format, ss) + | (SOME(#"G" , ss))=> real(G_Format true, ss) + | (SOME(#"g", ss)) => real(G_Format false, ss) + | (SOME(#"s", ss)) => (StrField(SOME n), ss) + | _ => raise BadFormat + (* end case *) + end + | (SOME(#"E", ss)) => (RealField{prec=6, format=E_Format true}, ss) + | (SOME(#"e", ss)) => (RealField{prec=6, format=E_Format false}, ss) + | (SOME(#"f", ss)) => (RealField{prec=6, format=F_Format}, ss) + | (SOME(#"G", ss)) => (RealField{prec=6, format=G_Format true}, ss) + | (SOME(#"g", ss)) => (RealField{prec=6, format=G_Format false}, ss) + | _ => raise BadFormat + (* end case *)) + in + (Field(flags, wid, ty), fmtStr) + end (* scanFieldSpec *) + + fun scanField fmtStr = (case SS.getc fmtStr + of (SOME(#"%", fmtStr')) => (Raw(SS.slice(fmtStr, 0, SOME 1)), fmtStr') + | _ => scanFieldSpec fmtStr + (* end case *)) + + end diff --git a/smlnj-lib/Util/fnv-hash.sml b/smlnj-lib/Util/fnv-hash.sml new file mode 100644 index 0000000..3647d9f --- /dev/null +++ b/smlnj-lib/Util/fnv-hash.sml @@ -0,0 +1,37 @@ +(* fnv-hash.sml + * + * COPYRIGHT (c) 2020 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * The interface to an implementation of the Fowler–Noll–Vo (FNV) hashing + * algorithm. We use the 64-bit FNV-1a algorithm. + * + * See https://en.wikipedia.org/wiki/Fowler–Noll–Vo_hash_function for details. + *) + +structure FNVHash : sig + + val offsetBasis : Word64.word + + val hashByte : Word8.word * Word64.word -> Word64.word + val hashChar : char * Word64.word -> Word64.word + + val hashString : string -> word + val hashSubstring : substring -> word + + end = struct + + (* values from https://en.wikipedia.org/wiki/Fowler–Noll–Vo_hash_function *) + val offsetBasis : Word64.word = 0wxcbf29ce484222325 + val prime : Word64.word = 0wx00000100000001B3 + + fun hashOne (b, h) = Word64.xorb(b, h) * prime + + fun hashByte (b, h) = hashOne (Word64.fromLargeWord(Word8.toLargeWord b), h) + + fun hashChar (c, h) = hashOne (Word64.fromInt(Char.ord c), h) + + fun hashString s = Word.fromLarge (CharVector.foldl hashChar offsetBasis s) + fun hashSubstring ss = Word.fromLarge (Substring.foldl hashChar offsetBasis ss) + + end diff --git a/smlnj-lib/Util/format-comb-sig.sml b/smlnj-lib/Util/format-comb-sig.sml new file mode 100644 index 0000000..0aa9c42 --- /dev/null +++ b/smlnj-lib/Util/format-comb-sig.sml @@ -0,0 +1,179 @@ +(* format-comb-sig.sml + * + * COPYRIGHT (c) 2007 The Fellowship of SML/NJ + * + * Well-typed "printf" for SML, aka "Unparsing Combinators". + * This code was written by Matthias Blume (2002). Inspiration + * obtained from Olivier Danvy's "Functional Unparsing" work. + * + * Description: + * + * The idea is to use combinators for constructing something akin to + * the format string of C's printf function. The difference is, however, + * that our formats aren't strings. Instead, format( fragment)s have + * meaningful types, and passing them to function "format" results + * in a curried function whose arguments have precisely the types that + * correspond to argument-consuming parts of the format. (Such + * argument-consuming parts are similar to the %-specifications of printf.) + * + * Here is how the typing works: There is an underlying notion of + * "abstract formats" of type 'a format. However, the user operates + * at the level of "format fragments" which have type ('a, 'b) + * fragment and are typically polymorphic in 'a (where 'b is + * instantiated to some type containing 'a). Fragments are + * functions from formats to formats and can be composed freely using + * the function composition operator 'o'. This form of format + * composition translates to a corresponding concatenation of the + * resulting output. + * + * Fragments are composed from two kids of primitve fragments called + * "elements" and "glue", respectively. An "element" is a fragment that + * consumes some argument (which thanks to the typing magic appears as a + * curried argument when the format gets executed). As "glue" we refer + * to fragments that do not consume arguments but merely insert fixed + * text (fixed at format construction time) into the output. + * + * There are also adjustment operations that pad, trim, or fit the output + * of entire fragments (primitive or not) to a given size. + * + * A number of elements and some glue has been predefined. Here are + * examples on how to use this facility: + * + * open FormatComb + * + * format nothing ==> "" + * + * format int ==> fn: int -> string + * format int 1234 ==> "1234" + * + * format (text "The square of " o int o text " is " o int o text ".") + * ==> fn: int -> int -> string + * format (text "The square of " o int o text " is " o int o text ".") 2 4 + * ==> "The square of 2 is 4." + * + * format (int o bool o char) ==> fn : int -> bool -> char -> string + * format (int o bool o char) 1 true #"x" + * ==> "1truex" + * + * format (glue string "glue vs. " o string o glue int 42 o sp 5 o int) + * "ordinary text " 17 + * ==> "glue vs. ordinary text 42 17" + * + * Fragments can be padded, trimmed, or fitted to generate text pieces of + * specified sizes. Padding/trimming/fitting may be nested. + * The operations are parameterized by a place (left, center, right) and + * a width. Padding never shrinks strings, trimming never extends + * strings, and fitting is done as necessary by either padding or trimming. + * Examples: + * + * format (pad left 6 int) 1234 ==> " 1234" + * format (pad center 6 int) 1234 ==> " 1234 " + * format (pad right 6 int) 1234 ==> "1234 " + * format (trim left 2 int) 1234 ==> "34" + * format (trim center 2 int) 1234 ==> "23" + * format (trim right 2 int) 1234 ==> "12" + * format (fit left 3 int) 12 ==> " 12" + * format (fit left 3 int) 123 ==> "123" + * format (fit left 3 int) 1234 ==> "234" + * + * Nesting: + * + * format (pad right 20 (int o pad left 10 real) o text "x") 12 22.3 + * ==> "12 22.3 x" + *) + +signature FORMAT_COMB = + sig + + (* We reveal "fragments" to be functions from abstract formats + * to abstract formats. This is to make sure we can use function + * composition on them. + *) + type 'a format + type ('a, 'b) fragment = 'a format -> 'b format + + (* Two primitive kinds of fragments: Glue inserts some text + * into the output without consuming an argument. Elements + * insert text corresponding to some (curried) argument into + * the output: + *) + type 'a glue = ('a, 'a) fragment + type ('a, 't) element = ('a, 't -> 'a) fragment + type 'a gg (* abstract helper type *) + + (* Format execution... *) + (* 1. Simple version, produce final result as a string: *) + val format : (string, 'a) fragment -> 'a + + (* 2. Complex version, take a receiver function that will + * be invoked with the final result. The result is + * still in non-concatenated form at this time. + * (Internally, the combinators avoid string concatenation + * as long as there is no padding/trimming/fitting going on.) + *) + val format' : (string list -> 'b) -> ('b, 'a) fragment -> 'a + + (* Make a type-specific element given a toString function for this type *) + val using : ('t -> string) -> ('a, 't) element + + (* Instantiate 'using' for a few types... *) + val int : ('a, int) element (* using Int.toString *) + val real : ('a, real) element (* using Real.toString *) + val bool : ('a, bool) element (* using Bool.toString *) + val string : ('a, string) element (* using (fn x => x) *) + val string' : ('a, string) element (* using String.toString *) + val char : ('a, char) element (* using String.str *) + val char' : ('a, char) element (* using Char.toString *) + + (* Parameterized elements... *) + val int' : StringCvt.radix -> ('a, int) element (* using (Int.fmt r) *) + val real' : StringCvt.realfmt -> ('a, real) element (* using(Real.fmt f) *) + + (* "polymorphic" elements *) + val list : ('a, 'x) element -> ('a, 'x list) element (* "[", ", ", "]" *) + val option : ('a, 'x) element -> ('a, 'x option) element + val seq : (('x * 'a gg -> 'a gg) -> 'a gg -> 's -> 'a gg) -> (* foldr *) + 'a glue -> (* separator *) + ('a, 'x) element -> ('a, 's) element + + (* Generic "gluifier". *) + val glue : ('a, 't) element -> 't -> 'a glue + + (* Inverse -- useful for writing extensions *) + val elem : ('t -> 'a glue) -> ('a, 't) element + + (* Other glue... *) + val nothing : 'a glue (* null glue *) + val text : string -> 'a glue (* constant text glue *) + val sp : int -> 'a glue (* n spaces glue *) + val nl : 'a glue (* newline glue *) + val tab : 'a glue (* tabulator glue *) + + (* glue generator constructors *) + val listg : ('t -> 'a glue) -> ('t list -> 'a glue) + val optiong : ('t -> 'a glue) -> ('t option -> 'a glue) + + val seqg : (('x * 'a gg -> 'a gg) -> 'a gg -> 's -> 'a gg) -> (* foldr *) + 'a glue -> (* separator *) + ('x -> 'a glue) -> (* glue maker *) + 's -> 'a glue (* glue maker for container *) + + + (* "Places" say which side of a string to pad or trim... *) + type place + val left : place + val center : place + val right : place + + (* Pad, trim, or fit to size n the output corresponding to + * a format fragment: + *) + val pad : place -> int -> ('a, 't) fragment -> ('a, 't) fragment + val trim : place -> int -> ('a, 't) fragment -> ('a, 't) fragment + val fit : place -> int -> ('a, 't) fragment -> ('a, 't) fragment + + (* specialized padding (left and right) *) + val padl : int -> ('a, 't) fragment -> ('a, 't) fragment + val padr : int -> ('a, 't) fragment -> ('a, 't) fragment + + end diff --git a/smlnj-lib/Util/format-comb.sml b/smlnj-lib/Util/format-comb.sml new file mode 100644 index 0000000..ad66339 --- /dev/null +++ b/smlnj-lib/Util/format-comb.sml @@ -0,0 +1,94 @@ +(* format-comb.sml + * + * COPYRIGHT (c) 2002 Bell Labs, Lucent Technologies + * + * Well-typed "printf" for SML, aka "Unparsing Combinators". + * This code was written by Matthias Blume (2002). Inspiration + * obtained from Olivier Danvy's "Functional Unparsing" work. + * + * See format-comb-sig.sml for details. + *) +structure FormatComb :> FORMAT_COMB = + struct + + type 'a format = string list -> 'a + type ('a, 'b) fragment = 'a format -> 'b format + type 'a glue = ('a, 'a) fragment + type ('a, 't) element = ('a, 't -> 'a) fragment + type 'a gg = 'a glue * 'a glue + + type place = int * int -> int + fun left (a, i) = a - i + fun center (a, i) = Int.quot (a - i, 2) + fun right (a, i) = 0 + + local + (* Generic padding, trimming, and fitting. Nestability + * is achieved by remembering the current state s, passing + * a new empty one to the fragment, adjusting the output + * from that, and fitting the result back into the remembered + * state. ("States" are string lists and correspond to + * output coming from fragments to the left of the current point.) *) + fun ptf adj pl n fr fm s = let + fun work s' = let + val x' = concat (rev s') + val sz = size x' + in + adj (x', sz, n, pl (sz, n)) :: s + end + in + (fr (fm o work)) [] + end + + val padRight = StringCvt.padRight #" " + val padLeft = StringCvt.padLeft #" " + fun pad0 (s, sz, n, off) = padRight n (padLeft (sz - off) s) + fun trim0 (s, _, n, off) = String.substring (s, off, n) + fun pad1 (arg as (s, sz, n, _)) = if n < sz then s else pad0 arg + fun trim1 (arg as (s, sz, n, _)) = if n > sz then s else trim0 arg + fun fit1 (arg as (_, sz, n, _)) = (if n < sz then trim0 else pad0) arg + in + fun format' rcv fr = fr (rcv o rev) [] + fun format fr = format' concat fr + + fun using cvt fm x a = fm (cvt a :: x) + + fun int fm = using Int.toString fm + fun real fm = using Real.toString fm + fun bool fm = using Bool.toString fm + fun string fm = using (fn x => x) fm + fun string' fm = using String.toString fm + fun char fm = using String.str fm + fun char' fm = using Char.toString fm + + fun int' rdx fm = using (Int.fmt rdx) fm + fun real' rfmt fm = using (Real.fmt rfmt) fm + + fun pad place = ptf pad1 place + fun trim place = ptf trim1 place + fun fit place = ptf fit1 place + end + + fun padl n = pad left n + fun padr n = pad right n + + fun glue e a fm x = e fm x a + fun elem gm fm x a = gm a fm x + + fun nothing fm = fm + fun text s = glue string s + fun sp n = pad left n nothing + fun nl fm = text "\n" fm + fun tab fm = text "\t" fm + + fun seqg (foldr : ('x * 'a gg -> 'a gg) -> 'a gg -> 'c -> 'a gg) sep g s = + #2 (foldr (fn (x, (f, r)) => (sep, g x o f o r)) (nothing, nothing) s) + fun listg g l = text "[" o seqg List.foldr (text ", ") g l o text "]" + fun optiong g NONE = text "NONE" + | optiong g (SOME a) = text "SOME(" o g a o text ")" + + fun seq foldr sep e = elem (seqg foldr sep (glue e)) + fun list e = elem (listg (glue e)) + fun option e = elem (optiong (glue e)) + + end diff --git a/smlnj-lib/Util/format-sig.sml b/smlnj-lib/Util/format-sig.sml new file mode 100644 index 0000000..3bded32 --- /dev/null +++ b/smlnj-lib/Util/format-sig.sml @@ -0,0 +1,37 @@ +(* format-sig.sml + * + * COPYRIGHT (c) 1992 by AT&T Bell Laboratories. See COPYRIGHT file for details. + * + * Formatted conversion to and from strings. + * + * AUTHOR: John Reppy + * AT&T Bell Laboratories + * Murray Hill, NJ 07974 + * jhr@research.att.com + *) + +signature FORMAT = + sig + + datatype fmt_item + = ATOM of Atom.atom + | LINT of LargeInt.int + | INT of Int.int + | LWORD of LargeWord.word + | WORD of Word.word + | WORD8 of Word8.word + | BOOL of bool + | CHR of char + | STR of string + | REAL of Real.real + | LREAL of LargeReal.real + | LEFT of (int * fmt_item) (* left justify in field of given width *) + | RIGHT of (int * fmt_item) (* right justify in field of given width *) + + exception BadFormat (* bad format string *) + exception BadFmtList (* raised on specifier/item type mismatch *) + + val format : string -> fmt_item list -> string + val formatf : string -> (string -> unit) -> fmt_item list -> unit + + end (* FORMAT *) diff --git a/smlnj-lib/Util/format.sml b/smlnj-lib/Util/format.sml new file mode 100644 index 0000000..71df44f --- /dev/null +++ b/smlnj-lib/Util/format.sml @@ -0,0 +1,306 @@ +(* format.sml + * + * COPYRIGHT (c) 2024 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * AUTHOR: John Reppy + * + * TODO + * - field widths in scan + * - add PREC of (int * fmt_item) constructor to allow dynamic control of + * precision. + * - precision in %d, %s, ... + * - * flag in scan (checks, but doesn't scan input) + * - %n specifier in scan + *) + +structure Format : FORMAT = + struct + + structure SS = Substring + structure SC = StringCvt + + open FmtFields + + exception BadFmtList + + fun padLeft (str, pad) = SC.padLeft #" " pad str + fun padRight (str, pad) = SC.padRight #" " pad str + fun zeroLPad (str, pad) = SC.padLeft #"0" pad str + fun zeroRPad (str, pad) = SC.padRight #"0" pad str + + (* int to string conversions (for positive integers only) *) + local + val (maxInt8, maxInt10, maxInt16) = (case LargeInt.maxInt + of (SOME n) => let + val maxP1 = LargeWord.fromLargeInt n + 0w1 + in + ( LargeWord.fmt SC.OCT maxP1, + LargeWord.fmt SC.DEC maxP1, + LargeWord.fmt SC.HEX maxP1 + ) + end + | NONE => ("", "", "") + (* end case *)) + in + (* MaxInt is used to represent the absolute value of the largest negative + * representable integer. + *) + datatype posint = PosInt of LargeInt.int | MaxInt + fun intToOctal MaxInt = maxInt8 + | intToOctal (PosInt i) = LargeInt.fmt SC.OCT i + fun intToStr MaxInt = maxInt10 + | intToStr (PosInt i) = LargeInt.toString i + fun intToHex MaxInt = maxInt16 + | intToHex (PosInt i) = LargeInt.fmt SC.HEX i + fun intToHeX i = String.map Char.toUpper (intToHex i) + end (* local *) + + (* word to string conversions *) + val wordToOctal = LargeWord.fmt SC.OCT + val wordToStr = LargeWord.fmt SC.DEC + val wordToHex = LargeWord.fmt SC.HEX + fun wordToHeX i = String.map Char.toUpper (wordToHex i) + + fun compileFormat str = let + val split = SS.splitl (fn #"%" => false | _ => true) + fun scan (ss, l) = + if (SS.isEmpty ss) + then rev l + else let val (ss1, ss2) = split ss + in + case (SS.getc ss2) + of (SOME(#"%", ss')) => let val (field, ss3) = scanField ss' + in + scan(ss3, field::(Raw ss1)::l) + end + | _ => rev((Raw ss1)::l) + (* end case *) + end + in + scan (SS.full str, []) + end + + fun format s = let + val fmts = compileFormat s + fun doField (flags : field_flags, wid, ty, arg) = let + fun padFn s = (case wid + of NoPad => s + | Wid i => padLeft(s, i) + (* end case *)) + fun zeroPadFn (sign, s) = (case wid + of NoPad => raise BadFormat + | (Wid i) => zeroLPad(s, i - (String.size sign)) + (* end case *)) + fun trimFn (NONE, s) = padFn s + | trimFn (SOME maxWid, s) = let + val s = if (size s > maxWid) + then String.substring(s, 0, maxWid) + else s + in + padFn s + end + fun negate i = ((PosInt(~i)) handle _ => MaxInt) + fun doSign i = (case (i < 0, #sign flags, #neg_char flags) + of (false, AlwaysSign, _) => ("+", PosInt i) + | (false, BlankSign, _) => (" ", PosInt i) + | (false, _, _) => ("", PosInt i) + | (true, _, TildeSign) => ("~", negate i) + | (true, _, _) => ("-", negate i) + (* end case *)) + fun doRealSign sign = (case (sign, #sign flags, #neg_char flags) + of (false, AlwaysSign, _) => "+" + | (false, BlankSign, _) => " " + | (false, _, _) => "" + | (true, _, TildeSign) => "~" + | (true, _, _) => "-" + (* end case *)) + fun doExpSign (exp, isCap) = let + val e = if isCap then "E" else "e" + fun mkExp e = zeroLPad(Int.toString e, 2) + in + case (exp < 0, #neg_char flags) + of (false, _) => [e, mkExp exp] + | (true, TildeSign) => [e, "~", mkExp(~exp)] + | (true, _) => [e, "-", mkExp(~exp)] + (* end case *) + end + fun octal i = let + val (sign, i) = doSign i + val sign = if (#base flags) then sign^"0" else sign + val s = intToOctal i + in + if (#zero_pad flags) + then sign ^ zeroPadFn(sign, s) + else padFn (sign ^ s) + end + fun decimal i = let + val (sign, i) = doSign i + val s = intToStr i + in + if (#zero_pad flags) + then sign ^ zeroPadFn(sign, s) + else padFn (sign ^ s) + end + fun hexidecimal i = let + val (sign, i) = doSign i + val sign = if (#base flags) then sign^"0x" else sign + val s = intToHex i + in + if (#zero_pad flags) + then sign ^ zeroPadFn(sign, s) + else padFn (sign ^ s) + end + fun capHexidecimal i = let + val (sign, i) = doSign i + val sign = if (#base flags) then sign^"0X" else sign + val s = intToHeX i + in + if (#zero_pad flags) + then sign ^ zeroPadFn(sign, s) + else padFn (sign ^ s) + end + (* word formatting *) + fun doWordSign () = (case (#sign flags) + of AlwaysSign => "+" + | BlankSign => " " + | _ => "" + (* end case *)) + fun octalW i = let + val sign = doWordSign () + val sign = if (#base flags) then sign^"0" else sign + val s = wordToOctal i + in + if (#zero_pad flags) + then sign ^ zeroPadFn(sign, s) + else padFn (sign ^ s) + end + fun decimalW i = let + val sign = doWordSign () + val s = wordToStr i + in + if (#zero_pad flags) + then sign ^ zeroPadFn(sign, s) + else padFn (sign ^ s) + end + fun hexidecimalW i = let + val sign = doWordSign () + val sign = if (#base flags) then sign^"0x" else sign + val s = wordToHex i + in + if (#zero_pad flags) + then sign ^ zeroPadFn(sign, s) + else padFn (sign ^ s) + end + fun capHexidecimalW i = let + val sign = doWordSign () + val sign = if (#base flags) then sign^"0X" else sign + val s = wordToHeX i + in + if (#zero_pad flags) + then sign ^ zeroPadFn(sign, s) + else padFn (sign ^ s) + end + in + case (ty, arg) + of (OctalField, LINT i) => octal i + | (OctalField, INT i) => octal(Int.toLarge i) + | (OctalField, WORD w) => octalW (Word.toLargeWord w) + | (OctalField, LWORD w) => octalW w + | (OctalField, WORD8 w) => octalW (Word8.toLargeWord w) + | (IntField, LINT i) => decimal i + | (IntField, INT i) => decimal(Int.toLarge i) + | (IntField, WORD w) => decimalW (Word.toLargeWord w) + | (IntField, LWORD w) => decimalW w + | (IntField, WORD8 w) => decimalW (Word8.toLargeWord w) + | (HexField, LINT i) => hexidecimal i + | (HexField, INT i) => hexidecimal(Int.toLarge i) + | (HexField, WORD w) => hexidecimalW (Word.toLargeWord w) + | (HexField, LWORD w) => hexidecimalW w + | (HexField, WORD8 w) => hexidecimalW (Word8.toLargeWord w) + | (CapHexField, LINT i) => capHexidecimal i + | (CapHexField, INT i) => capHexidecimal(Int.toLarge i) + | (CapHexField, WORD w) => capHexidecimalW (Word.toLargeWord w) + | (CapHexField, LWORD w) => capHexidecimalW w + | (CapHexField, WORD8 w) => capHexidecimalW (Word8.toLargeWord w) + | (CharField, CHR c) => padFn(String.str c) + | (BoolField, BOOL false) => padFn "false" + | (BoolField, BOOL true) => padFn "true" + | (StrField prec, ATOM s) => trimFn(prec, Atom.toString s) + | (StrField prec, STR s) => trimFn(prec, s) + | (RealField{prec, format}, REAL r) => + if (Real.isFinite r) + then (case format + of F_Format => let + val {sign, mantissa} = + RealFormat.realFFormat(r, prec) + val sign = doRealSign sign + in + if ((prec = 0) andalso (#base flags)) + then padFn(concat[sign, mantissa, "."]) + else padFn(sign ^ mantissa) + end + | E_Format isCap => let + val {sign, mantissa, exp} = + RealFormat.realEFormat(r, prec) + val sign = doRealSign sign + val expStr = doExpSign(exp, isCap) + in + if ((prec = 0) andalso (#base flags)) + then padFn(concat(sign :: mantissa :: "." + :: expStr)) + else padFn(concat(sign :: mantissa :: expStr)) + end + | G_Format isCap => let + val prec = if (prec = 0) then 1 else prec + val {sign, whole, frac, exp} = + RealFormat.realGFormat(r, prec) + val sign = doRealSign sign + val expStr = (case exp + of SOME e => doExpSign(e, isCap) + | NONE => [] + (* end csae *)) + val num = if (#base flags) + then let + val diff = + prec - ((size whole) + (size frac)) + in + if (diff > 0) + then zeroRPad(frac, (size frac)+diff) + else frac + end + else if (frac = "") + then "" + else ("." ^ frac) + in + padFn(concat(sign::whole::num::expStr)) + end + (* end case *)) + else if Real.==(Real.negInf, r) + then doRealSign true ^ "inf" + else if Real.==(Real.posInf, r) + then doRealSign false ^ "inf" + else "nan" + | (_, LEFT(w, arg)) => + StringCvt.padLeft #" " w (doField (flags, wid, ty, arg)) + | (_, RIGHT(w, arg)) => + StringCvt.padRight #" " w (doField (flags, Wid w, ty, arg)) + | _ => raise BadFmtList + (* end case *) + end + fun doArgs ([], [], l) = SS.concat(rev l) + | doArgs ((Raw s)::rf, args, l) = doArgs(rf, args, s::l) + | doArgs (Field(flags, wid, ty)::rf, arg::ra, l) = + doArgs (rf, ra, SS.full (doField (flags, wid, ty, arg)) :: l) + | doArgs _ = raise BadFmtList + in + fn args => doArgs (fmts, args, []) + end (* format *) + + fun formatf fmt = let + val f = format fmt + in + fn consumer => fn args => consumer(f args) + end + + end (* Format *) diff --git a/smlnj-lib/Util/getopt-sig.sml b/smlnj-lib/Util/getopt-sig.sml new file mode 100644 index 0000000..0f6db5f --- /dev/null +++ b/smlnj-lib/Util/getopt-sig.sml @@ -0,0 +1,83 @@ +(* getopt-sig.sml + * + * COPYRIGHT (c) 2016 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * A SML port of GNU's getopt library. + * + * This port is derived from Sven Panne's + * + * implementation of the getopt library in Haskell + * + * The following comments are lifted from Sven's code: + * + * Two rather obscure features are missing: The Bash 2.0 non-option hack (if + * you don't already know it, you probably don't want to hear about it...) + * and the recognition of long options with a single dash (e.g. '-help' is + * recognised as '--help', as long as there is no short option 'h'). + * + * Other differences between GNU's getopt and this implementation: + * * To enforce a coherent description of options and arguments, there are + * explanation fields in the option/argument descriptor. + * * Error messages are now more informative, but no longer POSIX + * compliant... :-( + * + * A difference with Sven's port: errors now invoke an error callback, rather + * than returning error strings while continuing processing options. + * The full generality of the latter does not seem justified. + *) + + +signature GET_OPT = + sig + + datatype 'a arg_order + = RequireOrder + | Permute + | ReturnInOrder of string -> 'a + (* What to do with options following non-options: + * RequireOrder: no option processing after first non-option + * Permute: freely intersperse options and non-options + * ReturnInOrder: wrap non-options into options + *) + + datatype 'a arg_descr + = NoArg of unit -> 'a + | ReqArg of (string -> 'a) * string + | OptArg of (string option -> 'a) * string + (* Description of an argument option: + * NoArg: no argument required + * ReqArg: option requires an argument; the string is the argument name + * OptArg: optional argument; the string is the argument name + *) + + type 'a opt_descr = { + short : string, + long : string list, + desc : 'a arg_descr, + help : string + } + (* Description of a single option *) + + val usageInfo : { + header : string, + options : 'a opt_descr list + } -> string + (* takes a header string and a list of option descriptions and + * returns a string explaining the usage information. A newline will + * be added following the header, so it should not be newline terminated. + *) + + val getOpt : { + argOrder : 'a arg_order, + options : 'a opt_descr list, + errFn : string -> unit + } -> string list -> ('a list * string list) + (* takes as argument an arg_order to specify the non-options + * handling, a list of option descriptions, an error callback, + * and a command line containing the options and arguments, + * and returns a list of (options, non-options) + *) + + end + diff --git a/smlnj-lib/Util/getopt.sml b/smlnj-lib/Util/getopt.sml new file mode 100644 index 0000000..3e933a6 --- /dev/null +++ b/smlnj-lib/Util/getopt.sml @@ -0,0 +1,212 @@ +(* getopt.sml + * + * COPYRIGHT (c) 2016 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + *) + +structure GetOpt :> GET_OPT = + struct + + datatype 'a arg_order + = RequireOrder + | Permute + | ReturnInOrder of string -> 'a + + datatype 'a arg_descr + = NoArg of unit -> 'a + | ReqArg of (string -> 'a) * string + | OptArg of (string option -> 'a) * string + + type 'a opt_descr = { + short : string, + long : string list, + desc : 'a arg_descr, + help : string + } + + datatype 'a opt_kind + = Opt of 'a + | NonOpt + + structure SS = Substring + structure S = String + + + (* helper functions *) + fun sepBy (sep, []) = "" + | sepBy (sep, x::xs) = + concat (x::foldr (fn (elem,l) => sep::elem::l) [] xs) + + val breakAtEq = SS.splitl (fn #"=" => false | _ => true) + + (* formatting of options *) + + fun fmtShort (NoArg _) so = concat ["-", str so] + | fmtShort (ReqArg (_,ad)) so = concat ["-", str so," ",ad] + | fmtShort (OptArg (_,ad)) so = concat ["-", str so,"[",ad,"]"] + + fun fmtLong (NoArg _) lo = concat ["--",lo] + | fmtLong (ReqArg (_,ad)) lo = concat ["--",lo,"=",ad] + | fmtLong (OptArg (_,ad)) lo = concat ["--",lo,"[=",ad,"]"] + + fun fmtOpt {short=sos, long=los, desc=ad, help=descr} = ( + String.concatWith ", " (map (fmtShort ad) (S.explode sos)), + String.concatWith ", " (map (fmtLong ad) los), + descr) + + (* Usage information *) + fun usageInfo {header, options} = let + fun unlines l = sepBy ("\n", l) + val fmtOptions = List.map fmtOpt options + val (ms1, ms2) = foldl + (fn ((e1,e2,_), (m1,m2)) => ( + Int.max (size e1, m1), + Int.max (size e2, m2) + )) (0,0) fmtOptions + val indent = StringCvt.padLeft #" " (ms1 + ms2 + 6) + val pad = StringCvt.padRight #" " + fun doEntry ((e1, e2, e3), l) = ( + case String.fields (fn #"\n" => true | _ => false) e3 + of [] => concat[" ", pad ms1 e1, " ", pad ms2 e2] :: l + | [e3] => concat[" ", pad ms1 e1, " ", pad ms2 e2, " ", e3] :: l + | fst::rest => + concat[" ", pad ms1 e1, " ", pad ms2 e2, " ", fst] + :: List.foldr (fn (s, l) => (indent "" ^ s) :: l) l rest + (* end case *)) + val table = List.foldr doEntry [""] fmtOptions + in + String.concatWith "\n" (header::table) + end + + (* entry point of the library + *) + + fun getOpt {argOrder, options : 'a opt_descr list, errFn} = let + (* Some error handling functions *) + fun errAmbig optStr = errFn(usageInfo{ + header = concat[ + "option `", optStr, "' is ambiguous; could be one of:" + ], + options = options + }) + fun errReq (d, optStr) = errFn(concat[ + "option `", optStr, "' requires an argument ", d + ]) + fun errUnrec optStr = errFn(concat[ + "unrecognized option `", optStr, "'" + ]) + fun errNoArg optStr = errFn(concat[ + "option `", optStr, "' does not allow an argument" + ]) + (* handle long option; `subs` is the command-line flag (minus the "--" prefix) + * and `rest` are the rest of the command-line arguments. + *) + fun longOpt (subs, rest) = let + val (opt, arg) = breakAtEq subs + val opt' = SS.string opt + val optStr = "--"^opt' + (* handle the selected options *) + fun handleLong argDesc = (case (argDesc, rest) + of (NoArg act, _) => if (SS.isEmpty arg) + then (Opt(act()), rest) + else if (SS.isPrefix "=" arg) + then (errNoArg optStr; (NonOpt, rest)) + else raise Fail "longOpt: impossible" + | (ReqArg(act, argName), []) => if (SS.isEmpty arg) + then (errReq(argName, optStr); (NonOpt, [])) + else if (SS.isPrefix "=" arg) + then (Opt(act (SS.string (SS.triml 1 arg))), []) + else raise Fail "longOpt: impossible" + | (ReqArg(act, _), r::rs) => if (SS.isEmpty arg) + then (Opt(act r), rs) + else if (SS.isPrefix "=" arg) + then (Opt(act (SS.string (SS.triml 1 arg))), rest) + else raise Fail "longOpt: impossible" + | (OptArg(act, _), _) => if (SS.isEmpty arg) + then (Opt(act NONE), rest) + else if (SS.isPrefix "=" arg) + then (Opt(act (SOME (SS.string (SS.triml 1 arg)))), rest) + else raise Fail "longOpt: impossible" + (* end case *)) + (* search the long options for a match; we allow a unique prefix of an + * option, but an exact match will take precedence. E.g., if the long options + * are "--foo", "--foobar", and "--foobaz", then "--foo" will match the first, + * but "--foob" will be flagged as ambiguous. + *) + fun findOption ([], [], NONE) = (errUnrec optStr; (NonOpt, rest)) + | findOption ([], _, SOME argDesc) = handleLong argDesc + | findOption ([], [argDesc], NONE) = handleLong argDesc + | findOption ([], _::_::_, NONE) = (errAmbig optStr; (NonOpt, rest)) + | findOption ((descr : 'a opt_descr)::descrs, prefixMatches, exactMatch) = ( + case List.filter (S.isPrefix opt') (#long descr) + of [] => findOption (descrs, prefixMatches, exactMatch) + | flgs => if List.exists (fn flg => (flg = opt')) flgs + then if Option.isSome exactMatch + then (errAmbig optStr; (NonOpt, rest)) + else findOption (descrs, prefixMatches, SOME(#desc descr)) + else findOption (descrs, #desc descr :: prefixMatches, exactMatch) + (* end case *)) + in + findOption (options, [], NONE) + end + (* handle short option. x is the option character, subs is the + * rest of the option string, rest is the rest of the command-line + * options. + *) + fun shortOpt (x, subs, rest) = let + val options = + List.filter (fn {short,...} => Char.contains short x) options + val ads = map #desc options + val optStr = "-"^(str x) + in + case (ads, rest) + of (_::_::_, rest1) => (errAmbig optStr; (NonOpt, rest1)) + | ((NoArg a)::_, rest') => + if (SS.isEmpty subs) + then (Opt(a()), rest') + else (Opt(a()), ("-"^(SS.string subs))::rest') + | ((ReqArg(f,d))::_, []) => + if (SS.isEmpty subs) + then (errReq(d, optStr); (NonOpt, [])) + else (Opt(f (SS.string subs)), []) + | ((ReqArg(f,_))::_, rest' as (r::rs)) => + if (SS.isEmpty subs) + then (Opt(f r), rs) + else (Opt(f (SS.string subs)), rest') + | ((OptArg(f,_))::_, rest') => + if (SS.isEmpty subs) + then (Opt(f NONE), rest') + else (Opt(f (SOME(SS.string subs))), rest') + | ([], rest') => (errUnrec optStr; (NonOpt, rest')) + (* end case *) + end + fun get ([], opts, nonOpts) = (List.rev opts, List.rev nonOpts) + | get ("--"::rest, opts, nonOpts) = let + val nonOpts = List.revAppend(nonOpts, "--" :: rest) + in + case argOrder + of ReturnInOrder f => (List.revAppend(opts, List.map f nonOpts), []) + | _ => (List.rev opts, nonOpts) + (* end case *) + end + | get (arg::rest, opts, nonOpts) = let + val arg' = SS.full arg + fun addOpt (Opt opt, rest) = get(rest, opt::opts, nonOpts) + | addOpt (NonOpt, rest) = get(rest, opts, arg::nonOpts) + in + if (SS.isPrefix "--" arg') + then addOpt(longOpt (SS.triml 2 arg', rest)) + else if (SS.isPrefix "-" arg') + then addOpt(shortOpt (SS.sub(arg', 1), SS.triml 2 arg', rest)) + else (case argOrder + of RequireOrder => (List.rev opts, List.revAppend(nonOpts, arg::rest)) + | Permute => get(rest, opts, arg::nonOpts) + | ReturnInOrder f => get(rest, f arg :: opts, nonOpts) + (* end case *)) + end + in + fn args => get(args, [], []) + end (* getOpt *) + + end + diff --git a/smlnj-lib/Util/graph-scc-fn.sml b/smlnj-lib/Util/graph-scc-fn.sml new file mode 100644 index 0000000..da4c441 --- /dev/null +++ b/smlnj-lib/Util/graph-scc-fn.sml @@ -0,0 +1,138 @@ +(* graph-scc-fn.sml + * + * COPYRIGHT (c) 2020 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * Calculate strongly-connected components of directed graph. + * The graph can have nodes with self-loops. + * + * author: Matthias Blume + *) + +functor GraphSCCFn (Nd: ORD_KEY) :> GRAPH_SCC where Nd = Nd = + struct + structure Nd = Nd + + type node = Nd.ord_key + + structure Map = RedBlackMapFn (Nd) + + datatype component + = SIMPLE of node + | RECURSIVE of node list + + fun eq x y = (Nd.compare(x, y) = EQUAL) + + fun topOrder' { roots, follow } = let + + fun getNode (n, nm as (npre, m)) = ( + case Map.find (m, n) + of NONE => let + val r = { pre = npre, low = ref npre } + val m' = Map.insert (m, n, r) + in + ((npre + 1, m'), r) + end + | SOME r => (nm, r) + (* end case *)) + + fun component (x, []) = + if List.exists (eq x) (follow x) then RECURSIVE [x] + else SIMPLE x + | component (x, xl) = RECURSIVE (x :: xl) + + (* depth-first search in continuation-passing, state-passing style *) + fun dfs args = let + + (* the nodemap represents the mapping from nodes to + * pre-order numbers and low-numbers. The latter are ref-cells. + * nodemap also remembers the next available pre-order number. + * The current node itself is not given as an argument. + * Instead, it is represented by grab_cont -- a function + * that "grabs" a component from the current stack and then + * continues with the regular continuation. We do it this + * way to be able to handle the topmost virtual component -- + * the one whose sole element is the virtual root node. *) + val { follow_nodes, grab_cont, + node_pre, node_low, parent_low, nodemap, + stack, sccl, nograb_cont } = args + + (* loop over the follow-set of a node *) + fun loop (tn :: tnl) (nodemap as (npre, theMap), stack, sccl) = + let val is_tn = eq tn + in + case Map.find (theMap, tn) of + SOME{ pre = tn_pre, low = tn_low } => let + val tl = !tn_low + in + if tl < (!node_low) andalso + List.exists is_tn stack then + node_low := tl + else (); + loop tnl (nodemap, stack, sccl) + end + | NONE =>let + (* lookup failed -> tn is a new node *) + val tn_pre = npre + val tn_low = ref npre + val npre = npre + 1 + val theMap = + Map.insert (theMap, tn, + { pre = tn_pre, low = tn_low }) + val nodemap = (npre, theMap) + val tn_nograb_cont = loop tnl + fun tn_grab_cont (nodemap, sccl) = let + fun grab (top :: stack, scc) = + if eq tn top then + tn_nograb_cont + (nodemap, stack, + component (top, scc) :: sccl) + else + grab (stack, top :: scc) + | grab _ = + raise Fail "scc:grab: empty stack" + in + grab + end + in + dfs { follow_nodes = follow tn, + grab_cont = tn_grab_cont, + node_pre = tn_pre, node_low = tn_low, + parent_low = node_low, + nodemap = nodemap, + stack = tn :: stack, + sccl = sccl, + nograb_cont = tn_nograb_cont } + end + end + | loop [] (nodemap, stack, sccl) = + let val nl = !node_low + in + if nl = node_pre then + grab_cont (nodemap, sccl) (stack, []) + else + ((* propagate node_low up *) + if nl < (!parent_low) then parent_low := nl else (); + (* `return' *) + nograb_cont (nodemap, stack, sccl)) + end + in + loop (rev follow_nodes) (nodemap, stack, sccl) + end + fun top_grab_cont (nodemap, sccl) ([], []) = sccl + | top_grab_cont _ _ = raise Fail "scc:top_grab: stack not empty" + in + dfs { follow_nodes = roots, + grab_cont = top_grab_cont, + node_pre = 0, + node_low = ref 0, (* low of virtual root *) + parent_low = ref 0, (* low of virtual parent of virtual root *) + nodemap = (1, Map.empty), + stack = [], + sccl = [], + nograb_cont = fn (_, _, _) => raise Fail "scc:top_nograb_cont" } + end + + fun topOrder { root, follow } = + topOrder' { roots = [root], follow = follow } + end diff --git a/smlnj-lib/Util/graph-scc-sig.sml b/smlnj-lib/Util/graph-scc-sig.sml new file mode 100644 index 0000000..3f7c7ad --- /dev/null +++ b/smlnj-lib/Util/graph-scc-sig.sml @@ -0,0 +1,37 @@ +(* graph-scc-sig.sml + * + * COPYRIGHT (c) 2020 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * Calculate strongly-connected components of directed graph. + * The graph can have nodes with self-loops. + * + * author: Matthias Blume + *) + +signature GRAPH_SCC = + sig + + structure Nd : ORD_KEY + + type node = Nd.ord_key + + datatype component + = SIMPLE of node (* singleton, no self-loop *) + | RECURSIVE of node list + + val topOrder': { roots: node list, follow: node -> node list } + -> component list + (* take root node(s) and follow function and return + * list of topologically sorted strongly-connected components; + * the component that contains the first of the given "roots" + * goes first + *) + + val topOrder : { root: node, follow: node -> node list } + -> component list + (* for backward compatibility; + * AXIOM: topOrder{root,follow}==topOrder'{roots=[root],follow=follow} + *) + + end diff --git a/smlnj-lib/Util/hash-key-sig.sml b/smlnj-lib/Util/hash-key-sig.sml new file mode 100644 index 0000000..7f48895 --- /dev/null +++ b/smlnj-lib/Util/hash-key-sig.sml @@ -0,0 +1,27 @@ +(* hash-key-sig.sml + * + * COPYRIGHT (c) 2018 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * Abstract hash table keys. This is the argument signature for the hash table + * functor (see hash-table-sig.sml and hash-table.sml). + * + * AUTHOR: John Reppy + * University of Chicago + * https://cs.uchicago.edu/~jhr + *) + +signature HASH_KEY = + sig + type hash_key + + val hashVal : hash_key -> word + (* Compute an unsigned integer key from a hash key. *) + + val sameKey : (hash_key * hash_key) -> bool + (* Return true if two keys are the same. + * NOTE: if sameKey(h1, h2), then it must be the + * case that (hashVal h1 = hashVal h2). + *) + + end (* HASH_KEY *) diff --git a/smlnj-lib/Util/hash-set-fn.sml b/smlnj-lib/Util/hash-set-fn.sml new file mode 100644 index 0000000..1746da2 --- /dev/null +++ b/smlnj-lib/Util/hash-set-fn.sml @@ -0,0 +1,327 @@ +(* hash-set-fn.sml + * + * COPYRIGHT (c) 2023 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * AUTHOR: John Reppy + * University of Chicago + * https://cs.uchicago.edu/~jhr + *) + +functor HashSetFn (Key : HASH_KEY) : MONO_HASH_SET = + struct + + structure Key = Key + (* NOTE: someday we will change the HASH_KEY signature to follow the naming conventions of + * the SML basis, so we use those names internally to ease future porting. + *) + type item = Key.hash_key + val hash = Key.hashVal + val same = Key.sameKey + + datatype bucket + = NIL + | B of (word * item * bucket) + + datatype set = SET of { + table : bucket array ref, + nItems : int ref + } + + fun index (i, sz) = Word.toIntX(Word.andb(i, Word.fromInt sz - 0w1)) + + (* minimum and maximum hash table sizes. We use powers of two for hash table + * sizes, since that give efficient indexing, and assume a minimum size of 32. + *) + val minSize = 32 + val maxSize = MaxHashTableSize.maxSize + + (* round up `n` to the next hash-table size *) + fun roundUp n = if (n >= maxSize) + then maxSize + else let + fun f i = if (i >= n) then i else f(i + i) + in + f minSize + end + + (* Create a new table; the int is a size hint and the exception + * is to be raised by find. + *) + fun alloc sizeHint = Array.array(roundUp sizeHint, NIL) + + (* grow a table to the specified size *) + fun growTable (table, newSz) = let + val newArr = Array.array (newSz, NIL) + fun copy NIL = () + | copy (B(h, key, rest)) = let + val indx = index (h, newSz) + in + Array.update (newArr, indx, B(h, key, Array.sub(newArr, indx))); + copy rest + end + in + Array.app copy table; + newArr + end + + (* conditionally grow a table; return true if it grew. *) + fun growTableIfNeeded (table, nItems) = let + val arr = !table + val sz = Array.length arr + in + if (nItems >= sz) + then (table := growTable (arr, sz+sz); true) + else false + end + + (* reverse-append for buckets *) + fun revAppend (NIL, b) = b + | revAppend (B(h, x, r), b) = revAppend(r, B(h, x, b)) + + fun addWithHash (tbl as SET{table, nItems}, h, item) = let + val arr = !table + val sz = Array.length arr + val indx = index (h, sz) + fun look NIL = ( + Array.update(arr, indx, B(h, item, Array.sub(arr, indx))); + nItems := !nItems + 1; + growTableIfNeeded (table, !nItems); + NIL) + | look (B(h', item', r)) = if ((h = h') andalso same(item, item')) + then NIL (* item already present *) + else (case (look r) + of NIL => NIL + | rest => B(h', item', rest) + (* end case *)) + in + case (look (Array.sub (arr, indx))) + of NIL => () + | b => Array.update(arr, indx, b) + (* end case *) + end + + (* Add an item to a set *) + fun add (tbl, item) = addWithHash(tbl, hash item, item) + fun addc set item = add(set, item) + + (* The empty set *) + fun mkEmpty sizeHint = SET{ + table = ref (alloc sizeHint), + nItems = ref 0 + } + + (* Create a singleton set *) + fun mkSingleton item = let + val set = mkEmpty minSize + in + add (set, item); + set + end + + (* create a set from a list of items *) + fun mkFromList items = let + val set = mkEmpty(List.length items) + in + List.app (addc set) items; + set + end + + fun copy (SET{table=ref tbl, nItems}) = SET{ + table = ref(Array.tabulate(Array.length tbl, fn i => Array.sub(tbl, i))), + nItems = ref(!nItems) + } + + (* Return a list of the items in the set *) + fun toList (SET{table, nItems}) = + if (!nItems = 0) + then [] + else let + fun f (NIL, l) = l + | f (B(_, x, r), l) = f(r, x::l) + in + Array.foldl f [] (!table) + end + + (* Insert items from list. *) + fun addList (set, items) = List.app (addc set) items + + (* Remove an item. Raise NotFound if not found. *) + fun delete (SET{table, nItems}, item) = let + val arr = !table + val sz = Array.length arr + val h = hash item + val indx = index (h, sz) + fun look (_, NIL) = false + | look (prefix, B(h', item', r)) = if ((h = h') andalso same(item, item')) + then ( + Array.update(arr, indx, revAppend(prefix, r)); + nItems := !nItems - 1; + true) + else look (B(h', item', prefix), r) + in + look (NIL, Array.sub(arr, indx)) + end + + (* Remove the item, if it is in the set. Otherwise the set is unchanged. *) + fun subtract (set, item) = ignore(delete (set, item)) + fun subtractc set item = subtract(set, item) + + fun subtractList (set, items) = List.app (subtractc set) items + + (* Return true if and only if item is an element in the set *) + fun member (SET{table, ...}, item) = let + val arr = !table + val sz = Array.length arr + val h = hash item + val indx = index (h, sz) + fun look NIL = false + | look (B(h', item', r)) = ((h = h') andalso same(item, item')) orelse look r + in + look (Array.sub(arr, indx)) + end + + (* Return true if and only if the set is empty *) + fun isEmpty (SET{nItems, ...}) = (!nItems = 0) + + (* Return true if and only if the first set is a subset of the second *) + fun isSubset (SET{table=tbl1, nItems=n1}, s2 as SET{table=tbl2, nItems=n2}) = + if (!n1 <= !n2) + then let + val arr1 = !tbl1 and arr2 = !tbl2 + val sz1 = Array.length arr1 and sz2 = Array.length arr2 + fun lp i = if (i <= sz1) + then let + (* iterate over the items in bucket i *) + fun look1 NIL = lp(i+1) + | look1 (B(h, item, r)) = let + (* search s2 for the item *) + fun look2 NIL = false + | look2 (B(h', item', r')) = + if ((h = h') andalso same(item, item')) + then look1 r + else look2 r' + in + look2 (Array.sub(arr2, index (h, sz2))) + end + in + look1 (Array.sub(arr1, i)) + end + else true + in + lp 0 + end + else false + + (* Return the number of items in the table *) + fun numItems (SET{nItems, ...}) = !nItems + + (* Create a new set by applying a map function to the elements + * of the set. + *) + fun map f (SET{nItems, table}) = let + val s = mkEmpty (!nItems) + fun mapf NIL = () + | mapf (B(_, x, r)) = (add(s, f x); mapf r) + in + Array.app mapf (!table); + s + end + + fun mapPartial f (SET{nItems, table}) = let + val s = mkEmpty (!nItems) + fun mapf NIL = () + | mapf (B(_, x, r)) = (case f x + of SOME x' => (add(s, x'); mapf r) + | NONE => mapf r + (* end case *)) + in + Array.app mapf (!table); + s + end + + (* Apply a function to the entries of the set. *) + fun app f (SET{nItems, table}) = let + fun appf NIL = () + | appf (B(_, x, r)) = (f x; appf r) + in + Array.app appf (!table) + end + + (* Apply a folding function to the entries of the set. *) + fun fold f init (SET{nItems, table}) = let + fun foldf (NIL, acc) = acc + | foldf (B(_, x, r), acc) = foldf (r, f(x, acc)) + in + Array.foldl foldf init (!table) + end + + fun partition pred (SET{table, nItems}) = let + val n = (!nItems div 2) + 1 + val ts = mkEmpty n + val fs = mkEmpty n + fun part NIL = () + | part (B(h, x, r)) = if pred x + then (addWithHash(ts, h, x); part r) + else (addWithHash(fs, h, x); part r) + in + Array.app part (!table); + (ts, fs) + end + + fun filter pred (SET{table=ref tbl, nItems}) = let + val len = Array.length tbl + fun remove (_, 0) = () + | remove (i, n) = if (i < len) + then (case Array.sub(tbl, i) + of NIL => remove(i+1, n) + | bucket => let + fun rmv (NIL, items, n) = ( + Array.update(tbl, i, items); + remove (i+1, n)) + | rmv (B(h, x, r), items, n) = if pred x + then rmv(r, B(h, x, items), n) + else rmv(r, items, n-1) + in + rmv (bucket, NIL, n) + end + (* end case *)) + else nItems := n + in + remove (0, !nItems) + end + + fun exists pred (SET{table, ...}) = let + fun chk NIL = false + | chk (B(_, x, r)) = pred x orelse chk r + in + Array.exists chk (!table) + end + + fun all pred (SET{table, ...}) = let + fun chk NIL = true + | chk (B(_, x, r)) = pred x andalso chk r + in + Array.all chk (!table) + end + + fun find pred (SET{table=ref tbl, ...}) = let + val len = Array.length tbl + fun find' i = if (i < len) + then let + fun chk NIL = find' (i+1) + | chk (B(_, x, r)) = if pred x then SOME x else chk r + in + chk (Array.sub(tbl, i)) + end + else NONE + in + find' 0 + end + + (* DEPRECATED FUNCTIONS *) + + val listItems = toList + val without = subtract + + end diff --git a/smlnj-lib/Util/hash-string.sml b/smlnj-lib/Util/hash-string.sml new file mode 100644 index 0000000..98f9eeb --- /dev/null +++ b/smlnj-lib/Util/hash-string.sml @@ -0,0 +1,14 @@ +(* hash-string.sml + * + * COPYRIGHT (c) 2020 + * All rights reserved. + *) + +structure HashString : sig + + val hashString : string -> word + + val hashSubstring : substring -> word + + end = FNVHash + diff --git a/smlnj-lib/Util/hash-table-fn.sml b/smlnj-lib/Util/hash-table-fn.sml new file mode 100644 index 0000000..5ef14db --- /dev/null +++ b/smlnj-lib/Util/hash-table-fn.sml @@ -0,0 +1,223 @@ +(* hash-table-fn.sml + * + * COPYRIGHT (c) 2024 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * A hash table functor. It takes a key type with two operations: sameKey and + * hashVal as arguments (see hash-key-sig.sml). + * + * AUTHOR: John Reppy + * University of Chicago + * https://cs.uchicago.edu/~jhr + *) + +functor HashTableFn (Key : HASH_KEY) : MONO_HASH_TABLE = + struct + + structure Key = Key + open Key + + structure HTRep = HashTableRep + + datatype 'a hash_table = HT of { + not_found : exn, + table : (hash_key, 'a) HTRep.table ref, + n_items : int ref + } + + fun index (i, sz) = Word.toIntX(Word.andb(i, Word.fromInt sz - 0w1)) + + (* Create a new table; the int is a size hint and the exception + * is to be raised by find. + *) + fun mkTable (sizeHint, notFound) = HT{ + not_found = notFound, + table = ref (HTRep.alloc sizeHint), + n_items = ref 0 + } + + (* remove all elements from the table *) + fun clear (HT{table, n_items, ...}) = (HTRep.clear(!table); n_items := 0) + + fun insertWithi combine (tbl as HT{table, n_items, ...}) (key, item) = let + val arr = !table + val sz = Array.length arr + val hash = hashVal key + val indx = index (hash, sz) + fun look HTRep.NIL = ( + Array.update(arr, indx, HTRep.B(hash, key, item, Array.sub(arr, indx))); + n_items := !n_items + 1; + HTRep.growTableIfNeeded (table, !n_items); + HTRep.NIL) + | look (HTRep.B(h, k, v, r)) = if ((hash = h) andalso sameKey(key, k)) + then HTRep.B(hash, key, combine(k, v, item), r) + else (case (look r) + of HTRep.NIL => HTRep.NIL + | rest => HTRep.B(h, k, v, rest) + (* end case *)) + in + case (look (Array.sub (arr, indx))) + of HTRep.NIL => () + | b => Array.update(arr, indx, b) + (* end case *) + end + + fun insertWith combine = insertWithi (fn (_, v1, v2) => combine(v1, v2)) + + (* Insert an item. If the key already has an item associated with it, + * then the old item is discarded. + *) + fun insert (tbl as HT{table, n_items, ...}) (key, item) = let + val arr = !table + val sz = Array.length arr + val hash = hashVal key + val indx = index (hash, sz) + fun look HTRep.NIL = ( + Array.update(arr, indx, HTRep.B(hash, key, item, Array.sub(arr, indx))); + n_items := !n_items + 1; + HTRep.growTableIfNeeded (table, !n_items); + HTRep.NIL) + | look (HTRep.B(h, k, v, r)) = if ((hash = h) andalso sameKey(key, k)) + then HTRep.B(hash, key, item, r) + else (case (look r) + of HTRep.NIL => HTRep.NIL + | rest => HTRep.B(h, k, v, rest) + (* end case *)) + in + case (look (Array.sub (arr, indx))) + of HTRep.NIL => () + | b => Array.update(arr, indx, b) + (* end case *) + end + + (* return true, if the key is in the domain of the table *) + fun inDomain (HT{table, ...}) key = let + val arr = !table + val hash = hashVal key + val indx = index (hash, Array.length arr) + fun look HTRep.NIL = false + | look (HTRep.B(h, k, v, r)) = + ((hash = h) andalso sameKey(key, k)) orelse look r + in + look (Array.sub (arr, indx)) + end + + (* find an item, the table's exception is raised if the item doesn't exist *) + fun lookup (HT{table, not_found, ...}) key = let + val arr = !table + val hash = hashVal key + val indx = index (hash, Array.length arr) + fun look HTRep.NIL = raise not_found + | look (HTRep.B(h, k, v, r)) = if ((hash = h) andalso sameKey(key, k)) + then v + else look r + in + look (Array.sub (arr, indx)) + end + + (* look for an item, return NONE if the item doesn't exist *) + fun find (HT{table, ...}) key = let + val arr = !table + val sz = Array.length arr + val hash = hashVal key + val indx = index (hash, sz) + fun look HTRep.NIL = NONE + | look (HTRep.B(h, k, v, r)) = if ((hash = h) andalso sameKey(key, k)) + then SOME v + else look r + in + look (Array.sub (arr, indx)) + end + + fun findAndRemove (HT{not_found, table, n_items}) key = let + val arr = !table + val sz = Array.length arr + val hash = hashVal key + val indx = index (hash, sz) + fun look HTRep.NIL = raise not_found + | look (HTRep.B(h, k, v, r)) = if ((hash = h) andalso sameKey(key, k)) + then (v, r) + else let + val (v', r') = look r + in + (v', HTRep.B(h, k, v, r')) + end + val (v, bucket) = look (Array.sub (arr, indx)) + in + Array.update (arr, indx, bucket); SOME v + end + handle _ => NONE + + (* Remove an item. The table's exception is raised if + * the item doesn't exist. + *) + fun remove (HT{not_found, table, n_items}) key = let + val arr = !table + val sz = Array.length arr + val hash = hashVal key + val indx = index (hash, sz) + fun look HTRep.NIL = raise not_found + | look (HTRep.B(h, k, v, r)) = if ((hash = h) andalso sameKey(key, k)) + then (v, r) + else let val (item, r') = look r in (item, HTRep.B(h, k, v, r')) end + val (item, bucket) = look (Array.sub (arr, indx)) + in + Array.update (arr, indx, bucket); + n_items := !n_items - 1; + item + end (* remove *) + + (* Return the number of items in the table *) + fun numItems (HT{n_items, ...}) = !n_items + + (* return a list of the items in the table *) + fun listItems (HT{table = ref arr, n_items, ...}) = + HTRep.listItems (arr, n_items) + fun listItemsi (HT{table = ref arr, n_items, ...}) = + HTRep.listItemsi (arr, n_items) + + (* Apply a function to the entries of the table *) + fun appi f (HT{table, ...}) = HTRep.appi f (! table) + fun app f (HT{table, ...}) = HTRep.app f (! table) + + (* Map a table to a new table that has the same keys and exception *) + fun mapi f (HT{table, n_items, not_found}) = HT{ + table = ref(HTRep.mapi f (! table)), + n_items = ref(!n_items), + not_found = not_found + } + fun map f (HT{table, n_items, not_found}) = HT{ + table = ref(HTRep.map f (! table)), + n_items = ref(!n_items), + not_found = not_found + } + + (* Fold a function over the entries of the table *) + fun foldi f init (HT{table, ...}) = HTRep.foldi f init (! table) + fun fold f init (HT{table, ...}) = HTRep.fold f init (! table) + + (* modify the hash-table items in place *) + fun modifyi f (HT{table, ...}) = HTRep.modifyi f (!table) + fun modify f (HT{table, ...}) = HTRep.modify f (!table) + + (* remove any hash table items that do not satisfy the given + * predicate. + *) + fun filteri pred (HT{table, n_items, ...}) = + n_items := HTRep.filteri pred (! table) + fun filter pred (HT{table, n_items, ...}) = + n_items := HTRep.filter pred (! table) + + (* Create a copy of a hash table *) + fun copy (HT{table, n_items, not_found}) = HT{ + table = ref(HTRep.copy(! table)), + n_items = ref(!n_items), + not_found = not_found + } + + (* returns a list of the sizes of the various buckets. This is to + * allow users to gauge the quality of their hashing function. + *) + fun bucketSizes (HT{table, ...}) = HTRep.bucketSizes (! table) + + end (* HashTableFn *) diff --git a/smlnj-lib/Util/hash-table-rep.sml b/smlnj-lib/Util/hash-table-rep.sml new file mode 100644 index 0000000..430673f --- /dev/null +++ b/smlnj-lib/Util/hash-table-rep.sml @@ -0,0 +1,253 @@ +(* hash-table-rep.sml + * + * COPYRIGHT (c) 2023 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * This is the internal representation of hash tables, along with some + * utility functions. It is used in both the polymorphic and functor + * hash table implementations. + * + * AUTHOR: John Reppy + * University of Chicago + * https://cs.uchicago.edu/~jhr + *) + +structure HashTableRep : sig + + datatype ('a, 'b) bucket + = NIL + | B of (word * 'a * 'b * ('a, 'b) bucket) + + type ('a, 'b) table = ('a, 'b) bucket array + + val alloc : int -> ('a, 'b) table + (* allocate a table of at least the given size *) + + val growTable : (('a, 'b) table * int) -> ('a, 'b) table + (* grow a table to the specified size *) + + val growTableIfNeeded : (('a, 'b) table ref * int) -> bool + (* conditionally grow a table; the second argument is the number + * of items currently in the table. + *) + + val clear : ('a, 'b) table -> unit + (* remove all items *) + + val listItems : (('a, 'b) table * int ref) -> 'b list + val listItemsi : (('a, 'b) table * int ref) -> ('a * 'b) list + + val appi : ('a * 'b -> 'c) -> ('a, 'b) table -> unit + val app : ('a -> 'b) -> ('c, 'a) table -> unit + + val mapi : ('a * 'b -> 'c) -> ('a, 'b) table -> ('a, 'c) table + val map : ('a -> 'b) -> ('c, 'a) table -> ('c, 'b) table + + val foldi : ('a * 'b * 'c -> 'c) -> 'c -> ('a, 'b) table -> 'c + val fold : ('a * 'b -> 'b) -> 'b -> ('c, 'a) table -> 'b + + val modify : ('b -> 'b) -> ('a, 'b) table -> unit + val modifyi : (('a * 'b) -> 'b) -> ('a, 'b) table -> unit + + val filteri : ('a * 'b -> bool) -> ('a, 'b) table -> int + val filter : ('a -> bool) -> ('b,'a) table -> int + + val copy : ('a, 'b) table -> ('a, 'b) table + + val bucketSizes : ('a, 'b) table -> int list + + end = struct + + datatype ('a, 'b) bucket + = NIL + | B of (word * 'a * 'b * ('a, 'b) bucket) + + type ('a, 'b) table = ('a, 'b) bucket array + + fun index (i, sz) = Word.toIntX(Word.andb(i, Word.fromInt sz - 0w1)) + + (* minimum and maximum hash table sizes. We use powers of two for hash table + * sizes, since that give efficient indexing, and assume a minimum size of 32. + *) + val minSize = 32 + val maxSize = MaxHashTableSize.maxSize + + (* round up `n` to the next hash-table size *) + fun roundUp n = if (n >= maxSize) + then maxSize + else let + fun f i = if (i >= n) then i else f(i + i) + in + f minSize + end + + (* Create a new table; the int is a size hint and the exception + * is to be raised by find. + *) + fun alloc sizeHint = Array.array(roundUp sizeHint, NIL) + + (* grow a table to the specified size *) + fun growTable (table, newSz) = let + val newArr = Array.array (newSz, NIL) + fun copy NIL = () + | copy (B(h, key, v, rest)) = let + val indx = index (h, newSz) + in + Array.update (newArr, indx, + B(h, key, v, Array.sub(newArr, indx))); + copy rest + end + in + Array.app copy table; + newArr + end + + (* conditionally grow a table; return true if it grew. *) + fun growTableIfNeeded (table, nItems) = let + val arr = !table + val sz = Array.length arr + in + if (nItems >= sz) + then (table := growTable (arr, sz+sz); true) + else false + end + + (* remove all items *) + fun clear table = Array.modify (fn _ => NIL) table + + (* return a list of the items in the table *) + fun listItems (table, nItems) = let + fun f (_, l, 0) = l + | f (~1, l, _) = l + | f (i, l, n) = let + fun g (NIL, l, n) = f (i-1, l, n) + | g (B(_, k, v, r), l, n) = g(r, v::l, n-1) + in + g (Array.sub(table, i), l, n) + end + in + f ((Array.length table) - 1, [], !nItems) + end (* listItems *) + fun listItemsi (table, nItems) = let + fun f (_, l, 0) = l + | f (~1, l, _) = l + | f (i, l, n) = let + fun g (NIL, l, n) = f (i-1, l, n) + | g (B(_, k, v, r), l, n) = g(r, (k, v)::l, n-1) + in + g (Array.sub(table, i), l, n) + end + in + f ((Array.length table) - 1, [], !nItems) + end (* listItems *) + + (* Apply a function to the entries of the table *) + fun appi f table = let + fun appF NIL = () + | appF (B(_, key, item, rest)) = (f (key, item); appF rest) + in + Array.app appF table + end (* appi *) + fun app f table = let + fun appF NIL = () + | appF (B(_, key, item, rest)) = (f item; appF rest) + in + Array.app appF table + end (* app *) + + (* Map a table to a new table that has the same keys *) + fun mapi f table = let + fun mapF NIL = NIL + | mapF (B(hash, key, item, rest)) = + B(hash, key, f (key, item), mapF rest) + val newTbl = Array.tabulate ( + Array.length table, + fn i => mapF (Array.sub(table, i))) + in + newTbl + end (* transform *) + + (* Map a table to a new table that has the same keys *) + fun map f table = let + fun mapF NIL = NIL + | mapF (B(hash, key, item, rest)) = B(hash, key, f item, mapF rest) + val newTbl = Array.tabulate ( + Array.length table, + fn i => mapF (Array.sub(table, i))) + in + newTbl + end (* map *) + + fun foldi f init table = let + fun foldF (NIL, accum) = accum + | foldF (B(hash, key, item, rest), accum) = + foldF(rest, f(key, item, accum)) + in + Array.foldl foldF init table + end + fun fold f init table = let + fun foldF (NIL, accum) = accum + | foldF (B(hash, key, item, rest), accum) = + foldF(rest, f(item, accum)) + in + Array.foldl foldF init table + end + + (* modify the hash-table items in place *) + fun modify f table = let + fun modifyF NIL = NIL + | modifyF (B(hash, key, item, rest)) = B(hash, key, f item, modifyF rest) + in + Array.modify modifyF table + end + fun modifyi f table = let + fun modifyF NIL = NIL + | modifyF (B(hash, key, item, rest)) = + B(hash, key, f(key, item), modifyF rest) + in + Array.modify modifyF table + end + + (* remove any hash table items that do not satisfy the given + * predicate. Return the number of items left in the table. + *) + fun filteri pred table = let + val nItems = ref 0 + fun filterP NIL = NIL + | filterP (B(hash, key, item, rest)) = if (pred(key, item)) + then ( + nItems := !nItems+1; + B(hash, key, item, filterP rest)) + else filterP rest + in + Array.modify filterP table; + !nItems + end (* filteri *) + fun filter pred table = let + val nItems = ref 0 + fun filterP NIL = NIL + | filterP (B(hash, key, item, rest)) = if (pred item) + then ( + nItems := !nItems+1; + B(hash, key, item, filterP rest)) + else filterP rest + in + Array.modify filterP table; + !nItems + end (* filter *) + + (* Create a copy of a hash table *) + fun copy table = + Array.tabulate (Array.length table, fn i => Array.sub(table, i)); + + (* returns a list of the sizes of the various buckets. This is to + * allow users to gauge the quality of their hashing function. + *) + fun bucketSizes table = let + fun len (NIL, n) = n + | len (B(_, _, _, r), n) = len(r, n+1) + in + Array.foldr (fn (b, l) => len(b, 0) :: l) [] table + end + + end (* HashTableRep *) diff --git a/smlnj-lib/Util/hash-table-sig.sml b/smlnj-lib/Util/hash-table-sig.sml new file mode 100644 index 0000000..8df0f25 --- /dev/null +++ b/smlnj-lib/Util/hash-table-sig.sml @@ -0,0 +1,106 @@ +(* hash-table-sig.sml + * + * COPYRIGHT (c) 2018 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * The signature of the polymorphic hash table structure. + * + * AUTHOR: John Reppy + * University of Chicago + * https://cs.uchicago.edu/~jhr + *) + +signature HASH_TABLE = + sig + + type ('a, 'b) hash_table + (* type of hash table mapping 'a to 'b *) + + val mkTable : (('a -> word) * (('a * 'a) -> bool)) -> (int * exn) + -> ('a,'b) hash_table + (* Given a hashing function and an equality predicate, create a new table; + * the int is a size hint and the exception is to be raised by find. + *) + + val clear : ('a, 'b) hash_table -> unit + (* remove all elements from the table *) + + val insert : ('a, 'b) hash_table -> ('a * 'b) -> unit + (* Insert an item. If the key already has an item associated with it, + * then the old item is discarded. + *) + + val insertWith : ('b * 'b -> 'b) + -> ('a, 'b) hash_table + -> 'a * 'b + -> unit + (* Insert an item with a combining function to resolve collisions. + * The first argument to the combining function is the existing value, + * and the second argument is the value being inserted into the table. + *) + val insertWithi : ('a * 'b * 'b -> 'b) + -> ('a, 'b) hash_table + -> 'a * 'b + -> unit + (* Like insertWith, except that the combining function also takes the + * key as an argument. + *) + + val inDomain : ('a, 'b) hash_table -> 'a -> bool + (* return true, if the key is in the domain of the table *) + + val lookup : ('a, 'b) hash_table -> 'a -> 'b + (* Find an item, the table's exception is raised if the item doesn't exist *) + + val find : ('a, 'b) hash_table -> 'a -> 'b option + (* Look for an item, return NONE if the item doesn't exist *) + + val findAndRemove : ('a, 'b) hash_table -> 'a -> 'b option + (* If an item with the specified key exists in the table, then it + * is removed and the item is returned. Otherwise, `NONE` is + * returned. + *) + + val remove : ('a, 'b) hash_table -> 'a -> 'b + (* Remove an item, returning the item. The table's exception is raised if + * the item doesn't exist. + *) + + val numItems : ('a, 'b) hash_table -> int + (* Return the number of items in the table *) + + val listItems : ('a, 'b) hash_table -> 'b list + val listItemsi : ('a, 'b) hash_table -> ('a * 'b) list + (* Return a list of the items (and their keys) in the table *) + + val app : ('b -> unit) -> ('a, 'b) hash_table -> unit + val appi : (('a * 'b) -> unit) -> ('a, 'b) hash_table -> unit + (* Apply a function to the entries of the table *) + + val map : ('b -> 'c) -> ('a, 'b) hash_table -> ('a, 'c) hash_table + val mapi : (('a * 'b) -> 'c) -> ('a, 'b) hash_table -> ('a, 'c) hash_table + (* Map a table to a new table that has the same keys *) + + val fold : (('b *'c) -> 'c) -> 'c -> ('a, 'b) hash_table -> 'c + val foldi : (('a * 'b * 'c) -> 'c) -> 'c -> ('a, 'b) hash_table -> 'c + (* Fold a function over the elements of a table *) + + val modify : ('b -> 'b) -> ('a, 'b) hash_table -> unit + val modifyi : (('a * 'b) -> 'b) -> ('a, 'b) hash_table -> unit + (* modify the hash-table items in place *) + + val filter : ('b -> bool) -> ('a, 'b) hash_table -> unit + val filteri : (('a * 'b) -> bool) -> ('a, 'b) hash_table -> unit + (* remove any hash table items that do not satisfy the given + * predicate. + *) + + val copy : ('a, 'b) hash_table -> ('a, 'b) hash_table + (* Create a copy of a hash table *) + + val bucketSizes : ('a, 'b) hash_table -> int list + (* returns a list of the sizes of the various buckets. This is to + * allow users to gauge the quality of their hashing function. + *) + + end (* HASH_TABLE *) diff --git a/smlnj-lib/Util/hash-table.sml b/smlnj-lib/Util/hash-table.sml new file mode 100644 index 0000000..433bcdd --- /dev/null +++ b/smlnj-lib/Util/hash-table.sml @@ -0,0 +1,236 @@ +(* hash-table.sml + * + * COPYRIGHT (c) 2018 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * Polymorphic hash tables. + * + * AUTHOR: John Reppy + * University of Chicago + * https://cs.uchicago.edu/~jhr + *) + +structure HashTable : HASH_TABLE = + struct + + structure HTRep = HashTableRep + + datatype ('a, 'b) hash_table = HT of { + hash_fn : 'a -> word, + eq_pred : ('a * 'a) -> bool, + not_found : exn, + table : ('a, 'b) HTRep.table ref, + n_items : int ref + } + + fun index (i, sz) = Word.toIntX(Word.andb(i, Word.fromInt sz - 0w1)) + + (* find smallest power of 2 (>= 32) that is >= n *) + fun roundUp n = let + fun f i = if (i >= n) then i else f(i * 2) + in + f 32 + end + + (* Create a new table; the int is a size hint and the exception + * is to be raised by find. + *) + fun mkTable (hash, eq) (sizeHint, notFound) = HT{ + hash_fn = hash, + eq_pred = eq, + not_found = notFound, + table = ref (HTRep.alloc sizeHint), + n_items = ref 0 + } + + (* remove all elements from the table *) + fun clear (HT{table, n_items, ...}) = (HTRep.clear(!table); n_items := 0) + + fun insertWithi combine (tbl as HT{hash_fn, eq_pred, table, n_items, ...}) (key, item) = let + val arr = !table + val sz = Array.length arr + val hash = hash_fn key + val indx = index (hash, sz) + fun look HTRep.NIL = ( + Array.update(arr, indx, + HTRep.B(hash, key, item, Array.sub(arr, indx))); + n_items := !n_items + 1; + HTRep.growTableIfNeeded (table, !n_items); + HTRep.NIL) + | look (HTRep.B(h, k, v, r)) = if ((hash = h) andalso eq_pred(key, k)) + then HTRep.B(hash, key, combine(k, v, item), r) + else (case (look r) + of HTRep.NIL => HTRep.NIL + | rest => HTRep.B(h, k, v, rest) + (* end case *)) + in + case (look (Array.sub (arr, indx))) + of HTRep.NIL => () + | b => Array.update(arr, indx, b) + (* end case *) + end + + fun insertWith combine = insertWithi (fn (_, v1, v2) => combine(v1, v2)) + + (* Insert an item. If the key already has an item associated with it, + * then the old item is discarded. + *) + fun insert (tbl as HT{hash_fn, eq_pred, table, n_items, ...}) (key, item) = let + val arr = !table + val sz = Array.length arr + val hash = hash_fn key + val indx = index (hash, sz) + fun look HTRep.NIL = ( + Array.update(arr, indx, + HTRep.B(hash, key, item, Array.sub(arr, indx))); + n_items := !n_items + 1; + HTRep.growTableIfNeeded (table, !n_items); + HTRep.NIL) + | look (HTRep.B(h, k, v, r)) = if ((hash = h) andalso eq_pred(key, k)) + then HTRep.B(hash, key, item, r) + else (case (look r) + of HTRep.NIL => HTRep.NIL + | rest => HTRep.B(h, k, v, rest) + (* end case *)) + in + case (look (Array.sub (arr, indx))) + of HTRep.NIL => () + | b => Array.update(arr, indx, b) + end + + (* return true, if the key is in the domain of the table *) + fun inDomain (HT{hash_fn, eq_pred, table, ...}) key = let + val arr = !table + val hash = hash_fn key + val indx = index (hash, Array.length arr) + fun look HTRep.NIL = false + | look (HTRep.B(h, k, v, r)) = + ((hash = h) andalso eq_pred(key, k)) orelse look r + in + look (Array.sub (arr, indx)) + end + + (* find an item, the table's exception is raised if the item doesn't exist *) + fun lookup (HT{hash_fn, eq_pred, table, not_found, ...}) key = let + val arr = !table + val sz = Array.length arr + val hash = hash_fn key + val indx = index (hash, sz) + fun look HTRep.NIL = raise not_found + | look (HTRep.B(h, k, v, r)) = if ((hash = h) andalso eq_pred(key, k)) + then v + else look r + in + look (Array.sub (arr, indx)) + end + + (* look for an item, return NONE if the item doesn't exist *) + fun find (HT{hash_fn, eq_pred, table, ...}) key = let + val arr = !table + val sz = Array.length arr + val hash = hash_fn key + val indx = index (hash, sz) + fun look HTRep.NIL = NONE + | look (HTRep.B(h, k, v, r)) = if ((hash = h) andalso eq_pred(key, k)) + then SOME v + else look r + in + look (Array.sub (arr, indx)) + end + + fun findAndRemove (HT{hash_fn, eq_pred, not_found, table, n_items}) key = let + val arr = !table + val sz = Array.length arr + val hash = hash_fn key + val indx = index (hash, sz) + fun look HTRep.NIL = raise not_found + | look (HTRep.B(h, k, v, r)) = if ((hash = h) andalso eq_pred(key, k)) + then (v, r) + else let + val (v', r') = look r + in + (v', HTRep.B(h, k, v, r')) + end + val (v, bucket) = look (Array.sub (arr, indx)) + in + Array.update (arr, indx, bucket); SOME v + end + handle _ => NONE + + (* Remove an item. The table's exception is raised if + * the item doesn't exist. + *) + fun remove (HT{hash_fn, eq_pred, not_found, table, n_items}) key = let + val arr = !table + val sz = Array.length arr + val hash = hash_fn key + val indx = index (hash, sz) + fun look HTRep.NIL = raise not_found + | look (HTRep.B(h, k, v, r)) = if ((hash = h) andalso eq_pred(key, k)) + then (v, r) + else let val (item, r') = look r in (item, HTRep.B(h, k, v, r')) end + val (item, bucket) = look (Array.sub (arr, indx)) + in + Array.update (arr, indx, bucket); + n_items := !n_items - 1; + item + end (* remove *) + + (* Return the number of items in the table *) + fun numItems (HT{n_items, ...}) = !n_items + + (* return a list of the items in the table *) + fun listItems (HT{table = ref arr, n_items, ...}) = + HTRep.listItems (arr, n_items) + fun listItemsi (HT{table = ref arr, n_items, ...}) = + HTRep.listItemsi (arr, n_items) + + (* Apply a function to the entries of the table *) + fun appi f (HT{table, ...}) = HTRep.appi f (! table) + fun app f (HT{table, ...}) = HTRep.app f (! table) + + (* Map a table to a new table that has the same keys and exception *) + fun mapi f (HT{hash_fn, eq_pred, table, n_items, not_found}) = HT{ + hash_fn = hash_fn, eq_pred = eq_pred, + table = ref(HTRep.mapi f (! table)), + n_items = ref(!n_items), + not_found = not_found + } + + (* Map a table to a new table that has the same keys and exception *) + fun map f (HT{hash_fn, eq_pred, table, n_items, not_found}) = HT{ + hash_fn = hash_fn, eq_pred = eq_pred, + table = ref(HTRep.map f (! table)), + n_items = ref(!n_items), + not_found = not_found + } + + (* Fold a function over the entries of the table *) + fun foldi f init (HT{table, ...}) = HTRep.foldi f init (! table) + fun fold f init (HT{table, ...}) = HTRep.fold f init (! table) + + (* modify the hash-table items in place *) + fun modifyi f (HT{table, ...}) = HTRep.modifyi f (!table) + fun modify f (HT{table, ...}) = HTRep.modify f (!table) + + (* remove any hash table items that do not satisfy the given + * predicate. + *) + fun filteri pred (HT{table, n_items, ...}) = + n_items := HTRep.filteri pred (! table) + fun filter pred (HT{table, n_items, ...}) = + n_items := HTRep.filter pred (! table) + + (* Create a copy of a hash table *) + fun copy (HT{hash_fn, eq_pred, table, n_items, not_found}) =HT{ + hash_fn = hash_fn, eq_pred = eq_pred, + table = ref(HTRep.copy (! table)), n_items = ref(!n_items), + not_found = not_found + } + + (* returns a list of the sizes of the various buckets. This is to + * allow users to gauge the quality of their hashing function. + *) + fun bucketSizes (HT{table, ...}) = HTRep.bucketSizes(! table) + + end (* HashTable *) diff --git a/smlnj-lib/Util/hash2-table-fn.sml b/smlnj-lib/Util/hash2-table-fn.sml new file mode 100644 index 0000000..55f35d3 --- /dev/null +++ b/smlnj-lib/Util/hash2-table-fn.sml @@ -0,0 +1,276 @@ +(* hash2-table-fn.sml + * + * COPYRIGHT (c) 2018 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * Hash tables that are keyed by two keys (in different domains). + * + * AUTHOR: John Reppy + * University of Chicago + * https://cs.uchicago.edu/~jhr + *) + +functor Hash2TableFn ( + structure Key1 : HASH_KEY + structure Key2 : HASH_KEY + ) : MONO_HASH2_TABLE = struct + + structure Key1 = Key1 + structure Key2 = Key2 + + structure HTRep = HashTableRep + + (* the representation of a double-keyed hash table is two tables + * that will always hold the same number of items and be the same + * size. + *) + datatype 'a hash_table = TBL of { + not_found : exn, + tbl1 : (Key1.hash_key, Key2.hash_key * 'a) HTRep.table ref, + tbl2 : (Key2.hash_key, Key1.hash_key * 'a) HTRep.table ref, + n_items : int ref + } + + fun index (i, sz) = Word.toIntX(Word.andb(i, Word.fromInt sz - 0w1)) + + (* Create a new table; the int is a size hint and the exception + * is to be raised by find. + *) + fun mkTable (n, exn) = TBL{ + not_found = exn, + tbl1 = ref(HTRep.alloc n), + tbl2 = ref(HTRep.alloc n), + n_items = ref 0 + } + + (* remove all elements from the table *) + fun clear (TBL{tbl1, tbl2, n_items, ...}) = ( + HTRep.clear(!tbl1); HTRep.clear(!tbl2); n_items := 0) + + (* Remove an item, returning the item. The table's exception is raised if + * the item doesn't exist. + *) + fun remove (hashVal, sameKey) (arr, not_found, key) = let + val hash = hashVal key + val indx = index (hash, Array.length arr) + fun look HTRep.NIL = raise not_found + | look (HTRep.B(h, k, v, r)) = if ((hash = h) andalso sameKey(key, k)) + then (v, r) + else let val (item, r') = look r in (item, HTRep.B(h, k, v, r')) end + val (item, bucket) = look (Array.sub (arr, indx)) + in + Array.update (arr, indx, bucket); + item + end (* remove *) + fun delete1 (tbl, not_found, k) = + remove (Key1.hashVal, Key1.sameKey) (tbl, not_found, k) + fun delete2 (tbl, not_found, k) = + remove (Key2.hashVal, Key2.sameKey) (tbl, not_found, k) + + fun remove1 (TBL{tbl1, tbl2, n_items, not_found, ...}) k1 = let + val (k2, item) = delete1 (!tbl1, not_found, k1) + in + delete2 (!tbl2, not_found, k2); + n_items := !n_items - 1; + item + end + fun remove2 (TBL{tbl1, tbl2, n_items, not_found, ...}) k2 = let + val (k1, item) = delete2 (!tbl2, not_found, k2) + in + delete1 (!tbl1, not_found, k1); + n_items := !n_items - 1; + item + end + + (* Insert an item. If there is already an item that has either of the two keys, + * then the old item is discarded (from both tables) + *) + fun insert (TBL{tbl1, tbl2, n_items, ...}) (k1, k2, item) = let + val arr1 = !tbl1 and arr2 = !tbl2 + val sz = Array.length arr1 + val h1 = Key1.hashVal k1 and h2 = Key2.hashVal k2 + val i1 = index(h1, sz) and i2 = index(h2, sz) + fun look1 HTRep.NIL = ( + Array.update(arr1, i1, + HTRep.B(h1, k1, (k2, item), Array.sub(arr1, i1))); + (* we increment the number of items and grow the tables here, + * but not when inserting into tbl2. + *) + n_items := !n_items + 1; + if (HTRep.growTableIfNeeded (tbl1, !n_items)) + then tbl2 := HTRep.growTable (arr2, Array.length(! tbl1)) + else (); + HTRep.NIL) + | look1 (HTRep.B(h1', k1', (k2', v), r)) = + if ((h1' = h1) andalso Key1.sameKey(k1', k1)) + then ( + if not(Key2.sameKey(k2, k2')) + then ignore(delete2 (arr2, Fail "insert.look1", k2')) + else (); + HTRep.B(h1, k1, (k2, item), r)) + else (case (look1 r) + of HTRep.NIL => HTRep.NIL + | rest => HTRep.B(h1', k1', (k2', v), rest) + (* end case *)) + fun look2 HTRep.NIL = ( + Array.update(arr2, i2, + HTRep.B(h2, k2, (k1, item), Array.sub(arr2, i2))); + HTRep.NIL) + | look2 (HTRep.B(h2', k2', (k1', v), r)) = + if ((h2' = h2) andalso Key2.sameKey(k2', k2)) + then ( + if not(Key1.sameKey(k1, k1')) + then ignore(delete1 (arr1, Fail "insert.look2", k1')) + else (); + HTRep.B(h2, k2, (k1, item), r)) + else (case (look2 r) + of HTRep.NIL => HTRep.NIL + | rest => HTRep.B(h2, k2, (k1, v), rest) + (* end case *)) + in + case (look1 (Array.sub (arr1, i1)), look2 (Array.sub (arr2, i2))) + of (HTRep.NIL, HTRep.NIL) => () + | (b1, b2) => ( + (* NOTE: both b1 and b2 should be non-nil, since we should + * have replaced an item in both tables. + *) + Array.update(arr1, i1, b1); + Array.update(arr2, i2, b2)) + (* end case *) + end + + (* return true, if the key is in the domain of the table *) + fun inDomain (hashVal, sameKey) tbl key = let + val arr = !tbl + val hash = hashVal key + val indx = index (hash, Array.length arr) + fun look HTRep.NIL = false + | look (HTRep.B(h, k, v, r)) = + ((hash = h) andalso sameKey(key, k)) orelse look r + in + look (Array.sub (arr, indx)) + end + fun inDomain1 (TBL{tbl1, ...}) = inDomain (Key1.hashVal, Key1.sameKey) tbl1 + fun inDomain2 (TBL{tbl2, ...}) = inDomain (Key2.hashVal, Key2.sameKey) tbl2 + + (* Look for an item, the table's exception is raised if the item doesn't exist *) + fun lookup (hashVal, sameKey) (tbl, not_found) key = let + val arr = !tbl + val hash = hashVal key + val indx = index (hash, Array.length arr) + fun look HTRep.NIL = raise not_found + | look (HTRep.B(h, k, (_, v), r)) = + if ((hash = h) andalso sameKey(key, k)) then v else look r + in + look (Array.sub (arr, indx)) + end + fun lookup1 (TBL{tbl1, not_found, ...}) = + lookup (Key1.hashVal, Key1.sameKey) (tbl1, not_found) + fun lookup2 (TBL{tbl2, not_found, ...}) = + lookup (Key2.hashVal, Key2.sameKey) (tbl2, not_found) + + (* Look for an item, return NONE if the item doesn't exist *) + fun find (hashVal, sameKey) table key = let + val arr = !table + val sz = Array.length arr + val hash = hashVal key + val indx = index (hash, sz) + fun look HTRep.NIL = NONE + | look (HTRep.B(h, k, (_, v), r)) = if ((hash = h) andalso sameKey(key, k)) + then SOME v + else look r + in + look (Array.sub (arr, indx)) + end + fun find1 (TBL{tbl1, ...}) = find (Key1.hashVal, Key1.sameKey) tbl1 + fun find2 (TBL{tbl2, ...}) = find (Key2.hashVal, Key2.sameKey) tbl2 + + (* Return the number of items in the table *) + fun numItems (TBL{n_items, ...}) = !n_items + + (* Return a list of the items (and their keys) in the table *) + fun listItems (TBL{tbl1, ...}) = + HTRep.fold (fn ((_, item), l) => item::l) [] (! tbl1) + fun listItemsi (TBL{tbl1, ...}) = + HTRep.foldi (fn (k1, (k2, item), l) => (k1, k2, item)::l) [] (! tbl1) + + (* Apply a function to the entries of the table *) + fun app f (TBL{tbl1, ...}) = + HTRep.app (fn (_, v) => f v) (! tbl1) + fun appi f (TBL{tbl1, ...}) = + HTRep.appi (fn (k1, (k2, v)) => f(k1, k2, v)) (! tbl1) + + (* Map a table to a new table that has the same keys *) + fun map f (TBL{tbl1, tbl2, n_items, not_found}) = let + val sz = Array.length (! tbl1) + val newTbl = TBL{ + tbl1 = ref (HTRep.alloc sz), + tbl2 = ref (HTRep.alloc sz), + n_items = ref 0, + not_found = not_found + } + fun ins (k1, (k2, v)) = insert newTbl (k1, k2, f v) + in + HTRep.appi ins (! tbl1); newTbl + end + fun mapi f (TBL{tbl1, tbl2, n_items, not_found}) = let + val sz = Array.length (! tbl1) + val newTbl = TBL{ + tbl1 = ref (HTRep.alloc sz), + tbl2 = ref (HTRep.alloc sz), + n_items = ref 0, + not_found = not_found + } + fun ins (k1, (k2, v)) = insert newTbl (k1, k2, f(k1, k2, v)) + in + HTRep.appi ins (! tbl1); newTbl + end + +(* TODO: add mapPartial and mapPartiali *) + + fun fold f init (TBL{tbl1, ...}) = + HTRep.fold (fn ((_, v), accum) => f(v, accum)) init (! tbl1) + fun foldi f init (TBL{tbl1, ...}) = + HTRep.foldi (fn (k1, (k2, v), accum) => f(k1, k2, v, accum)) init (! tbl1) + +(* TODO: add modify and modifyi *) + + (* remove any hash table items that do not satisfy the given + * predicate. + *) + fun filter pred (TBL{tbl1, tbl2, n_items, ...}) = let + fun ins (k1, (k2, v)) = if (pred v) + then () + else ( + delete1 (!tbl1, Fail "filter", k1); + delete2 (!tbl2, Fail "filter", k2); + n_items := !n_items-1) + in + HTRep.appi ins (! tbl1) + end + fun filteri pred (TBL{tbl1, tbl2, n_items, not_found}) = let + fun ins (k1, (k2, v)) = if (pred(k1, k2, v)) + then () + else ( + delete1 (!tbl1, Fail "filteri", k1); + delete2 (!tbl2, Fail "filteri", k2); + n_items := !n_items-1) + in + HTRep.appi ins (! tbl1) + end + + (* Create a copy of a hash table *) + fun copy (TBL{tbl1, tbl2, n_items, not_found}) = TBL{ + tbl1 = ref(HTRep.copy (! tbl1)), + tbl2 = ref(HTRep.copy (! tbl2)), + n_items = ref(! n_items), + not_found = not_found + } + + (* returns a list of the sizes of the various buckets. This is to + * allow users to gauge the quality of their hashing function. + *) + fun bucketSizes (TBL{tbl1, tbl2, ...}) = + (HTRep.bucketSizes(! tbl1), HTRep.bucketSizes(! tbl2)) + + end (* MONO_HASH2_TABLE *) diff --git a/smlnj-lib/Util/int-binary-map.sml b/smlnj-lib/Util/int-binary-map.sml new file mode 100644 index 0000000..d718218 --- /dev/null +++ b/smlnj-lib/Util/int-binary-map.sml @@ -0,0 +1,516 @@ +(* int-binary-map.sml + * + * COPYRIGHT (c) 2012 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * COPYRIGHT (c) 1993 by AT&T Bell Laboratories. See COPYRIGHT file for details. + * + * This code was adapted from Stephen Adams' binary tree implementation + * of applicative integer sets. + * + * Copyright 1992 Stephen Adams. + * + * This software may be used freely provided that: + * 1. This copyright notice is attached to any copy, derived work, + * or work including all or part of this software. + * 2. Any derived work must contain a prominent notice stating that + * it has been altered from the original. + * + * + * Name(s): Stephen Adams. + * Department, Institution: Electronics & Computer Science, + * University of Southampton + * Address: Electronics & Computer Science + * University of Southampton + * Southampton SO9 5NH + * Great Britian + * E-mail: sra@ecs.soton.ac.uk + * + * Comments: + * + * 1. The implementation is based on Binary search trees of Bounded + * Balance, similar to Nievergelt & Reingold, SIAM J. Computing + * 2(1), March 1973. The main advantage of these trees is that + * they keep the size of the tree in the node, giving a constant + * time size operation. + * + * 2. The bounded balance criterion is simpler than N&R's alpha. + * Simply, one subtree must not have more than `weight' times as + * many elements as the opposite subtree. Rebalancing is + * guaranteed to reinstate the criterion for weight>2.23, but + * the occasional incorrect behaviour for weight=2 is not + * detrimental to performance. + * + * Altered to work as a geneal intmap - Emden Gansner + *) + +structure IntBinaryMap :> ORD_MAP where type Key.ord_key = Int.int = + struct + + structure Key = + struct + type ord_key = Int.int + val compare = Int.compare + end + + (* + ** val weight = 3 + ** fun wt i = weight * i + *) + fun wt (i : int) = i + i + i + + datatype 'a map + = E + | T of { + key : int, + value : 'a, + cnt : int, + left : 'a map, + right : 'a map + } + + fun isEmpty E = true + | isEmpty _ = false + + fun numItems E = 0 + | numItems (T{cnt,...}) = cnt + + (* return the first item in the map (or NONE if it is empty) *) + fun first E = NONE + | first (T{value, left=E, ...}) = SOME value + | first (T{left, ...}) = first left + + (* return the first item in the map and its key (or NONE if it is empty) *) + fun firsti E = NONE + | firsti (T{key, value, left=E, ...}) = SOME(key, value) + | firsti (T{left, ...}) = firsti left + + local + fun N(k,v,E,E) = T{key=k,value=v,cnt=1,left=E,right=E} + | N(k,v,E,r as T n) = T{key=k,value=v,cnt=1+(#cnt n),left=E,right=r} + | N(k,v,l as T n,E) = T{key=k,value=v,cnt=1+(#cnt n),left=l,right=E} + | N(k,v,l as T n,r as T n') = + T{key=k,value=v,cnt=1+(#cnt n)+(#cnt n'),left=l,right=r} + + fun single_L (a,av,x,T{key=b,value=bv,left=y,right=z,...}) = + N(b,bv,N(a,av,x,y),z) + | single_L _ = raise Match + fun single_R (b,bv,T{key=a,value=av,left=x,right=y,...},z) = + N(a,av,x,N(b,bv,y,z)) + | single_R _ = raise Match + fun double_L (a,av,w,T{key=c,value=cv,left=T{key=b,value=bv,left=x,right=y,...},right=z,...}) = + N(b,bv,N(a,av,w,x),N(c,cv,y,z)) + | double_L _ = raise Match + fun double_R (c,cv,T{key=a,value=av,left=w,right=T{key=b,value=bv,left=x,right=y,...},...},z) = + N(b,bv,N(a,av,w,x),N(c,cv,y,z)) + | double_R _ = raise Match + + fun T' (k,v,E,E) = T{key=k,value=v,cnt=1,left=E,right=E} + | T' (k,v,E,r as T{right=E,left=E,...}) = + T{key=k,value=v,cnt=2,left=E,right=r} + | T' (k,v,l as T{right=E,left=E,...},E) = + T{key=k,value=v,cnt=2,left=l,right=E} + + | T' (p as (_,_,E,T{left=T _,right=E,...})) = double_L p + | T' (p as (_,_,T{left=E,right=T _,...},E)) = double_R p + + (* these cases almost never happen with small weight*) + | T' (p as (_,_,E,T{left=T{cnt=ln,...},right=T{cnt=rn,...},...})) = + if ln < rn then single_L p else double_L p + | T' (p as (_,_,T{left=T{cnt=ln,...},right=T{cnt=rn,...},...},E)) = + if ln > rn then single_R p else double_R p + + | T' (p as (_,_,E,T{left=E,...})) = single_L p + | T' (p as (_,_,T{right=E,...},E)) = single_R p + + | T' (p as (k,v,l as T{cnt=ln,left=ll,right=lr,...}, + r as T{cnt=rn,left=rl,right=rr,...})) = + if rn >= wt ln then (*right is too big*) + let val rln = numItems rl + val rrn = numItems rr + in + if rln < rrn then single_L p else double_L p + end + + else if ln >= wt rn then (*left is too big*) + let val lln = numItems ll + val lrn = numItems lr + in + if lrn < lln then single_R p else double_R p + end + + else T{key=k,value=v,cnt=ln+rn+1,left=l,right=r} + + local + fun min (T{left=E,key,value,...}) = (key,value) + | min (T{left,...}) = min left + | min _ = raise Match + + fun delmin (T{left=E,right,...}) = right + | delmin (T{key,value,left,right,...}) = T'(key,value,delmin left,right) + | delmin _ = raise Match + in + fun delete' (E,r) = r + | delete' (l,E) = l + | delete' (l,r) = let + val (mink,minv) = min r + in + T'(mink,minv,l,delmin r) + end + end (* local *) + in + val empty = E + + fun singleton (x,v) = T{key=x,value=v,cnt=1,left=E,right=E} + + fun insert (E,x,v) = T{key=x,value=v,cnt=1,left=E,right=E} + | insert (T(set as {key,left,right,value,...}),x,v) = + if key > x then T'(key,value,insert(left,x,v),right) + else if key < x then T'(key,value,left,insert(right,x,v)) + else T{key=x,value=v,left=left,right=right,cnt= #cnt set} + fun insert' ((k, x), m) = insert(m, k, x) + + fun insertWithi comb (m, x, v) = let + fun insert E = T{key=x, value=v, cnt=1, left=E, right=E} + | insert (T{key, left, right, value, cnt}) = + if key > x then T'(key, value, insert left,right) + else if key < x then T'(key, value, left, insert right) + else let + val v' = comb(x, value, v) + in + T{key=x,value=v,left=left,right=right,cnt=cnt} + end + in + insert m + end + fun insertWith comb = insertWithi (fn (_, x1, x2) => comb(x1, x2)) + + fun inDomain (set, x) = let + fun mem E = false + | mem (T(n as {key,left,right,...})) = + if x > key then mem right + else if x < key then mem left + else true + in + mem set + end + + fun find (set, x) = let + fun mem E = NONE + | mem (T(n as {key,left,right,...})) = + if x > key then mem right + else if x < key then mem left + else SOME(#value n) + in + mem set + end + + fun lookup (set, x) = let + fun mem E = raise LibBase.NotFound + | mem (T(n as {key,left,right,...})) = + if x > key then mem right + else if x < key then mem left + else #value n + in + mem set + end + + fun remove (E,x) = raise LibBase.NotFound + | remove (set as T{key,left,right,value,...},x) = + if key > x then + let val (left',v) = remove(left,x) + in (T'(key,value,left',right),v) end + else if key < x then + let val (right',v) = remove(right,x) + in (T'(key,value,left,right'),v) end + else (delete'(left,right),value) + + fun findAndRemove arg = SOME(remove arg) handle LibBase.NotFound => NONE + + fun listItems d = let + fun d2l (E, l) = l + | d2l (T{key,value,left,right,...}, l) = + d2l(left, value::(d2l(right,l))) + in + d2l (d,[]) + end + + fun listItemsi d = let + fun d2l (E, l) = l + | d2l (T{key,value,left,right,...}, l) = + d2l(left, (key,value)::(d2l(right,l))) + in + d2l (d,[]) + end + + fun listKeys d = let + fun d2l (E, l) = l + | d2l (T{key,left,right,...}, l) = d2l(left, key::(d2l(right,l))) + in + d2l (d,[]) + end + + local + fun next ((t as T{right, ...})::rest) = (t, left(right, rest)) + | next _ = (E, []) + and left (E, rest) = rest + | left (t as T{left=l, ...}, rest) = left(l, t::rest) + fun start m = left(m, []) + in + fun equiv rngEq (m1, m2) = let + fun cmp (t1, t2) = (case (next t1, next t2) + of ((E, _), (E, _)) => true + | ((E, _), _) => false + | (_, (E, _)) => false + | ((T{key=xk, value=x, ...}, r1), (T{key=yk, value=y, ...}, r2)) => + (xk = yk) andalso rngEq(x, y) andalso cmp (r1, r2) + (* end case *)) + in + cmp (start m1, start m2) + end + fun collate rngCmp (m1, m2) = let + fun cmp (t1, t2) = (case (next t1, next t2) + of ((E, _), (E, _)) => EQUAL + | ((E, _), _) => LESS + | (_, (E, _)) => GREATER + | ((T{key=xk, value=x, ...}, r1), (T{key=yk, value=y, ...}, r2)) => + if (xk = yk) + then (case rngCmp(x, y) + of EQUAL => cmp (r1, r2) + | order => order + (* end case *)) + else if (xk < yk) + then LESS + else GREATER + (* end case *)) + in + cmp (start m1, start m2) + end + fun extends rngEx (m1, m2) = let + (* does t1 extend t2? *) + fun cmp (t1, t2) = (case (next t1, next t2) + of ((E, _), (E, _)) => true + | (_, (E, _)) => true + | ((E, _), _) => false + | ((T{key=xk, value=x, ...}, r1), (T{key=yk, value=y, ...}, r2)) => + if (xk < yk) then cmp (r1, t2) + else (yk = xk) andalso rngEx(x, y) andalso cmp (r1, r2) + (* end case *)) + in + cmp (start m1, start m2) + end + end (* local *) + + fun appi f d = let + fun appf E = () + | appf (T{key,value,left,right,...}) = ( + appf left; f(key,value); appf right) + in + appf d + end + fun app f d = appi (fn (_, v) => f v) d + + fun mapi f d = let + fun mapf E = E + | mapf (T{key,value,left,right,cnt}) = let + val left' = mapf left + val value' = f(key, value) + val right' = mapf right + in + T{cnt=cnt, key=key, value=value', left = left', right = right'} + end + in + mapf d + end + fun map f d = mapi (fn (_, x) => f x) d + + fun foldli f init d = let + fun fold (E,v) = v + | fold (T{key,value,left,right,...},v) = + fold (right, f(key, value, fold(left, v))) + in + fold (d, init) + end + fun foldl f init d = foldli (fn (_, v, accum) => f (v, accum)) init d + + fun foldri f init d = let + fun fold (E,v) = v + | fold (T{key,value,left,right,...},v) = + fold (left, f(key, value, fold(right, v))) + in + fold (d, init) + end + fun foldr f init d = foldri (fn (_, v, accum) => f (v, accum)) init d + + end (* local *) + +(* the following are generic implementations of the unionWith, intersectWith, + * and mergeWith operetions. These should be specialized for the internal + * representations at some point. + *) + fun unionWith f (m1, m2) = let + fun ins f (key, x, m) = (case find(m, key) + of NONE => insert(m, key, x) + | (SOME x') => insert(m, key, f(x, x')) + (* end case *)) + in + if (numItems m1 > numItems m2) + then foldli (ins (fn (a, b) => f (b, a))) m1 m2 + else foldli (ins f) m2 m1 + end + fun unionWithi f (m1, m2) = let + fun ins f (key, x, m) = (case find(m, key) + of NONE => insert(m, key, x) + | (SOME x') => insert(m, key, f(key, x, x')) + (* end case *)) + in + if (numItems m1 > numItems m2) + then foldli (ins (fn (k, a, b) => f (k, b, a))) m1 m2 + else foldli (ins f) m2 m1 + end + + fun intersectWith f (m1, m2) = let + (* iterate over the elements of m1, checking for membership in m2 *) + fun intersect f (m1, m2) = let + fun ins (key, x, m) = (case find(m2, key) + of NONE => m + | (SOME x') => insert(m, key, f(x, x')) + (* end case *)) + in + foldli ins empty m1 + end + in + if (numItems m1 > numItems m2) + then intersect f (m1, m2) + else intersect (fn (a, b) => f(b, a)) (m2, m1) + end + fun intersectWithi f (m1, m2) = let + (* iterate over the elements of m1, checking for membership in m2 *) + fun intersect f (m1, m2) = let + fun ins (key, x, m) = (case find(m2, key) + of NONE => m + | (SOME x') => insert(m, key, f(key, x, x')) + (* end case *)) + in + foldli ins empty m1 + end + in + if (numItems m1 > numItems m2) + then intersect f (m1, m2) + else intersect (fn (k, a, b) => f(k, b, a)) (m2, m1) + end + + fun mergeWith f (m1, m2) = let + fun merge ([], [], m) = m + | merge ((k1, x1)::r1, [], m) = mergef (k1, SOME x1, NONE, r1, [], m) + | merge ([], (k2, x2)::r2, m) = mergef (k2, NONE, SOME x2, [], r2, m) + | merge (m1 as ((k1, x1)::r1), m2 as ((k2, x2)::r2), m) = ( + if (k1 < k2) + then mergef (k1, SOME x1, NONE, r1, m2, m) + else if (k1 = k2) + then mergef (k1, SOME x1, SOME x2, r1, r2, m) + else mergef (k2, NONE, SOME x2, m1, r2, m) + (* end case *)) + and mergef (k, x1, x2, r1, r2, m) = (case f (x1, x2) + of NONE => merge (r1, r2, m) + | SOME y => merge (r1, r2, insert(m, k, y)) + (* end case *)) + in + merge (listItemsi m1, listItemsi m2, empty) + end + fun mergeWithi f (m1, m2) = let + fun merge ([], [], m) = m + | merge ((k1, x1)::r1, [], m) = mergef (k1, SOME x1, NONE, r1, [], m) + | merge ([], (k2, x2)::r2, m) = mergef (k2, NONE, SOME x2, [], r2, m) + | merge (m1 as ((k1, x1)::r1), m2 as ((k2, x2)::r2), m) = ( + if (k1 < k2) + then mergef (k1, SOME x1, NONE, r1, m2, m) + else if (k1 = k2) + then mergef (k1, SOME x1, SOME x2, r1, r2, m) + else mergef (k2, NONE, SOME x2, m1, r2, m) + (* end case *)) + and mergef (k, x1, x2, r1, r2, m) = (case f (k, x1, x2) + of NONE => merge (r1, r2, m) + | SOME y => merge (r1, r2, insert(m, k, y)) + (* end case *)) + in + merge (listItemsi m1, listItemsi m2, empty) + end + + (* this is a generic implementation of filter. It should + * be specialized to the data-structure at some point. + *) + fun filter predFn m = let + fun f (key, item, m) = if predFn item + then insert(m, key, item) + else m + in + foldli f empty m + end + fun filteri predFn m = let + fun f (key, item, m) = if predFn(key, item) + then insert(m, key, item) + else m + in + foldli f empty m + end + + (* this is a generic implementation of mapPartial. It should + * be specialized to the data-structure at some point. + *) + fun mapPartial f m = let + fun g (key, item, m) = (case f item + of NONE => m + | (SOME item') => insert(m, key, item') + (* end case *)) + in + foldli g empty m + end + fun mapPartiali f m = let + fun g (key, item, m) = (case f(key, item) + of NONE => m + | (SOME item') => insert(m, key, item') + (* end case *)) + in + foldli g empty m + end + + (* check the elements of a map with a predicate and return true if + * any element satisfies the predicate. Return false otherwise. + * Elements are checked in key order. + *) + fun exists pred = let + fun exists' E = false + | exists' (T{value, left, right, ...}) = + exists' left orelse pred value orelse exists' right + in + exists' + end + fun existsi pred = let + fun exists' E = false + | exists' (T{key, value, left, right, ...}) = + exists' left orelse pred(key, value) orelse exists' right + in + exists' + end + + (* check the elements of a map with a predicate and return true if + * they all satisfy the predicate. Return false otherwise. Elements + * are checked in key order. + *) + fun all pred = let + fun all' E = true + | all' (T{value, left, right, ...}) = + all' left andalso pred value andalso all' right + in + all' + end + fun alli pred = let + fun all' E = true + | all' (T{key, value, left, right, ...}) = + all' left andalso pred(key, value) andalso all' right + in + all' + end + + end diff --git a/smlnj-lib/Util/int-binary-set.sml b/smlnj-lib/Util/int-binary-set.sml new file mode 100644 index 0000000..617aa57 --- /dev/null +++ b/smlnj-lib/Util/int-binary-set.sml @@ -0,0 +1,476 @@ +(* int-binary-set.sml + * + * COPYRIGHT (c) 1993 by AT&T Bell Laboratories. See COPYRIGHT file for details. + * + * This code was adapted from Stephen Adams' binary tree implementation + * of applicative integer sets. + * + * Copyright 1992 Stephen Adams. + * + * This software may be used freely provided that: + * 1. This copyright notice is attached to any copy, derived work, + * or work including all or part of this software. + * 2. Any derived work must contain a prominent notice stating that + * it has been altered from the original. + * + * Altered to conform to SML library interface - Emden Gansner + * + * + * Name(s): Stephen Adams. + * Department, Institution: Electronics & Computer Science, + * University of Southampton + * Address: Electronics & Computer Science + * University of Southampton + * Southampton SO9 5NH + * Great Britian + * E-mail: sra@ecs.soton.ac.uk + * + * Comments: + * + * 1. The implementation is based on Binary search trees of Bounded + * Balance, similar to Nievergelt & Reingold, SIAM J. Computing + * 2(1), March 1973. The main advantage of these trees is that + * they keep the size of the tree in the node, giving a constant + * time size operation. + * + * 2. The bounded balance criterion is simpler than N&R's alpha. + * Simply, one subtree must not have more than `weight' times as + * many elements as the opposite subtree. Rebalancing is + * guaranteed to reinstate the criterion for weight>2.23, but + * the occasional incorrect behaviour for weight=2 is not + * detrimental to performance. + * + * 3. There are two implementations of union. The default, + * hedge_union, is much more complex and usually 20% faster. I + * am not sure that the performance increase warrants the + * complexity (and time it took to write), but I am leaving it + * in for the competition. It is derived from the original + * union by replacing the split_lt(gt) operations with a lazy + * version. The `obvious' version is called old_union. + * + * 4. Most time is spent in T', the rebalancing constructor. If my + * understanding of the output of * in the sml batch + * compiler is correct then the code produced by NJSML 0.75 + * (sparc) for the final case is very disappointing. Most + * invocations fall through to this case and most of these cases + * fall to the else part, i.e. the plain contructor, + * T(v,ln+rn+1,l,r). The poor code allocates a 16 word vector + * and saves lots of registers into it. In the common case it + * then retrieves a few of the registers and allocates the 5 + * word T node. The values that it retrieves were live in + * registers before the massive save. + *) + +structure IntBinarySet :> ORD_SET where type Key.ord_key = Int.int = + struct + + structure Key = + struct + type ord_key = Int.int + val compare = Int.compare + end + + type item = Key.ord_key + + datatype set + = E + | T of { + elt : item, + cnt : int, + left : set, + right : set + } + + fun numItems E = 0 + | numItems (T{cnt,...}) = cnt + + fun isEmpty E = true + | isEmpty _ = false + + fun minItem E = raise Empty + | minItem (T{elt, left=E, ...}) = elt + | minItem (T{left, ...}) = minItem left + + fun maxItem E = raise Empty + | maxItem (T{elt, right=E, ...}) = elt + | maxItem (T{right, ...}) = maxItem right + + fun mkT(v,n,l,r) = T{elt=v,cnt=n,left=l,right=r} + + (* N(v,l,r) = T(v,1+numItems(l)+numItems(r),l,r) *) + fun N(v,E,E) = mkT(v,1,E,E) + | N(v,E,r as T{cnt=n,...}) = mkT(v,n+1,E,r) + | N(v,l as T{cnt=n,...}, E) = mkT(v,n+1,l,E) + | N(v,l as T{cnt=n,...}, r as T{cnt=m,...}) = mkT(v,n+m+1,l,r) + + fun single_L (a,x,T{elt=b,left=y,right=z,...}) = N(b,N(a,x,y),z) + | single_L _ = raise Match + fun single_R (b,T{elt=a,left=x,right=y,...},z) = N(a,x,N(b,y,z)) + | single_R _ = raise Match + fun double_L (a,w,T{elt=c,left=T{elt=b,left=x,right=y,...},right=z,...}) = + N(b,N(a,w,x),N(c,y,z)) + | double_L _ = raise Match + fun double_R (c,T{elt=a,left=w,right=T{elt=b,left=x,right=y,...},...},z) = + N(b,N(a,w,x),N(c,y,z)) + | double_R _ = raise Match + + (* + ** val weight = 3 + ** fun wt i = weight * i + *) + fun wt (i : int) = i + i + i + + fun T' (v,E,E) = mkT(v,1,E,E) + | T' (v,E,r as T{left=E,right=E,...}) = mkT(v,2,E,r) + | T' (v,l as T{left=E,right=E,...},E) = mkT(v,2,l,E) + + | T' (p as (_,E,T{left=T _,right=E,...})) = double_L p + | T' (p as (_,T{left=E,right=T _,...},E)) = double_R p + + (* these cases almost never happen with small weight*) + | T' (p as (_,E,T{left=T{cnt=ln,...},right=T{cnt=rn,...},...})) = + if lnrn then single_R p else double_R p + + | T' (p as (_,E,T{left=E,...})) = single_L p + | T' (p as (_,T{right=E,...},E)) = single_R p + + | T' (p as (v,l as T{elt=lv,cnt=ln,left=ll,right=lr}, + r as T{elt=rv,cnt=rn,left=rl,right=rr})) = + if rn >= wt ln (*right is too big*) + then + let val rln = numItems rl + val rrn = numItems rr + in + if rln < rrn then single_L p else double_L p + end + else if ln >= wt rn (*left is too big*) + then + let val lln = numItems ll + val lrn = numItems lr + in + if lrn < lln then single_R p else double_R p + end + else mkT(v,ln+rn+1,l,r) + + fun add (E,x) = mkT(x,1,E,E) + | add (set as T{elt=v,left=l,right=r,cnt},x) = ( + case Key.compare(x,v) + of LESS => T'(v,add(l,x),r) + | GREATER => T'(v,l,add(r,x)) + | EQUAL => mkT(x,cnt,l,r) + (* end case *)) + fun add' (s, x) = add(x, s) + + fun concat3 (E,v,r) = add(r,v) + | concat3 (l,v,E) = add(l,v) + | concat3 (l as T{elt=v1,cnt=n1,left=l1,right=r1}, v, + r as T{elt=v2,cnt=n2,left=l2,right=r2}) = + if wt n1 < n2 then T'(v2,concat3(l,v,l2),r2) + else if wt n2 < n1 then T'(v1,l1,concat3(r1,v,r)) + else N(v,l,r) + + fun split_lt (E,x) = E + | split_lt (T{elt=v,left=l,right=r,...},x) = + case Key.compare(v,x) of + GREATER => split_lt(l,x) + | LESS => concat3(l,v,split_lt(r,x)) + | _ => l + + fun split_gt (E,x) = E + | split_gt (T{elt=v,left=l,right=r,...},x) = + case Key.compare(v,x) of + LESS => split_gt(r,x) + | GREATER => concat3(split_gt(l,x),v,r) + | _ => r + + fun min (T{elt=v,left=E,...}) = v + | min (T{left=l,...}) = min l + | min _ = raise Match + + fun delmin (T{left=E,right=r,...}) = r + | delmin (T{elt=v,left=l,right=r,...}) = T'(v,delmin l,r) + | delmin _ = raise Match + + fun delete' (E,r) = r + | delete' (l,E) = l + | delete' (l,r) = T'(min r,l,delmin r) + + fun concat (E, s) = s + | concat (s, E) = s + | concat (t1 as T{elt=v1,cnt=n1,left=l1,right=r1}, + t2 as T{elt=v2,cnt=n2,left=l2,right=r2}) = + if wt n1 < n2 then T'(v2,concat(t1,l2),r2) + else if wt n2 < n1 then T'(v1,l1,concat(r1,t2)) + else T'(min t2,t1, delmin t2) + + + local + fun trim (lo,hi,E) = E + | trim (lo,hi,s as T{elt=v,left=l,right=r,...}) = + if (v > lo) + then if (v < hi) then s else trim(lo,hi,l) + else trim(lo,hi,r) + + fun uni_bd (s,E,_,_) = s + | uni_bd (E,T{elt=v,left=l,right=r,...},lo,hi) = + concat3(split_gt(l,lo),v,split_lt(r,hi)) + | uni_bd (T{elt=v,left=l1,right=r1,...}, + s2 as T{elt=v2,left=l2,right=r2,...},lo,hi) = + concat3(uni_bd(l1,trim(lo,v,s2),lo,v), + v, + uni_bd(r1,trim(v,hi,s2),v,hi)) + (* inv: lo < v < hi *) + + (* all the other versions of uni and trim are + * specializations of the above two functions with + * lo=-infinity and/or hi=+infinity + *) + + fun trim_lo (_, E) = E + | trim_lo (lo,s as T{elt=v,right=r,...}) = + case Key.compare(v,lo) of + GREATER => s + | _ => trim_lo(lo,r) + + fun trim_hi (_, E) = E + | trim_hi (hi,s as T{elt=v,left=l,...}) = + case Key.compare(v,hi) of + LESS => s + | _ => trim_hi(hi,l) + + fun uni_hi (s,E,_) = s + | uni_hi (E,T{elt=v,left=l,right=r,...},hi) = + concat3(l,v,split_lt(r,hi)) + | uni_hi (T{elt=v,left=l1,right=r1,...}, + s2 as T{elt=v2,left=l2,right=r2,...},hi) = + concat3(uni_hi(l1,trim_hi(v,s2),v),v,uni_bd(r1,trim(v,hi,s2),v,hi)) + + fun uni_lo (s,E,_) = s + | uni_lo (E,T{elt=v,left=l,right=r,...},lo) = + concat3(split_gt(l,lo),v,r) + | uni_lo (T{elt=v,left=l1,right=r1,...}, + s2 as T{elt=v2,left=l2,right=r2,...},lo) = + concat3(uni_bd(l1,trim(lo,v,s2),lo,v),v,uni_lo(r1,trim_lo(v,s2),v)) + + fun uni (s,E) = s + | uni (E,s) = s + | uni (T{elt=v,left=l1,right=r1,...}, + s2 as T{elt=v2,left=l2,right=r2,...}) = + concat3(uni_hi(l1,trim_hi(v,s2),v), v, uni_lo(r1,trim_lo(v,s2),v)) + + in + val hedge_union = uni + end + + (* The old_union version is about 20% slower than + * hedge_union in most cases + *) + fun old_union (E,s2) = s2 + | old_union (s1,E) = s1 + | old_union (T{elt=v,left=l,right=r,...},s2) = + let val l2 = split_lt(s2,v) + val r2 = split_gt(s2,v) + in + concat3(old_union(l,l2),v,old_union(r,r2)) + end + + val empty = E + fun singleton x = T{elt=x,cnt=1,left=E,right=E} + + fun addList (s,l) = List.foldl (fn (i,s) => add(s,i)) s l + + fun fromList l = addList (E, l) + + val add = add + + fun member (set, x) = let + fun pk E = false + | pk (T{elt=v, left=l, right=r, ...}) = ( + case Key.compare(x,v) + of LESS => pk l + | EQUAL => true + | GREATER => pk r + (* end case *)) + in + pk set + end + + local + (* true if every item in t is in t' *) + fun treeIn (t,t') = let + fun isIn E = true + | isIn (T{elt,left=E,right=E,...}) = member(t',elt) + | isIn (T{elt,left,right=E,...}) = + member(t',elt) andalso isIn left + | isIn (T{elt,left=E,right,...}) = + member(t',elt) andalso isIn right + | isIn (T{elt,left,right,...}) = + member(t',elt) andalso isIn left andalso isIn right + in + isIn t + end + in + fun isSubset (E,_) = true + | isSubset (_,E) = false + | isSubset (t as T{cnt=n,...},t' as T{cnt=n',...}) = + (n<=n') andalso treeIn (t,t') + + fun equal (E,E) = true + | equal (t as T{cnt=n,...},t' as T{cnt=n',...}) = + (n=n') andalso treeIn (t,t') + | equal _ = false + end + + local + fun next ((t as T{right, ...})::rest) = (t, left(right, rest)) + | next _ = (E, []) + and left (E, rest) = rest + | left (t as T{left=l, ...}, rest) = left(l, t::rest) + in + fun compare (s1, s2) = let + fun cmp (t1, t2) = (case (next t1, next t2) + of ((E, _), (E, _)) => EQUAL + | ((E, _), _) => LESS + | (_, (E, _)) => GREATER + | ((T{elt=e1, ...}, r1), (T{elt=e2, ...}, r2)) => ( + case Key.compare(e1, e2) + of EQUAL => cmp (r1, r2) + | order => order + (* end case *)) + (* end case *)) + in + cmp (left(s1, []), left(s2, [])) + end + + fun disjoint (s1, s2) = let + fun walk (t1, t2) = (case (next t1, next t2) + of ((E, _), _) => true + | (_, (E, _)) => true + | ((T{elt=e1, ...}, r1), (T{elt=e2, ...}, r2)) => + ((e1 < e2) andalso walk (r1, t2)) + orelse ((e1 > e2) andalso walk (t1, r2)) + (* end case *)) + in + walk (left(s1, []), left(s2, [])) + end + end + + fun delete (E,x) = raise LibBase.NotFound + | delete (set as T{elt=v,left=l,right=r,...},x) = + case Key.compare(x,v) of + LESS => T'(v,delete(l,x),r) + | GREATER => T'(v,l,delete(r,x)) + | _ => delete'(l,r) + + val union = hedge_union + + fun intersection (E, _) = E + | intersection (_, E) = E + | intersection (s, T{elt=v,left=l,right=r,...}) = let + val l2 = split_lt(s,v) + val r2 = split_gt(s,v) + in + if member(s,v) + then concat3(intersection(l2,l),v,intersection(r2,r)) + else concat(intersection(l2,l),intersection(r2,r)) + end + + fun difference (E,s) = E + | difference (s,E) = s + | difference (s, T{elt=v,left=l,right=r,...}) = + let val l2 = split_lt(s,v) + val r2 = split_gt(s,v) + in + concat(difference(l2,l),difference(r2,r)) + end + + fun subtract (s, item) = difference (s, singleton item) + fun subtract' (item, s) = subtract (s, item) + + fun subtractList (l, items) = let + val items' = List.foldl (fn (x, set) => add(set, x)) E items + in + difference (l, items') + end + + fun map f set = let + fun map'(acc, E) = acc + | map'(acc, T{elt,left,right,...}) = + map' (add (map' (acc, left), f elt), right) + in + map' (E, set) + end + + fun mapPartial f set = let + fun map' (acc, E) = acc + | map' (acc, T{elt, left, right, ...}) = let + val acc = map' (acc, left) + in + case f elt + of NONE => map' (acc, right) + | SOME elt' => map' (add (acc, elt'), right) + (* end case *) + end + in + map' (E, set) + end + + fun app apf = + let fun apply E = () + | apply (T{elt,left,right,...}) = + (apply left;apf elt; apply right) + in + apply + end + + fun foldl f b set = let + fun foldf (E, b) = b + | foldf (T{elt,left,right,...}, b) = + foldf (right, f(elt, foldf (left, b))) + in + foldf (set, b) + end + + fun foldr f b set = let + fun foldf (E, b) = b + | foldf (T{elt,left,right,...}, b) = + foldf (left, f(elt, foldf (right, b))) + in + foldf (set, b) + end + + fun toList set = foldr (op::) [] set + + fun filter pred set = + foldl (fn (item, s) => if (pred item) then add(s, item) else s) + empty set + + fun partition pred set = + foldl + (fn (item, (s1, s2)) => + if (pred item) then (add(s1, item), s2) else (s1, add(s2, item)) + ) + (empty, empty) set + + fun exists p E = false + | exists p (T{elt, left, right,...}) = + (exists p left) orelse (p elt) orelse (exists p right) + + fun all p E = true + | all p (T{elt, left, right,...}) = + (all p left) andalso (p elt) andalso (all p right) + + fun find p E = NONE + | find p (T{elt,left,right,...}) = (case find p left + of NONE => if (p elt) + then SOME elt + else find p right + | a => a + (* end case *)) + + (* deprecated *) + val listItems = toList + + end (* IntBinarySet *) diff --git a/smlnj-lib/Util/int-hash-table.sml b/smlnj-lib/Util/int-hash-table.sml new file mode 100644 index 0000000..f13ac40 --- /dev/null +++ b/smlnj-lib/Util/int-hash-table.sml @@ -0,0 +1,228 @@ +(* int-hash-table.sml + * + * COPYRIGHT (c) 2024 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * A specialization of the hash table functor to integer keys. + * + * AUTHOR: John Reppy + * University of Chicago + * https://cs.uchicago.edu/~jhr + *) + +structure IntHashTable :> MONO_HASH_TABLE where type Key.hash_key = int = + struct + + structure Key = + struct + type hash_key = int + fun sameKey (a : int, b) = (a = b) + fun hashVal a = Word.fromInt a + end + + open Key + + structure HTRep = HashTableRep + + datatype 'a hash_table = HT of { + not_found : exn, + table : (hash_key, 'a) HTRep.table ref, + n_items : int ref + } + + fun index (i, sz) = Word.toIntX(Word.andb(i, Word.fromInt sz - 0w1)) + + (* Create a new table; the int is a size hint and the exception + * is to be raised by find. + *) + fun mkTable (sizeHint, notFound) = HT{ + not_found = notFound, + table = ref (HTRep.alloc sizeHint), + n_items = ref 0 + } + + (* remove all elements from the table *) + fun clear (HT{table, n_items, ...}) = (HTRep.clear(!table); n_items := 0) + + fun insertWithi combine (tbl as HT{table, n_items, ...}) (key, item) = let + val arr = !table + val sz = Array.length arr + val hash = hashVal key + val indx = index (hash, sz) + fun look HTRep.NIL = ( + Array.update(arr, indx, HTRep.B(hash, key, item, Array.sub(arr, indx))); + n_items := !n_items + 1; + HTRep.growTableIfNeeded (table, !n_items); + HTRep.NIL) + | look (HTRep.B(h, k, v, r)) = if ((hash = h) andalso sameKey(key, k)) + then HTRep.B(hash, key, combine(k, v, item), r) + else (case (look r) + of HTRep.NIL => HTRep.NIL + | rest => HTRep.B(h, k, v, rest) + (* end case *)) + in + case (look (Array.sub (arr, indx))) + of HTRep.NIL => () + | b => Array.update(arr, indx, b) + (* end case *) + end + + fun insertWith combine = insertWithi (fn (_, v1, v2) => combine(v1, v2)) + + (* Insert an item. If the key already has an item associated with it, + * then the old item is discarded. + *) + fun insert (tbl as HT{table, n_items, ...}) (key, item) = let + val arr = !table + val sz = Array.length arr + val hash = hashVal key + val indx = index (hash, sz) + fun look HTRep.NIL = ( + Array.update(arr, indx, HTRep.B(hash, key, item, Array.sub(arr, indx))); + n_items := !n_items + 1; + HTRep.growTableIfNeeded (table, !n_items); + HTRep.NIL) + | look (HTRep.B(h, k, v, r)) = if ((hash = h) andalso sameKey(key, k)) + then HTRep.B(hash, key, item, r) + else (case (look r) + of HTRep.NIL => HTRep.NIL + | rest => HTRep.B(h, k, v, rest) + (* end case *)) + in + case (look (Array.sub (arr, indx))) + of HTRep.NIL => () + | b => Array.update(arr, indx, b) + (* end case *) + end + + (* return true, if the key is in the domain of the table *) + fun inDomain (HT{table, ...}) key = let + val arr = !table + val hash = hashVal key + val indx = index (hash, Array.length arr) + fun look HTRep.NIL = false + | look (HTRep.B(h, k, v, r)) = + ((hash = h) andalso sameKey(key, k)) orelse look r + in + look (Array.sub (arr, indx)) + end + + (* find an item, the table's exception is raised if the item doesn't exist *) + fun lookup (HT{table, not_found, ...}) key = let + val arr = !table + val hash = hashVal key + val indx = index (hash, Array.length arr) + fun look HTRep.NIL = raise not_found + | look (HTRep.B(h, k, v, r)) = if ((hash = h) andalso sameKey(key, k)) + then v + else look r + in + look (Array.sub (arr, indx)) + end + + (* look for an item, return NONE if the item doesn't exist *) + fun find (HT{table, ...}) key = let + val arr = !table + val sz = Array.length arr + val hash = hashVal key + val indx = index (hash, sz) + fun look HTRep.NIL = NONE + | look (HTRep.B(h, k, v, r)) = if ((hash = h) andalso sameKey(key, k)) + then SOME v + else look r + in + look (Array.sub (arr, indx)) + end + + fun findAndRemove (HT{not_found, table, n_items}) key = let + val arr = !table + val sz = Array.length arr + val hash = hashVal key + val indx = index (hash, sz) + fun look HTRep.NIL = raise not_found + | look (HTRep.B(h, k, v, r)) = if ((hash = h) andalso sameKey(key, k)) + then (v, r) + else let + val (v', r') = look r + in + (v', HTRep.B(h, k, v, r')) + end + val (v, bucket) = look (Array.sub (arr, indx)) + in + Array.update (arr, indx, bucket); SOME v + end + handle _ => NONE + + (* Remove an item. The table's exception is raised if + * the item doesn't exist. + *) + fun remove (HT{not_found, table, n_items}) key = let + val arr = !table + val sz = Array.length arr + val hash = hashVal key + val indx = index (hash, sz) + fun look HTRep.NIL = raise not_found + | look (HTRep.B(h, k, v, r)) = if ((hash = h) andalso sameKey(key, k)) + then (v, r) + else let val (item, r') = look r in (item, HTRep.B(h, k, v, r')) end + val (item, bucket) = look (Array.sub (arr, indx)) + in + Array.update (arr, indx, bucket); + n_items := !n_items - 1; + item + end (* remove *) + + (* Return the number of items in the table *) + fun numItems (HT{n_items, ...}) = !n_items + + (* return a list of the items in the table *) + fun listItems (HT{table = ref arr, n_items, ...}) = + HTRep.listItems (arr, n_items) + fun listItemsi (HT{table = ref arr, n_items, ...}) = + HTRep.listItemsi (arr, n_items) + + (* Apply a function to the entries of the table *) + fun appi f (HT{table, ...}) = HTRep.appi f (! table) + fun app f (HT{table, ...}) = HTRep.app f (! table) + + (* Map a table to a new table that has the same keys and exception *) + fun mapi f (HT{table, n_items, not_found}) = HT{ + table = ref(HTRep.mapi f (! table)), + n_items = ref(!n_items), + not_found = not_found + } + fun map f (HT{table, n_items, not_found}) = HT{ + table = ref(HTRep.map f (! table)), + n_items = ref(!n_items), + not_found = not_found + } + + (* Fold a function over the entries of the table *) + fun foldi f init (HT{table, ...}) = HTRep.foldi f init (! table) + fun fold f init (HT{table, ...}) = HTRep.fold f init (! table) + + (* modify the hash-table items in place *) + fun modifyi f (HT{table, ...}) = HTRep.modifyi f (!table) + fun modify f (HT{table, ...}) = HTRep.modify f (!table) + + (* remove any hash table items that do not satisfy the given + * predicate. + *) + fun filteri pred (HT{table, n_items, ...}) = + n_items := HTRep.filteri pred (! table) + fun filter pred (HT{table, n_items, ...}) = + n_items := HTRep.filter pred (! table) + + (* Create a copy of a hash table *) + fun copy (HT{table, n_items, not_found}) = HT{ + table = ref(HTRep.copy(! table)), + n_items = ref(!n_items), + not_found = not_found + } + + (* returns a list of the sizes of the various buckets. This is to + * allow users to gauge the quality of their hashing function. + *) + fun bucketSizes (HT{table, ...}) = HTRep.bucketSizes (! table) + + end (* HashTableFn *) diff --git a/smlnj-lib/Util/int-list-map.sml b/smlnj-lib/Util/int-list-map.sml new file mode 100644 index 0000000..2ce7928 --- /dev/null +++ b/smlnj-lib/Util/int-list-map.sml @@ -0,0 +1,325 @@ +(* int-list-map.sml + * + * COPYRIGHT (c) 2012 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * COPYRIGHT (c) 1996 by AT&T Research. See COPYRIGHT file for details. + * + * An implementation of finite maps on integer keys, which uses a sorted list + * representation. + *) + +structure IntListMap :> ORD_MAP where type Key.ord_key = Int.int = + struct + + structure Key = + struct + type ord_key = int + val compare = Int.compare + end + + type 'a map = (int * 'a) list + + val empty = [] + + fun isEmpty [] = true + | isEmpty _ = false + + (* return the first item in the map (or NONE if it is empty) *) + fun first [] = NONE + | first ((_, value)::_) = SOME value + + (* return the first item in the map and its key (or NONE if it is empty) *) + fun firsti [] = NONE + | firsti ((key, value)::_) = SOME(key, value) + + fun singleton (key, item) = [(key, item)] + + fun insert (l, key, item) = let + fun f [] = [(key, item)] + | f ((elem as (key', _))::r) = (case Key.compare(key, key') + of LESS => (key, item) :: elem :: r + | EQUAL => (key, item) :: r + | GREATER => elem :: (f r) + (* end case *)) + in + f l + end + fun insert' ((k, x), m) = insert(m, k, x) + + fun insertWithi comb (l, key, item) = let + fun f [] = [(key, item)] + | f ((elem as (key', item'))::r) = (case Key.compare(key, key') + of LESS => (key, item) :: elem :: r + | EQUAL => (key, comb(key, item', item)) :: r + | GREATER => elem :: (f r) + (* end case *)) + in + f l + end + fun insertWith comb = insertWithi (fn (_, x1, x2) => comb(x1, x2)) + + (* return true if the key is in the map's domain *) + fun inDomain (l, key) = let + fun f [] = false + | f ((key', x) :: r) = (key' <= key) andalso ((key' = key) orelse f r) + in + f l + end + + (* Look for an item, return NONE if the item doesn't exist *) + fun find (l, key) = let + fun f [] = NONE + | f ((key', x) :: r) = + if (key < key') then NONE + else if (key = key') then SOME x + else f r + in + f l + end + + (* Look for an item, raise NotFound if the item doesn't exist *) + fun lookup (l, key) = let + fun f [] = raise LibBase.NotFound + | f ((key', x) :: r) = + if (key' < key) then f r + else if (key' = key) then x + else raise LibBase.NotFound + in + f l + end + + (* Remove an item, returning new map and value removed. + * Raise LibBase.NotFound if not found. + *) + fun remove (l, key) = let + fun f (_, []) = raise LibBase.NotFound + | f (prefix, (elem as (key', x)) :: r) = + if (key' < key) then f(elem :: prefix, r) + else if (key' = key) then (List.revAppend(prefix, r), x) + else raise LibBase.NotFound + in + f ([], l) + end + + fun findAndRemove (l, key) = let + fun f (_, []) = raise LibBase.NotFound + | f (prefix, (elem as (key', x)) :: r) = + if (key' < key) then f(elem :: prefix, r) + else if (key' = key) then SOME(List.revAppend(prefix, r), x) + else NONE + in + f ([], l) + end + + (* Return the number of items in the map *) + fun numItems l = List.length l + + (* Return a list of the items (and their keys) in the map *) + fun listItems (l : 'a map) = List.map #2 l + fun listItemsi l = l + + fun listKeys (l : 'a map) = List.map #1 l + + fun equiv rngEq = let + fun cmp ([], []) = true + | cmp ((xk, x)::xr, (yk, y)::yr) = + (xk = yk) andalso rngEq(x, y) andalso cmp(xr, yr) + | cmp _ = false + in + cmp + end + + fun collate cmpRng = let + fun cmp ([], []) = EQUAL + | cmp ([], _) = LESS + | cmp (_, []) = GREATER + | cmp (xs as ((xk, x)::xr), (yk, y)::yr) = + if (xk = yk) + then (case cmpRng(x, y) + of EQUAL => cmp (xr, yr) + | order => order + (* end case *)) + else if (xk < yk) + then LESS + else GREATER + in + cmp + end + + fun extends rngEx = let + fun cmp ([], []) = true + | cmp (_, []) = true + | cmp ([], _) = false + | cmp ((xk, x)::xr, ys as ((yk, y)::yr)) = + if (xk < yk) then cmp (xr, ys) + else (xk = yk) andalso rngEx(x, y) andalso cmp (xr, yr) + in + cmp + end + + (* return a map whose domain is the union of the domains of the two input + * maps, using the supplied function to define the map on elements that + * are in both domains. + *) + fun unionWith f (m1 : 'a map, m2 : 'a map) = let + fun merge ([], [], l) = List.rev l + | merge ([], m2, l) = List.revAppend(l, m2) + | merge (m1, [], l) = List.revAppend(l, m1) + | merge (m1 as ((k1, x1)::r1), m2 as ((k2, x2)::r2), l) = ( + case Key.compare (k1, k2) + of LESS => merge (r1, m2, (k1, x1)::l) + | EQUAL => merge (r1, r2, (k1, f(x1, x2)) :: l) + | GREATER => merge (m1, r2, (k2, x2)::l) + (* end case *)) + in + merge (m1, m2, []) + end + fun unionWithi f (m1 : 'a map, m2 : 'a map) = let + fun merge ([], [], l) = List.rev l + | merge ([], m2, l) = List.revAppend(l, m2) + | merge (m1, [], l) = List.revAppend(l, m1) + | merge (m1 as ((k1, x1)::r1), m2 as ((k2, x2)::r2), l) = ( + case Key.compare (k1, k2) + of LESS => merge (r1, m2, (k1, x1)::l) + | EQUAL => merge (r1, r2, (k1, f(k1, x1, x2)) :: l) + | GREATER => merge (m1, r2, (k2, x2)::l) + (* end case *)) + in + merge (m1, m2, []) + end + + (* return a map whose domain is the intersection of the domains of the + * two input maps, using the supplied function to define the range. + *) + fun intersectWith f (m1 : 'a map, m2 : 'b map) = let + fun merge (m1 as ((k1, x1)::r1), m2 as ((k2, x2)::r2), l) = ( + case Key.compare (k1, k2) + of LESS => merge (r1, m2, l) + | EQUAL => merge (r1, r2, (k1, f(x1, x2)) :: l) + | GREATER => merge (m1, r2, l) + (* end case *)) + | merge (_, _, l) = List.rev l + in + merge (m1, m2, []) + end + fun intersectWithi f (m1 : 'a map, m2 : 'b map) = let + fun merge (m1 as ((k1, x1)::r1), m2 as ((k2, x2)::r2), l) = ( + case Key.compare (k1, k2) + of LESS => merge (r1, m2, l) + | EQUAL => merge (r1, r2, (k1, f(k1, x1, x2)) :: l) + | GREATER => merge (m1, r2, l) + (* end case *)) + | merge (_, _, l) = List.rev l + in + merge (m1, m2, []) + end + + fun mergeWith f (m1 : 'a map, m2 : 'b map) = let + fun merge (m1 as ((k1, x1)::r1), m2 as ((k2, x2)::r2), l) = + if (k1 < k2) + then mergef (k1, SOME x1, NONE, r1, m2, l) + else if (k1 = k2) + then mergef (k1, SOME x1, SOME x2, r1, r2, l) + else mergef (k2, NONE, SOME x2, m1, r2, l) + | merge ([], [], l) = List.rev l + | merge ((k1, x1)::r1, [], l) = mergef (k1, SOME x1, NONE, r1, [], l) + | merge ([], (k2, x2)::r2, l) = mergef (k2, NONE, SOME x2, [], r2, l) + and mergef (k, x1, x2, r1, r2, l) = (case f (x1, x2) + of NONE => merge (r1, r2, l) + | SOME y => merge (r1, r2, (k, y)::l) + (* end case *)) + in + merge (m1, m2, []) + end + fun mergeWithi f (m1 : 'a map, m2 : 'b map) = let + fun merge (m1 as ((k1, x1)::r1), m2 as ((k2, x2)::r2), l) = + if (k1 < k2) + then mergef (k1, SOME x1, NONE, r1, m2, l) + else if (k1 = k2) + then mergef (k1, SOME x1, SOME x2, r1, r2, l) + else mergef (k2, NONE, SOME x2, m1, r2, l) + | merge ([], [], l) = List.rev l + | merge ((k1, x1)::r1, [], l) = mergef (k1, SOME x1, NONE, r1, [], l) + | merge ([], (k2, x2)::r2, l) = mergef (k2, NONE, SOME x2, [], r2, l) + and mergef (k, x1, x2, r1, r2, l) = (case f (k, x1, x2) + of NONE => merge (r1, r2, l) + | SOME y => merge (r1, r2, (k, y)::l) + (* end case *)) + in + merge (m1, m2, []) + end + + (* Apply a function to the entries of the map in map order. *) + val appi = List.app + fun app f l = appi (fn (_, item) => f item) l + + (* Create a new table by applying a map function to the + * name/value pairs in the table. + *) + fun mapi f l = List.map (fn (key, item) => (key, f(key, item))) l + fun map f l = List.map (fn (key, item) => (key, f item)) l + + (* Apply a folding function to the entries of the map + * in increasing map order. + *) + fun foldli f init l = + List.foldl (fn ((key, item), accum) => f(key, item, accum)) init l + fun foldl f init l = List.foldl (fn ((_, item), accum) => f(item, accum)) init l + + (* Apply a folding function to the entries of the map + * in decreasing map order. + *) + fun foldri f init l = + List.foldr (fn ((key, item), accum) => f(key, item, accum)) init l + fun foldr f init l = List.foldr (fn ((_, item), accum) => f(item, accum)) init l + + fun filter pred l = List.filter (fn (_, item) => pred item) l + fun filteri pred l = List.filter pred l + + fun mapPartiali f l = let + fun f' (key, item) = (case f (key, item) + of NONE => NONE + | SOME y => SOME(key, y) + (* end case *)) + in + List.mapPartial f' l + end + fun mapPartial f l = mapPartiali (fn (_, item) => f item) l + + (* check the elements of a map with a predicate and return true if + * any element satisfies the predicate. Return false otherwise. + * Elements are checked in key order. + *) + fun exists pred = let + fun exists' [] = false + | exists' ((_, x)::r) = pred x orelse exists' r + in + exists' + end + fun existsi pred = let + fun exists' [] = false + | exists' (arg::r) = pred arg orelse exists' r + in + exists' + end + + (* check the elements of a map with a predicate and return true if + * they all satisfy the predicate. Return false otherwise. Elements + * are checked in key order. + *) + fun all pred = let + fun all' [] = false + | all' ((_, x)::r) = pred x andalso all' r + in + all' + end + fun alli pred = let + fun all' [] = false + | all' (arg::r) = pred arg andalso all' r + in + all' + end + + end (* IntListMap *) + diff --git a/smlnj-lib/Util/int-list-set.sml b/smlnj-lib/Util/int-list-set.sml new file mode 100644 index 0000000..1787dbc --- /dev/null +++ b/smlnj-lib/Util/int-list-set.sml @@ -0,0 +1,203 @@ +(* int-list-set.sml + * + * COPYRIGHT (c) 1996 by AT&T Research. See COPYRIGHT file for details. + * + * An implementation of finite sets of integers, which uses a sorted list + * representation. + *) + +structure IntListSet :> ORD_SET where type Key.ord_key = Int.int = + struct + + structure Key = + struct + type ord_key = int + val compare = Int.compare + end + + (* sets are represented as ordered lists of integers *) + type item = Key.ord_key + type set = item list + + val empty = [] + + fun singleton x = [x] + + fun add (l, item) = let + fun f [] = [item] + | f (elem::r) = (case Key.compare(item, elem) + of LESS => item :: elem :: r + | EQUAL => item :: r + | GREATER => elem :: (f r) + (* end case *)) + in + f l + end + fun add' (s, x) = add(x, s) + + fun union (s1, s2) = let + fun merge ([], l2) = l2 + | merge (l1, []) = l1 + | merge (x::r1, y::r2) = (case Key.compare(x, y) + of LESS => x :: merge(r1, y::r2) + | EQUAL => x :: merge(r1, r2) + | GREATER => y :: merge(x::r1, r2) + (* end case *)) + in + merge (s1, s2) + end + + fun intersection (s1, s2) = let + fun merge ([], l2) = [] + | merge (l1, []) = [] + | merge (x::r1, y::r2) = (case Key.compare(x, y) + of LESS => merge(r1, y::r2) + | EQUAL => x :: merge(r1, r2) + | GREATER => merge(x::r1, r2) + (* end case *)) + in + merge (s1, s2) + end + + fun difference (s1, s2) = let + fun merge ([], l2) = [] + | merge (l1, []) = l1 + | merge (x::r1, y::r2) = (case Key.compare(x, y) + of LESS => x :: merge(r1, y::r2) + | EQUAL => merge(r1, r2) + | GREATER => merge(x::r1, r2) + (* end case *)) + in + merge (s1, s2) + end + + fun addList (l, items) = let + val items' = List.foldl (fn (x, set) => add(set, x)) [] items + in + union (l, items') + end + + fun subtract (l, item) = let + fun f ([], _) = l + | f (elem::r, prefix) = (case Key.compare(item, elem) + of LESS => l + | EQUAL => List.revAppend(prefix, r) + | GREATER => f (r, elem::prefix) + (* end case *)) + in + f (l, []) + end + fun subtract' (item, l) = subtract (l, item) + + fun subtractList (l, items) = let + val items' = List.foldl (fn (x, set) => add(set, x)) [] items + in + difference (l, items') + end + + (* create a set from a list of items; this function works in linear time if the list + * is in increasing order. + *) + fun fromList [] = [] + | fromList (first::rest) = let + fun add (prev, x::xs, s) = (case Key.compare(prev, x) + of LESS => add (x, xs, x::s) + | _ => (* not ordered, so fallback to addList *) + addList (List.rev s, x::xs) + (* end case *)) + | add (_, [], s) = List.rev s + in + add (first, rest, [first]) + end + + (* Remove an item, returning new map and value removed. + * Raise LibBase.NotFound if not found. + *) + fun delete (l, elem) = let + fun f (_, []) = raise LibBase.NotFound + | f (prefix, elem' :: r) = (case Key.compare(elem, elem') + of LESS => raise LibBase.NotFound + | EQUAL => List.revAppend(prefix, r) + | GREATER => f(elem' :: prefix, r) + (* end case *)) + in + f ([], l) + end + + fun member (l, item) = let + fun f [] = false + | f (elem :: r) = (case Key.compare(item, elem) + of LESS => false + | EQUAL => true + | GREATER => f r + (* end case *)) + in + f l + end + + fun isEmpty [] = true + | isEmpty _ = false + + fun minItem [] = raise Empty + | minItem (x::_) = x + + fun maxItem xs = List.last xs + + fun equal (s1, s2) = let + fun f ([], []) = true + | f ((x : int)::r1, y::r2) = (x = y) andalso f (r1, r2) + | f _ = false + in + f (s1, s2) + end + + fun compare ([], []) = EQUAL + | compare ([], _) = LESS + | compare (_, []) = GREATER + | compare (x1::r1, x2::r2) = (case Key.compare(x1, x2) + of EQUAL => compare (r1, r2) + | order => order + (* end case *)) + + (* Return true if and only if the first set is a subset of the second *) + fun isSubset (s1, s2) = let + fun f ([], _) = true + | f (_, []) = false + | f (x::r1, y::r2) = + ((x = y) andalso f (r1, r2)) + orelse ((x > y) andalso f (x::r1, r2)) + in + f (s1, s2) + end + + fun disjoint ([], _) = true + | disjoint (_, []) = true + | disjoint (x::r1, y::r2) = + ((x < y) andalso disjoint (r1, y::r2)) + orelse ((x > y) andalso disjoint (x::r1, r2)) + + (* Return the number of items in the set *) + fun numItems l = List.length l + + (* Return a list of the items in the set *) + fun toList l = l + + val app = List.app + fun map f s1 = List.foldl (fn (x, s) => add(s, f x)) [] s1 + fun mapPartial f s = let + fun f' (x, acc) = (case f x of SOME x' => add(acc, x') | NONE => acc) + in + List.foldl f' [] s + end + val foldr = List.foldr + val foldl = List.foldl + val filter = List.filter + val partition = List.partition + val exists = List.exists + val all = List.all + val find = List.find + + (* deprecated *) + val listItems = toList + + end (* IntListMap *) diff --git a/smlnj-lib/Util/int-redblack-map.sml b/smlnj-lib/Util/int-redblack-map.sml new file mode 100644 index 0000000..7ed756f --- /dev/null +++ b/smlnj-lib/Util/int-redblack-map.sml @@ -0,0 +1,686 @@ +(* int-redblack-map.sml + * + * COPYRIGHT (c) 2014 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * COPYRIGHT (c) 1999 Bell Labs, Lucent Technologies. + * + * This code is based on Chris Okasaki's implementation of + * red-black trees. The linear-time tree construction code is + * based on the paper "Constructing red-black trees" by Hinze, + * and the delete function is based on the description in Cormen, + * Leiserson, and Rivest. + * + * A red-black tree should satisfy the following two invariants: + * + * Red Invariant: each red node has black children (empty nodes are + * considered black). + * + * Black Invariant: each path from the root to an empty node has the + * same number of black nodes (the tree's black height). + * + * The Black invariant implies that any node with only one child + * will be black and its child will be a red leaf. + *) + +structure IntRedBlackMap :> ORD_MAP where type Key.ord_key = int = + struct + + structure Key = + struct + type ord_key = int + val compare = Int.compare + end + + datatype color = R | B + + datatype 'a tree + = E + | T of (color * 'a tree * Key.ord_key * 'a * 'a tree) + + datatype 'a map = MAP of (int * 'a tree) + + fun isEmpty (MAP(_, E)) = true + | isEmpty _ = false + + val empty = MAP(0, E) + + fun singleton (xk, x) = MAP(1, T(B, E, xk, x, E)) + + fun insert (MAP(nItems, m), xk, x) = let + val nItems' = ref nItems + fun ins E = (nItems' := nItems+1; T(R, E, xk, x, E)) + | ins (s as T(color, a, yk, y, b)) = + if (xk < yk) + then (case a + of T(R, c, zk, z, d) => + if (xk < zk) + then (case ins c + of T(R, e, wk, w, f) => T(R, T(B,e,wk,w,f), zk, z, T(B,d,yk,y,b)) + | c => T(B, T(R,c,zk,z,d), yk, y, b) + (* end case *)) + else if (xk = zk) + then T(color, T(R, c, xk, x, d), yk, y, b) + else (case ins d + of T(R, e, wk, w, f) => T(R, T(B,c,zk,z,e), wk, w, T(B,f,yk,y,b)) + | d => T(B, T(R,c,zk,z,d), yk, y, b) + (* end case *)) + | _ => T(B, ins a, yk, y, b) + (* end case *)) + else if (xk = yk) + then T(color, a, xk, x, b) + else (case b + of T(R, c, zk, z, d) => + if (xk < zk) + then (case ins c + of T(R, e, wk, w, f) => T(R, T(B,a,yk,y,e), wk, w, T(B,f,zk,z,d)) + | c => T(B, a, yk, y, T(R,c,zk,z,d)) + (* end case *)) + else if (xk = zk) + then T(color, a, yk, y, T(R, c, xk, x, d)) + else (case ins d + of T(R, e, wk, w, f) => T(R, T(B,a,yk,y,c), zk, z, T(B,e,wk,w,f)) + | d => T(B, a, yk, y, T(R,c,zk,z,d)) + (* end case *)) + | _ => T(B, a, yk, y, ins b) + (* end case *)) + val T(_, a, yk, y, b) = ins m + in + MAP(!nItems', T(B, a, yk, y, b)) + end + fun insert' ((xk, x), m) = insert (m, xk, x) + + fun insertWithi comb (MAP(nItems, m), xk, x) = let + val nItems' = ref nItems + fun ins E = (nItems' := nItems+1; T(R, E, xk, x, E)) + | ins (s as T(color, a, yk, y, b)) = + if (xk < yk) + then (case a + of T(R, c, zk, z, d) => + if (xk < zk) + then (case ins c + of T(R, e, wk, w, f) => T(R, T(B,e,wk,w,f), zk, z, T(B,d,yk,y,b)) + | c => T(B, T(R,c,zk,z,d), yk, y, b) + (* end case *)) + else if (xk = zk) + then T(color, T(R, c, xk, comb(xk, z, x), d), yk, y, b) + else (case ins d + of T(R, e, wk, w, f) => T(R, T(B,c,zk,z,e), wk, w, T(B,f,yk,y,b)) + | d => T(B, T(R,c,zk,z,d), yk, y, b) + (* end case *)) + | _ => T(B, ins a, yk, y, b) + (* end case *)) + else if (xk = yk) + then T(color, a, xk, comb(xk, y, x), b) + else (case b + of T(R, c, zk, z, d) => + if (xk < zk) + then (case ins c + of T(R, e, wk, w, f) => T(R, T(B,a,yk,y,e), wk, w, T(B,f,zk,z,d)) + | c => T(B, a, yk, y, T(R,c,zk,z,d)) + (* end case *)) + else if (xk = zk) + then T(color, a, yk, y, T(R, c, xk, comb(xk, z, x), d)) + else (case ins d + of T(R, e, wk, w, f) => T(R, T(B,a,yk,y,c), zk, z, T(B,e,wk,w,f)) + | d => T(B, a, yk, y, T(R,c,zk,z,d)) + (* end case *)) + | _ => T(B, a, yk, y, ins b) + (* end case *)) + val T(_, a, yk, y, b) = ins m + in + MAP(!nItems', T(B, a, yk, y, b)) + end + fun insertWith comb = insertWithi (fn (_, x1, x2) => comb(x1, x2)) + + (* Is a key in the domain of the map? *) + fun inDomain (MAP(_, t), k) = let + fun find' E = false + | find' (T(_, a, yk, y, b)) = + (k = yk) orelse ((k < yk) andalso find' a) orelse (find' b) + in + find' t + end + + (* Look for an item, return NONE if the item doesn't exist *) + fun find (MAP(_, t), k) = let + fun find' E = NONE + | find' (T(_, a, yk, y, b)) = + if (k < yk) + then find' a + else if (k = yk) + then SOME y + else find' b + in + find' t + end + + (* Look for an item, raise NotFound if the item doesn't exist *) + fun lookup (MAP(_, t), k) = let + fun look E = raise LibBase.NotFound + | look (T(_, a, yk, y, b)) = + if (k < yk) + then look a + else if (k = yk) + then y + else look b + in + look t + end + + (* Remove an item, returning new map and value removed. + * Raises LibBase.NotFound if not found. + *) + local + datatype 'a zipper + = TOP + | LEFT of (color * Key.ord_key * 'a * 'a tree * 'a zipper) + | RIGHT of (color * 'a tree * Key.ord_key * 'a * 'a zipper) + datatype 'a result = FOUND of 'a * 'a tree | NOT_FOUND + in + fun remove' (t, k) = let + (* zip the zipper *) + fun zip (TOP, t) = t + | zip (LEFT(color, xk, x, b, z), a) = zip(z, T(color, a, xk, x, b)) + | zip (RIGHT(color, a, xk, x, z), b) = zip(z, T(color, a, xk, x, b)) + (* zip the zipper while resolving a black deficit *) + fun fixupZip (TOP, t) = (true, t) + (* case 1 from CLR *) + | fixupZip (LEFT(B, xk, x, T(R, a, yk, y, b), p), t) = (case a + of T(_, T(R, a11, wk, w, a12), zk, z, a2) => (* case 1L ==> case 3L ==> case 4L *) + (false, zip (p, T(B, T(R, T(B, t, xk, x, a11), wk, w, T(B, a12, zk, z, a2)), yk, y, b))) + | T(_, a1, zk, z, T(R, a21, wk, w, t22)) => (* case 1L ==> case 4L *) + (false, zip (p, T(B, T(R, T(B, t, xk, x, a1), zk, z, T(B, a21, wk, w, t22)), yk, y, b))) + | T(_, a1, zk, z, a2) => (* case 1L ==> case 2L; rotate + recolor fixes deficit *) + (false, zip (p, T(B, T(B, t, xk, x, T(R, a1, zk, z, a2)), yk, y, b))) + | _ => fixupZip (LEFT(R, xk, x, a, LEFT(B, yk, y, b, p)), t) + (* end case *)) + | fixupZip (RIGHT(B, T(R, a, xk, x, b), yk, y, p), t) = (case b + of T(_, b1, zk, z, T(R, b21, wk, w, b22)) => (* case 1R ==> case 3R ==> case 4R *) + (false, zip (p, T(B, a, xk, x, T(R, T(B, b1, zk, z, b21), wk, w, T(B, b22, yk, y, t))))) + | T(_, T(R, b11, wk, w, b12), zk, z, b2) => (* case 1R ==> case 4R *) + (false, zip (p, T(B, a, xk, x, T(R, T(B, b11, wk, w, b12), zk, z, T(B, b2, yk, y, t))))) + | T(_, b1, zk, z, b2) => (* case 1L ==> case 2L; rotate + recolor fixes deficit *) + (false, zip (p, T(B, a, xk, x, T(B, T(R, b1, zk, z, b2), yk, y, t)))) + | _ => fixupZip (RIGHT(R, b, yk, y, RIGHT(B, a, xk, x, p)), t) + (* end case *)) + (* case 3 from CLR *) + | fixupZip (LEFT(color, xk, x, T(B, T(R, a1, yk, y, a2), zk, z, b), p), t) = + (* case 3L ==> case 4L *) + (false, zip (p, T(color, T(B, t, xk, x, a1), yk, y, T(B, a2, zk, z, b)))) + | fixupZip (RIGHT(color, T(B, a, xk, x, T(R, b1, yk, y, b2)), zk, z, p), t) = + (* case 3R ==> case 4R; rotate, recolor, plus rotate fixes deficit *) + (false, zip (p, T(color, T(B, a, xk, x, b1), yk, y, T(B, b2, zk, z, t)))) + (* case 4 from CLR *) + | fixupZip (LEFT(color, xk, x, T(B, a, yk, y, T(R, b1, zk, z, b2)), p), t) = + (false, zip (p, T(color, T(B, t, xk, x, a), yk, y, T(B, b1, zk, z, b2)))) + | fixupZip (RIGHT(color, T(B, T(R, a1, zk, z, a2), xk, x, b), yk, y, p), t) = + (false, zip (p, T(color, T(B, a1, zk, z, a2), xk, x, T(B, b, yk, y, t)))) + (* case 2 from CLR; note that "a" and "b" are guaranteed to be black, since we did + * not match cases 3 or 4. + *) + | fixupZip (LEFT(R, xk, x, T(B, a, yk, y, b), p), t) = + (false, zip (p, T(B, t, xk, x, T(R, a, yk, y, b)))) + | fixupZip (LEFT(B, xk, x, T(B, a, yk, y, b), p), t) = + fixupZip (p, T(B, t, xk, x, T(R, a, yk, y, b))) + | fixupZip (RIGHT(R, T(B, a, xk, x, b), yk, y, p), t) = + (false, zip (p, T(B, T(R, a, xk, x, b), yk, y, t))) + | fixupZip (RIGHT(B, T(B, a, xk, x, b), yk, y, p), t) = + fixupZip (p, T(B, T(R, a, xk, x, b), yk, y, t)) + (* push deficit up the tree by recoloring a black node as red *) + | fixupZip (LEFT(_, yk, y, E, p), t) = fixupZip (p, T(R, t, yk, y, E)) + | fixupZip (RIGHT(_, E, yk, y, p), t) = fixupZip (p, T(R, E, yk, y, t)) + (* impossible cases that violate the red invariant *) + | fixupZip _ = raise Fail "Red invariant violation" + (* delete the minimum value from a non-empty tree, returning a 4-tuple + * (key, elem, bd, tr), where key is the minimum key, elem is the element + * named by key, tr is the residual tree with elem removed, and bd is true + * if tr has a black-depth that is less than the original tree. + *) + fun delMin (T(R, E, yk, y, b), p) = + (* replace the node by its right subtree (which must be E) *) + (yk, y, false, zip(p, b)) + | delMin (T(B, E, yk, y, T(R, a', yk', y', b')), p) = + (* replace the node with its right child, while recoloring the child black to + * preserve the black invariant. + *) + (yk, y, false, zip (p, T(B, a', yk', y', b'))) + | delMin (T(B, E, yk, y, E), p) = let + (* delete the node, which reduces the black-depth by one, so we attempt to fix + * the deficit on the path back. + *) + val (blkDeficit, t) = fixupZip (p, E) + in + (yk, y, blkDeficit, t) + end + | delMin (T(color, a, yk, y, b), z) = delMin(a, LEFT(color, yk, y, b, z)) + | delMin (E, _) = raise Match + fun del (E, p) = NOT_FOUND + | del (T(color, a, yk, y, b), p) = + if (k < yk) + then del (a, LEFT(color, yk, y, b, p)) + else if (k = yk) + then (case (color, a, b) + of (R, E, E) => FOUND(y, zip(p, E)) + | (B, E, E) => FOUND(y, #2 (fixupZip (p, E))) + | (_, T(_, a', yk', y', b'), E) => + (* node is black and left child is red; we replace the node with its + * left child recolored to black. + *) + FOUND(y, zip(p, T(B, a', yk', y', b'))) + | (_, E, T(_, a', yk', y', b')) => + (* node is black and right child is red; we replace the node with its + * right child recolored to black. + *) + FOUND(y, zip(p, T(B, a', yk', y', b'))) + | _ => let + val (minKey, minElem, blkDeficit, b) = delMin (b, TOP) + in + if blkDeficit + then FOUND(y, #2 (fixupZip (RIGHT(color, a, minKey, minElem, p), b))) + else FOUND(y, zip (p, T(color, a, minKey, minElem, b))) + end + (* end case *)) + else del (b, RIGHT(color, a, yk, y, p)) + in + del (t, TOP) + end + (* Remove an item, returning new map and value removed. + * Raises LibBase.NotFound if not found. + *) + fun remove (MAP(nItems, t), k) = (case remove' (t, k) + of FOUND(item, T(R, a, xk, x, b)) => (MAP(nItems-1, T(B, a, xk, x, b)), item) + | FOUND(item, t) => (MAP(nItems-1, t), item) + | NOT_FOUND => raise LibBase.NotFound + (* end case *)) + fun findAndRemove (MAP(nItems, t), k) = (case remove' (t, k) + of FOUND(item, T(R, a, xk, x, b)) => SOME(MAP(nItems-1, T(B, a, xk, x, b)), item) + | FOUND(item, t) => SOME(MAP(nItems-1, t), item) + | NOT_FOUND => NONE + (* end case *)) + end (* local *) + + (* return the first item in the map (or NONE if it is empty) *) + fun first (MAP(_, t)) = let + fun f E = NONE + | f (T(_, E, _, x, _)) = SOME x + | f (T(_, a, _, _, _)) = f a + in + f t + end + fun firsti (MAP(_, t)) = let + fun f E = NONE + | f (T(_, E, xk, x, _)) = SOME(xk, x) + | f (T(_, a, _, _, _)) = f a + in + f t + end + + (* Return the number of items in the map *) + fun numItems (MAP(n, _)) = n + + fun foldl f = let + fun foldf (E, accum) = accum + | foldf (T(_, a, _, x, b), accum) = + foldf(b, f(x, foldf(a, accum))) + in + fn init => fn (MAP(_, m)) => foldf(m, init) + end + fun foldli f = let + fun foldf (E, accum) = accum + | foldf (T(_, a, xk, x, b), accum) = + foldf(b, f(xk, x, foldf(a, accum))) + in + fn init => fn (MAP(_, m)) => foldf(m, init) + end + + fun foldr f = let + fun foldf (E, accum) = accum + | foldf (T(_, a, _, x, b), accum) = + foldf(a, f(x, foldf(b, accum))) + in + fn init => fn (MAP(_, m)) => foldf(m, init) + end + fun foldri f = let + fun foldf (E, accum) = accum + | foldf (T(_, a, xk, x, b), accum) = + foldf(a, f(xk, x, foldf(b, accum))) + in + fn init => fn (MAP(_, m)) => foldf(m, init) + end + + fun listItems m = foldr (op ::) [] m + fun listItemsi m = foldri (fn (xk, x, l) => (xk, x)::l) [] m + + (* return an ordered list of the keys in the map. *) + fun listKeys m = foldri (fn (k, _, l) => k::l) [] m + + (* functions for walking the tree while keeping a stack of parents + * to be visited. + *) + fun next ((t as T(_, _, _, _, b))::rest) = (t, left(b, rest)) + | next _ = (E, []) + and left (E, rest) = rest + | left (t as T(_, a, _, _, _), rest) = left(a, t::rest) + fun start m = left(m, []) + + (* Given two maps `f` and `g`, return true if they have equal domains and if + * for every `x` in their domain, `rngEq(f x, g x) = true`. + *) + fun equiv rngEq (MAP(n1, m1), MAP(n2, m2)) = let + fun cmp (t1, t2) = (case (next t1, next t2) + of ((E, _), (E, _)) => true + | ((E, _), _) => false + | (_, (E, _)) => false + | ((T(_, _, xk, x, _), r1), (T(_, _, yk, y, _), r2)) => + (xk = yk) andalso rngEq(x, y) andalso cmp (r1, r2) + (* end case *)) + in + (n1 = n2) andalso cmp (start m1, start m2) + end + + (* Given two maps `f` and `g`, and a comparison function `rngCmp` on their + * range types, return the order of the maps. + *) + fun collate rngCmp (MAP(_, m1), MAP(_, m2)) = let + fun cmp (t1, t2) = (case (next t1, next t2) + of ((E, _), (E, _)) => EQUAL + | ((E, _), _) => LESS + | (_, (E, _)) => GREATER + | ((T(_, _, xk, x, _), r1), (T(_, _, yk, y, _), r2)) => + if (xk = yk) + then (case rngCmp(x, y) + of EQUAL => cmp (r1, r2) + | order => order + (* end case *)) + else if (xk < yk) + then LESS + else GREATER + (* end case *)) + in + cmp (start m1, start m2) + end + + (* Given two maps `f` and `g`, return true if the domain of `g` is a subset + * of the domain of `f` and for every `x` in the domain of `g`, + * `rngEq(g x, f x) = true`. + *) + fun extends rngEx (MAP(n1, m1), MAP(n2, m2)) = let + (* does t1 extend t2? *) + fun cmp (t1, t2) = (case (next t1, next t2) + of ((E, _), (E, _)) => true + | (_, (E, _)) => true + | ((E, _), _) => false + | ((T(_, _, xk, x, _), r1), (T(_, _, yk, y, _), r2)) => + if (xk < yk) then cmp (r1, t2) + else (yk = xk) andalso rngEx(x, y) andalso cmp (r1, r2) + (* end case *)) + in + (n1 >= n2) andalso cmp (start m1, start m2) + end + + (* support for constructing red-black trees in linear time from increasing + * ordered sequences (based on a description by R. Hinze). Note that the + * elements in the digits are ordered with the largest on the left, whereas + * the elements of the trees are ordered with the largest on the right. + *) + datatype 'a digit + = ZERO + | ONE of (Key.ord_key * 'a * 'a tree * 'a digit) + | TWO of (Key.ord_key * 'a * 'a tree * Key.ord_key * 'a * 'a tree * 'a digit) + (* add an item that is guaranteed to be larger than any in l *) + fun addItem (ak, a, l) = let + fun incr (ak, a, t, ZERO) = ONE(ak, a, t, ZERO) + | incr (ak1, a1, t1, ONE(ak2, a2, t2, r)) = + TWO(ak1, a1, t1, ak2, a2, t2, r) + | incr (ak1, a1, t1, TWO(ak2, a2, t2, ak3, a3, t3, r)) = + ONE(ak1, a1, t1, incr(ak2, a2, T(B, t3, ak3, a3, t2), r)) + in + incr(ak, a, E, l) + end + (* link the digits into a tree *) + fun linkAll t = let + fun link (t, ZERO) = t + | link (t1, ONE(ak, a, t2, r)) = link(T(B, t2, ak, a, t1), r) + | link (t, TWO(ak1, a1, t1, ak2, a2, t2, r)) = + link(T(B, T(R, t2, ak2, a2, t1), ak1, a1, t), r) + in + link (E, t) + end + + local + fun wrap f (MAP(_, m1), MAP(_, m2)) = let + val (n, result) = f (start m1, start m2, 0, ZERO) + in + MAP(n, linkAll result) + end + fun ins ((E, _), n, result) = (n, result) + | ins ((T(_, _, xk, x, _), r), n, result) = + ins(next r, n+1, addItem(xk, x, result)) + in + + (* return a map whose domain is the union of the domains of the two input + * maps, using the supplied function to define the map on elements that + * are in both domains. + *) + fun unionWith mergeFn = let + fun union (t1, t2, n, result) = (case (next t1, next t2) + of ((E, _), (E, _)) => (n, result) + | ((E, _), t2) => ins(t2, n, result) + | (t1, (E, _)) => ins(t1, n, result) + | ((T(_, _, xk, x, _), r1), (T(_, _, yk, y, _), r2)) => + if (xk < yk) + then union (r1, t2, n+1, addItem(xk, x, result)) + else if (xk = yk) + then union (r1, r2, n+1, addItem(xk, mergeFn(x, y), result)) + else union (t1, r2, n+1, addItem(yk, y, result)) + (* end case *)) + in + wrap union + end + fun unionWithi mergeFn = let + fun union (t1, t2, n, result) = (case (next t1, next t2) + of ((E, _), (E, _)) => (n, result) + | ((E, _), t2) => ins(t2, n, result) + | (t1, (E, _)) => ins(t1, n, result) + | ((T(_, _, xk, x, _), r1), (T(_, _, yk, y, _), r2)) => + if (xk < yk) + then union (r1, t2, n+1, addItem(xk, x, result)) + else if (xk = yk) + then + union (r1, r2, n+1, addItem(xk, mergeFn(xk, x, y), result)) + else union (t1, r2, n+1, addItem(yk, y, result)) + (* end case *)) + in + wrap union + end + + (* return a map whose domain is the intersection of the domains of the + * two input maps, using the supplied function to define the range. + *) + fun intersectWith mergeFn = let + fun intersect (t1, t2, n, result) = (case (next t1, next t2) + of ((T(_, _, xk, x, _), r1), (T(_, _, yk, y, _), r2)) => + if (xk < yk) + then intersect (r1, t2, n, result) + else if (xk = yk) + then intersect ( + r1, r2, n+1, addItem(xk, mergeFn(x, y), result)) + else intersect (t1, r2, n, result) + | _ => (n, result) + (* end case *)) + in + wrap intersect + end + fun intersectWithi mergeFn = let + fun intersect (t1, t2, n, result) = (case (next t1, next t2) + of ((T(_, _, xk, x, _), r1), (T(_, _, yk, y, _), r2)) => + if (xk < yk) + then intersect (r1, t2, n, result) + else if (xk = yk) + then intersect (r1, r2, n+1, + addItem(xk, mergeFn(xk, x, y), result)) + else intersect (t1, r2, n, result) + | _ => (n, result) + (* end case *)) + in + wrap intersect + end + + fun mergeWith mergeFn = let + fun merge (t1, t2, n, result) = (case (next t1, next t2) + of ((E, _), (E, _)) => (n, result) + | ((E, _), (T(_, _, yk, y, _), r2)) => + mergef(yk, NONE, SOME y, t1, r2, n, result) + | ((T(_, _, xk, x, _), r1), (E, _)) => + mergef(xk, SOME x, NONE, r1, t2, n, result) + | ((T(_, _, xk, x, _), r1), (T(_, _, yk, y, _), r2)) => + if (xk < yk) + then mergef(xk, SOME x, NONE, r1, t2, n, result) + else if (xk = yk) + then mergef(xk, SOME x, SOME y, r1, r2, n, result) + else mergef(yk, NONE, SOME y, t1, r2, n, result) + (* end case *)) + and mergef (k, x1, x2, r1, r2, n, result) = (case mergeFn(x1, x2) + of NONE => merge (r1, r2, n, result) + | SOME y => merge (r1, r2, n+1, addItem(k, y, result)) + (* end case *)) + in + wrap merge + end + fun mergeWithi mergeFn = let + fun merge (t1, t2, n, result) = (case (next t1, next t2) + of ((E, _), (E, _)) => (n, result) + | ((E, _), (T(_, _, yk, y, _), r2)) => + mergef(yk, NONE, SOME y, t1, r2, n, result) + | ((T(_, _, xk, x, _), r1), (E, _)) => + mergef(xk, SOME x, NONE, r1, t2, n, result) + | ((T(_, _, xk, x, _), r1), (T(_, _, yk, y, _), r2)) => + if (xk < yk) + then mergef(xk, SOME x, NONE, r1, t2, n, result) + else if (xk = yk) + then mergef(xk, SOME x, SOME y, r1, r2, n, result) + else mergef(yk, NONE, SOME y, t1, r2, n, result) + (* end case *)) + and mergef (k, x1, x2, r1, r2, n, result) = (case mergeFn(k, x1, x2) + of NONE => merge (r1, r2, n, result) + | SOME y => merge (r1, r2, n+1, addItem(k, y, result)) + (* end case *)) + in + wrap merge + end + end (* local *) + + fun app f = let + fun appf E = () + | appf (T(_, a, _, x, b)) = (appf a; f x; appf b) + in + fn (MAP(_, m)) => appf m + end + fun appi f = let + fun appf E = () + | appf (T(_, a, xk, x, b)) = (appf a; f(xk, x); appf b) + in + fn (MAP(_, m)) => appf m + end + + fun map f = let + fun mapf E = E + | mapf (T(color, a, xk, x, b)) = + T(color, mapf a, xk, f x, mapf b) + in + fn (MAP(n, m)) => MAP(n, mapf m) + end + fun mapi f = let + fun mapf E = E + | mapf (T(color, a, xk, x, b)) = + T(color, mapf a, xk, f(xk, x), mapf b) + in + fn (MAP(n, m)) => MAP(n, mapf m) + end + + (* Filter out those elements of the map that do not satisfy the + * predicate. The filtering is done in increasing map order. + *) + fun filter pred (MAP(_, t)) = let + fun walk (E, n, result) = (n, result) + | walk (T(_, a, xk, x, b), n, result) = let + val (n, result) = walk(a, n, result) + in + if (pred x) + then walk(b, n+1, addItem(xk, x, result)) + else walk(b, n, result) + end + val (n, result) = walk (t, 0, ZERO) + in + MAP(n, linkAll result) + end + fun filteri pred (MAP(_, t)) = let + fun walk (E, n, result) = (n, result) + | walk (T(_, a, xk, x, b), n, result) = let + val (n, result) = walk(a, n, result) + in + if (pred(xk, x)) + then walk(b, n+1, addItem(xk, x, result)) + else walk(b, n, result) + end + val (n, result) = walk (t, 0, ZERO) + in + MAP(n, linkAll result) + end + + (* map a partial function over the elements of a map in increasing + * map order. + *) + fun mapPartial f = let + fun f' (xk, x, m) = (case f x + of NONE => m + | (SOME y) => insert(m, xk, y) + (* end case *)) + in + foldli f' empty + end + fun mapPartiali f = let + fun f' (xk, x, m) = (case f(xk, x) + of NONE => m + | (SOME y) => insert(m, xk, y) + (* end case *)) + in + foldli f' empty + end + + (* check the elements of a map with a predicate and return true if + * any element satisfies the predicate. Return false otherwise. + * Elements are checked in key order. + *) + fun exists pred = let + fun exists' E = false + | exists' (T(_, a, _, x, b)) = exists' a orelse pred x orelse exists' b + in + fn (MAP(_, m)) => exists' m + end + fun existsi pred = let + fun exists' E = false + | exists' (T(_, a, k, x, b)) = exists' a orelse pred(k, x) orelse exists' b + in + fn (MAP(_, m)) => exists' m + end + + (* check the elements of a map with a predicate and return true if + * they all satisfy the predicate. Return false otherwise. Elements + * are checked in key order. + *) + fun all pred = let + fun all' E = true + | all' (T(_, a, _, x, b)) = all' a andalso pred x andalso all' b + in + fn (MAP(_, m)) => all' m + end + fun alli pred = let + fun all' E = true + | all' (T(_, a, k, x, b)) = all' a andalso pred(k, x) andalso all' b + in + fn (MAP(_, m)) => all' m + end + + end (* structure IntRedBlackMap *) diff --git a/smlnj-lib/Util/int-redblack-set.sml b/smlnj-lib/Util/int-redblack-set.sml new file mode 100644 index 0000000..13cbbf4 --- /dev/null +++ b/smlnj-lib/Util/int-redblack-set.sml @@ -0,0 +1,515 @@ +(* int-redblack-set.sml + * + * COPYRIGHT (c) 2014 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * COPYRIGHT (c) 1999 Bell Labs, Lucent Technologies. + * + * This code is based on Chris Okasaki's implementation of + * red-black trees. The linear-time tree construction code is + * based on the paper "Constructing red-black trees" by Hinze, + * and the delete function is based on the description in Cormen, + * Leiserson, and Rivest. + * + * A red-black tree should satisfy the following two invariants: + * + * Red Invariant: each red node has black children (empty nodes are + * considered black). + * + * Black Invariant: each path from the root to an empty node has the + * same number of black nodes (the tree's black height). + * + * The Black invariant implies that any node with only one child + * will be black and its child will be a red leaf. + *) + +structure IntRedBlackSet :> ORD_SET where type Key.ord_key = int = + struct + + structure Key = + struct + type ord_key = int + val compare = Int.compare + end + + type item = Key.ord_key + + datatype color = R | B + + datatype tree + = E + | T of (color * tree * item * tree) + + datatype set = SET of (int * tree) + + fun isEmpty (SET(_, E)) = true + | isEmpty _ = false + + val empty = SET(0, E) + + fun minItem (SET(_, tr)) = let + fun min E = raise Empty + | min (T(_, E, item, _)) = item + | min (T(_, tr, _, _)) = min tr + in + min tr + end + + fun maxItem (SET(_, tr)) = let + fun max E = raise Empty + | max (T(_, _, item, E)) = item + | max (T(_, _, _, tr)) = max tr + in + max tr + end + + fun singleton x = SET(1, T(B, E, x, E)) + + fun add (SET(nItems, m), x) = let + val nItems' = ref nItems + fun ins E = (nItems' := nItems+1; T(R, E, x, E)) + | ins (s as T(color, a, y, b)) = + if (x < y) + then (case a + of T(R, c, z, d) => + if (x < z) + then (case ins c + of T(R, e, w, f) => T(R, T(B,e,w,f), z, T(B,d,y,b)) + | c => T(B, T(R,c,z,d), y, b) + (* end case *)) + else if (x = z) + then T(color, T(R, c, x, d), y, b) + else (case ins d + of T(R, e, w, f) => T(R, T(B,c,z,e), w, T(B,f,y,b)) + | d => T(B, T(R,c,z,d), y, b) + (* end case *)) + | _ => T(B, ins a, y, b) + (* end case *)) + else if (x = y) + then T(color, a, x, b) + else (case b + of T(R, c, z, d) => + if (x < z) + then (case ins c + of T(R, e, w, f) => T(R, T(B,a,y,e), w, T(B,f,z,d)) + | c => T(B, a, y, T(R,c,z,d)) + (* end case *)) + else if (x = z) + then T(color, a, y, T(R, c, x, d)) + else (case ins d + of T(R, e, w, f) => T(R, T(B,a,y,c), z, T(B,e,w,f)) + | d => T(B, a, y, T(R,c,z,d)) + (* end case *)) + | _ => T(B, a, y, ins b) + (* end case *)) + val T(_, a, y, b) = ins m + in + SET(!nItems', T(B, a, y, b)) + end + fun add' (x, m) = add (m, x) + + fun addList (s, []) = s + | addList (s, x::r) = addList(add(s, x), r) + + (* Remove an item. Raises LibBase.NotFound if not found. *) + local + datatype zipper + = TOP + | LEFT of (color * item * tree * zipper) + | RIGHT of (color * tree * item * zipper) + in + fun delete (SET(nItems, t), k) = let + (* zip the zipper *) + fun zip (TOP, t) = t + | zip (LEFT(color, x, b, p), a) = zip(p, T(color, a, x, b)) + | zip (RIGHT(color, a, x, p), b) = zip(p, T(color, a, x, b)) + (* zip the zipper while resolving a black deficit *) + fun fixupZip (TOP, t) = (true, t) + (* case 1 from CLR *) + | fixupZip (LEFT(B, x, T(R, a, y, b), p), t) = (case a + of T(_, T(R, a11, w, a12), z, a2) => (* case 1L ==> case 3L ==> case 4L *) + (false, zip (p, T(B, T(R, T(B, t, x, a11), w, T(B, a12, z, a2)), y, b))) + | T(_, a1, z, T(R, a21, w, t22)) => (* case 1L ==> case 4L *) + (false, zip (p, T(B, T(R, T(B, t, x, a1), z, T(B, a21, w, t22)), y, b))) + | T(_, a1, z, a2) => (* case 1L ==> case 2L; rotate + recolor fixes deficit *) + (false, zip (p, T(B, T(B, t, x, T(R, a1, z, a2)), y, b))) + | _ => fixupZip (LEFT(R, x, a, LEFT(B, y, b, p)), t) + (* end case *)) + | fixupZip (RIGHT(B, T(R, a, x, b), y, p), t) = (case b + of T(_, b1, z, T(R, b21, w, b22)) => (* case 1R ==> case 3R ==> case 4R *) + (false, zip (p, T(B, a, x, T(R, T(B, b1, z, b21), w, T(B, b22, y, t))))) + | T(_, T(R, b11, w, b12), z, b2) => (* case 1R ==> case 4R *) + (false, zip (p, T(B, a, x, T(R, T(B, b11, w, b12), z, T(B, b2, y, t))))) + | T(_, b1, z, b2) => (* case 1L ==> case 2L; rotate + recolor fixes deficit *) + (false, zip (p, T(B, a, x, T(B, T(R, b1, z, b2), y, t)))) + | _ => fixupZip (RIGHT(R, b, y, RIGHT(B, a, x, p)), t) + (* end case *)) + (* case 3 from CLR *) + | fixupZip (LEFT(color, x, T(B, T(R, a1, y, a2), z, b), p), t) = + (* case 3L ==> case 4L *) + (false, zip (p, T(color, T(B, t, x, a1), y, T(B, a2, z, b)))) + | fixupZip (RIGHT(color, T(B, a, x, T(R, b1, y, b2)), z, p), t) = + (* case 3R ==> case 4R; rotate, recolor, plus rotate fixes deficit *) + (false, zip (p, T(color, T(B, a, x, b1), y, T(B, b2, z, t)))) + (* case 4 from CLR *) + | fixupZip (LEFT(color, x, T(B, a, y, T(R, b1, z, b2)), p), t) = + (false, zip (p, T(color, T(B, t, x, a), y, T(B, b1, z, b2)))) + | fixupZip (RIGHT(color, T(B, T(R, a1, z, a2), x, b), y, p), t) = + (false, zip (p, T(color, T(B, a1, z, a2), x, T(B, b, y, t)))) + (* case 2 from CLR; note that "a" and "b" are guaranteed to be black, since we did + * not match cases 3 or 4. + *) + | fixupZip (LEFT(R, x, T(B, a, y, b), p), t) = + (false, zip (p, T(B, t, x, T(R, a, y, b)))) + | fixupZip (LEFT(B, x, T(B, a, y, b), p), t) = + fixupZip (p, T(B, t, x, T(R, a, y, b))) + | fixupZip (RIGHT(R, T(B, a, x, b), y, p), t) = + (false, zip (p, T(B, T(R, a, x, b), y, t))) + | fixupZip (RIGHT(B, T(B, a, x, b), y, p), t) = + fixupZip (p, T(B, T(R, a, x, b), y, t)) + (* push deficit up the tree by recoloring a black node as red *) + | fixupZip (LEFT(_, y, E, p), t) = fixupZip (p, T(R, t, y, E)) + | fixupZip (RIGHT(_, E, y, p), t) = fixupZip (p, T(R, E, y, t)) + (* impossible cases that violate the red invariant *) + | fixupZip _ = raise Fail "Red invariant violation" + (* delete the minimum value from a non-empty tree, returning a triple + * (elem, bd, tr), where elem is the minimum element, tr is the residual + * tree with elem removed, and bd is true if tr has a black-depth that is + * less than the original tree. + *) + fun delMin (T(R, E, y, b), p) = + (* replace the node by its right subtree (which must be E) *) + (y, false, zip(p, b)) + | delMin (T(B, E, y, T(R, a', y', b')), p) = + (* replace the node with its right child, while recoloring the child black to + * preserve the black invariant. + *) + (y, false, zip (p, T(B, a', y', b'))) + | delMin (T(B, E, y, E), p) = let + (* delete the node, which reduces the black-depth by one, so we attempt to fix + * the deficit on the path back. + *) + val (blkDeficit, t) = fixupZip (p, E) + in + (y, blkDeficit, t) + end + | delMin (T(color, a, y, b), z) = delMin(a, LEFT(color, y, b, z)) + | delMin (E, _) = raise Match + fun del (E, z) = raise LibBase.NotFound + | del (T(color, a, y, b), p) = + if (k < y) + then del (a, LEFT(color, y, b, p)) + else if (k = y) + then (case (color, a, b) + of (R, E, E) => zip(p, E) + | (B, E, E) => #2 (fixupZip (p, E)) + | (_, T(_, a', y', b'), E) => + (* node is black and left child is red; we replace the node with its + * left child recolored to black. + *) + zip(p, T(B, a', y', b')) + | (_, E, T(_, a', y', b')) => + (* node is black and right child is red; we replace the node with its + * right child recolored to black. + *) + zip(p, T(B, a', y', b')) + | _ => let + val (minSucc, blkDeficit, b) = delMin (b, TOP) + in + if blkDeficit + then #2 (fixupZip (RIGHT(color, a, minSucc, p), b)) + else zip (p, T(color, a, minSucc, b)) + end + (* end case *)) + else del (b, RIGHT(color, a, y, p)) + in + case del(t, TOP) + of T(R, a, x, b) => SET(nItems-1, T(B, a, x, b)) + | t => SET(nItems-1, t) + (* end case *) + end + end (* local *) + + (* Return true if and only if item is an element in the set *) + fun member (SET(_, t), k) = let + fun find' E = false + | find' (T(_, a, y, b)) = + (k = y) orelse ((k < y) andalso find' a) orelse find' b + in + find' t + end + + (* Return the number of items in the map *) + fun numItems (SET(n, _)) = n + + fun foldl f = let + fun foldf (E, accum) = accum + | foldf (T(_, a, x, b), accum) = + foldf(b, f(x, foldf(a, accum))) + in + fn init => fn (SET(_, m)) => foldf(m, init) + end + + fun foldr f = let + fun foldf (E, accum) = accum + | foldf (T(_, a, x, b), accum) = + foldf(a, f(x, foldf(b, accum))) + in + fn init => fn (SET(_, m)) => foldf(m, init) + end + + (* return an ordered list of the items in the set. *) + fun toList s = foldr (fn (x, l) => x::l) [] s + + (* functions for walking the tree while keeping a stack of parents + * to be visited. + *) + fun next ((t as T(_, _, _, b))::rest) = (t, left(b, rest)) + | next _ = (E, []) + and left (E, rest) = rest + | left (t as T(_, a, _, _), rest) = left(a, t::rest) + fun start m = left(m, []) + + (* Return true if and only if the two sets are equal *) + fun equal (SET(_, s1), SET(_, s2)) = let + fun cmp (t1, t2) = (case (next t1, next t2) + of ((E, _), (E, _)) => true + | ((E, _), _) => false + | (_, (E, _)) => false + | ((T(_, _, x, _), r1), (T(_, _, y, _), r2)) => + (x = y) andalso cmp (r1, r2) + (* end case *)) + in + cmp (start s1, start s2) + end + + (* Return the lexical order of two sets *) + fun compare (SET(_, s1), SET(_, s2)) = let + fun cmp (t1, t2) = (case (next t1, next t2) + of ((E, _), (E, _)) => EQUAL + | ((E, _), _) => LESS + | (_, (E, _)) => GREATER + | ((T(_, _, x, _), r1), (T(_, _, y, _), r2)) => + if (x = y) + then cmp (r1, r2) + else if (x < y) + then LESS + else GREATER + (* end case *)) + in + cmp (start s1, start s2) + end + + (* Return true if and only if the first set is a subset of the second *) + fun isSubset (SET(_, s1), SET(_, s2)) = let + fun cmp (t1, t2) = (case (next t1, next t2) + of ((E, _), (E, _)) => true + | ((E, _), _) => true + | (_, (E, _)) => false + | ((T(_, _, x, _), r1), (T(_, _, y, _), r2)) => + ((x = y) andalso cmp (r1, r2)) + orelse ((x > y) andalso cmp (t1, r2)) + (* end case *)) + in + cmp (start s1, start s2) + end + + (* Return true if the two sets are disjoint *) + fun disjoint (SET(0, _), _) = true + | disjoint (_, SET(0, _)) = true + | disjoint (SET(_, s1), SET(_, s2)) = let + fun walk ((E, _), _) = true + | walk (_, (E, _)) = true + | walk (t1 as (T(_, _, x, _), r1), t2 as (T(_, _, y, _), r2)) = + ((x < y) andalso walk (next r1, t2)) + orelse ((x > y) andalso walk (t1, next r2)) + in + walk (next (start s1), next (start s2)) + end + + (* support for constructing red-black trees in linear time from increasing + * ordered sequences (based on a description by R. Hinze). Note that the + * elements in the digits are ordered with the largest on the left, whereas + * the elements of the trees are ordered with the largest on the right. + *) + datatype digit + = ZERO + | ONE of (item * tree * digit) + | TWO of (item * tree * item * tree * digit) + (* add an item that is guaranteed to be larger than any in l *) + fun addItem (a, l) = let + fun incr (a, t, ZERO) = ONE(a, t, ZERO) + | incr (a1, t1, ONE(a2, t2, r)) = TWO(a1, t1, a2, t2, r) + | incr (a1, t1, TWO(a2, t2, a3, t3, r)) = + ONE(a1, t1, incr(a2, T(B, t3, a3, t2), r)) + in + incr(a, E, l) + end + (* link the digits into a tree *) + fun linkAll t = let + fun link (t, ZERO) = t + | link (t1, ONE(a, t2, r)) = link(T(B, t2, a, t1), r) + | link (t, TWO(a1, t1, a2, t2, r)) = + link(T(B, T(R, t2, a2, t1), a1, t), r) + in + link (E, t) + end + + (* create a set from a list of items; this function works in linear time if the list + * is in increasing order. + *) + fun fromList [] = empty + | fromList (first::rest) = let + fun add (prev, x::xs, n, accum) = if (prev < x) + then add(x, xs, n+1, addItem(x, accum)) + else (* list not in order, so fall back to addList code *) + addList(SET(n, linkAll accum), x::xs) + | add (_, [], n, accum) = SET(n, linkAll accum) + in + add (first, rest, 1, addItem(first, ZERO)) + end + + (* return the union of the two sets *) + fun union (SET(_, s1), SET(_, s2)) = let + fun ins ((E, _), n, result) = (n, result) + | ins ((T(_, _, x, _), r), n, result) = + ins(next r, n+1, addItem(x, result)) + fun union' (t1, t2, n, result) = (case (next t1, next t2) + of ((E, _), (E, _)) => (n, result) + | ((E, _), t2) => ins(t2, n, result) + | (t1, (E, _)) => ins(t1, n, result) + | ((T(_, _, x, _), r1), (T(_, _, y, _), r2)) => + if (x < y) + then union' (r1, t2, n+1, addItem(x, result)) + else if (x = y) + then union' (r1, r2, n+1, addItem(x, result)) + else union' (t1, r2, n+1, addItem(y, result)) + (* end case *)) + val (n, result) = union' (start s1, start s2, 0, ZERO) + in + SET(n, linkAll result) + end + + (* return the intersection of the two sets *) + fun intersection (SET(_, s1), SET(_, s2)) = let + fun intersect (t1, t2, n, result) = (case (next t1, next t2) + of ((T(_, _, x, _), r1), (T(_, _, y, _), r2)) => + if (x < y) + then intersect (r1, t2, n, result) + else if (x = y) + then intersect (r1, r2, n+1, addItem(x, result)) + else intersect (t1, r2, n, result) + | _ => (n, result) + (* end case *)) + val (n, result) = intersect (start s1, start s2, 0, ZERO) + in + SET(n, linkAll result) + end + + (* return the set difference *) + fun difference (SET(_, s1), SET(_, s2)) = let + fun ins ((E, _), n, result) = (n, result) + | ins ((T(_, _, x, _), r), n, result) = + ins(next r, n+1, addItem(x, result)) + fun diff (t1, t2, n, result) = (case (next t1, next t2) + of ((E, _), _) => (n, result) + | (t1, (E, _)) => ins(t1, n, result) + | ((T(_, _, x, _), r1), (T(_, _, y, _), r2)) => + if (x < y) + then diff (r1, t2, n+1, addItem(x, result)) + else if (x = y) + then diff (r1, r2, n, result) + else diff (t1, r2, n, result) + (* end case *)) + val (n, result) = diff (start s1, start s2, 0, ZERO) + in + SET(n, linkAll result) + end + + fun subtract (s, item) = difference (s, singleton item) + fun subtract' (item, s) = subtract (s, item) + + fun subtractList (l, items) = let + val items' = List.foldl (fn (x, set) => add(set, x)) (SET(0, E)) items + in + difference (l, items') + end + + fun app f = let + fun appf E = () + | appf (T(_, a, x, b)) = (appf a; f x; appf b) + in + fn (SET(_, m)) => appf m + end + + fun map f = let + fun addf (x, m) = add(m, f x) + in + foldl addf empty + end + + fun mapPartial f = let + fun f' (x, acc) = (case f x of SOME x' => add(acc, x') | NONE => acc) + in + foldl f' empty + end + + (* Filter out those elements of the set that do not satisfy the + * predicate. The filtering is done in increasing map order. + *) + fun filter pred (SET(_, t)) = let + fun walk (E, n, result) = (n, result) + | walk (T(_, a, x, b), n, result) = let + val (n, result) = walk(a, n, result) + in + if (pred x) + then walk(b, n+1, addItem(x, result)) + else walk(b, n, result) + end + val (n, result) = walk (t, 0, ZERO) + in + SET(n, linkAll result) + end + + fun partition pred (SET(_, t)) = let + fun walk (E, n1, result1, n2, result2) = (n1, result1, n2, result2) + | walk (T(_, a, x, b), n1, result1, n2, result2) = let + val (n1, result1, n2, result2) = walk(a, n1, result1, n2, result2) + in + if (pred x) + then walk(b, n1+1, addItem(x, result1), n2, result2) + else walk(b, n1, result1, n2+1, addItem(x, result2)) + end + val (n1, result1, n2, result2) = walk (t, 0, ZERO, 0, ZERO) + in + (SET(n1, linkAll result1), SET(n2, linkAll result2)) + end + + fun exists pred = let + fun test E = false + | test (T(_, a, x, b)) = test a orelse pred x orelse test b + in + fn (SET(_, t)) => test t + end + + fun all pred = let + fun test E = true + | test (T(_, a, x, b)) = test a andalso pred x andalso test b + in + fn (SET(_, t)) => test t + end + + fun find pred = let + fun test E = NONE + | test (T(_, a, x, b)) = (case test a + of NONE => if pred x then SOME x else test b + | someItem => someItem + (* end case *)) + in + fn (SET(_, t)) => test t + end + + (* deprecated *) + val listItems = toList + + end; diff --git a/smlnj-lib/Util/interval-domain-sig.sml b/smlnj-lib/Util/interval-domain-sig.sml new file mode 100644 index 0000000..4d931d9 --- /dev/null +++ b/smlnj-lib/Util/interval-domain-sig.sml @@ -0,0 +1,31 @@ +(* interval-domain-sig.sml + * + * COPYRIGHT (c) 2005 John Reppy (http://www.cs.uchicago.edu/~jhr) + * All rights reserved. + * + * The domain over which we define interval sets. + *) + +signature INTERVAL_DOMAIN = + sig + + (* the abstract type of elements in the domain *) + type point + + (* compare the order of two points *) + val compare : (point * point) -> order + + (* successor and predecessor functions on the domain *) + val succ : point -> point + val pred : point -> point + + (* isSucc(a, b) ==> (succ a) = b and a = (pred b). *) + val isSucc : (point * point) -> bool + + (* the minimum and maximum bounds of the domain; we require that + * pred minPt = minPt and succ maxPt = maxPt. + *) + val minPt : point + val maxPt : point + + end diff --git a/smlnj-lib/Util/interval-set-fn.sml b/smlnj-lib/Util/interval-set-fn.sml new file mode 100644 index 0000000..6a469b2 --- /dev/null +++ b/smlnj-lib/Util/interval-set-fn.sml @@ -0,0 +1,352 @@ +(* interfun-set-fn.sml + * + * COPYRIGHT (c) 2005 John Reppy (http://www.cs.uchicago.edu/~jhr) + * All rights reserved. + * + * An implementation of sets over a discrete ordered domain, where the + * sets are represented by intervals. It is meant for representing + * dense sets (e.g., unicode character classes). + *) + +functor IntervalSetFn (D : INTERVAL_DOMAIN) : INTERVAL_SET = + struct + + structure D = D + + type item = D.point + type interval = (D.point * D.point) + + fun min (a, b) = (case D.compare(a, b) + of LESS => a + | _ => b + (* end case *)) + + (* the set is represented by an ordered list of disjoint, non-adjacent intervals *) + datatype set = SET of interval list + + val empty = SET[] + val universe = SET[(D.minPt, D.maxPt)] + + fun isEmpty (SET []) = true + | isEmpty _ = false + + fun isUniverse (SET[(a, b)]) = + (D.compare(a, D.minPt) = EQUAL) andalso (D.compare(b, D.maxPt) = EQUAL) + | isUniverse _ = false + + fun singleton x = SET[(x, x)] + + fun interval (a, b) = (case D.compare(a, b) + of GREATER => raise Domain + | _ => SET[(a, b)] + (* end case *)) + + fun addInt (SET l, (a, b)) = let + fun ins (a, b, []) = [(a, b)] + | ins (a, b, (x, y)::r) = (case D.compare(b, x) + of LESS => if (D.isSucc(b, x)) + then (a, y)::r + else (a, b)::(x, y)::r + | EQUAL => (a, y)::r + | GREATER => (case D.compare(a, y) + of GREATER => if (D.isSucc(y, a)) + then (x, b) :: r + else (x, y) :: ins(a, b, r) + | EQUAL => ins(x, b, r) + | LESS => (case D.compare(b, y) + of GREATER => ins (min(a, x), b, r) + | _ => ins (min(a, x), y, r) + (* end case *)) + (* end case *)) + (* end case *)) + in + case D.compare(a, b) + of GREATER => raise Domain + | _ => SET(ins (a, b, l)) + (* end case *) + end + fun addInt' (x, m) = addInt (m, x) + + fun add (SET l, a) = let + fun ins (a, []) = [(a, a)] + | ins (a, (x, y)::r) = (case D.compare(a, x) + of LESS => if (D.isSucc(a, x)) + then (a, y)::r + else (a, a)::(x, y)::r + | EQUAL => (a, y)::r + | GREATER => (case D.compare(a, y) + of GREATER => if (D.isSucc(y, a)) + then (x, a) :: r + else (x, y) :: ins(a, r) + | _ => (x, y)::r + (* end case *)) + (* end case *)) + in + SET(ins (a, l)) + end + fun add' (x, m) = add (m, x) + + fun fromList items = List.foldl add' empty items + + (* is a point in any of the intervals in the set *) + fun member (SET l, pt) = let + fun look [] = false + | look ((a, b) :: r) = (case D.compare(a, pt) + of LESS => (case D.compare(pt, b) + of GREATER => look r + | _ => true + (* end case *)) + | EQUAL => true + | GREATER => false + (* end case *)) + in + look l + end + + fun complement (SET[]) = universe + | complement (SET((a, b)::r)) = let + fun comp (start, (a, b)::r, l) = + comp(D.succ b, r, (start, D.pred a)::l) + | comp (start, [], l) = (case D.compare(start, D.maxPt) + of LESS => SET(List.rev((start, D.maxPt)::l)) + | _ => SET(List.rev l) + (* end case *)) + in + case D.compare(D.minPt, a) + of LESS => comp(D.succ b, r, [(D.minPt, D.pred a)]) + | _ => comp(D.succ b, r, []) + (* end case *) + end + + fun union (SET l1, SET l2) = let + fun join ([], l2) = l2 + | join (l1, []) = l1 + | join ((a1, b1)::r1, (a2, b2)::r2) = (case D.compare(a1, a2) + of LESS => (case D.compare(b1, b2) + of LESS => if D.isSucc(b1, a2) + then join(r1, (a1, b2)::r2) + else (a1, b1) :: join(r1, (a2, b2)::r2) + | EQUAL => (a1, b1) :: join(r1, r2) + | GREATER => join ((a1, b1)::r1, r2) + (* end case *)) + | EQUAL => (case D.compare(b1, b2) + of LESS => join(r1, (a2, b2)::r2) + | EQUAL => (a1, b1) :: join(r1, r2) + | GREATER => join ((a1, b1)::r1, r2) + (* end case *)) + | GREATER => (case D.compare(a1, b2) + of LESS => (case D.compare(b1, b2) + of LESS => join (r1, (a2, b2)::r2) + | EQUAL => (a2, b2) :: join(r1, r2) + | GREATER => join ((a2, b1)::r1, r2) + (* end case *)) + | EQUAL => (* a2 < a1 = b2 <= b1 *) + join ((a2, b1)::r1, r2) + | GREATER => if D.isSucc(b2, a1) + then join ((a2, b1)::r1, r2) + else (a2, b2) :: join ((a1, b1)::r1, r2) + (* end case *)) + (* end case *)) + in + SET(join(l1, l2)) + end + + fun intersect (SET l1, SET l2) = let + (* cons a possibly empty interval onto the front of l *) + fun cons (a, b, l) = (case D.compare(a, b) + of GREATER => l + | _ => (a, b) :: l + (* end case *)) + fun meet ([], _) = [] + | meet (_, []) = [] + | meet ((a1, b1)::r1, (a2, b2)::r2) = (case D.compare(a1, a2) + of LESS => (case D.compare(b1, a2) + of LESS => (* a1 <= b1 < a2 <= b2 *) + meet (r1, (a2, b2)::r2) + | EQUAL => (* a1 <= b1 = a2 <= b2 *) + (b1, b1) :: meet (r1, cons(D.succ b1, b2, r2)) + | GREATER => (case D.compare (b1, b2) + of LESS => (* a1 < a2 < b1 < b2 *) + (a2, b1) :: meet (r1, cons(D.succ b1, b2, r2)) + | EQUAL => (* a1 < a2 < b1 = b2 *) + (a2, b1) :: meet (r1, r2) + | GREATER => (* a1 < a2 < b1 & b2 < b1 *) + (a2, b2) :: meet (cons(D.succ b2, b1, r1), r2) + (* end case *)) + (* end case *)) + | EQUAL => (case D.compare(b1, b2) + of LESS => (a1, b1) :: meet (r1, cons(D.succ b1, b2, r2)) + | EQUAL => (a1, b1) :: meet (r1, r2) + | GREATER => (a1, b2) :: meet ((D.succ b2, b1)::r1, r2) + (* end case *)) + | GREATER => (case D.compare(b2, a1) + of LESS => (* a2 <= b2 < a1 <= b1 *) + meet ((a1, b1)::r1, r2) + | EQUAL => (* a2 < b2 = a1 <= b1 *) + (b2, b2) :: meet (cons(D.succ b2, b1, r1), r2) + | GREATER => (case D.compare(b1, b2) + of LESS => (* a2 < a1 <= b1 < b2 *) + (a1, b1) :: meet (r1, cons(D.succ b1, b2, r2)) + | EQUAL => (* a2 < a1 <= b1 = b2 *) + (a1, b1) :: meet (r1, r2) + | GREATER => (* a2 < a1 < b2 < b1 *) + (a1, b2) :: meet (cons(D.succ b2, b1, r1), r2) + (* end case *)) + (* end case *)) + (* end case *)) + in + SET(meet(l1, l2)) + end + + (* FIXME: replace the following with a direct implementation *) + fun difference (s1, s2) = intersect(s1, complement s2) + + (***** iterators on elements *****) + local + fun next [] = NONE + | next ((a, b)::r) = + if D.compare(a, b) = EQUAL + then SOME(a, r) + else SOME(a, (D.succ a, b)::r) + in + fun toList (SET l) = let + fun list (l, items) = (case next l + of NONE => List.rev items + | SOME(x, r) => list(r, x::items) + (* end case *)) + in + list (l, []) + end + fun app f (SET l) = let + fun appf l = (case next l + of NONE => () + | SOME(x, r) => (f x; appf r) + (* end case *)) + in + appf l + end + fun foldl f = let + fun foldf (l, acc) = (case next l + of NONE => acc + | SOME(x, r) => foldf(r, f(x, acc)) + (* end case *)) + in + fn init => fn (SET l) => foldf(l, init) + end + fun foldr f init (SET l) = let + fun foldf l = (case next l + of NONE => init + | SOME(x, r) => f (x, foldf r) + (* end case *)) + in + foldf l + end + fun filter pred (SET l) = let + (* given an interval [a, b], filter its elements and add the subintervals that pass + * the predicate to the list l. + *) + fun filterInt ((a, b), l) = let + fun lp (start, item, last, l) = let + val next = D.succ item + in + if pred next + then if (D.compare(next, last) = EQUAL) + then (start, next)::l + else lp(start, next, last, l) + else scan(D.succ next, last, (start, item)::l) + end + and scan (next, last, l) = if pred next + then lp (next, next, last, l) + else if (D.compare(next, last) = EQUAL) + then l + else scan(D.succ next, last, l) + in + scan (a, b, l) + end + (* filter the intervals *) + fun filter' ([], l) = SET(List.rev l) + | filter' (i::r, l) = filter' (r, filterInt (i, l)) + in + filter' (l, []) + end + fun all pred (SET l) = let + fun all' l = (case next l + of NONE => true + | SOME(x, r) => (pred x andalso all' r) + (* end case *)) + in + all' l + end + fun exists pred (SET l) = let + fun exists' l = (case next l + of NONE => false + | SOME(x, r) => (pred x orelse exists' r) + (* end case *)) + in + exists' l + end + end (* local *) + + (***** Iterators on interfuns *****) + fun intervals (SET l) = l + + fun appInt f (SET l) = List.app f l + + fun foldlInt f init (SET l) = List.foldl f init l + + fun foldrInt f init (SET l) = List.foldl f init l + + fun filterInt pred (SET l) = let + fun f' ([], l) = SET(List.rev l) + | f' (i::r, l) = if pred i + then f'(r, i::l) + else f'(r, l) + in + f' (l, []) + end + + fun existsInt pred (SET l) = List.exists pred l + + fun allInt pred (SET l) = List.all pred l + + fun compare (SET l1, SET l2) = let + fun comp ([], []) = EQUAL + | comp ((a1, b1)::r1, (a2, b2)::r2) = (case D.compare(a1, a2) + of EQUAL => (case D.compare(b1, b2) + of EQUAL => comp (r1, r2) + | someOrder => someOrder + (* end case *)) + | someOrder => someOrder + (* end case *)) + | comp ([], _) = LESS + | comp (_, []) = GREATER + in + comp(l1, l2) + end + + fun isSubset (SET l1, SET l2) = let + (* is the interval [a, b] covered by [x, y]? *) + fun isCovered (a, b, x, y) = (case D.compare(a, x) + of LESS => false + | _ => (case D.compare(y, b) + of LESS => false + | _ => true + (* end case *)) + (* end case *)) + fun test ([], _) = true + | test (_, []) = false + | test ((a1, b1)::r1, (a2, b2)::r2) = + if isCovered (a1, b1, a2, b2) + then test (r1, (a2, b2)::r2) + else (case D.compare(b2, a1) + of LESS => test ((a1, b1)::r1, r2) + | _ => false + (* end case *)) + in + test (l1, l2) + end + + (* DEPRECATED FUNCTIONS *) + val listItems = toList + + end diff --git a/smlnj-lib/Util/interval-set-sig.sml b/smlnj-lib/Util/interval-set-sig.sml new file mode 100644 index 0000000..7570342 --- /dev/null +++ b/smlnj-lib/Util/interval-set-sig.sml @@ -0,0 +1,80 @@ +(* interval-set-sig.sml + * + * COPYRIGHT (c) 2005 John Reppy (http://www.cs.uchicago.edu/~jhr) + * All rights reserved. + * + * This signature is the interface to sets over a discrete ordered domain, where the + * sets are represented by intervals. It is meant for representing dense sets (e.g., + * unicode character classes). + *) + +signature INTERVAL_SET = + sig + + structure D : INTERVAL_DOMAIN + + type item = D.point + type interval = (item * item) + type set + + (* the empty set and the set of all elements *) + val empty : set + val universe : set + + (* a set of a single element *) + val singleton : item -> set + + val fromList : item list -> set + + (* set that covers the given interval *) + val interval : item * item -> set + + val isEmpty : set -> bool + val isUniverse : set -> bool + + val member : set * item -> bool + + (* return the list of items in the set *) + val toList : set -> item list + + (* return a list of intervals that represents the set *) + val intervals : set -> interval list + + (* add a single element to the set *) + val add : set * item -> set + val add' : item * set -> set + + (* add an interval to the set *) + val addInt : set * interval -> set + val addInt' : interval * set -> set + + (* set operations *) + val complement : set -> set + val union : (set * set) -> set + val intersect : (set * set) -> set + val difference : (set * set) -> set + + (* iterators on elements *) + val app : (item -> unit) -> set -> unit + val foldl : (item * 'a -> 'a) -> 'a -> set -> 'a + val foldr : (item * 'a -> 'a) -> 'a -> set -> 'a + val filter : (item -> bool) -> set -> set + val all : (item -> bool) -> set -> bool + val exists : (item -> bool) -> set -> bool + + (* iterators on intervals *) + val appInt : (interval -> unit) -> set -> unit + val foldlInt : (interval * 'a -> 'a) -> 'a -> set -> 'a + val foldrInt : (interval * 'a -> 'a) -> 'a -> set -> 'a + val filterInt : (interval -> bool) -> set -> set + val allInt : (interval -> bool) -> set -> bool + val existsInt : (interval -> bool) -> set -> bool + + (* ordering on sets *) + val compare : set * set -> order + val isSubset : set * set -> bool + + (* DEPRECATED FUNCTIONS *) + val listItems : set -> item list + + end diff --git a/smlnj-lib/Util/io-util-sig.sml b/smlnj-lib/Util/io-util-sig.sml new file mode 100644 index 0000000..b0145a1 --- /dev/null +++ b/smlnj-lib/Util/io-util-sig.sml @@ -0,0 +1,20 @@ +(* io-util-sig.sml + * + * COPYRIGHT (c) 2020 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * Support for redirecting stdIn/stdOut. + *) + +signature IO_UTIL = + sig + + (* rebind stdIn *) + val withInputFile : string * ('a -> 'b) -> 'a -> 'b + val withInstream : TextIO.instream * ('a -> 'b) -> 'a -> 'b + + (* rebind stdOut *) + val withOutputFile : string * ('a -> 'b) -> 'a -> 'b + val withOutstream : TextIO.outstream * ('a -> 'b) -> 'a -> 'b + + end diff --git a/smlnj-lib/Util/io-util.sml b/smlnj-lib/Util/io-util.sml new file mode 100644 index 0000000..7ee5ba7 --- /dev/null +++ b/smlnj-lib/Util/io-util.sml @@ -0,0 +1,60 @@ +(* io-util.sml + * + * COPYRIGHT (c) 2020 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + *) + +structure IOUtil : IO_UTIL = + struct + + type instream = TextIO.instream + type outstream = TextIO.outstream + + fun swapInstrm (s, s') = + TextIO.getInstream s before TextIO.setInstream(s, s') + + fun withInputFile (s, f) x = let + val oldStrm = swapInstrm(TextIO.stdIn, TextIO.getInstream(TextIO.openIn s)) + fun cleanUp () = + TextIO.StreamIO.closeIn(swapInstrm(TextIO.stdIn, oldStrm)) + val res = (f x) handle ex => (cleanUp(); raise ex) + in + cleanUp(); + res + end + + fun withInstream (strm, f) x = let + val oldStrm = swapInstrm(TextIO.stdIn, TextIO.getInstream strm) + fun cleanUp () = + TextIO.setInstream(strm, swapInstrm(TextIO.stdIn, oldStrm)) + val res = (f x) handle ex => (cleanUp(); raise ex) + in + cleanUp(); + res + end + + fun swapOutstrm (s, s') = + TextIO.getOutstream s before TextIO.setOutstream(s, s') + + fun withOutputFile (s, f) x = let + val oldStrm = swapOutstrm(TextIO.stdOut, TextIO.getOutstream(TextIO.openOut s)) + fun cleanUp () = + TextIO.StreamIO.closeOut(swapOutstrm(TextIO.stdOut, oldStrm)) + val res = (f x) handle ex => (cleanUp(); raise ex) + in + cleanUp(); + res + end + + fun withOutstream (strm, f) x = let + val oldStrm = swapOutstrm(TextIO.stdOut, TextIO.getOutstream strm) + fun cleanUp () = + TextIO.setOutstream(strm, swapOutstrm(TextIO.stdOut, oldStrm)) + val res = (f x) handle ex => (cleanUp(); raise ex) + in + cleanUp(); + res + end + + end (* IOUtil *) diff --git a/smlnj-lib/Util/iterate-sig.sml b/smlnj-lib/Util/iterate-sig.sml new file mode 100644 index 0000000..5adb1e5 --- /dev/null +++ b/smlnj-lib/Util/iterate-sig.sml @@ -0,0 +1,31 @@ +(* iterate-sig.sml + * + * COPYRIGHT (c) 1993 by AT&T Bell Laboratories. See COPYRIGHT file for details. + * + *) + +signature ITERATE = + sig + + val iterate : ('a -> 'a) -> int -> 'a -> 'a + (* iterate f cnt init = f(f(...f(f(init))...)) (cnt times) + * iterate f 0 init = init + * raises BadArg if cnt < 0 + *) + + val repeat : (int * 'a -> 'a) -> int -> 'a -> 'a + (* repeat f cnt init + * = #2(iterate (fn (i,v) => (i+1,f(i,v))) cnt (0,init)) + *) + + val for : (int * 'a -> 'a) -> (int * int * int) -> 'a -> 'a + (* for f (start,stop,inc) init + * "for loop" + * implements f(...f(start+2*inc,f(start+inc,f(start,init)))...) + * until the first argument of f > stop if inc > 0 + * or the first argument of f < stop if inc < 0 + * raises BadArg if inc <= 0 and start < stop or if inc >=0 and + * start > stop. + *) + + end (* ITERATE *) diff --git a/smlnj-lib/Util/iterate.sml b/smlnj-lib/Util/iterate.sml new file mode 100644 index 0000000..b5d0bdb --- /dev/null +++ b/smlnj-lib/Util/iterate.sml @@ -0,0 +1,42 @@ +(* iterate.sml + * + * COPYRIGHT (c) 1993 by AT&T Bell Laboratories. See COPYRIGHT file for details. + * + *) + +structure Iterate : ITERATE = + struct + + fun badArg (f,msg) = LibBase.failure {module="Iterate",func=f,msg=msg} + + fun iterate f cnt init = let + fun iter (0,v) = v + | iter (n,v) = iter(n-1,f v) + in + if cnt < 0 + then badArg ("iterate","count < 0") + else iter (cnt,init) + end + + fun repeat f cnt init = let + fun iter (n,v) = if n = cnt then v else iter(n+1,f(n,v)) + in + if cnt < 0 + then badArg ("repeat","count < 0") + else iter (0,init) + end + + fun for f (start,stop,inc) = let + fun up (n,v) = if n > stop then v else up(n+inc,f(n,v)) + fun down (n,v) = if n < stop then v else down(n+inc,f(n,v)) + in + if start < stop + then if inc <= 0 then badArg ("for","inc <= 0 with start < stop") + else fn v => up(start,v) + else if stop < start + then if inc >= 0 then badArg ("for","inc >= 0 with start > stop") + else fn v => down(start,v) + else fn v => f(start,v) + end + + end (* Iterate *) diff --git a/smlnj-lib/Util/keyword-fn.sml b/smlnj-lib/Util/keyword-fn.sml new file mode 100644 index 0000000..eb2a932 --- /dev/null +++ b/smlnj-lib/Util/keyword-fn.sml @@ -0,0 +1,51 @@ +(* keyword-fn.sml + * + * COPYRIGHT (c) 2020 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * This functor is meant to be used as part of a scanner, where identifiers + * and keywords are scanned using the same lexical rules and are then + * further analyzed. + *) + +functor KeywordFn (KW : sig + type token + type pos + val ident : (Atom.atom * pos * pos) -> token + val keywords : (string * ((pos * pos) -> token)) list + end) : sig + type token + type pos + val keyword : (string * pos * pos) -> token + end = struct + + structure A = Atom + structure Tbl = AtomTable + + type token = KW.token + type pos = KW.pos + + (* the keyword hash table *) + exception Keyword + val kwTbl : ((pos * pos) -> token) Tbl.hash_table = + Tbl.mkTable(List.length KW.keywords, Keyword) + + (* insert the reserved words into the keyword hash table *) + val _ = let + val insert = Tbl.insert kwTbl + fun ins (s, item) = insert (A.atom s, item) + in + app ins KW.keywords + end + + fun keyword (s, p1, p2) = let + val name = A.atom s + in + case (Tbl.find kwTbl name) + of (SOME tokFn) => tokFn(p1, p2) + | NONE => KW.ident(name, p1, p2) + (* end case *) + end + + end; + diff --git a/smlnj-lib/Util/left-priorityq-fn.sml b/smlnj-lib/Util/left-priorityq-fn.sml new file mode 100644 index 0000000..233feb6 --- /dev/null +++ b/smlnj-lib/Util/left-priorityq-fn.sml @@ -0,0 +1,88 @@ +(* left-priorityq-fn.sml + * + * COPYRIGHT (c) 2020 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * An implementation of priority queues based on leaftist heaps (see + * Purely Functional Data Structures by Chris Okasaki). + *) + +functor LeftPriorityQFn (P : PRIORITY) : MONO_PRIORITYQ = + struct + + type item = P.item + + datatype queue = Q of (int * heap) + and heap = EMPTY | ND of (int * item * heap * heap) + + val empty = Q(0, EMPTY) + + fun singletonHeap x = ND(1, x, EMPTY, EMPTY) + fun singleton x = Q(1, singletonHeap x) + + fun rank EMPTY = 0 + | rank (ND(r, _, _, _)) = r + + fun mkNode (x, a, b) = if (rank a >= rank b) + then ND(rank b + 1, x, a, b) + else ND(rank a + 1, x, b, a) + + fun mergeHeap (h, EMPTY) = h + | mergeHeap (EMPTY, h) = h + | mergeHeap (h1 as ND(_, x, h11, h12), h2 as ND(_, y, h21, h22)) = ( + case P.compare(P.priority x, P.priority y) + of GREATER => mkNode(x, h11, mergeHeap(h12, h2)) + | _ => mkNode(y, h21, mergeHeap(h1, h22)) + (* end case *)) + + fun insertHeap (h, x) = mergeHeap(singletonHeap x, h) + + fun insert (x, Q(n, h)) = Q(n+1, insertHeap (h, x)) + + fun next (Q(_, EMPTY)) = NONE + | next (Q(n, ND(_, x, h1, h2))) = SOME(x, Q(n-1, mergeHeap(h1, h2))) + + fun remove (Q(_, EMPTY)) = raise List.Empty + | remove (Q(n, ND(_, x, h1, h2))) = (x, Q(n-1, mergeHeap(h1, h2))) + + (* this is a somewhat brute force implementation that probably could be improved *) + fun findAndRemove (Q(n, heap), pred) = let + fun find (EMPTY, rejects) = NONE + | find (ND(_, x, h1, h2), rejects) = if pred x + then SOME(x, Q(n-1, mergeHeap(rejects, mergeHeap(h1, h2)))) + else find (mergeHeap(h1, h2), insertHeap (rejects, x)) + in + find (heap, EMPTY) + end + + fun delete (Q(n, heap), pred) = let + fun filter (EMPTY, (n, residual)) = (n, residual) + | filter (ND(_, x, h1, h2), (n, residual)) = if pred x + then filter (h2, filter (h1, (n-1, residual))) + else filter (h2, filter (h1, (n, insertHeap (residual, x)))) + in + Q (filter (heap, (n, EMPTY))) + end + + fun merge (Q(n1, h1), Q(n2, h2)) = Q(n1+n2, mergeHeap(h1, h2)) + + fun numItems (Q(n, _)) = n + + fun isEmpty (Q(_, EMPTY)) = true + | isEmpty _ = false + + fun fromList [] = empty + | fromList [x] = Q(1, singletonHeap x) + | fromList l = let + fun init ([], n, items) = (n, items) + | init (x::r, n, items) = init (r, n+1, singletonHeap x :: items) + fun merge ([], [h]) = h + | merge ([], hl) = merge (hl, []) + | merge ([h], hl) = merge (h::hl, []) + | merge (h1::h2::r, l) = merge (r, mergeHeap(h1, h2) :: l) + val (len, hs) = init (l, 0, []) + in + Q(len, merge (hs, [])) + end + + end; diff --git a/smlnj-lib/Util/lib-base-sig.sml b/smlnj-lib/Util/lib-base-sig.sml new file mode 100644 index 0000000..eed869a --- /dev/null +++ b/smlnj-lib/Util/lib-base-sig.sml @@ -0,0 +1,21 @@ +(* lib-base-sig.sml + * + * COPYRIGHT (c) 2020 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + *) + +signature LIB_BASE = + sig + + exception Unimplemented of string + (* raised to report unimplemented features *) + exception Impossible of string + (* raised to report internal errors *) + + exception NotFound + (* raised by searching operations *) + + val failure : {module : string, func : string, msg : string} -> 'a + (* raise the exception Fail with a standard format message. *) + + end (* LIB_BASE *) diff --git a/smlnj-lib/Util/lib-base.sml b/smlnj-lib/Util/lib-base.sml new file mode 100644 index 0000000..c2b1bfe --- /dev/null +++ b/smlnj-lib/Util/lib-base.sml @@ -0,0 +1,23 @@ +(* lib-base.sml + * + * COPYRIGHT (c) 2020 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + *) + +structure LibBase : LIB_BASE = + struct + + (* raised to report unimplemented features *) + exception Unimplemented of string + + (* raised to report internal errors *) + exception Impossible of string + + (* raised by searching operations *) + exception NotFound + + (* raise the exception Fail with a standard format message. *) + fun failure {module, func, msg} = + raise (Fail(concat[module, ".", func, ": ", msg])) + + end (* LibBase *) diff --git a/smlnj-lib/Util/list-format-sig.sml b/smlnj-lib/Util/list-format-sig.sml new file mode 100644 index 0000000..d02d42b --- /dev/null +++ b/smlnj-lib/Util/list-format-sig.sml @@ -0,0 +1,37 @@ +(* list-format-sig.sml + * + * COPYRIGHT (c) 2020 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + *) + +signature LIST_FORMAT = + sig + + val fmt : { + init : string, + sep : string, + final : string, + fmt : 'a -> string + } -> 'a list -> string + (* given an initial string (init), a separator (sep), a terminating + * string (final), and an item formating function (fmt), return a list + * formatting function. The list ``[a, b, ..., c]'' gets formated as + * ``init ^ (fmt a) ^ sep ^ (fmt b) ^ sep ^ ... ^ sep ^ (fmt c) ^ final.'' + *) + + val listToString : ('a -> string) -> 'a list -> string + (* formats a list in SML style (i.e., init="[", sep=",", final="]"). *) + + val scan : { + init : string, + sep : string, + final : string, + scan : (char, 'b) StringCvt.reader -> ('a, 'b) StringCvt.reader + } -> (char, 'b) StringCvt.reader -> ('a list, 'b) StringCvt.reader + (* given an expected initial string, a separator, a terminating + * string, and an item scanning function, return a function that + * scans a string for a list of items. Whitespace is ignored. + * If the input string has the incorrect syntax, then NONE is returned. + *) + + end; (* LIST_FORMAT *) diff --git a/smlnj-lib/Util/list-format.sml b/smlnj-lib/Util/list-format.sml new file mode 100644 index 0000000..3705791 --- /dev/null +++ b/smlnj-lib/Util/list-format.sml @@ -0,0 +1,84 @@ +(* list-format.sml + * + * COPYRIGHT (c) 2020 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + *) + +structure ListFormat : LIST_FORMAT = + struct + + (* given an initial string (init), a separator (sep), a terminating + * string (final), and an item formating function (fmt), return a list + * formatting function. The list ``[a, b, ..., c]'' gets formated as + * ``init ^ (fmt a) ^ sep ^ (fmt b) ^ sep ^ ... ^ sep ^ (fmt c) ^ final.'' + *) + fun fmt {init, sep, final, fmt} = let + fun format [] = init ^ final + | format [x] = concat[init, fmt x, final] + | format (x::r) = let + fun f ([], l) = concat(rev(final::l)) + | f (x::r, l) = f (r, (fmt x) :: sep :: l) + in + f (r, [fmt x, init]) + end + in + format + end (* formatList *) + + fun listToString f = fmt {init="[", sep=",", final="]", fmt=f} + + (* given an expected initial string, a separator, a terminating + * string, and an item scanning function, return a function that + * scans a string for a list of items. Whitespace is ignored. + * If the input string has the incorrect syntax, then the exception + * ScanList is raised with the position of the first error. + *) + fun scan {init, sep, final, scan} getc strm = let + val skipWS = StringCvt.skipWS getc + val scanItem = scan getc + fun eat "" = (fn strm => (true, skipWS strm)) + | eat s = let + val n = size s + fun isPrefix (i, strm) = + if (i = n) then SOME strm + else (case getc strm + of (SOME(c, strm)) => if (String.sub(s, i) = c) + then isPrefix(i+1, strm) + else NONE + | NONE => NONE + (* end case *)) + fun eat' strm = ( + case isPrefix (0, skipWS strm) + of (SOME strm) => (true, strm) + | NONE => (false, strm) + (* end case *)) + in + eat' + end + val isInit = eat init + val isSep = eat sep + val isFinal = eat final + fun scan (strm, l) = (case (isFinal strm) + of (true, strm) => SOME(rev l, strm) + | (false, strm) => (case isSep strm + of (true, strm) => (case scanItem (skipWS strm) + of (SOME(x, strm)) => scan (strm, x::l) + | NONE => NONE + (* end case *)) + | _ => NONE + (* end case *)) + (* end case *)) + in + case (isInit strm) + of (true, strm) => (case (isFinal strm) + of (true, strm) => SOME([], strm) + | (false, strm) => (case scanItem (skipWS strm) + of (SOME(x, strm)) => scan (strm, [x]) + | NONE => NONE + (* end case *)) + (* end case *)) + | (false, i) => NONE + (* end case *) + end (* scan *) + + end; (* ListFormat *) diff --git a/smlnj-lib/Util/list-map-fn.sml b/smlnj-lib/Util/list-map-fn.sml new file mode 100644 index 0000000..62de987 --- /dev/null +++ b/smlnj-lib/Util/list-map-fn.sml @@ -0,0 +1,332 @@ +(* list-map-fn.sml + * + * COPYRIGHT (c) 2012 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * COPYRIGHT (c) 1996 by AT&T Research. See COPYRIGHT file for details. + * + * An implementation of finite maps on ordered keys, which uses a sorted list + * representation. + *) + +functor ListMapFn (K : ORD_KEY) :> ORD_MAP where type Key.ord_key = K.ord_key = + struct + + structure Key = K + + type 'a map = (K.ord_key * 'a) list + + val empty = [] + + fun isEmpty [] = true + | isEmpty _ = false + + (* return the first item in the map (or NONE if it is empty) *) + fun first [] = NONE + | first ((_, value)::_) = SOME value + + (* return the first item in the map and its key (or NONE if it is empty) *) + fun firsti [] = NONE + | firsti ((key, value)::_) = SOME(key, value) + + fun singleton (key, item) = [(key, item)] + + fun insert (l, key, item) = let + fun f [] = [(key, item)] + | f ((elem as (key', _))::r) = (case Key.compare(key, key') + of LESS => (key, item) :: elem :: r + | EQUAL => (key, item) :: r + | GREATER => elem :: (f r) + (* end case *)) + in + f l + end + fun insert' ((k, x), m) = insert(m, k, x) + + fun insertWithi comb (l, key, item) = let + fun f [] = [(key, item)] + | f ((elem as (key', item'))::r) = (case Key.compare(key, key') + of LESS => (key, item) :: elem :: r + | EQUAL => (key, comb(key, item', item)) :: r + | GREATER => elem :: (f r) + (* end case *)) + in + f l + end + fun insertWith comb = insertWithi (fn (_, x1, x2) => comb(x1, x2)) + + (* return true if the key is in the map's domain *) + fun inDomain (l, key) = let + fun f [] = false + | f ((key', x) :: r) = (case Key.compare(key, key') + of LESS => false + | EQUAL => true + | GREATER => f r + (* end case *)) + in + f l + end + + (* Look for an item, return NONE if the item doesn't exist *) + fun find (l, key) = let + fun f [] = NONE + | f ((key', x) :: r) = (case Key.compare(key, key') + of LESS => NONE + | EQUAL => SOME x + | GREATER => f r + (* end case *)) + in + f l + end + + (* Look for an item, raise NotFound if the item doesn't exist *) + fun lookup (l, key) = let + fun f [] = raise LibBase.NotFound + | f ((key', x) :: r) = (case Key.compare(key, key') + of LESS => raise LibBase.NotFound + | EQUAL => x + | GREATER => f r + (* end case *)) + in + f l + end + + (* Remove an item, returning new map and value removed. + * Raise LibBase.NotFound if not found. + *) + fun remove (l, key) = let + fun f (_, []) = raise LibBase.NotFound + | f (prefix, (elem as (key', x)) :: r) = (case Key.compare(key, key') + of LESS => raise LibBase.NotFound + | EQUAL => (List.revAppend(prefix, r), x) + | GREATER => f(elem :: prefix, r) + (* end case *)) + in + f ([], l) + end + + fun findAndRemove (l, key) = let + fun f (_, []) = NONE + | f (prefix, (elem as (key', x)) :: r) = (case Key.compare(key, key') + of LESS => NONE + | EQUAL => SOME(List.revAppend(prefix, r), x) + | GREATER => f(elem :: prefix, r) + (* end case *)) + in + f ([], l) + end + + (* Return the number of items in the map *) + fun numItems l = List.length l + + (* Return a list of the items (and their keys) in the map *) + fun listItems (l : 'a map) = List.map #2 l + fun listItemsi l = l + + fun listKeys (l : 'a map) = List.map #1 l + + fun equiv rngEq = let + fun cmp ([], []) = true + | cmp ((xk, x)::xr, (yk, y)::yr) = (case Key.compare(xk, yk) + of EQUAL => rngEq(x, y) andalso cmp(xr, yr) + | _ => false + (* end case *)) + | cmp _ = false + in + cmp + end + + fun collate cmpRng = let + fun cmp ([], []) = EQUAL + | cmp ([], _) = LESS + | cmp (_, []) = GREATER + | cmp ((xk, x)::xr, (yk, y)::yr) = (case Key.compare(xk, yk) + of EQUAL => (case cmpRng(x, y) + of EQUAL => cmp (xr, yr) + | order => order + (* end case *)) + | order => order + (* end case *)) + in + cmp + end + + fun extends rngEx = let + fun cmp ([], []) = true + | cmp (_, []) = true + | cmp ([], _) = false + | cmp ((xk, x)::xr, ys as ((yk, y)::yr)) = ( + case Key.compare(xk, yk) + of LESS => cmp (xr, ys) + | EQUAL => rngEx(x, y) andalso cmp (xr, yr) + | GREATER => false + (* end case *)) + in + cmp + end + + (* return a map whose domain is the union of the domains of the two input + * maps, using the supplied function to define the map on elements that + * are in both domains. + *) + fun unionWith f (m1 : 'a map, m2 : 'a map) = let + fun merge ([], [], l) = List.rev l + | merge ([], m2, l) = List.revAppend(l, m2) + | merge (m1, [], l) = List.revAppend(l, m1) + | merge (m1 as ((k1, x1)::r1), m2 as ((k2, x2)::r2), l) = ( + case Key.compare (k1, k2) + of LESS => merge (r1, m2, (k1, x1)::l) + | EQUAL => merge (r1, r2, (k1, f(x1, x2)) :: l) + | GREATER => merge (m1, r2, (k2, x2)::l) + (* end case *)) + in + merge (m1, m2, []) + end + fun unionWithi f (m1 : 'a map, m2 : 'a map) = let + fun merge ([], [], l) = List.rev l + | merge ([], m2, l) = List.revAppend(l, m2) + | merge (m1, [], l) = List.revAppend(l, m1) + | merge (m1 as ((k1, x1)::r1), m2 as ((k2, x2)::r2), l) = ( + case Key.compare (k1, k2) + of LESS => merge (r1, m2, (k1, x1)::l) + | EQUAL => merge (r1, r2, (k1, f(k1, x1, x2)) :: l) + | GREATER => merge (m1, r2, (k2, x2)::l) + (* end case *)) + in + merge (m1, m2, []) + end + + (* return a map whose domain is the intersection of the domains of the + * two input maps, using the supplied function to define the range. + *) + fun intersectWith f (m1 : 'a map, m2 : 'b map) = let + fun merge (m1 as ((k1, x1)::r1), m2 as ((k2, x2)::r2), l) = ( + case Key.compare (k1, k2) + of LESS => merge (r1, m2, l) + | EQUAL => merge (r1, r2, (k1, f(x1, x2)) :: l) + | GREATER => merge (m1, r2, l) + (* end case *)) + | merge (_, _, l) = List.rev l + in + merge (m1, m2, []) + end + fun intersectWithi f (m1 : 'a map, m2 : 'b map) = let + fun merge (m1 as ((k1, x1)::r1), m2 as ((k2, x2)::r2), l) = ( + case Key.compare (k1, k2) + of LESS => merge (r1, m2, l) + | EQUAL => merge (r1, r2, (k1, f(k1, x1, x2)) :: l) + | GREATER => merge (m1, r2, l) + (* end case *)) + | merge (_, _, l) = List.rev l + in + merge (m1, m2, []) + end + + fun mergeWith f (m1 : 'a map, m2 : 'b map) = let + fun merge (m1 as ((k1, x1)::r1), m2 as ((k2, x2)::r2), l) = ( + case Key.compare (k1, k2) + of LESS => mergef (k1, SOME x1, NONE, r1, m2, l) + | EQUAL => mergef (k1, SOME x1, SOME x2, r1, r2, l) + | GREATER => mergef (k2, NONE, SOME x2, m1, r2, l) + (* end case *)) + | merge ([], [], l) = List.rev l + | merge ((k1, x1)::r1, [], l) = mergef (k1, SOME x1, NONE, r1, [], l) + | merge ([], (k2, x2)::r2, l) = mergef (k2, NONE, SOME x2, [], r2, l) + and mergef (k, x1, x2, r1, r2, l) = (case f (x1, x2) + of NONE => merge (r1, r2, l) + | SOME y => merge (r1, r2, (k, y)::l) + (* end case *)) + in + merge (m1, m2, []) + end + fun mergeWithi f (m1 : 'a map, m2 : 'b map) = let + fun merge (m1 as ((k1, x1)::r1), m2 as ((k2, x2)::r2), l) = ( + case Key.compare (k1, k2) + of LESS => mergef (k1, SOME x1, NONE, r1, m2, l) + | EQUAL => mergef (k1, SOME x1, SOME x2, r1, r2, l) + | GREATER => mergef (k2, NONE, SOME x2, m1, r2, l) + (* end case *)) + | merge ([], [], l) = List.rev l + | merge ((k1, x1)::r1, [], l) = mergef (k1, SOME x1, NONE, r1, [], l) + | merge ([], (k2, x2)::r2, l) = mergef (k2, NONE, SOME x2, [], r2, l) + and mergef (k, x1, x2, r1, r2, l) = (case f (k, x1, x2) + of NONE => merge (r1, r2, l) + | SOME y => merge (r1, r2, (k, y)::l) + (* end case *)) + in + merge (m1, m2, []) + end + + (* Apply a function to the entries of the map in map order. *) + val appi = List.app + fun app f l = appi (fn (_, item) => f item) l + + (* Create a new table by applying a map function to the + * name/value pairs in the table. + *) + fun mapi f l = List.map (fn (key, item) => (key, f(key, item))) l + fun map f l = List.map (fn (key, item) => (key, f item)) l + + (* Apply a folding function to the entries of the map + * in increasing map order. + *) + fun foldli f init l = + List.foldl (fn ((key, item), accum) => f(key, item, accum)) init l + fun foldl f init l = List.foldl (fn ((_, item), accum) => f(item, accum)) init l + + (* Apply a folding function to the entries of the map + * in decreasing map order. + *) + fun foldri f init l = + List.foldr (fn ((key, item), accum) => f(key, item, accum)) init l + fun foldr f init l = List.foldr (fn ((_, item), accum) => f(item, accum)) init l + + fun filter pred l = List.filter (fn (_, item) => pred item) l + fun filteri pred l = List.filter pred l + + fun mapPartiali f l = let + fun f' (key, item) = (case f (key, item) + of NONE => NONE + | SOME y => SOME(key, y) + (* end case *)) + in + List.mapPartial f' l + end + fun mapPartial f l = mapPartiali (fn (_, item) => f item) l + + (* check the elements of a map with a predicate and return true if + * any element satisfies the predicate. Return false otherwise. + * Elements are checked in key order. + *) + fun exists pred = let + fun exists' [] = false + | exists' ((_, x)::r) = pred x orelse exists' r + in + exists' + end + fun existsi pred = let + fun exists' [] = false + | exists' (arg::r) = pred arg orelse exists' r + in + exists' + end + + (* check the elements of a map with a predicate and return true if + * they all satisfy the predicate. Return false otherwise. Elements + * are checked in key order. + *) + fun all pred = let + fun all' [] = false + | all' ((_, x)::r) = pred x andalso all' r + in + all' + end + fun alli pred = let + fun all' [] = false + | all' (arg::r) = pred arg andalso all' r + in + all' + end + + end (* functor ListMapFn *) + diff --git a/smlnj-lib/Util/list-mergesort.sml b/smlnj-lib/Util/list-mergesort.sml new file mode 100644 index 0000000..b59aa66 --- /dev/null +++ b/smlnj-lib/Util/list-mergesort.sml @@ -0,0 +1,142 @@ +(* list-mergesort.sml + * + * COPYRIGHT (c) 2014 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + *) + +structure ListMergeSort : LIST_SORT = + struct + + (* Given a ">" relation, sort the list into increasing order. This sort + * detects initial increasing and decreasing runs and thus is linear + * time on ordered input. It is also stable. + *) + fun sort gt = let + fun revAppend ([], ys) = ys + | revAppend (x::xs, ys) = revAppend(xs, x::ys) + fun reverse (x, xs) = revAppend (xs, [x]) + (* merge two sorted lists, where we assume that the elements of the first list + * appeared before the elements of the second list. + *) + fun merge ([], ys, acc) = revAppend(acc, ys) + | merge (xs, [], acc) = revAppend(acc, xs) + | merge (xs as (x::xr), ys as (y::yr), acc) = + (* note that if `x` and `y` are equal, then we want `x` first in the result *) + if gt(y, x) + then merge (xr, ys, x::acc) + else merge (xs, yr, y::acc) + (* given a list of lists, where the order of the sublists corresponds to the original + * list order, merge neighboring pairs of sublists. + *) + fun mergeNeighbors ([], yss) = finishPass yss + | mergeNeighbors ([xs], yss) = finishPass (xs::yss) + | mergeNeighbors (xs1::xs2::xss, yss) = + mergeNeighbors (xss, merge(xs1, xs2, [])::yss) + (* finish a mergeNeighbors pass *) + and finishPass [] = [] + | finishPass [xs] = xs + | finishPass xss = mergeNeighborsRev (xss, []) + (* given a list of lists, where the order of the sublists is the reverse of the + * original list order, merge neighboring pairs of sublists. + *) + and mergeNeighborsRev ([], yss) = finishPassRev yss + | mergeNeighborsRev ([xs], yss) = finishPassRev (xs::yss) + | mergeNeighborsRev (xs1::xs2::xss, yss) = + mergeNeighborsRev (xss, merge(xs2, xs1, [])::yss) + (* finish a mergeNeighborsRev pass *) + and finishPassRev [] = [] + | finishPassRev [xs] = xs + | finishPassRev xss = mergeNeighbors (xss, []) + (* the initialization pass computes an initial list of lists, where the + * elements of each sub list are either equal or ordered in decreasing + * order. + *) + fun init (prev, [], yss) = mergeNeighbors ([prev]::yss, []) + | init (prev, x::xs, yss) = if gt(prev, x) + then runDn (x, xs, [prev], yss) + else if gt(x, prev) + then runUp (x, xs, [prev], yss) + else runEq (x, xs, [prev], yss) + (* identify a run of strictly increasing values; we know that `prev::run` + * is a strictly increasing run. + *) + and runUp (prev, [], run, yss) = mergeNeighbors (reverse(prev, run)::yss, []) + | runUp (prev, x::xr, run, yss) = + if gt(x, prev) + then runUp (x, xr, prev::run, yss) + else init (x, xr, reverse(prev, run)::yss) + (* identify a run of strictly decreasing values; we know that `prev::run` + * is a strictly decreasing run. + *) + and runDn (prev, [], run, yss) = mergeNeighbors ((prev::run)::yss, []) + | runDn (prev, x::xr, run, yss) = + if gt(prev, x) + then runDn (x, xr, prev::run, yss) + else init (x, xr, (prev::run)::yss) + (* identify a run of equal values; note that to preserve stability of the + * sort, we need to reverse the order of the run when it is finished. + *) + and runEq (prev, [], run, yss) = mergeNeighbors (reverse(prev, run)::yss, []) + | runEq (prev, x::xr, run, yss) = + if gt(prev, x) orelse gt(x, prev) + then init (x, xr, reverse(prev, run)::yss) + else runEq (x, xr, prev::run, yss) + in + fn [] => [] | (x::xs) => init(x, xs, []) + end + + (* Given a comparison function, sort the sequence in ascending order while eliminating + * duplicates. This sort detects initial increasing and decreasing runs and thus is linear + * time on ordered input. + *) + fun uniqueSort cmp = let + fun revAppend ([], ys) = ys + | revAppend (x::xs, ys) = revAppend(xs, x::ys) + fun reverse (x, xs) = revAppend (xs, [x]) + fun merge ([], ys, acc) = revAppend(acc, ys) + | merge (xs, [], acc) = revAppend(acc, xs) + | merge (xs as (x::xr), ys as (y::yr), acc) = ( + case cmp (x, y) + of LESS => merge (xr, ys, x::acc) + | EQUAL => merge (xr, yr, x::acc) (* discard duplicate *) + | GREATER => merge (xs, yr, y::acc) + (* end case *)) + fun mergeNeighbors ([], yss) = finishPass yss + | mergeNeighbors ([xs], yss) = finishPass (xs::yss) + | mergeNeighbors (xs1::xs2::xss, yss) = + mergeNeighbors (xss, merge(xs1, xs2, [])::yss) + and finishPass [] = [] + | finishPass [xs] = xs + | finishPass xss = mergeNeighbors (xss, []) + fun init (prev, [], yss) = mergeNeighbors ([prev]::yss, []) + | init (prev, x::xs, yss) = (case cmp(prev, x) + of LESS => runUp (x, xs, [prev], yss) + | EQUAL => init (prev, xs, yss) (* discard duplicate *) + | GREATER => runDn (x, xs, [prev], yss) + (* end case *)) + and runUp (prev, [], run, yss) = mergeNeighbors (reverse(prev, run)::yss, []) + | runUp (prev, x::xr, run, yss) = (case cmp (prev, x) + of LESS => runUp (x, xr, prev::run, yss) + | EQUAL => runUp (prev, xr, run, yss) (* discard duplicate *) + | GREATER => init (x, xr, reverse(prev, run)::yss) + (* end case *)) + and runDn (prev, [], run, yss) = mergeNeighbors ((prev::run)::yss, []) + | runDn (prev, x::xr, run, yss) = (case cmp (prev, x) + of LESS => init (x, xr, (prev::run)::yss) + | EQUAL => runDn (prev, xr, run, yss) (* discard duplicate *) + | GREATER => runDn (x, xr, prev::run, yss) + (* end case *)) + in + fn [] => [] | (x::xs) => init(x, xs, []) + end + + (* is the list sorted in ascending order according to the given ">" relation? *) + fun sorted (op >) = let + fun chk (_, []) = true + | chk (x1, x2::xs) = not(x1>x2) andalso chk(x2, xs) + in + fn [] => true + | (x::xs) => chk(x, xs) + end + + end (* ListMergeSort *) diff --git a/smlnj-lib/Util/list-set-fn.sml b/smlnj-lib/Util/list-set-fn.sml new file mode 100644 index 0000000..4db2cd9 --- /dev/null +++ b/smlnj-lib/Util/list-set-fn.sml @@ -0,0 +1,205 @@ +(* list-set-fn.sml + * + * COPYRIGHT (c) 2012 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * An implementation of finite sets of ordered values, which uses a sorted list + * representation. + *) + +functor ListSetFn (K : ORD_KEY) :> ORD_SET where type Key.ord_key = K.ord_key = + struct + + structure Key = K + + (* sets are represented as ordered lists of key values *) + type item = Key.ord_key + type set = item list + + val empty = [] + + fun singleton x = [x] + + fun add (l, item) = let + fun f [] = [item] + | f (elem::r) = (case Key.compare(item, elem) + of LESS => item :: elem :: r + | EQUAL => item :: r + | GREATER => elem :: (f r) + (* end case *)) + in + f l + end + fun add' (s, x) = add(x, s) + + fun union (s1, s2) = let + fun merge ([], l2) = l2 + | merge (l1, []) = l1 + | merge (x::r1, y::r2) = (case Key.compare(x, y) + of LESS => x :: merge(r1, y::r2) + | EQUAL => x :: merge(r1, r2) + | GREATER => y :: merge(x::r1, r2) + (* end case *)) + in + merge (s1, s2) + end + + fun intersection (s1, s2) = let + fun merge ([], l2) = [] + | merge (l1, []) = [] + | merge (x::r1, y::r2) = (case Key.compare(x, y) + of LESS => merge(r1, y::r2) + | EQUAL => x :: merge(r1, r2) + | GREATER => merge(x::r1, r2) + (* end case *)) + in + merge (s1, s2) + end + + fun difference (s1, s2) = let + fun merge ([], l2) = [] + | merge (l1, []) = l1 + | merge (x::r1, y::r2) = (case Key.compare(x, y) + of LESS => x :: merge(r1, y::r2) + | EQUAL => merge(r1, r2) + | GREATER => merge(x::r1, r2) + (* end case *)) + in + merge (s1, s2) + end + + fun addList (l, items) = let + val items' = List.foldl (fn (x, set) => add(set, x)) [] items + in + union (l, items') + end + + fun subtract (l, item) = let + fun f ([], _) = l + | f (elem::r, prefix) = (case Key.compare(item, elem) + of LESS => l + | EQUAL => List.revAppend(prefix, r) + | GREATER => f (r, elem::prefix) + (* end case *)) + in + f (l, []) + end + fun subtract' (item, l) = subtract (l, item) + + fun subtractList (l, items) = let + val items' = List.foldl (fn (x, set) => add(set, x)) [] items + in + difference (l, items') + end + + (* create a set from a list of items; this function works in linear time if the list + * is in increasing order. + *) + fun fromList [] = [] + | fromList (first::rest) = let + fun add (prev, x::xs, s) = (case Key.compare(prev, x) + of LESS => add (x, xs, x::s) + | _ => (* not ordered, so fallback to addList *) + addList (List.rev s, x::xs) + (* end case *)) + | add (_, [], s) = List.rev s + in + add (first, rest, [first]) + end + + (* Remove an item, returning new map and value removed. + * Raise LibBase.NotFound if not found. + *) + fun delete (l, elem) = let + fun f (_, []) = raise LibBase.NotFound + | f (prefix, elem' :: r) = (case Key.compare(elem, elem') + of LESS => raise LibBase.NotFound + | EQUAL => List.revAppend(prefix, r) + | GREATER => f(elem' :: prefix, r) + (* end case *)) + in + f ([], l) + end + + fun member (l, item) = let + fun f [] = false + | f (elem :: r) = (case Key.compare(item, elem) + of LESS => false + | EQUAL => true + | GREATER => f r + (* end case *)) + in + f l + end + + fun isEmpty [] = true + | isEmpty _ = false + + fun minItem [] = raise Empty + | minItem (x::_) = x + + fun maxItem xs = List.last xs + + fun equal (s1, s2) = let + fun f ([], []) = true + | f (x::r1, y::r2) = (Key.compare(x, y) = EQUAL) andalso f (r1, r2) + | f _ = false + in + f (s1, s2) + end + + fun compare ([], []) = EQUAL + | compare ([], _) = LESS + | compare (_, []) = GREATER + | compare (x1::r1, x2::r2) = (case Key.compare(x1, x2) + of EQUAL => compare (r1, r2) + | order => order + (* end case *)) + + (* Return true if and only if the first set is a subset of the second *) + fun isSubset (s1, s2) = let + fun f ([], _) = true + | f (_, []) = false + | f (x::r1, y::r2) = (case Key.compare(x, y) + of LESS => false + | EQUAL => f (r1, r2) + | GREATER => f (x::r1, r2) + (* end case *)) + in + f (s1, s2) + end + + fun disjoint ([], _) = true + | disjoint (_, []) = true + | disjoint (x::r1, y::r2) = (case Key.compare(x, y) + of LESS => disjoint (r1, y::r2) + | EQUAL => false + | GREATER => disjoint (x::r1, r2) + (* end case *)) + + (* Return the number of items in the set *) + fun numItems l = List.length l + + (* Return a list of the items in the set *) + fun toList l = l + + val app = List.app + fun map f s1 = List.foldl (fn (x, s) => add(s, f x)) [] s1 + fun mapPartial f s = let + fun f' (x, acc) = (case f x of SOME x' => add(acc, x') | NONE => acc) + in + List.foldl f' [] s + end + val foldr = List.foldr + val foldl = List.foldl + val filter = List.filter + val partition = List.partition + val exists = List.exists + val all = List.all + val find = List.find + + (* deprecated *) + val listItems = toList + + end (* IntListMap *) + diff --git a/smlnj-lib/Util/list-xprod-sig.sml b/smlnj-lib/Util/list-xprod-sig.sml new file mode 100644 index 0000000..44a751a --- /dev/null +++ b/smlnj-lib/Util/list-xprod-sig.sml @@ -0,0 +1,27 @@ +(* list-xprod-sig.sml + * + * COPYRIGHT (c) 2020 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * Functions for computing with the Cartesian product of two lists. + *) + +signature LIST_XPROD = + sig + + val app : (('a * 'b) -> unit) -> ('a list * 'b list) -> unit + (* apply a function to the Cartesian product of two lists *) + + val map : (('a * 'b) -> 'c) -> ('a list * 'b list) -> 'c list + (* map a function across the Cartesian product of two lists *) + + val fold : (('a * 'b * 'c) -> 'c) -> 'c -> ('a list * 'b list) -> 'c + (* fold a function across the Cartesian product of two lists *) + + (* DEPRECATED FUNCTIONS *) + + val appX : (('a * 'b) -> unit) -> ('a list * 'b list) -> unit + val mapX : (('a * 'b) -> 'c) -> ('a list * 'b list) -> 'c list + val foldX : (('a * 'b * 'c) -> 'c) -> ('a list * 'b list) -> 'c -> 'c + + end; (* LIST_XPROD *) diff --git a/smlnj-lib/Util/list-xprod.sml b/smlnj-lib/Util/list-xprod.sml new file mode 100644 index 0000000..296717a --- /dev/null +++ b/smlnj-lib/Util/list-xprod.sml @@ -0,0 +1,57 @@ +(* list-xprod.sml + * + * COPYRIGHT (c) 2020 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * Functions for computing with the Cartesian product of two lists. + *) + +structure ListXProd : LIST_XPROD = + struct + + (* apply a function to the Cartesian product of two lists *) + fun app f (l1, l2) = let + fun lp1 [] = () + | lp1 (x::r) = let + fun lp2 [] = lp1 r + | lp2 (y::r) = (f(x, y); lp2 r) + in + lp2 l2 + end + in + lp1 l1 + end + + (* map a function across the Cartesian product of two lists *) + fun map f (l1, l2) = let + fun lp1 ([], resL) = rev resL + | lp1 (x::r, resL) = let + fun lp2 ([], resL) = lp1 (r, resL) + | lp2 (y::r, resL) = lp2 (r, f(x, y) :: resL) + in + lp2 (l2, resL) + end + in + lp1 (l1, []) + end + + (* fold a function across the Cartesian product of two lists *) + fun fold f init (l1, l2) = let + fun lp1 ([], accum) = accum + | lp1 (x::r, accum) = let + fun lp2 ([], accum) = lp1 (r, accum) + | lp2 (y::r, accum) = lp2 (r, f(x, y, accum)) + in + lp2 (l2, accum) + end + in + lp1 (l1, init) + end + + (* DEPRECATED FUNCTIONS *) + + val appX = app + val mapX = map + fun foldX f arg init = fold f init arg + + end; (* ListXProd *) diff --git a/smlnj-lib/Util/listsort-sig.sml b/smlnj-lib/Util/listsort-sig.sml new file mode 100644 index 0000000..220a70a --- /dev/null +++ b/smlnj-lib/Util/listsort-sig.sml @@ -0,0 +1,27 @@ +(* listsort-sig.sml + * + * COPYRIGHT (c) 2020 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * The generic list sorting interface. Taken from the SML/NJ compiler. + *) + +signature LIST_SORT = + sig + + val sort : ('a * 'a -> bool) -> 'a list -> 'a list + (* (sort gt l) sorts the list l in ascending order using the + * ``greater-than'' relationship defined by gt. + *) + + val uniqueSort : ('a * 'a -> order) -> 'a list -> 'a list + (* uniquesort produces an increasing list, removing equal + * elements + *) + + val sorted : ('a * 'a -> bool) -> 'a list -> bool + (* (sorted gt l) returns true if the list is sorted in ascending + * order under the ``greater-than'' predicate gt. + *) + + end; (* LIST_SORT *) diff --git a/smlnj-lib/Util/max-hash-table-size.sml b/smlnj-lib/Util/max-hash-table-size.sml new file mode 100644 index 0000000..6ad1a52 --- /dev/null +++ b/smlnj-lib/Util/max-hash-table-size.sml @@ -0,0 +1,35 @@ +(* max-hash-table-size.sml + * + * COPYRIGHT (c) 2023 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * Common code for computing an upper-limit on the size of hash tables + * (hash-table-rep.sml) and hash sets (hash-set-fn.sml). + *) + +structure MaxHashTableSize : sig + + val maxSize : int + + end = struct + + structure W = Word + + (* return largest k such that 2^k <= n *) + fun log2 (n : int) = let + fun lp (n, k) = if (n <= 0) then k-1 else lp(n div 2, k+1) + in + lp(n, 0) + end + + (* return 2^k *) + fun pow2 k = let + fun lp (k, n) = if (k <= 0) then n else lp(k-1, n+n) + in + lp (k, 1) + end + + (* pick the largest power of 2 that is <= than the maximum array size *) + val maxSize = pow2 (log2 Array.maxLen) + + end diff --git a/smlnj-lib/Util/mono-array-fn.sml b/smlnj-lib/Util/mono-array-fn.sml new file mode 100644 index 0000000..3760e94 --- /dev/null +++ b/smlnj-lib/Util/mono-array-fn.sml @@ -0,0 +1,17 @@ +(* mono-array-fn.sml + * + * COPYRIGHT (c) 2020 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * This simple functor allows easy construction of new monomorphic array + * structures. + *) + +functor MonoArrayFn (type elem) :> MONO_ARRAY where type elem = elem + = struct + open Array + type elem = elem + type array = elem Array.array + type vector = elem Vector.vector + end + diff --git a/smlnj-lib/Util/mono-array-sort-sig.sml b/smlnj-lib/Util/mono-array-sort-sig.sml new file mode 100644 index 0000000..2ef0ed3 --- /dev/null +++ b/smlnj-lib/Util/mono-array-sort-sig.sml @@ -0,0 +1,19 @@ +(* mono-array-sort-sig.sml + * + * COPYRIGHT (c) 1993 by AT&T Bell Laboratories. See COPYRIGHT file for details. + * + * Signature for in-place sorting of monomorphic arrays + * + *) + +signature MONO_ARRAY_SORT = + sig + + structure A : MONO_ARRAY + + val sort : (A.elem * A.elem -> order) -> A.array -> unit + + val sorted : (A.elem * A.elem -> order) -> A.array -> bool + + end; (* MONO_ARRAY_SORT *) + diff --git a/smlnj-lib/Util/mono-dynamic-array-sig.sml b/smlnj-lib/Util/mono-dynamic-array-sig.sml new file mode 100644 index 0000000..1d5fe1b --- /dev/null +++ b/smlnj-lib/Util/mono-dynamic-array-sig.sml @@ -0,0 +1,87 @@ +(* mono-dynamic-array-sig.sml + * + * COPYRIGHT (c) 2020 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * Signature for monomorphic unbounded arrays. + * + * TODO: add the missing operations that the DynamicArray structure + * provides. + *) + +signature MONO_DYNAMIC_ARRAY = + sig + type elem + type array + + val array : (int * elem) -> array + (* array (sz, e) creates an unbounded array all of whose elements + * are initialized to e. sz (>= 0) is used as a + * hint of the potential range of indices. Raises Size if a + * negative hint is given. + *) + + val subArray : array * int * int -> array + (* subArray (a,lo,hi) creates a new array with the same default + * as a, and whose values in the range [0,hi-lo] are equal to + * the values in b in the range [lo, hi]. + * Raises Size if lo > hi + *) + + val fromList : elem list * elem -> array + (* arrayoflist (l, v) creates an array using the list of values l + * plus the default value v. + *) + + val toList : array -> elem list + (* return the array's contents as a list *) + + val tabulate: int * (int -> elem) * elem -> array + (* tabulate (sz,fill,dflt) acts like Array.tabulate, plus + * stores default value dflt. Raises Size if sz < 0. + *) + + val default : array -> elem + (* default returns array's default value *) + + val sub : array * int -> elem + (* sub (a,idx) returns value of the array at index idx. + * If that value has not been set by update, it returns the default value. + * Raises Subscript if idx < 0 + *) + + val update : array * int * elem -> unit + (* update (a,idx,v) sets the value at index idx of the array to v. + * Raises Subscript if idx < 0 + *) + + val bound : array -> int + (* bound returns an upper bound on the index of values that have been + * changed. + *) + + val truncate : array * int -> unit + (* truncate (a,sz) makes every entry with index > sz the default value *) + +(** what about iterators??? **) +(* + val vector : array -> 'a vector + val copy : {di:int, dst:array, src:array} -> unit + val copyVec : {di:int, dst:array, src:'a vector} -> unit + val appi : (int * 'a -> unit) -> array -> unit + val app : ('a -> unit) -> array -> unit + val modifyi : (int * 'a -> 'a) -> array -> unit + val modify : ('a -> 'a) -> array -> unit + val foldli : (int * 'a * 'b -> 'b) -> 'b -> array -> 'b + val foldri : (int * 'a * 'b -> 'b) -> 'b -> array -> 'b + val foldl : ('a * 'b -> 'b) -> 'b -> array -> 'b + val foldr : ('a * 'b -> 'b) -> 'b -> array -> 'b + val findi : (int * 'a -> bool) -> array -> (int * 'a) option + val find : ('a -> bool) -> array -> 'a option + val exists : ('a -> bool) -> array -> bool + val all : ('a -> bool) -> array -> bool + val collate : ('a * 'a -> order) -> array * array -> order +*) + + end (* MONO_DYNAMIC_ARRAY *) + diff --git a/smlnj-lib/Util/mono-hash-set-sig.sml b/smlnj-lib/Util/mono-hash-set-sig.sml new file mode 100644 index 0000000..b58cb44 --- /dev/null +++ b/smlnj-lib/Util/mono-hash-set-sig.sml @@ -0,0 +1,110 @@ +(* mono-hash-set-sig.sml + * + * COPYRIGHT (c) 2020 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + *) + +signature MONO_HASH_SET = + sig + + structure Key : HASH_KEY + + type item = Key.hash_key + type set + + val mkEmpty : int -> set + (* The empty set; argument specifies initial table size *) + + val mkSingleton : item -> set + (* Create a singleton set *) + + val mkFromList : item list -> set + (* create a set from a list of items *) + + val copy : set -> set + (* returns a copy of the set *) + + val toList : set -> item list + (* Return a list of the items in the set *) + + val add : set * item -> unit + val addc : set -> item -> unit + (* Insert an item. *) + + val addList : set * item list -> unit + (* Insert items from list. *) + + val subtract : set * item -> unit + val subtractc : set -> item -> unit + (* Remove the item, if it is in the set. Otherwise the set is unchanged. + * The `without` function is deprecated in favor of `subtract`, whose name + * is consistent with the other set-like APIs. + *) + + val subtractList : set * item list -> unit + (* Subtract a list of items from the set. *) + + val delete : set * item -> bool + (* Remove an item. Return false if the item was not present. *) + + val member : set * item -> bool + (* Return true if and only if item is an element in the set *) + + val isEmpty : set -> bool + (* Return true if and only if the set is empty *) + + val isSubset : (set * set) -> bool + (* Return true if and only if the first set is a subset of the second *) + + val numItems : set -> int + (* Return the number of items in the table *) + + val map : (item -> item) -> set -> set + (* Create a new set by applying a map function to the elements + * of the set. + *) + + val mapPartial : (item -> item option) -> set -> set + (* Create a new set by mapping a partial function over the + * items in the set. + *) + + val app : (item -> unit) -> set -> unit + (* Apply a function to the entries of the set. *) + + val fold : (item * 'b -> 'b) -> 'b -> set -> 'b + (* Apply a folding function to the entries of the set. *) + + val partition : (item -> bool) -> set -> (set * set) + (* partition a set into two based using the given predicate. Returns two + * sets, where the first contains those elements for which the predicate is + * true and the second contains those elements for which the predicate is + * false. + *) + + val filter : (item -> bool) -> set -> unit + (* filter a set by removing those elements for which the predicate + * is false. + *) + + val exists : (item -> bool) -> set -> bool + (* check the elements of a set with a predicate and return true if + * any element satisfies the predicate. Return false otherwise. + * Elements are checked in key order. + *) + + val all : (item -> bool) -> set -> bool + (* check the elements of a set with a predicate and return true if + * they all satisfy the predicate. Return false otherwise. Elements + * are checked in key order. + *) + + val find : (item -> bool) -> set -> item option + (* find an element in the set for which the predicate is true *) + + (* DEPRECATED FUNCTIONS *) + + val listItems : set -> item list + val without : set * item -> unit + + end (* MONO_HASH_SET *) diff --git a/smlnj-lib/Util/mono-hash-table-sig.sml b/smlnj-lib/Util/mono-hash-table-sig.sml new file mode 100644 index 0000000..1894e78 --- /dev/null +++ b/smlnj-lib/Util/mono-hash-table-sig.sml @@ -0,0 +1,106 @@ +(* mono-hash-table-sig.sml + * + * COPYRIGHT (c) 2018 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * The result signature of the hash table functor (see hash-table.sml). + * + * AUTHOR: John Reppy + * University of Chicago + * https://cs.uchicago.edu/~jhr + *) + +signature MONO_HASH_TABLE = + sig + + structure Key : HASH_KEY + + type 'a hash_table + + val mkTable : (int * exn) -> 'a hash_table + (* Create a new table; the int is a size hint and the exception + * is to be raised by find. + *) + + val clear : 'a hash_table -> unit + (* remove all elements from the table *) + + val insert : 'a hash_table -> (Key.hash_key * 'a) -> unit + (* Insert an item. If the key already has an item associated with it, + * then the old item is discarded. + *) + + val insertWith : ('a * 'a -> 'a) + -> 'a hash_table + -> Key.hash_key * 'a + -> unit + (* Insert an item with a combining function to resolve collisions. + * The first argument to the combining function is the existing value, + * and the second argument is the value being inserted into the table. + *) + val insertWithi : (Key.hash_key * 'a * 'a -> 'a) + -> 'a hash_table + -> Key.hash_key * 'a + -> unit + (* Like insertWith, except that the combining function also takes the + * key as an argument. + *) + + val inDomain : 'a hash_table -> Key.hash_key -> bool + (* return true, if the key is in the domain of the table *) + + val lookup : 'a hash_table -> Key.hash_key -> 'a + (* Find an item, the table's exception is raised if the item doesn't exist *) + + val find : 'a hash_table -> Key.hash_key -> 'a option + (* Look for an item, return NONE if the item doesn't exist *) + + val findAndRemove : 'a hash_table -> Key.hash_key -> 'a option + (* If an item with the specified key exists in the table, then it + * is removed and the item is returned. Otherwise, `NONE` is + * returned. + *) + + val remove : 'a hash_table -> Key.hash_key -> 'a + (* Remove an item, returning the item. The table's exception is raised if + * the item doesn't exist. + *) + + val numItems : 'a hash_table -> int + (* Return the number of items in the table *) + + val listItems : 'a hash_table -> 'a list + val listItemsi : 'a hash_table -> (Key.hash_key * 'a) list + (* Return a list of the items (and their keys) in the table *) + + val app : ('a -> unit) -> 'a hash_table -> unit + val appi : ((Key.hash_key * 'a) -> unit) -> 'a hash_table -> unit + (* Apply a function to the entries of the table *) + + val map : ('a -> 'b) -> 'a hash_table -> 'b hash_table + val mapi : ((Key.hash_key * 'a) -> 'b) -> 'a hash_table -> 'b hash_table + (* Map a table to a new table that has the same keys *) + + val fold : (('a * 'b) -> 'b) -> 'b -> 'a hash_table -> 'b + val foldi : ((Key.hash_key * 'a * 'b) -> 'b) -> 'b -> 'a hash_table -> 'b + + val modify : ('a -> 'a) -> 'a hash_table -> unit + val modifyi : ((Key.hash_key * 'a) -> 'a) -> 'a hash_table -> unit + (* modify the hash-table items in place *) + +(** Also mapPartial?? *) + val filter : ('a -> bool) -> 'a hash_table -> unit + val filteri : ((Key.hash_key * 'a) -> bool) -> 'a hash_table -> unit + (* remove any hash table items that do not satisfy the given + * predicate. + *) + + val copy : 'a hash_table -> 'a hash_table + (* Create a copy of a hash table *) + + val bucketSizes : 'a hash_table -> int list + (* returns a list of the sizes of the various buckets. This is to + * allow users to gauge the quality of their hashing function. + *) + + end (* MONO_HASH_TABLE *) diff --git a/smlnj-lib/Util/mono-hash2-table-sig.sml b/smlnj-lib/Util/mono-hash2-table-sig.sml new file mode 100644 index 0000000..e3cafc5 --- /dev/null +++ b/smlnj-lib/Util/mono-hash2-table-sig.sml @@ -0,0 +1,92 @@ +(* mono-hash2-table-sig.sml + * + * COPYRIGHT (c) 2018 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * Hash tables that are keyed by two keys (in different domains). + * + * AUTHOR: John Reppy + * University of Chicago + * https://cs.uchicago.edu/~jhr + *) + +signature MONO_HASH2_TABLE = + sig + + structure Key1 : HASH_KEY + structure Key2 : HASH_KEY + + type 'a hash_table + + val mkTable : (int * exn) -> 'a hash_table + (* Create a new table; the int is a size hint and the exception + * is to be raised by find. + *) + + val clear : 'a hash_table -> unit + (* remove all elements from the table *) + + val insert : 'a hash_table -> (Key1.hash_key * Key2.hash_key * 'a) -> unit + (* Insert an item. If the key already has an item associated with it, + * then the old item is discarded. + *) + + val inDomain1 : 'a hash_table -> Key1.hash_key -> bool + val inDomain2 : 'a hash_table -> Key2.hash_key -> bool + (* return true, if the key is in the domain of the table *) + + val lookup1 : 'a hash_table -> Key1.hash_key -> 'a + val lookup2 : 'a hash_table -> Key2.hash_key -> 'a + (* Find an item, the table's exception is raised if the item doesn't exist *) + + val find1 : 'a hash_table -> Key1.hash_key -> 'a option + val find2 : 'a hash_table -> Key2.hash_key -> 'a option + (* Look for an item, return NONE if the item doesn't exist *) + + val remove1 : 'a hash_table -> Key1.hash_key -> 'a + val remove2 : 'a hash_table -> Key2.hash_key -> 'a + (* Remove an item, returning the item. The table's exception is raised if + * the item doesn't exist. + *) + + val numItems : 'a hash_table -> int + (* Return the number of items in the table *) + + val listItems : 'a hash_table -> 'a list + val listItemsi : 'a hash_table -> (Key1.hash_key * Key2.hash_key * 'a) list + (* Return a list of the items (and their keys) in the table *) + + val app : ('a -> unit) -> 'a hash_table -> unit + val appi : ((Key1.hash_key * Key2.hash_key * 'a) -> unit) -> 'a hash_table + -> unit + (* Apply a function to the entries of the table *) + + val map : ('a -> 'b) -> 'a hash_table -> 'b hash_table + val mapi : ((Key1.hash_key * Key2.hash_key * 'a) -> 'b) -> 'a hash_table + -> 'b hash_table + (* Map a table to a new table that has the same keys *) + +(* TODO: add mapPartial and mapPartiali *) + + val fold : (('a * 'b) -> 'b) -> 'b -> 'a hash_table -> 'b + val foldi : ((Key1.hash_key * Key2.hash_key * 'a * 'b) -> 'b) -> 'b + -> 'a hash_table -> 'b + +(* TODO: add modify and modifyi *) + + val filter : ('a -> bool) -> 'a hash_table -> unit + val filteri : ((Key1.hash_key * Key2.hash_key * 'a) -> bool) -> 'a hash_table + -> unit + (* remove any hash table items that do not satisfy the given + * predicate. + *) + + val copy : 'a hash_table -> 'a hash_table + (* Create a copy of a hash table *) + + val bucketSizes : 'a hash_table -> (int list * int list) + (* returns a list of the sizes of the various buckets. This is to + * allow users to gauge the quality of their hashing function. + *) + + end (* MONO_HASH2_TABLE *) diff --git a/smlnj-lib/Util/mono-priorityq-sig.sml b/smlnj-lib/Util/mono-priorityq-sig.sml new file mode 100644 index 0000000..090d237 --- /dev/null +++ b/smlnj-lib/Util/mono-priorityq-sig.sml @@ -0,0 +1,52 @@ +(* mono-priorityq-sig.sml + * + * COPYRIGHT (c) 2020 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * This signature describes the interface to monomorphic functional + * priority queues. + *) + +signature MONO_PRIORITYQ = + sig + + type item + type queue + + val empty : queue + + val singleton : item -> queue + (* create a queue from a single item *) + + val fromList : item list -> queue + (* build a queue from a list of items *) + + val insert : (item * queue) -> queue + (* insert an item *) + + val remove : queue -> (item * queue) + (* remove the highest priority item from the queue; raise List.Empty + * if the queue is empty. + *) + + val next : queue -> (item * queue) option + (* remove the highest priority item from the queue; return NONE + * if the queue is empty. + *) + + val findAndRemove : queue * (item -> bool) -> (item * queue) option + (* find the item with the highest priority that satisfies the predicate *) + + val delete : queue * (item -> bool) -> queue + (* delete all elements satisfying the given predicate *) + + val merge : (queue * queue) -> queue + (* Merge two queues. *) + + val numItems : queue -> int + (* return the number of items in the queue *) + + val isEmpty : queue -> bool + (* return true, if the queue is empty *) + + end; diff --git a/smlnj-lib/Util/ord-key-sig.sml b/smlnj-lib/Util/ord-key-sig.sml new file mode 100644 index 0000000..e7f6b25 --- /dev/null +++ b/smlnj-lib/Util/ord-key-sig.sml @@ -0,0 +1,18 @@ +(* ord-key-sig.sml + * + * COPYRIGHT (c) 2020 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * Abstract linearly ordered keys. + *) + +signature ORD_KEY = + sig + + (* the type of keys *) + type ord_key + + (* defines a total ordering on the ord_key type *) + val compare : ord_key * ord_key -> order + + end (* ORD_KEY *) diff --git a/smlnj-lib/Util/ord-map-sig.sml b/smlnj-lib/Util/ord-map-sig.sml new file mode 100644 index 0000000..c702073 --- /dev/null +++ b/smlnj-lib/Util/ord-map-sig.sml @@ -0,0 +1,162 @@ +(* ord-map-sig.sml + * + * COPYRIGHT (c) 2020 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * COPYRIGHT (c) 1996 by AT&T Research. See COPYRIGHT file for details. + * + * Abstract signature of an applicative-style finite maps (dictionaries) + * structure over ordered monomorphic keys. + *) + +signature ORD_MAP = + sig + + structure Key : ORD_KEY + (* the map's domain and its comparison function *) + + type 'a map + + val empty : 'a map + (* The empty map *) + + val isEmpty : 'a map -> bool + (* Return true if and only if the map is empty *) + + val singleton : (Key.ord_key * 'a) -> 'a map + (* return the specified singleton map *) + + val insert : 'a map * Key.ord_key * 'a -> 'a map + val insert' : ((Key.ord_key * 'a) * 'a map) -> 'a map + (* Insert an item. *) + + val insertWith : ('a * 'a -> 'a) -> 'a map * Key.ord_key * 'a -> 'a map + (* Insert an item with a combining function to resolve collisions. + * The first argument to the combining function is the existing value, + * and the second argument is the value being inserted into the map. + *) + val insertWithi : (Key.ord_key * 'a * 'a -> 'a) -> 'a map * Key.ord_key * 'a -> 'a map + (* Like insertWith, except that the combining function also takes the + * key as an argument. + *) + + val find : 'a map * Key.ord_key -> 'a option + (* Look for an item, return NONE if the item doesn't exist *) + + val lookup : 'a map * Key.ord_key -> 'a + (* look for an item, raise the NotFound exception if it doesn't exist *) + + val inDomain : ('a map * Key.ord_key) -> bool + (* return true, if the key is in the domain of the map *) + + val findAndRemove : 'a map * Key.ord_key -> ('a map * 'a) option + (* If an item with the specified key exists in the map, then it + * is removed and the residual map and the item are returned. + * Otherwise, `NONE` is returned. + *) + + val remove : 'a map * Key.ord_key -> 'a map * 'a + (* Remove an item, returning new map and value removed. + * Raises LibBase.NotFound if not found. + *) + + val first : 'a map -> 'a option + val firsti : 'a map -> (Key.ord_key * 'a) option + (* return the first item in the map (or NONE if it is empty) *) + + val numItems : 'a map -> int + (* Return the number of items in the map *) + + val listItems : 'a map -> 'a list + val listItemsi : 'a map -> (Key.ord_key * 'a) list + (* Return an ordered list of the items (and their keys) in the map. *) + + val listKeys : 'a map -> Key.ord_key list + (* return an ordered list of the keys in the map. *) + + val unionWith : ('a * 'a -> 'a) -> ('a map * 'a map) -> 'a map + val unionWithi : (Key.ord_key * 'a * 'a -> 'a) -> ('a map * 'a map) -> 'a map + (* return a map whose domain is the union of the domains of the two input + * maps, using the supplied function to define the map on elements that + * are in both domains. + *) + + val intersectWith : ('a * 'b -> 'c) -> ('a map * 'b map) -> 'c map + val intersectWithi : (Key.ord_key * 'a * 'b -> 'c) -> ('a map * 'b map) -> 'c map + (* return a map whose domain is the intersection of the domains of the + * two input maps, using the supplied function to define the range. + *) + + val mergeWith : ('a option * 'b option -> 'c option) + -> ('a map * 'b map) -> 'c map + val mergeWithi : (Key.ord_key * 'a option * 'b option -> 'c option) + -> ('a map * 'b map) -> 'c map + (* merge two maps using the given function to control the merge. For + * each key k in the union of the two maps domains, the function + * is applied to the image of the key under the map. If the function + * returns SOME y, then (k, y) is added to the resulting map. + *) + + val equiv : ('a * 'b -> bool) -> ('a map * 'b map) -> bool + (* `equiv rngEq (f, g)` returns true if `f` and `g`` have equal domains + * and if for every `x` in their domain, `rngEq(f x, g x) = true`. + *) + val collate : ('a * 'b -> order) -> ('a map * 'b map) -> order + (* Given two maps `f` and `g`, and a comparison function `rngCmp` on their + * range types, return the order of the maps. + *) + val extends : ('a * 'b -> bool) -> ('a map * 'b map) -> bool + (* `extends rngEx (f, g)` returns true if the domain of `g` is a + * subset of the domain of `f` and for every `x` in the domain of `g`, + * `rngEx(g x, f x) = true`. + *) + + val app : ('a -> unit) -> 'a map -> unit + val appi : ((Key.ord_key * 'a) -> unit) -> 'a map -> unit + (* Apply a function to the entries of the map in map order. *) + + val map : ('a -> 'b) -> 'a map -> 'b map + val mapi : (Key.ord_key * 'a -> 'b) -> 'a map -> 'b map + (* Create a new map by applying a map function to the + * name/value pairs in the map. + *) + + val foldl : ('a * 'b -> 'b) -> 'b -> 'a map -> 'b + val foldli : (Key.ord_key * 'a * 'b -> 'b) -> 'b -> 'a map -> 'b + (* Apply a folding function to the entries of the map + * in increasing map order. + *) + + val foldr : ('a * 'b -> 'b) -> 'b -> 'a map -> 'b + val foldri : (Key.ord_key * 'a * 'b -> 'b) -> 'b -> 'a map -> 'b + (* Apply a folding function to the entries of the map + * in decreasing map order. + *) + + val filter : ('a -> bool) -> 'a map -> 'a map + val filteri : (Key.ord_key * 'a -> bool) -> 'a map -> 'a map + (* Filter out those elements of the map that do not satisfy the + * predicate. The filtering is done in increasing map order. + *) + + val mapPartial : ('a -> 'b option) -> 'a map -> 'b map + val mapPartiali : (Key.ord_key * 'a -> 'b option) -> 'a map -> 'b map + (* map a partial function over the elements of a map in increasing + * map order. + *) + + val exists : ('a -> bool) -> 'a map -> bool + val existsi : (Key.ord_key * 'a -> bool) -> 'a map -> bool + (* check the elements of a map with a predicate and return true if + * any element satisfies the predicate. Return false otherwise. + * Elements are checked in key order. + *) + + val all : ('a -> bool) -> 'a map -> bool + val alli : (Key.ord_key * 'a -> bool) -> 'a map -> bool + (* check the elements of a map with a predicate and return true if + * they all satisfy the predicate. Return false otherwise. Elements + * are checked in key order. + *) + + end (* ORD_MAP *) diff --git a/smlnj-lib/Util/ord-set-sig.sml b/smlnj-lib/Util/ord-set-sig.sml new file mode 100644 index 0000000..f018628 --- /dev/null +++ b/smlnj-lib/Util/ord-set-sig.sml @@ -0,0 +1,143 @@ +(* ordset-sig.sml + * + * COPYRIGHT (c) 1993 by AT&T Bell Laboratories. See COPYRIGHT file for details. + * + * Signature for a set of values with an order relation. + *) + +signature ORD_SET = + sig + + structure Key : ORD_KEY + (* the set elements and their comparison function *) + + type item = Key.ord_key + type set + + val empty : set + (* The empty set *) + + val singleton : item -> set + (* Create a singleton set *) + + val fromList : item list -> set + (* create a set from a list of items *) + + val toList : set -> item list + (* Return an ordered list of the items in the set. + * Added in SML/NJ 110.80. + *) + + val add : set * item -> set + val add' : (item * set) -> set + (* Add an item. *) + + val addList : set * item list -> set + (* Add a list of items. *) + + val subtract : set * item -> set + val subtract' : (item * set) -> set + (* Subtract an item from a set; has no effect if the item is not in the set *) + + val subtractList : set * item list -> set + (* Subtract a list of items from the set. *) + + val delete : set * item -> set + (* Remove an item. Raise NotFound if not found. *) + + val member : set * item -> bool + (* Return true if and only if item is an element in the set *) + + val isEmpty : set -> bool + (* Return true if and only if the set is empty *) + + val minItem : set -> item + (* return the smallest element of the set (raises Empty if the set is empty). + * Added in SML/NJ 110.80. + *) + + val maxItem : set -> item + (* return the largest element of the set (raises Empty if the set is empty). + * Added in SML/NJ 110.80. + *) + + val equal : (set * set) -> bool + (* Return true if and only if the two sets are equal *) + + val compare : (set * set) -> order + (* does a lexical comparison of two sets *) + + val isSubset : (set * set) -> bool + (* Return true if and only if the first set is a subset of the second *) + + val disjoint : set * set -> bool + (* are the two sets disjoint? *) + + val numItems : set -> int + (* Return the number of items in the set *) + + val union : set * set -> set + (* Union *) + + val intersection : set * set -> set + (* Intersection *) + + val difference : set * set -> set + (* Difference *) + + val map : (item -> item) -> set -> set + (* Create a new set by applying a map function to the elements + * of the set. + *) + + val mapPartial : (item -> item option) -> set -> set + (* Create a new set by mapping a partial function over the + * items in the set. + *) + + val app : (item -> unit) -> set -> unit + (* Apply a function to the entries of the set + * in increasing order + *) + + val foldl : (item * 'b -> 'b) -> 'b -> set -> 'b + (* Apply a folding function to the entries of the set + * in increasing order + *) + + val foldr : (item * 'b -> 'b) -> 'b -> set -> 'b + (* Apply a folding function to the entries of the set + * in decreasing order + *) + + val partition : (item -> bool) -> set -> (set * set) + (* partition a set into two based using the given predicate. Returns two + * sets, where the first contains those elements for which the predicate is + * true and the second contains those elements for which the predicate is + * false. + *) + + val filter : (item -> bool) -> set -> set + (* filter a set by the given predicate returning only those elements for + * which the predicate is true. + *) + + val exists : (item -> bool) -> set -> bool + (* check the elements of a set with a predicate and return true if + * any element satisfies the predicate. Return false otherwise. + * Elements are checked in key order. + *) + + val all : (item -> bool) -> set -> bool + (* check the elements of a set with a predicate and return true if + * they all satisfy the predicate. Return false otherwise. Elements + * are checked in key order. + *) + + val find : (item -> bool) -> set -> item option + (* find an element in the set for which the predicate is true *) + + (* DEPRECATED FUNCTIONS *) + val listItems : set -> item list + + end (* ORD_SET *) diff --git a/smlnj-lib/Util/parser-comb-sig.sml b/smlnj-lib/Util/parser-comb-sig.sml new file mode 100644 index 0000000..f73a2de --- /dev/null +++ b/smlnj-lib/Util/parser-comb-sig.sml @@ -0,0 +1,59 @@ +(* parser-comb-sig.sml + * + * COPYRIGHT (c) 2020 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * COPYRIGHT (c) 1996 AT&T Research. + * + * Parser combinators over readers. These are modeled after the Haskell + * combinators of Hutton and Meijer. The main difference is that they + * return a single result, instead of a list of results. This means that + * "or" is a committed choice; once one branch succeeds, the others will not + * be enabled. While this is somewhat limiting, for many applications it + * will not be a problem. For more substantial parsing problems, one should + * use ML-Yacc and/or ML-Lex. + *) + +signature PARSER_COMB = + sig + + type ('a, 'strm) parser = + (char, 'strm) StringCvt.reader -> ('a, 'strm) StringCvt.reader + + val result : 'a -> ('a, 'strm) parser + + val failure : ('a, 'strm) parser + + val wrap : (('a, 'strm) parser * ('a -> 'b)) -> ('b, 'strm) parser + + val seq : (('a, 'strm) parser * ('b, 'strm) parser) -> (('a * 'b), 'strm) parser + val seqWith : (('a * 'b) -> 'c) + -> (('a, 'strm) parser * ('b, 'strm) parser) + -> ('c, 'strm) parser + + val bind : (('a, 'strm) parser * ('a -> ('b, 'strm) parser)) + -> ('b, 'strm) parser + + val eatChar : (char -> bool) -> (char, 'strm) parser + + val char : char -> (char, 'strm) parser + val string : string -> (string, 'strm) parser + + val skipBefore : (char -> bool) -> ('a, 'strm) parser -> ('a, 'strm) parser + + val or : (('a, 'strm) parser * ('a, 'strm) parser) -> ('a, 'strm) parser + val or' : ('a, 'strm) parser list -> ('a, 'strm) parser + + val zeroOrMore : ('a, 'strm) parser -> ('a list, 'strm) parser + val oneOrMore : ('a, 'strm) parser -> ('a list, 'strm) parser + + val option : ('a, 'strm) parser -> ('a option, 'strm) parser + val join : ('a option, 'strm) parser -> ('a, 'strm) parser + + val token : (char -> bool) -> (string, 'strm) parser + (* parse a token consisting of characters satisfying the predicate. + * If this succeeds, then the resulting string is guaranteed to be + * non-empty. + *) + + end; diff --git a/smlnj-lib/Util/parser-comb.sml b/smlnj-lib/Util/parser-comb.sml new file mode 100644 index 0000000..5a10511 --- /dev/null +++ b/smlnj-lib/Util/parser-comb.sml @@ -0,0 +1,121 @@ +(* parser-comb.sml + * + * COPYRIGHT (c) 2020 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * Parser combinators over readers. These are modeled after the Haskell + * combinators of Hutton and Meijer. The main difference is that they + * return a single result, instead of a list of results. This means that + * "or" is a committed choice; once one branch succeeds, the others will not + * be enabled. While this is somewhat limiting, for many applications it + * will not be a problem. For more substantial parsing problems, one should + * use a parser generator like ML-Antlr, ML-Yacc and/or ML-Lex. + *) + +structure ParserComb : PARSER_COMB = + struct + structure SC = StringCvt + + type ('a, 'strm) parser = (char, 'strm) SC.reader -> ('a, 'strm) SC.reader + + fun result v getc strm = SOME(v, strm) + + fun failure getc strm = NONE + + fun wrap (p, f) getc strm = (case (p getc strm) + of NONE => NONE + | (SOME(x, strm)) => SOME(f x, strm) + (* end case *)) + + fun seqWith f (p1, p2) getc strm = (case (p1 getc strm) + of SOME(t1, strm1) => (case (p2 getc strm1) + of SOME(t2, strm2) => SOME(f(t1, t2), strm2) + | NONE => NONE + (* end case *)) + | NONE => NONE + (* end case *)) + fun seq (p1, p2) = seqWith (fn x => x) (p1, p2) + + fun bind (p1, p2') getc strm = (case (p1 getc strm) + of SOME(t1, strm1) => p2' t1 getc strm1 + | NONE => NONE + (* end case *)) + + fun eatChar pred getc strm = (case getc strm + of (res as SOME(c, strm')) => if (pred c) then res else NONE + | _ => NONE + (* end case *)) + + fun char (c: char) = eatChar (fn c' => (c = c')) + + fun string s getc strm = let + fun eat (ss, strm) = (case (Substring.getc ss, getc strm) + of (SOME(c1, ss'), SOME(c2, strm')) => + if (c1 = c2) then eat(ss', strm') else NONE + | (NONE, _) => SOME(s, strm) + | _ => NONE + (* end case *)) + in + eat (Substring.full s, strm) + end + + fun skipBefore pred p getc strm = let + fun skip' strm = (case getc strm + of NONE => NONE + | SOME(c, strm') => + if (pred c) then skip' strm' else p getc strm + (* end case *)) + in + skip' strm + end + + fun or (p1, p2) getc strm = (case (p1 getc strm) + of NONE => (case (p2 getc strm) + of NONE => NONE + | res => res + (* end case *)) + | res => res + (* end case *)) + + fun or' l getc strm = let + fun tryNext [] = NONE + | tryNext (p::r) = (case (p getc strm) + of NONE => tryNext r + | res => res + (* end case *)) + in + tryNext l + end + + fun zeroOrMore p getc strm = let + val p = p getc + fun parse (l, strm) = (case (p strm) + of (SOME(item, strm)) => parse (item::l, strm) + | NONE => SOME(rev l, strm) + (* end case *)) + in + parse ([], strm) + end + + fun oneOrMore p getc strm = (case (zeroOrMore p getc strm) + of (res as (SOME(_::_, _))) => res + | _ => NONE + (* end case *)) + + fun option p getc strm = (case (p getc strm) + of SOME(x, strm) => SOME(SOME x, strm) + | NONE => SOME(NONE, strm) + (* end case *)) + + fun join p = bind (p, fn (SOME x) => result x | NONE => failure) + + (* parse a token consisting of characters satisfying the predicate. + * If this succeeds, then the resulting string is guaranteed to be + * non-empty. + *) + fun token pred getc strm = (case (zeroOrMore (eatChar pred) getc strm) + of (SOME(res as _::_, strm)) => SOME(implode res, strm) + | _ => NONE + (* end case *)) + + end; diff --git a/smlnj-lib/Util/path-util-sig.sml b/smlnj-lib/Util/path-util-sig.sml new file mode 100644 index 0000000..4c4fb2f --- /dev/null +++ b/smlnj-lib/Util/path-util-sig.sml @@ -0,0 +1,46 @@ +(* path-util-sig.sml + * + * COPYRIGHT (c) 2012 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * Various higher-level pathname and searching utilities. + *) + +signature PATH_UTIL = + sig + + (* findFile paths name + * returns SOME(p/name), where p is the first path in paths such that p/name exists. + * If no such file exists, then NONE is returned. If name is an absolute path, then + * SOME name is returned if it exists. + *) + val findFile : string list -> string -> string option + + (* findFiles (paths, mode) name + * returns a list of p/name values, where p is in paths and p/name exists. If name is an + * absolute path, then [name] is returned if it exists. + *) + val findFiles : string list -> string -> string list + + (* existsFile pred paths name + * returns SOME(p/name), where p is the first path in paths such that p/name satisfies + * the given predicate. If no such file exists, then NONE is returned. If name is an + * absolute path, then SOME name is returned if it satisfies the predicate. + *) + val existsFile : (string -> bool) -> string list -> string -> string option + + (* allFiles pred paths name + * returns a list of all p/name values, such that p is in paths and p/name satisfies + * the given predicate. The order of the path list is preserved in the result. If name + * is an absolute path, then [name] is returned if it satisfies the predicate. + *) + val allFiles : (string -> bool) -> string list -> string -> string list + + (* findExe paths name + * returns SOME(p/name), where p is the first path in paths such that p/name exists and + * is executable. If no such file exists, then NONE is returned. If name is an + * absolute path, then SOME name is returned if it is executable. + *) + val findExe : string list -> string -> string option + + end diff --git a/smlnj-lib/Util/path-util.sml b/smlnj-lib/Util/path-util.sml new file mode 100644 index 0000000..45bdc4f --- /dev/null +++ b/smlnj-lib/Util/path-util.sml @@ -0,0 +1,49 @@ +(* path-util.sml + * + * COPYRIGHT (c) 2012 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * Various higher-level pathname and searching utilities. + *) + +structure PathUtil : PATH_UTIL = + struct + + structure P = OS.Path + structure F = OS.FileSys + + fun existsFile pred pathList fileName = let + fun chk s = if (pred s) then SOME s else NONE + fun iter [] = NONE + | iter (p::r) = (case chk(P.joinDirFile{dir=p, file=fileName}) + of NONE => iter r + | res => res + (* end case *)) + in + if P.isAbsolute fileName + then chk fileName + else iter pathList + end + fun allFiles pred pathList fileName = let + fun chk s = if (pred s) then SOME s else NONE + fun iter ([], l) = rev l + | iter (p::r, l) = (case chk(P.joinDirFile{dir=p, file=fileName}) + of NONE => iter(r, l) + | (SOME s) => iter(r, s::l) + (* end case *)) + in + if not(P.isAbsolute fileName) + then iter (pathList, []) + else if (pred fileName) + then [fileName] + else [] + end + + fun fileExists s = F.access(s, []) + + val findFile = existsFile fileExists + val findFiles = allFiles fileExists + + val findExe = existsFile (fn p => OS.FileSys.access(p, [OS.FileSys.A_EXEC])) + + end diff --git a/smlnj-lib/Util/plist-sig.sml b/smlnj-lib/Util/plist-sig.sml new file mode 100644 index 0000000..dd6fca2 --- /dev/null +++ b/smlnj-lib/Util/plist-sig.sml @@ -0,0 +1,50 @@ +(* plist-sig.sml + * + * COPYRIGHT (c) 2020 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * Property lists using Stephen Weeks's implementation. + *) + +signature PROP_LIST = + sig + + type holder + + val newHolder : unit -> holder + + val hasProps : holder -> bool + (* return true if the holder has any properties. *) + + val clearHolder : holder -> unit + (* remove all properties and flags from the holder *) + + val sameHolder : (holder * holder) -> bool + (* returns true, if two holders are the same *) + + (* newProp (selHolder, init) + * creates a new property for objects of type 'a and returns + * functions to get the property, set it, and clear it. The function + * selHolder is used to select the holder field from an object + * and init is used to create the initial property value. + * Typically, properties are reference cells, so that they can + * be modified. The difference between peekFn and getFn is that + * peekFn returns NONE when the property has not yet been created, + * whereas getFn will allocate and initialize the property. The + * setFn function can either be used to initialize an undefined property + * or to override a property's current value. + *) + val newProp : (('a -> holder) * ('a -> 'b)) -> { + peekFn : 'a -> 'b option, + getFn : 'a -> 'b, + setFn : ('a * 'b) -> unit, + clrFn : 'a -> unit + } + + val newFlag : ('a -> holder) -> { + getFn : 'a -> bool, + setFn : ('a * bool) -> unit + } + + end + diff --git a/smlnj-lib/Util/plist.sml b/smlnj-lib/Util/plist.sml new file mode 100644 index 0000000..8f78038 --- /dev/null +++ b/smlnj-lib/Util/plist.sml @@ -0,0 +1,90 @@ +(* plist.sml + * + * COPYRIGHT (c) 2020 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * Property lists using Stephen Weeks's implementation. + *) + +structure PropList :> PROP_LIST = + struct + + type holder = exn list ref + + fun newHolder() : holder = ref [] + + fun hasProps (ref []) = false + | hasProps _ = true + + fun clearHolder r = (r := []) + + fun sameHolder (r1 : holder, r2) = (r1 = r2) + + fun mkProp () = let + exception E of 'a + fun cons (a, l) = E a :: l + fun peek [] = NONE + | peek (E a :: _) = SOME a + | peek (_ :: l) = peek l + fun delete [] = [] + | delete (E a :: r) = r + | delete (x :: r) = x :: delete r + in + { cons = cons, peek = peek, delete = delete } + end + + fun mkFlag () = let + exception E + fun peek [] = false + | peek (E :: _) = true + | peek (_ :: l) = peek l + fun set (l, flg) = let + fun set ([], _) = if flg then E::l else l + | set (E::r, xs) = if flg then l else List.revAppend(xs, r) + | set (x::r, xs) = set (r, x::xs) + in + set (l, []) + end + in + { set = set, peek = peek } + end + + fun newProp (selHolder : 'a -> holder, init : 'a -> 'b) = let + val {peek, cons, delete} = mkProp() + fun peekFn a = peek(!(selHolder a)) + fun getF a = let + val h = selHolder a + in + case peek(!h) + of NONE => let val b = init a in h := cons(b, !h); b end + | (SOME b) => b + (* end case *) + end + fun clrF a = let + val h = selHolder a + in + h := delete(!h) + end + fun setFn (a, x) = let + val h = selHolder a + in + h := cons(x, delete(!h)) + end + in + {peekFn = peekFn, getFn = getF, clrFn = clrF, setFn = setFn} + end + + fun newFlag (selHolder : 'a -> holder) = let + val {peek, set} = mkFlag() + fun getF a = peek(!(selHolder a)) + fun setF (a, flg) = let + val h = selHolder a + in + h := set(!h, flg) + end + in + {getFn = getF, setFn = setF} + end + + end + diff --git a/smlnj-lib/Util/priority-sig.sml b/smlnj-lib/Util/priority-sig.sml new file mode 100644 index 0000000..31fed03 --- /dev/null +++ b/smlnj-lib/Util/priority-sig.sml @@ -0,0 +1,15 @@ +(* priority-sig.sml + * + * COPYRIGHT (c) 2020 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * Argument signature for functors that implement priority queues. + *) + +signature PRIORITY = + sig + type priority + val compare : (priority * priority) -> order + type item + val priority : item -> priority + end; diff --git a/smlnj-lib/Util/queue-sig.sml b/smlnj-lib/Util/queue-sig.sml new file mode 100644 index 0000000..3e0c8b0 --- /dev/null +++ b/smlnj-lib/Util/queue-sig.sml @@ -0,0 +1,40 @@ +(* queue-sig.sml + * + * COPYRIGHT (c) 1993 by AT&T Bell Laboratories. See COPYRIGHT file for details. + * + * Imperative fifos + * + *) + +signature QUEUE = + sig + type 'a queue + + exception Dequeue + + val mkQueue : unit -> 'a queue + (* make a new queue *) + val clear : 'a queue -> unit + (* remove all elements *) + val isEmpty : 'a queue -> bool + (* test for empty queue *) + val enqueue : 'a queue * 'a -> unit + (* enqueue an element at the rear *) + val dequeue : 'a queue -> 'a + (* remove the front element (raise Dequeue if empty) *) + val next : 'a queue -> 'a option + (* remove the first element; return NONE if the queue is empty *) + val delete : ('a queue * ('a -> bool)) -> unit + (* delete all elements satisfying the given predicate *) + val head : 'a queue -> 'a + (* return the first queue element without removing it *) + val peek : 'a queue -> 'a option + (* peek at the first queue element without removing it *) + val length : 'a queue -> int + val contents : 'a queue -> 'a list + val app : ('a -> unit) -> 'a queue -> unit + val map : ('a -> 'b) -> 'a queue -> 'b queue + val foldl : ('a * 'b -> 'b) -> 'b -> 'a queue -> 'b + val foldr : ('a * 'b -> 'b) -> 'b -> 'a queue -> 'b + + end diff --git a/smlnj-lib/Util/queue.sml b/smlnj-lib/Util/queue.sml new file mode 100644 index 0000000..f1c42ed --- /dev/null +++ b/smlnj-lib/Util/queue.sml @@ -0,0 +1,44 @@ +(* queue.sml + * + * COPYRIGHT (c) 1993 by AT&T Bell Laboratories. See COPYRIGHT file for details. + * + * Imperative fifos + * + *) + +structure Queue :> QUEUE = + struct + type 'a queue = 'a Fifo.fifo ref + + exception Dequeue = Fifo.Dequeue + + fun mkQueue () = ref Fifo.empty + + fun clear q = (q := Fifo.empty) + + fun enqueue (q,x) = q := (Fifo.enqueue (!q, x)) + + fun dequeue q = let + val (newq, x) = Fifo.dequeue (!q) + in + q := newq; + x + end + + fun next q = (case Fifo.next (!q) + of SOME(x, newq) => (q := newq; SOME x) + | NONE => NONE + (* end case *)) + + fun delete (q, pred) = (q := Fifo.delete (!q, pred)) + fun head q = Fifo.head (!q) + fun peek q = Fifo.peek (!q) + fun isEmpty q = Fifo.isEmpty (!q) + fun length q = Fifo.length (!q) + fun contents q = Fifo.contents (!q) + fun app f q = Fifo.app f (!q) + fun map f q = ref(Fifo.map f (!q)) + fun foldl f b q = Fifo.foldl f b (!q) + fun foldr f b q = Fifo.foldr f b (!q) + + end diff --git a/smlnj-lib/Util/rand-sig.sml b/smlnj-lib/Util/rand-sig.sml new file mode 100644 index 0000000..d7137cb --- /dev/null +++ b/smlnj-lib/Util/rand-sig.sml @@ -0,0 +1,40 @@ +(* rand-sig.sml + * + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * Signature for a simple random number generator. + * + *) + +signature RAND = + sig + + type rand = Word.word + + val randMin : rand + val randMax : rand + + val random : rand -> rand + (* Given seed, return value randMin <= v <= randMax + * Iteratively using the value returned by random as the + * next seed to random will produce a sequence of pseudo-random + * numbers. + *) + + val mkRandom : rand -> unit -> rand + (* Given seed, return function generating a sequence of + * random numbers randMin <= v <= randMax + *) + + val norm : rand -> real + (* Map values in the range [randMin,randMax] to (0.0,1.0) *) + + val range : (int * int) -> rand -> int + (* Map v, randMin <= v <= randMax, to integer range [i,j] + * Exception - + * Fail if j < i + *) + + end (* RAND *) + diff --git a/smlnj-lib/Util/rand.sml b/smlnj-lib/Util/rand.sml new file mode 100644 index 0000000..e09b9df --- /dev/null +++ b/smlnj-lib/Util/rand.sml @@ -0,0 +1,72 @@ +(* rand.sml + * + * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * Random number generator taken from Paulson, pp. 170-171. + * Recommended by Stephen K. Park and Keith W. Miller, + * Random number generators: good ones are hard to find, + * CACM 31 (1988), 1192-1201 + * Updated to include the new preferred multiplier of 48271 + * CACM 36 (1993), 105-110 + * Updated to use on Word31. + * + * Note: The Random structure provides a better generator. + * + * TODO: provide a proper 64-bit implementation. + *) + +structure Rand : RAND = + struct + + type rand = Word.word + type rand' = Int32.int (* internal representation *) + + val a : rand' = 48271 + val m : rand' = 2147483647 (* 2^31 - 1 *) + val m_1 = m - 1 + val q = m div a + val r = m mod a + + val extToInt = Int32.fromLarge o Word.toLargeInt + val intToExt = Word.fromLargeInt o Int32.toLarge + + val randMin : rand = 0w1 + val randMax : rand = intToExt m_1 + + fun chk 0w0 = 1 + | chk 0wx7fffffff = m_1 + | chk seed = extToInt seed + + fun random' seed = let + val hi = seed div q + val lo = seed mod q + val test = a * lo - r * hi + in + if test > 0 then test else test + m + end + + val random = intToExt o random' o chk + + fun mkRandom seed = let + val seed = ref (chk seed) + in + fn () => (seed := random' (!seed); intToExt (!seed)) + end + + val real_m = Real.fromLargeInt (Int32.toLarge m) + fun norm s = (Real.fromLargeInt (Word.toLargeInt s)) / real_m + + fun range (i,j) = + if j < i + then LibBase.failure{module="Rand",func="range",msg="hi < lo"} + else if j = i then fn _ => i + else let + val R = Int32.fromInt j - Int32.fromInt i + val cvt = Word.toIntX o Word.fromLargeInt o Int32.toLarge + in + if R = m then Word.toIntX + else fn s => i + cvt ((extToInt s) mod (R+1)) + end + + end (* Rand *) diff --git a/smlnj-lib/Util/random-sig.sml b/smlnj-lib/Util/random-sig.sml new file mode 100644 index 0000000..7b35e1e --- /dev/null +++ b/smlnj-lib/Util/random-sig.sml @@ -0,0 +1,58 @@ +(* random-sig.sml + * + * An interface to stateful pseudo-random number generators. + * + * COPYRIGHT (c) 2022 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + *) + +signature RANDOM = + sig + + (* the internal state of a random number generator *) + type rand + + (* create rand from initial seed *) + val rand : (int * int) -> rand + + (* create a random state from a list of seeds *) + val fromList : NativeWord.word list -> rand + + val toBytes : rand -> Word8Vector.vector + val fromBytes : Word8Vector.vector -> rand + (* convert state to and from byte vectors. + * fromBytes raises Fail if its argument + * does not have the proper form. + *) + + val toString : rand -> string + val fromString : string -> rand + (* convert state to and from string + * fromString raises Fail if its argument + * does not have the proper form. + *) + + val randNativeInt : rand -> NativeInt.int + (* generate ints uniformly in [0,NativeInt.maxInt] *) + + val randNativeWord : rand -> NativeWord.word + (* generate ints uniformly in [0w0,maxWord] *) + + val randInt : rand -> int + (* generate ints uniformly in [minInt,NativeInt.maxInt] *) + + val randWord : rand -> word + (* generate ints uniformly in [0w0,maxWord] *) + + val randNat : rand -> int + (* generate ints uniformly in [0w0,NativeInt.maxInt] *) + + val randReal : rand -> real + (* generate reals uniformly in [0.0,1.0) *) + + val randRange : (int * int) -> rand -> int + (* randRange (lo,hi) generates integers uniformly [lo,hi]. + * Raises Fail if hi < lo. + *) + + end; (* RANDOM *) diff --git a/smlnj-lib/Util/real-format.sml b/smlnj-lib/Util/real-format.sml new file mode 100644 index 0000000..e69be79 --- /dev/null +++ b/smlnj-lib/Util/real-format.sml @@ -0,0 +1,175 @@ +(* real-format.sml + * + * COPYRIGHT (c) 1992 by AT&T Bell Laboratories. + * + * Basic real to string conversions. This module is use internally, but is + * not part of the exported library interface. It duplicates code in the + * SML/NJ boot directory, but it is more portable not to rely on it. + * + * AUTHOR: Emden Gansner & John Reppy + * AT&T Bell Laboratories + * Murray Hill, NJ 07974 + * erg@ulysses.att.com & jhr@research.att.com + *) + +structure RealFormat : sig + + (* Low-level real to string conversion routines. For F and E format, the precision + * specifies the number of fractional digits with 0's appended if necessary. + * For G format, precision specifies the number of significant digits, but + * trailing 0's in the fractional part are dropped. + *) + val realFFormat : (real * int) -> {sign : bool, mantissa : string} + val realEFormat : (real * int) -> {sign : bool, mantissa : string, exp : int} + val realGFormat : (real * int) + -> {sign : bool, whole : string, frac : string, exp : int option} + + end = struct + + exception BadPrecision + (* raised by real to string conversions, if the precision is < 0. *) + + fun zeroLPad (s, w) = StringCvt.padLeft #"0" w s + fun zeroRPad (s, w) = StringCvt.padRight #"0" w s + + (* convert an integer between 0..9 to a single digit *) + fun mkDigit (i : int) = String.sub("0123456789", i) + + (* decompose a non-zero real into a list of at most maxPrec significant digits + * (the first digit non-zero), and integer exponent. The return value + * (a::b::c..., exp) + * is produced from real argument + * a.bc... * (10 ^^ exp) + * If the list would consist of all 9's, the list consisting of 1 followed by + * all 0's is returned instead. + *) + val maxPrec = 15 + fun decompose (f, e, precisionFn) = let + fun scaleUp (x, e) = if (x < 1.0) then scaleUp(10.0*x, e-1) else (x, e) + fun scaleDn (x, e) = if (x >= 10.0) then scaleDn(0.1*x, e+1) else (x, e) + fun mkdigits (f, 0) = ([], if f < 5.0 then 0 else 1) + | mkdigits (f, i) = let + val d = floor f + val (digits, carry) = mkdigits (10.0 * (f - real d), i - 1) + val (digit, c) = (case (d, carry) + of (9, 1) => (0, 1) + | _ => (d + carry, 0) + (* end case *)) + in + (digit::digits, c) + end + val (f, e) = if (f < 1.0) + then scaleUp (f, e) + else if (f >= 10.0) + then scaleDn (f, e) + else (f, e) + val (digits, carry) = + mkdigits(f, Int.max(0, Int.min(precisionFn e, maxPrec))) + in + case carry + of 0 => (digits, e) + | _ => (1::digits, e+1) + end + + fun realFFormat (r, prec) = let + fun pf e = e + prec + 1 + fun rtoa (digits, e) = let + fun doFrac (_, 0, l) = implode(rev l) + | doFrac ([], p, l) = doFrac([], p-1, #"0"::l) + | doFrac (hd::tl, p, l) = doFrac(tl, p-1, (mkDigit hd) :: l) + fun doWhole ([], e, l) = if e >= 0 + then doWhole ([], e-1, #"0" :: l) + else if prec = 0 + then implode(rev l) + else doFrac ([], prec, #"." :: l) + | doWhole (arg as (hd::tl), e, l) = if e >= 0 + then doWhole(tl, e-1, (mkDigit hd) :: l) + else if prec = 0 + then implode(rev l) + else doFrac(arg, prec, #"." :: l) + fun doZeros (n, 0, l) = implode(rev l) + | doZeros (1, p, l) = doFrac(digits, p, l) + | doZeros (n, p, l) = doZeros(n-1, p-1, #"0" :: l) + in + if (e >= 0) + then doWhole(digits, e, []) + else if (prec = 0) + then "0" + else doZeros (~e, prec, [#".", #"0"]) + end + in + if (prec < 0) then raise BadPrecision else (); + if (r < 0.0) + then {sign = true, mantissa = rtoa(decompose(~r, 0, pf))} + else if (r > 0.0) + then {sign=false, mantissa = rtoa(decompose(r, 0, pf))} + else if (prec = 0) + then {sign=false, mantissa = "0"} + else {sign=false, mantissa = zeroRPad("0.", prec+2)} + end (* realFFormat *) + + fun realEFormat (r, prec) = let + fun pf _ = prec + 1 + fun rtoa (sign, (digits, e)) = let + fun mkRes (m, e) = {sign = sign, mantissa = m, exp = e} + fun doFrac (_, 0, l) = implode(rev l) + | doFrac ([], n, l) = zeroRPad(implode(rev l), n) + | doFrac (hd::tl, n, l) = doFrac (tl, n-1, (mkDigit hd) :: l) + in + if (prec = 0) + then mkRes(String.str(mkDigit(hd digits)), e) + else + mkRes(doFrac(tl digits, prec, [#".", mkDigit(hd digits)]), e) + end + in + if (prec < 0) then raise BadPrecision else (); + if (r < 0.0) + then rtoa (true, decompose(~r, 0, pf)) + else if (r > 0.0) + then rtoa (false, decompose(r, 0, pf)) + else if (prec = 0) + then {sign = false, mantissa = "0", exp = 0} + else {sign = false, mantissa = zeroRPad("0.", prec+2), exp=0} + end (* realEFormat *) + + fun realGFormat (r, prec) = let + fun pf _ = prec + fun rtoa (sign, (digits, e)) = let + fun mkRes (w, f, e) = {sign = sign, whole = w, frac = f, exp = e} + fun doFrac [] = [] + | doFrac (0::tl) = (case doFrac tl + of [] => [] + | rest => #"0" :: rest + (* end case *)) + | doFrac (hd::tl) = (mkDigit hd) :: (doFrac tl) + fun doWhole ([], e, wh) = + if e >= 0 + then doWhole([], e-1, #"0"::wh) + else mkRes(implode(rev wh), "", NONE) + | doWhole (arg as (hd::tl), e, wh) = + if e >= 0 + then doWhole(tl, e-1, (mkDigit hd)::wh) + else mkRes(implode(rev wh), implode(doFrac arg), NONE) + in + if (e < ~4) orelse (e >= prec) + then mkRes( + String.str(mkDigit(hd digits)), + implode(doFrac(tl digits)), SOME e) + else if e >= 0 + then doWhole(digits, e, []) + else let + val frac = implode(doFrac digits) + in + mkRes("0", zeroLPad(frac, (size frac) + (~1 - e)), NONE) + end + end + in + if (prec < 1) then raise BadPrecision else (); + if (r < 0.0) + then rtoa(true, decompose(~r, 0, pf)) + else if (r > 0.0) + then rtoa(false, decompose(r, 0, pf)) + else {sign=false, whole="0", frac="", exp=NONE} + end (* realGFormat *) + + end (* RealFormat *) diff --git a/smlnj-lib/Util/real-order-stats.sml b/smlnj-lib/Util/real-order-stats.sml new file mode 100644 index 0000000..9dc231c --- /dev/null +++ b/smlnj-lib/Util/real-order-stats.sml @@ -0,0 +1,73 @@ +(* real-order-stats.sml + * + * Randomized linear-time selection from an unordered sample. + * + * Copyright (c) 2004 by The Fellowship of SML/NJ + * + * Author: Matthias Blume (blume@tti-c.org) + *) +structure RealOrderStats : sig + + (* WARNING: Each of the functions exported from this module + * modifies its argument array by (partially) sorting it. *) + + (* select the i-th order statistic *) + val select : real array * int -> real + val select' : real ArraySlice.slice * int -> real + + (* calculate the median: + * if N is odd, then this is the (floor(N/2))th order statistic + * otherwise it is the average of (N/2-1)th and (N/2)th *) + val median : real array -> real + val median' : real ArraySlice.slice -> real + +end = struct + + infix 8 $ val op $ = Unsafe.Array.sub + infix 3 <- fun (a, i) <- x = Unsafe.Array.update (a, i, x) + + (* initialize random number generator *) + val rand = Random.rand (123, 73256) + + (* select i-th order statistic from unsorted array with + * starting point p and ending point r (inclusive): *) + fun select0 (a: real array, p, r, i) = + let fun x + y = Word.toIntX (Word.+ (Word.fromInt x, Word.fromInt y)) + fun x - y = Word.toIntX (Word.- (Word.fromInt x, Word.fromInt y)) + (* random partition: *) + fun rp (p, r) = + let fun sw(i,j) = let val t=a$i in (a,i)<-a$j; (a,j)<-t end + val q = Random.randRange (p, r) rand + val qv = a$q + val _ = if q<>p then ((a,q)<-a$p; (a,p)<-qv) else () + fun up i = if i>r orelse qv < a$i then i else up(i+1) + fun dn i = if i>=p andalso qv < a$i then dn(i-1) else i + fun lp (i, j) = + let val (i, j) = (up i, dn j) + in if i>j then let val q' = i-1 in sw(p,q'); (q',qv) end + else (sw(i,j); lp (i+1, j-1)) + end + in lp (p+1, r) end + (* random select: *) + fun rs (p, r) = + if p=r then a$r + else let val (q, qv) = rp (p, r) + in if i=q then qv else if i=mid then m else l(i+1, Real.max(a$i,m)) + in if len mod 2 = 1 then m0 else (l(p+1,a$p) + m0) / 2.0 + end + + fun median a = median0 (a, 0, Array.length a) + fun median' s = median0 (ArraySlice.base s) +end diff --git a/smlnj-lib/Util/redblack-map-fn.sml b/smlnj-lib/Util/redblack-map-fn.sml new file mode 100644 index 0000000..9c3a30a --- /dev/null +++ b/smlnj-lib/Util/redblack-map-fn.sml @@ -0,0 +1,695 @@ +(* redblack-map-fn.sml + * + * COPYRIGHT (c) 2014 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * This code is based on Chris Okasaki's implementation of + * red-black trees. The linear-time tree construction code is + * based on the paper "Constructing red-black trees" by Hinze, + * and the delete function is based on the description in Cormen, + * Leiserson, and Rivest. + * + * A red-black tree should satisfy the following two invariants: + * + * Red Invariant: each red node has black children (empty nodes are + * considered black). + * + * Black Invariant: each path from the root to an empty node has the + * same number of black nodes (the tree's black height). + * + * The Black invariant implies that any node with only one child + * will be black and its child will be a red leaf. + *) + +functor RedBlackMapFn (K : ORD_KEY) :> ORD_MAP where type Key.ord_key = K.ord_key = + struct + + structure Key = K + + datatype color = R | B + + datatype 'a tree + = E + | T of (color * 'a tree * K.ord_key * 'a * 'a tree) + + datatype 'a map = MAP of (int * 'a tree) + + fun isEmpty (MAP(_, E)) = true + | isEmpty _ = false + + val empty = MAP(0, E) + + fun singleton (xk, x) = MAP(1, T(B, E, xk, x, E)) + + fun insert (MAP(nItems, m), xk, x) = let + val nItems' = ref nItems + fun ins E = (nItems' := nItems+1; T(R, E, xk, x, E)) + | ins (s as T(color, a, yk, y, b)) = (case K.compare(xk, yk) + of LESS => (case a + of T(R, c, zk, z, d) => (case K.compare(xk, zk) + of LESS => (case ins c + of T(R, e, wk, w, f) => + T(R, T(B,e,wk, w,f), zk, z, T(B,d,yk,y,b)) + | c => T(B, T(R,c,zk,z,d), yk, y, b) + (* end case *)) + | EQUAL => T(color, T(R, c, xk, x, d), yk, y, b) + | GREATER => (case ins d + of T(R, e, wk, w, f) => + T(R, T(B,c,zk,z,e), wk, w, T(B,f,yk,y,b)) + | d => T(B, T(R,c,zk,z,d), yk, y, b) + (* end case *)) + (* end case *)) + | _ => T(B, ins a, yk, y, b) + (* end case *)) + | EQUAL => T(color, a, xk, x, b) + | GREATER => (case b + of T(R, c, zk, z, d) => (case K.compare(xk, zk) + of LESS => (case ins c + of T(R, e, wk, w, f) => + T(R, T(B,a,yk,y,e), wk, w, T(B,f,zk,z,d)) + | c => T(B, a, yk, y, T(R,c,zk,z,d)) + (* end case *)) + | EQUAL => T(color, a, yk, y, T(R, c, xk, x, d)) + | GREATER => (case ins d + of T(R, e, wk, w, f) => + T(R, T(B,a,yk,y,c), zk, z, T(B,e,wk,w,f)) + | d => T(B, a, yk, y, T(R,c,zk,z,d)) + (* end case *)) + (* end case *)) + | _ => T(B, a, yk, y, ins b) + (* end case *)) + (* end case *)) + val T(_, a, yk, y, b) = ins m + in + MAP(!nItems', T(B, a, yk, y, b)) + end + fun insert' ((xk, x), m) = insert (m, xk, x) + + fun insertWithi comb (MAP(nItems, m), xk, x) = let + val nItems' = ref nItems + fun ins E = (nItems' := nItems+1; T(R, E, xk, x, E)) + | ins (s as T(color, a, yk, y, b)) = (case K.compare(xk, yk) + of LESS => (case a + of T(R, c, zk, z, d) => (case K.compare(xk, zk) + of LESS => (case ins c + of T(R, e, wk, w, f) => + T(R, T(B,e,wk, w,f), zk, z, T(B,d,yk,y,b)) + | c => T(B, T(R,c,zk,z,d), yk, y, b) + (* end case *)) + | EQUAL => let + val x' = comb(xk, z, x) + in + T(color, T(R, c, xk, x', d), yk, y, b) + end + | GREATER => (case ins d + of T(R, e, wk, w, f) => + T(R, T(B,c,zk,z,e), wk, w, T(B,f,yk,y,b)) + | d => T(B, T(R,c,zk,z,d), yk, y, b) + (* end case *)) + (* end case *)) + | _ => T(B, ins a, yk, y, b) + (* end case *)) + | EQUAL => T(color, a, xk, comb(xk, y, x), b) + | GREATER => (case b + of T(R, c, zk, z, d) => (case K.compare(xk, zk) + of LESS => (case ins c + of T(R, e, wk, w, f) => + T(R, T(B,a,yk,y,e), wk, w, T(B,f,zk,z,d)) + | c => T(B, a, yk, y, T(R,c,zk,z,d)) + (* end case *)) + | EQUAL => let + val x' = comb(xk, z, x) + in + T(color, a, yk, y, T(R, c, xk, x', d)) + end + | GREATER => (case ins d + of T(R, e, wk, w, f) => + T(R, T(B,a,yk,y,c), zk, z, T(B,e,wk,w,f)) + | d => T(B, a, yk, y, T(R,c,zk,z,d)) + (* end case *)) + (* end case *)) + | _ => T(B, a, yk, y, ins b) + (* end case *)) + (* end case *)) + val T(_, a, yk, y, b) = ins m + in + MAP(!nItems', T(B, a, yk, y, b)) + end + fun insertWith comb = insertWithi (fn (_, x1, x2) => comb(x1, x2)) + + (* Is a key in the domain of the map? *) + fun inDomain (MAP(_, t), k) = let + fun find' E = false + | find' (T(_, a, yk, y, b)) = (case K.compare(k, yk) + of LESS => find' a + | EQUAL => true + | GREATER => find' b + (* end case *)) + in + find' t + end + + (* Look for an item, return NONE if the item doesn't exist *) + fun find (MAP(_, t), k) = let + fun find' E = NONE + | find' (T(_, a, yk, y, b)) = (case K.compare(k, yk) + of LESS => find' a + | EQUAL => SOME y + | GREATER => find' b + (* end case *)) + in + find' t + end + + (* Look for an item, raise NotFound if the item doesn't exist *) + fun lookup (MAP(_, t), k) = let + fun look E = raise LibBase.NotFound + | look (T(_, a, yk, y, b)) = (case K.compare(k, yk) + of LESS => look a + | EQUAL => y + | GREATER => look b + (* end case *)) + in + look t + end + + local + datatype 'a zipper + = TOP + | LEFT of (color * K.ord_key * 'a * 'a tree * 'a zipper) + | RIGHT of (color * 'a tree * K.ord_key * 'a * 'a zipper) + datatype 'a result = FOUND of 'a * 'a tree | NOT_FOUND + in + fun remove' (t, k) = let + (* zip the zipper *) + fun zip (TOP, t) = t + | zip (LEFT(color, xk, x, b, z), a) = zip(z, T(color, a, xk, x, b)) + | zip (RIGHT(color, a, xk, x, z), b) = zip(z, T(color, a, xk, x, b)) + (* zip the zipper while resolving a black deficit *) + fun fixupZip (TOP, t) = (true, t) + (* case 1 from CLR *) + | fixupZip (LEFT(B, xk, x, T(R, a, yk, y, b), p), t) = (case a + of T(_, T(R, a11, wk, w, a12), zk, z, a2) => (* case 1L ==> case 3L ==> case 4L *) + (false, zip (p, T(B, T(R, T(B, t, xk, x, a11), wk, w, T(B, a12, zk, z, a2)), yk, y, b))) + | T(_, a1, zk, z, T(R, a21, wk, w, t22)) => (* case 1L ==> case 4L *) + (false, zip (p, T(B, T(R, T(B, t, xk, x, a1), zk, z, T(B, a21, wk, w, t22)), yk, y, b))) + | T(_, a1, zk, z, a2) => (* case 1L ==> case 2L; rotate + recolor fixes deficit *) + (false, zip (p, T(B, T(B, t, xk, x, T(R, a1, zk, z, a2)), yk, y, b))) + | _ => fixupZip (LEFT(R, xk, x, a, LEFT(B, yk, y, b, p)), t) + (* end case *)) + | fixupZip (RIGHT(B, T(R, a, xk, x, b), yk, y, p), t) = (case b + of T(_, b1, zk, z, T(R, b21, wk, w, b22)) => (* case 1R ==> case 3R ==> case 4R *) + (false, zip (p, T(B, a, xk, x, T(R, T(B, b1, zk, z, b21), wk, w, T(B, b22, yk, y, t))))) + | T(_, T(R, b11, wk, w, b12), zk, z, b2) => (* case 1R ==> case 4R *) + (false, zip (p, T(B, a, xk, x, T(R, T(B, b11, wk, w, b12), zk, z, T(B, b2, yk, y, t))))) + | T(_, b1, zk, z, b2) => (* case 1L ==> case 2L; rotate + recolor fixes deficit *) + (false, zip (p, T(B, a, xk, x, T(B, T(R, b1, zk, z, b2), yk, y, t)))) + | _ => fixupZip (RIGHT(R, b, yk, y, RIGHT(B, a, xk, x, p)), t) + (* end case *)) + (* case 3 from CLR *) + | fixupZip (LEFT(color, xk, x, T(B, T(R, a1, yk, y, a2), zk, z, b), p), t) = + (* case 3L ==> case 4L *) + (false, zip (p, T(color, T(B, t, xk, x, a1), yk, y, T(B, a2, zk, z, b)))) + | fixupZip (RIGHT(color, T(B, a, xk, x, T(R, b1, yk, y, b2)), zk, z, p), t) = + (* case 3R ==> case 4R; rotate, recolor, plus rotate fixes deficit *) + (false, zip (p, T(color, T(B, a, xk, x, b1), yk, y, T(B, b2, zk, z, t)))) + (* case 4 from CLR *) + | fixupZip (LEFT(color, xk, x, T(B, a, yk, y, T(R, b1, zk, z, b2)), p), t) = + (false, zip (p, T(color, T(B, t, xk, x, a), yk, y, T(B, b1, zk, z, b2)))) + | fixupZip (RIGHT(color, T(B, T(R, a1, zk, z, a2), xk, x, b), yk, y, p), t) = + (false, zip (p, T(color, T(B, a1, zk, z, a2), xk, x, T(B, b, yk, y, t)))) + (* case 2 from CLR; note that "a" and "b" are guaranteed to be black, since we did + * not match cases 3 or 4. + *) + | fixupZip (LEFT(R, xk, x, T(B, a, yk, y, b), p), t) = + (false, zip (p, T(B, t, xk, x, T(R, a, yk, y, b)))) + | fixupZip (LEFT(B, xk, x, T(B, a, yk, y, b), p), t) = + fixupZip (p, T(B, t, xk, x, T(R, a, yk, y, b))) + | fixupZip (RIGHT(R, T(B, a, xk, x, b), yk, y, p), t) = + (false, zip (p, T(B, T(R, a, xk, x, b), yk, y, t))) + | fixupZip (RIGHT(B, T(B, a, xk, x, b), yk, y, p), t) = + fixupZip (p, T(B, T(R, a, xk, x, b), yk, y, t)) + (* push deficit up the tree by recoloring a black node as red *) + | fixupZip (LEFT(_, yk, y, E, p), t) = fixupZip (p, T(R, t, yk, y, E)) + | fixupZip (RIGHT(_, E, yk, y, p), t) = fixupZip (p, T(R, E, yk, y, t)) + (* impossible cases that violate the red invariant *) + | fixupZip _ = raise Fail "Red invariant violation" + (* delete the minimum value from a non-empty tree, returning a 4-tuple + * (key, elem, bd, tr), where key is the minimum key, elem is the element + * named by key, tr is the residual tree with elem removed, and bd is true + * if tr has a black-depth that is less than the original tree. + *) + fun delMin (T(R, E, yk, y, b), p) = + (* replace the node by its right subtree (which must be E) *) + (yk, y, false, zip(p, b)) + | delMin (T(B, E, yk, y, T(R, a', yk', y', b')), p) = + (* replace the node with its right child, while recoloring the child black to + * preserve the black invariant. + *) + (yk, y, false, zip (p, T(B, a', yk', y', b'))) + | delMin (T(B, E, yk, y, E), p) = let + (* delete the node, which reduces the black-depth by one, so we attempt to fix + * the deficit on the path back. + *) + val (blkDeficit, t) = fixupZip (p, E) + in + (yk, y, blkDeficit, t) + end + | delMin (T(color, a, yk, y, b), z) = delMin(a, LEFT(color, yk, y, b, z)) + | delMin (E, _) = raise Match + fun del (E, p) = NOT_FOUND + | del (T(color, a, yk, y, b), p) = (case K.compare(k, yk) + of LESS => del (a, LEFT(color, yk, y, b, p)) + | EQUAL => (case (color, a, b) + of (R, E, E) => FOUND(y, zip(p, E)) + | (B, E, E) => FOUND(y, #2 (fixupZip (p, E))) + | (_, T(_, a', yk', y', b'), E) => + (* node is black and left child is red; we replace the node with its + * left child recolored to black. + *) + FOUND(y, zip(p, T(B, a', yk', y', b'))) + | (_, E, T(_, a', yk', y', b')) => + (* node is black and right child is red; we replace the node with its + * right child recolored to black. + *) + FOUND(y, zip(p, T(B, a', yk', y', b'))) + | _ => let + val (minKey, minElem, blkDeficit, b) = delMin (b, TOP) + in + if blkDeficit + then FOUND(y, #2 (fixupZip (RIGHT(color, a, minKey, minElem, p), b))) + else FOUND(y, zip (p, T(color, a, minKey, minElem, b))) + end + (* end case *)) + | GREATER => del (b, RIGHT(color, a, yk, y, p)) + (* end case *)) + in + del (t, TOP) + end + (* Remove an item, returning new map and value removed. + * Raises LibBase.NotFound if not found. + *) + fun remove (MAP(nItems, t), k) = (case remove' (t, k) + of FOUND(item, T(R, a, xk, x, b)) => (MAP(nItems-1, T(B, a, xk, x, b)), item) + | FOUND(item, t) => (MAP(nItems-1, t), item) + | NOT_FOUND => raise LibBase.NotFound + (* end case *)) + fun findAndRemove (MAP(nItems, t), k) = (case remove' (t, k) + of FOUND(item, T(R, a, xk, x, b)) => SOME(MAP(nItems-1, T(B, a, xk, x, b)), item) + | FOUND(item, t) => SOME(MAP(nItems-1, t), item) + | NOT_FOUND => NONE + (* end case *)) + end (* local *) + + (* return the first item in the map (or NONE if it is empty) *) + fun first (MAP(_, t)) = let + fun f E = NONE + | f (T(_, E, _, x, _)) = SOME x + | f (T(_, a, _, _, _)) = f a + in + f t + end + fun firsti (MAP(_, t)) = let + fun f E = NONE + | f (T(_, E, xk, x, _)) = SOME(xk, x) + | f (T(_, a, _, _, _)) = f a + in + f t + end + + (* Return the number of items in the map *) + fun numItems (MAP(n, _)) = n + + fun foldl f = let + fun foldf (E, accum) = accum + | foldf (T(_, a, _, x, b), accum) = + foldf(b, f(x, foldf(a, accum))) + in + fn init => fn (MAP(_, m)) => foldf(m, init) + end + fun foldli f = let + fun foldf (E, accum) = accum + | foldf (T(_, a, xk, x, b), accum) = + foldf(b, f(xk, x, foldf(a, accum))) + in + fn init => fn (MAP(_, m)) => foldf(m, init) + end + + fun foldr f = let + fun foldf (E, accum) = accum + | foldf (T(_, a, _, x, b), accum) = + foldf(a, f(x, foldf(b, accum))) + in + fn init => fn (MAP(_, m)) => foldf(m, init) + end + fun foldri f = let + fun foldf (E, accum) = accum + | foldf (T(_, a, xk, x, b), accum) = + foldf(a, f(xk, x, foldf(b, accum))) + in + fn init => fn (MAP(_, m)) => foldf(m, init) + end + + fun listItems m = foldr (op ::) [] m + fun listItemsi m = foldri (fn (xk, x, l) => (xk, x)::l) [] m + + (* return an ordered list of the keys in the map. *) + fun listKeys m = foldri (fn (k, _, l) => k::l) [] m + + (* functions for walking the tree while keeping a stack of parents + * to be visited. + *) + fun next ((t as T(_, _, _, _, b))::rest) = (t, left(b, rest)) + | next _ = (E, []) + and left (E, rest) = rest + | left (t as T(_, a, _, _, _), rest) = left(a, t::rest) + fun start m = left(m, []) + + (* Given two maps `f` and `g`, return true if they have equal domains and if + * for every `x` in their domain, `rngEq(f x, g x) = true`. + *) + fun equiv rngEq (MAP(n1, m1), MAP(n2, m2)) = let + fun cmp (t1, t2) = (case (next t1, next t2) + of ((E, _), (E, _)) => true + | ((E, _), _) => false + | (_, (E, _)) => false + | ((T(_, _, xk, x, _), r1), (T(_, _, yk, y, _), r2)) => ( + case Key.compare(xk, yk) + of EQUAL => rngEq(x, y) andalso cmp (r1, r2) + | _ => false + (* end case *)) + (* end case *)) + in + (n1 = n2) andalso cmp (start m1, start m2) + end + + (* Given two maps `f` and `g`, and a comparison function `rngCmp` on their + * range types, return the order of the maps. + *) + fun collate rngCmp (MAP(_, m1), MAP(_, m2)) = let + fun cmp (t1, t2) = (case (next t1, next t2) + of ((E, _), (E, _)) => EQUAL + | ((E, _), _) => LESS + | (_, (E, _)) => GREATER + | ((T(_, _, xk, x, _), r1), (T(_, _, yk, y, _), r2)) => ( + case Key.compare(xk, yk) + of EQUAL => (case rngCmp(x, y) + of EQUAL => cmp (r1, r2) + | order => order + (* end case *)) + | order => order + (* end case *)) + (* end case *)) + in + cmp (start m1, start m2) + end + + (* Given two maps `f` and `g`, return true if the domain of `g` is a subset + * of the domain of `f` and for every `x` in the domain of `g`, + * `rngEq(g x, f x) = true`. + *) + fun extends rngEx (MAP(n1, m1), MAP(n2, m2)) = let + (* does t1 extend t2? *) + fun cmp (t1, t2) = (case (next t1, next t2) + of ((E, _), (E, _)) => true + | (_, (E, _)) => true + | ((E, _), _) => false + | ((T(_, _, xk, x, _), r1), (T(_, _, yk, y, _), r2)) => ( + case Key.compare(xk, yk) + of LESS => cmp (r1, t2) + | EQUAL => rngEx(x, y) andalso cmp (r1, r2) + | GREATER => false + (* end case *)) + (* end case *)) + in + (n1 >= n2) andalso cmp (start m1, start m2) + end + + (* support for constructing red-black trees in linear time from increasing + * ordered sequences (based on a description by R. Hinze). Note that the + * elements in the digits are ordered with the largest on the left, whereas + * the elements of the trees are ordered with the largest on the right. + *) + datatype 'a digit + = ZERO + | ONE of (K.ord_key * 'a * 'a tree * 'a digit) + | TWO of (K.ord_key * 'a * 'a tree * K.ord_key * 'a * 'a tree * 'a digit) + (* add an item that is guaranteed to be larger than any in l *) + fun addItem (ak, a, l) = let + fun incr (ak, a, t, ZERO) = ONE(ak, a, t, ZERO) + | incr (ak1, a1, t1, ONE(ak2, a2, t2, r)) = + TWO(ak1, a1, t1, ak2, a2, t2, r) + | incr (ak1, a1, t1, TWO(ak2, a2, t2, ak3, a3, t3, r)) = + ONE(ak1, a1, t1, incr(ak2, a2, T(B, t3, ak3, a3, t2), r)) + in + incr(ak, a, E, l) + end + (* link the digits into a tree *) + fun linkAll t = let + fun link (t, ZERO) = t + | link (t1, ONE(ak, a, t2, r)) = link(T(B, t2, ak, a, t1), r) + | link (t, TWO(ak1, a1, t1, ak2, a2, t2, r)) = + link(T(B, T(R, t2, ak2, a2, t1), ak1, a1, t), r) + in + link (E, t) + end + + local + fun wrap f (MAP(_, m1), MAP(_, m2)) = let + val (n, result) = f (start m1, start m2, 0, ZERO) + in + MAP(n, linkAll result) + end + fun ins ((E, _), n, result) = (n, result) + | ins ((T(_, _, xk, x, _), r), n, result) = + ins(next r, n+1, addItem(xk, x, result)) + in + + (* return a map whose domain is the union of the domains of the two input + * maps, using the supplied function to define the map on elements that + * are in both domains. + *) + fun unionWith mergeFn = let + fun union (t1, t2, n, result) = (case (next t1, next t2) + of ((E, _), (E, _)) => (n, result) + | ((E, _), t2) => ins(t2, n, result) + | (t1, (E, _)) => ins(t1, n, result) + | ((T(_, _, xk, x, _), r1), (T(_, _, yk, y, _), r2)) => ( + case Key.compare(xk, yk) + of LESS => union (r1, t2, n+1, addItem(xk, x, result)) + | EQUAL => + union (r1, r2, n+1, addItem(xk, mergeFn(x, y), result)) + | GREATER => union (t1, r2, n+1, addItem(yk, y, result)) + (* end case *)) + (* end case *)) + in + wrap union + end + fun unionWithi mergeFn = let + fun union (t1, t2, n, result) = (case (next t1, next t2) + of ((E, _), (E, _)) => (n, result) + | ((E, _), t2) => ins(t2, n, result) + | (t1, (E, _)) => ins(t1, n, result) + | ((T(_, _, xk, x, _), r1), (T(_, _, yk, y, _), r2)) => ( + case Key.compare(xk, yk) + of LESS => union (r1, t2, n+1, addItem(xk, x, result)) + | EQUAL => union ( + r1, r2, n+1, addItem(xk, mergeFn(xk, x, y), result)) + | GREATER => union (t1, r2, n+1, addItem(yk, y, result)) + (* end case *)) + (* end case *)) + in + wrap union + end + + (* return a map whose domain is the intersection of the domains of the + * two input maps, using the supplied function to define the range. + *) + fun intersectWith mergeFn = let + fun intersect (t1, t2, n, result) = (case (next t1, next t2) + of ((T(_, _, xk, x, _), r1), (T(_, _, yk, y, _), r2)) => ( + case Key.compare(xk, yk) + of LESS => intersect (r1, t2, n, result) + | EQUAL => + intersect (r1, r2, n+1, + addItem(xk, mergeFn(x, y), result)) + | GREATER => intersect (t1, r2, n, result) + (* end case *)) + | _ => (n, result) + (* end case *)) + in + wrap intersect + end + fun intersectWithi mergeFn = let + fun intersect (t1, t2, n, result) = (case (next t1, next t2) + of ((T(_, _, xk, x, _), r1), (T(_, _, yk, y, _), r2)) => ( + case Key.compare(xk, yk) + of LESS => intersect (r1, t2, n, result) + | EQUAL => + intersect (r1, r2, n+1, + addItem(xk, mergeFn(xk, x, y), result)) + | GREATER => intersect (t1, r2, n, result) + (* end case *)) + | _ => (n, result) + (* end case *)) + in + wrap intersect + end + + fun mergeWith mergeFn = let + fun merge (t1, t2, n, result) = (case (next t1, next t2) + of ((E, _), (E, _)) => (n, result) + | ((E, _), (T(_, _, yk, y, _), r2)) => + mergef(yk, NONE, SOME y, t1, r2, n, result) + | ((T(_, _, xk, x, _), r1), (E, _)) => + mergef(xk, SOME x, NONE, r1, t2, n, result) + | ((T(_, _, xk, x, _), r1), (T(_, _, yk, y, _), r2)) => ( + case Key.compare(xk, yk) + of LESS => mergef(xk, SOME x, NONE, r1, t2, n, result) + | EQUAL => mergef(xk, SOME x, SOME y, r1, r2, n, result) + | GREATER => mergef(yk, NONE, SOME y, t1, r2, n, result) + (* end case *)) + (* end case *)) + and mergef (k, x1, x2, r1, r2, n, result) = (case mergeFn(x1, x2) + of NONE => merge (r1, r2, n, result) + | SOME y => merge (r1, r2, n+1, addItem(k, y, result)) + (* end case *)) + in + wrap merge + end + fun mergeWithi mergeFn = let + fun merge (t1, t2, n, result) = (case (next t1, next t2) + of ((E, _), (E, _)) => (n, result) + | ((E, _), (T(_, _, yk, y, _), r2)) => + mergef(yk, NONE, SOME y, t1, r2, n, result) + | ((T(_, _, xk, x, _), r1), (E, _)) => + mergef(xk, SOME x, NONE, r1, t2, n, result) + | ((T(_, _, xk, x, _), r1), (T(_, _, yk, y, _), r2)) => ( + case Key.compare(xk, yk) + of LESS => mergef(xk, SOME x, NONE, r1, t2, n, result) + | EQUAL => mergef(xk, SOME x, SOME y, r1, r2, n, result) + | GREATER => mergef(yk, NONE, SOME y, t1, r2, n, result) + (* end case *)) + (* end case *)) + and mergef (k, x1, x2, r1, r2, n, result) = (case mergeFn(k, x1, x2) + of NONE => merge (r1, r2, n, result) + | SOME y => merge (r1, r2, n+1, addItem(k, y, result)) + (* end case *)) + in + wrap merge + end + end (* local *) + + fun app f = let + fun appf E = () + | appf (T(_, a, _, x, b)) = (appf a; f x; appf b) + in + fn (MAP(_, m)) => appf m + end + fun appi f = let + fun appf E = () + | appf (T(_, a, xk, x, b)) = (appf a; f(xk, x); appf b) + in + fn (MAP(_, m)) => appf m + end + + fun map f = let + fun mapf E = E + | mapf (T(color, a, xk, x, b)) = + T(color, mapf a, xk, f x, mapf b) + in + fn (MAP(n, m)) => MAP(n, mapf m) + end + fun mapi f = let + fun mapf E = E + | mapf (T(color, a, xk, x, b)) = + T(color, mapf a, xk, f(xk, x), mapf b) + in + fn (MAP(n, m)) => MAP(n, mapf m) + end + + (* Filter out those elements of the map that do not satisfy the + * predicate. The filtering is done in increasing map order. + *) + fun filter pred (MAP(_, t)) = let + fun walk (E, n, result) = (n, result) + | walk (T(_, a, xk, x, b), n, result) = let + val (n, result) = walk(a, n, result) + in + if (pred x) + then walk(b, n+1, addItem(xk, x, result)) + else walk(b, n, result) + end + val (n, result) = walk (t, 0, ZERO) + in + MAP(n, linkAll result) + end + fun filteri pred (MAP(_, t)) = let + fun walk (E, n, result) = (n, result) + | walk (T(_, a, xk, x, b), n, result) = let + val (n, result) = walk(a, n, result) + in + if (pred(xk, x)) + then walk(b, n+1, addItem(xk, x, result)) + else walk(b, n, result) + end + val (n, result) = walk (t, 0, ZERO) + in + MAP(n, linkAll result) + end + + (* map a partial function over the elements of a map in increasing + * map order. + *) + fun mapPartial f = let + fun f' (xk, x, m) = (case f x + of NONE => m + | (SOME y) => insert(m, xk, y) + (* end case *)) + in + foldli f' empty + end + fun mapPartiali f = let + fun f' (xk, x, m) = (case f(xk, x) + of NONE => m + | (SOME y) => insert(m, xk, y) + (* end case *)) + in + foldli f' empty + end + + (* check the elements of a map with a predicate and return true if + * any element satisfies the predicate. Return false otherwise. + * Elements are checked in key order. + *) + fun exists pred = let + fun exists' E = false + | exists' (T(_, a, _, x, b)) = exists' a orelse pred x orelse exists' b + in + fn (MAP(_, m)) => exists' m + end + fun existsi pred = let + fun exists' E = false + | exists' (T(_, a, k, x, b)) = exists' a orelse pred(k, x) orelse exists' b + in + fn (MAP(_, m)) => exists' m + end + + (* check the elements of a map with a predicate and return true if + * they all satisfy the predicate. Return false otherwise. Elements + * are checked in key order. + *) + fun all pred = let + fun all' E = true + | all' (T(_, a, _, x, b)) = all' a andalso pred x andalso all' b + in + fn (MAP(_, m)) => all' m + end + fun alli pred = let + fun all' E = true + | all' (T(_, a, k, x, b)) = all' a andalso pred(k, x) andalso all' b + in + fn (MAP(_, m)) => all' m + end + + end (* functor RedBlackMapFn *) diff --git a/smlnj-lib/Util/redblack-set-fn.sml b/smlnj-lib/Util/redblack-set-fn.sml new file mode 100644 index 0000000..b10aa2e --- /dev/null +++ b/smlnj-lib/Util/redblack-set-fn.sml @@ -0,0 +1,517 @@ +(* redblack-set-fn.sml + * + * COPYRIGHT (c) 2014 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * This code is based on Chris Okasaki's implementation of + * red-black trees. The linear-time tree construction code is + * based on the paper "Constructing red-black trees" by Hinze, + * and the delete function is based on the description in Cormen, + * Leiserson, and Rivest. + * + * A red-black tree should satisfy the following two invariants: + * + * Red Invariant: each red node has black children (empty nodes are + * considered black). + * + * Black Invariant: each path from the root to an empty node has the + * same number of black nodes (the tree's black height). + * + * The Black invariant implies that any node with only one child + * will be black and its child will be a red leaf. + *) + +functor RedBlackSetFn (K : ORD_KEY) :> ORD_SET where type Key.ord_key = K.ord_key = + struct + + structure Key = K + + type item = K.ord_key + + datatype color = R | B + + datatype tree + = E + | T of (color * tree * item * tree) + + datatype set = SET of (int * tree) + + fun isEmpty (SET(_, E)) = true + | isEmpty _ = false + + val empty = SET(0, E) + + fun minItem (SET(_, tr)) = let + fun min E = raise Empty + | min (T(_, E, item, _)) = item + | min (T(_, tr, _, _)) = min tr + in + min tr + end + + fun maxItem (SET(_, tr)) = let + fun max E = raise Empty + | max (T(_, _, item, E)) = item + | max (T(_, _, _, tr)) = max tr + in + max tr + end + + fun singleton x = SET(1, T(B, E, x, E)) + + fun add (SET(nItems, m), x) = let + val nItems' = ref nItems + fun ins E = (nItems' := nItems+1; T(R, E, x, E)) + | ins (s as T(color, a, y, b)) = (case K.compare(x, y) + of LESS => (case a + of T(R, c, z, d) => (case K.compare(x, z) + of LESS => (case ins c + of T(R, e, w, f) => T(R, T(B,e,w,f), z, T(B,d,y,b)) + | c => T(B, T(R,c,z,d), y, b) + (* end case *)) + | EQUAL => T(color, T(R, c, x, d), y, b) + | GREATER => (case ins d + of T(R, e, w, f) => T(R, T(B,c,z,e), w, T(B,f,y,b)) + | d => T(B, T(R,c,z,d), y, b) + (* end case *)) + (* end case *)) + | _ => T(B, ins a, y, b) + (* end case *)) + | EQUAL => T(color, a, x, b) + | GREATER => (case b + of T(R, c, z, d) => (case K.compare(x, z) + of LESS => (case ins c + of T(R, e, w, f) => T(R, T(B,a,y,e), w, T(B,f,z,d)) + | c => T(B, a, y, T(R,c,z,d)) + (* end case *)) + | EQUAL => T(color, a, y, T(R, c, x, d)) + | GREATER => (case ins d + of T(R, e, w, f) => T(R, T(B,a,y,c), z, T(B,e,w,f)) + | d => T(B, a, y, T(R,c,z,d)) + (* end case *)) + (* end case *)) + | _ => T(B, a, y, ins b) + (* end case *)) + (* end case *)) + val T(_, a, y, b) = ins m + in + SET(!nItems', T(B, a, y, b)) + end + fun add' (x, m) = add (m, x) + + fun addList (s, []) = s + | addList (s, x::r) = addList(add(s, x), r) + + (* Remove an item. Raises LibBase.NotFound if not found. *) + local + datatype zipper + = TOP + | LEFT of (color * item * tree * zipper) + | RIGHT of (color * tree * item * zipper) + in + fun delete (SET(nItems, t), k) = let + (* zip the zipper *) + fun zip (TOP, t) = t + | zip (LEFT(color, x, b, p), a) = zip(p, T(color, a, x, b)) + | zip (RIGHT(color, a, x, p), b) = zip(p, T(color, a, x, b)) + (* zip the zipper while resolving a black deficit *) + fun fixupZip (TOP, t) = (true, t) + (* case 1 from CLR *) + | fixupZip (LEFT(B, x, T(R, a, y, b), p), t) = (case a + of T(_, T(R, a11, w, a12), z, a2) => (* case 1L ==> case 3L ==> case 4L *) + (false, zip (p, T(B, T(R, T(B, t, x, a11), w, T(B, a12, z, a2)), y, b))) + | T(_, a1, z, T(R, a21, w, t22)) => (* case 1L ==> case 4L *) + (false, zip (p, T(B, T(R, T(B, t, x, a1), z, T(B, a21, w, t22)), y, b))) + | T(_, a1, z, a2) => (* case 1L ==> case 2L; rotate + recolor fixes deficit *) + (false, zip (p, T(B, T(B, t, x, T(R, a1, z, a2)), y, b))) + | _ => fixupZip (LEFT(R, x, a, LEFT(B, y, b, p)), t) + (* end case *)) + | fixupZip (RIGHT(B, T(R, a, x, b), y, p), t) = (case b + of T(_, b1, z, T(R, b21, w, b22)) => (* case 1R ==> case 3R ==> case 4R *) + (false, zip (p, T(B, a, x, T(R, T(B, b1, z, b21), w, T(B, b22, y, t))))) + | T(_, T(R, b11, w, b12), z, b2) => (* case 1R ==> case 4R *) + (false, zip (p, T(B, a, x, T(R, T(B, b11, w, b12), z, T(B, b2, y, t))))) + | T(_, b1, z, b2) => (* case 1L ==> case 2L; rotate + recolor fixes deficit *) + (false, zip (p, T(B, a, x, T(B, T(R, b1, z, b2), y, t)))) + | _ => fixupZip (RIGHT(R, b, y, RIGHT(B, a, x, p)), t) + (* end case *)) + (* case 3 from CLR *) + | fixupZip (LEFT(color, x, T(B, T(R, a1, y, a2), z, b), p), t) = + (* case 3L ==> case 4L *) + (false, zip (p, T(color, T(B, t, x, a1), y, T(B, a2, z, b)))) + | fixupZip (RIGHT(color, T(B, a, x, T(R, b1, y, b2)), z, p), t) = + (* case 3R ==> case 4R; rotate, recolor, plus rotate fixes deficit *) + (false, zip (p, T(color, T(B, a, x, b1), y, T(B, b2, z, t)))) + (* case 4 from CLR *) + | fixupZip (LEFT(color, x, T(B, a, y, T(R, b1, z, b2)), p), t) = + (false, zip (p, T(color, T(B, t, x, a), y, T(B, b1, z, b2)))) + | fixupZip (RIGHT(color, T(B, T(R, a1, z, a2), x, b), y, p), t) = + (false, zip (p, T(color, T(B, a1, z, a2), x, T(B, b, y, t)))) + (* case 2 from CLR; note that "a" and "b" are guaranteed to be black, since we did + * not match cases 3 or 4. + *) + | fixupZip (LEFT(R, x, T(B, a, y, b), p), t) = + (false, zip (p, T(B, t, x, T(R, a, y, b)))) + | fixupZip (LEFT(B, x, T(B, a, y, b), p), t) = + fixupZip (p, T(B, t, x, T(R, a, y, b))) + | fixupZip (RIGHT(R, T(B, a, x, b), y, p), t) = + (false, zip (p, T(B, T(R, a, x, b), y, t))) + | fixupZip (RIGHT(B, T(B, a, x, b), y, p), t) = + fixupZip (p, T(B, T(R, a, x, b), y, t)) + (* push deficit up the tree by recoloring a black node as red *) + | fixupZip (LEFT(_, y, E, p), t) = fixupZip (p, T(R, t, y, E)) + | fixupZip (RIGHT(_, E, y, p), t) = fixupZip (p, T(R, E, y, t)) + (* impossible cases that violate the red invariant *) + | fixupZip _ = raise Fail "Red invariant violation" + (* delete the minimum value from a non-empty tree, returning a triple + * (elem, bd, tr), where elem is the minimum element, tr is the residual + * tree with elem removed, and bd is true if tr has a black-depth that is + * less than the original tree. + *) + fun delMin (T(R, E, y, b), p) = + (* replace the node by its right subtree (which must be E) *) + (y, false, zip(p, b)) + | delMin (T(B, E, y, T(R, a', y', b')), p) = + (* replace the node with its right child, while recoloring the child black to + * preserve the black invariant. + *) + (y, false, zip (p, T(B, a', y', b'))) + | delMin (T(B, E, y, E), p) = let + (* delete the node, which reduces the black-depth by one, so we attempt to fix + * the deficit on the path back. + *) + val (blkDeficit, t) = fixupZip (p, E) + in + (y, blkDeficit, t) + end + | delMin (T(color, a, y, b), z) = delMin(a, LEFT(color, y, b, z)) + | delMin (E, _) = raise Match + fun del (E, z) = raise LibBase.NotFound + | del (T(color, a, y, b), p) = (case K.compare(k, y) + of LESS => del (a, LEFT(color, y, b, p)) + | EQUAL => (case (color, a, b) + of (R, E, E) => zip(p, E) + | (B, E, E) => #2 (fixupZip (p, E)) + | (_, T(_, a', y', b'), E) => + (* node is black and left child is red; we replace the node with its + * left child recolored to black. + *) + zip(p, T(B, a', y', b')) + | (_, E, T(_, a', y', b')) => + (* node is black and right child is red; we replace the node with its + * right child recolored to black. + *) + zip(p, T(B, a', y', b')) + | _ => let + val (minSucc, blkDeficit, b) = delMin (b, TOP) + in + if blkDeficit + then #2 (fixupZip (RIGHT(color, a, minSucc, p), b)) + else zip (p, T(color, a, minSucc, b)) + end + (* end case *)) + | GREATER => del (b, RIGHT(color, a, y, p)) + (* end case *)) + in + case del(t, TOP) + of T(R, a, x, b) => SET(nItems-1, T(B, a, x, b)) + | t => SET(nItems-1, t) + (* end case *) + end + end (* local *) + + (* Return true if and only if item is an element in the set *) + fun member (SET(_, t), k) = let + fun find' E = false + | find' (T(_, a, y, b)) = (case K.compare(k, y) + of LESS => find' a + | EQUAL => true + | GREATER => find' b + (* end case *)) + in + find' t + end + + (* Return the number of items in the map *) + fun numItems (SET(n, _)) = n + + fun foldl f = let + fun foldf (E, accum) = accum + | foldf (T(_, a, x, b), accum) = + foldf(b, f(x, foldf(a, accum))) + in + fn init => fn (SET(_, m)) => foldf(m, init) + end + + fun foldr f = let + fun foldf (E, accum) = accum + | foldf (T(_, a, x, b), accum) = + foldf(a, f(x, foldf(b, accum))) + in + fn init => fn (SET(_, m)) => foldf(m, init) + end + + (* return an ordered list of the items in the set. *) + fun toList s = foldr (fn (x, l) => x::l) [] s + + (* functions for walking the tree while keeping a stack of parents + * to be visited. + *) + fun next ((t as T(_, _, _, b))::rest) = (t, left(b, rest)) + | next _ = (E, []) + and left (E, rest) = rest + | left (t as T(_, a, _, _), rest) = left(a, t::rest) + fun start m = left(m, []) + + (* Return true if and only if the two sets are equal *) + fun equal (SET(_, s1), SET(_, s2)) = let + fun cmp (t1, t2) = (case (next t1, next t2) + of ((E, _), (E, _)) => true + | ((E, _), _) => false + | (_, (E, _)) => false + | ((T(_, _, x, _), r1), (T(_, _, y, _), r2)) => ( + case Key.compare(x, y) + of EQUAL => cmp (r1, r2) + | _ => false + (* end case *)) + (* end case *)) + in + cmp (start s1, start s2) + end + + (* Return the lexical order of two sets *) + fun compare (SET(_, s1), SET(_, s2)) = let + fun cmp (t1, t2) = (case (next t1, next t2) + of ((E, _), (E, _)) => EQUAL + | ((E, _), _) => LESS + | (_, (E, _)) => GREATER + | ((T(_, _, x, _), r1), (T(_, _, y, _), r2)) => ( + case Key.compare(x, y) + of EQUAL => cmp (r1, r2) + | order => order + (* end case *)) + (* end case *)) + in + cmp (start s1, start s2) + end + + (* Return true if and only if the first set is a subset of the second *) + fun isSubset (SET(_, s1), SET(_, s2)) = let + fun cmp (t1, t2) = (case (next t1, next t2) + of ((E, _), (E, _)) => true + | ((E, _), _) => true + | (_, (E, _)) => false + | ((T(_, _, x, _), r1), (T(_, _, y, _), r2)) => ( + case Key.compare(x, y) + of LESS => false + | EQUAL => cmp (r1, r2) + | GREATER => cmp (t1, r2) + (* end case *)) + (* end case *)) + in + cmp (start s1, start s2) + end + + (* Return true if the two sets are disjoint *) + fun disjoint (SET(0, _), _) = true + | disjoint (_, SET(0, _)) = true + | disjoint (SET(_, s1), SET(_, s2)) = let + fun walk ((E, _), _) = true + | walk (_, (E, _)) = true + | walk (t1 as (T(_, _, x, _), r1), t2 as (T(_, _, y, _), r2)) = ( + case Key.compare(x, y) + of LESS => walk (next r1, t2) + | EQUAL => false + | GREATER => walk (t1, next r2) + (* end case *)) + in + walk (next (start s1), next (start s2)) + end + + (* support for constructing red-black trees in linear time from increasing + * ordered sequences (based on a description by R. Hinze). Note that the + * elements in the digits are ordered with the largest on the left, whereas + * the elements of the trees are ordered with the largest on the right. + *) + datatype digit + = ZERO + | ONE of (item * tree * digit) + | TWO of (item * tree * item * tree * digit) + (* add an item that is guaranteed to be larger than any in l *) + fun addItem (a, l) = let + fun incr (a, t, ZERO) = ONE(a, t, ZERO) + | incr (a1, t1, ONE(a2, t2, r)) = TWO(a1, t1, a2, t2, r) + | incr (a1, t1, TWO(a2, t2, a3, t3, r)) = + ONE(a1, t1, incr(a2, T(B, t3, a3, t2), r)) + in + incr(a, E, l) + end + (* link the digits into a tree *) + fun linkAll t = let + fun link (t, ZERO) = t + | link (t1, ONE(a, t2, r)) = link(T(B, t2, a, t1), r) + | link (t, TWO(a1, t1, a2, t2, r)) = + link(T(B, T(R, t2, a2, t1), a1, t), r) + in + link (E, t) + end + + (* create a set from a list of items; this function works in linear time if the list + * is in increasing order. + *) + fun fromList [] = empty + | fromList (first::rest) = let + fun add (prev, x::xs, n, accum) = (case Key.compare(prev, x) + of LESS => add(x, xs, n+1, addItem(x, accum)) + | _ => (* list not in order, so fall back to addList code *) + addList(SET(n, linkAll accum), x::xs) + (* end case *)) + | add (_, [], n, accum) = SET(n, linkAll accum) + in + add (first, rest, 1, addItem(first, ZERO)) + end + + (* return the union of the two sets *) + fun union (SET(_, s1), SET(_, s2)) = let + fun ins ((E, _), n, result) = (n, result) + | ins ((T(_, _, x, _), r), n, result) = + ins(next r, n+1, addItem(x, result)) + fun union' (t1, t2, n, result) = (case (next t1, next t2) + of ((E, _), (E, _)) => (n, result) + | ((E, _), t2) => ins(t2, n, result) + | (t1, (E, _)) => ins(t1, n, result) + | ((T(_, _, x, _), r1), (T(_, _, y, _), r2)) => ( + case Key.compare(x, y) + of LESS => union' (r1, t2, n+1, addItem(x, result)) + | EQUAL => union' (r1, r2, n+1, addItem(x, result)) + | GREATER => union' (t1, r2, n+1, addItem(y, result)) + (* end case *)) + (* end case *)) + val (n, result) = union' (start s1, start s2, 0, ZERO) + in + SET(n, linkAll result) + end + + (* return the intersection of the two sets *) + fun intersection (SET(_, s1), SET(_, s2)) = let + fun intersect (t1, t2, n, result) = (case (next t1, next t2) + of ((T(_, _, x, _), r1), (T(_, _, y, _), r2)) => ( + case Key.compare(x, y) + of LESS => intersect (r1, t2, n, result) + | EQUAL => intersect (r1, r2, n+1, addItem(x, result)) + | GREATER => intersect (t1, r2, n, result) + (* end case *)) + | _ => (n, result) + (* end case *)) + val (n, result) = intersect (start s1, start s2, 0, ZERO) + in + SET(n, linkAll result) + end + + (* return the set difference *) + fun difference (SET(_, s1), SET(_, s2)) = let + fun ins ((E, _), n, result) = (n, result) + | ins ((T(_, _, x, _), r), n, result) = + ins(next r, n+1, addItem(x, result)) + fun diff (t1, t2, n, result) = (case (next t1, next t2) + of ((E, _), _) => (n, result) + | (t1, (E, _)) => ins(t1, n, result) + | ((T(_, _, x, _), r1), (T(_, _, y, _), r2)) => ( + case Key.compare(x, y) + of LESS => diff (r1, t2, n+1, addItem(x, result)) + | EQUAL => diff (r1, r2, n, result) + | GREATER => diff (t1, r2, n, result) + (* end case *)) + (* end case *)) + val (n, result) = diff (start s1, start s2, 0, ZERO) + in + SET(n, linkAll result) + end + + fun subtract (s, item) = difference (s, singleton item) + fun subtract' (item, s) = subtract (s, item) + + fun subtractList (l, items) = let + val items' = List.foldl (fn (x, set) => add(set, x)) (SET(0, E)) items + in + difference (l, items') + end + + fun app f = let + fun appf E = () + | appf (T(_, a, x, b)) = (appf a; f x; appf b) + in + fn (SET(_, m)) => appf m + end + + fun map f = let + fun addf (x, m) = add(m, f x) + in + foldl addf empty + end + + fun mapPartial f = let + fun f' (x, acc) = (case f x of SOME x' => add(acc, x') | NONE => acc) + in + foldl f' empty + end + + (* Filter out those elements of the set that do not satisfy the + * predicate. The filtering is done in increasing map order. + *) + fun filter pred (SET(_, t)) = let + fun walk (E, n, result) = (n, result) + | walk (T(_, a, x, b), n, result) = let + val (n, result) = walk(a, n, result) + in + if (pred x) + then walk(b, n+1, addItem(x, result)) + else walk(b, n, result) + end + val (n, result) = walk (t, 0, ZERO) + in + SET(n, linkAll result) + end + + fun partition pred (SET(_, t)) = let + fun walk (E, n1, result1, n2, result2) = (n1, result1, n2, result2) + | walk (T(_, a, x, b), n1, result1, n2, result2) = let + val (n1, result1, n2, result2) = walk(a, n1, result1, n2, result2) + in + if (pred x) + then walk(b, n1+1, addItem(x, result1), n2, result2) + else walk(b, n1, result1, n2+1, addItem(x, result2)) + end + val (n1, result1, n2, result2) = walk (t, 0, ZERO, 0, ZERO) + in + (SET(n1, linkAll result1), SET(n2, linkAll result2)) + end + + fun exists pred = let + fun test E = false + | test (T(_, a, x, b)) = test a orelse pred x orelse test b + in + fn (SET(_, t)) => test t + end + + fun all pred = let + fun test E = true + | test (T(_, a, x, b)) = test a andalso pred x andalso test b + in + fn (SET(_, t)) => test t + end + + fun find pred = let + fun test E = NONE + | test (T(_, a, x, b)) = (case test a + of NONE => if pred x then SOME x else test b + | someItem => someItem + (* end case *)) + in + fn (SET(_, t)) => test t + end + + (* DEPRECATED FUNCTIONS *) + val listItems = toList + + end; diff --git a/smlnj-lib/Util/scan-sig.sml b/smlnj-lib/Util/scan-sig.sml new file mode 100644 index 0000000..9c414ea --- /dev/null +++ b/smlnj-lib/Util/scan-sig.sml @@ -0,0 +1,37 @@ +(* scan-sig.sml + * + * COPYRIGHT (c) 2020 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * C-style conversions from string representations. + * + * TODO: replace the fmt_item type with a datatype that reflects the more + * limited set of possible results. Also replace the shared implementation + * of format-string processing with processing that is specific to scanning. + *) + +signature SCAN = + sig + + datatype fmt_item + = ATOM of Atom.atom + | LINT of LargeInt.int + | INT of Int.int + | LWORD of LargeWord.word + | WORD of Word.word + | WORD8 of Word8.word + | BOOL of bool + | CHR of char + | STR of string + | REAL of Real.real + | LREAL of LargeReal.real + | LEFT of (int * fmt_item) (* left justify in field of given width *) + | RIGHT of (int * fmt_item) (* right justify in field of given width *) + + exception BadFormat (* bad format string *) + + val sscanf : string -> string -> fmt_item list option + val scanf : string -> (char, 'a) StringCvt.reader + -> (fmt_item list, 'a) StringCvt.reader + + end (* SCAN *) diff --git a/smlnj-lib/Util/scan.sml b/smlnj-lib/Util/scan.sml new file mode 100644 index 0000000..d1a64ac --- /dev/null +++ b/smlnj-lib/Util/scan.sml @@ -0,0 +1,164 @@ +(* scan.sml + * + * COPYRIGHT (c) 2020 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * C-style conversions from string representations. + * + * TODO: replace the fmt_item type with a datatype that reflects the more + * limited set of possible results. Also replace the shared implementation + * of format-string processing with processing that is specific to scanning. + *) + +structure Scan : SCAN = + struct + + structure SS = Substring + structure SC = StringCvt + + open FmtFields + + (* character sets *) + abstype charset = CS of Word8Array.array + with + fun mkCharSet () = CS(Word8Array.array(Char.maxOrd+1, 0w0)) + fun addChar (CS ba, c) = Word8Array.update(ba, Char.ord c, 0w1) + fun addRange (CS ba, c1, c2) = let + val ord_c2 = Char.ord c2 + fun add i = if (i <= ord_c2) + then (Word8Array.update(ba, i, 0w1); add(i+1)) + else () + in + if (c1 <= c2) then (add(Char.ord c1)) else raise BadFormat + end + fun inSet (CS ba) arg = (Word8Array.sub(ba, Char.ord arg) = 0w1) + fun notInSet (CS ba) arg = (Word8Array.sub(ba, Char.ord arg) = 0w0) + end + + fun scanCharSet fmtStr = let + val cset = mkCharSet() + val (isNot, fmtStr) = (case SS.getc fmtStr + of (SOME(#"^", ss)) => (true, ss) + | _ => (false, fmtStr) + (* end case *)) + fun scan (nextChar, ss) = (case (SS.getc ss) + of (SOME(#"-", ss)) => (case (SS.getc ss) + of (SOME(#"]", ss)) => ( + addChar(cset, nextChar); + addChar(cset, #"-"); + ss) + | (SOME(c, ss)) => ( + addRange(cset, nextChar, c); + scanNext ss) + | NONE => raise BadFormat + (* end case *)) + | (SOME(#"]", ss)) => (addChar(cset, nextChar); ss) + | (SOME(c, ss)) => (addChar(cset, nextChar); scan(c, ss)) + | NONE => raise BadFormat + (* end case *)) + and scanNext ss = (case (SS.getc ss) + of (SOME(#"-", ss)) => raise BadFormat + | (SOME(#"]", ss)) => ss + | (SOME(c, ss)) => scan(c, ss) + | NONE => raise BadFormat + (* end case *)) + and scanChar (SOME arg) = scan arg + | scanChar NONE = raise BadFormat + val fmtStr = scanChar (SS.getc fmtStr) + in + if isNot + then (CharSet(notInSet cset), fmtStr) + else (CharSet(inSet cset), fmtStr) + end + + fun compileScanFormat str = let + val split = SS.splitl (Char.notContains "\n\t %[") + fun scan (ss, l) = + if (SS.isEmpty ss) + then rev l + else let val (ss1, ss2) = split ss + in + case (SS.getc ss2) + of (SOME(#"%", ss')) => let val (field, ss3) = scanField ss' + in + scan(ss3, field :: (Raw ss1) :: l) + end + | (SOME(#"[", ss')) => let val (cs, ss3) = scanCharSet ss' + in + scan (ss3, cs :: (Raw ss1) :: l) + end + | (SOME(_, ss')) => + scan (SS.dropl Char.isSpace ss', (Raw ss1) :: l) + | NONE => rev((Raw ss1)::l) + (* end case *) + end + in + scan (SS.full str, []) + end + +(** NOTE: for the time being, this function ignores flags and field width **) + fun scanf fmt getc strm = let + val fmts = compileScanFormat fmt + val skipWS = SC.dropl Char.isSpace getc + fun scan (strm, [], items) = SOME(rev items, strm) + | scan (strm, (Raw ss)::rf, items) = let + fun match (strm, ss) = (case (getc strm, SS.getc ss) + of (SOME(c', strm'), SOME(c, ss)) => + if (c' = c) then match (strm', ss) else NONE + | (_, NONE) => scan (strm, rf, items) + | _ => NONE + (* end case *)) + in + match (skipWS strm, ss) + end + | scan (strm, (CharSet pred)::rf, items) = let + fun scanSet strm = (case (getc strm) + of (SOME(c, strm')) => + if (pred c) then scanSet strm' else strm + | NONE => strm + (* end case *)) + in + scan (scanSet strm, rf, items) + end + | scan (strm, Field(flags, wid, ty)::rf, items) = let + val strm = skipWS strm + fun next (con, SOME(x, strm')) = scan (strm', rf, con(x)::items) + | next _ = NONE + fun getInt fmt = if (#large flags) + then next(LINT, LargeInt.scan fmt getc strm) + else next(INT, Int.scan fmt getc strm) + in + case ty + of OctalField => getInt SC.OCT + | IntField => getInt SC.DEC + | HexField => getInt SC.HEX + | CapHexField => getInt SC.HEX + | CharField => next(CHR, getc strm) + | BoolField => next(BOOL, Bool.scan getc strm) +(* QUESTION: should we use the precision? *) + | StrField prec => let + val notSpace = not o Char.isSpace + val pred = (case wid + of NoPad => notSpace + | (Wid n) => let val cnt = ref n + in + fn c => (case !cnt + of 0 => false + | n => (cnt := n-1; notSpace c) + (* end case *)) + end + (* end case *)) + val (s, strm) = SC.splitl pred getc strm + in + scan (strm, rf, STR s :: items) + end + | (RealField _) => next(REAL, LargeReal.scan getc strm) + (* end case *) + end + in + scan(strm, fmts, []) + end (* scanf *) + + fun sscanf fmt = SC.scanString (scanf fmt) + + end (* Scan *) diff --git a/smlnj-lib/Util/simple-uref.sml b/smlnj-lib/Util/simple-uref.sml new file mode 100644 index 0000000..20d96c4 --- /dev/null +++ b/smlnj-lib/Util/simple-uref.sml @@ -0,0 +1,58 @@ +(* simple-uref.sml + * + * UNIONFIND DATA STRUCTURE WITH PATH COMPRESSION + * + * Author: + * Fritz Henglein + * DIKU, University of Copenhagen + * henglein@diku.dk + *) + +structure SimpleURef : UREF = + struct + + exception UnionFind of string + + datatype 'a urefC + = ECR of 'a + | PTR of 'a uref + withtype 'a uref = 'a urefC ref + + fun find (p as ref(ECR _)) = p + | find (p as ref(PTR p')) = let + val p'' = find p' + in + p := PTR p''; p'' + end + + fun uRef x = ref (ECR x) + + fun !! p = (case !(find p) + of ECR x => x + | _ => raise Match + (* end case *)) + + fun equal (p, p') = (find p = find p') + + fun update (p, x) = let val p' = find p + in + p' := ECR x + end + + fun link (p, q) = let + val p' = find p + val q' = find q + in + if p' = q' then false else (p' := PTR q'; true) + end + + val union = link + + fun unify f (p, q) = let + val v = f(!!p, !!q) + in + union (p, q) before update (q, v) + end + + end (* SimpleURef *) + diff --git a/smlnj-lib/Util/smlnj-lib.cm b/smlnj-lib/Util/smlnj-lib.cm new file mode 100644 index 0000000..3ea4178 --- /dev/null +++ b/smlnj-lib/Util/smlnj-lib.cm @@ -0,0 +1,254 @@ +(* smlnj-lib.cm + * + * COPYRIGHT (c) 2011 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * The sources specification for the SML/NJ Utility library; part of the + * SML/NJ Library suite. + *) + +Library + +signature ARRAY_SORT +signature ATOM +signature BASE64 +signature BIT_ARRAY +signature CHAR_MAP +signature DYNAMIC_ARRAY +signature FIFO +signature FORMAT +signature FORMAT_COMB +signature GRAPH_SCC +signature HASH_KEY +signature HASH_TABLE +signature INTERVAL_DOMAIN +signature INTERVAL_SET +signature IO_UTIL +signature GET_OPT +signature LIB_BASE +signature LIST_FORMAT +signature LIST_SORT +signature LIST_XPROD +signature MONO_ARRAY_SORT +signature MONO_DYNAMIC_ARRAY +signature MONO_HASH_SET +signature MONO_HASH_TABLE +signature MONO_HASH2_TABLE +signature MONO_PRIORITYQ +signature ORD_KEY +signature ORD_MAP +signature ORD_SET +signature PARSER_COMB +signature PATH_UTIL +signature PRIORITY +signature PROP_LIST +signature QUEUE +signature RAND +signature RANDOM +signature SCAN +signature SPLAY_TREE +signature UREF +signature UTF8 + +structure ANSITerm +structure ArrayQSort +structure Atom +structure AtomBinaryMap (* to be removed *) +structure AtomBinarySet (* to be removed *) +structure AtomRedBlackMap +structure AtomRedBlackSet +structure AtomMap +structure AtomSet +structure AtomTable +structure Base64 +structure BitArray +structure CharMap +structure DynamicArray +structure EditDistance +structure Fifo +structure FNVHash +structure Format +structure FormatComb +structure HashString +structure HashTable +structure IntBinaryMap (* to be removed *) +structure IntBinarySet (* to be removed *) +structure IntHashTable +structure IntListMap +structure IntListSet +structure IntRedBlackMap +structure IntRedBlackSet +structure IOUtil +structure GetOpt +structure LibBase +structure ListFormat +structure ListMergeSort +structure ListXProd +structure Random +structure NativeInt +structure NativeWord +structure ParserComb +structure PathUtil +structure PrimeSizes +structure PropList +structure Queue +structure Rand +structure Random +structure Scan +structure SimpleURef +structure TimeLimit +structure URef +structure WordHashTable +structure WordRedBlackMap +structure WordRedBlackSet +structure RealOrderStats +structure UnivariateStats +structure UTF8 + +functor ArrayQSortFn +functor BSearchFn +functor BinaryMapFn +functor BinarySetFn +functor DynamicArrayFn +functor GraphSCCFn +functor HashSetFn +functor HashTableFn +functor Hash2TableFn +functor IntervalSetFn +functor KeywordFn +functor LeftPriorityQFn +functor ListMapFn +functor ListSetFn +functor MonoArrayFn +functor RedBlackMapFn +functor RedBlackSetFn +functor SplayMapFn +functor SplaySetFn + +is + +$/basis.cm + +ansi-term.sml +array-qsort-fn.sml +array-qsort.sml +array-sort-sig.sml +atom-sig.sml +atom-binary-map.sml +atom-binary-set.sml +atom-redblack-map.sml +atom-redblack-set.sml +atom-map.sml +atom-set.sml +atom-table.sml +atom.sml +base64-sig.sml +base64.sml +binary-map-fn.sml +binary-set-fn.sml +bit-array-sig.sml +bit-array.sml +bsearch-fn.sml +char-map-sig.sml +char-map.sml +dynamic-array-sig.sml +dynamic-array.sml +dynamic-array-fn.sml +edit-distance.sml +fifo-sig.sml +fifo.sml +fmt-fields.sml +fnv-hash.sml +format-sig.sml +format.sml +format-comb-sig.sml +format-comb.sml +graph-scc-sig.sml +graph-scc-fn.sml +hash-key-sig.sml +hash-string.sml +hash-set-fn.sml +hash-table-rep.sml +hash-table-sig.sml +hash-table.sml +hash-table-fn.sml +hash2-table-fn.sml +keyword-fn.sml +int-binary-map.sml +int-binary-set.sml +int-hash-table.sml +int-list-map.sml +int-list-set.sml +int-redblack-map.sml +int-redblack-set.sml +interval-domain-sig.sml +interval-set-fn.sml +interval-set-sig.sml +io-util-sig.sml +io-util.sml +getopt-sig.sml +getopt.sml +left-priorityq-fn.sml +lib-base-sig.sml +lib-base.sml +list-format-sig.sml +list-format.sml +list-map-fn.sml +list-mergesort.sml +list-set-fn.sml +list-xprod-sig.sml +list-xprod.sml +listsort-sig.sml +max-hash-table-size.sml (* internal *) +mono-array-fn.sml +mono-array-sort-sig.sml +mono-dynamic-array-sig.sml +mono-hash-set-sig.sml +mono-hash-table-sig.sml +mono-hash2-table-sig.sml +mono-priorityq-sig.sml +ord-key-sig.sml +ord-map-sig.sml +ord-set-sig.sml +parser-comb-sig.sml +parser-comb.sml +path-util-sig.sml +path-util.sml +plist-sig.sml +plist.sml +priority-sig.sml +queue-sig.sml +queue.sml +rand-sig.sml +rand.sml +random-sig.sml +real-format.sml +redblack-map-fn.sml +redblack-set-fn.sml +scan-sig.sml +scan.sml +simple-uref.sml +splay-map-fn.sml +splay-set-fn.sml +splaytree-sig.sml +splaytree.sml +time-limit.sml +uref-sig.sml +uref.sml +word-hash-table.sml +word-redblack-map.sml +word-redblack-set.sml +real-order-stats.sml +univariate-stats.sml +utf8-sig.sml +utf8.sml + +#if defined(SIZE_32) + Target32Bit/native.sml + Target32Bit/random.sml + Target32Bit/prime-sizes.sml +#else + Target64Bit/native.sml + Target64Bit/random.sml + Target64Bit/prime-sizes.sml +#endif diff --git a/smlnj-lib/Util/splay-map-fn.sml b/smlnj-lib/Util/splay-map-fn.sml new file mode 100644 index 0000000..38eef7a --- /dev/null +++ b/smlnj-lib/Util/splay-map-fn.sml @@ -0,0 +1,494 @@ +(* splay-map-fn.sml + * + * COPYRIGHT (c) 2012 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * Functor implementing dictionaries using splay trees. + * + *) + +functor SplayMapFn (K : ORD_KEY) :> ORD_MAP where type Key.ord_key = K.ord_key = + struct + structure Key = K + open SplayTree + + datatype 'a map + = EMPTY + | MAP of { + root : (K.ord_key * 'a) splay ref, + nobj : int + } + + fun cmpf k (k', _) = K.compare(k',k) + + val empty = EMPTY + + fun isEmpty EMPTY = true + | isEmpty _ = false + + (* return the first item in the map (or NONE if it is empty) *) + fun first EMPTY = NONE + | first (MAP{root, ...}) = let + fun f (SplayObj{value=(_, value), left=SplayNil, ...}) = SOME value + | f (SplayObj{left, ...}) = f left + | f SplayNil = raise Fail "SplayMapFn.first" + in + f (!root) + end + + (* return the first item in the map and its key (or NONE if it is empty) *) + fun firsti EMPTY = NONE + | firsti (MAP{root, ...}) = let + fun f (SplayObj{value=(key, value), left=SplayNil, ...}) = SOME(key, value) + | f (SplayObj{left, ...}) = f left + | f SplayNil = raise Fail "SplayMapFn.firsti" + in + f (!root) + end + + fun singleton (key, v) = + MAP{nobj=1,root=ref(SplayObj{value=(key,v),left=SplayNil,right=SplayNil})} + + (* Insert an item. *) + fun insert (EMPTY,key,v) = + MAP{nobj=1,root=ref(SplayObj{value=(key,v),left=SplayNil,right=SplayNil})} + | insert (MAP{root,nobj},key,v) = + case splay (cmpf key, !root) of + (EQUAL,SplayObj{value,left,right}) => + MAP{nobj=nobj,root=ref(SplayObj{value=(key,v),left=left,right=right})} + | (LESS,SplayObj{value,left,right}) => + MAP{ + nobj=nobj+1, + root=ref(SplayObj{value=(key,v),left=SplayObj{value=value,left=left,right=SplayNil},right=right}) + } + | (GREATER,SplayObj{value,left,right}) => + MAP{ + nobj=nobj+1, + root=ref(SplayObj{ + value=(key,v), + left=left, + right=SplayObj{value=value,left=SplayNil,right=right} + }) + } + | (_,SplayNil) => raise LibBase.Impossible "SplayMapFn.insert SplayNil" + fun insert' ((k, x), m) = insert(m, k, x) + + fun insertWithi comb (m, key, v) = let + fun insert EMPTY = + MAP{nobj=1,root=ref(SplayObj{value=(key,v),left=SplayNil,right=SplayNil})} + | insert (MAP{root,nobj}) = (case splay (cmpf key, !root) + of (EQUAL, SplayObj{value,left,right}) => let + val v' = (key, comb(key, #2 value, v)) + in + MAP{ + nobj=nobj, + root=ref(SplayObj{value=v', left=left, right=right}) + } + end + | (LESS,SplayObj{value,left,right}) => MAP{ + nobj=nobj+1, + root=ref(SplayObj{value=(key,v),left=SplayObj{value=value,left=left,right=SplayNil},right=right}) + } + | (GREATER,SplayObj{value,left,right}) => MAP{ + nobj=nobj+1, + root=ref(SplayObj{ + value=(key,v), + left=left, + right=SplayObj{value=value,left=SplayNil,right=right} + }) + } + | (_,SplayNil) => raise LibBase.Impossible "SplayMapFn.insert SplayNil" + (* end case *)) + in + insert m + end + fun insertWith comb = insertWithi (fn (_, x1, x2) => comb(x1, x2)) + + fun inDomain (EMPTY, _) = false + | inDomain (MAP{root,nobj}, key) = (case splay (cmpf key, !root) + of (EQUAL, r as SplayObj{value,...}) => (root := r; true) + | (_, r) => (root := r; false) + (* end case *)) + + (* Look for an item, return NONE if the item doesn't exist *) + fun find (EMPTY,_) = NONE + | find (MAP{root,nobj},key) = (case splay (cmpf key, !root) + of (EQUAL, r as SplayObj{value,...}) => (root := r; SOME(#2 value)) + | (_, r) => (root := r; NONE) + (* end case *)) + + (* Look for an item, raise NotFound if the item doesn't exist *) + fun lookup (EMPTY,_) = raise LibBase.NotFound + | lookup (MAP{root,nobj},key) = (case splay (cmpf key, !root) + of (EQUAL, r as SplayObj{value,...}) => (root := r; #2 value) + | (_, r) => (root := r; raise LibBase.NotFound) + (* end case *)) + + (* Remove an item. + * Raise LibBase.NotFound if not found + *) + fun remove (EMPTY, _) = raise LibBase.NotFound + | remove (MAP{root,nobj}, key) = (case (splay (cmpf key, !root)) + of (EQUAL, SplayObj{value, left, right}) => + if nobj = 1 + then (EMPTY, #2 value) + else (MAP{root=ref(join(left,right)),nobj=nobj-1}, #2 value) + | (_,r) => (root := r; raise LibBase.NotFound) + (* end case *)) + fun findAndRemove arg = SOME(remove arg) handle LibBase.NotFound => NONE + + (* Return the number of items in the table *) + fun numItems EMPTY = 0 + | numItems (MAP{nobj,...}) = nobj + + (* Return a list of the items (and their keys) in the dictionary *) + fun listItems EMPTY = [] + | listItems (MAP{root,...}) = let + fun apply (SplayNil, l) = l + | apply (SplayObj{value=(_, v), left, right}, l) = + apply(left, v::(apply (right,l))) + in + apply (!root, []) + end + fun listItemsi EMPTY = [] + | listItemsi (MAP{root,...}) = let + fun apply (SplayNil,l) = l + | apply (SplayObj{value,left,right},l) = + apply(left, value::(apply (right,l))) + in + apply (!root,[]) + end + + fun listKeys EMPTY = [] + | listKeys (MAP{root,...}) = let + fun apply (SplayNil, l) = l + | apply (SplayObj{value=(key, _),left,right},l) = + apply(left, key::(apply (right,l))) + in + apply (!root, []) + end + + local + fun next ((t as SplayObj{right, ...})::rest) = (t, left(right, rest)) + | next _ = (SplayNil, []) + and left (SplayNil, rest) = rest + | left (t as SplayObj{left=l, ...}, rest) = left(l, t::rest) + fun start s = left(!s, []) + in + fun equiv rngEq (MAP{root=s1, nobj=n1}, MAP{root=s2, nobj=n2}) = let + fun cmp (t1, t2) = (case (next t1, next t2) + of ((SplayObj{value=(xk, x), ...}, r1), (SplayObj{value=(yk, y), ...}, r2)) => ( + case Key.compare(xk, yk) + of EQUAL => rngEq (x, y) andalso cmp (r1, r2) + | _ => false + (* end case *)) + | ((SplayNil, _), (SplayNil, _)) => true + | _ => false + (* end case *)) + in + (n1 = n2) andalso cmp(start s1, start s2) + end + | equiv _ (EMPTY, EMPTY) = true + | equiv _ _ = false + + fun collate rngCmp (EMPTY, EMPTY) = EQUAL + | collate rngCmp (EMPTY, _) = LESS + | collate rngCmp (_, EMPTY) = GREATER + | collate rngCmp (MAP{root=s1, ...}, MAP{root=s2, ...}) = let + fun cmp (t1, t2) = (case (next t1, next t2) + of ((SplayNil, _), (SplayNil, _)) => EQUAL + | ((SplayNil, _), _) => LESS + | (_, (SplayNil, _)) => GREATER + | ((SplayObj{value=(xk, x), ...}, r1), (SplayObj{value=(yk, y), ...}, r2)) => ( + case Key.compare(xk, yk) + of EQUAL => (case rngCmp (x, y) + of EQUAL => cmp (r1, r2) + | order => order + (* end case *)) + | order => order + (* end case *)) + (* end case *)) + in + cmp (start s1, start s2) + end + + fun extends rngEx (MAP{root=s1, nobj=n1}, MAP{root=s2, nobj=n2}) = let + fun cmp (t1, t2) = (case (next t1, next t2) + of ((SplayNil, _), (SplayNil, _)) => true + | ((SplayNil, _), _) => false (* domain of second map is bigger than first *) + | (_, (SplayNil, _)) => true + | ((SplayObj{value=(xk, x), ...}, r1), (SplayObj{value=(yk, y), ...}, r2)) => ( + case Key.compare(xk, yk) + of LESS => cmp (r1, t2) + | EQUAL => rngEx (x, y) andalso cmp (r1, r2) + | GREATER => false + (* end case *)) + (* end case *)) + in + (n1 >= n2) andalso cmp (start s1, start s2) + end + | extends _ (EMPTY, MAP _) = false + | extends _ _ = true + end (* local *) + + (* Apply a function to the entries of the dictionary *) + fun appi af EMPTY = () + | appi af (MAP{root,...}) = + let fun apply SplayNil = () + | apply (SplayObj{value,left,right}) = + (apply left; af value; apply right) + in + apply (!root) + end + + fun app af EMPTY = () + | app af (MAP{root,...}) = + let fun apply SplayNil = () + | apply (SplayObj{value=(_,value),left,right}) = + (apply left; af value; apply right) + in + apply (!root) + end +(* + fun revapp af (MAP{root,...}) = + let fun apply SplayNil = () + | apply (SplayObj{value,left,right}) = + (apply right; af value; apply left) + in + apply (!root) + end +*) + + (* Fold function *) + fun foldri (abf : K.ord_key * 'a * 'b -> 'b) b EMPTY = b + | foldri (abf : K.ord_key * 'a * 'b -> 'b) b (MAP{root,...}) = + let fun apply (SplayNil : (K.ord_key * 'a) splay, b) = b + | apply (SplayObj{value,left,right},b) = + apply(left,abf(#1 value,#2 value,apply(right,b))) + in + apply (!root,b) + end + + fun foldr (abf : 'a * 'b -> 'b) b EMPTY = b + | foldr (abf : 'a * 'b -> 'b) b (MAP{root,...}) = + let fun apply (SplayNil : (K.ord_key * 'a) splay, b) = b + | apply (SplayObj{value=(_,value),left,right},b) = + apply(left,abf(value,apply(right,b))) + in + apply (!root,b) + end + + fun foldli (abf : K.ord_key * 'a * 'b -> 'b) b EMPTY = b + | foldli (abf : K.ord_key * 'a * 'b -> 'b) b (MAP{root,...}) = + let fun apply (SplayNil : (K.ord_key * 'a) splay, b) = b + | apply (SplayObj{value,left,right},b) = + apply(right,abf(#1 value,#2 value,apply(left,b))) + in + apply (!root,b) + end + + fun foldl (abf : 'a * 'b -> 'b) b EMPTY = b + | foldl (abf : 'a * 'b -> 'b) b (MAP{root,...}) = + let fun apply (SplayNil : (K.ord_key * 'a) splay, b) = b + | apply (SplayObj{value=(_,value),left,right},b) = + apply(right,abf(value,apply(left,b))) + in + apply (!root,b) + end + + (* Map a table to a new table that has the same keys*) + fun mapi (af : K.ord_key * 'a -> 'b) EMPTY = EMPTY + | mapi (af : K.ord_key * 'a -> 'b) (MAP{root,nobj}) = + let fun ap (SplayNil : (K.ord_key * 'a) splay) = SplayNil + | ap (SplayObj{value,left,right}) = let + val left' = ap left + val value' = (#1 value, af value) + in + SplayObj{value = value', left = left', right = ap right} + end + in + MAP{root = ref(ap (!root)), nobj = nobj} + end + + fun map (af : 'a -> 'b) EMPTY = EMPTY + | map (af : 'a -> 'b) (MAP{root,nobj}) = + let fun ap (SplayNil : (K.ord_key * 'a) splay) = SplayNil + | ap (SplayObj{value,left,right}) = let + val left' = ap left + val value' = (#1 value, af (#2 value)) + in + SplayObj{value = value', left = left', right = ap right} + end + in + MAP{root = ref(ap (!root)), nobj = nobj} + end + +(* the following are generic implementations of the unionWith, intersectWith, + * and mergeWith operetions. These should be specialized for the internal + * representations at some point. + *) + fun unionWith f (m1, m2) = let + fun ins f (key, x, m) = (case find(m, key) + of NONE => insert(m, key, x) + | (SOME x') => insert(m, key, f(x, x')) + (* end case *)) + in + if (numItems m1 > numItems m2) + then foldli (ins (fn (a, b) => f(b, a))) m1 m2 + else foldli (ins f) m2 m1 + end + fun unionWithi f (m1, m2) = let + fun ins f (key, x, m) = (case find(m, key) + of NONE => insert(m, key, x) + | (SOME x') => insert(m, key, f(key, x, x')) + (* end case *)) + in + if (numItems m1 > numItems m2) + then foldli (ins (fn (k, a, b) => f(k, b, a))) m1 m2 + else foldli (ins f) m2 m1 + end + + fun intersectWith f (m1, m2) = let + (* iterate over the elements of m1, checking for membership in m2 *) + fun intersect f (m1, m2) = let + fun ins (key, x, m) = (case find(m2, key) + of NONE => m + | (SOME x') => insert(m, key, f(x, x')) + (* end case *)) + in + foldli ins empty m1 + end + in + if (numItems m1 > numItems m2) + then intersect f (m1, m2) + else intersect (fn (a, b) => f(b, a)) (m2, m1) + end + + fun intersectWithi f (m1, m2) = let + (* iterate over the elements of m1, checking for membership in m2 *) + fun intersect f (m1, m2) = let + fun ins (key, x, m) = (case find(m2, key) + of NONE => m + | (SOME x') => insert(m, key, f(key, x, x')) + (* end case *)) + in + foldli ins empty m1 + end + in + if (numItems m1 > numItems m2) + then intersect f (m1, m2) + else intersect (fn (k, a, b) => f(k, b, a)) (m2, m1) + end + + fun mergeWith f (m1, m2) = let + fun merge ([], [], m) = m + | merge ((k1, x1)::r1, [], m) = mergef (k1, SOME x1, NONE, r1, [], m) + | merge ([], (k2, x2)::r2, m) = mergef (k2, NONE, SOME x2, [], r2, m) + | merge (m1 as ((k1, x1)::r1), m2 as ((k2, x2)::r2), m) = ( + case Key.compare (k1, k2) + of LESS => mergef (k1, SOME x1, NONE, r1, m2, m) + | EQUAL => mergef (k1, SOME x1, SOME x2, r1, r2, m) + | GREATER => mergef (k2, NONE, SOME x2, m1, r2, m) + (* end case *)) + and mergef (k, x1, x2, r1, r2, m) = (case f (x1, x2) + of NONE => merge (r1, r2, m) + | SOME y => merge (r1, r2, insert(m, k, y)) + (* end case *)) + in + merge (listItemsi m1, listItemsi m2, empty) + end + + fun mergeWithi f (m1, m2) = let + fun merge ([], [], m) = m + | merge ((k1, x1)::r1, [], m) = mergef (k1, SOME x1, NONE, r1, [], m) + | merge ([], (k2, x2)::r2, m) = mergef (k2, NONE, SOME x2, [], r2, m) + | merge (m1 as ((k1, x1)::r1), m2 as ((k2, x2)::r2), m) = ( + case Key.compare (k1, k2) + of LESS => mergef (k1, SOME x1, NONE, r1, m2, m) + | EQUAL => mergef (k1, SOME x1, SOME x2, r1, r2, m) + | GREATER => mergef (k2, NONE, SOME x2, m1, r2, m) + (* end case *)) + and mergef (k, x1, x2, r1, r2, m) = (case f (k, x1, x2) + of NONE => merge (r1, r2, m) + | SOME y => merge (r1, r2, insert(m, k, y)) + (* end case *)) + in + merge (listItemsi m1, listItemsi m2, empty) + end + + (* this is a generic implementation of mapPartial. It should + * be specialized to the data-structure at some point. + *) + fun mapPartial f m = let + fun g (key, item, m) = (case f item + of NONE => m + | (SOME item') => insert(m, key, item') + (* end case *)) + in + foldli g empty m + end + fun mapPartiali f m = let + fun g (key, item, m) = (case f(key, item) + of NONE => m + | (SOME item') => insert(m, key, item') + (* end case *)) + in + foldli g empty m + end + + (* this is a generic implementation of filter. It should + * be specialized to the data-structure at some point. + *) + fun filter predFn m = let + fun f (key, item, m) = if predFn item + then insert(m, key, item) + else m + in + foldli f empty m + end + fun filteri predFn m = let + fun f (key, item, m) = if predFn(key, item) + then insert(m, key, item) + else m + in + foldli f empty m + end + + (* check the elements of a map with a predicate and return true if + * any element satisfies the predicate. Return false otherwise. + * Elements are checked in key order. + *) + fun exists pred = let + fun exists' SplayNil = false + | exists' (SplayObj{value=(_, x), left, right}) = + exists' left orelse pred x orelse exists' right + in + fn EMPTY => false | (MAP{root, ...}) => exists' (!root) + end + fun existsi pred = let + fun exists' SplayNil = false + | exists' (SplayObj{value, left, right}) = + exists' left orelse pred value orelse exists' right + in + fn EMPTY => false | (MAP{root, ...}) => exists' (!root) + end + + (* check the elements of a map with a predicate and return true if + * they all satisfy the predicate. Return false otherwise. Elements + * are checked in key order. + *) + fun all pred = let + fun all' SplayNil = true + | all' (SplayObj{value=(_, x), left, right}) = + all' left andalso pred x andalso all' right + in + fn EMPTY => true | (MAP{root, ...}) => all' (!root) + end + fun alli pred = let + fun all' SplayNil = true + | all' (SplayObj{value, left, right}) = all' left andalso pred value andalso all' right + in + fn EMPTY => true | (MAP{root, ...}) => all' (!root) + end + + end (* SplayDictFn *) diff --git a/smlnj-lib/Util/splay-set-fn.sml b/smlnj-lib/Util/splay-set-fn.sml new file mode 100644 index 0000000..202a426 --- /dev/null +++ b/smlnj-lib/Util/splay-set-fn.sml @@ -0,0 +1,406 @@ +(* splay-set-fn.sml + * + * COPYRIGHT (c) 2015 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * Functor implementing ordered sets using splay trees. + * + *) + +functor SplaySetFn (K : ORD_KEY) :> ORD_SET where type Key.ord_key = K.ord_key = + struct + structure Key = K + open SplayTree + + type item = K.ord_key + + datatype set + = EMPTY + | SET of { + root : item splay ref, + nobj : int + } + + fun cmpf k = fn k' => K.compare(k',k) + + val empty = EMPTY + + fun singleton v = SET{root = ref(SplayObj{value=v,left=SplayNil,right=SplayNil}),nobj=1} + + (* Primitive insertion. + *) + fun insert (v,(nobj,root)) = + case splay (cmpf v, root) of + (EQUAL,SplayObj{value,left,right}) => + (nobj,SplayObj{value=v,left=left,right=right}) + | (LESS,SplayObj{value,left,right}) => + (nobj+1, + SplayObj{ + value=v, + left=SplayObj{value=value,left=left,right=SplayNil}, + right=right}) + | (GREATER,SplayObj{value,left,right}) => + (nobj+1, + SplayObj{ + value=v, + left=left, + right=SplayObj{value=value,left=SplayNil,right=right}}) + | (_,SplayNil) => (1,SplayObj{value=v,left=SplayNil,right=SplayNil}) + + (* Add an item. + *) + fun add (EMPTY,v) = singleton v + | add (SET{root,nobj},v) = let + val (cnt,t) = insert(v,(nobj,!root)) + in + SET{nobj=cnt,root=ref t} + end + fun add' (s, x) = add(x, s) + + (* Insert a list of items. + *) + fun addList (set,[]) = set + | addList (set,l) = let + val arg = case set of EMPTY => (0,SplayNil) + | SET{root,nobj} => (nobj,!root) + val (cnt,t) = List.foldl insert arg l + in + SET{nobj=cnt,root=ref t} + end + + fun fromList l = addList (empty, l) + + (* Remove an item. + * Raise LibBase.NotFound if not found + *) + fun delete (EMPTY,_) = raise LibBase.NotFound + | delete (SET{root,nobj},key) = + case splay (cmpf key, !root) of + (EQUAL,SplayObj{value,left,right}) => + if nobj = 1 then EMPTY + else SET{root=ref(join(left,right)),nobj=nobj-1} + | (_,r) => (root := r; raise LibBase.NotFound) + + (* return true if the item is in the set *) + fun member (EMPTY, key) = false + | member (SET{root,nobj}, key) = (case splay (cmpf key, !root) + of (EQUAL, r) => (root := r; true) + | (_, r) => (root := r; false) + (* end case *)) + + fun isEmpty EMPTY = true + | isEmpty _ = false + + fun minItem EMPTY = raise Empty + | minItem (SET{root, ...}) = let + fun min (SplayObj{value, left=SplayNil, ...}) = value + | min (SplayObj{left, ...}) = min left + | min SplayNil = raise Fail "impossible" + in + min (!root) + end + + fun maxItem EMPTY = raise Empty + | maxItem (SET{root, ...}) = let + fun max (SplayObj{value, right=SplayNil, ...}) = value + | max (SplayObj{right, ...}) = max right + | max SplayNil = raise Fail "impossible" + in + max (!root) + end + + local + fun member (x,tree) = let + fun mbr SplayNil = false + | mbr (SplayObj{value,left,right}) = + case K.compare(x,value) of + LESS => mbr left + | GREATER => mbr right + | _ => true + in mbr tree end + + (* true if every item in t is in t' *) + fun treeIn (t,t') = let + fun isIn SplayNil = true + | isIn (SplayObj{value,left=SplayNil,right=SplayNil}) = + member(value, t') + | isIn (SplayObj{value,left,right=SplayNil}) = + member(value, t') andalso isIn left + | isIn (SplayObj{value,left=SplayNil,right}) = + member(value, t') andalso isIn right + | isIn (SplayObj{value,left,right}) = + member(value, t') andalso isIn left andalso isIn right + in + isIn t + end + in + fun equal (SET{root=rt,nobj=n},SET{root=rt',nobj=n'}) = + (n=n') andalso treeIn (!rt,!rt') + | equal (EMPTY, EMPTY) = true + | equal _ = false + + fun isSubset (SET{root=rt,nobj=n},SET{root=rt',nobj=n'}) = + (n<=n') andalso treeIn (!rt,!rt') + | isSubset (EMPTY,_) = true + | isSubset _ = false + end + + local + fun next ((t as SplayObj{right, ...})::rest) = (t, left(right, rest)) + | next _ = (SplayNil, []) + and left (SplayNil, rest) = rest + | left (t as SplayObj{left=l, ...}, rest) = left(l, t::rest) + in + fun compare (EMPTY, EMPTY) = EQUAL + | compare (EMPTY, _) = LESS + | compare (_, EMPTY) = GREATER + | compare (SET{root=s1, ...}, SET{root=s2, ...}) = let + fun cmp (t1, t2) = (case (next t1, next t2) + of ((SplayNil, _), (SplayNil, _)) => EQUAL + | ((SplayNil, _), _) => LESS + | (_, (SplayNil, _)) => GREATER + | ((SplayObj{value=e1, ...}, r1), (SplayObj{value=e2, ...}, r2)) => ( + case Key.compare(e1, e2) + of EQUAL => cmp (r1, r2) + | order => order + (* end case *)) + (* end case *)) + in + cmp (left(!s1, []), left(!s2, [])) + end + + fun disjoint (EMPTY, _) = true + | disjoint (_, EMPTY) = true + | disjoint (SET{root=rt, ...}, SET{root=rt', ...}) = let + fun walk (t1, t2) = (case (next t1, next t2) + of ((SplayNil, _), _) => true + | (_, (SplayNil, _)) => true + | ((SplayObj{value=e1, ...}, r1), (SplayObj{value=e2, ...}, r2)) => ( + case Key.compare(e1, e2) + of LESS => walk(r1, t2) + | EQUAL => false + | GREATER => walk(t1, r2) + (* end case *)) + (* end case *)) + in + walk (left(!rt, []), left(!rt', [])) + end + end (* local *) + + (* Return the number of items in the table *) + fun numItems EMPTY = 0 + | numItems (SET{nobj,...}) = nobj + + fun toList EMPTY = [] + | toList (SET{root,...}) = + let fun apply (SplayNil,l) = l + | apply (SplayObj{value,left,right},l) = + apply(left, value::(apply (right,l))) + in + apply (!root,[]) + end + + fun split (value,s) = + case splay(cmpf value, s) of + (EQUAL,SplayObj{value,left,right}) => (SOME value, left, right) + | (LESS,SplayObj{value,left,right}) => (NONE, SplayObj{value=value,left=left,right=SplayNil},right) + | (GREATER,SplayObj{value,left,right}) => (NONE, left, SplayObj{value=value,right=right,left=SplayNil}) + | (_,SplayNil) => (NONE, SplayNil, SplayNil) + + fun intersection (EMPTY,_) = EMPTY + | intersection (_,EMPTY) = EMPTY + | intersection (SET{root,...},SET{root=root',...}) = + let fun inter(SplayNil,_) = (SplayNil,0) + | inter(_,SplayNil) = (SplayNil,0) + | inter(s, SplayObj{value,left,right}) = + case split(value,s) of + (SOME v, l, r) => + let val (l',lcnt) = inter(l,left) + val (r',rcnt) = inter(r,right) + in + (SplayObj{value=v,left=l',right=r'},lcnt+rcnt+1) + end + | (_,l,r) => + let val (l',lcnt) = inter(l,left) + val (r',rcnt) = inter(r,right) + in + (join(l',r'),lcnt+rcnt) + end + in + case inter(!root,!root') of + (_,0) => EMPTY + | (root,cnt) => SET{root = ref root, nobj = cnt} + end + + fun count st = + let fun cnt(SplayNil,n) = n + | cnt(SplayObj{left,right,...},n) = cnt(left,cnt(right,n+1)) + in + cnt(st,0) + end + + fun difference (EMPTY,_) = EMPTY + | difference (s,EMPTY) = s + | difference (SET{root,...}, SET{root=root',...}) = + let fun diff(SplayNil,_) = (SplayNil,0) + | diff(s,SplayNil) = (s, count s) + | diff(s,SplayObj{value,right,left}) = + let val (_,l,r) = split(value,s) + val (l',lcnt) = diff(l,left) + val (r',rcnt) = diff(r,right) + in + (join(l',r'),lcnt+rcnt) + end + in + case diff(!root,!root') of + (_,0) => EMPTY + | (root,cnt) => SET{root = ref root, nobj = cnt} + end + + fun union (EMPTY,s) = s + | union (s,EMPTY) = s + | union (SET{root,...}, SET{root=root',...}) = + let fun uni(SplayNil,s) = (s,count s) + | uni(s,SplayNil) = (s, count s) + | uni(s,SplayObj{value,right,left}) = + let val (_,l,r) = split(value,s) + val (l',lcnt) = uni(l,left) + val (r',rcnt) = uni(r,right) + in + (SplayObj{value=value,right=r',left=l'},lcnt+rcnt+1) + end + val (root,cnt) = uni(!root,!root') + in + SET{root = ref root, nobj = cnt} + end + + fun subtract (s, item) = difference (s, singleton item) + fun subtract' (item, s) = subtract (s, item) + + fun subtractList (l, items) = let + val items' = List.foldl (fn (x, set) => add(set, x)) EMPTY items + in + difference (l, items') + end + + fun map f EMPTY = EMPTY + | map f (SET{root, ...}) = let + fun mapf (acc, SplayNil) = acc + | mapf (acc, SplayObj{value,left,right}) = + mapf (add (mapf (acc, left), f value), right) + in + mapf (EMPTY, !root) + end + + fun mapPartial f EMPTY = EMPTY + | mapPartial f (SET{root, ...}) = let + fun mapf (acc, SplayNil) = acc + | mapf (acc, SplayObj{value,left,right}) = let + val acc = mapf (acc, left) + in + case f value + of SOME value' => mapf (add(acc, value'), right) + | NONE => mapf (acc, right) + (* end case *) + end + in + mapf (EMPTY, !root) + end + + fun app af EMPTY = () + | app af (SET{root,...}) = + let fun apply SplayNil = () + | apply (SplayObj{value,left,right}) = + (apply left; af value; apply right) + in apply (!root) end +(* + fun revapp af (SET{root,...}) = + let fun apply SplayNil = () + | apply (SplayObj{value,left,right}) = + (apply right; af value; apply left) + in apply (!root) end +*) + (* Fold function *) + fun foldr abf b EMPTY = b + | foldr abf b (SET{root,...}) = + let fun apply (SplayNil, b) = b + | apply (SplayObj{value,left,right},b) = + apply(left,abf(value,apply(right,b))) + in + apply (!root,b) + end + + fun foldl abf b EMPTY = b + | foldl abf b (SET{root,...}) = + let fun apply (SplayNil, b) = b + | apply (SplayObj{value,left,right},b) = + apply(right,abf(value,apply(left,b))) + in + apply (!root,b) + end + + fun filter p EMPTY = EMPTY + | filter p (SET{root,...}) = let + fun filt (SplayNil,tree) = tree + | filt (SplayObj{value,left,right},tree) = let + val t' = filt(right,filt(left,tree)) + in + if p value then insert(value,t') else t' + end + in + case filt(!root,(0,SplayNil)) of + (0,_) => EMPTY + | (cnt,t) => SET{nobj=cnt,root=ref t} + end + + fun partition p EMPTY = (EMPTY, EMPTY) + | partition p (SET{root,...}) = let + fun filt (SplayNil, tree1, tree2) = (tree1, tree2) + | filt (SplayObj{value,left,right}, tree1, tree2) = let + val (t1, t2) = filt(left, tree1, tree2) + val (t1', t2') = filt(right, t1, t2) + in + if p value + then (insert(value, t1'), t2') + else (t1', insert(value, t2')) + end + fun mk (0, _) = EMPTY + | mk (cnt, t) = SET{nobj=cnt, root=ref t} + val (t1, t2) = filt (!root, (0, SplayNil), (0, SplayNil)) + in + (mk t1, mk t2) + end + + fun exists p EMPTY = false + | exists p (SET{root,...}) = let + fun ex SplayNil = false + | ex (SplayObj{value=v, left=l, right=r}) = p v orelse ex l orelse ex r + in + ex (!root) + end + + fun all p EMPTY = true + | all p (SET{root,...}) = let + fun all' SplayNil = true + | all' (SplayObj{value=v, left=l, right=r}) = p v andalso all' l andalso all' r + in + all' (!root) + end + + fun find p EMPTY = NONE + | find p (SET{root,...}) = let + fun ex SplayNil = NONE + | ex (SplayObj{value=v,left=l,right=r}) = + if p v then SOME v + else (case ex l + of NONE => ex r + | a => a + (* end case *)) + in + ex (!root) + end + + (* deprecated *) + val listItems = toList + + end (* SplaySet *) diff --git a/smlnj-lib/Util/splaytree-sig.sml b/smlnj-lib/Util/splaytree-sig.sml new file mode 100644 index 0000000..83a0982 --- /dev/null +++ b/smlnj-lib/Util/splaytree-sig.sml @@ -0,0 +1,33 @@ +(* splaytree-sig.sml + * + * COPYRIGHT (c) 2015 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * Signature for a splay tree data structure. + * + *) + +signature SPLAY_TREE = + sig + + datatype 'a splay + = SplayObj of { + value : 'a, + right : 'a splay, + left : 'a splay + } + | SplayNil + + val splay : (('a -> order) * 'a splay) -> (order * 'a splay) + (* (r,tree') = splay (cmp,tree) + * where tree' is tree adjusted using the comparison function cmp + * and, if tree' = SplayObj{value,...}, r = cmp value. + * tree' = SplayNil iff tree = SplayNil, in which case r is undefined. + *) + + val join : 'a splay * 'a splay -> 'a splay + (* join(t,t') returns a new splay tree formed of t and t' + *) + + end (* SPLAY_TREE *) + diff --git a/smlnj-lib/Util/splaytree.sml b/smlnj-lib/Util/splaytree.sml new file mode 100644 index 0000000..c478901 --- /dev/null +++ b/smlnj-lib/Util/splaytree.sml @@ -0,0 +1,111 @@ +(* splaytree.sml + * + * COPYRIGHT (c) 2015 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * Splay tree structure. + * + *) + +structure SplayTree : SPLAY_TREE = + struct + + datatype 'a splay + = SplayObj of { + value : 'a, + right : 'a splay, + left : 'a splay + } + | SplayNil + + datatype 'a ans_t = No | Eq of 'a | Lt of 'a | Gt of 'a + + fun splay (compf, root) = let + fun adj SplayNil = (No,SplayNil,SplayNil) + | adj (arg as SplayObj{value,left,right}) = + (case compf value of + EQUAL => (Eq value, left, right) + | GREATER => + (case left of + SplayNil => (Gt value,SplayNil,right) + | SplayObj{value=value',left=left',right=right'} => + (case compf value' of + EQUAL => (Eq value',left', + SplayObj{value=value,left=right',right=right}) + | GREATER => + (case left' of + SplayNil => (Gt value',left',SplayObj{value=value,left=right',right=right}) + | _ => + let val (V,L,R) = adj left' + val rchild = SplayObj{value=value,left=right',right=right} + in + (V,L,SplayObj{value=value',left=R,right=rchild}) + end + ) (* end case *) + | _ => + (case right' of + SplayNil => (Lt value',left',SplayObj{value=value,left=right',right=right}) + | _ => + let val (V,L,R) = adj right' + val rchild = SplayObj{value=value,left=R,right=right} + val lchild = SplayObj{value=value',left=left',right=L} + in + (V,lchild,rchild) + end + ) (* end case *) + ) (* end case *) + ) (* end case *) + | _ => + (case right of + SplayNil => (Lt value,left,SplayNil) + | SplayObj{value=value',left=left',right=right'} => + (case compf value' of + EQUAL => + (Eq value',SplayObj{value=value,left=left,right=left'},right') + | LESS => + (case right' of + SplayNil => (Lt value',SplayObj{value=value,left=left,right=left'},right') + | _ => + let val (V,L,R) = adj right' + val lchild = SplayObj{value=value,left=left,right=left'} + in + (V,SplayObj{value=value',left=lchild,right=L},R) + end + ) (* end case *) + | _ => + (case left' of + SplayNil => (Gt value',SplayObj{value=value,left=left,right=left'},right') + | _ => + let val (V,L,R) = adj left' + val rchild = SplayObj{value=value',left=R,right=right'} + val lchild = SplayObj{value=value,left=left,right=L} + in + (V,lchild,rchild) + end + ) (* end case *) + ) (* end case *) + ) (* end case *) + ) (* end case *) + in + case adj root + of (No,_,_) => (GREATER,SplayNil) + | (Eq v,l,r) => (EQUAL,SplayObj{value=v,left=l,right=r}) + | (Lt v,l,r) => (LESS,SplayObj{value=v,left=l,right=r}) + | (Gt v,l,r) => (GREATER,SplayObj{value=v,left=l,right=r}) + end + + fun lrotate SplayNil = SplayNil + | lrotate (arg as SplayObj{value,left,right=SplayNil}) = arg + | lrotate (SplayObj{value,left,right=SplayObj{value=v,left=l,right=r}}) = + lrotate (SplayObj{value=v,left=SplayObj{value=value,left=left,right=l},right=r}) + + fun join (SplayNil,SplayNil) = SplayNil + | join (SplayNil,t) = t + | join (t,SplayNil) = t + | join (l,r) = + case lrotate l of + SplayNil => r (* impossible as l is not SplayNil *) + | SplayObj{value,left,right} => SplayObj{value=value,left=left,right=r} + + end (* SplayTree *) + diff --git a/smlnj-lib/Util/time-limit.sml b/smlnj-lib/Util/time-limit.sml new file mode 100644 index 0000000..ea8ec01 --- /dev/null +++ b/smlnj-lib/Util/time-limit.sml @@ -0,0 +1,35 @@ +(* time-limit.sml + * + * COPYRIGHT (c) 2020 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * Limit the execution time of a computation. + *) + +structure TimeLimit : sig + + exception TimeOut + + val timeLimit : Time.time -> ('a -> 'b) -> 'a -> 'b + + end = struct + + exception TimeOut + + fun timeLimit t f x = let + val setitimer = SMLofNJ.IntervalTimer.setIntTimer + fun timerOn () = ignore(setitimer (SOME t)) + fun timerOff () = ignore(setitimer NONE) + val escapeCont = SMLofNJ.Cont.callcc (fn k => ( + SMLofNJ.Cont.callcc (fn k' => (SMLofNJ.Cont.throw k k')); + timerOff(); + raise TimeOut)) + fun handler _ = escapeCont + in + Signals.setHandler (Signals.sigALRM, Signals.HANDLER handler); + timerOn(); + ((f x) handle ex => (timerOff(); raise ex)) + before timerOff() + end + + end; (* TimeLimit *) diff --git a/smlnj-lib/Util/univariate-stats.sml b/smlnj-lib/Util/univariate-stats.sml new file mode 100644 index 0000000..2387fc7 --- /dev/null +++ b/smlnj-lib/Util/univariate-stats.sml @@ -0,0 +1,120 @@ +(* univariate-stats.sml + * + * Some statistical functions on unweighted univariate samples. + * + * Copyright (c) 2004 by The Fellowship of SML/NJ + * + * Author: Matthias Blume (blume@tti-c.org) + *) +structure UnivariateStats :> sig + + (* We distinguish between two kinds of samples. Only the "heavy" + * kind permits calculation of average deviation and median. + * It is also considerably more expensive because it keeps an + * array of all points while the "light" variety is constant-size. *) + type light + type heavy + + type 'a sample (* light or heavy *) + type 'a evaluation (* light or heavy *) + + (* Samples are built incrementally by adding points to an initially + * empty sample: *) + val lempty : light sample + val hempty : unit -> heavy sample + val ladd : real * light sample -> light sample (* constant *) + val hadd : real * heavy sample -> heavy sample (* amortized constant *) + + (* Evaluate the sample; this completes all the expensive work except + * for things that depend on "heavy" samples: *) + val evaluate : 'a sample -> 'a evaluation (* constant *) + + (* extracting of "cheap" information (constant-time): *) + val N : 'a evaluation -> int + val n : 'a evaluation -> real (* N as real *) + val mean : 'a evaluation -> real + val variance : 'a evaluation -> real + val standardDeviation : 'a evaluation -> real + val skew : 'a evaluation -> real + val kurtosis : 'a evaluation -> real + + (* extracting of "expensive" information: *) + val median : heavy evaluation -> real (* randomized linear *) + val averageDeviation : heavy evaluation -> real (* linear *) + +end = struct + + infix 8 $ val op $ = Unsafe.Array.sub + infix 3 <- fun (a, i) <- x = Unsafe.Array.update (a, i, x) + + (* two kinds of "extra info" *) + type light = unit (* nothing *) + type heavy = real array * int (* rubber array of points, size *) + (* a sample : extra info * N * sum x^4 * sum x^3 * sum x^2 * sum x : *) + type 'a sample = 'a * int * real * real * real * real + (* an evaluation: extra info * N * N as real * + * mean * variance * standard deviation * + * skew * kurtosis : *) + datatype 'a evaluation = + E of { ext_info: 'a, (* extra info *) + ni: int, (* number of points *) + nr: real, (* number of points (as real) *) + mean: real, + sd2: real, (* sd*sd = variance *) + sd: real, (* standard deviation *) + skew: real, + kurtosis: real } + + val SZ = 1024 (* minimum allocated size of heavy array *) + val lempty = ((), 0, 0.0, 0.0, 0.0, 0.0) + fun hempty () = ((Array.array (SZ, 0.0), SZ), 0, 0.0, 0.0, 0.0, 0.0) + + fun ladd (x:real, ((), n, sx4, sx3, sx2, sx1)) = + let val x2 = x*x val (x3, x4) = (x2*x, x2*x2) + in ((), n+1, sx4+x4, sx3+x3, sx2+x2, sx1+x) + end + + fun hadd (x:real, ((a, sz), n, sx4, sx3, sx2, sx1)) = + let val x2 = x*x val (x3, x4) = (x2*x, x2*x2) + val (a, sz) = + if n < sz then (a, sz) + else let val sz = sz+sz + val b = Array.tabulate + (sz, fn i => if i=ni then ds/nr else ad (i+1, ds + abs(a$i-m)) + in ad (0, 0.0) end +end diff --git a/smlnj-lib/Util/uref-sig.sml b/smlnj-lib/Util/uref-sig.sml new file mode 100644 index 0000000..0fd3c1e --- /dev/null +++ b/smlnj-lib/Util/uref-sig.sml @@ -0,0 +1,91 @@ +(* uref-sig.sml + * + * Interface to UnionFind package. + * + * Author: + * Fritz Henglein + * DIKU, University of Copenhagen + * henglein@diku.dk + * + * DESCRIPTION + * + * Union/Find data type with ref-like interface. A Union/Find structure + * consists of a type constructor 'a uref with operations for + * making an element of 'a uref (make), getting the contents of + * an element (!!), checking for equality of two elements (equal), and + * for joining two elements (union). uref is analogous to ref as + * expressed in the following table: + * + * ------------------------------------------------------------------- + * type 'a ref 'a uref + * ------------------------------------------------------------------- + * introduction ref uref + * elimination ! !! + * equality = equal + * updating := update + * unioning link, union, unify + * ------------------------------------------------------------------- + * + * The main difference between 'a ref and 'a uref is in the union + * operation. Without union 'a ref and 'a uref can be used + * interchangebly. An assignment to a reference changes only the + * contents of the reference, but not the reference itself. In + * particular, any two pointers that were different (in the sense of the + * equality predicate = returning false) before an assignment will still + * be so. Their contents may or may not be equal after the assignment, + * though. In contrast, applying the union operations (link, union, + * unify) to two uref elements makes the two elements themselves + * equal (in the sense of the predicate equal returning true). As a + * consequence their contents will also be identical: in the case of link + * and union it will be the contents of one of the two unioned elements, + * in the case of unify the contents is determined by a binary + * function parameter. The link, union, and unify functions return true + * when the elements were previously NOT equal. + *) + +signature UREF = + sig + + type 'a uref + (* type of uref-elements with contents of type 'a *) + + val uRef: 'a -> 'a uref + (* uref x creates a new element with contents x *) + + val equal: 'a uref * 'a uref -> bool + (* equal (e, e') returns true if and only if e and e' are either made by + * the same call to uref or if they have been unioned (see below). + *) + + val !! : 'a uref -> 'a + (* !!e returns the contents of e. + * Note: if 'a is an equality type then !!(uref x) = x, and + * equal(uref (!!x), x) = false. + *) + + val update : 'a uref * 'a -> unit + (* update(e, x) updates the contents of e to be x *) + + val unify : ('a * 'a -> 'a) -> 'a uref * 'a uref -> bool + (* unify f (e, e') makes e and e' equal; if v and v' are the + * contents of e and e', respectively, before unioning them, + * then the contents of the unioned element is f(v,v'). Returns + * true, when elements were not equal prior to the call. + *) + + val union : 'a uref * 'a uref -> bool + (* union (e, e') makes e and e' equal; the contents of the unioned + * element is the contents of one of e and e' before the union operation. + * After union(e, e') elements e and e' will be congruent in the + * sense that they are interchangeable in any context.. Returns + * true, when elements were not equal prior to the call. + *) + + val link : 'a uref * 'a uref -> bool + (* link (e, e') makes e and e' equal; the contents of the linked + * element is the contents of e' before the link operation. Returns + * true, when elements were not equal prior to the call. + *) + + end; (* UREF *) + diff --git a/smlnj-lib/Util/uref.sml b/smlnj-lib/Util/uref.sml new file mode 100644 index 0000000..3cde2e6 --- /dev/null +++ b/smlnj-lib/Util/uref.sml @@ -0,0 +1,88 @@ +(* uref.sml + * + * COPYRIGHT (c) 2020 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * UNIONFIND DATA STRUCTURE WITH PATH COMPRESSION AND RANKED UNION + * + * Author: + * Fritz Henglein + * DIKU, University of Copenhagen + * henglein@diku.dk + *) + +structure URef : UREF = + struct + + datatype 'a urefC + = ECR of 'a * int + | PTR of 'a uref + withtype 'a uref = 'a urefC ref + + fun find (p as ref (ECR _)) = p + | find (p as ref (PTR p')) = let + val p'' = find p' + in + p := PTR p''; p'' + end + + fun uRef x = ref (ECR(x, 0)) + + fun !! p = (case !(find p) + of ECR (x, _) => x + | _ => raise Match + (* end case *)) + + fun equal (p, p') = (find p = find p') + + fun update (p, x) = (case find p + of (p' as ref(ECR(_, r))) => p' := ECR(x, r) + | _ => raise Match + (* end case *)) + + fun link (p, q) = let + val p' = find p + val q' = find q + in + if (p' = q') then false else (p' := PTR q; true) + end + + fun unify f (p, q) = (case (find p, find q) + of (p' as ref(ECR(pc, pr)), q' as ref(ECR(qc, qr))) => + let + val newC = f (pc, qc) + in + if p' = q' + then (p' := ECR(newC, pr); false) + else ( + if pr = qr + then (q' := ECR(newC, qr+1); p' := PTR q') + else if pr < qr + then (q' := ECR(newC, qr); p' := PTR q') + else ((* pr > qr *) + p' := ECR(newC, pr); + q':= PTR p'); + true) + end + | _ => raise Match + (* end case *)) + + fun union (p, q) = let + val p' = find p + val q' = find q + in + if (p' = q') + then false + else (case (!p', !q') + of (ECR(pc, pr), ECR(qc, qr)) => ( + if pr = qr + then (q' := ECR(qc, qr+1); p' := PTR q') + else if pr < qr + then p' := PTR q' + else q':= PTR p'; + true) + | _ => raise Match + (* end case *)) + end + + end diff --git a/smlnj-lib/Util/utf8-sig.sml b/smlnj-lib/Util/utf8-sig.sml new file mode 100644 index 0000000..7e00a4a --- /dev/null +++ b/smlnj-lib/Util/utf8-sig.sml @@ -0,0 +1,73 @@ +(* utf8-sig.sml + * + * COPYRIGHT (c) 2020 John Reppy (http://www.cs.uchicago.edu/~jhr) + * All rights reserved. + * + * Routines for working with UTF8 encoded strings. + *) + +signature UTF8 = + sig + + type wchar = word + + val maxCodePoint : wchar (* = 0wx0010FFFF *) + + (* raised by some operations when applied to incomplete strings. *) + exception Incomplete + + (* raised when invalid Unicode/UTF8 characters are encountered in certain + * situations. + *) + exception Invalid + + (** Character operations **) + + (* convert a character reader to a wide-character reader; the reader + * raises `Incomplete` when the end of input is encountered in the middle + * of a multi-byte encoding and `Invalid` when an invalid UTF8 encoding + * is encountered. + *) + val getu : (char, 'strm) StringCvt.reader -> (wchar, 'strm) StringCvt.reader + + (* return the UTF8 encoding of a wide character; raises Invalid if the + * wide character is larger than the maxCodePoint, but does not do any + * other validity checking. + *) + val encode : wchar -> string + + val isAscii : wchar -> bool + val toAscii : wchar -> char (* truncates to 7-bits *) + val fromAscii : char -> wchar (* truncates to 7-bits *) + + (* return a printable string representation of a wide character; raises + * Invalid if the wide character is larger than the maxCodePoint, but + * does not do any other validity checking. + *) + val toString : wchar -> string + + (** String operations **) + + val size : string -> int + (* return the number of Unicode characters in a string *) + + val size' : substring -> int + (* return the number of Unicode characters in a substring *) + + val explode : string -> wchar list + (* return the list of wide characters that are encoded by a string *) + val implode : wchar list -> string + (* return the UTF-8 encoded string that represents the list of + * Unicode code points. + *) + + val map : (wchar -> wchar) -> string -> string + (* map a function over the Unicode characters in the string *) + val app : (wchar -> unit) -> string -> unit + (* apply a function to the Unicode characters in the string *) + val fold : ((wchar * 'a) -> 'a) -> 'a -> string -> 'a + (* fold a function over the Unicode characters in the string *) + val all : (wchar -> bool) -> string -> bool + val exists : (wchar -> bool) -> string -> bool + + end diff --git a/smlnj-lib/Util/utf8.sml b/smlnj-lib/Util/utf8.sml new file mode 100644 index 0000000..3288434 --- /dev/null +++ b/smlnj-lib/Util/utf8.sml @@ -0,0 +1,333 @@ +(* utf8.sml + * + * COPYRIGHT (c) 2020 John Reppy (http://www.cs.uchicago.edu/~jhr) + * All rights reserved. + * + * Routines for working with UTF8 encoded strings. + * + * Unicode value 1st byte 2nd byte 3rd byte 4th byte + * ----------------------- -------- -------- -------- -------- + * 00000 00000000 0xxxxxxx 0xxxxxxx + * 00000 00000yyy yyxxxxxx 110yyyyy 10xxxxxx + * 00000 zzzzyyyy yyxxxxxx 1110zzzz 10yyyyyy 10xxxxxx + * wwwzz zzzzyyyy yyxxxxxx 11110www 10zzzzzz 10yyyyyy 10xxxxxx + * + *) + +structure UTF8 :> UTF8 = + struct + + structure W = Word + structure SS = Substring + + type wchar = W.word + + fun w2c w = Char.chr(W.toInt w) + + val maxCodePoint : wchar = 0wx0010FFFF + + (* maximum values for the first byte for each encoding length *) + val max1Byte : W.word = 0wx7f (* 0xxx xxxx *) + val max2Byte : W.word = 0wxdf (* 110x xxxx *) + val max3Byte : W.word = 0wxef (* 1110 xxxx *) + val max4Byte : W.word = 0wxf7 (* 1111 0xxx *) + + (* bit masks for the first byte for each encoding length *) + val mask2Byte : W.word = 0wx1f + val mask3Byte : W.word = 0wx0f + val mask4Byte : W.word = 0wx07 + + (* bit mask for continuation bytes *) + val maskContByte : W.word = 0wx3f + + + exception Incomplete + exception Invalid + + (* a simple state machine for getting a valid UTF8 byte sequence. + * See https://unicode.org/mail-arch/unicode-ml/y2003-m02/att-0467/01-The_Algorithm_to_Valide_an_UTF-8_String + * for a description of the state machine. + *) + fun getu getc = let + fun getByte inS = (case getc inS + of SOME(c, inS') => (Word.fromInt(ord c), inS') + | NONE => raise Incomplete + (* end case *)) + fun inRange (minB : word, b, maxB) = ((b - minB) <= maxB - minB) + (* add the bits of a continuation byte to the accumulator *) + fun addContBits (accum, b) = W.orb(W.<<(accum, 0w6), W.andb(0wx3f, b)) + (* handles last byte for all multi-byte sequences *) + fun stateA (inS, accum) = let + val (b, inS) = getByte inS + in + if inRange(0wx80, b, 0wxbf) + then SOME(addContBits(accum, b), inS) + else raise Invalid + end + (* handles second/third byte for three/four-byte sequences *) + and stateB (inS, accum) = let + val (b, inS) = getByte inS + in + if inRange(0wx80, b, 0wxbf) + then stateA (inS, addContBits(accum, b)) + else raise Invalid + end + (* byte0 = 0b1110_0000 (3-byte sequence) *) + and stateC (inS, accum) = let + val (b, inS) = getByte inS + in + if inRange(0wxa0, b, 0wxbf) + then stateA (inS, addContBits(accum, b)) + else raise Invalid + end + (* byte0 = 0b1110_1101 (3-byte sequence) *) + and stateD (inS, accum) = let + val (b, inS) = getByte inS + in + if inRange(0wx80, b, 0wx9f) + then stateA (inS, addContBits(accum, b)) + else raise Invalid + end + (* byte0 = 0b1111_0001 .. 0b1111_0011 (4-byte sequence) *) + and stateE (inS, accum) = let + val (b, inS) = getByte inS + in + if inRange(0wx80, b, 0wxbf) + then stateB (inS, addContBits(accum, b)) + else raise Invalid + end + (* byte0 = 0b1111_0000 (4-byte sequence) *) + and stateF (inS, accum) = let + val (b, inS) = getByte inS + in + if inRange(0wx90, b, 0wxbf) + then stateB (inS, addContBits(accum, b)) + else raise Invalid + end + (* byte0 = 0b1111_1000 (4-byte sequence) *) + and stateG (inS, accum) = let + val (b, inS) = getByte inS + in + if inRange(0wx80, b, 0wx8f) + then stateB (inS, addContBits(accum, b)) + else raise Invalid + end + and start inS = (case getc inS + of SOME(c, inS) => let + val byte0 = Word.fromInt(ord c) + in + if (byte0 <= 0wx7f) + then SOME(byte0, inS) (* ASCII character *) + else if inRange(0wxc2, byte0, 0wxdf) + then stateA (inS, W.andb(byte0, mask2Byte)) + else if inRange(0wxe1, byte0, 0wxec) + orelse inRange(0wxee, byte0, 0wxef) + then stateB (inS, W.andb(byte0, mask3Byte)) + else if (byte0 = 0wxe0) + then stateC (inS, W.andb(byte0, mask3Byte)) + else if (byte0 = 0wxed) + then stateD (inS, W.andb(byte0, mask3Byte)) + else if inRange(0wxf1, byte0, 0wxf3) + then stateE (inS, W.andb(byte0, mask4Byte)) + else if (byte0 = 0wxf0) + then stateF (inS, W.andb(byte0, mask4Byte)) + else if (byte0 = 0wxf4) + then stateG (inS, W.andb(byte0, mask4Byte)) + else raise Invalid + end + | NONE => NONE + (* end case *)) + in + start + end + + fun isAscii (wc : wchar) = (wc <= max1Byte) + fun toAscii (wc : wchar) = w2c(W.andb(0wx7f, wc)) + fun fromAscii c = W.andb(0wx7f, W.fromInt(Char.ord c)) + + (* return a printable string representation of a wide character *) + fun toString wc = + if isAscii wc + then Char.toCString(toAscii wc) + else if (wc <= max2Byte) + then "\\u" ^ (StringCvt.padLeft #"0" 4 (W.toString wc)) + (* NOTE: the following is not really SML syntax *) + else "\\u" ^ (StringCvt.padLeft #"0" 8 (W.toString wc)) + + (* return a list of characters that is the UTF8 encoding of a wide character *) + fun encode' (wc, chrs) = if (wc <= 0wx7f) + then w2c wc :: chrs + else if (wc <= 0wx7ff) + then w2c(W.orb(0wxc0, W.>>(wc, 0w6))) :: + w2c(W.orb(0wx80, W.andb(wc, 0wx3f))) :: chrs + else if (wc <= 0wxffff) + then w2c(W.orb(0wxe0, W.>>(wc, 0w12))) :: + w2c(W.orb(0wx80, W.andb(W.>>(wc, 0w6), 0wx3f))) :: + w2c(W.orb(0wx80, W.andb(wc, 0wx3f))) :: chrs + else if (wc <= maxCodePoint) + then w2c(W.orb(0wxf0, W.>>(wc, 0w18))) :: + w2c(W.orb(0wx80, W.andb(W.>>(wc, 0w12), 0wx3f))) :: + w2c(W.orb(0wx80, W.andb(W.>>(wc, 0w6), 0wx3f))) :: + w2c(W.orb(0wx80, W.andb(wc, 0wx3f))) :: chrs + else raise Invalid + + fun encode wc = String.implode(encode'(wc, [])) + + (* consume one valid UTF8 character from a substring *) + fun eatOneUTF8Char ss = let + fun getByte ss = (case SS.getc ss + of SOME(c, ss') => (Word.fromInt(ord c), ss') + | NONE => raise Incomplete + (* end case *)) + fun inRange (minB : word, b, maxB) = ((b - minB) <= maxB - minB) + (* handles last byte for all multi-byte sequences *) + fun stateA ss = let + val (b, ss) = getByte ss + in + if inRange(0wx80, b, 0wxbf) + then ss + else raise Invalid + end + (* handles second/third byte for three/four-byte sequences *) + and stateB ss = let + val (b, ss) = getByte ss + in + if inRange(0wx80, b, 0wxbf) + then stateA ss + else raise Invalid + end + (* byte0 = 0b1110_0000 (3-byte sequence) *) + and stateC ss = let + val (b, ss) = getByte ss + in + if inRange(0wxa0, b, 0wxbf) + then stateA ss + else raise Invalid + end + (* byte0 = 0b1110_1101 (3-byte sequence) *) + and stateD ss = let + val (b, ss) = getByte ss + in + if inRange(0wx80, b, 0wx9f) + then stateA ss + else raise Invalid + end + (* byte0 = 0b1111_0001 .. 0b1111_0011 (4-byte sequence) *) + and stateE ss = let + val (b, ss) = getByte ss + in + if inRange(0wx80, b, 0wxbf) + then stateB ss + else raise Invalid + end + (* byte0 = 0b1111_0000 (4-byte sequence) *) + and stateF ss = let + val (b, ss) = getByte ss + in + if inRange(0wx90, b, 0wxbf) + then stateB ss + else raise Invalid + end + (* byte0 = 0b1111_1000 (4-byte sequence) *) + and stateG ss = let + val (b, ss) = getByte ss + in + if inRange(0wx80, b, 0wx8f) + then stateB ss + else raise Invalid + end + in + case SS.getc ss + of SOME(c, ss) => let + val byte0 = Word.fromInt(ord c) + in + if (byte0 <= 0wx7f) + then ss (* ASCII character *) + else if inRange(0wxc2, byte0, 0wxdf) + then stateA ss + else if inRange(0wxe1, byte0, 0wxec) + orelse inRange(0wxee, byte0, 0wxef) + then stateB ss + else if (byte0 = 0wxe0) + then stateC ss + else if (byte0 = 0wxed) + then stateD ss + else if inRange(0wxf1, byte0, 0wxf3) + then stateE ss + else if (byte0 = 0wxf0) + then stateF ss + else if (byte0 = 0wxf4) + then stateG ss + else raise Invalid + end + | NONE => ss + (* end case *) + end + + (* return the number of Unicode characters in a substring *) + fun size' ss = let + fun len (ss, n) = if SS.isEmpty ss + then n + else len (eatOneUTF8Char ss, n+1) + in + len (ss, 0) + end + + (* return the number of Unicode characters in a string *) + fun size s = size' (SS.full s) + + (* get wide characters from substrings *) + val getWC = getu SS.getc + + fun map f s = let + fun mapf (ss, chrs) = (case getWC ss + of NONE => String.implodeRev chrs + | SOME(wc, ss) => mapf (ss, List.revAppend(encode'(wc, []), chrs)) + (* end case *)) + in + mapf (SS.full s, []) + end + + fun app f s = let + fun appf ss = (case getWC ss + of NONE => () + | SOME(wc, ss) => (f wc; appf ss) + (* end case *)) + in + appf (SS.full s) + end + + (* fold a function over the Unicode characters in the string *) + fun fold f = let + fun foldf (ss, acc) = (case getWC ss + of NONE => acc + | SOME(wc, ss) => foldf (ss, f (wc, acc)) + (* end case *)) + in + fn init => fn s => foldf (SS.full s, init) + end + + fun all pred s = let + fun allf ss = (case getWC ss + of NONE => true + | SOME(wc, ss) => pred wc andalso allf ss + (* end case *)) + in + allf (SS.full s) + end + + fun exists pred s = let + fun existsf ss = (case getWC ss + of NONE => true + | SOME(wc, ss) => pred wc orelse existsf ss + (* end case *)) + in + existsf (SS.full s) + end + + (* return the list of wide characters that are encoded by a string *) + fun explode s = List.rev(fold (op ::) [] s) + + fun implode wcs = String.implode(List.foldr encode' [] wcs) + + end diff --git a/smlnj-lib/Util/word-hash-table.sml b/smlnj-lib/Util/word-hash-table.sml new file mode 100644 index 0000000..9af6cd4 --- /dev/null +++ b/smlnj-lib/Util/word-hash-table.sml @@ -0,0 +1,228 @@ +(* word-hash-table.sml + * + * COPYRIGHT (c) 2024 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * A specialization of the hash table functor to word keys. + * + * AUTHOR: John Reppy + * University of Chicago + * https://cs.uchicago.edu/~jhr + *) + +structure WordHashTable :> MONO_HASH_TABLE where type Key.hash_key = word = + struct + + structure Key = + struct + type hash_key = word + fun sameKey (a : word, b) = (a = b) + fun hashVal a = a + end + + open Key + + structure HTRep = HashTableRep + + datatype 'a hash_table = HT of { + not_found : exn, + table : (hash_key, 'a) HTRep.table ref, + n_items : int ref + } + + fun index (i, sz) = Word.toIntX(Word.andb(i, Word.fromInt sz - 0w1)) + + (* Create a new table; the int is a size hint and the exception + * is to be raised by find. + *) + fun mkTable (sizeHint, notFound) = HT{ + not_found = notFound, + table = ref (HTRep.alloc sizeHint), + n_items = ref 0 + } + + (* remove all elements from the table *) + fun clear (HT{table, n_items, ...}) = (HTRep.clear(!table); n_items := 0) + + fun insertWithi combine (tbl as HT{table, n_items, ...}) (key, item) = let + val arr = !table + val sz = Array.length arr + val hash = hashVal key + val indx = index (hash, sz) + fun look HTRep.NIL = ( + Array.update(arr, indx, HTRep.B(hash, key, item, Array.sub(arr, indx))); + n_items := !n_items + 1; + HTRep.growTableIfNeeded (table, !n_items); + HTRep.NIL) + | look (HTRep.B(h, k, v, r)) = if ((hash = h) andalso sameKey(key, k)) + then HTRep.B(hash, key, combine(k, v, item), r) + else (case (look r) + of HTRep.NIL => HTRep.NIL + | rest => HTRep.B(h, k, v, rest) + (* end case *)) + in + case (look (Array.sub (arr, indx))) + of HTRep.NIL => () + | b => Array.update(arr, indx, b) + (* end case *) + end + + fun insertWith combine = insertWithi (fn (_, v1, v2) => combine(v1, v2)) + + (* Insert an item. If the key already has an item associated with it, + * then the old item is discarded. + *) + fun insert (tbl as HT{table, n_items, ...}) (key, item) = let + val arr = !table + val sz = Array.length arr + val hash = hashVal key + val indx = index (hash, sz) + fun look HTRep.NIL = ( + Array.update(arr, indx, HTRep.B(hash, key, item, Array.sub(arr, indx))); + n_items := !n_items + 1; + HTRep.growTableIfNeeded (table, !n_items); + HTRep.NIL) + | look (HTRep.B(h, k, v, r)) = if ((hash = h) andalso sameKey(key, k)) + then HTRep.B(hash, key, item, r) + else (case (look r) + of HTRep.NIL => HTRep.NIL + | rest => HTRep.B(h, k, v, rest) + (* end case *)) + in + case (look (Array.sub (arr, indx))) + of HTRep.NIL => () + | b => Array.update(arr, indx, b) + (* end case *) + end + + (* return true, if the key is in the domain of the table *) + fun inDomain (HT{table, ...}) key = let + val arr = !table + val hash = hashVal key + val indx = index (hash, Array.length arr) + fun look HTRep.NIL = false + | look (HTRep.B(h, k, v, r)) = + ((hash = h) andalso sameKey(key, k)) orelse look r + in + look (Array.sub (arr, indx)) + end + + (* find an item, the table's exception is raised if the item doesn't exist *) + fun lookup (HT{table, not_found, ...}) key = let + val arr = !table + val hash = hashVal key + val indx = index (hash, Array.length arr) + fun look HTRep.NIL = raise not_found + | look (HTRep.B(h, k, v, r)) = if ((hash = h) andalso sameKey(key, k)) + then v + else look r + in + look (Array.sub (arr, indx)) + end + + (* look for an item, return NONE if the item doesn't exist *) + fun find (HT{table, ...}) key = let + val arr = !table + val sz = Array.length arr + val hash = hashVal key + val indx = index (hash, sz) + fun look HTRep.NIL = NONE + | look (HTRep.B(h, k, v, r)) = if ((hash = h) andalso sameKey(key, k)) + then SOME v + else look r + in + look (Array.sub (arr, indx)) + end + + fun findAndRemove (HT{not_found, table, n_items}) key = let + val arr = !table + val sz = Array.length arr + val hash = hashVal key + val indx = index (hash, sz) + fun look HTRep.NIL = raise not_found + | look (HTRep.B(h, k, v, r)) = if ((hash = h) andalso sameKey(key, k)) + then (v, r) + else let + val (v', r') = look r + in + (v', HTRep.B(h, k, v, r')) + end + val (v, bucket) = look (Array.sub (arr, indx)) + in + Array.update (arr, indx, bucket); SOME v + end + handle _ => NONE + + (* Remove an item. The table's exception is raised if + * the item doesn't exist. + *) + fun remove (HT{not_found, table, n_items}) key = let + val arr = !table + val sz = Array.length arr + val hash = hashVal key + val indx = index (hash, sz) + fun look HTRep.NIL = raise not_found + | look (HTRep.B(h, k, v, r)) = if ((hash = h) andalso sameKey(key, k)) + then (v, r) + else let val (item, r') = look r in (item, HTRep.B(h, k, v, r')) end + val (item, bucket) = look (Array.sub (arr, indx)) + in + Array.update (arr, indx, bucket); + n_items := !n_items - 1; + item + end (* remove *) + + (* Return the number of items in the table *) + fun numItems (HT{n_items, ...}) = !n_items + + (* return a list of the items in the table *) + fun listItems (HT{table = ref arr, n_items, ...}) = + HTRep.listItems (arr, n_items) + fun listItemsi (HT{table = ref arr, n_items, ...}) = + HTRep.listItemsi (arr, n_items) + + (* Apply a function to the entries of the table *) + fun appi f (HT{table, ...}) = HTRep.appi f (! table) + fun app f (HT{table, ...}) = HTRep.app f (! table) + + (* Map a table to a new table that has the same keys and exception *) + fun mapi f (HT{table, n_items, not_found}) = HT{ + table = ref(HTRep.mapi f (! table)), + n_items = ref(!n_items), + not_found = not_found + } + fun map f (HT{table, n_items, not_found}) = HT{ + table = ref(HTRep.map f (! table)), + n_items = ref(!n_items), + not_found = not_found + } + + (* Fold a function over the entries of the table *) + fun foldi f init (HT{table, ...}) = HTRep.foldi f init (! table) + fun fold f init (HT{table, ...}) = HTRep.fold f init (! table) + + (* modify the hash-table items in place *) + fun modifyi f (HT{table, ...}) = HTRep.modifyi f (!table) + fun modify f (HT{table, ...}) = HTRep.modify f (!table) + + (* remove any hash table items that do not satisfy the given + * predicate. + *) + fun filteri pred (HT{table, n_items, ...}) = + n_items := HTRep.filteri pred (! table) + fun filter pred (HT{table, n_items, ...}) = + n_items := HTRep.filter pred (! table) + + (* Create a copy of a hash table *) + fun copy (HT{table, n_items, not_found}) = HT{ + table = ref(HTRep.copy(! table)), + n_items = ref(!n_items), + not_found = not_found + } + + (* returns a list of the sizes of the various buckets. This is to + * allow users to gauge the quality of their hashing function. + *) + fun bucketSizes (HT{table, ...}) = HTRep.bucketSizes (! table) + + end (* HashTableFn *) diff --git a/smlnj-lib/Util/word-redblack-map.sml b/smlnj-lib/Util/word-redblack-map.sml new file mode 100644 index 0000000..8d3ae36 --- /dev/null +++ b/smlnj-lib/Util/word-redblack-map.sml @@ -0,0 +1,686 @@ +(* word-redblack-map.sml + * + * COPYRIGHT (c) 2014 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * COPYRIGHT (c) 2000 Bell Labs, Lucent Technologies. + * + * This code is based on Chris Okasaki's implementation of + * red-black trees. The linear-time tree construction code is + * based on the paper "Constructing red-black trees" by Hinze, + * and the delete function is based on the description in Cormen, + * Leiserson, and Rivest. + * + * A red-black tree should satisfy the following two invariants: + * + * Red Invariant: each red node has black children (empty nodes are + * considered black). + * + * Black Invariant: each path from the root to an empty node has the + * same number of black nodes (the tree's black height). + * + * The Black invariant implies that any node with only one child + * will be black and its child will be a red leaf. + *) + +structure WordRedBlackMap :> ORD_MAP where type Key.ord_key = word = + struct + + structure Key = + struct + type ord_key = word + val compare = Word.compare + end + + datatype color = R | B + + datatype 'a tree + = E + | T of (color * 'a tree * Key.ord_key * 'a * 'a tree) + + datatype 'a map = MAP of (int * 'a tree) + + fun isEmpty (MAP(_, E)) = true + | isEmpty _ = false + + val empty = MAP(0, E) + + fun singleton (xk, x) = MAP(1, T(B, E, xk, x, E)) + + fun insert (MAP(nItems, m), xk, x) = let + val nItems' = ref nItems + fun ins E = (nItems' := nItems+1; T(R, E, xk, x, E)) + | ins (s as T(color, a, yk, y, b)) = + if (xk < yk) + then (case a + of T(R, c, zk, z, d) => + if (xk < zk) + then (case ins c + of T(R, e, wk, w, f) => T(R, T(B,e,wk,w,f), zk, z, T(B,d,yk,y,b)) + | c => T(B, T(R,c,zk,z,d), yk, y, b) + (* end case *)) + else if (xk = zk) + then T(color, T(R, c, xk, x, d), yk, y, b) + else (case ins d + of T(R, e, wk, w, f) => T(R, T(B,c,zk,z,e), wk, w, T(B,f,yk,y,b)) + | d => T(B, T(R,c,zk,z,d), yk, y, b) + (* end case *)) + | _ => T(B, ins a, yk, y, b) + (* end case *)) + else if (xk = yk) + then T(color, a, xk, x, b) + else (case b + of T(R, c, zk, z, d) => + if (xk < zk) + then (case ins c + of T(R, e, wk, w, f) => T(R, T(B,a,yk,y,e), wk, w, T(B,f,zk,z,d)) + | c => T(B, a, yk, y, T(R,c,zk,z,d)) + (* end case *)) + else if (xk = zk) + then T(color, a, yk, y, T(R, c, xk, x, d)) + else (case ins d + of T(R, e, wk, w, f) => T(R, T(B,a,yk,y,c), zk, z, T(B,e,wk,w,f)) + | d => T(B, a, yk, y, T(R,c,zk,z,d)) + (* end case *)) + | _ => T(B, a, yk, y, ins b) + (* end case *)) + val T(_, a, yk, y, b) = ins m + in + MAP(!nItems', T(B, a, yk, y, b)) + end + fun insert' ((xk, x), m) = insert (m, xk, x) + + fun insertWithi comb (MAP(nItems, m), xk, x) = let + val nItems' = ref nItems + fun ins E = (nItems' := nItems+1; T(R, E, xk, x, E)) + | ins (s as T(color, a, yk, y, b)) = + if (xk < yk) + then (case a + of T(R, c, zk, z, d) => + if (xk < zk) + then (case ins c + of T(R, e, wk, w, f) => T(R, T(B,e,wk,w,f), zk, z, T(B,d,yk,y,b)) + | c => T(B, T(R,c,zk,z,d), yk, y, b) + (* end case *)) + else if (xk = zk) + then T(color, T(R, c, xk, comb(xk, z, x), d), yk, y, b) + else (case ins d + of T(R, e, wk, w, f) => T(R, T(B,c,zk,z,e), wk, w, T(B,f,yk,y,b)) + | d => T(B, T(R,c,zk,z,d), yk, y, b) + (* end case *)) + | _ => T(B, ins a, yk, y, b) + (* end case *)) + else if (xk = yk) + then T(color, a, xk, comb(xk, y, x), b) + else (case b + of T(R, c, zk, z, d) => + if (xk < zk) + then (case ins c + of T(R, e, wk, w, f) => T(R, T(B,a,yk,y,e), wk, w, T(B,f,zk,z,d)) + | c => T(B, a, yk, y, T(R,c,zk,z,d)) + (* end case *)) + else if (xk = zk) + then T(color, a, yk, y, T(R, c, xk, comb(xk, z, x), d)) + else (case ins d + of T(R, e, wk, w, f) => T(R, T(B,a,yk,y,c), zk, z, T(B,e,wk,w,f)) + | d => T(B, a, yk, y, T(R,c,zk,z,d)) + (* end case *)) + | _ => T(B, a, yk, y, ins b) + (* end case *)) + val T(_, a, yk, y, b) = ins m + in + MAP(!nItems', T(B, a, yk, y, b)) + end + fun insertWith comb = insertWithi (fn (_, x1, x2) => comb(x1, x2)) + + (* Is a key in the domain of the map? *) + fun inDomain (MAP(_, t), k) = let + fun find' E = false + | find' (T(_, a, yk, y, b)) = + (k = yk) orelse ((k < yk) andalso find' a) orelse (find' b) + in + find' t + end + + (* Look for an item, return NONE if the item doesn't exist *) + fun find (MAP(_, t), k) = let + fun find' E = NONE + | find' (T(_, a, yk, y, b)) = + if (k < yk) + then find' a + else if (k = yk) + then SOME y + else find' b + in + find' t + end + + (* Look for an item, raise NotFound if the item doesn't exist *) + fun lookup (MAP(_, t), k) = let + fun look E = raise LibBase.NotFound + | look (T(_, a, yk, y, b)) = + if (k < yk) + then look a + else if (k = yk) + then y + else look b + in + look t + end + + (* Remove an item, returning new map and value removed. + * Raises LibBase.NotFound if not found. + *) + local + datatype 'a zipper + = TOP + | LEFT of (color * Key.ord_key * 'a * 'a tree * 'a zipper) + | RIGHT of (color * 'a tree * Key.ord_key * 'a * 'a zipper) + datatype 'a result = FOUND of 'a * 'a tree | NOT_FOUND + in + fun remove' (t, k) = let + (* zip the zipper *) + fun zip (TOP, t) = t + | zip (LEFT(color, xk, x, b, z), a) = zip(z, T(color, a, xk, x, b)) + | zip (RIGHT(color, a, xk, x, z), b) = zip(z, T(color, a, xk, x, b)) + (* zip the zipper while resolving a black deficit *) + fun fixupZip (TOP, t) = (true, t) + (* case 1 from CLR *) + | fixupZip (LEFT(B, xk, x, T(R, a, yk, y, b), p), t) = (case a + of T(_, T(R, a11, wk, w, a12), zk, z, a2) => (* case 1L ==> case 3L ==> case 4L *) + (false, zip (p, T(B, T(R, T(B, t, xk, x, a11), wk, w, T(B, a12, zk, z, a2)), yk, y, b))) + | T(_, a1, zk, z, T(R, a21, wk, w, t22)) => (* case 1L ==> case 4L *) + (false, zip (p, T(B, T(R, T(B, t, xk, x, a1), zk, z, T(B, a21, wk, w, t22)), yk, y, b))) + | T(_, a1, zk, z, a2) => (* case 1L ==> case 2L; rotate + recolor fixes deficit *) + (false, zip (p, T(B, T(B, t, xk, x, T(R, a1, zk, z, a2)), yk, y, b))) + | _ => fixupZip (LEFT(R, xk, x, a, LEFT(B, yk, y, b, p)), t) + (* end case *)) + | fixupZip (RIGHT(B, T(R, a, xk, x, b), yk, y, p), t) = (case b + of T(_, b1, zk, z, T(R, b21, wk, w, b22)) => (* case 1R ==> case 3R ==> case 4R *) + (false, zip (p, T(B, a, xk, x, T(R, T(B, b1, zk, z, b21), wk, w, T(B, b22, yk, y, t))))) + | T(_, T(R, b11, wk, w, b12), zk, z, b2) => (* case 1R ==> case 4R *) + (false, zip (p, T(B, a, xk, x, T(R, T(B, b11, wk, w, b12), zk, z, T(B, b2, yk, y, t))))) + | T(_, b1, zk, z, b2) => (* case 1L ==> case 2L; rotate + recolor fixes deficit *) + (false, zip (p, T(B, a, xk, x, T(B, T(R, b1, zk, z, b2), yk, y, t)))) + | _ => fixupZip (RIGHT(R, b, yk, y, RIGHT(B, a, xk, x, p)), t) + (* end case *)) + (* case 3 from CLR *) + | fixupZip (LEFT(color, xk, x, T(B, T(R, a1, yk, y, a2), zk, z, b), p), t) = + (* case 3L ==> case 4L *) + (false, zip (p, T(color, T(B, t, xk, x, a1), yk, y, T(B, a2, zk, z, b)))) + | fixupZip (RIGHT(color, T(B, a, xk, x, T(R, b1, yk, y, b2)), zk, z, p), t) = + (* case 3R ==> case 4R; rotate, recolor, plus rotate fixes deficit *) + (false, zip (p, T(color, T(B, a, xk, x, b1), yk, y, T(B, b2, zk, z, t)))) + (* case 4 from CLR *) + | fixupZip (LEFT(color, xk, x, T(B, a, yk, y, T(R, b1, zk, z, b2)), p), t) = + (false, zip (p, T(color, T(B, t, xk, x, a), yk, y, T(B, b1, zk, z, b2)))) + | fixupZip (RIGHT(color, T(B, T(R, a1, zk, z, a2), xk, x, b), yk, y, p), t) = + (false, zip (p, T(color, T(B, a1, zk, z, a2), xk, x, T(B, b, yk, y, t)))) + (* case 2 from CLR; note that "a" and "b" are guaranteed to be black, since we did + * not match cases 3 or 4. + *) + | fixupZip (LEFT(R, xk, x, T(B, a, yk, y, b), p), t) = + (false, zip (p, T(B, t, xk, x, T(R, a, yk, y, b)))) + | fixupZip (LEFT(B, xk, x, T(B, a, yk, y, b), p), t) = + fixupZip (p, T(B, t, xk, x, T(R, a, yk, y, b))) + | fixupZip (RIGHT(R, T(B, a, xk, x, b), yk, y, p), t) = + (false, zip (p, T(B, T(R, a, xk, x, b), yk, y, t))) + | fixupZip (RIGHT(B, T(B, a, xk, x, b), yk, y, p), t) = + fixupZip (p, T(B, T(R, a, xk, x, b), yk, y, t)) + (* push deficit up the tree by recoloring a black node as red *) + | fixupZip (LEFT(_, yk, y, E, p), t) = fixupZip (p, T(R, t, yk, y, E)) + | fixupZip (RIGHT(_, E, yk, y, p), t) = fixupZip (p, T(R, E, yk, y, t)) + (* impossible cases that violate the red invariant *) + | fixupZip _ = raise Fail "Red invariant violation" + (* delete the minimum value from a non-empty tree, returning a 4-tuple + * (key, elem, bd, tr), where key is the minimum key, elem is the element + * named by key, tr is the residual tree with elem removed, and bd is true + * if tr has a black-depth that is less than the original tree. + *) + fun delMin (T(R, E, yk, y, b), p) = + (* replace the node by its right subtree (which must be E) *) + (yk, y, false, zip(p, b)) + | delMin (T(B, E, yk, y, T(R, a', yk', y', b')), p) = + (* replace the node with its right child, while recoloring the child black to + * preserve the black invariant. + *) + (yk, y, false, zip (p, T(B, a', yk', y', b'))) + | delMin (T(B, E, yk, y, E), p) = let + (* delete the node, which reduces the black-depth by one, so we attempt to fix + * the deficit on the path back. + *) + val (blkDeficit, t) = fixupZip (p, E) + in + (yk, y, blkDeficit, t) + end + | delMin (T(color, a, yk, y, b), z) = delMin(a, LEFT(color, yk, y, b, z)) + | delMin (E, _) = raise Match + fun del (E, p) = NOT_FOUND + | del (T(color, a, yk, y, b), p) = + if (k < yk) + then del (a, LEFT(color, yk, y, b, p)) + else if (k = yk) + then (case (color, a, b) + of (R, E, E) => FOUND(y, zip(p, E)) + | (B, E, E) => FOUND(y, #2 (fixupZip (p, E))) + | (_, T(_, a', yk', y', b'), E) => + (* node is black and left child is red; we replace the node with its + * left child recolored to black. + *) + FOUND(y, zip(p, T(B, a', yk', y', b'))) + | (_, E, T(_, a', yk', y', b')) => + (* node is black and right child is red; we replace the node with its + * right child recolored to black. + *) + FOUND(y, zip(p, T(B, a', yk', y', b'))) + | _ => let + val (minKey, minElem, blkDeficit, b) = delMin (b, TOP) + in + if blkDeficit + then FOUND(y, #2 (fixupZip (RIGHT(color, a, minKey, minElem, p), b))) + else FOUND(y, zip (p, T(color, a, minKey, minElem, b))) + end + (* end case *)) + else del (b, RIGHT(color, a, yk, y, p)) + in + del (t, TOP) + end + (* Remove an item, returning new map and value removed. + * Raises LibBase.NotFound if not found. + *) + fun remove (MAP(nItems, t), k) = (case remove' (t, k) + of FOUND(item, T(R, a, xk, x, b)) => (MAP(nItems-1, T(B, a, xk, x, b)), item) + | FOUND(item, t) => (MAP(nItems-1, t), item) + | NOT_FOUND => raise LibBase.NotFound + (* end case *)) + fun findAndRemove (MAP(nItems, t), k) = (case remove' (t, k) + of FOUND(item, T(R, a, xk, x, b)) => SOME(MAP(nItems-1, T(B, a, xk, x, b)), item) + | FOUND(item, t) => SOME(MAP(nItems-1, t), item) + | NOT_FOUND => NONE + (* end case *)) + end (* local *) + + (* return the first item in the map (or NONE if it is empty) *) + fun first (MAP(_, t)) = let + fun f E = NONE + | f (T(_, E, _, x, _)) = SOME x + | f (T(_, a, _, _, _)) = f a + in + f t + end + fun firsti (MAP(_, t)) = let + fun f E = NONE + | f (T(_, E, xk, x, _)) = SOME(xk, x) + | f (T(_, a, _, _, _)) = f a + in + f t + end + + (* Return the number of items in the map *) + fun numItems (MAP(n, _)) = n + + fun foldl f = let + fun foldf (E, accum) = accum + | foldf (T(_, a, _, x, b), accum) = + foldf(b, f(x, foldf(a, accum))) + in + fn init => fn (MAP(_, m)) => foldf(m, init) + end + fun foldli f = let + fun foldf (E, accum) = accum + | foldf (T(_, a, xk, x, b), accum) = + foldf(b, f(xk, x, foldf(a, accum))) + in + fn init => fn (MAP(_, m)) => foldf(m, init) + end + + fun foldr f = let + fun foldf (E, accum) = accum + | foldf (T(_, a, _, x, b), accum) = + foldf(a, f(x, foldf(b, accum))) + in + fn init => fn (MAP(_, m)) => foldf(m, init) + end + fun foldri f = let + fun foldf (E, accum) = accum + | foldf (T(_, a, xk, x, b), accum) = + foldf(a, f(xk, x, foldf(b, accum))) + in + fn init => fn (MAP(_, m)) => foldf(m, init) + end + + fun listItems m = foldr (op ::) [] m + fun listItemsi m = foldri (fn (xk, x, l) => (xk, x)::l) [] m + + (* return an ordered list of the keys in the map. *) + fun listKeys m = foldri (fn (k, _, l) => k::l) [] m + + (* functions for walking the tree while keeping a stack of parents + * to be visited. + *) + fun next ((t as T(_, _, _, _, b))::rest) = (t, left(b, rest)) + | next _ = (E, []) + and left (E, rest) = rest + | left (t as T(_, a, _, _, _), rest) = left(a, t::rest) + fun start m = left(m, []) + + (* Given two maps `f` and `g`, return true if they have equal domains and if + * for every `x` in their domain, `rngEq(f x, g x) = true`. + *) + fun equiv rngEq (MAP(n1, m1), MAP(n2, m2)) = let + fun cmp (t1, t2) = (case (next t1, next t2) + of ((E, _), (E, _)) => true + | ((E, _), _) => false + | (_, (E, _)) => false + | ((T(_, _, xk, x, _), r1), (T(_, _, yk, y, _), r2)) => + (xk = yk) andalso rngEq(x, y) andalso cmp (r1, r2) + (* end case *)) + in + (n1 = n2) andalso cmp (start m1, start m2) + end + + (* Given two maps `f` and `g`, and a comparison function `rngCmp` on their + * range types, return the order of the maps. + *) + fun collate rngCmp (MAP(_, m1), MAP(_, m2)) = let + fun cmp (t1, t2) = (case (next t1, next t2) + of ((E, _), (E, _)) => EQUAL + | ((E, _), _) => LESS + | (_, (E, _)) => GREATER + | ((T(_, _, xk, x, _), r1), (T(_, _, yk, y, _), r2)) => + if (xk = yk) + then (case rngCmp(x, y) + of EQUAL => cmp (r1, r2) + | order => order + (* end case *)) + else if (xk < yk) + then LESS + else GREATER + (* end case *)) + in + cmp (start m1, start m2) + end + + (* Given two maps `f` and `g`, return true if the domain of `g` is a subset + * of the domain of `f` and for every `x` in the domain of `g`, + * `rngEq(g x, f x) = true`. + *) + fun extends rngEx (MAP(n1, m1), MAP(n2, m2)) = let + (* does t1 extend t2? *) + fun cmp (t1, t2) = (case (next t1, next t2) + of ((E, _), (E, _)) => true + | (_, (E, _)) => true + | ((E, _), _) => false + | ((T(_, _, xk, x, _), r1), (T(_, _, yk, y, _), r2)) => + if (xk < yk) then cmp (r1, t2) + else (yk = xk) andalso rngEx(x, y) andalso cmp (r1, r2) + (* end case *)) + in + (n1 >= n2) andalso cmp (start m1, start m2) + end + + (* support for constructing red-black trees in linear time from increasing + * ordered sequences (based on a description by R. Hinze). Note that the + * elements in the digits are ordered with the largest on the left, whereas + * the elements of the trees are ordered with the largest on the right. + *) + datatype 'a digit + = ZERO + | ONE of (Key.ord_key * 'a * 'a tree * 'a digit) + | TWO of (Key.ord_key * 'a * 'a tree * Key.ord_key * 'a * 'a tree * 'a digit) + (* add an item that is guaranteed to be larger than any in l *) + fun addItem (ak, a, l) = let + fun incr (ak, a, t, ZERO) = ONE(ak, a, t, ZERO) + | incr (ak1, a1, t1, ONE(ak2, a2, t2, r)) = + TWO(ak1, a1, t1, ak2, a2, t2, r) + | incr (ak1, a1, t1, TWO(ak2, a2, t2, ak3, a3, t3, r)) = + ONE(ak1, a1, t1, incr(ak2, a2, T(B, t3, ak3, a3, t2), r)) + in + incr(ak, a, E, l) + end + (* link the digits into a tree *) + fun linkAll t = let + fun link (t, ZERO) = t + | link (t1, ONE(ak, a, t2, r)) = link(T(B, t2, ak, a, t1), r) + | link (t, TWO(ak1, a1, t1, ak2, a2, t2, r)) = + link(T(B, T(R, t2, ak2, a2, t1), ak1, a1, t), r) + in + link (E, t) + end + + local + fun wrap f (MAP(_, m1), MAP(_, m2)) = let + val (n, result) = f (start m1, start m2, 0, ZERO) + in + MAP(n, linkAll result) + end + fun ins ((E, _), n, result) = (n, result) + | ins ((T(_, _, xk, x, _), r), n, result) = + ins(next r, n+1, addItem(xk, x, result)) + in + + (* return a map whose domain is the union of the domains of the two input + * maps, using the supplied function to define the map on elements that + * are in both domains. + *) + fun unionWith mergeFn = let + fun union (t1, t2, n, result) = (case (next t1, next t2) + of ((E, _), (E, _)) => (n, result) + | ((E, _), t2) => ins(t2, n, result) + | (t1, (E, _)) => ins(t1, n, result) + | ((T(_, _, xk, x, _), r1), (T(_, _, yk, y, _), r2)) => + if (xk < yk) + then union (r1, t2, n+1, addItem(xk, x, result)) + else if (xk = yk) + then union (r1, r2, n+1, addItem(xk, mergeFn(x, y), result)) + else union (t1, r2, n+1, addItem(yk, y, result)) + (* end case *)) + in + wrap union + end + fun unionWithi mergeFn = let + fun union (t1, t2, n, result) = (case (next t1, next t2) + of ((E, _), (E, _)) => (n, result) + | ((E, _), t2) => ins(t2, n, result) + | (t1, (E, _)) => ins(t1, n, result) + | ((T(_, _, xk, x, _), r1), (T(_, _, yk, y, _), r2)) => + if (xk < yk) + then union (r1, t2, n+1, addItem(xk, x, result)) + else if (xk = yk) + then + union (r1, r2, n+1, addItem(xk, mergeFn(xk, x, y), result)) + else union (t1, r2, n+1, addItem(yk, y, result)) + (* end case *)) + in + wrap union + end + + (* return a map whose domain is the intersection of the domains of the + * two input maps, using the supplied function to define the range. + *) + fun intersectWith mergeFn = let + fun intersect (t1, t2, n, result) = (case (next t1, next t2) + of ((T(_, _, xk, x, _), r1), (T(_, _, yk, y, _), r2)) => + if (xk < yk) + then intersect (r1, t2, n, result) + else if (xk = yk) + then intersect ( + r1, r2, n+1, addItem(xk, mergeFn(x, y), result)) + else intersect (t1, r2, n, result) + | _ => (n, result) + (* end case *)) + in + wrap intersect + end + fun intersectWithi mergeFn = let + fun intersect (t1, t2, n, result) = (case (next t1, next t2) + of ((T(_, _, xk, x, _), r1), (T(_, _, yk, y, _), r2)) => + if (xk < yk) + then intersect (r1, t2, n, result) + else if (xk = yk) + then intersect (r1, r2, n+1, + addItem(xk, mergeFn(xk, x, y), result)) + else intersect (t1, r2, n, result) + | _ => (n, result) + (* end case *)) + in + wrap intersect + end + + fun mergeWith mergeFn = let + fun merge (t1, t2, n, result) = (case (next t1, next t2) + of ((E, _), (E, _)) => (n, result) + | ((E, _), (T(_, _, yk, y, _), r2)) => + mergef(yk, NONE, SOME y, t1, r2, n, result) + | ((T(_, _, xk, x, _), r1), (E, _)) => + mergef(xk, SOME x, NONE, r1, t2, n, result) + | ((T(_, _, xk, x, _), r1), (T(_, _, yk, y, _), r2)) => + if (xk < yk) + then mergef(xk, SOME x, NONE, r1, t2, n, result) + else if (xk = yk) + then mergef(xk, SOME x, SOME y, r1, r2, n, result) + else mergef(yk, NONE, SOME y, t1, r2, n, result) + (* end case *)) + and mergef (k, x1, x2, r1, r2, n, result) = (case mergeFn(x1, x2) + of NONE => merge (r1, r2, n, result) + | SOME y => merge (r1, r2, n+1, addItem(k, y, result)) + (* end case *)) + in + wrap merge + end + fun mergeWithi mergeFn = let + fun merge (t1, t2, n, result) = (case (next t1, next t2) + of ((E, _), (E, _)) => (n, result) + | ((E, _), (T(_, _, yk, y, _), r2)) => + mergef(yk, NONE, SOME y, t1, r2, n, result) + | ((T(_, _, xk, x, _), r1), (E, _)) => + mergef(xk, SOME x, NONE, r1, t2, n, result) + | ((T(_, _, xk, x, _), r1), (T(_, _, yk, y, _), r2)) => + if (xk < yk) + then mergef(xk, SOME x, NONE, r1, t2, n, result) + else if (xk = yk) + then mergef(xk, SOME x, SOME y, r1, r2, n, result) + else mergef(yk, NONE, SOME y, t1, r2, n, result) + (* end case *)) + and mergef (k, x1, x2, r1, r2, n, result) = (case mergeFn(k, x1, x2) + of NONE => merge (r1, r2, n, result) + | SOME y => merge (r1, r2, n+1, addItem(k, y, result)) + (* end case *)) + in + wrap merge + end + end (* local *) + + fun app f = let + fun appf E = () + | appf (T(_, a, _, x, b)) = (appf a; f x; appf b) + in + fn (MAP(_, m)) => appf m + end + fun appi f = let + fun appf E = () + | appf (T(_, a, xk, x, b)) = (appf a; f(xk, x); appf b) + in + fn (MAP(_, m)) => appf m + end + + fun map f = let + fun mapf E = E + | mapf (T(color, a, xk, x, b)) = + T(color, mapf a, xk, f x, mapf b) + in + fn (MAP(n, m)) => MAP(n, mapf m) + end + fun mapi f = let + fun mapf E = E + | mapf (T(color, a, xk, x, b)) = + T(color, mapf a, xk, f(xk, x), mapf b) + in + fn (MAP(n, m)) => MAP(n, mapf m) + end + + (* Filter out those elements of the map that do not satisfy the + * predicate. The filtering is done in increasing map order. + *) + fun filter pred (MAP(_, t)) = let + fun walk (E, n, result) = (n, result) + | walk (T(_, a, xk, x, b), n, result) = let + val (n, result) = walk(a, n, result) + in + if (pred x) + then walk(b, n+1, addItem(xk, x, result)) + else walk(b, n, result) + end + val (n, result) = walk (t, 0, ZERO) + in + MAP(n, linkAll result) + end + fun filteri pred (MAP(_, t)) = let + fun walk (E, n, result) = (n, result) + | walk (T(_, a, xk, x, b), n, result) = let + val (n, result) = walk(a, n, result) + in + if (pred(xk, x)) + then walk(b, n+1, addItem(xk, x, result)) + else walk(b, n, result) + end + val (n, result) = walk (t, 0, ZERO) + in + MAP(n, linkAll result) + end + + (* map a partial function over the elements of a map in increasing + * map order. + *) + fun mapPartial f = let + fun f' (xk, x, m) = (case f x + of NONE => m + | (SOME y) => insert(m, xk, y) + (* end case *)) + in + foldli f' empty + end + fun mapPartiali f = let + fun f' (xk, x, m) = (case f(xk, x) + of NONE => m + | (SOME y) => insert(m, xk, y) + (* end case *)) + in + foldli f' empty + end + + (* check the elements of a map with a predicate and return true if + * any element satisfies the predicate. Return false otherwise. + * Elements are checked in key order. + *) + fun exists pred = let + fun exists' E = false + | exists' (T(_, a, _, x, b)) = exists' a orelse pred x orelse exists' b + in + fn (MAP(_, m)) => exists' m + end + fun existsi pred = let + fun exists' E = false + | exists' (T(_, a, k, x, b)) = exists' a orelse pred(k, x) orelse exists' b + in + fn (MAP(_, m)) => exists' m + end + + (* check the elements of a map with a predicate and return true if + * they all satisfy the predicate. Return false otherwise. Elements + * are checked in key order. + *) + fun all pred = let + fun all' E = true + | all' (T(_, a, _, x, b)) = all' a andalso pred x andalso all' b + in + fn (MAP(_, m)) => all' m + end + fun alli pred = let + fun all' E = true + | all' (T(_, a, k, x, b)) = all' a andalso pred(k, x) andalso all' b + in + fn (MAP(_, m)) => all' m + end + + end (* structure WordRedBlackMap *) diff --git a/smlnj-lib/Util/word-redblack-set.sml b/smlnj-lib/Util/word-redblack-set.sml new file mode 100644 index 0000000..75c0b46 --- /dev/null +++ b/smlnj-lib/Util/word-redblack-set.sml @@ -0,0 +1,515 @@ +(* word-redblack-set.sml + * + * COPYRIGHT (c) 2014 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * COPYRIGHT (c) 1999 Bell Labs, Lucent Technologies. + * + * This code is based on Chris Okasaki's implementation of + * red-black trees. The linear-time tree construction code is + * based on the paper "Constructing red-black trees" by Hinze, + * and the delete function is based on the description in Cormen, + * Leiserson, and Rivest. + * + * A red-black tree should satisfy the following two invariants: + * + * Red Invariant: each red node has black children (empty nodes are + * considered black). + * + * Black Invariant: each path from the root to an empty node has the + * same number of black nodes (the tree's black height). + * + * The Black invariant implies that any node with only one child + * will be black and its child will be a red leaf. + *) + +structure WordRedBlackSet :> ORD_SET where type Key.ord_key = word = + struct + + structure Key = + struct + type ord_key = word + val compare = Word.compare + end + + type item = Key.ord_key + + datatype color = R | B + + datatype tree + = E + | T of (color * tree * item * tree) + + datatype set = SET of (int * tree) + + fun isEmpty (SET(_, E)) = true + | isEmpty _ = false + + val empty = SET(0, E) + + fun minItem (SET(_, tr)) = let + fun min E = raise Empty + | min (T(_, E, item, _)) = item + | min (T(_, tr, _, _)) = min tr + in + min tr + end + + fun maxItem (SET(_, tr)) = let + fun max E = raise Empty + | max (T(_, _, item, E)) = item + | max (T(_, _, _, tr)) = max tr + in + max tr + end + + fun singleton x = SET(1, T(B, E, x, E)) + + fun add (SET(nItems, m), x) = let + val nItems' = ref nItems + fun ins E = (nItems' := nItems+1; T(R, E, x, E)) + | ins (s as T(color, a, y, b)) = + if (x < y) + then (case a + of T(R, c, z, d) => + if (x < z) + then (case ins c + of T(R, e, w, f) => T(R, T(B,e,w,f), z, T(B,d,y,b)) + | c => T(B, T(R,c,z,d), y, b) + (* end case *)) + else if (x = z) + then T(color, T(R, c, x, d), y, b) + else (case ins d + of T(R, e, w, f) => T(R, T(B,c,z,e), w, T(B,f,y,b)) + | d => T(B, T(R,c,z,d), y, b) + (* end case *)) + | _ => T(B, ins a, y, b) + (* end case *)) + else if (x = y) + then T(color, a, x, b) + else (case b + of T(R, c, z, d) => + if (x < z) + then (case ins c + of T(R, e, w, f) => T(R, T(B,a,y,e), w, T(B,f,z,d)) + | c => T(B, a, y, T(R,c,z,d)) + (* end case *)) + else if (x = z) + then T(color, a, y, T(R, c, x, d)) + else (case ins d + of T(R, e, w, f) => T(R, T(B,a,y,c), z, T(B,e,w,f)) + | d => T(B, a, y, T(R,c,z,d)) + (* end case *)) + | _ => T(B, a, y, ins b) + (* end case *)) + val T(_, a, y, b) = ins m + in + SET(!nItems', T(B, a, y, b)) + end + fun add' (x, m) = add (m, x) + + fun addList (s, []) = s + | addList (s, x::r) = addList(add(s, x), r) + + (* Remove an item. Raises LibBase.NotFound if not found. *) + local + datatype zipper + = TOP + | LEFT of (color * item * tree * zipper) + | RIGHT of (color * tree * item * zipper) + in + fun delete (SET(nItems, t), k) = let + (* zip the zipper *) + fun zip (TOP, t) = t + | zip (LEFT(color, x, b, p), a) = zip(p, T(color, a, x, b)) + | zip (RIGHT(color, a, x, p), b) = zip(p, T(color, a, x, b)) + (* zip the zipper while resolving a black deficit *) + fun fixupZip (TOP, t) = (true, t) + (* case 1 from CLR *) + | fixupZip (LEFT(B, x, T(R, a, y, b), p), t) = (case a + of T(_, T(R, a11, w, a12), z, a2) => (* case 1L ==> case 3L ==> case 4L *) + (false, zip (p, T(B, T(R, T(B, t, x, a11), w, T(B, a12, z, a2)), y, b))) + | T(_, a1, z, T(R, a21, w, t22)) => (* case 1L ==> case 4L *) + (false, zip (p, T(B, T(R, T(B, t, x, a1), z, T(B, a21, w, t22)), y, b))) + | T(_, a1, z, a2) => (* case 1L ==> case 2L; rotate + recolor fixes deficit *) + (false, zip (p, T(B, T(B, t, x, T(R, a1, z, a2)), y, b))) + | _ => fixupZip (LEFT(R, x, a, LEFT(B, y, b, p)), t) + (* end case *)) + | fixupZip (RIGHT(B, T(R, a, x, b), y, p), t) = (case b + of T(_, b1, z, T(R, b21, w, b22)) => (* case 1R ==> case 3R ==> case 4R *) + (false, zip (p, T(B, a, x, T(R, T(B, b1, z, b21), w, T(B, b22, y, t))))) + | T(_, T(R, b11, w, b12), z, b2) => (* case 1R ==> case 4R *) + (false, zip (p, T(B, a, x, T(R, T(B, b11, w, b12), z, T(B, b2, y, t))))) + | T(_, b1, z, b2) => (* case 1L ==> case 2L; rotate + recolor fixes deficit *) + (false, zip (p, T(B, a, x, T(B, T(R, b1, z, b2), y, t)))) + | _ => fixupZip (RIGHT(R, b, y, RIGHT(B, a, x, p)), t) + (* end case *)) + (* case 3 from CLR *) + | fixupZip (LEFT(color, x, T(B, T(R, a1, y, a2), z, b), p), t) = + (* case 3L ==> case 4L *) + (false, zip (p, T(color, T(B, t, x, a1), y, T(B, a2, z, b)))) + | fixupZip (RIGHT(color, T(B, a, x, T(R, b1, y, b2)), z, p), t) = + (* case 3R ==> case 4R; rotate, recolor, plus rotate fixes deficit *) + (false, zip (p, T(color, T(B, a, x, b1), y, T(B, b2, z, t)))) + (* case 4 from CLR *) + | fixupZip (LEFT(color, x, T(B, a, y, T(R, b1, z, b2)), p), t) = + (false, zip (p, T(color, T(B, t, x, a), y, T(B, b1, z, b2)))) + | fixupZip (RIGHT(color, T(B, T(R, a1, z, a2), x, b), y, p), t) = + (false, zip (p, T(color, T(B, a1, z, a2), x, T(B, b, y, t)))) + (* case 2 from CLR; note that "a" and "b" are guaranteed to be black, since we did + * not match cases 3 or 4. + *) + | fixupZip (LEFT(R, x, T(B, a, y, b), p), t) = + (false, zip (p, T(B, t, x, T(R, a, y, b)))) + | fixupZip (LEFT(B, x, T(B, a, y, b), p), t) = + fixupZip (p, T(B, t, x, T(R, a, y, b))) + | fixupZip (RIGHT(R, T(B, a, x, b), y, p), t) = + (false, zip (p, T(B, T(R, a, x, b), y, t))) + | fixupZip (RIGHT(B, T(B, a, x, b), y, p), t) = + fixupZip (p, T(B, T(R, a, x, b), y, t)) + (* push deficit up the tree by recoloring a black node as red *) + | fixupZip (LEFT(_, y, E, p), t) = fixupZip (p, T(R, t, y, E)) + | fixupZip (RIGHT(_, E, y, p), t) = fixupZip (p, T(R, E, y, t)) + (* impossible cases that violate the red invariant *) + | fixupZip _ = raise Fail "Red invariant violation" + (* delete the minimum value from a non-empty tree, returning a triple + * (elem, bd, tr), where elem is the minimum element, tr is the residual + * tree with elem removed, and bd is true if tr has a black-depth that is + * less than the original tree. + *) + fun delMin (T(R, E, y, b), p) = + (* replace the node by its right subtree (which must be E) *) + (y, false, zip(p, b)) + | delMin (T(B, E, y, T(R, a', y', b')), p) = + (* replace the node with its right child, while recoloring the child black to + * preserve the black invariant. + *) + (y, false, zip (p, T(B, a', y', b'))) + | delMin (T(B, E, y, E), p) = let + (* delete the node, which reduces the black-depth by one, so we attempt to fix + * the deficit on the path back. + *) + val (blkDeficit, t) = fixupZip (p, E) + in + (y, blkDeficit, t) + end + | delMin (T(color, a, y, b), z) = delMin(a, LEFT(color, y, b, z)) + | delMin (E, _) = raise Match + fun del (E, z) = raise LibBase.NotFound + | del (T(color, a, y, b), p) = + if (k < y) + then del (a, LEFT(color, y, b, p)) + else if (k = y) + then (case (color, a, b) + of (R, E, E) => zip(p, E) + | (B, E, E) => #2 (fixupZip (p, E)) + | (_, T(_, a', y', b'), E) => + (* node is black and left child is red; we replace the node with its + * left child recolored to black. + *) + zip(p, T(B, a', y', b')) + | (_, E, T(_, a', y', b')) => + (* node is black and right child is red; we replace the node with its + * right child recolored to black. + *) + zip(p, T(B, a', y', b')) + | _ => let + val (minSucc, blkDeficit, b) = delMin (b, TOP) + in + if blkDeficit + then #2 (fixupZip (RIGHT(color, a, minSucc, p), b)) + else zip (p, T(color, a, minSucc, b)) + end + (* end case *)) + else del (b, RIGHT(color, a, y, p)) + in + case del(t, TOP) + of T(R, a, x, b) => SET(nItems-1, T(B, a, x, b)) + | t => SET(nItems-1, t) + (* end case *) + end + end (* local *) + + (* Return true if and only if item is an element in the set *) + fun member (SET(_, t), k) = let + fun find' E = false + | find' (T(_, a, y, b)) = + (k = y) orelse ((k < y) andalso find' a) orelse find' b + in + find' t + end + + (* Return the number of items in the map *) + fun numItems (SET(n, _)) = n + + fun foldl f = let + fun foldf (E, accum) = accum + | foldf (T(_, a, x, b), accum) = + foldf(b, f(x, foldf(a, accum))) + in + fn init => fn (SET(_, m)) => foldf(m, init) + end + + fun foldr f = let + fun foldf (E, accum) = accum + | foldf (T(_, a, x, b), accum) = + foldf(a, f(x, foldf(b, accum))) + in + fn init => fn (SET(_, m)) => foldf(m, init) + end + + (* return an ordered list of the items in the set. *) + fun toList s = foldr (fn (x, l) => x::l) [] s + + (* functions for walking the tree while keeping a stack of parents + * to be visited. + *) + fun next ((t as T(_, _, _, b))::rest) = (t, left(b, rest)) + | next _ = (E, []) + and left (E, rest) = rest + | left (t as T(_, a, _, _), rest) = left(a, t::rest) + fun start m = left(m, []) + + (* Return true if and only if the two sets are equal *) + fun equal (SET(_, s1), SET(_, s2)) = let + fun cmp (t1, t2) = (case (next t1, next t2) + of ((E, _), (E, _)) => true + | ((E, _), _) => false + | (_, (E, _)) => false + | ((T(_, _, x, _), r1), (T(_, _, y, _), r2)) => + (x = y) andalso cmp (r1, r2) + (* end case *)) + in + cmp (start s1, start s2) + end + + (* Return the lexical order of two sets *) + fun compare (SET(_, s1), SET(_, s2)) = let + fun cmp (t1, t2) = (case (next t1, next t2) + of ((E, _), (E, _)) => EQUAL + | ((E, _), _) => LESS + | (_, (E, _)) => GREATER + | ((T(_, _, x, _), r1), (T(_, _, y, _), r2)) => + if (x = y) + then cmp (r1, r2) + else if (x < y) + then LESS + else GREATER + (* end case *)) + in + cmp (start s1, start s2) + end + + (* Return true if and only if the first set is a subset of the second *) + fun isSubset (SET(_, s1), SET(_, s2)) = let + fun cmp (t1, t2) = (case (next t1, next t2) + of ((E, _), (E, _)) => true + | ((E, _), _) => true + | (_, (E, _)) => false + | ((T(_, _, x, _), r1), (T(_, _, y, _), r2)) => + ((x = y) andalso cmp (r1, r2)) + orelse ((x > y) andalso cmp (t1, r2)) + (* end case *)) + in + cmp (start s1, start s2) + end + + (* Return true if the two sets are disjoint *) + fun disjoint (SET(0, _), _) = true + | disjoint (_, SET(0, _)) = true + | disjoint (SET(_, s1), SET(_, s2)) = let + fun walk ((E, _), _) = true + | walk (_, (E, _)) = true + | walk (t1 as (T(_, _, x, _), r1), t2 as (T(_, _, y, _), r2)) = + ((x < y) andalso walk (next r1, t2)) + orelse ((x > y) andalso walk (t1, next r2)) + in + walk (next (start s1), next (start s2)) + end + + (* support for constructing red-black trees in linear time from increasing + * ordered sequences (based on a description by R. Hinze). Note that the + * elements in the digits are ordered with the largest on the left, whereas + * the elements of the trees are ordered with the largest on the right. + *) + datatype digit + = ZERO + | ONE of (item * tree * digit) + | TWO of (item * tree * item * tree * digit) + (* add an item that is guaranteed to be larger than any in l *) + fun addItem (a, l) = let + fun incr (a, t, ZERO) = ONE(a, t, ZERO) + | incr (a1, t1, ONE(a2, t2, r)) = TWO(a1, t1, a2, t2, r) + | incr (a1, t1, TWO(a2, t2, a3, t3, r)) = + ONE(a1, t1, incr(a2, T(B, t3, a3, t2), r)) + in + incr(a, E, l) + end + (* link the digits into a tree *) + fun linkAll t = let + fun link (t, ZERO) = t + | link (t1, ONE(a, t2, r)) = link(T(B, t2, a, t1), r) + | link (t, TWO(a1, t1, a2, t2, r)) = + link(T(B, T(R, t2, a2, t1), a1, t), r) + in + link (E, t) + end + + (* create a set from a list of items; this function works in linear time if the list + * is in increasing order. + *) + fun fromList [] = empty + | fromList (first::rest) = let + fun add (prev, x::xs, n, accum) = if (prev < x) + then add(x, xs, n+1, addItem(x, accum)) + else (* list not in order, so fall back to addList code *) + addList(SET(n, linkAll accum), x::xs) + | add (_, [], n, accum) = SET(n, linkAll accum) + in + add (first, rest, 1, addItem(first, ZERO)) + end + + (* return the union of the two sets *) + fun union (SET(_, s1), SET(_, s2)) = let + fun ins ((E, _), n, result) = (n, result) + | ins ((T(_, _, x, _), r), n, result) = + ins(next r, n+1, addItem(x, result)) + fun union' (t1, t2, n, result) = (case (next t1, next t2) + of ((E, _), (E, _)) => (n, result) + | ((E, _), t2) => ins(t2, n, result) + | (t1, (E, _)) => ins(t1, n, result) + | ((T(_, _, x, _), r1), (T(_, _, y, _), r2)) => + if (x < y) + then union' (r1, t2, n+1, addItem(x, result)) + else if (x = y) + then union' (r1, r2, n+1, addItem(x, result)) + else union' (t1, r2, n+1, addItem(y, result)) + (* end case *)) + val (n, result) = union' (start s1, start s2, 0, ZERO) + in + SET(n, linkAll result) + end + + (* return the intersection of the two sets *) + fun intersection (SET(_, s1), SET(_, s2)) = let + fun intersect (t1, t2, n, result) = (case (next t1, next t2) + of ((T(_, _, x, _), r1), (T(_, _, y, _), r2)) => + if (x < y) + then intersect (r1, t2, n, result) + else if (x = y) + then intersect (r1, r2, n+1, addItem(x, result)) + else intersect (t1, r2, n, result) + | _ => (n, result) + (* end case *)) + val (n, result) = intersect (start s1, start s2, 0, ZERO) + in + SET(n, linkAll result) + end + + (* return the set difference *) + fun difference (SET(_, s1), SET(_, s2)) = let + fun ins ((E, _), n, result) = (n, result) + | ins ((T(_, _, x, _), r), n, result) = + ins(next r, n+1, addItem(x, result)) + fun diff (t1, t2, n, result) = (case (next t1, next t2) + of ((E, _), _) => (n, result) + | (t1, (E, _)) => ins(t1, n, result) + | ((T(_, _, x, _), r1), (T(_, _, y, _), r2)) => + if (x < y) + then diff (r1, t2, n+1, addItem(x, result)) + else if (x = y) + then diff (r1, r2, n, result) + else diff (t1, r2, n, result) + (* end case *)) + val (n, result) = diff (start s1, start s2, 0, ZERO) + in + SET(n, linkAll result) + end + + fun subtract (s, item) = difference (s, singleton item) + fun subtract' (item, s) = subtract (s, item) + + fun subtractList (l, items) = let + val items' = List.foldl (fn (x, set) => add(set, x)) (SET(0, E)) items + in + difference (l, items') + end + + fun app f = let + fun appf E = () + | appf (T(_, a, x, b)) = (appf a; f x; appf b) + in + fn (SET(_, m)) => appf m + end + + fun map f = let + fun addf (x, m) = add(m, f x) + in + foldl addf empty + end + + fun mapPartial f = let + fun f' (x, acc) = (case f x of SOME x' => add(acc, x') | NONE => acc) + in + foldl f' empty + end + + (* Filter out those elements of the set that do not satisfy the + * predicate. The filtering is done in increasing map order. + *) + fun filter pred (SET(_, t)) = let + fun walk (E, n, result) = (n, result) + | walk (T(_, a, x, b), n, result) = let + val (n, result) = walk(a, n, result) + in + if (pred x) + then walk(b, n+1, addItem(x, result)) + else walk(b, n, result) + end + val (n, result) = walk (t, 0, ZERO) + in + SET(n, linkAll result) + end + + fun partition pred (SET(_, t)) = let + fun walk (E, n1, result1, n2, result2) = (n1, result1, n2, result2) + | walk (T(_, a, x, b), n1, result1, n2, result2) = let + val (n1, result1, n2, result2) = walk(a, n1, result1, n2, result2) + in + if (pred x) + then walk(b, n1+1, addItem(x, result1), n2, result2) + else walk(b, n1, result1, n2+1, addItem(x, result2)) + end + val (n1, result1, n2, result2) = walk (t, 0, ZERO, 0, ZERO) + in + (SET(n1, linkAll result1), SET(n2, linkAll result2)) + end + + fun exists pred = let + fun test E = false + | test (T(_, a, x, b)) = test a orelse pred x orelse test b + in + fn (SET(_, t)) => test t + end + + fun all pred = let + fun test E = true + | test (T(_, a, x, b)) = test a andalso pred x andalso test b + in + fn (SET(_, t)) => test t + end + + fun find pred = let + fun test E = NONE + | test (T(_, a, x, b)) = (case test a + of NONE => if pred x then SOME x else test b + | someItem => someItem + (* end case *)) + in + fn (SET(_, t)) => test t + end + + (* deprecated *) + val listItems = toList + + end; diff --git a/smlnj-lib/XML/.cm/GUID/generic-xml-tree.sml b/smlnj-lib/XML/.cm/GUID/generic-xml-tree.sml new file mode 100644 index 0000000..e9b56f2 --- /dev/null +++ b/smlnj-lib/XML/.cm/GUID/generic-xml-tree.sml @@ -0,0 +1 @@ +guid-$/(xml-lib.cm):generic-xml-tree.sml-1714016094.579 diff --git a/smlnj-lib/XML/.cm/GUID/xml-lexer.lex.sml b/smlnj-lib/XML/.cm/GUID/xml-lexer.lex.sml new file mode 100644 index 0000000..40c8833 --- /dev/null +++ b/smlnj-lib/XML/.cm/GUID/xml-lexer.lex.sml @@ -0,0 +1 @@ +guid-$/(xml-lib.cm):xml-lexer.lex.sml-1714016093.881 diff --git a/smlnj-lib/XML/.cm/GUID/xml-parser-fn.sml b/smlnj-lib/XML/.cm/GUID/xml-parser-fn.sml new file mode 100644 index 0000000..e1d6d09 --- /dev/null +++ b/smlnj-lib/XML/.cm/GUID/xml-parser-fn.sml @@ -0,0 +1 @@ +guid-$/(xml-lib.cm):xml-parser-fn.sml-1714016094.498 diff --git a/smlnj-lib/XML/.cm/GUID/xml-schema-sig.sml b/smlnj-lib/XML/.cm/GUID/xml-schema-sig.sml new file mode 100644 index 0000000..07069b9 --- /dev/null +++ b/smlnj-lib/XML/.cm/GUID/xml-schema-sig.sml @@ -0,0 +1 @@ +guid-$/(xml-lib.cm):xml-schema-sig.sml-1714016093.862 diff --git a/smlnj-lib/XML/.cm/GUID/xml-tokens.sml b/smlnj-lib/XML/.cm/GUID/xml-tokens.sml new file mode 100644 index 0000000..beb29a6 --- /dev/null +++ b/smlnj-lib/XML/.cm/GUID/xml-tokens.sml @@ -0,0 +1 @@ +guid-$/(xml-lib.cm):xml-tokens.sml-1714016093.870 diff --git a/smlnj-lib/XML/.cm/GUID/xml-tree-fn.sml b/smlnj-lib/XML/.cm/GUID/xml-tree-fn.sml new file mode 100644 index 0000000..c62b228 --- /dev/null +++ b/smlnj-lib/XML/.cm/GUID/xml-tree-fn.sml @@ -0,0 +1 @@ +guid-$/(xml-lib.cm):xml-tree-fn.sml-1714016094.574 diff --git a/smlnj-lib/XML/.cm/GUID/xml-tree-sig.sml b/smlnj-lib/XML/.cm/GUID/xml-tree-sig.sml new file mode 100644 index 0000000..b2c75e3 --- /dev/null +++ b/smlnj-lib/XML/.cm/GUID/xml-tree-sig.sml @@ -0,0 +1 @@ +guid-$/(xml-lib.cm):xml-tree-sig.sml-1714016093.866 diff --git a/smlnj-lib/XML/.cm/SKEL/generic-xml-tree.sml b/smlnj-lib/XML/.cm/SKEL/generic-xml-tree.sml new file mode 100644 index 0000000..179f961 --- /dev/null +++ b/smlnj-lib/XML/.cm/SKEL/generic-xml-tree.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f3d"Char"d"CharVector"d"Atom"ad"GenericXMLTree"jh1bd2aSchema"h0aTree"jgp18gp1e"XMLTreeFn"egp1gp1c"XML_TREE" \ No newline at end of file diff --git a/smlnj-lib/XML/.cm/SKEL/xml-lexer.lex.sml b/smlnj-lib/XML/.cm/SKEL/xml-lexer.lex.sml new file mode 100644 index 0000000..768a8f2 --- /dev/null +++ b/smlnj-lib/XML/.cm/SKEL/xml-lexer.lex.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f6ULexBuffer"CAntlrStreamPos"d"UTF8"d"Word"List"d"Int"Cd"Option"String"d"Substring"d"TextIO"Vector"Nad"XMLLexer"h1bd2aUserDeclarations"h1aT"gp1d"XMLTokens"bd2egp1f7C1&Nf0f3 \ No newline at end of file diff --git a/smlnj-lib/XML/.cm/SKEL/xml-parser-fn.sml b/smlnj-lib/XML/.cm/SKEL/xml-parser-fn.sml new file mode 100644 index 0000000..893a69a --- /dev/null +++ b/smlnj-lib/XML/.cm/SKEL/xml-parser-fn.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2aXML_PARSER"h1aXMLTree"XML_TREE"ae"XMLParserFn"i2aXT"f7=d"AntlrStreamPos"Cd"XMLLexer"XMLTokens"d"List"d"String"d"TextIO"Njh3agp1=ad"XS"gp2=d"Schema"ad"Tok"gp1%gp1 \ No newline at end of file diff --git a/smlnj-lib/XML/.cm/SKEL/xml-schema-sig.sml b/smlnj-lib/XML/.cm/SKEL/xml-schema-sig.sml new file mode 100644 index 0000000..95b5303 --- /dev/null +++ b/smlnj-lib/XML/.cm/SKEL/xml-schema-sig.sml @@ -0,0 +1,2 @@ +Skeleton 5 +ac"XML_SCHEMA"h0 \ No newline at end of file diff --git a/smlnj-lib/XML/.cm/SKEL/xml-tokens.sml b/smlnj-lib/XML/.cm/SKEL/xml-tokens.sml new file mode 100644 index 0000000..072a653 --- /dev/null +++ b/smlnj-lib/XML/.cm/SKEL/xml-tokens.sml @@ -0,0 +1,2 @@ +Skeleton 5 +ad"XMLTokens"h0 \ No newline at end of file diff --git a/smlnj-lib/XML/.cm/SKEL/xml-tree-fn.sml b/smlnj-lib/XML/.cm/SKEL/xml-tree-fn.sml new file mode 100644 index 0000000..99dda7f --- /dev/null +++ b/smlnj-lib/XML/.cm/SKEL/xml-tree-fn.sml @@ -0,0 +1,2 @@ +Skeleton 5 +ae"XMLTreeFn"i1aSchema"gp1c"XML_SCHEMA"jh1agp1gp1c"XML_TREE" \ No newline at end of file diff --git a/smlnj-lib/XML/.cm/SKEL/xml-tree-sig.sml b/smlnj-lib/XML/.cm/SKEL/xml-tree-sig.sml new file mode 100644 index 0000000..af0666c --- /dev/null +++ b/smlnj-lib/XML/.cm/SKEL/xml-tree-sig.sml @@ -0,0 +1,2 @@ +Skeleton 5 +ac"XML_TREE"h1ad"Schema"gp1c"XML_SCHEMA" \ No newline at end of file diff --git a/smlnj-lib/XML/.cm/amd64-unix/generic-xml-tree.sml b/smlnj-lib/XML/.cm/amd64-unix/generic-xml-tree.sml new file mode 100644 index 0000000..44297e8 Binary files /dev/null and b/smlnj-lib/XML/.cm/amd64-unix/generic-xml-tree.sml differ diff --git a/smlnj-lib/XML/.cm/amd64-unix/xml-lexer.lex.sml b/smlnj-lib/XML/.cm/amd64-unix/xml-lexer.lex.sml new file mode 100644 index 0000000..48a9034 Binary files /dev/null and b/smlnj-lib/XML/.cm/amd64-unix/xml-lexer.lex.sml differ diff --git a/smlnj-lib/XML/.cm/amd64-unix/xml-parser-fn.sml b/smlnj-lib/XML/.cm/amd64-unix/xml-parser-fn.sml new file mode 100644 index 0000000..ec997f2 Binary files /dev/null and b/smlnj-lib/XML/.cm/amd64-unix/xml-parser-fn.sml differ diff --git a/smlnj-lib/XML/.cm/amd64-unix/xml-schema-sig.sml b/smlnj-lib/XML/.cm/amd64-unix/xml-schema-sig.sml new file mode 100644 index 0000000..1e1cc7b Binary files /dev/null and b/smlnj-lib/XML/.cm/amd64-unix/xml-schema-sig.sml differ diff --git a/smlnj-lib/XML/.cm/amd64-unix/xml-tokens.sml b/smlnj-lib/XML/.cm/amd64-unix/xml-tokens.sml new file mode 100644 index 0000000..a5a4df2 Binary files /dev/null and b/smlnj-lib/XML/.cm/amd64-unix/xml-tokens.sml differ diff --git a/smlnj-lib/XML/.cm/amd64-unix/xml-tree-fn.sml b/smlnj-lib/XML/.cm/amd64-unix/xml-tree-fn.sml new file mode 100644 index 0000000..d2a2017 Binary files /dev/null and b/smlnj-lib/XML/.cm/amd64-unix/xml-tree-fn.sml differ diff --git a/smlnj-lib/XML/.cm/amd64-unix/xml-tree-sig.sml b/smlnj-lib/XML/.cm/amd64-unix/xml-tree-sig.sml new file mode 100644 index 0000000..4ec26aa Binary files /dev/null and b/smlnj-lib/XML/.cm/amd64-unix/xml-tree-sig.sml differ diff --git a/smlnj-lib/XML/README b/smlnj-lib/XML/README new file mode 100644 index 0000000..99ff8ad --- /dev/null +++ b/smlnj-lib/XML/README @@ -0,0 +1,22 @@ +This directory contains a small library for parsing XML files. It does +not support validation (e.g., against a DTD or Schema). The basic idea +is that the user supplies a "schema" module that describes the elements +and attribute representation of an XML document. From this, one builds +an XML tree representation + + structure MyXMLTree = XMLTreeFn (MyXMLSchema); + +and an XML parser + + structure MyXMLParser = XMLParserFn (MyXMLTree); + +A generic XML schema (that allows any element name) is provided. + +For a more complete library for XML processing, use the fxp library. + +TODO: + handle internal subsets in "" definitions + handle processing markup ("") + support a SAX-type parsing interface + support a DOM API a la TinyXML (http://www.grinninglizard.com/tinyxml) + or TinyXML-2 (http://www.grinninglizard.com/tinyxml2/) diff --git a/smlnj-lib/XML/generic-xml-tree.sml b/smlnj-lib/XML/generic-xml-tree.sml new file mode 100644 index 0000000..767fec8 --- /dev/null +++ b/smlnj-lib/XML/generic-xml-tree.sml @@ -0,0 +1,43 @@ +(* generic-xml-tree.sml + * + * COPYRIGHT (c) 2013 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * This is a generic instantiation of the XMLTreeFn with a representation of + * elements and attributes as Atom.atom values. It does not preserve whitespace. + *) + +structure GenericXMLTree : XML_TREE + where type Schema.element = Atom.atom + where type Schema.attribute = Atom.atom * string + = struct + local + structure Schema = + struct + + type element = Atom.atom + type attribute = (Atom.atom * string) + + (* create an element; returns NONE if the element name is unrecognized *) + fun element s = SOME(Atom.atom(CharVector.map Char.toUpper s)) + + (* should leading and trailing whitespace be preserved in the content of this element? *) + fun preserveWS _ = false + + (* should comments be preserved *) + fun preserveComment _ = false + + (* equality test *) + val same = Atom.same + + val toString = Atom.toString + + (* create an attribute from a name/value pair *) + fun attribute (id, value) = (Atom.atom id, value) + + end + structure Tree = XMLTreeFn (Schema) + in + open Tree + end (* local *) + end diff --git a/smlnj-lib/XML/xml-lexer.lex b/smlnj-lib/XML/xml-lexer.lex new file mode 100644 index 0000000..a039941 --- /dev/null +++ b/smlnj-lib/XML/xml-lexer.lex @@ -0,0 +1,119 @@ +(* xml-lexer.lex + * + * COPYRIGHT (c) 2013 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * An ML-ULex parser for parsing XML files. + * + * TODO: line ending normalization? + *) + +%name XMLLexer; + +%arg (lexErr); + +%defs ( + structure T = XMLTokens + type lex_result = T.token + fun eof () = T.EOF + +(* list of strings to build attribute values *) + val text : string list ref = ref [] + fun addText s = (text := s :: !text) + fun addDecimalEscape s = addText(UTF8.encode(Word.fromInt(Option.valOf(Int.fromString s)))) + fun addHexEscape s = addText(UTF8.encode(Option.valOf(Word.fromString s))) + fun textToString () = let + val s = String.concat(List.rev(!text)) + in + text := []; s + end + +(* trim m characters from the left and n characters from the right *) + fun trim (m, ss, n) = Substring.string(Substring.triml m (Substring.trimr n ss)) +); + +%let ws = [ \t\n\v\f\r]; +%let digit = [0-9]; +%let alpha = [a-zA-Z]; +%let idstartchr = [a-zA-Z_:]; +%let idchr = ({idstartchr}|[-.0-9]); +%let pubidchr1 = [ \n\n\t] | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]; +%let pubidchr2 = [ \n\n\t] | [a-zA-Z0-9] | [-()+,./:=?;!*#@$_%]; (* without ' *) + +(* the lexer states: + * INITIAL + * COM scanning inside "" => (addText yytext; YYBEGIN INITIAL; T.COM(textToString())); +. => (addText yytext; continue()); + +"<" => (YYBEGIN TAG; T.OPEN_START_TAG); +" (YYBEGIN TAG; T.OPEN_END_TAG); +" (YYBEGIN TAG; T.OPEN_XML_TAG); +" (YYBEGIN DOCTYPE; T.OPEN_DOCTYPE); + +[pP][uU][bB][lL][iI][cC] + => (T.PUBLIC); +[sS][yY][sS][tT][eE][mM] + => (T.SYSTEM); +"\""{pubidchr1}*"\"" => (T.LIT(String.substring(yytext, 1, size yytext - 2))); +"'"{pubidchr2}*"'" => (T.LIT(String.substring(yytext, 1, size yytext - 2))); +">" => (YYBEGIN INITIAL; T.CLOSE_TAG); + +{ws}+ => (skip()); +"?>" => (YYBEGIN INITIAL; T.CLOSE_PI_TAG); +">" => (YYBEGIN INITIAL; T.CLOSE_TAG); +"/>" => (YYBEGIN INITIAL; T.CLOSE_EMPTY_TAG); +"=" => (T.SYM_EQ); +{idstartchr}{idchr}* + => (T.ID yytext); +"\"" => (YYBEGIN LIT1; continue()); +"'" => (YYBEGIN LIT2; continue()); + +"\"" => (YYBEGIN TAG; T.LIT(textToString())); +"\'" => (YYBEGIN TAG; T.LIT(textToString())); +""" => (addText ("\""); continue()); +"<" => (addText ("<"); continue()); +">" => (addText (">"); continue()); +"&" => (addText ("&"); continue()); +"'" => (addText ("'"); continue()); +"&#"[0-9]+";" => (addDecimalEscape(trim(2, yysubstr, 1)); continue()); +"&#x"[a-fA-F0-9]+";" => (addHexEscape(trim(3, yysubstr, 1)); continue()); +[^"<>&]+ => (addText yytext; continue()); +[^'<>&]+ => (addText yytext; continue()); + +(* we handle whitespace specially, so that initial/trailing whitespace can be preserved + * when necessary. + *) +{ws}+ => (T.WS yytext); +[^ \n\t\r<&]+ => (T.TEXT yytext); +""" => (T.TEXT "\""); +"<" => (T.TEXT "<"); +">" => (T.TEXT ">"); +"&" => (T.TEXT "&"); +"'" => (T.TEXT "'"); +"" => (T.CDATA(trim (9, yysubstr, 3))); + +. => (lexErr(yypos, [ + "bad character `", String.toString yytext, "'" + ]); + continue()); +. => (lexErr(yypos, [ + "bad character `", String.toString yytext, "' in DOCTYPE" + ]); + continue()); +. => (lexErr(yypos, [ + "bad character `", String.toString yytext, "' in tag" + ]); + continue()); +. => (lexErr(yypos, [ + "bad character `", String.toString yytext, "' in attribute value" + ]); + continue()); diff --git a/smlnj-lib/XML/xml-lexer.lex.sml b/smlnj-lib/XML/xml-lexer.lex.sml new file mode 100644 index 0000000..d8f7267 --- /dev/null +++ b/smlnj-lib/XML/xml-lexer.lex.sml @@ -0,0 +1,2264 @@ +structure XMLLexer = struct + + datatype yystart_state = +DOCTYPE | COM | LIT2 | LIT1 | INITIAL | TAG + local + + structure UserDeclarations = + struct + + + structure T = XMLTokens + type lex_result = T.token + fun eof () = T.EOF + +(* list of strings to build attribute values *) + val text : string list ref = ref [] + fun addText s = (text := s :: !text) + fun addDecimalEscape s = addText(UTF8.encode(Word.fromInt(Option.valOf(Int.fromString s)))) + fun addHexEscape s = addText(UTF8.encode(Option.valOf(Word.fromString s))) + fun textToString () = let + val s = String.concat(List.rev(!text)) + in + text := []; s + end + +(* trim m characters from the left and n characters from the right *) + fun trim (m, ss, n) = Substring.string(Substring.triml m (Substring.trimr n ss)) + + end + + datatype yymatch + = yyNO_MATCH + | yyMATCH of ULexBuffer.stream * action * yymatch + withtype action = ULexBuffer.stream * yymatch -> UserDeclarations.lex_result + + val yytable : ((UTF8.wchar * UTF8.wchar * int) list * int list) Vector.vector = +Vector.fromList [] + fun yystreamify' p input = ULexBuffer.mkStream (p, input) + + fun yystreamifyReader' p readFn strm = let + val s = ref strm + fun iter(strm, n, accum) = + if n > 1024 then (String.implode (rev accum), strm) + else (case readFn strm + of NONE => (String.implode (rev accum), strm) + | SOME(c, strm') => iter (strm', n+1, c::accum)) + fun input() = let + val (data, strm) = iter(!s, 0, []) + in + s := strm; + data + end + in + yystreamify' p input + end + + fun yystreamifyInstream' p strm = yystreamify' p (fn ()=>TextIO.input strm) + + fun innerLex +(yyarg as lexErr)(yystrm_, yyss_, yysm) = let + (* current start state *) + val yyss = ref yyss_ + fun YYBEGIN ss = (yyss := ss) + (* current input stream *) + val yystrm = ref yystrm_ + fun yysetStrm strm = yystrm := strm + fun yygetPos() = ULexBuffer.getpos (!yystrm) + fun yystreamify input = yystreamify' (yygetPos()) input + fun yystreamifyReader readFn strm = yystreamifyReader' (yygetPos()) readFn strm + fun yystreamifyInstream strm = yystreamifyInstream' (yygetPos()) strm + (* start position of token -- can be updated via skip() *) + val yystartPos = ref (yygetPos()) + (* get one char of input *) + fun yygetc strm = (case ULexBuffer.getu strm + of (SOME (0w10, s')) => + (AntlrStreamPos.markNewLine yysm (ULexBuffer.getpos strm); + SOME (0w10, s')) + | x => x) + fun yygetList getc strm = let + val get1 = UTF8.getu getc + fun iter (strm, accum) = + (case get1 strm + of NONE => rev accum + | SOME (w, strm') => iter (strm', w::accum) + (* end case *)) + in + iter (strm, []) + end + (* create yytext *) + fun yymksubstr(strm) = ULexBuffer.subtract (strm, !yystrm) + fun yymktext(strm) = Substring.string (yymksubstr strm) + fun yymkunicode(strm) = yygetList Substring.getc (yymksubstr strm) + open UserDeclarations + fun lex () = let + fun yystuck (yyNO_MATCH) = raise Fail "lexer reached a stuck state" + | yystuck (yyMATCH (strm, action, old)) = + action (strm, old) + val yypos = yygetPos() + fun yygetlineNo strm = AntlrStreamPos.lineNo yysm (ULexBuffer.getpos strm) + fun yygetcolNo strm = AntlrStreamPos.colNo yysm (ULexBuffer.getpos strm) + fun yyactsToMatches (strm, [], oldMatches) = oldMatches + | yyactsToMatches (strm, act::acts, oldMatches) = + yyMATCH (strm, act, yyactsToMatches (strm, acts, oldMatches)) + fun yygo actTable = + (fn (~1, _, oldMatches) => yystuck oldMatches + | (curState, strm, oldMatches) => let + val (transitions, finals') = Vector.sub (yytable, curState) + val finals = List.map (fn i => Vector.sub (actTable, i)) finals' + fun tryfinal() = + yystuck (yyactsToMatches (strm, finals, oldMatches)) + fun find (c, []) = NONE + | find (c, (c1, c2, s)::ts) = + if c1 <= c andalso c <= c2 then SOME s + else find (c, ts) + in case yygetc strm + of SOME(c, strm') => + (case find (c, transitions) + of NONE => tryfinal() + | SOME n => + yygo actTable + (n, strm', + yyactsToMatches (strm, finals, oldMatches))) + | NONE => tryfinal() + end) + val yylastwasnref = ref (ULexBuffer.lastWasNL (!yystrm)) + fun continue() = let val yylastwasn = !yylastwasnref in +let +fun yyAction0 (strm, lastMatch : yymatch) = let + val yytext = yymktext(strm) + in + yystrm := strm; addText yytext; YYBEGIN COM; continue() + end +fun yyAction1 (strm, lastMatch : yymatch) = let + val yytext = yymktext(strm) + in + yystrm := strm; addText yytext; YYBEGIN INITIAL; T.COM(textToString()) + end +fun yyAction2 (strm, lastMatch : yymatch) = let + val yytext = yymktext(strm) + in + yystrm := strm; addText yytext; continue() + end +fun yyAction3 (strm, lastMatch : yymatch) = (yystrm := strm; + YYBEGIN TAG; T.OPEN_START_TAG) +fun yyAction4 (strm, lastMatch : yymatch) = (yystrm := strm; + YYBEGIN TAG; T.OPEN_END_TAG) +fun yyAction5 (strm, lastMatch : yymatch) = (yystrm := strm; + YYBEGIN TAG; T.OPEN_XML_TAG) +fun yyAction6 (strm, lastMatch : yymatch) = (yystrm := strm; + YYBEGIN DOCTYPE; T.OPEN_DOCTYPE) +fun yyAction7 (strm, lastMatch : yymatch) = (yystrm := strm; T.PUBLIC) +fun yyAction8 (strm, lastMatch : yymatch) = (yystrm := strm; T.SYSTEM) +fun yyAction9 (strm, lastMatch : yymatch) = let + val yytext = yymktext(strm) + in + yystrm := strm; T.LIT(String.substring(yytext, 1, size yytext - 2)) + end +fun yyAction10 (strm, lastMatch : yymatch) = let + val yytext = yymktext(strm) + in + yystrm := strm; T.LIT(String.substring(yytext, 1, size yytext - 2)) + end +fun yyAction11 (strm, lastMatch : yymatch) = (yystrm := strm; + YYBEGIN INITIAL; T.CLOSE_TAG) +fun yyAction12 (strm, lastMatch : yymatch) = (yystrm := strm; skip()) +fun yyAction13 (strm, lastMatch : yymatch) = (yystrm := strm; + YYBEGIN INITIAL; T.CLOSE_PI_TAG) +fun yyAction14 (strm, lastMatch : yymatch) = (yystrm := strm; + YYBEGIN INITIAL; T.CLOSE_TAG) +fun yyAction15 (strm, lastMatch : yymatch) = (yystrm := strm; + YYBEGIN INITIAL; T.CLOSE_EMPTY_TAG) +fun yyAction16 (strm, lastMatch : yymatch) = (yystrm := strm; T.SYM_EQ) +fun yyAction17 (strm, lastMatch : yymatch) = let + val yytext = yymktext(strm) + in + yystrm := strm; T.ID yytext + end +fun yyAction18 (strm, lastMatch : yymatch) = (yystrm := strm; + YYBEGIN LIT1; continue()) +fun yyAction19 (strm, lastMatch : yymatch) = (yystrm := strm; + YYBEGIN LIT2; continue()) +fun yyAction20 (strm, lastMatch : yymatch) = (yystrm := strm; + YYBEGIN TAG; T.LIT(textToString())) +fun yyAction21 (strm, lastMatch : yymatch) = (yystrm := strm; + YYBEGIN TAG; T.LIT(textToString())) +fun yyAction22 (strm, lastMatch : yymatch) = (yystrm := strm; + addText ("\""); continue()) +fun yyAction23 (strm, lastMatch : yymatch) = (yystrm := strm; + addText ("<"); continue()) +fun yyAction24 (strm, lastMatch : yymatch) = (yystrm := strm; + addText (">"); continue()) +fun yyAction25 (strm, lastMatch : yymatch) = (yystrm := strm; + addText ("&"); continue()) +fun yyAction26 (strm, lastMatch : yymatch) = (yystrm := strm; + addText ("'"); continue()) +fun yyAction27 (strm, lastMatch : yymatch) = let + val yysubstr = yymksubstr(strm) + in + yystrm := strm; addDecimalEscape(trim(2, yysubstr, 1)); continue() + end +fun yyAction28 (strm, lastMatch : yymatch) = let + val yysubstr = yymksubstr(strm) + in + yystrm := strm; addHexEscape(trim(3, yysubstr, 1)); continue() + end +fun yyAction29 (strm, lastMatch : yymatch) = let + val yytext = yymktext(strm) + in + yystrm := strm; addText yytext; continue() + end +fun yyAction30 (strm, lastMatch : yymatch) = let + val yytext = yymktext(strm) + in + yystrm := strm; addText yytext; continue() + end +fun yyAction31 (strm, lastMatch : yymatch) = let + val yytext = yymktext(strm) + in + yystrm := strm; T.WS yytext + end +fun yyAction32 (strm, lastMatch : yymatch) = let + val yytext = yymktext(strm) + in + yystrm := strm; T.TEXT yytext + end +fun yyAction33 (strm, lastMatch : yymatch) = (yystrm := strm; T.TEXT "\"") +fun yyAction34 (strm, lastMatch : yymatch) = (yystrm := strm; T.TEXT "<") +fun yyAction35 (strm, lastMatch : yymatch) = (yystrm := strm; T.TEXT ">") +fun yyAction36 (strm, lastMatch : yymatch) = (yystrm := strm; T.TEXT "&") +fun yyAction37 (strm, lastMatch : yymatch) = (yystrm := strm; T.TEXT "'") +fun yyAction38 (strm, lastMatch : yymatch) = let + val yysubstr = yymksubstr(strm) + in + yystrm := strm; T.CDATA(trim (9, yysubstr, 3)) + end +fun yyAction39 (strm, lastMatch : yymatch) = let + val yytext = yymktext(strm) + in + yystrm := strm; + lexErr(yypos, [ + "bad character `", String.toString yytext, "'" + ]); + continue() + end +fun yyAction40 (strm, lastMatch : yymatch) = let + val yytext = yymktext(strm) + in + yystrm := strm; + lexErr(yypos, [ + "bad character `", String.toString yytext, "' in DOCTYPE" + ]); + continue() + end +fun yyAction41 (strm, lastMatch : yymatch) = let + val yytext = yymktext(strm) + in + yystrm := strm; + lexErr(yypos, [ + "bad character `", String.toString yytext, "' in tag" + ]); + continue() + end +fun yyAction42 (strm, lastMatch : yymatch) = let + val yytext = yymktext(strm) + in + yystrm := strm; + lexErr(yypos, [ + "bad character `", String.toString yytext, "' in attribute value" + ]); + continue() + end +fun yyQ128 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction13(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction13(strm, yyNO_MATCH) + (* end case *)) +fun yyQ127 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction41(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = 0wx3E + then yyQ128(strm', yyMATCH(strm, yyAction41, yyNO_MATCH)) + else yyAction41(strm, yyNO_MATCH) + (* end case *)) +fun yyQ126 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction14(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction14(strm, yyNO_MATCH) + (* end case *)) +fun yyQ125 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction16(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction16(strm, yyNO_MATCH) + (* end case *)) +fun yyQ14 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction17(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = 0wx41 + then yyQ14(strm', yyMATCH(strm, yyAction17, yyNO_MATCH)) + else if inp < 0wx41 + then if inp = 0wx2F + then yyAction17(strm, yyNO_MATCH) + else if inp < 0wx2F + then if inp <= 0wx2C + then yyAction17(strm, yyNO_MATCH) + else yyQ14(strm', yyMATCH(strm, yyAction17, yyNO_MATCH)) + else if inp <= 0wx3A + then yyQ14(strm', yyMATCH(strm, yyAction17, yyNO_MATCH)) + else yyAction17(strm, yyNO_MATCH) + else if inp = 0wx60 + then yyAction17(strm, yyNO_MATCH) + else if inp < 0wx60 + then if inp = 0wx5B + then yyAction17(strm, yyNO_MATCH) + else if inp < 0wx5B + then yyQ14(strm', yyMATCH(strm, yyAction17, yyNO_MATCH)) + else if inp = 0wx5F + then yyQ14(strm', yyMATCH(strm, yyAction17, yyNO_MATCH)) + else yyAction17(strm, yyNO_MATCH) + else if inp <= 0wx7A + then yyQ14(strm', yyMATCH(strm, yyAction17, yyNO_MATCH)) + else yyAction17(strm, yyNO_MATCH) + (* end case *)) +fun yyQ124 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction17(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = 0wx41 + then yyQ14(strm', yyMATCH(strm, yyAction17, yyNO_MATCH)) + else if inp < 0wx41 + then if inp = 0wx2F + then yyAction17(strm, yyNO_MATCH) + else if inp < 0wx2F + then if inp <= 0wx2C + then yyAction17(strm, yyNO_MATCH) + else yyQ14(strm', yyMATCH(strm, yyAction17, yyNO_MATCH)) + else if inp <= 0wx3A + then yyQ14(strm', yyMATCH(strm, yyAction17, yyNO_MATCH)) + else yyAction17(strm, yyNO_MATCH) + else if inp = 0wx60 + then yyAction17(strm, yyNO_MATCH) + else if inp < 0wx60 + then if inp = 0wx5B + then yyAction17(strm, yyNO_MATCH) + else if inp < 0wx5B + then yyQ14(strm', yyMATCH(strm, yyAction17, yyNO_MATCH)) + else if inp = 0wx5F + then yyQ14(strm', yyMATCH(strm, yyAction17, yyNO_MATCH)) + else yyAction17(strm, yyNO_MATCH) + else if inp <= 0wx7A + then yyQ14(strm', yyMATCH(strm, yyAction17, yyNO_MATCH)) + else yyAction17(strm, yyNO_MATCH) + (* end case *)) +fun yyQ129 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction15(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction15(strm, yyNO_MATCH) + (* end case *)) +fun yyQ123 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction41(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = 0wx3E + then yyQ129(strm', yyMATCH(strm, yyAction41, yyNO_MATCH)) + else yyAction41(strm, yyNO_MATCH) + (* end case *)) +fun yyQ122 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction19(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction19(strm, yyNO_MATCH) + (* end case *)) +fun yyQ121 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction18(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction18(strm, yyNO_MATCH) + (* end case *)) +fun yyQ29 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction12(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = 0wxE + then yyAction12(strm, yyNO_MATCH) + else if inp < 0wxE + then if inp <= 0wx8 + then yyAction12(strm, yyNO_MATCH) + else yyQ29(strm', yyMATCH(strm, yyAction12, yyNO_MATCH)) + else if inp = 0wx20 + then yyQ29(strm', yyMATCH(strm, yyAction12, yyNO_MATCH)) + else yyAction12(strm, yyNO_MATCH) + (* end case *)) +fun yyQ120 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction12(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = 0wxE + then yyAction12(strm, yyNO_MATCH) + else if inp < 0wxE + then if inp <= 0wx8 + then yyAction12(strm, yyNO_MATCH) + else yyQ29(strm', yyMATCH(strm, yyAction12, yyNO_MATCH)) + else if inp = 0wx20 + then yyQ29(strm', yyMATCH(strm, yyAction12, yyNO_MATCH)) + else yyAction12(strm, yyNO_MATCH) + (* end case *)) +fun yyQ119 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction41(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction41(strm, yyNO_MATCH) + (* end case *)) +fun yyQ5 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => + if ULexBuffer.eof(!(yystrm)) + then let + val yycolno = ref(yygetcolNo(!(yystrm))) + val yylineno = ref(yygetlineNo(!(yystrm))) + in + (case (!(yyss)) + of _ => (UserDeclarations.eof()) + (* end case *)) + end + else yystuck(lastMatch) + | SOME(inp, strm') => + if inp = 0wx3A + then yyQ124(strm', lastMatch) + else if inp < 0wx3A + then if inp = 0wx22 + then yyQ121(strm', lastMatch) + else if inp < 0wx22 + then if inp = 0wxE + then yyQ119(strm', lastMatch) + else if inp < 0wxE + then if inp <= 0wx8 + then yyQ119(strm', lastMatch) + else yyQ120(strm', lastMatch) + else if inp = 0wx20 + then yyQ120(strm', lastMatch) + else yyQ119(strm', lastMatch) + else if inp = 0wx28 + then yyQ119(strm', lastMatch) + else if inp < 0wx28 + then if inp = 0wx27 + then yyQ122(strm', lastMatch) + else yyQ119(strm', lastMatch) + else if inp = 0wx2F + then yyQ123(strm', lastMatch) + else yyQ119(strm', lastMatch) + else if inp = 0wx41 + then yyQ124(strm', lastMatch) + else if inp < 0wx41 + then if inp = 0wx3E + then yyQ126(strm', lastMatch) + else if inp < 0wx3E + then if inp = 0wx3D + then yyQ125(strm', lastMatch) + else yyQ119(strm', lastMatch) + else if inp = 0wx3F + then yyQ127(strm', lastMatch) + else yyQ119(strm', lastMatch) + else if inp = 0wx60 + then yyQ119(strm', lastMatch) + else if inp < 0wx60 + then if inp = 0wx5B + then yyQ119(strm', lastMatch) + else if inp < 0wx5B + then yyQ124(strm', lastMatch) + else if inp = 0wx5F + then yyQ124(strm', lastMatch) + else yyQ119(strm', lastMatch) + else if inp <= 0wx7A + then yyQ124(strm', lastMatch) + else yyQ119(strm', lastMatch) + (* end case *)) +fun yyQ77 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction5(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction5(strm, yyNO_MATCH) + (* end case *)) +fun yyQ76 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = 0wx4D + then yystuck(lastMatch) + else if inp < 0wx4D + then if inp = 0wx4C + then yyQ77(strm', lastMatch) + else yystuck(lastMatch) + else if inp = 0wx6C + then yyQ77(strm', lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ75 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = 0wx4E + then yystuck(lastMatch) + else if inp < 0wx4E + then if inp = 0wx4D + then yyQ76(strm', lastMatch) + else yystuck(lastMatch) + else if inp = 0wx6D + then yyQ76(strm', lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ74 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = 0wx59 + then yystuck(lastMatch) + else if inp < 0wx59 + then if inp = 0wx58 + then yyQ75(strm', lastMatch) + else yystuck(lastMatch) + else if inp = 0wx78 + then yyQ75(strm', lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ73 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction4(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction4(strm, yyNO_MATCH) + (* end case *)) +fun yyQ86 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = 0wx5D + then yyQ87(strm', lastMatch) + else yyQ86(strm', lastMatch) + (* end case *)) +and yyQ87 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = 0wx5D + then yyQ88(strm', lastMatch) + else yyQ86(strm', lastMatch) + (* end case *)) +and yyQ88 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = 0wx3F + then yyQ86(strm', lastMatch) + else if inp < 0wx3F + then if inp = 0wx3E + then yyQ89(strm', lastMatch) + else yyQ86(strm', lastMatch) + else if inp = 0wx5D + then yyQ88(strm', lastMatch) + else yyQ86(strm', lastMatch) + (* end case *)) +and yyQ89 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction38(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = 0wx5D + then yyQ87(strm', yyMATCH(strm, yyAction38, yyNO_MATCH)) + else yyQ86(strm', yyMATCH(strm, yyAction38, yyNO_MATCH)) + (* end case *)) +fun yyQ85 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = 0wx5B + then yyQ86(strm', lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ84 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = 0wx41 + then yyQ85(strm', lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ83 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = 0wx54 + then yyQ84(strm', lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ82 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = 0wx41 + then yyQ83(strm', lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ81 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = 0wx44 + then yyQ82(strm', lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ80 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = 0wx43 + then yyQ81(strm', lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ95 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction6(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction6(strm, yyNO_MATCH) + (* end case *)) +fun yyQ94 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = 0wx45 + then yyQ95(strm', lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ93 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = 0wx50 + then yyQ94(strm', lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ92 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = 0wx59 + then yyQ93(strm', lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ91 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = 0wx54 + then yyQ92(strm', lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ90 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = 0wx43 + then yyQ91(strm', lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ79 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = 0wx4F + then yyQ90(strm', lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ96 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction0(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction0(strm, yyNO_MATCH) + (* end case *)) +fun yyQ78 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = 0wx2D + then yyQ96(strm', lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ72 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = 0wx44 + then yyQ79(strm', lastMatch) + else if inp < 0wx44 + then if inp = 0wx2D + then yyQ78(strm', lastMatch) + else yystuck(lastMatch) + else if inp = 0wx5B + then yyQ80(strm', lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ71 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction3(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = 0wx2F + then yyQ73(strm', yyMATCH(strm, yyAction3, yyNO_MATCH)) + else if inp < 0wx2F + then if inp = 0wx21 + then yyQ72(strm', yyMATCH(strm, yyAction3, yyNO_MATCH)) + else yyAction3(strm, yyNO_MATCH) + else if inp = 0wx3F + then yyQ74(strm', yyMATCH(strm, yyAction3, yyNO_MATCH)) + else yyAction3(strm, yyNO_MATCH) + (* end case *)) +fun yyQ104 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction33(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction33(strm, yyNO_MATCH) + (* end case *)) +fun yyQ103 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = 0wx3B + then yyQ104(strm', lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ102 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = 0wx74 + then yyQ103(strm', lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ101 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = 0wx6F + then yyQ102(strm', lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ100 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = 0wx75 + then yyQ101(strm', lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ106 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction34(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction34(strm, yyNO_MATCH) + (* end case *)) +fun yyQ105 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = 0wx3B + then yyQ106(strm', lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ99 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = 0wx74 + then yyQ105(strm', lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ108 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction35(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction35(strm, yyNO_MATCH) + (* end case *)) +fun yyQ107 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = 0wx3B + then yyQ108(strm', lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ98 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = 0wx74 + then yyQ107(strm', lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ113 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction37(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction37(strm, yyNO_MATCH) + (* end case *)) +fun yyQ112 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = 0wx3B + then yyQ113(strm', lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ111 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = 0wx73 + then yyQ112(strm', lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ110 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = 0wx6F + then yyQ111(strm', lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ115 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction36(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction36(strm, yyNO_MATCH) + (* end case *)) +fun yyQ114 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = 0wx3B + then yyQ115(strm', lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ109 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = 0wx70 + then yyQ114(strm', lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ97 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = 0wx6E + then yystuck(lastMatch) + else if inp < 0wx6E + then if inp = 0wx6D + then yyQ109(strm', lastMatch) + else yystuck(lastMatch) + else if inp = 0wx70 + then yyQ110(strm', lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ70 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction39(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = 0wx68 + then yyAction39(strm, yyNO_MATCH) + else if inp < 0wx68 + then if inp = 0wx62 + then yyAction39(strm, yyNO_MATCH) + else if inp < 0wx62 + then if inp = 0wx61 + then yyQ97(strm', yyMATCH(strm, yyAction39, yyNO_MATCH)) + else yyAction39(strm, yyNO_MATCH) + else if inp = 0wx67 + then yyQ98(strm', yyMATCH(strm, yyAction39, yyNO_MATCH)) + else yyAction39(strm, yyNO_MATCH) + else if inp = 0wx6D + then yyAction39(strm, yyNO_MATCH) + else if inp < 0wx6D + then if inp = 0wx6C + then yyQ99(strm', yyMATCH(strm, yyAction39, yyNO_MATCH)) + else yyAction39(strm, yyNO_MATCH) + else if inp = 0wx71 + then yyQ100(strm', yyMATCH(strm, yyAction39, yyNO_MATCH)) + else yyAction39(strm, yyNO_MATCH) + (* end case *)) +fun yyQ117 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction31(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = 0wxE + then yyAction31(strm, yyNO_MATCH) + else if inp < 0wxE + then if inp <= 0wx8 + then yyAction31(strm, yyNO_MATCH) + else yyQ117(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp = 0wx20 + then yyQ117(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else yyAction31(strm, yyNO_MATCH) + (* end case *)) +fun yyQ116 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction32(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = 0wx20 + then yyAction32(strm, yyNO_MATCH) + else if inp < 0wx20 + then if inp = 0wxB + then yyQ116(strm', yyMATCH(strm, yyAction32, yyNO_MATCH)) + else if inp < 0wxB + then if inp <= 0wx8 + then yyQ116(strm', yyMATCH(strm, yyAction32, yyNO_MATCH)) + else yyAction32(strm, yyNO_MATCH) + else if inp = 0wxD + then yyAction32(strm, yyNO_MATCH) + else yyQ116(strm', yyMATCH(strm, yyAction32, yyNO_MATCH)) + else if inp = 0wx27 + then yyQ116(strm', yyMATCH(strm, yyAction32, yyNO_MATCH)) + else if inp < 0wx27 + then if inp = 0wx26 + then yyAction32(strm, yyNO_MATCH) + else yyQ116(strm', yyMATCH(strm, yyAction32, yyNO_MATCH)) + else if inp = 0wx3C + then yyAction32(strm, yyNO_MATCH) + else yyQ116(strm', yyMATCH(strm, yyAction32, yyNO_MATCH)) + (* end case *)) +fun yyQ118 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction31(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = 0wx20 + then yyQ117(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp < 0wx20 + then if inp = 0wxB + then yyQ118(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp < 0wxB + then if inp <= 0wx8 + then yyQ116(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else yyQ117(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp = 0wxD + then yyQ117(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp <= 0wxC + then yyQ118(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else yyQ116(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp = 0wx27 + then yyQ116(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp < 0wx27 + then if inp = 0wx26 + then yyAction31(strm, yyNO_MATCH) + else yyQ116(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp = 0wx3C + then yyAction31(strm, yyNO_MATCH) + else yyQ116(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + (* end case *)) +fun yyQ69 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction31(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = 0wx20 + then yyQ117(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp < 0wx20 + then if inp = 0wxB + then yyQ118(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp < 0wxB + then if inp <= 0wx8 + then yyQ116(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else yyQ117(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp = 0wxD + then yyQ117(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp <= 0wxC + then yyQ118(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else yyQ116(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp = 0wx27 + then yyQ116(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp < 0wx27 + then if inp = 0wx26 + then yyAction31(strm, yyNO_MATCH) + else yyQ116(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp = 0wx3C + then yyAction31(strm, yyNO_MATCH) + else yyQ116(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + (* end case *)) +fun yyQ68 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction31(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = 0wxE + then yyAction31(strm, yyNO_MATCH) + else if inp < 0wxE + then if inp <= 0wx8 + then yyAction31(strm, yyNO_MATCH) + else yyQ117(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else if inp = 0wx20 + then yyQ117(strm', yyMATCH(strm, yyAction31, yyNO_MATCH)) + else yyAction31(strm, yyNO_MATCH) + (* end case *)) +fun yyQ67 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction32(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = 0wx20 + then yyAction32(strm, yyNO_MATCH) + else if inp < 0wx20 + then if inp = 0wxB + then yyQ116(strm', yyMATCH(strm, yyAction32, yyNO_MATCH)) + else if inp < 0wxB + then if inp <= 0wx8 + then yyQ116(strm', yyMATCH(strm, yyAction32, yyNO_MATCH)) + else yyAction32(strm, yyNO_MATCH) + else if inp = 0wxD + then yyAction32(strm, yyNO_MATCH) + else yyQ116(strm', yyMATCH(strm, yyAction32, yyNO_MATCH)) + else if inp = 0wx27 + then yyQ116(strm', yyMATCH(strm, yyAction32, yyNO_MATCH)) + else if inp < 0wx27 + then if inp = 0wx26 + then yyAction32(strm, yyNO_MATCH) + else yyQ116(strm', yyMATCH(strm, yyAction32, yyNO_MATCH)) + else if inp = 0wx3C + then yyAction32(strm, yyNO_MATCH) + else yyQ116(strm', yyMATCH(strm, yyAction32, yyNO_MATCH)) + (* end case *)) +fun yyQ4 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => + if ULexBuffer.eof(!(yystrm)) + then let + val yycolno = ref(yygetcolNo(!(yystrm))) + val yylineno = ref(yygetlineNo(!(yystrm))) + in + (case (!(yyss)) + of _ => (UserDeclarations.eof()) + (* end case *)) + end + else yystuck(lastMatch) + | SOME(inp, strm') => + if inp = 0wx20 + then yyQ68(strm', lastMatch) + else if inp < 0wx20 + then if inp = 0wxB + then yyQ69(strm', lastMatch) + else if inp < 0wxB + then if inp <= 0wx8 + then yyQ67(strm', lastMatch) + else yyQ68(strm', lastMatch) + else if inp = 0wxD + then yyQ68(strm', lastMatch) + else if inp <= 0wxC + then yyQ69(strm', lastMatch) + else yyQ67(strm', lastMatch) + else if inp = 0wx27 + then yyQ67(strm', lastMatch) + else if inp < 0wx27 + then if inp = 0wx26 + then yyQ70(strm', lastMatch) + else yyQ67(strm', lastMatch) + else if inp = 0wx3C + then yyQ71(strm', lastMatch) + else yyQ67(strm', lastMatch) + (* end case *)) +fun yyQ37 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction42(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction42(strm, yyNO_MATCH) + (* end case *)) +fun yyQ46 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction22(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction22(strm, yyNO_MATCH) + (* end case *)) +fun yyQ45 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = 0wx3B + then yyQ46(strm', lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ44 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = 0wx74 + then yyQ45(strm', lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ43 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = 0wx6F + then yyQ44(strm', lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ42 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = 0wx75 + then yyQ43(strm', lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ48 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction23(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction23(strm, yyNO_MATCH) + (* end case *)) +fun yyQ47 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = 0wx3B + then yyQ48(strm', lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ41 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = 0wx74 + then yyQ47(strm', lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ50 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction24(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction24(strm, yyNO_MATCH) + (* end case *)) +fun yyQ49 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = 0wx3B + then yyQ50(strm', lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ40 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = 0wx74 + then yyQ49(strm', lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ55 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction26(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction26(strm, yyNO_MATCH) + (* end case *)) +fun yyQ54 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = 0wx3B + then yyQ55(strm', lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ53 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = 0wx73 + then yyQ54(strm', lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ52 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = 0wx6F + then yyQ53(strm', lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ57 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction25(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction25(strm, yyNO_MATCH) + (* end case *)) +fun yyQ56 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = 0wx3B + then yyQ57(strm', lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ51 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = 0wx70 + then yyQ56(strm', lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ39 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = 0wx6E + then yystuck(lastMatch) + else if inp < 0wx6E + then if inp = 0wx6D + then yyQ51(strm', lastMatch) + else yystuck(lastMatch) + else if inp = 0wx70 + then yyQ52(strm', lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ61 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction28(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction28(strm, yyNO_MATCH) + (* end case *)) +fun yyQ60 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = 0wx3C + then yystuck(lastMatch) + else if inp < 0wx3C + then if inp = 0wx3A + then yystuck(lastMatch) + else if inp < 0wx3A + then if inp <= 0wx2F + then yystuck(lastMatch) + else yyQ60(strm', lastMatch) + else yyQ61(strm', lastMatch) + else if inp = 0wx47 + then yystuck(lastMatch) + else if inp < 0wx47 + then if inp <= 0wx40 + then yystuck(lastMatch) + else yyQ60(strm', lastMatch) + else if inp = 0wx61 + then yyQ60(strm', lastMatch) + else if inp < 0wx61 + then yystuck(lastMatch) + else if inp <= 0wx66 + then yyQ60(strm', lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ59 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = 0wx41 + then yyQ60(strm', lastMatch) + else if inp < 0wx41 + then if inp = 0wx30 + then yyQ60(strm', lastMatch) + else if inp < 0wx30 + then yystuck(lastMatch) + else if inp <= 0wx39 + then yyQ60(strm', lastMatch) + else yystuck(lastMatch) + else if inp = 0wx61 + then yyQ60(strm', lastMatch) + else if inp < 0wx61 + then if inp <= 0wx46 + then yyQ60(strm', lastMatch) + else yystuck(lastMatch) + else if inp <= 0wx66 + then yyQ60(strm', lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ62 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction27(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction27(strm, yyNO_MATCH) + (* end case *)) +fun yyQ58 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = 0wx3A + then yystuck(lastMatch) + else if inp < 0wx3A + then if inp <= 0wx2F + then yystuck(lastMatch) + else yyQ58(strm', lastMatch) + else if inp = 0wx3B + then yyQ62(strm', lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ38 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = 0wx3A + then yystuck(lastMatch) + else if inp < 0wx3A + then if inp <= 0wx2F + then yystuck(lastMatch) + else yyQ58(strm', lastMatch) + else if inp = 0wx78 + then yyQ59(strm', lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ35 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction42(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = 0wx67 + then yyQ40(strm', yyMATCH(strm, yyAction42, yyNO_MATCH)) + else if inp < 0wx67 + then if inp = 0wx24 + then yyAction42(strm, yyNO_MATCH) + else if inp < 0wx24 + then if inp = 0wx23 + then yyQ38(strm', yyMATCH(strm, yyAction42, yyNO_MATCH)) + else yyAction42(strm, yyNO_MATCH) + else if inp = 0wx61 + then yyQ39(strm', yyMATCH(strm, yyAction42, yyNO_MATCH)) + else yyAction42(strm, yyNO_MATCH) + else if inp = 0wx6D + then yyAction42(strm, yyNO_MATCH) + else if inp < 0wx6D + then if inp = 0wx6C + then yyQ41(strm', yyMATCH(strm, yyAction42, yyNO_MATCH)) + else yyAction42(strm, yyNO_MATCH) + else if inp = 0wx71 + then yyQ42(strm', yyMATCH(strm, yyAction42, yyNO_MATCH)) + else yyAction42(strm, yyNO_MATCH) + (* end case *)) +fun yyQ65 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction20(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction20(strm, yyNO_MATCH) + (* end case *)) +fun yyQ66 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction29(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = 0wx27 + then yyQ66(strm', yyMATCH(strm, yyAction29, yyNO_MATCH)) + else if inp < 0wx27 + then if inp = 0wx23 + then yyQ66(strm', yyMATCH(strm, yyAction29, yyNO_MATCH)) + else if inp < 0wx23 + then if inp = 0wx22 + then yyAction29(strm, yyNO_MATCH) + else yyQ66(strm', yyMATCH(strm, yyAction29, yyNO_MATCH)) + else if inp = 0wx26 + then yyAction29(strm, yyNO_MATCH) + else yyQ66(strm', yyMATCH(strm, yyAction29, yyNO_MATCH)) + else if inp = 0wx3D + then yyQ66(strm', yyMATCH(strm, yyAction29, yyNO_MATCH)) + else if inp < 0wx3D + then if inp = 0wx3C + then yyAction29(strm, yyNO_MATCH) + else yyQ66(strm', yyMATCH(strm, yyAction29, yyNO_MATCH)) + else if inp = 0wx3E + then yyAction29(strm, yyNO_MATCH) + else yyQ66(strm', yyMATCH(strm, yyAction29, yyNO_MATCH)) + (* end case *)) +fun yyQ64 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction29(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = 0wx27 + then yyQ66(strm', yyMATCH(strm, yyAction29, yyNO_MATCH)) + else if inp < 0wx27 + then if inp = 0wx23 + then yyQ66(strm', yyMATCH(strm, yyAction29, yyNO_MATCH)) + else if inp < 0wx23 + then if inp = 0wx22 + then yyAction29(strm, yyNO_MATCH) + else yyQ66(strm', yyMATCH(strm, yyAction29, yyNO_MATCH)) + else if inp = 0wx26 + then yyAction29(strm, yyNO_MATCH) + else yyQ66(strm', yyMATCH(strm, yyAction29, yyNO_MATCH)) + else if inp = 0wx3D + then yyQ66(strm', yyMATCH(strm, yyAction29, yyNO_MATCH)) + else if inp < 0wx3D + then if inp = 0wx3C + then yyAction29(strm, yyNO_MATCH) + else yyQ66(strm', yyMATCH(strm, yyAction29, yyNO_MATCH)) + else if inp = 0wx3E + then yyAction29(strm, yyNO_MATCH) + else yyQ66(strm', yyMATCH(strm, yyAction29, yyNO_MATCH)) + (* end case *)) +fun yyQ3 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => + if ULexBuffer.eof(!(yystrm)) + then let + val yycolno = ref(yygetcolNo(!(yystrm))) + val yylineno = ref(yygetlineNo(!(yystrm))) + in + (case (!(yyss)) + of _ => (UserDeclarations.eof()) + (* end case *)) + end + else yystuck(lastMatch) + | SOME(inp, strm') => + if inp = 0wx27 + then yyQ64(strm', lastMatch) + else if inp < 0wx27 + then if inp = 0wx23 + then yyQ64(strm', lastMatch) + else if inp < 0wx23 + then if inp = 0wx22 + then yyQ65(strm', lastMatch) + else yyQ64(strm', lastMatch) + else if inp = 0wx26 + then yyQ35(strm', lastMatch) + else yyQ64(strm', lastMatch) + else if inp = 0wx3D + then yyQ64(strm', lastMatch) + else if inp < 0wx3D + then if inp = 0wx3C + then yyQ37(strm', lastMatch) + else yyQ64(strm', lastMatch) + else if inp = 0wx3E + then yyQ37(strm', lastMatch) + else yyQ64(strm', lastMatch) + (* end case *)) +fun yyQ36 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction21(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction21(strm, yyNO_MATCH) + (* end case *)) +fun yyQ63 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction30(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = 0wx3C + then yyAction30(strm, yyNO_MATCH) + else if inp < 0wx3C + then if inp = 0wx26 + then yyAction30(strm, yyNO_MATCH) + else if inp < 0wx26 + then yyQ63(strm', yyMATCH(strm, yyAction30, yyNO_MATCH)) + else if inp <= 0wx27 + then yyAction30(strm, yyNO_MATCH) + else yyQ63(strm', yyMATCH(strm, yyAction30, yyNO_MATCH)) + else if inp = 0wx3E + then yyAction30(strm, yyNO_MATCH) + else yyQ63(strm', yyMATCH(strm, yyAction30, yyNO_MATCH)) + (* end case *)) +fun yyQ34 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction30(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = 0wx3C + then yyAction30(strm, yyNO_MATCH) + else if inp < 0wx3C + then if inp = 0wx26 + then yyAction30(strm, yyNO_MATCH) + else if inp < 0wx26 + then yyQ63(strm', yyMATCH(strm, yyAction30, yyNO_MATCH)) + else if inp <= 0wx27 + then yyAction30(strm, yyNO_MATCH) + else yyQ63(strm', yyMATCH(strm, yyAction30, yyNO_MATCH)) + else if inp = 0wx3E + then yyAction30(strm, yyNO_MATCH) + else yyQ63(strm', yyMATCH(strm, yyAction30, yyNO_MATCH)) + (* end case *)) +fun yyQ2 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => + if ULexBuffer.eof(!(yystrm)) + then let + val yycolno = ref(yygetcolNo(!(yystrm))) + val yylineno = ref(yygetlineNo(!(yystrm))) + in + (case (!(yyss)) + of _ => (UserDeclarations.eof()) + (* end case *)) + end + else yystuck(lastMatch) + | SOME(inp, strm') => + if inp = 0wx3C + then yyQ37(strm', lastMatch) + else if inp < 0wx3C + then if inp = 0wx27 + then yyQ36(strm', lastMatch) + else if inp < 0wx27 + then if inp = 0wx26 + then yyQ35(strm', lastMatch) + else yyQ34(strm', lastMatch) + else yyQ34(strm', lastMatch) + else if inp = 0wx3E + then yyQ37(strm', lastMatch) + else yyQ34(strm', lastMatch) + (* end case *)) +fun yyQ33 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction1(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction1(strm, yyNO_MATCH) + (* end case *)) +fun yyQ32 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = 0wx3E + then yyQ33(strm', lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ31 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction2(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = 0wx2D + then yyQ32(strm', yyMATCH(strm, yyAction2, yyNO_MATCH)) + else yyAction2(strm, yyNO_MATCH) + (* end case *)) +fun yyQ30 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction2(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction2(strm, yyNO_MATCH) + (* end case *)) +fun yyQ1 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => + if ULexBuffer.eof(!(yystrm)) + then let + val yycolno = ref(yygetcolNo(!(yystrm))) + val yylineno = ref(yygetlineNo(!(yystrm))) + in + (case (!(yyss)) + of _ => (UserDeclarations.eof()) + (* end case *)) + end + else yystuck(lastMatch) + | SOME(inp, strm') => + if inp = 0wx2D + then yyQ31(strm', lastMatch) + else yyQ30(strm', lastMatch) + (* end case *)) +fun yyQ19 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction8(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = 0wx41 + then yyQ14(strm', yyMATCH(strm, yyAction8, yyNO_MATCH)) + else if inp < 0wx41 + then if inp = 0wx2F + then yyAction8(strm, yyNO_MATCH) + else if inp < 0wx2F + then if inp <= 0wx2C + then yyAction8(strm, yyNO_MATCH) + else yyQ14(strm', yyMATCH(strm, yyAction8, yyNO_MATCH)) + else if inp <= 0wx3A + then yyQ14(strm', yyMATCH(strm, yyAction8, yyNO_MATCH)) + else yyAction8(strm, yyNO_MATCH) + else if inp = 0wx60 + then yyAction8(strm, yyNO_MATCH) + else if inp < 0wx60 + then if inp = 0wx5B + then yyAction8(strm, yyNO_MATCH) + else if inp < 0wx5B + then yyQ14(strm', yyMATCH(strm, yyAction8, yyNO_MATCH)) + else if inp = 0wx5F + then yyQ14(strm', yyMATCH(strm, yyAction8, yyNO_MATCH)) + else yyAction8(strm, yyNO_MATCH) + else if inp <= 0wx7A + then yyQ14(strm', yyMATCH(strm, yyAction8, yyNO_MATCH)) + else yyAction8(strm, yyNO_MATCH) + (* end case *)) +fun yyQ18 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction17(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = 0wx4E + then yyQ14(strm', yyMATCH(strm, yyAction17, yyNO_MATCH)) + else if inp < 0wx4E + then if inp = 0wx30 + then yyQ14(strm', yyMATCH(strm, yyAction17, yyNO_MATCH)) + else if inp < 0wx30 + then if inp = 0wx2D + then yyQ14(strm', yyMATCH(strm, yyAction17, yyNO_MATCH)) + else if inp < 0wx2D + then yyAction17(strm, yyNO_MATCH) + else if inp = 0wx2F + then yyAction17(strm, yyNO_MATCH) + else yyQ14(strm', yyMATCH(strm, yyAction17, yyNO_MATCH)) + else if inp = 0wx41 + then yyQ14(strm', yyMATCH(strm, yyAction17, yyNO_MATCH)) + else if inp < 0wx41 + then if inp <= 0wx3A + then yyQ14(strm', yyMATCH(strm, yyAction17, yyNO_MATCH)) + else yyAction17(strm, yyNO_MATCH) + else if inp = 0wx4D + then yyQ19(strm', yyMATCH(strm, yyAction17, yyNO_MATCH)) + else yyQ14(strm', yyMATCH(strm, yyAction17, yyNO_MATCH)) + else if inp = 0wx61 + then yyQ14(strm', yyMATCH(strm, yyAction17, yyNO_MATCH)) + else if inp < 0wx61 + then if inp = 0wx5F + then yyQ14(strm', yyMATCH(strm, yyAction17, yyNO_MATCH)) + else if inp < 0wx5F + then if inp <= 0wx5A + then yyQ14(strm', yyMATCH(strm, yyAction17, yyNO_MATCH)) + else yyAction17(strm, yyNO_MATCH) + else yyAction17(strm, yyNO_MATCH) + else if inp = 0wx6E + then yyQ14(strm', yyMATCH(strm, yyAction17, yyNO_MATCH)) + else if inp < 0wx6E + then if inp = 0wx6D + then yyQ19(strm', yyMATCH(strm, yyAction17, yyNO_MATCH)) + else yyQ14(strm', yyMATCH(strm, yyAction17, yyNO_MATCH)) + else if inp <= 0wx7A + then yyQ14(strm', yyMATCH(strm, yyAction17, yyNO_MATCH)) + else yyAction17(strm, yyNO_MATCH) + (* end case *)) +fun yyQ17 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction17(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = 0wx46 + then yyQ14(strm', yyMATCH(strm, yyAction17, yyNO_MATCH)) + else if inp < 0wx46 + then if inp = 0wx30 + then yyQ14(strm', yyMATCH(strm, yyAction17, yyNO_MATCH)) + else if inp < 0wx30 + then if inp = 0wx2D + then yyQ14(strm', yyMATCH(strm, yyAction17, yyNO_MATCH)) + else if inp < 0wx2D + then yyAction17(strm, yyNO_MATCH) + else if inp = 0wx2F + then yyAction17(strm, yyNO_MATCH) + else yyQ14(strm', yyMATCH(strm, yyAction17, yyNO_MATCH)) + else if inp = 0wx41 + then yyQ14(strm', yyMATCH(strm, yyAction17, yyNO_MATCH)) + else if inp < 0wx41 + then if inp <= 0wx3A + then yyQ14(strm', yyMATCH(strm, yyAction17, yyNO_MATCH)) + else yyAction17(strm, yyNO_MATCH) + else if inp = 0wx45 + then yyQ18(strm', yyMATCH(strm, yyAction17, yyNO_MATCH)) + else yyQ14(strm', yyMATCH(strm, yyAction17, yyNO_MATCH)) + else if inp = 0wx61 + then yyQ14(strm', yyMATCH(strm, yyAction17, yyNO_MATCH)) + else if inp < 0wx61 + then if inp = 0wx5F + then yyQ14(strm', yyMATCH(strm, yyAction17, yyNO_MATCH)) + else if inp < 0wx5F + then if inp <= 0wx5A + then yyQ14(strm', yyMATCH(strm, yyAction17, yyNO_MATCH)) + else yyAction17(strm, yyNO_MATCH) + else yyAction17(strm, yyNO_MATCH) + else if inp = 0wx66 + then yyQ14(strm', yyMATCH(strm, yyAction17, yyNO_MATCH)) + else if inp < 0wx66 + then if inp = 0wx65 + then yyQ18(strm', yyMATCH(strm, yyAction17, yyNO_MATCH)) + else yyQ14(strm', yyMATCH(strm, yyAction17, yyNO_MATCH)) + else if inp <= 0wx7A + then yyQ14(strm', yyMATCH(strm, yyAction17, yyNO_MATCH)) + else yyAction17(strm, yyNO_MATCH) + (* end case *)) +fun yyQ16 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction17(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = 0wx55 + then yyQ14(strm', yyMATCH(strm, yyAction17, yyNO_MATCH)) + else if inp < 0wx55 + then if inp = 0wx30 + then yyQ14(strm', yyMATCH(strm, yyAction17, yyNO_MATCH)) + else if inp < 0wx30 + then if inp = 0wx2D + then yyQ14(strm', yyMATCH(strm, yyAction17, yyNO_MATCH)) + else if inp < 0wx2D + then yyAction17(strm, yyNO_MATCH) + else if inp = 0wx2F + then yyAction17(strm, yyNO_MATCH) + else yyQ14(strm', yyMATCH(strm, yyAction17, yyNO_MATCH)) + else if inp = 0wx41 + then yyQ14(strm', yyMATCH(strm, yyAction17, yyNO_MATCH)) + else if inp < 0wx41 + then if inp <= 0wx3A + then yyQ14(strm', yyMATCH(strm, yyAction17, yyNO_MATCH)) + else yyAction17(strm, yyNO_MATCH) + else if inp = 0wx54 + then yyQ17(strm', yyMATCH(strm, yyAction17, yyNO_MATCH)) + else yyQ14(strm', yyMATCH(strm, yyAction17, yyNO_MATCH)) + else if inp = 0wx61 + then yyQ14(strm', yyMATCH(strm, yyAction17, yyNO_MATCH)) + else if inp < 0wx61 + then if inp = 0wx5F + then yyQ14(strm', yyMATCH(strm, yyAction17, yyNO_MATCH)) + else if inp < 0wx5F + then if inp <= 0wx5A + then yyQ14(strm', yyMATCH(strm, yyAction17, yyNO_MATCH)) + else yyAction17(strm, yyNO_MATCH) + else yyAction17(strm, yyNO_MATCH) + else if inp = 0wx75 + then yyQ14(strm', yyMATCH(strm, yyAction17, yyNO_MATCH)) + else if inp < 0wx75 + then if inp = 0wx74 + then yyQ17(strm', yyMATCH(strm, yyAction17, yyNO_MATCH)) + else yyQ14(strm', yyMATCH(strm, yyAction17, yyNO_MATCH)) + else if inp <= 0wx7A + then yyQ14(strm', yyMATCH(strm, yyAction17, yyNO_MATCH)) + else yyAction17(strm, yyNO_MATCH) + (* end case *)) +fun yyQ15 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction17(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = 0wx54 + then yyQ14(strm', yyMATCH(strm, yyAction17, yyNO_MATCH)) + else if inp < 0wx54 + then if inp = 0wx30 + then yyQ14(strm', yyMATCH(strm, yyAction17, yyNO_MATCH)) + else if inp < 0wx30 + then if inp = 0wx2D + then yyQ14(strm', yyMATCH(strm, yyAction17, yyNO_MATCH)) + else if inp < 0wx2D + then yyAction17(strm, yyNO_MATCH) + else if inp = 0wx2F + then yyAction17(strm, yyNO_MATCH) + else yyQ14(strm', yyMATCH(strm, yyAction17, yyNO_MATCH)) + else if inp = 0wx41 + then yyQ14(strm', yyMATCH(strm, yyAction17, yyNO_MATCH)) + else if inp < 0wx41 + then if inp <= 0wx3A + then yyQ14(strm', yyMATCH(strm, yyAction17, yyNO_MATCH)) + else yyAction17(strm, yyNO_MATCH) + else if inp = 0wx53 + then yyQ16(strm', yyMATCH(strm, yyAction17, yyNO_MATCH)) + else yyQ14(strm', yyMATCH(strm, yyAction17, yyNO_MATCH)) + else if inp = 0wx61 + then yyQ14(strm', yyMATCH(strm, yyAction17, yyNO_MATCH)) + else if inp < 0wx61 + then if inp = 0wx5F + then yyQ14(strm', yyMATCH(strm, yyAction17, yyNO_MATCH)) + else if inp < 0wx5F + then if inp <= 0wx5A + then yyQ14(strm', yyMATCH(strm, yyAction17, yyNO_MATCH)) + else yyAction17(strm, yyNO_MATCH) + else yyAction17(strm, yyNO_MATCH) + else if inp = 0wx74 + then yyQ14(strm', yyMATCH(strm, yyAction17, yyNO_MATCH)) + else if inp < 0wx74 + then if inp = 0wx73 + then yyQ16(strm', yyMATCH(strm, yyAction17, yyNO_MATCH)) + else yyQ14(strm', yyMATCH(strm, yyAction17, yyNO_MATCH)) + else if inp <= 0wx7A + then yyQ14(strm', yyMATCH(strm, yyAction17, yyNO_MATCH)) + else yyAction17(strm, yyNO_MATCH) + (* end case *)) +fun yyQ13 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction17(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = 0wx5A + then yyQ14(strm', yyMATCH(strm, yyAction17, yyNO_MATCH)) + else if inp < 0wx5A + then if inp = 0wx30 + then yyQ14(strm', yyMATCH(strm, yyAction17, yyNO_MATCH)) + else if inp < 0wx30 + then if inp = 0wx2D + then yyQ14(strm', yyMATCH(strm, yyAction17, yyNO_MATCH)) + else if inp < 0wx2D + then yyAction17(strm, yyNO_MATCH) + else if inp = 0wx2F + then yyAction17(strm, yyNO_MATCH) + else yyQ14(strm', yyMATCH(strm, yyAction17, yyNO_MATCH)) + else if inp = 0wx41 + then yyQ14(strm', yyMATCH(strm, yyAction17, yyNO_MATCH)) + else if inp < 0wx41 + then if inp <= 0wx3A + then yyQ14(strm', yyMATCH(strm, yyAction17, yyNO_MATCH)) + else yyAction17(strm, yyNO_MATCH) + else if inp = 0wx59 + then yyQ15(strm', yyMATCH(strm, yyAction17, yyNO_MATCH)) + else yyQ14(strm', yyMATCH(strm, yyAction17, yyNO_MATCH)) + else if inp = 0wx61 + then yyQ14(strm', yyMATCH(strm, yyAction17, yyNO_MATCH)) + else if inp < 0wx61 + then if inp = 0wx5F + then yyQ14(strm', yyMATCH(strm, yyAction17, yyNO_MATCH)) + else yyAction17(strm, yyNO_MATCH) + else if inp = 0wx7A + then yyQ14(strm', yyMATCH(strm, yyAction17, yyNO_MATCH)) + else if inp < 0wx7A + then if inp = 0wx79 + then yyQ15(strm', yyMATCH(strm, yyAction17, yyNO_MATCH)) + else yyQ14(strm', yyMATCH(strm, yyAction17, yyNO_MATCH)) + else yyAction17(strm, yyNO_MATCH) + (* end case *)) +fun yyQ24 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction7(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = 0wx41 + then yyQ14(strm', yyMATCH(strm, yyAction7, yyNO_MATCH)) + else if inp < 0wx41 + then if inp = 0wx2F + then yyAction7(strm, yyNO_MATCH) + else if inp < 0wx2F + then if inp <= 0wx2C + then yyAction7(strm, yyNO_MATCH) + else yyQ14(strm', yyMATCH(strm, yyAction7, yyNO_MATCH)) + else if inp <= 0wx3A + then yyQ14(strm', yyMATCH(strm, yyAction7, yyNO_MATCH)) + else yyAction7(strm, yyNO_MATCH) + else if inp = 0wx60 + then yyAction7(strm, yyNO_MATCH) + else if inp < 0wx60 + then if inp = 0wx5B + then yyAction7(strm, yyNO_MATCH) + else if inp < 0wx5B + then yyQ14(strm', yyMATCH(strm, yyAction7, yyNO_MATCH)) + else if inp = 0wx5F + then yyQ14(strm', yyMATCH(strm, yyAction7, yyNO_MATCH)) + else yyAction7(strm, yyNO_MATCH) + else if inp <= 0wx7A + then yyQ14(strm', yyMATCH(strm, yyAction7, yyNO_MATCH)) + else yyAction7(strm, yyNO_MATCH) + (* end case *)) +fun yyQ23 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction17(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = 0wx44 + then yyQ14(strm', yyMATCH(strm, yyAction17, yyNO_MATCH)) + else if inp < 0wx44 + then if inp = 0wx30 + then yyQ14(strm', yyMATCH(strm, yyAction17, yyNO_MATCH)) + else if inp < 0wx30 + then if inp = 0wx2D + then yyQ14(strm', yyMATCH(strm, yyAction17, yyNO_MATCH)) + else if inp < 0wx2D + then yyAction17(strm, yyNO_MATCH) + else if inp = 0wx2F + then yyAction17(strm, yyNO_MATCH) + else yyQ14(strm', yyMATCH(strm, yyAction17, yyNO_MATCH)) + else if inp = 0wx41 + then yyQ14(strm', yyMATCH(strm, yyAction17, yyNO_MATCH)) + else if inp < 0wx41 + then if inp <= 0wx3A + then yyQ14(strm', yyMATCH(strm, yyAction17, yyNO_MATCH)) + else yyAction17(strm, yyNO_MATCH) + else if inp = 0wx43 + then yyQ24(strm', yyMATCH(strm, yyAction17, yyNO_MATCH)) + else yyQ14(strm', yyMATCH(strm, yyAction17, yyNO_MATCH)) + else if inp = 0wx61 + then yyQ14(strm', yyMATCH(strm, yyAction17, yyNO_MATCH)) + else if inp < 0wx61 + then if inp = 0wx5F + then yyQ14(strm', yyMATCH(strm, yyAction17, yyNO_MATCH)) + else if inp < 0wx5F + then if inp <= 0wx5A + then yyQ14(strm', yyMATCH(strm, yyAction17, yyNO_MATCH)) + else yyAction17(strm, yyNO_MATCH) + else yyAction17(strm, yyNO_MATCH) + else if inp = 0wx64 + then yyQ14(strm', yyMATCH(strm, yyAction17, yyNO_MATCH)) + else if inp < 0wx64 + then if inp = 0wx63 + then yyQ24(strm', yyMATCH(strm, yyAction17, yyNO_MATCH)) + else yyQ14(strm', yyMATCH(strm, yyAction17, yyNO_MATCH)) + else if inp <= 0wx7A + then yyQ14(strm', yyMATCH(strm, yyAction17, yyNO_MATCH)) + else yyAction17(strm, yyNO_MATCH) + (* end case *)) +fun yyQ22 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction17(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = 0wx4A + then yyQ14(strm', yyMATCH(strm, yyAction17, yyNO_MATCH)) + else if inp < 0wx4A + then if inp = 0wx30 + then yyQ14(strm', yyMATCH(strm, yyAction17, yyNO_MATCH)) + else if inp < 0wx30 + then if inp = 0wx2D + then yyQ14(strm', yyMATCH(strm, yyAction17, yyNO_MATCH)) + else if inp < 0wx2D + then yyAction17(strm, yyNO_MATCH) + else if inp = 0wx2F + then yyAction17(strm, yyNO_MATCH) + else yyQ14(strm', yyMATCH(strm, yyAction17, yyNO_MATCH)) + else if inp = 0wx41 + then yyQ14(strm', yyMATCH(strm, yyAction17, yyNO_MATCH)) + else if inp < 0wx41 + then if inp <= 0wx3A + then yyQ14(strm', yyMATCH(strm, yyAction17, yyNO_MATCH)) + else yyAction17(strm, yyNO_MATCH) + else if inp = 0wx49 + then yyQ23(strm', yyMATCH(strm, yyAction17, yyNO_MATCH)) + else yyQ14(strm', yyMATCH(strm, yyAction17, yyNO_MATCH)) + else if inp = 0wx61 + then yyQ14(strm', yyMATCH(strm, yyAction17, yyNO_MATCH)) + else if inp < 0wx61 + then if inp = 0wx5F + then yyQ14(strm', yyMATCH(strm, yyAction17, yyNO_MATCH)) + else if inp < 0wx5F + then if inp <= 0wx5A + then yyQ14(strm', yyMATCH(strm, yyAction17, yyNO_MATCH)) + else yyAction17(strm, yyNO_MATCH) + else yyAction17(strm, yyNO_MATCH) + else if inp = 0wx6A + then yyQ14(strm', yyMATCH(strm, yyAction17, yyNO_MATCH)) + else if inp < 0wx6A + then if inp = 0wx69 + then yyQ23(strm', yyMATCH(strm, yyAction17, yyNO_MATCH)) + else yyQ14(strm', yyMATCH(strm, yyAction17, yyNO_MATCH)) + else if inp <= 0wx7A + then yyQ14(strm', yyMATCH(strm, yyAction17, yyNO_MATCH)) + else yyAction17(strm, yyNO_MATCH) + (* end case *)) +fun yyQ21 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction17(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = 0wx4D + then yyQ14(strm', yyMATCH(strm, yyAction17, yyNO_MATCH)) + else if inp < 0wx4D + then if inp = 0wx30 + then yyQ14(strm', yyMATCH(strm, yyAction17, yyNO_MATCH)) + else if inp < 0wx30 + then if inp = 0wx2D + then yyQ14(strm', yyMATCH(strm, yyAction17, yyNO_MATCH)) + else if inp < 0wx2D + then yyAction17(strm, yyNO_MATCH) + else if inp = 0wx2F + then yyAction17(strm, yyNO_MATCH) + else yyQ14(strm', yyMATCH(strm, yyAction17, yyNO_MATCH)) + else if inp = 0wx41 + then yyQ14(strm', yyMATCH(strm, yyAction17, yyNO_MATCH)) + else if inp < 0wx41 + then if inp <= 0wx3A + then yyQ14(strm', yyMATCH(strm, yyAction17, yyNO_MATCH)) + else yyAction17(strm, yyNO_MATCH) + else if inp = 0wx4C + then yyQ22(strm', yyMATCH(strm, yyAction17, yyNO_MATCH)) + else yyQ14(strm', yyMATCH(strm, yyAction17, yyNO_MATCH)) + else if inp = 0wx61 + then yyQ14(strm', yyMATCH(strm, yyAction17, yyNO_MATCH)) + else if inp < 0wx61 + then if inp = 0wx5F + then yyQ14(strm', yyMATCH(strm, yyAction17, yyNO_MATCH)) + else if inp < 0wx5F + then if inp <= 0wx5A + then yyQ14(strm', yyMATCH(strm, yyAction17, yyNO_MATCH)) + else yyAction17(strm, yyNO_MATCH) + else yyAction17(strm, yyNO_MATCH) + else if inp = 0wx6D + then yyQ14(strm', yyMATCH(strm, yyAction17, yyNO_MATCH)) + else if inp < 0wx6D + then if inp = 0wx6C + then yyQ22(strm', yyMATCH(strm, yyAction17, yyNO_MATCH)) + else yyQ14(strm', yyMATCH(strm, yyAction17, yyNO_MATCH)) + else if inp <= 0wx7A + then yyQ14(strm', yyMATCH(strm, yyAction17, yyNO_MATCH)) + else yyAction17(strm, yyNO_MATCH) + (* end case *)) +fun yyQ20 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction17(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = 0wx43 + then yyQ14(strm', yyMATCH(strm, yyAction17, yyNO_MATCH)) + else if inp < 0wx43 + then if inp = 0wx30 + then yyQ14(strm', yyMATCH(strm, yyAction17, yyNO_MATCH)) + else if inp < 0wx30 + then if inp = 0wx2D + then yyQ14(strm', yyMATCH(strm, yyAction17, yyNO_MATCH)) + else if inp < 0wx2D + then yyAction17(strm, yyNO_MATCH) + else if inp = 0wx2F + then yyAction17(strm, yyNO_MATCH) + else yyQ14(strm', yyMATCH(strm, yyAction17, yyNO_MATCH)) + else if inp = 0wx41 + then yyQ14(strm', yyMATCH(strm, yyAction17, yyNO_MATCH)) + else if inp < 0wx41 + then if inp <= 0wx3A + then yyQ14(strm', yyMATCH(strm, yyAction17, yyNO_MATCH)) + else yyAction17(strm, yyNO_MATCH) + else yyQ21(strm', yyMATCH(strm, yyAction17, yyNO_MATCH)) + else if inp = 0wx61 + then yyQ14(strm', yyMATCH(strm, yyAction17, yyNO_MATCH)) + else if inp < 0wx61 + then if inp = 0wx5F + then yyQ14(strm', yyMATCH(strm, yyAction17, yyNO_MATCH)) + else if inp < 0wx5F + then if inp <= 0wx5A + then yyQ14(strm', yyMATCH(strm, yyAction17, yyNO_MATCH)) + else yyAction17(strm, yyNO_MATCH) + else yyAction17(strm, yyNO_MATCH) + else if inp = 0wx63 + then yyQ14(strm', yyMATCH(strm, yyAction17, yyNO_MATCH)) + else if inp < 0wx63 + then yyQ21(strm', yyMATCH(strm, yyAction17, yyNO_MATCH)) + else if inp <= 0wx7A + then yyQ14(strm', yyMATCH(strm, yyAction17, yyNO_MATCH)) + else yyAction17(strm, yyNO_MATCH) + (* end case *)) +fun yyQ12 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction17(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = 0wx56 + then yyQ14(strm', yyMATCH(strm, yyAction17, yyNO_MATCH)) + else if inp < 0wx56 + then if inp = 0wx30 + then yyQ14(strm', yyMATCH(strm, yyAction17, yyNO_MATCH)) + else if inp < 0wx30 + then if inp = 0wx2D + then yyQ14(strm', yyMATCH(strm, yyAction17, yyNO_MATCH)) + else if inp < 0wx2D + then yyAction17(strm, yyNO_MATCH) + else if inp = 0wx2F + then yyAction17(strm, yyNO_MATCH) + else yyQ14(strm', yyMATCH(strm, yyAction17, yyNO_MATCH)) + else if inp = 0wx41 + then yyQ14(strm', yyMATCH(strm, yyAction17, yyNO_MATCH)) + else if inp < 0wx41 + then if inp <= 0wx3A + then yyQ14(strm', yyMATCH(strm, yyAction17, yyNO_MATCH)) + else yyAction17(strm, yyNO_MATCH) + else if inp = 0wx55 + then yyQ20(strm', yyMATCH(strm, yyAction17, yyNO_MATCH)) + else yyQ14(strm', yyMATCH(strm, yyAction17, yyNO_MATCH)) + else if inp = 0wx61 + then yyQ14(strm', yyMATCH(strm, yyAction17, yyNO_MATCH)) + else if inp < 0wx61 + then if inp = 0wx5F + then yyQ14(strm', yyMATCH(strm, yyAction17, yyNO_MATCH)) + else if inp < 0wx5F + then if inp <= 0wx5A + then yyQ14(strm', yyMATCH(strm, yyAction17, yyNO_MATCH)) + else yyAction17(strm, yyNO_MATCH) + else yyAction17(strm, yyNO_MATCH) + else if inp = 0wx76 + then yyQ14(strm', yyMATCH(strm, yyAction17, yyNO_MATCH)) + else if inp < 0wx76 + then if inp = 0wx75 + then yyQ20(strm', yyMATCH(strm, yyAction17, yyNO_MATCH)) + else yyQ14(strm', yyMATCH(strm, yyAction17, yyNO_MATCH)) + else if inp <= 0wx7A + then yyQ14(strm', yyMATCH(strm, yyAction17, yyNO_MATCH)) + else yyAction17(strm, yyNO_MATCH) + (* end case *)) +fun yyQ11 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction11(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction11(strm, yyNO_MATCH) + (* end case *)) +fun yyQ10 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction17(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = 0wx41 + then yyQ14(strm', yyMATCH(strm, yyAction17, yyNO_MATCH)) + else if inp < 0wx41 + then if inp = 0wx2F + then yyAction17(strm, yyNO_MATCH) + else if inp < 0wx2F + then if inp <= 0wx2C + then yyAction17(strm, yyNO_MATCH) + else yyQ14(strm', yyMATCH(strm, yyAction17, yyNO_MATCH)) + else if inp <= 0wx3A + then yyQ14(strm', yyMATCH(strm, yyAction17, yyNO_MATCH)) + else yyAction17(strm, yyNO_MATCH) + else if inp = 0wx60 + then yyAction17(strm, yyNO_MATCH) + else if inp < 0wx60 + then if inp = 0wx5B + then yyAction17(strm, yyNO_MATCH) + else if inp < 0wx5B + then yyQ14(strm', yyMATCH(strm, yyAction17, yyNO_MATCH)) + else if inp = 0wx5F + then yyQ14(strm', yyMATCH(strm, yyAction17, yyNO_MATCH)) + else yyAction17(strm, yyNO_MATCH) + else if inp <= 0wx7A + then yyQ14(strm', yyMATCH(strm, yyAction17, yyNO_MATCH)) + else yyAction17(strm, yyNO_MATCH) + (* end case *)) +fun yyQ26 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction10(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction10(strm, yyNO_MATCH) + (* end case *)) +fun yyQ25 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = 0wx3C + then yystuck(lastMatch) + else if inp < 0wx3C + then if inp = 0wx22 + then yystuck(lastMatch) + else if inp < 0wx22 + then if inp = 0wxB + then yystuck(lastMatch) + else if inp < 0wxB + then if inp <= 0wx8 + then yystuck(lastMatch) + else yyQ25(strm', lastMatch) + else if inp <= 0wx1F + then yystuck(lastMatch) + else yyQ25(strm', lastMatch) + else if inp = 0wx27 + then yyQ26(strm', lastMatch) + else if inp < 0wx27 + then if inp = 0wx26 + then yystuck(lastMatch) + else yyQ25(strm', lastMatch) + else yyQ25(strm', lastMatch) + else if inp = 0wx5F + then yyQ25(strm', lastMatch) + else if inp < 0wx5F + then if inp = 0wx3F + then yyQ25(strm', lastMatch) + else if inp < 0wx3F + then if inp = 0wx3D + then yyQ25(strm', lastMatch) + else yystuck(lastMatch) + else if inp <= 0wx5A + then yyQ25(strm', lastMatch) + else yystuck(lastMatch) + else if inp = 0wx61 + then yyQ25(strm', lastMatch) + else if inp < 0wx61 + then yystuck(lastMatch) + else if inp <= 0wx7A + then yyQ25(strm', lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ9 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction40(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = 0wx3C + then yyAction40(strm, yyNO_MATCH) + else if inp < 0wx3C + then if inp = 0wx22 + then yyAction40(strm, yyNO_MATCH) + else if inp < 0wx22 + then if inp = 0wxB + then yyAction40(strm, yyNO_MATCH) + else if inp < 0wxB + then if inp <= 0wx8 + then yyAction40(strm, yyNO_MATCH) + else yyQ25(strm', yyMATCH(strm, yyAction40, yyNO_MATCH)) + else if inp <= 0wx1F + then yyAction40(strm, yyNO_MATCH) + else yyQ25(strm', yyMATCH(strm, yyAction40, yyNO_MATCH)) + else if inp = 0wx27 + then yyQ26(strm', yyMATCH(strm, yyAction40, yyNO_MATCH)) + else if inp < 0wx27 + then if inp = 0wx26 + then yyAction40(strm, yyNO_MATCH) + else yyQ25(strm', yyMATCH(strm, yyAction40, yyNO_MATCH)) + else yyQ25(strm', yyMATCH(strm, yyAction40, yyNO_MATCH)) + else if inp = 0wx5F + then yyQ25(strm', yyMATCH(strm, yyAction40, yyNO_MATCH)) + else if inp < 0wx5F + then if inp = 0wx3F + then yyQ25(strm', yyMATCH(strm, yyAction40, yyNO_MATCH)) + else if inp < 0wx3F + then if inp = 0wx3D + then yyQ25(strm', yyMATCH(strm, yyAction40, yyNO_MATCH)) + else yyAction40(strm, yyNO_MATCH) + else if inp <= 0wx5A + then yyQ25(strm', yyMATCH(strm, yyAction40, yyNO_MATCH)) + else yyAction40(strm, yyNO_MATCH) + else if inp = 0wx61 + then yyQ25(strm', yyMATCH(strm, yyAction40, yyNO_MATCH)) + else if inp < 0wx61 + then yyAction40(strm, yyNO_MATCH) + else if inp <= 0wx7A + then yyQ25(strm', yyMATCH(strm, yyAction40, yyNO_MATCH)) + else yyAction40(strm, yyNO_MATCH) + (* end case *)) +fun yyQ28 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction9(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction9(strm, yyNO_MATCH) + (* end case *)) +fun yyQ27 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yystuck(lastMatch) + | SOME(inp, strm') => + if inp = 0wx3C + then yystuck(lastMatch) + else if inp < 0wx3C + then if inp = 0wx22 + then yyQ28(strm', lastMatch) + else if inp < 0wx22 + then if inp = 0wxB + then yystuck(lastMatch) + else if inp < 0wxB + then if inp <= 0wx8 + then yystuck(lastMatch) + else yyQ27(strm', lastMatch) + else if inp <= 0wx1F + then yystuck(lastMatch) + else yyQ27(strm', lastMatch) + else if inp = 0wx26 + then yystuck(lastMatch) + else yyQ27(strm', lastMatch) + else if inp = 0wx5F + then yyQ27(strm', lastMatch) + else if inp < 0wx5F + then if inp = 0wx3F + then yyQ27(strm', lastMatch) + else if inp < 0wx3F + then if inp = 0wx3D + then yyQ27(strm', lastMatch) + else yystuck(lastMatch) + else if inp <= 0wx5A + then yyQ27(strm', lastMatch) + else yystuck(lastMatch) + else if inp = 0wx61 + then yyQ27(strm', lastMatch) + else if inp < 0wx61 + then yystuck(lastMatch) + else if inp <= 0wx7A + then yyQ27(strm', lastMatch) + else yystuck(lastMatch) + (* end case *)) +fun yyQ8 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction40(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = 0wx3C + then yyAction40(strm, yyNO_MATCH) + else if inp < 0wx3C + then if inp = 0wx22 + then yyQ28(strm', yyMATCH(strm, yyAction40, yyNO_MATCH)) + else if inp < 0wx22 + then if inp = 0wxB + then yyAction40(strm, yyNO_MATCH) + else if inp < 0wxB + then if inp <= 0wx8 + then yyAction40(strm, yyNO_MATCH) + else yyQ27(strm', yyMATCH(strm, yyAction40, yyNO_MATCH)) + else if inp <= 0wx1F + then yyAction40(strm, yyNO_MATCH) + else yyQ27(strm', yyMATCH(strm, yyAction40, yyNO_MATCH)) + else if inp = 0wx26 + then yyAction40(strm, yyNO_MATCH) + else yyQ27(strm', yyMATCH(strm, yyAction40, yyNO_MATCH)) + else if inp = 0wx5F + then yyQ27(strm', yyMATCH(strm, yyAction40, yyNO_MATCH)) + else if inp < 0wx5F + then if inp = 0wx3F + then yyQ27(strm', yyMATCH(strm, yyAction40, yyNO_MATCH)) + else if inp < 0wx3F + then if inp = 0wx3D + then yyQ27(strm', yyMATCH(strm, yyAction40, yyNO_MATCH)) + else yyAction40(strm, yyNO_MATCH) + else if inp <= 0wx5A + then yyQ27(strm', yyMATCH(strm, yyAction40, yyNO_MATCH)) + else yyAction40(strm, yyNO_MATCH) + else if inp = 0wx61 + then yyQ27(strm', yyMATCH(strm, yyAction40, yyNO_MATCH)) + else if inp < 0wx61 + then yyAction40(strm, yyNO_MATCH) + else if inp <= 0wx7A + then yyQ27(strm', yyMATCH(strm, yyAction40, yyNO_MATCH)) + else yyAction40(strm, yyNO_MATCH) + (* end case *)) +fun yyQ7 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction12(strm, yyNO_MATCH) + | SOME(inp, strm') => + if inp = 0wxE + then yyAction12(strm, yyNO_MATCH) + else if inp < 0wxE + then if inp <= 0wx8 + then yyAction12(strm, yyNO_MATCH) + else yyQ29(strm', yyMATCH(strm, yyAction12, yyNO_MATCH)) + else if inp = 0wx20 + then yyQ29(strm', yyMATCH(strm, yyAction12, yyNO_MATCH)) + else yyAction12(strm, yyNO_MATCH) + (* end case *)) +fun yyQ6 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => yyAction40(strm, yyNO_MATCH) + | SOME(inp, strm') => yyAction40(strm, yyNO_MATCH) + (* end case *)) +fun yyQ0 (strm, lastMatch : yymatch) = (case (yygetc(strm)) + of NONE => + if ULexBuffer.eof(!(yystrm)) + then let + val yycolno = ref(yygetcolNo(!(yystrm))) + val yylineno = ref(yygetlineNo(!(yystrm))) + in + (case (!(yyss)) + of _ => (UserDeclarations.eof()) + (* end case *)) + end + else yystuck(lastMatch) + | SOME(inp, strm') => + if inp = 0wx41 + then yyQ10(strm', lastMatch) + else if inp < 0wx41 + then if inp = 0wx23 + then yyQ6(strm', lastMatch) + else if inp < 0wx23 + then if inp = 0wx20 + then yyQ7(strm', lastMatch) + else if inp < 0wx20 + then if inp = 0wx9 + then yyQ7(strm', lastMatch) + else if inp < 0wx9 + then yyQ6(strm', lastMatch) + else if inp <= 0wxD + then yyQ7(strm', lastMatch) + else yyQ6(strm', lastMatch) + else if inp = 0wx21 + then yyQ6(strm', lastMatch) + else yyQ8(strm', lastMatch) + else if inp = 0wx3A + then yyQ10(strm', lastMatch) + else if inp < 0wx3A + then if inp = 0wx27 + then yyQ9(strm', lastMatch) + else yyQ6(strm', lastMatch) + else if inp = 0wx3E + then yyQ11(strm', lastMatch) + else yyQ6(strm', lastMatch) + else if inp = 0wx60 + then yyQ6(strm', lastMatch) + else if inp < 0wx60 + then if inp = 0wx53 + then yyQ13(strm', lastMatch) + else if inp < 0wx53 + then if inp = 0wx50 + then yyQ12(strm', lastMatch) + else yyQ10(strm', lastMatch) + else if inp = 0wx5B + then yyQ6(strm', lastMatch) + else if inp < 0wx5B + then yyQ10(strm', lastMatch) + else if inp = 0wx5F + then yyQ10(strm', lastMatch) + else yyQ6(strm', lastMatch) + else if inp = 0wx73 + then yyQ13(strm', lastMatch) + else if inp < 0wx73 + then if inp = 0wx70 + then yyQ12(strm', lastMatch) + else yyQ10(strm', lastMatch) + else if inp <= 0wx7A + then yyQ10(strm', lastMatch) + else yyQ6(strm', lastMatch) + (* end case *)) +in + (case (!(yyss)) + of DOCTYPE => yyQ0(!(yystrm), yyNO_MATCH) + | COM => yyQ1(!(yystrm), yyNO_MATCH) + | LIT2 => yyQ2(!(yystrm), yyNO_MATCH) + | LIT1 => yyQ3(!(yystrm), yyNO_MATCH) + | INITIAL => yyQ4(!(yystrm), yyNO_MATCH) + | TAG => yyQ5(!(yystrm), yyNO_MATCH) + (* end case *)) +end +end + and skip() = (yystartPos := yygetPos(); + yylastwasnref := ULexBuffer.lastWasNL (!yystrm); + continue()) + in (continue(), (!yystartPos, yygetPos()-1), !yystrm, !yyss) end + in + lex() + end + in + type pos = AntlrStreamPos.pos + type span = AntlrStreamPos.span + type tok = UserDeclarations.lex_result + + datatype prestrm = STRM of ULexBuffer.stream * + (yystart_state * tok * span * prestrm * yystart_state) option ref + type strm = (prestrm * yystart_state) + + fun lex sm +(yyarg as lexErr)(STRM (yystrm, memo), ss) = (case !memo + of NONE => let + val (tok, span, yystrm', ss') = innerLex +yyarg(yystrm, ss, sm) + val strm' = STRM (yystrm', ref NONE); + in + memo := SOME (ss, tok, span, strm', ss'); + (tok, span, (strm', ss')) + end + | SOME (ss', tok, span, strm', ss'') => + if ss = ss' then + (tok, span, (strm', ss'')) + else ( + memo := NONE; + lex sm +yyarg(STRM (yystrm, memo), ss)) + (* end case *)) + + fun streamify input = (STRM (yystreamify' 0 input, ref NONE), INITIAL) + fun streamifyReader readFn strm = (STRM (yystreamifyReader' 0 readFn strm, ref NONE), + INITIAL) + fun streamifyInstream strm = (STRM (yystreamifyInstream' 0 strm, ref NONE), + INITIAL) + + fun getPos (STRM (strm, _), _) = ULexBuffer.getpos strm + + end +end + diff --git a/smlnj-lib/XML/xml-lib.cm b/smlnj-lib/XML/xml-lib.cm new file mode 100644 index 0000000..1271e4c --- /dev/null +++ b/smlnj-lib/XML/xml-lib.cm @@ -0,0 +1,32 @@ +(* xml-lib.cm + * + * COPYRIGHT (c) 2010 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + *) + +Library + + signature XML_SCHEMA + signature XML_TREE + signature XML_PARSER + + functor XMLTreeFn + functor XMLParserFn + + structure GenericXMLTree + +is + + $/basis.cm + $/smlnj-lib.cm + $/ml-lpt-lib.cm + + generic-xml-tree.sml + + xml-parser-fn.sml + xml-schema-sig.sml + xml-tokens.sml + xml-tree-fn.sml + xml-tree-sig.sml + + xml-lexer.lex : ml-ulex diff --git a/smlnj-lib/XML/xml-parser-fn.sml b/smlnj-lib/XML/xml-parser-fn.sml new file mode 100644 index 0000000..afdb912 --- /dev/null +++ b/smlnj-lib/XML/xml-parser-fn.sml @@ -0,0 +1,310 @@ +(* xml-parser-fn.sml + * + * COPYRIGHT (c) 2013 The Fellowship of SML/NJ http://www.smlnj.org) + * All rights reserved. + *) + +signature XML_PARSER = + sig + + structure XMLTree : XML_TREE + + val parseFile : string -> XMLTree.tree + + exception ParseError of string + + end + +functor XMLParserFn (XT : XML_TREE) : XML_PARSER = + struct + + structure XMLTree = XT + structure XS = XT.Schema + structure Tok = XMLTokens + + (***** Error messages *****) + + exception ParseError of string + + datatype error_tag + = S of string + | TK of Tok.token + | E of XT.Schema.element + + fun error msg = let + fun cvt (S s, l) = s :: l + | cvt (TK tok, l) = XMLTokens.toString tok :: l + | cvt (E elem, l) = XS.toString elem :: l + in + raise ParseError(String.concat(List.foldr cvt [] msg)) + end + + (***** Token streams wrap the ML-ULex generated lexer ***** + * + * We cache tokens to avoid rescanning the source. + *) + + type lexer_state = XMLLexer.prestrm * XMLLexer.yystart_state + + datatype token_strm_rep + = TOK of {tok : Tok.token, span : XMLLexer.span, more : token_strm} + | MORE of { + state : lexer_state, + get : lexer_state -> Tok.token * XMLLexer.span * lexer_state + } + + withtype token_strm = token_strm_rep ref + + fun newTokenStrm (initialState, lexFn) = + ref(MORE{state = initialState, get=lexFn}) + + fun nextTok (ref(TOK{tok, span, more})) = (tok, span, more) + | nextTok (strm as ref(MORE{state, get})) = let + val (tok, span, state) = get state + val more = ref(MORE{state=state, get=get}) + val rep = TOK{tok=tok, span=span, more=more} + in + strm := rep; (* cache lexer result *) + (tok, span, more) + end + + (* skip whitespace and comments *) + fun skipWS tokStrm = (case nextTok tokStrm + of (Tok.WS _, _, tokStrm) => skipWS tokStrm + | (Tok.COM _, _, tokStrm) => skipWS tokStrm + | _ => tokStrm + (* end case *)) + + (****** Tracking the content of an element *****) + + type content = XT.content list + + type state = { + content : content, (* parsed content in reverse order *) + preWS : string option (* preceeding WS when we are not preserving whitespace *) + } + +(* FIXME: this function doesn't seem right *) + fun mergeWS (NONE, content) = content + | mergeWS (SOME ws, XT.TEXT txt :: content) = XT.TEXT(txt ^ ws) :: content + | mergeWS (SOME s, content) = XT.TEXT s :: content + + fun addElem ({content, preWS}, elem) = + {content = elem :: mergeWS (preWS, content), preWS = NONE} + + fun addWS ({content, preWS}, ws) = (case preWS + of SOME ws' => {content = content, preWS = SOME(ws' ^ ws)} + | NONE => {content = content, preWS = SOME ws} + (* end case *)) + + fun addCom (state, com) = state (* FIXME*) + + fun addText ({content, preWS}, txt) = let + val content = (case (preWS, content) + of (NONE, XT.TEXT txt' :: content) => XT.TEXT(txt' ^ txt) :: content + | (NONE, content) => XT.TEXT txt :: content + | (SOME ws, XT.TEXT txt' :: content) => XT.TEXT(concat[txt', ws, txt]) :: content + | (SOME ws, content) => XT.TEXT(txt ^ ws) :: content + (* end case *)) + in + {content = content, preWS = NONE} + end + + fun addCData ({content, preWS}, cdata) = + {content = XT.CDATA cdata :: mergeWS (preWS, content), preWS = NONE} + + fun finish ({content, preWS} : state) = List.rev content + + (***** Parsing *****) + + fun parser (name, inStrm) = let + val srcMap = AntlrStreamPos.mkSourcemap' name + fun err (span, msg) = + error(S "Error [" :: S(AntlrStreamPos.spanToString srcMap span) :: S "]: " :: msg) + (* scan an element identifier *) + fun getElementId tokStrm = (case nextTok tokStrm + of (Tok.ID id, span, tokStrm) => (case XS.element id + of SOME elem => (elem, tokStrm) + | NONE => err(span, [S "unrecognized element ", S id]) + (* end case *)) + | (tok, span, _) => err(span, [S "expected identifier, but found ", TK tok]) + (* end case *)) + (* parse the attributes of a start tag. We expect: (ID "=" LIT)* *) + fun parseAttributes tokStrm = let + fun parseAttr (tokStrm, attrs) = (case nextTok tokStrm + of (Tok.ID id, _, tokStrm) => (case nextTok tokStrm + of (Tok.SYM_EQ, _, tokStrm) => (case nextTok tokStrm + of (Tok.LIT v, _, tokStrm) => + parseAttr (tokStrm, XS.attribute(id, v)::attrs) + | (tok, span, _) => err(span, [S "expected attribute value, but found ", TK tok]) + (* end case *)) + | (tok, span, _) => err(span, [S "expected \"=\", but found ", TK tok]) + (* end case *)) + | _ => (List.rev attrs, tokStrm) + (* end case *)) + in + parseAttr (tokStrm, []) + end + (* parse an element. We assume that the initial "<" has been consumed. *) + fun parseElement (tokStrm, preserveWS) = let + val (elem, tokStrm) = getElementId tokStrm + val (attrs, tokStrm) = parseAttributes tokStrm + in + case (nextTok tokStrm) + of (Tok.CLOSE_TAG, _, tokStrm) => let + val preserveWS = preserveWS orelse XS.preserveWS elem + val (content, tokStrm) = parseContent (tokStrm, preserveWS, XS.preserveComment elem) + in + (* here we expect to see the matching close tag for the element *) + case nextTok tokStrm + of (Tok.OPEN_END_TAG, span, tokStrm) => let + val (elem', tokStrm) = getElementId tokStrm + in + if XS.same(elem, elem') + then (case nextTok tokStrm + of (Tok.CLOSE_TAG, _, tokStrm) => + (XT.ELEMENT{name=elem, attrs=attrs, content=content}, tokStrm) + | (tok, span, _) => err (span, [ + S "expected \">\", but found ", TK tok + ]) + (* end case *)) + else err (span, [ + S "mismatched close tag: expected ", E elem, S ", but found ", E elem' + ]) + end + | (tok, span, _) => err(span, [ + S "impossible: unexpected ", TK tok, + S " when expected" + ]) + (* end case *) + end + | (Tok.CLOSE_EMPTY_TAG, _, tokStrm) => + (XT.ELEMENT{name=elem, attrs=attrs, content=[]}, tokStrm) + | (tok, span, _) => err(span, [S "expected \">\" or \"/>\", but found ", TK tok]) + (* end case *) + end + (* parse the content of an element; we return when we *) + and parseContent (tokStrm, preserveWS, preserveCom) : (XT.content list * token_strm) = let + fun parse (tokStrm, state) = (case nextTok tokStrm + of (Tok.EOF, _, _) => (finish state, tokStrm) + | (Tok.OPEN_START_TAG, _, tokStrm) => let + val (elem, tokStrm) = parseElement (tokStrm, preserveWS) + in + parse (tokStrm, addElem(state, elem)) + end + | (Tok.OPEN_END_TAG, _, _) => (finish state, tokStrm) + | (Tok.WS s, _, tokStrm) => + if preserveWS + then parse (tokStrm, addText(state, s)) + else parse (tokStrm, addWS(state, s)) + | (Tok.TEXT s, _, tokStrm) => parse (tokStrm, addText(state, s)) + | (Tok.COM s, _, tokStrm) => + if preserveCom + then parse (tokStrm, addCom(state, s)) + else parse (tokStrm, state) + | (Tok.CDATA s, _, tokStrm) => parse (tokStrm, addCData(state, s)) + | (tok, span, _) => err(span, [S "impossible: unexpected ", TK tok]) + (* end case *)) + in + parse (tokStrm, {preWS=NONE, content=[]}) + end + (* expect: Attributes "?>" *) + and parseXMLDecl tokStrm = let + val (attrs, tokStrm) = parseAttributes tokStrm + in + case nextTok tokStrm + of (Tok.CLOSE_PI_TAG, _, tokStrm) => (SOME attrs, tokStrm) + | (tok, span, _) => err(span, [S "expected \"?>\", but found ", TK tok]) + (* end case *) + end + (* expect: ID (S ExternalID)? S? '>' + * where + * ExternalID ::= 'SYSTEM' LIT + * | 'PUBLIC' LIT LIT + *) + fun parseDOCTYPE tokStrm = let + val (id, tokStrm) = (case nextTok tokStrm + of (Tok.ID id, _, tokStrm) => (id, tokStrm) + | (tok, span, _) => err(span, [S "expected identifier, but found ", TK tok]) + (* end case *)) + fun getLiteral tokStrm = (case nextTok tokStrm + of (Tok.LIT lit, _, tokStrm) => (lit, tokStrm) + | (tok, span, _) => err (span, [S "expected literal, but found ", TK tok]) + (* end case *)) + val (external, tokStrm) = (case nextTok tokStrm + of (Tok.SYSTEM, _, tokStrm) => let + val (lit, tokStrm) = getLiteral tokStrm + in + (SOME(XT.SYSTEM lit), tokStrm) + end + | (Tok.PUBLIC, _, tokStrm) => let + val (lit1, tokStrm) = getLiteral tokStrm + val (lit2, tokStrm) = getLiteral tokStrm + in + (SOME(XT.PUBLIC(lit1, lit2)), tokStrm) + end + | _ => (NONE, tokStrm) + (* end case *)) + in + (* expect ">" *) + case nextTok tokStrm + of (Tok.CLOSE_TAG, _, tokStrm) => (SOME(XT.DOCTYPE(id, external)), tokStrm) + | (tok, span, tokStrm) => err(span, [S "expected \">\", but found ", TK tok]) + (* end case *) + end + (* initialize the token stream *) + val tokStrm = newTokenStrm ( + XMLLexer.streamifyInstream inStrm, + XMLLexer.lex srcMap (fn (pos, msg) => err((pos, pos), List.map S msg))) + (* parse the XML Decl (if any) *) + val (xmlDecl, tokStrm) = (case nextTok (skipWS tokStrm) + of (Tok.OPEN_XML_TAG, _, tokStrm) => parseXMLDecl tokStrm + | _ => (NONE, tokStrm) + (* end case *)) + (* parse the DOCTYPE (if any) *) + val (doctype, tokStrm) = (case nextTok (skipWS tokStrm) + of (Tok.OPEN_DOCTYPE, _, tokStrm) => parseDOCTYPE tokStrm + | _ => (NONE, tokStrm) + (* end case *)) +(* QUESTION: should we preserve comments at top-level by default? *) + val (body, _) = parseContent (skipWS tokStrm, false, false) + in + case body + of [] => error [S "empty document"] + | [elem as XT.ELEMENT _] => { + xmlDecl = xmlDecl, + doctype = doctype, + content = elem + } + | _ => error [S "body of document is not a single element"] + (* end case *) + end (* parser *) + +(* + (* parse XMLDecl? Content *) + and parse tokStrm = let + fun parse tokStrm = (case nextTok tokStrm + of (EOF, _) => {xmlDecl = xmlDecl, content = TEXT ""} + | (Tok.OPEN_START_TAG, tokStrm) => let + val finalState = parseStartTag (tokStrm, content, stk) + in + {xmlDecl = xmlDecl, content = ??} + end + | Tok.WS _ => parse tokStrm + | tok, _) => err(?, [S "impossible: unexpected ", TK tok]) + (* end case *)) + in + parse tokStrm before close tokStrm + end +*) + + fun parseFile file = let + val inStrm = TextIO.openIn file + val tree = parser (file, inStrm) + handle ex => (TextIO.closeIn inStrm; raise ex) + in + TextIO.closeIn inStrm; + tree + end + + end diff --git a/smlnj-lib/XML/xml-schema-sig.sml b/smlnj-lib/XML/xml-schema-sig.sml new file mode 100644 index 0000000..d2a92b7 --- /dev/null +++ b/smlnj-lib/XML/xml-schema-sig.sml @@ -0,0 +1,38 @@ +(* xml-schema-sig.sml + * + * COPYRIGHT (c) 2013 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * Information about an XML schema (or DTD) that is used in the implementation of + * a parser. + *) + +signature XML_SCHEMA = + sig + + type element + type attribute + + (* create an element; returns NONE if the element name is unrecognized *) + val element : string -> element option + + (* If this function returns true for an element, then all whitespace in the + * element's content is preserved. Otherwise, whitespace between tags is + * not preserved. Note that if true, this property is inherited by any + * nested elements. + *) + val preserveWS : element -> bool + + (* should comments be preserved *) + val preserveComment : element -> bool + + (* equality test *) + val same : element * element -> bool + + (* the string representation of the element (w/o the "<" and ">" brackets) *) + val toString : element -> string + + (* create an attribute from a name/value pair *) + val attribute : (string * string) -> attribute + + end diff --git a/smlnj-lib/XML/xml-tokens.sml b/smlnj-lib/XML/xml-tokens.sml new file mode 100644 index 0000000..e1c0da7 --- /dev/null +++ b/smlnj-lib/XML/xml-tokens.sml @@ -0,0 +1,52 @@ +(* xml-tokens.sml + * + * COPYRIGHT (c) 2013 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + *) + +structure XMLTokens = + struct + + datatype token + = EOF + | OPEN_START_TAG (* "<" *) + | OPEN_END_TAG (* "" *) + | CLOSE_EMPTY_TAG (* "/>" *) + | CLOSE_PI_TAG (* "?>" also closes XML tags *) + | SYM_EQ (* "=" inside a tag *) + | ID of string (* element or attribute name *) + | LIT of string (* quoted attribute value *) + (* the following tags are content *) + | TEXT of string (* non-whitespace/non-comment text *) + | WS of string (* whitespace *) + | COM of string (* XML comment; string does not include "" *) + | CDATA of string (* CDATA text; string does not include "" *) + | PUBLIC (* "PUBLIC" in *) + | SYSTEM (* "SYSTEM" in *) + + fun toString tok = (case tok + of EOF => "EOF" + | OPEN_START_TAG => "<" + | OPEN_END_TAG => " " " " ">" + | CLOSE_EMPTY_TAG => "/>" + | CLOSE_PI_TAG => "?>" + | SYM_EQ => "=" + | ID s => s + | LIT _ => "LIT" + | TEXT _ => "TEXT" + | WS _ => "WS" + | COM _=> "COM" + | CDATA _ => "CDATA" + | PUBLIC => "PUBLIC" + | SYSTEM => "SYSTEM" + (* end case *)) + + end diff --git a/smlnj-lib/XML/xml-tree-fn.sml b/smlnj-lib/XML/xml-tree-fn.sml new file mode 100644 index 0000000..56bf852 --- /dev/null +++ b/smlnj-lib/XML/xml-tree-fn.sml @@ -0,0 +1,36 @@ +(* xml-tree-fn.sml + * + * COPYRIGHT (c) 2013 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + *) + +functor XMLTreeFn (Schema : XML_SCHEMA) : XML_TREE = + struct + + structure Schema = Schema + + (* limited support for declarations. Internal subsets are not + * current supported. + *) + datatype doctype = DOCTYPE of string * external_id option + + and external_id + = SYSTEM of string + | PUBLIC of string * string + + datatype content + = TEXT of string + | CDATA of string + | ELEMENT of { + name : Schema.element, + attrs : Schema.attribute list, + content : content list + } + + type tree = { + xmlDecl : Schema.attribute list option, (* NONE if there is no decl *) + doctype : doctype option, + content : content (* will be an ELEMENT *) + } + + end diff --git a/smlnj-lib/XML/xml-tree-sig.sml b/smlnj-lib/XML/xml-tree-sig.sml new file mode 100644 index 0000000..91dc9bf --- /dev/null +++ b/smlnj-lib/XML/xml-tree-sig.sml @@ -0,0 +1,35 @@ +(* xml-tree-sig.sml + * + * COPYRIGHT (c) 2013 The Fellowship of SML/NJ (http://www.smlnj.org) + * All rights reserved. + * + * A tree representation of an XML file. + *) + +signature XML_TREE = + sig + + structure Schema : XML_SCHEMA + + datatype doctype = DOCTYPE of string * external_id option + + and external_id + = SYSTEM of string + | PUBLIC of string * string + + datatype content + = TEXT of string + | CDATA of string + | ELEMENT of { + name : Schema.element, + attrs : Schema.attribute list, + content : content list + } + + type tree = { + xmlDecl : Schema.attribute list option, (* NONE if there is no decl *) + doctype : doctype option, + content : content (* will be an ELEMENT *) + } + + end diff --git a/sumints.tgz b/sumints.tgz new file mode 100644 index 0000000..48477a8 Binary files /dev/null and b/sumints.tgz differ diff --git a/sumints/Makefile b/sumints/Makefile new file mode 100644 index 0000000..bf5d1a4 --- /dev/null +++ b/sumints/Makefile @@ -0,0 +1,29 @@ + +SYSTAG = amd64-linux +CLEANED_FILES = sumints-main.$(SYSTAG) sumints test101-out sumints.tgz + +sumints: sumints-main.$(SYSTAG) Makefile + echo "sml @SMLload=sumints-main.${SYSTAG}" > $@ + chmod u+x $@ + +sumints-main.$(SYSTAG): sumints.sml Makefile + sml $< + +test101: sumints + seq 101 | ./$< | tee test101-out + echo 5151 | diff - test101-out + +.PHONY: clean package + +clean: + rm -f $(CLEANED_FILES) + +package: sumints.tgz + +sumints.tgz: sumints.sml README Makefile + $(eval TDIR := $(shell mktemp -d make-package-XXXXXX)) + echo ${TDIR} + mkdir ${TDIR}/sumints + cp -p sumints.sml README Makefile ${TDIR}/sumints + cd ${TDIR} && tar zcf sumints.tgz sumints && mv sumints.tgz .. + rm -rf ${TDIR} diff --git a/sumints/README b/sumints/README new file mode 100644 index 0000000..b57ed58 --- /dev/null +++ b/sumints/README @@ -0,0 +1,13 @@ +This package provides a simple demonstration of creating and packaging +a standalone program using Standard ML of New Jersey. + +Copyright 2024-04-16 Sudarshan S Chawathe + +Running 'make' should compile the necessary files, producing +executable sumints. + +A simple test can be run using 'make test101'. + +See Makefile for more. + + diff --git a/sumints/chatgps.sml b/sumints/chatgps.sml new file mode 100644 index 0000000..efaffb3 --- /dev/null +++ b/sumints/chatgps.sml @@ -0,0 +1,22 @@ +fun read_input () = + let + val line = TextIO.inputLine TextIO.stdIn + in + case line of + SOME s => s :: read_input () + | NONE => [] + end; + +fun key s = ListMergeSort.sort Char.compare (explode s); + +fun sort_anagrams xs = + let + fun insert [] kv = [kv] + | insert ((k, vs) :: kvs) (k', v') = + if k = k' then (k, v' :: vs) :: kvs + else (k, vs) :: insert kvs (k', v') + fun build [] kvs = kvs + | build ((k, v) :: kvs) [] = build kvs [(k, [v])] + | build ((k, v) :: kvs) ((k', vs) :: kvs') = + if k = k' then build kvs ((k, v :: vs) :: kvs') + else build ((k, v) :: kvs) (insert kvs' (k', \ No newline at end of file diff --git a/sumints/sml.json b/sumints/sml.json new file mode 100644 index 0000000..f52fceb --- /dev/null +++ b/sumints/sml.json @@ -0,0 +1,5 @@ +{ + "cm": { + "make/onSave": "development.cm" + } +} \ No newline at end of file diff --git a/sumints/sumints b/sumints/sumints new file mode 100755 index 0000000..8749e19 --- /dev/null +++ b/sumints/sumints @@ -0,0 +1 @@ +sml @SMLload=sumints-main.amd64-linux diff --git a/sumints/sumints-main.amd64-linux b/sumints/sumints-main.amd64-linux new file mode 100644 index 0000000..40b2e1b Binary files /dev/null and b/sumints/sumints-main.amd64-linux differ diff --git a/sumints/sumints.sml b/sumints/sumints.sml new file mode 100644 index 0000000..45867bd --- /dev/null +++ b/sumints/sumints.sml @@ -0,0 +1,42 @@ +(* sumints: print to stdout the sum of a sequence of integers provided on stdin. + Copyright 2024-04-16 Sudarshan S Chawathe +*) + +(* Return a list of strings that are the whitespace-separated tokens + read from stdin (until EOF on stdin). +*) + + +fun readStdinToks () = + let + fun readLineToks acc = + case TextIO.inputLine TextIO.stdIn of + NONE => acc + | SOME s => readLineToks (acc @ (String.tokens Char.isSpace s)) + in + readLineToks [] + end + +(* Print a list of strings *) +fun printList [] = () + | printList (x::xs) = (print x; print "\n"; printList xs) + +(* Print to stdout the sum of the integers found on stdin. + Non-integer tokens are treated as zero-valued. +*) +fun main (p, args) = + let + fun compareStrings (x,y) = + case String.compare(x,y) of + LESS => false + | EQUAL => true + | GREATER => true + val listOfStrings = readStdinToks() + val () = printList(ListMergeSort.sort compareStrings listOfStrings) + in + 0 + end + +(* Compile main; see Makefile. +*) +val r = SMLofNJ.exportFn("sumints-main", main) diff --git a/trace-debug-profile.tgz b/trace-debug-profile.tgz new file mode 100644 index 0000000..6f7dfb7 Binary files /dev/null and b/trace-debug-profile.tgz differ diff --git a/trace-debug-profile/.cm/GUID/back-trace.sml b/trace-debug-profile/.cm/GUID/back-trace.sml new file mode 100644 index 0000000..78a34b8 --- /dev/null +++ b/trace-debug-profile/.cm/GUID/back-trace.sml @@ -0,0 +1 @@ +guid-$smlnj-tdp/(plugins.cm):back-trace.sml-1714016094.688 diff --git a/trace-debug-profile/.cm/GUID/coverage.sml b/trace-debug-profile/.cm/GUID/coverage.sml new file mode 100644 index 0000000..ee9d866 --- /dev/null +++ b/trace-debug-profile/.cm/GUID/coverage.sml @@ -0,0 +1 @@ +guid-$smlnj-tdp/(plugins.cm):coverage.sml-1714016094.742 diff --git a/trace-debug-profile/.cm/GUID/install-back-trace.sml b/trace-debug-profile/.cm/GUID/install-back-trace.sml new file mode 100644 index 0000000..0d9fe03 --- /dev/null +++ b/trace-debug-profile/.cm/GUID/install-back-trace.sml @@ -0,0 +1 @@ +guid-$smlnj-tdp/(back-trace.cm):install-back-trace.sml-1714016094.792 diff --git a/trace-debug-profile/.cm/GUID/install-coverage.sml b/trace-debug-profile/.cm/GUID/install-coverage.sml new file mode 100644 index 0000000..0a86ef0 --- /dev/null +++ b/trace-debug-profile/.cm/GUID/install-coverage.sml @@ -0,0 +1 @@ +guid-$smlnj-tdp/(coverage.cm):install-coverage.sml-1714016094.801 diff --git a/trace-debug-profile/.cm/SKEL/back-trace.sml b/trace-debug-profile/.cm/SKEL/back-trace.sml new file mode 100644 index 0000000..4c9928f --- /dev/null +++ b/trace-debug-profile/.cm/SKEL/back-trace.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f3d"SMLofNJ"d"Control"d"General"ad"BackTrace"jh1ad"M"gp1d"IntRedBlackMap"h0 \ No newline at end of file diff --git a/trace-debug-profile/.cm/SKEL/coverage.sml b/trace-debug-profile/.cm/SKEL/coverage.sml new file mode 100644 index 0000000..5adea4a --- /dev/null +++ b/trace-debug-profile/.cm/SKEL/coverage.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f6d"ListMergeSort"Cd"Control"d"List"d"Int"d"General"d"Array"Nad"Coverage"jh3ad"M"gp1d"IntRedBlackMap"ad"F"gp1d"FormatComb"aTDP"gp3d"SMLofNJ"d"Internals"=h0 \ No newline at end of file diff --git a/trace-debug-profile/.cm/SKEL/install-back-trace.sml b/trace-debug-profile/.cm/SKEL/install-back-trace.sml new file mode 100644 index 0000000..d05b86f --- /dev/null +++ b/trace-debug-profile/.cm/SKEL/install-back-trace.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f1d"BackTrace"ad"InstallBackTrace"h0 \ No newline at end of file diff --git a/trace-debug-profile/.cm/SKEL/install-coverage.sml b/trace-debug-profile/.cm/SKEL/install-coverage.sml new file mode 100644 index 0000000..2091ffd --- /dev/null +++ b/trace-debug-profile/.cm/SKEL/install-coverage.sml @@ -0,0 +1,2 @@ +Skeleton 5 +d2f1d"Coverage"ad"InstallCoverage"h0 \ No newline at end of file diff --git a/trace-debug-profile/.cm/amd64-unix/back-trace.sml b/trace-debug-profile/.cm/amd64-unix/back-trace.sml new file mode 100644 index 0000000..6b8f43e Binary files /dev/null and b/trace-debug-profile/.cm/amd64-unix/back-trace.sml differ diff --git a/trace-debug-profile/.cm/amd64-unix/coverage.sml b/trace-debug-profile/.cm/amd64-unix/coverage.sml new file mode 100644 index 0000000..d3f4b77 Binary files /dev/null and b/trace-debug-profile/.cm/amd64-unix/coverage.sml differ diff --git a/trace-debug-profile/.cm/amd64-unix/install-back-trace.sml b/trace-debug-profile/.cm/amd64-unix/install-back-trace.sml new file mode 100644 index 0000000..bbbaa12 Binary files /dev/null and b/trace-debug-profile/.cm/amd64-unix/install-back-trace.sml differ diff --git a/trace-debug-profile/.cm/amd64-unix/install-coverage.sml b/trace-debug-profile/.cm/amd64-unix/install-coverage.sml new file mode 100644 index 0000000..ab5d9ed Binary files /dev/null and b/trace-debug-profile/.cm/amd64-unix/install-coverage.sml differ diff --git a/trace-debug-profile/back-trace.cm b/trace-debug-profile/back-trace.cm new file mode 100644 index 0000000..99aa692 --- /dev/null +++ b/trace-debug-profile/back-trace.cm @@ -0,0 +1,15 @@ +(* back-trace.cm + * + * Library that (when loaded via CM.make) causes the test back-trace + * plugin to be installed into its core hook. + * + * Copyright (c) 2004 by The Fellowship of SML/NJ + * + * Author: Matthias Blume (blume@tti-c.org) + *) +Library + structure BackTrace + structure InstallBackTrace +is + $smlnj-tdp/plugins.cm + install-back-trace.sml diff --git a/trace-debug-profile/back-trace.sml b/trace-debug-profile/back-trace.sml new file mode 100644 index 0000000..6b96b9d --- /dev/null +++ b/trace-debug-profile/back-trace.sml @@ -0,0 +1,237 @@ +(* back-trace.sml + * + * A plug-in module for back-tracing. This module hooks itself into + * the core environment so that tdp-instrumented code will invoke the + * provided functions "enter", "push", "save", and "report". + * + * This module keeps track of the dynamic call-chain of instrumented modules. + * Non-tail calls are maintained in a stack-like fashion, and in addition + * to this the module will also track tail-calls so that a sequence of + * GOTO-like jumps from loop-cluster to loop-cluster can be shown. + * + * This strategy, while certainly costly, has no more than constant-factor + * overhead in space and time and will keep tail-recursive code + * tail-recursive. + * + * Copyright (c) 2004 by The Fellowship of SML/NJ + * + * Author: Matthias Blume (blume@tti-c.org) + *) +structure BackTrace : sig + val trigger : unit -> 'a + val monitor : (unit -> 'a) -> 'a + val install : unit -> unit +end = struct + + structure M = IntRedBlackMap + + (* Home-cooked set representation: + * This relies on two things: + * - we don't need a lookup operation + * - we only join sets that are known to be disjoint *) + datatype set = + EMPTY + | SINGLETON of int + | UNION of set * set + + fun fold f i EMPTY = i + | fold f i (SINGLETON x) = f (x, i) + | fold f i (UNION (x, y)) = fold f (fold f i y) x + + datatype descr = + STEP of int + | LOOP of set + + type stage = { num: int, from: int, descr: descr } + + type frame = { depth: int, map: int M.map, stages: stage list } + + type history = frame * frame list + + datatype state = + NORMAL of history + | PENDING of int * history + + val cur : state ref = + ref (NORMAL ({ depth = 0, map = M.empty, stages = [] }, [])) + + val names = ref (M.empty: string M.map) + + fun register (module, _: int, id, s) = + names := M.insert (!names, module + id, s) + + fun enter (module, fct) = let + val i = module + fct + val (from, front, back) = + case !cur of + PENDING (from, (front, back)) => (from, front, back) + | NORMAL (front, back) => (~1, front, back) + val { depth, map, stages } = front + in + case M.find (map, i) of + SOME num => let + fun toSet (STEP i) = SINGLETON i + | toSet (LOOP s) = s + fun join (set, d) = UNION (set, toSet d) + fun finish (stages, from, c, EMPTY) = + let val stage = { num = num, from = from, + descr = LOOP (toSet c) } + val front' = { depth = depth, + map = map, + stages = stage :: stages } + in + cur := NORMAL (front', back) + end + | finish (stages, from, c, set) = + let val stage = { num = num, from = from, + descr = LOOP (join (set, c)) } + fun ins (i, m) = M.insert (m, i, num) + val front' = { depth = depth, + map = fold ins map set, + stages = stage :: stages } + in + cur := NORMAL (front', back) + end + fun loop ([], set) = () (* cannot happen! *) + | loop ({ num = n', from, descr = d' } :: t, set) = + if num = n' then finish (t, from, d', set) + else loop (t, join (set, d')) + in + loop (stages, EMPTY) + end + | NONE => let + val num = case stages of + [] => 0 + | s0 :: _ => #num s0 + 1 + val stage = { num = num, from = from, descr = STEP i} + val front' = { depth = depth, + map = M.insert (map, i, num), + stages = stage :: stages } + in + cur := NORMAL (front' , back) + end + end + + fun push (module, loc) = let + val id = module + loc + val (NORMAL old | PENDING (_, old)) = !cur + val (front, _) = old + val front' = { depth = #depth front + 1, map = M.empty, stages = [] } + in + cur := PENDING (id, (front', op :: old)); + fn () => cur := NORMAL old + end + + fun nopush (module, loc) = let + val id = module + loc + val (NORMAL old | PENDING (_, old)) = !cur + in + cur := PENDING (id, old) + end + + fun save () = let + val old = !cur + in + fn () => cur := old + end + + fun report () = let + val (NORMAL top | PENDING (_, top)) = !cur + val (front, back) = top + fun do_report () = let + val (NORMAL bot | PENDING (_, bot)) = !cur + val (front', _) = bot + val bot_depth = #depth front' + fun isBot (f: frame) = #depth f = bot_depth + fun name (w, pad, from, i) = let + fun find x = getOpt (M.find (!names, x), "???") + val n = find i + val tail = case from of + NONE => ["\n"] + | SOME j => ["\n (from: ", find j, ")\n"] + in + concat (w :: pad :: " " :: n :: tail) + end + fun stage (w, { num, from, descr = STEP i }, a) = + name (w, " ", SOME from, i) :: a + | stage (w, { num, from, descr = LOOP s }, a) = let + fun loop ([], a) = a + | loop ([i], a) = name (w, "-\\", SOME from, i) :: a + | loop (h :: t, a) = + loop (t, name (" ", " |", NONE, h) :: a) + fun start ([], a) = a + | start ([i], a) = name (w, "-(", SOME from, i) :: a + | start (h :: t, a) = + loop (t, name (" ", " /", NONE, h) :: a) + in + start (fold (op ::) [] s, a) + end + fun jumps ([], a) = a + | jumps ([n], a) = stage ("CALL", n, a) + | jumps (h :: t, a) = jumps (t, stage ("GOTO", h, a)) + fun calls (h, [], a) = jumps (#stages h, a) + | calls (h, h' :: t, a) = let + val a = jumps (#stages h, a) + in + if isBot h then a else calls (h', t, a) + end + in + rev (calls (front, back, [])) + end + in + do_report + end + + exception BTraceTriggered of unit -> string list + + fun monitor0 (report_final_exn, work) = + let val restore = save () + fun last (x, []) = x + | last (_, x :: xs) = last (x, xs) + fun emsg e = + case SMLofNJ.exnHistory e of + [] => General.exnMessage e + | (h :: t) => + concat [last (h, t), ": ", General.exnMessage e] + fun hdl (e, []) = + (if report_final_exn then + Control.Print.say (emsg e ^ "\n\n") + else (); + raise e) + | hdl (e, hist) = + (Control.Print.say + (concat ("\n*** BACK-TRACE ***\n" :: hist)); + if report_final_exn then + Control.Print.say (concat ["\n", emsg e, "\n\n"]) + else (); + raise e) + in + work () + handle e as BTraceTriggered do_report => + (restore (); + hdl (e, do_report ())) + | e => + let val do_report = report () + in + restore (); + hdl (e, do_report ()) + end + end + + fun monitor work = monitor0 (true, work) + + val name = "btrace" + + fun install () = + let val plugin = { name = name, save = save, + push = push, nopush = nopush, + enter = enter, register = register } + val monitor = { name = name, monitor = monitor0 } + fun addto r x = r := x :: !r + in + addto SMLofNJ.Internals.TDP.active_plugins plugin; + addto SMLofNJ.Internals.TDP.active_monitors monitor + end + + fun trigger () = raise BTraceTriggered (report ()) +end diff --git a/trace-debug-profile/coverage.cm b/trace-debug-profile/coverage.cm new file mode 100644 index 0000000..46bd2d7 --- /dev/null +++ b/trace-debug-profile/coverage.cm @@ -0,0 +1,15 @@ +(* coverage.cm + * + * Library that (when loaded via CM.make) causes the test coverage + * plugin to be installed into its core hook. + * + * Copyright (c) 2004 by The Fellowship of SML/NJ + * + * Author: Matthias Blume (blume@tti-c.org) + *) +Library + structure Coverage + structure InstallCoverage +is + $smlnj-tdp/plugins.cm + install-coverage.sml diff --git a/trace-debug-profile/coverage.sml b/trace-debug-profile/coverage.sml new file mode 100644 index 0000000..8db9730 --- /dev/null +++ b/trace-debug-profile/coverage.sml @@ -0,0 +1,108 @@ +(* coverage.sml + * + * Using the generic trace/debug/profile framework for test coverage. + * + * Copyright (c) 2004 by The Fellowship of SML/NJ + * + * Author: Matthias Blume (blume@tti-c.org) + *) +structure Coverage : sig + + type kind + + val functions: kind + val tail_calls: kind + val non_tail_calls: kind + + val not_covered : kind list -> unit + val hot_spots : kind list -> int -> unit + + val install : unit -> unit +end = struct + + structure M = IntRedBlackMap + structure F = FormatComb + + structure TDP = SMLofNJ.Internals.TDP + + type kind = int + val functions = TDP.idk_entry_point + val tail_calls = TDP.idk_tail_call + val non_tail_calls = TDP.idk_non_tail_call + + type record = { kind : int, descr: string } + + val records = ref (M.empty : record M.map) + + val counters = ref (Array.fromList [0]) + + fun count idx = Array.sub (!counters, idx) handle General.Subscript => 0 + + fun bump (module, id) = + let val idx = module + id + val a = !counters + in + Array.update (a, idx, Array.sub (a, idx) + 1) + handle General.Subscript => + let val olen = Array.length a + val nlen = Int.min (idx + 1, olen + olen) + fun cp i = if i < olen then Array.sub (a, i) + else if i = idx then 1 + else 0 + in + counters := Array.tabulate (nlen, cp) + end + end + + val enter = bump + fun push mi = (bump mi; fn () => ()) + val nopush = bump + + fun register (module, kind, id, s) = + let val idx = module + id + val r = { kind = kind, descr = s } + in + records := M.insert (!records, idx, r) + end + + fun save () () = () + + val name = "coverage" + + fun install () = + let val plugin = { name = name, save = save, + push = push, nopush = nopush, + enter = enter, register = register } + fun addto r x = r := x :: !r + in + addto TDP.active_plugins plugin + end + + fun not_covered kinds = + let fun zerocnt (idx, r: record) = + count idx = 0 andalso List.exists (fn k => k = #kind r) kinds + val zrecords = M.filteri zerocnt (!records) + fun tell { descr, kind } = + Control.Print.say (descr ^ "\n") + in + M.app tell zrecords + end + + fun hot_spots kinds n = + let fun getcount (idx, r: record) = + if List.exists (fn k => k = #kind r) kinds then + SOME (#descr r, count idx) + else NONE + val countmap = M.mapPartiali getcount (!records) + val countlist = M.listItems countmap + fun lt ((_, c), (_, c')) = c < c' + val sortedcountlist = ListMergeSort.sort lt countlist + fun loop ([], _) = () + | loop (_, 0) = () + | loop ((descr, count) :: rest, n) = + (Control.Print.say (F.format (F.padl 3 F.int o F.sp 1 o F.string o F.nl) count descr); + loop (rest, n - 1)) + in + loop (sortedcountlist, n) + end +end diff --git a/trace-debug-profile/install-back-trace.sml b/trace-debug-profile/install-back-trace.sml new file mode 100644 index 0000000..3929d6a --- /dev/null +++ b/trace-debug-profile/install-back-trace.sml @@ -0,0 +1,12 @@ +(* install-back-trace.sml + * + * A module that causes (at link time) to have the back-trace + * plugin installed into its core hook. + * + * Copyright (c) 2004 by The Fellowship of SML/NJ + * + * Author: Matthias Blume (blume@tti-c.org) + *) +structure InstallBackTrace = struct + val _ = BackTrace.install () +end diff --git a/trace-debug-profile/install-coverage.sml b/trace-debug-profile/install-coverage.sml new file mode 100644 index 0000000..5be1024 --- /dev/null +++ b/trace-debug-profile/install-coverage.sml @@ -0,0 +1,12 @@ +(* install-coverage.sml + * + * A module that causes (at link time) to have the test coverage + * plugin installed into its core hook. + * + * Copyright (c) 2004 by The Fellowship of SML/NJ + * + * Author: Matthias Blume (blume@tti-c.org) + *) +structure InstallCoverage = struct + val _ = Coverage.install () +end diff --git a/trace-debug-profile/plugins.cm b/trace-debug-profile/plugins.cm new file mode 100644 index 0000000..e32b1de --- /dev/null +++ b/trace-debug-profile/plugins.cm @@ -0,0 +1,19 @@ +(* plugins.cm + * + * Library of plug-in modules for tracing, debugging, and profiling. + * + * Copyright (c) 2004 by The Fellowship of SML/NJ + * + * Author: Matthias Blume (blume@tti-c.org) + *) + +Library + structure BackTrace + structure Coverage +is + back-trace.sml + coverage.sml + + $smlnj/basis/basis.cm + $smlnj/smlnj-lib/smlnj-lib.cm + $smlnj/compiler.cm